From 3784c8c23801a387d0b74e76f985a82ea79f1e38 Mon Sep 17 00:00:00 2001 From: buran Date: Tue, 8 Jan 2019 16:06:53 +0000 Subject: [PATCH 001/223] Had to stop working on new stuff to keep fixing 0-1 --- VariantValidator/modules/__init__.py | 0 .../__init__.py => modules/defaultConfig.ini} | 33 +- VariantValidator/modules/vvObjects.py | 82 + VariantValidator/testing/new_variants.txt | 15582 ---------------- VariantValidator/testing/test_vv.py | 2 +- VariantValidator/testing/vvTestFunctions.py | 12 +- VariantValidator/testing/vvTestSave.py | 67 +- 7 files changed, 137 insertions(+), 15641 deletions(-) create mode 100644 VariantValidator/modules/__init__.py rename VariantValidator/{data/__init__.py => modules/defaultConfig.ini} (64%) create mode 100644 VariantValidator/modules/vvObjects.py delete mode 100644 VariantValidator/testing/new_variants.txt diff --git a/VariantValidator/modules/__init__.py b/VariantValidator/modules/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/VariantValidator/data/__init__.py b/VariantValidator/modules/defaultConfig.ini similarity index 64% rename from VariantValidator/data/__init__.py rename to VariantValidator/modules/defaultConfig.ini index f44f5309..ad76e6ee 100644 --- a/VariantValidator/data/__init__.py +++ b/VariantValidator/modules/defaultConfig.ini @@ -1,15 +1,40 @@ +[variantValidator] +version = 0.1.0_dev_pre_a +release_date = tbc + +[mysql] +host = 127.0.0.1 +database = validator +user = vvadmin +password = var1ant + +[seqrepo] +version='2018-08-21' +location + +[uta] +version='uta_20180821' +location='postgresql://uta_admin:uta_admin@127.0.0.1/uta/' + +[logging] +string = info console + +[EntrezID] +entrezid = admin@variantvalidator.org + +# # Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# +# # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as # published by the Free Software Foundation, either version 3 
of the # License, or (at your option) any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Affero General Public License for more details. -# +# # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -# \ No newline at end of file +# diff --git a/VariantValidator/modules/vvObjects.py b/VariantValidator/modules/vvObjects.py new file mode 100644 index 00000000..2eae0048 --- /dev/null +++ b/VariantValidator/modules/vvObjects.py @@ -0,0 +1,82 @@ +import os +from configparser import ConfigParser,RawConfigParser +import io + +# uta_current_version='uta_20180821' +# UTA_DB_URL='postgresql://uta_admin:uta_admin@127.0.0.1/uta/' + uta_current_version +# seqrepo_current_version='2018-08-21' +# HGVS_SEQREPO_DIR='/home/buran/documents/workspace/ITS/seqrepo/'+seqrepo_current_version + +class Validator(): + #This object contains configuration options. + def __init__(self,hgvsPath=None,utaPath=None): + #First load from the configuration file, if it exists. + configName="config.ini" + homePath=os.path.expanduser("~") + configPath=os.path.join(homePath,".config","VariantValidator") + if not os.path.isdir(configPath): + os.makedirs(configPath) + #Now configpath points to the config file itself. + configPath=os.path.join(configPath,configName) + #Does the file exist? + if not os.path.exists(configPath): + self.createConfig(configPath) + + #Load the configuration file. 
+ with open(configPath) as file: + lines=file.read() + config=RawConfigParser(allow_no_value=True) + #print(configPath) + config.read(configPath) + #print config.sections() + print config["seqrepo"]["location"] + ''' + #Load hgvs + if hgvsPath!=None: + os.environ['HGVS_SEQREPO_DIR']=hgvsPath + self.hgvsPath=hgvsPath + else: + self.hgvsPath=hgvsPath + if utaPath!=None: + os.environ['UTA_DB_URL']=utaPath + self.utaPath=utaPath + else: + self.utaPath=utaPath + seqrepo_current_version='2018-08-21' + HGVS_SEQREPO_DIR='/home/buran/documents/workspace/ITS/seqrepo/'+seqrepo_current_version + #HGVS_SEQREPO_DIR='/local/seqrepo/'+seqrepo_current_version + os.environ['HGVS_SEQREPO_DIR']=HGVS_SEQREPO_DIR + uta_current_version='uta_20180821' + UTA_DB_URL='postgresql://uta_admin:uta_admin@127.0.0.1/uta/' + uta_current_version + #export postgresql://uta_admin:uta_admin@127.0.0.1/uta/uta_20180821 + os.environ['UTA_DB_URL']=UTA_DB_URL + #from VariantValidator import variantValidator as vv + + ''' + + def validate(self): + pass + + def createConfig(self,outPath): + #This function reads from the default configuration file stored in the same folder as this module. + #Outpath should include a filename. 
+ lines=[] + inPath=os.path.join(os.path.dirname(os.path.realpath(__file__)),"defaultConfig.ini") +# print(os.path.join(inPath,"defaultConfig.ini")) + with open(inPath) as file: + for l in file: + lines.append(l) + with open(outPath, "w") as file: + for l in lines: + file.write(l) + + +class Validation(): + #Validation objects contain a number of variant interpretations + pass + +class ValOutput(): + #This object contains a single possible interpretation of a variant + pass + + diff --git a/VariantValidator/testing/new_variants.txt b/VariantValidator/testing/new_variants.txt deleted file mode 100644 index f119b0c8..00000000 --- a/VariantValidator/testing/new_variants.txt +++ /dev/null @@ -1,15582 +0,0 @@ -[ - { - "NC_000016.9:g.2099572TC>T": { - "NM_000548.3 Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "LRG_487t1:c.138+821del", - "HGVS_LRG_variant": "LRG_487:g.5269del", - "HGVS_RefSeqGene_variant": "NG_005895.1:g.5269del", - "HGVS_predicted_protein_consequence": "NP_000539.2(LRG_487p1):p.?", - "HGVS_transcript_variant": "NM_000548.3:c.138+821del", - "RefSeqGene_context_intronic_sequence": "NG_005895.1(NM_000548.3):c.138+821del", - "alt_genomic_loci": [], - "gene_symbol": "TSC2", - "genome_context_intronic_sequence": "NC_000016.9(NM_000548.3):c.138+821del", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000016.9:g.2099575del", - "vcf": { - "alt": "T", - "chr": "16", - "pos": "2099572", - "ref": "TC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000016.9:g.2099575del", - "vcf": { - "alt": "T", - "chr": "chr16", - "pos": "2099572", - "ref": "TC" - } - } - }, - "submitted_variant": "NC_000016.9:g.2099572TC>T", - "transcript_description": "Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 1, mRNA", - "validation_warnings": [ - "NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC", - "A more recent version of the selected reference sequence 
NM_000548.3 is available (NM_000548.4)", - "NM_000548.4:c.138+821del MUST be fully validated prior to use in reports", - "select_variants=NM_000548.4:c.138+821del" - ] - }, - "NM_000548.4 Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_000539.2(LRG_487p1):p.?", - "HGVS_transcript_variant": "NM_000548.4:c.138+821del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "TSC2", - "genome_context_intronic_sequence": "NC_000016.9(NM_000548.4):c.138+821del", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000016.9:g.2099575del", - "vcf": { - "alt": "T", - "chr": "16", - "pos": "2099572", - "ref": "TC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000016.10:g.2049574del", - "vcf": { - "alt": "T", - "chr": "16", - "pos": "2049571", - "ref": "TC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000016.9:g.2099575del", - "vcf": { - "alt": "T", - "chr": "chr16", - "pos": "2099572", - "ref": "TC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000016.10:g.2049574del", - "vcf": { - "alt": "T", - "chr": "chr16", - "pos": "2049571", - "ref": "TC" - } - } - }, - "submitted_variant": "NC_000016.9:g.2099572TC>T", - "transcript_description": "Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 1, mRNA", - "validation_warnings": [ - "NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC", - "RefSeqGene record not available" - ] - }, - "NM_001077183.1 Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 4, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001070651.1:p.?", - "HGVS_transcript_variant": "NM_001077183.1:c.138+821del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - 
"gene_symbol": "TSC2", - "genome_context_intronic_sequence": "NC_000016.9(NM_001077183.1):c.138+821del", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000016.9:g.2099575del", - "vcf": { - "alt": "T", - "chr": "16", - "pos": "2099572", - "ref": "TC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000016.9:g.2099575del", - "vcf": { - "alt": "T", - "chr": "chr16", - "pos": "2099572", - "ref": "TC" - } - } - }, - "submitted_variant": "NC_000016.9:g.2099572TC>T", - "transcript_description": "Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 4, mRNA", - "validation_warnings": [ - "NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC", - "A more recent version of the selected reference sequence NM_001077183.1 is available (NM_001077183.2)", - "NM_001077183.2:c.138+821del MUST be fully validated prior to use in reports", - "select_variants=NM_001077183.2:c.138+821del", - "RefSeqGene record not available" - ] - }, - "NM_001077183.2 Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 4, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001070651.1:p.?", - "HGVS_transcript_variant": "NM_001077183.2:c.138+821del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "TSC2", - "genome_context_intronic_sequence": "NC_000016.9(NM_001077183.2):c.138+821del", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000016.9:g.2099575del", - "vcf": { - "alt": "T", - "chr": "16", - "pos": "2099572", - "ref": "TC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000016.10:g.2049574del", - "vcf": { - "alt": "T", - "chr": "16", - "pos": "2049571", - "ref": "TC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000016.9:g.2099575del", - "vcf": { - "alt": "T", - "chr": "chr16", - "pos": "2099572", - "ref": "TC" - } - }, - "hg38": { - 
"HGVS_genomic_description": "NC_000016.10:g.2049574del", - "vcf": { - "alt": "T", - "chr": "chr16", - "pos": "2049571", - "ref": "TC" - } - } - }, - "submitted_variant": "NC_000016.9:g.2099572TC>T", - "transcript_description": "Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 4, mRNA", - "validation_warnings": [ - "NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC", - "RefSeqGene record not available" - ] - }, - "NM_001114382.1 Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 5, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001107854.1:p.?", - "HGVS_transcript_variant": "NM_001114382.1:c.138+821del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "TSC2", - "genome_context_intronic_sequence": "NC_000016.9(NM_001114382.1):c.138+821del", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000016.9:g.2099575del", - "vcf": { - "alt": "T", - "chr": "16", - "pos": "2099572", - "ref": "TC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000016.9:g.2099575del", - "vcf": { - "alt": "T", - "chr": "chr16", - "pos": "2099572", - "ref": "TC" - } - } - }, - "submitted_variant": "NC_000016.9:g.2099572TC>T", - "transcript_description": "Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 5, mRNA", - "validation_warnings": [ - "NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC", - "A more recent version of the selected reference sequence NM_001114382.1 is available (NM_001114382.2)", - "NM_001114382.2:c.138+821del MUST be fully validated prior to use in reports", - "select_variants=NM_001114382.2:c.138+821del", - "RefSeqGene record not available" - ] - }, - "NM_001114382.2 Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 5, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - 
"HGVS_predicted_protein_consequence": "NP_001107854.1:p.?", - "HGVS_transcript_variant": "NM_001114382.2:c.138+821del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "TSC2", - "genome_context_intronic_sequence": "NC_000016.9(NM_001114382.2):c.138+821del", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000016.9:g.2099575del", - "vcf": { - "alt": "T", - "chr": "16", - "pos": "2099572", - "ref": "TC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000016.10:g.2049574del", - "vcf": { - "alt": "T", - "chr": "16", - "pos": "2049571", - "ref": "TC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000016.9:g.2099575del", - "vcf": { - "alt": "T", - "chr": "chr16", - "pos": "2099572", - "ref": "TC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000016.10:g.2049574del", - "vcf": { - "alt": "T", - "chr": "chr16", - "pos": "2049571", - "ref": "TC" - } - } - }, - "submitted_variant": "NC_000016.9:g.2099572TC>T", - "transcript_description": "Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 5, mRNA", - "validation_warnings": [ - "NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC", - "RefSeqGene record not available" - ] - }, - "NM_001318827.1 Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 6, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001305756.1:p.?", - "HGVS_transcript_variant": "NM_001318827.1:c.138+821del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "TSC2", - "genome_context_intronic_sequence": "NC_000016.9(NM_001318827.1):c.138+821del", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000016.9:g.2099575del", - "vcf": { - "alt": "T", - "chr": "16", - "pos": "2099572", - "ref": "TC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000016.10:g.2049574del", - 
"vcf": { - "alt": "T", - "chr": "16", - "pos": "2049571", - "ref": "TC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000016.9:g.2099575del", - "vcf": { - "alt": "T", - "chr": "chr16", - "pos": "2099572", - "ref": "TC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000016.10:g.2049574del", - "vcf": { - "alt": "T", - "chr": "chr16", - "pos": "2049571", - "ref": "TC" - } - } - }, - "submitted_variant": "NC_000016.9:g.2099572TC>T", - "transcript_description": "Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 6, mRNA", - "validation_warnings": [ - "NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC", - "RefSeqGene record not available" - ] - }, - "NM_001318829.1 Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 7, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001305758.1:p.?", - "HGVS_transcript_variant": "NM_001318829.1:c.-9-826del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "TSC2", - "genome_context_intronic_sequence": "NC_000016.9(NM_001318829.1):c.-9-826del", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000016.9:g.2099575del", - "vcf": { - "alt": "T", - "chr": "16", - "pos": "2099572", - "ref": "TC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000016.10:g.2049574del", - "vcf": { - "alt": "T", - "chr": "16", - "pos": "2049571", - "ref": "TC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000016.9:g.2099575del", - "vcf": { - "alt": "T", - "chr": "chr16", - "pos": "2099572", - "ref": "TC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000016.10:g.2049574del", - "vcf": { - "alt": "T", - "chr": "chr16", - "pos": "2049571", - "ref": "TC" - } - } - }, - "submitted_variant": "NC_000016.9:g.2099572TC>T", - "transcript_description": "Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 7, mRNA", - 
"validation_warnings": [ - "NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC", - "RefSeqGene record not available" - ] - }, - "NM_001318831.1 Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 8, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001305760.1:p.?", - "HGVS_transcript_variant": "NM_001318831.1:c.-89+821del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "TSC2", - "genome_context_intronic_sequence": "NC_000016.9(NM_001318831.1):c.-89+821del", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000016.9:g.2099575del", - "vcf": { - "alt": "T", - "chr": "16", - "pos": "2099572", - "ref": "TC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000016.10:g.2049574del", - "vcf": { - "alt": "T", - "chr": "16", - "pos": "2049571", - "ref": "TC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000016.9:g.2099575del", - "vcf": { - "alt": "T", - "chr": "chr16", - "pos": "2099572", - "ref": "TC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000016.10:g.2049574del", - "vcf": { - "alt": "T", - "chr": "chr16", - "pos": "2049571", - "ref": "TC" - } - } - }, - "submitted_variant": "NC_000016.9:g.2099572TC>T", - "transcript_description": "Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 8, mRNA", - "validation_warnings": [ - "NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC", - "RefSeqGene record not available" - ] - }, - "NM_001318832.1 Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 9, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001305761.1:p.?", - "HGVS_transcript_variant": "NM_001318832.1:c.171+821del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "TSC2", - 
"genome_context_intronic_sequence": "NC_000016.9(NM_001318832.1):c.171+821del", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000016.9:g.2099575del", - "vcf": { - "alt": "T", - "chr": "16", - "pos": "2099572", - "ref": "TC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000016.10:g.2049574del", - "vcf": { - "alt": "T", - "chr": "16", - "pos": "2049571", - "ref": "TC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000016.9:g.2099575del", - "vcf": { - "alt": "T", - "chr": "chr16", - "pos": "2099572", - "ref": "TC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000016.10:g.2049574del", - "vcf": { - "alt": "T", - "chr": "chr16", - "pos": "2049571", - "ref": "TC" - } - } - }, - "submitted_variant": "NC_000016.9:g.2099572TC>T", - "transcript_description": "Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 9, mRNA", - "validation_warnings": [ - "NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC", - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000088.3:c.589GG>CT": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.589_590delinsCT", - "HGVS_LRG_variant": "LRG_1:g.8638_8639delinsCT", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.8638_8639delinsCT", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.(Gly197Leu)", - "HGVS_transcript_variant": "NM_000088.3:c.589_590delinsCT", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48275362_48275363delinsAG", - "vcf": { - "alt": "AG", - "chr": "17", - "pos": "48275362", - "ref": "CC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50198001_50198002delinsAG", - "vcf": { - "alt": "AG", - "chr": "17", - "pos": "50198001", - "ref": 
"CC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48275362_48275363delinsAG", - "vcf": { - "alt": "AG", - "chr": "chr17", - "pos": "48275362", - "ref": "CC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50198001_50198002delinsAG", - "vcf": { - "alt": "AG", - "chr": "chr17", - "pos": "50198001", - "ref": "CC" - } - } - }, - "submitted_variant": "NM_000088.3:c.589GG>CT", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [ - "NM_000088.3:c.589GG>CT automapped to NM_000088.3:c.589_590delGGinsCT" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000094.3:c.6751-2_6751-3del": { - "Validation_Warning_1": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "NM_000094.3:c.6751-2_6751-3del", - "transcript_description": "", - "validation_warnings": [ - "base start position must be <= end position", - "Did you mean NM_000094.3:c.6751-3_6751-2del?" 
- ] - }, - "flag": "warning" - } - }, - { - "COL5A1:c.5071A>T": { - "Validation_Warning_1": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "COL5A1:c.5071A>T", - "transcript_description": "", - "validation_warnings": [ - "HGVS variant nomenclature does not allow the use of a gene symbol (COL5A1) in place of a valid reference sequence", - "Re-submit COL5A1:c.5071A>T and specify transcripts from the following", - "select_transcripts=NM_000093.4|NM_000093.3|NM_001278074.1" - ] - }, - "flag": "warning" - } - }, - { - "NG_007400.1:c.5071A>T": { - "Validation_Warning_1": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "NG_007400.1:c.5071A>T", - "transcript_description": "", - "validation_warnings": [ - "A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation. 
Re-submit NG_007400.1:c.5071A>T but also specify transcripts from the following", - "select_transcripts=NM_000088.3" - ] - }, - "flag": "warning" - } - }, - { - "chr16:15832508_15832509delinsAC": { - "NM_001040113.1 Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2B, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_009299.1:g.123379_123380inv", - "HGVS_predicted_protein_consequence": "NP_001035202.1:p.(Thr1019Val)", - "HGVS_transcript_variant": "NM_001040113.1:c.3055_3056inv", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh38": { - "HGVS_genomic_description": "NT_187607.1:g.1396662_1396663inv", - "vcf": { - "alt": "AC", - "chr": "HSCHR16_1_CTG1", - "pos": "1396662", - "ref": "GT" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NT_187607.1:g.1396662_1396663inv", - "vcf": { - "alt": "AC", - "chr": "chr16_KI270853v1_alt", - "pos": "1396662", - "ref": "GT" - } - } - } - ], - "gene_symbol": "MYH11", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000016.9:g.15832508_15832509inv", - "vcf": { - "alt": "AC", - "chr": "16", - "pos": "15832508", - "ref": "GT" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000016.10:g.15738651_15738652inv", - "vcf": { - "alt": "AC", - "chr": "16", - "pos": "15738651", - "ref": "GT" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000016.9:g.15832508_15832509inv", - "vcf": { - "alt": "AC", - "chr": "chr16", - "pos": "15832508", - "ref": "GT" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000016.10:g.15738651_15738652inv", - "vcf": { - "alt": "AC", - "chr": "chr16", - "pos": "15738651", - "ref": "GT" - } - } - }, - "submitted_variant": "chr16:15832508_15832509delinsAC", - "transcript_description": "Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2B, mRNA", - "validation_warnings": [] - }, - "NM_001040114.1 Homo 
sapiens myosin heavy chain 11 (MYH11), transcript variant SM1B, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001035203.1:p.(Thr1019Val)", - "HGVS_transcript_variant": "NM_001040114.1:c.3055_3056inv", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh38": { - "HGVS_genomic_description": "NT_187607.1:g.1396662_1396663inv", - "vcf": { - "alt": "AC", - "chr": "HSCHR16_1_CTG1", - "pos": "1396662", - "ref": "GT" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NT_187607.1:g.1396662_1396663inv", - "vcf": { - "alt": "AC", - "chr": "chr16_KI270853v1_alt", - "pos": "1396662", - "ref": "GT" - } - } - } - ], - "gene_symbol": "MYH11", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000016.9:g.15832508_15832509inv", - "vcf": { - "alt": "AC", - "chr": "16", - "pos": "15832508", - "ref": "GT" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000016.10:g.15738651_15738652inv", - "vcf": { - "alt": "AC", - "chr": "16", - "pos": "15738651", - "ref": "GT" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000016.9:g.15832508_15832509inv", - "vcf": { - "alt": "AC", - "chr": "chr16", - "pos": "15832508", - "ref": "GT" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000016.10:g.15738651_15738652inv", - "vcf": { - "alt": "AC", - "chr": "chr16", - "pos": "15738651", - "ref": "GT" - } - } - }, - "submitted_variant": "chr16:15832508_15832509delinsAC", - "transcript_description": "Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1B, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "NM_002474.2 Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1A, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": 
"NP_002465.1:p.(Thr1012Val)", - "HGVS_transcript_variant": "NM_002474.2:c.3034_3035inv", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh38": { - "HGVS_genomic_description": "NT_187607.1:g.1396662_1396663inv", - "vcf": { - "alt": "AC", - "chr": "HSCHR16_1_CTG1", - "pos": "1396662", - "ref": "GT" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NT_187607.1:g.1396662_1396663inv", - "vcf": { - "alt": "AC", - "chr": "chr16_KI270853v1_alt", - "pos": "1396662", - "ref": "GT" - } - } - } - ], - "gene_symbol": "MYH11", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000016.9:g.15832508_15832509inv", - "vcf": { - "alt": "AC", - "chr": "16", - "pos": "15832508", - "ref": "GT" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000016.10:g.15738651_15738652inv", - "vcf": { - "alt": "AC", - "chr": "16", - "pos": "15738651", - "ref": "GT" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000016.9:g.15832508_15832509inv", - "vcf": { - "alt": "AC", - "chr": "chr16", - "pos": "15832508", - "ref": "GT" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000016.10:g.15738651_15738652inv", - "vcf": { - "alt": "AC", - "chr": "chr16", - "pos": "15738651", - "ref": "GT" - } - } - }, - "submitted_variant": "chr16:15832508_15832509delinsAC", - "transcript_description": "Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1A, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "NM_022844.2 Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2A, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_074035.1:p.(Thr1012Val)", - "HGVS_transcript_variant": "NM_022844.2:c.3034_3035inv", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh38": { - "HGVS_genomic_description": 
"NT_187607.1:g.1396662_1396663inv", - "vcf": { - "alt": "AC", - "chr": "HSCHR16_1_CTG1", - "pos": "1396662", - "ref": "GT" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NT_187607.1:g.1396662_1396663inv", - "vcf": { - "alt": "AC", - "chr": "chr16_KI270853v1_alt", - "pos": "1396662", - "ref": "GT" - } - } - } - ], - "gene_symbol": "MYH11", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000016.9:g.15832508_15832509inv", - "vcf": { - "alt": "AC", - "chr": "16", - "pos": "15832508", - "ref": "GT" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000016.10:g.15738651_15738652inv", - "vcf": { - "alt": "AC", - "chr": "16", - "pos": "15738651", - "ref": "GT" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000016.9:g.15832508_15832509inv", - "vcf": { - "alt": "AC", - "chr": "chr16", - "pos": "15832508", - "ref": "GT" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000016.10:g.15738651_15738652inv", - "vcf": { - "alt": "AC", - "chr": "chr16", - "pos": "15738651", - "ref": "GT" - } - } - }, - "submitted_variant": "chr16:15832508_15832509delinsAC", - "transcript_description": "Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2A, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000088.3:c.589-1GG>G": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.590del", - "HGVS_LRG_variant": "LRG_1:g.8639del", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.8639del", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.?", - "HGVS_transcript_variant": "NM_000088.3:c.590del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48275364del", - 
"vcf": { - "alt": "A", - "chr": "17", - "pos": "48275361", - "ref": "AC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50198003del", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "50198000", - "ref": "AC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48275364del", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "48275361", - "ref": "AC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50198003del", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "50198000", - "ref": "AC" - } - } - }, - "submitted_variant": "NM_000088.3:c.589-1GG>G", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [ - "NM_000088.3:c.589-1GG>G automapped to NM_000088.3:c.589-1_589delGGinsG", - "NM_000088.3:c.589-1_589delinsG automapped to NM_000088.3:c.589-1del", - "NM_000088.3:c.589-1del normalized to NM_000088.3:c.590del" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000088.3:c.642+1GT>G": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.642+2del", - "HGVS_LRG_variant": "LRG_1:g.8693del", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.8693del", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.?", - "HGVS_transcript_variant": "NM_000088.3:c.642+2del", - "RefSeqGene_context_intronic_sequence": "NG_007400.1(NM_000088.3):c.642+2del", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "NC_000017.10(NM_000088.3):c.642+2del", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48275308del", - "vcf": { - "alt": "T", - "chr": "17", - "pos": "48275307", - "ref": "TA" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50197947del", - "vcf": { - "alt": "T", - "chr": "17", - "pos": "50197946", - "ref": "TA" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48275308del", - 
"vcf": { - "alt": "T", - "chr": "chr17", - "pos": "48275307", - "ref": "TA" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50197947del", - "vcf": { - "alt": "T", - "chr": "chr17", - "pos": "50197946", - "ref": "TA" - } - } - }, - "submitted_variant": "NM_000088.3:c.642+1GT>G", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [ - "NM_000088.3:c.642+1GT>G automapped to NM_000088.3:c.642+1_642+2delGTinsG", - "NM_000088.3:c.642+1_642+2delinsG automapped to NM_000088.3:c.642+2del" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000088.3:c.589-2AG>G": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.589-2del", - "HGVS_LRG_variant": "LRG_1:g.8636del", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.8636del", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.?", - "HGVS_transcript_variant": "NM_000088.3:c.589-2del", - "RefSeqGene_context_intronic_sequence": "NG_007400.1(NM_000088.3):c.589-2del", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "NC_000017.10(NM_000088.3):c.589-2del", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48275366del", - "vcf": { - "alt": "C", - "chr": "17", - "pos": "48275364", - "ref": "CT" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50198005del", - "vcf": { - "alt": "C", - "chr": "17", - "pos": "50198003", - "ref": "CT" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48275366del", - "vcf": { - "alt": "C", - "chr": "chr17", - "pos": "48275364", - "ref": "CT" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50198005del", - "vcf": { - "alt": "C", - "chr": "chr17", - "pos": "50198003", - "ref": "CT" - } - } - }, - "submitted_variant": "NM_000088.3:c.589-2AG>G", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain 
(COL1A1), mRNA", - "validation_warnings": [ - "NM_000088.3:c.589-2AG>G automapped to NM_000088.3:c.589-2_589-1delAGinsG", - "NM_000088.3:c.589-2_589-1delinsG automapped to NM_000088.3:c.589-3del", - "NM_000088.3:c.589-3del normalized to NM_000088.3:c.589-2del" - ] - }, - "flag": "gene_variant" - } - }, - { - "NC_000017.10:g.48279242G>T": { - "Intergenic_Variant_1": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "LRG_1:g.4759C>A", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.4759C>A", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48279242G>T", - "vcf": { - "alt": "T", - "chr": "17", - "pos": "48279242", - "ref": "G" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50201881G>T", - "vcf": { - "alt": "T", - "chr": "17", - "pos": "50201881", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48279242G>T", - "vcf": { - "alt": "T", - "chr": "chr17", - "pos": "48279242", - "ref": "G" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50201881G>T", - "vcf": { - "alt": "T", - "chr": "chr17", - "pos": "50201881", - "ref": "G" - } - } - }, - "submitted_variant": "NC_000017.10:g.48279242G>T", - "transcript_description": "", - "validation_warnings": [ - "Suspected intergenic region, No transcripts fully overlap the input genomic coordinates" - ] - }, - "flag": "intergenic" - } - }, - { - "NM_000500.7:c.-107-19C>T": { - "Validation_Warning_1": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - 
"primary_assembly_loci": {}, - "submitted_variant": "NM_000500.7:c.-107-19C>T", - "transcript_description": "", - "validation_warnings": [ - "Using a transcript reference sequence to specify an intergenic variant position that lies 5 to the transcript reference sequence is not HGVS compliant. Instead use NC_000006.11:g.32006074C>T" - ] - }, - "flag": "warning" - } - }, - { - "NM_000518.4:c.-130C>T": { - "Validation_Warning_1": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "NM_000518.4:c.-130C>T", - "transcript_description": "", - "validation_warnings": [ - "The given coordinate is outside the bounds of the reference sequence." - ] - }, - "flag": "warning" - } - }, - { - "NR_138595.1:n.-810C>T": { - "Validation_Warning_1": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "NR_138595.1:n.-810C>T", - "transcript_description": "", - "validation_warnings": [ - "The given coordinate is outside the bounds of the reference sequence." 
- ] - }, - "flag": "warning" - } - }, - { - "NR_138595.1:n.1-810C>T": { - "Validation_Warning_1": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "NR_138595.1:n.1-810C>T", - "transcript_description": "", - "validation_warnings": [ - "Using a transcript reference sequence to specify an intergenic variant position that lies 5 to the transcript reference sequence is not HGVS compliant. Instead use NC_000014.8:g.36989536G>A" - ] - }, - "flag": "warning" - } - }, - { - "NC_000017.10:g.48261457_48261463TTATGTT=": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.*1400_*1406=", - "HGVS_LRG_variant": "LRG_1:g.22538_22544=", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.22538_22544=", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.?", - "HGVS_transcript_variant": "NM_000088.3:c.*1400_*1406=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48261457_48261463=", - "vcf": { - "alt": "TTATGTT", - "chr": "17", - "pos": "48261457", - "ref": "TTATGTT" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50184096_50184102=", - "vcf": { - "alt": "TTATGTT", - "chr": "17", - "pos": "50184096", - "ref": "TTATGTT" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48261457_48261463=", - "vcf": { - "alt": "TTATGTT", - "chr": "chr17", - "pos": "48261457", - "ref": "TTATGTT" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50184096_50184102=", - "vcf": { - "alt": 
"TTATGTT", - "chr": "chr17", - "pos": "50184096", - "ref": "TTATGTT" - } - } - }, - "submitted_variant": "NC_000017.10:g.48261457_48261463TTATGTT=", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [] - }, - "flag": "gene_variant" - } - }, - { - "NC_000017.10:g.48275363C>A": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.589G>T", - "HGVS_LRG_variant": "LRG_1:g.8638G>T", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.8638G>T", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.(Gly197Cys)", - "HGVS_transcript_variant": "NM_000088.3:c.589G>T", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48275363C>A", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "48275363", - "ref": "C" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50198002C>A", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "50198002", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48275363C>A", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "48275363", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50198002C>A", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "50198002", - "ref": "C" - } - } - }, - "submitted_variant": "NC_000017.10:g.48275363C>A", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [] - }, - "flag": "gene_variant" - } - }, - { - "NM_000088.3:c.589-1G>T": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.589-1G>T", - "HGVS_LRG_variant": "LRG_1:g.8637G>T", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.8637G>T", - 
"HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.?", - "HGVS_transcript_variant": "NM_000088.3:c.589-1G>T", - "RefSeqGene_context_intronic_sequence": "NG_007400.1(NM_000088.3):c.589-1G>T", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "NC_000017.10(NM_000088.3):c.589-1G>T", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48275364C>A", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "48275364", - "ref": "C" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50198003C>A", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "50198003", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48275364C>A", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "48275364", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50198003C>A", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "50198003", - "ref": "C" - } - } - }, - "submitted_variant": "NM_000088.3:c.589-1G>T", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [] - }, - "flag": "gene_variant" - } - }, - { - "NM_000088.3:c.591_593inv": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.591_593inv", - "HGVS_LRG_variant": "LRG_1:g.8640_8642inv", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.8640_8642inv", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.(Pro198Asp)", - "HGVS_transcript_variant": "NM_000088.3:c.591_593inv", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48275359_48275361inv", - "vcf": { - "alt": "TCC", - "chr": "17", - "pos": "48275359", - "ref": "GGA" - } - }, - "GRCh38": { - "HGVS_genomic_description": 
"NC_000017.11:g.50197998_50198000inv", - "vcf": { - "alt": "TCC", - "chr": "17", - "pos": "50197998", - "ref": "GGA" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48275359_48275361inv", - "vcf": { - "alt": "TCC", - "chr": "chr17", - "pos": "48275359", - "ref": "GGA" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50197998_50198000inv", - "vcf": { - "alt": "TCC", - "chr": "chr17", - "pos": "50197998", - "ref": "GGA" - } - } - }, - "submitted_variant": "NM_000088.3:c.591_593inv", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [] - }, - "flag": "gene_variant" - } - }, - { - "11-5248232-T-A": { - "NM_000518.4 Homo sapiens hemoglobin subunit beta (HBB), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_000007.3:g.70614A>T", - "HGVS_predicted_protein_consequence": "NP_000509.1:p.(Glu7Val)", - "HGVS_transcript_variant": "NM_000518.4:c.20A>T", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "HBB", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000011.9:g.5248232T>A", - "vcf": { - "alt": "A", - "chr": "11", - "pos": "5248232", - "ref": "T" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000011.10:g.5227002T>A", - "vcf": { - "alt": "A", - "chr": "11", - "pos": "5227002", - "ref": "T" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000011.9:g.5248232T>A", - "vcf": { - "alt": "A", - "chr": "chr11", - "pos": "5248232", - "ref": "T" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000011.10:g.5227002T>A", - "vcf": { - "alt": "A", - "chr": "chr11", - "pos": "5227002", - "ref": "T" - } - } - }, - "submitted_variant": "11-5248232-T-A", - "transcript_description": "Homo sapiens hemoglobin subunit beta (HBB), mRNA", - "validation_warnings": [] - }, - "flag": "gene_variant" - } - }, - { - 
"NG_007400.1(NM_000088.3):c.589-1G>T": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.589-1G>T", - "HGVS_LRG_variant": "LRG_1:g.8637G>T", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.8637G>T", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.?", - "HGVS_transcript_variant": "NM_000088.3:c.589-1G>T", - "RefSeqGene_context_intronic_sequence": "NG_007400.1(NM_000088.3):c.589-1G>T", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "NC_000017.10(NM_000088.3):c.589-1G>T", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48275364C>A", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "48275364", - "ref": "C" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50198003C>A", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "50198003", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48275364C>A", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "48275364", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50198003C>A", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "50198003", - "ref": "C" - } - } - }, - "submitted_variant": "NG_007400.1(NM_000088.3):c.589-1G>T", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [] - }, - "flag": "gene_variant" - } - }, - { - "1:150550916G>A": { - "NM_001197320.1 Homo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 3, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001184249.1:p.(Ser94Phe)", - "HGVS_transcript_variant": "NM_001197320.1:c.281C>T", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "MCL1", - "genome_context_intronic_sequence": "", - 
"primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000001.10:g.150550916G>A", - "vcf": { - "alt": "A", - "chr": "1", - "pos": "150550916", - "ref": "G" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000001.11:g.150578440G>A", - "vcf": { - "alt": "A", - "chr": "1", - "pos": "150578440", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000001.10:g.150550916G>A", - "vcf": { - "alt": "A", - "chr": "chr1", - "pos": "150550916", - "ref": "G" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000001.11:g.150578440G>A", - "vcf": { - "alt": "A", - "chr": "chr1", - "pos": "150578440", - "ref": "G" - } - } - }, - "submitted_variant": "1:150550916G>A", - "transcript_description": "Homo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 3, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "NM_021960.4 Homo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_029146.1:g.6299C>T", - "HGVS_predicted_protein_consequence": "NP_068779.1:p.(Ser247Phe)", - "HGVS_transcript_variant": "NM_021960.4:c.740C>T", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "MCL1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000001.10:g.150550916G>A", - "vcf": { - "alt": "A", - "chr": "1", - "pos": "150550916", - "ref": "G" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000001.11:g.150578440G>A", - "vcf": { - "alt": "A", - "chr": "1", - "pos": "150578440", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000001.10:g.150550916G>A", - "vcf": { - "alt": "A", - "chr": "chr1", - "pos": "150550916", - "ref": "G" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000001.11:g.150578440G>A", - "vcf": { - "alt": "A", - "chr": "chr1", - 
"pos": "150578440", - "ref": "G" - } - } - }, - "submitted_variant": "1:150550916G>A", - "transcript_description": "Homo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 1, mRNA", - "validation_warnings": [] - }, - "NM_182763.2 Homo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_877495.1:p.?", - "HGVS_transcript_variant": "NM_182763.2:c.688+403C>T", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "MCL1", - "genome_context_intronic_sequence": "NC_000001.10(NM_182763.2):c.688+403C>T", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000001.10:g.150550916G>A", - "vcf": { - "alt": "A", - "chr": "1", - "pos": "150550916", - "ref": "G" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000001.11:g.150578440G>A", - "vcf": { - "alt": "A", - "chr": "1", - "pos": "150578440", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000001.10:g.150550916G>A", - "vcf": { - "alt": "A", - "chr": "chr1", - "pos": "150550916", - "ref": "G" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000001.11:g.150578440G>A", - "vcf": { - "alt": "A", - "chr": "chr1", - "pos": "150578440", - "ref": "G" - } - } - }, - "submitted_variant": "1:150550916G>A", - "transcript_description": "Homo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 2, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "1-150550916-G-A": { - "NM_001197320.1 Homo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 3, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001184249.1:p.(Ser94Phe)", - "HGVS_transcript_variant": 
"NM_001197320.1:c.281C>T", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "MCL1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000001.10:g.150550916G>A", - "vcf": { - "alt": "A", - "chr": "1", - "pos": "150550916", - "ref": "G" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000001.11:g.150578440G>A", - "vcf": { - "alt": "A", - "chr": "1", - "pos": "150578440", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000001.10:g.150550916G>A", - "vcf": { - "alt": "A", - "chr": "chr1", - "pos": "150550916", - "ref": "G" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000001.11:g.150578440G>A", - "vcf": { - "alt": "A", - "chr": "chr1", - "pos": "150578440", - "ref": "G" - } - } - }, - "submitted_variant": "1-150550916-G-A", - "transcript_description": "Homo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 3, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "NM_021960.4 Homo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_029146.1:g.6299C>T", - "HGVS_predicted_protein_consequence": "NP_068779.1:p.(Ser247Phe)", - "HGVS_transcript_variant": "NM_021960.4:c.740C>T", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "MCL1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000001.10:g.150550916G>A", - "vcf": { - "alt": "A", - "chr": "1", - "pos": "150550916", - "ref": "G" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000001.11:g.150578440G>A", - "vcf": { - "alt": "A", - "chr": "1", - "pos": "150578440", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000001.10:g.150550916G>A", - "vcf": { - "alt": "A", - "chr": 
"chr1", - "pos": "150550916", - "ref": "G" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000001.11:g.150578440G>A", - "vcf": { - "alt": "A", - "chr": "chr1", - "pos": "150578440", - "ref": "G" - } - } - }, - "submitted_variant": "1-150550916-G-A", - "transcript_description": "Homo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 1, mRNA", - "validation_warnings": [] - }, - "NM_182763.2 Homo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_877495.1:p.?", - "HGVS_transcript_variant": "NM_182763.2:c.688+403C>T", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "MCL1", - "genome_context_intronic_sequence": "NC_000001.10(NM_182763.2):c.688+403C>T", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000001.10:g.150550916G>A", - "vcf": { - "alt": "A", - "chr": "1", - "pos": "150550916", - "ref": "G" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000001.11:g.150578440G>A", - "vcf": { - "alt": "A", - "chr": "1", - "pos": "150578440", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000001.10:g.150550916G>A", - "vcf": { - "alt": "A", - "chr": "chr1", - "pos": "150550916", - "ref": "G" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000001.11:g.150578440G>A", - "vcf": { - "alt": "A", - "chr": "chr1", - "pos": "150578440", - "ref": "G" - } - } - }, - "submitted_variant": "1-150550916-G-A", - "transcript_description": "Homo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 2, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "NG_008123.1(LEPRE1_v003):c.2055+18G>A": { - "Validation_Warning_1": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - 
"HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "NG_008123.1(LEPRE1_v003):c.2055+18G>A", - "transcript_description": "", - "validation_warnings": [ - "NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. NG_(NM_):c.PositionVariation", - "For additional assistance, submit NG_008123.1:c.2055+18G>A to VariantValidator" - ] - }, - "flag": "warning" - } - }, - { - "NG_008123.1:c.2055+18G>A": { - "Validation_Warning_1": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "NG_008123.1:c.2055+18G>A", - "transcript_description": "", - "validation_warnings": [ - "A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation. 
Re-submit NG_008123.1:c.2055+18G>A but also specify transcripts from the following", - "select_transcripts=NM_001146289.1|NM_022356.3|NM_001243246.1" - ] - }, - "flag": "warning" - } - }, - { - "NG_008123.1(NM_022356.3):c.2055+18G>A": { - "NM_022356.3 Homo sapiens prolyl 3-hydroxylase 1 (P3H1), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "LRG_5t1:c.2055+18G>A", - "HGVS_LRG_variant": "LRG_5:g.24831G>A", - "HGVS_RefSeqGene_variant": "NG_008123.1:g.24831G>A", - "HGVS_predicted_protein_consequence": "NP_071751.3(LRG_5p1):p.?", - "HGVS_transcript_variant": "NM_022356.3:c.2055+18G>A", - "RefSeqGene_context_intronic_sequence": "NG_008123.1(NM_022356.3):c.2055+18G>A", - "alt_genomic_loci": [], - "gene_symbol": "P3H1", - "genome_context_intronic_sequence": "NC_000001.10(NM_022356.3):c.2055+18G>A", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000001.10:g.43212925C>T", - "vcf": { - "alt": "T", - "chr": "1", - "pos": "43212925", - "ref": "C" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000001.11:g.42747254C>T", - "vcf": { - "alt": "T", - "chr": "1", - "pos": "42747254", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000001.10:g.43212925C>T", - "vcf": { - "alt": "T", - "chr": "chr1", - "pos": "43212925", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000001.11:g.42747254C>T", - "vcf": { - "alt": "T", - "chr": "chr1", - "pos": "42747254", - "ref": "C" - } - } - }, - "submitted_variant": "NG_008123.1(NM_022356.3):c.2055+18G>A", - "transcript_description": "Homo sapiens prolyl 3-hydroxylase 1 (P3H1), transcript variant 1, mRNA", - "validation_warnings": [] - }, - "flag": "gene_variant" - } - }, - { - "NM_021983.4:c.490G>C": { - "NM_021983.4 Homo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_002433.1:g.5724C>G", - 
"HGVS_predicted_protein_consequence": "NP_068818.4:p.(Gly164Arg)", - "HGVS_transcript_variant": "NM_021983.4:c.490G>C", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NT_167246.1:g.3848158T>G", - "vcf": { - "alt": "G", - "chr": "HSCHR6_MHC_MANN_CTG1", - "pos": "3848158", - "ref": "T" - } - } - }, - { - "hg19": { - "HGVS_genomic_description": "NT_167246.1:g.3848158T>G", - "vcf": { - "alt": "G", - "chr": "chr6_mann_hap4", - "pos": "3848158", - "ref": "T" - } - } - }, - { - "GRCh38": { - "HGVS_genomic_description": "NT_167246.2:g.3842538T>G", - "vcf": { - "alt": "G", - "chr": "HSCHR6_MHC_MANN_CTG1", - "pos": "3842538", - "ref": "T" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NT_167246.2:g.3842538T>G", - "vcf": { - "alt": "G", - "chr": "chr6_GL000253v2_alt", - "pos": "3842538", - "ref": "T" - } - } - }, - { - "GRCh37": { - "HGVS_genomic_description": "NT_167247.1:g.3884432C>G", - "vcf": { - "alt": "G", - "chr": "HSCHR6_MHC_MCF_CTG1", - "pos": "3884432", - "ref": "C" - } - } - }, - { - "hg19": { - "HGVS_genomic_description": "NT_167247.1:g.3884432C>G", - "vcf": { - "alt": "G", - "chr": "chr6_mcf_hap5", - "pos": "3884432", - "ref": "C" - } - } - }, - { - "GRCh37": { - "HGVS_genomic_description": "NT_167249.1:g.3852542C>G", - "vcf": { - "alt": "G", - "chr": "HSCHR6_MHC_SSTO_CTG1", - "pos": "3852542", - "ref": "C" - } - } - }, - { - "hg19": { - "HGVS_genomic_description": "NT_167249.1:g.3852542C>G", - "vcf": { - "alt": "G", - "chr": "chr6_ssto_hap7", - "pos": "3852542", - "ref": "C" - } - } - }, - { - "GRCh38": { - "HGVS_genomic_description": "NT_167249.2:g.3853244C>G", - "vcf": { - "alt": "G", - "chr": "HSCHR6_MHC_SSTO_CTG1", - "pos": "3853244", - "ref": "C" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NT_167249.2:g.3853244C>G", - "vcf": { - "alt": "G", - "chr": "chr6_GL000256v2_alt", - "pos": "3853244", - "ref": "C" - } - } - } - ], - "gene_symbol": "HLA-DRB4", - 
"genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "NM_021983.4:c.490G>C", - "transcript_description": "Homo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA", - "validation_warnings": [ - "NM_021983.4:c.490G>C can not be mapped directly to genome build GRCh37. See Alternative genomic loci for aligned genomic positions" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_032470.3:c.4del": { - "NM_032470.3 Homo sapiens tenascin XB (TNXB), transcript variant XB-S, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_115859.2:p.(Arg2AlafsTer91)", - "HGVS_transcript_variant": "NM_032470.3:c.4del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NT_113891.2:g.3483644del", - "vcf": { - "alt": "C", - "chr": "HSCHR6_MHC_COX_CTG1", - "pos": "3483643", - "ref": "CG" - } - } - }, - { - "hg19": { - "HGVS_genomic_description": "NT_113891.2:g.3483644del", - "vcf": { - "alt": "C", - "chr": "chr6_cox_hap2", - "pos": "3483643", - "ref": "CG" - } - } - }, - { - "GRCh38": { - "HGVS_genomic_description": "NT_113891.3:g.3483538del", - "vcf": { - "alt": "C", - "chr": "HSCHR6_MHC_COX_CTG1", - "pos": "3483537", - "ref": "CG" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NT_113891.3:g.3483538del", - "vcf": { - "alt": "C", - "chr": "chr6_GL000251v2_alt", - "pos": "3483537", - "ref": "CG" - } - } - }, - { - "GRCh37": { - "HGVS_genomic_description": "NT_167245.1:g.3292210del", - "vcf": { - "alt": "C", - "chr": "HSCHR6_MHC_DBB_CTG1", - "pos": "3292209", - "ref": "CG" - } - } - }, - { - "hg19": { - "HGVS_genomic_description": "NT_167245.1:g.3292210del", - "vcf": { - "alt": "C", - "chr": "chr6_dbb_hap3", - "pos": "3292209", - "ref": "CG" - } - } - }, - { - "GRCh38": { - "HGVS_genomic_description": "NT_167245.2:g.3286625del", - "vcf": { - "alt": 
"C", - "chr": "HSCHR6_MHC_DBB_CTG1", - "pos": "3286624", - "ref": "CG" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NT_167245.2:g.3286625del", - "vcf": { - "alt": "C", - "chr": "chr6_GL000252v2_alt", - "pos": "3286624", - "ref": "CG" - } - } - }, - { - "GRCh37": { - "HGVS_genomic_description": "NT_167247.1:g.3392834del", - "vcf": { - "alt": "C", - "chr": "HSCHR6_MHC_MCF_CTG1", - "pos": "3392833", - "ref": "CG" - } - } - }, - { - "hg19": { - "HGVS_genomic_description": "NT_167247.1:g.3392834del", - "vcf": { - "alt": "C", - "chr": "chr6_mcf_hap5", - "pos": "3392833", - "ref": "CG" - } - } - }, - { - "GRCh38": { - "HGVS_genomic_description": "NT_167247.2:g.3387249del", - "vcf": { - "alt": "C", - "chr": "HSCHR6_MHC_MCF_CTG1", - "pos": "3387248", - "ref": "CG" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NT_167247.2:g.3387249del", - "vcf": { - "alt": "C", - "chr": "chr6_GL000254v2_alt", - "pos": "3387248", - "ref": "CG" - } - } - }, - { - "GRCh37": { - "HGVS_genomic_description": "NT_167248.1:g.3274047del", - "vcf": { - "alt": "C", - "chr": "HSCHR6_MHC_QBL_CTG1", - "pos": "3274046", - "ref": "CG" - } - } - }, - { - "hg19": { - "HGVS_genomic_description": "NT_167248.1:g.3274047del", - "vcf": { - "alt": "C", - "chr": "chr6_qbl_hap6", - "pos": "3274046", - "ref": "CG" - } - } - }, - { - "GRCh38": { - "HGVS_genomic_description": "NT_167248.2:g.3268451del", - "vcf": { - "alt": "C", - "chr": "HSCHR6_MHC_QBL_CTG1", - "pos": "3268450", - "ref": "CG" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NT_167248.2:g.3268451del", - "vcf": { - "alt": "C", - "chr": "chr6_GL000255v2_alt", - "pos": "3268450", - "ref": "CG" - } - } - }, - { - "GRCh37": { - "HGVS_genomic_description": "NT_167249.1:g.3345701del", - "vcf": { - "alt": "C", - "chr": "HSCHR6_MHC_SSTO_CTG1", - "pos": "3345700", - "ref": "CG" - } - } - }, - { - "hg19": { - "HGVS_genomic_description": "NT_167249.1:g.3345701del", - "vcf": { - "alt": "C", - "chr": "chr6_ssto_hap7", - "pos": 
"3345700", - "ref": "CG" - } - } - }, - { - "GRCh38": { - "HGVS_genomic_description": "NT_167249.2:g.3346403del", - "vcf": { - "alt": "C", - "chr": "HSCHR6_MHC_SSTO_CTG1", - "pos": "3346402", - "ref": "CG" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NT_167249.2:g.3346403del", - "vcf": { - "alt": "C", - "chr": "chr6_GL000256v2_alt", - "pos": "3346402", - "ref": "CG" - } - } - } - ], - "gene_symbol": "TNXB", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000006.11:g.32012993del", - "vcf": { - "alt": "C", - "chr": "6", - "pos": "32012992", - "ref": "CG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000006.12:g.32045216del", - "vcf": { - "alt": "C", - "chr": "6", - "pos": "32045215", - "ref": "CG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000006.11:g.32012993del", - "vcf": { - "alt": "C", - "chr": "chr6", - "pos": "32012992", - "ref": "CG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000006.12:g.32045216del", - "vcf": { - "alt": "C", - "chr": "chr6", - "pos": "32045215", - "ref": "CG" - } - } - }, - "submitted_variant": "NM_032470.3:c.4del", - "transcript_description": "Homo sapiens tenascin XB (TNXB), transcript variant XB-S, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_001194958.2:c.20C>A": { - "NM_001194958.2 Homo sapiens potassium voltage-gated channel subfamily J member 18 (KCNJ18), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_033093.1:g.15284C>A", - "HGVS_predicted_protein_consequence": "NP_001181887.2:p.(Ala7Asp)", - "HGVS_transcript_variant": "NM_001194958.2:c.20C>A", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NW_003315950.2:g.355171C>A", - "vcf": { - "alt": "A", - "chr": "HG987_PATCH", - "pos": "355171", - "ref": "C" - } - } - } - 
], - "gene_symbol": "KCNJ18", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.21702806C>A", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "21702806", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.21702806C>A", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "21702806", - "ref": "C" - } - } - }, - "submitted_variant": "NM_001194958.2:c.20C>A", - "transcript_description": "Homo sapiens potassium voltage-gated channel subfamily J member 18 (KCNJ18), mRNA", - "validation_warnings": [ - "NM_001194958.2:c.20C>A can not be mapped directly to genome build GRCh37. See alt_genomic_loci for aligned genomic positions" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000022.2:c.534A>G": { - "NM_000022.2 Homo sapiens adenosine deaminase (ADA), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "LRG_16t1:c.534A>G", - "HGVS_LRG_variant": "LRG_16:g.32462A>G", - "HGVS_RefSeqGene_variant": "NG_007385.1:g.32462A>G", - "HGVS_predicted_protein_consequence": "NP_000013.2(LRG_16p1):p.(Val178=)", - "HGVS_transcript_variant": "NM_000022.2:c.534A>G", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "ADA", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000020.10:g.43252915T>C", - "vcf": { - "alt": "C", - "chr": "20", - "pos": "43252915", - "ref": "T" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000020.10:g.43252915T>C", - "vcf": { - "alt": "C", - "chr": "chr20", - "pos": "43252915", - "ref": "T" - } - } - }, - "submitted_variant": "NM_000022.2:c.534A>G", - "transcript_description": "Homo sapiens adenosine deaminase (ADA), transcript variant 1, mRNA", - "validation_warnings": [ - "A more recent version of the selected reference sequence NM_000022.2 is available (NM_000022.3)", - "NM_000022.3:c.534A>G MUST be fully validated prior to 
use in reports", - "select_variants=NM_000022.3:c.534A>G" - ] - }, - "flag": "gene_variant" - } - }, - { - "HSCHR6_MHC_SSTO_CTG1-3852542-C-G": { - "NM_021983.4 Homo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_002433.1:g.5724C>G", - "HGVS_predicted_protein_consequence": "NP_068818.4:p.(Gly164Arg)", - "HGVS_transcript_variant": "NM_021983.4:c.490G>C", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NT_167246.1:g.3848158T>G", - "vcf": { - "alt": "G", - "chr": "HSCHR6_MHC_MANN_CTG1", - "pos": "3848158", - "ref": "T" - } - } - }, - { - "hg19": { - "HGVS_genomic_description": "NT_167246.1:g.3848158T>G", - "vcf": { - "alt": "G", - "chr": "chr6_mann_hap4", - "pos": "3848158", - "ref": "T" - } - } - }, - { - "GRCh38": { - "HGVS_genomic_description": "NT_167246.2:g.3842538T>G", - "vcf": { - "alt": "G", - "chr": "HSCHR6_MHC_MANN_CTG1", - "pos": "3842538", - "ref": "T" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NT_167246.2:g.3842538T>G", - "vcf": { - "alt": "G", - "chr": "chr6_GL000253v2_alt", - "pos": "3842538", - "ref": "T" - } - } - }, - { - "GRCh37": { - "HGVS_genomic_description": "NT_167247.1:g.3884432C>G", - "vcf": { - "alt": "G", - "chr": "HSCHR6_MHC_MCF_CTG1", - "pos": "3884432", - "ref": "C" - } - } - }, - { - "hg19": { - "HGVS_genomic_description": "NT_167247.1:g.3884432C>G", - "vcf": { - "alt": "G", - "chr": "chr6_mcf_hap5", - "pos": "3884432", - "ref": "C" - } - } - }, - { - "GRCh37": { - "HGVS_genomic_description": "NT_167249.1:g.3852542C>G", - "vcf": { - "alt": "G", - "chr": "HSCHR6_MHC_SSTO_CTG1", - "pos": "3852542", - "ref": "C" - } - } - }, - { - "hg19": { - "HGVS_genomic_description": "NT_167249.1:g.3852542C>G", - "vcf": { - "alt": "G", - "chr": "chr6_ssto_hap7", - "pos": "3852542", - "ref": "C" - } - } - }, - { - "GRCh38": { - 
"HGVS_genomic_description": "NT_167249.2:g.3853244C>G", - "vcf": { - "alt": "G", - "chr": "HSCHR6_MHC_SSTO_CTG1", - "pos": "3853244", - "ref": "C" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NT_167249.2:g.3853244C>G", - "vcf": { - "alt": "G", - "chr": "chr6_GL000256v2_alt", - "pos": "3853244", - "ref": "C" - } - } - } - ], - "gene_symbol": "HLA-DRB4", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "HSCHR6_MHC_SSTO_CTG1-3852542-C-G", - "transcript_description": "Homo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA", - "validation_warnings": [ - "NM_021983.4:c.490G>C can not be mapped directly to genome build GRCh37. See Alternative genomic loci for aligned genomic positions" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000368.4:c.363+1dupG": { - "NM_000368.4 Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "LRG_486t1:c.363+1dup", - "HGVS_LRG_variant": "LRG_486:g.24048dup", - "HGVS_RefSeqGene_variant": "NG_012386.1:g.24048dup", - "HGVS_predicted_protein_consequence": "NP_000359.1(LRG_486p1):p.(Met122AspfsTer4)", - "HGVS_transcript_variant": "NM_000368.4:c.363+1dup", - "RefSeqGene_context_intronic_sequence": "NG_012386.1(NM_000368.4):c.363+1dup", - "alt_genomic_loci": [], - "gene_symbol": "TSC1", - "genome_context_intronic_sequence": "NC_000009.11(NM_000368.4):c.363+1dup", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000009.11:g.135800974dup", - "vcf": { - "alt": "ACC", - "chr": "9", - "pos": "135800972", - "ref": "AC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000009.12:g.132925587dup", - "vcf": { - "alt": "ACC", - "chr": "9", - "pos": "132925585", - "ref": "AC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000009.11:g.135800974dup", - "vcf": { - "alt": "ACC", - "chr": "chr9", - "pos": "135800972", - "ref": "AC" - } - }, - "hg38": { - 
"HGVS_genomic_description": "NC_000009.12:g.132925587dup", - "vcf": { - "alt": "ACC", - "chr": "chr9", - "pos": "132925585", - "ref": "AC" - } - } - }, - "submitted_variant": "NM_000368.4:c.363+1dupG", - "transcript_description": "Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA", - "validation_warnings": [ - "NM_000368.4:c.363+1dup automapped to NM_000368.4:c.363dup", - "NM_000368.4:c.363dup normalized to NM_000368.4:c.363+1dup" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000368.4:c.363dupG": { - "NM_000368.4 Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "LRG_486t1:c.363+1dup", - "HGVS_LRG_variant": "LRG_486:g.24048dup", - "HGVS_RefSeqGene_variant": "NG_012386.1:g.24048dup", - "HGVS_predicted_protein_consequence": "NP_000359.1(LRG_486p1):p.(Met122AspfsTer4)", - "HGVS_transcript_variant": "NM_000368.4:c.363+1dup", - "RefSeqGene_context_intronic_sequence": "NG_012386.1(NM_000368.4):c.363+1dup", - "alt_genomic_loci": [], - "gene_symbol": "TSC1", - "genome_context_intronic_sequence": "NC_000009.11(NM_000368.4):c.363+1dup", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000009.11:g.135800974dup", - "vcf": { - "alt": "ACC", - "chr": "9", - "pos": "135800972", - "ref": "AC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000009.12:g.132925587dup", - "vcf": { - "alt": "ACC", - "chr": "9", - "pos": "132925585", - "ref": "AC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000009.11:g.135800974dup", - "vcf": { - "alt": "ACC", - "chr": "chr9", - "pos": "135800972", - "ref": "AC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000009.12:g.132925587dup", - "vcf": { - "alt": "ACC", - "chr": "chr9", - "pos": "132925585", - "ref": "AC" - } - } - }, - "submitted_variant": "NM_000368.4:c.363dupG", - "transcript_description": "Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA", - "validation_warnings": [ - 
"NM_000368.4:c.363dup normalized to NM_000368.4:c.363+1dup" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000089.3:c.1033_1035delGTT": { - "NM_000089.3 Homo sapiens collagen type I alpha 2 chain (COL1A2), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_2t1:c.1035_1035+2del", - "HGVS_LRG_variant": "LRG_2:g.20261_20263del", - "HGVS_RefSeqGene_variant": "NG_007405.1:g.20261_20263del", - "HGVS_predicted_protein_consequence": "NP_000080.2(LRG_2p1):p.(Val345del)", - "HGVS_transcript_variant": "NM_000089.3:c.1035_1035+2del", - "RefSeqGene_context_intronic_sequence": "NG_007405.1(NM_000089.3):c.1035_1035+2del", - "alt_genomic_loci": [], - "gene_symbol": "COL1A2", - "genome_context_intronic_sequence": "NC_000007.13(NM_000089.3):c.1035_1035+2del", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000007.13:g.94039133_94039135del", - "vcf": { - "alt": "C", - "chr": "7", - "pos": "94039128", - "ref": "CTTG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000007.14:g.94409821_94409823del", - "vcf": { - "alt": "C", - "chr": "7", - "pos": "94409816", - "ref": "CTTG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000007.13:g.94039133_94039135del", - "vcf": { - "alt": "C", - "chr": "chr7", - "pos": "94039128", - "ref": "CTTG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000007.14:g.94409821_94409823del", - "vcf": { - "alt": "C", - "chr": "chr7", - "pos": "94409816", - "ref": "CTTG" - } - } - }, - "submitted_variant": "NM_000089.3:c.1033_1035delGTT", - "transcript_description": "Homo sapiens collagen type I alpha 2 chain (COL1A2), mRNA", - "validation_warnings": [ - "NM_000089.3:c.1033_1035del normalized to NM_000089.3:c.1035_1035+2del" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000089.3:c.1035_1035+2delTGT": { - "NM_000089.3 Homo sapiens collagen type I alpha 2 chain (COL1A2), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_2t1:c.1035_1035+2del", - "HGVS_LRG_variant": "LRG_2:g.20261_20263del", - 
"HGVS_RefSeqGene_variant": "NG_007405.1:g.20261_20263del", - "HGVS_predicted_protein_consequence": "NP_000080.2(LRG_2p1):p.(Val345del)", - "HGVS_transcript_variant": "NM_000089.3:c.1035_1035+2del", - "RefSeqGene_context_intronic_sequence": "NG_007405.1(NM_000089.3):c.1035_1035+2del", - "alt_genomic_loci": [], - "gene_symbol": "COL1A2", - "genome_context_intronic_sequence": "NC_000007.13(NM_000089.3):c.1035_1035+2del", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000007.13:g.94039133_94039135del", - "vcf": { - "alt": "C", - "chr": "7", - "pos": "94039128", - "ref": "CTTG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000007.14:g.94409821_94409823del", - "vcf": { - "alt": "C", - "chr": "7", - "pos": "94409816", - "ref": "CTTG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000007.13:g.94039133_94039135del", - "vcf": { - "alt": "C", - "chr": "chr7", - "pos": "94039128", - "ref": "CTTG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000007.14:g.94409821_94409823del", - "vcf": { - "alt": "C", - "chr": "chr7", - "pos": "94409816", - "ref": "CTTG" - } - } - }, - "submitted_variant": "NM_000089.3:c.1035_1035+2delTGT", - "transcript_description": "Homo sapiens collagen type I alpha 2 chain (COL1A2), mRNA", - "validation_warnings": [] - }, - "flag": "gene_variant" - } - }, - { - "NM_000088.3:c.2023_2028delGCAAGA": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.2024_2028+1del", - "HGVS_LRG_variant": "LRG_1:g.14656_14661del", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.14656_14661del", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.(Ala675_Arg676del)", - "HGVS_transcript_variant": "NM_000088.3:c.2024_2028+1del", - "RefSeqGene_context_intronic_sequence": "NG_007400.1(NM_000088.3):c.2024_2028+1del", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": 
"NC_000017.10(NM_000088.3):c.2024_2028+1del", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48269343_48269348del", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "48269339", - "ref": "ACTCTTG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50191982_50191987del", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "50191978", - "ref": "ACTCTTG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48269343_48269348del", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "48269339", - "ref": "ACTCTTG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50191982_50191987del", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "50191978", - "ref": "ACTCTTG" - } - } - }, - "submitted_variant": "NM_000088.3:c.2023_2028delGCAAGA", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [ - "NM_000088.3:c.2023_2028del normalized to NM_000088.3:c.2024_2028+1del" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000089.3:c.938-1delG": { - "NM_000089.3 Homo sapiens collagen type I alpha 2 chain (COL1A2), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_2t1:c.938del", - "HGVS_LRG_variant": "LRG_2:g.20164del", - "HGVS_RefSeqGene_variant": "NG_007405.1:g.20164del", - "HGVS_predicted_protein_consequence": "NP_000080.2(LRG_2p1):p.?", - "HGVS_transcript_variant": "NM_000089.3:c.938del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "COL1A2", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000007.13:g.94039036del", - "vcf": { - "alt": "A", - "chr": "7", - "pos": "94039033", - "ref": "AG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000007.14:g.94409724del", - "vcf": { - "alt": "A", - "chr": "7", - "pos": "94409721", - "ref": "AG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000007.13:g.94039036del", 
- "vcf": { - "alt": "A", - "chr": "chr7", - "pos": "94039033", - "ref": "AG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000007.14:g.94409724del", - "vcf": { - "alt": "A", - "chr": "chr7", - "pos": "94409721", - "ref": "AG" - } - } - }, - "submitted_variant": "NM_000089.3:c.938-1delG", - "transcript_description": "Homo sapiens collagen type I alpha 2 chain (COL1A2), mRNA", - "validation_warnings": [ - "NM_000089.3:c.938-1del automapped to NM_000089.3:c.938del" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000088.3:c.589G=": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.589G=", - "HGVS_LRG_variant": "LRG_1:g.8638G=", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.8638G=", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.(Gly197=)", - "HGVS_transcript_variant": "NM_000088.3:c.589G=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48275363C=", - "vcf": { - "alt": "C", - "chr": "17", - "pos": "48275363", - "ref": "C" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50198002C=", - "vcf": { - "alt": "C", - "chr": "17", - "pos": "50198002", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48275363C=", - "vcf": { - "alt": "C", - "chr": "chr17", - "pos": "48275363", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50198002C=", - "vcf": { - "alt": "C", - "chr": "chr17", - "pos": "50198002", - "ref": "C" - } - } - }, - "submitted_variant": "NM_000088.3:c.589G=", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [] - }, - "flag": "gene_variant" - } - }, - { - "NM_000088.3:c.642A=": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain 
(COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.642A=", - "HGVS_LRG_variant": "LRG_1:g.8691A=", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.8691A=", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.(Ser214=)", - "HGVS_transcript_variant": "NM_000088.3:c.642A=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48275310T=", - "vcf": { - "alt": "T", - "chr": "17", - "pos": "48275310", - "ref": "T" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50197949T=", - "vcf": { - "alt": "T", - "chr": "17", - "pos": "50197949", - "ref": "T" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48275310T=", - "vcf": { - "alt": "T", - "chr": "chr17", - "pos": "48275310", - "ref": "T" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50197949T=", - "vcf": { - "alt": "T", - "chr": "chr17", - "pos": "50197949", - "ref": "T" - } - } - }, - "submitted_variant": "NM_000088.3:c.642A=", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [] - }, - "flag": "gene_variant" - } - }, - { - "NM_000088.3:c.642+1GG>G": { - "Validation_Warning_1": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "NM_000088.3:c.642+1GG>G", - "transcript_description": "", - "validation_warnings": [ - "NM_000088.3:c.642+1GG>G automapped to NM_000088.3:c.642+1_642+2delGGinsG", - "NC_000017.10:g.48275308_48275309delCCinsC", - "Variant reference (CC) does not agree with reference sequence 
(AC)" - ] - }, - "flag": "warning" - } - }, - { - "NM_000088.3:c.589-2GG>G": { - "Validation_Warning_1": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "NM_000088.3:c.589-2GG>G", - "transcript_description": "", - "validation_warnings": [ - "NM_000088.3:c.589-2GG>G automapped to NM_000088.3:c.589-2_589-1delGGinsG", - "NC_000017.10:g.48275364_48275365delCCinsC", - "Variant reference (CC) does not agree with reference sequence (CT)" - ] - }, - "flag": "warning" - } - }, - { - "NM_000088.3:c.589-6_589-5insTTTT": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.589-5_589-4insTTTT", - "HGVS_LRG_variant": "LRG_1:g.8633_8634insTTTT", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.8633_8634insTTTT", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.?", - "HGVS_transcript_variant": "NM_000088.3:c.589-5_589-4insTTTT", - "RefSeqGene_context_intronic_sequence": "NG_007400.1(NM_000088.3):c.589-5_589-4insTTTT", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "NC_000017.10(NM_000088.3):c.589-5_589-4insTTTT", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48275368_48275369insAAAA", - "vcf": { - "alt": "GAAAA", - "chr": "17", - "pos": "48275367", - "ref": "G" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50198007_50198008insAAAA", - "vcf": { - "alt": "GAAAA", - "chr": "17", - "pos": "50198006", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48275368_48275369insAAAA", - "vcf": { - "alt": "GAAAA", - "chr": "chr17", - "pos": "48275367", - "ref": "G" - } 
- }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50198007_50198008insAAAA", - "vcf": { - "alt": "GAAAA", - "chr": "chr17", - "pos": "50198006", - "ref": "G" - } - } - }, - "submitted_variant": "NM_000088.3:c.589-6_589-5insTTTT", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [ - "NM_000088.3:c.589-6_589-5insTTTT normalized to NM_000088.3:c.589-5_589-4insTTTT" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000088.3:c.642+3_642+4insAAAA": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.642+4_642+5insAAAA", - "HGVS_LRG_variant": "LRG_1:g.8695_8696insAAAA", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.8695_8696insAAAA", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.?", - "HGVS_transcript_variant": "NM_000088.3:c.642+4_642+5insAAAA", - "RefSeqGene_context_intronic_sequence": "NG_007400.1(NM_000088.3):c.642+4_642+5insAAAA", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "NC_000017.10(NM_000088.3):c.642+4_642+5insAAAA", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48275307_48275308insTTTT", - "vcf": { - "alt": "CTTTT", - "chr": "17", - "pos": "48275305", - "ref": "C" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50197946_50197947insTTTT", - "vcf": { - "alt": "CTTTT", - "chr": "17", - "pos": "50197944", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48275307_48275308insTTTT", - "vcf": { - "alt": "CTTTT", - "chr": "chr17", - "pos": "48275305", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50197946_50197947insTTTT", - "vcf": { - "alt": "CTTTT", - "chr": "chr17", - "pos": "50197944", - "ref": "C" - } - } - }, - "submitted_variant": "NM_000088.3:c.642+3_642+4insAAAA", - "transcript_description": "Homo sapiens collagen type I 
alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [ - "NM_000088.3:c.642+3_642+4insAAAA automapped to NM_000088.3:c.642+2_642+3insAAAA", - "NM_000088.3:c.642+2_642+3insAAAA normalized to NM_000088.3:c.642+4_642+5insAAAA" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000088.3:c.589-4_589-3insTT": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.589-4_589-3insTT", - "HGVS_LRG_variant": "LRG_1:g.8634_8635insTT", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.8634_8635insTT", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.?", - "HGVS_transcript_variant": "NM_000088.3:c.589-4_589-3insTT", - "RefSeqGene_context_intronic_sequence": "NG_007400.1(NM_000088.3):c.589-4_589-3insTT", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "NC_000017.10(NM_000088.3):c.589-4_589-3insTT", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48275366_48275367insAA", - "vcf": { - "alt": "TAA", - "chr": "17", - "pos": "48275366", - "ref": "T" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50198005_50198006insAA", - "vcf": { - "alt": "TAA", - "chr": "17", - "pos": "50198005", - "ref": "T" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48275366_48275367insAA", - "vcf": { - "alt": "TAA", - "chr": "chr17", - "pos": "48275366", - "ref": "T" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50198005_50198006insAA", - "vcf": { - "alt": "TAA", - "chr": "chr17", - "pos": "50198005", - "ref": "T" - } - } - }, - "submitted_variant": "NM_000088.3:c.589-4_589-3insTT", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [] - }, - "flag": "gene_variant" - } - }, - { - "NM_000088.3:c.589-8del": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": 
"LRG_1t1:c.589-7del", - "HGVS_LRG_variant": "LRG_1:g.8631del", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.8631del", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.?", - "HGVS_transcript_variant": "NM_000088.3:c.589-7del", - "RefSeqGene_context_intronic_sequence": "NG_007400.1(NM_000088.3):c.589-7del", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "NC_000017.10(NM_000088.3):c.589-7del", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48275371del", - "vcf": { - "alt": "G", - "chr": "17", - "pos": "48275369", - "ref": "GA" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50198010del", - "vcf": { - "alt": "G", - "chr": "17", - "pos": "50198008", - "ref": "GA" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48275371del", - "vcf": { - "alt": "G", - "chr": "chr17", - "pos": "48275369", - "ref": "GA" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50198010del", - "vcf": { - "alt": "G", - "chr": "chr17", - "pos": "50198008", - "ref": "GA" - } - } - }, - "submitted_variant": "NM_000088.3:c.589-8del", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [ - "NM_000088.3:c.589-8del normalized to NM_000088.3:c.589-7del" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000527.4:c.-187_-185delCTC": { - "NM_000527.4 Homo sapiens low density lipoprotein receptor (LDLR), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "LRG_274t1:c.-187_-185del", - "HGVS_LRG_variant": "LRG_274:g.4982_4984del", - "HGVS_RefSeqGene_variant": "NG_009060.1:g.4982_4984del", - "HGVS_predicted_protein_consequence": "NP_000518.1(LRG_274p1):p.?", - "HGVS_transcript_variant": "NM_000527.4:c.-187_-185del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "LDLR", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - 
"GRCh37": { - "HGVS_genomic_description": "NC_000019.9:g.11200038_11200040del", - "vcf": { - "alt": "A", - "chr": "19", - "pos": "11200031", - "ref": "ACTC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000019.10:g.11089362_11089364del", - "vcf": { - "alt": "A", - "chr": "19", - "pos": "11089355", - "ref": "ACTC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000019.9:g.11200038_11200040del", - "vcf": { - "alt": "A", - "chr": "chr19", - "pos": "11200031", - "ref": "ACTC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000019.10:g.11089362_11089364del", - "vcf": { - "alt": "A", - "chr": "chr19", - "pos": "11089355", - "ref": "ACTC" - } - } - }, - "submitted_variant": "NM_000527.4:c.-187_-185delCTC", - "transcript_description": "Homo sapiens low density lipoprotein receptor (LDLR), transcript variant 1, mRNA", - "validation_warnings": [] - }, - "flag": "gene_variant" - } - }, - { - "NM_206933.2:c.6317C>G": { - "NM_206933.2 Homo sapiens usherin (USH2A), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_009497.1:g.381958C>G", - "HGVS_predicted_protein_consequence": "NP_996816.2:p.(Thr2106Arg)", - "HGVS_transcript_variant": "NM_206933.2:c.6317C>G", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "USH2A", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000001.10:g.216219781A>C", - "vcf": { - "alt": "C", - "chr": "1", - "pos": "216219781", - "ref": "A" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000001.11:g.216046439A>C", - "vcf": { - "alt": "C", - "chr": "1", - "pos": "216046439", - "ref": "A" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000001.10:g.216219781A>C", - "vcf": { - "alt": "C", - "chr": "chr1", - "pos": "216219781", - "ref": "A" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000001.11:g.216046439A>C", - "vcf": { - 
"alt": "C", - "chr": "chr1", - "pos": "216046439", - "ref": "A" - } - } - }, - "submitted_variant": "NM_206933.2:c.6317C>G", - "transcript_description": "Homo sapiens usherin (USH2A), transcript variant 2, mRNA", - "validation_warnings": [] - }, - "flag": "gene_variant" - } - }, - { - "NC_000013.10:g.32929387T>C": { - "NM_000059.3 Homo sapiens BRCA2, DNA repair associated (BRCA2), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_293t1:c.7397C=", - "HGVS_LRG_variant": "LRG_293:g.44771C=", - "HGVS_RefSeqGene_variant": "NG_012772.3:g.44771C=", - "HGVS_predicted_protein_consequence": "NP_000050.2(LRG_293p1):p.(Ala2466=)", - "HGVS_transcript_variant": "NM_000059.3:c.7397C=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "BRCA2", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000013.10:g.32929387T>C", - "vcf": { - "alt": "C", - "chr": "13", - "pos": "32929387", - "ref": "T" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000013.11:g.32355250T>C", - "vcf": { - "alt": "C", - "chr": "13", - "pos": "32355250", - "ref": "T" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000013.10:g.32929387T>C", - "vcf": { - "alt": "C", - "chr": "chr13", - "pos": "32929387", - "ref": "T" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000013.11:g.32355250T>C", - "vcf": { - "alt": "C", - "chr": "chr13", - "pos": "32355250", - "ref": "T" - } - } - }, - "submitted_variant": "NC_000013.10:g.32929387T>C", - "transcript_description": "Homo sapiens BRCA2, DNA repair associated (BRCA2), mRNA", - "validation_warnings": [] - }, - "flag": "gene_variant" - } - }, - { - "NM_015102.3:c.2818-2T>A": { - "NM_015102.3 Homo sapiens nephrocystin 4 (NPHP4), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_011724.2:g.122370A=", - "HGVS_predicted_protein_consequence": "NP_055917.1:p.?", - 
"HGVS_transcript_variant": "NM_015102.3:c.2818-2T>A", - "RefSeqGene_context_intronic_sequence": "NG_011724.2(NM_015102.3):c.2818-2A=", - "alt_genomic_loci": [], - "gene_symbol": "NPHP4", - "genome_context_intronic_sequence": "NC_000001.10(NM_015102.3):c.2818-2T>A", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000001.10:g.5935162A>T", - "vcf": { - "alt": "T", - "chr": "1", - "pos": "5935162", - "ref": "A" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000001.10:g.5935162A>T", - "vcf": { - "alt": "T", - "chr": "chr1", - "pos": "5935162", - "ref": "A" - } - } - }, - "submitted_variant": "NM_015102.3:c.2818-2T>A", - "transcript_description": "Homo sapiens nephrocystin 4 (NPHP4), transcript variant 1, mRNA", - "validation_warnings": [ - "A more recent version of the selected reference sequence NM_015102.3 is available (NM_015102.4)", - "NM_015102.4:c.2818-2T>A MUST be fully validated prior to use in reports", - "select_variants=NM_015102.4:c.2818-2T>A" - ] - }, - "flag": "gene_variant" - } - }, - { - "19-41123094-G-GG": { - "NM_001042544.1 Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_021201.1:g.29022_29024=", - "HGVS_predicted_protein_consequence": "NP_001036009.1:p.(Gln1078=)", - "HGVS_transcript_variant": "NM_001042544.1:c.3233_3235=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "LTBP4", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000019.9:g.41123095dup", - "vcf": { - "alt": "AGG", - "chr": "19", - "pos": "41123093", - "ref": "AG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000019.10:g.40617187_40617189=", - "vcf": { - "alt": "AGG", - "chr": "19", - "pos": "40617187", - "ref": "AGG" - } - }, - "hg19": { - "HGVS_genomic_description": 
"NC_000019.9:g.41123095dup", - "vcf": { - "alt": "AGG", - "chr": "chr19", - "pos": "41123093", - "ref": "AG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000019.10:g.40617187_40617189=", - "vcf": { - "alt": "AGG", - "chr": "chr19", - "pos": "40617187", - "ref": "AGG" - } - } - }, - "submitted_variant": "19-41123094-G-GG", - "transcript_description": "Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA", - "validation_warnings": [ - "NC_000019.9:g.41123094G>GG automapped to NC_000019.9:g.41123095dupG", - "The displayed variants may be artefacts of aligning NM_001042544.1 with genome build GRCh37", - "NM_001042544.1:c.3233_3235 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "NM_001042545.1 Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_021201.1:g.29022_29024=", - "HGVS_predicted_protein_consequence": "NP_001036010.1:p.(Gln1011=)", - "HGVS_transcript_variant": "NM_001042545.1:c.3032_3034=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "LTBP4", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000019.9:g.41123095dup", - "vcf": { - "alt": "AGG", - "chr": "19", - "pos": "41123093", - "ref": "AG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000019.10:g.40617187_40617189=", - "vcf": { - "alt": "AGG", - "chr": "19", - "pos": "40617187", - "ref": "AGG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000019.9:g.41123095dup", - "vcf": { - "alt": "AGG", - "chr": "chr19", - "pos": "41123093", - "ref": "AG" - } - }, - "hg38": { - "HGVS_genomic_description": 
"NC_000019.10:g.40617187_40617189=", - "vcf": { - "alt": "AGG", - "chr": "chr19", - "pos": "40617187", - "ref": "AGG" - } - } - }, - "submitted_variant": "19-41123094-G-GG", - "transcript_description": "Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA", - "validation_warnings": [ - "NC_000019.9:g.41123094G>GG automapped to NC_000019.9:g.41123095dupG", - "The displayed variants may be artefacts of aligning NM_001042545.1 with genome build GRCh37", - "NM_001042545.1:c.3032_3034 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "NM_003573.2 Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_021201.1:g.29022_29024=", - "HGVS_predicted_protein_consequence": "NP_003564.2:p.(Gln1041=)", - "HGVS_transcript_variant": "NM_003573.2:c.3122_3124=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "LTBP4", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000019.9:g.41123095dup", - "vcf": { - "alt": "AGG", - "chr": "19", - "pos": "41123093", - "ref": "AG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000019.10:g.40617187_40617189=", - "vcf": { - "alt": "AGG", - "chr": "19", - "pos": "40617187", - "ref": "AGG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000019.9:g.41123095dup", - "vcf": { - "alt": "AGG", - "chr": "chr19", - "pos": "41123093", - "ref": "AG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000019.10:g.40617187_40617189=", - "vcf": { - "alt": "AGG", - "chr": "chr19", - "pos": "40617187", - "ref": "AGG" - } - } - }, - "submitted_variant": "19-41123094-G-GG", - 
"transcript_description": "Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA", - "validation_warnings": [ - "NC_000019.9:g.41123094G>GG automapped to NC_000019.9:g.41123095dupG", - "The displayed variants may be artefacts of aligning NM_003573.2 with genome build GRCh37", - "NM_003573.2:c.3122_3124 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "flag": "gene_variant" - } - }, - { - "15-72105928-AC-A": { - "NM_014249.2 Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_055064.1:p.(Asp316=)", - "HGVS_transcript_variant": "NM_014249.2:c.947_948=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "NR2E3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000015.9:g.72105933del", - "vcf": { - "alt": "A", - "chr": "15", - "pos": "72105928", - "ref": "AC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000015.9:g.72105933del", - "vcf": { - "alt": "A", - "chr": "chr15", - "pos": "72105928", - "ref": "AC" - } - } - }, - "submitted_variant": "15-72105928-AC-A", - "transcript_description": "Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA", - "validation_warnings": [ - "NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC", - "The displayed variants may be artefacts of aligning NM_014249.2 with genome build GRCh37", - "Genome position NC_000015.9:g.72105930 aligns within a 1-bp gap in transcript NM_014249.2 between positions c.947_948", - "Caution should be used when reporting the displayed variant 
descriptions", - "If you are unsure, please contact admin", - "A more recent version of the selected reference sequence NM_014249.2 is available (NM_014249.3)", - "NM_014249.3:c.947_948AC= MUST be fully validated prior to use in reports", - "select_variants=NM_014249.3:c.947_948AC=", - "RefSeqGene record not available" - ] - }, - "NM_014249.3 Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_009113.1:g.8035_8036=", - "HGVS_predicted_protein_consequence": "NP_055064.1:p.(Asp316=)", - "HGVS_transcript_variant": "NM_014249.3:c.947_948=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "NR2E3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000015.9:g.72105933del", - "vcf": { - "alt": "A", - "chr": "15", - "pos": "72105928", - "ref": "AC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000015.10:g.71813588_71813589=", - "vcf": { - "alt": "AC", - "chr": "15", - "pos": "71813588", - "ref": "AC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000015.9:g.72105933del", - "vcf": { - "alt": "A", - "chr": "chr15", - "pos": "72105928", - "ref": "AC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000015.10:g.71813588_71813589=", - "vcf": { - "alt": "AC", - "chr": "chr15", - "pos": "71813588", - "ref": "AC" - } - } - }, - "submitted_variant": "15-72105928-AC-A", - "transcript_description": "Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA", - "validation_warnings": [ - "NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC", - "The displayed variants may be artefacts of aligning NM_014249.3 with genome build GRCh37", - "Genome position NC_000015.9:g.72105930 aligns within a 1-bp gap in transcript NM_014249.3 between positions c.947_948", - 
"Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "NM_016346.2 Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_057430.1:p.(Asp316=)", - "HGVS_transcript_variant": "NM_016346.2:c.947_948=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "NR2E3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000015.9:g.72105933del", - "vcf": { - "alt": "A", - "chr": "15", - "pos": "72105928", - "ref": "AC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000015.9:g.72105933del", - "vcf": { - "alt": "A", - "chr": "chr15", - "pos": "72105928", - "ref": "AC" - } - } - }, - "submitted_variant": "15-72105928-AC-A", - "transcript_description": "Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA", - "validation_warnings": [ - "NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC", - "The displayed variants may be artefacts of aligning NM_016346.2 with genome build GRCh37", - "Genome position NC_000015.9:g.72105930 aligns within a 1-bp gap in transcript NM_016346.2 between positions c.947_948", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "A more recent version of the selected reference sequence NM_016346.2 is available (NM_016346.3)", - "NM_016346.3:c.947_948AC= MUST be fully validated prior to use in reports", - "select_variants=NM_016346.3:c.947_948AC=", - "RefSeqGene record not available" - ] - }, - "NM_016346.3 Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - 
"HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_057430.1:p.(Asp316=)", - "HGVS_transcript_variant": "NM_016346.3:c.947_948=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "NR2E3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000015.9:g.72105933del", - "vcf": { - "alt": "A", - "chr": "15", - "pos": "72105928", - "ref": "AC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000015.10:g.71813588_71813589=", - "vcf": { - "alt": "AC", - "chr": "15", - "pos": "71813588", - "ref": "AC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000015.9:g.72105933del", - "vcf": { - "alt": "A", - "chr": "chr15", - "pos": "72105928", - "ref": "AC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000015.10:g.71813588_71813589=", - "vcf": { - "alt": "AC", - "chr": "chr15", - "pos": "71813588", - "ref": "AC" - } - } - }, - "submitted_variant": "15-72105928-AC-A", - "transcript_description": "Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA", - "validation_warnings": [ - "NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC", - "The displayed variants may be artefacts of aligning NM_016346.3 with genome build GRCh37", - "Genome position NC_000015.9:g.72105930 aligns within a 1-bp gap in transcript NM_016346.3 between positions c.947_948", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "12-122064773-CCCGCCA-C": { - "NM_032790.3 Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_93t1:c.126_128=", - "HGVS_LRG_variant": "LRG_93:g.5299_5301=", - "HGVS_RefSeqGene_variant": "NG_007500.1:g.5299_5301=", - 
"HGVS_predicted_protein_consequence": "NP_116179.2(LRG_93p1):p.(Ala42=)", - "HGVS_transcript_variant": "NM_032790.3:c.126_128=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NW_004504303.2:g.302883_302888del", - "vcf": { - "alt": "C", - "chr": "HG1595_PATCH", - "pos": "302871", - "ref": "CCCGCCA" - } - } - } - ], - "gene_symbol": "ORAI1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000012.11:g.122064785_122064790del", - "vcf": { - "alt": "C", - "chr": "12", - "pos": "122064773", - "ref": "CCCGCCA" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000012.12:g.121626873_121626875=", - "vcf": { - "alt": "CCC", - "chr": "12", - "pos": "121626873", - "ref": "CCC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000012.11:g.122064785_122064790del", - "vcf": { - "alt": "C", - "chr": "chr12", - "pos": "122064773", - "ref": "CCCGCCA" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000012.12:g.121626873_121626875=", - "vcf": { - "alt": "CCC", - "chr": "chr12", - "pos": "121626873", - "ref": "CCC" - } - } - }, - "submitted_variant": "12-122064773-CCCGCCA-C", - "transcript_description": "Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA", - "validation_warnings": [ - "NC_000012.11:g.122064773CCCGCCA>C automapped to NC_000012.11:g.122064785_122064790del", - "The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37", - "Genome position NC_000012.11:g.122064780 aligns within a 6-bp gap in transcript NM_032790.3 between positions c.126_127", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "flag": "gene_variant" - } - }, - { - "12-122064774-CCGCCA-CCGCCA": { - "NM_032790.3 Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA": { - 
"HGVS_LRG_transcript_variant": "LRG_93t1:c.132_137dup", - "HGVS_LRG_variant": "LRG_93:g.5305_5310dup", - "HGVS_RefSeqGene_variant": "NG_007500.1:g.5305_5310dup", - "HGVS_predicted_protein_consequence": "NP_116179.2(LRG_93p1):p.(Pro46_Pro47dup)", - "HGVS_transcript_variant": "NM_032790.3:c.132_137dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NW_004504303.2:g.302869_302885=", - "vcf": { - "alt": "GCCCCGCCACCGCCACC", - "chr": "HG1595_PATCH", - "pos": "302869", - "ref": "GCCCCGCCACCGCCACC" - } - } - } - ], - "gene_symbol": "ORAI1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000012.11:g.122064771_122064787=", - "vcf": { - "alt": "GCCCCGCCACCGCCACC", - "chr": "12", - "pos": "122064771", - "ref": "GCCCCGCCACCGCCACC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000012.12:g.121626879_121626884dup", - "vcf": { - "alt": "CCCGCCACCGCCA", - "chr": "12", - "pos": "121626873", - "ref": "CCCGCCA" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000012.11:g.122064771_122064787=", - "vcf": { - "alt": "GCCCCGCCACCGCCACC", - "chr": "chr12", - "pos": "122064771", - "ref": "GCCCCGCCACCGCCACC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000012.12:g.121626879_121626884dup", - "vcf": { - "alt": "CCCGCCACCGCCA", - "chr": "chr12", - "pos": "121626873", - "ref": "CCCGCCA" - } - } - }, - "submitted_variant": "12-122064774-CCGCCA-CCGCCA", - "transcript_description": "Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA", - "validation_warnings": [ - "NC_000012.11:g.122064774CCGCCA>CCGCCA automapped to NC_000012.11:g.122064774_122064779CCGCCA=", - "The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37", - "NC_000012.11:g.122064772_122064786 contains 6 genomic base(s) that fail to align to transcript NM_032790.3", - "Genome position NC_000012.11:g.122064780 
aligns within a 6-bp gap in transcript NM_032790.3 between positions c.126_127", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "flag": "gene_variant" - } - }, - { - "12-122064773-CCCGCCACCGCCACCGC-CCCGCCACCGCCGCCGTC": { - "NM_032790.3 Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_93t1:c.144_150dup", - "HGVS_LRG_variant": "LRG_93:g.5317_5323dup", - "HGVS_RefSeqGene_variant": "NG_007500.1:g.5317_5323dup", - "HGVS_predicted_protein_consequence": "NP_116179.2(LRG_93p1):p.(Thr51ArgfsTer39)", - "HGVS_transcript_variant": "NM_032790.3:c.144_150dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NW_004504303.2:g.302895_302901dup", - "vcf": { - "alt": "GCCGCCGTCCGCCGT", - "chr": "HG1595_PATCH", - "pos": "302886", - "ref": "GCCGCCGT" - } - } - } - ], - "gene_symbol": "ORAI1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000012.11:g.122064797_122064803dup", - "vcf": { - "alt": "GCCGCCGTCCGCCGT", - "chr": "12", - "pos": "122064788", - "ref": "GCCGCCGT" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000012.12:g.121626891_121626897dup", - "vcf": { - "alt": "GCCGCCGTCCGCCGT", - "chr": "12", - "pos": "121626882", - "ref": "GCCGCCGT" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000012.11:g.122064797_122064803dup", - "vcf": { - "alt": "GCCGCCGTCCGCCGT", - "chr": "chr12", - "pos": "122064788", - "ref": "GCCGCCGT" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000012.12:g.121626891_121626897dup", - "vcf": { - "alt": "GCCGCCGTCCGCCGT", - "chr": "chr12", - "pos": "121626882", - "ref": "GCCGCCGT" - } - } - }, - "submitted_variant": "12-122064773-CCCGCCACCGCCACCGC-CCCGCCACCGCCGCCGTC", - "transcript_description": "Homo sapiens ORAI calcium release-activated 
calcium modulator 1 (ORAI1), mRNA", - "validation_warnings": [ - "NC_000012.11:g.122064773CCCGCCACCGCCACCGC>CCCGCCACCGCCGCCGTC automapped to NC_000012.11:g.122064785_122064788delinsGCCGT" - ] - }, - "flag": "gene_variant" - } - }, - { - "NC_000012.11:g.122064777C>A": { - "NM_032790.3 Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_93t1:c.129_130insACACCG", - "HGVS_LRG_variant": "LRG_93:g.5302_5303insACACCG", - "HGVS_RefSeqGene_variant": "NG_007500.1:g.5302_5303insACACCG", - "HGVS_predicted_protein_consequence": "NP_116179.2(LRG_93p1):p.(Pro43_Pro44insThrPro)", - "HGVS_transcript_variant": "NM_032790.3:c.129_130insACACCG", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NW_004504303.2:g.302875C>A", - "vcf": { - "alt": "A", - "chr": "HG1595_PATCH", - "pos": "302875", - "ref": "C" - } - } - } - ], - "gene_symbol": "ORAI1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000012.11:g.122064777C>A", - "vcf": { - "alt": "A", - "chr": "12", - "pos": "122064777", - "ref": "C" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000012.12:g.121626876_121626877insACACCG", - "vcf": { - "alt": "CCCGACA", - "chr": "12", - "pos": "121626873", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000012.11:g.122064777C>A", - "vcf": { - "alt": "A", - "chr": "chr12", - "pos": "122064777", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000012.12:g.121626876_121626877insACACCG", - "vcf": { - "alt": "CCCGACA", - "chr": "chr12", - "pos": "121626873", - "ref": "C" - } - } - }, - "submitted_variant": "NC_000012.11:g.122064777C>A", - "transcript_description": "Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_032790.3 with 
genome build GRCh37", - "NC_000012.11:g.122064777 is one of 6 genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "flag": "gene_variant" - } - }, - { - "NC_000012.11:g.122064776delG": { - "NM_032790.3 Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_93t1:c.128_129insCCACC", - "HGVS_LRG_variant": "LRG_93:g.5301_5302insCCACC", - "HGVS_RefSeqGene_variant": "NG_007500.1:g.5301_5302insCCACC", - "HGVS_predicted_protein_consequence": "NP_116179.2(LRG_93p1):p.(Pro44HisfsTer22)", - "HGVS_transcript_variant": "NM_032790.3:c.128_129insCCACC", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NW_004504303.2:g.302874del", - "vcf": { - "alt": "C", - "chr": "HG1595_PATCH", - "pos": "302873", - "ref": "CG" - } - } - } - ], - "gene_symbol": "ORAI1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000012.11:g.122064776del", - "vcf": { - "alt": "C", - "chr": "12", - "pos": "122064775", - "ref": "CG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000012.12:g.121626875_121626876insCCACC", - "vcf": { - "alt": "CCCCCA", - "chr": "12", - "pos": "121626873", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000012.11:g.122064776del", - "vcf": { - "alt": "C", - "chr": "chr12", - "pos": "122064775", - "ref": "CG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000012.12:g.121626875_121626876insCCACC", - "vcf": { - "alt": "CCCCCA", - "chr": "chr12", - "pos": "121626873", - "ref": "C" - } - } - }, - "submitted_variant": "NC_000012.11:g.122064776delG", - "transcript_description": "Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA", - "validation_warnings": 
[ - "The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37", - "NC_000012.11:g.122064775 is one of 6 genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127", - "NC_000012.11:g.122064776 is one of 6 genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "flag": "gene_variant" - } - }, - { - "NC_000012.11:g.122064776dupG": { - "NM_032790.3 Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_93t1:c.129_130insGCCACCG", - "HGVS_LRG_variant": "LRG_93:g.5302_5303insGCCACCG", - "HGVS_RefSeqGene_variant": "NG_007500.1:g.5302_5303insGCCACCG", - "HGVS_predicted_protein_consequence": "NP_116179.2(LRG_93p1):p.(Pro44AlafsTer46)", - "HGVS_transcript_variant": "NM_032790.3:c.129_130insGCCACCG", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NW_004504303.2:g.302874dup", - "vcf": { - "alt": "CGG", - "chr": "HG1595_PATCH", - "pos": "302873", - "ref": "CG" - } - } - } - ], - "gene_symbol": "ORAI1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000012.11:g.122064776dup", - "vcf": { - "alt": "CGG", - "chr": "12", - "pos": "122064775", - "ref": "CG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000012.12:g.121626876_121626877insGCCACCG", - "vcf": { - "alt": "CCCGGCCA", - "chr": "12", - "pos": "121626873", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000012.11:g.122064776dup", - "vcf": { - "alt": "CGG", - "chr": "chr12", - "pos": "122064775", - "ref": "CG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000012.12:g.121626876_121626877insGCCACCG", - "vcf": { - "alt": "CCCGGCCA", - "chr": "chr12", - "pos": 
"121626873", - "ref": "C" - } - } - }, - "submitted_variant": "NC_000012.11:g.122064776dupG", - "transcript_description": "Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37", - "NC_000012.11:g.122064775 is one of 6 genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "flag": "gene_variant" - } - }, - { - "NC_000012.11:g.122064776_122064777insTTT": { - "NM_032790.3 Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_93t1:c.129_130insTTTCCACCG", - "HGVS_LRG_variant": "LRG_93:g.5302_5303insTTTCCACCG", - "HGVS_RefSeqGene_variant": "NG_007500.1:g.5302_5303insTTTCCACCG", - "HGVS_predicted_protein_consequence": "NP_116179.2(LRG_93p1):p.(Pro43_Pro44insPheProPro)", - "HGVS_transcript_variant": "NM_032790.3:c.129_130insTTTCCACCG", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NW_004504303.2:g.302874_302875insTTT", - "vcf": { - "alt": "GTTT", - "chr": "HG1595_PATCH", - "pos": "302874", - "ref": "G" - } - } - } - ], - "gene_symbol": "ORAI1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000012.11:g.122064776_122064777insTTT", - "vcf": { - "alt": "GTTT", - "chr": "12", - "pos": "122064776", - "ref": "G" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000012.12:g.121626876_121626877insTTTCCACCG", - "vcf": { - "alt": "CCCGTTTCCA", - "chr": "12", - "pos": "121626873", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000012.11:g.122064776_122064777insTTT", - "vcf": { - "alt": "GTTT", - "chr": "chr12", - "pos": "122064776", - 
"ref": "G" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000012.12:g.121626876_121626877insTTTCCACCG", - "vcf": { - "alt": "CCCGTTTCCA", - "chr": "chr12", - "pos": "121626873", - "ref": "C" - } - } - }, - "submitted_variant": "NC_000012.11:g.122064776_122064777insTTT", - "transcript_description": "Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37", - "NC_000012.11:g.122064776 is one of 7 genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "flag": "gene_variant" - } - }, - { - "NC_000012.11:g.122064772_122064775del": { - "NM_032790.3 Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_93t1:c.125_126delinsGCCA", - "HGVS_LRG_variant": "LRG_93:g.5298_5299delinsGCCA", - "HGVS_RefSeqGene_variant": "NG_007500.1:g.5298_5299delinsGCCA", - "HGVS_predicted_protein_consequence": "NP_116179.2(LRG_93p1):p.(Ala42GlyfsTer23)", - "HGVS_transcript_variant": "NM_032790.3:c.125_126delinsGCCA", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NW_004504303.2:g.302870_302873del", - "vcf": { - "alt": "G", - "chr": "HG1595_PATCH", - "pos": "302869", - "ref": "GCCCC" - } - } - } - ], - "gene_symbol": "ORAI1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000012.11:g.122064772_122064775del", - "vcf": { - "alt": "G", - "chr": "12", - "pos": "122064771", - "ref": "GCCCC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000012.12:g.121626872_121626873delinsGCCA", - "vcf": { - "alt": "GCCA", - "chr": "12", - "pos": "121626872", - "ref": "CC" - } - }, - 
"hg19": { - "HGVS_genomic_description": "NC_000012.11:g.122064772_122064775del", - "vcf": { - "alt": "G", - "chr": "chr12", - "pos": "122064771", - "ref": "GCCCC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000012.12:g.121626872_121626873delinsGCCA", - "vcf": { - "alt": "GCCA", - "chr": "chr12", - "pos": "121626872", - "ref": "CC" - } - } - }, - "submitted_variant": "NC_000012.11:g.122064772_122064775del", - "transcript_description": "Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37", - "Genome position NC_000012.11:g.122064776 aligns within a 6-bp gap in transcript NM_032790.3 between positions c.126_127", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "flag": "gene_variant" - } - }, - { - "NC_000012.11:g.122064772_122064775dup": { - "NM_032790.3 Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_93t1:c.128_129insCCCCGCCACC", - "HGVS_LRG_variant": "LRG_93:g.5301_5302insCCCCGCCACC", - "HGVS_RefSeqGene_variant": "NG_007500.1:g.5301_5302insCCCCGCCACC", - "HGVS_predicted_protein_consequence": "NP_116179.2(LRG_93p1):p.(Pro45AlafsTer46)", - "HGVS_transcript_variant": "NM_032790.3:c.128_129insCCCCGCCACC", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NW_004504303.2:g.302870_302873dup", - "vcf": { - "alt": "GCCCCCCCC", - "chr": "HG1595_PATCH", - "pos": "302869", - "ref": "GCCCC" - } - } - } - ], - "gene_symbol": "ORAI1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000012.11:g.122064772_122064775dup", - "vcf": { - "alt": "GCCCCCCCC", - "chr": "12", - "pos": "122064771", - "ref": "GCCCC" - } - }, - "GRCh38": { - 
"HGVS_genomic_description": "NC_000012.12:g.121626875_121626876insCCCCGCCACC", - "vcf": { - "alt": "CCCCCCCGCCA", - "chr": "12", - "pos": "121626873", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000012.11:g.122064772_122064775dup", - "vcf": { - "alt": "GCCCCCCCC", - "chr": "chr12", - "pos": "122064771", - "ref": "GCCCC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000012.12:g.121626875_121626876insCCCCGCCACC", - "vcf": { - "alt": "CCCCCCCGCCA", - "chr": "chr12", - "pos": "121626873", - "ref": "C" - } - } - }, - "submitted_variant": "NC_000012.11:g.122064772_122064775dup", - "transcript_description": "Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37", - "NC_000012.11:g.122064771_122064781 contains 6 genomic base(s) that fail to align to transcript NM_032790.3", - "Genome position NC_000012.11:g.122064776 aligns within a 6-bp gap in transcript NM_032790.3 between positions c.126_127", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "flag": "gene_variant" - } - }, - { - "NC_000012.11:g.122064773_122064774insTTTT": { - "NM_032790.3 Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_93t1:c.126_127insTTTTCCGCCA", - "HGVS_LRG_variant": "LRG_93:g.5299_5300insTTTTCCGCCA", - "HGVS_RefSeqGene_variant": "NG_007500.1:g.5299_5300insTTTTCCGCCA", - "HGVS_predicted_protein_consequence": "NP_116179.2(LRG_93p1):p.(Pro43PhefsTer48)", - "HGVS_transcript_variant": "NM_032790.3:c.126_127insTTTTCCGCCA", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NW_004504303.2:g.302871_302872insTTTT", - "vcf": { - "alt": "CTTTT", - "chr": "HG1595_PATCH", - "pos": "302871", - "ref": "C" - } - } - } - 
], - "gene_symbol": "ORAI1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000012.11:g.122064773_122064774insTTTT", - "vcf": { - "alt": "CTTTT", - "chr": "12", - "pos": "122064773", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000012.11:g.122064773_122064774insTTTT", - "vcf": { - "alt": "CTTTT", - "chr": "chr12", - "pos": "122064773", - "ref": "C" - } - } - }, - "submitted_variant": "NC_000012.11:g.122064773_122064774insTTTT", - "transcript_description": "Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA", - "validation_warnings": [ - "NM_032790.3:c.126_127insTTTTCCGCCA can not be mapped directly to genome build GRCh37. See Alternative genomic loci for aligned genomic positions" - ] - }, - "flag": "gene_variant" - } - }, - { - "NC_000012.11:g.122064772_122064777del": { - "NM_032790.3 Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_93t1:c.126C>A", - "HGVS_LRG_variant": "LRG_93:g.5299C>A", - "HGVS_RefSeqGene_variant": "NG_007500.1:g.5299C>A", - "HGVS_predicted_protein_consequence": "NP_116179.2(LRG_93p1):p.(Ala42=)", - "HGVS_transcript_variant": "NM_032790.3:c.126C>A", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NW_004504303.2:g.302871_302876del", - "vcf": { - "alt": "G", - "chr": "HG1595_PATCH", - "pos": "302869", - "ref": "GCCCCGC" - } - } - } - ], - "gene_symbol": "ORAI1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000012.11:g.122064773_122064778del", - "vcf": { - "alt": "G", - "chr": "12", - "pos": "122064771", - "ref": "GCCCCGC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000012.12:g.121626873C>A", - "vcf": { - "alt": "A", - "chr": "12", - "pos": "121626873", - "ref": "C" - } - }, - "hg19": { - 
"HGVS_genomic_description": "NC_000012.11:g.122064773_122064778del", - "vcf": { - "alt": "G", - "chr": "chr12", - "pos": "122064771", - "ref": "GCCCCGC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000012.12:g.121626873C>A", - "vcf": { - "alt": "A", - "chr": "chr12", - "pos": "121626873", - "ref": "C" - } - } - }, - "submitted_variant": "NC_000012.11:g.122064772_122064777del", - "transcript_description": "Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37", - "Genome position NC_000012.11:g.122064778 aligns within a 6-bp gap in transcript NM_032790.3 between positions c.126_127", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "flag": "gene_variant" - } - }, - { - "NC_000012.11:g.122064772_122064777dup": { - "NM_032790.3 Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_93t1:c.131_132insCCCGCCACCGCC", - "HGVS_LRG_variant": "LRG_93:g.5304_5305insCCCGCCACCGCC", - "HGVS_RefSeqGene_variant": "NG_007500.1:g.5304_5305insCCCGCCACCGCC", - "HGVS_predicted_protein_consequence": "NP_116179.2(LRG_93p1):p.(Pro44_Pro47dup)", - "HGVS_transcript_variant": "NM_032790.3:c.131_132insCCCGCCACCGCC", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NW_004504303.2:g.302871_302876dup", - "vcf": { - "alt": "GCCCCGCCCCCGC", - "chr": "HG1595_PATCH", - "pos": "302869", - "ref": "GCCCCGC" - } - } - } - ], - "gene_symbol": "ORAI1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000012.11:g.122064773_122064778dup", - "vcf": { - "alt": "GCCCCGCCCCCGC", - "chr": "12", - "pos": "122064771", - "ref": "GCCCCGC" - } - }, - "GRCh38": { - 
"HGVS_genomic_description": "NC_000012.12:g.121626878_121626879insCCCGCCACCGCC", - "vcf": { - "alt": "CCCGCCCCCGCCA", - "chr": "12", - "pos": "121626873", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000012.11:g.122064773_122064778dup", - "vcf": { - "alt": "GCCCCGCCCCCGC", - "chr": "chr12", - "pos": "122064771", - "ref": "GCCCCGC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000012.12:g.121626878_121626879insCCCGCCACCGCC", - "vcf": { - "alt": "CCCGCCCCCGCCA", - "chr": "chr12", - "pos": "121626873", - "ref": "C" - } - } - }, - "submitted_variant": "NC_000012.11:g.122064772_122064777dup", - "transcript_description": "Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37", - "NC_000012.11:g.122064771_122064783 contains 6 genomic base(s) that fail to align to transcript NM_032790.3", - "Genome position NC_000012.11:g.122064778 aligns within a 6-bp gap in transcript NM_032790.3 between positions c.126_127", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "flag": "gene_variant" - } - }, - { - "NC_000012.11:g.122064779_122064782dup": { - "NM_032790.3 Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_93t1:c.135_136insACCGCCACCG", - "HGVS_LRG_variant": "LRG_93:g.5308_5309insACCGCCACCG", - "HGVS_RefSeqGene_variant": "NG_007500.1:g.5308_5309insACCGCCACCG", - "HGVS_predicted_protein_consequence": "NP_116179.2(LRG_93p1):p.(Pro46ThrfsTer45)", - "HGVS_transcript_variant": "NM_032790.3:c.135_136insACCGCCACCG", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NW_004504303.2:g.302877_302880dup", - "vcf": { - "alt": "CACCGACCG", - "chr": "HG1595_PATCH", - "pos": "302876", - "ref": "CACCG" 
- } - } - } - ], - "gene_symbol": "ORAI1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000012.11:g.122064779_122064782dup", - "vcf": { - "alt": "CACCGACCG", - "chr": "12", - "pos": "122064778", - "ref": "CACCG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000012.12:g.121626882_121626883insACCGCCACCG", - "vcf": { - "alt": "CCCGCCACCGA", - "chr": "12", - "pos": "121626873", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000012.11:g.122064779_122064782dup", - "vcf": { - "alt": "CACCGACCG", - "chr": "chr12", - "pos": "122064778", - "ref": "CACCG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000012.12:g.121626882_121626883insACCGCCACCG", - "vcf": { - "alt": "CCCGCCACCGA", - "chr": "chr12", - "pos": "121626873", - "ref": "C" - } - } - }, - "submitted_variant": "NC_000012.11:g.122064779_122064782dup", - "transcript_description": "Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37", - "NC_000012.11:g.122064778 is one of 6 genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "flag": "gene_variant" - } - }, - { - "NC_000012.11:g.122064772_122064782del": { - "NM_032790.3 Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_93t1:c.127_131del", - "HGVS_LRG_variant": "LRG_93:g.5300_5304del", - "HGVS_RefSeqGene_variant": "NG_007500.1:g.5300_5304del", - "HGVS_predicted_protein_consequence": "NP_116179.2(LRG_93p1):p.(Pro43ThrfsTer43)", - "HGVS_transcript_variant": "NM_032790.3:c.127_131del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - 
"HGVS_genomic_description": "NW_004504303.2:g.302872_302882del", - "vcf": { - "alt": "G", - "chr": "HG1595_PATCH", - "pos": "302868", - "ref": "GGCCCCGCCACC" - } - } - } - ], - "gene_symbol": "ORAI1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000012.11:g.122064774_122064784del", - "vcf": { - "alt": "G", - "chr": "12", - "pos": "122064770", - "ref": "GGCCCCGCCACC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000012.12:g.121626869_121626878del", - "vcf": { - "alt": "G", - "chr": "12", - "pos": "121626865", - "ref": "GGCCCCGCCCC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000012.11:g.122064774_122064784del", - "vcf": { - "alt": "G", - "chr": "chr12", - "pos": "122064770", - "ref": "GGCCCCGCCACC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000012.12:g.121626869_121626878del", - "vcf": { - "alt": "G", - "chr": "chr12", - "pos": "121626865", - "ref": "GGCCCCGCCCC" - } - } - }, - "submitted_variant": "NC_000012.11:g.122064772_122064782del", - "transcript_description": "Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37", - "NC_000012.11:g.122064770_122064781 contains 6 genomic base(s) that fail to align to transcript NM_032790.3", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "flag": "gene_variant" - } - }, - { - "NC_000002.11:g.95847041_95847043GCG=": { - "NM_001017396.1 Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001017396.1:p.(Arg117dup)", - "HGVS_transcript_variant": "NM_001017396.1:c.345_347dup", - "RefSeqGene_context_intronic_sequence": "", - 
"alt_genomic_loci": [], - "gene_symbol": "ZNF2", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000002.11:g.95847038_95847048=", - "vcf": { - "alt": "CTTGCGGCGGC", - "chr": "2", - "pos": "95847038", - "ref": "CTTGCGGCGGC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000002.11:g.95847038_95847048=", - "vcf": { - "alt": "CTTGCGGCGGC", - "chr": "chr2", - "pos": "95847038", - "ref": "CTTGCGGCGGC" - } - } - }, - "submitted_variant": "NC_000002.11:g.95847041_95847043GCG=", - "transcript_description": "Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 2, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_001017396.1 with genome build GRCh37", - "NC_000002.11:g.95847039_95847047 contains 3 genomic base(s) that fail to align to transcript NM_001017396.1", - "Genome position NC_000002.11:g.95847044 aligns within a 3-bp gap in transcript NM_001017396.1 between positions c.341_342", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "A more recent version of the selected reference sequence NM_001017396.1 is available (NM_001017396.2)", - "NM_001017396.2:c.345_347dupGCG MUST be fully validated prior to use in reports", - "select_variants=NM_001017396.2:c.345_347dupGCG", - "RefSeqGene record not available" - ] - }, - "NM_001017396.2 Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001017396.1:p.(Arg117dup)", - "HGVS_transcript_variant": "NM_001017396.2:c.345_347dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "ZNF2", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000002.11:g.95847038_95847048=", 
- "vcf": { - "alt": "CTTGCGGCGGC", - "chr": "2", - "pos": "95847038", - "ref": "CTTGCGGCGGC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000002.12:g.95181299_95181301dup", - "vcf": { - "alt": "TGCGGCG", - "chr": "2", - "pos": "95181295", - "ref": "TGCG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000002.11:g.95847038_95847048=", - "vcf": { - "alt": "CTTGCGGCGGC", - "chr": "chr2", - "pos": "95847038", - "ref": "CTTGCGGCGGC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000002.12:g.95181299_95181301dup", - "vcf": { - "alt": "TGCGGCG", - "chr": "chr2", - "pos": "95181295", - "ref": "TGCG" - } - } - }, - "submitted_variant": "NC_000002.11:g.95847041_95847043GCG=", - "transcript_description": "Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 2, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_001017396.2 with genome build GRCh37", - "Genome position NC_000002.11:g.95847044 aligns within a 3-bp gap in transcript NM_001017396.2 between positions c.341_342", - "NC_000002.11:g.95847039_95847047 contains 3 genomic base(s) that fail to align to transcript NM_001017396.2", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "NM_001282398.1 Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 3, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001269327.1:p.(Arg121dup)", - "HGVS_transcript_variant": "NM_001282398.1:c.357_359dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "ZNF2", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000002.11:g.95847038_95847048=", - "vcf": { - "alt": "CTTGCGGCGGC", - "chr": "2", - "pos": "95847038", - "ref": "CTTGCGGCGGC" - 
} - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000002.12:g.95181299_95181301dup", - "vcf": { - "alt": "TGCGGCG", - "chr": "2", - "pos": "95181295", - "ref": "TGCG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000002.11:g.95847038_95847048=", - "vcf": { - "alt": "CTTGCGGCGGC", - "chr": "chr2", - "pos": "95847038", - "ref": "CTTGCGGCGGC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000002.12:g.95181299_95181301dup", - "vcf": { - "alt": "TGCGGCG", - "chr": "chr2", - "pos": "95181295", - "ref": "TGCG" - } - } - }, - "submitted_variant": "NC_000002.11:g.95847041_95847043GCG=", - "transcript_description": "Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 3, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_001282398.1 with genome build GRCh37", - "Genome position NC_000002.11:g.95847044 aligns within a 3-bp gap in transcript NM_001282398.1 between positions c.353_354", - "NC_000002.11:g.95847039_95847047 contains 3 genomic base(s) that fail to align to transcript NM_001282398.1", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "NM_001291604.1 Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 4, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001278533.1:p.(Arg79dup)", - "HGVS_transcript_variant": "NM_001291604.1:c.231_233dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "ZNF2", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000002.11:g.95847038_95847048=", - "vcf": { - "alt": "CTTGCGGCGGC", - "chr": "2", - "pos": "95847038", - "ref": "CTTGCGGCGGC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000002.12:g.95181299_95181301dup", - 
"vcf": { - "alt": "TGCGGCG", - "chr": "2", - "pos": "95181295", - "ref": "TGCG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000002.11:g.95847038_95847048=", - "vcf": { - "alt": "CTTGCGGCGGC", - "chr": "chr2", - "pos": "95847038", - "ref": "CTTGCGGCGGC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000002.12:g.95181299_95181301dup", - "vcf": { - "alt": "TGCGGCG", - "chr": "chr2", - "pos": "95181295", - "ref": "TGCG" - } - } - }, - "submitted_variant": "NC_000002.11:g.95847041_95847043GCG=", - "transcript_description": "Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 4, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_001291604.1 with genome build GRCh37", - "NC_000002.11:g.95847039_95847047 contains 3 genomic base(s) that fail to align to transcript NM_001291604.1", - "Genome position NC_000002.11:g.95847044 aligns within a 3-bp gap in transcript NM_001291604.1 between positions c.227_228", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "NM_001291605.1 Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 5, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001278534.1:p.(Arg172dup)", - "HGVS_transcript_variant": "NM_001291605.1:c.510_512dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "ZNF2", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000002.11:g.95847038_95847048=", - "vcf": { - "alt": "CTTGCGGCGGC", - "chr": "2", - "pos": "95847038", - "ref": "CTTGCGGCGGC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000002.12:g.95181299_95181301dup", - "vcf": { - "alt": "TGCGGCG", - "chr": "2", - "pos": "95181295", - "ref": "TGCG" - } - }, - 
"hg19": { - "HGVS_genomic_description": "NC_000002.11:g.95847038_95847048=", - "vcf": { - "alt": "CTTGCGGCGGC", - "chr": "chr2", - "pos": "95847038", - "ref": "CTTGCGGCGGC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000002.12:g.95181299_95181301dup", - "vcf": { - "alt": "TGCGGCG", - "chr": "chr2", - "pos": "95181295", - "ref": "TGCG" - } - } - }, - "submitted_variant": "NC_000002.11:g.95847041_95847043GCG=", - "transcript_description": "Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 5, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_001291605.1 with genome build GRCh37", - "NC_000002.11:g.95847039_95847047 contains 3 genomic base(s) that fail to align to transcript NM_001291605.1", - "Genome position NC_000002.11:g.95847044 aligns within a 3-bp gap in transcript NM_001291605.1 between positions c.506_507", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "NM_021088.2 Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_066574.2:p.(Arg159dup)", - "HGVS_transcript_variant": "NM_021088.2:c.471_473dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "ZNF2", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000002.11:g.95847038_95847048=", - "vcf": { - "alt": "CTTGCGGCGGC", - "chr": "2", - "pos": "95847038", - "ref": "CTTGCGGCGGC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000002.11:g.95847038_95847048=", - "vcf": { - "alt": "CTTGCGGCGGC", - "chr": "chr2", - "pos": "95847038", - "ref": "CTTGCGGCGGC" - } - } - }, - "submitted_variant": "NC_000002.11:g.95847041_95847043GCG=", - "transcript_description": 
"Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 1, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_021088.2 with genome build GRCh37", - "Genome position NC_000002.11:g.95847044 aligns within a 3-bp gap in transcript NM_021088.2 between positions c.467_468", - "NC_000002.11:g.95847039_95847047 contains 3 genomic base(s) that fail to align to transcript NM_021088.2", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "A more recent version of the selected reference sequence NM_021088.2 is available (NM_021088.3)", - "NM_021088.3:c.471_473dupGCG MUST be fully validated prior to use in reports", - "select_variants=NM_021088.3:c.471_473dupGCG", - "RefSeqGene record not available" - ] - }, - "NM_021088.3 Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_033798.1:g.20883_20885dup", - "HGVS_predicted_protein_consequence": "NP_066574.2:p.(Arg159dup)", - "HGVS_transcript_variant": "NM_021088.3:c.471_473dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "ZNF2", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000002.11:g.95847038_95847048=", - "vcf": { - "alt": "CTTGCGGCGGC", - "chr": "2", - "pos": "95847038", - "ref": "CTTGCGGCGGC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000002.12:g.95181299_95181301dup", - "vcf": { - "alt": "TGCGGCG", - "chr": "2", - "pos": "95181295", - "ref": "TGCG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000002.11:g.95847038_95847048=", - "vcf": { - "alt": "CTTGCGGCGGC", - "chr": "chr2", - "pos": "95847038", - "ref": "CTTGCGGCGGC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000002.12:g.95181299_95181301dup", - "vcf": { - "alt": "TGCGGCG", - "chr": 
"chr2", - "pos": "95181295", - "ref": "TGCG" - } - } - }, - "submitted_variant": "NC_000002.11:g.95847041_95847043GCG=", - "transcript_description": "Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 1, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_021088.3 with genome build GRCh37", - "Genome position NC_000002.11:g.95847044 aligns within a 3-bp gap in transcript NM_021088.3 between positions c.467_468", - "NC_000002.11:g.95847039_95847047 contains 3 genomic base(s) that fail to align to transcript NM_021088.3", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "flag": "gene_variant" - } - }, - { - "NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=": { - "NM_001083585.1 Homo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001077054.1:p.?", - "HGVS_transcript_variant": "NM_001083585.1:c.*344_*368dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "RABEP1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.5286859_5286913=", - "vcf": { - "alt": "CAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGA", - "chr": "17", - "pos": "5286859", - "ref": "CAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGA" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.5286859_5286913=", - "vcf": { - "alt": "CAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGA", - "chr": "chr17", - "pos": "5286859", - "ref": "CAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGA" - } - } - }, - "submitted_variant": "NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=", - "transcript_description": "Homo sapiens 
rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 2, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_001083585.1 with genome build GRCh37", - "NC_000017.10:g.5286860_5286912 contains 25 genomic base(s) that fail to align to transcript NM_001083585.1", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "A more recent version of the selected reference sequence NM_001083585.1 is available (NM_001083585.2)", - "NM_001083585.2:c.*344_*368dupTAGTGTTTGGAATTTTCTGTTCATA MUST be fully validated prior to use in reports", - "select_variants=NM_001083585.2:c.*344_*368dupTAGTGTTTGGAATTTTCTGTTCATA", - "RefSeqGene record not available" - ] - }, - "NM_001083585.2 Homo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001077054.1:p.?", - "HGVS_transcript_variant": "NM_001083585.2:c.*344_*368dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "RABEP1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.5286859_5286913=", - "vcf": { - "alt": "CAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGA", - "chr": "17", - "pos": "5286859", - "ref": "CAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGA" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.5383567_5383591dup", - "vcf": { - "alt": "GTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATA", - "chr": "17", - "pos": "5383566", - "ref": "GTAGTGTTTGGAATTTTCTGTTCATA" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.5286859_5286913=", - "vcf": { - "alt": "CAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGA", - "chr": "chr17", - "pos": "5286859", - 
"ref": "CAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGA" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.5383567_5383591dup", - "vcf": { - "alt": "GTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATA", - "chr": "chr17", - "pos": "5383566", - "ref": "GTAGTGTTTGGAATTTTCTGTTCATA" - } - } - }, - "submitted_variant": "NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=", - "transcript_description": "Homo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 2, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_001083585.2 with genome build GRCh37", - "NC_000017.10:g.5286860_5286912 contains 25 genomic base(s) that fail to align to transcript NM_001083585.2", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "NM_001291581.1 Homo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 3, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001278510.1:p.?", - "HGVS_transcript_variant": "NM_001291581.1:c.*344_*368dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "RABEP1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.5286859_5286913=", - "vcf": { - "alt": "CAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGA", - "chr": "17", - "pos": "5286859", - "ref": "CAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGA" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.5383567_5383591dup", - "vcf": { - "alt": "GTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATA", - "chr": "17", - "pos": "5383566", - "ref": "GTAGTGTTTGGAATTTTCTGTTCATA" - } - }, - "hg19": { - "HGVS_genomic_description": 
"NC_000017.10:g.5286859_5286913=", - "vcf": { - "alt": "CAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGA", - "chr": "chr17", - "pos": "5286859", - "ref": "CAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGA" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.5383567_5383591dup", - "vcf": { - "alt": "GTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATA", - "chr": "chr17", - "pos": "5383566", - "ref": "GTAGTGTTTGGAATTTTCTGTTCATA" - } - } - }, - "submitted_variant": "NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=", - "transcript_description": "Homo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 3, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_001291581.1 with genome build GRCh37", - "NC_000017.10:g.5286860_5286912 contains 25 genomic base(s) that fail to align to transcript NM_001291581.1", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "NM_004703.4 Homo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_004694.2:p.?", - "HGVS_transcript_variant": "NM_004703.4:c.*344_*368dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "RABEP1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.5286859_5286913=", - "vcf": { - "alt": "CAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGA", - "chr": "17", - "pos": "5286859", - "ref": "CAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGA" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.5286859_5286913=", - "vcf": { - "alt": 
"CAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGA", - "chr": "chr17", - "pos": "5286859", - "ref": "CAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGA" - } - } - }, - "submitted_variant": "NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=", - "transcript_description": "Homo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 1, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_004703.4 with genome build GRCh37", - "NC_000017.10:g.5286860_5286912 contains 25 genomic base(s) that fail to align to transcript NM_004703.4", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "A more recent version of the selected reference sequence NM_004703.4 is available (NM_004703.5)", - "NM_004703.5:c.*344_*368dupTAGTGTTTGGAATTTTCTGTTCATA MUST be fully validated prior to use in reports", - "select_variants=NM_004703.5:c.*344_*368dupTAGTGTTTGGAATTTTCTGTTCATA", - "RefSeqGene record not available" - ] - }, - "NM_004703.5 Homo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_004694.2:p.?", - "HGVS_transcript_variant": "NM_004703.5:c.*344_*368dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "RABEP1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.5286859_5286913=", - "vcf": { - "alt": "CAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGA", - "chr": "17", - "pos": "5286859", - "ref": "CAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGA" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.5383567_5383591dup", - "vcf": { - "alt": "GTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATA", - 
"chr": "17", - "pos": "5383566", - "ref": "GTAGTGTTTGGAATTTTCTGTTCATA" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.5286859_5286913=", - "vcf": { - "alt": "CAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGA", - "chr": "chr17", - "pos": "5286859", - "ref": "CAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGA" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.5383567_5383591dup", - "vcf": { - "alt": "GTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATA", - "chr": "chr17", - "pos": "5383566", - "ref": "GTAGTGTTTGGAATTTTCTGTTCATA" - } - } - }, - "submitted_variant": "NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=", - "transcript_description": "Homo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 1, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_004703.5 with genome build GRCh37", - "NC_000017.10:g.5286860_5286912 contains 25 genomic base(s) that fail to align to transcript NM_004703.5", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "NC_000003.11:g.14561629_14561630GC=": { - "NM_001080423.2 Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001073892.2:p.(Ser438GlnfsTer4)", - "HGVS_transcript_variant": "NM_001080423.2:c.1311del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "GRIP2", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000003.11:g.14561629_14561632=", - "vcf": { - "alt": "GCCT", - "chr": "3", - "pos": "14561629", - "ref": "GCCT" - } - }, - "hg19": { - "HGVS_genomic_description": 
"NC_000003.11:g.14561629_14561632=", - "vcf": { - "alt": "GCCT", - "chr": "chr3", - "pos": "14561629", - "ref": "GCCT" - } - } - }, - "submitted_variant": "NC_000003.11:g.14561629_14561630GC=", - "transcript_description": "Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_001080423.2 with genome build GRCh37", - "NM_001080423.2:c.1307_1309 contains 1 transcript base(s) that fail to align to chromosome NC_000003.11", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "A more recent version of the selected reference sequence NM_001080423.2 is available (NM_001080423.3)", - "NM_001080423.3:c.1311delG MUST be fully validated prior to use in reports", - "select_variants=NM_001080423.3:c.1311delG", - "RefSeqGene record not available" - ] - }, - "NM_001080423.3 Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001073892.3:p.(Ser341GlnfsTer4)", - "HGVS_transcript_variant": "NM_001080423.3:c.1020del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "GRIP2", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000003.11:g.14561629_14561632=", - "vcf": { - "alt": "GCCT", - "chr": "3", - "pos": "14561629", - "ref": "GCCT" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000003.12:g.14520122del", - "vcf": { - "alt": "A", - "chr": "3", - "pos": "14520119", - "ref": "AG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000003.11:g.14561629_14561632=", - "vcf": { - "alt": "GCCT", - "chr": "chr3", - "pos": "14561629", - "ref": "GCCT" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000003.12:g.14520122del", - "vcf": { - 
"alt": "A", - "chr": "chr3", - "pos": "14520119", - "ref": "AG" - } - } - }, - "submitted_variant": "NC_000003.11:g.14561629_14561630GC=", - "transcript_description": "Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_001080423.3 with genome build GRCh37", - "NM_001080423.3:c.1016_1018 contains 1 transcript base(s) that fail to align to chromosome NC_000003.11", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "NC_000003.11:g.14561629_14561630insG": { - "NM_001080423.2 Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001073892.2:p.(Arg436=)", - "HGVS_transcript_variant": "NM_001080423.2:c.1307_1311=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "GRIP2", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000003.11:g.14561629dup", - "vcf": { - "alt": "AGG", - "chr": "3", - "pos": "14561627", - "ref": "AG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000003.11:g.14561629dup", - "vcf": { - "alt": "AGG", - "chr": "chr3", - "pos": "14561627", - "ref": "AG" - } - } - }, - "submitted_variant": "NC_000003.11:g.14561629_14561630insG", - "transcript_description": "Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA", - "validation_warnings": [ - "A more recent version of the selected reference sequence NM_001080423.2 is available (NM_001080423.3)", - "NM_001080423.3:c.1307_1311TGTCG= MUST be fully validated prior to use in reports", - "select_variants=NM_001080423.3:c.1307_1311TGTCG=", - "RefSeqGene record not available" 
- ] - }, - "NM_001080423.3 Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001073892.3:p.(Arg339=)", - "HGVS_transcript_variant": "NM_001080423.3:c.1016_1020=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "GRIP2", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000003.11:g.14561629dup", - "vcf": { - "alt": "AGG", - "chr": "3", - "pos": "14561627", - "ref": "AG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000003.12:g.14520120_14520124=", - "vcf": { - "alt": "GGGCC", - "chr": "3", - "pos": "14520120", - "ref": "GGGCC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000003.11:g.14561629dup", - "vcf": { - "alt": "AGG", - "chr": "chr3", - "pos": "14561627", - "ref": "AG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000003.12:g.14520120_14520124=", - "vcf": { - "alt": "GGGCC", - "chr": "chr3", - "pos": "14520120", - "ref": "GGGCC" - } - } - }, - "submitted_variant": "NC_000003.11:g.14561629_14561630insG", - "transcript_description": "Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "NC_000004.11:g.140811111_140811122del": { - "NM_018717.4 Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_061187.2:p.(Gln489=)", - "HGVS_transcript_variant": "NM_018717.4:c.1465_1469=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "MAML3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - 
"HGVS_genomic_description": "NC_000004.11:g.140811111_140811122del", - "vcf": { - "alt": "T", - "chr": "4", - "pos": "140811063", - "ref": "TTGCTGCTGCTGC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000004.12:g.139889957_139889968del", - "vcf": { - "alt": "T", - "chr": "4", - "pos": "139889909", - "ref": "TTGCTGCTGCTGC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000004.11:g.140811111_140811122del", - "vcf": { - "alt": "T", - "chr": "chr4", - "pos": "140811063", - "ref": "TTGCTGCTGCTGC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000004.12:g.139889957_139889968del", - "vcf": { - "alt": "T", - "chr": "chr4", - "pos": "139889909", - "ref": "TTGCTGCTGCTGC" - } - } - }, - "submitted_variant": "NC_000004.11:g.140811111_140811122del", - "transcript_description": "Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_018717.4 with genome build GRCh37", - "NC_000004.11:g.140811063_140811075 contains 12 genomic base(s) that fail to align to transcript NM_018717.4", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "NC_000004.11:g.140811111_140811122CTGCTGCTGCTG=": { - "NM_018717.4 Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_061187.2:p.(Gln503_Gln506dup)", - "HGVS_transcript_variant": "NM_018717.4:c.1503_1514dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "MAML3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000004.11:g.140811098_140811125=", - "vcf": { - "alt": "GCTGCTGCTGCTGCTGCTGCTGCTGTTG", 
- "chr": "4", - "pos": "140811098", - "ref": "GCTGCTGCTGCTGCTGCTGCTGCTGTTG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000004.12:g.139889944_139889971=", - "vcf": { - "alt": "GCTGCTGCTGCTGCTGCTGCTGCTGTTG", - "chr": "4", - "pos": "139889944", - "ref": "GCTGCTGCTGCTGCTGCTGCTGCTGTTG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000004.11:g.140811098_140811125=", - "vcf": { - "alt": "GCTGCTGCTGCTGCTGCTGCTGCTGTTG", - "chr": "chr4", - "pos": "140811098", - "ref": "GCTGCTGCTGCTGCTGCTGCTGCTGTTG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000004.12:g.139889944_139889971=", - "vcf": { - "alt": "GCTGCTGCTGCTGCTGCTGCTGCTGTTG", - "chr": "chr4", - "pos": "139889944", - "ref": "GCTGCTGCTGCTGCTGCTGCTGCTGTTG" - } - } - }, - "submitted_variant": "NC_000004.11:g.140811111_140811122CTGCTGCTGCTG=", - "transcript_description": "Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_018717.4 with genome build GRCh37", - "NC_000004.11:g.140811063_140811075 contains 12 genomic base(s) that fail to align to transcript NM_018717.4", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "NC_000004.11:g.140811117_140811122del": { - "NM_018717.4 Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_061187.2:p.(Gln505_Gln506dup)", - "HGVS_transcript_variant": "NM_018717.4:c.1509_1514dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "MAML3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000004.11:g.140811117_140811122del", - "vcf": 
{ - "alt": "T", - "chr": "4", - "pos": "140811063", - "ref": "TTGCTGC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000004.12:g.139889963_139889968del", - "vcf": { - "alt": "T", - "chr": "4", - "pos": "139889909", - "ref": "TTGCTGC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000004.11:g.140811117_140811122del", - "vcf": { - "alt": "T", - "chr": "chr4", - "pos": "140811063", - "ref": "TTGCTGC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000004.12:g.139889963_139889968del", - "vcf": { - "alt": "T", - "chr": "chr4", - "pos": "139889909", - "ref": "TTGCTGC" - } - } - }, - "submitted_variant": "NC_000004.11:g.140811117_140811122del", - "transcript_description": "Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_018717.4 with genome build GRCh37", - "NC_000004.11:g.140811063_140811069 contains 12 genomic base(s) that fail to align to transcript NM_018717.4", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "NC_000004.11:g.140811111_140811117del": { - "NM_018717.4 Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_061187.2:p.(Gln491HisfsTer29)", - "HGVS_transcript_variant": "NM_018717.4:c.1468_1472dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "MAML3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000004.11:g.140811111_140811117del", - "vcf": { - "alt": "G", - "chr": "4", - "pos": "140811110", - "ref": "GCTGCTGC" - } - }, - "GRCh38": { - "HGVS_genomic_description": 
"NC_000004.12:g.139889957_139889963del", - "vcf": { - "alt": "G", - "chr": "4", - "pos": "139889956", - "ref": "GCTGCTGC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000004.11:g.140811111_140811117del", - "vcf": { - "alt": "G", - "chr": "chr4", - "pos": "140811110", - "ref": "GCTGCTGC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000004.12:g.139889957_139889963del", - "vcf": { - "alt": "G", - "chr": "chr4", - "pos": "139889956", - "ref": "GCTGCTGC" - } - } - }, - "submitted_variant": "NC_000004.11:g.140811111_140811117del", - "transcript_description": "Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_018717.4 with genome build GRCh37", - "NC_000004.11:g.140811110 is one of 12 genomic base(s) that fail to align to transcript NM_018717.4 between positions c.1467_1468", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "NC_000004.11:g.140811117C>A": { - "NM_018717.4 Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_061187.2:p.(Gln490_Gln491insHisGlnGlnGln)", - "HGVS_transcript_variant": "NM_018717.4:c.1472_1473insTCAGCAGCAGCA", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "MAML3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000004.11:g.140811117C>A", - "vcf": { - "alt": "A", - "chr": "4", - "pos": "140811117", - "ref": "C" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000004.12:g.139889962_139889963insATGCTGCTGCTG", - "vcf": { - "alt": "CTGCTGCTGCTGA", - "chr": "4", - "pos": "139889951", - 
"ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000004.11:g.140811117C>A", - "vcf": { - "alt": "A", - "chr": "chr4", - "pos": "140811117", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000004.12:g.139889962_139889963insATGCTGCTGCTG", - "vcf": { - "alt": "CTGCTGCTGCTGA", - "chr": "chr4", - "pos": "139889951", - "ref": "C" - } - } - }, - "submitted_variant": "NC_000004.11:g.140811117C>A", - "transcript_description": "Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_018717.4 with genome build GRCh37", - "NC_000004.11:g.140811117 is one of 12 genomic base(s) that fail to align to transcript NM_018717.4 between positions c.1467_1468", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "NC_000002.11:g.73675227_73675228insCTC": { - "NM_015120.4 Homo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_741t1:c.1573_1579=", - "HGVS_LRG_variant": "LRG_741:g.67345_67351=", - "HGVS_RefSeqGene_variant": "NG_011690.1:g.67345_67351=", - "HGVS_predicted_protein_consequence": "NP_055935.4(LRG_741p1):p.(Ser525=)", - "HGVS_transcript_variant": "NM_015120.4:c.1573_1579=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "ALMS1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000002.11:g.73675228_73675230dup", - "vcf": { - "alt": "TCTCCTC", - "chr": "2", - "pos": "73675227", - "ref": "TCTC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000002.12:g.73448097_73448103=", - "vcf": { - "alt": "TCTCCTC", - "chr": "2", - "pos": "73448097", - "ref": "TCTCCTC" - } - }, - "hg19": { - "HGVS_genomic_description": 
"NC_000002.11:g.73675228_73675230dup", - "vcf": { - "alt": "TCTCCTC", - "chr": "chr2", - "pos": "73675227", - "ref": "TCTC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000002.12:g.73448097_73448103=", - "vcf": { - "alt": "TCTCCTC", - "chr": "chr2", - "pos": "73448097", - "ref": "TCTCCTC" - } - } - }, - "submitted_variant": "NC_000002.11:g.73675227_73675228insCTC", - "transcript_description": "Homo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_015120.4 with genome build GRCh37", - "NM_015120.4:c.1573_1579 contains 3 transcript base(s) that fail to align to chromosome NC_000002.11", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "flag": "gene_variant" - } - }, - { - "9-136132908-T-TC": { - "NM_020469.2 Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_792t1:c.260_262=", - "HGVS_LRG_variant": "LRG_792:g.20145_20147=", - "HGVS_RefSeqGene_variant": "NG_006669.1:g.20145_20147=", - "HGVS_predicted_protein_consequence": "NP_065202.2(LRG_792p1):p.(Val87=)", - "HGVS_transcript_variant": "NM_020469.2:c.260_262=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NW_003315925.1:g.83614_83616=", - "vcf": { - "alt": "TCA", - "chr": "HG79_PATCH", - "pos": "83614", - "ref": "TCA" - } - } - }, - { - "GRCh38": { - "HGVS_genomic_description": "NW_009646201.1:g.83614_83616=", - "vcf": { - "alt": "TCA", - "chr": "HG2030_PATCH", - "pos": "83614", - "ref": "TCA" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NW_009646201.1:g.83614_83616=", - "vcf": { - "alt": "TCA", - "chr": "NW_009646201.1", - "pos": "83614", - "ref": "TCA" - } - } - } - ], - "gene_symbol": "ABO", - 
"genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000009.11:g.136132908_136132909insC", - "vcf": { - "alt": "TC", - "chr": "9", - "pos": "136132908", - "ref": "T" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000009.12:g.133257521_133257522insC", - "vcf": { - "alt": "TC", - "chr": "9", - "pos": "133257521", - "ref": "T" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000009.11:g.136132908_136132909insC", - "vcf": { - "alt": "TC", - "chr": "chr9", - "pos": "136132908", - "ref": "T" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000009.12:g.133257521_133257522insC", - "vcf": { - "alt": "TC", - "chr": "chr9", - "pos": "133257521", - "ref": "T" - } - } - }, - "submitted_variant": "9-136132908-T-TC", - "transcript_description": "Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA", - "validation_warnings": [ - "NC_000009.11:g.136132908T>TC automapped to NC_000009.11:g.136132908_136132909insC", - "The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37", - "NM_020469.2:c.260_262 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "The current status of LRG_792 is pending therefore changes may be made to the LRG reference sequence" - ] - }, - "flag": "gene_variant" - } - }, - { - "9-136132908-TAC-TCA": { - "NM_020469.2 Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_792t1:c.259del", - "HGVS_LRG_variant": "LRG_792:g.20144del", - "HGVS_RefSeqGene_variant": "NG_006669.1:g.20144del", - "HGVS_predicted_protein_consequence": "NP_065202.2(LRG_792p1):p.(Val87Ter)", - "HGVS_transcript_variant": "NM_020469.2:c.259del", - 
"RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NW_003315925.1:g.83618del", - "vcf": { - "alt": "A", - "chr": "HG79_PATCH", - "pos": "83616", - "ref": "AC" - } - } - }, - { - "GRCh38": { - "HGVS_genomic_description": "NW_009646201.1:g.83618del", - "vcf": { - "alt": "A", - "chr": "HG2030_PATCH", - "pos": "83616", - "ref": "AC" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NW_009646201.1:g.83618del", - "vcf": { - "alt": "A", - "chr": "NW_009646201.1", - "pos": "83616", - "ref": "AC" - } - } - } - ], - "gene_symbol": "ABO", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000009.11:g.136132909_136132910delinsCA", - "vcf": { - "alt": "CA", - "chr": "9", - "pos": "136132909", - "ref": "AC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000009.12:g.133257522_133257523delinsCA", - "vcf": { - "alt": "CA", - "chr": "9", - "pos": "133257522", - "ref": "AC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000009.11:g.136132909_136132910delinsCA", - "vcf": { - "alt": "CA", - "chr": "chr9", - "pos": "136132909", - "ref": "AC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000009.12:g.133257522_133257523delinsCA", - "vcf": { - "alt": "CA", - "chr": "chr9", - "pos": "133257522", - "ref": "AC" - } - } - }, - "submitted_variant": "9-136132908-TAC-TCA", - "transcript_description": "Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA", - "validation_warnings": [ - "NC_000009.11:g.136132908TAC>TCA automapped to NC_000009.11:g.136132909_136132910delACinsCA", - "The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37", - "NM_020469.2:c.258_261 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, 
please contact admin", - "The current status of LRG_792 is pending therefore changes may be made to the LRG reference sequence" - ] - }, - "flag": "gene_variant" - } - }, - { - "9-136132908-TA-TA": { - "NM_020469.2 Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_792t1:c.261del", - "HGVS_LRG_variant": "LRG_792:g.20146del", - "HGVS_RefSeqGene_variant": "NG_006669.1:g.20146del", - "HGVS_predicted_protein_consequence": "NP_065202.2(LRG_792p1):p.(Thr88ProfsTer31)", - "HGVS_transcript_variant": "NM_020469.2:c.261del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NW_003315925.1:g.83615del", - "vcf": { - "alt": "T", - "chr": "HG79_PATCH", - "pos": "83614", - "ref": "TC" - } - } - }, - { - "GRCh38": { - "HGVS_genomic_description": "NW_009646201.1:g.83615del", - "vcf": { - "alt": "T", - "chr": "HG2030_PATCH", - "pos": "83614", - "ref": "TC" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NW_009646201.1:g.83615del", - "vcf": { - "alt": "T", - "chr": "NW_009646201.1", - "pos": "83614", - "ref": "TC" - } - } - } - ], - "gene_symbol": "ABO", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000009.11:g.136132908_136132909=", - "vcf": { - "alt": "TA", - "chr": "9", - "pos": "136132908", - "ref": "TA" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000009.12:g.133257521_133257522=", - "vcf": { - "alt": "TA", - "chr": "9", - "pos": "133257521", - "ref": "TA" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000009.11:g.136132908_136132909=", - "vcf": { - "alt": "TA", - "chr": "chr9", - "pos": "136132908", - "ref": "TA" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000009.12:g.133257521_133257522=", - "vcf": { - "alt": "TA", - "chr": "chr9", - "pos": "133257521", - "ref": "TA" - } - } - }, - 
"submitted_variant": "9-136132908-TA-TA", - "transcript_description": "Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA", - "validation_warnings": [ - "NC_000009.11:g.136132908TA>TA automapped to NC_000009.11:g.136132908_136132909TA=", - "The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37", - "NM_020469.2:c.261 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "The current status of LRG_792 is pending therefore changes may be made to the LRG reference sequence" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_020469.2:c.258delG": { - "NM_020469.2 Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_792t1:c.259del", - "HGVS_LRG_variant": "LRG_792:g.20144del", - "HGVS_RefSeqGene_variant": "NG_006669.1:g.20144del", - "HGVS_predicted_protein_consequence": "NP_065202.2(LRG_792p1):p.(Val87Ter)", - "HGVS_transcript_variant": "NM_020469.2:c.259del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NW_003315925.1:g.83618del", - "vcf": { - "alt": "A", - "chr": "HG79_PATCH", - "pos": "83616", - "ref": "AC" - } - } - }, - { - "GRCh38": { - "HGVS_genomic_description": "NW_009646201.1:g.83618del", - "vcf": { - "alt": "A", - "chr": "HG2030_PATCH", - "pos": "83616", - "ref": "AC" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NW_009646201.1:g.83618del", - "vcf": { - "alt": "A", - "chr": "NW_009646201.1", - "pos": "83616", - "ref": "AC" - } - } - } - ], - "gene_symbol": "ABO", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000009.11:g.136132909_136132910delinsCA", - "vcf": { - 
"alt": "CA", - "chr": "9", - "pos": "136132909", - "ref": "AC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000009.12:g.133257522_133257523delinsCA", - "vcf": { - "alt": "CA", - "chr": "9", - "pos": "133257522", - "ref": "AC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000009.11:g.136132909_136132910delinsCA", - "vcf": { - "alt": "CA", - "chr": "chr9", - "pos": "136132909", - "ref": "AC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000009.12:g.133257522_133257523delinsCA", - "vcf": { - "alt": "CA", - "chr": "chr9", - "pos": "133257522", - "ref": "AC" - } - } - }, - "submitted_variant": "NM_020469.2:c.258delG", - "transcript_description": "Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA", - "validation_warnings": [ - "NM_020469.2:c.258delG automapped to NM_020469.2:c.259delG", - "The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37", - "NM_020469.2:c.258_261 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "The current status of LRG_792 is pending therefore changes may be made to the LRG reference sequence" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_020469.2:c.260_262TGA=": { - "NM_020469.2 Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_792t1:c.260_262=", - "HGVS_LRG_variant": "LRG_792:g.20145_20147=", - "HGVS_RefSeqGene_variant": "NG_006669.1:g.20145_20147=", - "HGVS_predicted_protein_consequence": "NP_065202.2(LRG_792p1):p.(Val87=)", - "HGVS_transcript_variant": "NM_020469.2:c.260_262=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NW_003315925.1:g.83614_83616=", - "vcf": { - "alt": "TCA", - "chr": 
"HG79_PATCH", - "pos": "83614", - "ref": "TCA" - } - } - }, - { - "GRCh38": { - "HGVS_genomic_description": "NW_009646201.1:g.83614_83616=", - "vcf": { - "alt": "TCA", - "chr": "HG2030_PATCH", - "pos": "83614", - "ref": "TCA" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NW_009646201.1:g.83614_83616=", - "vcf": { - "alt": "TCA", - "chr": "NW_009646201.1", - "pos": "83614", - "ref": "TCA" - } - } - } - ], - "gene_symbol": "ABO", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000009.11:g.136132908_136132909insC", - "vcf": { - "alt": "TC", - "chr": "9", - "pos": "136132908", - "ref": "T" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000009.12:g.133257521_133257522insC", - "vcf": { - "alt": "TC", - "chr": "9", - "pos": "133257521", - "ref": "T" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000009.11:g.136132908_136132909insC", - "vcf": { - "alt": "TC", - "chr": "chr9", - "pos": "136132908", - "ref": "T" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000009.12:g.133257521_133257522insC", - "vcf": { - "alt": "TC", - "chr": "chr9", - "pos": "133257521", - "ref": "T" - } - } - }, - "submitted_variant": "NM_020469.2:c.260_262TGA=", - "transcript_description": "Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37", - "NM_020469.2:c.260_262 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "The current status of LRG_792 is pending therefore changes may be made to the LRG reference sequence" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_020469.2:c.261delG": { - "NM_020469.2 Homo sapiens ABO, alpha 
1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_792t1:c.261del", - "HGVS_LRG_variant": "LRG_792:g.20146del", - "HGVS_RefSeqGene_variant": "NG_006669.1:g.20146del", - "HGVS_predicted_protein_consequence": "NP_065202.2(LRG_792p1):p.(Thr88ProfsTer31)", - "HGVS_transcript_variant": "NM_020469.2:c.261del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NW_003315925.1:g.83615del", - "vcf": { - "alt": "T", - "chr": "HG79_PATCH", - "pos": "83614", - "ref": "TC" - } - } - }, - { - "GRCh38": { - "HGVS_genomic_description": "NW_009646201.1:g.83615del", - "vcf": { - "alt": "T", - "chr": "HG2030_PATCH", - "pos": "83614", - "ref": "TC" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NW_009646201.1:g.83615del", - "vcf": { - "alt": "T", - "chr": "NW_009646201.1", - "pos": "83614", - "ref": "TC" - } - } - } - ], - "gene_symbol": "ABO", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000009.11:g.136132908_136132909=", - "vcf": { - "alt": "TA", - "chr": "9", - "pos": "136132908", - "ref": "TA" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000009.12:g.133257521_133257522=", - "vcf": { - "alt": "TA", - "chr": "9", - "pos": "133257521", - "ref": "TA" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000009.11:g.136132908_136132909=", - "vcf": { - "alt": "TA", - "chr": "chr9", - "pos": "136132908", - "ref": "TA" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000009.12:g.133257521_133257522=", - "vcf": { - "alt": "TA", - "chr": "chr9", - "pos": "133257521", - "ref": "TA" - } - } - }, - "submitted_variant": "NM_020469.2:c.261delG", - "transcript_description": "Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA", - "validation_warnings": [ - "The displayed variants may be 
artefacts of aligning NM_020469.2 with genome build GRCh37", - "NM_020469.2:c.261 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "The current status of LRG_792 is pending therefore changes may be made to the LRG reference sequence" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_020469.2:c.261dupG": { - "NM_020469.2 Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_792t1:c.261dup", - "HGVS_LRG_variant": "LRG_792:g.20146dup", - "HGVS_RefSeqGene_variant": "NG_006669.1:g.20146dup", - "HGVS_predicted_protein_consequence": "NP_065202.2(LRG_792p1):p.(Thr88AspfsTer107)", - "HGVS_transcript_variant": "NM_020469.2:c.261dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NW_003315925.1:g.83615dup", - "vcf": { - "alt": "TCC", - "chr": "HG79_PATCH", - "pos": "83614", - "ref": "TC" - } - } - }, - { - "GRCh38": { - "HGVS_genomic_description": "NW_009646201.1:g.83615dup", - "vcf": { - "alt": "TCC", - "chr": "HG2030_PATCH", - "pos": "83614", - "ref": "TC" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NW_009646201.1:g.83615dup", - "vcf": { - "alt": "TCC", - "chr": "NW_009646201.1", - "pos": "83614", - "ref": "TC" - } - } - } - ], - "gene_symbol": "ABO", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000009.11:g.136132908_136132909insCC", - "vcf": { - "alt": "TCC", - "chr": "9", - "pos": "136132908", - "ref": "T" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000009.12:g.133257521_133257522insCC", - "vcf": { - "alt": "TCC", - "chr": "9", - "pos": "133257521", - "ref": "T" - } - }, - "hg19": { - "HGVS_genomic_description": 
"NC_000009.11:g.136132908_136132909insCC", - "vcf": { - "alt": "TCC", - "chr": "chr9", - "pos": "136132908", - "ref": "T" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000009.12:g.133257521_133257522insCC", - "vcf": { - "alt": "TCC", - "chr": "chr9", - "pos": "133257521", - "ref": "T" - } - } - }, - "submitted_variant": "NM_020469.2:c.261dupG", - "transcript_description": "Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37", - "NM_020469.2:c.261 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "The current status of LRG_792 is pending therefore changes may be made to the LRG reference sequence" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_020469.2:c.261_262insTT": { - "NM_020469.2 Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_792t1:c.261_262insTT", - "HGVS_LRG_variant": "LRG_792:g.20146_20147insTT", - "HGVS_RefSeqGene_variant": "NG_006669.1:g.20146_20147insTT", - "HGVS_predicted_protein_consequence": "NP_065202.2(LRG_792p1):p.(Thr88LeufsTer32)", - "HGVS_transcript_variant": "NM_020469.2:c.261_262insTT", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NW_003315925.1:g.83614_83615insAA", - "vcf": { - "alt": "TAA", - "chr": "HG79_PATCH", - "pos": "83614", - "ref": "T" - } - } - }, - { - "GRCh38": { - "HGVS_genomic_description": "NW_009646201.1:g.83614_83615insAA", - "vcf": { - "alt": "TAA", - "chr": "HG2030_PATCH", - "pos": "83614", - "ref": "T" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NW_009646201.1:g.83614_83615insAA", - "vcf": { 
- "alt": "TAA", - "chr": "NW_009646201.1", - "pos": "83614", - "ref": "T" - } - } - } - ], - "gene_symbol": "ABO", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000009.11:g.136132909_136132910insATA", - "vcf": { - "alt": "GTAA", - "chr": "9", - "pos": "136132907", - "ref": "G" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000009.12:g.133257522_133257523insATA", - "vcf": { - "alt": "GTAA", - "chr": "9", - "pos": "133257520", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000009.11:g.136132909_136132910insATA", - "vcf": { - "alt": "GTAA", - "chr": "chr9", - "pos": "136132907", - "ref": "G" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000009.12:g.133257522_133257523insATA", - "vcf": { - "alt": "GTAA", - "chr": "chr9", - "pos": "133257520", - "ref": "G" - } - } - }, - "submitted_variant": "NM_020469.2:c.261_262insTT", - "transcript_description": "Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA", - "validation_warnings": [ - "The current status of LRG_792 is pending therefore changes may be made to the LRG reference sequence", - "NM_020469.2:c.261_262insTT can not be mapped directly to genome build GRCh37. 
See Alternative genomic loci for aligned genomic positions" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000088.3:c.590_591inv": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.590_591inv", - "HGVS_LRG_variant": "LRG_1:g.8639_8640inv", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.8639_8640inv", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.(Gly197Asp)", - "HGVS_transcript_variant": "NM_000088.3:c.590_591inv", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48275361_48275362inv", - "vcf": { - "alt": "GT", - "chr": "17", - "pos": "48275361", - "ref": "AC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50198000_50198001inv", - "vcf": { - "alt": "GT", - "chr": "17", - "pos": "50198000", - "ref": "AC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48275361_48275362inv", - "vcf": { - "alt": "GT", - "chr": "chr17", - "pos": "48275361", - "ref": "AC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50198000_50198001inv", - "vcf": { - "alt": "GT", - "chr": "chr17", - "pos": "50198000", - "ref": "AC" - } - } - }, - "submitted_variant": "NM_000088.3:c.590_591inv", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [] - }, - "flag": "gene_variant" - } - }, - { - "NM_024989.3:c.1778_1779inv": { - "NM_024989.3 Homo sapiens post-GPI attachment to proteins 1 (PGAP1), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_079265.2:p.(Phe593Ter)", - "HGVS_transcript_variant": "NM_024989.3:c.1778_1779inv", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": 
[], - "gene_symbol": "PGAP1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000002.11:g.197729793_197729794inv", - "vcf": { - "alt": "TT", - "chr": "2", - "pos": "197729793", - "ref": "AA" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000002.12:g.196865069_196865070inv", - "vcf": { - "alt": "TT", - "chr": "2", - "pos": "196865069", - "ref": "AA" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000002.11:g.197729793_197729794inv", - "vcf": { - "alt": "TT", - "chr": "chr2", - "pos": "197729793", - "ref": "AA" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000002.12:g.196865069_196865070inv", - "vcf": { - "alt": "TT", - "chr": "chr2", - "pos": "196865069", - "ref": "AA" - } - } - }, - "submitted_variant": "NM_024989.3:c.1778_1779inv", - "transcript_description": "Homo sapiens post-GPI attachment to proteins 1 (PGAP1), transcript variant 1, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_032815.3:c.555_556inv": { - "NM_032815.3 Homo sapiens nuclear factor of activated T cells 2 interacting protein (NFATC2IP), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_116204.3:p.(Glu185_Glu186delinsAspTer)", - "HGVS_transcript_variant": "NM_032815.3:c.555_556inv", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "NFATC2IP", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000016.9:g.28965980_28965981inv", - "vcf": { - "alt": "CT", - "chr": "16", - "pos": "28965980", - "ref": "AG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000016.10:g.28954659_28954660inv", - "vcf": { - "alt": "CT", - "chr": "16", - "pos": "28954659", - "ref": "AG" - } - }, - "hg19": { - "HGVS_genomic_description": 
"NC_000016.9:g.28965980_28965981inv", - "vcf": { - "alt": "CT", - "chr": "chr16", - "pos": "28965980", - "ref": "AG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000016.10:g.28954659_28954660inv", - "vcf": { - "alt": "CT", - "chr": "chr16", - "pos": "28954659", - "ref": "AG" - } - } - }, - "submitted_variant": "NM_032815.3:c.555_556inv", - "transcript_description": "Homo sapiens nuclear factor of activated T cells 2 interacting protein (NFATC2IP), mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_006138.4:c.3_4inv": { - "NM_006138.4 Homo sapiens membrane spanning 4-domains A3 (MS4A3), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_006129.4:p.(Met1?)", - "HGVS_transcript_variant": "NM_006138.4:c.3_4inv", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "MS4A3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000011.9:g.59828636_59828637inv", - "vcf": { - "alt": "CC", - "chr": "11", - "pos": "59828636", - "ref": "GG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000011.10:g.60061163_60061164inv", - "vcf": { - "alt": "CC", - "chr": "11", - "pos": "60061163", - "ref": "GG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000011.9:g.59828636_59828637inv", - "vcf": { - "alt": "CC", - "chr": "chr11", - "pos": "59828636", - "ref": "GG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000011.10:g.60061163_60061164inv", - "vcf": { - "alt": "CC", - "chr": "chr11", - "pos": "60061163", - "ref": "GG" - } - } - }, - "submitted_variant": "NM_006138.4:c.3_4inv", - "transcript_description": "Homo sapiens membrane spanning 4-domains A3 (MS4A3), transcript variant 1, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - 
"flag": "gene_variant" - } - }, - { - "NM_000038.5:c.3927_3928delAAinsTT": { - "NM_000038.5 Homo sapiens APC, WNT signaling pathway regulator (APC), transcript variant 3, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_000029.2(LRG_130p1):p.(Glu1309_Lys1310delinsAspTer)", - "HGVS_transcript_variant": "NM_000038.5:c.3927_3928inv", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "APC", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000005.9:g.112175218_112175219inv", - "vcf": { - "alt": "TT", - "chr": "5", - "pos": "112175218", - "ref": "AA" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000005.10:g.112839521_112839522inv", - "vcf": { - "alt": "TT", - "chr": "5", - "pos": "112839521", - "ref": "AA" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000005.9:g.112175218_112175219inv", - "vcf": { - "alt": "TT", - "chr": "chr5", - "pos": "112175218", - "ref": "AA" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000005.10:g.112839521_112839522inv", - "vcf": { - "alt": "TT", - "chr": "chr5", - "pos": "112839521", - "ref": "AA" - } - } - }, - "submitted_variant": "NM_000038.5:c.3927_3928delAAinsTT", - "transcript_description": "Homo sapiens APC, WNT signaling pathway regulator (APC), transcript variant 3, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_001034853.1:c.2847_2848delAGinsCT": { - "NM_001034853.1 Homo sapiens retinitis pigmentosa GTPase regulator (RPGR), transcript variant C, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001030025.1:p.(Glu949_Glu950delinsAspTer)", - "HGVS_transcript_variant": "NM_001034853.1:c.2847_2848inv", - 
"RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "RPGR", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000023.10:g.38145404_38145405inv", - "vcf": { - "alt": "AG", - "chr": "X", - "pos": "38145404", - "ref": "CT" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000023.11:g.38286151_38286152inv", - "vcf": { - "alt": "AG", - "chr": "X", - "pos": "38286151", - "ref": "CT" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000023.10:g.38145404_38145405inv", - "vcf": { - "alt": "AG", - "chr": "chrX", - "pos": "38145404", - "ref": "CT" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000023.11:g.38286151_38286152inv", - "vcf": { - "alt": "AG", - "chr": "chrX", - "pos": "38286151", - "ref": "CT" - } - } - }, - "submitted_variant": "NM_001034853.1:c.2847_2848delAGinsCT", - "transcript_description": "Homo sapiens retinitis pigmentosa GTPase regulator (RPGR), transcript variant C, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000088.3:c.4392_*2inv": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.4394_4395inv", - "HGVS_LRG_variant": "LRG_1:g.21137_21138inv", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.21137_21138inv", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.(Ter1465PheextTer27)", - "HGVS_transcript_variant": "NM_000088.3:c.4394_4395inv", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48262863_48262864inv", - "vcf": { - "alt": "AA", - "chr": "17", - "pos": "48262863", - "ref": "TT" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50185502_50185503inv", - "vcf": { - "alt": "AA", 
- "chr": "17", - "pos": "50185502", - "ref": "TT" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48262863_48262864inv", - "vcf": { - "alt": "AA", - "chr": "chr17", - "pos": "48262863", - "ref": "TT" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50185502_50185503inv", - "vcf": { - "alt": "AA", - "chr": "chr17", - "pos": "50185502", - "ref": "TT" - } - } - }, - "submitted_variant": "NM_000088.3:c.4392_*2inv", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [ - "NM_000088.3:c.4392_*2inv normalized to NM_000088.3:c.4394_4395inv" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000088.3:c.4392_*5inv": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.4393_*4inv", - "HGVS_LRG_variant": "LRG_1:g.21136_21142inv", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.21136_21142inv", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.(Ter1465GluextTer27)", - "HGVS_transcript_variant": "NM_000088.3:c.4392_*5inv", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48262859_48262865inv", - "vcf": { - "alt": "TAAACTC", - "chr": "17", - "pos": "48262859", - "ref": "GAGTTTA" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50185498_50185504inv", - "vcf": { - "alt": "TAAACTC", - "chr": "17", - "pos": "50185498", - "ref": "GAGTTTA" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48262859_48262865inv", - "vcf": { - "alt": "TAAACTC", - "chr": "chr17", - "pos": "48262859", - "ref": "GAGTTTA" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50185498_50185504inv", - "vcf": { - "alt": "TAAACTC", - "chr": "chr17", - "pos": "50185498", - "ref": "GAGTTTA" - } - } - }, - 
"submitted_variant": "NM_000088.3:c.4392_*5inv", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [] - }, - "flag": "gene_variant" - } - }, - { - "NM_000088.3:c.4390_*7inv": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.4390_*7inv", - "HGVS_LRG_variant": "LRG_1:g.21133_21145inv", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.21133_21145inv", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.(Leu1464_Ter1465delinsArgGluPheThrAlaIleProThrTrpLeuProProThrGlnProThrPheProProThrArgLysGlnThrSerAsnProAsnTer)", - "HGVS_transcript_variant": "NM_000088.3:c.4390_*7inv", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48262856_48262868inv", - "vcf": { - "alt": "CTGTAAACTCCCT", - "chr": "17", - "pos": "48262856", - "ref": "AGGGAGTTTACAG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50185495_50185507inv", - "vcf": { - "alt": "CTGTAAACTCCCT", - "chr": "17", - "pos": "50185495", - "ref": "AGGGAGTTTACAG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48262856_48262868inv", - "vcf": { - "alt": "CTGTAAACTCCCT", - "chr": "chr17", - "pos": "48262856", - "ref": "AGGGAGTTTACAG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50185495_50185507inv", - "vcf": { - "alt": "CTGTAAACTCCCT", - "chr": "chr17", - "pos": "50185495", - "ref": "AGGGAGTTTACAG" - } - } - }, - "submitted_variant": "NM_000088.3:c.4390_*7inv", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [] - }, - "flag": "gene_variant" - } - }, - { - "NM_005732.3:c.2923-5insT": { - "Validation_Warning_1": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - 
"HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "NM_005732.3:c.2923-5insT", - "transcript_description": "", - "validation_warnings": [ - "insertion length must be 1" - ] - }, - "flag": "warning" - } - }, - { - "NM_198283.1(EYS):c.*743120C>T": { - "Validation_Warning_1": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "NM_198283.1(EYS):c.*743120C>T", - "transcript_description": "", - "validation_warnings": [ - "The given coordinate is outside the bounds of the reference sequence." 
- ] - }, - "flag": "warning" - } - }, - { - "NM_133379.4(TTN):c.*265+26591C>T": { - "Validation_Warning_1": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "NM_133379.4(TTN):c.*265+26591C>T", - "transcript_description": "", - "validation_warnings": [ - "start or end or both are beyond the bounds of transcript record NM_133379.4" - ] - }, - "flag": "warning" - } - }, - { - "NM_000088.3:c.589-2_589-1AG>G": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.589-2del", - "HGVS_LRG_variant": "LRG_1:g.8636del", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.8636del", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.?", - "HGVS_transcript_variant": "NM_000088.3:c.589-2del", - "RefSeqGene_context_intronic_sequence": "NG_007400.1(NM_000088.3):c.589-2del", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "NC_000017.10(NM_000088.3):c.589-2del", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48275366del", - "vcf": { - "alt": "C", - "chr": "17", - "pos": "48275364", - "ref": "CT" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50198005del", - "vcf": { - "alt": "C", - "chr": "17", - "pos": "50198003", - "ref": "CT" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48275366del", - "vcf": { - "alt": "C", - "chr": "chr17", - "pos": "48275364", - "ref": "CT" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50198005del", - "vcf": { - "alt": "C", - "chr": "chr17", - "pos": "50198003", - "ref": "CT" - } - } - }, - "submitted_variant": 
"NM_000088.3:c.589-2_589-1AG>G", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [ - "NM_000088.3:c.589-2_589-1AG>G automapped to NM_000088.3:c.589-2_589-1delAGinsG", - "NM_000088.3:c.589-2_589-1delinsG automapped to NM_000088.3:c.589-3del", - "NM_000088.3:c.589-3del normalized to NM_000088.3:c.589-2del" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000088.3:c.642+1_642+2delGTinsG": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.642+2del", - "HGVS_LRG_variant": "LRG_1:g.8693del", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.8693del", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.?", - "HGVS_transcript_variant": "NM_000088.3:c.642+2del", - "RefSeqGene_context_intronic_sequence": "NG_007400.1(NM_000088.3):c.642+2del", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "NC_000017.10(NM_000088.3):c.642+2del", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48275308del", - "vcf": { - "alt": "T", - "chr": "17", - "pos": "48275307", - "ref": "TA" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50197947del", - "vcf": { - "alt": "T", - "chr": "17", - "pos": "50197946", - "ref": "TA" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48275308del", - "vcf": { - "alt": "T", - "chr": "chr17", - "pos": "48275307", - "ref": "TA" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50197947del", - "vcf": { - "alt": "T", - "chr": "chr17", - "pos": "50197946", - "ref": "TA" - } - } - }, - "submitted_variant": "NM_000088.3:c.642+1_642+2delGTinsG", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [ - "NM_000088.3:c.642+1_642+2delinsG automapped to NM_000088.3:c.642+2del" - ] - }, - "flag": "gene_variant" - } - }, - { - 
"NM_004415.3:c.1-1insA": { - "Validation_Warning_1": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "NM_004415.3:c.1-1insA", - "transcript_description": "", - "validation_warnings": [ - "insertion length must be 1" - ] - }, - "flag": "warning" - } - }, - { - "NM_000273.2:c.1-5028_253del": { - "Validation_Warning_1": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "NM_000273.2:c.1-5028_253del", - "transcript_description": "", - "validation_warnings": [ - "start or end or both are beyond the bounds of transcript record" - ] - }, - "flag": "warning" - } - }, - { - "NM_002929.2:c.1006C>T": { - "NM_002929.2 Homo sapiens G protein-coupled receptor kinase 1 (GRK1), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_002920.1:p.(Leu336Phe)", - "HGVS_transcript_variant": "NM_002929.2:c.1006C>T", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "GRK1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh38": { - "HGVS_genomic_description": "NC_000013.11:g.113723094C>T", - "vcf": { - "alt": "T", - "chr": "13", - "pos": "113723094", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000013.11:g.113723094C>T", - "vcf": { - "alt": "T", - "chr": "chr13", - "pos": "113723094", - 
"ref": "C" - } - } - }, - "submitted_variant": "NM_002929.2:c.1006C>T", - "transcript_description": "Homo sapiens G protein-coupled receptor kinase 1 (GRK1), mRNA", - "validation_warnings": [ - "RefSeqGene record not available", - "NM_002929.2:c.1006C>T can not be mapped directly to genome build GRCh37. See alt_genomic_loci for aligned genomic positions" - ] - }, - "flag": "gene_variant" - } - }, - { - "NR_125367.1:n.167+18165G>A": { - "NR_125367.1 Homo sapiens myosin heavy chain gene cluster antisense RNA (MYHAS), long non-coding RNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "Non-coding transcript :n.", - "HGVS_transcript_variant": "NR_125367.1:n.167+18165G>A", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "MYHAS", - "genome_context_intronic_sequence": "NC_000017.10(NR_125367.1):c.167+18165G>A", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.10327720G>A", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "10327720", - "ref": "G" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.10424403G>A", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "10424403", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.10327720G>A", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "10327720", - "ref": "G" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.10424403G>A", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "10424403", - "ref": "G" - } - } - }, - "submitted_variant": "NR_125367.1:n.167+18165G>A", - "transcript_description": "Homo sapiens myosin heavy chain gene cluster antisense RNA (MYHAS), long non-coding RNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_006005.3:c.3071_3073delinsTTA": { - "Validation_Warning_1": { - "HGVS_LRG_transcript_variant": "", 
- "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "NM_006005.3:c.3071_3073delinsTTA", - "transcript_description": "", - "validation_warnings": [ - "Variant coordinate is out of the bound of CDS region (CDS length ", - "2673)" - ] - }, - "flag": "warning" - } - }, - { - "NM_000089.3:n.1504_1506del": { - "Validation_Warning_1": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "NM_000089.3:n.1504_1506del", - "transcript_description": "", - "validation_warnings": [ - "Coding transcript reference sequence input as non-coding transcript (n.) reference sequence. Did you mean NM_000089.3:c.1504_1506del?" 
- ] - }, - "flag": "warning" - } - }, - { - "NC_012920.1:m.1011C>T": { - "Validation_Warning_1": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_012920.1:g.1011C>T", - "vcf": { - "alt": "T", - "chr": "M", - "pos": "1011", - "ref": "C" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_012920.1:g.1011C>T", - "vcf": { - "alt": "T", - "chr": "M", - "pos": "1011", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_012920.1:g.1011C>T", - "vcf": { - "alt": "T", - "chr": "chrM", - "pos": "1011", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_012920.1:g.1011C>T", - "vcf": { - "alt": "T", - "chr": "chrM", - "pos": "1011", - "ref": "C" - } - } - }, - "submitted_variant": "NC_012920.1:m.1011C>T", - "transcript_description": "Homo sapiens mitochondrion, complete genome", - "validation_warnings": [] - }, - "flag": "warning" - } - }, - { - "NC_000006.11:g.90403795G=": { - "NM_014611.1 Homo sapiens midasin AAA ATPase 1 (MDN1), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_055426.1:p.(Val3293=)", - "HGVS_transcript_variant": "NM_014611.1:c.9879T>C", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "MDN1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000006.11:g.90403795G=", - "vcf": { - "alt": "G", - "chr": "6", - "pos": "90403795", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000006.11:g.90403795G=", - "vcf": { - "alt": "G", - "chr": "chr6", - "pos": "90403795", - 
"ref": "G" - } - } - }, - "submitted_variant": "NC_000006.11:g.90403795G=", - "transcript_description": "Homo sapiens midasin AAA ATPase 1 (MDN1), mRNA", - "validation_warnings": [ - "A more recent version of the selected reference sequence NM_014611.1 is available (NM_014611.2)", - "NM_014611.2:c.9879C= MUST be fully validated prior to use in reports", - "select_variants=NM_014611.2:c.9879C=", - "RefSeqGene record not available" - ] - }, - "NM_014611.2 Homo sapiens midasin AAA ATPase 1 (MDN1), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_055426.1:p.(Val3293=)", - "HGVS_transcript_variant": "NM_014611.2:c.9879C=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "MDN1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000006.11:g.90403795G=", - "vcf": { - "alt": "G", - "chr": "6", - "pos": "90403795", - "ref": "G" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000006.12:g.89694076G=", - "vcf": { - "alt": "G", - "chr": "6", - "pos": "89694076", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000006.11:g.90403795G=", - "vcf": { - "alt": "G", - "chr": "chr6", - "pos": "90403795", - "ref": "G" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000006.12:g.89694076G=", - "vcf": { - "alt": "G", - "chr": "chr6", - "pos": "89694076", - "ref": "G" - } - } - }, - "submitted_variant": "NC_000006.11:g.90403795G=", - "transcript_description": "Homo sapiens midasin AAA ATPase 1 (MDN1), mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "1-169519049-T-.": { - "NM_000130.4 Homo sapiens coagulation factor V (F5), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_553t1:c.1601G>A", - "HGVS_LRG_variant": "LRG_553:g.41721G>A", - "HGVS_RefSeqGene_variant": 
"NG_011806.1:g.41721G>A", - "HGVS_predicted_protein_consequence": "NP_000121.2(LRG_553p1):p.(Arg534Gln)", - "HGVS_transcript_variant": "NM_000130.4:c.1601G>A", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "F5", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000001.10:g.169519049T=", - "vcf": { - "alt": "T", - "chr": "1", - "pos": "169519049", - "ref": "T" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000001.11:g.169549811C>T", - "vcf": { - "alt": "T", - "chr": "1", - "pos": "169549811", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000001.10:g.169519049T=", - "vcf": { - "alt": "T", - "chr": "chr1", - "pos": "169519049", - "ref": "T" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000001.11:g.169549811C>T", - "vcf": { - "alt": "T", - "chr": "chr1", - "pos": "169549811", - "ref": "C" - } - } - }, - "submitted_variant": "1-169519049-T-.", - "transcript_description": "Homo sapiens coagulation factor V (F5), mRNA", - "validation_warnings": [ - "Not stating ALT bases is ambiguous because VCF specification 4.0 would treat 1-169519049-T-. as a deletion whereas VCF specification 4.1 onwards would treat 1-169519049-T-. 
as ALT = REF", - "VariantValidator has output both alternatives" - ] - }, - "flag": "gene_variant" - } - }, - { - "NC_000005.9:g.35058667_35058668AG=": { - " ": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "NC_000005.9:g.35058667_35058668AG=", - "transcript_description": "", - "validation_warnings": [ - "Required information for NR_037910.1 is missing from the Universal Transcript Archive, please select an alternative version of NR_037910.1 by submitting NR_037910.1 or PRLR to https://variantvalidator.org/ref_finder/, or select an alternative genome build" - ] - }, - "NM_000949.5 Homo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_029042.1:g.177156_177157=", - "HGVS_predicted_protein_consequence": "NP_000940.1:p.?", - "HGVS_transcript_variant": "NM_000949.5:c.*6523_*6524=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "PRLR", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000005.9:g.35058667_35058668=", - "vcf": { - "alt": "AG", - "chr": "5", - "pos": "35058667", - "ref": "AG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000005.9:g.35058667_35058668=", - "vcf": { - "alt": "AG", - "chr": "chr5", - "pos": "35058667", - "ref": "AG" - } - } - }, - "submitted_variant": "NC_000005.9:g.35058667_35058668AG=", - "transcript_description": "Homo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA", - "validation_warnings": [ - "A more recent version of the selected reference sequence NM_000949.5 is available 
(NM_000949.6)", - "NM_000949.6:c.*6523_*6524AT= MUST be fully validated prior to use in reports", - "select_variants=NM_000949.6:c.*6523_*6524AT=" - ] - }, - "NM_000949.6 Homo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_000940.1:p.?", - "HGVS_transcript_variant": "NM_000949.6:c.*6528del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "PRLR", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000005.9:g.35058667_35058670=", - "vcf": { - "alt": "AGAT", - "chr": "5", - "pos": "35058667", - "ref": "AGAT" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000005.10:g.35058563del", - "vcf": { - "alt": "C", - "chr": "5", - "pos": "35058560", - "ref": "CA" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000005.9:g.35058667_35058670=", - "vcf": { - "alt": "AGAT", - "chr": "chr5", - "pos": "35058667", - "ref": "AGAT" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000005.10:g.35058563del", - "vcf": { - "alt": "C", - "chr": "chr5", - "pos": "35058560", - "ref": "CA" - } - } - }, - "submitted_variant": "NC_000005.9:g.35058667_35058668AG=", - "transcript_description": "Homo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_000949.6 with genome build GRCh37", - "NM_000949.6:c.*6524_*6526 contains 1 transcript base(s) that fail to align to chromosome NC_000005.9", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "NM_001204314.1 Homo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - 
"HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001191243.1:p.?", - "HGVS_transcript_variant": "NM_001204314.1:c.*6523_*6524=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "PRLR", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000005.9:g.35058667_35058668=", - "vcf": { - "alt": "AG", - "chr": "5", - "pos": "35058667", - "ref": "AG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000005.9:g.35058667_35058668=", - "vcf": { - "alt": "AG", - "chr": "chr5", - "pos": "35058667", - "ref": "AG" - } - } - }, - "submitted_variant": "NC_000005.9:g.35058667_35058668AG=", - "transcript_description": "Homo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA", - "validation_warnings": [ - "A more recent version of the selected reference sequence NM_001204314.1 is available (NM_001204314.2)", - "NM_001204314.2:c.*6523_*6524AT= MUST be fully validated prior to use in reports", - "select_variants=NM_001204314.2:c.*6523_*6524AT=", - "RefSeqGene record not available" - ] - }, - "NM_001204314.2 Homo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001191243.1:p.?", - "HGVS_transcript_variant": "NM_001204314.2:c.*6528del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "PRLR", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000005.9:g.35058667_35058670=", - "vcf": { - "alt": "AGAT", - "chr": "5", - "pos": "35058667", - "ref": "AGAT" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000005.10:g.35058563del", - "vcf": { - "alt": "C", - "chr": "5", - "pos": "35058560", - "ref": "CA" - } - }, - "hg19": { - "HGVS_genomic_description": 
"NC_000005.9:g.35058667_35058670=", - "vcf": { - "alt": "AGAT", - "chr": "chr5", - "pos": "35058667", - "ref": "AGAT" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000005.10:g.35058563del", - "vcf": { - "alt": "C", - "chr": "chr5", - "pos": "35058560", - "ref": "CA" - } - } - }, - "submitted_variant": "NC_000005.9:g.35058667_35058668AG=", - "transcript_description": "Homo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_001204314.2 with genome build GRCh37", - "NM_001204314.2:c.*6524_*6526 contains 1 transcript base(s) that fail to align to chromosome NC_000005.9", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "NM_001204316.1 Homo sapiens prolactin receptor (PRLR), transcript variant 3, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001191245.1:p.?", - "HGVS_transcript_variant": "NM_001204316.1:c.1009+7383_1009+7384=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "PRLR", - "genome_context_intronic_sequence": "NC_000005.9(NM_001204316.1):c.1009+7383_1009+7384=", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000005.9:g.35058668_35058669del", - "vcf": { - "alt": "A", - "chr": "5", - "pos": "35058666", - "ref": "AAG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000005.10:g.35058565_35058566del", - "vcf": { - "alt": "G", - "chr": "5", - "pos": "35058564", - "ref": "GAT" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000005.9:g.35058668_35058669del", - "vcf": { - "alt": "A", - "chr": "chr5", - "pos": "35058666", - "ref": "AAG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000005.10:g.35058565_35058566del", - "vcf": { - "alt": "G", - "chr": 
"chr5", - "pos": "35058564", - "ref": "GAT" - } - } - }, - "submitted_variant": "NC_000005.9:g.35058667_35058668AG=", - "transcript_description": "Homo sapiens prolactin receptor (PRLR), transcript variant 3, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "NM_001204317.1 Homo sapiens prolactin receptor (PRLR), transcript variant 4, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001191246.1:p.?", - "HGVS_transcript_variant": "NM_001204317.1:c.856-9155_856-9154=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "PRLR", - "genome_context_intronic_sequence": "NC_000005.9(NM_001204317.1):c.856-9155_856-9154=", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000005.9:g.35058668_35058669del", - "vcf": { - "alt": "A", - "chr": "5", - "pos": "35058666", - "ref": "AAG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000005.10:g.35058562_35058563del", - "vcf": { - "alt": "C", - "chr": "5", - "pos": "35058560", - "ref": "CAA" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000005.9:g.35058668_35058669del", - "vcf": { - "alt": "A", - "chr": "chr5", - "pos": "35058666", - "ref": "AAG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000005.10:g.35058562_35058563del", - "vcf": { - "alt": "C", - "chr": "chr5", - "pos": "35058560", - "ref": "CAA" - } - } - }, - "submitted_variant": "NC_000005.9:g.35058667_35058668AG=", - "transcript_description": "Homo sapiens prolactin receptor (PRLR), transcript variant 4, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "NM_001204318.1 Homo sapiens prolactin receptor (PRLR), transcript variant 5, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001191247.1:p.?", - "HGVS_transcript_variant": 
"NM_001204318.1:c.686-9155_686-9154=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "PRLR", - "genome_context_intronic_sequence": "NC_000005.9(NM_001204318.1):c.686-9155_686-9154=", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000005.9:g.35058668_35058669del", - "vcf": { - "alt": "A", - "chr": "5", - "pos": "35058666", - "ref": "AAG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000005.10:g.35058562_35058563del", - "vcf": { - "alt": "C", - "chr": "5", - "pos": "35058560", - "ref": "CAA" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000005.9:g.35058668_35058669del", - "vcf": { - "alt": "A", - "chr": "chr5", - "pos": "35058666", - "ref": "AAG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000005.10:g.35058562_35058563del", - "vcf": { - "alt": "C", - "chr": "chr5", - "pos": "35058560", - "ref": "CAA" - } - } - }, - "submitted_variant": "NC_000005.9:g.35058667_35058668AG=", - "transcript_description": "Homo sapiens prolactin receptor (PRLR), transcript variant 5, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000251.1:c.1296_1348del": { - "Validation_Warning_1": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "NM_000251.1:c.1296_1348del", - "transcript_description": "", - "validation_warnings": [ - "Required information for NM_000251.1 is missing from the Universal Transcript Archive, please select an alternative version of NM_000251.1 by submitting NM_000251.1 or MSH2 to https://variantvalidator.org/ref_finder/, or select an alternative genome build" - ] - }, - "flag": "warning" - 
} - }, - { - "NM_000088.3:c.2023_2028del": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.2024_2028+1del", - "HGVS_LRG_variant": "LRG_1:g.14656_14661del", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.14656_14661del", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.(Ala675_Arg676del)", - "HGVS_transcript_variant": "NM_000088.3:c.2024_2028+1del", - "RefSeqGene_context_intronic_sequence": "NG_007400.1(NM_000088.3):c.2024_2028+1del", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "NC_000017.10(NM_000088.3):c.2024_2028+1del", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48269343_48269348del", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "48269339", - "ref": "ACTCTTG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50191982_50191987del", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "50191978", - "ref": "ACTCTTG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48269343_48269348del", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "48269339", - "ref": "ACTCTTG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50191982_50191987del", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "50191978", - "ref": "ACTCTTG" - } - } - }, - "submitted_variant": "NM_000088.3:c.2023_2028del", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [ - "NM_000088.3:c.2023_2028del normalized to NM_000088.3:c.2024_2028+1del" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000088.3:c.2024_2028+1del": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.2024_2028+1del", - "HGVS_LRG_variant": "LRG_1:g.14656_14661del", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.14656_14661del", - "HGVS_predicted_protein_consequence": 
"NP_000079.2(LRG_1p1):p.(Ala675_Arg676del)", - "HGVS_transcript_variant": "NM_000088.3:c.2024_2028+1del", - "RefSeqGene_context_intronic_sequence": "NG_007400.1(NM_000088.3):c.2024_2028+1del", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "NC_000017.10(NM_000088.3):c.2024_2028+1del", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48269343_48269348del", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "48269339", - "ref": "ACTCTTG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50191982_50191987del", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "50191978", - "ref": "ACTCTTG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48269343_48269348del", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "48269339", - "ref": "ACTCTTG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50191982_50191987del", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "50191978", - "ref": "ACTCTTG" - } - } - }, - "submitted_variant": "NM_000088.3:c.2024_2028+1del", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [ - "NM_000088.3:c.2024_2028+1del automapped to NM_000088.3:c.2023_2028del", - "NM_000088.3:c.2023_2028del normalized to NM_000088.3:c.2024_2028+1del" - ] - }, - "flag": "gene_variant" - } - }, - { - "ENST00000450616.1:n.31+1G>C": { - "Validation_Warning_1": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "ENST00000450616.1:n.31+1G>C", - "transcript_description": "", - "validation_warnings": [ - "Unable to map ENST00000450616.1 to an equivalent RefSeq 
transcript" - ] - }, - "flag": "warning" - } - }, - { - "ENST00000491747:c.5071A>T": { - "Validation_Warning_1": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "ENST00000491747:c.5071A>T", - "transcript_description": "", - "validation_warnings": [ - "Unable to map ENST00000491747 to an equivalent RefSeq transcript" - ] - }, - "flag": "warning" - } - }, - { - "NM_000088.3:c.589G>T": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.589G>T", - "HGVS_LRG_variant": "LRG_1:g.8638G>T", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.8638G>T", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.(Gly197Cys)", - "HGVS_transcript_variant": "NM_000088.3:c.589G>T", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48275363C>A", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "48275363", - "ref": "C" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50198002C>A", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "50198002", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48275363C>A", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "48275363", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50198002C>A", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "50198002", - "ref": "C" - } - } - }, - "submitted_variant": "NM_000088.3:c.589G>T", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", 
- "validation_warnings": [] - }, - "flag": "gene_variant" - } - }, - { - "NG_007400.1:g.8638G>T": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.589G>T", - "HGVS_LRG_variant": "LRG_1:g.8638G>T", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.8638G>T", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.(Gly197Cys)", - "HGVS_transcript_variant": "NM_000088.3:c.589G>T", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48275363C>A", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "48275363", - "ref": "C" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50198002C>A", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "50198002", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48275363C>A", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "48275363", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50198002C>A", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "50198002", - "ref": "C" - } - } - }, - "submitted_variant": "NG_007400.1:g.8638G>T", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [] - }, - "flag": "gene_variant" - } - }, - { - "LRG_1:g.8638G>T": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.589G>T", - "HGVS_LRG_variant": "LRG_1:g.8638G>T", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.8638G>T", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.(Gly197Cys)", - "HGVS_transcript_variant": "NM_000088.3:c.589G>T", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "", - 
"primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48275363C>A", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "48275363", - "ref": "C" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50198002C>A", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "50198002", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48275363C>A", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "48275363", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50198002C>A", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "50198002", - "ref": "C" - } - } - }, - "submitted_variant": "LRG_1:g.8638G>T", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [ - "LRG_1:g.8638G>T automapped to NG_007400.1:g.8638G>T" - ] - }, - "flag": "gene_variant" - } - }, - { - "LRG_1t1:c.589G>T": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.589G>T", - "HGVS_LRG_variant": "LRG_1:g.8638G>T", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.8638G>T", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.(Gly197Cys)", - "HGVS_transcript_variant": "NM_000088.3:c.589G>T", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48275363C>A", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "48275363", - "ref": "C" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50198002C>A", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "50198002", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48275363C>A", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "48275363", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": 
"NC_000017.11:g.50198002C>A", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "50198002", - "ref": "C" - } - } - }, - "submitted_variant": "LRG_1t1:c.589G>T", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [ - "LRG_1t1:c.589G>T automapped to NM_000088.3:c.589G>T" - ] - }, - "flag": "gene_variant" - } - }, - { - "chr16:g.15832508_15832509delinsAC": { - "NM_001040113.1 Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2B, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_009299.1:g.123379_123380inv", - "HGVS_predicted_protein_consequence": "NP_001035202.1:p.(Thr1019Val)", - "HGVS_transcript_variant": "NM_001040113.1:c.3055_3056inv", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh38": { - "HGVS_genomic_description": "NT_187607.1:g.1396662_1396663inv", - "vcf": { - "alt": "AC", - "chr": "HSCHR16_1_CTG1", - "pos": "1396662", - "ref": "GT" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NT_187607.1:g.1396662_1396663inv", - "vcf": { - "alt": "AC", - "chr": "chr16_KI270853v1_alt", - "pos": "1396662", - "ref": "GT" - } - } - } - ], - "gene_symbol": "MYH11", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000016.9:g.15832508_15832509inv", - "vcf": { - "alt": "AC", - "chr": "16", - "pos": "15832508", - "ref": "GT" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000016.10:g.15738651_15738652inv", - "vcf": { - "alt": "AC", - "chr": "16", - "pos": "15738651", - "ref": "GT" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000016.9:g.15832508_15832509inv", - "vcf": { - "alt": "AC", - "chr": "chr16", - "pos": "15832508", - "ref": "GT" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000016.10:g.15738651_15738652inv", - "vcf": { - "alt": "AC", - "chr": "chr16", - "pos": "15738651", - "ref": "GT" - } - } - }, 
- "submitted_variant": "chr16:g.15832508_15832509delinsAC", - "transcript_description": "Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2B, mRNA", - "validation_warnings": [] - }, - "NM_001040114.1 Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1B, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001035203.1:p.(Thr1019Val)", - "HGVS_transcript_variant": "NM_001040114.1:c.3055_3056inv", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh38": { - "HGVS_genomic_description": "NT_187607.1:g.1396662_1396663inv", - "vcf": { - "alt": "AC", - "chr": "HSCHR16_1_CTG1", - "pos": "1396662", - "ref": "GT" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NT_187607.1:g.1396662_1396663inv", - "vcf": { - "alt": "AC", - "chr": "chr16_KI270853v1_alt", - "pos": "1396662", - "ref": "GT" - } - } - } - ], - "gene_symbol": "MYH11", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000016.9:g.15832508_15832509inv", - "vcf": { - "alt": "AC", - "chr": "16", - "pos": "15832508", - "ref": "GT" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000016.10:g.15738651_15738652inv", - "vcf": { - "alt": "AC", - "chr": "16", - "pos": "15738651", - "ref": "GT" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000016.9:g.15832508_15832509inv", - "vcf": { - "alt": "AC", - "chr": "chr16", - "pos": "15832508", - "ref": "GT" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000016.10:g.15738651_15738652inv", - "vcf": { - "alt": "AC", - "chr": "chr16", - "pos": "15738651", - "ref": "GT" - } - } - }, - "submitted_variant": "chr16:g.15832508_15832509delinsAC", - "transcript_description": "Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1B, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "NM_002474.2 
Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1A, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_002465.1:p.(Thr1012Val)", - "HGVS_transcript_variant": "NM_002474.2:c.3034_3035inv", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh38": { - "HGVS_genomic_description": "NT_187607.1:g.1396662_1396663inv", - "vcf": { - "alt": "AC", - "chr": "HSCHR16_1_CTG1", - "pos": "1396662", - "ref": "GT" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NT_187607.1:g.1396662_1396663inv", - "vcf": { - "alt": "AC", - "chr": "chr16_KI270853v1_alt", - "pos": "1396662", - "ref": "GT" - } - } - } - ], - "gene_symbol": "MYH11", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000016.9:g.15832508_15832509inv", - "vcf": { - "alt": "AC", - "chr": "16", - "pos": "15832508", - "ref": "GT" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000016.10:g.15738651_15738652inv", - "vcf": { - "alt": "AC", - "chr": "16", - "pos": "15738651", - "ref": "GT" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000016.9:g.15832508_15832509inv", - "vcf": { - "alt": "AC", - "chr": "chr16", - "pos": "15832508", - "ref": "GT" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000016.10:g.15738651_15738652inv", - "vcf": { - "alt": "AC", - "chr": "chr16", - "pos": "15738651", - "ref": "GT" - } - } - }, - "submitted_variant": "chr16:g.15832508_15832509delinsAC", - "transcript_description": "Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1A, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "NM_022844.2 Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2A, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": 
"NP_074035.1:p.(Thr1012Val)", - "HGVS_transcript_variant": "NM_022844.2:c.3034_3035inv", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh38": { - "HGVS_genomic_description": "NT_187607.1:g.1396662_1396663inv", - "vcf": { - "alt": "AC", - "chr": "HSCHR16_1_CTG1", - "pos": "1396662", - "ref": "GT" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NT_187607.1:g.1396662_1396663inv", - "vcf": { - "alt": "AC", - "chr": "chr16_KI270853v1_alt", - "pos": "1396662", - "ref": "GT" - } - } - } - ], - "gene_symbol": "MYH11", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000016.9:g.15832508_15832509inv", - "vcf": { - "alt": "AC", - "chr": "16", - "pos": "15832508", - "ref": "GT" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000016.10:g.15738651_15738652inv", - "vcf": { - "alt": "AC", - "chr": "16", - "pos": "15738651", - "ref": "GT" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000016.9:g.15832508_15832509inv", - "vcf": { - "alt": "AC", - "chr": "chr16", - "pos": "15832508", - "ref": "GT" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000016.10:g.15738651_15738652inv", - "vcf": { - "alt": "AC", - "chr": "chr16", - "pos": "15738651", - "ref": "GT" - } - } - }, - "submitted_variant": "chr16:g.15832508_15832509delinsAC", - "transcript_description": "Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2A, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "NG_012386.1:g.24048dupG": { - "NM_000368.4 Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "LRG_486t1:c.363+1dup", - "HGVS_LRG_variant": "LRG_486:g.24048dup", - "HGVS_RefSeqGene_variant": "NG_012386.1:g.24048dup", - "HGVS_predicted_protein_consequence": "NP_000359.1(LRG_486p1):p.(Met122AspfsTer4)", - "HGVS_transcript_variant": "NM_000368.4:c.363+1dup", 
- "RefSeqGene_context_intronic_sequence": "NG_012386.1(NM_000368.4):c.363+1dup", - "alt_genomic_loci": [], - "gene_symbol": "TSC1", - "genome_context_intronic_sequence": "NC_000009.11(NM_000368.4):c.363+1dup", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000009.11:g.135800974dup", - "vcf": { - "alt": "ACC", - "chr": "9", - "pos": "135800972", - "ref": "AC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000009.12:g.132925587dup", - "vcf": { - "alt": "ACC", - "chr": "9", - "pos": "132925585", - "ref": "AC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000009.11:g.135800974dup", - "vcf": { - "alt": "ACC", - "chr": "chr9", - "pos": "135800972", - "ref": "AC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000009.12:g.132925587dup", - "vcf": { - "alt": "ACC", - "chr": "chr9", - "pos": "132925585", - "ref": "AC" - } - } - }, - "submitted_variant": "NG_012386.1:g.24048dupG", - "transcript_description": "Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA", - "validation_warnings": [ - "NM_000368.4:c.363+1dup automapped to NM_000368.4:c.363dup", - "NM_000368.4:c.363dup normalized to NM_000368.4:c.363+1dup" - ] - }, - "NM_001162426.1 Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 3, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001155898.1:p.(Met122AspfsTer4)", - "HGVS_transcript_variant": "NM_001162426.1:c.363+1dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "TSC1", - "genome_context_intronic_sequence": "NC_000009.11(NM_001162426.1):c.363+1dup", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000009.11:g.135800974dup", - "vcf": { - "alt": "ACC", - "chr": "9", - "pos": "135800972", - "ref": "AC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000009.12:g.132925587dup", - "vcf": { - "alt": "ACC", - 
"chr": "9", - "pos": "132925585", - "ref": "AC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000009.11:g.135800974dup", - "vcf": { - "alt": "ACC", - "chr": "chr9", - "pos": "135800972", - "ref": "AC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000009.12:g.132925587dup", - "vcf": { - "alt": "ACC", - "chr": "chr9", - "pos": "132925585", - "ref": "AC" - } - } - }, - "submitted_variant": "NG_012386.1:g.24048dupG", - "transcript_description": "Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 3, mRNA", - "validation_warnings": [ - "NM_001162426.1:c.363+1dup automapped to NM_001162426.1:c.363dup", - "NM_001162426.1:c.363dup normalized to NM_001162426.1:c.363+1dup", - "RefSeqGene record not available" - ] - }, - "NM_001162427.1 Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 4, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001155899.1:p.?", - "HGVS_transcript_variant": "NM_001162427.1:c.210+1615dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "TSC1", - "genome_context_intronic_sequence": "NC_000009.11(NM_001162427.1):c.210+1615dup", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000009.11:g.135800974dup", - "vcf": { - "alt": "ACC", - "chr": "9", - "pos": "135800972", - "ref": "AC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000009.12:g.132925587dup", - "vcf": { - "alt": "ACC", - "chr": "9", - "pos": "132925585", - "ref": "AC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000009.11:g.135800974dup", - "vcf": { - "alt": "ACC", - "chr": "chr9", - "pos": "135800972", - "ref": "AC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000009.12:g.132925587dup", - "vcf": { - "alt": "ACC", - "chr": "chr9", - "pos": "132925585", - "ref": "AC" - } - } - }, - "submitted_variant": "NG_012386.1:g.24048dupG", - "transcript_description": 
"Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 4, mRNA", - "validation_warnings": [ - "NM_001162427.1:c.210+1615dup automapped to NM_001162427.1:c.210+1614dup", - "NM_001162427.1:c.210+1614dup normalized to NM_001162427.1:c.210+1615dup", - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_033517.1:c.1307_1309delCGA": { - "NM_033517.1 Homo sapiens SH3 and multiple ankyrin repeat domains 3 (SHANK3), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_277052.1:p.(Pro436_Ser437delinsArg)", - "HGVS_transcript_variant": "NM_033517.1:c.1307_1309del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh38": { - "HGVS_genomic_description": "NW_015148969.1:g.33721_33723del", - "vcf": { - "alt": "C", - "chr": "HG1311_PATCH", - "pos": "33720", - "ref": "CCGA" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NW_015148969.1:g.33721_33723del", - "vcf": { - "alt": "C", - "chr": "NW_015148969.1", - "pos": "33720", - "ref": "CCGA" - } - } - } - ], - "gene_symbol": "SHANK3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "NM_033517.1:c.1307_1309delCGA", - "transcript_description": "Homo sapiens SH3 and multiple ankyrin repeat domains 3 (SHANK3), mRNA", - "validation_warnings": [ - "RefSeqGene record not available", - "NM_033517.1:c.1307_1309delCGA can not be mapped directly to genome build GRCh37. 
See Alternative genomic loci for aligned genomic positions" - ] - }, - "flag": "gene_variant" - } - }, - { - "HG1311_PATCH-33720-CCGA-C": { - "Validation_Warning_1": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "HG1311_PATCH-33720-CCGA-C", - "transcript_description": "", - "validation_warnings": [ - "HG1311_PATCH is not part of genome build GRCh37" - ] - }, - "flag": "warning" - } - }, - { - "2-73675227-TCTC-TCTCCTC": { - "NM_015120.4 Homo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_741t1:c.1573_1579=", - "HGVS_LRG_variant": "LRG_741:g.67345_67351=", - "HGVS_RefSeqGene_variant": "NG_011690.1:g.67345_67351=", - "HGVS_predicted_protein_consequence": "NP_055935.4(LRG_741p1):p.(Ser525=)", - "HGVS_transcript_variant": "NM_015120.4:c.1573_1579=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "ALMS1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000002.11:g.73675228_73675230dup", - "vcf": { - "alt": "TCTCCTC", - "chr": "2", - "pos": "73675227", - "ref": "TCTC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000002.12:g.73448097_73448103=", - "vcf": { - "alt": "TCTCCTC", - "chr": "2", - "pos": "73448097", - "ref": "TCTCCTC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000002.11:g.73675228_73675230dup", - "vcf": { - "alt": "TCTCCTC", - "chr": "chr2", - "pos": "73675227", - "ref": "TCTC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000002.12:g.73448097_73448103=", - "vcf": { - "alt": "TCTCCTC", - "chr": "chr2", - "pos": "73448097", - "ref": 
"TCTCCTC" - } - } - }, - "submitted_variant": "2-73675227-TCTC-TCTCCTC", - "transcript_description": "Homo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA", - "validation_warnings": [ - "NC_000002.11:g.73675227TCTC>TCTCCTC automapped to NC_000002.11:g.73675228_73675230dupCTC", - "The displayed variants may be artefacts of aligning NM_015120.4 with genome build GRCh37", - "NM_015120.4:c.1573_1579 contains 3 transcript base(s) that fail to align to chromosome NC_000002.11", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "flag": "gene_variant" - } - }, - { - "2-73675227-TC-TC": { - "NM_015120.4 Homo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_741t1:c.1577_1579del", - "HGVS_LRG_variant": "LRG_741:g.67349_67351del", - "HGVS_RefSeqGene_variant": "NG_011690.1:g.67349_67351del", - "HGVS_predicted_protein_consequence": "NP_055935.4(LRG_741p1):p.(Pro526del)", - "HGVS_transcript_variant": "NM_015120.4:c.1577_1579del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "ALMS1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000002.11:g.73675227_73675228=", - "vcf": { - "alt": "TC", - "chr": "2", - "pos": "73675227", - "ref": "TC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000002.12:g.73448101_73448103del", - "vcf": { - "alt": "T", - "chr": "2", - "pos": "73448097", - "ref": "TCTC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000002.11:g.73675227_73675228=", - "vcf": { - "alt": "TC", - "chr": "chr2", - "pos": "73675227", - "ref": "TC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000002.12:g.73448101_73448103del", - "vcf": { - "alt": "T", - "chr": "chr2", - "pos": "73448097", - "ref": "TCTC" - } - } - }, - "submitted_variant": "2-73675227-TC-TC", - 
"transcript_description": "Homo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA", - "validation_warnings": [ - "NC_000002.11:g.73675227TC>TC automapped to NC_000002.11:g.73675227_73675228TC=", - "The displayed variants may be artefacts of aligning NM_015120.4 with genome build GRCh37", - "NM_015120.4:c.1574_1576 contains 3 transcript base(s) that fail to align to chromosome NC_000002.11", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "flag": "gene_variant" - } - }, - { - "3-14561627-AG-AGG": { - "NM_001080423.2 Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001073892.2:p.(Arg436=)", - "HGVS_transcript_variant": "NM_001080423.2:c.1307_1311=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "GRIP2", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000003.11:g.14561629dup", - "vcf": { - "alt": "AGG", - "chr": "3", - "pos": "14561627", - "ref": "AG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000003.11:g.14561629dup", - "vcf": { - "alt": "AGG", - "chr": "chr3", - "pos": "14561627", - "ref": "AG" - } - } - }, - "submitted_variant": "3-14561627-AG-AGG", - "transcript_description": "Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA", - "validation_warnings": [ - "NC_000003.11:g.14561627AG>AGG automapped to NC_000003.11:g.14561629dupG", - "A more recent version of the selected reference sequence NM_001080423.2 is available (NM_001080423.3)", - "NM_001080423.3:c.1307_1311TGTCG= MUST be fully validated prior to use in reports", - "select_variants=NM_001080423.3:c.1307_1311TGTCG=", - "RefSeqGene record not available" - ] - }, - "NM_001080423.3 Homo sapiens glutamate 
receptor interacting protein 2 (GRIP2), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001073892.3:p.(Arg339=)", - "HGVS_transcript_variant": "NM_001080423.3:c.1016_1020=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "GRIP2", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000003.11:g.14561629dup", - "vcf": { - "alt": "AGG", - "chr": "3", - "pos": "14561627", - "ref": "AG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000003.12:g.14520120_14520124=", - "vcf": { - "alt": "GGGCC", - "chr": "3", - "pos": "14520120", - "ref": "GGGCC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000003.11:g.14561629dup", - "vcf": { - "alt": "AGG", - "chr": "chr3", - "pos": "14561627", - "ref": "AG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000003.12:g.14520120_14520124=", - "vcf": { - "alt": "GGGCC", - "chr": "chr3", - "pos": "14520120", - "ref": "GGGCC" - } - } - }, - "submitted_variant": "3-14561627-AG-AGG", - "transcript_description": "Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA", - "validation_warnings": [ - "NC_000003.11:g.14561627AG>AGG automapped to NC_000003.11:g.14561629dupG", - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "3-14561630-CC-CC": { - "NM_001080423.2 Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001073892.2:p.(Ser438GlnfsTer4)", - "HGVS_transcript_variant": "NM_001080423.2:c.1311del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "GRIP2", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": 
"NC_000003.11:g.14561629_14561632=", - "vcf": { - "alt": "GCCT", - "chr": "3", - "pos": "14561629", - "ref": "GCCT" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000003.11:g.14561629_14561632=", - "vcf": { - "alt": "GCCT", - "chr": "chr3", - "pos": "14561629", - "ref": "GCCT" - } - } - }, - "submitted_variant": "3-14561630-CC-CC", - "transcript_description": "Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA", - "validation_warnings": [ - "NC_000003.11:g.14561630CC>CC automapped to NC_000003.11:g.14561630_14561631CC=", - "The displayed variants may be artefacts of aligning NM_001080423.2 with genome build GRCh37", - "NM_001080423.2:c.1307_1309 contains 1 transcript base(s) that fail to align to chromosome NC_000003.11", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "A more recent version of the selected reference sequence NM_001080423.2 is available (NM_001080423.3)", - "NM_001080423.3:c.1311delG MUST be fully validated prior to use in reports", - "select_variants=NM_001080423.3:c.1311delG", - "RefSeqGene record not available" - ] - }, - "NM_001080423.3 Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001073892.3:p.(Ser341GlnfsTer4)", - "HGVS_transcript_variant": "NM_001080423.3:c.1020del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "GRIP2", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000003.11:g.14561629_14561632=", - "vcf": { - "alt": "GCCT", - "chr": "3", - "pos": "14561629", - "ref": "GCCT" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000003.12:g.14520122del", - "vcf": { - "alt": "A", - "chr": "3", - "pos": "14520119", - "ref": "AG" - } - }, - "hg19": { - 
"HGVS_genomic_description": "NC_000003.11:g.14561629_14561632=", - "vcf": { - "alt": "GCCT", - "chr": "chr3", - "pos": "14561629", - "ref": "GCCT" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000003.12:g.14520122del", - "vcf": { - "alt": "A", - "chr": "chr3", - "pos": "14520119", - "ref": "AG" - } - } - }, - "submitted_variant": "3-14561630-CC-CC", - "transcript_description": "Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA", - "validation_warnings": [ - "NC_000003.11:g.14561630CC>CC automapped to NC_000003.11:g.14561630_14561631CC=", - "The displayed variants may be artefacts of aligning NM_001080423.3 with genome build GRCh37", - "NM_001080423.3:c.1016_1018 contains 1 transcript base(s) that fail to align to chromosome NC_000003.11", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "6-90403795-G-G": { - "NM_014611.1 Homo sapiens midasin AAA ATPase 1 (MDN1), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_055426.1:p.(Val3293=)", - "HGVS_transcript_variant": "NM_014611.1:c.9879T>C", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "MDN1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000006.11:g.90403795G=", - "vcf": { - "alt": "G", - "chr": "6", - "pos": "90403795", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000006.11:g.90403795G=", - "vcf": { - "alt": "G", - "chr": "chr6", - "pos": "90403795", - "ref": "G" - } - } - }, - "submitted_variant": "6-90403795-G-G", - "transcript_description": "Homo sapiens midasin AAA ATPase 1 (MDN1), mRNA", - "validation_warnings": [ - "A more recent version of the selected reference sequence NM_014611.1 is 
available (NM_014611.2)", - "NM_014611.2:c.9879C= MUST be fully validated prior to use in reports", - "select_variants=NM_014611.2:c.9879C=", - "RefSeqGene record not available" - ] - }, - "NM_014611.2 Homo sapiens midasin AAA ATPase 1 (MDN1), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_055426.1:p.(Val3293=)", - "HGVS_transcript_variant": "NM_014611.2:c.9879C=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "MDN1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000006.11:g.90403795G=", - "vcf": { - "alt": "G", - "chr": "6", - "pos": "90403795", - "ref": "G" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000006.12:g.89694076G=", - "vcf": { - "alt": "G", - "chr": "6", - "pos": "89694076", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000006.11:g.90403795G=", - "vcf": { - "alt": "G", - "chr": "chr6", - "pos": "90403795", - "ref": "G" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000006.12:g.89694076G=", - "vcf": { - "alt": "G", - "chr": "chr6", - "pos": "89694076", - "ref": "G" - } - } - }, - "submitted_variant": "6-90403795-G-G", - "transcript_description": "Homo sapiens midasin AAA ATPase 1 (MDN1), mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "6-90403795-G-A": { - "NM_014611.1 Homo sapiens midasin AAA ATPase 1 (MDN1), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_055426.1:p.(Val3293=)", - "HGVS_transcript_variant": "NM_014611.1:c.9879T=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "MDN1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - 
"HGVS_genomic_description": "NC_000006.11:g.90403795G>A", - "vcf": { - "alt": "A", - "chr": "6", - "pos": "90403795", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000006.11:g.90403795G>A", - "vcf": { - "alt": "A", - "chr": "chr6", - "pos": "90403795", - "ref": "G" - } - } - }, - "submitted_variant": "6-90403795-G-A", - "transcript_description": "Homo sapiens midasin AAA ATPase 1 (MDN1), mRNA", - "validation_warnings": [ - "A more recent version of the selected reference sequence NM_014611.1 is available (NM_014611.2)", - "NM_014611.2:c.9879C>T MUST be fully validated prior to use in reports", - "select_variants=NM_014611.2:c.9879C>T", - "RefSeqGene record not available" - ] - }, - "NM_014611.2 Homo sapiens midasin AAA ATPase 1 (MDN1), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_055426.1:p.(Val3293=)", - "HGVS_transcript_variant": "NM_014611.2:c.9879C>T", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "MDN1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000006.11:g.90403795G>A", - "vcf": { - "alt": "A", - "chr": "6", - "pos": "90403795", - "ref": "G" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000006.12:g.89694076G>A", - "vcf": { - "alt": "A", - "chr": "6", - "pos": "89694076", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000006.11:g.90403795G>A", - "vcf": { - "alt": "A", - "chr": "chr6", - "pos": "90403795", - "ref": "G" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000006.12:g.89694076G>A", - "vcf": { - "alt": "A", - "chr": "chr6", - "pos": "89694076", - "ref": "G" - } - } - }, - "submitted_variant": "6-90403795-G-A", - "transcript_description": "Homo sapiens midasin AAA ATPase 1 (MDN1), mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "flag": 
"gene_variant" - } - }, - { - "6-32012992-CG-C": { - "NM_019105.6 Homo sapiens tenascin XB (TNXB), transcript variant XB, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_008337.2:g.69159del", - "HGVS_predicted_protein_consequence": "NP_061978.6:p.(Arg3571AlafsTer91)", - "HGVS_transcript_variant": "NM_019105.6:c.10711del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NT_113891.2:g.3483644del", - "vcf": { - "alt": "C", - "chr": "HSCHR6_MHC_COX_CTG1", - "pos": "3483643", - "ref": "CG" - } - } - }, - { - "hg19": { - "HGVS_genomic_description": "NT_113891.2:g.3483644del", - "vcf": { - "alt": "C", - "chr": "chr6_cox_hap2", - "pos": "3483643", - "ref": "CG" - } - } - }, - { - "GRCh38": { - "HGVS_genomic_description": "NT_113891.3:g.3483538del", - "vcf": { - "alt": "C", - "chr": "HSCHR6_MHC_COX_CTG1", - "pos": "3483537", - "ref": "CG" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NT_113891.3:g.3483538del", - "vcf": { - "alt": "C", - "chr": "chr6_GL000251v2_alt", - "pos": "3483537", - "ref": "CG" - } - } - }, - { - "GRCh37": { - "HGVS_genomic_description": "NT_167245.1:g.3292210del", - "vcf": { - "alt": "C", - "chr": "HSCHR6_MHC_DBB_CTG1", - "pos": "3292209", - "ref": "CG" - } - } - }, - { - "hg19": { - "HGVS_genomic_description": "NT_167245.1:g.3292210del", - "vcf": { - "alt": "C", - "chr": "chr6_dbb_hap3", - "pos": "3292209", - "ref": "CG" - } - } - }, - { - "GRCh38": { - "HGVS_genomic_description": "NT_167245.2:g.3286625del", - "vcf": { - "alt": "C", - "chr": "HSCHR6_MHC_DBB_CTG1", - "pos": "3286624", - "ref": "CG" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NT_167245.2:g.3286625del", - "vcf": { - "alt": "C", - "chr": "chr6_GL000252v2_alt", - "pos": "3286624", - "ref": "CG" - } - } - }, - { - "GRCh37": { - "HGVS_genomic_description": "NT_167247.1:g.3392834del", - "vcf": { - "alt": "C", - "chr": 
"HSCHR6_MHC_MCF_CTG1", - "pos": "3392833", - "ref": "CG" - } - } - }, - { - "hg19": { - "HGVS_genomic_description": "NT_167247.1:g.3392834del", - "vcf": { - "alt": "C", - "chr": "chr6_mcf_hap5", - "pos": "3392833", - "ref": "CG" - } - } - }, - { - "GRCh38": { - "HGVS_genomic_description": "NT_167247.2:g.3387249del", - "vcf": { - "alt": "C", - "chr": "HSCHR6_MHC_MCF_CTG1", - "pos": "3387248", - "ref": "CG" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NT_167247.2:g.3387249del", - "vcf": { - "alt": "C", - "chr": "chr6_GL000254v2_alt", - "pos": "3387248", - "ref": "CG" - } - } - }, - { - "GRCh37": { - "HGVS_genomic_description": "NT_167248.1:g.3271861G>C", - "vcf": { - "alt": "C", - "chr": "HSCHR6_MHC_QBL_CTG1", - "pos": "3271861", - "ref": "G" - } - } - }, - { - "hg19": { - "HGVS_genomic_description": "NT_167248.1:g.3271861G>C", - "vcf": { - "alt": "C", - "chr": "chr6_qbl_hap6", - "pos": "3271861", - "ref": "G" - } - } - } - ], - "gene_symbol": "TNXB", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000006.11:g.32012993del", - "vcf": { - "alt": "C", - "chr": "6", - "pos": "32012992", - "ref": "CG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000006.12:g.32045216del", - "vcf": { - "alt": "C", - "chr": "6", - "pos": "32045215", - "ref": "CG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000006.11:g.32012993del", - "vcf": { - "alt": "C", - "chr": "chr6", - "pos": "32012992", - "ref": "CG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000006.12:g.32045216del", - "vcf": { - "alt": "C", - "chr": "chr6", - "pos": "32045215", - "ref": "CG" - } - } - }, - "submitted_variant": "6-32012992-CG-C", - "transcript_description": "Homo sapiens tenascin XB (TNXB), transcript variant XB, mRNA", - "validation_warnings": [ - "NC_000006.11:g.32012992CG>C automapped to NC_000006.11:g.32012993delG" - ] - }, - "NM_032470.3 Homo sapiens tenascin XB (TNXB), transcript variant 
XB-S, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_115859.2:p.(Arg2AlafsTer91)", - "HGVS_transcript_variant": "NM_032470.3:c.4del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NT_113891.2:g.3483644del", - "vcf": { - "alt": "C", - "chr": "HSCHR6_MHC_COX_CTG1", - "pos": "3483643", - "ref": "CG" - } - } - }, - { - "hg19": { - "HGVS_genomic_description": "NT_113891.2:g.3483644del", - "vcf": { - "alt": "C", - "chr": "chr6_cox_hap2", - "pos": "3483643", - "ref": "CG" - } - } - }, - { - "GRCh38": { - "HGVS_genomic_description": "NT_113891.3:g.3483538del", - "vcf": { - "alt": "C", - "chr": "HSCHR6_MHC_COX_CTG1", - "pos": "3483537", - "ref": "CG" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NT_113891.3:g.3483538del", - "vcf": { - "alt": "C", - "chr": "chr6_GL000251v2_alt", - "pos": "3483537", - "ref": "CG" - } - } - }, - { - "GRCh37": { - "HGVS_genomic_description": "NT_167245.1:g.3292210del", - "vcf": { - "alt": "C", - "chr": "HSCHR6_MHC_DBB_CTG1", - "pos": "3292209", - "ref": "CG" - } - } - }, - { - "hg19": { - "HGVS_genomic_description": "NT_167245.1:g.3292210del", - "vcf": { - "alt": "C", - "chr": "chr6_dbb_hap3", - "pos": "3292209", - "ref": "CG" - } - } - }, - { - "GRCh38": { - "HGVS_genomic_description": "NT_167245.2:g.3286625del", - "vcf": { - "alt": "C", - "chr": "HSCHR6_MHC_DBB_CTG1", - "pos": "3286624", - "ref": "CG" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NT_167245.2:g.3286625del", - "vcf": { - "alt": "C", - "chr": "chr6_GL000252v2_alt", - "pos": "3286624", - "ref": "CG" - } - } - }, - { - "GRCh37": { - "HGVS_genomic_description": "NT_167247.1:g.3392834del", - "vcf": { - "alt": "C", - "chr": "HSCHR6_MHC_MCF_CTG1", - "pos": "3392833", - "ref": "CG" - } - } - }, - { - "hg19": { - "HGVS_genomic_description": "NT_167247.1:g.3392834del", - "vcf": { - "alt": 
"C", - "chr": "chr6_mcf_hap5", - "pos": "3392833", - "ref": "CG" - } - } - }, - { - "GRCh38": { - "HGVS_genomic_description": "NT_167247.2:g.3387249del", - "vcf": { - "alt": "C", - "chr": "HSCHR6_MHC_MCF_CTG1", - "pos": "3387248", - "ref": "CG" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NT_167247.2:g.3387249del", - "vcf": { - "alt": "C", - "chr": "chr6_GL000254v2_alt", - "pos": "3387248", - "ref": "CG" - } - } - }, - { - "GRCh37": { - "HGVS_genomic_description": "NT_167248.1:g.3274047del", - "vcf": { - "alt": "C", - "chr": "HSCHR6_MHC_QBL_CTG1", - "pos": "3274046", - "ref": "CG" - } - } - }, - { - "hg19": { - "HGVS_genomic_description": "NT_167248.1:g.3274047del", - "vcf": { - "alt": "C", - "chr": "chr6_qbl_hap6", - "pos": "3274046", - "ref": "CG" - } - } - }, - { - "GRCh38": { - "HGVS_genomic_description": "NT_167248.2:g.3268451del", - "vcf": { - "alt": "C", - "chr": "HSCHR6_MHC_QBL_CTG1", - "pos": "3268450", - "ref": "CG" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NT_167248.2:g.3268451del", - "vcf": { - "alt": "C", - "chr": "chr6_GL000255v2_alt", - "pos": "3268450", - "ref": "CG" - } - } - }, - { - "GRCh37": { - "HGVS_genomic_description": "NT_167249.1:g.3345701del", - "vcf": { - "alt": "C", - "chr": "HSCHR6_MHC_SSTO_CTG1", - "pos": "3345700", - "ref": "CG" - } - } - }, - { - "hg19": { - "HGVS_genomic_description": "NT_167249.1:g.3345701del", - "vcf": { - "alt": "C", - "chr": "chr6_ssto_hap7", - "pos": "3345700", - "ref": "CG" - } - } - }, - { - "GRCh38": { - "HGVS_genomic_description": "NT_167249.2:g.3346403del", - "vcf": { - "alt": "C", - "chr": "HSCHR6_MHC_SSTO_CTG1", - "pos": "3346402", - "ref": "CG" - } - } - }, - { - "hg38": { - "HGVS_genomic_description": "NT_167249.2:g.3346403del", - "vcf": { - "alt": "C", - "chr": "chr6_GL000256v2_alt", - "pos": "3346402", - "ref": "CG" - } - } - } - ], - "gene_symbol": "TNXB", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - 
"HGVS_genomic_description": "NC_000006.11:g.32012993del", - "vcf": { - "alt": "C", - "chr": "6", - "pos": "32012992", - "ref": "CG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000006.12:g.32045216del", - "vcf": { - "alt": "C", - "chr": "6", - "pos": "32045215", - "ref": "CG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000006.11:g.32012993del", - "vcf": { - "alt": "C", - "chr": "chr6", - "pos": "32012992", - "ref": "CG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000006.12:g.32045216del", - "vcf": { - "alt": "C", - "chr": "chr6", - "pos": "32045215", - "ref": "CG" - } - } - }, - "submitted_variant": "6-32012992-CG-C", - "transcript_description": "Homo sapiens tenascin XB (TNXB), transcript variant XB-S, mRNA", - "validation_warnings": [ - "NC_000006.11:g.32012992CG>C automapped to NC_000006.11:g.32012993delG", - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "17-48275363-C-A": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.589G>T", - "HGVS_LRG_variant": "LRG_1:g.8638G>T", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.8638G>T", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.(Gly197Cys)", - "HGVS_transcript_variant": "NM_000088.3:c.589G>T", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48275363C>A", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "48275363", - "ref": "C" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50198002C>A", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "50198002", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48275363C>A", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "48275363", - "ref": "C" - } - }, - "hg38": { - 
"HGVS_genomic_description": "NC_000017.11:g.50198002C>A", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "50198002", - "ref": "C" - } - } - }, - "submitted_variant": "17-48275363-C-A", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [] - }, - "flag": "gene_variant" - } - }, - { - "17-48275364-C-A": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.589-1G>T", - "HGVS_LRG_variant": "LRG_1:g.8637G>T", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.8637G>T", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.?", - "HGVS_transcript_variant": "NM_000088.3:c.589-1G>T", - "RefSeqGene_context_intronic_sequence": "NG_007400.1(NM_000088.3):c.589-1G>T", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "NC_000017.10(NM_000088.3):c.589-1G>T", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48275364C>A", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "48275364", - "ref": "C" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50198003C>A", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "50198003", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48275364C>A", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "48275364", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50198003C>A", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "50198003", - "ref": "C" - } - } - }, - "submitted_variant": "17-48275364-C-A", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [] - }, - "flag": "gene_variant" - } - }, - { - "17-48275359-GGA-TCC": { - "NM_000088.3 Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_1t1:c.591_593inv", - "HGVS_LRG_variant": 
"LRG_1:g.8640_8642inv", - "HGVS_RefSeqGene_variant": "NG_007400.1:g.8640_8642inv", - "HGVS_predicted_protein_consequence": "NP_000079.2(LRG_1p1):p.(Pro198Asp)", - "HGVS_transcript_variant": "NM_000088.3:c.591_593inv", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "COL1A1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000017.10:g.48275359_48275361inv", - "vcf": { - "alt": "TCC", - "chr": "17", - "pos": "48275359", - "ref": "GGA" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.50197998_50198000inv", - "vcf": { - "alt": "TCC", - "chr": "17", - "pos": "50197998", - "ref": "GGA" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000017.10:g.48275359_48275361inv", - "vcf": { - "alt": "TCC", - "chr": "chr17", - "pos": "48275359", - "ref": "GGA" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.50197998_50198000inv", - "vcf": { - "alt": "TCC", - "chr": "chr17", - "pos": "50197998", - "ref": "GGA" - } - } - }, - "submitted_variant": "17-48275359-GGA-TCC", - "transcript_description": "Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA", - "validation_warnings": [ - "NC_000017.10:g.48275359GGA>TCC automapped to NC_000017.10:g.48275359_48275361inv" - ] - }, - "flag": "gene_variant" - } - }, - { - "7-94039128-CTTG-C": { - "NM_000089.3 Homo sapiens collagen type I alpha 2 chain (COL1A2), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_2t1:c.1035_1035+2del", - "HGVS_LRG_variant": "LRG_2:g.20261_20263del", - "HGVS_RefSeqGene_variant": "NG_007405.1:g.20261_20263del", - "HGVS_predicted_protein_consequence": "NP_000080.2(LRG_2p1):p.(Val345del)", - "HGVS_transcript_variant": "NM_000089.3:c.1035_1035+2del", - "RefSeqGene_context_intronic_sequence": "NG_007405.1(NM_000089.3):c.1035_1035+2del", - "alt_genomic_loci": [], - "gene_symbol": "COL1A2", - "genome_context_intronic_sequence": 
"NC_000007.13(NM_000089.3):c.1035_1035+2del", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000007.13:g.94039133_94039135del", - "vcf": { - "alt": "C", - "chr": "7", - "pos": "94039128", - "ref": "CTTG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000007.14:g.94409821_94409823del", - "vcf": { - "alt": "C", - "chr": "7", - "pos": "94409816", - "ref": "CTTG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000007.13:g.94039133_94039135del", - "vcf": { - "alt": "C", - "chr": "chr7", - "pos": "94039128", - "ref": "CTTG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000007.14:g.94409821_94409823del", - "vcf": { - "alt": "C", - "chr": "chr7", - "pos": "94409816", - "ref": "CTTG" - } - } - }, - "submitted_variant": "7-94039128-CTTG-C", - "transcript_description": "Homo sapiens collagen type I alpha 2 chain (COL1A2), mRNA", - "validation_warnings": [ - "NC_000007.13:g.94039128CTTG>C automapped to NC_000007.13:g.94039133_94039135delTGT" - ] - }, - "flag": "gene_variant" - } - }, - { - "9-135800972-AC-ACC": { - "NM_000368.4 Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "LRG_486t1:c.363+1dup", - "HGVS_LRG_variant": "LRG_486:g.24048dup", - "HGVS_RefSeqGene_variant": "NG_012386.1:g.24048dup", - "HGVS_predicted_protein_consequence": "NP_000359.1(LRG_486p1):p.(Met122AspfsTer4)", - "HGVS_transcript_variant": "NM_000368.4:c.363+1dup", - "RefSeqGene_context_intronic_sequence": "NG_012386.1(NM_000368.4):c.363+1dup", - "alt_genomic_loci": [], - "gene_symbol": "TSC1", - "genome_context_intronic_sequence": "NC_000009.11(NM_000368.4):c.363+1dup", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000009.11:g.135800974dup", - "vcf": { - "alt": "ACC", - "chr": "9", - "pos": "135800972", - "ref": "AC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000009.12:g.132925587dup", - "vcf": { - "alt": "ACC", - "chr": "9", - "pos": "132925585", - 
"ref": "AC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000009.11:g.135800974dup", - "vcf": { - "alt": "ACC", - "chr": "chr9", - "pos": "135800972", - "ref": "AC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000009.12:g.132925587dup", - "vcf": { - "alt": "ACC", - "chr": "chr9", - "pos": "132925585", - "ref": "AC" - } - } - }, - "submitted_variant": "9-135800972-AC-ACC", - "transcript_description": "Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA", - "validation_warnings": [ - "NC_000009.11:g.135800972AC>ACC automapped to NC_000009.11:g.135800974dupC", - "NM_000368.4:c.363dup normalized to NM_000368.4:c.363+1dup" - ] - }, - "NM_001162426.1 Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 3, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001155898.1:p.(Met122AspfsTer4)", - "HGVS_transcript_variant": "NM_001162426.1:c.363+1dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "TSC1", - "genome_context_intronic_sequence": "NC_000009.11(NM_001162426.1):c.363+1dup", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000009.11:g.135800974dup", - "vcf": { - "alt": "ACC", - "chr": "9", - "pos": "135800972", - "ref": "AC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000009.12:g.132925587dup", - "vcf": { - "alt": "ACC", - "chr": "9", - "pos": "132925585", - "ref": "AC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000009.11:g.135800974dup", - "vcf": { - "alt": "ACC", - "chr": "chr9", - "pos": "135800972", - "ref": "AC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000009.12:g.132925587dup", - "vcf": { - "alt": "ACC", - "chr": "chr9", - "pos": "132925585", - "ref": "AC" - } - } - }, - "submitted_variant": "9-135800972-AC-ACC", - "transcript_description": "Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 3, mRNA", 
- "validation_warnings": [ - "NC_000009.11:g.135800972AC>ACC automapped to NC_000009.11:g.135800974dupC", - "NM_001162426.1:c.363dup normalized to NM_001162426.1:c.363+1dup", - "RefSeqGene record not available" - ] - }, - "NM_001162427.1 Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 4, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001155899.1:p.?", - "HGVS_transcript_variant": "NM_001162427.1:c.210+1615dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "TSC1", - "genome_context_intronic_sequence": "NC_000009.11(NM_001162427.1):c.210+1615dup", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000009.11:g.135800974dup", - "vcf": { - "alt": "ACC", - "chr": "9", - "pos": "135800972", - "ref": "AC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000009.12:g.132925587dup", - "vcf": { - "alt": "ACC", - "chr": "9", - "pos": "132925585", - "ref": "AC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000009.11:g.135800974dup", - "vcf": { - "alt": "ACC", - "chr": "chr9", - "pos": "135800972", - "ref": "AC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000009.12:g.132925587dup", - "vcf": { - "alt": "ACC", - "chr": "chr9", - "pos": "132925585", - "ref": "AC" - } - } - }, - "submitted_variant": "9-135800972-AC-ACC", - "transcript_description": "Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 4, mRNA", - "validation_warnings": [ - "NC_000009.11:g.135800972AC>ACC automapped to NC_000009.11:g.135800974dupC", - "NM_001162427.1:c.210+1615dup automapped to NM_001162427.1:c.210+1614dup", - "NM_001162427.1:c.210+1614dup normalized to NM_001162427.1:c.210+1615dup", - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "1-43212925-C-T": { - "NM_001146289.1 Homo sapiens prolyl 3-hydroxylase 1 (P3H1), transcript variant 2, mRNA": { - 
"HGVS_LRG_transcript_variant": "LRG_5t2:c.2073G>A", - "HGVS_LRG_variant": "LRG_5:g.24831G>A", - "HGVS_RefSeqGene_variant": "NG_008123.1:g.24831G>A", - "HGVS_predicted_protein_consequence": "NP_001139761.1:p.(Ala691=)", - "HGVS_transcript_variant": "NM_001146289.1:c.2073G>A", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "P3H1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000001.10:g.43212925C>T", - "vcf": { - "alt": "T", - "chr": "1", - "pos": "43212925", - "ref": "C" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000001.11:g.42747254C>T", - "vcf": { - "alt": "T", - "chr": "1", - "pos": "42747254", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000001.10:g.43212925C>T", - "vcf": { - "alt": "T", - "chr": "chr1", - "pos": "43212925", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000001.11:g.42747254C>T", - "vcf": { - "alt": "T", - "chr": "chr1", - "pos": "42747254", - "ref": "C" - } - } - }, - "submitted_variant": "1-43212925-C-T", - "transcript_description": "Homo sapiens prolyl 3-hydroxylase 1 (P3H1), transcript variant 2, mRNA", - "validation_warnings": [] - }, - "NM_001243246.1 Homo sapiens prolyl 3-hydroxylase 1 (P3H1), transcript variant 3, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001230175.1:p.(Ala691=)", - "HGVS_transcript_variant": "NM_001243246.1:c.2073G>A", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "P3H1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000001.10:g.43212925C>T", - "vcf": { - "alt": "T", - "chr": "1", - "pos": "43212925", - "ref": "C" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000001.11:g.42747254C>T", - "vcf": { - "alt": "T", - "chr": 
"1", - "pos": "42747254", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000001.10:g.43212925C>T", - "vcf": { - "alt": "T", - "chr": "chr1", - "pos": "43212925", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000001.11:g.42747254C>T", - "vcf": { - "alt": "T", - "chr": "chr1", - "pos": "42747254", - "ref": "C" - } - } - }, - "submitted_variant": "1-43212925-C-T", - "transcript_description": "Homo sapiens prolyl 3-hydroxylase 1 (P3H1), transcript variant 3, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "NM_022356.3 Homo sapiens prolyl 3-hydroxylase 1 (P3H1), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "LRG_5t1:c.2055+18G>A", - "HGVS_LRG_variant": "LRG_5:g.24831G>A", - "HGVS_RefSeqGene_variant": "NG_008123.1:g.24831G>A", - "HGVS_predicted_protein_consequence": "NP_071751.3(LRG_5p1):p.?", - "HGVS_transcript_variant": "NM_022356.3:c.2055+18G>A", - "RefSeqGene_context_intronic_sequence": "NG_008123.1(NM_022356.3):c.2055+18G>A", - "alt_genomic_loci": [], - "gene_symbol": "P3H1", - "genome_context_intronic_sequence": "NC_000001.10(NM_022356.3):c.2055+18G>A", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000001.10:g.43212925C>T", - "vcf": { - "alt": "T", - "chr": "1", - "pos": "43212925", - "ref": "C" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000001.11:g.42747254C>T", - "vcf": { - "alt": "T", - "chr": "1", - "pos": "42747254", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000001.10:g.43212925C>T", - "vcf": { - "alt": "T", - "chr": "chr1", - "pos": "43212925", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000001.11:g.42747254C>T", - "vcf": { - "alt": "T", - "chr": "chr1", - "pos": "42747254", - "ref": "C" - } - } - }, - "submitted_variant": "1-43212925-C-T", - "transcript_description": "Homo sapiens prolyl 3-hydroxylase 1 (P3H1), transcript variant 1, mRNA", - "validation_warnings": [] - }, 
- "flag": "gene_variant" - } - }, - { - "HG987_PATCH-355171-C-A": { - "NM_001194958.2 Homo sapiens potassium voltage-gated channel subfamily J member 18 (KCNJ18), mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_033093.1:g.15284C>A", - "HGVS_predicted_protein_consequence": "NP_001181887.2:p.(Ala7Asp)", - "HGVS_transcript_variant": "NM_001194958.2:c.20C>A", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NW_003315950.2:g.355171C>A", - "vcf": { - "alt": "A", - "chr": "HG987_PATCH", - "pos": "355171", - "ref": "C" - } - } - } - ], - "gene_symbol": "KCNJ18", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh38": { - "HGVS_genomic_description": "NC_000017.11:g.21702806C>A", - "vcf": { - "alt": "A", - "chr": "17", - "pos": "21702806", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000017.11:g.21702806C>A", - "vcf": { - "alt": "A", - "chr": "chr17", - "pos": "21702806", - "ref": "C" - } - } - }, - "submitted_variant": "HG987_PATCH-355171-C-A", - "transcript_description": "Homo sapiens potassium voltage-gated channel subfamily J member 18 (KCNJ18), mRNA", - "validation_warnings": [ - "NM_001194958.2:c.20C>A can not be mapped directly to genome build GRCh37. 
See alt_genomic_loci for aligned genomic positions" - ] - }, - "flag": "gene_variant" - } - }, - { - "20-43252915-T-C": { - "NM_000022.2 Homo sapiens adenosine deaminase (ADA), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "LRG_16t1:c.534A>G", - "HGVS_LRG_variant": "LRG_16:g.32462A>G", - "HGVS_RefSeqGene_variant": "NG_007385.1:g.32462A>G", - "HGVS_predicted_protein_consequence": "NP_000013.2(LRG_16p1):p.(Val178=)", - "HGVS_transcript_variant": "NM_000022.2:c.534A>G", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "ADA", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000020.10:g.43252915T>C", - "vcf": { - "alt": "C", - "chr": "20", - "pos": "43252915", - "ref": "T" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000020.10:g.43252915T>C", - "vcf": { - "alt": "C", - "chr": "chr20", - "pos": "43252915", - "ref": "T" - } - } - }, - "submitted_variant": "20-43252915-T-C", - "transcript_description": "Homo sapiens adenosine deaminase (ADA), transcript variant 1, mRNA", - "validation_warnings": [ - "A more recent version of the selected reference sequence NM_000022.2 is available (NM_000022.3)", - "NM_000022.3:c.534A>G MUST be fully validated prior to use in reports", - "select_variants=NM_000022.3:c.534A>G" - ] - }, - "NM_000022.3 Homo sapiens adenosine deaminase (ADA), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_000013.2(LRG_16p1):p.(Val178=)", - "HGVS_transcript_variant": "NM_000022.3:c.534A>G", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "ADA", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000020.10:g.43252915T>C", - "vcf": { - "alt": "C", - "chr": "20", - "pos": "43252915", - "ref": "T" - } 
- }, - "GRCh38": { - "HGVS_genomic_description": "NC_000020.11:g.44624274T>C", - "vcf": { - "alt": "C", - "chr": "20", - "pos": "44624274", - "ref": "T" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000020.10:g.43252915T>C", - "vcf": { - "alt": "C", - "chr": "chr20", - "pos": "43252915", - "ref": "T" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000020.11:g.44624274T>C", - "vcf": { - "alt": "C", - "chr": "chr20", - "pos": "44624274", - "ref": "T" - } - } - }, - "submitted_variant": "20-43252915-T-C", - "transcript_description": "Homo sapiens adenosine deaminase (ADA), transcript variant 1, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "NM_001322050.1 Homo sapiens adenosine deaminase (ADA), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001308979.1:p.(Val43=)", - "HGVS_transcript_variant": "NM_001322050.1:c.129A>G", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "ADA", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000020.10:g.43252915T>C", - "vcf": { - "alt": "C", - "chr": "20", - "pos": "43252915", - "ref": "T" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000020.11:g.44624274T>C", - "vcf": { - "alt": "C", - "chr": "20", - "pos": "44624274", - "ref": "T" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000020.10:g.43252915T>C", - "vcf": { - "alt": "C", - "chr": "chr20", - "pos": "43252915", - "ref": "T" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000020.11:g.44624274T>C", - "vcf": { - "alt": "C", - "chr": "chr20", - "pos": "44624274", - "ref": "T" - } - } - }, - "submitted_variant": "20-43252915-T-C", - "transcript_description": "Homo sapiens adenosine deaminase (ADA), transcript variant 2, mRNA", - "validation_warnings": [ - "RefSeqGene record not 
available" - ] - }, - "NM_001322051.1 Homo sapiens adenosine deaminase (ADA), transcript variant 3, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001308980.1:p.(Val178=)", - "HGVS_transcript_variant": "NM_001322051.1:c.534A>G", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "ADA", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000020.10:g.43252915T>C", - "vcf": { - "alt": "C", - "chr": "20", - "pos": "43252915", - "ref": "T" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000020.11:g.44624274T>C", - "vcf": { - "alt": "C", - "chr": "20", - "pos": "44624274", - "ref": "T" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000020.10:g.43252915T>C", - "vcf": { - "alt": "C", - "chr": "chr20", - "pos": "43252915", - "ref": "T" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000020.11:g.44624274T>C", - "vcf": { - "alt": "C", - "chr": "chr20", - "pos": "44624274", - "ref": "T" - } - } - }, - "submitted_variant": "20-43252915-T-C", - "transcript_description": "Homo sapiens adenosine deaminase (ADA), transcript variant 3, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "NR_136160.1 Homo sapiens adenosine deaminase (ADA), transcript variant 4, non-coding RNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "Non-coding transcript :n.", - "HGVS_transcript_variant": "NR_136160.1:n.685A>G", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "ADA", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000020.10:g.43252915T>C", - "vcf": { - "alt": "C", - "chr": "20", - "pos": "43252915", - "ref": "T" - } - }, - "GRCh38": { - 
"HGVS_genomic_description": "NC_000020.11:g.44624274T>C", - "vcf": { - "alt": "C", - "chr": "20", - "pos": "44624274", - "ref": "T" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000020.10:g.43252915T>C", - "vcf": { - "alt": "C", - "chr": "chr20", - "pos": "43252915", - "ref": "T" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000020.11:g.44624274T>C", - "vcf": { - "alt": "C", - "chr": "chr20", - "pos": "44624274", - "ref": "T" - } - } - }, - "submitted_variant": "20-43252915-T-C", - "transcript_description": "Homo sapiens adenosine deaminase (ADA), transcript variant 4, non-coding RNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "1-216219781-A-C": { - "NM_206933.2 Homo sapiens usherin (USH2A), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_009497.1:g.381958C>G", - "HGVS_predicted_protein_consequence": "NP_996816.2:p.(Thr2106Arg)", - "HGVS_transcript_variant": "NM_206933.2:c.6317C>G", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "USH2A", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000001.10:g.216219781A>C", - "vcf": { - "alt": "C", - "chr": "1", - "pos": "216219781", - "ref": "A" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000001.11:g.216046439A>C", - "vcf": { - "alt": "C", - "chr": "1", - "pos": "216046439", - "ref": "A" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000001.10:g.216219781A>C", - "vcf": { - "alt": "C", - "chr": "chr1", - "pos": "216219781", - "ref": "A" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000001.11:g.216046439A>C", - "vcf": { - "alt": "C", - "chr": "chr1", - "pos": "216046439", - "ref": "A" - } - } - }, - "submitted_variant": "1-216219781-A-C", - "transcript_description": "Homo sapiens usherin (USH2A), transcript variant 2, mRNA", 
- "validation_warnings": [] - }, - "flag": "gene_variant" - } - }, - { - "2-209113113-G-A,C,T": { - "NM_001282386.1 Homo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001269315.1:p.(Arg132Ser)", - "HGVS_transcript_variant": "NM_001282386.1:c.394C>A", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "IDH1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000002.11:g.209113113G>T", - "vcf": { - "alt": "T", - "chr": "2", - "pos": "209113113", - "ref": "G" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000002.12:g.208248389G>T", - "vcf": { - "alt": "T", - "chr": "2", - "pos": "208248389", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000002.11:g.209113113G>T", - "vcf": { - "alt": "T", - "chr": "chr2", - "pos": "209113113", - "ref": "G" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000002.12:g.208248389G>T", - "vcf": { - "alt": "T", - "chr": "chr2", - "pos": "208248389", - "ref": "G" - } - } - }, - "submitted_variant": "2-209113113-G-A,C,T", - "transcript_description": "Homo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 2, mRNA", - "validation_warnings": [ - "Multiple ALT sequences detected", - "auto-submitting all possible combinations", - "RefSeqGene record not available" - ] - }, - "NM_001282387.1 Homo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 3, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001269316.1:p.(Arg132Ser)", - "HGVS_transcript_variant": "NM_001282387.1:c.394C>A", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - 
"gene_symbol": "IDH1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000002.11:g.209113113G>T", - "vcf": { - "alt": "T", - "chr": "2", - "pos": "209113113", - "ref": "G" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000002.12:g.208248389G>T", - "vcf": { - "alt": "T", - "chr": "2", - "pos": "208248389", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000002.11:g.209113113G>T", - "vcf": { - "alt": "T", - "chr": "chr2", - "pos": "209113113", - "ref": "G" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000002.12:g.208248389G>T", - "vcf": { - "alt": "T", - "chr": "chr2", - "pos": "208248389", - "ref": "G" - } - } - }, - "submitted_variant": "2-209113113-G-A,C,T", - "transcript_description": "Homo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 3, mRNA", - "validation_warnings": [ - "Multiple ALT sequences detected", - "auto-submitting all possible combinations", - "RefSeqGene record not available" - ] - }, - "NM_005896.2 Homo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "LRG_610:g.22686C>A", - "HGVS_RefSeqGene_variant": "NG_023319.2:g.22686C>A", - "HGVS_predicted_protein_consequence": "NP_005887.2(LRG_610p1):p.(Arg132Ser)", - "HGVS_transcript_variant": "NM_005896.2:c.394C>A", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "IDH1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000002.11:g.209113113G>T", - "vcf": { - "alt": "T", - "chr": "2", - "pos": "209113113", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000002.11:g.209113113G>T", - "vcf": { - "alt": "T", - "chr": "chr2", - "pos": "209113113", - "ref": "G" - } - } - }, - "submitted_variant": "2-209113113-G-A,C,T", - 
"transcript_description": "Homo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 1, mRNA", - "validation_warnings": [ - "Multiple ALT sequences detected", - "auto-submitting all possible combinations", - "A more recent version of the selected reference sequence NM_005896.2 is available (NM_005896.3)", - "NM_005896.3:c.394C>A MUST be fully validated prior to use in reports", - "select_variants=NM_005896.3:c.394C>A" - ] - }, - "NM_005896.3 Homo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_005887.2(LRG_610p1):p.(Arg132Ser)", - "HGVS_transcript_variant": "NM_005896.3:c.394C>A", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "IDH1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000002.11:g.209113113G>T", - "vcf": { - "alt": "T", - "chr": "2", - "pos": "209113113", - "ref": "G" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000002.12:g.208248389G>T", - "vcf": { - "alt": "T", - "chr": "2", - "pos": "208248389", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000002.11:g.209113113G>T", - "vcf": { - "alt": "T", - "chr": "chr2", - "pos": "209113113", - "ref": "G" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000002.12:g.208248389G>T", - "vcf": { - "alt": "T", - "chr": "chr2", - "pos": "208248389", - "ref": "G" - } - } - }, - "submitted_variant": "2-209113113-G-A,C,T", - "transcript_description": "Homo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 1, mRNA", - "validation_warnings": [ - "Multiple ALT sequences detected", - "auto-submitting all possible combinations", - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - 
"NC_000005.9:g.35058665_35058666CA=": { - " ": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "NC_000005.9:g.35058665_35058666CA=", - "transcript_description": "", - "validation_warnings": [ - "Required information for NR_037910.1 is missing from the Universal Transcript Archive, please select an alternative version of NR_037910.1 by submitting NR_037910.1 or PRLR to https://variantvalidator.org/ref_finder/, or select an alternative genome build" - ] - }, - "NM_000949.5 Homo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_029042.1:g.177158_177159=", - "HGVS_predicted_protein_consequence": "NP_000940.1:p.?", - "HGVS_transcript_variant": "NM_000949.5:c.*6525_*6526=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "PRLR", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000005.9:g.35058665_35058666=", - "vcf": { - "alt": "CA", - "chr": "5", - "pos": "35058665", - "ref": "CA" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000005.9:g.35058665_35058666=", - "vcf": { - "alt": "CA", - "chr": "chr5", - "pos": "35058665", - "ref": "CA" - } - } - }, - "submitted_variant": "NC_000005.9:g.35058665_35058666CA=", - "transcript_description": "Homo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA", - "validation_warnings": [ - "A more recent version of the selected reference sequence NM_000949.5 is available (NM_000949.6)", - "NM_000949.6:c.*6525_*6526CT= MUST be fully validated prior to use in reports", - 
"select_variants=NM_000949.6:c.*6525_*6526CT=" - ] - }, - "NM_000949.6 Homo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_000940.1:p.?", - "HGVS_transcript_variant": "NM_000949.6:c.*6528del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "PRLR", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000005.9:g.35058667_35058670=", - "vcf": { - "alt": "AGAT", - "chr": "5", - "pos": "35058667", - "ref": "AGAT" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000005.10:g.35058563del", - "vcf": { - "alt": "C", - "chr": "5", - "pos": "35058560", - "ref": "CA" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000005.9:g.35058667_35058670=", - "vcf": { - "alt": "AGAT", - "chr": "chr5", - "pos": "35058667", - "ref": "AGAT" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000005.10:g.35058563del", - "vcf": { - "alt": "C", - "chr": "chr5", - "pos": "35058560", - "ref": "CA" - } - } - }, - "submitted_variant": "NC_000005.9:g.35058665_35058666CA=", - "transcript_description": "Homo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_000949.6 with genome build GRCh37", - "NM_000949.6:c.*6524_*6526 contains 1 transcript base(s) that fail to align to chromosome NC_000005.9", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "NM_001204314.1 Homo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001191243.1:p.?", - 
"HGVS_transcript_variant": "NM_001204314.1:c.*6525_*6526=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "PRLR", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000005.9:g.35058665_35058666=", - "vcf": { - "alt": "CA", - "chr": "5", - "pos": "35058665", - "ref": "CA" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000005.9:g.35058665_35058666=", - "vcf": { - "alt": "CA", - "chr": "chr5", - "pos": "35058665", - "ref": "CA" - } - } - }, - "submitted_variant": "NC_000005.9:g.35058665_35058666CA=", - "transcript_description": "Homo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA", - "validation_warnings": [ - "A more recent version of the selected reference sequence NM_001204314.1 is available (NM_001204314.2)", - "NM_001204314.2:c.*6525_*6526CT= MUST be fully validated prior to use in reports", - "select_variants=NM_001204314.2:c.*6525_*6526CT=", - "RefSeqGene record not available" - ] - }, - "NM_001204314.2 Homo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001191243.1:p.?", - "HGVS_transcript_variant": "NM_001204314.2:c.*6528del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "PRLR", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000005.9:g.35058667_35058670=", - "vcf": { - "alt": "AGAT", - "chr": "5", - "pos": "35058667", - "ref": "AGAT" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000005.10:g.35058563del", - "vcf": { - "alt": "C", - "chr": "5", - "pos": "35058560", - "ref": "CA" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000005.9:g.35058667_35058670=", - "vcf": { - "alt": "AGAT", - "chr": "chr5", - "pos": "35058667", - "ref": "AGAT" - } 
- }, - "hg38": { - "HGVS_genomic_description": "NC_000005.10:g.35058563del", - "vcf": { - "alt": "C", - "chr": "chr5", - "pos": "35058560", - "ref": "CA" - } - } - }, - "submitted_variant": "NC_000005.9:g.35058665_35058666CA=", - "transcript_description": "Homo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_001204314.2 with genome build GRCh37", - "NM_001204314.2:c.*6524_*6526 contains 1 transcript base(s) that fail to align to chromosome NC_000005.9", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "NM_001204316.1 Homo sapiens prolactin receptor (PRLR), transcript variant 3, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001191245.1:p.?", - "HGVS_transcript_variant": "NM_001204316.1:c.1009+7385_1009+7386=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "PRLR", - "genome_context_intronic_sequence": "NC_000005.9(NM_001204316.1):c.1009+7385_1009+7386=", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000005.9:g.35058665_35058666del", - "vcf": { - "alt": "G", - "chr": "5", - "pos": "35058663", - "ref": "GAC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000005.10:g.35058564_35058565del", - "vcf": { - "alt": "A", - "chr": "5", - "pos": "35058562", - "ref": "AAG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000005.9:g.35058665_35058666del", - "vcf": { - "alt": "G", - "chr": "chr5", - "pos": "35058663", - "ref": "GAC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000005.10:g.35058564_35058565del", - "vcf": { - "alt": "A", - "chr": "chr5", - "pos": "35058562", - "ref": "AAG" - } - } - }, - "submitted_variant": "NC_000005.9:g.35058665_35058666CA=", - 
"transcript_description": "Homo sapiens prolactin receptor (PRLR), transcript variant 3, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "NM_001204317.1 Homo sapiens prolactin receptor (PRLR), transcript variant 4, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001191246.1:p.?", - "HGVS_transcript_variant": "NM_001204317.1:c.856-9153_856-9152=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "PRLR", - "genome_context_intronic_sequence": "NC_000005.9(NM_001204317.1):c.856-9153_856-9152=", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000005.9:g.35058665_35058666del", - "vcf": { - "alt": "G", - "chr": "5", - "pos": "35058663", - "ref": "GAC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000005.10:g.35058560_35058561del", - "vcf": { - "alt": "G", - "chr": "5", - "pos": "35058558", - "ref": "GAC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000005.9:g.35058665_35058666del", - "vcf": { - "alt": "G", - "chr": "chr5", - "pos": "35058663", - "ref": "GAC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000005.10:g.35058560_35058561del", - "vcf": { - "alt": "G", - "chr": "chr5", - "pos": "35058558", - "ref": "GAC" - } - } - }, - "submitted_variant": "NC_000005.9:g.35058665_35058666CA=", - "transcript_description": "Homo sapiens prolactin receptor (PRLR), transcript variant 4, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "NM_001204318.1 Homo sapiens prolactin receptor (PRLR), transcript variant 5, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001191247.1:p.?", - "HGVS_transcript_variant": "NM_001204318.1:c.686-9153_686-9152=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - 
"gene_symbol": "PRLR", - "genome_context_intronic_sequence": "NC_000005.9(NM_001204318.1):c.686-9153_686-9152=", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000005.9:g.35058665_35058666del", - "vcf": { - "alt": "G", - "chr": "5", - "pos": "35058663", - "ref": "GAC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000005.10:g.35058560_35058561del", - "vcf": { - "alt": "G", - "chr": "5", - "pos": "35058558", - "ref": "GAC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000005.9:g.35058665_35058666del", - "vcf": { - "alt": "G", - "chr": "chr5", - "pos": "35058663", - "ref": "GAC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000005.10:g.35058560_35058561del", - "vcf": { - "alt": "G", - "chr": "chr5", - "pos": "35058558", - "ref": "GAC" - } - } - }, - "submitted_variant": "NC_000005.9:g.35058665_35058666CA=", - "transcript_description": "Homo sapiens prolactin receptor (PRLR), transcript variant 5, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "NC_000002.11:g.73675227_73675229delTCTinsTCTCTC": { - "NM_015120.4 Homo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA": { - "HGVS_LRG_transcript_variant": "LRG_741t1:c.1580_1581insCCT", - "HGVS_LRG_variant": "LRG_741:g.67352_67353insCCT", - "HGVS_RefSeqGene_variant": "NG_011690.1:g.67352_67353insCCT", - "HGVS_predicted_protein_consequence": "NP_055935.4(LRG_741p1):p.(Leu527dup)", - "HGVS_transcript_variant": "NM_015120.4:c.1580_1581insCCT", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "ALMS1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000002.11:g.73675231_73675232insCCT", - "vcf": { - "alt": "TCTC", - "chr": "2", - "pos": "73675229", - "ref": "T" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000002.12:g.73448104_73448105insCCT", - "vcf": { - 
"alt": "TCTC", - "chr": "2", - "pos": "73448102", - "ref": "T" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000002.11:g.73675231_73675232insCCT", - "vcf": { - "alt": "TCTC", - "chr": "chr2", - "pos": "73675229", - "ref": "T" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000002.12:g.73448104_73448105insCCT", - "vcf": { - "alt": "TCTC", - "chr": "chr2", - "pos": "73448102", - "ref": "T" - } - } - }, - "submitted_variant": "NC_000002.11:g.73675227_73675229delTCTinsTCTCTC", - "transcript_description": "Homo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA", - "validation_warnings": [] - }, - "flag": "gene_variant" - } - }, - { - "NM_000828.4:c.-2dupG": { - "NM_000828.4 Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_000819.3:p.?", - "HGVS_transcript_variant": "NM_000828.4:c.-2dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "GRIA3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000023.10:g.122318386_122318387insGG", - "vcf": { - "alt": "AGG", - "chr": "X", - "pos": "122318386", - "ref": "A" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000023.11:g.123184534dup", - "vcf": { - "alt": "AGG", - "chr": "X", - "pos": "123184533", - "ref": "AG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000023.10:g.122318386_122318387insGG", - "vcf": { - "alt": "AGG", - "chr": "chrX", - "pos": "122318386", - "ref": "A" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000023.11:g.123184534dup", - "vcf": { - "alt": "AGG", - "chr": "chrX", - "pos": "123184533", - "ref": "AG" - } - } - }, - "submitted_variant": "NM_000828.4:c.-2dupG", - "transcript_description": "Homo sapiens glutamate ionotropic receptor AMPA type 
subunit 3 (GRIA3), transcript variant 2, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37", - "NM_000828.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "X-122318386-A-AGG": { - "NM_000828.4 Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_000819.3:p.?", - "HGVS_transcript_variant": "NM_000828.4:c.-2dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "GRIA3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000023.10:g.122318386_122318387insGG", - "vcf": { - "alt": "AGG", - "chr": "X", - "pos": "122318386", - "ref": "A" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000023.11:g.123184534dup", - "vcf": { - "alt": "AGG", - "chr": "X", - "pos": "123184533", - "ref": "AG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000023.10:g.122318386_122318387insGG", - "vcf": { - "alt": "AGG", - "chr": "chrX", - "pos": "122318386", - "ref": "A" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000023.11:g.123184534dup", - "vcf": { - "alt": "AGG", - "chr": "chrX", - "pos": "123184533", - "ref": "AG" - } - } - }, - "submitted_variant": "X-122318386-A-AGG", - "transcript_description": "Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA", - "validation_warnings": [ - "NC_000023.10:g.122318386A>AGG automapped to NC_000023.10:g.122318386_122318387insGG", - "The displayed variants may be artefacts of 
aligning NM_000828.4 with genome build GRCh37", - "NM_000828.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "NM_001256743.1 Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 3, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001243672.1:p.?", - "HGVS_transcript_variant": "NM_001256743.1:c.-2dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "GRIA3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000023.10:g.122318386_122318387insGG", - "vcf": { - "alt": "AGG", - "chr": "X", - "pos": "122318386", - "ref": "A" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000023.11:g.123184534dup", - "vcf": { - "alt": "AGG", - "chr": "X", - "pos": "123184533", - "ref": "AG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000023.10:g.122318386_122318387insGG", - "vcf": { - "alt": "AGG", - "chr": "chrX", - "pos": "122318386", - "ref": "A" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000023.11:g.123184534dup", - "vcf": { - "alt": "AGG", - "chr": "chrX", - "pos": "123184533", - "ref": "AG" - } - } - }, - "submitted_variant": "X-122318386-A-AGG", - "transcript_description": "Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 3, mRNA", - "validation_warnings": [ - "NC_000023.10:g.122318386A>AGG automapped to NC_000023.10:g.122318386_122318387insGG", - "The displayed variants may be artefacts of aligning NM_001256743.1 with genome build GRCh37", - "NM_001256743.1:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10", - "Caution should be 
used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "NM_007325.4 Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_009377.1:g.5292dup", - "HGVS_predicted_protein_consequence": "NP_015564.4:p.?", - "HGVS_transcript_variant": "NM_007325.4:c.-2dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "GRIA3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000023.10:g.122318386_122318387insGG", - "vcf": { - "alt": "AGG", - "chr": "X", - "pos": "122318386", - "ref": "A" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000023.11:g.123184534dup", - "vcf": { - "alt": "AGG", - "chr": "X", - "pos": "123184533", - "ref": "AG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000023.10:g.122318386_122318387insGG", - "vcf": { - "alt": "AGG", - "chr": "chrX", - "pos": "122318386", - "ref": "A" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000023.11:g.123184534dup", - "vcf": { - "alt": "AGG", - "chr": "chrX", - "pos": "123184533", - "ref": "AG" - } - } - }, - "submitted_variant": "X-122318386-A-AGG", - "transcript_description": "Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 1, mRNA", - "validation_warnings": [ - "NC_000023.10:g.122318386A>AGG automapped to NC_000023.10:g.122318386_122318387insGG", - "The displayed variants may be artefacts of aligning NM_007325.4 with genome build GRCh37", - "NM_007325.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "flag": "gene_variant" - } - }, - { - 
"NM_000828.4:c.-2G>T": { - "NM_000828.4 Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_000819.3:p.?", - "HGVS_transcript_variant": "NM_000828.4:c.-2G>T", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "GRIA3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000023.10:g.122318386_122318387insT", - "vcf": { - "alt": "AT", - "chr": "X", - "pos": "122318386", - "ref": "A" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000023.11:g.123184534G>T", - "vcf": { - "alt": "T", - "chr": "X", - "pos": "123184534", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000023.10:g.122318386_122318387insT", - "vcf": { - "alt": "AT", - "chr": "chrX", - "pos": "122318386", - "ref": "A" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000023.11:g.123184534G>T", - "vcf": { - "alt": "T", - "chr": "chrX", - "pos": "123184534", - "ref": "G" - } - } - }, - "submitted_variant": "NM_000828.4:c.-2G>T", - "transcript_description": "Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37", - "NM_000828.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000828.4:c.-2G=": { - "NM_000828.4 Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - 
"HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_000819.3:p.?", - "HGVS_transcript_variant": "NM_000828.4:c.-3_-1=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "GRIA3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000023.10:g.122318386_122318387insG", - "vcf": { - "alt": "AG", - "chr": "X", - "pos": "122318386", - "ref": "A" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000023.11:g.123184533_123184535=", - "vcf": { - "alt": "AGC", - "chr": "X", - "pos": "123184533", - "ref": "AGC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000023.10:g.122318386_122318387insG", - "vcf": { - "alt": "AG", - "chr": "chrX", - "pos": "122318386", - "ref": "A" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000023.11:g.123184533_123184535=", - "vcf": { - "alt": "AGC", - "chr": "chrX", - "pos": "123184533", - "ref": "AGC" - } - } - }, - "submitted_variant": "NM_000828.4:c.-2G=", - "transcript_description": "Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37", - "NM_000828.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "X-122318386-A-AT": { - "NM_000828.4 Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_000819.3:p.?", - "HGVS_transcript_variant": "NM_000828.4:c.-2G>T", - "RefSeqGene_context_intronic_sequence": "", - 
"alt_genomic_loci": [], - "gene_symbol": "GRIA3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000023.10:g.122318386_122318387insT", - "vcf": { - "alt": "AT", - "chr": "X", - "pos": "122318386", - "ref": "A" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000023.11:g.123184534G>T", - "vcf": { - "alt": "T", - "chr": "X", - "pos": "123184534", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000023.10:g.122318386_122318387insT", - "vcf": { - "alt": "AT", - "chr": "chrX", - "pos": "122318386", - "ref": "A" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000023.11:g.123184534G>T", - "vcf": { - "alt": "T", - "chr": "chrX", - "pos": "123184534", - "ref": "G" - } - } - }, - "submitted_variant": "X-122318386-A-AT", - "transcript_description": "Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA", - "validation_warnings": [ - "NC_000023.10:g.122318386A>AT automapped to NC_000023.10:g.122318386_122318387insT", - "The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37", - "NM_000828.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "NM_001256743.1 Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 3, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001243672.1:p.?", - "HGVS_transcript_variant": "NM_001256743.1:c.-2G>T", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "GRIA3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": 
"NC_000023.10:g.122318386_122318387insT", - "vcf": { - "alt": "AT", - "chr": "X", - "pos": "122318386", - "ref": "A" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000023.11:g.123184534G>T", - "vcf": { - "alt": "T", - "chr": "X", - "pos": "123184534", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000023.10:g.122318386_122318387insT", - "vcf": { - "alt": "AT", - "chr": "chrX", - "pos": "122318386", - "ref": "A" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000023.11:g.123184534G>T", - "vcf": { - "alt": "T", - "chr": "chrX", - "pos": "123184534", - "ref": "G" - } - } - }, - "submitted_variant": "X-122318386-A-AT", - "transcript_description": "Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 3, mRNA", - "validation_warnings": [ - "NC_000023.10:g.122318386A>AT automapped to NC_000023.10:g.122318386_122318387insT", - "The displayed variants may be artefacts of aligning NM_001256743.1 with genome build GRCh37", - "NM_001256743.1:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "NM_007325.4 Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_009377.1:g.5292G>T", - "HGVS_predicted_protein_consequence": "NP_015564.4:p.?", - "HGVS_transcript_variant": "NM_007325.4:c.-2G>T", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "GRIA3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000023.10:g.122318386_122318387insT", - "vcf": { - "alt": "AT", - "chr": "X", - "pos": "122318386", - "ref": "A" - } - }, - "GRCh38": { - "HGVS_genomic_description": 
"NC_000023.11:g.123184534G>T", - "vcf": { - "alt": "T", - "chr": "X", - "pos": "123184534", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000023.10:g.122318386_122318387insT", - "vcf": { - "alt": "AT", - "chr": "chrX", - "pos": "122318386", - "ref": "A" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000023.11:g.123184534G>T", - "vcf": { - "alt": "T", - "chr": "chrX", - "pos": "123184534", - "ref": "G" - } - } - }, - "submitted_variant": "X-122318386-A-AT", - "transcript_description": "Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 1, mRNA", - "validation_warnings": [ - "NC_000023.10:g.122318386A>AT automapped to NC_000023.10:g.122318386_122318387insT", - "The displayed variants may be artefacts of aligning NM_007325.4 with genome build GRCh37", - "NM_007325.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000828.4:c.-2_-1insT": { - "NM_000828.4 Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_000819.3:p.?", - "HGVS_transcript_variant": "NM_000828.4:c.-2delinsCT", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "GRIA3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000023.10:g.122318387_122318388insTC", - "vcf": { - "alt": "ACT", - "chr": "X", - "pos": "122318386", - "ref": "A" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000023.11:g.123184534delinsCT", - "vcf": { - "alt": "CT", - "chr": "X", - "pos": "123184534", - "ref": "G" - } - }, - "hg19": { - 
"HGVS_genomic_description": "NC_000023.10:g.122318387_122318388insTC", - "vcf": { - "alt": "ACT", - "chr": "chrX", - "pos": "122318386", - "ref": "A" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000023.11:g.123184534delinsCT", - "vcf": { - "alt": "CT", - "chr": "chrX", - "pos": "123184534", - "ref": "G" - } - } - }, - "submitted_variant": "NM_000828.4:c.-2_-1insT", - "transcript_description": "Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37", - "NM_000828.4:c.-2_-1 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000828.4:c.-3_-2insT": { - "NM_000828.4 Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_000819.3:p.?", - "HGVS_transcript_variant": "NM_000828.4:c.-3_-2insT", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "GRIA3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000023.10:g.122318386_122318387insTG", - "vcf": { - "alt": "ATG", - "chr": "X", - "pos": "122318386", - "ref": "A" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000023.11:g.123184533_123184534insT", - "vcf": { - "alt": "AT", - "chr": "X", - "pos": "123184533", - "ref": "A" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000023.10:g.122318386_122318387insTG", - "vcf": { - "alt": "ATG", - "chr": "chrX", - "pos": "122318386", - "ref": "A" - } - }, - "hg38": { - 
"HGVS_genomic_description": "NC_000023.11:g.123184533_123184534insT", - "vcf": { - "alt": "AT", - "chr": "chrX", - "pos": "123184533", - "ref": "A" - } - } - }, - "submitted_variant": "NM_000828.4:c.-3_-2insT", - "transcript_description": "Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37", - "NM_000828.4:c.-3_-1 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000828.4:c.-2delGinsTT": { - "NM_000828.4 Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_000819.3:p.?", - "HGVS_transcript_variant": "NM_000828.4:c.-2delinsTT", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "GRIA3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000023.10:g.122318386_122318387insTT", - "vcf": { - "alt": "ATT", - "chr": "X", - "pos": "122318386", - "ref": "A" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000023.11:g.123184534delinsTT", - "vcf": { - "alt": "TT", - "chr": "X", - "pos": "123184534", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000023.10:g.122318386_122318387insTT", - "vcf": { - "alt": "ATT", - "chr": "chrX", - "pos": "122318386", - "ref": "A" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000023.11:g.123184534delinsTT", - "vcf": { - "alt": "TT", - "chr": "chrX", - "pos": "123184534", - "ref": "G" - } - } - }, - 
"submitted_variant": "NM_000828.4:c.-2delGinsTT", - "transcript_description": "Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37", - "NM_000828.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000828.4:c.-2_-1delGCinsTT": { - "NM_000828.4 Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_000819.3:p.?", - "HGVS_transcript_variant": "NM_000828.4:c.-2_-1delinsTT", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "GRIA3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000023.10:g.122318387delinsTT", - "vcf": { - "alt": "TT", - "chr": "X", - "pos": "122318387", - "ref": "C" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000023.11:g.123184534_123184535delinsTT", - "vcf": { - "alt": "TT", - "chr": "X", - "pos": "123184534", - "ref": "GC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000023.10:g.122318387delinsTT", - "vcf": { - "alt": "TT", - "chr": "chrX", - "pos": "122318387", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000023.11:g.123184534_123184535delinsTT", - "vcf": { - "alt": "TT", - "chr": "chrX", - "pos": "123184534", - "ref": "GC" - } - } - }, - "submitted_variant": "NM_000828.4:c.-2_-1delGCinsTT", - "transcript_description": "Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript 
variant 2, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37", - "NM_000828.4:c.-2_-1 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_000828.4:c.-3_-2delAGinsTT": { - "NM_000828.4 Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_000819.3:p.?", - "HGVS_transcript_variant": "NM_000828.4:c.-3_-2delinsTT", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "GRIA3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000023.10:g.122318386delinsTT", - "vcf": { - "alt": "TT", - "chr": "X", - "pos": "122318386", - "ref": "A" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000023.11:g.123184533_123184534delinsTT", - "vcf": { - "alt": "TT", - "chr": "X", - "pos": "123184533", - "ref": "AG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000023.10:g.122318386delinsTT", - "vcf": { - "alt": "TT", - "chr": "chrX", - "pos": "122318386", - "ref": "A" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000023.11:g.123184533_123184534delinsTT", - "vcf": { - "alt": "TT", - "chr": "chrX", - "pos": "123184533", - "ref": "AG" - } - } - }, - "submitted_variant": "NM_000828.4:c.-3_-2delAGinsTT", - "transcript_description": "Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "15-72105929-C-C": { - 
"NM_014249.2 Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_055064.1:p.(Thr318HisfsTer23)", - "HGVS_transcript_variant": "NM_014249.2:c.951dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "NR2E3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000015.9:g.72105926_72105932=", - "vcf": { - "alt": "GGACCCC", - "chr": "15", - "pos": "72105926", - "ref": "GGACCCC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000015.9:g.72105926_72105932=", - "vcf": { - "alt": "GGACCCC", - "chr": "chr15", - "pos": "72105926", - "ref": "GGACCCC" - } - } - }, - "submitted_variant": "15-72105929-C-C", - "transcript_description": "Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_014249.2 with genome build GRCh37", - "NC_000015.9:g.72105927_72105931 contains 1 genomic base(s) that fail to align to transcript NM_014249.2", - "Genome position NC_000015.9:g.72105930 aligns within a 1-bp gap in transcript NM_014249.2 between positions c.947_948", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "A more recent version of the selected reference sequence NM_014249.2 is available (NM_014249.3)", - "NM_014249.3:c.951dupC MUST be fully validated prior to use in reports", - "select_variants=NM_014249.3:c.951dupC", - "RefSeqGene record not available" - ] - }, - "NM_014249.3 Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_009113.1:g.8039dup", 
- "HGVS_predicted_protein_consequence": "NP_055064.1:p.(Thr318HisfsTer23)", - "HGVS_transcript_variant": "NM_014249.3:c.951dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "NR2E3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000015.9:g.72105926_72105932=", - "vcf": { - "alt": "GGACCCC", - "chr": "15", - "pos": "72105926", - "ref": "GGACCCC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000015.10:g.71813592dup", - "vcf": { - "alt": "ACC", - "chr": "15", - "pos": "71813588", - "ref": "AC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000015.9:g.72105926_72105932=", - "vcf": { - "alt": "GGACCCC", - "chr": "chr15", - "pos": "72105926", - "ref": "GGACCCC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000015.10:g.71813592dup", - "vcf": { - "alt": "ACC", - "chr": "chr15", - "pos": "71813588", - "ref": "AC" - } - } - }, - "submitted_variant": "15-72105929-C-C", - "transcript_description": "Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_014249.3 with genome build GRCh37", - "NC_000015.9:g.72105927_72105931 contains 1 genomic base(s) that fail to align to transcript NM_014249.3", - "Genome position NC_000015.9:g.72105930 aligns within a 1-bp gap in transcript NM_014249.3 between positions c.947_948", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "NM_016346.2 Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_057430.1:p.(Thr318HisfsTer23)", - "HGVS_transcript_variant": "NM_016346.2:c.951dup", - 
"RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "NR2E3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000015.9:g.72105926_72105932=", - "vcf": { - "alt": "GGACCCC", - "chr": "15", - "pos": "72105926", - "ref": "GGACCCC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000015.9:g.72105926_72105932=", - "vcf": { - "alt": "GGACCCC", - "chr": "chr15", - "pos": "72105926", - "ref": "GGACCCC" - } - } - }, - "submitted_variant": "15-72105929-C-C", - "transcript_description": "Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_016346.2 with genome build GRCh37", - "Genome position NC_000015.9:g.72105930 aligns within a 1-bp gap in transcript NM_016346.2 between positions c.947_948", - "NC_000015.9:g.72105927_72105931 contains 1 genomic base(s) that fail to align to transcript NM_016346.2", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "A more recent version of the selected reference sequence NM_016346.2 is available (NM_016346.3)", - "NM_016346.3:c.951dupC MUST be fully validated prior to use in reports", - "select_variants=NM_016346.3:c.951dupC", - "RefSeqGene record not available" - ] - }, - "NM_016346.3 Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_057430.1:p.(Thr318HisfsTer23)", - "HGVS_transcript_variant": "NM_016346.3:c.951dup", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "NR2E3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": 
"NC_000015.9:g.72105926_72105932=", - "vcf": { - "alt": "GGACCCC", - "chr": "15", - "pos": "72105926", - "ref": "GGACCCC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000015.10:g.71813592dup", - "vcf": { - "alt": "ACC", - "chr": "15", - "pos": "71813588", - "ref": "AC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000015.9:g.72105926_72105932=", - "vcf": { - "alt": "GGACCCC", - "chr": "chr15", - "pos": "72105926", - "ref": "GGACCCC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000015.10:g.71813592dup", - "vcf": { - "alt": "ACC", - "chr": "chr15", - "pos": "71813588", - "ref": "AC" - } - } - }, - "submitted_variant": "15-72105929-C-C", - "transcript_description": "Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA", - "validation_warnings": [ - "The displayed variants may be artefacts of aligning NM_016346.3 with genome build GRCh37", - "NC_000015.9:g.72105927_72105931 contains 1 genomic base(s) that fail to align to transcript NM_016346.3", - "Genome position NC_000015.9:g.72105930 aligns within a 1-bp gap in transcript NM_016346.3 between positions c.947_948", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "15-72105928-AC-ATT": { - "NM_014249.2 Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_055064.1:p.(Pro317SerfsTer8)", - "HGVS_transcript_variant": "NM_014249.2:c.947_948insTT", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "NR2E3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "15-72105928-AC-ATT", - "transcript_description": "Homo sapiens nuclear 
receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA", - "validation_warnings": [ - "NC_000015.9:g.72105928AC>ATT automapped to NC_000015.9:g.72105929delCinsTT", - "The displayed variants may be artefacts of aligning NM_014249.2 with genome build GRCh37", - "Genome position NC_000015.9:g.72105930 aligns within a 1-bp gap in transcript NM_014249.2 between positions c.947_948", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "A more recent version of the selected reference sequence NM_014249.2 is available (NM_014249.3)", - "NM_014249.3:c.947_948insTT MUST be fully validated prior to use in reports", - "select_variants=NM_014249.3:c.947_948insTT", - "RefSeqGene record not available" - ] - }, - "NM_014249.3 Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_009113.1:g.8035_8036insTT", - "HGVS_predicted_protein_consequence": "NP_055064.1:p.(Pro317SerfsTer8)", - "HGVS_transcript_variant": "NM_014249.3:c.947_948insTT", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "NR2E3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000015.9:g.72105929delinsTT", - "vcf": { - "alt": "TT", - "chr": "15", - "pos": "72105929", - "ref": "C" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000015.10:g.71813588_71813589insTT", - "vcf": { - "alt": "ATT", - "chr": "15", - "pos": "71813588", - "ref": "A" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000015.9:g.72105929delinsTT", - "vcf": { - "alt": "TT", - "chr": "chr15", - "pos": "72105929", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000015.10:g.71813588_71813589insTT", - "vcf": { - "alt": "ATT", - "chr": "chr15", - "pos": "71813588", - "ref": "A" - } 
- } - }, - "submitted_variant": "15-72105928-AC-ATT", - "transcript_description": "Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA", - "validation_warnings": [ - "NC_000015.9:g.72105928AC>ATT automapped to NC_000015.9:g.72105929delCinsTT", - "NM_014249.3:c.947_948insTT can not be mapped directly to genome build GRCh37. See Alternative genomic loci for aligned genomic positions" - ] - }, - "NM_016346.2 Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_057430.1:p.(Pro317SerfsTer8)", - "HGVS_transcript_variant": "NM_016346.2:c.947_948insTT", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "NR2E3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "15-72105928-AC-ATT", - "transcript_description": "Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA", - "validation_warnings": [ - "NC_000015.9:g.72105928AC>ATT automapped to NC_000015.9:g.72105929delCinsTT", - "The displayed variants may be artefacts of aligning NM_016346.2 with genome build GRCh37", - "Genome position NC_000015.9:g.72105930 aligns within a 1-bp gap in transcript NM_016346.2 between positions c.947_948", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "A more recent version of the selected reference sequence NM_016346.2 is available (NM_016346.3)", - "NM_016346.3:c.947_948insTT MUST be fully validated prior to use in reports", - "select_variants=NM_016346.3:c.947_948insTT", - "RefSeqGene record not available" - ] - }, - "NM_016346.3 Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - 
"HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_057430.1:p.(Pro317SerfsTer8)", - "HGVS_transcript_variant": "NM_016346.3:c.947_948insTT", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "NR2E3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "15-72105928-AC-ATT", - "transcript_description": "Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA", - "validation_warnings": [ - "NC_000015.9:g.72105928AC>ATT automapped to NC_000015.9:g.72105929delCinsTT", - "RefSeqGene record not available", - "NM_016346.3:c.947_948insTT can not be mapped directly to genome build GRCh37. See alt_genomic_loci for aligned genomic positions" - ] - }, - "flag": "gene_variant" - } - }, - { - "15-72105928-ACC-ATT": { - "NM_014249.2 Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_055064.1:p.(Pro317SerfsTer24)", - "HGVS_transcript_variant": "NM_014249.2:c.948delinsTT", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "NR2E3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000015.9:g.72105929_72105930delinsTT", - "vcf": { - "alt": "TT", - "chr": "15", - "pos": "72105929", - "ref": "CC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000015.9:g.72105929_72105930delinsTT", - "vcf": { - "alt": "TT", - "chr": "chr15", - "pos": "72105929", - "ref": "CC" - } - } - }, - "submitted_variant": "15-72105928-ACC-ATT", - "transcript_description": "Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA", - "validation_warnings": [ - "NC_000015.9:g.72105928ACC>ATT automapped to 
NC_000015.9:g.72105929_72105930delCCinsTT", - "The displayed variants may be artefacts of aligning NM_014249.2 with genome build GRCh37", - "NC_000015.9:g.72105928_72105930 contains 1 genomic base(s) that fail to align to transcript NM_014249.2", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "A more recent version of the selected reference sequence NM_014249.2 is available (NM_014249.3)", - "NM_014249.3:c.948delCinsTT MUST be fully validated prior to use in reports", - "select_variants=NM_014249.3:c.948delCinsTT", - "RefSeqGene record not available" - ] - }, - "NM_014249.3 Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_009113.1:g.8036delinsTT", - "HGVS_predicted_protein_consequence": "NP_055064.1:p.(Pro317SerfsTer24)", - "HGVS_transcript_variant": "NM_014249.3:c.948delinsTT", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "NR2E3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000015.9:g.72105929_72105930delinsTT", - "vcf": { - "alt": "TT", - "chr": "15", - "pos": "72105929", - "ref": "CC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000015.10:g.71813589delinsTT", - "vcf": { - "alt": "TT", - "chr": "15", - "pos": "71813589", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000015.9:g.72105929_72105930delinsTT", - "vcf": { - "alt": "TT", - "chr": "chr15", - "pos": "72105929", - "ref": "CC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000015.10:g.71813589delinsTT", - "vcf": { - "alt": "TT", - "chr": "chr15", - "pos": "71813589", - "ref": "C" - } - } - }, - "submitted_variant": "15-72105928-ACC-ATT", - "transcript_description": "Homo sapiens nuclear receptor subfamily 2 group E member 3 
(NR2E3), transcript variant 2, mRNA", - "validation_warnings": [ - "NC_000015.9:g.72105928ACC>ATT automapped to NC_000015.9:g.72105929_72105930delCCinsTT", - "The displayed variants may be artefacts of aligning NM_014249.3 with genome build GRCh37", - "NC_000015.9:g.72105928_72105930 contains 1 genomic base(s) that fail to align to transcript NM_014249.3", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "NM_016346.2 Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_057430.1:p.(Pro317SerfsTer24)", - "HGVS_transcript_variant": "NM_016346.2:c.948delinsTT", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "NR2E3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000015.9:g.72105929_72105930delinsTT", - "vcf": { - "alt": "TT", - "chr": "15", - "pos": "72105929", - "ref": "CC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000015.9:g.72105929_72105930delinsTT", - "vcf": { - "alt": "TT", - "chr": "chr15", - "pos": "72105929", - "ref": "CC" - } - } - }, - "submitted_variant": "15-72105928-ACC-ATT", - "transcript_description": "Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA", - "validation_warnings": [ - "NC_000015.9:g.72105928ACC>ATT automapped to NC_000015.9:g.72105929_72105930delCCinsTT", - "The displayed variants may be artefacts of aligning NM_016346.2 with genome build GRCh37", - "NC_000015.9:g.72105928_72105930 contains 1 genomic base(s) that fail to align to transcript NM_016346.2", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "A more recent version of the 
selected reference sequence NM_016346.2 is available (NM_016346.3)", - "NM_016346.3:c.948delCinsTT MUST be fully validated prior to use in reports", - "select_variants=NM_016346.3:c.948delCinsTT", - "RefSeqGene record not available" - ] - }, - "NM_016346.3 Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_057430.1:p.(Pro317SerfsTer24)", - "HGVS_transcript_variant": "NM_016346.3:c.948delinsTT", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "NR2E3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000015.9:g.72105929_72105930delinsTT", - "vcf": { - "alt": "TT", - "chr": "15", - "pos": "72105929", - "ref": "CC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000015.10:g.71813589delinsTT", - "vcf": { - "alt": "TT", - "chr": "15", - "pos": "71813589", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000015.9:g.72105929_72105930delinsTT", - "vcf": { - "alt": "TT", - "chr": "chr15", - "pos": "72105929", - "ref": "CC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000015.10:g.71813589delinsTT", - "vcf": { - "alt": "TT", - "chr": "chr15", - "pos": "71813589", - "ref": "C" - } - } - }, - "submitted_variant": "15-72105928-ACC-ATT", - "transcript_description": "Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA", - "validation_warnings": [ - "NC_000015.9:g.72105928ACC>ATT automapped to NC_000015.9:g.72105929_72105930delCCinsTT", - "The displayed variants may be artefacts of aligning NM_016346.3 with genome build GRCh37", - "NC_000015.9:g.72105928_72105930 contains 1 genomic base(s) that fail to align to transcript NM_016346.3", - "Caution should be used when reporting the displayed variant 
descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "15-72105927-GACC-GTT": { - "NM_014249.2 Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_055064.1:p.(Asp316Val)", - "HGVS_transcript_variant": "NM_014249.2:c.947_948delinsTT", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "NR2E3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000015.9:g.72105928_72105930delinsTT", - "vcf": { - "alt": "TT", - "chr": "15", - "pos": "72105928", - "ref": "ACC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000015.9:g.72105928_72105930delinsTT", - "vcf": { - "alt": "TT", - "chr": "chr15", - "pos": "72105928", - "ref": "ACC" - } - } - }, - "submitted_variant": "15-72105927-GACC-GTT", - "transcript_description": "Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA", - "validation_warnings": [ - "NC_000015.9:g.72105927GACC>GTT automapped to NC_000015.9:g.72105928_72105930delACCinsTT", - "The displayed variants may be artefacts of aligning NM_014249.2 with genome build GRCh37", - "NC_000015.9:g.72105927_72105930 contains 1 genomic base(s) that fail to align to transcript NM_014249.2", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "A more recent version of the selected reference sequence NM_014249.2 is available (NM_014249.3)", - "NM_014249.3:c.947_948delACinsTT MUST be fully validated prior to use in reports", - "select_variants=NM_014249.3:c.947_948delACinsTT", - "RefSeqGene record not available" - ] - }, - "NM_014249.3 Homo sapiens nuclear receptor subfamily 2 group E 
member 3 (NR2E3), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_009113.1:g.8035_8036delinsTT", - "HGVS_predicted_protein_consequence": "NP_055064.1:p.(Asp316Val)", - "HGVS_transcript_variant": "NM_014249.3:c.947_948delinsTT", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "NR2E3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000015.9:g.72105928_72105930delinsTT", - "vcf": { - "alt": "TT", - "chr": "15", - "pos": "72105928", - "ref": "ACC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000015.10:g.71813588_71813589delinsTT", - "vcf": { - "alt": "TT", - "chr": "15", - "pos": "71813588", - "ref": "AC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000015.9:g.72105928_72105930delinsTT", - "vcf": { - "alt": "TT", - "chr": "chr15", - "pos": "72105928", - "ref": "ACC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000015.10:g.71813588_71813589delinsTT", - "vcf": { - "alt": "TT", - "chr": "chr15", - "pos": "71813588", - "ref": "AC" - } - } - }, - "submitted_variant": "15-72105927-GACC-GTT", - "transcript_description": "Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA", - "validation_warnings": [ - "NC_000015.9:g.72105927GACC>GTT automapped to NC_000015.9:g.72105928_72105930delACCinsTT", - "The displayed variants may be artefacts of aligning NM_014249.3 with genome build GRCh37", - "NC_000015.9:g.72105927_72105930 contains 1 genomic base(s) that fail to align to transcript NM_014249.3", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "NM_016346.2 Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - 
"HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_057430.1:p.(Asp316Val)", - "HGVS_transcript_variant": "NM_016346.2:c.947_948delinsTT", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "NR2E3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000015.9:g.72105928_72105930delinsTT", - "vcf": { - "alt": "TT", - "chr": "15", - "pos": "72105928", - "ref": "ACC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000015.9:g.72105928_72105930delinsTT", - "vcf": { - "alt": "TT", - "chr": "chr15", - "pos": "72105928", - "ref": "ACC" - } - } - }, - "submitted_variant": "15-72105927-GACC-GTT", - "transcript_description": "Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA", - "validation_warnings": [ - "NC_000015.9:g.72105927GACC>GTT automapped to NC_000015.9:g.72105928_72105930delACCinsTT", - "The displayed variants may be artefacts of aligning NM_016346.2 with genome build GRCh37", - "NC_000015.9:g.72105927_72105930 contains 1 genomic base(s) that fail to align to transcript NM_016346.2", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "A more recent version of the selected reference sequence NM_016346.2 is available (NM_016346.3)", - "NM_016346.3:c.947_948delACinsTT MUST be fully validated prior to use in reports", - "select_variants=NM_016346.3:c.947_948delACinsTT", - "RefSeqGene record not available" - ] - }, - "NM_016346.3 Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_057430.1:p.(Asp316Val)", - "HGVS_transcript_variant": "NM_016346.3:c.947_948delinsTT", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - 
"gene_symbol": "NR2E3", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000015.9:g.72105928_72105930delinsTT", - "vcf": { - "alt": "TT", - "chr": "15", - "pos": "72105928", - "ref": "ACC" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000015.10:g.71813588_71813589delinsTT", - "vcf": { - "alt": "TT", - "chr": "15", - "pos": "71813588", - "ref": "AC" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000015.9:g.72105928_72105930delinsTT", - "vcf": { - "alt": "TT", - "chr": "chr15", - "pos": "72105928", - "ref": "ACC" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000015.10:g.71813588_71813589delinsTT", - "vcf": { - "alt": "TT", - "chr": "chr15", - "pos": "71813588", - "ref": "AC" - } - } - }, - "submitted_variant": "15-72105927-GACC-GTT", - "transcript_description": "Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA", - "validation_warnings": [ - "NC_000015.9:g.72105927GACC>GTT automapped to NC_000015.9:g.72105928_72105930delACCinsTT", - "The displayed variants may be artefacts of aligning NM_016346.3 with genome build GRCh37", - "NC_000015.9:g.72105927_72105930 contains 1 genomic base(s) that fail to align to transcript NM_016346.3", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin", - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "19-41123093-A-AG": { - "NM_001042544.1 Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_021201.1:g.29022_29024=", - "HGVS_predicted_protein_consequence": "NP_001036009.1:p.(Gln1078=)", - "HGVS_transcript_variant": "NM_001042544.1:c.3233_3235=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "LTBP4", - 
"genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000019.9:g.41123095dup", - "vcf": { - "alt": "AGG", - "chr": "19", - "pos": "41123093", - "ref": "AG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000019.10:g.40617187_40617189=", - "vcf": { - "alt": "AGG", - "chr": "19", - "pos": "40617187", - "ref": "AGG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000019.9:g.41123095dup", - "vcf": { - "alt": "AGG", - "chr": "chr19", - "pos": "41123093", - "ref": "AG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000019.10:g.40617187_40617189=", - "vcf": { - "alt": "AGG", - "chr": "chr19", - "pos": "40617187", - "ref": "AGG" - } - } - }, - "submitted_variant": "19-41123093-A-AG", - "transcript_description": "Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA", - "validation_warnings": [ - "NC_000019.9:g.41123093A>AG automapped to NC_000019.9:g.41123095dupG", - "The displayed variants may be artefacts of aligning NM_001042544.1 with genome build GRCh37", - "NM_001042544.1:c.3233_3235 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "NM_001042545.1 Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_021201.1:g.29022_29024=", - "HGVS_predicted_protein_consequence": "NP_001036010.1:p.(Gln1011=)", - "HGVS_transcript_variant": "NM_001042545.1:c.3032_3034=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "LTBP4", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000019.9:g.41123095dup", - "vcf": { - "alt": "AGG", - 
"chr": "19", - "pos": "41123093", - "ref": "AG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000019.10:g.40617187_40617189=", - "vcf": { - "alt": "AGG", - "chr": "19", - "pos": "40617187", - "ref": "AGG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000019.9:g.41123095dup", - "vcf": { - "alt": "AGG", - "chr": "chr19", - "pos": "41123093", - "ref": "AG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000019.10:g.40617187_40617189=", - "vcf": { - "alt": "AGG", - "chr": "chr19", - "pos": "40617187", - "ref": "AGG" - } - } - }, - "submitted_variant": "19-41123093-A-AG", - "transcript_description": "Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA", - "validation_warnings": [ - "NC_000019.9:g.41123093A>AG automapped to NC_000019.9:g.41123095dupG", - "The displayed variants may be artefacts of aligning NM_001042545.1 with genome build GRCh37", - "NM_001042545.1:c.3032_3034 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "NM_003573.2 Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_021201.1:g.29022_29024=", - "HGVS_predicted_protein_consequence": "NP_003564.2:p.(Gln1041=)", - "HGVS_transcript_variant": "NM_003573.2:c.3122_3124=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "LTBP4", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000019.9:g.41123095dup", - "vcf": { - "alt": "AGG", - "chr": "19", - "pos": "41123093", - "ref": "AG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000019.10:g.40617187_40617189=", - "vcf": { - "alt": "AGG", - "chr": "19", - 
"pos": "40617187", - "ref": "AGG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000019.9:g.41123095dup", - "vcf": { - "alt": "AGG", - "chr": "chr19", - "pos": "41123093", - "ref": "AG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000019.10:g.40617187_40617189=", - "vcf": { - "alt": "AGG", - "chr": "chr19", - "pos": "40617187", - "ref": "AGG" - } - } - }, - "submitted_variant": "19-41123093-A-AG", - "transcript_description": "Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA", - "validation_warnings": [ - "NC_000019.9:g.41123093A>AG automapped to NC_000019.9:g.41123095dupG", - "The displayed variants may be artefacts of aligning NM_003573.2 with genome build GRCh37", - "NM_003573.2:c.3122_3124 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "flag": "gene_variant" - } - }, - { - "19-41123093-A-AT": { - "NM_001042544.1 Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_021201.1:g.29023G>T", - "HGVS_predicted_protein_consequence": "NP_001036009.1:p.(Gln1078His)", - "HGVS_transcript_variant": "NM_001042544.1:c.3234G>T", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "LTBP4", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000019.9:g.41123093_41123094insT", - "vcf": { - "alt": "AT", - "chr": "19", - "pos": "41123093", - "ref": "A" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000019.10:g.40617188G>T", - "vcf": { - "alt": "T", - "chr": "19", - "pos": "40617188", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000019.9:g.41123093_41123094insT", - 
"vcf": { - "alt": "AT", - "chr": "chr19", - "pos": "41123093", - "ref": "A" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000019.10:g.40617188G>T", - "vcf": { - "alt": "T", - "chr": "chr19", - "pos": "40617188", - "ref": "G" - } - } - }, - "submitted_variant": "19-41123093-A-AT", - "transcript_description": "Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA", - "validation_warnings": [ - "NC_000019.9:g.41123093A>AT automapped to NC_000019.9:g.41123093_41123094insT", - "The displayed variants may be artefacts of aligning NM_001042544.1 with genome build GRCh37", - "NM_001042544.1:c.3234 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "NM_001042545.1 Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_021201.1:g.29023G>T", - "HGVS_predicted_protein_consequence": "NP_001036010.1:p.(Gln1011His)", - "HGVS_transcript_variant": "NM_001042545.1:c.3033G>T", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "LTBP4", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000019.9:g.41123093_41123094insT", - "vcf": { - "alt": "AT", - "chr": "19", - "pos": "41123093", - "ref": "A" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000019.10:g.40617188G>T", - "vcf": { - "alt": "T", - "chr": "19", - "pos": "40617188", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000019.9:g.41123093_41123094insT", - "vcf": { - "alt": "AT", - "chr": "chr19", - "pos": "41123093", - "ref": "A" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000019.10:g.40617188G>T", - "vcf": { - "alt": "T", 
- "chr": "chr19", - "pos": "40617188", - "ref": "G" - } - } - }, - "submitted_variant": "19-41123093-A-AT", - "transcript_description": "Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA", - "validation_warnings": [ - "NC_000019.9:g.41123093A>AT automapped to NC_000019.9:g.41123093_41123094insT", - "The displayed variants may be artefacts of aligning NM_001042545.1 with genome build GRCh37", - "NM_001042545.1:c.3033 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "NM_003573.2 Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_021201.1:g.29023G>T", - "HGVS_predicted_protein_consequence": "NP_003564.2:p.(Gln1041His)", - "HGVS_transcript_variant": "NM_003573.2:c.3123G>T", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "LTBP4", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000019.9:g.41123093_41123094insT", - "vcf": { - "alt": "AT", - "chr": "19", - "pos": "41123093", - "ref": "A" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000019.10:g.40617188G>T", - "vcf": { - "alt": "T", - "chr": "19", - "pos": "40617188", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000019.9:g.41123093_41123094insT", - "vcf": { - "alt": "AT", - "chr": "chr19", - "pos": "41123093", - "ref": "A" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000019.10:g.40617188G>T", - "vcf": { - "alt": "T", - "chr": "chr19", - "pos": "40617188", - "ref": "G" - } - } - }, - "submitted_variant": "19-41123093-A-AT", - "transcript_description": "Homo sapiens latent transforming growth factor beta 
binding protein 4 (LTBP4), transcript variant 2, mRNA", - "validation_warnings": [ - "NC_000019.9:g.41123093A>AT automapped to NC_000019.9:g.41123093_41123094insT", - "The displayed variants may be artefacts of aligning NM_003573.2 with genome build GRCh37", - "NM_003573.2:c.3123 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "flag": "gene_variant" - } - }, - { - "19-41123093-AG-A": { - "NM_001042544.1 Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_021201.1:g.29024_29025del", - "HGVS_predicted_protein_consequence": "NP_001036009.1:p.(Gly1079ValfsTer14)", - "HGVS_transcript_variant": "NM_001042544.1:c.3235_3236del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "LTBP4", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000019.9:g.41123095del", - "vcf": { - "alt": "A", - "chr": "19", - "pos": "41123093", - "ref": "AG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000019.10:g.40617189_40617190del", - "vcf": { - "alt": "A", - "chr": "19", - "pos": "40617187", - "ref": "AGG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000019.9:g.41123095del", - "vcf": { - "alt": "A", - "chr": "chr19", - "pos": "41123093", - "ref": "AG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000019.10:g.40617189_40617190del", - "vcf": { - "alt": "A", - "chr": "chr19", - "pos": "40617187", - "ref": "AGG" - } - } - }, - "submitted_variant": "19-41123093-AG-A", - "transcript_description": "Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA", - "validation_warnings": [ - 
"NC_000019.9:g.41123093AG>A automapped to NC_000019.9:g.41123095delG", - "The displayed variants may be artefacts of aligning NM_001042544.1 with genome build GRCh37", - "NM_001042544.1:c.3234_3235 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "NM_001042545.1 Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_021201.1:g.29024_29025del", - "HGVS_predicted_protein_consequence": "NP_001036010.1:p.(Gly1012ValfsTer14)", - "HGVS_transcript_variant": "NM_001042545.1:c.3034_3035del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "LTBP4", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000019.9:g.41123095del", - "vcf": { - "alt": "A", - "chr": "19", - "pos": "41123093", - "ref": "AG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000019.10:g.40617189_40617190del", - "vcf": { - "alt": "A", - "chr": "19", - "pos": "40617187", - "ref": "AGG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000019.9:g.41123095del", - "vcf": { - "alt": "A", - "chr": "chr19", - "pos": "41123093", - "ref": "AG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000019.10:g.40617189_40617190del", - "vcf": { - "alt": "A", - "chr": "chr19", - "pos": "40617187", - "ref": "AGG" - } - } - }, - "submitted_variant": "19-41123093-AG-A", - "transcript_description": "Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA", - "validation_warnings": [ - "NC_000019.9:g.41123093AG>A automapped to NC_000019.9:g.41123095delG", - "The displayed variants may be artefacts of aligning NM_001042545.1 with genome build 
GRCh37", - "NM_001042545.1:c.3033_3034 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "NM_003573.2 Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_021201.1:g.29024_29025del", - "HGVS_predicted_protein_consequence": "NP_003564.2:p.(Gly1042ValfsTer14)", - "HGVS_transcript_variant": "NM_003573.2:c.3124_3125del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "LTBP4", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000019.9:g.41123095del", - "vcf": { - "alt": "A", - "chr": "19", - "pos": "41123093", - "ref": "AG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000019.10:g.40617189_40617190del", - "vcf": { - "alt": "A", - "chr": "19", - "pos": "40617187", - "ref": "AGG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000019.9:g.41123095del", - "vcf": { - "alt": "A", - "chr": "chr19", - "pos": "41123093", - "ref": "AG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000019.10:g.40617189_40617190del", - "vcf": { - "alt": "A", - "chr": "chr19", - "pos": "40617187", - "ref": "AGG" - } - } - }, - "submitted_variant": "19-41123093-AG-A", - "transcript_description": "Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA", - "validation_warnings": [ - "NC_000019.9:g.41123093AG>A automapped to NC_000019.9:g.41123095delG", - "The displayed variants may be artefacts of aligning NM_003573.2 with genome build GRCh37", - "NM_003573.2:c.3123_3124 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9", - "Caution should be used when reporting the displayed variant 
descriptions", - "If you are unsure, please contact admin" - ] - }, - "flag": "gene_variant" - } - }, - { - "19-41123093-AG-AG": { - "NM_001042544.1 Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_021201.1:g.29025del", - "HGVS_predicted_protein_consequence": "NP_001036009.1:p.(Gly1079ValfsTer14)", - "HGVS_transcript_variant": "NM_001042544.1:c.3236del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "LTBP4", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000019.9:g.41123093_41123094=", - "vcf": { - "alt": "AG", - "chr": "19", - "pos": "41123093", - "ref": "AG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000019.10:g.40617190del", - "vcf": { - "alt": "A", - "chr": "19", - "pos": "40617187", - "ref": "AG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000019.9:g.41123093_41123094=", - "vcf": { - "alt": "AG", - "chr": "chr19", - "pos": "41123093", - "ref": "AG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000019.10:g.40617190del", - "vcf": { - "alt": "A", - "chr": "chr19", - "pos": "40617187", - "ref": "AG" - } - } - }, - "submitted_variant": "19-41123093-AG-AG", - "transcript_description": "Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA", - "validation_warnings": [ - "NC_000019.9:g.41123093AG>AG automapped to NC_000019.9:g.41123093_41123094AG=", - "The displayed variants may be artefacts of aligning NM_001042544.1 with genome build GRCh37", - "NM_001042544.1:c.3234 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "NM_001042545.1 Homo sapiens latent 
transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_021201.1:g.29025del", - "HGVS_predicted_protein_consequence": "NP_001036010.1:p.(Gly1012ValfsTer14)", - "HGVS_transcript_variant": "NM_001042545.1:c.3035del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "LTBP4", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000019.9:g.41123093_41123094=", - "vcf": { - "alt": "AG", - "chr": "19", - "pos": "41123093", - "ref": "AG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000019.10:g.40617190del", - "vcf": { - "alt": "A", - "chr": "19", - "pos": "40617187", - "ref": "AG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000019.9:g.41123093_41123094=", - "vcf": { - "alt": "AG", - "chr": "chr19", - "pos": "41123093", - "ref": "AG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000019.10:g.40617190del", - "vcf": { - "alt": "A", - "chr": "chr19", - "pos": "40617187", - "ref": "AG" - } - } - }, - "submitted_variant": "19-41123093-AG-AG", - "transcript_description": "Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA", - "validation_warnings": [ - "NC_000019.9:g.41123093AG>AG automapped to NC_000019.9:g.41123093_41123094AG=", - "The displayed variants may be artefacts of aligning NM_001042545.1 with genome build GRCh37", - "NM_001042545.1:c.3033 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "NM_003573.2 Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - 
"HGVS_RefSeqGene_variant": "NG_021201.1:g.29025del", - "HGVS_predicted_protein_consequence": "NP_003564.2:p.(Gly1042ValfsTer14)", - "HGVS_transcript_variant": "NM_003573.2:c.3125del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "LTBP4", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000019.9:g.41123093_41123094=", - "vcf": { - "alt": "AG", - "chr": "19", - "pos": "41123093", - "ref": "AG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000019.10:g.40617190del", - "vcf": { - "alt": "A", - "chr": "19", - "pos": "40617187", - "ref": "AG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000019.9:g.41123093_41123094=", - "vcf": { - "alt": "AG", - "chr": "chr19", - "pos": "41123093", - "ref": "AG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000019.10:g.40617190del", - "vcf": { - "alt": "A", - "chr": "chr19", - "pos": "40617187", - "ref": "AG" - } - } - }, - "submitted_variant": "19-41123093-AG-AG", - "transcript_description": "Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA", - "validation_warnings": [ - "NC_000019.9:g.41123093AG>AG automapped to NC_000019.9:g.41123093_41123094AG=", - "The displayed variants may be artefacts of aligning NM_003573.2 with genome build GRCh37", - "NM_003573.2:c.3123 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9", - "Caution should be used when reporting the displayed variant descriptions", - "If you are unsure, please contact admin" - ] - }, - "flag": "gene_variant" - } - }, - { - "NM_012309.4:c.913-5058G>A": { - "NM_012309.4 Homo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_036441.2:p.?", - "HGVS_transcript_variant": 
"NM_012309.4:c.913-5058G>A", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [ - { - "GRCh37": { - "HGVS_genomic_description": "NW_004070871.1:g.574546C>T", - "vcf": { - "alt": "T", - "chr": "HG865_PATCH", - "pos": "574546", - "ref": "C" - } - } - } - ], - "gene_symbol": "SHANK2", - "genome_context_intronic_sequence": "NC_000011.10(NM_012309.4):c.913-5058G>A", - "primary_assembly_loci": { - "GRCh38": { - "HGVS_genomic_description": "NC_000011.10:g.71080333C>T", - "vcf": { - "alt": "T", - "chr": "11", - "pos": "71080333", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000011.10:g.71080333C>T", - "vcf": { - "alt": "T", - "chr": "chr11", - "pos": "71080333", - "ref": "C" - } - } - }, - "submitted_variant": "NM_012309.4:c.913-5058G>A", - "transcript_description": "Homo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 1, mRNA", - "validation_warnings": [ - "RefSeqGene record not available", - "NM_012309.4:c.913-5058G>A can not be mapped directly to genome build GRCh37. 
See alt_genomic_loci for aligned genomic positions" - ] - }, - "flag": "gene_variant" - } - }, - { - "LRG_199t1:c.2376[G>C];[G>C]": { - "NM_004006.2 Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA": { - "HGVS_LRG_transcript_variant": "LRG_199t1:c.2376G>C", - "HGVS_LRG_variant": "LRG_199:g.842851G>C", - "HGVS_RefSeqGene_variant": "NG_012232.1:g.842851G>C", - "HGVS_predicted_protein_consequence": "NP_003997.1(LRG_199p1):p.(Val792=)", - "HGVS_transcript_variant": "NM_004006.2:c.2376G>C", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "DMD", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000023.10:g.32519876C>G", - "vcf": { - "alt": "G", - "chr": "X", - "pos": "32519876", - "ref": "C" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000023.11:g.32501759C>G", - "vcf": { - "alt": "G", - "chr": "X", - "pos": "32501759", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000023.10:g.32519876C>G", - "vcf": { - "alt": "G", - "chr": "chrX", - "pos": "32519876", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000023.11:g.32501759C>G", - "vcf": { - "alt": "G", - "chr": "chrX", - "pos": "32501759", - "ref": "C" - } - } - }, - "submitted_variant": "LRG_199t1:c.2376[G>C];[G>C]", - "transcript_description": "Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA", - "validation_warnings": [ - "LRG_199t1:c.2376[G>C];[G>C] automapped to NM_004006.2:c.2376[G>C];[G>C]", - "Automap has extracted possible variant descriptions" - ] - }, - "flag": "gene_variant" - } - }, - { - "LRG_199t1:c.[2376G>C];[3103del]": { - "NM_004006.2 Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA": { - "HGVS_LRG_transcript_variant": "LRG_199t1:c.3103del", - "HGVS_LRG_variant": "LRG_199:g.876053del", - "HGVS_RefSeqGene_variant": "NG_012232.1:g.876053del", - "HGVS_predicted_protein_consequence": 
"NP_003997.1(LRG_199p1):p.(Gln1035SerfsTer9)", - "HGVS_transcript_variant": "NM_004006.2:c.3103del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "DMD", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000023.10:g.32486676del", - "vcf": { - "alt": "T", - "chr": "X", - "pos": "32486673", - "ref": "TG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000023.11:g.32468559del", - "vcf": { - "alt": "T", - "chr": "X", - "pos": "32468556", - "ref": "TG" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000023.10:g.32486676del", - "vcf": { - "alt": "T", - "chr": "chrX", - "pos": "32486673", - "ref": "TG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000023.11:g.32468559del", - "vcf": { - "alt": "T", - "chr": "chrX", - "pos": "32468556", - "ref": "TG" - } - } - }, - "submitted_variant": "LRG_199t1:c.[2376G>C];[3103del]", - "transcript_description": "Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA", - "validation_warnings": [ - "LRG_199t1:c.[2376G>C];[3103del] automapped to NM_004006.2:c.[2376G>C];[3103del]", - "Automap has extracted possible variant descriptions" - ] - }, - "flag": "gene_variant" - } - }, - { - "LRG_199t1:c.[4358_4359del;4361_4372del]": { - "NM_004006.2 Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA": { - "HGVS_LRG_transcript_variant": "LRG_199t1:c.4358_4372delinsG", - "HGVS_LRG_variant": "LRG_199:g.954949_954963delinsG", - "HGVS_RefSeqGene_variant": "NG_012232.1:g.954949_954963delinsG", - "HGVS_predicted_protein_consequence": "NP_003997.1(LRG_199p1):p.(Asp1453GlyfsTer11)", - "HGVS_transcript_variant": "NM_004006.2:c.4358_4372delinsG", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "DMD", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000023.10:g.32407764_32407778delinsC", - "vcf": 
{ - "alt": "C", - "chr": "X", - "pos": "32407764", - "ref": "ACTTCATGGAGACAT" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000023.11:g.32389647_32389661delinsC", - "vcf": { - "alt": "C", - "chr": "X", - "pos": "32389647", - "ref": "ACTTCATGGAGACAT" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000023.10:g.32407764_32407778delinsC", - "vcf": { - "alt": "C", - "chr": "chrX", - "pos": "32407764", - "ref": "ACTTCATGGAGACAT" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000023.11:g.32389647_32389661delinsC", - "vcf": { - "alt": "C", - "chr": "chrX", - "pos": "32389647", - "ref": "ACTTCATGGAGACAT" - } - } - }, - "submitted_variant": "LRG_199t1:c.[4358_4359del;4361_4372del]", - "transcript_description": "Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA", - "validation_warnings": [ - "LRG_199t1:c.[4358_4359del;4361_4372del] automapped to NM_004006.2:c.[4358_4359del;4361_4372del]", - "Automap has extracted possible variant descriptions" - ] - }, - "flag": "gene_variant" - } - }, - { - "LRG_199t1:c.2376G>C(;)3103del": { - "NM_004006.2 Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA": { - "HGVS_LRG_transcript_variant": "LRG_199t1:c.3103del", - "HGVS_LRG_variant": "LRG_199:g.876053del", - "HGVS_RefSeqGene_variant": "NG_012232.1:g.876053del", - "HGVS_predicted_protein_consequence": "NP_003997.1(LRG_199p1):p.(Gln1035SerfsTer9)", - "HGVS_transcript_variant": "NM_004006.2:c.3103del", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "DMD", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000023.10:g.32486676del", - "vcf": { - "alt": "T", - "chr": "X", - "pos": "32486673", - "ref": "TG" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000023.11:g.32468559del", - "vcf": { - "alt": "T", - "chr": "X", - "pos": "32468556", - "ref": "TG" - } - }, - "hg19": { - "HGVS_genomic_description": 
"NC_000023.10:g.32486676del", - "vcf": { - "alt": "T", - "chr": "chrX", - "pos": "32486673", - "ref": "TG" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000023.11:g.32468559del", - "vcf": { - "alt": "T", - "chr": "chrX", - "pos": "32468556", - "ref": "TG" - } - } - }, - "submitted_variant": "LRG_199t1:c.2376G>C(;)3103del", - "transcript_description": "Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA", - "validation_warnings": [ - "LRG_199t1:c.2376G>C(;)3103del automapped to NM_004006.2:c.2376G>C(;)3103del", - "Automap has extracted possible variant descriptions" - ] - }, - "flag": "gene_variant" - } - }, - { - "LRG_199t1:c.2376[G>C];[(G>C)]": { - "NM_004006.2 Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA": { - "HGVS_LRG_transcript_variant": "LRG_199t1:c.2376G>C", - "HGVS_LRG_variant": "LRG_199:g.842851G>C", - "HGVS_RefSeqGene_variant": "NG_012232.1:g.842851G>C", - "HGVS_predicted_protein_consequence": "NP_003997.1(LRG_199p1):p.(Val792=)", - "HGVS_transcript_variant": "NM_004006.2:c.2376G>C", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "DMD", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000023.10:g.32519876C>G", - "vcf": { - "alt": "G", - "chr": "X", - "pos": "32519876", - "ref": "C" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000023.11:g.32501759C>G", - "vcf": { - "alt": "G", - "chr": "X", - "pos": "32501759", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000023.10:g.32519876C>G", - "vcf": { - "alt": "G", - "chr": "chrX", - "pos": "32519876", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000023.11:g.32501759C>G", - "vcf": { - "alt": "G", - "chr": "chrX", - "pos": "32501759", - "ref": "C" - } - } - }, - "submitted_variant": "LRG_199t1:c.2376[G>C];[(G>C)]", - "transcript_description": "Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA", - 
"validation_warnings": [ - "LRG_199t1:c.2376[G>C];[(G>C)] automapped to NM_004006.2:c.2376[G>C];[(G>C)]", - "Automap has extracted possible variant descriptions" - ] - }, - "flag": "gene_variant" - } - }, - { - "LRG_199t1:c.[2376G>C];[?]": { - "NM_004006.2 Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA": { - "HGVS_LRG_transcript_variant": "LRG_199t1:c.2376G>C", - "HGVS_LRG_variant": "LRG_199:g.842851G>C", - "HGVS_RefSeqGene_variant": "NG_012232.1:g.842851G>C", - "HGVS_predicted_protein_consequence": "NP_003997.1(LRG_199p1):p.(Val792=)", - "HGVS_transcript_variant": "NM_004006.2:c.2376G>C", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "DMD", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000023.10:g.32519876C>G", - "vcf": { - "alt": "G", - "chr": "X", - "pos": "32519876", - "ref": "C" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000023.11:g.32501759C>G", - "vcf": { - "alt": "G", - "chr": "X", - "pos": "32501759", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000023.10:g.32519876C>G", - "vcf": { - "alt": "G", - "chr": "chrX", - "pos": "32519876", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000023.11:g.32501759C>G", - "vcf": { - "alt": "G", - "chr": "chrX", - "pos": "32501759", - "ref": "C" - } - } - }, - "submitted_variant": "LRG_199t1:c.[2376G>C];[?]", - "transcript_description": "Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA", - "validation_warnings": [ - "LRG_199t1:c.[2376G>C];[?] 
automapped to NM_004006.2:c.[2376G>C];[?]", - "Automap has extracted possible variant descriptions" - ] - }, - "flag": "gene_variant" - } - }, - { - "LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C": { - "NM_004006.2 Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA": { - "HGVS_LRG_transcript_variant": "LRG_199t1:c.476T=", - "HGVS_LRG_variant": "LRG_199:g.528088T=", - "HGVS_RefSeqGene_variant": "NG_012232.1:g.528088T=", - "HGVS_predicted_protein_consequence": "NP_003997.1(LRG_199p1):p.(Phe159=)", - "HGVS_transcript_variant": "NM_004006.2:c.476T=", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "DMD", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000023.10:g.32834639A=", - "vcf": { - "alt": "A", - "chr": "X", - "pos": "32834639", - "ref": "A" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000023.11:g.32816522A=", - "vcf": { - "alt": "A", - "chr": "X", - "pos": "32816522", - "ref": "A" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000023.10:g.32834639A=", - "vcf": { - "alt": "A", - "chr": "chrX", - "pos": "32834639", - "ref": "A" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000023.11:g.32816522A=", - "vcf": { - "alt": "A", - "chr": "chrX", - "pos": "32816522", - "ref": "A" - } - } - }, - "submitted_variant": "LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C", - "transcript_description": "Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA", - "validation_warnings": [ - "LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C automapped to NM_004006.2:c.[296T>G;476T=];[476T=](;)1083A>C", - "Automap has extracted possible variant descriptions" - ] - }, - "flag": "gene_variant" - } - }, - { - "LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del": { - "NM_004006.2 Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA": { - "HGVS_LRG_transcript_variant": "LRG_199t1:c.476T>C", - "HGVS_LRG_variant": 
"LRG_199:g.528088T>C", - "HGVS_RefSeqGene_variant": "NG_012232.1:g.528088T>C", - "HGVS_predicted_protein_consequence": "NP_003997.1(LRG_199p1):p.(Phe159Ser)", - "HGVS_transcript_variant": "NM_004006.2:c.476T>C", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "DMD", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000023.10:g.32834639A>G", - "vcf": { - "alt": "G", - "chr": "X", - "pos": "32834639", - "ref": "A" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000023.11:g.32816522A>G", - "vcf": { - "alt": "G", - "chr": "X", - "pos": "32816522", - "ref": "A" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000023.10:g.32834639A>G", - "vcf": { - "alt": "G", - "chr": "chrX", - "pos": "32834639", - "ref": "A" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000023.11:g.32816522A>G", - "vcf": { - "alt": "G", - "chr": "chrX", - "pos": "32816522", - "ref": "A" - } - } - }, - "submitted_variant": "LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del", - "transcript_description": "Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA", - "validation_warnings": [ - "LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del automapped to NM_004006.2:c.[296T>G];[476T>C](;)1083A>C(;)1406del", - "Automap has extracted possible variant descriptions" - ] - }, - "flag": "gene_variant" - } - }, - { - "LRG_199t1:c.[976-20T>A;976-17_976-1dup]": { - "Validation_Warning_1": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "", - "HGVS_transcript_variant": "", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": {}, - "submitted_variant": "LRG_199t1:c.[976-20T>A;976-17_976-1dup]", - "transcript_description": "", - "validation_warnings": [ - 
"LRG_199t1:c.[976-20T>A;976-17_976-1dup] automapped to NM_004006.2:c.[976-20T>A;976-17_976-1dup]", - "Intronic positions not supported for HGVS Allele descriptions" - ] - }, - "flag": "warning" - } - }, - { - "chr2:g.[29443695G>T];[29443695G>C;29443697A>G]": { - "NM_001353765.1 Homo sapiens ALK receptor tyrosine kinase (ALK), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001340694.1:p.(Phe106Leu)", - "HGVS_transcript_variant": "NM_001353765.1:c.316_318delinsCTG", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "ALK", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000002.11:g.29443695_29443697delinsCAG", - "vcf": { - "alt": "CAG", - "chr": "2", - "pos": "29443695", - "ref": "GAA" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000002.12:g.29220829_29220831delinsCAG", - "vcf": { - "alt": "CAG", - "chr": "2", - "pos": "29220829", - "ref": "GAA" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000002.11:g.29443695_29443697delinsCAG", - "vcf": { - "alt": "CAG", - "chr": "chr2", - "pos": "29443695", - "ref": "GAA" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000002.12:g.29220829_29220831delinsCAG", - "vcf": { - "alt": "CAG", - "chr": "chr2", - "pos": "29220829", - "ref": "GAA" - } - } - }, - "submitted_variant": "chr2:g.[29443695G>T];[29443695G>C;29443697A>G]", - "transcript_description": "Homo sapiens ALK receptor tyrosine kinase (ALK), transcript variant 2, mRNA", - "validation_warnings": [ - "Automap has extracted possible variant descriptions", - "RefSeqGene record not available" - ] - }, - "NM_004304.4 Homo sapiens ALK receptor tyrosine kinase (ALK), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "LRG_488:g.705736_705738delinsCTG", - "HGVS_RefSeqGene_variant": 
"NG_009445.1:g.705736_705738delinsCTG", - "HGVS_predicted_protein_consequence": "NP_004295.2(LRG_488p1):p.(Phe1174Leu)", - "HGVS_transcript_variant": "NM_004304.4:c.3520_3522delinsCTG", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "ALK", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000002.11:g.29443695_29443697delinsCAG", - "vcf": { - "alt": "CAG", - "chr": "2", - "pos": "29443695", - "ref": "GAA" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000002.12:g.29220829_29220831delinsCAG", - "vcf": { - "alt": "CAG", - "chr": "2", - "pos": "29220829", - "ref": "GAA" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000002.11:g.29443695_29443697delinsCAG", - "vcf": { - "alt": "CAG", - "chr": "chr2", - "pos": "29443695", - "ref": "GAA" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000002.12:g.29220829_29220831delinsCAG", - "vcf": { - "alt": "CAG", - "chr": "chr2", - "pos": "29220829", - "ref": "GAA" - } - } - }, - "submitted_variant": "chr2:g.[29443695G>T];[29443695G>C;29443697A>G]", - "transcript_description": "Homo sapiens ALK receptor tyrosine kinase (ALK), transcript variant 1, mRNA", - "validation_warnings": [ - "Automap has extracted possible variant descriptions" - ] - }, - "flag": "gene_variant" - } - }, - { - "chr7:g.87053221C>T": { - "NM_000443.3 Homo sapiens ATP binding cassette subfamily B member 4 (ABCB4), transcript variant A, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_000434.1:p.?", - "HGVS_transcript_variant": "NM_000443.3:c.2211+1G>A", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "ABCB4", - "genome_context_intronic_sequence": "NC_000007.13(NM_000443.3):c.2211+1G>A", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000007.13:g.87053221C>T", - 
"vcf": { - "alt": "T", - "chr": "7", - "pos": "87053221", - "ref": "C" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000007.14:g.87423905C>T", - "vcf": { - "alt": "T", - "chr": "7", - "pos": "87423905", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000007.13:g.87053221C>T", - "vcf": { - "alt": "T", - "chr": "chr7", - "pos": "87053221", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000007.14:g.87423905C>T", - "vcf": { - "alt": "T", - "chr": "chr7", - "pos": "87423905", - "ref": "C" - } - } - }, - "submitted_variant": "chr7:g.87053221C>T", - "transcript_description": "Homo sapiens ATP binding cassette subfamily B member 4 (ABCB4), transcript variant A, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "NM_018849.2 Homo sapiens ATP binding cassette subfamily B member 4 (ABCB4), transcript variant B, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "NG_007118.1:g.61528G>A", - "HGVS_predicted_protein_consequence": "NP_061337.1:p.?", - "HGVS_transcript_variant": "NM_018849.2:c.2211+1G>A", - "RefSeqGene_context_intronic_sequence": "NG_007118.1(NM_018849.2):c.2211+1G>A", - "alt_genomic_loci": [], - "gene_symbol": "ABCB4", - "genome_context_intronic_sequence": "NC_000007.13(NM_018849.2):c.2211+1G>A", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000007.13:g.87053221C>T", - "vcf": { - "alt": "T", - "chr": "7", - "pos": "87053221", - "ref": "C" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000007.14:g.87423905C>T", - "vcf": { - "alt": "T", - "chr": "7", - "pos": "87423905", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000007.13:g.87053221C>T", - "vcf": { - "alt": "T", - "chr": "chr7", - "pos": "87053221", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000007.14:g.87423905C>T", - "vcf": { - "alt": "T", - "chr": "chr7", - "pos": "87423905", - "ref": "C" - } 
- } - }, - "submitted_variant": "chr7:g.87053221C>T", - "transcript_description": "Homo sapiens ATP binding cassette subfamily B member 4 (ABCB4), transcript variant B, mRNA", - "validation_warnings": [] - }, - "NM_018850.2 Homo sapiens ATP binding cassette subfamily B member 4 (ABCB4), transcript variant C, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_061338.1:p.?", - "HGVS_transcript_variant": "NM_018850.2:c.2211+1G>A", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "ABCB4", - "genome_context_intronic_sequence": "NC_000007.13(NM_018850.2):c.2211+1G>A", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000007.13:g.87053221C>T", - "vcf": { - "alt": "T", - "chr": "7", - "pos": "87053221", - "ref": "C" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000007.14:g.87423905C>T", - "vcf": { - "alt": "T", - "chr": "7", - "pos": "87423905", - "ref": "C" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000007.13:g.87053221C>T", - "vcf": { - "alt": "T", - "chr": "chr7", - "pos": "87053221", - "ref": "C" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000007.14:g.87423905C>T", - "vcf": { - "alt": "T", - "chr": "chr7", - "pos": "87423905", - "ref": "C" - } - } - }, - "submitted_variant": "chr7:g.87053221C>T", - "transcript_description": "Homo sapiens ATP binding cassette subfamily B member 4 (ABCB4), transcript variant C, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "flag": "gene_variant" - } - }, - { - "chr9:g.133738306G>A": { - "NM_005157.4 Homo sapiens ABL proto-oncogene 1, non-receptor tyrosine kinase (ABL1), transcript variant a, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "LRG_769:g.154039G>A", - "HGVS_RefSeqGene_variant": "NG_012034.1:g.154039G>A", - "HGVS_predicted_protein_consequence": 
"NP_005148.2(LRG_769p1):p.(Glu236Lys)", - "HGVS_transcript_variant": "NM_005157.4:c.706G>A", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "ABL1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000009.11:g.133738306G>A", - "vcf": { - "alt": "A", - "chr": "9", - "pos": "133738306", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000009.11:g.133738306G>A", - "vcf": { - "alt": "A", - "chr": "chr9", - "pos": "133738306", - "ref": "G" - } - } - }, - "submitted_variant": "chr9:g.133738306G>A", - "transcript_description": "Homo sapiens ABL proto-oncogene 1, non-receptor tyrosine kinase (ABL1), transcript variant a, mRNA", - "validation_warnings": [ - "A more recent version of the selected reference sequence NM_005157.4 is available (NM_005157.5)", - "NM_005157.5:c.706G>A MUST be fully validated prior to use in reports", - "select_variants=NM_005157.5:c.706G>A", - "The current status of LRG_769 is pending therefore changes may be made to the LRG reference sequence" - ] - }, - "NM_005157.5 Homo sapiens ABL proto-oncogene 1, non-receptor tyrosine kinase (ABL1), transcript variant a, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_005148.2(LRG_769p1):p.(Glu236Lys)", - "HGVS_transcript_variant": "NM_005157.5:c.706G>A", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "ABL1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000009.11:g.133738306G>A", - "vcf": { - "alt": "A", - "chr": "9", - "pos": "133738306", - "ref": "G" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000009.12:g.130862919G>A", - "vcf": { - "alt": "A", - "chr": "9", - "pos": "130862919", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": 
"NC_000009.11:g.133738306G>A", - "vcf": { - "alt": "A", - "chr": "chr9", - "pos": "133738306", - "ref": "G" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000009.12:g.130862919G>A", - "vcf": { - "alt": "A", - "chr": "chr9", - "pos": "130862919", - "ref": "G" - } - } - }, - "submitted_variant": "chr9:g.133738306G>A", - "transcript_description": "Homo sapiens ABL proto-oncogene 1, non-receptor tyrosine kinase (ABL1), transcript variant a, mRNA", - "validation_warnings": [ - "RefSeqGene record not available" - ] - }, - "NM_007313.2 Homo sapiens ABL proto-oncogene 1, non-receptor tyrosine kinase (ABL1), transcript variant b, mRNA": { - "HGVS_LRG_transcript_variant": "LRG_769t2:c.763G>A", - "HGVS_LRG_variant": "LRG_769:g.154039G>A", - "HGVS_RefSeqGene_variant": "NG_012034.1:g.154039G>A", - "HGVS_predicted_protein_consequence": "NP_009297.2(LRG_769p2):p.(Glu255Lys)", - "HGVS_transcript_variant": "NM_007313.2:c.763G>A", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "ABL1", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000009.11:g.133738306G>A", - "vcf": { - "alt": "A", - "chr": "9", - "pos": "133738306", - "ref": "G" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000009.12:g.130862919G>A", - "vcf": { - "alt": "A", - "chr": "9", - "pos": "130862919", - "ref": "G" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000009.11:g.133738306G>A", - "vcf": { - "alt": "A", - "chr": "chr9", - "pos": "133738306", - "ref": "G" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000009.12:g.130862919G>A", - "vcf": { - "alt": "A", - "chr": "chr9", - "pos": "130862919", - "ref": "G" - } - } - }, - "submitted_variant": "chr9:g.133738306G>A", - "transcript_description": "Homo sapiens ABL proto-oncogene 1, non-receptor tyrosine kinase (ABL1), transcript variant b, mRNA", - "validation_warnings": [ - "The current status of LRG_769 is pending therefore 
changes may be made to the LRG reference sequence" - ] - }, - "flag": "gene_variant" - } - }, - { - "chr2:g.[29443695G>T];[29443695G>C;29443697A>G]": { - "NM_001353765.1 Homo sapiens ALK receptor tyrosine kinase (ALK), transcript variant 2, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "", - "HGVS_RefSeqGene_variant": "", - "HGVS_predicted_protein_consequence": "NP_001340694.1:p.(Phe106Leu)", - "HGVS_transcript_variant": "NM_001353765.1:c.316_318delinsCTG", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "ALK", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000002.11:g.29443695_29443697delinsCAG", - "vcf": { - "alt": "CAG", - "chr": "2", - "pos": "29443695", - "ref": "GAA" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000002.12:g.29220829_29220831delinsCAG", - "vcf": { - "alt": "CAG", - "chr": "2", - "pos": "29220829", - "ref": "GAA" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000002.11:g.29443695_29443697delinsCAG", - "vcf": { - "alt": "CAG", - "chr": "chr2", - "pos": "29443695", - "ref": "GAA" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000002.12:g.29220829_29220831delinsCAG", - "vcf": { - "alt": "CAG", - "chr": "chr2", - "pos": "29220829", - "ref": "GAA" - } - } - }, - "submitted_variant": "chr2:g.[29443695G>T];[29443695G>C;29443697A>G]", - "transcript_description": "Homo sapiens ALK receptor tyrosine kinase (ALK), transcript variant 2, mRNA", - "validation_warnings": [ - "Automap has extracted possible variant descriptions", - "RefSeqGene record not available" - ] - }, - "NM_004304.4 Homo sapiens ALK receptor tyrosine kinase (ALK), transcript variant 1, mRNA": { - "HGVS_LRG_transcript_variant": "", - "HGVS_LRG_variant": "LRG_488:g.705736_705738delinsCTG", - "HGVS_RefSeqGene_variant": "NG_009445.1:g.705736_705738delinsCTG", - "HGVS_predicted_protein_consequence": 
"NP_004295.2(LRG_488p1):p.(Phe1174Leu)", - "HGVS_transcript_variant": "NM_004304.4:c.3520_3522delinsCTG", - "RefSeqGene_context_intronic_sequence": "", - "alt_genomic_loci": [], - "gene_symbol": "ALK", - "genome_context_intronic_sequence": "", - "primary_assembly_loci": { - "GRCh37": { - "HGVS_genomic_description": "NC_000002.11:g.29443695_29443697delinsCAG", - "vcf": { - "alt": "CAG", - "chr": "2", - "pos": "29443695", - "ref": "GAA" - } - }, - "GRCh38": { - "HGVS_genomic_description": "NC_000002.12:g.29220829_29220831delinsCAG", - "vcf": { - "alt": "CAG", - "chr": "2", - "pos": "29220829", - "ref": "GAA" - } - }, - "hg19": { - "HGVS_genomic_description": "NC_000002.11:g.29443695_29443697delinsCAG", - "vcf": { - "alt": "CAG", - "chr": "chr2", - "pos": "29443695", - "ref": "GAA" - } - }, - "hg38": { - "HGVS_genomic_description": "NC_000002.12:g.29220829_29220831delinsCAG", - "vcf": { - "alt": "CAG", - "chr": "chr2", - "pos": "29220829", - "ref": "GAA" - } - } - }, - "submitted_variant": "chr2:g.[29443695G>T];[29443695G>C;29443697A>G]", - "transcript_description": "Homo sapiens ALK receptor tyrosine kinase (ALK), transcript variant 1, mRNA", - "validation_warnings": [ - "Automap has extracted possible variant descriptions" - ] - }, - "flag": "gene_variant" - } - } -] \ No newline at end of file diff --git a/VariantValidator/testing/test_vv.py b/VariantValidator/testing/test_vv.py index d8a7e0f1..d8860f6e 100644 --- a/VariantValidator/testing/test_vv.py +++ b/VariantValidator/testing/test_vv.py @@ -38,7 +38,7 @@ def constructHub(): hub=vvHub() return hub -@pytest.fixture(params=inputVariants[:]) +@pytest.fixture(params=["NC_000012.11:g.122064777C>A","NC_000012.11:g.122064776delG","NC_000012.11:g.122064776dupG"])#inputVariants[:]) def constructValidation(request): hub=constructHub() # print request.param diff --git a/VariantValidator/testing/vvTestFunctions.py b/VariantValidator/testing/vvTestFunctions.py index d24d45c8..6b278adf 100644 --- 
a/VariantValidator/testing/vvTestFunctions.py +++ b/VariantValidator/testing/vvTestFunctions.py @@ -18,6 +18,8 @@ hl=logging.getLogger("hgvs.dataproviders.uta") hl.addHandler(logConsoleHandler) + +''' try: print("Configuring for personal linux") seqrepo_current_version='2018-08-21' @@ -39,13 +41,15 @@ os.environ['PYLIFTOVER_DIR'] = '/Users/pjf9/variant_validator_data/pyLiftover/' from VariantValidator import variantValidator as vv -def generateTestFolder(path, inputVariants): +''' + +def generateTestFolder(path, inputVariants, validator): #Saves the results of running inputVariants to a folder given in saveDirectory. if not os.path.isdir(path): os.mkdir(path) variantArray=loadVariantFile(inputVariants) #Go through the variant array, validating, and save the results. - batch=validateBatch(variantArray) + batch=validateBatch(variantArray,validator) #Save copy of the resulting dictionary saveValidationsAsFolder(path,batch) @@ -114,7 +118,7 @@ def loadValidations(path): #print(type(out[-1])) return out -def validateBatch(variantArray): +def validateBatch(variantArray,validator): #Returns an array of validations (themselves dictionary objects). 
out=[] selectTranscripts='all' @@ -122,7 +126,7 @@ def validateBatch(variantArray): for i,v in enumerate(variantArray): print("VALIDATING Variant"+str(i)+" "+str(i+1)+"/"+str(len(variantArray))+" "+str(v)) try: - out.append(vv.validator(v,selectedAssembly,selectTranscripts)) + out.append(validator.vv.validator(v,selectedAssembly,selectTranscripts)) except KeyboardInterrupt: print("Exiting...") sys.exit() diff --git a/VariantValidator/testing/vvTestSave.py b/VariantValidator/testing/vvTestSave.py index 62c042d3..6e014754 100644 --- a/VariantValidator/testing/vvTestSave.py +++ b/VariantValidator/testing/vvTestSave.py @@ -6,56 +6,23 @@ import sqlite3 import os -try: - print("Configuring for lamp") - seqrepo_current_version='2018-08-21' - HGVS_SEQREPO_DIR='/local/seqrepo/'+seqrepo_current_version - os.environ['HGVS_SEQREPO_DIR']=HGVS_SEQREPO_DIR - uta_current_version='uta_20180821' - UTA_DB_URL='postgresql://uta_admin:uta_admin@127.0.0.1/uta/' + uta_current_version - os.environ['UTA_DB_URL']=UTA_DB_URL - from VariantValidator import variantValidator as vv - vv.my_config() -except sqlite3.OperationalError: - print("Configuring for VM") - seqrepo_current_version='2018-08-21' - HGVS_SEQREPO_DIR='/home/pjdp2/seqrepo/'+seqrepo_current_version - os.environ['HGVS_SEQREPO_DIR']=HGVS_SEQREPO_DIR - uta_current_version='uta_20180821' - UTA_DB_URL='postgresql://uta_admin:uta_admin@127.0.0.1/uta/' + uta_current_version - os.environ['UTA_DB_URL']=UTA_DB_URL - try: +class vvHub(): + #Variant validator configuration hub object + def __init__(self): + seqrepo_current_version='2018-08-21' + HGVS_SEQREPO_DIR='/home/buran/documents/workspace/ITS/seqrepo/'+seqrepo_current_version + os.environ['HGVS_SEQREPO_DIR']=HGVS_SEQREPO_DIR + self.hvgsSeqrepoPath=HGVS_SEQREPO_DIR + uta_current_version='uta_20180821' + UTA_DB_URL='postgresql://uta_admin:uta_admin@127.0.0.1/uta/' + uta_current_version + os.environ['UTA_DB_URL']=UTA_DB_URL + self.utaPath=UTA_DB_URL + import 
VariantValidator.variantanalyser.vvLogging as vvLogging + self.logger=vvLogging.logger from VariantValidator import variantValidator as vv - vv.my_config() - except sqlite3.OperationalError: - print("Configuring for VM") - seqrepo_current_version = '2018-08-21' - HGVS_SEQREPO_DIR = '/Users/pjf9/variant_validator_data/seqrepo/' + seqrepo_current_version - os.environ['HGVS_SEQREPO_DIR'] = HGVS_SEQREPO_DIR - uta_current_version = 'uta_20180821' - UTA_DB_URL = 'postgresql://uta_admin:uta_admin@127.0.0.1/uta/' + uta_current_version - os.environ['UTA_DB_URL'] = UTA_DB_URL - os.environ['PYLIFTOVER_DIR'] = '/Users/pjf9/variant_validator_data/pyLiftover/' - from VariantValidator import variantValidator as vv - vv.my_config() -except OSError: - print("Configuring for personal linux") - seqrepo_current_version='2018-08-21' - HGVS_SEQREPO_DIR='/home/buran/documents/workspace/ITS/seqrepo/'+seqrepo_current_version - os.environ['HGVS_SEQREPO_DIR']=HGVS_SEQREPO_DIR - uta_current_version='uta_20180821' - UTA_DB_URL='postgresql://uta_admin:uta_admin@127.0.0.1/uta/' + uta_current_version - os.environ['UTA_DB_URL']=UTA_DB_URL - from VariantValidator import variantValidator as vv - vv.my_config() - - -sysOut=StringIO() - -#sys.stdout=sysOut + self.vv=vv + self.vv.my_config() -inputVariants="inputVariants.txt" -#saveOut="testJSON.json" -#fn.generateTestJSON(saveOut,inputVariants,sysOut) -fn.generateTestFolder("testOutputs",inputVariants) +hub=vvHub() +fn.generateTestFolder("testOutputs","inputVariants.txt",hub) \ No newline at end of file From 89bcf2a1f653dbc0a1f1b1680562a1d1f0a9b0d6 Mon Sep 17 00:00:00 2001 From: buran Date: Wed, 9 Jan 2019 09:16:51 +0000 Subject: [PATCH 002/223] More meddling --- VariantValidator/modules/vvObjects.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/VariantValidator/modules/vvObjects.py b/VariantValidator/modules/vvObjects.py index 2eae0048..a27365f0 100644 --- a/VariantValidator/modules/vvObjects.py +++ b/VariantValidator/modules/vvObjects.py @@ 
-54,6 +54,10 @@ def __init__(self,hgvsPath=None,utaPath=None): ''' + + + + def validate(self): pass From 4d2b18e9a79e8fb53ba7651495a62db3761a911a Mon Sep 17 00:00:00 2001 From: buran Date: Sat, 12 Jan 2019 10:19:03 +0000 Subject: [PATCH 003/223] Added database code to restructured files --- VariantValidator/modules/defaultConfig.ini | 10 +- VariantValidator/modules/vvDBGet.py | 85 +++++++++++ VariantValidator/modules/vvDBInsert.py | 112 +++++++++++++++ VariantValidator/modules/vvDatabase.py | 156 +++++++++++++++++++++ VariantValidator/modules/vvFunctions.py | 75 ++++++++++ VariantValidator/modules/vvLogging.py | 144 +++++++++++++++++++ VariantValidator/modules/vvObjects.py | 154 +++++++++++++++----- 7 files changed, 695 insertions(+), 41 deletions(-) create mode 100644 VariantValidator/modules/vvDBGet.py create mode 100644 VariantValidator/modules/vvDBInsert.py create mode 100644 VariantValidator/modules/vvDatabase.py create mode 100644 VariantValidator/modules/vvFunctions.py create mode 100644 VariantValidator/modules/vvLogging.py diff --git a/VariantValidator/modules/defaultConfig.ini b/VariantValidator/modules/defaultConfig.ini index ad76e6ee..c719865d 100644 --- a/VariantValidator/modules/defaultConfig.ini +++ b/VariantValidator/modules/defaultConfig.ini @@ -9,12 +9,12 @@ user = vvadmin password = var1ant [seqrepo] -version='2018-08-21' +version = 2018-08-21 location [uta] -version='uta_20180821' -location='postgresql://uta_admin:uta_admin@127.0.0.1/uta/' +version = uta_20180821 +location = postgresql://uta_admin:uta_admin@127.0.0.1/uta/ [logging] string = info console @@ -22,6 +22,10 @@ string = info console [EntrezID] entrezid = admin@variantvalidator.org +[liftover] +location + + # # Copyright (C) 2018 Peter Causey-Freeman, University of Leicester # diff --git a/VariantValidator/modules/vvDBGet.py b/VariantValidator/modules/vvDBGet.py new file mode 100644 index 00000000..35ffd9d7 --- /dev/null +++ b/VariantValidator/modules/vvDBGet.py @@ -0,0 +1,85 @@ +from 
vvFunctions import handleCursor +from vvLogging import logger + +class vvDBGet: + def __init__(self,conn,cursor): + # These are inherited by reference from the vvDatabase object. + self.conn=conn + self.cursor=cursor + + @handleCursor + def execute(self,query): + self.cursor.execute(query) + row = self.cursor.fetchone() + if row is None: + logger.debug("No data returned from query "+str(query)) + row = ['none', 'No data'] + return row + @handleCursor + def executeAll(self,query): + self.cursor.execute(query) + rows = self.cursor.fetchone() + if rows==[]: + logger.debug("No data returned from query "+str(query)) + row = ['none', 'No data'] + return rows + # from dbfetchone + def get_utaSymbol(self,gene_symbol): + query= "SELECT utaSymbol FROM transcript_info WHERE hgncSymbol = '%s'" %(gene_symbol) + return self.execute(query) + def get_hgncSymbol(self,gene_symbol): + query= "SELECT hgncSymbol FROM transcript_info WHERE utaSymbol = '%s'" %(gene_symbol) + return self.execute(query) + def get_transcript_description(self,transcript_id): + query= "SELECT description FROM transcript_info WHERE refSeqID = '%s'" %(transcript_id) + return str(self.execute(query)[0]) + def get_gene_symbol_from_transcriptID(self,transcript_id): + query = "SELECT hgncSymbol FROM transcript_info WHERE refSeqID = '%s'" %(transcript_id) + return str(self.execute(query)[0]) + def get_refSeqGene_data_by_refSeqGeneID(self,refSeqGeneID, genomeBuild): + query = "SELECT refSeqGeneID, refSeqChromosomeID, genomeBuild, startPos, endPos, orientation, totalLength, chrPos, rsgPos, entrezID, hgncSymbol FROM refSeqGene_loci WHERE refSeqGeneID = '%s' AND genomeBuild = '%s'" %(refSeqGeneID, genomeBuild) + return self.execute(query) + def get_gene_symbol_from_refSeqGeneID(self,refSeqGeneID): + query = "SELECT hgncSymbol FROM refSeqGene_loci WHERE refSeqGeneID = '%s'" %(refSeqGeneID) + return self.execute(query)[0] + def get_RefSeqGeneID_from_lrgID(self,lrgID): + query = "SELECT RefSeqGeneID FROM LRG_RSG_lookup 
WHERE lrgID = '%s'" %(lrgID) + return self.execute(query)[0] + def get_RefSeqTranscriptID_from_lrgTranscriptID(self,lrgtxID): + query = "SELECT RefSeqTranscriptID FROM LRG_transcripts WHERE LRGtranscriptID = '%s'" %(lrgtxID) + return self.execute(query)[0] + def get_lrgTranscriptID_from_RefSeqTranscriptID(self,rstID): + query = "SELECT LRGtranscriptID FROM LRG_transcripts WHERE RefSeqTranscriptID = '%s'" %(rstID) + return self.execute(query)[0] + def get_lrgID_from_RefSeqGeneID(self,rsgID): + query = "SELECT lrgID, status FROM LRG_RSG_lookup WHERE RefSeqGeneID = '%s'" %(rsgID) + return self.execute(query) + def get_refseqgene_info(self,refseqgene_id, primary_assembly): + query = "SELECT refSeqGeneID, refSeqChromosomeID, genomeBuild, startPos, endPos FROM refSeqGene_loci WHERE refSeqGeneID = '%s' AND genomeBuild = '%s'" %(refseqgene_id, primary_assembly) + return self.execute(query) + def get_RefSeqProteinID_from_lrgProteinID(self,lrg_p): + query = "SELECT RefSeqProteinID FROM LRG_proteins WHERE LRGproteinID = '%s'" %(lrg_p) + return self.execute(query)[0] + def get_lrgProteinID_from_RefSeqProteinID(self,rs_p): + query = "SELECT LRGproteinID FROM LRG_proteins WHERE RefSeqProteinID = '%s'" %(rs_p) + return self.execute(query)[0] + def get_LRG_data_from_LRGid(self,lrg_id): + query = "SELECT * FROM LRG_RSG_lookup WHERE lrgID = '%s'" %(lrg_id) + return self.execute(query) + #from dbfetchall + def get_transcript_info_for_gene(self,gene_symbol): + query = "SELECT refSeqID, description, transcriptVariant, currentVersion, hgncSymbol, utaSymbol, updated, IF(updated < NOW() - INTERVAL 3 MONTH , 'true', 'false') FROM transcript_info WHERE hgncSymbol = '%s'" %(gene_symbol) + return self.executeAll(query) + def get_g_to_g_info(self): + query = "SELECT refSeqGeneID, refSeqChromosomeID, startPos, endPos, orientation, hgncSymbol, genomeBuild FROM refSeqGene_loci" + return self.executeAll(query) + def get_all_transcriptID(self): + query = "SELECT refSeqID FROM transcript_info" + 
return self.executeAll(query) + # Direct methods (GET) + def get_uta_symbol(self,gene_symbol): + # returns the UTA gene symbol when HGNC gene symbol is input + return str(self.get_utaSymbol(gene_symbol)[0]) + def get_hgnc_symbol(self,gene_symbol): + # returns the HGNC gene symbol when UTA gene symbol is input + return str(self.get_hgncSymbol(gene_symbol)[0]) diff --git a/VariantValidator/modules/vvDBInsert.py b/VariantValidator/modules/vvDBInsert.py new file mode 100644 index 00000000..3bcd323b --- /dev/null +++ b/VariantValidator/modules/vvDBInsert.py @@ -0,0 +1,112 @@ +from vvDatabase import handleCursor + +class vvDBInsert: + def __init__(self,conn,cursor): + # These are inherited by reference from the vvDatabase object. + self.conn=conn + self.cursor=cursor + # Add new entry + def add_entry(self,entry, data, table): + return self.insert(entry, data, table) + def insert_transcript_loci(self,add_data, primary_assembly): + return self.insert_transcript_loci(add_data, primary_assembly) + + #from dbinsert + @handleCursor + def insert(self,entry, data, table): + # MySQL queries + if table == 'transcript_info': + accession = entry + description = data[1] + variant = data[2] + version = data[3] + hgnc_symbol = data[4] + uta_symbol = data[5] + query = "INSERT INTO transcript_info(refSeqID, description, transcriptVariant, currentVersion, hgncSymbol, utaSymbol, updated) VALUES(%s,%s, %s, %s, %s, %s, NOW())" + self.cursor.execute(query, (accession, description, variant, version, hgnc_symbol, uta_symbol)) + # Query report + if self.cursor.lastrowid: + success = 'true' + else: + success = 'Unknown error' + + # Commit and close connection + self.conn.commit() + return success + @handleCursor + def insert_refSeqGene_data(self,rsg_data): + query = "INSERT INTO refSeqGene_loci(refSeqGeneID, refSeqChromosomeID, genomeBuild, startPos, endPos, orientation, totalLength, chrPos, rsgPos, entrezID, hgncSymbol, updated) VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())" + 
self.cursor.execute(query, (rsg_data[0], rsg_data[1], rsg_data[2], rsg_data[3], rsg_data[4], rsg_data[5], rsg_data[6], rsg_data[7], rsg_data[8], rsg_data[9], rsg_data[10])) + # Query report + if self.cursor.lastrowid: + success = 'true' + else: + success = 'Unknown error' + # Commit and close connection + self.conn.commit() + return success + @handleCursor + def insert_RefSeqGeneID_from_lrgID(self,lrg_rs_lookup): + query = "INSERT INTO LRG_RSG_lookup(lrgID, hgncSymbol, RefSeqGeneID, status) VALUES(%s,%s,%s,%s)" + self.cursor.execute(query, (lrg_rs_lookup[0], lrg_rs_lookup[1], lrg_rs_lookup[2], lrg_rs_lookup[3])) + # Query report + if self.cursor.lastrowid: + success = 'true' + else: + success = 'Unknown error' + # Commit and close connection + self.conn.commit() + return success + @handleCursor + def insert_LRG_transcript_data(self,lrgtx_to_rstID): + query = "INSERT INTO LRG_transcripts(LRGtranscriptID, RefSeqTranscriptID) VALUES(%s,%s)" + self.cursor.execute(query, (lrgtx_to_rstID[0], lrgtx_to_rstID[1])) + # Query report + if self.cursor.lastrowid: + success = 'true' + else: + success = 'Unknown error' + + # Commit and close connection + self.conn.commit() + return success + @handleCursor + def insert_LRG_protein_data(self,lrg_p, rs_p): + query = "INSERT INTO LRG_proteins(LRGproteinID, RefSeqProteinID) VALUES(%s,%s)" + self.cursor.execute(query, (lrg_p, rs_p)) + # Query report + if self.cursor.lastrowid: + success = 'true' + else: + success = 'Unknown error' + + # Commit and close connection + self.conn.commit() + return success + # from dbupdate + @handleCursor + def update(self,entry, data, table): + # MySQL queries + #if table == 'transcript_info': + accession = entry + description = data[1] + variant = data[2] + version = data[3] + hgnc_symbol = data[4] + uta_symbol = data[5] + query = "UPDATE transcript_info SET description=%s, transcriptVariant=%s, currentVersion=%s, hgncSymbol=%s, utaSymbol=%s, updated=NOW() WHERE refSeqID = %s" + self.cursor.execute(query, 
(description, variant, version, hgnc_symbol, uta_symbol, accession)) + success = 'true' + self.conn.commit() + return success + # 'true'??? check this. + @handleCursor + def update_refSeqGene_data(self,rsg_data): + query = "UPDATE refSeqGene_loci SET hgncSymbol=%s, updated=NOW() WHERE refSeqGeneID=%s" + self.cursor.execute(query, (rsg_data[10], rsg_data[0])) + success = 'true' + self.conn.commit() + return success + # Update entries + def update_entry(self,entry, data, table): + return self.update(entry, data, table) diff --git a/VariantValidator/modules/vvDatabase.py b/VariantValidator/modules/vvDatabase.py new file mode 100644 index 00000000..54869fab --- /dev/null +++ b/VariantValidator/modules/vvDatabase.py @@ -0,0 +1,156 @@ +import mysql.connector +from mysql.connector.pooling import MySQLConnectionPool +from vvLogging import logger +from vvFunctions import entrez_efetch,hgnc_rest,handleCursor +from vvDBInsert import vvDBInsert +from vvDBGet import vvDBGet +import re +import os + +class vvDatabase: + # This class contains and handles the mysql connections for the variant validator database. + def __init__(self,val,dbConfig): + self.conn = mysql.connector.pooling.MySQLConnectionPool(pool_size=10, **dbConfig) + # self.cursor will be none UNLESS you're wrapping a function in @handlecursor, which automatically opens and + # closes connections for you. 
+ self.cursor=None + self.dbConfig=dbConfig + # Construct database URL + #'mysqlx://vvadmin:var1ant@127.0.0.1/validator' + self.path="mysqlx://"+dbConfig["user"]+":"+dbConfig["password"]+"@"+dbConfig["host"]+"/"+dbConfig["database"] + os.environ["VALIDATOR_DB_URL"]=self.path + self.val=val + self.insert = vvDBInsert(self.conn,self.cursor) # contains dbinsert, dbupdate + self.get = vvDBGet(self.conn,self.cursor) # contains dbfetchone, dbfetchall + + # from dbquery + @handleCursor + def query_with_fetchone(self,entry, table): + #if table == 'transcript_info': + query = "SELECT refSeqID, description, transcriptVariant, currentVersion, hgncSymbol, utaSymbol, updated, IF(updated < NOW() - INTERVAL 3 MONTH , 'true', 'false') FROM transcript_info WHERE refSeqID = '%s'" %(entry) + self.cursor.execute(query) + row = self.cursor.fetchone() + if row is None: + row = ['none', 'No data'] + logger.debug("No data returned from query "+str(query)) + return row + # From data + # Retrieve transcript information + def in_entries(self,entry, table): + # Use dbquery.py to connect to mysql and return the necessary data + data={} + if table == 'transcript_info': + row = self.query_with_fetchone(entry, table) + if row[0] == 'error': + data['error'] = row[0] + data['description'] = row[1] + elif row[0] == 'none': + data['none'] = row[0] + data['description'] = row[1] + else: + data['accession'] = row[0] + data['description'] = row[1] + data['variant'] = row[2] + data['version'] = row[3] + data['hgnc_symbol'] = row[4] + data['uta_symbol'] = row[5] + data['updated'] = row[6] + data['expiry'] = row[7] + return data + def update_transcript_info_record(self,accession, hdp): + # Search Entrez for corresponding record for the RefSeq ID + # Prime these entries, just in case. 
+ previous_entry = self.in_entries(accession, 'transcript_info') + accession = accession + description = previous_entry['description'] + variant = previous_entry['variant'] + version = previous_entry['version'] + hgnc_symbol = previous_entry['hgnc_symbol'] + uta_symbol = previous_entry['uta_symbol'] + try: + record = entrez_efetch(self.val,db="nucleotide", id=accession, rettype="gb", retmode="text") + version = record.id + description = record.description + variant = '0' + + if 'transcript variant' in description: + tv = re.search('transcript variant \w+', description) + tv = str(tv.group(0)) + tv = tv.replace('transcript variant', '') + variant = tv.strip() + variant = variant.upper() # Some tv descriptions are a or A + else: + variant = '0' + + # Get information from UTA + try: + uta_info = hdp.get_tx_identity_info(version) + except: + version_ac_ver = version.split('.') + version = version_ac_ver[0] + '.' + str(int(version_ac_ver[1]) - 1) + uta_info = hdp.get_tx_identity_info(version) + + uta_symbol = str(uta_info[6]) + + # First perform a search against the input gene symbol or the symbol inferred from UTA + initial = hgnc_rest(path = "/fetch/symbol/" + uta_symbol) + # Check for a record + if str(initial['record']['response']['numFound']) != '0': + hgnc_symbol = uta_symbol + # No record found, is it a previous symbol? + else: + # Search hgnc rest to see if symbol is out of date + rest_data = hgnc_rest(path = "/search/prev_symbol/" + uta_symbol) + # If the name is correct no record will be found + if rest_data['error'] == 'false': + if int(rest_data['record']['response']['numFound']) == 0: + hgnc_symbol = uta_info[6] + else: + hgnc_symbol = rest_data['record']['response']['docs'][0]['symbol'] + else: + hgnc_symbol = 'unassigned' + + # List of connection error types. May need to be expanded. + # Outcome - Put off update for 3 months! 
+ except Exception as e: + if not str(e) == '': + # Issues with DNSSEC for the nih.gov + raise + + # Query information + # query_info = [accession, description, variant, version, hgnc_symbol, uta_symbol] + query_info = [version, description, variant, version, hgnc_symbol, uta_symbol] + table='transcript_info' + + # Update the transcript_info table (needs plugging in) + returned_data = self.in_entries(version, table) + # If the entry is not in the database add it + if 'none' in returned_data: + self.insert.add_entry(version, query_info, table) + # If the data in the entry has changed, update it + else: + self.insert.update_entry(version, query_info, table) + return + + def update_refSeqGene_loci(self,rsg_data): + # First query the database + entry_exists = self.get.get_refSeqGene_data_by_refSeqGeneID(rsg_data[0], rsg_data[2]) + if entry_exists[0] == 'none': + self.insert.insert_refSeqGene_data(rsg_data) + else: + self.insert.update_refSeqGene_data(rsg_data) + def update_lrg_rs_lookup(self,lrg_rs_lookup): + # First query the database + rsgID = self.get.get_RefSeqGeneID_from_lrgID(lrg_rs_lookup[0]) + if rsgID == 'none': + self.insert.insert_RefSeqGeneID_from_lrgID(lrg_rs_lookup) + def update_lrgt_rst(self,lrgtx_to_rstID): + # First query the database + rstID = self.get.get_RefSeqTranscriptID_from_lrgTranscriptID(lrgtx_to_rstID[0]) + if rstID == 'none': + self.insert.insert_LRG_transcript_data(lrgtx_to_rstID) + def update_lrg_p_rs_p_lookup(self,lrg_p, rs_p): + # First query the database + rspID = self.get.get_RefSeqProteinID_from_lrgProteinID(lrg_p) + if rspID == 'none': + self.insert.insert_LRG_protein_data(lrg_p, rs_p) diff --git a/VariantValidator/modules/vvFunctions.py b/VariantValidator/modules/vvFunctions.py new file mode 100644 index 00000000..37f3fc22 --- /dev/null +++ b/VariantValidator/modules/vvFunctions.py @@ -0,0 +1,75 @@ +from Bio import Entrez,SeqIO +import httplib2 as http +import json +from urlparse import urlparse #Python 2 +import functools +#from 
urllib.parse import urlparse #Python 3 + +def handleCursor(func): + #Decorator function for handling opening and closing cursors. + @functools.wraps(func) + def wrapper(self,*args,**kwargs): + try: + self.cursor = self.conn.cursor(buffered=True) + out=func(*args,**kwargs) + self.cursor.close() + self.cursor=None + return out + except: + try: + self.cursor.close() + self.cursor=None + except: + self.cursor=None + raise + return wrapper + +def entrez_efetch(val, db, id, rettype, retmode): + Entrez.email = val.entrezID + handle = Entrez.efetch(db=db, id=id, rettype=rettype, retmode=retmode) + record = SeqIO.read(handle, "gb") + handle.close() + return record + +def hgnc_rest(path): + data = { + 'record': '', + 'error': 'false' + } + # HGNC server + headers = { + 'Accept': 'application/json', + } + uri = 'http://rest.genenames.org' + target = urlparse(uri + path) + method = 'GET' + body = '' + h = http.Http() + # collect the response + response, content = h.request( + target.geturl(), + method, + body, + headers) + if response['status'] == '200': + # assume that content is a json reply + # parse content with the json module + data['record'] = json.loads(content) + else: + data['error'] = "Unable to contact the HGNC database: Please try again later" + return data + +# method for final validation and stringifying parsed hgvs variants prior to printing/passing to html +def valstr(hgvs_variant): + """ + Function to ensure the required number of reference bases are displayed in descriptions + """ + cp_hgvs_variant = copy.deepcopy(hgvs_variant) + if cp_hgvs_variant.posedit.edit.type == 'identity': + if len(cp_hgvs_variant.posedit.edit.ref) > 1: + cp_hgvs_variant = output_formatter.remove_reference(cp_hgvs_variant) + cp_hgvs_variant = str(cp_hgvs_variant) + else: + cp_hgvs_variant = output_formatter.remove_reference(cp_hgvs_variant) + cp_hgvs_variant = str(cp_hgvs_variant) + return cp_hgvs_variant diff --git a/VariantValidator/modules/vvLogging.py 
b/VariantValidator/modules/vvLogging.py new file mode 100644 index 00000000..6f66d7ff --- /dev/null +++ b/VariantValidator/modules/vvLogging.py @@ -0,0 +1,144 @@ + +import logging +import datetime +import os +from StringIO import StringIO + +VALIDATOR_DEBUG=os.environ.get('VALIDATOR_DEBUG') + +class logger(): + #Grand unified variant validator logging static class. + #logString=StringIO() + @staticmethod + def loggingSetup(): + # Set up logging + # I need to use the VVObfuscator in the logger global dictionary + # becuase it's a global variable tied to the logger module + # Modules are singletons, but their variables are not. Consequently + # this is the only sensible way to ensure that the logging setup is called + # once. If another programmer has any better ideas that leave these functions + # with a configured VV logger object that only has its handlers added once, + # feel free to fix it up. + #print("Entering setup") + #The logger must be at the very least drawn from the logging library's dictionary + #for every time this module is imported. + logger.logger = logging.getLogger("VV") + if "VVObfuscator" in logging.Logger.manager.loggerDict: + return + logging.getLogger("VVObfuscator") + #print("Engaging setup") + + global VALIDATOR_DEBUG + # Check envrionment variables + VALIDATOR_DEBUG=os.environ.get('VALIDATOR_DEBUG') + #print("VD",os.environ.get('VALIDATOR_DEBUG')) + + if VALIDATOR_DEBUG is None: + VALIDATOR_DEBUG = "info console" # Set default value + # Set logging urgency levels. 
+ if "debug" in VALIDATOR_DEBUG: + logLevel = logging.DEBUG + elif "warning" in VALIDATOR_DEBUG: + logLevel = logging.WARNING + elif "info" in VALIDATOR_DEBUG: + logLevel = logging.INFO + elif "error" in VALIDATOR_DEBUG: + logLevel = logging.ERROR + elif "critical" in VALIDATOR_DEBUG: + logLevel = logging.CRITICAL + + if "file" in VALIDATOR_DEBUG: + logFileHandler = logging.FileHandler("VV-log.txt") + logFileHandler.setLevel(logLevel) + logger.logger.addHandler(logFileHandler) + if "console" in VALIDATOR_DEBUG: + logConsoleHandler = logging.StreamHandler() + logConsoleHandler.setLevel(logLevel) + logger.logger.addHandler(logConsoleHandler) + # Create a log string to add to validations. + # Since it has to survive multiple imports, I'm stuffing it into the logger dictionary. + # Feel free to amend this coding monstrosity without my knowledge. + logging.Logger.manager.loggerDict["VVLogString"]=StringIO() + logStringHandler = logging.StreamHandler(logging.Logger.manager.loggerDict["VVLogString"]) + # We want the validation metadata to not contain debug info which may change with program operation + logStringHandler.setLevel(logging.INFO) + logger.logger.addHandler(logStringHandler) + logger.logger.setLevel(logging.DEBUG) # The logger itself must be set with an appropriate level of urgency. + + logger.logger.propagate = False + @staticmethod + def debug(s): + logger.loggingSetup() + logger.logger.debug("DEBUG: "+s) + @staticmethod + def info(s): + logger.loggingSetup() + logger.logger.info("INFO : "+s) + @staticmethod + def warning(s): + logger.loggingSetup() + logger.logger.warning("WARN : "+s) + @staticmethod + def error(s): + logger.loggingSetup() + logger.logger.error("ERROR: "+s) + @staticmethod + def critical(s): + logger.loggingSetup() + logger.logger.critical("CRIT : "+s) + @staticmethod + def trace(s,v=None): + #v should be a dictionary with a 'timing' key. 
+ #global VALIDATOR_DEBUG + #print(VALIDATOR_DEBUG) + #if "trace" in VALIDATOR_DEBUG: + # logger.loggingSetup() + if not v: + logger.logger.debug("TRACE: "+s) + else: + logger.logger.debug("TRACE: "+s) + v['timing']['traceLabels'].append(s) + v['timing']['traceTimes'].append(str((datetime.datetime.now()-v['timing']['checkDT']).microseconds//1000)) + v['timing']['checkDT']=datetime.datetime.now() + @staticmethod + def resub(s): + #Resubmit one or multiple variants + logger.loggingSetup() + logger.logger.warning("RESUB: "+s) + @staticmethod + def getString(): + logger.loggingSetup() + #print("RETURNING:") + #print(logging.Logger.manager.loggerDict["VVLogString"].getvalue()) + return logging.Logger.manager.loggerDict["VVLogString"].getvalue() + @staticmethod + def traceStart(v): + logger.loggingSetup() +# global VALIDATOR_DEBUG +# if "trace" in VALIDATOR_DEBUG: + if True: + v['timing']={} + v['timing']['traceLabels']=[] + v['timing']['traceTimes']=[] + v['timing']['startDT']=datetime.datetime.now() + v['timing']['checkDT']=datetime.datetime.now() + @staticmethod + def traceEnd(v): + logger.loggingSetup() + #global VALIDATOR_DEBUG + #if "trace" in VALIDATOR_DEBUG: + if True: + v['timing']['traceLabels'].append("complete") + v['timing']['traceTimes'].append((datetime.datetime.now()-v['timing']['startDT']).microseconds//1000) + del v['timing']['startDT'] + del v['timing']['checkDT'] + +#Test +#logger.debug("Message D") +#logger.info("Message I") +#logger.warning("Message W") +#logger.error("Message E") +#logger.critical("Message C")# + +#print("TEST "+logString.getvalue()) + diff --git a/VariantValidator/modules/vvObjects.py b/VariantValidator/modules/vvObjects.py index a27365f0..70ba2c2c 100644 --- a/VariantValidator/modules/vvObjects.py +++ b/VariantValidator/modules/vvObjects.py @@ -1,69 +1,147 @@ import os from configparser import ConfigParser,RawConfigParser -import io +import hgvs +import hgvs.parser +import hgvs.dataproviders.uta +import 
hgvs.dataproviders.seqfetcher +import hgvs.assemblymapper +import hgvs.variantmapper +import hgvs.sequencevariant +import hgvs.validator +import hgvs.exceptions +import hgvs.location +import hgvs.posedit +import hgvs.edit +import hgvs.normalizer +import re +#import io +from vvDatabase import vvDatabase +from vvLogging import logger + +# Custom Exceptions +class variantValidatorError(Exception): + pass + +''' +This file contains the validator object, which is instantiated in order to perform validator functions. +The validator contains configuration information and permanent copies of database links and the like. +Much of the validator's inner workings are stored in special one-off function container objects: +validator.db : The validator's MySQL database access functions + +The validator configuration is stored in ~/.config/VariantValidator/config.ini . This is loaded +when the validator object is initialized. + +Running variant validator should hopefully be as simple as writing a script like this: +import VariantValidator + +val=Validator() +val.validate("some kind of gene situation","the transcripts to use") + +''' + +''' + Renaming of variables : + 'seqrepo_directory': HGVS_SEQREPO_DIR, #self.seqrepoPath + 'uta_url': UTA_DB_URL, #self.utaPath + 'py_liftover_directory': PYLIFTOVER_DIR, #self.liftoverPath + 'variantvalidator_data_url': VALIDATOR_DB_URL, #self.db.path + 'entrez_id': ENTREZ_ID, #self.entrezID + 'variantvalidator_version': VERSION, #self.version + 'variantvalidator_hgvs_version': hgvs_version, #self.hgvsVersion + 'uta_schema': str(hdp.data_version()), #self.uta_schema + 'seqrepo_db': HGVS_SEQREPO_DIR.split('/')[-1] #self.seqrepoVersion +''' + -# uta_current_version='uta_20180821' -# UTA_DB_URL='postgresql://uta_admin:uta_admin@127.0.0.1/uta/' + uta_current_version -# seqrepo_current_version='2018-08-21' -# HGVS_SEQREPO_DIR='/home/buran/documents/workspace/ITS/seqrepo/'+seqrepo_current_version class Validator(): - #This object contains configuration 
options. - def __init__(self,hgvsPath=None,utaPath=None): - #First load from the configuration file, if it exists. + # This object contains configuration options. + def __init__(self): + # First load from the configuration file, if it exists. configName="config.ini" homePath=os.path.expanduser("~") configPath=os.path.join(homePath,".config","VariantValidator") if not os.path.isdir(configPath): os.makedirs(configPath) - #Now configpath points to the config file itself. + # Now configpath points to the config file itself. configPath=os.path.join(configPath,configName) - #Does the file exist? + # Does the file exist? if not os.path.exists(configPath): self.createConfig(configPath) - #Load the configuration file. + # Load the configuration file. with open(configPath) as file: lines=file.read() config=RawConfigParser(allow_no_value=True) - #print(configPath) config.read(configPath) - #print config.sections() - print config["seqrepo"]["location"] - ''' - #Load hgvs - if hgvsPath!=None: - os.environ['HGVS_SEQREPO_DIR']=hgvsPath - self.hgvsPath=hgvsPath + # The custom vvLogging module will set itself up using the VALDIATOR_DEBUG environment variable. 
+ logString = config["logging"]['string'] + os.environ["VALIDATOR_DEBUG"] = logString + + # Handle databases + self.entrezID=config["EntrezID"]["entrezID"] + if config["seqrepo"]["location"]!=None: + self.seqrepoVersion=config["seqrepo"]["version"] + self.seqrepoPath=config["seqrepo"]["location"]+self.seqrepoVersion + os.environ['HGVS_SEQREPO_DIR']=self.seqrepoPath else: - self.hgvsPath=hgvsPath - if utaPath!=None: - os.environ['UTA_DB_URL']=utaPath - self.utaPath=utaPath + raise ValueError("The seqrepo location has not been set in ~/.config/VariantValidator/config.ini") + os.environ['UTA_DB_URL']=config["uta"]["location"]+config["uta"]["version"] + self.utaPath=config["uta"]["location"]+config["uta"]["version"] + self.dbConfig = { + 'user': config["mysql"]["user"], + 'password':config["mysql"]["password"], + 'host': config["mysql"]["host"], + 'database':config["mysql"]["database"], + 'raise_on_warnings': True + } + self.db=vvDatabase(self,self.dbConfig) + # Set up versions + __version__ = config["variantValidator"]['version'] + self.version=__version__ + if re.match('^\d+\.\d+\.\d+$', __version__) is not None: + self.releasedVersion=True + _is_released_version = True else: - self.utaPath=utaPath - seqrepo_current_version='2018-08-21' - HGVS_SEQREPO_DIR='/home/buran/documents/workspace/ITS/seqrepo/'+seqrepo_current_version - #HGVS_SEQREPO_DIR='/local/seqrepo/'+seqrepo_current_version - os.environ['HGVS_SEQREPO_DIR']=HGVS_SEQREPO_DIR - uta_current_version='uta_20180821' - UTA_DB_URL='postgresql://uta_admin:uta_admin@127.0.0.1/uta/' + uta_current_version - #export postgresql://uta_admin:uta_admin@127.0.0.1/uta/uta_20180821 - os.environ['UTA_DB_URL']=UTA_DB_URL - #from VariantValidator import variantValidator as vv - - ''' - + self.releasedVersion=False + self.hgvsVersion=hgvs.__version__ + # Set up other configuration variables + self.liftoverPath=config["liftover"]["location"] + if not self.liftoverPath==None: + os.environ['PYLIFTOVER_DIR']=self.liftoverPath + 
self.entrezID=config["EntrezID"]['entrezid'] + # Set up HGVS + # Configure hgvs package global settings + hgvs.global_config.uta.pool_max = 25 + hgvs.global_config.formatting.max_ref_length = 1000000 + # Create HGVS objects + self.hdp = hgvs.dataproviders.uta.connect(pooling=True) + self.hp = hgvs.parser.Parser() #P arser + self.vr = hgvs.validator.Validator(self.hdp) # Validator + self.vm = hgvs.variantmapper.VariantMapper(self.hdp) # Variant mapper + # Create a lose vm instance + self.lose_vm = hgvs.variantmapper.VariantMapper(self.hdp, + replace_reference=True, + prevalidation_level=None + ) + self.nr_vm = hgvs.variantmapper.VariantMapper(self.hdp, replace_reference=False) + self.sf = hgvs.dataproviders.seqfetcher.SeqFetcher() # Seqfetcher + # Set standard genome builds + self.genome_builds = ['GRCh37', 'hg19', 'GRCh38'] + self.uta_schema = str(self.hdp.data_version()) + #Transfer function handle from other file. + self.validate=vvCore.validate def validate(self): pass def createConfig(self,outPath): - #This function reads from the default configuration file stored in the same folder as this module. - #Outpath should include a filename. + # This function reads from the default configuration file stored in the same folder as this module, + # and transfers it to outPath. + # Outpath should include a filename. 
lines=[] inPath=os.path.join(os.path.dirname(os.path.realpath(__file__)),"defaultConfig.ini") # print(os.path.join(inPath,"defaultConfig.ini")) From c2bc0c84cd911b87e8a7fc55443faba26eed4d97 Mon Sep 17 00:00:00 2001 From: buran Date: Mon, 14 Jan 2019 17:10:44 +0000 Subject: [PATCH 004/223] don som mo --- VariantValidator/modules/vvChromasomes.py | 2846 +++++++ VariantValidator/modules/vvCore.py | 8267 +++++++++++++++++++++ VariantValidator/modules/vvDBGet.py | 30 + VariantValidator/modules/vvDatabase.py | 386 +- VariantValidator/modules/vvFunctions.py | 20 +- VariantValidator/modules/vvObjects.py | 1 + 6 files changed, 11544 insertions(+), 6 deletions(-) create mode 100644 VariantValidator/modules/vvChromasomes.py create mode 100644 VariantValidator/modules/vvCore.py diff --git a/VariantValidator/modules/vvChromasomes.py b/VariantValidator/modules/vvChromasomes.py new file mode 100644 index 00000000..d7a49d5d --- /dev/null +++ b/VariantValidator/modules/vvChromasomes.py @@ -0,0 +1,2846 @@ + +""" +vvChromasomes.py + +Contains the necessary functions for matching RefSeq accessions with chromosome numbers +or alt loci names based on genome builds +""" + + +def supported_for_mapping(ac, primary_assembly): + sfm = 'false' + test_grc = to_chr_num_refseq(ac, primary_assembly) + if test_grc is not None: + sfm = 'true' + test_ucsc = to_chr_num_ucsc(ac, primary_assembly) + if test_ucsc is not None: + sfm = 'true' + return sfm + + +def to_accession(chr_num, primary_assembly): + # Available genome builds + GRCh37 = { + "1": "NC_000001.10", + "2": "NC_000002.11", + "3": "NC_000003.11", + "4": "NC_000004.11", + "5": "NC_000005.9", + "6": "NC_000006.11", + "7": "NC_000007.13", + "8": "NC_000008.10", + "9": "NC_000009.11", + "10": "NC_000010.10", + "11": "NC_000011.9", + "12": "NC_000012.11", + "13": "NC_000013.10", + "14": "NC_000014.8", + "15": "NC_000015.9", + "16": "NC_000016.9", + "17": "NC_000017.10", + "18": "NC_000018.9", + "19": "NC_000019.9", + "20": "NC_000020.10", + 
"21": "NC_000021.8", + "22": "NC_000022.10", + "23": "NC_000023.10", + "24": "NC_000024.9", + "x": "NC_000023.10", + "y": "NC_000024.9", + "X": "NC_000023.10", + "Y": "NC_000024.9", + "M": "NC_012920.1", + "m": "NC_012920.1", + + # UCSC alts + "11_GL000202_RANDOM": "NT_113921.2", + "17_CTG5_HAP1": "NT_167251.1", + "17_GL000203_RANDOM": "NT_113941.1", + "17_GL000204_RANDOM": "NT_113943.1", + "17_GL000205_RANDOM": "NT_113930.1", + "17_GL000206_RANDOM": "NT_113945.1", + "18_GL000207_RANDOM": "NT_113947.1", + "19_GL000208_RANDOM": "NT_113948.1", + "19_GL000209_RANDOM": "NT_113949.1", + "1_GL000191_RANDOM": "NT_113878.1", + "1_GL000192_RANDOM": "NT_167207.1", + "21_GL000210_RANDOM": "NT_113950.2", + "4_CTG9_HAP1": "NT_167250.1", + "4_GL000193_RANDOM": "NT_113885.1", + "4_GL000194_RANDOM": "NT_113888.1", + "6_APD_HAP1": "NT_167244.1", + "6_COX_HAP2": "NT_113891.2", + "6_DBB_HAP3": "NT_167245.1", + "6_MANN_HAP4": "NT_167246.1", + "6_MCF_HAP5": "NT_167247.1", + "6_QBL_HAP6": "NT_167248.1", + "6_SSTO_HAP7": "NT_167249.1", + "7_GL000195_RANDOM": "NT_113901.1", + "8_GL000196_RANDOM": "NT_113909.1", + "8_GL000197_RANDOM": "NT_113907.1", + "9_GL000198_RANDOM": "NT_113914.1", + "9_GL000199_RANDOM": "NT_113916.2", + "9_GL000200_RANDOM": "NT_113915.1", + "9_GL000201_RANDOM": "NT_113911.1", + "UN_GL000211": "NT_113961.1", + "UN_GL000212": "NT_113923.1", + "UN_GL000213": "NT_167208.1", + "UN_GL000214": "NT_167209.1", + "UN_GL000215": "NT_167210.1", + "UN_GL000216": "NT_167211.1", + "UN_GL000217": "NT_167212.1", + "UN_GL000218": "NT_113889.1", + "UN_GL000219": "NT_167213.1", + "UN_GL000220": "NT_167214.1", + "UN_GL000221": "NT_167215.1", + "UN_GL000222": "NT_167216.1", + "UN_GL000223": "NT_167217.1", + "UN_GL000224": "NT_167218.1", + "UN_GL000225": "NT_167219.1", + "UN_GL000226": "NT_167220.1", + "UN_GL000227": "NT_167221.1", + "UN_GL000228": "NT_167222.1", + "UN_GL000229": "NT_167223.1", + "UN_GL000230": "NT_167224.1", + "UN_GL000231": "NT_167225.1", + "UN_GL000232": "NT_167226.1", 
+ "UN_GL000233": "NT_167227.1", + "UN_GL000234": "NT_167228.1", + "UN_GL000235": "NT_167229.1", + "UN_GL000236": "NT_167230.1", + "UN_GL000237": "NT_167231.1", + "UN_GL000238": "NT_167232.1", + "UN_GL000239": "NT_167233.1", + "UN_GL000240": "NT_167234.1", + "UN_GL000241": "NT_167235.1", + "UN_GL000242": "NT_167236.1", + "UN_GL000243": "NT_167237.1", + "UN_GL000244": "NT_167238.1", + "UN_GL000245": "NT_167239.1", + "UN_GL000246": "NT_167240.1", + "UN_GL000247": "NT_167241.1", + "UN_GL000248": "NT_167242.1", + "UN_GL000249": "NT_167243.1", + + # GRC Alts + 'HG1472_PATCH': 'NW_004070864.2', + 'HG989_PATCH': 'NW_003571030.1', + 'HG1292_PATCH': 'NW_003871056.3', + 'HG1287_PATCH': 'NW_003871055.3', + 'HSCHR1_1_CTG31': 'NW_003315905.1', + 'HSCHR1_2_CTG31': 'NW_003315906.1', + 'HSCHR1_3_CTG31': 'NW_003315907.1', + 'HG1471_PATCH': 'NW_004070863.1', + 'HG1293_PATCH': 'NW_003871057.1', + 'HG1473_PATCH': 'NW_004070865.1', + 'HG999_1_PATCH': 'NW_003315903.1', + 'HG999_2_PATCH': 'NW_003315904.1', + 'HSCHR2_1_CTG1': 'NW_003315908.1', + 'HG953_PATCH': 'NW_004504299.1', + 'HG686_PATCH': 'NW_003571032.1', + 'HSCHR2_2_CTG12': 'NW_003571033.2', + 'HSCHR2_1_CTG12': 'NW_003315909.1', + 'HG1007_PATCH': 'NW_003571031.1', + 'HSCHR3_1_CTG1': 'NW_003871060.1', + 'HG325_PATCH': 'NW_003871059.1', + 'HG186_PATCH': 'NW_003315910.1', + 'HG957_PATCH': 'NW_004775426.1', + 'HG280_PATCH': 'NW_003315911.1', + 'HG1091_PATCH': 'NW_003871058.1', + 'HG991_PATCH': 'NW_003315912.1', + 'HSCHR3_1_CTG2_1': 'NW_003315913.1', + 'HG174_HG254_PATCH': 'NW_004775427.1', + 'HSCHR4_1_CTG6': 'NW_003315915.1', + 'HSCHR4_2_CTG9': 'NW_003315916.1', + 'HG706_PATCH': 'NW_003571035.1', + 'HSCHR4_1_CTG12': 'NW_003315914.1', + 'HG1032_PATCH': 'NW_003571034.1', + 'HSCHR5_2_CTG1': 'NW_003315920.1', + 'HSCHR5_3_CTG1': 'NW_003571036.1', + 'HSCHR5_1_CTG1': 'NW_003315917.2', + 'HSCHR5_1_CTG2': 'NW_003315918.1', + 'HG1063_PATCH': 'NW_003871061.1', + 'HG1082_HG167_PATCH': 'NW_004775428.1', + 'HSCHR5_1_CTG5': 'NW_003315919.1', + 
'HG27_PATCH': 'NW_004070866.1', + 'HG1322_PATCH': 'NW_003871063.1', + 'HSCHR6_1_CTG5': 'NW_003315921.1', + 'HG357_PATCH': 'NW_004504300.1', + 'HG1304_PATCH': 'NW_003871062.1', + 'HG193_PATCH': 'NW_004775429.1', + 'HSCHR6_2_CTG5': 'NW_004166862.1', + 'HG736_PATCH': 'NW_003571039.1', + 'HG14_PATCH': 'NW_003571038.1', + 'HG444_PATCH': 'NW_004775430.1', + 'HG1257_PATCH': 'NW_003871064.1', + 'HG946_PATCH': 'NW_003571041.1', + 'HG115_PATCH': 'NW_003571037.1', + 'HG1308_PATCH': 'NW_003871065.1', + 'HSCHR7_1_CTG6': 'NW_003315922.2', + 'HG7_PATCH': 'NW_003571040.1', + 'HG19_PATCH': 'NW_003571042.1', + 'HG1699_PATCH': 'NW_004775431.1', + 'HG418_PATCH': 'NW_003871066.2', + 'HG104_HG975_PATCH': 'NW_003315923.1', + 'HG243_PATCH': 'NW_003315924.1', + 'HSCHR9_1_CTG1': 'NW_003315928.1', + 'HG962_PATCH': 'NW_003871067.1', + 'HSCHR9_1_CTG35': 'NW_003315929.1', + 'HSCHR9_2_CTG35': 'NW_003315930.1', + 'HSCHR9_3_CTG35': 'NW_003315931.1', + 'HG50_PATCH': 'NW_004504301.1', + 'HG1502_PATCH': 'NW_004070869.1', + 'HG79_PATCH': 'NW_003315925.1', + 'HG1500_PATCH': 'NW_004070867.1', + 'HG1501_PATCH': 'NW_004070868.1', + 'HG998_1_PATCH': 'NW_003315926.1', + 'HG998_2_PATCH': 'NW_003315927.1', + 'HG905_PATCH': 'NW_003571043.1', + 'HG871_PATCH': 'NW_003871071.1', + 'HG544_PATCH': 'NW_003315932.1', + 'HSCHR10_1_CTG2': 'NW_003315934.1', + 'HSCHR10_1_CTG5': 'NW_003315935.1', + 'HG1211_PATCH': 'NW_003871068.1', + 'HG1074_PATCH': 'NW_004504302.1', + 'HG339_PATCH': 'NW_003871070.1', + 'HG979_PATCH': 'NW_004775432.1', + 'HG311_PATCH': 'NW_003871069.1', + 'HG995_PATCH': 'NW_003315933.1', + 'HG1479_PATCH': 'NW_004070870.1', + 'HG256_PATCH': 'NW_003871075.1', + 'HG873_PATCH': 'NW_003871082.1', + 'HSCHR11_1_CTG1_1': 'NW_003315936.1', + 'HG281_PATCH': 'NW_003571045.1', + 'HG142_HG150_NOVEL_TEST': 'NW_003871073.1', + 'HG151_NOVEL_TEST': 'NW_003871074.1', + 'HG536_PATCH': 'NW_003571046.1', + 'HG865_PATCH': 'NW_004070871.1', + 'HG414_PATCH': 'NW_003871081.1', + 'HG348_PATCH': 'NW_003871079.1', + 'HG305_PATCH': 
'NW_003871077.1', + 'HG388_HG400_PATCH': 'NW_003871080.1', + 'HG306_PATCH': 'NW_003871078.1', + 'HG122_PATCH': 'NW_003871072.2', + 'HG299_PATCH': 'NW_003871076.1', + 'HG858_PATCH': 'NW_003571048.1', + 'HSCHR12_1_CTG1': 'NW_003571049.1', + 'HG344_PATCH': 'NW_003871083.2', + 'HG1133_PATCH': 'NW_003571047.1', + 'HSCHR12_2_CTG2': 'NW_003571050.1', + 'HSCHR12_1_CTG2': 'NW_003315938.1', + 'HSCHR12_1_CTG2_1': 'NW_003315939.1', + 'HSCHR12_2_CTG2_1': 'NW_003315941.1', + 'HSCHR12_3_CTG2_1': 'NW_003315942.2', + 'HG1595_PATCH': 'NW_004504303.2', + 'HSCHR12_1_CTG5': 'NW_003315940.1', + 'HG996_PATCH': 'NW_003315937.1', + 'HG531_PATCH': 'NW_003571051.1', + 'HG1592_PATCH': 'NW_004166863.1', + 'HSCHR15_1_CTG4': 'NW_003315943.1', + 'HSCHR15_1_CTG8': 'NW_003315944.1', + 'HG971_PATCH': 'NW_003871084.1', + 'HSCHR16_1_CTG3_1': 'NW_003315945.1', + 'HG1208_PATCH': 'NW_003871085.1', + 'HSCHR16_2_CTG3_1': 'NW_003315946.1', + 'HG417_PATCH': 'NW_004070872.2', + 'HSCHR17_1_CTG1': 'NW_003315952.2', + 'HG990_PATCH': 'NW_003315951.1', + 'HG987_PATCH': 'NW_003315950.2', + 'HG1591_PATCH': 'NW_004775433.1', + 'HG883_PATCH': 'NW_003871090.1', + 'HG385_PATCH': 'NW_004166864.2', + 'HG75_PATCH': 'NW_003315949.1', + 'HG745_PATCH': 'NW_003315948.2', + 'HSCHR17_4_CTG4': 'NW_003871091.1', + 'HSCHR17_6_CTG4': 'NW_003871093.1', + 'HSCHR17_5_CTG4': 'NW_003871092.1', + 'HSCHR17_1_CTG4': 'NW_003315953.1', + 'HG185_PATCH': 'NW_003571052.1', + 'HG1146_PATCH': 'NW_003871086.1', + 'HG183_PATCH': 'NW_003315947.1', + 'HG747_PATCH': 'NW_003871088.1', + 'HSCHR17_2_CTG4': 'NW_003315954.1', + 'HSCHR17_3_CTG4': 'NW_003315955.1', + 'HG748_PATCH': 'NW_003871089.1', + 'HG271_PATCH': 'NW_003871087.1', + 'HSCHR18_1_CTG1_1': 'NW_003315956.1', + 'HSCHR18_2_CTG1_1': 'NW_003315959.1', + 'HSCHR18_2_CTG2': 'NW_003315960.1', + 'HSCHR18_1_CTG2': 'NW_003315957.1', + 'HSCHR18_1_CTG2_1': 'NW_003315958.1', + 'HSCHR18_2_CTG2_1': 'NW_003315961.1', + 'HG729_PATCH': 'NW_003871094.1', + 'HG730_PATCH': 'NW_003571053.2', + 'HSCHR19_1_CTG3': 
'NW_003315962.1', + 'HSCHR19_2_CTG3': 'NW_003315964.2', + 'HSCHR19_3_CTG3': 'NW_003315965.1', + 'HSCHR19_1_CTG3_1': 'NW_003315963.1', + 'HG1350_HG959_PATCH': 'NW_004775434.1', + 'HG1079_PATCH': 'NW_004166865.1', + 'HSCHR19LRC_COX1_CTG1': 'NW_003571054.1', + 'HSCHR19LRC_COX2_CTG1': 'NW_003571055.1', + 'HSCHR19LRC_LRC_I_CTG1': 'NW_003571056.1', + 'HSCHR19LRC_LRC_J_CTG1': 'NW_003571057.1', + 'HSCHR19LRC_LRC_S_CTG1': 'NW_003571058.1', + 'HSCHR19LRC_LRC_T_CTG1': 'NW_003571059.1', + 'HSCHR19LRC_PGF1_CTG1': 'NW_003571060.1', + 'HSCHR19LRC_PGF2_CTG1': 'NW_003571061.1', + 'HSCHR20_1_CTG1': 'NW_003315966.1', + 'HG144_PATCH': 'NW_003871095.1', + 'HG944_PATCH': 'NW_004504304.1', + 'HG506_HG507_HG1000_PATCH': 'NW_003571063.2', + 'HSCHR21_1_CTG1_1': 'NW_003315967.1', + 'HSCHR21_2_CTG1_1': 'NW_003315968.1', + 'HSCHR21_3_CTG1_1': 'NW_003315969.1', + 'HSCHR21_4_CTG1_1': 'NW_003315970.1', + 'HG237_PATCH': 'NW_004775435.1', + 'HG1487_PATCH': 'NW_004070874.1', + 'HG1486_PATCH': 'NW_004070873.1', + 'HG1488_PATCH': 'NW_004070875.1', + 'HG329_PATCH': 'NW_003871096.1', + 'HSCHR22_1_CTG2': 'NW_003315972.1', + 'HSCHR22_1_CTG1': 'NW_003315971.2', + 'HSCHR22_2_CTG1': 'NW_004504305.1', + 'HG497_PATCH': 'NW_004070876.1', + 'HG480_HG481_PATCH': 'NW_003571064.2', + 'HG1423_PATCH': 'NW_003871098.1', + 'HG1424_PATCH': 'NW_003871099.1', + 'HG1435_PATCH': 'NW_004070879.1', + 'HG29_PATCH': 'NW_004166866.1', + 'HG1436_HG1432_PATCH': 'NW_004070880.2', + 'HG1433_PATCH': 'NW_004070877.1', + 'HG1437_PATCH': 'NW_004070881.1', + 'HG1438_PATCH': 'NW_004070882.1', + 'HG1425_PATCH': 'NW_003871100.1', + 'HG1426_PATCH': 'NW_003871101.3', + 'HG1439_PATCH': 'NW_004070883.1', + 'HG1440_PATCH': 'NW_004070884.1', + 'HG1441_PATCH': 'NW_004070885.1', + 'HG375_PATCH': 'NW_003871102.1', + 'HG1434_PATCH': 'NW_004070878.1', + 'HG1462_PATCH': 'NW_004070891.1', + 'HG1463_PATCH': 'NW_004070892.1', + 'HG1490_PATCH': 'NW_004070893.1', + 'HG1442_PATCH': 'NW_004070886.1', + 'HG1443_HG1444_PATCH': 'NW_004070887.1', + 
'HG1453_PATCH': 'NW_004070888.1', + 'HG1458_PATCH': 'NW_004070889.1', + 'HG1459_PATCH': 'NW_004070890.2', + 'HG1497_PATCH': 'NW_003871103.3', + 'HSCHR6_MHC_APD_CTG1': 'NT_167244.1', + 'HSCHR6_MHC_COX_CTG1': 'NT_113891.2', + 'HSCHR6_MHC_DBB_CTG1': 'NT_167245.1', + 'HSCHR6_MHC_MANN_CTG1': 'NT_167246.1', + 'HSCHR6_MHC_MCF_CTG1': 'NT_167247.1', + 'HSCHR6_MHC_QBL_CTG1': 'NT_167248.1', + 'HSCHR6_MHC_SSTO_CTG1': 'NT_167249.1', + 'HSCHR4_1_CTG9': 'NT_167250.1', + 'HSCHR17_1_CTG5': 'NT_167251.1' + } + + hg19 = { + "1": "NC_000001.10", + "2": "NC_000002.11", + "3": "NC_000003.11", + "4": "NC_000004.11", + "5": "NC_000005.9", + "6": "NC_000006.11", + "7": "NC_000007.13", + "8": "NC_000008.10", + "9": "NC_000009.11", + "10": "NC_000010.10", + "11": "NC_000011.9", + "12": "NC_000012.11", + "13": "NC_000013.10", + "14": "NC_000014.8", + "15": "NC_000015.9", + "16": "NC_000016.9", + "17": "NC_000017.10", + "18": "NC_000018.9", + "19": "NC_000019.9", + "20": "NC_000020.10", + "21": "NC_000021.8", + "22": "NC_000022.10", + "23": "NC_000023.10", + "24": "NC_000024.9", + "x": "NC_000023.10", + "y": "NC_000024.9", + "X": "NC_000023.10", + "Y": "NC_000024.9", + "M": "NC_001807.4", + "m": "NC_001807.4", + + # UCSC alts + "11_GL000202_RANDOM": "NT_113921.2", + "17_CTG5_HAP1": "NT_167251.1", + "17_GL000203_RANDOM": "NT_113941.1", + "17_GL000204_RANDOM": "NT_113943.1", + "17_GL000205_RANDOM": "NT_113930.1", + "17_GL000206_RANDOM": "NT_113945.1", + "18_GL000207_RANDOM": "NT_113947.1", + "19_GL000208_RANDOM": "NT_113948.1", + "19_GL000209_RANDOM": "NT_113949.1", + "1_GL000191_RANDOM": "NT_113878.1", + "1_GL000192_RANDOM": "NT_167207.1", + "21_GL000210_RANDOM": "NT_113950.2", + "4_CTG9_HAP1": "NT_167250.1", + "4_GL000193_RANDOM": "NT_113885.1", + "4_GL000194_RANDOM": "NT_113888.1", + "6_APD_HAP1": "NT_167244.1", + "6_COX_HAP2": "NT_113891.2", + "6_DBB_HAP3": "NT_167245.1", + "6_MANN_HAP4": "NT_167246.1", + "6_MCF_HAP5": "NT_167247.1", + "6_QBL_HAP6": "NT_167248.1", + "6_SSTO_HAP7": 
"NT_167249.1", + "7_GL000195_RANDOM": "NT_113901.1", + "8_GL000196_RANDOM": "NT_113909.1", + "8_GL000197_RANDOM": "NT_113907.1", + "9_GL000198_RANDOM": "NT_113914.1", + "9_GL000199_RANDOM": "NT_113916.2", + "9_GL000200_RANDOM": "NT_113915.1", + "9_GL000201_RANDOM": "NT_113911.1", + "UN_GL000211": "NT_113961.1", + "UN_GL000212": "NT_113923.1", + "UN_GL000213": "NT_167208.1", + "UN_GL000214": "NT_167209.1", + "UN_GL000215": "NT_167210.1", + "UN_GL000216": "NT_167211.1", + "UN_GL000217": "NT_167212.1", + "UN_GL000218": "NT_113889.1", + "UN_GL000219": "NT_167213.1", + "UN_GL000220": "NT_167214.1", + "UN_GL000221": "NT_167215.1", + "UN_GL000222": "NT_167216.1", + "UN_GL000223": "NT_167217.1", + "UN_GL000224": "NT_167218.1", + "UN_GL000225": "NT_167219.1", + "UN_GL000226": "NT_167220.1", + "UN_GL000227": "NT_167221.1", + "UN_GL000228": "NT_167222.1", + "UN_GL000229": "NT_167223.1", + "UN_GL000230": "NT_167224.1", + "UN_GL000231": "NT_167225.1", + "UN_GL000232": "NT_167226.1", + "UN_GL000233": "NT_167227.1", + "UN_GL000234": "NT_167228.1", + "UN_GL000235": "NT_167229.1", + "UN_GL000236": "NT_167230.1", + "UN_GL000237": "NT_167231.1", + "UN_GL000238": "NT_167232.1", + "UN_GL000239": "NT_167233.1", + "UN_GL000240": "NT_167234.1", + "UN_GL000241": "NT_167235.1", + "UN_GL000242": "NT_167236.1", + "UN_GL000243": "NT_167237.1", + "UN_GL000244": "NT_167238.1", + "UN_GL000245": "NT_167239.1", + "UN_GL000246": "NT_167240.1", + "UN_GL000247": "NT_167241.1", + "UN_GL000248": "NT_167242.1", + "UN_GL000249": "NT_167243.1", + + # GRC Alts + 'HG1472_PATCH': 'NW_004070864.2', + 'HG989_PATCH': 'NW_003571030.1', + 'HG1292_PATCH': 'NW_003871056.3', + 'HG1287_PATCH': 'NW_003871055.3', + 'HSCHR1_1_CTG31': 'NW_003315905.1', + 'HSCHR1_2_CTG31': 'NW_003315906.1', + 'HSCHR1_3_CTG31': 'NW_003315907.1', + 'HG1471_PATCH': 'NW_004070863.1', + 'HG1293_PATCH': 'NW_003871057.1', + 'HG1473_PATCH': 'NW_004070865.1', + 'HG999_1_PATCH': 'NW_003315903.1', + 'HG999_2_PATCH': 'NW_003315904.1', + 
'HSCHR2_1_CTG1': 'NW_003315908.1', + 'HG953_PATCH': 'NW_004504299.1', + 'HG686_PATCH': 'NW_003571032.1', + 'HSCHR2_2_CTG12': 'NW_003571033.2', + 'HSCHR2_1_CTG12': 'NW_003315909.1', + 'HG1007_PATCH': 'NW_003571031.1', + 'HSCHR3_1_CTG1': 'NW_003871060.1', + 'HG325_PATCH': 'NW_003871059.1', + 'HG186_PATCH': 'NW_003315910.1', + 'HG957_PATCH': 'NW_004775426.1', + 'HG280_PATCH': 'NW_003315911.1', + 'HG1091_PATCH': 'NW_003871058.1', + 'HG991_PATCH': 'NW_003315912.1', + 'HSCHR3_1_CTG2_1': 'NW_003315913.1', + 'HG174_HG254_PATCH': 'NW_004775427.1', + 'HSCHR4_1_CTG6': 'NW_003315915.1', + 'HSCHR4_2_CTG9': 'NW_003315916.1', + 'HG706_PATCH': 'NW_003571035.1', + 'HSCHR4_1_CTG12': 'NW_003315914.1', + 'HG1032_PATCH': 'NW_003571034.1', + 'HSCHR5_2_CTG1': 'NW_003315920.1', + 'HSCHR5_3_CTG1': 'NW_003571036.1', + 'HSCHR5_1_CTG1': 'NW_003315917.2', + 'HSCHR5_1_CTG2': 'NW_003315918.1', + 'HG1063_PATCH': 'NW_003871061.1', + 'HG1082_HG167_PATCH': 'NW_004775428.1', + 'HSCHR5_1_CTG5': 'NW_003315919.1', + 'HG27_PATCH': 'NW_004070866.1', + 'HG1322_PATCH': 'NW_003871063.1', + 'HSCHR6_1_CTG5': 'NW_003315921.1', + 'HG357_PATCH': 'NW_004504300.1', + 'HG1304_PATCH': 'NW_003871062.1', + 'HG193_PATCH': 'NW_004775429.1', + 'HSCHR6_2_CTG5': 'NW_004166862.1', + 'HG736_PATCH': 'NW_003571039.1', + 'HG14_PATCH': 'NW_003571038.1', + 'HG444_PATCH': 'NW_004775430.1', + 'HG1257_PATCH': 'NW_003871064.1', + 'HG946_PATCH': 'NW_003571041.1', + 'HG115_PATCH': 'NW_003571037.1', + 'HG1308_PATCH': 'NW_003871065.1', + 'HSCHR7_1_CTG6': 'NW_003315922.2', + 'HG7_PATCH': 'NW_003571040.1', + 'HG19_PATCH': 'NW_003571042.1', + 'HG1699_PATCH': 'NW_004775431.1', + 'HG418_PATCH': 'NW_003871066.2', + 'HG104_HG975_PATCH': 'NW_003315923.1', + 'HG243_PATCH': 'NW_003315924.1', + 'HSCHR9_1_CTG1': 'NW_003315928.1', + 'HG962_PATCH': 'NW_003871067.1', + 'HSCHR9_1_CTG35': 'NW_003315929.1', + 'HSCHR9_2_CTG35': 'NW_003315930.1', + 'HSCHR9_3_CTG35': 'NW_003315931.1', + 'HG50_PATCH': 'NW_004504301.1', + 'HG1502_PATCH': 'NW_004070869.1', + 
'HG79_PATCH': 'NW_003315925.1', + 'HG1500_PATCH': 'NW_004070867.1', + 'HG1501_PATCH': 'NW_004070868.1', + 'HG998_1_PATCH': 'NW_003315926.1', + 'HG998_2_PATCH': 'NW_003315927.1', + 'HG905_PATCH': 'NW_003571043.1', + 'HG871_PATCH': 'NW_003871071.1', + 'HG544_PATCH': 'NW_003315932.1', + 'HSCHR10_1_CTG2': 'NW_003315934.1', + 'HSCHR10_1_CTG5': 'NW_003315935.1', + 'HG1211_PATCH': 'NW_003871068.1', + 'HG1074_PATCH': 'NW_004504302.1', + 'HG339_PATCH': 'NW_003871070.1', + 'HG979_PATCH': 'NW_004775432.1', + 'HG311_PATCH': 'NW_003871069.1', + 'HG995_PATCH': 'NW_003315933.1', + 'HG1479_PATCH': 'NW_004070870.1', + 'HG256_PATCH': 'NW_003871075.1', + 'HG873_PATCH': 'NW_003871082.1', + 'HSCHR11_1_CTG1_1': 'NW_003315936.1', + 'HG281_PATCH': 'NW_003571045.1', + 'HG142_HG150_NOVEL_TEST': 'NW_003871073.1', + 'HG151_NOVEL_TEST': 'NW_003871074.1', + 'HG536_PATCH': 'NW_003571046.1', + 'HG865_PATCH': 'NW_004070871.1', + 'HG414_PATCH': 'NW_003871081.1', + 'HG348_PATCH': 'NW_003871079.1', + 'HG305_PATCH': 'NW_003871077.1', + 'HG388_HG400_PATCH': 'NW_003871080.1', + 'HG306_PATCH': 'NW_003871078.1', + 'HG122_PATCH': 'NW_003871072.2', + 'HG299_PATCH': 'NW_003871076.1', + 'HG858_PATCH': 'NW_003571048.1', + 'HSCHR12_1_CTG1': 'NW_003571049.1', + 'HG344_PATCH': 'NW_003871083.2', + 'HG1133_PATCH': 'NW_003571047.1', + 'HSCHR12_2_CTG2': 'NW_003571050.1', + 'HSCHR12_1_CTG2': 'NW_003315938.1', + 'HSCHR12_1_CTG2_1': 'NW_003315939.1', + 'HSCHR12_2_CTG2_1': 'NW_003315941.1', + 'HSCHR12_3_CTG2_1': 'NW_003315942.2', + 'HG1595_PATCH': 'NW_004504303.2', + 'HSCHR12_1_CTG5': 'NW_003315940.1', + 'HG996_PATCH': 'NW_003315937.1', + 'HG531_PATCH': 'NW_003571051.1', + 'HG1592_PATCH': 'NW_004166863.1', + 'HSCHR15_1_CTG4': 'NW_003315943.1', + 'HSCHR15_1_CTG8': 'NW_003315944.1', + 'HG971_PATCH': 'NW_003871084.1', + 'HSCHR16_1_CTG3_1': 'NW_003315945.1', + 'HG1208_PATCH': 'NW_003871085.1', + 'HSCHR16_2_CTG3_1': 'NW_003315946.1', + 'HG417_PATCH': 'NW_004070872.2', + 'HSCHR17_1_CTG1': 'NW_003315952.2', + 'HG990_PATCH': 
'NW_003315951.1', + 'HG987_PATCH': 'NW_003315950.2', + 'HG1591_PATCH': 'NW_004775433.1', + 'HG883_PATCH': 'NW_003871090.1', + 'HG385_PATCH': 'NW_004166864.2', + 'HG75_PATCH': 'NW_003315949.1', + 'HG745_PATCH': 'NW_003315948.2', + 'HSCHR17_4_CTG4': 'NW_003871091.1', + 'HSCHR17_6_CTG4': 'NW_003871093.1', + 'HSCHR17_5_CTG4': 'NW_003871092.1', + 'HSCHR17_1_CTG4': 'NW_003315953.1', + 'HG185_PATCH': 'NW_003571052.1', + 'HG1146_PATCH': 'NW_003871086.1', + 'HG183_PATCH': 'NW_003315947.1', + 'HG747_PATCH': 'NW_003871088.1', + 'HSCHR17_2_CTG4': 'NW_003315954.1', + 'HSCHR17_3_CTG4': 'NW_003315955.1', + 'HG748_PATCH': 'NW_003871089.1', + 'HG271_PATCH': 'NW_003871087.1', + 'HSCHR18_1_CTG1_1': 'NW_003315956.1', + 'HSCHR18_2_CTG1_1': 'NW_003315959.1', + 'HSCHR18_2_CTG2': 'NW_003315960.1', + 'HSCHR18_1_CTG2': 'NW_003315957.1', + 'HSCHR18_1_CTG2_1': 'NW_003315958.1', + 'HSCHR18_2_CTG2_1': 'NW_003315961.1', + 'HG729_PATCH': 'NW_003871094.1', + 'HG730_PATCH': 'NW_003571053.2', + 'HSCHR19_1_CTG3': 'NW_003315962.1', + 'HSCHR19_2_CTG3': 'NW_003315964.2', + 'HSCHR19_3_CTG3': 'NW_003315965.1', + 'HSCHR19_1_CTG3_1': 'NW_003315963.1', + 'HG1350_HG959_PATCH': 'NW_004775434.1', + 'HG1079_PATCH': 'NW_004166865.1', + 'HSCHR19LRC_COX1_CTG1': 'NW_003571054.1', + 'HSCHR19LRC_COX2_CTG1': 'NW_003571055.1', + 'HSCHR19LRC_LRC_I_CTG1': 'NW_003571056.1', + 'HSCHR19LRC_LRC_J_CTG1': 'NW_003571057.1', + 'HSCHR19LRC_LRC_S_CTG1': 'NW_003571058.1', + 'HSCHR19LRC_LRC_T_CTG1': 'NW_003571059.1', + 'HSCHR19LRC_PGF1_CTG1': 'NW_003571060.1', + 'HSCHR19LRC_PGF2_CTG1': 'NW_003571061.1', + 'HSCHR20_1_CTG1': 'NW_003315966.1', + 'HG144_PATCH': 'NW_003871095.1', + 'HG944_PATCH': 'NW_004504304.1', + 'HG506_HG507_HG1000_PATCH': 'NW_003571063.2', + 'HSCHR21_1_CTG1_1': 'NW_003315967.1', + 'HSCHR21_2_CTG1_1': 'NW_003315968.1', + 'HSCHR21_3_CTG1_1': 'NW_003315969.1', + 'HSCHR21_4_CTG1_1': 'NW_003315970.1', + 'HG237_PATCH': 'NW_004775435.1', + 'HG1487_PATCH': 'NW_004070874.1', + 'HG1486_PATCH': 'NW_004070873.1', + 
'HG1488_PATCH': 'NW_004070875.1', + 'HG329_PATCH': 'NW_003871096.1', + 'HSCHR22_1_CTG2': 'NW_003315972.1', + 'HSCHR22_1_CTG1': 'NW_003315971.2', + 'HSCHR22_2_CTG1': 'NW_004504305.1', + 'HG497_PATCH': 'NW_004070876.1', + 'HG480_HG481_PATCH': 'NW_003571064.2', + 'HG1423_PATCH': 'NW_003871098.1', + 'HG1424_PATCH': 'NW_003871099.1', + 'HG1435_PATCH': 'NW_004070879.1', + 'HG29_PATCH': 'NW_004166866.1', + 'HG1436_HG1432_PATCH': 'NW_004070880.2', + 'HG1433_PATCH': 'NW_004070877.1', + 'HG1437_PATCH': 'NW_004070881.1', + 'HG1438_PATCH': 'NW_004070882.1', + 'HG1425_PATCH': 'NW_003871100.1', + 'HG1426_PATCH': 'NW_003871101.3', + 'HG1439_PATCH': 'NW_004070883.1', + 'HG1440_PATCH': 'NW_004070884.1', + 'HG1441_PATCH': 'NW_004070885.1', + 'HG375_PATCH': 'NW_003871102.1', + 'HG1434_PATCH': 'NW_004070878.1', + 'HG1462_PATCH': 'NW_004070891.1', + 'HG1463_PATCH': 'NW_004070892.1', + 'HG1490_PATCH': 'NW_004070893.1', + 'HG1442_PATCH': 'NW_004070886.1', + 'HG1443_HG1444_PATCH': 'NW_004070887.1', + 'HG1453_PATCH': 'NW_004070888.1', + 'HG1458_PATCH': 'NW_004070889.1', + 'HG1459_PATCH': 'NW_004070890.2', + 'HG1497_PATCH': 'NW_003871103.3', + 'HSCHR6_MHC_APD_CTG1': 'NT_167244.1', + 'HSCHR6_MHC_COX_CTG1': 'NT_113891.2', + 'HSCHR6_MHC_DBB_CTG1': 'NT_167245.1', + 'HSCHR6_MHC_MANN_CTG1': 'NT_167246.1', + 'HSCHR6_MHC_MCF_CTG1': 'NT_167247.1', + 'HSCHR6_MHC_QBL_CTG1': 'NT_167248.1', + 'HSCHR6_MHC_SSTO_CTG1': 'NT_167249.1', + 'HSCHR4_1_CTG9': 'NT_167250.1', + 'HSCHR17_1_CTG5': 'NT_167251.1' + } + + GRCh38 = { + "1": "NC_000001.11", + "2": "NC_000002.12", + "3": "NC_000003.12", + "4": "NC_000004.12", + "5": "NC_000005.10", + "6": "NC_000006.12", + "7": "NC_000007.14", + "8": "NC_000008.11", + "9": "NC_000009.12", + "10": "NC_000010.11", + "11": "NC_000011.10", + "12": "NC_000012.12", + "13": "NC_000013.11", + "14": "NC_000014.9", + "15": "NC_000015.10", + "16": "NC_000016.10", + "17": "NC_000017.11", + "18": "NC_000018.10", + "19": "NC_000019.10", + "20": "NC_000020.11", + "21": "NC_000021.9", + 
"22": "NC_000022.11", + "23": "NC_000023.11", + "24": "NC_000024.10", + "x": "NC_000023.11", + "y": "NC_000024.10", + "X": "NC_000023.11", + "Y": "NC_000024.10", + "M": "NC_012920.1", + "m": "NC_012920.1", + + # UCSC Alts + "10_GL383545V1_ALT": "NW_003315934.1", + "10_GL383546V1_ALT": "NW_003315935.1", + "10_KI270824V1_ALT": "NT_187579.1", + "10_KI270825V1_ALT": "NT_187580.1", + "11_GL383547V1_ALT": "NW_003315936.1", + "11_JH159136V1_ALT": "NW_003871073.1", + "11_JH159137V1_ALT": "NW_003871074.1", + "11_KI270721V1_RANDOM": "NT_187376.1", + "11_KI270826V1_ALT": "NT_187581.1", + "11_KI270827V1_ALT": "NT_187582.1", + "11_KI270829V1_ALT": "NT_187583.1", + "11_KI270830V1_ALT": "NT_187584.1", + "11_KI270831V1_ALT": "NT_187585.1", + "11_KI270832V1_ALT": "NT_187586.1", + "11_KI270902V1_ALT": "NT_187656.1", + "11_KI270903V1_ALT": "NT_187657.1", + "11_KI270927V1_ALT": "NT_187681.1", + "12_GL383549V1_ALT": "NW_003315938.1", + "12_GL383550V2_ALT": "NW_003315939.2", + "12_GL383551V1_ALT": "NW_003315940.1", + "12_GL383552V1_ALT": "NW_003315941.1", + "12_GL383553V2_ALT": "NW_003315942.2", + "12_GL877875V1_ALT": "NW_003571049.1", + "12_GL877876V1_ALT": "NW_003571050.1", + "12_KI270833V1_ALT": "NT_187589.1", + "12_KI270834V1_ALT": "NT_187590.1", + "12_KI270835V1_ALT": "NT_187587.1", + "12_KI270836V1_ALT": "NT_187591.1", + "12_KI270837V1_ALT": "NT_187588.1", + "12_KI270904V1_ALT": "NT_187658.1", + "13_KI270838V1_ALT": "NT_187592.1", + "13_KI270839V1_ALT": "NT_187593.1", + "13_KI270840V1_ALT": "NT_187594.1", + "13_KI270841V1_ALT": "NT_187595.1", + "13_KI270842V1_ALT": "NT_187596.1", + "13_KI270843V1_ALT": "NT_187597.1", + "14_GL000009V2_RANDOM": "NT_113796.3", + "14_GL000194V1_RANDOM": "NT_113888.1", + "14_GL000225V1_RANDOM": "NT_167219.1", + "14_KI270722V1_RANDOM": "NT_187377.1", + "14_KI270723V1_RANDOM": "NT_187378.1", + "14_KI270724V1_RANDOM": "NT_187379.1", + "14_KI270725V1_RANDOM": "NT_187380.1", + "14_KI270726V1_RANDOM": "NT_187381.1", + "14_KI270844V1_ALT": "NT_187598.1", + 
"14_KI270845V1_ALT": "NT_187599.1", + "14_KI270846V1_ALT": "NT_187600.1", + "14_KI270847V1_ALT": "NT_187601.1", + "15_GL383554V1_ALT": "NW_003315943.1", + "15_GL383555V2_ALT": "NW_003315944.2", + "15_KI270727V1_RANDOM": "NT_187382.1", + "15_KI270848V1_ALT": "NT_187603.1", + "15_KI270849V1_ALT": "NT_187605.1", + "15_KI270850V1_ALT": "NT_187606.1", + "15_KI270851V1_ALT": "NT_187604.1", + "15_KI270852V1_ALT": "NT_187602.1", + "15_KI270905V1_ALT": "NT_187660.1", + "15_KI270906V1_ALT": "NT_187659.1", + "16_GL383556V1_ALT": "NW_003315945.1", + "16_GL383557V1_ALT": "NW_003315946.1", + "16_KI270728V1_RANDOM": "NT_187383.1", + "16_KI270853V1_ALT": "NT_187607.1", + "16_KI270854V1_ALT": "NT_187610.1", + "16_KI270855V1_ALT": "NT_187608.1", + "16_KI270856V1_ALT": "NT_187609.1", + "17_GL000205V2_RANDOM": "NT_113930.2", + "17_GL000258V2_ALT": "NT_167251.2", + "17_GL383563V3_ALT": "NW_003315952.3", + "17_GL383564V2_ALT": "NW_003315953.2", + "17_GL383565V1_ALT": "NW_003315954.1", + "17_GL383566V1_ALT": "NW_003315955.1", + "17_JH159146V1_ALT": "NW_003871091.1", + "17_JH159147V1_ALT": "NW_003871092.1", + "17_JH159148V1_ALT": "NW_003871093.1", + "17_KI270729V1_RANDOM": "NT_187384.1", + "17_KI270730V1_RANDOM": "NT_187385.1", + "17_KI270857V1_ALT": "NT_187614.1", + "17_KI270858V1_ALT": "NT_187615.1", + "17_KI270859V1_ALT": "NT_187616.1", + "17_KI270860V1_ALT": "NT_187612.1", + "17_KI270861V1_ALT": "NT_187611.1", + "17_KI270862V1_ALT": "NT_187613.1", + "17_KI270907V1_ALT": "NT_187662.1", + "17_KI270908V1_ALT": "NT_187663.1", + "17_KI270909V1_ALT": "NT_187661.1", + "17_KI270910V1_ALT": "NT_187664.1", + "18_GL383567V1_ALT": "NW_003315956.1", + "18_GL383568V1_ALT": "NW_003315957.1", + "18_GL383569V1_ALT": "NW_003315958.1", + "18_GL383570V1_ALT": "NW_003315959.1", + "18_GL383571V1_ALT": "NW_003315960.1", + "18_GL383572V1_ALT": "NW_003315961.1", + "18_KI270863V1_ALT": "NT_187617.1", + "18_KI270864V1_ALT": "NT_187618.1", + "18_KI270911V1_ALT": "NT_187666.1", + "18_KI270912V1_ALT": 
"NT_187665.1", + "19_GL000209V2_ALT": "NT_113949.2", + "19_GL383573V1_ALT": "NW_003315962.1", + "19_GL383574V1_ALT": "NW_003315963.1", + "19_GL383575V2_ALT": "NW_003315964.2", + "19_GL383576V1_ALT": "NW_003315965.1", + "19_GL949746V1_ALT": "NW_003571054.1", + "19_GL949747V2_ALT": "NW_003571055.2", + "19_GL949748V2_ALT": "NW_003571056.2", + "19_GL949749V2_ALT": "NW_003571057.2", + "19_GL949750V2_ALT": "NW_003571058.2", + "19_GL949751V2_ALT": "NW_003571059.2", + "19_GL949752V1_ALT": "NW_003571060.1", + "19_GL949753V2_ALT": "NW_003571061.2", + "19_KI270865V1_ALT": "NT_187621.1", + "19_KI270866V1_ALT": "NT_187619.1", + "19_KI270867V1_ALT": "NT_187620.1", + "19_KI270868V1_ALT": "NT_187622.1", + "19_KI270882V1_ALT": "NT_187636.1", + "19_KI270883V1_ALT": "NT_187637.1", + "19_KI270884V1_ALT": "NT_187638.1", + "19_KI270885V1_ALT": "NT_187639.1", + "19_KI270886V1_ALT": "NT_187640.1", + "19_KI270887V1_ALT": "NT_187641.1", + "19_KI270888V1_ALT": "NT_187642.1", + "19_KI270889V1_ALT": "NT_187643.1", + "19_KI270890V1_ALT": "NT_187644.1", + "19_KI270891V1_ALT": "NT_187645.1", + "19_KI270914V1_ALT": "NT_187668.1", + "19_KI270915V1_ALT": "NT_187669.1", + "19_KI270916V1_ALT": "NT_187670.1", + "19_KI270917V1_ALT": "NT_187671.1", + "19_KI270918V1_ALT": "NT_187672.1", + "19_KI270919V1_ALT": "NT_187673.1", + "19_KI270920V1_ALT": "NT_187674.1", + "19_KI270921V1_ALT": "NT_187675.1", + "19_KI270922V1_ALT": "NT_187676.1", + "19_KI270923V1_ALT": "NT_187677.1", + "19_KI270929V1_ALT": "NT_187683.1", + "19_KI270930V1_ALT": "NT_187684.1", + "19_KI270931V1_ALT": "NT_187685.1", + "19_KI270932V1_ALT": "NT_187686.1", + "19_KI270933V1_ALT": "NT_187687.1", + "19_KI270938V1_ALT": "NT_187693.1", + "1_GL383518V1_ALT": "NW_003315905.1", + "1_GL383519V1_ALT": "NW_003315906.1", + "1_GL383520V2_ALT": "NW_003315907.2", + "1_KI270706V1_RANDOM": "NT_187361.1", + "1_KI270707V1_RANDOM": "NT_187362.1", + "1_KI270708V1_RANDOM": "NT_187363.1", + "1_KI270709V1_RANDOM": "NT_187364.1", + "1_KI270710V1_RANDOM": 
"NT_187365.1", + "1_KI270711V1_RANDOM": "NT_187366.1", + "1_KI270712V1_RANDOM": "NT_187367.1", + "1_KI270713V1_RANDOM": "NT_187368.1", + "1_KI270714V1_RANDOM": "NT_187369.1", + "1_KI270759V1_ALT": "NT_187516.1", + "1_KI270760V1_ALT": "NT_187514.1", + "1_KI270761V1_ALT": "NT_187518.1", + "1_KI270762V1_ALT": "NT_187515.1", + "1_KI270763V1_ALT": "NT_187519.1", + "1_KI270764V1_ALT": "NT_187521.1", + "1_KI270765V1_ALT": "NT_187520.1", + "1_KI270766V1_ALT": "NT_187517.1", + "1_KI270892V1_ALT": "NT_187646.1", + "20_GL383577V2_ALT": "NW_003315966.2", + "20_KI270869V1_ALT": "NT_187623.1", + "20_KI270870V1_ALT": "NT_187624.1", + "20_KI270871V1_ALT": "NT_187625.1", + "21_GL383578V2_ALT": "NW_003315967.2", + "21_GL383579V2_ALT": "NW_003315968.2", + "21_GL383580V2_ALT": "NW_003315969.2", + "21_GL383581V2_ALT": "NW_003315970.2", + "21_KI270872V1_ALT": "NT_187626.1", + "21_KI270873V1_ALT": "NT_187627.1", + "21_KI270874V1_ALT": "NT_187628.1", + "22_GL383582V2_ALT": "NW_003315971.2", + "22_GL383583V2_ALT": "NW_003315972.2", + "22_KB663609V1_ALT": "NW_004504305.1", + "22_KI270731V1_RANDOM": "NT_187386.1", + "22_KI270732V1_RANDOM": "NT_187387.1", + "22_KI270733V1_RANDOM": "NT_187388.1", + "22_KI270734V1_RANDOM": "NT_187389.1", + "22_KI270735V1_RANDOM": "NT_187390.1", + "22_KI270736V1_RANDOM": "NT_187391.1", + "22_KI270737V1_RANDOM": "NT_187392.1", + "22_KI270738V1_RANDOM": "NT_187393.1", + "22_KI270739V1_RANDOM": "NT_187394.1", + "22_KI270875V1_ALT": "NT_187629.1", + "22_KI270876V1_ALT": "NT_187630.1", + "22_KI270877V1_ALT": "NT_187631.1", + "22_KI270878V1_ALT": "NT_187632.1", + "22_KI270879V1_ALT": "NT_187633.1", + "22_KI270928V1_ALT": "NT_187682.1", + "2_GL383521V1_ALT": "NW_003315908.1", + "2_GL383522V1_ALT": "NW_003315909.1", + "2_GL582966V2_ALT": "NW_003571033.2", + "2_KI270715V1_RANDOM": "NT_187370.1", + "2_KI270716V1_RANDOM": "NT_187371.1", + "2_KI270767V1_ALT": "NT_187523.1", + "2_KI270768V1_ALT": "NT_187528.1", + "2_KI270769V1_ALT": "NT_187522.1", + "2_KI270770V1_ALT": 
"NT_187525.1", + "2_KI270771V1_ALT": "NT_187530.1", + "2_KI270772V1_ALT": "NT_187524.1", + "2_KI270773V1_ALT": "NT_187526.1", + "2_KI270774V1_ALT": "NT_187529.1", + "2_KI270775V1_ALT": "NT_187531.1", + "2_KI270776V1_ALT": "NT_187527.1", + "2_KI270893V1_ALT": "NT_187647.1", + "2_KI270894V1_ALT": "NT_187648.1", + "3_GL000221V1_RANDOM": "NT_167215.1", + "3_GL383526V1_ALT": "NW_003315913.1", + "3_JH636055V2_ALT": "NW_003871060.2", + "3_KI270777V1_ALT": "NT_187533.1", + "3_KI270778V1_ALT": "NT_187536.1", + "3_KI270779V1_ALT": "NT_187532.1", + "3_KI270780V1_ALT": "NT_187537.1", + "3_KI270781V1_ALT": "NT_187538.1", + "3_KI270782V1_ALT": "NT_187534.1", + "3_KI270783V1_ALT": "NT_187535.1", + "3_KI270784V1_ALT": "NT_187539.1", + "3_KI270895V1_ALT": "NT_187649.1", + "3_KI270924V1_ALT": "NT_187678.1", + "3_KI270934V1_ALT": "NT_187688.1", + "3_KI270935V1_ALT": "NT_187689.1", + "3_KI270936V1_ALT": "NT_187690.1", + "3_KI270937V1_ALT": "NT_187691.1", + "4_GL000008V2_RANDOM": "NT_113793.3", + "4_GL000257V2_ALT": "NT_167250.2", + "4_GL383527V1_ALT": "NW_003315914.1", + "4_GL383528V1_ALT": "NW_003315915.1", + "4_KI270785V1_ALT": "NT_187542.1", + "4_KI270786V1_ALT": "NT_187543.1", + "4_KI270787V1_ALT": "NT_187541.1", + "4_KI270788V1_ALT": "NT_187544.1", + "4_KI270789V1_ALT": "NT_187545.1", + "4_KI270790V1_ALT": "NT_187540.1", + "4_KI270896V1_ALT": "NT_187650.1", + "4_KI270925V1_ALT": "NT_187679.1", + "5_GL000208V1_RANDOM": "NT_113948.1", + "5_GL339449V2_ALT": "NW_003315917.2", + "5_GL383530V1_ALT": "NW_003315918.1", + "5_GL383531V1_ALT": "NW_003315919.1", + "5_GL383532V1_ALT": "NW_003315920.1", + "5_GL949742V1_ALT": "NW_003571036.1", + "5_KI270791V1_ALT": "NT_187547.1", + "5_KI270792V1_ALT": "NT_187548.1", + "5_KI270793V1_ALT": "NT_187550.1", + "5_KI270794V1_ALT": "NT_187551.1", + "5_KI270795V1_ALT": "NT_187546.1", + "5_KI270796V1_ALT": "NT_187549.1", + "5_KI270897V1_ALT": "NT_187651.1", + "5_KI270898V1_ALT": "NT_187652.1", + "6_GL000250V2_ALT": "NT_167244.2", + "6_GL000251V2_ALT": 
"NT_113891.3", + "6_GL000252V2_ALT": "NT_167245.2", + "6_GL000253V2_ALT": "NT_167246.2", + "6_GL000254V2_ALT": "NT_167247.2", + "6_GL000255V2_ALT": "NT_167248.2", + "6_GL000256V2_ALT": "NT_167249.2", + "6_GL383533V1_ALT": "NW_003315921.1", + "6_KB021644V2_ALT": "NW_004166862.2", + "6_KI270758V1_ALT": "NT_187692.1", + "6_KI270797V1_ALT": "NT_187552.1", + "6_KI270798V1_ALT": "NT_187553.1", + "6_KI270799V1_ALT": "NT_187554.1", + "6_KI270800V1_ALT": "NT_187555.1", + "6_KI270801V1_ALT": "NT_187556.1", + "6_KI270802V1_ALT": "NT_187557.1", + "7_GL383534V2_ALT": "NW_003315922.2", + "7_KI270803V1_ALT": "NT_187562.1", + "7_KI270804V1_ALT": "NT_187558.1", + "7_KI270805V1_ALT": "NT_187560.1", + "7_KI270806V1_ALT": "NT_187559.1", + "7_KI270807V1_ALT": "NT_187563.1", + "7_KI270808V1_ALT": "NT_187564.1", + "7_KI270809V1_ALT": "NT_187561.1", + "7_KI270899V1_ALT": "NT_187653.1", + "8_KI270810V1_ALT": "NT_187567.1", + "8_KI270811V1_ALT": "NT_187565.1", + "8_KI270812V1_ALT": "NT_187568.1", + "8_KI270813V1_ALT": "NT_187570.1", + "8_KI270814V1_ALT": "NT_187566.1", + "8_KI270815V1_ALT": "NT_187569.1", + "8_KI270816V1_ALT": "NT_187571.1", + "8_KI270817V1_ALT": "NT_187573.1", + "8_KI270818V1_ALT": "NT_187572.1", + "8_KI270819V1_ALT": "NT_187574.1", + "8_KI270820V1_ALT": "NT_187575.1", + "8_KI270821V1_ALT": "NT_187576.1", + "8_KI270822V1_ALT": "NT_187577.1", + "8_KI270900V1_ALT": "NT_187654.1", + "8_KI270901V1_ALT": "NT_187655.1", + "8_KI270926V1_ALT": "NT_187680.1", + "9_GL383539V1_ALT": "NW_003315928.1", + "9_GL383540V1_ALT": "NW_003315929.1", + "9_GL383541V1_ALT": "NW_003315930.1", + "9_GL383542V1_ALT": "NW_003315931.1", + "9_KI270717V1_RANDOM": "NT_187372.1", + "9_KI270718V1_RANDOM": "NT_187373.1", + "9_KI270719V1_RANDOM": "NT_187374.1", + "9_KI270720V1_RANDOM": "NT_187375.1", + "9_KI270823V1_ALT": "NT_187578.1", + "UN_GL000195V1": "NT_113901.1", + "UN_GL000213V1": "NT_167208.1", + "UN_GL000214V1": "NT_167209.1", + "UN_GL000216V2": "NT_167211.2", + "UN_GL000218V1": "NT_113889.1", + 
"UN_GL000219V1": "NT_167213.1", + "UN_GL000220V1": "NT_167214.1", + "UN_GL000224V1": "NT_167218.1", + "UN_GL000226V1": "NT_167220.1", + "UN_KI270302V1": "NT_187396.1", + "UN_KI270303V1": "NT_187398.1", + "UN_KI270304V1": "NT_187397.1", + "UN_KI270305V1": "NT_187399.1", + "UN_KI270310V1": "NT_187402.1", + "UN_KI270311V1": "NT_187406.1", + "UN_KI270312V1": "NT_187405.1", + "UN_KI270315V1": "NT_187404.1", + "UN_KI270316V1": "NT_187403.1", + "UN_KI270317V1": "NT_187407.1", + "UN_KI270320V1": "NT_187401.1", + "UN_KI270322V1": "NT_187400.1", + "UN_KI270329V1": "NT_187459.1", + "UN_KI270330V1": "NT_187458.1", + "UN_KI270333V1": "NT_187461.1", + "UN_KI270334V1": "NT_187460.1", + "UN_KI270335V1": "NT_187462.1", + "UN_KI270336V1": "NT_187465.1", + "UN_KI270337V1": "NT_187466.1", + "UN_KI270338V1": "NT_187463.1", + "UN_KI270340V1": "NT_187464.1", + "UN_KI270362V1": "NT_187469.1", + "UN_KI270363V1": "NT_187467.1", + "UN_KI270364V1": "NT_187468.1", + "UN_KI270366V1": "NT_187470.1", + "UN_KI270371V1": "NT_187494.1", + "UN_KI270372V1": "NT_187491.1", + "UN_KI270373V1": "NT_187492.1", + "UN_KI270374V1": "NT_187490.1", + "UN_KI270375V1": "NT_187493.1", + "UN_KI270376V1": "NT_187489.1", + "UN_KI270378V1": "NT_187471.1", + "UN_KI270379V1": "NT_187472.1", + "UN_KI270381V1": "NT_187486.1", + "UN_KI270382V1": "NT_187488.1", + "UN_KI270383V1": "NT_187482.1", + "UN_KI270384V1": "NT_187484.1", + "UN_KI270385V1": "NT_187487.1", + "UN_KI270386V1": "NT_187480.1", + "UN_KI270387V1": "NT_187475.1", + "UN_KI270388V1": "NT_187478.1", + "UN_KI270389V1": "NT_187473.1", + "UN_KI270390V1": "NT_187474.1", + "UN_KI270391V1": "NT_187481.1", + "UN_KI270392V1": "NT_187485.1", + "UN_KI270393V1": "NT_187483.1", + "UN_KI270394V1": "NT_187479.1", + "UN_KI270395V1": "NT_187476.1", + "UN_KI270396V1": "NT_187477.1", + "UN_KI270411V1": "NT_187409.1", + "UN_KI270412V1": "NT_187408.1", + "UN_KI270414V1": "NT_187410.1", + "UN_KI270417V1": "NT_187415.1", + "UN_KI270418V1": "NT_187412.1", + "UN_KI270419V1": 
"NT_187411.1", + "UN_KI270420V1": "NT_187413.1", + "UN_KI270422V1": "NT_187416.1", + "UN_KI270423V1": "NT_187417.1", + "UN_KI270424V1": "NT_187414.1", + "UN_KI270425V1": "NT_187418.1", + "UN_KI270429V1": "NT_187419.1", + "UN_KI270435V1": "NT_187424.1", + "UN_KI270438V1": "NT_187425.1", + "UN_KI270442V1": "NT_187420.1", + "UN_KI270448V1": "NT_187495.1", + "UN_KI270465V1": "NT_187422.1", + "UN_KI270466V1": "NT_187421.1", + "UN_KI270467V1": "NT_187423.1", + "UN_KI270468V1": "NT_187426.1", + "UN_KI270507V1": "NT_187437.1", + "UN_KI270508V1": "NT_187430.1", + "UN_KI270509V1": "NT_187428.1", + "UN_KI270510V1": "NT_187427.1", + "UN_KI270511V1": "NT_187435.1", + "UN_KI270512V1": "NT_187432.1", + "UN_KI270515V1": "NT_187436.1", + "UN_KI270516V1": "NT_187431.1", + "UN_KI270517V1": "NT_187438.1", + "UN_KI270518V1": "NT_187429.1", + "UN_KI270519V1": "NT_187433.1", + "UN_KI270521V1": "NT_187496.1", + "UN_KI270522V1": "NT_187434.1", + "UN_KI270528V1": "NT_187440.1", + "UN_KI270529V1": "NT_187439.1", + "UN_KI270530V1": "NT_187441.1", + "UN_KI270538V1": "NT_187443.1", + "UN_KI270539V1": "NT_187442.1", + "UN_KI270544V1": "NT_187444.1", + "UN_KI270548V1": "NT_187445.1", + "UN_KI270579V1": "NT_187450.1", + "UN_KI270580V1": "NT_187448.1", + "UN_KI270581V1": "NT_187449.1", + "UN_KI270582V1": "NT_187454.1", + "UN_KI270583V1": "NT_187446.1", + "UN_KI270584V1": "NT_187453.1", + "UN_KI270587V1": "NT_187447.1", + "UN_KI270588V1": "NT_187455.1", + "UN_KI270589V1": "NT_187451.1", + "UN_KI270590V1": "NT_187452.1", + "UN_KI270591V1": "NT_187457.1", + "UN_KI270593V1": "NT_187456.1", + "UN_KI270741V1": "NT_187497.1", + "UN_KI270742V1": "NT_187513.1", + "UN_KI270743V1": "NT_187498.1", + "UN_KI270744V1": "NT_187499.1", + "UN_KI270745V1": "NT_187500.1", + "UN_KI270746V1": "NT_187501.1", + "UN_KI270747V1": "NT_187502.1", + "UN_KI270748V1": "NT_187503.1", + "UN_KI270749V1": "NT_187504.1", + "UN_KI270750V1": "NT_187505.1", + "UN_KI270751V1": "NT_187506.1", + "UN_KI270752V1": "NT_187507.1", + 
"UN_KI270753V1": "NT_187508.1", + "UN_KI270754V1": "NT_187509.1", + "UN_KI270755V1": "NT_187510.1", + "UN_KI270756V1": "NT_187511.1", + "UN_KI270757V1": "NT_187512.1", + "X_KI270880V1_ALT": "NT_187634.1", + "X_KI270881V1_ALT": "NT_187635.1", + "X_KI270913V1_ALT": "NT_187667.1", + "Y_KI270740V1_RANDOM": "NT_187395.1", + + # GRC Alts + 'HG1342_HG2282_PATCH': 'NW_012132914.1', + 'HSCHR1_5_CTG3': 'NW_015495298.1', + 'HG2095_PATCH': 'NW_011332688.1', + 'HSCHR1_4_CTG3': 'NW_014040926.1', + 'HG2058_PATCH': 'NW_009646195.1', + 'HSCHR1_8_CTG3': 'NW_018654706.1', + 'HG460_PATCH': 'NW_019805487.1', + 'HG986_PATCH': 'NW_009646194.1', + 'HSCHR1_9_CTG3': 'NW_018654707.1', + 'HSCHR1_3_CTG3': 'NW_014040925.1', + 'HSCHR1_6_CTG3': 'NW_017852928.1', + 'HG2104_PATCH': 'NW_009646196.1', + 'HG1832_PATCH': 'NW_011332687.1', + 'HG2002_PATCH': 'NW_018654708.1', + 'HSCHR1_5_CTG32_1': 'NW_014040927.1', + 'HG2290_PATCH': 'NW_012132915.1', + 'HSCHR2_7_CTG7_2': 'NW_018654709.1', + 'HSCHR2_6_CTG7_2': 'NW_015495299.1', + 'HSCHR2_8_CTG7_2': 'NW_018654710.1', + 'HG2232_PATCH': 'NW_011332690.1', + 'HG2233_PATCH': 'NW_011332689.1', + 'HG2236_PATCH': 'NW_017363813.1', + 'HG2066_PATCH': 'NW_009646197.1', + 'HG2235_PATCH': 'NW_012132916.1', + 'HG126_PATCH': 'NW_011332691.1', + 'HSCHR3_4_CTG1': 'NW_018654711.1', + 'HG2237_PATCH': 'NW_012132917.1', + 'HG2022_PATCH': 'NW_009646198.1', + 'HG2133_PATCH': 'NW_019805491.1', + 'HSCHR3_6_CTG2_1': 'NW_019805492.1', + 'HSCHR3_9_CTG2_1': 'NW_019805490.1', + 'HSCHR3_8_CTG2_1': 'NW_019805489.1', + 'HSCHR3_7_CTG2_1': 'NW_019805488.1', + 'HSCHR4_2_CTG4': 'NW_013171799.1', + 'HSCHR4_8_CTG12': 'NW_013171800.1', + 'HSCHR4_9_CTG12': 'NW_013171801.1', + 'HSCHR4_12_CTG12': 'NW_017363814.1', + 'HG2023_PATCH': 'NW_015495300.1', + 'HSCHR4_11_CTG12': 'NW_015495301.1', + 'HSCHR5_9_CTG1': 'NW_018654712.1', + 'HSCHR5_7_CTG1': 'NW_009646199.1', + 'HSCHR5_8_CTG1': 'NW_016107297.1', + 'HG30_PATCH': 'NW_016107298.1', + 'HG2057_PATCH': 'NW_018654713.1', + 'HSCHR6_1_CTG10': 
'NW_013171803.1', + 'HG1651_PATCH': 'NW_012132918.1', + 'HG2128_PATCH': 'NW_009646200.1', + 'HG2072_PATCH': 'NW_013171802.1', + 'HG2121_PATCH': 'NW_017363815.1', + 'HSCHR7_3_CTG1': 'NW_019805493.1', + 'HG2088_PATCH': 'NW_017852929.1', + 'HG2266_PATCH': 'NW_017852930.1', + 'HG708_PATCH': 'NW_018654714.1', + 'HSCHR7_3_CTG4_4': 'NW_018654715.1', + 'HG2239_PATCH': 'NW_012132919.1', + 'HG76_PATCH': 'NW_018654717.1', + 'HG2068_PATCH': 'NW_017852932.1', + 'HG2067_PATCH': 'NW_017852931.1', + 'HSCHR8_7_CTG7': 'NW_019805494.1', + 'HG2419_PATCH': 'NW_018654716.1', + 'HSCHR9_1_CTG6': 'NW_013171804.1', + 'HSCHR9_1_CTG7': 'NW_013171805.1', + 'HG2030_PATCH': 'NW_009646201.1', + 'HG2244_HG2245_PATCH': 'NW_011332694.1', + 'HSCHR10_1_CTG6': 'NW_013171806.1', + 'HG2191_PATCH': 'NW_009646202.1', + 'HG2334_PATCH': 'NW_013171807.1', + 'HG2242_HG2243_PATCH': 'NW_011332693.1', + 'HG2241_PATCH': 'NW_011332692.1', + 'HG107_PATCH': 'NW_015148966.1', + 'HSCHR11_1_CTG1_2': 'NW_011332695.1', + 'HG2114_PATCH': 'NW_019805496.1', + 'HG2060_PATCH': 'NW_019805495.1', + 'HG1708_PATCH': 'NW_017363816.1', + 'HSCHR11_1_CTG3_1': 'NW_019805498.1', + 'HSCHR11_2_CTG8': 'NW_019805497.1', + 'HG2116_PATCH': 'NW_013171808.1', + 'HG2217_PATCH': 'NW_009646203.1', + 'HSCHR12_2_CTG1': 'NW_013171809.1', + 'HG1815_PATCH': 'NW_018654718.1', + 'HG1362_PATCH': 'NW_011332696.1', + 'HG23_PATCH': 'NW_009646204.1', + 'HSCHR12_8_CTG2_1': 'NW_018654720.1', + 'HG2063_PATCH': 'NW_015148967.1', + 'HG2047_PATCH': 'NW_018654719.1', + 'HG2247_PATCH': 'NW_011332697.1', + 'HSCHR12_9_CTG2_1': 'NW_019805499.1', + 'HG2291_PATCH': 'NW_011332699.1', + 'HSCHR13_1_CTG7': 'NW_013171810.1', + 'HG2216_PATCH': 'NW_009646205.1', + 'HG2249_PATCH': 'NW_011332700.1', + 'HSCHR13_1_CTG8': 'NW_013171811.1', + 'HG2288_HG2289_PATCH': 'NW_011332698.1', + 'HG1_PATCH': 'NW_018654722.1', + 'HSCHR14_8_CTG1': 'NW_018654721.1', + 'HG2139_PATCH': 'NW_011332701.1', + 'HSCHR15_6_CTG8': 'NW_012132920.1', + 'HSCHR16_5_CTG1': 'NW_013171812.1', + 'HG2263_PATCH': 
'NW_019805500.1', + 'HG926_PATCH': 'NW_017852933.1', + 'HSCHR16_4_CTG3_1': 'NW_013171813.1', + 'HSCHR16_5_CTG3_1': 'NW_018654723.1', + 'HSCHR16_3_CTG3_1': 'NW_012132921.1', + 'HG2285_HG106_HG2252_PATCH': 'NW_017363817.1', + 'HG2046_PATCH': 'NW_016107299.1', + 'HSCHR17_3_CTG1': 'NW_017363819.1', + 'HSCHR17_11_CTG4': 'NW_017363818.1', + 'HSCHR17_12_CTG4': 'NW_019805501.1', + 'HSCHR18_1_CTG1': 'NW_019805503.1', + 'HSCHR18_5_CTG1_1': 'NW_014040928.1', + 'HG2412_PATCH': 'NW_019805502.1', + 'HG2213_PATCH': 'NW_013171814.1', + 'HG2442_PATCH': 'NW_018654724.1', + 'HG26_PATCH': 'NW_014040929.1', + 'HG2021_PATCH': 'NW_009646206.1', + 'HSCHR19KIR_0019-4656-A_CTG3_1': 'NW_016107300.1', + 'HSCHR19KIR_CA01-TA01_1_CTG3_1': 'NW_016107301.1', + 'HSCHR19KIR_CA01-TA01_2_CTG3_1': 'NW_016107302.1', + 'HSCHR19KIR_CA01-TB04_CTG3_1': 'NW_016107303.1', + 'HSCHR19KIR_CA01-TB01_CTG3_1': 'NW_016107304.1', + 'HSCHR19KIR_HG2394_CTG3_1': 'NW_016107305.1', + 'HSCHR19KIR_502960008-2_CTG3_1': 'NW_016107306.1', + 'HSCHR19KIR_502960008-1_CTG3_1': 'NW_016107307.1', + 'HSCHR19KIR_0010-5217-AB_CTG3_1': 'NW_016107308.1', + 'HSCHR19KIR_7191059-1_CTG3_1': 'NW_016107309.1', + 'HSCHR19KIR_0019-4656-B_CTG3_1': 'NW_016107310.1', + 'HSCHR19KIR_CA04_CTG3_1': 'NW_016107311.1', + 'HSCHR19KIR_7191059-2_CTG3_1': 'NW_016107313.1', + 'HSCHR19KIR_HG2396_CTG3_1': 'NW_016107314.1', + 'HSCHR19KIR_HG2393_CTG3_1': 'NW_016107312.1', + 'HSCHR22_4_CTG1': 'NW_009646207.1', + 'HSCHR22_6_CTG1': 'NW_014040930.1', + 'HSCHR22_7_CTG1': 'NW_014040931.1', + 'HSCHR22_5_CTG1': 'NW_009646208.1', + 'HSCHR22_8_CTG1': 'NW_015148968.1', + 'HG1311_PATCH': 'NW_015148969.1', + 'HSCHRX_3_CTG7': 'NW_017363820.1', + 'HG1531_PATCH': 'NW_018654725.1', + 'HG1535_PATCH': 'NW_018654726.1', + 'HG2062_PATCH': 'NW_009646209.1', + 'HSCHR1_1_CTG3': 'NT_187515.1', + 'HSCHR1_2_CTG3': 'NT_187517.1', + 'HSCHR1_1_CTG11': 'NT_187514.1', + 'HSCHR1_4_CTG31': 'NT_187520.1', + 'HSCHR1_1_CTG31': 'NW_003315905.1', + 'HSCHR1_2_CTG31': 'NW_003315906.1', + 
'HSCHR1_3_CTG31': 'NW_003315907.2', + 'HSCHR1_4_CTG32_1': 'NT_187521.1', + 'HSCHR1_3_CTG32_1': 'NT_187519.1', + 'HSCHR1_1_CTG32_1': 'NT_187516.1', + 'HSCHR1_2_CTG32_1': 'NT_187518.1', + 'HSCHR2_2_CTG1': 'NT_187525.1', + 'HSCHR2_3_CTG1': 'NT_187526.1', + 'HSCHR2_4_CTG1': 'NT_187529.1', + 'HSCHR2_1_CTG1': 'NT_187522.1', + 'HSCHR2_1_CTG5': 'NW_003315908.1', + 'HSCHR2_1_CTG7': 'NT_187524.1', + 'HSCHR2_5_CTG7_2': 'NT_187531.1', + 'HSCHR2_4_CTG7_2': 'NT_187530.1', + 'HSCHR2_3_CTG7_2': 'NT_187528.1', + 'HSCHR2_2_CTG7_2': 'NW_003571033.2', + 'HSCHR2_1_CTG7_2': 'NW_003315909.1', + 'HSCHR2_3_CTG15': 'NT_187527.1', + 'HSCHR2_1_CTG15': 'NT_187523.1', + 'HSCHR3_1_CTG1': 'NW_003871060.2', + 'HSCHR3_3_CTG1': 'NT_187535.1', + 'HSCHR3_4_CTG2_1': 'NT_187537.1', + 'HSCHR3_1_CTG2_1': 'NW_003315913.1', + 'HSCHR3_2_CTG2_1': 'NT_187533.1', + 'HSCHR3_3_CTG2_1': 'NT_187536.1', + 'HSCHR3_5_CTG2_1': 'NT_187538.1', + 'HSCHR3_1_CTG3': 'NT_187532.1', + 'HSCHR3_2_CTG3': 'NT_187534.1', + 'HSCHR3_9_CTG3': 'NT_187539.1', + 'HSCHR4_1_CTG4': 'NT_187540.1', + 'HSCHR4_1_CTG6': 'NW_003315915.1', + 'HSCHR4_1_CTG8_1': 'NT_187541.1', + 'HSCHR4_1_CTG9': 'NT_167250.2', + 'HSCHR4_4_CTG12': 'NT_187544.1', + 'HSCHR4_1_CTG12': 'NW_003315914.1', + 'HSCHR4_2_CTG12': 'NT_187542.1', + 'HSCHR4_5_CTG12': 'NT_187545.1', + 'HSCHR4_3_CTG12': 'NT_187543.1', + 'HSCHR5_5_CTG1': 'NT_187550.1', + 'HSCHR5_4_CTG1': 'NT_187548.1', + 'HSCHR5_3_CTG1': 'NT_187547.1', + 'HSCHR5_1_CTG1': 'NW_003315920.1', + 'HSCHR5_2_CTG1': 'NW_003571036.1', + 'HSCHR5_6_CTG1': 'NT_187551.1', + 'HSCHR5_2_CTG1_1': 'NW_003315917.2', + 'HSCHR5_3_CTG1_1': 'NW_003315918.1', + 'HSCHR5_4_CTG1_1': 'NT_187549.1', + 'HSCHR5_1_CTG5': 'NW_003315919.1', + 'HSCHR5_2_CTG5': 'NT_187546.1', + 'HSCHR6_MHC_APD_CTG1': 'NT_167244.2', + 'HSCHR6_1_CTG7': 'NT_187555.1', + 'HSCHR6_1_CTG6': 'NT_187554.1', + 'HSCHR6_1_CTG2': 'NW_003315921.1', + 'HSCHR6_1_CTG8': 'NT_187556.1', + 'HSCHR6_1_CTG9': 'NT_187557.1', + 'HSCHR6_1_CTG3': 'NW_004166862.2', + 'HSCHR6_1_CTG4': 
'NT_187552.1', + 'HSCHR6_1_CTG5': 'NT_187553.1', + 'HSCHR7_1_CTG1': 'NT_187558.1', + 'HSCHR7_2_CTG4_4': 'NT_187561.1', + 'HSCHR7_1_CTG4_4': 'NT_187559.1', + 'HSCHR7_1_CTG6': 'NW_003315922.2', + 'HSCHR7_2_CTG6': 'NT_187562.1', + 'HSCHR7_3_CTG6': 'NT_187564.1', + 'HSCHR7_2_CTG7': 'NT_187563.1', + 'HSCHR7_1_CTG7': 'NT_187560.1', + 'HSCHR8_4_CTG1': 'NT_187572.1', + 'HSCHR8_2_CTG1': 'NT_187568.1', + 'HSCHR8_1_CTG1': 'NT_187565.1', + 'HSCHR8_8_CTG1': 'NT_187576.1', + 'HSCHR8_3_CTG1': 'NT_187570.1', + 'HSCHR8_9_CTG1': 'NT_187577.1', + 'HSCHR8_1_CTG6': 'NT_187566.1', + 'HSCHR8_1_CTG7': 'NT_187567.1', + 'HSCHR8_5_CTG7': 'NT_187574.1', + 'HSCHR8_6_CTG7': 'NT_187575.1', + 'HSCHR8_4_CTG7': 'NT_187573.1', + 'HSCHR8_3_CTG7': 'NT_187571.1', + 'HSCHR8_2_CTG7': 'NT_187569.1', + 'HSCHR9_1_CTG1': 'NW_003315928.1', + 'HSCHR9_1_CTG2': 'NW_003315929.1', + 'HSCHR9_1_CTG3': 'NW_003315930.1', + 'HSCHR9_1_CTG4': 'NW_003315931.1', + 'HSCHR9_1_CTG5': 'NT_187578.1', + 'HSCHR10_1_CTG1': 'NW_003315934.1', + 'HSCHR10_1_CTG3': 'NT_187579.1', + 'HSCHR10_1_CTG2': 'NW_003315935.1', + 'HSCHR10_1_CTG4': 'NT_187580.1', + 'HSCHR11_1_CTG8': 'NT_187586.1', + 'HSCHR11_1_CTG6': 'NT_187584.1', + 'HSCHR11_1_CTG7': 'NT_187585.1', + 'HSCHR11_1_CTG5': 'NT_187583.1', + 'HSCHR11_1_CTG1_1': 'NW_003315936.1', + 'HG142_HG150_NOVEL_TEST': 'NW_003871073.1', + 'HG151_NOVEL_TEST': 'NW_003871074.1', + 'HSCHR11_1_CTG3': 'NT_187582.1', + 'HSCHR11_1_CTG2': 'NT_187581.1', + 'HSCHR12_1_CTG1': 'NW_003571049.1', + 'HSCHR12_2_CTG2': 'NW_003571050.1', + 'HSCHR12_5_CTG2': 'NT_187588.1', + 'HSCHR12_1_CTG2': 'NW_003315938.1', + 'HSCHR12_4_CTG2': 'NT_187587.1', + 'HSCHR12_1_CTG2_1': 'NW_003315939.2', + 'HSCHR12_2_CTG2_1': 'NW_003315941.1', + 'HSCHR12_3_CTG2_1': 'NW_003315942.2', + 'HSCHR12_6_CTG2_1': 'NT_187590.1', + 'HSCHR12_4_CTG2_1': 'NW_003315940.1', + 'HSCHR12_5_CTG2_1': 'NT_187589.1', + 'HSCHR12_7_CTG2_1': 'NT_187591.1', + 'HSCHR13_1_CTG3': 'NT_187594.1', + 'HSCHR13_1_CTG2': 'NT_187593.1', + 'HSCHR13_1_CTG6': 'NT_187597.1', + 
'HSCHR13_1_CTG4': 'NT_187595.1', + 'HSCHR13_1_CTG1': 'NT_187592.1', + 'HSCHR13_1_CTG5': 'NT_187596.1', + 'HSCHR14_1_CTG1': 'NT_187598.1', + 'HSCHR14_7_CTG1': 'NT_187601.1', + 'HSCHR14_2_CTG1': 'NT_187599.1', + 'HSCHR14_3_CTG1': 'NT_187600.1', + 'HSCHR15_1_CTG1': 'NT_187602.1', + 'HSCHR15_3_CTG3': 'NT_187604.1', + 'HSCHR15_1_CTG3': 'NT_187603.1', + 'HSCHR15_1_CTG8': 'NW_003315943.1', + 'HSCHR15_3_CTG8': 'NT_187605.1', + 'HSCHR15_2_CTG8': 'NW_003315944.2', + 'HSCHR15_5_CTG8': 'NT_187606.1', + 'HSCHR16_CTG2': 'NT_187610.1', + 'HSCHR16_4_CTG1': 'NT_187609.1', + 'HSCHR16_3_CTG1': 'NT_187608.1', + 'HSCHR16_1_CTG1': 'NT_187607.1', + 'HSCHR16_1_CTG3_1': 'NW_003315945.1', + 'HSCHR16_2_CTG3_1': 'NW_003315946.1', + 'HSCHR17_1_CTG1': 'NW_003315952.3', + 'HSCHR17_2_CTG2': 'NT_187613.1', + 'HSCHR17_1_CTG2': 'NT_187611.1', + 'HSCHR17_7_CTG4': 'NT_187614.1', + 'HSCHR17_4_CTG4': 'NW_003871091.1', + 'HSCHR17_5_CTG4': 'NW_003871092.1', + 'HSCHR17_1_CTG4': 'NW_003315953.2', + 'HSCHR17_1_CTG5': 'NT_167251.2', + 'HSCHR17_2_CTG4': 'NW_003315954.1', + 'HSCHR17_8_CTG4': 'NT_187615.1', + 'HSCHR17_9_CTG4': 'NT_187616.1', + 'HSCHR17_3_CTG4': 'NW_003315955.1', + 'HSCHR17_1_CTG9': 'NT_187612.1', + 'HSCHR18_4_CTG1_1': 'NT_187618.1', + 'HSCHR18_1_CTG1_1': 'NW_003315956.1', + 'HSCHR18_2_CTG1_1': 'NW_003315959.1', + 'HSCHR18_2_CTG2': 'NW_003315960.1', + 'HSCHR18_1_CTG2': 'NW_003315957.1', + 'HSCHR18_1_CTG2_1': 'NW_003315958.1', + 'HSCHR18_2_CTG2_1': 'NW_003315961.1', + 'HSCHR18_3_CTG2_1': 'NT_187617.1', + 'HSCHR19_5_CTG2': 'NT_187622.1', + 'HSCHR19_4_CTG2': 'NT_187621.1', + 'HSCHR19_1_CTG2': 'NW_003315962.1', + 'HSCHR19_2_CTG2': 'NW_003315964.2', + 'HSCHR19_3_CTG2': 'NW_003315965.1', + 'HSCHR19_1_CTG3_1': 'NW_003315963.1', + 'HSCHR19_2_CTG3_1': 'NT_187619.1', + 'HSCHR19_3_CTG3_1': 'NT_187620.1', + 'HSCHR19LRC_COX1_CTG3_1': 'NW_003571054.1', + 'HSCHR20_1_CTG1': 'NW_003315966.2', + 'HSCHR20_1_CTG2': 'NT_187623.1', + 'HSCHR20_1_CTG4': 'NT_187625.1', + 'HSCHR20_1_CTG3': 'NT_187624.1', + 
'HSCHR21_1_CTG1_1': 'NW_003315967.2', + 'HSCHR21_8_CTG1_1': 'NT_187628.1', + 'HSCHR21_6_CTG1_1': 'NT_187627.1', + 'HSCHR21_2_CTG1_1': 'NW_003315968.2', + 'HSCHR21_3_CTG1_1': 'NW_003315969.2', + 'HSCHR21_4_CTG1_1': 'NW_003315970.2', + 'HSCHR21_5_CTG2': 'NT_187626.1', + 'HSCHR22_1_CTG3': 'NT_187629.1', + 'HSCHR22_1_CTG6': 'NT_187632.1', + 'HSCHR22_1_CTG7': 'NT_187633.1', + 'HSCHR22_1_CTG4': 'NT_187630.1', + 'HSCHR22_1_CTG5': 'NT_187631.1', + 'HSCHR22_1_CTG2': 'NW_003315972.2', + 'HSCHR22_1_CTG1': 'NW_003315971.2', + 'HSCHRX_1_CTG3': 'NT_187634.1', + 'HSCHRX_2_CTG12': 'NT_187635.1', + 'HSCHR1_ALT2_1_CTG32_1': 'NT_187646.1', + 'HSCHR2_2_CTG7': 'NT_187648.1', + 'HSCHR2_2_CTG15': 'NT_187647.1', + 'HSCHR3_3_CTG3': 'NT_187649.1', + 'HSCHR4_6_CTG12': 'NT_187650.1', + 'HSCHR5_1_CTG1_1': 'NT_187651.1', + 'HSCHR5_3_CTG5': 'NT_187652.1', + 'HSCHR6_MHC_COX_CTG1': 'NT_113891.3', + 'HSCHR7_2_CTG1': 'NT_187653.1', + 'HSCHR8_6_CTG1': 'NT_187655.1', + 'HSCHR8_5_CTG1': 'NT_187654.1', + 'HSCHR11_2_CTG1': 'NT_187656.1', + 'HSCHR11_2_CTG1_1': 'NT_187657.1', + 'HSCHR12_3_CTG2': 'NT_187658.1', + 'HSCHR15_2_CTG3': 'NT_187659.1', + 'HSCHR15_4_CTG8': 'NT_187660.1', + 'HSCHR17_2_CTG1': 'NT_187662.1', + 'HSCHR17_3_CTG2': 'NT_187664.1', + 'HSCHR17_10_CTG4': 'NT_187661.1', + 'HSCHR17_6_CTG4': 'NW_003871093.1', + 'HSCHR17_2_CTG5': 'NT_187663.1', + 'HSCHR18_ALT21_CTG2_1': 'NT_187665.1', + 'HSCHR18_ALT2_CTG2_1': 'NT_187666.1', + 'HSCHR19LRC_COX2_CTG3_1': 'NW_003571055.2', + 'HSCHR22_2_CTG1': 'NW_004504305.1', + 'HSCHRX_2_CTG3': 'NT_187667.1', + 'HSCHR3_4_CTG3': 'NT_187678.1', + 'HSCHR4_7_CTG12': 'NT_187679.1', + 'HSCHR6_MHC_DBB_CTG1': 'NT_167245.2', + 'HSCHR8_7_CTG1': 'NT_187680.1', + 'HSCHR11_3_CTG1': 'NT_187681.1', + 'HSCHR19LRC_LRC_I_CTG3_1': 'NW_003571056.2', + 'HSCHR22_3_CTG1': 'NT_187682.1', + 'HSCHR3_5_CTG3': 'NT_187688.1', + 'HSCHR6_MHC_MANN_CTG1': 'NT_167246.2', + 'HSCHR19LRC_LRC_J_CTG3_1': 'NW_003571057.2', + 'HSCHR3_6_CTG3': 'NT_187689.1', + 'HSCHR6_MHC_MCF_CTG1': 'NT_167247.2', + 
'HSCHR19LRC_LRC_S_CTG3_1': 'NW_003571058.2', + 'HSCHR3_7_CTG3': 'NT_187690.1', + 'HSCHR6_MHC_QBL_CTG1': 'NT_167248.2', + 'HSCHR19LRC_LRC_T_CTG3_1': 'NW_003571059.2', + 'HSCHR3_8_CTG3': 'NT_187691.1', + 'HSCHR6_MHC_SSTO_CTG1': 'NT_167249.2', + 'HSCHR19LRC_PGF1_CTG3_1': 'NW_003571060.1', + 'HSCHR6_8_CTG1': 'NT_187692.1', + 'HSCHR19LRC_PGF2_CTG3_1': 'NW_003571061.2', + 'HSCHR19_4_CTG3_1': 'NT_187693.1', + 'HSCHR19KIR_FH15_B_HAP_CTG3_1': 'NT_187636.1', + 'HSCHR19KIR_G085_A_HAP_CTG3_1': 'NT_187637.1', + 'HSCHR19KIR_G085_BA1_HAP_CTG3_1': 'NT_187638.1', + 'HSCHR19KIR_G248_A_HAP_CTG3_1': 'NT_187639.1', + 'HSCHR19KIR_G248_BA2_HAP_CTG3_1': 'NT_187640.1', + 'HSCHR19KIR_GRC212_AB_HAP_CTG3_1': 'NT_187641.1', + 'HSCHR19KIR_GRC212_BA1_HAP_CTG3_1': 'NT_187642.1', + 'HSCHR19KIR_LUCE_A_HAP_CTG3_1': 'NT_187643.1', + 'HSCHR19KIR_LUCE_BDEL_HAP_CTG3_1': 'NT_187644.1', + 'HSCHR19KIR_RSH_A_HAP_CTG3_1': 'NT_187645.1', + 'HSCHR19KIR_RSH_BA2_HAP_CTG3_1': 'NT_187668.1', + 'HSCHR19KIR_T7526_A_HAP_CTG3_1': 'NT_187669.1', + 'HSCHR19KIR_T7526_BDEL_HAP_CTG3_1': 'NT_187670.1', + 'HSCHR19KIR_ABC08_A1_HAP_CTG3_1': 'NT_187671.1', + 'HSCHR19KIR_ABC08_AB_HAP_C_P_CTG3_1': 'NT_187672.1', + 'HSCHR19KIR_ABC08_AB_HAP_T_P_CTG3_1': 'NT_187673.1', + 'HSCHR19KIR_FH05_A_HAP_CTG3_1': 'NT_187674.1', + 'HSCHR19KIR_FH05_B_HAP_CTG3_1': 'NT_187675.1', + 'HSCHR19KIR_FH06_A_HAP_CTG3_1': 'NT_187676.1', + 'HSCHR19KIR_FH06_BA1_HAP_CTG3_1': 'NT_187677.1', + 'HSCHR19KIR_FH08_A_HAP_CTG3_1': 'NT_187683.1', + 'HSCHR19KIR_FH08_BAX_HAP_CTG3_1': 'NT_187684.1', + 'HSCHR19KIR_FH13_A_HAP_CTG3_1': 'NT_187685.1', + 'HSCHR19KIR_FH13_BA2_HAP_CTG3_1': 'NT_187686.1', + 'HSCHR19KIR_FH15_A_HAP_CTG3_1': 'NT_187687.1', + 'HSCHR19KIR_RP5_B_HAP_CTG3_1': 'NT_113949.2', + 'HSCHR22_CTG1_3': 'NT_167235.1' + } + # Convert call line to rs line + chr_num = chr_num.upper() + if 'CHR' in chr_num[:3]: + chr_num = chr_num[3:] + if primary_assembly == 'GRCh37': + chr_accession = GRCh37.get(chr_num) + if primary_assembly == 'GRCh38' or primary_assembly == 
'hg38': + chr_accession = GRCh38.get(chr_num) + if primary_assembly == 'hg19': + chr_accession = hg19.get(chr_num) + return chr_accession + +""" +Mark for removal at testing +""" +# def to_chr_num(accession): +# # Available genome builds - Primary assembly only, Otherwise leave the RefSeq accession in place +# chr_num_convert = { +# "NC_000001": "1", +# "NC_000002": "2", +# "NC_000003": "3", +# "NC_000004": "4", +# "NC_000005": "5", +# "NC_000006": "6", +# "NC_000007": "7", +# "NC_000008": "8", +# "NC_000009": "9", +# "NC_000010": "10", +# "NC_000011": "11", +# "NC_000012": "12", +# "NC_000013": "13", +# "NC_000014": "14", +# "NC_000015": "15", +# "NC_000016": "16", +# "NC_000017": "17", +# "NC_000018": "18", +# "NC_000019": "19", +# "NC_000020": "20", +# "NC_000021": "21", +# "NC_000022": "22", +# "NC_000023": "X", +# "NC_000024": "Y" +# } +# accession = accession.split('.')[0] +# chr_num = chr_num_convert.get(accession) +# return chr_num + + +""" +Simple dictionary lookup function that takes the RefSeq chromosome identifier and returns the +UCSC genome build formatted VCF identifier. 
# RefSeq accession -> UCSC hg19 sequence name.
# Built once at import time instead of on every call to to_chr_num_ucsc().
_UCSC_CHR_NAMES_HG19 = {
    "NC_000001.10": "chr1",
    "NC_000002.11": "chr2",
    "NC_000003.11": "chr3",
    "NC_000004.11": "chr4",
    "NC_000005.9": "chr5",
    "NC_000006.11": "chr6",
    "NC_000007.13": "chr7",
    "NC_000008.10": "chr8",
    "NC_000009.11": "chr9",
    "NC_000010.10": "chr10",
    "NC_000011.9": "chr11",
    "NC_000012.11": "chr12",
    "NC_000013.10": "chr13",
    "NC_000014.8": "chr14",
    "NC_000015.9": "chr15",
    "NC_000016.9": "chr16",
    "NC_000017.10": "chr17",
    "NC_000018.9": "chr18",
    "NC_000019.9": "chr19",
    "NC_000020.10": "chr20",
    "NC_000021.8": "chr21",
    "NC_000022.10": "chr22",
    "NC_000023.10": "chrX",
    "NC_000024.9": "chrY",
    "NC_012920.1": "chrM",  # Cambridge revised mitochondrial
    "NC_001807.1": "chrM",  # hg19 mitochondrial

    # UCSC hg19 ALTS
    "NT_113921.2": "chr11_gl000202_random",
    "NT_167251.1": "chr17_ctg5_hap1",
    "NT_113941.1": "chr17_gl000203_random",
    "NT_113943.1": "chr17_gl000204_random",
    "NT_113930.1": "chr17_gl000205_random",
    "NT_113945.1": "chr17_gl000206_random",
    "NT_113947.1": "chr18_gl000207_random",
    "NT_113948.1": "chr19_gl000208_random",
    "NT_113949.1": "chr19_gl000209_random",
    "NT_113878.1": "chr1_gl000191_random",
    "NT_167207.1": "chr1_gl000192_random",
    "NT_113950.2": "chr21_gl000210_random",
    "NT_167250.1": "chr4_ctg9_hap1",
    "NT_113885.1": "chr4_gl000193_random",
    "NT_113888.1": "chr4_gl000194_random",
    "NT_167244.1": "chr6_apd_hap1",
    "NT_113891.2": "chr6_cox_hap2",
    "NT_167245.1": "chr6_dbb_hap3",
    "NT_167246.1": "chr6_mann_hap4",
    "NT_167247.1": "chr6_mcf_hap5",
    "NT_167248.1": "chr6_qbl_hap6",
    "NT_167249.1": "chr6_ssto_hap7",
    "NT_113901.1": "chr7_gl000195_random",
    "NT_113909.1": "chr8_gl000196_random",
    "NT_113907.1": "chr8_gl000197_random",
    "NT_113914.1": "chr9_gl000198_random",
    "NT_113916.2": "chr9_gl000199_random",
    "NT_113915.1": "chr9_gl000200_random",
    "NT_113911.1": "chr9_gl000201_random",
    "NT_113961.1": "chrUn_gl000211",
    "NT_113923.1": "chrUn_gl000212",
    "NT_167208.1": "chrUn_gl000213",
    "NT_167209.1": "chrUn_gl000214",
    "NT_167210.1": "chrUn_gl000215",
    "NT_167211.1": "chrUn_gl000216",
    "NT_167212.1": "chrUn_gl000217",
    "NT_113889.1": "chrUn_gl000218",
    "NT_167213.1": "chrUn_gl000219",
    "NT_167214.1": "chrUn_gl000220",
    "NT_167215.1": "chrUn_gl000221",
    "NT_167216.1": "chrUn_gl000222",
    "NT_167217.1": "chrUn_gl000223",
    "NT_167218.1": "chrUn_gl000224",
    "NT_167219.1": "chrUn_gl000225",
    "NT_167220.1": "chrUn_gl000226",
    "NT_167221.1": "chrUn_gl000227",
    "NT_167222.1": "chrUn_gl000228",
    "NT_167223.1": "chrUn_gl000229",
    "NT_167224.1": "chrUn_gl000230",
    "NT_167225.1": "chrUn_gl000231",
    "NT_167226.1": "chrUn_gl000232",
    "NT_167227.1": "chrUn_gl000233",
    "NT_167228.1": "chrUn_gl000234",
    "NT_167229.1": "chrUn_gl000235",
    "NT_167230.1": "chrUn_gl000236",
    "NT_167231.1": "chrUn_gl000237",
    "NT_167232.1": "chrUn_gl000238",
    "NT_167233.1": "chrUn_gl000239",
    "NT_167234.1": "chrUn_gl000240",
    "NT_167235.1": "chrUn_gl000241",
    "NT_167236.1": "chrUn_gl000242",
    "NT_167237.1": "chrUn_gl000243",
    "NT_167238.1": "chrUn_gl000244",
    "NT_167239.1": "chrUn_gl000245",
    "NT_167240.1": "chrUn_gl000246",
    "NT_167241.1": "chrUn_gl000247",
    "NT_167242.1": "chrUn_gl000248",
    "NT_167243.1": "chrUn_gl000249",
}

# RefSeq accession -> UCSC hg38 sequence name.
_UCSC_CHR_NAMES_HG38 = {
    "NC_000001.11": "chr1",
    "NC_000002.12": "chr2",
    "NC_000003.12": "chr3",
    "NC_000004.12": "chr4",
    "NC_000005.10": "chr5",
    "NC_000006.12": "chr6",
    "NC_000007.14": "chr7",
    "NC_000008.11": "chr8",
    "NC_000009.12": "chr9",
    "NC_000010.11": "chr10",
    "NC_000011.10": "chr11",
    "NC_000012.12": "chr12",
    "NC_000013.11": "chr13",
    "NC_000014.9": "chr14",
    "NC_000015.10": "chr15",
    "NC_000016.10": "chr16",
    "NC_000017.11": "chr17",
    "NC_000018.10": "chr18",
    "NC_000019.10": "chr19",
    "NC_000020.11": "chr20",
    "NC_000021.9": "chr21",
    "NC_000022.11": "chr22",
    "NC_000023.11": "chrX",
    "NC_000024.10": "chrY",
    "NC_012920.1": "chrM",

    # UCSC hg38 Alts
    "NW_003315934.1": "chr10_GL383545v1_alt",
    "NW_003315935.1": "chr10_GL383546v1_alt",
    "NT_187579.1": "chr10_KI270824v1_alt",
    "NT_187580.1": "chr10_KI270825v1_alt",
    "NW_003315936.1": "chr11_GL383547v1_alt",
    "NW_003871073.1": "chr11_JH159136v1_alt",
    "NW_003871074.1": "chr11_JH159137v1_alt",
    "NT_187376.1": "chr11_KI270721v1_random",
    "NT_187581.1": "chr11_KI270826v1_alt",
    "NT_187582.1": "chr11_KI270827v1_alt",
    "NT_187583.1": "chr11_KI270829v1_alt",
    "NT_187584.1": "chr11_KI270830v1_alt",
    "NT_187585.1": "chr11_KI270831v1_alt",
    "NT_187586.1": "chr11_KI270832v1_alt",
    "NT_187656.1": "chr11_KI270902v1_alt",
    "NT_187657.1": "chr11_KI270903v1_alt",
    "NT_187681.1": "chr11_KI270927v1_alt",
    "NW_003315938.1": "chr12_GL383549v1_alt",
    "NW_003315939.2": "chr12_GL383550v2_alt",
    "NW_003315940.1": "chr12_GL383551v1_alt",
    "NW_003315941.1": "chr12_GL383552v1_alt",
    "NW_003315942.2": "chr12_GL383553v2_alt",
    "NW_003571049.1": "chr12_GL877875v1_alt",
    "NW_003571050.1": "chr12_GL877876v1_alt",
    "NT_187589.1": "chr12_KI270833v1_alt",
    "NT_187590.1": "chr12_KI270834v1_alt",
    "NT_187587.1": "chr12_KI270835v1_alt",
    "NT_187591.1": "chr12_KI270836v1_alt",
    "NT_187588.1": "chr12_KI270837v1_alt",
    "NT_187658.1": "chr12_KI270904v1_alt",
    "NT_187592.1": "chr13_KI270838v1_alt",
    "NT_187593.1": "chr13_KI270839v1_alt",
    "NT_187594.1": "chr13_KI270840v1_alt",
    "NT_187595.1": "chr13_KI270841v1_alt",
    "NT_187596.1": "chr13_KI270842v1_alt",
    "NT_187597.1": "chr13_KI270843v1_alt",
    "NT_113796.3": "chr14_GL000009v2_random",
    "NT_113888.1": "chr14_GL000194v1_random",
    "NT_167219.1": "chr14_GL000225v1_random",
    "NT_187377.1": "chr14_KI270722v1_random",
    "NT_187378.1": "chr14_KI270723v1_random",
    "NT_187379.1": "chr14_KI270724v1_random",
    "NT_187380.1": "chr14_KI270725v1_random",
    "NT_187381.1": "chr14_KI270726v1_random",
    "NT_187598.1": "chr14_KI270844v1_alt",
    "NT_187599.1": "chr14_KI270845v1_alt",
    "NT_187600.1": "chr14_KI270846v1_alt",
    "NT_187601.1": "chr14_KI270847v1_alt",
    "NW_003315943.1": "chr15_GL383554v1_alt",
    "NW_003315944.2": "chr15_GL383555v2_alt",
    "NT_187382.1": "chr15_KI270727v1_random",
    "NT_187603.1": "chr15_KI270848v1_alt",
    "NT_187605.1": "chr15_KI270849v1_alt",
    "NT_187606.1": "chr15_KI270850v1_alt",
    "NT_187604.1": "chr15_KI270851v1_alt",
    "NT_187602.1": "chr15_KI270852v1_alt",
    "NT_187660.1": "chr15_KI270905v1_alt",
    "NT_187659.1": "chr15_KI270906v1_alt",
    "NW_003315945.1": "chr16_GL383556v1_alt",
    "NW_003315946.1": "chr16_GL383557v1_alt",
    "NT_187383.1": "chr16_KI270728v1_random",
    "NT_187607.1": "chr16_KI270853v1_alt",
    "NT_187610.1": "chr16_KI270854v1_alt",
    "NT_187608.1": "chr16_KI270855v1_alt",
    "NT_187609.1": "chr16_KI270856v1_alt",
    "NT_113930.2": "chr17_GL000205v2_random",
    "NT_167251.2": "chr17_GL000258v2_alt",
    "NW_003315952.3": "chr17_GL383563v3_alt",
    "NW_003315953.2": "chr17_GL383564v2_alt",
    "NW_003315954.1": "chr17_GL383565v1_alt",
    "NW_003315955.1": "chr17_GL383566v1_alt",
    "NW_003871091.1": "chr17_JH159146v1_alt",
    "NW_003871092.1": "chr17_JH159147v1_alt",
    "NW_003871093.1": "chr17_JH159148v1_alt",
    "NT_187384.1": "chr17_KI270729v1_random",
    "NT_187385.1": "chr17_KI270730v1_random",
    "NT_187614.1": "chr17_KI270857v1_alt",
    "NT_187615.1": "chr17_KI270858v1_alt",
    "NT_187616.1": "chr17_KI270859v1_alt",
    "NT_187612.1": "chr17_KI270860v1_alt",
    "NT_187611.1": "chr17_KI270861v1_alt",
    "NT_187613.1": "chr17_KI270862v1_alt",
    "NT_187662.1": "chr17_KI270907v1_alt",
    "NT_187663.1": "chr17_KI270908v1_alt",
    "NT_187661.1": "chr17_KI270909v1_alt",
    "NT_187664.1": "chr17_KI270910v1_alt",
    "NW_003315956.1": "chr18_GL383567v1_alt",
    "NW_003315957.1": "chr18_GL383568v1_alt",
    "NW_003315958.1": "chr18_GL383569v1_alt",
    "NW_003315959.1": "chr18_GL383570v1_alt",
    "NW_003315960.1": "chr18_GL383571v1_alt",
    "NW_003315961.1": "chr18_GL383572v1_alt",
    "NT_187617.1": "chr18_KI270863v1_alt",
    "NT_187618.1": "chr18_KI270864v1_alt",
    "NT_187666.1": "chr18_KI270911v1_alt",
    "NT_187665.1": "chr18_KI270912v1_alt",
    "NT_113949.2": "chr19_GL000209v2_alt",
    "NW_003315962.1": "chr19_GL383573v1_alt",
    "NW_003315963.1": "chr19_GL383574v1_alt",
    "NW_003315964.2": "chr19_GL383575v2_alt",
    "NW_003315965.1": "chr19_GL383576v1_alt",
    "NW_003571054.1": "chr19_GL949746v1_alt",
    "NW_003571055.2": "chr19_GL949747v2_alt",
    "NW_003571056.2": "chr19_GL949748v2_alt",
    "NW_003571057.2": "chr19_GL949749v2_alt",
    "NW_003571058.2": "chr19_GL949750v2_alt",
    "NW_003571059.2": "chr19_GL949751v2_alt",
    "NW_003571060.1": "chr19_GL949752v1_alt",
    "NW_003571061.2": "chr19_GL949753v2_alt",
    "NT_187621.1": "chr19_KI270865v1_alt",
    "NT_187619.1": "chr19_KI270866v1_alt",
    "NT_187620.1": "chr19_KI270867v1_alt",
    "NT_187622.1": "chr19_KI270868v1_alt",
    "NT_187636.1": "chr19_KI270882v1_alt",
    "NT_187637.1": "chr19_KI270883v1_alt",
    "NT_187638.1": "chr19_KI270884v1_alt",
    "NT_187639.1": "chr19_KI270885v1_alt",
    "NT_187640.1": "chr19_KI270886v1_alt",
    "NT_187641.1": "chr19_KI270887v1_alt",
    "NT_187642.1": "chr19_KI270888v1_alt",
    "NT_187643.1": "chr19_KI270889v1_alt",
    "NT_187644.1": "chr19_KI270890v1_alt",
    "NT_187645.1": "chr19_KI270891v1_alt",
    "NT_187668.1": "chr19_KI270914v1_alt",
    "NT_187669.1": "chr19_KI270915v1_alt",
    "NT_187670.1": "chr19_KI270916v1_alt",
    "NT_187671.1": "chr19_KI270917v1_alt",
    "NT_187672.1": "chr19_KI270918v1_alt",
    "NT_187673.1": "chr19_KI270919v1_alt",
    "NT_187674.1": "chr19_KI270920v1_alt",
    "NT_187675.1": "chr19_KI270921v1_alt",
    "NT_187676.1": "chr19_KI270922v1_alt",
    "NT_187677.1": "chr19_KI270923v1_alt",
    "NT_187683.1": "chr19_KI270929v1_alt",
    "NT_187684.1": "chr19_KI270930v1_alt",
    "NT_187685.1": "chr19_KI270931v1_alt",
    "NT_187686.1": "chr19_KI270932v1_alt",
    "NT_187687.1": "chr19_KI270933v1_alt",
    "NT_187693.1": "chr19_KI270938v1_alt",
    "NW_003315905.1": "chr1_GL383518v1_alt",
    "NW_003315906.1": "chr1_GL383519v1_alt",
    "NW_003315907.2": "chr1_GL383520v2_alt",
    "NT_187361.1": "chr1_KI270706v1_random",
    "NT_187362.1": "chr1_KI270707v1_random",
    "NT_187363.1": "chr1_KI270708v1_random",
    "NT_187364.1": "chr1_KI270709v1_random",
    "NT_187365.1": "chr1_KI270710v1_random",
    "NT_187366.1": "chr1_KI270711v1_random",
    "NT_187367.1": "chr1_KI270712v1_random",
    "NT_187368.1": "chr1_KI270713v1_random",
    "NT_187369.1": "chr1_KI270714v1_random",
    "NT_187516.1": "chr1_KI270759v1_alt",
    "NT_187514.1": "chr1_KI270760v1_alt",
    "NT_187518.1": "chr1_KI270761v1_alt",
    "NT_187515.1": "chr1_KI270762v1_alt",
    "NT_187519.1": "chr1_KI270763v1_alt",
    "NT_187521.1": "chr1_KI270764v1_alt",
    "NT_187520.1": "chr1_KI270765v1_alt",
    "NT_187517.1": "chr1_KI270766v1_alt",
    "NT_187646.1": "chr1_KI270892v1_alt",
    "NW_003315966.2": "chr20_GL383577v2_alt",
    "NT_187623.1": "chr20_KI270869v1_alt",
    "NT_187624.1": "chr20_KI270870v1_alt",
    "NT_187625.1": "chr20_KI270871v1_alt",
    "NW_003315967.2": "chr21_GL383578v2_alt",
    "NW_003315968.2": "chr21_GL383579v2_alt",
    "NW_003315969.2": "chr21_GL383580v2_alt",
    "NW_003315970.2": "chr21_GL383581v2_alt",
    "NT_187626.1": "chr21_KI270872v1_alt",
    "NT_187627.1": "chr21_KI270873v1_alt",
    "NT_187628.1": "chr21_KI270874v1_alt",
    "NW_003315971.2": "chr22_GL383582v2_alt",
    "NW_003315972.2": "chr22_GL383583v2_alt",
    "NW_004504305.1": "chr22_KB663609v1_alt",
    "NT_187386.1": "chr22_KI270731v1_random",
    "NT_187387.1": "chr22_KI270732v1_random",
    "NT_187388.1": "chr22_KI270733v1_random",
    "NT_187389.1": "chr22_KI270734v1_random",
    "NT_187390.1": "chr22_KI270735v1_random",
    "NT_187391.1": "chr22_KI270736v1_random",
    "NT_187392.1": "chr22_KI270737v1_random",
    "NT_187393.1": "chr22_KI270738v1_random",
    "NT_187394.1": "chr22_KI270739v1_random",
    "NT_187629.1": "chr22_KI270875v1_alt",
    "NT_187630.1": "chr22_KI270876v1_alt",
    "NT_187631.1": "chr22_KI270877v1_alt",
    "NT_187632.1": "chr22_KI270878v1_alt",
    "NT_187633.1": "chr22_KI270879v1_alt",
    "NT_187682.1": "chr22_KI270928v1_alt",
    "NW_003315908.1": "chr2_GL383521v1_alt",
    "NW_003315909.1": "chr2_GL383522v1_alt",
    "NW_003571033.2": "chr2_GL582966v2_alt",
    "NT_187370.1": "chr2_KI270715v1_random",
    "NT_187371.1": "chr2_KI270716v1_random",
    "NT_187523.1": "chr2_KI270767v1_alt",
    "NT_187528.1": "chr2_KI270768v1_alt",
    "NT_187522.1": "chr2_KI270769v1_alt",
    "NT_187525.1": "chr2_KI270770v1_alt",
    "NT_187530.1": "chr2_KI270771v1_alt",
    "NT_187524.1": "chr2_KI270772v1_alt",
    "NT_187526.1": "chr2_KI270773v1_alt",
    "NT_187529.1": "chr2_KI270774v1_alt",
    "NT_187531.1": "chr2_KI270775v1_alt",
    "NT_187527.1": "chr2_KI270776v1_alt",
    "NT_187647.1": "chr2_KI270893v1_alt",
    "NT_187648.1": "chr2_KI270894v1_alt",
    "NT_167215.1": "chr3_GL000221v1_random",
    "NW_003315913.1": "chr3_GL383526v1_alt",
    "NW_003871060.2": "chr3_JH636055v2_alt",
    "NT_187533.1": "chr3_KI270777v1_alt",
    "NT_187536.1": "chr3_KI270778v1_alt",
    "NT_187532.1": "chr3_KI270779v1_alt",
    "NT_187537.1": "chr3_KI270780v1_alt",
    "NT_187538.1": "chr3_KI270781v1_alt",
    "NT_187534.1": "chr3_KI270782v1_alt",
    "NT_187535.1": "chr3_KI270783v1_alt",
    "NT_187539.1": "chr3_KI270784v1_alt",
    "NT_187649.1": "chr3_KI270895v1_alt",
    "NT_187678.1": "chr3_KI270924v1_alt",
    "NT_187688.1": "chr3_KI270934v1_alt",
    "NT_187689.1": "chr3_KI270935v1_alt",
    "NT_187690.1": "chr3_KI270936v1_alt",
    "NT_187691.1": "chr3_KI270937v1_alt",
    "NT_113793.3": "chr4_GL000008v2_random",
    "NT_167250.2": "chr4_GL000257v2_alt",
    "NW_003315914.1": "chr4_GL383527v1_alt",
    "NW_003315915.1": "chr4_GL383528v1_alt",
    "NT_187542.1": "chr4_KI270785v1_alt",
    "NT_187543.1": "chr4_KI270786v1_alt",
    "NT_187541.1": "chr4_KI270787v1_alt",
    "NT_187544.1": "chr4_KI270788v1_alt",
    "NT_187545.1": "chr4_KI270789v1_alt",
    "NT_187540.1": "chr4_KI270790v1_alt",
    "NT_187650.1": "chr4_KI270896v1_alt",
    "NT_187679.1": "chr4_KI270925v1_alt",
    "NT_113948.1": "chr5_GL000208v1_random",
    "NW_003315917.2": "chr5_GL339449v2_alt",
    "NW_003315918.1": "chr5_GL383530v1_alt",
    "NW_003315919.1": "chr5_GL383531v1_alt",
    "NW_003315920.1": "chr5_GL383532v1_alt",
    "NW_003571036.1": "chr5_GL949742v1_alt",
    "NT_187547.1": "chr5_KI270791v1_alt",
    "NT_187548.1": "chr5_KI270792v1_alt",
    "NT_187550.1": "chr5_KI270793v1_alt",
    "NT_187551.1": "chr5_KI270794v1_alt",
    "NT_187546.1": "chr5_KI270795v1_alt",
    "NT_187549.1": "chr5_KI270796v1_alt",
    "NT_187651.1": "chr5_KI270897v1_alt",
    "NT_187652.1": "chr5_KI270898v1_alt",
    "NT_167244.2": "chr6_GL000250v2_alt",
    "NT_113891.3": "chr6_GL000251v2_alt",
    "NT_167245.2": "chr6_GL000252v2_alt",
    "NT_167246.2": "chr6_GL000253v2_alt",
    "NT_167247.2": "chr6_GL000254v2_alt",
    "NT_167248.2": "chr6_GL000255v2_alt",
    "NT_167249.2": "chr6_GL000256v2_alt",
    "NW_003315921.1": "chr6_GL383533v1_alt",
    "NW_004166862.2": "chr6_KB021644v2_alt",
    "NT_187692.1": "chr6_KI270758v1_alt",
    "NT_187552.1": "chr6_KI270797v1_alt",
    "NT_187553.1": "chr6_KI270798v1_alt",
    "NT_187554.1": "chr6_KI270799v1_alt",
    "NT_187555.1": "chr6_KI270800v1_alt",
    "NT_187556.1": "chr6_KI270801v1_alt",
    "NT_187557.1": "chr6_KI270802v1_alt",
    "NW_003315922.2": "chr7_GL383534v2_alt",
    "NT_187562.1": "chr7_KI270803v1_alt",
    "NT_187558.1": "chr7_KI270804v1_alt",
    "NT_187560.1": "chr7_KI270805v1_alt",
    "NT_187559.1": "chr7_KI270806v1_alt",
    "NT_187563.1": "chr7_KI270807v1_alt",
    "NT_187564.1": "chr7_KI270808v1_alt",
    "NT_187561.1": "chr7_KI270809v1_alt",
    "NT_187653.1": "chr7_KI270899v1_alt",
    "NT_187567.1": "chr8_KI270810v1_alt",
    "NT_187565.1": "chr8_KI270811v1_alt",
    "NT_187568.1": "chr8_KI270812v1_alt",
    "NT_187570.1": "chr8_KI270813v1_alt",
    "NT_187566.1": "chr8_KI270814v1_alt",
    "NT_187569.1": "chr8_KI270815v1_alt",
    "NT_187571.1": "chr8_KI270816v1_alt",
    "NT_187573.1": "chr8_KI270817v1_alt",
    "NT_187572.1": "chr8_KI270818v1_alt",
    "NT_187574.1": "chr8_KI270819v1_alt",
    "NT_187575.1": "chr8_KI270820v1_alt",
    "NT_187576.1": "chr8_KI270821v1_alt",
    "NT_187577.1": "chr8_KI270822v1_alt",
    "NT_187654.1": "chr8_KI270900v1_alt",
    "NT_187655.1": "chr8_KI270901v1_alt",
    "NT_187680.1": "chr8_KI270926v1_alt",
    "NW_003315928.1": "chr9_GL383539v1_alt",
    "NW_003315929.1": "chr9_GL383540v1_alt",
    "NW_003315930.1": "chr9_GL383541v1_alt",
    "NW_003315931.1": "chr9_GL383542v1_alt",
    "NT_187372.1": "chr9_KI270717v1_random",
    "NT_187373.1": "chr9_KI270718v1_random",
    "NT_187374.1": "chr9_KI270719v1_random",
    "NT_187375.1": "chr9_KI270720v1_random",
    "NT_187578.1": "chr9_KI270823v1_alt",
    "NT_113901.1": "chrUn_GL000195v1",
    "NT_167208.1": "chrUn_GL000213v1",
    "NT_167209.1": "chrUn_GL000214v1",
    "NT_167211.2": "chrUn_GL000216v2",
    "NT_113889.1": "chrUn_GL000218v1",
    "NT_167213.1": "chrUn_GL000219v1",
    "NT_167214.1": "chrUn_GL000220v1",
    "NT_167218.1": "chrUn_GL000224v1",
    "NT_167220.1": "chrUn_GL000226v1",
    "NT_187396.1": "chrUn_KI270302v1",
    "NT_187398.1": "chrUn_KI270303v1",
    "NT_187397.1": "chrUn_KI270304v1",
    "NT_187399.1": "chrUn_KI270305v1",
    "NT_187402.1": "chrUn_KI270310v1",
    "NT_187406.1": "chrUn_KI270311v1",
    "NT_187405.1": "chrUn_KI270312v1",
    "NT_187404.1": "chrUn_KI270315v1",
    "NT_187403.1": "chrUn_KI270316v1",
    "NT_187407.1": "chrUn_KI270317v1",
    "NT_187401.1": "chrUn_KI270320v1",
    "NT_187400.1": "chrUn_KI270322v1",
    "NT_187459.1": "chrUn_KI270329v1",
    "NT_187458.1": "chrUn_KI270330v1",
    "NT_187461.1": "chrUn_KI270333v1",
    "NT_187460.1": "chrUn_KI270334v1",
    "NT_187462.1": "chrUn_KI270335v1",
    "NT_187465.1": "chrUn_KI270336v1",
    "NT_187466.1": "chrUn_KI270337v1",
    "NT_187463.1": "chrUn_KI270338v1",
    "NT_187464.1": "chrUn_KI270340v1",
    "NT_187469.1": "chrUn_KI270362v1",
    "NT_187467.1": "chrUn_KI270363v1",
    "NT_187468.1": "chrUn_KI270364v1",
    "NT_187470.1": "chrUn_KI270366v1",
    "NT_187494.1": "chrUn_KI270371v1",
    "NT_187491.1": "chrUn_KI270372v1",
    "NT_187492.1": "chrUn_KI270373v1",
    "NT_187490.1": "chrUn_KI270374v1",
    "NT_187493.1": "chrUn_KI270375v1",
    "NT_187489.1": "chrUn_KI270376v1",
    "NT_187471.1": "chrUn_KI270378v1",
    "NT_187472.1": "chrUn_KI270379v1",
    "NT_187486.1": "chrUn_KI270381v1",
    "NT_187488.1": "chrUn_KI270382v1",
    "NT_187482.1": "chrUn_KI270383v1",
    "NT_187484.1": "chrUn_KI270384v1",
    "NT_187487.1": "chrUn_KI270385v1",
    "NT_187480.1": "chrUn_KI270386v1",
    "NT_187475.1": "chrUn_KI270387v1",
    "NT_187478.1": "chrUn_KI270388v1",
    "NT_187473.1": "chrUn_KI270389v1",
    "NT_187474.1": "chrUn_KI270390v1",
    "NT_187481.1": "chrUn_KI270391v1",
    "NT_187485.1": "chrUn_KI270392v1",
    "NT_187483.1": "chrUn_KI270393v1",
    "NT_187479.1": "chrUn_KI270394v1",
    "NT_187476.1": "chrUn_KI270395v1",
    "NT_187477.1": "chrUn_KI270396v1",
    "NT_187409.1": "chrUn_KI270411v1",
    "NT_187408.1": "chrUn_KI270412v1",
    "NT_187410.1": "chrUn_KI270414v1",
    "NT_187415.1": "chrUn_KI270417v1",
    "NT_187412.1": "chrUn_KI270418v1",
    "NT_187411.1": "chrUn_KI270419v1",
    "NT_187413.1": "chrUn_KI270420v1",
    "NT_187416.1": "chrUn_KI270422v1",
    "NT_187417.1": "chrUn_KI270423v1",
    "NT_187414.1": "chrUn_KI270424v1",
    "NT_187418.1": "chrUn_KI270425v1",
    "NT_187419.1": "chrUn_KI270429v1",
    "NT_187424.1": "chrUn_KI270435v1",
    "NT_187425.1": "chrUn_KI270438v1",
    "NT_187420.1": "chrUn_KI270442v1",
    "NT_187495.1": "chrUn_KI270448v1",
    "NT_187422.1": "chrUn_KI270465v1",
    "NT_187421.1": "chrUn_KI270466v1",
    "NT_187423.1": "chrUn_KI270467v1",
    "NT_187426.1": "chrUn_KI270468v1",
    "NT_187437.1": "chrUn_KI270507v1",
    "NT_187430.1": "chrUn_KI270508v1",
    "NT_187428.1": "chrUn_KI270509v1",
    "NT_187427.1": "chrUn_KI270510v1",
    "NT_187435.1": "chrUn_KI270511v1",
    "NT_187432.1": "chrUn_KI270512v1",
    "NT_187436.1": "chrUn_KI270515v1",
    "NT_187431.1": "chrUn_KI270516v1",
    "NT_187438.1": "chrUn_KI270517v1",
    "NT_187429.1": "chrUn_KI270518v1",
    "NT_187433.1": "chrUn_KI270519v1",
    "NT_187496.1": "chrUn_KI270521v1",
    "NT_187434.1": "chrUn_KI270522v1",
    "NT_187440.1": "chrUn_KI270528v1",
    "NT_187439.1": "chrUn_KI270529v1",
    "NT_187441.1": "chrUn_KI270530v1",
    "NT_187443.1": "chrUn_KI270538v1",
    "NT_187442.1": "chrUn_KI270539v1",
    "NT_187444.1": "chrUn_KI270544v1",
    "NT_187445.1": "chrUn_KI270548v1",
    "NT_187450.1": "chrUn_KI270579v1",
    "NT_187448.1": "chrUn_KI270580v1",
    "NT_187449.1": "chrUn_KI270581v1",
    "NT_187454.1": "chrUn_KI270582v1",
    "NT_187446.1": "chrUn_KI270583v1",
    "NT_187453.1": "chrUn_KI270584v1",
    "NT_187447.1": "chrUn_KI270587v1",
    "NT_187455.1": "chrUn_KI270588v1",
    "NT_187451.1": "chrUn_KI270589v1",
    "NT_187452.1": "chrUn_KI270590v1",
    "NT_187457.1": "chrUn_KI270591v1",
    "NT_187456.1": "chrUn_KI270593v1",
    "NT_187497.1": "chrUn_KI270741v1",
    "NT_187513.1": "chrUn_KI270742v1",
    "NT_187498.1": "chrUn_KI270743v1",
    "NT_187499.1": "chrUn_KI270744v1",
    "NT_187500.1": "chrUn_KI270745v1",
    "NT_187501.1": "chrUn_KI270746v1",
    "NT_187502.1": "chrUn_KI270747v1",
    "NT_187503.1": "chrUn_KI270748v1",
    "NT_187504.1": "chrUn_KI270749v1",
    "NT_187505.1": "chrUn_KI270750v1",
    "NT_187506.1": "chrUn_KI270751v1",
    "NT_187507.1": "chrUn_KI270752v1",
    "NT_187508.1": "chrUn_KI270753v1",
    "NT_187509.1": "chrUn_KI270754v1",
    "NT_187510.1": "chrUn_KI270755v1",
    "NT_187511.1": "chrUn_KI270756v1",
    "NT_187512.1": "chrUn_KI270757v1",
    "NT_187634.1": "chrX_KI270880v1_alt",
    "NT_187635.1": "chrX_KI270881v1_alt",
    "NT_187667.1": "chrX_KI270913v1_alt",
    "NT_187395.1": "chrY_KI270740v1_random",
}


def to_chr_num_ucsc(accession, primary_assembly):
    """Return the UCSC-style sequence name for a RefSeq accession.

    This is a plain dictionary lookup. UCSC and GenBank use different
    aliases for the ALT and patch sequences, so these tables hold the UCSC
    spellings only.

    :param accession: versioned RefSeq accession, e.g. ``"NC_000001.10"``.
        The version suffix is part of the key, so an accession from one
        build is not found in the other build's table.
    :param primary_assembly: genome build name. Only the exact strings
        ``"hg19"`` and ``"hg38"`` select a table.
    :return: the UCSC name (e.g. ``"chr1"``, ``"chr6_cox_hap2"``), or
        ``None`` when the build is not ``"hg19"``/``"hg38"`` or the
        accession is not in the selected table.
    """
    # Explicit default for the "unrecognised build" case; this replaces the
    # previous approach of reading an unbound local and catching
    # UnboundLocalError.
    chr_num = None
    if primary_assembly == 'hg38':
        chr_num = _UCSC_CHR_NAMES_HG38.get(accession)
    elif primary_assembly == 'hg19':
        chr_num = _UCSC_CHR_NAMES_HG19.get(accession)
    return chr_num
+ +Note, UCSC and GenBank have different aliases for the ALT and Patch identifiers +""" + + +def to_chr_num_refseq(accession, primary_assembly): + # Available genome builds + chr_num_convert_37 = { + "NC_000001.10": "1", + "NC_000002.11": "2", + "NC_000003.11": "3", + "NC_000004.11": "4", + "NC_000005.9": "5", + "NC_000006.11": "6", + "NC_000007.13": "7", + "NC_000008.10": "8", + "NC_000009.11": "9", + "NC_000010.10": "10", + "NC_000011.9": "11", + "NC_000012.11": "12", + "NC_000013.10": "13", + "NC_000014.8": "14", + "NC_000015.9": "15", + "NC_000016.9": "16", + "NC_000017.10": "17", + "NC_000018.9": "18", + "NC_000019.9": "19", + "NC_000020.10": "20", + "NC_000021.8": "21", + "NC_000022.10": "22", + "NC_000023.10": "X", + "NC_000024.9": "Y", + "NC_012920.1": "M", + + # GRC GRCh37 alts + 'NW_004070864.2': 'HG1472_PATCH', + 'NW_003571030.1': 'HG989_PATCH', + 'NW_003871056.3': 'HG1292_PATCH', + 'NW_003871055.3': 'HG1287_PATCH', + 'NW_003315905.1': 'HSCHR1_1_CTG31', + 'NW_003315906.1': 'HSCHR1_2_CTG31', + 'NW_003315907.1': 'HSCHR1_3_CTG31', + 'NW_004070863.1': 'HG1471_PATCH', + 'NW_003871057.1': 'HG1293_PATCH', + 'NW_004070865.1': 'HG1473_PATCH', + 'NW_003315903.1': 'HG999_1_PATCH', + 'NW_003315904.1': 'HG999_2_PATCH', + 'NW_003315908.1': 'HSCHR2_1_CTG1', + 'NW_004504299.1': 'HG953_PATCH', + 'NW_003571032.1': 'HG686_PATCH', + 'NW_003571033.2': 'HSCHR2_2_CTG12', + 'NW_003315909.1': 'HSCHR2_1_CTG12', + 'NW_003571031.1': 'HG1007_PATCH', + 'NW_003871060.1': 'HSCHR3_1_CTG1', + 'NW_003871059.1': 'HG325_PATCH', + 'NW_003315910.1': 'HG186_PATCH', + 'NW_004775426.1': 'HG957_PATCH', + 'NW_003315911.1': 'HG280_PATCH', + 'NW_003871058.1': 'HG1091_PATCH', + 'NW_003315912.1': 'HG991_PATCH', + 'NW_003315913.1': 'HSCHR3_1_CTG2_1', + 'NW_004775427.1': 'HG174_HG254_PATCH', + 'NW_003315915.1': 'HSCHR4_1_CTG6', + 'NW_003315916.1': 'HSCHR4_2_CTG9', + 'NW_003571035.1': 'HG706_PATCH', + 'NW_003315914.1': 'HSCHR4_1_CTG12', + 'NW_003571034.1': 'HG1032_PATCH', + 'NW_003315920.1': 
'HSCHR5_2_CTG1', + 'NW_003571036.1': 'HSCHR5_3_CTG1', + 'NW_003315917.2': 'HSCHR5_1_CTG1', + 'NW_003315918.1': 'HSCHR5_1_CTG2', + 'NW_003871061.1': 'HG1063_PATCH', + 'NW_004775428.1': 'HG1082_HG167_PATCH', + 'NW_003315919.1': 'HSCHR5_1_CTG5', + 'NW_004070866.1': 'HG27_PATCH', + 'NW_003871063.1': 'HG1322_PATCH', + 'NW_003315921.1': 'HSCHR6_1_CTG5', + 'NW_004504300.1': 'HG357_PATCH', + 'NW_003871062.1': 'HG1304_PATCH', + 'NW_004775429.1': 'HG193_PATCH', + 'NW_004166862.1': 'HSCHR6_2_CTG5', + 'NW_003571039.1': 'HG736_PATCH', + 'NW_003571038.1': 'HG14_PATCH', + 'NW_004775430.1': 'HG444_PATCH', + 'NW_003871064.1': 'HG1257_PATCH', + 'NW_003571041.1': 'HG946_PATCH', + 'NW_003571037.1': 'HG115_PATCH', + 'NW_003871065.1': 'HG1308_PATCH', + 'NW_003315922.2': 'HSCHR7_1_CTG6', + 'NW_003571040.1': 'HG7_PATCH', + 'NW_003571042.1': 'HG19_PATCH', + 'NW_004775431.1': 'HG1699_PATCH', + 'NW_003871066.2': 'HG418_PATCH', + 'NW_003315923.1': 'HG104_HG975_PATCH', + 'NW_003315924.1': 'HG243_PATCH', + 'NW_003315928.1': 'HSCHR9_1_CTG1', + 'NW_003871067.1': 'HG962_PATCH', + 'NW_003315929.1': 'HSCHR9_1_CTG35', + 'NW_003315930.1': 'HSCHR9_2_CTG35', + 'NW_003315931.1': 'HSCHR9_3_CTG35', + 'NW_004504301.1': 'HG50_PATCH', + 'NW_004070869.1': 'HG1502_PATCH', + 'NW_003315925.1': 'HG79_PATCH', + 'NW_004070867.1': 'HG1500_PATCH', + 'NW_004070868.1': 'HG1501_PATCH', + 'NW_003315926.1': 'HG998_1_PATCH', + 'NW_003315927.1': 'HG998_2_PATCH', + 'NW_003571043.1': 'HG905_PATCH', + 'NW_003871071.1': 'HG871_PATCH', + 'NW_003315932.1': 'HG544_PATCH', + 'NW_003315934.1': 'HSCHR10_1_CTG2', + 'NW_003315935.1': 'HSCHR10_1_CTG5', + 'NW_003871068.1': 'HG1211_PATCH', + 'NW_004504302.1': 'HG1074_PATCH', + 'NW_003871070.1': 'HG339_PATCH', + 'NW_004775432.1': 'HG979_PATCH', + 'NW_003871069.1': 'HG311_PATCH', + 'NW_003315933.1': 'HG995_PATCH', + 'NW_004070870.1': 'HG1479_PATCH', + 'NW_003871075.1': 'HG256_PATCH', + 'NW_003871082.1': 'HG873_PATCH', + 'NW_003315936.1': 'HSCHR11_1_CTG1_1', + 'NW_003571045.1': 'HG281_PATCH', 
+ 'NW_003871073.1': 'HG142_HG150_NOVEL_TEST', + 'NW_003871074.1': 'HG151_NOVEL_TEST', + 'NW_003571046.1': 'HG536_PATCH', + 'NW_004070871.1': 'HG865_PATCH', + 'NW_003871081.1': 'HG414_PATCH', + 'NW_003871079.1': 'HG348_PATCH', + 'NW_003871077.1': 'HG305_PATCH', + 'NW_003871080.1': 'HG388_HG400_PATCH', + 'NW_003871078.1': 'HG306_PATCH', + 'NW_003871072.2': 'HG122_PATCH', + 'NW_003871076.1': 'HG299_PATCH', + 'NW_003571048.1': 'HG858_PATCH', + 'NW_003571049.1': 'HSCHR12_1_CTG1', + 'NW_003871083.2': 'HG344_PATCH', + 'NW_003571047.1': 'HG1133_PATCH', + 'NW_003571050.1': 'HSCHR12_2_CTG2', + 'NW_003315938.1': 'HSCHR12_1_CTG2', + 'NW_003315939.1': 'HSCHR12_1_CTG2_1', + 'NW_003315941.1': 'HSCHR12_2_CTG2_1', + 'NW_003315942.2': 'HSCHR12_3_CTG2_1', + 'NW_004504303.2': 'HG1595_PATCH', + 'NW_003315940.1': 'HSCHR12_1_CTG5', + 'NW_003315937.1': 'HG996_PATCH', + 'NW_003571051.1': 'HG531_PATCH', + 'NW_004166863.1': 'HG1592_PATCH', + 'NW_003315943.1': 'HSCHR15_1_CTG4', + 'NW_003315944.1': 'HSCHR15_1_CTG8', + 'NW_003871084.1': 'HG971_PATCH', + 'NW_003315945.1': 'HSCHR16_1_CTG3_1', + 'NW_003871085.1': 'HG1208_PATCH', + 'NW_003315946.1': 'HSCHR16_2_CTG3_1', + 'NW_004070872.2': 'HG417_PATCH', + 'NW_003315952.2': 'HSCHR17_1_CTG1', + 'NW_003315951.1': 'HG990_PATCH', + 'NW_003315950.2': 'HG987_PATCH', + 'NW_004775433.1': 'HG1591_PATCH', + 'NW_003871090.1': 'HG883_PATCH', + 'NW_004166864.2': 'HG385_PATCH', + 'NW_003315949.1': 'HG75_PATCH', + 'NW_003315948.2': 'HG745_PATCH', + 'NW_003871091.1': 'HSCHR17_4_CTG4', + 'NW_003871093.1': 'HSCHR17_6_CTG4', + 'NW_003871092.1': 'HSCHR17_5_CTG4', + 'NW_003315953.1': 'HSCHR17_1_CTG4', + 'NW_003571052.1': 'HG185_PATCH', + 'NW_003871086.1': 'HG1146_PATCH', + 'NW_003315947.1': 'HG183_PATCH', + 'NW_003871088.1': 'HG747_PATCH', + 'NW_003315954.1': 'HSCHR17_2_CTG4', + 'NW_003315955.1': 'HSCHR17_3_CTG4', + 'NW_003871089.1': 'HG748_PATCH', + 'NW_003871087.1': 'HG271_PATCH', + 'NW_003315956.1': 'HSCHR18_1_CTG1_1', + 'NW_003315959.1': 'HSCHR18_2_CTG1_1', + 
'NW_003315960.1': 'HSCHR18_2_CTG2', + 'NW_003315957.1': 'HSCHR18_1_CTG2', + 'NW_003315958.1': 'HSCHR18_1_CTG2_1', + 'NW_003315961.1': 'HSCHR18_2_CTG2_1', + 'NW_003871094.1': 'HG729_PATCH', + 'NW_003571053.2': 'HG730_PATCH', + 'NW_003315962.1': 'HSCHR19_1_CTG3', + 'NW_003315964.2': 'HSCHR19_2_CTG3', + 'NW_003315965.1': 'HSCHR19_3_CTG3', + 'NW_003315963.1': 'HSCHR19_1_CTG3_1', + 'NW_004775434.1': 'HG1350_HG959_PATCH', + 'NW_004166865.1': 'HG1079_PATCH', + 'NW_003571054.1': 'HSCHR19LRC_COX1_CTG1', + 'NW_003571055.1': 'HSCHR19LRC_COX2_CTG1', + 'NW_003571056.1': 'HSCHR19LRC_LRC_I_CTG1', + 'NW_003571057.1': 'HSCHR19LRC_LRC_J_CTG1', + 'NW_003571058.1': 'HSCHR19LRC_LRC_S_CTG1', + 'NW_003571059.1': 'HSCHR19LRC_LRC_T_CTG1', + 'NW_003571060.1': 'HSCHR19LRC_PGF1_CTG1', + 'NW_003571061.1': 'HSCHR19LRC_PGF2_CTG1', + 'NW_003315966.1': 'HSCHR20_1_CTG1', + 'NW_003871095.1': 'HG144_PATCH', + 'NW_004504304.1': 'HG944_PATCH', + 'NW_003571063.2': 'HG506_HG507_HG1000_PATCH', + 'NW_003315967.1': 'HSCHR21_1_CTG1_1', + 'NW_003315968.1': 'HSCHR21_2_CTG1_1', + 'NW_003315969.1': 'HSCHR21_3_CTG1_1', + 'NW_003315970.1': 'HSCHR21_4_CTG1_1', + 'NW_004775435.1': 'HG237_PATCH', + 'NW_004070874.1': 'HG1487_PATCH', + 'NW_004070873.1': 'HG1486_PATCH', + 'NW_004070875.1': 'HG1488_PATCH', + 'NW_003871096.1': 'HG329_PATCH', + 'NW_003315972.1': 'HSCHR22_1_CTG2', + 'NW_003315971.2': 'HSCHR22_1_CTG1', + 'NW_004504305.1': 'HSCHR22_2_CTG1', + 'NW_004070876.1': 'HG497_PATCH', + 'NW_003571064.2': 'HG480_HG481_PATCH', + 'NW_003871098.1': 'HG1423_PATCH', + 'NW_003871099.1': 'HG1424_PATCH', + 'NW_004070879.1': 'HG1435_PATCH', + 'NW_004166866.1': 'HG29_PATCH', + 'NW_004070880.2': 'HG1436_HG1432_PATCH', + 'NW_004070877.1': 'HG1433_PATCH', + 'NW_004070881.1': 'HG1437_PATCH', + 'NW_004070882.1': 'HG1438_PATCH', + 'NW_003871100.1': 'HG1425_PATCH', + 'NW_003871101.3': 'HG1426_PATCH', + 'NW_004070883.1': 'HG1439_PATCH', + 'NW_004070884.1': 'HG1440_PATCH', + 'NW_004070885.1': 'HG1441_PATCH', + 'NW_003871102.1': 
'HG375_PATCH', + 'NW_004070878.1': 'HG1434_PATCH', + 'NW_004070891.1': 'HG1462_PATCH', + 'NW_004070892.1': 'HG1463_PATCH', + 'NW_004070893.1': 'HG1490_PATCH', + 'NW_004070886.1': 'HG1442_PATCH', + 'NW_004070887.1': 'HG1443_HG1444_PATCH', + 'NW_004070888.1': 'HG1453_PATCH', + 'NW_004070889.1': 'HG1458_PATCH', + 'NW_004070890.2': 'HG1459_PATCH', + 'NW_003871103.3': 'HG1497_PATCH', + 'NT_167244.1': 'HSCHR6_MHC_APD_CTG1', + 'NT_113891.2': 'HSCHR6_MHC_COX_CTG1', + 'NT_167245.1': 'HSCHR6_MHC_DBB_CTG1', + 'NT_167246.1': 'HSCHR6_MHC_MANN_CTG1', + 'NT_167247.1': 'HSCHR6_MHC_MCF_CTG1', + 'NT_167248.1': 'HSCHR6_MHC_QBL_CTG1', + 'NT_167249.1': 'HSCHR6_MHC_SSTO_CTG1', + 'NT_167250.1': 'HSCHR4_1_CTG9', + 'NT_167251.1': 'HSCHR17_1_CTG5' + } + + chr_num_convert_38 = { + "NC_000001.11": "1", + "NC_000002.12": "2", + "NC_000003.12": "3", + "NC_000004.12": "4", + "NC_000005.10": "5", + "NC_000006.12": "6", + "NC_000007.14": "7", + "NC_000008.11": "8", + "NC_000009.12": "9", + "NC_000010.11": "10", + "NC_000011.10": "11", + "NC_000012.12": "12", + "NC_000013.11": "13", + "NC_000014.9": "14", + "NC_000015.10": "15", + "NC_000016.10": "16", + "NC_000017.11": "17", + "NC_000018.10": "18", + "NC_000019.10": "19", + "NC_000020.11": "20", + "NC_000021.9": "21", + "NC_000022.11": "22", + "NC_000023.11": "X", + "NC_000024.10": "Y", + "NC_012920.1": "M", + + # GRCh38 alts + 'NW_012132914.1': 'HG1342_HG2282_PATCH', + 'NW_015495298.1': 'HSCHR1_5_CTG3', + 'NW_011332688.1': 'HG2095_PATCH', + 'NW_014040926.1': 'HSCHR1_4_CTG3', + 'NW_009646195.1': 'HG2058_PATCH', + 'NW_018654706.1': 'HSCHR1_8_CTG3', + 'NW_019805487.1': 'HG460_PATCH', + 'NW_009646194.1': 'HG986_PATCH', + 'NW_018654707.1': 'HSCHR1_9_CTG3', + 'NW_014040925.1': 'HSCHR1_3_CTG3', + 'NW_017852928.1': 'HSCHR1_6_CTG3', + 'NW_009646196.1': 'HG2104_PATCH', + 'NW_011332687.1': 'HG1832_PATCH', + 'NW_018654708.1': 'HG2002_PATCH', + 'NW_014040927.1': 'HSCHR1_5_CTG32_1', + 'NW_012132915.1': 'HG2290_PATCH', + 'NW_018654709.1': 'HSCHR2_7_CTG7_2', + 
'NW_015495299.1': 'HSCHR2_6_CTG7_2', + 'NW_018654710.1': 'HSCHR2_8_CTG7_2', + 'NW_011332690.1': 'HG2232_PATCH', + 'NW_011332689.1': 'HG2233_PATCH', + 'NW_017363813.1': 'HG2236_PATCH', + 'NW_009646197.1': 'HG2066_PATCH', + 'NW_012132916.1': 'HG2235_PATCH', + 'NW_011332691.1': 'HG126_PATCH', + 'NW_018654711.1': 'HSCHR3_4_CTG1', + 'NW_012132917.1': 'HG2237_PATCH', + 'NW_009646198.1': 'HG2022_PATCH', + 'NW_019805491.1': 'HG2133_PATCH', + 'NW_019805492.1': 'HSCHR3_6_CTG2_1', + 'NW_019805490.1': 'HSCHR3_9_CTG2_1', + 'NW_019805489.1': 'HSCHR3_8_CTG2_1', + 'NW_019805488.1': 'HSCHR3_7_CTG2_1', + 'NW_013171799.1': 'HSCHR4_2_CTG4', + 'NW_013171800.1': 'HSCHR4_8_CTG12', + 'NW_013171801.1': 'HSCHR4_9_CTG12', + 'NW_017363814.1': 'HSCHR4_12_CTG12', + 'NW_015495300.1': 'HG2023_PATCH', + 'NW_015495301.1': 'HSCHR4_11_CTG12', + 'NW_018654712.1': 'HSCHR5_9_CTG1', + 'NW_009646199.1': 'HSCHR5_7_CTG1', + 'NW_016107297.1': 'HSCHR5_8_CTG1', + 'NW_016107298.1': 'HG30_PATCH', + 'NW_018654713.1': 'HG2057_PATCH', + 'NW_013171803.1': 'HSCHR6_1_CTG10', + 'NW_012132918.1': 'HG1651_PATCH', + 'NW_009646200.1': 'HG2128_PATCH', + 'NW_013171802.1': 'HG2072_PATCH', + 'NW_017363815.1': 'HG2121_PATCH', + 'NW_019805493.1': 'HSCHR7_3_CTG1', + 'NW_017852929.1': 'HG2088_PATCH', + 'NW_017852930.1': 'HG2266_PATCH', + 'NW_018654714.1': 'HG708_PATCH', + 'NW_018654715.1': 'HSCHR7_3_CTG4_4', + 'NW_012132919.1': 'HG2239_PATCH', + 'NW_018654717.1': 'HG76_PATCH', + 'NW_017852932.1': 'HG2068_PATCH', + 'NW_017852931.1': 'HG2067_PATCH', + 'NW_019805494.1': 'HSCHR8_7_CTG7', + 'NW_018654716.1': 'HG2419_PATCH', + 'NW_013171804.1': 'HSCHR9_1_CTG6', + 'NW_013171805.1': 'HSCHR9_1_CTG7', + 'NW_009646201.1': 'HG2030_PATCH', + 'NW_011332694.1': 'HG2244_HG2245_PATCH', + 'NW_013171806.1': 'HSCHR10_1_CTG6', + 'NW_009646202.1': 'HG2191_PATCH', + 'NW_013171807.1': 'HG2334_PATCH', + 'NW_011332693.1': 'HG2242_HG2243_PATCH', + 'NW_011332692.1': 'HG2241_PATCH', + 'NW_015148966.1': 'HG107_PATCH', + 'NW_011332695.1': 'HSCHR11_1_CTG1_2', + 
'NW_019805496.1': 'HG2114_PATCH', + 'NW_019805495.1': 'HG2060_PATCH', + 'NW_017363816.1': 'HG1708_PATCH', + 'NW_019805498.1': 'HSCHR11_1_CTG3_1', + 'NW_019805497.1': 'HSCHR11_2_CTG8', + 'NW_013171808.1': 'HG2116_PATCH', + 'NW_009646203.1': 'HG2217_PATCH', + 'NW_013171809.1': 'HSCHR12_2_CTG1', + 'NW_018654718.1': 'HG1815_PATCH', + 'NW_011332696.1': 'HG1362_PATCH', + 'NW_009646204.1': 'HG23_PATCH', + 'NW_018654720.1': 'HSCHR12_8_CTG2_1', + 'NW_015148967.1': 'HG2063_PATCH', + 'NW_018654719.1': 'HG2047_PATCH', + 'NW_011332697.1': 'HG2247_PATCH', + 'NW_019805499.1': 'HSCHR12_9_CTG2_1', + 'NW_011332699.1': 'HG2291_PATCH', + 'NW_013171810.1': 'HSCHR13_1_CTG7', + 'NW_009646205.1': 'HG2216_PATCH', + 'NW_011332700.1': 'HG2249_PATCH', + 'NW_013171811.1': 'HSCHR13_1_CTG8', + 'NW_011332698.1': 'HG2288_HG2289_PATCH', + 'NW_018654722.1': 'HG1_PATCH', + 'NW_018654721.1': 'HSCHR14_8_CTG1', + 'NW_011332701.1': 'HG2139_PATCH', + 'NW_012132920.1': 'HSCHR15_6_CTG8', + 'NW_013171812.1': 'HSCHR16_5_CTG1', + 'NW_019805500.1': 'HG2263_PATCH', + 'NW_017852933.1': 'HG926_PATCH', + 'NW_013171813.1': 'HSCHR16_4_CTG3_1', + 'NW_018654723.1': 'HSCHR16_5_CTG3_1', + 'NW_012132921.1': 'HSCHR16_3_CTG3_1', + 'NW_017363817.1': 'HG2285_HG106_HG2252_PATCH', + 'NW_016107299.1': 'HG2046_PATCH', + 'NW_017363819.1': 'HSCHR17_3_CTG1', + 'NW_017363818.1': 'HSCHR17_11_CTG4', + 'NW_019805501.1': 'HSCHR17_12_CTG4', + 'NW_019805503.1': 'HSCHR18_1_CTG1', + 'NW_014040928.1': 'HSCHR18_5_CTG1_1', + 'NW_019805502.1': 'HG2412_PATCH', + 'NW_013171814.1': 'HG2213_PATCH', + 'NW_018654724.1': 'HG2442_PATCH', + 'NW_014040929.1': 'HG26_PATCH', + 'NW_009646206.1': 'HG2021_PATCH', + 'NW_016107300.1': 'HSCHR19KIR_0019-4656-A_CTG3_1', + 'NW_016107301.1': 'HSCHR19KIR_CA01-TA01_1_CTG3_1', + 'NW_016107302.1': 'HSCHR19KIR_CA01-TA01_2_CTG3_1', + 'NW_016107303.1': 'HSCHR19KIR_CA01-TB04_CTG3_1', + 'NW_016107304.1': 'HSCHR19KIR_CA01-TB01_CTG3_1', + 'NW_016107305.1': 'HSCHR19KIR_HG2394_CTG3_1', + 'NW_016107306.1': 
'HSCHR19KIR_502960008-2_CTG3_1', + 'NW_016107307.1': 'HSCHR19KIR_502960008-1_CTG3_1', + 'NW_016107308.1': 'HSCHR19KIR_0010-5217-AB_CTG3_1', + 'NW_016107309.1': 'HSCHR19KIR_7191059-1_CTG3_1', + 'NW_016107310.1': 'HSCHR19KIR_0019-4656-B_CTG3_1', + 'NW_016107311.1': 'HSCHR19KIR_CA04_CTG3_1', + 'NW_016107313.1': 'HSCHR19KIR_7191059-2_CTG3_1', + 'NW_016107314.1': 'HSCHR19KIR_HG2396_CTG3_1', + 'NW_016107312.1': 'HSCHR19KIR_HG2393_CTG3_1', + 'NW_009646207.1': 'HSCHR22_4_CTG1', + 'NW_014040930.1': 'HSCHR22_6_CTG1', + 'NW_014040931.1': 'HSCHR22_7_CTG1', + 'NW_009646208.1': 'HSCHR22_5_CTG1', + 'NW_015148968.1': 'HSCHR22_8_CTG1', + 'NW_015148969.1': 'HG1311_PATCH', + 'NW_017363820.1': 'HSCHRX_3_CTG7', + 'NW_018654725.1': 'HG1531_PATCH', + 'NW_018654726.1': 'HG1535_PATCH', + 'NW_009646209.1': 'HG2062_PATCH', + 'NT_187515.1': 'HSCHR1_1_CTG3', + 'NT_187517.1': 'HSCHR1_2_CTG3', + 'NT_187514.1': 'HSCHR1_1_CTG11', + 'NT_187520.1': 'HSCHR1_4_CTG31', + 'NW_003315905.1': 'HSCHR1_1_CTG31', + 'NW_003315906.1': 'HSCHR1_2_CTG31', + 'NW_003315907.2': 'HSCHR1_3_CTG31', + 'NT_187521.1': 'HSCHR1_4_CTG32_1', + 'NT_187519.1': 'HSCHR1_3_CTG32_1', + 'NT_187516.1': 'HSCHR1_1_CTG32_1', + 'NT_187518.1': 'HSCHR1_2_CTG32_1', + 'NT_187525.1': 'HSCHR2_2_CTG1', + 'NT_187526.1': 'HSCHR2_3_CTG1', + 'NT_187529.1': 'HSCHR2_4_CTG1', + 'NT_187522.1': 'HSCHR2_1_CTG1', + 'NW_003315908.1': 'HSCHR2_1_CTG5', + 'NT_187524.1': 'HSCHR2_1_CTG7', + 'NT_187531.1': 'HSCHR2_5_CTG7_2', + 'NT_187530.1': 'HSCHR2_4_CTG7_2', + 'NT_187528.1': 'HSCHR2_3_CTG7_2', + 'NW_003571033.2': 'HSCHR2_2_CTG7_2', + 'NW_003315909.1': 'HSCHR2_1_CTG7_2', + 'NT_187527.1': 'HSCHR2_3_CTG15', + 'NT_187523.1': 'HSCHR2_1_CTG15', + 'NW_003871060.2': 'HSCHR3_1_CTG1', + 'NT_187535.1': 'HSCHR3_3_CTG1', + 'NT_187537.1': 'HSCHR3_4_CTG2_1', + 'NW_003315913.1': 'HSCHR3_1_CTG2_1', + 'NT_187533.1': 'HSCHR3_2_CTG2_1', + 'NT_187536.1': 'HSCHR3_3_CTG2_1', + 'NT_187538.1': 'HSCHR3_5_CTG2_1', + 'NT_187532.1': 'HSCHR3_1_CTG3', + 'NT_187534.1': 'HSCHR3_2_CTG3', + 
'NT_187539.1': 'HSCHR3_9_CTG3', + 'NT_187540.1': 'HSCHR4_1_CTG4', + 'NW_003315915.1': 'HSCHR4_1_CTG6', + 'NT_187541.1': 'HSCHR4_1_CTG8_1', + 'NT_167250.2': 'HSCHR4_1_CTG9', + 'NT_187544.1': 'HSCHR4_4_CTG12', + 'NW_003315914.1': 'HSCHR4_1_CTG12', + 'NT_187542.1': 'HSCHR4_2_CTG12', + 'NT_187545.1': 'HSCHR4_5_CTG12', + 'NT_187543.1': 'HSCHR4_3_CTG12', + 'NT_187550.1': 'HSCHR5_5_CTG1', + 'NT_187548.1': 'HSCHR5_4_CTG1', + 'NT_187547.1': 'HSCHR5_3_CTG1', + 'NW_003315920.1': 'HSCHR5_1_CTG1', + 'NW_003571036.1': 'HSCHR5_2_CTG1', + 'NT_187551.1': 'HSCHR5_6_CTG1', + 'NW_003315917.2': 'HSCHR5_2_CTG1_1', + 'NW_003315918.1': 'HSCHR5_3_CTG1_1', + 'NT_187549.1': 'HSCHR5_4_CTG1_1', + 'NW_003315919.1': 'HSCHR5_1_CTG5', + 'NT_187546.1': 'HSCHR5_2_CTG5', + 'NT_167244.2': 'HSCHR6_MHC_APD_CTG1', + 'NT_187555.1': 'HSCHR6_1_CTG7', + 'NT_187554.1': 'HSCHR6_1_CTG6', + 'NW_003315921.1': 'HSCHR6_1_CTG2', + 'NT_187556.1': 'HSCHR6_1_CTG8', + 'NT_187557.1': 'HSCHR6_1_CTG9', + 'NW_004166862.2': 'HSCHR6_1_CTG3', + 'NT_187552.1': 'HSCHR6_1_CTG4', + 'NT_187553.1': 'HSCHR6_1_CTG5', + 'NT_187558.1': 'HSCHR7_1_CTG1', + 'NT_187561.1': 'HSCHR7_2_CTG4_4', + 'NT_187559.1': 'HSCHR7_1_CTG4_4', + 'NW_003315922.2': 'HSCHR7_1_CTG6', + 'NT_187562.1': 'HSCHR7_2_CTG6', + 'NT_187564.1': 'HSCHR7_3_CTG6', + 'NT_187563.1': 'HSCHR7_2_CTG7', + 'NT_187560.1': 'HSCHR7_1_CTG7', + 'NT_187572.1': 'HSCHR8_4_CTG1', + 'NT_187568.1': 'HSCHR8_2_CTG1', + 'NT_187565.1': 'HSCHR8_1_CTG1', + 'NT_187576.1': 'HSCHR8_8_CTG1', + 'NT_187570.1': 'HSCHR8_3_CTG1', + 'NT_187577.1': 'HSCHR8_9_CTG1', + 'NT_187566.1': 'HSCHR8_1_CTG6', + 'NT_187567.1': 'HSCHR8_1_CTG7', + 'NT_187574.1': 'HSCHR8_5_CTG7', + 'NT_187575.1': 'HSCHR8_6_CTG7', + 'NT_187573.1': 'HSCHR8_4_CTG7', + 'NT_187571.1': 'HSCHR8_3_CTG7', + 'NT_187569.1': 'HSCHR8_2_CTG7', + 'NW_003315928.1': 'HSCHR9_1_CTG1', + 'NW_003315929.1': 'HSCHR9_1_CTG2', + 'NW_003315930.1': 'HSCHR9_1_CTG3', + 'NW_003315931.1': 'HSCHR9_1_CTG4', + 'NT_187578.1': 'HSCHR9_1_CTG5', + 'NW_003315934.1': 
'HSCHR10_1_CTG1', + 'NT_187579.1': 'HSCHR10_1_CTG3', + 'NW_003315935.1': 'HSCHR10_1_CTG2', + 'NT_187580.1': 'HSCHR10_1_CTG4', + 'NT_187586.1': 'HSCHR11_1_CTG8', + 'NT_187584.1': 'HSCHR11_1_CTG6', + 'NT_187585.1': 'HSCHR11_1_CTG7', + 'NT_187583.1': 'HSCHR11_1_CTG5', + 'NW_003315936.1': 'HSCHR11_1_CTG1_1', + 'NW_003871073.1': 'HG142_HG150_NOVEL_TEST', + 'NW_003871074.1': 'HG151_NOVEL_TEST', + 'NT_187582.1': 'HSCHR11_1_CTG3', + 'NT_187581.1': 'HSCHR11_1_CTG2', + 'NW_003571049.1': 'HSCHR12_1_CTG1', + 'NW_003571050.1': 'HSCHR12_2_CTG2', + 'NT_187588.1': 'HSCHR12_5_CTG2', + 'NW_003315938.1': 'HSCHR12_1_CTG2', + 'NT_187587.1': 'HSCHR12_4_CTG2', + 'NW_003315939.2': 'HSCHR12_1_CTG2_1', + 'NW_003315941.1': 'HSCHR12_2_CTG2_1', + 'NW_003315942.2': 'HSCHR12_3_CTG2_1', + 'NT_187590.1': 'HSCHR12_6_CTG2_1', + 'NW_003315940.1': 'HSCHR12_4_CTG2_1', + 'NT_187589.1': 'HSCHR12_5_CTG2_1', + 'NT_187591.1': 'HSCHR12_7_CTG2_1', + 'NT_187594.1': 'HSCHR13_1_CTG3', + 'NT_187593.1': 'HSCHR13_1_CTG2', + 'NT_187597.1': 'HSCHR13_1_CTG6', + 'NT_187595.1': 'HSCHR13_1_CTG4', + 'NT_187592.1': 'HSCHR13_1_CTG1', + 'NT_187596.1': 'HSCHR13_1_CTG5', + 'NT_187598.1': 'HSCHR14_1_CTG1', + 'NT_187601.1': 'HSCHR14_7_CTG1', + 'NT_187599.1': 'HSCHR14_2_CTG1', + 'NT_187600.1': 'HSCHR14_3_CTG1', + 'NT_187602.1': 'HSCHR15_1_CTG1', + 'NT_187604.1': 'HSCHR15_3_CTG3', + 'NT_187603.1': 'HSCHR15_1_CTG3', + 'NW_003315943.1': 'HSCHR15_1_CTG8', + 'NT_187605.1': 'HSCHR15_3_CTG8', + 'NW_003315944.2': 'HSCHR15_2_CTG8', + 'NT_187606.1': 'HSCHR15_5_CTG8', + 'NT_187610.1': 'HSCHR16_CTG2', + 'NT_187609.1': 'HSCHR16_4_CTG1', + 'NT_187608.1': 'HSCHR16_3_CTG1', + 'NT_187607.1': 'HSCHR16_1_CTG1', + 'NW_003315945.1': 'HSCHR16_1_CTG3_1', + 'NW_003315946.1': 'HSCHR16_2_CTG3_1', + 'NW_003315952.3': 'HSCHR17_1_CTG1', + 'NT_187613.1': 'HSCHR17_2_CTG2', + 'NT_187611.1': 'HSCHR17_1_CTG2', + 'NT_187614.1': 'HSCHR17_7_CTG4', + 'NW_003871091.1': 'HSCHR17_4_CTG4', + 'NW_003871092.1': 'HSCHR17_5_CTG4', + 'NW_003315953.2': 'HSCHR17_1_CTG4', + 
'NT_167251.2': 'HSCHR17_1_CTG5', + 'NW_003315954.1': 'HSCHR17_2_CTG4', + 'NT_187615.1': 'HSCHR17_8_CTG4', + 'NT_187616.1': 'HSCHR17_9_CTG4', + 'NW_003315955.1': 'HSCHR17_3_CTG4', + 'NT_187612.1': 'HSCHR17_1_CTG9', + 'NT_187618.1': 'HSCHR18_4_CTG1_1', + 'NW_003315956.1': 'HSCHR18_1_CTG1_1', + 'NW_003315959.1': 'HSCHR18_2_CTG1_1', + 'NW_003315960.1': 'HSCHR18_2_CTG2', + 'NW_003315957.1': 'HSCHR18_1_CTG2', + 'NW_003315958.1': 'HSCHR18_1_CTG2_1', + 'NW_003315961.1': 'HSCHR18_2_CTG2_1', + 'NT_187617.1': 'HSCHR18_3_CTG2_1', + 'NT_187622.1': 'HSCHR19_5_CTG2', + 'NT_187621.1': 'HSCHR19_4_CTG2', + 'NW_003315962.1': 'HSCHR19_1_CTG2', + 'NW_003315964.2': 'HSCHR19_2_CTG2', + 'NW_003315965.1': 'HSCHR19_3_CTG2', + 'NW_003315963.1': 'HSCHR19_1_CTG3_1', + 'NT_187619.1': 'HSCHR19_2_CTG3_1', + 'NT_187620.1': 'HSCHR19_3_CTG3_1', + 'NW_003571054.1': 'HSCHR19LRC_COX1_CTG3_1', + 'NW_003315966.2': 'HSCHR20_1_CTG1', + 'NT_187623.1': 'HSCHR20_1_CTG2', + 'NT_187625.1': 'HSCHR20_1_CTG4', + 'NT_187624.1': 'HSCHR20_1_CTG3', + 'NW_003315967.2': 'HSCHR21_1_CTG1_1', + 'NT_187628.1': 'HSCHR21_8_CTG1_1', + 'NT_187627.1': 'HSCHR21_6_CTG1_1', + 'NW_003315968.2': 'HSCHR21_2_CTG1_1', + 'NW_003315969.2': 'HSCHR21_3_CTG1_1', + 'NW_003315970.2': 'HSCHR21_4_CTG1_1', + 'NT_187626.1': 'HSCHR21_5_CTG2', + 'NT_187629.1': 'HSCHR22_1_CTG3', + 'NT_187632.1': 'HSCHR22_1_CTG6', + 'NT_187633.1': 'HSCHR22_1_CTG7', + 'NT_187630.1': 'HSCHR22_1_CTG4', + 'NT_187631.1': 'HSCHR22_1_CTG5', + 'NW_003315972.2': 'HSCHR22_1_CTG2', + 'NW_003315971.2': 'HSCHR22_1_CTG1', + 'NT_187634.1': 'HSCHRX_1_CTG3', + 'NT_187635.1': 'HSCHRX_2_CTG12', + 'NT_187646.1': 'HSCHR1_ALT2_1_CTG32_1', + 'NT_187648.1': 'HSCHR2_2_CTG7', + 'NT_187647.1': 'HSCHR2_2_CTG15', + 'NT_187649.1': 'HSCHR3_3_CTG3', + 'NT_187650.1': 'HSCHR4_6_CTG12', + 'NT_187651.1': 'HSCHR5_1_CTG1_1', + 'NT_187652.1': 'HSCHR5_3_CTG5', + 'NT_113891.3': 'HSCHR6_MHC_COX_CTG1', + 'NT_187653.1': 'HSCHR7_2_CTG1', + 'NT_187655.1': 'HSCHR8_6_CTG1', + 'NT_187654.1': 'HSCHR8_5_CTG1', + 
'NT_187656.1': 'HSCHR11_2_CTG1', + 'NT_187657.1': 'HSCHR11_2_CTG1_1', + 'NT_187658.1': 'HSCHR12_3_CTG2', + 'NT_187659.1': 'HSCHR15_2_CTG3', + 'NT_187660.1': 'HSCHR15_4_CTG8', + 'NT_187662.1': 'HSCHR17_2_CTG1', + 'NT_187664.1': 'HSCHR17_3_CTG2', + 'NT_187661.1': 'HSCHR17_10_CTG4', + 'NW_003871093.1': 'HSCHR17_6_CTG4', + 'NT_187663.1': 'HSCHR17_2_CTG5', + 'NT_187665.1': 'HSCHR18_ALT21_CTG2_1', + 'NT_187666.1': 'HSCHR18_ALT2_CTG2_1', + 'NW_003571055.2': 'HSCHR19LRC_COX2_CTG3_1', + 'NW_004504305.1': 'HSCHR22_2_CTG1', + 'NT_187667.1': 'HSCHRX_2_CTG3', + 'NT_187678.1': 'HSCHR3_4_CTG3', + 'NT_187679.1': 'HSCHR4_7_CTG12', + 'NT_167245.2': 'HSCHR6_MHC_DBB_CTG1', + 'NT_187680.1': 'HSCHR8_7_CTG1', + 'NT_187681.1': 'HSCHR11_3_CTG1', + 'NW_003571056.2': 'HSCHR19LRC_LRC_I_CTG3_1', + 'NT_187682.1': 'HSCHR22_3_CTG1', + 'NT_187688.1': 'HSCHR3_5_CTG3', + 'NT_167246.2': 'HSCHR6_MHC_MANN_CTG1', + 'NW_003571057.2': 'HSCHR19LRC_LRC_J_CTG3_1', + 'NT_187689.1': 'HSCHR3_6_CTG3', + 'NT_167247.2': 'HSCHR6_MHC_MCF_CTG1', + 'NW_003571058.2': 'HSCHR19LRC_LRC_S_CTG3_1', + 'NT_187690.1': 'HSCHR3_7_CTG3', + 'NT_167248.2': 'HSCHR6_MHC_QBL_CTG1', + 'NW_003571059.2': 'HSCHR19LRC_LRC_T_CTG3_1', + 'NT_187691.1': 'HSCHR3_8_CTG3', + 'NT_167249.2': 'HSCHR6_MHC_SSTO_CTG1', + 'NW_003571060.1': 'HSCHR19LRC_PGF1_CTG3_1', + 'NT_187692.1': 'HSCHR6_8_CTG1', + 'NW_003571061.2': 'HSCHR19LRC_PGF2_CTG3_1', + 'NT_187693.1': 'HSCHR19_4_CTG3_1', + 'NT_187636.1': 'HSCHR19KIR_FH15_B_HAP_CTG3_1', + 'NT_187637.1': 'HSCHR19KIR_G085_A_HAP_CTG3_1', + 'NT_187638.1': 'HSCHR19KIR_G085_BA1_HAP_CTG3_1', + 'NT_187639.1': 'HSCHR19KIR_G248_A_HAP_CTG3_1', + 'NT_187640.1': 'HSCHR19KIR_G248_BA2_HAP_CTG3_1', + 'NT_187641.1': 'HSCHR19KIR_GRC212_AB_HAP_CTG3_1', + 'NT_187642.1': 'HSCHR19KIR_GRC212_BA1_HAP_CTG3_1', + 'NT_187643.1': 'HSCHR19KIR_LUCE_A_HAP_CTG3_1', + 'NT_187644.1': 'HSCHR19KIR_LUCE_BDEL_HAP_CTG3_1', + 'NT_187645.1': 'HSCHR19KIR_RSH_A_HAP_CTG3_1', + 'NT_187668.1': 'HSCHR19KIR_RSH_BA2_HAP_CTG3_1', + 'NT_187669.1': 
'HSCHR19KIR_T7526_A_HAP_CTG3_1', + 'NT_187670.1': 'HSCHR19KIR_T7526_BDEL_HAP_CTG3_1', + 'NT_187671.1': 'HSCHR19KIR_ABC08_A1_HAP_CTG3_1', + 'NT_187672.1': 'HSCHR19KIR_ABC08_AB_HAP_C_P_CTG3_1', + 'NT_187673.1': 'HSCHR19KIR_ABC08_AB_HAP_T_P_CTG3_1', + 'NT_187674.1': 'HSCHR19KIR_FH05_A_HAP_CTG3_1', + 'NT_187675.1': 'HSCHR19KIR_FH05_B_HAP_CTG3_1', + 'NT_187676.1': 'HSCHR19KIR_FH06_A_HAP_CTG3_1', + 'NT_187677.1': 'HSCHR19KIR_FH06_BA1_HAP_CTG3_1', + 'NT_187683.1': 'HSCHR19KIR_FH08_A_HAP_CTG3_1', + 'NT_187684.1': 'HSCHR19KIR_FH08_BAX_HAP_CTG3_1', + 'NT_187685.1': 'HSCHR19KIR_FH13_A_HAP_CTG3_1', + 'NT_187686.1': 'HSCHR19KIR_FH13_BA2_HAP_CTG3_1', + 'NT_187687.1': 'HSCHR19KIR_FH15_A_HAP_CTG3_1', + 'NT_113949.2': 'HSCHR19KIR_RP5_B_HAP_CTG3_1', + 'NT_167235.1': 'HSCHR22_CTG1_3' + } + if primary_assembly == 'GRCh38' or primary_assembly == 'hg38': + chr_num = chr_num_convert_38.get(accession) + if primary_assembly == 'GRCh37' or primary_assembly == 'hg19': + chr_num = chr_num_convert_37.get(accession) + try: + return chr_num + except UnboundLocalError: + chr_num = None + return chr_num diff --git a/VariantValidator/modules/vvCore.py b/VariantValidator/modules/vvCore.py new file mode 100644 index 00000000..f96bd3cd --- /dev/null +++ b/VariantValidator/modules/vvCore.py @@ -0,0 +1,8267 @@ +''' +This module contains the main function for variant validator. It's added to the Validator object in the vvObjects file. 
+''' + +import hgvs +import hgvs.parser +import hgvs.dataproviders.uta +import hgvs.dataproviders.seqfetcher +import hgvs.assemblymapper +import hgvs.variantmapper +import hgvs.sequencevariant +import hgvs.validator +import hgvs.exceptions +import hgvs.location +import hgvs.posedit +import hgvs.edit +import hgvs.normalizer + +# IMPORT PYTHON MODULES +import re +import time +import datetime +import copy +import os +import sys +import warnings +from operator import itemgetter +from pyliftover import LiftOver +import traceback +from configparser import ConfigParser + +from Bio.Seq import Seq + +# Import variantanalyser and peripheral VV modules +#import ref_seq_type +#import external +#import output_formatter +#import variantanalyser +from vvLogging import logger +import hgvs +#from variantanalyser import functions as va_func +#from variantanalyser import dbControls as va_dbCrl +#from variantanalyser import hgvs2vcf as va_H2V +#from variantanalyser import batch as va_btch +#from variantanalyser import g_to_g as va_g2g +#from variantanalyser import supported_chromosome_builds as va_scb +#from variantanalyser import gap_genes as gapGenes +#from variantanalyser.liftover import liftover as lift_over + +import vvFunctions as fn +import vvDatabase +import vvChromasomes + +def validate(self, batch_variant, selected_assembly, select_transcripts, transcriptSet="refseq"): + logger.info(batch_variant + ' : ' + selected_assembly) + # Take start time + start_time = time.time() + + # Set pre defined variables + # SeqFetcher + sf = hgvs.dataproviders.seqfetcher.SeqFetcher() + + try: + # Validation + ############ + + # Create a dictionary of transcript ID : '' + if select_transcripts != 'all': + select_transcripts_list = select_transcripts.split('|') + select_transcripts_dict = {} + select_transcripts_dict_plus_version = {} + for id in select_transcripts_list: + id = id.strip() + if re.match('LRG', id): + id = self.db.get.get_RefSeqTranscriptID_from_lrgTranscriptID(id) + if id == 
'none': + continue + select_transcripts_dict_plus_version[id] = '' + id = id.split('.')[0] + select_transcripts_dict[id] = '' + # Set up gene list dictionary + input_genes = {} + + # Remove genes if transcripts selected + # if select_transcripts != 'all': + + # split the batch queries into a list + batch_queries = batch_variant.split('|') + + # Turn each variant into a dictionary. The dictionary will be compiled during validation + batch_list = [] + for queries in batch_queries: + queries = queries.strip() + query = {'quibble': queries, 'id': queries, 'warnings': '', 'description': '', 'coding': '', 'coding_g': '', + 'genomic_r': '', 'genomic_g': '', 'protein': '', 'write': 'true', 'primary_assembly': 'false', + 'order': 'false'} + batch_list.append(query) + + # Create List to carry batch data + batch_out = [] + + # Ensure batch_list is pulled into the function so that it can be appended to + batch_list = batch_list + + # Enter the validation loop + ########################### + # Allow order by input + ordering = 0 + + """ + Set a flag to mark the final output type + flag : warning + flag : error + flag : intragenic + flag : gene + """ + set_output_type_flag = 'warning' + logger.debug("Batch list length " + str(len(batch_list))) + for validation in batch_list: + # Start timing + logger.traceStart(validation) + # Re-set cautions and automaps + + if transcriptSet == "refseq": + alt_aln_method = 'splign' + elif transcriptSet == "ensembl": + alt_aln_method = 'genebuild' + logger.warning("Ensembl is currently not supported") + validation['warnings'] += ': ' + "Ensembl is currently not supported" + continue + else: + logger.warning( + "The transcript set variable " + transcriptSet + " is invalid, it needs to be 'refseq' or 'ensembl'") + validation[ + 'warnings'] += ': ' + "The transcript set variable " + transcriptSet + " is invalid, it needs to be 'refseq' or 'ensembl'" + continue + + # Create Normalizers + hn = hgvs.normalizer.Normalizer(self.hdp, + 
cross_boundaries=False, + shuffle_direction=3, + alt_aln_method=alt_aln_method + ) + reverse_normalizer = hgvs.normalizer.Normalizer(self.hdp, + cross_boundaries=False, + shuffle_direction=5, + alt_aln_method=alt_aln_method + ) + + # Blank cautions + caution = '' + automap = '' + + # This will be used to order the final output + if str(validation['order']) == 'false': + ordering = ordering + 1 + validation['order'] = ordering + else: + pass + # Bug catcher + try: + # Note, ID is not touched. It is always the input variant description. Quibble will be altered but id will not if type = g. + input = validation['quibble'] + logger.trace("Commenced validation of " + str(input), validation) + + # Test for rich text unicode characters + try: + unicode_test = u"{}".format(input) + except UnicodeDecodeError as e: + # Format the trapped character into unicode for styled printing + my_unicode = e[1] + my_unicode = my_unicode.decode('utf-8') + + # Test for rich text unicode characters + try: + str(my_unicode) + except UnicodeEncodeError as e: + # Format the trapped character into unicode for styled printing + unicoded_it = e[1] + unicoded_it_list = unicoded_it.split() + for try_me in unicoded_it_list: + try: + str(try_me) + except UnicodeEncodeError as e: + found_unicode = try_me + found_error = str(e) + found_at = found_unicode.encode('raw_unicode_escape') + break + # Extract character from the error + unicode = re.findall("u'\\\\\w+'", found_error) + character = unicode[0] + search_term = character.replace("u'", '') + search_term = search_term.replace("'", '') + found_at_decoded = found_at.decode('raw_unicode_escape') + found_at = found_at_decoded.encode('raw_unicode_escape') + string_char = str(character) + # Create a human readable U+ representation + human_code = re.sub("u'\\\\\w", 'U+', string_char) + human_code = human_code.replace("'", "") + format_human = u"{}".format(human_code) + format_human = format_human.upper() + found_at = re.sub(search_term, u'<' + 
format_human + u'>', found_at) + slasher = re.compile("\\\\") + found_at = re.sub(slasher, '', found_at) + validation['id'] = found_at + error = u'Submitted variant description contains an invalid character which is represented by Unicode character ' + format_human + u' at position ' + found_at + u': Please remove this character and re-submit: A useful search function for Unicode characters can be found at https://unicode-search.net/' + validation['warnings'] = validation['warnings'] + ': ' + error + logger.warning(error) + continue + else: + pass + else: + pass + + # Remove whitespace + ws = copy.copy(input) + input = input.strip() + input = ''.join(input.split()) + if input != ws: + caution = 'Whitespace removed from variant description ' + str(ws) + validation['warnings'] = validation['warnings'] + ': ' + caution + logger.info(caution) + stash_input = copy.copy(input) + # Set the primary_assembly + if validation['primary_assembly'] == 'false': + if selected_assembly == 'hg19': + primary_assembly = 'GRCh37' + elif selected_assembly == 'hg38': + primary_assembly = 'GRCh38' + # Ensure genome build is correctly formatted + elif re.search('GRC', selected_assembly, re.IGNORECASE): + selected_assembly = selected_assembly.replace('g', 'G') + selected_assembly = selected_assembly.replace('r', 'R') + selected_assembly = selected_assembly.replace('c', 'C') + selected_assembly = selected_assembly.replace('H', 'h') + primary_assembly = selected_assembly + # Catch invalid genome build + valid_build = False + for genome_build in self.genome_builds: + if primary_assembly == genome_build: + valid_build = True + if valid_build is False: + primary_assembly = 'GRCh38' + validation['warnings'] = validation[ + 'warnings'] + ': Invalid genome build has been specified. Automap has selected the default build (GRCh38)' + logger.warning( + 'Invalid genome build has been specified. 
Automap has selected the default build ' + primary_assembly) + else: + validation['primary_assembly'] = primary_assembly + else: + primary_assembly = validation['primary_assembly'] + logger.trace("Completed string formatting", validation) + # Set variables that batch will not use but are required + crossing = 'false' + boundary = 'false' + + # VCF type 1 + """ + VCF2HGVS stage 1. converts chr-pos-ref-alt into chr:posRef>Alt + The output format is a common mistake caused by inaccurate conversion of + VCF variants into HGVS - hence the need for conversion step 2 + """ + if re.search('[-:]\d+[-:][GATC]+[-:][GATC]+', input): + input = input.replace(':', '-') + # Extract primary_assembly if provided + if re.match('GRCh3\d+-', input) or re.match('hg\d+-', input): + in_list = input.split('-') + selected_assembly = in_list[0] + input = '-'.join(in_list[1:]) + pre_input = copy.deepcopy(input) + vcf_elements = pre_input.split('-') + input = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[3]) + elif re.search('[-:]\d+[-:][GATC]+[-:]', input): + input = input.replace(':', '-') + # Extract primary_assembly if provided + if re.match('GRCh3\d+-', input) or re.match('hg\d+-', input): + in_list = input.split('-') + selected_assembly = in_list[0] + input = '-'.join(in_list[1:]) + pre_input = copy.deepcopy(input) + vcf_elements = pre_input.split('-') + validation[ + 'warnings'] = 'Not stating ALT bases is ambiguous because VCF specification 4.0 would treat ' + pre_input + ' as a deletion whereas VCF specification 4.1 onwards would treat ' + pre_input + ' as ALT = REF' + validation['warnings'] = validation['warnings'] + ': VariantValidator has output both alternatives' + logger.resub('Not stating ALT bases is ambiguous because VCF specification 4.0 would treat ' + + pre_input + ' as a deletion whereas VCF specification 4.1 onwards would treat ' + pre_input + + ' as ALT = REF. 
Validator will output both alternatives.') + validation['write'] = 'false' + input_A = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], 'del') + input_B = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[2]) + queryA = {'quibble': input_A, 'id': validation['id'], 'warnings': validation['warnings'], + 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', + 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} + queryB = {'quibble': input_B, 'id': validation['id'], 'warnings': validation['warnings'], + 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', + 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} + batch_list.append(queryA) + batch_list.append(queryB) + continue + elif re.search('[-:]\d+[-:][-:][GATC]+', input) or re.search('[-:]\d+[-:][.][-:][GATC]+', input): + input = input.replace(':', '-') + if re.search('-.-', input): + input = input.replace('-.-', '-ins-') + if re.search('--', input): + input = input.replace('--', '-ins-') + # Extract primary_assembly if provided + if re.match('GRCh3\d+-', input) or re.match('hg\d+-', input): + in_list = input.split('-') + selected_assembly = in_list[0] + input = '-'.join(in_list[1:]) + pre_input = copy.deepcopy(input) + vcf_elements = pre_input.split('-') + input = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[3]) + stash_input = input + logger.trace("Completed VCF-HVGS step 1", validation) + # API type non-HGVS + # e.g. Chr16:2099572TC>T + """ + VCF2HGVS conversion step 2 identifies the correct chromosomal reference + sequence based upon the non compliant identifier e.g. :2099572TC>T. + The data is currently stored in variantanalyser.supported_chromosome_builds. + Anticipated future builds will be transferred to MySQL which can be more + easily updated and maintained. 
+ LRGs and LRG_ts also need to be assigned the correct reference sequence identifier. + The LRG ID data ia stored in the VariantValidator MySQL database. + The reference sequence type is also assigned. + """ + if re.search('\w+\:', input) and not re.search('\w+\:[gcnmrp]\.', input): + if re.search('\w+\:[gcnmrp]', input) and not re.search('\w+\:[gcnmrp]\.', input): + # Missing dot + pass + else: + try: + if re.search('GRCh37', input) or re.search('hg19', input): + primary_assembly = 'GRCh37' + elif re.search('GRCh38', input) or re.search('hg38', input): + primary_assembly = 'GRCh38' + pre_input = copy.deepcopy(input) + input_list = input.split(':') + pos_ref_alt = str(input_list[1]) + positionAndEdit = input_list[1] + if not re.match('N[CGTWMRP]_', input) and not re.match('LRG_', input): + chr_num = str(input_list[0]) + chr_num = chr_num.upper() + chr_num = chr_num.strip() + if re.match('CHR', chr_num): + chr_num = chr_num.replace('CHR', '') + # Use selected assembly + accession = vvChromasomes.to_accession(chr_num, selected_assembly) + if accession is None: + validation['warnings'] = validation[ + 'warnings'] + ': ' + chr_num + \ + ' is not part of genome build ' + selected_assembly + logger.warning(chr_num + ' is not part of genome build ' + selected_assembly) + continue + else: + accession = input_list[0] + if re.search('>', pre_input): + if re.search('del', pre_input): + pos = re.match('\d+', pos_ref_alt) + position = pos.group(0) + old_ref, old_alt = pos_ref_alt.split('>') + old_ref = old_ref.replace(position, '') + position = int(position) - 1 + required_base = sf.fetch_seq(accession, start_i=position - 1, end_i=position) + ref = required_base + old_ref + alt = required_base + positionAndEdit = str(position) + ref + '>' + alt + elif re.search('ins', pre_input): + pos = re.match('\d+', pos_ref_alt) + position = pos.group(0) + old_ref, old_alt = pos_ref_alt.split('>') + # old_ref = old_ref.replace(position, '') + position = int(position) - 1 + required_base = 
sf.fetch_seq(accession, start_i=position - 1, end_i=position) + ref = required_base + alt = required_base + old_alt + positionAndEdit = str(position) + ref + '>' + alt + # Assign reference sequence type + ref_type = self.db.ref_type_assign(accession) + if re.match('LRG_', accession): + if ref_type == ':g.': + accession = self.db.get.get_refseqgeneId_from_lrgID(accession) + else: + accession = self.db.get.get_RefSeqTranscriptID_from_lrgTranscriptID(accession) + else: + accession = accession + input = str(accession) + ref_type + str(positionAndEdit) + stash_input = input + except: + exceptPass(validation) + + # Descriptions lacking the colon : + if re.search('[gcnmrp]\.', input) and not re.search(':[gcnmrp]\.', input): + error = 'Unable to identify a colon (:) in the variant description %s. A colon is required in HGVS variant descriptions to separate the reference accession from the reference type i.e. :. e.g. :c.' % ( + input) + validation['warnings'] = validation['warnings'] + ': ' + error + logger.warning(error) + continue + + # Ambiguous chr reference + logger.trace("Completed VCF-HVGS step 2", validation) + """ + VCF2HGVS conversion step 3 is similar to step 2 but handles + formats like Chr16:g.2099572TC>T which are provided by Alamut and other + software + """ + if re.search('\w+:[gcnmrp]\.', input) and not re.match('N[CGTWMRP]_', input): + # Take out lowercase Accession characters + lower_cased_list = input.split(':') + if re.search('LRG', lower_cased_list[0], re.IGNORECASE): + lower_case_accession = lower_cased_list[0] + lower_case_accession = lower_case_accession.replace('l', 'L') + lower_case_accession = lower_case_accession.replace('r', 'R') + lower_case_accession = lower_case_accession.replace('g', 'G') + else: + lower_case_accession = lower_cased_list[0] + lower_case_accession = lower_case_accession.upper() + input = ''.join(lower_cased_list[1:]) + input = lower_case_accession + ':' + input + if not re.match('LRG_', input) and not re.match('ENS', input) 
and not re.match('N[MRPC]_', input): + try: + if re.search('GRCh37', input) or re.search('hg19', input): + primary_assembly = 'GRCh37' + elif re.search('GRCh38', input) or re.search('hg38', input): + primary_assembly = 'GRCh38' + pre_input = copy.deepcopy(input) + input_list = input.split(':') + query_a_symbol = input_list[0] + is_it_a_gene = va_dbCrl.data.get_hgnc_symbol(query_a_symbol) + if is_it_a_gene == 'none': + pos_ref_alt = str(input_list[1]) + positionAndEdit = input_list[1] + chr_num = str(input_list[0]) + chr_num = chr_num.upper() + chr_num = chr_num.strip() + if re.match('CHR', chr_num): + chr_num = chr_num.replace('CHR', '') # Use selected assembly + accession = vvChromasomes.to_accession(chr_num, selected_assembly) + if accession is None: + validation['warnings'] = validation['warnings'] + ': ' + chr_num + \ + ' is not part of genome build ' + selected_assembly + continue + input = str(accession) + ':' + str(positionAndEdit) + stash_input = input + else: + pass + except Exception as e: + exc_type, exc_value, last_traceback = sys.exc_info() + te = traceback.format_exc() + tbk = [str(exc_type), str(exc_value), str(te)] + er = str('\n'.join(tbk)) + logger.warning(str(exc_type) + " " + str(exc_value)) + logger.debug(er) + + # GENE_SYMBOL:c. n. types + logger.trace("Completed VCF-HGVS step 3", validation) + """ + Searches for gene symbols that have been used as reference sequence + identifiers. Provides a sufficiently repremanding warning, but also provides + correctly formatted variant descriptions with appropriate transcript + reference sequence identifiers i.e. NM_ .... 
+ Note: the output from the function must be validated because VV has no way + of knowing which the users intended reference sequence was, and the exon + boundaries etc of the alternative transcript variants may not be equivalent + """ + if re.search('\w+\:[cn]\.', input): + try: + pre_input = copy.deepcopy(input) + query_a_symbol = pre_input.split(':')[0] + tx_edit = pre_input.split(':')[1] + is_it_a_gene = va_dbCrl.data.get_hgnc_symbol(query_a_symbol) + if is_it_a_gene != 'none': + uta_symbol = va_dbCrl.data.get_uta_symbol(is_it_a_gene) + available_transcripts = hdp.get_tx_for_gene(uta_symbol) + select_from_these_transcripts = {} + for tx in available_transcripts: + if re.match('NM_', tx[3]) or re.match('NR_', tx[3]): + if tx[3] not in select_from_these_transcripts.keys(): + select_from_these_transcripts[tx[3]] = '' + else: + continue + else: + continue + select_from_these_transcripts = '|'.join(select_from_these_transcripts.keys()) + if select_transcripts != 'all': + validation['write'] = 'false' + for transcript in select_transcripts_dict_plus_version.keys(): + validation[ + 'warnings'] = 'HGVS variant nomenclature does not allow the use of a gene symbol (' + \ + query_a_symbol + ') in place of a valid reference sequence' + refreshed_description = transcript + ':' + tx_edit + query = {'quibble': refreshed_description, 'id': validation['id'], + 'warnings': validation['warnings'], 'description': '', 'coding': '', + 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', + 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} + batch_list.append(query) + logger.resub('HGVS variant nomenclature does not allow the use of a gene symbol (' + \ + query_a_symbol + ') in place of a valid reference sequence') + else: + validation['warnings'] = validation['warnings'] + \ + ': ' + 'HGVS variant nomenclature does not allow the use of a gene symbol (' + \ + query_a_symbol + ') in place of a valid reference sequence: Re-submit ' + input + \ + ' and 
specify transcripts from the following: ' + 'select_transcripts=' + select_from_these_transcripts + logger.warning('HGVS variant nomenclature does not allow the use of a gene symbol (' + \ + query_a_symbol + ') in place of a valid reference sequence: Re-submit ' + input + \ + ' and specify transcripts from the following: ' + 'select_transcripts=' + select_from_these_transcripts) + continue + else: + pass + except: + exceptPass() + logger.trace("Gene symbol reference catching complete", validation) + + # NG_:c. or NC_:c. + """ + Similar to the GENE_SYMBOL:c. n. types function, but spots RefSeqGene or + Chromosomal reference sequence identifiers used in the context of c. variant + descriptions + """ + if re.search('\w+\:[cn]', input): + try: + if re.match('^NG_', input): + refSeqGeneID = input.split(':')[0] + tx_edit = input.split(':')[1] + gene_symbol = va_dbCrl.data.get_gene_symbol_from_refSeqGeneID(refSeqGeneID) + if gene_symbol != 'none': + uta_symbol = va_dbCrl.data.get_uta_symbol(gene_symbol) + available_transcripts = hdp.get_tx_for_gene(uta_symbol) + select_from_these_transcripts = {} + for tx in available_transcripts: + if re.match('NM_', tx[3]) or re.match('NR_', tx[3]): + if tx[3] not in select_from_these_transcripts.keys(): + select_from_these_transcripts[tx[3]] = '' + else: + continue + else: + continue + select_from_these_transcripts = '|'.join(select_from_these_transcripts.keys()) + if select_transcripts != 'all': + validation['write'] = 'false' + for transcript in select_transcripts_dict_plus_version.keys(): + validation[ + 'warnings'] = 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. 
NG_(NM_):c.PositionVariation' + refreshed_description = refSeqGeneID + '(' + transcript + ')' + ':' + tx_edit + query = {'quibble': refreshed_description, 'id': validation['id'], + 'warnings': validation['warnings'], 'description': '', 'coding': '', + 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', + 'write': 'true', 'primary_assembly': primary_assembly, + 'order': ordering} + logger.resub( + 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. NG_(NM_):c.PositionVariation. Resubmitting corrected version.') + batch_list.append(query) + else: + validation['warnings'] = validation[ + 'warnings'] + ': ' + 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation. Re-submit ' + input + ' but also specify transcripts from the following: ' + 'select_transcripts=' + select_from_these_transcripts + logger.warning( + + 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation. Re-submit ' + + str( + input) + ' but also specify transcripts from the following: ' + 'select_transcripts=' + str( + select_from_these_transcripts)) + continue + else: + validation['warnings'] = validation[ + 'warnings'] + ': ' + 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation' + logger.warning( + 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation') + continue + elif re.match('^NC_', input): + validation['warnings'] = validation[ + 'warnings'] + ': ' + 'A transcript reference sequence has not been provided e.g. NC_(NM_):c.PositionVariation. Unable to predict available transripts because chromosomal position is not specified' + logger.warning( + 'A transcript reference sequence has not been provided e.g. NC_(NM_):c.PositionVariation. 
Unable to predict available transripts because chromosomal position is not specified') + continue + else: + pass + except: + exceptPass() + + logger.trace("Chromosomal/RefSeqGene reference catching complete", validation) + # Find not_sub type in input e.g. GGGG>G + """ + VCF2HGVS conversion step 4 has two purposes + 1. VCF is frequently inappropriately converted into HGVS like descriptions + such as GGGG>G which is actually a delins, del or ins. The function assigns + the correct edit type + 2. Detects and extracts multiple ALT sequences into HGVS descriptions and + automatically submits them for validation + """ + not_sub = copy.deepcopy(input) + not_sub_find = re.compile("([GATCgatc]+)>([GATCgatc]+)") + if not_sub_find.search(not_sub): + try: + # If the length of either side of the substitution delimer (>) is >1 + matches = not_sub_find.search(not_sub) + if len(matches.group(1)) > 1 or len(matches.group(2)) > 1 or re.search( + "([GATCgatc]+)>([GATCgatc]+),([GATCgatc]+)", input): + # Search for and remove range + interval_range = re.compile("([0-9]+)_([0-9]+)") + if interval_range.search(not_sub): + m = not_sub_find.search(not_sub) + start = m.group(1) + delete = m.group(2) + beginning_string, middle_string = not_sub.split(':') + middle_string = middle_string.split('_')[0] + end_string = start + '>' + delete + not_sub = beginning_string + ':' + middle_string + end_string + # Split description + split_colon = not_sub.split(':') + ref_ac = split_colon[0] + remainder = split_colon[1] + split_dot = remainder.split('.') + ref_type = split_dot[0] + remainder = split_dot[1] + posedit = remainder + split_greater = remainder.split('>') + insert = split_greater[1] + remainder = split_greater[0] + # Split remainder using matches + r = re.compile("([0-9]+)([GATCgatc]+)") + try: + m = r.search(remainder) + start = m.group(1) + delete = m.group(2) + starts = posedit.split(delete)[0] + re_try = ref_ac + ':' + ref_type + '.' 
+ starts + 'del' + delete[0] + 'ins' + insert + hgvs_re_try = hp.parse_hgvs_variant(re_try) + hgvs_re_try.posedit.edit.ref = delete + start_pos = str(hgvs_re_try.posedit.pos.start) + if re.search('\-', start_pos): + base, offset = start_pos.split('-') + new_offset = 0 - int(offset) + (len(delete)) + end_pos = int(base) + hgvs_re_try.posedit.pos.end.base = int(end_pos) + hgvs_re_try.posedit.pos.end.offset = int(new_offset) - 1 + not_delins = ref_ac + ':' + ref_type + '.' + start_pos + '_' + str( + hgvs_re_try.posedit.pos.end) + 'del' + delete + 'ins' + insert + elif re.search('\+', start_pos): + base, offset = start_pos.split('+') + end_pos = int(base) + (len(delete) - int(offset) - 1) + new_offset = 0 + int(offset) + (len(delete) - 1) + hgvs_re_try.posedit.pos.end.base = int(end_pos) + hgvs_re_try.posedit.pos.end.offset = int(new_offset) + not_delins = ref_ac + ':' + ref_type + '.' + start_pos + '_' + str( + hgvs_re_try.posedit.pos.end) + 'del' + delete + 'ins' + insert + else: + end_pos = int(start_pos) + (len(delete) - 1) + not_delins = ref_ac + ':' + ref_type + '.' 
+ start_pos + '_' + str( + end_pos) + 'del' + delete + 'ins' + insert + except: + exceptPass() + not_delins = not_sub + # Parse into hgvs object + try: + hgvs_not_delins = hp.parse_hgvs_variant(not_delins) + except hgvs.exceptions.HGVSError as e: + # Sort out multiple ALTS from VCF inputs + if re.search("([GATCgatc]+)>([GATCgatc]+),([GATCgatc]+)", not_delins): + header, alts = not_delins.split('>') + # Split up the alts into a list + alt_list = alts.split(',') + # Assemble and re-submit + for alt in alt_list: + validation[ + 'warnings'] = 'Multiple ALT sequences detected: auto-submitting all possible combinations' + validation['write'] = 'false' + refreshed_description = header + '>' + alt + query = {'quibble': refreshed_description, 'id': validation['id'], + 'warnings': validation['warnings'], 'description': '', 'coding': '', + 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', + 'write': 'true', 'primary_assembly': primary_assembly, + 'order': ordering} + batch_list.append(query) + logger.resub( + 'Multiple ALT sequences detected. 
Auto-submitting all possible combinations.') + continue + else: + error = str(e) + issue_link = '' + validation['warnings'] = validation['warnings'] + ': ' + error + logger.warning(str(e)) + continue + + # Re-Stash the input as an HGVS + stash_input = copy.copy(hgvs_not_delins) + try: + not_delins = str(hn.normalize(hgvs_not_delins)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('Normalization of intronic variants is not supported', error): + not_delins = not_delins + else: + issue_link = '' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(e)) + continue + # Create warning + caution = 'Variant description ' + input + ' is not HGVS compliant' + automap = input + ' automapped to ' + not_delins + validation['warnings'] = validation['warnings'] + ': ' + automap + # Change input to normalized variant + input = not_delins + else: + pass + except: + exceptPass() + else: + pass + logger.trace("Completed VCF-HVGS step 4", validation) + + # Tackle edit1234 type + """ + Warns that descriptions such as c.ins12 or g.del69 are not HGVS compliant + Strips the trailing numbers and tries to parse the description into an + hgvs object. 
+ If parses, provides a warning including links to the VarNomen web page, but + continues validation + If not, an error message is generated and the loop continues + """ + edit_pass = re.compile('_\d+$') + edit_fail = re.compile('\d+$') + if edit_fail.search(input): + if edit_pass.search(input): + pass + else: + error = 'false' + issue_link = 'false' + failed = copy.deepcopy(input) + # Catch the trailing digits + digits = re.search(r"(\d+$)", failed) + digits = digits.group(1) + # Remove them so that the string SHOULD parse + try: + hgvs_failed = hp.parse_hgvs_variant(failed) + except hgvs.exceptions.HGVSError as e: + error = str(e) + error = 'The syntax of the input variant description is invalid ' + if re.search('ins\d+', failed): + issue_link = 'http://varnomen.hgvs.org/recommendations/DNA/variant/insertion/' + error = error + ' please refer to ' + issue_link + validation['warnings'] = validation['warnings'] + error + logger.warning(error + " " + e) + continue + hgvs_failed = hp.parse_hgvs_variant(failed) + hgvs_failed.posedit.edit = str(hgvs_failed.posedit.edit).replace(digits, '') + failed = str(hgvs_failed) + hgvs_failed = hp.parse_hgvs_variant(failed) + automap = 'Non HGVS compliant variant description ' + input + ' automapped to ' + failed + validation['warnings'] = validation['warnings'] + ': ' + automap + logger.warning(automap) + input = failed + + logger.trace("Ins/Del reference catching complete", validation) + # Tackle compound variant descriptions NG or NC (NM_) i.e. correctly input NG/NC_(NM_):c. + """ + Fully HGVS compliant intronic variant descriptions take the format e.g + NG_007400.1(NM_000088.3):c.589-1G>T. However, hgvs cannot parse and map + these variant strings. + This function: + Removes the g. 
reference sequence + NG_007400.1(NM_000088.3):c.589-1G>T ---> (NM_000088.3):c.589-1G>T + Removes the parintheses + (NM_000088.3):c.589-1G>T ---> NM_000088.3:c.589-1G>T + hgvs can now parse the string into an hgvs variant object and manipulate it + """ + caution = '' + compounder = re.compile('\(NM_') + compounder_b = re.compile('\(ENST') + if compounder.search(input): + # Find pattern e.g. +0000 and assign to a variable + transy = re.search(r"(NM_.+)", input) + transy = transy.group(1) + transy = transy.replace(')', '') + input = transy + logger.trace("HVGS typesetting complete", validation) + # Extract variants from HGVS allele descriptions + # http://varnomen.hgvs.org/recommendations/DNA/variant/alleles/ + """ + HGVS allele string parsing function Occurance #1 + Takes a single HGVS allele description and separates each allele into a + list of HGVS variants. The variants are then automatically submitted for + validation. + Note: In this context, it is inappropriate to validate descriptions + containing intronic variant descriptions. 
In such instances, allele + descriptions should be re-submitted by the user at the gene or genome level + """ + if (re.search(':[gcnr].\[', input) and re.search('\;', input)) or ( + re.search(':[gcrn].\d+\[', input) and re.search('\;', input)) or (re.search('\(\;\)', input)): + # handle LRG inputs + if re.match('^LRG', input): + if re.match('^LRG\d+', input): + string, remainder = input.split(':') + reference = string.replace('LRG', 'LRG_') + input = reference + ':' + remainder + caution = string + ' updated to ' + reference + if not re.match('^LRG_\d+', input): + pass + elif re.match('^LRG_\d+:g.', input) or re.match('^LRG_\d+:p.', input) or re.match('^LRG_\d+:c.', + input) or re.match( + '^LRG_\d+:n.', input): + lrg_reference, variation = input.split(':') + refseqgene_reference = self.db.get.get_refseqgeneId_from_lrgID(lrg_reference) + if refseqgene_reference != 'none': + input = refseqgene_reference + ':' + variation + if caution == '': + caution = lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation + else: + caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation + validation['warnings'] = validation['warnings'] + ': ' + str(caution) + logger.warning(str(caution)) + elif re.match('^LRG_\d+t\d+:c.', input) or re.match('^LRG_\d+t\d+:n.', input) or re.match( + '^LRG_\d+t\d+:p.', input) or re.match('^LRG_\d+t\d+:g.', input): + lrg_reference, variation = input.split(':') + refseqtranscript_reference = va_dbCrl.data.get_RefSeqTranscriptID_from_lrgTranscriptID( + lrg_reference) + if refseqtranscript_reference != 'none': + input = refseqtranscript_reference + ':' + variation + if caution == '': + caution = lrg_reference + ':' + variation + ' automapped to ' + refseqtranscript_reference + ':' + variation + else: + caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + refseqtranscript_reference + ':' + variation + validation['warnings'] = 
validation['warnings'] + ': ' + str(caution) + logger.warning(str(caution)) + else: + pass + try: + # Submit to allele extraction function + alleles = va_func.hgvs_alleles(input, hp, vr, hn, vm, sf) + validation['warnings'] = validation[ + 'warnings'] + ': ' + 'Automap has extracted possible variant descriptions' + logger.resub('Automap has extracted possible variant descriptions, resubmitting') + for allele in alleles: + query = {'quibble': allele, 'id': validation['id'], 'warnings': validation['warnings'], + 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', + 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, + 'order': ordering} + coding = 'intergenic' + batch_list.append(query) + validation['write'] = 'false' + continue + except va_func.alleleVariantError as e: + if re.search("Cannot validate sequence of an intronic variant", str(e)): + validation['warnings'] = validation[ + 'warnings'] + ': ' + 'Intronic positions not supported for HGVS Allele descriptions' + logger.warning('Intronic positions not supported for HGVS Allele descriptions') + continue + else: + raise variantValidatorError(str(e)) + logger.trace("HVGS String allele parsing pass 1 complete", validation) + # INITIAL USER INPUT FORMATTING + """ + Removes whitespace from the ends of the string + Removes anything in brackets + Identifies variant type + Returns a dictionary containing the formatted input string and the variant type + Accepts c, g, n, r currently + """ + formatted = va_func.user_input(input) + + # Validator specific variables, note, not all will be necessary for batch, but keep to ensure that batch works + # vars = [] + # refseq_gene = '' + # relevant = '' + warning = '' + automap = 'false' + # vmapped = 'false' + # coords = 'false' + # ensembl_gene = 'false' + hgnc_gene_info = 'false' + # issue_link = 'false' + # cr_available = 'false' + # rcmds_tab = 'false' + + # Check the initial validity of the input + if formatted == 'invalid': + 
if re.search('\w+\:[gcnmrp]', input) and not re.search('\w+\:[gcnmrp]\.', input): + error = 'Variant description ' + input + ' lacks the . character between and in the expected pattern :.' + else: + error = 'Variant description ' + input + ' is not in an accepted format' + validation['warnings'] = validation[ + 'warnings'] + ': ' + error + logger.warning(error) + continue + else: + variant = formatted['variant'] + input = formatted['variant'] + stash_input = formatted['variant'] + type = formatted['type'] + logger.trace("Variant input formatted, proceeding to validate.", validation) + # Conversions + """ + Conversions are not currently supported. The HGVS format for conversions + is rarely seen wrt genomic sequencing data and needs to be re-evaluated + """ + conversion = re.compile('con') + if conversion.search(variant): + validation['warnings'] = validation['warnings'] + ': ' + 'Gene conversions currently unsupported' + logger.warning('Gene conversions currently unsupported') + continue + + # Primary check that hgvs will accept the variant + error = 'false' + # Change RNA bases to upper case but nothing else + if type == ":r.": + variant = variant.upper() + variant = variant.replace(':R.', ':r.') + # lowercase the supported variant types + variant = variant.replace('DEL', 'del') + variant = variant.replace('INS', 'ins') + variant = variant.replace('INV', 'inv') + variant = variant.replace('DUP', 'dup') + + try: + input_parses = hp.parse_hgvs_variant(variant) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error == 'false': + input_parses.ac = input_parses.ac.upper() + if hasattr(input_parses.posedit.edit, 'alt'): + if input_parses.posedit.edit.alt is not None: + input_parses.posedit.edit.alt = input_parses.posedit.edit.alt.upper() + if hasattr(input_parses.posedit.edit, 'ref'): + if input_parses.posedit.edit.ref is not None: + input_parses.posedit.edit.ref = input_parses.posedit.edit.ref.upper() + variant = str(input_parses) + input = 
str(input_parses) + pass + else: + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(error) + continue + + """ + ENST support needs to be re-evaluated, but is very low priority + ENST not supported by ACMG and is under review by HGVS + """ + if re.match('^ENST', str(input_parses)): + trap_ens_in = str(input_parses) + sim_tx = hdp.get_similar_transcripts(input_parses.ac) + for line in sim_tx: + if str(line[2]) == 'True' and str(line[3]) == 'True' and str(line[4]) == 'True' and str( + line[5]) == 'True' and str(line[6]) == 'True': + input_parses.ac = (line[1]) + input = str(input_parses) + variant = input + break + if re.match('^ENST', str(input_parses)): + error = 'Unable to map ' + str(input_parses.ac) + ' to an equivalent RefSeq transcript' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + validation['warnings'] = validation['warnings'] + ': ' + str( + trap_ens_in) + ' automapped to equivalent RefSeq transcript ' + variant + logger.warning(str(trap_ens_in) + ' automapped to equivalent RefSeq transcript ' + variant) + logger.trace("HVGS acceptance test passed", validation) + # Check whether supported genome build is requested for non g. descriptions + historic_assembly = 'false' + mapable_assemblies = { + 'GRCh37': 'true', + 'GRCh38': 'true', + 'NCBI36': 'false' + } + is_mapable = mapable_assemblies.get(primary_assembly) + if is_mapable == 'true': + + # These objects cannot be moved outside of the main function because they gather data from the + # iuser input e.g. 
alignment method and genome build + # They initiate quickly, so no need to move them unnecessarily + + # Create easy variant mapper (over variant mapper) and splign locked evm + evm = hgvs.assemblymapper.AssemblyMapper(hdp, + assembly_name=primary_assembly, + alt_aln_method=alt_aln_method, + normalize=True, + replace_reference=True + ) + + # Setup a reverse normalize instance and non-normalize evm + no_norm_evm = hgvs.assemblymapper.AssemblyMapper(hdp, + assembly_name=primary_assembly, + alt_aln_method=alt_aln_method, + normalize=False, + replace_reference=True + ) + + # Create a specific minimal evm with no normalizer and no replace_reference + min_evm = hgvs.assemblymapper.AssemblyMapper(hdp, + assembly_name=primary_assembly, + alt_aln_method=alt_aln_method, + normalize=False, + replace_reference=False + ) + + else: + error = 'Mapping of ' + variant + ' to genome assembly ' + primary_assembly + ' is not supported' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + # Catch interval end > interval start + """ + hgvs did/does not handle 3' UTR position ordering well. This function + ensures that end pos is not > start pos wrt 3' UTRs. + Also identifies some variants which span into the downstream sequence + i.e. out of bounds + """ + astr = re.compile('\*') + if astr.search(str(input_parses.posedit)): + input_parses_copy = copy.deepcopy(input_parses) + input_parses_copy.type = "c" + # Map to n. 
position + # Create easy variant mapper (over variant mapper) and splign locked evm + try: + to_n = evm.c_to_n(input_parses_copy) + except hgvs.exceptions.HGVSError as e: + exceptPass() + else: + if to_n.posedit.pos.end.base < to_n.posedit.pos.start.base: + error = 'Interval end position < interval start position ' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + elif input_parses.posedit.pos.end.base < input_parses.posedit.pos.start.base: + error = 'Interval end position ' + str( + input_parses.posedit.pos.end.base) + ' < interval start position ' + str( + input_parses.posedit.pos.start.base) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + pass + + # Catch missing version number in refseq + ref_type = re.compile("^N\w\w\d") + is_version = re.compile("\d\.\d") + en_type = re.compile('^ENS') + lrg_type = re.compile('LRG') + if (ref_type.search(str(input_parses)) and is_version.search(str(input_parses))) or ( + en_type.search(str(input_parses))): + pass + else: + if lrg_type.search(str(input_parses)): + pass + if ref_type.search(str(input_parses)): + error = 'RefSeq variant accession numbers MUST include a version number' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + continue + logger.trace("HVGS interval/version mapping complete", validation) + + # handle LRG inputs + """ + LRG and LRG_t reference sequence identifiers need to be replaced with + equivalent RefSeq identifiers. 
The lookup data is stored in the + VariantValidator MySQL database + """ + if re.match('^LRG', str(input_parses)): + if re.match('^LRG\d+', str(input_parses.ac)): + string = str(input_parses.ac) + reference = string.replace('LRG', 'LRG_') + input_parses.ac = reference + caution = string + ' updated to ' + reference + if not re.match('^LRG_\d+', str(input_parses)): + pass + elif re.match('^LRG_\d+:g.', str(input_parses)) or re.match('^LRG_\d+:p.', + str(input_parses)) or re.match( + '^LRG_\d+:c.', str(input_parses)) or re.match('^LRG_\d+:n.', str(input_parses)): + lrg_reference, variation = str(input_parses).split(':') + refseqgene_reference = self.db.get.get_refseqgeneId_from_lrgID(lrg_reference) + if refseqgene_reference != 'none': + input_parses.ac = refseqgene_reference + variant = str(input_parses) + input = str(input_parses) + stash_input = input + if caution == '': + caution = lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation + else: + caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation + validation['warnings'] = validation['warnings'] + ': ' + str(caution) + logger.warning(str(caution)) + elif re.match('^LRG_\d+t\d+:c.', str(input_parses)) or re.match('^LRG_\d+t\d+:n.', + str(input_parses)) or re.match( + '^LRG_\d+t\d+:p.', str(input_parses)) or re.match('^LRG_\d+t\d+:g.', str(input_parses)): + lrg_reference, variation = str(input_parses).split(':') + refseqtranscript_reference = va_dbCrl.data.get_RefSeqTranscriptID_from_lrgTranscriptID( + lrg_reference) + if refseqtranscript_reference != 'none': + input_parses.ac = refseqtranscript_reference + variant = str(input_parses) + input = str(input_parses) + stash_input = input + if caution == '': + caution = lrg_reference + ':' + variation + ' automapped to ' + refseqtranscript_reference + ':' + variation + else: + caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + 
refseqtranscript_reference + ':' + variation + validation['warnings'] = validation['warnings'] + ': ' + str(caution) + logger.warning(str(caution)) + else: + pass + logger.trace("LRG check for conversion to refseq completed", validation) + # Additional Incorrectly input variant capture training + """ + Evolving list of common mistakes, see sections below + """ + # NM_ .g + if (re.search('^NM_', variant) or re.search('^NR_', variant)) and re.search(':g.', variant): + suggestion = input.replace(':g.', ':c.') + error = 'Transcript reference sequence input as genomic (g.) reference sequence. Did you mean ' + suggestion + '?' + validation['warnings'] = validation['warnings'] + ': ' + error + logger.warning(error) + continue + # NR_ c. + if re.search('^NR_', input) and re.search(':c.', input): + suggestion = input.replace(':c.', ':n.') + error = 'Non-coding transcript reference sequence input as coding (c.) reference sequence. Did you mean ' + suggestion + '?' + validation['warnings'] = validation['warnings'] + ': ' + error + logger.warning(error) + continue + # NM_ n. + if re.search('^NM_', input) and re.search(':n.', input): + suggestion = input.replace(':n.', ':c.') + error = 'Coding transcript reference sequence input as non-coding transcript (n.) reference sequence. Did you mean ' + suggestion + '?' + validation['warnings'] = validation['warnings'] + ': ' + error + logger.warning(error) + continue + + # NM_ NC_ NG_ NR_ p. + if (re.search('^NM_', variant) or re.search('^NR_', variant) or re.search('^NC_', variant) or re.search( + '^NG_', variant)) and re.search(':p.', variant): + issue_link = 'http://varnomen.hgvs.org/recommendations/protein/' + error = 'Using a nucleotide reference sequence (NM_ NR_ NG_ NC_) to specify protein-level (p.) variation is not HGVS compliant. Please select an appropriate protein reference sequence (NP_)' + validation['warnings'] = validation['warnings'] + ': ' + error + logger.warning(error) + continue + + # NG_ c or NC_c.. 
+ if (re.search('^NG_', variant) or re.search('^NC_', variant)) and re.search(':c.', variant): + suggestion = ': For additional assistance, submit ' + str(variant) + ' to VariantValidator' + error = 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. NG_(NM_):c.PositionVariation' + suggestion + validation['warnings'] = validation['warnings'] + ': ' + error + logger.warning(error) + continue + + logger.trace("Passed 'common mistakes' catcher", validation) + # Primary validation of the input + """ + An evolving set of variant structure and content searches which identify + and warn users about inappropriate use of HGVS + Primarily, this code filters out variants that cannot realistically be + auto corrected and will cause the downstream functions to return errors + """ + input_parses = hp.parse_hgvs_variant(input) + if input_parses.type == 'g': + if re.match('^NC_', input_parses.ac) or re.match('^NG_', input_parses.ac) or re.match('^NT_', + input_parses.ac) or re.match( + '^NW_', input_parses.ac): + pass + else: + error = 'Invalid reference sequence identifier (' + input_parses.ac + ')' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(error) + continue + try: + vr.validate(input_parses) + except hgvs.exceptions.HGVSError as e: + error = str(e) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(error) + continue + except Exception as e: + error = str(e) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(error) + continue + # Additional test + try: + hn.normalize(input_parses) + except hgvs.exceptions.HGVSError as e: + error = str(e) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(error) + continue + else: + exceptPass() + + elif input_parses.type == 'c': + if re.search('\*', str(input_parses)) or re.search('c.\-', str(input_parses)): + # Catch 
variation in UTRs + # These should be in the sequence so can be directly validated. Need to pass to n. + try: + vr.validate(input_parses) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('datums is ill-defined', error): + called_ref = input_parses.posedit.edit.ref + try: + to_n = evm.c_to_n(input_parses) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(error) + continue + actual_ref = to_n.posedit.edit.ref + if called_ref != actual_ref: + error = 'Variant reference (' + called_ref + ') does not agree with reference sequence (' + actual_ref + ')' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(error) + continue + else: + input_parses.posedit.edit.ref = '' + variant = str(input_parses) + else: + if re.search('bounds', error) or re.search('intronic variant', error): + try: + hn.normalize(input_parses) + except hgvs.exceptions.HGVSError as e: + exceptPass() + if re.search('bounds', str(e)): + try: + identity_info = hdp.get_tx_identity_info(input_parses.ac) + ref_start = identity_info[3] + ref_end = identity_info[4] + if re.match('-', str( + input_parses.posedit.pos.start)) and input_parses.posedit.pos.start.offset == 0: + # upstream positions + boundary = int('-' + str(ref_start)) + remainder = int(str(input_parses.posedit.pos.start)) - boundary + input_parses.posedit.pos.start.base = boundary + input_parses.posedit.pos.start.offset = remainder + if re.match('-', str( + input_parses.posedit.pos.end)) and input_parses.posedit.pos.end.offset == 0: + boundary = int('-' + str(ref_start)) + remainder = int(str(input_parses.posedit.pos.end)) - boundary + input_parses.posedit.pos.end.base = boundary + input_parses.posedit.pos.end.offset = remainder + if re.match('\*', str( + input_parses.posedit.pos.start)) and input_parses.posedit.pos.start.offset == 0: + # downstream positions + tot_end_pos = 
str(input_parses.posedit.pos.start).replace('*', '') + ts_seq = sf.fetch_seq(input_parses.ac) + boundary = len(ts_seq) - ref_end + input_parses.posedit.pos.start.base = boundary + offset = int(tot_end_pos) - int(boundary) + input_parses.posedit.pos.start.offset = offset + if re.match('\*', str( + input_parses.posedit.pos.end)) and input_parses.posedit.pos.end.offset == 0: + tot_end_pos = str(input_parses.posedit.pos.end).replace('*', '') + ts_seq = sf.fetch_seq(input_parses.ac) + boundary = len(ts_seq) - ref_end + input_parses.posedit.pos.end.base = boundary + offset = int(tot_end_pos) - int(boundary) + input_parses.posedit.pos.end.offset = offset + + # Create a lose vm instance + lose_vm = hgvs.variantmapper.VariantMapper(hdp, + replace_reference=True, + prevalidation_level=None + ) + + + report_gen = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, + primary_assembly, lose_vm, hp, hn, sf, nr_vm) + error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant: Instead use ' + valstr( + report_gen) + except Exception as e: + exceptPass() + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + pass + else: + pass + + try: + input_parses = evm.c_to_n(input_parses) + except hgvs.exceptions.HGVSError as e: + error = str(e) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(e)) + continue + + if re.search('n.1-', str(input_parses)): + input_parses = evm.n_to_c(input_parses) + error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + genomic_position = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, primary_assembly, + vm, hp, hn, sf, nr_vm) + error = error + valstr(genomic_position) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + pass + + # Re-map input_parses back to c. variant + input_parses = evm.n_to_c(input_parses) + + # Intronic positions in UTRs + if re.search('\d\-\d', str(input_parses)) or re.search('\d\+\d', str(input_parses)): + # Can we go c-g-c + try: + to_genome = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, primary_assembly, vm, + hp, hn, sf, nr_vm) + to_tx = evm.g_to_t(to_genome, input_parses.ac) + except hgvs.exceptions.HGVSInvalidIntervalError as e: + error = str(e) + if re.search('bounds', error): + try: + identity_info = hdp.get_tx_identity_info(input_parses.ac) + ref_start = identity_info[3] + ref_end = identity_info[4] + if re.match('-', str(input_parses.posedit.pos.start)): + # upstream positions + boundary = int('-' + str(ref_start)) + remainder = int(str(input_parses.posedit.pos.start)) - boundary + input_parses.posedit.pos.start.base = boundary + input_parses.posedit.pos.start.offset = remainder + if re.match('-', str(input_parses.posedit.pos.end)): + boundary = int('-' + str(ref_start)) + remainder = int(str(input_parses.posedit.pos.end)) - boundary + input_parses.posedit.pos.end.base = boundary + input_parses.posedit.pos.end.offset = remainder + if re.match('\*', str(input_parses.posedit.pos.start)): + # downstream positions + tot_end_pos = str(input_parses.posedit.pos.start).replace('*', '') + ts_seq = sf.fetch_seq(input_parses.ac) + boundary = len(ts_seq) - ref_end + input_parses.posedit.pos.start.base = boundary + te1, te2 = tot_end_pos.split('+') + tot_end_pos = int(te1) + int(te2) + offset = int(tot_end_pos) - int(boundary) + input_parses.posedit.pos.start.offset = offset + if re.match('\*', str(input_parses.posedit.pos.end)): + tot_end_pos = 
str(input_parses.posedit.pos.end).replace('*', '') + ts_seq = sf.fetch_seq(input_parses.ac) + boundary = len(ts_seq) - ref_end + input_parses.posedit.pos.end.base = boundary + te1, te2 = tot_end_pos.split('+') + tot_end_pos = int(te1) + int(te2) + offset = int(tot_end_pos) - int(boundary) + input_parses.posedit.pos.end.offset = offset + + report_gen = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, + primary_assembly, lose_vm, hp, hn, sf, nr_vm) + error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' + valstr( + report_gen) + except Exception as e: + exceptPass() + else: + pass + validation['warnings'] = validation['warnings'] + ': ' + str( + error) + logger.warning(str(error)) + continue + + except hgvs.exceptions.HGVSDataNotAvailableError as e: + error = str(e) + if 'Alignment is incomplete' in error: + e_list = error.split('~') + gens = [] + for el in e_list: + el_l = el.split('/') + if el_l[-1] == '': + continue + gens.append(el_l[-1]) + acs = '; '.join(gens) + error = 'Cannot map ' + valstr( + input_parses) + ' to a genomic position. ' + input_parses.ac + ' can only be partially aligned to genomic reference sequences ' + acs + validation['warnings'] = validation['warnings'] + ': ' + str( + error) + logger.warning(str(error)) + continue + + elif re.search('\d\-', str(input_parses)) or re.search('\d\+', str(input_parses)): + # Quick look at syntax validation + try: + vr.validate(input_parses) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if re.search('bounds', error): + try: + report_gen = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, primary_assembly, + lose_vm, hp, hn, sf, nr_vm) + except hgvs.exceptions.HGVSError as e: + exceptPass() + else: + error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + valstr( + report_gen) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + elif re.search('insertion length must be 1', error): + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + elif re.search('base start position must be <= end position', error): + correction = copy.deepcopy(input_parses) + st = input_parses.posedit.pos.start + ed = input_parses.posedit.pos.end + correction.posedit.pos.start = ed + correction.posedit.pos.end = st + error = error + ': Did you mean ' + str(correction) + '?' + # error = 'Interval start position ' + str(input_parses.posedit.pos.start) + ' > interval end position ' + str(input_parses.posedit.pos.end) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + # Create a specific minimal evm with no normalizer and no replace_reference + # Have to use this method due to potential multi chromosome error, note, normalizes but does not replace sequence + try: + output = va_func.noreplace_myevm_t_to_g(input_parses, evm, hdp, primary_assembly, vm, hn, + hp, sf, no_norm_evm) + except hgvs.exceptions.HGVSDataNotAvailableError as e: + tx_ac = input_parses.ac + try: + gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(tx_ac) + except: + gene_symbol = 'none' + if gene_symbol == 'none': + error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' + else: + error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' + 
validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + except ValueError as e: + error = str(e) + if re.search('> end', error): + error = 'Interval start position ' + str( + input_parses.posedit.pos.start) + ' > interval end position ' + str( + input_parses.posedit.pos.end) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if re.search('base start position must be <= end position', error): + correction = copy.deepcopy(input_parses) + st = input_parses.posedit.pos.start + ed = input_parses.posedit.pos.end + correction.posedit.pos.start = ed + correction.posedit.pos.end = st + error = error + ': Did you mean ' + str(correction) + '?' + error = 'Interval start position ' + str( + input_parses.posedit.pos.start) + ' > interval end position ' + str( + input_parses.posedit.pos.end) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + error = str(e) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + try: + evm.g_to_t(output, input_parses.ac) + except hgvs.exceptions.HGVSError as e: + error = str(e) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + try: + vr.validate(output) + except hgvs.exceptions.HGVSError as e: + error = str(e) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + else: + # All other variation + try: + vr.validate(input_parses) + except hgvs.exceptions.HGVSUnsupportedOperationError: + exceptPass() + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + """ + #Phil: Honestly not sure what the purpose of any of these is, we act the same regardless of what + #kind of error it is. 
+ if re.search('Length implied by coordinates', error): + # Applies to del and inv + # NOTE, there has been no normalization at all so this error is valid here + validation['warnings'] = validation['warnings'] + ': ' + str(error) + # Will apply to > del and inv + if re.search('does not agree with reference sequence', error): + validation['warnings'] = validation['warnings'] + ': ' + str(error) + # ensures x_y for insertions + if re.search('insertion length must be 1', error): + validation['warnings'] = validation['warnings'] + ': ' + str(error) + # Boundary issue + if re.search('Variant coordinate is out of the bound of CDS region', error): + validation['warnings'] = validation['warnings'] + ': ' + str(error) + """ + # This catches errors in introns + if re.search('base start position must be <= end position', error): + correction = copy.deepcopy(input_parses) + st = input_parses.posedit.pos.start + ed = input_parses.posedit.pos.end + correction.posedit.pos.start = ed + correction.posedit.pos.end = st + error = error + ': Did you mean ' + str(correction) + '?' + error = 'Interval start position ' + str( + input_parses.posedit.pos.start) + ' > interval end position ' + str( + input_parses.posedit.pos.end) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + except hgvs.exceptions.HGVSDataNotAvailableError as e: + error = e + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('bounds', error): + error = error + ' (' + input_parses.ac + ')' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + exceptPass() + + + elif input_parses.type == 'n': + if re.search('\+', str(input_parses)) or re.search('\-', str(input_parses)): + # Catch variation in UTRs + # These should be in the sequence so can be directly validated. 
Need to pass to n. + try: + vr.validate(input_parses) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('intronic variant', error): + pass + elif re.search('datums is ill-defined', error): + called_ref = input_parses.posedit.edit.ref + to_n = evm.c_to_n(input_parses) + actual_ref = to_n.posedit.edit.ref + if called_ref != actual_ref: + error = 'Variant reference (' + called_ref + ') does not agree with reference sequence (' + actual_ref + ')' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + input_parses.posedit.edit.ref = '' + variant = str(input_parses) + + elif re.search('base must be >=1 for datum = SEQ_START or CDS_END', error): + error = 'The given coordinate is outside the bounds of the reference sequence.' + + try: + if re.match('-', str(input_parses.posedit.pos.start)): + # upstream positions + boundary = 1 + remainder = int(str(input_parses.posedit.pos.start)) - boundary + remainder = remainder + 1 + input_parses.posedit.pos.start.base = boundary + input_parses.posedit.pos.start.offset = remainder + if re.match('-', str(input_parses.posedit.pos.end)): + boundary = 1 + remainder = int(str(input_parses.posedit.pos.end)) - boundary + remainder = remainder + 1 + input_parses.posedit.pos.end.base = boundary + input_parses.posedit.pos.end.offset = remainder + report_gen = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, primary_assembly, + lose_vm, hp, hn, sf, nr_vm) + error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + valstr( + report_gen) + except Exception as e: + exceptPass() + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + if re.search('n.1-', str(input_parses)): + error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' + genomic_position = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, primary_assembly, vm, + hp, hn, sf, nr_vm) + error = error + valstr(genomic_position) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + pass + + if re.search('\d\-', str(input_parses)) or re.search('\d\+', str(input_parses)): + # Quick look at syntax validation + try: + vr.validate(input_parses) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if re.search('bounds', error): + try: + report_gen = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, primary_assembly, + lose_vm, hp, hn, sf, nr_vm) + except hgvs.exceptions.HGVSError as e: + exceptPass() + else: + error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + valstr( + report_gen) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + elif re.search('insertion length must be 1', error): + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + elif re.search('base start position must be <= end position', error): + correction = copy.deepcopy(input_parses) + st = input_parses.posedit.pos.start + ed = input_parses.posedit.pos.end + correction.posedit.pos.start = ed + correction.posedit.pos.end = st + error = error + ': Did you mean ' + str(correction) + '?' + # error = 'Interval start position ' + str(input_parses.posedit.pos.start) + ' > interval end position ' + str(input_parses.posedit.pos.end) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + elif re.search('Cannot validate sequence of an intronic variant', error): + try: + test_g = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, primary_assembly, vm, + hp, hn, sf, nr_vm) + back_to_n = evm.g_to_t(test_g, input_parses.ac) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('bounds', error): + report_gen = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, + primary_assembly, lose_vm, hp, hn, sf, nr_vm) + error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + valstr( + report_gen) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + exceptPass() + + # Create a specific minimal evm with no normalizer and no replace_reference + # Have to use this method due to potential multi chromosome error, note, normalizes but does not replace sequence + try: + output = va_func.noreplace_myevm_t_to_g(input_parses, evm, hdp, primary_assembly, vm, hn, + hp, sf, no_norm_evm) + except hgvs.exceptions.HGVSDataNotAvailableError as e: + tx_ac = input_parses.ac + try: + gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(tx_ac) + except: + gene_symbol = 'none' + if gene_symbol == 'none': + error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' + else: + error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + except ValueError as e: + error = str(e) + if re.search('> end', error): + error = 'Interval start position ' + str( + input_parses.posedit.pos.start) + ' > interval end position ' + str( + input_parses.posedit.pos.end) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if re.search('base start position must be <= end position', error): + correction = copy.deepcopy(input_parses) + st = input_parses.posedit.pos.start + ed = input_parses.posedit.pos.end + 
correction.posedit.pos.start = ed + correction.posedit.pos.end = st + error = error + ': Did you mean ' + str(correction) + '?' + error = 'Interval start position ' + str( + input_parses.posedit.pos.start) + ' > interval end position ' + str( + input_parses.posedit.pos.end) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + try: + vr.validate(output) + except hgvs.exceptions.HGVSError as e: + error = str(e) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + else: + # All other variation + try: + vr.validate(input_parses) + except hgvs.exceptions.HGVSUnsupportedOperationError: + + exceptPass() + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + """ + if re.search('Length implied by coordinates', error): + # Applies to del and inv + # NOTE, there has been no normalization at all so this error is valid here + validation['warnings'] = validation['warnings'] + ': ' + str(error) + continue + # Will apply to > del and inv + if re.search('does not agree with reference sequence', error): + validation['warnings'] = validation['warnings'] + ': ' + str(error) + continue + # ensures x_y for insertions + if re.search('insertion length must be 1', error): + validation['warnings'] = validation['warnings'] + ': ' + str(error) + continue + # Boundary issue + if re.search('Variant coordinate is out of the bound of CDS region', error): + validation['warnings'] = validation['warnings'] + ': ' + str(error) + continue + """ + # This catches errors in introns + if re.search('base start position must be <= end position', error): + correction = copy.deepcopy(input_parses) + st = input_parses.posedit.pos.start + ed = input_parses.posedit.pos.end + correction.posedit.pos.start = ed + correction.posedit.pos.end = st + error = error + ': Did you mean ' + str(correction) + '?' 
+ error = 'Interval start position ' + str( + input_parses.posedit.pos.start) + ' > interval end position ' + str( + input_parses.posedit.pos.end) + logger.warning(str(error)) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + continue + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + except hgvs.exceptions.HGVSDataNotAvailableError as e: + error = e + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('bounds', error): + error = error + ' (' + input_parses.ac + ')' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + pass + logger.trace("Variant structure and contents searches passed", validation) + # Mitochondrial variants + """ + Reformat m. into the new HGVS standard which is now m again! + """ + if type == ':m.' or re.match('NC_012920.1', str(input_parses.ac)) or re.match('NC_001807.4', + str(input_parses.ac)): + hgvs_mito = copy.deepcopy(input_parses) + if (re.match('NC_012920.1', str(hgvs_mito.ac)) and hgvs_mito.type == 'g') or ( + re.match('NC_001807.4', str(hgvs_mito.ac)) and hgvs_mito.type == 'g'): + hgvs_mito.type = 'm' + caution = '' + try: + vr.validate(hgvs_mito) + except hgvs.exceptions.HGVSError as e: + error = caution + ': ' + str(e) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + except KeyError as e: + error = caution + ': Currently unable to validate ' + hgvs_mito.ac + ' sequence variation' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + # Any transcripts? 
+ rel_var = va_func.relevant_transcripts(hgvs_mito, evm, hdp, alt_aln_method, reverse_normalizer) + hgvs_genomic = copy.deepcopy(hgvs_mito) + if len(rel_var) == 0: + validation['genomic_g'] = valstr(hgvs_mito) + validation['description'] = 'Homo sapiens mitochondrion, complete genome' + logger.info('Homo sapiens mitochondrion, complete genome') + continue + # Currently we are not expecting this path to be activated because not m. transcripts seem to be NM_ + # This route may throw up errors in the future + else: + pass + + # handle :p. + if type == ':p.': + error = 'false' + # Try to validate the variant + try: + hgvs_object = hp.parse_hgvs_variant(variant) + except hgvs.exceptions.HGVSError as e: + error = str(e) + try: + vr.validate(hgvs_object) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error != 'false': + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + # Get accurate descriptions from the relevant databases + # RefSeq databases + if alt_aln_method != 'genebuild': + # Gene description - requires GenBank search to get all the required info, i.e. 
transcript variant ID + # accession number + hgvs_object = hp.parse_hgvs_variant(variant) + accession = hgvs_object.ac + # Look for the accession in our database + # Connect to database and send request + record = va_func.entrez_efetch(db="nuccore", id=accession, rettype="gb", retmode="text") + try: + description = record.description + except: + description = 'Unable to recover the description of ' + accession + ' from Entrez' + try: + vr.validate(hgvs_object) + except hgvs.exceptions.HGVSError as e: + error = str(e) + else: + error = str( + hgvs_object) + ' is HGVS compliant and contains a valid reference amino acid description' + reason = 'Protein level variant descriptions are not fully supported due to redundancy in the genetic code' + validation['warnings'] = validation['warnings'] + ': ' + str(reason) + ': ' + str(error) + validation['protein'] = str(hgvs_object) + logger.warning(str(reason) + ": " + str(error)) + continue + + # handle :r. + """ + convert r, into c. + """ + trapped_input = input + if type == ':r.': + hgvs_input = hp.parse_hgvs_variant(input) # Traps the hgvs variant of r. for further use + # Change to coding variant + type = ':c.' + # Change input to reflect! 
+ try: + hgvs_c = va_func.hgvs_r_to_c(hgvs_input) + except hgvs.exceptions.HGVSDataNotAvailableError as e: + error = str(e) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + input = str(hgvs_c) + variant = str(hgvs_c) + + # COLLECT gene symbol, name and ACCESSION INFORMATION + # Gene symbol + logger.trace("Handled mitochondrial variants", validation) + """ + Identifies the transcript reference sequence name and HGNC gene symbol + """ + if (type != ':g.'): + error = 'false' + hgvs_vt = hp.parse_hgvs_variant(variant) + try: + tx_id_info = hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + logger.warning(error) + if error != 'false': + error = 'Please inform UTA admin of the following error: ' + str(error) + issue_link = "https://bitbucket.org/biocommons/uta/issues?status=new&status=open" + reason = "VariantValidator cannot recover information for transcript " + str( + hgvs_vt.ac) + ' beacuse it is not available in the Universal Transcript Archive' + validation['warnings'] = validation['warnings'] + ': ' + str(reason) + logger.warning(str(reason) + ": " + str(error)) + continue + else: + # Get hgnc Gene name from command + hgnc = tx_id_info[6] + issue_link = 'false' + + # ACCESS THE GENE INFORMATION RECORDS ON THE UTA DATABASE + # Refseq accession + tx_for_gene = va_func.tx_for_gene(hgnc, hdp) + refseq_ac = va_func.ng_extract(tx_for_gene) + + # Additional gene info + gene_info = hdp.get_gene_info(hgnc) + # Chromosomal location + try: + maploc = gene_info[1] + except: + maploc = '' + chr_loc = ("Chromosome location: " + maploc) + + # Get accurate transcript descriptions from the relevant databases + # RefSeq databases + if alt_aln_method != 'genebuild': + # Gene description - requires GenBank search to get all the required info, i.e. 
transcript variant ID + # accession number + hgvs_object = hp.parse_hgvs_variant(variant) + accession = hgvs_object.ac + # Look for the accession in our database + # Connect to database and send request + entry = va_dbCrl.data.in_entries(accession, 'transcript_info') + + # Analyse the returned data and take the necessary actions + # If the error key exists + if 'error' in entry: + # Open a hgvs exception log file in append mode + error = entry['description'] + validation['warnings'] = validation['warnings'] + ': ' + str( + error) + ': A Database error occurred, please contact admin' + logger.warning(str(error) + ": A Database error occurred, please contact admin") + continue + + # If the accession key is found + elif 'accession' in entry: + description = entry['description'] + # If the current entry is too old + if entry['expiry'] == 'true': + dbaction = 'update' + try: + entry = va_btch.data_add(input=input, alt_aln_method=alt_aln_method, + accession=accession, dbaction=dbaction, hp=hp, evm=evm, + hdp=hdp) + except hgvs.exceptions.HGVSError as e: + error = 'Transcript %s is not currently supported' % (accession) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + except Exception as e: + error = 'Unable to assign transcript identity records to ' + accession + ', potentially an obsolete record :' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + hgnc_gene_info = entry['description'] + else: + hgnc_gene_info = entry['description'] + # If the none key is found add the description to the database + elif 'none' in entry: + dbaction = 'insert' + try: + entry = va_btch.data_add(input=input, alt_aln_method=alt_aln_method, + accession=accession, dbaction=dbaction, hp=hp, evm=evm, + hdp=hdp) + except Exception as e: + logger.warning(str(e)) + error = 'Unable to assign transcript identity records to ' + accession + ', potentially an obsolete record :' + 
validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + hgnc_gene_info = entry['description'] + + # If no correct keys are found + else: + # Open a hgvs exception log file in append mode + error = 'Unknown error type' + validation['warnings'] = validation['warnings'] + ': ' + str( + error) + ': A Database error occurred, please contact admin' + logger.warning(str(error)) + continue + + # Ensembl databases + else: + # accession number + hgvs_object = hp.parse_hgvs_variant(variant) + accession = hgvs_object.ac + # Look for the accession in our database + # Connect to database and send request + entry = va_dbCrl.data.in_entries(accession, 'transcript_info') + + # Analyse the returned data and take the necessary actions + # If the error key exists + if 'error' in entry: + # Open a hgvs exception log file in append mode + error = entry['description'] + validation['warnings'] = validation['warnings'] + ': ' + str( + error) + ': A Database error occurred, please contact admin' + logger.warning(str(error)) + continue + + # If the accession key is found + elif 'accession' in entry: + description = entry['description'] + # If the current entry is too old + if entry['expiry'] == 'true': + dbaction = 'update' + entry = va_btch.data_add(input=input, alt_aln_method=alt_aln_method, + accession=accession, dbaction=dbaction, hp=hp, evm=evm, + hdp=hdp) + hgnc_gene_info = entry['description'] + else: + hgnc_gene_info = entry['description'] + # If the none key is found add the description to the database + elif 'none' in entry: + dbaction = 'insert' + try: + entry = va_btch.data_add(input=input, alt_aln_method=alt_aln_method, + accession=accession, dbaction=dbaction, hp=hp, evm=evm, + hdp=hdp) + except Exception as e: + logger.warning(str(e)) + error = 'Unable to assign transcript identity records to ' + accession + ', potentially an obsolete record :' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + 
logger.warning(str(error)) + continue + hgnc_gene_info = entry['description'] + + # If no correct keys are found + else: + # Open a hgvs exception log file in append mode + error = 'Unknown error type' + validation['warnings'] = validation['warnings'] + ': ' + str( + error) + ': A Database error occurred, please contact admin' + logger.warning(str(error)) + continue + + # Genomic type variants will need to be mapped to transcripts + """ + The following section is used to project genomic variants accurately onto + all relevant transcripts + """ + + if (type == ':g.'): + g_query = hp.parse_hgvs_variant(variant) + + # Genomic coordinates can be validated immediately + error = 'false' + try: + vr.validate(g_query) + except hgvs.exceptions.HGVSError as e: + error = str(e) + except KeyError: + error = 'Reference sequence ' + hgvs_genomic.ac + ' is either not supported or does not exist' + if error != 'false': + reason = 'Invalid variant description' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + pass + + # Set test to see if Norm alters the coords + g_test = hn.normalize(g_query) + + # Perform test + if g_query.posedit.pos != g_test.posedit.pos: + # validation['warnings'] = validation['warnings'] + ': ' + 'Input variant description normalized to ' + str(g_test) + hgvs_genomic = g_test + else: + hgvs_genomic = g_query + + # Collect rel_var + # rel_var is a keyworded list of relevant transcripts with associated coding variants + """ + Initial simple projection from the provided g. 
position all overlapping + transcripts + """ + rel_var = va_func.relevant_transcripts(hgvs_genomic, evm, hdp, alt_aln_method, reverse_normalizer) + + # Double check rel_vars have not been missed when mapping from a RefSeqGene + if len(rel_var) != 0 and re.match('NG_', str(hgvs_genomic.ac)): + for var in rel_var: + hgvs_coding_variant = hp.parse_hgvs_variant(var) + try: + hgvs_genomic = va_func.myevm_t_to_g(hgvs_coding_variant, hdp, no_norm_evm, + primary_assembly, vm, hp, hn, sf, nr_vm) + except hgvs.exceptions.HGVSError as e: + try_rel_var = [] + else: + try_rel_var = va_func.relevant_transcripts(hgvs_genomic, evm, hdp, alt_aln_method, + reverse_normalizer) + if len(try_rel_var) > len(rel_var): + rel_var = try_rel_var + break + else: + continue + + # Tripple check this assumption by querying the gene position database + if len(rel_var) == 0: + vcf_dict = va_H2V.hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf) + not_di = str(hgvs_genomic.ac) + ':g.' + str(vcf_dict['pos']) + '_' + str( + int(vcf_dict['pos']) + (len(vcf_dict['ref']) - 1)) + 'del' + vcf_dict['ref'] + 'ins' + \ + vcf_dict['alt'] + hgvs_not_di = hp.parse_hgvs_variant(not_di) + rel_var = va_func.relevant_transcripts(hgvs_not_di, evm, hdp, alt_aln_method, + reverse_normalizer) + + # list return statements + """ + If mapping to transcripts has been unsuccessful, provide relevant details + """ + if len(rel_var) == 0: + + # Check for NG_ + rsg = re.compile('^NG_') + if rsg.search(variant): + # parse + hgvs_refseqgene = hp.parse_hgvs_variant(variant) + # Convert to chromosomal position + refseqgene_data = va_g2g.rsg_to_chr(hgvs_refseqgene, primary_assembly, hn, vr) + # There should only ever be one description returned + refseqgene_data = refseqgene_data[0] + + # Extract data + if refseqgene_data['valid'] == 'true': + input = refseqgene_data['hgvs_genomic'] + # re_submit + # Tag the line so that it is not written out + validation['warnings'] = validation[ + 'warnings'] + ': ' + variant + ' 
automapped to genome position ' + str( + input) + query = {'quibble': input, 'id': validation['id'], 'warnings': validation['warnings'], + 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', + 'genomic_g': '', 'protein': '', 'write': 'true', + 'primary_assembly': primary_assembly, 'order': ordering} + coding = 'intergenic' + batch_list.append(query) + else: + error = 'Mapping unavailable for RefSeqGene ' + variant + ' using alignment method = ' + alt_aln_method + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + # Chromosome build is not supported or intergenic??? + else: + sfm = vvChromasomes.supported_for_mapping(hgvs_genomic.ac, primary_assembly) + if sfm == 'true': + try: + vr.validate(hgvs_genomic) + except hgvs.exceptions.HGVSError as e: + error = str(e) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + # Map to RefSeqGene if available + refseqgene_data = va_g2g.chr_to_rsg(hgvs_genomic, hn, vr) + rsg_data = '' + # Example {'gene': 'NTHL1', 'hgvs_refseqgene': 'NG_008412.1:g.3455_3464delCAAACACACA', 'valid': 'true'} + for data in refseqgene_data: + if data['valid'] == 'true': + data['hgvs_refseqgene'] = hp.parse_hgvs_variant(data['hgvs_refseqgene']) + data['hgvs_refseqgene'] = valstr(data['hgvs_refseqgene']) + rsg_data = rsg_data + data['hgvs_refseqgene'] + ' (' + data['gene'] + '), ' + + error = 'No transcripts found that fully overlap the described variation in the genomic sequence' + # set output type flag + set_output_type_flag = 'intergenic' + # set genomic and where available RefSeqGene outputs + validation['warnings'] = validation['warnings'] + ': ' + str(error) + validation['genomic_g'] = valstr(hgvs_genomic) + validation['genomic_r'] = str(rsg_data.split('(')[0]) + logger.warning(str(error)) + continue + else: + error = 'Please ensure the requested chromosome version relates to a supported genome build. 
Supported genome builds are: GRCh37, GRCh38, hg19 and hg38' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + else: + # Tag the line so that it is not written out + validation['write'] = 'false' + + """ + Gap aware projection from g. to c. + """ + + # Set variables for problem specific warnings + gapped_alignment_warning = '' + corrective_action_taken = '' + gapped_transcripts = '' + auto_info = '' + + # Create a pseudo VCF so that normalization can be applied and a delins can be generated + hgvs_genomic_variant = hgvs_genomic + # Reverse normalize hgvs_genomic_variant: NOTE will replace ref + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic_variant) + hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + + # VCF + vcf_dict = va_H2V.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, + reverse_normalizer, sf) + chr = vcf_dict['chr'] + pos = vcf_dict['pos'] + ref = vcf_dict['ref'] + alt = vcf_dict['alt'] + + # Generate an end position + end = str(int(pos) + len(ref) - 1) + pos = str(pos) + + # take a look at the input genomic variant for potential base salvage + stash_ac = vcf_dict['chr'] + stash_pos = int(vcf_dict['pos']) + stash_ref = vcf_dict['ref'] + stash_alt = vcf_dict['alt'] + stash_end = end + # Re-Analyse genomic positions + if re.match('NG_', str(stash_input)): + c = hp.parse_hgvs_variant(rel_var[0]) + if hasattr(c.posedit.edit, 'ref') and c.posedit.edit.ref is not None: + c.posedit.edit.ref = c.posedit.edit.ref.upper() + if hasattr(c.posedit.edit, 'alt') and c.posedit.edit.alt is not None: + c.posedit.edit.alt = c.posedit.edit.alt.upper() + stash_input = va_func.myevm_t_to_g(c, hdp, no_norm_evm, primary_assembly, vm, hp, hn, sf, + nr_vm) + if re.match('NC_', str(stash_input)) or re.match('NT_', str(stash_input)) or re.match('NW_', + str( + stash_input)): + try: + hgvs_stash = hp.parse_hgvs_variant(stash_input) + except: + hgvs_stash = 
stash_input + if hasattr(hgvs_stash.posedit.edit, 'ref') and hgvs_stash.posedit.edit.ref is not None: + hgvs_stash.posedit.edit.ref = hgvs_stash.posedit.edit.ref.upper() + if hasattr(hgvs_stash.posedit.edit, 'alt') and hgvs_stash.posedit.edit.alt is not None: + hgvs_stash.posedit.edit.alt = hgvs_stash.posedit.edit.alt.upper() + + stash_ac = hgvs_stash.ac + # MAKE A NO NORM HGVS2VCF + stash_dict = va_H2V.pos_lock_hgvs2vcf(hgvs_stash, primary_assembly, reverse_normalizer, sf) + stash_ac = hgvs_stash.ac + stash_pos = int(stash_dict['pos']) + stash_ref = stash_dict['ref'] + stash_alt = stash_dict['alt'] + # Generate an end position + stash_end = str(stash_pos + len(stash_ref) - 1) + + # Store a not real deletion insertion + stored_hgvs_not_delins = hp.parse_hgvs_variant(str( + hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) + stash_hgvs_not_delins = hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_genomic_5pr.type + '.' + str( + stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + + # Set non-valid caution to false + non_valid_caution = 'false' + + # make an empty rel_var + nw_rel_var = [] + + # loop through rel_var and amend where required + for var in rel_var: + # Store the current hgvs:c. description + saved_hgvs_coding = hp.parse_hgvs_variant(var) + + # Remove un-selected transcripts + if select_transcripts != 'all': + tx_ac = saved_hgvs_coding.ac + # If it's in the selected tx dict, keep it + if tx_ac.split('.')[0] in select_transcripts_dict.keys(): + pass + # If not get rid of it! 
+ else: + continue + + # Get orientation of the gene wrt genome and a list of exons mapped to the genome + ori = va_func.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=hgvs_genomic_5pr.ac, + alt_aln_method=alt_aln_method, hdp=hdp) + orientation = int(ori[0]['alt_strand']) + intronic_variant = 'false' + + if orientation == -1: + # position genomic at its most 5 prime position + try: + query_genomic = reverse_normalizer.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript ant test for movement + try: + hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): + pass + else: + hgvs_seek_var = saved_hgvs_coding + + elif orientation != -1: + # position genomic at its most 3 prime position + try: + query_genomic = hn.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript and test for movement + try: + hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): + pass + else: + hgvs_seek_var = 
saved_hgvs_coding + + try: + intron_test = hn.normalize(hgvs_seek_var) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + intronic_variant = 'hard_fail' + else: + # Double check to see whether the variant is actually intronic? + for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' + + if intronic_variant != 'hard_fail': + if re.search('\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search('\d+\-', str( + hgvs_seek_var.posedit.pos)) or re.search('\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search('\*\d+\-', + str(hgvs_seek_var.posedit.pos)): + # Double check to see whether the variant is actually intronic? 
+ for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' + + if re.search('\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search('\d+\-', str( + hgvs_seek_var.posedit.pos)) or re.search('\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search('\*\d+\-', str(hgvs_seek_var.posedit.pos)): + # Double check to see whether the variant is actually intronic? + for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' + + # If exonic, process + if intronic_variant != 'true': + # map form reverse normalized g. to c. + hgvs_from_5n_g = no_norm_evm.g_to_t(hgvs_genomic_5pr, saved_hgvs_coding.ac) + + # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths + disparity_deletion_in = ['false', 'false'] + if stored_hgvs_not_delins != '': + # Refresh hgvs_not_delins from stored_hgvs_not_delins + hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) + # This test will only occur in dup of single base, insertion or substitution + if not re.search('_', str(hgvs_not_delins.posedit.pos)): + if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', + hgvs_genomic_5pr.posedit.edit.type): + # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos + plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) + plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 + plussed_hgvs_not_delins.posedit.edit.ref = '' + transcript_variant = no_norm_evm.g_to_t(plussed_hgvs_not_delins, + str(saved_hgvs_coding.ac)) + if (( + transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( + hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', + str(hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + else: + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = 
hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', + str(hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + else: + pass + else: + pass + + try: + tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSInvalidIntervalError: + tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_genomic_5pr, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError: + if str(e) == 'start or end or both are beyond the bounds of transcript record': + tx_hgvs_not_delins = saved_hgvs_coding + + # Create normalized version of tx_hgvs_not_delins + rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) + # Check for +ve base and adjust + if (re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)) or re.search('\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) or re.search( + '\-', str(rn_tx_hgvs_not_delins.posedit.pos.end))): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + 
rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: + exceptPass() + elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, + primary_assembly, vm, hp, hn, sf, nr_vm) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, + primary_assembly, vm, hp, hn, sf, nr_vm) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # else: + # pass + + # Check for -ve base and adjust + elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search('\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = 
rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: + exceptPass() + elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base back to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # Delete the ref + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # Add the additional base to the ALT + start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 + end = rn_tx_hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, + primary_assembly, vm, hp, hn, sf, nr_vm) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, + primary_assembly, vm, hp, hn, sf, nr_vm) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + else: + pass + + # Logic + if len(hgvs_not_delins.posedit.edit.ref) < len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + 
gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( + hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['chromosome', gap_length] + elif len(hgvs_not_delins.posedit.edit.ref) > len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( + rn_tx_hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['transcript', gap_length] + else: + hgvs_stash_t = vm.g_to_t(stash_hgvs_not_delins, saved_hgvs_coding.ac) + if len(stash_hgvs_not_delins.posedit.edit.ref) > len( + hgvs_stash_t.posedit.edit.ref): + try: + hn.normalize(hgvs_stash_t) + except: + exceptPass() + else: + gap_length = len(stash_hgvs_not_delins.posedit.edit.ref) - len( + hgvs_stash_t.posedit.edit.ref) + disparity_deletion_in = ['transcript', gap_length] + try: + tx_hgvs_not_delins = vm.c_to_n(hgvs_stash_t) + except: + tx_hgvs_not_delins = hgvs_stash_t + hgvs_not_delins = stash_hgvs_not_delins + elif hgvs_stash_t.posedit.pos.start.offset != 0 or hgvs_stash_t.posedit.pos.end.offset != 0: + disparity_deletion_in = ['transcript', 'Requires Analysis'] + try: + tx_hgvs_not_delins = vm.c_to_n(hgvs_stash_t) + except: + tx_hgvs_not_delins = hgvs_stash_t + hgvs_not_delins = stash_hgvs_not_delins + hgvs_genomic_5pr = stash_hgvs_not_delins + else: + pass + + # Final sanity checks + try: + vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + except Exception as e: + if str(e) == 'start or end or both are beyond the bounds of transcript record': + hgvs_not_delins = saved_hgvs_coding + disparity_deletion_in = ['false', 'false'] + logger.warning(str(e)) + try: + hn.normalize(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + 
hgvs_not_delins = saved_hgvs_coding + disparity_deletion_in = ['false', 'false'] + elif re.match('Normalization of intronic variants is not supported', error): + # We know that this cannot be because of an intronic variant, so must be aligned to tx gap + disparity_deletion_in = ['transcript', 'Requires Analysis'] + logger.warning(error) + # Pre-processing of tx_hgvs_not_delins + try: + if tx_hgvs_not_delins.posedit.edit.alt is None: + tx_hgvs_not_delins.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( + tx_hgvs_not_delins.posedit.pos.start) + '_' + str( + tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins = hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) + + # GAP IN THE TRANSCRIPT DISPARITY DETECTED + if disparity_deletion_in[0] == 'transcript': + gap_position = '' + gapped_alignment_warning = str( + hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + primary_assembly + + # ANY VARIANT WHOLLY WITHIN THE GAP + if (re.search('\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search('\-', + str( + tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search('\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search('\-', + str( + tx_hgvs_not_delins.posedit.pos.end))): + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + # Copy the current variant + tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) + try: + if tx_gap_fill_variant.posedit.edit.alt is None: + tx_gap_fill_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + 
tx_gap_fill_variant.type + '.' + str( + tx_gap_fill_variant.posedit.pos.start) + '_' + str( + tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant = hp.parse_hgvs_variant( + tx_gap_fill_variant_delins_from_dup) + + # Identify which half of the NOT-intron the start position of the variant is in + if re.search('\-', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + elif re.search('\+', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + + try: + tx_gap_fill_variant = vm.n_to_c(tx_gap_fill_variant) + except: + exceptPass() + genomic_gap_fill_variant = vm.t_to_g(tx_gap_fill_variant, + reverse_normalized_hgvs_genomic.ac) + genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref + + try: + c_tx_hgvs_not_delins = vm.n_to_c(tx_hgvs_not_delins) + except Exception: + c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) + genomic_gap_fill_variant_alt = vm.t_to_g(c_tx_hgvs_not_delins, + hgvs_genomic_5pr.ac) + + # Ensure an ALT exists + try: + if genomic_gap_fill_variant_alt.posedit.edit.alt is None: + genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_gap_fill_variant_delins_from_dup = 
genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( + genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant = hp.parse_hgvs_variant( + genomic_gap_fill_variant_delins_from_dup) + genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( + genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt = hp.parse_hgvs_variant( + genomic_gap_fill_variant_alt_delins_from_dup) + + # Correct insertion alts + if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': + append_ref = sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, + genomic_gap_fill_variant_alt.posedit.pos.end.base) + genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ + 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ + append_ref[1] + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) + if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: + alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) + else: + # Deletions with no ins + pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = genomic_gap_fill_variant.posedit.pos.start.base + alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base + 
ref_base_dict = {} + for base in reference_bases: + ref_base_dict[ref_start] = str(base) + ref_start = ref_start + 1 + + alt_base_dict = {} + + # NEED TO SEARCH FOR RANGE = and replace with interval_range + # Need to search for int and replace with integer + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, + genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): + if integer == alt_start: + alt_base_dict[integer] = str(''.join(alternate_bases)) + else: + alt_base_dict[integer] = 'X' + + # Generate the alt sequence + alternate_sequence_bases = [] + for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, + genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): + if integer in alt_base_dict.keys(): + alternate_sequence_bases.append(alt_base_dict[integer]) + else: + alternate_sequence_bases.append(ref_base_dict[integer]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Add the new alt to the gap fill variant and generate transcript variant + genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence + hgvs_refreshed_variant = vm.g_to_t(genomic_gap_fill_variant, + tx_gap_fill_variant.ac) + + # Set warning + gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) + disparity_deletion_in[1] = [gap_size] + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + if re.match('\-', str(for_location_c.posedit.pos.start.offset)): + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + else: + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + auto_info = auto_info + '%s' % (gap_position) + else: + if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: + # In this instance, we have identified a transcript gap but the n. version of + # the transcript variant but do not have a position which actually hits the gap, + # so the variant likely spans the gap, and is not picked up by an offset. + try: + c1 = vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + g1 = nr_vm.t_to_g(c1, hgvs_genomic.ac) + g3 = nr_vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + ng2 = hn.normalize(g2) + g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( + len(g3.posedit.edit.ref) - 1) + try: + c2 = vm.g_to_t(g3, c1.ac) + if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: + pass + else: + tx_hgvs_not_delins = c2 + try: + tx_hgvs_not_delins = vm.c_to_n(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSError: + exceptPass() + except hgvs.exceptions.HGVSInvalidVariantError: + exceptPass() + + if re.search('\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + '\+', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = vm.t_to_g(c2, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search('\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + '\+', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = vm.t_to_g(c2, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base + gpe = for_location_c.posedit.pos.end.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search('\-', + str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + '\-', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = vm.t_to_g(c2, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search('\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + '\-', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + c2 = vm.g_to_t(g2, c2.ac) + reference = c1.posedit.edit.ref + c2.posedit.edit.ref[1:] + alternate = c1.posedit.edit.alt + c2.posedit.edit.ref[1:] + c3 = copy.deepcopy(c1) + c3.posedit.pos.end = c2.posedit.pos.end + c3.posedit.edit.ref = '' # reference + c3.posedit.edit.alt = alternate + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base - 1 + gpe = for_location_c.posedit.pos.end.base + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + else: + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + '\n' + hgvs_refreshed_variant = tx_hgvs_not_delins + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + + # GAP IN THE CHROMOSOME + elif disparity_deletion_in[0] == 'chromosome': + # Set warning variables + gap_position = '' + gapped_alignment_warning = str( + hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + primary_assembly + hgvs_refreshed_variant = tx_hgvs_not_delins + # Warn + auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( + hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(disparity_deletion_in[ + 1]) + ' transcript base(s) that fail to align to chromosome ' + str( + hgvs_genomic.ac) + '\n' + gapped_transcripts = gapped_transcripts + str(hgvs_refreshed_variant.ac) + ' ' + else: + # Try the push + hgvs_stash = copy.deepcopy(stash_hgvs_not_delins) + stash_ac = hgvs_stash.ac + # Make a hard left and hard right not delins g. + stash_dict_right = va_H2V.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, sf) + stash_pos_right = int(stash_dict_right['pos']) + stash_ref_right = stash_dict_right['ref'] + stash_alt_right = stash_dict_right['alt'] + stash_end_right = str(stash_pos_right + len(stash_ref_right) - 1) + stash_hgvs_not_delins_right = hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' 
+ str( + stash_pos_right) + '_' + stash_end_right + 'del' + stash_ref_right + 'ins' + stash_alt_right) + stash_dict_left = va_H2V.hard_left_hgvs2vcf(hgvs_stash, primary_assembly, + reverse_normalizer, sf) + stash_pos_left = int(stash_dict_left['pos']) + stash_ref_left = stash_dict_left['ref'] + stash_alt_left = stash_dict_left['alt'] + stash_end_left = str(stash_pos_left + len(stash_ref_left) - 1) + stash_hgvs_not_delins_left = hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' + str( + stash_pos_left) + '_' + stash_end_left + 'del' + stash_ref_left + 'ins' + stash_alt_left) + # Map in-situ to the transcript left and right + try: + tx_hard_right = vm.g_to_t(stash_hgvs_not_delins_right, saved_hgvs_coding.ac) + except Exception as e: + tx_hard_right = saved_hgvs_coding + else: + normalize_stash_right = hn.normalize(stash_hgvs_not_delins_right) + if str(normalize_stash_right.posedit) == str(stash_hgvs_not_delins.posedit): + tx_hard_right = saved_hgvs_coding + try: + tx_hard_left = vm.g_to_t(stash_hgvs_not_delins_left, saved_hgvs_coding.ac) + except Exception as e: + tx_hard_left = saved_hgvs_coding + else: + normalize_stash_left = hn.normalize(stash_hgvs_not_delins_left) + if str(normalize_stash_left.posedit) == str(stash_hgvs_not_delins.posedit): + tx_hard_left = saved_hgvs_coding + # The Logic - Currently limited to genome gaps + if len(stash_hgvs_not_delins_right.posedit.edit.ref) < len( + tx_hard_right.posedit.edit.ref): + tx_hard_right = hn.normalize(tx_hard_right) + gap_position = '' + gapped_alignment_warning = str( + hgvs_genomic_5pr) + ' may be an artefact of aligning the transcripts listed below with genome build ' + primary_assembly + hgvs_refreshed_variant = tx_hard_right + gapped_transcripts = gapped_transcripts + str(tx_hard_right.ac) + ' ' + elif len(stash_hgvs_not_delins_left.posedit.edit.ref) < len( + tx_hard_left.posedit.edit.ref): + tx_hard_left = hn.normalize(tx_hard_left) + gap_position = '' + gapped_alignment_warning = str( + 
hgvs_genomic_5pr) + ' may be an artefact of aligning the transcripts listed below with genome build ' + primary_assembly + hgvs_refreshed_variant = tx_hard_left + gapped_transcripts = gapped_transcripts + str(tx_hard_left.ac) + ' ' + else: + # Keep the same by re-setting rel_var + hgvs_refreshed_variant = saved_hgvs_coding + + # Edit the output + if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( + hgvs_refreshed_variant.type)): + hgvs_refreshed_variant = evm.n_to_c(hgvs_refreshed_variant) + else: + pass + try: + hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) + if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ + hgvs_refreshed_variant.posedit.edit.ref[-1] == \ + hgvs_refreshed_variant.posedit.edit.alt[-1]: + hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ + 0:-1] + hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ + 0:-1] + hgvs_refreshed_variant.posedit.pos.end.base = hgvs_refreshed_variant.posedit.pos.end.base - 1 + hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) + elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ + hgvs_refreshed_variant.posedit.edit.ref[0] == \ + hgvs_refreshed_variant.posedit.edit.alt[0]: + hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ + 1:] + hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ + 1:] + hgvs_refreshed_variant.posedit.pos.start.base = hgvs_refreshed_variant.posedit.pos.start.base + 1 + hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) + except Exception as e: + error = str(e) + # Ensure the final variant is not intronic nor does it cross exon boundaries + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + hgvs_refreshed_variant = saved_hgvs_coding + else: + pass + exceptPass() 
+ # Send to empty nw_rel_var + nw_rel_var.append(hgvs_refreshed_variant) + + # Otherwise these variants need to be set + else: + corrective_action_taken = '' + gapped_alignment_warning = '' + # Send to empty nw_rel_var + nw_rel_var.append(saved_hgvs_coding) + + # Warn the user that the g. description is not valid + if gapped_alignment_warning != '': + if disparity_deletion_in[0] == 'transcript': + corrective_action_taken = 'Automap has deleted ' + str( + disparity_deletion_in[1]) + ' bp from chromosomal reference sequence ' + str( + hgvs_genomic.ac) + ' to ensure perfect alignment with transcript reference sequence(s)' + gapped_transcripts + if disparity_deletion_in[0] == 'chromosome': + corrective_action_taken = 'Automap has added ' + str( + disparity_deletion_in[1]) + ' bp to chromosomal reference sequence ' + str( + hgvs_genomic.ac) + ' to ensure perfect alignment with transcript reference sequence(s) ' + gapped_transcripts + + # Add additional data to the front of automap + if auto_info != '': + automap = auto_info + '\n' + automap + + rel_var = copy.deepcopy(nw_rel_var) + + # Set the values and append to batch_list + for c_description in rel_var: + query = {'quibble': str(c_description), 'id': validation['id'], + 'warnings': validation['warnings'], 'description': '', 'coding': '', + 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', 'write': 'true', + 'primary_assembly': primary_assembly, 'order': ordering} + batch_list.append(query) + logger.warning("Continue reached when mapping transcript types to variants") + # Call next description + continue + # TYPE = :c. + + if type == ':c.' or type == ':n.': + + # Flag for validation + valid = 'false' + # Collect information for genomic level validation + obj = hp.parse_hgvs_variant(variant) + + tx_ac = obj.ac + + # Do we keep it? + if select_transcripts != 'all': + if tx_ac in select_transcripts_dict_plus_version.keys(): + pass + # If not get rid of it! 
+ else: + # By marking it as Do Not Write and continuing through the validation loop + validation['write'] = 'false' + continue + else: + pass + + # Set a cross_variant object + cross_variant = 'false' + # Se rec_var to '' so it can be updated later + rec_var = '' + try: + to_g = va_func.myevm_t_to_g(obj, hdp, no_norm_evm, primary_assembly, vm, hp, hn, sf, nr_vm) + genomic_ac = to_g.ac + except hgvs.exceptions.HGVSDataNotAvailableError as e: + if (re.search('~', str(e)) and re.search('Alignment is incomplete', str(e))) or re.match( + "No relevant genomic mapping options available", str(e)): + reason = 'Unable to map the input variant onto a genomic position' + if (re.search('~', str(e)) and re.search('Alignment is incomplete', str(e))): + error_list = str(e).split('~')[:-1] + combos = [ + 'Full alignment data between the specified transcript reference sequence and all GRCh37 and GRCh38 genomic reference sequences (including alternate chromosome assemblies, patches and RefSeqGenes) are not available: Consequently the input variant description cannot be fully validated and is not supported: Use the Gene to Transcripts function to determine whether an updated transcript reference sequence is available'] # Partial alignment data is available for the following genomic reference sequences: '] + error = '; '.join(combos) + error = error.replace(': ;', ': ') + else: + error = str(e) + error = error + ': Consequently the input variant description cannot be fully validated and is not supported: Use the Gene to Transcripts function to determine whether an updated transcript reference sequence is available' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + try: + gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(tx_ac) + except: + gene_symbol = 'none' + if gene_symbol == 'none': + error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an 
alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' + else: + error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + except TypeError as e: + try: + gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(tx_ac) + except: + gene_symbol = 'none' + if gene_symbol == 'none': + error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' + else: + error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + # Get orientation of the gene wrt genome and a list of exons mapped to the genome + ori = va_func.tx_exons(tx_ac=tx_ac, alt_ac=genomic_ac, alt_aln_method=alt_aln_method, hdp=hdp) + orientation = int(ori[0]['alt_strand']) + intronic_variant = 'false' + + # Collect variant sequence information via normalisation (normalizer) or if intronic via mapping + # INTRONIC OFFSETS - Required for Exon table + # Variable to collect offset to exon boundary + ex_offset = 0 + plus = re.compile("\d\+\d") # finds digit + digit + minus = re.compile("\d\-\d") # finds digit - digit + + geno = re.compile(':g.') + if 
plus.search(input) or minus.search(input): + es = re.compile('error') + if es.search(str(to_g)): + if alt_aln_method != 'genebuild': + error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g + reason = "An error has occurred" + excep = "%s -- %s -- %s\n" % (time.ctime(), reason, variant) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + else: + error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g + reason = "An error has occurred" + excep = "%s -- %s -- %s\n" % (time.ctime(), reason, variant) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + else: + # Insertions at exon boundaries are miss-handled by vm.g_to_t + if ( + obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset == 0 and obj.posedit.pos.end.offset != 0) or ( + obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset != 0 and obj.posedit.pos.end.offset == 0): + variant = str(obj) + else: + # Normalize was I believe to replace ref. 
Mapping does this anyway + # to_g = hn.normalize(to_g) + variant = str(va_func.myevm_g_to_t(evm, to_g, tx_ac)) + tx_ac = '' + + elif geno.search(input): + if plus.search(variant) or minus.search(variant): + to_g = va_func.genomic(variant, no_norm_evm, hp, hdp, primary_assembly, vm, hn, sf, nr_vm) + es = re.compile('error') + if es.search(str(to_g)): + if alt_aln_method != 'genebuild': + error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g + reason = "An error has occurred" + excep = "%s -- %s -- %s\n" % (time.ctime(), reason, variant) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + else: + error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g + reason = "An error has occurred" + excep = "%s -- %s -- %s\n" % (time.ctime(), reason, variant) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + # Insertions at exon boundaries are miss-handled by vm.g_to_t + if ( + obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset == 0 and obj.posedit.pos.end.offset != 0) or ( + obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset != 0 and obj.posedit.pos.end.offset == 0): + variant = str(obj) + else: + # Normalize was I believe to replace ref. 
Mapping does this anyway + # to_g = hn.normalize(to_g) + variant = str(va_func.myevm_g_to_t(evm, to_g, tx_ac)) + tx_ac = '' + + else: + # Normalize the variant + error = 'false' + try: + h_variant = hn.normalize(obj) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Unsupported normalization of variants spanning the exon-intron boundary', + error): + h_variant = obj + variant = variant + caution = 'This coding sequence variant description spans at least one intron' + automap = 'Use of the corresponding genomic sequence variant descriptions may be invalid. Please refer to https://www35.lamp.le.ac.uk/recommendations/' + validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str( + automap) + logger.warning(str(caution) + ": " + str(automap)) + else: + variant = str(h_variant) + + tx_ac = '' + # Create a crosser (exon boundary crossed) variant + crossed_variant = str(evm._maybe_normalize(obj)) + if variant == crossed_variant: + cross_variant = 'false' + else: + hgvs_crossed_variant = evm._maybe_normalize(obj) + cross_variant = [ + "Coding sequence allowing for exon boundary crossing (default = no crossing)", + crossed_variant, hgvs_crossed_variant.ac] + cr_available = 'true' + + # control of cross_variant + if boundary == 'false': + cross_variant = 'false' + + error = va_func.validate(variant, hp=hp, vr=vr) + if error == 'false': + valid = 'true' + else: + excep = "%s -- %s -- %s\n" % (time.ctime(), error, variant) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + # Tackle the plus intronic offset + cck = 'false' + if (plus.search(input)): + # Regular expression catches the start of the interval only based on .00+00 pattern + inv_start = re.compile("\.\d+\+\d") + if (inv_start.search(input)): + # Find pattern e.g. 
+0000 and assign to a variable + off_value = re.search(r"(\+\d+)", input) + off_value = off_value.group(1) + # Integerise the value and assign to ex_offset + ex_offset = int(off_value) + cck = 'true' + if (minus.search(input)): + # Regular expression catches the start of the interval only based on .00-00 pattern + inv_start = re.compile("\.\d+\-\d") + if (inv_start.search(input)): + # Find pattern e.g. -0000 and assign to a variable + off_value = re.search(r"(\-\d+)", input) + off_value = off_value.group(1) + # Integerise the value and assign to ex_offset + ex_offset = int(off_value) + cck = 'true' + + # COORDINATE CHECKER + # hgvs will handle incorrect coordinates so need to automap errors + # Make sure any input intronic coordinates are correct + # Get the desired transcript + pat_r = re.compile(':r.') + pat_g = re.compile(':g.') + if cck == 'true': + dl = re.compile('del') + # This should only ever hit coding and RNA variants + if dl.search(variant): + # RNA + if pat_r.search(trapped_input): + + coding = va_func.coding(variant, hp) + trans_acc = coding.ac + # c to Genome coordinates - Map the variant to the genome + pre_var = va_func.genomic(variant, no_norm_evm, hp, hdp, primary_assembly, vm, hn, sf, + nr_vm) + # genome back to C coordinates + post_var = va_func.myevm_g_to_t(evm, pre_var, trans_acc) + + test = hp.parse_hgvs_variant(input) + if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: + caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' + automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' + # automapping of variant completed + # Change to rna variant + posedit = query.posedit + posedit = posedit.lower() + query.posedit = posedit + query.type = 'r' + post_var = str(query) + automap = trapped_input + ' automapped to ' + str(post_var) + validation['warnings'] = 
validation['warnings'] + ': ' + str(caution) + ': ' + str( + automap) + relevant = "Select the automapped transcript and click Submit to analyse" + rel_var = [] + rel_var.append(post_var) + # Add gene symbols to the link + cp_rel = copy.copy(rel_var) + del rel_var[:] + for accessions in cp_rel: + error = 'false' + hgvs_vt = hp.parse_hgvs_variant(str(accessions)) + try: + tx_id_info = hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error != 'false': + accessions = ['', str(hgvs_vt)] + rel_var.append(accessions) + + else: + # Get hgnc Gene name from command + data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) + if data['error'] != 'false': + error = data['error'] + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + # Set the hgnc name correctly + # If the name is correct no record will be found + if int(data['record']['response']['numFound']) == 0: + current = tx_id_info[6] + else: + current = data['record']['response']['docs'][0]['symbol'] + accessions = [str(current), str(hgvs_vt)] + rel_var.append(accessions) + # Add gene symbols to the link + cp_rel = copy.copy(rel_var) + del rel_var[:] + for accessions in cp_rel: + error = 'false' + hgvs_vt = hp.parse_hgvs_variant(str(accessions[1])) + try: + tx_id_info = hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error != 'false': + accessions = ['', str(hgvs_vt)] + rel_var.append(accessions) + else: + # Get hgnc Gene name from command + data = va_func.hgnc_rest( + path="/search/prev_symbol/" + tx_id_info[6]) + if data['error'] != 'false': + error = data['error'] + validation['warnings'] = validation['warnings'] + ': ' + str( + error) + logger.warning(str(error)) + continue + else: + # Set the hgnc name correctly + # If the name is correct no record will be found + if int(data['record']['response']['numFound']) == 0: + current 
= tx_id_info[6] + else: + current = data['record']['response']['docs'][0]['symbol'] + accessions = [str(current), str(hgvs_vt)] + rel_var.append(accessions) + # Kill current line and append for re-submission + # Tag the line so that it is not written out + validation['write'] = 'false' + # Set the values and append to batch_list + query = {'quibble': valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, + 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', + 'genomic_g': '', 'protein': '', 'write': 'true', + 'primary_assembly': primary_assembly, 'order': ordering} + batch_list.append(query) + + # Coding + else: + coding = va_func.coding(variant, hp) + trans_acc = coding.ac + # c to Genome coordinates - Map the variant to the genome + pre_var = hp.parse_hgvs_variant(variant) + try: + pre_var = va_func.myevm_t_to_g(pre_var, hdp, no_norm_evm, primary_assembly, vm, hp, + hn, sf, nr_vm) + except: + e = sys.exc_info()[1] + error = str(e) + reason = 'Input coordinates may be invalid' + if error == 'expected from_start_i <= from_end_i': + error = 'Automap is unable to correct the input exon/intron boundary coordinates, please check your variant description' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + continue + else: + exceptPass() + else: + exceptPass() + # genome back to C coordinates + try: + post_var = va_func.myevm_g_to_t(evm, pre_var, trans_acc) + except hgvs.exceptions.HGVSError as error: + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + query = post_var + test = hp.parse_hgvs_variant(input) + if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: + caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' + automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' + # 
automapping of variant completed + automap = trapped_input + ' automapped to ' + str(post_var) + validation['warnings'] = str(validation['warnings']) + str(caution) + ': ' + str( + automap) + relevant = "Select the automapped transcript and click Submit to analyse" + rel_var = [] + rel_var.append(post_var) + # Add gene symbols to the link + cp_rel = copy.copy(rel_var) + del rel_var[:] + for accessions in cp_rel: + error = 'false' + hgvs_vt = hp.parse_hgvs_variant(str(accessions)) + try: + tx_id_info = hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error != 'false': + accessions = ['', str(hgvs_vt)] + rel_var.append(accessions) + else: + # Get hgnc Gene name from command + data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) + if data['error'] != 'false': + error = data['error'] + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + else: + # Set the hgnc name correctly + # If the name is correct no record will be found + if int(data['record']['response']['numFound']) == 0: + current = tx_id_info[6] + else: + current = data['record']['response']['docs'][0]['symbol'] + accessions = [str(current), str(hgvs_vt)] + rel_var.append(accessions) + # Add gene symbols to the link + cp_rel = copy.copy(rel_var) + del rel_var[:] + for accessions in cp_rel: + error = 'false' + hgvs_vt = hp.parse_hgvs_variant(str(accessions[1])) + try: + tx_id_info = hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error != 'false': + accessions = ['', str(hgvs_vt)] + rel_var.append(accessions) + else: + # Get hgnc Gene name from command + data = va_func.hgnc_rest( + path="/search/prev_symbol/" + tx_id_info[6]) + if data['error'] != 'false': + error = data['error'] + validation['warnings'] = validation['warnings'] + ': ' + str( + error) + logger.warning(str(error)) + continue + + else: + # Set the hgnc name 
correctly + # If the name is correct no record will be found + if int(data['record']['response']['numFound']) == 0: + current = tx_id_info[6] + else: + current = data['record']['response']['docs'][0]['symbol'] + accessions = [str(current), str(hgvs_vt)] + rel_var.append(accessions) + # Kill current line and append for re-submission + # Tag the line so that it is not written out + validation['write'] = 'false' + # Set the values and append to batch_list + query = {'quibble': valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, + 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', + 'genomic_g': '', 'protein': '', 'write': 'true', + 'primary_assembly': primary_assembly, 'order': ordering} + batch_list.append(query) + + else: + if pat_r.search(trapped_input): + coding = va_func.coding(variant, hp) + trans_acc = coding.ac + # c to Genome coordinates - Map the variant to the genome + pre_var = va_func.genomic(variant, no_norm_evm, hp, hdp, primary_assembly, vm, hn, sf, + nr_vm) + # genome back to C coordinates + post_var = va_func.myevm_g_to_t(evm, pre_var, trans_acc) + + test = hp.parse_hgvs_variant(input) + if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: + caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' + automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' + # automapping of variant completed + # Change to rna variant + posedit = query.posedit + posedit = posedit.lower() + query.posedit = posedit + query.type = 'r' + post_var = str(query) + automap = input + ' automapped to ' + post_var + validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str( + automap) + relevant = "Select the automapped transcript and click Submit to analyse" + rel_var = [] + rel_var.append(post_var) + # Add gene symbols to the link + cp_rel = 
copy.copy(rel_var) + del rel_var[:] + for accessions in cp_rel: + error = 'false' + hgvs_vt = hp.parse_hgvs_variant(str(accessions)) + try: + tx_id_info = hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error != 'false': + accessions = ['', str(hgvs_vt)] + rel_var.append(accessions) + else: + # Get hgnc Gene name from command + data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) + if data['error'] != 'false': + error = data['error'] + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + else: + # Set the hgnc name correctly + # If the name is correct no record will be found + if int(data['record']['response']['numFound']) == 0: + current = tx_id_info[6] + else: + current = data['record']['response']['docs'][0]['symbol'] + accessions = [str(current), str(hgvs_vt)] + rel_var.append(accessions) + # Kill current line and append for re-submission + # Tag the line so that it is not written out + validation['write'] = 'false' + # Set the values and append to batch_list + query = {'quibble': valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, + 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', + 'genomic_g': '', 'protein': '', 'write': 'true', + 'primary_assembly': primary_assembly, 'order': ordering} + batch_list.append(query) + + else: + coding = va_func.coding(variant, hp) + trans_acc = coding.ac + # c to Genome coordinates - Map the variant to the genome + pre_var = va_func.genomic(variant, no_norm_evm, hp, hdp, primary_assembly, vm, hn, sf, + nr_vm) + # genome back to C coordinates + post_var = va_func.myevm_g_to_t(evm, pre_var, trans_acc) + + test = hp.parse_hgvs_variant(input) + if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: + caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected 
transcript:' + automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' + # automapping of variant completed + automap = str(trapped_input) + ' automapped to ' + str(post_var) + validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str( + automap) + relevant = "Select the automapped transcript and click Submit to analyse" + rel_var = [] + rel_var.append(post_var) + # Add gene symbols to the link + cp_rel = copy.copy(rel_var) + del rel_var[:] + for accessions in cp_rel: + error = 'false' + hgvs_vt = hp.parse_hgvs_variant(str(accessions)) + try: + tx_id_info = hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error != 'false': + accessions = ['', str(hgvs_vt)] + rel_var.append(accessions) + else: + # Get hgnc Gene name from command + data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) + if data['error'] != 'false': + reason = 'Cannot currently display the required information:' + error = data['error'] + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + else: + # Set the hgnc name correctly + # If the name is correct no record will be found + if int(data['record']['response']['numFound']) == 0: + current = tx_id_info[6] + else: + current = data['record']['response']['docs'][0]['symbol'] + accessions = [str(current), str(hgvs_vt)] + rel_var.append(accessions) + # Kill current line and append for re-submission + # Tag the line so that it is not written out + validation['write'] = 'false' + # Set the values and append to batch_list + query = {'quibble': valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, + 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', + 'genomic_g': '', 'protein': '', 'write': 'true', + 'primary_assembly': primary_assembly, 'order': ordering} + batch_list.append(query) + + + # If cck not true + elif 
pat_r.search(trapped_input): + # set input hgvs object + hgvs_rna_input = hp.parse_hgvs_variant( + trapped_input) # Traps the hgvs variant of r. for further use + inp = str(va_func.hgvs_r_to_c(hgvs_rna_input)) + # Regex + plus = re.compile("\d\+\d") # finds digit + digit + minus = re.compile("\d\-\d") # finds digit - digit + if plus.search(input) or minus.search(input): + to_g = va_func.genomic(inp, no_norm_evm, hp, hdp, primary_assembly, vm, hn, sf, nr_vm) + es = re.compile('error') + if es.search(str(to_g)): + if alt_aln_method != 'genebuild': + error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g + reason = "An error has occurred" + excep = "%s -- %s -- %s\n" % (time.ctime(), reason, variant) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + else: + error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g + reason = "An error has occurred" + excep = "%s -- %s -- %s\n" % (time.ctime(), reason, variant) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + else: + # Set variants pre and post genomic norm + hgvs_inp = va_func.myevm_g_to_t(evm, to_g, tx_ac=obj.ac) + to_g = hn.normalize(to_g) + hgvs_otp = va_func.myevm_g_to_t(evm, to_g, tx_ac=obj.ac) + tx_ac = '' + else: + # Set variants pre and post RNA norm + hgvs_inp = hp.parse_hgvs_variant(inp) + try: + hgvs_otp = hn.normalize(hgvs_inp) + except hgvs.exceptions.HGVSError as e: + hgvs_otp = hgvs_inp + tx_ac = '' + + # Set remaining variables + redit = str(hgvs_otp.posedit.edit) + redit = redit.lower() + hgvs_otp.posedit.edit = redit + otp = str(hgvs_otp) + query = str(hgvs_otp.posedit.pos) + test = str(hgvs_inp.posedit.pos) + query = query.replace('T', 'U') + query = query.replace('ENSU', 'ENST') + test = test.replace('T', 'U') + test = test.replace('ENSU', 
'ENST') + output = otp.replace(':c.', ':r.') + # Apply coordinates test + if query != test: + caution = 'The variant description ' + input + ' requires alteration to comply with HGVS variant nomenclature:' + automap = 'Automap has corrected the variant description' + # automapping of variant completed + automap = trapped_input + ' automapped to ' + output + validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str(automap) + relevant = "Select the automapped transcript and click Submit to analyse" + rel_var = [] + rel_var.append(output) + # Add gene symbols to the link + cp_rel = copy.copy(rel_var) + del rel_var[:] + for accessions in cp_rel: + error = 'false' + hgvs_vt = hp.parse_hgvs_variant(str(accessions)) + try: + tx_id_info = hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error != 'false': + accessions = ['', str(hgvs_vt)] + rel_var.append(accessions) + else: + # Get hgnc Gene name from command + data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) + if data['error'] != 'false': + error = data['error'] + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + else: + # Set the hgnc name correctly + # If the name is correct no record will be found + if int(data['record']['response']['numFound']) == 0: + current = tx_id_info[6] + else: + current = data['record']['response']['docs'][0]['symbol'] + accessions = [str(current), str(hgvs_vt)] + rel_var.append(accessions) + # Kill current line and append for re-submission + # Tag the line so that it is not written out + validation['write'] = 'false' + # Set the values and append to batch_list + query = {'quibble': valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, + 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', + 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, + 'order': ordering} + 
batch_list.append(query) + + elif pat_g.search(input): + pass + + else: + query = hp.parse_hgvs_variant(variant) + test = hp.parse_hgvs_variant(input) + if query.posedit.pos != test.posedit.pos: + caution = 'The variant description ' + input + ' requires alteration to comply with HGVS variant nomenclature:' + automap = 'Automap has corrected the variant description' + # automapping of variant completed + automap = str(test) + ' automapped to ' + str(query) + validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str(automap) + relevant = "Select the automapped transcript and click Submit to analyse" + rel_var = [] + rel_var.append(query) + # Add gene symbols to the link + cp_rel = copy.copy(rel_var) + del rel_var[:] + for accessions in cp_rel: + error = 'false' + hgvs_vt = hp.parse_hgvs_variant(str(accessions)) + try: + tx_id_info = hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error != 'false': + accessions = ['', str(hgvs_vt)] + rel_var.append(accessions) + else: + # Get hgnc Gene name from command + data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) + if data['error'] != 'false': + reason = 'Cannot currently display the required information:' + error = data['error'] + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + else: + # Set the hgnc name correctly + # If the name is correct no record will be found + if int(data['record']['response']['numFound']) == 0: + current = tx_id_info[6] + else: + current = data['record']['response']['docs'][0]['symbol'] + accessions = [str(current), str(hgvs_vt)] + rel_var.append(accessions) + # Kill current line and append for re-submission + # Tag the line so that it is not written out + validation['write'] = 'false' + # Set the values and append to batch_list + query = {'quibble': valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, + 'description': '', 
'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', + 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, + 'order': ordering} + batch_list.append(query) + + # VALIDATION of intronic variants + pre_valid = hp.parse_hgvs_variant(input) + post_valid = hp.parse_hgvs_variant(variant) + if valid == 'false': + error = 'false' + genomic_validation = str( + va_func.genomic(input, no_norm_evm, hp, hdp, primary_assembly, vm, hn, sf, nr_vm)) + del_end = re.compile('\ddel$') + delins = re.compile('delins') + inv = re.compile('inv') + if valstr(pre_valid) != valstr(post_valid): + if type != ':g.': + if caution == '': + caution = valstr(pre_valid) + ' automapped to ' + valstr(post_valid) + else: + pass + validation['warnings'] = validation['warnings'] + ': ' + str(caution) + logger.warning(str(caution)) + else: + pass + else: + pass + + # Apply validation to intronic variant descriptions (should be valid but make sure) + error = va_func.validate(genomic_validation, hp=hp, vr=vr) + if error == 'false': + valid = 'true' + else: + + excep = "%s -- %s -- %s\n" % (time.ctime(), error, variant) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + continue + + if valid == 'true': + var_tab = 'true' + cores = "HGVS-compliant variant descriptions" + warning + + # v0.1a1 edit + if valstr(pre_valid) != valstr(post_valid): + if type == ':g.': + if caution == '': + caution = valstr(pre_valid) + ' automapped to ' + valstr(post_valid) + else: + pass + validation['warnings'] = validation['warnings'] + ': ' + str(caution) + else: + pass + else: + pass + + # COLLECT VARIANT DESCRIPTIONS + ############################## + + # Coding sequence - BASED ON NORMALIZED VARIANT IF EXONIC + hgvs_coding = va_func.coding(variant, hp) + boundary = re.compile('exon-intron boundary') + spanning = re.compile('exon/intron') + + try: + hgvs_coding = hn.normalize(hgvs_coding) + except hgvs.exceptions.HGVSError as e: + error = str(e) + + # Gap compensating code 
status + gap_compensation = True + + # Gap gene black list + try: + gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(hgvs_coding.ac) + except Exception: + exceptPass() + else: + # If the gene symbol is not in the list, the value False will be returned + gap_compensation = gapGenes.gap_black_list(gene_symbol) + + # Intron spanning variants + if re.search('boundary', str(error)) or re.search('spanning', str(error)): + try: + hgvs_coding = evm._maybe_normalize(hgvs_coding) + gap_compensation = False + except hgvs.exceptions.HGVSError as error: + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + pass + + # Warn status + logger.warning("gap_compensation_1 = " + str(gap_compensation)) + coding = valstr(hgvs_coding) + + # RNA sequence + hgvs_rna = copy.deepcopy(hgvs_coding) + hgvs_rna = va_func.hgvs_c_to_r(hgvs_rna) + rna = str(hgvs_rna) + + # Genomic sequence + hgvs_genomic = va_func.myevm_t_to_g(hgvs_coding, hdp, no_norm_evm, primary_assembly, vm, hp, hn, + sf, nr_vm) + final_hgvs_genomic = hgvs_genomic + + # genomic_possibilities + # 1. take the simple 3 pr normalized hgvs_genomic + # 2. Lock in hgvs_genomic at its most 5 prime position wrt genome + hgvs_genomic_possibilities = [] + + # Loop out gap finding code under these circumstances! 
+ if gap_compensation is True: + logger.warning('g_to_t gap code 1 active') + rn_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic) + hgvs_genomic_possibilities.append(rn_hgvs_genomic) + if orientation != -1: + try: + chromosome_normalized_hgvs_coding = reverse_normalizer.normalize(hgvs_coding) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + chromosome_normalized_hgvs_coding = hgvs_coding + else: + try: + chromosome_normalized_hgvs_coding = hn.normalize(hgvs_coding) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + chromosome_normalized_hgvs_coding = hgvs_coding + + most_3pr_hgvs_genomic = va_func.myvm_t_to_g(chromosome_normalized_hgvs_coding, + hgvs_genomic.ac, no_norm_evm, vm, hp, hn, sf, + nr_vm) + hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) + + # Push from side to side to try pick up odd placements + # MAKE A NO NORM HGVS2VCF + # First to the right + hgvs_stash = copy.deepcopy(hgvs_coding) + try: + hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) + except: + exceptPass() + try: + stash_ac = hgvs_stash.ac + stash_dict = va_H2V.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, sf) + stash_pos = int(stash_dict['pos']) + stash_ref = stash_dict['ref'] + stash_alt = stash_dict['alt'] + # Generate an end position + stash_end = str(stash_pos + len(stash_ref) - 1) + # make a not real deletion insertion + stash_hgvs_not_delins = hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' 
+ str( + stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + try: + stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) + except: + exceptPass() + # Store a tx copy for later use + test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) + # stash_genomic = vm.t_to_g(test_stash_tx_right, hgvs_genomic.ac) + stash_genomic = va_func.myvm_t_to_g(test_stash_tx_right, hgvs_genomic.ac, no_norm_evm, + vm, hp, hn, sf, nr_vm) + # Stash the outputs if required + # test variants = NC_000006.11:g.90403795G= (causes double identity) + # NC_000002.11:g.73675227_73675228insCTC (? incorrect assumed insertion position) + # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': + # pass + if len(test_stash_tx_right.posedit.edit.ref) == (( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + stash_tx_right = test_stash_tx_right + if hasattr(test_stash_tx_right.posedit.edit, + 'alt') and test_stash_tx_right.posedit.edit.alt is not None: + alt = test_stash_tx_right.posedit.edit.alt + else: + alt = '' + if hasattr(stash_genomic.posedit.edit, + 'alt') and stash_genomic.posedit.edit.alt is not None: + g_alt = stash_genomic.posedit.edit.alt + else: + g_alt = '' + if (len(alt) - ( + test_stash_tx_right.posedit.pos.end.base - test_stash_tx_right.posedit.pos.start.base) + 1) != ( + len(g_alt) - ( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + hgvs_genomic_possibilities.append(stash_genomic) + else: + hgvs_genomic_possibilities.append('') + elif test_stash_tx_right.posedit.edit.type == 'identity': + reform_ident = str(test_stash_tx_right).split(':')[0] + reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_right.posedit.pos) + 'del' + str( + test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) + hgvs_reform_ident = hp.parse_hgvs_variant(reform_ident) + try: + hn.normalize(hgvs_reform_ident) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error): + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append('') + else: + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append(stash_genomic) + else: + try: + hn.normalize(test_stash_tx_right) + except hgvs.exceptions.HGVSUnsupportedOperationError: + hgvs_genomic_possibilities.append('') + else: + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append(stash_genomic) + except hgvs.exceptions.HGVSError as e: + test_stash_tx_right = copy.deepcopy(hgvs_coding) + exceptPass() + # Intronic positions not supported. Will cause a Value Error + except ValueError: + test_stash_tx_right = copy.deepcopy(hgvs_coding) + exceptPass() + + # Then to the left + hgvs_stash = copy.deepcopy(hgvs_coding) + try: + hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) + except: + exceptPass() + try: + stash_ac = hgvs_stash.ac + stash_dict = va_H2V.hard_left_hgvs2vcf(hgvs_stash, primary_assembly, reverse_normalizer, + sf) + stash_pos = int(stash_dict['pos']) + stash_ref = stash_dict['ref'] + stash_alt = stash_dict['alt'] + # Generate an end position + stash_end = str(stash_pos + len(stash_ref) - 1) + # make a not real deletion insertion + stash_hgvs_not_delins = hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' 
+ str( + stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + try: + stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) + except: + exceptPass() + # Store a tx copy for later use + test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) + # stash_genomic = vm.t_to_g(test_stash_tx_left, hgvs_genomic.ac) + stash_genomic = va_func.myvm_t_to_g(test_stash_tx_left, hgvs_genomic.ac, no_norm_evm, + vm, hp, hn, sf, nr_vm) + # Stash the outputs if required + # test variants = NC_000006.11:g.90403795G= (causes double identity) + # NC_000002.11:g.73675227_73675228insCTC + # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': + # pass + if len(test_stash_tx_left.posedit.edit.ref) == (( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): + stash_tx_left = test_stash_tx_left + if hasattr(test_stash_tx_left.posedit.edit, + 'alt') and test_stash_tx_left.posedit.edit.alt is not None: + alt = test_stash_tx_left.posedit.edit.alt + else: + alt = '' + if hasattr(stash_genomic.posedit.edit, + 'alt') and stash_genomic.posedit.edit.alt is not None: + g_alt = stash_genomic.posedit.edit.alt + else: + g_alt = '' + + if (len(alt) - ( + test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( + len(g_alt) - ( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + hgvs_genomic_possibilities.append(stash_genomic) + else: + hgvs_genomic_possibilities.append('') + elif test_stash_tx_left.posedit.edit.type == 'identity': + reform_ident = str(test_stash_tx_left).split(':')[0] + reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_left.posedit.pos) + 'del' + str( + test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) + hgvs_reform_ident = hp.parse_hgvs_variant(reform_ident) + try: + hn.normalize(hgvs_reform_ident) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error): + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append('') + else: + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append(stash_genomic) + else: + try: + hn.normalize(test_stash_tx_left) + except hgvs.exceptions.HGVSUnsupportedOperationError: + hgvs_genomic_possibilities.append('') + else: + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append(stash_genomic) + except hgvs.exceptions.HGVSError as e: + test_stash_tx_left = copy.deepcopy(hgvs_coding) + exceptPass() + except ValueError: + test_stash_tx_left = copy.deepcopy(hgvs_coding) + exceptPass() + + # direct mapping from reverse_normalized transcript insertions in the delins format + try: + if hgvs_coding.posedit.edit.type == 'ins': + most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) + most_3pr_hgvs_transcript_variant = reverse_normalizer.normalize(hgvs_coding) + try: + n_3pr = vm.c_to_n(most_3pr_hgvs_transcript_variant) + n_5pr = vm.c_to_n(most_5pr_hgvs_transcript_variant) + except: + n_3pr = most_3pr_hgvs_transcript_variant + n_5pr = most_5pr_hgvs_transcript_variant + # Make into a delins by adding the ref bases to the variant ref and alt + pr3_ref = sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, + n_3pr.posedit.pos.end.base) + pr5_ref = sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, + n_5pr.posedit.pos.end.base) + most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref + most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[ + 0] + most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ + 
pr3_ref[1] + most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[ + 0] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ + pr5_ref[1] + # Map to the genome + genomic_from_most_3pr_hgvs_transcript_variant = vm.t_to_g( + most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) + genomic_from_most_5pr_hgvs_transcript_variant = vm.t_to_g( + most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) + # Normalize - If the variant spans a gap it should then form a static genomic variant + try: + genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( + genomic_from_most_3pr_hgvs_transcript_variant) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if error == 'base start position must be <= end position': + start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base + end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start + genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( + genomic_from_most_3pr_hgvs_transcript_variant) + try: + genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( + genomic_from_most_5pr_hgvs_transcript_variant) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if error == 'base start position must be <= end position': + start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base + end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start + genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( + genomic_from_most_5pr_hgvs_transcript_variant) + try: + if genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + except 
Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' + str( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant = hp.parse_hgvs_variant( + genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) + + try: + if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' + str( + most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant = hp.parse_hgvs_variant( + most_3pr_hgvs_transcript_variant_delins_from_dup) + + try: + if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' 
+ str( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant = hp.parse_hgvs_variant( + genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) + + try: + if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' + str( + most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant = hp.parse_hgvs_variant( + most_5pr_hgvs_transcript_variant_delins_from_dup) + + if len(genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( + most_3pr_hgvs_transcript_variant.posedit.edit.alt): + hgvs_genomic_possibilities.append(genomic_from_most_3pr_hgvs_transcript_variant) + if len(genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( + most_5pr_hgvs_transcript_variant.posedit.edit.alt): + hgvs_genomic_possibilities.append(genomic_from_most_5pr_hgvs_transcript_variant) + + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + pass + + 
logger.info('\nGENOMIC POSSIBILITIES') + for possibility in hgvs_genomic_possibilities: + if possibility == '': + logger.info('X') + else: + logger.info(valstr(possibility)) + + logger.info('\n') + + # Set variables for problem specific warnings + gapped_alignment_warning = '' + corrective_action_taken = '' + gapped_transcripts = '' + auto_info = '' + + # Mark as not disparity detected + disparity_deletion_in = ['false', 'false'] + + # Loop through to see if a gap can be located + # Set the variables required for corrective normalization + possibility_counter = 0 + suppress_c_normalization = 'false' # Applies to boundary crossing normalization + + # Copy a version of hgvs_genomic_possibilities + for possibility in hgvs_genomic_possibilities: + possibility_counter = possibility_counter + 1 + + # Loop out stash possibilities which will not spot gaps so are empty + if possibility == '': + continue + + # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps + hgvs_genomic_variant = copy.deepcopy(possibility) + stored_hgvs_genomic_variant = copy.deepcopy(hgvs_genomic_variant) + + # Reverse normalize hgvs_genomic_variant: NOTE will replace ref + try: + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic_variant) + except hgvs.exceptions.HGVSError as e: + # Strange error caused by gap in genomic + error = str(e) + if re.search('base start position must be <= end position', error): + if hgvs_genomic.posedit.edit.type == 'delins': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic) + 
if hgvs_genomic.posedit.edit.type == 'del': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic) + if re.search('insertion length must be 1', error): + if hgvs_genomic.posedit.edit.type == 'ins': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) + lhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + rhb = sf.fetch_seq(str(hgvs_genomic.ac), start, end) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic) + + hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + # Store a copy for later use + stored_hgvs_genomic_5pr = copy.deepcopy(hgvs_genomic_5pr) + + # Create VCF + vcf_dict = va_H2V.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, + reverse_normalizer, sf) + chr = vcf_dict['chr'] + pos = vcf_dict['pos'] + ref = vcf_dict['ref'] + alt = vcf_dict['alt'] + + # Look for exonic gaps within transcript or chromosome + no_normalized_c = 'false' # Mark true to not produce an additional normalization of c. + + # Generate an end position + end = str(int(pos) + len(ref) - 1) + pos = str(pos) + + # Store a not real deletion insertion to test for gapping + stored_hgvs_not_delins = hp.parse_hgvs_variant(str( + hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' 
+ pos + '_' + end + 'del' + ref + 'ins' + alt) + v = [chr, pos, ref, alt] + + # Detect intronic variation using normalization + intronic_variant = 'false' + + # Save a copy of current hgvs_coding + try: + saved_hgvs_coding = no_norm_evm.g_to_t(stored_hgvs_not_delins, hgvs_coding.ac) + except hgvs.exceptions.HGVSInvalidIntervalError as e: + if str(e) == 'start or end or both are beyond the bounds of transcript record': + saved_hgvs_coding = hgvs_coding + intronic_variant = 'true' + continue + else: + saved_hgvs_coding = no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, + hgvs_coding.ac) + + # Look for normalized variant options that do not match hgvs_coding + if orientation == -1: + # position genomic at its most 5 prime position + try: + query_genomic = reverse_normalizer.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript and test for movement + try: + hgvs_seek_var = evm.g_to_t(query_genomic, hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if ( + hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding + + elif orientation != -1: + # position genomic at its most 3 prime position + try: + query_genomic = hn.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript ant test for movement + try: + hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if ( + 
hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding + + try: + intron_test = hn.normalize(hgvs_seek_var) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + intronic_variant = 'hard_fail' + else: + # Double check to see whether the variant is actually intronic? + for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' + + if intronic_variant != 'hard_fail': + if re.search('\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search('\d+\-', str( + hgvs_seek_var.posedit.pos)) or re.search('\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search('\*\d+\-', + str(hgvs_seek_var.posedit.pos)): + # Double check to see whether the variant is actually intronic? 
+ for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' + + if re.search('\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search('\d+\-', str( + hgvs_seek_var.posedit.pos)) or re.search('\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search('\*\d+\-', str(hgvs_seek_var.posedit.pos)): + # Double check to see whether the variant is actually intronic? + for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' + + if intronic_variant != 'true': + # Flag RefSeqGene for ammendment + # amend_RefSeqGene = 'false' + # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths + if stored_hgvs_not_delins != '': + # Refresh hgvs_not_delins from stored_hgvs_not_delins + hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) + # This test will only occur in dup of single base, insertion or substitution + if not re.search('_', str(hgvs_not_delins.posedit.pos)): + if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', + hgvs_genomic_5pr.posedit.edit.type): + # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos + plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) + plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 + plussed_hgvs_not_delins.posedit.edit.ref = '' + transcript_variant = no_norm_evm.g_to_t(plussed_hgvs_not_delins, + str(saved_hgvs_coding.ac)) + if (( + transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( + hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + elif re.search('ins', + str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', + str( + hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + else: + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = 
hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + elif re.search('ins', + str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', + str( + hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + else: + pass + else: + pass + try: + tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSInvalidIntervalError: + tx_hgvs_not_delins = no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, + saved_hgvs_coding.ac) + # Create normalized version of tx_hgvs_not_delins + rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) + + # Check for +1 base and adjust + if re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( + '\+', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: + pass + + elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end 
base to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, + primary_assembly, vm, hp, hn, sf, + nr_vm) + + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + + # tx_hgvs_not_delins = rn_tx_hgvs_not_delins + elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, + primary_assembly, vm, hp, hn, sf, + nr_vm) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # else: + # pass + + # Check for -ve base and adjust + elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( + '\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: + pass + elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + 
rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # Delete the ref + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # Add the additional base to the ALT + start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 + end = rn_tx_hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, + primary_assembly, vm, hp, hn, sf, + nr_vm) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, + primary_assembly, vm, hp, hn, sf, + nr_vm) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + else: + pass + + # Logic + if len(hgvs_not_delins.posedit.edit.ref) < len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( + hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['chromosome', gap_length] + elif len(hgvs_not_delins.posedit.edit.ref) > len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = 
len(hgvs_not_delins.posedit.edit.ref) - len( + rn_tx_hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['transcript', gap_length] + else: + re_capture_tx_variant = [] + for internal_possibility in hgvs_genomic_possibilities: + if internal_possibility == '': + continue + + hgvs_t_possibility = vm.g_to_t(internal_possibility, hgvs_coding.ac) + if hgvs_t_possibility.posedit.edit.type == 'ins': + try: + hgvs_t_possibility = vm.c_to_n(hgvs_t_possibility) + except: + exceptPass() + ins_ref = sf.fetch_seq(hgvs_t_possibility.ac, + hgvs_t_possibility.posedit.pos.start.base - 1, + hgvs_t_possibility.posedit.pos.start.base + 1) + try: + hgvs_t_possibility = vm.n_to_c(hgvs_t_possibility) + except: + exceptPass() + hgvs_t_possibility.posedit.edit.ref = ins_ref + hgvs_t_possibility.posedit.edit.alt = ins_ref[ + 0] + hgvs_t_possibility.posedit.edit.alt + \ + ins_ref[1] + if internal_possibility.posedit.edit.type == 'ins': + ins_ref = sf.fetch_seq(internal_possibility.ac, + internal_possibility.posedit.pos.start.base - 1, + internal_possibility.posedit.pos.end.base) + internal_possibility.posedit.edit.ref = ins_ref + internal_possibility.posedit.edit.alt = ins_ref[ + 0] + internal_possibility.posedit.edit.alt + \ + ins_ref[1] + + if len(hgvs_t_possibility.posedit.edit.ref) < len( + internal_possibility.posedit.edit.ref): + gap_length = len(internal_possibility.posedit.edit.ref) - len( + hgvs_t_possibility.posedit.edit.ref) + re_capture_tx_variant = ['transcript', gap_length, + hgvs_t_possibility] + hgvs_not_delins = internal_possibility + hgvs_genomic_5pr = internal_possibility + break + + if re_capture_tx_variant != []: + try: + tx_hgvs_not_delins = vm.c_to_n(re_capture_tx_variant[2]) + except: + tx_hgvs_not_delins = re_capture_tx_variant[2] + disparity_deletion_in = re_capture_tx_variant[0:-1] + else: + pass + + # 'At hgvs_genomic' + # Final sanity checks + try: + vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + except Exception as e: + if str(e) == 'start or end or 
both are beyond the bounds of transcript record': + continue + try: + hn.normalize(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + continue + elif re.match('Normalization of intronic variants is not supported', error): + # We know that this cannot be because of an intronic variant, so must be aligned to tx gap + disparity_deletion_in = ['transcript', 'Requires Analysis'] + + # amend_RefSeqGene = 'false' + # Recreate hgvs_genomic + if disparity_deletion_in[0] == 'transcript': + hgvs_genomic = hgvs_not_delins + + # Find oddly placed gaps where the tx variant is encompassed in the gap + if disparity_deletion_in[0] == 'false' and ( + possibility_counter == 3 or possibility_counter == 4): + rg = reverse_normalizer.normalize(hgvs_not_delins) + rtx = vm.g_to_t(rg, tx_hgvs_not_delins.ac) + fg = hn.normalize(hgvs_not_delins) + ftx = vm.g_to_t(fg, tx_hgvs_not_delins.ac) + if (rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( + ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): + exons = hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, alt_aln_method) + exonic = False + for ex_test in exons: + if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ + 7]) and ftx.posedit.pos.end.base in range(ex_test[6], ex_test[7]): + exonic = True + if exonic is True: + hgvs_not_delins = fg + hgvs_genomic = fg + hgvs_genomic_5pr = fg + try: + tx_hgvs_not_delins = vm.c_to_n(ftx) + except Exception: + tx_hgvs_not_delins = ftx + disparity_deletion_in = ['transcript', 'Requires Analysis'] + + # Pre-processing of tx_hgvs_not_delins + try: + if tx_hgvs_not_delins.posedit.edit.alt is None: + 
tx_hgvs_not_delins.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( + tx_hgvs_not_delins.posedit.pos.start) + '_' + str( + tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins = hp.parse_hgvs_variant( + tx_hgvs_not_delins_delins_from_dup) + + # GAP IN THE TRANSCRIPT DISPARITY DETECTED + if disparity_deletion_in[0] == 'transcript': + # Suppress intron boundary crossing due to non-intron intron based c. seq annotations + suppress_c_normalization = 'true' + # amend_RefSeqGene = 'true' + # ANY VARIANT WHOLLY WITHIN THE GAP + if (re.search('\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( + '\-', + str( + tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search('\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( + '\-', + str( + tx_hgvs_not_delins.posedit.pos.end))): + + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + + # Copy the current variant + tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) + try: + if tx_gap_fill_variant.posedit.edit.alt is None: + tx_gap_fill_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( + tx_gap_fill_variant.posedit.pos.start) + '_' + str( + tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant = hp.parse_hgvs_variant( + tx_gap_fill_variant_delins_from_dup) + + # Identify which half of the NOT-intron the start position of the variant is in + if re.search('\-', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + elif re.search('\+', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + + try: + tx_gap_fill_variant = vm.n_to_c(tx_gap_fill_variant) + except: + exceptPass() + genomic_gap_fill_variant = vm.t_to_g(tx_gap_fill_variant, + reverse_normalized_hgvs_genomic.ac) + genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref + + try: + c_tx_hgvs_not_delins = vm.n_to_c(tx_hgvs_not_delins) + except Exception: + c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) + genomic_gap_fill_variant_alt = vm.t_to_g(c_tx_hgvs_not_delins, + hgvs_genomic_5pr.ac) + + # Ensure an ALT exists + try: + if genomic_gap_fill_variant_alt.posedit.edit.alt is None: + genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + 
genomic_gap_fill_variant.type + '.' + str( + genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant = hp.parse_hgvs_variant( + genomic_gap_fill_variant_delins_from_dup) + genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( + genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt = hp.parse_hgvs_variant( + genomic_gap_fill_variant_alt_delins_from_dup) + + # Correct insertion alts + if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': + append_ref = sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, + genomic_gap_fill_variant_alt.posedit.pos.end.base) + genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ + 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ + append_ref[1] + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) + if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: + alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) + else: + # Deletions with no ins + pre_alternate_bases = list( + genomic_gap_fill_variant_alt.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = genomic_gap_fill_variant.posedit.pos.start.base + alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base + ref_base_dict = {} + for base in 
reference_bases: + ref_base_dict[ref_start] = str(base) + ref_start = ref_start + 1 + + alt_base_dict = {} + + # NEED TO SEARCH FOR RANGE = and replace with interval_range + # Need to search for int and replace with integer + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, + genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, + 1): + if integer == alt_start: + alt_base_dict[integer] = str(''.join(alternate_bases)) + else: + alt_base_dict[integer] = 'X' + + # Generate the alt sequence + alternate_sequence_bases = [] + for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, + genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): + if integer in alt_base_dict.keys(): + alternate_sequence_bases.append(alt_base_dict[integer]) + else: + alternate_sequence_bases.append(ref_base_dict[integer]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Add the new alt to the gap fill variant and generate transcript variant + genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence + hgvs_refreshed_variant = vm.g_to_t(genomic_gap_fill_variant, + tx_gap_fill_variant.ac) + + # Set warning + gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) + disparity_deletion_in[1] = [gap_size] + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + if re.match('\-', str(for_location_c.posedit.pos.start.offset)): + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + else: + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + auto_info = auto_info + '%s' % (gap_position) + + else: + if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: + # In this instance, we have identified a transcript gap but the n. version of + # the transcript variant but do not have a position which actually hits the gap, + # so the variant likely spans the gap, and is not picked up by an offset. + try: + c1 = vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + g1 = nr_vm.t_to_g(c1, hgvs_genomic.ac) + g3 = nr_vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + ng2 = hn.normalize(g2) + g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( + len(g3.posedit.edit.ref) - 1) + try: + c2 = vm.g_to_t(g3, c1.ac) + if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: + pass + else: + tx_hgvs_not_delins = c2 + try: + tx_hgvs_not_delins = vm.c_to_n(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSError: + exceptPass() + except hgvs.exceptions.HGVSInvalidVariantError: + exceptPass() + + if re.search('\+', + str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + '\+', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = vm.t_to_g(c2, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search('\+', + str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + '\+', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = vm.t_to_g(c2, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base + gpe = for_location_c.posedit.pos.end.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search('\-', + str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + '\-', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = vm.t_to_g(c2, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search('\-', + str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + '\-', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = vm.t_to_g(c2, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base - 1 + gpe = for_location_c.posedit.pos.end.base + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + else: + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + '\n' + tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.start.base + len( + tx_hgvs_not_delins.posedit.edit.ref) - 1 + hgvs_refreshed_variant = tx_hgvs_not_delins + + # GAP IN THE CHROMOSOME + elif disparity_deletion_in[0] == 'chromosome': + suppress_c_normalization = 'true' + # amend_RefSeqGene = 'true' + if possibility_counter == 3: + hgvs_refreshed_variant = stash_tx_right + elif possibility_counter == 4: + hgvs_refreshed_variant = stash_tx_left + else: + hgvs_refreshed_variant = chromosome_normalized_hgvs_coding + # Warn + auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( + hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' transcript base(s) that fail to align to chromosome ' + str( + hgvs_genomic.ac) + '\n' + else: + # Keep the same by re-setting rel_var + hgvs_refreshed_variant = hgvs_coding + # amend_RefSeqGene = 'false' + + # Edit the output + if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( + hgvs_refreshed_variant.type)): + hgvs_refreshed_variant = no_norm_evm.n_to_c(hgvs_refreshed_variant) + else: + pass + + try: + hn.normalize(hgvs_refreshed_variant) + except Exception as e: + error = str(e) + + # Ensure the final variant is not intronic nor does it cross exon boundaries + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + hgvs_refreshed_variant = saved_hgvs_coding + else: + logger.warning(error) + continue + + # Quick check to make sure the coding variant has not changed + try: + to_test = hn.normalize(hgvs_refreshed_variant) + except: + to_test = hgvs_refreshed_variant + if str(to_test.posedit.edit) != 
str(hgvs_coding.posedit.edit): + # Try the next available genomic option + if hgvs_coding.posedit.edit.type == 'identity' and to_test.posedit.edit.type == 'identity': + hgvs_coding = to_test + else: + continue + # Update hgvs_genomic + hgvs_genomic = va_func.myvm_t_to_g(hgvs_refreshed_variant, hgvs_genomic.ac, + no_norm_evm, vm, hp, hn, sf, nr_vm) + if hgvs_genomic.posedit.edit.type == 'identity': + re_c = vm.g_to_t(hgvs_genomic, hgvs_refreshed_variant.ac) + if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): + shuffle_left_g = copy.copy(hgvs_genomic) + shuffle_left_g.posedit.edit.ref = '' + shuffle_left_g.posedit.edit.alt = '' + shuffle_left_g.posedit.pos.start.base = shuffle_left_g.posedit.pos.start.base - 1 + shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 + shuffle_left_g = reverse_normalizer.normalize(shuffle_left_g) + re_c = vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) + if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): + hgvs_genomic = shuffle_left_g + + # If it is intronic, these vairables will not have been set + else: + # amend_RefSeqGene = 'false' + no_normalized_c = 'false' + + # Break if gap has been detected + if disparity_deletion_in[0] != 'false': + break + + # Warn user about gapping + if auto_info != '': + info_lines = auto_info.split('\n') + info_keys = {} + for information in info_lines: + info_keys[information] = '' + info_out = [] + info_out.append( + 'The displayed variants may be artefacts of aligning ' + hgvs_coding.ac + ' with genome build ' + primary_assembly) + for ky in info_keys.keys(): + info_out.append(ky) + auto_info = '\n'.join(info_out) + auto_info = auto_info + '\nCaution should be used when reporting the displayed variant descriptions: If you are unsure, please contact admin' + auto_info = str(auto_info.replace('\n', ': ')) + validation['warnings'] = validation['warnings'] + ': ' + str(auto_info) + logger.warning(str(auto_info)) + # Normailse hgvs_genomic + 
try: + hgvs_genomic = hn.normalize(hgvs_genomic) + except hgvs.exceptions.HGVSError as e: + # Strange error caused by gap in genomic + error = str(e) + + if re.search('base start position must be <= end position', error) and \ + disparity_deletion_in[0] == 'chromosome': + if hgvs_genomic.posedit.edit.type == 'delins': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + hgvs_genomic = hn.normalize(hgvs_genomic) + if hgvs_genomic.posedit.edit.type == 'del': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + hgvs_genomic = hn.normalize(hgvs_genomic) + genomic = valstr(hgvs_genomic) + + else: + stored_hgvs_genomic_variant = hgvs_genomic + suppress_c_normalization = 'false' + gapped_alignment_warning = '' + auto_info = '' + genomic = valstr(hgvs_genomic) + + # Create pseudo VCF based on amended hgvs_genomic + hgvs_genomic_variant = hgvs_genomic + # Reverse normalize hgvs_genomic_variant: NOTE will replace ref + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic_variant) + + hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + + # Create vcf + vcf_dict = va_H2V.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, + reverse_normalizer, sf) + chr = vcf_dict['chr'] + pos = vcf_dict['pos'] + ref = vcf_dict['ref'] + alt = vcf_dict['alt'] + + # 
Create a VCF call + vcf_component_list = [str(chr), str(pos), str(ref), (alt)] + vcf_genomic = '-'.join(vcf_component_list) + + # DO NOT DELETE + # Generate an end position + end = str(int(pos) + len(ref) - 1) + pos = str(pos) + + # DO NOT DELETE + stored_hgvs_not_delins = hp.parse_hgvs_variant(str( + hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) + + # Apply gap code to re-format hgvs_coding + # Store the current hgvs:c. description + saved_hgvs_coding = copy.deepcopy(hgvs_coding) + + # Get orientation of the gene wrt genome and a list of exons mapped to the genome + ori = va_func.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=hgvs_genomic_5pr.ac, + alt_aln_method=alt_aln_method, hdp=hdp) + orientation = int(ori[0]['alt_strand']) + + # Look for normalized variant options that do not match hgvs_coding + hgvs_genomic = copy.deepcopy(hgvs_genomic_variant) + if orientation == -1: + # position genomic at its most 5 prime position + try: + query_genomic = reverse_normalizer.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript ant test for movement + try: + hgvs_seek_var = evm.g_to_t(query_genomic, hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding + + elif orientation != -1: + # position genomic at its most 3 prime position + try: + query_genomic = hn.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript ant test for movement + try: 
+ hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): + pass + else: + hgvs_seek_var = saved_hgvs_coding + + # Loop out gap finding code under these circumstances! + logger.warning("gap_compensation_2 = " + str(gap_compensation)) + if gap_compensation is True: + logger.warning('g_to_t gap code 2 active') + # is it in an exon? + is_it_in_an_exon = 'no' + for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + # Take from stored copy + # hgvs_genomic_5pr = copy.deepcopy(stored_hgvs_genomic_5pr) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + is_it_in_an_exon = 'yes' + if is_it_in_an_exon == 'yes': + # map form reverse normalized g. to c. 
+ hgvs_from_5n_g = no_norm_evm.g_to_t(hgvs_genomic_5pr, saved_hgvs_coding.ac) + + # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths + disparity_deletion_in = ['false', 'false'] + if stored_hgvs_not_delins != '': + # Refresh hgvs_not_delins from stored_hgvs_not_delins + hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) + # This test will only occur in dup of single base, insertion or substitution + if not re.search('_', str(hgvs_not_delins.posedit.pos)): + if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', + hgvs_genomic_5pr.posedit.edit.type): + # For gap in chr, map to t. - but becaouse we have pushed to 5 prime by norm, add 1 to end pos + plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) + plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 + plussed_hgvs_not_delins.posedit.edit.ref = '' + transcript_variant = no_norm_evm.g_to_t(plussed_hgvs_not_delins, + str(saved_hgvs_coding.ac)) + if (( + transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( + hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', + str(hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', 
str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + else: + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', + str(hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + else: + pass + else: + pass + + hard_fail = 'false' + try: + tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) + except Exception as e: + if str(e) == 'start or end or both are beyond the bounds of transcript record': + tx_hgvs_not_delins = hgvs_coding + hard_fail = 'true' + + # Create normalized version of 
tx_hgvs_not_delins + rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) + # Check for +ve base and adjust + if re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search('\+', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: + exceptPass() + + elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, + primary_assembly, vm, hp, hn, sf, nr_vm) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, + primary_assembly, vm, hp, hn, sf, nr_vm) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # else: + # pass 
+ + # Check for -ve base and adjust + elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search('\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: + exceptPass() + elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base back to next available non-offset base + # rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base - 1 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # Delete the ref + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # Add the additional base to the ALT + start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 + end = rn_tx_hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, + primary_assembly, vm, hp, hn, sf, nr_vm) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = 
no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, + primary_assembly, vm, hp, hn, sf, nr_vm) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + else: + pass + + # Logic + if len(hgvs_not_delins.posedit.edit.ref) < len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( + hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['chromosome', gap_length] + elif len(hgvs_not_delins.posedit.edit.ref) > len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( + rn_tx_hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['transcript', gap_length] + else: + re_capture_tx_variant = [] + for internal_possibility in hgvs_genomic_possibilities: + + if internal_possibility == '': + continue + + hgvs_t_possibility = vm.g_to_t(internal_possibility, hgvs_coding.ac) + if hgvs_t_possibility.posedit.edit.type == 'ins': + try: + hgvs_t_possibility = vm.c_to_n(hgvs_t_possibility) + except: + exceptPass() + ins_ref = sf.fetch_seq(hgvs_t_possibility.ac, + hgvs_t_possibility.posedit.pos.start.base - 1, + hgvs_t_possibility.posedit.pos.start.base + 1) + try: + hgvs_t_possibility = vm.n_to_c(hgvs_t_possibility) + except: + exceptPass() + hgvs_t_possibility.posedit.edit.ref = ins_ref + hgvs_t_possibility.posedit.edit.alt = ins_ref[ + 0] + hgvs_t_possibility.posedit.edit.alt + \ + ins_ref[1] + if internal_possibility.posedit.edit.type == 'ins': + ins_ref = sf.fetch_seq(internal_possibility.ac, + internal_possibility.posedit.pos.start.base - 1, + internal_possibility.posedit.pos.end.base) + internal_possibility.posedit.edit.ref = ins_ref + internal_possibility.posedit.edit.alt = ins_ref[ + 0] + internal_possibility.posedit.edit.alt + \ + 
ins_ref[1] + + if len(hgvs_t_possibility.posedit.edit.ref) < len( + internal_possibility.posedit.edit.ref): + gap_length = len(internal_possibility.posedit.edit.ref) - len( + hgvs_t_possibility.posedit.edit.ref) + re_capture_tx_variant = ['transcript', gap_length, hgvs_t_possibility] + hgvs_not_delins = internal_possibility + hgvs_genomic_5pr = internal_possibility + break + + if re_capture_tx_variant != []: + try: + tx_hgvs_not_delins = vm.c_to_n(re_capture_tx_variant[2]) + except: + tx_hgvs_not_delins = re_capture_tx_variant[2] + disparity_deletion_in = re_capture_tx_variant[0:-1] + else: + pass + + # Final sanity checks + try: + vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + except Exception as e: + if str(e) == 'start or end or both are beyond the bounds of transcript record': + logger.warning(str(e)) + continue + try: + hn.normalize(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + logger.warning(error) + continue + elif re.match('Normalization of intronic variants is not supported', error): + # We know that this cannot be because of an intronic variant, so must be aligned to tx gap + disparity_deletion_in = ['transcript', 'Requires Analysis'] + + if hard_fail == 'true': + disparity_deletion_in = ['false', 'false'] + + # Recreate hgvs_genomic + if disparity_deletion_in[0] == 'transcript': + hgvs_genomic = hgvs_not_delins + + # Pre-processing of tx_hgvs_not_delins + try: + if tx_hgvs_not_delins.posedit.edit.alt is None: + tx_hgvs_not_delins.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + 
'.' + str( + tx_hgvs_not_delins.posedit.pos.start) + '_' + str( + tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins = hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) + + + # GAP IN THE TRANSCRIPT DISPARITY DETECTED + if disparity_deletion_in[0] == 'transcript': + gap_position = '' + gapped_alignment_warning = str( + hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + primary_assembly + + # ANY VARIANT WHOLLY WITHIN THE GAP + if (re.search('\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search('\-', + str( + tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search('\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search('\-', + str( + tx_hgvs_not_delins.posedit.pos.end))): + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + + # Copy the current variant + tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) + try: + if tx_gap_fill_variant.posedit.edit.alt is None: + tx_gap_fill_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( + tx_gap_fill_variant.posedit.pos.start) + '_' + str( + tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant = hp.parse_hgvs_variant( + tx_gap_fill_variant_delins_from_dup) + + # Identify which half of the NOT-intron the start position of the variant is in + if re.search('\-', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + elif re.search('\+', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + + try: + tx_gap_fill_variant = vm.n_to_c(tx_gap_fill_variant) + except: + exceptPass() + genomic_gap_fill_variant = vm.t_to_g(tx_gap_fill_variant, + reverse_normalized_hgvs_genomic.ac) + genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref + + try: + c_tx_hgvs_not_delins = vm.n_to_c(tx_hgvs_not_delins) + except Exception: + c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) + genomic_gap_fill_variant_alt = vm.t_to_g(c_tx_hgvs_not_delins, + hgvs_genomic_5pr.ac) + + # Ensure an ALT exists + try: + if genomic_gap_fill_variant_alt.posedit.edit.alt is None: + genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + 
genomic_gap_fill_variant.type + '.' + str( + genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant = hp.parse_hgvs_variant( + genomic_gap_fill_variant_delins_from_dup) + genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( + genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt = hp.parse_hgvs_variant( + genomic_gap_fill_variant_alt_delins_from_dup) + + # Correct insertion alts + if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': + append_ref = sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, + genomic_gap_fill_variant_alt.posedit.pos.end.base) + genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ + 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ + append_ref[1] + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) + if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: + alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) + else: + # Deletions with no ins + pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = genomic_gap_fill_variant.posedit.pos.start.base + alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base + ref_base_dict = {} + for base in 
reference_bases: + ref_base_dict[ref_start] = str(base) + ref_start = ref_start + 1 + + alt_base_dict = {} + + # NEED TO SEARCH FOR RANGE = and replace with interval_range + # Need to search for int and replace with integer + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, + genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): + if integer == alt_start: + alt_base_dict[integer] = str(''.join(alternate_bases)) + else: + alt_base_dict[integer] = 'X' + + # Generate the alt sequence + alternate_sequence_bases = [] + for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, + genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): + if integer in alt_base_dict.keys(): + alternate_sequence_bases.append(alt_base_dict[integer]) + else: + alternate_sequence_bases.append(ref_base_dict[integer]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Add the new alt to the gap fill variant and generate transcript variant + genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence + hgvs_refreshed_variant = vm.g_to_t(genomic_gap_fill_variant, + tx_gap_fill_variant.ac) + + # Set warning + gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) + disparity_deletion_in[1] = [gap_size] + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + if re.match('\-', str(for_location_c.posedit.pos.start.offset)): + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + else: + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + auto_info = auto_info + '%s' % (gap_position) + + else: + if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: + # In this instance, we have identified a transcript gap but the n. version of + # the transcript variant but do not have a position which actually hits the gap, + # so the variant likely spans the gap, and is not picked up by an offset. + try: + c1 = vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + g1 = nr_vm.t_to_g(c1, hgvs_genomic.ac) + g3 = nr_vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + ng2 = hn.normalize(g2) + g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( + len(g3.posedit.edit.ref) - 1) + try: + c2 = vm.g_to_t(g3, c1.ac) + if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: + pass + else: + tx_hgvs_not_delins = c2 + try: + tx_hgvs_not_delins = vm.c_to_n(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSError: + exceptPass() + except hgvs.exceptions.HGVSInvalidVariantError: + exceptPass() + + if re.search('\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + '\+', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = vm.t_to_g(c2, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search('\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + '\+', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = vm.t_to_g(c2, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base + gpe = for_location_c.posedit.pos.end.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search('\-', + str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + '\-', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = vm.t_to_g(c2, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search('\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + '\-', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = vm.t_to_g(c2, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base - 1 + gpe = for_location_c.posedit.pos.end.base + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + else: + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + '\n' + hgvs_refreshed_variant = tx_hgvs_not_delins + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + + # GAP IN THE CHROMOSOME + + elif disparity_deletion_in[0] == 'chromosome': + # Set warning variables + gap_position = '' + gapped_alignment_warning = str( + hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + primary_assembly + hgvs_refreshed_variant = tx_hgvs_not_delins + # Warn + auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( + hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(disparity_deletion_in[ + 1]) + ' transcript base(s) that fail to align to chromosome ' + str( + hgvs_genomic.ac) + '\n' + gapped_transcripts = gapped_transcripts + str(hgvs_refreshed_variant.ac) + ' ' + else: + # Keep the same by re-setting rel_var + hgvs_refreshed_variant = saved_hgvs_coding + + # Edit the output + if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( + hgvs_refreshed_variant.type)): + hgvs_refreshed_variant = evm.n_to_c(hgvs_refreshed_variant) + else: + pass + try: + hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) + if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ + hgvs_refreshed_variant.posedit.edit.ref[-1] == \ + hgvs_refreshed_variant.posedit.edit.alt[-1]: + hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ + 0:-1] + hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ + 0:-1] + hgvs_refreshed_variant.posedit.pos.end.base = hgvs_refreshed_variant.posedit.pos.end.base - 1 + hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) + elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ + 
hgvs_refreshed_variant.posedit.edit.ref[0] == \ + hgvs_refreshed_variant.posedit.edit.alt[0]: + hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ + 1:] + hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ + 1:] + hgvs_refreshed_variant.posedit.pos.start.base = hgvs_refreshed_variant.posedit.pos.start.base + 1 + hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) + except Exception as e: + error = str(e) + # Ensure the final variant is not intronic nor does it cross exon boundaries + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + hgvs_refreshed_variant = saved_hgvs_coding + else: + pass + + # Sort out equality to equality c. events where the code will add 2 additional bases + if hgvs_coding.posedit.edit.type == 'identity' and hgvs_refreshed_variant.posedit.edit.type == 'identity': # and len(hgvs_refreshed_variant.posedit.edit.ref) == (len(hgvs_coding.posedit.edit.ref) + 2): + pass + else: + hgvs_coding = copy.deepcopy(hgvs_refreshed_variant) + coding = valstr(hgvs_coding) + variant = coding + + # OBTAIN THE RefSeqGene coordinates + # Attempt 1 = UTA + sequences_for_tx = hdp.get_tx_mapping_options(hgvs_coding.ac) + recovered_rsg = [] + + for sequence in sequences_for_tx: + if re.match('^NG_', sequence[1]): + recovered_rsg.append(sequence[1]) + recovered_rsg.sort() + recovered_rsg.reverse() + + if 'NG_' in recovered_rsg: + refseqgene_ac = recovered_rsg + else: + refseqgene_ac = '' + + # Given the difficulties with mapping to and from RefSeqGenes, we now solely rely on UTA + if refseqgene_ac != '': + hgvs_refseq = vm.t_to_g(hgvs_coding, refseqgene_ac) + # Normalize the RefSeqGene Variant to the correct position + try: + hgvs_refseq = hn.normalize(hgvs_refseq) + except Exception as e: + # if re.search('insertion length must be 1', error): + hgvs_refseq = 'RefSeqGene 
record not available' + refseq = 'RefSeqGene record not available' + hgvs_refseq_ac = 'RefSeqGene record not available' + pass + else: + refseq = valstr(hgvs_refseq) + hgvs_refseq_ac = hgvs_refseq.ac + else: + hgvs_refseq = 'RefSeqGene record not available' + refseq = 'RefSeqGene record not available' + hgvs_refseq_ac = 'RefSeqGene record not available' + + # Predicted effect on protein + protein_dict = va_func.myc_to_p(hgvs_coding, evm, hdp, hp, hn, vm, sf, re_to_p=False) + if protein_dict['error'] == '': + hgvs_protein = protein_dict['hgvs_protein'] + protein = str(hgvs_protein) + else: + error = protein_dict['error'] + validation['warnings'] = validation['warnings'] + ': ' + str(error) + if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': + hgvs_protein = protein_dict['hgvs_protein'] + protein = str(hgvs_protein) + else: + logger.error(error) + continue + + # Gene orientation wrt genome + ori = va_func.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=hgvs_genomic.ac, + alt_aln_method=alt_aln_method, hdp=hdp) + ori = int(ori[0]['alt_strand']) + + # Look for normalized variant options that do not match hgvs_coding + # boundary crossing normalization + # Re-Save the required variants + hgvs_seek_var = copy.deepcopy(hgvs_coding) + saved_hgvs_coding = copy.deepcopy(hgvs_coding) + + if ori == -1: + # position genomic at its most 5 prime position + try: + query_genomic = reverse_normalizer.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript and test for movement + try: + hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if saved_hgvs_coding.posedit.edit.type != hgvs_seek_var.posedit.edit.type: + rec_var = 'false' + hgvs_seek_var = saved_hgvs_coding + seek_var = valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + elif 
suppress_c_normalization == 'true': + rec_var = 'false' + hgvs_seek_var = saved_hgvs_coding + seek_var = valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + elif (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + try: + automap = valstr(saved_hgvs_coding) + ' normalized to ' + valstr(hgvs_seek_var) + hgvs_coding = hgvs_seek_var + coding = valstr(hgvs_coding) + validation['warnings'] = validation['warnings'] + ': ' + automap + rng = hn.normalize(query_genomic) + except NotImplementedError: + pass + try: + c_for_p = vm.g_to_t(rng, hgvs_coding.ac) + except hgvs.exceptions.HGVSInvalidIntervalError as e: + c_for_p = seek_var + try: + # Predicted effect on protein + protein_dict = va_func.myc_to_p(c_for_p, evm, hdp, hp, hn, vm, sf, re_to_p=False) + if protein_dict['error'] == '': + hgvs_protein = protein_dict['hgvs_protein'] + protein = str(hgvs_protein) + else: + error = protein_dict['error'] + if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': + hgvs_protein = protein_dict['hgvs_protein'] + validation['warnings'] = validation['warnings'] + ': ' + str(error) + # Replace protein description in vars table + protein = str(hgvs_protein) + except NotImplementedError: + exceptPass() + else: + # Double check protein position by normalize genomic, and normalize back to c. 
for normalize or not to normalize issue + coding = valstr(hgvs_coding) + + elif ori != -1: + # position genomic at its most 3 prime position + try: + query_genomic = hn.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript and test for movement + try: + hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if saved_hgvs_coding.posedit.edit.type != hgvs_seek_var.posedit.edit.type: + rec_var = 'false' + hgvs_seek_var = saved_hgvs_coding + seek_var = valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + elif suppress_c_normalization == 'true': + rec_var = 'false' + hgvs_seek_var = saved_hgvs_coding + seek_var = valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + elif (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + try: + automap = valstr(saved_hgvs_coding) + ' normalized to ' + valstr(hgvs_seek_var) + hgvs_coding = hgvs_seek_var + coding = valstr(hgvs_coding) + validation['warnings'] = validation['warnings'] + ': ' + automap + except NotImplementedError: + exceptPass() + else: + # Double check protein position by reverse_norm genomic, and normalize back to c. 
for normalize or not to normalize issue + coding = valstr(hgvs_coding) + rng = reverse_normalizer.normalize(query_genomic) + try: + # Diagram where - = intron and E = Exon + + # 3 prime + # ---------EEEEEEEEEEEEEEEEE----------- + # < + # Result, normalize of new variant will baulk at intronic + # 5 prime + # < + # Result, normalize of new variant will be happy + c_for_p = vm.g_to_t(rng, hgvs_coding.ac) + try: + hn.normalize(c_for_p) + except hgvs.exceptions.HGVSError as e: + exceptPass() + else: + # hgvs_protein = va_func.protein(str(c_for_p), evm, hp) + protein_dict = va_func.myc_to_p(c_for_p, evm, hdp, hp, hn, vm, sf, + re_to_p=False) + if protein_dict['error'] == '': + hgvs_protein = protein_dict['hgvs_protein'] + protein = str(hgvs_protein) + else: + error = protein_dict['error'] + if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': + hgvs_protein = protein_dict['hgvs_protein'] + validation['warnings'] = validation['warnings'] + ': ' + str(error) + # Replace protein description in vars table + protein = str(hgvs_protein) + except Exception: + exceptPass() + + # Check for up-to-date transcript version + updated_transcript_variant = 'None' + tx_id_info = hdp.get_tx_identity_info(hgvs_coding.ac) + uta_gene_symbol = tx_id_info[6] + tx_for_gene = hdp.get_tx_for_gene(uta_gene_symbol) + ac_root, ac_version = hgvs_coding.ac.split('.') + version_tracking = '0' + update = '' + for accession in tx_for_gene: + try: + if re.match(ac_root, accession[3]): + query_version = accession[3].split('.')[1] + if int(query_version) > int(ac_version) and int(query_version) > int( + version_tracking): + version_tracking = query_version + update = accession[3] + except ValueError: + exceptPass() + + if update != '': + hgvs_updated = copy.deepcopy(hgvs_coding) + hgvs_updated.ac = update + try: + vr.validate(hgvs_updated) + # Updated reference sequence + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('does not agree with 
reference sequence', str(error)): + match = re.findall('\(([GATC]+)\)', error) + new_ref = match[1] + hgvs_updated.posedit.edit.ref = new_ref + vr.validate(hgvs_updated) + updated_transcript_variant = hgvs_updated + else: + pass + updated_transcript_variant = hgvs_updated + validation['warnings'] = validation[ + 'warnings'] + ': ' + 'A more recent version of the selected reference sequence ' + hgvs_coding.ac + ' is available (' + updated_transcript_variant.ac + ')' + ': ' + str( + updated_transcript_variant) + ' MUST be fully validated prior to use in reports: select_variants=' + valstr( + updated_transcript_variant) + + # Set the data + set_output_type_flag = 'gene' + validation['description'] = hgnc_gene_info + validation['coding'] = str(hgvs_coding) + validation['genomic_r'] = str(hgvs_refseq) + validation['genomic_g'] = str(hgvs_genomic) + validation['protein'] = str(hgvs_protein) + validation['primary_assembly'] = primary_assembly + if gap_compensation is True: + validation['test_stash_tx_left'] = test_stash_tx_left + validation['test_stash_tx_right'] = test_stash_tx_right + # finish timing + logger.traceEnd(validation) + # Report errors to User and VV admin + except KeyboardInterrupt: + raise + except: + set_output_type_flag = 'error' + error = 'Validation error' + validation['warnings'] = str(error) + exc_type, exc_value, last_traceback = sys.exc_info() + te = traceback.format_exc() + tbk = [str(exc_type), str(exc_value), str(te)] + er = str('\n'.join(tbk)) + logger.error(str(exc_type) + " " + str(exc_value)) + logger.debug(er) + + continue + + # Outside the for loop + ###################### + logger.trace("End of for loop") + # order the rows + # from operator import itemgetter + by_order = sorted(batch_list, key=itemgetter('order')) + + for valid in by_order: + if 'write' in valid.keys(): + if valid['write'] == 'true': + # Blank VCF + # chr = '' + # pos = '' + # ref = '' + # alt = '' + + # Fromulate a json type response + dict_out = {} + + # Set gap 
compensation bool + gap_compensation = True + + # warngins + warnings = valid['warnings'] + warnings = re.sub('del[GATC][GATC][GATC][GATC]+', 'del', warnings) + warnings = re.sub('^: ', '', warnings) + warnings = re.sub('::', ':', warnings) + + # Submitted variant + submitted = valid['id'] + + # Genomic sequence variation + genomic_variant = valid['genomic_g'] + + # genomic accession + if genomic_variant != '': + hgvs_genomic_variant = hp.parse_hgvs_variant(genomic_variant) + genomic_variant = valstr(hgvs_genomic_variant) + genomic_accession = hgvs_genomic_variant.ac + else: + genomic_accession = '' + + # RefSeqGene variation + refseqgene_variant = valid['genomic_r'] + refseqgene_variant = refseqgene_variant.strip() + if re.search('RefSeqGene', refseqgene_variant) or refseqgene_variant == '': + warnings = warnings + ': ' + refseqgene_variant + refseqgene_variant = '' + lrg_variant = '' + hgvs_refseqgene_variant = 'false' + else: + hgvs_refseqgene_variant = hp.parse_hgvs_variant(refseqgene_variant) + rsg_ac = va_dbCrl.data.get_lrgID_from_RefSeqGeneID(str(hgvs_refseqgene_variant.ac)) + if rsg_ac[0] == 'none': + lrg_variant = '' + else: + hgvs_lrg = copy.deepcopy(hgvs_refseqgene_variant) + hgvs_lrg.ac = rsg_ac[0] + lrg_variant = valstr(hgvs_lrg) + if rsg_ac[1] == 'public': + pass + else: + warnings = warnings + ': The current status of ' + str( + hgvs_lrg.ac) + ' is pending therefore changes may be made to the LRG reference sequence' + + # Transcript sequence variation + tx_variant = valid['coding'] + if tx_variant != '': + if '(' in tx_variant and ')' in tx_variant: + tx_variant = tx_variant.split('(')[1] + tx_variant = tx_variant.replace(')', '') + + # transcript accession + hgvs_tx_variant = hp.parse_hgvs_variant(tx_variant) + tx_variant = valstr(hgvs_tx_variant) + hgvs_transcript_variant = hp.parse_hgvs_variant(tx_variant) + transcript_accession = hgvs_transcript_variant.ac + + # Handle LRG + lrg_status = 'public' + lrg_transcript = 
va_dbCrl.data.get_lrgTranscriptID_from_RefSeqTranscriptID(transcript_accession) + if lrg_transcript == 'none': + lrg_transcript_variant = '' + else: + # Note - LRG availability is dependant on UTA containing the data. In some + # instances we will be able to display the LRG_tx without being able to + # display the LRG gene data + + # if not re.search('RefSeqGene', refseqgene_variant) or refseqgene_variant != '': + # if hgvs_refseqgene_variant != 'RefSeqGene record not available' and hgvs_refseqgene_variant != 'false': + try: + hgvs_lrg_t = vm.g_to_t(hgvs_refseqgene_variant, transcript_accession) + hgvs_lrg_t.ac = lrg_transcript + lrg_transcript_variant = valstr(hgvs_lrg_t) + except: + if hgvs_transcript_variant.posedit.pos.start.offset == 0 and hgvs_transcript_variant.posedit.pos.end.offset == 0: + hgvs_lrg_t = copy.copy(hgvs_transcript_variant) + hgvs_lrg_t.ac = lrg_transcript + lrg_transcript_variant = valstr(hgvs_lrg_t) + else: + lrg_transcript_variant = '' + else: + transcript_accession = '' + lrg_transcript_variant = '' + + # Look for intronic variants + if transcript_accession != '' and genomic_accession != '': + # Remove del bases + str_transcript = valstr(hgvs_transcript_variant) + hgvs_transcript_variant = hp.parse_hgvs_variant(str_transcript) + try: + vr.validate(hgvs_transcript_variant) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('intronic variant', error): + genome_context_transcript_variant = genomic_accession + '(' + transcript_accession + '):c.' 
+ str( + hgvs_transcript_variant.posedit) + if refseqgene_variant != '': + hgvs_refseqgene_variant = hp.parse_hgvs_variant(refseqgene_variant) + refseqgene_accession = hgvs_refseqgene_variant.ac + hgvs_coding_from_refseqgene = vm.g_to_t(hgvs_refseqgene_variant, + hgvs_transcript_variant.ac) + hgvs_coding_from_refseqgene = valstr(hgvs_coding_from_refseqgene) + hgvs_coding_from_refseqgene = hp.parse_hgvs_variant(hgvs_coding_from_refseqgene) + RefSeqGene_context_transcript_variant = refseqgene_accession + '(' + transcript_accession + '):c.' + str( + hgvs_coding_from_refseqgene.posedit.pos) + str( + hgvs_coding_from_refseqgene.posedit.edit) + else: + RefSeqGene_context_transcript_variant = '' + else: + genome_context_transcript_variant = '' # transcript_variant + RefSeqGene_context_transcript_variant = '' + else: + genome_context_transcript_variant = '' # transcript_variant + RefSeqGene_context_transcript_variant = '' + else: + genome_context_transcript_variant = '' + RefSeqGene_context_transcript_variant = '' + + # Protein description + predicted_protein_variant = valid['protein'] + if re.match('NP_', predicted_protein_variant): + rs_p, pred_prot_posedit = predicted_protein_variant.split(':') + lrg_p = va_dbCrl.data.get_lrgProteinID_from_RefSeqProteinID(rs_p) + if re.match('LRG', lrg_p): + predicted_protein_variant = rs_p + '(' + lrg_p + '):' + pred_prot_posedit + + # Gene + if transcript_accession != '': + try: + gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(transcript_accession) + except: + gene_symbol = 'Unable to verify gene symbol for ' + str(transcript_accession) + else: + gene_symbol = '' + + # Transcript description + transcript_description = valid['description'] + + # Stashed variants + if 'test_stash_tx_left' not in validation: + pass + else: + test_stash_tx_left = validation['test_stash_tx_left'] + if 'test_stash_tx_right' not in validation: + pass + else: + test_stash_tx_right = validation['test_stash_tx_right'] + + # Multiple genomic 
variants + # multi_gen_vars = [] + if tx_variant != '': + hgvs_coding = hp.parse_hgvs_variant(str(tx_variant)) + # Gap gene black list + try: + gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(hgvs_coding.ac) + except Exception: + exceptPass() + else: + # If the gene symbol is not in the list, the value False will be returned + gap_compensation = gapGenes.gap_black_list(gene_symbol) + + # Look for variants spanning introns + try: + hgvs_coding = hn.normalize(hgvs_coding) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.search('boundary', str(error)) or re.search('spanning', str(error)): + gap_compensation = False + else: + pass + except hgvs.exceptions.HGVSError: + exceptPass() + + # Warn gap code status + logger.warning("gap_compensation_3 = " + str(gap_compensation)) + multi_g = [] + multi_list = [] + mapping_options = hdp.get_tx_mapping_options(hgvs_coding.ac) + for alt_chr in mapping_options: + if (re.match('NC_', alt_chr[1]) or re.match('NT_', alt_chr[1]) or re.match('NW_', + alt_chr[1])) and \ + alt_chr[2] == alt_aln_method: + multi_list.append(alt_chr[1]) + + for alt_chr in multi_list: + try: + # Re set ori + ori = va_func.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=alt_chr, + alt_aln_method=alt_aln_method, hdp=hdp) + orientation = int(ori[0]['alt_strand']) + hgvs_alt_genomic = va_func.myvm_t_to_g(hgvs_coding, alt_chr, no_norm_evm, vm, hp, hn, + sf, nr_vm) + # Set hgvs_genomic accordingly + hgvs_genomic = copy.deepcopy(hgvs_alt_genomic) + + # genomic_possibilities + # 1. take the simple 3 pr normalized hgvs_genomic + # 2. Lock in hgvs_genomic at its most 5 prime position wrt genome + hgvs_genomic_possibilities = [] + + # Loop out gap code under these circumstances! 
+ if gap_compensation is True: + logger.warning('g_to_t gap code 3 active') + rn_hgvs_genomic = reverse_normalizer.normalize(hgvs_alt_genomic) + hgvs_genomic_possibilities.append(rn_hgvs_genomic) + if orientation != -1: + try: + chromosome_normalized_hgvs_coding = reverse_normalizer.normalize( + hgvs_coding) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + chromosome_normalized_hgvs_coding = hgvs_coding + else: + try: + chromosome_normalized_hgvs_coding = hn.normalize(hgvs_coding) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + chromosome_normalized_hgvs_coding = hgvs_coding + + most_3pr_hgvs_genomic = va_func.myvm_t_to_g(chromosome_normalized_hgvs_coding, + alt_chr, + no_norm_evm, vm, hp, hn, sf, nr_vm) + hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) + + # First to the right + hgvs_stash = copy.deepcopy(hgvs_coding) + try: + hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) + except: + exceptPass() + try: + stash_ac = hgvs_stash.ac + stash_dict = va_H2V.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, sf) + stash_pos = int(stash_dict['pos']) + stash_ref = stash_dict['ref'] + stash_alt = stash_dict['alt'] + # Generate an end position + stash_end = str(stash_pos + len(stash_ref) - 1) + # make a not real deletion insertion + stash_hgvs_not_delins = hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' + str( + stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + try: + stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) + except: + exceptPass() + # Store a tx copy for later use + test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) + stash_genomic = va_func.myvm_t_to_g(test_stash_tx_right, hgvs_alt_genomic.ac, + no_norm_evm, vm, hp, hn, sf, nr_vm) + # Stash the outputs if required + # test variants = NC_000006.11:g.90403795G= (causes double identity) + # NC_000002.11:g.73675227_73675228insCTC (? 
incorrect assumed insertion position) + # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': + # pass + if len(test_stash_tx_right.posedit.edit.ref) == (( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + stash_tx_right = test_stash_tx_right + if hasattr(test_stash_tx_right.posedit.edit, + 'alt') and test_stash_tx_right.posedit.edit.alt is not None: + alt = test_stash_tx_right.posedit.edit.alt + else: + alt = '' + if hasattr(stash_genomic.posedit.edit, + 'alt') and stash_genomic.posedit.edit.alt is not None: + g_alt = stash_genomic.posedit.edit.alt + else: + g_alt = '' + if (len(alt) - ( + test_stash_tx_right.posedit.pos.end.base - test_stash_tx_right.posedit.pos.start.base) + 1) != ( + len(g_alt) - ( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + hgvs_genomic_possibilities.append(stash_genomic) + else: + hgvs_genomic_possibilities.append('') + elif test_stash_tx_right.posedit.edit.type == 'identity': + reform_ident = str(test_stash_tx_right).split(':')[0] + reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_right.posedit.pos) + 'del' + str( + test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) + hgvs_reform_ident = hp.parse_hgvs_variant(reform_ident) + try: + hn.normalize(hgvs_reform_ident) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error): + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append('') + else: + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append(stash_genomic) + else: + try: + hn.normalize(test_stash_tx_right) + except hgvs.exceptions.HGVSUnsupportedOperationError: + hgvs_genomic_possibilities.append('') + else: + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append(stash_genomic) + except hgvs.exceptions.HGVSError as e: + exceptPass() + except ValueError: + exceptPass() + + # Then to the left + hgvs_stash = copy.deepcopy(hgvs_coding) + try: + hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) + except: + exceptPass() + try: + stash_ac = hgvs_stash.ac + stash_dict = va_H2V.hard_left_hgvs2vcf(hgvs_stash, primary_assembly, + reverse_normalizer, sf) + stash_pos = int(stash_dict['pos']) + stash_ref = stash_dict['ref'] + stash_alt = stash_dict['alt'] + # Generate an end position + stash_end = str(stash_pos + len(stash_ref) - 1) + # make a not real deletion insertion + stash_hgvs_not_delins = hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' 
+ str( + stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + try: + stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) + except: + exceptPass() + # Store a tx copy for later use + test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) + stash_genomic = va_func.myvm_t_to_g(test_stash_tx_left, hgvs_alt_genomic.ac, + no_norm_evm, vm, hp, hn, sf, nr_vm) + # Stash the outputs if required + # test variants = NC_000006.11:g.90403795G= (causes double identity) + # NC_000002.11:g.73675227_73675228insCTC + # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': + # pass + if len(test_stash_tx_left.posedit.edit.ref) == (( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): + stash_tx_left = test_stash_tx_left + if hasattr(test_stash_tx_left.posedit.edit, + 'alt') and test_stash_tx_left.posedit.edit.alt is not None: + alt = test_stash_tx_left.posedit.edit.alt + else: + alt = '' + if hasattr(stash_genomic.posedit.edit, + 'alt') and stash_genomic.posedit.edit.alt is not None: + g_alt = stash_genomic.posedit.edit.alt + else: + g_alt = '' + if (len(alt) - ( + test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( + len(g_alt) - ( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + hgvs_genomic_possibilities.append(stash_genomic) + else: + hgvs_genomic_possibilities.append('') + elif test_stash_tx_left.posedit.edit.type == 'identity': + reform_ident = str(test_stash_tx_left).split(':')[0] + reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_left.posedit.pos) + 'del' + str( + test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) + hgvs_reform_ident = hp.parse_hgvs_variant(reform_ident) + try: + hn.normalize(hgvs_reform_ident) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error): + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append('') + else: + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append(stash_genomic) + else: + try: + hn.normalize(test_stash_tx_left) + except hgvs.exceptions.HGVSUnsupportedOperationError: + hgvs_genomic_possibilities.append('') + else: + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append(stash_genomic) + except hgvs.exceptions.HGVSError as e: + exceptPass() + except ValueError: + exceptPass() + + # direct mapping from reverse_normalized transcript insertions in the delins format + try: + if hgvs_coding.posedit.edit.type == 'ins': + most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) + most_3pr_hgvs_transcript_variant = reverse_normalizer.normalize(hgvs_coding) + try: + n_3pr = vm.c_to_n(most_3pr_hgvs_transcript_variant) + n_5pr = vm.c_to_n(most_5pr_hgvs_transcript_variant) + except: + n_3pr = most_3pr_hgvs_transcript_variant + n_5pr = most_5pr_hgvs_transcript_variant + # Make into a delins by adding the ref bases to the variant ref and alt + pr3_ref = sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, + n_3pr.posedit.pos.end.base) + pr5_ref = sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, + n_5pr.posedit.pos.end.base) + most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref + most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[ + 0] + most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ + pr3_ref[1] + most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[ + 0] + 
most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ + pr5_ref[1] + # Map to the genome + genomic_from_most_3pr_hgvs_transcript_variant = vm.t_to_g( + most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) + genomic_from_most_5pr_hgvs_transcript_variant = vm.t_to_g( + most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) + + # Normalize - If the variant spans a gap it should then form a static genomic variant + try: + genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( + genomic_from_most_3pr_hgvs_transcript_variant) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if error == 'base start position must be <= end position': + start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base + end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start + genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( + genomic_from_most_3pr_hgvs_transcript_variant) + try: + genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( + genomic_from_most_5pr_hgvs_transcript_variant) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if error == 'base start position must be <= end position': + start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base + end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start + genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( + genomic_from_most_5pr_hgvs_transcript_variant) + + try: + if genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + 
genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' + str( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant = hp.parse_hgvs_variant( + genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) + + try: + if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' + str( + most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant = hp.parse_hgvs_variant( + most_3pr_hgvs_transcript_variant_delins_from_dup) + + try: + if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' 
+ str( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant = hp.parse_hgvs_variant( + genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) + + try: + if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' + str( + most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant = hp.parse_hgvs_variant( + most_5pr_hgvs_transcript_variant_delins_from_dup) + + if len( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( + most_3pr_hgvs_transcript_variant.posedit.edit.alt): + hgvs_genomic_possibilities.append( + genomic_from_most_3pr_hgvs_transcript_variant) + if len( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( + most_5pr_hgvs_transcript_variant.posedit.edit.alt): + hgvs_genomic_possibilities.append( + genomic_from_most_5pr_hgvs_transcript_variant) + + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + pass + 
exceptPass() + + # Set variables for problem specific warnings + gapped_alignment_warning = '' + corrective_action_taken = '' + gapped_transcripts = '' + auto_info = '' + + # Mark as not disparity detected + disparity_deletion_in = ['false', 'false'] + # Loop through to see if a gap can be located + possibility_counter = 0 + for possibility in hgvs_genomic_possibilities: + possibility_counter = possibility_counter + 1 + # Loop out stash possibilities which will not spot gaps so are empty + if possibility == '': + continue + + # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps + hgvs_genomic_variant = possibility + stored_hgvs_genomic_variant = copy.deepcopy(hgvs_genomic_variant) + + # Reverse normalize hgvs_genomic_variant: NOTE will replace ref + try: + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( + hgvs_genomic_variant) + except hgvs.exceptions.HGVSError as e: + # Strange error caused by gap in genomic + error = str(e) + if re.search('base start position must be <= end position', error): + if hgvs_genomic.posedit.edit.type == 'delins': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( + hgvs_genomic) + if hgvs_genomic.posedit.edit.type == 'del': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + rhb + hgvs_genomic.posedit.pos.start.base = end + 
hgvs_genomic.posedit.pos.end.base = start + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( + hgvs_genomic) + if re.search('insertion length must be 1', error): + if hgvs_genomic.posedit.edit.type == 'ins': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) + lhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + rhb = sf.fetch_seq(str(hgvs_genomic.ac), start, end) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( + hgvs_genomic) + + hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + # Store a copy for later use + stored_hgvs_genomic_5pr = copy.deepcopy(hgvs_genomic_5pr) + + # Make VCF + vcf_dict = va_H2V.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, + reverse_normalizer, sf) + chr = vcf_dict['chr'] + pos = vcf_dict['pos'] + ref = vcf_dict['ref'] + alt = vcf_dict['alt'] + + # Look for exonic gaps within transcript or chromosome + no_normalized_c = 'false' # Mark true to not produce an additional normalization of c. + + # Generate an end position + end = str(int(pos) + len(ref) - 1) + pos = str(pos) + + # Store a not real deletion insertion to test for gapping + stored_hgvs_not_delins = hp.parse_hgvs_variant(str( + hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' 
+ pos + '_' + end + 'del' + ref + 'ins' + alt) + v = [chr, pos, ref, alt] + + # Save a copy of current hgvs_coding + try: + saved_hgvs_coding = no_norm_evm.g_to_t(stored_hgvs_not_delins, + hgvs_coding.ac) + except Exception as e: + if str( + e) == 'start or end or both are beyond the bounds of transcript record': + saved_hgvs_coding = hgvs_coding + continue + + # Detect intronic variation using normalization + intronic_variant = 'false' + # Look for normalized variant options that do not match hgvs_coding + if orientation == -1: + # position genomic at its most 5 prime position + try: + query_genomic = reverse_normalizer.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript ant test for movement + try: + hgvs_seek_var = evm.g_to_t(query_genomic, hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if ( + hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding + + elif orientation != -1: + # position genomic at its most 3 prime position + try: + query_genomic = hn.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript and test for movement + try: + hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + seek_var = valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if ( + hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + 
hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding + + try: + intron_test = hn.normalize(hgvs_seek_var) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + intronic_variant = 'hard_fail' + else: + # Double check to see whether the variant is actually intronic? + for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' + + if intronic_variant != 'hard_fail': + if re.search('\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search('\d+\-', + str( + hgvs_seek_var.posedit.pos)) or re.search( + '\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search('\*\d+\-', str( + hgvs_seek_var.posedit.pos)): + # Double check to see whether the variant is actually intronic? 
+ for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' + + if intronic_variant != 'true': + # Flag RefSeqGene for ammendment + # amend_RefSeqGene = 'false' + # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths + if stored_hgvs_not_delins != '': + # Refresh hgvs_not_delins from stored_hgvs_not_delins + hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) + # This test will only occur in dup of single base, insertion or substitution + if not re.search('_', str(hgvs_not_delins.posedit.pos)): + if re.search('dup', + hgvs_genomic_5pr.posedit.edit.type) or re.search( + 'ins', hgvs_genomic_5pr.posedit.edit.type): + # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos + plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) + plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 + plussed_hgvs_not_delins.posedit.edit.ref = '' + transcript_variant = no_norm_evm.g_to_t(plussed_hgvs_not_delins, + str( + saved_hgvs_coding.ac)) + if (( + transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( + hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, + end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[ + 1:] + elif re.search('ins', str( + hgvs_genomic_5pr.posedit.edit)) and re.search('del', + str( + hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', str( + hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', + str( + hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, + end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[ + 1:] + else: + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = 
hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, + end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[ + 1:] + elif re.search('ins', str( + hgvs_genomic_5pr.posedit.edit)) and re.search('del', + str( + hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', str( + hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', + str( + hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, + end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[ + 1:] + else: + pass + else: + pass + tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + saved_hgvs_coding.ac) + # Create normalized version of tx_hgvs_not_delins + rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) + # Check for +1 base and adjust + if re.search('\+', + str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( + '\+', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: + exceptPass() + + elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 + 
rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myvm_t_to_g(test_tx_var, alt_chr, + no_norm_evm, vm, hp, hn, sf, + nr_vm) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str( + saved_hgvs_coding.ac)) + elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myvm_t_to_g(test_tx_var, alt_chr, + no_norm_evm, vm, hp, hn, sf, + nr_vm) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str( + saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # else: + # pass + + # Check for -ve base and adjust + elif re.search('\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( + '\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: + exceptPass() + elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base back to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # Delete the ref + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # Add the additional base to the ALT + start = 
rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 + end = rn_tx_hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myvm_t_to_g(test_tx_var, alt_chr, + no_norm_evm, vm, hp, hn, sf, + nr_vm) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str( + saved_hgvs_coding.ac)) + elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myvm_t_to_g(test_tx_var, alt_chr, + no_norm_evm, vm, hp, hn, sf, + nr_vm) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str( + saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + else: + exceptPass() + + # Logic + if len(hgvs_not_delins.posedit.edit.ref) < len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( + hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['chromosome', gap_length] + elif len(hgvs_not_delins.posedit.edit.ref) > len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( + rn_tx_hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['transcript', gap_length] + else: + re_capture_tx_variant = [] + for possibility in 
hgvs_genomic_possibilities: + if possibility == '': + continue + hgvs_t_possibility = vm.g_to_t(possibility, hgvs_coding.ac) + if hgvs_t_possibility.posedit.edit.type == 'ins': + try: + hgvs_t_possibility = vm.c_to_n(hgvs_t_possibility) + except: + continue + if hgvs_t_possibility.posedit.pos.start.offset != 0 or hgvs_t_possibility.posedit.pos.end.offset != 0: + continue + ins_ref = sf.fetch_seq(hgvs_t_possibility.ac, + hgvs_t_possibility.posedit.pos.start.base - 1, + hgvs_t_possibility.posedit.pos.start.base + 1) + try: + hgvs_t_possibility = vm.n_to_c(hgvs_t_possibility) + except: + continue + hgvs_t_possibility.posedit.edit.ref = ins_ref + hgvs_t_possibility.posedit.edit.alt = ins_ref[ + 0] + hgvs_t_possibility.posedit.edit.alt + \ + ins_ref[1] + if possibility.posedit.edit.type == 'ins': + ins_ref = sf.fetch_seq(possibility.ac, + possibility.posedit.pos.start.base - 1, + possibility.posedit.pos.end.base) + possibility.posedit.edit.ref = ins_ref + possibility.posedit.edit.alt = ins_ref[ + 0] + possibility.posedit.edit.alt + \ + ins_ref[1] + if len(hgvs_t_possibility.posedit.edit.ref) < len( + possibility.posedit.edit.ref): + gap_length = len(possibility.posedit.edit.ref) - len( + hgvs_t_possibility.posedit.edit.ref) + re_capture_tx_variant = ['transcript', gap_length, + hgvs_t_possibility] + hgvs_not_delins = possibility + hgvs_genomic_5pr = possibility + break + + if re_capture_tx_variant != []: + try: + tx_hgvs_not_delins = vm.c_to_n(re_capture_tx_variant[2]) + except: + tx_hgvs_not_delins = re_capture_tx_variant[2] + disparity_deletion_in = re_capture_tx_variant[0:-1] + else: + pass + + # Final sanity checks + try: + vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + except Exception as e: + if str( + e) == 'start or end or both are beyond the bounds of transcript record': + continue + try: + hn.normalize(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + + if re.match('Normalization of intronic variants is 
not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + continue + elif re.match('Normalization of intronic variants is not supported', + error): + # We know that this cannot be because of an intronic variant, so must be aligned to tx gap + disparity_deletion_in = ['transcript', 'Requires Analysis'] + + # Recreate hgvs_genomic + if disparity_deletion_in[0] == 'transcript': + hgvs_genomic = hgvs_not_delins + + # Find oddly placed gaps where the tx variant is encompassed in the gap + if disparity_deletion_in[0] == 'false' and ( + possibility_counter == 3 or possibility_counter == 4): + rg = reverse_normalizer.normalize(hgvs_not_delins) + rtx = vm.g_to_t(rg, tx_hgvs_not_delins.ac) + fg = hn.normalize(hgvs_not_delins) + ftx = vm.g_to_t(fg, tx_hgvs_not_delins.ac) + if ( + rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( + ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): + exons = hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, alt_aln_method) + exonic = False + for ex_test in exons: + if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ + 7]) and ftx.posedit.pos.end.base in range(ex_test[6], + ex_test[7]): + exonic = True + if exonic is True: + hgvs_not_delins = fg + hgvs_genomic = fg + hgvs_genomic_5pr = fg + try: + tx_hgvs_not_delins = vm.c_to_n(ftx) + except Exception: + tx_hgvs_not_delins = ftx + disparity_deletion_in = ['transcript', 'Requires Analysis'] + + # Pre-processing of tx_hgvs_not_delins + try: + if tx_hgvs_not_delins.posedit.edit.alt is None: + tx_hgvs_not_delins.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' 
+ str( + tx_hgvs_not_delins.posedit.pos.start) + '_' + str( + tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins = hp.parse_hgvs_variant( + tx_hgvs_not_delins_delins_from_dup) + + if disparity_deletion_in[0] == 'transcript': + # amend_RefSeqGene = 'true' + # ANY VARIANT WHOLLY WITHIN THE GAP + if (re.search('\+', + str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( + '\-', str(tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search('\+', + str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( + '\-', str(tx_hgvs_not_delins.posedit.pos.end))): + gapped_transcripts = gapped_transcripts + ' ' + str( + tx_hgvs_not_delins.ac) + + # Copy the current variant + tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) + try: + if tx_gap_fill_variant.posedit.edit.alt is None: + tx_gap_fill_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( + tx_gap_fill_variant.posedit.pos.start) + '_' + str( + tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant = hp.parse_hgvs_variant( + tx_gap_fill_variant_delins_from_dup) + + # Identify which half of the NOT-intron the start position of the variant is in + if re.search('\-', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 + tx_gap_fill_variant.posedit.pos.start.offset = int( + '0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.offset = int( + '0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + elif re.search('\+', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.offset = int( + '0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 + tx_gap_fill_variant.posedit.pos.end.offset = int( + '0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + + try: + tx_gap_fill_variant = vm.n_to_c(tx_gap_fill_variant) + except: + exceptPass() + genomic_gap_fill_variant = vm.t_to_g(tx_gap_fill_variant, + reverse_normalized_hgvs_genomic.ac) + genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref + + try: + c_tx_hgvs_not_delins = vm.n_to_c(tx_hgvs_not_delins) + except Exception: + c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) + genomic_gap_fill_variant_alt = vm.t_to_g(c_tx_hgvs_not_delins, + hgvs_genomic_5pr.ac) + + # Ensure an ALT exists + try: + if genomic_gap_fill_variant_alt.posedit.edit.alt is None: + genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + 
genomic_gap_fill_variant.type + '.' + str( + genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant = hp.parse_hgvs_variant( + genomic_gap_fill_variant_delins_from_dup) + genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( + genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt = hp.parse_hgvs_variant( + genomic_gap_fill_variant_alt_delins_from_dup) + + # Correct insertion alts + if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': + append_ref = sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, + genomic_gap_fill_variant_alt.posedit.pos.end.base) + genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ + 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ + append_ref[1] + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) + if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: + alternate_bases = list( + genomic_gap_fill_variant_alt.posedit.edit.alt) + else: + # Deletions with no ins + pre_alternate_bases = list( + genomic_gap_fill_variant_alt.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = genomic_gap_fill_variant.posedit.pos.start.base + alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base + ref_base_dict = {} + for base in 
reference_bases: + ref_base_dict[ref_start] = str(base) + ref_start = ref_start + 1 + + alt_base_dict = {} + + # NEED TO SEARCH FOR RANGE = and replace with interval_range + # Need to search for int and replace with integer + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for integer in range( + genomic_gap_fill_variant_alt.posedit.pos.start.base, + genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): + if integer == alt_start: + alt_base_dict[integer] = str(''.join(alternate_bases)) + else: + alt_base_dict[integer] = 'X' + + # Generate the alt sequence + alternate_sequence_bases = [] + for integer in range( + genomic_gap_fill_variant.posedit.pos.start.base, + genomic_gap_fill_variant.posedit.pos.end.base + 1, + 1): + if integer in alt_base_dict.keys(): + alternate_sequence_bases.append(alt_base_dict[integer]) + else: + alternate_sequence_bases.append(ref_base_dict[integer]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Add the new alt to the gap fill variant and generate transcript variant + genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence + hgvs_refreshed_variant = vm.g_to_t(genomic_gap_fill_variant, + tx_gap_fill_variant.ac) + + # Set warning + gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) + disparity_deletion_in[1] = [gap_size] + auto_info = auto_info + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + if re.match('\-', str(for_location_c.posedit.pos.start.offset)): + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + else: + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + auto_info = auto_info + '%s' % (gap_position) + + else: + if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: + # In this instance, we have identified a transcript gap but the n. version of + # the transcript variant but do not have a position which actually hits the gap, + # so the variant likely spans the gap, and is not picked up by an offset. + try: + c1 = vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + g1 = nr_vm.t_to_g(c1, hgvs_genomic.ac) + g3 = nr_vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + ng2 = hn.normalize(g2) + g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( + len(g3.posedit.edit.ref) - 1) + try: + c2 = vm.g_to_t(g3, c1.ac) + if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: + pass + else: + tx_hgvs_not_delins = c2 + try: + tx_hgvs_not_delins = vm.c_to_n(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSError: + exceptPass() + except hgvs.exceptions.HGVSInvalidVariantError: + exceptPass() + + if re.search('\+', str( + tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + '\+', + str( + tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = vm.t_to_g(c2, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search('\+', str( + tx_hgvs_not_delins.posedit.pos.end)) and not re.search('\+', + str( + tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = vm.t_to_g(c2, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base + gpe = for_location_c.posedit.pos.end.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search('\-', str( + tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + '\-', + str( + tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = vm.t_to_g(c2, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search('\-', str( + tx_hgvs_not_delins.posedit.pos.end)) and not re.search('\-', + str( + tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = vm.t_to_g(c2, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base - 1 + gpe = for_location_c.posedit.pos.end.base + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + else: + auto_info = auto_info + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + '\n' + hgvs_refreshed_variant = tx_hgvs_not_delins + + # GAP IN THE CHROMOSOME + elif disparity_deletion_in[0] == 'chromosome': + # amend_RefSeqGene = 'true' + if possibility_counter == 3: + hgvs_refreshed_variant = stash_tx_right + elif possibility_counter == 4: + hgvs_refreshed_variant = stash_tx_left + else: + hgvs_refreshed_variant = chromosome_normalized_hgvs_coding + # Warn + auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( + hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' transcript base(s) that fail to align to chromosome ' + str( + hgvs_genomic.ac) + '\n' + else: + # Keep the same by re-setting rel_var + hgvs_refreshed_variant = hgvs_coding + # amend_RefSeqGene = 'false' + + # Edit the output + if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', + str( + hgvs_refreshed_variant.type)): + hgvs_refreshed_variant = no_norm_evm.n_to_c(hgvs_refreshed_variant) + else: + pass + + try: + hn.normalize(hgvs_refreshed_variant) + except Exception as e: + error = str(e) + # Ensure the final variant is not intronic nor does it cross exon boundaries + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + hgvs_refreshed_variant = saved_hgvs_coding + else: + continue + + # Quick check to make sure the coding variant has not changed + try: + to_test = hn.normalize(hgvs_refreshed_variant) + except: + to_test = hgvs_refreshed_variant + if str(to_test.posedit.edit) != str(hgvs_coding.posedit.edit): + # Try the next available genomic option + if hgvs_coding.posedit.edit.type == 'identity' and to_test.posedit.edit.type == 'identity': + hgvs_coding = to_test + else: + continue + + # Update 
hgvs_genomic + hgvs_alt_genomic = va_func.myvm_t_to_g(hgvs_refreshed_variant, alt_chr, + no_norm_evm, vm, hp, hn, sf, nr_vm) + if hgvs_alt_genomic.posedit.edit.type == 'identity': + re_c = vm.g_to_t(hgvs_alt_genomic, hgvs_refreshed_variant.ac) + if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): + shuffle_left_g = copy.copy(hgvs_alt_genomic) + shuffle_left_g.posedit.edit.ref = '' + shuffle_left_g.posedit.edit.alt = '' + shuffle_left_g.posedit.pos.start.base = shuffle_left_g.posedit.pos.start.base - 1 + shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 + shuffle_left_g = reverse_normalizer.normalize(shuffle_left_g) + re_c = vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) + if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): + hgvs_alt_genomic = shuffle_left_g + + # If it is intronic, these vairables will not have been set + else: + # amend_RefSeqGene = 'false' + no_normalized_c = 'false' + + # Break if gap has been detected + if disparity_deletion_in[0] != 'false': + break + + # Normailse hgvs_genomic + try: + hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) + except hgvs.exceptions.HGVSError as e: + # Strange error caused by gap in genomic + error = str(e) + if re.search('base start position must be <= end position', error) and \ + disparity_deletion_in[0] == 'chromosome': + if hgvs_alt_genomic.posedit.edit.type == 'delins': + start = hgvs_alt_genomic.posedit.pos.start.base + end = hgvs_alt_genomic.posedit.pos.end.base + lhb = sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) + rhb = sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) + hgvs_alt_genomic.posedit.edit.ref = lhb + rhb + hgvs_alt_genomic.posedit.edit.alt = lhb + hgvs_alt_genomic.posedit.edit.alt + rhb + hgvs_alt_genomic.posedit.pos.start.base = end + hgvs_alt_genomic.posedit.pos.end.base = start + hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) + if hgvs_alt_genomic.posedit.edit.type == 'del': + start = 
hgvs_alt_genomic.posedit.pos.start.base + end = hgvs_alt_genomic.posedit.pos.end.base + lhb = sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) + rhb = sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) + hgvs_alt_genomic.posedit.edit.ref = lhb + rhb + hgvs_alt_genomic.posedit.edit.alt = lhb + rhb + hgvs_alt_genomic.posedit.pos.start.base = end + hgvs_alt_genomic.posedit.pos.end.base = start + hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) + + # Refresh the :g. variant + multi_g.append(hgvs_alt_genomic) + else: + multi_g.append(hgvs_alt_genomic) + corrective_action_taken = 'false' + + # In this instance, the gap code has generally found an incomplete-alignment rather than a + # truly gapped alignment. + except KeyError: + warnings = warnings + ': Suspected incomplete alignment between transcript %s and ' \ + 'genomic reference sequence %s' % (hgvs_coding.ac, + alt_chr) + continue + except hgvs.exceptions.HGVSError as e: + exc_type, exc_value, last_traceback = sys.exc_info() + te = traceback.format_exc() + error = str(te) + logger.error(str(exc_type) + " " + str(exc_value)) + logger.debug(error) + continue + + if multi_g != []: + multi_g.sort() + multi_gen_vars = multi_g # '|'.join(multi_g) + else: + multi_gen_vars = [] + else: + # HGVS genomic in the absence of a transcript variant + if genomic_variant != '': + multi_gen_vars = [hgvs_genomic_variant] + else: + multi_gen_vars = [] + + # Dictionaries of genomic loci + alt_genomic_dicts = [] + primary_genomic_dicts = {} + + if len(multi_gen_vars) != 0: + for alt_gen_var in multi_gen_vars: + for build in self.genome_builds: + test = vvChromasomes.supported_for_mapping(alt_gen_var.ac, build) + if test == 'true': + try: + vcf_dict = va_H2V.report_hgvs2vcf(alt_gen_var, build, reverse_normalizer, sf) + except hgvs.exceptions.HGVSInvalidVariantError as e: + continue + # Identify primary assembly positions + if re.match('NC_', alt_gen_var.ac): + if re.match('GRC', build): + primary_genomic_dicts[build.lower()] = 
{ + 'hgvs_genomic_description': valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['grc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } + + else: + primary_genomic_dicts[build.lower()] = { + 'hgvs_genomic_description': valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['ucsc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } + if build == 'GRCh38': + vcf_dict = va_H2V.report_hgvs2vcf(alt_gen_var, 'hg38', reverse_normalizer, + sf) + primary_genomic_dicts['hg38'] = { + 'hgvs_genomic_description': valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['ucsc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } + + continue + + else: + if re.match('GRC', build): + dict = {build.lower(): {'hgvs_genomic_description': valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['grc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } + } + else: + dict = {build.lower(): {'hgvs_genomic_description': valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['ucsc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } + } + # Append + alt_genomic_dicts.append(dict) + + if build == 'GRCh38': + vcf_dict = va_H2V.report_hgvs2vcf(alt_gen_var, 'hg38', reverse_normalizer, + sf) + dict = {'hg38': {'hgvs_genomic_description': valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['ucsc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } + } + # Append + alt_genomic_dicts.append(dict) + continue + else: + # May need to account for ALT NC_ + pass + + # Warn not directly mapped to specified genome build + if genomic_accession != '': + caution = '' + if primary_assembly.lower() not in primary_genomic_dicts.keys(): + warnings = warnings + ': ' + str( + hgvs_coding) + ' cannot be mapped directly to genome build ' + primary_assembly + ': See alternative genomic loci or alternative genome builds for 
aligned genomic positions' + + warn_list = warnings.split(': ') + warnings_out = [] + for warning in warn_list: + warning.strip() + warning = warning.replace("'", "") + if warning == '': + continue + warnings_out.append(warning) + # Remove duplicate elements but maintain the order + seen = {} + no_rep_list = [seen.setdefault(x, x) for x in warnings_out if x not in seen] + warnings_out = no_rep_list + + # Ensure Variants have had the refs removed. + # if not hasattr(posedit, refseqgene_variant): + if refseqgene_variant != '': + try: + refseqgene_variant = valstr(hgvs_refseqgene_variant) + except: + exceptPass() + + # Add single letter AA code to protein descriptions + predicted_protein_variant_dict = {"tlr": str(predicted_protein_variant), "slr": ''} + if predicted_protein_variant != '': + if not 'Non-coding :n.' in predicted_protein_variant: + try: + format_p = predicted_protein_variant + format_p = re.sub('\(LRG_.+?\)', '', format_p) + re_parse_protein = hp.parse_hgvs_variant(format_p) + re_parse_protein_singleAA = output_formatter.single_letter_protein(re_parse_protein) + predicted_protein_variant_dict["slr"] = str(re_parse_protein_singleAA) + except hgvs.exceptions.HGVSParseError: + exceptPass() + else: + predicted_protein_variant_dict["slr"] = str(predicted_protein_variant) + + # Populate the dictionary + dict_out['submitted_variant'] = submitted + dict_out['gene_symbol'] = gene_symbol + dict_out['transcript_description'] = transcript_description + dict_out['hgvs_transcript_variant'] = tx_variant + dict_out['genome_context_intronic_sequence'] = genome_context_transcript_variant + dict_out['refseqgene_context_intronic_sequence'] = RefSeqGene_context_transcript_variant + dict_out['hgvs_refseqgene_variant'] = refseqgene_variant + dict_out['hgvs_predicted_protein_consequence'] = predicted_protein_variant_dict + dict_out['validation_warnings'] = warnings_out + dict_out['hgvs_lrg_transcript_variant'] = lrg_transcript_variant + dict_out['hgvs_lrg_variant'] = 
lrg_variant + dict_out['alt_genomic_loci'] = alt_genomic_dicts + dict_out['primary_assembly_loci'] = primary_genomic_dicts + dict_out['reference_sequence_records'] = '' + + # Add links to reference_sequence_records + ref_records = external.get_urls(dict_out) + if ref_records != {}: + dict_out['reference_sequence_records'] = ref_records + + # Append to a list for return + batch_out.append(dict_out) + else: + continue + else: + continue + + """ + Structure the output into dictionaries rather than a list with descriptive keys + and a validation type flag + """ + logger.trace("Populating output dictionary") + # Create output dictionary + validation_output = {'flag': None} + + # For gene outputs, i.e. those that hit transcripts + # dotter = '' + if set_output_type_flag == 'gene': + validation_output['flag'] = 'gene_variant' + validation_error_counter = 0 + for valid_v in batch_out: + if valid_v['validation_warnings'] == ['Validation error']: + validation_error_counter = validation_error_counter + 1 + identification_key = 'Validation_Error_%s' % (str(validation_error_counter)) + else: + identification_key = '%s' % (str(valid_v['hgvs_transcript_variant'])) + + # if identification_key not in validation_output.keys(): + validation_output[identification_key] = valid_v + # else: + # dotter = dotter + ' ' + # validation_output[identification_key + dotter] = valid_v + + # For warning only outputs + # Should only ever be 1 output as an error or a warning of the following types + # Gene symbol as reference sequence + # Gene as transcript reference sequence + if set_output_type_flag == 'warning': + validation_output['flag'] = 'warning' + validation_error_counter = 0 + validation_warning_counter = 0 + if len(batch_out) == 0: + validation_output['flag'] = 'empty_result' + for valid_v in batch_out: + if valid_v['validation_warnings'] == ['Validation error']: + validation_error_counter = validation_error_counter + 1 + identification_key = 'validation_error_%s' % 
(str(validation_error_counter)) + else: + validation_warning_counter = validation_warning_counter + 1 + identification_key = 'validation_warning_%s' % (str(validation_warning_counter)) + validation_output[identification_key] = valid_v + + # Intergenic variants + validation_intergenic_counter = 0 + if set_output_type_flag == 'intergenic': + validation_output['flag'] = 'intergenic' + for valid_v in batch_out: + validation_intergenic_counter = validation_intergenic_counter + 1 + identification_key = 'Intergenic_Variant_%s' % (str(validation_intergenic_counter)) + + # Attempt to liftover between genome builds + # Note: pyliftover uses the UCSC liftOver tool. + # https://pypi.org/project/pyliftover/ + genomic_position_info = valid_v['primary_assembly_loci'] + for g_p_key in genomic_position_info.keys(): + + # Identify the current build and hgvs_genomic descripsion + if re.match('hg', g_p_key): + # incoming_vcf = genomic_position_info[g_p_key]['vcf'] + # set builds + if g_p_key == 'hg38': + build_to = 'hg19' + build_from = 'hg38' + if g_p_key == 'hg19': + build_to = 'hg38' + build_from = 'hg19' + elif re.match('grc', g_p_key): + # incoming_vcf = genomic_position_info[g_p_key]['vcf'] + # set builds + if g_p_key == 'grch38': + build_to = 'GRCh37' + build_from = 'GRCh38' + if g_p_key == 'grch37': + build_to = 'GRCh38' + build_from = 'GRCh37' + + # Liftover + lifted_response = lift_over(genomic_position_info[g_p_key]['hgvs_genomic_description'], build_from, build_to, hn, vm, vr, hdp, hp, reverse_normalizer, sf, evm) + + # Sort the respomse into primary assembly and ALT + primary_assembly_loci = {} + alt_genomic_loci = [] + for build_key, accession_dict in lifted_response.iteritems(): + try: + accession_key = accession_dict.keys()[0] + if re.match('NC_', accession_dict[accession_key]['hgvs_genomic_description']): + primary_assembly_loci[build_key.lower()] = accession_dict[accession_key] + else: + alt_genomic_loci.append({build_key.lower(): accession_dict[accession_key]}) + + 
# KeyError if the dicts are empty + except KeyError: + continue + + # Add the dictionaries from lifted response to the output + if primary_assembly_loci != {}: + valid_v['primary_assembly_loci'] = primary_assembly_loci + if alt_genomic_loci != []: + valid_v['alt_genomic_loci'] = alt_genomic_loci + + # Finalise the output dictionary + validation_output[identification_key] = valid_v + + # Add error strings to validation output + # ''' + metadata = {} + logger.info("Variant successfully validated") + logs = [] + logString = logger.getString() + for l in logger.getString().split("\n"): + logs.append(l) + metadata["logs"] = logString + metadata["variant"] = batch_variant + metadata["assembly"] = selected_assembly + metadata["transcripts"] = select_transcripts + metadata['seqrepo_directory'] = HGVS_SEQREPO_DIR + metadata['uta_url'] = UTA_DB_URL + metadata['py_liftover_directory'] = PYLIFTOVER_DIR + metadata['variantvalidator_data_url'] = VALIDATOR_DB_URL + metadata['entrez_id'] = ENTREZ_ID + metadata['variantvalidator_version'] = VERSION + metadata['variantvalidator_hgvs_version'] = hgvs_version + metadata['uta_schema'] = str(hdp.data_version()) + metadata['seqrepo_db'] = HGVS_SEQREPO_DIR.split('/')[-1] + validation_output["metadata"] = metadata + # ''' + # Measure time elapsed + time_now = time.time() + elapsed_time = time_now - start_time + logger.debug('validation time = ' + str(elapsed_time)) + + # return batch_out + return validation_output + + # Bug catcher + except KeyboardInterrupt: + raise + except BaseException as e: + # Debug mode + exc_type, exc_value, last_traceback = sys.exc_info() + te = traceback.format_exc() + # tr = ''.join(traceback.format_stack()) + tbk = [str(exc_type), str(exc_value), str(te)] + er = '\n'.join(tbk) + # raise variantValidatorError('Validation error') + # Return + # return + logger.critical(str(exc_type) + " " + str(exc_value)) + logger.debug(str(er)) diff --git a/VariantValidator/modules/vvDBGet.py 
b/VariantValidator/modules/vvDBGet.py index 35ffd9d7..4870cd21 100644 --- a/VariantValidator/modules/vvDBGet.py +++ b/VariantValidator/modules/vvDBGet.py @@ -83,3 +83,33 @@ def get_uta_symbol(self,gene_symbol): def get_hgnc_symbol(self,gene_symbol): # returns the HGNC gene symbol when UTA gene symbol is input return str(self.get_hgncSymbol(gene_symbol)[0]) + # from external.py + def get_urls(self,dict_out): + # Provide direct links to reference sequence records + # Add urls + report_urls = {} + if 'NM_' in dict_out['hgvs_transcript_variant'] or 'NR_' in dict_out['hgvs_transcript_variant']: + report_urls['transcript'] = 'https://www.ncbi.nlm.nih.gov' \ + '/nuccore/%s' % dict_out['hgvs_transcript_variant'].split(':')[0] + if 'NP_' in dict_out['hgvs_predicted_protein_consequence']['slr']: + report_urls['protein'] = 'https://www.ncbi.nlm.nih.gov' \ + '/nuccore/%s' % str(dict_out['hgvs_predicted_protein_consequence']['slr']).split(':')[0] + if 'NG_' in dict_out['hgvs_refseqgene_variant']: + report_urls['refseqgene'] = 'https://www.ncbi.nlm.nih.gov' \ + '/nuccore/%s' % dict_out['hgvs_refseqgene_variant'].split(':')[0] + if 'LRG' in dict_out['hgvs_lrg_variant']: + lrg_id = dict_out['hgvs_lrg_variant'].split(':')[0] + lrg_data = self.get_LRG_data_from_LRGid(lrg_id) + lrg_status = str(lrg_data[4]) + if lrg_status == 'public': + report_urls['lrg'] = 'http://ftp.ebi.ac.uk/pub' \ + '/databases/lrgex/%s.xml' % dict_out['hgvs_lrg_variant'].split(':')[0] + else: + report_urls['lrg'] = 'http://ftp.ebi.ac.uk' \ + '/pub/databases/lrgex' \ + '/pending/%s.xml' % dict_out['hgvs_lrg_variant'].split(':')[0] + # Ensembl needs to be added at a later date + # "http://www.ensembl.org/id/" ? What about historic versions????? 
+ + return report_urls + diff --git a/VariantValidator/modules/vvDatabase.py b/VariantValidator/modules/vvDatabase.py index 54869fab..2faa5372 100644 --- a/VariantValidator/modules/vvDatabase.py +++ b/VariantValidator/modules/vvDatabase.py @@ -1,9 +1,13 @@ import mysql.connector from mysql.connector.pooling import MySQLConnectionPool from vvLogging import logger -from vvFunctions import entrez_efetch,hgnc_rest,handleCursor +import vvFunctions as fn +from vvFunctions import handleCursor from vvDBInsert import vvDBInsert from vvDBGet import vvDBGet +import urllib2 +import copy + import re import os @@ -68,7 +72,7 @@ def update_transcript_info_record(self,accession, hdp): hgnc_symbol = previous_entry['hgnc_symbol'] uta_symbol = previous_entry['uta_symbol'] try: - record = entrez_efetch(self.val,db="nucleotide", id=accession, rettype="gb", retmode="text") + record = fn.entrez_efetch(self.val,db="nucleotide", id=accession, rettype="gb", retmode="text") version = record.id description = record.description variant = '0' @@ -93,14 +97,14 @@ def update_transcript_info_record(self,accession, hdp): uta_symbol = str(uta_info[6]) # First perform a search against the input gene symbol or the symbol inferred from UTA - initial = hgnc_rest(path = "/fetch/symbol/" + uta_symbol) + initial = fn.hgnc_rest(path = "/fetch/symbol/" + uta_symbol) # Check for a record if str(initial['record']['response']['numFound']) != '0': hgnc_symbol = uta_symbol # No record found, is it a previous symbol? 
else: # Search hgnc rest to see if symbol is out of date - rest_data = hgnc_rest(path = "/search/prev_symbol/" + uta_symbol) + rest_data = fn.hgnc_rest(path = "/search/prev_symbol/" + uta_symbol) # If the name is correct no record will be found if rest_data['error'] == 'false': if int(rest_data['record']['response']['numFound']) == 0: @@ -154,3 +158,377 @@ def update_lrg_p_rs_p_lookup(self,lrg_p, rs_p): rspID = self.get.get_RefSeqProteinID_from_lrgProteinID(lrg_p) if rspID == 'none': self.insert.insert_LRG_protein_data(lrg_p, rs_p) + # From variantValidator.py + def update_vv_data(self): + # Update refSeqGene Primary assembly alignment data + self.update_rsg() + # Update LRG records + self.update_lrg() + # From update_refseqgene_nomissmatch.py + def update_rsg(self): + logger.info('Updating RefSeqGene no Missmatch MySQL data') + # Set os path + # Set up os paths data and log folders + ROOT = os.path.dirname(os.path.abspath(__file__)) + + # Download data from RefSeqGene + # Download data + rsg = urllib2.Request('http://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/gene_RefSeqGene') + response = urllib2.urlopen(rsg) + rsg_file = response.read() + rsg_data_line = rsg_file.split('\n') + rsg_data = [] + for data in rsg_data_line: + rsg_data.append(data) + + # Download data + grch37 = urllib2.Request( + 'http://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/GCF_000001405.25_refseqgene_alignments.gff3') + response = urllib2.urlopen(grch37) + grch37_file = response.read() + grch37_data_line = grch37_file.split('\n') + grch37_align_data = [] + for data in grch37_data_line: + grch37_align_data.append(data) + + # Download data + grch38 = urllib2.Request( + 'http://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/GCF_000001405.28_refseqgene_alignments.gff3') + response = urllib2.urlopen(grch38) + grch38_file = response.read() + grch38_data_line = grch38_file.split('\n') + grch38_align_data = [] + for data in grch38_data_line: + grch38_align_data.append(data) + + # Open Lists + # 
rsg_data = open(os.path.join(ROOT, 'gene_RefSeqGene'), 'r') + rsg_id_info = [] + # grch37_align_data = open(os.path.join(ROOT, 'GCF_000001405.25_refseqgene_alignments.gff3'), 'r') + grch37_align = [] + # grch38_align_data = open(os.path.join(ROOT, 'GCF_000001405.28_refseqgene_alignments.gff3'), 'r') + grch38_align = [] + + # Place the required data from each file into a dictionary + hash = re.compile('#') + for line in rsg_data: + if hash.search(line): + pass + else: + line = line.strip() + info = line.split() + if len(info) == 0: + pass + else: + dict = {'symbol': info[2], 'rsg_id': info[3], 'gene_id': info[1]} + rsg_id_info.append(dict) + + # Create dictionary to store RefSeqGene and gene symbol data NOTE RefSeqGene ID stored without version number! + rsg_to_symbol = {} + # Collect the data + for ent in rsg_id_info: + rsg_id = copy.deepcopy(ent['rsg_id']) + rsg_id = rsg_id.split('.')[0] + rsg_to_symbol[rsg_id] = {'symbol': ent['symbol'], 'gene_id': ent['gene_id']} + + # Count total number of NG to NC mappings + total_rsg_to_nc = 0 + total_rsg_to_nc_rejected = 0 + for line in grch37_align_data: + # Count NG_ to NC_ and remove the entries we don't care about! 
+ if re.search('NC_', line) and re.search('NG_', line): + total_rsg_to_nc = total_rsg_to_nc + 1 + else: + continue + if hash.search(line): + pass + elif not re.search('gap_count=0', line): + if re.search('NC_', line) and re.search('NG_', line): + total_rsg_to_nc_rejected = total_rsg_to_nc_rejected + 1 + # print line + pass + else: + line = line.strip() + info = line.split('\t') + if len(info) != 9: + pass + else: + metrics = info[8].split(';') + id_ori = metrics[1].replace('Target=', '') + id_ori_list = id_ori.split() + dict = {'rsg_id': id_ori_list[0], 'chr_id': info[0], 'rsg_start': info[3], 'rsg_end': info[4], + 'ori': id_ori_list[3]} + grch37_align.append(dict) + + for line in grch38_align_data: + if re.search('NC_', line) and re.search('NG_', line): + total_rsg_to_nc = total_rsg_to_nc + 1 + else: + continue + if hash.search(line): + pass + elif not re.search('gap_count=0', line): + if re.search('NC_', line) and re.search('NG_', line): + total_rsg_to_nc_rejected = total_rsg_to_nc_rejected + 1 + # print line + pass + else: + line = line.strip() + info = line.split('\t') + if len(info) != 9: + pass + else: + metrics = info[8].split(';') + id_ori = metrics[1].replace('Target=', '') + id_ori_list = id_ori.split() + dict = {'rsg_id': id_ori_list[0], 'chr_id': info[0], 'rsg_start': info[3], 'rsg_end': info[4], + 'ori': id_ori_list[3]} + grch38_align.append(dict) + + # Create a data array containing the database + db = [] + # map line + for line in grch37_align: + ml = [] + link = line['rsg_id'] + ml.append(link) + ml.append(line['chr_id']) + ml.append('GRCh37') + ml.append(line['rsg_start']) + ml.append(line['rsg_end']) + ml.append(line['ori']) + # Add the additional data from rsg_id_info + for data in rsg_id_info: + if link == data['rsg_id']: + ml.append(data['symbol']) + ml.append(data['gene_id']) + else: + continue + # Create the entry and append to db + db.append(ml) + + for line in grch38_align: + ml = [] + link = line['rsg_id'] + ml.append(link) + 
ml.append(line['chr_id']) + ml.append('GRCh38') + ml.append(line['rsg_start']) + ml.append(line['rsg_end']) + ml.append(line['ori']) + # Add the additional data from rsg_id_info + for data in rsg_id_info: + if link == data['rsg_id']: + ml.append(data['symbol']) + ml.append(data['gene_id']) + else: + continue + # Create the entry and append to db + db.append(ml) + + # Known missing identifiers + known = { + 'NG_021289.1' : {'symbol' : 'CFAP47', 'gene_id' : '286464'}, + 'NG_027707.1' : {'symbol' : 'DUX4L1', 'gene_id' : '22947'}, + 'NG_033266.1' : {'symbol' : 'DSE', 'gene_id': '29940'}, + 'NG_061543.1' : {'symbol' : 'CYP1A2', 'gene_id': '1544'}, + 'NG_061374.1' : {'symbol' : 'CYP1A1', 'gene_id': '1543'}, + 'NG_059281.1' : {'symbol' : 'HBB', 'gene_id': '3043'}, + 'NG_012639.1' : {'symbol' : 'VHLL', 'gene_id': '391104'}, + 'NG_059186.1' : {'symbol' : 'HBA1', 'gene_id': '3040'}, + 'NG_059271.1' : {'symbol' : 'HBA2', 'gene_id': '3040'} + } + + # Known Obsolete identifiers + obsolete = { + 'NG_016553.1': 'OBSOLETE', + 'NG_012639.1': 'Removed due to questionable status' + } + + # Identify lines with missing data e.g. gene symbols + for line in db: + try: + line[6] + except IndexError: + try: + identifier = copy.deepcopy(line[0]) + identifier = identifier.split('.')[0] + line.append(rsg_to_symbol[identifier]['symbol']) + line.append(rsg_to_symbol[identifier]['gene_id']) + except KeyError: + try: + line.append(known[line[0]]['symbol']) + line.append(known[line[0]]['gene_id']) + except KeyError: + check = obsolete[line[0]] + logger.info(str(line[0]) + ' : ' + check) + + # Open a text file to be used as a simple database and write the database + # rsg_db = open(os.path.join(ROOT, 'rsg_chr_db.txt'), 'w') + + to_mysql = [] + for line in db: + if line[0] in obsolete.keys(): + continue + # Only gap-less RefSeqGenes will have passed. 
The rest will be alternatively curated + write = [] + # Take the mapping data + write = copy.deepcopy(line[0:6]) + # add RSG ranges + write.append('1') + end_rsg = int(line[4]) - int(line[3]) + 1 + end_rsg = str(end_rsg) + write.append(end_rsg) + # Create block data chr then rsg + chr_block = str(line[3]) + '-' + str(line[4]) + write.append(chr_block) + rsg_block = str(write[6]) + '-' + str(write[7]) + write.append(rsg_block) + # Add gene ID and Gene symbol(s) + write.append(line[7]) + write.append(line[6]) + # write_me = '\t'.join(write) + # rsg_db.write(write_me + '\n') + del write[6] + to_mysql.append(write) + + # Set up code to write to database + for line in to_mysql: + current_symbol = self.get.get_gene_symbol_from_refSeqGeneID(line[0]) + if line[10] == current_symbol: + pass + else: + if current_symbol != 'none': + line[10] = current_symbol + else: + pass + self.update_refSeqGene_loci(line) + + # Close database + # rsg_db.close() + + logger.info( 'Total NG_ to NC_ alignments = ' + str(total_rsg_to_nc)) + logger.info( 'Gapps within NG_ to NC_ alignments = ' + str(total_rsg_to_nc_rejected)) + + logger.info( 'complete') + return + #from compile_lrg_data, this function was originally just called "update" + def update_lrg(self): + logger.info('Updating LRG lookup tables') + lr2rs_download = urllib2.Request('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_transcripts_xrefs.txt') + # Open and read + lr2rs_data = urllib2.urlopen(lr2rs_download) + lr2rs = lr2rs_data.read() + # List the data + lr2rs = lr2rs.strip() + lr2rs = lr2rs.split('\n') + + # Download + lrg_status_download = urllib2.Request('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_GRCh38.txt') + # Open and read + lrg_status_data = urllib2.urlopen(lrg_status_download) + lrg_status = lrg_status_data.read() + # List the data + lrg_status = lrg_status.strip() + lrg_status = lrg_status.split('\n') + + # Download + rs2lr_download = 
urllib2.Request('http://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/RefSeqGene/LRG_RefSeqGene') + # Open and read + rs2lr_data = urllib2.urlopen(rs2lr_download) + rs2lr = rs2lr_data.read() + # List the data + rs2lr = rs2lr.strip() + rs2lr = rs2lr.split('\n') + + # Download LRG transcript (_t) to LRG Protein (__p) data file + lr_t2p_downloaded = urllib2.Request('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_proteins_RefSeq.txt') + # Open and read + lr_t2p_data = urllib2.urlopen(lr_t2p_downloaded) + lr_t2p = lr_t2p_data.read() + # List the data + lr_t2p = lr_t2p.strip() + lr_t2p = lr_t2p.split('\n') + + # Dictionary the status by LRG_ID + lrg_status_dict = {} + # Compile dictionary + for line in lrg_status: + if re.search('^#', line): + continue + else: + list = line.split() + lrgID = list[0] + stat = list[2] + lrg_status_dict[lrgID] = stat + + # Required lookup tables + # LRG_ID GeneSymbol RefSeqGeneID status + # LRG_ID RefSeqTranscriptID + # LRG_T2LRG_P + + logger.info( 'Update LRG and LRG_transcript lookup tables' ) + # Populate lists lrg_rs_lookup (LRG to RefSeqGene) and lrg_t2nm_ (LRG Transcript to RefSeq Transcript) + for line in lr2rs: + if re.search('^#', line): + continue + else: + list = line.split() + # Assign objects + lrg_id = list[0] + symbol = list[1] + rsgid = list[2] + lrg_tx = str(list[0]) + str(list[3]) + rstid = list[4] + status = lrg_status_dict[lrg_id] + # pass data to relevant lists + # lrg_rs_lookup + lrg_rs_lookup = [lrg_id, symbol, rsgid, status] + + # update LRG to RefSeqGene database + self.update_lrg_rs_lookup(lrg_rs_lookup) + + # lrg_t2nm_ + lrgtx_to_rstID = [lrg_tx, rstid] + # update database + self.update_lrgt_rst(lrgtx_to_rstID) + + logger.info( 'Update LRG protein lookup table') + # Populate LRG protein RefSeqProtein lokup table + for line in lr_t2p: + if re.search('^#', line): + continue + else: + list = line.split() + # Assign objects + lrg_p = list[0] + rs_p = list[1] + # update LRG to RefSeqGene database + 
self.update_lrg_p_rs_p_lookup(lrg_p, rs_p) + + logger.info('LRG lookup tables updated') + return + #From ref_seq_type + def ref_type_assign(self,accession): + if 'NC_' in accession or 'NG_' in accession or 'NT_' in accession or 'NW_' in accession: + ref_type = ':g.' + elif re.match('NM_', accession): + ref_type = ':c.' + elif re.match('NR_', accession): + ref_type = ':n.' + elif re.match('NP_', accession): + ref_type = ':p.' + elif re.match('LRG_', accession): + if re.search('t', accession): + refseqtranscript_reference = self.get.get_RefSeqTranscriptID_from_lrgTranscriptID(accession) + if re.match('NM_', refseqtranscript_reference): + ref_type = ':c.' + else: + ref_type = ':n.' + elif re.search('_p', accession): + ref_type = ':p.' + else: + ref_type = ':g.' + return ref_type + + diff --git a/VariantValidator/modules/vvFunctions.py b/VariantValidator/modules/vvFunctions.py index 37f3fc22..bfb31c61 100644 --- a/VariantValidator/modules/vvFunctions.py +++ b/VariantValidator/modules/vvFunctions.py @@ -3,6 +3,8 @@ import json from urlparse import urlparse #Python 2 import functools +import re +import copy #from urllib.parse import urlparse #Python 3 def handleCursor(func): @@ -67,9 +69,23 @@ def valstr(hgvs_variant): cp_hgvs_variant = copy.deepcopy(hgvs_variant) if cp_hgvs_variant.posedit.edit.type == 'identity': if len(cp_hgvs_variant.posedit.edit.ref) > 1: - cp_hgvs_variant = output_formatter.remove_reference(cp_hgvs_variant) + cp_hgvs_variant = remove_reference(cp_hgvs_variant) cp_hgvs_variant = str(cp_hgvs_variant) else: - cp_hgvs_variant = output_formatter.remove_reference(cp_hgvs_variant) + cp_hgvs_variant = remove_reference(cp_hgvs_variant) cp_hgvs_variant = str(cp_hgvs_variant) return cp_hgvs_variant + +# From output_formatter +""" +format protein description into single letter aa code +""" +def single_letter_protein(hgvs_protein): + hgvs_protein_slc = hgvs_protein.format({'p_3_letter': False}) + return hgvs_protein_slc +""" +format nucleotide descriptions to 
not display reference base +""" +def remove_reference(hgvs_nucleotide): + hgvs_nucleotide_refless = hgvs_nucleotide.format({'max_ref_length': 0}) + return hgvs_nucleotide_refless diff --git a/VariantValidator/modules/vvObjects.py b/VariantValidator/modules/vvObjects.py index 70ba2c2c..a54ebe1f 100644 --- a/VariantValidator/modules/vvObjects.py +++ b/VariantValidator/modules/vvObjects.py @@ -17,6 +17,7 @@ #import io from vvDatabase import vvDatabase from vvLogging import logger +import vvCore # Custom Exceptions class variantValidatorError(Exception): From 57bf89daa587f4936b4de5a0a039c040fa62ab3f Mon Sep 17 00:00:00 2001 From: buran Date: Tue, 15 Jan 2019 17:36:44 +0000 Subject: [PATCH 005/223] Moved over more functions, fixed mixin --- .../{vvChromasomes.py => vvChromosomes.py} | 0 VariantValidator/modules/vvConverters.py | 2225 +++ VariantValidator/modules/vvCore.py | 14089 ++++++++-------- VariantValidator/modules/vvFunctions.py | 373 +- VariantValidator/modules/vvHGVS.py | 831 + VariantValidator/modules/vvObjects.py | 334 +- .../testing/testOutputsMasterITS/variant0.txt | 143 - .../testing/testOutputsMasterITS/variant1.txt | 145 - .../testOutputsMasterITS/variant10.txt | 60 - .../testOutputsMasterITS/variant100.txt | 275 - .../testOutputsMasterITS/variant101.txt | 262 - .../testOutputsMasterITS/variant102.txt | 268 - .../testOutputsMasterITS/variant103.txt | 248 - .../testOutputsMasterITS/variant104.txt | 268 - .../testOutputsMasterITS/variant105.txt | 268 - .../testOutputsMasterITS/variant106.txt | 271 - .../testOutputsMasterITS/variant107.txt | 153 - .../testOutputsMasterITS/variant108.txt | 153 - .../testOutputsMasterITS/variant109.txt | 153 - .../testOutputsMasterITS/variant11.txt | 636 - .../testOutputsMasterITS/variant110.txt | 152 - .../testOutputsMasterITS/variant111.txt | 152 - .../testOutputsMasterITS/variant112.txt | 153 - .../testOutputsMasterITS/variant113.txt | 148 - .../testOutputsMasterITS/variant114.txt | 147 - 
.../testOutputsMasterITS/variant115.txt | 147 - .../testOutputsMasterITS/variant116.txt | 147 - .../testOutputsMasterITS/variant117.txt | 147 - .../testOutputsMasterITS/variant118.txt | 147 - .../testOutputsMasterITS/variant119.txt | 150 - .../testOutputsMasterITS/variant12.txt | 148 - .../testOutputsMasterITS/variant120.txt | 148 - .../testOutputsMasterITS/variant121.txt | 148 - .../testOutputsMasterITS/variant122.txt | 58 - .../testOutputsMasterITS/variant123.txt | 58 - .../testOutputsMasterITS/variant124.txt | 58 - .../testOutputsMasterITS/variant125.txt | 148 - .../testOutputsMasterITS/variant126.txt | 146 - .../testOutputsMasterITS/variant127.txt | 58 - .../testOutputsMasterITS/variant128.txt | 144 - .../testOutputsMasterITS/variant129.txt | 58 - .../testOutputsMasterITS/variant13.txt | 148 - .../testOutputsMasterITS/variant130.txt | 111 - .../testOutputsMasterITS/variant131.txt | 141 - .../testOutputsMasterITS/variant132.txt | 60 - .../testOutputsMasterITS/variant133.txt | 58 - .../testOutputsMasterITS/variant134.txt | 135 - .../testOutputsMasterITS/variant135.txt | 219 - .../testOutputsMasterITS/variant136.txt | 265 - .../testOutputsMasterITS/variant137.txt | 844 - .../testOutputsMasterITS/variant138.txt | 58 - .../testOutputsMasterITS/variant139.txt | 148 - .../testOutputsMasterITS/variant14.txt | 148 - .../testOutputsMasterITS/variant140.txt | 146 - .../testOutputsMasterITS/variant141.txt | 58 - .../testOutputsMasterITS/variant142.txt | 58 - .../testOutputsMasterITS/variant143.txt | 142 - .../testOutputsMasterITS/variant144.txt | 142 - .../testOutputsMasterITS/variant145.txt | 144 - .../testOutputsMasterITS/variant146.txt | 58 - .../testOutputsMasterITS/variant147.txt | 636 - .../testOutputsMasterITS/variant148.txt | 474 - .../testOutputsMasterITS/variant149.txt | 114 - .../testOutputsMasterITS/variant15.txt | 138 - .../testOutputsMasterITS/variant150.txt | 58 - .../testOutputsMasterITS/variant151.txt | 155 - .../testOutputsMasterITS/variant152.txt | 154 - 
.../testOutputsMasterITS/variant153.txt | 227 - .../testOutputsMasterITS/variant154.txt | 241 - .../testOutputsMasterITS/variant155.txt | 219 - .../testOutputsMasterITS/variant156.txt | 220 - .../testOutputsMasterITS/variant157.txt | 1257 -- .../testOutputsMasterITS/variant158.txt | 142 - .../testOutputsMasterITS/variant159.txt | 143 - .../testOutputsMasterITS/variant16.txt | 58 - .../testOutputsMasterITS/variant160.txt | 150 - .../testOutputsMasterITS/variant161.txt | 148 - .../testOutputsMasterITS/variant162.txt | 486 - .../testOutputsMasterITS/variant163.txt | 354 - .../testOutputsMasterITS/variant164.txt | 129 - .../testOutputsMasterITS/variant165.txt | 531 - .../testOutputsMasterITS/variant166.txt | 140 - .../testOutputsMasterITS/variant167.txt | 1266 -- .../testOutputsMasterITS/variant168.txt | 844 - .../testOutputsMasterITS/variant169.txt | 145 - .../testOutputsMasterITS/variant17.txt | 60 - .../testOutputsMasterITS/variant170.txt | 6 - .../testOutputsMasterITS/variant171.txt | 6 - .../testOutputsMasterITS/variant172.txt | 152 - .../testOutputsMasterITS/variant173.txt | 151 - .../testOutputsMasterITS/variant174.txt | 385 - .../testOutputsMasterITS/variant175.txt | 153 - .../testOutputsMasterITS/variant176.txt | 152 - .../testOutputsMasterITS/variant177.txt | 152 - .../testOutputsMasterITS/variant178.txt | 152 - .../testOutputsMasterITS/variant179.txt | 144 - .../testOutputsMasterITS/variant18.txt | 58 - .../testOutputsMasterITS/variant180.txt | 445 - .../testOutputsMasterITS/variant181.txt | 448 - .../testOutputsMasterITS/variant182.txt | 448 - .../testOutputsMasterITS/variant183.txt | 453 - .../testOutputsMasterITS/variant184.txt | 384 - .../testOutputsMasterITS/variant185.txt | 383 - .../testOutputsMasterITS/variant186.txt | 387 - .../testOutputsMasterITS/variant187.txt | 382 - .../testOutputsMasterITS/variant188.txt | 131 - .../testOutputsMasterITS/variant189.txt | 255 - .../testOutputsMasterITS/variant19.txt | 60 - .../testOutputsMasterITS/variant190.txt 
| 260 - .../testOutputsMasterITS/variant191.txt | 149 - .../testOutputsMasterITS/variant192.txt | 260 - .../testOutputsMasterITS/variant193.txt | 146 - .../testOutputsMasterITS/variant194.txt | 146 - .../testOutputsMasterITS/variant195.txt | 377 - .../testOutputsMasterITS/variant196.txt | 480 - .../testOutputsMasterITS/variant197.txt | 60 - .../testOutputsMasterITS/variant198.txt | 536 - .../testOutputsMasterITS/variant199.txt | 246 - .../testing/testOutputsMasterITS/variant2.txt | 142 - .../testOutputsMasterITS/variant20.txt | 149 - .../testOutputsMasterITS/variant200.txt | 488 - .../testOutputsMasterITS/variant201.txt | 145 - .../testOutputsMasterITS/variant202.txt | 145 - .../testOutputsMasterITS/variant203.txt | 141 - .../testOutputsMasterITS/variant204.txt | 397 - .../testOutputsMasterITS/variant205.txt | 262 - .../testOutputsMasterITS/variant206.txt | 896 - .../testOutputsMasterITS/variant207.txt | 559 - .../testOutputsMasterITS/variant208.txt | 324 - .../testOutputsMasterITS/variant209.txt | 143 - .../testOutputsMasterITS/variant21.txt | 60 - .../testOutputsMasterITS/variant210.txt | 464 - .../testOutputsMasterITS/variant211.txt | 187 - .../testOutputsMasterITS/variant212.txt | 660 - .../testOutputsMasterITS/variant213.txt | 257 - .../testOutputsMasterITS/variant214.txt | 2477 --- .../testOutputsMasterITS/variant215.txt | 248 - .../testOutputsMasterITS/variant216.txt | 428 - .../testOutputsMasterITS/variant217.txt | 142 - .../testOutputsMasterITS/variant218.txt | 371 - .../testOutputsMasterITS/variant219.txt | 355 - .../testOutputsMasterITS/variant22.txt | 58 - .../testOutputsMasterITS/variant220.txt | 1060 -- .../testOutputsMasterITS/variant221.txt | 365 - .../testOutputsMasterITS/variant222.txt | 147 - .../testOutputsMasterITS/variant223.txt | 454 - .../testOutputsMasterITS/variant224.txt | 479 - .../testOutputsMasterITS/variant225.txt | 401 - .../testOutputsMasterITS/variant226.txt | 369 - .../testOutputsMasterITS/variant227.txt | 369 - 
.../testOutputsMasterITS/variant228.txt | 699 - .../testOutputsMasterITS/variant229.txt | 704 - .../testOutputsMasterITS/variant23.txt | 60 - .../testOutputsMasterITS/variant230.txt | 142 - .../testOutputsMasterITS/variant231.txt | 449 - .../testOutputsMasterITS/variant232.txt | 245 - .../testOutputsMasterITS/variant233.txt | 1146 -- .../testOutputsMasterITS/variant234.txt | 244 - .../testOutputsMasterITS/variant235.txt | 244 - .../testOutputsMasterITS/variant236.txt | 140 - .../testOutputsMasterITS/variant237.txt | 317 - .../testOutputsMasterITS/variant238.txt | 472 - .../testOutputsMasterITS/variant239.txt | 495 - .../testOutputsMasterITS/variant24.txt | 58 - .../testOutputsMasterITS/variant240.txt | 508 - .../testOutputsMasterITS/variant241.txt | 478 - .../testOutputsMasterITS/variant242.txt | 983 -- .../testOutputsMasterITS/variant243.txt | 478 - .../testOutputsMasterITS/variant244.txt | 994 -- .../testOutputsMasterITS/variant245.txt | 508 - .../testOutputsMasterITS/variant246.txt | 495 - .../testOutputsMasterITS/variant247.txt | 307 - .../testOutputsMasterITS/variant248.txt | 303 - .../testOutputsMasterITS/variant249.txt | 290 - .../testOutputsMasterITS/variant25.txt | 58 - .../testOutputsMasterITS/variant250.txt | 1694 -- .../testOutputsMasterITS/variant251.txt | 1694 -- .../testOutputsMasterITS/variant252.txt | 570 - .../testOutputsMasterITS/variant253.txt | 696 - .../testOutputsMasterITS/variant254.txt | 668 - .../testOutputsMasterITS/variant255.txt | 420 - .../testOutputsMasterITS/variant256.txt | 505 - .../testOutputsMasterITS/variant257.txt | 150 - .../testOutputsMasterITS/variant258.txt | 147 - .../testOutputsMasterITS/variant259.txt | 147 - .../testOutputsMasterITS/variant26.txt | 58 - .../testOutputsMasterITS/variant260.txt | 142 - .../testOutputsMasterITS/variant261.txt | 140 - .../testOutputsMasterITS/variant262.txt | 818 - .../testOutputsMasterITS/variant263.txt | 140 - .../testOutputsMasterITS/variant264.txt | 137 - 
.../testOutputsMasterITS/variant265.txt | 246 - .../testOutputsMasterITS/variant266.txt | 2770 --- .../testOutputsMasterITS/variant267.txt | 1816 -- .../testOutputsMasterITS/variant268.txt | 1497 -- .../testOutputsMasterITS/variant269.txt | 348 - .../testOutputsMasterITS/variant27.txt | 60 - .../testOutputsMasterITS/variant270.txt | 348 - .../testOutputsMasterITS/variant271.txt | 2299 --- .../testOutputsMasterITS/variant272.txt | 2260 --- .../testOutputsMasterITS/variant273.txt | 1028 -- .../testOutputsMasterITS/variant274.txt | 146 - .../testOutputsMasterITS/variant275.txt | 244 - .../testOutputsMasterITS/variant276.txt | 354 - .../testOutputsMasterITS/variant277.txt | 326 - .../testOutputsMasterITS/variant278.txt | 1104 -- .../testOutputsMasterITS/variant279.txt | 371 - .../testOutputsMasterITS/variant28.txt | 58 - .../testOutputsMasterITS/variant280.txt | 793 - .../testOutputsMasterITS/variant281.txt | 999 -- .../testOutputsMasterITS/variant282.txt | 563 - .../testOutputsMasterITS/variant283.txt | 244 - .../testOutputsMasterITS/variant284.txt | 448 - .../testOutputsMasterITS/variant285.txt | 138 - .../testOutputsMasterITS/variant286.txt | 339 - .../testOutputsMasterITS/variant287.txt | 674 - .../testOutputsMasterITS/variant288.txt | 674 - .../testOutputsMasterITS/variant289.txt | 365 - .../testOutputsMasterITS/variant29.txt | 142 - .../testOutputsMasterITS/variant290.txt | 142 - .../testOutputsMasterITS/variant291.txt | 147 - .../testOutputsMasterITS/variant292.txt | 147 - .../testOutputsMasterITS/variant293.txt | 251 - .../testOutputsMasterITS/variant294.txt | 251 - .../testOutputsMasterITS/variant295.txt | 1747 -- .../testOutputsMasterITS/variant296.txt | 813 - .../testOutputsMasterITS/variant297.txt | 961 -- .../testOutputsMasterITS/variant298.txt | 252 - .../testOutputsMasterITS/variant299.txt | 264 - .../testing/testOutputsMasterITS/variant3.txt | 142 - .../testOutputsMasterITS/variant30.txt | 142 - .../testOutputsMasterITS/variant300.txt | 256 - 
.../testOutputsMasterITS/variant301.txt | 488 - .../testOutputsMasterITS/variant302.txt | 458 - .../testOutputsMasterITS/variant303.txt | 458 - .../testOutputsMasterITS/variant304.txt | 246 - .../testOutputsMasterITS/variant305.txt | 243 - .../testOutputsMasterITS/variant306.txt | 243 - .../testOutputsMasterITS/variant307.txt | 532 - .../testOutputsMasterITS/variant308.txt | 476 - .../testOutputsMasterITS/variant309.txt | 534 - .../testOutputsMasterITS/variant31.txt | 143 - .../testOutputsMasterITS/variant310.txt | 484 - .../testOutputsMasterITS/variant311.txt | 454 - .../testOutputsMasterITS/variant312.txt | 164 - .../testOutputsMasterITS/variant313.txt | 496 - .../testOutputsMasterITS/variant314.txt | 130 - .../testOutputsMasterITS/variant315.txt | 131 - .../testOutputsMasterITS/variant316.txt | 152 - .../testOutputsMasterITS/variant317.txt | 257 - .../testOutputsMasterITS/variant318.txt | 256 - .../testOutputsMasterITS/variant319.txt | 248 - .../testOutputsMasterITS/variant32.txt | 148 - .../testOutputsMasterITS/variant320.txt | 492 - .../testOutputsMasterITS/variant321.txt | 407 - .../testOutputsMasterITS/variant322.txt | 146 - .../testOutputsMasterITS/variant323.txt | 144 - .../testOutputsMasterITS/variant324.txt | 144 - .../testOutputsMasterITS/variant325.txt | 144 - .../testOutputsMasterITS/variant326.txt | 141 - .../testOutputsMasterITS/variant327.txt | 146 - .../testOutputsMasterITS/variant328.txt | 146 - .../testOutputsMasterITS/variant329.txt | 141 - .../testOutputsMasterITS/variant33.txt | 219 - .../testOutputsMasterITS/variant330.txt | 145 - .../testOutputsMasterITS/variant331.txt | 420 - .../testOutputsMasterITS/variant332.txt | 165 - .../testOutputsMasterITS/variant333.txt | 113 - .../testOutputsMasterITS/variant34.txt | 143 - .../testOutputsMasterITS/variant35.txt | 349 - .../testOutputsMasterITS/variant36.txt | 349 - .../testOutputsMasterITS/variant37.txt | 60 - .../testOutputsMasterITS/variant38.txt | 60 - .../testOutputsMasterITS/variant39.txt | 
143 - .../testing/testOutputsMasterITS/variant4.txt | 247 - .../testOutputsMasterITS/variant40.txt | 257 - .../testOutputsMasterITS/variant41.txt | 519 - .../testOutputsMasterITS/variant42.txt | 129 - .../testOutputsMasterITS/variant43.txt | 114 - .../testOutputsMasterITS/variant44.txt | 257 - .../testOutputsMasterITS/variant45.txt | 146 - .../testOutputsMasterITS/variant46.txt | 148 - .../testOutputsMasterITS/variant47.txt | 148 - .../testOutputsMasterITS/variant48.txt | 146 - .../testOutputsMasterITS/variant49.txt | 148 - .../testing/testOutputsMasterITS/variant5.txt | 111 - .../testOutputsMasterITS/variant50.txt | 147 - .../testOutputsMasterITS/variant51.txt | 141 - .../testOutputsMasterITS/variant52.txt | 141 - .../testOutputsMasterITS/variant53.txt | 62 - .../testOutputsMasterITS/variant54.txt | 62 - .../testOutputsMasterITS/variant55.txt | 148 - .../testOutputsMasterITS/variant56.txt | 148 - .../testOutputsMasterITS/variant57.txt | 146 - .../testOutputsMasterITS/variant58.txt | 148 - .../testOutputsMasterITS/variant59.txt | 145 - .../testing/testOutputsMasterITS/variant6.txt | 1218 -- .../testOutputsMasterITS/variant60.txt | 140 - .../testOutputsMasterITS/variant61.txt | 142 - .../testOutputsMasterITS/variant62.txt | 114 - .../testOutputsMasterITS/variant63.txt | 384 - .../testOutputsMasterITS/variant64.txt | 449 - .../testOutputsMasterITS/variant65.txt | 173 - .../testOutputsMasterITS/variant66.txt | 174 - .../testOutputsMasterITS/variant67.txt | 168 - .../testOutputsMasterITS/variant68.txt | 170 - .../testOutputsMasterITS/variant69.txt | 174 - .../testing/testOutputsMasterITS/variant7.txt | 148 - .../testOutputsMasterITS/variant70.txt | 175 - .../testOutputsMasterITS/variant71.txt | 173 - .../testOutputsMasterITS/variant72.txt | 173 - .../testOutputsMasterITS/variant73.txt | 177 - .../testOutputsMasterITS/variant74.txt | 172 - .../testOutputsMasterITS/variant75.txt | 172 - .../testOutputsMasterITS/variant76.txt | 177 - .../testOutputsMasterITS/variant77.txt 
| 175 - .../testOutputsMasterITS/variant78.txt | 173 - .../testOutputsMasterITS/variant79.txt | 788 - .../testing/testOutputsMasterITS/variant8.txt | 60 - .../testOutputsMasterITS/variant80.txt | 556 - .../testOutputsMasterITS/variant81.txt | 237 - .../testOutputsMasterITS/variant82.txt | 223 - .../testOutputsMasterITS/variant83.txt | 232 - .../testOutputsMasterITS/variant84.txt | 228 - .../testOutputsMasterITS/variant85.txt | 232 - .../testOutputsMasterITS/variant86.txt | 232 - .../testOutputsMasterITS/variant87.txt | 228 - .../testOutputsMasterITS/variant88.txt | 153 - .../testOutputsMasterITS/variant89.txt | 213 - .../testing/testOutputsMasterITS/variant9.txt | 62 - .../testOutputsMasterITS/variant90.txt | 216 - .../testOutputsMasterITS/variant91.txt | 212 - .../testOutputsMasterITS/variant92.txt | 216 - .../testOutputsMasterITS/variant93.txt | 211 - .../testOutputsMasterITS/variant94.txt | 210 - .../testOutputsMasterITS/variant95.txt | 213 - .../testOutputsMasterITS/variant96.txt | 212 - .../testOutputsMasterITS/variant97.txt | 268 - .../testOutputsMasterITS/variant98.txt | 268 - .../testOutputsMasterITS/variant99.txt | 268 - 340 files changed, 10800 insertions(+), 116165 deletions(-) rename VariantValidator/modules/{vvChromasomes.py => vvChromosomes.py} (100%) create mode 100644 VariantValidator/modules/vvConverters.py create mode 100644 VariantValidator/modules/vvHGVS.py delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant0.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant1.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant10.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant100.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant101.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant102.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant103.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant104.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant105.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant106.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant107.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant108.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant109.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant11.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant110.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant111.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant112.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant113.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant114.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant115.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant116.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant117.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant118.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant119.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant12.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant120.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant121.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant122.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant123.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant124.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant125.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant126.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant127.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant128.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant129.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant13.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant130.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant131.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant132.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant133.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant134.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant135.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant136.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant137.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant138.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant139.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant14.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant140.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant141.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant142.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant143.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant144.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant145.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant146.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant147.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant148.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant149.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant15.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant150.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant151.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant152.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant153.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant154.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant155.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant156.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant157.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant158.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant159.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant16.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant160.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant161.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant162.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant163.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant164.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant165.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant166.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant167.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant168.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant169.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant17.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant170.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant171.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant172.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant173.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant174.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant175.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant176.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant177.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant178.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant179.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant18.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant180.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant181.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant182.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant183.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant184.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant185.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant186.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant187.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant188.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant189.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant19.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant190.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant191.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant192.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant193.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant194.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant195.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant196.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant197.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant198.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant199.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant2.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant20.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant200.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant201.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant202.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant203.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant204.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant205.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant206.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant207.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant208.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant209.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant21.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant210.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant211.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant212.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant213.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant214.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant215.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant216.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant217.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant218.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant219.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant22.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant220.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant221.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant222.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant223.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant224.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant225.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant226.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant227.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant228.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant229.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant23.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant230.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant231.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant232.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant233.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant234.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant235.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant236.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant237.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant238.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant239.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant24.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant240.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant241.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant242.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant243.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant244.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant245.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant246.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant247.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant248.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant249.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant25.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant250.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant251.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant252.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant253.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant254.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant255.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant256.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant257.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant258.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant259.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant26.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant260.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant261.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant262.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant263.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant264.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant265.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant266.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant267.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant268.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant269.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant27.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant270.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant271.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant272.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant273.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant274.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant275.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant276.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant277.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant278.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant279.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant28.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant280.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant281.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant282.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant283.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant284.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant285.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant286.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant287.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant288.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant289.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant29.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant290.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant291.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant292.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant293.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant294.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant295.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant296.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant297.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant298.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant299.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant3.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant30.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant300.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant301.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant302.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant303.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant304.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant305.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant306.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant307.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant308.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant309.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant31.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant310.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant311.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant312.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant313.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant314.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant315.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant316.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant317.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant318.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant319.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant32.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant320.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant321.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant322.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant323.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant324.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant325.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant326.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant327.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant328.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant329.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant33.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant330.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant331.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant332.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant333.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant34.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant35.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant36.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant37.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant38.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant39.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant4.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant40.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant41.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant42.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant43.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant44.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant45.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant46.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant47.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant48.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant49.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant5.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant50.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant51.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant52.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant53.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant54.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant55.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant56.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant57.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant58.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant59.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant6.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant60.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant61.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant62.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant63.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant64.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant65.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant66.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant67.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant68.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant69.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant7.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant70.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant71.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant72.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant73.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant74.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant75.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant76.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant77.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant78.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant79.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant8.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant80.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant81.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant82.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant83.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant84.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant85.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant86.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant87.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant88.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant89.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant9.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant90.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant91.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant92.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant93.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant94.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant95.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant96.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant97.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant98.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant99.txt diff --git a/VariantValidator/modules/vvChromasomes.py b/VariantValidator/modules/vvChromosomes.py similarity index 100% rename from VariantValidator/modules/vvChromasomes.py rename to VariantValidator/modules/vvChromosomes.py diff --git a/VariantValidator/modules/vvConverters.py b/VariantValidator/modules/vvConverters.py new file mode 100644 index 00000000..e2452ece --- /dev/null +++ b/VariantValidator/modules/vvConverters.py @@ -0,0 +1,2225 @@ +import re +import os +import sys +import copy +from vvLogging import logger +import hgvs +import hgvs.exceptions +from hgvs.dataproviders import uta +from hgvs.dataproviders import seqfetcher +import hgvs.normalizer +import hgvs.validator +import hgvs.parser +import hgvs.variantmapper +import hgvs.sequencevariant + +#Error setup +from hgvs.exceptions import HGVSError, HGVSDataNotAvailableError, HGVSUnsupportedOperationError +class mergeHGVSerror(Exception): + pass +class alleleVariantError(Exception): + pass + + +""" +r_to_c +parses r. variant strings into hgvs object and maps to the c. equivalent. 
+""" +def r_to_c(variant, evm, hp): + # convert the input string into a hgvs object by parsing + var_r = hp.parse_hgvs_variant(variant) + # map to the coding sequence + var_c = evm.r_to_c(var_r) # coding level variant + variant = str(var_c) + c_from_r = {'variant': variant, 'type': ':c.'} + return c_from_r + +""" +Maps transcript variant descriptions onto specified RefSeqGene reference sequences +Return an hgvs object containing the genomic sequence variant relative to the RefSeqGene +acession +refseq_ac = RefSeqGene ac +""" + + +def refseq(variant, vm, refseq_ac, hp, evm, hdp, primary_assembly): + vr = hgvs.validator.Validator(hdp) + # parse the variant into hgvs object + var_c = hp.parse_hgvs_variant(variant) + # map to the genomic co-ordinates using the easy variant mapper set to alt_aln_method = alt_aln_method + var_g = myevm_t_to_g(var_c, evm, hdp, primary_assembly) + # Get overlapping transcripts - forcing a splign alignment + start_i = var_g.posedit.pos.start.base + end_i = var_g.posedit.pos.end.base + alt_ac = var_g.ac + alt_aln_method = 'splign' + transcripts = hdp.get_tx_for_region(alt_ac, alt_aln_method, start_i - 1, end_i) + # Take the first transcript + for trans in transcripts: + tx_ac = trans[0] + try: + ref_c = vm.g_to_t(var_g, tx_ac, alt_aln_method='splign') + except: + continue + else: + # map the variant co-ordinates to the refseq Gene accession using vm + ref_g_dict = { + 'ref_g': '', + 'error': 'false' + } + try: + ref_g_dict['ref_g'] = vm.t_to_g(ref_c, alt_ac=refseq_ac, alt_aln_method='splign') + except: + e = sys.exc_info()[0] + ref_g_dict['error'] = e + try: + vr.validate(ref_g_dict['ref_g']) + except: + e = sys.exc_info()[0] + ref_g_dict['error'] = e + if ref_g_dict['error'] == 'false': + return ref_g_dict + else: + continue + # Return as an error if all fail + return ref_g_dict + + +""" +Parses genomic variant strings into hgvs objects +Maps genomic hgvs object into a coding hgvs object if the c accession string is provided +returns a c. 
variant description string +""" + + +def g_to_c(var_g, tx_ac, hp, evm): + pat_g = re.compile("\:g\.") # Pattern looks for :g. + # If the :g. pattern is present in the input variant + if pat_g.search(var_g): + # convert the input string into a hgvs object by parsing + var_g = hp.parse_hgvs_variant(var_g) + # Map to coding variant + var_c = str(evm.g_to_c(var_g, tx_ac)) + return var_c + + +""" +Parses genomic variant strings into hgvs objects +Maps genomic hgvs object into a non-coding hgvs object if the n accession string is provided +returns a n. variant description string +""" + + +def g_to_n(var_g, tx_ac, hp, evm): + pat_g = re.compile("\:g\.") # Pattern looks for :g. + # If the :g. pattern is present in the input variant + if pat_g.search(var_g): + # convert the input string into a hgvs object by parsing + var_g = hp.parse_hgvs_variant(var_g) + # Map to coding variant + var_n = str(evm.g_to_n(var_g, tx_ac)) + return var_n + + +""" +Ensures variant strings are transcript c. or n. +returns parsed hgvs c. or n. object +""" + + +def coding(variant, hp): + # If the :c. pattern is present in the input variant + if re.search(':c.', variant) or re.search(':n.', variant): + # convert the input string into a hgvs object + var_c = hp.parse_hgvs_variant(variant) + return var_c + + +""" +Mapping transcript to genomic position +Ensures variant strings are transcript c. or n. +returns parsed hgvs g. object +""" + + +def genomic(variant, evm, hp, hdp, primary_assembly): + # Set regular expressions for if statements + pat_g = re.compile("\:g\.") # Pattern looks for :g. + pat_n = re.compile("\:n\.") + pat_c = re.compile("\:c\.") # Pattern looks for :c. + + # If the :c. 
pattern is present in the input variant + if pat_c.search(variant) or pat_n.search(variant): + error = 'false' + hgvs_var = hp.parse_hgvs_variant(variant) + try: + var_g = myevm_t_to_g(hgvs_var, evm, hdp, primary_assembly) # genomic level variant + except hgvs.exceptions.HGVSError as e: + error = e + if error != 'false': + var_g = 'error ' + str(e) + return var_g + + # If the :g. pattern is present in the input variant + elif (pat_g.search(variant)): # or (pat_n.search(variant)): + # convert the input string into a hgvs object + var_g = hp.parse_hgvs_variant(variant) + return var_g + + +""" +Mapping transcript to protein prediction +Ensures variant strings are transcript c. +returns parsed hgvs p. object +""" + + + + +""" +Function which takes a NORMALIZED hgvs Python transcript variant and maps to a specified protein reference sequence. A protein +level hgvs python object is returned. + +Note the function currently assumes that the transcript description is correctly normalized having come from the +previous g_to_t function +""" + + + + + +""" +Ensures variant strings are g. +returns parsed hgvs g. object +""" + + +def hgvs_genomic(variant, hp): + # Set regular expressions for if statements + pat_g = re.compile("\:g\.") # Pattern looks for :g. Note (gene) has been removed + # If the :g. pattern is present in the input variant + if pat_g.search(variant): + # convert the input string into a hgvs object + var_g = hp.parse_hgvs_variant(variant) + return var_g + + +""" +Enhanced transcript to genome position mapping function using evm +Deals with mapping from transcript positions that do not exist in the genomic sequence +i.e. the stated position aligns to a genomic gap! +Trys to ensure that a genomic position is always returned even if the c. or n. transcript +will not map to the specified genome build primary assembly. 
+Deals with transcript mapping to several genomic assemblies +Order +Map to a single NC_ for the specified genome build primary assembly +Map to a single NC_ for an alternate genome build primary assembly +Map to an NT_ from the specified genome build +Map to an NT_ from an alternative genome build +Map to an NW_ from the specified genome build +Map to an NW_ from an alternative genome buildRequires parsed c. or n. object +returns parsed hgvs g. object +""" + + +def myevm_t_to_g(hgvs_c, evm, hdp, primary_assembly): + # create no_norm_evm + if primary_assembly == 'GRCh38': + no_norm_evm = no_norm_evm_38 + elif primary_assembly == 'GRCh37': + no_norm_evm = no_norm_evm_37 + + # store the input + stored_hgvs_c = copy.deepcopy(hgvs_c) + expand_out = 'false' + utilise_gap_code = True + + # Gap gene black list + try: + gene_symbol = dbControls.data.get_gene_symbol_from_transcriptID(hgvs_c.ac) + except Exception: + utilise_gap_code = False + else: + # If the gene symbol is not in the list, the value False will be returned + utilise_gap_code = gap_genes.gap_black_list(gene_symbol) + # Warn gap code in use + logger.warning("gap_compensation_myevm = " + str(utilise_gap_code)) + + if utilise_gap_code is True and (hgvs_c.posedit.edit.type == 'identity' or hgvs_c.posedit.edit.type == 'del' or hgvs_c.posedit.edit.type =='delins' or hgvs_c.posedit.edit.type == 'dup' or hgvs_c.posedit.edit.type == 'sub' or hgvs_c.posedit.edit.type == 'ins' or hgvs_c.posedit.edit.type == 'inv'): + + # if NM_ need the n. 
position + if re.match('NM_', str(hgvs_c.ac)): + hgvs_c = no_norm_evm.c_to_n(hgvs_c) + + # Check for intronic + try: + hn.normalize(hgvs_c) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('intronic variant', error): + pass + elif re.search('Length implied by coordinates must equal sequence deletion length', error) and re.match( + 'NR_', hgvs_c.ac): + hgvs_c.posedit.pos.end.base = hgvs_c.posedit.pos.start.base + len(hgvs_c.posedit.edit.ref) - 1 + + # Check again before continuing + if re.search('\d+\+', str(hgvs_c.posedit.pos)) or re.search('\d+\-', str(hgvs_c.posedit.pos)) or re.search( + '\*\d+\+', str(hgvs_c.posedit.pos)) or re.search('\*\d+\-', str(hgvs_c.posedit.pos)): + pass + + else: + try: + # For non-intronic sequence + hgvs_t = copy.deepcopy(hgvs_c) + if hgvs_t.posedit.edit.type == 'inv': + inv_alt = revcomp(hgvs_t.posedit.edit.ref) + t_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base) + '_' + str(hgvs_t.posedit.pos.end.base) + 'del' + hgvs_t.posedit.edit.ref + 'ins' + inv_alt + hgvs_t_delins = hp.parse_hgvs_variant(t_delins) + pre_base = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.start.base-1) + post_base = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.end.base,hgvs_t.posedit.pos.end.base+1) + hgvs_t.posedit.edit.ref = pre_base + hgvs_t.posedit.edit.ref + post_base + inv_alt = pre_base + inv_alt + post_base + hgvs_t.posedit.pos.start.base = hgvs_t.posedit.pos.start.base - 1 + start = hgvs_t.posedit.pos.start.base + hgvs_t.posedit.pos.start.base = start + 1 + hgvs_t.posedit.pos.end.base = hgvs_t.posedit.pos.end.base + 1 + end = hgvs_t.posedit.pos.end.base + hgvs_t.posedit.pos.start.base = start + hgvs_t.posedit.pos.end.base = end + hgvs_str = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str(start) + '_' + str(end) + 'del' + hgvs_t.posedit.edit.ref + 'ins' + inv_alt + hgvs_t = hp.parse_hgvs_variant(hgvs_str) + elif hgvs_c.posedit.edit.type == 'dup': + pre_base = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.start.base-1) + post_base = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.end.base,hgvs_t.posedit.pos.end.base+1) + alt = pre_base + hgvs_t.posedit.edit.ref + hgvs_t.posedit.edit.ref + post_base + ref = pre_base + hgvs_t.posedit.edit.ref + post_base + dup_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base - 1) + '_' + str((hgvs_t.posedit.pos.start.base + len(ref)) -2) + 'del' + ref + 'ins' + alt + hgvs_t = hp.parse_hgvs_variant(dup_to_delins) + elif hgvs_c.posedit.edit.type == 'ins': + ins_ref = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.end.base+1) + ins_alt = ins_ref[:2] + hgvs_t.posedit.edit.alt + ins_ref[-2:] + ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base - 1) + '_' + str(hgvs_t.posedit.pos.end.base +1 ) + 'del' + ins_ref + 'ins' + ins_alt + hgvs_t = hp.parse_hgvs_variant(ins_to_delins) + else: + if str(hgvs_t.posedit.edit.alt) == 'None': + hgvs_t.posedit.edit.alt = '' + pre_base = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.start.base-1) + post_base = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.end.base,hgvs_t.posedit.pos.end.base+1) + hgvs_t.posedit.edit.ref = pre_base + hgvs_t.posedit.edit.ref + post_base + hgvs_t.posedit.edit.alt = pre_base + hgvs_t.posedit.edit.alt + post_base + hgvs_t.posedit.pos.start.base = hgvs_t.posedit.pos.start.base - 1 + start = hgvs_t.posedit.pos.start.base + hgvs_t.posedit.pos.start.base = start + 1 + hgvs_t.posedit.pos.end.base = hgvs_t.posedit.pos.end.base + 1 + end = hgvs_t.posedit.pos.end.base + hgvs_t.posedit.pos.start.base = start + hgvs_t.posedit.pos.end.base = end + hgvs_str = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str(start) + '_' + str(end) + str(hgvs_t.posedit.edit) + hgvs_t = hp.parse_hgvs_variant(hgvs_str) + hgvs_c = copy.deepcopy(hgvs_t) + + # Set expanded out test to true + expand_out = 'true' + + except Exception: + hgvs_c = hgvs_c + + if re.match('NM_', str(hgvs_c.ac)): + try: + hgvs_c = no_norm_evm.n_to_c(hgvs_c) + except hgvs.exceptions.HGVSError as e: + hgvs_c = copy.deepcopy(stored_hgvs_c) + + # Ensure the altered c. variant has not crossed intro exon boundaries + hgvs_check_boundaries = copy.deepcopy(hgvs_c) + try: + h_variant = hn.normalize(hgvs_check_boundaries) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error): + hgvs_c = copy.deepcopy(stored_hgvs_c) + # Catch identity at the exon/intron boundary by trying to normalize ref only + if hgvs_check_boundaries.posedit.edit.type == 'identity': + reform_ident = str(hgvs_c).split(':')[0] + reform_ident = reform_ident + ':' + stored_hgvs_c.type + '.' + str(hgvs_c.posedit.pos) + 'del' + str(hgvs_c.posedit.edit.ref)# + 'ins' + str(hgvs_c.posedit.edit.alt) + hgvs_reform_ident = hp.parse_hgvs_variant(reform_ident) + try: + hn.normalize(hgvs_reform_ident) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error) or re.search('Normalization of intronic variants', error): + hgvs_c = copy.deepcopy(stored_hgvs_c) + try: + hgvs_genomic = no_norm_evm.t_to_g(hgvs_c) + hn.normalize(hgvs_genomic) # Check the validity of the mapping + # This will fail on multiple refs for NC_ + except hgvs.exceptions.HGVSError as e: + # Recover all available mapping options from UTA + mapping_options = hdp.get_tx_mapping_options(hgvs_c.ac) + + if mapping_options == []: + raise HGVSDataNotAvailableError("No alignment data between the specified transcript reference sequence and any GRCh37 and GRCh38 genomic reference sequences (including alternate chromosome assemblies, patches and RefSeqGenes) are available.") + + # Capture 
errors from attempted mappings + attempted_mapping_error = '' + for option in mapping_options: + if re.match('blat', option[2]): + continue + if re.match('NC_', option[1]): + chr_num = supported_chromosome_builds.supported_for_mapping(str(option[1]), primary_assembly) + if chr_num != 'false': + try: + hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[1] + '~' + print e + continue + + # If not mapped, raise error + try: + hn.normalize(hgvs_genomic) + except: + for option in mapping_options: + if re.match('blat', option[2]): + continue + if re.match('NC_', option[1]): + try: + hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + if re.search(option[1], attempted_mapping_error): + pass + else: + attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[1] + '~' + print e + continue + try: + hn.normalize(hgvs_genomic) + except: + for option in mapping_options: + if re.match('blat', option[2]): + continue + if re.match('NT_', option[1]): + try: + hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[1] + '~' + print e + continue + try: + hn.normalize(hgvs_genomic) + except: + for option in mapping_options: + if re.match('blat', option[2]): + continue + if re.match('NW_', option[1]): + try: + hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[1] + '~' + print e + continue + # Only a RefSeqGene available + try: + hn.normalize(hgvs_genomic) + except: + for option in mapping_options: + if re.match('blat', option[2]): + continue + if re.match('NG_', option[1]): + try: + hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception 
as e: + attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[1] + '~' + print e + continue + # If not mapped, raise error + try: + hgvs_genomic + except Exception: + raise HGVSDataNotAvailableError(attempted_mapping_error) + + if hgvs_c.posedit.edit.type == 'identity' and hgvs_genomic.posedit.edit.type == 'delins' and hgvs_genomic.posedit.edit.alt == '' and expand_out != 'true': + hgvs_genomic.posedit.edit.alt = hgvs_genomic.posedit.edit.ref + if hgvs_genomic.posedit.edit.type == 'ins' and utilise_gap_code is True: + try: + hgvs_genomic = hn.normalize(hgvs_genomic) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error == 'insertion length must be 1': + ref = sf.fetch_seq(str(hgvs_genomic.ac),hgvs_genomic.posedit.pos.start.base-1,hgvs_genomic.posedit.pos.end.base) + hgvs_genomic.posedit.edit.ref = ref + hgvs_genomic.posedit.edit.alt = ref[0:1] + hgvs_genomic.posedit.edit.alt + ref[-1:] + hgvs_genomic = hn.normalize(hgvs_genomic) + if error == 'base start position must be <= end position': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + hgvs_genomic = hn.normalize(hgvs_genomic) + + # Statements required to reformat the stored_hgvs_c into a useable synonym + if (stored_hgvs_c.posedit.edit.ref == '' or stored_hgvs_c.posedit.edit.ref is None) and expand_out != 'false': + if stored_hgvs_c.type == 'c': + stored_hgvs_n = vm.c_to_n(stored_hgvs_c) + else: + stored_hgvs_n = stored_hgvs_c + stored_ref = sf.fetch_seq(str(stored_hgvs_n.ac),stored_hgvs_n.posedit.pos.start.base-1,stored_hgvs_n.posedit.pos.end.base) + stored_hgvs_c.posedit.edit.ref = stored_ref + + if (hgvs_genomic.posedit.edit.ref == '' or hgvs_genomic.posedit.edit.ref is None) and expand_out != 'false': + if hgvs_genomic.posedit.edit.type == 'ins': + stored_ref = 
sf.fetch_seq(str(hgvs_genomic.ac),hgvs_genomic.posedit.pos.start.base-1,hgvs_genomic.posedit.pos.end.base) + stored_alt = stored_ref[:1] + hgvs_genomic.posedit.edit.alt + stored_ref[-1:] + hgvs_genomic.posedit.edit.ref = stored_ref + hgvs_genomic.posedit.edit.alt = stored_alt + + # First look for variants mapping to the flanks of gaps + # either in the gap or on the flank but not fully within the gap + if expand_out == 'true': + nr_genomic = nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) + try: + hn.normalize(nr_genomic) + except hgvs.exceptions.HGVSInvalidVariantError as e: + if re.match('Length implied by coordinates must equal sequence deletion length', str(e)) or str(e) == 'base start position must be <= end position': + # Effectively, this code is designed to handle variants that are directly proximal to + # gap BOUNDARIES, but in some cases the replace reference function of hgvs mapping has removed bases due to + # the deletion length being > the specified range. + + # Warn of variant location wrt the gap + if re.match('Length implied by coordinates must equal sequence deletion length', str(e)): + logger.warning('Variant is proximal to the flank of a genomic gap') + genomic_gap_variant = vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) + try: + hn.normalize(genomic_gap_variant) + except Exception: + pass + else: + genomic_gap_variant = nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) + + if str(e) == 'base start position must be <= end position': + logger.warning('Variant is fully within a genomic gap') + genomic_gap_variant = vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) + + # Logic + # We have checked that the variant does not cross boundaries, or is intronic + # So is likely mapping to a genomic gap + try: + hn.normalize(genomic_gap_variant) + except Exception as e: + if str(e) == 'base start position must be <= end position': + # This will only happen when the variant is fully within the gap + gap_start = genomic_gap_variant.posedit.pos.end.base + gap_end = 
genomic_gap_variant.posedit.pos.start.base + genomic_gap_variant.posedit.pos.start.base = gap_start + genomic_gap_variant.posedit.pos.end.base = gap_end + if re.match('Length implied by coordinates must equal sequence deletion length', str(e)): + # This will only happen if the variant is flanking the gap but is + # not inside the gap + logger.warning('Variant is on the flank of a genomic gap but not within the gap') + gap_start = genomic_gap_variant.posedit.pos.start.base - 1 + gap_end = genomic_gap_variant.posedit.pos.end.base + 1 + genomic_gap_variant.posedit.pos.start.base = gap_start + genomic_gap_variant.posedit.pos.end.base = gap_end + genomic_gap_variant.posedit.edit.ref = '' + stored_hgvs_c = copy.deepcopy(hgvs_c) + + # Remove alt + try: + genomic_gap_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + pass + + # Should be a delins so will normalize statically and replace the reference bases + genomic_gap_variant = hn.normalize(genomic_gap_variant) + # Static map to c. and static normalize + transcript_gap_variant = vm.g_to_t(genomic_gap_variant, hgvs_c.ac) + stored_transcript_gap_variant = transcript_gap_variant + if not re.match('Length implied by coordinates must equal sequence deletion length', str(e)): + transcript_gap_variant = hn.normalize(transcript_gap_variant) + + # if NM_ need the n. position + if re.match('NM_', str(hgvs_c.ac)): + transcript_gap_n = no_norm_evm.c_to_n(transcript_gap_variant) + transcript_gap_alt_n = no_norm_evm.c_to_n(stored_hgvs_c) + else: + transcript_gap_n = transcript_gap_variant + transcript_gap_alt_n = stored_hgvs_c + + # Ensure an ALT exists + try: + if transcript_gap_alt_n.posedit.edit.alt is None: + transcript_gap_alt_n.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + transcript_gap_n_delins_from_dup = transcript_gap_n.ac + ':' + transcript_gap_n.type + '.' 
+ str(transcript_gap_n.posedit.pos.start.base) + '_' + str(transcript_gap_n.posedit.pos.end.base) + 'del' + transcript_gap_n.posedit.edit.ref + 'ins' + transcript_gap_n.posedit.edit.ref + transcript_gap_n.posedit.edit.ref + transcript_gap_n = hp.parse_hgvs_variant(transcript_gap_n_delins_from_dup) + transcript_gap_alt_n_delins_from_dup = transcript_gap_alt_n.ac + ':' + transcript_gap_alt_n.type + '.' + str(transcript_gap_alt_n.posedit.pos.start.base) + '_' + str(transcript_gap_alt_n.posedit.pos.end.base) + 'del' + transcript_gap_alt_n.posedit.edit.ref + 'ins' + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n = hp.parse_hgvs_variant(transcript_gap_alt_n_delins_from_dup) + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(transcript_gap_n.posedit.edit.ref) + if transcript_gap_alt_n.posedit.edit.alt is not None: + alternate_bases = list(transcript_gap_alt_n.posedit.edit.alt) + else: + # Deletions with no ins + pre_alternate_bases = list(transcript_gap_alt_n.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = transcript_gap_n.posedit.pos.start.base + alt_start = transcript_gap_alt_n.posedit.pos.start.base + ref_base_dict = {} + for base in reference_bases: + ref_base_dict[ref_start] = str(base) + ref_start = ref_start + 1 + + alt_base_dict = {} + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for int in range(transcript_gap_alt_n.posedit.pos.start.base, transcript_gap_alt_n.posedit.pos.end.base+1, 1): + if int == alt_start: + alt_base_dict[int] = str(''.join(alternate_bases)) + else: + alt_base_dict[int] = 'X' + + # Generate the alt sequence + alternate_sequence_bases = [] + for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base+1, 1): + if int in 
alt_base_dict.keys(): + alternate_sequence_bases.append(alt_base_dict[int]) + else: + alternate_sequence_bases.append(ref_base_dict[int]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Update variant, map to genome using vm and normalize + transcript_gap_n.posedit.edit.alt = alternate_sequence + + try: + transcript_gap_variant = vm.n_to_c(transcript_gap_n) + except: + transcript_gap_variant = transcript_gap_n + + try: + hgvs_genomic = vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) + hgvs_genomic = hn.normalize(hgvs_genomic) + except Exception as e: + if str(e) == "base start position must be <= end position": + # Expansion out is required to map back to the genomic position + pre_base = sf.fetch_seq(transcript_gap_n.ac,transcript_gap_n.posedit.pos.start.base-2,transcript_gap_n.posedit.pos.start.base-1) + post_base = sf.fetch_seq(transcript_gap_n.ac,transcript_gap_n.posedit.pos.end.base,transcript_gap_n.posedit.pos.end.base+1) + transcript_gap_n.posedit.pos.start.base = transcript_gap_n.posedit.pos.start.base - 1 + transcript_gap_n.posedit.pos.end.base = transcript_gap_n.posedit.pos.end.base + 1 + transcript_gap_n.posedit.edit.ref = pre_base + transcript_gap_n.posedit.edit.ref + post_base + transcript_gap_n.posedit.edit.alt = pre_base + transcript_gap_n.posedit.edit.alt + post_base + try: + transcript_gap_variant = vm.n_to_c(transcript_gap_n) + except: + transcript_gap_variant = transcript_gap_n + hgvs_genomic = vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) + hgvs_genomic = hn.normalize(hgvs_genomic) + + # Bypass the next bit of gap code + expand_out = 'false' + + else: + pass + # No map to the flank of a gap or within the gap + else: + pass + + + # CASCADING STATEMENTS WHICH CAPTURE t to g MAPPING OPTIONS + # Remove identity bases + if hgvs_c == stored_hgvs_c: + expand_out = 'false' + elif expand_out == 'false' or utilise_gap_code is False: + pass + # Correct expansion ref + 2 + 
elif expand_out == 'true' and ( + len(hgvs_genomic.posedit.edit.ref) == (len(stored_hgvs_c.posedit.edit.ref) + 2)): # >= 3: + hgvs_genomic.posedit.pos.start.base = hgvs_genomic.posedit.pos.start.base + 1 + hgvs_genomic.posedit.pos.end.base = hgvs_genomic.posedit.pos.end.base - 1 + hgvs_genomic.posedit.edit.ref = hgvs_genomic.posedit.edit.ref[1:-1] + if hgvs_genomic.posedit.edit.alt is not None: + hgvs_genomic.posedit.edit.alt = hgvs_genomic.posedit.edit.alt[1:-1] + elif expand_out == 'true' and ( + len(hgvs_genomic.posedit.edit.ref) != (len(stored_hgvs_c.posedit.edit.ref) + 2)): # >= 3: + if expand_out == 'true' and len(hgvs_genomic.posedit.edit.ref) == 2: + gn = hn.normalize(hgvs_genomic) + pass + + # Likely if the start or end position aligns to a gap in the genomic sequence + # Logic + # We have checked that the variant does not cross boundaries, or is intronic + # So is likely mapping to a genomic gap + elif expand_out == 'true' and len(hgvs_genomic.posedit.edit.ref) <= 1: + # Incorrect expansion, likely < ref + 2 + genomic_gap_variant = vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) + try: + hn.normalize(genomic_gap_variant) + except Exception as e: + if str(e) == 'base start position must be <= end position': + gap_start = genomic_gap_variant.posedit.pos.end.base + gap_end = genomic_gap_variant.posedit.pos.start.base + genomic_gap_variant.posedit.pos.start.base = gap_start + genomic_gap_variant.posedit.pos.end.base = gap_end + # Remove alt + try: + genomic_gap_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + pass + # Should be a delins so will normalize statically and replace the reference bases + genomic_gap_variant = hn.normalize(genomic_gap_variant) + # Static map to c. and static normalize + transcript_gap_variant = vm.g_to_t(genomic_gap_variant, hgvs_c.ac) + stored_transcript_gap_variant = transcript_gap_variant + transcript_gap_variant = hn.normalize(transcript_gap_variant) + # if NM_ need the n. 
position + if re.match('NM_', str(hgvs_c.ac)): + transcript_gap_n = no_norm_evm.c_to_n(transcript_gap_variant) + transcript_gap_alt_n = no_norm_evm.c_to_n(stored_hgvs_c) + else: + transcript_gap_n = transcript_gap_variant + transcript_gap_alt_n = stored_hgvs_c + + # Ensure an ALT exists + try: + if transcript_gap_alt_n.posedit.edit.alt is None: + transcript_gap_alt_n.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + transcript_gap_n_delins_from_dup = transcript_gap_n.ac + ':' + transcript_gap_n.type + '.' + str( + transcript_gap_n.posedit.pos.start.base) + '_' + str( + transcript_gap_n.posedit.pos.end.base) + 'del' + transcript_gap_n.posedit.edit.ref + 'ins' + transcript_gap_n.posedit.edit.ref + transcript_gap_n.posedit.edit.ref + transcript_gap_n = hp.parse_hgvs_variant(transcript_gap_n_delins_from_dup) + transcript_gap_alt_n_delins_from_dup = transcript_gap_alt_n.ac + ':' + transcript_gap_alt_n.type + '.' + str( + transcript_gap_alt_n.posedit.pos.start.base) + '_' + str( + transcript_gap_alt_n.posedit.pos.end.base) + 'del' + transcript_gap_alt_n.posedit.edit.ref + 'ins' + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n = hp.parse_hgvs_variant(transcript_gap_alt_n_delins_from_dup) + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(transcript_gap_n.posedit.edit.ref) + if transcript_gap_alt_n.posedit.edit.alt is not None: + alternate_bases = list(transcript_gap_alt_n.posedit.edit.alt) + else: + # Deletions with no ins + pre_alternate_bases = list(transcript_gap_alt_n.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = transcript_gap_n.posedit.pos.start.base + alt_start = transcript_gap_alt_n.posedit.pos.start.base + ref_base_dict = {} + for base in reference_bases: + ref_base_dict[ref_start] = str(base) + ref_start = 
ref_start + 1 + + alt_base_dict = {} + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for int in range(transcript_gap_alt_n.posedit.pos.start.base, + transcript_gap_alt_n.posedit.pos.end.base + 1, 1): + if int == alt_start: + alt_base_dict[int] = str(''.join(alternate_bases)) + else: + alt_base_dict[int] = 'X' + + # Generate the alt sequence + alternate_sequence_bases = [] + for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base + 1, 1): + if int in alt_base_dict.keys(): + alternate_sequence_bases.append(alt_base_dict[int]) + else: + alternate_sequence_bases.append(ref_base_dict[int]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Update variant, map to genome using vm and normalize + transcript_gap_n.posedit.edit.alt = alternate_sequence + + try: + transcript_gap_variant = vm.n_to_c(transcript_gap_n) + except: + transcript_gap_variant = transcript_gap_n + + try: + hgvs_genomic = vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) + hgvs_genomic = hn.normalize(hgvs_genomic) + except Exception as e: + if str(e) == "base start position must be <= end position": + # Expansion out is required to map back to the genomic position + pre_base = sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.start.base - 2, + transcript_gap_n.posedit.pos.start.base - 1) + post_base = sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.end.base, + transcript_gap_n.posedit.pos.end.base + 1) + transcript_gap_n.posedit.pos.start.base = transcript_gap_n.posedit.pos.start.base - 1 + transcript_gap_n.posedit.pos.end.base = transcript_gap_n.posedit.pos.end.base + 1 + transcript_gap_n.posedit.edit.ref = pre_base + transcript_gap_n.posedit.edit.ref + post_base + transcript_gap_n.posedit.edit.alt = pre_base + transcript_gap_n.posedit.edit.alt + post_base + try: + transcript_gap_variant 
= vm.n_to_c(transcript_gap_n) + except: + transcript_gap_variant = transcript_gap_n + hgvs_genomic = vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) + hgvs_genomic = hn.normalize(hgvs_genomic) + + # Ins variants map badly - Especially between c. exon/exon boundary + if hgvs_c.posedit.edit.type == 'ins' and hgvs_c.posedit.pos.start.offset == 0 and hgvs_c.posedit.pos.end.offset == 0: + try: + hn.normalize(hgvs_genomic) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error == 'insertion length must be 1': + if hgvs_c.type == 'c': + hgvs_t = vm.c_to_n(hgvs_c) + else: + hgvs_t = copy.copy(hgvs_c) + ins_ref = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-1,hgvs_t.posedit.pos.end.base) + ins_alt = ins_ref[:1] + hgvs_t.posedit.edit.alt + ins_ref[-1:] + ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base) + '_' + str(hgvs_t.posedit.pos.end.base) + 'del' + ins_ref + 'ins' + ins_alt + hgvs_t = hp.parse_hgvs_variant(ins_to_delins) + try: + hgvs_c = vm.n_to_c(hgvs_t) + except Exception: + hgvs_c = copy.copy(hgvs_t) + try: + hgvs_genomic = no_norm_evm.t_to_g(hgvs_c) + except Exception as e: + error = str(e) + logger.warning('Ins mapping error in myt_to_g ' + error) + + return hgvs_genomic + +""" +USE WITH MAPPER THAT DOES NOT REPLACE THE REFERENCE GENOMIC BASES AND DOED NOT NORMALIZE + +Enhanced transcript to genome position mapping function using evm +Trys to ensure that a genomic position is always returned even if the c. or n. transcript +will not map to the specified genome build primary assembly. +Deals with transcript mapping to several genomic assemblies +Order +Map to a single NC_ (or ALT) for the specified genome build +returns parsed hgvs g. 
object +""" + + +def noreplace_myevm_t_to_g(hgvs_c, evm, hdp, primary_assembly): + try: + hgvs_genomic = evm.t_to_g(hgvs_c) + hn.normalize(hgvs_genomic) + # This will fail on multiple refs for NC_ + except hgvs.exceptions.HGVSError as e: + # Recover all available mapping options from UTA + mapping_options = hdp.get_tx_mapping_options(hgvs_c.ac) + if mapping_options == []: + raise HGVSDataNotAvailableError("no g. mapping options available") + for option in mapping_options: + if re.match('blat', option[2]): + continue + if re.match('NC_', option[1]): + chr_num = supported_chromosome_builds.supported_for_mapping(str(option[1]), primary_assembly) + if chr_num != 'false': + try: + hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + continue + try: + hn.normalize(hgvs_genomic) + except: + for option in mapping_options: + if re.match('blat', option[2]): + continue + if re.match('NC_', option[1]): + chr_num = supported_chromosome_builds.supported_for_mapping(str(option[1]), primary_assembly) + try: + hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + continue + + try: + hn.normalize(hgvs_genomic) + except: + for option in mapping_options: + if re.match('blat', option[2]): + continue + if re.match('NT_', option[1]): + chr_num = supported_chromosome_builds.supported_for_mapping(str(option[1]), primary_assembly) + if chr_num != 'false': + try: + hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + continue + try: + hn.normalize(hgvs_genomic) + except: + for option in mapping_options: + if re.match('blat', option[2]): + continue + if re.match('NW_', option[1]): + chr_num = supported_chromosome_builds.supported_for_mapping(str(option[1]), + primary_assembly) + if chr_num != 'false': + try: + hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + continue + + # Only a RefSeqGene available + try: + hn.normalize(hgvs_genomic) + except: + for option in 
mapping_options: + if re.match('blat', option[2]): + continue + if re.match('NG_', option[1]): + try: + hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) + break + except: + continue + try: + hgvs_genomic + except Exception: + + raise HGVSDataNotAvailableError('No available t_to_g liftover') + + # Ins variants map badly - Especially between c. exon/exon boundary + if hgvs_c.posedit.edit.type == 'ins' and hgvs_c.posedit.pos.start.offset == 0 and hgvs_c.posedit.pos.end.offset == 0: + try: + hn.normalize(hgvs_genomic) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error == 'insertion length must be 1': + if hgvs_c.type == 'c': + hgvs_t = vm.c_to_n(hgvs_c) + else: + hgvs_t = copy.copy(hgvs_c) + ins_ref = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-1,hgvs_t.posedit.pos.end.base) + ins_alt = ins_ref[:1] + hgvs_t.posedit.edit.alt + ins_ref[-1:] + ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base) + '_' + str(hgvs_t.posedit.pos.end.base) + 'del' + ins_ref + 'ins' + ins_alt + hgvs_t = hp.parse_hgvs_variant(ins_to_delins) + try: + hgvs_c = vm.n_to_c(hgvs_t) + except Exception: + hgvs_c = copy.copy(hgvs_t) + try: + hgvs_genomic = no_norm_evm.t_to_g(hgvs_c) + except Exception as e: + error = str(e) + logger.warning('Ins mapping error in myt_to_g ' + error) + + return hgvs_genomic + + +""" +Enhanced transcript to genome position on a specified genomic reference using vm +Deals with mapping from transcript positions that do not exist in the genomic sequence +i.e. the stated position aligns to a genomic gap! +returns parsed hgvs g. 
object +""" + + +def myvm_t_to_g(hgvs_c, alt_chr, vm, hn, hdp, primary_assembly): + # create no_norm_evm + if primary_assembly == 'GRCh38': + no_norm_evm = no_norm_evm_38 + elif primary_assembly == 'GRCh37': + no_norm_evm = no_norm_evm_37 + + # store the input + stored_hgvs_c = copy.deepcopy(hgvs_c) + expand_out = 'false' + utilise_gap_code = True + + # Gap gene black list + try: + gene_symbol = dbControls.data.get_gene_symbol_from_transcriptID(hgvs_c.ac) + except Exception: + utilise_gap_code = False + else: + # If the gene symbol is not in the list, the value False will be returned + utilise_gap_code = gap_genes.gap_black_list(gene_symbol) + # Warn gap code in use + logger.warning("gap_compensation_mvm = " + str(utilise_gap_code)) + + if utilise_gap_code is True and (hgvs_c.posedit.edit.type == 'identity' or hgvs_c.posedit.edit.type == 'del' or hgvs_c.posedit.edit.type =='delins' or hgvs_c.posedit.edit.type == 'dup' or hgvs_c.posedit.edit.type == 'sub' or hgvs_c.posedit.edit.type == 'ins' or hgvs_c.posedit.edit.type == 'inv'): + + # if NM_ need the n. 
position + if re.match('NM_', str(hgvs_c.ac)): + hgvs_c = no_norm_evm.c_to_n(hgvs_c) + + # Check for intronic + try: + hn.normalize(hgvs_c) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('intronic variant', error): + pass + elif re.search('Length implied by coordinates must equal sequence deletion length', error) and re.match( + 'NR_', hgvs_c.ac): + hgvs_c.posedit.pos.end.base = hgvs_c.posedit.pos.start.base + len(hgvs_c.posedit.edit.ref) - 1 + + # Check again before continuing + if re.search('\d+\+', str(hgvs_c.posedit.pos)) or re.search('\d+\-', str(hgvs_c.posedit.pos)) or re.search('\*\d+\+', str(hgvs_c.posedit.pos)) or re.search('\*\d+\-', str(hgvs_c.posedit.pos)): + pass + + else: + try: + # For non-intronic sequence + hgvs_t = copy.deepcopy(hgvs_c) + # handle inversions + if hgvs_t.posedit.edit.type == 'inv': + inv_alt = revcomp(hgvs_t.posedit.edit.ref) + t_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base) + '_' + str(hgvs_t.posedit.pos.end.base) + 'del' + hgvs_t.posedit.edit.ref + 'ins' + inv_alt + hgvs_t_delins = hp.parse_hgvs_variant(t_delins) + pre_base = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.start.base-1) + post_base = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.end.base,hgvs_t.posedit.pos.end.base+1) + hgvs_t.posedit.edit.ref = pre_base + hgvs_t.posedit.edit.ref + post_base + inv_alt = pre_base + inv_alt + post_base + hgvs_t.posedit.pos.start.base = hgvs_t.posedit.pos.start.base - 1 + start = hgvs_t.posedit.pos.start.base + hgvs_t.posedit.pos.start.base = start + 1 + hgvs_t.posedit.pos.end.base = hgvs_t.posedit.pos.end.base + 1 + end = hgvs_t.posedit.pos.end.base + hgvs_t.posedit.pos.start.base = start + hgvs_t.posedit.pos.end.base = end + hgvs_str = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str(start) + '_' + str(end) + 'del' + hgvs_t.posedit.edit.ref + 'ins' + inv_alt + hgvs_t = hp.parse_hgvs_variant(hgvs_str) + if hgvs_c.posedit.edit.type == 'dup': + # hgvs_t = reverse_normalize.normalize(hgvs_t) + pre_base = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.start.base-1) + post_base = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.end.base,hgvs_t.posedit.pos.end.base+1) + alt = pre_base + hgvs_t.posedit.edit.ref + hgvs_t.posedit.edit.ref + post_base + ref = pre_base + hgvs_t.posedit.edit.ref + post_base + dup_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base - 1) + '_' + str((hgvs_t.posedit.pos.start.base + len(ref)) -2) + 'del' + ref + 'ins' + alt + hgvs_t = hp.parse_hgvs_variant(dup_to_delins) + elif hgvs_c.posedit.edit.type == 'ins': + ins_ref = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.end.base+1) + ins_alt = ins_ref[:2] + hgvs_t.posedit.edit.alt + ins_ref[-2:] + ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str(hgvs_t.posedit.pos.start.base - 1) + '_' + str(hgvs_t.posedit.pos.end.base +1 ) + 'del' + ins_ref + 'ins' + ins_alt + hgvs_t = hp.parse_hgvs_variant(ins_to_delins) + else: + if str(hgvs_t.posedit.edit.alt) == 'None': + hgvs_t.posedit.edit.alt = '' + pre_base = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.start.base-1) + post_base = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.end.base,hgvs_t.posedit.pos.end.base+1) + hgvs_t.posedit.edit.ref = pre_base + hgvs_t.posedit.edit.ref + post_base + hgvs_t.posedit.edit.alt = pre_base + hgvs_t.posedit.edit.alt + post_base + hgvs_t.posedit.pos.start.base = hgvs_t.posedit.pos.start.base - 1 + start = hgvs_t.posedit.pos.start.base + hgvs_t.posedit.pos.start.base = start + 1 + hgvs_t.posedit.pos.end.base = hgvs_t.posedit.pos.end.base + 1 + end = hgvs_t.posedit.pos.end.base + hgvs_t.posedit.pos.start.base = start + hgvs_t.posedit.pos.end.base = end + hgvs_str = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(start) + '_' + str(end) + str(hgvs_t.posedit.edit) + hgvs_t = hp.parse_hgvs_variant(hgvs_str) + hgvs_c = copy.deepcopy(hgvs_t) + + # Set expanded out test to true + expand_out = 'true' + + except Exception: + hgvs_c = hgvs_c + + if re.match('NM_', str(hgvs_c.ac)): + try: + hgvs_c = no_norm_evm.n_to_c(hgvs_c) + except hgvs.exceptions.HGVSError as e: + hgvs_c = copy.deepcopy(stored_hgvs_c) + + # Ensure the altered c. variant has not crossed intro exon boundaries + hgvs_check_boundaries = copy.deepcopy(hgvs_c) + try: + h_variant = hn.normalize(hgvs_check_boundaries) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error): + hgvs_c = copy.deepcopy(stored_hgvs_c) + # Catch identity at the exon/intron boundary by trying to normalize ref only + if hgvs_check_boundaries.posedit.edit.type == 'identity': + reform_ident = str(hgvs_c).split(':')[0] + reform_ident = reform_ident + ':' + stored_hgvs_c.type + '.' 
+ str(hgvs_c.posedit.pos) + 'del' + str(hgvs_c.posedit.edit.ref)# + 'ins' + str(hgvs_c.posedit.edit.alt) + hgvs_reform_ident = hp.parse_hgvs_variant(reform_ident) + try: + hn.normalize(hgvs_reform_ident) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error) or re.search('Normalization of intronic variants', error): + hgvs_c = copy.deepcopy(stored_hgvs_c) + + hgvs_genomic = vm.t_to_g(hgvs_c, alt_chr) + if hgvs_c.posedit.edit.type == 'identity' and hgvs_genomic.posedit.edit.type == 'delins' and hgvs_genomic.posedit.edit.alt == '' and expand_out != 'true': + hgvs_genomic.posedit.edit.alt = hgvs_genomic.posedit.edit.ref + if hgvs_genomic.posedit.edit.type == 'ins' and utilise_gap_code is True: + try: + hgvs_genomic = hn.normalize(hgvs_genomic) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error == 'insertion length must be 1': + ref = sf.fetch_seq(str(hgvs_genomic.ac),hgvs_genomic.posedit.pos.start.base-1,hgvs_genomic.posedit.pos.end.base) + hgvs_genomic.posedit.edit.ref = ref + hgvs_genomic.posedit.edit.alt = ref[0:1] + hgvs_genomic.posedit.edit.alt + ref[-1:] + hgvs_genomic = hn.normalize(hgvs_genomic) + if error == 'base start position must be <= end position': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + hgvs_genomic = hn.normalize(hgvs_genomic) + + # Statements required to reformat the stored_hgvs_c into a useable synonym + if (stored_hgvs_c.posedit.edit.ref == '' or stored_hgvs_c.posedit.edit.ref is None) and expand_out != 'false': + if stored_hgvs_c.type == 'c': + stored_hgvs_n = vm.c_to_n(stored_hgvs_c) + else: + stored_hgvs_n = stored_hgvs_c + stored_ref = sf.fetch_seq(str(stored_hgvs_n.ac),stored_hgvs_n.posedit.pos.start.base-1,stored_hgvs_n.posedit.pos.end.base) + stored_hgvs_c.posedit.edit.ref = stored_ref + + if (hgvs_genomic.posedit.edit.ref == 
'' or hgvs_genomic.posedit.edit.ref is None) and expand_out != 'false': + if hgvs_genomic.posedit.edit.type == 'ins': + stored_ref = sf.fetch_seq(str(hgvs_genomic.ac),hgvs_genomic.posedit.pos.start.base-1,hgvs_genomic.posedit.pos.end.base) + stored_alt = stored_ref[:1] + hgvs_genomic.posedit.edit.alt + stored_ref[-1:] + hgvs_genomic.posedit.edit.ref = stored_ref + hgvs_genomic.posedit.edit.alt = stored_alt + + # First look for variants mapping to the flanks of gaps + # either in the gap or on the flank but not fully within the gap + if expand_out == 'true': + nr_genomic = nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) + try: + hn.normalize(nr_genomic) + except hgvs.exceptions.HGVSInvalidVariantError as e: + if re.match('Length implied by coordinates must equal sequence deletion length', str(e)) or str(e) == 'base start position must be <= end position': + # Effectively, this code is designed to handle variants that are directly proximal to + # gap BOUNDARIES, but in some cases the replace reference function of hgvs mapping has removed bases due to + # the deletion length being > the specified range. 
+ + # Warn of variant location wrt the gap + if re.match('Length implied by coordinates must equal sequence deletion length', str(e)): + logger.warning('Variant is proximal to the flank of a genomic gap') + genomic_gap_variant = vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) + try: + hn.normalize(genomic_gap_variant) + except Exception: + pass + else: + genomic_gap_variant = nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) + + if str(e) == 'base start position must be <= end position': + logger.warning('Variant is fully within a genomic gap') + genomic_gap_variant = vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) + + # Logic + # We have checked that the variant does not cross boundaries, or is intronic + # So is likely mapping to a genomic gap + try: + hn.normalize(genomic_gap_variant) + except Exception as e: + if str(e) == 'base start position must be <= end position': + # This will only happen when the variant is fully within the gap + gap_start = genomic_gap_variant.posedit.pos.end.base + gap_end = genomic_gap_variant.posedit.pos.start.base + genomic_gap_variant.posedit.pos.start.base = gap_start + genomic_gap_variant.posedit.pos.end.base = gap_end + if re.match('Length implied by coordinates must equal sequence deletion length', str(e)): + # This will only happen if the variant is flanking the gap but is + # not inside the gap + logger.warning('Variant is on the flank of a genomic gap but not within the gap') + gap_start = genomic_gap_variant.posedit.pos.start.base - 1 + gap_end = genomic_gap_variant.posedit.pos.end.base + 1 + genomic_gap_variant.posedit.pos.start.base = gap_start + genomic_gap_variant.posedit.pos.end.base = gap_end + genomic_gap_variant.posedit.edit.ref = '' + stored_hgvs_c = copy.deepcopy(hgvs_c) + + # Remove alt + try: + genomic_gap_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + pass + + # Should be a delins so will normalize statically and replace the reference bases + genomic_gap_variant = 
hn.normalize(genomic_gap_variant) + # Static map to c. and static normalize + transcript_gap_variant = vm.g_to_t(genomic_gap_variant, hgvs_c.ac) + stored_transcript_gap_variant = transcript_gap_variant + if not re.match('Length implied by coordinates must equal sequence deletion length', str(e)): + transcript_gap_variant = hn.normalize(transcript_gap_variant) + + # if NM_ need the n. position + if re.match('NM_', str(hgvs_c.ac)): + transcript_gap_n = no_norm_evm.c_to_n(transcript_gap_variant) + transcript_gap_alt_n = no_norm_evm.c_to_n(stored_hgvs_c) + else: + transcript_gap_n = transcript_gap_variant + transcript_gap_alt_n = stored_hgvs_c + + # Ensure an ALT exists + try: + if transcript_gap_alt_n.posedit.edit.alt is None: + transcript_gap_alt_n.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + transcript_gap_n_delins_from_dup = transcript_gap_n.ac + ':' + transcript_gap_n.type + '.' + str(transcript_gap_n.posedit.pos.start.base) + '_' + str(transcript_gap_n.posedit.pos.end.base) + 'del' + transcript_gap_n.posedit.edit.ref + 'ins' + transcript_gap_n.posedit.edit.ref + transcript_gap_n.posedit.edit.ref + transcript_gap_n = hp.parse_hgvs_variant(transcript_gap_n_delins_from_dup) + transcript_gap_alt_n_delins_from_dup = transcript_gap_alt_n.ac + ':' + transcript_gap_alt_n.type + '.' 
+ str(transcript_gap_alt_n.posedit.pos.start.base) + '_' + str(transcript_gap_alt_n.posedit.pos.end.base) + 'del' + transcript_gap_alt_n.posedit.edit.ref + 'ins' + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n = hp.parse_hgvs_variant(transcript_gap_alt_n_delins_from_dup) + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(transcript_gap_n.posedit.edit.ref) + if transcript_gap_alt_n.posedit.edit.alt is not None: + alternate_bases = list(transcript_gap_alt_n.posedit.edit.alt) + else: + # Deletions with no ins + pre_alternate_bases = list(transcript_gap_alt_n.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = transcript_gap_n.posedit.pos.start.base + alt_start = transcript_gap_alt_n.posedit.pos.start.base + ref_base_dict = {} + for base in reference_bases: + ref_base_dict[ref_start] = str(base) + ref_start = ref_start + 1 + + alt_base_dict = {} + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for int in range(transcript_gap_alt_n.posedit.pos.start.base, transcript_gap_alt_n.posedit.pos.end.base+1, 1): + if int == alt_start: + alt_base_dict[int] = str(''.join(alternate_bases)) + else: + alt_base_dict[int] = 'X' + + # Generate the alt sequence + alternate_sequence_bases = [] + for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base+1, 1): + if int in alt_base_dict.keys(): + alternate_sequence_bases.append(alt_base_dict[int]) + else: + alternate_sequence_bases.append(ref_base_dict[int]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Update variant, map to genome using vm and normalize + transcript_gap_n.posedit.edit.alt = alternate_sequence + + try: + transcript_gap_variant = 
vm.n_to_c(transcript_gap_n) + except: + transcript_gap_variant = transcript_gap_n + + try: + hgvs_genomic = vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) + hgvs_genomic = hn.normalize(hgvs_genomic) + except Exception as e: + if str(e) == "base start position must be <= end position": + # Expansion out is required to map back to the genomic position + pre_base = sf.fetch_seq(transcript_gap_n.ac,transcript_gap_n.posedit.pos.start.base-2,transcript_gap_n.posedit.pos.start.base-1) + post_base = sf.fetch_seq(transcript_gap_n.ac,transcript_gap_n.posedit.pos.end.base,transcript_gap_n.posedit.pos.end.base+1) + transcript_gap_n.posedit.pos.start.base = transcript_gap_n.posedit.pos.start.base - 1 + transcript_gap_n.posedit.pos.end.base = transcript_gap_n.posedit.pos.end.base + 1 + transcript_gap_n.posedit.edit.ref = pre_base + transcript_gap_n.posedit.edit.ref + post_base + transcript_gap_n.posedit.edit.alt = pre_base + transcript_gap_n.posedit.edit.alt + post_base + try: + transcript_gap_variant = vm.n_to_c(transcript_gap_n) + except: + transcript_gap_variant = transcript_gap_n + hgvs_genomic = vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) + hgvs_genomic = hn.normalize(hgvs_genomic) + + # Bypass the next bit of gap code + expand_out = 'false' + + else: + pass + # No map to the flank of a gap or within the gap + else: + pass + + # CASCADING STATEMENTS WHICH CAPTURE t to g MAPPING OPTIONS + # Remove identity bases + if hgvs_c == stored_hgvs_c: + expand_out = 'false' + elif expand_out == 'false' or utilise_gap_code is False: + pass + # Correct expansion ref + 2 + elif expand_out == 'true' and ( + len(hgvs_genomic.posedit.edit.ref) == (len(stored_hgvs_c.posedit.edit.ref) + 2)): # >= 3: + hgvs_genomic.posedit.pos.start.base = hgvs_genomic.posedit.pos.start.base + 1 + hgvs_genomic.posedit.pos.end.base = hgvs_genomic.posedit.pos.end.base - 1 + hgvs_genomic.posedit.edit.ref = hgvs_genomic.posedit.edit.ref[1:-1] + if hgvs_genomic.posedit.edit.alt is not None: + 
hgvs_genomic.posedit.edit.alt = hgvs_genomic.posedit.edit.alt[1:-1] + elif expand_out == 'true' and ( + len(hgvs_genomic.posedit.edit.ref) != (len(stored_hgvs_c.posedit.edit.ref) + 2)): # >= 3: + if expand_out == 'true' and len(hgvs_genomic.posedit.edit.ref) == 2: + gn = hn.normalize(hgvs_genomic) + pass + + # Likely if the start or end position aligns to a gap in the genomic sequence + # Logic + # We have checked that the variant does not cross boundaries, or is intronic + # So is likely mapping to a genomic gap + elif expand_out == 'true' and len(hgvs_genomic.posedit.edit.ref) <= 1: + # Incorrect expansion, likely < ref + 2 + genomic_gap_variant = vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) + try: + hn.normalize(genomic_gap_variant) + except Exception as e: + if str(e) == 'base start position must be <= end position': + gap_start = genomic_gap_variant.posedit.pos.end.base + gap_end = genomic_gap_variant.posedit.pos.start.base + genomic_gap_variant.posedit.pos.start.base = gap_start + genomic_gap_variant.posedit.pos.end.base = gap_end + # Remove alt + try: + genomic_gap_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + pass + # Should be a delins so will normalize statically and replace the reference bases + genomic_gap_variant = hn.normalize(genomic_gap_variant) + # Static map to c. and static normalize + transcript_gap_variant = vm.g_to_t(genomic_gap_variant, hgvs_c.ac) + stored_transcript_gap_variant = transcript_gap_variant + transcript_gap_variant = hn.normalize(transcript_gap_variant) + # if NM_ need the n. 
position + if re.match('NM_', str(hgvs_c.ac)): + transcript_gap_n = no_norm_evm.c_to_n(transcript_gap_variant) + transcript_gap_alt_n = no_norm_evm.c_to_n(stored_hgvs_c) + else: + transcript_gap_n = transcript_gap_variant + transcript_gap_alt_n = stored_hgvs_c + + # Ensure an ALT exists + try: + if transcript_gap_alt_n.posedit.edit.alt is None: + transcript_gap_alt_n.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + transcript_gap_n_delins_from_dup = transcript_gap_n.ac + ':' + transcript_gap_n.type + '.' + str( + transcript_gap_n.posedit.pos.start.base) + '_' + str( + transcript_gap_n.posedit.pos.end.base) + 'del' + transcript_gap_n.posedit.edit.ref + 'ins' + transcript_gap_n.posedit.edit.ref + transcript_gap_n.posedit.edit.ref + transcript_gap_n = hp.parse_hgvs_variant(transcript_gap_n_delins_from_dup) + transcript_gap_alt_n_delins_from_dup = transcript_gap_alt_n.ac + ':' + transcript_gap_alt_n.type + '.' + str( + transcript_gap_alt_n.posedit.pos.start.base) + '_' + str( + transcript_gap_alt_n.posedit.pos.end.base) + 'del' + transcript_gap_alt_n.posedit.edit.ref + 'ins' + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n = hp.parse_hgvs_variant(transcript_gap_alt_n_delins_from_dup) + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(transcript_gap_n.posedit.edit.ref) + if transcript_gap_alt_n.posedit.edit.alt is not None: + alternate_bases = list(transcript_gap_alt_n.posedit.edit.alt) + else: + # Deletions with no ins + pre_alternate_bases = list(transcript_gap_alt_n.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = transcript_gap_n.posedit.pos.start.base + alt_start = transcript_gap_alt_n.posedit.pos.start.base + ref_base_dict = {} + for base in reference_bases: + ref_base_dict[ref_start] = str(base) + ref_start = 
ref_start + 1 + + alt_base_dict = {} + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for int in range(transcript_gap_alt_n.posedit.pos.start.base, + transcript_gap_alt_n.posedit.pos.end.base + 1, 1): + if int == alt_start: + alt_base_dict[int] = str(''.join(alternate_bases)) + else: + alt_base_dict[int] = 'X' + + # Generate the alt sequence + alternate_sequence_bases = [] + for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base + 1, 1): + if int in alt_base_dict.keys(): + alternate_sequence_bases.append(alt_base_dict[int]) + else: + alternate_sequence_bases.append(ref_base_dict[int]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Update variant, map to genome using vm and normalize + transcript_gap_n.posedit.edit.alt = alternate_sequence + + try: + transcript_gap_variant = vm.n_to_c(transcript_gap_n) + except: + transcript_gap_variant = transcript_gap_n + + try: + hgvs_genomic = vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) + hgvs_genomic = hn.normalize(hgvs_genomic) + except Exception as e: + if str(e) == "base start position must be <= end position": + # Expansion out is required to map back to the genomic position + pre_base = sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.start.base - 2, + transcript_gap_n.posedit.pos.start.base - 1) + post_base = sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.end.base, + transcript_gap_n.posedit.pos.end.base + 1) + transcript_gap_n.posedit.pos.start.base = transcript_gap_n.posedit.pos.start.base - 1 + transcript_gap_n.posedit.pos.end.base = transcript_gap_n.posedit.pos.end.base + 1 + transcript_gap_n.posedit.edit.ref = pre_base + transcript_gap_n.posedit.edit.ref + post_base + transcript_gap_n.posedit.edit.alt = pre_base + transcript_gap_n.posedit.edit.alt + post_base + try: + transcript_gap_variant 
= vm.n_to_c(transcript_gap_n) + except: + transcript_gap_variant = transcript_gap_n + hgvs_genomic = vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) + hgvs_genomic = hn.normalize(hgvs_genomic) + + # Ins variants map badly - Especially between c. exon/exon boundary + if hgvs_c.posedit.edit.type == 'ins' and hgvs_c.posedit.pos.start.offset == 0 and hgvs_c.posedit.pos.end.offset == 0: + try: + hn.normalize(hgvs_genomic) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error == 'insertion length must be 1': + if hgvs_c.type == 'c': + hgvs_t = vm.c_to_n(hgvs_c) + else: + hgvs_t = copy.copy(hgvs_c) + ins_ref = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-1,hgvs_t.posedit.pos.end.base) + ins_alt = ins_ref[:1] + hgvs_t.posedit.edit.alt + ins_ref[-1:] + ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base) + '_' + str(hgvs_t.posedit.pos.end.base) + 'del' + ins_ref + 'ins' + ins_alt + hgvs_t = hp.parse_hgvs_variant(ins_to_delins) + try: + hgvs_c = vm.n_to_c(hgvs_t) + except Exception: + hgvs_c = copy.copy(hgvs_t) + try: + hgvs_genomic = no_norm_evm.t_to_g(hgvs_c) + except Exception as e: + error = str(e) + logger.warning('Ins mapping error in myt_to_g ' + error) + + return hgvs_genomic + + +""" +Simple hgvs g. to c. or n. mapping +returns parsed hgvs c. or n. object +""" + + +def myevm_g_to_t(hdp, evm, hgvs_genomic, alt_ac): + hgvs_t = evm.g_to_t(hgvs_genomic, alt_ac) + return hgvs_t + + +""" +parse p. strings into hgvs p. objects +""" + + +def hgvs_protein(variant, hp): + # Set regular expressions for if statements + pat_p = re.compile("\:p\.") # Pattern looks for :g. Note (gene) has been removed + # If the :p. pattern is present in the input variant + if pat_p.search(variant): + # convert the input string into a hgvs object + var_p = hp.parse_hgvs_variant(variant) + return var_p + + +""" +Convert r. into c. +""" + + +def hgvs_r_to_c(hgvs_object): + # check for LRG_t with r. 
+ if re.match('LRG', hgvs_object.ac): + transcript_ac = dbControls.data.get_RefSeqTranscriptID_from_lrgTranscriptID(hgvs_object.ac) + if transcript_ac == 'none': + raise HGVSDataNotAvailableError('Unable to identify a relevant transcript for ' + hgvs_object.ac) + else: + hgvs_object.ac = transcript_ac + hgvs_object.type = 'c' + edit = str(hgvs_object.posedit.edit) + edit = edit.upper() + # lowercase the supported variant types + edit = edit.replace('DEL', 'del') + edit = edit.replace('INS', 'ins') + edit = edit.replace('INV', 'inv') + edit = edit.replace('DUP', 'dup') + # edit = edit.replace('CON', 'con') + # edit = edit.replace('TRA', 'tra') + edit = edit.replace('U', 'T') + hgvs_object.posedit.edit = edit + return hgvs_object + + +""" +Convert c. into r. +""" + + +def hgvs_c_to_r(hgvs_object): + hgvs_object.type = 'r' + edit = str(hgvs_object.posedit.edit) + edit = edit.lower() + edit = edit.replace('t', 'u') + hgvs_object.posedit.edit = edit + return hgvs_object + + +""" +Input c. r. n. variant string +Use uta.py (hdp) to return the identity information for the transcript variant +see hgvs.dataproviders.uta.py for details +""" + + +def tx_identity_info(variant, hdp): + # Set regular expressions for if statements + pat_c = re.compile("\:c\.") # Pattern looks for :c. Note (gene) has been removed + pat_n = re.compile("\:n\.") # Pattern looks for :c. Note (gene) has been removed + pat_r = re.compile("\:r\.") # Pattern looks for :c. Note (gene) has been removed + + # If the :c. pattern is present in the input variant + if pat_c.search(variant): + # Remove all text to the right and including pat_c + tx_ac = variant[:variant.index(':c.') + len(':c.')] + tx_ac = pat_c.sub('', tx_ac) + # Interface with the UTA database via get_tx_identity in uta.py + tx_id_info = hdp.get_tx_identity_info(tx_ac) + # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list + return tx_id_info + + # If the :n. 
pattern is present in the input variant + if pat_n.search(variant): + # Remove all text to the right and including pat_c + tx_ac = variant[:variant.index(':n.') + len(':n.')] + tx_ac = pat_n.sub('', tx_ac) + # Interface with the UTA database via get_tx_identity in uta.py + tx_id_info = hdp.get_tx_identity_info(tx_ac) + # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list + return tx_id_info + + # If the :r. pattern is present in the input variant + if pat_r.search(variant): + # Remove all text to the right and including pat_c + tx_ac = variant[:variant.index(':r.') + len(':r.')] + tx_ac = pat_r.sub('', tx_ac) + # Interface with the UTA database via get_tx_identity in uta.py + tx_id_info = hdp.get_tx_identity_info(tx_ac) + # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list + return tx_id_info + + +""" +Input c. r. nd accession string +Use uta.py (hdp) to return the identity information for the transcript variant +see hgvs.dataproviders.uta.py for details +""" + + +def tx_id_info(alt_ac, hdp): + tx_id_info = hdp.get_tx_identity_info(alt_ac) + # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list + return tx_id_info + + +""" +Use uta.py (hdp) to return the transcript information for a specified gene (HGNC SYMBOL) +see hgvs.dataproviders.uta.py for details +""" + + +def tx_for_gene(hgnc, hdp): + # Interface with the UTA database via get_tx_for_gene in uta.py + tx_for_gene = hdp.get_tx_for_gene(hgnc) + return tx_for_gene + + +""" +Extract RefSeqGene Accession from transcript information +see hgvs.dataproviders.uta.py for details +""" + + +def ng_extract(tx_for_gene): + # Set regular expressions for if statements + pat_NG = re.compile("^NG_") # Pattern looks for NG_ at beginning of a string + # For each list in the list of lists tx_for_gene + for list in tx_for_gene: + # If the pattern NG_ is found in element 4 + if pat_NG.search(list[4]): + # The gene accession is 
set to list element 4 + gene_ac = list[4] + return gene_ac + +""" +Returns exon information for a given transcript +e.g. how the exons align to the genomic reference +see hgvs.dataproviders.uta.py for details +""" + + +def tx_exons(tx_ac, alt_ac, alt_aln_method, hdp): + # Interface with the UTA database via get_tx_exons in uta.py + try: + tx_exons = hdp.get_tx_exons(tx_ac, alt_ac, alt_aln_method) + except hgvs.exceptions.HGVSError as e: + e + tx_exons = 'hgvs Exception: ' + str(e) + return tx_exons + try: + completion = tx_exons[0]['alt_strand'] + except TypeError: + tx_exons = 'error' + return tx_exons + # If on the reverse strand, reverse the order of elements + if tx_exons[0]['alt_strand'] == -1: + tx_exons = tx_exons[::-1] + return tx_exons + else: + return tx_exons + + +""" +Automatically maps genomic positions onto all overlapping transcripts +""" + + +def relevant_transcripts(hgvs_genomic, evm, hdp, alt_aln_method): + # Pass relevant transcripts for the input variant to rts + # Note, the evm method misses one end, the hdp. method misses the other. 
Combine both + rts_list = hdp.get_tx_for_region(hgvs_genomic.ac, alt_aln_method, hgvs_genomic.posedit.pos.start.base-1, hgvs_genomic.posedit.pos.end.base-1) + rts_dict = {} + for tx_dat in rts_list: + rts_dict[tx_dat[0]] = True + rts_list_2 = evm.relevant_transcripts(hgvs_genomic) + for tx_dat_2 in rts_list_2: + rts_dict[tx_dat_2] = True + rts = rts_dict.keys() + + # Project genomic variants to new transcripts + # and populate a code_var list + ############################################# + # Open a list to store relevant transcripts + code_var = [] + # Populate transcripts - The keys become the list elements from rel_trs + for x in rts: + y = x.rstrip() # Chomp any whitespace from the right of x ($_) - Assign to y + # Easy variant mapper used to map the input variant to the relevant transcripts + # Check for coding transcripts + try: + variant = evm.g_to_t(hgvs_genomic, y) + except hgvs.exceptions.HGVSError as e: + # Check for non-coding transcripts + try: + variant = evm.g_to_t(hgvs_genomic, y) + except hgvs.exceptions.HGVSError as e: + continue + except: + continue + + # Corrective Normalisation of intronic descriptions in the antisense oriemtation + pl = re.compile('\+') + mi = re.compile('\-') + ast = re.compile('\*') + if pl.search(str(variant)) or mi.search(str(variant)) or ast.search(str(variant)): + tx_ac = variant.ac + alt_ac = hgvs_genomic.ac + + # Interface with the UTA database via get_tx_exons in uta.py + try: + tx_exons = hdp.get_tx_exons(tx_ac, alt_ac, alt_aln_method) + except hgvs.exceptions.HGVSError as e: + e + tx_exons = 'hgvs Exception: ' + str(e) + return tx_exons + try: + completion = tx_exons[0]['alt_strand'] + except TypeError: + tx_exons = 'error' + return tx_exons + # If on the reverse strand, reverse the order of elements + if tx_exons[0]['alt_strand'] == -1: + tx_exons = tx_exons[::-1] + else: + pass + + # Gene orientation + if tx_exons[0]['alt_strand'] == -1: + antisense = 'true' + else: + antisense = 'false' + + # Pass if antisense = 
'false' + if antisense == 'false': + pass + else: + # Reverse normalize hgvs_genomic + rev_hgvs_genomic = reverse_hn.normalize(hgvs_genomic) + # map back to coding + variant = evm.g_to_t(rev_hgvs_genomic, tx_ac) + code_var.append(str(variant)) + return code_var + + +""" +Take HGVS string, parse into hgvs object and validate +""" + + +def validate(input, hp, vr): + hgvs_input = hp.parse_hgvs_variant(input) + g = re.compile(":g.") + p = re.compile(":p.") + if p.search(input): + if hasattr(hgvs_input.posedit.pos.start, 'offset'): + pass + else: + hgvs_input.posedit.pos.start.offset = 0 + if hasattr(hgvs_input.posedit.pos.end, 'offset'): + pass + else: + hgvs_input.posedit.pos.end.offset = 0 + if hasattr(hgvs_input.posedit.pos.start, 'datum'): + pass + else: + hgvs_input.posedit.pos.start.datum = 0 + if hasattr(hgvs_input.posedit.pos.end, 'datum'): + pass + else: + hgvs_input.posedit.pos.end.datum = 0 + if hasattr(hgvs_input.posedit.edit, 'ref_n'): + pass + else: + hgvs_input.posedit.edit.ref_n = hgvs_input.posedit.pos.end.base - hgvs_input.posedit.pos.start.base + 1 + + try: + vr.validate(hgvs_input) + except hgvs.exceptions.HGVSError as e: + + error = e + return error + + else: + error = 'false' + return error + +""" +Search HGNC rest +""" + + +def hgnc_rest(path): + data = { + 'record': '', + 'error': 'false' + } + # HGNC server + headers = { + 'Accept': 'application/json', + } + uri = 'http://rest.genenames.org' + target = urlparse(uri + path) + method = 'GET' + body = '' + h = http.Http() + # collect the response + response, content = h.request( + target.geturl(), + method, + body, + headers) + if response['status'] == '200': + # assume that content is a json reply + # parse content with the json module + data['record'] = json.loads(content) + else: + data['error'] = "Unable to contact the HGNC database: Please try again later" + return data + + +""" +Search Entrez databases with efetch and SeqIO +""" + + +def entrez_efetch(db, id, rettype, retmode): + # IMPORT 
Bio modules + # from Bio import Entrez + Entrez.email = ENTREZ_ID + # from Bio import SeqIO + handle = Entrez.efetch(db=db, id=id, rettype=rettype, retmode=retmode) + # Get record + record = SeqIO.read(handle, "gb") + # Place into text + # text = handle.read() + handle.close() + return record + + +""" +search Entrez databases with efetch and read +""" + + +def entrez_read(db, id, retmode): + # IMPORT Bio modules + # from Bio import Entrez + Entrez.email = ENTREZ_ID + # from Bio import SeqIO + handle = Entrez.efetch(db=db, id=id, retmode=retmode) + # Get record + record = Entrez.read(handle) + # Place into text + # text = handle.read() + handle.close() + return record + + +""" +Simple reverse complement function for nucleotide sequences +""" + + +def revcomp(bases): + l2 = [] + l = list(bases) + element = 0 + for base in l: + element = element + 1 + if base == 'G': + l2.append('C') + if base == 'C': + l2.append('G') + if base == 'A': + l2.append('T') + if base == 'T': + l2.append('A') + revcomp = ''.join(l2) + revcomp = revcomp[::-1] + return revcomp + + +""" +Function designed to merge multiple HGVS variants (hgvs objects) into a single delins +using 3 prime normalization +""" + + +def merge_hgvs_3pr(hgvs_variant_list): + # Ensure c. is mapped to the + h_list = [] + + # Sanity check and format the submitted variants + for hgvs_v in hgvs_variant_list: + # For testing include parser + try: + hgvs_v = hp.parse_hgvs_variant(hgvs_v) + except Exception as e: + print e + pass + + # Validate + vr.validate(hgvs_v) # Let hgvs errors deal with invalid variants and not hgvs objects + if hgvs_v.type == 'c': + try: + hgvs_v = vm.c_to_n(hgvs_v) + h_list.append(hgvs_v) + except: + raise mergeHGVSerror("Unable to map from c. 
position to absolute position") + elif hgvs_v.type == 'g': + h_list.append(hgvs_v) + if h_list != []: + hgvs_variant_list = copy.deepcopy(h_list) + + # Define accession and start/end positions + accession = None + merge_start_pos = None + merge_end_pos = None + type = None + full_list = [] + + # Loop through the submitted variants and gather the required info + for hgvs_v in hgvs_variant_list: + # No intronic positions + try: + if hgvs_v.posedit.pos.start.offset != 0: + raise mergeHGVSerror("Base-offset position submitted") + if hgvs_v.posedit.pos.end.offset != 0: + raise mergeHGVSerror("Base-offset position submitted") + except AttributeError: + pass + + # Normalize the variant (allow cross intron) which also adds the reference sequence (?) + hgvs_v = hn.normalize(hgvs_v) + + # Set the accession and ensure that multiple reference sequences have not been queried + if accession is None: + accession = hgvs_v.ac + type = hgvs_v.type + else: + if hgvs_v.ac != accession: + raise mergeHGVSerror("More than one reference sequence submitted") + else: + pass + + # Set initial start and end positions + if merge_start_pos is None: + merge_start_pos = hgvs_v.posedit.pos.start.base + merge_end_pos = hgvs_v.posedit.pos.end.base + # Append to the final list of variants + full_list.append(hgvs_v) + continue + # Ensure variants are in the correct order and not overlapping + else: + # ! hgvs_v.posedit.pos.start.base !> + if hgvs_v.posedit.pos.start.base <= merge_end_pos: + raise mergeHGVSerror("Submitted variants are out of order or their ranges overlap") + else: + # Create a fake variant to handle the missing sequence + ins_seq = sf.fetch_seq(hgvs_v.ac, merge_end_pos, hgvs_v.posedit.pos.start.base - 1) + gapping = hgvs_v.ac + ':' + hgvs_v.type + '.' 
+ str(merge_end_pos + 1) + '_' + str( + hgvs_v.posedit.pos.start.base - 1) + 'delins' + ins_seq + hgvs_gapping = hp.parse_hgvs_variant(gapping) + full_list.append(hgvs_gapping) + # update end_pos + merge_end_pos = hgvs_v.posedit.pos.end.base + # Append to the final list of variants + full_list.append(hgvs_v) + + # Generate the alt sequence + alt_sequence = '' + for hgvs_v in full_list: + ref_alt = hgvs2vcf.hgvs_ref_alt(hgvs_v) + alt_sequence = alt_sequence + ref_alt['alt'] + + # Fetch the reference sequence and copy it for the basis of the alt sequence + reference_sequence = sf.fetch_seq(accession, merge_start_pos - 1, merge_end_pos) + # Generate an hgvs_delins + if alt_sequence == '': + delins = accession + ':' + type + '.' + str(merge_start_pos) + '_' + str( + merge_end_pos) + 'del' + reference_sequence + else: + delins = accession + ':' + type + '.' + str(merge_start_pos) + '_' + str( + merge_end_pos) + 'del' + reference_sequence + 'ins' + alt_sequence + hgvs_delins = hp.parse_hgvs_variant(delins) + try: + hgvs_delins = vm.n_to_c(hgvs_delins) + except: + pass + # Normalize (allow variants crossing into different exons) + try: + hgvs_delins = hn.normalize(hgvs_delins) + except HGVSUnsupportedOperationError: + pass + return hgvs_delins + + +""" +Function designed to merge multiple HGVS variants (hgvs objects) into a single delins +using 5 prime normalization +""" + + +def merge_hgvs_5pr(hgvs_variant_list): + # Ensure c. is mapped to the + h_list = [] + + # Sanity check and format the submitted variants + for hgvs_v in hgvs_variant_list: + # For testing include parser + try: + hgvs_v = hp.parse_hgvs_variant(hgvs_v) + except: + pass + + # Validate + vr.validate(hgvs_v) # Let hgvs errors deal with invalid variants and not hgvs objects + if hgvs_v.type == 'c': + try: + hgvs_v = vm.c_to_n(hgvs_v) + h_list.append(hgvs_v) + except: + raise mergeHGVSerror("Unable to map from c. 
position to absolute position") + if h_list != []: + hgvs_variant_list = copy.deepcopy(h_list) + + # Define accession and start/end positions + accession = None + merge_start_pos = None + merge_end_pos = None + type = None + full_list = [] + + # Loop through the submitted variants and gather the required info + for hgvs_v in hgvs_variant_list: + try: + # No intronic positions + if hgvs_v.posedit.pos.start.offset != 0: + raise mergeHGVSerror("Base-offset position submitted") + if hgvs_v.posedit.pos.end.offset != 0: + raise mergeHGVSerror("Base-offset position submitted") + except AttributeError: + pass + + # Normalize the variant (allow cross intron) which also adds the reference sequence (?) + hgvs_v = reverse_hn.normalize(hgvs_v) + + # Set the accession and ensure that multiple reference sequences have not been queried + if accession is None: + accession = hgvs_v.ac + type = hgvs_v.type + else: + if hgvs_v.ac != accession: + raise mergeHGVSerror("More than one reference sequence submitted") + else: + pass + + # Set initial start and end positions + if merge_start_pos is None: + merge_start_pos = hgvs_v.posedit.pos.start.base + merge_end_pos = hgvs_v.posedit.pos.end.base + # Append to the final list of variants + full_list.append(hgvs_v) + continue + # Ensure variants are in the correct order and not overlapping + else: + # ! hgvs_v.posedit.pos.start.base !> + if hgvs_v.posedit.pos.start.base <= merge_end_pos: + raise mergeHGVSerror("Submitted variants are out of order or their ranges overlap") + else: + # Create a fake variant to handle the missing sequence + ins_seq = sf.fetch_seq(hgvs_v.ac, merge_end_pos, hgvs_v.posedit.pos.start.base - 1) + gapping = hgvs_v.ac + ':' + hgvs_v.type + '.' 
+ str(merge_end_pos + 1) + '_' + str( + hgvs_v.posedit.pos.start.base - 1) + 'delins' + ins_seq + hgvs_gapping = hp.parse_hgvs_variant(gapping) + full_list.append(hgvs_gapping) + # update end_pos + merge_end_pos = hgvs_v.posedit.pos.end.base + # Append to the final list of variants + full_list.append(hgvs_v) + + # Generate the alt sequence + alt_sequence = '' + for hgvs_v in full_list: + ref_alt = hgvs2vcf.hgvs_ref_alt(hgvs_v) + alt_sequence = alt_sequence + ref_alt['alt'] + + # Fetch the reference sequence and copy it for the basis of the alt sequence + reference_sequence = sf.fetch_seq(accession, merge_start_pos - 1, merge_end_pos) + + # Generate an hgvs_delins + if alt_sequence == '': + delins = accession + ':' + type + '.' + str(merge_start_pos) + '_' + str( + merge_end_pos) + 'del' + reference_sequence + else: + delins = accession + ':' + type + '.' + str(merge_start_pos) + '_' + str( + merge_end_pos) + 'del' + reference_sequence + 'ins' + alt_sequence + hgvs_delins = hp.parse_hgvs_variant(delins) + try: + hgvs_delins = vm.n_to_c(hgvs_delins) + except: + pass + # Normalize (allow variants crossing into different exons) + try: + hgvs_delins = reverse_hn.normalize(hgvs_delins) + except HGVSUnsupportedOperationError: + pass + return hgvs_delins + + +""" +Function designed to merge multiple pseudo VCF variants (strings) into a single HGVS delins +using 5 prime normalization then return a 3 prime normalized final HGVS object +""" + + +def merge_pseudo_vcf(vcf_list, genome_build): + hgvs_list = [] + # Convert pseudo_vcf list into a HGVS list + for call in vcf_list: + hgvs = pseudo_vcf2hgvs.pvcf_to_hgvs(call, genome_build, normalization_direction=5) + hgvs_list.append(hgvs) + # Merge + hgvs_delins = merge_hgvs_5pr(hgvs_list) + # normalize 3 prime + hgvs_delins = hn.normalize(hgvs_delins) + # return + return hgvs_delins + + +""" +HGVS allele handling function which takes a single HGVS allele description and +separates each allele into a list of HGVS variants +""" + + 
+def hgvs_alleles(variant_description): + try: + # Split up the description + accession, remainder = variant_description.split(':') + # Branch + if re.search('[gcn]\.\d+\[', remainder): + # NM_004006.2:c.2376[G>C];[(G>C)] + # if re.search('\(', remainder): + # raise alleleVariantError('Unsupported format ' + remainder) + # NM_004006.2:c.2376[G>C];[G>C] + type, remainder = remainder.split('.') + pos = re.match('\d+', remainder) + pos = pos.group(0) + remainder = remainder.replace(pos, '') + remainder = remainder[1:-1] + alleles = remainder.split('];[') + my_alleles = [] + for posedit in alleles: + if re.search('\(', posedit): + # NM_004006.2:c.2376[G>C];[(G>C)] + continue + posedit_list = [posedit] + current_allele = [] + for pe in posedit_list: + vrt = accession + ':' + type + '.' + str(pos) + pe + current_allele.append(vrt) + my_alleles.append(current_allele) + else: + type, remainder = remainder.split('.') + if re.search('\(;\)', remainder) and re.search('\];', remainder): + # NM_004006.2:c.[296T>G];[476T>C](;)1083A>C(;)1406del + pre_alleles = remainder.split('(;)') + pre_merges = [] + alleles = [] + for allele in pre_alleles: + if re.match('\[', allele): + pre_merges.append(allele) + else: + alleles.append(allele) + # Extract descriptions + my_alleles = [] + # First alleles + for posedits in alleles: + posedit_list = posedits.split(';') + current_allele = [] + for pe in posedit_list: + vrt = accession + ':' + type + '.' + pe + current_allele.append(vrt) + my_alleles.append(current_allele) + + # Then Merges + alleles = [] + remainder = ';'.join(pre_merges) + remainder = remainder[1:-1] # removes the first [ and the last ] + alleles = remainder.split('];[') + # now separate out the variants in each allele§ + for posedits in alleles: + posedit_list = posedits.split(';') + current_allele = [] + for pe in posedit_list: + vrt = accession + ':' + type + '.' 
+ pe + current_allele.append(vrt) + my_alleles.append(current_allele) + # Now merge the alleles into a single variant + merged_alleles = [] + for each_allele in my_alleles: + if re.search('\?', str(each_allele)): + # NM_004006.2:c.[2376G>C];[?] + continue + merge = [] + allele = str(merge_hgvs_3pr(each_allele)) + merge.append(allele) + merged_alleles.append(merge) + my_alleles = merged_alleles + + elif re.search('\(;\)', remainder): + # If statement for uncertainties + # NM_004006.2:c.[296T>G;476C>T];[476C>T](;)1083A>C + if re.search('\[', remainder): + raise alleleVariantError('Unsupported format ' + type + '.' + remainder) + # NM_004006.2:c.2376G>C(;)3103del + # NM_000548.3:c.3623_3647del(;)3745_3756dup + alleles = remainder.split('(;)') + # now separate out the variants in each allele§ + my_alleles = [] + for posedits in alleles: + posedit_list = posedits.split(';') + current_allele = [] + for pe in posedit_list: + vrt = accession + ':' + type + '.' + pe + current_allele.append(vrt) + my_alleles.append(current_allele) + else: + # If statement for uncertainties + if re.search('\(', remainder): + raise alleleVariantError('Unsupported format ' + type + '.' + remainder) + # NM_004006.2:c.[2376G>C];[3103del] + # NM_004006.2:c.[2376G>C];[3103del] + # NM_004006.2:c.[296T>G;476C>T;1083A>C];[296T>G;1083A>C] + # NM_000548.3:c.[4358_4359del;4361_4372del] + remainder = remainder[1:-1] # removes the first [ and the last ] + alleles = remainder.split('];[') + # now separate out the variants in each allele§ + my_alleles = [] + for posedits in alleles: + posedit_list = posedits.split(';') + current_allele = [] + for pe in posedit_list: + vrt = accession + ':' + type + '.' + pe + current_allele.append(vrt) + my_alleles.append(current_allele) + # Now merge the alleles into a single variant + merged_alleles = [] + + for each_allele in my_alleles: + print each_allele + if re.search('\?', str(each_allele)): + # NM_004006.2:c.[2376G>C];[?] 
+ continue + merge = [] + allele = str(merge_hgvs_3pr(each_allele)) + merge.append(allele) + merged_alleles.append(merge) + my_alleles = merged_alleles + + # Extract alleles into strings + allele_strings = [] + for alleles_l in my_alleles: + for allele in alleles_l: + allele_strings.append(allele) + my_alleles = allele_strings + + # return + return my_alleles + except Exception as e: + import traceback + exc_type, exc_value, last_traceback = sys.exc_info() + te = traceback.format_exc() + raise alleleVariantError(str(e)) diff --git a/VariantValidator/modules/vvCore.py b/VariantValidator/modules/vvCore.py index f96bd3cd..0c9ce7da 100644 --- a/VariantValidator/modules/vvCore.py +++ b/VariantValidator/modules/vvCore.py @@ -38,9 +38,10 @@ #import variantanalyser from vvLogging import logger import hgvs +import vvHGVS #from variantanalyser import functions as va_func #from variantanalyser import dbControls as va_dbCrl -#from variantanalyser import hgvs2vcf as va_H2V +#from variantanalyser import hgvs2vcf as vvHGVS #from variantanalyser import batch as va_btch #from variantanalyser import g_to_g as va_g2g #from variantanalyser import supported_chromosome_builds as va_scb @@ -49,484 +50,436 @@ import vvFunctions as fn import vvDatabase -import vvChromasomes - -def validate(self, batch_variant, selected_assembly, select_transcripts, transcriptSet="refseq"): - logger.info(batch_variant + ' : ' + selected_assembly) - # Take start time - start_time = time.time() - - # Set pre defined variables - # SeqFetcher - sf = hgvs.dataproviders.seqfetcher.SeqFetcher() - - try: - # Validation - ############ - - # Create a dictionary of transcript ID : '' - if select_transcripts != 'all': - select_transcripts_list = select_transcripts.split('|') - select_transcripts_dict = {} - select_transcripts_dict_plus_version = {} - for id in select_transcripts_list: - id = id.strip() - if re.match('LRG', id): - id = self.db.get.get_RefSeqTranscriptID_from_lrgTranscriptID(id) - if id == 'none': - 
continue - select_transcripts_dict_plus_version[id] = '' - id = id.split('.')[0] - select_transcripts_dict[id] = '' - # Set up gene list dictionary - input_genes = {} - - # Remove genes if transcripts selected - # if select_transcripts != 'all': - - # split the batch queries into a list - batch_queries = batch_variant.split('|') - - # Turn each variant into a dictionary. The dictionary will be compiled during validation - batch_list = [] - for queries in batch_queries: - queries = queries.strip() - query = {'quibble': queries, 'id': queries, 'warnings': '', 'description': '', 'coding': '', 'coding_g': '', - 'genomic_r': '', 'genomic_g': '', 'protein': '', 'write': 'true', 'primary_assembly': 'false', - 'order': 'false'} - batch_list.append(query) - - # Create List to carry batch data - batch_out = [] - - # Ensure batch_list is pulled into the function so that it can be appended to - batch_list = batch_list - - # Enter the validation loop - ########################### - # Allow order by input - ordering = 0 - - """ - Set a flag to mark the final output type - flag : warning - flag : error - flag : intragenic - flag : gene - """ - set_output_type_flag = 'warning' - logger.debug("Batch list length " + str(len(batch_list))) - for validation in batch_list: - # Start timing - logger.traceStart(validation) - # Re-set cautions and automaps - - if transcriptSet == "refseq": - alt_aln_method = 'splign' - elif transcriptSet == "ensembl": - alt_aln_method = 'genebuild' - logger.warning("Ensembl is currently not supported") - validation['warnings'] += ': ' + "Ensembl is currently not supported" - continue - else: - logger.warning( - "The transcript set variable " + transcriptSet + " is invalid, it needs to be 'refseq' or 'ensembl'") - validation[ - 'warnings'] += ': ' + "The transcript set variable " + transcriptSet + " is invalid, it needs to be 'refseq' or 'ensembl'" - continue - - # Create Normalizers - hn = hgvs.normalizer.Normalizer(self.hdp, - cross_boundaries=False, - 
shuffle_direction=3, - alt_aln_method=alt_aln_method - ) - reverse_normalizer = hgvs.normalizer.Normalizer(self.hdp, - cross_boundaries=False, - shuffle_direction=5, - alt_aln_method=alt_aln_method - ) - - # Blank cautions - caution = '' - automap = '' - - # This will be used to order the final output - if str(validation['order']) == 'false': - ordering = ordering + 1 - validation['order'] = ordering - else: - pass - # Bug catcher - try: - # Note, ID is not touched. It is always the input variant description. Quibble will be altered but id will not if type = g. - input = validation['quibble'] - logger.trace("Commenced validation of " + str(input), validation) - - # Test for rich text unicode characters +import vvChromosomes +from vvObjects import variantValidatorError + + +class mixin: + def validate(self, batch_variant, selected_assembly, select_transcripts, transcriptSet="refseq"): + logger.info(batch_variant + ' : ' + selected_assembly) + # Take start time + start_time = time.time() + + # Set pre defined variables + # SeqFetcher + sf = hgvs.dataproviders.seqfetcher.SeqFetcher() + primary_assembly=None + + try: + # Validation + ############ + + # Create a dictionary of transcript ID : '' + if select_transcripts != 'all': + select_transcripts_list = select_transcripts.split('|') + select_transcripts_dict = {} + select_transcripts_dict_plus_version = {} + for id in select_transcripts_list: + id = id.strip() + if re.match('LRG', id): + id = self.db.get.get_RefSeqTranscriptID_from_lrgTranscriptID(id) + if id == 'none': + continue + select_transcripts_dict_plus_version[id] = '' + id = id.split('.')[0] + select_transcripts_dict[id] = '' + # Set up gene list dictionary + input_genes = {} + + # Remove genes if transcripts selected + # if select_transcripts != 'all': + + # split the batch queries into a list + batch_queries = batch_variant.split('|') + + # Turn each variant into a dictionary. 
The dictionary will be compiled during validation + batch_list = [] + for queries in batch_queries: + queries = queries.strip() + query = {'quibble': queries, 'id': queries, 'warnings': '', 'description': '', 'coding': '', 'coding_g': '', + 'genomic_r': '', 'genomic_g': '', 'protein': '', 'write': 'true', 'primary_assembly': 'false', + 'order': 'false'} + batch_list.append(query) + + # Create List to carry batch data + batch_out = [] + + # Ensure batch_list is pulled into the function so that it can be appended to + batch_list = batch_list + + # Enter the validation loop + ########################### + # Allow order by input + ordering = 0 + + """ + Set a flag to mark the final output type + flag : warning + flag : error + flag : intragenic + flag : gene + """ + set_output_type_flag = 'warning' + logger.debug("Batch list length " + str(len(batch_list))) + for validation in batch_list: + # Start timing + logger.traceStart(validation) + # Re-set cautions and automaps + + if transcriptSet == "refseq": + alt_aln_method = 'splign' + elif transcriptSet == "ensembl": + alt_aln_method = 'genebuild' + logger.warning("Ensembl is currently not supported") + validation['warnings'] += ': ' + "Ensembl is currently not supported" + continue + else: + logger.warning( + "The transcript set variable " + transcriptSet + " is invalid, it needs to be 'refseq' or 'ensembl'") + validation[ + 'warnings'] += ': ' + "The transcript set variable " + transcriptSet + " is invalid, it needs to be 'refseq' or 'ensembl'" + continue + + # Create Normalizers + hn = hgvs.normalizer.Normalizer(self.hdp, + cross_boundaries=False, + shuffle_direction=3, + alt_aln_method=alt_aln_method + ) + reverse_normalizer = hgvs.normalizer.Normalizer(self.hdp, + cross_boundaries=False, + shuffle_direction=5, + alt_aln_method=alt_aln_method + ) + + # Blank cautions + caution = '' + automap = '' + + # This will be used to order the final output + if str(validation['order']) == 'false': + ordering = ordering + 1 + 
validation['order'] = ordering + else: + pass + # Bug catcher try: - unicode_test = u"{}".format(input) - except UnicodeDecodeError as e: - # Format the trapped character into unicode for styled printing - my_unicode = e[1] - my_unicode = my_unicode.decode('utf-8') + # Note, ID is not touched. It is always the input variant description. Quibble will be altered but id will not if type = g. + input = validation['quibble'] + logger.trace("Commenced validation of " + str(input), validation) # Test for rich text unicode characters try: - str(my_unicode) - except UnicodeEncodeError as e: + unicode_test = u"{}".format(input) + except UnicodeDecodeError as e: # Format the trapped character into unicode for styled printing - unicoded_it = e[1] - unicoded_it_list = unicoded_it.split() - for try_me in unicoded_it_list: - try: - str(try_me) - except UnicodeEncodeError as e: - found_unicode = try_me - found_error = str(e) - found_at = found_unicode.encode('raw_unicode_escape') - break - # Extract character from the error - unicode = re.findall("u'\\\\\w+'", found_error) - character = unicode[0] - search_term = character.replace("u'", '') - search_term = search_term.replace("'", '') - found_at_decoded = found_at.decode('raw_unicode_escape') - found_at = found_at_decoded.encode('raw_unicode_escape') - string_char = str(character) - # Create a human readable U+ representation - human_code = re.sub("u'\\\\\w", 'U+', string_char) - human_code = human_code.replace("'", "") - format_human = u"{}".format(human_code) - format_human = format_human.upper() - found_at = re.sub(search_term, u'<' + format_human + u'>', found_at) - slasher = re.compile("\\\\") - found_at = re.sub(slasher, '', found_at) - validation['id'] = found_at - error = u'Submitted variant description contains an invalid character which is represented by Unicode character ' + format_human + u' at position ' + found_at + u': Please remove this character and re-submit: A useful search function for Unicode characters can be 
found at https://unicode-search.net/' - validation['warnings'] = validation['warnings'] + ': ' + error - logger.warning(error) - continue - else: - pass - else: - pass + my_unicode = e[1] + my_unicode = my_unicode.decode('utf-8') - # Remove whitespace - ws = copy.copy(input) - input = input.strip() - input = ''.join(input.split()) - if input != ws: - caution = 'Whitespace removed from variant description ' + str(ws) - validation['warnings'] = validation['warnings'] + ': ' + caution - logger.info(caution) - stash_input = copy.copy(input) - # Set the primary_assembly - if validation['primary_assembly'] == 'false': - if selected_assembly == 'hg19': - primary_assembly = 'GRCh37' - elif selected_assembly == 'hg38': - primary_assembly = 'GRCh38' - # Ensure genome build is correctly formatted - elif re.search('GRC', selected_assembly, re.IGNORECASE): - selected_assembly = selected_assembly.replace('g', 'G') - selected_assembly = selected_assembly.replace('r', 'R') - selected_assembly = selected_assembly.replace('c', 'C') - selected_assembly = selected_assembly.replace('H', 'h') - primary_assembly = selected_assembly - # Catch invalid genome build - valid_build = False - for genome_build in self.genome_builds: - if primary_assembly == genome_build: - valid_build = True - if valid_build is False: - primary_assembly = 'GRCh38' - validation['warnings'] = validation[ - 'warnings'] + ': Invalid genome build has been specified. Automap has selected the default build (GRCh38)' - logger.warning( - 'Invalid genome build has been specified. 
Automap has selected the default build ' + primary_assembly) + # Test for rich text unicode characters + try: + str(my_unicode) + except UnicodeEncodeError as e: + # Format the trapped character into unicode for styled printing + unicoded_it = e[1] + unicoded_it_list = unicoded_it.split() + found_error="" + found_at=None + for try_me in unicoded_it_list: + try: + str(try_me) + except UnicodeEncodeError as e: + found_unicode = try_me + found_error = str(e) + found_at = found_unicode.encode('raw_unicode_escape') + break + # Extract character from the error + unicode = re.findall("u'\\\\\w+'", found_error) + character = unicode[0] + search_term = character.replace("u'", '') + search_term = search_term.replace("'", '') + found_at_decoded = found_at.decode('raw_unicode_escape') + found_at = found_at_decoded.encode('raw_unicode_escape') + string_char = str(character) + # Create a human readable U+ representation + human_code = re.sub("u'\\\\\w", 'U+', string_char) + human_code = human_code.replace("'", "") + format_human = u"{}".format(human_code) + format_human = format_human.upper() + found_at = re.sub(search_term, u'<' + format_human + u'>', found_at) + slasher = re.compile("\\\\") + found_at = re.sub(slasher, '', found_at) + validation['id'] = found_at + error = u'Submitted variant description contains an invalid character which is represented by Unicode character ' + format_human + u' at position ' + found_at + u': Please remove this character and re-submit: A useful search function for Unicode characters can be found at https://unicode-search.net/' + validation['warnings'] = validation['warnings'] + ': ' + error + logger.warning(error) + continue + else: + pass else: - validation['primary_assembly'] = primary_assembly - else: - primary_assembly = validation['primary_assembly'] - logger.trace("Completed string formatting", validation) - # Set variables that batch will not use but are required - crossing = 'false' - boundary = 'false' - - # VCF type 1 - """ - 
VCF2HGVS stage 1. converts chr-pos-ref-alt into chr:posRef>Alt - The output format is a common mistake caused by inaccurate conversion of - VCF variants into HGVS - hence the need for conversion step 2 - """ - if re.search('[-:]\d+[-:][GATC]+[-:][GATC]+', input): - input = input.replace(':', '-') - # Extract primary_assembly if provided - if re.match('GRCh3\d+-', input) or re.match('hg\d+-', input): - in_list = input.split('-') - selected_assembly = in_list[0] - input = '-'.join(in_list[1:]) - pre_input = copy.deepcopy(input) - vcf_elements = pre_input.split('-') - input = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[3]) - elif re.search('[-:]\d+[-:][GATC]+[-:]', input): - input = input.replace(':', '-') - # Extract primary_assembly if provided - if re.match('GRCh3\d+-', input) or re.match('hg\d+-', input): - in_list = input.split('-') - selected_assembly = in_list[0] - input = '-'.join(in_list[1:]) - pre_input = copy.deepcopy(input) - vcf_elements = pre_input.split('-') - validation[ - 'warnings'] = 'Not stating ALT bases is ambiguous because VCF specification 4.0 would treat ' + pre_input + ' as a deletion whereas VCF specification 4.1 onwards would treat ' + pre_input + ' as ALT = REF' - validation['warnings'] = validation['warnings'] + ': VariantValidator has output both alternatives' - logger.resub('Not stating ALT bases is ambiguous because VCF specification 4.0 would treat ' + - pre_input + ' as a deletion whereas VCF specification 4.1 onwards would treat ' + pre_input + - ' as ALT = REF. 
Validator will output both alternatives.') - validation['write'] = 'false' - input_A = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], 'del') - input_B = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[2]) - queryA = {'quibble': input_A, 'id': validation['id'], 'warnings': validation['warnings'], - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', - 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} - queryB = {'quibble': input_B, 'id': validation['id'], 'warnings': validation['warnings'], - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', - 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} - batch_list.append(queryA) - batch_list.append(queryB) - continue - elif re.search('[-:]\d+[-:][-:][GATC]+', input) or re.search('[-:]\d+[-:][.][-:][GATC]+', input): - input = input.replace(':', '-') - if re.search('-.-', input): - input = input.replace('-.-', '-ins-') - if re.search('--', input): - input = input.replace('--', '-ins-') - # Extract primary_assembly if provided - if re.match('GRCh3\d+-', input) or re.match('hg\d+-', input): - in_list = input.split('-') - selected_assembly = in_list[0] - input = '-'.join(in_list[1:]) - pre_input = copy.deepcopy(input) - vcf_elements = pre_input.split('-') - input = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[3]) - stash_input = input - logger.trace("Completed VCF-HVGS step 1", validation) - # API type non-HGVS - # e.g. Chr16:2099572TC>T - """ - VCF2HGVS conversion step 2 identifies the correct chromosomal reference - sequence based upon the non compliant identifier e.g. :2099572TC>T. - The data is currently stored in variantanalyser.supported_chromosome_builds. - Anticipated future builds will be transferred to MySQL which can be more - easily updated and maintained. 
- LRGs and LRG_ts also need to be assigned the correct reference sequence identifier. - The LRG ID data ia stored in the VariantValidator MySQL database. - The reference sequence type is also assigned. - """ - if re.search('\w+\:', input) and not re.search('\w+\:[gcnmrp]\.', input): - if re.search('\w+\:[gcnmrp]', input) and not re.search('\w+\:[gcnmrp]\.', input): - # Missing dot pass - else: - try: - if re.search('GRCh37', input) or re.search('hg19', input): - primary_assembly = 'GRCh37' - elif re.search('GRCh38', input) or re.search('hg38', input): - primary_assembly = 'GRCh38' - pre_input = copy.deepcopy(input) - input_list = input.split(':') - pos_ref_alt = str(input_list[1]) - positionAndEdit = input_list[1] - if not re.match('N[CGTWMRP]_', input) and not re.match('LRG_', input): - chr_num = str(input_list[0]) - chr_num = chr_num.upper() - chr_num = chr_num.strip() - if re.match('CHR', chr_num): - chr_num = chr_num.replace('CHR', '') - # Use selected assembly - accession = vvChromasomes.to_accession(chr_num, selected_assembly) - if accession is None: - validation['warnings'] = validation[ - 'warnings'] + ': ' + chr_num + \ - ' is not part of genome build ' + selected_assembly - logger.warning(chr_num + ' is not part of genome build ' + selected_assembly) - continue - else: - accession = input_list[0] - if re.search('>', pre_input): - if re.search('del', pre_input): - pos = re.match('\d+', pos_ref_alt) - position = pos.group(0) - old_ref, old_alt = pos_ref_alt.split('>') - old_ref = old_ref.replace(position, '') - position = int(position) - 1 - required_base = sf.fetch_seq(accession, start_i=position - 1, end_i=position) - ref = required_base + old_ref - alt = required_base - positionAndEdit = str(position) + ref + '>' + alt - elif re.search('ins', pre_input): - pos = re.match('\d+', pos_ref_alt) - position = pos.group(0) - old_ref, old_alt = pos_ref_alt.split('>') - # old_ref = old_ref.replace(position, '') - position = int(position) - 1 - required_base = 
sf.fetch_seq(accession, start_i=position - 1, end_i=position) - ref = required_base - alt = required_base + old_alt - positionAndEdit = str(position) + ref + '>' + alt - # Assign reference sequence type - ref_type = self.db.ref_type_assign(accession) - if re.match('LRG_', accession): - if ref_type == ':g.': - accession = self.db.get.get_refseqgeneId_from_lrgID(accession) - else: - accession = self.db.get.get_RefSeqTranscriptID_from_lrgTranscriptID(accession) - else: - accession = accession - input = str(accession) + ref_type + str(positionAndEdit) - stash_input = input - except: - exceptPass(validation) - - # Descriptions lacking the colon : - if re.search('[gcnmrp]\.', input) and not re.search(':[gcnmrp]\.', input): - error = 'Unable to identify a colon (:) in the variant description %s. A colon is required in HGVS variant descriptions to separate the reference accession from the reference type i.e. :. e.g. :c.' % ( - input) - validation['warnings'] = validation['warnings'] + ': ' + error - logger.warning(error) - continue - # Ambiguous chr reference - logger.trace("Completed VCF-HVGS step 2", validation) - """ - VCF2HGVS conversion step 3 is similar to step 2 but handles - formats like Chr16:g.2099572TC>T which are provided by Alamut and other - software - """ - if re.search('\w+:[gcnmrp]\.', input) and not re.match('N[CGTWMRP]_', input): - # Take out lowercase Accession characters - lower_cased_list = input.split(':') - if re.search('LRG', lower_cased_list[0], re.IGNORECASE): - lower_case_accession = lower_cased_list[0] - lower_case_accession = lower_case_accession.replace('l', 'L') - lower_case_accession = lower_case_accession.replace('r', 'R') - lower_case_accession = lower_case_accession.replace('g', 'G') + # Remove whitespace + ws = copy.copy(input) + input = input.strip() + input = ''.join(input.split()) + if input != ws: + caution = 'Whitespace removed from variant description ' + str(ws) + validation['warnings'] = validation['warnings'] + ': ' + caution + 
logger.info(caution) + stash_input = copy.copy(input) + # Set the primary_assembly + if validation['primary_assembly'] == 'false': + if selected_assembly == 'hg19': + primary_assembly = 'GRCh37' + elif selected_assembly == 'hg38': + primary_assembly = 'GRCh38' + # Ensure genome build is correctly formatted + elif re.search('GRC', selected_assembly, re.IGNORECASE): + selected_assembly = selected_assembly.replace('g', 'G') + selected_assembly = selected_assembly.replace('r', 'R') + selected_assembly = selected_assembly.replace('c', 'C') + selected_assembly = selected_assembly.replace('H', 'h') + primary_assembly = selected_assembly + # Catch invalid genome build + valid_build = False + for genome_build in self.genome_builds: + if primary_assembly == genome_build: + valid_build = True + if valid_build is False: + primary_assembly = 'GRCh38' + validation['warnings'] = validation[ + 'warnings'] + ': Invalid genome build has been specified. Automap has selected the default build (GRCh38)' + logger.warning( + 'Invalid genome build has been specified. 
Automap has selected the default build ' + primary_assembly) + else: + validation['primary_assembly'] = primary_assembly else: - lower_case_accession = lower_cased_list[0] - lower_case_accession = lower_case_accession.upper() - input = ''.join(lower_cased_list[1:]) - input = lower_case_accession + ':' + input - if not re.match('LRG_', input) and not re.match('ENS', input) and not re.match('N[MRPC]_', input): - try: - if re.search('GRCh37', input) or re.search('hg19', input): - primary_assembly = 'GRCh37' - elif re.search('GRCh38', input) or re.search('hg38', input): - primary_assembly = 'GRCh38' - pre_input = copy.deepcopy(input) - input_list = input.split(':') - query_a_symbol = input_list[0] - is_it_a_gene = va_dbCrl.data.get_hgnc_symbol(query_a_symbol) - if is_it_a_gene == 'none': + primary_assembly = validation['primary_assembly'] + logger.trace("Completed string formatting", validation) + # Set variables that batch will not use but are required + crossing = 'false' + boundary = 'false' + + # VCF type 1 + """ + VCF2HGVS stage 1. 
converts chr-pos-ref-alt into chr:posRef>Alt + The output format is a common mistake caused by inaccurate conversion of + VCF variants into HGVS - hence the need for conversion step 2 + """ + if re.search('[-:]\d+[-:][GATC]+[-:][GATC]+', input): + input = input.replace(':', '-') + # Extract primary_assembly if provided + if re.match('GRCh3\d+-', input) or re.match('hg\d+-', input): + in_list = input.split('-') + selected_assembly = in_list[0] + input = '-'.join(in_list[1:]) + pre_input = copy.deepcopy(input) + vcf_elements = pre_input.split('-') + input = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[3]) + elif re.search('[-:]\d+[-:][GATC]+[-:]', input): + input = input.replace(':', '-') + # Extract primary_assembly if provided + if re.match('GRCh3\d+-', input) or re.match('hg\d+-', input): + in_list = input.split('-') + selected_assembly = in_list[0] + input = '-'.join(in_list[1:]) + pre_input = copy.deepcopy(input) + vcf_elements = pre_input.split('-') + validation[ + 'warnings'] = 'Not stating ALT bases is ambiguous because VCF specification 4.0 would treat ' + pre_input + ' as a deletion whereas VCF specification 4.1 onwards would treat ' + pre_input + ' as ALT = REF' + validation['warnings'] = validation['warnings'] + ': VariantValidator has output both alternatives' + logger.resub('Not stating ALT bases is ambiguous because VCF specification 4.0 would treat ' + + pre_input + ' as a deletion whereas VCF specification 4.1 onwards would treat ' + pre_input + + ' as ALT = REF. 
Validator will output both alternatives.') + validation['write'] = 'false' + input_A = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], 'del') + input_B = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[2]) + queryA = {'quibble': input_A, 'id': validation['id'], 'warnings': validation['warnings'], + 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', + 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} + queryB = {'quibble': input_B, 'id': validation['id'], 'warnings': validation['warnings'], + 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', + 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} + batch_list.append(queryA) + batch_list.append(queryB) + continue + elif re.search('[-:]\d+[-:][-:][GATC]+', input) or re.search('[-:]\d+[-:][.][-:][GATC]+', input): + input = input.replace(':', '-') + if re.search('-.-', input): + input = input.replace('-.-', '-ins-') + if re.search('--', input): + input = input.replace('--', '-ins-') + # Extract primary_assembly if provided + if re.match('GRCh3\d+-', input) or re.match('hg\d+-', input): + in_list = input.split('-') + selected_assembly = in_list[0] + input = '-'.join(in_list[1:]) + pre_input = copy.deepcopy(input) + vcf_elements = pre_input.split('-') + input = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[3]) + stash_input = input + logger.trace("Completed VCF-HVGS step 1", validation) + # API type non-HGVS + # e.g. Chr16:2099572TC>T + """ + VCF2HGVS conversion step 2 identifies the correct chromosomal reference + sequence based upon the non compliant identifier e.g. :2099572TC>T. + The data is currently stored in variantanalyser.supported_chromosome_builds. + Anticipated future builds will be transferred to MySQL which can be more + easily updated and maintained. 
+ LRGs and LRG_ts also need to be assigned the correct reference sequence identifier. + The LRG ID data ia stored in the VariantValidator MySQL database. + The reference sequence type is also assigned. + """ + if re.search('\w+\:', input) and not re.search('\w+\:[gcnmrp]\.', input): + if re.search('\w+\:[gcnmrp]', input) and not re.search('\w+\:[gcnmrp]\.', input): + # Missing dot + pass + else: + try: + if re.search('GRCh37', input) or re.search('hg19', input): + primary_assembly = 'GRCh37' + elif re.search('GRCh38', input) or re.search('hg38', input): + primary_assembly = 'GRCh38' + pre_input = copy.deepcopy(input) + input_list = input.split(':') pos_ref_alt = str(input_list[1]) positionAndEdit = input_list[1] - chr_num = str(input_list[0]) - chr_num = chr_num.upper() - chr_num = chr_num.strip() - if re.match('CHR', chr_num): - chr_num = chr_num.replace('CHR', '') # Use selected assembly - accession = vvChromasomes.to_accession(chr_num, selected_assembly) - if accession is None: - validation['warnings'] = validation['warnings'] + ': ' + chr_num + \ - ' is not part of genome build ' + selected_assembly - continue - input = str(accession) + ':' + str(positionAndEdit) - stash_input = input - else: - pass - except Exception as e: - exc_type, exc_value, last_traceback = sys.exc_info() - te = traceback.format_exc() - tbk = [str(exc_type), str(exc_value), str(te)] - er = str('\n'.join(tbk)) - logger.warning(str(exc_type) + " " + str(exc_value)) - logger.debug(er) - - # GENE_SYMBOL:c. n. types - logger.trace("Completed VCF-HGVS step 3", validation) - """ - Searches for gene symbols that have been used as reference sequence - identifiers. Provides a sufficiently repremanding warning, but also provides - correctly formatted variant descriptions with appropriate transcript - reference sequence identifiers i.e. NM_ .... 
- Note: the output from the function must be validated because VV has no way - of knowing which the users intended reference sequence was, and the exon - boundaries etc of the alternative transcript variants may not be equivalent - """ - if re.search('\w+\:[cn]\.', input): - try: - pre_input = copy.deepcopy(input) - query_a_symbol = pre_input.split(':')[0] - tx_edit = pre_input.split(':')[1] - is_it_a_gene = va_dbCrl.data.get_hgnc_symbol(query_a_symbol) - if is_it_a_gene != 'none': - uta_symbol = va_dbCrl.data.get_uta_symbol(is_it_a_gene) - available_transcripts = hdp.get_tx_for_gene(uta_symbol) - select_from_these_transcripts = {} - for tx in available_transcripts: - if re.match('NM_', tx[3]) or re.match('NR_', tx[3]): - if tx[3] not in select_from_these_transcripts.keys(): - select_from_these_transcripts[tx[3]] = '' - else: + if not re.match('N[CGTWMRP]_', input) and not re.match('LRG_', input): + chr_num = str(input_list[0]) + chr_num = chr_num.upper() + chr_num = chr_num.strip() + if re.match('CHR', chr_num): + chr_num = chr_num.replace('CHR', '') + # Use selected assembly + accession = vvChromosomes.to_accession(chr_num, selected_assembly) + if accession is None: + validation['warnings'] = validation[ + 'warnings'] + ': ' + chr_num + \ + ' is not part of genome build ' + selected_assembly + logger.warning(chr_num + ' is not part of genome build ' + selected_assembly) continue else: - continue - select_from_these_transcripts = '|'.join(select_from_these_transcripts.keys()) - if select_transcripts != 'all': - validation['write'] = 'false' - for transcript in select_transcripts_dict_plus_version.keys(): - validation[ - 'warnings'] = 'HGVS variant nomenclature does not allow the use of a gene symbol (' + \ - query_a_symbol + ') in place of a valid reference sequence' - refreshed_description = transcript + ':' + tx_edit - query = {'quibble': refreshed_description, 'id': validation['id'], - 'warnings': validation['warnings'], 'description': '', 'coding': '', - 
'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', - 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} - batch_list.append(query) - logger.resub('HGVS variant nomenclature does not allow the use of a gene symbol (' + \ - query_a_symbol + ') in place of a valid reference sequence') - else: - validation['warnings'] = validation['warnings'] + \ - ': ' + 'HGVS variant nomenclature does not allow the use of a gene symbol (' + \ - query_a_symbol + ') in place of a valid reference sequence: Re-submit ' + input + \ - ' and specify transcripts from the following: ' + 'select_transcripts=' + select_from_these_transcripts - logger.warning('HGVS variant nomenclature does not allow the use of a gene symbol (' + \ - query_a_symbol + ') in place of a valid reference sequence: Re-submit ' + input + \ - ' and specify transcripts from the following: ' + 'select_transcripts=' + select_from_these_transcripts) - continue + accession = input_list[0] + if re.search('>', pre_input): + if re.search('del', pre_input): + pos = re.match('\d+', pos_ref_alt) + position = pos.group(0) + old_ref, old_alt = pos_ref_alt.split('>') + old_ref = old_ref.replace(position, '') + position = int(position) - 1 + required_base = sf.fetch_seq(accession, start_i=position - 1, end_i=position) + ref = required_base + old_ref + alt = required_base + positionAndEdit = str(position) + ref + '>' + alt + elif re.search('ins', pre_input): + pos = re.match('\d+', pos_ref_alt) + position = pos.group(0) + old_ref, old_alt = pos_ref_alt.split('>') + # old_ref = old_ref.replace(position, '') + position = int(position) - 1 + required_base = sf.fetch_seq(accession, start_i=position - 1, end_i=position) + ref = required_base + alt = required_base + old_alt + positionAndEdit = str(position) + ref + '>' + alt + # Assign reference sequence type + ref_type = self.db.ref_type_assign(accession) + if re.match('LRG_', accession): + if ref_type == ':g.': + accession = 
self.db.get.get_refseqgeneId_from_lrgID(accession) + else: + accession = self.db.get.get_RefSeqTranscriptID_from_lrgTranscriptID(accession) + else: + accession = accession + input = str(accession) + ref_type + str(positionAndEdit) + stash_input = input + except: + fn.exceptPass(validation) + + # Descriptions lacking the colon : + if re.search('[gcnmrp]\.', input) and not re.search(':[gcnmrp]\.', input): + error = 'Unable to identify a colon (:) in the variant description %s. A colon is required in HGVS variant descriptions to separate the reference accession from the reference type i.e. :. e.g. :c.' % ( + input) + validation['warnings'] = validation['warnings'] + ': ' + error + logger.warning(error) + continue + + # Ambiguous chr reference + logger.trace("Completed VCF-HVGS step 2", validation) + """ + VCF2HGVS conversion step 3 is similar to step 2 but handles + formats like Chr16:g.2099572TC>T which are provided by Alamut and other + software + """ + if re.search('\w+:[gcnmrp]\.', input) and not re.match('N[CGTWMRP]_', input): + # Take out lowercase Accession characters + lower_cased_list = input.split(':') + if re.search('LRG', lower_cased_list[0], re.IGNORECASE): + lower_case_accession = lower_cased_list[0] + lower_case_accession = lower_case_accession.replace('l', 'L') + lower_case_accession = lower_case_accession.replace('r', 'R') + lower_case_accession = lower_case_accession.replace('g', 'G') else: - pass - except: - exceptPass() - logger.trace("Gene symbol reference catching complete", validation) - - # NG_:c. or NC_:c. - """ - Similar to the GENE_SYMBOL:c. n. types function, but spots RefSeqGene or - Chromosomal reference sequence identifiers used in the context of c. 
variant - descriptions - """ - if re.search('\w+\:[cn]', input): - try: - if re.match('^NG_', input): - refSeqGeneID = input.split(':')[0] - tx_edit = input.split(':')[1] - gene_symbol = va_dbCrl.data.get_gene_symbol_from_refSeqGeneID(refSeqGeneID) - if gene_symbol != 'none': - uta_symbol = va_dbCrl.data.get_uta_symbol(gene_symbol) - available_transcripts = hdp.get_tx_for_gene(uta_symbol) + lower_case_accession = lower_cased_list[0] + lower_case_accession = lower_case_accession.upper() + input = ''.join(lower_cased_list[1:]) + input = lower_case_accession + ':' + input + if not re.match('LRG_', input) and not re.match('ENS', input) and not re.match('N[MRPC]_', input): + try: + if re.search('GRCh37', input) or re.search('hg19', input): + primary_assembly = 'GRCh37' + elif re.search('GRCh38', input) or re.search('hg38', input): + primary_assembly = 'GRCh38' + pre_input = copy.deepcopy(input) + input_list = input.split(':') + query_a_symbol = input_list[0] + is_it_a_gene = self.db.get.get_hgnc_symbol(query_a_symbol) + if is_it_a_gene == 'none': + pos_ref_alt = str(input_list[1]) + positionAndEdit = input_list[1] + chr_num = str(input_list[0]) + chr_num = chr_num.upper() + chr_num = chr_num.strip() + if re.match('CHR', chr_num): + chr_num = chr_num.replace('CHR', '') # Use selected assembly + accession = vvChromosomes.to_accession(chr_num, selected_assembly) + if accession is None: + validation['warnings'] = validation['warnings'] + ': ' + chr_num + \ + ' is not part of genome build ' + selected_assembly + continue + input = str(accession) + ':' + str(positionAndEdit) + stash_input = input + else: + pass + except Exception as e: + exc_type, exc_value, last_traceback = sys.exc_info() + te = traceback.format_exc() + tbk = [str(exc_type), str(exc_value), str(te)] + er = str('\n'.join(tbk)) + logger.warning(str(exc_type) + " " + str(exc_value)) + logger.debug(er) + + # GENE_SYMBOL:c. n. 
types + logger.trace("Completed VCF-HGVS step 3", validation) + """ + Searches for gene symbols that have been used as reference sequence + identifiers. Provides a sufficiently repremanding warning, but also provides + correctly formatted variant descriptions with appropriate transcript + reference sequence identifiers i.e. NM_ .... + Note: the output from the function must be validated because VV has no way + of knowing which the users intended reference sequence was, and the exon + boundaries etc of the alternative transcript variants may not be equivalent + """ + if re.search('\w+\:[cn]\.', input): + try: + pre_input = copy.deepcopy(input) + query_a_symbol = pre_input.split(':')[0] + tx_edit = pre_input.split(':')[1] + is_it_a_gene = self.db.get.get_hgnc_symbol(query_a_symbol) + if is_it_a_gene != 'none': + uta_symbol = self.db.get.get_uta_symbol(is_it_a_gene) + available_transcripts = self.hdp.get_tx_for_gene(uta_symbol) select_from_these_transcripts = {} for tx in available_transcripts: if re.match('NM_', tx[3]) or re.match('NR_', tx[3]): @@ -541,1269 +494,1411 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr validation['write'] = 'false' for transcript in select_transcripts_dict_plus_version.keys(): validation[ - 'warnings'] = 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. 
NG_(NM_):c.PositionVariation' - refreshed_description = refSeqGeneID + '(' + transcript + ')' + ':' + tx_edit + 'warnings'] = 'HGVS variant nomenclature does not allow the use of a gene symbol (' + \ + query_a_symbol + ') in place of a valid reference sequence' + refreshed_description = transcript + ':' + tx_edit query = {'quibble': refreshed_description, 'id': validation['id'], 'warnings': validation['warnings'], 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', - 'write': 'true', 'primary_assembly': primary_assembly, - 'order': ordering} - logger.resub( - 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. NG_(NM_):c.PositionVariation. Resubmitting corrected version.') + 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} batch_list.append(query) + logger.resub('HGVS variant nomenclature does not allow the use of a gene symbol (' + \ + query_a_symbol + ') in place of a valid reference sequence') else: - validation['warnings'] = validation[ - 'warnings'] + ': ' + 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation. Re-submit ' + input + ' but also specify transcripts from the following: ' + 'select_transcripts=' + select_from_these_transcripts - logger.warning( - + 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation. 
Re-submit ' + - str( - input) + ' but also specify transcripts from the following: ' + 'select_transcripts=' + str( - select_from_these_transcripts)) + validation['warnings'] = validation['warnings'] + \ + ': ' + 'HGVS variant nomenclature does not allow the use of a gene symbol (' + \ + query_a_symbol + ') in place of a valid reference sequence: Re-submit ' + input + \ + ' and specify transcripts from the following: ' + 'select_transcripts=' + select_from_these_transcripts + logger.warning('HGVS variant nomenclature does not allow the use of a gene symbol (' + \ + query_a_symbol + ') in place of a valid reference sequence: Re-submit ' + input + \ + ' and specify transcripts from the following: ' + 'select_transcripts=' + select_from_these_transcripts) continue else: - validation['warnings'] = validation[ - 'warnings'] + ': ' + 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation' - logger.warning( - 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation') - continue - elif re.match('^NC_', input): - validation['warnings'] = validation[ - 'warnings'] + ': ' + 'A transcript reference sequence has not been provided e.g. NC_(NM_):c.PositionVariation. Unable to predict available transripts because chromosomal position is not specified' - logger.warning( - 'A transcript reference sequence has not been provided e.g. NC_(NM_):c.PositionVariation. Unable to predict available transripts because chromosomal position is not specified') - continue - else: - pass - except: - exceptPass() - - logger.trace("Chromosomal/RefSeqGene reference catching complete", validation) - # Find not_sub type in input e.g. GGGG>G - """ - VCF2HGVS conversion step 4 has two purposes - 1. VCF is frequently inappropriately converted into HGVS like descriptions - such as GGGG>G which is actually a delins, del or ins. The function assigns - the correct edit type - 2. 
Detects and extracts multiple ALT sequences into HGVS descriptions and - automatically submits them for validation - """ - not_sub = copy.deepcopy(input) - not_sub_find = re.compile("([GATCgatc]+)>([GATCgatc]+)") - if not_sub_find.search(not_sub): - try: - # If the length of either side of the substitution delimer (>) is >1 - matches = not_sub_find.search(not_sub) - if len(matches.group(1)) > 1 or len(matches.group(2)) > 1 or re.search( - "([GATCgatc]+)>([GATCgatc]+),([GATCgatc]+)", input): - # Search for and remove range - interval_range = re.compile("([0-9]+)_([0-9]+)") - if interval_range.search(not_sub): - m = not_sub_find.search(not_sub) - start = m.group(1) - delete = m.group(2) - beginning_string, middle_string = not_sub.split(':') - middle_string = middle_string.split('_')[0] - end_string = start + '>' + delete - not_sub = beginning_string + ':' + middle_string + end_string - # Split description - split_colon = not_sub.split(':') - ref_ac = split_colon[0] - remainder = split_colon[1] - split_dot = remainder.split('.') - ref_type = split_dot[0] - remainder = split_dot[1] - posedit = remainder - split_greater = remainder.split('>') - insert = split_greater[1] - remainder = split_greater[0] - # Split remainder using matches - r = re.compile("([0-9]+)([GATCgatc]+)") - try: - m = r.search(remainder) - start = m.group(1) - delete = m.group(2) - starts = posedit.split(delete)[0] - re_try = ref_ac + ':' + ref_type + '.' + starts + 'del' + delete[0] + 'ins' + insert - hgvs_re_try = hp.parse_hgvs_variant(re_try) - hgvs_re_try.posedit.edit.ref = delete - start_pos = str(hgvs_re_try.posedit.pos.start) - if re.search('\-', start_pos): - base, offset = start_pos.split('-') - new_offset = 0 - int(offset) + (len(delete)) - end_pos = int(base) - hgvs_re_try.posedit.pos.end.base = int(end_pos) - hgvs_re_try.posedit.pos.end.offset = int(new_offset) - 1 - not_delins = ref_ac + ':' + ref_type + '.' 
+ start_pos + '_' + str( - hgvs_re_try.posedit.pos.end) + 'del' + delete + 'ins' + insert - elif re.search('\+', start_pos): - base, offset = start_pos.split('+') - end_pos = int(base) + (len(delete) - int(offset) - 1) - new_offset = 0 + int(offset) + (len(delete) - 1) - hgvs_re_try.posedit.pos.end.base = int(end_pos) - hgvs_re_try.posedit.pos.end.offset = int(new_offset) - not_delins = ref_ac + ':' + ref_type + '.' + start_pos + '_' + str( - hgvs_re_try.posedit.pos.end) + 'del' + delete + 'ins' + insert - else: - end_pos = int(start_pos) + (len(delete) - 1) - not_delins = ref_ac + ':' + ref_type + '.' + start_pos + '_' + str( - end_pos) + 'del' + delete + 'ins' + insert - except: - exceptPass() - not_delins = not_sub - # Parse into hgvs object - try: - hgvs_not_delins = hp.parse_hgvs_variant(not_delins) - except hgvs.exceptions.HGVSError as e: - # Sort out multiple ALTS from VCF inputs - if re.search("([GATCgatc]+)>([GATCgatc]+),([GATCgatc]+)", not_delins): - header, alts = not_delins.split('>') - # Split up the alts into a list - alt_list = alts.split(',') - # Assemble and re-submit - for alt in alt_list: - validation[ - 'warnings'] = 'Multiple ALT sequences detected: auto-submitting all possible combinations' + pass + except: + fn.exceptPass() + logger.trace("Gene symbol reference catching complete", validation) + + # NG_:c. or NC_:c. + """ + Similar to the GENE_SYMBOL:c. n. types function, but spots RefSeqGene or + Chromosomal reference sequence identifiers used in the context of c. 
variant + descriptions + """ + if re.search('\w+\:[cn]', input): + try: + if re.match('^NG_', input): + refSeqGeneID = input.split(':')[0] + tx_edit = input.split(':')[1] + gene_symbol = self.db.get.get_gene_symbol_from_refSeqGeneID(refSeqGeneID) + if gene_symbol != 'none': + uta_symbol = self.db.get.get_uta_symbol(gene_symbol) + available_transcripts = self.hdp.get_tx_for_gene(uta_symbol) + select_from_these_transcripts = {} + for tx in available_transcripts: + if re.match('NM_', tx[3]) or re.match('NR_', tx[3]): + if tx[3] not in select_from_these_transcripts.keys(): + select_from_these_transcripts[tx[3]] = '' + else: + continue + else: + continue + select_from_these_transcripts = '|'.join(select_from_these_transcripts.keys()) + if select_transcripts != 'all': validation['write'] = 'false' - refreshed_description = header + '>' + alt - query = {'quibble': refreshed_description, 'id': validation['id'], - 'warnings': validation['warnings'], 'description': '', 'coding': '', - 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', - 'write': 'true', 'primary_assembly': primary_assembly, - 'order': ordering} - batch_list.append(query) - logger.resub( - 'Multiple ALT sequences detected. Auto-submitting all possible combinations.') + for transcript in select_transcripts_dict_plus_version.keys(): + validation[ + 'warnings'] = 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. 
NG_(NM_):c.PositionVariation' + refreshed_description = refSeqGeneID + '(' + transcript + ')' + ':' + tx_edit + query = {'quibble': refreshed_description, 'id': validation['id'], + 'warnings': validation['warnings'], 'description': '', 'coding': '', + 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', + 'write': 'true', 'primary_assembly': primary_assembly, + 'order': ordering} + logger.resub( + 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. NG_(NM_):c.PositionVariation. Resubmitting corrected version.') + batch_list.append(query) + else: + validation['warnings'] = validation[ + 'warnings'] + ': ' + 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation. Re-submit ' + input + ' but also specify transcripts from the following: ' + 'select_transcripts=' + select_from_these_transcripts + logger.warning( + + 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation. Re-submit ' + + str( + input) + ' but also specify transcripts from the following: ' + 'select_transcripts=' + str( + select_from_these_transcripts)) continue else: + validation['warnings'] = validation[ + 'warnings'] + ': ' + 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation' + logger.warning( + 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation') + continue + elif re.match('^NC_', input): + validation['warnings'] = validation[ + 'warnings'] + ': ' + 'A transcript reference sequence has not been provided e.g. NC_(NM_):c.PositionVariation. Unable to predict available transripts because chromosomal position is not specified' + logger.warning( + 'A transcript reference sequence has not been provided e.g. NC_(NM_):c.PositionVariation. 
Unable to predict available transripts because chromosomal position is not specified') + continue + else: + pass + except: + fn.exceptPass() + + logger.trace("Chromosomal/RefSeqGene reference catching complete", validation) + # Find not_sub type in input e.g. GGGG>G + """ + VCF2HGVS conversion step 4 has two purposes + 1. VCF is frequently inappropriately converted into HGVS like descriptions + such as GGGG>G which is actually a delins, del or ins. The function assigns + the correct edit type + 2. Detects and extracts multiple ALT sequences into HGVS descriptions and + automatically submits them for validation + """ + not_sub = copy.deepcopy(input) + not_sub_find = re.compile("([GATCgatc]+)>([GATCgatc]+)") + if not_sub_find.search(not_sub): + try: + # If the length of either side of the substitution delimer (>) is >1 + matches = not_sub_find.search(not_sub) + if len(matches.group(1)) > 1 or len(matches.group(2)) > 1 or re.search( + "([GATCgatc]+)>([GATCgatc]+),([GATCgatc]+)", input): + # Search for and remove range + interval_range = re.compile("([0-9]+)_([0-9]+)") + if interval_range.search(not_sub): + m = not_sub_find.search(not_sub) + start = m.group(1) + delete = m.group(2) + beginning_string, middle_string = not_sub.split(':') + middle_string = middle_string.split('_')[0] + end_string = start + '>' + delete + not_sub = beginning_string + ':' + middle_string + end_string + # Split description + split_colon = not_sub.split(':') + ref_ac = split_colon[0] + remainder = split_colon[1] + split_dot = remainder.split('.') + ref_type = split_dot[0] + remainder = split_dot[1] + posedit = remainder + split_greater = remainder.split('>') + insert = split_greater[1] + remainder = split_greater[0] + # Split remainder using matches + r = re.compile("([0-9]+)([GATCgatc]+)") + try: + m = r.search(remainder) + start = m.group(1) + delete = m.group(2) + starts = posedit.split(delete)[0] + re_try = ref_ac + ':' + ref_type + '.' 
+ starts + 'del' + delete[0] + 'ins' + insert + hgvs_re_try = hp.parse_hgvs_variant(re_try) + hgvs_re_try.posedit.edit.ref = delete + start_pos = str(hgvs_re_try.posedit.pos.start) + if re.search('\-', start_pos): + base, offset = start_pos.split('-') + new_offset = 0 - int(offset) + (len(delete)) + end_pos = int(base) + hgvs_re_try.posedit.pos.end.base = int(end_pos) + hgvs_re_try.posedit.pos.end.offset = int(new_offset) - 1 + not_delins = ref_ac + ':' + ref_type + '.' + start_pos + '_' + str( + hgvs_re_try.posedit.pos.end) + 'del' + delete + 'ins' + insert + elif re.search('\+', start_pos): + base, offset = start_pos.split('+') + end_pos = int(base) + (len(delete) - int(offset) - 1) + new_offset = 0 + int(offset) + (len(delete) - 1) + hgvs_re_try.posedit.pos.end.base = int(end_pos) + hgvs_re_try.posedit.pos.end.offset = int(new_offset) + not_delins = ref_ac + ':' + ref_type + '.' + start_pos + '_' + str( + hgvs_re_try.posedit.pos.end) + 'del' + delete + 'ins' + insert + else: + end_pos = int(start_pos) + (len(delete) - 1) + not_delins = ref_ac + ':' + ref_type + '.' 
+ start_pos + '_' + str( + end_pos) + 'del' + delete + 'ins' + insert + except: + fn.exceptPass() + not_delins = not_sub + # Parse into hgvs object + try: + hgvs_not_delins = hp.parse_hgvs_variant(not_delins) + except hgvs.exceptions.HGVSError as e: + # Sort out multiple ALTS from VCF inputs + if re.search("([GATCgatc]+)>([GATCgatc]+),([GATCgatc]+)", not_delins): + header, alts = not_delins.split('>') + # Split up the alts into a list + alt_list = alts.split(',') + # Assemble and re-submit + for alt in alt_list: + validation[ + 'warnings'] = 'Multiple ALT sequences detected: auto-submitting all possible combinations' + validation['write'] = 'false' + refreshed_description = header + '>' + alt + query = {'quibble': refreshed_description, 'id': validation['id'], + 'warnings': validation['warnings'], 'description': '', 'coding': '', + 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', + 'write': 'true', 'primary_assembly': primary_assembly, + 'order': ordering} + batch_list.append(query) + logger.resub( + 'Multiple ALT sequences detected. 
Auto-submitting all possible combinations.') + continue + else: + error = str(e) + issue_link = '' + validation['warnings'] = validation['warnings'] + ': ' + error + logger.warning(str(e)) + continue + + # Re-Stash the input as an HGVS + stash_input = copy.copy(hgvs_not_delins) + try: + not_delins = str(hn.normalize(hgvs_not_delins)) + except hgvs.exceptions.HGVSError as e: error = str(e) - issue_link = '' - validation['warnings'] = validation['warnings'] + ': ' + error - logger.warning(str(e)) - continue + if re.search('Normalization of intronic variants is not supported', error): + not_delins = not_delins + else: + issue_link = '' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(e)) + continue + # Create warning + caution = 'Variant description ' + input + ' is not HGVS compliant' + automap = input + ' automapped to ' + not_delins + validation['warnings'] = validation['warnings'] + ': ' + automap + # Change input to normalized variant + input = not_delins + else: + pass + except: + fn.exceptPass() + else: + pass + logger.trace("Completed VCF-HVGS step 4", validation) - # Re-Stash the input as an HGVS - stash_input = copy.copy(hgvs_not_delins) + # Tackle edit1234 type + """ + Warns that descriptions such as c.ins12 or g.del69 are not HGVS compliant + Strips the trailing numbers and tries to parse the description into an + hgvs object. 
+ If parses, provides a warning including links to the VarNomen web page, but + continues validation + If not, an error message is generated and the loop continues + """ + edit_pass = re.compile('_\d+$') + edit_fail = re.compile('\d+$') + if edit_fail.search(input): + if edit_pass.search(input): + pass + else: + error = 'false' + issue_link = 'false' + failed = copy.deepcopy(input) + # Catch the trailing digits + digits = re.search(r"(\d+$)", failed) + digits = digits.group(1) + # Remove them so that the string SHOULD parse try: - not_delins = str(hn.normalize(hgvs_not_delins)) + hgvs_failed = hp.parse_hgvs_variant(failed) except hgvs.exceptions.HGVSError as e: error = str(e) - if re.search('Normalization of intronic variants is not supported', error): - not_delins = not_delins - else: - issue_link = '' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(e)) - continue - # Create warning - caution = 'Variant description ' + input + ' is not HGVS compliant' - automap = input + ' automapped to ' + not_delins + error = 'The syntax of the input variant description is invalid ' + if re.search('ins\d+', failed): + issue_link = 'http://varnomen.hgvs.org/recommendations/DNA/variant/insertion/' + error = error + ' please refer to ' + issue_link + validation['warnings'] = validation['warnings'] + error + logger.warning(error + " " + e) + continue + hgvs_failed = hp.parse_hgvs_variant(failed) + hgvs_failed.posedit.edit = str(hgvs_failed.posedit.edit).replace(digits, '') + failed = str(hgvs_failed) + hgvs_failed = hp.parse_hgvs_variant(failed) + automap = 'Non HGVS compliant variant description ' + input + ' automapped to ' + failed validation['warnings'] = validation['warnings'] + ': ' + automap - # Change input to normalized variant - input = not_delins - else: - pass - except: - exceptPass() - else: - pass - logger.trace("Completed VCF-HVGS step 4", validation) - - # Tackle edit1234 type - """ - Warns that descriptions such as c.ins12 
or g.del69 are not HGVS compliant - Strips the trailing numbers and tries to parse the description into an - hgvs object. - If parses, provides a warning including links to the VarNomen web page, but - continues validation - If not, an error message is generated and the loop continues - """ - edit_pass = re.compile('_\d+$') - edit_fail = re.compile('\d+$') - if edit_fail.search(input): - if edit_pass.search(input): - pass - else: - error = 'false' - issue_link = 'false' - failed = copy.deepcopy(input) - # Catch the trailing digits - digits = re.search(r"(\d+$)", failed) - digits = digits.group(1) - # Remove them so that the string SHOULD parse + logger.warning(automap) + input = failed + + logger.trace("Ins/Del reference catching complete", validation) + # Tackle compound variant descriptions NG or NC (NM_) i.e. correctly input NG/NC_(NM_):c. + """ + Fully HGVS compliant intronic variant descriptions take the format e.g + NG_007400.1(NM_000088.3):c.589-1G>T. However, hgvs cannot parse and map + these variant strings. + This function: + Removes the g. reference sequence + NG_007400.1(NM_000088.3):c.589-1G>T ---> (NM_000088.3):c.589-1G>T + Removes the parintheses + (NM_000088.3):c.589-1G>T ---> NM_000088.3:c.589-1G>T + hgvs can now parse the string into an hgvs variant object and manipulate it + """ + caution = '' + compounder = re.compile('\(NM_') + compounder_b = re.compile('\(ENST') + if compounder.search(input): + # Find pattern e.g. +0000 and assign to a variable + transy = re.search(r"(NM_.+)", input) + transy = transy.group(1) + transy = transy.replace(')', '') + input = transy + logger.trace("HVGS typesetting complete", validation) + # Extract variants from HGVS allele descriptions + # http://varnomen.hgvs.org/recommendations/DNA/variant/alleles/ + """ + HGVS allele string parsing function Occurance #1 + Takes a single HGVS allele description and separates each allele into a + list of HGVS variants. 
The variants are then automatically submitted for + validation. + Note: In this context, it is inappropriate to validate descriptions + containing intronic variant descriptions. In such instances, allele + descriptions should be re-submitted by the user at the gene or genome level + """ + if (re.search(':[gcnr].\[', input) and re.search('\;', input)) or ( + re.search(':[gcrn].\d+\[', input) and re.search('\;', input)) or (re.search('\(\;\)', input)): + # handle LRG inputs + if re.match('^LRG', input): + if re.match('^LRG\d+', input): + string, remainder = input.split(':') + reference = string.replace('LRG', 'LRG_') + input = reference + ':' + remainder + caution = string + ' updated to ' + reference + if not re.match('^LRG_\d+', input): + pass + elif re.match('^LRG_\d+:g.', input) or re.match('^LRG_\d+:p.', input) or re.match('^LRG_\d+:c.', + input) or re.match( + '^LRG_\d+:n.', input): + lrg_reference, variation = input.split(':') + refseqgene_reference = self.db.get.get_refseqgeneId_from_lrgID(lrg_reference) + if refseqgene_reference != 'none': + input = refseqgene_reference + ':' + variation + if caution == '': + caution = lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation + else: + caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation + validation['warnings'] = validation['warnings'] + ': ' + str(caution) + logger.warning(str(caution)) + elif re.match('^LRG_\d+t\d+:c.', input) or re.match('^LRG_\d+t\d+:n.', input) or re.match( + '^LRG_\d+t\d+:p.', input) or re.match('^LRG_\d+t\d+:g.', input): + lrg_reference, variation = input.split(':') + refseqtranscript_reference = self.db.get.get_RefSeqTranscriptID_from_lrgTranscriptID( + lrg_reference) + if refseqtranscript_reference != 'none': + input = refseqtranscript_reference + ':' + variation + if caution == '': + caution = lrg_reference + ':' + variation + ' automapped to ' + refseqtranscript_reference + ':' + 
variation + else: + caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + refseqtranscript_reference + ':' + variation + validation['warnings'] = validation['warnings'] + ': ' + str(caution) + logger.warning(str(caution)) + else: + pass try: - hgvs_failed = hp.parse_hgvs_variant(failed) - except hgvs.exceptions.HGVSError as e: - error = str(e) - error = 'The syntax of the input variant description is invalid ' - if re.search('ins\d+', failed): - issue_link = 'http://varnomen.hgvs.org/recommendations/DNA/variant/insertion/' - error = error + ' please refer to ' + issue_link - validation['warnings'] = validation['warnings'] + error - logger.warning(error + " " + e) + # Submit to allele extraction function + alleles = va_func.hgvs_alleles(input, hp, vr, hn, vm, sf) + validation['warnings'] = validation[ + 'warnings'] + ': ' + 'Automap has extracted possible variant descriptions' + logger.resub('Automap has extracted possible variant descriptions, resubmitting') + for allele in alleles: + query = {'quibble': allele, 'id': validation['id'], 'warnings': validation['warnings'], + 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', + 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, + 'order': ordering} + coding = 'intergenic' + batch_list.append(query) + validation['write'] = 'false' continue - hgvs_failed = hp.parse_hgvs_variant(failed) - hgvs_failed.posedit.edit = str(hgvs_failed.posedit.edit).replace(digits, '') - failed = str(hgvs_failed) - hgvs_failed = hp.parse_hgvs_variant(failed) - automap = 'Non HGVS compliant variant description ' + input + ' automapped to ' + failed - validation['warnings'] = validation['warnings'] + ': ' + automap - logger.warning(automap) - input = failed - - logger.trace("Ins/Del reference catching complete", validation) - # Tackle compound variant descriptions NG or NC (NM_) i.e. correctly input NG/NC_(NM_):c. 
- """ - Fully HGVS compliant intronic variant descriptions take the format e.g - NG_007400.1(NM_000088.3):c.589-1G>T. However, hgvs cannot parse and map - these variant strings. - This function: - Removes the g. reference sequence - NG_007400.1(NM_000088.3):c.589-1G>T ---> (NM_000088.3):c.589-1G>T - Removes the parintheses - (NM_000088.3):c.589-1G>T ---> NM_000088.3:c.589-1G>T - hgvs can now parse the string into an hgvs variant object and manipulate it - """ - caution = '' - compounder = re.compile('\(NM_') - compounder_b = re.compile('\(ENST') - if compounder.search(input): - # Find pattern e.g. +0000 and assign to a variable - transy = re.search(r"(NM_.+)", input) - transy = transy.group(1) - transy = transy.replace(')', '') - input = transy - logger.trace("HVGS typesetting complete", validation) - # Extract variants from HGVS allele descriptions - # http://varnomen.hgvs.org/recommendations/DNA/variant/alleles/ - """ - HGVS allele string parsing function Occurance #1 - Takes a single HGVS allele description and separates each allele into a - list of HGVS variants. The variants are then automatically submitted for - validation. - Note: In this context, it is inappropriate to validate descriptions - containing intronic variant descriptions. 
In such instances, allele - descriptions should be re-submitted by the user at the gene or genome level - """ - if (re.search(':[gcnr].\[', input) and re.search('\;', input)) or ( - re.search(':[gcrn].\d+\[', input) and re.search('\;', input)) or (re.search('\(\;\)', input)): - # handle LRG inputs - if re.match('^LRG', input): - if re.match('^LRG\d+', input): - string, remainder = input.split(':') - reference = string.replace('LRG', 'LRG_') - input = reference + ':' + remainder - caution = string + ' updated to ' + reference - if not re.match('^LRG_\d+', input): - pass - elif re.match('^LRG_\d+:g.', input) or re.match('^LRG_\d+:p.', input) or re.match('^LRG_\d+:c.', - input) or re.match( - '^LRG_\d+:n.', input): - lrg_reference, variation = input.split(':') - refseqgene_reference = self.db.get.get_refseqgeneId_from_lrgID(lrg_reference) - if refseqgene_reference != 'none': - input = refseqgene_reference + ':' + variation - if caution == '': - caution = lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation - else: - caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation - validation['warnings'] = validation['warnings'] + ': ' + str(caution) - logger.warning(str(caution)) - elif re.match('^LRG_\d+t\d+:c.', input) or re.match('^LRG_\d+t\d+:n.', input) or re.match( - '^LRG_\d+t\d+:p.', input) or re.match('^LRG_\d+t\d+:g.', input): - lrg_reference, variation = input.split(':') - refseqtranscript_reference = va_dbCrl.data.get_RefSeqTranscriptID_from_lrgTranscriptID( - lrg_reference) - if refseqtranscript_reference != 'none': - input = refseqtranscript_reference + ':' + variation - if caution == '': - caution = lrg_reference + ':' + variation + ' automapped to ' + refseqtranscript_reference + ':' + variation - else: - caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + refseqtranscript_reference + ':' + variation - validation['warnings'] = 
validation['warnings'] + ': ' + str(caution) - logger.warning(str(caution)) + except va_func.alleleVariantError as e: + if re.search("Cannot validate sequence of an intronic variant", str(e)): + validation['warnings'] = validation[ + 'warnings'] + ': ' + 'Intronic positions not supported for HGVS Allele descriptions' + logger.warning('Intronic positions not supported for HGVS Allele descriptions') + continue + else: + raise variantValidatorError(str(e)) + logger.trace("HVGS String allele parsing pass 1 complete", validation) + # INITIAL USER INPUT FORMATTING + """ + Removes whitespace from the ends of the string + Removes anything in brackets + Identifies variant type + Returns a dictionary containing the formatted input string and the variant type + Accepts c, g, n, r currently + """ + formatted = va_func.user_input(input) + + # Validator specific variables, note, not all will be necessary for batch, but keep to ensure that batch works + # vars = [] + # refseq_gene = '' + # relevant = '' + warning = '' + automap = 'false' + # vmapped = 'false' + # coords = 'false' + # ensembl_gene = 'false' + hgnc_gene_info = 'false' + # issue_link = 'false' + # cr_available = 'false' + # rcmds_tab = 'false' + + # Check the initial validity of the input + if formatted == 'invalid': + if re.search('\w+\:[gcnmrp]', input) and not re.search('\w+\:[gcnmrp]\.', input): + error = 'Variant description ' + input + ' lacks the . character between and in the expected pattern :.' 
else: - pass - try: - # Submit to allele extraction function - alleles = va_func.hgvs_alleles(input, hp, vr, hn, vm, sf) + error = 'Variant description ' + input + ' is not in an accepted format' validation['warnings'] = validation[ - 'warnings'] + ': ' + 'Automap has extracted possible variant descriptions' - logger.resub('Automap has extracted possible variant descriptions, resubmitting') - for allele in alleles: - query = {'quibble': allele, 'id': validation['id'], 'warnings': validation['warnings'], - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', - 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, - 'order': ordering} - coding = 'intergenic' - batch_list.append(query) - validation['write'] = 'false' + 'warnings'] + ': ' + error + logger.warning(error) continue - except va_func.alleleVariantError as e: - if re.search("Cannot validate sequence of an intronic variant", str(e)): - validation['warnings'] = validation[ - 'warnings'] + ': ' + 'Intronic positions not supported for HGVS Allele descriptions' - logger.warning('Intronic positions not supported for HGVS Allele descriptions') - continue - else: - raise variantValidatorError(str(e)) - logger.trace("HVGS String allele parsing pass 1 complete", validation) - # INITIAL USER INPUT FORMATTING - """ - Removes whitespace from the ends of the string - Removes anything in brackets - Identifies variant type - Returns a dictionary containing the formatted input string and the variant type - Accepts c, g, n, r currently - """ - formatted = va_func.user_input(input) - - # Validator specific variables, note, not all will be necessary for batch, but keep to ensure that batch works - # vars = [] - # refseq_gene = '' - # relevant = '' - warning = '' - automap = 'false' - # vmapped = 'false' - # coords = 'false' - # ensembl_gene = 'false' - hgnc_gene_info = 'false' - # issue_link = 'false' - # cr_available = 'false' - # rcmds_tab = 'false' - - # Check the initial 
validity of the input - if formatted == 'invalid': - if re.search('\w+\:[gcnmrp]', input) and not re.search('\w+\:[gcnmrp]\.', input): - error = 'Variant description ' + input + ' lacks the . character between and in the expected pattern :.' else: - error = 'Variant description ' + input + ' is not in an accepted format' - validation['warnings'] = validation[ - 'warnings'] + ': ' + error - logger.warning(error) - continue - else: - variant = formatted['variant'] - input = formatted['variant'] - stash_input = formatted['variant'] - type = formatted['type'] - logger.trace("Variant input formatted, proceeding to validate.", validation) - # Conversions - """ - Conversions are not currently supported. The HGVS format for conversions - is rarely seen wrt genomic sequencing data and needs to be re-evaluated - """ - conversion = re.compile('con') - if conversion.search(variant): - validation['warnings'] = validation['warnings'] + ': ' + 'Gene conversions currently unsupported' - logger.warning('Gene conversions currently unsupported') - continue - - # Primary check that hgvs will accept the variant - error = 'false' - # Change RNA bases to upper case but nothing else - if type == ":r.": - variant = variant.upper() - variant = variant.replace(':R.', ':r.') - # lowercase the supported variant types - variant = variant.replace('DEL', 'del') - variant = variant.replace('INS', 'ins') - variant = variant.replace('INV', 'inv') - variant = variant.replace('DUP', 'dup') + variant = formatted['variant'] + input = formatted['variant'] + stash_input = formatted['variant'] + type = formatted['type'] + logger.trace("Variant input formatted, proceeding to validate.", validation) + # Conversions + """ + Conversions are not currently supported. 
The HGVS format for conversions + is rarely seen wrt genomic sequencing data and needs to be re-evaluated + """ + conversion = re.compile('con') + if conversion.search(variant): + validation['warnings'] = validation['warnings'] + ': ' + 'Gene conversions currently unsupported' + logger.warning('Gene conversions currently unsupported') + continue - try: - input_parses = hp.parse_hgvs_variant(variant) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error == 'false': - input_parses.ac = input_parses.ac.upper() - if hasattr(input_parses.posedit.edit, 'alt'): - if input_parses.posedit.edit.alt is not None: - input_parses.posedit.edit.alt = input_parses.posedit.edit.alt.upper() - if hasattr(input_parses.posedit.edit, 'ref'): - if input_parses.posedit.edit.ref is not None: - input_parses.posedit.edit.ref = input_parses.posedit.edit.ref.upper() - variant = str(input_parses) - input = str(input_parses) - pass - else: - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(error) - continue + # Primary check that hgvs will accept the variant + error = 'false' + # Change RNA bases to upper case but nothing else + if type == ":r.": + variant = variant.upper() + variant = variant.replace(':R.', ':r.') + # lowercase the supported variant types + variant = variant.replace('DEL', 'del') + variant = variant.replace('INS', 'ins') + variant = variant.replace('INV', 'inv') + variant = variant.replace('DUP', 'dup') - """ - ENST support needs to be re-evaluated, but is very low priority - ENST not supported by ACMG and is under review by HGVS - """ - if re.match('^ENST', str(input_parses)): - trap_ens_in = str(input_parses) - sim_tx = hdp.get_similar_transcripts(input_parses.ac) - for line in sim_tx: - if str(line[2]) == 'True' and str(line[3]) == 'True' and str(line[4]) == 'True' and str( - line[5]) == 'True' and str(line[6]) == 'True': - input_parses.ac = (line[1]) - input = str(input_parses) - variant = input - break - if 
re.match('^ENST', str(input_parses)): - error = 'Unable to map ' + str(input_parses.ac) + ' to an equivalent RefSeq transcript' + try: + input_parses = hp.parse_hgvs_variant(variant) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error == 'false': + input_parses.ac = input_parses.ac.upper() + if hasattr(input_parses.posedit.edit, 'alt'): + if input_parses.posedit.edit.alt is not None: + input_parses.posedit.edit.alt = input_parses.posedit.edit.alt.upper() + if hasattr(input_parses.posedit.edit, 'ref'): + if input_parses.posedit.edit.ref is not None: + input_parses.posedit.edit.ref = input_parses.posedit.edit.ref.upper() + variant = str(input_parses) + input = str(input_parses) + pass + else: validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) + logger.warning(error) continue - else: - validation['warnings'] = validation['warnings'] + ': ' + str( - trap_ens_in) + ' automapped to equivalent RefSeq transcript ' + variant - logger.warning(str(trap_ens_in) + ' automapped to equivalent RefSeq transcript ' + variant) - logger.trace("HVGS acceptance test passed", validation) - # Check whether supported genome build is requested for non g. descriptions - historic_assembly = 'false' - mapable_assemblies = { - 'GRCh37': 'true', - 'GRCh38': 'true', - 'NCBI36': 'false' - } - is_mapable = mapable_assemblies.get(primary_assembly) - if is_mapable == 'true': - - # These objects cannot be moved outside of the main function because they gather data from the - # iuser input e.g. 
alignment method and genome build - # They initiate quickly, so no need to move them unnecessarily - - # Create easy variant mapper (over variant mapper) and splign locked evm - evm = hgvs.assemblymapper.AssemblyMapper(hdp, - assembly_name=primary_assembly, - alt_aln_method=alt_aln_method, - normalize=True, - replace_reference=True - ) - - # Setup a reverse normalize instance and non-normalize evm - no_norm_evm = hgvs.assemblymapper.AssemblyMapper(hdp, - assembly_name=primary_assembly, - alt_aln_method=alt_aln_method, - normalize=False, - replace_reference=True - ) - # Create a specific minimal evm with no normalizer and no replace_reference - min_evm = hgvs.assemblymapper.AssemblyMapper(hdp, + """ + ENST support needs to be re-evaluated, but is very low priority + ENST not supported by ACMG and is under review by HGVS + """ + if re.match('^ENST', str(input_parses)): + trap_ens_in = str(input_parses) + sim_tx = self.hdp.get_similar_transcripts(input_parses.ac) + for line in sim_tx: + if str(line[2]) == 'True' and str(line[3]) == 'True' and str(line[4]) == 'True' and str( + line[5]) == 'True' and str(line[6]) == 'True': + input_parses.ac = (line[1]) + input = str(input_parses) + variant = input + break + if re.match('^ENST', str(input_parses)): + error = 'Unable to map ' + str(input_parses.ac) + ' to an equivalent RefSeq transcript' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + validation['warnings'] = validation['warnings'] + ': ' + str( + trap_ens_in) + ' automapped to equivalent RefSeq transcript ' + variant + logger.warning(str(trap_ens_in) + ' automapped to equivalent RefSeq transcript ' + variant) + logger.trace("HVGS acceptance test passed", validation) + # Check whether supported genome build is requested for non g. 
descriptions + historic_assembly = 'false' + mapable_assemblies = { + 'GRCh37': 'true', + 'GRCh38': 'true', + 'NCBI36': 'false' + } + is_mapable = mapable_assemblies.get(primary_assembly) + if is_mapable == 'true': + + # These objects cannot be moved outside of the main function because they gather data from the + # iuser input e.g. alignment method and genome build + # They initiate quickly, so no need to move them unnecessarily + + # Create easy variant mapper (over variant mapper) and splign locked evm + evm = hgvs.assemblymapper.AssemblyMapper(hdp, assembly_name=primary_assembly, alt_aln_method=alt_aln_method, - normalize=False, - replace_reference=False + normalize=True, + replace_reference=True ) - else: - error = 'Mapping of ' + variant + ' to genome assembly ' + primary_assembly + ' is not supported' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - # Catch interval end > interval start - """ - hgvs did/does not handle 3' UTR position ordering well. This function - ensures that end pos is not > start pos wrt 3' UTRs. - Also identifies some variants which span into the downstream sequence - i.e. out of bounds - """ - astr = re.compile('\*') - if astr.search(str(input_parses.posedit)): - input_parses_copy = copy.deepcopy(input_parses) - input_parses_copy.type = "c" - # Map to n. 
position - # Create easy variant mapper (over variant mapper) and splign locked evm - try: - to_n = evm.c_to_n(input_parses_copy) - except hgvs.exceptions.HGVSError as e: - exceptPass() - else: - if to_n.posedit.pos.end.base < to_n.posedit.pos.start.base: - error = 'Interval end position < interval start position ' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - elif input_parses.posedit.pos.end.base < input_parses.posedit.pos.start.base: - error = 'Interval end position ' + str( - input_parses.posedit.pos.end.base) + ' < interval start position ' + str( - input_parses.posedit.pos.start.base) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - pass + # Setup a reverse normalize instance and non-normalize evm + no_norm_evm = hgvs.assemblymapper.AssemblyMapper(hdp, + assembly_name=primary_assembly, + alt_aln_method=alt_aln_method, + normalize=False, + replace_reference=True + ) - # Catch missing version number in refseq - ref_type = re.compile("^N\w\w\d") - is_version = re.compile("\d\.\d") - en_type = re.compile('^ENS') - lrg_type = re.compile('LRG') - if (ref_type.search(str(input_parses)) and is_version.search(str(input_parses))) or ( - en_type.search(str(input_parses))): - pass - else: - if lrg_type.search(str(input_parses)): - pass - if ref_type.search(str(input_parses)): - error = 'RefSeq variant accession numbers MUST include a version number' + # Create a specific minimal evm with no normalizer and no replace_reference + min_evm = hgvs.assemblymapper.AssemblyMapper(hdp, + assembly_name=primary_assembly, + alt_aln_method=alt_aln_method, + normalize=False, + replace_reference=False + ) + + else: + error = 'Mapping of ' + variant + ' to genome assembly ' + primary_assembly + ' is not supported' validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + # Catch interval end > 
interval start + """ + hgvs did/does not handle 3' UTR position ordering well. This function + ensures that end pos is not > start pos wrt 3' UTRs. + Also identifies some variants which span into the downstream sequence + i.e. out of bounds + """ + astr = re.compile('\*') + if astr.search(str(input_parses.posedit)): + input_parses_copy = copy.deepcopy(input_parses) + input_parses_copy.type = "c" + # Map to n. position + # Create easy variant mapper (over variant mapper) and splign locked evm + try: + to_n = evm.c_to_n(input_parses_copy) + except hgvs.exceptions.HGVSError as e: + fn.exceptPass() + else: + if to_n.posedit.pos.end.base < to_n.posedit.pos.start.base: + error = 'Interval end position < interval start position ' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + elif input_parses.posedit.pos.end.base < input_parses.posedit.pos.start.base: + error = 'Interval end position ' + str( + input_parses.posedit.pos.end.base) + ' < interval start position ' + str( + input_parses.posedit.pos.start.base) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) continue - logger.trace("HVGS interval/version mapping complete", validation) - - # handle LRG inputs - """ - LRG and LRG_t reference sequence identifiers need to be replaced with - equivalent RefSeq identifiers. 
The lookup data is stored in the - VariantValidator MySQL database - """ - if re.match('^LRG', str(input_parses)): - if re.match('^LRG\d+', str(input_parses.ac)): - string = str(input_parses.ac) - reference = string.replace('LRG', 'LRG_') - input_parses.ac = reference - caution = string + ' updated to ' + reference - if not re.match('^LRG_\d+', str(input_parses)): - pass - elif re.match('^LRG_\d+:g.', str(input_parses)) or re.match('^LRG_\d+:p.', - str(input_parses)) or re.match( - '^LRG_\d+:c.', str(input_parses)) or re.match('^LRG_\d+:n.', str(input_parses)): - lrg_reference, variation = str(input_parses).split(':') - refseqgene_reference = self.db.get.get_refseqgeneId_from_lrgID(lrg_reference) - if refseqgene_reference != 'none': - input_parses.ac = refseqgene_reference - variant = str(input_parses) - input = str(input_parses) - stash_input = input - if caution == '': - caution = lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation - else: - caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation - validation['warnings'] = validation['warnings'] + ': ' + str(caution) - logger.warning(str(caution)) - elif re.match('^LRG_\d+t\d+:c.', str(input_parses)) or re.match('^LRG_\d+t\d+:n.', - str(input_parses)) or re.match( - '^LRG_\d+t\d+:p.', str(input_parses)) or re.match('^LRG_\d+t\d+:g.', str(input_parses)): - lrg_reference, variation = str(input_parses).split(':') - refseqtranscript_reference = va_dbCrl.data.get_RefSeqTranscriptID_from_lrgTranscriptID( - lrg_reference) - if refseqtranscript_reference != 'none': - input_parses.ac = refseqtranscript_reference - variant = str(input_parses) - input = str(input_parses) - stash_input = input - if caution == '': - caution = lrg_reference + ':' + variation + ' automapped to ' + refseqtranscript_reference + ':' + variation - else: - caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + 
refseqtranscript_reference + ':' + variation - validation['warnings'] = validation['warnings'] + ': ' + str(caution) - logger.warning(str(caution)) else: pass - logger.trace("LRG check for conversion to refseq completed", validation) - # Additional Incorrectly input variant capture training - """ - Evolving list of common mistakes, see sections below - """ - # NM_ .g - if (re.search('^NM_', variant) or re.search('^NR_', variant)) and re.search(':g.', variant): - suggestion = input.replace(':g.', ':c.') - error = 'Transcript reference sequence input as genomic (g.) reference sequence. Did you mean ' + suggestion + '?' - validation['warnings'] = validation['warnings'] + ': ' + error - logger.warning(error) - continue - # NR_ c. - if re.search('^NR_', input) and re.search(':c.', input): - suggestion = input.replace(':c.', ':n.') - error = 'Non-coding transcript reference sequence input as coding (c.) reference sequence. Did you mean ' + suggestion + '?' - validation['warnings'] = validation['warnings'] + ': ' + error - logger.warning(error) - continue - # NM_ n. - if re.search('^NM_', input) and re.search(':n.', input): - suggestion = input.replace(':n.', ':c.') - error = 'Coding transcript reference sequence input as non-coding transcript (n.) reference sequence. Did you mean ' + suggestion + '?' - validation['warnings'] = validation['warnings'] + ': ' + error - logger.warning(error) - continue - - # NM_ NC_ NG_ NR_ p. - if (re.search('^NM_', variant) or re.search('^NR_', variant) or re.search('^NC_', variant) or re.search( - '^NG_', variant)) and re.search(':p.', variant): - issue_link = 'http://varnomen.hgvs.org/recommendations/protein/' - error = 'Using a nucleotide reference sequence (NM_ NR_ NG_ NC_) to specify protein-level (p.) variation is not HGVS compliant. Please select an appropriate protein reference sequence (NP_)' - validation['warnings'] = validation['warnings'] + ': ' + error - logger.warning(error) - continue - - # NG_ c or NC_c.. 
- if (re.search('^NG_', variant) or re.search('^NC_', variant)) and re.search(':c.', variant): - suggestion = ': For additional assistance, submit ' + str(variant) + ' to VariantValidator' - error = 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. NG_(NM_):c.PositionVariation' + suggestion - validation['warnings'] = validation['warnings'] + ': ' + error - logger.warning(error) - continue - logger.trace("Passed 'common mistakes' catcher", validation) - # Primary validation of the input - """ - An evolving set of variant structure and content searches which identify - and warn users about inappropriate use of HGVS - Primarily, this code filters out variants that cannot realistically be - auto corrected and will cause the downstream functions to return errors - """ - input_parses = hp.parse_hgvs_variant(input) - if input_parses.type == 'g': - if re.match('^NC_', input_parses.ac) or re.match('^NG_', input_parses.ac) or re.match('^NT_', - input_parses.ac) or re.match( - '^NW_', input_parses.ac): + # Catch missing version number in refseq + ref_type = re.compile("^N\w\w\d") + is_version = re.compile("\d\.\d") + en_type = re.compile('^ENS') + lrg_type = re.compile('LRG') + if (ref_type.search(str(input_parses)) and is_version.search(str(input_parses))) or ( + en_type.search(str(input_parses))): pass else: - error = 'Invalid reference sequence identifier (' + input_parses.ac + ')' - validation['warnings'] = validation['warnings'] + ': ' + str(error) + if lrg_type.search(str(input_parses)): + pass + if ref_type.search(str(input_parses)): + error = 'RefSeq variant accession numbers MUST include a version number' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + continue + logger.trace("HVGS interval/version mapping complete", validation) + + # handle LRG inputs + """ + LRG and LRG_t reference sequence identifiers need to be replaced with + equivalent RefSeq identifiers. 
The lookup data is stored in the + VariantValidator MySQL database + """ + if re.match('^LRG', str(input_parses)): + if re.match('^LRG\d+', str(input_parses.ac)): + string = str(input_parses.ac) + reference = string.replace('LRG', 'LRG_') + input_parses.ac = reference + caution = string + ' updated to ' + reference + if not re.match('^LRG_\d+', str(input_parses)): + pass + elif re.match('^LRG_\d+:g.', str(input_parses)) or re.match('^LRG_\d+:p.', + str(input_parses)) or re.match( + '^LRG_\d+:c.', str(input_parses)) or re.match('^LRG_\d+:n.', str(input_parses)): + lrg_reference, variation = str(input_parses).split(':') + refseqgene_reference = self.db.get.get_refseqgeneId_from_lrgID(lrg_reference) + if refseqgene_reference != 'none': + input_parses.ac = refseqgene_reference + variant = str(input_parses) + input = str(input_parses) + stash_input = input + if caution == '': + caution = lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation + else: + caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation + validation['warnings'] = validation['warnings'] + ': ' + str(caution) + logger.warning(str(caution)) + elif re.match('^LRG_\d+t\d+:c.', str(input_parses)) or re.match('^LRG_\d+t\d+:n.', + str(input_parses)) or re.match( + '^LRG_\d+t\d+:p.', str(input_parses)) or re.match('^LRG_\d+t\d+:g.', str(input_parses)): + lrg_reference, variation = str(input_parses).split(':') + refseqtranscript_reference = self.db.get.get_RefSeqTranscriptID_from_lrgTranscriptID( + lrg_reference) + if refseqtranscript_reference != 'none': + input_parses.ac = refseqtranscript_reference + variant = str(input_parses) + input = str(input_parses) + stash_input = input + if caution == '': + caution = lrg_reference + ':' + variation + ' automapped to ' + refseqtranscript_reference + ':' + variation + else: + caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + 
refseqtranscript_reference + ':' + variation + validation['warnings'] = validation['warnings'] + ': ' + str(caution) + logger.warning(str(caution)) + else: + pass + logger.trace("LRG check for conversion to refseq completed", validation) + # Additional Incorrectly input variant capture training + """ + Evolving list of common mistakes, see sections below + """ + # NM_ .g + if (re.search('^NM_', variant) or re.search('^NR_', variant)) and re.search(':g.', variant): + suggestion = input.replace(':g.', ':c.') + error = 'Transcript reference sequence input as genomic (g.) reference sequence. Did you mean ' + suggestion + '?' + validation['warnings'] = validation['warnings'] + ': ' + error logger.warning(error) continue - try: - vr.validate(input_parses) - except hgvs.exceptions.HGVSError as e: - error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + # NR_ c. + if re.search('^NR_', input) and re.search(':c.', input): + suggestion = input.replace(':c.', ':n.') + error = 'Non-coding transcript reference sequence input as coding (c.) reference sequence. Did you mean ' + suggestion + '?' + validation['warnings'] = validation['warnings'] + ': ' + error logger.warning(error) continue - except Exception as e: - error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + # NM_ n. + if re.search('^NM_', input) and re.search(':n.', input): + suggestion = input.replace(':n.', ':c.') + error = 'Coding transcript reference sequence input as non-coding transcript (n.) reference sequence. Did you mean ' + suggestion + '?' + validation['warnings'] = validation['warnings'] + ': ' + error logger.warning(error) continue - # Additional test - try: - hn.normalize(input_parses) - except hgvs.exceptions.HGVSError as e: - error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + + # NM_ NC_ NG_ NR_ p. 
+ if (re.search('^NM_', variant) or re.search('^NR_', variant) or re.search('^NC_', variant) or re.search( + '^NG_', variant)) and re.search(':p.', variant): + issue_link = 'http://varnomen.hgvs.org/recommendations/protein/' + error = 'Using a nucleotide reference sequence (NM_ NR_ NG_ NC_) to specify protein-level (p.) variation is not HGVS compliant. Please select an appropriate protein reference sequence (NP_)' + validation['warnings'] = validation['warnings'] + ': ' + error + logger.warning(error) + continue + + # NG_ c or NC_c.. + if (re.search('^NG_', variant) or re.search('^NC_', variant)) and re.search(':c.', variant): + suggestion = ': For additional assistance, submit ' + str(variant) + ' to VariantValidator' + error = 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. NG_(NM_):c.PositionVariation' + suggestion + validation['warnings'] = validation['warnings'] + ': ' + error logger.warning(error) continue - else: - exceptPass() - elif input_parses.type == 'c': - if re.search('\*', str(input_parses)) or re.search('c.\-', str(input_parses)): - # Catch variation in UTRs - # These should be in the sequence so can be directly validated. Need to pass to n. 
+ logger.trace("Passed 'common mistakes' catcher", validation) + # Primary validation of the input + """ + An evolving set of variant structure and content searches which identify + and warn users about inappropriate use of HGVS + Primarily, this code filters out variants that cannot realistically be + auto corrected and will cause the downstream functions to return errors + """ + input_parses = hp.parse_hgvs_variant(input) + if input_parses.type == 'g': + if re.match('^NC_', input_parses.ac) or re.match('^NG_', input_parses.ac) or re.match('^NT_', + input_parses.ac) or re.match( + '^NW_', input_parses.ac): + pass + else: + error = 'Invalid reference sequence identifier (' + input_parses.ac + ')' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(error) + continue try: vr.validate(input_parses) except hgvs.exceptions.HGVSError as e: error = str(e) - if re.search('datums is ill-defined', error): - called_ref = input_parses.posedit.edit.ref - try: - to_n = evm.c_to_n(input_parses) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(error) - continue - actual_ref = to_n.posedit.edit.ref - if called_ref != actual_ref: - error = 'Variant reference (' + called_ref + ') does not agree with reference sequence (' + actual_ref + ')' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(error) - continue + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(error) + continue + except Exception as e: + error = str(e) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(error) + continue + # Additional test + try: + hn.normalize(input_parses) + except hgvs.exceptions.HGVSError as e: + error = str(e) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(error) + continue + else: + fn.exceptPass() + + elif 
input_parses.type == 'c': + if re.search('\*', str(input_parses)) or re.search('c.\-', str(input_parses)): + # Catch variation in UTRs + # These should be in the sequence so can be directly validated. Need to pass to n. + try: + vr.validate(input_parses) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('datums is ill-defined', error): + called_ref = input_parses.posedit.edit.ref + try: + to_n = evm.c_to_n(input_parses) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(error) + continue + actual_ref = to_n.posedit.edit.ref + if called_ref != actual_ref: + error = 'Variant reference (' + called_ref + ') does not agree with reference sequence (' + actual_ref + ')' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(error) + continue + else: + input_parses.posedit.edit.ref = '' + variant = str(input_parses) else: - input_parses.posedit.edit.ref = '' - variant = str(input_parses) + if re.search('bounds', error) or re.search('intronic variant', error): + try: + hn.normalize(input_parses) + except hgvs.exceptions.HGVSError as e: + fn.exceptPass() + if re.search('bounds', str(e)): + try: + identity_info = self.hdp.get_tx_identity_info(input_parses.ac) + ref_start = identity_info[3] + ref_end = identity_info[4] + if re.match('-', str( + input_parses.posedit.pos.start)) and input_parses.posedit.pos.start.offset == 0: + # upstream positions + boundary = int('-' + str(ref_start)) + remainder = int(str(input_parses.posedit.pos.start)) - boundary + input_parses.posedit.pos.start.base = boundary + input_parses.posedit.pos.start.offset = remainder + if re.match('-', str( + input_parses.posedit.pos.end)) and input_parses.posedit.pos.end.offset == 0: + boundary = int('-' + str(ref_start)) + remainder = int(str(input_parses.posedit.pos.end)) - boundary + input_parses.posedit.pos.end.base = boundary + 
input_parses.posedit.pos.end.offset = remainder + if re.match('\*', str( + input_parses.posedit.pos.start)) and input_parses.posedit.pos.start.offset == 0: + # downstream positions + tot_end_pos = str(input_parses.posedit.pos.start).replace('*', '') + ts_seq = sf.fetch_seq(input_parses.ac) + boundary = len(ts_seq) - ref_end + input_parses.posedit.pos.start.base = boundary + offset = int(tot_end_pos) - int(boundary) + input_parses.posedit.pos.start.offset = offset + if re.match('\*', str( + input_parses.posedit.pos.end)) and input_parses.posedit.pos.end.offset == 0: + tot_end_pos = str(input_parses.posedit.pos.end).replace('*', '') + ts_seq = sf.fetch_seq(input_parses.ac) + boundary = len(ts_seq) - ref_end + input_parses.posedit.pos.end.base = boundary + offset = int(tot_end_pos) - int(boundary) + input_parses.posedit.pos.end.offset = offset + + # Create a lose vm instance + lose_vm = hgvs.variantmapper.VariantMapper(hdp, + replace_reference=True, + prevalidation_level=None + ) + + + report_gen = va_func.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, + primary_assembly, lose_vm, hp, hn, sf, nr_vm) + error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant: Instead use ' + fn.valstr( + report_gen) + except Exception as e: + fn.exceptPass() + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + pass + else: + pass + + try: + input_parses = evm.c_to_n(input_parses) + except hgvs.exceptions.HGVSError as e: + error = str(e) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(e)) + continue + + if re.search('n.1-', str(input_parses)): + input_parses = evm.n_to_c(input_parses) + error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + genomic_position = va_func.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, + vm, hp, hn, sf, nr_vm) + error = error + fn.valstr(genomic_position) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue else: - if re.search('bounds', error) or re.search('intronic variant', error): - try: - hn.normalize(input_parses) - except hgvs.exceptions.HGVSError as e: - exceptPass() - if re.search('bounds', str(e)): + pass + + # Re-map input_parses back to c. variant + input_parses = evm.n_to_c(input_parses) + + # Intronic positions in UTRs + if re.search('\d\-\d', str(input_parses)) or re.search('\d\+\d', str(input_parses)): + # Can we go c-g-c + try: + to_genome = va_func.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, vm, + hp, hn, sf, nr_vm) + to_tx = evm.g_to_t(to_genome, input_parses.ac) + except hgvs.exceptions.HGVSInvalidIntervalError as e: + error = str(e) + if re.search('bounds', error): try: - identity_info = hdp.get_tx_identity_info(input_parses.ac) + identity_info = self.hdp.get_tx_identity_info(input_parses.ac) ref_start = identity_info[3] ref_end = identity_info[4] - if re.match('-', str( - input_parses.posedit.pos.start)) and input_parses.posedit.pos.start.offset == 0: + if re.match('-', str(input_parses.posedit.pos.start)): # upstream positions boundary = int('-' + str(ref_start)) remainder = int(str(input_parses.posedit.pos.start)) - boundary input_parses.posedit.pos.start.base = boundary input_parses.posedit.pos.start.offset = remainder - if re.match('-', str( - input_parses.posedit.pos.end)) and input_parses.posedit.pos.end.offset == 0: + if re.match('-', str(input_parses.posedit.pos.end)): boundary = int('-' + str(ref_start)) remainder = int(str(input_parses.posedit.pos.end)) - boundary input_parses.posedit.pos.end.base = boundary input_parses.posedit.pos.end.offset = remainder - if re.match('\*', str( - input_parses.posedit.pos.start)) and 
input_parses.posedit.pos.start.offset == 0: + if re.match('\*', str(input_parses.posedit.pos.start)): # downstream positions tot_end_pos = str(input_parses.posedit.pos.start).replace('*', '') ts_seq = sf.fetch_seq(input_parses.ac) boundary = len(ts_seq) - ref_end input_parses.posedit.pos.start.base = boundary + te1, te2 = tot_end_pos.split('+') + tot_end_pos = int(te1) + int(te2) offset = int(tot_end_pos) - int(boundary) input_parses.posedit.pos.start.offset = offset - if re.match('\*', str( - input_parses.posedit.pos.end)) and input_parses.posedit.pos.end.offset == 0: + if re.match('\*', str(input_parses.posedit.pos.end)): tot_end_pos = str(input_parses.posedit.pos.end).replace('*', '') ts_seq = sf.fetch_seq(input_parses.ac) boundary = len(ts_seq) - ref_end input_parses.posedit.pos.end.base = boundary + te1, te2 = tot_end_pos.split('+') + tot_end_pos = int(te1) + int(te2) offset = int(tot_end_pos) - int(boundary) input_parses.posedit.pos.end.offset = offset - # Create a lose vm instance - lose_vm = hgvs.variantmapper.VariantMapper(hdp, - replace_reference=True, - prevalidation_level=None - ) - - - report_gen = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, + report_gen = va_func.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, lose_vm, hp, hn, sf, nr_vm) - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant: Instead use ' + valstr( + error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + fn.valstr( report_gen) except Exception as e: - exceptPass() - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue + fn.exceptPass() else: pass - else: - pass - - try: - input_parses = evm.c_to_n(input_parses) - except hgvs.exceptions.HGVSError as e: - error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(e)) - continue - - if re.search('n.1-', str(input_parses)): - input_parses = evm.n_to_c(input_parses) - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' - genomic_position = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, primary_assembly, - vm, hp, hn, sf, nr_vm) - error = error + valstr(genomic_position) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - pass + validation['warnings'] = validation['warnings'] + ': ' + str( + error) + logger.warning(str(error)) + continue - # Re-map input_parses back to c. variant - input_parses = evm.n_to_c(input_parses) + except hgvs.exceptions.HGVSDataNotAvailableError as e: + error = str(e) + if 'Alignment is incomplete' in error: + e_list = error.split('~') + gens = [] + for el in e_list: + el_l = el.split('/') + if el_l[-1] == '': + continue + gens.append(el_l[-1]) + acs = '; '.join(gens) + error = 'Cannot map ' + fn.valstr( + input_parses) + ' to a genomic position. 
' + input_parses.ac + ' can only be partially aligned to genomic reference sequences ' + acs + validation['warnings'] = validation['warnings'] + ': ' + str( + error) + logger.warning(str(error)) + continue - # Intronic positions in UTRs - if re.search('\d\-\d', str(input_parses)) or re.search('\d\+\d', str(input_parses)): - # Can we go c-g-c + elif re.search('\d\-', str(input_parses)) or re.search('\d\+', str(input_parses)): + # Quick look at syntax validation try: - to_genome = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, primary_assembly, vm, - hp, hn, sf, nr_vm) - to_tx = evm.g_to_t(to_genome, input_parses.ac) - except hgvs.exceptions.HGVSInvalidIntervalError as e: + vr.validate(input_parses) + except hgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) if re.search('bounds', error): try: - identity_info = hdp.get_tx_identity_info(input_parses.ac) - ref_start = identity_info[3] - ref_end = identity_info[4] - if re.match('-', str(input_parses.posedit.pos.start)): - # upstream positions - boundary = int('-' + str(ref_start)) - remainder = int(str(input_parses.posedit.pos.start)) - boundary - input_parses.posedit.pos.start.base = boundary - input_parses.posedit.pos.start.offset = remainder - if re.match('-', str(input_parses.posedit.pos.end)): - boundary = int('-' + str(ref_start)) - remainder = int(str(input_parses.posedit.pos.end)) - boundary - input_parses.posedit.pos.end.base = boundary - input_parses.posedit.pos.end.offset = remainder - if re.match('\*', str(input_parses.posedit.pos.start)): - # downstream positions - tot_end_pos = str(input_parses.posedit.pos.start).replace('*', '') - ts_seq = sf.fetch_seq(input_parses.ac) - boundary = len(ts_seq) - ref_end - input_parses.posedit.pos.start.base = boundary - te1, te2 = tot_end_pos.split('+') - tot_end_pos = int(te1) + int(te2) - offset = int(tot_end_pos) - int(boundary) - input_parses.posedit.pos.start.offset = offset - if re.match('\*', str(input_parses.posedit.pos.end)): - tot_end_pos = 
str(input_parses.posedit.pos.end).replace('*', '') - ts_seq = sf.fetch_seq(input_parses.ac) - boundary = len(ts_seq) - ref_end - input_parses.posedit.pos.end.base = boundary - te1, te2 = tot_end_pos.split('+') - tot_end_pos = int(te1) + int(te2) - offset = int(tot_end_pos) - int(boundary) - input_parses.posedit.pos.end.offset = offset - - report_gen = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, - primary_assembly, lose_vm, hp, hn, sf, nr_vm) - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' + valstr( + report_gen = va_func.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, + lose_vm, hp, hn, sf, nr_vm) + except hgvs.exceptions.HGVSError as e: + fn.exceptPass() + else: + error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' + fn.valstr( report_gen) - except Exception as e: - exceptPass() - else: - pass - validation['warnings'] = validation['warnings'] + ': ' + str( - error) - logger.warning(str(error)) - continue - - except hgvs.exceptions.HGVSDataNotAvailableError as e: - error = str(e) - if 'Alignment is incomplete' in error: - e_list = error.split('~') - gens = [] - for el in e_list: - el_l = el.split('/') - if el_l[-1] == '': - continue - gens.append(el_l[-1]) - acs = '; '.join(gens) - error = 'Cannot map ' + valstr( - input_parses) + ' to a genomic position. 
' + input_parses.ac + ' can only be partially aligned to genomic reference sequences ' + acs - validation['warnings'] = validation['warnings'] + ': ' + str( - error) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + elif re.search('insertion length must be 1', error): + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + elif re.search('base start position must be <= end position', error): + correction = copy.deepcopy(input_parses) + st = input_parses.posedit.pos.start + ed = input_parses.posedit.pos.end + correction.posedit.pos.start = ed + correction.posedit.pos.end = st + error = error + ': Did you mean ' + str(correction) + '?' + # error = 'Interval start position ' + str(input_parses.posedit.pos.start) + ' > interval end position ' + str(input_parses.posedit.pos.end) + validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue - elif re.search('\d\-', str(input_parses)) or re.search('\d\+', str(input_parses)): - # Quick look at syntax validation - try: - vr.validate(input_parses) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if re.search('bounds', error): + # Create a specific minimal evm with no normalizer and no replace_reference + # Have to use this method due to potential multi chromosome error, note, normalizes but does not replace sequence + try: + output = va_func.noreplace_myevm_t_to_g(input_parses, evm, self.hdp, primary_assembly, vm, hn, + hp, sf, no_norm_evm) + except hgvs.exceptions.HGVSDataNotAvailableError as e: + tx_ac = input_parses.ac try: - report_gen = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, primary_assembly, - lose_vm, hp, hn, sf, nr_vm) - except hgvs.exceptions.HGVSError as e: - exceptPass() + gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(tx_ac) + except: + gene_symbol = 'none' + if gene_symbol == 'none': + error = 
'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' else: - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' + valstr( - report_gen) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - elif re.search('insertion length must be 1', error): - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - elif re.search('base start position must be <= end position', error): - correction = copy.deepcopy(input_parses) - st = input_parses.posedit.pos.start - ed = input_parses.posedit.pos.end - correction.posedit.pos.start = ed - correction.posedit.pos.end = st - error = error + ': Did you mean ' + str(correction) + '?' 
- # error = 'Interval start position ' + str(input_parses.posedit.pos.start) + ' > interval end position ' + str(input_parses.posedit.pos.end) + error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue - - # Create a specific minimal evm with no normalizer and no replace_reference - # Have to use this method due to potential multi chromosome error, note, normalizes but does not replace sequence - try: - output = va_func.noreplace_myevm_t_to_g(input_parses, evm, hdp, primary_assembly, vm, hn, - hp, sf, no_norm_evm) - except hgvs.exceptions.HGVSDataNotAvailableError as e: - tx_ac = input_parses.ac + except ValueError as e: + error = str(e) + if re.search('> end', error): + error = 'Interval start position ' + str( + input_parses.posedit.pos.start) + ' > interval end position ' + str( + input_parses.posedit.pos.end) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if re.search('base start position must be <= end position', error): + correction = copy.deepcopy(input_parses) + st = input_parses.posedit.pos.start + ed = input_parses.posedit.pos.end + correction.posedit.pos.start = ed + correction.posedit.pos.end = st + error = error + ': Did you mean ' + str(correction) + '?' 
+ error = 'Interval start position ' + str( + input_parses.posedit.pos.start) + ' > interval end position ' + str( + input_parses.posedit.pos.end) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + error = str(e) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + try: - gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(tx_ac) - except: - gene_symbol = 'none' - if gene_symbol == 'none': - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - else: - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - except ValueError as e: - error = str(e) - if re.search('> end', error): - error = 'Interval start position ' + str( - input_parses.posedit.pos.start) + ' > interval end position ' + str( - input_parses.posedit.pos.end) + evm.g_to_t(output, input_parses.ac) + except hgvs.exceptions.HGVSError as e: + error = str(e) validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if re.search('base start position must be <= end position', error): - correction = copy.deepcopy(input_parses) - st = input_parses.posedit.pos.start - ed = input_parses.posedit.pos.end - correction.posedit.pos.start = ed - correction.posedit.pos.end = st - error = error + ': Did you mean ' + 
str(correction) + '?' - error = 'Interval start position ' + str( - input_parses.posedit.pos.start) + ' > interval end position ' + str( - input_parses.posedit.pos.end) + + try: + vr.validate(output) + except hgvs.exceptions.HGVSError as e: + error = str(e) validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue - else: + + else: + # All other variation + try: + vr.validate(input_parses) + except hgvs.exceptions.HGVSUnsupportedOperationError: + fn.exceptPass() + except hgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) + """ + #Phil: Honestly not sure what the purpose of any of these is, we act the same regardless of what + #kind of error it is. + if re.search('Length implied by coordinates', error): + # Applies to del and inv + # NOTE, there has been no normalization at all so this error is valid here + validation['warnings'] = validation['warnings'] + ': ' + str(error) + # Will apply to > del and inv + if re.search('does not agree with reference sequence', error): + validation['warnings'] = validation['warnings'] + ': ' + str(error) + # ensures x_y for insertions + if re.search('insertion length must be 1', error): + validation['warnings'] = validation['warnings'] + ': ' + str(error) + # Boundary issue + if re.search('Variant coordinate is out of the bound of CDS region', error): + validation['warnings'] = validation['warnings'] + ': ' + str(error) + """ + # This catches errors in introns + if re.search('base start position must be <= end position', error): + correction = copy.deepcopy(input_parses) + st = input_parses.posedit.pos.start + ed = input_parses.posedit.pos.end + correction.posedit.pos.start = ed + correction.posedit.pos.end = st + error = error + ': Did you mean ' + str(correction) + '?' 
+ error = 'Interval start position ' + str( + input_parses.posedit.pos.start) + ' > interval end position ' + str( + input_parses.posedit.pos.end) validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue - try: - evm.g_to_t(output, input_parses.ac) - except hgvs.exceptions.HGVSError as e: - error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - - try: - vr.validate(output) - except hgvs.exceptions.HGVSError as e: - error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - - else: - # All other variation - try: - vr.validate(input_parses) - except hgvs.exceptions.HGVSUnsupportedOperationError: - exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - """ - #Phil: Honestly not sure what the purpose of any of these is, we act the same regardless of what - #kind of error it is. 
- if re.search('Length implied by coordinates', error): - # Applies to del and inv - # NOTE, there has been no normalization at all so this error is valid here - validation['warnings'] = validation['warnings'] + ': ' + str(error) - # Will apply to > del and inv - if re.search('does not agree with reference sequence', error): - validation['warnings'] = validation['warnings'] + ': ' + str(error) - # ensures x_y for insertions - if re.search('insertion length must be 1', error): - validation['warnings'] = validation['warnings'] + ': ' + str(error) - # Boundary issue - if re.search('Variant coordinate is out of the bound of CDS region', error): - validation['warnings'] = validation['warnings'] + ': ' + str(error) - """ - # This catches errors in introns - if re.search('base start position must be <= end position', error): - correction = copy.deepcopy(input_parses) - st = input_parses.posedit.pos.start - ed = input_parses.posedit.pos.end - correction.posedit.pos.start = ed - correction.posedit.pos.end = st - error = error + ': Did you mean ' + str(correction) + '?' 
- error = 'Interval start position ' + str( - input_parses.posedit.pos.start) + ' > interval end position ' + str( - input_parses.posedit.pos.end) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - - except hgvs.exceptions.HGVSDataNotAvailableError as e: - error = e - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('bounds', error): - error = error + ' (' + input_parses.ac + ')' + except hgvs.exceptions.HGVSDataNotAvailableError as e: + error = e validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue - exceptPass() + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('bounds', error): + error = error + ' (' + input_parses.ac + ')' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + fn.exceptPass() - elif input_parses.type == 'n': - if re.search('\+', str(input_parses)) or re.search('\-', str(input_parses)): - # Catch variation in UTRs - # These should be in the sequence so can be directly validated. Need to pass to n. - try: - vr.validate(input_parses) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('intronic variant', error): - pass - elif re.search('datums is ill-defined', error): - called_ref = input_parses.posedit.edit.ref - to_n = evm.c_to_n(input_parses) - actual_ref = to_n.posedit.edit.ref - if called_ref != actual_ref: - error = 'Variant reference (' + called_ref + ') does not agree with reference sequence (' + actual_ref + ')' + elif input_parses.type == 'n': + if re.search('\+', str(input_parses)) or re.search('\-', str(input_parses)): + # Catch variation in UTRs + # These should be in the sequence so can be directly validated. Need to pass to n. 
+ try: + vr.validate(input_parses) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('intronic variant', error): + pass + elif re.search('datums is ill-defined', error): + called_ref = input_parses.posedit.edit.ref + to_n = evm.c_to_n(input_parses) + actual_ref = to_n.posedit.edit.ref + if called_ref != actual_ref: + error = 'Variant reference (' + called_ref + ') does not agree with reference sequence (' + actual_ref + ')' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + input_parses.posedit.edit.ref = '' + variant = str(input_parses) + + elif re.search('base must be >=1 for datum = SEQ_START or CDS_END', error): + error = 'The given coordinate is outside the bounds of the reference sequence.' + + try: + if re.match('-', str(input_parses.posedit.pos.start)): + # upstream positions + boundary = 1 + remainder = int(str(input_parses.posedit.pos.start)) - boundary + remainder = remainder + 1 + input_parses.posedit.pos.start.base = boundary + input_parses.posedit.pos.start.offset = remainder + if re.match('-', str(input_parses.posedit.pos.end)): + boundary = 1 + remainder = int(str(input_parses.posedit.pos.end)) - boundary + remainder = remainder + 1 + input_parses.posedit.pos.end.base = boundary + input_parses.posedit.pos.end.offset = remainder + report_gen = va_func.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, + lose_vm, hp, hn, sf, nr_vm) + error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + fn.valstr( + report_gen) + except Exception as e: + fn.exceptPass() validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue else: - input_parses.posedit.edit.ref = '' - variant = str(input_parses) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + if re.search('n.1-', str(input_parses)): + error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' + genomic_position = va_func.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, vm, + hp, hn, sf, nr_vm) + error = error + fn.valstr(genomic_position) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + pass - elif re.search('base must be >=1 for datum = SEQ_START or CDS_END', error): - error = 'The given coordinate is outside the bounds of the reference sequence.' + if re.search('\d\-', str(input_parses)) or re.search('\d\+', str(input_parses)): + # Quick look at syntax validation + try: + vr.validate(input_parses) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if re.search('bounds', error): + try: + report_gen = va_func.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, + lose_vm, hp, hn, sf, nr_vm) + except hgvs.exceptions.HGVSError as e: + fn.exceptPass() + else: + error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + fn.valstr( + report_gen) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + elif re.search('insertion length must be 1', error): + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + elif re.search('base start position must be <= end position', error): + correction = copy.deepcopy(input_parses) + st = input_parses.posedit.pos.start + ed = input_parses.posedit.pos.end + correction.posedit.pos.start = ed + correction.posedit.pos.end = st + error = error + ': Did you mean ' + str(correction) + '?' + # error = 'Interval start position ' + str(input_parses.posedit.pos.start) + ' > interval end position ' + str(input_parses.posedit.pos.end) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + elif re.search('Cannot validate sequence of an intronic variant', error): + try: + test_g = va_func.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, vm, + hp, hn, sf, nr_vm) + back_to_n = evm.g_to_t(test_g, input_parses.ac) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('bounds', error): + report_gen = va_func.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, + primary_assembly, lose_vm, hp, hn, sf, nr_vm) + error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + fn.valstr( + report_gen) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + fn.exceptPass() + # Create a specific minimal evm with no normalizer and no replace_reference + # Have to use this method due to potential multi chromosome error, note, normalizes but does not replace sequence + try: + output = va_func.noreplace_myevm_t_to_g(input_parses, evm, self.hdp, primary_assembly, vm, hn, + hp, sf, no_norm_evm) + except hgvs.exceptions.HGVSDataNotAvailableError as e: + tx_ac = input_parses.ac try: - if re.match('-', str(input_parses.posedit.pos.start)): - # upstream positions - boundary = 1 - remainder = int(str(input_parses.posedit.pos.start)) - boundary - remainder = remainder + 1 - input_parses.posedit.pos.start.base = boundary - input_parses.posedit.pos.start.offset = remainder - if re.match('-', str(input_parses.posedit.pos.end)): - boundary = 1 - remainder = int(str(input_parses.posedit.pos.end)) - boundary - remainder = remainder + 1 - input_parses.posedit.pos.end.base = boundary - input_parses.posedit.pos.end.offset = remainder - report_gen = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, primary_assembly, - lose_vm, hp, hn, sf, nr_vm) - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + valstr( - report_gen) - except Exception as e: - exceptPass() + gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(tx_ac) + except: + gene_symbol = 'none' + if gene_symbol == 'none': + error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' + else: + error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue - else: + except ValueError as e: + error = str(e) + if re.search('> end', error): + error = 'Interval start position ' + str( + input_parses.posedit.pos.start) + ' > interval end position ' + str( + input_parses.posedit.pos.end) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if re.search('base start position must be <= end position', error): + correction = copy.deepcopy(input_parses) + st = input_parses.posedit.pos.start + ed = input_parses.posedit.pos.end + correction.posedit.pos.start = ed + correction.posedit.pos.end = st + error = error + ': Did you mean ' + str(correction) + '?' 
+ error = 'Interval start position ' + str( + input_parses.posedit.pos.start) + ' > interval end position ' + str( + input_parses.posedit.pos.end) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + try: + vr.validate(output) + except hgvs.exceptions.HGVSError as e: + error = str(e) validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue - if re.search('n.1-', str(input_parses)): - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' - genomic_position = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, primary_assembly, vm, - hp, hn, sf, nr_vm) - error = error + valstr(genomic_position) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - pass + else: + # All other variation + try: + vr.validate(input_parses) + except hgvs.exceptions.HGVSUnsupportedOperationError: - if re.search('\d\-', str(input_parses)) or re.search('\d\+', str(input_parses)): - # Quick look at syntax validation - try: - vr.validate(input_parses) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if re.search('bounds', error): - try: - report_gen = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, primary_assembly, - lose_vm, hp, hn, sf, nr_vm) - except hgvs.exceptions.HGVSError as e: - exceptPass() - else: - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + valstr( - report_gen) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - elif re.search('insertion length must be 1', error): + fn.exceptPass() + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + """ + if re.search('Length implied by coordinates', error): + # Applies to del and inv + # NOTE, there has been no normalization at all so this error is valid here + validation['warnings'] = validation['warnings'] + ': ' + str(error) + continue + # Will apply to > del and inv + if re.search('does not agree with reference sequence', error): + validation['warnings'] = validation['warnings'] + ': ' + str(error) + continue + # ensures x_y for insertions + if re.search('insertion length must be 1', error): + validation['warnings'] = validation['warnings'] + ': ' + str(error) + continue + # Boundary issue + if re.search('Variant coordinate is out of the bound of CDS region', error): + validation['warnings'] = validation['warnings'] + ': ' + str(error) + continue + """ + # This catches errors in introns + if re.search('base start position must be <= end position', error): + correction = copy.deepcopy(input_parses) + st = input_parses.posedit.pos.start + ed = input_parses.posedit.pos.end + correction.posedit.pos.start = ed + correction.posedit.pos.end = st + error = error + ': Did you mean ' + str(correction) + '?' 
+ error = 'Interval start position ' + str( + input_parses.posedit.pos.start) + ' > interval end position ' + str( + input_parses.posedit.pos.end) + logger.warning(str(error)) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + continue validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue - elif re.search('base start position must be <= end position', error): - correction = copy.deepcopy(input_parses) - st = input_parses.posedit.pos.start - ed = input_parses.posedit.pos.end - correction.posedit.pos.start = ed - correction.posedit.pos.end = st - error = error + ': Did you mean ' + str(correction) + '?' - # error = 'Interval start position ' + str(input_parses.posedit.pos.start) + ' > interval end position ' + str(input_parses.posedit.pos.end) + except hgvs.exceptions.HGVSDataNotAvailableError as e: + error = e validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue - elif re.search('Cannot validate sequence of an intronic variant', error): - try: - test_g = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, primary_assembly, vm, - hp, hn, sf, nr_vm) - back_to_n = evm.g_to_t(test_g, input_parses.ac) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('bounds', error): - report_gen = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, - primary_assembly, lose_vm, hp, hn, sf, nr_vm) - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + valstr( - report_gen) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - exceptPass() - - # Create a specific minimal evm with no normalizer and no replace_reference - # Have to use this method due to potential multi chromosome error, note, normalizes but does not replace sequence + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('bounds', error): + error = error + ' (' + input_parses.ac + ')' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + pass + logger.trace("Variant structure and contents searches passed", validation) + # Mitochondrial variants + """ + Reformat m. into the new HGVS standard which is now m again! + """ + if type == ':m.' or re.match('NC_012920.1', str(input_parses.ac)) or re.match('NC_001807.4', + str(input_parses.ac)): + hgvs_mito = copy.deepcopy(input_parses) + if (re.match('NC_012920.1', str(hgvs_mito.ac)) and hgvs_mito.type == 'g') or ( + re.match('NC_001807.4', str(hgvs_mito.ac)) and hgvs_mito.type == 'g'): + hgvs_mito.type = 'm' + caution = '' try: - output = va_func.noreplace_myevm_t_to_g(input_parses, evm, hdp, primary_assembly, vm, hn, - hp, sf, no_norm_evm) - except hgvs.exceptions.HGVSDataNotAvailableError as e: - tx_ac = input_parses.ac - try: - gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(tx_ac) - except: - gene_symbol = 'none' - if gene_symbol == 'none': - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - else: - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + gene_symbol + ' to 
https://variantvalidator.org/ref_finder/, or select an alternative genome build' + vr.validate(hgvs_mito) + except hgvs.exceptions.HGVSError as e: + error = caution + ': ' + str(e) validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue - except ValueError as e: - error = str(e) - if re.search('> end', error): - error = 'Interval start position ' + str( - input_parses.posedit.pos.start) + ' > interval end position ' + str( - input_parses.posedit.pos.end) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) + except KeyError as e: + error = caution + ': Currently unable to validate ' + hgvs_mito.ac + ' sequence variation' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + # Any transcripts? + rel_var = va_func.relevant_transcripts(hgvs_mito, evm, self.hdp, alt_aln_method, reverse_normalizer) + hgvs_genomic = copy.deepcopy(hgvs_mito) + if len(rel_var) == 0: + validation['genomic_g'] = fn.valstr(hgvs_mito) + validation['description'] = 'Homo sapiens mitochondrion, complete genome' + logger.info('Homo sapiens mitochondrion, complete genome') continue - except hgvs.exceptions.HGVSInvalidVariantError as e: + # Currently we are not expecting this path to be activated because not m. transcripts seem to be NM_ + # This route may throw up errors in the future + else: + pass + + # handle :p. + if type == ':p.': + error = 'false' + # Try to validate the variant + try: + hgvs_object = hp.parse_hgvs_variant(variant) + except hgvs.exceptions.HGVSError as e: error = str(e) - if re.search('base start position must be <= end position', error): - correction = copy.deepcopy(input_parses) - st = input_parses.posedit.pos.start - ed = input_parses.posedit.pos.end - correction.posedit.pos.start = ed - correction.posedit.pos.end = st - error = error + ': Did you mean ' + str(correction) + '?' 
- error = 'Interval start position ' + str( - input_parses.posedit.pos.start) + ' > interval end position ' + str( - input_parses.posedit.pos.end) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue try: - vr.validate(output) + vr.validate(hgvs_object) except hgvs.exceptions.HGVSError as e: error = str(e) + if error != 'false': validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue + else: + # Get accurate descriptions from the relevant databases + # RefSeq databases + if alt_aln_method != 'genebuild': + # Gene description - requires GenBank search to get all the required info, i.e. transcript variant ID + # accession number + hgvs_object = hp.parse_hgvs_variant(variant) + accession = hgvs_object.ac + # Look for the accession in our database + # Connect to database and send request + record = va_func.entrez_efetch(db="nuccore", id=accession, rettype="gb", retmode="text") + try: + description = record.description + except: + description = 'Unable to recover the description of ' + accession + ' from Entrez' + try: + vr.validate(hgvs_object) + except hgvs.exceptions.HGVSError as e: + error = str(e) + else: + error = str( + hgvs_object) + ' is HGVS compliant and contains a valid reference amino acid description' + reason = 'Protein level variant descriptions are not fully supported due to redundancy in the genetic code' + validation['warnings'] = validation['warnings'] + ': ' + str(reason) + ': ' + str(error) + validation['protein'] = str(hgvs_object) + logger.warning(str(reason) + ": " + str(error)) + continue - else: - # All other variation + # handle :r. + """ + convert r, into c. + """ + trapped_input = input + if type == ':r.': + hgvs_input = hp.parse_hgvs_variant(input) # Traps the hgvs variant of r. for further use + # Change to coding variant + type = ':c.' + # Change input to reflect! 
try: - vr.validate(input_parses) - except hgvs.exceptions.HGVSUnsupportedOperationError: - - exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError as e: + hgvs_c = va_func.hgvs_r_to_c(hgvs_input) + except hgvs.exceptions.HGVSDataNotAvailableError as e: error = str(e) - """ - if re.search('Length implied by coordinates', error): - # Applies to del and inv - # NOTE, there has been no normalization at all so this error is valid here - validation['warnings'] = validation['warnings'] + ': ' + str(error) - continue - # Will apply to > del and inv - if re.search('does not agree with reference sequence', error): - validation['warnings'] = validation['warnings'] + ': ' + str(error) - continue - # ensures x_y for insertions - if re.search('insertion length must be 1', error): - validation['warnings'] = validation['warnings'] + ': ' + str(error) - continue - # Boundary issue - if re.search('Variant coordinate is out of the bound of CDS region', error): - validation['warnings'] = validation['warnings'] + ': ' + str(error) - continue - """ - # This catches errors in introns - if re.search('base start position must be <= end position', error): - correction = copy.deepcopy(input_parses) - st = input_parses.posedit.pos.start - ed = input_parses.posedit.pos.end - correction.posedit.pos.start = ed - correction.posedit.pos.end = st - error = error + ': Did you mean ' + str(correction) + '?' 
- error = 'Interval start position ' + str( - input_parses.posedit.pos.start) + ' > interval end position ' + str( - input_parses.posedit.pos.end) - logger.warning(str(error)) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - continue - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - except hgvs.exceptions.HGVSDataNotAvailableError as e: - error = e validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue + input = str(hgvs_c) + variant = str(hgvs_c) + + # COLLECT gene symbol, name and ACCESSION INFORMATION + # Gene symbol + logger.trace("Handled mitochondrial variants", validation) + """ + Identifies the transcript reference sequence name and HGNC gene symbol + """ + if (type != ':g.'): + error = 'false' + hgvs_vt = hp.parse_hgvs_variant(variant) + try: + tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) except hgvs.exceptions.HGVSError as e: error = str(e) - if re.search('bounds', error): - error = error + ' (' + input_parses.ac + ')' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - pass - logger.trace("Variant structure and contents searches passed", validation) - # Mitochondrial variants - """ - Reformat m. into the new HGVS standard which is now m again! - """ - if type == ':m.' 
or re.match('NC_012920.1', str(input_parses.ac)) or re.match('NC_001807.4', - str(input_parses.ac)): - hgvs_mito = copy.deepcopy(input_parses) - if (re.match('NC_012920.1', str(hgvs_mito.ac)) and hgvs_mito.type == 'g') or ( - re.match('NC_001807.4', str(hgvs_mito.ac)) and hgvs_mito.type == 'g'): - hgvs_mito.type = 'm' - caution = '' - try: - vr.validate(hgvs_mito) - except hgvs.exceptions.HGVSError as e: - error = caution + ': ' + str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - except KeyError as e: - error = caution + ': Currently unable to validate ' + hgvs_mito.ac + ' sequence variation' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - # Any transcripts? - rel_var = va_func.relevant_transcripts(hgvs_mito, evm, hdp, alt_aln_method, reverse_normalizer) - hgvs_genomic = copy.deepcopy(hgvs_mito) - if len(rel_var) == 0: - validation['genomic_g'] = valstr(hgvs_mito) - validation['description'] = 'Homo sapiens mitochondrion, complete genome' - logger.info('Homo sapiens mitochondrion, complete genome') + logger.warning(error) + if error != 'false': + error = 'Please inform UTA admin of the following error: ' + str(error) + issue_link = "https://bitbucket.org/biocommons/uta/issues?status=new&status=open" + reason = "VariantValidator cannot recover information for transcript " + str( + hgvs_vt.ac) + ' beacuse it is not available in the Universal Transcript Archive' + validation['warnings'] = validation['warnings'] + ': ' + str(reason) + logger.warning(str(reason) + ": " + str(error)) continue - # Currently we are not expecting this path to be activated because not m. 
transcripts seem to be NM_ - # This route may throw up errors in the future else: - pass + # Get hgnc Gene name from command + hgnc = tx_id_info[6] + issue_link = 'false' + + # ACCESS THE GENE INFORMATION RECORDS ON THE UTA DATABASE + # Refseq accession + tx_for_gene = va_func.tx_for_gene(hgnc, self.hdp) + refseq_ac = va_func.ng_extract(tx_for_gene) + + # Additional gene info + gene_info = self.hdp.get_gene_info(hgnc) + # Chromosomal location + try: + maploc = gene_info[1] + except: + maploc = '' + chr_loc = ("Chromosome location: " + maploc) - # handle :p. - if type == ':p.': - error = 'false' - # Try to validate the variant - try: - hgvs_object = hp.parse_hgvs_variant(variant) - except hgvs.exceptions.HGVSError as e: - error = str(e) - try: - vr.validate(hgvs_object) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - # Get accurate descriptions from the relevant databases + # Get accurate transcript descriptions from the relevant databases # RefSeq databases if alt_aln_method != 'genebuild': # Gene description - requires GenBank search to get all the required info, i.e. 
transcript variant ID @@ -1812,511 +1907,440 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr accession = hgvs_object.ac # Look for the accession in our database # Connect to database and send request - record = va_func.entrez_efetch(db="nuccore", id=accession, rettype="gb", retmode="text") - try: - description = record.description - except: - description = 'Unable to recover the description of ' + accession + ' from Entrez' - try: - vr.validate(hgvs_object) - except hgvs.exceptions.HGVSError as e: - error = str(e) - else: - error = str( - hgvs_object) + ' is HGVS compliant and contains a valid reference amino acid description' - reason = 'Protein level variant descriptions are not fully supported due to redundancy in the genetic code' - validation['warnings'] = validation['warnings'] + ': ' + str(reason) + ': ' + str(error) - validation['protein'] = str(hgvs_object) - logger.warning(str(reason) + ": " + str(error)) - continue + entry = va_dbCrl.data.in_entries(accession, 'transcript_info') - # handle :r. - """ - convert r, into c. - """ - trapped_input = input - if type == ':r.': - hgvs_input = hp.parse_hgvs_variant(input) # Traps the hgvs variant of r. for further use - # Change to coding variant - type = ':c.' - # Change input to reflect! 
- try: - hgvs_c = va_func.hgvs_r_to_c(hgvs_input) - except hgvs.exceptions.HGVSDataNotAvailableError as e: - error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - input = str(hgvs_c) - variant = str(hgvs_c) - - # COLLECT gene symbol, name and ACCESSION INFORMATION - # Gene symbol - logger.trace("Handled mitochondrial variants", validation) - """ - Identifies the transcript reference sequence name and HGNC gene symbol - """ - if (type != ':g.'): - error = 'false' - hgvs_vt = hp.parse_hgvs_variant(variant) - try: - tx_id_info = hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - logger.warning(error) - if error != 'false': - error = 'Please inform UTA admin of the following error: ' + str(error) - issue_link = "https://bitbucket.org/biocommons/uta/issues?status=new&status=open" - reason = "VariantValidator cannot recover information for transcript " + str( - hgvs_vt.ac) + ' beacuse it is not available in the Universal Transcript Archive' - validation['warnings'] = validation['warnings'] + ': ' + str(reason) - logger.warning(str(reason) + ": " + str(error)) - continue - else: - # Get hgnc Gene name from command - hgnc = tx_id_info[6] - issue_link = 'false' - - # ACCESS THE GENE INFORMATION RECORDS ON THE UTA DATABASE - # Refseq accession - tx_for_gene = va_func.tx_for_gene(hgnc, hdp) - refseq_ac = va_func.ng_extract(tx_for_gene) - - # Additional gene info - gene_info = hdp.get_gene_info(hgnc) - # Chromosomal location - try: - maploc = gene_info[1] - except: - maploc = '' - chr_loc = ("Chromosome location: " + maploc) - - # Get accurate transcript descriptions from the relevant databases - # RefSeq databases - if alt_aln_method != 'genebuild': - # Gene description - requires GenBank search to get all the required info, i.e. 
transcript variant ID - # accession number - hgvs_object = hp.parse_hgvs_variant(variant) - accession = hgvs_object.ac - # Look for the accession in our database - # Connect to database and send request - entry = va_dbCrl.data.in_entries(accession, 'transcript_info') - - # Analyse the returned data and take the necessary actions - # If the error key exists - if 'error' in entry: - # Open a hgvs exception log file in append mode - error = entry['description'] - validation['warnings'] = validation['warnings'] + ': ' + str( - error) + ': A Database error occurred, please contact admin' - logger.warning(str(error) + ": A Database error occurred, please contact admin") - continue + # Analyse the returned data and take the necessary actions + # If the error key exists + if 'error' in entry: + # Open a hgvs exception log file in append mode + error = entry['description'] + validation['warnings'] = validation['warnings'] + ': ' + str( + error) + ': A Database error occurred, please contact admin' + logger.warning(str(error) + ": A Database error occurred, please contact admin") + continue - # If the accession key is found - elif 'accession' in entry: - description = entry['description'] - # If the current entry is too old - if entry['expiry'] == 'true': - dbaction = 'update' + # If the accession key is found + elif 'accession' in entry: + description = entry['description'] + # If the current entry is too old + if entry['expiry'] == 'true': + dbaction = 'update' + try: + entry = va_btch.data_add(input=input, alt_aln_method=alt_aln_method, + accession=accession, dbaction=dbaction, hp=hp, evm=evm, + hdp=self.hdp) + except hgvs.exceptions.HGVSError as e: + error = 'Transcript %s is not currently supported' % (accession) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + except Exception as e: + error = 'Unable to assign transcript identity records to ' + accession + ', potentially an obsolete record :' + 
validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + hgnc_gene_info = entry['description'] + else: + hgnc_gene_info = entry['description'] + # If the none key is found add the description to the database + elif 'none' in entry: + dbaction = 'insert' try: entry = va_btch.data_add(input=input, alt_aln_method=alt_aln_method, accession=accession, dbaction=dbaction, hp=hp, evm=evm, - hdp=hdp) - except hgvs.exceptions.HGVSError as e: - error = 'Transcript %s is not currently supported' % (accession) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue + hdp=self.hdp) except Exception as e: + logger.warning(str(e)) error = 'Unable to assign transcript identity records to ' + accession + ', potentially an obsolete record :' validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue hgnc_gene_info = entry['description'] + + # If no correct keys are found else: - hgnc_gene_info = entry['description'] - # If the none key is found add the description to the database - elif 'none' in entry: - dbaction = 'insert' - try: - entry = va_btch.data_add(input=input, alt_aln_method=alt_aln_method, - accession=accession, dbaction=dbaction, hp=hp, evm=evm, - hdp=hdp) - except Exception as e: - logger.warning(str(e)) - error = 'Unable to assign transcript identity records to ' + accession + ', potentially an obsolete record :' - validation['warnings'] = validation['warnings'] + ': ' + str(error) + # Open a hgvs exception log file in append mode + error = 'Unknown error type' + validation['warnings'] = validation['warnings'] + ': ' + str( + error) + ': A Database error occurred, please contact admin' logger.warning(str(error)) continue - hgnc_gene_info = entry['description'] - # If no correct keys are found + # Ensembl databases else: - # Open a hgvs exception log file in append mode - error = 'Unknown error type' - 
validation['warnings'] = validation['warnings'] + ': ' + str( - error) + ': A Database error occurred, please contact admin' - logger.warning(str(error)) - continue + # accession number + hgvs_object = hp.parse_hgvs_variant(variant) + accession = hgvs_object.ac + # Look for the accession in our database + # Connect to database and send request + entry = va_dbCrl.data.in_entries(accession, 'transcript_info') - # Ensembl databases - else: - # accession number - hgvs_object = hp.parse_hgvs_variant(variant) - accession = hgvs_object.ac - # Look for the accession in our database - # Connect to database and send request - entry = va_dbCrl.data.in_entries(accession, 'transcript_info') - - # Analyse the returned data and take the necessary actions - # If the error key exists - if 'error' in entry: - # Open a hgvs exception log file in append mode - error = entry['description'] - validation['warnings'] = validation['warnings'] + ': ' + str( - error) + ': A Database error occurred, please contact admin' - logger.warning(str(error)) - continue + # Analyse the returned data and take the necessary actions + # If the error key exists + if 'error' in entry: + # Open a hgvs exception log file in append mode + error = entry['description'] + validation['warnings'] = validation['warnings'] + ': ' + str( + error) + ': A Database error occurred, please contact admin' + logger.warning(str(error)) + continue - # If the accession key is found - elif 'accession' in entry: - description = entry['description'] - # If the current entry is too old - if entry['expiry'] == 'true': - dbaction = 'update' - entry = va_btch.data_add(input=input, alt_aln_method=alt_aln_method, - accession=accession, dbaction=dbaction, hp=hp, evm=evm, - hdp=hdp) + # If the accession key is found + elif 'accession' in entry: + description = entry['description'] + # If the current entry is too old + if entry['expiry'] == 'true': + dbaction = 'update' + entry = va_btch.data_add(input=input, alt_aln_method=alt_aln_method, 
+ accession=accession, dbaction=dbaction, hp=hp, evm=evm, + hdp=self.hdp) + hgnc_gene_info = entry['description'] + else: + hgnc_gene_info = entry['description'] + # If the none key is found add the description to the database + elif 'none' in entry: + dbaction = 'insert' + try: + entry = va_btch.data_add(input=input, alt_aln_method=alt_aln_method, + accession=accession, dbaction=dbaction, hp=hp, evm=evm, + hdp=self.hdp) + except Exception as e: + logger.warning(str(e)) + error = 'Unable to assign transcript identity records to ' + accession + ', potentially an obsolete record :' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue hgnc_gene_info = entry['description'] + + # If no correct keys are found else: - hgnc_gene_info = entry['description'] - # If the none key is found add the description to the database - elif 'none' in entry: - dbaction = 'insert' - try: - entry = va_btch.data_add(input=input, alt_aln_method=alt_aln_method, - accession=accession, dbaction=dbaction, hp=hp, evm=evm, - hdp=hdp) - except Exception as e: - logger.warning(str(e)) - error = 'Unable to assign transcript identity records to ' + accession + ', potentially an obsolete record :' - validation['warnings'] = validation['warnings'] + ': ' + str(error) + # Open a hgvs exception log file in append mode + error = 'Unknown error type' + validation['warnings'] = validation['warnings'] + ': ' + str( + error) + ': A Database error occurred, please contact admin' logger.warning(str(error)) continue - hgnc_gene_info = entry['description'] - - # If no correct keys are found - else: - # Open a hgvs exception log file in append mode - error = 'Unknown error type' - validation['warnings'] = validation['warnings'] + ': ' + str( - error) + ': A Database error occurred, please contact admin' - logger.warning(str(error)) - continue - - # Genomic type variants will need to be mapped to transcripts - """ - The following section is used to project 
genomic variants accurately onto - all relevant transcripts - """ - - if (type == ':g.'): - g_query = hp.parse_hgvs_variant(variant) - - # Genomic coordinates can be validated immediately - error = 'false' - try: - vr.validate(g_query) - except hgvs.exceptions.HGVSError as e: - error = str(e) - except KeyError: - error = 'Reference sequence ' + hgvs_genomic.ac + ' is either not supported or does not exist' - if error != 'false': - reason = 'Invalid variant description' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - pass - # Set test to see if Norm alters the coords - g_test = hn.normalize(g_query) - - # Perform test - if g_query.posedit.pos != g_test.posedit.pos: - # validation['warnings'] = validation['warnings'] + ': ' + 'Input variant description normalized to ' + str(g_test) - hgvs_genomic = g_test - else: - hgvs_genomic = g_query - - # Collect rel_var - # rel_var is a keyworded list of relevant transcripts with associated coding variants + # Genomic type variants will need to be mapped to transcripts """ - Initial simple projection from the provided g. 
position all overlapping - transcripts + The following section is used to project genomic variants accurately onto + all relevant transcripts """ - rel_var = va_func.relevant_transcripts(hgvs_genomic, evm, hdp, alt_aln_method, reverse_normalizer) - # Double check rel_vars have not been missed when mapping from a RefSeqGene - if len(rel_var) != 0 and re.match('NG_', str(hgvs_genomic.ac)): - for var in rel_var: - hgvs_coding_variant = hp.parse_hgvs_variant(var) - try: - hgvs_genomic = va_func.myevm_t_to_g(hgvs_coding_variant, hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) - except hgvs.exceptions.HGVSError as e: - try_rel_var = [] - else: - try_rel_var = va_func.relevant_transcripts(hgvs_genomic, evm, hdp, alt_aln_method, - reverse_normalizer) - if len(try_rel_var) > len(rel_var): - rel_var = try_rel_var - break - else: - continue + if (type == ':g.'): + g_query = hp.parse_hgvs_variant(variant) - # Tripple check this assumption by querying the gene position database - if len(rel_var) == 0: - vcf_dict = va_H2V.hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf) - not_di = str(hgvs_genomic.ac) + ':g.' 
+ str(vcf_dict['pos']) + '_' + str( - int(vcf_dict['pos']) + (len(vcf_dict['ref']) - 1)) + 'del' + vcf_dict['ref'] + 'ins' + \ - vcf_dict['alt'] - hgvs_not_di = hp.parse_hgvs_variant(not_di) - rel_var = va_func.relevant_transcripts(hgvs_not_di, evm, hdp, alt_aln_method, - reverse_normalizer) - - # list return statements - """ - If mapping to transcripts has been unsuccessful, provide relevant details - """ - if len(rel_var) == 0: - - # Check for NG_ - rsg = re.compile('^NG_') - if rsg.search(variant): - # parse - hgvs_refseqgene = hp.parse_hgvs_variant(variant) - # Convert to chromosomal position - refseqgene_data = va_g2g.rsg_to_chr(hgvs_refseqgene, primary_assembly, hn, vr) - # There should only ever be one description returned - refseqgene_data = refseqgene_data[0] - - # Extract data - if refseqgene_data['valid'] == 'true': - input = refseqgene_data['hgvs_genomic'] - # re_submit - # Tag the line so that it is not written out - validation['warnings'] = validation[ - 'warnings'] + ': ' + variant + ' automapped to genome position ' + str( - input) - query = {'quibble': input, 'id': validation['id'], 'warnings': validation['warnings'], - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', - 'genomic_g': '', 'protein': '', 'write': 'true', - 'primary_assembly': primary_assembly, 'order': ordering} - coding = 'intergenic' - batch_list.append(query) - else: - error = 'Mapping unavailable for RefSeqGene ' + variant + ' using alignment method = ' + alt_aln_method - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue + # Genomic coordinates can be validated immediately + error = 'false' + try: + vr.validate(g_query) + except hgvs.exceptions.HGVSError as e: + error = str(e) + except KeyError: + error = 'Reference sequence ' + hgvs_genomic.ac + ' is either not supported or does not exist' + if error != 'false': + reason = 'Invalid variant description' + validation['warnings'] = validation['warnings'] + ': 
' + str(error) + logger.warning(str(error)) + continue + else: + pass - # Chromosome build is not supported or intergenic??? + # Set test to see if Norm alters the coords + g_test = hn.normalize(g_query) + + # Perform test + if g_query.posedit.pos != g_test.posedit.pos: + # validation['warnings'] = validation['warnings'] + ': ' + 'Input variant description normalized to ' + str(g_test) + hgvs_genomic = g_test else: - sfm = vvChromasomes.supported_for_mapping(hgvs_genomic.ac, primary_assembly) - if sfm == 'true': + hgvs_genomic = g_query + + # Collect rel_var + # rel_var is a keyworded list of relevant transcripts with associated coding variants + """ + Initial simple projection from the provided g. position all overlapping + transcripts + """ + rel_var = va_func.relevant_transcripts(hgvs_genomic, evm, self.hdp, alt_aln_method, reverse_normalizer) + + # Double check rel_vars have not been missed when mapping from a RefSeqGene + if len(rel_var) != 0 and re.match('NG_', str(hgvs_genomic.ac)): + for var in rel_var: + hgvs_coding_variant = hp.parse_hgvs_variant(var) try: - vr.validate(hgvs_genomic) + hgvs_genomic = va_func.myevm_t_to_g(hgvs_coding_variant, self.hdp, no_norm_evm, + primary_assembly, vm, hp, hn, sf, nr_vm) except hgvs.exceptions.HGVSError as e: - error = str(e) + try_rel_var = [] + else: + try_rel_var = va_func.relevant_transcripts(hgvs_genomic, evm, self.hdp, alt_aln_method, + reverse_normalizer) + if len(try_rel_var) > len(rel_var): + rel_var = try_rel_var + break + else: + continue + + # Tripple check this assumption by querying the gene position database + if len(rel_var) == 0: + vcf_dict = vvHGVS.hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf) + not_di = str(hgvs_genomic.ac) + ':g.' 
+ str(vcf_dict['pos']) + '_' + str( + int(vcf_dict['pos']) + (len(vcf_dict['ref']) - 1)) + 'del' + vcf_dict['ref'] + 'ins' + \ + vcf_dict['alt'] + hgvs_not_di = hp.parse_hgvs_variant(not_di) + rel_var = va_func.relevant_transcripts(hgvs_not_di, evm, self.hdp, alt_aln_method, + reverse_normalizer) + + # list return statements + """ + If mapping to transcripts has been unsuccessful, provide relevant details + """ + if len(rel_var) == 0: + + # Check for NG_ + rsg = re.compile('^NG_') + if rsg.search(variant): + # parse + hgvs_refseqgene = hp.parse_hgvs_variant(variant) + # Convert to chromosomal position + refseqgene_data = va_g2g.rsg_to_chr(hgvs_refseqgene, primary_assembly, hn, vr) + # There should only ever be one description returned + refseqgene_data = refseqgene_data[0] + + # Extract data + if refseqgene_data['valid'] == 'true': + input = refseqgene_data['hgvs_genomic'] + # re_submit + # Tag the line so that it is not written out + validation['warnings'] = validation[ + 'warnings'] + ': ' + variant + ' automapped to genome position ' + str( + input) + query = {'quibble': input, 'id': validation['id'], 'warnings': validation['warnings'], + 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', + 'genomic_g': '', 'protein': '', 'write': 'true', + 'primary_assembly': primary_assembly, 'order': ordering} + coding = 'intergenic' + batch_list.append(query) + else: + error = 'Mapping unavailable for RefSeqGene ' + variant + ' using alignment method = ' + alt_aln_method validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue + + # Chromosome build is not supported or intergenic??? 
+ else: + sfm = vvChromosomes.supported_for_mapping(hgvs_genomic.ac, primary_assembly) + if sfm == 'true': + try: + vr.validate(hgvs_genomic) + except hgvs.exceptions.HGVSError as e: + error = str(e) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + # Map to RefSeqGene if available + refseqgene_data = va_g2g.chr_to_rsg(hgvs_genomic, hn, vr) + rsg_data = '' + # Example {'gene': 'NTHL1', 'hgvs_refseqgene': 'NG_008412.1:g.3455_3464delCAAACACACA', 'valid': 'true'} + for data in refseqgene_data: + if data['valid'] == 'true': + data['hgvs_refseqgene'] = hp.parse_hgvs_variant(data['hgvs_refseqgene']) + data['hgvs_refseqgene'] = fn.valstr(data['hgvs_refseqgene']) + rsg_data = rsg_data + data['hgvs_refseqgene'] + ' (' + data['gene'] + '), ' + + error = 'No transcripts found that fully overlap the described variation in the genomic sequence' + # set output type flag + set_output_type_flag = 'intergenic' + # set genomic and where available RefSeqGene outputs + validation['warnings'] = validation['warnings'] + ': ' + str(error) + validation['genomic_g'] = fn.valstr(hgvs_genomic) + validation['genomic_r'] = str(rsg_data.split('(')[0]) + logger.warning(str(error)) + continue else: - # Map to RefSeqGene if available - refseqgene_data = va_g2g.chr_to_rsg(hgvs_genomic, hn, vr) - rsg_data = '' - # Example {'gene': 'NTHL1', 'hgvs_refseqgene': 'NG_008412.1:g.3455_3464delCAAACACACA', 'valid': 'true'} - for data in refseqgene_data: - if data['valid'] == 'true': - data['hgvs_refseqgene'] = hp.parse_hgvs_variant(data['hgvs_refseqgene']) - data['hgvs_refseqgene'] = valstr(data['hgvs_refseqgene']) - rsg_data = rsg_data + data['hgvs_refseqgene'] + ' (' + data['gene'] + '), ' - - error = 'No transcripts found that fully overlap the described variation in the genomic sequence' - # set output type flag - set_output_type_flag = 'intergenic' - # set genomic and where available RefSeqGene outputs + error = 'Please ensure the 
requested chromosome version relates to a supported genome build. Supported genome builds are: GRCh37, GRCh38, hg19 and hg38' validation['warnings'] = validation['warnings'] + ': ' + str(error) - validation['genomic_g'] = valstr(hgvs_genomic) - validation['genomic_r'] = str(rsg_data.split('(')[0]) logger.warning(str(error)) continue - else: - error = 'Please ensure the requested chromosome version relates to a supported genome build. Supported genome builds are: GRCh37, GRCh38, hg19 and hg38' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - # Tag the line so that it is not written out - validation['write'] = 'false' + else: + # Tag the line so that it is not written out + validation['write'] = 'false' - """ - Gap aware projection from g. to c. - """ + """ + Gap aware projection from g. to c. + """ - # Set variables for problem specific warnings - gapped_alignment_warning = '' - corrective_action_taken = '' - gapped_transcripts = '' - auto_info = '' - - # Create a pseudo VCF so that normalization can be applied and a delins can be generated - hgvs_genomic_variant = hgvs_genomic - # Reverse normalize hgvs_genomic_variant: NOTE will replace ref - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic_variant) - hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - - # VCF - vcf_dict = va_H2V.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, - reverse_normalizer, sf) - chr = vcf_dict['chr'] - pos = vcf_dict['pos'] - ref = vcf_dict['ref'] - alt = vcf_dict['alt'] - - # Generate an end position - end = str(int(pos) + len(ref) - 1) - pos = str(pos) - - # take a look at the input genomic variant for potential base salvage - stash_ac = vcf_dict['chr'] - stash_pos = int(vcf_dict['pos']) - stash_ref = vcf_dict['ref'] - stash_alt = vcf_dict['alt'] - stash_end = end - # Re-Analyse genomic positions - if re.match('NG_', str(stash_input)): - c = 
hp.parse_hgvs_variant(rel_var[0]) - if hasattr(c.posedit.edit, 'ref') and c.posedit.edit.ref is not None: - c.posedit.edit.ref = c.posedit.edit.ref.upper() - if hasattr(c.posedit.edit, 'alt') and c.posedit.edit.alt is not None: - c.posedit.edit.alt = c.posedit.edit.alt.upper() - stash_input = va_func.myevm_t_to_g(c, hdp, no_norm_evm, primary_assembly, vm, hp, hn, sf, - nr_vm) - if re.match('NC_', str(stash_input)) or re.match('NT_', str(stash_input)) or re.match('NW_', - str( - stash_input)): - try: - hgvs_stash = hp.parse_hgvs_variant(stash_input) - except: - hgvs_stash = stash_input - if hasattr(hgvs_stash.posedit.edit, 'ref') and hgvs_stash.posedit.edit.ref is not None: - hgvs_stash.posedit.edit.ref = hgvs_stash.posedit.edit.ref.upper() - if hasattr(hgvs_stash.posedit.edit, 'alt') and hgvs_stash.posedit.edit.alt is not None: - hgvs_stash.posedit.edit.alt = hgvs_stash.posedit.edit.alt.upper() - - stash_ac = hgvs_stash.ac - # MAKE A NO NORM HGVS2VCF - stash_dict = va_H2V.pos_lock_hgvs2vcf(hgvs_stash, primary_assembly, reverse_normalizer, sf) - stash_ac = hgvs_stash.ac - stash_pos = int(stash_dict['pos']) - stash_ref = stash_dict['ref'] - stash_alt = stash_dict['alt'] - # Generate an end position - stash_end = str(stash_pos + len(stash_ref) - 1) + # Set variables for problem specific warnings + gapped_alignment_warning = '' + corrective_action_taken = '' + gapped_transcripts = '' + auto_info = '' - # Store a not real deletion insertion - stored_hgvs_not_delins = hp.parse_hgvs_variant(str( - hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) - stash_hgvs_not_delins = hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_genomic_5pr.type + '.' 
+ str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + # Create a pseudo VCF so that normalization can be applied and a delins can be generated + hgvs_genomic_variant = hgvs_genomic + # Reverse normalize hgvs_genomic_variant: NOTE will replace ref + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic_variant) + hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - # Set non-valid caution to false - non_valid_caution = 'false' + # VCF + vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, + reverse_normalizer, sf) + chr = vcf_dict['chr'] + pos = vcf_dict['pos'] + ref = vcf_dict['ref'] + alt = vcf_dict['alt'] - # make an empty rel_var - nw_rel_var = [] + # Generate an end position + end = str(int(pos) + len(ref) - 1) + pos = str(pos) + + # take a look at the input genomic variant for potential base salvage + stash_ac = vcf_dict['chr'] + stash_pos = int(vcf_dict['pos']) + stash_ref = vcf_dict['ref'] + stash_alt = vcf_dict['alt'] + stash_end = end + # Re-Analyse genomic positions + if re.match('NG_', str(stash_input)): + c = hp.parse_hgvs_variant(rel_var[0]) + if hasattr(c.posedit.edit, 'ref') and c.posedit.edit.ref is not None: + c.posedit.edit.ref = c.posedit.edit.ref.upper() + if hasattr(c.posedit.edit, 'alt') and c.posedit.edit.alt is not None: + c.posedit.edit.alt = c.posedit.edit.alt.upper() + stash_input = va_func.myevm_t_to_g(c, self.hdp, no_norm_evm, primary_assembly, vm, hp, hn, sf, + nr_vm) + if re.match('NC_', str(stash_input)) or re.match('NT_', str(stash_input)) or re.match('NW_', + str( + stash_input)): + try: + hgvs_stash = hp.parse_hgvs_variant(stash_input) + except: + hgvs_stash = stash_input + if hasattr(hgvs_stash.posedit.edit, 'ref') and hgvs_stash.posedit.edit.ref is not None: + hgvs_stash.posedit.edit.ref = hgvs_stash.posedit.edit.ref.upper() + if hasattr(hgvs_stash.posedit.edit, 'alt') and hgvs_stash.posedit.edit.alt is not None: + 
hgvs_stash.posedit.edit.alt = hgvs_stash.posedit.edit.alt.upper() - # loop through rel_var and amend where required - for var in rel_var: - # Store the current hgvs:c. description - saved_hgvs_coding = hp.parse_hgvs_variant(var) + stash_ac = hgvs_stash.ac + # MAKE A NO NORM HGVS2VCF + stash_dict = vvHGVS.pos_lock_hgvs2vcf(hgvs_stash, primary_assembly, reverse_normalizer, sf) + stash_ac = hgvs_stash.ac + stash_pos = int(stash_dict['pos']) + stash_ref = stash_dict['ref'] + stash_alt = stash_dict['alt'] + # Generate an end position + stash_end = str(stash_pos + len(stash_ref) - 1) - # Remove un-selected transcripts - if select_transcripts != 'all': - tx_ac = saved_hgvs_coding.ac - # If it's in the selected tx dict, keep it - if tx_ac.split('.')[0] in select_transcripts_dict.keys(): - pass - # If not get rid of it! - else: - continue + # Store a not real deletion insertion + stored_hgvs_not_delins = hp.parse_hgvs_variant(str( + hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) + stash_hgvs_not_delins = hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_genomic_5pr.type + '.' + str( + stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) - # Get orientation of the gene wrt genome and a list of exons mapped to the genome - ori = va_func.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=hgvs_genomic_5pr.ac, - alt_aln_method=alt_aln_method, hdp=hdp) - orientation = int(ori[0]['alt_strand']) - intronic_variant = 'false' + # Set non-valid caution to false + non_valid_caution = 'false' - if orientation == -1: - # position genomic at its most 5 prime position - try: - query_genomic = reverse_normalizer.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript ant test for movement + # make an empty rel_var + nw_rel_var = [] + + # loop through rel_var and amend where required + for var in rel_var: + # Store the current hgvs:c. 
description + saved_hgvs_coding = hp.parse_hgvs_variant(var) + + # Remove un-selected transcripts + if select_transcripts != 'all': + tx_ac = saved_hgvs_coding.ac + # If it's in the selected tx dict, keep it + if tx_ac.split('.')[0] in select_transcripts_dict.keys(): + pass + # If not get rid of it! + else: + continue + + # Get orientation of the gene wrt genome and a list of exons mapped to the genome + ori = va_func.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=hgvs_genomic_5pr.ac, + alt_aln_method=alt_aln_method, hdp=self.hdp) + orientation = int(ori[0]['alt_strand']) + intronic_variant = 'false' + + if orientation == -1: + # position genomic at its most 5 prime position + try: + query_genomic = reverse_normalizer.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript ant test for movement + try: + hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): + pass + else: + hgvs_seek_var = saved_hgvs_coding + + elif orientation != -1: + # position genomic at its most 3 prime position + try: + query_genomic = hn.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript and test for movement try: hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) except hgvs.exceptions.HGVSError as e: hgvs_seek_var = saved_hgvs_coding else: - seek_var = valstr(hgvs_seek_var) + seek_var = fn.valstr(hgvs_seek_var) seek_ac = str(hgvs_seek_var.ac) - if (hgvs_seek_var.posedit.pos.start.base + 
hgvs_seek_var.posedit.pos.start.offset) > ( - saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): - pass - else: - hgvs_seek_var = saved_hgvs_coding + if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): + pass + else: + hgvs_seek_var = saved_hgvs_coding - elif orientation != -1: - # position genomic at its most 3 prime position try: - query_genomic = hn.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): - pass - else: - hgvs_seek_var = saved_hgvs_coding - - try: - intron_test = hn.normalize(hgvs_seek_var) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( + intron_test = hn.normalize(hgvs_seek_var) + except 
hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', error) or re.match( 'Unsupported normalization of variants spanning the exon-intron boundary', error): - intronic_variant = 'hard_fail' - else: + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + intronic_variant = 'hard_fail' + else: + # Double check to see whether the variant is actually intronic? + for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' + + if intronic_variant != 'hard_fail': + if re.search('\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search('\d+\-', str( + hgvs_seek_var.posedit.pos)) or re.search('\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search('\*\d+\-', + str(hgvs_seek_var.posedit.pos)): # Double check to see whether the variant is actually intronic? for exon in ori: genomic_start = int(exon['alt_start_i']) @@ -2329,11 +2353,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: intronic_variant = 'true' - if intronic_variant != 'hard_fail': if re.search('\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search('\d+\-', str( hgvs_seek_var.posedit.pos)) or re.search('\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search('\*\d+\-', - str(hgvs_seek_var.posedit.pos)): + hgvs_seek_var.posedit.pos)) or re.search('\*\d+\-', str(hgvs_seek_var.posedit.pos)): # Double check to see whether the variant is actually intronic? 
for exon in ori: genomic_start = int(exon['alt_start_i']) @@ -2346,897 +2368,849 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: intronic_variant = 'true' - if re.search('\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search('\d+\-', str( - hgvs_seek_var.posedit.pos)) or re.search('\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search('\*\d+\-', str(hgvs_seek_var.posedit.pos)): - # Double check to see whether the variant is actually intronic? - for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' - - # If exonic, process - if intronic_variant != 'true': - # map form reverse normalized g. to c. - hgvs_from_5n_g = no_norm_evm.g_to_t(hgvs_genomic_5pr, saved_hgvs_coding.ac) + # If exonic, process + if intronic_variant != 'true': + # map form reverse normalized g. to c. + hgvs_from_5n_g = no_norm_evm.g_to_t(hgvs_genomic_5pr, saved_hgvs_coding.ac) - # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths - disparity_deletion_in = ['false', 'false'] - if stored_hgvs_not_delins != '': - # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) - # This test will only occur in dup of single base, insertion or substitution - if not re.search('_', str(hgvs_not_delins.posedit.pos)): - if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', - hgvs_genomic_5pr.posedit.edit.type): - # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos - plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) - plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 - plussed_hgvs_not_delins.posedit.edit.ref = '' - transcript_variant = no_norm_evm.g_to_t(plussed_hgvs_not_delins, - str(saved_hgvs_coding.ac)) - if (( - transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( - hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( + # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths + disparity_deletion_in = ['false', 'false'] + if stored_hgvs_not_delins != '': + # Refresh hgvs_not_delins from stored_hgvs_not_delins + hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) + # This test will only occur in dup of single base, insertion or substitution + if not re.search('_', str(hgvs_not_delins.posedit.pos)): + if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', + hgvs_genomic_5pr.posedit.edit.type): + # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos + plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) + plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 + plussed_hgvs_not_delins.posedit.edit.ref = '' + transcript_variant = no_norm_evm.g_to_t(plussed_hgvs_not_delins, + str(saved_hgvs_coding.ac)) + if (( + transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( + hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', + str(hgvs_genomic_5pr.posedit.edit)) and not re.search( 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - else: - if re.search('dup', 
str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + else: + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', + str(hgvs_genomic_5pr.posedit.edit)) and not re.search( 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = 
hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + else: + pass else: pass - else: - pass - - try: - tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSInvalidIntervalError: - tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_genomic_5pr, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - tx_hgvs_not_delins = saved_hgvs_coding - # Create normalized version of tx_hgvs_not_delins - rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) - # Check for +ve base and adjust - if (re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)) or re.search('\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) or re.search( - '\-', str(rn_tx_hgvs_not_delins.posedit.pos.end))): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - exceptPass() - elif re.search('\+', 
str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: - # pass - - # Check for -ve base and adjust - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search('\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - exceptPass() - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base back to next 
available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # Delete the ref - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # Add the additional base to the ALT - start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) - rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - else: - pass + tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSInvalidIntervalError: + tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_genomic_5pr, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError: + if str(e) == 'start or end or both are beyond the bounds of transcript record': + tx_hgvs_not_delins = saved_hgvs_coding - # 
Logic - if len(hgvs_not_delins.posedit.edit.ref) < len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( - hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['chromosome', gap_length] - elif len(hgvs_not_delins.posedit.edit.ref) > len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( - rn_tx_hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] - else: - hgvs_stash_t = vm.g_to_t(stash_hgvs_not_delins, saved_hgvs_coding.ac) - if len(stash_hgvs_not_delins.posedit.edit.ref) > len( - hgvs_stash_t.posedit.edit.ref): + # Create normalized version of tx_hgvs_not_delins + rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) + # Check for +ve base and adjust + if (re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)) or re.search('\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) or re.search( + '\-', str(rn_tx_hgvs_not_delins.posedit.pos.end))): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' try: - hn.normalize(hgvs_stash_t) + rn_tx_hgvs_not_delins.posedit.edit.alt = '' except: - exceptPass() + fn.exceptPass() + elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) else: - gap_length = len(stash_hgvs_not_delins.posedit.edit.ref) - len( - 
hgvs_stash_t.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] - try: - tx_hgvs_not_delins = vm.c_to_n(hgvs_stash_t) - except: - tx_hgvs_not_delins = hgvs_stash_t - hgvs_not_delins = stash_hgvs_not_delins - elif hgvs_stash_t.posedit.pos.start.offset != 0 or hgvs_stash_t.posedit.pos.end.offset != 0: - disparity_deletion_in = ['transcript', 'Requires Analysis'] + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, + primary_assembly, vm, hp, hn, sf, nr_vm) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, + primary_assembly, vm, hp, hn, sf, nr_vm) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # else: + # pass + + # Check for -ve base and adjust + elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search('\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' try: - tx_hgvs_not_delins = vm.c_to_n(hgvs_stash_t) + rn_tx_hgvs_not_delins.posedit.edit.alt = '' except: - tx_hgvs_not_delins = hgvs_stash_t - hgvs_not_delins = stash_hgvs_not_delins - 
hgvs_genomic_5pr = stash_hgvs_not_delins - else: - pass - - # Final sanity checks - try: - vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) - except Exception as e: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - hgvs_not_delins = saved_hgvs_coding - disparity_deletion_in = ['false', 'false'] - logger.warning(str(e)) - try: - hn.normalize(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - hgvs_not_delins = saved_hgvs_coding - disparity_deletion_in = ['false', 'false'] - elif re.match('Normalization of intronic variants is not supported', error): - # We know that this cannot be because of an intronic variant, so must be aligned to tx gap - disparity_deletion_in = ['transcript', 'Requires Analysis'] - logger.warning(error) - # Pre-processing of tx_hgvs_not_delins - try: - if tx_hgvs_not_delins.posedit.edit.alt is None: - tx_hgvs_not_delins.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' 
+ str( - tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) - - # GAP IN THE TRANSCRIPT DISPARITY DETECTED - if disparity_deletion_in[0] == 'transcript': - gap_position = '' - gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + primary_assembly - - # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search('\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search('\-', - str( - tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search('\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search('\-', - str( - tx_hgvs_not_delins.posedit.pos.end))): - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - # Copy the current variant - tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) - try: - if tx_gap_fill_variant.posedit.edit.alt is None: - tx_gap_fill_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( - tx_gap_fill_variant.posedit.pos.start) + '_' + str( - tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = hp.parse_hgvs_variant( - tx_gap_fill_variant_delins_from_dup) - - # Identify which half of the NOT-intron the start position of the variant is in - if re.search('\-', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search('\+', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - - try: - tx_gap_fill_variant = vm.n_to_c(tx_gap_fill_variant) - except: - exceptPass() - genomic_gap_fill_variant = vm.t_to_g(tx_gap_fill_variant, - reverse_normalized_hgvs_genomic.ac) - genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - - try: - c_tx_hgvs_not_delins = vm.n_to_c(tx_hgvs_not_delins) - except Exception: - c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = vm.t_to_g(c_tx_hgvs_not_delins, - hgvs_genomic_5pr.ac) - - # Ensure an ALT exists - try: - if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + 
genomic_gap_fill_variant.type + '.' + str( - genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = hp.parse_hgvs_variant( - genomic_gap_fill_variant_delins_from_dup) - genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( - genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = hp.parse_hgvs_variant( - genomic_gap_fill_variant_alt_delins_from_dup) - - # Correct insertion alts - if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - genomic_gap_fill_variant_alt.posedit.pos.end.base) - genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - append_ref[1] - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) - else: - # Deletions with no ins - pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = genomic_gap_fill_variant.posedit.pos.start.base - alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base - ref_base_dict = {} - for base in 
reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = ref_start + 1 - - alt_base_dict = {} - - # NEED TO SEARCH FOR RANGE = and replace with interval_range - # Need to search for int and replace with integer - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, - genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): - if integer == alt_start: - alt_base_dict[integer] = str(''.join(alternate_bases)) + fn.exceptPass() + elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base back to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # Delete the ref + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # Add the additional base to the ALT + start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 + end = rn_tx_hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) else: - alt_base_dict[integer] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, - genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): - if integer in alt_base_dict.keys(): - alternate_sequence_bases.append(alt_base_dict[integer]) + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, + primary_assembly, vm, hp, hn, sf, nr_vm) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + 
rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) else: - alternate_sequence_bases.append(ref_base_dict[integer]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Add the new alt to the gap fill variant and generate transcript variant - genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = vm.g_to_t(genomic_gap_fill_variant, - tx_gap_fill_variant.ac) - - # Set warning - gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - disparity_deletion_in[1] = [gap_size] - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - if re.match('\-', str(for_location_c.posedit.pos.start.offset)): - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, + primary_assembly, vm, hp, hn, sf, nr_vm) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 else: - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' - auto_info = auto_info + '%s' % (gap_position) - else: - if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # In this instance, we have identified a transcript gap but the n. version of - # the transcript variant but do not have a position which actually hits the gap, - # so the variant likely spans the gap, and is not picked up by an offset. + pass + + # Logic + if len(hgvs_not_delins.posedit.edit.ref) < len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( + hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['chromosome', gap_length] + elif len(hgvs_not_delins.posedit.edit.ref) > len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( + rn_tx_hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['transcript', gap_length] + else: + hgvs_stash_t = vm.g_to_t(stash_hgvs_not_delins, saved_hgvs_coding.ac) + if len(stash_hgvs_not_delins.posedit.edit.ref) > len( + hgvs_stash_t.posedit.edit.ref): + try: + hn.normalize(hgvs_stash_t) + except: + fn.exceptPass() + else: + gap_length = len(stash_hgvs_not_delins.posedit.edit.ref) - len( + hgvs_stash_t.posedit.edit.ref) + disparity_deletion_in = ['transcript', gap_length] + try: + tx_hgvs_not_delins = vm.c_to_n(hgvs_stash_t) + except: + tx_hgvs_not_delins = hgvs_stash_t + hgvs_not_delins = stash_hgvs_not_delins + elif hgvs_stash_t.posedit.pos.start.offset != 0 or hgvs_stash_t.posedit.pos.end.offset != 0: + disparity_deletion_in = ['transcript', 'Requires Analysis'] + try: + tx_hgvs_not_delins = vm.c_to_n(hgvs_stash_t) + except: + tx_hgvs_not_delins = hgvs_stash_t + hgvs_not_delins = stash_hgvs_not_delins + hgvs_genomic_5pr = stash_hgvs_not_delins + else: + pass + + # Final sanity checks + try: + vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + except Exception as e: + if str(e) == 'start or end or both are 
beyond the bounds of transcript record': + hgvs_not_delins = saved_hgvs_coding + disparity_deletion_in = ['false', 'false'] + logger.warning(str(e)) + try: + hn.normalize(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + hgvs_not_delins = saved_hgvs_coding + disparity_deletion_in = ['false', 'false'] + elif re.match('Normalization of intronic variants is not supported', error): + # We know that this cannot be because of an intronic variant, so must be aligned to tx gap + disparity_deletion_in = ['transcript', 'Requires Analysis'] + logger.warning(error) + # Pre-processing of tx_hgvs_not_delins + try: + if tx_hgvs_not_delins.posedit.edit.alt is None: + tx_hgvs_not_delins.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' 
+ str( + tx_hgvs_not_delins.posedit.pos.start) + '_' + str( + tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins = hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) + + # GAP IN THE TRANSCRIPT DISPARITY DETECTED + if disparity_deletion_in[0] == 'transcript': + gap_position = '' + gapped_alignment_warning = str( + hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + primary_assembly + + # ANY VARIANT WHOLLY WITHIN THE GAP + if (re.search('\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search('\-', + str( + tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search('\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search('\-', + str( + tx_hgvs_not_delins.posedit.pos.end))): + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + # Copy the current variant + tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) try: - c1 = vm.n_to_c(tx_hgvs_not_delins) + if tx_gap_fill_variant.posedit.edit.alt is None: + tx_gap_fill_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( + tx_gap_fill_variant.posedit.pos.start) + '_' + str( + tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant = hp.parse_hgvs_variant( + tx_gap_fill_variant_delins_from_dup) + + # Identify which half of the NOT-intron the start position of the variant is in + if re.search('\-', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + elif re.search('\+', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + + try: + tx_gap_fill_variant = vm.n_to_c(tx_gap_fill_variant) except: - c1 = tx_hgvs_not_delins - g1 = nr_vm.t_to_g(c1, hgvs_genomic.ac) - g3 = nr_vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - ng2 = hn.normalize(g2) - g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - len(g3.posedit.edit.ref) - 1) + fn.exceptPass() + genomic_gap_fill_variant = vm.t_to_g(tx_gap_fill_variant, + reverse_normalized_hgvs_genomic.ac) + genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref + try: - c2 = vm.g_to_t(g3, c1.ac) - if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - pass + c_tx_hgvs_not_delins = vm.n_to_c(tx_hgvs_not_delins) + except Exception: + c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) + genomic_gap_fill_variant_alt = 
vm.t_to_g(c_tx_hgvs_not_delins, + hgvs_genomic_5pr.ac) + + # Ensure an ALT exists + try: + if genomic_gap_fill_variant_alt.posedit.edit.alt is None: + genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( + genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant = hp.parse_hgvs_variant( + genomic_gap_fill_variant_delins_from_dup) + genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( + genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt = hp.parse_hgvs_variant( + genomic_gap_fill_variant_alt_delins_from_dup) + + # Correct insertion alts + if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': + append_ref = sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, + genomic_gap_fill_variant_alt.posedit.pos.end.base) + genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ + 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ + append_ref[1] + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) + if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: + alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) + else: + # Deletions with no ins + 
pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = genomic_gap_fill_variant.posedit.pos.start.base + alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base + ref_base_dict = {} + for base in reference_bases: + ref_base_dict[ref_start] = str(base) + ref_start = ref_start + 1 + + alt_base_dict = {} + + # NEED TO SEARCH FOR RANGE = and replace with interval_range + # Need to search for int and replace with integer + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, + genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): + if integer == alt_start: + alt_base_dict[integer] = str(''.join(alternate_bases)) else: - tx_hgvs_not_delins = c2 - try: - tx_hgvs_not_delins = vm.c_to_n(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSError: - exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError: - exceptPass() - - if re.search('\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - '\+', str(tx_hgvs_not_delins.posedit.pos.end)): + alt_base_dict[integer] = 'X' + + # Generate the alt sequence + alternate_sequence_bases = [] + for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, + genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): + if integer in alt_base_dict.keys(): + alternate_sequence_bases.append(alt_base_dict[integer]) + else: + alternate_sequence_bases.append(ref_base_dict[integer]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Add the new alt to the gap fill variant and generate transcript variant + genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence + hgvs_refreshed_variant = vm.g_to_t(genomic_gap_fill_variant, + 
tx_gap_fill_variant.ac) + + # Set warning + gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) + disparity_deletion_in[1] = [gap_size] auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( tx_hgvs_not_delins.ac) non_valid_caution = 'true' - try: - c2 = vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 + # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) if re.match('NM_', str(for_location_c)): for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + if re.match('\-', str(for_location_c.posedit.pos.start.offset)): + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + else: gps = for_location_c.posedit.pos.start.base gpe = for_location_c.posedit.pos.start.base + 1 gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' - # Warn update auto_info = auto_info + '%s' % (gap_position) - elif re.search('\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - '\+', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) + else: + if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: + # In this instance, we have identified a transcript gap but the n. version of + # the transcript variant but do not have a position which actually hits the gap, + # so the variant likely spans the gap, and is not picked up by an offset. 
+ try: + c1 = vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + g1 = nr_vm.t_to_g(c1, hgvs_genomic.ac) + g3 = nr_vm.t_to_g(c1, hgvs_genomic.ac) g2 = vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - gpe = for_location_c.posedit.pos.end.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search('\-', - str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - '\-', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) + ng2 = hn.normalize(g2) + g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( + len(g3.posedit.edit.ref) - 1) + try: + c2 = vm.g_to_t(g3, c1.ac) + if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: + pass + else: + tx_hgvs_not_delins = c2 + try: + tx_hgvs_not_delins = vm.c_to_n(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSError: + fn.exceptPass() + except hgvs.exceptions.HGVSInvalidVariantError: + fn.exceptPass() + + if re.search('\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + '\+', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = vm.t_to_g(c2, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search('\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + '\+', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = vm.t_to_g(c2, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base + gpe = for_location_c.posedit.pos.end.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search('\-', + str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + '\-', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = vm.t_to_g(c2, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search('\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + '\-', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' g2 = vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref + c2 = vm.g_to_t(g2, c2.ac) + reference = c1.posedit.edit.ref + c2.posedit.edit.ref[1:] + alternate = c1.posedit.edit.alt + c2.posedit.edit.ref[1:] + c3 = copy.deepcopy(c1) + c3.posedit.pos.end = c2.posedit.pos.end + c3.posedit.edit.ref = '' # reference + c3.posedit.edit.alt = alternate + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base - 1 + gpe = for_location_c.posedit.pos.end.base + gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search('\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - '\-', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - c2 = vm.g_to_t(g2, c2.ac) - reference = c1.posedit.edit.ref + c2.posedit.edit.ref[1:] - alternate = c1.posedit.edit.alt + c2.posedit.edit.ref[1:] - c3 = copy.deepcopy(c1) - c3.posedit.pos.end = c2.posedit.pos.end - c3.posedit.edit.ref = '' # reference - c3.posedit.edit.alt = alternate - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - 1 - gpe = for_location_c.posedit.pos.end.base - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - else: - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) + '\n' - hgvs_refreshed_variant = tx_hgvs_not_delins - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + '\n' + hgvs_refreshed_variant = tx_hgvs_not_delins + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - # GAP IN THE CHROMOSOME - elif disparity_deletion_in[0] == 'chromosome': - # Set warning variables - gap_position = '' - gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + primary_assembly - hgvs_refreshed_variant = tx_hgvs_not_delins - # Warn - auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( - hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(disparity_deletion_in[ - 1]) + ' transcript base(s) that fail to align to chromosome ' + str( - hgvs_genomic.ac) + '\n' - gapped_transcripts = gapped_transcripts + str(hgvs_refreshed_variant.ac) + ' ' - else: - # Try the push - hgvs_stash = copy.deepcopy(stash_hgvs_not_delins) - stash_ac = hgvs_stash.ac - # Make a hard left and hard right not delins g. - stash_dict_right = va_H2V.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, sf) - stash_pos_right = int(stash_dict_right['pos']) - stash_ref_right = stash_dict_right['ref'] - stash_alt_right = stash_dict_right['alt'] - stash_end_right = str(stash_pos_right + len(stash_ref_right) - 1) - stash_hgvs_not_delins_right = hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' 
+ str( - stash_pos_right) + '_' + stash_end_right + 'del' + stash_ref_right + 'ins' + stash_alt_right) - stash_dict_left = va_H2V.hard_left_hgvs2vcf(hgvs_stash, primary_assembly, - reverse_normalizer, sf) - stash_pos_left = int(stash_dict_left['pos']) - stash_ref_left = stash_dict_left['ref'] - stash_alt_left = stash_dict_left['alt'] - stash_end_left = str(stash_pos_left + len(stash_ref_left) - 1) - stash_hgvs_not_delins_left = hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' + str( - stash_pos_left) + '_' + stash_end_left + 'del' + stash_ref_left + 'ins' + stash_alt_left) - # Map in-situ to the transcript left and right - try: - tx_hard_right = vm.g_to_t(stash_hgvs_not_delins_right, saved_hgvs_coding.ac) - except Exception as e: - tx_hard_right = saved_hgvs_coding - else: - normalize_stash_right = hn.normalize(stash_hgvs_not_delins_right) - if str(normalize_stash_right.posedit) == str(stash_hgvs_not_delins.posedit): - tx_hard_right = saved_hgvs_coding - try: - tx_hard_left = vm.g_to_t(stash_hgvs_not_delins_left, saved_hgvs_coding.ac) - except Exception as e: - tx_hard_left = saved_hgvs_coding - else: - normalize_stash_left = hn.normalize(stash_hgvs_not_delins_left) - if str(normalize_stash_left.posedit) == str(stash_hgvs_not_delins.posedit): - tx_hard_left = saved_hgvs_coding - # The Logic - Currently limited to genome gaps - if len(stash_hgvs_not_delins_right.posedit.edit.ref) < len( - tx_hard_right.posedit.edit.ref): - tx_hard_right = hn.normalize(tx_hard_right) - gap_position = '' - gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' may be an artefact of aligning the transcripts listed below with genome build ' + primary_assembly - hgvs_refreshed_variant = tx_hard_right - gapped_transcripts = gapped_transcripts + str(tx_hard_right.ac) + ' ' - elif len(stash_hgvs_not_delins_left.posedit.edit.ref) < len( - tx_hard_left.posedit.edit.ref): - tx_hard_left = hn.normalize(tx_hard_left) + # GAP IN THE CHROMOSOME + elif disparity_deletion_in[0] == 
'chromosome': + # Set warning variables gap_position = '' gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' may be an artefact of aligning the transcripts listed below with genome build ' + primary_assembly - hgvs_refreshed_variant = tx_hard_left - gapped_transcripts = gapped_transcripts + str(tx_hard_left.ac) + ' ' + hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + primary_assembly + hgvs_refreshed_variant = tx_hgvs_not_delins + # Warn + auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( + hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(disparity_deletion_in[ + 1]) + ' transcript base(s) that fail to align to chromosome ' + str( + hgvs_genomic.ac) + '\n' + gapped_transcripts = gapped_transcripts + str(hgvs_refreshed_variant.ac) + ' ' else: - # Keep the same by re-setting rel_var - hgvs_refreshed_variant = saved_hgvs_coding + # Try the push + hgvs_stash = copy.deepcopy(stash_hgvs_not_delins) + stash_ac = hgvs_stash.ac + # Make a hard left and hard right not delins g. + stash_dict_right = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, sf) + stash_pos_right = int(stash_dict_right['pos']) + stash_ref_right = stash_dict_right['ref'] + stash_alt_right = stash_dict_right['alt'] + stash_end_right = str(stash_pos_right + len(stash_ref_right) - 1) + stash_hgvs_not_delins_right = hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' + str( + stash_pos_right) + '_' + stash_end_right + 'del' + stash_ref_right + 'ins' + stash_alt_right) + stash_dict_left = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, primary_assembly, + reverse_normalizer, sf) + stash_pos_left = int(stash_dict_left['pos']) + stash_ref_left = stash_dict_left['ref'] + stash_alt_left = stash_dict_left['alt'] + stash_end_left = str(stash_pos_left + len(stash_ref_left) - 1) + stash_hgvs_not_delins_left = hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' 
+ str( + stash_pos_left) + '_' + stash_end_left + 'del' + stash_ref_left + 'ins' + stash_alt_left) + # Map in-situ to the transcript left and right + try: + tx_hard_right = vm.g_to_t(stash_hgvs_not_delins_right, saved_hgvs_coding.ac) + except Exception as e: + tx_hard_right = saved_hgvs_coding + else: + normalize_stash_right = hn.normalize(stash_hgvs_not_delins_right) + if str(normalize_stash_right.posedit) == str(stash_hgvs_not_delins.posedit): + tx_hard_right = saved_hgvs_coding + try: + tx_hard_left = vm.g_to_t(stash_hgvs_not_delins_left, saved_hgvs_coding.ac) + except Exception as e: + tx_hard_left = saved_hgvs_coding + else: + normalize_stash_left = hn.normalize(stash_hgvs_not_delins_left) + if str(normalize_stash_left.posedit) == str(stash_hgvs_not_delins.posedit): + tx_hard_left = saved_hgvs_coding + # The Logic - Currently limited to genome gaps + if len(stash_hgvs_not_delins_right.posedit.edit.ref) < len( + tx_hard_right.posedit.edit.ref): + tx_hard_right = hn.normalize(tx_hard_right) + gap_position = '' + gapped_alignment_warning = str( + hgvs_genomic_5pr) + ' may be an artefact of aligning the transcripts listed below with genome build ' + primary_assembly + hgvs_refreshed_variant = tx_hard_right + gapped_transcripts = gapped_transcripts + str(tx_hard_right.ac) + ' ' + elif len(stash_hgvs_not_delins_left.posedit.edit.ref) < len( + tx_hard_left.posedit.edit.ref): + tx_hard_left = hn.normalize(tx_hard_left) + gap_position = '' + gapped_alignment_warning = str( + hgvs_genomic_5pr) + ' may be an artefact of aligning the transcripts listed below with genome build ' + primary_assembly + hgvs_refreshed_variant = tx_hard_left + gapped_transcripts = gapped_transcripts + str(tx_hard_left.ac) + ' ' + else: + # Keep the same by re-setting rel_var + hgvs_refreshed_variant = saved_hgvs_coding - # Edit the output - if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( - hgvs_refreshed_variant.type)): - hgvs_refreshed_variant = 
evm.n_to_c(hgvs_refreshed_variant) - else: - pass - try: - hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) - if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - hgvs_refreshed_variant.posedit.edit.ref[-1] == \ - hgvs_refreshed_variant.posedit.edit.alt[-1]: - hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - 0:-1] - hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - 0:-1] - hgvs_refreshed_variant.posedit.pos.end.base = hgvs_refreshed_variant.posedit.pos.end.base - 1 - hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) - elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - hgvs_refreshed_variant.posedit.edit.ref[0] == \ - hgvs_refreshed_variant.posedit.edit.alt[0]: - hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - 1:] - hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - 1:] - hgvs_refreshed_variant.posedit.pos.start.base = hgvs_refreshed_variant.posedit.pos.start.base + 1 - hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) - except Exception as e: - error = str(e) - # Ensure the final variant is not intronic nor does it cross exon boundaries - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - hgvs_refreshed_variant = saved_hgvs_coding + # Edit the output + if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( + hgvs_refreshed_variant.type)): + hgvs_refreshed_variant = evm.n_to_c(hgvs_refreshed_variant) else: pass - exceptPass() - # Send to empty nw_rel_var - nw_rel_var.append(hgvs_refreshed_variant) - - # Otherwise these variants need to be set - else: - corrective_action_taken = '' - gapped_alignment_warning = '' - # Send to empty nw_rel_var - nw_rel_var.append(saved_hgvs_coding) - - # Warn the user that the g. 
description is not valid - if gapped_alignment_warning != '': - if disparity_deletion_in[0] == 'transcript': - corrective_action_taken = 'Automap has deleted ' + str( - disparity_deletion_in[1]) + ' bp from chromosomal reference sequence ' + str( - hgvs_genomic.ac) + ' to ensure perfect alignment with transcript reference sequence(s)' + gapped_transcripts - if disparity_deletion_in[0] == 'chromosome': - corrective_action_taken = 'Automap has added ' + str( - disparity_deletion_in[1]) + ' bp to chromosomal reference sequence ' + str( - hgvs_genomic.ac) + ' to ensure perfect alignment with transcript reference sequence(s) ' + gapped_transcripts - - # Add additional data to the front of automap - if auto_info != '': - automap = auto_info + '\n' + automap - - rel_var = copy.deepcopy(nw_rel_var) - - # Set the values and append to batch_list - for c_description in rel_var: - query = {'quibble': str(c_description), 'id': validation['id'], - 'warnings': validation['warnings'], 'description': '', 'coding': '', - 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', 'write': 'true', - 'primary_assembly': primary_assembly, 'order': ordering} - batch_list.append(query) - logger.warning("Continue reached when mapping transcript types to variants") - # Call next description - continue - # TYPE = :c. 
+ try: + hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) + if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ + hgvs_refreshed_variant.posedit.edit.ref[-1] == \ + hgvs_refreshed_variant.posedit.edit.alt[-1]: + hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ + 0:-1] + hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ + 0:-1] + hgvs_refreshed_variant.posedit.pos.end.base = hgvs_refreshed_variant.posedit.pos.end.base - 1 + hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) + elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ + hgvs_refreshed_variant.posedit.edit.ref[0] == \ + hgvs_refreshed_variant.posedit.edit.alt[0]: + hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ + 1:] + hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ + 1:] + hgvs_refreshed_variant.posedit.pos.start.base = hgvs_refreshed_variant.posedit.pos.start.base + 1 + hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) + except Exception as e: + error = str(e) + # Ensure the final variant is not intronic nor does it cross exon boundaries + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + hgvs_refreshed_variant = saved_hgvs_coding + else: + pass + fn.exceptPass() + # Send to empty nw_rel_var + nw_rel_var.append(hgvs_refreshed_variant) - if type == ':c.' or type == ':n.': + # Otherwise these variants need to be set + else: + corrective_action_taken = '' + gapped_alignment_warning = '' + # Send to empty nw_rel_var + nw_rel_var.append(saved_hgvs_coding) - # Flag for validation - valid = 'false' - # Collect information for genomic level validation - obj = hp.parse_hgvs_variant(variant) + # Warn the user that the g. 
description is not valid + if gapped_alignment_warning != '': + if disparity_deletion_in[0] == 'transcript': + corrective_action_taken = 'Automap has deleted ' + str( + disparity_deletion_in[1]) + ' bp from chromosomal reference sequence ' + str( + hgvs_genomic.ac) + ' to ensure perfect alignment with transcript reference sequence(s)' + gapped_transcripts + if disparity_deletion_in[0] == 'chromosome': + corrective_action_taken = 'Automap has added ' + str( + disparity_deletion_in[1]) + ' bp to chromosomal reference sequence ' + str( + hgvs_genomic.ac) + ' to ensure perfect alignment with transcript reference sequence(s) ' + gapped_transcripts + + # Add additional data to the front of automap + if auto_info != '': + automap = auto_info + '\n' + automap - tx_ac = obj.ac + rel_var = copy.deepcopy(nw_rel_var) - # Do we keep it? - if select_transcripts != 'all': - if tx_ac in select_transcripts_dict_plus_version.keys(): - pass - # If not get rid of it! - else: - # By marking it as Do Not Write and continuing through the validation loop - validation['write'] = 'false' + # Set the values and append to batch_list + for c_description in rel_var: + query = {'quibble': str(c_description), 'id': validation['id'], + 'warnings': validation['warnings'], 'description': '', 'coding': '', + 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', 'write': 'true', + 'primary_assembly': primary_assembly, 'order': ordering} + batch_list.append(query) + logger.warning("Continue reached when mapping transcript types to variants") + # Call next description continue - else: - pass + # TYPE = :c. 
- # Set a cross_variant object - cross_variant = 'false' - # Se rec_var to '' so it can be updated later - rec_var = '' - try: - to_g = va_func.myevm_t_to_g(obj, hdp, no_norm_evm, primary_assembly, vm, hp, hn, sf, nr_vm) - genomic_ac = to_g.ac - except hgvs.exceptions.HGVSDataNotAvailableError as e: - if (re.search('~', str(e)) and re.search('Alignment is incomplete', str(e))) or re.match( - "No relevant genomic mapping options available", str(e)): - reason = 'Unable to map the input variant onto a genomic position' - if (re.search('~', str(e)) and re.search('Alignment is incomplete', str(e))): - error_list = str(e).split('~')[:-1] - combos = [ - 'Full alignment data between the specified transcript reference sequence and all GRCh37 and GRCh38 genomic reference sequences (including alternate chromosome assemblies, patches and RefSeqGenes) are not available: Consequently the input variant description cannot be fully validated and is not supported: Use the Gene to Transcripts function to determine whether an updated transcript reference sequence is available'] # Partial alignment data is available for the following genomic reference sequences: '] - error = '; '.join(combos) - error = error.replace(': ;', ': ') + if type == ':c.' or type == ':n.': + + # Flag for validation + valid = 'false' + # Collect information for genomic level validation + obj = hp.parse_hgvs_variant(variant) + + tx_ac = obj.ac + + # Do we keep it? + if select_transcripts != 'all': + if tx_ac in select_transcripts_dict_plus_version.keys(): + pass + # If not get rid of it! 
else: - error = str(e) - error = error + ': Consequently the input variant description cannot be fully validated and is not supported: Use the Gene to Transcripts function to determine whether an updated transcript reference sequence is available' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - try: - gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(tx_ac) - except: - gene_symbol = 'none' - if gene_symbol == 'none': - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - else: - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - except TypeError as e: - try: - gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(tx_ac) - except: - gene_symbol = 'none' - if gene_symbol == 'none': - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' + # By marking it as Do Not Write and continuing through the validation loop + validation['write'] = 'false' + continue else: - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an 
alternative genome build' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue + pass - # Get orientation of the gene wrt genome and a list of exons mapped to the genome - ori = va_func.tx_exons(tx_ac=tx_ac, alt_ac=genomic_ac, alt_aln_method=alt_aln_method, hdp=hdp) - orientation = int(ori[0]['alt_strand']) - intronic_variant = 'false' - - # Collect variant sequence information via normalisation (normalizer) or if intronic via mapping - # INTRONIC OFFSETS - Required for Exon table - # Variable to collect offset to exon boundary - ex_offset = 0 - plus = re.compile("\d\+\d") # finds digit + digit - minus = re.compile("\d\-\d") # finds digit - digit - - geno = re.compile(':g.') - if plus.search(input) or minus.search(input): - es = re.compile('error') - if es.search(str(to_g)): - if alt_aln_method != 'genebuild': - error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g - reason = "An error has occurred" - excep = "%s -- %s -- %s\n" % (time.ctime(), reason, variant) + # Set a cross_variant object + cross_variant = 'false' + # Se rec_var to '' so it can be updated later + rec_var = '' + try: + to_g = va_func.myevm_t_to_g(obj, self.hdp, no_norm_evm, primary_assembly, vm, hp, hn, sf, nr_vm) + genomic_ac = to_g.ac + except hgvs.exceptions.HGVSDataNotAvailableError as e: + if (re.search('~', str(e)) and re.search('Alignment is incomplete', str(e))) or re.match( + "No relevant genomic mapping options available", str(e)): + reason = 'Unable to map the input variant onto a genomic position' + if (re.search('~', str(e)) and re.search('Alignment is incomplete', str(e))): + error_list = str(e).split('~')[:-1] + combos = [ + 'Full alignment data between the specified transcript reference sequence and all GRCh37 and GRCh38 genomic reference sequences (including alternate chromosome assemblies, patches and RefSeqGenes) are not available: Consequently the input 
variant description cannot be fully validated and is not supported: Use the Gene to Transcripts function to determine whether an updated transcript reference sequence is available'] # Partial alignment data is available for the following genomic reference sequences: '] + error = '; '.join(combos) + error = error.replace(': ;', ': ') + else: + error = str(e) + error = error + ': Consequently the input variant description cannot be fully validated and is not supported: Use the Gene to Transcripts function to determine whether an updated transcript reference sequence is available' validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue - + try: + gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(tx_ac) + except: + gene_symbol = 'none' + if gene_symbol == 'none': + error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' else: - error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g - reason = "An error has occurred" - excep = "%s -- %s -- %s\n" % (time.ctime(), reason, variant) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - - else: - # Insertions at exon boundaries are miss-handled by vm.g_to_t - if ( - obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset == 0 and obj.posedit.pos.end.offset != 0) or ( - obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset != 0 and obj.posedit.pos.end.offset == 0): - variant = str(obj) + error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + gene_symbol + ' to 
https://variantvalidator.org/ref_finder/, or select an alternative genome build' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + except TypeError as e: + try: + gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(tx_ac) + except: + gene_symbol = 'none' + if gene_symbol == 'none': + error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' else: - # Normalize was I believe to replace ref. Mapping does this anyway - # to_g = hn.normalize(to_g) - variant = str(va_func.myevm_g_to_t(evm, to_g, tx_ac)) - tx_ac = '' + error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue - elif geno.search(input): - if plus.search(variant) or minus.search(variant): - to_g = va_func.genomic(variant, no_norm_evm, hp, hdp, primary_assembly, vm, hn, sf, nr_vm) + # Get orientation of the gene wrt genome and a list of exons mapped to the genome + ori = va_func.tx_exons(tx_ac=tx_ac, alt_ac=genomic_ac, alt_aln_method=alt_aln_method, hdp=self.hdp) + orientation = int(ori[0]['alt_strand']) + intronic_variant = 'false' + + # Collect variant sequence information via normalisation (normalizer) or if intronic via mapping + # INTRONIC OFFSETS - Required for Exon table + # Variable to collect offset to exon boundary + ex_offset = 0 + plus = re.compile("\d\+\d") # finds digit + digit + minus = re.compile("\d\-\d") # finds digit - digit + + geno = re.compile(':g.') + if plus.search(input) or 
minus.search(input): es = re.compile('error') if es.search(str(to_g)): if alt_aln_method != 'genebuild': @@ -3254,1059 +3228,2063 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue + + else: + # Insertions at exon boundaries are miss-handled by vm.g_to_t + if ( + obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset == 0 and obj.posedit.pos.end.offset != 0) or ( + obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset != 0 and obj.posedit.pos.end.offset == 0): + variant = str(obj) + else: + # Normalize was I believe to replace ref. Mapping does this anyway + # to_g = hn.normalize(to_g) + variant = str(va_func.myevm_g_to_t(evm, to_g, tx_ac)) + tx_ac = '' + + elif geno.search(input): + if plus.search(variant) or minus.search(variant): + to_g = va_func.genomic(variant, no_norm_evm, hp, self.hdp, primary_assembly, vm, hn, sf, nr_vm) + es = re.compile('error') + if es.search(str(to_g)): + if alt_aln_method != 'genebuild': + error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g + reason = "An error has occurred" + excep = "%s -- %s -- %s\n" % (time.ctime(), reason, variant) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + else: + error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g + reason = "An error has occurred" + excep = "%s -- %s -- %s\n" % (time.ctime(), reason, variant) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + # Insertions at exon boundaries are miss-handled by vm.g_to_t + if ( + obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset == 0 and obj.posedit.pos.end.offset != 0) or ( + obj.posedit.edit.type == 'ins' and 
obj.posedit.pos.start.offset != 0 and obj.posedit.pos.end.offset == 0): + variant = str(obj) + else: + # Normalize was I believe to replace ref. Mapping does this anyway + # to_g = hn.normalize(to_g) + variant = str(va_func.myevm_g_to_t(evm, to_g, tx_ac)) + tx_ac = '' + else: - # Insertions at exon boundaries are miss-handled by vm.g_to_t - if ( - obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset == 0 and obj.posedit.pos.end.offset != 0) or ( - obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset != 0 and obj.posedit.pos.end.offset == 0): - variant = str(obj) + # Normalize the variant + error = 'false' + try: + h_variant = hn.normalize(obj) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Unsupported normalization of variants spanning the exon-intron boundary', + error): + h_variant = obj + variant = variant + caution = 'This coding sequence variant description spans at least one intron' + automap = 'Use of the corresponding genomic sequence variant descriptions may be invalid. Please refer to https://www35.lamp.le.ac.uk/recommendations/' + validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str( + automap) + logger.warning(str(caution) + ": " + str(automap)) + else: + variant = str(h_variant) + + tx_ac = '' + # Create a crosser (exon boundary crossed) variant + crossed_variant = str(evm._maybe_normalize(obj)) + if variant == crossed_variant: + cross_variant = 'false' else: - # Normalize was I believe to replace ref. 
Mapping does this anyway - # to_g = hn.normalize(to_g) - variant = str(va_func.myevm_g_to_t(evm, to_g, tx_ac)) + hgvs_crossed_variant = evm._maybe_normalize(obj) + cross_variant = [ + "Coding sequence allowing for exon boundary crossing (default = no crossing)", + crossed_variant, hgvs_crossed_variant.ac] + cr_available = 'true' + + # control of cross_variant + if boundary == 'false': + cross_variant = 'false' + + error = va_func.validate(variant, hp=hp, vr=vr) + if error == 'false': + valid = 'true' + else: + excep = "%s -- %s -- %s\n" % (time.ctime(), error, variant) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + # Tackle the plus intronic offset + cck = 'false' + if (plus.search(input)): + # Regular expression catches the start of the interval only based on .00+00 pattern + inv_start = re.compile("\.\d+\+\d") + if (inv_start.search(input)): + # Find pattern e.g. +0000 and assign to a variable + off_value = re.search(r"(\+\d+)", input) + off_value = off_value.group(1) + # Integerise the value and assign to ex_offset + ex_offset = int(off_value) + cck = 'true' + if (minus.search(input)): + # Regular expression catches the start of the interval only based on .00-00 pattern + inv_start = re.compile("\.\d+\-\d") + if (inv_start.search(input)): + # Find pattern e.g. 
-0000 and assign to a variable + off_value = re.search(r"(\-\d+)", input) + off_value = off_value.group(1) + # Integerise the value and assign to ex_offset + ex_offset = int(off_value) + cck = 'true' + + # COORDINATE CHECKER + # hgvs will handle incorrect coordinates so need to automap errors + # Make sure any input intronic coordinates are correct + # Get the desired transcript + pat_r = re.compile(':r.') + pat_g = re.compile(':g.') + if cck == 'true': + dl = re.compile('del') + # This should only ever hit coding and RNA variants + if dl.search(variant): + # RNA + if pat_r.search(trapped_input): + + coding = va_func.coding(variant, hp) + trans_acc = coding.ac + # c to Genome coordinates - Map the variant to the genome + pre_var = va_func.genomic(variant, no_norm_evm, hp, self.hdp, primary_assembly, vm, hn, sf, + nr_vm) + # genome back to C coordinates + post_var = va_func.myevm_g_to_t(evm, pre_var, trans_acc) + + test = hp.parse_hgvs_variant(input) + if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: + caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' + automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' + # automapping of variant completed + # Change to rna variant + posedit = query.posedit + posedit = posedit.lower() + query.posedit = posedit + query.type = 'r' + post_var = str(query) + automap = trapped_input + ' automapped to ' + str(post_var) + validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str( + automap) + relevant = "Select the automapped transcript and click Submit to analyse" + rel_var = [] + rel_var.append(post_var) + # Add gene symbols to the link + cp_rel = copy.copy(rel_var) + del rel_var[:] + for accessions in cp_rel: + error = 'false' + hgvs_vt = hp.parse_hgvs_variant(str(accessions)) + try: + tx_id_info = 
self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error != 'false': + accessions = ['', str(hgvs_vt)] + rel_var.append(accessions) + + else: + # Get hgnc Gene name from command + data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) + if data['error'] != 'false': + error = data['error'] + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + else: + # Set the hgnc name correctly + # If the name is correct no record will be found + if int(data['record']['response']['numFound']) == 0: + current = tx_id_info[6] + else: + current = data['record']['response']['docs'][0]['symbol'] + accessions = [str(current), str(hgvs_vt)] + rel_var.append(accessions) + # Add gene symbols to the link + cp_rel = copy.copy(rel_var) + del rel_var[:] + for accessions in cp_rel: + error = 'false' + hgvs_vt = hp.parse_hgvs_variant(str(accessions[1])) + try: + tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error != 'false': + accessions = ['', str(hgvs_vt)] + rel_var.append(accessions) + else: + # Get hgnc Gene name from command + data = va_func.hgnc_rest( + path="/search/prev_symbol/" + tx_id_info[6]) + if data['error'] != 'false': + error = data['error'] + validation['warnings'] = validation['warnings'] + ': ' + str( + error) + logger.warning(str(error)) + continue + else: + # Set the hgnc name correctly + # If the name is correct no record will be found + if int(data['record']['response']['numFound']) == 0: + current = tx_id_info[6] + else: + current = data['record']['response']['docs'][0]['symbol'] + accessions = [str(current), str(hgvs_vt)] + rel_var.append(accessions) + # Kill current line and append for re-submission + # Tag the line so that it is not written out + validation['write'] = 'false' + # Set the values and append to batch_list + query = {'quibble': fn.valstr(hgvs_vt), 
'id': validation['id'], 'warnings': automap, + 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', + 'genomic_g': '', 'protein': '', 'write': 'true', + 'primary_assembly': primary_assembly, 'order': ordering} + batch_list.append(query) + + # Coding + else: + coding = va_func.coding(variant, hp) + trans_acc = coding.ac + # c to Genome coordinates - Map the variant to the genome + pre_var = hp.parse_hgvs_variant(variant) + try: + pre_var = va_func.myevm_t_to_g(pre_var, self.hdp, no_norm_evm, primary_assembly, vm, hp, + hn, sf, nr_vm) + except: + e = sys.exc_info()[1] + error = str(e) + reason = 'Input coordinates may be invalid' + if error == 'expected from_start_i <= from_end_i': + error = 'Automap is unable to correct the input exon/intron boundary coordinates, please check your variant description' + validation['warnings'] = validation['warnings'] + ': ' + str(error) + continue + else: + fn.exceptPass() + else: + fn.exceptPass() + # genome back to C coordinates + try: + post_var = va_func.myevm_g_to_t(evm, pre_var, trans_acc) + except hgvs.exceptions.HGVSError as error: + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + query = post_var + test = hp.parse_hgvs_variant(input) + if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: + caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' + automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' + # automapping of variant completed + automap = trapped_input + ' automapped to ' + str(post_var) + validation['warnings'] = str(validation['warnings']) + str(caution) + ': ' + str( + automap) + relevant = "Select the automapped transcript and click Submit to analyse" + rel_var = [] + rel_var.append(post_var) + # Add gene symbols to the link + cp_rel = 
copy.copy(rel_var) + del rel_var[:] + for accessions in cp_rel: + error = 'false' + hgvs_vt = hp.parse_hgvs_variant(str(accessions)) + try: + tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error != 'false': + accessions = ['', str(hgvs_vt)] + rel_var.append(accessions) + else: + # Get hgnc Gene name from command + data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) + if data['error'] != 'false': + error = data['error'] + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + else: + # Set the hgnc name correctly + # If the name is correct no record will be found + if int(data['record']['response']['numFound']) == 0: + current = tx_id_info[6] + else: + current = data['record']['response']['docs'][0]['symbol'] + accessions = [str(current), str(hgvs_vt)] + rel_var.append(accessions) + # Add gene symbols to the link + cp_rel = copy.copy(rel_var) + del rel_var[:] + for accessions in cp_rel: + error = 'false' + hgvs_vt = hp.parse_hgvs_variant(str(accessions[1])) + try: + tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error != 'false': + accessions = ['', str(hgvs_vt)] + rel_var.append(accessions) + else: + # Get hgnc Gene name from command + data = va_func.hgnc_rest( + path="/search/prev_symbol/" + tx_id_info[6]) + if data['error'] != 'false': + error = data['error'] + validation['warnings'] = validation['warnings'] + ': ' + str( + error) + logger.warning(str(error)) + continue + + else: + # Set the hgnc name correctly + # If the name is correct no record will be found + if int(data['record']['response']['numFound']) == 0: + current = tx_id_info[6] + else: + current = data['record']['response']['docs'][0]['symbol'] + accessions = [str(current), str(hgvs_vt)] + rel_var.append(accessions) + # Kill current line and append for re-submission + # Tag the 
line so that it is not written out + validation['write'] = 'false' + # Set the values and append to batch_list + query = {'quibble': fn.valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, + 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', + 'genomic_g': '', 'protein': '', 'write': 'true', + 'primary_assembly': primary_assembly, 'order': ordering} + batch_list.append(query) + + else: + if pat_r.search(trapped_input): + coding = va_func.coding(variant, hp) + trans_acc = coding.ac + # c to Genome coordinates - Map the variant to the genome + pre_var = va_func.genomic(variant, no_norm_evm, hp, self.hdp, primary_assembly, vm, hn, sf, + nr_vm) + # genome back to C coordinates + post_var = va_func.myevm_g_to_t(evm, pre_var, trans_acc) + + test = hp.parse_hgvs_variant(input) + if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: + caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' + automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' + # automapping of variant completed + # Change to rna variant + posedit = query.posedit + posedit = posedit.lower() + query.posedit = posedit + query.type = 'r' + post_var = str(query) + automap = input + ' automapped to ' + post_var + validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str( + automap) + relevant = "Select the automapped transcript and click Submit to analyse" + rel_var = [] + rel_var.append(post_var) + # Add gene symbols to the link + cp_rel = copy.copy(rel_var) + del rel_var[:] + for accessions in cp_rel: + error = 'false' + hgvs_vt = hp.parse_hgvs_variant(str(accessions)) + try: + tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error != 'false': + accessions = ['', str(hgvs_vt)] + 
rel_var.append(accessions) + else: + # Get hgnc Gene name from command + data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) + if data['error'] != 'false': + error = data['error'] + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + else: + # Set the hgnc name correctly + # If the name is correct no record will be found + if int(data['record']['response']['numFound']) == 0: + current = tx_id_info[6] + else: + current = data['record']['response']['docs'][0]['symbol'] + accessions = [str(current), str(hgvs_vt)] + rel_var.append(accessions) + # Kill current line and append for re-submission + # Tag the line so that it is not written out + validation['write'] = 'false' + # Set the values and append to batch_list + query = {'quibble': fn.valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, + 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', + 'genomic_g': '', 'protein': '', 'write': 'true', + 'primary_assembly': primary_assembly, 'order': ordering} + batch_list.append(query) + + else: + coding = va_func.coding(variant, hp) + trans_acc = coding.ac + # c to Genome coordinates - Map the variant to the genome + pre_var = va_func.genomic(variant, no_norm_evm, hp, self.hdp, primary_assembly, vm, hn, sf, + nr_vm) + # genome back to C coordinates + post_var = va_func.myevm_g_to_t(evm, pre_var, trans_acc) + + test = hp.parse_hgvs_variant(input) + if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: + caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' + automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' + # automapping of variant completed + automap = str(trapped_input) + ' automapped to ' + str(post_var) + validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' 
+ str( + automap) + relevant = "Select the automapped transcript and click Submit to analyse" + rel_var = [] + rel_var.append(post_var) + # Add gene symbols to the link + cp_rel = copy.copy(rel_var) + del rel_var[:] + for accessions in cp_rel: + error = 'false' + hgvs_vt = hp.parse_hgvs_variant(str(accessions)) + try: + tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error != 'false': + accessions = ['', str(hgvs_vt)] + rel_var.append(accessions) + else: + # Get hgnc Gene name from command + data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) + if data['error'] != 'false': + reason = 'Cannot currently display the required information:' + error = data['error'] + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + else: + # Set the hgnc name correctly + # If the name is correct no record will be found + if int(data['record']['response']['numFound']) == 0: + current = tx_id_info[6] + else: + current = data['record']['response']['docs'][0]['symbol'] + accessions = [str(current), str(hgvs_vt)] + rel_var.append(accessions) + # Kill current line and append for re-submission + # Tag the line so that it is not written out + validation['write'] = 'false' + # Set the values and append to batch_list + query = {'quibble': fn.valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, + 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', + 'genomic_g': '', 'protein': '', 'write': 'true', + 'primary_assembly': primary_assembly, 'order': ordering} + batch_list.append(query) + + + # If cck not true + elif pat_r.search(trapped_input): + # set input hgvs object + hgvs_rna_input = hp.parse_hgvs_variant( + trapped_input) # Traps the hgvs variant of r. 
for further use + inp = str(va_func.hgvs_r_to_c(hgvs_rna_input)) + # Regex + plus = re.compile("\d\+\d") # finds digit + digit + minus = re.compile("\d\-\d") # finds digit - digit + if plus.search(input) or minus.search(input): + to_g = va_func.genomic(inp, no_norm_evm, hp, self.hdp, primary_assembly, vm, hn, sf, nr_vm) + es = re.compile('error') + if es.search(str(to_g)): + if alt_aln_method != 'genebuild': + error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g + reason = "An error has occurred" + excep = "%s -- %s -- %s\n" % (time.ctime(), reason, variant) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + else: + error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g + reason = "An error has occurred" + excep = "%s -- %s -- %s\n" % (time.ctime(), reason, variant) + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + else: + # Set variants pre and post genomic norm + hgvs_inp = va_func.myevm_g_to_t(evm, to_g, tx_ac=obj.ac) + to_g = hn.normalize(to_g) + hgvs_otp = va_func.myevm_g_to_t(evm, to_g, tx_ac=obj.ac) + tx_ac = '' + else: + # Set variants pre and post RNA norm + hgvs_inp = hp.parse_hgvs_variant(inp) + try: + hgvs_otp = hn.normalize(hgvs_inp) + except hgvs.exceptions.HGVSError as e: + hgvs_otp = hgvs_inp tx_ac = '' - else: - # Normalize the variant - error = 'false' - try: - h_variant = hn.normalize(obj) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Unsupported normalization of variants spanning the exon-intron boundary', - error): - h_variant = obj - variant = variant - caution = 'This coding sequence variant description spans at least one intron' - automap = 'Use of the corresponding genomic sequence variant descriptions may be invalid. 
Please refer to https://www35.lamp.le.ac.uk/recommendations/' - validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str( - automap) - logger.warning(str(caution) + ": " + str(automap)) - else: - variant = str(h_variant) + # Set remaining variables + redit = str(hgvs_otp.posedit.edit) + redit = redit.lower() + hgvs_otp.posedit.edit = redit + otp = str(hgvs_otp) + query = str(hgvs_otp.posedit.pos) + test = str(hgvs_inp.posedit.pos) + query = query.replace('T', 'U') + query = query.replace('ENSU', 'ENST') + test = test.replace('T', 'U') + test = test.replace('ENSU', 'ENST') + output = otp.replace(':c.', ':r.') + # Apply coordinates test + if query != test: + caution = 'The variant description ' + input + ' requires alteration to comply with HGVS variant nomenclature:' + automap = 'Automap has corrected the variant description' + # automapping of variant completed + automap = trapped_input + ' automapped to ' + output + validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str(automap) + relevant = "Select the automapped transcript and click Submit to analyse" + rel_var = [] + rel_var.append(output) + # Add gene symbols to the link + cp_rel = copy.copy(rel_var) + del rel_var[:] + for accessions in cp_rel: + error = 'false' + hgvs_vt = hp.parse_hgvs_variant(str(accessions)) + try: + tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error != 'false': + accessions = ['', str(hgvs_vt)] + rel_var.append(accessions) + else: + # Get hgnc Gene name from command + data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) + if data['error'] != 'false': + error = data['error'] + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + else: + # Set the hgnc name correctly + # If the name is correct no record will be found + if int(data['record']['response']['numFound']) == 0: + current = 
tx_id_info[6] + else: + current = data['record']['response']['docs'][0]['symbol'] + accessions = [str(current), str(hgvs_vt)] + rel_var.append(accessions) + # Kill current line and append for re-submission + # Tag the line so that it is not written out + validation['write'] = 'false' + # Set the values and append to batch_list + query = {'quibble': fn.valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, + 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', + 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, + 'order': ordering} + batch_list.append(query) + + elif pat_g.search(input): + pass - tx_ac = '' - # Create a crosser (exon boundary crossed) variant - crossed_variant = str(evm._maybe_normalize(obj)) - if variant == crossed_variant: - cross_variant = 'false' else: - hgvs_crossed_variant = evm._maybe_normalize(obj) - cross_variant = [ - "Coding sequence allowing for exon boundary crossing (default = no crossing)", - crossed_variant, hgvs_crossed_variant.ac] - cr_available = 'true' + query = hp.parse_hgvs_variant(variant) + test = hp.parse_hgvs_variant(input) + if query.posedit.pos != test.posedit.pos: + caution = 'The variant description ' + input + ' requires alteration to comply with HGVS variant nomenclature:' + automap = 'Automap has corrected the variant description' + # automapping of variant completed + automap = str(test) + ' automapped to ' + str(query) + validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str(automap) + relevant = "Select the automapped transcript and click Submit to analyse" + rel_var = [] + rel_var.append(query) + # Add gene symbols to the link + cp_rel = copy.copy(rel_var) + del rel_var[:] + for accessions in cp_rel: + error = 'false' + hgvs_vt = hp.parse_hgvs_variant(str(accessions)) + try: + tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error != 'false': + accessions = 
['', str(hgvs_vt)] + rel_var.append(accessions) + else: + # Get hgnc Gene name from command + data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) + if data['error'] != 'false': + reason = 'Cannot currently display the required information:' + error = data['error'] + validation['warnings'] = validation['warnings'] + ': ' + str(error) + logger.warning(str(error)) + continue + + else: + # Set the hgnc name correctly + # If the name is correct no record will be found + if int(data['record']['response']['numFound']) == 0: + current = tx_id_info[6] + else: + current = data['record']['response']['docs'][0]['symbol'] + accessions = [str(current), str(hgvs_vt)] + rel_var.append(accessions) + # Kill current line and append for re-submission + # Tag the line so that it is not written out + validation['write'] = 'false' + # Set the values and append to batch_list + query = {'quibble': fn.valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, + 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', + 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, + 'order': ordering} + batch_list.append(query) - # control of cross_variant - if boundary == 'false': - cross_variant = 'false' + # VALIDATION of intronic variants + pre_valid = hp.parse_hgvs_variant(input) + post_valid = hp.parse_hgvs_variant(variant) + if valid == 'false': + error = 'false' + genomic_validation = str( + va_func.genomic(input, no_norm_evm, hp, self.hdp, primary_assembly, vm, hn, sf, nr_vm)) + del_end = re.compile('\ddel$') + delins = re.compile('delins') + inv = re.compile('inv') + if fn.valstr(pre_valid) != fn.valstr(post_valid): + if type != ':g.': + if caution == '': + caution = fn.valstr(pre_valid) + ' automapped to ' + fn.valstr(post_valid) + else: + pass + validation['warnings'] = validation['warnings'] + ': ' + str(caution) + logger.warning(str(caution)) + else: + pass + else: + pass - error = va_func.validate(variant, hp=hp, vr=vr) + 
# Apply validation to intronic variant descriptions (should be valid but make sure) + error = va_func.validate(genomic_validation, hp=hp, vr=vr) if error == 'false': valid = 'true' else: + excep = "%s -- %s -- %s\n" % (time.ctime(), error, variant) validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) continue - # Tackle the plus intronic offset - cck = 'false' - if (plus.search(input)): - # Regular expression catches the start of the interval only based on .00+00 pattern - inv_start = re.compile("\.\d+\+\d") - if (inv_start.search(input)): - # Find pattern e.g. +0000 and assign to a variable - off_value = re.search(r"(\+\d+)", input) - off_value = off_value.group(1) - # Integerise the value and assign to ex_offset - ex_offset = int(off_value) - cck = 'true' - if (minus.search(input)): - # Regular expression catches the start of the interval only based on .00-00 pattern - inv_start = re.compile("\.\d+\-\d") - if (inv_start.search(input)): - # Find pattern e.g. 
-0000 and assign to a variable - off_value = re.search(r"(\-\d+)", input) - off_value = off_value.group(1) - # Integerise the value and assign to ex_offset - ex_offset = int(off_value) - cck = 'true' - - # COORDINATE CHECKER - # hgvs will handle incorrect coordinates so need to automap errors - # Make sure any input intronic coordinates are correct - # Get the desired transcript - pat_r = re.compile(':r.') - pat_g = re.compile(':g.') - if cck == 'true': - dl = re.compile('del') - # This should only ever hit coding and RNA variants - if dl.search(variant): - # RNA - if pat_r.search(trapped_input): - - coding = va_func.coding(variant, hp) - trans_acc = coding.ac - # c to Genome coordinates - Map the variant to the genome - pre_var = va_func.genomic(variant, no_norm_evm, hp, hdp, primary_assembly, vm, hn, sf, - nr_vm) - # genome back to C coordinates - post_var = va_func.myevm_g_to_t(evm, pre_var, trans_acc) - - test = hp.parse_hgvs_variant(input) - if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: - caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' - automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' - # automapping of variant completed - # Change to rna variant - posedit = query.posedit - posedit = posedit.lower() - query.posedit = posedit - query.type = 'r' - post_var = str(query) - automap = trapped_input + ' automapped to ' + str(post_var) - validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str( - automap) - relevant = "Select the automapped transcript and click Submit to analyse" - rel_var = [] - rel_var.append(post_var) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = hp.parse_hgvs_variant(str(accessions)) - try: - tx_id_info = 
hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - - else: - # Get hgnc Gene name from command - data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - error = data['error'] - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = hp.parse_hgvs_variant(str(accessions[1])) - try: - tx_id_info = hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = va_func.hgnc_rest( - path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - error = data['error'] - validation['warnings'] = validation['warnings'] + ': ' + str( - error) - logger.warning(str(error)) - continue - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - # Kill current line and append for re-submission - # Tag the line so that it is not written out - validation['write'] = 'false' - # Set the values and append to batch_list - query = {'quibble': valstr(hgvs_vt), 'id': 
validation['id'], 'warnings': automap, - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', - 'genomic_g': '', 'protein': '', 'write': 'true', - 'primary_assembly': primary_assembly, 'order': ordering} - batch_list.append(query) + if valid == 'true': + var_tab = 'true' + cores = "HGVS-compliant variant descriptions" + warning - # Coding - else: - coding = va_func.coding(variant, hp) - trans_acc = coding.ac - # c to Genome coordinates - Map the variant to the genome - pre_var = hp.parse_hgvs_variant(variant) - try: - pre_var = va_func.myevm_t_to_g(pre_var, hdp, no_norm_evm, primary_assembly, vm, hp, - hn, sf, nr_vm) - except: - e = sys.exc_info()[1] - error = str(e) - reason = 'Input coordinates may be invalid' - if error == 'expected from_start_i <= from_end_i': - error = 'Automap is unable to correct the input exon/intron boundary coordinates, please check your variant description' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - continue + # v0.1a1 edit + if fn.valstr(pre_valid) != fn.valstr(post_valid): + if type == ':g.': + if caution == '': + caution = fn.valstr(pre_valid) + ' automapped to ' + fn.valstr(post_valid) else: - exceptPass() + pass + validation['warnings'] = validation['warnings'] + ': ' + str(caution) else: - exceptPass() - # genome back to C coordinates - try: - post_var = va_func.myevm_g_to_t(evm, pre_var, trans_acc) - except hgvs.exceptions.HGVSError as error: - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - query = post_var - test = hp.parse_hgvs_variant(input) - if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: - caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' - automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' - # automapping of variant 
completed - automap = trapped_input + ' automapped to ' + str(post_var) - validation['warnings'] = str(validation['warnings']) + str(caution) + ': ' + str( - automap) - relevant = "Select the automapped transcript and click Submit to analyse" - rel_var = [] - rel_var.append(post_var) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = hp.parse_hgvs_variant(str(accessions)) - try: - tx_id_info = hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - error = data['error'] - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue + pass + else: + pass - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = hp.parse_hgvs_variant(str(accessions[1])) - try: - tx_id_info = hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = va_func.hgnc_rest( - path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - error = data['error'] - validation['warnings'] = validation['warnings'] + ': ' + str( - error) - logger.warning(str(error)) - continue + # COLLECT VARIANT DESCRIPTIONS + 
############################## - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - # Kill current line and append for re-submission - # Tag the line so that it is not written out - validation['write'] = 'false' - # Set the values and append to batch_list - query = {'quibble': valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', - 'genomic_g': '', 'protein': '', 'write': 'true', - 'primary_assembly': primary_assembly, 'order': ordering} - batch_list.append(query) + # Coding sequence - BASED ON NORMALIZED VARIANT IF EXONIC + hgvs_coding = va_func.coding(variant, hp) + boundary = re.compile('exon-intron boundary') + spanning = re.compile('exon/intron') - else: - if pat_r.search(trapped_input): - coding = va_func.coding(variant, hp) - trans_acc = coding.ac - # c to Genome coordinates - Map the variant to the genome - pre_var = va_func.genomic(variant, no_norm_evm, hp, hdp, primary_assembly, vm, hn, sf, - nr_vm) - # genome back to C coordinates - post_var = va_func.myevm_g_to_t(evm, pre_var, trans_acc) - - test = hp.parse_hgvs_variant(input) - if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: - caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' - automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' - # automapping of variant completed - # Change to rna variant - posedit = query.posedit - posedit = posedit.lower() - query.posedit = posedit - query.type = 'r' - post_var = str(query) - automap = input + ' automapped to ' + post_var - 
validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str( - automap) - relevant = "Select the automapped transcript and click Submit to analyse" - rel_var = [] - rel_var.append(post_var) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = hp.parse_hgvs_variant(str(accessions)) - try: - tx_id_info = hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - error = data['error'] - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue + try: + hgvs_coding = hn.normalize(hgvs_coding) + except hgvs.exceptions.HGVSError as e: + error = str(e) - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - # Kill current line and append for re-submission - # Tag the line so that it is not written out - validation['write'] = 'false' - # Set the values and append to batch_list - query = {'quibble': valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', - 'genomic_g': '', 'protein': '', 'write': 'true', - 'primary_assembly': primary_assembly, 'order': ordering} - batch_list.append(query) + # Gap compensating code status + gap_compensation = True + # Gap gene black list + try: + gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(hgvs_coding.ac) + except Exception: + fn.exceptPass() else: - coding = 
va_func.coding(variant, hp) - trans_acc = coding.ac - # c to Genome coordinates - Map the variant to the genome - pre_var = va_func.genomic(variant, no_norm_evm, hp, hdp, primary_assembly, vm, hn, sf, - nr_vm) - # genome back to C coordinates - post_var = va_func.myevm_g_to_t(evm, pre_var, trans_acc) - - test = hp.parse_hgvs_variant(input) - if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: - caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' - automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' - # automapping of variant completed - automap = str(trapped_input) + ' automapped to ' + str(post_var) - validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str( - automap) - relevant = "Select the automapped transcript and click Submit to analyse" - rel_var = [] - rel_var.append(post_var) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = hp.parse_hgvs_variant(str(accessions)) - try: - tx_id_info = hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - reason = 'Cannot currently display the required information:' - error = data['error'] - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue + # If the gene symbol is not in the list, the value False will be returned + gap_compensation = gapGenes.gap_black_list(gene_symbol) - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if 
int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - # Kill current line and append for re-submission - # Tag the line so that it is not written out - validation['write'] = 'false' - # Set the values and append to batch_list - query = {'quibble': valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', - 'genomic_g': '', 'protein': '', 'write': 'true', - 'primary_assembly': primary_assembly, 'order': ordering} - batch_list.append(query) - - - # If cck not true - elif pat_r.search(trapped_input): - # set input hgvs object - hgvs_rna_input = hp.parse_hgvs_variant( - trapped_input) # Traps the hgvs variant of r. for further use - inp = str(va_func.hgvs_r_to_c(hgvs_rna_input)) - # Regex - plus = re.compile("\d\+\d") # finds digit + digit - minus = re.compile("\d\-\d") # finds digit - digit - if plus.search(input) or minus.search(input): - to_g = va_func.genomic(inp, no_norm_evm, hp, hdp, primary_assembly, vm, hn, sf, nr_vm) - es = re.compile('error') - if es.search(str(to_g)): - if alt_aln_method != 'genebuild': - error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g - reason = "An error has occurred" - excep = "%s -- %s -- %s\n" % (time.ctime(), reason, variant) + # Intron spanning variants + if re.search('boundary', str(error)) or re.search('spanning', str(error)): + try: + hgvs_coding = evm._maybe_normalize(hgvs_coding) + gap_compensation = False + except hgvs.exceptions.HGVSError as error: validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue + else: + pass + # Warn status + logger.warning("gap_compensation_1 = " + str(gap_compensation)) + coding = fn.valstr(hgvs_coding) + + # RNA sequence + hgvs_rna = 
copy.deepcopy(hgvs_coding) + hgvs_rna = va_func.hgvs_c_to_r(hgvs_rna) + rna = str(hgvs_rna) + + # Genomic sequence + hgvs_genomic = va_func.myevm_t_to_g(hgvs_coding, self.hdp, no_norm_evm, primary_assembly, vm, hp, hn, + sf, nr_vm) + final_hgvs_genomic = hgvs_genomic + + # genomic_possibilities + # 1. take the simple 3 pr normalized hgvs_genomic + # 2. Lock in hgvs_genomic at its most 5 prime position wrt genome + hgvs_genomic_possibilities = [] + + # Loop out gap finding code under these circumstances! + if gap_compensation is True: + logger.warning('g_to_t gap code 1 active') + rn_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic) + hgvs_genomic_possibilities.append(rn_hgvs_genomic) + if orientation != -1: + try: + chromosome_normalized_hgvs_coding = reverse_normalizer.normalize(hgvs_coding) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + chromosome_normalized_hgvs_coding = hgvs_coding else: - error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g - reason = "An error has occurred" - excep = "%s -- %s -- %s\n" % (time.ctime(), reason, variant) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue + try: + chromosome_normalized_hgvs_coding = hn.normalize(hgvs_coding) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + chromosome_normalized_hgvs_coding = hgvs_coding - else: - # Set variants pre and post genomic norm - hgvs_inp = va_func.myevm_g_to_t(evm, to_g, tx_ac=obj.ac) - to_g = hn.normalize(to_g) - hgvs_otp = va_func.myevm_g_to_t(evm, to_g, tx_ac=obj.ac) - tx_ac = '' - else: - # Set variants pre and post RNA norm - hgvs_inp = hp.parse_hgvs_variant(inp) - try: - hgvs_otp = hn.normalize(hgvs_inp) - except hgvs.exceptions.HGVSError as e: - hgvs_otp = hgvs_inp - tx_ac = '' + most_3pr_hgvs_genomic = va_func.myvm_t_to_g(chromosome_normalized_hgvs_coding, + hgvs_genomic.ac, 
no_norm_evm, vm, hp, hn, sf, + nr_vm) + hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) - # Set remaining variables - redit = str(hgvs_otp.posedit.edit) - redit = redit.lower() - hgvs_otp.posedit.edit = redit - otp = str(hgvs_otp) - query = str(hgvs_otp.posedit.pos) - test = str(hgvs_inp.posedit.pos) - query = query.replace('T', 'U') - query = query.replace('ENSU', 'ENST') - test = test.replace('T', 'U') - test = test.replace('ENSU', 'ENST') - output = otp.replace(':c.', ':r.') - # Apply coordinates test - if query != test: - caution = 'The variant description ' + input + ' requires alteration to comply with HGVS variant nomenclature:' - automap = 'Automap has corrected the variant description' - # automapping of variant completed - automap = trapped_input + ' automapped to ' + output - validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str(automap) - relevant = "Select the automapped transcript and click Submit to analyse" - rel_var = [] - rel_var.append(output) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = hp.parse_hgvs_variant(str(accessions)) + # Push from side to side to try pick up odd placements + # MAKE A NO NORM HGVS2VCF + # First to the right + hgvs_stash = copy.deepcopy(hgvs_coding) + try: + hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) + except: + fn.exceptPass() try: - tx_id_info = hdp.get_tx_identity_info(str(hgvs_vt.ac)) + stash_ac = hgvs_stash.ac + stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, sf) + stash_pos = int(stash_dict['pos']) + stash_ref = stash_dict['ref'] + stash_alt = stash_dict['alt'] + # Generate an end position + stash_end = str(stash_pos + len(stash_ref) - 1) + # make a not real deletion insertion + stash_hgvs_not_delins = hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' 
+ str( + stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + try: + stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) + except: + fn.exceptPass() + # Store a tx copy for later use + test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) + # stash_genomic = vm.t_to_g(test_stash_tx_right, hgvs_genomic.ac) + stash_genomic = va_func.myvm_t_to_g(test_stash_tx_right, hgvs_genomic.ac, no_norm_evm, + vm, hp, hn, sf, nr_vm) + # Stash the outputs if required + # test variants = NC_000006.11:g.90403795G= (causes double identity) + # NC_000002.11:g.73675227_73675228insCTC (? incorrect assumed insertion position) + # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': + # pass + if len(test_stash_tx_right.posedit.edit.ref) == (( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + stash_tx_right = test_stash_tx_right + if hasattr(test_stash_tx_right.posedit.edit, + 'alt') and test_stash_tx_right.posedit.edit.alt is not None: + alt = test_stash_tx_right.posedit.edit.alt + else: + alt = '' + if hasattr(stash_genomic.posedit.edit, + 'alt') and stash_genomic.posedit.edit.alt is not None: + g_alt = stash_genomic.posedit.edit.alt + else: + g_alt = '' + if (len(alt) - ( + test_stash_tx_right.posedit.pos.end.base - test_stash_tx_right.posedit.pos.start.base) + 1) != ( + len(g_alt) - ( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + hgvs_genomic_possibilities.append(stash_genomic) + else: + hgvs_genomic_possibilities.append('') + elif test_stash_tx_right.posedit.edit.type == 'identity': + reform_ident = str(test_stash_tx_right).split(':')[0] + reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_right.posedit.pos) + 'del' + str( + test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) + hgvs_reform_ident = hp.parse_hgvs_variant(reform_ident) + try: + hn.normalize(hgvs_reform_ident) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error): + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append('') + else: + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append(stash_genomic) + else: + try: + hn.normalize(test_stash_tx_right) + except hgvs.exceptions.HGVSUnsupportedOperationError: + hgvs_genomic_possibilities.append('') + else: + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append(stash_genomic) except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - error = data['error'] - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue + test_stash_tx_right = copy.deepcopy(hgvs_coding) + fn.exceptPass() + # Intronic positions not supported. 
Will cause a Value Error + except ValueError: + test_stash_tx_right = copy.deepcopy(hgvs_coding) + fn.exceptPass() + + # Then to the left + hgvs_stash = copy.deepcopy(hgvs_coding) + try: + hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) + except: + fn.exceptPass() + try: + stash_ac = hgvs_stash.ac + stash_dict = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, primary_assembly, reverse_normalizer, + sf) + stash_pos = int(stash_dict['pos']) + stash_ref = stash_dict['ref'] + stash_alt = stash_dict['alt'] + # Generate an end position + stash_end = str(stash_pos + len(stash_ref) - 1) + # make a not real deletion insertion + stash_hgvs_not_delins = hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' + str( + stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + try: + stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) + except: + fn.exceptPass() + # Store a tx copy for later use + test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) + # stash_genomic = vm.t_to_g(test_stash_tx_left, hgvs_genomic.ac) + stash_genomic = va_func.myvm_t_to_g(test_stash_tx_left, hgvs_genomic.ac, no_norm_evm, + vm, hp, hn, sf, nr_vm) + # Stash the outputs if required + # test variants = NC_000006.11:g.90403795G= (causes double identity) + # NC_000002.11:g.73675227_73675228insCTC + # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': + # pass + if len(test_stash_tx_left.posedit.edit.ref) == (( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): + stash_tx_left = test_stash_tx_left + if hasattr(test_stash_tx_left.posedit.edit, + 'alt') and test_stash_tx_left.posedit.edit.alt is not None: + alt = test_stash_tx_left.posedit.edit.alt + else: + alt = '' + if hasattr(stash_genomic.posedit.edit, + 'alt') and stash_genomic.posedit.edit.alt is not None: 
+ g_alt = stash_genomic.posedit.edit.alt + else: + g_alt = '' + if (len(alt) - ( + test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( + len(g_alt) - ( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + hgvs_genomic_possibilities.append(stash_genomic) + else: + hgvs_genomic_possibilities.append('') + elif test_stash_tx_left.posedit.edit.type == 'identity': + reform_ident = str(test_stash_tx_left).split(':')[0] + reform_ident = reform_ident + ':c.' + str(test_stash_tx_left.posedit.pos) + 'del' + str( + test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) + hgvs_reform_ident = hp.parse_hgvs_variant(reform_ident) + try: + hn.normalize(hgvs_reform_ident) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error): + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append('') + else: + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append(stash_genomic) else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] + try: + hn.normalize(test_stash_tx_left) + except hgvs.exceptions.HGVSUnsupportedOperationError: + hgvs_genomic_possibilities.append('') else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - # Kill current line and append for re-submission - # Tag the line so that it is not written out - validation['write'] = 'false' - # Set the values and append to batch_list - query = {'quibble': valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', - 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, - 'order': ordering} - batch_list.append(query) - - elif 
pat_g.search(input): - pass + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append(stash_genomic) + except hgvs.exceptions.HGVSError as e: + test_stash_tx_left = copy.deepcopy(hgvs_coding) + fn.exceptPass() + except ValueError: + test_stash_tx_left = copy.deepcopy(hgvs_coding) + fn.exceptPass() - else: - query = hp.parse_hgvs_variant(variant) - test = hp.parse_hgvs_variant(input) - if query.posedit.pos != test.posedit.pos: - caution = 'The variant description ' + input + ' requires alteration to comply with HGVS variant nomenclature:' - automap = 'Automap has corrected the variant description' - # automapping of variant completed - automap = str(test) + ' automapped to ' + str(query) - validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str(automap) - relevant = "Select the automapped transcript and click Submit to analyse" - rel_var = [] - rel_var.append(query) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = hp.parse_hgvs_variant(str(accessions)) + # direct mapping from reverse_normalized transcript insertions in the delins format try: - tx_id_info = hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - reason = 'Cannot currently display the required information:' - error = data['error'] - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue + if hgvs_coding.posedit.edit.type == 'ins': + most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) + most_3pr_hgvs_transcript_variant = reverse_normalizer.normalize(hgvs_coding) + try: + n_3pr = vm.c_to_n(most_3pr_hgvs_transcript_variant) + n_5pr = 
vm.c_to_n(most_5pr_hgvs_transcript_variant) + except: + n_3pr = most_3pr_hgvs_transcript_variant + n_5pr = most_5pr_hgvs_transcript_variant + # Make into a delins by adding the ref bases to the variant ref and alt + pr3_ref = sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, + n_3pr.posedit.pos.end.base) + pr5_ref = sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, + n_5pr.posedit.pos.end.base) + most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref + most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[ + 0] + most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ + pr3_ref[1] + most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[ + 0] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ + pr5_ref[1] + # Map to the genome + genomic_from_most_3pr_hgvs_transcript_variant = vm.t_to_g( + most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) + genomic_from_most_5pr_hgvs_transcript_variant = vm.t_to_g( + most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) + # Normalize - If the variant spans a gap it should then form a static genomic variant + try: + genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( + genomic_from_most_3pr_hgvs_transcript_variant) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if error == 'base start position must be <= end position': + start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base + end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start + genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( + genomic_from_most_3pr_hgvs_transcript_variant) + try: + genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( + genomic_from_most_5pr_hgvs_transcript_variant) + except 
hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if error == 'base start position must be <= end position': + start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base + end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start + genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( + genomic_from_most_5pr_hgvs_transcript_variant) + try: + if genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' + str( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant = hp.parse_hgvs_variant( + genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - # Kill current line and append for re-submission - # Tag the line so that it is not written out - validation['write'] = 'false' - # Set the values and append to batch_list - query = {'quibble': valstr(hgvs_vt), 
'id': validation['id'], 'warnings': automap, - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', - 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, - 'order': ordering} - batch_list.append(query) - - # VALIDATION of intronic variants - pre_valid = hp.parse_hgvs_variant(input) - post_valid = hp.parse_hgvs_variant(variant) - if valid == 'false': - error = 'false' - genomic_validation = str( - va_func.genomic(input, no_norm_evm, hp, hdp, primary_assembly, vm, hn, sf, nr_vm)) - del_end = re.compile('\ddel$') - delins = re.compile('delins') - inv = re.compile('inv') - if valstr(pre_valid) != valstr(post_valid): - if type != ':g.': - if caution == '': - caution = valstr(pre_valid) + ' automapped to ' + valstr(post_valid) - else: - pass - validation['warnings'] = validation['warnings'] + ': ' + str(caution) - logger.warning(str(caution)) - else: - pass - else: - pass + try: + if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' 
+ str( + most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant = hp.parse_hgvs_variant( + most_3pr_hgvs_transcript_variant_delins_from_dup) - # Apply validation to intronic variant descriptions (should be valid but make sure) - error = va_func.validate(genomic_validation, hp=hp, vr=vr) - if error == 'false': - valid = 'true' - else: + try: + if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' 
+ str( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant = hp.parse_hgvs_variant( + genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) - excep = "%s -- %s -- %s\n" % (time.ctime(), error, variant) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - continue + try: + if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' + str( + most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant = hp.parse_hgvs_variant( + most_5pr_hgvs_transcript_variant_delins_from_dup) - if valid == 'true': - var_tab = 'true' - cores = "HGVS-compliant variant descriptions" + warning + if len(genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( + most_3pr_hgvs_transcript_variant.posedit.edit.alt): + hgvs_genomic_possibilities.append(genomic_from_most_3pr_hgvs_transcript_variant) + if len(genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( + most_5pr_hgvs_transcript_variant.posedit.edit.alt): + hgvs_genomic_possibilities.append(genomic_from_most_5pr_hgvs_transcript_variant) - # v0.1a1 edit - if valstr(pre_valid) != 
valstr(post_valid): - if type == ':g.': - if caution == '': - caution = valstr(pre_valid) + ' automapped to ' + valstr(post_valid) - else: - pass - validation['warnings'] = validation['warnings'] + ': ' + str(caution) - else: - pass - else: - pass + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + pass - # COLLECT VARIANT DESCRIPTIONS - ############################## + logger.info('\nGENOMIC POSSIBILITIES') + for possibility in hgvs_genomic_possibilities: + if possibility == '': + logger.info('X') + else: + logger.info(fn.valstr(possibility)) - # Coding sequence - BASED ON NORMALIZED VARIANT IF EXONIC - hgvs_coding = va_func.coding(variant, hp) - boundary = re.compile('exon-intron boundary') - spanning = re.compile('exon/intron') + logger.info('\n') - try: - hgvs_coding = hn.normalize(hgvs_coding) - except hgvs.exceptions.HGVSError as e: - error = str(e) + # Set variables for problem specific warnings + gapped_alignment_warning = '' + corrective_action_taken = '' + gapped_transcripts = '' + auto_info = '' - # Gap compensating code status - gap_compensation = True + # Mark as not disparity detected + disparity_deletion_in = ['false', 'false'] - # Gap gene black list - try: - gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(hgvs_coding.ac) - except Exception: - exceptPass() - else: - # If the gene symbol is not in the list, the value False will be returned - gap_compensation = gapGenes.gap_black_list(gene_symbol) + # Loop through to see if a gap can be located + # Set the variables required for corrective normalization + possibility_counter = 0 + suppress_c_normalization = 'false' # Applies to boundary crossing normalization - # Intron spanning variants - if re.search('boundary', str(error)) or re.search('spanning', str(error)): - try: - hgvs_coding = 
evm._maybe_normalize(hgvs_coding) - gap_compensation = False - except hgvs.exceptions.HGVSError as error: - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - pass + # Copy a version of hgvs_genomic_possibilities + for possibility in hgvs_genomic_possibilities: + possibility_counter = possibility_counter + 1 - # Warn status - logger.warning("gap_compensation_1 = " + str(gap_compensation)) - coding = valstr(hgvs_coding) - - # RNA sequence - hgvs_rna = copy.deepcopy(hgvs_coding) - hgvs_rna = va_func.hgvs_c_to_r(hgvs_rna) - rna = str(hgvs_rna) - - # Genomic sequence - hgvs_genomic = va_func.myevm_t_to_g(hgvs_coding, hdp, no_norm_evm, primary_assembly, vm, hp, hn, - sf, nr_vm) - final_hgvs_genomic = hgvs_genomic - - # genomic_possibilities - # 1. take the simple 3 pr normalized hgvs_genomic - # 2. Lock in hgvs_genomic at its most 5 prime position wrt genome - hgvs_genomic_possibilities = [] - - # Loop out gap finding code under these circumstances! 
- if gap_compensation is True: - logger.warning('g_to_t gap code 1 active') - rn_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic) - hgvs_genomic_possibilities.append(rn_hgvs_genomic) - if orientation != -1: - try: - chromosome_normalized_hgvs_coding = reverse_normalizer.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - chromosome_normalized_hgvs_coding = hgvs_coding - else: - try: - chromosome_normalized_hgvs_coding = hn.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - chromosome_normalized_hgvs_coding = hgvs_coding + # Loop out stash possibilities which will not spot gaps so are empty + if possibility == '': + continue - most_3pr_hgvs_genomic = va_func.myvm_t_to_g(chromosome_normalized_hgvs_coding, - hgvs_genomic.ac, no_norm_evm, vm, hp, hn, sf, - nr_vm) - hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) + # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps + hgvs_genomic_variant = copy.deepcopy(possibility) + stored_hgvs_genomic_variant = copy.deepcopy(hgvs_genomic_variant) - # Push from side to side to try pick up odd placements - # MAKE A NO NORM HGVS2VCF - # First to the right - hgvs_stash = copy.deepcopy(hgvs_coding) - try: - hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) - except: - exceptPass() - try: - stash_ac = hgvs_stash.ac - stash_dict = va_H2V.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, sf) - stash_pos = int(stash_dict['pos']) - stash_ref = stash_dict['ref'] - stash_alt = stash_dict['alt'] - # Generate an end position - stash_end = str(stash_pos + len(stash_ref) - 1) - # make a not real deletion insertion - stash_hgvs_not_delins = hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' 
+ str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) - try: - stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) - except: - exceptPass() - # Store a tx copy for later use - test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) - # stash_genomic = vm.t_to_g(test_stash_tx_right, hgvs_genomic.ac) - stash_genomic = va_func.myvm_t_to_g(test_stash_tx_right, hgvs_genomic.ac, no_norm_evm, - vm, hp, hn, sf, nr_vm) - # Stash the outputs if required - # test variants = NC_000006.11:g.90403795G= (causes double identity) - # NC_000002.11:g.73675227_73675228insCTC (? incorrect assumed insertion position) - # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) - # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': - # pass - if len(test_stash_tx_right.posedit.edit.ref) == (( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - stash_tx_right = test_stash_tx_right - if hasattr(test_stash_tx_right.posedit.edit, - 'alt') and test_stash_tx_right.posedit.edit.alt is not None: - alt = test_stash_tx_right.posedit.edit.alt - else: - alt = '' - if hasattr(stash_genomic.posedit.edit, - 'alt') and stash_genomic.posedit.edit.alt is not None: - g_alt = stash_genomic.posedit.edit.alt - else: - g_alt = '' - if (len(alt) - ( - test_stash_tx_right.posedit.pos.end.base - test_stash_tx_right.posedit.pos.start.base) + 1) != ( - len(g_alt) - ( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - hgvs_genomic_possibilities.append(stash_genomic) - else: - hgvs_genomic_possibilities.append('') - elif test_stash_tx_right.posedit.edit.type == 'identity': - reform_ident = str(test_stash_tx_right).split(':')[0] - reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_right.posedit.pos) + 'del' + str( - test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) - hgvs_reform_ident = hp.parse_hgvs_variant(reform_ident) + # Reverse normalize hgvs_genomic_variant: NOTE will replace ref try: - hn.normalize(hgvs_reform_ident) + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic_variant) except hgvs.exceptions.HGVSError as e: + # Strange error caused by gap in genomic error = str(e) - if re.search('spanning the exon-intron boundary', error): - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append('') - else: - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append(stash_genomic) - else: + if re.search('base start position must be <= end position', error): + if hgvs_genomic.posedit.edit.type == 'delins': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic) + if hgvs_genomic.posedit.edit.type == 'del': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic) + if re.search('insertion length must be 1', error): + if hgvs_genomic.posedit.edit.type == 'ins': + start = 
hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) + lhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + rhb = sf.fetch_seq(str(hgvs_genomic.ac), start, end) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic) + + hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + # Store a copy for later use + stored_hgvs_genomic_5pr = copy.deepcopy(hgvs_genomic_5pr) + + # Create VCF + vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, + reverse_normalizer, sf) + chr = vcf_dict['chr'] + pos = vcf_dict['pos'] + ref = vcf_dict['ref'] + alt = vcf_dict['alt'] + + # Look for exonic gaps within transcript or chromosome + no_normalized_c = 'false' # Mark true to not produce an additional normalization of c. + + # Generate an end position + end = str(int(pos) + len(ref) - 1) + pos = str(pos) + + # Store a not real deletion insertion to test for gapping + stored_hgvs_not_delins = hp.parse_hgvs_variant(str( + hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) + v = [chr, pos, ref, alt] + + # Detect intronic variation using normalization + intronic_variant = 'false' + + # Save a copy of current hgvs_coding try: - hn.normalize(test_stash_tx_right) - except hgvs.exceptions.HGVSUnsupportedOperationError: - hgvs_genomic_possibilities.append('') - else: - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: - test_stash_tx_right = copy.deepcopy(hgvs_coding) - exceptPass() - # Intronic positions not supported. 
Will cause a Value Error - except ValueError: - test_stash_tx_right = copy.deepcopy(hgvs_coding) - exceptPass() - - # Then to the left - hgvs_stash = copy.deepcopy(hgvs_coding) - try: - hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) - except: - exceptPass() - try: - stash_ac = hgvs_stash.ac - stash_dict = va_H2V.hard_left_hgvs2vcf(hgvs_stash, primary_assembly, reverse_normalizer, - sf) - stash_pos = int(stash_dict['pos']) - stash_ref = stash_dict['ref'] - stash_alt = stash_dict['alt'] - # Generate an end position - stash_end = str(stash_pos + len(stash_ref) - 1) - # make a not real deletion insertion - stash_hgvs_not_delins = hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' + str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) - try: - stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) - except: - exceptPass() - # Store a tx copy for later use - test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) - # stash_genomic = vm.t_to_g(test_stash_tx_left, hgvs_genomic.ac) - stash_genomic = va_func.myvm_t_to_g(test_stash_tx_left, hgvs_genomic.ac, no_norm_evm, - vm, hp, hn, sf, nr_vm) - # Stash the outputs if required - # test variants = NC_000006.11:g.90403795G= (causes double identity) - # NC_000002.11:g.73675227_73675228insCTC - # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) - # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': - # pass - if len(test_stash_tx_left.posedit.edit.ref) == (( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): - stash_tx_left = test_stash_tx_left - if hasattr(test_stash_tx_left.posedit.edit, - 'alt') and test_stash_tx_left.posedit.edit.alt is not None: - alt = test_stash_tx_left.posedit.edit.alt - else: - alt = '' - if hasattr(stash_genomic.posedit.edit, - 'alt') and stash_genomic.posedit.edit.alt is not None: - g_alt = 
stash_genomic.posedit.edit.alt - else: - g_alt = '' + saved_hgvs_coding = no_norm_evm.g_to_t(stored_hgvs_not_delins, hgvs_coding.ac) + except hgvs.exceptions.HGVSInvalidIntervalError as e: + if str(e) == 'start or end or both are beyond the bounds of transcript record': + saved_hgvs_coding = hgvs_coding + intronic_variant = 'true' + continue + else: + saved_hgvs_coding = no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, + hgvs_coding.ac) - if (len(alt) - ( - test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( - len(g_alt) - ( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - hgvs_genomic_possibilities.append(stash_genomic) - else: - hgvs_genomic_possibilities.append('') - elif test_stash_tx_left.posedit.edit.type == 'identity': - reform_ident = str(test_stash_tx_left).split(':')[0] - reform_ident = reform_ident + ':c.' + str(test_stash_tx_left.posedit.pos) + 'del' + str( - test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) - hgvs_reform_ident = hp.parse_hgvs_variant(reform_ident) - try: - hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error): - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append('') - else: - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append(stash_genomic) - else: - try: - hn.normalize(test_stash_tx_left) - except hgvs.exceptions.HGVSUnsupportedOperationError: - hgvs_genomic_possibilities.append('') - else: - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: - test_stash_tx_left = copy.deepcopy(hgvs_coding) - exceptPass() - except ValueError: - test_stash_tx_left = copy.deepcopy(hgvs_coding) - exceptPass() + # Look for normalized variant options that do not match hgvs_coding + if orientation == -1: + # position genomic at 
its most 5 prime position + try: + query_genomic = reverse_normalizer.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript and test for movement + try: + hgvs_seek_var = evm.g_to_t(query_genomic, hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if ( + hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding + + elif orientation != -1: + # position genomic at its most 3 prime position + try: + query_genomic = hn.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript ant test for movement + try: + hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if ( + hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding - # direct mapping from reverse_normalized transcript insertions in the delins format - try: - if hgvs_coding.posedit.edit.type == 'ins': - most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) - most_3pr_hgvs_transcript_variant = reverse_normalizer.normalize(hgvs_coding) - try: - n_3pr = 
vm.c_to_n(most_3pr_hgvs_transcript_variant) - n_5pr = vm.c_to_n(most_5pr_hgvs_transcript_variant) - except: - n_3pr = most_3pr_hgvs_transcript_variant - n_5pr = most_5pr_hgvs_transcript_variant - # Make into a delins by adding the ref bases to the variant ref and alt - pr3_ref = sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, - n_3pr.posedit.pos.end.base) - pr5_ref = sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, - n_5pr.posedit.pos.end.base) - most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref - most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref - most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[ - 0] + most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ - pr3_ref[1] - most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[ - 0] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ - pr5_ref[1] - # Map to the genome - genomic_from_most_3pr_hgvs_transcript_variant = vm.t_to_g( - most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) - genomic_from_most_5pr_hgvs_transcript_variant = vm.t_to_g( - most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) - # Normalize - If the variant spans a gap it should then form a static genomic variant - try: - genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_3pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if error == 'base start position must be <= end position': - start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base - end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start - genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_3pr_hgvs_transcript_variant) try: - genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( - 
genomic_from_most_5pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: + intron_test = hn.normalize(hgvs_seek_var) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) - if error == 'base start position must be <= end position': - start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base - end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start - genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_5pr_hgvs_transcript_variant) - try: - if genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' 
+ str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_3pr_hgvs_transcript_variant = hp.parse_hgvs_variant( - genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + intronic_variant = 'hard_fail' + else: + # Double check to see whether the variant is actually intronic? + for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' - try: - if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' 
+ str( - most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref - most_3pr_hgvs_transcript_variant = hp.parse_hgvs_variant( - most_3pr_hgvs_transcript_variant_delins_from_dup) + if intronic_variant != 'hard_fail': + if re.search('\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search('\d+\-', str( + hgvs_seek_var.posedit.pos)) or re.search('\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search('\*\d+\-', + str(hgvs_seek_var.posedit.pos)): + # Double check to see whether the variant is actually intronic? + for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' - try: - if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' 
+ str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_5pr_hgvs_transcript_variant = hp.parse_hgvs_variant( - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) + if re.search('\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search('\d+\-', str( + hgvs_seek_var.posedit.pos)) or re.search('\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search('\*\d+\-', str(hgvs_seek_var.posedit.pos)): + # Double check to see whether the variant is actually intronic? + for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' - try: - if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' 
+ str( - most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref - most_5pr_hgvs_transcript_variant = hp.parse_hgvs_variant( - most_5pr_hgvs_transcript_variant_delins_from_dup) - - if len(genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( - most_3pr_hgvs_transcript_variant.posedit.edit.alt): - hgvs_genomic_possibilities.append(genomic_from_most_3pr_hgvs_transcript_variant) - if len(genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( - most_5pr_hgvs_transcript_variant.posedit.edit.alt): - hgvs_genomic_possibilities.append(genomic_from_most_5pr_hgvs_transcript_variant) + if intronic_variant != 'true': + # Flag RefSeqGene for ammendment + # amend_RefSeqGene = 'false' + # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths + if stored_hgvs_not_delins != '': + # Refresh hgvs_not_delins from stored_hgvs_not_delins + hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) + # This test will only occur in dup of single base, insertion or substitution + if not re.search('_', str(hgvs_not_delins.posedit.pos)): + if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', + hgvs_genomic_5pr.posedit.edit.type): + # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos + plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) + plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 + plussed_hgvs_not_delins.posedit.edit.ref = '' + transcript_variant = no_norm_evm.g_to_t(plussed_hgvs_not_delins, + str(saved_hgvs_coding.ac)) + if (( + transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( + hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + elif re.search('ins', + str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', + str( + hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + else: + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = 
hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + elif re.search('ins', + str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', + str( + hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + else: + pass + else: + pass + try: + tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSInvalidIntervalError: + tx_hgvs_not_delins = no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, + saved_hgvs_coding.ac) + # Create normalized version of tx_hgvs_not_delins + rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - pass + # Check for +1 base and adjust + if re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( + '\+', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + 
rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: + pass + + elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, + primary_assembly, vm, hp, hn, sf, + nr_vm) + + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + + # tx_hgvs_not_delins = rn_tx_hgvs_not_delins + elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, + primary_assembly, vm, hp, hn, sf, + nr_vm) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # else: + # pass + + # Check for -ve base and adjust + elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( + '\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + 
rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: + pass + elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # Delete the ref + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # Add the additional base to the ALT + start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 + end = rn_tx_hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, + primary_assembly, vm, hp, hn, sf, + nr_vm) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, + primary_assembly, vm, hp, hn, sf, + nr_vm) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + else: + pass + + # Logic + if len(hgvs_not_delins.posedit.edit.ref) < len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + 
gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( + hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['chromosome', gap_length] + elif len(hgvs_not_delins.posedit.edit.ref) > len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( + rn_tx_hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['transcript', gap_length] + else: + re_capture_tx_variant = [] + for internal_possibility in hgvs_genomic_possibilities: + if internal_possibility == '': + continue + + hgvs_t_possibility = vm.g_to_t(internal_possibility, hgvs_coding.ac) + if hgvs_t_possibility.posedit.edit.type == 'ins': + try: + hgvs_t_possibility = vm.c_to_n(hgvs_t_possibility) + except: + fn.exceptPass() + ins_ref = sf.fetch_seq(hgvs_t_possibility.ac, + hgvs_t_possibility.posedit.pos.start.base - 1, + hgvs_t_possibility.posedit.pos.start.base + 1) + try: + hgvs_t_possibility = vm.n_to_c(hgvs_t_possibility) + except: + fn.exceptPass() + hgvs_t_possibility.posedit.edit.ref = ins_ref + hgvs_t_possibility.posedit.edit.alt = ins_ref[ + 0] + hgvs_t_possibility.posedit.edit.alt + \ + ins_ref[1] + if internal_possibility.posedit.edit.type == 'ins': + ins_ref = sf.fetch_seq(internal_possibility.ac, + internal_possibility.posedit.pos.start.base - 1, + internal_possibility.posedit.pos.end.base) + internal_possibility.posedit.edit.ref = ins_ref + internal_possibility.posedit.edit.alt = ins_ref[ + 0] + internal_possibility.posedit.edit.alt + \ + ins_ref[1] + + if len(hgvs_t_possibility.posedit.edit.ref) < len( + internal_possibility.posedit.edit.ref): + gap_length = len(internal_possibility.posedit.edit.ref) - len( + hgvs_t_possibility.posedit.edit.ref) + re_capture_tx_variant = ['transcript', gap_length, + hgvs_t_possibility] + hgvs_not_delins = internal_possibility + hgvs_genomic_5pr = internal_possibility + break + + if re_capture_tx_variant != []: + try: + tx_hgvs_not_delins = vm.c_to_n(re_capture_tx_variant[2]) + except: + 
tx_hgvs_not_delins = re_capture_tx_variant[2] + disparity_deletion_in = re_capture_tx_variant[0:-1] + else: + pass + + # 'At hgvs_genomic' + # Final sanity checks + try: + vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + except Exception as e: + if str(e) == 'start or end or both are beyond the bounds of transcript record': + continue + try: + hn.normalize(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + continue + elif re.match('Normalization of intronic variants is not supported', error): + # We know that this cannot be because of an intronic variant, so must be aligned to tx gap + disparity_deletion_in = ['transcript', 'Requires Analysis'] + + # amend_RefSeqGene = 'false' + # Recreate hgvs_genomic + if disparity_deletion_in[0] == 'transcript': + hgvs_genomic = hgvs_not_delins + + # Find oddly placed gaps where the tx variant is encompassed in the gap + if disparity_deletion_in[0] == 'false' and ( + possibility_counter == 3 or possibility_counter == 4): + rg = reverse_normalizer.normalize(hgvs_not_delins) + rtx = vm.g_to_t(rg, tx_hgvs_not_delins.ac) + fg = hn.normalize(hgvs_not_delins) + ftx = vm.g_to_t(fg, tx_hgvs_not_delins.ac) + if (rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( + ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): + exons = self.hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, alt_aln_method) + exonic = False + for ex_test in exons: + if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ + 7]) and ftx.posedit.pos.end.base in range(ex_test[6], ex_test[7]): + exonic = True + if exonic is True: + hgvs_not_delins = fg + hgvs_genomic = fg + hgvs_genomic_5pr = fg + try: 
+ tx_hgvs_not_delins = vm.c_to_n(ftx) + except Exception: + tx_hgvs_not_delins = ftx + disparity_deletion_in = ['transcript', 'Requires Analysis'] + + # Pre-processing of tx_hgvs_not_delins + try: + if tx_hgvs_not_delins.posedit.edit.alt is None: + tx_hgvs_not_delins.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( + tx_hgvs_not_delins.posedit.pos.start) + '_' + str( + tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins = hp.parse_hgvs_variant( + tx_hgvs_not_delins_delins_from_dup) + + # GAP IN THE TRANSCRIPT DISPARITY DETECTED + if disparity_deletion_in[0] == 'transcript': + # Suppress intron boundary crossing due to non-intron intron based c. seq annotations + suppress_c_normalization = 'true' + # amend_RefSeqGene = 'true' + # ANY VARIANT WHOLLY WITHIN THE GAP + if (re.search('\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( + '\-', + str( + tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search('\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( + '\-', + str( + tx_hgvs_not_delins.posedit.pos.end))): + + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + + # Copy the current variant + tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) + try: + if tx_gap_fill_variant.posedit.edit.alt is None: + tx_gap_fill_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( + tx_gap_fill_variant.posedit.pos.start) + '_' + str( + tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant = hp.parse_hgvs_variant( + tx_gap_fill_variant_delins_from_dup) + + # Identify which half of the NOT-intron the start position of the variant is in + if re.search('\-', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + elif re.search('\+', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + + try: + tx_gap_fill_variant = vm.n_to_c(tx_gap_fill_variant) + except: + fn.exceptPass() + genomic_gap_fill_variant = vm.t_to_g(tx_gap_fill_variant, + reverse_normalized_hgvs_genomic.ac) + genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref + + try: + c_tx_hgvs_not_delins = vm.n_to_c(tx_hgvs_not_delins) + except Exception: + c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) + genomic_gap_fill_variant_alt = vm.t_to_g(c_tx_hgvs_not_delins, + hgvs_genomic_5pr.ac) + + # Ensure an ALT exists + try: + if genomic_gap_fill_variant_alt.posedit.edit.alt is None: + genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + 
genomic_gap_fill_variant.type + '.' + str( + genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant = hp.parse_hgvs_variant( + genomic_gap_fill_variant_delins_from_dup) + genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( + genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt = hp.parse_hgvs_variant( + genomic_gap_fill_variant_alt_delins_from_dup) + + # Correct insertion alts + if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': + append_ref = sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, + genomic_gap_fill_variant_alt.posedit.pos.end.base) + genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ + 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ + append_ref[1] + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) + if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: + alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) + else: + # Deletions with no ins + pre_alternate_bases = list( + genomic_gap_fill_variant_alt.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = genomic_gap_fill_variant.posedit.pos.start.base + alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base + ref_base_dict = {} + for base in 
reference_bases: + ref_base_dict[ref_start] = str(base) + ref_start = ref_start + 1 + + alt_base_dict = {} + + # NEED TO SEARCH FOR RANGE = and replace with interval_range + # Need to search for int and replace with integer + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, + genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, + 1): + if integer == alt_start: + alt_base_dict[integer] = str(''.join(alternate_bases)) + else: + alt_base_dict[integer] = 'X' + + # Generate the alt sequence + alternate_sequence_bases = [] + for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, + genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): + if integer in alt_base_dict.keys(): + alternate_sequence_bases.append(alt_base_dict[integer]) + else: + alternate_sequence_bases.append(ref_base_dict[integer]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Add the new alt to the gap fill variant and generate transcript variant + genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence + hgvs_refreshed_variant = vm.g_to_t(genomic_gap_fill_variant, + tx_gap_fill_variant.ac) + + # Set warning + gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) + disparity_deletion_in[1] = [gap_size] + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' - logger.info('\nGENOMIC POSSIBILITIES') - for possibility in hgvs_genomic_possibilities: - if possibility == '': - logger.info('X') - else: - logger.info(valstr(possibility)) + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + if re.match('\-', str(for_location_c.posedit.pos.start.offset)): + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + else: + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + auto_info = auto_info + '%s' % (gap_position) - logger.info('\n') + else: + if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: + # In this instance, we have identified a transcript gap but the n. version of + # the transcript variant but do not have a position which actually hits the gap, + # so the variant likely spans the gap, and is not picked up by an offset. 
+ try: + c1 = vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + g1 = nr_vm.t_to_g(c1, hgvs_genomic.ac) + g3 = nr_vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + ng2 = hn.normalize(g2) + g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( + len(g3.posedit.edit.ref) - 1) + try: + c2 = vm.g_to_t(g3, c1.ac) + if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: + pass + else: + tx_hgvs_not_delins = c2 + try: + tx_hgvs_not_delins = vm.c_to_n(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSError: + fn.exceptPass() + except hgvs.exceptions.HGVSInvalidVariantError: + fn.exceptPass() - # Set variables for problem specific warnings - gapped_alignment_warning = '' - corrective_action_taken = '' - gapped_transcripts = '' - auto_info = '' + if re.search('\+', + str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + '\+', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = vm.t_to_g(c2, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + 
g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search('\+', + str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + '\+', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = vm.t_to_g(c2, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = 
copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base + gpe = for_location_c.posedit.pos.end.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search('\-', + str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + '\-', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = vm.t_to_g(c2, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + gap_position = ' between 
positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search('\-', + str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + '\-', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = vm.t_to_g(c2, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base - 1 + gpe = for_location_c.posedit.pos.end.base + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + else: + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + '\n' + tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.start.base + len( + tx_hgvs_not_delins.posedit.edit.ref) - 1 + hgvs_refreshed_variant = tx_hgvs_not_delins + + # GAP IN THE CHROMOSOME + elif disparity_deletion_in[0] == 'chromosome': + suppress_c_normalization = 'true' + # amend_RefSeqGene = 'true' + if possibility_counter == 3: + hgvs_refreshed_variant = stash_tx_right + elif possibility_counter == 4: + hgvs_refreshed_variant = stash_tx_left + else: + hgvs_refreshed_variant = chromosome_normalized_hgvs_coding + # Warn + auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( + hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' transcript base(s) that fail to align to chromosome ' + str( + hgvs_genomic.ac) + '\n' + else: + # Keep the same by re-setting rel_var + hgvs_refreshed_variant = hgvs_coding + # amend_RefSeqGene = 'false' + + # Edit the output + if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( + hgvs_refreshed_variant.type)): + hgvs_refreshed_variant = no_norm_evm.n_to_c(hgvs_refreshed_variant) + else: + pass - # Mark as not disparity detected - disparity_deletion_in = ['false', 'false'] + try: + hn.normalize(hgvs_refreshed_variant) + except Exception as e: + error = str(e) - # Loop through to see if a gap can be located - # Set the variables required for corrective normalization - possibility_counter = 0 - suppress_c_normalization = 'false' # Applies to boundary crossing normalization + # Ensure the final variant is not intronic nor does it cross exon boundaries + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + 
hgvs_refreshed_variant = saved_hgvs_coding + else: + logger.warning(error) + continue - # Copy a version of hgvs_genomic_possibilities - for possibility in hgvs_genomic_possibilities: - possibility_counter = possibility_counter + 1 + # Quick check to make sure the coding variant has not changed + try: + to_test = hn.normalize(hgvs_refreshed_variant) + except: + to_test = hgvs_refreshed_variant + if str(to_test.posedit.edit) != str(hgvs_coding.posedit.edit): + # Try the next available genomic option + if hgvs_coding.posedit.edit.type == 'identity' and to_test.posedit.edit.type == 'identity': + hgvs_coding = to_test + else: + continue + # Update hgvs_genomic + hgvs_genomic = va_func.myvm_t_to_g(hgvs_refreshed_variant, hgvs_genomic.ac, + no_norm_evm, vm, hp, hn, sf, nr_vm) + if hgvs_genomic.posedit.edit.type == 'identity': + re_c = vm.g_to_t(hgvs_genomic, hgvs_refreshed_variant.ac) + if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): + shuffle_left_g = copy.copy(hgvs_genomic) + shuffle_left_g.posedit.edit.ref = '' + shuffle_left_g.posedit.edit.alt = '' + shuffle_left_g.posedit.pos.start.base = shuffle_left_g.posedit.pos.start.base - 1 + shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 + shuffle_left_g = reverse_normalizer.normalize(shuffle_left_g) + re_c = vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) + if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): + hgvs_genomic = shuffle_left_g - # Loop out stash possibilities which will not spot gaps so are empty - if possibility == '': - continue + # If it is intronic, these vairables will not have been set + else: + # amend_RefSeqGene = 'false' + no_normalized_c = 'false' - # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps - hgvs_genomic_variant = copy.deepcopy(possibility) - stored_hgvs_genomic_variant = copy.deepcopy(hgvs_genomic_variant) + # Break if gap has been detected + if disparity_deletion_in[0] != 'false': + 
break - # Reverse normalize hgvs_genomic_variant: NOTE will replace ref + # Warn user about gapping + if auto_info != '': + info_lines = auto_info.split('\n') + info_keys = {} + for information in info_lines: + info_keys[information] = '' + info_out = [] + info_out.append( + 'The displayed variants may be artefacts of aligning ' + hgvs_coding.ac + ' with genome build ' + primary_assembly) + for ky in info_keys.keys(): + info_out.append(ky) + auto_info = '\n'.join(info_out) + auto_info = auto_info + '\nCaution should be used when reporting the displayed variant descriptions: If you are unsure, please contact admin' + auto_info = str(auto_info.replace('\n', ': ')) + validation['warnings'] = validation['warnings'] + ': ' + str(auto_info) + logger.warning(str(auto_info)) + # Normailse hgvs_genomic try: - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic_variant) + hgvs_genomic = hn.normalize(hgvs_genomic) except hgvs.exceptions.HGVSError as e: # Strange error caused by gap in genomic error = str(e) - if re.search('base start position must be <= end position', error): + + if re.search('base start position must be <= end position', error) and \ + disparity_deletion_in[0] == 'chromosome': if hgvs_genomic.posedit.edit.type == 'delins': start = hgvs_genomic.posedit.pos.start.base end = hgvs_genomic.posedit.pos.end.base @@ -4316,7 +5294,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb hgvs_genomic.posedit.pos.start.base = end hgvs_genomic.posedit.pos.end.base = start - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic) + hgvs_genomic = hn.normalize(hgvs_genomic) if hgvs_genomic.posedit.edit.type == 'del': start = hgvs_genomic.posedit.pos.start.base end = hgvs_genomic.posedit.pos.end.base @@ -4326,165 +5304,120 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr 
hgvs_genomic.posedit.edit.alt = lhb + rhb hgvs_genomic.posedit.pos.start.base = end hgvs_genomic.posedit.pos.end.base = start - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic) - if re.search('insertion length must be 1', error): - if hgvs_genomic.posedit.edit.type == 'ins': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) - lhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - rhb = sf.fetch_seq(str(hgvs_genomic.ac), start, end) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic) + hgvs_genomic = hn.normalize(hgvs_genomic) + genomic = fn.valstr(hgvs_genomic) - hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - # Store a copy for later use - stored_hgvs_genomic_5pr = copy.deepcopy(hgvs_genomic_5pr) + else: + stored_hgvs_genomic_variant = hgvs_genomic + suppress_c_normalization = 'false' + gapped_alignment_warning = '' + auto_info = '' + genomic = fn.valstr(hgvs_genomic) - # Create VCF - vcf_dict = va_H2V.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, - reverse_normalizer, sf) - chr = vcf_dict['chr'] - pos = vcf_dict['pos'] - ref = vcf_dict['ref'] - alt = vcf_dict['alt'] + # Create pseudo VCF based on amended hgvs_genomic + hgvs_genomic_variant = hgvs_genomic + # Reverse normalize hgvs_genomic_variant: NOTE will replace ref + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic_variant) - # Look for exonic gaps within transcript or chromosome - no_normalized_c = 'false' # Mark true to not produce an additional normalization of c. 
+ hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - # Generate an end position - end = str(int(pos) + len(ref) - 1) - pos = str(pos) + # Create vcf + vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, + reverse_normalizer, sf) + chr = vcf_dict['chr'] + pos = vcf_dict['pos'] + ref = vcf_dict['ref'] + alt = vcf_dict['alt'] - # Store a not real deletion insertion to test for gapping - stored_hgvs_not_delins = hp.parse_hgvs_variant(str( - hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) - v = [chr, pos, ref, alt] + # Create a VCF call + vcf_component_list = [str(chr), str(pos), str(ref), (alt)] + vcf_genomic = '-'.join(vcf_component_list) - # Detect intronic variation using normalization - intronic_variant = 'false' + # DO NOT DELETE + # Generate an end position + end = str(int(pos) + len(ref) - 1) + pos = str(pos) - # Save a copy of current hgvs_coding - try: - saved_hgvs_coding = no_norm_evm.g_to_t(stored_hgvs_not_delins, hgvs_coding.ac) - except hgvs.exceptions.HGVSInvalidIntervalError as e: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - saved_hgvs_coding = hgvs_coding - intronic_variant = 'true' - continue - else: - saved_hgvs_coding = no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, - hgvs_coding.ac) + # DO NOT DELETE + stored_hgvs_not_delins = hp.parse_hgvs_variant(str( + hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' 
+ pos + '_' + end + 'del' + ref + 'ins' + alt) - # Look for normalized variant options that do not match hgvs_coding - if orientation == -1: - # position genomic at its most 5 prime position - try: - query_genomic = reverse_normalizer.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if ( - hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding + # Apply gap code to re-format hgvs_coding + # Store the current hgvs:c. 
description + saved_hgvs_coding = copy.deepcopy(hgvs_coding) - elif orientation != -1: - # position genomic at its most 3 prime position - try: - query_genomic = hn.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript ant test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if ( - hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding + # Get orientation of the gene wrt genome and a list of exons mapped to the genome + ori = va_func.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=hgvs_genomic_5pr.ac, + alt_aln_method=alt_aln_method, hdp=self.hdp) + orientation = int(ori[0]['alt_strand']) + # Look for normalized variant options that do not match hgvs_coding + hgvs_genomic = copy.deepcopy(hgvs_genomic_variant) + if orientation == -1: + # position genomic at its most 5 prime position try: - intron_test = hn.normalize(hgvs_seek_var) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - intronic_variant = 'hard_fail' - else: - # Double check to see whether the variant is actually intronic? 
- for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' + query_genomic = reverse_normalizer.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript ant test for movement + try: + hgvs_seek_var = evm.g_to_t(query_genomic, hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding - if intronic_variant != 'hard_fail': - if re.search('\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search('\d+\-', str( - hgvs_seek_var.posedit.pos)) or re.search('\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search('\*\d+\-', - str(hgvs_seek_var.posedit.pos)): - # Double check to see whether the variant is actually intronic? 
- for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' + elif orientation != -1: + # position genomic at its most 3 prime position + try: + query_genomic = hn.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript ant test for movement + try: + hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): + pass + else: + hgvs_seek_var = saved_hgvs_coding - if re.search('\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search('\d+\-', str( - hgvs_seek_var.posedit.pos)) or re.search('\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search('\*\d+\-', str(hgvs_seek_var.posedit.pos)): - # Double check to see whether the variant is actually intronic? 
- for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' + # Loop out gap finding code under these circumstances! + logger.warning("gap_compensation_2 = " + str(gap_compensation)) + if gap_compensation is True: + logger.warning('g_to_t gap code 2 active') + # is it in an exon? + is_it_in_an_exon = 'no' + for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + # Take from stored copy + # hgvs_genomic_5pr = copy.deepcopy(stored_hgvs_genomic_5pr) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + is_it_in_an_exon = 'yes' + if is_it_in_an_exon == 'yes': + # map form reverse normalized g. to c. 
+ hgvs_from_5n_g = no_norm_evm.g_to_t(hgvs_genomic_5pr, saved_hgvs_coding.ac) - if intronic_variant != 'true': - # Flag RefSeqGene for ammendment - # amend_RefSeqGene = 'false' # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths + disparity_deletion_in = ['false', 'false'] if stored_hgvs_not_delins != '': # Refresh hgvs_not_delins from stored_hgvs_not_delins hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) @@ -4510,13 +5443,11 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_not_delins.posedit.edit.alt = ref_bases[ :1] + hgvs_not_delins.posedit.edit.alt[ 1:] + ref_bases[1:] - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): + elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 elif re.search('ins', - str( - hgvs_genomic_5pr.posedit.edit)) and not re.search( + str(hgvs_genomic_5pr.posedit.edit)) and not re.search( 'del', str(hgvs_genomic_5pr.posedit.edit)): hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 @@ -4536,13 +5467,11 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_not_delins.posedit.edit.alt = ref_bases[ :1] + hgvs_not_delins.posedit.edit.alt[ 1:] + ref_bases[1:] - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): + elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 elif re.search('ins', - str( - hgvs_genomic_5pr.posedit.edit)) and not re.search( + str(hgvs_genomic_5pr.posedit.edit)) and 
not re.search( 'del', str(hgvs_genomic_5pr.posedit.edit)): hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 @@ -4556,20 +5485,21 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr pass else: pass + + hard_fail = 'false' try: - tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSInvalidIntervalError: - tx_hgvs_not_delins = no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, - saved_hgvs_coding.ac) + tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) + except Exception as e: + if str(e) == 'start or end or both are beyond the bounds of transcript record': + tx_hgvs_not_delins = hgvs_coding + hard_fail = 'true' + # Create normalized version of tx_hgvs_not_delins rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) - - # Check for +1 base and adjust - if re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - '\+', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): + # Check for +ve base and adjust + if re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search('\+', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): # Remove offsetting to span the gap rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 @@ -4578,7 +5508,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr try: rn_tx_hgvs_not_delins.posedit.edit.alt = '' except: - pass + fn.exceptPass() elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): # move tx end base to next available non-offset base @@ -4590,37 +5520,31 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, - 
nr_vm) - + hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, + primary_assembly, vm, hp, hn, sf, nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) - - # tx_hgvs_not_delins = rn_tx_hgvs_not_delins elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + rn_tx_hgvs_not_delins.posedit.edit.ref = '' # move tx start base to previous available non-offset base rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' if re.match('NM_', str(rn_tx_hgvs_not_delins)): test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, - nr_vm) + hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, + primary_assembly, vm, hp, hn, sf, nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: - # pass + # else: + # pass # Check for -ve base and adjust - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - '\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): + elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search('\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): # Remove offsetting to span the gap rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 @@ -4629,8 +5553,10 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr try: rn_tx_hgvs_not_delins.posedit.edit.alt = '' except: - pass + fn.exceptPass() elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base back to next available non-offset base + # rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base - 1 
rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 # Delete the ref rn_tx_hgvs_not_delins.posedit.edit.ref = '' @@ -4644,9 +5570,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, - nr_vm) + hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, + primary_assembly, vm, hp, hn, sf, nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): @@ -4659,9 +5584,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, - nr_vm) + hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, + primary_assembly, vm, hp, hn, sf, nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 @@ -4682,6 +5606,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: re_capture_tx_variant = [] for internal_possibility in hgvs_genomic_possibilities: + if internal_possibility == '': continue @@ -4690,14 +5615,14 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr try: hgvs_t_possibility = vm.c_to_n(hgvs_t_possibility) except: - exceptPass() + fn.exceptPass() ins_ref = sf.fetch_seq(hgvs_t_possibility.ac, hgvs_t_possibility.posedit.pos.start.base - 1, hgvs_t_possibility.posedit.pos.start.base + 1) try: hgvs_t_possibility = vm.n_to_c(hgvs_t_possibility) except: - exceptPass() + fn.exceptPass() hgvs_t_possibility.posedit.edit.ref = ins_ref hgvs_t_possibility.posedit.edit.alt = ins_ref[ 0] + 
hgvs_t_possibility.posedit.edit.alt + \ @@ -4715,8 +5640,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr internal_possibility.posedit.edit.ref): gap_length = len(internal_possibility.posedit.edit.ref) - len( hgvs_t_possibility.posedit.edit.ref) - re_capture_tx_variant = ['transcript', gap_length, - hgvs_t_possibility] + re_capture_tx_variant = ['transcript', gap_length, hgvs_t_possibility] hgvs_not_delins = internal_possibility hgvs_genomic_5pr = internal_possibility break @@ -4730,12 +5654,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: pass - # 'At hgvs_genomic' # Final sanity checks try: vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) except Exception as e: if str(e) == 'start or end or both are beyond the bounds of transcript record': + logger.warning(str(e)) continue try: hn.normalize(tx_hgvs_not_delins) @@ -4748,41 +5672,19 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if re.match( 'Unsupported normalization of variants spanning the exon-intron boundary', error): + logger.warning(error) continue elif re.match('Normalization of intronic variants is not supported', error): # We know that this cannot be because of an intronic variant, so must be aligned to tx gap disparity_deletion_in = ['transcript', 'Requires Analysis'] - # amend_RefSeqGene = 'false' + if hard_fail == 'true': + disparity_deletion_in = ['false', 'false'] + # Recreate hgvs_genomic if disparity_deletion_in[0] == 'transcript': hgvs_genomic = hgvs_not_delins - # Find oddly placed gaps where the tx variant is encompassed in the gap - if disparity_deletion_in[0] == 'false' and ( - possibility_counter == 3 or possibility_counter == 4): - rg = reverse_normalizer.normalize(hgvs_not_delins) - rtx = vm.g_to_t(rg, tx_hgvs_not_delins.ac) - fg = hn.normalize(hgvs_not_delins) - ftx = vm.g_to_t(fg, tx_hgvs_not_delins.ac) - if (rtx.posedit.pos.start.offset == 0 and 
rtx.posedit.pos.end.offset == 0) and ( - ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): - exons = hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, alt_aln_method) - exonic = False - for ex_test in exons: - if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ - 7]) and ftx.posedit.pos.end.base in range(ex_test[6], ex_test[7]): - exonic = True - if exonic is True: - hgvs_not_delins = fg - hgvs_genomic = fg - hgvs_genomic_5pr = fg - try: - tx_hgvs_not_delins = vm.c_to_n(ftx) - except Exception: - tx_hgvs_not_delins = ftx - disparity_deletion_in = ['transcript', 'Requires Analysis'] - # Pre-processing of tx_hgvs_not_delins try: if tx_hgvs_not_delins.posedit.edit.alt is None: @@ -4792,24 +5694,22 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( tx_hgvs_not_delins.posedit.pos.start) + '_' + str( tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = hp.parse_hgvs_variant( - tx_hgvs_not_delins_delins_from_dup) + tx_hgvs_not_delins = hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) + # GAP IN THE TRANSCRIPT DISPARITY DETECTED if disparity_deletion_in[0] == 'transcript': - # Suppress intron boundary crossing due to non-intron intron based c. 
seq annotations - suppress_c_normalization = 'true' - # amend_RefSeqGene = 'true' - # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search('\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( - '\-', - str( - tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search('\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( - '\-', - str( - tx_hgvs_not_delins.posedit.pos.end))): + gap_position = '' + gapped_alignment_warning = str( + hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + primary_assembly + # ANY VARIANT WHOLLY WITHIN THE GAP + if (re.search('\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search('\-', + str( + tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search('\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search('\-', + str( + tx_hgvs_not_delins.posedit.pos.end))): gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) # Copy the current variant @@ -4825,7 +5725,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_gap_fill_variant = hp.parse_hgvs_variant( tx_gap_fill_variant_delins_from_dup) - # Identify which half of the NOT-intron the start position of the variant is in + # Identify which half of the NOT-intron the start position of the variant is in if re.search('\-', str(tx_gap_fill_variant.posedit.pos.start)): tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') @@ -4842,7 +5742,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr try: tx_gap_fill_variant = vm.n_to_c(tx_gap_fill_variant) except: - exceptPass() + fn.exceptPass() genomic_gap_fill_variant = vm.t_to_g(tx_gap_fill_variant, reverse_normalized_hgvs_genomic.ac) genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref @@ -4886,8 +5786,7 @@ def 
validate(self, batch_variant, selected_assembly, select_transcripts, transcr alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) else: # Deletions with no ins - pre_alternate_bases = list( - genomic_gap_fill_variant_alt.posedit.edit.ref) + pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) alternate_bases = [] for base in pre_alternate_bases: alternate_bases.append('X') @@ -4908,8 +5807,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Note, all variants will be forced into the format delete insert # Deleted bases in the ALT will be substituted for X for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, - genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, - 1): + genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): if integer == alt_start: alt_base_dict[integer] = str(''.join(alternate_bases)) else: @@ -4976,13 +5874,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr try: tx_hgvs_not_delins = vm.c_to_n(tx_hgvs_not_delins) except hgvs.exceptions.HGVSError: - exceptPass() + fn.exceptPass() except hgvs.exceptions.HGVSInvalidVariantError: - exceptPass() + fn.exceptPass() - if re.search('\+', - str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - '\+', str(tx_hgvs_not_delins.posedit.pos.end)): + if re.search('\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + '\+', str(tx_hgvs_not_delins.posedit.pos.end)): auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( disparity_deletion_in[ @@ -5021,20 +5918,17 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) gps = for_location_c.posedit.pos.start.base gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' 
+ str(gps) + '_' + str( - gpe) + '\n' + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' # Warn update auto_info = auto_info + '%s' % (gap_position) - elif re.search('\+', - str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - '\+', str(tx_hgvs_not_delins.posedit.pos.start)): + elif re.search('\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + '\+', str(tx_hgvs_not_delins.posedit.pos.start)): auto_info = auto_info + 'Genome position ' + str( stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( disparity_deletion_in[1]) + '-bp gap in transcript ' + str( tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) non_valid_caution = 'true' try: c1 = vm.n_to_c(tx_hgvs_not_delins) @@ -5068,8 +5962,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) gps = for_location_c.posedit.pos.end.base gpe = for_location_c.posedit.pos.end.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' # Warn update auto_info = auto_info + '%s' % (gap_position) elif re.search('\-', @@ -5113,20 +6006,17 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) gps = for_location_c.posedit.pos.start.base - 1 gpe = for_location_c.posedit.pos.start.base - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' + gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' # Warn update auto_info = auto_info + '%s' % (gap_position) - elif re.search('\-', - str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - '\-', str(tx_hgvs_not_delins.posedit.pos.start)): + elif re.search('\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + '\-', str(tx_hgvs_not_delins.posedit.pos.start)): auto_info = auto_info + 'Genome position ' + str( stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( disparity_deletion_in[1]) + '-bp gap in transcript ' + str( tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) non_valid_caution = 'true' try: c1 = vm.n_to_c(tx_hgvs_not_delins) @@ -5160,8 +6050,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) gps = for_location_c.posedit.pos.end.base - 1 gpe = for_location_c.posedit.pos.end.base - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' + gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' # Warn update auto_info = auto_info + '%s' % (gap_position) else: @@ -5170,1133 +6059,177 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr disparity_deletion_in[ 1]) + ' genomic base(s) that fail to align to transcript ' + str( tx_hgvs_not_delins.ac) + '\n' - tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.start.base + len( - tx_hgvs_not_delins.posedit.edit.ref) - 1 hgvs_refreshed_variant = tx_hgvs_not_delins + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) # GAP IN THE CHROMOSOME + elif disparity_deletion_in[0] == 'chromosome': - suppress_c_normalization = 'true' - # amend_RefSeqGene = 'true' - if possibility_counter == 3: - hgvs_refreshed_variant = stash_tx_right - elif possibility_counter == 4: - hgvs_refreshed_variant = stash_tx_left - else: - hgvs_refreshed_variant = chromosome_normalized_hgvs_coding + # Set warning variables + gap_position = '' + gapped_alignment_warning = str( + hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + primary_assembly + hgvs_refreshed_variant = tx_hgvs_not_delins # Warn auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' 
+ str( - hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' transcript base(s) that fail to align to chromosome ' + str( + hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(disparity_deletion_in[ + 1]) + ' transcript base(s) that fail to align to chromosome ' + str( hgvs_genomic.ac) + '\n' + gapped_transcripts = gapped_transcripts + str(hgvs_refreshed_variant.ac) + ' ' else: # Keep the same by re-setting rel_var - hgvs_refreshed_variant = hgvs_coding - # amend_RefSeqGene = 'false' - - # Edit the output - if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( - hgvs_refreshed_variant.type)): - hgvs_refreshed_variant = no_norm_evm.n_to_c(hgvs_refreshed_variant) - else: - pass - - try: - hn.normalize(hgvs_refreshed_variant) - except Exception as e: - error = str(e) - - # Ensure the final variant is not intronic nor does it cross exon boundaries - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - hgvs_refreshed_variant = saved_hgvs_coding - else: - logger.warning(error) - continue - - # Quick check to make sure the coding variant has not changed - try: - to_test = hn.normalize(hgvs_refreshed_variant) - except: - to_test = hgvs_refreshed_variant - if str(to_test.posedit.edit) != str(hgvs_coding.posedit.edit): - # Try the next available genomic option - if hgvs_coding.posedit.edit.type == 'identity' and to_test.posedit.edit.type == 'identity': - hgvs_coding = to_test - else: - continue - # Update hgvs_genomic - hgvs_genomic = va_func.myvm_t_to_g(hgvs_refreshed_variant, hgvs_genomic.ac, - no_norm_evm, vm, hp, hn, sf, nr_vm) - if hgvs_genomic.posedit.edit.type == 'identity': - re_c = vm.g_to_t(hgvs_genomic, hgvs_refreshed_variant.ac) - if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): - shuffle_left_g = copy.copy(hgvs_genomic) - 
shuffle_left_g.posedit.edit.ref = '' - shuffle_left_g.posedit.edit.alt = '' - shuffle_left_g.posedit.pos.start.base = shuffle_left_g.posedit.pos.start.base - 1 - shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 - shuffle_left_g = reverse_normalizer.normalize(shuffle_left_g) - re_c = vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) - if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): - hgvs_genomic = shuffle_left_g - - # If it is intronic, these vairables will not have been set - else: - # amend_RefSeqGene = 'false' - no_normalized_c = 'false' - - # Break if gap has been detected - if disparity_deletion_in[0] != 'false': - break - - # Warn user about gapping - if auto_info != '': - info_lines = auto_info.split('\n') - info_keys = {} - for information in info_lines: - info_keys[information] = '' - info_out = [] - info_out.append( - 'The displayed variants may be artefacts of aligning ' + hgvs_coding.ac + ' with genome build ' + primary_assembly) - for ky in info_keys.keys(): - info_out.append(ky) - auto_info = '\n'.join(info_out) - auto_info = auto_info + '\nCaution should be used when reporting the displayed variant descriptions: If you are unsure, please contact admin' - auto_info = str(auto_info.replace('\n', ': ')) - validation['warnings'] = validation['warnings'] + ': ' + str(auto_info) - logger.warning(str(auto_info)) - # Normailse hgvs_genomic - try: - hgvs_genomic = hn.normalize(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: - # Strange error caused by gap in genomic - error = str(e) - - if re.search('base start position must be <= end position', error) and \ - disparity_deletion_in[0] == 'chromosome': - if hgvs_genomic.posedit.edit.type == 'delins': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - 
hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - hgvs_genomic = hn.normalize(hgvs_genomic) - if hgvs_genomic.posedit.edit.type == 'del': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - hgvs_genomic = hn.normalize(hgvs_genomic) - genomic = valstr(hgvs_genomic) - - else: - stored_hgvs_genomic_variant = hgvs_genomic - suppress_c_normalization = 'false' - gapped_alignment_warning = '' - auto_info = '' - genomic = valstr(hgvs_genomic) - - # Create pseudo VCF based on amended hgvs_genomic - hgvs_genomic_variant = hgvs_genomic - # Reverse normalize hgvs_genomic_variant: NOTE will replace ref - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic_variant) - - hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - - # Create vcf - vcf_dict = va_H2V.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, - reverse_normalizer, sf) - chr = vcf_dict['chr'] - pos = vcf_dict['pos'] - ref = vcf_dict['ref'] - alt = vcf_dict['alt'] - - # Create a VCF call - vcf_component_list = [str(chr), str(pos), str(ref), (alt)] - vcf_genomic = '-'.join(vcf_component_list) - - # DO NOT DELETE - # Generate an end position - end = str(int(pos) + len(ref) - 1) - pos = str(pos) - - # DO NOT DELETE - stored_hgvs_not_delins = hp.parse_hgvs_variant(str( - hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) - - # Apply gap code to re-format hgvs_coding - # Store the current hgvs:c. 
description - saved_hgvs_coding = copy.deepcopy(hgvs_coding) - - # Get orientation of the gene wrt genome and a list of exons mapped to the genome - ori = va_func.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=hgvs_genomic_5pr.ac, - alt_aln_method=alt_aln_method, hdp=hdp) - orientation = int(ori[0]['alt_strand']) - - # Look for normalized variant options that do not match hgvs_coding - hgvs_genomic = copy.deepcopy(hgvs_genomic_variant) - if orientation == -1: - # position genomic at its most 5 prime position - try: - query_genomic = reverse_normalizer.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript ant test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding - - elif orientation != -1: - # position genomic at its most 3 prime position - try: - query_genomic = hn.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript ant test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > 
( - saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): - pass - else: - hgvs_seek_var = saved_hgvs_coding - - # Loop out gap finding code under these circumstances! - logger.warning("gap_compensation_2 = " + str(gap_compensation)) - if gap_compensation is True: - logger.warning('g_to_t gap code 2 active') - # is it in an exon? - is_it_in_an_exon = 'no' - for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - # Take from stored copy - # hgvs_genomic_5pr = copy.deepcopy(stored_hgvs_genomic_5pr) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - is_it_in_an_exon = 'yes' - if is_it_in_an_exon == 'yes': - # map form reverse normalized g. to c. - hgvs_from_5n_g = no_norm_evm.g_to_t(hgvs_genomic_5pr, saved_hgvs_coding.ac) - - # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths - disparity_deletion_in = ['false', 'false'] - if stored_hgvs_not_delins != '': - # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) - # This test will only occur in dup of single base, insertion or substitution - if not re.search('_', str(hgvs_not_delins.posedit.pos)): - if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', - hgvs_genomic_5pr.posedit.edit.type): - # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos - plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) - plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 - plussed_hgvs_not_delins.posedit.edit.ref = '' - transcript_variant = no_norm_evm.g_to_t(plussed_hgvs_not_delins, - str(saved_hgvs_coding.ac)) - if (( - transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( - hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - else: - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = 
hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - else: - pass - else: - pass - - hard_fail = 'false' - try: - tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) - except Exception as e: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - tx_hgvs_not_delins = hgvs_coding - hard_fail = 'true' - - # Create normalized version of tx_hgvs_not_delins - rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) - # Check for +ve base and adjust - if re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search('\+', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - exceptPass() - - elif re.search('\+', 
str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: - # pass - - # Check for -ve base and adjust - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search('\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - exceptPass() - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base back to 
next available non-offset base - # rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base - 1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # Delete the ref - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # Add the additional base to the ALT - start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) - rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - else: - pass - - # Logic - if len(hgvs_not_delins.posedit.edit.ref) < len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( - hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['chromosome', gap_length] - elif 
len(hgvs_not_delins.posedit.edit.ref) > len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( - rn_tx_hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] - else: - re_capture_tx_variant = [] - for internal_possibility in hgvs_genomic_possibilities: - - if internal_possibility == '': - continue - - hgvs_t_possibility = vm.g_to_t(internal_possibility, hgvs_coding.ac) - if hgvs_t_possibility.posedit.edit.type == 'ins': - try: - hgvs_t_possibility = vm.c_to_n(hgvs_t_possibility) - except: - exceptPass() - ins_ref = sf.fetch_seq(hgvs_t_possibility.ac, - hgvs_t_possibility.posedit.pos.start.base - 1, - hgvs_t_possibility.posedit.pos.start.base + 1) - try: - hgvs_t_possibility = vm.n_to_c(hgvs_t_possibility) - except: - exceptPass() - hgvs_t_possibility.posedit.edit.ref = ins_ref - hgvs_t_possibility.posedit.edit.alt = ins_ref[ - 0] + hgvs_t_possibility.posedit.edit.alt + \ - ins_ref[1] - if internal_possibility.posedit.edit.type == 'ins': - ins_ref = sf.fetch_seq(internal_possibility.ac, - internal_possibility.posedit.pos.start.base - 1, - internal_possibility.posedit.pos.end.base) - internal_possibility.posedit.edit.ref = ins_ref - internal_possibility.posedit.edit.alt = ins_ref[ - 0] + internal_possibility.posedit.edit.alt + \ - ins_ref[1] - - if len(hgvs_t_possibility.posedit.edit.ref) < len( - internal_possibility.posedit.edit.ref): - gap_length = len(internal_possibility.posedit.edit.ref) - len( - hgvs_t_possibility.posedit.edit.ref) - re_capture_tx_variant = ['transcript', gap_length, hgvs_t_possibility] - hgvs_not_delins = internal_possibility - hgvs_genomic_5pr = internal_possibility - break - - if re_capture_tx_variant != []: - try: - tx_hgvs_not_delins = vm.c_to_n(re_capture_tx_variant[2]) - except: - tx_hgvs_not_delins = re_capture_tx_variant[2] - disparity_deletion_in = re_capture_tx_variant[0:-1] - else: - pass - - # Final sanity checks - try: - 
vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) - except Exception as e: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - logger.warning(str(e)) - continue - try: - hn.normalize(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - logger.warning(error) - continue - elif re.match('Normalization of intronic variants is not supported', error): - # We know that this cannot be because of an intronic variant, so must be aligned to tx gap - disparity_deletion_in = ['transcript', 'Requires Analysis'] - - if hard_fail == 'true': - disparity_deletion_in = ['false', 'false'] - - # Recreate hgvs_genomic - if disparity_deletion_in[0] == 'transcript': - hgvs_genomic = hgvs_not_delins - - # Pre-processing of tx_hgvs_not_delins - try: - if tx_hgvs_not_delins.posedit.edit.alt is None: - tx_hgvs_not_delins.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' 
+ str( - tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) - - - # GAP IN THE TRANSCRIPT DISPARITY DETECTED - if disparity_deletion_in[0] == 'transcript': - gap_position = '' - gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + primary_assembly - - # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search('\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search('\-', - str( - tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search('\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search('\-', - str( - tx_hgvs_not_delins.posedit.pos.end))): - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - - # Copy the current variant - tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) - try: - if tx_gap_fill_variant.posedit.edit.alt is None: - tx_gap_fill_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( - tx_gap_fill_variant.posedit.pos.start) + '_' + str( - tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = hp.parse_hgvs_variant( - tx_gap_fill_variant_delins_from_dup) - - # Identify which half of the NOT-intron the start position of the variant is in - if re.search('\-', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search('\+', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - - try: - tx_gap_fill_variant = vm.n_to_c(tx_gap_fill_variant) - except: - exceptPass() - genomic_gap_fill_variant = vm.t_to_g(tx_gap_fill_variant, - reverse_normalized_hgvs_genomic.ac) - genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - - try: - c_tx_hgvs_not_delins = vm.n_to_c(tx_hgvs_not_delins) - except Exception: - c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = vm.t_to_g(c_tx_hgvs_not_delins, - hgvs_genomic_5pr.ac) - - # Ensure an ALT exists - try: - if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + 
genomic_gap_fill_variant.type + '.' + str( - genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = hp.parse_hgvs_variant( - genomic_gap_fill_variant_delins_from_dup) - genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( - genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = hp.parse_hgvs_variant( - genomic_gap_fill_variant_alt_delins_from_dup) - - # Correct insertion alts - if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - genomic_gap_fill_variant_alt.posedit.pos.end.base) - genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - append_ref[1] - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) - else: - # Deletions with no ins - pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = genomic_gap_fill_variant.posedit.pos.start.base - alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base - ref_base_dict = {} - for base in 
reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = ref_start + 1 - - alt_base_dict = {} - - # NEED TO SEARCH FOR RANGE = and replace with interval_range - # Need to search for int and replace with integer - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, - genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): - if integer == alt_start: - alt_base_dict[integer] = str(''.join(alternate_bases)) - else: - alt_base_dict[integer] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, - genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): - if integer in alt_base_dict.keys(): - alternate_sequence_bases.append(alt_base_dict[integer]) - else: - alternate_sequence_bases.append(ref_base_dict[integer]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Add the new alt to the gap fill variant and generate transcript variant - genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = vm.g_to_t(genomic_gap_fill_variant, - tx_gap_fill_variant.ac) - - # Set warning - gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - disparity_deletion_in[1] = [gap_size] - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - if re.match('\-', str(for_location_c.posedit.pos.start.offset)): - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - else: - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - auto_info = auto_info + '%s' % (gap_position) - - else: - if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # In this instance, we have identified a transcript gap but the n. version of - # the transcript variant but do not have a position which actually hits the gap, - # so the variant likely spans the gap, and is not picked up by an offset. - try: - c1 = vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - g1 = nr_vm.t_to_g(c1, hgvs_genomic.ac) - g3 = nr_vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - ng2 = hn.normalize(g2) - g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - len(g3.posedit.edit.ref) - 1) - try: - c2 = vm.g_to_t(g3, c1.ac) - if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - pass - else: - tx_hgvs_not_delins = c2 - try: - tx_hgvs_not_delins = vm.c_to_n(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSError: - exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError: - exceptPass() - - if re.search('\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - '\+', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search('\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - '\+', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - gpe = for_location_c.posedit.pos.end.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search('\-', - str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - '\-', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search('\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - '\-', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - 1 - gpe = for_location_c.posedit.pos.end.base - gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) + hgvs_refreshed_variant = saved_hgvs_coding + + # Edit the output + if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( + hgvs_refreshed_variant.type)): + hgvs_refreshed_variant = evm.n_to_c(hgvs_refreshed_variant) + else: + pass + try: + hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) + if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ + hgvs_refreshed_variant.posedit.edit.ref[-1] == \ + hgvs_refreshed_variant.posedit.edit.alt[-1]: + hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ + 0:-1] + hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ + 0:-1] + hgvs_refreshed_variant.posedit.pos.end.base = hgvs_refreshed_variant.posedit.pos.end.base - 1 + hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) + elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ + hgvs_refreshed_variant.posedit.edit.ref[0] == \ + hgvs_refreshed_variant.posedit.edit.alt[0]: + hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ + 1:] + hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ + 1:] + hgvs_refreshed_variant.posedit.pos.start.base = hgvs_refreshed_variant.posedit.pos.start.base + 1 + hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) + except Exception as e: + error = str(e) + # Ensure the final variant is not intronic nor does it cross exon boundaries + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + hgvs_refreshed_variant = saved_hgvs_coding else: - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) + '\n' - hgvs_refreshed_variant = tx_hgvs_not_delins - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + pass - # GAP IN THE CHROMOSOME - - elif disparity_deletion_in[0] == 'chromosome': - # Set warning variables - gap_position = '' - gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + primary_assembly - hgvs_refreshed_variant = tx_hgvs_not_delins - # Warn - auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( - hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(disparity_deletion_in[ - 1]) + ' transcript base(s) that fail to align to chromosome ' + str( - hgvs_genomic.ac) + '\n' - gapped_transcripts = gapped_transcripts + str(hgvs_refreshed_variant.ac) + ' ' - else: - # Keep the same by re-setting rel_var - hgvs_refreshed_variant = saved_hgvs_coding + # Sort out equality to equality c. 
events where the code will add 2 additional bases + if hgvs_coding.posedit.edit.type == 'identity' and hgvs_refreshed_variant.posedit.edit.type == 'identity': # and len(hgvs_refreshed_variant.posedit.edit.ref) == (len(hgvs_coding.posedit.edit.ref) + 2): + pass + else: + hgvs_coding = copy.deepcopy(hgvs_refreshed_variant) + coding = fn.valstr(hgvs_coding) + variant = coding + + # OBTAIN THE RefSeqGene coordinates + # Attempt 1 = UTA + sequences_for_tx = self.hdp.get_tx_mapping_options(hgvs_coding.ac) + recovered_rsg = [] + + for sequence in sequences_for_tx: + if re.match('^NG_', sequence[1]): + recovered_rsg.append(sequence[1]) + recovered_rsg.sort() + recovered_rsg.reverse() + + if 'NG_' in recovered_rsg: + refseqgene_ac = recovered_rsg + else: + refseqgene_ac = '' - # Edit the output - if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( - hgvs_refreshed_variant.type)): - hgvs_refreshed_variant = evm.n_to_c(hgvs_refreshed_variant) - else: - pass + # Given the difficulties with mapping to and from RefSeqGenes, we now solely rely on UTA + if refseqgene_ac != '': + hgvs_refseq = vm.t_to_g(hgvs_coding, refseqgene_ac) + # Normalize the RefSeqGene Variant to the correct position try: - hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) - if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - hgvs_refreshed_variant.posedit.edit.ref[-1] == \ - hgvs_refreshed_variant.posedit.edit.alt[-1]: - hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - 0:-1] - hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - 0:-1] - hgvs_refreshed_variant.posedit.pos.end.base = hgvs_refreshed_variant.posedit.pos.end.base - 1 - hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) - elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - hgvs_refreshed_variant.posedit.edit.ref[0] == \ - hgvs_refreshed_variant.posedit.edit.alt[0]: - 
hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - 1:] - hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - 1:] - hgvs_refreshed_variant.posedit.pos.start.base = hgvs_refreshed_variant.posedit.pos.start.base + 1 - hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) + hgvs_refseq = hn.normalize(hgvs_refseq) except Exception as e: - error = str(e) - # Ensure the final variant is not intronic nor does it cross exon boundaries - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - hgvs_refreshed_variant = saved_hgvs_coding - else: - pass - - # Sort out equality to equality c. events where the code will add 2 additional bases - if hgvs_coding.posedit.edit.type == 'identity' and hgvs_refreshed_variant.posedit.edit.type == 'identity': # and len(hgvs_refreshed_variant.posedit.edit.ref) == (len(hgvs_coding.posedit.edit.ref) + 2): + # if re.search('insertion length must be 1', error): + hgvs_refseq = 'RefSeqGene record not available' + refseq = 'RefSeqGene record not available' + hgvs_refseq_ac = 'RefSeqGene record not available' pass else: - hgvs_coding = copy.deepcopy(hgvs_refreshed_variant) - coding = valstr(hgvs_coding) - variant = coding - - # OBTAIN THE RefSeqGene coordinates - # Attempt 1 = UTA - sequences_for_tx = hdp.get_tx_mapping_options(hgvs_coding.ac) - recovered_rsg = [] - - for sequence in sequences_for_tx: - if re.match('^NG_', sequence[1]): - recovered_rsg.append(sequence[1]) - recovered_rsg.sort() - recovered_rsg.reverse() - - if 'NG_' in recovered_rsg: - refseqgene_ac = recovered_rsg - else: - refseqgene_ac = '' - - # Given the difficulties with mapping to and from RefSeqGenes, we now solely rely on UTA - if refseqgene_ac != '': - hgvs_refseq = vm.t_to_g(hgvs_coding, refseqgene_ac) - # Normalize the RefSeqGene Variant to the correct position - try: - 
hgvs_refseq = hn.normalize(hgvs_refseq) - except Exception as e: - # if re.search('insertion length must be 1', error): + refseq = fn.valstr(hgvs_refseq) + hgvs_refseq_ac = hgvs_refseq.ac + else: hgvs_refseq = 'RefSeqGene record not available' refseq = 'RefSeqGene record not available' hgvs_refseq_ac = 'RefSeqGene record not available' - pass - else: - refseq = valstr(hgvs_refseq) - hgvs_refseq_ac = hgvs_refseq.ac - else: - hgvs_refseq = 'RefSeqGene record not available' - refseq = 'RefSeqGene record not available' - hgvs_refseq_ac = 'RefSeqGene record not available' - - # Predicted effect on protein - protein_dict = va_func.myc_to_p(hgvs_coding, evm, hdp, hp, hn, vm, sf, re_to_p=False) - if protein_dict['error'] == '': - hgvs_protein = protein_dict['hgvs_protein'] - protein = str(hgvs_protein) - else: - error = protein_dict['error'] - validation['warnings'] = validation['warnings'] + ': ' + str(error) - if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': + + # Predicted effect on protein + protein_dict = va_func.myc_to_p(hgvs_coding, evm, self.hdp, hp, hn, vm, sf, re_to_p=False) + if protein_dict['error'] == '': hgvs_protein = protein_dict['hgvs_protein'] protein = str(hgvs_protein) else: - logger.error(error) - continue - - # Gene orientation wrt genome - ori = va_func.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=hgvs_genomic.ac, - alt_aln_method=alt_aln_method, hdp=hdp) - ori = int(ori[0]['alt_strand']) + error = protein_dict['error'] + validation['warnings'] = validation['warnings'] + ': ' + str(error) + if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': + hgvs_protein = protein_dict['hgvs_protein'] + protein = str(hgvs_protein) + else: + logger.error(error) + continue - # Look for normalized variant options that do not match hgvs_coding - # boundary crossing normalization - # Re-Save the required variants - hgvs_seek_var = copy.deepcopy(hgvs_coding) - saved_hgvs_coding = 
copy.deepcopy(hgvs_coding) + # Gene orientation wrt genome + ori = va_func.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=hgvs_genomic.ac, + alt_aln_method=alt_aln_method, hdp=self.hdp) + ori = int(ori[0]['alt_strand']) - if ori == -1: - # position genomic at its most 5 prime position - try: - query_genomic = reverse_normalizer.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if saved_hgvs_coding.posedit.edit.type != hgvs_seek_var.posedit.edit.type: - rec_var = 'false' - hgvs_seek_var = saved_hgvs_coding - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - elif suppress_c_normalization == 'true': - rec_var = 'false' - hgvs_seek_var = saved_hgvs_coding - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - elif (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - try: - automap = valstr(saved_hgvs_coding) + ' normalized to ' + valstr(hgvs_seek_var) - hgvs_coding = hgvs_seek_var - coding = valstr(hgvs_coding) - validation['warnings'] = validation['warnings'] + ': ' + automap - rng = hn.normalize(query_genomic) - except NotImplementedError: - pass - try: - c_for_p = vm.g_to_t(rng, hgvs_coding.ac) - except hgvs.exceptions.HGVSInvalidIntervalError as e: - c_for_p = seek_var - try: - # Predicted effect on protein - protein_dict = va_func.myc_to_p(c_for_p, evm, hdp, hp, hn, vm, sf, re_to_p=False) - if protein_dict['error'] == '': - hgvs_protein = 
protein_dict['hgvs_protein'] - protein = str(hgvs_protein) - else: - error = protein_dict['error'] - if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': - hgvs_protein = protein_dict['hgvs_protein'] - validation['warnings'] = validation['warnings'] + ': ' + str(error) - # Replace protein description in vars table - protein = str(hgvs_protein) - except NotImplementedError: - exceptPass() - else: - # Double check protein position by normalize genomic, and normalize back to c. for normalize or not to normalize issue - coding = valstr(hgvs_coding) + # Look for normalized variant options that do not match hgvs_coding + # boundary crossing normalization + # Re-Save the required variants + hgvs_seek_var = copy.deepcopy(hgvs_coding) + saved_hgvs_coding = copy.deepcopy(hgvs_coding) - elif ori != -1: - # position genomic at its most 3 prime position - try: - query_genomic = hn.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if saved_hgvs_coding.posedit.edit.type != hgvs_seek_var.posedit.edit.type: - rec_var = 'false' - hgvs_seek_var = saved_hgvs_coding - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - elif suppress_c_normalization == 'true': - rec_var = 'false' - hgvs_seek_var = saved_hgvs_coding - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - elif (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset) and rec_var != 
'false': + if ori == -1: + # position genomic at its most 5 prime position try: - automap = valstr(saved_hgvs_coding) + ' normalized to ' + valstr(hgvs_seek_var) - hgvs_coding = hgvs_seek_var - coding = valstr(hgvs_coding) - validation['warnings'] = validation['warnings'] + ': ' + automap - except NotImplementedError: - exceptPass() - else: - # Double check protein position by reverse_norm genomic, and normalize back to c. for normalize or not to normalize issue - coding = valstr(hgvs_coding) - rng = reverse_normalizer.normalize(query_genomic) + query_genomic = reverse_normalizer.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript and test for movement try: - # Diagram where - = intron and E = Exon - - # 3 prime - # ---------EEEEEEEEEEEEEEEEE----------- - # < - # Result, normalize of new variant will baulk at intronic - # 5 prime - # < - # Result, normalize of new variant will be happy - c_for_p = vm.g_to_t(rng, hgvs_coding.ac) + hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if saved_hgvs_coding.posedit.edit.type != hgvs_seek_var.posedit.edit.type: + rec_var = 'false' + hgvs_seek_var = saved_hgvs_coding + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + elif suppress_c_normalization == 'true': + rec_var = 'false' + hgvs_seek_var = saved_hgvs_coding + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + elif (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': try: - hn.normalize(c_for_p) - except 
hgvs.exceptions.HGVSError as e: - exceptPass() - else: - # hgvs_protein = va_func.protein(str(c_for_p), evm, hp) - protein_dict = va_func.myc_to_p(c_for_p, evm, hdp, hp, hn, vm, sf, - re_to_p=False) + automap = fn.valstr(saved_hgvs_coding) + ' normalized to ' + fn.valstr(hgvs_seek_var) + hgvs_coding = hgvs_seek_var + coding = fn.valstr(hgvs_coding) + validation['warnings'] = validation['warnings'] + ': ' + automap + rng = hn.normalize(query_genomic) + except NotImplementedError: + pass + try: + c_for_p = vm.g_to_t(rng, hgvs_coding.ac) + except hgvs.exceptions.HGVSInvalidIntervalError as e: + c_for_p = seek_var + try: + # Predicted effect on protein + protein_dict = va_func.myc_to_p(c_for_p, evm, self.hdp, hp, hn, vm, sf, re_to_p=False) if protein_dict['error'] == '': hgvs_protein = protein_dict['hgvs_protein'] protein = str(hgvs_protein) @@ -6307,1961 +6240,2035 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr validation['warnings'] = validation['warnings'] + ': ' + str(error) # Replace protein description in vars table protein = str(hgvs_protein) - except Exception: - exceptPass() - - # Check for up-to-date transcript version - updated_transcript_variant = 'None' - tx_id_info = hdp.get_tx_identity_info(hgvs_coding.ac) - uta_gene_symbol = tx_id_info[6] - tx_for_gene = hdp.get_tx_for_gene(uta_gene_symbol) - ac_root, ac_version = hgvs_coding.ac.split('.') - version_tracking = '0' - update = '' - for accession in tx_for_gene: - try: - if re.match(ac_root, accession[3]): - query_version = accession[3].split('.')[1] - if int(query_version) > int(ac_version) and int(query_version) > int( - version_tracking): - version_tracking = query_version - update = accession[3] - except ValueError: - exceptPass() - - if update != '': - hgvs_updated = copy.deepcopy(hgvs_coding) - hgvs_updated.ac = update - try: - vr.validate(hgvs_updated) - # Updated reference sequence - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('does 
not agree with reference sequence', str(error)): - match = re.findall('\(([GATC]+)\)', error) - new_ref = match[1] - hgvs_updated.posedit.edit.ref = new_ref - vr.validate(hgvs_updated) - updated_transcript_variant = hgvs_updated + except NotImplementedError: + fn.exceptPass() else: - pass - updated_transcript_variant = hgvs_updated - validation['warnings'] = validation[ - 'warnings'] + ': ' + 'A more recent version of the selected reference sequence ' + hgvs_coding.ac + ' is available (' + updated_transcript_variant.ac + ')' + ': ' + str( - updated_transcript_variant) + ' MUST be fully validated prior to use in reports: select_variants=' + valstr( - updated_transcript_variant) - - # Set the data - set_output_type_flag = 'gene' - validation['description'] = hgnc_gene_info - validation['coding'] = str(hgvs_coding) - validation['genomic_r'] = str(hgvs_refseq) - validation['genomic_g'] = str(hgvs_genomic) - validation['protein'] = str(hgvs_protein) - validation['primary_assembly'] = primary_assembly - if gap_compensation is True: - validation['test_stash_tx_left'] = test_stash_tx_left - validation['test_stash_tx_right'] = test_stash_tx_right - # finish timing - logger.traceEnd(validation) - # Report errors to User and VV admin - except KeyboardInterrupt: - raise - except: - set_output_type_flag = 'error' - error = 'Validation error' - validation['warnings'] = str(error) - exc_type, exc_value, last_traceback = sys.exc_info() - te = traceback.format_exc() - tbk = [str(exc_type), str(exc_value), str(te)] - er = str('\n'.join(tbk)) - logger.error(str(exc_type) + " " + str(exc_value)) - logger.debug(er) - - continue - - # Outside the for loop - ###################### - logger.trace("End of for loop") - # order the rows - # from operator import itemgetter - by_order = sorted(batch_list, key=itemgetter('order')) - - for valid in by_order: - if 'write' in valid.keys(): - if valid['write'] == 'true': - # Blank VCF - # chr = '' - # pos = '' - # ref = '' - # alt = '' - - # 
Fromulate a json type response - dict_out = {} - - # Set gap compensation bool - gap_compensation = True - - # warngins - warnings = valid['warnings'] - warnings = re.sub('del[GATC][GATC][GATC][GATC]+', 'del', warnings) - warnings = re.sub('^: ', '', warnings) - warnings = re.sub('::', ':', warnings) - - # Submitted variant - submitted = valid['id'] - - # Genomic sequence variation - genomic_variant = valid['genomic_g'] - - # genomic accession - if genomic_variant != '': - hgvs_genomic_variant = hp.parse_hgvs_variant(genomic_variant) - genomic_variant = valstr(hgvs_genomic_variant) - genomic_accession = hgvs_genomic_variant.ac - else: - genomic_accession = '' - - # RefSeqGene variation - refseqgene_variant = valid['genomic_r'] - refseqgene_variant = refseqgene_variant.strip() - if re.search('RefSeqGene', refseqgene_variant) or refseqgene_variant == '': - warnings = warnings + ': ' + refseqgene_variant - refseqgene_variant = '' - lrg_variant = '' - hgvs_refseqgene_variant = 'false' - else: - hgvs_refseqgene_variant = hp.parse_hgvs_variant(refseqgene_variant) - rsg_ac = va_dbCrl.data.get_lrgID_from_RefSeqGeneID(str(hgvs_refseqgene_variant.ac)) - if rsg_ac[0] == 'none': - lrg_variant = '' - else: - hgvs_lrg = copy.deepcopy(hgvs_refseqgene_variant) - hgvs_lrg.ac = rsg_ac[0] - lrg_variant = valstr(hgvs_lrg) - if rsg_ac[1] == 'public': - pass - else: - warnings = warnings + ': The current status of ' + str( - hgvs_lrg.ac) + ' is pending therefore changes may be made to the LRG reference sequence' - - # Transcript sequence variation - tx_variant = valid['coding'] - if tx_variant != '': - if '(' in tx_variant and ')' in tx_variant: - tx_variant = tx_variant.split('(')[1] - tx_variant = tx_variant.replace(')', '') - - # transcript accession - hgvs_tx_variant = hp.parse_hgvs_variant(tx_variant) - tx_variant = valstr(hgvs_tx_variant) - hgvs_transcript_variant = hp.parse_hgvs_variant(tx_variant) - transcript_accession = hgvs_transcript_variant.ac - - # Handle LRG - lrg_status 
= 'public' - lrg_transcript = va_dbCrl.data.get_lrgTranscriptID_from_RefSeqTranscriptID(transcript_accession) - if lrg_transcript == 'none': - lrg_transcript_variant = '' - else: - # Note - LRG availability is dependant on UTA containing the data. In some - # instances we will be able to display the LRG_tx without being able to - # display the LRG gene data + # Double check protein position by normalize genomic, and normalize back to c. for normalize or not to normalize issue + coding = fn.valstr(hgvs_coding) - # if not re.search('RefSeqGene', refseqgene_variant) or refseqgene_variant != '': - # if hgvs_refseqgene_variant != 'RefSeqGene record not available' and hgvs_refseqgene_variant != 'false': - try: - hgvs_lrg_t = vm.g_to_t(hgvs_refseqgene_variant, transcript_accession) - hgvs_lrg_t.ac = lrg_transcript - lrg_transcript_variant = valstr(hgvs_lrg_t) - except: - if hgvs_transcript_variant.posedit.pos.start.offset == 0 and hgvs_transcript_variant.posedit.pos.end.offset == 0: - hgvs_lrg_t = copy.copy(hgvs_transcript_variant) - hgvs_lrg_t.ac = lrg_transcript - lrg_transcript_variant = valstr(hgvs_lrg_t) + elif ori != -1: + # position genomic at its most 3 prime position + try: + query_genomic = hn.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript and test for movement + try: + hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding else: - lrg_transcript_variant = '' - else: - transcript_accession = '' - lrg_transcript_variant = '' - - # Look for intronic variants - if transcript_accession != '' and genomic_accession != '': - # Remove del bases - str_transcript = valstr(hgvs_transcript_variant) - hgvs_transcript_variant = hp.parse_hgvs_variant(str_transcript) - try: - vr.validate(hgvs_transcript_variant) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('intronic variant', error): - genome_context_transcript_variant = 
genomic_accession + '(' + transcript_accession + '):c.' + str( - hgvs_transcript_variant.posedit) - if refseqgene_variant != '': - hgvs_refseqgene_variant = hp.parse_hgvs_variant(refseqgene_variant) - refseqgene_accession = hgvs_refseqgene_variant.ac - hgvs_coding_from_refseqgene = vm.g_to_t(hgvs_refseqgene_variant, - hgvs_transcript_variant.ac) - hgvs_coding_from_refseqgene = valstr(hgvs_coding_from_refseqgene) - hgvs_coding_from_refseqgene = hp.parse_hgvs_variant(hgvs_coding_from_refseqgene) - RefSeqGene_context_transcript_variant = refseqgene_accession + '(' + transcript_accession + '):c.' + str( - hgvs_coding_from_refseqgene.posedit.pos) + str( - hgvs_coding_from_refseqgene.posedit.edit) + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if saved_hgvs_coding.posedit.edit.type != hgvs_seek_var.posedit.edit.type: + rec_var = 'false' + hgvs_seek_var = saved_hgvs_coding + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + elif suppress_c_normalization == 'true': + rec_var = 'false' + hgvs_seek_var = saved_hgvs_coding + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + elif (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + try: + automap = fn.valstr(saved_hgvs_coding) + ' normalized to ' + fn.valstr(hgvs_seek_var) + hgvs_coding = hgvs_seek_var + coding = fn.valstr(hgvs_coding) + validation['warnings'] = validation['warnings'] + ': ' + automap + except NotImplementedError: + fn.exceptPass() else: - RefSeqGene_context_transcript_variant = '' - else: - genome_context_transcript_variant = '' # transcript_variant - RefSeqGene_context_transcript_variant = '' - else: - 
genome_context_transcript_variant = '' # transcript_variant - RefSeqGene_context_transcript_variant = '' - else: - genome_context_transcript_variant = '' - RefSeqGene_context_transcript_variant = '' - - # Protein description - predicted_protein_variant = valid['protein'] - if re.match('NP_', predicted_protein_variant): - rs_p, pred_prot_posedit = predicted_protein_variant.split(':') - lrg_p = va_dbCrl.data.get_lrgProteinID_from_RefSeqProteinID(rs_p) - if re.match('LRG', lrg_p): - predicted_protein_variant = rs_p + '(' + lrg_p + '):' + pred_prot_posedit - - # Gene - if transcript_accession != '': - try: - gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(transcript_accession) - except: - gene_symbol = 'Unable to verify gene symbol for ' + str(transcript_accession) - else: - gene_symbol = '' - - # Transcript description - transcript_description = valid['description'] + # Double check protein position by reverse_norm genomic, and normalize back to c. for normalize or not to normalize issue + coding = fn.valstr(hgvs_coding) + rng = reverse_normalizer.normalize(query_genomic) + try: + # Diagram where - = intron and E = Exon + + # 3 prime + # ---------EEEEEEEEEEEEEEEEE----------- + # < + # Result, normalize of new variant will baulk at intronic + # 5 prime + # < + # Result, normalize of new variant will be happy + c_for_p = vm.g_to_t(rng, hgvs_coding.ac) + try: + hn.normalize(c_for_p) + except hgvs.exceptions.HGVSError as e: + fn.exceptPass() + else: + # hgvs_protein = va_func.protein(str(c_for_p), evm, hp) + protein_dict = va_func.myc_to_p(c_for_p, evm, self.hdp, hp, hn, vm, sf, + re_to_p=False) + if protein_dict['error'] == '': + hgvs_protein = protein_dict['hgvs_protein'] + protein = str(hgvs_protein) + else: + error = protein_dict['error'] + if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': + hgvs_protein = protein_dict['hgvs_protein'] + validation['warnings'] = validation['warnings'] + ': ' + str(error) + # Replace 
protein description in vars table + protein = str(hgvs_protein) + except Exception: + fn.exceptPass() + + # Check for up-to-date transcript version + updated_transcript_variant = 'None' + tx_id_info = self.hdp.get_tx_identity_info(hgvs_coding.ac) + uta_gene_symbol = tx_id_info[6] + tx_for_gene = self.hdp.get_tx_for_gene(uta_gene_symbol) + ac_root, ac_version = hgvs_coding.ac.split('.') + version_tracking = '0' + update = '' + for accession in tx_for_gene: + try: + if re.match(ac_root, accession[3]): + query_version = accession[3].split('.')[1] + if int(query_version) > int(ac_version) and int(query_version) > int( + version_tracking): + version_tracking = query_version + update = accession[3] + except ValueError: + fn.exceptPass() + + if update != '': + hgvs_updated = copy.deepcopy(hgvs_coding) + hgvs_updated.ac = update + try: + vr.validate(hgvs_updated) + # Updated reference sequence + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('does not agree with reference sequence', str(error)): + match = re.findall('\(([GATC]+)\)', error) + new_ref = match[1] + hgvs_updated.posedit.edit.ref = new_ref + vr.validate(hgvs_updated) + updated_transcript_variant = hgvs_updated + else: + pass + updated_transcript_variant = hgvs_updated + validation['warnings'] = validation[ + 'warnings'] + ': ' + 'A more recent version of the selected reference sequence ' + hgvs_coding.ac + ' is available (' + updated_transcript_variant.ac + ')' + ': ' + str( + updated_transcript_variant) + ' MUST be fully validated prior to use in reports: select_variants=' + fn.valstr( + updated_transcript_variant) + + # Set the data + set_output_type_flag = 'gene' + validation['description'] = hgnc_gene_info + validation['coding'] = str(hgvs_coding) + validation['genomic_r'] = str(hgvs_refseq) + validation['genomic_g'] = str(hgvs_genomic) + validation['protein'] = str(hgvs_protein) + validation['primary_assembly'] = primary_assembly + if gap_compensation is True: + 
validation['test_stash_tx_left'] = test_stash_tx_left + validation['test_stash_tx_right'] = test_stash_tx_right + # finish timing + logger.traceEnd(validation) + # Report errors to User and VV admin + except KeyboardInterrupt: + raise + except: + set_output_type_flag = 'error' + error = 'Validation error' + validation['warnings'] = str(error) + exc_type, exc_value, last_traceback = sys.exc_info() + te = traceback.format_exc() + tbk = [str(exc_type), str(exc_value), str(te)] + er = str('\n'.join(tbk)) + logger.error(str(exc_type) + " " + str(exc_value)) + logger.debug(er) - # Stashed variants - if 'test_stash_tx_left' not in validation: - pass - else: - test_stash_tx_left = validation['test_stash_tx_left'] - if 'test_stash_tx_right' not in validation: - pass - else: - test_stash_tx_right = validation['test_stash_tx_right'] + continue - # Multiple genomic variants - # multi_gen_vars = [] - if tx_variant != '': - hgvs_coding = hp.parse_hgvs_variant(str(tx_variant)) - # Gap gene black list - try: - gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(hgvs_coding.ac) - except Exception: - exceptPass() - else: - # If the gene symbol is not in the list, the value False will be returned - gap_compensation = gapGenes.gap_black_list(gene_symbol) + # Outside the for loop + ###################### + logger.trace("End of for loop") + # order the rows + # from operator import itemgetter + by_order = sorted(batch_list, key=itemgetter('order')) + + for valid in by_order: + if 'write' in valid.keys(): + if valid['write'] == 'true': + # Blank VCF + # chr = '' + # pos = '' + # ref = '' + # alt = '' + + # Fromulate a json type response + dict_out = {} + + # Set gap compensation bool + gap_compensation = True - # Look for variants spanning introns - try: - hgvs_coding = hn.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.search('boundary', str(error)) or re.search('spanning', str(error)): - gap_compensation = False - 
else: - pass - except hgvs.exceptions.HGVSError: - exceptPass() - - # Warn gap code status - logger.warning("gap_compensation_3 = " + str(gap_compensation)) - multi_g = [] - multi_list = [] - mapping_options = hdp.get_tx_mapping_options(hgvs_coding.ac) - for alt_chr in mapping_options: - if (re.match('NC_', alt_chr[1]) or re.match('NT_', alt_chr[1]) or re.match('NW_', - alt_chr[1])) and \ - alt_chr[2] == alt_aln_method: - multi_list.append(alt_chr[1]) - - for alt_chr in multi_list: - try: - # Re set ori - ori = va_func.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=alt_chr, - alt_aln_method=alt_aln_method, hdp=hdp) - orientation = int(ori[0]['alt_strand']) - hgvs_alt_genomic = va_func.myvm_t_to_g(hgvs_coding, alt_chr, no_norm_evm, vm, hp, hn, - sf, nr_vm) - # Set hgvs_genomic accordingly - hgvs_genomic = copy.deepcopy(hgvs_alt_genomic) - - # genomic_possibilities - # 1. take the simple 3 pr normalized hgvs_genomic - # 2. Lock in hgvs_genomic at its most 5 prime position wrt genome - hgvs_genomic_possibilities = [] - - # Loop out gap code under these circumstances! 
- if gap_compensation is True: - logger.warning('g_to_t gap code 3 active') - rn_hgvs_genomic = reverse_normalizer.normalize(hgvs_alt_genomic) - hgvs_genomic_possibilities.append(rn_hgvs_genomic) - if orientation != -1: - try: - chromosome_normalized_hgvs_coding = reverse_normalizer.normalize( - hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - chromosome_normalized_hgvs_coding = hgvs_coding - else: - try: - chromosome_normalized_hgvs_coding = hn.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - chromosome_normalized_hgvs_coding = hgvs_coding + # warngins + warnings = valid['warnings'] + warnings = re.sub('del[GATC][GATC][GATC][GATC]+', 'del', warnings) + warnings = re.sub('^: ', '', warnings) + warnings = re.sub('::', ':', warnings) - most_3pr_hgvs_genomic = va_func.myvm_t_to_g(chromosome_normalized_hgvs_coding, - alt_chr, - no_norm_evm, vm, hp, hn, sf, nr_vm) - hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) + # Submitted variant + submitted = valid['id'] - # First to the right - hgvs_stash = copy.deepcopy(hgvs_coding) - try: - hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) - except: - exceptPass() - try: - stash_ac = hgvs_stash.ac - stash_dict = va_H2V.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, sf) - stash_pos = int(stash_dict['pos']) - stash_ref = stash_dict['ref'] - stash_alt = stash_dict['alt'] - # Generate an end position - stash_end = str(stash_pos + len(stash_ref) - 1) - # make a not real deletion insertion - stash_hgvs_not_delins = hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' 
+ str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) - try: - stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) - except: - exceptPass() - # Store a tx copy for later use - test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) - stash_genomic = va_func.myvm_t_to_g(test_stash_tx_right, hgvs_alt_genomic.ac, - no_norm_evm, vm, hp, hn, sf, nr_vm) - # Stash the outputs if required - # test variants = NC_000006.11:g.90403795G= (causes double identity) - # NC_000002.11:g.73675227_73675228insCTC (? incorrect assumed insertion position) - # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) - # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': - # pass - if len(test_stash_tx_right.posedit.edit.ref) == (( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - stash_tx_right = test_stash_tx_right - if hasattr(test_stash_tx_right.posedit.edit, - 'alt') and test_stash_tx_right.posedit.edit.alt is not None: - alt = test_stash_tx_right.posedit.edit.alt - else: - alt = '' - if hasattr(stash_genomic.posedit.edit, - 'alt') and stash_genomic.posedit.edit.alt is not None: - g_alt = stash_genomic.posedit.edit.alt - else: - g_alt = '' - if (len(alt) - ( - test_stash_tx_right.posedit.pos.end.base - test_stash_tx_right.posedit.pos.start.base) + 1) != ( - len(g_alt) - ( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - hgvs_genomic_possibilities.append(stash_genomic) - else: - hgvs_genomic_possibilities.append('') - elif test_stash_tx_right.posedit.edit.type == 'identity': - reform_ident = str(test_stash_tx_right).split(':')[0] - reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_right.posedit.pos) + 'del' + str( - test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) - hgvs_reform_ident = hp.parse_hgvs_variant(reform_ident) - try: - hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error): - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append('') - else: - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append(stash_genomic) - else: - try: - hn.normalize(test_stash_tx_right) - except hgvs.exceptions.HGVSUnsupportedOperationError: - hgvs_genomic_possibilities.append('') - else: - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: - exceptPass() - except ValueError: - exceptPass() + # Genomic sequence variation + genomic_variant = valid['genomic_g'] - # Then to the left - hgvs_stash = copy.deepcopy(hgvs_coding) - try: - hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) - except: - exceptPass() - try: - stash_ac = hgvs_stash.ac - stash_dict = va_H2V.hard_left_hgvs2vcf(hgvs_stash, primary_assembly, - reverse_normalizer, sf) - stash_pos = int(stash_dict['pos']) - stash_ref = stash_dict['ref'] - stash_alt = stash_dict['alt'] - # Generate an end position - stash_end = str(stash_pos + len(stash_ref) - 1) - # make a not real deletion insertion - stash_hgvs_not_delins = hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' 
+ str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) - try: - stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) - except: - exceptPass() - # Store a tx copy for later use - test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) - stash_genomic = va_func.myvm_t_to_g(test_stash_tx_left, hgvs_alt_genomic.ac, - no_norm_evm, vm, hp, hn, sf, nr_vm) - # Stash the outputs if required - # test variants = NC_000006.11:g.90403795G= (causes double identity) - # NC_000002.11:g.73675227_73675228insCTC - # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) - # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': - # pass - if len(test_stash_tx_left.posedit.edit.ref) == (( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): - stash_tx_left = test_stash_tx_left - if hasattr(test_stash_tx_left.posedit.edit, - 'alt') and test_stash_tx_left.posedit.edit.alt is not None: - alt = test_stash_tx_left.posedit.edit.alt - else: - alt = '' - if hasattr(stash_genomic.posedit.edit, - 'alt') and stash_genomic.posedit.edit.alt is not None: - g_alt = stash_genomic.posedit.edit.alt - else: - g_alt = '' - if (len(alt) - ( - test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( - len(g_alt) - ( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - hgvs_genomic_possibilities.append(stash_genomic) - else: - hgvs_genomic_possibilities.append('') - elif test_stash_tx_left.posedit.edit.type == 'identity': - reform_ident = str(test_stash_tx_left).split(':')[0] - reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_left.posedit.pos) + 'del' + str( - test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) - hgvs_reform_ident = hp.parse_hgvs_variant(reform_ident) - try: - hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error): - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append('') - else: - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append(stash_genomic) - else: - try: - hn.normalize(test_stash_tx_left) - except hgvs.exceptions.HGVSUnsupportedOperationError: - hgvs_genomic_possibilities.append('') - else: - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: - exceptPass() - except ValueError: - exceptPass() + # genomic accession + if genomic_variant != '': + hgvs_genomic_variant = hp.parse_hgvs_variant(genomic_variant) + genomic_variant = fn.valstr(hgvs_genomic_variant) + genomic_accession = hgvs_genomic_variant.ac + else: + genomic_accession = '' + + # RefSeqGene variation + refseqgene_variant = valid['genomic_r'] + refseqgene_variant = refseqgene_variant.strip() + if re.search('RefSeqGene', refseqgene_variant) or refseqgene_variant == '': + warnings = warnings + ': ' + refseqgene_variant + refseqgene_variant = '' + lrg_variant = '' + hgvs_refseqgene_variant = 'false' + else: + hgvs_refseqgene_variant = hp.parse_hgvs_variant(refseqgene_variant) + rsg_ac = va_dbCrl.data.get_lrgID_from_RefSeqGeneID(str(hgvs_refseqgene_variant.ac)) + if rsg_ac[0] == 'none': + lrg_variant = '' + else: + hgvs_lrg = copy.deepcopy(hgvs_refseqgene_variant) + hgvs_lrg.ac = rsg_ac[0] + lrg_variant = fn.valstr(hgvs_lrg) + if rsg_ac[1] == 'public': + pass + else: + warnings = warnings + ': The current status of ' + str( + hgvs_lrg.ac) + ' is pending therefore changes may be made to the LRG reference sequence' + + # Transcript sequence 
variation + tx_variant = valid['coding'] + if tx_variant != '': + if '(' in tx_variant and ')' in tx_variant: + tx_variant = tx_variant.split('(')[1] + tx_variant = tx_variant.replace(')', '') + + # transcript accession + hgvs_tx_variant = hp.parse_hgvs_variant(tx_variant) + tx_variant = fn.valstr(hgvs_tx_variant) + hgvs_transcript_variant = hp.parse_hgvs_variant(tx_variant) + transcript_accession = hgvs_transcript_variant.ac + + # Handle LRG + lrg_status = 'public' + lrg_transcript = va_dbCrl.data.get_lrgTranscriptID_from_RefSeqTranscriptID(transcript_accession) + if lrg_transcript == 'none': + lrg_transcript_variant = '' + else: + # Note - LRG availability is dependant on UTA containing the data. In some + # instances we will be able to display the LRG_tx without being able to + # display the LRG gene data - # direct mapping from reverse_normalized transcript insertions in the delins format - try: - if hgvs_coding.posedit.edit.type == 'ins': - most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) - most_3pr_hgvs_transcript_variant = reverse_normalizer.normalize(hgvs_coding) - try: - n_3pr = vm.c_to_n(most_3pr_hgvs_transcript_variant) - n_5pr = vm.c_to_n(most_5pr_hgvs_transcript_variant) - except: - n_3pr = most_3pr_hgvs_transcript_variant - n_5pr = most_5pr_hgvs_transcript_variant - # Make into a delins by adding the ref bases to the variant ref and alt - pr3_ref = sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, - n_3pr.posedit.pos.end.base) - pr5_ref = sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, - n_5pr.posedit.pos.end.base) - most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref - most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref - most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[ - 0] + most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ - pr3_ref[1] - most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[ - 0] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ - pr5_ref[1] - # 
Map to the genome - genomic_from_most_3pr_hgvs_transcript_variant = vm.t_to_g( - most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) - genomic_from_most_5pr_hgvs_transcript_variant = vm.t_to_g( - most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) - - # Normalize - If the variant spans a gap it should then form a static genomic variant - try: - genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_3pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if error == 'base start position must be <= end position': - start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base - end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start - genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_3pr_hgvs_transcript_variant) - try: - genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_5pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if error == 'base start position must be <= end position': - start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base - end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start - genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_5pr_hgvs_transcript_variant) + # if not re.search('RefSeqGene', refseqgene_variant) or refseqgene_variant != '': + # if hgvs_refseqgene_variant != 'RefSeqGene record not available' and hgvs_refseqgene_variant != 'false': + try: + hgvs_lrg_t = vm.g_to_t(hgvs_refseqgene_variant, transcript_accession) + hgvs_lrg_t.ac = lrg_transcript + 
lrg_transcript_variant = fn.valstr(hgvs_lrg_t) + except: + if hgvs_transcript_variant.posedit.pos.start.offset == 0 and hgvs_transcript_variant.posedit.pos.end.offset == 0: + hgvs_lrg_t = copy.copy(hgvs_transcript_variant) + hgvs_lrg_t.ac = lrg_transcript + lrg_transcript_variant = fn.valstr(hgvs_lrg_t) + else: + lrg_transcript_variant = '' + else: + transcript_accession = '' + lrg_transcript_variant = '' - try: - if genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' + str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_3pr_hgvs_transcript_variant = hp.parse_hgvs_variant( - genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) + # Look for intronic variants + if transcript_accession != '' and genomic_accession != '': + # Remove del bases + str_transcript = fn.valstr(hgvs_transcript_variant) + hgvs_transcript_variant = hp.parse_hgvs_variant(str_transcript) + try: + vr.validate(hgvs_transcript_variant) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('intronic variant', error): + genome_context_transcript_variant = genomic_accession + '(' + transcript_accession + '):c.' 
+ str( + hgvs_transcript_variant.posedit) + if refseqgene_variant != '': + hgvs_refseqgene_variant = hp.parse_hgvs_variant(refseqgene_variant) + refseqgene_accession = hgvs_refseqgene_variant.ac + hgvs_coding_from_refseqgene = vm.g_to_t(hgvs_refseqgene_variant, + hgvs_transcript_variant.ac) + hgvs_coding_from_refseqgene = fn.valstr(hgvs_coding_from_refseqgene) + hgvs_coding_from_refseqgene = hp.parse_hgvs_variant(hgvs_coding_from_refseqgene) + RefSeqGene_context_transcript_variant = refseqgene_accession + '(' + transcript_accession + '):c.' + str( + hgvs_coding_from_refseqgene.posedit.pos) + str( + hgvs_coding_from_refseqgene.posedit.edit) + else: + RefSeqGene_context_transcript_variant = '' + else: + genome_context_transcript_variant = '' # transcript_variant + RefSeqGene_context_transcript_variant = '' + else: + genome_context_transcript_variant = '' # transcript_variant + RefSeqGene_context_transcript_variant = '' + else: + genome_context_transcript_variant = '' + RefSeqGene_context_transcript_variant = '' - try: - if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' 
+ str( - most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref - most_3pr_hgvs_transcript_variant = hp.parse_hgvs_variant( - most_3pr_hgvs_transcript_variant_delins_from_dup) + # Protein description + predicted_protein_variant = valid['protein'] + if re.match('NP_', predicted_protein_variant): + rs_p, pred_prot_posedit = predicted_protein_variant.split(':') + lrg_p = va_dbCrl.data.get_lrgProteinID_from_RefSeqProteinID(rs_p) + if re.match('LRG', lrg_p): + predicted_protein_variant = rs_p + '(' + lrg_p + '):' + pred_prot_posedit - try: - if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' 
+ str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_5pr_hgvs_transcript_variant = hp.parse_hgvs_variant( - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) + # Gene + if transcript_accession != '': + try: + gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(transcript_accession) + except: + gene_symbol = 'Unable to verify gene symbol for ' + str(transcript_accession) + else: + gene_symbol = '' - try: - if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' 
+ str( - most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref - most_5pr_hgvs_transcript_variant = hp.parse_hgvs_variant( - most_5pr_hgvs_transcript_variant_delins_from_dup) - - if len( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( - most_3pr_hgvs_transcript_variant.posedit.edit.alt): - hgvs_genomic_possibilities.append( - genomic_from_most_3pr_hgvs_transcript_variant) - if len( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( - most_5pr_hgvs_transcript_variant.posedit.edit.alt): - hgvs_genomic_possibilities.append( - genomic_from_most_5pr_hgvs_transcript_variant) + # Transcript description + transcript_description = valid['description'] - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - pass - exceptPass() + # Stashed variants + if 'test_stash_tx_left' not in validation: + pass + else: + test_stash_tx_left = validation['test_stash_tx_left'] + if 'test_stash_tx_right' not in validation: + pass + else: + test_stash_tx_right = validation['test_stash_tx_right'] - # Set variables for problem specific warnings - gapped_alignment_warning = '' - corrective_action_taken = '' - gapped_transcripts = '' - auto_info = '' + # Multiple genomic variants + # multi_gen_vars = [] + if tx_variant != '': + hgvs_coding = hp.parse_hgvs_variant(str(tx_variant)) + # Gap gene black list + try: + gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(hgvs_coding.ac) + except Exception: + fn.exceptPass() + else: + # If the gene symbol is not in the list, the value False will be returned + 
gap_compensation = gapGenes.gap_black_list(gene_symbol) - # Mark as not disparity detected - disparity_deletion_in = ['false', 'false'] - # Loop through to see if a gap can be located - possibility_counter = 0 - for possibility in hgvs_genomic_possibilities: - possibility_counter = possibility_counter + 1 - # Loop out stash possibilities which will not spot gaps so are empty - if possibility == '': - continue + # Look for variants spanning introns + try: + hgvs_coding = hn.normalize(hgvs_coding) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.search('boundary', str(error)) or re.search('spanning', str(error)): + gap_compensation = False + else: + pass + except hgvs.exceptions.HGVSError: + fn.exceptPass() + + # Warn gap code status + logger.warning("gap_compensation_3 = " + str(gap_compensation)) + multi_g = [] + multi_list = [] + mapping_options = self.hdp.get_tx_mapping_options(hgvs_coding.ac) + for alt_chr in mapping_options: + if (re.match('NC_', alt_chr[1]) or re.match('NT_', alt_chr[1]) or re.match('NW_', + alt_chr[1])) and \ + alt_chr[2] == alt_aln_method: + multi_list.append(alt_chr[1]) + + for alt_chr in multi_list: + try: + # Re set ori + ori = va_func.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=alt_chr, + alt_aln_method=alt_aln_method, hdp=self.hdp) + orientation = int(ori[0]['alt_strand']) + hgvs_alt_genomic = va_func.myvm_t_to_g(hgvs_coding, alt_chr, no_norm_evm, vm, hp, hn, + sf, nr_vm) + # Set hgvs_genomic accordingly + hgvs_genomic = copy.deepcopy(hgvs_alt_genomic) + + # genomic_possibilities + # 1. take the simple 3 pr normalized hgvs_genomic + # 2. Lock in hgvs_genomic at its most 5 prime position wrt genome + hgvs_genomic_possibilities = [] + + # Loop out gap code under these circumstances! 
+ if gap_compensation is True: + logger.warning('g_to_t gap code 3 active') + rn_hgvs_genomic = reverse_normalizer.normalize(hgvs_alt_genomic) + hgvs_genomic_possibilities.append(rn_hgvs_genomic) + if orientation != -1: + try: + chromosome_normalized_hgvs_coding = reverse_normalizer.normalize( + hgvs_coding) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + chromosome_normalized_hgvs_coding = hgvs_coding + else: + try: + chromosome_normalized_hgvs_coding = hn.normalize(hgvs_coding) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + chromosome_normalized_hgvs_coding = hgvs_coding - # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps - hgvs_genomic_variant = possibility - stored_hgvs_genomic_variant = copy.deepcopy(hgvs_genomic_variant) + most_3pr_hgvs_genomic = va_func.myvm_t_to_g(chromosome_normalized_hgvs_coding, + alt_chr, + no_norm_evm, vm, hp, hn, sf, nr_vm) + hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) - # Reverse normalize hgvs_genomic_variant: NOTE will replace ref + # First to the right + hgvs_stash = copy.deepcopy(hgvs_coding) try: - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( - hgvs_genomic_variant) - except hgvs.exceptions.HGVSError as e: - # Strange error caused by gap in genomic - error = str(e) - if re.search('base start position must be <= end position', error): - if hgvs_genomic.posedit.edit.type == 'delins': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( - hgvs_genomic) - if hgvs_genomic.posedit.edit.type == 
'del': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( - hgvs_genomic) - if re.search('insertion length must be 1', error): - if hgvs_genomic.posedit.edit.type == 'ins': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) - lhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - rhb = sf.fetch_seq(str(hgvs_genomic.ac), start, end) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( - hgvs_genomic) - - hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - # Store a copy for later use - stored_hgvs_genomic_5pr = copy.deepcopy(hgvs_genomic_5pr) - - # Make VCF - vcf_dict = va_H2V.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, - reverse_normalizer, sf) - chr = vcf_dict['chr'] - pos = vcf_dict['pos'] - ref = vcf_dict['ref'] - alt = vcf_dict['alt'] - - # Look for exonic gaps within transcript or chromosome - no_normalized_c = 'false' # Mark true to not produce an additional normalization of c. - - # Generate an end position - end = str(int(pos) + len(ref) - 1) - pos = str(pos) - - # Store a not real deletion insertion to test for gapping - stored_hgvs_not_delins = hp.parse_hgvs_variant(str( - hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' 
+ pos + '_' + end + 'del' + ref + 'ins' + alt) - v = [chr, pos, ref, alt] - - # Save a copy of current hgvs_coding + hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) + except: + fn.exceptPass() try: - saved_hgvs_coding = no_norm_evm.g_to_t(stored_hgvs_not_delins, - hgvs_coding.ac) - except Exception as e: - if str( - e) == 'start or end or both are beyond the bounds of transcript record': - saved_hgvs_coding = hgvs_coding - continue - - # Detect intronic variation using normalization - intronic_variant = 'false' - # Look for normalized variant options that do not match hgvs_coding - if orientation == -1: - # position genomic at its most 5 prime position + stash_ac = hgvs_stash.ac + stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, sf) + stash_pos = int(stash_dict['pos']) + stash_ref = stash_dict['ref'] + stash_alt = stash_dict['alt'] + # Generate an end position + stash_end = str(stash_pos + len(stash_ref) - 1) + # make a not real deletion insertion + stash_hgvs_not_delins = hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' 
+ str( + stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) try: - query_genomic = reverse_normalizer.normalize(hgvs_genomic) + stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) except: - query_genomic = hgvs_genomic - # Map to the transcript ant test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if ( - hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass + fn.exceptPass() + # Store a tx copy for later use + test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) + stash_genomic = va_func.myvm_t_to_g(test_stash_tx_right, hgvs_alt_genomic.ac, + no_norm_evm, vm, hp, hn, sf, nr_vm) + # Stash the outputs if required + # test variants = NC_000006.11:g.90403795G= (causes double identity) + # NC_000002.11:g.73675227_73675228insCTC (? 
incorrect assumed insertion position) + # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': + # pass + if len(test_stash_tx_right.posedit.edit.ref) == (( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + stash_tx_right = test_stash_tx_right + if hasattr(test_stash_tx_right.posedit.edit, + 'alt') and test_stash_tx_right.posedit.edit.alt is not None: + alt = test_stash_tx_right.posedit.edit.alt + else: + alt = '' + if hasattr(stash_genomic.posedit.edit, + 'alt') and stash_genomic.posedit.edit.alt is not None: + g_alt = stash_genomic.posedit.edit.alt + else: + g_alt = '' + if (len(alt) - ( + test_stash_tx_right.posedit.pos.end.base - test_stash_tx_right.posedit.pos.start.base) + 1) != ( + len(g_alt) - ( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + hgvs_genomic_possibilities.append(stash_genomic) + else: + hgvs_genomic_possibilities.append('') + elif test_stash_tx_right.posedit.edit.type == 'identity': + reform_ident = str(test_stash_tx_right).split(':')[0] + reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_right.posedit.pos) + 'del' + str( + test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) + hgvs_reform_ident = hp.parse_hgvs_variant(reform_ident) + try: + hn.normalize(hgvs_reform_ident) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error): + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append('') + else: + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append(stash_genomic) else: - hgvs_seek_var = saved_hgvs_coding + try: + hn.normalize(test_stash_tx_right) + except hgvs.exceptions.HGVSUnsupportedOperationError: + hgvs_genomic_possibilities.append('') + else: + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append(stash_genomic) + except hgvs.exceptions.HGVSError as e: + fn.exceptPass() + except ValueError: + fn.exceptPass() - elif orientation != -1: - # position genomic at its most 3 prime position + # Then to the left + hgvs_stash = copy.deepcopy(hgvs_coding) + try: + hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) + except: + fn.exceptPass() + try: + stash_ac = hgvs_stash.ac + stash_dict = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, primary_assembly, + reverse_normalizer, sf) + stash_pos = int(stash_dict['pos']) + stash_ref = stash_dict['ref'] + stash_alt = stash_dict['alt'] + # Generate an end position + stash_end = str(stash_pos + len(stash_ref) - 1) + # make a not real deletion insertion + stash_hgvs_not_delins = hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' 
+ str( + stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) try: - query_genomic = hn.normalize(hgvs_genomic) + stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) except: - query_genomic = hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if ( - hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass + fn.exceptPass() + # Store a tx copy for later use + test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) + stash_genomic = va_func.myvm_t_to_g(test_stash_tx_left, hgvs_alt_genomic.ac, + no_norm_evm, vm, hp, hn, sf, nr_vm) + # Stash the outputs if required + # test variants = NC_000006.11:g.90403795G= (causes double identity) + # NC_000002.11:g.73675227_73675228insCTC + # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': + # pass + if len(test_stash_tx_left.posedit.edit.ref) == (( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): + stash_tx_left = test_stash_tx_left + if hasattr(test_stash_tx_left.posedit.edit, + 'alt') and test_stash_tx_left.posedit.edit.alt is not None: + alt = test_stash_tx_left.posedit.edit.alt + else: + alt = '' + if hasattr(stash_genomic.posedit.edit, + 'alt') and stash_genomic.posedit.edit.alt is not None: + g_alt = stash_genomic.posedit.edit.alt + else: + g_alt = '' + 
if (len(alt) - ( + test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( + len(g_alt) - ( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + hgvs_genomic_possibilities.append(stash_genomic) + else: + hgvs_genomic_possibilities.append('') + elif test_stash_tx_left.posedit.edit.type == 'identity': + reform_ident = str(test_stash_tx_left).split(':')[0] + reform_ident = reform_ident + ':c.' + str(test_stash_tx_left.posedit.pos) + 'del' + str( + test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) + hgvs_reform_ident = hp.parse_hgvs_variant(reform_ident) + try: + hn.normalize(hgvs_reform_ident) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error): + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append('') + else: + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append(stash_genomic) else: - hgvs_seek_var = saved_hgvs_coding + try: + hn.normalize(test_stash_tx_left) + except hgvs.exceptions.HGVSUnsupportedOperationError: + hgvs_genomic_possibilities.append('') + else: + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append(stash_genomic) + except hgvs.exceptions.HGVSError as e: + fn.exceptPass() + except ValueError: + fn.exceptPass() + # direct mapping from reverse_normalized transcript insertions in the delins format try: - intron_test = hn.normalize(hgvs_seek_var) + if hgvs_coding.posedit.edit.type == 'ins': + most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) + most_3pr_hgvs_transcript_variant = reverse_normalizer.normalize(hgvs_coding) + try: + n_3pr = vm.c_to_n(most_3pr_hgvs_transcript_variant) + n_5pr = vm.c_to_n(most_5pr_hgvs_transcript_variant) + except: + n_3pr = most_3pr_hgvs_transcript_variant + n_5pr = most_5pr_hgvs_transcript_variant + # Make into a delins by adding the ref bases to the variant ref and alt + pr3_ref = 
sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, + n_3pr.posedit.pos.end.base) + pr5_ref = sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, + n_5pr.posedit.pos.end.base) + most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref + most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[ + 0] + most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ + pr3_ref[1] + most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[ + 0] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ + pr5_ref[1] + # Map to the genome + genomic_from_most_3pr_hgvs_transcript_variant = vm.t_to_g( + most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) + genomic_from_most_5pr_hgvs_transcript_variant = vm.t_to_g( + most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) + + # Normalize - If the variant spans a gap it should then form a static genomic variant + try: + genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( + genomic_from_most_3pr_hgvs_transcript_variant) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if error == 'base start position must be <= end position': + start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base + end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start + genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( + genomic_from_most_3pr_hgvs_transcript_variant) + try: + genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( + genomic_from_most_5pr_hgvs_transcript_variant) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if error == 'base start position must be <= end position': + start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base + end = 
genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start + genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( + genomic_from_most_5pr_hgvs_transcript_variant) + + try: + if genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' + str( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant = hp.parse_hgvs_variant( + genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) + + try: + if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' 
+ str( + most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant = hp.parse_hgvs_variant( + most_3pr_hgvs_transcript_variant_delins_from_dup) + + try: + if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' + str( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant = hp.parse_hgvs_variant( + genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) + + try: + if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' 
+ str( + most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant = hp.parse_hgvs_variant( + most_5pr_hgvs_transcript_variant_delins_from_dup) + + if len( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( + most_3pr_hgvs_transcript_variant.posedit.edit.alt): + hgvs_genomic_possibilities.append( + genomic_from_most_3pr_hgvs_transcript_variant) + if len( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( + most_5pr_hgvs_transcript_variant.posedit.edit.alt): + hgvs_genomic_possibilities.append( + genomic_from_most_5pr_hgvs_transcript_variant) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) if re.match('Normalization of intronic variants is not supported', error) or re.match( 'Unsupported normalization of variants spanning the exon-intron boundary', error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - intronic_variant = 'hard_fail' - else: - # Double check to see whether the variant is actually intronic? 
- for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' + pass + fn.exceptPass() - if intronic_variant != 'hard_fail': - if re.search('\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search('\d+\-', - str( - hgvs_seek_var.posedit.pos)) or re.search( - '\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search('\*\d+\-', str( - hgvs_seek_var.posedit.pos)): - # Double check to see whether the variant is actually intronic? - for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' + # Set variables for problem specific warnings + gapped_alignment_warning = '' + corrective_action_taken = '' + gapped_transcripts = '' + auto_info = '' - if intronic_variant != 'true': - # Flag RefSeqGene for ammendment - # amend_RefSeqGene = 'false' - # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths - if stored_hgvs_not_delins != '': - # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) - # This test will only occur in dup of single base, insertion or substitution - if not re.search('_', str(hgvs_not_delins.posedit.pos)): - if re.search('dup', - hgvs_genomic_5pr.posedit.edit.type) or re.search( - 'ins', hgvs_genomic_5pr.posedit.edit.type): - # For gap 
in chr, map to t. - but becaouse we have pushed to 5 prime by norm, add 1 to end pos - plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) - plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 - plussed_hgvs_not_delins.posedit.edit.ref = '' - transcript_variant = no_norm_evm.g_to_t(plussed_hgvs_not_delins, - str( - saved_hgvs_coding.ac)) - if (( - transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( - hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, - end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[ - 1:] - elif re.search('ins', str( - hgvs_genomic_5pr.posedit.edit)) and re.search('del', - str( - hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', str( - hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', - str( - hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, - end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[ - 1:] - else: - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 
- end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, - end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[ - 1:] - elif re.search('ins', str( - hgvs_genomic_5pr.posedit.edit)) and re.search('del', - str( - hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', str( - hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', - str( - hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, - end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[ - 1:] - else: - pass - else: - pass - tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - saved_hgvs_coding.ac) - # Create normalized version of tx_hgvs_not_delins - rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) - # Check for +1 base and adjust - if re.search('\+', - str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - '\+', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - exceptPass() - - elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 
1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, vm, hp, hn, sf, - nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str( - saved_hgvs_coding.ac)) - elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, vm, hp, hn, sf, - nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str( - saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: - # pass - - # Check for -ve base and adjust - elif re.search('\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - '\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - exceptPass() - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base back to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # Delete the ref - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # Add the additional base to the ALT - start = 
rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) - rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, vm, hp, hn, sf, - nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str( - saved_hgvs_coding.ac)) - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, vm, hp, hn, sf, - nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str( - saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - else: - exceptPass() - - # Logic - if len(hgvs_not_delins.posedit.edit.ref) < len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( - hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['chromosome', gap_length] - elif len(hgvs_not_delins.posedit.edit.ref) > len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( - rn_tx_hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] - else: - re_capture_tx_variant = [] - for possibility in 
hgvs_genomic_possibilities: - if possibility == '': - continue - hgvs_t_possibility = vm.g_to_t(possibility, hgvs_coding.ac) - if hgvs_t_possibility.posedit.edit.type == 'ins': - try: - hgvs_t_possibility = vm.c_to_n(hgvs_t_possibility) - except: - continue - if hgvs_t_possibility.posedit.pos.start.offset != 0 or hgvs_t_possibility.posedit.pos.end.offset != 0: - continue - ins_ref = sf.fetch_seq(hgvs_t_possibility.ac, - hgvs_t_possibility.posedit.pos.start.base - 1, - hgvs_t_possibility.posedit.pos.start.base + 1) - try: - hgvs_t_possibility = vm.n_to_c(hgvs_t_possibility) - except: - continue - hgvs_t_possibility.posedit.edit.ref = ins_ref - hgvs_t_possibility.posedit.edit.alt = ins_ref[ - 0] + hgvs_t_possibility.posedit.edit.alt + \ - ins_ref[1] - if possibility.posedit.edit.type == 'ins': - ins_ref = sf.fetch_seq(possibility.ac, - possibility.posedit.pos.start.base - 1, - possibility.posedit.pos.end.base) - possibility.posedit.edit.ref = ins_ref - possibility.posedit.edit.alt = ins_ref[ - 0] + possibility.posedit.edit.alt + \ - ins_ref[1] - if len(hgvs_t_possibility.posedit.edit.ref) < len( - possibility.posedit.edit.ref): - gap_length = len(possibility.posedit.edit.ref) - len( - hgvs_t_possibility.posedit.edit.ref) - re_capture_tx_variant = ['transcript', gap_length, - hgvs_t_possibility] - hgvs_not_delins = possibility - hgvs_genomic_5pr = possibility - break + # Mark as not disparity detected + disparity_deletion_in = ['false', 'false'] + # Loop through to see if a gap can be located + possibility_counter = 0 + for possibility in hgvs_genomic_possibilities: + possibility_counter = possibility_counter + 1 + # Loop out stash possibilities which will not spot gaps so are empty + if possibility == '': + continue - if re_capture_tx_variant != []: - try: - tx_hgvs_not_delins = vm.c_to_n(re_capture_tx_variant[2]) - except: - tx_hgvs_not_delins = re_capture_tx_variant[2] - disparity_deletion_in = re_capture_tx_variant[0:-1] - else: - pass + # Use VCF generation code 
to push hgvs_genomic as for 5 prime as possible to uncover gaps + hgvs_genomic_variant = possibility + stored_hgvs_genomic_variant = copy.deepcopy(hgvs_genomic_variant) - # Final sanity checks + # Reverse normalize hgvs_genomic_variant: NOTE will replace ref + try: + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( + hgvs_genomic_variant) + except hgvs.exceptions.HGVSError as e: + # Strange error caused by gap in genomic + error = str(e) + if re.search('base start position must be <= end position', error): + if hgvs_genomic.posedit.edit.type == 'delins': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( + hgvs_genomic) + if hgvs_genomic.posedit.edit.type == 'del': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( + hgvs_genomic) + if re.search('insertion length must be 1', error): + if hgvs_genomic.posedit.edit.type == 'ins': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) + lhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + rhb = sf.fetch_seq(str(hgvs_genomic.ac), start, end) + hgvs_genomic.posedit.edit.ref = lhb + rhb + 
hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( + hgvs_genomic) + + hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + # Store a copy for later use + stored_hgvs_genomic_5pr = copy.deepcopy(hgvs_genomic_5pr) + + # Make VCF + vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, + reverse_normalizer, sf) + chr = vcf_dict['chr'] + pos = vcf_dict['pos'] + ref = vcf_dict['ref'] + alt = vcf_dict['alt'] + + # Look for exonic gaps within transcript or chromosome + no_normalized_c = 'false' # Mark true to not produce an additional normalization of c. + + # Generate an end position + end = str(int(pos) + len(ref) - 1) + pos = str(pos) + + # Store a not real deletion insertion to test for gapping + stored_hgvs_not_delins = hp.parse_hgvs_variant(str( + hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) + v = [chr, pos, ref, alt] + + # Save a copy of current hgvs_coding try: - vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + saved_hgvs_coding = no_norm_evm.g_to_t(stored_hgvs_not_delins, + hgvs_coding.ac) except Exception as e: if str( e) == 'start or end or both are beyond the bounds of transcript record': + saved_hgvs_coding = hgvs_coding continue + + # Detect intronic variation using normalization + intronic_variant = 'false' + # Look for normalized variant options that do not match hgvs_coding + if orientation == -1: + # position genomic at its most 5 prime position + try: + query_genomic = reverse_normalizer.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript ant test for movement + try: + hgvs_seek_var = evm.g_to_t(query_genomic, hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if ( + hgvs_seek_var.posedit.pos.start.base + 
hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding + + elif orientation != -1: + # position genomic at its most 3 prime position + try: + query_genomic = hn.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript and test for movement + try: + hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if ( + hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding + try: - hn.normalize(tx_hgvs_not_delins) + intron_test = hn.normalize(hgvs_seek_var) except hgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) - if re.match('Normalization of intronic variants is not supported', error) or re.match( 'Unsupported normalization of variants spanning the exon-intron boundary', error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - continue - elif re.match('Normalization of intronic variants is not supported', - error): - # We know that this cannot be because of an intronic variant, so must be aligned to tx gap - disparity_deletion_in = ['transcript', 'Requires Analysis'] - - # Recreate hgvs_genomic - if disparity_deletion_in[0] == 'transcript': - hgvs_genomic = hgvs_not_delins - - # 
Find oddly placed gaps where the tx variant is encompassed in the gap - if disparity_deletion_in[0] == 'false' and ( - possibility_counter == 3 or possibility_counter == 4): - rg = reverse_normalizer.normalize(hgvs_not_delins) - rtx = vm.g_to_t(rg, tx_hgvs_not_delins.ac) - fg = hn.normalize(hgvs_not_delins) - ftx = vm.g_to_t(fg, tx_hgvs_not_delins.ac) - if ( - rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( - ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): - exons = hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, alt_aln_method) - exonic = False - for ex_test in exons: - if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ - 7]) and ftx.posedit.pos.end.base in range(ex_test[6], - ex_test[7]): - exonic = True - if exonic is True: - hgvs_not_delins = fg - hgvs_genomic = fg - hgvs_genomic_5pr = fg - try: - tx_hgvs_not_delins = vm.c_to_n(ftx) - except Exception: - tx_hgvs_not_delins = ftx - disparity_deletion_in = ['transcript', 'Requires Analysis'] - - # Pre-processing of tx_hgvs_not_delins - try: - if tx_hgvs_not_delins.posedit.edit.alt is None: - tx_hgvs_not_delins.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' 
+ str( - tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = hp.parse_hgvs_variant( - tx_hgvs_not_delins_delins_from_dup) - - if disparity_deletion_in[0] == 'transcript': - # amend_RefSeqGene = 'true' - # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search('\+', - str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( - '\-', str(tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search('\+', - str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( - '\-', str(tx_hgvs_not_delins.posedit.pos.end))): - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) - - # Copy the current variant - tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) - try: - if tx_gap_fill_variant.posedit.edit.alt is None: - tx_gap_fill_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( - tx_gap_fill_variant.posedit.pos.start) + '_' + str( - tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = hp.parse_hgvs_variant( - tx_gap_fill_variant_delins_from_dup) - - # Identify which half of the NOT-intron the start position of the variant is in - if re.search('\-', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - tx_gap_fill_variant.posedit.pos.start.offset = int( - '0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.offset = int( - '0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search('\+', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.offset = int( - '0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - tx_gap_fill_variant.posedit.pos.end.offset = int( - '0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - - try: - tx_gap_fill_variant = vm.n_to_c(tx_gap_fill_variant) - except: - exceptPass() - genomic_gap_fill_variant = vm.t_to_g(tx_gap_fill_variant, - reverse_normalized_hgvs_genomic.ac) - genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - - try: - c_tx_hgvs_not_delins = vm.n_to_c(tx_hgvs_not_delins) - except Exception: - c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = vm.t_to_g(c_tx_hgvs_not_delins, - hgvs_genomic_5pr.ac) - - # Ensure an ALT exists - try: - if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + 
genomic_gap_fill_variant.type + '.' + str( - genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = hp.parse_hgvs_variant( - genomic_gap_fill_variant_delins_from_dup) - genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( - genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = hp.parse_hgvs_variant( - genomic_gap_fill_variant_alt_delins_from_dup) - - # Correct insertion alts - if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - genomic_gap_fill_variant_alt.posedit.pos.end.base) - genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - append_ref[1] - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - alternate_bases = list( - genomic_gap_fill_variant_alt.posedit.edit.alt) + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + intronic_variant = 'hard_fail' else: - # Deletions with no ins - pre_alternate_bases = list( - genomic_gap_fill_variant_alt.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = 
genomic_gap_fill_variant.posedit.pos.start.base - alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base - ref_base_dict = {} - for base in reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = ref_start + 1 - - alt_base_dict = {} - - # NEED TO SEARCH FOR RANGE = and replace with interval_range - # Need to search for int and replace with integer - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for integer in range( - genomic_gap_fill_variant_alt.posedit.pos.start.base, - genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): - if integer == alt_start: - alt_base_dict[integer] = str(''.join(alternate_bases)) - else: - alt_base_dict[integer] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for integer in range( - genomic_gap_fill_variant.posedit.pos.start.base, - genomic_gap_fill_variant.posedit.pos.end.base + 1, - 1): - if integer in alt_base_dict.keys(): - alternate_sequence_bases.append(alt_base_dict[integer]) - else: - alternate_sequence_bases.append(ref_base_dict[integer]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Add the new alt to the gap fill variant and generate transcript variant - genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = vm.g_to_t(genomic_gap_fill_variant, - tx_gap_fill_variant.ac) - - # Set warning - gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - disparity_deletion_in[1] = [gap_size] - auto_info = auto_info + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' + # Double check to see whether the variant is actually intronic? 
+ for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - if re.match('\-', str(for_location_c.posedit.pos.start.offset)): - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - else: - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - auto_info = auto_info + '%s' % (gap_position) + if intronic_variant != 'hard_fail': + if re.search('\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search('\d+\-', + str( + hgvs_seek_var.posedit.pos)) or re.search( + '\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search('\*\d+\-', str( + hgvs_seek_var.posedit.pos)): + # Double check to see whether the variant is actually intronic? + for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' - else: - if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # In this instance, we have identified a transcript gap but the n. 
version of - # the transcript variant but do not have a position which actually hits the gap, - # so the variant likely spans the gap, and is not picked up by an offset. - try: - c1 = vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - g1 = nr_vm.t_to_g(c1, hgvs_genomic.ac) - g3 = nr_vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - ng2 = hn.normalize(g2) - g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - len(g3.posedit.edit.ref) - 1) - try: - c2 = vm.g_to_t(g3, c1.ac) - if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - pass + if intronic_variant != 'true': + # Flag RefSeqGene for ammendment + # amend_RefSeqGene = 'false' + # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths + if stored_hgvs_not_delins != '': + # Refresh hgvs_not_delins from stored_hgvs_not_delins + hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) + # This test will only occur in dup of single base, insertion or substitution + if not re.search('_', str(hgvs_not_delins.posedit.pos)): + if re.search('dup', + hgvs_genomic_5pr.posedit.edit.type) or re.search( + 'ins', hgvs_genomic_5pr.posedit.edit.type): + # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos + plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) + plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 + plussed_hgvs_not_delins.posedit.edit.ref = '' + transcript_variant = no_norm_evm.g_to_t(plussed_hgvs_not_delins, + str( + saved_hgvs_coding.ac)) + if (( + transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( + hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, + end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[ + 1:] + elif re.search('ins', str( + hgvs_genomic_5pr.posedit.edit)) and re.search('del', + str( + hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', str( + hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', + str( + hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, + end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[ + 1:] else: - tx_hgvs_not_delins = c2 - try: - tx_hgvs_not_delins = vm.c_to_n(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSError: - exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError: - exceptPass() - - if 
re.search('\+', str( - tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, + end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[ + 1:] + elif re.search('ins', str( + hgvs_genomic_5pr.posedit.edit)) and re.search('del', + str( + hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', str( + hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', + str( + hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, + end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[ + 1:] + else: + pass + else: + pass + tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + saved_hgvs_coding.ac) + # Create normalized version of tx_hgvs_not_delins + rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) + # Check for +1 base and adjust + if re.search('\+', + str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( '\+', str( - tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' try: - c2 = vm.n_to_c(tx_hgvs_not_delins) + rn_tx_hgvs_not_delins.posedit.edit.alt = '' except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref + fn.exceptPass() + + elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = 
copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search('\+', str( - tx_hgvs_not_delins.posedit.pos.end)) and not re.search('\+', - str( - tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myvm_t_to_g(test_tx_var, alt_chr, + no_norm_evm, vm, hp, hn, sf, + nr_vm) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str( + saved_hgvs_coding.ac)) + elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, 
hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - gpe = for_location_c.posedit.pos.end.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search('\-', str( - tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myvm_t_to_g(test_tx_var, alt_chr, + no_norm_evm, vm, hp, hn, sf, + nr_vm) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str( + saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # else: + # pass + + # Check for -ve base and adjust + elif re.search('\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( '\-', str( - tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' try: - c2 = vm.n_to_c(tx_hgvs_not_delins) + rn_tx_hgvs_not_delins.posedit.edit.alt = '' except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref + fn.exceptPass() + elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base back to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # Delete the ref + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # Add the additional base to the ALT + start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 + end = rn_tx_hgvs_not_delins.posedit.pos.end.base + ref_bases = sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - 
g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search('\-', str( - tx_hgvs_not_delins.posedit.pos.end)) and not re.search('\-', - str( - tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myvm_t_to_g(test_tx_var, alt_chr, + no_norm_evm, vm, hp, hn, sf, + nr_vm) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str( + saved_hgvs_coding.ac)) + elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = va_func.myvm_t_to_g(test_tx_var, alt_chr, + no_norm_evm, vm, hp, hn, sf, + nr_vm) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str( + saved_hgvs_coding.ac)) + 
rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + else: + fn.exceptPass() + + # Logic + if len(hgvs_not_delins.posedit.edit.ref) < len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( + hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['chromosome', gap_length] + elif len(hgvs_not_delins.posedit.edit.ref) > len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( + rn_tx_hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['transcript', gap_length] + else: + re_capture_tx_variant = [] + for possibility in hgvs_genomic_possibilities: + if possibility == '': + continue + hgvs_t_possibility = vm.g_to_t(possibility, hgvs_coding.ac) + if hgvs_t_possibility.posedit.edit.type == 'ins': + try: + hgvs_t_possibility = vm.c_to_n(hgvs_t_possibility) + except: + continue + if hgvs_t_possibility.posedit.pos.start.offset != 0 or hgvs_t_possibility.posedit.pos.end.offset != 0: + continue + ins_ref = sf.fetch_seq(hgvs_t_possibility.ac, + hgvs_t_possibility.posedit.pos.start.base - 1, + hgvs_t_possibility.posedit.pos.start.base + 1) + try: + hgvs_t_possibility = vm.n_to_c(hgvs_t_possibility) + except: + continue + hgvs_t_possibility.posedit.edit.ref = ins_ref + hgvs_t_possibility.posedit.edit.alt = ins_ref[ + 0] + hgvs_t_possibility.posedit.edit.alt + \ + ins_ref[1] + if possibility.posedit.edit.type == 'ins': + ins_ref = sf.fetch_seq(possibility.ac, + possibility.posedit.pos.start.base - 1, + possibility.posedit.pos.end.base) + possibility.posedit.edit.ref = ins_ref + possibility.posedit.edit.alt = ins_ref[ + 0] + possibility.posedit.edit.alt + \ + ins_ref[1] + if len(hgvs_t_possibility.posedit.edit.ref) < len( + possibility.posedit.edit.ref): + gap_length = len(possibility.posedit.edit.ref) - len( + hgvs_t_possibility.posedit.edit.ref) + re_capture_tx_variant = ['transcript', gap_length, + hgvs_t_possibility] + hgvs_not_delins = possibility + 
hgvs_genomic_5pr = possibility + break + + if re_capture_tx_variant != []: + try: + tx_hgvs_not_delins = vm.c_to_n(re_capture_tx_variant[2]) + except: + tx_hgvs_not_delins = re_capture_tx_variant[2] + disparity_deletion_in = re_capture_tx_variant[0:-1] + else: + pass + + # Final sanity checks + try: + vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + except Exception as e: + if str( + e) == 'start or end or both are beyond the bounds of transcript record': + continue + try: + hn.normalize(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + continue + elif re.match('Normalization of intronic variants is not supported', + error): + # We know that this cannot be because of an intronic variant, so must be aligned to tx gap + disparity_deletion_in = ['transcript', 'Requires Analysis'] + + # Recreate hgvs_genomic + if disparity_deletion_in[0] == 'transcript': + hgvs_genomic = hgvs_not_delins + + # Find oddly placed gaps where the tx variant is encompassed in the gap + if disparity_deletion_in[0] == 'false' and ( + possibility_counter == 3 or possibility_counter == 4): + rg = reverse_normalizer.normalize(hgvs_not_delins) + rtx = vm.g_to_t(rg, tx_hgvs_not_delins.ac) + fg = hn.normalize(hgvs_not_delins) + ftx = vm.g_to_t(fg, tx_hgvs_not_delins.ac) + if ( + rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( + ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): + exons = self.hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, alt_aln_method) + exonic = False + for ex_test in exons: + if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ + 7]) and ftx.posedit.pos.end.base in range(ex_test[6], + 
ex_test[7]): + exonic = True + if exonic is True: + hgvs_not_delins = fg + hgvs_genomic = fg + hgvs_genomic_5pr = fg + try: + tx_hgvs_not_delins = vm.c_to_n(ftx) + except Exception: + tx_hgvs_not_delins = ftx + disparity_deletion_in = ['transcript', 'Requires Analysis'] + + # Pre-processing of tx_hgvs_not_delins + try: + if tx_hgvs_not_delins.posedit.edit.alt is None: + tx_hgvs_not_delins.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( + tx_hgvs_not_delins.posedit.pos.start) + '_' + str( + tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins = hp.parse_hgvs_variant( + tx_hgvs_not_delins_delins_from_dup) + + if disparity_deletion_in[0] == 'transcript': + # amend_RefSeqGene = 'true' + # ANY VARIANT WHOLLY WITHIN THE GAP + if (re.search('\+', + str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( + '\-', str(tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search('\+', + str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( + '\-', str(tx_hgvs_not_delins.posedit.pos.end))): gapped_transcripts = gapped_transcripts + ' ' + str( tx_hgvs_not_delins.ac) - non_valid_caution = 'true' + + # Copy the current variant + tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) + try: + if tx_gap_fill_variant.posedit.edit.alt is None: + tx_gap_fill_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( + tx_gap_fill_variant.posedit.pos.start) + '_' + str( + tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant = hp.parse_hgvs_variant( + tx_gap_fill_variant_delins_from_dup) + + # Identify which half of the NOT-intron the start position of the variant is in + if re.search('\-', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 + tx_gap_fill_variant.posedit.pos.start.offset = int( + '0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.offset = int( + '0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + elif re.search('\+', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.offset = int( + '0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 + tx_gap_fill_variant.posedit.pos.end.offset = int( + '0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + try: - c1 = vm.n_to_c(tx_hgvs_not_delins) + tx_gap_fill_variant = vm.n_to_c(tx_gap_fill_variant) except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref + fn.exceptPass() + genomic_gap_fill_variant = vm.t_to_g(tx_gap_fill_variant, + reverse_normalized_hgvs_genomic.ac) + genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref + + try: + c_tx_hgvs_not_delins = vm.n_to_c(tx_hgvs_not_delins) + except Exception: + c_tx_hgvs_not_delins = 
copy.copy(tx_hgvs_not_delins) + genomic_gap_fill_variant_alt = vm.t_to_g(c_tx_hgvs_not_delins, + hgvs_genomic_5pr.ac) + + # Ensure an ALT exists + try: + if genomic_gap_fill_variant_alt.posedit.edit.alt is None: + genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( + genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant = hp.parse_hgvs_variant( + genomic_gap_fill_variant_delins_from_dup) + genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( + genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt = hp.parse_hgvs_variant( + genomic_gap_fill_variant_alt_delins_from_dup) + + # Correct insertion alts + if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': + append_ref = sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, + genomic_gap_fill_variant_alt.posedit.pos.end.base) + genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ + 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ + append_ref[1] + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) + if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: + alternate_bases = list( + 
genomic_gap_fill_variant_alt.posedit.edit.alt) else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 + # Deletions with no ins + pre_alternate_bases = list( + genomic_gap_fill_variant_alt.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = genomic_gap_fill_variant.posedit.pos.start.base + alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base + ref_base_dict = {} + for base in reference_bases: + ref_base_dict[ref_start] = str(base) + ref_start = ref_start + 1 + + alt_base_dict = {} + + # NEED TO SEARCH FOR RANGE = and replace with interval_range + # Need to search for int and replace with integer + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for integer in range( + genomic_gap_fill_variant_alt.posedit.pos.start.base, + genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): + if integer == alt_start: + alt_base_dict[integer] = str(''.join(alternate_bases)) + else: + alt_base_dict[integer] = 'X' + + # Generate the alt sequence + alternate_sequence_bases = [] + for integer in range( + genomic_gap_fill_variant.posedit.pos.start.base, + genomic_gap_fill_variant.posedit.pos.end.base + 1, + 1): + if integer in alt_base_dict.keys(): + alternate_sequence_bases.append(alt_base_dict[integer]) + else: + alternate_sequence_bases.append(ref_base_dict[integer]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Add the new alt to the 
gap fill variant and generate transcript variant + genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence + hgvs_refreshed_variant = vm.g_to_t(genomic_gap_fill_variant, + tx_gap_fill_variant.ac) + + # Set warning + gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) + disparity_deletion_in[1] = [gap_size] + auto_info = auto_info + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) if re.match('NM_', str(for_location_c)): for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - 1 - gpe = for_location_c.posedit.pos.end.base + if re.match('\-', str(for_location_c.posedit.pos.start.offset)): + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + else: + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 gap_position = ' between positions c.' + str(gps) + '_' + str( gpe) + '\n' - # Warn update auto_info = auto_info + '%s' % (gap_position) + else: - auto_info = auto_info + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) + '\n' - hgvs_refreshed_variant = tx_hgvs_not_delins - - # GAP IN THE CHROMOSOME - elif disparity_deletion_in[0] == 'chromosome': - # amend_RefSeqGene = 'true' - if possibility_counter == 3: - hgvs_refreshed_variant = stash_tx_right - elif possibility_counter == 4: - hgvs_refreshed_variant = stash_tx_left + if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: + # In this instance, we have identified a transcript gap but the n. version of + # the transcript variant but do not have a position which actually hits the gap, + # so the variant likely spans the gap, and is not picked up by an offset. + try: + c1 = vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + g1 = nr_vm.t_to_g(c1, hgvs_genomic.ac) + g3 = nr_vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + ng2 = hn.normalize(g2) + g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( + len(g3.posedit.edit.ref) - 1) + try: + c2 = vm.g_to_t(g3, c1.ac) + if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: + pass + else: + tx_hgvs_not_delins = c2 + try: + tx_hgvs_not_delins = vm.c_to_n(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSError: + fn.exceptPass() + except hgvs.exceptions.HGVSInvalidVariantError: + fn.exceptPass() + + if re.search('\+', str( + tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + '\+', + str( + tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = vm.t_to_g(c2, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search('\+', str( + tx_hgvs_not_delins.posedit.pos.end)) and not re.search('\+', + str( + tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = vm.t_to_g(c2, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base + gpe = for_location_c.posedit.pos.end.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search('\-', str( + tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + '\-', + str( + tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = vm.t_to_g(c2, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search('\-', str( + tx_hgvs_not_delins.posedit.pos.end)) and not re.search('\-', + str( + tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = vm.t_to_g(c1, hgvs_genomic.ac) + g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = vm.t_to_g(c2, hgvs_genomic.ac) + g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base - 1 + gpe = for_location_c.posedit.pos.end.base + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + else: + auto_info = auto_info + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + '\n' + hgvs_refreshed_variant = tx_hgvs_not_delins + + # GAP IN THE CHROMOSOME + elif disparity_deletion_in[0] == 'chromosome': + # amend_RefSeqGene = 'true' + if possibility_counter == 3: + hgvs_refreshed_variant = stash_tx_right + elif possibility_counter == 4: + hgvs_refreshed_variant = stash_tx_left + else: + hgvs_refreshed_variant = chromosome_normalized_hgvs_coding + # Warn + auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( + hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' transcript base(s) that fail to align to chromosome ' + str( + hgvs_genomic.ac) + '\n' else: - hgvs_refreshed_variant = chromosome_normalized_hgvs_coding - # Warn - auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( - hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' transcript base(s) that fail to align to chromosome ' + str( - hgvs_genomic.ac) + '\n' - else: - # Keep the same by re-setting rel_var - hgvs_refreshed_variant = hgvs_coding - # amend_RefSeqGene = 'false' - - # Edit the output - if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', - str( - hgvs_refreshed_variant.type)): - hgvs_refreshed_variant = no_norm_evm.n_to_c(hgvs_refreshed_variant) - else: - pass - - try: - hn.normalize(hgvs_refreshed_variant) - except Exception as e: - error = str(e) - # Ensure the final variant is not intronic nor does it cross exon boundaries - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - hgvs_refreshed_variant = saved_hgvs_coding + # Keep the same by re-setting rel_var + hgvs_refreshed_variant = hgvs_coding + # amend_RefSeqGene = 'false' + 
+ # Edit the output + if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', + str( + hgvs_refreshed_variant.type)): + hgvs_refreshed_variant = no_norm_evm.n_to_c(hgvs_refreshed_variant) else: - continue + pass - # Quick check to make sure the coding variant has not changed - try: - to_test = hn.normalize(hgvs_refreshed_variant) - except: - to_test = hgvs_refreshed_variant - if str(to_test.posedit.edit) != str(hgvs_coding.posedit.edit): - # Try the next available genomic option - if hgvs_coding.posedit.edit.type == 'identity' and to_test.posedit.edit.type == 'identity': - hgvs_coding = to_test - else: - continue + try: + hn.normalize(hgvs_refreshed_variant) + except Exception as e: + error = str(e) + # Ensure the final variant is not intronic nor does it cross exon boundaries + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + hgvs_refreshed_variant = saved_hgvs_coding + else: + continue - # Update hgvs_genomic - hgvs_alt_genomic = va_func.myvm_t_to_g(hgvs_refreshed_variant, alt_chr, - no_norm_evm, vm, hp, hn, sf, nr_vm) - if hgvs_alt_genomic.posedit.edit.type == 'identity': - re_c = vm.g_to_t(hgvs_alt_genomic, hgvs_refreshed_variant.ac) - if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): - shuffle_left_g = copy.copy(hgvs_alt_genomic) - shuffle_left_g.posedit.edit.ref = '' - shuffle_left_g.posedit.edit.alt = '' - shuffle_left_g.posedit.pos.start.base = shuffle_left_g.posedit.pos.start.base - 1 - shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 - shuffle_left_g = reverse_normalizer.normalize(shuffle_left_g) - re_c = vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) + # Quick check to make sure the coding variant has not changed + try: + to_test = hn.normalize(hgvs_refreshed_variant) + except: + to_test = hgvs_refreshed_variant + if str(to_test.posedit.edit) != 
str(hgvs_coding.posedit.edit): + # Try the next available genomic option + if hgvs_coding.posedit.edit.type == 'identity' and to_test.posedit.edit.type == 'identity': + hgvs_coding = to_test + else: + continue + + # Update hgvs_genomic + hgvs_alt_genomic = va_func.myvm_t_to_g(hgvs_refreshed_variant, alt_chr, + no_norm_evm, vm, hp, hn, sf, nr_vm) + if hgvs_alt_genomic.posedit.edit.type == 'identity': + re_c = vm.g_to_t(hgvs_alt_genomic, hgvs_refreshed_variant.ac) if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): - hgvs_alt_genomic = shuffle_left_g + shuffle_left_g = copy.copy(hgvs_alt_genomic) + shuffle_left_g.posedit.edit.ref = '' + shuffle_left_g.posedit.edit.alt = '' + shuffle_left_g.posedit.pos.start.base = shuffle_left_g.posedit.pos.start.base - 1 + shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 + shuffle_left_g = reverse_normalizer.normalize(shuffle_left_g) + re_c = vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) + if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): + hgvs_alt_genomic = shuffle_left_g + + # If it is intronic, these vairables will not have been set + else: + # amend_RefSeqGene = 'false' + no_normalized_c = 'false' - # If it is intronic, these vairables will not have been set - else: - # amend_RefSeqGene = 'false' - no_normalized_c = 'false' + # Break if gap has been detected + if disparity_deletion_in[0] != 'false': + break - # Break if gap has been detected - if disparity_deletion_in[0] != 'false': - break + # Normailse hgvs_genomic + try: + hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) + except hgvs.exceptions.HGVSError as e: + # Strange error caused by gap in genomic + error = str(e) + if re.search('base start position must be <= end position', error) and \ + disparity_deletion_in[0] == 'chromosome': + if hgvs_alt_genomic.posedit.edit.type == 'delins': + start = hgvs_alt_genomic.posedit.pos.start.base + end = hgvs_alt_genomic.posedit.pos.end.base + lhb = 
sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) + rhb = sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) + hgvs_alt_genomic.posedit.edit.ref = lhb + rhb + hgvs_alt_genomic.posedit.edit.alt = lhb + hgvs_alt_genomic.posedit.edit.alt + rhb + hgvs_alt_genomic.posedit.pos.start.base = end + hgvs_alt_genomic.posedit.pos.end.base = start + hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) + if hgvs_alt_genomic.posedit.edit.type == 'del': + start = hgvs_alt_genomic.posedit.pos.start.base + end = hgvs_alt_genomic.posedit.pos.end.base + lhb = sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) + rhb = sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) + hgvs_alt_genomic.posedit.edit.ref = lhb + rhb + hgvs_alt_genomic.posedit.edit.alt = lhb + rhb + hgvs_alt_genomic.posedit.pos.start.base = end + hgvs_alt_genomic.posedit.pos.end.base = start + hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) + + # Refresh the :g. variant + multi_g.append(hgvs_alt_genomic) + else: + multi_g.append(hgvs_alt_genomic) + corrective_action_taken = 'false' + + # In this instance, the gap code has generally found an incomplete-alignment rather than a + # truly gapped alignment. 
+ except KeyError: + warnings = warnings + ': Suspected incomplete alignment between transcript %s and ' \ + 'genomic reference sequence %s' % (hgvs_coding.ac, + alt_chr) + continue + except hgvs.exceptions.HGVSError as e: + exc_type, exc_value, last_traceback = sys.exc_info() + te = traceback.format_exc() + error = str(te) + logger.error(str(exc_type) + " " + str(exc_value)) + logger.debug(error) + continue - # Normailse hgvs_genomic - try: - hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) - except hgvs.exceptions.HGVSError as e: - # Strange error caused by gap in genomic - error = str(e) - if re.search('base start position must be <= end position', error) and \ - disparity_deletion_in[0] == 'chromosome': - if hgvs_alt_genomic.posedit.edit.type == 'delins': - start = hgvs_alt_genomic.posedit.pos.start.base - end = hgvs_alt_genomic.posedit.pos.end.base - lhb = sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) - rhb = sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) - hgvs_alt_genomic.posedit.edit.ref = lhb + rhb - hgvs_alt_genomic.posedit.edit.alt = lhb + hgvs_alt_genomic.posedit.edit.alt + rhb - hgvs_alt_genomic.posedit.pos.start.base = end - hgvs_alt_genomic.posedit.pos.end.base = start - hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) - if hgvs_alt_genomic.posedit.edit.type == 'del': - start = hgvs_alt_genomic.posedit.pos.start.base - end = hgvs_alt_genomic.posedit.pos.end.base - lhb = sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) - rhb = sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) - hgvs_alt_genomic.posedit.edit.ref = lhb + rhb - hgvs_alt_genomic.posedit.edit.alt = lhb + rhb - hgvs_alt_genomic.posedit.pos.start.base = end - hgvs_alt_genomic.posedit.pos.end.base = start - hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) - - # Refresh the :g. 
variant - multi_g.append(hgvs_alt_genomic) - else: - multi_g.append(hgvs_alt_genomic) - corrective_action_taken = 'false' + if multi_g != []: + multi_g.sort() + multi_gen_vars = multi_g # '|'.join(multi_g) + else: + multi_gen_vars = [] + else: + # HGVS genomic in the absence of a transcript variant + if genomic_variant != '': + multi_gen_vars = [hgvs_genomic_variant] + else: + multi_gen_vars = [] - # In this instance, the gap code has generally found an incomplete-alignment rather than a - # truly gapped alignment. - except KeyError: - warnings = warnings + ': Suspected incomplete alignment between transcript %s and ' \ - 'genomic reference sequence %s' % (hgvs_coding.ac, - alt_chr) - continue - except hgvs.exceptions.HGVSError as e: - exc_type, exc_value, last_traceback = sys.exc_info() - te = traceback.format_exc() - error = str(te) - logger.error(str(exc_type) + " " + str(exc_value)) - logger.debug(error) - continue + # Dictionaries of genomic loci + alt_genomic_dicts = [] + primary_genomic_dicts = {} - if multi_g != []: - multi_g.sort() - multi_gen_vars = multi_g # '|'.join(multi_g) - else: - multi_gen_vars = [] - else: - # HGVS genomic in the absence of a transcript variant - if genomic_variant != '': - multi_gen_vars = [hgvs_genomic_variant] - else: - multi_gen_vars = [] + if len(multi_gen_vars) != 0: + for alt_gen_var in multi_gen_vars: + for build in self.genome_builds: + test = vvChromosomes.supported_for_mapping(alt_gen_var.ac, build) + if test == 'true': + try: + vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, build, reverse_normalizer, sf) + except hgvs.exceptions.HGVSInvalidVariantError as e: + continue + # Identify primary assembly positions + if re.match('NC_', alt_gen_var.ac): + if re.match('GRC', build): + primary_genomic_dicts[build.lower()] = { + 'hgvs_genomic_description': fn.valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['grc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } - # Dictionaries of genomic 
loci - alt_genomic_dicts = [] - primary_genomic_dicts = {} + else: + primary_genomic_dicts[build.lower()] = { + 'hgvs_genomic_description': fn.valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['ucsc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } + if build == 'GRCh38': + vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, 'hg38', reverse_normalizer, + sf) + primary_genomic_dicts['hg38'] = { + 'hgvs_genomic_description': fn.valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['ucsc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } - if len(multi_gen_vars) != 0: - for alt_gen_var in multi_gen_vars: - for build in self.genome_builds: - test = vvChromasomes.supported_for_mapping(alt_gen_var.ac, build) - if test == 'true': - try: - vcf_dict = va_H2V.report_hgvs2vcf(alt_gen_var, build, reverse_normalizer, sf) - except hgvs.exceptions.HGVSInvalidVariantError as e: - continue - # Identify primary assembly positions - if re.match('NC_', alt_gen_var.ac): - if re.match('GRC', build): - primary_genomic_dicts[build.lower()] = { - 'hgvs_genomic_description': valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['grc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } + continue else: - primary_genomic_dicts[build.lower()] = { - 'hgvs_genomic_description': valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['ucsc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] + if re.match('GRC', build): + dict = {build.lower(): {'hgvs_genomic_description': fn.valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['grc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } } - } - if build == 'GRCh38': - vcf_dict = va_H2V.report_hgvs2vcf(alt_gen_var, 'hg38', reverse_normalizer, - sf) - primary_genomic_dicts['hg38'] = { - 'hgvs_genomic_description': valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['ucsc_chr'], - 'pos': vcf_dict['pos'], 
- 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] + else: + dict = {build.lower(): {'hgvs_genomic_description': fn.valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['ucsc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } } - } - - continue - - else: - if re.match('GRC', build): - dict = {build.lower(): {'hgvs_genomic_description': valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['grc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } - } - else: - dict = {build.lower(): {'hgvs_genomic_description': valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['ucsc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } - } - # Append - alt_genomic_dicts.append(dict) - - if build == 'GRCh38': - vcf_dict = va_H2V.report_hgvs2vcf(alt_gen_var, 'hg38', reverse_normalizer, - sf) - dict = {'hg38': {'hgvs_genomic_description': valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['ucsc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } - } # Append alt_genomic_dicts.append(dict) - continue - else: - # May need to account for ALT NC_ - pass - # Warn not directly mapped to specified genome build - if genomic_accession != '': - caution = '' - if primary_assembly.lower() not in primary_genomic_dicts.keys(): - warnings = warnings + ': ' + str( - hgvs_coding) + ' cannot be mapped directly to genome build ' + primary_assembly + ': See alternative genomic loci or alternative genome builds for aligned genomic positions' - - warn_list = warnings.split(': ') - warnings_out = [] - for warning in warn_list: - warning.strip() - warning = warning.replace("'", "") - if warning == '': - continue - warnings_out.append(warning) - # Remove duplicate elements but maintain the order - seen = {} - no_rep_list = [seen.setdefault(x, x) for x in warnings_out if x not in seen] - warnings_out = no_rep_list - - # Ensure Variants have had the refs removed. 
- # if not hasattr(posedit, refseqgene_variant): - if refseqgene_variant != '': - try: - refseqgene_variant = valstr(hgvs_refseqgene_variant) - except: - exceptPass() + if build == 'GRCh38': + vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, 'hg38', reverse_normalizer, + sf) + dict = {'hg38': {'hgvs_genomic_description': fn.valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['ucsc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } + } + # Append + alt_genomic_dicts.append(dict) + continue + else: + # May need to account for ALT NC_ + pass - # Add single letter AA code to protein descriptions - predicted_protein_variant_dict = {"tlr": str(predicted_protein_variant), "slr": ''} - if predicted_protein_variant != '': - if not 'Non-coding :n.' in predicted_protein_variant: + # Warn not directly mapped to specified genome build + if genomic_accession != '': + caution = '' + if primary_assembly.lower() not in primary_genomic_dicts.keys(): + warnings = warnings + ': ' + str( + hgvs_coding) + ' cannot be mapped directly to genome build ' + primary_assembly + ': See alternative genomic loci or alternative genome builds for aligned genomic positions' + + warn_list = warnings.split(': ') + warnings_out = [] + for warning in warn_list: + warning.strip() + warning = warning.replace("'", "") + if warning == '': + continue + warnings_out.append(warning) + # Remove duplicate elements but maintain the order + seen = {} + no_rep_list = [seen.setdefault(x, x) for x in warnings_out if x not in seen] + warnings_out = no_rep_list + + # Ensure Variants have had the refs removed. 
+ # if not hasattr(posedit, refseqgene_variant): + if refseqgene_variant != '': try: - format_p = predicted_protein_variant - format_p = re.sub('\(LRG_.+?\)', '', format_p) - re_parse_protein = hp.parse_hgvs_variant(format_p) - re_parse_protein_singleAA = output_formatter.single_letter_protein(re_parse_protein) - predicted_protein_variant_dict["slr"] = str(re_parse_protein_singleAA) - except hgvs.exceptions.HGVSParseError: - exceptPass() - else: - predicted_protein_variant_dict["slr"] = str(predicted_protein_variant) - - # Populate the dictionary - dict_out['submitted_variant'] = submitted - dict_out['gene_symbol'] = gene_symbol - dict_out['transcript_description'] = transcript_description - dict_out['hgvs_transcript_variant'] = tx_variant - dict_out['genome_context_intronic_sequence'] = genome_context_transcript_variant - dict_out['refseqgene_context_intronic_sequence'] = RefSeqGene_context_transcript_variant - dict_out['hgvs_refseqgene_variant'] = refseqgene_variant - dict_out['hgvs_predicted_protein_consequence'] = predicted_protein_variant_dict - dict_out['validation_warnings'] = warnings_out - dict_out['hgvs_lrg_transcript_variant'] = lrg_transcript_variant - dict_out['hgvs_lrg_variant'] = lrg_variant - dict_out['alt_genomic_loci'] = alt_genomic_dicts - dict_out['primary_assembly_loci'] = primary_genomic_dicts - dict_out['reference_sequence_records'] = '' - - # Add links to reference_sequence_records - ref_records = external.get_urls(dict_out) - if ref_records != {}: - dict_out['reference_sequence_records'] = ref_records - - # Append to a list for return - batch_out.append(dict_out) + refseqgene_variant = fn.valstr(hgvs_refseqgene_variant) + except: + fn.exceptPass() + + # Add single letter AA code to protein descriptions + predicted_protein_variant_dict = {"tlr": str(predicted_protein_variant), "slr": ''} + if predicted_protein_variant != '': + if not 'Non-coding :n.' 
in predicted_protein_variant: + try: + format_p = predicted_protein_variant + format_p = re.sub('\(LRG_.+?\)', '', format_p) + re_parse_protein = self.hp.parse_hgvs_variant(format_p) + re_parse_protein_singleAA = fn.single_letter_protein(re_parse_protein) + predicted_protein_variant_dict["slr"] = str(re_parse_protein_singleAA) + except hgvs.exceptions.HGVSParseError: + fn.exceptPass() + else: + predicted_protein_variant_dict["slr"] = str(predicted_protein_variant) + + # Populate the dictionary + dict_out['submitted_variant'] = submitted + dict_out['gene_symbol'] = gene_symbol + dict_out['transcript_description'] = transcript_description + dict_out['hgvs_transcript_variant'] = tx_variant + dict_out['genome_context_intronic_sequence'] = genome_context_transcript_variant + dict_out['refseqgene_context_intronic_sequence'] = RefSeqGene_context_transcript_variant + dict_out['hgvs_refseqgene_variant'] = refseqgene_variant + dict_out['hgvs_predicted_protein_consequence'] = predicted_protein_variant_dict + dict_out['validation_warnings'] = warnings_out + dict_out['hgvs_lrg_transcript_variant'] = lrg_transcript_variant + dict_out['hgvs_lrg_variant'] = lrg_variant + dict_out['alt_genomic_loci'] = alt_genomic_dicts + dict_out['primary_assembly_loci'] = primary_genomic_dicts + dict_out['reference_sequence_records'] = '' + + # Add links to reference_sequence_records + ref_records = self.db.get.get_urls(dict_out) + if ref_records != {}: + dict_out['reference_sequence_records'] = ref_records + + # Append to a list for return + batch_out.append(dict_out) + else: + continue else: continue - else: - continue - - """ - Structure the output into dictionaries rather than a list with descriptive keys - and a validation type flag - """ - logger.trace("Populating output dictionary") - # Create output dictionary - validation_output = {'flag': None} - - # For gene outputs, i.e. 
those that hit transcripts - # dotter = '' - if set_output_type_flag == 'gene': - validation_output['flag'] = 'gene_variant' - validation_error_counter = 0 - for valid_v in batch_out: - if valid_v['validation_warnings'] == ['Validation error']: - validation_error_counter = validation_error_counter + 1 - identification_key = 'Validation_Error_%s' % (str(validation_error_counter)) - else: - identification_key = '%s' % (str(valid_v['hgvs_transcript_variant'])) - - # if identification_key not in validation_output.keys(): - validation_output[identification_key] = valid_v - # else: - # dotter = dotter + ' ' - # validation_output[identification_key + dotter] = valid_v - - # For warning only outputs - # Should only ever be 1 output as an error or a warning of the following types - # Gene symbol as reference sequence - # Gene as transcript reference sequence - if set_output_type_flag == 'warning': - validation_output['flag'] = 'warning' - validation_error_counter = 0 - validation_warning_counter = 0 - if len(batch_out) == 0: - validation_output['flag'] = 'empty_result' - for valid_v in batch_out: - if valid_v['validation_warnings'] == ['Validation error']: - validation_error_counter = validation_error_counter + 1 - identification_key = 'validation_error_%s' % (str(validation_error_counter)) - else: - validation_warning_counter = validation_warning_counter + 1 - identification_key = 'validation_warning_%s' % (str(validation_warning_counter)) - validation_output[identification_key] = valid_v - - # Intergenic variants - validation_intergenic_counter = 0 - if set_output_type_flag == 'intergenic': - validation_output['flag'] = 'intergenic' - for valid_v in batch_out: - validation_intergenic_counter = validation_intergenic_counter + 1 - identification_key = 'Intergenic_Variant_%s' % (str(validation_intergenic_counter)) - - # Attempt to liftover between genome builds - # Note: pyliftover uses the UCSC liftOver tool. 
- # https://pypi.org/project/pyliftover/ - genomic_position_info = valid_v['primary_assembly_loci'] - for g_p_key in genomic_position_info.keys(): - - # Identify the current build and hgvs_genomic descripsion - if re.match('hg', g_p_key): - # incoming_vcf = genomic_position_info[g_p_key]['vcf'] - # set builds - if g_p_key == 'hg38': - build_to = 'hg19' - build_from = 'hg38' - if g_p_key == 'hg19': - build_to = 'hg38' - build_from = 'hg19' - elif re.match('grc', g_p_key): - # incoming_vcf = genomic_position_info[g_p_key]['vcf'] - # set builds - if g_p_key == 'grch38': - build_to = 'GRCh37' - build_from = 'GRCh38' - if g_p_key == 'grch37': - build_to = 'GRCh38' - build_from = 'GRCh37' - - # Liftover - lifted_response = lift_over(genomic_position_info[g_p_key]['hgvs_genomic_description'], build_from, build_to, hn, vm, vr, hdp, hp, reverse_normalizer, sf, evm) - - # Sort the respomse into primary assembly and ALT - primary_assembly_loci = {} - alt_genomic_loci = [] - for build_key, accession_dict in lifted_response.iteritems(): - try: - accession_key = accession_dict.keys()[0] - if re.match('NC_', accession_dict[accession_key]['hgvs_genomic_description']): - primary_assembly_loci[build_key.lower()] = accession_dict[accession_key] - else: - alt_genomic_loci.append({build_key.lower(): accession_dict[accession_key]}) - # KeyError if the dicts are empty - except KeyError: - continue + """ + Structure the output into dictionaries rather than a list with descriptive keys + and a validation type flag + """ + logger.trace("Populating output dictionary") + # Create output dictionary + validation_output = {'flag': None} + + # For gene outputs, i.e. 
those that hit transcripts + # dotter = '' + if set_output_type_flag == 'gene': + validation_output['flag'] = 'gene_variant' + validation_error_counter = 0 + for valid_v in batch_out: + if valid_v['validation_warnings'] == ['Validation error']: + validation_error_counter = validation_error_counter + 1 + identification_key = 'Validation_Error_%s' % (str(validation_error_counter)) + else: + identification_key = '%s' % (str(valid_v['hgvs_transcript_variant'])) + + # if identification_key not in validation_output.keys(): + validation_output[identification_key] = valid_v + # else: + # dotter = dotter + ' ' + # validation_output[identification_key + dotter] = valid_v + + # For warning only outputs + # Should only ever be 1 output as an error or a warning of the following types + # Gene symbol as reference sequence + # Gene as transcript reference sequence + if set_output_type_flag == 'warning': + validation_output['flag'] = 'warning' + validation_error_counter = 0 + validation_warning_counter = 0 + if len(batch_out) == 0: + validation_output['flag'] = 'empty_result' + for valid_v in batch_out: + if valid_v['validation_warnings'] == ['Validation error']: + validation_error_counter = validation_error_counter + 1 + identification_key = 'validation_error_%s' % (str(validation_error_counter)) + else: + validation_warning_counter = validation_warning_counter + 1 + identification_key = 'validation_warning_%s' % (str(validation_warning_counter)) + validation_output[identification_key] = valid_v + + # Intergenic variants + validation_intergenic_counter = 0 + if set_output_type_flag == 'intergenic': + validation_output['flag'] = 'intergenic' + for valid_v in batch_out: + validation_intergenic_counter = validation_intergenic_counter + 1 + identification_key = 'Intergenic_Variant_%s' % (str(validation_intergenic_counter)) + + # Attempt to liftover between genome builds + # Note: pyliftover uses the UCSC liftOver tool. 
+ # https://pypi.org/project/pyliftover/ + genomic_position_info = valid_v['primary_assembly_loci'] + for g_p_key in genomic_position_info.keys(): + + # Identify the current build and hgvs_genomic descripsion + if re.match('hg', g_p_key): + # incoming_vcf = genomic_position_info[g_p_key]['vcf'] + # set builds + if g_p_key == 'hg38': + build_to = 'hg19' + build_from = 'hg38' + if g_p_key == 'hg19': + build_to = 'hg38' + build_from = 'hg19' + elif re.match('grc', g_p_key): + # incoming_vcf = genomic_position_info[g_p_key]['vcf'] + # set builds + if g_p_key == 'grch38': + build_to = 'GRCh37' + build_from = 'GRCh38' + if g_p_key == 'grch37': + build_to = 'GRCh38' + build_from = 'GRCh37' + + # Liftover + lifted_response = lift_over(genomic_position_info[g_p_key]['hgvs_genomic_description'], build_from, build_to, hn, vm, vr, self.hdp, hp, reverse_normalizer, sf, evm) + + # Sort the respomse into primary assembly and ALT + primary_assembly_loci = {} + alt_genomic_loci = [] + for build_key, accession_dict in lifted_response.iteritems(): + try: + accession_key = accession_dict.keys()[0] + if re.match('NC_', accession_dict[accession_key]['hgvs_genomic_description']): + primary_assembly_loci[build_key.lower()] = accession_dict[accession_key] + else: + alt_genomic_loci.append({build_key.lower(): accession_dict[accession_key]}) + + # KeyError if the dicts are empty + except KeyError: + continue - # Add the dictionaries from lifted response to the output - if primary_assembly_loci != {}: - valid_v['primary_assembly_loci'] = primary_assembly_loci - if alt_genomic_loci != []: - valid_v['alt_genomic_loci'] = alt_genomic_loci - - # Finalise the output dictionary - validation_output[identification_key] = valid_v - - # Add error strings to validation output - # ''' - metadata = {} - logger.info("Variant successfully validated") - logs = [] - logString = logger.getString() - for l in logger.getString().split("\n"): - logs.append(l) - metadata["logs"] = logString - metadata["variant"] = 
batch_variant - metadata["assembly"] = selected_assembly - metadata["transcripts"] = select_transcripts - metadata['seqrepo_directory'] = HGVS_SEQREPO_DIR - metadata['uta_url'] = UTA_DB_URL - metadata['py_liftover_directory'] = PYLIFTOVER_DIR - metadata['variantvalidator_data_url'] = VALIDATOR_DB_URL - metadata['entrez_id'] = ENTREZ_ID - metadata['variantvalidator_version'] = VERSION - metadata['variantvalidator_hgvs_version'] = hgvs_version - metadata['uta_schema'] = str(hdp.data_version()) - metadata['seqrepo_db'] = HGVS_SEQREPO_DIR.split('/')[-1] - validation_output["metadata"] = metadata - # ''' - # Measure time elapsed - time_now = time.time() - elapsed_time = time_now - start_time - logger.debug('validation time = ' + str(elapsed_time)) - - # return batch_out - return validation_output - - # Bug catcher - except KeyboardInterrupt: - raise - except BaseException as e: - # Debug mode - exc_type, exc_value, last_traceback = sys.exc_info() - te = traceback.format_exc() - # tr = ''.join(traceback.format_stack()) - tbk = [str(exc_type), str(exc_value), str(te)] - er = '\n'.join(tbk) - # raise variantValidatorError('Validation error') - # Return - # return - logger.critical(str(exc_type) + " " + str(exc_value)) - logger.debug(str(er)) + # Add the dictionaries from lifted response to the output + if primary_assembly_loci != {}: + valid_v['primary_assembly_loci'] = primary_assembly_loci + if alt_genomic_loci != []: + valid_v['alt_genomic_loci'] = alt_genomic_loci + + # Finalise the output dictionary + validation_output[identification_key] = valid_v + + # Add error strings to validation output + # ''' + metadata = {} + logger.info("Variant successfully validated") + logs = [] + logString = logger.getString() + for l in logger.getString().split("\n"): + logs.append(l) + metadata["logs"] = logString + metadata["variant"] = batch_variant + metadata["assembly"] = selected_assembly + metadata["transcripts"] = select_transcripts + metadata['seqrepo_directory'] = 
self.seqrepoPath + metadata['uta_url'] = self.utaPath + metadata['py_liftover_directory'] = self.liftoverPath + metadata['variantvalidator_data_url'] = self.db.path + metadata['entrez_id'] = self.entrezID + metadata['variantvalidator_version'] = self.version + metadata['variantvalidator_hgvs_version'] = self.hgvsVersion + metadata['uta_schema'] = self.utaSchema + metadata['seqrepo_db'] = self.seqrepoVersion + validation_output["metadata"] = metadata + # ''' + # Measure time elapsed + time_now = time.time() + elapsed_time = time_now - start_time + logger.debug('validation time = ' + str(elapsed_time)) + + # return batch_out + return validation_output + + # Bug catcher + except KeyboardInterrupt: + raise + except BaseException as e: + # Debug mode + exc_type, exc_value, last_traceback = sys.exc_info() + te = traceback.format_exc() + # tr = ''.join(traceback.format_stack()) + tbk = [str(exc_type), str(exc_value), str(te)] + er = '\n'.join(tbk) + # raise variantValidatorError('Validation error') + # Return + # return + logger.critical(str(exc_type) + " " + str(exc_value)) + logger.debug(str(er)) diff --git a/VariantValidator/modules/vvFunctions.py b/VariantValidator/modules/vvFunctions.py index bfb31c61..6de9b662 100644 --- a/VariantValidator/modules/vvFunctions.py +++ b/VariantValidator/modules/vvFunctions.py @@ -1,10 +1,17 @@ from Bio import Entrez,SeqIO +from Bio.Seq import Seq +from Bio.Alphabet import IUPAC import httplib2 as http import json from urlparse import urlparse #Python 2 import functools +import traceback +import sys +from vvLogging import logger import re import copy + + #from urllib.parse import urlparse #Python 3 def handleCursor(func): @@ -81,11 +88,373 @@ def valstr(hgvs_variant): format protein description into single letter aa code """ def single_letter_protein(hgvs_protein): - hgvs_protein_slc = hgvs_protein.format({'p_3_letter': False}) - return hgvs_protein_slc + return hgvs_protein.format({'p_3_letter': False}) """ format nucleotide 
descriptions to not display reference base """ def remove_reference(hgvs_nucleotide): hgvs_nucleotide_refless = hgvs_nucleotide.format({'max_ref_length': 0}) return hgvs_nucleotide_refless + +def exceptPass(validation=None): + exc_type, exc_value, last_traceback = sys.exc_info() + te = traceback.format_exc() + tbk = [str(exc_type), str(exc_value), str(te)] + er = str('\n'.join(tbk)) + if last_traceback: + logger.warning( + "Except pass for " + str(exc_type) + " " + str(exc_value) + " at line " + str(last_traceback.tb_lineno)) + else: + logger.warning("Except pass for " + str(exc_type) + " " + str(exc_value)) + logger.debug(er) + +# From functions.py +""" +user_input +collect the input from the form and convert to a hgvs readable string + Removes brackets and contained information -if given + Identifies variant type (p. c. etc) + Returns a dictionary containing a formated input string which is optimal for hgvs + parsing and the variant type + Accepts c, g, n, r currently. And now P also 15.07.15 +""" +def user_input(input): + raw_variant = input.strip() + + # Set regular expressions for if statements + pat_g = re.compile("\:g\.") # Pattern looks for :g. + pat_gene = re.compile('\(.+?\)') # Pattern looks for (....) + pat_c = re.compile("\:c\.") # Pattern looks for :c. + pat_r = re.compile("\:r\.") # Pattern looks for :r. + pat_n = re.compile("\:n\.") # Pattern looks for :n. + pat_p = re.compile("\:p\.") # Pattern looks for :p. + pat_m = re.compile("\:m\.") # Pattern looks for :m. + pat_est = re.compile("\d\:\d") # Pattern looks for number:number + + # If statements + if pat_g.search(raw_variant): # If the :g. pattern is present in the raw_variant, g_in is linked to the raw_variant + if pat_gene.search(raw_variant): # If pat gene is present in the raw_variant + variant = pat_gene.sub('', + raw_variant) # variant is set to the raw_variant string with the pattern (...) 
substituted out + formated = {'variant': variant, 'type': ':g.'} + return formated + else: + variant = raw_variant # Otherwise it is set to raw_variant + formated = {'variant': variant, 'type': ':g.'} + return formated + + elif pat_r.search(raw_variant): + if pat_gene.search(raw_variant): + variant = pat_gene.sub('', raw_variant) + formated = {'variant': variant, 'type': ':r.'} + return formated + else: + variant = raw_variant + formated = {'variant': variant, 'type': ':r.'} + return formated + + elif pat_n.search(raw_variant): + if pat_gene.search(raw_variant): + variant = pat_gene.sub('', raw_variant) + formated = {'variant': variant, 'type': ':n.'} + return formated + else: + variant = raw_variant + formated = {'variant': variant, 'type': ':n.'} + return formated + + elif pat_c.search(raw_variant): + if pat_gene.search(raw_variant): + variant = pat_gene.sub('', raw_variant) + formated = {'variant': variant, 'type': ':c.'} + return formated + else: + variant = raw_variant + formated = {'variant': variant, 'type': ':c.'} + return formated + + elif pat_p.search(raw_variant): + variant = raw_variant + formated = {'variant': variant, 'type': ':p.'} + return formated + + elif pat_m.search(raw_variant): + variant = raw_variant + formated = {'variant': variant, 'type': ':m.'} + return formated + elif pat_est.search(raw_variant): + variant = raw_variant + formated = {'variant': variant, 'type': 'est'} + return formated + else: + formatted = 'invalid' + return formatted + +# From links.py +""" +Function which predicts the protein effect of c. inversions +""" + +def pro_inv_info(prot_ref_seq, prot_var_seq): + info = { + 'variant': 'true', + 'prot_del_seq': '', + 'prot_ins_seq': '', + 'edit_start': 0, + 'edit_end': 0, + 'terminate': 'false', + 'ter_pos': 0, + 'error': 'false' + } + + # Is there actually any variation? 
+ if prot_ref_seq == prot_var_seq: + info['variant'] = 'false' + else: + # Deal with terminations + term = re.compile("\*") + if term.search(prot_var_seq): + # Set the termination reporter to true + info['terminate'] = 'true' + # The termination position will be equal to the length of the variant sequence because it's a TERMINATOR!!! + info['ter_pos'] = len(prot_var_seq) + # cut the ref sequence to == size + prot_ref_seq = prot_ref_seq[0:info['ter_pos']] + prot_var_seq = prot_var_seq[0:info['ter_pos']] + + # Whether terminated or not, the sequences should now be the same length + # Unless the termination codon has been disrupted + if len(prot_var_seq) < len(prot_ref_seq): + info['error'] = 'true' + return info + else: + # Set the counter + aa_counter = 0 + + # Make list copies of the sequences to gather the required info + ref = list(prot_ref_seq) + var = list(prot_var_seq) + + # Loop through ref list to find the first missmatch position + for aa in ref: + if ref[aa_counter] == var[aa_counter]: + aa_counter = aa_counter + 1 + else: + break + + # Enter the start position + info['edit_start'] = aa_counter + 1 + # Remove those elements form the list + del ref[0:aa_counter] + del var[0:aa_counter] + + # the sequences should now be the same length + # Except if the termination codon was removed + if len(ref) > len(var): + info['error'] = 'true' + return info + else: + # Reset the aa_counter but to go backwards + aa_counter = 0 + # reverse the lists + ref = ref[::-1] + var = var[::-1] + # Reverse loop through ref list to find the first missmatch position + for aa in ref: + if var[aa_counter] == '\*': + break + if aa == var[aa_counter]: + aa_counter = aa_counter + 1 + else: + break + # Remove those elements form the list + del ref[0:aa_counter] + del var[0:aa_counter] + # re-reverse the lists + ref = ref[::-1] + var = var[::-1] + + # If the var is > ref, the ter has been removed, need to re-add ter to each + if len(ref) < len(var): + ref.append('*') + if prot_var_seq[-1] 
== '*': + var.append('*') + # the sequences should now be the same length + # Except if the ter was removed + if len(ref) > len(var): + info['error'] = 'true' + return info + else: + # Enter the sequences + info['prot_del_seq'] = ''.join(ref) + info['prot_ins_seq'] = ''.join(var) + info['edit_end'] = info['edit_start'] + len(ref) - 1 + return info + +def pro_delins_info(prot_ref_seq, prot_var_seq): + info = { + 'variant' : 'true', + 'prot_del_seq' : '', + 'prot_ins_seq' : '', + 'edit_start' : 0, + 'edit_end' : 0, + 'terminate' : 'false', + 'ter_pos' : 0, + 'error' : 'false' + } + + # Is there actually any variation? + if prot_ref_seq == prot_var_seq: + info['variant'] = 'false' + else: + # Deal with terminations + term = re.compile("\*") + if term.search(prot_var_seq): + # Set the termination reporter to true + info['terminate'] = 'true' + # The termination position will be equal to the length of the variant sequence because it's a TERMINATOR!!! + info['ter_pos'] = len(prot_var_seq) + # cut the ref sequence to == size + prot_ref_seq = prot_ref_seq[0:info['ter_pos']] + prot_var_seq = prot_var_seq[0:info['ter_pos']] + + # Whether terminated or not, the sequences should now be the same length + # Unless the termination codon has been disrupted + if len(prot_var_seq) < len(prot_ref_seq): + info['error'] = 'true' + return info + else: + # Set the counter + aa_counter = 0 + + # Make list copies of the sequences to gather the required info + ref = list(prot_ref_seq) + var = list(prot_var_seq) + + # Loop through ref list to find the first missmatch position + for aa in ref: + if ref[aa_counter] == var[aa_counter]: + aa_counter = aa_counter + 1 + else: + break + + # Enter the start position + info['edit_start'] = aa_counter + 1 + # Remove those elements form the list + del ref[0:aa_counter] + del var[0:aa_counter] + + # the sequences should now be the same length + # Except if the termination codon was removed + if len(ref) > len(var): + info['error'] = 'true' + return info 
+ else: + # Reset the aa_counter but to go backwards + aa_counter = 0 + # reverse the lists + ref = ref[::-1] + var = var[::-1] + # Reverse loop through ref list to find the first missmatch position + for aa in ref: + if var[aa_counter] == '\*': + break + if aa == var[aa_counter]: + aa_counter = aa_counter + 1 + else: + break + # Remove those elements form the list + del ref[0:aa_counter] + del var[0:aa_counter] + # re-reverse the lists + ref = ref[::-1] + var = var[::-1] + + # If the var is > ref, the ter has been removed, need to re-add ter to each +# if len(ref) < len(var): +# ref.append('*') +# if prot_var_seq[-1] == '*': +# var.append('*') + + # the sequences should now be the same length + # Except if the ter was removed + if len(ref) > len(var): + info['error'] = 'true' + return info + else: + # Enter the sequences + info['prot_del_seq'] = ''.join(ref) + info['prot_ins_seq'] = ''.join(var) + info['edit_end'] = info['edit_start'] + len(ref) -1 + return info + +""" +Translate c. reference sequences, including those that have been modified +must have the CDS in the specified position +""" +def translate(ed_seq, cds_start): + # ed_seq = ed_seq.replace('\n', '') + ed_seq = ed_seq.strip() + # Ensure the starting codon is in the correct position + met = ed_seq[cds_start:cds_start + 3] + if (met == 'ATG') or (met == 'atg'): + # Remove the 5 prime UTR + sequence = ed_seq[cds_start:] + coding_dna = Seq(str(sequence), IUPAC.unambiguous_dna) + # Translate + trans = coding_dna.translate() + aain = list(trans) + aaout = [] + count = 0 + while aain: + if aain[count] != '*': + aaout.append(aain[count]) + count = count + 1 + else: + aaout.append(aain[count]) + break + translation = ''.join(aaout) + # Apply a width of 60 characters to the string output + # translation = textwrap.fill(translation, width=60) + return translation + else: + translation = 'error' + return translation + +""" +Convert single letter amino acid code to 3 letter code +""" +def one_to_three(seq): + 
aacode = { + 'A': 'Ala', 'C': 'Cys', 'D': 'Asp', 'E': 'Glu', + 'F': 'Phe', 'G': 'Gly', 'H': 'His', 'I': 'Ile', + 'K': 'Lys', 'L': 'Leu', 'M': 'Met', 'N': 'Asn', + 'P': 'Pro', 'Q': 'Gln', 'R': 'Arg', 'S': 'Ser', + 'T': 'Thr', 'V': 'Val', 'W': 'Trp', 'Y': 'Tyr', + '*': 'Ter'} + + oned = list(seq) + out = [] + for aa in oned: + get_value = aacode.get(aa) + out.append(get_value) + + threed_up = ''.join(out) + + return threed_up + + +""" +Takes a reference sequence and inverts the specified position +""" +# n. Inversions - This comes from VariantValidator, not validation!!!! +def n_inversion(ref_seq, del_seq, inv_seq, interval_start, interval_end): + sequence = '' + # Use string indexing to check whether the sequences are the same + test = ref_seq[interval_start - 1:interval_end] + if test == del_seq: + sequence = ref_seq[0:interval_start - 1] + inv_seq + ref_seq[interval_end:] + return sequence + else: + sequence = 'error' + return sequence diff --git a/VariantValidator/modules/vvHGVS.py b/VariantValidator/modules/vvHGVS.py new file mode 100644 index 00000000..3fbc0373 --- /dev/null +++ b/VariantValidator/modules/vvHGVS.py @@ -0,0 +1,831 @@ +""" +A variety of functions that convert parder hgvs objects into VCF component parts +Each function has a slightly difference emphasis +1. hgvs2vcf +Simple conversionwhich ensures identity is as 5 prime as possible by adding an extra 5 +prime base. Necessary for most gap handling situations +2. report_hgvs2vcf +Used to report the Most true representation of the VCF i.e. 5 prime normalized but no +additional bases added. NOTE: no gap handling capabilities +3. pos_lock_hgvs2vcf +No normalization at all. No additional bases added. Simply returns an in-situ VCF +4. hard_right_hgvs2vcf and hard_left_hgvs2vcf +Designed specifically for gap handling. 
+hard left pushes as 5 prime as possible and adds additional bases +hard right pushes as 3 prime as possible and adds additional bases +""" + +# Import modules +import re +import copy +import vvChromosomes as supportedChromosomeBuilds + +# Import Biopython modules +from Bio.Seq import Seq + + +# Database connections and hgvs objects are now passed from VariantValidator.py + +def hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): + hgvs_genomic_variant = hgvs_genomic + # Reverse normalize hgvs_genomic_variant: NOTE will replace ref + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic_variant) + # hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + + # Chr + chr = supportedChromosomeBuilds.to_chr_num_ucsc(reverse_normalized_hgvs_genomic.ac, primary_assembly) + if chr is not None: + pass + else: + chr = reverse_normalized_hgvs_genomic.ac + + if re.search('[GATC]+\=', str(reverse_normalized_hgvs_genomic.posedit)): + pos = str(reverse_normalized_hgvs_genomic.posedit.pos.start) + ref = reverse_normalized_hgvs_genomic.posedit.edit.ref + alt = reverse_normalized_hgvs_genomic.posedit.edit.ref + + # Insertions + elif (re.search('ins', str(reverse_normalized_hgvs_genomic.posedit)) and not re.search('del', str( + reverse_normalized_hgvs_genomic.posedit))): + end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) + start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) + alt_start = start - 1 # + # Recover sequences + ref_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), alt_start, end - 1) + ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt + # Assemble + pos = start + ref = ref_seq + alt = ref_seq + ins_seq + + # Substitutions + elif re.search('>', str(reverse_normalized_hgvs_genomic.posedit)): + ref = reverse_normalized_hgvs_genomic.posedit.edit.ref + alt = reverse_normalized_hgvs_genomic.posedit.edit.alt + pos = str(reverse_normalized_hgvs_genomic.posedit.pos) + + # Deletions + 
elif re.search('del', str(reverse_normalized_hgvs_genomic.posedit)) and not re.search('ins', str( + reverse_normalized_hgvs_genomic.posedit)): + end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) + start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) + adj_start = start - 2 + start = start - 1 + try: + ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt + except: + ins_seq = '' + else: + if str(ins_seq) == 'None': + ins_seq = '' + # Recover sequences + hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) + pre_base = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, start) + # Assemble + pos = str(start) + ref = pre_base + hgvs_del_seq + alt = pre_base + + + # inv + elif re.search('inv', str(reverse_normalized_hgvs_genomic.posedit)): + end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) + start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) + adj_start = start - 1 + start = start + try: + ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt + except: + ins_seq = '' + else: + if str(ins_seq) == 'None': + ins_seq = '' + # Recover sequences + hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) + vcf_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) + bs = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start - 1, adj_start) + # Assemble + pos = str(start) + ref = vcf_del_seq + alt = ins_seq + if re.search('inv', str(reverse_normalized_hgvs_genomic.posedit)): + my_seq = Seq(vcf_del_seq) + # alt = bs + str(my_seq.reverse_complement()) + alt = str(my_seq.reverse_complement()) + + + # Delins + elif (re.search('del', str(reverse_normalized_hgvs_genomic.posedit)) and re.search('ins', str( + reverse_normalized_hgvs_genomic.posedit))): + end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) + start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base - 1) + adj_start = start - 1 + 
start = start + try: + ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt + except: + ins_seq = '' + else: + if str(ins_seq) == 'None': + ins_seq = '' + # Recover sequences + hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) + vcf_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) + # Assemble + pos = str(start) + ref = vcf_del_seq + alt = vcf_del_seq[:1] + ins_seq + + # Duplications + elif (re.search('dup', str(reverse_normalized_hgvs_genomic.posedit))): + end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) # + start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) + adj_start = start - 2 # + start = start - 1 # + # Recover sequences + dup_seq = reverse_normalized_hgvs_genomic.posedit.edit.ref + vcf_ref_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) + # Assemble + pos = str(start) + ref = vcf_ref_seq + alt = vcf_ref_seq + dup_seq + else: + chr = '' + ref = '' + alt = '' + pos = '' + + # ensure as 5' as possible + if chr != '' and pos != '' and ref != '' and alt != '': + if len(ref) > 1: + rsb = list(str(ref)) + if reverse_normalized_hgvs_genomic.posedit.edit.type == 'identity': + pos = int(pos) - 1 + prev = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), pos - 1, pos) + pos = str(pos) + ref = prev + ref + alt = prev + alt + + # Dictionary the VCF + vcf_dict = {'chr': chr, 'pos': pos, 'ref': ref, 'alt': alt, 'normalized_hgvs': reverse_normalized_hgvs_genomic} + return vcf_dict + + +def report_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): + hgvs_genomic_variant = hgvs_genomic + + # Reverse normalize hgvs_genomic_variant: NOTE will replace ref + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic_variant) + # hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + + # Sort the primary assemblies + if re.match('GRC', primary_assembly): + if re.search('37', primary_assembly): + ucsc_pa = 
'hg19' + grc_pa = primary_assembly + if re.search('38', primary_assembly): + ucsc_pa = 'hg38' + grc_pa = primary_assembly + else: + if re.search('19', primary_assembly): + ucsc_pa = primary_assembly + grc_pa = 'GRCh37' + if re.search('38', primary_assembly): + ucsc_pa = primary_assembly + grc_pa = 'GRCh38' + + # UCSC Chr + ucsc_chr = supportedChromosomeBuilds.to_chr_num_ucsc(reverse_normalized_hgvs_genomic.ac, ucsc_pa) + if ucsc_chr is not None: + pass + else: + ucsc_chr = reverse_normalized_hgvs_genomic.ac + + # GRC Chr + grc_chr = supportedChromosomeBuilds.to_chr_num_refseq(reverse_normalized_hgvs_genomic.ac, grc_pa) + if grc_chr is not None: + pass + else: + grc_chr = reverse_normalized_hgvs_genomic.ac + + if re.search('[GATC]+\=', str(reverse_normalized_hgvs_genomic.posedit)): + pos = str(reverse_normalized_hgvs_genomic.posedit.pos.start) + ref = reverse_normalized_hgvs_genomic.posedit.edit.ref + alt = reverse_normalized_hgvs_genomic.posedit.edit.ref + + # Insertions + elif (re.search('ins', str(reverse_normalized_hgvs_genomic.posedit)) and not re.search('del', str( + reverse_normalized_hgvs_genomic.posedit))): + end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) + start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) + alt_start = start - 1 # + # Recover sequences + ref_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), alt_start, end - 1) + ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt + # Assemble + pos = start + ref = ref_seq + alt = ref_seq + ins_seq + + # Substitutions + elif re.search('>', str(reverse_normalized_hgvs_genomic.posedit)): + ref = reverse_normalized_hgvs_genomic.posedit.edit.ref + alt = reverse_normalized_hgvs_genomic.posedit.edit.alt + pos = str(reverse_normalized_hgvs_genomic.posedit.pos) + + # Deletions + elif re.search('del', str(reverse_normalized_hgvs_genomic.posedit)) and not re.search('ins', str( + reverse_normalized_hgvs_genomic.posedit)): + end = 
int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) + start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) + adj_start = start - 2 + start = start - 1 + try: + ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt + except: + ins_seq = '' + else: + if str(ins_seq) == 'None': + ins_seq = '' + # Recover sequences + hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) + pre_base = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, start) + # Assemble + pos = str(start) + ref = pre_base + hgvs_del_seq + alt = pre_base + + + # inv + elif re.search('inv', str(reverse_normalized_hgvs_genomic.posedit)): + end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) + start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) + adj_start = start - 1 + start = start + try: + ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt + except: + ins_seq = '' + else: + if str(ins_seq) == 'None': + ins_seq = '' + # Recover sequences + hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) + vcf_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) + bs = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start - 1, adj_start) + # Assemble + pos = str(start) + # pos = str(start-1) + # ref = bs + vcf_del_seq + ref = vcf_del_seq + alt = ins_seq + if re.search('inv', str(reverse_normalized_hgvs_genomic.posedit)): + my_seq = Seq(vcf_del_seq) + # alt = bs + str(my_seq.reverse_complement()) + alt = str(my_seq.reverse_complement()) + + # Delins + elif (re.search('del', str(reverse_normalized_hgvs_genomic.posedit)) and re.search('ins', str( + reverse_normalized_hgvs_genomic.posedit))): + end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) + start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base - 1) + adj_start = start - 1 + start = start + try: + ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt + except: + ins_seq = 
'' + else: + if str(ins_seq) == 'None': + ins_seq = '' + # Recover sequences + hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) + vcf_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) + # Assemble + # pos = str(start) + # ref = vcf_del_seq + # alt = vcf_del_seq[:1] + ins_seq + pos = str(start + 1) + ref = vcf_del_seq[1:] + alt = ins_seq + + # Duplications + elif (re.search('dup', str(reverse_normalized_hgvs_genomic.posedit))): + end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) # + start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) + adj_start = start - 2 # + start = start - 1 # + # Recover sequences + dup_seq = reverse_normalized_hgvs_genomic.posedit.edit.ref + vcf_ref_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) + # Assemble + pos = str(start + 1) + ref = vcf_ref_seq[1:] + alt = vcf_ref_seq[1:] + dup_seq + else: + chr = '' + ref = '' + alt = '' + pos = '' + + # Dictionary the VCF + vcf_dict = {'pos': str(pos), 'ref': ref, 'alt': alt, 'ucsc_chr': ucsc_chr, 'grc_chr': grc_chr, + 'normalized_hgvs': reverse_normalized_hgvs_genomic} + return vcf_dict + + +def pos_lock_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): + # Replace reference manually + if hgvs_genomic.posedit.edit.ref == '': + hgvs_genomic.posedit.edit.ref = sf.fetch_seq(str(hgvs_genomic.ac), hgvs_genomic.posedit.pos.start.base - 1, + hgvs_genomic.posedit.pos.end.base) + + reverse_normalized_hgvs_genomic = hgvs_genomic + if reverse_normalized_hgvs_genomic.posedit.edit.type == 'identity' and len( + reverse_normalized_hgvs_genomic.posedit.edit.ref) == 0: + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(reverse_normalized_hgvs_genomic) + + # hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + + # Chr + chr = supportedChromosomeBuilds.to_chr_num_ucsc(reverse_normalized_hgvs_genomic.ac, primary_assembly) + if chr is not None: + pass + else: 
+ chr = reverse_normalized_hgvs_genomic.ac + + if re.search('[GATC]+\=', str(reverse_normalized_hgvs_genomic.posedit)): + pos = str(reverse_normalized_hgvs_genomic.posedit.pos.start) + ref = reverse_normalized_hgvs_genomic.posedit.edit.ref + alt = reverse_normalized_hgvs_genomic.posedit.edit.ref + + # Insertions + elif (re.search('ins', str(reverse_normalized_hgvs_genomic.posedit)) and not re.search('del', str( + reverse_normalized_hgvs_genomic.posedit))): + end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) + start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) + alt_start = start - 1 # + # Recover sequences + ref_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), alt_start, end - 1) + ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt + # Assemble + pos = start + ref = ref_seq + alt = ref_seq + ins_seq + + # Substitutions + elif re.search('>', str(reverse_normalized_hgvs_genomic.posedit)): + ref = reverse_normalized_hgvs_genomic.posedit.edit.ref + alt = reverse_normalized_hgvs_genomic.posedit.edit.alt + pos = str(reverse_normalized_hgvs_genomic.posedit.pos) + + # Deletions + elif re.search('del', str(reverse_normalized_hgvs_genomic.posedit)) and not re.search('ins', str( + reverse_normalized_hgvs_genomic.posedit)): + end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) + start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) + adj_start = start - 2 + start = start - 1 + try: + ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt + except: + ins_seq = '' + else: + if str(ins_seq) == 'None': + ins_seq = '' + # Recover sequences + hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) + pre_base = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, start) + # Assemble + pos = str(start) + ref = pre_base + hgvs_del_seq + alt = pre_base + + + # inv + elif re.search('inv', str(reverse_normalized_hgvs_genomic.posedit)): + end = 
int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) + start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) + adj_start = start - 1 + start = start + try: + ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt + except: + ins_seq = '' + else: + if str(ins_seq) == 'None': + ins_seq = '' + # Recover sequences + hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) + vcf_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) + bs = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start - 1, adj_start) + # Assemble + pos = str(start) + # pos = str(start-1) + # ref = bs + vcf_del_seq + ref = vcf_del_seq + alt = ins_seq + if re.search('inv', str(reverse_normalized_hgvs_genomic.posedit)): + my_seq = Seq(vcf_del_seq) + # alt = bs + str(my_seq.reverse_complement()) + alt = str(my_seq.reverse_complement()) + + # Delins + elif (re.search('del', str(reverse_normalized_hgvs_genomic.posedit)) and re.search('ins', str( + reverse_normalized_hgvs_genomic.posedit))): + end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) + start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base - 1) + adj_start = start - 1 + start = start + try: + ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt + except: + ins_seq = '' + else: + if str(ins_seq) == 'None': + ins_seq = '' + # Recover sequences + hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) + vcf_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) + # Assemble + pos = str(start) + ref = vcf_del_seq + alt = vcf_del_seq[:1] + ins_seq + + + # Duplications + elif (re.search('dup', str(reverse_normalized_hgvs_genomic.posedit))): + end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) # + start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) + adj_start = start - 2 # + start = start - 1 # + # Recover sequences + dup_seq = 
reverse_normalized_hgvs_genomic.posedit.edit.ref + vcf_ref_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) + # Assemble + pos = str(start) + ref = vcf_ref_seq + alt = vcf_ref_seq + dup_seq + else: + chr = '' + ref = '' + alt = '' + pos = '' + + vcf_dict = {'chr': chr, 'pos': pos, 'ref': ref, 'alt': alt, 'normalized_hgvs': reverse_normalized_hgvs_genomic} + return vcf_dict + + +def hard_right_hgvs2vcf(hgvs_genomic, primary_assembly, hn, sf): + hgvs_genomic_variant = hgvs_genomic + # Reverse normalize hgvs_genomic_variant: NOTE will replace ref + normalized_hgvs_genomic = hn.normalize(hgvs_genomic_variant) + + # Chr + chr = supportedChromosomeBuilds.to_chr_num_ucsc(normalized_hgvs_genomic.ac, primary_assembly) + if chr is not None: + pass + else: + chr = normalized_hgvs_genomic.ac + + if re.search('[GATC]+\=', str(normalized_hgvs_genomic.posedit)): + pos = str(normalized_hgvs_genomic.posedit.pos.start) + ref = normalized_hgvs_genomic.posedit.edit.ref + alt = normalized_hgvs_genomic.posedit.edit.ref + + # Insertions + elif (re.search('ins', str(normalized_hgvs_genomic.posedit)) and not re.search('del', str( + normalized_hgvs_genomic.posedit))): + end = int(normalized_hgvs_genomic.posedit.pos.end.base) + start = int(normalized_hgvs_genomic.posedit.pos.start.base) + alt_start = start - 1 # + # Recover sequences + ref_seq = sf.fetch_seq(str(normalized_hgvs_genomic.ac), alt_start, end - 1) + ins_seq = normalized_hgvs_genomic.posedit.edit.alt + # Assemble + pos = start + ref = ref_seq + alt = ref_seq + ins_seq + + # Substitutions + elif re.search('>', str(normalized_hgvs_genomic.posedit)): + ref = normalized_hgvs_genomic.posedit.edit.ref + alt = normalized_hgvs_genomic.posedit.edit.alt + pos = str(normalized_hgvs_genomic.posedit.pos) + + # Deletions + elif re.search('del', str(normalized_hgvs_genomic.posedit)) and not re.search('ins', + str(normalized_hgvs_genomic.posedit)): + end = int(normalized_hgvs_genomic.posedit.pos.end.base) + start = 
int(normalized_hgvs_genomic.posedit.pos.start.base) + adj_start = start - 2 + start = start - 1 + try: + ins_seq = normalized_hgvs_genomic.posedit.edit.alt + except: + ins_seq = '' + else: + if str(ins_seq) == 'None': + ins_seq = '' + # Recover sequences + hgvs_del_seq = sf.fetch_seq(str(normalized_hgvs_genomic.ac), start, end) + pre_base = sf.fetch_seq(str(normalized_hgvs_genomic.ac), adj_start, start) + # Assemble + pos = str(start) + ref = pre_base + hgvs_del_seq + alt = pre_base + + # inv + elif re.search('inv', str(normalized_hgvs_genomic.posedit)): + end = int(normalized_hgvs_genomic.posedit.pos.end.base) + start = int(normalized_hgvs_genomic.posedit.pos.start.base) + adj_start = start - 1 + start = start + try: + ins_seq = normalized_hgvs_genomic.posedit.edit.alt + except: + ins_seq = '' + else: + if str(ins_seq) == 'None': + ins_seq = '' + # Recover sequences + hgvs_del_seq = sf.fetch_seq(str(normalized_hgvs_genomic.ac), start, end) + vcf_del_seq = sf.fetch_seq(str(normalized_hgvs_genomic.ac), adj_start, end) + bs = sf.fetch_seq(str(normalized_hgvs_genomic.ac), adj_start - 1, adj_start) + # Assemble + pos = str(start) + # pos = str(start-1) + # ref = bs + vcf_del_seq + ref = vcf_del_seq + alt = ins_seq + if re.search('inv', str(normalized_hgvs_genomic.posedit)): + my_seq = Seq(vcf_del_seq) + # alt = bs + str(my_seq.reverse_complement()) + alt = str(my_seq.reverse_complement()) + + # Delins + elif (re.search('del', str(normalized_hgvs_genomic.posedit)) and re.search('ins', + str(normalized_hgvs_genomic.posedit))): + end = int(normalized_hgvs_genomic.posedit.pos.end.base) + start = int(normalized_hgvs_genomic.posedit.pos.start.base - 1) + adj_start = start - 1 + start = start + try: + ins_seq = normalized_hgvs_genomic.posedit.edit.alt + except: + ins_seq = '' + else: + if str(ins_seq) == 'None': + ins_seq = '' + # Recover sequences + hgvs_del_seq = sf.fetch_seq(str(normalized_hgvs_genomic.ac), start, end) + vcf_del_seq = 
sf.fetch_seq(str(normalized_hgvs_genomic.ac), adj_start, end) + # Assemble + pos = str(start) + ref = vcf_del_seq + alt = vcf_del_seq[:1] + ins_seq + + + # Duplications + elif (re.search('dup', str(normalized_hgvs_genomic.posedit))): + end = int(normalized_hgvs_genomic.posedit.pos.end.base) # + start = int(normalized_hgvs_genomic.posedit.pos.start.base) + adj_start = start - 2 # + start = start - 1 # + # Recover sequences + dup_seq = normalized_hgvs_genomic.posedit.edit.ref + vcf_ref_seq = sf.fetch_seq(str(normalized_hgvs_genomic.ac), adj_start, end) + # Assemble + pos = str(start) + ref = vcf_ref_seq + alt = vcf_ref_seq + dup_seq + else: + chr = '' + ref = '' + alt = '' + pos = '' + + # ADD SURROUNDING BASES + if chr != '' and pos != '' and ref != '' and alt != '': + # Add 2 post bases + pos = int(pos) + pre_end_pos = pos + len(ref) + end_pos = pre_end_pos + 1 + post = sf.fetch_seq(str(normalized_hgvs_genomic.ac), pre_end_pos - 1, end_pos) + ref = ref + post + alt = alt + post + + # Dictionary the VCF + vcf_dict = {'chr': chr, 'pos': pos, 'ref': ref, 'alt': alt, 'normalized_hgvs': normalized_hgvs_genomic} + return vcf_dict + + +def hard_left_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): + hgvs_genomic_variant = hgvs_genomic + # Reverse normalize hgvs_genomic_variant: NOTE will replace ref + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic_variant) + hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + + # Chr + chr = supportedChromosomeBuilds.to_chr_num_ucsc(reverse_normalized_hgvs_genomic.ac, primary_assembly) + if chr is not None: + pass + else: + chr = reverse_normalized_hgvs_genomic.ac + + if re.search('[GATC]+\=', str(reverse_normalized_hgvs_genomic.posedit)): + pos = str(reverse_normalized_hgvs_genomic.posedit.pos.start) + ref = reverse_normalized_hgvs_genomic.posedit.edit.ref + alt = reverse_normalized_hgvs_genomic.posedit.edit.ref + + # Insertions + elif (re.search('ins', 
str(reverse_normalized_hgvs_genomic.posedit)) and not re.search('del', str( + reverse_normalized_hgvs_genomic.posedit))): + end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) + start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) + alt_start = start - 1 # + # Recover sequences + ref_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), alt_start, end - 1) + ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt + # Assemble + pos = start + ref = ref_seq + alt = ref_seq + ins_seq + + # Substitutions + elif re.search('>', str(reverse_normalized_hgvs_genomic.posedit)): + ref = reverse_normalized_hgvs_genomic.posedit.edit.ref + alt = reverse_normalized_hgvs_genomic.posedit.edit.alt + pos = str(reverse_normalized_hgvs_genomic.posedit.pos) + + # Deletions + elif re.search('del', str(reverse_normalized_hgvs_genomic.posedit)) and not re.search('ins', str( + reverse_normalized_hgvs_genomic.posedit)): + end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) + start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) + adj_start = start - 2 + start = start - 1 + try: + ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt + except: + ins_seq = '' + else: + if str(ins_seq) == 'None': + ins_seq = '' + # Recover sequences + hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) + pre_base = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, start) + # Assemble + pos = str(start) + ref = pre_base + hgvs_del_seq + alt = pre_base + + + # inv + elif re.search('inv', str(reverse_normalized_hgvs_genomic.posedit)): + end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) + start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) + adj_start = start - 1 + start = start + try: + ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt + except: + ins_seq = '' + else: + if str(ins_seq) == 'None': + ins_seq = '' + # Recover sequences + hgvs_del_seq = 
sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) + vcf_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) + bs = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start - 1, adj_start) + # Assemble + pos = str(start) + # pos = str(start-1) + # ref = bs + vcf_del_seq + ref = vcf_del_seq + alt = ins_seq + if re.search('inv', str(reverse_normalized_hgvs_genomic.posedit)): + my_seq = Seq(vcf_del_seq) + # alt = bs + str(my_seq.reverse_complement()) + alt = str(my_seq.reverse_complement()) + + # Delins + elif (re.search('del', str(reverse_normalized_hgvs_genomic.posedit)) and re.search('ins', str( + reverse_normalized_hgvs_genomic.posedit))): + end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) + start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base - 1) + adj_start = start - 1 + start = start + try: + ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt + except: + ins_seq = '' + else: + if str(ins_seq) == 'None': + ins_seq = '' + # Recover sequences + hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) + vcf_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) + # Assemble + pos = str(start) + ref = vcf_del_seq + alt = vcf_del_seq[:1] + ins_seq + + + # Duplications + elif (re.search('dup', str(reverse_normalized_hgvs_genomic.posedit))): + end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) # + start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) + adj_start = start - 2 # + start = start - 1 # + # Recover sequences + dup_seq = reverse_normalized_hgvs_genomic.posedit.edit.ref + vcf_ref_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) + # Assemble + pos = str(start) + ref = vcf_ref_seq + alt = vcf_ref_seq + dup_seq + else: + chr = '' + ref = '' + alt = '' + pos = '' + + # ADD SURROUNDING BASES + if chr != '' and pos != '' and ref != '' and alt != '': + pre_pos = int(pos) - 1 + 
pre_pos + prev = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), pre_pos - 1, pre_pos) + pos = str(pre_pos) + ref = prev + ref + alt = prev + alt + + # Dictionary the VCF + vcf_dict = {'chr': chr, 'pos': pos, 'ref': ref, 'alt': alt, 'normalized_hgvs': reverse_normalized_hgvs_genomic} + return vcf_dict + + +def hgvs_ref_alt(hgvs_variant, sf): + if re.search('[GATC]+\=', str(hgvs_variant.posedit)): + ref = hgvs_variant.posedit.edit.ref + alt = hgvs_variant.posedit.edit.ref + + # Insertions + elif (re.search('ins', str(hgvs_variant.posedit)) and not re.search('del', str(hgvs_variant.posedit))): + end = int(hgvs_variant.posedit.pos.end.base) + start = int(hgvs_variant.posedit.pos.start.base) + alt_start = start - 1 # + # Recover sequences + ref_seq = sf.fetch_seq(str(hgvs_variant.ac), alt_start, end) + ins_seq = hgvs_variant.posedit.edit.alt + # Assemble + ref = ref_seq + alt = ref_seq[:1] + ins_seq + ref_seq[-1:] + + # Substitutions + elif re.search('>', str(hgvs_variant.posedit)): + ref = hgvs_variant.posedit.edit.ref + alt = hgvs_variant.posedit.edit.alt + + # Deletions + elif re.search('del', str(hgvs_variant.posedit)) and not re.search('ins', str(hgvs_variant.posedit)): + ref = hgvs_variant.posedit.edit.ref + alt = '' + + # inv + elif re.search('inv', str(hgvs_variant.posedit)): + ref = hgvs_variant.posedit + my_seq = Seq(ref) + alt = str(my_seq.reverse_complement()) + + # Delins + elif (re.search('del', str(hgvs_variant.posedit)) and re.search('ins', str(hgvs_variant.posedit))): + ref = hgvs_variant.posedit.edit.ref + alt = hgvs_variant.posedit.edit.alt + + # Duplications + elif (re.search('dup', str(hgvs_variant.posedit))): + ref = hgvs_variant.posedit.edit.ref + alt = hgvs_variant.posedit.edit.ref + hgvs_variant.posedit.edit.ref + else: + ref = '' + alt = '' + + ref_alt_dict = {'ref': ref, 'alt': alt} + return ref_alt_dict + +# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester +# +# This program is free software: you can redistribute it 
and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# \ No newline at end of file diff --git a/VariantValidator/modules/vvObjects.py b/VariantValidator/modules/vvObjects.py index a54ebe1f..bd11b422 100644 --- a/VariantValidator/modules/vvObjects.py +++ b/VariantValidator/modules/vvObjects.py @@ -13,11 +13,15 @@ import hgvs.posedit import hgvs.edit import hgvs.normalizer +from Bio.Seq import Seq + import re +import copy #import io from vvDatabase import vvDatabase from vvLogging import logger import vvCore +import vvFunctions as fn # Custom Exceptions class variantValidatorError(Exception): @@ -55,8 +59,9 @@ class variantValidatorError(Exception): -class Validator(): - # This object contains configuration options. +class Validator(vvCore.mixin): + # This object contains configuration options for the validator, but it inherits the mixin + # class in vvCore that contains the enormous validator function. def __init__(self): # First load from the configuration file, if it exists. 
configName="config.ini" @@ -119,7 +124,7 @@ def __init__(self): hgvs.global_config.formatting.max_ref_length = 1000000 # Create HGVS objects self.hdp = hgvs.dataproviders.uta.connect(pooling=True) - self.hp = hgvs.parser.Parser() #P arser + self.hp = hgvs.parser.Parser() #Parser self.vr = hgvs.validator.Validator(self.hdp) # Validator self.vm = hgvs.variantmapper.VariantMapper(self.hdp) # Variant mapper # Create a lose vm instance @@ -127,18 +132,56 @@ def __init__(self): replace_reference=True, prevalidation_level=None ) - self.nr_vm = hgvs.variantmapper.VariantMapper(self.hdp, replace_reference=False) + self.nr_vm = hgvs.variantmapper.VariantMapper(self.hdp, replace_reference=False) #No reverse variant mapper self.sf = hgvs.dataproviders.seqfetcher.SeqFetcher() # Seqfetcher # Set standard genome builds self.genome_builds = ['GRCh37', 'hg19', 'GRCh38'] self.uta_schema = str(self.hdp.data_version()) - #Transfer function handle from other file. - self.validate=vvCore.validate + # Create normalizer + self.hn = hgvs.normalizer.Normalizer(self.hdp, + cross_boundaries=False, + shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, + alt_aln_method='splign' + ) + self.reverse_hn = hgvs.normalizer.Normalizer(self.hdp, + cross_boundaries=False, + shuffle_direction=5, + alt_aln_method='splign' + ) + + # Create normalizer + self.merge_normalizer = hgvs.normalizer.Normalizer(self.hdp, + cross_boundaries=False, + shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, + alt_aln_method='splign', + validate=False + ) + self.reverse_merge_normalizer = hgvs.normalizer.Normalizer(self.hdp, + cross_boundaries=False, + shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, + alt_aln_method='splign', + validate=False + ) + #create no_norm_evm + self.no_norm_evm_38 = hgvs.assemblymapper.AssemblyMapper(self.hdp, + assembly_name='GRCh38', + alt_aln_method='splign', + normalize=False, + replace_reference=True + ) + + self.no_norm_evm_37 = 
hgvs.assemblymapper.AssemblyMapper(self.hdp, + assembly_name='GRCh37', + alt_aln_method='splign', + normalize=False, + replace_reference=True + ) - def validate(self): - pass + + #def validate(self): # <-------------- this is imported from the mixin class in vvCore. + # pass def createConfig(self,outPath): # This function reads from the default configuration file stored in the same folder as this module, # and transfers it to outPath. @@ -152,7 +195,280 @@ def createConfig(self,outPath): with open(outPath, "w") as file: for l in lines: file.write(l) - + def protein(self,variant, evm, hp): + # Set regular expressions for if statements + pat_c = re.compile("\:c\.") # Pattern looks for :c. Note (gene) has been removed + + # If the :c. pattern is present in the input variant + if pat_c.search(variant): + # convert the input string into a hgvs object + var_c = hp.parse_hgvs_variant(variant) + # Does the edit affect the start codon? + if ((var_c.posedit.pos.start.base >= 1 and var_c.posedit.pos.start.base <= 3 and var_c.posedit.pos.start.offset == 0) or ( + var_c.posedit.pos.end.base >= 1 and var_c.posedit.pos.end.base <= 3 and var_c.posedit.pos.end.offset == 0)) and not re.search('\*', str( + var_c.posedit.pos)): + ass_prot = self.hdp.get_pro_ac_for_tx_ac(var_c.ac) + if str(ass_prot) == 'None': + cod = str(var_c) + cod = cod.replace('inv', 'del') + cod = hp.parse_hgvs_variant(cod) + p = evm.c_to_p(cod) + ass_prot = p.ac + var_p = hgvs.sequencevariant.SequenceVariant(ac=ass_prot, type='p', posedit='(Met1?)') + else: + var_p = evm.c_to_p(var_c) + return var_p + if re.search(':n.', variant): + var_p = hp.parse_hgvs_variant(variant) + var_p.ac = 'Non-coding transcript' + var_p.posedit = '' + return var_p + def myc_to_p(self,hgvs_transcript, evm, re_to_p): + # Create dictionary to store the information + hgvs_transcript_to_hgvs_protein = {'error': '', 'hgvs_protein': '', 'ref_residues': ''} + + # Collect the associated protein + if hgvs_transcript.type == 'c': + 
associated_protein_accession = self.hdp.get_pro_ac_for_tx_ac(hgvs_transcript.ac) + # This method sometimes fails + if str(associated_protein_accession) == 'None': + cod = str(hgvs_transcript) + cod = cod.replace('inv', 'del') + cod = self.hp.parse_hgvs_variant(cod) + p = evm.c_to_p(cod) + associated_protein_accession = p.ac + else: + pass + + # Check for non-coding transcripts + if hgvs_transcript.type == 'c': + # Handle non inversions with simple c_to_p mapping + + if (hgvs_transcript.posedit.edit.type != 'inv') and (hgvs_transcript.posedit.edit.type != 'delins') and (re_to_p is False): + # Does the edit affect the start codon? + if ((hgvs_transcript.posedit.pos.start.base >= 1 and hgvs_transcript.posedit.pos.start.base <= 3 and hgvs_transcript.posedit.pos.start.offset == 0) or ( + hgvs_transcript.posedit.pos.end.base >= 1 and hgvs_transcript.posedit.pos.end.base <= 3 and hgvs_transcript.posedit.pos.end.offset == 0)) \ + and not re.search('\*', str( + hgvs_transcript.posedit.pos)): + hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, + type='p', posedit='(Met1?)') + else: + try: + hgvs_protein = evm.c_to_p(hgvs_transcript) + except IndexError as e: + error = str(e) + if re.search('string index out of range', error) and re.search('dup', str(hgvs_transcript)): + hgvs_ins = self.hp.parse_hgvs_variant(str(hgvs_transcript)) + hgvs_ins = self.hn.normalize(hgvs_ins) + inst = hgvs_ins.ac + ':c.' 
+ str(hgvs_ins.posedit.pos.start.base - 1) + '_' + str(hgvs_ins.posedit.pos.start.base) + 'ins' + hgvs_ins.posedit.edit.ref + hgvs_transcript = self.hp.parse_hgvs_variant(inst) + hgvs_protein = evm.c_to_p(hgvs_transcript) + + try: + hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein + return hgvs_transcript_to_hgvs_protein + except UnboundLocalError: + hgvs_transcript_to_hgvs_protein = self.myc_to_p(hgvs_transcript, evm, re_to_p = True) + return hgvs_transcript_to_hgvs_protein + + else: + # Additional code required to process inversions + # Note, this code was developed for VariantValidator and is not native to the biocommons hgvs Python package + # Convert positions to n. position + hgvs_naughty = self.vm.c_to_n(hgvs_transcript) + + # Collect the deleted sequence using fetch_seq + del_seq = self.sf.fetch_seq(str(hgvs_naughty.ac), start_i=hgvs_naughty.posedit.pos.start.base - 1, end_i=hgvs_naughty.posedit.pos.end.base) + + # Make the inverted sequence + my_seq = Seq(del_seq) + + if hgvs_transcript.posedit.edit.type == 'inv': + inv_seq = my_seq.reverse_complement() + else: + inv_seq = hgvs_transcript.posedit.edit.alt + if inv_seq is None: + inv_seq = '' + + # Look for p. delins or del + not_delins = True + if hgvs_transcript.posedit.edit.type != 'inv': + try: + shifts = evm.c_to_p(hgvs_transcript) + if re.search('del', shifts.posedit.edit.type): + not_delins = False + except Exception: + not_delins = False + else: + not_delins = False + + # Use inv delins code? + if not_delins == False: + # Collect the associated protein + associated_protein_accession = self.hdp.get_pro_ac_for_tx_ac(hgvs_transcript.ac) + + # Intronic inversions are marked as uncertain i.e. p.? 
+ if re.search('\d+\-', str(hgvs_transcript.posedit.pos)) or re.search('\d+\+', str(hgvs_transcript.posedit.pos)) or re.search('\*', str(hgvs_transcript.posedit.pos)) or re.search('[cn].\-', str(hgvs_transcript)): + if (( + hgvs_transcript.posedit.pos.start.base >= 1 and hgvs_transcript.posedit.pos.start.base <= 3 and hgvs_transcript.posedit.pos.start.offset == 0) + or + (hgvs_transcript.posedit.pos.end.base >= 1 and hgvs_transcript.posedit.pos.end.base <= 3 and hgvs_transcript.posedit.pos.end.offset == 0)) \ + and not re.search('\*', str(hgvs_transcript.posedit.pos)): + hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', + posedit='(Met1?)') + else: + # Make the variant + hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', posedit='?') + hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein + return hgvs_transcript_to_hgvs_protein + else: + # Need to obtain the cds_start + inf = self.hdp.get_tx_identity_info(hgvs_transcript.ac) + cds_start = inf[3] + + # Extract the reference coding sequence from SeqRepo + try: + ref_seq = self.sf.fetch_seq(str(hgvs_naughty.ac)) + except Exception as e: + error = str(e) + hgvs_transcript_to_hgvs_protein['error'] = error + return hgvs_transcript_to_hgvs_protein + + # Create the variant coding sequence + var_seq = fn.n_inversion(ref_seq, del_seq, inv_seq, + hgvs_naughty.posedit.pos.start.base, + hgvs_naughty.posedit.pos.end.base) + # Translate the reference and variant proteins + prot_ref_seq = fn.translate(ref_seq, cds_start) + + try: + prot_var_seq = fn.translate(var_seq, cds_start) + except IndexError: + hgvs_transcript_to_hgvs_protein['error'] = 'Cannot identify an in-frame Termination codon in the variant mRNA sequence' + hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', + posedit='?') + hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein + return hgvs_transcript_to_hgvs_protein 
+ + if prot_ref_seq == 'error': + error = 'Unable to generate protein variant description' + hgvs_transcript_to_hgvs_protein['error'] = error + return hgvs_transcript_to_hgvs_protein + elif prot_var_seq == 'error': + # Does the edit affect the start codon? + if (( + hgvs_transcript.posedit.pos.start.base >= 1 and hgvs_transcript.posedit.pos.start.base <= 3 and hgvs_transcript.posedit.pos.start.offset == 0) + or + (hgvs_transcript.posedit.pos.end.base >= 1 and hgvs_transcript.posedit.pos.end.base <= 3 and hgvs_transcript.posedit.pos.end.offset == 0)) \ + and not re.search('\*', str(hgvs_transcript.posedit.pos)): + hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', + posedit='(Met1?)') + + hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein + return hgvs_transcript_to_hgvs_protein + else: + error = 'Unable to generate protein variant description' + hgvs_transcript_to_hgvs_protein['error'] = error + return hgvs_transcript_to_hgvs_protein + else: + # Gather the required information regarding variant interval and sequences + if hgvs_transcript.posedit.edit.type != 'delins': + pro_inv_info = fn.pro_inv_info(prot_ref_seq, prot_var_seq) + else: + pro_inv_info = fn.pro_delins_info(prot_ref_seq, prot_var_seq) + + # Error has occurred + if pro_inv_info['error'] == 'true': + error = 'Translation error occurred, please contact admin' + hgvs_transcript_to_hgvs_protein['error'] = error + return hgvs_transcript_to_hgvs_protein + + # The Nucleotide variant has not affected the protein sequence i.e. synonymous + elif pro_inv_info['variant'] != 'true': + # Make the variant + hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', + posedit='=') + hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein + return hgvs_transcript_to_hgvs_protein + + else: + # Early termination i.e. 
stop gained + # if pro_inv_info['terminate'] == 'true': + # end = 'Ter' + str(pro_inv_info['ter_pos']) + # pro_inv_info['prot_ins_seq'].replace('*', end) + + # Complete variant description + # Recode the single letter del and ins sequences into three letter amino acid codes + del_thr = fn.one_to_three(pro_inv_info['prot_del_seq']) + ins_thr = fn.one_to_three(pro_inv_info['prot_ins_seq']) + + # Write the HGVS position and edit + del_len = len(del_thr) + from_aa = del_thr[0:3] + to_aa = del_thr[del_len - 3:] + + # Handle a range of amino acids + if pro_inv_info['edit_start'] != pro_inv_info['edit_end']: + if len(ins_thr) > 0: + if re.search('Ter', del_thr) and ins_thr[-3:] != 'Ter': + posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + '_' + to_aa + str( + pro_inv_info['edit_end']) + 'delins' + ins_thr + '?)' + else: + posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + '_' + to_aa + str( + pro_inv_info['edit_end']) + 'delins' + ins_thr + ')' + else: + if re.search('Ter', del_thr) and ins_thr[-3:] != 'Ter': + posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + '_' + to_aa + str( + pro_inv_info['edit_end']) + 'del?)' + else: + posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + '_' + to_aa + str( + pro_inv_info['edit_end']) + 'del)' + else: + # Handle extended proteins i.e. 
stop_lost + if del_thr == 'Ter' and (len(ins_thr) > len(del_thr)): + # Nucleotide variant range aligns to the Termination codon + if ins_thr[-3:] == 'Ter': + posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + str( + ins_thr[:3]) + 'ext' + str(ins_thr[-3:]) + str((len(ins_thr) / 3) - 1) + ')' + # Nucleotide variant range spans the Termination codon + else: + posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + str( + ins_thr[:3]) + 'ext?)' + + # Nucleotide variation has not affected the length of the protein thus substitution or del + else: + if len(ins_thr) == 3: + posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + ins_thr + ')' + elif len(ins_thr) == 0: + posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + 'del)' + else: + posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + 'delins' + ins_thr + ')' + + # Complete the variant + hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', + posedit=posedit) + + hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein + + else: + hgvs_transcript_to_hgvs_protein['hgvs_protein'] = shifts + + # Return + return hgvs_transcript_to_hgvs_protein + + + # Handle non-coding transcript and non transcript descriptions + elif hgvs_transcript.type == 'n': + # non-coding transcripts + hgvs_protein = copy.deepcopy(hgvs_transcript) + hgvs_protein.ac = 'Non-coding ' + hgvs_protein.posedit = '' + hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein + return hgvs_transcript_to_hgvs_protein + else: + hgvs_transcript_to_hgvs_protein['error'] = 'Unable to map %s to %s' % ( + hgvs_transcript.ac, associated_protein_accession) + return hgvs_transcript_to_hgvs_protein class Validation(): #Validation objects contain a number of variant interpretations diff --git a/VariantValidator/testing/testOutputsMasterITS/variant0.txt b/VariantValidator/testing/testOutputsMasterITS/variant0.txt deleted file mode 100644 index fa31ece7..00000000 --- 
a/VariantValidator/testing/testOutputsMasterITS/variant0.txt +++ /dev/null @@ -1,143 +0,0 @@ -(dp0 -S'NM_015120.4:c.35T>C' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_741t1:c.35T>C' -p4 -sS'validation_warnings' -p5 -(lp6 -sS'RefSeqGene_context_intronic_sequence' -p7 -S'' -p8 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA -p12 -sS'gene_symbol' -p13 -S'ALMS1' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_055935.4(LRG_741p1):p.(Leu12Pro)' -p18 -sS'slr' -p19 -S'NP_055935.4:p.(L12P)' -p20 -ssS'submitted_variant' -p21 -S'NM_015120.4:c.35T>C' -p22 -sS'genome_context_intronic_sequence' -p23 -g8 -sS'HGVS_LRG_variant' -p24 -S'LRG_741:g.5146T>C' -p25 -sS'HGVS_transcript_variant' -p26 -S'NM_015120.4:c.35T>C' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -S'NG_011690.1:g.5146T>C' -p29 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000002.11:g.73613031delinsCGGA' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr2' -p39 -sS'ref' -p40 -S'T' -p41 -sS'pos' -p42 -S'73613031' -p43 -sS'alt' -p44 -S'CGGA' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000002.12:g.73385903delinsCGGA' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'73385903' -p50 -sg44 -S'CGGA' -p51 -sssS'GRCh37' -p52 -(dp53 -g34 -S'NC_000002.11:g.73613031delinsCGGA' -p54 -sg36 -(dp55 -g38 -S'2' -p56 -sg40 -g41 -sg42 -S'73613031' -p57 -sg44 -g45 -sssS'GRCh38' -p58 -(dp59 -g34 -S'NC_000002.12:g.73385903delinsCGGA' -p60 -sg36 -(dp61 -g38 -g56 -sg40 -g41 -sg42 -S'73385903' -p62 -sg44 -g51 -sssssS'flag' -p63 -S'gene_variant' -p64 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant1.txt b/VariantValidator/testing/testOutputsMasterITS/variant1.txt deleted file mode 100644 index 095300b9..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant1.txt +++ /dev/null @@ -1,145 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_015120.4:c.39G>C' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_741t1:c.39G>C' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA -p14 -sS'gene_symbol' -p15 -S'ALMS1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_055935.4(LRG_741p1):p.(Glu13Asp)' -p20 -sS'slr' -p21 -S'NP_055935.4:p.(E13D)' -p22 -ssS'submitted_variant' -p23 -S'NM_015120.4:c.39G>C' -p24 -sS'genome_context_intronic_sequence' -p25 -g10 -sS'HGVS_LRG_variant' -p26 -S'LRG_741:g.5150G>C' -p27 -sS'HGVS_transcript_variant' -p28 -S'NM_015120.4:c.39G>C' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_011690.1:g.5150G>C' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000002.11:g.73613034_73613035insCGA' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr2' -p41 -sS'ref' -p42 -S'G' -p43 -sS'pos' -p44 -S'73613032' -p45 -sS'alt' -p46 -S'GGAC' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000002.12:g.73385906_73385907insCGA' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -g43 -sg44 -S'73385904' -p52 -sg46 -S'GGAC' -p53 -sssS'GRCh37' -p54 -(dp55 -g36 -S'NC_000002.11:g.73613034_73613035insCGA' -p56 -sg38 -(dp57 -g40 -S'2' -p58 -sg42 -g43 -sg44 -S'73613032' -p59 -sg46 -S'GGAC' -p60 -sssS'GRCh38' -p61 -(dp62 -g36 -S'NC_000002.12:g.73385906_73385907insCGA' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -g43 -sg44 -S'73385904' -p65 -sg46 -S'GGAC' -p66 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant10.txt b/VariantValidator/testing/testOutputsMasterITS/variant10.txt deleted file mode 100644 index 4f268e2e..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant10.txt +++ /dev/null @@ -1,60 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation. Re-submit NG_007400.1:c.5071A>T but also specify transcripts from the following' -p7 -aS'select_transcripts=NM_000088.3' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -g4 -sS'gene_symbol' -p13 -g4 -sS'HGVS_predicted_protein_consequence' -p14 -(dp15 -S'tlr' -p16 -g4 -sS'slr' -p17 -g4 -ssS'submitted_variant' -p18 -S'NG_007400.1:c.5071A>T' -p19 -sS'genome_context_intronic_sequence' -p20 -g4 -sS'HGVS_LRG_variant' -p21 -g4 -sS'HGVS_transcript_variant' -p22 -g4 -sS'HGVS_RefSeqGene_variant' -p23 -g4 -sS'primary_assembly_loci' -p24 -(dp25 -ssS'flag' -p26 -S'warning' -p27 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant100.txt b/VariantValidator/testing/testOutputsMasterITS/variant100.txt deleted file mode 100644 index 500eaec6..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant100.txt +++ /dev/null @@ -1,275 +0,0 @@ -(dp0 -S'NM_007121.5:c.514_520=' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' -p7 -aS'NM_007121.5:c.514_520 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'RefSeqGene record not available' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA -p16 -sS'gene_symbol' -p17 -S'NR1H2' -p18 -sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_009052.3:p.(Lys172=)' -p22 -sS'slr' -p23 -S'NP_009052.3:p.(K172=)' -p24 -ssS'submitted_variant' -p25 -S'NC_000019.10:g.50378565_50378567dup' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'HGVS_LRG_variant' -p28 -g4 -sS'HGVS_transcript_variant' -p29 -S'NM_007121.5:c.514_520=' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'GRCh38' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000019.10:g.50378565_50378567dup' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'19' -p41 -sS'ref' -p42 -S'AAC' -p43 -sS'pos' -p44 -S'50378564' -p45 -sS'alt' -p46 -VAACAAC -p47 -sssS'GRCh37' -p48 -(dp49 -g36 -S'NC_000019.9:g.50881822_50881824dup' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'AAC' -p52 -sg44 -S'50881821' -p53 -sg46 -VAACAAC -p54 -sssS'hg38' -p55 -(dp56 -g36 -S'NC_000019.10:g.50378565_50378567dup' -p57 -sg38 -(dp58 -g40 
-S'chr19' -p59 -sg42 -S'AAC' -p60 -sg44 -S'50378564' -p61 -sg46 -VAACAAC -p62 -sssS'hg19' -p63 -(dp64 -g36 -S'NC_000019.9:g.50881822_50881824dup' -p65 -sg38 -(dp66 -g40 -g59 -sg42 -S'AAC' -p67 -sg44 -S'50881821' -p68 -sg46 -VAACAAC -p69 -sssssS'NM_001256647.1:c.223_229=' -p70 -(dp71 -g3 -g4 -sg5 -(lp72 -S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' -p73 -aS'NM_001256647.1:c.223_229 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p74 -aS'Caution should be used when reporting the displayed variant descriptions' -p75 -aS'If you are unsure, please contact admin' -p76 -aS'RefSeqGene record not available' -p77 -asg12 -g4 -sg13 -(lp78 -sg15 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA -p79 -sg17 -S'NR1H2' -p80 -sg19 -(dp81 -g21 -S'NP_001243576.1:p.(Lys75=)' -p82 -sg23 -S'NP_001243576.1:p.(K75=)' -p83 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_001256647.1:c.223_229=' -p84 -sg31 -g4 -sg32 -(dp85 -g34 -(dp86 -g36 -S'NC_000019.10:g.50378565_50378567dup' -p87 -sg38 -(dp88 -g40 -g41 -sg42 -S'AAC' -p89 -sg44 -S'50378564' -p90 -sg46 -VAACAAC -p91 -sssg48 -(dp92 -g36 -S'NC_000019.9:g.50881822_50881824dup' -p93 -sg38 -(dp94 -g40 -g41 -sg42 -S'AAC' -p95 -sg44 -S'50881821' -p96 -sg46 -VAACAAC -p97 -sssg55 -(dp98 -g36 -S'NC_000019.10:g.50378565_50378567dup' -p99 -sg38 -(dp100 -g40 -g59 -sg42 -S'AAC' -p101 -sg44 -S'50378564' -p102 -sg46 -VAACAAC -p103 -sssg63 -(dp104 -g36 -S'NC_000019.9:g.50881822_50881824dup' -p105 -sg38 -(dp106 -g40 -g59 -sg42 -S'AAC' -p107 -sg44 -S'50881821' -p108 -sg46 -VAACAAC -p109 -sssssS'flag' -p110 -S'gene_variant' -p111 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant101.txt b/VariantValidator/testing/testOutputsMasterITS/variant101.txt deleted file mode 100644 index e3ceeb51..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant101.txt +++ /dev/null @@ -1,262 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_007121.5:c.519_521del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' -p9 -aS'NM_007121.5:c.515_517 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA -p18 -sS'gene_symbol' -p19 -S'NR1H2' -p20 -sS'HGVS_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_009052.3:p.(Gln176del)' -p24 -sS'slr' -p25 -S'NP_009052.3:p.(Q176del)' -p26 -ssS'submitted_variant' -p27 -S'NC_000019.10:g.50378563_50378564=' -p28 -sS'genome_context_intronic_sequence' -p29 -g6 -sS'HGVS_LRG_variant' -p30 -g6 -sS'HGVS_transcript_variant' -p31 -S'NM_007121.5:c.519_521del' -p32 -sS'HGVS_RefSeqGene_variant' -p33 -g6 -sS'primary_assembly_loci' -p34 -(dp35 -S'GRCh38' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000019.10:g.50378564_50378566=' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'19' -p43 -sS'ref' -p44 -S'AAC' -p45 -sS'pos' -p46 -S'50378564' -p47 -sS'alt' -p48 -g45 -sssS'GRCh37' -p49 -(dp50 -g38 -S'NC_000019.9:g.50881821_50881823=' -p51 -sg40 -(dp52 -g42 -g43 -sg44 -S'AAC' -p53 -sg46 -S'50881821' -p54 -sg48 -g53 -sssS'hg38' -p55 -(dp56 -g38 -S'NC_000019.10:g.50378564_50378566=' -p57 
-sg40 -(dp58 -g42 -S'chr19' -p59 -sg44 -g45 -sg46 -S'50378564' -p60 -sg48 -g45 -sssS'hg19' -p61 -(dp62 -g38 -S'NC_000019.9:g.50881821_50881823=' -p63 -sg40 -(dp64 -g42 -g59 -sg44 -g53 -sg46 -S'50881821' -p65 -sg48 -g53 -sssssS'NM_001256647.1:c.228_230del' -p66 -(dp67 -g5 -g6 -sg7 -(lp68 -S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' -p69 -aS'NM_001256647.1:c.224_226 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p70 -aS'Caution should be used when reporting the displayed variant descriptions' -p71 -aS'If you are unsure, please contact admin' -p72 -aS'RefSeqGene record not available' -p73 -asg14 -g6 -sg15 -(lp74 -sg17 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA -p75 -sg19 -S'NR1H2' -p76 -sg21 -(dp77 -g23 -S'NP_001243576.1:p.(Gln79del)' -p78 -sg25 -S'NP_001243576.1:p.(Q79del)' -p79 -ssg27 -g28 -sg29 -g6 -sg30 -g6 -sg31 -S'NM_001256647.1:c.228_230del' -p80 -sg33 -g6 -sg34 -(dp81 -g36 -(dp82 -g38 -S'NC_000019.10:g.50378564_50378566=' -p83 -sg40 -(dp84 -g42 -g43 -sg44 -S'AAC' -p85 -sg46 -S'50378564' -p86 -sg48 -g85 -sssg49 -(dp87 -g38 -S'NC_000019.9:g.50881821_50881823=' -p88 -sg40 -(dp89 -g42 -g43 -sg44 -g53 -sg46 -S'50881821' -p90 -sg48 -g53 -sssg55 -(dp91 -g38 -S'NC_000019.10:g.50378564_50378566=' -p92 -sg40 -(dp93 -g42 -g59 -sg44 -g85 -sg46 -S'50378564' -p94 -sg48 -g85 -sssg61 -(dp95 -g38 -S'NC_000019.9:g.50881821_50881823=' -p96 -sg40 -(dp97 -g42 -g59 -sg44 -g53 -sg46 -S'50881821' -p98 -sg48 -g53 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant102.txt b/VariantValidator/testing/testOutputsMasterITS/variant102.txt deleted file mode 100644 index 85742fb9..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant102.txt +++ /dev/null @@ -1,268 +0,0 @@ -(dp0 -S'NM_001256647.1:c.224_226delinsTCGG' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' -p7 -aS'NM_001256647.1:c.223_228 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'RefSeqGene record not available' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA -p16 -sS'gene_symbol' -p17 -S'NR1H2' -p18 -sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_001243576.1:p.(Lys75IlefsTer35)' -p22 -sS'slr' -p23 -S'NP_001243576.1:p.(K75Ifs*35)' -p24 -ssS'submitted_variant' -p25 -S'NC_000019.10:g.50378563_50378564insTCGG' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'HGVS_LRG_variant' -p28 -g4 -sS'HGVS_transcript_variant' -p29 -S'NM_001256647.1:c.224_226delinsTCGG' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'GRCh38' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000019.10:g.50378563_50378564insTCGG' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'19' -p41 -sS'ref' -p42 -S'A' -p43 -sS'pos' -p44 -S'50378563' -p45 -sS'alt' -p46 -S'ATCGG' -p47 -sssS'GRCh37' -p48 -(dp49 -g36 -S'NC_000019.9:g.50881820_50881821insTCGG' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -g43 -sg44 -S'50881820' -p52 -sg46 -S'ATCGG' -p53 -sssS'hg38' -p54 -(dp55 -g36 
-S'NC_000019.10:g.50378563_50378564insTCGG' -p56 -sg38 -(dp57 -g40 -S'chr19' -p58 -sg42 -g43 -sg44 -S'50378563' -p59 -sg46 -S'ATCGG' -p60 -sssS'hg19' -p61 -(dp62 -g36 -S'NC_000019.9:g.50881820_50881821insTCGG' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -g43 -sg44 -S'50881820' -p65 -sg46 -S'ATCGG' -p66 -sssssS'flag' -p67 -S'gene_variant' -p68 -sS'NM_007121.5:c.515_517delinsTCGG' -p69 -(dp70 -g3 -g4 -sg5 -(lp71 -S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' -p72 -aS'NM_007121.5:c.514_519 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p73 -aS'Caution should be used when reporting the displayed variant descriptions' -p74 -aS'If you are unsure, please contact admin' -p75 -aS'RefSeqGene record not available' -p76 -asg12 -g4 -sg13 -(lp77 -sg15 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA -p78 -sg17 -S'NR1H2' -p79 -sg19 -(dp80 -g21 -S'NP_009052.3:p.(Lys172IlefsTer35)' -p81 -sg23 -S'NP_009052.3:p.(K172Ifs*35)' -p82 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_007121.5:c.515_517delinsTCGG' -p83 -sg31 -g4 -sg32 -(dp84 -g34 -(dp85 -g36 -S'NC_000019.10:g.50378563_50378564insTCGG' -p86 -sg38 -(dp87 -g40 -g41 -sg42 -g43 -sg44 -S'50378563' -p88 -sg46 -S'ATCGG' -p89 -sssg48 -(dp90 -g36 -S'NC_000019.9:g.50881820_50881821insTCGG' -p91 -sg38 -(dp92 -g40 -g41 -sg42 -g43 -sg44 -S'50881820' -p93 -sg46 -S'ATCGG' -p94 -sssg54 -(dp95 -g36 -S'NC_000019.10:g.50378563_50378564insTCGG' -p96 -sg38 -(dp97 -g40 -g58 -sg42 -g43 -sg44 -S'50378563' -p98 -sg46 -S'ATCGG' -p99 -sssg61 -(dp100 -g36 -S'NC_000019.9:g.50881820_50881821insTCGG' -p101 -sg38 -(dp102 -g40 -g58 -sg42 -g43 -sg44 -S'50881820' -p103 -sg46 -S'ATCGG' -p104 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant103.txt b/VariantValidator/testing/testOutputsMasterITS/variant103.txt deleted file mode 100644 index 1a98835a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant103.txt +++ /dev/null @@ -1,248 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_007121.5:c.514_515inv' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA -p14 -sS'gene_symbol' -p15 -S'NR1H2' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_009052.3:p.(Lys172Leu)' -p20 -sS'slr' -p21 -S'NP_009052.3:p.(K172L)' -p22 -ssS'submitted_variant' -p23 -S'NC_000019.10:g.50378563delinsTTAC' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'HGVS_LRG_variant' -p26 -g6 -sS'HGVS_transcript_variant' -p27 -S'NM_007121.5:c.514_515inv' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'GRCh38' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000019.10:g.50378563delinsTTAC' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'19' -p39 -sS'ref' -p40 -S'A' -p41 -sS'pos' -p42 -S'50378563' -p43 -sS'alt' -p44 -S'TTAC' -p45 -sssS'GRCh37' -p46 -(dp47 -g34 -S'NC_000019.9:g.50881820delinsTTAC' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'50881820' -p50 -sg44 -S'TTAC' -p51 -sssS'hg38' -p52 -(dp53 -g34 -S'NC_000019.10:g.50378563delinsTTAC' -p54 -sg36 -(dp55 -g38 -S'chr19' -p56 -sg40 -g41 -sg42 -S'50378563' -p57 -sg44 -g45 -sssS'hg19' -p58 -(dp59 -g34 -S'NC_000019.9:g.50881820delinsTTAC' -p60 -sg36 -(dp61 -g38 -g56 -sg40 -g41 -sg42 -S'50881820' -p62 -sg44 -g51 -sssssS'NM_001256647.1:c.223_224inv' -p63 -(dp64 -g5 -g6 -sg7 -(lp65 -S'RefSeqGene record not available' -p66 
-asg10 -g6 -sg11 -(lp67 -sg13 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA -p68 -sg15 -S'NR1H2' -p69 -sg17 -(dp70 -g19 -S'NP_001243576.1:p.(Lys75Leu)' -p71 -sg21 -S'NP_001243576.1:p.(K75L)' -p72 -ssg23 -g24 -sg25 -g6 -sg26 -g6 -sg27 -S'NM_001256647.1:c.223_224inv' -p73 -sg29 -g6 -sg30 -(dp74 -g32 -(dp75 -g34 -S'NC_000019.10:g.50378563delinsTTAC' -p76 -sg36 -(dp77 -g38 -g39 -sg40 -g41 -sg42 -S'50378563' -p78 -sg44 -S'TTAC' -p79 -sssg46 -(dp80 -g34 -S'NC_000019.9:g.50881820delinsTTAC' -p81 -sg36 -(dp82 -g38 -g39 -sg40 -g41 -sg42 -S'50881820' -p83 -sg44 -S'TTAC' -p84 -sssg52 -(dp85 -g34 -S'NC_000019.10:g.50378563delinsTTAC' -p86 -sg36 -(dp87 -g38 -g56 -sg40 -g41 -sg42 -S'50378563' -p88 -sg44 -g79 -sssg58 -(dp89 -g34 -S'NC_000019.9:g.50881820delinsTTAC' -p90 -sg36 -(dp91 -g38 -g56 -sg40 -g41 -sg42 -S'50881820' -p92 -sg44 -g84 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant104.txt b/VariantValidator/testing/testOutputsMasterITS/variant104.txt deleted file mode 100644 index 6f979cf9..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant104.txt +++ /dev/null @@ -1,268 +0,0 @@ -(dp0 -S'NM_007121.5:c.514_515insT' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' -p7 -aS'NM_007121.5:c.514_515 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'RefSeqGene record not available' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA -p16 -sS'gene_symbol' -p17 -S'NR1H2' -p18 
-sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_009052.3:p.(Lys172IlefsTer35)' -p22 -sS'slr' -p23 -S'NP_009052.3:p.(K172Ifs*35)' -p24 -ssS'submitted_variant' -p25 -S'NC_000019.10:g.50378563_50378564insTAAC' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'HGVS_LRG_variant' -p28 -g4 -sS'HGVS_transcript_variant' -p29 -S'NM_007121.5:c.514_515insT' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'GRCh38' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000019.10:g.50378563_50378564insTAAC' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'19' -p41 -sS'ref' -p42 -S'A' -p43 -sS'pos' -p44 -S'50378563' -p45 -sS'alt' -p46 -S'ATAAC' -p47 -sssS'GRCh37' -p48 -(dp49 -g36 -S'NC_000019.9:g.50881820_50881821insTAAC' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -g43 -sg44 -S'50881820' -p52 -sg46 -S'ATAAC' -p53 -sssS'hg38' -p54 -(dp55 -g36 -S'NC_000019.10:g.50378563_50378564insTAAC' -p56 -sg38 -(dp57 -g40 -S'chr19' -p58 -sg42 -g43 -sg44 -S'50378563' -p59 -sg46 -S'ATAAC' -p60 -sssS'hg19' -p61 -(dp62 -g36 -S'NC_000019.9:g.50881820_50881821insTAAC' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -g43 -sg44 -S'50881820' -p65 -sg46 -S'ATAAC' -p66 -sssssS'NM_001256647.1:c.223_224insT' -p67 -(dp68 -g3 -g4 -sg5 -(lp69 -S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' -p70 -aS'NM_001256647.1:c.223_224 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p71 -aS'Caution should be used when reporting the displayed variant descriptions' -p72 -aS'If you are unsure, please contact admin' -p73 -aS'RefSeqGene record not available' -p74 -asg12 -g4 -sg13 -(lp75 -sg15 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA -p76 -sg17 -S'NR1H2' -p77 -sg19 -(dp78 -g21 -S'NP_001243576.1:p.(Lys75IlefsTer35)' -p79 -sg23 -S'NP_001243576.1:p.(K75Ifs*35)' -p80 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_001256647.1:c.223_224insT' -p81 -sg31 -g4 -sg32 -(dp82 -g34 
-(dp83 -g36 -S'NC_000019.10:g.50378563_50378564insTAAC' -p84 -sg38 -(dp85 -g40 -g41 -sg42 -g43 -sg44 -S'50378563' -p86 -sg46 -S'ATAAC' -p87 -sssg48 -(dp88 -g36 -S'NC_000019.9:g.50881820_50881821insTAAC' -p89 -sg38 -(dp90 -g40 -g41 -sg42 -g43 -sg44 -S'50881820' -p91 -sg46 -S'ATAAC' -p92 -sssg54 -(dp93 -g36 -S'NC_000019.10:g.50378563_50378564insTAAC' -p94 -sg38 -(dp95 -g40 -g58 -sg42 -g43 -sg44 -S'50378563' -p96 -sg46 -S'ATAAC' -p97 -sssg61 -(dp98 -g36 -S'NC_000019.9:g.50881820_50881821insTAAC' -p99 -sg38 -(dp100 -g40 -g58 -sg42 -g43 -sg44 -S'50881820' -p101 -sg46 -S'ATAAC' -p102 -sssssS'flag' -p103 -S'gene_variant' -p104 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant105.txt b/VariantValidator/testing/testOutputsMasterITS/variant105.txt deleted file mode 100644 index 3e471440..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant105.txt +++ /dev/null @@ -1,268 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_001256647.1:c.222_228del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' -p9 -aS'NM_001256647.1:c.222_228 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA -p18 -sS'gene_symbol' -p19 -S'NR1H2' -p20 -sS'HGVS_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_001243576.1:p.(Lys75SerfsTer47)' -p24 -sS'slr' -p25 -S'NP_001243576.1:p.(K75Sfs*47)' -p26 -ssS'submitted_variant' -p27 -S'NC_000019.10:g.50378562_50378565del' -p28 
-sS'genome_context_intronic_sequence' -p29 -g6 -sS'HGVS_LRG_variant' -p30 -g6 -sS'HGVS_transcript_variant' -p31 -S'NM_001256647.1:c.222_228del' -p32 -sS'HGVS_RefSeqGene_variant' -p33 -g6 -sS'primary_assembly_loci' -p34 -(dp35 -S'GRCh38' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000019.10:g.50378562_50378565del' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'19' -p43 -sS'ref' -p44 -S'GGAAA' -p45 -sS'pos' -p46 -S'50378561' -p47 -sS'alt' -p48 -S'G' -p49 -sssS'GRCh37' -p50 -(dp51 -g38 -S'NC_000019.9:g.50881819_50881822del' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'GGAAA' -p54 -sg46 -S'50881818' -p55 -sg48 -g49 -sssS'hg38' -p56 -(dp57 -g38 -S'NC_000019.10:g.50378562_50378565del' -p58 -sg40 -(dp59 -g42 -S'chr19' -p60 -sg44 -S'GGAAA' -p61 -sg46 -S'50378561' -p62 -sg48 -g49 -sssS'hg19' -p63 -(dp64 -g38 -S'NC_000019.9:g.50881819_50881822del' -p65 -sg40 -(dp66 -g42 -g60 -sg44 -S'GGAAA' -p67 -sg46 -S'50881818' -p68 -sg48 -g49 -sssssS'NM_007121.5:c.513_519del' -p69 -(dp70 -g5 -g6 -sg7 -(lp71 -S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' -p72 -aS'NM_007121.5:c.513_519 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p73 -aS'Caution should be used when reporting the displayed variant descriptions' -p74 -aS'If you are unsure, please contact admin' -p75 -aS'RefSeqGene record not available' -p76 -asg14 -g6 -sg15 -(lp77 -sg17 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA -p78 -sg19 -S'NR1H2' -p79 -sg21 -(dp80 -g23 -S'NP_009052.3:p.(Lys172SerfsTer47)' -p81 -sg25 -S'NP_009052.3:p.(K172Sfs*47)' -p82 -ssg27 -g28 -sg29 -g6 -sg30 -g6 -sg31 -S'NM_007121.5:c.513_519del' -p83 -sg33 -g6 -sg34 -(dp84 -g36 -(dp85 -g38 -S'NC_000019.10:g.50378562_50378565del' -p86 -sg40 -(dp87 -g42 -g43 -sg44 -S'GGAAA' -p88 -sg46 -S'50378561' -p89 -sg48 -g49 -sssg50 -(dp90 -g38 -S'NC_000019.9:g.50881819_50881822del' -p91 -sg40 -(dp92 -g42 -g43 -sg44 -S'GGAAA' -p93 -sg46 -S'50881818' -p94 
-sg48 -g49 -sssg56 -(dp95 -g38 -S'NC_000019.10:g.50378562_50378565del' -p96 -sg40 -(dp97 -g42 -g60 -sg44 -S'GGAAA' -p98 -sg46 -S'50378561' -p99 -sg48 -g49 -sssg63 -(dp100 -g38 -S'NC_000019.9:g.50881819_50881822del' -p101 -sg40 -(dp102 -g42 -g60 -sg44 -S'GGAAA' -p103 -sg46 -S'50881818' -p104 -sg48 -g49 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant106.txt b/VariantValidator/testing/testOutputsMasterITS/variant106.txt deleted file mode 100644 index fde329ba..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant106.txt +++ /dev/null @@ -1,271 +0,0 @@ -(dp0 -S'NM_001256647.1:c.222_228delinsTC' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' -p7 -aS'NM_001256647.1:c.222_228 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'RefSeqGene record not available' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA -p16 -sS'gene_symbol' -p17 -S'NR1H2' -p18 -sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_001243576.1:p.(Lys75ProfsTer33)' -p22 -sS'slr' -p23 -S'NP_001243576.1:p.(K75Pfs*33)' -p24 -ssS'submitted_variant' -p25 -S'NC_000019.10:g.50378562_50378565delinsTC' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'HGVS_LRG_variant' -p28 -g4 -sS'HGVS_transcript_variant' -p29 -S'NM_001256647.1:c.222_228delinsTC' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'GRCh38' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000019.10:g.50378562_50378565delinsTC' -p37 -sS'vcf' -p38 -(dp39 
-S'chr' -p40 -S'19' -p41 -sS'ref' -p42 -S'GAAA' -p43 -sS'pos' -p44 -S'50378562' -p45 -sS'alt' -p46 -S'TC' -p47 -sssS'GRCh37' -p48 -(dp49 -g36 -S'NC_000019.9:g.50881819_50881822delinsTC' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'GAAA' -p52 -sg44 -S'50881819' -p53 -sg46 -S'TC' -p54 -sssS'hg38' -p55 -(dp56 -g36 -S'NC_000019.10:g.50378562_50378565delinsTC' -p57 -sg38 -(dp58 -g40 -S'chr19' -p59 -sg42 -S'GAAA' -p60 -sg44 -S'50378562' -p61 -sg46 -g47 -sssS'hg19' -p62 -(dp63 -g36 -S'NC_000019.9:g.50881819_50881822delinsTC' -p64 -sg38 -(dp65 -g40 -g59 -sg42 -S'GAAA' -p66 -sg44 -S'50881819' -p67 -sg46 -g54 -sssssS'flag' -p68 -S'gene_variant' -p69 -sS'NM_007121.5:c.513_519delinsTC' -p70 -(dp71 -g3 -g4 -sg5 -(lp72 -S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' -p73 -aS'NM_007121.5:c.513_519 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p74 -aS'Caution should be used when reporting the displayed variant descriptions' -p75 -aS'If you are unsure, please contact admin' -p76 -aS'RefSeqGene record not available' -p77 -asg12 -g4 -sg13 -(lp78 -sg15 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA -p79 -sg17 -S'NR1H2' -p80 -sg19 -(dp81 -g21 -S'NP_009052.3:p.(Lys172ProfsTer33)' -p82 -sg23 -S'NP_009052.3:p.(K172Pfs*33)' -p83 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_007121.5:c.513_519delinsTC' -p84 -sg31 -g4 -sg32 -(dp85 -g34 -(dp86 -g36 -S'NC_000019.10:g.50378562_50378565delinsTC' -p87 -sg38 -(dp88 -g40 -g41 -sg42 -S'GAAA' -p89 -sg44 -S'50378562' -p90 -sg46 -S'TC' -p91 -sssg48 -(dp92 -g36 -S'NC_000019.9:g.50881819_50881822delinsTC' -p93 -sg38 -(dp94 -g40 -g41 -sg42 -S'GAAA' -p95 -sg44 -S'50881819' -p96 -sg46 -S'TC' -p97 -sssg55 -(dp98 -g36 -S'NC_000019.10:g.50378562_50378565delinsTC' -p99 -sg38 -(dp100 -g40 -g59 -sg42 -S'GAAA' -p101 -sg44 -S'50378562' -p102 -sg46 -g91 -sssg62 -(dp103 -g36 -S'NC_000019.9:g.50881819_50881822delinsTC' -p104 -sg38 -(dp105 -g40 -g59 -sg42 
-S'GAAA' -p106 -sg44 -S'50881819' -p107 -sg46 -g97 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant107.txt b/VariantValidator/testing/testOutputsMasterITS/variant107.txt deleted file mode 100644 index 30b27b63..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant107.txt +++ /dev/null @@ -1,153 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_198455.2:c.1115_1116insT' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_198455.2 with genome build GRCh37' -p9 -aS'Genome position NC_000007.13:g.149476667 aligns within a Requires Analysis-bp gap in transcript NM_198455.2 between positions c.1116_1117' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens SCO-spondin (SSPO), mRNA -p18 -sS'gene_symbol' -p19 -S'SSPO' -p20 -sS'HGVS_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_940857.2:p.(Leu374ProfsTer16)' -p24 -sS'slr' -p25 -S'NP_940857.2:p.(L374Pfs*16)' -p26 -ssS'submitted_variant' -p27 -S'NC_000007.14:g.149779575_149779577delinsT' -p28 -sS'genome_context_intronic_sequence' -p29 -g6 -sS'HGVS_LRG_variant' -p30 -g6 -sS'HGVS_transcript_variant' -p31 -S'NM_198455.2:c.1115_1116insT' -p32 -sS'HGVS_RefSeqGene_variant' -p33 -g6 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000007.13:g.149476664_149476666delinsTC' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chr7' -p43 -sS'ref' -p44 -S'CAG' -p45 -sS'pos' -p46 -S'149476664' -p47 -sS'alt' -p48 -S'TC' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000007.14:g.149779575_149779577delinsT' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'CAG' -p54 -sg46 
-S'149779575' -p55 -sg48 -S'T' -p56 -sssS'GRCh37' -p57 -(dp58 -g38 -S'NC_000007.13:g.149476664_149476666delinsTC' -p59 -sg40 -(dp60 -g42 -S'7' -p61 -sg44 -S'CAG' -p62 -sg46 -S'149476664' -p63 -sg48 -g49 -sssS'GRCh38' -p64 -(dp65 -g38 -S'NC_000007.14:g.149779575_149779577delinsT' -p66 -sg40 -(dp67 -g42 -g61 -sg44 -S'CAG' -p68 -sg46 -S'149779575' -p69 -sg48 -g56 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant108.txt b/VariantValidator/testing/testOutputsMasterITS/variant108.txt deleted file mode 100644 index 4d9eefed..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant108.txt +++ /dev/null @@ -1,153 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_198455.2:c.1116_1118=' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_198455.2 with genome build GRCh37' -p9 -aS'Genome position NC_000007.13:g.149476667 aligns within a Requires Analysis-bp gap in transcript NM_198455.2 between positions c.1116_1117' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens SCO-spondin (SSPO), mRNA -p18 -sS'gene_symbol' -p19 -S'SSPO' -p20 -sS'HGVS_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_940857.2:p.(Asp372=)' -p24 -sS'slr' -p25 -S'NP_940857.2:p.(D372=)' -p26 -ssS'submitted_variant' -p27 -S'NC_000007.14:g.149779575_149779577=' -p28 -sS'genome_context_intronic_sequence' -p29 -g6 -sS'HGVS_LRG_variant' -p30 -g6 -sS'HGVS_transcript_variant' -p31 -S'NM_198455.2:c.1116_1118=' -p32 -sS'HGVS_RefSeqGene_variant' -p33 -g6 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 
-S'NC_000007.13:g.149476665_149476666del' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chr7' -p43 -sS'ref' -p44 -S'CAG' -p45 -sS'pos' -p46 -S'149476664' -p47 -sS'alt' -p48 -S'C' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000007.14:g.149779576_149779578del' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'ACAG' -p54 -sg46 -S'149779574' -p55 -sg48 -S'A' -p56 -sssS'GRCh37' -p57 -(dp58 -g38 -S'NC_000007.13:g.149476665_149476666del' -p59 -sg40 -(dp60 -g42 -S'7' -p61 -sg44 -S'CAG' -p62 -sg46 -S'149476664' -p63 -sg48 -g49 -sssS'GRCh38' -p64 -(dp65 -g38 -S'NC_000007.14:g.149779576_149779578del' -p66 -sg40 -(dp67 -g42 -g61 -sg44 -S'ACAG' -p68 -sg46 -S'149779574' -p69 -sg48 -g56 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant109.txt b/VariantValidator/testing/testOutputsMasterITS/variant109.txt deleted file mode 100644 index a2bd490c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant109.txt +++ /dev/null @@ -1,153 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_198455.2:c.1116_1118=' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_198455.2 with genome build GRCh37' -p9 -aS'Genome position NC_000007.13:g.149476667 aligns within a Requires Analysis-bp gap in transcript NM_198455.2 between positions c.1116_1117' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens SCO-spondin (SSPO), mRNA -p18 -sS'gene_symbol' -p19 -S'SSPO' -p20 -sS'HGVS_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_940857.2:p.(Asp372=)' -p24 -sS'slr' -p25 -S'NP_940857.2:p.(D372=)' -p26 -ssS'submitted_variant' -p27 -S'NC_000007.14:g.149779576_149779578del' -p28 
-sS'genome_context_intronic_sequence' -p29 -g6 -sS'HGVS_LRG_variant' -p30 -g6 -sS'HGVS_transcript_variant' -p31 -S'NM_198455.2:c.1116_1118=' -p32 -sS'HGVS_RefSeqGene_variant' -p33 -g6 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000007.13:g.149476665_149476666del' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chr7' -p43 -sS'ref' -p44 -S'CAG' -p45 -sS'pos' -p46 -S'149476664' -p47 -sS'alt' -p48 -S'C' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000007.14:g.149779576_149779578del' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'ACAG' -p54 -sg46 -S'149779574' -p55 -sg48 -S'A' -p56 -sssS'GRCh37' -p57 -(dp58 -g38 -S'NC_000007.13:g.149476665_149476666del' -p59 -sg40 -(dp60 -g42 -S'7' -p61 -sg44 -S'CAG' -p62 -sg46 -S'149476664' -p63 -sg48 -g49 -sssS'GRCh38' -p64 -(dp65 -g38 -S'NC_000007.14:g.149779576_149779578del' -p66 -sg40 -(dp67 -g42 -g61 -sg44 -S'ACAG' -p68 -sg46 -S'149779574' -p69 -sg48 -g56 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant11.txt b/VariantValidator/testing/testOutputsMasterITS/variant11.txt deleted file mode 100644 index b3d8b85d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant11.txt +++ /dev/null @@ -1,636 +0,0 @@ -(dp0 -S'NM_001040113.1:c.3055_3056inv' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -sS'RefSeqGene_context_intronic_sequence' -p7 -g4 -sS'alt_genomic_loci' -p8 -(lp9 -(dp10 -S'GRCh38' -p11 -(dp12 -S'HGVS_genomic_description' -p13 -S'NT_187607.1:g.1396662_1396663inv' -p14 -sS'vcf' -p15 -(dp16 -S'chr' -p17 -S'HSCHR16_1_CTG1' -p18 -sS'ref' -p19 -S'GT' -p20 -sS'pos' -p21 -S'1396662' -p22 -sS'alt' -p23 -S'AC' -p24 -sssa(dp25 -S'hg38' -p26 -(dp27 -g13 -S'NT_187607.1:g.1396662_1396663inv' -p28 -sg15 -(dp29 -g17 -S'chr16_KI270853v1_alt' -p30 -sg19 -S'GT' -p31 -sg21 -S'1396662' -p32 -sg23 -S'AC' -p33 -sssasS'transcript_description' -p34 -VHomo sapiens myosin heavy chain 11 (MYH11), transcript 
variant SM2B, mRNA -p35 -sS'gene_symbol' -p36 -S'MYH11' -p37 -sS'HGVS_predicted_protein_consequence' -p38 -(dp39 -S'tlr' -p40 -S'NP_001035202.1:p.(Thr1019Val)' -p41 -sS'slr' -p42 -S'NP_001035202.1:p.(T1019V)' -p43 -ssS'submitted_variant' -p44 -S'chr16:15832508_15832509delinsAC' -p45 -sS'genome_context_intronic_sequence' -p46 -g4 -sS'HGVS_LRG_variant' -p47 -g4 -sS'HGVS_transcript_variant' -p48 -S'NM_001040113.1:c.3055_3056inv' -p49 -sS'HGVS_RefSeqGene_variant' -p50 -S'NG_009299.1:g.123379_123380inv' -p51 -sS'primary_assembly_loci' -p52 -(dp53 -g11 -(dp54 -g13 -S'NC_000016.10:g.15738651_15738652inv' -p55 -sg15 -(dp56 -g17 -S'16' -p57 -sg19 -S'GT' -p58 -sg21 -S'15738651' -p59 -sg23 -S'AC' -p60 -sssS'GRCh37' -p61 -(dp62 -g13 -S'NC_000016.9:g.15832508_15832509inv' -p63 -sg15 -(dp64 -g17 -g57 -sg19 -S'GT' -p65 -sg21 -S'15832508' -p66 -sg23 -S'AC' -p67 -sssg26 -(dp68 -g13 -S'NC_000016.10:g.15738651_15738652inv' -p69 -sg15 -(dp70 -g17 -S'chr16' -p71 -sg19 -S'GT' -p72 -sg21 -S'15738651' -p73 -sg23 -S'AC' -p74 -sssS'hg19' -p75 -(dp76 -g13 -S'NC_000016.9:g.15832508_15832509inv' -p77 -sg15 -(dp78 -g17 -g71 -sg19 -S'GT' -p79 -sg21 -S'15832508' -p80 -sg23 -S'AC' -p81 -sssssS'NM_001040114.1:c.3055_3056inv' -p82 -(dp83 -g3 -g4 -sg5 -(lp84 -S'RefSeqGene record not available' -p85 -asg7 -g4 -sg8 -(lp86 -(dp87 -g11 -(dp88 -g13 -S'NT_187607.1:g.1396662_1396663inv' -p89 -sg15 -(dp90 -g17 -g18 -sg19 -S'GT' -p91 -sg21 -S'1396662' -p92 -sg23 -S'AC' -p93 -sssa(dp94 -g26 -(dp95 -g13 -S'NT_187607.1:g.1396662_1396663inv' -p96 -sg15 -(dp97 -g17 -g30 -sg19 -S'GT' -p98 -sg21 -S'1396662' -p99 -sg23 -S'AC' -p100 -sssasg34 -VHomo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1B, mRNA -p101 -sg36 -S'MYH11' -p102 -sg38 -(dp103 -g40 -S'NP_001035203.1:p.(Thr1019Val)' -p104 -sg42 -S'NP_001035203.1:p.(T1019V)' -p105 -ssg44 -g45 -sg46 -g4 -sg47 -g4 -sg48 -S'NM_001040114.1:c.3055_3056inv' -p106 -sg50 -g4 -sg52 -(dp107 -g11 -(dp108 -g13 -S'NC_000016.10:g.15738651_15738652inv' -p109 -sg15 
-(dp110 -g17 -g57 -sg19 -S'GT' -p111 -sg21 -S'15738651' -p112 -sg23 -S'AC' -p113 -sssg61 -(dp114 -g13 -S'NC_000016.9:g.15832508_15832509inv' -p115 -sg15 -(dp116 -g17 -g57 -sg19 -S'GT' -p117 -sg21 -S'15832508' -p118 -sg23 -S'AC' -p119 -sssg26 -(dp120 -g13 -S'NC_000016.10:g.15738651_15738652inv' -p121 -sg15 -(dp122 -g17 -g71 -sg19 -S'GT' -p123 -sg21 -S'15738651' -p124 -sg23 -S'AC' -p125 -sssg75 -(dp126 -g13 -S'NC_000016.9:g.15832508_15832509inv' -p127 -sg15 -(dp128 -g17 -g71 -sg19 -S'GT' -p129 -sg21 -S'15832508' -p130 -sg23 -S'AC' -p131 -sssssS'flag' -p132 -S'gene_variant' -p133 -sS'NM_002474.2:c.3034_3035inv' -p134 -(dp135 -g3 -g4 -sg5 -(lp136 -S'RefSeqGene record not available' -p137 -asg7 -g4 -sg8 -(lp138 -(dp139 -g11 -(dp140 -g13 -S'NT_187607.1:g.1396662_1396663inv' -p141 -sg15 -(dp142 -g17 -g18 -sg19 -S'GT' -p143 -sg21 -S'1396662' -p144 -sg23 -S'AC' -p145 -sssa(dp146 -g26 -(dp147 -g13 -S'NT_187607.1:g.1396662_1396663inv' -p148 -sg15 -(dp149 -g17 -g30 -sg19 -S'GT' -p150 -sg21 -S'1396662' -p151 -sg23 -S'AC' -p152 -sssasg34 -VHomo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1A, mRNA -p153 -sg36 -S'MYH11' -p154 -sg38 -(dp155 -g40 -S'NP_002465.1:p.(Thr1012Val)' -p156 -sg42 -S'NP_002465.1:p.(T1012V)' -p157 -ssg44 -g45 -sg46 -g4 -sg47 -g4 -sg48 -S'NM_002474.2:c.3034_3035inv' -p158 -sg50 -g4 -sg52 -(dp159 -g11 -(dp160 -g13 -S'NC_000016.10:g.15738651_15738652inv' -p161 -sg15 -(dp162 -g17 -g57 -sg19 -S'GT' -p163 -sg21 -S'15738651' -p164 -sg23 -S'AC' -p165 -sssg61 -(dp166 -g13 -S'NC_000016.9:g.15832508_15832509inv' -p167 -sg15 -(dp168 -g17 -g57 -sg19 -S'GT' -p169 -sg21 -S'15832508' -p170 -sg23 -S'AC' -p171 -sssg26 -(dp172 -g13 -S'NC_000016.10:g.15738651_15738652inv' -p173 -sg15 -(dp174 -g17 -g71 -sg19 -S'GT' -p175 -sg21 -S'15738651' -p176 -sg23 -S'AC' -p177 -sssg75 -(dp178 -g13 -S'NC_000016.9:g.15832508_15832509inv' -p179 -sg15 -(dp180 -g17 -g71 -sg19 -S'GT' -p181 -sg21 -S'15832508' -p182 -sg23 -S'AC' -p183 -sssssS'NM_022844.2:c.3034_3035inv' -p184 -(dp185 
-g3 -g4 -sg5 -(lp186 -S'RefSeqGene record not available' -p187 -asg7 -g4 -sg8 -(lp188 -(dp189 -g11 -(dp190 -g13 -S'NT_187607.1:g.1396662_1396663inv' -p191 -sg15 -(dp192 -g17 -g18 -sg19 -S'GT' -p193 -sg21 -S'1396662' -p194 -sg23 -S'AC' -p195 -sssa(dp196 -g26 -(dp197 -g13 -S'NT_187607.1:g.1396662_1396663inv' -p198 -sg15 -(dp199 -g17 -g30 -sg19 -S'GT' -p200 -sg21 -S'1396662' -p201 -sg23 -S'AC' -p202 -sssasg34 -VHomo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2A, mRNA -p203 -sg36 -S'MYH11' -p204 -sg38 -(dp205 -g40 -S'NP_074035.1:p.(Thr1012Val)' -p206 -sg42 -S'NP_074035.1:p.(T1012V)' -p207 -ssg44 -g45 -sg46 -g4 -sg47 -g4 -sg48 -S'NM_022844.2:c.3034_3035inv' -p208 -sg50 -g4 -sg52 -(dp209 -g11 -(dp210 -g13 -S'NC_000016.10:g.15738651_15738652inv' -p211 -sg15 -(dp212 -g17 -g57 -sg19 -S'GT' -p213 -sg21 -S'15738651' -p214 -sg23 -S'AC' -p215 -sssg61 -(dp216 -g13 -S'NC_000016.9:g.15832508_15832509inv' -p217 -sg15 -(dp218 -g17 -g57 -sg19 -S'GT' -p219 -sg21 -S'15832508' -p220 -sg23 -S'AC' -p221 -sssg26 -(dp222 -g13 -S'NC_000016.10:g.15738651_15738652inv' -p223 -sg15 -(dp224 -g17 -g71 -sg19 -S'GT' -p225 -sg21 -S'15738651' -p226 -sg23 -S'AC' -p227 -sssg75 -(dp228 -g13 -S'NC_000016.9:g.15832508_15832509inv' -p229 -sg15 -(dp230 -g17 -g71 -sg19 -S'GT' -p231 -sg21 -S'15832508' -p232 -sg23 -S'AC' -p233 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant110.txt b/VariantValidator/testing/testOutputsMasterITS/variant110.txt deleted file mode 100644 index ea7da593..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant110.txt +++ /dev/null @@ -1,152 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_198455.2:c.1115_1116dup' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_198455.2 with genome build GRCh37' -p9 -aS'NC_000007.13:g.149476666 is one of 2 genomic base(s) that fail to align to transcript NM_198455.2 between positions c.1116_1117' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens SCO-spondin (SSPO), mRNA -p18 -sS'gene_symbol' -p19 -S'SSPO' -p20 -sS'HGVS_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_940857.2:p.(Pro373ThrfsTer6)' -p24 -sS'slr' -p25 -S'NP_940857.2:p.(P373Tfs*6)' -p26 -ssS'submitted_variant' -p27 -S'NC_000007.14:g.149779577del' -p28 -sS'genome_context_intronic_sequence' -p29 -g6 -sS'HGVS_LRG_variant' -p30 -g6 -sS'HGVS_transcript_variant' -p31 -S'NM_198455.2:c.1115_1116dup' -p32 -sS'HGVS_RefSeqGene_variant' -p33 -g6 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000007.13:g.149476666G>C' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chr7' -p43 -sS'ref' -p44 -S'G' -p45 -sS'pos' -p46 -S'149476666' -p47 -sS'alt' -p48 -S'C' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000007.14:g.149779577del' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'AG' -p54 -sg46 -S'149779576' -p55 -sg48 -S'A' -p56 -sssS'GRCh37' -p57 -(dp58 -g38 -S'NC_000007.13:g.149476666G>C' -p59 -sg40 -(dp60 -g42 -S'7' 
-p61 -sg44 -g45 -sg46 -S'149476666' -p62 -sg48 -g49 -sssS'GRCh38' -p63 -(dp64 -g38 -S'NC_000007.14:g.149779577del' -p65 -sg40 -(dp66 -g42 -g61 -sg44 -S'AG' -p67 -sg46 -S'149779576' -p68 -sg48 -g56 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant111.txt b/VariantValidator/testing/testOutputsMasterITS/variant111.txt deleted file mode 100644 index e1bdbab1..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant111.txt +++ /dev/null @@ -1,152 +0,0 @@ -(dp0 -S'NM_198455.2:c.1114_1117del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_198455.2 with genome build GRCh37' -p7 -aS'NC_000007.13:g.149476661_149476667 contains 2 genomic base(s) that fail to align to transcript NM_198455.2' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'RefSeqGene record not available' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens SCO-spondin (SSPO), mRNA -p16 -sS'gene_symbol' -p17 -S'SSPO' -p18 -sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_940857.2:p.(Asp372ProfsTer5)' -p22 -sS'slr' -p23 -S'NP_940857.2:p.(D372Pfs*5)' -p24 -ssS'submitted_variant' -p25 -S'NC_000007.14:g.149779573_149779579del' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'HGVS_LRG_variant' -p28 -g4 -sS'HGVS_transcript_variant' -p29 -S'NM_198455.2:c.1114_1117del' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000007.13:g.149476662_149476667del' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr7' -p41 -sS'ref' -p42 -S'TGACAGC' -p43 -sS'pos' -p44 -S'149476661' -p45 -sS'alt' -p46 -S'T' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000007.14:g.149779573_149779579del' 
-p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'TGACAGCC' -p52 -sg44 -S'149779572' -p53 -sg46 -g47 -sssS'GRCh37' -p54 -(dp55 -g36 -S'NC_000007.13:g.149476662_149476667del' -p56 -sg38 -(dp57 -g40 -S'7' -p58 -sg42 -S'TGACAGC' -p59 -sg44 -S'149476661' -p60 -sg46 -g47 -sssS'GRCh38' -p61 -(dp62 -g36 -S'NC_000007.14:g.149779573_149779579del' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -S'TGACAGCC' -p65 -sg44 -S'149779572' -p66 -sg46 -g47 -sssssS'flag' -p67 -S'gene_variant' -p68 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant112.txt b/VariantValidator/testing/testOutputsMasterITS/variant112.txt deleted file mode 100644 index 71cecdfd..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant112.txt +++ /dev/null @@ -1,153 +0,0 @@ -(dp0 -S'NM_198455.2:c.1114_1117delinsCA' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_198455.2 with genome build GRCh37' -p7 -aS'NC_000007.13:g.149476661_149476667 contains 2 genomic base(s) that fail to align to transcript NM_198455.2' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'RefSeqGene record not available' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens SCO-spondin (SSPO), mRNA -p16 -sS'gene_symbol' -p17 -S'SSPO' -p18 -sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_940857.2:p.(Asp372HisfsTer17)' -p22 -sS'slr' -p23 -S'NP_940857.2:p.(D372Hfs*17)' -p24 -ssS'submitted_variant' -p25 -S'NC_000007.14:g.149779573_149779579delinsCA' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'HGVS_LRG_variant' -p28 -g4 -sS'HGVS_transcript_variant' -p29 -S'NM_198455.2:c.1114_1117delinsCA' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 
-S'HGVS_genomic_description' -p36 -S'NC_000007.13:g.149476662_149476667delinsCA' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr7' -p41 -sS'ref' -p42 -S'GACAGC' -p43 -sS'pos' -p44 -S'149476662' -p45 -sS'alt' -p46 -S'CA' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000007.14:g.149779573_149779579delinsCA' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'GACAGCC' -p52 -sg44 -S'149779573' -p53 -sg46 -S'CA' -p54 -sssS'GRCh37' -p55 -(dp56 -g36 -S'NC_000007.13:g.149476662_149476667delinsCA' -p57 -sg38 -(dp58 -g40 -S'7' -p59 -sg42 -S'GACAGC' -p60 -sg44 -S'149476662' -p61 -sg46 -g47 -sssS'GRCh38' -p62 -(dp63 -g36 -S'NC_000007.14:g.149779573_149779579delinsCA' -p64 -sg38 -(dp65 -g40 -g59 -sg42 -S'GACAGCC' -p66 -sg44 -S'149779573' -p67 -sg46 -g54 -sssssS'flag' -p68 -S'gene_variant' -p69 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant113.txt b/VariantValidator/testing/testOutputsMasterITS/variant113.txt deleted file mode 100644 index 3c2eb204..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant113.txt +++ /dev/null @@ -1,148 +0,0 @@ -(dp0 -S'NM_000088.3:c.590_591inv' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_1t1:c.590_591inv' -p4 -sS'validation_warnings' -p5 -(lp6 -sS'RefSeqGene_context_intronic_sequence' -p7 -S'' -p8 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p12 -sS'gene_symbol' -p13 -S'COL1A1' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_000079.2(LRG_1p1):p.(Gly197Asp)' -p18 -sS'slr' -p19 -S'NP_000079.2:p.(G197D)' -p20 -ssS'submitted_variant' -p21 -S'NM_000088.3:c.590_591inv' -p22 -sS'genome_context_intronic_sequence' -p23 -g8 -sS'HGVS_LRG_variant' -p24 -S'LRG_1:g.8639_8640inv' -p25 -sS'HGVS_transcript_variant' -p26 -S'NM_000088.3:c.590_591inv' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -S'NG_007400.1:g.8639_8640inv' -p29 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 
-S'HGVS_genomic_description' -p34 -S'NC_000017.10:g.48275361_48275362inv' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr17' -p39 -sS'ref' -p40 -S'AC' -p41 -sS'pos' -p42 -S'48275361' -p43 -sS'alt' -p44 -S'GT' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000017.11:g.50198000_50198001inv' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'AC' -p50 -sg42 -S'50198000' -p51 -sg44 -S'GT' -p52 -sssS'GRCh37' -p53 -(dp54 -g34 -S'NC_000017.10:g.48275361_48275362inv' -p55 -sg36 -(dp56 -g38 -S'17' -p57 -sg40 -S'AC' -p58 -sg42 -S'48275361' -p59 -sg44 -S'GT' -p60 -sssS'GRCh38' -p61 -(dp62 -g34 -S'NC_000017.11:g.50198000_50198001inv' -p63 -sg36 -(dp64 -g38 -g57 -sg40 -S'AC' -p65 -sg42 -S'50198000' -p66 -sg44 -S'GT' -p67 -sssssS'flag' -p68 -S'gene_variant' -p69 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant114.txt b/VariantValidator/testing/testOutputsMasterITS/variant114.txt deleted file mode 100644 index 647eb66c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant114.txt +++ /dev/null @@ -1,147 +0,0 @@ -(dp0 -S'NM_024989.3:c.1778_1779inv' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens post-GPI attachment to proteins 1 (PGAP1), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'PGAP1' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_079265.2:p.(Phe593Ter)' -p18 -sS'slr' -p19 -S'NP_079265.2:p.(F593*)' -p20 -ssS'submitted_variant' -p21 -S'NM_024989.3:c.1778_1779inv' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_024989.3:c.1778_1779inv' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000002.11:g.197729793_197729794inv' 
-p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr2' -p37 -sS'ref' -p38 -S'AA' -p39 -sS'pos' -p40 -S'197729793' -p41 -sS'alt' -p42 -S'TT' -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000002.12:g.196865069_196865070inv' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -S'AA' -p48 -sg40 -S'196865069' -p49 -sg42 -S'TT' -p50 -sssS'GRCh37' -p51 -(dp52 -g32 -S'NC_000002.11:g.197729793_197729794inv' -p53 -sg34 -(dp54 -g36 -S'2' -p55 -sg38 -S'AA' -p56 -sg40 -S'197729793' -p57 -sg42 -S'TT' -p58 -sssS'GRCh38' -p59 -(dp60 -g32 -S'NC_000002.12:g.196865069_196865070inv' -p61 -sg34 -(dp62 -g36 -g55 -sg38 -S'AA' -p63 -sg40 -S'196865069' -p64 -sg42 -S'TT' -p65 -sssssS'flag' -p66 -S'gene_variant' -p67 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant115.txt b/VariantValidator/testing/testOutputsMasterITS/variant115.txt deleted file mode 100644 index d2e14d5c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant115.txt +++ /dev/null @@ -1,147 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_032815.3:c.555_556inv' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens nuclear factor of activated T cells 2 interacting protein (NFATC2IP), mRNA -p14 -sS'gene_symbol' -p15 -S'NFATC2IP' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_116204.3:p.(Glu185_Glu186delinsAspTer)' -p20 -sS'slr' -p21 -S'NP_116204.3:p.(E185_E186delinsD*)' -p22 -ssS'submitted_variant' -p23 -S'NM_032815.3:c.555_556inv' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'HGVS_LRG_variant' -p26 -g6 -sS'HGVS_transcript_variant' -p27 -S'NM_032815.3:c.555_556inv' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'GRCh38' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000016.10:g.28954659_28954660inv' 
-p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'16' -p39 -sS'ref' -p40 -S'AG' -p41 -sS'pos' -p42 -S'28954659' -p43 -sS'alt' -p44 -S'CT' -p45 -sssS'GRCh37' -p46 -(dp47 -g34 -S'NC_000016.9:g.28965980_28965981inv' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'AG' -p50 -sg42 -S'28965980' -p51 -sg44 -S'CT' -p52 -sssS'hg38' -p53 -(dp54 -g34 -S'NC_000016.10:g.28954659_28954660inv' -p55 -sg36 -(dp56 -g38 -S'chr16' -p57 -sg40 -S'AG' -p58 -sg42 -S'28954659' -p59 -sg44 -S'CT' -p60 -sssS'hg19' -p61 -(dp62 -g34 -S'NC_000016.9:g.28965980_28965981inv' -p63 -sg36 -(dp64 -g38 -g57 -sg40 -S'AG' -p65 -sg42 -S'28965980' -p66 -sg44 -S'CT' -p67 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant116.txt b/VariantValidator/testing/testOutputsMasterITS/variant116.txt deleted file mode 100644 index 51a14122..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant116.txt +++ /dev/null @@ -1,147 +0,0 @@ -(dp0 -S'NM_006138.4:c.3_4inv' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens membrane spanning 4-domains A3 (MS4A3), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'MS4A3' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_006129.4:p.(Met1?)' -p18 -sS'slr' -p19 -S'NP_006129.4:p.(M1?)' -p20 -ssS'submitted_variant' -p21 -S'NM_006138.4:c.3_4inv' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_006138.4:c.3_4inv' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'GRCh38' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000011.10:g.60061163_60061164inv' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'11' -p37 -sS'ref' -p38 -S'GG' -p39 -sS'pos' -p40 -S'60061163' -p41 -sS'alt' -p42 -S'CC' -p43 
-sssS'GRCh37' -p44 -(dp45 -g32 -S'NC_000011.9:g.59828636_59828637inv' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -S'GG' -p48 -sg40 -S'59828636' -p49 -sg42 -S'CC' -p50 -sssS'hg38' -p51 -(dp52 -g32 -S'NC_000011.10:g.60061163_60061164inv' -p53 -sg34 -(dp54 -g36 -S'chr11' -p55 -sg38 -S'GG' -p56 -sg40 -S'60061163' -p57 -sg42 -S'CC' -p58 -sssS'hg19' -p59 -(dp60 -g32 -S'NC_000011.9:g.59828636_59828637inv' -p61 -sg34 -(dp62 -g36 -g55 -sg38 -S'GG' -p63 -sg40 -S'59828636' -p64 -sg42 -S'CC' -p65 -sssssS'flag' -p66 -S'gene_variant' -p67 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant117.txt b/VariantValidator/testing/testOutputsMasterITS/variant117.txt deleted file mode 100644 index db18fad3..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant117.txt +++ /dev/null @@ -1,147 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000038.5:c.3927_3928inv' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens APC, WNT signaling pathway regulator (APC), transcript variant 3, mRNA -p14 -sS'gene_symbol' -p15 -S'APC' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000029.2(LRG_130p1):p.(Glu1309_Lys1310delinsAspTer)' -p20 -sS'slr' -p21 -S'NP_000029.2:p.(E1309_K1310delinsD*)' -p22 -ssS'submitted_variant' -p23 -S'NM_000038.5:c.3927_3928delAAinsTT' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'HGVS_LRG_variant' -p26 -g6 -sS'HGVS_transcript_variant' -p27 -S'NM_000038.5:c.3927_3928inv' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'GRCh38' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000005.10:g.112839521_112839522inv' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'5' -p39 -sS'ref' -p40 -S'AA' -p41 -sS'pos' -p42 -S'112839521' -p43 -sS'alt' -p44 
-S'TT' -p45 -sssS'GRCh37' -p46 -(dp47 -g34 -S'NC_000005.9:g.112175218_112175219inv' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'AA' -p50 -sg42 -S'112175218' -p51 -sg44 -S'TT' -p52 -sssS'hg38' -p53 -(dp54 -g34 -S'NC_000005.10:g.112839521_112839522inv' -p55 -sg36 -(dp56 -g38 -S'chr5' -p57 -sg40 -S'AA' -p58 -sg42 -S'112839521' -p59 -sg44 -S'TT' -p60 -sssS'hg19' -p61 -(dp62 -g34 -S'NC_000005.9:g.112175218_112175219inv' -p63 -sg36 -(dp64 -g38 -g57 -sg40 -S'AA' -p65 -sg42 -S'112175218' -p66 -sg44 -S'TT' -p67 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant118.txt b/VariantValidator/testing/testOutputsMasterITS/variant118.txt deleted file mode 100644 index b6ef4fdf..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant118.txt +++ /dev/null @@ -1,147 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_001034853.1:c.2847_2848inv' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens retinitis pigmentosa GTPase regulator (RPGR), transcript variant C, mRNA -p14 -sS'gene_symbol' -p15 -S'RPGR' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_001030025.1:p.(Glu949_Glu950delinsAspTer)' -p20 -sS'slr' -p21 -S'NP_001030025.1:p.(E949_E950delinsD*)' -p22 -ssS'submitted_variant' -p23 -S'NM_001034853.1:c.2847_2848delAGinsCT' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'HGVS_LRG_variant' -p26 -g6 -sS'HGVS_transcript_variant' -p27 -S'NM_001034853.1:c.2847_2848inv' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000023.10:g.38145404_38145405inv' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chrX' -p39 -sS'ref' -p40 -S'CT' -p41 -sS'pos' -p42 -S'38145404' -p43 -sS'alt' -p44 -S'AG' -p45 
-sssS'hg38' -p46 -(dp47 -g34 -S'NC_000023.11:g.38286151_38286152inv' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'CT' -p50 -sg42 -S'38286151' -p51 -sg44 -S'AG' -p52 -sssS'GRCh37' -p53 -(dp54 -g34 -S'NC_000023.10:g.38145404_38145405inv' -p55 -sg36 -(dp56 -g38 -S'X' -p57 -sg40 -S'CT' -p58 -sg42 -S'38145404' -p59 -sg44 -S'AG' -p60 -sssS'GRCh38' -p61 -(dp62 -g34 -S'NC_000023.11:g.38286151_38286152inv' -p63 -sg36 -(dp64 -g38 -g57 -sg40 -S'CT' -p65 -sg42 -S'38286151' -p66 -sg44 -S'AG' -p67 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant119.txt b/VariantValidator/testing/testOutputsMasterITS/variant119.txt deleted file mode 100644 index b1a2b9a3..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant119.txt +++ /dev/null @@ -1,150 +0,0 @@ -(dp0 -S'NM_000088.3:c.4394_4395inv' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_1t1:c.4394_4395inv' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NM_000088.3:c.4392_*2inv normalized to NM_000088.3:c.4394_4395inv' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -S'' -p9 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p13 -sS'gene_symbol' -p14 -S'COL1A1' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_000079.2(LRG_1p1):p.(Ter1465PheextTer27)' -p19 -sS'slr' -p20 -S'NP_000079.2:p.(*1465Fext*27)' -p21 -ssS'submitted_variant' -p22 -S'NM_000088.3:c.4392_*2inv' -p23 -sS'genome_context_intronic_sequence' -p24 -g9 -sS'HGVS_LRG_variant' -p25 -S'LRG_1:g.21137_21138inv' -p26 -sS'HGVS_transcript_variant' -p27 -S'NM_000088.3:c.4394_4395inv' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -S'NG_007400.1:g.21137_21138inv' -p30 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000017.10:g.48262863_48262864inv' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr17' -p40 -sS'ref' -p41 -S'TT' -p42 -sS'pos' -p43 -S'48262863' -p44 -sS'alt' -p45 -S'AA' 
-p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000017.11:g.50185502_50185503inv' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -S'TT' -p51 -sg43 -S'50185502' -p52 -sg45 -S'AA' -p53 -sssS'GRCh37' -p54 -(dp55 -g35 -S'NC_000017.10:g.48262863_48262864inv' -p56 -sg37 -(dp57 -g39 -S'17' -p58 -sg41 -S'TT' -p59 -sg43 -S'48262863' -p60 -sg45 -S'AA' -p61 -sssS'GRCh38' -p62 -(dp63 -g35 -S'NC_000017.11:g.50185502_50185503inv' -p64 -sg37 -(dp65 -g39 -g58 -sg41 -S'TT' -p66 -sg43 -S'50185502' -p67 -sg45 -S'AA' -p68 -sssssS'flag' -p69 -S'gene_variant' -p70 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant12.txt b/VariantValidator/testing/testOutputsMasterITS/variant12.txt deleted file mode 100644 index 35e1be1a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant12.txt +++ /dev/null @@ -1,148 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589-1_589delinsG' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.590del' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000088.3:c.589-1GG>G automapped to NM_000088.3:c.589-1_589delGGinsG' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'NG_007400.1(NM_000088.3):c.590del' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A1' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000079.2(LRG_1p1):p.?' -p21 -sS'slr' -p22 -S'NP_000079.2:p.?' 
-p23 -ssS'submitted_variant' -p24 -S'NM_000088.3:c.589-1GG>G' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000017.10(NM_000088.3):c.589-1_589delinsG' -p27 -sS'HGVS_LRG_variant' -p28 -S'LRG_1:g.8639del' -p29 -sS'HGVS_transcript_variant' -p30 -S'NM_000088.3:c.589-1_589delinsG' -p31 -sS'HGVS_RefSeqGene_variant' -p32 -S'NG_007400.1:g.8639del' -p33 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000017.10:g.48275363_48275364delinsC' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chr17' -p43 -sS'ref' -p44 -S'AC' -p45 -sS'pos' -p46 -S'48275361' -p47 -sS'alt' -p48 -S'A' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000017.11:g.50198002_50198003delinsC' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'AC' -p54 -sg46 -S'50198000' -p55 -sg48 -g49 -sssS'GRCh37' -p56 -(dp57 -g38 -S'NC_000017.10:g.48275363_48275364delinsC' -p58 -sg40 -(dp59 -g42 -S'17' -p60 -sg44 -S'AC' -p61 -sg46 -S'48275361' -p62 -sg48 -g49 -sssS'GRCh38' -p63 -(dp64 -g38 -S'NC_000017.11:g.50198002_50198003delinsC' -p65 -sg40 -(dp66 -g42 -g60 -sg44 -S'AC' -p67 -sg46 -S'50198000' -p68 -sg48 -g49 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant120.txt b/VariantValidator/testing/testOutputsMasterITS/variant120.txt deleted file mode 100644 index 974e47b1..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant120.txt +++ /dev/null @@ -1,148 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.4392_*5inv' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.4393_*4inv' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2(LRG_1p1):p.?' -p20 -sS'slr' -p21 -S'NP_000079.2:p.?' 
-p22 -ssS'submitted_variant' -p23 -S'NM_000088.3:c.4392_*5inv' -p24 -sS'genome_context_intronic_sequence' -p25 -g10 -sS'HGVS_LRG_variant' -p26 -S'LRG_1:g.21136_21142inv' -p27 -sS'HGVS_transcript_variant' -p28 -S'NM_000088.3:c.4392_*5inv' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_007400.1:g.21136_21142inv' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000017.10:g.48262858_48262866inv' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr17' -p41 -sS'ref' -p42 -S'GAGTTTA' -p43 -sS'pos' -p44 -S'48262859' -p45 -sS'alt' -p46 -S'TAAACTC' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000017.11:g.50185497_50185505inv' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'GAGTTTA' -p52 -sg44 -S'50185498' -p53 -sg46 -S'TAAACTC' -p54 -sssS'GRCh37' -p55 -(dp56 -g36 -S'NC_000017.10:g.48262858_48262866inv' -p57 -sg38 -(dp58 -g40 -S'17' -p59 -sg42 -S'GAGTTTA' -p60 -sg44 -S'48262859' -p61 -sg46 -S'TAAACTC' -p62 -sssS'GRCh38' -p63 -(dp64 -g36 -S'NC_000017.11:g.50185497_50185505inv' -p65 -sg38 -(dp66 -g40 -g59 -sg42 -S'GAGTTTA' -p67 -sg44 -S'50185498' -p68 -sg46 -S'TAAACTC' -p69 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant121.txt b/VariantValidator/testing/testOutputsMasterITS/variant121.txt deleted file mode 100644 index 1ead316d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant121.txt +++ /dev/null @@ -1,148 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.4390_*7inv' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.4390_*7inv' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2(LRG_1p1):p.?' -p20 -sS'slr' -p21 -S'NP_000079.2:p.?' 
-p22 -ssS'submitted_variant' -p23 -S'NM_000088.3:c.4390_*7inv' -p24 -sS'genome_context_intronic_sequence' -p25 -g10 -sS'HGVS_LRG_variant' -p26 -S'LRG_1:g.21133_21145inv' -p27 -sS'HGVS_transcript_variant' -p28 -S'NM_000088.3:c.4390_*7inv' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_007400.1:g.21133_21145inv' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000017.10:g.48262856_48262868inv' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr17' -p41 -sS'ref' -p42 -S'AGGGAGTTTACAG' -p43 -sS'pos' -p44 -S'48262856' -p45 -sS'alt' -p46 -S'CTGTAAACTCCCT' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000017.11:g.50185495_50185507inv' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'AGGGAGTTTACAG' -p52 -sg44 -S'50185495' -p53 -sg46 -S'CTGTAAACTCCCT' -p54 -sssS'GRCh37' -p55 -(dp56 -g36 -S'NC_000017.10:g.48262856_48262868inv' -p57 -sg38 -(dp58 -g40 -S'17' -p59 -sg42 -S'AGGGAGTTTACAG' -p60 -sg44 -S'48262856' -p61 -sg46 -S'CTGTAAACTCCCT' -p62 -sssS'GRCh38' -p63 -(dp64 -g36 -S'NC_000017.11:g.50185495_50185507inv' -p65 -sg38 -(dp66 -g40 -g59 -sg42 -S'AGGGAGTTTACAG' -p67 -sg44 -S'50185495' -p68 -sg46 -S'CTGTAAACTCCCT' -p69 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant122.txt b/VariantValidator/testing/testOutputsMasterITS/variant122.txt deleted file mode 100644 index 35026e0a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant122.txt +++ /dev/null @@ -1,58 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'insertion length must be 1' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'HGVS_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NM_005732.3:c.2923-5insT' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'HGVS_LRG_variant' -p20 -g4 -sS'HGVS_transcript_variant' -p21 -g4 -sS'HGVS_RefSeqGene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -ssS'flag' -p25 -S'warning' -p26 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant123.txt b/VariantValidator/testing/testOutputsMasterITS/variant123.txt deleted file mode 100644 index c8bb17e1..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant123.txt +++ /dev/null @@ -1,58 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The given coordinate is outside the bounds of the reference sequence.' 
-p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'HGVS_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NM_198283.1(EYS):c.*743120C>T' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'HGVS_LRG_variant' -p20 -g4 -sS'HGVS_transcript_variant' -p21 -g4 -sS'HGVS_RefSeqGene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -ssS'flag' -p25 -S'warning' -p26 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant124.txt b/VariantValidator/testing/testOutputsMasterITS/variant124.txt deleted file mode 100644 index c9872a4b..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant124.txt +++ /dev/null @@ -1,58 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'start or end or both are beyond the bounds of transcript record' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'HGVS_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NM_133379.4(TTN):c.*265+26591C>T' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'HGVS_LRG_variant' -p20 -g4 -sS'HGVS_transcript_variant' -p21 -g4 -sS'HGVS_RefSeqGene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -ssS'flag' -p25 -S'warning' -p26 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant125.txt b/VariantValidator/testing/testOutputsMasterITS/variant125.txt deleted file mode 100644 index 88df4bf3..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant125.txt +++ /dev/null @@ -1,148 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589-2_589-1delinsG' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.589-2del' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000088.3:c.589-2_589-1AG>G automapped to NM_000088.3:c.589-2_589-1delAGinsG' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'NG_007400.1(NM_000088.3):c.589-2del' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A1' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000079.2(LRG_1p1):p.?' -p21 -sS'slr' -p22 -S'NP_000079.2:p.?' -p23 -ssS'submitted_variant' -p24 -S'NM_000088.3:c.589-2_589-1AG>G' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000017.10(NM_000088.3):c.589-2_589-1delinsG' -p27 -sS'HGVS_LRG_variant' -p28 -S'LRG_1:g.8636del' -p29 -sS'HGVS_transcript_variant' -p30 -S'NM_000088.3:c.589-2_589-1delinsG' -p31 -sS'HGVS_RefSeqGene_variant' -p32 -S'NG_007400.1:g.8636del' -p33 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000017.10:g.48275364_48275365delinsC' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chr17' -p43 -sS'ref' -p44 -S'CT' -p45 -sS'pos' -p46 -S'48275364' -p47 -sS'alt' -p48 -S'C' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000017.11:g.50198003_50198004delinsC' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'CT' -p54 -sg46 -S'50198003' -p55 -sg48 -g49 -sssS'GRCh37' -p56 -(dp57 -g38 -S'NC_000017.10:g.48275364_48275365delinsC' -p58 -sg40 -(dp59 -g42 -S'17' -p60 -sg44 -S'CT' -p61 -sg46 -S'48275364' -p62 -sg48 -g49 -sssS'GRCh38' -p63 -(dp64 -g38 
-S'NC_000017.11:g.50198003_50198004delinsC' -p65 -sg40 -(dp66 -g42 -g60 -sg44 -S'CT' -p67 -sg46 -S'50198003' -p68 -sg48 -g49 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant126.txt b/VariantValidator/testing/testOutputsMasterITS/variant126.txt deleted file mode 100644 index aada6408..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant126.txt +++ /dev/null @@ -1,146 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.642+1_642+2delinsG' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.642+2del' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'NG_007400.1(NM_000088.3):c.642+2del' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2(LRG_1p1):p.?' -p20 -sS'slr' -p21 -S'NP_000079.2:p.?' 
-p22 -ssS'submitted_variant' -p23 -S'NM_000088.3:c.642+1_642+2delGTinsG' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000017.10(NM_000088.3):c.642+1_642+2delinsG' -p26 -sS'HGVS_LRG_variant' -p27 -S'LRG_1:g.8693del' -p28 -sS'HGVS_transcript_variant' -p29 -S'NM_000088.3:c.642+1_642+2delinsG' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_007400.1:g.8693del' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000017.10:g.48275308_48275309delinsC' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr17' -p42 -sS'ref' -p43 -S'TA' -p44 -sS'pos' -p45 -S'48275307' -p46 -sS'alt' -p47 -S'T' -p48 -sssS'hg38' -p49 -(dp50 -g37 -S'NC_000017.11:g.50197947_50197948delinsC' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -S'TA' -p53 -sg45 -S'50197946' -p54 -sg47 -g48 -sssS'GRCh37' -p55 -(dp56 -g37 -S'NC_000017.10:g.48275308_48275309delinsC' -p57 -sg39 -(dp58 -g41 -S'17' -p59 -sg43 -S'TA' -p60 -sg45 -S'48275307' -p61 -sg47 -g48 -sssS'GRCh38' -p62 -(dp63 -g37 -S'NC_000017.11:g.50197947_50197948delinsC' -p64 -sg39 -(dp65 -g41 -g59 -sg43 -S'TA' -p66 -sg45 -S'50197946' -p67 -sg47 -g48 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant127.txt b/VariantValidator/testing/testOutputsMasterITS/variant127.txt deleted file mode 100644 index 6a73068d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant127.txt +++ /dev/null @@ -1,58 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'insertion length must be 1' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'HGVS_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NM_004415.3:c.1-1insA' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'HGVS_LRG_variant' -p20 -g4 -sS'HGVS_transcript_variant' -p21 -g4 -sS'HGVS_RefSeqGene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -ssS'flag' -p25 -S'warning' -p26 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant128.txt b/VariantValidator/testing/testOutputsMasterITS/variant128.txt deleted file mode 100644 index 3c22dbb3..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant128.txt +++ /dev/null @@ -1,144 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_004415.3:c.-1_1insA' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens desmoplakin (DSP), transcript variant 1, mRNA -p14 -sS'gene_symbol' -p15 -S'DSP' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_004406.2(LRG_423p1):p.(Met1?)' -p20 -sS'slr' -p21 -S'NP_004406.2:p.(M1?)' -p22 -ssS'submitted_variant' -p23 -S'NM_004415.3:c.-1_1insA' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 
-sS'HGVS_LRG_variant' -p26 -g6 -sS'HGVS_transcript_variant' -p27 -S'NM_004415.3:c.-1_1insA' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000006.11:g.7542148_7542149insA' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr6' -p39 -sS'ref' -p40 -S'A' -p41 -sS'pos' -p42 -S'7542149' -p43 -sS'alt' -p44 -S'AA' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000006.12:g.7541915_7541916insA' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'7541916' -p50 -sg44 -S'AA' -p51 -sssS'GRCh37' -p52 -(dp53 -g34 -S'NC_000006.11:g.7542148_7542149insA' -p54 -sg36 -(dp55 -g38 -S'6' -p56 -sg40 -g41 -sg42 -S'7542149' -p57 -sg44 -S'AA' -p58 -sssS'GRCh38' -p59 -(dp60 -g34 -S'NC_000006.12:g.7541915_7541916insA' -p61 -sg36 -(dp62 -g38 -g56 -sg40 -g41 -sg42 -S'7541916' -p63 -sg44 -S'AA' -p64 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant129.txt b/VariantValidator/testing/testOutputsMasterITS/variant129.txt deleted file mode 100644 index 68b7255f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant129.txt +++ /dev/null @@ -1,58 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'start or end or both are beyond the bounds of transcript record' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'HGVS_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NM_000273.2:c.1-5028_253del' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'HGVS_LRG_variant' -p20 -g4 -sS'HGVS_transcript_variant' -p21 -g4 -sS'HGVS_RefSeqGene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -ssS'flag' -p25 -S'warning' -p26 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant13.txt b/VariantValidator/testing/testOutputsMasterITS/variant13.txt deleted file mode 100644 index 5e50fc43..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant13.txt +++ /dev/null @@ -1,148 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.642+1_642+2delinsG' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.642+2del' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000088.3:c.642+1GT>G automapped to NM_000088.3:c.642+1_642+2delGTinsG' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'NG_007400.1(NM_000088.3):c.642+2del' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A1' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000079.2(LRG_1p1):p.?' -p21 -sS'slr' -p22 -S'NP_000079.2:p.?' -p23 -ssS'submitted_variant' -p24 -S'NM_000088.3:c.642+1GT>G' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000017.10(NM_000088.3):c.642+1_642+2delinsG' -p27 -sS'HGVS_LRG_variant' -p28 -S'LRG_1:g.8693del' -p29 -sS'HGVS_transcript_variant' -p30 -S'NM_000088.3:c.642+1_642+2delinsG' -p31 -sS'HGVS_RefSeqGene_variant' -p32 -S'NG_007400.1:g.8693del' -p33 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000017.10:g.48275308_48275309delinsC' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chr17' -p43 -sS'ref' -p44 -S'TA' -p45 -sS'pos' -p46 -S'48275307' -p47 -sS'alt' -p48 -S'T' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000017.11:g.50197947_50197948delinsC' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'TA' -p54 -sg46 -S'50197946' -p55 -sg48 -g49 -sssS'GRCh37' -p56 -(dp57 -g38 -S'NC_000017.10:g.48275308_48275309delinsC' -p58 -sg40 -(dp59 -g42 -S'17' -p60 -sg44 -S'TA' -p61 -sg46 -S'48275307' -p62 -sg48 -g49 -sssS'GRCh38' -p63 -(dp64 -g38 -S'NC_000017.11:g.50197947_50197948delinsC' 
-p65 -sg40 -(dp66 -g42 -g60 -sg44 -S'TA' -p67 -sg46 -S'50197946' -p68 -sg48 -g49 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant130.txt b/VariantValidator/testing/testOutputsMasterITS/variant130.txt deleted file mode 100644 index d75823e0..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant130.txt +++ /dev/null @@ -1,111 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_002929.2:c.1006C>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -aS'NM_002929.2:c.1006C>T cannot be mapped directly to genome build GRCh37' -p10 -aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -g6 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens G protein-coupled receptor kinase 1 (GRK1), mRNA -p16 -sS'gene_symbol' -p17 -S'GRK1' -p18 -sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_002920.1:p.(Leu336Phe)' -p22 -sS'slr' -p23 -S'NP_002920.1:p.(L336F)' -p24 -ssS'submitted_variant' -p25 -S'NM_002929.2:c.1006C>T' -p26 -sS'genome_context_intronic_sequence' -p27 -g6 -sS'HGVS_LRG_variant' -p28 -g6 -sS'HGVS_transcript_variant' -p29 -S'NM_002929.2:c.1006C>T' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -g6 -sS'primary_assembly_loci' -p32 -(dp33 -S'GRCh38' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000013.11:g.113723094C>T' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'13' -p41 -sS'ref' -p42 -VC -p43 -sS'pos' -p44 -S'113723094' -p45 -sS'alt' -p46 -VT -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000013.11:g.113723094C>T' -p50 -sg38 -(dp51 -g40 -S'chr13' -p52 -sg42 -g43 -sg44 -S'113723094' -p53 -sg46 -g47 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant131.txt b/VariantValidator/testing/testOutputsMasterITS/variant131.txt deleted file mode 100644 index ad33c250..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant131.txt +++ /dev/null @@ -1,141 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NR_125367.1:n.167+18165G>A' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens myosin heavy chain gene cluster antisense RNA (MYHAS), long non-coding RNA -p14 -sS'gene_symbol' -p15 -S'MYHAS' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'Non-coding :n.' -p20 -sS'slr' -p21 -g6 -ssS'submitted_variant' -p22 -S'NR_125367.1:n.167+18165G>A' -p23 -sS'genome_context_intronic_sequence' -p24 -S'NC_000017.10(NR_125367.1):c.167+18165G>A' -p25 -sS'HGVS_LRG_variant' -p26 -g6 -sS'HGVS_transcript_variant' -p27 -S'NR_125367.1:n.167+18165G>A' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000017.10:g.10327720G>A' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr17' -p39 -sS'ref' -p40 -S'G' -p41 -sS'pos' -p42 -S'10327720' -p43 -sS'alt' -p44 -S'A' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000017.11:g.10424403G>A' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'10424403' -p50 -sg44 -g45 -sssS'GRCh37' -p51 -(dp52 -g34 -S'NC_000017.10:g.10327720G>A' -p53 -sg36 -(dp54 -g38 -S'17' -p55 -sg40 -g41 -sg42 -S'10327720' -p56 -sg44 -g45 -sssS'GRCh38' -p57 -(dp58 -g34 -S'NC_000017.11:g.10424403G>A' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'10424403' -p61 -sg44 -g45 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant132.txt b/VariantValidator/testing/testOutputsMasterITS/variant132.txt deleted file mode 100644 index 1d20ec5b..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant132.txt +++ /dev/null @@ -1,60 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Variant coordinate is out of the bound of CDS region (CDS length ' -p7 -aS'2673)' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -g4 -sS'gene_symbol' -p13 -g4 -sS'HGVS_predicted_protein_consequence' -p14 -(dp15 -S'tlr' -p16 -g4 -sS'slr' -p17 -g4 -ssS'submitted_variant' -p18 -S'NM_006005.3:c.3071_3073delinsTTA' -p19 -sS'genome_context_intronic_sequence' -p20 -g4 -sS'HGVS_LRG_variant' -p21 -g4 -sS'HGVS_transcript_variant' -p22 -g4 -sS'HGVS_RefSeqGene_variant' -p23 -g4 -sS'primary_assembly_loci' -p24 -(dp25 -ssS'flag' -p26 -S'warning' -p27 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant133.txt b/VariantValidator/testing/testOutputsMasterITS/variant133.txt deleted file mode 100644 index 6c61b2c6..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant133.txt +++ /dev/null @@ -1,58 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Coding transcript reference sequence input as non-coding transcript (n.) reference sequence. Did you mean NM_000089.3:c.1504_1506del?' 
-p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'HGVS_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NM_000089.3:n.1504_1506del' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'HGVS_LRG_variant' -p20 -g4 -sS'HGVS_transcript_variant' -p21 -g4 -sS'HGVS_RefSeqGene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -ssS'flag' -p25 -S'warning' -p26 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant134.txt b/VariantValidator/testing/testOutputsMasterITS/variant134.txt deleted file mode 100644 index 123957e1..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant134.txt +++ /dev/null @@ -1,135 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -sS'RefSeqGene_context_intronic_sequence' -p7 -g4 -sS'alt_genomic_loci' -p8 -(lp9 -sS'transcript_description' -p10 -S'Homo sapiens mitochondrion, complete genome' -p11 -sS'gene_symbol' -p12 -g4 -sS'HGVS_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NC_012920.1:m.1011C>T' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'HGVS_LRG_variant' -p20 -g4 -sS'HGVS_transcript_variant' -p21 -g4 -sS'HGVS_RefSeqGene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -S'hg19' -p25 -(dp26 -S'HGVS_genomic_description' -p27 -S'NC_012920.1:m.1011C>T' -p28 -sS'vcf' -p29 -(dp30 -S'chr' -p31 -S'chrM' -p32 -sS'ref' -p33 -S'C' -p34 -sS'pos' -p35 -S'1011' -p36 -sS'alt' -p37 -S'T' -p38 -sssS'hg38' -p39 -(dp40 -g27 -S'NC_012920.1:m.1011C>T' -p41 -sg29 -(dp42 -g31 -g32 -sg33 -g34 -sg35 -S'1011' -p43 -sg37 -g38 -sssS'GRCh37' -p44 -(dp45 -g27 -S'NC_012920.1:m.1011C>T' -p46 -sg29 -(dp47 -g31 -S'M' -p48 -sg33 -g34 -sg35 -S'1011' -p49 -sg37 -g38 -sssS'GRCh38' -p50 -(dp51 
-g27 -S'NC_012920.1:m.1011C>T' -p52 -sg29 -(dp53 -g31 -g48 -sg33 -g34 -sg35 -S'1011' -p54 -sg37 -g38 -sssssS'flag' -p55 -S'warning' -p56 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant135.txt b/VariantValidator/testing/testOutputsMasterITS/variant135.txt deleted file mode 100644 index 02aa5541..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant135.txt +++ /dev/null @@ -1,219 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_014611.1:c.9879T>C' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'A more recent version of the selected reference sequence NM_014611.1 is available (NM_014611.2)' -p9 -aS'NM_014611.2:c.9879C= MUST be fully validated prior to use in reports' -p10 -aS'select_variants=NM_014611.2:c.9879C=' -p11 -aS'RefSeqGene record not available' -p12 -asS'RefSeqGene_context_intronic_sequence' -p13 -g6 -sS'alt_genomic_loci' -p14 -(lp15 -sS'transcript_description' -p16 -VHomo sapiens MDN1, midasin homolog (yeast) (MDN1), mRNA -p17 -sS'gene_symbol' -p18 -S'MDN1' -p19 -sS'HGVS_predicted_protein_consequence' -p20 -(dp21 -S'tlr' -p22 -S'NP_055426.1:p.(Val3293=)' -p23 -sS'slr' -p24 -S'NP_055426.1:p.(V3293=)' -p25 -ssS'submitted_variant' -p26 -S'NC_000006.11:g.90403795G=' -p27 -sS'genome_context_intronic_sequence' -p28 -g6 -sS'HGVS_LRG_variant' -p29 -g6 -sS'HGVS_transcript_variant' -p30 -S'NM_014611.1:c.9879T>C' -p31 -sS'HGVS_RefSeqGene_variant' -p32 -g6 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000006.11:g.90403795G=' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr6' -p42 -sS'ref' -p43 -S'G' -p44 -sS'pos' -p45 -S'90403795' -p46 -sS'alt' -p47 -g44 -sssS'GRCh37' -p48 -(dp49 -g37 -S'NC_000006.11:g.90403795G=' -p50 -sg39 -(dp51 -g41 -S'6' -p52 -sg43 -g44 -sg45 -S'90403795' -p53 -sg47 -g44 -sssssS'NM_014611.2:c.9879C=' -p54 -(dp55 -g5 -g6 -sg7 -(lp56 -S'RefSeqGene record not available' -p57 -asg13 -g6 
-sg14 -(lp58 -sg16 -VHomo sapiens midasin AAA ATPase 1 (MDN1), mRNA -p59 -sg18 -S'MDN1' -p60 -sg20 -(dp61 -g22 -S'NP_055426.1:p.(Val3293=)' -p62 -sg24 -S'NP_055426.1:p.(V3293=)' -p63 -ssg26 -g27 -sg28 -g6 -sg29 -g6 -sg30 -S'NM_014611.2:c.9879C=' -p64 -sg32 -g6 -sg33 -(dp65 -g35 -(dp66 -g37 -S'NC_000006.11:g.90403795G=' -p67 -sg39 -(dp68 -g41 -g42 -sg43 -VG -p69 -sg45 -S'90403795' -p70 -sg47 -g69 -sssS'hg38' -p71 -(dp72 -g37 -S'NC_000006.12:g.89694076G=' -p73 -sg39 -(dp74 -g41 -g42 -sg43 -g69 -sg45 -S'89694076' -p75 -sg47 -g69 -sssg48 -(dp76 -g37 -S'NC_000006.11:g.90403795G=' -p77 -sg39 -(dp78 -g41 -g52 -sg43 -g69 -sg45 -S'90403795' -p79 -sg47 -g69 -sssS'GRCh38' -p80 -(dp81 -g37 -S'NC_000006.12:g.89694076G=' -p82 -sg39 -(dp83 -g41 -g52 -sg43 -g69 -sg45 -S'89694076' -p84 -sg47 -g69 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant136.txt b/VariantValidator/testing/testOutputsMasterITS/variant136.txt deleted file mode 100644 index fe32f47c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant136.txt +++ /dev/null @@ -1,265 +0,0 @@ -(dp0 -S'NM_000130.4:c.1602del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_553t1:c.1601del' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Not stating ALT bases is ambiguous because VCF specification 4.0 would treat 1-169519049-T-. as a deletion whereas VCF specification 4.1 onwards would treat 1-169519049-T-. 
as ALT = REF' -p7 -aS'VariantValidator has output both alternatives' -p8 -aS'NC_000001.10:g.169519048TT>T automapped to NC_000001.10:g.169519049delT' -p9 -aS'NM_000130.4:c.1601del normalized to NM_000130.4:c.1602del' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -S'' -p12 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens coagulation factor V (F5), mRNA -p16 -sS'gene_symbol' -p17 -S'F5' -p18 -sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_000121.2(LRG_553p1):p.(Arg534GlnfsTer40)' -p22 -sS'slr' -p23 -S'NP_000121.2:p.(R534Qfs*40)' -p24 -ssS'submitted_variant' -p25 -S'1-169519049-T-.' -p26 -sS'genome_context_intronic_sequence' -p27 -g12 -sS'HGVS_LRG_variant' -p28 -S'LRG_553:g.41721del' -p29 -sS'HGVS_transcript_variant' -p30 -S'NM_000130.4:c.1602del' -p31 -sS'HGVS_RefSeqGene_variant' -p32 -S'NG_011806.1:g.41721del' -p33 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000001.10:g.169519048del' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chr1' -p43 -sS'ref' -p44 -S'CT' -p45 -sS'pos' -p46 -S'169519047' -p47 -sS'alt' -p48 -S'C' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000001.11:g.169549810del' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'CT' -p54 -sg46 -S'169549809' -p55 -sg48 -g49 -sssS'GRCh37' -p56 -(dp57 -g38 -S'NC_000001.10:g.169519048del' -p58 -sg40 -(dp59 -g42 -S'1' -p60 -sg44 -S'CT' -p61 -sg46 -S'169519047' -p62 -sg48 -g49 -sssS'GRCh38' -p63 -(dp64 -g38 -S'NC_000001.11:g.169549810del' -p65 -sg40 -(dp66 -g42 -g60 -sg44 -S'CT' -p67 -sg46 -S'169549809' -p68 -sg48 -g49 -sssssS'flag' -p69 -S'gene_variant' -p70 -sS'NM_000130.4:c.1601G>A' -p71 -(dp72 -g3 -S'LRG_553t1:c.1601G>A' -p73 -sg5 -(lp74 -S'Not stating ALT bases is ambiguous because VCF specification 4.0 would treat 1-169519049-T-. as a deletion whereas VCF specification 4.1 onwards would treat 1-169519049-T-. 
as ALT = REF' -p75 -aS'VariantValidator has output both alternatives' -p76 -asg11 -g12 -sg13 -(lp77 -sg15 -VHomo sapiens coagulation factor V (F5), mRNA -p78 -sg17 -S'F5' -p79 -sg19 -(dp80 -g21 -S'NP_000121.2(LRG_553p1):p.(Arg534Gln)' -p81 -sg23 -S'NP_000121.2:p.(R534Q)' -p82 -ssg25 -g26 -sg27 -g12 -sg28 -S'LRG_553:g.41721G>A' -p83 -sg30 -S'NM_000130.4:c.1601G>A' -p84 -sg32 -S'NG_011806.1:g.41721G>A' -p85 -sg34 -(dp86 -g36 -(dp87 -g38 -S'NC_000001.10:g.169519049T=' -p88 -sg40 -(dp89 -g42 -g43 -sg44 -S'T' -p90 -sg46 -S'169519049' -p91 -sg48 -g90 -sssg50 -(dp92 -g38 -S'NC_000001.11:g.169549811C>T' -p93 -sg40 -(dp94 -g42 -g43 -sg44 -VC -p95 -sg46 -S'169549811' -p96 -sg48 -VT -p97 -sssg56 -(dp98 -g38 -S'NC_000001.10:g.169519049T=' -p99 -sg40 -(dp100 -g42 -g60 -sg44 -g90 -sg46 -S'169519049' -p101 -sg48 -g90 -sssg63 -(dp102 -g38 -S'NC_000001.11:g.169549811C>T' -p103 -sg40 -(dp104 -g42 -g60 -sg44 -g95 -sg46 -S'169549811' -p105 -sg48 -g97 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant137.txt b/VariantValidator/testing/testOutputsMasterITS/variant137.txt deleted file mode 100644 index 539b6ce2..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant137.txt +++ /dev/null @@ -1,844 +0,0 @@ -(dp0 -S'NM_001204317.1:c.856-9155_856-9154=' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens prolactin receptor (PRLR), transcript variant 4, mRNA -p12 -sS'gene_symbol' -p13 -S'PRLR' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001191246.1:p.?' -p18 -sS'slr' -p19 -S'NP_001191246.1:p.?' 
-p20 -ssS'submitted_variant' -p21 -S'NC_000005.9:g.35058667_35058668AG=' -p22 -sS'genome_context_intronic_sequence' -p23 -S'NC_000005.9(NM_001204317.1):c.856-9155_856-9154=' -p24 -sS'HGVS_LRG_variant' -p25 -g4 -sS'HGVS_transcript_variant' -p26 -S'NM_001204317.1:c.856-9155_856-9154=' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'GRCh38' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000005.10:g.35058562_35058563=' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'5' -p38 -sS'ref' -p39 -S'AA' -p40 -sS'pos' -p41 -S'35058562' -p42 -sS'alt' -p43 -g40 -sssS'GRCh37' -p44 -(dp45 -g33 -S'NC_000005.9:g.35058667_35058668=' -p46 -sg35 -(dp47 -g37 -g38 -sg39 -S'AG' -p48 -sg41 -S'35058667' -p49 -sg43 -g48 -sssS'hg38' -p50 -(dp51 -g33 -S'NC_000005.10:g.35058562_35058563=' -p52 -sg35 -(dp53 -g37 -S'chr5' -p54 -sg39 -g40 -sg41 -S'35058562' -p55 -sg43 -g40 -sssS'hg19' -p56 -(dp57 -g33 -S'NC_000005.9:g.35058667_35058668=' -p58 -sg35 -(dp59 -g37 -g54 -sg39 -g48 -sg41 -S'35058667' -p60 -sg43 -g48 -sssssS'NM_001204316.1:c.1009+7383_1009+7384=' -p61 -(dp62 -g3 -g4 -sg5 -(lp63 -S'RefSeqGene record not available' -p64 -asg8 -g4 -sg9 -(lp65 -sg11 -VHomo sapiens prolactin receptor (PRLR), transcript variant 3, mRNA -p66 -sg13 -S'PRLR' -p67 -sg15 -(dp68 -g17 -S'NP_001191245.1:p.?' -p69 -sg19 -S'NP_001191245.1:p.?' 
-p70 -ssg21 -g22 -sg23 -S'NC_000005.9(NM_001204316.1):c.1009+7383_1009+7384=' -p71 -sg25 -g4 -sg26 -S'NM_001204316.1:c.1009+7383_1009+7384=' -p72 -sg28 -g4 -sg29 -(dp73 -g31 -(dp74 -g33 -S'NC_000005.10:g.35058565_35058566=' -p75 -sg35 -(dp76 -g37 -g38 -sg39 -S'AT' -p77 -sg41 -S'35058565' -p78 -sg43 -g77 -sssg44 -(dp79 -g33 -S'NC_000005.9:g.35058667_35058668=' -p80 -sg35 -(dp81 -g37 -g38 -sg39 -g48 -sg41 -S'35058667' -p82 -sg43 -g48 -sssg50 -(dp83 -g33 -S'NC_000005.10:g.35058565_35058566=' -p84 -sg35 -(dp85 -g37 -g54 -sg39 -g77 -sg41 -S'35058565' -p86 -sg43 -g77 -sssg56 -(dp87 -g33 -S'NC_000005.9:g.35058667_35058668=' -p88 -sg35 -(dp89 -g37 -g54 -sg39 -g48 -sg41 -S'35058667' -p90 -sg43 -g48 -sssssS'NM_001204314.2:c.*6528del' -p91 -(dp92 -g3 -g4 -sg5 -(lp93 -S'The displayed variants may be artefacts of aligning NM_001204314.2 with genome build GRCh37' -p94 -aS'NM_001204314.2:c.*6527_*6530 contains 1 transcript base(s) that fail to align to chromosome NC_000005.9' -p95 -aS'Caution should be used when reporting the displayed variant descriptions' -p96 -aS'If you are unsure, please contact admin' -p97 -aS'RefSeqGene record not available' -p98 -asg8 -g4 -sg9 -(lp99 -sg11 -VHomo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA -p100 -sg13 -S'PRLR' -p101 -sg15 -(dp102 -g17 -S'NP_001191243.1:p.?' -p103 -sg19 -S'NP_001191243.1:p.?' 
-p104 -ssg21 -g22 -sg23 -g4 -sg25 -g4 -sg26 -S'NM_001204314.2:c.*6528del' -p105 -sg28 -g4 -sg29 -(dp106 -g31 -(dp107 -g33 -S'NC_000005.10:g.35058563del' -p108 -sg35 -(dp109 -g37 -g38 -sg39 -S'CA' -p110 -sg41 -S'35058560' -p111 -sg43 -S'C' -p112 -sssg44 -(dp113 -g33 -S'NC_000005.9:g.35058662_35058668=' -p114 -sg35 -(dp115 -g37 -g38 -sg39 -S'AGACAAG' -p116 -sg41 -S'35058662' -p117 -sg43 -g116 -sssg50 -(dp118 -g33 -S'NC_000005.10:g.35058563del' -p119 -sg35 -(dp120 -g37 -g54 -sg39 -S'CA' -p121 -sg41 -S'35058560' -p122 -sg43 -g112 -sssg56 -(dp123 -g33 -S'NC_000005.9:g.35058662_35058668=' -p124 -sg35 -(dp125 -g37 -g54 -sg39 -g116 -sg41 -S'35058662' -p126 -sg43 -g116 -sssssS'NM_001204318.1:c.686-9155_686-9154=' -p127 -(dp128 -g3 -g4 -sg5 -(lp129 -S'RefSeqGene record not available' -p130 -asg8 -g4 -sg9 -(lp131 -sg11 -VHomo sapiens prolactin receptor (PRLR), transcript variant 5, mRNA -p132 -sg13 -S'PRLR' -p133 -sg15 -(dp134 -g17 -S'NP_001191247.1:p.?' -p135 -sg19 -S'NP_001191247.1:p.?' -p136 -ssg21 -g22 -sg23 -S'NC_000005.9(NM_001204318.1):c.686-9155_686-9154=' -p137 -sg25 -g4 -sg26 -S'NM_001204318.1:c.686-9155_686-9154=' -p138 -sg28 -g4 -sg29 -(dp139 -g31 -(dp140 -g33 -S'NC_000005.10:g.35058562_35058563=' -p141 -sg35 -(dp142 -g37 -g38 -sg39 -g40 -sg41 -S'35058562' -p143 -sg43 -g40 -sssg44 -(dp144 -g33 -S'NC_000005.9:g.35058667_35058668=' -p145 -sg35 -(dp146 -g37 -g38 -sg39 -g48 -sg41 -S'35058667' -p147 -sg43 -g48 -sssg50 -(dp148 -g33 -S'NC_000005.10:g.35058562_35058563=' -p149 -sg35 -(dp150 -g37 -g54 -sg39 -g40 -sg41 -S'35058562' -p151 -sg43 -g40 -sssg56 -(dp152 -g33 -S'NC_000005.9:g.35058667_35058668=' -p153 -sg35 -(dp154 -g37 -g54 -sg39 -g48 -sg41 -S'35058667' -p155 -sg43 -g48 -sssssS'NR_037910.1:n.828-9155_828-9154=' -p156 -(dp157 -g3 -g4 -sg5 -(lp158 -S'RefSeqGene record not available' -p159 -asg8 -g4 -sg9 -(lp160 -sg11 -VHomo sapiens prolactin receptor (PRLR), transcript variant 7, non-coding RNA -p161 -sg13 -S'PRLR' -p162 -sg15 -(dp163 -g17 -S'Non-coding :n.' 
-p164 -sg19 -g4 -ssg21 -g22 -sg23 -S'NC_000005.9(NR_037910.1):c.828-9155_828-9154=' -p165 -sg25 -g4 -sg26 -S'NR_037910.1:n.828-9155_828-9154=' -p166 -sg28 -g4 -sg29 -(dp167 -g31 -(dp168 -g33 -S'NC_000005.10:g.35058562_35058563=' -p169 -sg35 -(dp170 -g37 -g38 -sg39 -g40 -sg41 -S'35058562' -p171 -sg43 -g40 -sssg44 -(dp172 -g33 -S'NC_000005.9:g.35058667_35058668=' -p173 -sg35 -(dp174 -g37 -g38 -sg39 -g48 -sg41 -S'35058667' -p175 -sg43 -g48 -sssg50 -(dp176 -g33 -S'NC_000005.10:g.35058562_35058563=' -p177 -sg35 -(dp178 -g37 -g54 -sg39 -g40 -sg41 -S'35058562' -p179 -sg43 -g40 -sssg56 -(dp180 -g33 -S'NC_000005.9:g.35058667_35058668=' -p181 -sg35 -(dp182 -g37 -g54 -sg39 -g48 -sg41 -S'35058667' -p183 -sg43 -g48 -sssssS'flag' -p184 -S'gene_variant' -p185 -sS'NM_000949.5:c.*6523_*6524=' -p186 -(dp187 -g3 -g4 -sg5 -(lp188 -S'A more recent version of the selected reference sequence NM_000949.5 is available (NM_000949.6)' -p189 -aS'NM_000949.6:c.*6523_*6524delATinsCT MUST be fully validated prior to use in reports' -p190 -aS'select_variants=NM_000949.6:c.*6523_*6524delinsCT' -p191 -asg8 -g4 -sg9 -(lp192 -sg11 -VHomo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA -p193 -sg13 -S'PRLR' -p194 -sg15 -(dp195 -g17 -S'NP_000940.1:p.?' -p196 -sg19 -S'NP_000940.1:p.?' 
-p197 -ssg21 -g22 -sg23 -g4 -sg25 -g4 -sg26 -S'NM_000949.5:c.*6523_*6524=' -p198 -sg28 -S'NG_029042.1:g.177156_177157=' -p199 -sg29 -(dp200 -g56 -(dp201 -g33 -S'NC_000005.9:g.35058666_35058669=' -p202 -sg35 -(dp203 -g37 -g54 -sg39 -VAAGA -p204 -sg41 -S'35058666' -p205 -sg43 -g204 -sssg44 -(dp206 -g33 -S'NC_000005.9:g.35058666_35058669=' -p207 -sg35 -(dp208 -g37 -g38 -sg39 -g204 -sg41 -S'35058666' -p209 -sg43 -g204 -sssssS'NM_001204314.1:c.*6523_*6524=' -p210 -(dp211 -g3 -g4 -sg5 -(lp212 -S'A more recent version of the selected reference sequence NM_001204314.1 is available (NM_001204314.2)' -p213 -aS'NM_001204314.2:c.*6523_*6524delATinsCT MUST be fully validated prior to use in reports' -p214 -aS'select_variants=NM_001204314.2:c.*6523_*6524delinsCT' -p215 -aS'RefSeqGene record not available' -p216 -asg8 -g4 -sg9 -(lp217 -sg11 -VHomo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA -p218 -sg13 -S'PRLR' -p219 -sg15 -(dp220 -g17 -S'NP_001191243.1:p.?' -p221 -sg19 -S'NP_001191243.1:p.?' 
-p222 -ssg21 -g22 -sg23 -g4 -sg25 -g4 -sg26 -S'NM_001204314.1:c.*6523_*6524=' -p223 -sg28 -g4 -sg29 -(dp224 -g56 -(dp225 -g33 -S'NC_000005.9:g.35058667_35058668=' -p226 -sg35 -(dp227 -g37 -g54 -sg39 -VAG -p228 -sg41 -S'35058667' -p229 -sg43 -g228 -sssg44 -(dp230 -g33 -S'NC_000005.9:g.35058667_35058668=' -p231 -sg35 -(dp232 -g37 -g38 -sg39 -g228 -sg41 -S'35058667' -p233 -sg43 -g228 -sssssS'NM_000949.6:c.*6528del' -p234 -(dp235 -g3 -g4 -sg5 -(lp236 -S'The displayed variants may be artefacts of aligning NM_000949.6 with genome build GRCh37' -p237 -aS'NM_000949.6:c.*6527_*6530 contains 1 transcript base(s) that fail to align to chromosome NC_000005.9' -p238 -aS'Caution should be used when reporting the displayed variant descriptions' -p239 -aS'If you are unsure, please contact admin' -p240 -aS'RefSeqGene record not available' -p241 -asg8 -g4 -sg9 -(lp242 -sg11 -VHomo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA -p243 -sg13 -S'PRLR' -p244 -sg15 -(dp245 -g17 -S'NP_000940.1:p.?' -p246 -sg19 -S'NP_000940.1:p.?' -p247 -ssg21 -g22 -sg23 -g4 -sg25 -g4 -sg26 -S'NM_000949.6:c.*6528del' -p248 -sg28 -g4 -sg29 -(dp249 -g31 -(dp250 -g33 -S'NC_000005.10:g.35058563del' -p251 -sg35 -(dp252 -g37 -g38 -sg39 -S'CA' -p253 -sg41 -S'35058560' -p254 -sg43 -g112 -sssg44 -(dp255 -g33 -S'NC_000005.9:g.35058662_35058668=' -p256 -sg35 -(dp257 -g37 -g38 -sg39 -g116 -sg41 -S'35058662' -p258 -sg43 -g116 -sssg50 -(dp259 -g33 -S'NC_000005.10:g.35058563del' -p260 -sg35 -(dp261 -g37 -g54 -sg39 -S'CA' -p262 -sg41 -S'35058560' -p263 -sg43 -g112 -sssg56 -(dp264 -g33 -S'NC_000005.9:g.35058662_35058668=' -p265 -sg35 -(dp266 -g37 -g54 -sg39 -g116 -sg41 -S'35058662' -p267 -sg43 -g116 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant138.txt b/VariantValidator/testing/testOutputsMasterITS/variant138.txt deleted file mode 100644 index e3c8ec69..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant138.txt +++ /dev/null @@ -1,58 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Required information for NM_000251.1 is missing from the Universal Transcript Archive, please select an alternative version of NM_000251.1 by submitting NM_000251.1 or MSH2 to https://variantvalidator.org/ref_finder/, or select an alternative genome build' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'HGVS_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NM_000251.1:c.1296_1348del' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'HGVS_LRG_variant' -p20 -g4 -sS'HGVS_transcript_variant' -p21 -g4 -sS'HGVS_RefSeqGene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -ssS'flag' -p25 -S'warning' -p26 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant139.txt b/VariantValidator/testing/testOutputsMasterITS/variant139.txt deleted file mode 100644 index 70f9f50b..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant139.txt +++ /dev/null @@ -1,148 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.2024_2028+1del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.2024_2028+1del' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000088.3:c.2023_2028del normalized to NM_000088.3:c.2024_2028+1del' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'NG_007400.1(NM_000088.3):c.2024_2028+1del' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A1' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000079.2(LRG_1p1):p.(Ala675_Arg676del)' -p21 -sS'slr' -p22 -S'NP_000079.2:p.(A675_R676del)' -p23 -ssS'submitted_variant' -p24 -S'NM_000088.3:c.2023_2028del' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000017.10(NM_000088.3):c.2024_2028+1del' -p27 -sS'HGVS_LRG_variant' -p28 -S'LRG_1:g.14656_14661del' -p29 -sS'HGVS_transcript_variant' -p30 -S'NM_000088.3:c.2024_2028+1del' -p31 -sS'HGVS_RefSeqGene_variant' -p32 -S'NG_007400.1:g.14656_14661del' -p33 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000017.10:g.48269340_48269345del' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chr17' -p43 -sS'ref' -p44 -S'ACTCTTG' -p45 -sS'pos' -p46 -S'48269339' -p47 -sS'alt' -p48 -S'A' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000017.11:g.50191979_50191984del' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'ACTCTTG' -p54 -sg46 -S'50191978' -p55 -sg48 -g49 -sssS'GRCh37' -p56 -(dp57 -g38 -S'NC_000017.10:g.48269340_48269345del' -p58 -sg40 -(dp59 -g42 -S'17' -p60 -sg44 -S'ACTCTTG' -p61 -sg46 -S'48269339' -p62 -sg48 -g49 -sssS'GRCh38' -p63 
-(dp64 -g38 -S'NC_000017.11:g.50191979_50191984del' -p65 -sg40 -(dp66 -g42 -g60 -sg44 -S'ACTCTTG' -p67 -sg46 -S'50191978' -p68 -sg48 -g49 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant14.txt b/VariantValidator/testing/testOutputsMasterITS/variant14.txt deleted file mode 100644 index dc58e11e..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant14.txt +++ /dev/null @@ -1,148 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589-2_589-1delinsG' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.589-2del' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000088.3:c.589-2AG>G automapped to NM_000088.3:c.589-2_589-1delAGinsG' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'NG_007400.1(NM_000088.3):c.589-2del' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A1' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000079.2(LRG_1p1):p.?' -p21 -sS'slr' -p22 -S'NP_000079.2:p.?' 
-p23 -ssS'submitted_variant' -p24 -S'NM_000088.3:c.589-2AG>G' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000017.10(NM_000088.3):c.589-2_589-1delinsG' -p27 -sS'HGVS_LRG_variant' -p28 -S'LRG_1:g.8636del' -p29 -sS'HGVS_transcript_variant' -p30 -S'NM_000088.3:c.589-2_589-1delinsG' -p31 -sS'HGVS_RefSeqGene_variant' -p32 -S'NG_007400.1:g.8636del' -p33 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000017.10:g.48275364_48275365delinsC' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chr17' -p43 -sS'ref' -p44 -S'CT' -p45 -sS'pos' -p46 -S'48275364' -p47 -sS'alt' -p48 -S'C' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000017.11:g.50198003_50198004delinsC' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'CT' -p54 -sg46 -S'50198003' -p55 -sg48 -g49 -sssS'GRCh37' -p56 -(dp57 -g38 -S'NC_000017.10:g.48275364_48275365delinsC' -p58 -sg40 -(dp59 -g42 -S'17' -p60 -sg44 -S'CT' -p61 -sg46 -S'48275364' -p62 -sg48 -g49 -sssS'GRCh38' -p63 -(dp64 -g38 -S'NC_000017.11:g.50198003_50198004delinsC' -p65 -sg40 -(dp66 -g42 -g60 -sg44 -S'CT' -p67 -sg46 -S'50198003' -p68 -sg48 -g49 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant140.txt b/VariantValidator/testing/testOutputsMasterITS/variant140.txt deleted file mode 100644 index c4eca449..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant140.txt +++ /dev/null @@ -1,146 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.2024_2028+1del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.2024_2028+1del' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'NG_007400.1(NM_000088.3):c.2024_2028+1del' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2(LRG_1p1):p.?' 
-p20 -sS'slr' -p21 -S'NP_000079.2:p.?' -p22 -ssS'submitted_variant' -p23 -S'NM_000088.3:c.2024_2028+1del' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000017.10(NM_000088.3):c.2024_2028+1del' -p26 -sS'HGVS_LRG_variant' -p27 -S'LRG_1:g.14656_14661del' -p28 -sS'HGVS_transcript_variant' -p29 -S'NM_000088.3:c.2024_2028+1del' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_007400.1:g.14656_14661del' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000017.10:g.48269340_48269345del' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr17' -p42 -sS'ref' -p43 -S'ACTCTTG' -p44 -sS'pos' -p45 -S'48269339' -p46 -sS'alt' -p47 -S'A' -p48 -sssS'hg38' -p49 -(dp50 -g37 -S'NC_000017.11:g.50191979_50191984del' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -S'ACTCTTG' -p53 -sg45 -S'50191978' -p54 -sg47 -g48 -sssS'GRCh37' -p55 -(dp56 -g37 -S'NC_000017.10:g.48269340_48269345del' -p57 -sg39 -(dp58 -g41 -S'17' -p59 -sg43 -S'ACTCTTG' -p60 -sg45 -S'48269339' -p61 -sg47 -g48 -sssS'GRCh38' -p62 -(dp63 -g37 -S'NC_000017.11:g.50191979_50191984del' -p64 -sg39 -(dp65 -g41 -g59 -sg43 -S'ACTCTTG' -p66 -sg45 -S'50191978' -p67 -sg47 -g48 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant141.txt b/VariantValidator/testing/testOutputsMasterITS/variant141.txt deleted file mode 100644 index 683e7fcd..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant141.txt +++ /dev/null @@ -1,58 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Unable to map ENST00000450616.1 to an equivalent RefSeq transcript' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'HGVS_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'ENST00000450616.1:n.31+1G>C' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'HGVS_LRG_variant' -p20 -g4 -sS'HGVS_transcript_variant' -p21 -g4 -sS'HGVS_RefSeqGene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -ssS'flag' -p25 -S'warning' -p26 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant142.txt b/VariantValidator/testing/testOutputsMasterITS/variant142.txt deleted file mode 100644 index d492daf6..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant142.txt +++ /dev/null @@ -1,58 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Unable to map ENST00000491747 to an equivalent RefSeq transcript' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'HGVS_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'ENST00000491747:c.5071A>T' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'HGVS_LRG_variant' -p20 -g4 -sS'HGVS_transcript_variant' -p21 -g4 -sS'HGVS_RefSeqGene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -ssS'flag' -p25 -S'warning' -p26 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant143.txt b/VariantValidator/testing/testOutputsMasterITS/variant143.txt deleted file mode 100644 index 90a14d52..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant143.txt +++ /dev/null @@ -1,142 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589G>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.589G>T' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2(LRG_1p1):p.(Gly197Cys)' -p20 -sS'slr' -p21 -S'NP_000079.2:p.(G197C)' -p22 -ssS'submitted_variant' -p23 -S'NM_000088.3:c.589G>T' -p24 -sS'genome_context_intronic_sequence' -p25 -g10 -sS'HGVS_LRG_variant' -p26 -S'LRG_1:g.8638G>T' -p27 -sS'HGVS_transcript_variant' -p28 -S'NM_000088.3:c.589G>T' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_007400.1:g.8638G>T' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000017.10:g.48275363C>A' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr17' -p41 -sS'ref' -p42 -VC -p43 -sS'pos' -p44 -S'48275363' -p45 -sS'alt' -p46 -VA -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000017.11:g.50198002C>A' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -g43 -sg44 -S'50198002' -p52 -sg46 -g47 -sssS'GRCh37' -p53 -(dp54 -g36 -S'NC_000017.10:g.48275363C>A' -p55 -sg38 -(dp56 -g40 -S'17' -p57 -sg42 -g43 -sg44 -S'48275363' -p58 -sg46 -g47 -sssS'GRCh38' -p59 -(dp60 -g36 -S'NC_000017.11:g.50198002C>A' -p61 -sg38 -(dp62 -g40 -g57 -sg42 -g43 -sg44 -S'50198002' -p63 -sg46 -g47 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant144.txt b/VariantValidator/testing/testOutputsMasterITS/variant144.txt deleted file mode 100644 index ada5af5a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant144.txt +++ /dev/null @@ -1,142 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589G>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.589G>T' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2(LRG_1p1):p.(Gly197Cys)' -p20 -sS'slr' -p21 -S'NP_000079.2:p.(G197C)' -p22 -ssS'submitted_variant' -p23 -S'NG_007400.1:g.8638G>T' -p24 -sS'genome_context_intronic_sequence' -p25 -g10 -sS'HGVS_LRG_variant' -p26 -S'LRG_1:g.8638G>T' -p27 -sS'HGVS_transcript_variant' -p28 -S'NM_000088.3:c.589G>T' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_007400.1:g.8638G>T' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000017.10:g.48275363C>A' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr17' -p41 -sS'ref' -p42 -VC -p43 -sS'pos' -p44 -S'48275363' -p45 -sS'alt' -p46 -VA -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000017.11:g.50198002C>A' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -g43 -sg44 -S'50198002' -p52 -sg46 -g47 -sssS'GRCh37' -p53 -(dp54 -g36 -S'NC_000017.10:g.48275363C>A' -p55 -sg38 -(dp56 -g40 -S'17' -p57 -sg42 -g43 -sg44 -S'48275363' -p58 -sg46 -g47 -sssS'GRCh38' -p59 -(dp60 -g36 -S'NC_000017.11:g.50198002C>A' -p61 -sg38 -(dp62 -g40 -g57 -sg42 -g43 -sg44 -S'50198002' -p63 -sg46 -g47 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant145.txt b/VariantValidator/testing/testOutputsMasterITS/variant145.txt deleted file mode 100644 index 8434f3b6..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant145.txt +++ /dev/null @@ -1,144 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589G>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.589G>T' -p6 -sS'validation_warnings' -p7 -(lp8 -S'LRG_1:g.8638G>T automapped to NG_007400.1:g.8638G>T' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A1' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000079.2(LRG_1p1):p.(Gly197Cys)' -p21 -sS'slr' -p22 -S'NP_000079.2:p.(G197C)' -p23 -ssS'submitted_variant' -p24 -S'LRG_1:g.8638G>T' -p25 -sS'genome_context_intronic_sequence' -p26 -g11 -sS'HGVS_LRG_variant' -p27 -S'LRG_1:g.8638G>T' -p28 -sS'HGVS_transcript_variant' -p29 -S'NM_000088.3:c.589G>T' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_007400.1:g.8638G>T' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000017.10:g.48275363C>A' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr17' -p42 -sS'ref' -p43 -VC -p44 -sS'pos' -p45 -S'48275363' -p46 -sS'alt' -p47 -VA -p48 -sssS'hg38' -p49 -(dp50 -g37 -S'NC_000017.11:g.50198002C>A' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -g44 -sg45 -S'50198002' -p53 -sg47 -g48 -sssS'GRCh37' -p54 -(dp55 -g37 -S'NC_000017.10:g.48275363C>A' -p56 -sg39 -(dp57 -g41 -S'17' -p58 -sg43 -g44 -sg45 -S'48275363' -p59 -sg47 -g48 -sssS'GRCh38' -p60 -(dp61 -g37 -S'NC_000017.11:g.50198002C>A' -p62 -sg39 -(dp63 -g41 -g58 -sg43 -g44 -sg45 -S'50198002' -p64 -sg47 -g48 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant146.txt b/VariantValidator/testing/testOutputsMasterITS/variant146.txt deleted file mode 100644 index c1cc0e06..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant146.txt +++ /dev/null @@ -1,58 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'VariantValidator cannot recover information for transcript LRG_1T1 beacuse it is not available in the Universal Transcript Archive' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'HGVS_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'LRG_1t1:c.589G>T' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'HGVS_LRG_variant' -p20 -g4 -sS'HGVS_transcript_variant' -p21 -g4 -sS'HGVS_RefSeqGene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -ssS'flag' -p25 -S'warning' -p26 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant147.txt b/VariantValidator/testing/testOutputsMasterITS/variant147.txt deleted file mode 100644 index 672f9cb9..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant147.txt +++ /dev/null @@ -1,636 +0,0 @@ -(dp0 -S'NM_001040113.1:c.3055_3056inv' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -sS'RefSeqGene_context_intronic_sequence' -p7 -g4 -sS'alt_genomic_loci' -p8 -(lp9 -(dp10 -S'GRCh38' -p11 -(dp12 -S'HGVS_genomic_description' -p13 -S'NT_187607.1:g.1396662_1396663inv' -p14 -sS'vcf' -p15 -(dp16 -S'chr' -p17 -S'HSCHR16_1_CTG1' -p18 -sS'ref' -p19 -S'GT' -p20 -sS'pos' -p21 -S'1396662' -p22 -sS'alt' -p23 -S'AC' -p24 -sssa(dp25 -S'hg38' -p26 -(dp27 -g13 -S'NT_187607.1:g.1396662_1396663inv' -p28 -sg15 -(dp29 -g17 -S'chr16_KI270853v1_alt' -p30 -sg19 -S'GT' -p31 -sg21 -S'1396662' -p32 -sg23 -S'AC' -p33 -sssasS'transcript_description' -p34 -VHomo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2B, mRNA -p35 -sS'gene_symbol' -p36 -S'MYH11' -p37 -sS'HGVS_predicted_protein_consequence' -p38 -(dp39 -S'tlr' -p40 -S'NP_001035202.1:p.(Thr1019Val)' -p41 -sS'slr' -p42 -S'NP_001035202.1:p.(T1019V)' -p43 -ssS'submitted_variant' -p44 -S'chr16:g.15832508_15832509delinsAC' -p45 -sS'genome_context_intronic_sequence' -p46 -g4 -sS'HGVS_LRG_variant' -p47 -g4 -sS'HGVS_transcript_variant' -p48 -S'NM_001040113.1:c.3055_3056inv' -p49 -sS'HGVS_RefSeqGene_variant' -p50 -S'NG_009299.1:g.123379_123380inv' -p51 -sS'primary_assembly_loci' -p52 -(dp53 -g11 -(dp54 -g13 -S'NC_000016.10:g.15738651_15738652inv' -p55 -sg15 -(dp56 -g17 -S'16' -p57 -sg19 -S'GT' -p58 -sg21 -S'15738651' -p59 -sg23 -S'AC' -p60 -sssS'GRCh37' -p61 -(dp62 -g13 -S'NC_000016.9:g.15832508_15832509inv' -p63 -sg15 -(dp64 -g17 -g57 -sg19 -S'GT' -p65 -sg21 -S'15832508' -p66 -sg23 -S'AC' -p67 -sssg26 -(dp68 -g13 -S'NC_000016.10:g.15738651_15738652inv' -p69 -sg15 -(dp70 -g17 
-S'chr16' -p71 -sg19 -S'GT' -p72 -sg21 -S'15738651' -p73 -sg23 -S'AC' -p74 -sssS'hg19' -p75 -(dp76 -g13 -S'NC_000016.9:g.15832508_15832509inv' -p77 -sg15 -(dp78 -g17 -g71 -sg19 -S'GT' -p79 -sg21 -S'15832508' -p80 -sg23 -S'AC' -p81 -sssssS'NM_001040114.1:c.3055_3056inv' -p82 -(dp83 -g3 -g4 -sg5 -(lp84 -S'RefSeqGene record not available' -p85 -asg7 -g4 -sg8 -(lp86 -(dp87 -g11 -(dp88 -g13 -S'NT_187607.1:g.1396662_1396663inv' -p89 -sg15 -(dp90 -g17 -g18 -sg19 -S'GT' -p91 -sg21 -S'1396662' -p92 -sg23 -S'AC' -p93 -sssa(dp94 -g26 -(dp95 -g13 -S'NT_187607.1:g.1396662_1396663inv' -p96 -sg15 -(dp97 -g17 -g30 -sg19 -S'GT' -p98 -sg21 -S'1396662' -p99 -sg23 -S'AC' -p100 -sssasg34 -VHomo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1B, mRNA -p101 -sg36 -S'MYH11' -p102 -sg38 -(dp103 -g40 -S'NP_001035203.1:p.(Thr1019Val)' -p104 -sg42 -S'NP_001035203.1:p.(T1019V)' -p105 -ssg44 -g45 -sg46 -g4 -sg47 -g4 -sg48 -S'NM_001040114.1:c.3055_3056inv' -p106 -sg50 -g4 -sg52 -(dp107 -g11 -(dp108 -g13 -S'NC_000016.10:g.15738651_15738652inv' -p109 -sg15 -(dp110 -g17 -g57 -sg19 -S'GT' -p111 -sg21 -S'15738651' -p112 -sg23 -S'AC' -p113 -sssg61 -(dp114 -g13 -S'NC_000016.9:g.15832508_15832509inv' -p115 -sg15 -(dp116 -g17 -g57 -sg19 -S'GT' -p117 -sg21 -S'15832508' -p118 -sg23 -S'AC' -p119 -sssg26 -(dp120 -g13 -S'NC_000016.10:g.15738651_15738652inv' -p121 -sg15 -(dp122 -g17 -g71 -sg19 -S'GT' -p123 -sg21 -S'15738651' -p124 -sg23 -S'AC' -p125 -sssg75 -(dp126 -g13 -S'NC_000016.9:g.15832508_15832509inv' -p127 -sg15 -(dp128 -g17 -g71 -sg19 -S'GT' -p129 -sg21 -S'15832508' -p130 -sg23 -S'AC' -p131 -sssssS'flag' -p132 -S'gene_variant' -p133 -sS'NM_002474.2:c.3034_3035inv' -p134 -(dp135 -g3 -g4 -sg5 -(lp136 -S'RefSeqGene record not available' -p137 -asg7 -g4 -sg8 -(lp138 -(dp139 -g11 -(dp140 -g13 -S'NT_187607.1:g.1396662_1396663inv' -p141 -sg15 -(dp142 -g17 -g18 -sg19 -S'GT' -p143 -sg21 -S'1396662' -p144 -sg23 -S'AC' -p145 -sssa(dp146 -g26 -(dp147 -g13 -S'NT_187607.1:g.1396662_1396663inv' -p148 
-sg15 -(dp149 -g17 -g30 -sg19 -S'GT' -p150 -sg21 -S'1396662' -p151 -sg23 -S'AC' -p152 -sssasg34 -VHomo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1A, mRNA -p153 -sg36 -S'MYH11' -p154 -sg38 -(dp155 -g40 -S'NP_002465.1:p.(Thr1012Val)' -p156 -sg42 -S'NP_002465.1:p.(T1012V)' -p157 -ssg44 -g45 -sg46 -g4 -sg47 -g4 -sg48 -S'NM_002474.2:c.3034_3035inv' -p158 -sg50 -g4 -sg52 -(dp159 -g11 -(dp160 -g13 -S'NC_000016.10:g.15738651_15738652inv' -p161 -sg15 -(dp162 -g17 -g57 -sg19 -S'GT' -p163 -sg21 -S'15738651' -p164 -sg23 -S'AC' -p165 -sssg61 -(dp166 -g13 -S'NC_000016.9:g.15832508_15832509inv' -p167 -sg15 -(dp168 -g17 -g57 -sg19 -S'GT' -p169 -sg21 -S'15832508' -p170 -sg23 -S'AC' -p171 -sssg26 -(dp172 -g13 -S'NC_000016.10:g.15738651_15738652inv' -p173 -sg15 -(dp174 -g17 -g71 -sg19 -S'GT' -p175 -sg21 -S'15738651' -p176 -sg23 -S'AC' -p177 -sssg75 -(dp178 -g13 -S'NC_000016.9:g.15832508_15832509inv' -p179 -sg15 -(dp180 -g17 -g71 -sg19 -S'GT' -p181 -sg21 -S'15832508' -p182 -sg23 -S'AC' -p183 -sssssS'NM_022844.2:c.3034_3035inv' -p184 -(dp185 -g3 -g4 -sg5 -(lp186 -S'RefSeqGene record not available' -p187 -asg7 -g4 -sg8 -(lp188 -(dp189 -g11 -(dp190 -g13 -S'NT_187607.1:g.1396662_1396663inv' -p191 -sg15 -(dp192 -g17 -g18 -sg19 -S'GT' -p193 -sg21 -S'1396662' -p194 -sg23 -S'AC' -p195 -sssa(dp196 -g26 -(dp197 -g13 -S'NT_187607.1:g.1396662_1396663inv' -p198 -sg15 -(dp199 -g17 -g30 -sg19 -S'GT' -p200 -sg21 -S'1396662' -p201 -sg23 -S'AC' -p202 -sssasg34 -VHomo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2A, mRNA -p203 -sg36 -S'MYH11' -p204 -sg38 -(dp205 -g40 -S'NP_074035.1:p.(Thr1012Val)' -p206 -sg42 -S'NP_074035.1:p.(T1012V)' -p207 -ssg44 -g45 -sg46 -g4 -sg47 -g4 -sg48 -S'NM_022844.2:c.3034_3035inv' -p208 -sg50 -g4 -sg52 -(dp209 -g11 -(dp210 -g13 -S'NC_000016.10:g.15738651_15738652inv' -p211 -sg15 -(dp212 -g17 -g57 -sg19 -S'GT' -p213 -sg21 -S'15738651' -p214 -sg23 -S'AC' -p215 -sssg61 -(dp216 -g13 -S'NC_000016.9:g.15832508_15832509inv' -p217 -sg15 -(dp218 -g17 
-g57 -sg19 -S'GT' -p219 -sg21 -S'15832508' -p220 -sg23 -S'AC' -p221 -sssg26 -(dp222 -g13 -S'NC_000016.10:g.15738651_15738652inv' -p223 -sg15 -(dp224 -g17 -g71 -sg19 -S'GT' -p225 -sg21 -S'15738651' -p226 -sg23 -S'AC' -p227 -sssg75 -(dp228 -g13 -S'NC_000016.9:g.15832508_15832509inv' -p229 -sg15 -(dp230 -g17 -g71 -sg19 -S'GT' -p231 -sg21 -S'15832508' -p232 -sg23 -S'AC' -p233 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant148.txt b/VariantValidator/testing/testOutputsMasterITS/variant148.txt deleted file mode 100644 index c237c7b7..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant148.txt +++ /dev/null @@ -1,474 +0,0 @@ -(dp0 -S'NM_001162426.1:c.363+1dup' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 3, mRNA -p12 -sS'gene_symbol' -p13 -S'TSC1' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001155898.1:p.?' -p18 -sS'slr' -p19 -S'NP_001155898.1:p.?' 
-p20 -ssS'submitted_variant' -p21 -S'NG_012386.1:g.24048dupG' -p22 -sS'genome_context_intronic_sequence' -p23 -S'NC_000009.11(NM_001162426.1):c.363+1dup' -p24 -sS'HGVS_LRG_variant' -p25 -g4 -sS'HGVS_transcript_variant' -p26 -S'NM_001162426.1:c.363+1dup' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000009.11:g.135800973dup' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr9' -p38 -sS'ref' -p39 -S'C' -p40 -sS'pos' -p41 -S'135800973' -p42 -sS'alt' -p43 -S'CC' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000009.12:g.132925586dup' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'132925586' -p49 -sg43 -S'CC' -p50 -sssS'GRCh37' -p51 -(dp52 -g33 -S'NC_000009.11:g.135800973dup' -p53 -sg35 -(dp54 -g37 -S'9' -p55 -sg39 -g40 -sg41 -S'135800973' -p56 -sg43 -S'CC' -p57 -sssS'GRCh38' -p58 -(dp59 -g33 -S'NC_000009.12:g.132925586dup' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -g40 -sg41 -S'132925586' -p62 -sg43 -S'CC' -p63 -sssssS'flag' -p64 -S'gene_variant' -p65 -sS'NM_001362177.1:c.-1+1dup' -p66 -(dp67 -g3 -g4 -sg5 -(lp68 -S'RefSeqGene record not available' -p69 -asg8 -g4 -sg9 -(lp70 -sg11 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 5, mRNA -p71 -sg13 -S'TSC1' -p72 -sg15 -(dp73 -g17 -S'NP_001349106.1:p.?' -p74 -sg19 -S'NP_001349106.1:p.?' 
-p75 -ssg21 -g22 -sg23 -S'NC_000009.11(NM_001362177.1):c.-1+1dup' -p76 -sg25 -g4 -sg26 -S'NM_001362177.1:c.-1+1dup' -p77 -sg28 -g4 -sg29 -(dp78 -g31 -(dp79 -g33 -S'NC_000009.11:g.135800973dup' -p80 -sg35 -(dp81 -g37 -g38 -sg39 -g40 -sg41 -S'135800973' -p82 -sg43 -S'CC' -p83 -sssg45 -(dp84 -g33 -S'NC_000009.12:g.132925586dup' -p85 -sg35 -(dp86 -g37 -g38 -sg39 -g40 -sg41 -S'132925586' -p87 -sg43 -S'CC' -p88 -sssg51 -(dp89 -g33 -S'NC_000009.11:g.135800973dup' -p90 -sg35 -(dp91 -g37 -g55 -sg39 -g40 -sg41 -S'135800973' -p92 -sg43 -S'CC' -p93 -sssg58 -(dp94 -g33 -S'NC_000009.12:g.132925586dup' -p95 -sg35 -(dp96 -g37 -g55 -sg39 -g40 -sg41 -S'132925586' -p97 -sg43 -S'CC' -p98 -sssssS'NM_000368.4:c.363+1dup' -p99 -(dp100 -g3 -S'LRG_486t1:c.363+1dup' -p101 -sg5 -(lp102 -sg8 -S'NG_012386.1(NM_000368.4):c.363+1dup' -p103 -sg9 -(lp104 -sg11 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA -p105 -sg13 -S'TSC1' -p106 -sg15 -(dp107 -g17 -S'NP_000359.1(LRG_486p1):p.?' -p108 -sg19 -S'NP_000359.1:p.?' 
-p109 -ssg21 -g22 -sg23 -S'NC_000009.11(NM_000368.4):c.363+1dup' -p110 -sg25 -S'LRG_486:g.24048dup' -p111 -sg26 -S'NM_000368.4:c.363+1dup' -p112 -sg28 -S'NG_012386.1:g.24048dup' -p113 -sg29 -(dp114 -g31 -(dp115 -g33 -S'NC_000009.11:g.135800973dup' -p116 -sg35 -(dp117 -g37 -g38 -sg39 -g40 -sg41 -S'135800973' -p118 -sg43 -S'CC' -p119 -sssg45 -(dp120 -g33 -S'NC_000009.12:g.132925586dup' -p121 -sg35 -(dp122 -g37 -g38 -sg39 -g40 -sg41 -S'132925586' -p123 -sg43 -S'CC' -p124 -sssg51 -(dp125 -g33 -S'NC_000009.11:g.135800973dup' -p126 -sg35 -(dp127 -g37 -g55 -sg39 -g40 -sg41 -S'135800973' -p128 -sg43 -S'CC' -p129 -sssg58 -(dp130 -g33 -S'NC_000009.12:g.132925586dup' -p131 -sg35 -(dp132 -g37 -g55 -sg39 -g40 -sg41 -S'132925586' -p133 -sg43 -S'CC' -p134 -sssssS'NM_001162427.1:c.210+1615dup' -p135 -(dp136 -g3 -g4 -sg5 -(lp137 -S'RefSeqGene record not available' -p138 -asg8 -g4 -sg9 -(lp139 -sg11 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 4, mRNA -p140 -sg13 -S'TSC1' -p141 -sg15 -(dp142 -g17 -S'NP_001155899.1:p.?' -p143 -sg19 -S'NP_001155899.1:p.?' -p144 -ssg21 -g22 -sg23 -S'NC_000009.11(NM_001162427.1):c.210+1615dup' -p145 -sg25 -g4 -sg26 -S'NM_001162427.1:c.210+1615dup' -p146 -sg28 -g4 -sg29 -(dp147 -g31 -(dp148 -g33 -S'NC_000009.11:g.135800973dup' -p149 -sg35 -(dp150 -g37 -g38 -sg39 -g40 -sg41 -S'135800973' -p151 -sg43 -S'CC' -p152 -sssg45 -(dp153 -g33 -S'NC_000009.12:g.132925586dup' -p154 -sg35 -(dp155 -g37 -g38 -sg39 -g40 -sg41 -S'132925586' -p156 -sg43 -S'CC' -p157 -sssg51 -(dp158 -g33 -S'NC_000009.11:g.135800973dup' -p159 -sg35 -(dp160 -g37 -g55 -sg39 -g40 -sg41 -S'135800973' -p161 -sg43 -S'CC' -p162 -sssg58 -(dp163 -g33 -S'NC_000009.12:g.132925586dup' -p164 -sg35 -(dp165 -g37 -g55 -sg39 -g40 -sg41 -S'132925586' -p166 -sg43 -S'CC' -p167 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant149.txt b/VariantValidator/testing/testOutputsMasterITS/variant149.txt deleted file mode 100644 index e5aa0372..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant149.txt +++ /dev/null @@ -1,114 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_033517.1:c.1307_1309del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -aS'NM_033517.1:c.1307_1309delCGA cannot be mapped directly to genome build GRCh37' -p10 -aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -g6 -sS'alt_genomic_loci' -p13 -(lp14 -(dp15 -S'GRCh38' -p16 -(dp17 -S'HGVS_genomic_description' -p18 -S'NW_015148969.1:g.33721_33723del' -p19 -sS'vcf' -p20 -(dp21 -S'chr' -p22 -S'HG1311_PATCH' -p23 -sS'ref' -p24 -S'CCGA' -p25 -sS'pos' -p26 -S'33720' -p27 -sS'alt' -p28 -S'C' -p29 -sssa(dp30 -S'hg38' -p31 -(dp32 -g18 -S'NW_015148969.1:g.33721_33723del' -p33 -sg20 -(dp34 -g22 -S'NW_015148969.1' -p35 -sg24 -S'CCGA' -p36 -sg26 -S'33720' -p37 -sg28 -g29 -sssasS'transcript_description' -p38 -VHomo sapiens SH3 and multiple ankyrin repeat domains 3 (SHANK3), mRNA -p39 -sS'gene_symbol' -p40 -S'SHANK3' -p41 -sS'HGVS_predicted_protein_consequence' -p42 -(dp43 -S'tlr' -p44 -S'NP_277052.1:p.(Pro436_Ser437delinsArg)' -p45 -sS'slr' -p46 -S'NP_277052.1:p.(P436_S437delinsR)' -p47 -ssS'submitted_variant' -p48 -S'NM_033517.1:c.1307_1309delCGA' -p49 -sS'genome_context_intronic_sequence' -p50 -g6 -sS'HGVS_LRG_variant' -p51 -g6 -sS'HGVS_transcript_variant' -p52 -S'NM_033517.1:c.1307_1309del' -p53 -sS'HGVS_RefSeqGene_variant' -p54 -g6 -sS'primary_assembly_loci' -p55 -(dp56 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant15.txt b/VariantValidator/testing/testOutputsMasterITS/variant15.txt deleted file mode 100644 index 46f59459..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant15.txt +++ /dev/null @@ -1,138 +0,0 @@ -(dp0 -S'flag' -p1 -S'intergenic' -p2 -sS'Intergenic_Variant_1' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'No transcripts found that fully overlap the described variation in the genomic sequence' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -g6 -sS'gene_symbol' -p14 -g6 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -g6 -sS'slr' -p18 -g6 -ssS'submitted_variant' -p19 -S'NC_000017.10:g.48279242G>T' -p20 -sS'genome_context_intronic_sequence' -p21 -g6 -sS'HGVS_LRG_variant' -p22 -S'LRG_1:g.4759C>A' -p23 -sS'HGVS_transcript_variant' -p24 -g6 -sS'HGVS_RefSeqGene_variant' -p25 -S'NG_007400.1:g.4759C>A' -p26 -sS'primary_assembly_loci' -p27 -(dp28 -S'hg19' -p29 -(dp30 -S'HGVS_genomic_description' -p31 -S'NC_000017.10:g.48279242G>T' -p32 -sS'vcf' -p33 -(dp34 -S'chr' -p35 -S'chr17' -p36 -sS'ref' -p37 -S'G' -p38 -sS'pos' -p39 -S'48279242' -p40 -sS'alt' -p41 -S'T' -p42 -sssS'hg38' -p43 -(dp44 -g31 -S'NC_000017.11:g.50201881G>T' -p45 -sg33 -(dp46 -g35 -g36 -sg37 -g38 -sg39 -S'50201881' -p47 -sg41 -g42 -sssS'GRCh37' -p48 -(dp49 -g31 -S'NC_000017.10:g.48279242G>T' -p50 -sg33 -(dp51 -g35 -S'17' -p52 -sg37 -g38 -sg39 -S'48279242' -p53 -sg41 -g42 -sssS'GRCh38' -p54 -(dp55 -g31 -S'NC_000017.11:g.50201881G>T' -p56 -sg33 -(dp57 -g35 -g52 -sg37 -g38 -sg39 -S'50201881' -p58 -sg41 -g42 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant150.txt b/VariantValidator/testing/testOutputsMasterITS/variant150.txt deleted file mode 100644 index 69c30c40..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant150.txt +++ /dev/null @@ -1,58 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'HG1311_PATCH is not part of genome build GRCh37' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'HGVS_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'HG1311_PATCH-33720-CCGA-C' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'HGVS_LRG_variant' -p20 -g4 -sS'HGVS_transcript_variant' -p21 -g4 -sS'HGVS_RefSeqGene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -ssS'flag' -p25 -S'warning' -p26 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant151.txt b/VariantValidator/testing/testOutputsMasterITS/variant151.txt deleted file mode 100644 index 77f25f44..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant151.txt +++ /dev/null @@ -1,155 +0,0 @@ -(dp0 -S'NM_015120.4:c.1573_1579=' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_741t1:c.1573_1579=' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000002.11:g.73675227TCTC>TCTCCTC automapped to NC_000002.11:g.73675228_73675230dupCTC' -p7 -aS'The displayed variants may be artefacts of aligning NM_015120.4 with genome build GRCh37' -p8 -aS'NM_015120.4:c.1573_1579 contains 3 transcript base(s) that fail to align to chromosome NC_000002.11' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -S'' -p13 -sS'alt_genomic_loci' -p14 -(lp15 -sS'transcript_description' -p16 -VHomo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA -p17 -sS'gene_symbol' -p18 -S'ALMS1' -p19 -sS'HGVS_predicted_protein_consequence' -p20 -(dp21 -S'tlr' -p22 -S'NP_055935.4(LRG_741p1):p.(Ser525=)' -p23 -sS'slr' -p24 -S'NP_055935.4:p.(S525=)' -p25 -ssS'submitted_variant' -p26 -S'2-73675227-TCTC-TCTCCTC' -p27 -sS'genome_context_intronic_sequence' -p28 -g13 -sS'HGVS_LRG_variant' -p29 -S'LRG_741:g.67345_67351=' -p30 -sS'HGVS_transcript_variant' -p31 -S'NM_015120.4:c.1573_1579=' -p32 -sS'HGVS_RefSeqGene_variant' -p33 -S'NG_011690.1:g.67345_67351=' -p34 -sS'primary_assembly_loci' -p35 -(dp36 -S'hg19' -p37 -(dp38 -S'HGVS_genomic_description' -p39 -S'NC_000002.11:g.73675228_73675230dup' -p40 -sS'vcf' -p41 -(dp42 -S'chr' -p43 -S'chr2' -p44 -sS'ref' -p45 -S'CTC' -p46 -sS'pos' -p47 -S'73675228' -p48 -sS'alt' -p49 -VCTCCTC -p50 -sssS'hg38' -p51 -(dp52 -g39 -S'NC_000002.12:g.73448097_73448103=' -p53 -sg41 -(dp54 -g43 -g44 -sg45 -VTCTCCTC -p55 
-sg47 -S'73448097' -p56 -sg49 -g55 -sssS'GRCh37' -p57 -(dp58 -g39 -S'NC_000002.11:g.73675228_73675230dup' -p59 -sg41 -(dp60 -g43 -S'2' -p61 -sg45 -S'CTC' -p62 -sg47 -S'73675228' -p63 -sg49 -VCTCCTC -p64 -sssS'GRCh38' -p65 -(dp66 -g39 -S'NC_000002.12:g.73448097_73448103=' -p67 -sg41 -(dp68 -g43 -g61 -sg45 -g55 -sg47 -S'73448097' -p69 -sg49 -g55 -sssssS'flag' -p70 -S'gene_variant' -p71 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant152.txt b/VariantValidator/testing/testOutputsMasterITS/variant152.txt deleted file mode 100644 index 30f6c013..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant152.txt +++ /dev/null @@ -1,154 +0,0 @@ -(dp0 -S'NM_015120.4:c.1577_1579del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_741t1:c.1577_1579del' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000002.11:g.73675227TC>TC automapped to NC_000002.11:g.73675227_73675228TC=' -p7 -aS'The displayed variants may be artefacts of aligning NM_015120.4 with genome build GRCh37' -p8 -aS'NM_015120.4:c.1574_1576 contains 3 transcript base(s) that fail to align to chromosome NC_000002.11' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -S'' -p13 -sS'alt_genomic_loci' -p14 -(lp15 -sS'transcript_description' -p16 -VHomo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA -p17 -sS'gene_symbol' -p18 -S'ALMS1' -p19 -sS'HGVS_predicted_protein_consequence' -p20 -(dp21 -S'tlr' -p22 -S'NP_055935.4(LRG_741p1):p.(Pro526del)' -p23 -sS'slr' -p24 -S'NP_055935.4:p.(P526del)' -p25 -ssS'submitted_variant' -p26 -S'2-73675227-TC-TC' -p27 -sS'genome_context_intronic_sequence' -p28 -g13 -sS'HGVS_LRG_variant' -p29 -S'LRG_741:g.67349_67351del' -p30 -sS'HGVS_transcript_variant' -p31 -S'NM_015120.4:c.1577_1579del' -p32 -sS'HGVS_RefSeqGene_variant' -p33 -S'NG_011690.1:g.67349_67351del' -p34 
-sS'primary_assembly_loci' -p35 -(dp36 -S'hg19' -p37 -(dp38 -S'HGVS_genomic_description' -p39 -S'NC_000002.11:g.73675227_73675229=' -p40 -sS'vcf' -p41 -(dp42 -S'chr' -p43 -S'chr2' -p44 -sS'ref' -p45 -S'TCT' -p46 -sS'pos' -p47 -S'73675227' -p48 -sS'alt' -p49 -g46 -sssS'hg38' -p50 -(dp51 -g39 -S'NC_000002.12:g.73448101_73448103del' -p52 -sg41 -(dp53 -g43 -g44 -sg45 -S'TCTC' -p54 -sg47 -S'73448097' -p55 -sg49 -S'T' -p56 -sssS'GRCh37' -p57 -(dp58 -g39 -S'NC_000002.11:g.73675227_73675229=' -p59 -sg41 -(dp60 -g43 -S'2' -p61 -sg45 -g46 -sg47 -S'73675227' -p62 -sg49 -g46 -sssS'GRCh38' -p63 -(dp64 -g39 -S'NC_000002.12:g.73448101_73448103del' -p65 -sg41 -(dp66 -g43 -g61 -sg45 -S'TCTC' -p67 -sg47 -S'73448097' -p68 -sg49 -g56 -sssssS'flag' -p69 -S'gene_variant' -p70 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant153.txt b/VariantValidator/testing/testOutputsMasterITS/variant153.txt deleted file mode 100644 index 6672fd49..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant153.txt +++ /dev/null @@ -1,227 +0,0 @@ -(dp0 -S'NM_001080423.3:c.1016_1020=' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000003.11:g.14561627AG>AGG automapped to NC_000003.11:g.14561629dupG' -p7 -aS'RefSeqGene record not available' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA -p13 -sS'gene_symbol' -p14 -S'GRIP2' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001073892.3:p.(Arg339=)' -p19 -sS'slr' -p20 -S'NP_001073892.3:p.(R339=)' -p21 -ssS'submitted_variant' -p22 -S'3-14561627-AG-AGG' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'HGVS_LRG_variant' -p25 -g4 -sS'HGVS_transcript_variant' -p26 -S'NM_001080423.3:c.1016_1020=' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 
-S'hg19' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000003.11:g.14561629dup' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr3' -p38 -sS'ref' -p39 -S'G' -p40 -sS'pos' -p41 -S'14561628' -p42 -sS'alt' -p43 -VGG -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000003.12:g.14520120_14520124=' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -VGGGCC -p49 -sg41 -S'14520120' -p50 -sg43 -g49 -sssS'GRCh37' -p51 -(dp52 -g33 -S'NC_000003.11:g.14561629dup' -p53 -sg35 -(dp54 -g37 -S'3' -p55 -sg39 -g40 -sg41 -S'14561628' -p56 -sg43 -VGG -p57 -sssS'GRCh38' -p58 -(dp59 -g33 -S'NC_000003.12:g.14520120_14520124=' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -g49 -sg41 -S'14520120' -p62 -sg43 -g49 -sssssS'flag' -p63 -S'gene_variant' -p64 -sS'NM_001080423.2:c.1307_1311=' -p65 -(dp66 -g3 -g4 -sg5 -(lp67 -S'NC_000003.11:g.14561627AG>AGG automapped to NC_000003.11:g.14561629dupG' -p68 -aS'A more recent version of the selected reference sequence NM_001080423.2 is available (NM_001080423.3)' -p69 -aS'NM_001080423.3:c.1307_1311delinsGGCCC MUST be fully validated prior to use in reports' -p70 -aS'select_variants=NM_001080423.3:c.1307_1311delinsGGCCC' -p71 -aS'RefSeqGene record not available' -p72 -asg9 -g4 -sg10 -(lp73 -sg12 -VHomo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA -p74 -sg14 -S'GRIP2' -p75 -sg16 -(dp76 -g18 -S'NP_001073892.2:p.(Arg436=)' -p77 -sg20 -S'NP_001073892.2:p.(R436=)' -p78 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001080423.2:c.1307_1311=' -p79 -sg28 -g4 -sg29 -(dp80 -g31 -(dp81 -g33 -S'NC_000003.11:g.14561629dup' -p82 -sg35 -(dp83 -g37 -g38 -sg39 -g40 -sg41 -S'14561628' -p84 -sg43 -VGG -p85 -sssg51 -(dp86 -g33 -S'NC_000003.11:g.14561629dup' -p87 -sg35 -(dp88 -g37 -g55 -sg39 -g40 -sg41 -S'14561628' -p89 -sg43 -VGG -p90 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant154.txt b/VariantValidator/testing/testOutputsMasterITS/variant154.txt deleted file mode 100644 index 0fb9fe2d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant154.txt +++ /dev/null @@ -1,241 +0,0 @@ -(dp0 -S'NM_001080423.3:c.1020del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000003.11:g.14561630CC>CC automapped to NC_000003.11:g.14561630_14561631CC=' -p7 -aS'The displayed variants may be artefacts of aligning NM_001080423.3 with genome build GRCh37' -p8 -aS'NM_001080423.3:c.1019_1022 contains 1 transcript base(s) that fail to align to chromosome NC_000003.11' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -aS'RefSeqGene record not available' -p12 -asS'RefSeqGene_context_intronic_sequence' -p13 -g4 -sS'alt_genomic_loci' -p14 -(lp15 -sS'transcript_description' -p16 -VHomo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA -p17 -sS'gene_symbol' -p18 -S'GRIP2' -p19 -sS'HGVS_predicted_protein_consequence' -p20 -(dp21 -S'tlr' -p22 -S'NP_001073892.3:p.(Ser341GlnfsTer4)' -p23 -sS'slr' -p24 -S'NP_001073892.3:p.(S341Qfs*4)' -p25 -ssS'submitted_variant' -p26 -S'3-14561630-CC-CC' -p27 -sS'genome_context_intronic_sequence' -p28 -g4 -sS'HGVS_LRG_variant' -p29 -g4 -sS'HGVS_transcript_variant' -p30 -S'NM_001080423.3:c.1020del' -p31 -sS'HGVS_RefSeqGene_variant' -p32 -g4 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000003.11:g.14561624_14561630=' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr3' -p42 -sS'ref' -p43 -S'CTGAGGC' -p44 -sS'pos' -p45 -S'14561624' -p46 -sS'alt' -p47 -g44 -sssS'hg38' -p48 -(dp49 -g37 -S'NC_000003.12:g.14520122del' -p50 -sg39 -(dp51 -g41 -g42 -sg43 -S'AG' -p52 -sg45 -S'14520119' -p53 -sg47 -S'A' -p54 -sssS'GRCh37' -p55 -(dp56 -g37 
-S'NC_000003.11:g.14561624_14561630=' -p57 -sg39 -(dp58 -g41 -S'3' -p59 -sg43 -g44 -sg45 -S'14561624' -p60 -sg47 -g44 -sssS'GRCh38' -p61 -(dp62 -g37 -S'NC_000003.12:g.14520122del' -p63 -sg39 -(dp64 -g41 -g59 -sg43 -S'AG' -p65 -sg45 -S'14520119' -p66 -sg47 -g54 -sssssS'flag' -p67 -S'gene_variant' -p68 -sS'NM_001080423.2:c.1311del' -p69 -(dp70 -g3 -g4 -sg5 -(lp71 -S'NC_000003.11:g.14561630CC>CC automapped to NC_000003.11:g.14561630_14561631CC=' -p72 -aS'The displayed variants may be artefacts of aligning NM_001080423.2 with genome build GRCh37' -p73 -aS'NM_001080423.2:c.1310_1313 contains 1 transcript base(s) that fail to align to chromosome NC_000003.11' -p74 -aS'Caution should be used when reporting the displayed variant descriptions' -p75 -aS'If you are unsure, please contact admin' -p76 -aS'A more recent version of the selected reference sequence NM_001080423.2 is available (NM_001080423.3)' -p77 -aS'NM_001080423.3:c.1311delG MUST be fully validated prior to use in reports' -p78 -aS'select_variants=NM_001080423.3:c.1311del' -p79 -aS'RefSeqGene record not available' -p80 -asg13 -g4 -sg14 -(lp81 -sg16 -VHomo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA -p82 -sg18 -S'GRIP2' -p83 -sg20 -(dp84 -g22 -S'NP_001073892.2:p.(Ser438GlnfsTer4)' -p85 -sg24 -S'NP_001073892.2:p.(S438Qfs*4)' -p86 -ssg26 -g27 -sg28 -g4 -sg29 -g4 -sg30 -S'NM_001080423.2:c.1311del' -p87 -sg32 -g4 -sg33 -(dp88 -g35 -(dp89 -g37 -S'NC_000003.11:g.14561624_14561630=' -p90 -sg39 -(dp91 -g41 -g42 -sg43 -g44 -sg45 -S'14561624' -p92 -sg47 -g44 -sssg55 -(dp93 -g37 -S'NC_000003.11:g.14561624_14561630=' -p94 -sg39 -(dp95 -g41 -g59 -sg43 -g44 -sg45 -S'14561624' -p96 -sg47 -g44 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant155.txt b/VariantValidator/testing/testOutputsMasterITS/variant155.txt deleted file mode 100644 index 2e421001..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant155.txt +++ /dev/null @@ -1,219 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_014611.1:c.9879T>C' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'A more recent version of the selected reference sequence NM_014611.1 is available (NM_014611.2)' -p9 -aS'NM_014611.2:c.9879C= MUST be fully validated prior to use in reports' -p10 -aS'select_variants=NM_014611.2:c.9879C=' -p11 -aS'RefSeqGene record not available' -p12 -asS'RefSeqGene_context_intronic_sequence' -p13 -g6 -sS'alt_genomic_loci' -p14 -(lp15 -sS'transcript_description' -p16 -VHomo sapiens MDN1, midasin homolog (yeast) (MDN1), mRNA -p17 -sS'gene_symbol' -p18 -S'MDN1' -p19 -sS'HGVS_predicted_protein_consequence' -p20 -(dp21 -S'tlr' -p22 -S'NP_055426.1:p.(Val3293=)' -p23 -sS'slr' -p24 -S'NP_055426.1:p.(V3293=)' -p25 -ssS'submitted_variant' -p26 -S'6-90403795-G-G' -p27 -sS'genome_context_intronic_sequence' -p28 -g6 -sS'HGVS_LRG_variant' -p29 -g6 -sS'HGVS_transcript_variant' -p30 -S'NM_014611.1:c.9879T>C' -p31 -sS'HGVS_RefSeqGene_variant' -p32 -g6 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000006.11:g.90403795G=' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr6' -p42 -sS'ref' -p43 -S'G' -p44 -sS'pos' -p45 -S'90403795' -p46 -sS'alt' -p47 -g44 -sssS'GRCh37' -p48 -(dp49 -g37 -S'NC_000006.11:g.90403795G=' -p50 -sg39 -(dp51 -g41 -S'6' -p52 -sg43 -g44 -sg45 -S'90403795' -p53 -sg47 -g44 -sssssS'NM_014611.2:c.9879C=' -p54 -(dp55 -g5 -g6 -sg7 -(lp56 -S'RefSeqGene record not available' -p57 -asg13 -g6 -sg14 -(lp58 -sg16 -VHomo sapiens midasin AAA ATPase 1 (MDN1), mRNA -p59 -sg18 -S'MDN1' -p60 -sg20 -(dp61 -g22 -S'NP_055426.1:p.(Val3293=)' -p62 -sg24 
-S'NP_055426.1:p.(V3293=)' -p63 -ssg26 -g27 -sg28 -g6 -sg29 -g6 -sg30 -S'NM_014611.2:c.9879C=' -p64 -sg32 -g6 -sg33 -(dp65 -g35 -(dp66 -g37 -S'NC_000006.11:g.90403795G=' -p67 -sg39 -(dp68 -g41 -g42 -sg43 -VG -p69 -sg45 -S'90403795' -p70 -sg47 -g69 -sssS'hg38' -p71 -(dp72 -g37 -S'NC_000006.12:g.89694076G=' -p73 -sg39 -(dp74 -g41 -g42 -sg43 -g69 -sg45 -S'89694076' -p75 -sg47 -g69 -sssg48 -(dp76 -g37 -S'NC_000006.11:g.90403795G=' -p77 -sg39 -(dp78 -g41 -g52 -sg43 -g69 -sg45 -S'90403795' -p79 -sg47 -g69 -sssS'GRCh38' -p80 -(dp81 -g37 -S'NC_000006.12:g.89694076G=' -p82 -sg39 -(dp83 -g41 -g52 -sg43 -g69 -sg45 -S'89694076' -p84 -sg47 -g69 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant156.txt b/VariantValidator/testing/testOutputsMasterITS/variant156.txt deleted file mode 100644 index e46c00e9..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant156.txt +++ /dev/null @@ -1,220 +0,0 @@ -(dp0 -S'NM_014611.2:c.9879C>T' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens midasin AAA ATPase 1 (MDN1), mRNA -p12 -sS'gene_symbol' -p13 -S'MDN1' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_055426.1:p.(Val3293=)' -p18 -sS'slr' -p19 -S'NP_055426.1:p.(V3293=)' -p20 -ssS'submitted_variant' -p21 -S'6-90403795-G-A' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_014611.2:c.9879C>T' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000006.11:g.90403795G>A' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr6' -p37 -sS'ref' -p38 -VG -p39 -sS'pos' -p40 -S'90403795' -p41 -sS'alt' -p42 -VA -p43 -sssS'hg38' -p44 -(dp45 -g32 
-S'NC_000006.12:g.89694076G>A' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'89694076' -p48 -sg42 -g43 -sssS'GRCh37' -p49 -(dp50 -g32 -S'NC_000006.11:g.90403795G>A' -p51 -sg34 -(dp52 -g36 -S'6' -p53 -sg38 -g39 -sg40 -S'90403795' -p54 -sg42 -g43 -sssS'GRCh38' -p55 -(dp56 -g32 -S'NC_000006.12:g.89694076G>A' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'89694076' -p59 -sg42 -g43 -sssssS'flag' -p60 -S'gene_variant' -p61 -sS'NM_014611.1:c.9879T=' -p62 -(dp63 -g3 -g4 -sg5 -(lp64 -S'A more recent version of the selected reference sequence NM_014611.1 is available (NM_014611.2)' -p65 -aS'NM_014611.2:c.9879C>T MUST be fully validated prior to use in reports' -p66 -aS'select_variants=NM_014611.2:c.9879C>T' -p67 -aS'RefSeqGene record not available' -p68 -asg8 -g4 -sg9 -(lp69 -sg11 -VHomo sapiens MDN1, midasin homolog (yeast) (MDN1), mRNA -p70 -sg13 -S'MDN1' -p71 -sg15 -(dp72 -g17 -S'NP_055426.1:p.(Val3293=)' -p73 -sg19 -S'NP_055426.1:p.(V3293=)' -p74 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_014611.1:c.9879T=' -p75 -sg27 -g4 -sg28 -(dp76 -g30 -(dp77 -g32 -S'NC_000006.11:g.90403795G>A' -p78 -sg34 -(dp79 -g36 -g37 -sg38 -S'G' -p80 -sg40 -S'90403795' -p81 -sg42 -g43 -sssg49 -(dp82 -g32 -S'NC_000006.11:g.90403795G>A' -p83 -sg34 -(dp84 -g36 -g53 -sg38 -g80 -sg40 -S'90403795' -p85 -sg42 -g43 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant157.txt b/VariantValidator/testing/testOutputsMasterITS/variant157.txt deleted file mode 100644 index 38bfeaa3..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant157.txt +++ /dev/null @@ -1,1257 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_019105.6:c.10711del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000006.11:g.32012992CG>C automapped to NC_000006.11:g.32012993delG' -p9 -aS'A more recent version of the selected reference sequence NM_019105.6 is available (NM_019105.7)' -p10 -aS'NM_019105.7:c.10711delC MUST be fully validated prior to use in reports' -p11 -aS'select_variants=NM_019105.7:c.10711del' -p12 -asS'RefSeqGene_context_intronic_sequence' -p13 -g6 -sS'alt_genomic_loci' -p14 -(lp15 -(dp16 -S'GRCh37' -p17 -(dp18 -S'HGVS_genomic_description' -p19 -S'NT_113891.2:g.3483644del' -p20 -sS'vcf' -p21 -(dp22 -S'chr' -p23 -S'HSCHR6_MHC_COX_CTG1' -p24 -sS'ref' -p25 -S'CG' -p26 -sS'pos' -p27 -S'3483643' -p28 -sS'alt' -p29 -S'C' -p30 -sssa(dp31 -S'hg19' -p32 -(dp33 -g19 -S'NT_113891.2:g.3483644del' -p34 -sg21 -(dp35 -g23 -S'chr6_cox_hap2' -p36 -sg25 -S'CG' -p37 -sg27 -S'3483643' -p38 -sg29 -g30 -sssa(dp39 -S'GRCh38' -p40 -(dp41 -g19 -S'NT_113891.3:g.3483538del' -p42 -sg21 -(dp43 -g23 -g24 -sg25 -S'CG' -p44 -sg27 -S'3483537' -p45 -sg29 -g30 -sssa(dp46 -S'hg38' -p47 -(dp48 -g19 -S'NT_113891.3:g.3483538del' -p49 -sg21 -(dp50 -g23 -S'chr6_GL000251v2_alt' -p51 -sg25 -S'CG' -p52 -sg27 -S'3483537' -p53 -sg29 -g30 -sssa(dp54 -g17 -(dp55 -g19 -S'NT_167245.1:g.3292210del' -p56 -sg21 -(dp57 -g23 -S'HSCHR6_MHC_DBB_CTG1' -p58 -sg25 -S'CG' -p59 -sg27 -S'3292209' -p60 -sg29 -g30 -sssa(dp61 -g32 -(dp62 -g19 -S'NT_167245.1:g.3292210del' -p63 -sg21 -(dp64 -g23 -S'chr6_dbb_hap3' -p65 -sg25 -S'CG' -p66 -sg27 -S'3292209' -p67 -sg29 -g30 -sssa(dp68 -g40 -(dp69 -g19 -S'NT_167245.2:g.3286625del' -p70 -sg21 -(dp71 -g23 -g58 
-sg25 -S'CG' -p72 -sg27 -S'3286624' -p73 -sg29 -g30 -sssa(dp74 -g47 -(dp75 -g19 -S'NT_167245.2:g.3286625del' -p76 -sg21 -(dp77 -g23 -S'chr6_GL000252v2_alt' -p78 -sg25 -S'CG' -p79 -sg27 -S'3286624' -p80 -sg29 -g30 -sssa(dp81 -g17 -(dp82 -g19 -S'NT_167247.1:g.3392834del' -p83 -sg21 -(dp84 -g23 -S'HSCHR6_MHC_MCF_CTG1' -p85 -sg25 -S'CG' -p86 -sg27 -S'3392833' -p87 -sg29 -g30 -sssa(dp88 -g32 -(dp89 -g19 -S'NT_167247.1:g.3392834del' -p90 -sg21 -(dp91 -g23 -S'chr6_mcf_hap5' -p92 -sg25 -S'CG' -p93 -sg27 -S'3392833' -p94 -sg29 -g30 -sssa(dp95 -g40 -(dp96 -g19 -S'NT_167247.2:g.3387249del' -p97 -sg21 -(dp98 -g23 -g85 -sg25 -S'CG' -p99 -sg27 -S'3387248' -p100 -sg29 -g30 -sssa(dp101 -g47 -(dp102 -g19 -S'NT_167247.2:g.3387249del' -p103 -sg21 -(dp104 -g23 -S'chr6_GL000254v2_alt' -p105 -sg25 -S'CG' -p106 -sg27 -S'3387248' -p107 -sg29 -g30 -sssa(dp108 -g17 -(dp109 -g19 -S'NT_167248.1:g.3271861del' -p110 -sg21 -(dp111 -g23 -S'HSCHR6_MHC_QBL_CTG1' -p112 -sg25 -S'AG' -p113 -sg27 -S'3271858' -p114 -sg29 -S'A' -p115 -sssa(dp116 -g32 -(dp117 -g19 -S'NT_167248.1:g.3271861del' -p118 -sg21 -(dp119 -g23 -S'chr6_qbl_hap6' -p120 -sg25 -S'AG' -p121 -sg27 -S'3271858' -p122 -sg29 -g115 -sssasS'transcript_description' -p123 -VHomo sapiens tenascin XB (TNXB), transcript variant XB, mRNA -p124 -sS'gene_symbol' -p125 -S'TNXB' -p126 -sS'HGVS_predicted_protein_consequence' -p127 -(dp128 -S'tlr' -p129 -S'NP_061978.6:p.(Arg3571AlafsTer91)' -p130 -sS'slr' -p131 -S'NP_061978.6:p.(R3571Afs*91)' -p132 -ssS'submitted_variant' -p133 -S'6-32012992-CG-C' -p134 -sS'genome_context_intronic_sequence' -p135 -g6 -sS'HGVS_LRG_variant' -p136 -g6 -sS'HGVS_transcript_variant' -p137 -S'NM_019105.6:c.10711del' -p138 -sS'HGVS_RefSeqGene_variant' -p139 -S'NG_008337.2:g.69159del' -p140 -sS'primary_assembly_loci' -p141 -(dp142 -g32 -(dp143 -g19 -S'NC_000006.11:g.32012993del' -p144 -sg21 -(dp145 -g23 -S'chr6' -p146 -sg25 -S'CG' -p147 -sg27 -S'32012992' -p148 -sg29 -g30 -sssg47 -(dp149 -g19 -S'NC_000006.12:g.32045216del' -p150 
-sg21 -(dp151 -g23 -g146 -sg25 -S'CG' -p152 -sg27 -S'32045215' -p153 -sg29 -g30 -sssg17 -(dp154 -g19 -S'NC_000006.11:g.32012993del' -p155 -sg21 -(dp156 -g23 -S'6' -p157 -sg25 -S'CG' -p158 -sg27 -S'32012992' -p159 -sg29 -g30 -sssg40 -(dp160 -g19 -S'NC_000006.12:g.32045216del' -p161 -sg21 -(dp162 -g23 -g157 -sg25 -S'CG' -p163 -sg27 -S'32045215' -p164 -sg29 -g30 -sssssS'NM_032470.3:c.4del' -p165 -(dp166 -g5 -g6 -sg7 -(lp167 -S'NC_000006.11:g.32012992CG>C automapped to NC_000006.11:g.32012993delG' -p168 -aS'RefSeqGene record not available' -p169 -asg13 -g6 -sg14 -(lp170 -(dp171 -g17 -(dp172 -g19 -S'NT_113891.2:g.3483644del' -p173 -sg21 -(dp174 -g23 -g24 -sg25 -S'CG' -p175 -sg27 -S'3483643' -p176 -sg29 -g30 -sssa(dp177 -g32 -(dp178 -g19 -S'NT_113891.2:g.3483644del' -p179 -sg21 -(dp180 -g23 -g36 -sg25 -S'CG' -p181 -sg27 -S'3483643' -p182 -sg29 -g30 -sssa(dp183 -g40 -(dp184 -g19 -S'NT_113891.3:g.3483538del' -p185 -sg21 -(dp186 -g23 -g24 -sg25 -S'CG' -p187 -sg27 -S'3483537' -p188 -sg29 -g30 -sssa(dp189 -g47 -(dp190 -g19 -S'NT_113891.3:g.3483538del' -p191 -sg21 -(dp192 -g23 -g51 -sg25 -S'CG' -p193 -sg27 -S'3483537' -p194 -sg29 -g30 -sssa(dp195 -g17 -(dp196 -g19 -S'NT_167245.1:g.3292210del' -p197 -sg21 -(dp198 -g23 -g58 -sg25 -S'CG' -p199 -sg27 -S'3292209' -p200 -sg29 -g30 -sssa(dp201 -g32 -(dp202 -g19 -S'NT_167245.1:g.3292210del' -p203 -sg21 -(dp204 -g23 -g65 -sg25 -S'CG' -p205 -sg27 -S'3292209' -p206 -sg29 -g30 -sssa(dp207 -g40 -(dp208 -g19 -S'NT_167245.2:g.3286625del' -p209 -sg21 -(dp210 -g23 -g58 -sg25 -S'CG' -p211 -sg27 -S'3286624' -p212 -sg29 -g30 -sssa(dp213 -g47 -(dp214 -g19 -S'NT_167245.2:g.3286625del' -p215 -sg21 -(dp216 -g23 -g78 -sg25 -S'CG' -p217 -sg27 -S'3286624' -p218 -sg29 -g30 -sssa(dp219 -g17 -(dp220 -g19 -S'NT_167247.1:g.3392834del' -p221 -sg21 -(dp222 -g23 -g85 -sg25 -S'CG' -p223 -sg27 -S'3392833' -p224 -sg29 -g30 -sssa(dp225 -g32 -(dp226 -g19 -S'NT_167247.1:g.3392834del' -p227 -sg21 -(dp228 -g23 -g92 -sg25 -S'CG' -p229 -sg27 -S'3392833' -p230 -sg29 -g30 
-sssa(dp231 -g40 -(dp232 -g19 -S'NT_167247.2:g.3387249del' -p233 -sg21 -(dp234 -g23 -g85 -sg25 -S'CG' -p235 -sg27 -S'3387248' -p236 -sg29 -g30 -sssa(dp237 -g47 -(dp238 -g19 -S'NT_167247.2:g.3387249del' -p239 -sg21 -(dp240 -g23 -g105 -sg25 -S'CG' -p241 -sg27 -S'3387248' -p242 -sg29 -g30 -sssa(dp243 -g17 -(dp244 -g19 -S'NT_167248.1:g.3274047del' -p245 -sg21 -(dp246 -g23 -g112 -sg25 -S'CG' -p247 -sg27 -S'3274046' -p248 -sg29 -g30 -sssa(dp249 -g32 -(dp250 -g19 -S'NT_167248.1:g.3274047del' -p251 -sg21 -(dp252 -g23 -g120 -sg25 -S'CG' -p253 -sg27 -S'3274046' -p254 -sg29 -g30 -sssa(dp255 -g40 -(dp256 -g19 -S'NT_167248.2:g.3268451del' -p257 -sg21 -(dp258 -g23 -g112 -sg25 -S'CG' -p259 -sg27 -S'3268450' -p260 -sg29 -g30 -sssa(dp261 -g47 -(dp262 -g19 -S'NT_167248.2:g.3268451del' -p263 -sg21 -(dp264 -g23 -S'chr6_GL000255v2_alt' -p265 -sg25 -S'CG' -p266 -sg27 -S'3268450' -p267 -sg29 -g30 -sssa(dp268 -g17 -(dp269 -g19 -S'NT_167249.1:g.3345701del' -p270 -sg21 -(dp271 -g23 -S'HSCHR6_MHC_SSTO_CTG1' -p272 -sg25 -S'CG' -p273 -sg27 -S'3345700' -p274 -sg29 -g30 -sssa(dp275 -g32 -(dp276 -g19 -S'NT_167249.1:g.3345701del' -p277 -sg21 -(dp278 -g23 -S'chr6_ssto_hap7' -p279 -sg25 -S'CG' -p280 -sg27 -S'3345700' -p281 -sg29 -g30 -sssa(dp282 -g40 -(dp283 -g19 -S'NT_167249.2:g.3346403del' -p284 -sg21 -(dp285 -g23 -g272 -sg25 -S'CG' -p286 -sg27 -S'3346402' -p287 -sg29 -g30 -sssa(dp288 -g47 -(dp289 -g19 -S'NT_167249.2:g.3346403del' -p290 -sg21 -(dp291 -g23 -S'chr6_GL000256v2_alt' -p292 -sg25 -S'CG' -p293 -sg27 -S'3346402' -p294 -sg29 -g30 -sssasg123 -VHomo sapiens tenascin XB (TNXB), transcript variant XB-S, mRNA -p295 -sg125 -S'TNXB' -p296 -sg127 -(dp297 -g129 -S'NP_115859.2:p.(Arg2AlafsTer91)' -p298 -sg131 -S'NP_115859.2:p.(R2Afs*91)' -p299 -ssg133 -g134 -sg135 -g6 -sg136 -g6 -sg137 -S'NM_032470.3:c.4del' -p300 -sg139 -g6 -sg141 -(dp301 -g32 -(dp302 -g19 -S'NC_000006.11:g.32012993del' -p303 -sg21 -(dp304 -g23 -g146 -sg25 -S'CG' -p305 -sg27 -S'32012992' -p306 -sg29 -g30 -sssg47 -(dp307 -g19 
-S'NC_000006.12:g.32045216del' -p308 -sg21 -(dp309 -g23 -g146 -sg25 -S'CG' -p310 -sg27 -S'32045215' -p311 -sg29 -g30 -sssg17 -(dp312 -g19 -S'NC_000006.11:g.32012993del' -p313 -sg21 -(dp314 -g23 -g157 -sg25 -S'CG' -p315 -sg27 -S'32012992' -p316 -sg29 -g30 -sssg40 -(dp317 -g19 -S'NC_000006.12:g.32045216del' -p318 -sg21 -(dp319 -g23 -g157 -sg25 -S'CG' -p320 -sg27 -S'32045215' -p321 -sg29 -g30 -sssssS'NM_001365276.1:c.10717del' -p322 -(dp323 -g5 -g6 -sg7 -(lp324 -S'NC_000006.11:g.32012992CG>C automapped to NC_000006.11:g.32012993delG' -p325 -aS'RefSeqGene record not available' -p326 -asg13 -g6 -sg14 -(lp327 -(dp328 -g17 -(dp329 -g19 -S'NT_113891.2:g.3483644del' -p330 -sg21 -(dp331 -g23 -g24 -sg25 -S'CG' -p332 -sg27 -S'3483643' -p333 -sg29 -g30 -sssa(dp334 -g32 -(dp335 -g19 -S'NT_113891.2:g.3483644del' -p336 -sg21 -(dp337 -g23 -g36 -sg25 -S'CG' -p338 -sg27 -S'3483643' -p339 -sg29 -g30 -sssa(dp340 -g17 -(dp341 -g19 -S'NT_167245.1:g.3292210del' -p342 -sg21 -(dp343 -g23 -g58 -sg25 -S'CG' -p344 -sg27 -S'3292209' -p345 -sg29 -g30 -sssa(dp346 -g32 -(dp347 -g19 -S'NT_167245.1:g.3292210del' -p348 -sg21 -(dp349 -g23 -g65 -sg25 -S'CG' -p350 -sg27 -S'3292209' -p351 -sg29 -g30 -sssa(dp352 -g17 -(dp353 -g19 -S'NT_167247.1:g.3392834del' -p354 -sg21 -(dp355 -g23 -g85 -sg25 -S'CG' -p356 -sg27 -S'3392833' -p357 -sg29 -g30 -sssa(dp358 -g32 -(dp359 -g19 -S'NT_167247.1:g.3392834del' -p360 -sg21 -(dp361 -g23 -g92 -sg25 -S'CG' -p362 -sg27 -S'3392833' -p363 -sg29 -g30 -sssasg123 -VHomo sapiens tenascin XB (TNXB), transcript variant 3, mRNA -p364 -sg125 -S'TNXB' -p365 -sg127 -(dp366 -g129 -S'NP_001352205.1:p.(Arg3573AlafsTer91)' -p367 -sg131 -S'NP_001352205.1:p.(R3573Afs*91)' -p368 -ssg133 -g134 -sg135 -g6 -sg136 -g6 -sg137 -S'NM_001365276.1:c.10717del' -p369 -sg139 -g6 -sg141 -(dp370 -g32 -(dp371 -g19 -S'NC_000006.11:g.32012993del' -p372 -sg21 -(dp373 -g23 -g146 -sg25 -S'CG' -p374 -sg27 -S'32012992' -p375 -sg29 -g30 -sssg17 -(dp376 -g19 -S'NC_000006.11:g.32012993del' -p377 -sg21 -(dp378 -g23 
-g157 -sg25 -S'CG' -p379 -sg27 -S'32012992' -p380 -sg29 -g30 -sssssS'NM_019105.7:c.10711del' -p381 -(dp382 -g5 -g6 -sg7 -(lp383 -S'NC_000006.11:g.32012992CG>C automapped to NC_000006.11:g.32012993delG' -p384 -aS'RefSeqGene record not available' -p385 -asg13 -g6 -sg14 -(lp386 -(dp387 -g17 -(dp388 -g19 -S'NT_113891.2:g.3483644del' -p389 -sg21 -(dp390 -g23 -g24 -sg25 -S'CG' -p391 -sg27 -S'3483643' -p392 -sg29 -g30 -sssa(dp393 -g32 -(dp394 -g19 -S'NT_113891.2:g.3483644del' -p395 -sg21 -(dp396 -g23 -g36 -sg25 -S'CG' -p397 -sg27 -S'3483643' -p398 -sg29 -g30 -sssa(dp399 -g17 -(dp400 -g19 -S'NT_167245.1:g.3292210del' -p401 -sg21 -(dp402 -g23 -g58 -sg25 -S'CG' -p403 -sg27 -S'3292209' -p404 -sg29 -g30 -sssa(dp405 -g32 -(dp406 -g19 -S'NT_167245.1:g.3292210del' -p407 -sg21 -(dp408 -g23 -g65 -sg25 -S'CG' -p409 -sg27 -S'3292209' -p410 -sg29 -g30 -sssa(dp411 -g17 -(dp412 -g19 -S'NT_167247.1:g.3392834del' -p413 -sg21 -(dp414 -g23 -g85 -sg25 -S'CG' -p415 -sg27 -S'3392833' -p416 -sg29 -g30 -sssa(dp417 -g32 -(dp418 -g19 -S'NT_167247.1:g.3392834del' -p419 -sg21 -(dp420 -g23 -g92 -sg25 -S'CG' -p421 -sg27 -S'3392833' -p422 -sg29 -g30 -sssasg123 -VHomo sapiens tenascin XB (TNXB), transcript variant XB, mRNA -p423 -sg125 -S'TNXB' -p424 -sg127 -(dp425 -g129 -S'NP_061978.6:p.(Arg3571AlafsTer91)' -p426 -sg131 -S'NP_061978.6:p.(R3571Afs*91)' -p427 -ssg133 -g134 -sg135 -g6 -sg136 -g6 -sg137 -S'NM_019105.7:c.10711del' -p428 -sg139 -g6 -sg141 -(dp429 -g32 -(dp430 -g19 -S'NC_000006.11:g.32012993del' -p431 -sg21 -(dp432 -g23 -g146 -sg25 -S'CG' -p433 -sg27 -S'32012992' -p434 -sg29 -g30 -sssg17 -(dp435 -g19 -S'NC_000006.11:g.32012993del' -p436 -sg21 -(dp437 -g23 -g157 -sg25 -S'CG' -p438 -sg27 -S'32012992' -p439 -sg29 -g30 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant158.txt b/VariantValidator/testing/testOutputsMasterITS/variant158.txt deleted file mode 100644 index d4dc5b1f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant158.txt +++ /dev/null @@ -1,142 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589G>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.589G>T' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2(LRG_1p1):p.(Gly197Cys)' -p20 -sS'slr' -p21 -S'NP_000079.2:p.(G197C)' -p22 -ssS'submitted_variant' -p23 -S'17-48275363-C-A' -p24 -sS'genome_context_intronic_sequence' -p25 -g10 -sS'HGVS_LRG_variant' -p26 -S'LRG_1:g.8638G>T' -p27 -sS'HGVS_transcript_variant' -p28 -S'NM_000088.3:c.589G>T' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_007400.1:g.8638G>T' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000017.10:g.48275363C>A' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr17' -p41 -sS'ref' -p42 -VC -p43 -sS'pos' -p44 -S'48275363' -p45 -sS'alt' -p46 -VA -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000017.11:g.50198002C>A' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -g43 -sg44 -S'50198002' -p52 -sg46 -g47 -sssS'GRCh37' -p53 -(dp54 -g36 -S'NC_000017.10:g.48275363C>A' -p55 -sg38 -(dp56 -g40 -S'17' -p57 -sg42 -g43 -sg44 -S'48275363' -p58 -sg46 -g47 -sssS'GRCh38' -p59 -(dp60 -g36 -S'NC_000017.11:g.50198002C>A' -p61 -sg38 -(dp62 -g40 -g57 -sg42 -g43 -sg44 -S'50198002' -p63 -sg46 -g47 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant159.txt b/VariantValidator/testing/testOutputsMasterITS/variant159.txt deleted file mode 100644 index 6bb6b1bc..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant159.txt +++ /dev/null @@ -1,143 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589-1G>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.589-1G>T' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'NG_007400.1(NM_000088.3):c.589-1G>T' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2(LRG_1p1):p.?' -p20 -sS'slr' -p21 -S'NP_000079.2:p.?' -p22 -ssS'submitted_variant' -p23 -S'17-48275364-C-A' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000017.10(NM_000088.3):c.589-1G>T' -p26 -sS'HGVS_LRG_variant' -p27 -S'LRG_1:g.8637G>T' -p28 -sS'HGVS_transcript_variant' -p29 -S'NM_000088.3:c.589-1G>T' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_007400.1:g.8637G>T' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000017.10:g.48275364C>A' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr17' -p42 -sS'ref' -p43 -VC -p44 -sS'pos' -p45 -S'48275364' -p46 -sS'alt' -p47 -VA -p48 -sssS'hg38' -p49 -(dp50 -g37 -S'NC_000017.11:g.50198003C>A' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -g44 -sg45 -S'50198003' -p53 -sg47 -g48 -sssS'GRCh37' -p54 -(dp55 -g37 -S'NC_000017.10:g.48275364C>A' -p56 -sg39 -(dp57 -g41 -S'17' -p58 -sg43 -g44 -sg45 -S'48275364' -p59 -sg47 -g48 -sssS'GRCh38' -p60 -(dp61 -g37 -S'NC_000017.11:g.50198003C>A' -p62 -sg39 -(dp63 -g41 -g58 -sg43 -g44 -sg45 -S'50198003' -p64 -sg47 -g48 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant16.txt b/VariantValidator/testing/testOutputsMasterITS/variant16.txt deleted file mode 100644 index 8e2dfcff..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant16.txt +++ /dev/null @@ -1,58 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use NC_000006.11:g.32006074C>T' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'HGVS_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NM_000500.7:c.-107-19C>T' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'HGVS_LRG_variant' -p20 -g4 -sS'HGVS_transcript_variant' -p21 -g4 -sS'HGVS_RefSeqGene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -ssS'flag' -p25 -S'warning' -p26 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant160.txt b/VariantValidator/testing/testOutputsMasterITS/variant160.txt deleted file mode 100644 index f981cca4..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant160.txt +++ /dev/null @@ -1,150 +0,0 @@ -(dp0 -S'NM_000088.3:c.591_593inv' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_1t1:c.591_593inv' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000017.10:g.48275359GGA>TCC automapped to NC_000017.10:g.48275359_48275361inv' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -S'' -p9 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p13 -sS'gene_symbol' -p14 -S'COL1A1' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_000079.2(LRG_1p1):p.(Pro198Asp)' -p19 -sS'slr' -p20 -S'NP_000079.2:p.(P198D)' -p21 -ssS'submitted_variant' -p22 -S'17-48275359-GGA-TCC' -p23 -sS'genome_context_intronic_sequence' -p24 -g9 -sS'HGVS_LRG_variant' -p25 -S'LRG_1:g.8640_8642inv' -p26 -sS'HGVS_transcript_variant' -p27 -S'NM_000088.3:c.591_593inv' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -S'NG_007400.1:g.8640_8642inv' -p30 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000017.10:g.48275359_48275361inv' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr17' -p40 -sS'ref' -p41 -S'GGA' -p42 -sS'pos' -p43 -S'48275359' -p44 -sS'alt' -p45 -S'TCC' -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000017.11:g.50197998_50198000inv' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -S'GGA' -p51 -sg43 -S'50197998' -p52 -sg45 -S'TCC' -p53 -sssS'GRCh37' -p54 -(dp55 -g35 -S'NC_000017.10:g.48275359_48275361inv' -p56 -sg37 -(dp57 -g39 -S'17' -p58 -sg41 -S'GGA' -p59 -sg43 -S'48275359' -p60 -sg45 -S'TCC' -p61 -sssS'GRCh38' -p62 -(dp63 -g35 -S'NC_000017.11:g.50197998_50198000inv' -p64 -sg37 -(dp65 -g39 -g58 -sg41 -S'GGA' -p66 -sg43 -S'50197998' -p67 -sg45 -S'TCC' -p68 
-sssssS'flag' -p69 -S'gene_variant' -p70 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant161.txt b/VariantValidator/testing/testOutputsMasterITS/variant161.txt deleted file mode 100644 index 77daa050..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant161.txt +++ /dev/null @@ -1,148 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000089.3:c.1035_1035+2del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_2t1:c.1035_1035+2del' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000007.13:g.94039128CTTG>C automapped to NC_000007.13:g.94039133_94039135delTGT' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'NG_007405.1(NM_000089.3):c.1035_1035+2del' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 2 chain (COL1A2), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A2' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000080.2(LRG_2p1):p.(Val345del)' -p21 -sS'slr' -p22 -S'NP_000080.2:p.(V345del)' -p23 -ssS'submitted_variant' -p24 -S'7-94039128-CTTG-C' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000007.13(NM_000089.3):c.1035_1035+2del' -p27 -sS'HGVS_LRG_variant' -p28 -S'LRG_2:g.20261_20263del' -p29 -sS'HGVS_transcript_variant' -p30 -S'NM_000089.3:c.1035_1035+2del' -p31 -sS'HGVS_RefSeqGene_variant' -p32 -S'NG_007405.1:g.20261_20263del' -p33 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000007.13:g.94039133_94039135del' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chr7' -p43 -sS'ref' -p44 -S'CTTG' -p45 -sS'pos' -p46 -S'94039128' -p47 -sS'alt' -p48 -S'C' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000007.14:g.94409821_94409823del' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'CTTG' -p54 -sg46 -S'94409816' -p55 -sg48 -g49 -sssS'GRCh37' -p56 -(dp57 -g38 -S'NC_000007.13:g.94039133_94039135del' -p58 -sg40 -(dp59 -g42 -S'7' -p60 -sg44 -S'CTTG' -p61 -sg46 -S'94039128' -p62 -sg48 
-g49 -sssS'GRCh38' -p63 -(dp64 -g38 -S'NC_000007.14:g.94409821_94409823del' -p65 -sg40 -(dp66 -g42 -g60 -sg44 -S'CTTG' -p67 -sg46 -S'94409816' -p68 -sg48 -g49 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant162.txt b/VariantValidator/testing/testOutputsMasterITS/variant162.txt deleted file mode 100644 index b0f71165..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant162.txt +++ /dev/null @@ -1,486 +0,0 @@ -(dp0 -S'NM_001162426.1:c.363+1dup' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000009.11:g.135800972AC>ACC automapped to NC_000009.11:g.135800974dupC' -p7 -aS'NM_001162426.1:c.363dup normalized to NM_001162426.1:c.363+1dup' -p8 -aS'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g4 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 3, mRNA -p14 -sS'gene_symbol' -p15 -S'TSC1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_001155898.1:p.(Met122AspfsTer4)' -p20 -sS'slr' -p21 -S'NP_001155898.1:p.(M122Dfs*4)' -p22 -ssS'submitted_variant' -p23 -S'9-135800972-AC-ACC' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000009.11(NM_001162426.1):c.363+1dup' -p26 -sS'HGVS_LRG_variant' -p27 -g4 -sS'HGVS_transcript_variant' -p28 -S'NM_001162426.1:c.363+1dup' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -g4 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000009.11:g.135800973dup' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr9' -p40 -sS'ref' -p41 -S'C' -p42 -sS'pos' -p43 -S'135800973' -p44 -sS'alt' -p45 -S'CC' -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000009.12:g.132925586dup' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -g42 -sg43 -S'132925586' -p51 -sg45 -S'CC' -p52 -sssS'GRCh37' -p53 -(dp54 -g35 -S'NC_000009.11:g.135800973dup' -p55 -sg37 -(dp56 -g39 -S'9' -p57 -sg41 -g42 
-sg43 -S'135800973' -p58 -sg45 -S'CC' -p59 -sssS'GRCh38' -p60 -(dp61 -g35 -S'NC_000009.12:g.132925586dup' -p62 -sg37 -(dp63 -g39 -g57 -sg41 -g42 -sg43 -S'132925586' -p64 -sg45 -S'CC' -p65 -sssssS'flag' -p66 -S'gene_variant' -p67 -sS'NM_001362177.1:c.-1+1dup' -p68 -(dp69 -g3 -g4 -sg5 -(lp70 -S'NC_000009.11:g.135800972AC>ACC automapped to NC_000009.11:g.135800974dupC' -p71 -aS'RefSeqGene record not available' -p72 -asg10 -g4 -sg11 -(lp73 -sg13 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 5, mRNA -p74 -sg15 -S'TSC1' -p75 -sg17 -(dp76 -g19 -S'NP_001349106.1:p.?' -p77 -sg21 -S'NP_001349106.1:p.?' -p78 -ssg23 -g24 -sg25 -S'NC_000009.11(NM_001362177.1):c.-1+1dup' -p79 -sg27 -g4 -sg28 -S'NM_001362177.1:c.-1+1dup' -p80 -sg30 -g4 -sg31 -(dp81 -g33 -(dp82 -g35 -S'NC_000009.11:g.135800973dup' -p83 -sg37 -(dp84 -g39 -g40 -sg41 -g42 -sg43 -S'135800973' -p85 -sg45 -S'CC' -p86 -sssg47 -(dp87 -g35 -S'NC_000009.12:g.132925586dup' -p88 -sg37 -(dp89 -g39 -g40 -sg41 -g42 -sg43 -S'132925586' -p90 -sg45 -S'CC' -p91 -sssg53 -(dp92 -g35 -S'NC_000009.11:g.135800973dup' -p93 -sg37 -(dp94 -g39 -g57 -sg41 -g42 -sg43 -S'135800973' -p95 -sg45 -S'CC' -p96 -sssg60 -(dp97 -g35 -S'NC_000009.12:g.132925586dup' -p98 -sg37 -(dp99 -g39 -g57 -sg41 -g42 -sg43 -S'132925586' -p100 -sg45 -S'CC' -p101 -sssssS'NM_000368.4:c.363+1dup' -p102 -(dp103 -g3 -S'LRG_486t1:c.363+1dup' -p104 -sg5 -(lp105 -S'NC_000009.11:g.135800972AC>ACC automapped to NC_000009.11:g.135800974dupC' -p106 -aS'NM_000368.4:c.363dup normalized to NM_000368.4:c.363+1dup' -p107 -asg10 -S'NG_012386.1(NM_000368.4):c.363+1dup' -p108 -sg11 -(lp109 -sg13 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA -p110 -sg15 -S'TSC1' -p111 -sg17 -(dp112 -g19 -S'NP_000359.1(LRG_486p1):p.(Met122AspfsTer4)' -p113 -sg21 -S'NP_000359.1:p.(M122Dfs*4)' -p114 -ssg23 -g24 -sg25 -S'NC_000009.11(NM_000368.4):c.363+1dup' -p115 -sg27 -S'LRG_486:g.24048dup' -p116 -sg28 -S'NM_000368.4:c.363+1dup' -p117 -sg30 
-S'NG_012386.1:g.24048dup' -p118 -sg31 -(dp119 -g33 -(dp120 -g35 -S'NC_000009.11:g.135800973dup' -p121 -sg37 -(dp122 -g39 -g40 -sg41 -g42 -sg43 -S'135800973' -p123 -sg45 -S'CC' -p124 -sssg47 -(dp125 -g35 -S'NC_000009.12:g.132925586dup' -p126 -sg37 -(dp127 -g39 -g40 -sg41 -g42 -sg43 -S'132925586' -p128 -sg45 -S'CC' -p129 -sssg53 -(dp130 -g35 -S'NC_000009.11:g.135800973dup' -p131 -sg37 -(dp132 -g39 -g57 -sg41 -g42 -sg43 -S'135800973' -p133 -sg45 -S'CC' -p134 -sssg60 -(dp135 -g35 -S'NC_000009.12:g.132925586dup' -p136 -sg37 -(dp137 -g39 -g57 -sg41 -g42 -sg43 -S'132925586' -p138 -sg45 -S'CC' -p139 -sssssS'NM_001162427.1:c.210+1615dup' -p140 -(dp141 -g3 -g4 -sg5 -(lp142 -S'NC_000009.11:g.135800972AC>ACC automapped to NC_000009.11:g.135800974dupC' -p143 -aS'RefSeqGene record not available' -p144 -asg10 -g4 -sg11 -(lp145 -sg13 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 4, mRNA -p146 -sg15 -S'TSC1' -p147 -sg17 -(dp148 -g19 -S'NP_001155899.1:p.?' -p149 -sg21 -S'NP_001155899.1:p.?' -p150 -ssg23 -g24 -sg25 -S'NC_000009.11(NM_001162427.1):c.210+1615dup' -p151 -sg27 -g4 -sg28 -S'NM_001162427.1:c.210+1615dup' -p152 -sg30 -g4 -sg31 -(dp153 -g33 -(dp154 -g35 -S'NC_000009.11:g.135800973dup' -p155 -sg37 -(dp156 -g39 -g40 -sg41 -g42 -sg43 -S'135800973' -p157 -sg45 -S'CC' -p158 -sssg47 -(dp159 -g35 -S'NC_000009.12:g.132925586dup' -p160 -sg37 -(dp161 -g39 -g40 -sg41 -g42 -sg43 -S'132925586' -p162 -sg45 -S'CC' -p163 -sssg53 -(dp164 -g35 -S'NC_000009.11:g.135800973dup' -p165 -sg37 -(dp166 -g39 -g57 -sg41 -g42 -sg43 -S'135800973' -p167 -sg45 -S'CC' -p168 -sssg60 -(dp169 -g35 -S'NC_000009.12:g.132925586dup' -p170 -sg37 -(dp171 -g39 -g57 -sg41 -g42 -sg43 -S'132925586' -p172 -sg45 -S'CC' -p173 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant163.txt b/VariantValidator/testing/testOutputsMasterITS/variant163.txt deleted file mode 100644 index f1045065..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant163.txt +++ /dev/null @@ -1,354 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_001243246.1:c.2073G>A' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_5t3:c.2073G>A' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens prolyl 3-hydroxylase 1 (P3H1), transcript variant 3, mRNA -p15 -sS'gene_symbol' -p16 -S'P3H1' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_001230175.1:p.(Ala691=)' -p21 -sS'slr' -p22 -S'NP_001230175.1:p.(A691=)' -p23 -ssS'submitted_variant' -p24 -S'1-43212925-C-T' -p25 -sS'genome_context_intronic_sequence' -p26 -g11 -sS'HGVS_LRG_variant' -p27 -g11 -sS'HGVS_transcript_variant' -p28 -S'NM_001243246.1:c.2073G>A' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -g11 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000001.10:g.43212925C>T' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr1' -p40 -sS'ref' -p41 -VC -p42 -sS'pos' -p43 -S'43212925' -p44 -sS'alt' -p45 -VT -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000001.11:g.42747254C>T' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -g42 -sg43 -S'42747254' -p51 -sg45 -g46 -sssS'GRCh37' -p52 -(dp53 -g35 -S'NC_000001.10:g.43212925C>T' -p54 -sg37 -(dp55 -g39 -S'1' -p56 -sg41 -g42 -sg43 -S'43212925' -p57 -sg45 -g46 -sssS'GRCh38' -p58 -(dp59 -g35 -S'NC_000001.11:g.42747254C>T' -p60 -sg37 -(dp61 -g39 -g56 -sg41 -g42 -sg43 -S'42747254' -p62 -sg45 -g46 -sssssS'NM_001146289.1:c.2073G>A' -p63 -(dp64 -g5 -S'LRG_5t2:c.2073G>A' -p65 -sg7 -(lp66 -sg10 -g11 -sg12 -(lp67 -sg14 -VHomo sapiens prolyl 3-hydroxylase 1 (P3H1), 
transcript variant 2, mRNA -p68 -sg16 -S'P3H1' -p69 -sg18 -(dp70 -g20 -S'NP_001139761.1:p.(Ala691=)' -p71 -sg22 -S'NP_001139761.1:p.(A691=)' -p72 -ssg24 -g25 -sg26 -g11 -sg27 -S'LRG_5:g.24831G>A' -p73 -sg28 -S'NM_001146289.1:c.2073G>A' -p74 -sg30 -S'NG_008123.1:g.24831G>A' -p75 -sg31 -(dp76 -g33 -(dp77 -g35 -S'NC_000001.10:g.43212925C>T' -p78 -sg37 -(dp79 -g39 -g40 -sg41 -g42 -sg43 -S'43212925' -p80 -sg45 -g46 -sssg47 -(dp81 -g35 -S'NC_000001.11:g.42747254C>T' -p82 -sg37 -(dp83 -g39 -g40 -sg41 -g42 -sg43 -S'42747254' -p84 -sg45 -g46 -sssg52 -(dp85 -g35 -S'NC_000001.10:g.43212925C>T' -p86 -sg37 -(dp87 -g39 -g56 -sg41 -g42 -sg43 -S'43212925' -p88 -sg45 -g46 -sssg58 -(dp89 -g35 -S'NC_000001.11:g.42747254C>T' -p90 -sg37 -(dp91 -g39 -g56 -sg41 -g42 -sg43 -S'42747254' -p92 -sg45 -g46 -sssssS'NM_022356.3:c.2055+18G>A' -p93 -(dp94 -g5 -S'LRG_5t1:c.2055+18G>A' -p95 -sg7 -(lp96 -sg10 -S'NG_008123.1(NM_022356.3):c.2055+18G>A' -p97 -sg12 -(lp98 -sg14 -VHomo sapiens prolyl 3-hydroxylase 1 (P3H1), transcript variant 1, mRNA -p99 -sg16 -S'P3H1' -p100 -sg18 -(dp101 -g20 -S'NP_071751.3(LRG_5p1):p.?' -p102 -sg22 -S'NP_071751.3:p.?' -p103 -ssg24 -g25 -sg26 -S'NC_000001.10(NM_022356.3):c.2055+18G>A' -p104 -sg27 -S'LRG_5:g.24831G>A' -p105 -sg28 -S'NM_022356.3:c.2055+18G>A' -p106 -sg30 -S'NG_008123.1:g.24831G>A' -p107 -sg31 -(dp108 -g33 -(dp109 -g35 -S'NC_000001.10:g.43212925C>T' -p110 -sg37 -(dp111 -g39 -g40 -sg41 -g42 -sg43 -S'43212925' -p112 -sg45 -g46 -sssg47 -(dp113 -g35 -S'NC_000001.11:g.42747254C>T' -p114 -sg37 -(dp115 -g39 -g40 -sg41 -g42 -sg43 -S'42747254' -p116 -sg45 -g46 -sssg52 -(dp117 -g35 -S'NC_000001.10:g.43212925C>T' -p118 -sg37 -(dp119 -g39 -g56 -sg41 -g42 -sg43 -S'43212925' -p120 -sg45 -g46 -sssg58 -(dp121 -g35 -S'NC_000001.11:g.42747254C>T' -p122 -sg37 -(dp123 -g39 -g56 -sg41 -g42 -sg43 -S'42747254' -p124 -sg45 -g46 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant164.txt b/VariantValidator/testing/testOutputsMasterITS/variant164.txt deleted file mode 100644 index 0169929d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant164.txt +++ /dev/null @@ -1,129 +0,0 @@ -(dp0 -S'NM_001194958.2:c.20C>A' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NM_001194958.2:c.20C>A cannot be mapped directly to genome build GRCh37' -p7 -aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -(dp12 -S'GRCh37' -p13 -(dp14 -S'HGVS_genomic_description' -p15 -S'NW_003315950.2:g.355171C>A' -p16 -sS'vcf' -p17 -(dp18 -S'chr' -p19 -S'HG987_PATCH' -p20 -sS'ref' -p21 -S'C' -p22 -sS'pos' -p23 -S'355171' -p24 -sS'alt' -p25 -S'A' -p26 -sssasS'transcript_description' -p27 -VHomo sapiens potassium voltage-gated channel subfamily J member 18 (KCNJ18), mRNA -p28 -sS'gene_symbol' -p29 -S'KCNJ18' -p30 -sS'HGVS_predicted_protein_consequence' -p31 -(dp32 -S'tlr' -p33 -S'NP_001181887.2:p.(Ala7Asp)' -p34 -sS'slr' -p35 -S'NP_001181887.2:p.(A7D)' -p36 -ssS'submitted_variant' -p37 -S'HG987_PATCH-355171-C-A' -p38 -sS'genome_context_intronic_sequence' -p39 -g4 -sS'HGVS_LRG_variant' -p40 -g4 -sS'HGVS_transcript_variant' -p41 -S'NM_001194958.2:c.20C>A' -p42 -sS'HGVS_RefSeqGene_variant' -p43 -S'NG_033093.1:g.15284C>A' -p44 -sS'primary_assembly_loci' -p45 -(dp46 -S'GRCh38' -p47 -(dp48 -g15 -S'NC_000017.11:g.21702806C>A' -p49 -sg17 -(dp50 -g19 -S'17' -p51 -sg21 -g22 -sg23 -S'21702806' -p52 -sg25 -g26 -sssS'hg38' -p53 -(dp54 -g15 -S'NC_000017.11:g.21702806C>A' -p55 -sg17 -(dp56 -g19 -S'chr17' -p57 -sg21 -g22 -sg23 -S'21702806' -p58 -sg25 -g26 -sssssS'flag' -p59 -S'gene_variant' -p60 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant165.txt b/VariantValidator/testing/testOutputsMasterITS/variant165.txt deleted file mode 100644 index aa09f742..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant165.txt +++ /dev/null @@ -1,531 +0,0 @@ -(dp0 -S'NM_000022.3:c.534A>G' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens adenosine deaminase (ADA), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'ADA' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_000013.2(LRG_16p1):p.(Val178=)' -p18 -sS'slr' -p19 -S'NP_000013.2:p.(V178=)' -p20 -ssS'submitted_variant' -p21 -S'20-43252915-T-C' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_000022.3:c.534A>G' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000020.10:g.43252915T>C' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr20' -p37 -sS'ref' -p38 -VT -p39 -sS'pos' -p40 -S'43252915' -p41 -sS'alt' -p42 -VC -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000020.11:g.44624274T>C' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'44624274' -p48 -sg42 -g43 -sssS'GRCh37' -p49 -(dp50 -g32 -S'NC_000020.10:g.43252915T>C' -p51 -sg34 -(dp52 -g36 -S'20' -p53 -sg38 -g39 -sg40 -S'43252915' -p54 -sg42 -g43 -sssS'GRCh38' -p55 -(dp56 -g32 -S'NC_000020.11:g.44624274T>C' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'44624274' -p59 -sg42 -g43 -sssssS'NM_001322051.1:c.534A>G' -p60 -(dp61 -g3 -g4 -sg5 -(lp62 -S'RefSeqGene record not available' -p63 -asg8 -g4 -sg9 -(lp64 -sg11 -VHomo sapiens adenosine deaminase (ADA), transcript variant 3, mRNA -p65 -sg13 -S'ADA' -p66 -sg15 -(dp67 
-g17 -S'NP_001308980.1:p.(Val178=)' -p68 -sg19 -S'NP_001308980.1:p.(V178=)' -p69 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001322051.1:c.534A>G' -p70 -sg27 -g4 -sg28 -(dp71 -g30 -(dp72 -g32 -S'NC_000020.10:g.43252915T>C' -p73 -sg34 -(dp74 -g36 -g37 -sg38 -g39 -sg40 -S'43252915' -p75 -sg42 -g43 -sssg44 -(dp76 -g32 -S'NC_000020.11:g.44624274T>C' -p77 -sg34 -(dp78 -g36 -g37 -sg38 -g39 -sg40 -S'44624274' -p79 -sg42 -g43 -sssg49 -(dp80 -g32 -S'NC_000020.10:g.43252915T>C' -p81 -sg34 -(dp82 -g36 -g53 -sg38 -g39 -sg40 -S'43252915' -p83 -sg42 -g43 -sssg55 -(dp84 -g32 -S'NC_000020.11:g.44624274T>C' -p85 -sg34 -(dp86 -g36 -g53 -sg38 -g39 -sg40 -S'44624274' -p87 -sg42 -g43 -sssssS'NM_000022.2:c.534A>G' -p88 -(dp89 -g3 -S'LRG_16t1:c.534A>G' -p90 -sg5 -(lp91 -S'A more recent version of the selected reference sequence NM_000022.2 is available (NM_000022.3)' -p92 -aS'NM_000022.3:c.534A>G MUST be fully validated prior to use in reports' -p93 -aS'select_variants=NM_000022.3:c.534A>G' -p94 -asg8 -g4 -sg9 -(lp95 -sg11 -VHomo sapiens adenosine deaminase (ADA), mRNA -p96 -sg13 -S'ADA' -p97 -sg15 -(dp98 -g17 -S'NP_000013.2(LRG_16p1):p.(Val178=)' -p99 -sg19 -S'NP_000013.2:p.(V178=)' -p100 -ssg21 -g22 -sg23 -g4 -sg24 -S'LRG_16:g.32462A>G' -p101 -sg25 -S'NM_000022.2:c.534A>G' -p102 -sg27 -S'NG_007385.1:g.32462A>G' -p103 -sg28 -(dp104 -g30 -(dp105 -g32 -S'NC_000020.10:g.43252915T>C' -p106 -sg34 -(dp107 -g36 -g37 -sg38 -g39 -sg40 -S'43252915' -p108 -sg42 -g43 -sssg49 -(dp109 -g32 -S'NC_000020.10:g.43252915T>C' -p110 -sg34 -(dp111 -g36 -g53 -sg38 -g39 -sg40 -S'43252915' -p112 -sg42 -g43 -sssssS'flag' -p113 -S'gene_variant' -p114 -sS'NM_001322050.1:c.129A>G' -p115 -(dp116 -g3 -g4 -sg5 -(lp117 -S'RefSeqGene record not available' -p118 -asg8 -g4 -sg9 -(lp119 -sg11 -VHomo sapiens adenosine deaminase (ADA), transcript variant 2, mRNA -p120 -sg13 -S'ADA' -p121 -sg15 -(dp122 -g17 -S'NP_001308979.1:p.(Val43=)' -p123 -sg19 -S'NP_001308979.1:p.(V43=)' -p124 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 
-S'NM_001322050.1:c.129A>G' -p125 -sg27 -g4 -sg28 -(dp126 -g30 -(dp127 -g32 -S'NC_000020.10:g.43252915T>C' -p128 -sg34 -(dp129 -g36 -g37 -sg38 -g39 -sg40 -S'43252915' -p130 -sg42 -g43 -sssg44 -(dp131 -g32 -S'NC_000020.11:g.44624274T>C' -p132 -sg34 -(dp133 -g36 -g37 -sg38 -g39 -sg40 -S'44624274' -p134 -sg42 -g43 -sssg49 -(dp135 -g32 -S'NC_000020.10:g.43252915T>C' -p136 -sg34 -(dp137 -g36 -g53 -sg38 -g39 -sg40 -S'43252915' -p138 -sg42 -g43 -sssg55 -(dp139 -g32 -S'NC_000020.11:g.44624274T>C' -p140 -sg34 -(dp141 -g36 -g53 -sg38 -g39 -sg40 -S'44624274' -p142 -sg42 -g43 -sssssS'NR_136160.1:n.685A>G' -p143 -(dp144 -g3 -g4 -sg5 -(lp145 -S'RefSeqGene record not available' -p146 -asg8 -g4 -sg9 -(lp147 -sg11 -VHomo sapiens adenosine deaminase (ADA), transcript variant 4, non-coding RNA -p148 -sg13 -S'ADA' -p149 -sg15 -(dp150 -g17 -S'Non-coding :n.' -p151 -sg19 -g4 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NR_136160.1:n.685A>G' -p152 -sg27 -g4 -sg28 -(dp153 -g30 -(dp154 -g32 -S'NC_000020.10:g.43252915T>C' -p155 -sg34 -(dp156 -g36 -g37 -sg38 -g39 -sg40 -S'43252915' -p157 -sg42 -g43 -sssg44 -(dp158 -g32 -S'NC_000020.11:g.44624274T>C' -p159 -sg34 -(dp160 -g36 -g37 -sg38 -g39 -sg40 -S'44624274' -p161 -sg42 -g43 -sssg49 -(dp162 -g32 -S'NC_000020.10:g.43252915T>C' -p163 -sg34 -(dp164 -g36 -g53 -sg38 -g39 -sg40 -S'43252915' -p165 -sg42 -g43 -sssg55 -(dp166 -g32 -S'NC_000020.11:g.44624274T>C' -p167 -sg34 -(dp168 -g36 -g53 -sg38 -g39 -sg40 -S'44624274' -p169 -sg42 -g43 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant166.txt b/VariantValidator/testing/testOutputsMasterITS/variant166.txt deleted file mode 100644 index 82175481..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant166.txt +++ /dev/null @@ -1,140 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_206933.2:c.6317C>G' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -g6 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens usherin (USH2A), transcript variant 2, mRNA -p13 -sS'gene_symbol' -p14 -S'USH2A' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_996816.2:p.(Thr2106Arg)' -p19 -sS'slr' -p20 -S'NP_996816.2:p.(T2106R)' -p21 -ssS'submitted_variant' -p22 -S'1-216219781-A-C' -p23 -sS'genome_context_intronic_sequence' -p24 -g6 -sS'HGVS_LRG_variant' -p25 -g6 -sS'HGVS_transcript_variant' -p26 -S'NM_206933.2:c.6317C>G' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -S'NG_009497.1:g.381958C>G' -p29 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000001.10:g.216219781A>C' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr1' -p39 -sS'ref' -p40 -S'A' -p41 -sS'pos' -p42 -S'216219781' -p43 -sS'alt' -p44 -VC -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000001.11:g.216046439A>C' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'216046439' -p50 -sg44 -g45 -sssS'GRCh37' -p51 -(dp52 -g34 -S'NC_000001.10:g.216219781A>C' -p53 -sg36 -(dp54 -g38 -S'1' -p55 -sg40 -g41 -sg42 -S'216219781' -p56 -sg44 -g45 -sssS'GRCh38' -p57 -(dp58 -g34 -S'NC_000001.11:g.216046439A>C' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'216046439' -p61 -sg44 -g45 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant167.txt b/VariantValidator/testing/testOutputsMasterITS/variant167.txt deleted file mode 100644 index 154d995c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant167.txt +++ /dev/null @@ -1,1266 +0,0 @@ -(dp0 -S'NM_005896.3:c.394C>G' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_610t1:c.394C>G' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Multiple ALT sequences detected' -p7 -aS'auto-submitting all possible combinations' -p8 -aS'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 1, mRNA -p15 -sS'gene_symbol' -p16 -S'IDH1' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_005887.2(LRG_610p1):p.(Arg132Gly)' -p21 -sS'slr' -p22 -S'NP_005887.2:p.(R132G)' -p23 -ssS'submitted_variant' -p24 -S'2-209113113-G-A,C,T' -p25 -sS'genome_context_intronic_sequence' -p26 -g11 -sS'HGVS_LRG_variant' -p27 -g11 -sS'HGVS_transcript_variant' -p28 -S'NM_005896.3:c.394C>G' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -g11 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000002.11:g.209113113G>C' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr2' -p40 -sS'ref' -p41 -VG -p42 -sS'pos' -p43 -S'209113113' -p44 -sS'alt' -p45 -VC -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000002.12:g.208248389G>C' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -g42 -sg43 -S'208248389' -p51 -sg45 -g46 -sssS'GRCh37' -p52 -(dp53 -g35 -S'NC_000002.11:g.209113113G>C' -p54 -sg37 -(dp55 -g39 -S'2' -p56 -sg41 -g42 -sg43 -S'209113113' -p57 -sg45 -g46 -sssS'GRCh38' -p58 -(dp59 -g35 -S'NC_000002.12:g.208248389G>C' -p60 -sg37 -(dp61 -g39 -g56 -sg41 -g42 -sg43 -S'208248389' -p62 -sg45 -g46 -sssssS'NM_001282387.1:c.394C>G' -p63 -(dp64 -g3 -S'LRG_610t2:c.394C>G' -p65 
-sg5 -(lp66 -S'Multiple ALT sequences detected' -p67 -aS'auto-submitting all possible combinations' -p68 -aS'RefSeqGene record not available' -p69 -asg10 -g11 -sg12 -(lp70 -sg14 -VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 3, mRNA -p71 -sg16 -S'IDH1' -p72 -sg18 -(dp73 -g20 -S'NP_001269316.1:p.(Arg132Gly)' -p74 -sg22 -S'NP_001269316.1:p.(R132G)' -p75 -ssg24 -g25 -sg26 -g11 -sg27 -g11 -sg28 -S'NM_001282387.1:c.394C>G' -p76 -sg30 -g11 -sg31 -(dp77 -g33 -(dp78 -g35 -S'NC_000002.11:g.209113113G>C' -p79 -sg37 -(dp80 -g39 -g40 -sg41 -g42 -sg43 -S'209113113' -p81 -sg45 -g46 -sssg47 -(dp82 -g35 -S'NC_000002.12:g.208248389G>C' -p83 -sg37 -(dp84 -g39 -g40 -sg41 -g42 -sg43 -S'208248389' -p85 -sg45 -g46 -sssg52 -(dp86 -g35 -S'NC_000002.11:g.209113113G>C' -p87 -sg37 -(dp88 -g39 -g56 -sg41 -g42 -sg43 -S'209113113' -p89 -sg45 -g46 -sssg58 -(dp90 -g35 -S'NC_000002.12:g.208248389G>C' -p91 -sg37 -(dp92 -g39 -g56 -sg41 -g42 -sg43 -S'208248389' -p93 -sg45 -g46 -sssssS'NM_001282387.1:c.394C>A' -p94 -(dp95 -g3 -S'LRG_610t2:c.394C>A' -p96 -sg5 -(lp97 -S'Multiple ALT sequences detected' -p98 -aS'auto-submitting all possible combinations' -p99 -aS'RefSeqGene record not available' -p100 -asg10 -g11 -sg12 -(lp101 -sg14 -VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 3, mRNA -p102 -sg16 -S'IDH1' -p103 -sg18 -(dp104 -g20 -S'NP_001269316.1:p.(Arg132Ser)' -p105 -sg22 -S'NP_001269316.1:p.(R132S)' -p106 -ssg24 -g25 -sg26 -g11 -sg27 -g11 -sg28 -S'NM_001282387.1:c.394C>A' -p107 -sg30 -g11 -sg31 -(dp108 -g33 -(dp109 -g35 -S'NC_000002.11:g.209113113G>T' -p110 -sg37 -(dp111 -g39 -g40 -sg41 -g42 -sg43 -S'209113113' -p112 -sg45 -VT -p113 -sssg47 -(dp114 -g35 -S'NC_000002.12:g.208248389G>T' -p115 -sg37 -(dp116 -g39 -g40 -sg41 -g42 -sg43 -S'208248389' -p117 -sg45 -g113 -sssg52 -(dp118 -g35 -S'NC_000002.11:g.209113113G>T' -p119 -sg37 -(dp120 -g39 -g56 -sg41 -g42 -sg43 -S'209113113' -p121 -sg45 -g113 -sssg58 -(dp122 -g35 
-S'NC_000002.12:g.208248389G>T' -p123 -sg37 -(dp124 -g39 -g56 -sg41 -g42 -sg43 -S'208248389' -p125 -sg45 -g113 -sssssS'NM_005896.3:c.394C>A' -p126 -(dp127 -g3 -S'LRG_610t1:c.394C>A' -p128 -sg5 -(lp129 -S'Multiple ALT sequences detected' -p130 -aS'auto-submitting all possible combinations' -p131 -aS'RefSeqGene record not available' -p132 -asg10 -g11 -sg12 -(lp133 -sg14 -VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 1, mRNA -p134 -sg16 -S'IDH1' -p135 -sg18 -(dp136 -g20 -S'NP_005887.2(LRG_610p1):p.(Arg132Ser)' -p137 -sg22 -S'NP_005887.2:p.(R132S)' -p138 -ssg24 -g25 -sg26 -g11 -sg27 -g11 -sg28 -S'NM_005896.3:c.394C>A' -p139 -sg30 -g11 -sg31 -(dp140 -g33 -(dp141 -g35 -S'NC_000002.11:g.209113113G>T' -p142 -sg37 -(dp143 -g39 -g40 -sg41 -g42 -sg43 -S'209113113' -p144 -sg45 -g113 -sssg47 -(dp145 -g35 -S'NC_000002.12:g.208248389G>T' -p146 -sg37 -(dp147 -g39 -g40 -sg41 -g42 -sg43 -S'208248389' -p148 -sg45 -g113 -sssg52 -(dp149 -g35 -S'NC_000002.11:g.209113113G>T' -p150 -sg37 -(dp151 -g39 -g56 -sg41 -g42 -sg43 -S'209113113' -p152 -sg45 -g113 -sssg58 -(dp153 -g35 -S'NC_000002.12:g.208248389G>T' -p154 -sg37 -(dp155 -g39 -g56 -sg41 -g42 -sg43 -S'208248389' -p156 -sg45 -g113 -sssssS'NM_001282386.1:c.394C>T' -p157 -(dp158 -g3 -S'LRG_610t3:c.394C>T' -p159 -sg5 -(lp160 -S'Multiple ALT sequences detected' -p161 -aS'auto-submitting all possible combinations' -p162 -aS'RefSeqGene record not available' -p163 -asg10 -g11 -sg12 -(lp164 -sg14 -VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 2, mRNA -p165 -sg16 -S'IDH1' -p166 -sg18 -(dp167 -g20 -S'NP_001269315.1:p.(Arg132Cys)' -p168 -sg22 -S'NP_001269315.1:p.(R132C)' -p169 -ssg24 -g25 -sg26 -g11 -sg27 -g11 -sg28 -S'NM_001282386.1:c.394C>T' -p170 -sg30 -g11 -sg31 -(dp171 -g33 -(dp172 -g35 -S'NC_000002.11:g.209113113G>A' -p173 -sg37 -(dp174 -g39 -g40 -sg41 -g42 -sg43 -S'209113113' -p175 -sg45 -VA -p176 -sssg47 -(dp177 -g35 -S'NC_000002.12:g.208248389G>A' -p178 
-sg37 -(dp179 -g39 -g40 -sg41 -g42 -sg43 -S'208248389' -p180 -sg45 -g176 -sssg52 -(dp181 -g35 -S'NC_000002.11:g.209113113G>A' -p182 -sg37 -(dp183 -g39 -g56 -sg41 -g42 -sg43 -S'209113113' -p184 -sg45 -g176 -sssg58 -(dp185 -g35 -S'NC_000002.12:g.208248389G>A' -p186 -sg37 -(dp187 -g39 -g56 -sg41 -g42 -sg43 -S'208248389' -p188 -sg45 -g176 -sssssS'NM_005896.2:c.394C>A' -p189 -(dp190 -g3 -g11 -sg5 -(lp191 -S'Multiple ALT sequences detected' -p192 -aS'auto-submitting all possible combinations' -p193 -aS'A more recent version of the selected reference sequence NM_005896.2 is available (NM_005896.3)' -p194 -aS'NM_005896.3:c.394C>A MUST be fully validated prior to use in reports' -p195 -aS'select_variants=NM_005896.3:c.394C>A' -p196 -asg10 -g11 -sg12 -(lp197 -sg14 -VHomo sapiens isocitrate dehydrogenase 1 (NADP+), soluble (IDH1), mRNA -p198 -sg16 -S'IDH1' -p199 -sg18 -(dp200 -g20 -S'NP_005887.2(LRG_610p1):p.(Arg132Ser)' -p201 -sg22 -S'NP_005887.2:p.(R132S)' -p202 -ssg24 -g25 -sg26 -g11 -sg27 -S'LRG_610:g.22686C>A' -p203 -sg28 -S'NM_005896.2:c.394C>A' -p204 -sg30 -S'NG_023319.2:g.22686C>A' -p205 -sg31 -(dp206 -g33 -(dp207 -g35 -S'NC_000002.11:g.209113113G>T' -p208 -sg37 -(dp209 -g39 -g40 -sg41 -g42 -sg43 -S'209113113' -p210 -sg45 -g113 -sssg52 -(dp211 -g35 -S'NC_000002.11:g.209113113G>T' -p212 -sg37 -(dp213 -g39 -g56 -sg41 -g42 -sg43 -S'209113113' -p214 -sg45 -g113 -sssssS'NM_005896.2:c.394C>G' -p215 -(dp216 -g3 -g11 -sg5 -(lp217 -S'Multiple ALT sequences detected' -p218 -aS'auto-submitting all possible combinations' -p219 -aS'A more recent version of the selected reference sequence NM_005896.2 is available (NM_005896.3)' -p220 -aS'NM_005896.3:c.394C>G MUST be fully validated prior to use in reports' -p221 -aS'select_variants=NM_005896.3:c.394C>G' -p222 -asg10 -g11 -sg12 -(lp223 -sg14 -VHomo sapiens isocitrate dehydrogenase 1 (NADP+), soluble (IDH1), mRNA -p224 -sg16 -S'IDH1' -p225 -sg18 -(dp226 -g20 -S'NP_005887.2(LRG_610p1):p.(Arg132Gly)' -p227 -sg22 
-S'NP_005887.2:p.(R132G)' -p228 -ssg24 -g25 -sg26 -g11 -sg27 -S'LRG_610:g.22686C>G' -p229 -sg28 -S'NM_005896.2:c.394C>G' -p230 -sg30 -S'NG_023319.2:g.22686C>G' -p231 -sg31 -(dp232 -g33 -(dp233 -g35 -S'NC_000002.11:g.209113113G>C' -p234 -sg37 -(dp235 -g39 -g40 -sg41 -g42 -sg43 -S'209113113' -p236 -sg45 -g46 -sssg52 -(dp237 -g35 -S'NC_000002.11:g.209113113G>C' -p238 -sg37 -(dp239 -g39 -g56 -sg41 -g42 -sg43 -S'209113113' -p240 -sg45 -g46 -sssssS'flag' -p241 -S'gene_variant' -p242 -sS'NM_005896.3:c.394C>T' -p243 -(dp244 -g3 -S'LRG_610t1:c.394C>T' -p245 -sg5 -(lp246 -S'Multiple ALT sequences detected' -p247 -aS'auto-submitting all possible combinations' -p248 -aS'RefSeqGene record not available' -p249 -asg10 -g11 -sg12 -(lp250 -sg14 -VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 1, mRNA -p251 -sg16 -S'IDH1' -p252 -sg18 -(dp253 -g20 -S'NP_005887.2(LRG_610p1):p.(Arg132Cys)' -p254 -sg22 -S'NP_005887.2:p.(R132C)' -p255 -ssg24 -g25 -sg26 -g11 -sg27 -g11 -sg28 -S'NM_005896.3:c.394C>T' -p256 -sg30 -g11 -sg31 -(dp257 -g33 -(dp258 -g35 -S'NC_000002.11:g.209113113G>A' -p259 -sg37 -(dp260 -g39 -g40 -sg41 -g42 -sg43 -S'209113113' -p261 -sg45 -g176 -sssg47 -(dp262 -g35 -S'NC_000002.12:g.208248389G>A' -p263 -sg37 -(dp264 -g39 -g40 -sg41 -g42 -sg43 -S'208248389' -p265 -sg45 -g176 -sssg52 -(dp266 -g35 -S'NC_000002.11:g.209113113G>A' -p267 -sg37 -(dp268 -g39 -g56 -sg41 -g42 -sg43 -S'209113113' -p269 -sg45 -g176 -sssg58 -(dp270 -g35 -S'NC_000002.12:g.208248389G>A' -p271 -sg37 -(dp272 -g39 -g56 -sg41 -g42 -sg43 -S'208248389' -p273 -sg45 -g176 -sssssS'NM_001282387.1:c.394C>T' -p274 -(dp275 -g3 -S'LRG_610t2:c.394C>T' -p276 -sg5 -(lp277 -S'Multiple ALT sequences detected' -p278 -aS'auto-submitting all possible combinations' -p279 -aS'RefSeqGene record not available' -p280 -asg10 -g11 -sg12 -(lp281 -sg14 -VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 3, mRNA -p282 -sg16 -S'IDH1' -p283 -sg18 -(dp284 -g20 
-S'NP_001269316.1:p.(Arg132Cys)' -p285 -sg22 -S'NP_001269316.1:p.(R132C)' -p286 -ssg24 -g25 -sg26 -g11 -sg27 -g11 -sg28 -S'NM_001282387.1:c.394C>T' -p287 -sg30 -g11 -sg31 -(dp288 -g33 -(dp289 -g35 -S'NC_000002.11:g.209113113G>A' -p290 -sg37 -(dp291 -g39 -g40 -sg41 -g42 -sg43 -S'209113113' -p292 -sg45 -g176 -sssg47 -(dp293 -g35 -S'NC_000002.12:g.208248389G>A' -p294 -sg37 -(dp295 -g39 -g40 -sg41 -g42 -sg43 -S'208248389' -p296 -sg45 -g176 -sssg52 -(dp297 -g35 -S'NC_000002.11:g.209113113G>A' -p298 -sg37 -(dp299 -g39 -g56 -sg41 -g42 -sg43 -S'209113113' -p300 -sg45 -g176 -sssg58 -(dp301 -g35 -S'NC_000002.12:g.208248389G>A' -p302 -sg37 -(dp303 -g39 -g56 -sg41 -g42 -sg43 -S'208248389' -p304 -sg45 -g176 -sssssS'NM_001282386.1:c.394C>G' -p305 -(dp306 -g3 -S'LRG_610t3:c.394C>G' -p307 -sg5 -(lp308 -S'Multiple ALT sequences detected' -p309 -aS'auto-submitting all possible combinations' -p310 -aS'RefSeqGene record not available' -p311 -asg10 -g11 -sg12 -(lp312 -sg14 -VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 2, mRNA -p313 -sg16 -S'IDH1' -p314 -sg18 -(dp315 -g20 -S'NP_001269315.1:p.(Arg132Gly)' -p316 -sg22 -S'NP_001269315.1:p.(R132G)' -p317 -ssg24 -g25 -sg26 -g11 -sg27 -g11 -sg28 -S'NM_001282386.1:c.394C>G' -p318 -sg30 -g11 -sg31 -(dp319 -g33 -(dp320 -g35 -S'NC_000002.11:g.209113113G>C' -p321 -sg37 -(dp322 -g39 -g40 -sg41 -g42 -sg43 -S'209113113' -p323 -sg45 -g46 -sssg47 -(dp324 -g35 -S'NC_000002.12:g.208248389G>C' -p325 -sg37 -(dp326 -g39 -g40 -sg41 -g42 -sg43 -S'208248389' -p327 -sg45 -g46 -sssg52 -(dp328 -g35 -S'NC_000002.11:g.209113113G>C' -p329 -sg37 -(dp330 -g39 -g56 -sg41 -g42 -sg43 -S'209113113' -p331 -sg45 -g46 -sssg58 -(dp332 -g35 -S'NC_000002.12:g.208248389G>C' -p333 -sg37 -(dp334 -g39 -g56 -sg41 -g42 -sg43 -S'208248389' -p335 -sg45 -g46 -sssssS'NM_005896.2:c.394C>T' -p336 -(dp337 -g3 -g11 -sg5 -(lp338 -S'Multiple ALT sequences detected' -p339 -aS'auto-submitting all possible combinations' -p340 -aS'A more recent version 
of the selected reference sequence NM_005896.2 is available (NM_005896.3)' -p341 -aS'NM_005896.3:c.394C>T MUST be fully validated prior to use in reports' -p342 -aS'select_variants=NM_005896.3:c.394C>T' -p343 -asg10 -g11 -sg12 -(lp344 -sg14 -VHomo sapiens isocitrate dehydrogenase 1 (NADP+), soluble (IDH1), mRNA -p345 -sg16 -S'IDH1' -p346 -sg18 -(dp347 -g20 -S'NP_005887.2(LRG_610p1):p.(Arg132Cys)' -p348 -sg22 -S'NP_005887.2:p.(R132C)' -p349 -ssg24 -g25 -sg26 -g11 -sg27 -S'LRG_610:g.22686C>T' -p350 -sg28 -S'NM_005896.2:c.394C>T' -p351 -sg30 -S'NG_023319.2:g.22686C>T' -p352 -sg31 -(dp353 -g33 -(dp354 -g35 -S'NC_000002.11:g.209113113G>A' -p355 -sg37 -(dp356 -g39 -g40 -sg41 -g42 -sg43 -S'209113113' -p357 -sg45 -g176 -sssg52 -(dp358 -g35 -S'NC_000002.11:g.209113113G>A' -p359 -sg37 -(dp360 -g39 -g56 -sg41 -g42 -sg43 -S'209113113' -p361 -sg45 -g176 -sssssS'NM_001282386.1:c.394C>A' -p362 -(dp363 -g3 -S'LRG_610t3:c.394C>A' -p364 -sg5 -(lp365 -S'Multiple ALT sequences detected' -p366 -aS'auto-submitting all possible combinations' -p367 -aS'RefSeqGene record not available' -p368 -asg10 -g11 -sg12 -(lp369 -sg14 -VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 2, mRNA -p370 -sg16 -S'IDH1' -p371 -sg18 -(dp372 -g20 -S'NP_001269315.1:p.(Arg132Ser)' -p373 -sg22 -S'NP_001269315.1:p.(R132S)' -p374 -ssg24 -g25 -sg26 -g11 -sg27 -g11 -sg28 -S'NM_001282386.1:c.394C>A' -p375 -sg30 -g11 -sg31 -(dp376 -g33 -(dp377 -g35 -S'NC_000002.11:g.209113113G>T' -p378 -sg37 -(dp379 -g39 -g40 -sg41 -g42 -sg43 -S'209113113' -p380 -sg45 -g113 -sssg47 -(dp381 -g35 -S'NC_000002.12:g.208248389G>T' -p382 -sg37 -(dp383 -g39 -g40 -sg41 -g42 -sg43 -S'208248389' -p384 -sg45 -g113 -sssg52 -(dp385 -g35 -S'NC_000002.11:g.209113113G>T' -p386 -sg37 -(dp387 -g39 -g56 -sg41 -g42 -sg43 -S'209113113' -p388 -sg45 -g113 -sssg58 -(dp389 -g35 -S'NC_000002.12:g.208248389G>T' -p390 -sg37 -(dp391 -g39 -g56 -sg41 -g42 -sg43 -S'208248389' -p392 -sg45 -g113 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant168.txt b/VariantValidator/testing/testOutputsMasterITS/variant168.txt deleted file mode 100644 index 4e3ace5a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant168.txt +++ /dev/null @@ -1,844 +0,0 @@ -(dp0 -S'NM_001204314.1:c.*6525_*6526=' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'A more recent version of the selected reference sequence NM_001204314.1 is available (NM_001204314.2)' -p7 -aS'NM_001204314.2:c.*6525_*6526delCTinsTG MUST be fully validated prior to use in reports' -p8 -aS'select_variants=NM_001204314.2:c.*6525_*6526delinsTG' -p9 -aS'RefSeqGene record not available' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -g4 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA -p15 -sS'gene_symbol' -p16 -S'PRLR' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_001191243.1:p.?' -p21 -sS'slr' -p22 -S'NP_001191243.1:p.?' 
-p23 -ssS'submitted_variant' -p24 -S'NC_000005.9:g.35058665_35058666CA=' -p25 -sS'genome_context_intronic_sequence' -p26 -g4 -sS'HGVS_LRG_variant' -p27 -g4 -sS'HGVS_transcript_variant' -p28 -S'NM_001204314.1:c.*6525_*6526=' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -g4 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000005.9:g.35058665_35058666=' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr5' -p40 -sS'ref' -p41 -VCA -p42 -sS'pos' -p43 -S'35058665' -p44 -sS'alt' -p45 -g42 -sssS'GRCh37' -p46 -(dp47 -g35 -S'NC_000005.9:g.35058665_35058666=' -p48 -sg37 -(dp49 -g39 -S'5' -p50 -sg41 -g42 -sg43 -S'35058665' -p51 -sg45 -g42 -sssssS'NM_001204314.2:c.*6528del' -p52 -(dp53 -g3 -g4 -sg5 -(lp54 -S'The displayed variants may be artefacts of aligning NM_001204314.2 with genome build GRCh37' -p55 -aS'NM_001204314.2:c.*6527_*6530 contains 1 transcript base(s) that fail to align to chromosome NC_000005.9' -p56 -aS'Caution should be used when reporting the displayed variant descriptions' -p57 -aS'If you are unsure, please contact admin' -p58 -aS'RefSeqGene record not available' -p59 -asg11 -g4 -sg12 -(lp60 -sg14 -VHomo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA -p61 -sg16 -S'PRLR' -p62 -sg18 -(dp63 -g20 -S'NP_001191243.1:p.?' -p64 -sg22 -S'NP_001191243.1:p.?' 
-p65 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_001204314.2:c.*6528del' -p66 -sg30 -g4 -sg31 -(dp67 -S'GRCh38' -p68 -(dp69 -g35 -S'NC_000005.10:g.35058563del' -p70 -sg37 -(dp71 -g39 -g50 -sg41 -S'CA' -p72 -sg43 -S'35058560' -p73 -sg45 -S'C' -p74 -sssg46 -(dp75 -g35 -S'NC_000005.9:g.35058662_35058668=' -p76 -sg37 -(dp77 -g39 -g50 -sg41 -S'AGACAAG' -p78 -sg43 -S'35058662' -p79 -sg45 -g78 -sssS'hg38' -p80 -(dp81 -g35 -S'NC_000005.10:g.35058563del' -p82 -sg37 -(dp83 -g39 -g40 -sg41 -S'CA' -p84 -sg43 -S'35058560' -p85 -sg45 -g74 -sssg33 -(dp86 -g35 -S'NC_000005.9:g.35058662_35058668=' -p87 -sg37 -(dp88 -g39 -g40 -sg41 -g78 -sg43 -S'35058662' -p89 -sg45 -g78 -sssssS'NM_001204317.1:c.856-9153_856-9152=' -p90 -(dp91 -g3 -g4 -sg5 -(lp92 -S'RefSeqGene record not available' -p93 -asg11 -g4 -sg12 -(lp94 -sg14 -VHomo sapiens prolactin receptor (PRLR), transcript variant 4, mRNA -p95 -sg16 -S'PRLR' -p96 -sg18 -(dp97 -g20 -S'NP_001191246.1:p.?' -p98 -sg22 -S'NP_001191246.1:p.?' -p99 -ssg24 -g25 -sg26 -S'NC_000005.9(NM_001204317.1):c.856-9153_856-9152=' -p100 -sg27 -g4 -sg28 -S'NM_001204317.1:c.856-9153_856-9152=' -p101 -sg30 -g4 -sg31 -(dp102 -g68 -(dp103 -g35 -S'NC_000005.10:g.35058560_35058561=' -p104 -sg37 -(dp105 -g39 -g50 -sg41 -S'CA' -p106 -sg43 -S'35058560' -p107 -sg45 -g106 -sssg46 -(dp108 -g35 -S'NC_000005.9:g.35058665_35058666=' -p109 -sg37 -(dp110 -g39 -g50 -sg41 -S'CA' -p111 -sg43 -S'35058665' -p112 -sg45 -g111 -sssg80 -(dp113 -g35 -S'NC_000005.10:g.35058560_35058561=' -p114 -sg37 -(dp115 -g39 -g40 -sg41 -g106 -sg43 -S'35058560' -p116 -sg45 -g106 -sssg33 -(dp117 -g35 -S'NC_000005.9:g.35058665_35058666=' -p118 -sg37 -(dp119 -g39 -g40 -sg41 -g111 -sg43 -S'35058665' -p120 -sg45 -g111 -sssssS'NM_001204316.1:c.1009+7385_1009+7386=' -p121 -(dp122 -g3 -g4 -sg5 -(lp123 -S'RefSeqGene record not available' -p124 -asg11 -g4 -sg12 -(lp125 -sg14 -VHomo sapiens prolactin receptor (PRLR), transcript variant 3, mRNA -p126 -sg16 -S'PRLR' -p127 -sg18 -(dp128 -g20 
-S'NP_001191245.1:p.?' -p129 -sg22 -S'NP_001191245.1:p.?' -p130 -ssg24 -g25 -sg26 -S'NC_000005.9(NM_001204316.1):c.1009+7385_1009+7386=' -p131 -sg27 -g4 -sg28 -S'NM_001204316.1:c.1009+7385_1009+7386=' -p132 -sg30 -g4 -sg31 -(dp133 -g68 -(dp134 -g35 -S'NC_000005.10:g.35058563_35058564=' -p135 -sg37 -(dp136 -g39 -g50 -sg41 -S'AG' -p137 -sg43 -S'35058563' -p138 -sg45 -g137 -sssg46 -(dp139 -g35 -S'NC_000005.9:g.35058665_35058666=' -p140 -sg37 -(dp141 -g39 -g50 -sg41 -g111 -sg43 -S'35058665' -p142 -sg45 -g111 -sssg80 -(dp143 -g35 -S'NC_000005.10:g.35058563_35058564=' -p144 -sg37 -(dp145 -g39 -g40 -sg41 -g137 -sg43 -S'35058563' -p146 -sg45 -g137 -sssg33 -(dp147 -g35 -S'NC_000005.9:g.35058665_35058666=' -p148 -sg37 -(dp149 -g39 -g40 -sg41 -g111 -sg43 -S'35058665' -p150 -sg45 -g111 -sssssS'flag' -p151 -S'gene_variant' -p152 -sS'NR_037910.1:n.828-9153_828-9152=' -p153 -(dp154 -g3 -g4 -sg5 -(lp155 -S'RefSeqGene record not available' -p156 -asg11 -g4 -sg12 -(lp157 -sg14 -VHomo sapiens prolactin receptor (PRLR), transcript variant 7, non-coding RNA -p158 -sg16 -S'PRLR' -p159 -sg18 -(dp160 -g20 -S'Non-coding :n.' 
-p161 -sg22 -g4 -ssg24 -g25 -sg26 -S'NC_000005.9(NR_037910.1):c.828-9153_828-9152=' -p162 -sg27 -g4 -sg28 -S'NR_037910.1:n.828-9153_828-9152=' -p163 -sg30 -g4 -sg31 -(dp164 -g68 -(dp165 -g35 -S'NC_000005.10:g.35058560_35058561=' -p166 -sg37 -(dp167 -g39 -g50 -sg41 -g106 -sg43 -S'35058560' -p168 -sg45 -g106 -sssg46 -(dp169 -g35 -S'NC_000005.9:g.35058665_35058666=' -p170 -sg37 -(dp171 -g39 -g50 -sg41 -g111 -sg43 -S'35058665' -p172 -sg45 -g111 -sssg80 -(dp173 -g35 -S'NC_000005.10:g.35058560_35058561=' -p174 -sg37 -(dp175 -g39 -g40 -sg41 -g106 -sg43 -S'35058560' -p176 -sg45 -g106 -sssg33 -(dp177 -g35 -S'NC_000005.9:g.35058665_35058666=' -p178 -sg37 -(dp179 -g39 -g40 -sg41 -g111 -sg43 -S'35058665' -p180 -sg45 -g111 -sssssS'NM_001204318.1:c.686-9153_686-9152=' -p181 -(dp182 -g3 -g4 -sg5 -(lp183 -S'RefSeqGene record not available' -p184 -asg11 -g4 -sg12 -(lp185 -sg14 -VHomo sapiens prolactin receptor (PRLR), transcript variant 5, mRNA -p186 -sg16 -S'PRLR' -p187 -sg18 -(dp188 -g20 -S'NP_001191247.1:p.?' -p189 -sg22 -S'NP_001191247.1:p.?' 
-p190 -ssg24 -g25 -sg26 -S'NC_000005.9(NM_001204318.1):c.686-9153_686-9152=' -p191 -sg27 -g4 -sg28 -S'NM_001204318.1:c.686-9153_686-9152=' -p192 -sg30 -g4 -sg31 -(dp193 -g68 -(dp194 -g35 -S'NC_000005.10:g.35058560_35058561=' -p195 -sg37 -(dp196 -g39 -g50 -sg41 -g106 -sg43 -S'35058560' -p197 -sg45 -g106 -sssg46 -(dp198 -g35 -S'NC_000005.9:g.35058665_35058666=' -p199 -sg37 -(dp200 -g39 -g50 -sg41 -g111 -sg43 -S'35058665' -p201 -sg45 -g111 -sssg80 -(dp202 -g35 -S'NC_000005.10:g.35058560_35058561=' -p203 -sg37 -(dp204 -g39 -g40 -sg41 -g106 -sg43 -S'35058560' -p205 -sg45 -g106 -sssg33 -(dp206 -g35 -S'NC_000005.9:g.35058665_35058666=' -p207 -sg37 -(dp208 -g39 -g40 -sg41 -g111 -sg43 -S'35058665' -p209 -sg45 -g111 -sssssS'NM_000949.5:c.*6525_*6526=' -p210 -(dp211 -g3 -g4 -sg5 -(lp212 -S'A more recent version of the selected reference sequence NM_000949.5 is available (NM_000949.6)' -p213 -aS'NM_000949.6:c.*6525_*6526delCTinsTG MUST be fully validated prior to use in reports' -p214 -aS'select_variants=NM_000949.6:c.*6525_*6526delinsTG' -p215 -asg11 -g4 -sg12 -(lp216 -sg14 -VHomo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA -p217 -sg16 -S'PRLR' -p218 -sg18 -(dp219 -g20 -S'NP_000940.1:p.?' -p220 -sg22 -S'NP_000940.1:p.?' 
-p221 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_000949.5:c.*6525_*6526=' -p222 -sg30 -S'NG_029042.1:g.177158_177159=' -p223 -sg31 -(dp224 -g33 -(dp225 -g35 -S'NC_000005.9:g.35058665_35058666=' -p226 -sg37 -(dp227 -g39 -g40 -sg41 -VCA -p228 -sg43 -S'35058665' -p229 -sg45 -g228 -sssg46 -(dp230 -g35 -S'NC_000005.9:g.35058665_35058666=' -p231 -sg37 -(dp232 -g39 -g50 -sg41 -g228 -sg43 -S'35058665' -p233 -sg45 -g228 -sssssS'NM_000949.6:c.*6528del' -p234 -(dp235 -g3 -g4 -sg5 -(lp236 -S'The displayed variants may be artefacts of aligning NM_000949.6 with genome build GRCh37' -p237 -aS'NM_000949.6:c.*6527_*6530 contains 1 transcript base(s) that fail to align to chromosome NC_000005.9' -p238 -aS'Caution should be used when reporting the displayed variant descriptions' -p239 -aS'If you are unsure, please contact admin' -p240 -aS'RefSeqGene record not available' -p241 -asg11 -g4 -sg12 -(lp242 -sg14 -VHomo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA -p243 -sg16 -S'PRLR' -p244 -sg18 -(dp245 -g20 -S'NP_000940.1:p.?' -p246 -sg22 -S'NP_000940.1:p.?' -p247 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_000949.6:c.*6528del' -p248 -sg30 -g4 -sg31 -(dp249 -g68 -(dp250 -g35 -S'NC_000005.10:g.35058563del' -p251 -sg37 -(dp252 -g39 -g50 -sg41 -S'CA' -p253 -sg43 -S'35058560' -p254 -sg45 -g74 -sssg46 -(dp255 -g35 -S'NC_000005.9:g.35058662_35058668=' -p256 -sg37 -(dp257 -g39 -g50 -sg41 -g78 -sg43 -S'35058662' -p258 -sg45 -g78 -sssg80 -(dp259 -g35 -S'NC_000005.10:g.35058563del' -p260 -sg37 -(dp261 -g39 -g40 -sg41 -S'CA' -p262 -sg43 -S'35058560' -p263 -sg45 -g74 -sssg33 -(dp264 -g35 -S'NC_000005.9:g.35058662_35058668=' -p265 -sg37 -(dp266 -g39 -g40 -sg41 -g78 -sg43 -S'35058662' -p267 -sg45 -g78 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant169.txt b/VariantValidator/testing/testOutputsMasterITS/variant169.txt deleted file mode 100644 index 125edefb..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant169.txt +++ /dev/null @@ -1,145 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_015120.4:c.1580_1581insCCT' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_741t1:c.1580_1581insCCT' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA -p14 -sS'gene_symbol' -p15 -S'ALMS1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_055935.4(LRG_741p1):p.(Leu527dup)' -p20 -sS'slr' -p21 -S'NP_055935.4:p.(L527dup)' -p22 -ssS'submitted_variant' -p23 -S'NC_000002.11:g.73675227_73675229delTCTinsTCTCTC' -p24 -sS'genome_context_intronic_sequence' -p25 -g10 -sS'HGVS_LRG_variant' -p26 -S'LRG_741:g.67352_67353insCCT' -p27 -sS'HGVS_transcript_variant' -p28 -S'NM_015120.4:c.1580_1581insCCT' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_011690.1:g.67352_67353insCCT' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000002.11:g.73675231_73675232insCCT' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr2' -p41 -sS'ref' -p42 -S'T' -p43 -sS'pos' -p44 -S'73675229' -p45 -sS'alt' -p46 -VTCTC -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000002.12:g.73448104_73448105insCCT' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -g43 -sg44 -S'73448102' -p52 -sg46 -VTCTC -p53 -sssS'GRCh37' -p54 -(dp55 -g36 -S'NC_000002.11:g.73675231_73675232insCCT' -p56 -sg38 -(dp57 -g40 -S'2' -p58 -sg42 -g43 -sg44 -S'73675229' -p59 -sg46 -VTCTC -p60 -sssS'GRCh38' -p61 -(dp62 -g36 -S'NC_000002.12:g.73448104_73448105insCCT' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -g43 -sg44 -S'73448102' -p65 -sg46 -VTCTC -p66 
-sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant17.txt b/VariantValidator/testing/testOutputsMasterITS/variant17.txt deleted file mode 100644 index d58e07c3..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant17.txt +++ /dev/null @@ -1,60 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant' -p7 -aS'Instead use NC_000011.9:g.5248381A=' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -g4 -sS'gene_symbol' -p13 -g4 -sS'HGVS_predicted_protein_consequence' -p14 -(dp15 -S'tlr' -p16 -g4 -sS'slr' -p17 -g4 -ssS'submitted_variant' -p18 -S'NM_000518.4:c.-130C>T' -p19 -sS'genome_context_intronic_sequence' -p20 -g4 -sS'HGVS_LRG_variant' -p21 -g4 -sS'HGVS_transcript_variant' -p22 -g4 -sS'HGVS_RefSeqGene_variant' -p23 -g4 -sS'primary_assembly_loci' -p24 -(dp25 -ssS'flag' -p26 -S'warning' -p27 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant170.txt b/VariantValidator/testing/testOutputsMasterITS/variant170.txt deleted file mode 100644 index e8c126e8..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant170.txt +++ /dev/null @@ -1,6 +0,0 @@ -(dp0 -S'ERROR' -p1 -S'Validation error' -p2 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant171.txt b/VariantValidator/testing/testOutputsMasterITS/variant171.txt deleted file mode 100644 index e8c126e8..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant171.txt +++ /dev/null @@ -1,6 +0,0 @@ -(dp0 -S'ERROR' -p1 -S'Validation error' -p2 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant172.txt b/VariantValidator/testing/testOutputsMasterITS/variant172.txt deleted file mode 100644 index 0832671f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant172.txt +++ /dev/null @@ -1,152 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000828.4:c.-2G>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37' -p9 -aS'NM_000828.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA -p18 -sS'gene_symbol' -p19 -S'GRIA3' -p20 -sS'HGVS_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_000819.3:p.?' -p24 -sS'slr' -p25 -S'NP_000819.3:p.?' 
-p26 -ssS'submitted_variant' -p27 -S'NM_000828.4:c.-2G>T' -p28 -sS'genome_context_intronic_sequence' -p29 -g6 -sS'HGVS_LRG_variant' -p30 -g6 -sS'HGVS_transcript_variant' -p31 -S'NM_000828.4:c.-2G>T' -p32 -sS'HGVS_RefSeqGene_variant' -p33 -g6 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000023.10:g.122318386_122318387insT' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chrX' -p43 -sS'ref' -p44 -S'A' -p45 -sS'pos' -p46 -S'122318386' -p47 -sS'alt' -p48 -S'AT' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000023.11:g.123184534G>T' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -VG -p54 -sg46 -S'123184534' -p55 -sg48 -VT -p56 -sssS'GRCh37' -p57 -(dp58 -g38 -S'NC_000023.10:g.122318386_122318387insT' -p59 -sg40 -(dp60 -g42 -S'X' -p61 -sg44 -g45 -sg46 -S'122318386' -p62 -sg48 -S'AT' -p63 -sssS'GRCh38' -p64 -(dp65 -g38 -S'NC_000023.11:g.123184534G>T' -p66 -sg40 -(dp67 -g42 -g61 -sg44 -g54 -sg46 -S'123184534' -p68 -sg48 -g56 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant173.txt b/VariantValidator/testing/testOutputsMasterITS/variant173.txt deleted file mode 100644 index 6f9c2b68..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant173.txt +++ /dev/null @@ -1,151 +0,0 @@ -(dp0 -S'NM_000828.4:c.-2G=' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37' -p7 -aS'NM_000828.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'RefSeqGene record not available' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA -p16 
-sS'gene_symbol' -p17 -S'GRIA3' -p18 -sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_000819.3:p.?' -p22 -sS'slr' -p23 -S'NP_000819.3:p.?' -p24 -ssS'submitted_variant' -p25 -S'NM_000828.4:c.-2G=' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'HGVS_LRG_variant' -p28 -g4 -sS'HGVS_transcript_variant' -p29 -S'NM_000828.4:c.-2G=' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000023.10:g.122318386_122318387insG' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chrX' -p41 -sS'ref' -p42 -S'A' -p43 -sS'pos' -p44 -S'122318386' -p45 -sS'alt' -p46 -VAG -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000023.11:g.123184534G=' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -VG -p52 -sg44 -S'123184534' -p53 -sg46 -g52 -sssS'GRCh37' -p54 -(dp55 -g36 -S'NC_000023.10:g.122318386_122318387insG' -p56 -sg38 -(dp57 -g40 -S'X' -p58 -sg42 -g43 -sg44 -S'122318386' -p59 -sg46 -VAG -p60 -sssS'GRCh38' -p61 -(dp62 -g36 -S'NC_000023.11:g.123184534G=' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -g52 -sg44 -S'123184534' -p65 -sg46 -g52 -sssssS'flag' -p66 -S'gene_variant' -p67 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant174.txt b/VariantValidator/testing/testOutputsMasterITS/variant174.txt deleted file mode 100644 index 3701765e..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant174.txt +++ /dev/null @@ -1,385 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000828.4:c.-2G>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000023.10:g.122318386A>AT automapped to NC_000023.10:g.122318386_122318387insT' -p9 -aS'The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37' -p10 -aS'NM_000828.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' -p11 -aS'Caution should be used when reporting the displayed variant descriptions' -p12 -aS'If you are unsure, please contact admin' -p13 -aS'RefSeqGene record not available' -p14 -asS'RefSeqGene_context_intronic_sequence' -p15 -g6 -sS'alt_genomic_loci' -p16 -(lp17 -sS'transcript_description' -p18 -VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA -p19 -sS'gene_symbol' -p20 -S'GRIA3' -p21 -sS'HGVS_predicted_protein_consequence' -p22 -(dp23 -S'tlr' -p24 -S'NP_000819.3:p.?' -p25 -sS'slr' -p26 -S'NP_000819.3:p.?' 
-p27 -ssS'submitted_variant' -p28 -S'X-122318386-A-AT' -p29 -sS'genome_context_intronic_sequence' -p30 -g6 -sS'HGVS_LRG_variant' -p31 -g6 -sS'HGVS_transcript_variant' -p32 -S'NM_000828.4:c.-2G>T' -p33 -sS'HGVS_RefSeqGene_variant' -p34 -g6 -sS'primary_assembly_loci' -p35 -(dp36 -S'hg19' -p37 -(dp38 -S'HGVS_genomic_description' -p39 -S'NC_000023.10:g.122318386_122318387insT' -p40 -sS'vcf' -p41 -(dp42 -S'chr' -p43 -S'chrX' -p44 -sS'ref' -p45 -S'A' -p46 -sS'pos' -p47 -S'122318386' -p48 -sS'alt' -p49 -S'AT' -p50 -sssS'hg38' -p51 -(dp52 -g39 -S'NC_000023.11:g.123184534G>T' -p53 -sg41 -(dp54 -g43 -g44 -sg45 -VG -p55 -sg47 -S'123184534' -p56 -sg49 -VT -p57 -sssS'GRCh37' -p58 -(dp59 -g39 -S'NC_000023.10:g.122318386_122318387insT' -p60 -sg41 -(dp61 -g43 -S'X' -p62 -sg45 -g46 -sg47 -S'122318386' -p63 -sg49 -S'AT' -p64 -sssS'GRCh38' -p65 -(dp66 -g39 -S'NC_000023.11:g.123184534G>T' -p67 -sg41 -(dp68 -g43 -g62 -sg45 -g55 -sg47 -S'123184534' -p69 -sg49 -g57 -sssssS'NM_001256743.1:c.-2G>T' -p70 -(dp71 -g5 -g6 -sg7 -(lp72 -S'NC_000023.10:g.122318386A>AT automapped to NC_000023.10:g.122318386_122318387insT' -p73 -aS'The displayed variants may be artefacts of aligning NM_001256743.1 with genome build GRCh37' -p74 -aS'NM_001256743.1:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' -p75 -aS'Caution should be used when reporting the displayed variant descriptions' -p76 -aS'If you are unsure, please contact admin' -p77 -aS'RefSeqGene record not available' -p78 -asg15 -g6 -sg16 -(lp79 -sg18 -VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 3, mRNA -p80 -sg20 -S'GRIA3' -p81 -sg22 -(dp82 -g24 -S'NP_001243672.1:p.?' -p83 -sg26 -S'NP_001243672.1:p.?' 
-p84 -ssg28 -g29 -sg30 -g6 -sg31 -g6 -sg32 -S'NM_001256743.1:c.-2G>T' -p85 -sg34 -g6 -sg35 -(dp86 -g37 -(dp87 -g39 -S'NC_000023.10:g.122318386_122318387insT' -p88 -sg41 -(dp89 -g43 -g44 -sg45 -g46 -sg47 -S'122318386' -p90 -sg49 -S'AT' -p91 -sssg51 -(dp92 -g39 -S'NC_000023.11:g.123184534G>T' -p93 -sg41 -(dp94 -g43 -g44 -sg45 -g55 -sg47 -S'123184534' -p95 -sg49 -g57 -sssg58 -(dp96 -g39 -S'NC_000023.10:g.122318386_122318387insT' -p97 -sg41 -(dp98 -g43 -g62 -sg45 -g46 -sg47 -S'122318386' -p99 -sg49 -S'AT' -p100 -sssg65 -(dp101 -g39 -S'NC_000023.11:g.123184534G>T' -p102 -sg41 -(dp103 -g43 -g62 -sg45 -g55 -sg47 -S'123184534' -p104 -sg49 -g57 -sssssS'NM_007325.4:c.-2G>T' -p105 -(dp106 -g5 -g6 -sg7 -(lp107 -S'NC_000023.10:g.122318386A>AT automapped to NC_000023.10:g.122318386_122318387insT' -p108 -aS'The displayed variants may be artefacts of aligning NM_007325.4 with genome build GRCh37' -p109 -aS'NM_007325.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' -p110 -aS'Caution should be used when reporting the displayed variant descriptions' -p111 -aS'If you are unsure, please contact admin' -p112 -asg15 -g6 -sg16 -(lp113 -sg18 -VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 1, mRNA -p114 -sg20 -S'GRIA3' -p115 -sg22 -(dp116 -g24 -S'NP_015564.4:p.?' -p117 -sg26 -S'NP_015564.4:p.?' 
-p118 -ssg28 -g29 -sg30 -g6 -sg31 -g6 -sg32 -S'NM_007325.4:c.-2G>T' -p119 -sg34 -S'NG_009377.1:g.5292G>T' -p120 -sg35 -(dp121 -g37 -(dp122 -g39 -S'NC_000023.10:g.122318386_122318387insT' -p123 -sg41 -(dp124 -g43 -g44 -sg45 -g46 -sg47 -S'122318386' -p125 -sg49 -S'AT' -p126 -sssg51 -(dp127 -g39 -S'NC_000023.11:g.123184534G>T' -p128 -sg41 -(dp129 -g43 -g44 -sg45 -g55 -sg47 -S'123184534' -p130 -sg49 -g57 -sssg58 -(dp131 -g39 -S'NC_000023.10:g.122318386_122318387insT' -p132 -sg41 -(dp133 -g43 -g62 -sg45 -g46 -sg47 -S'122318386' -p134 -sg49 -S'AT' -p135 -sssg65 -(dp136 -g39 -S'NC_000023.11:g.123184534G>T' -p137 -sg41 -(dp138 -g43 -g62 -sg45 -g55 -sg47 -S'123184534' -p139 -sg49 -g57 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant175.txt b/VariantValidator/testing/testOutputsMasterITS/variant175.txt deleted file mode 100644 index b317ee63..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant175.txt +++ /dev/null @@ -1,153 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000828.4:c.-2_-1insT' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37' -p9 -aS'NM_000828.4:c.-2_-1 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA -p18 -sS'gene_symbol' -p19 -S'GRIA3' -p20 -sS'HGVS_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_000819.3:p.?' -p24 -sS'slr' -p25 -S'NP_000819.3:p.?' 
-p26 -ssS'submitted_variant' -p27 -S'NM_000828.4:c.-2_-1insT' -p28 -sS'genome_context_intronic_sequence' -p29 -g6 -sS'HGVS_LRG_variant' -p30 -g6 -sS'HGVS_transcript_variant' -p31 -S'NM_000828.4:c.-2_-1insT' -p32 -sS'HGVS_RefSeqGene_variant' -p33 -g6 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000023.10:g.122318386_122318387insGT' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chrX' -p43 -sS'ref' -p44 -S'A' -p45 -sS'pos' -p46 -S'122318386' -p47 -sS'alt' -p48 -S'AGT' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000023.11:g.123184534_123184535insT' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'G' -p54 -sg46 -S'123184534' -p55 -sg48 -VGT -p56 -sssS'GRCh37' -p57 -(dp58 -g38 -S'NC_000023.10:g.122318386_122318387insGT' -p59 -sg40 -(dp60 -g42 -S'X' -p61 -sg44 -g45 -sg46 -S'122318386' -p62 -sg48 -S'AGT' -p63 -sssS'GRCh38' -p64 -(dp65 -g38 -S'NC_000023.11:g.123184534_123184535insT' -p66 -sg40 -(dp67 -g42 -g61 -sg44 -g54 -sg46 -S'123184534' -p68 -sg48 -VGT -p69 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant176.txt b/VariantValidator/testing/testOutputsMasterITS/variant176.txt deleted file mode 100644 index 2f77c512..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant176.txt +++ /dev/null @@ -1,152 +0,0 @@ -(dp0 -S'NM_000828.4:c.-3_-2insT' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37' -p7 -aS'NM_000828.4:c.-3_-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'RefSeqGene record not available' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA -p16 -sS'gene_symbol' -p17 -S'GRIA3' -p18 -sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_000819.3:p.?' -p22 -sS'slr' -p23 -S'NP_000819.3:p.?' 
-p24 -ssS'submitted_variant' -p25 -S'NM_000828.4:c.-3_-2insT' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'HGVS_LRG_variant' -p28 -g4 -sS'HGVS_transcript_variant' -p29 -S'NM_000828.4:c.-3_-2insT' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000023.10:g.122318386_122318387insTG' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chrX' -p41 -sS'ref' -p42 -S'A' -p43 -sS'pos' -p44 -S'122318386' -p45 -sS'alt' -p46 -S'ATG' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000023.11:g.123184533_123184534insT' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -g43 -sg44 -S'123184533' -p52 -sg46 -VAT -p53 -sssS'GRCh37' -p54 -(dp55 -g36 -S'NC_000023.10:g.122318386_122318387insTG' -p56 -sg38 -(dp57 -g40 -S'X' -p58 -sg42 -g43 -sg44 -S'122318386' -p59 -sg46 -S'ATG' -p60 -sssS'GRCh38' -p61 -(dp62 -g36 -S'NC_000023.11:g.123184533_123184534insT' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -g43 -sg44 -S'123184533' -p65 -sg46 -VAT -p66 -sssssS'flag' -p67 -S'gene_variant' -p68 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant177.txt b/VariantValidator/testing/testOutputsMasterITS/variant177.txt deleted file mode 100644 index 6ebecb83..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant177.txt +++ /dev/null @@ -1,152 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000828.4:c.-2delinsTT' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37' -p9 -aS'NM_000828.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA -p18 -sS'gene_symbol' -p19 -S'GRIA3' -p20 -sS'HGVS_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_000819.3:p.?' -p24 -sS'slr' -p25 -S'NP_000819.3:p.?' 
-p26 -ssS'submitted_variant' -p27 -S'NM_000828.4:c.-2delGinsTT' -p28 -sS'genome_context_intronic_sequence' -p29 -g6 -sS'HGVS_LRG_variant' -p30 -g6 -sS'HGVS_transcript_variant' -p31 -S'NM_000828.4:c.-2delinsTT' -p32 -sS'HGVS_RefSeqGene_variant' -p33 -g6 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000023.10:g.122318386_122318387insTT' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chrX' -p43 -sS'ref' -p44 -S'A' -p45 -sS'pos' -p46 -S'122318386' -p47 -sS'alt' -p48 -S'ATT' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000023.11:g.123184534delinsTT' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'G' -p54 -sg46 -S'123184534' -p55 -sg48 -VTT -p56 -sssS'GRCh37' -p57 -(dp58 -g38 -S'NC_000023.10:g.122318386_122318387insTT' -p59 -sg40 -(dp60 -g42 -S'X' -p61 -sg44 -g45 -sg46 -S'122318386' -p62 -sg48 -S'ATT' -p63 -sssS'GRCh38' -p64 -(dp65 -g38 -S'NC_000023.11:g.123184534delinsTT' -p66 -sg40 -(dp67 -g42 -g61 -sg44 -g54 -sg46 -S'123184534' -p68 -sg48 -g56 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant178.txt b/VariantValidator/testing/testOutputsMasterITS/variant178.txt deleted file mode 100644 index 52bc03ef..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant178.txt +++ /dev/null @@ -1,152 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000828.4:c.-2_-1delinsTT' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37' -p9 -aS'NM_000828.4:c.-2_-1 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens glutamate 
ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA -p18 -sS'gene_symbol' -p19 -S'GRIA3' -p20 -sS'HGVS_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_000819.3:p.?' -p24 -sS'slr' -p25 -S'NP_000819.3:p.?' -p26 -ssS'submitted_variant' -p27 -S'NM_000828.4:c.-2_-1delGCinsTT' -p28 -sS'genome_context_intronic_sequence' -p29 -g6 -sS'HGVS_LRG_variant' -p30 -g6 -sS'HGVS_transcript_variant' -p31 -S'NM_000828.4:c.-2_-1delinsTT' -p32 -sS'HGVS_RefSeqGene_variant' -p33 -g6 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000023.10:g.122318387delinsTT' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chrX' -p43 -sS'ref' -p44 -S'C' -p45 -sS'pos' -p46 -S'122318387' -p47 -sS'alt' -p48 -S'TT' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000023.11:g.123184534_123184535delinsTT' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'GC' -p54 -sg46 -S'123184534' -p55 -sg48 -VTT -p56 -sssS'GRCh37' -p57 -(dp58 -g38 -S'NC_000023.10:g.122318387delinsTT' -p59 -sg40 -(dp60 -g42 -S'X' -p61 -sg44 -g45 -sg46 -S'122318387' -p62 -sg48 -g49 -sssS'GRCh38' -p63 -(dp64 -g38 -S'NC_000023.11:g.123184534_123184535delinsTT' -p65 -sg40 -(dp66 -g42 -g61 -sg44 -S'GC' -p67 -sg46 -S'123184534' -p68 -sg48 -g56 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant179.txt b/VariantValidator/testing/testOutputsMasterITS/variant179.txt deleted file mode 100644 index f187380a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant179.txt +++ /dev/null @@ -1,144 +0,0 @@ -(dp0 -S'NM_000828.4:c.-3_-2delinsTT' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA -p12 -sS'gene_symbol' -p13 -S'GRIA3' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_000819.3:p.?' -p18 -sS'slr' -p19 -S'NP_000819.3:p.?' -p20 -ssS'submitted_variant' -p21 -S'NM_000828.4:c.-3_-2delAGinsTT' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_000828.4:c.-3_-2delinsTT' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000023.10:g.122318386delinsTT' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chrX' -p37 -sS'ref' -p38 -S'A' -p39 -sS'pos' -p40 -S'122318386' -p41 -sS'alt' -p42 -S'TT' -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000023.11:g.123184533_123184534delinsTT' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -S'AG' -p48 -sg40 -S'123184533' -p49 -sg42 -VTT -p50 -sssS'GRCh37' -p51 -(dp52 -g32 -S'NC_000023.10:g.122318386delinsTT' -p53 -sg34 -(dp54 -g36 -S'X' -p55 -sg38 -g39 -sg40 -S'122318386' -p56 -sg42 -g43 -sssS'GRCh38' -p57 -(dp58 -g32 -S'NC_000023.11:g.123184533_123184534delinsTT' -p59 -sg34 -(dp60 -g36 -g55 -sg38 -S'AG' -p61 -sg40 -S'123184533' -p62 -sg42 -g50 -sssssS'flag' -p63 -S'gene_variant' -p64 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant18.txt b/VariantValidator/testing/testOutputsMasterITS/variant18.txt deleted file mode 100644 index ae623d8c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant18.txt +++ /dev/null @@ -1,58 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use NC_000011.9:g.5248381A=' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'HGVS_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NM_000518.4:c.-50-80C>T' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'HGVS_LRG_variant' -p20 -g4 -sS'HGVS_transcript_variant' -p21 -g4 -sS'HGVS_RefSeqGene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -ssS'flag' -p25 -S'warning' -p26 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant180.txt b/VariantValidator/testing/testOutputsMasterITS/variant180.txt deleted file mode 100644 index de15d78a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant180.txt +++ /dev/null @@ -1,445 +0,0 @@ -(dp0 -S'NM_014249.3:c.951dup' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_014249.3 with genome build GRCh37' -p7 -aS'NC_000015.9:g.72105926_72105932 contains 1 genomic base(s) that fail to align to transcript NM_014249.3' -p8 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_014249.3 between positions c.947_948' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA -p16 -sS'gene_symbol' -p17 -S'NR2E3' -p18 -sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_055064.1:p.(Thr318HisfsTer23)' -p22 -sS'slr' -p23 -S'NP_055064.1:p.(T318Hfs*23)' -p24 -ssS'submitted_variant' -p25 -S'15-72105929-C-C' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'HGVS_LRG_variant' -p28 -g4 -sS'HGVS_transcript_variant' -p29 -S'NM_014249.3:c.951dup' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_009113.1:g.8039dup' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'GRCh38' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000015.10:g.71813592dup' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'15' -p42 -sS'ref' -p43 -S'C' -p44 -sS'pos' -p45 -S'71813589' -p46 -sS'alt' -p47 -VCC -p48 -sssS'GRCh37' -p49 -(dp50 -g37 -S'NC_000015.9:g.72105924_72105934=' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -S'GTGGACCCCCA' -p53 -sg45 -S'72105924' 
-p54 -sg47 -g53 -sssS'hg38' -p55 -(dp56 -g37 -S'NC_000015.10:g.71813592dup' -p57 -sg39 -(dp58 -g41 -S'chr15' -p59 -sg43 -g44 -sg45 -S'71813589' -p60 -sg47 -VCC -p61 -sssS'hg19' -p62 -(dp63 -g37 -S'NC_000015.9:g.72105924_72105934=' -p64 -sg39 -(dp65 -g41 -g59 -sg43 -g53 -sg45 -S'72105924' -p66 -sg47 -g53 -sssssS'flag' -p67 -S'gene_variant' -p68 -sS'NM_016346.2:c.951dup' -p69 -(dp70 -g3 -g4 -sg5 -(lp71 -S'The displayed variants may be artefacts of aligning NM_016346.2 with genome build GRCh37' -p72 -aS'NC_000015.9:g.72105926_72105932 contains 1 genomic base(s) that fail to align to transcript NM_016346.2' -p73 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_016346.2 between positions c.947_948' -p74 -aS'Caution should be used when reporting the displayed variant descriptions' -p75 -aS'If you are unsure, please contact admin' -p76 -aS'A more recent version of the selected reference sequence NM_016346.2 is available (NM_016346.3)' -p77 -aS'NM_016346.3:c.951dupC MUST be fully validated prior to use in reports' -p78 -aS'select_variants=NM_016346.3:c.951dup' -p79 -aS'RefSeqGene record not available' -p80 -asg12 -g4 -sg13 -(lp81 -sg15 -VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA -p82 -sg17 -S'NR2E3' -p83 -sg19 -(dp84 -g21 -S'NP_057430.1:p.(Thr318HisfsTer23)' -p85 -sg23 -S'NP_057430.1:p.(T318Hfs*23)' -p86 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_016346.2:c.951dup' -p87 -sg31 -g4 -sg33 -(dp88 -g62 -(dp89 -g37 -S'NC_000015.9:g.72105924_72105934=' -p90 -sg39 -(dp91 -g41 -g59 -sg43 -S'GTGGACCCCCA' -p92 -sg45 -S'72105924' -p93 -sg47 -g92 -sssg49 -(dp94 -g37 -S'NC_000015.9:g.72105924_72105934=' -p95 -sg39 -(dp96 -g41 -g42 -sg43 -g92 -sg45 -S'72105924' -p97 -sg47 -g92 -sssssS'NM_016346.3:c.951dup' -p98 -(dp99 -g3 -g4 -sg5 -(lp100 -S'The displayed variants may be artefacts of aligning NM_016346.3 with genome build GRCh37' -p101 -aS'NC_000015.9:g.72105926_72105932 contains 1 
genomic base(s) that fail to align to transcript NM_016346.3' -p102 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_016346.3 between positions c.947_948' -p103 -aS'Caution should be used when reporting the displayed variant descriptions' -p104 -aS'If you are unsure, please contact admin' -p105 -aS'RefSeqGene record not available' -p106 -asg12 -g4 -sg13 -(lp107 -sg15 -VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA -p108 -sg17 -S'NR2E3' -p109 -sg19 -(dp110 -g21 -S'NP_057430.1:p.(Thr318HisfsTer23)' -p111 -sg23 -S'NP_057430.1:p.(T318Hfs*23)' -p112 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_016346.3:c.951dup' -p113 -sg31 -g4 -sg33 -(dp114 -g35 -(dp115 -g37 -S'NC_000015.10:g.71813592dup' -p116 -sg39 -(dp117 -g41 -g42 -sg43 -g44 -sg45 -S'71813589' -p118 -sg47 -VCC -p119 -sssg49 -(dp120 -g37 -S'NC_000015.9:g.72105924_72105934=' -p121 -sg39 -(dp122 -g41 -g42 -sg43 -g92 -sg45 -S'72105924' -p123 -sg47 -g92 -sssg55 -(dp124 -g37 -S'NC_000015.10:g.71813592dup' -p125 -sg39 -(dp126 -g41 -g59 -sg43 -g44 -sg45 -S'71813589' -p127 -sg47 -VCC -p128 -sssg62 -(dp129 -g37 -S'NC_000015.9:g.72105924_72105934=' -p130 -sg39 -(dp131 -g41 -g59 -sg43 -g92 -sg45 -S'72105924' -p132 -sg47 -g92 -sssssS'NM_014249.2:c.951dup' -p133 -(dp134 -g3 -g4 -sg5 -(lp135 -S'The displayed variants may be artefacts of aligning NM_014249.2 with genome build GRCh37' -p136 -aS'NC_000015.9:g.72105926_72105932 contains 1 genomic base(s) that fail to align to transcript NM_014249.2' -p137 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_014249.2 between positions c.947_948' -p138 -aS'Caution should be used when reporting the displayed variant descriptions' -p139 -aS'If you are unsure, please contact admin' -p140 -aS'A more recent version of the selected reference sequence NM_014249.2 is available (NM_014249.3)' -p141 -aS'NM_014249.3:c.951dupC MUST be fully validated 
prior to use in reports' -p142 -aS'select_variants=NM_014249.3:c.951dup' -p143 -aS'RefSeqGene record not available' -p144 -asg12 -g4 -sg13 -(lp145 -sg15 -VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA -p146 -sg17 -S'NR2E3' -p147 -sg19 -(dp148 -g21 -S'NP_055064.1:p.(Thr318HisfsTer23)' -p149 -sg23 -S'NP_055064.1:p.(T318Hfs*23)' -p150 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_014249.2:c.951dup' -p151 -sg31 -g4 -sg33 -(dp152 -g62 -(dp153 -g37 -S'NC_000015.9:g.72105924_72105934=' -p154 -sg39 -(dp155 -g41 -g59 -sg43 -g92 -sg45 -S'72105924' -p156 -sg47 -g92 -sssg49 -(dp157 -g37 -S'NC_000015.9:g.72105924_72105934=' -p158 -sg39 -(dp159 -g41 -g42 -sg43 -g92 -sg45 -S'72105924' -p160 -sg47 -g92 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant181.txt b/VariantValidator/testing/testOutputsMasterITS/variant181.txt deleted file mode 100644 index 88982aac..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant181.txt +++ /dev/null @@ -1,448 +0,0 @@ -(dp0 -S'NM_014249.2:c.947_948insTT' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000015.9:g.72105928AC>ATT automapped to NC_000015.9:g.72105929delCinsTT' -p7 -aS'The displayed variants may be artefacts of aligning NM_014249.2 with genome build GRCh37' -p8 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_014249.2 between positions c.947_948' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -aS'A more recent version of the selected reference sequence NM_014249.2 is available (NM_014249.3)' -p12 -aS'NM_014249.3:c.947_948insTT MUST be fully validated prior to use in reports' -p13 -aS'select_variants=NM_014249.3:c.947_948insTT' -p14 -aS'RefSeqGene record not available' -p15 -asS'RefSeqGene_context_intronic_sequence' -p16 -g4 
-sS'alt_genomic_loci' -p17 -(lp18 -sS'transcript_description' -p19 -VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA -p20 -sS'gene_symbol' -p21 -S'NR2E3' -p22 -sS'HGVS_predicted_protein_consequence' -p23 -(dp24 -S'tlr' -p25 -S'NP_055064.1:p.(Pro317SerfsTer8)' -p26 -sS'slr' -p27 -S'NP_055064.1:p.(P317Sfs*8)' -p28 -ssS'submitted_variant' -p29 -S'15-72105928-AC-ATT' -p30 -sS'genome_context_intronic_sequence' -p31 -g4 -sS'HGVS_LRG_variant' -p32 -g4 -sS'HGVS_transcript_variant' -p33 -S'NM_014249.2:c.947_948insTT' -p34 -sS'HGVS_RefSeqGene_variant' -p35 -g4 -sS'primary_assembly_loci' -p36 -(dp37 -S'hg19' -p38 -(dp39 -S'HGVS_genomic_description' -p40 -S'NC_000015.9:g.72105929delinsTT' -p41 -sS'vcf' -p42 -(dp43 -S'chr' -p44 -S'chr15' -p45 -sS'ref' -p46 -S'C' -p47 -sS'pos' -p48 -S'72105929' -p49 -sS'alt' -p50 -S'TT' -p51 -sssS'GRCh37' -p52 -(dp53 -g40 -S'NC_000015.9:g.72105929delinsTT' -p54 -sg42 -(dp55 -g44 -S'15' -p56 -sg46 -g47 -sg48 -S'72105929' -p57 -sg50 -g51 -sssssS'flag' -p58 -S'gene_variant' -p59 -sS'NM_016346.3:c.947_948insTT' -p60 -(dp61 -g3 -g4 -sg5 -(lp62 -S'NC_000015.9:g.72105928AC>ATT automapped to NC_000015.9:g.72105929delCinsTT' -p63 -aS'The displayed variants may be artefacts of aligning NM_016346.3 with genome build GRCh37' -p64 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_016346.3 between positions c.947_948' -p65 -aS'Caution should be used when reporting the displayed variant descriptions' -p66 -aS'If you are unsure, please contact admin' -p67 -aS'RefSeqGene record not available' -p68 -asg16 -g4 -sg17 -(lp69 -sg19 -VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA -p70 -sg21 -S'NR2E3' -p71 -sg23 -(dp72 -g25 -S'NP_057430.1:p.(Pro317SerfsTer8)' -p73 -sg27 -S'NP_057430.1:p.(P317Sfs*8)' -p74 -ssg29 -g30 -sg31 -g4 -sg32 -g4 -sg33 -S'NM_016346.3:c.947_948insTT' -p75 -sg35 -g4 -sg36 -(dp76 -S'GRCh38' -p77 -(dp78 
-g40 -S'NC_000015.10:g.71813588_71813589insTT' -p79 -sg42 -(dp80 -g44 -g56 -sg46 -S'A' -p81 -sg48 -S'71813588' -p82 -sg50 -VATT -p83 -sssg52 -(dp84 -g40 -S'NC_000015.9:g.72105929delinsTT' -p85 -sg42 -(dp86 -g44 -g56 -sg46 -g47 -sg48 -S'72105929' -p87 -sg50 -S'TT' -p88 -sssS'hg38' -p89 -(dp90 -g40 -S'NC_000015.10:g.71813588_71813589insTT' -p91 -sg42 -(dp92 -g44 -g45 -sg46 -g81 -sg48 -S'71813588' -p93 -sg50 -VATT -p94 -sssg38 -(dp95 -g40 -S'NC_000015.9:g.72105929delinsTT' -p96 -sg42 -(dp97 -g44 -g45 -sg46 -g47 -sg48 -S'72105929' -p98 -sg50 -g88 -sssssS'NM_014249.3:c.947_948insTT' -p99 -(dp100 -g3 -g4 -sg5 -(lp101 -S'NC_000015.9:g.72105928AC>ATT automapped to NC_000015.9:g.72105929delCinsTT' -p102 -aS'The displayed variants may be artefacts of aligning NM_014249.3 with genome build GRCh37' -p103 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_014249.3 between positions c.947_948' -p104 -aS'Caution should be used when reporting the displayed variant descriptions' -p105 -aS'If you are unsure, please contact admin' -p106 -asg16 -g4 -sg17 -(lp107 -sg19 -VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA -p108 -sg21 -S'NR2E3' -p109 -sg23 -(dp110 -g25 -S'NP_055064.1:p.(Pro317SerfsTer8)' -p111 -sg27 -S'NP_055064.1:p.(P317Sfs*8)' -p112 -ssg29 -g30 -sg31 -g4 -sg32 -g4 -sg33 -S'NM_014249.3:c.947_948insTT' -p113 -sg35 -S'NG_009113.1:g.8035_8036insTT' -p114 -sg36 -(dp115 -g77 -(dp116 -g40 -S'NC_000015.10:g.71813588_71813589insTT' -p117 -sg42 -(dp118 -g44 -g56 -sg46 -g81 -sg48 -S'71813588' -p119 -sg50 -VATT -p120 -sssg52 -(dp121 -g40 -S'NC_000015.9:g.72105929delinsTT' -p122 -sg42 -(dp123 -g44 -g56 -sg46 -g47 -sg48 -S'72105929' -p124 -sg50 -S'TT' -p125 -sssg89 -(dp126 -g40 -S'NC_000015.10:g.71813588_71813589insTT' -p127 -sg42 -(dp128 -g44 -g45 -sg46 -g81 -sg48 -S'71813588' -p129 -sg50 -VATT -p130 -sssg38 -(dp131 -g40 -S'NC_000015.9:g.72105929delinsTT' -p132 -sg42 -(dp133 -g44 -g45 
-sg46 -g47 -sg48 -S'72105929' -p134 -sg50 -g125 -sssssS'NM_016346.2:c.947_948insTT' -p135 -(dp136 -g3 -g4 -sg5 -(lp137 -S'NC_000015.9:g.72105928AC>ATT automapped to NC_000015.9:g.72105929delCinsTT' -p138 -aS'The displayed variants may be artefacts of aligning NM_016346.2 with genome build GRCh37' -p139 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_016346.2 between positions c.947_948' -p140 -aS'Caution should be used when reporting the displayed variant descriptions' -p141 -aS'If you are unsure, please contact admin' -p142 -aS'A more recent version of the selected reference sequence NM_016346.2 is available (NM_016346.3)' -p143 -aS'NM_016346.3:c.947_948insTT MUST be fully validated prior to use in reports' -p144 -aS'select_variants=NM_016346.3:c.947_948insTT' -p145 -aS'RefSeqGene record not available' -p146 -asg16 -g4 -sg17 -(lp147 -sg19 -VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA -p148 -sg21 -S'NR2E3' -p149 -sg23 -(dp150 -g25 -S'NP_057430.1:p.(Pro317SerfsTer8)' -p151 -sg27 -S'NP_057430.1:p.(P317Sfs*8)' -p152 -ssg29 -g30 -sg31 -g4 -sg32 -g4 -sg33 -S'NM_016346.2:c.947_948insTT' -p153 -sg35 -g4 -sg36 -(dp154 -g38 -(dp155 -g40 -S'NC_000015.9:g.72105929delinsTT' -p156 -sg42 -(dp157 -g44 -g45 -sg46 -g47 -sg48 -S'72105929' -p158 -sg50 -S'TT' -p159 -sssg52 -(dp160 -g40 -S'NC_000015.9:g.72105929delinsTT' -p161 -sg42 -(dp162 -g44 -g56 -sg46 -g47 -sg48 -S'72105929' -p163 -sg50 -g159 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant182.txt b/VariantValidator/testing/testOutputsMasterITS/variant182.txt deleted file mode 100644 index cc3508d0..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant182.txt +++ /dev/null @@ -1,448 +0,0 @@ -(dp0 -S'NM_014249.2:c.947_948insTT' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000015.9:g.72105928ACC>ATT automapped to NC_000015.9:g.72105929_72105930delCCinsTT' -p7 -aS'The displayed variants may be artefacts of aligning NM_014249.2 with genome build GRCh37' -p8 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_014249.2 between positions c.947_948' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -aS'A more recent version of the selected reference sequence NM_014249.2 is available (NM_014249.3)' -p12 -aS'NM_014249.3:c.947_948insTT MUST be fully validated prior to use in reports' -p13 -aS'select_variants=NM_014249.3:c.947_948insTT' -p14 -aS'RefSeqGene record not available' -p15 -asS'RefSeqGene_context_intronic_sequence' -p16 -g4 -sS'alt_genomic_loci' -p17 -(lp18 -sS'transcript_description' -p19 -VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA -p20 -sS'gene_symbol' -p21 -S'NR2E3' -p22 -sS'HGVS_predicted_protein_consequence' -p23 -(dp24 -S'tlr' -p25 -S'NP_055064.1:p.(Pro317SerfsTer8)' -p26 -sS'slr' -p27 -S'NP_055064.1:p.(P317Sfs*8)' -p28 -ssS'submitted_variant' -p29 -S'15-72105928-ACC-ATT' -p30 -sS'genome_context_intronic_sequence' -p31 -g4 -sS'HGVS_LRG_variant' -p32 -g4 -sS'HGVS_transcript_variant' -p33 -S'NM_014249.2:c.947_948insTT' -p34 -sS'HGVS_RefSeqGene_variant' -p35 -g4 -sS'primary_assembly_loci' -p36 -(dp37 -S'hg19' -p38 -(dp39 -S'HGVS_genomic_description' -p40 -S'NC_000015.9:g.72105929delinsTT' -p41 -sS'vcf' 
-p42 -(dp43 -S'chr' -p44 -S'chr15' -p45 -sS'ref' -p46 -S'C' -p47 -sS'pos' -p48 -S'72105929' -p49 -sS'alt' -p50 -S'TT' -p51 -sssS'GRCh37' -p52 -(dp53 -g40 -S'NC_000015.9:g.72105929delinsTT' -p54 -sg42 -(dp55 -g44 -S'15' -p56 -sg46 -g47 -sg48 -S'72105929' -p57 -sg50 -g51 -sssssS'flag' -p58 -S'gene_variant' -p59 -sS'NM_016346.3:c.947_948insTT' -p60 -(dp61 -g3 -g4 -sg5 -(lp62 -S'NC_000015.9:g.72105928ACC>ATT automapped to NC_000015.9:g.72105929_72105930delCCinsTT' -p63 -aS'The displayed variants may be artefacts of aligning NM_016346.3 with genome build GRCh37' -p64 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_016346.3 between positions c.947_948' -p65 -aS'Caution should be used when reporting the displayed variant descriptions' -p66 -aS'If you are unsure, please contact admin' -p67 -aS'RefSeqGene record not available' -p68 -asg16 -g4 -sg17 -(lp69 -sg19 -VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA -p70 -sg21 -S'NR2E3' -p71 -sg23 -(dp72 -g25 -S'NP_057430.1:p.(Pro317SerfsTer8)' -p73 -sg27 -S'NP_057430.1:p.(P317Sfs*8)' -p74 -ssg29 -g30 -sg31 -g4 -sg32 -g4 -sg33 -S'NM_016346.3:c.947_948insTT' -p75 -sg35 -g4 -sg36 -(dp76 -S'GRCh38' -p77 -(dp78 -g40 -S'NC_000015.10:g.71813588_71813589insTT' -p79 -sg42 -(dp80 -g44 -g56 -sg46 -S'A' -p81 -sg48 -S'71813588' -p82 -sg50 -VATT -p83 -sssg52 -(dp84 -g40 -S'NC_000015.9:g.72105929delinsTT' -p85 -sg42 -(dp86 -g44 -g56 -sg46 -g47 -sg48 -S'72105929' -p87 -sg50 -S'TT' -p88 -sssS'hg38' -p89 -(dp90 -g40 -S'NC_000015.10:g.71813588_71813589insTT' -p91 -sg42 -(dp92 -g44 -g45 -sg46 -g81 -sg48 -S'71813588' -p93 -sg50 -VATT -p94 -sssg38 -(dp95 -g40 -S'NC_000015.9:g.72105929delinsTT' -p96 -sg42 -(dp97 -g44 -g45 -sg46 -g47 -sg48 -S'72105929' -p98 -sg50 -g88 -sssssS'NM_014249.3:c.947_948insTT' -p99 -(dp100 -g3 -g4 -sg5 -(lp101 -S'NC_000015.9:g.72105928ACC>ATT automapped to NC_000015.9:g.72105929_72105930delCCinsTT' -p102 -aS'The displayed 
variants may be artefacts of aligning NM_014249.3 with genome build GRCh37' -p103 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_014249.3 between positions c.947_948' -p104 -aS'Caution should be used when reporting the displayed variant descriptions' -p105 -aS'If you are unsure, please contact admin' -p106 -asg16 -g4 -sg17 -(lp107 -sg19 -VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA -p108 -sg21 -S'NR2E3' -p109 -sg23 -(dp110 -g25 -S'NP_055064.1:p.(Pro317SerfsTer8)' -p111 -sg27 -S'NP_055064.1:p.(P317Sfs*8)' -p112 -ssg29 -g30 -sg31 -g4 -sg32 -g4 -sg33 -S'NM_014249.3:c.947_948insTT' -p113 -sg35 -S'NG_009113.1:g.8035_8036insTT' -p114 -sg36 -(dp115 -g77 -(dp116 -g40 -S'NC_000015.10:g.71813588_71813589insTT' -p117 -sg42 -(dp118 -g44 -g56 -sg46 -g81 -sg48 -S'71813588' -p119 -sg50 -VATT -p120 -sssg52 -(dp121 -g40 -S'NC_000015.9:g.72105929delinsTT' -p122 -sg42 -(dp123 -g44 -g56 -sg46 -g47 -sg48 -S'72105929' -p124 -sg50 -S'TT' -p125 -sssg89 -(dp126 -g40 -S'NC_000015.10:g.71813588_71813589insTT' -p127 -sg42 -(dp128 -g44 -g45 -sg46 -g81 -sg48 -S'71813588' -p129 -sg50 -VATT -p130 -sssg38 -(dp131 -g40 -S'NC_000015.9:g.72105929delinsTT' -p132 -sg42 -(dp133 -g44 -g45 -sg46 -g47 -sg48 -S'72105929' -p134 -sg50 -g125 -sssssS'NM_016346.2:c.947_948insTT' -p135 -(dp136 -g3 -g4 -sg5 -(lp137 -S'NC_000015.9:g.72105928ACC>ATT automapped to NC_000015.9:g.72105929_72105930delCCinsTT' -p138 -aS'The displayed variants may be artefacts of aligning NM_016346.2 with genome build GRCh37' -p139 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_016346.2 between positions c.947_948' -p140 -aS'Caution should be used when reporting the displayed variant descriptions' -p141 -aS'If you are unsure, please contact admin' -p142 -aS'A more recent version of the selected reference sequence NM_016346.2 is available (NM_016346.3)' -p143 
-aS'NM_016346.3:c.947_948insTT MUST be fully validated prior to use in reports' -p144 -aS'select_variants=NM_016346.3:c.947_948insTT' -p145 -aS'RefSeqGene record not available' -p146 -asg16 -g4 -sg17 -(lp147 -sg19 -VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA -p148 -sg21 -S'NR2E3' -p149 -sg23 -(dp150 -g25 -S'NP_057430.1:p.(Pro317SerfsTer8)' -p151 -sg27 -S'NP_057430.1:p.(P317Sfs*8)' -p152 -ssg29 -g30 -sg31 -g4 -sg32 -g4 -sg33 -S'NM_016346.2:c.947_948insTT' -p153 -sg35 -g4 -sg36 -(dp154 -g38 -(dp155 -g40 -S'NC_000015.9:g.72105929delinsTT' -p156 -sg42 -(dp157 -g44 -g45 -sg46 -g47 -sg48 -S'72105929' -p158 -sg50 -S'TT' -p159 -sssg52 -(dp160 -g40 -S'NC_000015.9:g.72105929delinsTT' -p161 -sg42 -(dp162 -g44 -g56 -sg46 -g47 -sg48 -S'72105929' -p163 -sg50 -g159 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant183.txt b/VariantValidator/testing/testOutputsMasterITS/variant183.txt deleted file mode 100644 index 5bec1e56..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant183.txt +++ /dev/null @@ -1,453 +0,0 @@ -(dp0 -S'NM_016346.3:c.947delinsTT' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000015.9:g.72105927GACC>GTT automapped to NC_000015.9:g.72105928_72105930delACCinsTT' -p7 -aS'The displayed variants may be artefacts of aligning NM_016346.3 with genome build GRCh37' -p8 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_016346.3 between positions c.947_948' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -aS'RefSeqGene record not available' -p12 -asS'RefSeqGene_context_intronic_sequence' -p13 -g4 -sS'alt_genomic_loci' -p14 -(lp15 -sS'transcript_description' -p16 -VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA -p17 
-sS'gene_symbol' -p18 -S'NR2E3' -p19 -sS'HGVS_predicted_protein_consequence' -p20 -(dp21 -S'tlr' -p22 -S'NP_057430.1:p.(Asp316ValfsTer25)' -p23 -sS'slr' -p24 -S'NP_057430.1:p.(D316Vfs*25)' -p25 -ssS'submitted_variant' -p26 -S'15-72105927-GACC-GTT' -p27 -sS'genome_context_intronic_sequence' -p28 -g4 -sS'HGVS_LRG_variant' -p29 -g4 -sS'HGVS_transcript_variant' -p30 -S'NM_016346.3:c.947delinsTT' -p31 -sS'HGVS_RefSeqGene_variant' -p32 -g4 -sS'primary_assembly_loci' -p33 -(dp34 -S'GRCh38' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000015.10:g.71813588delinsTT' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'15' -p42 -sS'ref' -p43 -S'A' -p44 -sS'pos' -p45 -S'71813588' -p46 -sS'alt' -p47 -VTT -p48 -sssS'GRCh37' -p49 -(dp50 -g37 -S'NC_000015.9:g.72105928_72105929delinsTT' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -S'AC' -p53 -sg45 -S'72105928' -p54 -sg47 -S'TT' -p55 -sssS'hg38' -p56 -(dp57 -g37 -S'NC_000015.10:g.71813588delinsTT' -p58 -sg39 -(dp59 -g41 -S'chr15' -p60 -sg43 -g44 -sg45 -S'71813588' -p61 -sg47 -g48 -sssS'hg19' -p62 -(dp63 -g37 -S'NC_000015.9:g.72105928_72105929delinsTT' -p64 -sg39 -(dp65 -g41 -g60 -sg43 -S'AC' -p66 -sg45 -S'72105928' -p67 -sg47 -g55 -sssssS'NM_014249.2:c.947delinsTT' -p68 -(dp69 -g3 -g4 -sg5 -(lp70 -S'NC_000015.9:g.72105927GACC>GTT automapped to NC_000015.9:g.72105928_72105930delACCinsTT' -p71 -aS'The displayed variants may be artefacts of aligning NM_014249.2 with genome build GRCh37' -p72 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_014249.2 between positions c.947_948' -p73 -aS'Caution should be used when reporting the displayed variant descriptions' -p74 -aS'If you are unsure, please contact admin' -p75 -aS'A more recent version of the selected reference sequence NM_014249.2 is available (NM_014249.3)' -p76 -aS'NM_014249.3:c.947delAinsTT MUST be fully validated prior to use in reports' -p77 -aS'select_variants=NM_014249.3:c.947delinsTT' -p78 -aS'RefSeqGene record not available' -p79 -asg13 
-g4 -sg14 -(lp80 -sg16 -VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA -p81 -sg18 -S'NR2E3' -p82 -sg20 -(dp83 -g22 -S'NP_055064.1:p.(Asp316ValfsTer25)' -p84 -sg24 -S'NP_055064.1:p.(D316Vfs*25)' -p85 -ssg26 -g27 -sg28 -g4 -sg29 -g4 -sg30 -S'NM_014249.2:c.947delinsTT' -p86 -sg32 -g4 -sg33 -(dp87 -g62 -(dp88 -g37 -S'NC_000015.9:g.72105928_72105929delinsTT' -p89 -sg39 -(dp90 -g41 -g60 -sg43 -S'AC' -p91 -sg45 -S'72105928' -p92 -sg47 -S'TT' -p93 -sssg49 -(dp94 -g37 -S'NC_000015.9:g.72105928_72105929delinsTT' -p95 -sg39 -(dp96 -g41 -g42 -sg43 -S'AC' -p97 -sg45 -S'72105928' -p98 -sg47 -g93 -sssssS'flag' -p99 -S'gene_variant' -p100 -sS'NM_014249.3:c.947delinsTT' -p101 -(dp102 -g3 -g4 -sg5 -(lp103 -S'NC_000015.9:g.72105927GACC>GTT automapped to NC_000015.9:g.72105928_72105930delACCinsTT' -p104 -aS'The displayed variants may be artefacts of aligning NM_014249.3 with genome build GRCh37' -p105 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_014249.3 between positions c.947_948' -p106 -aS'Caution should be used when reporting the displayed variant descriptions' -p107 -aS'If you are unsure, please contact admin' -p108 -asg13 -g4 -sg14 -(lp109 -sg16 -VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA -p110 -sg18 -S'NR2E3' -p111 -sg20 -(dp112 -g22 -S'NP_055064.1:p.(Asp316ValfsTer25)' -p113 -sg24 -S'NP_055064.1:p.(D316Vfs*25)' -p114 -ssg26 -g27 -sg28 -g4 -sg29 -g4 -sg30 -S'NM_014249.3:c.947delinsTT' -p115 -sg32 -S'NG_009113.1:g.8035delinsTT' -p116 -sg33 -(dp117 -g35 -(dp118 -g37 -S'NC_000015.10:g.71813588delinsTT' -p119 -sg39 -(dp120 -g41 -g42 -sg43 -g44 -sg45 -S'71813588' -p121 -sg47 -VTT -p122 -sssg49 -(dp123 -g37 -S'NC_000015.9:g.72105928_72105929delinsTT' -p124 -sg39 -(dp125 -g41 -g42 -sg43 -S'AC' -p126 -sg45 -S'72105928' -p127 -sg47 -S'TT' -p128 -sssg56 -(dp129 -g37 -S'NC_000015.10:g.71813588delinsTT' -p130 -sg39 -(dp131 -g41 -g60 
-sg43 -g44 -sg45 -S'71813588' -p132 -sg47 -g122 -sssg62 -(dp133 -g37 -S'NC_000015.9:g.72105928_72105929delinsTT' -p134 -sg39 -(dp135 -g41 -g60 -sg43 -S'AC' -p136 -sg45 -S'72105928' -p137 -sg47 -g128 -sssssS'NM_016346.2:c.947delinsTT' -p138 -(dp139 -g3 -g4 -sg5 -(lp140 -S'NC_000015.9:g.72105927GACC>GTT automapped to NC_000015.9:g.72105928_72105930delACCinsTT' -p141 -aS'The displayed variants may be artefacts of aligning NM_016346.2 with genome build GRCh37' -p142 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_016346.2 between positions c.947_948' -p143 -aS'Caution should be used when reporting the displayed variant descriptions' -p144 -aS'If you are unsure, please contact admin' -p145 -aS'A more recent version of the selected reference sequence NM_016346.2 is available (NM_016346.3)' -p146 -aS'NM_016346.3:c.947delAinsTT MUST be fully validated prior to use in reports' -p147 -aS'select_variants=NM_016346.3:c.947delinsTT' -p148 -aS'RefSeqGene record not available' -p149 -asg13 -g4 -sg14 -(lp150 -sg16 -VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA -p151 -sg18 -S'NR2E3' -p152 -sg20 -(dp153 -g22 -S'NP_057430.1:p.(Asp316ValfsTer25)' -p154 -sg24 -S'NP_057430.1:p.(D316Vfs*25)' -p155 -ssg26 -g27 -sg28 -g4 -sg29 -g4 -sg30 -S'NM_016346.2:c.947delinsTT' -p156 -sg32 -g4 -sg33 -(dp157 -g62 -(dp158 -g37 -S'NC_000015.9:g.72105928_72105929delinsTT' -p159 -sg39 -(dp160 -g41 -g60 -sg43 -S'AC' -p161 -sg45 -S'72105928' -p162 -sg47 -S'TT' -p163 -sssg49 -(dp164 -g37 -S'NC_000015.9:g.72105928_72105929delinsTT' -p165 -sg39 -(dp166 -g41 -g42 -sg43 -S'AC' -p167 -sg45 -S'72105928' -p168 -sg47 -g163 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant184.txt b/VariantValidator/testing/testOutputsMasterITS/variant184.txt deleted file mode 100644 index 7e4e4277..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant184.txt +++ /dev/null @@ -1,384 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_001042544.1:c.3233_3235=' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000019.9:g.41123093A>AG automapped to NC_000019.9:g.41123095dupG' -p9 -aS'The displayed variants may be artefacts of aligning NM_001042544.1 with genome build GRCh37' -p10 -aS'NM_001042544.1:c.3233_3235 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p11 -aS'Caution should be used when reporting the displayed variant descriptions' -p12 -aS'If you are unsure, please contact admin' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA -p18 -sS'gene_symbol' -p19 -S'LTBP4' -p20 -sS'HGVS_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_001036009.1:p.(Gln1078=)' -p24 -sS'slr' -p25 -S'NP_001036009.1:p.(Q1078=)' -p26 -ssS'submitted_variant' -p27 -S'19-41123093-A-AG' -p28 -sS'genome_context_intronic_sequence' -p29 -g6 -sS'HGVS_LRG_variant' -p30 -g6 -sS'HGVS_transcript_variant' -p31 -S'NM_001042544.1:c.3233_3235=' -p32 -sS'HGVS_RefSeqGene_variant' -p33 -S'NG_021201.1:g.29022_29024=' -p34 -sS'primary_assembly_loci' -p35 -(dp36 -S'GRCh38' -p37 -(dp38 -S'HGVS_genomic_description' -p39 -S'NC_000019.10:g.40617187_40617189=' -p40 -sS'vcf' -p41 -(dp42 -S'chr' -p43 -S'19' -p44 -sS'ref' -p45 -VAGG -p46 -sS'pos' -p47 -S'40617187' -p48 -sS'alt' -p49 -g46 -sssS'GRCh37' -p50 -(dp51 -g39 -S'NC_000019.9:g.41123095dup' -p52 -sg41 -(dp53 -g43 -g44 -sg45 -S'G' -p54 -sg47 -S'41123094' -p55 -sg49 -VGG 
-p56 -sssS'hg38' -p57 -(dp58 -g39 -S'NC_000019.10:g.40617187_40617189=' -p59 -sg41 -(dp60 -g43 -S'chr19' -p61 -sg45 -g46 -sg47 -S'40617187' -p62 -sg49 -g46 -sssS'hg19' -p63 -(dp64 -g39 -S'NC_000019.9:g.41123095dup' -p65 -sg41 -(dp66 -g43 -g61 -sg45 -g54 -sg47 -S'41123094' -p67 -sg49 -VGG -p68 -sssssS'NM_001042545.1:c.3032_3034=' -p69 -(dp70 -g5 -g6 -sg7 -(lp71 -S'NC_000019.9:g.41123093A>AG automapped to NC_000019.9:g.41123095dupG' -p72 -aS'The displayed variants may be artefacts of aligning NM_001042545.1 with genome build GRCh37' -p73 -aS'NM_001042545.1:c.3032_3034 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p74 -aS'Caution should be used when reporting the displayed variant descriptions' -p75 -aS'If you are unsure, please contact admin' -p76 -asg14 -g6 -sg15 -(lp77 -sg17 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA -p78 -sg19 -S'LTBP4' -p79 -sg21 -(dp80 -g23 -S'NP_001036010.1:p.(Gln1011=)' -p81 -sg25 -S'NP_001036010.1:p.(Q1011=)' -p82 -ssg27 -g28 -sg29 -g6 -sg30 -g6 -sg31 -S'NM_001042545.1:c.3032_3034=' -p83 -sg33 -S'NG_021201.1:g.29022_29024=' -p84 -sg35 -(dp85 -g37 -(dp86 -g39 -S'NC_000019.10:g.40617187_40617189=' -p87 -sg41 -(dp88 -g43 -g44 -sg45 -VAGG -p89 -sg47 -S'40617187' -p90 -sg49 -g89 -sssg50 -(dp91 -g39 -S'NC_000019.9:g.41123095dup' -p92 -sg41 -(dp93 -g43 -g44 -sg45 -g54 -sg47 -S'41123094' -p94 -sg49 -VGG -p95 -sssg57 -(dp96 -g39 -S'NC_000019.10:g.40617187_40617189=' -p97 -sg41 -(dp98 -g43 -g61 -sg45 -g89 -sg47 -S'40617187' -p99 -sg49 -g89 -sssg63 -(dp100 -g39 -S'NC_000019.9:g.41123095dup' -p101 -sg41 -(dp102 -g43 -g61 -sg45 -g54 -sg47 -S'41123094' -p103 -sg49 -VGG -p104 -sssssS'NM_003573.2:c.3122_3124=' -p105 -(dp106 -g5 -g6 -sg7 -(lp107 -S'NC_000019.9:g.41123093A>AG automapped to NC_000019.9:g.41123095dupG' -p108 -aS'The displayed variants may be artefacts of aligning NM_003573.2 with genome build GRCh37' -p109 -aS'NM_003573.2:c.3122_3124 contains 1 
transcript base(s) that fail to align to chromosome NC_000019.9' -p110 -aS'Caution should be used when reporting the displayed variant descriptions' -p111 -aS'If you are unsure, please contact admin' -p112 -asg14 -g6 -sg15 -(lp113 -sg17 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA -p114 -sg19 -S'LTBP4' -p115 -sg21 -(dp116 -g23 -S'NP_003564.2:p.(Gln1041=)' -p117 -sg25 -S'NP_003564.2:p.(Q1041=)' -p118 -ssg27 -g28 -sg29 -g6 -sg30 -g6 -sg31 -S'NM_003573.2:c.3122_3124=' -p119 -sg33 -S'NG_021201.1:g.29022_29024=' -p120 -sg35 -(dp121 -g37 -(dp122 -g39 -S'NC_000019.10:g.40617187_40617189=' -p123 -sg41 -(dp124 -g43 -g44 -sg45 -VAGG -p125 -sg47 -S'40617187' -p126 -sg49 -g125 -sssg50 -(dp127 -g39 -S'NC_000019.9:g.41123095dup' -p128 -sg41 -(dp129 -g43 -g44 -sg45 -g54 -sg47 -S'41123094' -p130 -sg49 -VGG -p131 -sssg57 -(dp132 -g39 -S'NC_000019.10:g.40617187_40617189=' -p133 -sg41 -(dp134 -g43 -g61 -sg45 -g125 -sg47 -S'40617187' -p135 -sg49 -g125 -sssg63 -(dp136 -g39 -S'NC_000019.9:g.41123095dup' -p137 -sg41 -(dp138 -g43 -g61 -sg45 -g54 -sg47 -S'41123094' -p139 -sg49 -VGG -p140 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant185.txt b/VariantValidator/testing/testOutputsMasterITS/variant185.txt deleted file mode 100644 index a739f2b4..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant185.txt +++ /dev/null @@ -1,383 +0,0 @@ -(dp0 -S'NM_003573.2:c.3123G>T' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000019.9:g.41123093A>AT automapped to NC_000019.9:g.41123093_41123094insT' -p7 -aS'The displayed variants may be artefacts of aligning NM_003573.2 with genome build GRCh37' -p8 -aS'NM_003573.2:c.3123_3125 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA -p16 -sS'gene_symbol' -p17 -S'LTBP4' -p18 -sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_003564.2:p.(Gln1041His)' -p22 -sS'slr' -p23 -S'NP_003564.2:p.(Q1041H)' -p24 -ssS'submitted_variant' -p25 -S'19-41123093-A-AT' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'HGVS_LRG_variant' -p28 -g4 -sS'HGVS_transcript_variant' -p29 -S'NM_003573.2:c.3123G>T' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_021201.1:g.29023G>T' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'GRCh38' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000019.10:g.40617188G>T' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'19' -p42 -sS'ref' -p43 -VG -p44 -sS'pos' -p45 -S'40617188' -p46 -sS'alt' -p47 -VT -p48 -sssS'GRCh37' -p49 -(dp50 -g37 -S'NC_000019.9:g.41123093_41123094insT' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -S'A' -p53 -sg45 -S'41123093' -p54 -sg47 -S'AT' -p55 -sssS'hg38' -p56 -(dp57 -g37 
-S'NC_000019.10:g.40617188G>T' -p58 -sg39 -(dp59 -g41 -S'chr19' -p60 -sg43 -g44 -sg45 -S'40617188' -p61 -sg47 -g48 -sssS'hg19' -p62 -(dp63 -g37 -S'NC_000019.9:g.41123093_41123094insT' -p64 -sg39 -(dp65 -g41 -g60 -sg43 -g53 -sg45 -S'41123093' -p66 -sg47 -S'AT' -p67 -sssssS'flag' -p68 -S'gene_variant' -p69 -sS'NM_001042545.1:c.3033G>T' -p70 -(dp71 -g3 -g4 -sg5 -(lp72 -S'NC_000019.9:g.41123093A>AT automapped to NC_000019.9:g.41123093_41123094insT' -p73 -aS'The displayed variants may be artefacts of aligning NM_001042545.1 with genome build GRCh37' -p74 -aS'NM_001042545.1:c.3033_3035 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p75 -aS'Caution should be used when reporting the displayed variant descriptions' -p76 -aS'If you are unsure, please contact admin' -p77 -asg12 -g4 -sg13 -(lp78 -sg15 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA -p79 -sg17 -S'LTBP4' -p80 -sg19 -(dp81 -g21 -S'NP_001036010.1:p.(Gln1011His)' -p82 -sg23 -S'NP_001036010.1:p.(Q1011H)' -p83 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_001042545.1:c.3033G>T' -p84 -sg31 -S'NG_021201.1:g.29023G>T' -p85 -sg33 -(dp86 -g35 -(dp87 -g37 -S'NC_000019.10:g.40617188G>T' -p88 -sg39 -(dp89 -g41 -g42 -sg43 -g44 -sg45 -S'40617188' -p90 -sg47 -g48 -sssg49 -(dp91 -g37 -S'NC_000019.9:g.41123093_41123094insT' -p92 -sg39 -(dp93 -g41 -g42 -sg43 -g53 -sg45 -S'41123093' -p94 -sg47 -S'AT' -p95 -sssg56 -(dp96 -g37 -S'NC_000019.10:g.40617188G>T' -p97 -sg39 -(dp98 -g41 -g60 -sg43 -g44 -sg45 -S'40617188' -p99 -sg47 -g48 -sssg62 -(dp100 -g37 -S'NC_000019.9:g.41123093_41123094insT' -p101 -sg39 -(dp102 -g41 -g60 -sg43 -g53 -sg45 -S'41123093' -p103 -sg47 -S'AT' -p104 -sssssS'NM_001042544.1:c.3234G>T' -p105 -(dp106 -g3 -g4 -sg5 -(lp107 -S'NC_000019.9:g.41123093A>AT automapped to NC_000019.9:g.41123093_41123094insT' -p108 -aS'The displayed variants may be artefacts of aligning NM_001042544.1 with genome build GRCh37' -p109 
-aS'NM_001042544.1:c.3234_3236 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p110 -aS'Caution should be used when reporting the displayed variant descriptions' -p111 -aS'If you are unsure, please contact admin' -p112 -asg12 -g4 -sg13 -(lp113 -sg15 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA -p114 -sg17 -S'LTBP4' -p115 -sg19 -(dp116 -g21 -S'NP_001036009.1:p.(Gln1078His)' -p117 -sg23 -S'NP_001036009.1:p.(Q1078H)' -p118 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_001042544.1:c.3234G>T' -p119 -sg31 -S'NG_021201.1:g.29023G>T' -p120 -sg33 -(dp121 -g35 -(dp122 -g37 -S'NC_000019.10:g.40617188G>T' -p123 -sg39 -(dp124 -g41 -g42 -sg43 -g44 -sg45 -S'40617188' -p125 -sg47 -g48 -sssg49 -(dp126 -g37 -S'NC_000019.9:g.41123093_41123094insT' -p127 -sg39 -(dp128 -g41 -g42 -sg43 -g53 -sg45 -S'41123093' -p129 -sg47 -S'AT' -p130 -sssg56 -(dp131 -g37 -S'NC_000019.10:g.40617188G>T' -p132 -sg39 -(dp133 -g41 -g60 -sg43 -g44 -sg45 -S'40617188' -p134 -sg47 -g48 -sssg62 -(dp135 -g37 -S'NC_000019.9:g.41123093_41123094insT' -p136 -sg39 -(dp137 -g41 -g60 -sg43 -g53 -sg45 -S'41123093' -p138 -sg47 -S'AT' -p139 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant186.txt b/VariantValidator/testing/testOutputsMasterITS/variant186.txt deleted file mode 100644 index b2c5691f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant186.txt +++ /dev/null @@ -1,387 +0,0 @@ -(dp0 -S'NM_001042544.1:c.3235_3236del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000019.9:g.41123093AG>A automapped to NC_000019.9:g.41123095delG' -p7 -aS'The displayed variants may be artefacts of aligning NM_001042544.1 with genome build GRCh37' -p8 -aS'NM_001042544.1:c.3234_3235 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA -p16 -sS'gene_symbol' -p17 -S'LTBP4' -p18 -sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_001036009.1:p.(Gly1079LeufsTer17)' -p22 -sS'slr' -p23 -S'NP_001036009.1:p.(G1079Lfs*17)' -p24 -ssS'submitted_variant' -p25 -S'19-41123093-AG-A' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'HGVS_LRG_variant' -p28 -g4 -sS'HGVS_transcript_variant' -p29 -S'NM_001042544.1:c.3235_3236del' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_021201.1:g.29024_29025del' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'GRCh38' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000019.10:g.40617189_40617190del' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'19' -p42 -sS'ref' -p43 -S'AGG' -p44 -sS'pos' -p45 -S'40617187' -p46 -sS'alt' -p47 -S'A' -p48 -sssS'GRCh37' -p49 -(dp50 -g37 -S'NC_000019.9:g.41123095del' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -S'AG' -p53 -sg45 -S'41123093' -p54 -sg47 -g48 
-sssS'hg38' -p55 -(dp56 -g37 -S'NC_000019.10:g.40617189_40617190del' -p57 -sg39 -(dp58 -g41 -S'chr19' -p59 -sg43 -S'AGG' -p60 -sg45 -S'40617187' -p61 -sg47 -g48 -sssS'hg19' -p62 -(dp63 -g37 -S'NC_000019.9:g.41123095del' -p64 -sg39 -(dp65 -g41 -g59 -sg43 -S'AG' -p66 -sg45 -S'41123093' -p67 -sg47 -g48 -sssssS'flag' -p68 -S'gene_variant' -p69 -sS'NM_001042545.1:c.3034_3035del' -p70 -(dp71 -g3 -g4 -sg5 -(lp72 -S'NC_000019.9:g.41123093AG>A automapped to NC_000019.9:g.41123095delG' -p73 -aS'The displayed variants may be artefacts of aligning NM_001042545.1 with genome build GRCh37' -p74 -aS'NM_001042545.1:c.3033_3034 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p75 -aS'Caution should be used when reporting the displayed variant descriptions' -p76 -aS'If you are unsure, please contact admin' -p77 -asg12 -g4 -sg13 -(lp78 -sg15 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA -p79 -sg17 -S'LTBP4' -p80 -sg19 -(dp81 -g21 -S'NP_001036010.1:p.(Gly1012LeufsTer17)' -p82 -sg23 -S'NP_001036010.1:p.(G1012Lfs*17)' -p83 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_001042545.1:c.3034_3035del' -p84 -sg31 -S'NG_021201.1:g.29024_29025del' -p85 -sg33 -(dp86 -g35 -(dp87 -g37 -S'NC_000019.10:g.40617189_40617190del' -p88 -sg39 -(dp89 -g41 -g42 -sg43 -S'AGG' -p90 -sg45 -S'40617187' -p91 -sg47 -g48 -sssg49 -(dp92 -g37 -S'NC_000019.9:g.41123095del' -p93 -sg39 -(dp94 -g41 -g42 -sg43 -S'AG' -p95 -sg45 -S'41123093' -p96 -sg47 -g48 -sssg55 -(dp97 -g37 -S'NC_000019.10:g.40617189_40617190del' -p98 -sg39 -(dp99 -g41 -g59 -sg43 -S'AGG' -p100 -sg45 -S'40617187' -p101 -sg47 -g48 -sssg62 -(dp102 -g37 -S'NC_000019.9:g.41123095del' -p103 -sg39 -(dp104 -g41 -g59 -sg43 -S'AG' -p105 -sg45 -S'41123093' -p106 -sg47 -g48 -sssssS'NM_003573.2:c.3124_3125del' -p107 -(dp108 -g3 -g4 -sg5 -(lp109 -S'NC_000019.9:g.41123093AG>A automapped to NC_000019.9:g.41123095delG' -p110 -aS'The displayed variants may be artefacts of aligning 
NM_003573.2 with genome build GRCh37' -p111 -aS'NM_003573.2:c.3123_3124 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p112 -aS'Caution should be used when reporting the displayed variant descriptions' -p113 -aS'If you are unsure, please contact admin' -p114 -asg12 -g4 -sg13 -(lp115 -sg15 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA -p116 -sg17 -S'LTBP4' -p117 -sg19 -(dp118 -g21 -S'NP_003564.2:p.(Gly1042LeufsTer17)' -p119 -sg23 -S'NP_003564.2:p.(G1042Lfs*17)' -p120 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_003573.2:c.3124_3125del' -p121 -sg31 -S'NG_021201.1:g.29024_29025del' -p122 -sg33 -(dp123 -g35 -(dp124 -g37 -S'NC_000019.10:g.40617189_40617190del' -p125 -sg39 -(dp126 -g41 -g42 -sg43 -S'AGG' -p127 -sg45 -S'40617187' -p128 -sg47 -g48 -sssg49 -(dp129 -g37 -S'NC_000019.9:g.41123095del' -p130 -sg39 -(dp131 -g41 -g42 -sg43 -S'AG' -p132 -sg45 -S'41123093' -p133 -sg47 -g48 -sssg55 -(dp134 -g37 -S'NC_000019.10:g.40617189_40617190del' -p135 -sg39 -(dp136 -g41 -g59 -sg43 -S'AGG' -p137 -sg45 -S'40617187' -p138 -sg47 -g48 -sssg62 -(dp139 -g37 -S'NC_000019.9:g.41123095del' -p140 -sg39 -(dp141 -g41 -g59 -sg43 -S'AG' -p142 -sg45 -S'41123093' -p143 -sg47 -g48 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant187.txt b/VariantValidator/testing/testOutputsMasterITS/variant187.txt deleted file mode 100644 index 1b02c04b..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant187.txt +++ /dev/null @@ -1,382 +0,0 @@ -(dp0 -S'NM_001042545.1:c.3035del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000019.9:g.41123093AG>AG automapped to NC_000019.9:g.41123093_41123094AG=' -p7 -aS'The displayed variants may be artefacts of aligning NM_001042545.1 with genome build GRCh37' -p8 -aS'NM_001042545.1:c.3033 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA -p16 -sS'gene_symbol' -p17 -S'LTBP4' -p18 -sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_001036010.1:p.(Gly1012ValfsTer14)' -p22 -sS'slr' -p23 -S'NP_001036010.1:p.(G1012Vfs*14)' -p24 -ssS'submitted_variant' -p25 -S'19-41123093-AG-AG' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'HGVS_LRG_variant' -p28 -g4 -sS'HGVS_transcript_variant' -p29 -S'NM_001042545.1:c.3035del' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_021201.1:g.29025del' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'GRCh38' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000019.10:g.40617190del' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'19' -p42 -sS'ref' -p43 -S'AG' -p44 -sS'pos' -p45 -S'40617187' -p46 -sS'alt' -p47 -S'A' -p48 -sssS'GRCh37' -p49 -(dp50 -g37 -S'NC_000019.9:g.41123094G=' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -S'G' -p53 -sg45 -S'41123094' -p54 -sg47 -g53 -sssS'hg38' -p55 -(dp56 -g37 
-S'NC_000019.10:g.40617190del' -p57 -sg39 -(dp58 -g41 -S'chr19' -p59 -sg43 -S'AG' -p60 -sg45 -S'40617187' -p61 -sg47 -g48 -sssS'hg19' -p62 -(dp63 -g37 -S'NC_000019.9:g.41123094G=' -p64 -sg39 -(dp65 -g41 -g59 -sg43 -g53 -sg45 -S'41123094' -p66 -sg47 -g53 -sssssS'flag' -p67 -S'gene_variant' -p68 -sS'NM_001042544.1:c.3236del' -p69 -(dp70 -g3 -g4 -sg5 -(lp71 -S'NC_000019.9:g.41123093AG>AG automapped to NC_000019.9:g.41123093_41123094AG=' -p72 -aS'The displayed variants may be artefacts of aligning NM_001042544.1 with genome build GRCh37' -p73 -aS'NM_001042544.1:c.3234 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p74 -aS'Caution should be used when reporting the displayed variant descriptions' -p75 -aS'If you are unsure, please contact admin' -p76 -asg12 -g4 -sg13 -(lp77 -sg15 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA -p78 -sg17 -S'LTBP4' -p79 -sg19 -(dp80 -g21 -S'NP_001036009.1:p.(Gly1079ValfsTer14)' -p81 -sg23 -S'NP_001036009.1:p.(G1079Vfs*14)' -p82 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_001042544.1:c.3236del' -p83 -sg31 -S'NG_021201.1:g.29025del' -p84 -sg33 -(dp85 -g35 -(dp86 -g37 -S'NC_000019.10:g.40617190del' -p87 -sg39 -(dp88 -g41 -g42 -sg43 -S'AG' -p89 -sg45 -S'40617187' -p90 -sg47 -g48 -sssg49 -(dp91 -g37 -S'NC_000019.9:g.41123094G=' -p92 -sg39 -(dp93 -g41 -g42 -sg43 -g53 -sg45 -S'41123094' -p94 -sg47 -g53 -sssg55 -(dp95 -g37 -S'NC_000019.10:g.40617190del' -p96 -sg39 -(dp97 -g41 -g59 -sg43 -S'AG' -p98 -sg45 -S'40617187' -p99 -sg47 -g48 -sssg62 -(dp100 -g37 -S'NC_000019.9:g.41123094G=' -p101 -sg39 -(dp102 -g41 -g59 -sg43 -g53 -sg45 -S'41123094' -p103 -sg47 -g53 -sssssS'NM_003573.2:c.3125del' -p104 -(dp105 -g3 -g4 -sg5 -(lp106 -S'NC_000019.9:g.41123093AG>AG automapped to NC_000019.9:g.41123093_41123094AG=' -p107 -aS'The displayed variants may be artefacts of aligning NM_003573.2 with genome build GRCh37' -p108 -aS'NM_003573.2:c.3123 contains 1 transcript base(s) 
that fail to align to chromosome NC_000019.9' -p109 -aS'Caution should be used when reporting the displayed variant descriptions' -p110 -aS'If you are unsure, please contact admin' -p111 -asg12 -g4 -sg13 -(lp112 -sg15 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA -p113 -sg17 -S'LTBP4' -p114 -sg19 -(dp115 -g21 -S'NP_003564.2:p.(Gly1042ValfsTer14)' -p116 -sg23 -S'NP_003564.2:p.(G1042Vfs*14)' -p117 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_003573.2:c.3125del' -p118 -sg31 -S'NG_021201.1:g.29025del' -p119 -sg33 -(dp120 -g35 -(dp121 -g37 -S'NC_000019.10:g.40617190del' -p122 -sg39 -(dp123 -g41 -g42 -sg43 -S'AG' -p124 -sg45 -S'40617187' -p125 -sg47 -g48 -sssg49 -(dp126 -g37 -S'NC_000019.9:g.41123094G=' -p127 -sg39 -(dp128 -g41 -g42 -sg43 -g53 -sg45 -S'41123094' -p129 -sg47 -g53 -sssg55 -(dp130 -g37 -S'NC_000019.10:g.40617190del' -p131 -sg39 -(dp132 -g41 -g59 -sg43 -S'AG' -p133 -sg45 -S'40617187' -p134 -sg47 -g48 -sssg62 -(dp135 -g37 -S'NC_000019.9:g.41123094G=' -p136 -sg39 -(dp137 -g41 -g59 -sg43 -g53 -sg45 -S'41123094' -p138 -sg47 -g53 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant188.txt b/VariantValidator/testing/testOutputsMasterITS/variant188.txt deleted file mode 100644 index 48003081..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant188.txt +++ /dev/null @@ -1,131 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_012309.4:c.913-5058G>A' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -aS'NM_012309.4:c.913-5058G>A cannot be mapped directly to genome build GRCh37' -p10 -aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -g6 -sS'alt_genomic_loci' -p13 -(lp14 -(dp15 -S'GRCh37' -p16 -(dp17 -S'HGVS_genomic_description' -p18 -S'NW_004070871.1:g.574546C>T' -p19 -sS'vcf' -p20 -(dp21 -S'chr' -p22 -S'HG865_PATCH' -p23 -sS'ref' -p24 -VC -p25 -sS'pos' -p26 -S'574546' -p27 -sS'alt' -p28 -VT -p29 -sssasS'transcript_description' -p30 -VHomo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 1, mRNA -p31 -sS'gene_symbol' -p32 -S'SHANK2' -p33 -sS'HGVS_predicted_protein_consequence' -p34 -(dp35 -S'tlr' -p36 -S'NP_036441.2:p.?' -p37 -sS'slr' -p38 -S'NP_036441.2:p.?' -p39 -ssS'submitted_variant' -p40 -S'NM_012309.4:c.913-5058G>A' -p41 -sS'genome_context_intronic_sequence' -p42 -S'NC_000011.10(NM_012309.4):c.913-5058G>A' -p43 -sS'HGVS_LRG_variant' -p44 -g6 -sS'HGVS_transcript_variant' -p45 -S'NM_012309.4:c.913-5058G>A' -p46 -sS'HGVS_RefSeqGene_variant' -p47 -g6 -sS'primary_assembly_loci' -p48 -(dp49 -S'GRCh38' -p50 -(dp51 -g18 -S'NC_000011.10:g.71080333C>T' -p52 -sg20 -(dp53 -g22 -S'11' -p54 -sg24 -g25 -sg26 -S'71080333' -p55 -sg28 -g29 -sssS'hg38' -p56 -(dp57 -g18 -S'NC_000011.10:g.71080333C>T' -p58 -sg20 -(dp59 -g22 -S'chr11' -p60 -sg24 -g25 -sg26 -S'71080333' -p61 -sg28 -g29 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant189.txt b/VariantValidator/testing/testOutputsMasterITS/variant189.txt deleted file mode 100644 index abd64294..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant189.txt +++ /dev/null @@ -1,255 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_004006.2:c.2376G>C ' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_199t1:c.2376G>C' -p6 -sS'validation_warnings' -p7 -(lp8 -S'LRG_199t1:c.2376[G>C];[G>C] automapped to NM_004006.2:c.2376[G>C];[G>C]' -p9 -aS'Automap has extracted possible variant descriptions' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -S'' -p12 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA -p16 -sS'gene_symbol' -p17 -S'DMD' -p18 -sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_003997.1(LRG_199p1):p.(Val792=)' -p22 -sS'slr' -p23 -S'NP_003997.1:p.(V792=)' -p24 -ssS'submitted_variant' -p25 -S'LRG_199t1:c.2376[G>C];[G>C]' -p26 -sS'genome_context_intronic_sequence' -p27 -g12 -sS'HGVS_LRG_variant' -p28 -S'LRG_199:g.842851G>C' -p29 -sS'HGVS_transcript_variant' -p30 -S'NM_004006.2:c.2376G>C' -p31 -sS'HGVS_RefSeqGene_variant' -p32 -S'NG_012232.1:g.842851G>C' -p33 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000023.10:g.32519876C>G' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chrX' -p43 -sS'ref' -p44 -VC -p45 -sS'pos' -p46 -S'32519876' -p47 -sS'alt' -p48 -VG -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000023.11:g.32501759C>G' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -g45 -sg46 -S'32501759' -p54 -sg48 -g49 -sssS'GRCh37' -p55 -(dp56 -g38 -S'NC_000023.10:g.32519876C>G' -p57 -sg40 -(dp58 -g42 -S'X' -p59 -sg44 -g45 -sg46 -S'32519876' -p60 -sg48 -g49 -sssS'GRCh38' -p61 -(dp62 -g38 -S'NC_000023.11:g.32501759C>G' -p63 -sg40 -(dp64 -g42 -g59 -sg44 -g45 -sg46 -S'32501759' -p65 -sg48 -g49 
-sssssS'NM_004006.2:c.2376G>C' -p66 -(dp67 -g5 -S'LRG_199t1:c.2376G>C' -p68 -sg7 -(lp69 -S'LRG_199t1:c.2376[G>C];[G>C] automapped to NM_004006.2:c.2376[G>C];[G>C]' -p70 -aS'Automap has extracted possible variant descriptions' -p71 -asg11 -g12 -sg13 -(lp72 -sg15 -VHomo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA -p73 -sg17 -S'DMD' -p74 -sg19 -(dp75 -g21 -S'NP_003997.1(LRG_199p1):p.(Val792=)' -p76 -sg23 -S'NP_003997.1:p.(V792=)' -p77 -ssg25 -g26 -sg27 -g12 -sg28 -S'LRG_199:g.842851G>C' -p78 -sg30 -S'NM_004006.2:c.2376G>C' -p79 -sg32 -S'NG_012232.1:g.842851G>C' -p80 -sg34 -(dp81 -g36 -(dp82 -g38 -S'NC_000023.10:g.32519876C>G' -p83 -sg40 -(dp84 -g42 -g43 -sg44 -g45 -sg46 -S'32519876' -p85 -sg48 -g49 -sssg50 -(dp86 -g38 -S'NC_000023.11:g.32501759C>G' -p87 -sg40 -(dp88 -g42 -g43 -sg44 -g45 -sg46 -S'32501759' -p89 -sg48 -g49 -sssg55 -(dp90 -g38 -S'NC_000023.10:g.32519876C>G' -p91 -sg40 -(dp92 -g42 -g59 -sg44 -g45 -sg46 -S'32519876' -p93 -sg48 -g49 -sssg61 -(dp94 -g38 -S'NC_000023.11:g.32501759C>G' -p95 -sg40 -(dp96 -g42 -g59 -sg44 -g45 -sg46 -S'32501759' -p97 -sg48 -g49 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant19.txt b/VariantValidator/testing/testOutputsMasterITS/variant19.txt deleted file mode 100644 index 21077d1a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant19.txt +++ /dev/null @@ -1,60 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant' -p7 -aS'Instead use NC_000011.9:g.5246486_5246956delinsAAGTAG' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -g4 -sS'gene_symbol' -p13 -g4 -sS'HGVS_predicted_protein_consequence' -p14 -(dp15 -S'tlr' -p16 -g4 -sS'slr' -p17 -g4 -ssS'submitted_variant' -p18 -S'NM_000518.4:c.316_*342delinsCTACTT' -p19 -sS'genome_context_intronic_sequence' -p20 -g4 -sS'HGVS_LRG_variant' -p21 -g4 -sS'HGVS_transcript_variant' -p22 -g4 -sS'HGVS_RefSeqGene_variant' -p23 -g4 -sS'primary_assembly_loci' -p24 -(dp25 -ssS'flag' -p26 -S'warning' -p27 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant190.txt b/VariantValidator/testing/testOutputsMasterITS/variant190.txt deleted file mode 100644 index 00e36002..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant190.txt +++ /dev/null @@ -1,260 +0,0 @@ -(dp0 -S'NM_004006.2:c.3103del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_199t1:c.3103del' -p4 -sS'validation_warnings' -p5 -(lp6 -S'LRG_199t1:c.[2376G>C];[3103del] automapped to NM_004006.2:c.[2376G>C];[3103del]' -p7 -aS'Automap has extracted possible variant descriptions' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -S'' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA -p14 -sS'gene_symbol' -p15 -S'DMD' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_003997.1(LRG_199p1):p.(Gln1035SerfsTer9)' -p20 -sS'slr' -p21 -S'NP_003997.1:p.(Q1035Sfs*9)' -p22 -ssS'submitted_variant' -p23 -S'LRG_199t1:c.[2376G>C];[3103del]' -p24 -sS'genome_context_intronic_sequence' -p25 -g10 -sS'HGVS_LRG_variant' -p26 -S'LRG_199:g.876053del' -p27 -sS'HGVS_transcript_variant' -p28 -S'NM_004006.2:c.3103del' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_012232.1:g.876053del' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000023.10:g.32486674del' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chrX' -p41 -sS'ref' -p42 -S'TG' -p43 -sS'pos' -p44 -S'32486673' -p45 -sS'alt' -p46 -S'T' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000023.11:g.32468557del' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'TG' -p52 -sg44 -S'32468556' -p53 -sg46 -g47 -sssS'GRCh37' -p54 -(dp55 -g36 -S'NC_000023.10:g.32486674del' -p56 -sg38 -(dp57 -g40 -S'X' -p58 -sg42 -S'TG' -p59 -sg44 -S'32486673' -p60 -sg46 -g47 -sssS'GRCh38' -p61 -(dp62 -g36 -S'NC_000023.11:g.32468557del' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -S'TG' -p65 -sg44 -S'32468556' -p66 -sg46 
-g47 -sssssS'flag' -p67 -S'gene_variant' -p68 -sS'NM_004006.2:c.2376G>C' -p69 -(dp70 -g3 -S'LRG_199t1:c.2376G>C' -p71 -sg5 -(lp72 -S'LRG_199t1:c.[2376G>C];[3103del] automapped to NM_004006.2:c.[2376G>C];[3103del]' -p73 -aS'Automap has extracted possible variant descriptions' -p74 -asg9 -g10 -sg11 -(lp75 -sg13 -VHomo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA -p76 -sg15 -S'DMD' -p77 -sg17 -(dp78 -g19 -S'NP_003997.1(LRG_199p1):p.(Val792=)' -p79 -sg21 -S'NP_003997.1:p.(V792=)' -p80 -ssg23 -g24 -sg25 -g10 -sg26 -S'LRG_199:g.842851G>C' -p81 -sg28 -S'NM_004006.2:c.2376G>C' -p82 -sg30 -S'NG_012232.1:g.842851G>C' -p83 -sg32 -(dp84 -g34 -(dp85 -g36 -S'NC_000023.10:g.32519876C>G' -p86 -sg38 -(dp87 -g40 -g41 -sg42 -VC -p88 -sg44 -S'32519876' -p89 -sg46 -VG -p90 -sssg48 -(dp91 -g36 -S'NC_000023.11:g.32501759C>G' -p92 -sg38 -(dp93 -g40 -g41 -sg42 -g88 -sg44 -S'32501759' -p94 -sg46 -g90 -sssg54 -(dp95 -g36 -S'NC_000023.10:g.32519876C>G' -p96 -sg38 -(dp97 -g40 -g58 -sg42 -g88 -sg44 -S'32519876' -p98 -sg46 -g90 -sssg61 -(dp99 -g36 -S'NC_000023.11:g.32501759C>G' -p100 -sg38 -(dp101 -g40 -g58 -sg42 -g88 -sg44 -S'32501759' -p102 -sg46 -g90 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant191.txt b/VariantValidator/testing/testOutputsMasterITS/variant191.txt deleted file mode 100644 index a3bb1db6..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant191.txt +++ /dev/null @@ -1,149 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_004006.2:c.4358_4372delinsG' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_199t1:c.4358_4372delinsG' -p6 -sS'validation_warnings' -p7 -(lp8 -S'LRG_199t1:c.[4358_4359del;4361_4372del] automapped to NM_004006.2:c.[4358_4359del;4361_4372del]' -p9 -aS'Automap has extracted possible variant descriptions' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -S'' -p12 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA -p16 -sS'gene_symbol' -p17 -S'DMD' -p18 -sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_003997.1(LRG_199p1):p.(Asp1453GlyfsTer11)' -p22 -sS'slr' -p23 -S'NP_003997.1:p.(D1453Gfs*11)' -p24 -ssS'submitted_variant' -p25 -S'LRG_199t1:c.[4358_4359del;4361_4372del]' -p26 -sS'genome_context_intronic_sequence' -p27 -g12 -sS'HGVS_LRG_variant' -p28 -S'LRG_199:g.954949_954963delinsG' -p29 -sS'HGVS_transcript_variant' -p30 -S'NM_004006.2:c.4358_4372delinsG' -p31 -sS'HGVS_RefSeqGene_variant' -p32 -S'NG_012232.1:g.954949_954963delinsG' -p33 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000023.10:g.32407764_32407778delinsC' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chrX' -p43 -sS'ref' -p44 -S'ACTTCATGGAGACAT' -p45 -sS'pos' -p46 -S'32407764' -p47 -sS'alt' -p48 -VC -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000023.11:g.32389647_32389661delinsC' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'ACTTCATGGAGACAT' -p54 -sg46 -S'32389647' -p55 -sg48 -g49 -sssS'GRCh37' -p56 -(dp57 -g38 -S'NC_000023.10:g.32407764_32407778delinsC' -p58 -sg40 -(dp59 -g42 -S'X' -p60 -sg44 
-S'ACTTCATGGAGACAT' -p61 -sg46 -S'32407764' -p62 -sg48 -g49 -sssS'GRCh38' -p63 -(dp64 -g38 -S'NC_000023.11:g.32389647_32389661delinsC' -p65 -sg40 -(dp66 -g42 -g60 -sg44 -S'ACTTCATGGAGACAT' -p67 -sg46 -S'32389647' -p68 -sg48 -g49 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant192.txt b/VariantValidator/testing/testOutputsMasterITS/variant192.txt deleted file mode 100644 index 7ef4df0e..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant192.txt +++ /dev/null @@ -1,260 +0,0 @@ -(dp0 -S'NM_004006.2:c.3103del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_199t1:c.3103del' -p4 -sS'validation_warnings' -p5 -(lp6 -S'LRG_199t1:c.2376G>C(;)3103del automapped to NM_004006.2:c.2376G>C(;)3103del' -p7 -aS'Automap has extracted possible variant descriptions' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -S'' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA -p14 -sS'gene_symbol' -p15 -S'DMD' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_003997.1(LRG_199p1):p.(Gln1035SerfsTer9)' -p20 -sS'slr' -p21 -S'NP_003997.1:p.(Q1035Sfs*9)' -p22 -ssS'submitted_variant' -p23 -S'LRG_199t1:c.2376G>C(;)3103del' -p24 -sS'genome_context_intronic_sequence' -p25 -g10 -sS'HGVS_LRG_variant' -p26 -S'LRG_199:g.876053del' -p27 -sS'HGVS_transcript_variant' -p28 -S'NM_004006.2:c.3103del' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_012232.1:g.876053del' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000023.10:g.32486674del' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chrX' -p41 -sS'ref' -p42 -S'TG' -p43 -sS'pos' -p44 -S'32486673' -p45 -sS'alt' -p46 -S'T' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000023.11:g.32468557del' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'TG' -p52 -sg44 -S'32468556' -p53 -sg46 -g47 -sssS'GRCh37' -p54 -(dp55 -g36 
-S'NC_000023.10:g.32486674del' -p56 -sg38 -(dp57 -g40 -S'X' -p58 -sg42 -S'TG' -p59 -sg44 -S'32486673' -p60 -sg46 -g47 -sssS'GRCh38' -p61 -(dp62 -g36 -S'NC_000023.11:g.32468557del' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -S'TG' -p65 -sg44 -S'32468556' -p66 -sg46 -g47 -sssssS'flag' -p67 -S'gene_variant' -p68 -sS'NM_004006.2:c.2376G>C' -p69 -(dp70 -g3 -S'LRG_199t1:c.2376G>C' -p71 -sg5 -(lp72 -S'LRG_199t1:c.2376G>C(;)3103del automapped to NM_004006.2:c.2376G>C(;)3103del' -p73 -aS'Automap has extracted possible variant descriptions' -p74 -asg9 -g10 -sg11 -(lp75 -sg13 -VHomo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA -p76 -sg15 -S'DMD' -p77 -sg17 -(dp78 -g19 -S'NP_003997.1(LRG_199p1):p.(Val792=)' -p79 -sg21 -S'NP_003997.1:p.(V792=)' -p80 -ssg23 -g24 -sg25 -g10 -sg26 -S'LRG_199:g.842851G>C' -p81 -sg28 -S'NM_004006.2:c.2376G>C' -p82 -sg30 -S'NG_012232.1:g.842851G>C' -p83 -sg32 -(dp84 -g34 -(dp85 -g36 -S'NC_000023.10:g.32519876C>G' -p86 -sg38 -(dp87 -g40 -g41 -sg42 -VC -p88 -sg44 -S'32519876' -p89 -sg46 -VG -p90 -sssg48 -(dp91 -g36 -S'NC_000023.11:g.32501759C>G' -p92 -sg38 -(dp93 -g40 -g41 -sg42 -g88 -sg44 -S'32501759' -p94 -sg46 -g90 -sssg54 -(dp95 -g36 -S'NC_000023.10:g.32519876C>G' -p96 -sg38 -(dp97 -g40 -g58 -sg42 -g88 -sg44 -S'32519876' -p98 -sg46 -g90 -sssg61 -(dp99 -g36 -S'NC_000023.11:g.32501759C>G' -p100 -sg38 -(dp101 -g40 -g58 -sg42 -g88 -sg44 -S'32501759' -p102 -sg46 -g90 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant193.txt b/VariantValidator/testing/testOutputsMasterITS/variant193.txt deleted file mode 100644 index df92a753..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant193.txt +++ /dev/null @@ -1,146 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_004006.2:c.2376G>C' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_199t1:c.2376G>C' -p6 -sS'validation_warnings' -p7 -(lp8 -S'LRG_199t1:c.2376[G>C];[(G>C)] automapped to NM_004006.2:c.2376[G>C];[(G>C)]' -p9 -aS'Automap has extracted possible variant descriptions' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -S'' -p12 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA -p16 -sS'gene_symbol' -p17 -S'DMD' -p18 -sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_003997.1(LRG_199p1):p.(Val792=)' -p22 -sS'slr' -p23 -S'NP_003997.1:p.(V792=)' -p24 -ssS'submitted_variant' -p25 -S'LRG_199t1:c.2376[G>C];[(G>C)]' -p26 -sS'genome_context_intronic_sequence' -p27 -g12 -sS'HGVS_LRG_variant' -p28 -S'LRG_199:g.842851G>C' -p29 -sS'HGVS_transcript_variant' -p30 -S'NM_004006.2:c.2376G>C' -p31 -sS'HGVS_RefSeqGene_variant' -p32 -S'NG_012232.1:g.842851G>C' -p33 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000023.10:g.32519876C>G' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chrX' -p43 -sS'ref' -p44 -VC -p45 -sS'pos' -p46 -S'32519876' -p47 -sS'alt' -p48 -VG -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000023.11:g.32501759C>G' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -g45 -sg46 -S'32501759' -p54 -sg48 -g49 -sssS'GRCh37' -p55 -(dp56 -g38 -S'NC_000023.10:g.32519876C>G' -p57 -sg40 -(dp58 -g42 -S'X' -p59 -sg44 -g45 -sg46 -S'32519876' -p60 -sg48 -g49 -sssS'GRCh38' -p61 -(dp62 -g38 -S'NC_000023.11:g.32501759C>G' -p63 -sg40 -(dp64 -g42 -g59 -sg44 -g45 -sg46 -S'32501759' -p65 -sg48 -g49 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant194.txt b/VariantValidator/testing/testOutputsMasterITS/variant194.txt deleted file mode 100644 index 1225e020..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant194.txt +++ /dev/null @@ -1,146 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_004006.2:c.2376G>C' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_199t1:c.2376G>C' -p6 -sS'validation_warnings' -p7 -(lp8 -S'LRG_199t1:c.[2376G>C];[?] automapped to NM_004006.2:c.[2376G>C];[?]' -p9 -aS'Automap has extracted possible variant descriptions' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -S'' -p12 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA -p16 -sS'gene_symbol' -p17 -S'DMD' -p18 -sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_003997.1(LRG_199p1):p.(Val792=)' -p22 -sS'slr' -p23 -S'NP_003997.1:p.(V792=)' -p24 -ssS'submitted_variant' -p25 -S'LRG_199t1:c.[2376G>C];[?]' -p26 -sS'genome_context_intronic_sequence' -p27 -g12 -sS'HGVS_LRG_variant' -p28 -S'LRG_199:g.842851G>C' -p29 -sS'HGVS_transcript_variant' -p30 -S'NM_004006.2:c.2376G>C' -p31 -sS'HGVS_RefSeqGene_variant' -p32 -S'NG_012232.1:g.842851G>C' -p33 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000023.10:g.32519876C>G' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chrX' -p43 -sS'ref' -p44 -VC -p45 -sS'pos' -p46 -S'32519876' -p47 -sS'alt' -p48 -VG -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000023.11:g.32501759C>G' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -g45 -sg46 -S'32501759' -p54 -sg48 -g49 -sssS'GRCh37' -p55 -(dp56 -g38 -S'NC_000023.10:g.32519876C>G' -p57 -sg40 -(dp58 -g42 -S'X' -p59 -sg44 -g45 -sg46 -S'32519876' -p60 -sg48 -g49 -sssS'GRCh38' -p61 -(dp62 -g38 -S'NC_000023.11:g.32501759C>G' -p63 -sg40 -(dp64 -g42 -g59 -sg44 -g45 -sg46 -S'32501759' -p65 -sg48 -g49 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant195.txt b/VariantValidator/testing/testOutputsMasterITS/variant195.txt deleted file mode 100644 index b2fa8b9e..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant195.txt +++ /dev/null @@ -1,377 +0,0 @@ -(dp0 -S'NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_199t1:c.296_358-3delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGC' -p4 -sS'validation_warnings' -p5 -(lp6 -S'LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C automapped to NM_004006.2:c.[296T>G;476T=];[476T=](;)1083A>C' -p7 -aS'Automap has extracted possible variant descriptions' -p8 -aS'This coding sequence variant description spans at least one intron' -p9 -aS'Use of the corresponding genomic sequence variant descriptions may be invalid. 
Please refer to https://www35.lamp.le.ac.uk/recommendations/' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -S'' -p12 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA -p16 -sS'gene_symbol' -p17 -S'DMD' -p18 -sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_003997.1(LRG_199p1):p.(Ile99Ser)' -p22 -sS'slr' -p23 -S'NP_003997.1:p.(I99S)' -p24 -ssS'submitted_variant' -p25 -S'LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C' -p26 -sS'genome_context_intronic_sequence' -p27 -g12 -sS'HGVS_LRG_variant' -p28 -S'LRG_199:g.521254_527967delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGC' -p29 -sS'HGVS_transcript_variant' -p30 -S'NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT' -p31 -sS'HGVS_RefSeqGene_variant' -p32 -S'NG_012232.1:g.521254_527967delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGC' -p33 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000023.10:g.32834639_32841473delinsAAGTTGATTACATTAACCTGTGGATAATTACGAGTTGATTGTCGGACCCAGCTCAGGAGAATCTTTTCACTGTTGGTTTGTTGCAATCCAGCCATGATATTTTTCATTACATTTTTGACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chrX' -p43 -sS'ref' -p44 
-S'ACATGTGGAAATAAATTTTCATAAGAAAATGCATTCCTTGAGCAAGAACCATGCAAACTTAAATATGAATGTCCTTGATCTTCAGTGATAAATAGAAATTTTAGGGCCAATTAGTAATGAGACATAATAGATTCTACCAGAAGTTAAGTCTATTCTCAAAGGCTAGGAGTCTATTCTGATTCATTGGTATCTATGCCATACCATTTACTGAATGTGTTCACTATTACTCCTGGATTCTGCCATATTAGACCCAACATTTACTGTTTATAGAATTGCTCACCTTCTAGCTTTCAATGAACAAGCTAGAGAAGACAGTGTATAAAACATTAGACTTATTGTTAACCAAGGACTTAAGATATAAAAGCGCTTTTTGGATAGGAGGAATACTATAGATAATTGTTTTTTAAAGTATATGTTCAAATTTTGCCTTTGCTATTAATTGTGGTAATTGGACAAGTTGCTTAACCTCTTCTAGGCCTCATTTCTGTGTATCTTACATGCAAGGTTAGTGTAAGATCAAAGTACAAAATATGGTCAGATAAAACCCCTGATTTCATACCATCATTTAACGCAAGCTATATTTATTGGCTGAACTGAAAAGAAGACAGAAATGTTTGCCTACCTAAAATAAAGAAAAAAAACAACAAAGGAAGGAAATATTTATCCCTAAGAGGTGTACATATGTAGCCTTGGTTCTATTTTCATTACTTGGTTTCCATGTATTTGATTGTCTCATAGGCTTCGTGCATGTGCAGATGTTATTCAACAGATTATCAAGTTTAATTCTTCTGAGTCATCTGATGCAGAAATCCATTCTAAGCATTTTTTCTAAGTTTCATGAATGCAGTGACGCTTCATAGAAAAAACTGTCTACATCTTTTATTAAAGAATTATTATTTATTAAAGACTGCTTTTTAAATATAAGCCCCTTATGAGATGCTGGAAGAAGACATCTATGTAAAAGTGATCAGTCTGTACCACTAAGCACTTTCCTTCTGGGAAGGTTAAGGTTCATATACTATCAACATTTGATCAGATGATGAATATTTATTGTGCATTCTAAATATGACAATTTTATTTTTTCATCCATATAGAAGTTAACAAATGCCTTTGGGGTAAATGCATATTTTAACATCTCCAATGTTAGTCATCTTTTTATTAAAAAGTAAATGCAAATTTTAGGATATACTACTAAATCTCATTTTAACATACTTTTAAAAGATTTGAGATGATGCTTTGTAAACACAGTAACGATTCGGAGATATGAAATCTTTGCATGTATCTGTCACAATCCCTTGATAACTAAGCCTATTATTTAGGAGTGGATTATCCAAGTTTTCCTAATTCCTTCTTGACCTACTTATTTAGTCAGTTACTCAGTGAATACAGGTCAAATAATTACGTAATTTGTCTCATATTCTAAAATTATAAATTGCTCAACCTTTATGTACAGTGACTGCAGGAGGTGTATTTTGTGTCCTGTGCTAAATTACGGTTGGAATCAGCAGATAGTGGCTTAATGGTGGTAATGGATTTGCAATAATCAAGCAATCCTCCAATGATGAATGTGATTCATTTGTGAAATGCTTGGCCAGTCTGCCCTCTATGTGCATAACGTTAAAGGACAACAAAGTATTCAATCCAAAACTCCTTTATAGCATCATTATGTCACTAATTCAGCTTATCAGCCTCAGCCTCGCTGTTATTGCTTCTACTTAGGTGCAAATGTTGCTCAGACAAAAAAAAATAATAAAGGGCAATGTATGATAATCATGTCTCCTTTTCCTACATAAAAGGCAGTTATCAAAAAATCATTGAGCTGGTCCCCAAAGGTGAGCCTGTAAACAGAAAACTGTCCCGTCCTCAGGTCTCACTGTCTCTAAAGGAGGCTCAATTTAAACAGATGTATTTGTGTGTGTGCGCGCATACAACTATAAATCAATTTTTAAAAGGAAAAAAGTTGAAATGTCTTTGAGCAGTGATTTTTAAAAATGACT
AGGAGTTAACTGGGCCAAGAAGGCATGATGGACATGGAGGTGCACCACCAAGATTGACCTTCAAGGAAGGGCTTGTTGCTCCAGCTGTGGTCAGGCAGCTTCTAGTTGTTAGTTCTCTCAGGTACCACCTGAGTTGCAGAGTCCTGCCAGCCAATGTCACACCCTCTCTAGGGCAACCCACTACTATTGACTGATGAAGGGGCAGAGTATAAACACTCAGCCATTTTAACCCCATTTGAGACAACTCTGAGGGGTCATCCTAGCTGCAAGTTGTCTATGGGGTTTGAATAAGCGTGTCATAGGGTCTACGTCACAGCTCTAGTTCTTCCTCTTTCCACTCCTGTCACCTCTGCCCTTCTACAGGTGTTTTTTTTTTTTTTTTTTTTTCCAGGGCATGGCTTAATAAACATGCTGCCTGCCAAACTCAACTCCGAAGTCTACTCCCCAGAACCCAGACTGTGATTAAAAGAAGTTGCAGCAGAGGAATAGAAATGAAGAGGGTAGGAATAATCTTATATGGTAAGGATAGTTTCTGTGAAGCTCTGCAGAAGAGCAGAGCAAGAACTGTCTTGCACAGAAGTGACTTGGTAGGCAGGAATTTACAGGGATGGCTTTCAGCAGTGCGAACGTCATCATTCTCCTGTAATTGCAAATTGTATCAATAATATGAACATTCTAAAGTGAATTAGCGTTGCCATTAAGTACATCCCTTGAATTGTCCAGTAAAAGTATGGACCTGACTTCTGATAGCTCACCACTGTTAGCAAGATTTGTTATTACAACCAGCTGGTTGCCAGAGCCAAGTTTAGCACTCCGAAAAAAAAAGAATTGCACTTAAGAAGGAAGAAAAAGATGCCAAACACATGTTATATGCTTTGCAATTTTGTTCAAACCTGCCTATGTTACAAAATATGCGTTACTGTTTTAGGCTAGCAATTTTATTTGAGATGGACAGTAGAAAAACAGTAATAGGAAAACAGAGTTTGCATTCCTGAGTTTTCACAAGTTCAATGAGAAGTCCTCCATGGCACTATTAATAACATAGTATGTTGCATTAGAAAAAGGGGCCAACATGGTTCTATCTGTAAATGCCGCTCGTTATCTGATAGATCTAGAATGTACATTAAGTTGACCAAAATGTCCTTTAATCTTTGCTGAAATTCAAGGGCTTCTACCATTTAATAGGAAGAAAATATGTAATGTTGGTGTAAAAAAAAAAAAAAAAAAGAAAAACACATTTTTAGATTCACTGGATTATGATGAACACTGAGCAAGATAATTTTTCAAAGTGTGCTTAATAGTTTTAGAACAGGGCAGTGTGATATAAGAGAGATGAGATAAGTAAGCAGAGGCTGGGAAGAGTCTACAGTCTTGCAGTCTCAAGGATATGGTGATTCAAGGGCATAAATGAAAAACAACAGGCTATAAGAGAGGTCTCAGACAAAGAAAGATTTAAAAAGCATTCCTTCTGATATTCCTCATTGCAAAAGCGCCTCCACATGCTAATAATGAAAAATGGGCTGGGTGCGGTGGCTCACGCCTGTAATCCAAACACTTTGGGAGGCCGAGACGGGCGGATCACAAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCATCTCTACTGAACAAAATACAAAAAATTAGCCGGGCGTGGTGGCGGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAAGAGAATGGCATGAACCCAGGAGGCGGAGCTTGCAGTGAGCAGAGATCGCGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCTGTCTCAAAAATAAAAAATAAAAAAATAATAATAAATGAACAACTTGCAAGCAAGTGAAGAAATTATCAGACATAATAGTCTTGAAAGCTTATCTTGGACTAGTTCAGATGGAGTTGGAAAGGGCTTCAGGATATTTTTAAAAATGGTAAGATCATATAAGAAGTAGTATAAACAATAAATACAGTTCAGAGTGCCTTTTGGAG
CTCTGTGCATGCTTGAGTTCTTCTGCTGGCTTTGTCAAAACTTACATCTGTCTTACGCAAAGCGGGATCTGCAAACTGGATTCACTCTAGGAATTATTTGTTACTATGCCATAAGATAAAGTCAACAATTCAGAGTGCATCAAGAAACTTATGTAGCGATTTTACAGAGTACCATTTATGTCTAATGTATTTAATAATAAAGGAGGAAGCATGGGTGTTGGGCATCTTATTTTTGTAACGCTTTGGTTTTATTCTATCTTACATACAATGGATTAGGGGAAAAGTGTTCCTTCCTCAAGATATTTTGAGAAGTACTGAGCAACATATGAAAAGCAGTTTGGGGAGAGATGCGGTATGTTGCTTGCTGGTTCTATTTGACACCATCACTATATGGAACGGGCTGAAAATCGGCCAACTTGGGCTCACTTAAGGCTCCTATGAGCTATTCTTTGTTGCCAGCACATATTAATTCCCGCTCTTTCTCTTCCCCTCTCCCCGCTTACTGTTGTGAAGTAGCATTAAGCCTGTTCAGAGAATTTGGAATAAAAATATATGGGGGCCAATTAGGAGAGCAACATGGCTGCTGAATTTAATAGGTACTCTTTGTGTCTACTCACTATTTGACTCTGAGCAAGAGCAATCTACAATTCAAGTTAAAATTCAAGCCGGGCGCAGTGGCTCATGCCTGCAATCCCAGCACTTTGGGAGGCCGAGGCAGGAGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAAAAATTAGCCGGGCGTGGTGGGGGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAATCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCAAGACTCTGTCTCAAAATAAAAATAAAAAAAAATAAAAATAAAAAATAAAAAAATTCAAGTTTACCGAATGAAGCCATGATATCTGCTACAATAGGGCTGGATCTGAACTATTACCCTCATTGCAAGAAGCCAGACACAAAAGAACATGTATTATATTGATATAAAATATCCAAAAAGGCAAATACATATGGACAGAAATAGATTGACAATTGCCTAGGGCTGGGGTGGGAGTGAAAAATGACTGTAAATGGAGAAGAAACTTCTTTTTAAGGTGGTGGAAAAGTGCCAAAATTAGATCGCTATGATGGCTATATAACTCTGTAAATATCCTGCAAATTTTAATGAACGTGGTGGGGGGGAGTTATTCATCCCTCAAAATAGTTTGAGAGGCTACCAAGAACCAGAACTAAACCAGATTGATGCAAATTGAATTTTACACTTAGAATGAGTACATTGATGGTCTAACAATTACATTTCAAAAACGCTGATTTATAAAAAAGTAATCCCGACCAGTAAAAACAGCAGTCATTTCTCAGCCTCCAAATTTTTTAAATGAGTGGAAAACTTTAAGCAATCAATACAAGTCATATGATACACCTCAAATAAAAGTGGTATACTACACTGAGTGCCTTGCAGATATTCTCCCACTTAATCGTGATAAGAAATACATACACATTTATTTTTATTTTTACTCTTTTGATAGTGAAGGGACCATGGCCCAGAACAGGGGTGGCAATATACCAAATGCCACTAAGATGGTAACCGTGGAAGTTTAAATTCAAGTTCATGTTGGTATATATATATACACATACATATTTATACATATGTGTATAACGAGTTACTATTACGTATATATATACAATGAGTGGAAAATATTAGTGGATATAAGTTCATGTATGATTATGTATACATATTATGATTATACTATTATGTATATGTATACAATGAGTGGATAATATTAGTGGATATAAGTTCATGTTGGTCTTATATATACATATATAATGTGTGGATATGAGTGTGTATATATATACATATATAATGTGTGGATATGAGTGTGTGTATAT
ACACACATGCATATTGTATATATGTGTGTATATATAGACACATTATATATATACGCATATATACTACACACACACACACACGGGTGTGTGTCTGTATCTTTTCCACAAATCCTTCAACCCATTTTGCAGAGGTCAAATAGACAGTCGGAAGACCCTATGCTCAGGTGACTTAAAAATAATTTCCAAATCACATTATGGAGTTTGTATGTATTACACACATTTATTGATAGAGATACCCATATTCTACTAATCTTTTATTGGCAATAATTTATGTTAAGAATACCCAAGACTGAGAAAGCCTCATTCCTTTGGTAGTGATTAAAATAAAACATACTAAATTAACTTATAGACAAGTTATAGAACATACATTTGTGAAAAAAATTACTCACCTATGATTGGGACTTTGTATTTTTACCTTATACTTACTCAATGAAATAAAATTTTGAAAAATATTCCTGTAAATGTACCAGAACCTATTTTATACCGTGATGATCCTTAACATTTCAGACGACATGGTAGTGTCAATTTAAAAAGCAGCACTATGGAGCAGGGTTTGTTATTGTTAGAAATACACATTTGTTTCACACGTCAAGGGTAAAAATTAAAAAACAAGATTAATGTTACCCAAAAGGAAACCATTCATCAGGATTCTTACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGA' -p45 -sS'pos' -p46 -S'32834760' -p47 -sS'alt' -p48 -VGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000023.11:g.32816522_32823356delinsAAGTTGATTACATTAACCTGTGGATAATTACGAGTTGATTGTCGGACCCAGCTCAGGAGAATCTTTTCACTGTTGGTTTGTTGCAATCCAGCCATGATATTTTTCATTACATTTTTGACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC' -p52 -sg40 -(dp53 -g42 -g43 -sg44 
-S'ACATGTGGAAATAAATTTTCATAAGAAAATGCATTCCTTGAGCAAGAACCATGCAAACTTAAATATGAATGTCCTTGATCTTCAGTGATAAATAGAAATTTTAGGGCCAATTAGTAATGAGACATAATAGATTCTACCAGAAGTTAAGTCTATTCTCAAAGGCTAGGAGTCTATTCTGATTCATTGGTATCTATGCCATACCATTTACTGAATGTGTTCACTATTACTCCTGGATTCTGCCATATTAGACCCAACATTTACTGTTTATAGAATTGCTCACCTTCTAGCTTTCAATGAACAAGCTAGAGAAGACAGTGTATAAAACATTAGACTTATTGTTAACCAAGGACTTAAGATATAAAAGCGCTTTTTGGATAGGAGGAATACTATAGATAATTGTTTTTTAAAGTATATGTTCAAATTTTGCCTTTGCTATTAATTGTGGTAATTGGACAAGTTGCTTAACCTCTTCTAGGCCTCATTTCTGTGTATCTTACATGCAAGGTTAGTGTAAGATCAAAGTACAAAATATGGTCAGATAAAACCCCTGATTTCATACCATCATTTAACGCAAGCTATATTTATTGGCTGAACTGAAAAGAAGACAGAAATGTTTGCCTACCTAAAATAAAGAAAAAAAACAACAAAGGAAGGAAATATTTATCCCTAAGAGGTGTACATATGTAGCCTTGGTTCTATTTTCATTACTTGGTTTCCATGTATTTGATTGTCTCATAGGCTTCGTGCATGTGCAGATGTTATTCAACAGATTATCAAGTTTAATTCTTCTGAGTCATCTGATGCAGAAATCCATTCTAAGCATTTTTTCTAAGTTTCATGAATGCAGTGACGCTTCATAGAAAAAACTGTCTACATCTTTTATTAAAGAATTATTATTTATTAAAGACTGCTTTTTAAATATAAGCCCCTTATGAGATGCTGGAAGAAGACATCTATGTAAAAGTGATCAGTCTGTACCACTAAGCACTTTCCTTCTGGGAAGGTTAAGGTTCATATACTATCAACATTTGATCAGATGATGAATATTTATTGTGCATTCTAAATATGACAATTTTATTTTTTCATCCATATAGAAGTTAACAAATGCCTTTGGGGTAAATGCATATTTTAACATCTCCAATGTTAGTCATCTTTTTATTAAAAAGTAAATGCAAATTTTAGGATATACTACTAAATCTCATTTTAACATACTTTTAAAAGATTTGAGATGATGCTTTGTAAACACAGTAACGATTCGGAGATATGAAATCTTTGCATGTATCTGTCACAATCCCTTGATAACTAAGCCTATTATTTAGGAGTGGATTATCCAAGTTTTCCTAATTCCTTCTTGACCTACTTATTTAGTCAGTTACTCAGTGAATACAGGTCAAATAATTACGTAATTTGTCTCATATTCTAAAATTATAAATTGCTCAACCTTTATGTACAGTGACTGCAGGAGGTGTATTTTGTGTCCTGTGCTAAATTACGGTTGGAATCAGCAGATAGTGGCTTAATGGTGGTAATGGATTTGCAATAATCAAGCAATCCTCCAATGATGAATGTGATTCATTTGTGAAATGCTTGGCCAGTCTGCCCTCTATGTGCATAACGTTAAAGGACAACAAAGTATTCAATCCAAAACTCCTTTATAGCATCATTATGTCACTAATTCAGCTTATCAGCCTCAGCCTCGCTGTTATTGCTTCTACTTAGGTGCAAATGTTGCTCAGACAAAAAAAAATAATAAAGGGCAATGTATGATAATCATGTCTCCTTTTCCTACATAAAAGGCAGTTATCAAAAAATCATTGAGCTGGTCCCCAAAGGTGAGCCTGTAAACAGAAAACTGTCCCGTCCTCAGGTCTCACTGTCTCTAAAGGAGGCTCAATTTAAACAGATGTATTTGTGTGTGTGCGCGCATACAACTATAAATCAATTTTTAAAAGGAAAAAAGTTGAAATGTCTTTGAGCAGTGATTTTTAAAAATGACT
AGGAGTTAACTGGGCCAAGAAGGCATGATGGACATGGAGGTGCACCACCAAGATTGACCTTCAAGGAAGGGCTTGTTGCTCCAGCTGTGGTCAGGCAGCTTCTAGTTGTTAGTTCTCTCAGGTACCACCTGAGTTGCAGAGTCCTGCCAGCCAATGTCACACCCTCTCTAGGGCAACCCACTACTATTGACTGATGAAGGGGCAGAGTATAAACACTCAGCCATTTTAACCCCATTTGAGACAACTCTGAGGGGTCATCCTAGCTGCAAGTTGTCTATGGGGTTTGAATAAGCGTGTCATAGGGTCTACGTCACAGCTCTAGTTCTTCCTCTTTCCACTCCTGTCACCTCTGCCCTTCTACAGGTGTTTTTTTTTTTTTTTTTTTTTCCAGGGCATGGCTTAATAAACATGCTGCCTGCCAAACTCAACTCCGAAGTCTACTCCCCAGAACCCAGACTGTGATTAAAAGAAGTTGCAGCAGAGGAATAGAAATGAAGAGGGTAGGAATAATCTTATATGGTAAGGATAGTTTCTGTGAAGCTCTGCAGAAGAGCAGAGCAAGAACTGTCTTGCACAGAAGTGACTTGGTAGGCAGGAATTTACAGGGATGGCTTTCAGCAGTGCGAACGTCATCATTCTCCTGTAATTGCAAATTGTATCAATAATATGAACATTCTAAAGTGAATTAGCGTTGCCATTAAGTACATCCCTTGAATTGTCCAGTAAAAGTATGGACCTGACTTCTGATAGCTCACCACTGTTAGCAAGATTTGTTATTACAACCAGCTGGTTGCCAGAGCCAAGTTTAGCACTCCGAAAAAAAAAGAATTGCACTTAAGAAGGAAGAAAAAGATGCCAAACACATGTTATATGCTTTGCAATTTTGTTCAAACCTGCCTATGTTACAAAATATGCGTTACTGTTTTAGGCTAGCAATTTTATTTGAGATGGACAGTAGAAAAACAGTAATAGGAAAACAGAGTTTGCATTCCTGAGTTTTCACAAGTTCAATGAGAAGTCCTCCATGGCACTATTAATAACATAGTATGTTGCATTAGAAAAAGGGGCCAACATGGTTCTATCTGTAAATGCCGCTCGTTATCTGATAGATCTAGAATGTACATTAAGTTGACCAAAATGTCCTTTAATCTTTGCTGAAATTCAAGGGCTTCTACCATTTAATAGGAAGAAAATATGTAATGTTGGTGTAAAAAAAAAAAAAAAAAAGAAAAACACATTTTTAGATTCACTGGATTATGATGAACACTGAGCAAGATAATTTTTCAAAGTGTGCTTAATAGTTTTAGAACAGGGCAGTGTGATATAAGAGAGATGAGATAAGTAAGCAGAGGCTGGGAAGAGTCTACAGTCTTGCAGTCTCAAGGATATGGTGATTCAAGGGCATAAATGAAAAACAACAGGCTATAAGAGAGGTCTCAGACAAAGAAAGATTTAAAAAGCATTCCTTCTGATATTCCTCATTGCAAAAGCGCCTCCACATGCTAATAATGAAAAATGGGCTGGGTGCGGTGGCTCACGCCTGTAATCCAAACACTTTGGGAGGCCGAGACGGGCGGATCACAAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCATCTCTACTGAACAAAATACAAAAAATTAGCCGGGCGTGGTGGCGGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAAGAGAATGGCATGAACCCAGGAGGCGGAGCTTGCAGTGAGCAGAGATCGCGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCTGTCTCAAAAATAAAAAATAAAAAAATAATAATAAATGAACAACTTGCAAGCAAGTGAAGAAATTATCAGACATAATAGTCTTGAAAGCTTATCTTGGACTAGTTCAGATGGAGTTGGAAAGGGCTTCAGGATATTTTTAAAAATGGTAAGATCATATAAGAAGTAGTATAAACAATAAATACAGTTCAGAGTGCCTTTTGGAG
CTCTGTGCATGCTTGAGTTCTTCTGCTGGCTTTGTCAAAACTTACATCTGTCTTACGCAAAGCGGGATCTGCAAACTGGATTCACTCTAGGAATTATTTGTTACTATGCCATAAGATAAAGTCAACAATTCAGAGTGCATCAAGAAACTTATGTAGCGATTTTACAGAGTACCATTTATGTCTAATGTATTTAATAATAAAGGAGGAAGCATGGGTGTTGGGCATCTTATTTTTGTAACGCTTTGGTTTTATTCTATCTTACATACAATGGATTAGGGGAAAAGTGTTCCTTCCTCAAGATATTTTGAGAAGTACTGAGCAACATATGAAAAGCAGTTTGGGGAGAGATGCGGTATGTTGCTTGCTGGTTCTATTTGACACCATCACTATATGGAACGGGCTGAAAATCGGCCAACTTGGGCTCACTTAAGGCTCCTATGAGCTATTCTTTGTTGCCAGCACATATTAATTCCCGCTCTTTCTCTTCCCCTCTCCCCGCTTACTGTTGTGAAGTAGCATTAAGCCTGTTCAGAGAATTTGGAATAAAAATATATGGGGGCCAATTAGGAGAGCAACATGGCTGCTGAATTTAATAGGTACTCTTTGTGTCTACTCACTATTTGACTCTGAGCAAGAGCAATCTACAATTCAAGTTAAAATTCAAGCCGGGCGCAGTGGCTCATGCCTGCAATCCCAGCACTTTGGGAGGCCGAGGCAGGAGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAAAAATTAGCCGGGCGTGGTGGGGGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAATCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCAAGACTCTGTCTCAAAATAAAAATAAAAAAAAATAAAAATAAAAAATAAAAAAATTCAAGTTTACCGAATGAAGCCATGATATCTGCTACAATAGGGCTGGATCTGAACTATTACCCTCATTGCAAGAAGCCAGACACAAAAGAACATGTATTATATTGATATAAAATATCCAAAAAGGCAAATACATATGGACAGAAATAGATTGACAATTGCCTAGGGCTGGGGTGGGAGTGAAAAATGACTGTAAATGGAGAAGAAACTTCTTTTTAAGGTGGTGGAAAAGTGCCAAAATTAGATCGCTATGATGGCTATATAACTCTGTAAATATCCTGCAAATTTTAATGAACGTGGTGGGGGGGAGTTATTCATCCCTCAAAATAGTTTGAGAGGCTACCAAGAACCAGAACTAAACCAGATTGATGCAAATTGAATTTTACACTTAGAATGAGTACATTGATGGTCTAACAATTACATTTCAAAAACGCTGATTTATAAAAAAGTAATCCCGACCAGTAAAAACAGCAGTCATTTCTCAGCCTCCAAATTTTTTAAATGAGTGGAAAACTTTAAGCAATCAATACAAGTCATATGATACACCTCAAATAAAAGTGGTATACTACACTGAGTGCCTTGCAGATATTCTCCCACTTAATCGTGATAAGAAATACATACACATTTATTTTTATTTTTACTCTTTTGATAGTGAAGGGACCATGGCCCAGAACAGGGGTGGCAATATACCAAATGCCACTAAGATGGTAACCGTGGAAGTTTAAATTCAAGTTCATGTTGGTATATATATATACACATACATATTTATACATATGTGTATAACGAGTTACTATTACGTATATATATACAATGAGTGGAAAATATTAGTGGATATAAGTTCATGTATGATTATGTATACATATTATGATTATACTATTATGTATATGTATACAATGAGTGGATAATATTAGTGGATATAAGTTCATGTTGGTCTTATATATACATATATAATGTGTGGATATGAGTGTGTATATATATACATATATAATGTGTGGATATGAGTGTGTGTATAT
ACACACATGCATATTGTATATATGTGTGTATATATAGACACATTATATATATACGCATATATACTACACACACACACACACGGGTGTGTGTCTGTATCTTTTCCACAAATCCTTCAACCCATTTTGCAGAGGTCAAATAGACAGTCGGAAGACCCTATGCTCAGGTGACTTAAAAATAATTTCCAAATCACATTATGGAGTTTGTATGTATTACACACATTTATTGATAGAGATACCCATATTCTACTAATCTTTTATTGGCAATAATTTATGTTAAGAATACCCAAGACTGAGAAAGCCTCATTCCTTTGGTAGTGATTAAAATAAAACATACTAAATTAACTTATAGACAAGTTATAGAACATACATTTGTGAAAAAAATTACTCACCTATGATTGGGACTTTGTATTTTTACCTTATACTTACTCAATGAAATAAAATTTTGAAAAATATTCCTGTAAATGTACCAGAACCTATTTTATACCGTGATGATCCTTAACATTTCAGACGACATGGTAGTGTCAATTTAAAAAGCAGCACTATGGAGCAGGGTTTGTTATTGTTAGAAATACACATTTGTTTCACACGTCAAGGGTAAAAATTAAAAAACAAGATTAATGTTACCCAAAAGGAAACCATTCATCAGGATTCTTACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGA' -p54 -sg46 -S'32816643' -p55 -sg48 -VGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC -p56 -sssS'GRCh37' -p57 -(dp58 -g38 -S'NC_000023.10:g.32834639_32841473delinsAAGTTGATTACATTAACCTGTGGATAATTACGAGTTGATTGTCGGACCCAGCTCAGGAGAATCTTTTCACTGTTGGTTTGTTGCAATCCAGCCATGATATTTTTCATTACATTTTTGACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC' -p59 -sg40 -(dp60 -g42 -S'X' -p61 -sg44 
-S'ACATGTGGAAATAAATTTTCATAAGAAAATGCATTCCTTGAGCAAGAACCATGCAAACTTAAATATGAATGTCCTTGATCTTCAGTGATAAATAGAAATTTTAGGGCCAATTAGTAATGAGACATAATAGATTCTACCAGAAGTTAAGTCTATTCTCAAAGGCTAGGAGTCTATTCTGATTCATTGGTATCTATGCCATACCATTTACTGAATGTGTTCACTATTACTCCTGGATTCTGCCATATTAGACCCAACATTTACTGTTTATAGAATTGCTCACCTTCTAGCTTTCAATGAACAAGCTAGAGAAGACAGTGTATAAAACATTAGACTTATTGTTAACCAAGGACTTAAGATATAAAAGCGCTTTTTGGATAGGAGGAATACTATAGATAATTGTTTTTTAAAGTATATGTTCAAATTTTGCCTTTGCTATTAATTGTGGTAATTGGACAAGTTGCTTAACCTCTTCTAGGCCTCATTTCTGTGTATCTTACATGCAAGGTTAGTGTAAGATCAAAGTACAAAATATGGTCAGATAAAACCCCTGATTTCATACCATCATTTAACGCAAGCTATATTTATTGGCTGAACTGAAAAGAAGACAGAAATGTTTGCCTACCTAAAATAAAGAAAAAAAACAACAAAGGAAGGAAATATTTATCCCTAAGAGGTGTACATATGTAGCCTTGGTTCTATTTTCATTACTTGGTTTCCATGTATTTGATTGTCTCATAGGCTTCGTGCATGTGCAGATGTTATTCAACAGATTATCAAGTTTAATTCTTCTGAGTCATCTGATGCAGAAATCCATTCTAAGCATTTTTTCTAAGTTTCATGAATGCAGTGACGCTTCATAGAAAAAACTGTCTACATCTTTTATTAAAGAATTATTATTTATTAAAGACTGCTTTTTAAATATAAGCCCCTTATGAGATGCTGGAAGAAGACATCTATGTAAAAGTGATCAGTCTGTACCACTAAGCACTTTCCTTCTGGGAAGGTTAAGGTTCATATACTATCAACATTTGATCAGATGATGAATATTTATTGTGCATTCTAAATATGACAATTTTATTTTTTCATCCATATAGAAGTTAACAAATGCCTTTGGGGTAAATGCATATTTTAACATCTCCAATGTTAGTCATCTTTTTATTAAAAAGTAAATGCAAATTTTAGGATATACTACTAAATCTCATTTTAACATACTTTTAAAAGATTTGAGATGATGCTTTGTAAACACAGTAACGATTCGGAGATATGAAATCTTTGCATGTATCTGTCACAATCCCTTGATAACTAAGCCTATTATTTAGGAGTGGATTATCCAAGTTTTCCTAATTCCTTCTTGACCTACTTATTTAGTCAGTTACTCAGTGAATACAGGTCAAATAATTACGTAATTTGTCTCATATTCTAAAATTATAAATTGCTCAACCTTTATGTACAGTGACTGCAGGAGGTGTATTTTGTGTCCTGTGCTAAATTACGGTTGGAATCAGCAGATAGTGGCTTAATGGTGGTAATGGATTTGCAATAATCAAGCAATCCTCCAATGATGAATGTGATTCATTTGTGAAATGCTTGGCCAGTCTGCCCTCTATGTGCATAACGTTAAAGGACAACAAAGTATTCAATCCAAAACTCCTTTATAGCATCATTATGTCACTAATTCAGCTTATCAGCCTCAGCCTCGCTGTTATTGCTTCTACTTAGGTGCAAATGTTGCTCAGACAAAAAAAAATAATAAAGGGCAATGTATGATAATCATGTCTCCTTTTCCTACATAAAAGGCAGTTATCAAAAAATCATTGAGCTGGTCCCCAAAGGTGAGCCTGTAAACAGAAAACTGTCCCGTCCTCAGGTCTCACTGTCTCTAAAGGAGGCTCAATTTAAACAGATGTATTTGTGTGTGTGCGCGCATACAACTATAAATCAATTTTTAAAAGGAAAAAAGTTGAAATGTCTTTGAGCAGTGATTTTTAAAAATGACT
AGGAGTTAACTGGGCCAAGAAGGCATGATGGACATGGAGGTGCACCACCAAGATTGACCTTCAAGGAAGGGCTTGTTGCTCCAGCTGTGGTCAGGCAGCTTCTAGTTGTTAGTTCTCTCAGGTACCACCTGAGTTGCAGAGTCCTGCCAGCCAATGTCACACCCTCTCTAGGGCAACCCACTACTATTGACTGATGAAGGGGCAGAGTATAAACACTCAGCCATTTTAACCCCATTTGAGACAACTCTGAGGGGTCATCCTAGCTGCAAGTTGTCTATGGGGTTTGAATAAGCGTGTCATAGGGTCTACGTCACAGCTCTAGTTCTTCCTCTTTCCACTCCTGTCACCTCTGCCCTTCTACAGGTGTTTTTTTTTTTTTTTTTTTTTCCAGGGCATGGCTTAATAAACATGCTGCCTGCCAAACTCAACTCCGAAGTCTACTCCCCAGAACCCAGACTGTGATTAAAAGAAGTTGCAGCAGAGGAATAGAAATGAAGAGGGTAGGAATAATCTTATATGGTAAGGATAGTTTCTGTGAAGCTCTGCAGAAGAGCAGAGCAAGAACTGTCTTGCACAGAAGTGACTTGGTAGGCAGGAATTTACAGGGATGGCTTTCAGCAGTGCGAACGTCATCATTCTCCTGTAATTGCAAATTGTATCAATAATATGAACATTCTAAAGTGAATTAGCGTTGCCATTAAGTACATCCCTTGAATTGTCCAGTAAAAGTATGGACCTGACTTCTGATAGCTCACCACTGTTAGCAAGATTTGTTATTACAACCAGCTGGTTGCCAGAGCCAAGTTTAGCACTCCGAAAAAAAAAGAATTGCACTTAAGAAGGAAGAAAAAGATGCCAAACACATGTTATATGCTTTGCAATTTTGTTCAAACCTGCCTATGTTACAAAATATGCGTTACTGTTTTAGGCTAGCAATTTTATTTGAGATGGACAGTAGAAAAACAGTAATAGGAAAACAGAGTTTGCATTCCTGAGTTTTCACAAGTTCAATGAGAAGTCCTCCATGGCACTATTAATAACATAGTATGTTGCATTAGAAAAAGGGGCCAACATGGTTCTATCTGTAAATGCCGCTCGTTATCTGATAGATCTAGAATGTACATTAAGTTGACCAAAATGTCCTTTAATCTTTGCTGAAATTCAAGGGCTTCTACCATTTAATAGGAAGAAAATATGTAATGTTGGTGTAAAAAAAAAAAAAAAAAAGAAAAACACATTTTTAGATTCACTGGATTATGATGAACACTGAGCAAGATAATTTTTCAAAGTGTGCTTAATAGTTTTAGAACAGGGCAGTGTGATATAAGAGAGATGAGATAAGTAAGCAGAGGCTGGGAAGAGTCTACAGTCTTGCAGTCTCAAGGATATGGTGATTCAAGGGCATAAATGAAAAACAACAGGCTATAAGAGAGGTCTCAGACAAAGAAAGATTTAAAAAGCATTCCTTCTGATATTCCTCATTGCAAAAGCGCCTCCACATGCTAATAATGAAAAATGGGCTGGGTGCGGTGGCTCACGCCTGTAATCCAAACACTTTGGGAGGCCGAGACGGGCGGATCACAAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCATCTCTACTGAACAAAATACAAAAAATTAGCCGGGCGTGGTGGCGGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAAGAGAATGGCATGAACCCAGGAGGCGGAGCTTGCAGTGAGCAGAGATCGCGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCTGTCTCAAAAATAAAAAATAAAAAAATAATAATAAATGAACAACTTGCAAGCAAGTGAAGAAATTATCAGACATAATAGTCTTGAAAGCTTATCTTGGACTAGTTCAGATGGAGTTGGAAAGGGCTTCAGGATATTTTTAAAAATGGTAAGATCATATAAGAAGTAGTATAAACAATAAATACAGTTCAGAGTGCCTTTTGGAG
CTCTGTGCATGCTTGAGTTCTTCTGCTGGCTTTGTCAAAACTTACATCTGTCTTACGCAAAGCGGGATCTGCAAACTGGATTCACTCTAGGAATTATTTGTTACTATGCCATAAGATAAAGTCAACAATTCAGAGTGCATCAAGAAACTTATGTAGCGATTTTACAGAGTACCATTTATGTCTAATGTATTTAATAATAAAGGAGGAAGCATGGGTGTTGGGCATCTTATTTTTGTAACGCTTTGGTTTTATTCTATCTTACATACAATGGATTAGGGGAAAAGTGTTCCTTCCTCAAGATATTTTGAGAAGTACTGAGCAACATATGAAAAGCAGTTTGGGGAGAGATGCGGTATGTTGCTTGCTGGTTCTATTTGACACCATCACTATATGGAACGGGCTGAAAATCGGCCAACTTGGGCTCACTTAAGGCTCCTATGAGCTATTCTTTGTTGCCAGCACATATTAATTCCCGCTCTTTCTCTTCCCCTCTCCCCGCTTACTGTTGTGAAGTAGCATTAAGCCTGTTCAGAGAATTTGGAATAAAAATATATGGGGGCCAATTAGGAGAGCAACATGGCTGCTGAATTTAATAGGTACTCTTTGTGTCTACTCACTATTTGACTCTGAGCAAGAGCAATCTACAATTCAAGTTAAAATTCAAGCCGGGCGCAGTGGCTCATGCCTGCAATCCCAGCACTTTGGGAGGCCGAGGCAGGAGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAAAAATTAGCCGGGCGTGGTGGGGGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAATCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCAAGACTCTGTCTCAAAATAAAAATAAAAAAAAATAAAAATAAAAAATAAAAAAATTCAAGTTTACCGAATGAAGCCATGATATCTGCTACAATAGGGCTGGATCTGAACTATTACCCTCATTGCAAGAAGCCAGACACAAAAGAACATGTATTATATTGATATAAAATATCCAAAAAGGCAAATACATATGGACAGAAATAGATTGACAATTGCCTAGGGCTGGGGTGGGAGTGAAAAATGACTGTAAATGGAGAAGAAACTTCTTTTTAAGGTGGTGGAAAAGTGCCAAAATTAGATCGCTATGATGGCTATATAACTCTGTAAATATCCTGCAAATTTTAATGAACGTGGTGGGGGGGAGTTATTCATCCCTCAAAATAGTTTGAGAGGCTACCAAGAACCAGAACTAAACCAGATTGATGCAAATTGAATTTTACACTTAGAATGAGTACATTGATGGTCTAACAATTACATTTCAAAAACGCTGATTTATAAAAAAGTAATCCCGACCAGTAAAAACAGCAGTCATTTCTCAGCCTCCAAATTTTTTAAATGAGTGGAAAACTTTAAGCAATCAATACAAGTCATATGATACACCTCAAATAAAAGTGGTATACTACACTGAGTGCCTTGCAGATATTCTCCCACTTAATCGTGATAAGAAATACATACACATTTATTTTTATTTTTACTCTTTTGATAGTGAAGGGACCATGGCCCAGAACAGGGGTGGCAATATACCAAATGCCACTAAGATGGTAACCGTGGAAGTTTAAATTCAAGTTCATGTTGGTATATATATATACACATACATATTTATACATATGTGTATAACGAGTTACTATTACGTATATATATACAATGAGTGGAAAATATTAGTGGATATAAGTTCATGTATGATTATGTATACATATTATGATTATACTATTATGTATATGTATACAATGAGTGGATAATATTAGTGGATATAAGTTCATGTTGGTCTTATATATACATATATAATGTGTGGATATGAGTGTGTATATATATACATATATAATGTGTGGATATGAGTGTGTGTATAT
ACACACATGCATATTGTATATATGTGTGTATATATAGACACATTATATATATACGCATATATACTACACACACACACACACGGGTGTGTGTCTGTATCTTTTCCACAAATCCTTCAACCCATTTTGCAGAGGTCAAATAGACAGTCGGAAGACCCTATGCTCAGGTGACTTAAAAATAATTTCCAAATCACATTATGGAGTTTGTATGTATTACACACATTTATTGATAGAGATACCCATATTCTACTAATCTTTTATTGGCAATAATTTATGTTAAGAATACCCAAGACTGAGAAAGCCTCATTCCTTTGGTAGTGATTAAAATAAAACATACTAAATTAACTTATAGACAAGTTATAGAACATACATTTGTGAAAAAAATTACTCACCTATGATTGGGACTTTGTATTTTTACCTTATACTTACTCAATGAAATAAAATTTTGAAAAATATTCCTGTAAATGTACCAGAACCTATTTTATACCGTGATGATCCTTAACATTTCAGACGACATGGTAGTGTCAATTTAAAAAGCAGCACTATGGAGCAGGGTTTGTTATTGTTAGAAATACACATTTGTTTCACACGTCAAGGGTAAAAATTAAAAAACAAGATTAATGTTACCCAAAAGGAAACCATTCATCAGGATTCTTACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGA' -p62 -sg46 -S'32834760' -p63 -sg48 -VGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC -p64 -sssS'GRCh38' -p65 -(dp66 -g38 -S'NC_000023.11:g.32816522_32823356delinsAAGTTGATTACATTAACCTGTGGATAATTACGAGTTGATTGTCGGACCCAGCTCAGGAGAATCTTTTCACTGTTGGTTTGTTGCAATCCAGCCATGATATTTTTCATTACATTTTTGACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC' -p67 -sg40 -(dp68 -g42 -g61 -sg44 
-S'ACATGTGGAAATAAATTTTCATAAGAAAATGCATTCCTTGAGCAAGAACCATGCAAACTTAAATATGAATGTCCTTGATCTTCAGTGATAAATAGAAATTTTAGGGCCAATTAGTAATGAGACATAATAGATTCTACCAGAAGTTAAGTCTATTCTCAAAGGCTAGGAGTCTATTCTGATTCATTGGTATCTATGCCATACCATTTACTGAATGTGTTCACTATTACTCCTGGATTCTGCCATATTAGACCCAACATTTACTGTTTATAGAATTGCTCACCTTCTAGCTTTCAATGAACAAGCTAGAGAAGACAGTGTATAAAACATTAGACTTATTGTTAACCAAGGACTTAAGATATAAAAGCGCTTTTTGGATAGGAGGAATACTATAGATAATTGTTTTTTAAAGTATATGTTCAAATTTTGCCTTTGCTATTAATTGTGGTAATTGGACAAGTTGCTTAACCTCTTCTAGGCCTCATTTCTGTGTATCTTACATGCAAGGTTAGTGTAAGATCAAAGTACAAAATATGGTCAGATAAAACCCCTGATTTCATACCATCATTTAACGCAAGCTATATTTATTGGCTGAACTGAAAAGAAGACAGAAATGTTTGCCTACCTAAAATAAAGAAAAAAAACAACAAAGGAAGGAAATATTTATCCCTAAGAGGTGTACATATGTAGCCTTGGTTCTATTTTCATTACTTGGTTTCCATGTATTTGATTGTCTCATAGGCTTCGTGCATGTGCAGATGTTATTCAACAGATTATCAAGTTTAATTCTTCTGAGTCATCTGATGCAGAAATCCATTCTAAGCATTTTTTCTAAGTTTCATGAATGCAGTGACGCTTCATAGAAAAAACTGTCTACATCTTTTATTAAAGAATTATTATTTATTAAAGACTGCTTTTTAAATATAAGCCCCTTATGAGATGCTGGAAGAAGACATCTATGTAAAAGTGATCAGTCTGTACCACTAAGCACTTTCCTTCTGGGAAGGTTAAGGTTCATATACTATCAACATTTGATCAGATGATGAATATTTATTGTGCATTCTAAATATGACAATTTTATTTTTTCATCCATATAGAAGTTAACAAATGCCTTTGGGGTAAATGCATATTTTAACATCTCCAATGTTAGTCATCTTTTTATTAAAAAGTAAATGCAAATTTTAGGATATACTACTAAATCTCATTTTAACATACTTTTAAAAGATTTGAGATGATGCTTTGTAAACACAGTAACGATTCGGAGATATGAAATCTTTGCATGTATCTGTCACAATCCCTTGATAACTAAGCCTATTATTTAGGAGTGGATTATCCAAGTTTTCCTAATTCCTTCTTGACCTACTTATTTAGTCAGTTACTCAGTGAATACAGGTCAAATAATTACGTAATTTGTCTCATATTCTAAAATTATAAATTGCTCAACCTTTATGTACAGTGACTGCAGGAGGTGTATTTTGTGTCCTGTGCTAAATTACGGTTGGAATCAGCAGATAGTGGCTTAATGGTGGTAATGGATTTGCAATAATCAAGCAATCCTCCAATGATGAATGTGATTCATTTGTGAAATGCTTGGCCAGTCTGCCCTCTATGTGCATAACGTTAAAGGACAACAAAGTATTCAATCCAAAACTCCTTTATAGCATCATTATGTCACTAATTCAGCTTATCAGCCTCAGCCTCGCTGTTATTGCTTCTACTTAGGTGCAAATGTTGCTCAGACAAAAAAAAATAATAAAGGGCAATGTATGATAATCATGTCTCCTTTTCCTACATAAAAGGCAGTTATCAAAAAATCATTGAGCTGGTCCCCAAAGGTGAGCCTGTAAACAGAAAACTGTCCCGTCCTCAGGTCTCACTGTCTCTAAAGGAGGCTCAATTTAAACAGATGTATTTGTGTGTGTGCGCGCATACAACTATAAATCAATTTTTAAAAGGAAAAAAGTTGAAATGTCTTTGAGCAGTGATTTTTAAAAATGACT
AGGAGTTAACTGGGCCAAGAAGGCATGATGGACATGGAGGTGCACCACCAAGATTGACCTTCAAGGAAGGGCTTGTTGCTCCAGCTGTGGTCAGGCAGCTTCTAGTTGTTAGTTCTCTCAGGTACCACCTGAGTTGCAGAGTCCTGCCAGCCAATGTCACACCCTCTCTAGGGCAACCCACTACTATTGACTGATGAAGGGGCAGAGTATAAACACTCAGCCATTTTAACCCCATTTGAGACAACTCTGAGGGGTCATCCTAGCTGCAAGTTGTCTATGGGGTTTGAATAAGCGTGTCATAGGGTCTACGTCACAGCTCTAGTTCTTCCTCTTTCCACTCCTGTCACCTCTGCCCTTCTACAGGTGTTTTTTTTTTTTTTTTTTTTTCCAGGGCATGGCTTAATAAACATGCTGCCTGCCAAACTCAACTCCGAAGTCTACTCCCCAGAACCCAGACTGTGATTAAAAGAAGTTGCAGCAGAGGAATAGAAATGAAGAGGGTAGGAATAATCTTATATGGTAAGGATAGTTTCTGTGAAGCTCTGCAGAAGAGCAGAGCAAGAACTGTCTTGCACAGAAGTGACTTGGTAGGCAGGAATTTACAGGGATGGCTTTCAGCAGTGCGAACGTCATCATTCTCCTGTAATTGCAAATTGTATCAATAATATGAACATTCTAAAGTGAATTAGCGTTGCCATTAAGTACATCCCTTGAATTGTCCAGTAAAAGTATGGACCTGACTTCTGATAGCTCACCACTGTTAGCAAGATTTGTTATTACAACCAGCTGGTTGCCAGAGCCAAGTTTAGCACTCCGAAAAAAAAAGAATTGCACTTAAGAAGGAAGAAAAAGATGCCAAACACATGTTATATGCTTTGCAATTTTGTTCAAACCTGCCTATGTTACAAAATATGCGTTACTGTTTTAGGCTAGCAATTTTATTTGAGATGGACAGTAGAAAAACAGTAATAGGAAAACAGAGTTTGCATTCCTGAGTTTTCACAAGTTCAATGAGAAGTCCTCCATGGCACTATTAATAACATAGTATGTTGCATTAGAAAAAGGGGCCAACATGGTTCTATCTGTAAATGCCGCTCGTTATCTGATAGATCTAGAATGTACATTAAGTTGACCAAAATGTCCTTTAATCTTTGCTGAAATTCAAGGGCTTCTACCATTTAATAGGAAGAAAATATGTAATGTTGGTGTAAAAAAAAAAAAAAAAAAGAAAAACACATTTTTAGATTCACTGGATTATGATGAACACTGAGCAAGATAATTTTTCAAAGTGTGCTTAATAGTTTTAGAACAGGGCAGTGTGATATAAGAGAGATGAGATAAGTAAGCAGAGGCTGGGAAGAGTCTACAGTCTTGCAGTCTCAAGGATATGGTGATTCAAGGGCATAAATGAAAAACAACAGGCTATAAGAGAGGTCTCAGACAAAGAAAGATTTAAAAAGCATTCCTTCTGATATTCCTCATTGCAAAAGCGCCTCCACATGCTAATAATGAAAAATGGGCTGGGTGCGGTGGCTCACGCCTGTAATCCAAACACTTTGGGAGGCCGAGACGGGCGGATCACAAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCATCTCTACTGAACAAAATACAAAAAATTAGCCGGGCGTGGTGGCGGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAAGAGAATGGCATGAACCCAGGAGGCGGAGCTTGCAGTGAGCAGAGATCGCGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCTGTCTCAAAAATAAAAAATAAAAAAATAATAATAAATGAACAACTTGCAAGCAAGTGAAGAAATTATCAGACATAATAGTCTTGAAAGCTTATCTTGGACTAGTTCAGATGGAGTTGGAAAGGGCTTCAGGATATTTTTAAAAATGGTAAGATCATATAAGAAGTAGTATAAACAATAAATACAGTTCAGAGTGCCTTTTGGAG
CTCTGTGCATGCTTGAGTTCTTCTGCTGGCTTTGTCAAAACTTACATCTGTCTTACGCAAAGCGGGATCTGCAAACTGGATTCACTCTAGGAATTATTTGTTACTATGCCATAAGATAAAGTCAACAATTCAGAGTGCATCAAGAAACTTATGTAGCGATTTTACAGAGTACCATTTATGTCTAATGTATTTAATAATAAAGGAGGAAGCATGGGTGTTGGGCATCTTATTTTTGTAACGCTTTGGTTTTATTCTATCTTACATACAATGGATTAGGGGAAAAGTGTTCCTTCCTCAAGATATTTTGAGAAGTACTGAGCAACATATGAAAAGCAGTTTGGGGAGAGATGCGGTATGTTGCTTGCTGGTTCTATTTGACACCATCACTATATGGAACGGGCTGAAAATCGGCCAACTTGGGCTCACTTAAGGCTCCTATGAGCTATTCTTTGTTGCCAGCACATATTAATTCCCGCTCTTTCTCTTCCCCTCTCCCCGCTTACTGTTGTGAAGTAGCATTAAGCCTGTTCAGAGAATTTGGAATAAAAATATATGGGGGCCAATTAGGAGAGCAACATGGCTGCTGAATTTAATAGGTACTCTTTGTGTCTACTCACTATTTGACTCTGAGCAAGAGCAATCTACAATTCAAGTTAAAATTCAAGCCGGGCGCAGTGGCTCATGCCTGCAATCCCAGCACTTTGGGAGGCCGAGGCAGGAGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAAAAATTAGCCGGGCGTGGTGGGGGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAATCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCAAGACTCTGTCTCAAAATAAAAATAAAAAAAAATAAAAATAAAAAATAAAAAAATTCAAGTTTACCGAATGAAGCCATGATATCTGCTACAATAGGGCTGGATCTGAACTATTACCCTCATTGCAAGAAGCCAGACACAAAAGAACATGTATTATATTGATATAAAATATCCAAAAAGGCAAATACATATGGACAGAAATAGATTGACAATTGCCTAGGGCTGGGGTGGGAGTGAAAAATGACTGTAAATGGAGAAGAAACTTCTTTTTAAGGTGGTGGAAAAGTGCCAAAATTAGATCGCTATGATGGCTATATAACTCTGTAAATATCCTGCAAATTTTAATGAACGTGGTGGGGGGGAGTTATTCATCCCTCAAAATAGTTTGAGAGGCTACCAAGAACCAGAACTAAACCAGATTGATGCAAATTGAATTTTACACTTAGAATGAGTACATTGATGGTCTAACAATTACATTTCAAAAACGCTGATTTATAAAAAAGTAATCCCGACCAGTAAAAACAGCAGTCATTTCTCAGCCTCCAAATTTTTTAAATGAGTGGAAAACTTTAAGCAATCAATACAAGTCATATGATACACCTCAAATAAAAGTGGTATACTACACTGAGTGCCTTGCAGATATTCTCCCACTTAATCGTGATAAGAAATACATACACATTTATTTTTATTTTTACTCTTTTGATAGTGAAGGGACCATGGCCCAGAACAGGGGTGGCAATATACCAAATGCCACTAAGATGGTAACCGTGGAAGTTTAAATTCAAGTTCATGTTGGTATATATATATACACATACATATTTATACATATGTGTATAACGAGTTACTATTACGTATATATATACAATGAGTGGAAAATATTAGTGGATATAAGTTCATGTATGATTATGTATACATATTATGATTATACTATTATGTATATGTATACAATGAGTGGATAATATTAGTGGATATAAGTTCATGTTGGTCTTATATATACATATATAATGTGTGGATATGAGTGTGTATATATATACATATATAATGTGTGGATATGAGTGTGTGTATAT
ACACACATGCATATTGTATATATGTGTGTATATATAGACACATTATATATATACGCATATATACTACACACACACACACACGGGTGTGTGTCTGTATCTTTTCCACAAATCCTTCAACCCATTTTGCAGAGGTCAAATAGACAGTCGGAAGACCCTATGCTCAGGTGACTTAAAAATAATTTCCAAATCACATTATGGAGTTTGTATGTATTACACACATTTATTGATAGAGATACCCATATTCTACTAATCTTTTATTGGCAATAATTTATGTTAAGAATACCCAAGACTGAGAAAGCCTCATTCCTTTGGTAGTGATTAAAATAAAACATACTAAATTAACTTATAGACAAGTTATAGAACATACATTTGTGAAAAAAATTACTCACCTATGATTGGGACTTTGTATTTTTACCTTATACTTACTCAATGAAATAAAATTTTGAAAAATATTCCTGTAAATGTACCAGAACCTATTTTATACCGTGATGATCCTTAACATTTCAGACGACATGGTAGTGTCAATTTAAAAAGCAGCACTATGGAGCAGGGTTTGTTATTGTTAGAAATACACATTTGTTTCACACGTCAAGGGTAAAAATTAAAAAACAAGATTAATGTTACCCAAAAGGAAACCATTCATCAGGATTCTTACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGA' -p69 -sg46 -S'32816643' -p70 -sg48 -VGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC -p71 -sssssS'flag' -p72 -S'gene_variant' -p73 -sS'NM_004006.2:c.476T=' -p74 -(dp75 -g3 -S'LRG_199t1:c.476T=' -p76 -sg5 -(lp77 -S'LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C automapped to NM_004006.2:c.[296T>G;476T=];[476T=](;)1083A>C' -p78 -aS'Automap has extracted possible variant descriptions' -p79 -asg11 -g12 -sg13 -(lp80 -sg15 -VHomo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA -p81 -sg17 -S'DMD' -p82 -sg19 -(dp83 -g21 -S'NP_003997.1(LRG_199p1):p.(Phe159=)' -p84 -sg23 -S'NP_003997.1:p.(F159=)' -p85 -ssg25 -g26 -sg27 -g12 -sg28 -S'LRG_199:g.528088T=' -p86 -sg30 -S'NM_004006.2:c.476T=' -p87 -sg32 -S'NG_012232.1:g.528088T=' -p88 -sg34 -(dp89 -g36 -(dp90 -g38 -S'NC_000023.10:g.32834639A=' -p91 -sg40 -(dp92 -g42 -g43 -sg44 -VA -p93 -sg46 -S'32834639' -p94 -sg48 -g93 -sssg50 -(dp95 -g38 -S'NC_000023.11:g.32816522A=' -p96 -sg40 -(dp97 -g42 -g43 -sg44 -g93 -sg46 -S'32816522' -p98 -sg48 -g93 -sssg57 -(dp99 -g38 -S'NC_000023.10:g.32834639A=' -p100 -sg40 -(dp101 -g42 -g61 -sg44 -g93 -sg46 -S'32834639' -p102 -sg48 -g93 -sssg65 -(dp103 -g38 -S'NC_000023.11:g.32816522A=' -p104 -sg40 -(dp105 -g42 -g61 -sg44 -g93 -sg46 -S'32816522' -p106 -sg48 -g93 
-sssssS'NM_004006.2:c.1083A>C' -p107 -(dp108 -g3 -S'LRG_199t1:c.1083A>C' -p109 -sg5 -(lp110 -S'LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C automapped to NM_004006.2:c.[296T>G;476T=];[476T=](;)1083A>C' -p111 -aS'Automap has extracted possible variant descriptions' -p112 -asg11 -g12 -sg13 -(lp113 -sg15 -VHomo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA -p114 -sg17 -S'DMD' -p115 -sg19 -(dp116 -g21 -S'NP_003997.1(LRG_199p1):p.(Thr361=)' -p117 -sg23 -S'NP_003997.1:p.(T361=)' -p118 -ssg25 -g26 -sg27 -g12 -sg28 -S'LRG_199:g.699580A>C' -p119 -sg30 -S'NM_004006.2:c.1083A>C' -p120 -sg32 -S'NG_012232.1:g.699580A>C' -p121 -sg34 -(dp122 -g36 -(dp123 -g38 -S'NC_000023.10:g.32663147T>G' -p124 -sg40 -(dp125 -g42 -g43 -sg44 -VT -p126 -sg46 -S'32663147' -p127 -sg48 -VG -p128 -sssg50 -(dp129 -g38 -S'NC_000023.11:g.32645030T>G' -p130 -sg40 -(dp131 -g42 -g43 -sg44 -g126 -sg46 -S'32645030' -p132 -sg48 -g128 -sssg57 -(dp133 -g38 -S'NC_000023.10:g.32663147T>G' -p134 -sg40 -(dp135 -g42 -g61 -sg44 -g126 -sg46 -S'32663147' -p136 -sg48 -g128 -sssg65 -(dp137 -g38 -S'NC_000023.11:g.32645030T>G' -p138 -sg40 -(dp139 -g42 -g61 -sg44 -g126 -sg46 -S'32645030' -p140 -sg48 -g128 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant196.txt b/VariantValidator/testing/testOutputsMasterITS/variant196.txt deleted file mode 100644 index 6789b135..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant196.txt +++ /dev/null @@ -1,480 +0,0 @@ -(dp0 -S'NM_004006.2:c.1408del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_199t1:c.1408del' -p4 -sS'validation_warnings' -p5 -(lp6 -S'LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del automapped to NM_004006.2:c.[296T>G];[476T>C](;)1083A>C(;)1406del' -p7 -aS'Automap has extracted possible variant descriptions' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -S'' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA -p14 -sS'gene_symbol' -p15 -S'DMD' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_003997.1(LRG_199p1):p.(Arg470GlufsTer17)' -p20 -sS'slr' -p21 -S'NP_003997.1:p.(R470Efs*17)' -p22 -ssS'submitted_variant' -p23 -S'LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del' -p24 -sS'genome_context_intronic_sequence' -p25 -g10 -sS'HGVS_LRG_variant' -p26 -S'LRG_199:g.730233del' -p27 -sS'HGVS_transcript_variant' -p28 -S'NM_004006.2:c.1408del' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_012232.1:g.730233del' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000023.10:g.32632494del' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chrX' -p41 -sS'ref' -p42 -S'CT' -p43 -sS'pos' -p44 -S'32632493' -p45 -sS'alt' -p46 -S'C' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000023.11:g.32614377del' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'CT' -p52 -sg44 -S'32614376' -p53 -sg46 -g47 -sssS'GRCh37' -p54 -(dp55 -g36 -S'NC_000023.10:g.32632494del' -p56 -sg38 -(dp57 -g40 -S'X' -p58 -sg42 -S'CT' -p59 -sg44 -S'32632493' -p60 -sg46 -g47 -sssS'GRCh38' -p61 -(dp62 -g36 -S'NC_000023.11:g.32614377del' -p63 -sg38 -(dp64 -g40 
-g58 -sg42 -S'CT' -p65 -sg44 -S'32614376' -p66 -sg46 -g47 -sssssS'NM_004006.2:c.476T>C' -p67 -(dp68 -g3 -S'LRG_199t1:c.476T>C' -p69 -sg5 -(lp70 -S'LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del automapped to NM_004006.2:c.[296T>G];[476T>C](;)1083A>C(;)1406del' -p71 -aS'Automap has extracted possible variant descriptions' -p72 -asg9 -g10 -sg11 -(lp73 -sg13 -VHomo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA -p74 -sg15 -S'DMD' -p75 -sg17 -(dp76 -g19 -S'NP_003997.1(LRG_199p1):p.(Phe159Ser)' -p77 -sg21 -S'NP_003997.1:p.(F159S)' -p78 -ssg23 -g24 -sg25 -g10 -sg26 -S'LRG_199:g.528088T>C' -p79 -sg28 -S'NM_004006.2:c.476T>C' -p80 -sg30 -S'NG_012232.1:g.528088T>C' -p81 -sg32 -(dp82 -g34 -(dp83 -g36 -S'NC_000023.10:g.32834639A>G' -p84 -sg38 -(dp85 -g40 -g41 -sg42 -VA -p86 -sg44 -S'32834639' -p87 -sg46 -VG -p88 -sssg48 -(dp89 -g36 -S'NC_000023.11:g.32816522A>G' -p90 -sg38 -(dp91 -g40 -g41 -sg42 -g86 -sg44 -S'32816522' -p92 -sg46 -g88 -sssg54 -(dp93 -g36 -S'NC_000023.10:g.32834639A>G' -p94 -sg38 -(dp95 -g40 -g58 -sg42 -g86 -sg44 -S'32834639' -p96 -sg46 -g88 -sssg61 -(dp97 -g36 -S'NC_000023.11:g.32816522A>G' -p98 -sg38 -(dp99 -g40 -g58 -sg42 -g86 -sg44 -S'32816522' -p100 -sg46 -g88 -sssssS'flag' -p101 -S'gene_variant' -p102 -sS'NM_004006.2:c.1083A>C' -p103 -(dp104 -g3 -S'LRG_199t1:c.1083A>C' -p105 -sg5 -(lp106 -S'LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del automapped to NM_004006.2:c.[296T>G];[476T>C](;)1083A>C(;)1406del' -p107 -aS'Automap has extracted possible variant descriptions' -p108 -asg9 -g10 -sg11 -(lp109 -sg13 -VHomo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA -p110 -sg15 -S'DMD' -p111 -sg17 -(dp112 -g19 -S'NP_003997.1(LRG_199p1):p.(Thr361=)' -p113 -sg21 -S'NP_003997.1:p.(T361=)' -p114 -ssg23 -g24 -sg25 -g10 -sg26 -S'LRG_199:g.699580A>C' -p115 -sg28 -S'NM_004006.2:c.1083A>C' -p116 -sg30 -S'NG_012232.1:g.699580A>C' -p117 -sg32 -(dp118 -g34 -(dp119 -g36 -S'NC_000023.10:g.32663147T>G' -p120 -sg38 -(dp121 -g40 -g41 -sg42 -VT -p122 -sg44 
-S'32663147' -p123 -sg46 -g88 -sssg48 -(dp124 -g36 -S'NC_000023.11:g.32645030T>G' -p125 -sg38 -(dp126 -g40 -g41 -sg42 -g122 -sg44 -S'32645030' -p127 -sg46 -g88 -sssg54 -(dp128 -g36 -S'NC_000023.10:g.32663147T>G' -p129 -sg38 -(dp130 -g40 -g58 -sg42 -g122 -sg44 -S'32663147' -p131 -sg46 -g88 -sssg61 -(dp132 -g36 -S'NC_000023.11:g.32645030T>G' -p133 -sg38 -(dp134 -g40 -g58 -sg42 -g122 -sg44 -S'32645030' -p135 -sg46 -g88 -sssssS'NM_004006.2:c.296T>G' -p136 -(dp137 -g3 -S'LRG_199t1:c.296T>G' -p138 -sg5 -(lp139 -S'LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del automapped to NM_004006.2:c.[296T>G];[476T>C](;)1083A>C(;)1406del' -p140 -aS'Automap has extracted possible variant descriptions' -p141 -asg9 -g10 -sg11 -(lp142 -sg13 -VHomo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA -p143 -sg15 -S'DMD' -p144 -sg17 -(dp145 -g19 -S'NP_003997.1(LRG_199p1):p.(Ile99Ser)' -p146 -sg21 -S'NP_003997.1:p.(I99S)' -p147 -ssg23 -g24 -sg25 -g10 -sg26 -S'LRG_199:g.521254T>G' -p148 -sg28 -S'NM_004006.2:c.296T>G' -p149 -sg30 -S'NG_012232.1:g.521254T>G' -p150 -sg32 -(dp151 -g34 -(dp152 -g36 -S'NC_000023.10:g.32841473A>C' -p153 -sg38 -(dp154 -g40 -g41 -sg42 -g86 -sg44 -S'32841473' -p155 -sg46 -VC -p156 -sssg48 -(dp157 -g36 -S'NC_000023.11:g.32823356A>C' -p158 -sg38 -(dp159 -g40 -g41 -sg42 -g86 -sg44 -S'32823356' -p160 -sg46 -g156 -sssg54 -(dp161 -g36 -S'NC_000023.10:g.32841473A>C' -p162 -sg38 -(dp163 -g40 -g58 -sg42 -g86 -sg44 -S'32841473' -p164 -sg46 -g156 -sssg61 -(dp165 -g36 -S'NC_000023.11:g.32823356A>C' -p166 -sg38 -(dp167 -g40 -g58 -sg42 -g86 -sg44 -S'32823356' -p168 -sg46 -g156 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant197.txt b/VariantValidator/testing/testOutputsMasterITS/variant197.txt deleted file mode 100644 index 99dfa92f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant197.txt +++ /dev/null @@ -1,60 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'LRG_199t1:c.[976-20T>A;976-17_976-1dup] automapped to NM_004006.2:c.[976-20T>A;976-17_976-1dup]' -p7 -aS'Intronic positions not supported for HGVS Allele descriptions' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -g4 -sS'gene_symbol' -p13 -g4 -sS'HGVS_predicted_protein_consequence' -p14 -(dp15 -S'tlr' -p16 -g4 -sS'slr' -p17 -g4 -ssS'submitted_variant' -p18 -S'LRG_199t1:c.[976-20T>A;976-17_976-1dup]' -p19 -sS'genome_context_intronic_sequence' -p20 -g4 -sS'HGVS_LRG_variant' -p21 -g4 -sS'HGVS_transcript_variant' -p22 -g4 -sS'HGVS_RefSeqGene_variant' -p23 -g4 -sS'primary_assembly_loci' -p24 -(dp25 -ssS'flag' -p26 -S'warning' -p27 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant198.txt b/VariantValidator/testing/testOutputsMasterITS/variant198.txt deleted file mode 100644 index b46532d3..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant198.txt +++ /dev/null @@ -1,536 +0,0 @@ -(dp0 -S'NM_015102.3:c.2818-2T>A' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'A more recent version of the selected reference sequence NM_015102.3 is available (NM_015102.4)' -p7 -aS'NM_015102.4:c.2818-2T>A MUST be fully validated prior to use in reports' -p8 -aS'select_variants=NM_015102.4:c.2818-2T>A' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'NG_011724.2(NM_015102.3):c.2818-2A=' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens nephronophthisis 4 (NPHP4), mRNA -p15 -sS'gene_symbol' -p16 -S'NPHP4' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_055917.1:p.?' -p21 -sS'slr' -p22 -S'NP_055917.1:p.?' 
-p23 -ssS'submitted_variant' -p24 -S'1-5935162-A-T' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000001.10(NM_015102.3):c.2818-2T>A' -p27 -sS'HGVS_LRG_variant' -p28 -g4 -sS'HGVS_transcript_variant' -p29 -S'NM_015102.3:c.2818-2T>A' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_011724.2:g.122370A=' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000001.10:g.5935162A>T' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr1' -p42 -sS'ref' -p43 -VA -p44 -sS'pos' -p45 -S'5935162' -p46 -sS'alt' -p47 -VT -p48 -sssS'GRCh37' -p49 -(dp50 -g37 -S'NC_000001.10:g.5935162A>T' -p51 -sg39 -(dp52 -g41 -S'1' -p53 -sg43 -g44 -sg45 -S'5935162' -p54 -sg47 -g48 -sssssS'NM_001291593.1:c.1279-2T>A' -p55 -(dp56 -g3 -g4 -sg5 -(lp57 -S'RefSeqGene record not available' -p58 -asg10 -g4 -sg12 -(lp59 -sg14 -VHomo sapiens nephrocystin 4 (NPHP4), transcript variant 2, mRNA -p60 -sg16 -S'NPHP4' -p61 -sg18 -(dp62 -g20 -S'NP_001278522.1:p.?' -p63 -sg22 -S'NP_001278522.1:p.?' -p64 -ssg24 -g25 -sg26 -S'NC_000001.10(NM_001291593.1):c.1279-2T>A' -p65 -sg28 -g4 -sg29 -S'NM_001291593.1:c.1279-2T>A' -p66 -sg31 -g4 -sg33 -(dp67 -g35 -(dp68 -g37 -S'NC_000001.10:g.5935162A>T' -p69 -sg39 -(dp70 -g41 -g42 -sg43 -g44 -sg45 -S'5935162' -p71 -sg47 -g48 -sssS'hg38' -p72 -(dp73 -g37 -S'NC_000001.11:g.5875102T=' -p74 -sg39 -(dp75 -g41 -g42 -sg43 -S'T' -p76 -sg45 -S'5875102' -p77 -sg47 -g76 -sssg49 -(dp78 -g37 -S'NC_000001.10:g.5935162A>T' -p79 -sg39 -(dp80 -g41 -g53 -sg43 -g44 -sg45 -S'5935162' -p81 -sg47 -g48 -sssS'GRCh38' -p82 -(dp83 -g37 -S'NC_000001.11:g.5875102T=' -p84 -sg39 -(dp85 -g41 -g53 -sg43 -g76 -sg45 -S'5875102' -p86 -sg47 -g76 -sssssS'NM_015102.4:c.2818-2T>A' -p87 -(dp88 -g3 -g4 -sg5 -(lp89 -S'RefSeqGene record not available' -p90 -asg10 -g4 -sg12 -(lp91 -sg14 -VHomo sapiens nephrocystin 4 (NPHP4), transcript variant 1, mRNA -p92 -sg16 -S'NPHP4' -p93 -sg18 -(dp94 -g20 -S'NP_055917.1:p.?' -p95 -sg22 -S'NP_055917.1:p.?' 
-p96 -ssg24 -g25 -sg26 -S'NC_000001.10(NM_015102.4):c.2818-2T>A' -p97 -sg28 -g4 -sg29 -S'NM_015102.4:c.2818-2T>A' -p98 -sg31 -g4 -sg33 -(dp99 -g35 -(dp100 -g37 -S'NC_000001.10:g.5935162A>T' -p101 -sg39 -(dp102 -g41 -g42 -sg43 -g44 -sg45 -S'5935162' -p103 -sg47 -g48 -sssg72 -(dp104 -g37 -S'NC_000001.11:g.5875102T=' -p105 -sg39 -(dp106 -g41 -g42 -sg43 -g76 -sg45 -S'5875102' -p107 -sg47 -g76 -sssg49 -(dp108 -g37 -S'NC_000001.10:g.5935162A>T' -p109 -sg39 -(dp110 -g41 -g53 -sg43 -g44 -sg45 -S'5935162' -p111 -sg47 -g48 -sssg82 -(dp112 -g37 -S'NC_000001.11:g.5875102T=' -p113 -sg39 -(dp114 -g41 -g53 -sg43 -g76 -sg45 -S'5875102' -p115 -sg47 -g76 -sssssS'NM_001291594.1:c.1282-2T>A' -p116 -(dp117 -g3 -g4 -sg5 -(lp118 -S'RefSeqGene record not available' -p119 -asg10 -g4 -sg12 -(lp120 -sg14 -VHomo sapiens nephrocystin 4 (NPHP4), transcript variant 3, mRNA -p121 -sg16 -S'NPHP4' -p122 -sg18 -(dp123 -g20 -S'NP_001278523.1:p.?' -p124 -sg22 -S'NP_001278523.1:p.?' -p125 -ssg24 -g25 -sg26 -S'NC_000001.10(NM_001291594.1):c.1282-2T>A' -p126 -sg28 -g4 -sg29 -S'NM_001291594.1:c.1282-2T>A' -p127 -sg31 -g4 -sg33 -(dp128 -g35 -(dp129 -g37 -S'NC_000001.10:g.5935162A>T' -p130 -sg39 -(dp131 -g41 -g42 -sg43 -g44 -sg45 -S'5935162' -p132 -sg47 -g48 -sssg72 -(dp133 -g37 -S'NC_000001.11:g.5875102T=' -p134 -sg39 -(dp135 -g41 -g42 -sg43 -g76 -sg45 -S'5875102' -p136 -sg47 -g76 -sssg49 -(dp137 -g37 -S'NC_000001.10:g.5935162A>T' -p138 -sg39 -(dp139 -g41 -g53 -sg43 -g44 -sg45 -S'5935162' -p140 -sg47 -g48 -sssg82 -(dp141 -g37 -S'NC_000001.11:g.5875102T=' -p142 -sg39 -(dp143 -g41 -g53 -sg43 -g76 -sg45 -S'5875102' -p144 -sg47 -g76 -sssssS'flag' -p145 -S'gene_variant' -p146 -sS'NR_111987.1:n.3633-2T>A' -p147 -(dp148 -g3 -g4 -sg5 -(lp149 -S'RefSeqGene record not available' -p150 -asg10 -g4 -sg12 -(lp151 -sg14 -VHomo sapiens nephrocystin 4 (NPHP4), transcript variant 4, non-coding RNA -p152 -sg16 -S'NPHP4' -p153 -sg18 -(dp154 -g20 -S'Non-coding :n.' 
-p155 -sg22 -g4 -ssg24 -g25 -sg26 -S'NC_000001.10(NR_111987.1):c.3633-2T>A' -p156 -sg28 -g4 -sg29 -S'NR_111987.1:n.3633-2T>A' -p157 -sg31 -g4 -sg33 -(dp158 -g35 -(dp159 -g37 -S'NC_000001.10:g.5935162A>T' -p160 -sg39 -(dp161 -g41 -g42 -sg43 -g44 -sg45 -S'5935162' -p162 -sg47 -g48 -sssg72 -(dp163 -g37 -S'NC_000001.11:g.5875102T=' -p164 -sg39 -(dp165 -g41 -g42 -sg43 -g76 -sg45 -S'5875102' -p166 -sg47 -g76 -sssg49 -(dp167 -g37 -S'NC_000001.10:g.5935162A>T' -p168 -sg39 -(dp169 -g41 -g53 -sg43 -g44 -sg45 -S'5935162' -p170 -sg47 -g48 -sssg82 -(dp171 -g37 -S'NC_000001.11:g.5875102T=' -p172 -sg39 -(dp173 -g41 -g53 -sg43 -g76 -sg45 -S'5875102' -p174 -sg47 -g76 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant199.txt b/VariantValidator/testing/testOutputsMasterITS/variant199.txt deleted file mode 100644 index 6c5c93d4..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant199.txt +++ /dev/null @@ -1,246 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_001127660.1:c.1676C>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens mitofusin 2 (MFN2), transcript variant 2, mRNA -p14 -sS'gene_symbol' -p15 -S'MFN2' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_001121132.1:p.(Pro559Leu)' -p20 -sS'slr' -p21 -S'NP_001121132.1:p.(P559L)' -p22 -ssS'submitted_variant' -p23 -S'1-12065948-C-T' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'HGVS_LRG_variant' -p26 -g6 -sS'HGVS_transcript_variant' -p27 -S'NM_001127660.1:c.1676C>T' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000001.10:g.12065948C>T' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr1' -p39 -sS'ref' -p40 -S'C' -p41 
-sS'pos' -p42 -S'12065948' -p43 -sS'alt' -p44 -S'T' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000001.11:g.12005891C>T' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'12005891' -p50 -sg44 -g45 -sssS'GRCh37' -p51 -(dp52 -g34 -S'NC_000001.10:g.12065948C>T' -p53 -sg36 -(dp54 -g38 -S'1' -p55 -sg40 -g41 -sg42 -S'12065948' -p56 -sg44 -g45 -sssS'GRCh38' -p57 -(dp58 -g34 -S'NC_000001.11:g.12005891C>T' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'12005891' -p61 -sg44 -g45 -sssssS'NM_014874.3:c.1676C>T' -p62 -(dp63 -g5 -S'LRG_255t1:c.1676C>T' -p64 -sg7 -(lp65 -sg10 -g6 -sg11 -(lp66 -sg13 -VHomo sapiens mitofusin 2 (MFN2), transcript variant 1, mRNA -p67 -sg15 -S'MFN2' -p68 -sg17 -(dp69 -g19 -S'NP_055689.1(LRG_255p1):p.(Pro559Leu)' -p70 -sg21 -S'NP_055689.1:p.(P559L)' -p71 -ssg23 -g24 -sg25 -g6 -sg26 -S'LRG_255:g.30711C>T' -p72 -sg27 -S'NM_014874.3:c.1676C>T' -p73 -sg29 -S'NG_007945.1:g.30711C>T' -p74 -sg30 -(dp75 -g32 -(dp76 -g34 -S'NC_000001.10:g.12065948C>T' -p77 -sg36 -(dp78 -g38 -g39 -sg40 -g41 -sg42 -S'12065948' -p79 -sg44 -g45 -sssg46 -(dp80 -g34 -S'NC_000001.11:g.12005891C>T' -p81 -sg36 -(dp82 -g38 -g39 -sg40 -g41 -sg42 -S'12005891' -p83 -sg44 -g45 -sssg51 -(dp84 -g34 -S'NC_000001.10:g.12065948C>T' -p85 -sg36 -(dp86 -g38 -g55 -sg40 -g41 -sg42 -S'12065948' -p87 -sg44 -g45 -sssg57 -(dp88 -g34 -S'NC_000001.11:g.12005891C>T' -p89 -sg36 -(dp90 -g38 -g55 -sg40 -g41 -sg42 -S'12005891' -p91 -sg44 -g45 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant2.txt b/VariantValidator/testing/testOutputsMasterITS/variant2.txt deleted file mode 100644 index 163f93c4..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant2.txt +++ /dev/null @@ -1,142 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_015120.4:c.34C>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_741t1:c.34C>T' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA -p14 -sS'gene_symbol' -p15 -S'ALMS1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_055935.4(LRG_741p1):p.(Leu12=)' -p20 -sS'slr' -p21 -S'NP_055935.4:p.(L12=)' -p22 -ssS'submitted_variant' -p23 -S'NM_015120.4:c.34C>T' -p24 -sS'genome_context_intronic_sequence' -p25 -g10 -sS'HGVS_LRG_variant' -p26 -S'LRG_741:g.5145C>T' -p27 -sS'HGVS_transcript_variant' -p28 -S'NM_015120.4:c.34C>T' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_011690.1:g.5145C>T' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000002.11:g.73613030C>T' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr2' -p41 -sS'ref' -p42 -VC -p43 -sS'pos' -p44 -S'73613030' -p45 -sS'alt' -p46 -VT -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000002.12:g.73385902C>T' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -g43 -sg44 -S'73385902' -p52 -sg46 -g47 -sssS'GRCh37' -p53 -(dp54 -g36 -S'NC_000002.11:g.73613030C>T' -p55 -sg38 -(dp56 -g40 -S'2' -p57 -sg42 -g43 -sg44 -S'73613030' -p58 -sg46 -g47 -sssS'GRCh38' -p59 -(dp60 -g36 -S'NC_000002.12:g.73385902C>T' -p61 -sg38 -(dp62 -g40 -g57 -sg42 -g43 -sg44 -S'73385902' -p63 -sg46 -g47 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant20.txt b/VariantValidator/testing/testOutputsMasterITS/variant20.txt deleted file mode 100644 index 8dc94396..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant20.txt +++ /dev/null @@ -1,149 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000518.4:c.316_*100del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'A more recent version of the selected reference sequence NM_000518.4 is available (NM_000518.5)' -p9 -aS'NM_000518.5:c.316_*100del MUST be fully validated prior to use in reports' -p10 -aS'select_variants=NM_000518.5:c.316_*100del' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -g6 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens hemoglobin subunit beta (HBB), mRNA -p16 -sS'gene_symbol' -p17 -S'HBB' -p18 -sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_000509.1(LRG_1232p1):p.(Leu106SerfsTer3)' -p22 -sS'slr' -p23 -S'NP_000509.1:p.(L106Sfs*3)' -p24 -ssS'submitted_variant' -p25 -S'NM_000518.4:c.316_*100del' -p26 -sS'genome_context_intronic_sequence' -p27 -g6 -sS'HGVS_LRG_variant' -p28 -g6 -sS'HGVS_transcript_variant' -p29 -S'NM_000518.4:c.316_*100del' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_000007.3:g.71890_72118del' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'GRCh38' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000011.10:g.5225498_5225726del' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'11' -p42 -sS'ref' -p43 -S'AATCCAGATGCTCAAGGCCCTTCATAATATCCCCCAGTTTAGTAGTTGGACTTAGGGAACAAAGGAACCTTTAATAGAAATTGGACAGCAAGAAAGCGAGCTTAGTGATACTTGTGGGCCAGGGCATTAGCCACACCAGCCACCACTTTCTGATAGGCAGCCTGCACTGGTGGGGTGAATTCTTTGCCAAAGTGATGGGCCAGCACACAGACCAGCACGTTGCCCAGGAG' -p44 -sS'pos' -p45 -S'5225497' -p46 -sS'alt' -p47 -S'A' -p48 -sssS'GRCh37' -p49 -(dp50 -g37 -S'NC_000011.9:g.5246728_5246956del' -p51 -sg39 -(dp52 -g41 -g42 -sg43 
-S'AATCCAGATGCTCAAGGCCCTTCATAATATCCCCCAGTTTAGTAGTTGGACTTAGGGAACAAAGGAACCTTTAATAGAAATTGGACAGCAAGAAAGCGAGCTTAGTGATACTTGTGGGCCAGGGCATTAGCCACACCAGCCACCACTTTCTGATAGGCAGCCTGCACTGGTGGGGTGAATTCTTTGCCAAAGTGATGGGCCAGCACACAGACCAGCACGTTGCCCAGGAG' -p53 -sg45 -S'5246727' -p54 -sg47 -g48 -sssS'hg38' -p55 -(dp56 -g37 -S'NC_000011.10:g.5225498_5225726del' -p57 -sg39 -(dp58 -g41 -S'chr11' -p59 -sg43 -S'AATCCAGATGCTCAAGGCCCTTCATAATATCCCCCAGTTTAGTAGTTGGACTTAGGGAACAAAGGAACCTTTAATAGAAATTGGACAGCAAGAAAGCGAGCTTAGTGATACTTGTGGGCCAGGGCATTAGCCACACCAGCCACCACTTTCTGATAGGCAGCCTGCACTGGTGGGGTGAATTCTTTGCCAAAGTGATGGGCCAGCACACAGACCAGCACGTTGCCCAGGAG' -p60 -sg45 -S'5225497' -p61 -sg47 -g48 -sssS'hg19' -p62 -(dp63 -g37 -S'NC_000011.9:g.5246728_5246956del' -p64 -sg39 -(dp65 -g41 -g59 -sg43 -S'AATCCAGATGCTCAAGGCCCTTCATAATATCCCCCAGTTTAGTAGTTGGACTTAGGGAACAAAGGAACCTTTAATAGAAATTGGACAGCAAGAAAGCGAGCTTAGTGATACTTGTGGGCCAGGGCATTAGCCACACCAGCCACCACTTTCTGATAGGCAGCCTGCACTGGTGGGGTGAATTCTTTGCCAAAGTGATGGGCCAGCACACAGACCAGCACGTTGCCCAGGAG' -p66 -sg45 -S'5246727' -p67 -sg47 -g48 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant200.txt b/VariantValidator/testing/testOutputsMasterITS/variant200.txt deleted file mode 100644 index 8a2e4f10..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant200.txt +++ /dev/null @@ -1,488 +0,0 @@ -(dp0 -S'NM_017739.3:c.1895+5_1895+8del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_701t2:c.1895+5_1895+8del' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000001.10:g.46655125CTCAC>C automapped to NC_000001.10:g.46655126_46655129del' -p7 -aS'The current status of LRG_701 is pending therefore changes may be made to the LRG reference sequence' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -S'NG_009205.2(NM_017739.3):c.1895+5_1895+8del' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens protein O-linked mannose N-acetylglucosaminyltransferase 1 (beta 1,2-) (POMGNT1), transcript variant 1, mRNA -p14 -sS'gene_symbol' -p15 -S'POMGNT1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_060209.3(LRG_701p2):p.?' -p20 -sS'slr' -p21 -S'NP_060209.3:p.?' 
-p22 -ssS'submitted_variant' -p23 -S'1-46655125-CTCAC-C' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000001.10(NM_017739.3):c.1895+5_1895+8del' -p26 -sS'HGVS_LRG_variant' -p27 -S'LRG_701:g.35853_35856del' -p28 -sS'HGVS_transcript_variant' -p29 -S'NM_017739.3:c.1895+5_1895+8del' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_009205.2:g.35853_35856del' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000001.10:g.46655122_46655125del' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr1' -p42 -sS'ref' -p43 -S'GTCAC' -p44 -sS'pos' -p45 -S'46655121' -p46 -sS'alt' -p47 -S'G' -p48 -sssS'hg38' -p49 -(dp50 -g37 -S'NC_000001.11:g.46189450_46189453del' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -S'GTCAC' -p53 -sg45 -S'46189449' -p54 -sg47 -g48 -sssS'GRCh37' -p55 -(dp56 -g37 -S'NC_000001.10:g.46655122_46655125del' -p57 -sg39 -(dp58 -g41 -S'1' -p59 -sg43 -S'GTCAC' -p60 -sg45 -S'46655121' -p61 -sg47 -g48 -sssS'GRCh38' -p62 -(dp63 -g37 -S'NC_000001.11:g.46189450_46189453del' -p64 -sg39 -(dp65 -g41 -g59 -sg43 -S'GTCAC' -p66 -sg45 -S'46189449' -p67 -sg47 -g48 -sssssS'NM_001243766.1:c.1869+31_1869+34del' -p68 -(dp69 -g3 -S'LRG_701t1:c.1869+31_1869+34del' -p70 -sg5 -(lp71 -S'NC_000001.10:g.46655125CTCAC>C automapped to NC_000001.10:g.46655126_46655129del' -p72 -aS'The current status of LRG_701 is pending therefore changes may be made to the LRG reference sequence' -p73 -asg9 -S'NG_009205.2(NM_001243766.1):c.1869+31_1869+34del' -p74 -sg11 -(lp75 -sg13 -VHomo sapiens protein O-linked mannose N-acetylglucosaminyltransferase 1 (beta 1,2-) (POMGNT1), transcript variant 2, mRNA -p76 -sg15 -S'POMGNT1' -p77 -sg17 -(dp78 -g19 -S'NP_001230695.1:p.?' -p79 -sg21 -S'NP_001230695.1:p.?' 
-p80 -ssg23 -g24 -sg25 -S'NC_000001.10(NM_001243766.1):c.1869+31_1869+34del' -p81 -sg27 -S'LRG_701:g.35853_35856del' -p82 -sg29 -S'NM_001243766.1:c.1869+31_1869+34del' -p83 -sg31 -S'NG_009205.2:g.35853_35856del' -p84 -sg33 -(dp85 -g35 -(dp86 -g37 -S'NC_000001.10:g.46655122_46655125del' -p87 -sg39 -(dp88 -g41 -g42 -sg43 -S'GTCAC' -p89 -sg45 -S'46655121' -p90 -sg47 -g48 -sssg49 -(dp91 -g37 -S'NC_000001.11:g.46189450_46189453del' -p92 -sg39 -(dp93 -g41 -g42 -sg43 -S'GTCAC' -p94 -sg45 -S'46189449' -p95 -sg47 -g48 -sssg55 -(dp96 -g37 -S'NC_000001.10:g.46655122_46655125del' -p97 -sg39 -(dp98 -g41 -g59 -sg43 -S'GTCAC' -p99 -sg45 -S'46655121' -p100 -sg47 -g48 -sssg62 -(dp101 -g37 -S'NC_000001.11:g.46189450_46189453del' -p102 -sg39 -(dp103 -g41 -g59 -sg43 -S'GTCAC' -p104 -sg45 -S'46189449' -p105 -sg47 -g48 -sssssS'flag' -p106 -S'gene_variant' -p107 -sS'NM_001290129.1:c.1829+5_1829+8del' -p108 -(dp109 -g3 -S'' -p110 -sg5 -(lp111 -S'NC_000001.10:g.46655125CTCAC>C automapped to NC_000001.10:g.46655126_46655129del' -p112 -aS'RefSeqGene record not available' -p113 -asg9 -g110 -sg11 -(lp114 -sg13 -VHomo sapiens protein O-linked mannose N-acetylglucosaminyltransferase 1 (beta 1,2-) (POMGNT1), transcript variant 3, mRNA -p115 -sg15 -S'POMGNT1' -p116 -sg17 -(dp117 -g19 -S'NP_001277058.1:p.?' -p118 -sg21 -S'NP_001277058.1:p.?' 
-p119 -ssg23 -g24 -sg25 -S'NC_000001.10(NM_001290129.1):c.1829+5_1829+8del' -p120 -sg27 -g110 -sg29 -S'NM_001290129.1:c.1829+5_1829+8del' -p121 -sg31 -g110 -sg33 -(dp122 -g35 -(dp123 -g37 -S'NC_000001.10:g.46655122_46655125del' -p124 -sg39 -(dp125 -g41 -g42 -sg43 -S'GTCAC' -p126 -sg45 -S'46655121' -p127 -sg47 -g48 -sssg49 -(dp128 -g37 -S'NC_000001.11:g.46189450_46189453del' -p129 -sg39 -(dp130 -g41 -g42 -sg43 -S'GTCAC' -p131 -sg45 -S'46189449' -p132 -sg47 -g48 -sssg55 -(dp133 -g37 -S'NC_000001.10:g.46655122_46655125del' -p134 -sg39 -(dp135 -g41 -g59 -sg43 -S'GTCAC' -p136 -sg45 -S'46655121' -p137 -sg47 -g48 -sssg62 -(dp138 -g37 -S'NC_000001.11:g.46189450_46189453del' -p139 -sg39 -(dp140 -g41 -g59 -sg43 -S'GTCAC' -p141 -sg45 -S'46189449' -p142 -sg47 -g48 -sssssS'NM_001290130.1:c.1466+5_1466+8del' -p143 -(dp144 -g3 -g110 -sg5 -(lp145 -S'NC_000001.10:g.46655125CTCAC>C automapped to NC_000001.10:g.46655126_46655129del' -p146 -aS'RefSeqGene record not available' -p147 -asg9 -g110 -sg11 -(lp148 -sg13 -VHomo sapiens protein O-linked mannose N-acetylglucosaminyltransferase 1 (beta 1,2-) (POMGNT1), transcript variant 4, mRNA -p149 -sg15 -S'POMGNT1' -p150 -sg17 -(dp151 -g19 -S'NP_001277059.1:p.?' -p152 -sg21 -S'NP_001277059.1:p.?' 
-p153 -ssg23 -g24 -sg25 -S'NC_000001.10(NM_001290130.1):c.1466+5_1466+8del' -p154 -sg27 -g110 -sg29 -S'NM_001290130.1:c.1466+5_1466+8del' -p155 -sg31 -g110 -sg33 -(dp156 -g35 -(dp157 -g37 -S'NC_000001.10:g.46655122_46655125del' -p158 -sg39 -(dp159 -g41 -g42 -sg43 -S'GTCAC' -p160 -sg45 -S'46655121' -p161 -sg47 -g48 -sssg49 -(dp162 -g37 -S'NC_000001.11:g.46189450_46189453del' -p163 -sg39 -(dp164 -g41 -g42 -sg43 -S'GTCAC' -p165 -sg45 -S'46189449' -p166 -sg47 -g48 -sssg55 -(dp167 -g37 -S'NC_000001.10:g.46655122_46655125del' -p168 -sg39 -(dp169 -g41 -g59 -sg43 -S'GTCAC' -p170 -sg45 -S'46655121' -p171 -sg47 -g48 -sssg62 -(dp172 -g37 -S'NC_000001.11:g.46189450_46189453del' -p173 -sg39 -(dp174 -g41 -g59 -sg43 -S'GTCAC' -p175 -sg45 -S'46189449' -p176 -sg47 -g48 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant201.txt b/VariantValidator/testing/testOutputsMasterITS/variant201.txt deleted file mode 100644 index 0e515dc4..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant201.txt +++ /dev/null @@ -1,145 +0,0 @@ -(dp0 -S'NM_000329.2:c.106_114del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000001.10:g.68912523TGAGCCAGAG>T automapped to NC_000001.10:g.68912525_68912533del' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens RPE65, retinoid isomerohydrolase (RPE65), mRNA -p12 -sS'gene_symbol' -p13 -S'RPE65' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_000320.1:p.(Leu36_Leu38del)' -p18 -sS'slr' -p19 -S'NP_000320.1:p.(L36_L38del)' -p20 -ssS'submitted_variant' -p21 -S'1-68912523-TGAGCCAGAG-T' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_000329.2:c.106_114del' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -S'NG_008472.1:g.8111_8119del' -p28 -sS'primary_assembly_loci' -p29 -(dp30 
-S'hg19' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000001.10:g.68912524_68912532del' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr1' -p38 -sS'ref' -p39 -S'TGAGCCAGAG' -p40 -sS'pos' -p41 -S'68912523' -p42 -sS'alt' -p43 -S'T' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000001.11:g.68446841_68446849del' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'TGAGCCAGAG' -p49 -sg41 -S'68446840' -p50 -sg43 -g44 -sssS'GRCh37' -p51 -(dp52 -g33 -S'NC_000001.10:g.68912524_68912532del' -p53 -sg35 -(dp54 -g37 -S'1' -p55 -sg39 -S'TGAGCCAGAG' -p56 -sg41 -S'68912523' -p57 -sg43 -g44 -sssS'GRCh38' -p58 -(dp59 -g33 -S'NC_000001.11:g.68446841_68446849del' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -S'TGAGCCAGAG' -p62 -sg41 -S'68446840' -p63 -sg43 -g44 -sssssS'flag' -p64 -S'gene_variant' -p65 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant202.txt b/VariantValidator/testing/testOutputsMasterITS/variant202.txt deleted file mode 100644 index df20a2f3..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant202.txt +++ /dev/null @@ -1,145 +0,0 @@ -(dp0 -S'NM_000329.2:c.109_114del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000001.10:g.68912526GCCAGAG>G automapped to NC_000001.10:g.68912527_68912532del' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens RPE65, retinoid isomerohydrolase (RPE65), mRNA -p12 -sS'gene_symbol' -p13 -S'RPE65' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_000320.1:p.(Trp37_Leu38del)' -p18 -sS'slr' -p19 -S'NP_000320.1:p.(W37_L38del)' -p20 -ssS'submitted_variant' -p21 -S'1-68912526-GCCAGAG-G' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_000329.2:c.109_114del' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -S'NG_008472.1:g.8114_8119del' -p28 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' 
-p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000001.10:g.68912524_68912529del' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr1' -p38 -sS'ref' -p39 -S'TGAGCCA' -p40 -sS'pos' -p41 -S'68912523' -p42 -sS'alt' -p43 -S'T' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000001.11:g.68446841_68446846del' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'TGAGCCA' -p49 -sg41 -S'68446840' -p50 -sg43 -g44 -sssS'GRCh37' -p51 -(dp52 -g33 -S'NC_000001.10:g.68912524_68912529del' -p53 -sg35 -(dp54 -g37 -S'1' -p55 -sg39 -S'TGAGCCA' -p56 -sg41 -S'68912523' -p57 -sg43 -g44 -sssS'GRCh38' -p58 -(dp59 -g33 -S'NC_000001.11:g.68446841_68446846del' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -S'TGAGCCA' -p62 -sg41 -S'68446840' -p63 -sg43 -g44 -sssssS'flag' -p64 -S'gene_variant' -p65 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant203.txt b/VariantValidator/testing/testOutputsMasterITS/variant203.txt deleted file mode 100644 index 27807a76..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant203.txt +++ /dev/null @@ -1,141 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_001408.2:c.*919G>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens cadherin EGF LAG seven-pass G-type receptor 2 (CELSR2), mRNA -p14 -sS'gene_symbol' -p15 -S'CELSR2' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_001399.1:p.?' -p20 -sS'slr' -p21 -S'NP_001399.1:p.?' 
-p22 -ssS'submitted_variant' -p23 -S'1-109817590-G-T' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'HGVS_LRG_variant' -p26 -g6 -sS'HGVS_transcript_variant' -p27 -S'NM_001408.2:c.*919G>T' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000001.10:g.109817590G>T' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr1' -p39 -sS'ref' -p40 -S'G' -p41 -sS'pos' -p42 -S'109817590' -p43 -sS'alt' -p44 -S'T' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000001.11:g.109274968G>T' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'109274968' -p50 -sg44 -g45 -sssS'GRCh37' -p51 -(dp52 -g34 -S'NC_000001.10:g.109817590G>T' -p53 -sg36 -(dp54 -g38 -S'1' -p55 -sg40 -g41 -sg42 -S'109817590' -p56 -sg44 -g45 -sssS'GRCh38' -p57 -(dp58 -g34 -S'NC_000001.11:g.109274968G>T' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'109274968' -p61 -sg44 -g45 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant204.txt b/VariantValidator/testing/testOutputsMasterITS/variant204.txt deleted file mode 100644 index f8e6f30a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant204.txt +++ /dev/null @@ -1,397 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_006468.6:c.1070+35_1070+38del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000001.10:g.145597475GAAGT>G automapped to NC_000001.10:g.145597477_145597480del' -p9 -aS'A more recent version of the selected reference sequence NM_006468.6 is available (NM_006468.7)' -p10 -aS'NM_006468.7:c.1070+35_1070+38del MUST be fully validated prior to use in reports' -p11 -aS'select_variants=NM_006468.7:c.1070+35_1070+38del' -p12 -aS'RefSeqGene record not available' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -(dp17 -S'GRCh37' -p18 -(dp19 -S'HGVS_genomic_description' -p20 -S'NW_003871055.3:g.2653044_2653047del' -p21 
-sS'vcf' -p22 -(dp23 -S'chr' -p24 -S'HG1287_PATCH' -p25 -sS'ref' -p26 -S'ATACT' -p27 -sS'pos' -p28 -S'2653042' -p29 -sS'alt' -p30 -S'A' -p31 -sssasS'transcript_description' -p32 -VHomo sapiens polymerase (RNA) III (DNA directed) polypeptide C (62kD) (POLR3C), mRNA -p33 -sS'gene_symbol' -p34 -S'POLR3C' -p35 -sS'HGVS_predicted_protein_consequence' -p36 -(dp37 -S'tlr' -p38 -S'NP_006459.3:p.?' -p39 -sS'slr' -p40 -S'NP_006459.3:p.?' -p41 -ssS'submitted_variant' -p42 -S'1-145597475-GAAGT-G' -p43 -sS'genome_context_intronic_sequence' -p44 -S'NC_000001.10(NM_006468.6):c.1070+35_1070+38del' -p45 -sS'HGVS_LRG_variant' -p46 -g6 -sS'HGVS_transcript_variant' -p47 -S'NM_006468.6:c.1070+35_1070+38del' -p48 -sS'HGVS_RefSeqGene_variant' -p49 -g6 -sS'primary_assembly_loci' -p50 -(dp51 -S'hg19' -p52 -(dp53 -g20 -S'NC_000001.10:g.145597477_145597480del' -p54 -sg22 -(dp55 -g24 -S'chr1' -p56 -sg26 -S'GAAGT' -p57 -sg28 -S'145597475' -p58 -sg30 -S'G' -p59 -sssg18 -(dp60 -g20 -S'NC_000001.10:g.145597477_145597480del' -p61 -sg22 -(dp62 -g24 -S'1' -p63 -sg26 -S'GAAGT' -p64 -sg28 -S'145597475' -p65 -sg30 -g59 -sssssS'NM_001303456.1:c.1109+35_1109+38del' -p66 -(dp67 -g5 -g6 -sg7 -(lp68 -S'NC_000001.10:g.145597475GAAGT>G automapped to NC_000001.10:g.145597477_145597480del' -p69 -aS'RefSeqGene record not available' -p70 -asg14 -g6 -sg15 -(lp71 -(dp72 -g18 -(dp73 -g20 -S'NW_003871055.3:g.2653044_2653047del' -p74 -sg22 -(dp75 -g24 -g25 -sg26 -S'ATACT' -p76 -sg28 -S'2653042' -p77 -sg30 -g31 -sssasg32 -VHomo sapiens RNA polymerase III subunit C (POLR3C), transcript variant 2, mRNA -p78 -sg34 -S'POLR3C' -p79 -sg36 -(dp80 -g38 -S'NP_001290385.1:p.?' -p81 -sg40 -S'NP_001290385.1:p.?' 
-p82 -ssg42 -g43 -sg44 -S'NC_000001.10(NM_001303456.1):c.1109+35_1109+38del' -p83 -sg46 -g6 -sg47 -S'NM_001303456.1:c.1109+35_1109+38del' -p84 -sg49 -g6 -sg50 -(dp85 -g52 -(dp86 -g20 -S'NC_000001.10:g.145597477_145597480del' -p87 -sg22 -(dp88 -g24 -g56 -sg26 -S'GAAGT' -p89 -sg28 -S'145597475' -p90 -sg30 -g59 -sssS'hg38' -p91 -(dp92 -g20 -S'NC_000001.11:g.145837631_145837634del' -p93 -sg22 -(dp94 -g24 -g56 -sg26 -S'ATACT' -p95 -sg28 -S'145837629' -p96 -sg30 -g31 -sssg18 -(dp97 -g20 -S'NC_000001.10:g.145597477_145597480del' -p98 -sg22 -(dp99 -g24 -g63 -sg26 -S'GAAGT' -p100 -sg28 -S'145597475' -p101 -sg30 -g59 -sssS'GRCh38' -p102 -(dp103 -g20 -S'NC_000001.11:g.145837631_145837634del' -p104 -sg22 -(dp105 -g24 -g63 -sg26 -S'ATACT' -p106 -sg28 -S'145837629' -p107 -sg30 -g31 -sssssS'NM_006468.7:c.1070+35_1070+38del' -p108 -(dp109 -g5 -g6 -sg7 -(lp110 -S'NC_000001.10:g.145597475GAAGT>G automapped to NC_000001.10:g.145597477_145597480del' -p111 -aS'RefSeqGene record not available' -p112 -asg14 -g6 -sg15 -(lp113 -(dp114 -g18 -(dp115 -g20 -S'NW_003871055.3:g.2653044_2653047del' -p116 -sg22 -(dp117 -g24 -g25 -sg26 -S'ATACT' -p118 -sg28 -S'2653042' -p119 -sg30 -g31 -sssasg32 -VHomo sapiens RNA polymerase III subunit C (POLR3C), transcript variant 1, mRNA -p120 -sg34 -S'POLR3C' -p121 -sg36 -(dp122 -g38 -S'NP_006459.3:p.?' -p123 -sg40 -S'NP_006459.3:p.?' 
-p124 -ssg42 -g43 -sg44 -S'NC_000001.10(NM_006468.7):c.1070+35_1070+38del' -p125 -sg46 -g6 -sg47 -S'NM_006468.7:c.1070+35_1070+38del' -p126 -sg49 -g6 -sg50 -(dp127 -g52 -(dp128 -g20 -S'NC_000001.10:g.145597477_145597480del' -p129 -sg22 -(dp130 -g24 -g56 -sg26 -S'GAAGT' -p131 -sg28 -S'145597475' -p132 -sg30 -g59 -sssg91 -(dp133 -g20 -S'NC_000001.11:g.145837631_145837634del' -p134 -sg22 -(dp135 -g24 -g56 -sg26 -S'ATACT' -p136 -sg28 -S'145837629' -p137 -sg30 -g31 -sssg18 -(dp138 -g20 -S'NC_000001.10:g.145597477_145597480del' -p139 -sg22 -(dp140 -g24 -g63 -sg26 -S'GAAGT' -p141 -sg28 -S'145597475' -p142 -sg30 -g59 -sssg102 -(dp143 -g20 -S'NC_000001.11:g.145837631_145837634del' -p144 -sg22 -(dp145 -g24 -g63 -sg26 -S'ATACT' -p146 -sg28 -S'145837629' -p147 -sg30 -g31 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant205.txt b/VariantValidator/testing/testOutputsMasterITS/variant205.txt deleted file mode 100644 index 9844a841..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant205.txt +++ /dev/null @@ -1,262 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_020699.2:c.562_563del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000001.10:g.153791300CTG>C automapped to NC_000001.10:g.153791302_153791303delGT' -p9 -aS'A more recent version of the selected reference sequence NM_020699.2 is available (NM_020699.3)' -p10 -aS'NM_020699.3:c.562_563delCA MUST be fully validated prior to use in reports' -p11 -aS'select_variants=NM_020699.3:c.562_563del' -p12 -aS'RefSeqGene record not available' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens GATA zinc finger domain containing 2B (GATAD2B), mRNA -p18 -sS'gene_symbol' -p19 -S'GATAD2B' -p20 -sS'HGVS_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_065750.1:p.(Gln188GlufsTer36)' -p24 -sS'slr' -p25 
-S'NP_065750.1:p.(Q188Efs*36)' -p26 -ssS'submitted_variant' -p27 -S'1-153791300-CTG-C' -p28 -sS'genome_context_intronic_sequence' -p29 -g6 -sS'HGVS_LRG_variant' -p30 -g6 -sS'HGVS_transcript_variant' -p31 -S'NM_020699.2:c.562_563del' -p32 -sS'HGVS_RefSeqGene_variant' -p33 -g6 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000001.10:g.153791301_153791302del' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chr1' -p43 -sS'ref' -p44 -S'CTG' -p45 -sS'pos' -p46 -S'153791300' -p47 -sS'alt' -p48 -S'C' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000001.11:g.153818825_153818826del' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'CTG' -p54 -sg46 -S'153818824' -p55 -sg48 -g49 -sssS'GRCh37' -p56 -(dp57 -g38 -S'NC_000001.10:g.153791301_153791302del' -p58 -sg40 -(dp59 -g42 -S'1' -p60 -sg44 -S'CTG' -p61 -sg46 -S'153791300' -p62 -sg48 -g49 -sssS'GRCh38' -p63 -(dp64 -g38 -S'NC_000001.11:g.153818825_153818826del' -p65 -sg40 -(dp66 -g42 -g60 -sg44 -S'CTG' -p67 -sg46 -S'153818824' -p68 -sg48 -g49 -sssssS'NM_020699.3:c.562_563del' -p69 -(dp70 -g5 -g6 -sg7 -(lp71 -S'NC_000001.10:g.153791300CTG>C automapped to NC_000001.10:g.153791302_153791303delGT' -p72 -aS'RefSeqGene record not available' -p73 -asg14 -g6 -sg15 -(lp74 -sg17 -VHomo sapiens GATA zinc finger domain containing 2B (GATAD2B), mRNA -p75 -sg19 -S'GATAD2B' -p76 -sg21 -(dp77 -g23 -S'NP_065750.1:p.(Gln188GlufsTer36)' -p78 -sg25 -S'NP_065750.1:p.(Q188Efs*36)' -p79 -ssg27 -g28 -sg29 -g6 -sg30 -g6 -sg31 -S'NM_020699.3:c.562_563del' -p80 -sg33 -g6 -sg34 -(dp81 -g36 -(dp82 -g38 -S'NC_000001.10:g.153791301_153791302del' -p83 -sg40 -(dp84 -g42 -g43 -sg44 -S'CTG' -p85 -sg46 -S'153791300' -p86 -sg48 -g49 -sssg50 -(dp87 -g38 -S'NC_000001.11:g.153818825_153818826del' -p88 -sg40 -(dp89 -g42 -g43 -sg44 -S'CTG' -p90 -sg46 -S'153818824' -p91 -sg48 -g49 -sssg56 -(dp92 -g38 -S'NC_000001.10:g.153791301_153791302del' -p93 -sg40 -(dp94 -g42 -g60 -sg44 -S'CTG' -p95 -sg46 -S'153791300' -p96 -sg48 -g49 -sssg63 -(dp97 -g38 
-S'NC_000001.11:g.153818825_153818826del' -p98 -sg40 -(dp99 -g42 -g60 -sg44 -S'CTG' -p100 -sg46 -S'153818824' -p101 -sg48 -g49 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant206.txt b/VariantValidator/testing/testOutputsMasterITS/variant206.txt deleted file mode 100644 index 9450e3ab..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant206.txt +++ /dev/null @@ -1,896 +0,0 @@ -(dp0 -S'NM_005572.3:c.711_734delinsCCCC' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_254t1:c.711_734delinsCCCC' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000001.10:g.156104666TTGAGAGCCGGCTGGCGGATGCGCT>TCCCC automapped to NC_000001.10:g.156104667_156104690delinsCCCC' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -S'' -p9 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens lamin A/C (LMNA), transcript variant 2, mRNA -p13 -sS'gene_symbol' -p14 -S'LMNA' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_005563.1(LRG_254p1):p.(Glu238ProfsTer9)' -p19 -sS'slr' -p20 -S'NP_005563.1:p.(E238Pfs*9)' -p21 -ssS'submitted_variant' -p22 -S'1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' -p23 -sS'genome_context_intronic_sequence' -p24 -g9 -sS'HGVS_LRG_variant' -p25 -S'LRG_254:g.57304_57327delinsCCCC' -p26 -sS'HGVS_transcript_variant' -p27 -S'NM_005572.3:c.711_734delinsCCCC' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -S'NG_008692.2:g.57304_57327delinsCCCC' -p30 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr1' -p40 -sS'ref' -p41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p42 -sS'pos' -p43 -S'156104667' -p44 -sS'alt' -p45 -S'CCCC' -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000001.11:g.156134876_156134899delinsCCCC' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p51 -sg43 -S'156134876' -p52 -sg45 -g46 -sssS'GRCh37' -p53 -(dp54 -g35 
-S'NC_000001.10:g.156104667_156104690delinsCCCC' -p55 -sg37 -(dp56 -g39 -S'1' -p57 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p58 -sg43 -S'156104667' -p59 -sg45 -g46 -sssS'GRCh38' -p60 -(dp61 -g35 -S'NC_000001.11:g.156134876_156134899delinsCCCC' -p62 -sg37 -(dp63 -g39 -g57 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p64 -sg43 -S'156134876' -p65 -sg45 -g46 -sssssS'NM_001257374.1:c.375_398delinsCCCC' -p66 -(dp67 -g3 -g9 -sg5 -(lp68 -S'NC_000001.10:g.156104666TTGAGAGCCGGCTGGCGGATGCGCT>TCCCC automapped to NC_000001.10:g.156104667_156104690delinsCCCC' -p69 -aS'A more recent version of the selected reference sequence NM_001257374.1 is available (NM_001257374.2)' -p70 -aS'NM_001257374.2:c.375_398delinsCCCC MUST be fully validated prior to use in reports' -p71 -aS'select_variants=NM_001257374.2:c.375_398delinsCCCC' -p72 -aS'RefSeqGene record not available' -p73 -asg8 -g9 -sg10 -(lp74 -sg12 -VHomo sapiens lamin A/C (LMNA), transcript variant 4, mRNA -p75 -sg14 -S'LMNA' -p76 -sg16 -(dp77 -g18 -S'NP_001244303.1:p.(Glu126ProfsTer9)' -p78 -sg20 -S'NP_001244303.1:p.(E126Pfs*9)' -p79 -ssg22 -g23 -sg24 -g9 -sg25 -g9 -sg27 -S'NM_001257374.1:c.375_398delinsCCCC' -p80 -sg29 -g9 -sg31 -(dp81 -g33 -(dp82 -g35 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p83 -sg37 -(dp84 -g39 -g40 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p85 -sg43 -S'156104667' -p86 -sg45 -S'CCCC' -p87 -sssg53 -(dp88 -g35 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p89 -sg37 -(dp90 -g39 -g57 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p91 -sg43 -S'156104667' -p92 -sg45 -g87 -sssssS'NM_001257374.2:c.375_398delinsCCCC' -p93 -(dp94 -g3 -g9 -sg5 -(lp95 -S'NC_000001.10:g.156104666TTGAGAGCCGGCTGGCGGATGCGCT>TCCCC automapped to NC_000001.10:g.156104667_156104690delinsCCCC' -p96 -aS'RefSeqGene record not available' -p97 -asg8 -g9 -sg10 -(lp98 -sg12 -VHomo sapiens lamin A/C (LMNA), transcript variant 4, mRNA -p99 -sg14 -S'LMNA' -p100 -sg16 -(dp101 -g18 -S'NP_001244303.1:p.(Glu126ProfsTer9)' -p102 -sg20 -S'NP_001244303.1:p.(E126Pfs*9)' -p103 
-ssg22 -g23 -sg24 -g9 -sg25 -g9 -sg27 -S'NM_001257374.2:c.375_398delinsCCCC' -p104 -sg29 -g9 -sg31 -(dp105 -g33 -(dp106 -g35 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p107 -sg37 -(dp108 -g39 -g40 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p109 -sg43 -S'156104667' -p110 -sg45 -S'CCCC' -p111 -sssg47 -(dp112 -g35 -S'NC_000001.11:g.156134876_156134899delinsCCCC' -p113 -sg37 -(dp114 -g39 -g40 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p115 -sg43 -S'156134876' -p116 -sg45 -g111 -sssg53 -(dp117 -g35 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p118 -sg37 -(dp119 -g39 -g57 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p120 -sg43 -S'156104667' -p121 -sg45 -g111 -sssg60 -(dp122 -g35 -S'NC_000001.11:g.156134876_156134899delinsCCCC' -p123 -sg37 -(dp124 -g39 -g57 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p125 -sg43 -S'156134876' -p126 -sg45 -g111 -sssssS'NM_001282624.1:c.468_491delinsCCCC' -p127 -(dp128 -g3 -g9 -sg5 -(lp129 -S'NC_000001.10:g.156104666TTGAGAGCCGGCTGGCGGATGCGCT>TCCCC automapped to NC_000001.10:g.156104667_156104690delinsCCCC' -p130 -aS'RefSeqGene record not available' -p131 -asg8 -g9 -sg10 -(lp132 -sg12 -VHomo sapiens lamin A/C (LMNA), transcript variant 5, mRNA -p133 -sg14 -S'LMNA' -p134 -sg16 -(dp135 -g18 -S'NP_001269553.1:p.(Glu157ProfsTer9)' -p136 -sg20 -S'NP_001269553.1:p.(E157Pfs*9)' -p137 -ssg22 -g23 -sg24 -g9 -sg25 -g9 -sg27 -S'NM_001282624.1:c.468_491delinsCCCC' -p138 -sg29 -g9 -sg31 -(dp139 -g33 -(dp140 -g35 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p141 -sg37 -(dp142 -g39 -g40 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p143 -sg43 -S'156104667' -p144 -sg45 -S'CCCC' -p145 -sssg47 -(dp146 -g35 -S'NC_000001.11:g.156134876_156134899delinsCCCC' -p147 -sg37 -(dp148 -g39 -g40 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p149 -sg43 -S'156134876' -p150 -sg45 -g145 -sssg53 -(dp151 -g35 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p152 -sg37 -(dp153 -g39 -g57 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p154 -sg43 -S'156104667' -p155 -sg45 -g145 -sssg60 -(dp156 -g35 
-S'NC_000001.11:g.156134876_156134899delinsCCCC' -p157 -sg37 -(dp158 -g39 -g57 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p159 -sg43 -S'156134876' -p160 -sg45 -g145 -sssssS'flag' -p161 -S'gene_variant' -p162 -sS'NM_170708.3:c.711_734delinsCCCC' -p163 -(dp164 -g3 -g9 -sg5 -(lp165 -S'NC_000001.10:g.156104666TTGAGAGCCGGCTGGCGGATGCGCT>TCCCC automapped to NC_000001.10:g.156104667_156104690delinsCCCC' -p166 -aS'RefSeqGene record not available' -p167 -asg8 -g9 -sg10 -(lp168 -sg12 -VHomo sapiens lamin A/C (LMNA), transcript variant 3, mRNA -p169 -sg14 -S'LMNA' -p170 -sg16 -(dp171 -g18 -S'NP_733822.1(LRG_254p3):p.(Glu238ProfsTer9)' -p172 -sg20 -S'NP_733822.1:p.(E238Pfs*9)' -p173 -ssg22 -g23 -sg24 -g9 -sg25 -g9 -sg27 -S'NM_170708.3:c.711_734delinsCCCC' -p174 -sg29 -g9 -sg31 -(dp175 -g33 -(dp176 -g35 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p177 -sg37 -(dp178 -g39 -g40 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p179 -sg43 -S'156104667' -p180 -sg45 -S'CCCC' -p181 -sssg47 -(dp182 -g35 -S'NC_000001.11:g.156134876_156134899delinsCCCC' -p183 -sg37 -(dp184 -g39 -g40 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p185 -sg43 -S'156134876' -p186 -sg45 -g181 -sssg53 -(dp187 -g35 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p188 -sg37 -(dp189 -g39 -g57 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p190 -sg43 -S'156104667' -p191 -sg45 -g181 -sssg60 -(dp192 -g35 -S'NC_000001.11:g.156134876_156134899delinsCCCC' -p193 -sg37 -(dp194 -g39 -g57 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p195 -sg43 -S'156134876' -p196 -sg45 -g181 -sssssS'NM_170707.3:c.711_734delinsCCCC' -p197 -(dp198 -g3 -g9 -sg5 -(lp199 -S'NC_000001.10:g.156104666TTGAGAGCCGGCTGGCGGATGCGCT>TCCCC automapped to NC_000001.10:g.156104667_156104690delinsCCCC' -p200 -aS'RefSeqGene record not available' -p201 -asg8 -g9 -sg10 -(lp202 -sg12 -VHomo sapiens lamin A/C (LMNA), transcript variant 1, mRNA -p203 -sg14 -S'LMNA' -p204 -sg16 -(dp205 -g18 -S'NP_733821.1(LRG_254p2):p.(Glu238ProfsTer9)' -p206 -sg20 -S'NP_733821.1:p.(E238Pfs*9)' -p207 -ssg22 -g23 -sg24 -g9 
-sg25 -g9 -sg27 -S'NM_170707.3:c.711_734delinsCCCC' -p208 -sg29 -g9 -sg31 -(dp209 -g33 -(dp210 -g35 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p211 -sg37 -(dp212 -g39 -g40 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p213 -sg43 -S'156104667' -p214 -sg45 -S'CCCC' -p215 -sssg47 -(dp216 -g35 -S'NC_000001.11:g.156134876_156134899delinsCCCC' -p217 -sg37 -(dp218 -g39 -g40 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p219 -sg43 -S'156134876' -p220 -sg45 -g215 -sssg53 -(dp221 -g35 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p222 -sg37 -(dp223 -g39 -g57 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p224 -sg43 -S'156104667' -p225 -sg45 -g215 -sssg60 -(dp226 -g35 -S'NC_000001.11:g.156134876_156134899delinsCCCC' -p227 -sg37 -(dp228 -g39 -g57 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p229 -sg43 -S'156134876' -p230 -sg45 -g215 -sssssS'NM_001282626.1:c.711_734delinsCCCC' -p231 -(dp232 -g3 -g9 -sg5 -(lp233 -S'NC_000001.10:g.156104666TTGAGAGCCGGCTGGCGGATGCGCT>TCCCC automapped to NC_000001.10:g.156104667_156104690delinsCCCC' -p234 -aS'RefSeqGene record not available' -p235 -asg8 -g9 -sg10 -(lp236 -sg12 -VHomo sapiens lamin A/C (LMNA), transcript variant 7, mRNA -p237 -sg14 -S'LMNA' -p238 -sg16 -(dp239 -g18 -S'NP_001269555.1:p.(Glu238ProfsTer9)' -p240 -sg20 -S'NP_001269555.1:p.(E238Pfs*9)' -p241 -ssg22 -g23 -sg24 -g9 -sg25 -g9 -sg27 -S'NM_001282626.1:c.711_734delinsCCCC' -p242 -sg29 -g9 -sg31 -(dp243 -g33 -(dp244 -g35 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p245 -sg37 -(dp246 -g39 -g40 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p247 -sg43 -S'156104667' -p248 -sg45 -S'CCCC' -p249 -sssg47 -(dp250 -g35 -S'NC_000001.11:g.156134876_156134899delinsCCCC' -p251 -sg37 -(dp252 -g39 -g40 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p253 -sg43 -S'156134876' -p254 -sg45 -g249 -sssg53 -(dp255 -g35 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p256 -sg37 -(dp257 -g39 -g57 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p258 -sg43 -S'156104667' -p259 -sg45 -g249 -sssg60 -(dp260 -g35 -S'NC_000001.11:g.156134876_156134899delinsCCCC' 
-p261 -sg37 -(dp262 -g39 -g57 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p263 -sg43 -S'156134876' -p264 -sg45 -g249 -sssssS'NM_001282625.1:c.711_734delinsCCCC' -p265 -(dp266 -g3 -g9 -sg5 -(lp267 -S'NC_000001.10:g.156104666TTGAGAGCCGGCTGGCGGATGCGCT>TCCCC automapped to NC_000001.10:g.156104667_156104690delinsCCCC' -p268 -aS'RefSeqGene record not available' -p269 -asg8 -g9 -sg10 -(lp270 -sg12 -VHomo sapiens lamin A/C (LMNA), transcript variant 6, mRNA -p271 -sg14 -S'LMNA' -p272 -sg16 -(dp273 -g18 -S'NP_001269554.1:p.(Glu238ProfsTer9)' -p274 -sg20 -S'NP_001269554.1:p.(E238Pfs*9)' -p275 -ssg22 -g23 -sg24 -g9 -sg25 -g9 -sg27 -S'NM_001282625.1:c.711_734delinsCCCC' -p276 -sg29 -g9 -sg31 -(dp277 -g33 -(dp278 -g35 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p279 -sg37 -(dp280 -g39 -g40 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p281 -sg43 -S'156104667' -p282 -sg45 -S'CCCC' -p283 -sssg47 -(dp284 -g35 -S'NC_000001.11:g.156134876_156134899delinsCCCC' -p285 -sg37 -(dp286 -g39 -g40 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p287 -sg43 -S'156134876' -p288 -sg45 -g283 -sssg53 -(dp289 -g35 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p290 -sg37 -(dp291 -g39 -g57 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p292 -sg43 -S'156104667' -p293 -sg45 -g283 -sssg60 -(dp294 -g35 -S'NC_000001.11:g.156134876_156134899delinsCCCC' -p295 -sg37 -(dp296 -g39 -g57 -sg41 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p297 -sg43 -S'156134876' -p298 -sg45 -g283 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant207.txt b/VariantValidator/testing/testOutputsMasterITS/variant207.txt deleted file mode 100644 index d2706c1e..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant207.txt +++ /dev/null @@ -1,559 +0,0 @@ -(dp0 -S'NM_170707.3:c.1961dup' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000001.10:g.156108541G>GG automapped to NC_000001.10:g.156108541dupG' -p7 -aS'RefSeqGene record not available' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens lamin A/C (LMNA), transcript variant 1, mRNA -p13 -sS'gene_symbol' -p14 -S'LMNA' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_733821.1(LRG_254p2):p.(Thr655AsnfsTer49)' -p19 -sS'slr' -p20 -S'NP_733821.1:p.(T655Nfs*49)' -p21 -ssS'submitted_variant' -p22 -S'1-156108541-G-GG' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'HGVS_LRG_variant' -p25 -g4 -sS'HGVS_transcript_variant' -p26 -S'NM_170707.3:c.1961dup' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000001.10:g.156108541dup' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr1' -p38 -sS'ref' -p39 -S'G' -p40 -sS'pos' -p41 -S'156108541' -p42 -sS'alt' -p43 -S'GG' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000001.11:g.156138750dup' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'156138750' -p49 -sg43 -S'GG' -p50 -sssS'GRCh37' -p51 -(dp52 -g33 -S'NC_000001.10:g.156108541dup' -p53 -sg35 -(dp54 -g37 -S'1' -p55 -sg39 -g40 -sg41 -S'156108541' -p56 -sg43 -S'GG' -p57 -sssS'GRCh38' -p58 -(dp59 -g33 -S'NC_000001.11:g.156138750dup' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -g40 -sg41 -S'156138750' -p62 -sg43 -S'GG' -p63 -sssssS'NM_001282626.1:c.1818+143dup' -p64 -(dp65 -g3 -g4 -sg5 -(lp66 -S'NC_000001.10:g.156108541G>GG automapped to 
NC_000001.10:g.156108541dupG' -p67 -aS'RefSeqGene record not available' -p68 -asg9 -g4 -sg10 -(lp69 -sg12 -VHomo sapiens lamin A/C (LMNA), transcript variant 7, mRNA -p70 -sg14 -S'LMNA' -p71 -sg16 -(dp72 -g18 -S'NP_001269555.1:p.?' -p73 -sg20 -S'NP_001269555.1:p.?' -p74 -ssg22 -g23 -sg24 -S'NC_000001.10(NM_001282626.1):c.1818+143dup' -p75 -sg25 -g4 -sg26 -S'NM_001282626.1:c.1818+143dup' -p76 -sg28 -g4 -sg29 -(dp77 -g31 -(dp78 -g33 -S'NC_000001.10:g.156108541dup' -p79 -sg35 -(dp80 -g37 -g38 -sg39 -g40 -sg41 -S'156108541' -p81 -sg43 -S'GG' -p82 -sssg45 -(dp83 -g33 -S'NC_000001.11:g.156138750dup' -p84 -sg35 -(dp85 -g37 -g38 -sg39 -g40 -sg41 -S'156138750' -p86 -sg43 -S'GG' -p87 -sssg51 -(dp88 -g33 -S'NC_000001.10:g.156108541dup' -p89 -sg35 -(dp90 -g37 -g55 -sg39 -g40 -sg41 -S'156108541' -p91 -sg43 -S'GG' -p92 -sssg58 -(dp93 -g33 -S'NC_000001.11:g.156138750dup' -p94 -sg35 -(dp95 -g37 -g55 -sg39 -g40 -sg41 -S'156138750' -p96 -sg43 -S'GG' -p97 -sssssS'flag' -p98 -S'gene_variant' -p99 -sS'NM_001257374.2:c.1625dup' -p100 -(dp101 -g3 -g4 -sg5 -(lp102 -S'NC_000001.10:g.156108541G>GG automapped to NC_000001.10:g.156108541dupG' -p103 -aS'RefSeqGene record not available' -p104 -asg9 -g4 -sg10 -(lp105 -sg12 -VHomo sapiens lamin A/C (LMNA), transcript variant 4, mRNA -p106 -sg14 -S'LMNA' -p107 -sg16 -(dp108 -g18 -S'NP_001244303.1:p.(Thr543AsnfsTer90)' -p109 -sg20 -S'NP_001244303.1:p.(T543Nfs*90)' -p110 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001257374.2:c.1625dup' -p111 -sg28 -g4 -sg29 -(dp112 -g31 -(dp113 -g33 -S'NC_000001.10:g.156108541dup' -p114 -sg35 -(dp115 -g37 -g38 -sg39 -g40 -sg41 -S'156108541' -p116 -sg43 -S'GG' -p117 -sssg45 -(dp118 -g33 -S'NC_000001.11:g.156138750dup' -p119 -sg35 -(dp120 -g37 -g38 -sg39 -g40 -sg41 -S'156138750' -p121 -sg43 -S'GG' -p122 -sssg51 -(dp123 -g33 -S'NC_000001.10:g.156108541dup' -p124 -sg35 -(dp125 -g37 -g55 -sg39 -g40 -sg41 -S'156108541' -p126 -sg43 -S'GG' -p127 -sssg58 -(dp128 -g33 -S'NC_000001.11:g.156138750dup' -p129 -sg35 -(dp130 -g37 
-g55 -sg39 -g40 -sg41 -S'156138750' -p131 -sg43 -S'GG' -p132 -sssssS'NM_170708.3:c.1871dup' -p133 -(dp134 -g3 -g4 -sg5 -(lp135 -S'NC_000001.10:g.156108541G>GG automapped to NC_000001.10:g.156108541dupG' -p136 -aS'RefSeqGene record not available' -p137 -asg9 -g4 -sg10 -(lp138 -sg12 -VHomo sapiens lamin A/C (LMNA), transcript variant 3, mRNA -p139 -sg14 -S'LMNA' -p140 -sg16 -(dp141 -g18 -S'NP_733822.1(LRG_254p3):p.(Thr625AsnfsTer49)' -p142 -sg20 -S'NP_733822.1:p.(T625Nfs*49)' -p143 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_170708.3:c.1871dup' -p144 -sg28 -g4 -sg29 -(dp145 -g31 -(dp146 -g33 -S'NC_000001.10:g.156108541dup' -p147 -sg35 -(dp148 -g37 -g38 -sg39 -g40 -sg41 -S'156108541' -p149 -sg43 -S'GG' -p150 -sssg45 -(dp151 -g33 -S'NC_000001.11:g.156138750dup' -p152 -sg35 -(dp153 -g37 -g38 -sg39 -g40 -sg41 -S'156138750' -p154 -sg43 -S'GG' -p155 -sssg51 -(dp156 -g33 -S'NC_000001.10:g.156108541dup' -p157 -sg35 -(dp158 -g37 -g55 -sg39 -g40 -sg41 -S'156108541' -p159 -sg43 -S'GG' -p160 -sssg58 -(dp161 -g33 -S'NC_000001.11:g.156138750dup' -p162 -sg35 -(dp163 -g37 -g55 -sg39 -g40 -sg41 -S'156138750' -p164 -sg43 -S'GG' -p165 -sssssS'NM_001257374.1:c.1625dup' -p166 -(dp167 -g3 -g4 -sg5 -(lp168 -S'NC_000001.10:g.156108541G>GG automapped to NC_000001.10:g.156108541dupG' -p169 -aS'A more recent version of the selected reference sequence NM_001257374.1 is available (NM_001257374.2)' -p170 -aS'NM_001257374.2:c.1625dupG MUST be fully validated prior to use in reports' -p171 -aS'select_variants=NM_001257374.2:c.1625dup' -p172 -aS'RefSeqGene record not available' -p173 -asg9 -g4 -sg10 -(lp174 -sg12 -VHomo sapiens lamin A/C (LMNA), transcript variant 4, mRNA -p175 -sg14 -S'LMNA' -p176 -sg16 -(dp177 -g18 -S'NP_001244303.1:p.(Thr543AsnfsTer90)' -p178 -sg20 -S'NP_001244303.1:p.(T543Nfs*90)' -p179 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001257374.1:c.1625dup' -p180 -sg28 -g4 -sg29 -(dp181 -g31 -(dp182 -g33 -S'NC_000001.10:g.156108541dup' -p183 -sg35 -(dp184 -g37 -g38 -sg39 -g40 -sg41 
-S'156108541' -p185 -sg43 -S'GG' -p186 -sssg51 -(dp187 -g33 -S'NC_000001.10:g.156108541dup' -p188 -sg35 -(dp189 -g37 -g55 -sg39 -g40 -sg41 -S'156108541' -p190 -sg43 -S'GG' -p191 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant208.txt b/VariantValidator/testing/testOutputsMasterITS/variant208.txt deleted file mode 100644 index f449b6cd..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant208.txt +++ /dev/null @@ -1,324 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_001315491.1:c.1A>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens myelin protein zero (MPZ), transcript variant 1, mRNA -p14 -sS'gene_symbol' -p15 -S'MPZ' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_001302420.1:p.(Met1?)' -p20 -sS'slr' -p21 -S'NP_001302420.1:p.(M1?)' -p22 -ssS'submitted_variant' -p23 -S'1-161279695-T-A' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'HGVS_LRG_variant' -p26 -g6 -sS'HGVS_transcript_variant' -p27 -S'NM_001315491.1:c.1A>T' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000001.10:g.161279695T>A' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr1' -p39 -sS'ref' -p40 -VT -p41 -sS'pos' -p42 -S'161279695' -p43 -sS'alt' -p44 -VA -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000001.11:g.161309905T>A' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'161309905' -p50 -sg44 -g45 -sssS'GRCh37' -p51 -(dp52 -g34 -S'NC_000001.10:g.161279695T>A' -p53 -sg36 -(dp54 -g38 -S'1' -p55 -sg40 -g41 -sg42 -S'161279695' -p56 -sg44 -g45 -sssS'GRCh38' -p57 -(dp58 -g34 -S'NC_000001.11:g.161309905T>A' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'161309905' -p61 -sg44 -g45 
-sssssS'NM_000530.7:c.1A>T' -p62 -(dp63 -g5 -g6 -sg7 -(lp64 -S'RefSeqGene record not available' -p65 -asg10 -g6 -sg11 -(lp66 -sg13 -VHomo sapiens myelin protein zero (MPZ), transcript variant 1, mRNA -p67 -sg15 -S'MPZ' -p68 -sg17 -(dp69 -g19 -S'NP_000521.2(LRG_256p1):p.(Met1?)' -p70 -sg21 -S'NP_000521.2:p.(M1?)' -p71 -ssg23 -g24 -sg25 -g6 -sg26 -g6 -sg27 -S'NM_000530.7:c.1A>T' -p72 -sg29 -g6 -sg30 -(dp73 -g32 -(dp74 -g34 -S'NC_000001.10:g.161279695T>A' -p75 -sg36 -(dp76 -g38 -g39 -sg40 -g41 -sg42 -S'161279695' -p77 -sg44 -g45 -sssg46 -(dp78 -g34 -S'NC_000001.11:g.161309905T>A' -p79 -sg36 -(dp80 -g38 -g39 -sg40 -g41 -sg42 -S'161309905' -p81 -sg44 -g45 -sssg51 -(dp82 -g34 -S'NC_000001.10:g.161279695T>A' -p83 -sg36 -(dp84 -g38 -g55 -sg40 -g41 -sg42 -S'161279695' -p85 -sg44 -g45 -sssg57 -(dp86 -g34 -S'NC_000001.11:g.161309905T>A' -p87 -sg36 -(dp88 -g38 -g55 -sg40 -g41 -sg42 -S'161309905' -p89 -sg44 -g45 -sssssS'NM_000530.6:c.1A>T' -p90 -(dp91 -g5 -S'LRG_256t1:c.1A>T' -p92 -sg7 -(lp93 -S'A more recent version of the selected reference sequence NM_000530.6 is available (NM_000530.7)' -p94 -aS'NM_000530.7:c.1A>T MUST be fully validated prior to use in reports' -p95 -aS'select_variants=NM_000530.7:c.1A>T' -p96 -asg10 -g6 -sg11 -(lp97 -sg13 -VHomo sapiens myelin protein zero (MPZ), mRNA -p98 -sg15 -S'MPZ' -p99 -sg17 -(dp100 -g19 -S'NP_000521.2(LRG_256p1):p.(Met1?)' -p101 -sg21 -S'NP_000521.2:p.(M1?)' -p102 -ssg23 -g24 -sg25 -g6 -sg26 -S'LRG_256:g.5068A>T' -p103 -sg27 -S'NM_000530.6:c.1A>T' -p104 -sg29 -S'NG_008055.1:g.5068A>T' -p105 -sg30 -(dp106 -g32 -(dp107 -g34 -S'NC_000001.10:g.161279695T>A' -p108 -sg36 -(dp109 -g38 -g39 -sg40 -g41 -sg42 -S'161279695' -p110 -sg44 -g45 -sssg51 -(dp111 -g34 -S'NC_000001.10:g.161279695T>A' -p112 -sg36 -(dp113 -g38 -g55 -sg40 -g41 -sg42 -S'161279695' -p114 -sg44 -g45 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant209.txt b/VariantValidator/testing/testOutputsMasterITS/variant209.txt deleted file mode 100644 index 90a24d00..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant209.txt +++ /dev/null @@ -1,143 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000130.4:c.1601G>A' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_553t1:c.1601G>A' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens coagulation factor V (F5), mRNA -p14 -sS'gene_symbol' -p15 -S'F5' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000121.2(LRG_553p1):p.(Arg534Gln)' -p20 -sS'slr' -p21 -S'NP_000121.2:p.(R534Q)' -p22 -ssS'submitted_variant' -p23 -S'1-169519049-T-T' -p24 -sS'genome_context_intronic_sequence' -p25 -g10 -sS'HGVS_LRG_variant' -p26 -S'LRG_553:g.41721G>A' -p27 -sS'HGVS_transcript_variant' -p28 -S'NM_000130.4:c.1601G>A' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_011806.1:g.41721G>A' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000001.10:g.169519049T=' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr1' -p41 -sS'ref' -p42 -S'T' -p43 -sS'pos' -p44 -S'169519049' -p45 -sS'alt' -p46 -g43 -sssS'hg38' -p47 -(dp48 -g36 -S'NC_000001.11:g.169549811C>T' -p49 -sg38 -(dp50 -g40 -g41 -sg42 -VC -p51 -sg44 -S'169549811' -p52 -sg46 -VT -p53 -sssS'GRCh37' -p54 -(dp55 -g36 -S'NC_000001.10:g.169519049T=' -p56 -sg38 -(dp57 -g40 -S'1' -p58 -sg42 -g43 -sg44 -S'169519049' -p59 -sg46 -g43 -sssS'GRCh38' -p60 -(dp61 -g36 -S'NC_000001.11:g.169549811C>T' -p62 -sg38 -(dp63 -g40 -g58 -sg42 -g51 -sg44 -S'169549811' -p64 -sg46 -g53 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant21.txt b/VariantValidator/testing/testOutputsMasterITS/variant21.txt deleted file mode 100644 index 3f642d54..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant21.txt +++ /dev/null @@ -1,60 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant' -p7 -aS'Instead use NC_000011.9:g.5244828A=' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -g4 -sS'gene_symbol' -p13 -g4 -sS'HGVS_predicted_protein_consequence' -p14 -(dp15 -S'tlr' -p16 -g4 -sS'slr' -p17 -g4 -ssS'submitted_variant' -p18 -S'NM_000518.4:c.*2000C>T' -p19 -sS'genome_context_intronic_sequence' -p20 -g4 -sS'HGVS_LRG_variant' -p21 -g4 -sS'HGVS_transcript_variant' -p22 -g4 -sS'HGVS_RefSeqGene_variant' -p23 -g4 -sS'primary_assembly_loci' -p24 -(dp25 -ssS'flag' -p26 -S'warning' -p27 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant210.txt b/VariantValidator/testing/testOutputsMasterITS/variant210.txt deleted file mode 100644 index 4f8471d8..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant210.txt +++ /dev/null @@ -1,464 +0,0 @@ -(dp0 -S'NM_001172425.1:c.672C>T' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'A more recent version of the selected reference sequence NM_001172425.1 is available (NM_001172425.2)' -p7 -aS'NM_001172425.2:c.672C>T MUST be fully validated prior to use in reports' -p8 -aS'select_variants=NM_001172425.2:c.672C>T' -p9 -aS'RefSeqGene record not available' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -g4 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens left-right determination factor 2 (LEFTY2), transcript variant 2, mRNA -p15 -sS'gene_symbol' -p16 -S'LEFTY2' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_001165896.1:p.(Thr224=)' -p21 -sS'slr' -p22 -S'NP_001165896.1:p.(T224=)' -p23 -ssS'submitted_variant' -p24 -S'1-226125468-G-A' -p25 -sS'genome_context_intronic_sequence' -p26 -g4 -sS'HGVS_LRG_variant' -p27 -g4 -sS'HGVS_transcript_variant' -p28 -S'NM_001172425.1:c.672C>T' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -g4 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000001.10:g.226125468G>A' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr1' -p40 -sS'ref' -p41 -VG -p42 -sS'pos' -p43 -S'226125468' -p44 -sS'alt' -p45 -VA -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000001.11:g.225937768G>A' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -g42 -sg43 -S'225937768' -p51 -sg45 -g46 -sssS'GRCh37' -p52 -(dp53 -g35 -S'NC_000001.10:g.226125468G>A' -p54 -sg37 -(dp55 -g39 -S'1' -p56 -sg41 -g42 -sg43 -S'226125468' -p57 -sg45 -g46 -sssS'GRCh38' -p58 -(dp59 -g35 -S'NC_000001.11:g.225937768G>A' -p60 -sg37 -(dp61 -g39 -g56 -sg41 -g42 -sg43 
-S'225937768' -p62 -sg45 -g46 -sssssS'flag' -p63 -S'gene_variant' -p64 -sS'NM_003240.4:c.774C>T' -p65 -(dp66 -g3 -g4 -sg5 -(lp67 -S'RefSeqGene record not available' -p68 -asg11 -g4 -sg12 -(lp69 -sg14 -VHomo sapiens left-right determination factor 2 (LEFTY2), transcript variant 1, mRNA -p70 -sg16 -S'LEFTY2' -p71 -sg18 -(dp72 -g20 -S'NP_003231.2:p.(Thr258=)' -p73 -sg22 -S'NP_003231.2:p.(T258=)' -p74 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_003240.4:c.774C>T' -p75 -sg30 -g4 -sg31 -(dp76 -g33 -(dp77 -g35 -S'NC_000001.10:g.226125468G>A' -p78 -sg37 -(dp79 -g39 -g40 -sg41 -g42 -sg43 -S'226125468' -p80 -sg45 -g46 -sssg47 -(dp81 -g35 -S'NC_000001.11:g.225937768G>A' -p82 -sg37 -(dp83 -g39 -g40 -sg41 -g42 -sg43 -S'225937768' -p84 -sg45 -g46 -sssg52 -(dp85 -g35 -S'NC_000001.10:g.226125468G>A' -p86 -sg37 -(dp87 -g39 -g56 -sg41 -g42 -sg43 -S'226125468' -p88 -sg45 -g46 -sssg58 -(dp89 -g35 -S'NC_000001.11:g.225937768G>A' -p90 -sg37 -(dp91 -g39 -g56 -sg41 -g42 -sg43 -S'225937768' -p92 -sg45 -g46 -sssssS'NM_003240.3:c.774C>T' -p93 -(dp94 -g3 -g4 -sg5 -(lp95 -S'A more recent version of the selected reference sequence NM_003240.3 is available (NM_003240.4)' -p96 -aS'NM_003240.4:c.774C>T MUST be fully validated prior to use in reports' -p97 -aS'select_variants=NM_003240.4:c.774C>T' -p98 -asg11 -g4 -sg12 -(lp99 -sg14 -VHomo sapiens left-right determination factor 2 (LEFTY2), transcript variant 1, mRNA -p100 -sg16 -S'LEFTY2' -p101 -sg18 -(dp102 -g20 -S'NP_003231.2:p.(Thr258=)' -p103 -sg22 -S'NP_003231.2:p.(T258=)' -p104 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_003240.3:c.774C>T' -p105 -sg30 -S'NG_008118.1:g.8453C>T' -p106 -sg31 -(dp107 -g33 -(dp108 -g35 -S'NC_000001.10:g.226125468G>A' -p109 -sg37 -(dp110 -g39 -g40 -sg41 -g42 -sg43 -S'226125468' -p111 -sg45 -g46 -sssg47 -(dp112 -g35 -S'NC_000001.11:g.225937768G>A' -p113 -sg37 -(dp114 -g39 -g40 -sg41 -g42 -sg43 -S'225937768' -p115 -sg45 -g46 -sssg52 -(dp116 -g35 -S'NC_000001.10:g.226125468G>A' -p117 -sg37 -(dp118 -g39 -g56 -sg41 -g42 
-sg43 -S'226125468' -p119 -sg45 -g46 -sssg58 -(dp120 -g35 -S'NC_000001.11:g.225937768G>A' -p121 -sg37 -(dp122 -g39 -g56 -sg41 -g42 -sg43 -S'225937768' -p123 -sg45 -g46 -sssssS'NM_001172425.2:c.672C>T' -p124 -(dp125 -g3 -g4 -sg5 -(lp126 -S'RefSeqGene record not available' -p127 -asg11 -g4 -sg12 -(lp128 -sg14 -VHomo sapiens left-right determination factor 2 (LEFTY2), transcript variant 2, mRNA -p129 -sg16 -S'LEFTY2' -p130 -sg18 -(dp131 -g20 -S'NP_001165896.1:p.(Thr224=)' -p132 -sg22 -S'NP_001165896.1:p.(T224=)' -p133 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_001172425.2:c.672C>T' -p134 -sg30 -g4 -sg31 -(dp135 -g33 -(dp136 -g35 -S'NC_000001.10:g.226125468G>A' -p137 -sg37 -(dp138 -g39 -g40 -sg41 -g42 -sg43 -S'226125468' -p139 -sg45 -g46 -sssg47 -(dp140 -g35 -S'NC_000001.11:g.225937768G>A' -p141 -sg37 -(dp142 -g39 -g40 -sg41 -g42 -sg43 -S'225937768' -p143 -sg45 -g46 -sssg52 -(dp144 -g35 -S'NC_000001.10:g.226125468G>A' -p145 -sg37 -(dp146 -g39 -g56 -sg41 -g42 -sg43 -S'226125468' -p147 -sg45 -g46 -sssg58 -(dp148 -g35 -S'NC_000001.11:g.225937768G>A' -p149 -sg37 -(dp150 -g39 -g56 -sg41 -g42 -sg43 -S'225937768' -p151 -sg45 -g46 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant211.txt b/VariantValidator/testing/testOutputsMasterITS/variant211.txt deleted file mode 100644 index 37a4d9cb..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant211.txt +++ /dev/null @@ -1,187 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_001126049.1:c.-794_-792del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1087t1:c.-794_-792del' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000010.10:g.89623035CGCA>C automapped to NC_000010.10:g.89623039_89623041delGCA' -p9 -aS'The current status of LRG_1087 is pending therefore changes may be made to the LRG reference sequence' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -S'' -p12 -sS'alt_genomic_loci' -p13 -(lp14 -(dp15 -S'GRCh38' -p16 -(dp17 -S'HGVS_genomic_description' -p18 -S'NW_013171807.1:g.79106_79108del' -p19 -sS'vcf' -p20 -(dp21 -S'chr' -p22 -S'HG2334_PATCH' -p23 -sS'ref' -p24 -S'CGCA' -p25 -sS'pos' -p26 -S'79102' -p27 -sS'alt' -p28 -S'C' -p29 -sssa(dp30 -S'hg38' -p31 -(dp32 -g18 -S'NW_013171807.1:g.79106_79108del' -p33 -sg20 -(dp34 -g22 -S'NW_013171807.1' -p35 -sg24 -S'CGCA' -p36 -sg26 -S'79102' -p37 -sg28 -g29 -sssasS'transcript_description' -p38 -VHomo sapiens killin, p53 regulated DNA replication inhibitor (KLLN), mRNA -p39 -sS'gene_symbol' -p40 -S'KLLN' -p41 -sS'HGVS_predicted_protein_consequence' -p42 -(dp43 -S'tlr' -p44 -S'NP_001119521.1:p.?' -p45 -sS'slr' -p46 -S'NP_001119521.1:p.?' 
-p47 -ssS'submitted_variant' -p48 -S'10-89623035-CGCA-C' -p49 -sS'genome_context_intronic_sequence' -p50 -g12 -sS'HGVS_LRG_variant' -p51 -S'LRG_1087:g.5157_5159del' -p52 -sS'HGVS_transcript_variant' -p53 -S'NM_001126049.1:c.-794_-792del' -p54 -sS'HGVS_RefSeqGene_variant' -p55 -S'NG_033079.1:g.5157_5159del' -p56 -sS'primary_assembly_loci' -p57 -(dp58 -S'hg19' -p59 -(dp60 -g18 -S'NC_000010.10:g.89623039_89623041del' -p61 -sg20 -(dp62 -g22 -S'chr10' -p63 -sg24 -S'CGCA' -p64 -sg26 -S'89623035' -p65 -sg28 -g29 -sssg31 -(dp66 -g18 -S'NC_000010.11:g.87863282_87863284del' -p67 -sg20 -(dp68 -g22 -g63 -sg24 -S'CGCA' -p69 -sg26 -S'87863278' -p70 -sg28 -g29 -sssS'GRCh37' -p71 -(dp72 -g18 -S'NC_000010.10:g.89623039_89623041del' -p73 -sg20 -(dp74 -g22 -S'10' -p75 -sg24 -S'CGCA' -p76 -sg26 -S'89623035' -p77 -sg28 -g29 -sssg16 -(dp78 -g18 -S'NC_000010.11:g.87863282_87863284del' -p79 -sg20 -(dp80 -g22 -g75 -sg24 -S'CGCA' -p81 -sg26 -S'87863278' -p82 -sg28 -g29 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant212.txt b/VariantValidator/testing/testOutputsMasterITS/variant212.txt deleted file mode 100644 index 21d97ae2..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant212.txt +++ /dev/null @@ -1,660 +0,0 @@ -(dp0 -S'NR_037946.1:n.3896G>T' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens HNRNPUL2-BSCL2 readthrough (NMD candidate) (HNRNPUL2-BSCL2), long non-coding RNA -p12 -sS'gene_symbol' -p13 -S'HNRNPUL2-BSCL2' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'Non-coding :n.' 
-p18 -sS'slr' -p19 -g4 -ssS'submitted_variant' -p20 -S'11-62457852-C-A' -p21 -sS'genome_context_intronic_sequence' -p22 -g4 -sS'HGVS_LRG_variant' -p23 -g4 -sS'HGVS_transcript_variant' -p24 -S'NR_037946.1:n.3896G>T' -p25 -sS'HGVS_RefSeqGene_variant' -p26 -g4 -sS'primary_assembly_loci' -p27 -(dp28 -S'GRCh38' -p29 -(dp30 -S'HGVS_genomic_description' -p31 -S'NC_000011.10:g.62690380C>A' -p32 -sS'vcf' -p33 -(dp34 -S'chr' -p35 -S'11' -p36 -sS'ref' -p37 -VC -p38 -sS'pos' -p39 -S'62690380' -p40 -sS'alt' -p41 -VA -p42 -sssS'GRCh37' -p43 -(dp44 -g31 -S'NC_000011.9:g.62457852C>A' -p45 -sg33 -(dp46 -g35 -g36 -sg37 -g38 -sg39 -S'62457852' -p47 -sg41 -g42 -sssS'hg38' -p48 -(dp49 -g31 -S'NC_000011.10:g.62690380C>A' -p50 -sg33 -(dp51 -g35 -S'chr11' -p52 -sg37 -g38 -sg39 -S'62690380' -p53 -sg41 -g42 -sssS'hg19' -p54 -(dp55 -g31 -S'NC_000011.9:g.62457852C>A' -p56 -sg33 -(dp57 -g35 -g52 -sg37 -g38 -sg39 -S'62457852' -p58 -sg41 -g42 -sssssS'NM_032667.6:c.1184G>T' -p59 -(dp60 -g3 -S'LRG_235t2:c.1184G>T' -p61 -sg5 -(lp62 -S'RefSeqGene record not available' -p63 -asg8 -g4 -sg9 -(lp64 -sg11 -VHomo sapiens BSCL2, seipin lipid droplet biogenesis associated (BSCL2), transcript variant 2, mRNA -p65 -sg13 -S'BSCL2' -p66 -sg15 -(dp67 -g17 -S'NP_116056.3(LRG_235p2):p.(Cys395Phe)' -p68 -sg19 -S'NP_116056.3:p.(C395F)' -p69 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_032667.6:c.1184G>T' -p70 -sg26 -g4 -sg27 -(dp71 -g29 -(dp72 -g31 -S'NC_000011.10:g.62690380C>A' -p73 -sg33 -(dp74 -g35 -g36 -sg37 -g38 -sg39 -S'62690380' -p75 -sg41 -g42 -sssg43 -(dp76 -g31 -S'NC_000011.9:g.62457852C>A' -p77 -sg33 -(dp78 -g35 -g36 -sg37 -g38 -sg39 -S'62457852' -p79 -sg41 -g42 -sssg48 -(dp80 -g31 -S'NC_000011.10:g.62690380C>A' -p81 -sg33 -(dp82 -g35 -g52 -sg37 -g38 -sg39 -S'62690380' -p83 -sg41 -g42 -sssg54 -(dp84 -g31 -S'NC_000011.9:g.62457852C>A' -p85 -sg33 -(dp86 -g35 -g52 -sg37 -g38 -sg39 -S'62457852' -p87 -sg41 -g42 -sssssS'NR_037949.1:n.1984G>T' -p88 -(dp89 -g3 -g4 -sg5 -(lp90 -S'RefSeqGene record not available' 
-p91 -asg8 -g4 -sg9 -(lp92 -sg11 -VHomo sapiens BSCL2, seipin lipid droplet biogenesis associated (BSCL2), transcript variant 5, non-coding RNA -p93 -sg13 -S'BSCL2' -p94 -sg15 -(dp95 -g17 -S'Non-coding :n.' -p96 -sg19 -g4 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NR_037949.1:n.1984G>T' -p97 -sg26 -g4 -sg27 -(dp98 -g29 -(dp99 -g31 -S'NC_000011.10:g.62690380C>A' -p100 -sg33 -(dp101 -g35 -g36 -sg37 -g38 -sg39 -S'62690380' -p102 -sg41 -g42 -sssg43 -(dp103 -g31 -S'NC_000011.9:g.62457852C>A' -p104 -sg33 -(dp105 -g35 -g36 -sg37 -g38 -sg39 -S'62457852' -p106 -sg41 -g42 -sssg48 -(dp107 -g31 -S'NC_000011.10:g.62690380C>A' -p108 -sg33 -(dp109 -g35 -g52 -sg37 -g38 -sg39 -S'62690380' -p110 -sg41 -g42 -sssg54 -(dp111 -g31 -S'NC_000011.9:g.62457852C>A' -p112 -sg33 -(dp113 -g35 -g52 -sg37 -g38 -sg39 -S'62457852' -p114 -sg41 -g42 -sssssS'NR_037948.1:n.1978G>T' -p115 -(dp116 -g3 -g4 -sg5 -(lp117 -S'RefSeqGene record not available' -p118 -asg8 -g4 -sg9 -(lp119 -sg11 -VHomo sapiens BSCL2, seipin lipid droplet biogenesis associated (BSCL2), transcript variant 4, non-coding RNA -p120 -sg13 -S'BSCL2' -p121 -sg15 -(dp122 -g17 -S'Non-coding :n.' 
-p123 -sg19 -g4 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NR_037948.1:n.1978G>T' -p124 -sg26 -g4 -sg27 -(dp125 -g29 -(dp126 -g31 -S'NC_000011.10:g.62690380C>A' -p127 -sg33 -(dp128 -g35 -g36 -sg37 -g38 -sg39 -S'62690380' -p129 -sg41 -g42 -sssg43 -(dp130 -g31 -S'NC_000011.9:g.62457852C>A' -p131 -sg33 -(dp132 -g35 -g36 -sg37 -g38 -sg39 -S'62457852' -p133 -sg41 -g42 -sssg48 -(dp134 -g31 -S'NC_000011.10:g.62690380C>A' -p135 -sg33 -(dp136 -g35 -g52 -sg37 -g38 -sg39 -S'62690380' -p137 -sg41 -g42 -sssg54 -(dp138 -g31 -S'NC_000011.9:g.62457852C>A' -p139 -sg33 -(dp140 -g35 -g52 -sg37 -g38 -sg39 -S'62457852' -p141 -sg41 -g42 -sssssS'NM_001122955.3:c.1376G>T' -p142 -(dp143 -g3 -S'LRG_235t1:c.1376G>T' -p144 -sg5 -(lp145 -sg8 -g4 -sg9 -(lp146 -sg11 -VHomo sapiens BSCL2, seipin lipid droplet biogenesis associated (BSCL2), transcript variant 1, mRNA -p147 -sg13 -S'BSCL2' -p148 -sg15 -(dp149 -g17 -S'NP_001116427.1:p.(Cys459Phe)' -p150 -sg19 -S'NP_001116427.1:p.(C459F)' -p151 -ssg20 -g21 -sg22 -g4 -sg23 -S'LRG_235:g.24195G>T' -p152 -sg24 -S'NM_001122955.3:c.1376G>T' -p153 -sg26 -S'NG_008461.1:g.24195G>T' -p154 -sg27 -(dp155 -g29 -(dp156 -g31 -S'NC_000011.10:g.62690380C>A' -p157 -sg33 -(dp158 -g35 -g36 -sg37 -g38 -sg39 -S'62690380' -p159 -sg41 -g42 -sssg43 -(dp160 -g31 -S'NC_000011.9:g.62457852C>A' -p161 -sg33 -(dp162 -g35 -g36 -sg37 -g38 -sg39 -S'62457852' -p163 -sg41 -g42 -sssg48 -(dp164 -g31 -S'NC_000011.10:g.62690380C>A' -p165 -sg33 -(dp166 -g35 -g52 -sg37 -g38 -sg39 -S'62690380' -p167 -sg41 -g42 -sssg54 -(dp168 -g31 -S'NC_000011.9:g.62457852C>A' -p169 -sg33 -(dp170 -g35 -g52 -sg37 -g38 -sg39 -S'62457852' -p171 -sg41 -g42 -sssssS'flag' -p172 -S'gene_variant' -p173 -sS'NM_001130702.2:c.*178G>T' -p174 -(dp175 -g3 -g4 -sg5 -(lp176 -S'RefSeqGene record not available' -p177 -asg8 -g4 -sg9 -(lp178 -sg11 -VHomo sapiens BSCL2, seipin lipid droplet biogenesis associated (BSCL2), transcript variant 3, mRNA -p179 -sg13 -S'BSCL2' -p180 -sg15 -(dp181 -g17 -S'NP_001124174.2:p.?' 
-p182 -sg19 -S'NP_001124174.2:p.?' -p183 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_001130702.2:c.*178G>T' -p184 -sg26 -g4 -sg27 -(dp185 -g29 -(dp186 -g31 -S'NC_000011.10:g.62690380C>A' -p187 -sg33 -(dp188 -g35 -g36 -sg37 -g38 -sg39 -S'62690380' -p189 -sg41 -g42 -sssg43 -(dp190 -g31 -S'NC_000011.9:g.62457852C>A' -p191 -sg33 -(dp192 -g35 -g36 -sg37 -g38 -sg39 -S'62457852' -p193 -sg41 -g42 -sssg48 -(dp194 -g31 -S'NC_000011.10:g.62690380C>A' -p195 -sg33 -(dp196 -g35 -g52 -sg37 -g38 -sg39 -S'62690380' -p197 -sg41 -g42 -sssg54 -(dp198 -g31 -S'NC_000011.9:g.62457852C>A' -p199 -sg33 -(dp200 -g35 -g52 -sg37 -g38 -sg39 -S'62457852' -p201 -sg41 -g42 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant213.txt b/VariantValidator/testing/testOutputsMasterITS/variant213.txt deleted file mode 100644 index 0f5ef1a4..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant213.txt +++ /dev/null @@ -1,257 +0,0 @@ -(dp0 -S'NM_001351834.1:c.5761_5762insT' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000011.9:g.108178710A>AT automapped to NC_000011.9:g.108178710_108178711insT' -p7 -aS'RefSeqGene record not available' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens ATM serine/threonine kinase (ATM), transcript variant 1, mRNA -p13 -sS'gene_symbol' -p14 -S'ATM' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001338763.1:p.(Arg1921MetfsTer9)' -p19 -sS'slr' -p20 -S'NP_001338763.1:p.(R1921Mfs*9)' -p21 -ssS'submitted_variant' -p22 -S'11-108178710-A-AT' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'HGVS_LRG_variant' -p25 -g4 -sS'HGVS_transcript_variant' -p26 -S'NM_001351834.1:c.5761_5762insT' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'GRCh38' -p31 -(dp32 -S'HGVS_genomic_description' -p33 
-S'NC_000011.10:g.108307983_108307984insT' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'11' -p38 -sS'ref' -p39 -S'A' -p40 -sS'pos' -p41 -S'108307983' -p42 -sS'alt' -p43 -S'AT' -p44 -sssS'GRCh37' -p45 -(dp46 -g33 -S'NC_000011.9:g.108178710_108178711insT' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'108178710' -p49 -sg43 -S'AT' -p50 -sssS'hg38' -p51 -(dp52 -g33 -S'NC_000011.10:g.108307983_108307984insT' -p53 -sg35 -(dp54 -g37 -S'chr11' -p55 -sg39 -g40 -sg41 -S'108307983' -p56 -sg43 -S'AT' -p57 -sssS'hg19' -p58 -(dp59 -g33 -S'NC_000011.9:g.108178710_108178711insT' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -g40 -sg41 -S'108178710' -p62 -sg43 -S'AT' -p63 -sssssS'flag' -p64 -S'gene_variant' -p65 -sS'NM_000051.3:c.5761_5762insT' -p66 -(dp67 -g3 -S'LRG_135t1:c.5761_5762insT' -p68 -sg5 -(lp69 -S'NC_000011.9:g.108178710A>AT automapped to NC_000011.9:g.108178710_108178711insT' -p70 -asg9 -g4 -sg10 -(lp71 -sg12 -VHomo sapiens ATM serine/threonine kinase (ATM), transcript variant 2, mRNA -p72 -sg14 -S'ATM' -p73 -sg16 -(dp74 -g18 -S'NP_000042.3(LRG_135p1):p.(Arg1921MetfsTer9)' -p75 -sg20 -S'NP_000042.3:p.(R1921Mfs*9)' -p76 -ssg22 -g23 -sg24 -g4 -sg25 -S'LRG_135:g.90152_90153insT' -p77 -sg26 -S'NM_000051.3:c.5761_5762insT' -p78 -sg28 -S'NG_009830.1:g.90152_90153insT' -p79 -sg29 -(dp80 -g31 -(dp81 -g33 -S'NC_000011.10:g.108307983_108307984insT' -p82 -sg35 -(dp83 -g37 -g38 -sg39 -g40 -sg41 -S'108307983' -p84 -sg43 -S'AT' -p85 -sssg45 -(dp86 -g33 -S'NC_000011.9:g.108178710_108178711insT' -p87 -sg35 -(dp88 -g37 -g38 -sg39 -g40 -sg41 -S'108178710' -p89 -sg43 -S'AT' -p90 -sssg51 -(dp91 -g33 -S'NC_000011.10:g.108307983_108307984insT' -p92 -sg35 -(dp93 -g37 -g55 -sg39 -g40 -sg41 -S'108307983' -p94 -sg43 -S'AT' -p95 -sssg58 -(dp96 -g33 -S'NC_000011.9:g.108178710_108178711insT' -p97 -sg35 -(dp98 -g37 -g55 -sg39 -g40 -sg41 -S'108178710' -p99 -sg43 -S'AT' -p100 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant214.txt b/VariantValidator/testing/testOutputsMasterITS/variant214.txt deleted file mode 100644 index 3e114696..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant214.txt +++ /dev/null @@ -1,2477 +0,0 @@ -(dp0 -S'NM_001352415.1:c.-108-7C>T' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -(dp11 -S'GRCh37' -p12 -(dp13 -S'HGVS_genomic_description' -p14 -S'NW_003871080.1:g.117249G>A' -p15 -sS'vcf' -p16 -(dp17 -S'chr' -p18 -S'HG388_HG400_PATCH' -p19 -sS'ref' -p20 -VG -p21 -sS'pos' -p22 -S'117249' -p23 -sS'alt' -p24 -VA -p25 -sssasS'transcript_description' -p26 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 11, mRNA -p27 -sS'gene_symbol' -p28 -S'ALG9' -p29 -sS'HGVS_predicted_protein_consequence' -p30 -(dp31 -S'tlr' -p32 -S'NP_001339344.1:p.?' -p33 -sS'slr' -p34 -S'NP_001339344.1:p.?' 
-p35 -ssS'submitted_variant' -p36 -S'11-111735981-G-A' -p37 -sS'genome_context_intronic_sequence' -p38 -S'NC_000011.9(NM_001352415.1):c.-108-7C>T' -p39 -sS'HGVS_LRG_variant' -p40 -g4 -sS'HGVS_transcript_variant' -p41 -S'NM_001352415.1:c.-108-7C>T' -p42 -sS'HGVS_RefSeqGene_variant' -p43 -g4 -sS'primary_assembly_loci' -p44 -(dp45 -S'GRCh38' -p46 -(dp47 -g14 -S'NC_000011.10:g.111865258G>A' -p48 -sg16 -(dp49 -g18 -S'11' -p50 -sg20 -g21 -sg22 -S'111865258' -p51 -sg24 -g25 -sssg12 -(dp52 -g14 -S'NC_000011.9:g.111735981G>A' -p53 -sg16 -(dp54 -g18 -g50 -sg20 -g21 -sg22 -S'111735981' -p55 -sg24 -g25 -sssS'hg38' -p56 -(dp57 -g14 -S'NC_000011.10:g.111865258G>A' -p58 -sg16 -(dp59 -g18 -S'chr11' -p60 -sg20 -g21 -sg22 -S'111865258' -p61 -sg24 -g25 -sssS'hg19' -p62 -(dp63 -g14 -S'NC_000011.9:g.111735981G>A' -p64 -sg16 -(dp65 -g18 -g60 -sg20 -g21 -sg22 -S'111735981' -p66 -sg24 -g25 -sssssS'NM_001352410.1:c.-108-7C>T' -p67 -(dp68 -g3 -g4 -sg5 -(lp69 -S'RefSeqGene record not available' -p70 -asg8 -g4 -sg9 -(lp71 -(dp72 -g12 -(dp73 -g14 -S'NW_003871080.1:g.117249G>A' -p74 -sg16 -(dp75 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p76 -sg24 -g25 -sssasg26 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 6, mRNA -p77 -sg28 -S'ALG9' -p78 -sg30 -(dp79 -g32 -S'NP_001339339.1:p.?' -p80 -sg34 -S'NP_001339339.1:p.?' 
-p81 -ssg36 -g37 -sg38 -S'NC_000011.9(NM_001352410.1):c.-108-7C>T' -p82 -sg40 -g4 -sg41 -S'NM_001352410.1:c.-108-7C>T' -p83 -sg43 -g4 -sg44 -(dp84 -g46 -(dp85 -g14 -S'NC_000011.10:g.111865258G>A' -p86 -sg16 -(dp87 -g18 -g50 -sg20 -g21 -sg22 -S'111865258' -p88 -sg24 -g25 -sssg12 -(dp89 -g14 -S'NC_000011.9:g.111735981G>A' -p90 -sg16 -(dp91 -g18 -g50 -sg20 -g21 -sg22 -S'111735981' -p92 -sg24 -g25 -sssg56 -(dp93 -g14 -S'NC_000011.10:g.111865258G>A' -p94 -sg16 -(dp95 -g18 -g60 -sg20 -g21 -sg22 -S'111865258' -p96 -sg24 -g25 -sssg62 -(dp97 -g14 -S'NC_000011.9:g.111735981G>A' -p98 -sg16 -(dp99 -g18 -g60 -sg20 -g21 -sg22 -S'111735981' -p100 -sg24 -g25 -sssssS'NM_024740.2:c.406-7C>T' -p101 -(dp102 -g3 -g4 -sg5 -(lp103 -sg8 -S'NG_009210.1(NM_024740.2):c.406-7C>T' -p104 -sg9 -(lp105 -(dp106 -g12 -(dp107 -g14 -S'NW_003871080.1:g.117249G>A' -p108 -sg16 -(dp109 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p110 -sg24 -g25 -sssasg26 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 1, mRNA -p111 -sg28 -S'ALG9' -p112 -sg30 -(dp113 -g32 -S'NP_079016.2:p.?' -p114 -sg34 -S'NP_079016.2:p.?' 
-p115 -ssg36 -g37 -sg38 -S'NC_000011.9(NM_024740.2):c.406-7C>T' -p116 -sg40 -g4 -sg41 -S'NM_024740.2:c.406-7C>T' -p117 -sg43 -S'NG_009210.1:g.11324C>T' -p118 -sg44 -(dp119 -g46 -(dp120 -g14 -S'NC_000011.10:g.111865258G>A' -p121 -sg16 -(dp122 -g18 -g50 -sg20 -g21 -sg22 -S'111865258' -p123 -sg24 -g25 -sssg12 -(dp124 -g14 -S'NC_000011.9:g.111735981G>A' -p125 -sg16 -(dp126 -g18 -g50 -sg20 -g21 -sg22 -S'111735981' -p127 -sg24 -g25 -sssg56 -(dp128 -g14 -S'NC_000011.10:g.111865258G>A' -p129 -sg16 -(dp130 -g18 -g60 -sg20 -g21 -sg22 -S'111865258' -p131 -sg24 -g25 -sssg62 -(dp132 -g14 -S'NC_000011.9:g.111735981G>A' -p133 -sg16 -(dp134 -g18 -g60 -sg20 -g21 -sg22 -S'111735981' -p135 -sg24 -g25 -sssssS'NM_001352418.1:c.406-7C>T' -p136 -(dp137 -g3 -g4 -sg5 -(lp138 -S'RefSeqGene record not available' -p139 -asg8 -g4 -sg9 -(lp140 -(dp141 -g12 -(dp142 -g14 -S'NW_003871080.1:g.117249G>A' -p143 -sg16 -(dp144 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p145 -sg24 -g25 -sssasg26 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 14, mRNA -p146 -sg28 -S'ALG9' -p147 -sg30 -(dp148 -g32 -S'NP_001339347.1:p.?' -p149 -sg34 -S'NP_001339347.1:p.?' 
-p150 -ssg36 -g37 -sg38 -S'NC_000011.9(NM_001352418.1):c.406-7C>T' -p151 -sg40 -g4 -sg41 -S'NM_001352418.1:c.406-7C>T' -p152 -sg43 -g4 -sg44 -(dp153 -g46 -(dp154 -g14 -S'NC_000011.10:g.111865258G>A' -p155 -sg16 -(dp156 -g18 -g50 -sg20 -g21 -sg22 -S'111865258' -p157 -sg24 -g25 -sssg12 -(dp158 -g14 -S'NC_000011.9:g.111735981G>A' -p159 -sg16 -(dp160 -g18 -g50 -sg20 -g21 -sg22 -S'111735981' -p161 -sg24 -g25 -sssg56 -(dp162 -g14 -S'NC_000011.10:g.111865258G>A' -p163 -sg16 -(dp164 -g18 -g60 -sg20 -g21 -sg22 -S'111865258' -p165 -sg24 -g25 -sssg62 -(dp166 -g14 -S'NC_000011.9:g.111735981G>A' -p167 -sg16 -(dp168 -g18 -g60 -sg20 -g21 -sg22 -S'111735981' -p169 -sg24 -g25 -sssssS'NM_001352421.1:c.-108-7C>T' -p170 -(dp171 -g3 -g4 -sg5 -(lp172 -S'RefSeqGene record not available' -p173 -asg8 -g4 -sg9 -(lp174 -(dp175 -g12 -(dp176 -g14 -S'NW_003871080.1:g.117249G>A' -p177 -sg16 -(dp178 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p179 -sg24 -g25 -sssasg26 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 17, mRNA -p180 -sg28 -S'ALG9' -p181 -sg30 -(dp182 -g32 -S'NP_001339350.1:p.?' -p183 -sg34 -S'NP_001339350.1:p.?' 
-p184 -ssg36 -g37 -sg38 -S'NC_000011.9(NM_001352421.1):c.-108-7C>T' -p185 -sg40 -g4 -sg41 -S'NM_001352421.1:c.-108-7C>T' -p186 -sg43 -g4 -sg44 -(dp187 -g46 -(dp188 -g14 -S'NC_000011.10:g.111865258G>A' -p189 -sg16 -(dp190 -g18 -g50 -sg20 -g21 -sg22 -S'111865258' -p191 -sg24 -g25 -sssg12 -(dp192 -g14 -S'NC_000011.9:g.111735981G>A' -p193 -sg16 -(dp194 -g18 -g50 -sg20 -g21 -sg22 -S'111735981' -p195 -sg24 -g25 -sssg56 -(dp196 -g14 -S'NC_000011.10:g.111865258G>A' -p197 -sg16 -(dp198 -g18 -g60 -sg20 -g21 -sg22 -S'111865258' -p199 -sg24 -g25 -sssg62 -(dp200 -g14 -S'NC_000011.9:g.111735981G>A' -p201 -sg16 -(dp202 -g18 -g60 -sg20 -g21 -sg22 -S'111735981' -p203 -sg24 -g25 -sssssS'NM_001352417.1:c.406-7C>T' -p204 -(dp205 -g3 -g4 -sg5 -(lp206 -S'RefSeqGene record not available' -p207 -asg8 -g4 -sg9 -(lp208 -(dp209 -g12 -(dp210 -g14 -S'NW_003871080.1:g.117249G>A' -p211 -sg16 -(dp212 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p213 -sg24 -g25 -sssasg26 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 13, mRNA -p214 -sg28 -S'ALG9' -p215 -sg30 -(dp216 -g32 -S'NP_001339346.1:p.?' -p217 -sg34 -S'NP_001339346.1:p.?' 
-p218 -ssg36 -g37 -sg38 -S'NC_000011.9(NM_001352417.1):c.406-7C>T' -p219 -sg40 -g4 -sg41 -S'NM_001352417.1:c.406-7C>T' -p220 -sg43 -g4 -sg44 -(dp221 -g46 -(dp222 -g14 -S'NC_000011.10:g.111865258G>A' -p223 -sg16 -(dp224 -g18 -g50 -sg20 -g21 -sg22 -S'111865258' -p225 -sg24 -g25 -sssg12 -(dp226 -g14 -S'NC_000011.9:g.111735981G>A' -p227 -sg16 -(dp228 -g18 -g50 -sg20 -g21 -sg22 -S'111735981' -p229 -sg24 -g25 -sssg56 -(dp230 -g14 -S'NC_000011.10:g.111865258G>A' -p231 -sg16 -(dp232 -g18 -g60 -sg20 -g21 -sg22 -S'111865258' -p233 -sg24 -g25 -sssg62 -(dp234 -g14 -S'NC_000011.9:g.111735981G>A' -p235 -sg16 -(dp236 -g18 -g60 -sg20 -g21 -sg22 -S'111735981' -p237 -sg24 -g25 -sssssS'NM_001352419.1:c.-108-7C>T' -p238 -(dp239 -g3 -g4 -sg5 -(lp240 -S'RefSeqGene record not available' -p241 -asg8 -g4 -sg9 -(lp242 -(dp243 -g12 -(dp244 -g14 -S'NW_003871080.1:g.117249G>A' -p245 -sg16 -(dp246 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p247 -sg24 -g25 -sssasg26 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 15, mRNA -p248 -sg28 -S'ALG9' -p249 -sg30 -(dp250 -g32 -S'NP_001339348.1:p.?' -p251 -sg34 -S'NP_001339348.1:p.?' 
-p252 -ssg36 -g37 -sg38 -S'NC_000011.9(NM_001352419.1):c.-108-7C>T' -p253 -sg40 -g4 -sg41 -S'NM_001352419.1:c.-108-7C>T' -p254 -sg43 -g4 -sg44 -(dp255 -g46 -(dp256 -g14 -S'NC_000011.10:g.111865258G>A' -p257 -sg16 -(dp258 -g18 -g50 -sg20 -g21 -sg22 -S'111865258' -p259 -sg24 -g25 -sssg12 -(dp260 -g14 -S'NC_000011.9:g.111735981G>A' -p261 -sg16 -(dp262 -g18 -g50 -sg20 -g21 -sg22 -S'111735981' -p263 -sg24 -g25 -sssg56 -(dp264 -g14 -S'NC_000011.10:g.111865258G>A' -p265 -sg16 -(dp266 -g18 -g60 -sg20 -g21 -sg22 -S'111865258' -p267 -sg24 -g25 -sssg62 -(dp268 -g14 -S'NC_000011.9:g.111735981G>A' -p269 -sg16 -(dp270 -g18 -g60 -sg20 -g21 -sg22 -S'111735981' -p271 -sg24 -g25 -sssssS'NM_001352414.1:c.-108-7C>T' -p272 -(dp273 -g3 -g4 -sg5 -(lp274 -S'RefSeqGene record not available' -p275 -asg8 -g4 -sg9 -(lp276 -(dp277 -g12 -(dp278 -g14 -S'NW_003871080.1:g.117249G>A' -p279 -sg16 -(dp280 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p281 -sg24 -g25 -sssasg26 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 10, mRNA -p282 -sg28 -S'ALG9' -p283 -sg30 -(dp284 -g32 -S'NP_001339343.1:p.?' -p285 -sg34 -S'NP_001339343.1:p.?' 
-p286 -ssg36 -g37 -sg38 -S'NC_000011.9(NM_001352414.1):c.-108-7C>T' -p287 -sg40 -g4 -sg41 -S'NM_001352414.1:c.-108-7C>T' -p288 -sg43 -g4 -sg44 -(dp289 -g46 -(dp290 -g14 -S'NC_000011.10:g.111865258G>A' -p291 -sg16 -(dp292 -g18 -g50 -sg20 -g21 -sg22 -S'111865258' -p293 -sg24 -g25 -sssg12 -(dp294 -g14 -S'NC_000011.9:g.111735981G>A' -p295 -sg16 -(dp296 -g18 -g50 -sg20 -g21 -sg22 -S'111735981' -p297 -sg24 -g25 -sssg56 -(dp298 -g14 -S'NC_000011.10:g.111865258G>A' -p299 -sg16 -(dp300 -g18 -g60 -sg20 -g21 -sg22 -S'111865258' -p301 -sg24 -g25 -sssg62 -(dp302 -g14 -S'NC_000011.9:g.111735981G>A' -p303 -sg16 -(dp304 -g18 -g60 -sg20 -g21 -sg22 -S'111735981' -p305 -sg24 -g25 -sssssS'NM_001352420.1:c.-108-7C>T' -p306 -(dp307 -g3 -g4 -sg5 -(lp308 -S'RefSeqGene record not available' -p309 -asg8 -g4 -sg9 -(lp310 -(dp311 -g12 -(dp312 -g14 -S'NW_003871080.1:g.117249G>A' -p313 -sg16 -(dp314 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p315 -sg24 -g25 -sssasg26 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 16, mRNA -p316 -sg28 -S'ALG9' -p317 -sg30 -(dp318 -g32 -S'NP_001339349.1:p.?' -p319 -sg34 -S'NP_001339349.1:p.?' 
-p320 -ssg36 -g37 -sg38 -S'NC_000011.9(NM_001352420.1):c.-108-7C>T' -p321 -sg40 -g4 -sg41 -S'NM_001352420.1:c.-108-7C>T' -p322 -sg43 -g4 -sg44 -(dp323 -g46 -(dp324 -g14 -S'NC_000011.10:g.111865258G>A' -p325 -sg16 -(dp326 -g18 -g50 -sg20 -g21 -sg22 -S'111865258' -p327 -sg24 -g25 -sssg12 -(dp328 -g14 -S'NC_000011.9:g.111735981G>A' -p329 -sg16 -(dp330 -g18 -g50 -sg20 -g21 -sg22 -S'111735981' -p331 -sg24 -g25 -sssg56 -(dp332 -g14 -S'NC_000011.10:g.111865258G>A' -p333 -sg16 -(dp334 -g18 -g60 -sg20 -g21 -sg22 -S'111865258' -p335 -sg24 -g25 -sssg62 -(dp336 -g14 -S'NC_000011.9:g.111735981G>A' -p337 -sg16 -(dp338 -g18 -g60 -sg20 -g21 -sg22 -S'111735981' -p339 -sg24 -g25 -sssssS'NM_001352411.1:c.-108-7C>T' -p340 -(dp341 -g3 -g4 -sg5 -(lp342 -S'RefSeqGene record not available' -p343 -asg8 -g4 -sg9 -(lp344 -(dp345 -g12 -(dp346 -g14 -S'NW_003871080.1:g.117249G>A' -p347 -sg16 -(dp348 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p349 -sg24 -g25 -sssasg26 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 7, mRNA -p350 -sg28 -S'ALG9' -p351 -sg30 -(dp352 -g32 -S'NP_001339340.1:p.?' -p353 -sg34 -S'NP_001339340.1:p.?' 
-p354 -ssg36 -g37 -sg38 -S'NC_000011.9(NM_001352411.1):c.-108-7C>T' -p355 -sg40 -g4 -sg41 -S'NM_001352411.1:c.-108-7C>T' -p356 -sg43 -g4 -sg44 -(dp357 -g46 -(dp358 -g14 -S'NC_000011.10:g.111865258G>A' -p359 -sg16 -(dp360 -g18 -g50 -sg20 -g21 -sg22 -S'111865258' -p361 -sg24 -g25 -sssg12 -(dp362 -g14 -S'NC_000011.9:g.111735981G>A' -p363 -sg16 -(dp364 -g18 -g50 -sg20 -g21 -sg22 -S'111735981' -p365 -sg24 -g25 -sssg56 -(dp366 -g14 -S'NC_000011.10:g.111865258G>A' -p367 -sg16 -(dp368 -g18 -g60 -sg20 -g21 -sg22 -S'111865258' -p369 -sg24 -g25 -sssg62 -(dp370 -g14 -S'NC_000011.9:g.111735981G>A' -p371 -sg16 -(dp372 -g18 -g60 -sg20 -g21 -sg22 -S'111735981' -p373 -sg24 -g25 -sssssS'NM_001077692.1:c.-108-7C>T' -p374 -(dp375 -g3 -g4 -sg5 -(lp376 -S'RefSeqGene record not available' -p377 -asg8 -g4 -sg9 -(lp378 -(dp379 -g12 -(dp380 -g14 -S'NW_003871080.1:g.117249G>A' -p381 -sg16 -(dp382 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p383 -sg24 -g25 -sssasg26 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 4, mRNA -p384 -sg28 -S'ALG9' -p385 -sg30 -(dp386 -g32 -S'NP_001071160.1:p.?' -p387 -sg34 -S'NP_001071160.1:p.?' 
-p388 -ssg36 -g37 -sg38 -S'NC_000011.9(NM_001077692.1):c.-108-7C>T' -p389 -sg40 -g4 -sg41 -S'NM_001077692.1:c.-108-7C>T' -p390 -sg43 -g4 -sg44 -(dp391 -g46 -(dp392 -g14 -S'NC_000011.10:g.111865258G>A' -p393 -sg16 -(dp394 -g18 -g50 -sg20 -g21 -sg22 -S'111865258' -p395 -sg24 -g25 -sssg12 -(dp396 -g14 -S'NC_000011.9:g.111735981G>A' -p397 -sg16 -(dp398 -g18 -g50 -sg20 -g21 -sg22 -S'111735981' -p399 -sg24 -g25 -sssg56 -(dp400 -g14 -S'NC_000011.10:g.111865258G>A' -p401 -sg16 -(dp402 -g18 -g60 -sg20 -g21 -sg22 -S'111865258' -p403 -sg24 -g25 -sssg62 -(dp404 -g14 -S'NC_000011.9:g.111735981G>A' -p405 -sg16 -(dp406 -g18 -g60 -sg20 -g21 -sg22 -S'111735981' -p407 -sg24 -g25 -sssssS'flag' -p408 -S'gene_variant' -p409 -sS'NR_147984.1:n.782-7C>T' -p410 -(dp411 -g3 -g4 -sg5 -(lp412 -S'RefSeqGene record not available' -p413 -asg8 -g4 -sg9 -(lp414 -(dp415 -g12 -(dp416 -g14 -S'NW_003871080.1:g.117249G>A' -p417 -sg16 -(dp418 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p419 -sg24 -g25 -sssasg26 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 20, non-coding RNA -p420 -sg28 -S'ALG9' -p421 -sg30 -(dp422 -g32 -S'Non-coding :n.' 
-p423 -sg34 -g4 -ssg36 -g37 -sg38 -S'NC_000011.9(NR_147984.1):c.782-7C>T' -p424 -sg40 -g4 -sg41 -S'NR_147984.1:n.782-7C>T' -p425 -sg43 -g4 -sg44 -(dp426 -g46 -(dp427 -g14 -S'NC_000011.10:g.111865258G>A' -p428 -sg16 -(dp429 -g18 -g50 -sg20 -g21 -sg22 -S'111865258' -p430 -sg24 -g25 -sssg12 -(dp431 -g14 -S'NC_000011.9:g.111735981G>A' -p432 -sg16 -(dp433 -g18 -g50 -sg20 -g21 -sg22 -S'111735981' -p434 -sg24 -g25 -sssg56 -(dp435 -g14 -S'NC_000011.10:g.111865258G>A' -p436 -sg16 -(dp437 -g18 -g60 -sg20 -g21 -sg22 -S'111865258' -p438 -sg24 -g25 -sssg62 -(dp439 -g14 -S'NC_000011.9:g.111735981G>A' -p440 -sg16 -(dp441 -g18 -g60 -sg20 -g21 -sg22 -S'111735981' -p442 -sg24 -g25 -sssssS'NM_001352409.1:c.-108-7C>T' -p443 -(dp444 -g3 -g4 -sg5 -(lp445 -S'RefSeqGene record not available' -p446 -asg8 -g4 -sg9 -(lp447 -(dp448 -g12 -(dp449 -g14 -S'NW_003871080.1:g.117249G>A' -p450 -sg16 -(dp451 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p452 -sg24 -g25 -sssasg26 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 5, mRNA -p453 -sg28 -S'ALG9' -p454 -sg30 -(dp455 -g32 -S'NP_001339338.1:p.?' -p456 -sg34 -S'NP_001339338.1:p.?' 
-p457 -ssg36 -g37 -sg38 -S'NC_000011.9(NM_001352409.1):c.-108-7C>T' -p458 -sg40 -g4 -sg41 -S'NM_001352409.1:c.-108-7C>T' -p459 -sg43 -g4 -sg44 -(dp460 -g46 -(dp461 -g14 -S'NC_000011.10:g.111865258G>A' -p462 -sg16 -(dp463 -g18 -g50 -sg20 -g21 -sg22 -S'111865258' -p464 -sg24 -g25 -sssg12 -(dp465 -g14 -S'NC_000011.9:g.111735981G>A' -p466 -sg16 -(dp467 -g18 -g50 -sg20 -g21 -sg22 -S'111735981' -p468 -sg24 -g25 -sssg56 -(dp469 -g14 -S'NC_000011.10:g.111865258G>A' -p470 -sg16 -(dp471 -g18 -g60 -sg20 -g21 -sg22 -S'111865258' -p472 -sg24 -g25 -sssg62 -(dp473 -g14 -S'NC_000011.9:g.111735981G>A' -p474 -sg16 -(dp475 -g18 -g60 -sg20 -g21 -sg22 -S'111735981' -p476 -sg24 -g25 -sssssS'NM_001352412.1:c.-108-7C>T' -p477 -(dp478 -g3 -g4 -sg5 -(lp479 -S'RefSeqGene record not available' -p480 -asg8 -g4 -sg9 -(lp481 -(dp482 -g12 -(dp483 -g14 -S'NW_003871080.1:g.117249G>A' -p484 -sg16 -(dp485 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p486 -sg24 -g25 -sssasg26 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 8, mRNA -p487 -sg28 -S'ALG9' -p488 -sg30 -(dp489 -g32 -S'NP_001339341.1:p.?' -p490 -sg34 -S'NP_001339341.1:p.?' 
-p491 -ssg36 -g37 -sg38 -S'NC_000011.9(NM_001352412.1):c.-108-7C>T' -p492 -sg40 -g4 -sg41 -S'NM_001352412.1:c.-108-7C>T' -p493 -sg43 -g4 -sg44 -(dp494 -g46 -(dp495 -g14 -S'NC_000011.10:g.111865258G>A' -p496 -sg16 -(dp497 -g18 -g50 -sg20 -g21 -sg22 -S'111865258' -p498 -sg24 -g25 -sssg12 -(dp499 -g14 -S'NC_000011.9:g.111735981G>A' -p500 -sg16 -(dp501 -g18 -g50 -sg20 -g21 -sg22 -S'111735981' -p502 -sg24 -g25 -sssg56 -(dp503 -g14 -S'NC_000011.10:g.111865258G>A' -p504 -sg16 -(dp505 -g18 -g60 -sg20 -g21 -sg22 -S'111865258' -p506 -sg24 -g25 -sssg62 -(dp507 -g14 -S'NC_000011.9:g.111735981G>A' -p508 -sg16 -(dp509 -g18 -g60 -sg20 -g21 -sg22 -S'111735981' -p510 -sg24 -g25 -sssssS'NM_001077691.1:c.-108-7C>T' -p511 -(dp512 -g3 -g4 -sg5 -(lp513 -S'RefSeqGene record not available' -p514 -asg8 -g4 -sg9 -(lp515 -(dp516 -g12 -(dp517 -g14 -S'NW_003871080.1:g.117249G>A' -p518 -sg16 -(dp519 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p520 -sg24 -g25 -sssasg26 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 3, mRNA -p521 -sg28 -S'ALG9' -p522 -sg30 -(dp523 -g32 -S'NP_001071159.1:p.?' -p524 -sg34 -S'NP_001071159.1:p.?' 
-p525 -ssg36 -g37 -sg38 -S'NC_000011.9(NM_001077691.1):c.-108-7C>T' -p526 -sg40 -g4 -sg41 -S'NM_001077691.1:c.-108-7C>T' -p527 -sg43 -g4 -sg44 -(dp528 -g46 -(dp529 -g14 -S'NC_000011.10:g.111865258G>A' -p530 -sg16 -(dp531 -g18 -g50 -sg20 -g21 -sg22 -S'111865258' -p532 -sg24 -g25 -sssg12 -(dp533 -g14 -S'NC_000011.9:g.111735981G>A' -p534 -sg16 -(dp535 -g18 -g50 -sg20 -g21 -sg22 -S'111735981' -p536 -sg24 -g25 -sssg56 -(dp537 -g14 -S'NC_000011.10:g.111865258G>A' -p538 -sg16 -(dp539 -g18 -g60 -sg20 -g21 -sg22 -S'111865258' -p540 -sg24 -g25 -sssg62 -(dp541 -g14 -S'NC_000011.9:g.111735981G>A' -p542 -sg16 -(dp543 -g18 -g60 -sg20 -g21 -sg22 -S'111735981' -p544 -sg24 -g25 -sssssS'NM_001352422.1:c.-326-7C>T' -p545 -(dp546 -g3 -g4 -sg5 -(lp547 -S'RefSeqGene record not available' -p548 -asg8 -g4 -sg9 -(lp549 -(dp550 -g12 -(dp551 -g14 -S'NW_003871080.1:g.117249G>A' -p552 -sg16 -(dp553 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p554 -sg24 -g25 -sssasg26 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 18, mRNA -p555 -sg28 -S'ALG9' -p556 -sg30 -(dp557 -g32 -S'NP_001339351.1:p.?' -p558 -sg34 -S'NP_001339351.1:p.?' 
-p559 -ssg36 -g37 -sg38 -S'NC_000011.9(NM_001352422.1):c.-326-7C>T' -p560 -sg40 -g4 -sg41 -S'NM_001352422.1:c.-326-7C>T' -p561 -sg43 -g4 -sg44 -(dp562 -g46 -(dp563 -g14 -S'NC_000011.10:g.111865258G>A' -p564 -sg16 -(dp565 -g18 -g50 -sg20 -g21 -sg22 -S'111865258' -p566 -sg24 -g25 -sssg12 -(dp567 -g14 -S'NC_000011.9:g.111735981G>A' -p568 -sg16 -(dp569 -g18 -g50 -sg20 -g21 -sg22 -S'111735981' -p570 -sg24 -g25 -sssg56 -(dp571 -g14 -S'NC_000011.10:g.111865258G>A' -p572 -sg16 -(dp573 -g18 -g60 -sg20 -g21 -sg22 -S'111865258' -p574 -sg24 -g25 -sssg62 -(dp575 -g14 -S'NC_000011.9:g.111735981G>A' -p576 -sg16 -(dp577 -g18 -g60 -sg20 -g21 -sg22 -S'111735981' -p578 -sg24 -g25 -sssssS'NM_001077690.1:c.406-7C>T' -p579 -(dp580 -g3 -g4 -sg5 -(lp581 -S'RefSeqGene record not available' -p582 -asg8 -g4 -sg9 -(lp583 -(dp584 -g12 -(dp585 -g14 -S'NW_003871080.1:g.117249G>A' -p586 -sg16 -(dp587 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p588 -sg24 -g25 -sssasg26 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 2, mRNA -p589 -sg28 -S'ALG9' -p590 -sg30 -(dp591 -g32 -S'NP_001071158.1:p.?' -p592 -sg34 -S'NP_001071158.1:p.?' 
-p593 -ssg36 -g37 -sg38 -S'NC_000011.9(NM_001077690.1):c.406-7C>T' -p594 -sg40 -g4 -sg41 -S'NM_001077690.1:c.406-7C>T' -p595 -sg43 -g4 -sg44 -(dp596 -g46 -(dp597 -g14 -S'NC_000011.10:g.111865258G>A' -p598 -sg16 -(dp599 -g18 -g50 -sg20 -g21 -sg22 -S'111865258' -p600 -sg24 -g25 -sssg12 -(dp601 -g14 -S'NC_000011.9:g.111735981G>A' -p602 -sg16 -(dp603 -g18 -g50 -sg20 -g21 -sg22 -S'111735981' -p604 -sg24 -g25 -sssg56 -(dp605 -g14 -S'NC_000011.10:g.111865258G>A' -p606 -sg16 -(dp607 -g18 -g60 -sg20 -g21 -sg22 -S'111865258' -p608 -sg24 -g25 -sssg62 -(dp609 -g14 -S'NC_000011.9:g.111735981G>A' -p610 -sg16 -(dp611 -g18 -g60 -sg20 -g21 -sg22 -S'111735981' -p612 -sg24 -g25 -sssssS'NM_001352413.1:c.-108-7C>T' -p613 -(dp614 -g3 -g4 -sg5 -(lp615 -S'RefSeqGene record not available' -p616 -asg8 -g4 -sg9 -(lp617 -(dp618 -g12 -(dp619 -g14 -S'NW_003871080.1:g.117249G>A' -p620 -sg16 -(dp621 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p622 -sg24 -g25 -sssasg26 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 9, mRNA -p623 -sg28 -S'ALG9' -p624 -sg30 -(dp625 -g32 -S'NP_001339342.1:p.?' -p626 -sg34 -S'NP_001339342.1:p.?' 
-p627 -ssg36 -g37 -sg38 -S'NC_000011.9(NM_001352413.1):c.-108-7C>T' -p628 -sg40 -g4 -sg41 -S'NM_001352413.1:c.-108-7C>T' -p629 -sg43 -g4 -sg44 -(dp630 -g46 -(dp631 -g14 -S'NC_000011.10:g.111865258G>A' -p632 -sg16 -(dp633 -g18 -g50 -sg20 -g21 -sg22 -S'111865258' -p634 -sg24 -g25 -sssg12 -(dp635 -g14 -S'NC_000011.9:g.111735981G>A' -p636 -sg16 -(dp637 -g18 -g50 -sg20 -g21 -sg22 -S'111735981' -p638 -sg24 -g25 -sssg56 -(dp639 -g14 -S'NC_000011.10:g.111865258G>A' -p640 -sg16 -(dp641 -g18 -g60 -sg20 -g21 -sg22 -S'111865258' -p642 -sg24 -g25 -sssg62 -(dp643 -g14 -S'NC_000011.9:g.111735981G>A' -p644 -sg16 -(dp645 -g18 -g60 -sg20 -g21 -sg22 -S'111735981' -p646 -sg24 -g25 -sssssS'NM_001352423.1:c.-108-7C>T' -p647 -(dp648 -g3 -g4 -sg5 -(lp649 -S'RefSeqGene record not available' -p650 -asg8 -g4 -sg9 -(lp651 -(dp652 -g12 -(dp653 -g14 -S'NW_003871080.1:g.117249G>A' -p654 -sg16 -(dp655 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p656 -sg24 -g25 -sssasg26 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 19, mRNA -p657 -sg28 -S'ALG9' -p658 -sg30 -(dp659 -g32 -S'NP_001339352.1:p.?' -p660 -sg34 -S'NP_001339352.1:p.?' 
-p661 -ssg36 -g37 -sg38 -S'NC_000011.9(NM_001352423.1):c.-108-7C>T' -p662 -sg40 -g4 -sg41 -S'NM_001352423.1:c.-108-7C>T' -p663 -sg43 -g4 -sg44 -(dp664 -g46 -(dp665 -g14 -S'NC_000011.10:g.111865258G>A' -p666 -sg16 -(dp667 -g18 -g50 -sg20 -g21 -sg22 -S'111865258' -p668 -sg24 -g25 -sssg12 -(dp669 -g14 -S'NC_000011.9:g.111735981G>A' -p670 -sg16 -(dp671 -g18 -g50 -sg20 -g21 -sg22 -S'111735981' -p672 -sg24 -g25 -sssg56 -(dp673 -g14 -S'NC_000011.10:g.111865258G>A' -p674 -sg16 -(dp675 -g18 -g60 -sg20 -g21 -sg22 -S'111865258' -p676 -sg24 -g25 -sssg62 -(dp677 -g14 -S'NC_000011.9:g.111735981G>A' -p678 -sg16 -(dp679 -g18 -g60 -sg20 -g21 -sg22 -S'111735981' -p680 -sg24 -g25 -sssssS'NM_001352416.1:c.-108-7C>T' -p681 -(dp682 -g3 -g4 -sg5 -(lp683 -S'RefSeqGene record not available' -p684 -asg8 -g4 -sg9 -(lp685 -(dp686 -g12 -(dp687 -g14 -S'NW_003871080.1:g.117249G>A' -p688 -sg16 -(dp689 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p690 -sg24 -g25 -sssasg26 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 12, mRNA -p691 -sg28 -S'ALG9' -p692 -sg30 -(dp693 -g32 -S'NP_001339345.1:p.?' -p694 -sg34 -S'NP_001339345.1:p.?' -p695 -ssg36 -g37 -sg38 -S'NC_000011.9(NM_001352416.1):c.-108-7C>T' -p696 -sg40 -g4 -sg41 -S'NM_001352416.1:c.-108-7C>T' -p697 -sg43 -g4 -sg44 -(dp698 -g46 -(dp699 -g14 -S'NC_000011.10:g.111865258G>A' -p700 -sg16 -(dp701 -g18 -g50 -sg20 -g21 -sg22 -S'111865258' -p702 -sg24 -g25 -sssg12 -(dp703 -g14 -S'NC_000011.9:g.111735981G>A' -p704 -sg16 -(dp705 -g18 -g50 -sg20 -g21 -sg22 -S'111735981' -p706 -sg24 -g25 -sssg56 -(dp707 -g14 -S'NC_000011.10:g.111865258G>A' -p708 -sg16 -(dp709 -g18 -g60 -sg20 -g21 -sg22 -S'111865258' -p710 -sg24 -g25 -sssg62 -(dp711 -g14 -S'NC_000011.9:g.111735981G>A' -p712 -sg16 -(dp713 -g18 -g60 -sg20 -g21 -sg22 -S'111735981' -p714 -sg24 -g25 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant215.txt b/VariantValidator/testing/testOutputsMasterITS/variant215.txt deleted file mode 100644 index ddfa12e8..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant215.txt +++ /dev/null @@ -1,248 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NR_037918.2:n.1184+11736G>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -(dp13 -S'GRCh38' -p14 -(dp15 -S'HGVS_genomic_description' -p16 -S'NT_187658.1:g.69187C>A' -p17 -sS'vcf' -p18 -(dp19 -S'chr' -p20 -S'HSCHR12_3_CTG2' -p21 -sS'ref' -p22 -VC -p23 -sS'pos' -p24 -S'69187' -p25 -sS'alt' -p26 -VA -p27 -sssa(dp28 -S'hg38' -p29 -(dp30 -g16 -S'NT_187658.1:g.69187C>A' -p31 -sg18 -(dp32 -g20 -S'chr12_KI270904v1_alt' -p33 -sg22 -g23 -sg24 -S'69187' -p34 -sg26 -g27 -sssa(dp35 -S'GRCh37' -p36 -(dp37 -g16 -S'NW_003571047.1:g.69187C>A' -p38 -sg18 -(dp39 -g20 -S'HG1133_PATCH' -p40 -sg22 -g23 -sg24 -S'69187' -p41 -sg26 -g27 -sssa(dp42 -g36 -(dp43 -g16 -S'NW_003571050.1:g.69187C>A' -p44 -sg18 -(dp45 -g20 -S'HSCHR12_2_CTG2' -p46 -sg22 -g23 -sg24 -S'69187' -p47 -sg26 -g27 -sssa(dp48 -g14 -(dp49 -g16 -S'NW_003571050.1:g.69187C>A' -p50 -sg18 -(dp51 -g20 -g46 -sg22 -g23 -sg24 -S'69187' -p52 -sg26 -g27 -sssa(dp53 -g29 -(dp54 -g16 -S'NW_003571050.1:g.69187C>A' -p55 -sg18 -(dp56 -g20 -S'chr12_GL877876v1_alt' -p57 -sg22 -g23 -sg24 -S'69187' -p58 -sg26 -g27 -sssasS'transcript_description' -p59 -VHomo sapiens PRH1-PRR4 readthrough (PRH1-PRR4), long non-coding RNA -p60 -sS'gene_symbol' -p61 -S'PRH1-PRR4' -p62 -sS'HGVS_predicted_protein_consequence' -p63 -(dp64 -S'tlr' -p65 -S'Non-coding :n.' 
-p66 -sS'slr' -p67 -g6 -ssS'submitted_variant' -p68 -S'12-11023080-C-A' -p69 -sS'genome_context_intronic_sequence' -p70 -S'NC_000012.11(NR_037918.2):c.1184+11736G>T' -p71 -sS'HGVS_LRG_variant' -p72 -g6 -sS'HGVS_transcript_variant' -p73 -S'NR_037918.2:n.1184+11736G>T' -p74 -sS'HGVS_RefSeqGene_variant' -p75 -g6 -sS'primary_assembly_loci' -p76 -(dp77 -S'hg19' -p78 -(dp79 -g16 -S'NC_000012.11:g.11023080C>A' -p80 -sg18 -(dp81 -g20 -S'chr12' -p82 -sg22 -g23 -sg24 -S'11023080' -p83 -sg26 -g27 -sssg29 -(dp84 -g16 -S'NC_000012.12:g.10870481C>A' -p85 -sg18 -(dp86 -g20 -g82 -sg22 -g23 -sg24 -S'10870481' -p87 -sg26 -g27 -sssg36 -(dp88 -g16 -S'NC_000012.11:g.11023080C>A' -p89 -sg18 -(dp90 -g20 -S'12' -p91 -sg22 -g23 -sg24 -S'11023080' -p92 -sg26 -g27 -sssg14 -(dp93 -g16 -S'NC_000012.12:g.10870481C>A' -p94 -sg18 -(dp95 -g20 -g91 -sg22 -g23 -sg24 -S'10870481' -p96 -sg26 -g27 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant216.txt b/VariantValidator/testing/testOutputsMasterITS/variant216.txt deleted file mode 100644 index 508bb512..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant216.txt +++ /dev/null @@ -1,428 +0,0 @@ -(dp0 -S'NM_020297.3:c.2199-1302del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000012.11:g.22018712TC>T automapped to NC_000012.11:g.22018713delC' -p7 -aS'RefSeqGene record not available' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens ATP binding cassette subfamily C member 9 (ABCC9), transcript variant SUR2B, mRNA -p13 -sS'gene_symbol' -p14 -S'ABCC9' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_064693.2(LRG_377p1):p.?' -p19 -sS'slr' -p20 -S'NP_064693.2:p.?' 
-p21 -ssS'submitted_variant' -p22 -S'12-22018712-TC-T' -p23 -sS'genome_context_intronic_sequence' -p24 -S'NC_000012.11(NM_020297.3):c.2199-1302del' -p25 -sS'HGVS_LRG_variant' -p26 -g4 -sS'HGVS_transcript_variant' -p27 -S'NM_020297.3:c.2199-1302del' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -g4 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000012.11:g.22018713del' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr12' -p39 -sS'ref' -p40 -S'TC' -p41 -sS'pos' -p42 -S'22018712' -p43 -sS'alt' -p44 -S'T' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000012.12:g.21865779del' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'TC' -p50 -sg42 -S'21865778' -p51 -sg44 -g45 -sssS'GRCh37' -p52 -(dp53 -g34 -S'NC_000012.11:g.22018713del' -p54 -sg36 -(dp55 -g38 -S'12' -p56 -sg40 -S'TC' -p57 -sg42 -S'22018712' -p58 -sg44 -g45 -sssS'GRCh38' -p59 -(dp60 -g34 -S'NC_000012.12:g.21865779del' -p61 -sg36 -(dp62 -g38 -g56 -sg40 -S'TC' -p63 -sg42 -S'21865778' -p64 -sg44 -g45 -sssssS'flag' -p65 -S'gene_variant' -p66 -sS'NM_005691.3:c.2199-1302del' -p67 -(dp68 -g3 -g4 -sg5 -(lp69 -S'NC_000012.11:g.22018712TC>T automapped to NC_000012.11:g.22018713delC' -p70 -aS'RefSeqGene record not available' -p71 -asg9 -g4 -sg10 -(lp72 -sg12 -VHomo sapiens ATP binding cassette subfamily C member 9 (ABCC9), transcript variant SUR2A, mRNA -p73 -sg14 -S'ABCC9' -p74 -sg16 -(dp75 -g18 -S'NP_005682.2(LRG_377p2):p.?' -p76 -sg20 -S'NP_005682.2:p.?' 
-p77 -ssg22 -g23 -sg24 -S'NC_000012.11(NM_005691.3):c.2199-1302del' -p78 -sg26 -g4 -sg27 -S'NM_005691.3:c.2199-1302del' -p79 -sg29 -g4 -sg30 -(dp80 -g32 -(dp81 -g34 -S'NC_000012.11:g.22018713del' -p82 -sg36 -(dp83 -g38 -g39 -sg40 -S'TC' -p84 -sg42 -S'22018712' -p85 -sg44 -g45 -sssg46 -(dp86 -g34 -S'NC_000012.12:g.21865779del' -p87 -sg36 -(dp88 -g38 -g39 -sg40 -S'TC' -p89 -sg42 -S'21865778' -p90 -sg44 -g45 -sssg52 -(dp91 -g34 -S'NC_000012.11:g.22018713del' -p92 -sg36 -(dp93 -g38 -g56 -sg40 -S'TC' -p94 -sg42 -S'22018712' -p95 -sg44 -g45 -sssg59 -(dp96 -g34 -S'NC_000012.12:g.21865779del' -p97 -sg36 -(dp98 -g38 -g56 -sg40 -S'TC' -p99 -sg42 -S'21865778' -p100 -sg44 -g45 -sssssS'NM_020297.2:c.2199-1302del' -p101 -(dp102 -g3 -S'LRG_377t1:c.2199-1302del' -p103 -sg5 -(lp104 -S'NC_000012.11:g.22018712TC>T automapped to NC_000012.11:g.22018713delC' -p105 -aS'A more recent version of the selected reference sequence NM_020297.2 is available (NM_020297.3)' -p106 -aS'NM_020297.3:c.2199-1302delG MUST be fully validated prior to use in reports' -p107 -aS'select_variants=NM_020297.3:c.2199-1302del' -p108 -asg9 -S'NG_012819.1(NM_020297.2):c.2199-1302del' -p109 -sg10 -(lp110 -sg12 -VHomo sapiens ATP-binding cassette, sub-family C (CFTR/MRP), member 9 (ABCC9), transcript variant SUR2B, mRNA -p111 -sg14 -S'ABCC9' -p112 -sg16 -(dp113 -g18 -S'NP_064693.2(LRG_377p1):p.?' -p114 -sg20 -S'NP_064693.2:p.?' 
-p115 -ssg22 -g23 -sg24 -S'NC_000012.11(NM_020297.2):c.2199-1302del' -p116 -sg26 -S'LRG_377:g.75916del' -p117 -sg27 -S'NM_020297.2:c.2199-1302del' -p118 -sg29 -S'NG_012819.1:g.75916del' -p119 -sg30 -(dp120 -g32 -(dp121 -g34 -S'NC_000012.11:g.22018713del' -p122 -sg36 -(dp123 -g38 -g39 -sg40 -S'TC' -p124 -sg42 -S'22018712' -p125 -sg44 -g45 -sssg52 -(dp126 -g34 -S'NC_000012.11:g.22018713del' -p127 -sg36 -(dp128 -g38 -g56 -sg40 -S'TC' -p129 -sg42 -S'22018712' -p130 -sg44 -g45 -sssssS'NM_005691.2:c.2199-1302del' -p131 -(dp132 -g3 -S'LRG_377t2:c.2199-1302del' -p133 -sg5 -(lp134 -S'NC_000012.11:g.22018712TC>T automapped to NC_000012.11:g.22018713delC' -p135 -aS'A more recent version of the selected reference sequence NM_005691.2 is available (NM_005691.3)' -p136 -aS'NM_005691.3:c.2199-1302delG MUST be fully validated prior to use in reports' -p137 -aS'select_variants=NM_005691.3:c.2199-1302del' -p138 -asg9 -S'NG_012819.1(NM_005691.2):c.2199-1302del' -p139 -sg10 -(lp140 -sg12 -VHomo sapiens ATP-binding cassette, sub-family C (CFTR/MRP), member 9 (ABCC9), transcript variant SUR2A, mRNA -p141 -sg14 -S'ABCC9' -p142 -sg16 -(dp143 -g18 -S'NP_005682.2(LRG_377p2):p.?' -p144 -sg20 -S'NP_005682.2:p.?' -p145 -ssg22 -g23 -sg24 -S'NC_000012.11(NM_005691.2):c.2199-1302del' -p146 -sg26 -S'LRG_377:g.75916del' -p147 -sg27 -S'NM_005691.2:c.2199-1302del' -p148 -sg29 -S'NG_012819.1:g.75916del' -p149 -sg30 -(dp150 -g32 -(dp151 -g34 -S'NC_000012.11:g.22018713del' -p152 -sg36 -(dp153 -g38 -g39 -sg40 -S'TC' -p154 -sg42 -S'22018712' -p155 -sg44 -g45 -sssg52 -(dp156 -g34 -S'NC_000012.11:g.22018713del' -p157 -sg36 -(dp158 -g38 -g56 -sg40 -S'TC' -p159 -sg42 -S'22018712' -p160 -sg44 -g45 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant217.txt b/VariantValidator/testing/testOutputsMasterITS/variant217.txt deleted file mode 100644 index fc73c3d4..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant217.txt +++ /dev/null @@ -1,142 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000424.3:c.556-2A>G' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'NG_008297.1(NM_000424.3):c.556-2A>G' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens keratin 5 (KRT5), mRNA -p14 -sS'gene_symbol' -p15 -S'KRT5' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000415.2:p.?' -p20 -sS'slr' -p21 -S'NP_000415.2:p.?' -p22 -ssS'submitted_variant' -p23 -S'12-52912946-T-C' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000012.11(NM_000424.3):c.556-2A>G' -p26 -sS'HGVS_LRG_variant' -p27 -g6 -sS'HGVS_transcript_variant' -p28 -S'NM_000424.3:c.556-2A>G' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_008297.1:g.6298A>G' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000012.11:g.52912946T>C' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr12' -p41 -sS'ref' -p42 -VT -p43 -sS'pos' -p44 -S'52912946' -p45 -sS'alt' -p46 -VC -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000012.12:g.52519162T>C' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -g43 -sg44 -S'52519162' -p52 -sg46 -g47 -sssS'GRCh37' -p53 -(dp54 -g36 -S'NC_000012.11:g.52912946T>C' -p55 -sg38 -(dp56 -g40 -S'12' -p57 -sg42 -g43 -sg44 -S'52912946' -p58 -sg46 -g47 -sssS'GRCh38' -p59 -(dp60 -g36 -S'NC_000012.12:g.52519162T>C' -p61 -sg38 -(dp62 -g40 -g57 -sg42 -g43 -sg44 -S'52519162' -p63 -sg46 -g47 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant218.txt b/VariantValidator/testing/testOutputsMasterITS/variant218.txt deleted file mode 100644 index 46a99fc2..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant218.txt +++ /dev/null @@ -1,371 +0,0 @@ -(dp0 -S'NM_001354304.1:c.1200del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000012.11:g.103234292TC>T automapped to NC_000012.11:g.103234294delC' -p7 -aS'RefSeqGene record not available' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens phenylalanine hydroxylase (PAH), transcript variant 2, mRNA -p13 -sS'gene_symbol' -p14 -S'PAH' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001341233.1:p.(Asn401ThrfsTer51)' -p19 -sS'slr' -p20 -S'NP_001341233.1:p.(N401Tfs*51)' -p21 -ssS'submitted_variant' -p22 -S'12-103234292-TC-T' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'HGVS_LRG_variant' -p25 -g4 -sS'HGVS_transcript_variant' -p26 -S'NM_001354304.1:c.1200del' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000012.11:g.103234294del' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr12' -p38 -sS'ref' -p39 -S'TC' -p40 -sS'pos' -p41 -S'103234292' -p42 -sS'alt' -p43 -S'T' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000012.12:g.102840516del' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'TC' -p49 -sg41 -S'102840514' -p50 -sg43 -g44 -sssS'GRCh37' -p51 -(dp52 -g33 -S'NC_000012.11:g.103234294del' -p53 -sg35 -(dp54 -g37 -S'12' -p55 -sg39 -S'TC' -p56 -sg41 -S'103234292' -p57 -sg43 -g44 -sssS'GRCh38' -p58 -(dp59 -g33 -S'NC_000012.12:g.102840516del' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -S'TC' -p62 -sg41 -S'102840514' -p63 -sg43 -g44 -sssssS'NM_000277.2:c.1200del' -p64 -(dp65 -g3 -g4 -sg5 -(lp66 -S'NC_000012.11:g.103234292TC>T 
automapped to NC_000012.11:g.103234294delC' -p67 -aS'RefSeqGene record not available' -p68 -asg9 -g4 -sg10 -(lp69 -sg12 -VHomo sapiens phenylalanine hydroxylase (PAH), transcript variant 1, mRNA -p70 -sg14 -S'PAH' -p71 -sg16 -(dp72 -g18 -S'NP_000268.1:p.(Asn401ThrfsTer51)' -p73 -sg20 -S'NP_000268.1:p.(N401Tfs*51)' -p74 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_000277.2:c.1200del' -p75 -sg28 -g4 -sg29 -(dp76 -g31 -(dp77 -g33 -S'NC_000012.11:g.103234294del' -p78 -sg35 -(dp79 -g37 -g38 -sg39 -S'TC' -p80 -sg41 -S'103234292' -p81 -sg43 -g44 -sssg45 -(dp82 -g33 -S'NC_000012.12:g.102840516del' -p83 -sg35 -(dp84 -g37 -g38 -sg39 -S'TC' -p85 -sg41 -S'102840514' -p86 -sg43 -g44 -sssg51 -(dp87 -g33 -S'NC_000012.11:g.103234294del' -p88 -sg35 -(dp89 -g37 -g55 -sg39 -S'TC' -p90 -sg41 -S'103234292' -p91 -sg43 -g44 -sssg58 -(dp92 -g33 -S'NC_000012.12:g.102840516del' -p93 -sg35 -(dp94 -g37 -g55 -sg39 -S'TC' -p95 -sg41 -S'102840514' -p96 -sg43 -g44 -sssssS'flag' -p97 -S'gene_variant' -p98 -sS'NM_000277.1:c.1200del' -p99 -(dp100 -g3 -g4 -sg5 -(lp101 -S'NC_000012.11:g.103234292TC>T automapped to NC_000012.11:g.103234294delC' -p102 -aS'A more recent version of the selected reference sequence NM_000277.1 is available (NM_000277.2)' -p103 -aS'NM_000277.2:c.1200delG MUST be fully validated prior to use in reports' -p104 -aS'select_variants=NM_000277.2:c.1200del' -p105 -asg9 -g4 -sg10 -(lp106 -sg12 -VHomo sapiens phenylalanine hydroxylase (PAH), mRNA -p107 -sg14 -S'PAH' -p108 -sg16 -(dp109 -g18 -S'NP_000268.1:p.(Asn401ThrfsTer51)' -p110 -sg20 -S'NP_000268.1:p.(N401Tfs*51)' -p111 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_000277.1:c.1200del' -p112 -sg28 -S'NG_008690.1:g.82088del' -p113 -sg29 -(dp114 -g31 -(dp115 -g33 -S'NC_000012.11:g.103234294del' -p116 -sg35 -(dp117 -g37 -g38 -sg39 -S'TC' -p118 -sg41 -S'103234292' -p119 -sg43 -g44 -sssg45 -(dp120 -g33 -S'NC_000012.12:g.102840516del' -p121 -sg35 -(dp122 -g37 -g38 -sg39 -S'TC' -p123 -sg41 -S'102840514' -p124 -sg43 -g44 -sssg51 -(dp125 
-g33 -S'NC_000012.11:g.103234294del' -p126 -sg35 -(dp127 -g37 -g55 -sg39 -S'TC' -p128 -sg41 -S'103234292' -p129 -sg43 -g44 -sssg58 -(dp130 -g33 -S'NC_000012.12:g.102840516del' -p131 -sg35 -(dp132 -g37 -g55 -sg39 -S'TC' -p133 -sg41 -S'102840514' -p134 -sg43 -g44 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant219.txt b/VariantValidator/testing/testOutputsMasterITS/variant219.txt deleted file mode 100644 index ad54642a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant219.txt +++ /dev/null @@ -1,355 +0,0 @@ -(dp0 -S'NM_001354304.1:c.-95-121A>G' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens phenylalanine hydroxylase (PAH), transcript variant 2, mRNA -p12 -sS'gene_symbol' -p13 -S'PAH' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001341233.1:p.?' -p18 -sS'slr' -p19 -S'NP_001341233.1:p.?' 
-p20 -ssS'submitted_variant' -p21 -S'12-103311124-T-C' -p22 -sS'genome_context_intronic_sequence' -p23 -S'NC_000012.11(NM_001354304.1):c.-95-121A>G' -p24 -sS'HGVS_LRG_variant' -p25 -g4 -sS'HGVS_transcript_variant' -p26 -S'NM_001354304.1:c.-95-121A>G' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000012.11:g.103311124T>C' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr12' -p38 -sS'ref' -p39 -VT -p40 -sS'pos' -p41 -S'103311124' -p42 -sS'alt' -p43 -VC -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000012.12:g.102917346T>C' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'102917346' -p49 -sg43 -g44 -sssS'GRCh37' -p50 -(dp51 -g33 -S'NC_000012.11:g.103311124T>C' -p52 -sg35 -(dp53 -g37 -S'12' -p54 -sg39 -g40 -sg41 -S'103311124' -p55 -sg43 -g44 -sssS'GRCh38' -p56 -(dp57 -g33 -S'NC_000012.12:g.102917346T>C' -p58 -sg35 -(dp59 -g37 -g54 -sg39 -g40 -sg41 -S'102917346' -p60 -sg43 -g44 -sssssS'flag' -p61 -S'gene_variant' -p62 -sS'NM_000277.2:c.-216A>G' -p63 -(dp64 -g3 -g4 -sg5 -(lp65 -S'RefSeqGene record not available' -p66 -asg8 -g4 -sg9 -(lp67 -sg11 -VHomo sapiens phenylalanine hydroxylase (PAH), transcript variant 1, mRNA -p68 -sg13 -S'PAH' -p69 -sg15 -(dp70 -g17 -S'NP_000268.1:p.?' -p71 -sg19 -S'NP_000268.1:p.?' 
-p72 -ssg21 -g22 -sg23 -g4 -sg25 -g4 -sg26 -S'NM_000277.2:c.-216A>G' -p73 -sg28 -g4 -sg29 -(dp74 -g31 -(dp75 -g33 -S'NC_000012.11:g.103311124T>C' -p76 -sg35 -(dp77 -g37 -g38 -sg39 -g40 -sg41 -S'103311124' -p78 -sg43 -g44 -sssg45 -(dp79 -g33 -S'NC_000012.12:g.102917346T>C' -p80 -sg35 -(dp81 -g37 -g38 -sg39 -g40 -sg41 -S'102917346' -p82 -sg43 -g44 -sssg50 -(dp83 -g33 -S'NC_000012.11:g.103311124T>C' -p84 -sg35 -(dp85 -g37 -g54 -sg39 -g40 -sg41 -S'103311124' -p86 -sg43 -g44 -sssg56 -(dp87 -g33 -S'NC_000012.12:g.102917346T>C' -p88 -sg35 -(dp89 -g37 -g54 -sg39 -g40 -sg41 -S'102917346' -p90 -sg43 -g44 -sssssS'NM_000277.1:c.-215A>G' -p91 -(dp92 -g3 -g4 -sg5 -(lp93 -S'A more recent version of the selected reference sequence NM_000277.1 is available (NM_000277.2)' -p94 -aS'NM_000277.2:c.-215C>G MUST be fully validated prior to use in reports' -p95 -aS'select_variants=NM_000277.2:c.-215C>G' -p96 -asg8 -g4 -sg9 -(lp97 -sg11 -VHomo sapiens phenylalanine hydroxylase (PAH), mRNA -p98 -sg13 -S'PAH' -p99 -sg15 -(dp100 -g17 -S'NP_000268.1:p.?' -p101 -sg19 -S'NP_000268.1:p.?' -p102 -ssg21 -g22 -sg23 -g4 -sg25 -g4 -sg26 -S'NM_000277.1:c.-215A>G' -p103 -sg28 -S'NG_008690.1:g.5258A>G' -p104 -sg29 -(dp105 -g31 -(dp106 -g33 -S'NC_000012.11:g.103311124T>C' -p107 -sg35 -(dp108 -g37 -g38 -sg39 -g40 -sg41 -S'103311124' -p109 -sg43 -g44 -sssg45 -(dp110 -g33 -S'NC_000012.12:g.102917346T>C' -p111 -sg35 -(dp112 -g37 -g38 -sg39 -g40 -sg41 -S'102917346' -p113 -sg43 -g44 -sssg50 -(dp114 -g33 -S'NC_000012.11:g.103311124T>C' -p115 -sg35 -(dp116 -g37 -g54 -sg39 -g40 -sg41 -S'103311124' -p117 -sg43 -g44 -sssg56 -(dp118 -g33 -S'NC_000012.12:g.102917346T>C' -p119 -sg35 -(dp120 -g37 -g54 -sg39 -g40 -sg41 -S'102917346' -p121 -sg43 -g44 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant22.txt b/VariantValidator/testing/testOutputsMasterITS/variant22.txt deleted file mode 100644 index ed00e72d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant22.txt +++ /dev/null @@ -1,58 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use NC_000011.9:g.5244828A=' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'HGVS_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NM_000518.4:c.*132+1868C>T' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'HGVS_LRG_variant' -p20 -g4 -sS'HGVS_transcript_variant' -p21 -g4 -sS'HGVS_RefSeqGene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -ssS'flag' -p25 -S'warning' -p26 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant220.txt b/VariantValidator/testing/testOutputsMasterITS/variant220.txt deleted file mode 100644 index 07cc05fc..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant220.txt +++ /dev/null @@ -1,1060 +0,0 @@ -(dp0 -S'NM_001319681.1:c.-366-1G>A' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 7, mRNA -p12 -sS'gene_symbol' -p13 -S'TCTN1' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001306610.1:p.?' -p18 -sS'slr' -p19 -S'NP_001306610.1:p.?' 
-p20 -ssS'submitted_variant' -p21 -S'12-111064166-G-A' -p22 -sS'genome_context_intronic_sequence' -p23 -S'NC_000012.11(NM_001319681.1):c.-366-1G>A' -p24 -sS'HGVS_LRG_variant' -p25 -g4 -sS'HGVS_transcript_variant' -p26 -S'NM_001319681.1:c.-366-1G>A' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000012.11:g.111064166G>A' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr12' -p38 -sS'ref' -p39 -S'G' -p40 -sS'pos' -p41 -S'111064166' -p42 -sS'alt' -p43 -S'A' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000012.12:g.110626361G>A' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'110626361' -p49 -sg43 -g44 -sssS'GRCh37' -p50 -(dp51 -g33 -S'NC_000012.11:g.111064166G>A' -p52 -sg35 -(dp53 -g37 -S'12' -p54 -sg39 -g40 -sg41 -S'111064166' -p55 -sg43 -g44 -sssS'GRCh38' -p56 -(dp57 -g33 -S'NC_000012.12:g.110626361G>A' -p58 -sg35 -(dp59 -g37 -g54 -sg39 -g40 -sg41 -S'110626361' -p60 -sg43 -g44 -sssssS'NM_001319680.1:c.342-1G>A' -p61 -(dp62 -g3 -g4 -sg5 -(lp63 -S'RefSeqGene record not available' -p64 -asg8 -g4 -sg9 -(lp65 -sg11 -VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 6, mRNA -p66 -sg13 -S'TCTN1' -p67 -sg15 -(dp68 -g17 -S'NP_001306609.1:p.?' -p69 -sg19 -S'NP_001306609.1:p.?' 
-p70 -ssg21 -g22 -sg23 -S'NC_000012.11(NM_001319680.1):c.342-1G>A' -p71 -sg25 -g4 -sg26 -S'NM_001319680.1:c.342-1G>A' -p72 -sg28 -g4 -sg29 -(dp73 -g31 -(dp74 -g33 -S'NC_000012.11:g.111064166G>A' -p75 -sg35 -(dp76 -g37 -g38 -sg39 -g40 -sg41 -S'111064166' -p77 -sg43 -g44 -sssg45 -(dp78 -g33 -S'NC_000012.12:g.110626361G>A' -p79 -sg35 -(dp80 -g37 -g38 -sg39 -g40 -sg41 -S'110626361' -p81 -sg43 -g44 -sssg50 -(dp82 -g33 -S'NC_000012.11:g.111064166G>A' -p83 -sg35 -(dp84 -g37 -g54 -sg39 -g40 -sg41 -S'111064166' -p85 -sg43 -g44 -sssg56 -(dp86 -g33 -S'NC_000012.12:g.110626361G>A' -p87 -sg35 -(dp88 -g37 -g54 -sg39 -g40 -sg41 -S'110626361' -p89 -sg43 -g44 -sssssS'NM_001082538.2:c.342-1G>A' -p90 -(dp91 -g3 -g4 -sg5 -(lp92 -S'RefSeqGene record not available' -p93 -asg8 -g4 -sg9 -(lp94 -sg11 -VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 1, mRNA -p95 -sg13 -S'TCTN1' -p96 -sg15 -(dp97 -g17 -S'NP_001076007.1:p.?' -p98 -sg19 -S'NP_001076007.1:p.?' -p99 -ssg21 -g22 -sg23 -S'NC_000012.11(NM_001082538.2):c.342-1G>A' -p100 -sg25 -g4 -sg26 -S'NM_001082538.2:c.342-1G>A' -p101 -sg28 -g4 -sg29 -(dp102 -g31 -(dp103 -g33 -S'NC_000012.11:g.111064166G>A' -p104 -sg35 -(dp105 -g37 -g38 -sg39 -g40 -sg41 -S'111064166' -p106 -sg43 -g44 -sssg45 -(dp107 -g33 -S'NC_000012.12:g.110626361G>A' -p108 -sg35 -(dp109 -g37 -g38 -sg39 -g40 -sg41 -S'110626361' -p110 -sg43 -g44 -sssg50 -(dp111 -g33 -S'NC_000012.11:g.111064166G>A' -p112 -sg35 -(dp113 -g37 -g54 -sg39 -g40 -sg41 -S'111064166' -p114 -sg43 -g44 -sssg56 -(dp115 -g33 -S'NC_000012.12:g.110626361G>A' -p116 -sg35 -(dp117 -g37 -g54 -sg39 -g40 -sg41 -S'110626361' -p118 -sg43 -g44 -sssssS'NM_001173976.1:c.162-1G>A' -p119 -(dp120 -g3 -g4 -sg5 -(lp121 -S'RefSeqGene record not available' -p122 -asg8 -g4 -sg9 -(lp123 -sg11 -VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 5, mRNA -p124 -sg13 -S'TCTN1' -p125 -sg15 -(dp126 -g17 -S'NP_001167447.1:p.?' -p127 -sg19 -S'NP_001167447.1:p.?' 
-p128 -ssg21 -g22 -sg23 -S'NC_000012.11(NM_001173976.1):c.162-1G>A' -p129 -sg25 -g4 -sg26 -S'NM_001173976.1:c.162-1G>A' -p130 -sg28 -g4 -sg29 -(dp131 -g31 -(dp132 -g33 -S'NC_000012.11:g.111064166G>A' -p133 -sg35 -(dp134 -g37 -g38 -sg39 -g40 -sg41 -S'111064166' -p135 -sg43 -g44 -sssg45 -(dp136 -g33 -S'NC_000012.12:g.110626361G>A' -p137 -sg35 -(dp138 -g37 -g38 -sg39 -g40 -sg41 -S'110626361' -p139 -sg43 -g44 -sssg50 -(dp140 -g33 -S'NC_000012.11:g.111064166G>A' -p141 -sg35 -(dp142 -g37 -g54 -sg39 -g40 -sg41 -S'111064166' -p143 -sg43 -g44 -sssg56 -(dp144 -g33 -S'NC_000012.12:g.110626361G>A' -p145 -sg35 -(dp146 -g37 -g54 -sg39 -g40 -sg41 -S'110626361' -p147 -sg43 -g44 -sssssS'flag' -p148 -S'gene_variant' -p149 -sS'NM_001082537.2:c.342-1G>A' -p150 -(dp151 -g3 -g4 -sg5 -(lp152 -S'RefSeqGene record not available' -p153 -asg8 -g4 -sg9 -(lp154 -sg11 -VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 2, mRNA -p155 -sg13 -S'TCTN1' -p156 -sg15 -(dp157 -g17 -S'NP_001076006.1:p.?' -p158 -sg19 -S'NP_001076006.1:p.?' -p159 -ssg21 -g22 -sg23 -S'NC_000012.11(NM_001082537.2):c.342-1G>A' -p160 -sg25 -g4 -sg26 -S'NM_001082537.2:c.342-1G>A' -p161 -sg28 -g4 -sg29 -(dp162 -g31 -(dp163 -g33 -S'NC_000012.11:g.111064166G>A' -p164 -sg35 -(dp165 -g37 -g38 -sg39 -g40 -sg41 -S'111064166' -p166 -sg43 -g44 -sssg45 -(dp167 -g33 -S'NC_000012.12:g.110626361G>A' -p168 -sg35 -(dp169 -g37 -g38 -sg39 -g40 -sg41 -S'110626361' -p170 -sg43 -g44 -sssg50 -(dp171 -g33 -S'NC_000012.11:g.111064166G>A' -p172 -sg35 -(dp173 -g37 -g54 -sg39 -g40 -sg41 -S'111064166' -p174 -sg43 -g44 -sssg56 -(dp175 -g33 -S'NC_000012.12:g.110626361G>A' -p176 -sg35 -(dp177 -g37 -g54 -sg39 -g40 -sg41 -S'110626361' -p178 -sg43 -g44 -sssssS'NR_135088.1:n.559-1G>A' -p179 -(dp180 -g3 -g4 -sg5 -(lp181 -S'RefSeqGene record not available' -p182 -asg8 -g4 -sg9 -(lp183 -sg11 -VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 9, non-coding RNA -p184 -sg13 -S'TCTN1' -p185 -sg15 -(dp186 -g17 -S'Non-coding :n.' 
-p187 -sg19 -g4 -ssg21 -g22 -sg23 -S'NC_000012.11(NR_135088.1):c.559-1G>A' -p188 -sg25 -g4 -sg26 -S'NR_135088.1:n.559-1G>A' -p189 -sg28 -g4 -sg29 -(dp190 -g31 -(dp191 -g33 -S'NC_000012.11:g.111064166G>A' -p192 -sg35 -(dp193 -g37 -g38 -sg39 -g40 -sg41 -S'111064166' -p194 -sg43 -g44 -sssg45 -(dp195 -g33 -S'NC_000012.12:g.110626361G>A' -p196 -sg35 -(dp197 -g37 -g38 -sg39 -g40 -sg41 -S'110626361' -p198 -sg43 -g44 -sssg50 -(dp199 -g33 -S'NC_000012.11:g.111064166G>A' -p200 -sg35 -(dp201 -g37 -g54 -sg39 -g40 -sg41 -S'111064166' -p202 -sg43 -g44 -sssg56 -(dp203 -g33 -S'NC_000012.12:g.110626361G>A' -p204 -sg35 -(dp205 -g37 -g54 -sg39 -g40 -sg41 -S'110626361' -p206 -sg43 -g44 -sssssS'NM_024549.5:c.342-1G>A' -p207 -(dp208 -g3 -g4 -sg5 -(lp209 -sg8 -S'NG_030381.1(NM_024549.5):c.342-1G>A' -p210 -sg9 -(lp211 -sg11 -VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 3, mRNA -p212 -sg13 -S'TCTN1' -p213 -sg15 -(dp214 -g17 -S'NP_078825.2:p.?' -p215 -sg19 -S'NP_078825.2:p.?' -p216 -ssg21 -g22 -sg23 -S'NC_000012.11(NM_024549.5):c.342-1G>A' -p217 -sg25 -g4 -sg26 -S'NM_024549.5:c.342-1G>A' -p218 -sg28 -S'NG_030381.1:g.17335G>A' -p219 -sg29 -(dp220 -g31 -(dp221 -g33 -S'NC_000012.11:g.111064166G>A' -p222 -sg35 -(dp223 -g37 -g38 -sg39 -g40 -sg41 -S'111064166' -p224 -sg43 -g44 -sssg45 -(dp225 -g33 -S'NC_000012.12:g.110626361G>A' -p226 -sg35 -(dp227 -g37 -g38 -sg39 -g40 -sg41 -S'110626361' -p228 -sg43 -g44 -sssg50 -(dp229 -g33 -S'NC_000012.11:g.111064166G>A' -p230 -sg35 -(dp231 -g37 -g54 -sg39 -g40 -sg41 -S'111064166' -p232 -sg43 -g44 -sssg56 -(dp233 -g33 -S'NC_000012.12:g.110626361G>A' -p234 -sg35 -(dp235 -g37 -g54 -sg39 -g40 -sg41 -S'110626361' -p236 -sg43 -g44 -sssssS'NM_001173975.2:c.174-1G>A' -p237 -(dp238 -g3 -g4 -sg5 -(lp239 -S'RefSeqGene record not available' -p240 -asg8 -g4 -sg9 -(lp241 -sg11 -VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 4, mRNA -p242 -sg13 -S'TCTN1' -p243 -sg15 -(dp244 -g17 -S'NP_001167446.1:p.?' 
-p245 -sg19 -S'NP_001167446.1:p.?' -p246 -ssg21 -g22 -sg23 -S'NC_000012.11(NM_001173975.2):c.174-1G>A' -p247 -sg25 -g4 -sg26 -S'NM_001173975.2:c.174-1G>A' -p248 -sg28 -g4 -sg29 -(dp249 -g31 -(dp250 -g33 -S'NC_000012.11:g.111064166G>A' -p251 -sg35 -(dp252 -g37 -g38 -sg39 -g40 -sg41 -S'111064166' -p253 -sg43 -g44 -sssg45 -(dp254 -g33 -S'NC_000012.12:g.110626361G>A' -p255 -sg35 -(dp256 -g37 -g38 -sg39 -g40 -sg41 -S'110626361' -p257 -sg43 -g44 -sssg50 -(dp258 -g33 -S'NC_000012.11:g.111064166G>A' -p259 -sg35 -(dp260 -g37 -g54 -sg39 -g40 -sg41 -S'111064166' -p261 -sg43 -g44 -sssg56 -(dp262 -g33 -S'NC_000012.12:g.110626361G>A' -p263 -sg35 -(dp264 -g37 -g54 -sg39 -g40 -sg41 -S'110626361' -p265 -sg43 -g44 -sssssS'NM_001173975.1:c.174-1G>A' -p266 -(dp267 -g3 -g4 -sg5 -(lp268 -S'A more recent version of the selected reference sequence NM_001173975.1 is available (NM_001173975.2)' -p269 -aS'NM_001173975.2:c.174-1G>A MUST be fully validated prior to use in reports' -p270 -aS'select_variants=NM_001173975.2:c.174-1G>A' -p271 -aS'RefSeqGene record not available' -p272 -asg8 -g4 -sg9 -(lp273 -sg11 -VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 4, mRNA -p274 -sg13 -S'TCTN1' -p275 -sg15 -(dp276 -g17 -S'NP_001167446.1:p.?' -p277 -sg19 -S'NP_001167446.1:p.?' -p278 -ssg21 -g22 -sg23 -S'NC_000012.11(NM_001173975.1):c.174-1G>A' -p279 -sg25 -g4 -sg26 -S'NM_001173975.1:c.174-1G>A' -p280 -sg28 -g4 -sg29 -(dp281 -g31 -(dp282 -g33 -S'NC_000012.11:g.111064166G>A' -p283 -sg35 -(dp284 -g37 -g38 -sg39 -g40 -sg41 -S'111064166' -p285 -sg43 -g44 -sssg50 -(dp286 -g33 -S'NC_000012.11:g.111064166G>A' -p287 -sg35 -(dp288 -g37 -g54 -sg39 -g40 -sg41 -S'111064166' -p289 -sg43 -g44 -sssssS'NM_001319682.1:c.174-1G>A' -p290 -(dp291 -g3 -g4 -sg5 -(lp292 -S'RefSeqGene record not available' -p293 -asg8 -g4 -sg9 -(lp294 -sg11 -VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 8, mRNA -p295 -sg13 -S'TCTN1' -p296 -sg15 -(dp297 -g17 -S'NP_001306611.1:p.?' 
-p298 -sg19 -S'NP_001306611.1:p.?' -p299 -ssg21 -g22 -sg23 -S'NC_000012.11(NM_001319682.1):c.174-1G>A' -p300 -sg25 -g4 -sg26 -S'NM_001319682.1:c.174-1G>A' -p301 -sg28 -g4 -sg29 -(dp302 -g31 -(dp303 -g33 -S'NC_000012.11:g.111064166G>A' -p304 -sg35 -(dp305 -g37 -g38 -sg39 -g40 -sg41 -S'111064166' -p306 -sg43 -g44 -sssg45 -(dp307 -g33 -S'NC_000012.12:g.110626361G>A' -p308 -sg35 -(dp309 -g37 -g38 -sg39 -g40 -sg41 -S'110626361' -p310 -sg43 -g44 -sssg50 -(dp311 -g33 -S'NC_000012.11:g.111064166G>A' -p312 -sg35 -(dp313 -g37 -g54 -sg39 -g40 -sg41 -S'111064166' -p314 -sg43 -g44 -sssg56 -(dp315 -g33 -S'NC_000012.12:g.110626361G>A' -p316 -sg35 -(dp317 -g37 -g54 -sg39 -g40 -sg41 -S'110626361' -p318 -sg43 -g44 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant221.txt b/VariantValidator/testing/testOutputsMasterITS/variant221.txt deleted file mode 100644 index 1c380e51..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant221.txt +++ /dev/null @@ -1,365 +0,0 @@ -(dp0 -S'NM_001194995.1:c.210del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000012.11:g.123738430CA>C automapped to NC_000012.11:g.123738431delA' -p7 -aS'RefSeqGene record not available' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens chromosome 12 open reading frame 65 (C12orf65), transcript variant 3, mRNA -p13 -sS'gene_symbol' -p14 -S'C12orf65' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001181924.1:p.(Gly72AlafsTer13)' -p19 -sS'slr' -p20 -S'NP_001181924.1:p.(G72Afs*13)' -p21 -ssS'submitted_variant' -p22 -S'12-123738430-CA-C' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'HGVS_LRG_variant' -p25 -g4 -sS'HGVS_transcript_variant' -p26 -S'NM_001194995.1:c.210del' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 
-S'HGVS_genomic_description' -p33 -S'NC_000012.11:g.123738431del' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr12' -p38 -sS'ref' -p39 -S'CA' -p40 -sS'pos' -p41 -S'123738430' -p42 -sS'alt' -p43 -S'C' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000012.12:g.123253884del' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'CA' -p49 -sg41 -S'123253883' -p50 -sg43 -g44 -sssS'GRCh37' -p51 -(dp52 -g33 -S'NC_000012.11:g.123738431del' -p53 -sg35 -(dp54 -g37 -S'12' -p55 -sg39 -S'CA' -p56 -sg41 -S'123738430' -p57 -sg43 -g44 -sssS'GRCh38' -p58 -(dp59 -g33 -S'NC_000012.12:g.123253884del' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -S'CA' -p62 -sg41 -S'123253883' -p63 -sg43 -g44 -sssssS'flag' -p64 -S'gene_variant' -p65 -sS'NM_152269.4:c.210del' -p66 -(dp67 -g3 -g4 -sg5 -(lp68 -S'NC_000012.11:g.123738430CA>C automapped to NC_000012.11:g.123738431delA' -p69 -asg9 -g4 -sg10 -(lp70 -sg12 -VHomo sapiens chromosome 12 open reading frame 65 (C12orf65), transcript variant 1, mRNA -p71 -sg14 -S'C12orf65' -p72 -sg16 -(dp73 -g18 -S'NP_689482.1:p.(Gly72AlafsTer13)' -p74 -sg20 -S'NP_689482.1:p.(G72Afs*13)' -p75 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_152269.4:c.210del' -p76 -sg28 -S'NG_027517.1:g.25588del' -p77 -sg29 -(dp78 -g31 -(dp79 -g33 -S'NC_000012.11:g.123738431del' -p80 -sg35 -(dp81 -g37 -g38 -sg39 -S'CA' -p82 -sg41 -S'123738430' -p83 -sg43 -g44 -sssg45 -(dp84 -g33 -S'NC_000012.12:g.123253884del' -p85 -sg35 -(dp86 -g37 -g38 -sg39 -S'CA' -p87 -sg41 -S'123253883' -p88 -sg43 -g44 -sssg51 -(dp89 -g33 -S'NC_000012.11:g.123738431del' -p90 -sg35 -(dp91 -g37 -g55 -sg39 -S'CA' -p92 -sg41 -S'123738430' -p93 -sg43 -g44 -sssg58 -(dp94 -g33 -S'NC_000012.12:g.123253884del' -p95 -sg35 -(dp96 -g37 -g55 -sg39 -S'CA' -p97 -sg41 -S'123253883' -p98 -sg43 -g44 -sssssS'NM_001143905.2:c.210del' -p99 -(dp100 -g3 -g4 -sg5 -(lp101 -S'NC_000012.11:g.123738430CA>C automapped to NC_000012.11:g.123738431delA' -p102 -aS'RefSeqGene record not available' -p103 -asg9 -g4 -sg10 -(lp104 -sg12 -VHomo sapiens chromosome 12 open reading frame 65 
(C12orf65), transcript variant 2, mRNA -p105 -sg14 -S'C12orf65' -p106 -sg16 -(dp107 -g18 -S'NP_001137377.1:p.(Gly72AlafsTer13)' -p108 -sg20 -S'NP_001137377.1:p.(G72Afs*13)' -p109 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001143905.2:c.210del' -p110 -sg28 -g4 -sg29 -(dp111 -g31 -(dp112 -g33 -S'NC_000012.11:g.123738431del' -p113 -sg35 -(dp114 -g37 -g38 -sg39 -S'CA' -p115 -sg41 -S'123738430' -p116 -sg43 -g44 -sssg45 -(dp117 -g33 -S'NC_000012.12:g.123253884del' -p118 -sg35 -(dp119 -g37 -g38 -sg39 -S'CA' -p120 -sg41 -S'123253883' -p121 -sg43 -g44 -sssg51 -(dp122 -g33 -S'NC_000012.11:g.123738431del' -p123 -sg35 -(dp124 -g37 -g55 -sg39 -S'CA' -p125 -sg41 -S'123738430' -p126 -sg43 -g44 -sssg58 -(dp127 -g33 -S'NC_000012.12:g.123253884del' -p128 -sg35 -(dp129 -g37 -g55 -sg39 -S'CA' -p130 -sg41 -S'123253883' -p131 -sg43 -g44 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant222.txt b/VariantValidator/testing/testOutputsMasterITS/variant222.txt deleted file mode 100644 index 92b5149a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant222.txt +++ /dev/null @@ -1,147 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_194318.3:c.71-5del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000013.10:g.31789169CT>C automapped to NC_000013.10:g.31789183delT' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'NG_011732.1(NM_194318.3):c.71-5del' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens beta 3-glucosyltransferase (B3GLCT), mRNA -p15 -sS'gene_symbol' -p16 -S'B3GLCT' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_919299.3:p.?' -p21 -sS'slr' -p22 -S'NP_919299.3:p.?' 
-p23 -ssS'submitted_variant' -p24 -S'13-31789169-CT-C' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000013.10(NM_194318.3):c.71-5del' -p27 -sS'HGVS_LRG_variant' -p28 -g6 -sS'HGVS_transcript_variant' -p29 -S'NM_194318.3:c.71-5del' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_011732.1:g.20072del' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000013.10:g.31789183del' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr13' -p42 -sS'ref' -p43 -S'CT' -p44 -sS'pos' -p45 -S'31789169' -p46 -sS'alt' -p47 -S'C' -p48 -sssS'hg38' -p49 -(dp50 -g37 -S'NC_000013.11:g.31215046del' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -S'CT' -p53 -sg45 -S'31215032' -p54 -sg47 -g48 -sssS'GRCh37' -p55 -(dp56 -g37 -S'NC_000013.10:g.31789183del' -p57 -sg39 -(dp58 -g41 -S'13' -p59 -sg43 -S'CT' -p60 -sg45 -S'31789169' -p61 -sg47 -g48 -sssS'GRCh38' -p62 -(dp63 -g37 -S'NC_000013.11:g.31215046del' -p64 -sg39 -(dp65 -g41 -g59 -sg43 -S'CT' -p66 -sg45 -S'31215032' -p67 -sg47 -g48 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant223.txt b/VariantValidator/testing/testOutputsMasterITS/variant223.txt deleted file mode 100644 index 3fae3133..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant223.txt +++ /dev/null @@ -1,454 +0,0 @@ -(dp0 -S'NM_001530.3:c.223G>A' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -sS'RefSeqGene_context_intronic_sequence' -p7 -g4 -sS'alt_genomic_loci' -p8 -(lp9 -sS'transcript_description' -p10 -VHomo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 1, mRNA -p11 -sS'gene_symbol' -p12 -S'HIF1A' -p13 -sS'HGVS_predicted_protein_consequence' -p14 -(dp15 -S'tlr' -p16 -S'NP_001521.1:p.(Ala75Thr)' -p17 -sS'slr' -p18 -S'NP_001521.1:p.(A75T)' -p19 -ssS'submitted_variant' -p20 -S'14-62187287-G-A' -p21 -sS'genome_context_intronic_sequence' -p22 -g4 -sS'HGVS_LRG_variant' -p23 -g4 -sS'HGVS_transcript_variant' -p24 -S'NM_001530.3:c.223G>A' -p25 -sS'HGVS_RefSeqGene_variant' -p26 -S'NG_029606.1:g.30169G>A' -p27 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000014.8:g.62187287G>A' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr14' -p37 -sS'ref' -p38 -S'G' -p39 -sS'pos' -p40 -S'62187287' -p41 -sS'alt' -p42 -S'A' -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000014.9:g.61720569G>A' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'61720569' -p48 -sg42 -g43 -sssS'GRCh37' -p49 -(dp50 -g32 -S'NC_000014.8:g.62187287G>A' -p51 -sg34 -(dp52 -g36 -S'14' -p53 -sg38 -g39 -sg40 -S'62187287' -p54 -sg42 -g43 -sssS'GRCh38' -p55 -(dp56 -g32 -S'NC_000014.9:g.61720569G>A' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'61720569' -p59 -sg42 -g43 -sssssS'flag' -p60 -S'gene_variant' -p61 -sS'NM_001243084.1:c.295G>A' -p62 -(dp63 -g3 -g4 -sg5 -(lp64 -S'RefSeqGene record not available' -p65 -asg7 -g4 -sg8 -(lp66 -sg10 -VHomo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), 
transcript variant 3, mRNA -p67 -sg12 -S'HIF1A' -p68 -sg14 -(dp69 -g16 -S'NP_001230013.1:p.(Ala99Thr)' -p70 -sg18 -S'NP_001230013.1:p.(A99T)' -p71 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_001243084.1:c.295G>A' -p72 -sg26 -g4 -sg28 -(dp73 -g30 -(dp74 -g32 -S'NC_000014.8:g.62187287G>A' -p75 -sg34 -(dp76 -g36 -g37 -sg38 -g39 -sg40 -S'62187287' -p77 -sg42 -g43 -sssg44 -(dp78 -g32 -S'NC_000014.9:g.61720569G>A' -p79 -sg34 -(dp80 -g36 -g37 -sg38 -g39 -sg40 -S'61720569' -p81 -sg42 -g43 -sssg49 -(dp82 -g32 -S'NC_000014.8:g.62187287G>A' -p83 -sg34 -(dp84 -g36 -g53 -sg38 -g39 -sg40 -S'62187287' -p85 -sg42 -g43 -sssg55 -(dp86 -g32 -S'NC_000014.9:g.61720569G>A' -p87 -sg34 -(dp88 -g36 -g53 -sg38 -g39 -sg40 -S'61720569' -p89 -sg42 -g43 -sssssS'NR_144368.1:n.214-3552C>T' -p90 -(dp91 -g3 -g4 -sg5 -(lp92 -S'RefSeqGene record not available' -p93 -asg7 -g4 -sg8 -(lp94 -sg10 -VHomo sapiens uncharacterized LOC105370526 (LOC105370526), long non-coding RNA -p95 -sg12 -S'LOC105370526' -p96 -sg14 -(dp97 -g16 -S'Non-coding :n.' 
-p98 -sg18 -g4 -ssg20 -g21 -sg22 -S'NC_000014.8(NR_144368.1):c.214-3552C>T' -p99 -sg23 -g4 -sg24 -S'NR_144368.1:n.214-3552C>T' -p100 -sg26 -g4 -sg28 -(dp101 -g30 -(dp102 -g32 -S'NC_000014.8:g.62187287G>A' -p103 -sg34 -(dp104 -g36 -g37 -sg38 -VG -p105 -sg40 -S'62187287' -p106 -sg42 -VA -p107 -sssg44 -(dp108 -g32 -S'NC_000014.9:g.61720569G>A' -p109 -sg34 -(dp110 -g36 -g37 -sg38 -g105 -sg40 -S'61720569' -p111 -sg42 -g107 -sssg49 -(dp112 -g32 -S'NC_000014.8:g.62187287G>A' -p113 -sg34 -(dp114 -g36 -g53 -sg38 -g105 -sg40 -S'62187287' -p115 -sg42 -g107 -sssg55 -(dp116 -g32 -S'NC_000014.9:g.61720569G>A' -p117 -sg34 -(dp118 -g36 -g53 -sg38 -g105 -sg40 -S'61720569' -p119 -sg42 -g107 -sssssS'NM_181054.2:c.223G>A' -p120 -(dp121 -g3 -g4 -sg5 -(lp122 -S'RefSeqGene record not available' -p123 -asg7 -g4 -sg8 -(lp124 -sg10 -VHomo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 2, mRNA -p125 -sg12 -S'HIF1A' -p126 -sg14 -(dp127 -g16 -S'NP_851397.1:p.(Ala75Thr)' -p128 -sg18 -S'NP_851397.1:p.(A75T)' -p129 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_181054.2:c.223G>A' -p130 -sg26 -g4 -sg28 -(dp131 -g30 -(dp132 -g32 -S'NC_000014.8:g.62187287G>A' -p133 -sg34 -(dp134 -g36 -g37 -sg38 -g39 -sg40 -S'62187287' -p135 -sg42 -g43 -sssg44 -(dp136 -g32 -S'NC_000014.9:g.61720569G>A' -p137 -sg34 -(dp138 -g36 -g37 -sg38 -g39 -sg40 -S'61720569' -p139 -sg42 -g43 -sssg49 -(dp140 -g32 -S'NC_000014.8:g.62187287G>A' -p141 -sg34 -(dp142 -g36 -g53 -sg38 -g39 -sg40 -S'62187287' -p143 -sg42 -g43 -sssg55 -(dp144 -g32 -S'NC_000014.9:g.61720569G>A' -p145 -sg34 -(dp146 -g36 -g53 -sg38 -g39 -sg40 -S'61720569' -p147 -sg42 -g43 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant224.txt b/VariantValidator/testing/testOutputsMasterITS/variant224.txt deleted file mode 100644 index b893faa7..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant224.txt +++ /dev/null @@ -1,479 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_001243084.1:c.303_304delinsGA' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000014.8:g.62188231TT>GA automapped to NC_000014.8:g.62188231_62188232delTTinsGA' -p9 -aS'RefSeqGene record not available' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 3, mRNA -p15 -sS'gene_symbol' -p16 -S'HIF1A' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_001230013.1:p.(Asp101_Leu102delinsGluMet)' -p21 -sS'slr' -p22 -S'NP_001230013.1:p.(D101_L102delinsEM)' -p23 -ssS'submitted_variant' -p24 -S'14-62188231-TT-GA' -p25 -sS'genome_context_intronic_sequence' -p26 -g6 -sS'HGVS_LRG_variant' -p27 -g6 -sS'HGVS_transcript_variant' -p28 -S'NM_001243084.1:c.303_304delinsGA' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -g6 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000014.8:g.62188231_62188232delinsGA' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr14' -p40 -sS'ref' -p41 -S'TT' -p42 -sS'pos' -p43 -S'62188231' -p44 -sS'alt' -p45 -S'GA' -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000014.9:g.61721513_61721514delinsGA' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -S'TT' -p51 -sg43 -S'61721513' -p52 -sg45 -g46 -sssS'GRCh37' -p53 -(dp54 -g35 -S'NC_000014.8:g.62188231_62188232delinsGA' -p55 -sg37 -(dp56 -g39 -S'14' -p57 -sg41 -S'TT' -p58 -sg43 -S'62188231' -p59 -sg45 -g46 -sssS'GRCh38' -p60 -(dp61 -g35 -S'NC_000014.9:g.61721513_61721514delinsGA' -p62 -sg37 -(dp63 -g39 -g57 -sg41 
-S'TT' -p64 -sg43 -S'61721513' -p65 -sg45 -g46 -sssssS'NM_181054.2:c.231_232delinsGA' -p66 -(dp67 -g5 -g6 -sg7 -(lp68 -S'NC_000014.8:g.62188231TT>GA automapped to NC_000014.8:g.62188231_62188232delTTinsGA' -p69 -aS'RefSeqGene record not available' -p70 -asg11 -g6 -sg12 -(lp71 -sg14 -VHomo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 2, mRNA -p72 -sg16 -S'HIF1A' -p73 -sg18 -(dp74 -g20 -S'NP_851397.1:p.(Asp77_Leu78delinsGluMet)' -p75 -sg22 -S'NP_851397.1:p.(D77_L78delinsEM)' -p76 -ssg24 -g25 -sg26 -g6 -sg27 -g6 -sg28 -S'NM_181054.2:c.231_232delinsGA' -p77 -sg30 -g6 -sg31 -(dp78 -g33 -(dp79 -g35 -S'NC_000014.8:g.62188231_62188232delinsGA' -p80 -sg37 -(dp81 -g39 -g40 -sg41 -S'TT' -p82 -sg43 -S'62188231' -p83 -sg45 -S'GA' -p84 -sssg47 -(dp85 -g35 -S'NC_000014.9:g.61721513_61721514delinsGA' -p86 -sg37 -(dp87 -g39 -g40 -sg41 -S'TT' -p88 -sg43 -S'61721513' -p89 -sg45 -g84 -sssg53 -(dp90 -g35 -S'NC_000014.8:g.62188231_62188232delinsGA' -p91 -sg37 -(dp92 -g39 -g57 -sg41 -S'TT' -p93 -sg43 -S'62188231' -p94 -sg45 -g84 -sssg60 -(dp95 -g35 -S'NC_000014.9:g.61721513_61721514delinsGA' -p96 -sg37 -(dp97 -g39 -g57 -sg41 -S'TT' -p98 -sg43 -S'61721513' -p99 -sg45 -g84 -sssssS'NR_144368.1:n.214-4497_214-4496delinsTC' -p100 -(dp101 -g5 -g6 -sg7 -(lp102 -S'NC_000014.8:g.62188231TT>GA automapped to NC_000014.8:g.62188231_62188232delTTinsGA' -p103 -aS'RefSeqGene record not available' -p104 -asg11 -g6 -sg12 -(lp105 -sg14 -VHomo sapiens uncharacterized LOC105370526 (LOC105370526), long non-coding RNA -p106 -sg16 -S'LOC105370526' -p107 -sg18 -(dp108 -g20 -S'Non-coding :n.' 
-p109 -sg22 -g6 -ssg24 -g25 -sg26 -S'NC_000014.8(NR_144368.1):c.214-4497_214-4496delinsTC' -p110 -sg27 -g6 -sg28 -S'NR_144368.1:n.214-4497_214-4496delinsTC' -p111 -sg30 -g6 -sg31 -(dp112 -g33 -(dp113 -g35 -S'NC_000014.8:g.62188231_62188232delinsGA' -p114 -sg37 -(dp115 -g39 -g40 -sg41 -S'TT' -p116 -sg43 -S'62188231' -p117 -sg45 -VGA -p118 -sssg47 -(dp119 -g35 -S'NC_000014.9:g.61721513_61721514delinsGA' -p120 -sg37 -(dp121 -g39 -g40 -sg41 -S'TT' -p122 -sg43 -S'61721513' -p123 -sg45 -VGA -p124 -sssg53 -(dp125 -g35 -S'NC_000014.8:g.62188231_62188232delinsGA' -p126 -sg37 -(dp127 -g39 -g57 -sg41 -S'TT' -p128 -sg43 -S'62188231' -p129 -sg45 -g118 -sssg60 -(dp130 -g35 -S'NC_000014.9:g.61721513_61721514delinsGA' -p131 -sg37 -(dp132 -g39 -g57 -sg41 -S'TT' -p133 -sg43 -S'61721513' -p134 -sg45 -g124 -sssssS'NM_001530.3:c.231_232delinsGA' -p135 -(dp136 -g5 -g6 -sg7 -(lp137 -S'NC_000014.8:g.62188231TT>GA automapped to NC_000014.8:g.62188231_62188232delTTinsGA' -p138 -asg11 -g6 -sg12 -(lp139 -sg14 -VHomo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 1, mRNA -p140 -sg16 -S'HIF1A' -p141 -sg18 -(dp142 -g20 -S'NP_001521.1:p.(Asp77_Leu78delinsGluMet)' -p143 -sg22 -S'NP_001521.1:p.(D77_L78delinsEM)' -p144 -ssg24 -g25 -sg26 -g6 -sg27 -g6 -sg28 -S'NM_001530.3:c.231_232delinsGA' -p145 -sg30 -S'NG_029606.1:g.31113_31114delinsGA' -p146 -sg31 -(dp147 -g33 -(dp148 -g35 -S'NC_000014.8:g.62188231_62188232delinsGA' -p149 -sg37 -(dp150 -g39 -g40 -sg41 -S'TT' -p151 -sg43 -S'62188231' -p152 -sg45 -S'GA' -p153 -sssg47 -(dp154 -g35 -S'NC_000014.9:g.61721513_61721514delinsGA' -p155 -sg37 -(dp156 -g39 -g40 -sg41 -S'TT' -p157 -sg43 -S'61721513' -p158 -sg45 -g153 -sssg53 -(dp159 -g35 -S'NC_000014.8:g.62188231_62188232delinsGA' -p160 -sg37 -(dp161 -g39 -g57 -sg41 -S'TT' -p162 -sg43 -S'62188231' -p163 -sg45 -g153 -sssg60 -(dp164 -g35 -S'NC_000014.9:g.61721513_61721514delinsGA' -p165 -sg37 -(dp166 -g39 -g57 -sg41 -S'TT' -p167 -sg43 -S'61721513' -p168 -sg45 -g153 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant225.txt b/VariantValidator/testing/testOutputsMasterITS/variant225.txt deleted file mode 100644 index 73f16d2f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant225.txt +++ /dev/null @@ -1,401 +0,0 @@ -(dp0 -S'NM_172375.1:c.*333G>T' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'A more recent version of the selected reference sequence NM_172375.1 is available (NM_172375.2)' -p7 -aS'NM_172375.2:c.*333G>T MUST be fully validated prior to use in reports' -p8 -aS'select_variants=NM_172375.2:c.*333G>T' -p9 -aS'RefSeqGene record not available' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -g4 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens potassium voltage-gated channel, subfamily H (eag-related), member 5 (KCNH5), transcript variant 3, mRNA -p15 -sS'gene_symbol' -p16 -S'KCNH5' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_758963.1:p.?' -p21 -sS'slr' -p22 -S'NP_758963.1:p.?' 
-p23 -ssS'submitted_variant' -p24 -S'14-63174827-C-A' -p25 -sS'genome_context_intronic_sequence' -p26 -g4 -sS'HGVS_LRG_variant' -p27 -g4 -sS'HGVS_transcript_variant' -p28 -S'NM_172375.1:c.*333G>T' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -g4 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000014.8:g.63174827C>A' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr14' -p40 -sS'ref' -p41 -VC -p42 -sS'pos' -p43 -S'63174827' -p44 -sS'alt' -p45 -VA -p46 -sssS'GRCh37' -p47 -(dp48 -g35 -S'NC_000014.8:g.63174827C>A' -p49 -sg37 -(dp50 -g39 -S'14' -p51 -sg41 -g42 -sg43 -S'63174827' -p52 -sg45 -g46 -sssssS'flag' -p53 -S'gene_variant' -p54 -sS'NM_139318.3:c.2366G>T' -p55 -(dp56 -g3 -g4 -sg5 -(lp57 -S'A more recent version of the selected reference sequence NM_139318.3 is available (NM_139318.4)' -p58 -aS'NM_139318.4:c.2366G>T MUST be fully validated prior to use in reports' -p59 -aS'select_variants=NM_139318.4:c.2366G>T' -p60 -aS'RefSeqGene record not available' -p61 -asg11 -g4 -sg12 -(lp62 -sg14 -VHomo sapiens potassium voltage-gated channel, subfamily H (eag-related), member 5 (KCNH5), transcript variant 1, mRNA -p63 -sg16 -S'KCNH5' -p64 -sg18 -(dp65 -g20 -S'NP_647479.2:p.(Gly789Val)' -p66 -sg22 -S'NP_647479.2:p.(G789V)' -p67 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_139318.3:c.2366G>T' -p68 -sg30 -g4 -sg31 -(dp69 -g33 -(dp70 -g35 -S'NC_000014.8:g.63174827C>A' -p71 -sg37 -(dp72 -g39 -g40 -sg41 -g42 -sg43 -S'63174827' -p73 -sg45 -g46 -sssg47 -(dp74 -g35 -S'NC_000014.8:g.63174827C>A' -p75 -sg37 -(dp76 -g39 -g51 -sg41 -g42 -sg43 -S'63174827' -p77 -sg45 -g46 -sssssS'NM_172375.2:c.*333G>T' -p78 -(dp79 -g3 -g4 -sg5 -(lp80 -S'RefSeqGene record not available' -p81 -asg11 -g4 -sg12 -(lp82 -sg14 -VHomo sapiens potassium voltage-gated channel subfamily H member 5 (KCNH5), transcript variant 3, mRNA -p83 -sg16 -S'KCNH5' -p84 -sg18 -(dp85 -g20 -S'NP_758963.1:p.?' -p86 -sg22 -S'NP_758963.1:p.?' 
-p87 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_172375.2:c.*333G>T' -p88 -sg30 -g4 -sg31 -(dp89 -g33 -(dp90 -g35 -S'NC_000014.8:g.63174827C>A' -p91 -sg37 -(dp92 -g39 -g40 -sg41 -g42 -sg43 -S'63174827' -p93 -sg45 -g46 -sssS'hg38' -p94 -(dp95 -g35 -S'NC_000014.9:g.62708109C>A' -p96 -sg37 -(dp97 -g39 -g40 -sg41 -g42 -sg43 -S'62708109' -p98 -sg45 -g46 -sssg47 -(dp99 -g35 -S'NC_000014.8:g.63174827C>A' -p100 -sg37 -(dp101 -g39 -g51 -sg41 -g42 -sg43 -S'63174827' -p102 -sg45 -g46 -sssS'GRCh38' -p103 -(dp104 -g35 -S'NC_000014.9:g.62708109C>A' -p105 -sg37 -(dp106 -g39 -g51 -sg41 -g42 -sg43 -S'62708109' -p107 -sg45 -g46 -sssssS'NM_139318.4:c.2366G>T' -p108 -(dp109 -g3 -g4 -sg5 -(lp110 -S'RefSeqGene record not available' -p111 -asg11 -g4 -sg12 -(lp112 -sg14 -VHomo sapiens potassium voltage-gated channel subfamily H member 5 (KCNH5), transcript variant 1, mRNA -p113 -sg16 -S'KCNH5' -p114 -sg18 -(dp115 -g20 -S'NP_647479.2:p.(Gly789Val)' -p116 -sg22 -S'NP_647479.2:p.(G789V)' -p117 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_139318.4:c.2366G>T' -p118 -sg30 -g4 -sg31 -(dp119 -g33 -(dp120 -g35 -S'NC_000014.8:g.63174827C>A' -p121 -sg37 -(dp122 -g39 -g40 -sg41 -g42 -sg43 -S'63174827' -p123 -sg45 -g46 -sssg94 -(dp124 -g35 -S'NC_000014.9:g.62708109C>A' -p125 -sg37 -(dp126 -g39 -g40 -sg41 -g42 -sg43 -S'62708109' -p127 -sg45 -g46 -sssg47 -(dp128 -g35 -S'NC_000014.8:g.63174827C>A' -p129 -sg37 -(dp130 -g39 -g51 -sg41 -g42 -sg43 -S'63174827' -p131 -sg45 -g46 -sssg103 -(dp132 -g35 -S'NC_000014.9:g.62708109C>A' -p133 -sg37 -(dp134 -g39 -g51 -sg41 -g42 -sg43 -S'62708109' -p135 -sg45 -g46 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant226.txt b/VariantValidator/testing/testOutputsMasterITS/variant226.txt deleted file mode 100644 index 962bbb4f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant226.txt +++ /dev/null @@ -1,369 +0,0 @@ -(dp0 -S'NM_000070.2:c.550del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_849t1:c.550del' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000015.9:g.42680000CA>C automapped to NC_000015.9:g.42680002delA' -p7 -aS'The current status of LRG_849 is pending therefore changes may be made to the LRG reference sequence' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -S'' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens calpain 3 (CAPN3), transcript variant 1, mRNA -p14 -sS'gene_symbol' -p15 -S'CAPN3' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000061.1(LRG_849p1):p.(Thr184ArgfsTer36)' -p20 -sS'slr' -p21 -S'NP_000061.1:p.(T184Rfs*36)' -p22 -ssS'submitted_variant' -p23 -S'15-42680000-CA-C' -p24 -sS'genome_context_intronic_sequence' -p25 -g10 -sS'HGVS_LRG_variant' -p26 -S'LRG_849:g.44702del' -p27 -sS'HGVS_transcript_variant' -p28 -S'NM_000070.2:c.550del' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_008660.1:g.44702del' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'GRCh38' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000015.10:g.42387804del' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'15' -p41 -sS'ref' -p42 -S'CA' -p43 -sS'pos' -p44 -S'42387802' -p45 -sS'alt' -p46 -S'C' -p47 -sssS'GRCh37' -p48 -(dp49 -g36 -S'NC_000015.9:g.42680002del' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'CA' -p52 -sg44 -S'42680000' -p53 -sg46 -g47 -sssS'hg38' -p54 -(dp55 -g36 -S'NC_000015.10:g.42387804del' -p56 -sg38 -(dp57 -g40 -S'chr15' -p58 -sg42 -S'CA' -p59 -sg44 -S'42387802' -p60 -sg46 -g47 -sssS'hg19' -p61 -(dp62 -g36 -S'NC_000015.9:g.42680002del' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -S'CA' -p65 -sg44 -S'42680000' 
-p66 -sg46 -g47 -sssssS'flag' -p67 -S'gene_variant' -p68 -sS'NM_024344.1:c.550del' -p69 -(dp70 -g3 -g10 -sg5 -(lp71 -S'NC_000015.9:g.42680000CA>C automapped to NC_000015.9:g.42680002delA' -p72 -aS'RefSeqGene record not available' -p73 -asg9 -g10 -sg11 -(lp74 -sg13 -VHomo sapiens calpain 3 (CAPN3), transcript variant 2, mRNA -p75 -sg15 -S'CAPN3' -p76 -sg17 -(dp77 -g19 -S'NP_077320.1:p.(Thr184ArgfsTer36)' -p78 -sg21 -S'NP_077320.1:p.(T184Rfs*36)' -p79 -ssg23 -g24 -sg25 -g10 -sg26 -g10 -sg28 -S'NM_024344.1:c.550del' -p80 -sg30 -g10 -sg32 -(dp81 -g34 -(dp82 -g36 -S'NC_000015.10:g.42387804del' -p83 -sg38 -(dp84 -g40 -g41 -sg42 -S'CA' -p85 -sg44 -S'42387802' -p86 -sg46 -g47 -sssg48 -(dp87 -g36 -S'NC_000015.9:g.42680002del' -p88 -sg38 -(dp89 -g40 -g41 -sg42 -S'CA' -p90 -sg44 -S'42680000' -p91 -sg46 -g47 -sssg54 -(dp92 -g36 -S'NC_000015.10:g.42387804del' -p93 -sg38 -(dp94 -g40 -g58 -sg42 -S'CA' -p95 -sg44 -S'42387802' -p96 -sg46 -g47 -sssg61 -(dp97 -g36 -S'NC_000015.9:g.42680002del' -p98 -sg38 -(dp99 -g40 -g58 -sg42 -S'CA' -p100 -sg44 -S'42680000' -p101 -sg46 -g47 -sssssS'NM_173087.1:c.550del' -p102 -(dp103 -g3 -g10 -sg5 -(lp104 -S'NC_000015.9:g.42680000CA>C automapped to NC_000015.9:g.42680002delA' -p105 -aS'RefSeqGene record not available' -p106 -asg9 -g10 -sg11 -(lp107 -sg13 -VHomo sapiens calpain 3 (CAPN3), transcript variant 3, mRNA -p108 -sg15 -S'CAPN3' -p109 -sg17 -(dp110 -g19 -S'NP_775110.1:p.(Thr184ArgfsTer36)' -p111 -sg21 -S'NP_775110.1:p.(T184Rfs*36)' -p112 -ssg23 -g24 -sg25 -g10 -sg26 -g10 -sg28 -S'NM_173087.1:c.550del' -p113 -sg30 -g10 -sg32 -(dp114 -g34 -(dp115 -g36 -S'NC_000015.10:g.42387804del' -p116 -sg38 -(dp117 -g40 -g41 -sg42 -S'CA' -p118 -sg44 -S'42387802' -p119 -sg46 -g47 -sssg48 -(dp120 -g36 -S'NC_000015.9:g.42680002del' -p121 -sg38 -(dp122 -g40 -g41 -sg42 -S'CA' -p123 -sg44 -S'42680000' -p124 -sg46 -g47 -sssg54 -(dp125 -g36 -S'NC_000015.10:g.42387804del' -p126 -sg38 -(dp127 -g40 -g58 -sg42 -S'CA' -p128 -sg44 -S'42387802' -p129 -sg46 -g47 -sssg61 
-(dp130 -g36 -S'NC_000015.9:g.42680002del' -p131 -sg38 -(dp132 -g40 -g58 -sg42 -S'CA' -p133 -sg44 -S'42680000' -p134 -sg46 -g47 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant227.txt b/VariantValidator/testing/testOutputsMasterITS/variant227.txt deleted file mode 100644 index 370c87b2..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant227.txt +++ /dev/null @@ -1,369 +0,0 @@ -(dp0 -S'NM_024344.1:c.550dup' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000015.9:g.42680000CA>CAA automapped to NC_000015.9:g.42680002dupA' -p7 -aS'RefSeqGene record not available' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 2, mRNA -p13 -sS'gene_symbol' -p14 -S'CAPN3' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_077320.1:p.(Thr184AsnfsTer16)' -p19 -sS'slr' -p20 -S'NP_077320.1:p.(T184Nfs*16)' -p21 -ssS'submitted_variant' -p22 -S'15-42680000-CA-CAA' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'HGVS_LRG_variant' -p25 -g4 -sS'HGVS_transcript_variant' -p26 -S'NM_024344.1:c.550dup' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'GRCh38' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000015.10:g.42387804dup' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'15' -p38 -sS'ref' -p39 -S'A' -p40 -sS'pos' -p41 -S'42387803' -p42 -sS'alt' -p43 -S'AA' -p44 -sssS'GRCh37' -p45 -(dp46 -g33 -S'NC_000015.9:g.42680002dup' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'42680001' -p49 -sg43 -S'AA' -p50 -sssS'hg38' -p51 -(dp52 -g33 -S'NC_000015.10:g.42387804dup' -p53 -sg35 -(dp54 -g37 -S'chr15' -p55 -sg39 -g40 -sg41 -S'42387803' -p56 -sg43 -S'AA' -p57 -sssS'hg19' -p58 -(dp59 -g33 -S'NC_000015.9:g.42680002dup' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -g40 -sg41 -S'42680001' -p62 -sg43 -S'AA' -p63 
-sssssS'NM_173087.1:c.550dup' -p64 -(dp65 -g3 -g4 -sg5 -(lp66 -S'NC_000015.9:g.42680000CA>CAA automapped to NC_000015.9:g.42680002dupA' -p67 -aS'RefSeqGene record not available' -p68 -asg9 -g4 -sg10 -(lp69 -sg12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 3, mRNA -p70 -sg14 -S'CAPN3' -p71 -sg16 -(dp72 -g18 -S'NP_775110.1:p.(Thr184AsnfsTer16)' -p73 -sg20 -S'NP_775110.1:p.(T184Nfs*16)' -p74 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_173087.1:c.550dup' -p75 -sg28 -g4 -sg29 -(dp76 -g31 -(dp77 -g33 -S'NC_000015.10:g.42387804dup' -p78 -sg35 -(dp79 -g37 -g38 -sg39 -g40 -sg41 -S'42387803' -p80 -sg43 -S'AA' -p81 -sssg45 -(dp82 -g33 -S'NC_000015.9:g.42680002dup' -p83 -sg35 -(dp84 -g37 -g38 -sg39 -g40 -sg41 -S'42680001' -p85 -sg43 -S'AA' -p86 -sssg51 -(dp87 -g33 -S'NC_000015.10:g.42387804dup' -p88 -sg35 -(dp89 -g37 -g55 -sg39 -g40 -sg41 -S'42387803' -p90 -sg43 -S'AA' -p91 -sssg58 -(dp92 -g33 -S'NC_000015.9:g.42680002dup' -p93 -sg35 -(dp94 -g37 -g55 -sg39 -g40 -sg41 -S'42680001' -p95 -sg43 -S'AA' -p96 -sssssS'flag' -p97 -S'gene_variant' -p98 -sS'NM_000070.2:c.550dup' -p99 -(dp100 -g3 -S'LRG_849t1:c.550dup' -p101 -sg5 -(lp102 -S'NC_000015.9:g.42680000CA>CAA automapped to NC_000015.9:g.42680002dupA' -p103 -aS'The current status of LRG_849 is pending therefore changes may be made to the LRG reference sequence' -p104 -asg9 -g4 -sg10 -(lp105 -sg12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 1, mRNA -p106 -sg14 -S'CAPN3' -p107 -sg16 -(dp108 -g18 -S'NP_000061.1(LRG_849p1):p.(Thr184AsnfsTer16)' -p109 -sg20 -S'NP_000061.1:p.(T184Nfs*16)' -p110 -ssg22 -g23 -sg24 -g4 -sg25 -S'LRG_849:g.44702dup' -p111 -sg26 -S'NM_000070.2:c.550dup' -p112 -sg28 -S'NG_008660.1:g.44702dup' -p113 -sg29 -(dp114 -g31 -(dp115 -g33 -S'NC_000015.10:g.42387804dup' -p116 -sg35 -(dp117 -g37 -g38 -sg39 -g40 -sg41 -S'42387803' -p118 -sg43 -S'AA' -p119 -sssg45 -(dp120 -g33 -S'NC_000015.9:g.42680002dup' -p121 -sg35 -(dp122 -g37 -g38 -sg39 -g40 -sg41 -S'42680001' -p123 -sg43 -S'AA' -p124 -sssg51 
-(dp125 -g33 -S'NC_000015.10:g.42387804dup' -p126 -sg35 -(dp127 -g37 -g55 -sg39 -g40 -sg41 -S'42387803' -p128 -sg43 -S'AA' -p129 -sssg58 -(dp130 -g33 -S'NC_000015.9:g.42680002dup' -p131 -sg35 -(dp132 -g37 -g55 -sg39 -g40 -sg41 -S'42680001' -p133 -sg43 -S'AA' -p134 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant228.txt b/VariantValidator/testing/testOutputsMasterITS/variant228.txt deleted file mode 100644 index fda89ee6..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant228.txt +++ /dev/null @@ -1,699 +0,0 @@ -(dp0 -S'NM_173088.1:c.825_826insTCA' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000015.9:g.42703179T>TTCA automapped to NC_000015.9:g.42703179_42703180insTCA' -p7 -aS'RefSeqGene record not available' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 4, mRNA -p13 -sS'gene_symbol' -p14 -S'CAPN3' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_775111.1:p.(Val275_Arg276insSer)' -p19 -sS'slr' -p20 -S'NP_775111.1:p.(V275_R276insS)' -p21 -ssS'submitted_variant' -p22 -S'15-42703179-T-TTCA' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'HGVS_LRG_variant' -p25 -g4 -sS'HGVS_transcript_variant' -p26 -S'NM_173088.1:c.825_826insTCA' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'GRCh38' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000015.10:g.42410981_42410982insTCA' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'15' -p38 -sS'ref' -p39 -S'T' -p40 -sS'pos' -p41 -S'42410981' -p42 -sS'alt' -p43 -S'TTCA' -p44 -sssS'GRCh37' -p45 -(dp46 -g33 -S'NC_000015.9:g.42703179_42703180insTCA' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'42703179' -p49 -sg43 -S'TTCA' -p50 -sssS'hg38' -p51 -(dp52 -g33 -S'NC_000015.10:g.42410981_42410982insTCA' -p53 -sg35 -(dp54 -g37 
-S'chr15' -p55 -sg39 -g40 -sg41 -S'42410981' -p56 -sg43 -S'TTCA' -p57 -sssS'hg19' -p58 -(dp59 -g33 -S'NC_000015.9:g.42703179_42703180insTCA' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -g40 -sg41 -S'42703179' -p62 -sg43 -S'TTCA' -p63 -sssssS'NM_173090.1:c.366_367insTCA' -p64 -(dp65 -g3 -g4 -sg5 -(lp66 -S'NC_000015.9:g.42703179T>TTCA automapped to NC_000015.9:g.42703179_42703180insTCA' -p67 -aS'RefSeqGene record not available' -p68 -asg9 -g4 -sg10 -(lp69 -sg12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 6, mRNA -p70 -sg14 -S'CAPN3' -p71 -sg16 -(dp72 -g18 -S'NP_775113.1:p.(Val122_Arg123insSer)' -p73 -sg20 -S'NP_775113.1:p.(V122_R123insS)' -p74 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_173090.1:c.366_367insTCA' -p75 -sg28 -g4 -sg29 -(dp76 -g31 -(dp77 -g33 -S'NC_000015.10:g.42410981_42410982insTCA' -p78 -sg35 -(dp79 -g37 -g38 -sg39 -g40 -sg41 -S'42410981' -p80 -sg43 -S'TTCA' -p81 -sssg45 -(dp82 -g33 -S'NC_000015.9:g.42703179_42703180insTCA' -p83 -sg35 -(dp84 -g37 -g38 -sg39 -g40 -sg41 -S'42703179' -p85 -sg43 -S'TTCA' -p86 -sssg51 -(dp87 -g33 -S'NC_000015.10:g.42410981_42410982insTCA' -p88 -sg35 -(dp89 -g37 -g55 -sg39 -g40 -sg41 -S'42410981' -p90 -sg43 -S'TTCA' -p91 -sssg58 -(dp92 -g33 -S'NC_000015.9:g.42703179_42703180insTCA' -p93 -sg35 -(dp94 -g37 -g55 -sg39 -g40 -sg41 -S'42703179' -p95 -sg43 -S'TTCA' -p96 -sssssS'NM_173089.1:c.366_367insTCA' -p97 -(dp98 -g3 -g4 -sg5 -(lp99 -S'NC_000015.9:g.42703179T>TTCA automapped to NC_000015.9:g.42703179_42703180insTCA' -p100 -aS'RefSeqGene record not available' -p101 -asg9 -g4 -sg10 -(lp102 -sg12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 5, mRNA -p103 -sg14 -S'CAPN3' -p104 -sg16 -(dp105 -g18 -S'NP_775112.1:p.(Val122_Arg123insSer)' -p106 -sg20 -S'NP_775112.1:p.(V122_R123insS)' -p107 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_173089.1:c.366_367insTCA' -p108 -sg28 -g4 -sg29 -(dp109 -g31 -(dp110 -g33 -S'NC_000015.10:g.42410981_42410982insTCA' -p111 -sg35 -(dp112 -g37 -g38 -sg39 -g40 -sg41 -S'42410981' -p113 -sg43 
-S'TTCA' -p114 -sssg45 -(dp115 -g33 -S'NC_000015.9:g.42703179_42703180insTCA' -p116 -sg35 -(dp117 -g37 -g38 -sg39 -g40 -sg41 -S'42703179' -p118 -sg43 -S'TTCA' -p119 -sssg51 -(dp120 -g33 -S'NC_000015.10:g.42410981_42410982insTCA' -p121 -sg35 -(dp122 -g37 -g55 -sg39 -g40 -sg41 -S'42410981' -p123 -sg43 -S'TTCA' -p124 -sssg58 -(dp125 -g33 -S'NC_000015.9:g.42703179_42703180insTCA' -p126 -sg35 -(dp127 -g37 -g55 -sg39 -g40 -sg41 -S'42703179' -p128 -sg43 -S'TTCA' -p129 -sssssS'NM_173087.1:c.2085_2086insTCA' -p130 -(dp131 -g3 -g4 -sg5 -(lp132 -S'NC_000015.9:g.42703179T>TTCA automapped to NC_000015.9:g.42703179_42703180insTCA' -p133 -aS'RefSeqGene record not available' -p134 -asg9 -g4 -sg10 -(lp135 -sg12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 3, mRNA -p136 -sg14 -S'CAPN3' -p137 -sg16 -(dp138 -g18 -S'NP_775110.1:p.(Val695_Arg696insSer)' -p139 -sg20 -S'NP_775110.1:p.(V695_R696insS)' -p140 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_173087.1:c.2085_2086insTCA' -p141 -sg28 -g4 -sg29 -(dp142 -g31 -(dp143 -g33 -S'NC_000015.10:g.42410981_42410982insTCA' -p144 -sg35 -(dp145 -g37 -g38 -sg39 -g40 -sg41 -S'42410981' -p146 -sg43 -S'TTCA' -p147 -sssg45 -(dp148 -g33 -S'NC_000015.9:g.42703179_42703180insTCA' -p149 -sg35 -(dp150 -g37 -g38 -sg39 -g40 -sg41 -S'42703179' -p151 -sg43 -S'TTCA' -p152 -sssg51 -(dp153 -g33 -S'NC_000015.10:g.42410981_42410982insTCA' -p154 -sg35 -(dp155 -g37 -g55 -sg39 -g40 -sg41 -S'42410981' -p156 -sg43 -S'TTCA' -p157 -sssg58 -(dp158 -g33 -S'NC_000015.9:g.42703179_42703180insTCA' -p159 -sg35 -(dp160 -g37 -g55 -sg39 -g40 -sg41 -S'42703179' -p161 -sg43 -S'TTCA' -p162 -sssssS'flag' -p163 -S'gene_variant' -p164 -sS'NM_000070.2:c.2361_2362insTCA' -p165 -(dp166 -g3 -S'LRG_849t1:c.2361_2362insTCA' -p167 -sg5 -(lp168 -S'NC_000015.9:g.42703179T>TTCA automapped to NC_000015.9:g.42703179_42703180insTCA' -p169 -aS'The current status of LRG_849 is pending therefore changes may be made to the LRG reference sequence' -p170 -asg9 -g4 -sg10 -(lp171 -sg12 -VHomo sapiens 
calpain 3 (CAPN3), transcript variant 1, mRNA -p172 -sg14 -S'CAPN3' -p173 -sg16 -(dp174 -g18 -S'NP_000061.1(LRG_849p1):p.(Val787_Arg788insSer)' -p175 -sg20 -S'NP_000061.1:p.(V787_R788insS)' -p176 -ssg22 -g23 -sg24 -g4 -sg25 -S'LRG_849:g.67879_67880insTCA' -p177 -sg26 -S'NM_000070.2:c.2361_2362insTCA' -p178 -sg28 -S'NG_008660.1:g.67879_67880insTCA' -p179 -sg29 -(dp180 -g31 -(dp181 -g33 -S'NC_000015.10:g.42410981_42410982insTCA' -p182 -sg35 -(dp183 -g37 -g38 -sg39 -g40 -sg41 -S'42410981' -p184 -sg43 -S'TTCA' -p185 -sssg45 -(dp186 -g33 -S'NC_000015.9:g.42703179_42703180insTCA' -p187 -sg35 -(dp188 -g37 -g38 -sg39 -g40 -sg41 -S'42703179' -p189 -sg43 -S'TTCA' -p190 -sssg51 -(dp191 -g33 -S'NC_000015.10:g.42410981_42410982insTCA' -p192 -sg35 -(dp193 -g37 -g55 -sg39 -g40 -sg41 -S'42410981' -p194 -sg43 -S'TTCA' -p195 -sssg58 -(dp196 -g33 -S'NC_000015.9:g.42703179_42703180insTCA' -p197 -sg35 -(dp198 -g37 -g55 -sg39 -g40 -sg41 -S'42703179' -p199 -sg43 -S'TTCA' -p200 -sssssS'NM_024344.1:c.2343_2344insTCA' -p201 -(dp202 -g3 -g4 -sg5 -(lp203 -S'NC_000015.9:g.42703179T>TTCA automapped to NC_000015.9:g.42703179_42703180insTCA' -p204 -aS'RefSeqGene record not available' -p205 -asg9 -g4 -sg10 -(lp206 -sg12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 2, mRNA -p207 -sg14 -S'CAPN3' -p208 -sg16 -(dp209 -g18 -S'NP_077320.1:p.(Val781_Arg782insSer)' -p210 -sg20 -S'NP_077320.1:p.(V781_R782insS)' -p211 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_024344.1:c.2343_2344insTCA' -p212 -sg28 -g4 -sg29 -(dp213 -g31 -(dp214 -g33 -S'NC_000015.10:g.42410981_42410982insTCA' -p215 -sg35 -(dp216 -g37 -g38 -sg39 -g40 -sg41 -S'42410981' -p217 -sg43 -S'TTCA' -p218 -sssg45 -(dp219 -g33 -S'NC_000015.9:g.42703179_42703180insTCA' -p220 -sg35 -(dp221 -g37 -g38 -sg39 -g40 -sg41 -S'42703179' -p222 -sg43 -S'TTCA' -p223 -sssg51 -(dp224 -g33 -S'NC_000015.10:g.42410981_42410982insTCA' -p225 -sg35 -(dp226 -g37 -g55 -sg39 -g40 -sg41 -S'42410981' -p227 -sg43 -S'TTCA' -p228 -sssg58 -(dp229 -g33 
-S'NC_000015.9:g.42703179_42703180insTCA' -p230 -sg35 -(dp231 -g37 -g55 -sg39 -g40 -sg41 -S'42703179' -p232 -sg43 -S'TTCA' -p233 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant229.txt b/VariantValidator/testing/testOutputsMasterITS/variant229.txt deleted file mode 100644 index 0235dfbd..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant229.txt +++ /dev/null @@ -1,704 +0,0 @@ -(dp0 -S'NM_024344.1:c.2344_2345delinsTCATCT' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000015.9:g.42703179TAG>TTCATCT automapped to NC_000015.9:g.42703180_42703181delAGinsTCATCT' -p7 -aS'RefSeqGene record not available' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 2, mRNA -p13 -sS'gene_symbol' -p14 -S'CAPN3' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_077320.1:p.(Arg782SerfsTer14)' -p19 -sS'slr' -p20 -S'NP_077320.1:p.(R782Sfs*14)' -p21 -ssS'submitted_variant' -p22 -S'15-42703179-TAG-TTCATCT' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'HGVS_LRG_variant' -p25 -g4 -sS'HGVS_transcript_variant' -p26 -S'NM_024344.1:c.2344_2345delinsTCATCT' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'GRCh38' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000015.10:g.42410982_42410983delinsTCATCT' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'15' -p38 -sS'ref' -p39 -S'AG' -p40 -sS'pos' -p41 -S'42410982' -p42 -sS'alt' -p43 -S'TCATCT' -p44 -sssS'GRCh37' -p45 -(dp46 -g33 -S'NC_000015.9:g.42703180_42703181delinsTCATCT' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'AG' -p49 -sg41 -S'42703180' -p50 -sg43 -g44 -sssS'hg38' -p51 -(dp52 -g33 -S'NC_000015.10:g.42410982_42410983delinsTCATCT' -p53 -sg35 -(dp54 -g37 -S'chr15' -p55 -sg39 -S'AG' -p56 -sg41 -S'42410982' -p57 -sg43 -g44 -sssS'hg19' -p58 
-(dp59 -g33 -S'NC_000015.9:g.42703180_42703181delinsTCATCT' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -S'AG' -p62 -sg41 -S'42703180' -p63 -sg43 -g44 -sssssS'NM_173090.1:c.367_368delinsTCATCT' -p64 -(dp65 -g3 -g4 -sg5 -(lp66 -S'NC_000015.9:g.42703179TAG>TTCATCT automapped to NC_000015.9:g.42703180_42703181delAGinsTCATCT' -p67 -aS'RefSeqGene record not available' -p68 -asg9 -g4 -sg10 -(lp69 -sg12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 6, mRNA -p70 -sg14 -S'CAPN3' -p71 -sg16 -(dp72 -g18 -S'NP_775113.1:p.(Arg123SerfsTer14)' -p73 -sg20 -S'NP_775113.1:p.(R123Sfs*14)' -p74 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_173090.1:c.367_368delinsTCATCT' -p75 -sg28 -g4 -sg29 -(dp76 -g31 -(dp77 -g33 -S'NC_000015.10:g.42410982_42410983delinsTCATCT' -p78 -sg35 -(dp79 -g37 -g38 -sg39 -S'AG' -p80 -sg41 -S'42410982' -p81 -sg43 -S'TCATCT' -p82 -sssg45 -(dp83 -g33 -S'NC_000015.9:g.42703180_42703181delinsTCATCT' -p84 -sg35 -(dp85 -g37 -g38 -sg39 -S'AG' -p86 -sg41 -S'42703180' -p87 -sg43 -g82 -sssg51 -(dp88 -g33 -S'NC_000015.10:g.42410982_42410983delinsTCATCT' -p89 -sg35 -(dp90 -g37 -g55 -sg39 -S'AG' -p91 -sg41 -S'42410982' -p92 -sg43 -g82 -sssg58 -(dp93 -g33 -S'NC_000015.9:g.42703180_42703181delinsTCATCT' -p94 -sg35 -(dp95 -g37 -g55 -sg39 -S'AG' -p96 -sg41 -S'42703180' -p97 -sg43 -g82 -sssssS'flag' -p98 -S'gene_variant' -p99 -sS'NM_000070.2:c.2362_2363delinsTCATCT' -p100 -(dp101 -g3 -S'LRG_849t1:c.2362_2363delinsTCATCT' -p102 -sg5 -(lp103 -S'NC_000015.9:g.42703179TAG>TTCATCT automapped to NC_000015.9:g.42703180_42703181delAGinsTCATCT' -p104 -aS'The current status of LRG_849 is pending therefore changes may be made to the LRG reference sequence' -p105 -asg9 -g4 -sg10 -(lp106 -sg12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 1, mRNA -p107 -sg14 -S'CAPN3' -p108 -sg16 -(dp109 -g18 -S'NP_000061.1(LRG_849p1):p.(Arg788SerfsTer14)' -p110 -sg20 -S'NP_000061.1:p.(R788Sfs*14)' -p111 -ssg22 -g23 -sg24 -g4 -sg25 -S'LRG_849:g.67880_67881delinsTCATCT' -p112 -sg26 
-S'NM_000070.2:c.2362_2363delinsTCATCT' -p113 -sg28 -S'NG_008660.1:g.67880_67881delinsTCATCT' -p114 -sg29 -(dp115 -g31 -(dp116 -g33 -S'NC_000015.10:g.42410982_42410983delinsTCATCT' -p117 -sg35 -(dp118 -g37 -g38 -sg39 -S'AG' -p119 -sg41 -S'42410982' -p120 -sg43 -S'TCATCT' -p121 -sssg45 -(dp122 -g33 -S'NC_000015.9:g.42703180_42703181delinsTCATCT' -p123 -sg35 -(dp124 -g37 -g38 -sg39 -S'AG' -p125 -sg41 -S'42703180' -p126 -sg43 -g121 -sssg51 -(dp127 -g33 -S'NC_000015.10:g.42410982_42410983delinsTCATCT' -p128 -sg35 -(dp129 -g37 -g55 -sg39 -S'AG' -p130 -sg41 -S'42410982' -p131 -sg43 -g121 -sssg58 -(dp132 -g33 -S'NC_000015.9:g.42703180_42703181delinsTCATCT' -p133 -sg35 -(dp134 -g37 -g55 -sg39 -S'AG' -p135 -sg41 -S'42703180' -p136 -sg43 -g121 -sssssS'NM_173088.1:c.826_827delinsTCATCT' -p137 -(dp138 -g3 -g4 -sg5 -(lp139 -S'NC_000015.9:g.42703179TAG>TTCATCT automapped to NC_000015.9:g.42703180_42703181delAGinsTCATCT' -p140 -aS'RefSeqGene record not available' -p141 -asg9 -g4 -sg10 -(lp142 -sg12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 4, mRNA -p143 -sg14 -S'CAPN3' -p144 -sg16 -(dp145 -g18 -S'NP_775111.1:p.(Arg276SerfsTer14)' -p146 -sg20 -S'NP_775111.1:p.(R276Sfs*14)' -p147 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_173088.1:c.826_827delinsTCATCT' -p148 -sg28 -g4 -sg29 -(dp149 -g31 -(dp150 -g33 -S'NC_000015.10:g.42410982_42410983delinsTCATCT' -p151 -sg35 -(dp152 -g37 -g38 -sg39 -S'AG' -p153 -sg41 -S'42410982' -p154 -sg43 -S'TCATCT' -p155 -sssg45 -(dp156 -g33 -S'NC_000015.9:g.42703180_42703181delinsTCATCT' -p157 -sg35 -(dp158 -g37 -g38 -sg39 -S'AG' -p159 -sg41 -S'42703180' -p160 -sg43 -g155 -sssg51 -(dp161 -g33 -S'NC_000015.10:g.42410982_42410983delinsTCATCT' -p162 -sg35 -(dp163 -g37 -g55 -sg39 -S'AG' -p164 -sg41 -S'42410982' -p165 -sg43 -g155 -sssg58 -(dp166 -g33 -S'NC_000015.9:g.42703180_42703181delinsTCATCT' -p167 -sg35 -(dp168 -g37 -g55 -sg39 -S'AG' -p169 -sg41 -S'42703180' -p170 -sg43 -g155 -sssssS'NM_173089.1:c.367_368delinsTCATCT' -p171 -(dp172 -g3 -g4 -sg5 
-(lp173 -S'NC_000015.9:g.42703179TAG>TTCATCT automapped to NC_000015.9:g.42703180_42703181delAGinsTCATCT' -p174 -aS'RefSeqGene record not available' -p175 -asg9 -g4 -sg10 -(lp176 -sg12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 5, mRNA -p177 -sg14 -S'CAPN3' -p178 -sg16 -(dp179 -g18 -S'NP_775112.1:p.(Arg123SerfsTer14)' -p180 -sg20 -S'NP_775112.1:p.(R123Sfs*14)' -p181 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_173089.1:c.367_368delinsTCATCT' -p182 -sg28 -g4 -sg29 -(dp183 -g31 -(dp184 -g33 -S'NC_000015.10:g.42410982_42410983delinsTCATCT' -p185 -sg35 -(dp186 -g37 -g38 -sg39 -S'AG' -p187 -sg41 -S'42410982' -p188 -sg43 -S'TCATCT' -p189 -sssg45 -(dp190 -g33 -S'NC_000015.9:g.42703180_42703181delinsTCATCT' -p191 -sg35 -(dp192 -g37 -g38 -sg39 -S'AG' -p193 -sg41 -S'42703180' -p194 -sg43 -g189 -sssg51 -(dp195 -g33 -S'NC_000015.10:g.42410982_42410983delinsTCATCT' -p196 -sg35 -(dp197 -g37 -g55 -sg39 -S'AG' -p198 -sg41 -S'42410982' -p199 -sg43 -g189 -sssg58 -(dp200 -g33 -S'NC_000015.9:g.42703180_42703181delinsTCATCT' -p201 -sg35 -(dp202 -g37 -g55 -sg39 -S'AG' -p203 -sg41 -S'42703180' -p204 -sg43 -g189 -sssssS'NM_173087.1:c.2086_2087delinsTCATCT' -p205 -(dp206 -g3 -g4 -sg5 -(lp207 -S'NC_000015.9:g.42703179TAG>TTCATCT automapped to NC_000015.9:g.42703180_42703181delAGinsTCATCT' -p208 -aS'RefSeqGene record not available' -p209 -asg9 -g4 -sg10 -(lp210 -sg12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 3, mRNA -p211 -sg14 -S'CAPN3' -p212 -sg16 -(dp213 -g18 -S'NP_775110.1:p.(Arg696SerfsTer14)' -p214 -sg20 -S'NP_775110.1:p.(R696Sfs*14)' -p215 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_173087.1:c.2086_2087delinsTCATCT' -p216 -sg28 -g4 -sg29 -(dp217 -g31 -(dp218 -g33 -S'NC_000015.10:g.42410982_42410983delinsTCATCT' -p219 -sg35 -(dp220 -g37 -g38 -sg39 -S'AG' -p221 -sg41 -S'42410982' -p222 -sg43 -S'TCATCT' -p223 -sssg45 -(dp224 -g33 -S'NC_000015.9:g.42703180_42703181delinsTCATCT' -p225 -sg35 -(dp226 -g37 -g38 -sg39 -S'AG' -p227 -sg41 -S'42703180' -p228 -sg43 -g223 
-sssg51 -(dp229 -g33 -S'NC_000015.10:g.42410982_42410983delinsTCATCT' -p230 -sg35 -(dp231 -g37 -g55 -sg39 -S'AG' -p232 -sg41 -S'42410982' -p233 -sg43 -g223 -sssg58 -(dp234 -g33 -S'NC_000015.9:g.42703180_42703181delinsTCATCT' -p235 -sg35 -(dp236 -g37 -g55 -sg39 -S'AG' -p237 -sg41 -S'42703180' -p238 -sg43 -g223 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant23.txt b/VariantValidator/testing/testOutputsMasterITS/variant23.txt deleted file mode 100644 index c283393a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant23.txt +++ /dev/null @@ -1,60 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant' -p7 -aS'Instead use NC_000011.9:g.5244828_5248381=' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -g4 -sS'gene_symbol' -p13 -g4 -sS'HGVS_predicted_protein_consequence' -p14 -(dp15 -S'tlr' -p16 -g4 -sS'slr' -p17 -g4 -ssS'submitted_variant' -p18 -S'NM_000518.4:c.-130_*2000=' -p19 -sS'genome_context_intronic_sequence' -p20 -g4 -sS'HGVS_LRG_variant' -p21 -g4 -sS'HGVS_transcript_variant' -p22 -g4 -sS'HGVS_RefSeqGene_variant' -p23 -g4 -sS'primary_assembly_loci' -p24 -(dp25 -ssS'flag' -p26 -S'warning' -p27 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant230.txt b/VariantValidator/testing/testOutputsMasterITS/variant230.txt deleted file mode 100644 index bb56c5b9..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant230.txt +++ /dev/null @@ -1,142 +0,0 @@ -(dp0 -S'NM_000138.4:c.2927G>A' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_778t1:c.2927G>A' -p4 -sS'validation_warnings' -p5 -(lp6 -sS'RefSeqGene_context_intronic_sequence' -p7 -S'' -p8 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens fibrillin 1 (FBN1), mRNA -p12 -sS'gene_symbol' -p13 -S'FBN1' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_000129.3(LRG_778p1):p.(Arg976His)' -p18 -sS'slr' -p19 -S'NP_000129.3:p.(R976H)' -p20 -ssS'submitted_variant' -p21 -S'15-48782203-C-T' -p22 -sS'genome_context_intronic_sequence' -p23 -g8 -sS'HGVS_LRG_variant' -p24 -S'LRG_778:g.160783G>A' -p25 -sS'HGVS_transcript_variant' -p26 -S'NM_000138.4:c.2927G>A' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -S'NG_008805.2:g.160783G>A' -p29 -sS'primary_assembly_loci' -p30 -(dp31 -S'GRCh38' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000015.10:g.48490006C>T' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'15' -p39 -sS'ref' -p40 -VC -p41 -sS'pos' -p42 -S'48490006' -p43 -sS'alt' -p44 -VT -p45 -sssS'GRCh37' -p46 -(dp47 -g34 -S'NC_000015.9:g.48782203C>T' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'48782203' -p50 -sg44 -g45 -sssS'hg38' -p51 -(dp52 -g34 -S'NC_000015.10:g.48490006C>T' -p53 -sg36 -(dp54 -g38 -S'chr15' -p55 -sg40 -g41 -sg42 -S'48490006' -p56 -sg44 -g45 -sssS'hg19' -p57 -(dp58 -g34 -S'NC_000015.9:g.48782203C>T' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'48782203' -p61 -sg44 -g45 -sssssS'flag' -p62 -S'gene_variant' -p63 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant231.txt b/VariantValidator/testing/testOutputsMasterITS/variant231.txt deleted file mode 100644 index e7f8d161..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant231.txt +++ /dev/null @@ -1,449 +0,0 @@ -(dp0 -S'NM_014249.2:c.946_949=' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000015.9:g.72105929CC>C automapped to NC_000015.9:g.72105933delC' -p7 -aS'The displayed variants may be artefacts of aligning NM_014249.2 with genome build GRCh37' -p8 -aS'NC_000015.9:g.72105928_72105929 contains 1 genomic base(s) that fail to align to transcript NM_014249.2' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -aS'A more recent version of the selected reference sequence NM_014249.2 is available (NM_014249.3)' -p12 -aS'NM_014249.3:c.946_949GACC= MUST be fully validated prior to use in reports' -p13 -aS'select_variants=NM_014249.3:c.946_949=' -p14 -aS'RefSeqGene record not available' -p15 -asS'RefSeqGene_context_intronic_sequence' -p16 -g4 -sS'alt_genomic_loci' -p17 -(lp18 -sS'transcript_description' -p19 -VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA -p20 -sS'gene_symbol' -p21 -S'NR2E3' -p22 -sS'HGVS_predicted_protein_consequence' -p23 -(dp24 -S'tlr' -p25 -S'NP_055064.1:p.(Asp316=)' -p26 -sS'slr' -p27 -S'NP_055064.1:p.(D316=)' -p28 -ssS'submitted_variant' -p29 -S'15-72105929-CC-C' -p30 -sS'genome_context_intronic_sequence' -p31 -g4 -sS'HGVS_LRG_variant' -p32 -g4 -sS'HGVS_transcript_variant' -p33 -S'NM_014249.2:c.946_949=' -p34 -sS'HGVS_RefSeqGene_variant' -p35 -g4 -sS'primary_assembly_loci' -p36 -(dp37 -S'hg19' -p38 -(dp39 -S'HGVS_genomic_description' -p40 -S'NC_000015.9:g.72105933del' -p41 -sS'vcf' -p42 -(dp43 -S'chr' -p44 -S'chr15' -p45 -sS'ref' -p46 -S'AC' -p47 -sS'pos' -p48 
-S'72105928' -p49 -sS'alt' -p50 -S'A' -p51 -sssS'GRCh37' -p52 -(dp53 -g40 -S'NC_000015.9:g.72105933del' -p54 -sg42 -(dp55 -g44 -S'15' -p56 -sg46 -S'AC' -p57 -sg48 -S'72105928' -p58 -sg50 -g51 -sssssS'flag' -p59 -S'gene_variant' -p60 -sS'NM_014249.3:c.946_949=' -p61 -(dp62 -g3 -g4 -sg5 -(lp63 -S'NC_000015.9:g.72105929CC>C automapped to NC_000015.9:g.72105933delC' -p64 -aS'The displayed variants may be artefacts of aligning NM_014249.3 with genome build GRCh37' -p65 -aS'NC_000015.9:g.72105928_72105929 contains 1 genomic base(s) that fail to align to transcript NM_014249.3' -p66 -aS'Caution should be used when reporting the displayed variant descriptions' -p67 -aS'If you are unsure, please contact admin' -p68 -asg16 -g4 -sg17 -(lp69 -sg19 -VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA -p70 -sg21 -S'NR2E3' -p71 -sg23 -(dp72 -g25 -S'NP_055064.1:p.(Asp316=)' -p73 -sg27 -S'NP_055064.1:p.(D316=)' -p74 -ssg29 -g30 -sg31 -g4 -sg32 -g4 -sg33 -S'NM_014249.3:c.946_949=' -p75 -sg35 -S'NG_009113.1:g.8034_8037=' -p76 -sg36 -(dp77 -S'GRCh38' -p78 -(dp79 -g40 -S'NC_000015.10:g.71813586_71813591=' -p80 -sg42 -(dp81 -g44 -g56 -sg46 -VGGACCC -p82 -sg48 -S'71813586' -p83 -sg50 -g82 -sssg52 -(dp84 -g40 -S'NC_000015.9:g.72105933del' -p85 -sg42 -(dp86 -g44 -g56 -sg46 -S'AC' -p87 -sg48 -S'72105928' -p88 -sg50 -g51 -sssS'hg38' -p89 -(dp90 -g40 -S'NC_000015.10:g.71813586_71813591=' -p91 -sg42 -(dp92 -g44 -g45 -sg46 -g82 -sg48 -S'71813586' -p93 -sg50 -g82 -sssg38 -(dp94 -g40 -S'NC_000015.9:g.72105933del' -p95 -sg42 -(dp96 -g44 -g45 -sg46 -S'AC' -p97 -sg48 -S'72105928' -p98 -sg50 -g51 -sssssS'NM_016346.2:c.946_949=' -p99 -(dp100 -g3 -g4 -sg5 -(lp101 -S'NC_000015.9:g.72105929CC>C automapped to NC_000015.9:g.72105933delC' -p102 -aS'The displayed variants may be artefacts of aligning NM_016346.2 with genome build GRCh37' -p103 -aS'NC_000015.9:g.72105928_72105929 contains 1 genomic base(s) that fail to align to transcript NM_016346.2' -p104 
-aS'Caution should be used when reporting the displayed variant descriptions' -p105 -aS'If you are unsure, please contact admin' -p106 -aS'A more recent version of the selected reference sequence NM_016346.2 is available (NM_016346.3)' -p107 -aS'NM_016346.3:c.946_949GACC= MUST be fully validated prior to use in reports' -p108 -aS'select_variants=NM_016346.3:c.946_949=' -p109 -aS'RefSeqGene record not available' -p110 -asg16 -g4 -sg17 -(lp111 -sg19 -VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA -p112 -sg21 -S'NR2E3' -p113 -sg23 -(dp114 -g25 -S'NP_057430.1:p.(Asp316=)' -p115 -sg27 -S'NP_057430.1:p.(D316=)' -p116 -ssg29 -g30 -sg31 -g4 -sg32 -g4 -sg33 -S'NM_016346.2:c.946_949=' -p117 -sg35 -g4 -sg36 -(dp118 -g38 -(dp119 -g40 -S'NC_000015.9:g.72105933del' -p120 -sg42 -(dp121 -g44 -g45 -sg46 -S'AC' -p122 -sg48 -S'72105928' -p123 -sg50 -g51 -sssg52 -(dp124 -g40 -S'NC_000015.9:g.72105933del' -p125 -sg42 -(dp126 -g44 -g56 -sg46 -S'AC' -p127 -sg48 -S'72105928' -p128 -sg50 -g51 -sssssS'NM_016346.3:c.946_949=' -p129 -(dp130 -g3 -g4 -sg5 -(lp131 -S'NC_000015.9:g.72105929CC>C automapped to NC_000015.9:g.72105933delC' -p132 -aS'The displayed variants may be artefacts of aligning NM_016346.3 with genome build GRCh37' -p133 -aS'NC_000015.9:g.72105928_72105929 contains 1 genomic base(s) that fail to align to transcript NM_016346.3' -p134 -aS'Caution should be used when reporting the displayed variant descriptions' -p135 -aS'If you are unsure, please contact admin' -p136 -aS'RefSeqGene record not available' -p137 -asg16 -g4 -sg17 -(lp138 -sg19 -VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA -p139 -sg21 -S'NR2E3' -p140 -sg23 -(dp141 -g25 -S'NP_057430.1:p.(Asp316=)' -p142 -sg27 -S'NP_057430.1:p.(D316=)' -p143 -ssg29 -g30 -sg31 -g4 -sg32 -g4 -sg33 -S'NM_016346.3:c.946_949=' -p144 -sg35 -g4 -sg36 -(dp145 -g78 -(dp146 -g40 -S'NC_000015.10:g.71813587_71813590=' -p147 -sg42 -(dp148 -g44 -g56 
-sg46 -VGACC -p149 -sg48 -S'71813587' -p150 -sg50 -g149 -sssg52 -(dp151 -g40 -S'NC_000015.9:g.72105933del' -p152 -sg42 -(dp153 -g44 -g56 -sg46 -S'AC' -p154 -sg48 -S'72105928' -p155 -sg50 -g51 -sssg89 -(dp156 -g40 -S'NC_000015.10:g.71813587_71813590=' -p157 -sg42 -(dp158 -g44 -g45 -sg46 -g149 -sg48 -S'71813587' -p159 -sg50 -g149 -sssg38 -(dp160 -g40 -S'NC_000015.9:g.72105933del' -p161 -sg42 -(dp162 -g44 -g45 -sg46 -S'AC' -p163 -sg48 -S'72105928' -p164 -sg50 -g51 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant232.txt b/VariantValidator/testing/testOutputsMasterITS/variant232.txt deleted file mode 100644 index dbf5f693..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant232.txt +++ /dev/null @@ -1,245 +0,0 @@ -(dp0 -S'NM_002693.2:c.752C>T' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_765t1:c.752C>T' -p4 -sS'validation_warnings' -p5 -(lp6 -sS'RefSeqGene_context_intronic_sequence' -p7 -S'' -p8 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens DNA polymerase gamma, catalytic subunit (POLG), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'POLG' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_002684.1(LRG_765p1):p.(Thr251Ile)' -p18 -sS'slr' -p19 -S'NP_002684.1:p.(T251I)' -p20 -ssS'submitted_variant' -p21 -S'15-89873415-G-A' -p22 -sS'genome_context_intronic_sequence' -p23 -g8 -sS'HGVS_LRG_variant' -p24 -g8 -sS'HGVS_transcript_variant' -p25 -S'NM_002693.2:c.752C>T' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -S'NG_008218.1:g.9612C>T' -p28 -sS'primary_assembly_loci' -p29 -(dp30 -S'GRCh38' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000015.10:g.89330184G>A' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'15' -p38 -sS'ref' -p39 -VG -p40 -sS'pos' -p41 -S'89330184' -p42 -sS'alt' -p43 -VA -p44 -sssS'GRCh37' -p45 -(dp46 -g33 -S'NC_000015.9:g.89873415G>A' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'89873415' -p49 -sg43 -g44 -sssS'hg38' 
-p50 -(dp51 -g33 -S'NC_000015.10:g.89330184G>A' -p52 -sg35 -(dp53 -g37 -S'chr15' -p54 -sg39 -g40 -sg41 -S'89330184' -p55 -sg43 -g44 -sssS'hg19' -p56 -(dp57 -g33 -S'NC_000015.9:g.89873415G>A' -p58 -sg35 -(dp59 -g37 -g54 -sg39 -g40 -sg41 -S'89873415' -p60 -sg43 -g44 -sssssS'flag' -p61 -S'gene_variant' -p62 -sS'NM_001126131.1:c.752C>T' -p63 -(dp64 -g3 -g8 -sg5 -(lp65 -S'RefSeqGene record not available' -p66 -asg7 -g8 -sg9 -(lp67 -sg11 -VHomo sapiens DNA polymerase gamma, catalytic subunit (POLG), transcript variant 2, mRNA -p68 -sg13 -S'POLG' -p69 -sg15 -(dp70 -g17 -S'NP_001119603.1:p.(Thr251Ile)' -p71 -sg19 -S'NP_001119603.1:p.(T251I)' -p72 -ssg21 -g22 -sg23 -g8 -sg24 -g8 -sg25 -S'NM_001126131.1:c.752C>T' -p73 -sg27 -g8 -sg29 -(dp74 -g31 -(dp75 -g33 -S'NC_000015.10:g.89330184G>A' -p76 -sg35 -(dp77 -g37 -g38 -sg39 -g40 -sg41 -S'89330184' -p78 -sg43 -g44 -sssg45 -(dp79 -g33 -S'NC_000015.9:g.89873415G>A' -p80 -sg35 -(dp81 -g37 -g38 -sg39 -g40 -sg41 -S'89873415' -p82 -sg43 -g44 -sssg50 -(dp83 -g33 -S'NC_000015.10:g.89330184G>A' -p84 -sg35 -(dp85 -g37 -g54 -sg39 -g40 -sg41 -S'89330184' -p86 -sg43 -g44 -sssg56 -(dp87 -g33 -S'NC_000015.9:g.89873415G>A' -p88 -sg35 -(dp89 -g37 -g54 -sg39 -g40 -sg41 -S'89873415' -p90 -sg43 -g44 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant233.txt b/VariantValidator/testing/testOutputsMasterITS/variant233.txt deleted file mode 100644 index f42c16d5..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant233.txt +++ /dev/null @@ -1,1146 +0,0 @@ -(dp0 -S'NM_000548.3:c.277C>T' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_487t1:c.277C>T' -p4 -sS'validation_warnings' -p5 -(lp6 -S'A more recent version of the selected reference sequence NM_000548.3 is available (NM_000548.4)' -p7 -aS'NM_000548.4:c.277C>T MUST be fully validated prior to use in reports' -p8 -aS'select_variants=NM_000548.4:c.277C>T' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens tuberous sclerosis 2 (TSC2), transcript variant 1, mRNA -p15 -sS'gene_symbol' -p16 -S'TSC2' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000539.2(LRG_487p1):p.(Arg93Trp)' -p21 -sS'slr' -p22 -S'NP_000539.2:p.(R93W)' -p23 -ssS'submitted_variant' -p24 -S'16-2103394-C-T' -p25 -sS'genome_context_intronic_sequence' -p26 -g11 -sS'HGVS_LRG_variant' -p27 -S'LRG_487:g.9088C>T' -p28 -sS'HGVS_transcript_variant' -p29 -S'NM_000548.3:c.277C>T' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_005895.1:g.9088C>T' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000016.9:g.2103394C>T' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr16' -p42 -sS'ref' -p43 -S'C' -p44 -sS'pos' -p45 -S'2103394' -p46 -sS'alt' -p47 -S'T' -p48 -sssS'GRCh37' -p49 -(dp50 -g37 -S'NC_000016.9:g.2103394C>T' -p51 -sg39 -(dp52 -g41 -S'16' -p53 -sg43 -g44 -sg45 -S'2103394' -p54 -sg47 -g48 -sssssS'NM_001318832.1:c.310C>T' -p55 -(dp56 -g3 -g11 -sg5 -(lp57 -S'RefSeqGene record not available' -p58 -asg10 -g11 -sg12 -(lp59 -sg14 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 9, mRNA -p60 -sg16 -S'TSC2' -p61 -sg18 
-(dp62 -g20 -S'NP_001305761.1:p.(Arg104Trp)' -p63 -sg22 -S'NP_001305761.1:p.(R104W)' -p64 -ssg24 -g25 -sg26 -g11 -sg27 -g11 -sg29 -S'NM_001318832.1:c.310C>T' -p65 -sg31 -g11 -sg33 -(dp66 -S'GRCh38' -p67 -(dp68 -g37 -S'NC_000016.10:g.2053393C>T' -p69 -sg39 -(dp70 -g41 -g53 -sg43 -g44 -sg45 -S'2053393' -p71 -sg47 -g48 -sssg49 -(dp72 -g37 -S'NC_000016.9:g.2103394C>T' -p73 -sg39 -(dp74 -g41 -g53 -sg43 -g44 -sg45 -S'2103394' -p75 -sg47 -g48 -sssS'hg38' -p76 -(dp77 -g37 -S'NC_000016.10:g.2053393C>T' -p78 -sg39 -(dp79 -g41 -g42 -sg43 -g44 -sg45 -S'2053393' -p80 -sg47 -g48 -sssg35 -(dp81 -g37 -S'NC_000016.9:g.2103394C>T' -p82 -sg39 -(dp83 -g41 -g42 -sg43 -g44 -sg45 -S'2103394' -p84 -sg47 -g48 -sssssS'NM_001318829.1:c.130C>T' -p85 -(dp86 -g3 -g11 -sg5 -(lp87 -S'RefSeqGene record not available' -p88 -asg10 -g11 -sg12 -(lp89 -sg14 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 7, mRNA -p90 -sg16 -S'TSC2' -p91 -sg18 -(dp92 -g20 -S'NP_001305758.1:p.(Arg44Trp)' -p93 -sg22 -S'NP_001305758.1:p.(R44W)' -p94 -ssg24 -g25 -sg26 -g11 -sg27 -g11 -sg29 -S'NM_001318829.1:c.130C>T' -p95 -sg31 -g11 -sg33 -(dp96 -g67 -(dp97 -g37 -S'NC_000016.10:g.2053393C>T' -p98 -sg39 -(dp99 -g41 -g53 -sg43 -g44 -sg45 -S'2053393' -p100 -sg47 -g48 -sssg49 -(dp101 -g37 -S'NC_000016.9:g.2103394C>T' -p102 -sg39 -(dp103 -g41 -g53 -sg43 -g44 -sg45 -S'2103394' -p104 -sg47 -g48 -sssg76 -(dp105 -g37 -S'NC_000016.10:g.2053393C>T' -p106 -sg39 -(dp107 -g41 -g42 -sg43 -g44 -sg45 -S'2053393' -p108 -sg47 -g48 -sssg35 -(dp109 -g37 -S'NC_000016.9:g.2103394C>T' -p110 -sg39 -(dp111 -g41 -g42 -sg43 -g44 -sg45 -S'2103394' -p112 -sg47 -g48 -sssssS'NM_001077183.2:c.277C>T' -p113 -(dp114 -g3 -g11 -sg5 -(lp115 -S'RefSeqGene record not available' -p116 -asg10 -g11 -sg12 -(lp117 -sg14 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 4, mRNA -p118 -sg16 -S'TSC2' -p119 -sg18 -(dp120 -g20 -S'NP_001070651.1:p.(Arg93Trp)' -p121 -sg22 -S'NP_001070651.1:p.(R93W)' -p122 -ssg24 -g25 -sg26 -g11 -sg27 -g11 -sg29 
-S'NM_001077183.2:c.277C>T' -p123 -sg31 -g11 -sg33 -(dp124 -g67 -(dp125 -g37 -S'NC_000016.10:g.2053393C>T' -p126 -sg39 -(dp127 -g41 -g53 -sg43 -g44 -sg45 -S'2053393' -p128 -sg47 -g48 -sssg49 -(dp129 -g37 -S'NC_000016.9:g.2103394C>T' -p130 -sg39 -(dp131 -g41 -g53 -sg43 -g44 -sg45 -S'2103394' -p132 -sg47 -g48 -sssg76 -(dp133 -g37 -S'NC_000016.10:g.2053393C>T' -p134 -sg39 -(dp135 -g41 -g42 -sg43 -g44 -sg45 -S'2053393' -p136 -sg47 -g48 -sssg35 -(dp137 -g37 -S'NC_000016.9:g.2103394C>T' -p138 -sg39 -(dp139 -g41 -g42 -sg43 -g44 -sg45 -S'2103394' -p140 -sg47 -g48 -sssssS'NM_001114382.1:c.277C>T' -p141 -(dp142 -g3 -g11 -sg5 -(lp143 -S'A more recent version of the selected reference sequence NM_001114382.1 is available (NM_001114382.2)' -p144 -aS'NM_001114382.2:c.277C>T MUST be fully validated prior to use in reports' -p145 -aS'select_variants=NM_001114382.2:c.277C>T' -p146 -aS'RefSeqGene record not available' -p147 -asg10 -g11 -sg12 -(lp148 -sg14 -VHomo sapiens tuberous sclerosis 2 (TSC2), transcript variant 5, mRNA -p149 -sg16 -S'TSC2' -p150 -sg18 -(dp151 -g20 -S'NP_001107854.1:p.(Arg93Trp)' -p152 -sg22 -S'NP_001107854.1:p.(R93W)' -p153 -ssg24 -g25 -sg26 -g11 -sg27 -g11 -sg29 -S'NM_001114382.1:c.277C>T' -p154 -sg31 -g11 -sg33 -(dp155 -g35 -(dp156 -g37 -S'NC_000016.9:g.2103394C>T' -p157 -sg39 -(dp158 -g41 -g42 -sg43 -g44 -sg45 -S'2103394' -p159 -sg47 -g48 -sssg49 -(dp160 -g37 -S'NC_000016.9:g.2103394C>T' -p161 -sg39 -(dp162 -g41 -g53 -sg43 -g44 -sg45 -S'2103394' -p163 -sg47 -g48 -sssssS'NM_001077183.1:c.277C>T' -p164 -(dp165 -g3 -g11 -sg5 -(lp166 -S'A more recent version of the selected reference sequence NM_001077183.1 is available (NM_001077183.2)' -p167 -aS'NM_001077183.2:c.277C>T MUST be fully validated prior to use in reports' -p168 -aS'select_variants=NM_001077183.2:c.277C>T' -p169 -aS'RefSeqGene record not available' -p170 -asg10 -g11 -sg12 -(lp171 -sg14 -VHomo sapiens tuberous sclerosis 2 (TSC2), transcript variant 4, mRNA -p172 -sg16 -S'TSC2' -p173 -sg18 -(dp174 
-g20 -S'NP_001070651.1:p.(Arg93Trp)' -p175 -sg22 -S'NP_001070651.1:p.(R93W)' -p176 -ssg24 -g25 -sg26 -g11 -sg27 -g11 -sg29 -S'NM_001077183.1:c.277C>T' -p177 -sg31 -g11 -sg33 -(dp178 -g35 -(dp179 -g37 -S'NC_000016.9:g.2103394C>T' -p180 -sg39 -(dp181 -g41 -g42 -sg43 -g44 -sg45 -S'2103394' -p182 -sg47 -g48 -sssg49 -(dp183 -g37 -S'NC_000016.9:g.2103394C>T' -p184 -sg39 -(dp185 -g41 -g53 -sg43 -g44 -sg45 -S'2103394' -p186 -sg47 -g48 -sssssS'NM_001318827.1:c.226-903C>T' -p187 -(dp188 -g3 -g11 -sg5 -(lp189 -S'RefSeqGene record not available' -p190 -asg10 -g11 -sg12 -(lp191 -sg14 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 6, mRNA -p192 -sg16 -S'TSC2' -p193 -sg18 -(dp194 -g20 -S'NP_001305756.1:p.?' -p195 -sg22 -S'NP_001305756.1:p.?' -p196 -ssg24 -g25 -sg26 -S'NC_000016.9(NM_001318827.1):c.226-903C>T' -p197 -sg27 -g11 -sg29 -S'NM_001318827.1:c.226-903C>T' -p198 -sg31 -g11 -sg33 -(dp199 -g67 -(dp200 -g37 -S'NC_000016.10:g.2053393C>T' -p201 -sg39 -(dp202 -g41 -g53 -sg43 -g44 -sg45 -S'2053393' -p203 -sg47 -g48 -sssg49 -(dp204 -g37 -S'NC_000016.9:g.2103394C>T' -p205 -sg39 -(dp206 -g41 -g53 -sg43 -g44 -sg45 -S'2103394' -p207 -sg47 -g48 -sssg76 -(dp208 -g37 -S'NC_000016.10:g.2053393C>T' -p209 -sg39 -(dp210 -g41 -g42 -sg43 -g44 -sg45 -S'2053393' -p211 -sg47 -g48 -sssg35 -(dp212 -g37 -S'NC_000016.9:g.2103394C>T' -p213 -sg39 -(dp214 -g41 -g42 -sg43 -g44 -sg45 -S'2103394' -p215 -sg47 -g48 -sssssS'flag' -p216 -S'gene_variant' -p217 -sS'NM_001114382.2:c.277C>T' -p218 -(dp219 -g3 -g11 -sg5 -(lp220 -S'RefSeqGene record not available' -p221 -asg10 -g11 -sg12 -(lp222 -sg14 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 5, mRNA -p223 -sg16 -S'TSC2' -p224 -sg18 -(dp225 -g20 -S'NP_001107854.1:p.(Arg93Trp)' -p226 -sg22 -S'NP_001107854.1:p.(R93W)' -p227 -ssg24 -g25 -sg26 -g11 -sg27 -g11 -sg29 -S'NM_001114382.2:c.277C>T' -p228 -sg31 -g11 -sg33 -(dp229 -g67 -(dp230 -g37 -S'NC_000016.10:g.2053393C>T' -p231 -sg39 -(dp232 -g41 -g53 -sg43 -g44 -sg45 -S'2053393' 
-p233 -sg47 -g48 -sssg49 -(dp234 -g37 -S'NC_000016.9:g.2103394C>T' -p235 -sg39 -(dp236 -g41 -g53 -sg43 -g44 -sg45 -S'2103394' -p237 -sg47 -g48 -sssg76 -(dp238 -g37 -S'NC_000016.10:g.2053393C>T' -p239 -sg39 -(dp240 -g41 -g42 -sg43 -g44 -sg45 -S'2053393' -p241 -sg47 -g48 -sssg35 -(dp242 -g37 -S'NC_000016.9:g.2103394C>T' -p243 -sg39 -(dp244 -g41 -g42 -sg43 -g44 -sg45 -S'2103394' -p245 -sg47 -g48 -sssssS'NM_001363528.1:c.277C>T' -p246 -(dp247 -g3 -g11 -sg5 -(lp248 -S'RefSeqGene record not available' -p249 -asg10 -g11 -sg12 -(lp250 -sg14 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 10, mRNA -p251 -sg16 -S'TSC2' -p252 -sg18 -(dp253 -g20 -S'NP_001350457.1:p.(Arg93Trp)' -p254 -sg22 -S'NP_001350457.1:p.(R93W)' -p255 -ssg24 -g25 -sg26 -g11 -sg27 -g11 -sg29 -S'NM_001363528.1:c.277C>T' -p256 -sg31 -g11 -sg33 -(dp257 -g35 -(dp258 -g37 -S'NC_000016.9:g.2103394C>T' -p259 -sg39 -(dp260 -g41 -g42 -sg43 -g44 -sg45 -S'2103394' -p261 -sg47 -g48 -sssg49 -(dp262 -g37 -S'NC_000016.9:g.2103394C>T' -p263 -sg39 -(dp264 -g41 -g53 -sg43 -g44 -sg45 -S'2103394' -p265 -sg47 -g48 -sssssS'NM_021055.2:c.277C>T' -p266 -(dp267 -g3 -g11 -sg5 -(lp268 -S'RefSeqGene record not available' -p269 -asg10 -g11 -sg12 -(lp270 -sg14 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 2, mRNA -p271 -sg16 -S'TSC2' -p272 -sg18 -(dp273 -g20 -S'NP_066399.2:p.(Arg93Trp)' -p274 -sg22 -S'NP_066399.2:p.(R93W)' -p275 -ssg24 -g25 -sg26 -g11 -sg27 -g11 -sg29 -S'NM_021055.2:c.277C>T' -p276 -sg31 -g11 -sg33 -(dp277 -g35 -(dp278 -g37 -S'NC_000016.9:g.2103394C>T' -p279 -sg39 -(dp280 -g41 -g42 -sg43 -g44 -sg45 -S'2103394' -p281 -sg47 -g48 -sssg49 -(dp282 -g37 -S'NC_000016.9:g.2103394C>T' -p283 -sg39 -(dp284 -g41 -g53 -sg43 -g44 -sg45 -S'2103394' -p285 -sg47 -g48 -sssssS'NM_000548.4:c.277C>T' -p286 -(dp287 -g3 -g11 -sg5 -(lp288 -S'RefSeqGene record not available' -p289 -asg10 -g11 -sg12 -(lp290 -sg14 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 1, mRNA -p291 -sg16 -S'TSC2' -p292 
-sg18 -(dp293 -g20 -S'NP_000539.2(LRG_487p1):p.(Arg93Trp)' -p294 -sg22 -S'NP_000539.2:p.(R93W)' -p295 -ssg24 -g25 -sg26 -g11 -sg27 -g11 -sg29 -S'NM_000548.4:c.277C>T' -p296 -sg31 -g11 -sg33 -(dp297 -g67 -(dp298 -g37 -S'NC_000016.10:g.2053393C>T' -p299 -sg39 -(dp300 -g41 -g53 -sg43 -g44 -sg45 -S'2053393' -p301 -sg47 -g48 -sssg49 -(dp302 -g37 -S'NC_000016.9:g.2103394C>T' -p303 -sg39 -(dp304 -g41 -g53 -sg43 -g44 -sg45 -S'2103394' -p305 -sg47 -g48 -sssg76 -(dp306 -g37 -S'NC_000016.10:g.2053393C>T' -p307 -sg39 -(dp308 -g41 -g42 -sg43 -g44 -sg45 -S'2053393' -p309 -sg47 -g48 -sssg35 -(dp310 -g37 -S'NC_000016.9:g.2103394C>T' -p311 -sg39 -(dp312 -g41 -g42 -sg43 -g44 -sg45 -S'2103394' -p313 -sg47 -g48 -sssssS'NM_001318831.1:c.-1-2803C>T' -p314 -(dp315 -g3 -g11 -sg5 -(lp316 -S'RefSeqGene record not available' -p317 -asg10 -g11 -sg12 -(lp318 -sg14 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 8, mRNA -p319 -sg16 -S'TSC2' -p320 -sg18 -(dp321 -g20 -S'NP_001305760.1:p.?' -p322 -sg22 -S'NP_001305760.1:p.?' -p323 -ssg24 -g25 -sg26 -S'NC_000016.9(NM_001318831.1):c.-1-2803C>T' -p324 -sg27 -g11 -sg29 -S'NM_001318831.1:c.-1-2803C>T' -p325 -sg31 -g11 -sg33 -(dp326 -g67 -(dp327 -g37 -S'NC_000016.10:g.2053393C>T' -p328 -sg39 -(dp329 -g41 -g53 -sg43 -g44 -sg45 -S'2053393' -p330 -sg47 -g48 -sssg49 -(dp331 -g37 -S'NC_000016.9:g.2103394C>T' -p332 -sg39 -(dp333 -g41 -g53 -sg43 -g44 -sg45 -S'2103394' -p334 -sg47 -g48 -sssg76 -(dp335 -g37 -S'NC_000016.10:g.2053393C>T' -p336 -sg39 -(dp337 -g41 -g42 -sg43 -g44 -sg45 -S'2053393' -p338 -sg47 -g48 -sssg35 -(dp339 -g37 -S'NC_000016.9:g.2103394C>T' -p340 -sg39 -(dp341 -g41 -g42 -sg43 -g44 -sg45 -S'2103394' -p342 -sg47 -g48 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant234.txt b/VariantValidator/testing/testOutputsMasterITS/variant234.txt deleted file mode 100644 index 7aaab48c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant234.txt +++ /dev/null @@ -1,244 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_001079846.1:c.5634G>C' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens CREB binding protein (CREBBP), transcript variant 2, mRNA -p14 -sS'gene_symbol' -p15 -S'CREBBP' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_001073315.1:p.(Met1878Ile)' -p20 -sS'slr' -p21 -S'NP_001073315.1:p.(M1878I)' -p22 -ssS'submitted_variant' -p23 -S'16-3779300-C-G' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'HGVS_LRG_variant' -p26 -g6 -sS'HGVS_transcript_variant' -p27 -S'NM_001079846.1:c.5634G>C' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'GRCh38' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000016.10:g.3729299C>G' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'16' -p39 -sS'ref' -p40 -VC -p41 -sS'pos' -p42 -S'3729299' -p43 -sS'alt' -p44 -VG -p45 -sssS'GRCh37' -p46 -(dp47 -g34 -S'NC_000016.9:g.3779300C>G' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'3779300' -p50 -sg44 -g45 -sssS'hg38' -p51 -(dp52 -g34 -S'NC_000016.10:g.3729299C>G' -p53 -sg36 -(dp54 -g38 -S'chr16' -p55 -sg40 -g41 -sg42 -S'3729299' -p56 -sg44 -g45 -sssS'hg19' -p57 -(dp58 -g34 -S'NC_000016.9:g.3779300C>G' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'3779300' -p61 -sg44 -g45 -sssssS'NM_004380.2:c.5748G>C' -p62 -(dp63 -g5 -g6 -sg7 -(lp64 -sg10 -g6 -sg11 -(lp65 -sg13 -VHomo sapiens CREB binding protein (CREBBP), transcript variant 1, mRNA -p66 -sg15 -S'CREBBP' -p67 -sg17 
-(dp68 -g19 -S'NP_004371.2:p.(Met1916Ile)' -p69 -sg21 -S'NP_004371.2:p.(M1916I)' -p70 -ssg23 -g24 -sg25 -g6 -sg26 -g6 -sg27 -S'NM_004380.2:c.5748G>C' -p71 -sg29 -S'NG_009873.1:g.155822G>C' -p72 -sg30 -(dp73 -g32 -(dp74 -g34 -S'NC_000016.10:g.3729299C>G' -p75 -sg36 -(dp76 -g38 -g39 -sg40 -g41 -sg42 -S'3729299' -p77 -sg44 -g45 -sssg46 -(dp78 -g34 -S'NC_000016.9:g.3779300C>G' -p79 -sg36 -(dp80 -g38 -g39 -sg40 -g41 -sg42 -S'3779300' -p81 -sg44 -g45 -sssg51 -(dp82 -g34 -S'NC_000016.10:g.3729299C>G' -p83 -sg36 -(dp84 -g38 -g55 -sg40 -g41 -sg42 -S'3729299' -p85 -sg44 -g45 -sssg57 -(dp86 -g34 -S'NC_000016.9:g.3779300C>G' -p87 -sg36 -(dp88 -g38 -g55 -sg40 -g41 -sg42 -S'3779300' -p89 -sg44 -g45 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant235.txt b/VariantValidator/testing/testOutputsMasterITS/variant235.txt deleted file mode 100644 index 2ab5e086..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant235.txt +++ /dev/null @@ -1,244 +0,0 @@ -(dp0 -S'NM_001330504.1:c.493C>G' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens ALG1, chitobiosyldiphosphodolichol beta-mannosyltransferase (ALG1), transcript variant 2, mRNA -p12 -sS'gene_symbol' -p13 -S'ALG1' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001317433.1:p.(Arg165Gly)' -p18 -sS'slr' -p19 -S'NP_001317433.1:p.(R165G)' -p20 -ssS'submitted_variant' -p21 -S'16-5128843-C-G' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_001330504.1:c.493C>G' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'GRCh38' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000016.10:g.5078842C>G' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 
-S'16' -p37 -sS'ref' -p38 -S'C' -p39 -sS'pos' -p40 -S'5078842' -p41 -sS'alt' -p42 -S'G' -p43 -sssS'GRCh37' -p44 -(dp45 -g32 -S'NC_000016.9:g.5128843C>G' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'5128843' -p48 -sg42 -g43 -sssS'hg38' -p49 -(dp50 -g32 -S'NC_000016.10:g.5078842C>G' -p51 -sg34 -(dp52 -g36 -S'chr16' -p53 -sg38 -g39 -sg40 -S'5078842' -p54 -sg42 -g43 -sssS'hg19' -p55 -(dp56 -g32 -S'NC_000016.9:g.5128843C>G' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'5128843' -p59 -sg42 -g43 -sssssS'flag' -p60 -S'gene_variant' -p61 -sS'NM_019109.4:c.826C>G' -p62 -(dp63 -g3 -g4 -sg5 -(lp64 -sg8 -g4 -sg9 -(lp65 -sg11 -VHomo sapiens ALG1, chitobiosyldiphosphodolichol beta-mannosyltransferase (ALG1), transcript variant 1, mRNA -p66 -sg13 -S'ALG1' -p67 -sg15 -(dp68 -g17 -S'NP_061982.3:p.(Arg276Gly)' -p69 -sg19 -S'NP_061982.3:p.(R276G)' -p70 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_019109.4:c.826C>G' -p71 -sg27 -S'NG_009202.1:g.12034C>G' -p72 -sg28 -(dp73 -g30 -(dp74 -g32 -S'NC_000016.10:g.5078842C>G' -p75 -sg34 -(dp76 -g36 -g37 -sg38 -g39 -sg40 -S'5078842' -p77 -sg42 -g43 -sssg44 -(dp78 -g32 -S'NC_000016.9:g.5128843C>G' -p79 -sg34 -(dp80 -g36 -g37 -sg38 -g39 -sg40 -S'5128843' -p81 -sg42 -g43 -sssg49 -(dp82 -g32 -S'NC_000016.10:g.5078842C>G' -p83 -sg34 -(dp84 -g36 -g53 -sg38 -g39 -sg40 -S'5078842' -p85 -sg42 -g43 -sssg55 -(dp86 -g32 -S'NC_000016.9:g.5128843C>G' -p87 -sg34 -(dp88 -g36 -g53 -sg38 -g39 -sg40 -S'5128843' -p89 -sg42 -g43 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant236.txt b/VariantValidator/testing/testOutputsMasterITS/variant236.txt deleted file mode 100644 index a4db79d4..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant236.txt +++ /dev/null @@ -1,140 +0,0 @@ -(dp0 -S'NM_024306.4:c.95G>A' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -sS'RefSeqGene_context_intronic_sequence' -p7 -g4 -sS'alt_genomic_loci' -p8 -(lp9 -sS'transcript_description' -p10 -VHomo sapiens fatty acid 2-hydroxylase (FA2H), mRNA -p11 -sS'gene_symbol' -p12 -S'FA2H' -p13 -sS'HGVS_predicted_protein_consequence' -p14 -(dp15 -S'tlr' -p16 -S'NP_077282.3:p.(Arg32His)' -p17 -sS'slr' -p18 -S'NP_077282.3:p.(R32H)' -p19 -ssS'submitted_variant' -p20 -S'16-74808559-C-T' -p21 -sS'genome_context_intronic_sequence' -p22 -g4 -sS'HGVS_LRG_variant' -p23 -g4 -sS'HGVS_transcript_variant' -p24 -S'NM_024306.4:c.95G>A' -p25 -sS'HGVS_RefSeqGene_variant' -p26 -S'NG_017070.1:g.5171G>A' -p27 -sS'primary_assembly_loci' -p28 -(dp29 -S'GRCh38' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000016.10:g.74774661C>T' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'16' -p37 -sS'ref' -p38 -VC -p39 -sS'pos' -p40 -S'74774661' -p41 -sS'alt' -p42 -VT -p43 -sssS'GRCh37' -p44 -(dp45 -g32 -S'NC_000016.9:g.74808559C>T' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'74808559' -p48 -sg42 -g43 -sssS'hg38' -p49 -(dp50 -g32 -S'NC_000016.10:g.74774661C>T' -p51 -sg34 -(dp52 -g36 -S'chr16' -p53 -sg38 -g39 -sg40 -S'74774661' -p54 -sg42 -g43 -sssS'hg19' -p55 -(dp56 -g32 -S'NC_000016.9:g.74808559C>T' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'74808559' -p59 -sg42 -g43 -sssssS'flag' -p60 -S'gene_variant' -p61 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant237.txt b/VariantValidator/testing/testOutputsMasterITS/variant237.txt deleted file mode 100644 index 8ef3b69a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant237.txt +++ /dev/null @@ -1,317 +0,0 @@ -(dp0 -S'NM_003119.3:c.-22C>A' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'SPG7' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_003110.1:p.?' -p18 -sS'slr' -p19 -S'NP_003110.1:p.?' -p20 -ssS'submitted_variant' -p21 -S'16-89574804-C-A' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_003119.3:c.-22C>A' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'GRCh38' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000016.10:g.89508396C>A' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'16' -p37 -sS'ref' -p38 -S'C' -p39 -sS'pos' -p40 -S'89508396' -p41 -sS'alt' -p42 -S'A' -p43 -sssS'GRCh37' -p44 -(dp45 -g32 -S'NC_000016.9:g.89574804C>A' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'89574804' -p48 -sg42 -g43 -sssS'hg38' -p49 -(dp50 -g32 -S'NC_000016.10:g.89508396C>A' -p51 -sg34 -(dp52 -g36 -S'chr16' -p53 -sg38 -g39 -sg40 -S'89508396' -p54 -sg42 -g43 -sssS'hg19' -p55 -(dp56 -g32 -S'NC_000016.9:g.89574804C>A' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'89574804' -p59 -sg42 -g43 -sssssS'flag' -p60 -S'gene_variant' -p61 -sS'NM_199367.2:c.-22C>A' -p62 -(dp63 -g3 -g4 -sg5 -(lp64 -S'RefSeqGene record not available' -p65 -asg8 -g4 -sg9 -(lp66 -sg11 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit 
(SPG7), transcript variant 2, mRNA -p67 -sg13 -S'SPG7' -p68 -sg15 -(dp69 -g17 -S'NP_955399.1:p.?' -p70 -sg19 -S'NP_955399.1:p.?' -p71 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_199367.2:c.-22C>A' -p72 -sg27 -g4 -sg28 -(dp73 -g30 -(dp74 -g32 -S'NC_000016.10:g.89508396C>A' -p75 -sg34 -(dp76 -g36 -g37 -sg38 -g39 -sg40 -S'89508396' -p77 -sg42 -g43 -sssg44 -(dp78 -g32 -S'NC_000016.9:g.89574804C>A' -p79 -sg34 -(dp80 -g36 -g37 -sg38 -g39 -sg40 -S'89574804' -p81 -sg42 -g43 -sssg49 -(dp82 -g32 -S'NC_000016.10:g.89508396C>A' -p83 -sg34 -(dp84 -g36 -g53 -sg38 -g39 -sg40 -S'89508396' -p85 -sg42 -g43 -sssg55 -(dp86 -g32 -S'NC_000016.9:g.89574804C>A' -p87 -sg34 -(dp88 -g36 -g53 -sg38 -g39 -sg40 -S'89574804' -p89 -sg42 -g43 -sssssS'NM_001363850.1:c.-22C>A' -p90 -(dp91 -g3 -g4 -sg5 -(lp92 -S'RefSeqGene record not available' -p93 -asg8 -g4 -sg9 -(lp94 -sg11 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p95 -sg13 -S'SPG7' -p96 -sg15 -(dp97 -g17 -S'NP_001350779.1:p.?' -p98 -sg19 -S'NP_001350779.1:p.?' -p99 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001363850.1:c.-22C>A' -p100 -sg27 -g4 -sg28 -(dp101 -g55 -(dp102 -g32 -S'NC_000016.9:g.89574804C>A' -p103 -sg34 -(dp104 -g36 -g53 -sg38 -g39 -sg40 -S'89574804' -p105 -sg42 -g43 -sssg44 -(dp106 -g32 -S'NC_000016.9:g.89574804C>A' -p107 -sg34 -(dp108 -g36 -g37 -sg38 -g39 -sg40 -S'89574804' -p109 -sg42 -g43 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant238.txt b/VariantValidator/testing/testOutputsMasterITS/variant238.txt deleted file mode 100644 index 38524314..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant238.txt +++ /dev/null @@ -1,472 +0,0 @@ -(dp0 -S'NM_003119.2:c.1A>C' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' -p7 -aS'NM_003119.3:c.1A>C MUST be fully validated prior to use in reports' -p8 -aS'select_variants=NM_003119.3:c.1A>C' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g4 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p14 -sS'gene_symbol' -p15 -S'SPG7' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_003110.1:p.(Met1?)' -p20 -sS'slr' -p21 -S'NP_003110.1:p.(M1?)' -p22 -ssS'submitted_variant' -p23 -S'16-89574826-A-C' -p24 -sS'genome_context_intronic_sequence' -p25 -g4 -sS'HGVS_LRG_variant' -p26 -g4 -sS'HGVS_transcript_variant' -p27 -S'NM_003119.2:c.1A>C' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -S'NG_008082.1:g.5022A>C' -p30 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000016.9:g.89574826A>C' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr16' -p40 -sS'ref' -p41 -S'A' -p42 -sS'pos' -p43 -S'89574826' -p44 -sS'alt' -p45 -S'C' -p46 -sssS'GRCh37' -p47 -(dp48 -g35 -S'NC_000016.9:g.89574826A>C' -p49 -sg37 -(dp50 -g39 -S'16' -p51 -sg41 -g42 -sg43 -S'89574826' -p52 -sg45 -g46 -sssssS'NM_199367.1:c.1A>C' -p53 -(dp54 -g3 -g4 -sg5 -(lp55 -S'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' -p56 -aS'NM_199367.2:c.1A>C MUST be fully validated prior to use in reports' -p57 
-aS'select_variants=NM_199367.2:c.1A>C' -p58 -aS'RefSeqGene record not available' -p59 -asg10 -g4 -sg11 -(lp60 -sg13 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA -p61 -sg15 -S'SPG7' -p62 -sg17 -(dp63 -g19 -S'NP_955399.1:p.(Met1?)' -p64 -sg21 -S'NP_955399.1:p.(M1?)' -p65 -ssg23 -g24 -sg25 -g4 -sg26 -g4 -sg27 -S'NM_199367.1:c.1A>C' -p66 -sg29 -g4 -sg31 -(dp67 -g33 -(dp68 -g35 -S'NC_000016.9:g.89574826A>C' -p69 -sg37 -(dp70 -g39 -g40 -sg41 -g42 -sg43 -S'89574826' -p71 -sg45 -g46 -sssg47 -(dp72 -g35 -S'NC_000016.9:g.89574826A>C' -p73 -sg37 -(dp74 -g39 -g51 -sg41 -g42 -sg43 -S'89574826' -p75 -sg45 -g46 -sssssS'NM_001363850.1:c.1A>C' -p76 -(dp77 -g3 -g4 -sg5 -(lp78 -S'RefSeqGene record not available' -p79 -asg10 -g4 -sg11 -(lp80 -sg13 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p81 -sg15 -S'SPG7' -p82 -sg17 -(dp83 -g19 -S'NP_001350779.1:p.(Met1?)' -p84 -sg21 -S'NP_001350779.1:p.(M1?)' -p85 -ssg23 -g24 -sg25 -g4 -sg26 -g4 -sg27 -S'NM_001363850.1:c.1A>C' -p86 -sg29 -g4 -sg31 -(dp87 -g33 -(dp88 -g35 -S'NC_000016.9:g.89574826A>C' -p89 -sg37 -(dp90 -g39 -g40 -sg41 -g42 -sg43 -S'89574826' -p91 -sg45 -g46 -sssg47 -(dp92 -g35 -S'NC_000016.9:g.89574826A>C' -p93 -sg37 -(dp94 -g39 -g51 -sg41 -g42 -sg43 -S'89574826' -p95 -sg45 -g46 -sssssS'NM_199367.2:c.1A>C' -p96 -(dp97 -g3 -g4 -sg5 -(lp98 -S'RefSeqGene record not available' -p99 -asg10 -g4 -sg11 -(lp100 -sg13 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA -p101 -sg15 -S'SPG7' -p102 -sg17 -(dp103 -g19 -S'NP_955399.1:p.(Met1?)' -p104 -sg21 -S'NP_955399.1:p.(M1?)' -p105 -ssg23 -g24 -sg25 -g4 -sg26 -g4 -sg27 -S'NM_199367.2:c.1A>C' -p106 -sg29 -g4 -sg31 -(dp107 -S'GRCh38' -p108 -(dp109 -g35 -S'NC_000016.10:g.89508418A>C' -p110 -sg37 -(dp111 -g39 -g51 -sg41 -g42 -sg43 -S'89508418' -p112 -sg45 -g46 -sssg47 -(dp113 -g35 -S'NC_000016.9:g.89574826A>C' -p114 -sg37 -(dp115 
-g39 -g51 -sg41 -g42 -sg43 -S'89574826' -p116 -sg45 -g46 -sssS'hg38' -p117 -(dp118 -g35 -S'NC_000016.10:g.89508418A>C' -p119 -sg37 -(dp120 -g39 -g40 -sg41 -g42 -sg43 -S'89508418' -p121 -sg45 -g46 -sssg33 -(dp122 -g35 -S'NC_000016.9:g.89574826A>C' -p123 -sg37 -(dp124 -g39 -g40 -sg41 -g42 -sg43 -S'89574826' -p125 -sg45 -g46 -sssssS'flag' -p126 -S'gene_variant' -p127 -sS'NM_003119.3:c.1A>C' -p128 -(dp129 -g3 -g4 -sg5 -(lp130 -S'RefSeqGene record not available' -p131 -asg10 -g4 -sg11 -(lp132 -sg13 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p133 -sg15 -S'SPG7' -p134 -sg17 -(dp135 -g19 -S'NP_003110.1:p.(Met1?)' -p136 -sg21 -S'NP_003110.1:p.(M1?)' -p137 -ssg23 -g24 -sg25 -g4 -sg26 -g4 -sg27 -S'NM_003119.3:c.1A>C' -p138 -sg29 -g4 -sg31 -(dp139 -g108 -(dp140 -g35 -S'NC_000016.10:g.89508418A>C' -p141 -sg37 -(dp142 -g39 -g51 -sg41 -g42 -sg43 -S'89508418' -p143 -sg45 -g46 -sssg47 -(dp144 -g35 -S'NC_000016.9:g.89574826A>C' -p145 -sg37 -(dp146 -g39 -g51 -sg41 -g42 -sg43 -S'89574826' -p147 -sg45 -g46 -sssg117 -(dp148 -g35 -S'NC_000016.10:g.89508418A>C' -p149 -sg37 -(dp150 -g39 -g40 -sg41 -g42 -sg43 -S'89508418' -p151 -sg45 -g46 -sssg33 -(dp152 -g35 -S'NC_000016.9:g.89574826A>C' -p153 -sg37 -(dp154 -g39 -g40 -sg41 -g42 -sg43 -S'89574826' -p155 -sg45 -g46 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant239.txt b/VariantValidator/testing/testOutputsMasterITS/variant239.txt deleted file mode 100644 index 422035d0..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant239.txt +++ /dev/null @@ -1,495 +0,0 @@ -(dp0 -S'NM_001363850.1:c.90dup' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000016.9:g.89574914G>GT automapped to NC_000016.9:g.89574915dupT' -p7 -aS'RefSeqGene record not available' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p13 -sS'gene_symbol' -p14 -S'SPG7' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001350779.1:p.(Pro31SerfsTer43)' -p19 -sS'slr' -p20 -S'NP_001350779.1:p.(P31Sfs*43)' -p21 -ssS'submitted_variant' -p22 -S'16-89574914-G-GT' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'HGVS_LRG_variant' -p25 -g4 -sS'HGVS_transcript_variant' -p26 -S'NM_001363850.1:c.90dup' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000016.9:g.89574915dup' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr16' -p38 -sS'ref' -p39 -S'T' -p40 -sS'pos' -p41 -S'89574915' -p42 -sS'alt' -p43 -S'TT' -p44 -sssS'GRCh37' -p45 -(dp46 -g33 -S'NC_000016.9:g.89574915dup' -p47 -sg35 -(dp48 -g37 -S'16' -p49 -sg39 -g40 -sg41 -S'89574915' -p50 -sg43 -S'TT' -p51 -sssssS'NM_199367.1:c.90dup' -p52 -(dp53 -g3 -g4 -sg5 -(lp54 -S'NC_000016.9:g.89574914G>GT automapped to NC_000016.9:g.89574915dupT' -p55 -aS'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' -p56 -aS'NM_199367.2:c.90dupT MUST be fully validated prior to use in reports' -p57 -aS'select_variants=NM_199367.2:c.90dup' -p58 
-aS'RefSeqGene record not available' -p59 -asg9 -g4 -sg10 -(lp60 -sg12 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA -p61 -sg14 -S'SPG7' -p62 -sg16 -(dp63 -g18 -S'NP_955399.1:p.(Pro31SerfsTer43)' -p64 -sg20 -S'NP_955399.1:p.(P31Sfs*43)' -p65 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_199367.1:c.90dup' -p66 -sg28 -g4 -sg29 -(dp67 -g31 -(dp68 -g33 -S'NC_000016.9:g.89574915dup' -p69 -sg35 -(dp70 -g37 -g38 -sg39 -g40 -sg41 -S'89574915' -p71 -sg43 -S'TT' -p72 -sssg45 -(dp73 -g33 -S'NC_000016.9:g.89574915dup' -p74 -sg35 -(dp75 -g37 -g49 -sg39 -g40 -sg41 -S'89574915' -p76 -sg43 -S'TT' -p77 -sssssS'NM_003119.2:c.90dup' -p78 -(dp79 -g3 -g4 -sg5 -(lp80 -S'NC_000016.9:g.89574914G>GT automapped to NC_000016.9:g.89574915dupT' -p81 -aS'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' -p82 -aS'NM_003119.3:c.90dupT MUST be fully validated prior to use in reports' -p83 -aS'select_variants=NM_003119.3:c.90dup' -p84 -asg9 -g4 -sg10 -(lp85 -sg12 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p86 -sg14 -S'SPG7' -p87 -sg16 -(dp88 -g18 -S'NP_003110.1:p.(Pro31SerfsTer43)' -p89 -sg20 -S'NP_003110.1:p.(P31Sfs*43)' -p90 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_003119.2:c.90dup' -p91 -sg28 -S'NG_008082.1:g.5111dup' -p92 -sg29 -(dp93 -g31 -(dp94 -g33 -S'NC_000016.9:g.89574915dup' -p95 -sg35 -(dp96 -g37 -g38 -sg39 -g40 -sg41 -S'89574915' -p97 -sg43 -S'TT' -p98 -sssg45 -(dp99 -g33 -S'NC_000016.9:g.89574915dup' -p100 -sg35 -(dp101 -g37 -g49 -sg39 -g40 -sg41 -S'89574915' -p102 -sg43 -S'TT' -p103 -sssssS'NM_199367.2:c.90dup' -p104 -(dp105 -g3 -g4 -sg5 -(lp106 -S'NC_000016.9:g.89574914G>GT automapped to NC_000016.9:g.89574915dupT' -p107 -aS'RefSeqGene record not available' -p108 -asg9 -g4 -sg10 -(lp109 -sg12 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA -p110 -sg14 
-S'SPG7' -p111 -sg16 -(dp112 -g18 -S'NP_955399.1:p.(Pro31SerfsTer43)' -p113 -sg20 -S'NP_955399.1:p.(P31Sfs*43)' -p114 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_199367.2:c.90dup' -p115 -sg28 -g4 -sg29 -(dp116 -S'GRCh38' -p117 -(dp118 -g33 -S'NC_000016.10:g.89508507dup' -p119 -sg35 -(dp120 -g37 -g49 -sg39 -g40 -sg41 -S'89508507' -p121 -sg43 -S'TT' -p122 -sssg45 -(dp123 -g33 -S'NC_000016.9:g.89574915dup' -p124 -sg35 -(dp125 -g37 -g49 -sg39 -g40 -sg41 -S'89574915' -p126 -sg43 -S'TT' -p127 -sssS'hg38' -p128 -(dp129 -g33 -S'NC_000016.10:g.89508507dup' -p130 -sg35 -(dp131 -g37 -g38 -sg39 -g40 -sg41 -S'89508507' -p132 -sg43 -S'TT' -p133 -sssg31 -(dp134 -g33 -S'NC_000016.9:g.89574915dup' -p135 -sg35 -(dp136 -g37 -g38 -sg39 -g40 -sg41 -S'89574915' -p137 -sg43 -S'TT' -p138 -sssssS'flag' -p139 -S'gene_variant' -p140 -sS'NM_003119.3:c.90dup' -p141 -(dp142 -g3 -g4 -sg5 -(lp143 -S'NC_000016.9:g.89574914G>GT automapped to NC_000016.9:g.89574915dupT' -p144 -aS'RefSeqGene record not available' -p145 -asg9 -g4 -sg10 -(lp146 -sg12 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p147 -sg14 -S'SPG7' -p148 -sg16 -(dp149 -g18 -S'NP_003110.1:p.(Pro31SerfsTer43)' -p150 -sg20 -S'NP_003110.1:p.(P31Sfs*43)' -p151 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_003119.3:c.90dup' -p152 -sg28 -g4 -sg29 -(dp153 -g117 -(dp154 -g33 -S'NC_000016.10:g.89508507dup' -p155 -sg35 -(dp156 -g37 -g49 -sg39 -g40 -sg41 -S'89508507' -p157 -sg43 -S'TT' -p158 -sssg45 -(dp159 -g33 -S'NC_000016.9:g.89574915dup' -p160 -sg35 -(dp161 -g37 -g49 -sg39 -g40 -sg41 -S'89574915' -p162 -sg43 -S'TT' -p163 -sssg128 -(dp164 -g33 -S'NC_000016.10:g.89508507dup' -p165 -sg35 -(dp166 -g37 -g38 -sg39 -g40 -sg41 -S'89508507' -p167 -sg43 -S'TT' -p168 -sssg31 -(dp169 -g33 -S'NC_000016.9:g.89574915dup' -p170 -sg35 -(dp171 -g37 -g38 -sg39 -g40 -sg41 -S'89574915' -p172 -sg43 -S'TT' -p173 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant24.txt b/VariantValidator/testing/testOutputsMasterITS/variant24.txt deleted file mode 100644 index 51e1ca9e..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant24.txt +++ /dev/null @@ -1,58 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use NC_000011.9:g.5244828_5248381=' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'HGVS_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NM_000518.4:c.-50-80_*132+1868=' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'HGVS_LRG_variant' -p20 -g4 -sS'HGVS_transcript_variant' -p21 -g4 -sS'HGVS_RefSeqGene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -ssS'flag' -p25 -S'warning' -p26 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant240.txt b/VariantValidator/testing/testOutputsMasterITS/variant240.txt deleted file mode 100644 index 6cb036fa..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant240.txt +++ /dev/null @@ -1,508 +0,0 @@ -(dp0 -S'NM_199367.2:c.89_91dup' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000016.9:g.89574916C>CGTC automapped to NC_000016.9:g.89574914_89574916dupGTC' -p7 -aS'RefSeqGene record not available' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA -p13 -sS'gene_symbol' -p14 -S'SPG7' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_955399.1:p.(Ser30_Pro31insArg)' -p19 -sS'slr' -p20 -S'NP_955399.1:p.(S30_P31insR)' -p21 -ssS'submitted_variant' -p22 -S'16-89574916-C-CGTC' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'HGVS_LRG_variant' -p25 -g4 -sS'HGVS_transcript_variant' -p26 -S'NM_199367.2:c.89_91dup' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'GRCh38' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000016.10:g.89508506_89508508dup' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'16' -p38 -sS'ref' -p39 -S'GTC' -p40 -sS'pos' -p41 -S'89508506' -p42 -sS'alt' -p43 -S'GTCGTC' -p44 -sssS'GRCh37' -p45 -(dp46 -g33 -S'NC_000016.9:g.89574914_89574916dup' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'GTC' -p49 -sg41 -S'89574914' -p50 -sg43 -S'GTCGTC' -p51 -sssS'hg38' -p52 -(dp53 -g33 -S'NC_000016.10:g.89508506_89508508dup' -p54 -sg35 -(dp55 -g37 -S'chr16' -p56 -sg39 -S'GTC' -p57 -sg41 -S'89508506' -p58 -sg43 -S'GTCGTC' -p59 -sssS'hg19' -p60 -(dp61 -g33 -S'NC_000016.9:g.89574914_89574916dup' -p62 -sg35 -(dp63 -g37 -g56 -sg39 -S'GTC' -p64 -sg41 -S'89574914' -p65 -sg43 -S'GTCGTC' -p66 
-sssssS'NM_003119.3:c.89_91dup' -p67 -(dp68 -g3 -g4 -sg5 -(lp69 -S'NC_000016.9:g.89574916C>CGTC automapped to NC_000016.9:g.89574914_89574916dupGTC' -p70 -aS'RefSeqGene record not available' -p71 -asg9 -g4 -sg10 -(lp72 -sg12 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p73 -sg14 -S'SPG7' -p74 -sg16 -(dp75 -g18 -S'NP_003110.1:p.(Ser30_Pro31insArg)' -p76 -sg20 -S'NP_003110.1:p.(S30_P31insR)' -p77 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_003119.3:c.89_91dup' -p78 -sg28 -g4 -sg29 -(dp79 -g31 -(dp80 -g33 -S'NC_000016.10:g.89508506_89508508dup' -p81 -sg35 -(dp82 -g37 -g38 -sg39 -S'GTC' -p83 -sg41 -S'89508506' -p84 -sg43 -S'GTCGTC' -p85 -sssg45 -(dp86 -g33 -S'NC_000016.9:g.89574914_89574916dup' -p87 -sg35 -(dp88 -g37 -g38 -sg39 -S'GTC' -p89 -sg41 -S'89574914' -p90 -sg43 -S'GTCGTC' -p91 -sssg52 -(dp92 -g33 -S'NC_000016.10:g.89508506_89508508dup' -p93 -sg35 -(dp94 -g37 -g56 -sg39 -S'GTC' -p95 -sg41 -S'89508506' -p96 -sg43 -S'GTCGTC' -p97 -sssg60 -(dp98 -g33 -S'NC_000016.9:g.89574914_89574916dup' -p99 -sg35 -(dp100 -g37 -g56 -sg39 -S'GTC' -p101 -sg41 -S'89574914' -p102 -sg43 -S'GTCGTC' -p103 -sssssS'NM_001363850.1:c.89_91dup' -p104 -(dp105 -g3 -g4 -sg5 -(lp106 -S'NC_000016.9:g.89574916C>CGTC automapped to NC_000016.9:g.89574914_89574916dupGTC' -p107 -aS'RefSeqGene record not available' -p108 -asg9 -g4 -sg10 -(lp109 -sg12 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p110 -sg14 -S'SPG7' -p111 -sg16 -(dp112 -g18 -S'NP_001350779.1:p.(Ser30_Pro31insArg)' -p113 -sg20 -S'NP_001350779.1:p.(S30_P31insR)' -p114 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001363850.1:c.89_91dup' -p115 -sg28 -g4 -sg29 -(dp116 -g60 -(dp117 -g33 -S'NC_000016.9:g.89574914_89574916dup' -p118 -sg35 -(dp119 -g37 -g56 -sg39 -S'GTC' -p120 -sg41 -S'89574914' -p121 -sg43 -S'GTCGTC' -p122 -sssg45 -(dp123 -g33 -S'NC_000016.9:g.89574914_89574916dup' -p124 -sg35 -(dp125 -g37 -g38 -sg39 -S'GTC' -p126 -sg41 
-S'89574914' -p127 -sg43 -S'GTCGTC' -p128 -sssssS'flag' -p129 -S'gene_variant' -p130 -sS'NM_199367.1:c.89_91dup' -p131 -(dp132 -g3 -g4 -sg5 -(lp133 -S'NC_000016.9:g.89574916C>CGTC automapped to NC_000016.9:g.89574914_89574916dupGTC' -p134 -aS'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' -p135 -aS'NM_199367.2:c.89_91dupGTC MUST be fully validated prior to use in reports' -p136 -aS'select_variants=NM_199367.2:c.89_91dup' -p137 -aS'RefSeqGene record not available' -p138 -asg9 -g4 -sg10 -(lp139 -sg12 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA -p140 -sg14 -S'SPG7' -p141 -sg16 -(dp142 -g18 -S'NP_955399.1:p.(Ser30_Pro31insArg)' -p143 -sg20 -S'NP_955399.1:p.(S30_P31insR)' -p144 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_199367.1:c.89_91dup' -p145 -sg28 -g4 -sg29 -(dp146 -g60 -(dp147 -g33 -S'NC_000016.9:g.89574914_89574916dup' -p148 -sg35 -(dp149 -g37 -g56 -sg39 -S'GTC' -p150 -sg41 -S'89574914' -p151 -sg43 -S'GTCGTC' -p152 -sssg45 -(dp153 -g33 -S'NC_000016.9:g.89574914_89574916dup' -p154 -sg35 -(dp155 -g37 -g38 -sg39 -S'GTC' -p156 -sg41 -S'89574914' -p157 -sg43 -S'GTCGTC' -p158 -sssssS'NM_003119.2:c.89_91dup' -p159 -(dp160 -g3 -g4 -sg5 -(lp161 -S'NC_000016.9:g.89574916C>CGTC automapped to NC_000016.9:g.89574914_89574916dupGTC' -p162 -aS'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' -p163 -aS'NM_003119.3:c.89_91dupGTC MUST be fully validated prior to use in reports' -p164 -aS'select_variants=NM_003119.3:c.89_91dup' -p165 -asg9 -g4 -sg10 -(lp166 -sg12 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p167 -sg14 -S'SPG7' -p168 -sg16 -(dp169 -g18 -S'NP_003110.1:p.(Ser30_Pro31insArg)' -p170 -sg20 -S'NP_003110.1:p.(S30_P31insR)' -p171 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_003119.2:c.89_91dup' -p172 -sg28 -S'NG_008082.1:g.5110_5112dup' 
-p173 -sg29 -(dp174 -g60 -(dp175 -g33 -S'NC_000016.9:g.89574914_89574916dup' -p176 -sg35 -(dp177 -g37 -g56 -sg39 -S'GTC' -p178 -sg41 -S'89574914' -p179 -sg43 -S'GTCGTC' -p180 -sssg45 -(dp181 -g33 -S'NC_000016.9:g.89574914_89574916dup' -p182 -sg35 -(dp183 -g37 -g38 -sg39 -S'GTC' -p184 -sg41 -S'89574914' -p185 -sg43 -S'GTCGTC' -p186 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant241.txt b/VariantValidator/testing/testOutputsMasterITS/variant241.txt deleted file mode 100644 index 07dcc74d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant241.txt +++ /dev/null @@ -1,478 +0,0 @@ -(dp0 -S'NM_199367.2:c.183+1G>A' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA -p12 -sS'gene_symbol' -p13 -S'SPG7' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_955399.1:p.?' -p18 -sS'slr' -p19 -S'NP_955399.1:p.?' 
-p20 -ssS'submitted_variant' -p21 -S'16-89575009-G-A' -p22 -sS'genome_context_intronic_sequence' -p23 -S'NC_000016.9(NM_199367.2):c.183+1G>A' -p24 -sS'HGVS_LRG_variant' -p25 -g4 -sS'HGVS_transcript_variant' -p26 -S'NM_199367.2:c.183+1G>A' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'GRCh38' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000016.10:g.89508601G>A' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'16' -p38 -sS'ref' -p39 -S'G' -p40 -sS'pos' -p41 -S'89508601' -p42 -sS'alt' -p43 -S'A' -p44 -sssS'GRCh37' -p45 -(dp46 -g33 -S'NC_000016.9:g.89575009G>A' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'89575009' -p49 -sg43 -g44 -sssS'hg38' -p50 -(dp51 -g33 -S'NC_000016.10:g.89508601G>A' -p52 -sg35 -(dp53 -g37 -S'chr16' -p54 -sg39 -g40 -sg41 -S'89508601' -p55 -sg43 -g44 -sssS'hg19' -p56 -(dp57 -g33 -S'NC_000016.9:g.89575009G>A' -p58 -sg35 -(dp59 -g37 -g54 -sg39 -g40 -sg41 -S'89575009' -p60 -sg43 -g44 -sssssS'NM_003119.2:c.183+1G>A' -p61 -(dp62 -g3 -g4 -sg5 -(lp63 -S'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' -p64 -aS'NM_003119.3:c.183+1G>A MUST be fully validated prior to use in reports' -p65 -aS'select_variants=NM_003119.3:c.183+1G>A' -p66 -asg8 -S'NG_008082.1(NM_003119.2):c.183+1G>A' -p67 -sg9 -(lp68 -sg11 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p69 -sg13 -S'SPG7' -p70 -sg15 -(dp71 -g17 -S'NP_003110.1:p.?' -p72 -sg19 -S'NP_003110.1:p.?' 
-p73 -ssg21 -g22 -sg23 -S'NC_000016.9(NM_003119.2):c.183+1G>A' -p74 -sg25 -g4 -sg26 -S'NM_003119.2:c.183+1G>A' -p75 -sg28 -S'NG_008082.1:g.5205G>A' -p76 -sg29 -(dp77 -g56 -(dp78 -g33 -S'NC_000016.9:g.89575009G>A' -p79 -sg35 -(dp80 -g37 -g54 -sg39 -g40 -sg41 -S'89575009' -p81 -sg43 -g44 -sssg45 -(dp82 -g33 -S'NC_000016.9:g.89575009G>A' -p83 -sg35 -(dp84 -g37 -g38 -sg39 -g40 -sg41 -S'89575009' -p85 -sg43 -g44 -sssssS'flag' -p86 -S'gene_variant' -p87 -sS'NM_199367.1:c.183+1G>A' -p88 -(dp89 -g3 -g4 -sg5 -(lp90 -S'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' -p91 -aS'NM_199367.2:c.183+1G>A MUST be fully validated prior to use in reports' -p92 -aS'select_variants=NM_199367.2:c.183+1G>A' -p93 -aS'RefSeqGene record not available' -p94 -asg8 -g4 -sg9 -(lp95 -sg11 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA -p96 -sg13 -S'SPG7' -p97 -sg15 -(dp98 -g17 -S'NP_955399.1:p.?' -p99 -sg19 -S'NP_955399.1:p.?' -p100 -ssg21 -g22 -sg23 -S'NC_000016.9(NM_199367.1):c.183+1G>A' -p101 -sg25 -g4 -sg26 -S'NM_199367.1:c.183+1G>A' -p102 -sg28 -g4 -sg29 -(dp103 -g56 -(dp104 -g33 -S'NC_000016.9:g.89575009G>A' -p105 -sg35 -(dp106 -g37 -g54 -sg39 -g40 -sg41 -S'89575009' -p107 -sg43 -g44 -sssg45 -(dp108 -g33 -S'NC_000016.9:g.89575009G>A' -p109 -sg35 -(dp110 -g37 -g38 -sg39 -g40 -sg41 -S'89575009' -p111 -sg43 -g44 -sssssS'NM_001363850.1:c.183+1G>A' -p112 -(dp113 -g3 -g4 -sg5 -(lp114 -S'RefSeqGene record not available' -p115 -asg8 -g4 -sg9 -(lp116 -sg11 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p117 -sg13 -S'SPG7' -p118 -sg15 -(dp119 -g17 -S'NP_001350779.1:p.?' -p120 -sg19 -S'NP_001350779.1:p.?' 
-p121 -ssg21 -g22 -sg23 -S'NC_000016.9(NM_001363850.1):c.183+1G>A' -p122 -sg25 -g4 -sg26 -S'NM_001363850.1:c.183+1G>A' -p123 -sg28 -g4 -sg29 -(dp124 -g56 -(dp125 -g33 -S'NC_000016.9:g.89575009G>A' -p126 -sg35 -(dp127 -g37 -g54 -sg39 -g40 -sg41 -S'89575009' -p128 -sg43 -g44 -sssg45 -(dp129 -g33 -S'NC_000016.9:g.89575009G>A' -p130 -sg35 -(dp131 -g37 -g38 -sg39 -g40 -sg41 -S'89575009' -p132 -sg43 -g44 -sssssS'NM_003119.3:c.183+1G>A' -p133 -(dp134 -g3 -g4 -sg5 -(lp135 -S'RefSeqGene record not available' -p136 -asg8 -g4 -sg9 -(lp137 -sg11 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p138 -sg13 -S'SPG7' -p139 -sg15 -(dp140 -g17 -S'NP_003110.1:p.?' -p141 -sg19 -S'NP_003110.1:p.?' -p142 -ssg21 -g22 -sg23 -S'NC_000016.9(NM_003119.3):c.183+1G>A' -p143 -sg25 -g4 -sg26 -S'NM_003119.3:c.183+1G>A' -p144 -sg28 -g4 -sg29 -(dp145 -g31 -(dp146 -g33 -S'NC_000016.10:g.89508601G>A' -p147 -sg35 -(dp148 -g37 -g38 -sg39 -g40 -sg41 -S'89508601' -p149 -sg43 -g44 -sssg45 -(dp150 -g33 -S'NC_000016.9:g.89575009G>A' -p151 -sg35 -(dp152 -g37 -g38 -sg39 -g40 -sg41 -S'89575009' -p153 -sg43 -g44 -sssg50 -(dp154 -g33 -S'NC_000016.10:g.89508601G>A' -p155 -sg35 -(dp156 -g37 -g54 -sg39 -g40 -sg41 -S'89508601' -p157 -sg43 -g44 -sssg56 -(dp158 -g33 -S'NC_000016.9:g.89575009G>A' -p159 -sg35 -(dp160 -g37 -g54 -sg39 -g40 -sg41 -S'89575009' -p161 -sg43 -g44 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant242.txt b/VariantValidator/testing/testOutputsMasterITS/variant242.txt deleted file mode 100644 index 0971e051..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant242.txt +++ /dev/null @@ -1,983 +0,0 @@ -(dp0 -S'NM_199367.1:c.183+32_183+33insA' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Multiple ALT sequences detected' -p7 -aS'auto-submitting all possible combinations' -p8 -aS'NC_000016.9:g.89575040C>CA automapped to NC_000016.9:g.89575040_89575041insA' -p9 -aS'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' -p10 -aS'NM_199367.2:c.183+32_183+33insA MUST be fully validated prior to use in reports' -p11 -aS'select_variants=NM_199367.2:c.183+32_183+33insA' -p12 -aS'RefSeqGene record not available' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -g4 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA -p18 -sS'gene_symbol' -p19 -S'SPG7' -p20 -sS'HGVS_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_955399.1:p.?' -p24 -sS'slr' -p25 -S'NP_955399.1:p.?' 
-p26 -ssS'submitted_variant' -p27 -S'16-89575040-C-A,CA' -p28 -sS'genome_context_intronic_sequence' -p29 -S'NC_000016.9(NM_199367.1):c.183+32_183+33insA' -p30 -sS'HGVS_LRG_variant' -p31 -g4 -sS'HGVS_transcript_variant' -p32 -S'NM_199367.1:c.183+32_183+33insA' -p33 -sS'HGVS_RefSeqGene_variant' -p34 -g4 -sS'primary_assembly_loci' -p35 -(dp36 -S'hg19' -p37 -(dp38 -S'HGVS_genomic_description' -p39 -S'NC_000016.9:g.89575040_89575041insA' -p40 -sS'vcf' -p41 -(dp42 -S'chr' -p43 -S'chr16' -p44 -sS'ref' -p45 -S'C' -p46 -sS'pos' -p47 -S'89575040' -p48 -sS'alt' -p49 -S'CA' -p50 -sssS'GRCh37' -p51 -(dp52 -g39 -S'NC_000016.9:g.89575040_89575041insA' -p53 -sg41 -(dp54 -g43 -S'16' -p55 -sg45 -g46 -sg47 -S'89575040' -p56 -sg49 -S'CA' -p57 -sssssS'NM_001363850.1:c.183+32C>A' -p58 -(dp59 -g3 -g4 -sg5 -(lp60 -S'Multiple ALT sequences detected' -p61 -aS'auto-submitting all possible combinations' -p62 -aS'RefSeqGene record not available' -p63 -asg14 -g4 -sg15 -(lp64 -sg17 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p65 -sg19 -S'SPG7' -p66 -sg21 -(dp67 -g23 -S'NP_001350779.1:p.?' -p68 -sg25 -S'NP_001350779.1:p.?' 
-p69 -ssg27 -g28 -sg29 -S'NC_000016.9(NM_001363850.1):c.183+32C>A' -p70 -sg31 -g4 -sg32 -S'NM_001363850.1:c.183+32C>A' -p71 -sg34 -g4 -sg35 -(dp72 -g37 -(dp73 -g39 -S'NC_000016.9:g.89575040C>A' -p74 -sg41 -(dp75 -g43 -g44 -sg45 -g46 -sg47 -S'89575040' -p76 -sg49 -S'A' -p77 -sssg51 -(dp78 -g39 -S'NC_000016.9:g.89575040C>A' -p79 -sg41 -(dp80 -g43 -g55 -sg45 -g46 -sg47 -S'89575040' -p81 -sg49 -g77 -sssssS'NM_001363850.1:c.183+32_183+33insA' -p82 -(dp83 -g3 -g4 -sg5 -(lp84 -S'Multiple ALT sequences detected' -p85 -aS'auto-submitting all possible combinations' -p86 -aS'NC_000016.9:g.89575040C>CA automapped to NC_000016.9:g.89575040_89575041insA' -p87 -aS'RefSeqGene record not available' -p88 -asg14 -g4 -sg15 -(lp89 -sg17 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p90 -sg19 -S'SPG7' -p91 -sg21 -(dp92 -g23 -S'NP_001350779.1:p.?' -p93 -sg25 -S'NP_001350779.1:p.?' -p94 -ssg27 -g28 -sg29 -S'NC_000016.9(NM_001363850.1):c.183+32_183+33insA' -p95 -sg31 -g4 -sg32 -S'NM_001363850.1:c.183+32_183+33insA' -p96 -sg34 -g4 -sg35 -(dp97 -g37 -(dp98 -g39 -S'NC_000016.9:g.89575040_89575041insA' -p99 -sg41 -(dp100 -g43 -g44 -sg45 -g46 -sg47 -S'89575040' -p101 -sg49 -S'CA' -p102 -sssg51 -(dp103 -g39 -S'NC_000016.9:g.89575040_89575041insA' -p104 -sg41 -(dp105 -g43 -g55 -sg45 -g46 -sg47 -S'89575040' -p106 -sg49 -S'CA' -p107 -sssssS'NM_199367.2:c.183+32C>A' -p108 -(dp109 -g3 -g4 -sg5 -(lp110 -S'Multiple ALT sequences detected' -p111 -aS'auto-submitting all possible combinations' -p112 -aS'RefSeqGene record not available' -p113 -asg14 -g4 -sg15 -(lp114 -sg17 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA -p115 -sg19 -S'SPG7' -p116 -sg21 -(dp117 -g23 -S'NP_955399.1:p.?' -p118 -sg25 -S'NP_955399.1:p.?' 
-p119 -ssg27 -g28 -sg29 -S'NC_000016.9(NM_199367.2):c.183+32C>A' -p120 -sg31 -g4 -sg32 -S'NM_199367.2:c.183+32C>A' -p121 -sg34 -g4 -sg35 -(dp122 -S'GRCh38' -p123 -(dp124 -g39 -S'NC_000016.10:g.89508632C>A' -p125 -sg41 -(dp126 -g43 -g55 -sg45 -g46 -sg47 -S'89508632' -p127 -sg49 -g77 -sssg51 -(dp128 -g39 -S'NC_000016.9:g.89575040C>A' -p129 -sg41 -(dp130 -g43 -g55 -sg45 -g46 -sg47 -S'89575040' -p131 -sg49 -g77 -sssS'hg38' -p132 -(dp133 -g39 -S'NC_000016.10:g.89508632C>A' -p134 -sg41 -(dp135 -g43 -g44 -sg45 -g46 -sg47 -S'89508632' -p136 -sg49 -g77 -sssg37 -(dp137 -g39 -S'NC_000016.9:g.89575040C>A' -p138 -sg41 -(dp139 -g43 -g44 -sg45 -g46 -sg47 -S'89575040' -p140 -sg49 -g77 -sssssS'NM_003119.3:c.183+32_183+33insA' -p141 -(dp142 -g3 -g4 -sg5 -(lp143 -S'Multiple ALT sequences detected' -p144 -aS'auto-submitting all possible combinations' -p145 -aS'NC_000016.9:g.89575040C>CA automapped to NC_000016.9:g.89575040_89575041insA' -p146 -aS'RefSeqGene record not available' -p147 -asg14 -g4 -sg15 -(lp148 -sg17 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p149 -sg19 -S'SPG7' -p150 -sg21 -(dp151 -g23 -S'NP_003110.1:p.?' -p152 -sg25 -S'NP_003110.1:p.?' 
-p153 -ssg27 -g28 -sg29 -S'NC_000016.9(NM_003119.3):c.183+32_183+33insA' -p154 -sg31 -g4 -sg32 -S'NM_003119.3:c.183+32_183+33insA' -p155 -sg34 -g4 -sg35 -(dp156 -g123 -(dp157 -g39 -S'NC_000016.10:g.89508632_89508633insA' -p158 -sg41 -(dp159 -g43 -g55 -sg45 -g46 -sg47 -S'89508632' -p160 -sg49 -S'CA' -p161 -sssg51 -(dp162 -g39 -S'NC_000016.9:g.89575040_89575041insA' -p163 -sg41 -(dp164 -g43 -g55 -sg45 -g46 -sg47 -S'89575040' -p165 -sg49 -S'CA' -p166 -sssg132 -(dp167 -g39 -S'NC_000016.10:g.89508632_89508633insA' -p168 -sg41 -(dp169 -g43 -g44 -sg45 -g46 -sg47 -S'89508632' -p170 -sg49 -S'CA' -p171 -sssg37 -(dp172 -g39 -S'NC_000016.9:g.89575040_89575041insA' -p173 -sg41 -(dp174 -g43 -g44 -sg45 -g46 -sg47 -S'89575040' -p175 -sg49 -S'CA' -p176 -sssssS'flag' -p177 -S'gene_variant' -p178 -sS'NM_003119.2:c.183+32_183+33insA' -p179 -(dp180 -g3 -g4 -sg5 -(lp181 -S'Multiple ALT sequences detected' -p182 -aS'auto-submitting all possible combinations' -p183 -aS'NC_000016.9:g.89575040C>CA automapped to NC_000016.9:g.89575040_89575041insA' -p184 -aS'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' -p185 -aS'NM_003119.3:c.183+32_183+33insA MUST be fully validated prior to use in reports' -p186 -aS'select_variants=NM_003119.3:c.183+32_183+33insA' -p187 -asg14 -S'NG_008082.1(NM_003119.2):c.183+32_183+33insA' -p188 -sg15 -(lp189 -sg17 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p190 -sg19 -S'SPG7' -p191 -sg21 -(dp192 -g23 -S'NP_003110.1:p.?' -p193 -sg25 -S'NP_003110.1:p.?' 
-p194 -ssg27 -g28 -sg29 -S'NC_000016.9(NM_003119.2):c.183+32_183+33insA' -p195 -sg31 -g4 -sg32 -S'NM_003119.2:c.183+32_183+33insA' -p196 -sg34 -S'NG_008082.1:g.5236_5237insA' -p197 -sg35 -(dp198 -g37 -(dp199 -g39 -S'NC_000016.9:g.89575040_89575041insA' -p200 -sg41 -(dp201 -g43 -g44 -sg45 -g46 -sg47 -S'89575040' -p202 -sg49 -S'CA' -p203 -sssg51 -(dp204 -g39 -S'NC_000016.9:g.89575040_89575041insA' -p205 -sg41 -(dp206 -g43 -g55 -sg45 -g46 -sg47 -S'89575040' -p207 -sg49 -S'CA' -p208 -sssssS'NM_199367.1:c.183+32C>A' -p209 -(dp210 -g3 -g4 -sg5 -(lp211 -S'Multiple ALT sequences detected' -p212 -aS'auto-submitting all possible combinations' -p213 -aS'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' -p214 -aS'NM_199367.2:c.183+32C>A MUST be fully validated prior to use in reports' -p215 -aS'select_variants=NM_199367.2:c.183+32C>A' -p216 -aS'RefSeqGene record not available' -p217 -asg14 -g4 -sg15 -(lp218 -sg17 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA -p219 -sg19 -S'SPG7' -p220 -sg21 -(dp221 -g23 -S'NP_955399.1:p.?' -p222 -sg25 -S'NP_955399.1:p.?' -p223 -ssg27 -g28 -sg29 -S'NC_000016.9(NM_199367.1):c.183+32C>A' -p224 -sg31 -g4 -sg32 -S'NM_199367.1:c.183+32C>A' -p225 -sg34 -g4 -sg35 -(dp226 -g37 -(dp227 -g39 -S'NC_000016.9:g.89575040C>A' -p228 -sg41 -(dp229 -g43 -g44 -sg45 -g46 -sg47 -S'89575040' -p230 -sg49 -g77 -sssg51 -(dp231 -g39 -S'NC_000016.9:g.89575040C>A' -p232 -sg41 -(dp233 -g43 -g55 -sg45 -g46 -sg47 -S'89575040' -p234 -sg49 -g77 -sssssS'NM_003119.3:c.183+32C>A' -p235 -(dp236 -g3 -g4 -sg5 -(lp237 -S'Multiple ALT sequences detected' -p238 -aS'auto-submitting all possible combinations' -p239 -aS'RefSeqGene record not available' -p240 -asg14 -g4 -sg15 -(lp241 -sg17 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p242 -sg19 -S'SPG7' -p243 -sg21 -(dp244 -g23 -S'NP_003110.1:p.?' 
-p245 -sg25 -S'NP_003110.1:p.?' -p246 -ssg27 -g28 -sg29 -S'NC_000016.9(NM_003119.3):c.183+32C>A' -p247 -sg31 -g4 -sg32 -S'NM_003119.3:c.183+32C>A' -p248 -sg34 -g4 -sg35 -(dp249 -g123 -(dp250 -g39 -S'NC_000016.10:g.89508632C>A' -p251 -sg41 -(dp252 -g43 -g55 -sg45 -g46 -sg47 -S'89508632' -p253 -sg49 -g77 -sssg51 -(dp254 -g39 -S'NC_000016.9:g.89575040C>A' -p255 -sg41 -(dp256 -g43 -g55 -sg45 -g46 -sg47 -S'89575040' -p257 -sg49 -g77 -sssg132 -(dp258 -g39 -S'NC_000016.10:g.89508632C>A' -p259 -sg41 -(dp260 -g43 -g44 -sg45 -g46 -sg47 -S'89508632' -p261 -sg49 -g77 -sssg37 -(dp262 -g39 -S'NC_000016.9:g.89575040C>A' -p263 -sg41 -(dp264 -g43 -g44 -sg45 -g46 -sg47 -S'89575040' -p265 -sg49 -g77 -sssssS'NM_199367.2:c.183+32_183+33insA' -p266 -(dp267 -g3 -g4 -sg5 -(lp268 -S'Multiple ALT sequences detected' -p269 -aS'auto-submitting all possible combinations' -p270 -aS'NC_000016.9:g.89575040C>CA automapped to NC_000016.9:g.89575040_89575041insA' -p271 -aS'RefSeqGene record not available' -p272 -asg14 -g4 -sg15 -(lp273 -sg17 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA -p274 -sg19 -S'SPG7' -p275 -sg21 -(dp276 -g23 -S'NP_955399.1:p.?' -p277 -sg25 -S'NP_955399.1:p.?' 
-p278 -ssg27 -g28 -sg29 -S'NC_000016.9(NM_199367.2):c.183+32_183+33insA' -p279 -sg31 -g4 -sg32 -S'NM_199367.2:c.183+32_183+33insA' -p280 -sg34 -g4 -sg35 -(dp281 -g123 -(dp282 -g39 -S'NC_000016.10:g.89508632_89508633insA' -p283 -sg41 -(dp284 -g43 -g55 -sg45 -g46 -sg47 -S'89508632' -p285 -sg49 -S'CA' -p286 -sssg51 -(dp287 -g39 -S'NC_000016.9:g.89575040_89575041insA' -p288 -sg41 -(dp289 -g43 -g55 -sg45 -g46 -sg47 -S'89575040' -p290 -sg49 -S'CA' -p291 -sssg132 -(dp292 -g39 -S'NC_000016.10:g.89508632_89508633insA' -p293 -sg41 -(dp294 -g43 -g44 -sg45 -g46 -sg47 -S'89508632' -p295 -sg49 -S'CA' -p296 -sssg37 -(dp297 -g39 -S'NC_000016.9:g.89575040_89575041insA' -p298 -sg41 -(dp299 -g43 -g44 -sg45 -g46 -sg47 -S'89575040' -p300 -sg49 -S'CA' -p301 -sssssS'NM_003119.2:c.183+32C>A' -p302 -(dp303 -g3 -g4 -sg5 -(lp304 -S'Multiple ALT sequences detected' -p305 -aS'auto-submitting all possible combinations' -p306 -aS'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' -p307 -aS'NM_003119.3:c.183+32C>A MUST be fully validated prior to use in reports' -p308 -aS'select_variants=NM_003119.3:c.183+32C>A' -p309 -asg14 -S'NG_008082.1(NM_003119.2):c.183+32C>A' -p310 -sg15 -(lp311 -sg17 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p312 -sg19 -S'SPG7' -p313 -sg21 -(dp314 -g23 -S'NP_003110.1:p.?' -p315 -sg25 -S'NP_003110.1:p.?' -p316 -ssg27 -g28 -sg29 -S'NC_000016.9(NM_003119.2):c.183+32C>A' -p317 -sg31 -g4 -sg32 -S'NM_003119.2:c.183+32C>A' -p318 -sg34 -S'NG_008082.1:g.5236C>A' -p319 -sg35 -(dp320 -g37 -(dp321 -g39 -S'NC_000016.9:g.89575040C>A' -p322 -sg41 -(dp323 -g43 -g44 -sg45 -g46 -sg47 -S'89575040' -p324 -sg49 -g77 -sssg51 -(dp325 -g39 -S'NC_000016.9:g.89575040C>A' -p326 -sg41 -(dp327 -g43 -g55 -sg45 -g46 -sg47 -S'89575040' -p328 -sg49 -g77 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant243.txt b/VariantValidator/testing/testOutputsMasterITS/variant243.txt deleted file mode 100644 index 421a405a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant243.txt +++ /dev/null @@ -1,478 +0,0 @@ -(dp0 -S'NM_199367.2:c.184-2A>C' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA -p12 -sS'gene_symbol' -p13 -S'SPG7' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_955399.1:p.?' -p18 -sS'slr' -p19 -S'NP_955399.1:p.?' -p20 -ssS'submitted_variant' -p21 -S'16-89576896-A-C' -p22 -sS'genome_context_intronic_sequence' -p23 -S'NC_000016.9(NM_199367.2):c.184-2A>C' -p24 -sS'HGVS_LRG_variant' -p25 -g4 -sS'HGVS_transcript_variant' -p26 -S'NM_199367.2:c.184-2A>C' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'GRCh38' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000016.10:g.89510488A>C' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'16' -p38 -sS'ref' -p39 -S'A' -p40 -sS'pos' -p41 -S'89510488' -p42 -sS'alt' -p43 -S'C' -p44 -sssS'GRCh37' -p45 -(dp46 -g33 -S'NC_000016.9:g.89576896A>C' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'89576896' -p49 -sg43 -g44 -sssS'hg38' -p50 -(dp51 -g33 -S'NC_000016.10:g.89510488A>C' -p52 -sg35 -(dp53 -g37 -S'chr16' -p54 -sg39 -g40 -sg41 -S'89510488' -p55 -sg43 -g44 -sssS'hg19' -p56 -(dp57 -g33 -S'NC_000016.9:g.89576896A>C' -p58 -sg35 -(dp59 -g37 -g54 -sg39 -g40 -sg41 -S'89576896' -p60 -sg43 -g44 -sssssS'NM_003119.2:c.184-2A>C' -p61 -(dp62 -g3 -g4 -sg5 -(lp63 -S'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' -p64 
-aS'NM_003119.3:c.184-2A>C MUST be fully validated prior to use in reports' -p65 -aS'select_variants=NM_003119.3:c.184-2A>C' -p66 -asg8 -S'NG_008082.1(NM_003119.2):c.184-2A>C' -p67 -sg9 -(lp68 -sg11 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p69 -sg13 -S'SPG7' -p70 -sg15 -(dp71 -g17 -S'NP_003110.1:p.?' -p72 -sg19 -S'NP_003110.1:p.?' -p73 -ssg21 -g22 -sg23 -S'NC_000016.9(NM_003119.2):c.184-2A>C' -p74 -sg25 -g4 -sg26 -S'NM_003119.2:c.184-2A>C' -p75 -sg28 -S'NG_008082.1:g.7092A>C' -p76 -sg29 -(dp77 -g56 -(dp78 -g33 -S'NC_000016.9:g.89576896A>C' -p79 -sg35 -(dp80 -g37 -g54 -sg39 -g40 -sg41 -S'89576896' -p81 -sg43 -g44 -sssg45 -(dp82 -g33 -S'NC_000016.9:g.89576896A>C' -p83 -sg35 -(dp84 -g37 -g38 -sg39 -g40 -sg41 -S'89576896' -p85 -sg43 -g44 -sssssS'NM_003119.3:c.184-2A>C' -p86 -(dp87 -g3 -g4 -sg5 -(lp88 -S'RefSeqGene record not available' -p89 -asg8 -g4 -sg9 -(lp90 -sg11 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p91 -sg13 -S'SPG7' -p92 -sg15 -(dp93 -g17 -S'NP_003110.1:p.?' -p94 -sg19 -S'NP_003110.1:p.?' 
-p95 -ssg21 -g22 -sg23 -S'NC_000016.9(NM_003119.3):c.184-2A>C' -p96 -sg25 -g4 -sg26 -S'NM_003119.3:c.184-2A>C' -p97 -sg28 -g4 -sg29 -(dp98 -g31 -(dp99 -g33 -S'NC_000016.10:g.89510488A>C' -p100 -sg35 -(dp101 -g37 -g38 -sg39 -g40 -sg41 -S'89510488' -p102 -sg43 -g44 -sssg45 -(dp103 -g33 -S'NC_000016.9:g.89576896A>C' -p104 -sg35 -(dp105 -g37 -g38 -sg39 -g40 -sg41 -S'89576896' -p106 -sg43 -g44 -sssg50 -(dp107 -g33 -S'NC_000016.10:g.89510488A>C' -p108 -sg35 -(dp109 -g37 -g54 -sg39 -g40 -sg41 -S'89510488' -p110 -sg43 -g44 -sssg56 -(dp111 -g33 -S'NC_000016.9:g.89576896A>C' -p112 -sg35 -(dp113 -g37 -g54 -sg39 -g40 -sg41 -S'89576896' -p114 -sg43 -g44 -sssssS'NM_001363850.1:c.184-2A>C' -p115 -(dp116 -g3 -g4 -sg5 -(lp117 -S'RefSeqGene record not available' -p118 -asg8 -g4 -sg9 -(lp119 -sg11 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p120 -sg13 -S'SPG7' -p121 -sg15 -(dp122 -g17 -S'NP_001350779.1:p.?' -p123 -sg19 -S'NP_001350779.1:p.?' -p124 -ssg21 -g22 -sg23 -S'NC_000016.9(NM_001363850.1):c.184-2A>C' -p125 -sg25 -g4 -sg26 -S'NM_001363850.1:c.184-2A>C' -p126 -sg28 -g4 -sg29 -(dp127 -g56 -(dp128 -g33 -S'NC_000016.9:g.89576896A>C' -p129 -sg35 -(dp130 -g37 -g54 -sg39 -g40 -sg41 -S'89576896' -p131 -sg43 -g44 -sssg45 -(dp132 -g33 -S'NC_000016.9:g.89576896A>C' -p133 -sg35 -(dp134 -g37 -g38 -sg39 -g40 -sg41 -S'89576896' -p135 -sg43 -g44 -sssssS'flag' -p136 -S'gene_variant' -p137 -sS'NM_199367.1:c.184-2A>C' -p138 -(dp139 -g3 -g4 -sg5 -(lp140 -S'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' -p141 -aS'NM_199367.2:c.184-2A>C MUST be fully validated prior to use in reports' -p142 -aS'select_variants=NM_199367.2:c.184-2A>C' -p143 -aS'RefSeqGene record not available' -p144 -asg8 -g4 -sg9 -(lp145 -sg11 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA -p146 -sg13 -S'SPG7' -p147 -sg15 -(dp148 -g17 -S'NP_955399.1:p.?' 
-p149 -sg19 -S'NP_955399.1:p.?' -p150 -ssg21 -g22 -sg23 -S'NC_000016.9(NM_199367.1):c.184-2A>C' -p151 -sg25 -g4 -sg26 -S'NM_199367.1:c.184-2A>C' -p152 -sg28 -g4 -sg29 -(dp153 -g56 -(dp154 -g33 -S'NC_000016.9:g.89576896A>C' -p155 -sg35 -(dp156 -g37 -g54 -sg39 -g40 -sg41 -S'89576896' -p157 -sg43 -g44 -sssg45 -(dp158 -g33 -S'NC_000016.9:g.89576896A>C' -p159 -sg35 -(dp160 -g37 -g38 -sg39 -g40 -sg41 -S'89576896' -p161 -sg43 -g44 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant244.txt b/VariantValidator/testing/testOutputsMasterITS/variant244.txt deleted file mode 100644 index 42774368..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant244.txt +++ /dev/null @@ -1,994 +0,0 @@ -(dp0 -S'NM_003119.3:c.216dup' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Multiple ALT sequences detected' -p7 -aS'auto-submitting all possible combinations' -p8 -aS'NC_000016.9:g.89576930T>TT automapped to NC_000016.9:g.89576930dupT' -p9 -aS'RefSeqGene record not available' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -g4 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p15 -sS'gene_symbol' -p16 -S'SPG7' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_003110.1:p.(Glu73Ter)' -p21 -sS'slr' -p22 -S'NP_003110.1:p.(E73*)' -p23 -ssS'submitted_variant' -p24 -S'16-89576930-T-TA,TT' -p25 -sS'genome_context_intronic_sequence' -p26 -g4 -sS'HGVS_LRG_variant' -p27 -g4 -sS'HGVS_transcript_variant' -p28 -S'NM_003119.3:c.216dup' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -g4 -sS'primary_assembly_loci' -p31 -(dp32 -S'GRCh38' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000016.10:g.89510522dup' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'16' -p40 -sS'ref' -p41 -S'T' -p42 -sS'pos' -p43 -S'89510520' -p44 -sS'alt' -p45 -S'TT' -p46 -sssS'GRCh37' -p47 
-(dp48 -g35 -S'NC_000016.9:g.89576930dup' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -g42 -sg43 -S'89576928' -p51 -sg45 -S'TT' -p52 -sssS'hg38' -p53 -(dp54 -g35 -S'NC_000016.10:g.89510522dup' -p55 -sg37 -(dp56 -g39 -S'chr16' -p57 -sg41 -g42 -sg43 -S'89510520' -p58 -sg45 -S'TT' -p59 -sssS'hg19' -p60 -(dp61 -g35 -S'NC_000016.9:g.89576930dup' -p62 -sg37 -(dp63 -g39 -g57 -sg41 -g42 -sg43 -S'89576928' -p64 -sg45 -S'TT' -p65 -sssssS'NM_003119.2:c.216_217insA' -p66 -(dp67 -g3 -g4 -sg5 -(lp68 -S'Multiple ALT sequences detected' -p69 -aS'auto-submitting all possible combinations' -p70 -aS'NC_000016.9:g.89576930T>TA automapped to NC_000016.9:g.89576930_89576931insA' -p71 -aS'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' -p72 -aS'NM_003119.3:c.216_217insA MUST be fully validated prior to use in reports' -p73 -aS'select_variants=NM_003119.3:c.216_217insA' -p74 -asg11 -g4 -sg12 -(lp75 -sg14 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p76 -sg16 -S'SPG7' -p77 -sg18 -(dp78 -g20 -S'NP_003110.1:p.(Glu73ArgfsTer30)' -p79 -sg22 -S'NP_003110.1:p.(E73Rfs*30)' -p80 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_003119.2:c.216_217insA' -p81 -sg30 -S'NG_008082.1:g.7126_7127insA' -p82 -sg31 -(dp83 -g60 -(dp84 -g35 -S'NC_000016.9:g.89576930_89576931insA' -p85 -sg37 -(dp86 -g39 -g57 -sg41 -g42 -sg43 -S'89576930' -p87 -sg45 -S'TA' -p88 -sssg47 -(dp89 -g35 -S'NC_000016.9:g.89576930_89576931insA' -p90 -sg37 -(dp91 -g39 -g40 -sg41 -g42 -sg43 -S'89576930' -p92 -sg45 -S'TA' -p93 -sssssS'NM_199367.2:c.216dup' -p94 -(dp95 -g3 -g4 -sg5 -(lp96 -S'Multiple ALT sequences detected' -p97 -aS'auto-submitting all possible combinations' -p98 -aS'NC_000016.9:g.89576930T>TT automapped to NC_000016.9:g.89576930dupT' -p99 -aS'RefSeqGene record not available' -p100 -asg11 -g4 -sg12 -(lp101 -sg14 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA -p102 -sg16 
-S'SPG7' -p103 -sg18 -(dp104 -g20 -S'NP_955399.1:p.(Glu73Ter)' -p105 -sg22 -S'NP_955399.1:p.(E73*)' -p106 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_199367.2:c.216dup' -p107 -sg30 -g4 -sg31 -(dp108 -g33 -(dp109 -g35 -S'NC_000016.10:g.89510522dup' -p110 -sg37 -(dp111 -g39 -g40 -sg41 -g42 -sg43 -S'89510520' -p112 -sg45 -S'TT' -p113 -sssg47 -(dp114 -g35 -S'NC_000016.9:g.89576930dup' -p115 -sg37 -(dp116 -g39 -g40 -sg41 -g42 -sg43 -S'89576928' -p117 -sg45 -S'TT' -p118 -sssg53 -(dp119 -g35 -S'NC_000016.10:g.89510522dup' -p120 -sg37 -(dp121 -g39 -g57 -sg41 -g42 -sg43 -S'89510520' -p122 -sg45 -S'TT' -p123 -sssg60 -(dp124 -g35 -S'NC_000016.9:g.89576930dup' -p125 -sg37 -(dp126 -g39 -g57 -sg41 -g42 -sg43 -S'89576928' -p127 -sg45 -S'TT' -p128 -sssssS'NM_199367.2:c.216_217insA' -p129 -(dp130 -g3 -g4 -sg5 -(lp131 -S'Multiple ALT sequences detected' -p132 -aS'auto-submitting all possible combinations' -p133 -aS'NC_000016.9:g.89576930T>TA automapped to NC_000016.9:g.89576930_89576931insA' -p134 -aS'RefSeqGene record not available' -p135 -asg11 -g4 -sg12 -(lp136 -sg14 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA -p137 -sg16 -S'SPG7' -p138 -sg18 -(dp139 -g20 -S'NP_955399.1:p.(Glu73ArgfsTer30)' -p140 -sg22 -S'NP_955399.1:p.(E73Rfs*30)' -p141 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_199367.2:c.216_217insA' -p142 -sg30 -g4 -sg31 -(dp143 -g33 -(dp144 -g35 -S'NC_000016.10:g.89510522_89510523insA' -p145 -sg37 -(dp146 -g39 -g40 -sg41 -g42 -sg43 -S'89510522' -p147 -sg45 -S'TA' -p148 -sssg47 -(dp149 -g35 -S'NC_000016.9:g.89576930_89576931insA' -p150 -sg37 -(dp151 -g39 -g40 -sg41 -g42 -sg43 -S'89576930' -p152 -sg45 -S'TA' -p153 -sssg53 -(dp154 -g35 -S'NC_000016.10:g.89510522_89510523insA' -p155 -sg37 -(dp156 -g39 -g57 -sg41 -g42 -sg43 -S'89510522' -p157 -sg45 -S'TA' -p158 -sssg60 -(dp159 -g35 -S'NC_000016.9:g.89576930_89576931insA' -p160 -sg37 -(dp161 -g39 -g57 -sg41 -g42 -sg43 -S'89576930' -p162 -sg45 -S'TA' -p163 
-sssssS'NM_001363850.1:c.216dup' -p164 -(dp165 -g3 -g4 -sg5 -(lp166 -S'Multiple ALT sequences detected' -p167 -aS'auto-submitting all possible combinations' -p168 -aS'NC_000016.9:g.89576930T>TT automapped to NC_000016.9:g.89576930dupT' -p169 -aS'RefSeqGene record not available' -p170 -asg11 -g4 -sg12 -(lp171 -sg14 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p172 -sg16 -S'SPG7' -p173 -sg18 -(dp174 -g20 -S'NP_001350779.1:p.(Glu73Ter)' -p175 -sg22 -S'NP_001350779.1:p.(E73*)' -p176 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_001363850.1:c.216dup' -p177 -sg30 -g4 -sg31 -(dp178 -g60 -(dp179 -g35 -S'NC_000016.9:g.89576930dup' -p180 -sg37 -(dp181 -g39 -g57 -sg41 -g42 -sg43 -S'89576928' -p182 -sg45 -S'TT' -p183 -sssg47 -(dp184 -g35 -S'NC_000016.9:g.89576930dup' -p185 -sg37 -(dp186 -g39 -g40 -sg41 -g42 -sg43 -S'89576928' -p187 -sg45 -S'TT' -p188 -sssssS'flag' -p189 -S'gene_variant' -p190 -sS'NM_001363850.1:c.216_217insA' -p191 -(dp192 -g3 -g4 -sg5 -(lp193 -S'Multiple ALT sequences detected' -p194 -aS'auto-submitting all possible combinations' -p195 -aS'NC_000016.9:g.89576930T>TA automapped to NC_000016.9:g.89576930_89576931insA' -p196 -aS'RefSeqGene record not available' -p197 -asg11 -g4 -sg12 -(lp198 -sg14 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p199 -sg16 -S'SPG7' -p200 -sg18 -(dp201 -g20 -S'NP_001350779.1:p.(Glu73ArgfsTer30)' -p202 -sg22 -S'NP_001350779.1:p.(E73Rfs*30)' -p203 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_001363850.1:c.216_217insA' -p204 -sg30 -g4 -sg31 -(dp205 -g60 -(dp206 -g35 -S'NC_000016.9:g.89576930_89576931insA' -p207 -sg37 -(dp208 -g39 -g57 -sg41 -g42 -sg43 -S'89576930' -p209 -sg45 -S'TA' -p210 -sssg47 -(dp211 -g35 -S'NC_000016.9:g.89576930_89576931insA' -p212 -sg37 -(dp213 -g39 -g40 -sg41 -g42 -sg43 -S'89576930' -p214 -sg45 -S'TA' -p215 -sssssS'NM_199367.1:c.216_217insA' -p216 -(dp217 -g3 -g4 -sg5 -(lp218 -S'Multiple ALT sequences detected' 
-p219 -aS'auto-submitting all possible combinations' -p220 -aS'NC_000016.9:g.89576930T>TA automapped to NC_000016.9:g.89576930_89576931insA' -p221 -aS'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' -p222 -aS'NM_199367.2:c.216_217insA MUST be fully validated prior to use in reports' -p223 -aS'select_variants=NM_199367.2:c.216_217insA' -p224 -aS'RefSeqGene record not available' -p225 -asg11 -g4 -sg12 -(lp226 -sg14 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA -p227 -sg16 -S'SPG7' -p228 -sg18 -(dp229 -g20 -S'NP_955399.1:p.(Glu73ArgfsTer30)' -p230 -sg22 -S'NP_955399.1:p.(E73Rfs*30)' -p231 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_199367.1:c.216_217insA' -p232 -sg30 -g4 -sg31 -(dp233 -g60 -(dp234 -g35 -S'NC_000016.9:g.89576930_89576931insA' -p235 -sg37 -(dp236 -g39 -g57 -sg41 -g42 -sg43 -S'89576930' -p237 -sg45 -S'TA' -p238 -sssg47 -(dp239 -g35 -S'NC_000016.9:g.89576930_89576931insA' -p240 -sg37 -(dp241 -g39 -g40 -sg41 -g42 -sg43 -S'89576930' -p242 -sg45 -S'TA' -p243 -sssssS'NM_199367.1:c.216dup' -p244 -(dp245 -g3 -g4 -sg5 -(lp246 -S'Multiple ALT sequences detected' -p247 -aS'auto-submitting all possible combinations' -p248 -aS'NC_000016.9:g.89576930T>TT automapped to NC_000016.9:g.89576930dupT' -p249 -aS'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' -p250 -aS'NM_199367.2:c.216dupT MUST be fully validated prior to use in reports' -p251 -aS'select_variants=NM_199367.2:c.216dup' -p252 -aS'RefSeqGene record not available' -p253 -asg11 -g4 -sg12 -(lp254 -sg14 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA -p255 -sg16 -S'SPG7' -p256 -sg18 -(dp257 -g20 -S'NP_955399.1:p.(Glu73Ter)' -p258 -sg22 -S'NP_955399.1:p.(E73*)' -p259 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_199367.1:c.216dup' -p260 -sg30 -g4 -sg31 -(dp261 -g60 -(dp262 -g35 
-S'NC_000016.9:g.89576930dup' -p263 -sg37 -(dp264 -g39 -g57 -sg41 -g42 -sg43 -S'89576928' -p265 -sg45 -S'TT' -p266 -sssg47 -(dp267 -g35 -S'NC_000016.9:g.89576930dup' -p268 -sg37 -(dp269 -g39 -g40 -sg41 -g42 -sg43 -S'89576928' -p270 -sg45 -S'TT' -p271 -sssssS'NM_003119.3:c.216_217insA' -p272 -(dp273 -g3 -g4 -sg5 -(lp274 -S'Multiple ALT sequences detected' -p275 -aS'auto-submitting all possible combinations' -p276 -aS'NC_000016.9:g.89576930T>TA automapped to NC_000016.9:g.89576930_89576931insA' -p277 -aS'RefSeqGene record not available' -p278 -asg11 -g4 -sg12 -(lp279 -sg14 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p280 -sg16 -S'SPG7' -p281 -sg18 -(dp282 -g20 -S'NP_003110.1:p.(Glu73ArgfsTer30)' -p283 -sg22 -S'NP_003110.1:p.(E73Rfs*30)' -p284 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_003119.3:c.216_217insA' -p285 -sg30 -g4 -sg31 -(dp286 -g33 -(dp287 -g35 -S'NC_000016.10:g.89510522_89510523insA' -p288 -sg37 -(dp289 -g39 -g40 -sg41 -g42 -sg43 -S'89510522' -p290 -sg45 -S'TA' -p291 -sssg47 -(dp292 -g35 -S'NC_000016.9:g.89576930_89576931insA' -p293 -sg37 -(dp294 -g39 -g40 -sg41 -g42 -sg43 -S'89576930' -p295 -sg45 -S'TA' -p296 -sssg53 -(dp297 -g35 -S'NC_000016.10:g.89510522_89510523insA' -p298 -sg37 -(dp299 -g39 -g57 -sg41 -g42 -sg43 -S'89510522' -p300 -sg45 -S'TA' -p301 -sssg60 -(dp302 -g35 -S'NC_000016.9:g.89576930_89576931insA' -p303 -sg37 -(dp304 -g39 -g57 -sg41 -g42 -sg43 -S'89576930' -p305 -sg45 -S'TA' -p306 -sssssS'NM_003119.2:c.216dup' -p307 -(dp308 -g3 -g4 -sg5 -(lp309 -S'Multiple ALT sequences detected' -p310 -aS'auto-submitting all possible combinations' -p311 -aS'NC_000016.9:g.89576930T>TT automapped to NC_000016.9:g.89576930dupT' -p312 -aS'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' -p313 -aS'NM_003119.3:c.216dupT MUST be fully validated prior to use in reports' -p314 -aS'select_variants=NM_003119.3:c.216dup' -p315 -asg11 -g4 -sg12 -(lp316 -sg14 -VHomo 
sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p317 -sg16 -S'SPG7' -p318 -sg18 -(dp319 -g20 -S'NP_003110.1:p.(Glu73Ter)' -p320 -sg22 -S'NP_003110.1:p.(E73*)' -p321 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_003119.2:c.216dup' -p322 -sg30 -S'NG_008082.1:g.7126dup' -p323 -sg31 -(dp324 -g60 -(dp325 -g35 -S'NC_000016.9:g.89576930dup' -p326 -sg37 -(dp327 -g39 -g57 -sg41 -g42 -sg43 -S'89576928' -p328 -sg45 -S'TT' -p329 -sssg47 -(dp330 -g35 -S'NC_000016.9:g.89576930dup' -p331 -sg37 -(dp332 -g39 -g40 -sg41 -g42 -sg43 -S'89576928' -p333 -sg45 -S'TT' -p334 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant245.txt b/VariantValidator/testing/testOutputsMasterITS/variant245.txt deleted file mode 100644 index 8f2a5d0f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant245.txt +++ /dev/null @@ -1,508 +0,0 @@ -(dp0 -S'NM_199367.1:c.216_217dup' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000016.9:g.89576931G>GTG automapped to NC_000016.9:g.89576930_89576931dupTG' -p7 -aS'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' -p8 -aS'NM_199367.2:c.216_217dupTG MUST be fully validated prior to use in reports' -p9 -aS'select_variants=NM_199367.2:c.216_217dup' -p10 -aS'RefSeqGene record not available' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA -p16 -sS'gene_symbol' -p17 -S'SPG7' -p18 -sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_955399.1:p.(Glu73ValfsTer9)' -p22 -sS'slr' -p23 -S'NP_955399.1:p.(E73Vfs*9)' -p24 -ssS'submitted_variant' -p25 -S'16-89576931-G-GTG' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'HGVS_LRG_variant' -p28 -g4 
-sS'HGVS_transcript_variant' -p29 -S'NM_199367.1:c.216_217dup' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000016.9:g.89576930_89576931dup' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr16' -p41 -sS'ref' -p42 -S'TG' -p43 -sS'pos' -p44 -S'89576930' -p45 -sS'alt' -p46 -S'TGTG' -p47 -sssS'GRCh37' -p48 -(dp49 -g36 -S'NC_000016.9:g.89576930_89576931dup' -p50 -sg38 -(dp51 -g40 -S'16' -p52 -sg42 -S'TG' -p53 -sg44 -S'89576930' -p54 -sg46 -S'TGTG' -p55 -sssssS'NM_003119.3:c.216_217dup' -p56 -(dp57 -g3 -g4 -sg5 -(lp58 -S'NC_000016.9:g.89576931G>GTG automapped to NC_000016.9:g.89576930_89576931dupTG' -p59 -aS'RefSeqGene record not available' -p60 -asg12 -g4 -sg13 -(lp61 -sg15 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p62 -sg17 -S'SPG7' -p63 -sg19 -(dp64 -g21 -S'NP_003110.1:p.(Glu73ValfsTer9)' -p65 -sg23 -S'NP_003110.1:p.(E73Vfs*9)' -p66 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_003119.3:c.216_217dup' -p67 -sg31 -g4 -sg32 -(dp68 -S'GRCh38' -p69 -(dp70 -g36 -S'NC_000016.10:g.89510522_89510523dup' -p71 -sg38 -(dp72 -g40 -g52 -sg42 -S'TG' -p73 -sg44 -S'89510522' -p74 -sg46 -S'TGTG' -p75 -sssg48 -(dp76 -g36 -S'NC_000016.9:g.89576930_89576931dup' -p77 -sg38 -(dp78 -g40 -g52 -sg42 -S'TG' -p79 -sg44 -S'89576930' -p80 -sg46 -S'TGTG' -p81 -sssS'hg38' -p82 -(dp83 -g36 -S'NC_000016.10:g.89510522_89510523dup' -p84 -sg38 -(dp85 -g40 -g41 -sg42 -S'TG' -p86 -sg44 -S'89510522' -p87 -sg46 -S'TGTG' -p88 -sssg34 -(dp89 -g36 -S'NC_000016.9:g.89576930_89576931dup' -p90 -sg38 -(dp91 -g40 -g41 -sg42 -S'TG' -p92 -sg44 -S'89576930' -p93 -sg46 -S'TGTG' -p94 -sssssS'NM_199367.2:c.216_217dup' -p95 -(dp96 -g3 -g4 -sg5 -(lp97 -S'NC_000016.9:g.89576931G>GTG automapped to NC_000016.9:g.89576930_89576931dupTG' -p98 -aS'RefSeqGene record not available' -p99 -asg12 -g4 -sg13 -(lp100 -sg15 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), 
transcript variant 2, mRNA -p101 -sg17 -S'SPG7' -p102 -sg19 -(dp103 -g21 -S'NP_955399.1:p.(Glu73ValfsTer9)' -p104 -sg23 -S'NP_955399.1:p.(E73Vfs*9)' -p105 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_199367.2:c.216_217dup' -p106 -sg31 -g4 -sg32 -(dp107 -g69 -(dp108 -g36 -S'NC_000016.10:g.89510522_89510523dup' -p109 -sg38 -(dp110 -g40 -g52 -sg42 -S'TG' -p111 -sg44 -S'89510522' -p112 -sg46 -S'TGTG' -p113 -sssg48 -(dp114 -g36 -S'NC_000016.9:g.89576930_89576931dup' -p115 -sg38 -(dp116 -g40 -g52 -sg42 -S'TG' -p117 -sg44 -S'89576930' -p118 -sg46 -S'TGTG' -p119 -sssg82 -(dp120 -g36 -S'NC_000016.10:g.89510522_89510523dup' -p121 -sg38 -(dp122 -g40 -g41 -sg42 -S'TG' -p123 -sg44 -S'89510522' -p124 -sg46 -S'TGTG' -p125 -sssg34 -(dp126 -g36 -S'NC_000016.9:g.89576930_89576931dup' -p127 -sg38 -(dp128 -g40 -g41 -sg42 -S'TG' -p129 -sg44 -S'89576930' -p130 -sg46 -S'TGTG' -p131 -sssssS'NM_003119.2:c.216_217dup' -p132 -(dp133 -g3 -g4 -sg5 -(lp134 -S'NC_000016.9:g.89576931G>GTG automapped to NC_000016.9:g.89576930_89576931dupTG' -p135 -aS'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' -p136 -aS'NM_003119.3:c.216_217dupTG MUST be fully validated prior to use in reports' -p137 -aS'select_variants=NM_003119.3:c.216_217dup' -p138 -asg12 -g4 -sg13 -(lp139 -sg15 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p140 -sg17 -S'SPG7' -p141 -sg19 -(dp142 -g21 -S'NP_003110.1:p.(Glu73ValfsTer9)' -p143 -sg23 -S'NP_003110.1:p.(E73Vfs*9)' -p144 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_003119.2:c.216_217dup' -p145 -sg31 -S'NG_008082.1:g.7126_7127dup' -p146 -sg32 -(dp147 -g34 -(dp148 -g36 -S'NC_000016.9:g.89576930_89576931dup' -p149 -sg38 -(dp150 -g40 -g41 -sg42 -S'TG' -p151 -sg44 -S'89576930' -p152 -sg46 -S'TGTG' -p153 -sssg48 -(dp154 -g36 -S'NC_000016.9:g.89576930_89576931dup' -p155 -sg38 -(dp156 -g40 -g52 -sg42 -S'TG' -p157 -sg44 -S'89576930' -p158 -sg46 -S'TGTG' -p159 -sssssS'flag' 
-p160 -S'gene_variant' -p161 -sS'NM_001363850.1:c.216_217dup' -p162 -(dp163 -g3 -g4 -sg5 -(lp164 -S'NC_000016.9:g.89576931G>GTG automapped to NC_000016.9:g.89576930_89576931dupTG' -p165 -aS'RefSeqGene record not available' -p166 -asg12 -g4 -sg13 -(lp167 -sg15 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p168 -sg17 -S'SPG7' -p169 -sg19 -(dp170 -g21 -S'NP_001350779.1:p.(Glu73ValfsTer9)' -p171 -sg23 -S'NP_001350779.1:p.(E73Vfs*9)' -p172 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_001363850.1:c.216_217dup' -p173 -sg31 -g4 -sg32 -(dp174 -g34 -(dp175 -g36 -S'NC_000016.9:g.89576930_89576931dup' -p176 -sg38 -(dp177 -g40 -g41 -sg42 -S'TG' -p178 -sg44 -S'89576930' -p179 -sg46 -S'TGTG' -p180 -sssg48 -(dp181 -g36 -S'NC_000016.9:g.89576930_89576931dup' -p182 -sg38 -(dp183 -g40 -g52 -sg42 -S'TG' -p184 -sg44 -S'89576930' -p185 -sg46 -S'TGTG' -p186 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant246.txt b/VariantValidator/testing/testOutputsMasterITS/variant246.txt deleted file mode 100644 index d2aa6880..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant246.txt +++ /dev/null @@ -1,495 +0,0 @@ -(dp0 -S'NM_199367.1:c.1046_1071del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000016.9:g.89598368CGGCCCCCCCGGCTGTGGGAAGACGCT>C automapped to NC_000016.9:g.89598370_89598395del' -p7 -aS'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' -p8 -aS'NM_199367.2:c.1046_1071del MUST be fully validated prior to use in reports' -p9 -aS'select_variants=NM_199367.2:c.1046_1071del' -p10 -aS'RefSeqGene record not available' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA -p16 -sS'gene_symbol' -p17 
-S'SPG7' -p18 -sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_955399.1:p.(Gly349AlafsTer38)' -p22 -sS'slr' -p23 -S'NP_955399.1:p.(G349Afs*38)' -p24 -ssS'submitted_variant' -p25 -S'16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'HGVS_LRG_variant' -p28 -g4 -sS'HGVS_transcript_variant' -p29 -S'NM_199367.1:c.1046_1071del' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000016.9:g.89598370_89598395del' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr16' -p41 -sS'ref' -p42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p43 -sS'pos' -p44 -S'89598368' -p45 -sS'alt' -p46 -S'C' -p47 -sssS'GRCh37' -p48 -(dp49 -g36 -S'NC_000016.9:g.89598370_89598395del' -p50 -sg38 -(dp51 -g40 -S'16' -p52 -sg42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p53 -sg44 -S'89598368' -p54 -sg46 -g47 -sssssS'NM_001363850.1:c.1046_1071del' -p55 -(dp56 -g3 -g4 -sg5 -(lp57 -S'NC_000016.9:g.89598368CGGCCCCCCCGGCTGTGGGAAGACGCT>C automapped to NC_000016.9:g.89598370_89598395del' -p58 -aS'RefSeqGene record not available' -p59 -asg12 -g4 -sg13 -(lp60 -sg15 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p61 -sg17 -S'SPG7' -p62 -sg19 -(dp63 -g21 -S'NP_001350779.1:p.(Gly349AlafsTer38)' -p64 -sg23 -S'NP_001350779.1:p.(G349Afs*38)' -p65 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_001363850.1:c.1046_1071del' -p66 -sg31 -g4 -sg32 -(dp67 -g34 -(dp68 -g36 -S'NC_000016.9:g.89598370_89598395del' -p69 -sg38 -(dp70 -g40 -g41 -sg42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p71 -sg44 -S'89598368' -p72 -sg46 -g47 -sssg48 -(dp73 -g36 -S'NC_000016.9:g.89598370_89598395del' -p74 -sg38 -(dp75 -g40 -g52 -sg42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p76 -sg44 -S'89598368' -p77 -sg46 -g47 -sssssS'NM_199367.2:c.1046_1071del' -p78 -(dp79 -g3 -g4 -sg5 -(lp80 -S'NC_000016.9:g.89598368CGGCCCCCCCGGCTGTGGGAAGACGCT>C automapped to NC_000016.9:g.89598370_89598395del' 
-p81 -aS'RefSeqGene record not available' -p82 -asg12 -g4 -sg13 -(lp83 -sg15 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA -p84 -sg17 -S'SPG7' -p85 -sg19 -(dp86 -g21 -S'NP_955399.1:p.(Gly349AlafsTer38)' -p87 -sg23 -S'NP_955399.1:p.(G349Afs*38)' -p88 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_199367.2:c.1046_1071del' -p89 -sg31 -g4 -sg32 -(dp90 -S'GRCh38' -p91 -(dp92 -g36 -S'NC_000016.10:g.89531962_89531987del' -p93 -sg38 -(dp94 -g40 -g52 -sg42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p95 -sg44 -S'89531960' -p96 -sg46 -g47 -sssg48 -(dp97 -g36 -S'NC_000016.9:g.89598370_89598395del' -p98 -sg38 -(dp99 -g40 -g52 -sg42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p100 -sg44 -S'89598368' -p101 -sg46 -g47 -sssS'hg38' -p102 -(dp103 -g36 -S'NC_000016.10:g.89531962_89531987del' -p104 -sg38 -(dp105 -g40 -g41 -sg42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p106 -sg44 -S'89531960' -p107 -sg46 -g47 -sssg34 -(dp108 -g36 -S'NC_000016.9:g.89598370_89598395del' -p109 -sg38 -(dp110 -g40 -g41 -sg42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p111 -sg44 -S'89598368' -p112 -sg46 -g47 -sssssS'flag' -p113 -S'gene_variant' -p114 -sS'NM_003119.2:c.1046_1071del' -p115 -(dp116 -g3 -g4 -sg5 -(lp117 -S'NC_000016.9:g.89598368CGGCCCCCCCGGCTGTGGGAAGACGCT>C automapped to NC_000016.9:g.89598370_89598395del' -p118 -aS'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' -p119 -aS'NM_003119.3:c.1046_1071del MUST be fully validated prior to use in reports' -p120 -aS'select_variants=NM_003119.3:c.1046_1071del' -p121 -asg12 -g4 -sg13 -(lp122 -sg15 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p123 -sg17 -S'SPG7' -p124 -sg19 -(dp125 -g21 -S'NP_003110.1:p.(Gly349AlafsTer38)' -p126 -sg23 -S'NP_003110.1:p.(G349Afs*38)' -p127 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_003119.2:c.1046_1071del' -p128 -sg31 -S'NG_008082.1:g.28566_28591del' -p129 -sg32 -(dp130 -g34 -(dp131 -g36 
-S'NC_000016.9:g.89598370_89598395del' -p132 -sg38 -(dp133 -g40 -g41 -sg42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p134 -sg44 -S'89598368' -p135 -sg46 -g47 -sssg48 -(dp136 -g36 -S'NC_000016.9:g.89598370_89598395del' -p137 -sg38 -(dp138 -g40 -g52 -sg42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p139 -sg44 -S'89598368' -p140 -sg46 -g47 -sssssS'NM_003119.3:c.1046_1071del' -p141 -(dp142 -g3 -g4 -sg5 -(lp143 -S'NC_000016.9:g.89598368CGGCCCCCCCGGCTGTGGGAAGACGCT>C automapped to NC_000016.9:g.89598370_89598395del' -p144 -aS'RefSeqGene record not available' -p145 -asg12 -g4 -sg13 -(lp146 -sg15 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p147 -sg17 -S'SPG7' -p148 -sg19 -(dp149 -g21 -S'NP_003110.1:p.(Gly349AlafsTer38)' -p150 -sg23 -S'NP_003110.1:p.(G349Afs*38)' -p151 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_003119.3:c.1046_1071del' -p152 -sg31 -g4 -sg32 -(dp153 -g91 -(dp154 -g36 -S'NC_000016.10:g.89531962_89531987del' -p155 -sg38 -(dp156 -g40 -g52 -sg42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p157 -sg44 -S'89531960' -p158 -sg46 -g47 -sssg48 -(dp159 -g36 -S'NC_000016.9:g.89598370_89598395del' -p160 -sg38 -(dp161 -g40 -g52 -sg42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p162 -sg44 -S'89598368' -p163 -sg46 -g47 -sssg102 -(dp164 -g36 -S'NC_000016.10:g.89531962_89531987del' -p165 -sg38 -(dp166 -g40 -g41 -sg42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p167 -sg44 -S'89531960' -p168 -sg46 -g47 -sssg34 -(dp169 -g36 -S'NC_000016.9:g.89598370_89598395del' -p170 -sg38 -(dp171 -g40 -g41 -sg42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p172 -sg44 -S'89598368' -p173 -sg46 -g47 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant247.txt b/VariantValidator/testing/testOutputsMasterITS/variant247.txt deleted file mode 100644 index e42b471f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant247.txt +++ /dev/null @@ -1,307 +0,0 @@ -(dp0 -S'NM_001363850.1:c.1450-1_1457delinsT' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000016.9:g.89613064AGGAGAGGCG>AT automapped to NC_000016.9:g.89613065_89613073delinsT' -p7 -aS'RefSeqGene record not available' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p13 -sS'gene_symbol' -p14 -S'SPG7' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001350779.1:p.?' -p19 -sS'slr' -p20 -S'NP_001350779.1:p.?' -p21 -ssS'submitted_variant' -p22 -S'16-89613064-AGGAGAGGCG-AT' -p23 -sS'genome_context_intronic_sequence' -p24 -S'NC_000016.9(NM_001363850.1):c.1450-1_1457delinsT' -p25 -sS'HGVS_LRG_variant' -p26 -g4 -sS'HGVS_transcript_variant' -p27 -S'NM_001363850.1:c.1450-1_1457delinsT' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -g4 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000016.9:g.89613065_89613073delinsT' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr16' -p39 -sS'ref' -p40 -S'GGAGAGGCG' -p41 -sS'pos' -p42 -S'89613065' -p43 -sS'alt' -p44 -S'T' -p45 -sssS'GRCh37' -p46 -(dp47 -g34 -S'NC_000016.9:g.89613065_89613073delinsT' -p48 -sg36 -(dp49 -g38 -S'16' -p50 -sg40 -S'GGAGAGGCG' -p51 -sg42 -S'89613065' -p52 -sg44 -g45 -sssssS'flag' -p53 -S'gene_variant' -p54 -sS'NM_003119.2:c.1450-1_1457delinsT' -p55 -(dp56 -g3 -g4 -sg5 -(lp57 -S'NC_000016.9:g.89613064AGGAGAGGCG>AT automapped to NC_000016.9:g.89613065_89613073delinsT' -p58 -aS'A more recent version of the selected 
reference sequence NM_003119.2 is available (NM_003119.3)' -p59 -aS'NM_003119.3:c.1450-1_1457delinsT MUST be fully validated prior to use in reports' -p60 -aS'select_variants=NM_003119.3:c.1450-1_1457delinsT' -p61 -asg9 -S'NG_008082.1(NM_003119.2):c.1450-1_1457delinsT' -p62 -sg10 -(lp63 -sg12 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p64 -sg14 -S'SPG7' -p65 -sg16 -(dp66 -g18 -S'NP_003110.1:p.?' -p67 -sg20 -S'NP_003110.1:p.?' -p68 -ssg22 -g23 -sg24 -S'NC_000016.9(NM_003119.2):c.1450-1_1457delinsT' -p69 -sg26 -g4 -sg27 -S'NM_003119.2:c.1450-1_1457delinsT' -p70 -sg29 -S'NG_008082.1:g.43261_43269delinsT' -p71 -sg30 -(dp72 -g32 -(dp73 -g34 -S'NC_000016.9:g.89613065_89613073delinsT' -p74 -sg36 -(dp75 -g38 -g39 -sg40 -S'GGAGAGGCG' -p76 -sg42 -S'89613065' -p77 -sg44 -g45 -sssg46 -(dp78 -g34 -S'NC_000016.9:g.89613065_89613073delinsT' -p79 -sg36 -(dp80 -g38 -g50 -sg40 -S'GGAGAGGCG' -p81 -sg42 -S'89613065' -p82 -sg44 -g45 -sssssS'NM_003119.3:c.1450-1_1457delinsT' -p83 -(dp84 -g3 -g4 -sg5 -(lp85 -S'NC_000016.9:g.89613064AGGAGAGGCG>AT automapped to NC_000016.9:g.89613065_89613073delinsT' -p86 -aS'RefSeqGene record not available' -p87 -asg9 -g4 -sg10 -(lp88 -sg12 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p89 -sg14 -S'SPG7' -p90 -sg16 -(dp91 -g18 -S'NP_003110.1:p.?' -p92 -sg20 -S'NP_003110.1:p.?' 
-p93 -ssg22 -g23 -sg24 -S'NC_000016.9(NM_003119.3):c.1450-1_1457delinsT' -p94 -sg26 -g4 -sg27 -S'NM_003119.3:c.1450-1_1457delinsT' -p95 -sg29 -g4 -sg30 -(dp96 -S'GRCh38' -p97 -(dp98 -g34 -S'NC_000016.10:g.89546657_89546665delinsT' -p99 -sg36 -(dp100 -g38 -g50 -sg40 -S'GGAGAGGCG' -p101 -sg42 -S'89546657' -p102 -sg44 -g45 -sssg46 -(dp103 -g34 -S'NC_000016.9:g.89613065_89613073delinsT' -p104 -sg36 -(dp105 -g38 -g50 -sg40 -S'GGAGAGGCG' -p106 -sg42 -S'89613065' -p107 -sg44 -g45 -sssS'hg38' -p108 -(dp109 -g34 -S'NC_000016.10:g.89546657_89546665delinsT' -p110 -sg36 -(dp111 -g38 -g39 -sg40 -S'GGAGAGGCG' -p112 -sg42 -S'89546657' -p113 -sg44 -g45 -sssg32 -(dp114 -g34 -S'NC_000016.9:g.89613065_89613073delinsT' -p115 -sg36 -(dp116 -g38 -g39 -sg40 -S'GGAGAGGCG' -p117 -sg42 -S'89613065' -p118 -sg44 -g45 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant248.txt b/VariantValidator/testing/testOutputsMasterITS/variant248.txt deleted file mode 100644 index dd4414ca..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant248.txt +++ /dev/null @@ -1,303 +0,0 @@ -(dp0 -S'NM_003119.2:c.1454_1462delinsT' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000016.9:g.89613069AGGCGGGAGA>AT automapped to NC_000016.9:g.89613070_89613078delinsT' -p7 -aS'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' -p8 -aS'NM_003119.3:c.1454_1462delinsT MUST be fully validated prior to use in reports' -p9 -aS'select_variants=NM_003119.3:c.1454_1462delinsT' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -g4 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p15 -sS'gene_symbol' -p16 -S'SPG7' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_003110.1:p.(Arg485IlefsTer3)' -p21 -sS'slr' -p22 
-S'NP_003110.1:p.(R485Ifs*3)' -p23 -ssS'submitted_variant' -p24 -S'16-89613069-AGGCGGGAGA-AT' -p25 -sS'genome_context_intronic_sequence' -p26 -g4 -sS'HGVS_LRG_variant' -p27 -g4 -sS'HGVS_transcript_variant' -p28 -S'NM_003119.2:c.1454_1462delinsT' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_008082.1:g.43266_43274delinsT' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000016.9:g.89613070_89613078delinsT' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr16' -p41 -sS'ref' -p42 -S'GGCGGGAGA' -p43 -sS'pos' -p44 -S'89613070' -p45 -sS'alt' -p46 -S'T' -p47 -sssS'GRCh37' -p48 -(dp49 -g36 -S'NC_000016.9:g.89613070_89613078delinsT' -p50 -sg38 -(dp51 -g40 -S'16' -p52 -sg42 -S'GGCGGGAGA' -p53 -sg44 -S'89613070' -p54 -sg46 -g47 -sssssS'flag' -p55 -S'gene_variant' -p56 -sS'NM_001363850.1:c.1454_1462delinsT' -p57 -(dp58 -g3 -g4 -sg5 -(lp59 -S'NC_000016.9:g.89613069AGGCGGGAGA>AT automapped to NC_000016.9:g.89613070_89613078delinsT' -p60 -aS'RefSeqGene record not available' -p61 -asg11 -g4 -sg12 -(lp62 -sg14 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p63 -sg16 -S'SPG7' -p64 -sg18 -(dp65 -g20 -S'NP_001350779.1:p.(Arg485IlefsTer3)' -p66 -sg22 -S'NP_001350779.1:p.(R485Ifs*3)' -p67 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_001363850.1:c.1454_1462delinsT' -p68 -sg30 -g4 -sg32 -(dp69 -g34 -(dp70 -g36 -S'NC_000016.9:g.89613070_89613078delinsT' -p71 -sg38 -(dp72 -g40 -g41 -sg42 -S'GGCGGGAGA' -p73 -sg44 -S'89613070' -p74 -sg46 -g47 -sssg48 -(dp75 -g36 -S'NC_000016.9:g.89613070_89613078delinsT' -p76 -sg38 -(dp77 -g40 -g52 -sg42 -S'GGCGGGAGA' -p78 -sg44 -S'89613070' -p79 -sg46 -g47 -sssssS'NM_003119.3:c.1454_1462delinsT' -p80 -(dp81 -g3 -g4 -sg5 -(lp82 -S'NC_000016.9:g.89613069AGGCGGGAGA>AT automapped to NC_000016.9:g.89613070_89613078delinsT' -p83 -aS'RefSeqGene record not available' -p84 -asg11 -g4 -sg12 -(lp85 -sg14 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), 
transcript variant 1, mRNA -p86 -sg16 -S'SPG7' -p87 -sg18 -(dp88 -g20 -S'NP_003110.1:p.(Arg485IlefsTer3)' -p89 -sg22 -S'NP_003110.1:p.(R485Ifs*3)' -p90 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_003119.3:c.1454_1462delinsT' -p91 -sg30 -g4 -sg32 -(dp92 -S'GRCh38' -p93 -(dp94 -g36 -S'NC_000016.10:g.89546662_89546670delinsT' -p95 -sg38 -(dp96 -g40 -g52 -sg42 -S'GGCGGGAGA' -p97 -sg44 -S'89546662' -p98 -sg46 -g47 -sssg48 -(dp99 -g36 -S'NC_000016.9:g.89613070_89613078delinsT' -p100 -sg38 -(dp101 -g40 -g52 -sg42 -S'GGCGGGAGA' -p102 -sg44 -S'89613070' -p103 -sg46 -g47 -sssS'hg38' -p104 -(dp105 -g36 -S'NC_000016.10:g.89546662_89546670delinsT' -p106 -sg38 -(dp107 -g40 -g41 -sg42 -S'GGCGGGAGA' -p108 -sg44 -S'89546662' -p109 -sg46 -g47 -sssg34 -(dp110 -g36 -S'NC_000016.9:g.89613070_89613078delinsT' -p111 -sg38 -(dp112 -g40 -g41 -sg42 -S'GGCGGGAGA' -p113 -sg44 -S'89613070' -p114 -sg46 -g47 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant249.txt b/VariantValidator/testing/testOutputsMasterITS/variant249.txt deleted file mode 100644 index 3daef8b9..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant249.txt +++ /dev/null @@ -1,290 +0,0 @@ -(dp0 -S'NM_001363850.1:c.1529C>T' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p12 -sS'gene_symbol' -p13 -S'SPG7' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001350779.1:p.(Ala510Val)' -p18 -sS'slr' -p19 -S'NP_001350779.1:p.(A510V)' -p20 -ssS'submitted_variant' -p21 -S'16-89613145-C-T' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_001363850.1:c.1529C>T' -p26 
-sS'HGVS_RefSeqGene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000016.9:g.89613145C>T' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr16' -p37 -sS'ref' -p38 -S'C' -p39 -sS'pos' -p40 -S'89613145' -p41 -sS'alt' -p42 -S'T' -p43 -sssS'GRCh37' -p44 -(dp45 -g32 -S'NC_000016.9:g.89613145C>T' -p46 -sg34 -(dp47 -g36 -S'16' -p48 -sg38 -g39 -sg40 -S'89613145' -p49 -sg42 -g43 -sssssS'NM_003119.3:c.1529C>T' -p50 -(dp51 -g3 -g4 -sg5 -(lp52 -S'RefSeqGene record not available' -p53 -asg8 -g4 -sg9 -(lp54 -sg11 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p55 -sg13 -S'SPG7' -p56 -sg15 -(dp57 -g17 -S'NP_003110.1:p.(Ala510Val)' -p58 -sg19 -S'NP_003110.1:p.(A510V)' -p59 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_003119.3:c.1529C>T' -p60 -sg27 -g4 -sg28 -(dp61 -S'GRCh38' -p62 -(dp63 -g32 -S'NC_000016.10:g.89546737C>T' -p64 -sg34 -(dp65 -g36 -g48 -sg38 -g39 -sg40 -S'89546737' -p66 -sg42 -g43 -sssg44 -(dp67 -g32 -S'NC_000016.9:g.89613145C>T' -p68 -sg34 -(dp69 -g36 -g48 -sg38 -g39 -sg40 -S'89613145' -p70 -sg42 -g43 -sssS'hg38' -p71 -(dp72 -g32 -S'NC_000016.10:g.89546737C>T' -p73 -sg34 -(dp74 -g36 -g37 -sg38 -g39 -sg40 -S'89546737' -p75 -sg42 -g43 -sssg30 -(dp76 -g32 -S'NC_000016.9:g.89613145C>T' -p77 -sg34 -(dp78 -g36 -g37 -sg38 -g39 -sg40 -S'89613145' -p79 -sg42 -g43 -sssssS'flag' -p80 -S'gene_variant' -p81 -sS'NM_003119.2:c.1529C>T' -p82 -(dp83 -g3 -g4 -sg5 -(lp84 -S'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' -p85 -aS'NM_003119.3:c.1529C>T MUST be fully validated prior to use in reports' -p86 -aS'select_variants=NM_003119.3:c.1529C>T' -p87 -asg8 -g4 -sg9 -(lp88 -sg11 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p89 -sg13 -S'SPG7' -p90 -sg15 -(dp91 -g17 -S'NP_003110.1:p.(Ala510Val)' -p92 -sg19 -S'NP_003110.1:p.(A510V)' -p93 -ssg21 -g22 -sg23 
-g4 -sg24 -g4 -sg25 -S'NM_003119.2:c.1529C>T' -p94 -sg27 -S'NG_008082.1:g.43341C>T' -p95 -sg28 -(dp96 -g30 -(dp97 -g32 -S'NC_000016.9:g.89613145C>T' -p98 -sg34 -(dp99 -g36 -g37 -sg38 -g39 -sg40 -S'89613145' -p100 -sg42 -g43 -sssg44 -(dp101 -g32 -S'NC_000016.9:g.89613145C>T' -p102 -sg34 -(dp103 -g36 -g48 -sg38 -g39 -sg40 -S'89613145' -p104 -sg42 -g43 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant25.txt b/VariantValidator/testing/testOutputsMasterITS/variant25.txt deleted file mode 100644 index 579b4740..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant25.txt +++ /dev/null @@ -1,58 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use NC_000014.8:g.36989536G>A' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'HGVS_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NR_138595.1:n.-810C>T' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'HGVS_LRG_variant' -p20 -g4 -sS'HGVS_transcript_variant' -p21 -g4 -sS'HGVS_RefSeqGene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -ssS'flag' -p25 -S'warning' -p26 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant250.txt b/VariantValidator/testing/testOutputsMasterITS/variant250.txt deleted file mode 100644 index a5119373..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant250.txt +++ /dev/null @@ -1,1694 +0,0 @@ -(dp0 -S'NM_001276695.1:c.535_537del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p7 -aS'RefSeqGene record not available' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 4, mRNA -p13 -sS'gene_symbol' -p14 -S'TP53' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001263624.1:p.(Val179del)' -p19 -sS'slr' -p20 -S'NP_001263624.1:p.(V179del)' -p21 -ssS'submitted_variant' -p22 -S'17-7578194-GCAC-G' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'HGVS_LRG_variant' -p25 -g4 -sS'HGVS_transcript_variant' -p26 -S'NM_001276695.1:c.535_537del' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000017.10:g.7578195_7578197del' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr17' -p38 -sS'ref' -p39 -S'GCAC' -p40 -sS'pos' -p41 -S'7578194' -p42 -sS'alt' -p43 -S'G' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000017.11:g.7674877_7674879del' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'GCAC' -p49 -sg41 -S'7674876' -p50 -sg43 -g44 -sssS'GRCh37' -p51 -(dp52 -g33 -S'NC_000017.10:g.7578195_7578197del' -p53 -sg35 -(dp54 -g37 -S'17' -p55 -sg39 -S'GCAC' -p56 -sg41 -S'7578194' -p57 -sg43 -g44 -sssS'GRCh38' -p58 -(dp59 -g33 -S'NC_000017.11:g.7674877_7674879del' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -S'GCAC' -p62 -sg41 -S'7674876' -p63 -sg43 -g44 -sssssS'NM_001126113.2:c.652_654del' -p64 -(dp65 -g3 
-S'LRG_321t4:c.652_654del' -p66 -sg5 -(lp67 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p68 -asg9 -g4 -sg10 -(lp69 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 4, mRNA -p70 -sg14 -S'TP53' -p71 -sg16 -(dp72 -g18 -S'NP_001119585.1:p.(Val218del)' -p73 -sg20 -S'NP_001119585.1:p.(V218del)' -p74 -ssg22 -g23 -sg24 -g4 -sg25 -S'LRG_321:g.17672_17674del' -p75 -sg26 -S'NM_001126113.2:c.652_654del' -p76 -sg28 -S'NG_017013.2:g.17672_17674del' -p77 -sg29 -(dp78 -g31 -(dp79 -g33 -S'NC_000017.10:g.7578195_7578197del' -p80 -sg35 -(dp81 -g37 -g38 -sg39 -S'GCAC' -p82 -sg41 -S'7578194' -p83 -sg43 -g44 -sssg45 -(dp84 -g33 -S'NC_000017.11:g.7674877_7674879del' -p85 -sg35 -(dp86 -g37 -g38 -sg39 -S'GCAC' -p87 -sg41 -S'7674876' -p88 -sg43 -g44 -sssg51 -(dp89 -g33 -S'NC_000017.10:g.7578195_7578197del' -p90 -sg35 -(dp91 -g37 -g55 -sg39 -S'GCAC' -p92 -sg41 -S'7578194' -p93 -sg43 -g44 -sssg58 -(dp94 -g33 -S'NC_000017.11:g.7674877_7674879del' -p95 -sg35 -(dp96 -g37 -g55 -sg39 -S'GCAC' -p97 -sg41 -S'7674876' -p98 -sg43 -g44 -sssssS'NM_001126118.1:c.535_537del' -p99 -(dp100 -g3 -S'LRG_321t8:c.535_537del' -p101 -sg5 -(lp102 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p103 -asg9 -g4 -sg10 -(lp104 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 8, mRNA -p105 -sg14 -S'TP53' -p106 -sg16 -(dp107 -g18 -S'NP_001119590.1:p.(Val179del)' -p108 -sg20 -S'NP_001119590.1:p.(V179del)' -p109 -ssg22 -g23 -sg24 -g4 -sg25 -S'LRG_321:g.17672_17674del' -p110 -sg26 -S'NM_001126118.1:c.535_537del' -p111 -sg28 -S'NG_017013.2:g.17672_17674del' -p112 -sg29 -(dp113 -g31 -(dp114 -g33 -S'NC_000017.10:g.7578195_7578197del' -p115 -sg35 -(dp116 -g37 -g38 -sg39 -S'GCAC' -p117 -sg41 -S'7578194' -p118 -sg43 -g44 -sssg45 -(dp119 -g33 -S'NC_000017.11:g.7674877_7674879del' -p120 -sg35 -(dp121 -g37 -g38 -sg39 -S'GCAC' -p122 -sg41 -S'7674876' -p123 -sg43 -g44 -sssg51 -(dp124 -g33 -S'NC_000017.10:g.7578195_7578197del' 
-p125 -sg35 -(dp126 -g37 -g55 -sg39 -S'GCAC' -p127 -sg41 -S'7578194' -p128 -sg43 -g44 -sssg58 -(dp129 -g33 -S'NC_000017.11:g.7674877_7674879del' -p130 -sg35 -(dp131 -g37 -g55 -sg39 -S'GCAC' -p132 -sg41 -S'7674876' -p133 -sg43 -g44 -sssssS'NM_001126116.1:c.256_258del' -p134 -(dp135 -g3 -S'LRG_321t6:c.256_258del' -p136 -sg5 -(lp137 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p138 -asg9 -g4 -sg10 -(lp139 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 6, mRNA -p140 -sg14 -S'TP53' -p141 -sg16 -(dp142 -g18 -S'NP_001119588.1:p.(Val86del)' -p143 -sg20 -S'NP_001119588.1:p.(V86del)' -p144 -ssg22 -g23 -sg24 -g4 -sg25 -S'LRG_321:g.17672_17674del' -p145 -sg26 -S'NM_001126116.1:c.256_258del' -p146 -sg28 -S'NG_017013.2:g.17672_17674del' -p147 -sg29 -(dp148 -g31 -(dp149 -g33 -S'NC_000017.10:g.7578195_7578197del' -p150 -sg35 -(dp151 -g37 -g38 -sg39 -S'GCAC' -p152 -sg41 -S'7578194' -p153 -sg43 -g44 -sssg45 -(dp154 -g33 -S'NC_000017.11:g.7674877_7674879del' -p155 -sg35 -(dp156 -g37 -g38 -sg39 -S'GCAC' -p157 -sg41 -S'7674876' -p158 -sg43 -g44 -sssg51 -(dp159 -g33 -S'NC_000017.10:g.7578195_7578197del' -p160 -sg35 -(dp161 -g37 -g55 -sg39 -S'GCAC' -p162 -sg41 -S'7578194' -p163 -sg43 -g44 -sssg58 -(dp164 -g33 -S'NC_000017.11:g.7674877_7674879del' -p165 -sg35 -(dp166 -g37 -g55 -sg39 -S'GCAC' -p167 -sg41 -S'7674876' -p168 -sg43 -g44 -sssssS'NM_001126117.1:c.256_258del' -p169 -(dp170 -g3 -S'LRG_321t7:c.256_258del' -p171 -sg5 -(lp172 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p173 -asg9 -g4 -sg10 -(lp174 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 7, mRNA -p175 -sg14 -S'TP53' -p176 -sg16 -(dp177 -g18 -S'NP_001119589.1:p.(Val86del)' -p178 -sg20 -S'NP_001119589.1:p.(V86del)' -p179 -ssg22 -g23 -sg24 -g4 -sg25 -S'LRG_321:g.17672_17674del' -p180 -sg26 -S'NM_001126117.1:c.256_258del' -p181 -sg28 -S'NG_017013.2:g.17672_17674del' -p182 -sg29 -(dp183 -g31 -(dp184 -g33 
-S'NC_000017.10:g.7578195_7578197del' -p185 -sg35 -(dp186 -g37 -g38 -sg39 -S'GCAC' -p187 -sg41 -S'7578194' -p188 -sg43 -g44 -sssg45 -(dp189 -g33 -S'NC_000017.11:g.7674877_7674879del' -p190 -sg35 -(dp191 -g37 -g38 -sg39 -S'GCAC' -p192 -sg41 -S'7674876' -p193 -sg43 -g44 -sssg51 -(dp194 -g33 -S'NC_000017.10:g.7578195_7578197del' -p195 -sg35 -(dp196 -g37 -g55 -sg39 -S'GCAC' -p197 -sg41 -S'7578194' -p198 -sg43 -g44 -sssg58 -(dp199 -g33 -S'NC_000017.11:g.7674877_7674879del' -p200 -sg35 -(dp201 -g37 -g55 -sg39 -S'GCAC' -p202 -sg41 -S'7674876' -p203 -sg43 -g44 -sssssS'NM_001276761.1:c.535_537del' -p204 -(dp205 -g3 -g4 -sg5 -(lp206 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p207 -aS'RefSeqGene record not available' -p208 -asg9 -g4 -sg10 -(lp209 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 2, mRNA -p210 -sg14 -S'TP53' -p211 -sg16 -(dp212 -g18 -S'NP_001263690.1:p.(Val179del)' -p213 -sg20 -S'NP_001263690.1:p.(V179del)' -p214 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001276761.1:c.535_537del' -p215 -sg28 -g4 -sg29 -(dp216 -g31 -(dp217 -g33 -S'NC_000017.10:g.7578195_7578197del' -p218 -sg35 -(dp219 -g37 -g38 -sg39 -S'GCAC' -p220 -sg41 -S'7578194' -p221 -sg43 -g44 -sssg45 -(dp222 -g33 -S'NC_000017.11:g.7674877_7674879del' -p223 -sg35 -(dp224 -g37 -g38 -sg39 -S'GCAC' -p225 -sg41 -S'7674876' -p226 -sg43 -g44 -sssg51 -(dp227 -g33 -S'NC_000017.10:g.7578195_7578197del' -p228 -sg35 -(dp229 -g37 -g55 -sg39 -S'GCAC' -p230 -sg41 -S'7578194' -p231 -sg43 -g44 -sssg58 -(dp232 -g33 -S'NC_000017.11:g.7674877_7674879del' -p233 -sg35 -(dp234 -g37 -g55 -sg39 -S'GCAC' -p235 -sg41 -S'7674876' -p236 -sg43 -g44 -sssssS'NM_001126112.2:c.652_654del' -p237 -(dp238 -g3 -S'LRG_321t2:c.652_654del' -p239 -sg5 -(lp240 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p241 -asg9 -g4 -sg10 -(lp242 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 2, mRNA -p243 -sg14 -S'TP53' -p244 -sg16 -(dp245 
-g18 -S'NP_001119584.1:p.(Val218del)' -p246 -sg20 -S'NP_001119584.1:p.(V218del)' -p247 -ssg22 -g23 -sg24 -g4 -sg25 -S'LRG_321:g.17672_17674del' -p248 -sg26 -S'NM_001126112.2:c.652_654del' -p249 -sg28 -S'NG_017013.2:g.17672_17674del' -p250 -sg29 -(dp251 -g31 -(dp252 -g33 -S'NC_000017.10:g.7578195_7578197del' -p253 -sg35 -(dp254 -g37 -g38 -sg39 -S'GCAC' -p255 -sg41 -S'7578194' -p256 -sg43 -g44 -sssg45 -(dp257 -g33 -S'NC_000017.11:g.7674877_7674879del' -p258 -sg35 -(dp259 -g37 -g38 -sg39 -S'GCAC' -p260 -sg41 -S'7674876' -p261 -sg43 -g44 -sssg51 -(dp262 -g33 -S'NC_000017.10:g.7578195_7578197del' -p263 -sg35 -(dp264 -g37 -g55 -sg39 -S'GCAC' -p265 -sg41 -S'7578194' -p266 -sg43 -g44 -sssg58 -(dp267 -g33 -S'NC_000017.11:g.7674877_7674879del' -p268 -sg35 -(dp269 -g37 -g55 -sg39 -S'GCAC' -p270 -sg41 -S'7674876' -p271 -sg43 -g44 -sssssS'flag' -p272 -S'gene_variant' -p273 -sS'NM_001276697.1:c.175_177del' -p274 -(dp275 -g3 -g4 -sg5 -(lp276 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p277 -aS'RefSeqGene record not available' -p278 -asg9 -g4 -sg10 -(lp279 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 5, mRNA -p280 -sg14 -S'TP53' -p281 -sg16 -(dp282 -g18 -S'NP_001263626.1:p.(Val59del)' -p283 -sg20 -S'NP_001263626.1:p.(V59del)' -p284 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001276697.1:c.175_177del' -p285 -sg28 -g4 -sg29 -(dp286 -g31 -(dp287 -g33 -S'NC_000017.10:g.7578195_7578197del' -p288 -sg35 -(dp289 -g37 -g38 -sg39 -S'GCAC' -p290 -sg41 -S'7578194' -p291 -sg43 -g44 -sssg45 -(dp292 -g33 -S'NC_000017.11:g.7674877_7674879del' -p293 -sg35 -(dp294 -g37 -g38 -sg39 -S'GCAC' -p295 -sg41 -S'7674876' -p296 -sg43 -g44 -sssg51 -(dp297 -g33 -S'NC_000017.10:g.7578195_7578197del' -p298 -sg35 -(dp299 -g37 -g55 -sg39 -S'GCAC' -p300 -sg41 -S'7578194' -p301 -sg43 -g44 -sssg58 -(dp302 -g33 -S'NC_000017.11:g.7674877_7674879del' -p303 -sg35 -(dp304 -g37 -g55 -sg39 -S'GCAC' -p305 -sg41 -S'7674876' -p306 -sg43 -g44 
-sssssS'NM_001276696.1:c.535_537del' -p307 -(dp308 -g3 -g4 -sg5 -(lp309 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p310 -aS'RefSeqGene record not available' -p311 -asg9 -g4 -sg10 -(lp312 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 3, mRNA -p313 -sg14 -S'TP53' -p314 -sg16 -(dp315 -g18 -S'NP_001263625.1:p.(Val179del)' -p316 -sg20 -S'NP_001263625.1:p.(V179del)' -p317 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001276696.1:c.535_537del' -p318 -sg28 -g4 -sg29 -(dp319 -g31 -(dp320 -g33 -S'NC_000017.10:g.7578195_7578197del' -p321 -sg35 -(dp322 -g37 -g38 -sg39 -S'GCAC' -p323 -sg41 -S'7578194' -p324 -sg43 -g44 -sssg45 -(dp325 -g33 -S'NC_000017.11:g.7674877_7674879del' -p326 -sg35 -(dp327 -g37 -g38 -sg39 -S'GCAC' -p328 -sg41 -S'7674876' -p329 -sg43 -g44 -sssg51 -(dp330 -g33 -S'NC_000017.10:g.7578195_7578197del' -p331 -sg35 -(dp332 -g37 -g55 -sg39 -S'GCAC' -p333 -sg41 -S'7578194' -p334 -sg43 -g44 -sssg58 -(dp335 -g33 -S'NC_000017.11:g.7674877_7674879del' -p336 -sg35 -(dp337 -g37 -g55 -sg39 -S'GCAC' -p338 -sg41 -S'7674876' -p339 -sg43 -g44 -sssssS'NM_001276698.1:c.175_177del' -p340 -(dp341 -g3 -g4 -sg5 -(lp342 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p343 -aS'RefSeqGene record not available' -p344 -asg9 -g4 -sg10 -(lp345 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 6, mRNA -p346 -sg14 -S'TP53' -p347 -sg16 -(dp348 -g18 -S'NP_001263627.1:p.(Val59del)' -p349 -sg20 -S'NP_001263627.1:p.(V59del)' -p350 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001276698.1:c.175_177del' -p351 -sg28 -g4 -sg29 -(dp352 -g31 -(dp353 -g33 -S'NC_000017.10:g.7578195_7578197del' -p354 -sg35 -(dp355 -g37 -g38 -sg39 -S'GCAC' -p356 -sg41 -S'7578194' -p357 -sg43 -g44 -sssg45 -(dp358 -g33 -S'NC_000017.11:g.7674877_7674879del' -p359 -sg35 -(dp360 -g37 -g38 -sg39 -S'GCAC' -p361 -sg41 -S'7674876' -p362 -sg43 -g44 -sssg51 -(dp363 -g33 -S'NC_000017.10:g.7578195_7578197del' -p364 -sg35 
-(dp365 -g37 -g55 -sg39 -S'GCAC' -p366 -sg41 -S'7578194' -p367 -sg43 -g44 -sssg58 -(dp368 -g33 -S'NC_000017.11:g.7674877_7674879del' -p369 -sg35 -(dp370 -g37 -g55 -sg39 -S'GCAC' -p371 -sg41 -S'7674876' -p372 -sg43 -g44 -sssssS'NM_001126115.1:c.256_258del' -p373 -(dp374 -g3 -S'LRG_321t5:c.256_258del' -p375 -sg5 -(lp376 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p377 -asg9 -g4 -sg10 -(lp378 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 5, mRNA -p379 -sg14 -S'TP53' -p380 -sg16 -(dp381 -g18 -S'NP_001119587.1:p.(Val86del)' -p382 -sg20 -S'NP_001119587.1:p.(V86del)' -p383 -ssg22 -g23 -sg24 -g4 -sg25 -S'LRG_321:g.17672_17674del' -p384 -sg26 -S'NM_001126115.1:c.256_258del' -p385 -sg28 -S'NG_017013.2:g.17672_17674del' -p386 -sg29 -(dp387 -g31 -(dp388 -g33 -S'NC_000017.10:g.7578195_7578197del' -p389 -sg35 -(dp390 -g37 -g38 -sg39 -S'GCAC' -p391 -sg41 -S'7578194' -p392 -sg43 -g44 -sssg45 -(dp393 -g33 -S'NC_000017.11:g.7674877_7674879del' -p394 -sg35 -(dp395 -g37 -g38 -sg39 -S'GCAC' -p396 -sg41 -S'7674876' -p397 -sg43 -g44 -sssg51 -(dp398 -g33 -S'NC_000017.10:g.7578195_7578197del' -p399 -sg35 -(dp400 -g37 -g55 -sg39 -S'GCAC' -p401 -sg41 -S'7578194' -p402 -sg43 -g44 -sssg58 -(dp403 -g33 -S'NC_000017.11:g.7674877_7674879del' -p404 -sg35 -(dp405 -g37 -g55 -sg39 -S'GCAC' -p406 -sg41 -S'7674876' -p407 -sg43 -g44 -sssssS'NM_001126114.2:c.652_654del' -p408 -(dp409 -g3 -S'LRG_321t3:c.652_654del' -p410 -sg5 -(lp411 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p412 -asg9 -g4 -sg10 -(lp413 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 3, mRNA -p414 -sg14 -S'TP53' -p415 -sg16 -(dp416 -g18 -S'NP_001119586.1:p.(Val218del)' -p417 -sg20 -S'NP_001119586.1:p.(V218del)' -p418 -ssg22 -g23 -sg24 -g4 -sg25 -S'LRG_321:g.17672_17674del' -p419 -sg26 -S'NM_001126114.2:c.652_654del' -p420 -sg28 -S'NG_017013.2:g.17672_17674del' -p421 -sg29 -(dp422 -g31 -(dp423 -g33 
-S'NC_000017.10:g.7578195_7578197del' -p424 -sg35 -(dp425 -g37 -g38 -sg39 -S'GCAC' -p426 -sg41 -S'7578194' -p427 -sg43 -g44 -sssg45 -(dp428 -g33 -S'NC_000017.11:g.7674877_7674879del' -p429 -sg35 -(dp430 -g37 -g38 -sg39 -S'GCAC' -p431 -sg41 -S'7674876' -p432 -sg43 -g44 -sssg51 -(dp433 -g33 -S'NC_000017.10:g.7578195_7578197del' -p434 -sg35 -(dp435 -g37 -g55 -sg39 -S'GCAC' -p436 -sg41 -S'7578194' -p437 -sg43 -g44 -sssg58 -(dp438 -g33 -S'NC_000017.11:g.7674877_7674879del' -p439 -sg35 -(dp440 -g37 -g55 -sg39 -S'GCAC' -p441 -sg41 -S'7674876' -p442 -sg43 -g44 -sssssS'NM_001276699.1:c.175_177del' -p443 -(dp444 -g3 -g4 -sg5 -(lp445 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p446 -aS'RefSeqGene record not available' -p447 -asg9 -g4 -sg10 -(lp448 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 7, mRNA -p449 -sg14 -S'TP53' -p450 -sg16 -(dp451 -g18 -S'NP_001263628.1:p.(Val59del)' -p452 -sg20 -S'NP_001263628.1:p.(V59del)' -p453 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001276699.1:c.175_177del' -p454 -sg28 -g4 -sg29 -(dp455 -g31 -(dp456 -g33 -S'NC_000017.10:g.7578195_7578197del' -p457 -sg35 -(dp458 -g37 -g38 -sg39 -S'GCAC' -p459 -sg41 -S'7578194' -p460 -sg43 -g44 -sssg45 -(dp461 -g33 -S'NC_000017.11:g.7674877_7674879del' -p462 -sg35 -(dp463 -g37 -g38 -sg39 -S'GCAC' -p464 -sg41 -S'7674876' -p465 -sg43 -g44 -sssg51 -(dp466 -g33 -S'NC_000017.10:g.7578195_7578197del' -p467 -sg35 -(dp468 -g37 -g55 -sg39 -S'GCAC' -p469 -sg41 -S'7578194' -p470 -sg43 -g44 -sssg58 -(dp471 -g33 -S'NC_000017.11:g.7674877_7674879del' -p472 -sg35 -(dp473 -g37 -g55 -sg39 -S'GCAC' -p474 -sg41 -S'7674876' -p475 -sg43 -g44 -sssssS'NM_001276760.1:c.535_537del' -p476 -(dp477 -g3 -g4 -sg5 -(lp478 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p479 -aS'RefSeqGene record not available' -p480 -asg9 -g4 -sg10 -(lp481 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 1, mRNA -p482 -sg14 -S'TP53' -p483 
-sg16 -(dp484 -g18 -S'NP_001263689.1:p.(Val179del)' -p485 -sg20 -S'NP_001263689.1:p.(V179del)' -p486 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001276760.1:c.535_537del' -p487 -sg28 -g4 -sg29 -(dp488 -g31 -(dp489 -g33 -S'NC_000017.10:g.7578195_7578197del' -p490 -sg35 -(dp491 -g37 -g38 -sg39 -S'GCAC' -p492 -sg41 -S'7578194' -p493 -sg43 -g44 -sssg45 -(dp494 -g33 -S'NC_000017.11:g.7674877_7674879del' -p495 -sg35 -(dp496 -g37 -g38 -sg39 -S'GCAC' -p497 -sg41 -S'7674876' -p498 -sg43 -g44 -sssg51 -(dp499 -g33 -S'NC_000017.10:g.7578195_7578197del' -p500 -sg35 -(dp501 -g37 -g55 -sg39 -S'GCAC' -p502 -sg41 -S'7578194' -p503 -sg43 -g44 -sssg58 -(dp504 -g33 -S'NC_000017.11:g.7674877_7674879del' -p505 -sg35 -(dp506 -g37 -g55 -sg39 -S'GCAC' -p507 -sg41 -S'7674876' -p508 -sg43 -g44 -sssssS'NM_000546.5:c.652_654del' -p509 -(dp510 -g3 -S'LRG_321t1:c.652_654del' -p511 -sg5 -(lp512 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p513 -asg9 -g4 -sg10 -(lp514 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 1, mRNA -p515 -sg14 -S'TP53' -p516 -sg16 -(dp517 -g18 -S'NP_000537.3(LRG_321p1):p.(Val218del)' -p518 -sg20 -S'NP_000537.3:p.(V218del)' -p519 -ssg22 -g23 -sg24 -g4 -sg25 -S'LRG_321:g.17672_17674del' -p520 -sg26 -S'NM_000546.5:c.652_654del' -p521 -sg28 -S'NG_017013.2:g.17672_17674del' -p522 -sg29 -(dp523 -g31 -(dp524 -g33 -S'NC_000017.10:g.7578195_7578197del' -p525 -sg35 -(dp526 -g37 -g38 -sg39 -S'GCAC' -p527 -sg41 -S'7578194' -p528 -sg43 -g44 -sssg45 -(dp529 -g33 -S'NC_000017.11:g.7674877_7674879del' -p530 -sg35 -(dp531 -g37 -g38 -sg39 -S'GCAC' -p532 -sg41 -S'7674876' -p533 -sg43 -g44 -sssg51 -(dp534 -g33 -S'NC_000017.10:g.7578195_7578197del' -p535 -sg35 -(dp536 -g37 -g55 -sg39 -S'GCAC' -p537 -sg41 -S'7578194' -p538 -sg43 -g44 -sssg58 -(dp539 -g33 -S'NC_000017.11:g.7674877_7674879del' -p540 -sg35 -(dp541 -g37 -g55 -sg39 -S'GCAC' -p542 -sg41 -S'7674876' -p543 -sg43 -g44 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant251.txt b/VariantValidator/testing/testOutputsMasterITS/variant251.txt deleted file mode 100644 index c6163052..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant251.txt +++ /dev/null @@ -1,1694 +0,0 @@ -(dp0 -S'NM_001276760.1:c.289dup' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p7 -aS'RefSeqGene record not available' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 1, mRNA -p13 -sS'gene_symbol' -p14 -S'TP53' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001263689.1:p.(Gln97ProfsTer13)' -p19 -sS'slr' -p20 -S'NP_001263689.1:p.(Q97Pfs*13)' -p21 -ssS'submitted_variant' -p22 -S'17-7578523-T-TG' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'HGVS_LRG_variant' -p25 -g4 -sS'HGVS_transcript_variant' -p26 -S'NM_001276760.1:c.289dup' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000017.10:g.7578524dup' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr17' -p38 -sS'ref' -p39 -S'G' -p40 -sS'pos' -p41 -S'7578524' -p42 -sS'alt' -p43 -VGG -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000017.11:g.7675206dup' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p49 -sg43 -VGG -p50 -sssS'GRCh37' -p51 -(dp52 -g33 -S'NC_000017.10:g.7578524dup' -p53 -sg35 -(dp54 -g37 -S'17' -p55 -sg39 -g40 -sg41 -S'7578524' -p56 -sg43 -VGG -p57 -sssS'GRCh38' -p58 -(dp59 -g33 -S'NC_000017.11:g.7675206dup' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p62 -sg43 -VGG -p63 -sssssS'NM_001126118.1:c.289dup' -p64 -(dp65 -g3 -S'LRG_321t8:c.289dup' -p66 -sg5 -(lp67 -S'NC_000017.10:g.7578523T>TG automapped to 
NC_000017.10:g.7578525dupG' -p68 -asg9 -g4 -sg10 -(lp69 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 8, mRNA -p70 -sg14 -S'TP53' -p71 -sg16 -(dp72 -g18 -S'NP_001119590.1:p.(Gln97ProfsTer13)' -p73 -sg20 -S'NP_001119590.1:p.(Q97Pfs*13)' -p74 -ssg22 -g23 -sg24 -g4 -sg25 -S'LRG_321:g.17345dup' -p75 -sg26 -S'NM_001126118.1:c.289dup' -p76 -sg28 -S'NG_017013.2:g.17345dup' -p77 -sg29 -(dp78 -g31 -(dp79 -g33 -S'NC_000017.10:g.7578524dup' -p80 -sg35 -(dp81 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p82 -sg43 -VGG -p83 -sssg45 -(dp84 -g33 -S'NC_000017.11:g.7675206dup' -p85 -sg35 -(dp86 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p87 -sg43 -VGG -p88 -sssg51 -(dp89 -g33 -S'NC_000017.10:g.7578524dup' -p90 -sg35 -(dp91 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p92 -sg43 -VGG -p93 -sssg58 -(dp94 -g33 -S'NC_000017.11:g.7675206dup' -p95 -sg35 -(dp96 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p97 -sg43 -VGG -p98 -sssssS'NM_001276695.1:c.289dup' -p99 -(dp100 -g3 -g4 -sg5 -(lp101 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p102 -aS'RefSeqGene record not available' -p103 -asg9 -g4 -sg10 -(lp104 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 4, mRNA -p105 -sg14 -S'TP53' -p106 -sg16 -(dp107 -g18 -S'NP_001263624.1:p.(Gln97ProfsTer13)' -p108 -sg20 -S'NP_001263624.1:p.(Q97Pfs*13)' -p109 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001276695.1:c.289dup' -p110 -sg28 -g4 -sg29 -(dp111 -g31 -(dp112 -g33 -S'NC_000017.10:g.7578524dup' -p113 -sg35 -(dp114 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p115 -sg43 -VGG -p116 -sssg45 -(dp117 -g33 -S'NC_000017.11:g.7675206dup' -p118 -sg35 -(dp119 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p120 -sg43 -VGG -p121 -sssg51 -(dp122 -g33 -S'NC_000017.10:g.7578524dup' -p123 -sg35 -(dp124 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p125 -sg43 -VGG -p126 -sssg58 -(dp127 -g33 -S'NC_000017.11:g.7675206dup' -p128 -sg35 -(dp129 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p130 -sg43 -VGG -p131 -sssssS'NM_001276699.1:c.-72dup' 
-p132 -(dp133 -g3 -g4 -sg5 -(lp134 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p135 -aS'RefSeqGene record not available' -p136 -asg9 -g4 -sg10 -(lp137 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 7, mRNA -p138 -sg14 -S'TP53' -p139 -sg16 -(dp140 -g18 -S'NP_001263628.1:p.?' -p141 -sg20 -S'NP_001263628.1:p.?' -p142 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001276699.1:c.-72dup' -p143 -sg28 -g4 -sg29 -(dp144 -g31 -(dp145 -g33 -S'NC_000017.10:g.7578524dup' -p146 -sg35 -(dp147 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p148 -sg43 -VGG -p149 -sssg45 -(dp150 -g33 -S'NC_000017.11:g.7675206dup' -p151 -sg35 -(dp152 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p153 -sg43 -VGG -p154 -sssg51 -(dp155 -g33 -S'NC_000017.10:g.7578524dup' -p156 -sg35 -(dp157 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p158 -sg43 -VGG -p159 -sssg58 -(dp160 -g33 -S'NC_000017.11:g.7675206dup' -p161 -sg35 -(dp162 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p163 -sg43 -VGG -p164 -sssssS'NM_001126115.1:c.10dup' -p165 -(dp166 -g3 -S'LRG_321t5:c.10dup' -p167 -sg5 -(lp168 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p169 -asg9 -g4 -sg10 -(lp170 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 5, mRNA -p171 -sg14 -S'TP53' -p172 -sg16 -(dp173 -g18 -S'NP_001119587.1:p.(Gln4ProfsTer13)' -p174 -sg20 -S'NP_001119587.1:p.(Q4Pfs*13)' -p175 -ssg22 -g23 -sg24 -g4 -sg25 -S'LRG_321:g.17345dup' -p176 -sg26 -S'NM_001126115.1:c.10dup' -p177 -sg28 -S'NG_017013.2:g.17345dup' -p178 -sg29 -(dp179 -g31 -(dp180 -g33 -S'NC_000017.10:g.7578524dup' -p181 -sg35 -(dp182 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p183 -sg43 -VGG -p184 -sssg45 -(dp185 -g33 -S'NC_000017.11:g.7675206dup' -p186 -sg35 -(dp187 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p188 -sg43 -VGG -p189 -sssg51 -(dp190 -g33 -S'NC_000017.10:g.7578524dup' -p191 -sg35 -(dp192 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p193 -sg43 -VGG -p194 -sssg58 -(dp195 -g33 -S'NC_000017.11:g.7675206dup' -p196 -sg35 
-(dp197 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p198 -sg43 -VGG -p199 -sssssS'NM_001276697.1:c.-72dup' -p200 -(dp201 -g3 -g4 -sg5 -(lp202 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p203 -aS'RefSeqGene record not available' -p204 -asg9 -g4 -sg10 -(lp205 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 5, mRNA -p206 -sg14 -S'TP53' -p207 -sg16 -(dp208 -g18 -S'NP_001263626.1:p.?' -p209 -sg20 -S'NP_001263626.1:p.?' -p210 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001276697.1:c.-72dup' -p211 -sg28 -g4 -sg29 -(dp212 -g31 -(dp213 -g33 -S'NC_000017.10:g.7578524dup' -p214 -sg35 -(dp215 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p216 -sg43 -VGG -p217 -sssg45 -(dp218 -g33 -S'NC_000017.11:g.7675206dup' -p219 -sg35 -(dp220 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p221 -sg43 -VGG -p222 -sssg51 -(dp223 -g33 -S'NC_000017.10:g.7578524dup' -p224 -sg35 -(dp225 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p226 -sg43 -VGG -p227 -sssg58 -(dp228 -g33 -S'NC_000017.11:g.7675206dup' -p229 -sg35 -(dp230 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p231 -sg43 -VGG -p232 -sssssS'NM_001126117.1:c.10dup' -p233 -(dp234 -g3 -S'LRG_321t7:c.10dup' -p235 -sg5 -(lp236 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p237 -asg9 -g4 -sg10 -(lp238 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 7, mRNA -p239 -sg14 -S'TP53' -p240 -sg16 -(dp241 -g18 -S'NP_001119589.1:p.(Gln4ProfsTer13)' -p242 -sg20 -S'NP_001119589.1:p.(Q4Pfs*13)' -p243 -ssg22 -g23 -sg24 -g4 -sg25 -S'LRG_321:g.17345dup' -p244 -sg26 -S'NM_001126117.1:c.10dup' -p245 -sg28 -S'NG_017013.2:g.17345dup' -p246 -sg29 -(dp247 -g31 -(dp248 -g33 -S'NC_000017.10:g.7578524dup' -p249 -sg35 -(dp250 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p251 -sg43 -VGG -p252 -sssg45 -(dp253 -g33 -S'NC_000017.11:g.7675206dup' -p254 -sg35 -(dp255 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p256 -sg43 -VGG -p257 -sssg51 -(dp258 -g33 -S'NC_000017.10:g.7578524dup' -p259 -sg35 -(dp260 -g37 -g55 -sg39 -g40 
-sg41 -S'7578524' -p261 -sg43 -VGG -p262 -sssg58 -(dp263 -g33 -S'NC_000017.11:g.7675206dup' -p264 -sg35 -(dp265 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p266 -sg43 -VGG -p267 -sssssS'NM_000546.5:c.406dup' -p268 -(dp269 -g3 -S'LRG_321t1:c.406dup' -p270 -sg5 -(lp271 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p272 -asg9 -g4 -sg10 -(lp273 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 1, mRNA -p274 -sg14 -S'TP53' -p275 -sg16 -(dp276 -g18 -S'NP_000537.3(LRG_321p1):p.(Gln136ProfsTer13)' -p277 -sg20 -S'NP_000537.3:p.(Q136Pfs*13)' -p278 -ssg22 -g23 -sg24 -g4 -sg25 -S'LRG_321:g.17345dup' -p279 -sg26 -S'NM_000546.5:c.406dup' -p280 -sg28 -S'NG_017013.2:g.17345dup' -p281 -sg29 -(dp282 -g31 -(dp283 -g33 -S'NC_000017.10:g.7578524dup' -p284 -sg35 -(dp285 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p286 -sg43 -VGG -p287 -sssg45 -(dp288 -g33 -S'NC_000017.11:g.7675206dup' -p289 -sg35 -(dp290 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p291 -sg43 -VGG -p292 -sssg51 -(dp293 -g33 -S'NC_000017.10:g.7578524dup' -p294 -sg35 -(dp295 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p296 -sg43 -VGG -p297 -sssg58 -(dp298 -g33 -S'NC_000017.11:g.7675206dup' -p299 -sg35 -(dp300 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p301 -sg43 -VGG -p302 -sssssS'flag' -p303 -S'gene_variant' -p304 -sS'NM_001276696.1:c.289dup' -p305 -(dp306 -g3 -g4 -sg5 -(lp307 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p308 -aS'RefSeqGene record not available' -p309 -asg9 -g4 -sg10 -(lp310 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 3, mRNA -p311 -sg14 -S'TP53' -p312 -sg16 -(dp313 -g18 -S'NP_001263625.1:p.(Gln97ProfsTer13)' -p314 -sg20 -S'NP_001263625.1:p.(Q97Pfs*13)' -p315 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001276696.1:c.289dup' -p316 -sg28 -g4 -sg29 -(dp317 -g31 -(dp318 -g33 -S'NC_000017.10:g.7578524dup' -p319 -sg35 -(dp320 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p321 -sg43 -VGG -p322 -sssg45 -(dp323 -g33 -S'NC_000017.11:g.7675206dup' 
-p324 -sg35 -(dp325 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p326 -sg43 -VGG -p327 -sssg51 -(dp328 -g33 -S'NC_000017.10:g.7578524dup' -p329 -sg35 -(dp330 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p331 -sg43 -VGG -p332 -sssg58 -(dp333 -g33 -S'NC_000017.11:g.7675206dup' -p334 -sg35 -(dp335 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p336 -sg43 -VGG -p337 -sssssS'NM_001276698.1:c.-72dup' -p338 -(dp339 -g3 -g4 -sg5 -(lp340 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p341 -aS'RefSeqGene record not available' -p342 -asg9 -g4 -sg10 -(lp343 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 6, mRNA -p344 -sg14 -S'TP53' -p345 -sg16 -(dp346 -g18 -S'NP_001263627.1:p.?' -p347 -sg20 -S'NP_001263627.1:p.?' -p348 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001276698.1:c.-72dup' -p349 -sg28 -g4 -sg29 -(dp350 -g31 -(dp351 -g33 -S'NC_000017.10:g.7578524dup' -p352 -sg35 -(dp353 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p354 -sg43 -VGG -p355 -sssg45 -(dp356 -g33 -S'NC_000017.11:g.7675206dup' -p357 -sg35 -(dp358 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p359 -sg43 -VGG -p360 -sssg51 -(dp361 -g33 -S'NC_000017.10:g.7578524dup' -p362 -sg35 -(dp363 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p364 -sg43 -VGG -p365 -sssg58 -(dp366 -g33 -S'NC_000017.11:g.7675206dup' -p367 -sg35 -(dp368 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p369 -sg43 -VGG -p370 -sssssS'NM_001276761.1:c.289dup' -p371 -(dp372 -g3 -g4 -sg5 -(lp373 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p374 -aS'RefSeqGene record not available' -p375 -asg9 -g4 -sg10 -(lp376 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 2, mRNA -p377 -sg14 -S'TP53' -p378 -sg16 -(dp379 -g18 -S'NP_001263690.1:p.(Gln97ProfsTer13)' -p380 -sg20 -S'NP_001263690.1:p.(Q97Pfs*13)' -p381 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001276761.1:c.289dup' -p382 -sg28 -g4 -sg29 -(dp383 -g31 -(dp384 -g33 -S'NC_000017.10:g.7578524dup' -p385 -sg35 -(dp386 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' 
-p387 -sg43 -VGG -p388 -sssg45 -(dp389 -g33 -S'NC_000017.11:g.7675206dup' -p390 -sg35 -(dp391 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p392 -sg43 -VGG -p393 -sssg51 -(dp394 -g33 -S'NC_000017.10:g.7578524dup' -p395 -sg35 -(dp396 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p397 -sg43 -VGG -p398 -sssg58 -(dp399 -g33 -S'NC_000017.11:g.7675206dup' -p400 -sg35 -(dp401 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p402 -sg43 -VGG -p403 -sssssS'NM_001126113.2:c.406dup' -p404 -(dp405 -g3 -S'LRG_321t4:c.406dup' -p406 -sg5 -(lp407 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p408 -asg9 -g4 -sg10 -(lp409 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 4, mRNA -p410 -sg14 -S'TP53' -p411 -sg16 -(dp412 -g18 -S'NP_001119585.1:p.(Gln136ProfsTer13)' -p413 -sg20 -S'NP_001119585.1:p.(Q136Pfs*13)' -p414 -ssg22 -g23 -sg24 -g4 -sg25 -S'LRG_321:g.17345dup' -p415 -sg26 -S'NM_001126113.2:c.406dup' -p416 -sg28 -S'NG_017013.2:g.17345dup' -p417 -sg29 -(dp418 -g31 -(dp419 -g33 -S'NC_000017.10:g.7578524dup' -p420 -sg35 -(dp421 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p422 -sg43 -VGG -p423 -sssg45 -(dp424 -g33 -S'NC_000017.11:g.7675206dup' -p425 -sg35 -(dp426 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p427 -sg43 -VGG -p428 -sssg51 -(dp429 -g33 -S'NC_000017.10:g.7578524dup' -p430 -sg35 -(dp431 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p432 -sg43 -VGG -p433 -sssg58 -(dp434 -g33 -S'NC_000017.11:g.7675206dup' -p435 -sg35 -(dp436 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p437 -sg43 -VGG -p438 -sssssS'NM_001126116.1:c.10dup' -p439 -(dp440 -g3 -S'LRG_321t6:c.10dup' -p441 -sg5 -(lp442 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p443 -asg9 -g4 -sg10 -(lp444 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 6, mRNA -p445 -sg14 -S'TP53' -p446 -sg16 -(dp447 -g18 -S'NP_001119588.1:p.(Gln4ProfsTer13)' -p448 -sg20 -S'NP_001119588.1:p.(Q4Pfs*13)' -p449 -ssg22 -g23 -sg24 -g4 -sg25 -S'LRG_321:g.17345dup' -p450 -sg26 -S'NM_001126116.1:c.10dup' 
-p451 -sg28 -S'NG_017013.2:g.17345dup' -p452 -sg29 -(dp453 -g31 -(dp454 -g33 -S'NC_000017.10:g.7578524dup' -p455 -sg35 -(dp456 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p457 -sg43 -VGG -p458 -sssg45 -(dp459 -g33 -S'NC_000017.11:g.7675206dup' -p460 -sg35 -(dp461 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p462 -sg43 -VGG -p463 -sssg51 -(dp464 -g33 -S'NC_000017.10:g.7578524dup' -p465 -sg35 -(dp466 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p467 -sg43 -VGG -p468 -sssg58 -(dp469 -g33 -S'NC_000017.11:g.7675206dup' -p470 -sg35 -(dp471 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p472 -sg43 -VGG -p473 -sssssS'NM_001126112.2:c.406dup' -p474 -(dp475 -g3 -S'LRG_321t2:c.406dup' -p476 -sg5 -(lp477 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p478 -asg9 -g4 -sg10 -(lp479 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 2, mRNA -p480 -sg14 -S'TP53' -p481 -sg16 -(dp482 -g18 -S'NP_001119584.1:p.(Gln136ProfsTer13)' -p483 -sg20 -S'NP_001119584.1:p.(Q136Pfs*13)' -p484 -ssg22 -g23 -sg24 -g4 -sg25 -S'LRG_321:g.17345dup' -p485 -sg26 -S'NM_001126112.2:c.406dup' -p486 -sg28 -S'NG_017013.2:g.17345dup' -p487 -sg29 -(dp488 -g31 -(dp489 -g33 -S'NC_000017.10:g.7578524dup' -p490 -sg35 -(dp491 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p492 -sg43 -VGG -p493 -sssg45 -(dp494 -g33 -S'NC_000017.11:g.7675206dup' -p495 -sg35 -(dp496 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p497 -sg43 -VGG -p498 -sssg51 -(dp499 -g33 -S'NC_000017.10:g.7578524dup' -p500 -sg35 -(dp501 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p502 -sg43 -VGG -p503 -sssg58 -(dp504 -g33 -S'NC_000017.11:g.7675206dup' -p505 -sg35 -(dp506 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p507 -sg43 -VGG -p508 -sssssS'NM_001126114.2:c.406dup' -p509 -(dp510 -g3 -S'LRG_321t3:c.406dup' -p511 -sg5 -(lp512 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p513 -asg9 -g4 -sg10 -(lp514 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 3, mRNA -p515 -sg14 -S'TP53' -p516 -sg16 -(dp517 -g18 
-S'NP_001119586.1:p.(Gln136ProfsTer13)' -p518 -sg20 -S'NP_001119586.1:p.(Q136Pfs*13)' -p519 -ssg22 -g23 -sg24 -g4 -sg25 -S'LRG_321:g.17345dup' -p520 -sg26 -S'NM_001126114.2:c.406dup' -p521 -sg28 -S'NG_017013.2:g.17345dup' -p522 -sg29 -(dp523 -g31 -(dp524 -g33 -S'NC_000017.10:g.7578524dup' -p525 -sg35 -(dp526 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p527 -sg43 -VGG -p528 -sssg45 -(dp529 -g33 -S'NC_000017.11:g.7675206dup' -p530 -sg35 -(dp531 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p532 -sg43 -VGG -p533 -sssg51 -(dp534 -g33 -S'NC_000017.10:g.7578524dup' -p535 -sg35 -(dp536 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p537 -sg43 -VGG -p538 -sssg58 -(dp539 -g33 -S'NC_000017.11:g.7675206dup' -p540 -sg35 -(dp541 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p542 -sg43 -VGG -p543 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant252.txt b/VariantValidator/testing/testOutputsMasterITS/variant252.txt deleted file mode 100644 index 17b5c3e3..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant252.txt +++ /dev/null @@ -1,570 +0,0 @@ -(dp0 -S'NM_144997.6:c.1300+2T>G' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens folliculin (FLCN), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'FLCN' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_659434.2(LRG_325p1):p.?' -p18 -sS'slr' -p19 -S'NP_659434.2:p.?' 
-p20 -ssS'submitted_variant' -p21 -S'17-17119692-A-C' -p22 -sS'genome_context_intronic_sequence' -p23 -S'NC_000017.10(NM_144997.6):c.1300+2T>G' -p24 -sS'HGVS_LRG_variant' -p25 -g4 -sS'HGVS_transcript_variant' -p26 -S'NM_144997.6:c.1300+2T>G' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000017.10:g.17119692A>C' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr17' -p38 -sS'ref' -p39 -VA -p40 -sS'pos' -p41 -S'17119692' -p42 -sS'alt' -p43 -VC -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000017.11:g.17216378A>C' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'17216378' -p49 -sg43 -g44 -sssS'GRCh37' -p50 -(dp51 -g33 -S'NC_000017.10:g.17119692A>C' -p52 -sg35 -(dp53 -g37 -S'17' -p54 -sg39 -g40 -sg41 -S'17119692' -p55 -sg43 -g44 -sssS'GRCh38' -p56 -(dp57 -g33 -S'NC_000017.11:g.17216378A>C' -p58 -sg35 -(dp59 -g37 -g54 -sg39 -g40 -sg41 -S'17216378' -p60 -sg43 -g44 -sssssS'NM_001353230.1:c.1300+2T>G' -p61 -(dp62 -g3 -g4 -sg5 -(lp63 -S'RefSeqGene record not available' -p64 -asg8 -g4 -sg9 -(lp65 -sg11 -VHomo sapiens folliculin (FLCN), transcript variant 4, mRNA -p66 -sg13 -S'FLCN' -p67 -sg15 -(dp68 -g17 -S'NP_001340159.1:p.?' -p69 -sg19 -S'NP_001340159.1:p.?' 
-p70 -ssg21 -g22 -sg23 -S'NC_000017.10(NM_001353230.1):c.1300+2T>G' -p71 -sg25 -g4 -sg26 -S'NM_001353230.1:c.1300+2T>G' -p72 -sg28 -g4 -sg29 -(dp73 -g31 -(dp74 -g33 -S'NC_000017.10:g.17119692A>C' -p75 -sg35 -(dp76 -g37 -g38 -sg39 -g40 -sg41 -S'17119692' -p77 -sg43 -g44 -sssg45 -(dp78 -g33 -S'NC_000017.11:g.17216378A>C' -p79 -sg35 -(dp80 -g37 -g38 -sg39 -g40 -sg41 -S'17216378' -p81 -sg43 -g44 -sssg50 -(dp82 -g33 -S'NC_000017.10:g.17119692A>C' -p83 -sg35 -(dp84 -g37 -g54 -sg39 -g40 -sg41 -S'17119692' -p85 -sg43 -g44 -sssg56 -(dp86 -g33 -S'NC_000017.11:g.17216378A>C' -p87 -sg35 -(dp88 -g37 -g54 -sg39 -g40 -sg41 -S'17216378' -p89 -sg43 -g44 -sssssS'NM_001353229.1:c.1354+2T>G' -p90 -(dp91 -g3 -g4 -sg5 -(lp92 -S'RefSeqGene record not available' -p93 -asg8 -g4 -sg9 -(lp94 -sg11 -VHomo sapiens folliculin (FLCN), transcript variant 3, mRNA -p95 -sg13 -S'FLCN' -p96 -sg15 -(dp97 -g17 -S'NP_001340158.1:p.?' -p98 -sg19 -S'NP_001340158.1:p.?' -p99 -ssg21 -g22 -sg23 -S'NC_000017.10(NM_001353229.1):c.1354+2T>G' -p100 -sg25 -g4 -sg26 -S'NM_001353229.1:c.1354+2T>G' -p101 -sg28 -g4 -sg29 -(dp102 -g31 -(dp103 -g33 -S'NC_000017.10:g.17119692A>C' -p104 -sg35 -(dp105 -g37 -g38 -sg39 -g40 -sg41 -S'17119692' -p106 -sg43 -g44 -sssg45 -(dp107 -g33 -S'NC_000017.11:g.17216378A>C' -p108 -sg35 -(dp109 -g37 -g38 -sg39 -g40 -sg41 -S'17216378' -p110 -sg43 -g44 -sssg50 -(dp111 -g33 -S'NC_000017.10:g.17119692A>C' -p112 -sg35 -(dp113 -g37 -g54 -sg39 -g40 -sg41 -S'17119692' -p114 -sg43 -g44 -sssg56 -(dp115 -g33 -S'NC_000017.11:g.17216378A>C' -p116 -sg35 -(dp117 -g37 -g54 -sg39 -g40 -sg41 -S'17216378' -p118 -sg43 -g44 -sssssS'flag' -p119 -S'gene_variant' -p120 -sS'NM_144997.5:c.1300+2T>G' -p121 -(dp122 -g3 -S'LRG_325t1:c.1300+2T>G' -p123 -sg5 -(lp124 -S'A more recent version of the selected reference sequence NM_144997.5 is available (NM_144997.6)' -p125 -aS'NM_144997.6:c.1300+2T>G MUST be fully validated prior to use in reports' -p126 -aS'select_variants=NM_144997.6:c.1300+2T>G' -p127 -asg8 
-S'NG_008001.2(NM_144997.5):c.1300+2T>G' -p128 -sg9 -(lp129 -sg11 -VHomo sapiens folliculin (FLCN), transcript variant 1, mRNA -p130 -sg13 -S'FLCN' -p131 -sg15 -(dp132 -g17 -S'NP_659434.2(LRG_325p1):p.?' -p133 -sg19 -S'NP_659434.2:p.?' -p134 -ssg21 -g22 -sg23 -S'NC_000017.10(NM_144997.5):c.1300+2T>G' -p135 -sg25 -S'LRG_325:g.25811T>G' -p136 -sg26 -S'NM_144997.5:c.1300+2T>G' -p137 -sg28 -S'NG_008001.2:g.25811T>G' -p138 -sg29 -(dp139 -g31 -(dp140 -g33 -S'NC_000017.10:g.17119692A>C' -p141 -sg35 -(dp142 -g37 -g38 -sg39 -g40 -sg41 -S'17119692' -p143 -sg43 -g44 -sssg45 -(dp144 -g33 -S'NC_000017.11:g.17216378A>C' -p145 -sg35 -(dp146 -g37 -g38 -sg39 -g40 -sg41 -S'17216378' -p147 -sg43 -g44 -sssg50 -(dp148 -g33 -S'NC_000017.10:g.17119692A>C' -p149 -sg35 -(dp150 -g37 -g54 -sg39 -g40 -sg41 -S'17119692' -p151 -sg43 -g44 -sssg56 -(dp152 -g33 -S'NC_000017.11:g.17216378A>C' -p153 -sg35 -(dp154 -g37 -g54 -sg39 -g40 -sg41 -S'17216378' -p155 -sg43 -g44 -sssssS'NM_001353231.1:c.1300+2T>G' -p156 -(dp157 -g3 -g4 -sg5 -(lp158 -S'RefSeqGene record not available' -p159 -asg8 -g4 -sg9 -(lp160 -sg11 -VHomo sapiens folliculin (FLCN), transcript variant 5, mRNA -p161 -sg13 -S'FLCN' -p162 -sg15 -(dp163 -g17 -S'NP_001340160.1:p.?' -p164 -sg19 -S'NP_001340160.1:p.?' -p165 -ssg21 -g22 -sg23 -S'NC_000017.10(NM_001353231.1):c.1300+2T>G' -p166 -sg25 -g4 -sg26 -S'NM_001353231.1:c.1300+2T>G' -p167 -sg28 -g4 -sg29 -(dp168 -g31 -(dp169 -g33 -S'NC_000017.10:g.17119692A>C' -p170 -sg35 -(dp171 -g37 -g38 -sg39 -g40 -sg41 -S'17119692' -p172 -sg43 -g44 -sssg45 -(dp173 -g33 -S'NC_000017.11:g.17216378A>C' -p174 -sg35 -(dp175 -g37 -g38 -sg39 -g40 -sg41 -S'17216378' -p176 -sg43 -g44 -sssg50 -(dp177 -g33 -S'NC_000017.10:g.17119692A>C' -p178 -sg35 -(dp179 -g37 -g54 -sg39 -g40 -sg41 -S'17119692' -p180 -sg43 -g44 -sssg56 -(dp181 -g33 -S'NC_000017.11:g.17216378A>C' -p182 -sg35 -(dp183 -g37 -g54 -sg39 -g40 -sg41 -S'17216378' -p184 -sg43 -g44 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant253.txt b/VariantValidator/testing/testOutputsMasterITS/variant253.txt deleted file mode 100644 index 07b32739..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant253.txt +++ /dev/null @@ -1,696 +0,0 @@ -(dp0 -S'NM_007294.3:c.*103_*106del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_292t1:c.*103_*106del' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000017.10:g.41197588GGACA>G automapped to NC_000017.10:g.41197590_41197593del' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -S'' -p9 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 1, mRNA -p13 -sS'gene_symbol' -p14 -S'BRCA1' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_009225.1(LRG_292p1):p.?' -p19 -sS'slr' -p20 -S'NP_009225.1:p.?' -p21 -ssS'submitted_variant' -p22 -S'17-41197588-GGACA-G' -p23 -sS'genome_context_intronic_sequence' -p24 -g9 -sS'HGVS_LRG_variant' -p25 -S'LRG_292:g.172409_172412del' -p26 -sS'HGVS_transcript_variant' -p27 -S'NM_007294.3:c.*103_*106del' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -S'NG_005905.2:g.172409_172412del' -p30 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000017.10:g.41197589_41197592del' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr17' -p40 -sS'ref' -p41 -S'GGACA' -p42 -sS'pos' -p43 -S'41197588' -p44 -sS'alt' -p45 -S'G' -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000017.11:g.43045572_43045575del' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -S'GGACA' -p51 -sg43 -S'43045571' -p52 -sg45 -g46 -sssS'GRCh37' -p53 -(dp54 -g35 -S'NC_000017.10:g.41197589_41197592del' -p55 -sg37 -(dp56 -g39 -S'17' -p57 -sg41 -S'GGACA' -p58 -sg43 -S'41197588' -p59 -sg45 -g46 -sssS'GRCh38' -p60 -(dp61 -g35 -S'NC_000017.11:g.43045572_43045575del' -p62 -sg37 -(dp63 -g39 -g57 -sg41 -S'GGACA' -p64 -sg43 -S'43045571' -p65 -sg45 -g46 
-sssssS'NM_007297.3:c.*103_*106del' -p66 -(dp67 -g3 -g9 -sg5 -(lp68 -S'NC_000017.10:g.41197588GGACA>G automapped to NC_000017.10:g.41197590_41197593del' -p69 -aS'RefSeqGene record not available' -p70 -asg8 -g9 -sg10 -(lp71 -sg12 -VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 3, mRNA -p72 -sg14 -S'BRCA1' -p73 -sg16 -(dp74 -g18 -S'NP_009228.2:p.?' -p75 -sg20 -S'NP_009228.2:p.?' -p76 -ssg22 -g23 -sg24 -g9 -sg25 -g9 -sg27 -S'NM_007297.3:c.*103_*106del' -p77 -sg29 -g9 -sg31 -(dp78 -g33 -(dp79 -g35 -S'NC_000017.10:g.41197589_41197592del' -p80 -sg37 -(dp81 -g39 -g40 -sg41 -S'GGACA' -p82 -sg43 -S'41197588' -p83 -sg45 -g46 -sssg47 -(dp84 -g35 -S'NC_000017.11:g.43045572_43045575del' -p85 -sg37 -(dp86 -g39 -g40 -sg41 -S'GGACA' -p87 -sg43 -S'43045571' -p88 -sg45 -g46 -sssg53 -(dp89 -g35 -S'NC_000017.10:g.41197589_41197592del' -p90 -sg37 -(dp91 -g39 -g57 -sg41 -S'GGACA' -p92 -sg43 -S'41197588' -p93 -sg45 -g46 -sssg60 -(dp94 -g35 -S'NC_000017.11:g.43045572_43045575del' -p95 -sg37 -(dp96 -g39 -g57 -sg41 -S'GGACA' -p97 -sg43 -S'43045571' -p98 -sg45 -g46 -sssssS'NR_027676.1:n.5831_5834del' -p99 -(dp100 -g3 -g9 -sg5 -(lp101 -S'NC_000017.10:g.41197588GGACA>G automapped to NC_000017.10:g.41197590_41197593del' -p102 -aS'RefSeqGene record not available' -p103 -asg8 -g9 -sg10 -(lp104 -sg12 -VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 6, non-coding RNA -p105 -sg14 -S'BRCA1' -p106 -sg16 -(dp107 -g18 -S'Non-coding :n.' 
-p108 -sg20 -g9 -ssg22 -g23 -sg24 -g9 -sg25 -g9 -sg27 -S'NR_027676.1:n.5831_5834del' -p109 -sg29 -g9 -sg31 -(dp110 -g33 -(dp111 -g35 -S'NC_000017.10:g.41197589_41197592del' -p112 -sg37 -(dp113 -g39 -g40 -sg41 -S'GGACA' -p114 -sg43 -S'41197588' -p115 -sg45 -g46 -sssg47 -(dp116 -g35 -S'NC_000017.11:g.43045572_43045575del' -p117 -sg37 -(dp118 -g39 -g40 -sg41 -S'GGACA' -p119 -sg43 -S'43045571' -p120 -sg45 -g46 -sssg53 -(dp121 -g35 -S'NC_000017.10:g.41197589_41197592del' -p122 -sg37 -(dp123 -g39 -g57 -sg41 -S'GGACA' -p124 -sg43 -S'41197588' -p125 -sg45 -g46 -sssg60 -(dp126 -g35 -S'NC_000017.11:g.43045572_43045575del' -p127 -sg37 -(dp128 -g39 -g57 -sg41 -S'GGACA' -p129 -sg43 -S'43045571' -p130 -sg45 -g46 -sssssS'NM_007300.3:c.*103_*106del' -p131 -(dp132 -g3 -g9 -sg5 -(lp133 -S'NC_000017.10:g.41197588GGACA>G automapped to NC_000017.10:g.41197590_41197593del' -p134 -aS'RefSeqGene record not available' -p135 -asg8 -g9 -sg10 -(lp136 -sg12 -VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 2, mRNA -p137 -sg14 -S'BRCA1' -p138 -sg16 -(dp139 -g18 -S'NP_009231.2:p.?' -p140 -sg20 -S'NP_009231.2:p.?' 
-p141 -ssg22 -g23 -sg24 -g9 -sg25 -g9 -sg27 -S'NM_007300.3:c.*103_*106del' -p142 -sg29 -g9 -sg31 -(dp143 -g33 -(dp144 -g35 -S'NC_000017.10:g.41197589_41197592del' -p145 -sg37 -(dp146 -g39 -g40 -sg41 -S'GGACA' -p147 -sg43 -S'41197588' -p148 -sg45 -g46 -sssg47 -(dp149 -g35 -S'NC_000017.11:g.43045572_43045575del' -p150 -sg37 -(dp151 -g39 -g40 -sg41 -S'GGACA' -p152 -sg43 -S'43045571' -p153 -sg45 -g46 -sssg53 -(dp154 -g35 -S'NC_000017.10:g.41197589_41197592del' -p155 -sg37 -(dp156 -g39 -g57 -sg41 -S'GGACA' -p157 -sg43 -S'41197588' -p158 -sg45 -g46 -sssg60 -(dp159 -g35 -S'NC_000017.11:g.43045572_43045575del' -p160 -sg37 -(dp161 -g39 -g57 -sg41 -S'GGACA' -p162 -sg43 -S'43045571' -p163 -sg45 -g46 -sssssS'flag' -p164 -S'gene_variant' -p165 -sS'NM_007299.3:c.*209_*212del' -p166 -(dp167 -g3 -g9 -sg5 -(lp168 -S'NC_000017.10:g.41197588GGACA>G automapped to NC_000017.10:g.41197590_41197593del' -p169 -aS'RefSeqGene record not available' -p170 -asg8 -g9 -sg10 -(lp171 -sg12 -VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 5, mRNA -p172 -sg14 -S'BRCA1' -p173 -sg16 -(dp174 -g18 -S'NP_009230.2:p.?' -p175 -sg20 -S'NP_009230.2:p.?' 
-p176 -ssg22 -g23 -sg24 -g9 -sg25 -g9 -sg27 -S'NM_007299.3:c.*209_*212del' -p177 -sg29 -g9 -sg31 -(dp178 -g33 -(dp179 -g35 -S'NC_000017.10:g.41197589_41197592del' -p180 -sg37 -(dp181 -g39 -g40 -sg41 -S'GGACA' -p182 -sg43 -S'41197588' -p183 -sg45 -g46 -sssg47 -(dp184 -g35 -S'NC_000017.11:g.43045572_43045575del' -p185 -sg37 -(dp186 -g39 -g40 -sg41 -S'GGACA' -p187 -sg43 -S'43045571' -p188 -sg45 -g46 -sssg53 -(dp189 -g35 -S'NC_000017.10:g.41197589_41197592del' -p190 -sg37 -(dp191 -g39 -g57 -sg41 -S'GGACA' -p192 -sg43 -S'41197588' -p193 -sg45 -g46 -sssg60 -(dp194 -g35 -S'NC_000017.11:g.43045572_43045575del' -p195 -sg37 -(dp196 -g39 -g57 -sg41 -S'GGACA' -p197 -sg43 -S'43045571' -p198 -sg45 -g46 -sssssS'NM_007298.3:c.*103_*106del' -p199 -(dp200 -g3 -g9 -sg5 -(lp201 -S'NC_000017.10:g.41197588GGACA>G automapped to NC_000017.10:g.41197590_41197593del' -p202 -aS'RefSeqGene record not available' -p203 -asg8 -g9 -sg10 -(lp204 -sg12 -VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 4, mRNA -p205 -sg14 -S'BRCA1' -p206 -sg16 -(dp207 -g18 -S'NP_009229.2:p.?' -p208 -sg20 -S'NP_009229.2:p.?' -p209 -ssg22 -g23 -sg24 -g9 -sg25 -g9 -sg27 -S'NM_007298.3:c.*103_*106del' -p210 -sg29 -g9 -sg31 -(dp211 -g33 -(dp212 -g35 -S'NC_000017.10:g.41197589_41197592del' -p213 -sg37 -(dp214 -g39 -g40 -sg41 -S'GGACA' -p215 -sg43 -S'41197588' -p216 -sg45 -g46 -sssg47 -(dp217 -g35 -S'NC_000017.11:g.43045572_43045575del' -p218 -sg37 -(dp219 -g39 -g40 -sg41 -S'GGACA' -p220 -sg43 -S'43045571' -p221 -sg45 -g46 -sssg53 -(dp222 -g35 -S'NC_000017.10:g.41197589_41197592del' -p223 -sg37 -(dp224 -g39 -g57 -sg41 -S'GGACA' -p225 -sg43 -S'41197588' -p226 -sg45 -g46 -sssg60 -(dp227 -g35 -S'NC_000017.11:g.43045572_43045575del' -p228 -sg37 -(dp229 -g39 -g57 -sg41 -S'GGACA' -p230 -sg43 -S'43045571' -p231 -sg45 -g46 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant254.txt b/VariantValidator/testing/testOutputsMasterITS/variant254.txt deleted file mode 100644 index 3fc04242..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant254.txt +++ /dev/null @@ -1,668 +0,0 @@ -(dp0 -S'NM_007299.3:c.301+1G>C' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 5, mRNA -p12 -sS'gene_symbol' -p13 -S'BRCA1' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_009230.2:p.?' -p18 -sS'slr' -p19 -S'NP_009230.2:p.?' -p20 -ssS'submitted_variant' -p21 -S'17-41256884-C-G' -p22 -sS'genome_context_intronic_sequence' -p23 -S'NC_000017.10(NM_007299.3):c.301+1G>C' -p24 -sS'HGVS_LRG_variant' -p25 -g4 -sS'HGVS_transcript_variant' -p26 -S'NM_007299.3:c.301+1G>C' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000017.10:g.41256884C>G' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr17' -p38 -sS'ref' -p39 -VC -p40 -sS'pos' -p41 -S'41256884' -p42 -sS'alt' -p43 -VG -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000017.11:g.43104867C>G' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'43104867' -p49 -sg43 -g44 -sssS'GRCh37' -p50 -(dp51 -g33 -S'NC_000017.10:g.41256884C>G' -p52 -sg35 -(dp53 -g37 -S'17' -p54 -sg39 -g40 -sg41 -S'41256884' -p55 -sg43 -g44 -sssS'GRCh38' -p56 -(dp57 -g33 -S'NC_000017.11:g.43104867C>G' -p58 -sg35 -(dp59 -g37 -g54 -sg39 -g40 -sg41 -S'43104867' -p60 -sg43 -g44 -sssssS'NR_027676.1:n.440+1G>C' -p61 -(dp62 -g3 -g4 -sg5 -(lp63 -S'RefSeqGene record not available' -p64 -asg8 -g4 -sg9 -(lp65 -sg11 -VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 
6, non-coding RNA -p66 -sg13 -S'BRCA1' -p67 -sg15 -(dp68 -g17 -S'Non-coding :n.' -p69 -sg19 -g4 -ssg21 -g22 -sg23 -S'NC_000017.10(NR_027676.1):c.440+1G>C' -p70 -sg25 -g4 -sg26 -S'NR_027676.1:n.440+1G>C' -p71 -sg28 -g4 -sg29 -(dp72 -g31 -(dp73 -g33 -S'NC_000017.10:g.41256884C>G' -p74 -sg35 -(dp75 -g37 -g38 -sg39 -g40 -sg41 -S'41256884' -p76 -sg43 -g44 -sssg45 -(dp77 -g33 -S'NC_000017.11:g.43104867C>G' -p78 -sg35 -(dp79 -g37 -g38 -sg39 -g40 -sg41 -S'43104867' -p80 -sg43 -g44 -sssg50 -(dp81 -g33 -S'NC_000017.10:g.41256884C>G' -p82 -sg35 -(dp83 -g37 -g54 -sg39 -g40 -sg41 -S'41256884' -p84 -sg43 -g44 -sssg56 -(dp85 -g33 -S'NC_000017.11:g.43104867C>G' -p86 -sg35 -(dp87 -g37 -g54 -sg39 -g40 -sg41 -S'43104867' -p88 -sg43 -g44 -sssssS'NM_007300.3:c.301+1G>C' -p89 -(dp90 -g3 -g4 -sg5 -(lp91 -S'RefSeqGene record not available' -p92 -asg8 -g4 -sg9 -(lp93 -sg11 -VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 2, mRNA -p94 -sg13 -S'BRCA1' -p95 -sg15 -(dp96 -g17 -S'NP_009231.2:p.?' -p97 -sg19 -S'NP_009231.2:p.?' -p98 -ssg21 -g22 -sg23 -S'NC_000017.10(NM_007300.3):c.301+1G>C' -p99 -sg25 -g4 -sg26 -S'NM_007300.3:c.301+1G>C' -p100 -sg28 -g4 -sg29 -(dp101 -g31 -(dp102 -g33 -S'NC_000017.10:g.41256884C>G' -p103 -sg35 -(dp104 -g37 -g38 -sg39 -g40 -sg41 -S'41256884' -p105 -sg43 -g44 -sssg45 -(dp106 -g33 -S'NC_000017.11:g.43104867C>G' -p107 -sg35 -(dp108 -g37 -g38 -sg39 -g40 -sg41 -S'43104867' -p109 -sg43 -g44 -sssg50 -(dp110 -g33 -S'NC_000017.10:g.41256884C>G' -p111 -sg35 -(dp112 -g37 -g54 -sg39 -g40 -sg41 -S'41256884' -p113 -sg43 -g44 -sssg56 -(dp114 -g33 -S'NC_000017.11:g.43104867C>G' -p115 -sg35 -(dp116 -g37 -g54 -sg39 -g40 -sg41 -S'43104867' -p117 -sg43 -g44 -sssssS'NM_007298.3:c.301+1G>C' -p118 -(dp119 -g3 -g4 -sg5 -(lp120 -S'RefSeqGene record not available' -p121 -asg8 -g4 -sg9 -(lp122 -sg11 -VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 4, mRNA -p123 -sg13 -S'BRCA1' -p124 -sg15 -(dp125 -g17 -S'NP_009229.2:p.?' 
-p126 -sg19 -S'NP_009229.2:p.?' -p127 -ssg21 -g22 -sg23 -S'NC_000017.10(NM_007298.3):c.301+1G>C' -p128 -sg25 -g4 -sg26 -S'NM_007298.3:c.301+1G>C' -p129 -sg28 -g4 -sg29 -(dp130 -g31 -(dp131 -g33 -S'NC_000017.10:g.41256884C>G' -p132 -sg35 -(dp133 -g37 -g38 -sg39 -g40 -sg41 -S'41256884' -p134 -sg43 -g44 -sssg45 -(dp135 -g33 -S'NC_000017.11:g.43104867C>G' -p136 -sg35 -(dp137 -g37 -g38 -sg39 -g40 -sg41 -S'43104867' -p138 -sg43 -g44 -sssg50 -(dp139 -g33 -S'NC_000017.10:g.41256884C>G' -p140 -sg35 -(dp141 -g37 -g54 -sg39 -g40 -sg41 -S'41256884' -p142 -sg43 -g44 -sssg56 -(dp143 -g33 -S'NC_000017.11:g.43104867C>G' -p144 -sg35 -(dp145 -g37 -g54 -sg39 -g40 -sg41 -S'43104867' -p146 -sg43 -g44 -sssssS'NM_007297.3:c.160+1G>C' -p147 -(dp148 -g3 -g4 -sg5 -(lp149 -S'RefSeqGene record not available' -p150 -asg8 -g4 -sg9 -(lp151 -sg11 -VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 3, mRNA -p152 -sg13 -S'BRCA1' -p153 -sg15 -(dp154 -g17 -S'NP_009228.2:p.?' -p155 -sg19 -S'NP_009228.2:p.?' 
-p156 -ssg21 -g22 -sg23 -S'NC_000017.10(NM_007297.3):c.160+1G>C' -p157 -sg25 -g4 -sg26 -S'NM_007297.3:c.160+1G>C' -p158 -sg28 -g4 -sg29 -(dp159 -g31 -(dp160 -g33 -S'NC_000017.10:g.41256884C>G' -p161 -sg35 -(dp162 -g37 -g38 -sg39 -g40 -sg41 -S'41256884' -p163 -sg43 -g44 -sssg45 -(dp164 -g33 -S'NC_000017.11:g.43104867C>G' -p165 -sg35 -(dp166 -g37 -g38 -sg39 -g40 -sg41 -S'43104867' -p167 -sg43 -g44 -sssg50 -(dp168 -g33 -S'NC_000017.10:g.41256884C>G' -p169 -sg35 -(dp170 -g37 -g54 -sg39 -g40 -sg41 -S'41256884' -p171 -sg43 -g44 -sssg56 -(dp172 -g33 -S'NC_000017.11:g.43104867C>G' -p173 -sg35 -(dp174 -g37 -g54 -sg39 -g40 -sg41 -S'43104867' -p175 -sg43 -g44 -sssssS'flag' -p176 -S'gene_variant' -p177 -sS'NM_007294.3:c.301+1G>C' -p178 -(dp179 -g3 -S'LRG_292t1:c.301+1G>C' -p180 -sg5 -(lp181 -sg8 -S'NG_005905.2(NM_007294.3):c.301+1G>C' -p182 -sg9 -(lp183 -sg11 -VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 1, mRNA -p184 -sg13 -S'BRCA1' -p185 -sg15 -(dp186 -g17 -S'NP_009225.1(LRG_292p1):p.?' -p187 -sg19 -S'NP_009225.1:p.?' -p188 -ssg21 -g22 -sg23 -S'NC_000017.10(NM_007294.3):c.301+1G>C' -p189 -sg25 -S'LRG_292:g.113117G>C' -p190 -sg26 -S'NM_007294.3:c.301+1G>C' -p191 -sg28 -S'NG_005905.2:g.113117G>C' -p192 -sg29 -(dp193 -g31 -(dp194 -g33 -S'NC_000017.10:g.41256884C>G' -p195 -sg35 -(dp196 -g37 -g38 -sg39 -g40 -sg41 -S'41256884' -p197 -sg43 -g44 -sssg45 -(dp198 -g33 -S'NC_000017.11:g.43104867C>G' -p199 -sg35 -(dp200 -g37 -g38 -sg39 -g40 -sg41 -S'43104867' -p201 -sg43 -g44 -sssg50 -(dp202 -g33 -S'NC_000017.10:g.41256884C>G' -p203 -sg35 -(dp204 -g37 -g54 -sg39 -g40 -sg41 -S'41256884' -p205 -sg43 -g44 -sssg56 -(dp206 -g33 -S'NC_000017.11:g.43104867C>G' -p207 -sg35 -(dp208 -g37 -g54 -sg39 -g40 -sg41 -S'43104867' -p209 -sg43 -g44 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant255.txt b/VariantValidator/testing/testOutputsMasterITS/variant255.txt deleted file mode 100644 index b65e37e5..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant255.txt +++ /dev/null @@ -1,420 +0,0 @@ -(dp0 -S'NM_001131019.2:c.490G>T' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens glial fibrillary acidic protein (GFAP), transcript variant 2, mRNA -p12 -sS'gene_symbol' -p13 -S'GFAP' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001124491.1:p.(Glu164Ter)' -p18 -sS'slr' -p19 -S'NP_001124491.1:p.(E164*)' -p20 -ssS'submitted_variant' -p21 -S'17-42991428-C-A' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_001131019.2:c.490G>T' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000017.10:g.42991428C>A' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr17' -p37 -sS'ref' -p38 -VC -p39 -sS'pos' -p40 -S'42991428' -p41 -sS'alt' -p42 -VA -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000017.11:g.44914060C>A' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'44914060' -p48 -sg42 -g43 -sssS'GRCh37' -p49 -(dp50 -g32 -S'NC_000017.10:g.42991428C>A' -p51 -sg34 -(dp52 -g36 -S'17' -p53 -sg38 -g39 -sg40 -S'42991428' -p54 -sg42 -g43 -sssS'GRCh38' -p55 -(dp56 -g32 -S'NC_000017.11:g.44914060C>A' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'44914060' -p59 -sg42 -g43 -sssssS'flag' -p60 -S'gene_variant' -p61 -sS'NM_001242376.1:c.490G>T' -p62 -(dp63 -g3 -g4 -sg5 -(lp64 -S'RefSeqGene record not available' -p65 -asg8 -g4 -sg9 -(lp66 -sg11 -VHomo sapiens glial fibrillary acidic protein 
(GFAP), transcript variant 3, mRNA -p67 -sg13 -S'GFAP' -p68 -sg15 -(dp69 -g17 -S'NP_001229305.1:p.(Glu164Ter)' -p70 -sg19 -S'NP_001229305.1:p.(E164*)' -p71 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001242376.1:c.490G>T' -p72 -sg27 -g4 -sg28 -(dp73 -g30 -(dp74 -g32 -S'NC_000017.10:g.42991428C>A' -p75 -sg34 -(dp76 -g36 -g37 -sg38 -g39 -sg40 -S'42991428' -p77 -sg42 -g43 -sssg44 -(dp78 -g32 -S'NC_000017.11:g.44914060C>A' -p79 -sg34 -(dp80 -g36 -g37 -sg38 -g39 -sg40 -S'44914060' -p81 -sg42 -g43 -sssg49 -(dp82 -g32 -S'NC_000017.10:g.42991428C>A' -p83 -sg34 -(dp84 -g36 -g53 -sg38 -g39 -sg40 -S'42991428' -p85 -sg42 -g43 -sssg55 -(dp86 -g32 -S'NC_000017.11:g.44914060C>A' -p87 -sg34 -(dp88 -g36 -g53 -sg38 -g39 -sg40 -S'44914060' -p89 -sg42 -g43 -sssssS'NM_001363846.1:c.490G>T' -p90 -(dp91 -g3 -g4 -sg5 -(lp92 -S'RefSeqGene record not available' -p93 -asg8 -g4 -sg9 -(lp94 -sg11 -VHomo sapiens glial fibrillary acidic protein (GFAP), transcript variant 4, mRNA -p95 -sg13 -S'GFAP' -p96 -sg15 -(dp97 -g17 -S'NP_001350775.1:p.(Glu164Ter)' -p98 -sg19 -S'NP_001350775.1:p.(E164*)' -p99 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001363846.1:c.490G>T' -p100 -sg27 -g4 -sg28 -(dp101 -g30 -(dp102 -g32 -S'NC_000017.10:g.42991428C>A' -p103 -sg34 -(dp104 -g36 -g37 -sg38 -g39 -sg40 -S'42991428' -p105 -sg42 -g43 -sssg49 -(dp106 -g32 -S'NC_000017.10:g.42991428C>A' -p107 -sg34 -(dp108 -g36 -g53 -sg38 -g39 -sg40 -S'42991428' -p109 -sg42 -g43 -sssssS'NM_002055.4:c.490G>T' -p110 -(dp111 -g3 -g4 -sg5 -(lp112 -sg8 -g4 -sg9 -(lp113 -sg11 -VHomo sapiens glial fibrillary acidic protein (GFAP), transcript variant 1, mRNA -p114 -sg13 -S'GFAP' -p115 -sg15 -(dp116 -g17 -S'NP_002046.1:p.(Glu164Ter)' -p117 -sg19 -S'NP_002046.1:p.(E164*)' -p118 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_002055.4:c.490G>T' -p119 -sg27 -S'NG_008401.1:g.6487G>T' -p120 -sg28 -(dp121 -g30 -(dp122 -g32 -S'NC_000017.10:g.42991428C>A' -p123 -sg34 -(dp124 -g36 -g37 -sg38 -g39 -sg40 -S'42991428' -p125 -sg42 -g43 -sssg44 -(dp126 -g32 
-S'NC_000017.11:g.44914060C>A' -p127 -sg34 -(dp128 -g36 -g37 -sg38 -g39 -sg40 -S'44914060' -p129 -sg42 -g43 -sssg49 -(dp130 -g32 -S'NC_000017.10:g.42991428C>A' -p131 -sg34 -(dp132 -g36 -g53 -sg38 -g39 -sg40 -S'42991428' -p133 -sg42 -g43 -sssg55 -(dp134 -g32 -S'NC_000017.11:g.44914060C>A' -p135 -sg34 -(dp136 -g36 -g53 -sg38 -g39 -sg40 -S'44914060' -p137 -sg42 -g43 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant256.txt b/VariantValidator/testing/testOutputsMasterITS/variant256.txt deleted file mode 100644 index 8a03a7e0..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant256.txt +++ /dev/null @@ -1,505 +0,0 @@ -(dp0 -S'NR_135553.1:n.1022A>T' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens sarcoglycan alpha (SGCA), transcript variant 3, non-coding RNA -p12 -sS'gene_symbol' -p13 -S'SGCA' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'Non-coding :n.' 
-p18 -sS'slr' -p19 -g4 -ssS'submitted_variant' -p20 -S'17-48252809-A-T' -p21 -sS'genome_context_intronic_sequence' -p22 -g4 -sS'HGVS_LRG_variant' -p23 -g4 -sS'HGVS_transcript_variant' -p24 -S'NR_135553.1:n.1022A>T' -p25 -sS'HGVS_RefSeqGene_variant' -p26 -g4 -sS'primary_assembly_loci' -p27 -(dp28 -S'hg19' -p29 -(dp30 -S'HGVS_genomic_description' -p31 -S'NC_000017.10:g.48252809A>T' -p32 -sS'vcf' -p33 -(dp34 -S'chr' -p35 -S'chr17' -p36 -sS'ref' -p37 -S'A' -p38 -sS'pos' -p39 -S'48252809' -p40 -sS'alt' -p41 -S'T' -p42 -sssS'hg38' -p43 -(dp44 -g31 -S'NC_000017.11:g.50175448A>T' -p45 -sg33 -(dp46 -g35 -g36 -sg37 -g38 -sg39 -S'50175448' -p47 -sg41 -g42 -sssS'GRCh37' -p48 -(dp49 -g31 -S'NC_000017.10:g.48252809A>T' -p50 -sg33 -(dp51 -g35 -S'17' -p52 -sg37 -g38 -sg39 -S'48252809' -p53 -sg41 -g42 -sssS'GRCh38' -p54 -(dp55 -g31 -S'NC_000017.11:g.50175448A>T' -p56 -sg33 -(dp57 -g35 -g52 -sg37 -g38 -sg39 -S'50175448' -p58 -sg41 -g42 -sssssS'NM_001135697.1:c.*11A>T' -p59 -(dp60 -g3 -g4 -sg5 -(lp61 -S'A more recent version of the selected reference sequence NM_001135697.1 is available (NM_001135697.2)' -p62 -aS'NM_001135697.2:c.*11A>T MUST be fully validated prior to use in reports' -p63 -aS'select_variants=NM_001135697.2:c.*11A>T' -p64 -aS'RefSeqGene record not available' -p65 -asg8 -g4 -sg9 -(lp66 -sg11 -VHomo sapiens sarcoglycan alpha (SGCA), transcript variant 2, mRNA -p67 -sg13 -S'SGCA' -p68 -sg15 -(dp69 -g17 -S'NP_001129169.1:p.?' -p70 -sg19 -S'NP_001129169.1:p.?' 
-p71 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_001135697.1:c.*11A>T' -p72 -sg26 -g4 -sg27 -(dp73 -g29 -(dp74 -g31 -S'NC_000017.10:g.48252809A>T' -p75 -sg33 -(dp76 -g35 -g36 -sg37 -g38 -sg39 -S'48252809' -p77 -sg41 -g42 -sssg48 -(dp78 -g31 -S'NC_000017.10:g.48252809A>T' -p79 -sg33 -(dp80 -g35 -g52 -sg37 -g38 -sg39 -S'48252809' -p81 -sg41 -g42 -sssssS'flag' -p82 -S'gene_variant' -p83 -sS'NM_000023.2:c.*11A>T' -p84 -(dp85 -g3 -S'LRG_203t1:c.*11A>T' -p86 -sg5 -(lp87 -S'A more recent version of the selected reference sequence NM_000023.2 is available (NM_000023.3)' -p88 -aS'NM_000023.3:c.*11A>T MUST be fully validated prior to use in reports' -p89 -aS'select_variants=NM_000023.3:c.*11A>T' -p90 -asg8 -g4 -sg9 -(lp91 -sg11 -VHomo sapiens sarcoglycan alpha (SGCA), transcript variant 1, mRNA -p92 -sg13 -S'SGCA' -p93 -sg15 -(dp94 -g17 -S'NP_000014.1(LRG_203p1):p.?' -p95 -sg19 -S'NP_000014.1:p.?' -p96 -ssg20 -g21 -sg22 -g4 -sg23 -S'LRG_203:g.14444A>T' -p97 -sg24 -S'NM_000023.2:c.*11A>T' -p98 -sg26 -S'NG_008889.1:g.14444A>T' -p99 -sg27 -(dp100 -g29 -(dp101 -g31 -S'NC_000017.10:g.48252809A>T' -p102 -sg33 -(dp103 -g35 -g36 -sg37 -g38 -sg39 -S'48252809' -p104 -sg41 -g42 -sssg48 -(dp105 -g31 -S'NC_000017.10:g.48252809A>T' -p106 -sg33 -(dp107 -g35 -g52 -sg37 -g38 -sg39 -S'48252809' -p108 -sg41 -g42 -sssssS'NM_001135697.2:c.*11A>T' -p109 -(dp110 -g3 -g4 -sg5 -(lp111 -S'RefSeqGene record not available' -p112 -asg8 -g4 -sg9 -(lp113 -sg11 -VHomo sapiens sarcoglycan alpha (SGCA), transcript variant 2, mRNA -p114 -sg13 -S'SGCA' -p115 -sg15 -(dp116 -g17 -S'NP_001129169.1:p.?' -p117 -sg19 -S'NP_001129169.1:p.?' 
-p118 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_001135697.2:c.*11A>T' -p119 -sg26 -g4 -sg27 -(dp120 -g29 -(dp121 -g31 -S'NC_000017.10:g.48252809A>T' -p122 -sg33 -(dp123 -g35 -g36 -sg37 -g38 -sg39 -S'48252809' -p124 -sg41 -g42 -sssg43 -(dp125 -g31 -S'NC_000017.11:g.50175448A>T' -p126 -sg33 -(dp127 -g35 -g36 -sg37 -g38 -sg39 -S'50175448' -p128 -sg41 -g42 -sssg48 -(dp129 -g31 -S'NC_000017.10:g.48252809A>T' -p130 -sg33 -(dp131 -g35 -g52 -sg37 -g38 -sg39 -S'48252809' -p132 -sg41 -g42 -sssg54 -(dp133 -g31 -S'NC_000017.11:g.50175448A>T' -p134 -sg33 -(dp135 -g35 -g52 -sg37 -g38 -sg39 -S'50175448' -p136 -sg41 -g42 -sssssS'NM_000023.3:c.*11A>T' -p137 -(dp138 -g3 -g4 -sg5 -(lp139 -S'RefSeqGene record not available' -p140 -asg8 -g4 -sg9 -(lp141 -sg11 -VHomo sapiens sarcoglycan alpha (SGCA), transcript variant 1, mRNA -p142 -sg13 -S'SGCA' -p143 -sg15 -(dp144 -g17 -S'NP_000014.1(LRG_203p1):p.?' -p145 -sg19 -S'NP_000014.1:p.?' -p146 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_000023.3:c.*11A>T' -p147 -sg26 -g4 -sg27 -(dp148 -g29 -(dp149 -g31 -S'NC_000017.10:g.48252809A>T' -p150 -sg33 -(dp151 -g35 -g36 -sg37 -g38 -sg39 -S'48252809' -p152 -sg41 -g42 -sssg43 -(dp153 -g31 -S'NC_000017.11:g.50175448A>T' -p154 -sg33 -(dp155 -g35 -g36 -sg37 -g38 -sg39 -S'50175448' -p156 -sg41 -g42 -sssg48 -(dp157 -g31 -S'NC_000017.10:g.48252809A>T' -p158 -sg33 -(dp159 -g35 -g52 -sg37 -g38 -sg39 -S'48252809' -p160 -sg41 -g42 -sssg54 -(dp161 -g31 -S'NC_000017.11:g.50175448A>T' -p162 -sg33 -(dp163 -g35 -g52 -sg37 -g38 -sg39 -S'50175448' -p164 -sg41 -g42 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant257.txt b/VariantValidator/testing/testOutputsMasterITS/variant257.txt deleted file mode 100644 index 17e505a7..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant257.txt +++ /dev/null @@ -1,150 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000334.4:c.3720+9_3720+10dup' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000017.10:g.62022709G>GTC automapped to NC_000017.10:g.62022710_62022711dupTC' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'NG_011699.1(NM_000334.4):c.3720+9_3720+10dup' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens sodium voltage-gated channel alpha subunit 4 (SCN4A), mRNA -p15 -sS'gene_symbol' -p16 -S'SCN4A' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000325.4:p.?' -p21 -sS'slr' -p22 -S'NP_000325.4:p.?' -p23 -ssS'submitted_variant' -p24 -S'17-62022709-G-GTC' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000017.10(NM_000334.4):c.3720+9_3720+10dup' -p27 -sS'HGVS_LRG_variant' -p28 -g6 -sS'HGVS_transcript_variant' -p29 -S'NM_000334.4:c.3720+9_3720+10dup' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_011699.1:g.32568_32569dup' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000017.10:g.62022710_62022711dup' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr17' -p42 -sS'ref' -p43 -S'TC' -p44 -sS'pos' -p45 -S'62022710' -p46 -sS'alt' -p47 -S'TCTC' -p48 -sssS'hg38' -p49 -(dp50 -g37 -S'NC_000017.11:g.63945350_63945351dup' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -S'TC' -p53 -sg45 -S'63945350' -p54 -sg47 -S'TCTC' -p55 -sssS'GRCh37' -p56 -(dp57 -g37 -S'NC_000017.10:g.62022710_62022711dup' -p58 -sg39 -(dp59 -g41 -S'17' -p60 -sg43 -S'TC' -p61 -sg45 -S'62022710' -p62 -sg47 -S'TCTC' -p63 -sssS'GRCh38' -p64 -(dp65 -g37 -S'NC_000017.11:g.63945350_63945351dup' -p66 -sg39 
-(dp67 -g41 -g60 -sg43 -S'TC' -p68 -sg45 -S'63945350' -p69 -sg47 -S'TCTC' -p70 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant258.txt b/VariantValidator/testing/testOutputsMasterITS/variant258.txt deleted file mode 100644 index 53a060aa..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant258.txt +++ /dev/null @@ -1,147 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000334.4:c.3720+8_3720+9insA' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000017.10:g.62022711C>CT automapped to NC_000017.10:g.62022711_62022712insT' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'NG_011699.1(NM_000334.4):c.3720+8_3720+9insA' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens sodium voltage-gated channel alpha subunit 4 (SCN4A), mRNA -p15 -sS'gene_symbol' -p16 -S'SCN4A' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000325.4:p.?' -p21 -sS'slr' -p22 -S'NP_000325.4:p.?' 
-p23 -ssS'submitted_variant' -p24 -S'17-62022711-C-CT' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000017.10(NM_000334.4):c.3720+8_3720+9insA' -p27 -sS'HGVS_LRG_variant' -p28 -g6 -sS'HGVS_transcript_variant' -p29 -S'NM_000334.4:c.3720+8_3720+9insA' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_011699.1:g.32567_32568insA' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000017.10:g.62022711_62022712insT' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr17' -p42 -sS'ref' -p43 -S'C' -p44 -sS'pos' -p45 -S'62022711' -p46 -sS'alt' -p47 -VCT -p48 -sssS'hg38' -p49 -(dp50 -g37 -S'NC_000017.11:g.63945351_63945352insT' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -g44 -sg45 -S'63945351' -p53 -sg47 -VCT -p54 -sssS'GRCh37' -p55 -(dp56 -g37 -S'NC_000017.10:g.62022711_62022712insT' -p57 -sg39 -(dp58 -g41 -S'17' -p59 -sg43 -g44 -sg45 -S'62022711' -p60 -sg47 -VCT -p61 -sssS'GRCh38' -p62 -(dp63 -g37 -S'NC_000017.11:g.63945351_63945352insT' -p64 -sg39 -(dp65 -g41 -g59 -sg43 -g44 -sg45 -S'63945351' -p66 -sg47 -VCT -p67 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant259.txt b/VariantValidator/testing/testOutputsMasterITS/variant259.txt deleted file mode 100644 index c0f62401..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant259.txt +++ /dev/null @@ -1,147 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000334.4:c.3442-8_3442-7insGC' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000017.10:g.62023005G>GGC automapped to NC_000017.10:g.62023005_62023006insGC' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'NG_011699.1(NM_000334.4):c.3442-8_3442-7insGC' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens sodium voltage-gated channel alpha subunit 4 (SCN4A), mRNA -p15 -sS'gene_symbol' -p16 -S'SCN4A' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000325.4:p.?' -p21 -sS'slr' -p22 -S'NP_000325.4:p.?' -p23 -ssS'submitted_variant' -p24 -S'17-62023005-G-GGC' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000017.10(NM_000334.4):c.3442-8_3442-7insGC' -p27 -sS'HGVS_LRG_variant' -p28 -g6 -sS'HGVS_transcript_variant' -p29 -S'NM_000334.4:c.3442-8_3442-7insGC' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_011699.1:g.32273_32274insGC' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000017.10:g.62023005_62023006insGC' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr17' -p42 -sS'ref' -p43 -S'G' -p44 -sS'pos' -p45 -S'62023005' -p46 -sS'alt' -p47 -VGGC -p48 -sssS'hg38' -p49 -(dp50 -g37 -S'NC_000017.11:g.63945645_63945646insGC' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -g44 -sg45 -S'63945645' -p53 -sg47 -VGGC -p54 -sssS'GRCh37' -p55 -(dp56 -g37 -S'NC_000017.10:g.62023005_62023006insGC' -p57 -sg39 -(dp58 -g41 -S'17' -p59 -sg43 -g44 -sg45 -S'62023005' -p60 -sg47 -VGGC -p61 -sssS'GRCh38' -p62 -(dp63 -g37 -S'NC_000017.11:g.63945645_63945646insGC' -p64 -sg39 -(dp65 
-g41 -g59 -sg43 -g44 -sg45 -S'63945645' -p66 -sg47 -VGGC -p67 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant26.txt b/VariantValidator/testing/testOutputsMasterITS/variant26.txt deleted file mode 100644 index 8ee0dfd2..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant26.txt +++ /dev/null @@ -1,58 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use NC_000014.8:g.36989536G>A' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'HGVS_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NR_138595.1:n.1-810C>T' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'HGVS_LRG_variant' -p20 -g4 -sS'HGVS_transcript_variant' -p21 -g4 -sS'HGVS_RefSeqGene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -ssS'flag' -p25 -S'warning' -p26 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant260.txt b/VariantValidator/testing/testOutputsMasterITS/variant260.txt deleted file mode 100644 index 81014a09..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant260.txt +++ /dev/null @@ -1,142 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000334.4:c.3442-8G>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'NG_011699.1(NM_000334.4):c.3442-8G>T' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens sodium voltage-gated channel alpha subunit 4 (SCN4A), mRNA -p14 -sS'gene_symbol' -p15 -S'SCN4A' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000325.4:p.?' -p20 -sS'slr' -p21 -S'NP_000325.4:p.?' -p22 -ssS'submitted_variant' -p23 -S'17-62023006-C-A' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000017.10(NM_000334.4):c.3442-8G>T' -p26 -sS'HGVS_LRG_variant' -p27 -g6 -sS'HGVS_transcript_variant' -p28 -S'NM_000334.4:c.3442-8G>T' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_011699.1:g.32273G>T' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000017.10:g.62023006C>A' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr17' -p41 -sS'ref' -p42 -VC -p43 -sS'pos' -p44 -S'62023006' -p45 -sS'alt' -p46 -VA -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000017.11:g.63945646C>A' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -g43 -sg44 -S'63945646' -p52 -sg46 -g47 -sssS'GRCh37' -p53 -(dp54 -g36 -S'NC_000017.10:g.62023006C>A' -p55 -sg38 -(dp56 -g40 -S'17' -p57 -sg42 -g43 -sg44 -S'62023006' -p58 -sg46 -g47 -sssS'GRCh38' -p59 -(dp60 -g36 -S'NC_000017.11:g.63945646C>A' -p61 -sg38 -(dp62 -g40 -g57 -sg42 -g43 -sg44 -S'63945646' -p63 -sg46 -g47 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant261.txt b/VariantValidator/testing/testOutputsMasterITS/variant261.txt deleted file mode 100644 index 470e53df..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant261.txt +++ /dev/null @@ -1,140 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000334.4:c.2111C>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -g6 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens sodium voltage-gated channel alpha subunit 4 (SCN4A), mRNA -p13 -sS'gene_symbol' -p14 -S'SCN4A' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_000325.4:p.(Thr704Met)' -p19 -sS'slr' -p20 -S'NP_000325.4:p.(T704M)' -p21 -ssS'submitted_variant' -p22 -S'17-62034787-G-A' -p23 -sS'genome_context_intronic_sequence' -p24 -g6 -sS'HGVS_LRG_variant' -p25 -g6 -sS'HGVS_transcript_variant' -p26 -S'NM_000334.4:c.2111C>T' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -S'NG_011699.1:g.20492C>T' -p29 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000017.10:g.62034787G>A' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr17' -p39 -sS'ref' -p40 -VG -p41 -sS'pos' -p42 -S'62034787' -p43 -sS'alt' -p44 -VA -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000017.11:g.63957427G>A' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'63957427' -p50 -sg44 -g45 -sssS'GRCh37' -p51 -(dp52 -g34 -S'NC_000017.10:g.62034787G>A' -p53 -sg36 -(dp54 -g38 -S'17' -p55 -sg40 -g41 -sg42 -S'62034787' -p56 -sg44 -g45 -sssS'GRCh38' -p57 -(dp58 -g34 -S'NC_000017.11:g.63957427G>A' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'63957427' -p61 -sg44 -g45 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant262.txt b/VariantValidator/testing/testOutputsMasterITS/variant262.txt deleted file mode 100644 index 0e79d29c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant262.txt +++ /dev/null @@ -1,818 +0,0 @@ -(dp0 -S'NM_001351443.1:c.-16+941_-16+946del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000018.9:g.24128261GTCCTCC>G automapped to NC_000018.9:g.24128273_24128278del' -p7 -aS'RefSeqGene record not available' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 6, mRNA -p13 -sS'gene_symbol' -p14 -S'KCTD1' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001338372.1:p.?' -p19 -sS'slr' -p20 -S'NP_001338372.1:p.?' -p21 -ssS'submitted_variant' -p22 -S'18-24128261-GTCCTCC-G' -p23 -sS'genome_context_intronic_sequence' -p24 -S'NC_000018.9(NM_001351443.1):c.-16+941_-16+946del' -p25 -sS'HGVS_LRG_variant' -p26 -g4 -sS'HGVS_transcript_variant' -p27 -S'NM_001351443.1:c.-16+941_-16+946del' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -g4 -sS'primary_assembly_loci' -p30 -(dp31 -S'GRCh38' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000018.10:g.26548298_26548303del' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'18' -p39 -sS'ref' -p40 -S'GTCCTCC' -p41 -sS'pos' -p42 -S'26548297' -p43 -sS'alt' -p44 -S'G' -p45 -sssS'GRCh37' -p46 -(dp47 -g34 -S'NC_000018.9:g.24128262_24128267del' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'GTCCTCC' -p50 -sg42 -S'24128261' -p51 -sg44 -g45 -sssS'hg38' -p52 -(dp53 -g34 -S'NC_000018.10:g.26548298_26548303del' -p54 -sg36 -(dp55 -g38 -S'chr18' -p56 -sg40 -S'GTCCTCC' -p57 -sg42 -S'26548297' -p58 -sg44 -g45 -sssS'hg19' -p59 -(dp60 -g34 -S'NC_000018.9:g.24128262_24128267del' -p61 -sg36 -(dp62 -g38 -g56 -sg40 -S'GTCCTCC' 
-p63 -sg42 -S'24128261' -p64 -sg44 -g45 -sssssS'NM_001258222.1:c.10-47053_10-47048del' -p65 -(dp66 -g3 -g4 -sg5 -(lp67 -S'NC_000018.9:g.24128261GTCCTCC>G automapped to NC_000018.9:g.24128273_24128278del' -p68 -aS'A more recent version of the selected reference sequence NM_001258222.1 is available (NM_001258222.2)' -p69 -aS'NM_001258222.2:c.10-47053_10-47048del MUST be fully validated prior to use in reports' -p70 -aS'select_variants=NM_001258222.2:c.10-47053_10-47048del' -p71 -aS'RefSeqGene record not available' -p72 -asg9 -g4 -sg10 -(lp73 -sg12 -VHomo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 5, mRNA -p74 -sg14 -S'KCTD1' -p75 -sg16 -(dp76 -g18 -S'NP_001245151.1:p.?' -p77 -sg20 -S'NP_001245151.1:p.?' -p78 -ssg22 -g23 -sg24 -S'NC_000018.9(NM_001258222.1):c.10-47053_10-47048del' -p79 -sg26 -g4 -sg27 -S'NM_001258222.1:c.10-47053_10-47048del' -p80 -sg29 -g4 -sg30 -(dp81 -g32 -(dp82 -g34 -S'NC_000018.10:g.26548298_26548303del' -p83 -sg36 -(dp84 -g38 -g39 -sg40 -S'GTCCTCC' -p85 -sg42 -S'26548297' -p86 -sg44 -g45 -sssg46 -(dp87 -g34 -S'NC_000018.9:g.24128262_24128267del' -p88 -sg36 -(dp89 -g38 -g39 -sg40 -S'GTCCTCC' -p90 -sg42 -S'24128261' -p91 -sg44 -g45 -sssg52 -(dp92 -g34 -S'NC_000018.10:g.26548298_26548303del' -p93 -sg36 -(dp94 -g38 -g56 -sg40 -S'GTCCTCC' -p95 -sg42 -S'26548297' -p96 -sg44 -g45 -sssg59 -(dp97 -g34 -S'NC_000018.9:g.24128262_24128267del' -p98 -sg36 -(dp99 -g38 -g56 -sg40 -S'GTCCTCC' -p100 -sg42 -S'24128261' -p101 -sg44 -g45 -sssssS'NM_001258221.1:c.-16+1426_-16+1431del' -p102 -(dp103 -g3 -g4 -sg5 -(lp104 -S'NC_000018.9:g.24128261GTCCTCC>G automapped to NC_000018.9:g.24128273_24128278del' -p105 -aS'RefSeqGene record not available' -p106 -asg9 -g4 -sg10 -(lp107 -sg12 -VHomo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 4, mRNA -p108 -sg14 -S'KCTD1' -p109 -sg16 -(dp110 -g18 -S'NP_001245150.1:p.?' -p111 -sg20 -S'NP_001245150.1:p.?' 
-p112 -ssg22 -g23 -sg24 -S'NC_000018.9(NM_001258221.1):c.-16+1426_-16+1431del' -p113 -sg26 -g4 -sg27 -S'NM_001258221.1:c.-16+1426_-16+1431del' -p114 -sg29 -g4 -sg30 -(dp115 -g32 -(dp116 -g34 -S'NC_000018.10:g.26548298_26548303del' -p117 -sg36 -(dp118 -g38 -g39 -sg40 -S'GTCCTCC' -p119 -sg42 -S'26548297' -p120 -sg44 -g45 -sssg46 -(dp121 -g34 -S'NC_000018.9:g.24128262_24128267del' -p122 -sg36 -(dp123 -g38 -g39 -sg40 -S'GTCCTCC' -p124 -sg42 -S'24128261' -p125 -sg44 -g45 -sssg52 -(dp126 -g34 -S'NC_000018.10:g.26548298_26548303del' -p127 -sg36 -(dp128 -g38 -g56 -sg40 -S'GTCCTCC' -p129 -sg42 -S'26548297' -p130 -sg44 -g45 -sssg59 -(dp131 -g34 -S'NC_000018.9:g.24128262_24128267del' -p132 -sg36 -(dp133 -g38 -g56 -sg40 -S'GTCCTCC' -p134 -sg42 -S'24128261' -p135 -sg44 -g45 -sssssS'NM_001258222.2:c.10-47053_10-47048del' -p136 -(dp137 -g3 -g4 -sg5 -(lp138 -S'NC_000018.9:g.24128261GTCCTCC>G automapped to NC_000018.9:g.24128273_24128278del' -p139 -aS'RefSeqGene record not available' -p140 -asg9 -g4 -sg10 -(lp141 -sg12 -VHomo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 5, mRNA -p142 -sg14 -S'KCTD1' -p143 -sg16 -(dp144 -g18 -S'NP_001245151.1:p.?' -p145 -sg20 -S'NP_001245151.1:p.?' 
-p146 -ssg22 -g23 -sg24 -S'NC_000018.9(NM_001258222.2):c.10-47053_10-47048del' -p147 -sg26 -g4 -sg27 -S'NM_001258222.2:c.10-47053_10-47048del' -p148 -sg29 -g4 -sg30 -(dp149 -g32 -(dp150 -g34 -S'NC_000018.10:g.26548298_26548303del' -p151 -sg36 -(dp152 -g38 -g39 -sg40 -S'GTCCTCC' -p153 -sg42 -S'26548297' -p154 -sg44 -g45 -sssg46 -(dp155 -g34 -S'NC_000018.9:g.24128262_24128267del' -p156 -sg36 -(dp157 -g38 -g39 -sg40 -S'GTCCTCC' -p158 -sg42 -S'24128261' -p159 -sg44 -g45 -sssg52 -(dp160 -g34 -S'NC_000018.10:g.26548298_26548303del' -p161 -sg36 -(dp162 -g38 -g56 -sg40 -S'GTCCTCC' -p163 -sg42 -S'26548297' -p164 -sg44 -g45 -sssg59 -(dp165 -g34 -S'NC_000018.9:g.24128262_24128267del' -p166 -sg36 -(dp167 -g38 -g56 -sg40 -S'GTCCTCC' -p168 -sg42 -S'24128261' -p169 -sg44 -g45 -sssssS'flag' -p170 -S'gene_variant' -p171 -sS'NM_001136205.2:c.-16+588_-16+593del' -p172 -(dp173 -g3 -g4 -sg5 -(lp174 -S'NC_000018.9:g.24128261GTCCTCC>G automapped to NC_000018.9:g.24128273_24128278del' -p175 -aS'RefSeqGene record not available' -p176 -asg9 -g4 -sg10 -(lp177 -sg12 -VHomo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 1, mRNA -p178 -sg14 -S'KCTD1' -p179 -sg16 -(dp180 -g18 -S'NP_001129677.1:p.?' -p181 -sg20 -S'NP_001129677.1:p.?' 
-p182 -ssg22 -g23 -sg24 -S'NC_000018.9(NM_001136205.2):c.-16+588_-16+593del' -p183 -sg26 -g4 -sg27 -S'NM_001136205.2:c.-16+588_-16+593del' -p184 -sg29 -g4 -sg30 -(dp185 -g32 -(dp186 -g34 -S'NC_000018.10:g.26548298_26548303del' -p187 -sg36 -(dp188 -g38 -g39 -sg40 -S'GTCCTCC' -p189 -sg42 -S'26548297' -p190 -sg44 -g45 -sssg46 -(dp191 -g34 -S'NC_000018.9:g.24128262_24128267del' -p192 -sg36 -(dp193 -g38 -g39 -sg40 -S'GTCCTCC' -p194 -sg42 -S'24128261' -p195 -sg44 -g45 -sssg52 -(dp196 -g34 -S'NC_000018.10:g.26548298_26548303del' -p197 -sg36 -(dp198 -g38 -g56 -sg40 -S'GTCCTCC' -p199 -sg42 -S'26548297' -p200 -sg44 -g45 -sssg59 -(dp201 -g34 -S'NC_000018.9:g.24128262_24128267del' -p202 -sg36 -(dp203 -g38 -g56 -sg40 -S'GTCCTCC' -p204 -sg42 -S'24128261' -p205 -sg44 -g45 -sssssS'NM_198991.3:c.-15-47053_-15-47048del' -p206 -(dp207 -g3 -g4 -sg5 -(lp208 -S'NC_000018.9:g.24128261GTCCTCC>G automapped to NC_000018.9:g.24128273_24128278del' -p209 -aS'RefSeqGene record not available' -p210 -asg9 -g4 -sg10 -(lp211 -sg12 -VHomo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 2, mRNA -p212 -sg14 -S'KCTD1' -p213 -sg16 -(dp214 -g18 -S'NP_945342.1:p.?' -p215 -sg20 -S'NP_945342.1:p.?' 
-p216 -ssg22 -g23 -sg24 -S'NC_000018.9(NM_198991.3):c.-15-47053_-15-47048del' -p217 -sg26 -g4 -sg27 -S'NM_198991.3:c.-15-47053_-15-47048del' -p218 -sg29 -g4 -sg30 -(dp219 -g32 -(dp220 -g34 -S'NC_000018.10:g.26548298_26548303del' -p221 -sg36 -(dp222 -g38 -g39 -sg40 -S'GTCCTCC' -p223 -sg42 -S'26548297' -p224 -sg44 -g45 -sssg46 -(dp225 -g34 -S'NC_000018.9:g.24128262_24128267del' -p226 -sg36 -(dp227 -g38 -g39 -sg40 -S'GTCCTCC' -p228 -sg42 -S'24128261' -p229 -sg44 -g45 -sssg52 -(dp230 -g34 -S'NC_000018.10:g.26548298_26548303del' -p231 -sg36 -(dp232 -g38 -g56 -sg40 -S'GTCCTCC' -p233 -sg42 -S'26548297' -p234 -sg44 -g45 -sssg59 -(dp235 -g34 -S'NC_000018.9:g.24128262_24128267del' -p236 -sg36 -(dp237 -g38 -g56 -sg40 -S'GTCCTCC' -p238 -sg42 -S'24128261' -p239 -sg44 -g45 -sssssS'NM_001142730.2:c.234_239del' -p240 -(dp241 -g3 -g4 -sg5 -(lp242 -S'NC_000018.9:g.24128261GTCCTCC>G automapped to NC_000018.9:g.24128273_24128278del' -p243 -aS'RefSeqGene record not available' -p244 -asg9 -g4 -sg10 -(lp245 -sg12 -VHomo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 3, mRNA -p246 -sg14 -S'KCTD1' -p247 -sg16 -(dp248 -g18 -S'NP_001136202.1:p.(Glu78_Glu79del)' -p249 -sg20 -S'NP_001136202.1:p.(E78_E79del)' -p250 -ssg22 -g23 -sg24 -g4 -sg26 -g4 -sg27 -S'NM_001142730.2:c.234_239del' -p251 -sg29 -g4 -sg30 -(dp252 -g32 -(dp253 -g34 -S'NC_000018.10:g.26548298_26548303del' -p254 -sg36 -(dp255 -g38 -g39 -sg40 -S'GTCCTCC' -p256 -sg42 -S'26548297' -p257 -sg44 -g45 -sssg46 -(dp258 -g34 -S'NC_000018.9:g.24128262_24128267del' -p259 -sg36 -(dp260 -g38 -g39 -sg40 -S'GTCCTCC' -p261 -sg42 -S'24128261' -p262 -sg44 -g45 -sssg52 -(dp263 -g34 -S'NC_000018.10:g.26548298_26548303del' -p264 -sg36 -(dp265 -g38 -g56 -sg40 -S'GTCCTCC' -p266 -sg42 -S'26548297' -p267 -sg44 -g45 -sssg59 -(dp268 -g34 -S'NC_000018.9:g.24128262_24128267del' -p269 -sg36 -(dp270 -g38 -g56 -sg40 -S'GTCCTCC' -p271 -sg42 -S'24128261' -p272 -sg44 -g45 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant263.txt b/VariantValidator/testing/testOutputsMasterITS/variant263.txt deleted file mode 100644 index 1af28675..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant263.txt +++ /dev/null @@ -1,140 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000435.2:c.2992C>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -g6 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens notch 3 (NOTCH3), mRNA -p13 -sS'gene_symbol' -p14 -S'NOTCH3' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_000426.2:p.(Gln998Ter)' -p19 -sS'slr' -p20 -S'NP_000426.2:p.(Q998*)' -p21 -ssS'submitted_variant' -p22 -S'19-15291774-G-A' -p23 -sS'genome_context_intronic_sequence' -p24 -g6 -sS'HGVS_LRG_variant' -p25 -g6 -sS'HGVS_transcript_variant' -p26 -S'NM_000435.2:c.2992C>T' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -S'NG_009819.1:g.25019C>T' -p29 -sS'primary_assembly_loci' -p30 -(dp31 -S'GRCh38' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000019.10:g.15180963G>A' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'19' -p39 -sS'ref' -p40 -VG -p41 -sS'pos' -p42 -S'15180963' -p43 -sS'alt' -p44 -VA -p45 -sssS'GRCh37' -p46 -(dp47 -g34 -S'NC_000019.9:g.15291774G>A' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'15291774' -p50 -sg44 -g45 -sssS'hg38' -p51 -(dp52 -g34 -S'NC_000019.10:g.15180963G>A' -p53 -sg36 -(dp54 -g38 -S'chr19' -p55 -sg40 -g41 -sg42 -S'15180963' -p56 -sg44 -g45 -sssS'hg19' -p57 -(dp58 -g34 -S'NC_000019.9:g.15291774G>A' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'15291774' -p61 -sg44 -g45 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant264.txt b/VariantValidator/testing/testOutputsMasterITS/variant264.txt deleted file mode 100644 index ff1c84df..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant264.txt +++ /dev/null @@ -1,137 +0,0 @@ -(dp0 -S'flag' -p1 -S'intergenic' -p2 -sS'Intergenic_Variant_1' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'No transcripts found that fully overlap the described variation in the genomic sequence' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -g6 -sS'gene_symbol' -p14 -g6 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -g6 -sS'slr' -p18 -g6 -ssS'submitted_variant' -p19 -S'19-15311794-A-G' -p20 -sS'genome_context_intronic_sequence' -p21 -g6 -sS'HGVS_LRG_variant' -p22 -g6 -sS'HGVS_transcript_variant' -p23 -g6 -sS'HGVS_RefSeqGene_variant' -p24 -S'NG_009819.1:g.4999T>C' -p25 -sS'primary_assembly_loci' -p26 -(dp27 -S'hg19' -p28 -(dp29 -S'HGVS_genomic_description' -p30 -S'NC_000019.9:g.15311794A>G' -p31 -sS'vcf' -p32 -(dp33 -S'chr' -p34 -S'chr19' -p35 -sS'ref' -p36 -S'A' -p37 -sS'pos' -p38 -S'15311794' -p39 -sS'alt' -p40 -S'G' -p41 -sssS'hg38' -p42 -(dp43 -g30 -S'NC_000019.10:g.15200983A>G' -p44 -sg32 -(dp45 -g34 -g35 -sg36 -g37 -sg38 -S'15200983' -p46 -sg40 -g41 -sssS'GRCh37' -p47 -(dp48 -g30 -S'NC_000019.9:g.15311794A>G' -p49 -sg32 -(dp50 -g34 -S'19' -p51 -sg36 -g37 -sg38 -S'15311794' -p52 -sg40 -g41 -sssS'GRCh38' -p53 -(dp54 -g30 -S'NC_000019.10:g.15200983A>G' -p55 -sg32 -(dp56 -g34 -g51 -sg36 -g37 -sg38 -S'15200983' -p57 -sg40 -g41 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant265.txt b/VariantValidator/testing/testOutputsMasterITS/variant265.txt deleted file mode 100644 index fe78c307..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant265.txt +++ /dev/null @@ -1,246 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000540.2:c.14818G>A' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_766t1:c.14818G>A' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens ryanodine receptor 1 (RYR1), transcript variant 1, mRNA -p14 -sS'gene_symbol' -p15 -S'RYR1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000531.2(LRG_766p1):p.(Ala4940Thr)' -p20 -sS'slr' -p21 -S'NP_000531.2:p.(A4940T)' -p22 -ssS'submitted_variant' -p23 -S'19-39076592-G-A' -p24 -sS'genome_context_intronic_sequence' -p25 -g10 -sS'HGVS_LRG_variant' -p26 -S'LRG_766:g.157253G>A' -p27 -sS'HGVS_transcript_variant' -p28 -S'NM_000540.2:c.14818G>A' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_008866.1:g.157253G>A' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'GRCh38' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000019.10:g.38585952G>A' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'19' -p41 -sS'ref' -p42 -S'G' -p43 -sS'pos' -p44 -S'38585952' -p45 -sS'alt' -p46 -S'A' -p47 -sssS'GRCh37' -p48 -(dp49 -g36 -S'NC_000019.9:g.39076592G>A' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -g43 -sg44 -S'39076592' -p52 -sg46 -g47 -sssS'hg38' -p53 -(dp54 -g36 -S'NC_000019.10:g.38585952G>A' -p55 -sg38 -(dp56 -g40 -S'chr19' -p57 -sg42 -g43 -sg44 -S'38585952' -p58 -sg46 -g47 -sssS'hg19' -p59 -(dp60 -g36 -S'NC_000019.9:g.39076592G>A' -p61 -sg38 -(dp62 -g40 -g57 -sg42 -g43 -sg44 -S'39076592' -p63 -sg46 -g47 -sssssS'NM_001042723.1:c.14803G>A' -p64 -(dp65 -g5 -g10 -sg7 -(lp66 -S'RefSeqGene record not available' -p67 -asg9 -g10 -sg11 -(lp68 -sg13 -VHomo 
sapiens ryanodine receptor 1 (RYR1), transcript variant 2, mRNA -p69 -sg15 -S'RYR1' -p70 -sg17 -(dp71 -g19 -S'NP_001036188.1:p.(Ala4935Thr)' -p72 -sg21 -S'NP_001036188.1:p.(A4935T)' -p73 -ssg23 -g24 -sg25 -g10 -sg26 -g10 -sg28 -S'NM_001042723.1:c.14803G>A' -p74 -sg30 -g10 -sg32 -(dp75 -g34 -(dp76 -g36 -S'NC_000019.10:g.38585952G>A' -p77 -sg38 -(dp78 -g40 -g41 -sg42 -g43 -sg44 -S'38585952' -p79 -sg46 -g47 -sssg48 -(dp80 -g36 -S'NC_000019.9:g.39076592G>A' -p81 -sg38 -(dp82 -g40 -g41 -sg42 -g43 -sg44 -S'39076592' -p83 -sg46 -g47 -sssg53 -(dp84 -g36 -S'NC_000019.10:g.38585952G>A' -p85 -sg38 -(dp86 -g40 -g57 -sg42 -g43 -sg44 -S'38585952' -p87 -sg46 -g47 -sssg59 -(dp88 -g36 -S'NC_000019.9:g.39076592G>A' -p89 -sg38 -(dp90 -g40 -g57 -sg42 -g43 -sg44 -S'39076592' -p91 -sg46 -g47 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant266.txt b/VariantValidator/testing/testOutputsMasterITS/variant266.txt deleted file mode 100644 index 5dd5bc33..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant266.txt +++ /dev/null @@ -1,2770 +0,0 @@ -(dp0 -S'NM_001330086.1:c.4245A>G' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha9, mRNA -p12 -sS'gene_symbol' -p13 -S'NRXN1' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001317015.1:p.(Pro1415=)' -p18 -sS'slr' -p19 -S'NP_001317015.1:p.(P1415=)' -p20 -ssS'submitted_variant' -p21 -S'2-50149352-T-C' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_001330086.1:c.4245A>G' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'HGVS_genomic_description' -p32 
-S'NC_000002.11:g.50149352T>C' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr2' -p37 -sS'ref' -p38 -VT -p39 -sS'pos' -p40 -S'50149352' -p41 -sS'alt' -p42 -VC -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000002.12:g.49922214T>C' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p48 -sg42 -g43 -sssS'GRCh37' -p49 -(dp50 -g32 -S'NC_000002.11:g.50149352T>C' -p51 -sg34 -(dp52 -g36 -S'2' -p53 -sg38 -g39 -sg40 -S'50149352' -p54 -sg42 -g43 -sssS'GRCh38' -p55 -(dp56 -g32 -S'NC_000002.12:g.49922214T>C' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p59 -sg42 -g43 -sssssS'NM_001330083.1:c.4089A>G' -p60 -(dp61 -g3 -g4 -sg5 -(lp62 -S'RefSeqGene record not available' -p63 -asg8 -g4 -sg9 -(lp64 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha6, mRNA -p65 -sg13 -S'NRXN1' -p66 -sg15 -(dp67 -g17 -S'NP_001317012.1:p.(Pro1363=)' -p68 -sg19 -S'NP_001317012.1:p.(P1363=)' -p69 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330083.1:c.4089A>G' -p70 -sg27 -g4 -sg28 -(dp71 -g30 -(dp72 -g32 -S'NC_000002.11:g.50149352T>C' -p73 -sg34 -(dp74 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p75 -sg42 -g43 -sssg44 -(dp76 -g32 -S'NC_000002.12:g.49922214T>C' -p77 -sg34 -(dp78 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p79 -sg42 -g43 -sssg49 -(dp80 -g32 -S'NC_000002.11:g.50149352T>C' -p81 -sg34 -(dp82 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p83 -sg42 -g43 -sssg55 -(dp84 -g32 -S'NC_000002.12:g.49922214T>C' -p85 -sg34 -(dp86 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p87 -sg42 -g43 -sssssS'NM_001330095.1:c.4113A>G' -p88 -(dp89 -g3 -g4 -sg5 -(lp90 -S'RefSeqGene record not available' -p91 -asg8 -g4 -sg9 -(lp92 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha14, mRNA -p93 -sg13 -S'NRXN1' -p94 -sg15 -(dp95 -g17 -S'NP_001317024.1:p.(Pro1371=)' -p96 -sg19 -S'NP_001317024.1:p.(P1371=)' -p97 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330095.1:c.4113A>G' -p98 -sg27 -g4 -sg28 -(dp99 -g30 -(dp100 -g32 -S'NC_000002.11:g.50149352T>C' -p101 -sg34 -(dp102 -g36 -g37 -sg38 -g39 
-sg40 -S'50149352' -p103 -sg42 -g43 -sssg44 -(dp104 -g32 -S'NC_000002.12:g.49922214T>C' -p105 -sg34 -(dp106 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p107 -sg42 -g43 -sssg49 -(dp108 -g32 -S'NC_000002.11:g.50149352T>C' -p109 -sg34 -(dp110 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p111 -sg42 -g43 -sssg55 -(dp112 -g32 -S'NC_000002.12:g.49922214T>C' -p113 -sg34 -(dp114 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p115 -sg42 -g43 -sssssS'NM_138735.2:c.1059A>G' -p116 -(dp117 -g3 -g4 -sg5 -(lp118 -S'A more recent version of the selected reference sequence NM_138735.2 is available (NM_138735.4)' -p119 -aS'NM_138735.4:c.1059A>G MUST be fully validated prior to use in reports' -p120 -aS'select_variants=NM_138735.4:c.1059A>G' -p121 -aS'RefSeqGene record not available' -p122 -asg8 -g4 -sg9 -(lp123 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant beta, mRNA -p124 -sg13 -S'NRXN1' -p125 -sg15 -(dp126 -g17 -S'NP_620072.1:p.(Pro353=)' -p127 -sg19 -S'NP_620072.1:p.(P353=)' -p128 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_138735.2:c.1059A>G' -p129 -sg27 -g4 -sg28 -(dp130 -g30 -(dp131 -g32 -S'NC_000002.11:g.50149352T>C' -p132 -sg34 -(dp133 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p134 -sg42 -g43 -sssg44 -(dp135 -g32 -S'NC_000002.12:g.49922214T>C' -p136 -sg34 -(dp137 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p138 -sg42 -g43 -sssg49 -(dp139 -g32 -S'NC_000002.11:g.50149352T>C' -p140 -sg34 -(dp141 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p142 -sg42 -g43 -sssg55 -(dp143 -g32 -S'NC_000002.12:g.49922214T>C' -p144 -sg34 -(dp145 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p146 -sg42 -g43 -sssssS'NM_001330078.1:c.4254A>G' -p147 -(dp148 -g3 -g4 -sg5 -(lp149 -S'RefSeqGene record not available' -p150 -asg8 -g4 -sg9 -(lp151 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha4, mRNA -p152 -sg13 -S'NRXN1' -p153 -sg15 -(dp154 -g17 -S'NP_001317007.1:p.(Pro1418=)' -p155 -sg19 -S'NP_001317007.1:p.(P1418=)' -p156 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330078.1:c.4254A>G' -p157 
-sg27 -g4 -sg28 -(dp158 -g30 -(dp159 -g32 -S'NC_000002.11:g.50149352T>C' -p160 -sg34 -(dp161 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p162 -sg42 -g43 -sssg44 -(dp163 -g32 -S'NC_000002.12:g.49922214T>C' -p164 -sg34 -(dp165 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p166 -sg42 -g43 -sssg49 -(dp167 -g32 -S'NC_000002.11:g.50149352T>C' -p168 -sg34 -(dp169 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p170 -sg42 -g43 -sssg55 -(dp171 -g32 -S'NC_000002.12:g.49922214T>C' -p172 -sg34 -(dp173 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p174 -sg42 -g43 -sssssS'NM_001330094.1:c.4233A>G' -p175 -(dp176 -g3 -g4 -sg5 -(lp177 -S'RefSeqGene record not available' -p178 -asg8 -g4 -sg9 -(lp179 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha13, mRNA -p180 -sg13 -S'NRXN1' -p181 -sg15 -(dp182 -g17 -S'NP_001317023.1:p.(Pro1411=)' -p183 -sg19 -S'NP_001317023.1:p.(P1411=)' -p184 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330094.1:c.4233A>G' -p185 -sg27 -g4 -sg28 -(dp186 -g30 -(dp187 -g32 -S'NC_000002.11:g.50149352T>C' -p188 -sg34 -(dp189 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p190 -sg42 -g43 -sssg44 -(dp191 -g32 -S'NC_000002.12:g.49922214T>C' -p192 -sg34 -(dp193 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p194 -sg42 -g43 -sssg49 -(dp195 -g32 -S'NC_000002.11:g.50149352T>C' -p196 -sg34 -(dp197 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p198 -sg42 -g43 -sssg55 -(dp199 -g32 -S'NC_000002.12:g.49922214T>C' -p200 -sg34 -(dp201 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p202 -sg42 -g43 -sssssS'NM_001320157.3:c.150A>G' -p203 -(dp204 -g3 -g4 -sg5 -(lp205 -S'RefSeqGene record not available' -p206 -asg8 -g4 -sg9 -(lp207 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant gamma2, mRNA -p208 -sg13 -S'NRXN1' -p209 -sg15 -(dp210 -g17 -S'NP_001307086.1:p.(Pro50=)' -p211 -sg19 -S'NP_001307086.1:p.(P50=)' -p212 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001320157.3:c.150A>G' -p213 -sg27 -g4 -sg28 -(dp214 -g30 -(dp215 -g32 -S'NC_000002.11:g.50149352T>C' -p216 -sg34 -(dp217 -g36 -g37 -sg38 -g39 
-sg40 -S'50149352' -p218 -sg42 -g43 -sssg44 -(dp219 -g32 -S'NC_000002.12:g.49922214T>C' -p220 -sg34 -(dp221 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p222 -sg42 -g43 -sssg49 -(dp223 -g32 -S'NC_000002.11:g.50149352T>C' -p224 -sg34 -(dp225 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p226 -sg42 -g43 -sssg55 -(dp227 -g32 -S'NC_000002.12:g.49922214T>C' -p228 -sg34 -(dp229 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p230 -sg42 -g43 -sssssS'NM_001330088.1:c.4074A>G' -p231 -(dp232 -g3 -g4 -sg5 -(lp233 -S'RefSeqGene record not available' -p234 -asg8 -g4 -sg9 -(lp235 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha11, mRNA -p236 -sg13 -S'NRXN1' -p237 -sg15 -(dp238 -g17 -S'NP_001317017.1:p.(Pro1358=)' -p239 -sg19 -S'NP_001317017.1:p.(P1358=)' -p240 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330088.1:c.4074A>G' -p241 -sg27 -g4 -sg28 -(dp242 -g30 -(dp243 -g32 -S'NC_000002.11:g.50149352T>C' -p244 -sg34 -(dp245 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p246 -sg42 -g43 -sssg44 -(dp247 -g32 -S'NC_000002.12:g.49922214T>C' -p248 -sg34 -(dp249 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p250 -sg42 -g43 -sssg49 -(dp251 -g32 -S'NC_000002.11:g.50149352T>C' -p252 -sg34 -(dp253 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p254 -sg42 -g43 -sssg55 -(dp255 -g32 -S'NC_000002.12:g.49922214T>C' -p256 -sg34 -(dp257 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p258 -sg42 -g43 -sssssS'NM_001330092.1:c.1149A>G' -p259 -(dp260 -g3 -g4 -sg5 -(lp261 -S'RefSeqGene record not available' -p262 -asg8 -g4 -sg9 -(lp263 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant beta3, mRNA -p264 -sg13 -S'NRXN1' -p265 -sg15 -(dp266 -g17 -S'NP_001317021.1:p.(Pro383=)' -p267 -sg19 -S'NP_001317021.1:p.(P383=)' -p268 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330092.1:c.1149A>G' -p269 -sg27 -g4 -sg28 -(dp270 -g30 -(dp271 -g32 -S'NC_000002.11:g.50149352T>C' -p272 -sg34 -(dp273 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p274 -sg42 -g43 -sssg44 -(dp275 -g32 -S'NC_000002.12:g.49922214T>C' -p276 -sg34 -(dp277 -g36 
-g37 -sg38 -g39 -sg40 -S'49922214' -p278 -sg42 -g43 -sssg49 -(dp279 -g32 -S'NC_000002.11:g.50149352T>C' -p280 -sg34 -(dp281 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p282 -sg42 -g43 -sssg55 -(dp283 -g32 -S'NC_000002.12:g.49922214T>C' -p284 -sg34 -(dp285 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p286 -sg42 -g43 -sssssS'NM_138735.4:c.1059A>G' -p287 -(dp288 -g3 -g4 -sg5 -(lp289 -S'RefSeqGene record not available' -p290 -asg8 -g4 -sg9 -(lp291 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant beta1, mRNA -p292 -sg13 -S'NRXN1' -p293 -sg15 -(dp294 -g17 -S'NP_620072.1:p.(Pro353=)' -p295 -sg19 -S'NP_620072.1:p.(P353=)' -p296 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_138735.4:c.1059A>G' -p297 -sg27 -g4 -sg28 -(dp298 -g30 -(dp299 -g32 -S'NC_000002.11:g.50149352T>C' -p300 -sg34 -(dp301 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p302 -sg42 -g43 -sssg44 -(dp303 -g32 -S'NC_000002.12:g.49922214T>C' -p304 -sg34 -(dp305 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p306 -sg42 -g43 -sssg49 -(dp307 -g32 -S'NC_000002.11:g.50149352T>C' -p308 -sg34 -(dp309 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p310 -sg42 -g43 -sssg55 -(dp311 -g32 -S'NC_000002.12:g.49922214T>C' -p312 -sg34 -(dp313 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p314 -sg42 -g43 -sssssS'NM_001330096.1:c.4044A>G' -p315 -(dp316 -g3 -g4 -sg5 -(lp317 -S'RefSeqGene record not available' -p318 -asg8 -g4 -sg9 -(lp319 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha15, mRNA -p320 -sg13 -S'NRXN1' -p321 -sg15 -(dp322 -g17 -S'NP_001317025.1:p.(Pro1348=)' -p323 -sg19 -S'NP_001317025.1:p.(P1348=)' -p324 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330096.1:c.4044A>G' -p325 -sg27 -g4 -sg28 -(dp326 -g30 -(dp327 -g32 -S'NC_000002.11:g.50149352T>C' -p328 -sg34 -(dp329 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p330 -sg42 -g43 -sssg44 -(dp331 -g32 -S'NC_000002.12:g.49922214T>C' -p332 -sg34 -(dp333 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p334 -sg42 -g43 -sssg49 -(dp335 -g32 -S'NC_000002.11:g.50149352T>C' -p336 -sg34 -(dp337 
-g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p338 -sg42 -g43 -sssg55 -(dp339 -g32 -S'NC_000002.12:g.49922214T>C' -p340 -sg34 -(dp341 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p342 -sg42 -g43 -sssssS'NM_001135659.2:c.4374A>G' -p343 -(dp344 -g3 -g4 -sg5 -(lp345 -S'RefSeqGene record not available' -p346 -asg8 -g4 -sg9 -(lp347 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha2, mRNA -p348 -sg13 -S'NRXN1' -p349 -sg15 -(dp350 -g17 -S'NP_001129131.1:p.(Pro1458=)' -p351 -sg19 -S'NP_001129131.1:p.(P1458=)' -p352 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001135659.2:c.4374A>G' -p353 -sg27 -g4 -sg28 -(dp354 -g30 -(dp355 -g32 -S'NC_000002.11:g.50149352T>C' -p356 -sg34 -(dp357 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p358 -sg42 -g43 -sssg44 -(dp359 -g32 -S'NC_000002.12:g.49922214T>C' -p360 -sg34 -(dp361 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p362 -sg42 -g43 -sssg49 -(dp363 -g32 -S'NC_000002.11:g.50149352T>C' -p364 -sg34 -(dp365 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p366 -sg42 -g43 -sssg55 -(dp367 -g32 -S'NC_000002.12:g.49922214T>C' -p368 -sg34 -(dp369 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p370 -sg42 -g43 -sssssS'NM_001330085.1:c.4227A>G' -p371 -(dp372 -g3 -g4 -sg5 -(lp373 -S'RefSeqGene record not available' -p374 -asg8 -g4 -sg9 -(lp375 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha8, mRNA -p376 -sg13 -S'NRXN1' -p377 -sg15 -(dp378 -g17 -S'NP_001317014.1:p.(Pro1409=)' -p379 -sg19 -S'NP_001317014.1:p.(P1409=)' -p380 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330085.1:c.4227A>G' -p381 -sg27 -g4 -sg28 -(dp382 -g30 -(dp383 -g32 -S'NC_000002.11:g.50149352T>C' -p384 -sg34 -(dp385 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p386 -sg42 -g43 -sssg44 -(dp387 -g32 -S'NC_000002.12:g.49922214T>C' -p388 -sg34 -(dp389 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p390 -sg42 -g43 -sssg49 -(dp391 -g32 -S'NC_000002.11:g.50149352T>C' -p392 -sg34 -(dp393 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p394 -sg42 -g43 -sssg55 -(dp395 -g32 -S'NC_000002.12:g.49922214T>C' 
-p396 -sg34 -(dp397 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p398 -sg42 -g43 -sssssS'NM_001320156.1:c.159A>G' -p399 -(dp400 -g3 -g4 -sg5 -(lp401 -S'A more recent version of the selected reference sequence NM_001320156.1 is available (NM_001320156.3)' -p402 -aS'NM_001320156.3:c.159A>G MUST be fully validated prior to use in reports' -p403 -aS'select_variants=NM_001320156.3:c.159A>G' -p404 -aS'RefSeqGene record not available' -p405 -asg8 -g4 -sg9 -(lp406 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant gamma1, mRNA -p407 -sg13 -S'NRXN1' -p408 -sg15 -(dp409 -g17 -S'NP_001307085.1:p.(Pro53=)' -p410 -sg19 -S'NP_001307085.1:p.(P53=)' -p411 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001320156.1:c.159A>G' -p412 -sg27 -g4 -sg28 -(dp413 -g30 -(dp414 -g32 -S'NC_000002.11:g.50149352T>C' -p415 -sg34 -(dp416 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p417 -sg42 -g43 -sssg44 -(dp418 -g32 -S'NC_000002.12:g.49922214T>C' -p419 -sg34 -(dp420 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p421 -sg42 -g43 -sssg49 -(dp422 -g32 -S'NC_000002.11:g.50149352T>C' -p423 -sg34 -(dp424 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p425 -sg42 -g43 -sssg55 -(dp426 -g32 -S'NC_000002.12:g.49922214T>C' -p427 -sg34 -(dp428 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p429 -sg42 -g43 -sssssS'NM_001330077.1:c.4230A>G' -p430 -(dp431 -g3 -g4 -sg5 -(lp432 -S'RefSeqGene record not available' -p433 -asg8 -g4 -sg9 -(lp434 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha3, mRNA -p435 -sg13 -S'NRXN1' -p436 -sg15 -(dp437 -g17 -S'NP_001317006.1:p.(Pro1410=)' -p438 -sg19 -S'NP_001317006.1:p.(P1410=)' -p439 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330077.1:c.4230A>G' -p440 -sg27 -g4 -sg28 -(dp441 -g30 -(dp442 -g32 -S'NC_000002.11:g.50149352T>C' -p443 -sg34 -(dp444 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p445 -sg42 -g43 -sssg44 -(dp446 -g32 -S'NC_000002.12:g.49922214T>C' -p447 -sg34 -(dp448 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p449 -sg42 -g43 -sssg49 -(dp450 -g32 -S'NC_000002.11:g.50149352T>C' 
-p451 -sg34 -(dp452 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p453 -sg42 -g43 -sssg55 -(dp454 -g32 -S'NC_000002.12:g.49922214T>C' -p455 -sg34 -(dp456 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p457 -sg42 -g43 -sssssS'NM_001330093.1:c.4251A>G' -p458 -(dp459 -g3 -g4 -sg5 -(lp460 -S'RefSeqGene record not available' -p461 -asg8 -g4 -sg9 -(lp462 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha12, mRNA -p463 -sg13 -S'NRXN1' -p464 -sg15 -(dp465 -g17 -S'NP_001317022.1:p.(Pro1417=)' -p466 -sg19 -S'NP_001317022.1:p.(P1417=)' -p467 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330093.1:c.4251A>G' -p468 -sg27 -g4 -sg28 -(dp469 -g30 -(dp470 -g32 -S'NC_000002.11:g.50149352T>C' -p471 -sg34 -(dp472 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p473 -sg42 -g43 -sssg44 -(dp474 -g32 -S'NC_000002.12:g.49922214T>C' -p475 -sg34 -(dp476 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p477 -sg42 -g43 -sssg49 -(dp478 -g32 -S'NC_000002.11:g.50149352T>C' -p479 -sg34 -(dp480 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p481 -sg42 -g43 -sssg55 -(dp482 -g32 -S'NC_000002.12:g.49922214T>C' -p483 -sg34 -(dp484 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p485 -sg42 -g43 -sssssS'NM_001135659.1:c.4374A>G' -p486 -(dp487 -g3 -g4 -sg5 -(lp488 -S'A more recent version of the selected reference sequence NM_001135659.1 is available (NM_001135659.2)' -p489 -aS'NM_001135659.2:c.4374A>G MUST be fully validated prior to use in reports' -p490 -aS'select_variants=NM_001135659.2:c.4374A>G' -p491 -asg8 -g4 -sg9 -(lp492 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha2, mRNA -p493 -sg13 -S'NRXN1' -p494 -sg15 -(dp495 -g17 -S'NP_001129131.1:p.(Pro1458=)' -p496 -sg19 -S'NP_001129131.1:p.(P1458=)' -p497 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001135659.1:c.4374A>G' -p498 -sg27 -S'NG_011878.1:g.1115323A>G' -p499 -sg28 -(dp500 -g30 -(dp501 -g32 -S'NC_000002.11:g.50149352T>C' -p502 -sg34 -(dp503 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p504 -sg42 -g43 -sssg44 -(dp505 -g32 -S'NC_000002.12:g.49922214T>C' 
-p506 -sg34 -(dp507 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p508 -sg42 -g43 -sssg49 -(dp509 -g32 -S'NC_000002.11:g.50149352T>C' -p510 -sg34 -(dp511 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p512 -sg42 -g43 -sssg55 -(dp513 -g32 -S'NC_000002.12:g.49922214T>C' -p514 -sg34 -(dp515 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p516 -sg42 -g43 -sssssS'NM_001320157.1:c.150A>G' -p517 -(dp518 -g3 -g4 -sg5 -(lp519 -S'A more recent version of the selected reference sequence NM_001320157.1 is available (NM_001320157.3)' -p520 -aS'NM_001320157.3:c.150A>G MUST be fully validated prior to use in reports' -p521 -aS'select_variants=NM_001320157.3:c.150A>G' -p522 -aS'RefSeqGene record not available' -p523 -asg8 -g4 -sg9 -(lp524 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant gamma2, mRNA -p525 -sg13 -S'NRXN1' -p526 -sg15 -(dp527 -g17 -S'NP_001307086.1:p.(Pro50=)' -p528 -sg19 -S'NP_001307086.1:p.(P50=)' -p529 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001320157.1:c.150A>G' -p530 -sg27 -g4 -sg28 -(dp531 -g30 -(dp532 -g32 -S'NC_000002.11:g.50149352T>C' -p533 -sg34 -(dp534 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p535 -sg42 -g43 -sssg44 -(dp536 -g32 -S'NC_000002.12:g.49922214T>C' -p537 -sg34 -(dp538 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p539 -sg42 -g43 -sssg49 -(dp540 -g32 -S'NC_000002.11:g.50149352T>C' -p541 -sg34 -(dp542 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p543 -sg42 -g43 -sssg55 -(dp544 -g32 -S'NC_000002.12:g.49922214T>C' -p545 -sg34 -(dp546 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p547 -sg42 -g43 -sssssS'NM_001330084.1:c.4188A>G' -p548 -(dp549 -g3 -g4 -sg5 -(lp550 -S'RefSeqGene record not available' -p551 -asg8 -g4 -sg9 -(lp552 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha7, mRNA -p553 -sg13 -S'NRXN1' -p554 -sg15 -(dp555 -g17 -S'NP_001317013.1:p.(Pro1396=)' -p556 -sg19 -S'NP_001317013.1:p.(P1396=)' -p557 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330084.1:c.4188A>G' -p558 -sg27 -g4 -sg28 -(dp559 -g30 -(dp560 -g32 -S'NC_000002.11:g.50149352T>C' 
-p561 -sg34 -(dp562 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p563 -sg42 -g43 -sssg44 -(dp564 -g32 -S'NC_000002.12:g.49922214T>C' -p565 -sg34 -(dp566 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p567 -sg42 -g43 -sssg49 -(dp568 -g32 -S'NC_000002.11:g.50149352T>C' -p569 -sg34 -(dp570 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p571 -sg42 -g43 -sssg55 -(dp572 -g32 -S'NC_000002.12:g.49922214T>C' -p573 -sg34 -(dp574 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p575 -sg42 -g43 -sssssS'NM_004801.4:c.4164A>G' -p576 -(dp577 -g3 -g4 -sg5 -(lp578 -S'A more recent version of the selected reference sequence NM_004801.4 is available (NM_004801.5)' -p579 -aS'NM_004801.5:c.4164A>G MUST be fully validated prior to use in reports' -p580 -aS'select_variants=NM_004801.5:c.4164A>G' -p581 -aS'RefSeqGene record not available' -p582 -asg8 -g4 -sg9 -(lp583 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha1, mRNA -p584 -sg13 -S'NRXN1' -p585 -sg15 -(dp586 -g17 -S'NP_004792.1:p.(Pro1388=)' -p587 -sg19 -S'NP_004792.1:p.(P1388=)' -p588 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_004801.4:c.4164A>G' -p589 -sg27 -g4 -sg28 -(dp590 -g30 -(dp591 -g32 -S'NC_000002.11:g.50149352T>C' -p592 -sg34 -(dp593 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p594 -sg42 -g43 -sssg44 -(dp595 -g32 -S'NC_000002.12:g.49922214T>C' -p596 -sg34 -(dp597 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p598 -sg42 -g43 -sssg49 -(dp599 -g32 -S'NC_000002.11:g.50149352T>C' -p600 -sg34 -(dp601 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p602 -sg42 -g43 -sssg55 -(dp603 -g32 -S'NC_000002.12:g.49922214T>C' -p604 -sg34 -(dp605 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p606 -sg42 -g43 -sssssS'NM_001330082.1:c.4221A>G' -p607 -(dp608 -g3 -g4 -sg5 -(lp609 -S'RefSeqGene record not available' -p610 -asg8 -g4 -sg9 -(lp611 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha5, mRNA -p612 -sg13 -S'NRXN1' -p613 -sg15 -(dp614 -g17 -S'NP_001317011.1:p.(Pro1407=)' -p615 -sg19 -S'NP_001317011.1:p.(P1407=)' -p616 -ssg21 -g22 -sg23 -g4 -sg24 -g4 
-sg25 -S'NM_001330082.1:c.4221A>G' -p617 -sg27 -g4 -sg28 -(dp618 -g30 -(dp619 -g32 -S'NC_000002.11:g.50149352T>C' -p620 -sg34 -(dp621 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p622 -sg42 -g43 -sssg44 -(dp623 -g32 -S'NC_000002.12:g.49922214T>C' -p624 -sg34 -(dp625 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p626 -sg42 -g43 -sssg49 -(dp627 -g32 -S'NC_000002.11:g.50149352T>C' -p628 -sg34 -(dp629 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p630 -sg42 -g43 -sssg55 -(dp631 -g32 -S'NC_000002.12:g.49922214T>C' -p632 -sg34 -(dp633 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p634 -sg42 -g43 -sssssS'flag' -p635 -S'gene_variant' -p636 -sS'NM_001330091.1:c.1140A>G' -p637 -(dp638 -g3 -g4 -sg5 -(lp639 -S'RefSeqGene record not available' -p640 -asg8 -g4 -sg9 -(lp641 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant beta2, mRNA -p642 -sg13 -S'NRXN1' -p643 -sg15 -(dp644 -g17 -S'NP_001317020.1:p.(Pro380=)' -p645 -sg19 -S'NP_001317020.1:p.(P380=)' -p646 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330091.1:c.1140A>G' -p647 -sg27 -g4 -sg28 -(dp648 -g30 -(dp649 -g32 -S'NC_000002.11:g.50149352T>C' -p650 -sg34 -(dp651 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p652 -sg42 -g43 -sssg44 -(dp653 -g32 -S'NC_000002.12:g.49922214T>C' -p654 -sg34 -(dp655 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p656 -sg42 -g43 -sssg49 -(dp657 -g32 -S'NC_000002.11:g.50149352T>C' -p658 -sg34 -(dp659 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p660 -sg42 -g43 -sssg55 -(dp661 -g32 -S'NC_000002.12:g.49922214T>C' -p662 -sg34 -(dp663 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p664 -sg42 -g43 -sssssS'NM_001320156.3:c.159A>G' -p665 -(dp666 -g3 -g4 -sg5 -(lp667 -S'RefSeqGene record not available' -p668 -asg8 -g4 -sg9 -(lp669 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant gamma1, mRNA -p670 -sg13 -S'NRXN1' -p671 -sg15 -(dp672 -g17 -S'NP_001307085.1:p.(Pro53=)' -p673 -sg19 -S'NP_001307085.1:p.(P53=)' -p674 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001320156.3:c.159A>G' -p675 -sg27 -g4 -sg28 -(dp676 -g30 -(dp677 
-g32 -S'NC_000002.11:g.50149352T>C' -p678 -sg34 -(dp679 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p680 -sg42 -g43 -sssg44 -(dp681 -g32 -S'NC_000002.12:g.49922214T>C' -p682 -sg34 -(dp683 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p684 -sg42 -g43 -sssg49 -(dp685 -g32 -S'NC_000002.11:g.50149352T>C' -p686 -sg34 -(dp687 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p688 -sg42 -g43 -sssg55 -(dp689 -g32 -S'NC_000002.12:g.49922214T>C' -p690 -sg34 -(dp691 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p692 -sg42 -g43 -sssssS'NM_001330087.1:c.4053A>G' -p693 -(dp694 -g3 -g4 -sg5 -(lp695 -S'RefSeqGene record not available' -p696 -asg8 -g4 -sg9 -(lp697 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha10, mRNA -p698 -sg13 -S'NRXN1' -p699 -sg15 -(dp700 -g17 -S'NP_001317016.1:p.(Pro1351=)' -p701 -sg19 -S'NP_001317016.1:p.(P1351=)' -p702 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330087.1:c.4053A>G' -p703 -sg27 -g4 -sg28 -(dp704 -g30 -(dp705 -g32 -S'NC_000002.11:g.50149352T>C' -p706 -sg34 -(dp707 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p708 -sg42 -g43 -sssg44 -(dp709 -g32 -S'NC_000002.12:g.49922214T>C' -p710 -sg34 -(dp711 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p712 -sg42 -g43 -sssg49 -(dp713 -g32 -S'NC_000002.11:g.50149352T>C' -p714 -sg34 -(dp715 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p716 -sg42 -g43 -sssg55 -(dp717 -g32 -S'NC_000002.12:g.49922214T>C' -p718 -sg34 -(dp719 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p720 -sg42 -g43 -sssssS'NM_001330097.1:c.1050A>G' -p721 -(dp722 -g3 -g4 -sg5 -(lp723 -S'RefSeqGene record not available' -p724 -asg8 -g4 -sg9 -(lp725 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant beta4, mRNA -p726 -sg13 -S'NRXN1' -p727 -sg15 -(dp728 -g17 -S'NP_001317026.1:p.(Pro350=)' -p729 -sg19 -S'NP_001317026.1:p.(P350=)' -p730 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330097.1:c.1050A>G' -p731 -sg27 -g4 -sg28 -(dp732 -g30 -(dp733 -g32 -S'NC_000002.11:g.50149352T>C' -p734 -sg34 -(dp735 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p736 -sg42 -g43 
-sssg44 -(dp737 -g32 -S'NC_000002.12:g.49922214T>C' -p738 -sg34 -(dp739 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p740 -sg42 -g43 -sssg49 -(dp741 -g32 -S'NC_000002.11:g.50149352T>C' -p742 -sg34 -(dp743 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p744 -sg42 -g43 -sssg55 -(dp745 -g32 -S'NC_000002.12:g.49922214T>C' -p746 -sg34 -(dp747 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p748 -sg42 -g43 -sssssS'NM_004801.5:c.4164A>G' -p749 -(dp750 -g3 -g4 -sg5 -(lp751 -S'RefSeqGene record not available' -p752 -asg8 -g4 -sg9 -(lp753 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha1, mRNA -p754 -sg13 -S'NRXN1' -p755 -sg15 -(dp756 -g17 -S'NP_004792.1:p.(Pro1388=)' -p757 -sg19 -S'NP_004792.1:p.(P1388=)' -p758 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_004801.5:c.4164A>G' -p759 -sg27 -g4 -sg28 -(dp760 -g30 -(dp761 -g32 -S'NC_000002.11:g.50149352T>C' -p762 -sg34 -(dp763 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p764 -sg42 -g43 -sssg44 -(dp765 -g32 -S'NC_000002.12:g.49922214T>C' -p766 -sg34 -(dp767 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p768 -sg42 -g43 -sssg49 -(dp769 -g32 -S'NC_000002.11:g.50149352T>C' -p770 -sg34 -(dp771 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p772 -sg42 -g43 -sssg55 -(dp773 -g32 -S'NC_000002.12:g.49922214T>C' -p774 -sg34 -(dp775 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p776 -sg42 -g43 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant267.txt b/VariantValidator/testing/testOutputsMasterITS/variant267.txt deleted file mode 100644 index 3c212fa9..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant267.txt +++ /dev/null @@ -1,1816 +0,0 @@ -(dp0 -S'NM_001330096.1:c.1201C>T' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha15, mRNA -p12 -sS'gene_symbol' -p13 -S'NRXN1' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001317025.1:p.(Pro401Ser)' -p18 -sS'slr' -p19 -S'NP_001317025.1:p.(P401S)' -p20 -ssS'submitted_variant' -p21 -S'2-50847195-G-A' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_001330096.1:c.1201C>T' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000002.11:g.50847195G>A' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr2' -p37 -sS'ref' -p38 -VG -p39 -sS'pos' -p40 -S'50847195' -p41 -sS'alt' -p42 -VA -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000002.12:g.50620057G>A' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p48 -sg42 -g43 -sssS'GRCh37' -p49 -(dp50 -g32 -S'NC_000002.11:g.50847195G>A' -p51 -sg34 -(dp52 -g36 -S'2' -p53 -sg38 -g39 -sg40 -S'50847195' -p54 -sg42 -g43 -sssS'GRCh38' -p55 -(dp56 -g32 -S'NC_000002.12:g.50620057G>A' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p59 -sg42 -g43 -sssssS'NM_001330084.1:c.1246C>T' -p60 -(dp61 -g3 -g4 -sg5 -(lp62 -S'RefSeqGene record not available' -p63 -asg8 -g4 -sg9 -(lp64 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha7, mRNA -p65 -sg13 -S'NRXN1' -p66 -sg15 
-(dp67 -g17 -S'NP_001317013.1:p.(Pro416Ser)' -p68 -sg19 -S'NP_001317013.1:p.(P416S)' -p69 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330084.1:c.1246C>T' -p70 -sg27 -g4 -sg28 -(dp71 -g30 -(dp72 -g32 -S'NC_000002.11:g.50847195G>A' -p73 -sg34 -(dp74 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p75 -sg42 -g43 -sssg44 -(dp76 -g32 -S'NC_000002.12:g.50620057G>A' -p77 -sg34 -(dp78 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p79 -sg42 -g43 -sssg49 -(dp80 -g32 -S'NC_000002.11:g.50847195G>A' -p81 -sg34 -(dp82 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p83 -sg42 -g43 -sssg55 -(dp84 -g32 -S'NC_000002.12:g.50620057G>A' -p85 -sg34 -(dp86 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p87 -sg42 -g43 -sssssS'NM_001330077.1:c.1261C>T' -p88 -(dp89 -g3 -g4 -sg5 -(lp90 -S'RefSeqGene record not available' -p91 -asg8 -g4 -sg9 -(lp92 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha3, mRNA -p93 -sg13 -S'NRXN1' -p94 -sg15 -(dp95 -g17 -S'NP_001317006.1:p.(Pro421Ser)' -p96 -sg19 -S'NP_001317006.1:p.(P421S)' -p97 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330077.1:c.1261C>T' -p98 -sg27 -g4 -sg28 -(dp99 -g30 -(dp100 -g32 -S'NC_000002.11:g.50847195G>A' -p101 -sg34 -(dp102 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p103 -sg42 -g43 -sssg44 -(dp104 -g32 -S'NC_000002.12:g.50620057G>A' -p105 -sg34 -(dp106 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p107 -sg42 -g43 -sssg49 -(dp108 -g32 -S'NC_000002.11:g.50847195G>A' -p109 -sg34 -(dp110 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p111 -sg42 -g43 -sssg55 -(dp112 -g32 -S'NC_000002.12:g.50620057G>A' -p113 -sg34 -(dp114 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p115 -sg42 -g43 -sssssS'NM_001330086.1:c.1285C>T' -p116 -(dp117 -g3 -g4 -sg5 -(lp118 -S'RefSeqGene record not available' -p119 -asg8 -g4 -sg9 -(lp120 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha9, mRNA -p121 -sg13 -S'NRXN1' -p122 -sg15 -(dp123 -g17 -S'NP_001317015.1:p.(Pro429Ser)' -p124 -sg19 -S'NP_001317015.1:p.(P429S)' -p125 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 
-S'NM_001330086.1:c.1285C>T' -p126 -sg27 -g4 -sg28 -(dp127 -g30 -(dp128 -g32 -S'NC_000002.11:g.50847195G>A' -p129 -sg34 -(dp130 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p131 -sg42 -g43 -sssg44 -(dp132 -g32 -S'NC_000002.12:g.50620057G>A' -p133 -sg34 -(dp134 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p135 -sg42 -g43 -sssg49 -(dp136 -g32 -S'NC_000002.11:g.50847195G>A' -p137 -sg34 -(dp138 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p139 -sg42 -g43 -sssg55 -(dp140 -g32 -S'NC_000002.12:g.50620057G>A' -p141 -sg34 -(dp142 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p143 -sg42 -g43 -sssssS'NM_001330088.1:c.1231C>T' -p144 -(dp145 -g3 -g4 -sg5 -(lp146 -S'RefSeqGene record not available' -p147 -asg8 -g4 -sg9 -(lp148 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha11, mRNA -p149 -sg13 -S'NRXN1' -p150 -sg15 -(dp151 -g17 -S'NP_001317017.1:p.(Pro411Ser)' -p152 -sg19 -S'NP_001317017.1:p.(P411S)' -p153 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330088.1:c.1231C>T' -p154 -sg27 -g4 -sg28 -(dp155 -g30 -(dp156 -g32 -S'NC_000002.11:g.50847195G>A' -p157 -sg34 -(dp158 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p159 -sg42 -g43 -sssg44 -(dp160 -g32 -S'NC_000002.12:g.50620057G>A' -p161 -sg34 -(dp162 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p163 -sg42 -g43 -sssg49 -(dp164 -g32 -S'NC_000002.11:g.50847195G>A' -p165 -sg34 -(dp166 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p167 -sg42 -g43 -sssg55 -(dp168 -g32 -S'NC_000002.12:g.50620057G>A' -p169 -sg34 -(dp170 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p171 -sg42 -g43 -sssssS'NM_001330093.1:c.1282C>T' -p172 -(dp173 -g3 -g4 -sg5 -(lp174 -S'RefSeqGene record not available' -p175 -asg8 -g4 -sg9 -(lp176 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha12, mRNA -p177 -sg13 -S'NRXN1' -p178 -sg15 -(dp179 -g17 -S'NP_001317022.1:p.(Pro428Ser)' -p180 -sg19 -S'NP_001317022.1:p.(P428S)' -p181 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330093.1:c.1282C>T' -p182 -sg27 -g4 -sg28 -(dp183 -g30 -(dp184 -g32 -S'NC_000002.11:g.50847195G>A' 
-p185 -sg34 -(dp186 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p187 -sg42 -g43 -sssg44 -(dp188 -g32 -S'NC_000002.12:g.50620057G>A' -p189 -sg34 -(dp190 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p191 -sg42 -g43 -sssg49 -(dp192 -g32 -S'NC_000002.11:g.50847195G>A' -p193 -sg34 -(dp194 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p195 -sg42 -g43 -sssg55 -(dp196 -g32 -S'NC_000002.12:g.50620057G>A' -p197 -sg34 -(dp198 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p199 -sg42 -g43 -sssssS'NM_001330087.1:c.1201C>T' -p200 -(dp201 -g3 -g4 -sg5 -(lp202 -S'RefSeqGene record not available' -p203 -asg8 -g4 -sg9 -(lp204 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha10, mRNA -p205 -sg13 -S'NRXN1' -p206 -sg15 -(dp207 -g17 -S'NP_001317016.1:p.(Pro401Ser)' -p208 -sg19 -S'NP_001317016.1:p.(P401S)' -p209 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330087.1:c.1201C>T' -p210 -sg27 -g4 -sg28 -(dp211 -g30 -(dp212 -g32 -S'NC_000002.11:g.50847195G>A' -p213 -sg34 -(dp214 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p215 -sg42 -g43 -sssg44 -(dp216 -g32 -S'NC_000002.12:g.50620057G>A' -p217 -sg34 -(dp218 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p219 -sg42 -g43 -sssg49 -(dp220 -g32 -S'NC_000002.11:g.50847195G>A' -p221 -sg34 -(dp222 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p223 -sg42 -g43 -sssg55 -(dp224 -g32 -S'NC_000002.12:g.50620057G>A' -p225 -sg34 -(dp226 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p227 -sg42 -g43 -sssssS'NM_001330082.1:c.1261C>T' -p228 -(dp229 -g3 -g4 -sg5 -(lp230 -S'RefSeqGene record not available' -p231 -asg8 -g4 -sg9 -(lp232 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha5, mRNA -p233 -sg13 -S'NRXN1' -p234 -sg15 -(dp235 -g17 -S'NP_001317011.1:p.(Pro421Ser)' -p236 -sg19 -S'NP_001317011.1:p.(P421S)' -p237 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330082.1:c.1261C>T' -p238 -sg27 -g4 -sg28 -(dp239 -g30 -(dp240 -g32 -S'NC_000002.11:g.50847195G>A' -p241 -sg34 -(dp242 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p243 -sg42 -g43 -sssg44 -(dp244 -g32 
-S'NC_000002.12:g.50620057G>A' -p245 -sg34 -(dp246 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p247 -sg42 -g43 -sssg49 -(dp248 -g32 -S'NC_000002.11:g.50847195G>A' -p249 -sg34 -(dp250 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p251 -sg42 -g43 -sssg55 -(dp252 -g32 -S'NC_000002.12:g.50620057G>A' -p253 -sg34 -(dp254 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p255 -sg42 -g43 -sssssS'NM_001330078.1:c.1285C>T' -p256 -(dp257 -g3 -g4 -sg5 -(lp258 -S'RefSeqGene record not available' -p259 -asg8 -g4 -sg9 -(lp260 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha4, mRNA -p261 -sg13 -S'NRXN1' -p262 -sg15 -(dp263 -g17 -S'NP_001317007.1:p.(Pro429Ser)' -p264 -sg19 -S'NP_001317007.1:p.(P429S)' -p265 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330078.1:c.1285C>T' -p266 -sg27 -g4 -sg28 -(dp267 -g30 -(dp268 -g32 -S'NC_000002.11:g.50847195G>A' -p269 -sg34 -(dp270 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p271 -sg42 -g43 -sssg44 -(dp272 -g32 -S'NC_000002.12:g.50620057G>A' -p273 -sg34 -(dp274 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p275 -sg42 -g43 -sssg49 -(dp276 -g32 -S'NC_000002.11:g.50847195G>A' -p277 -sg34 -(dp278 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p279 -sg42 -g43 -sssg55 -(dp280 -g32 -S'NC_000002.12:g.50620057G>A' -p281 -sg34 -(dp282 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p283 -sg42 -g43 -sssssS'NM_001330094.1:c.1273C>T' -p284 -(dp285 -g3 -g4 -sg5 -(lp286 -S'RefSeqGene record not available' -p287 -asg8 -g4 -sg9 -(lp288 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha13, mRNA -p289 -sg13 -S'NRXN1' -p290 -sg15 -(dp291 -g17 -S'NP_001317023.1:p.(Pro425Ser)' -p292 -sg19 -S'NP_001317023.1:p.(P425S)' -p293 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330094.1:c.1273C>T' -p294 -sg27 -g4 -sg28 -(dp295 -g30 -(dp296 -g32 -S'NC_000002.11:g.50847195G>A' -p297 -sg34 -(dp298 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p299 -sg42 -g43 -sssg44 -(dp300 -g32 -S'NC_000002.12:g.50620057G>A' -p301 -sg34 -(dp302 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p303 -sg42 -g43 
-sssg49 -(dp304 -g32 -S'NC_000002.11:g.50847195G>A' -p305 -sg34 -(dp306 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p307 -sg42 -g43 -sssg55 -(dp308 -g32 -S'NC_000002.12:g.50620057G>A' -p309 -sg34 -(dp310 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p311 -sg42 -g43 -sssssS'flag' -p312 -S'gene_variant' -p313 -sS'NM_001135659.2:c.1405C>T' -p314 -(dp315 -g3 -g4 -sg5 -(lp316 -S'RefSeqGene record not available' -p317 -asg8 -g4 -sg9 -(lp318 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha2, mRNA -p319 -sg13 -S'NRXN1' -p320 -sg15 -(dp321 -g17 -S'NP_001129131.1:p.(Pro469Ser)' -p322 -sg19 -S'NP_001129131.1:p.(P469S)' -p323 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001135659.2:c.1405C>T' -p324 -sg27 -g4 -sg28 -(dp325 -g30 -(dp326 -g32 -S'NC_000002.11:g.50847195G>A' -p327 -sg34 -(dp328 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p329 -sg42 -g43 -sssg44 -(dp330 -g32 -S'NC_000002.12:g.50620057G>A' -p331 -sg34 -(dp332 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p333 -sg42 -g43 -sssg49 -(dp334 -g32 -S'NC_000002.11:g.50847195G>A' -p335 -sg34 -(dp336 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p337 -sg42 -g43 -sssg55 -(dp338 -g32 -S'NC_000002.12:g.50620057G>A' -p339 -sg34 -(dp340 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p341 -sg42 -g43 -sssssS'NM_001330083.1:c.1246C>T' -p342 -(dp343 -g3 -g4 -sg5 -(lp344 -S'RefSeqGene record not available' -p345 -asg8 -g4 -sg9 -(lp346 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha6, mRNA -p347 -sg13 -S'NRXN1' -p348 -sg15 -(dp349 -g17 -S'NP_001317012.1:p.(Pro416Ser)' -p350 -sg19 -S'NP_001317012.1:p.(P416S)' -p351 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330083.1:c.1246C>T' -p352 -sg27 -g4 -sg28 -(dp353 -g30 -(dp354 -g32 -S'NC_000002.11:g.50847195G>A' -p355 -sg34 -(dp356 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p357 -sg42 -g43 -sssg44 -(dp358 -g32 -S'NC_000002.12:g.50620057G>A' -p359 -sg34 -(dp360 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p361 -sg42 -g43 -sssg49 -(dp362 -g32 -S'NC_000002.11:g.50847195G>A' -p363 -sg34 -(dp364 
-g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p365 -sg42 -g43 -sssg55 -(dp366 -g32 -S'NC_000002.12:g.50620057G>A' -p367 -sg34 -(dp368 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p369 -sg42 -g43 -sssssS'NM_004801.5:c.1285C>T' -p370 -(dp371 -g3 -g4 -sg5 -(lp372 -S'RefSeqGene record not available' -p373 -asg8 -g4 -sg9 -(lp374 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha1, mRNA -p375 -sg13 -S'NRXN1' -p376 -sg15 -(dp377 -g17 -S'NP_004792.1:p.(Pro429Ser)' -p378 -sg19 -S'NP_004792.1:p.(P429S)' -p379 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_004801.5:c.1285C>T' -p380 -sg27 -g4 -sg28 -(dp381 -g30 -(dp382 -g32 -S'NC_000002.11:g.50847195G>A' -p383 -sg34 -(dp384 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p385 -sg42 -g43 -sssg44 -(dp386 -g32 -S'NC_000002.12:g.50620057G>A' -p387 -sg34 -(dp388 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p389 -sg42 -g43 -sssg49 -(dp390 -g32 -S'NC_000002.11:g.50847195G>A' -p391 -sg34 -(dp392 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p393 -sg42 -g43 -sssg55 -(dp394 -g32 -S'NC_000002.12:g.50620057G>A' -p395 -sg34 -(dp396 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p397 -sg42 -g43 -sssssS'NM_001330085.1:c.1285C>T' -p398 -(dp399 -g3 -g4 -sg5 -(lp400 -S'RefSeqGene record not available' -p401 -asg8 -g4 -sg9 -(lp402 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha8, mRNA -p403 -sg13 -S'NRXN1' -p404 -sg15 -(dp405 -g17 -S'NP_001317014.1:p.(Pro429Ser)' -p406 -sg19 -S'NP_001317014.1:p.(P429S)' -p407 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330085.1:c.1285C>T' -p408 -sg27 -g4 -sg28 -(dp409 -g30 -(dp410 -g32 -S'NC_000002.11:g.50847195G>A' -p411 -sg34 -(dp412 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p413 -sg42 -g43 -sssg44 -(dp414 -g32 -S'NC_000002.12:g.50620057G>A' -p415 -sg34 -(dp416 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p417 -sg42 -g43 -sssg49 -(dp418 -g32 -S'NC_000002.11:g.50847195G>A' -p419 -sg34 -(dp420 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p421 -sg42 -g43 -sssg55 -(dp422 -g32 -S'NC_000002.12:g.50620057G>A' -p423 -sg34 
-(dp424 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p425 -sg42 -g43 -sssssS'NM_001330095.1:c.1261C>T' -p426 -(dp427 -g3 -g4 -sg5 -(lp428 -S'RefSeqGene record not available' -p429 -asg8 -g4 -sg9 -(lp430 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha14, mRNA -p431 -sg13 -S'NRXN1' -p432 -sg15 -(dp433 -g17 -S'NP_001317024.1:p.(Pro421Ser)' -p434 -sg19 -S'NP_001317024.1:p.(P421S)' -p435 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330095.1:c.1261C>T' -p436 -sg27 -g4 -sg28 -(dp437 -g30 -(dp438 -g32 -S'NC_000002.11:g.50847195G>A' -p439 -sg34 -(dp440 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p441 -sg42 -g43 -sssg44 -(dp442 -g32 -S'NC_000002.12:g.50620057G>A' -p443 -sg34 -(dp444 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p445 -sg42 -g43 -sssg49 -(dp446 -g32 -S'NC_000002.11:g.50847195G>A' -p447 -sg34 -(dp448 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p449 -sg42 -g43 -sssg55 -(dp450 -g32 -S'NC_000002.12:g.50620057G>A' -p451 -sg34 -(dp452 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p453 -sg42 -g43 -sssssS'NM_004801.4:c.1285C>T' -p454 -(dp455 -g3 -g4 -sg5 -(lp456 -S'A more recent version of the selected reference sequence NM_004801.4 is available (NM_004801.5)' -p457 -aS'NM_004801.5:c.1285C>T MUST be fully validated prior to use in reports' -p458 -aS'select_variants=NM_004801.5:c.1285C>T' -p459 -aS'RefSeqGene record not available' -p460 -asg8 -g4 -sg9 -(lp461 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha1, mRNA -p462 -sg13 -S'NRXN1' -p463 -sg15 -(dp464 -g17 -S'NP_004792.1:p.(Pro429Ser)' -p465 -sg19 -S'NP_004792.1:p.(P429S)' -p466 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_004801.4:c.1285C>T' -p467 -sg27 -g4 -sg28 -(dp468 -g30 -(dp469 -g32 -S'NC_000002.11:g.50847195G>A' -p470 -sg34 -(dp471 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p472 -sg42 -g43 -sssg44 -(dp473 -g32 -S'NC_000002.12:g.50620057G>A' -p474 -sg34 -(dp475 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p476 -sg42 -g43 -sssg49 -(dp477 -g32 -S'NC_000002.11:g.50847195G>A' -p478 -sg34 -(dp479 -g36 
-g53 -sg38 -g39 -sg40 -S'50847195' -p480 -sg42 -g43 -sssg55 -(dp481 -g32 -S'NC_000002.12:g.50620057G>A' -p482 -sg34 -(dp483 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p484 -sg42 -g43 -sssssS'NM_001135659.1:c.1405C>T' -p485 -(dp486 -g3 -g4 -sg5 -(lp487 -S'A more recent version of the selected reference sequence NM_001135659.1 is available (NM_001135659.2)' -p488 -aS'NM_001135659.2:c.1405C>T MUST be fully validated prior to use in reports' -p489 -aS'select_variants=NM_001135659.2:c.1405C>T' -p490 -asg8 -g4 -sg9 -(lp491 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha2, mRNA -p492 -sg13 -S'NRXN1' -p493 -sg15 -(dp494 -g17 -S'NP_001129131.1:p.(Pro469Ser)' -p495 -sg19 -S'NP_001129131.1:p.(P469S)' -p496 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001135659.1:c.1405C>T' -p497 -sg27 -S'NG_011878.1:g.417480C>T' -p498 -sg28 -(dp499 -g30 -(dp500 -g32 -S'NC_000002.11:g.50847195G>A' -p501 -sg34 -(dp502 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p503 -sg42 -g43 -sssg44 -(dp504 -g32 -S'NC_000002.12:g.50620057G>A' -p505 -sg34 -(dp506 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p507 -sg42 -g43 -sssg49 -(dp508 -g32 -S'NC_000002.11:g.50847195G>A' -p509 -sg34 -(dp510 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p511 -sg42 -g43 -sssg55 -(dp512 -g32 -S'NC_000002.12:g.50620057G>A' -p513 -sg34 -(dp514 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p515 -sg42 -g43 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant268.txt b/VariantValidator/testing/testOutputsMasterITS/variant268.txt deleted file mode 100644 index 3a782187..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant268.txt +++ /dev/null @@ -1,1497 +0,0 @@ -(dp0 -S'NM_001130976.1:c.3582C>G' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens dysferlin (DYSF), transcript variant 9, mRNA -p12 -sS'gene_symbol' -p13 -S'DYSF' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001124448.1:p.(Ile1194Met)' -p18 -sS'slr' -p19 -S'NP_001124448.1:p.(I1194M)' -p20 -ssS'submitted_variant' -p21 -S'2-71825797-C-G' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_001130976.1:c.3582C>G' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000002.11:g.71825797C>G' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr2' -p37 -sS'ref' -p38 -S'C' -p39 -sS'pos' -p40 -S'71825797' -p41 -sS'alt' -p42 -S'G' -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000002.12:g.71598667C>G' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p48 -sg42 -g43 -sssS'GRCh37' -p49 -(dp50 -g32 -S'NC_000002.11:g.71825797C>G' -p51 -sg34 -(dp52 -g36 -S'2' -p53 -sg38 -g39 -sg40 -S'71825797' -p54 -sg42 -g43 -sssS'GRCh38' -p55 -(dp56 -g32 -S'NC_000002.12:g.71598667C>G' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p59 -sg42 -g43 -sssssS'NM_001130981.1:c.3675C>G' -p60 -(dp61 -g3 -g4 -sg5 -(lp62 -S'RefSeqGene record not available' -p63 -asg8 -g4 -sg9 -(lp64 -sg11 -VHomo sapiens dysferlin (DYSF), transcript variant 14, mRNA -p65 -sg13 -S'DYSF' -p66 -sg15 -(dp67 -g17 
-S'NP_001124453.1:p.(Ile1225Met)' -p68 -sg19 -S'NP_001124453.1:p.(I1225M)' -p69 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001130981.1:c.3675C>G' -p70 -sg27 -g4 -sg28 -(dp71 -g30 -(dp72 -g32 -S'NC_000002.11:g.71825797C>G' -p73 -sg34 -(dp74 -g36 -g37 -sg38 -g39 -sg40 -S'71825797' -p75 -sg42 -g43 -sssg44 -(dp76 -g32 -S'NC_000002.12:g.71598667C>G' -p77 -sg34 -(dp78 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p79 -sg42 -g43 -sssg49 -(dp80 -g32 -S'NC_000002.11:g.71825797C>G' -p81 -sg34 -(dp82 -g36 -g53 -sg38 -g39 -sg40 -S'71825797' -p83 -sg42 -g43 -sssg55 -(dp84 -g32 -S'NC_000002.12:g.71598667C>G' -p85 -sg34 -(dp86 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p87 -sg42 -g43 -sssssS'NM_001130979.1:c.3717C>G' -p88 -(dp89 -g3 -g4 -sg5 -(lp90 -S'RefSeqGene record not available' -p91 -asg8 -g4 -sg9 -(lp92 -sg11 -VHomo sapiens dysferlin (DYSF), transcript variant 12, mRNA -p93 -sg13 -S'DYSF' -p94 -sg15 -(dp95 -g17 -S'NP_001124451.1:p.(Ile1239Met)' -p96 -sg19 -S'NP_001124451.1:p.(I1239M)' -p97 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001130979.1:c.3717C>G' -p98 -sg27 -g4 -sg28 -(dp99 -g30 -(dp100 -g32 -S'NC_000002.11:g.71825797C>G' -p101 -sg34 -(dp102 -g36 -g37 -sg38 -g39 -sg40 -S'71825797' -p103 -sg42 -g43 -sssg44 -(dp104 -g32 -S'NC_000002.12:g.71598667C>G' -p105 -sg34 -(dp106 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p107 -sg42 -g43 -sssg49 -(dp108 -g32 -S'NC_000002.11:g.71825797C>G' -p109 -sg34 -(dp110 -g36 -g53 -sg38 -g39 -sg40 -S'71825797' -p111 -sg42 -g43 -sssg55 -(dp112 -g32 -S'NC_000002.12:g.71598667C>G' -p113 -sg34 -(dp114 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p115 -sg42 -g43 -sssssS'NM_001130985.1:c.3678C>G' -p116 -(dp117 -g3 -g4 -sg5 -(lp118 -S'RefSeqGene record not available' -p119 -asg8 -g4 -sg9 -(lp120 -sg11 -VHomo sapiens dysferlin (DYSF), transcript variant 4, mRNA -p121 -sg13 -S'DYSF' -p122 -sg15 -(dp123 -g17 -S'NP_001124457.1:p.(Ile1226Met)' -p124 -sg19 -S'NP_001124457.1:p.(I1226M)' -p125 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001130985.1:c.3678C>G' -p126 
-sg27 -g4 -sg28 -(dp127 -g30 -(dp128 -g32 -S'NC_000002.11:g.71825797C>G' -p129 -sg34 -(dp130 -g36 -g37 -sg38 -g39 -sg40 -S'71825797' -p131 -sg42 -g43 -sssg44 -(dp132 -g32 -S'NC_000002.12:g.71598667C>G' -p133 -sg34 -(dp134 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p135 -sg42 -g43 -sssg49 -(dp136 -g32 -S'NC_000002.11:g.71825797C>G' -p137 -sg34 -(dp138 -g36 -g53 -sg38 -g39 -sg40 -S'71825797' -p139 -sg42 -g43 -sssg55 -(dp140 -g32 -S'NC_000002.12:g.71598667C>G' -p141 -sg34 -(dp142 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p143 -sg42 -g43 -sssssS'NM_001130987.1:c.3678C>G' -p144 -(dp145 -g3 -S'LRG_845t2:c.3678C>G' -p146 -sg5 -(lp147 -S'RefSeqGene record not available' -p148 -asg8 -g4 -sg9 -(lp149 -sg11 -VHomo sapiens dysferlin (DYSF), transcript variant 1, mRNA -p150 -sg13 -S'DYSF' -p151 -sg15 -(dp152 -g17 -S'NP_001124459.1:p.(Ile1226Met)' -p153 -sg19 -S'NP_001124459.1:p.(I1226M)' -p154 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001130987.1:c.3678C>G' -p155 -sg27 -g4 -sg28 -(dp156 -g30 -(dp157 -g32 -S'NC_000002.11:g.71825797C>G' -p158 -sg34 -(dp159 -g36 -g37 -sg38 -g39 -sg40 -S'71825797' -p160 -sg42 -g43 -sssg44 -(dp161 -g32 -S'NC_000002.12:g.71598667C>G' -p162 -sg34 -(dp163 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p164 -sg42 -g43 -sssg49 -(dp165 -g32 -S'NC_000002.11:g.71825797C>G' -p166 -sg34 -(dp167 -g36 -g53 -sg38 -g39 -sg40 -S'71825797' -p168 -sg42 -g43 -sssg55 -(dp169 -g32 -S'NC_000002.12:g.71598667C>G' -p170 -sg34 -(dp171 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p172 -sg42 -g43 -sssssS'NM_001130983.1:c.3627C>G' -p173 -(dp174 -g3 -g4 -sg5 -(lp175 -S'RefSeqGene record not available' -p176 -asg8 -g4 -sg9 -(lp177 -sg11 -VHomo sapiens dysferlin (DYSF), transcript variant 6, mRNA -p178 -sg13 -S'DYSF' -p179 -sg15 -(dp180 -g17 -S'NP_001124455.1:p.(Ile1209Met)' -p181 -sg19 -S'NP_001124455.1:p.(I1209M)' -p182 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001130983.1:c.3627C>G' -p183 -sg27 -g4 -sg28 -(dp184 -g30 -(dp185 -g32 -S'NC_000002.11:g.71825797C>G' -p186 -sg34 -(dp187 -g36 
-g37 -sg38 -g39 -sg40 -S'71825797' -p188 -sg42 -g43 -sssg44 -(dp189 -g32 -S'NC_000002.12:g.71598667C>G' -p190 -sg34 -(dp191 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p192 -sg42 -g43 -sssg49 -(dp193 -g32 -S'NC_000002.11:g.71825797C>G' -p194 -sg34 -(dp195 -g36 -g53 -sg38 -g39 -sg40 -S'71825797' -p196 -sg42 -g43 -sssg55 -(dp197 -g32 -S'NC_000002.12:g.71598667C>G' -p198 -sg34 -(dp199 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p200 -sg42 -g43 -sssssS'flag' -p201 -S'gene_variant' -p202 -sS'NM_001130980.1:c.3675C>G' -p203 -(dp204 -g3 -g4 -sg5 -(lp205 -S'RefSeqGene record not available' -p206 -asg8 -g4 -sg9 -(lp207 -sg11 -VHomo sapiens dysferlin (DYSF), transcript variant 13, mRNA -p208 -sg13 -S'DYSF' -p209 -sg15 -(dp210 -g17 -S'NP_001124452.1:p.(Ile1225Met)' -p211 -sg19 -S'NP_001124452.1:p.(I1225M)' -p212 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001130980.1:c.3675C>G' -p213 -sg27 -g4 -sg28 -(dp214 -g30 -(dp215 -g32 -S'NC_000002.11:g.71825797C>G' -p216 -sg34 -(dp217 -g36 -g37 -sg38 -g39 -sg40 -S'71825797' -p218 -sg42 -g43 -sssg44 -(dp219 -g32 -S'NC_000002.12:g.71598667C>G' -p220 -sg34 -(dp221 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p222 -sg42 -g43 -sssg49 -(dp223 -g32 -S'NC_000002.11:g.71825797C>G' -p224 -sg34 -(dp225 -g36 -g53 -sg38 -g39 -sg40 -S'71825797' -p226 -sg42 -g43 -sssg55 -(dp227 -g32 -S'NC_000002.12:g.71598667C>G' -p228 -sg34 -(dp229 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p230 -sg42 -g43 -sssssS'NM_003494.3:c.3624C>G' -p231 -(dp232 -g3 -S'LRG_845t1:c.3624C>G' -p233 -sg5 -(lp234 -S'RefSeqGene record not available' -p235 -asg8 -g4 -sg9 -(lp236 -sg11 -VHomo sapiens dysferlin (DYSF), transcript variant 8, mRNA -p237 -sg13 -S'DYSF' -p238 -sg15 -(dp239 -g17 -S'NP_003485.1(LRG_845p1):p.(Ile1208Met)' -p240 -sg19 -S'NP_003485.1:p.(I1208M)' -p241 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_003494.3:c.3624C>G' -p242 -sg27 -g4 -sg28 -(dp243 -g30 -(dp244 -g32 -S'NC_000002.11:g.71825797C>G' -p245 -sg34 -(dp246 -g36 -g37 -sg38 -g39 -sg40 -S'71825797' -p247 -sg42 -g43 
-sssg44 -(dp248 -g32 -S'NC_000002.12:g.71598667C>G' -p249 -sg34 -(dp250 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p251 -sg42 -g43 -sssg49 -(dp252 -g32 -S'NC_000002.11:g.71825797C>G' -p253 -sg34 -(dp254 -g36 -g53 -sg38 -g39 -sg40 -S'71825797' -p255 -sg42 -g43 -sssg55 -(dp256 -g32 -S'NC_000002.12:g.71598667C>G' -p257 -sg34 -(dp258 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p259 -sg42 -g43 -sssssS'NM_001130984.1:c.3585C>G' -p260 -(dp261 -g3 -g4 -sg5 -(lp262 -S'RefSeqGene record not available' -p263 -asg8 -g4 -sg9 -(lp264 -sg11 -VHomo sapiens dysferlin (DYSF), transcript variant 5, mRNA -p265 -sg13 -S'DYSF' -p266 -sg15 -(dp267 -g17 -S'NP_001124456.1:p.(Ile1195Met)' -p268 -sg19 -S'NP_001124456.1:p.(I1195M)' -p269 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001130984.1:c.3585C>G' -p270 -sg27 -g4 -sg28 -(dp271 -g30 -(dp272 -g32 -S'NC_000002.11:g.71825797C>G' -p273 -sg34 -(dp274 -g36 -g37 -sg38 -g39 -sg40 -S'71825797' -p275 -sg42 -g43 -sssg44 -(dp276 -g32 -S'NC_000002.12:g.71598667C>G' -p277 -sg34 -(dp278 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p279 -sg42 -g43 -sssg49 -(dp280 -g32 -S'NC_000002.11:g.71825797C>G' -p281 -sg34 -(dp282 -g36 -g53 -sg38 -g39 -sg40 -S'71825797' -p283 -sg42 -g43 -sssg55 -(dp284 -g32 -S'NC_000002.12:g.71598667C>G' -p285 -sg34 -(dp286 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p287 -sg42 -g43 -sssssS'NM_001130977.1:c.3582C>G' -p288 -(dp289 -g3 -g4 -sg5 -(lp290 -S'RefSeqGene record not available' -p291 -asg8 -g4 -sg9 -(lp292 -sg11 -VHomo sapiens dysferlin (DYSF), transcript variant 10, mRNA -p293 -sg13 -S'DYSF' -p294 -sg15 -(dp295 -g17 -S'NP_001124449.1:p.(Ile1194Met)' -p296 -sg19 -S'NP_001124449.1:p.(I1194M)' -p297 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001130977.1:c.3582C>G' -p298 -sg27 -g4 -sg28 -(dp299 -g30 -(dp300 -g32 -S'NC_000002.11:g.71825797C>G' -p301 -sg34 -(dp302 -g36 -g37 -sg38 -g39 -sg40 -S'71825797' -p303 -sg42 -g43 -sssg44 -(dp304 -g32 -S'NC_000002.12:g.71598667C>G' -p305 -sg34 -(dp306 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p307 
-sg42 -g43 -sssg49 -(dp308 -g32 -S'NC_000002.11:g.71825797C>G' -p309 -sg34 -(dp310 -g36 -g53 -sg38 -g39 -sg40 -S'71825797' -p311 -sg42 -g43 -sssg55 -(dp312 -g32 -S'NC_000002.12:g.71598667C>G' -p313 -sg34 -(dp314 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p315 -sg42 -g43 -sssssS'NM_001130455.1:c.3627C>G' -p316 -(dp317 -g3 -g4 -sg5 -(lp318 -S'RefSeqGene record not available' -p319 -asg8 -g4 -sg9 -(lp320 -sg11 -VHomo sapiens dysferlin (DYSF), transcript variant 2, mRNA -p321 -sg13 -S'DYSF' -p322 -sg15 -(dp323 -g17 -S'NP_001123927.1:p.(Ile1209Met)' -p324 -sg19 -S'NP_001123927.1:p.(I1209M)' -p325 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001130455.1:c.3627C>G' -p326 -sg27 -g4 -sg28 -(dp327 -g30 -(dp328 -g32 -S'NC_000002.11:g.71825797C>G' -p329 -sg34 -(dp330 -g36 -g37 -sg38 -g39 -sg40 -S'71825797' -p331 -sg42 -g43 -sssg44 -(dp332 -g32 -S'NC_000002.12:g.71598667C>G' -p333 -sg34 -(dp334 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p335 -sg42 -g43 -sssg49 -(dp336 -g32 -S'NC_000002.11:g.71825797C>G' -p337 -sg34 -(dp338 -g36 -g53 -sg38 -g39 -sg40 -S'71825797' -p339 -sg42 -g43 -sssg55 -(dp340 -g32 -S'NC_000002.12:g.71598667C>G' -p341 -sg34 -(dp342 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p343 -sg42 -g43 -sssssS'NM_001130978.1:c.3624C>G' -p344 -(dp345 -g3 -g4 -sg5 -(lp346 -S'The current status of LRG_845 is pending therefore changes may be made to the LRG reference sequence' -p347 -asg8 -g4 -sg9 -(lp348 -sg11 -VHomo sapiens dysferlin (DYSF), transcript variant 11, mRNA -p349 -sg13 -S'DYSF' -p350 -sg15 -(dp351 -g17 -S'NP_001124450.1:p.(Ile1208Met)' -p352 -sg19 -S'NP_001124450.1:p.(I1208M)' -p353 -ssg21 -g22 -sg23 -g4 -sg24 -S'LRG_845:g.150045C>G' -p354 -sg25 -S'NM_001130978.1:c.3624C>G' -p355 -sg27 -S'NG_008694.1:g.150045C>G' -p356 -sg28 -(dp357 -g30 -(dp358 -g32 -S'NC_000002.11:g.71825797C>G' -p359 -sg34 -(dp360 -g36 -g37 -sg38 -g39 -sg40 -S'71825797' -p361 -sg42 -g43 -sssg44 -(dp362 -g32 -S'NC_000002.12:g.71598667C>G' -p363 -sg34 -(dp364 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' 
-p365 -sg42 -g43 -sssg49 -(dp366 -g32 -S'NC_000002.11:g.71825797C>G' -p367 -sg34 -(dp368 -g36 -g53 -sg38 -g39 -sg40 -S'71825797' -p369 -sg42 -g43 -sssg55 -(dp370 -g32 -S'NC_000002.12:g.71598667C>G' -p371 -sg34 -(dp372 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p373 -sg42 -g43 -sssssS'NM_001130982.1:c.3720C>G' -p374 -(dp375 -g3 -g4 -sg5 -(lp376 -S'RefSeqGene record not available' -p377 -asg8 -g4 -sg9 -(lp378 -sg11 -VHomo sapiens dysferlin (DYSF), transcript variant 7, mRNA -p379 -sg13 -S'DYSF' -p380 -sg15 -(dp381 -g17 -S'NP_001124454.1:p.(Ile1240Met)' -p382 -sg19 -S'NP_001124454.1:p.(I1240M)' -p383 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001130982.1:c.3720C>G' -p384 -sg27 -g4 -sg28 -(dp385 -g30 -(dp386 -g32 -S'NC_000002.11:g.71825797C>G' -p387 -sg34 -(dp388 -g36 -g37 -sg38 -g39 -sg40 -S'71825797' -p389 -sg42 -g43 -sssg44 -(dp390 -g32 -S'NC_000002.12:g.71598667C>G' -p391 -sg34 -(dp392 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p393 -sg42 -g43 -sssg49 -(dp394 -g32 -S'NC_000002.11:g.71825797C>G' -p395 -sg34 -(dp396 -g36 -g53 -sg38 -g39 -sg40 -S'71825797' -p397 -sg42 -g43 -sssg55 -(dp398 -g32 -S'NC_000002.12:g.71598667C>G' -p399 -sg34 -(dp400 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p401 -sg42 -g43 -sssssS'NM_001130986.1:c.3585C>G' -p402 -(dp403 -g3 -g4 -sg5 -(lp404 -S'RefSeqGene record not available' -p405 -asg8 -g4 -sg9 -(lp406 -sg11 -VHomo sapiens dysferlin (DYSF), transcript variant 3, mRNA -p407 -sg13 -S'DYSF' -p408 -sg15 -(dp409 -g17 -S'NP_001124458.1:p.(Ile1195Met)' -p410 -sg19 -S'NP_001124458.1:p.(I1195M)' -p411 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001130986.1:c.3585C>G' -p412 -sg27 -g4 -sg28 -(dp413 -g30 -(dp414 -g32 -S'NC_000002.11:g.71825797C>G' -p415 -sg34 -(dp416 -g36 -g37 -sg38 -g39 -sg40 -S'71825797' -p417 -sg42 -g43 -sssg44 -(dp418 -g32 -S'NC_000002.12:g.71598667C>G' -p419 -sg34 -(dp420 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p421 -sg42 -g43 -sssg49 -(dp422 -g32 -S'NC_000002.11:g.71825797C>G' -p423 -sg34 -(dp424 -g36 -g53 -sg38 -g39 -sg40 
-S'71825797' -p425 -sg42 -g43 -sssg55 -(dp426 -g32 -S'NC_000002.12:g.71598667C>G' -p427 -sg34 -(dp428 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p429 -sg42 -g43 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant269.txt b/VariantValidator/testing/testOutputsMasterITS/variant269.txt deleted file mode 100644 index bfd45c86..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant269.txt +++ /dev/null @@ -1,348 +0,0 @@ -(dp0 -S'NM_021007.2:c.1718G>C' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'SCN2A' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_066287.2:p.(Ser573Thr)' -p18 -sS'slr' -p19 -S'NP_066287.2:p.(S573T)' -p20 -ssS'submitted_variant' -p21 -S'2-166179712-G-C' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_021007.2:c.1718G>C' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000002.11:g.166179712G>C' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr2' -p37 -sS'ref' -p38 -S'G' -p39 -sS'pos' -p40 -S'166179712' -p41 -sS'alt' -p42 -S'C' -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000002.12:g.165323202G>C' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'165323202' -p48 -sg42 -g43 -sssS'GRCh37' -p49 -(dp50 -g32 -S'NC_000002.11:g.166179712G>C' -p51 -sg34 -(dp52 -g36 -S'2' -p53 -sg38 -g39 -sg40 -S'166179712' -p54 -sg42 -g43 -sssS'GRCh38' -p55 -(dp56 -g32 -S'NC_000002.12:g.165323202G>C' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'165323202' -p59 -sg42 -g43 -sssssS'flag' -p60 -S'gene_variant' -p61 
-sS'NM_001040143.1:c.1718G>C' -p62 -(dp63 -g3 -g4 -sg5 -(lp64 -S'RefSeqGene record not available' -p65 -asg8 -g4 -sg9 -(lp66 -sg11 -VHomo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 3, mRNA -p67 -sg13 -S'SCN2A' -p68 -sg15 -(dp69 -g17 -S'NP_001035233.1:p.(Ser573Thr)' -p70 -sg19 -S'NP_001035233.1:p.(S573T)' -p71 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001040143.1:c.1718G>C' -p72 -sg27 -g4 -sg28 -(dp73 -g30 -(dp74 -g32 -S'NC_000002.11:g.166179712G>C' -p75 -sg34 -(dp76 -g36 -g37 -sg38 -g39 -sg40 -S'166179712' -p77 -sg42 -g43 -sssg44 -(dp78 -g32 -S'NC_000002.12:g.165323202G>C' -p79 -sg34 -(dp80 -g36 -g37 -sg38 -g39 -sg40 -S'165323202' -p81 -sg42 -g43 -sssg49 -(dp82 -g32 -S'NC_000002.11:g.166179712G>C' -p83 -sg34 -(dp84 -g36 -g53 -sg38 -g39 -sg40 -S'166179712' -p85 -sg42 -g43 -sssg55 -(dp86 -g32 -S'NC_000002.12:g.165323202G>C' -p87 -sg34 -(dp88 -g36 -g53 -sg38 -g39 -sg40 -S'165323202' -p89 -sg42 -g43 -sssssS'NM_001040142.1:c.1718G>C' -p90 -(dp91 -g3 -g4 -sg5 -(lp92 -sg8 -g4 -sg9 -(lp93 -sg11 -VHomo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 2, mRNA -p94 -sg13 -S'SCN2A' -p95 -sg15 -(dp96 -g17 -S'NP_001035232.1:p.(Ser573Thr)' -p97 -sg19 -S'NP_001035232.1:p.(S573T)' -p98 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001040142.1:c.1718G>C' -p99 -sg27 -S'NG_008143.1:g.88801G>C' -p100 -sg28 -(dp101 -g30 -(dp102 -g32 -S'NC_000002.11:g.166179712G>C' -p103 -sg34 -(dp104 -g36 -g37 -sg38 -g39 -sg40 -S'166179712' -p105 -sg42 -g43 -sssg44 -(dp106 -g32 -S'NC_000002.12:g.165323202G>C' -p107 -sg34 -(dp108 -g36 -g37 -sg38 -g39 -sg40 -S'165323202' -p109 -sg42 -g43 -sssg49 -(dp110 -g32 -S'NC_000002.11:g.166179712G>C' -p111 -sg34 -(dp112 -g36 -g53 -sg38 -g39 -sg40 -S'166179712' -p113 -sg42 -g43 -sssg55 -(dp114 -g32 -S'NC_000002.12:g.165323202G>C' -p115 -sg34 -(dp116 -g36 -g53 -sg38 -g39 -sg40 -S'165323202' -p117 -sg42 -g43 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant27.txt b/VariantValidator/testing/testOutputsMasterITS/variant27.txt deleted file mode 100644 index b6dd8eed..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant27.txt +++ /dev/null @@ -1,60 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000014.8:g.36942492T=' -p7 -aS'Variant reference (T) does not agree with reference sequence (A)' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -g4 -sS'gene_symbol' -p13 -g4 -sS'HGVS_predicted_protein_consequence' -p14 -(dp15 -S'tlr' -p16 -g4 -sS'slr' -p17 -g4 -ssS'submitted_variant' -p18 -S'NR_138595.1:n.1071+1A=' -p19 -sS'genome_context_intronic_sequence' -p20 -g4 -sS'HGVS_LRG_variant' -p21 -g4 -sS'HGVS_transcript_variant' -p22 -g4 -sS'HGVS_RefSeqGene_variant' -p23 -g4 -sS'primary_assembly_loci' -p24 -(dp25 -ssS'flag' -p26 -S'warning' -p27 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant270.txt b/VariantValidator/testing/testOutputsMasterITS/variant270.txt deleted file mode 100644 index 78287383..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant270.txt +++ /dev/null @@ -1,348 +0,0 @@ -(dp0 -S'NM_021007.2:c.2026A>G' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'SCN2A' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_066287.2:p.(Thr676Ala)' -p18 -sS'slr' -p19 -S'NP_066287.2:p.(T676A)' -p20 -ssS'submitted_variant' -p21 -S'2-166183371-A-G' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_021007.2:c.2026A>G' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000002.11:g.166183371A>G' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr2' -p37 -sS'ref' -p38 -S'A' -p39 -sS'pos' -p40 -S'166183371' -p41 -sS'alt' -p42 -S'G' -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000002.12:g.165326861A>G' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'165326861' -p48 -sg42 -g43 -sssS'GRCh37' -p49 -(dp50 -g32 -S'NC_000002.11:g.166183371A>G' -p51 -sg34 -(dp52 -g36 -S'2' -p53 -sg38 -g39 -sg40 -S'166183371' -p54 -sg42 -g43 -sssS'GRCh38' -p55 -(dp56 -g32 -S'NC_000002.12:g.165326861A>G' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'165326861' -p59 -sg42 -g43 -sssssS'flag' -p60 -S'gene_variant' -p61 -sS'NM_001040143.1:c.2026A>G' -p62 -(dp63 -g3 -g4 -sg5 -(lp64 -S'RefSeqGene record not available' -p65 -asg8 -g4 -sg9 -(lp66 -sg11 -VHomo sapiens sodium voltage-gated 
channel alpha subunit 2 (SCN2A), transcript variant 3, mRNA -p67 -sg13 -S'SCN2A' -p68 -sg15 -(dp69 -g17 -S'NP_001035233.1:p.(Thr676Ala)' -p70 -sg19 -S'NP_001035233.1:p.(T676A)' -p71 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001040143.1:c.2026A>G' -p72 -sg27 -g4 -sg28 -(dp73 -g30 -(dp74 -g32 -S'NC_000002.11:g.166183371A>G' -p75 -sg34 -(dp76 -g36 -g37 -sg38 -g39 -sg40 -S'166183371' -p77 -sg42 -g43 -sssg44 -(dp78 -g32 -S'NC_000002.12:g.165326861A>G' -p79 -sg34 -(dp80 -g36 -g37 -sg38 -g39 -sg40 -S'165326861' -p81 -sg42 -g43 -sssg49 -(dp82 -g32 -S'NC_000002.11:g.166183371A>G' -p83 -sg34 -(dp84 -g36 -g53 -sg38 -g39 -sg40 -S'166183371' -p85 -sg42 -g43 -sssg55 -(dp86 -g32 -S'NC_000002.12:g.165326861A>G' -p87 -sg34 -(dp88 -g36 -g53 -sg38 -g39 -sg40 -S'165326861' -p89 -sg42 -g43 -sssssS'NM_001040142.1:c.2026A>G' -p90 -(dp91 -g3 -g4 -sg5 -(lp92 -sg8 -g4 -sg9 -(lp93 -sg11 -VHomo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 2, mRNA -p94 -sg13 -S'SCN2A' -p95 -sg15 -(dp96 -g17 -S'NP_001035232.1:p.(Thr676Ala)' -p97 -sg19 -S'NP_001035232.1:p.(T676A)' -p98 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001040142.1:c.2026A>G' -p99 -sg27 -S'NG_008143.1:g.92460A>G' -p100 -sg28 -(dp101 -g30 -(dp102 -g32 -S'NC_000002.11:g.166183371A>G' -p103 -sg34 -(dp104 -g36 -g37 -sg38 -g39 -sg40 -S'166183371' -p105 -sg42 -g43 -sssg44 -(dp106 -g32 -S'NC_000002.12:g.165326861A>G' -p107 -sg34 -(dp108 -g36 -g37 -sg38 -g39 -sg40 -S'165326861' -p109 -sg42 -g43 -sssg49 -(dp110 -g32 -S'NC_000002.11:g.166183371A>G' -p111 -sg34 -(dp112 -g36 -g53 -sg38 -g39 -sg40 -S'166183371' -p113 -sg42 -g43 -sssg55 -(dp114 -g32 -S'NC_000002.12:g.165326861A>G' -p115 -sg34 -(dp116 -g36 -g53 -sg38 -g39 -sg40 -S'165326861' -p117 -sg42 -g43 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant271.txt b/VariantValidator/testing/testOutputsMasterITS/variant271.txt deleted file mode 100644 index d2839e20..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant271.txt +++ /dev/null @@ -1,2299 +0,0 @@ -(dp0 -S'NM_001353951.1:c.233_242delinsGT' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p7 -aS'RefSeqGene record not available' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 8, mRNA -p13 -sS'gene_symbol' -p14 -S'SCN1A' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001340880.1:p.(Glu78GlyfsTer7)' -p19 -sS'slr' -p20 -S'NP_001340880.1:p.(E78Gfs*7)' -p21 -ssS'submitted_variant' -p22 -S'2-166929889-GTCCAGGTCCT-GAC' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'HGVS_LRG_variant' -p25 -g4 -sS'HGVS_transcript_variant' -p26 -S'NM_001353951.1:c.233_242delinsGT' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr2' -p38 -sS'ref' -p39 -S'TCCAGGTCCT' -p40 -sS'pos' -p41 -S'166929890' -p42 -sS'alt' -p43 -VAC -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p49 -sg41 -S'166073380' -p50 -sg43 -VAC -p51 -sssS'GRCh37' -p52 -(dp53 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p54 -sg35 -(dp55 -g37 -S'2' -p56 -sg39 -S'TCCAGGTCCT' -p57 -sg41 -S'166929890' -p58 -sg43 -g44 -sssS'GRCh38' -p59 -(dp60 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p61 -sg35 -(dp62 -g37 
-g56 -sg39 -S'TCCAGGTCCT' -p63 -sg41 -S'166073380' -p64 -sg43 -g51 -sssssS'NM_001353958.1:c.233_242delinsGT' -p65 -(dp66 -g3 -g4 -sg5 -(lp67 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p68 -aS'RefSeqGene record not available' -p69 -asg9 -g4 -sg10 -(lp70 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 13, mRNA -p71 -sg14 -S'SCN1A' -p72 -sg16 -(dp73 -g18 -S'NP_001340887.1:p.(Glu78GlyfsTer7)' -p74 -sg20 -S'NP_001340887.1:p.(E78Gfs*7)' -p75 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353958.1:c.233_242delinsGT' -p76 -sg28 -g4 -sg29 -(dp77 -g31 -(dp78 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p79 -sg35 -(dp80 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p81 -sg41 -S'166929890' -p82 -sg43 -VAC -p83 -sssg45 -(dp84 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p85 -sg35 -(dp86 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p87 -sg41 -S'166073380' -p88 -sg43 -VAC -p89 -sssg52 -(dp90 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p91 -sg35 -(dp92 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p93 -sg41 -S'166929890' -p94 -sg43 -g83 -sssg59 -(dp95 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p96 -sg35 -(dp97 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p98 -sg41 -S'166073380' -p99 -sg43 -g89 -sssssS'NM_001202435.1:c.233_242delinsGT' -p100 -(dp101 -g3 -g4 -sg5 -(lp102 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p103 -aS'A more recent version of the selected reference sequence NM_001202435.1 is available (NM_001202435.2)' -p104 -aS'NM_001202435.2:c.233_242delinsGT MUST be fully validated prior to use in reports' -p105 -aS'select_variants=NM_001202435.2:c.233_242delinsGT' -p106 -aS'RefSeqGene record not available' -p107 -asg9 -g4 -sg10 -(lp108 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 4, mRNA -p109 -sg14 -S'SCN1A' -p110 -sg16 -(dp111 -g18 -S'NP_001189364.1:p.(Glu78GlyfsTer7)' -p112 -sg20 
-S'NP_001189364.1:p.(E78Gfs*7)' -p113 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001202435.1:c.233_242delinsGT' -p114 -sg28 -g4 -sg29 -(dp115 -g31 -(dp116 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p117 -sg35 -(dp118 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p119 -sg41 -S'166929890' -p120 -sg43 -VAC -p121 -sssg45 -(dp122 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p123 -sg35 -(dp124 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p125 -sg41 -S'166073380' -p126 -sg43 -VAC -p127 -sssg52 -(dp128 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p129 -sg35 -(dp130 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p131 -sg41 -S'166929890' -p132 -sg43 -g121 -sssg59 -(dp133 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p134 -sg35 -(dp135 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p136 -sg41 -S'166073380' -p137 -sg43 -g127 -sssssS'NM_001353955.1:c.233_242delinsGT' -p138 -(dp139 -g3 -g4 -sg5 -(lp140 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p141 -aS'RefSeqGene record not available' -p142 -asg9 -g4 -sg10 -(lp143 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 11, mRNA -p144 -sg14 -S'SCN1A' -p145 -sg16 -(dp146 -g18 -S'NP_001340884.1:p.(Glu78GlyfsTer7)' -p147 -sg20 -S'NP_001340884.1:p.(E78Gfs*7)' -p148 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353955.1:c.233_242delinsGT' -p149 -sg28 -g4 -sg29 -(dp150 -g31 -(dp151 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p152 -sg35 -(dp153 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p154 -sg41 -S'166929890' -p155 -sg43 -VAC -p156 -sssg45 -(dp157 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p158 -sg35 -(dp159 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p160 -sg41 -S'166073380' -p161 -sg43 -VAC -p162 -sssg52 -(dp163 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p164 -sg35 -(dp165 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p166 -sg41 -S'166929890' -p167 -sg43 -g156 -sssg59 -(dp168 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p169 -sg35 -(dp170 -g37 -g56 -sg39 
-S'TCCAGGTCCT' -p171 -sg41 -S'166073380' -p172 -sg43 -g162 -sssssS'NM_001165963.2:c.233_242delinsGT' -p173 -(dp174 -g3 -g4 -sg5 -(lp175 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p176 -aS'RefSeqGene record not available' -p177 -asg9 -g4 -sg10 -(lp178 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 1, mRNA -p179 -sg14 -S'SCN1A' -p180 -sg16 -(dp181 -g18 -S'NP_001159435.1:p.(Glu78GlyfsTer7)' -p182 -sg20 -S'NP_001159435.1:p.(E78Gfs*7)' -p183 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001165963.2:c.233_242delinsGT' -p184 -sg28 -g4 -sg29 -(dp185 -g31 -(dp186 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p187 -sg35 -(dp188 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p189 -sg41 -S'166929890' -p190 -sg43 -VAC -p191 -sssg45 -(dp192 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p193 -sg35 -(dp194 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p195 -sg41 -S'166073380' -p196 -sg43 -VAC -p197 -sssg52 -(dp198 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p199 -sg35 -(dp200 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p201 -sg41 -S'166929890' -p202 -sg43 -g191 -sssg59 -(dp203 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p204 -sg35 -(dp205 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p206 -sg41 -S'166073380' -p207 -sg43 -g197 -sssssS'NM_001353950.1:c.233_242delinsGT' -p208 -(dp209 -g3 -g4 -sg5 -(lp210 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p211 -aS'RefSeqGene record not available' -p212 -asg9 -g4 -sg10 -(lp213 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 7, mRNA -p214 -sg14 -S'SCN1A' -p215 -sg16 -(dp216 -g18 -S'NP_001340879.1:p.(Glu78GlyfsTer7)' -p217 -sg20 -S'NP_001340879.1:p.(E78Gfs*7)' -p218 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353950.1:c.233_242delinsGT' -p219 -sg28 -g4 -sg29 -(dp220 -g31 -(dp221 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p222 -sg35 -(dp223 -g37 -g38 
-sg39 -S'TCCAGGTCCT' -p224 -sg41 -S'166929890' -p225 -sg43 -VAC -p226 -sssg45 -(dp227 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p228 -sg35 -(dp229 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p230 -sg41 -S'166073380' -p231 -sg43 -VAC -p232 -sssg52 -(dp233 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p234 -sg35 -(dp235 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p236 -sg41 -S'166929890' -p237 -sg43 -g226 -sssg59 -(dp238 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p239 -sg35 -(dp240 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p241 -sg41 -S'166073380' -p242 -sg43 -g232 -sssssS'NM_001353952.1:c.233_242delinsGT' -p243 -(dp244 -g3 -g4 -sg5 -(lp245 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p246 -aS'RefSeqGene record not available' -p247 -asg9 -g4 -sg10 -(lp248 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 9, mRNA -p249 -sg14 -S'SCN1A' -p250 -sg16 -(dp251 -g18 -S'NP_001340881.1:p.(Glu78GlyfsTer7)' -p252 -sg20 -S'NP_001340881.1:p.(E78Gfs*7)' -p253 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353952.1:c.233_242delinsGT' -p254 -sg28 -g4 -sg29 -(dp255 -g31 -(dp256 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p257 -sg35 -(dp258 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p259 -sg41 -S'166929890' -p260 -sg43 -VAC -p261 -sssg45 -(dp262 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p263 -sg35 -(dp264 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p265 -sg41 -S'166073380' -p266 -sg43 -VAC -p267 -sssg52 -(dp268 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p269 -sg35 -(dp270 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p271 -sg41 -S'166929890' -p272 -sg43 -g261 -sssg59 -(dp273 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p274 -sg35 -(dp275 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p276 -sg41 -S'166073380' -p277 -sg43 -g267 -sssssS'NM_001165963.1:c.233_242delinsGT' -p278 -(dp279 -g3 -g4 -sg5 -(lp280 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p281 
-aS'A more recent version of the selected reference sequence NM_001165963.1 is available (NM_001165963.2)' -p282 -aS'NM_001165963.2:c.233_242delinsGT MUST be fully validated prior to use in reports' -p283 -aS'select_variants=NM_001165963.2:c.233_242delinsGT' -p284 -aS'RefSeqGene record not available' -p285 -asg9 -g4 -sg10 -(lp286 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 1, mRNA -p287 -sg14 -S'SCN1A' -p288 -sg16 -(dp289 -g18 -S'NP_001159435.1:p.(Glu78GlyfsTer7)' -p290 -sg20 -S'NP_001159435.1:p.(E78Gfs*7)' -p291 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001165963.1:c.233_242delinsGT' -p292 -sg28 -g4 -sg29 -(dp293 -g31 -(dp294 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p295 -sg35 -(dp296 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p297 -sg41 -S'166929890' -p298 -sg43 -VAC -p299 -sssg45 -(dp300 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p301 -sg35 -(dp302 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p303 -sg41 -S'166073380' -p304 -sg43 -VAC -p305 -sssg52 -(dp306 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p307 -sg35 -(dp308 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p309 -sg41 -S'166929890' -p310 -sg43 -g299 -sssg59 -(dp311 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p312 -sg35 -(dp313 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p314 -sg41 -S'166073380' -p315 -sg43 -g305 -sssssS'NM_001353957.1:c.233_242delinsGT' -p316 -(dp317 -g3 -g4 -sg5 -(lp318 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p319 -aS'RefSeqGene record not available' -p320 -asg9 -g4 -sg10 -(lp321 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 12, mRNA -p322 -sg14 -S'SCN1A' -p323 -sg16 -(dp324 -g18 -S'NP_001340886.1:p.(Glu78GlyfsTer7)' -p325 -sg20 -S'NP_001340886.1:p.(E78Gfs*7)' -p326 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353957.1:c.233_242delinsGT' -p327 -sg28 -g4 -sg29 -(dp328 -g31 -(dp329 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p330 
-sg35 -(dp331 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p332 -sg41 -S'166929890' -p333 -sg43 -VAC -p334 -sssg45 -(dp335 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p336 -sg35 -(dp337 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p338 -sg41 -S'166073380' -p339 -sg43 -VAC -p340 -sssg52 -(dp341 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p342 -sg35 -(dp343 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p344 -sg41 -S'166929890' -p345 -sg43 -g334 -sssg59 -(dp346 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p347 -sg35 -(dp348 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p349 -sg41 -S'166073380' -p350 -sg43 -g340 -sssssS'flag' -p351 -S'gene_variant' -p352 -sS'NR_148667.1:n.638_647delinsGT' -p353 -(dp354 -g3 -g4 -sg5 -(lp355 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p356 -aS'RefSeqGene record not available' -p357 -asg9 -g4 -sg10 -(lp358 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 16, non-coding RNA -p359 -sg14 -S'SCN1A' -p360 -sg16 -(dp361 -g18 -S'Non-coding :n.' 
-p362 -sg20 -g4 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NR_148667.1:n.638_647delinsGT' -p363 -sg28 -g4 -sg29 -(dp364 -g31 -(dp365 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p366 -sg35 -(dp367 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p368 -sg41 -S'166929890' -p369 -sg43 -VAC -p370 -sssg45 -(dp371 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p372 -sg35 -(dp373 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p374 -sg41 -S'166073380' -p375 -sg43 -VAC -p376 -sssg52 -(dp377 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p378 -sg35 -(dp379 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p380 -sg41 -S'166929890' -p381 -sg43 -g370 -sssg59 -(dp382 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p383 -sg35 -(dp384 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p385 -sg41 -S'166073380' -p386 -sg43 -g376 -sssssS'NM_001353954.1:c.233_242delinsGT' -p387 -(dp388 -g3 -g4 -sg5 -(lp389 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p390 -aS'RefSeqGene record not available' -p391 -asg9 -g4 -sg10 -(lp392 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 10, mRNA -p393 -sg14 -S'SCN1A' -p394 -sg16 -(dp395 -g18 -S'NP_001340883.1:p.(Glu78GlyfsTer7)' -p396 -sg20 -S'NP_001340883.1:p.(E78Gfs*7)' -p397 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353954.1:c.233_242delinsGT' -p398 -sg28 -g4 -sg29 -(dp399 -g31 -(dp400 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p401 -sg35 -(dp402 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p403 -sg41 -S'166929890' -p404 -sg43 -VAC -p405 -sssg45 -(dp406 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p407 -sg35 -(dp408 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p409 -sg41 -S'166073380' -p410 -sg43 -VAC -p411 -sssg52 -(dp412 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p413 -sg35 -(dp414 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p415 -sg41 -S'166929890' -p416 -sg43 -g405 -sssg59 -(dp417 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p418 -sg35 -(dp419 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p420 -sg41 
-S'166073380' -p421 -sg43 -g411 -sssssS'NM_006920.4:c.233_242delinsGT' -p422 -(dp423 -g3 -S'LRG_8t1:c.233_242delinsGT' -p424 -sg5 -(lp425 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p426 -aS'A more recent version of the selected reference sequence NM_006920.4 is available (NM_006920.5)' -p427 -aS'NM_006920.5:c.233_242delinsGT MUST be fully validated prior to use in reports' -p428 -aS'select_variants=NM_006920.5:c.233_242delinsGT' -p429 -asg9 -g4 -sg10 -(lp430 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 2, mRNA -p431 -sg14 -S'SCN1A' -p432 -sg16 -(dp433 -g18 -S'NP_008851.3(LRG_8p1):p.(Glu78GlyfsTer7)' -p434 -sg20 -S'NP_008851.3:p.(E78Gfs*7)' -p435 -ssg22 -g23 -sg24 -g4 -sg25 -S'LRG_8:g.5251_5260delinsGT' -p436 -sg26 -S'NM_006920.4:c.233_242delinsGT' -p437 -sg28 -S'NG_011906.1:g.5251_5260delinsGT' -p438 -sg29 -(dp439 -g31 -(dp440 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p441 -sg35 -(dp442 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p443 -sg41 -S'166929890' -p444 -sg43 -VAC -p445 -sssg45 -(dp446 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p447 -sg35 -(dp448 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p449 -sg41 -S'166073380' -p450 -sg43 -VAC -p451 -sssg52 -(dp452 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p453 -sg35 -(dp454 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p455 -sg41 -S'166929890' -p456 -sg43 -g445 -sssg59 -(dp457 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p458 -sg35 -(dp459 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p460 -sg41 -S'166073380' -p461 -sg43 -g451 -sssssS'NM_001165964.1:c.233_242delinsGT' -p462 -(dp463 -g3 -g4 -sg5 -(lp464 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p465 -aS'A more recent version of the selected reference sequence NM_001165964.1 is available (NM_001165964.2)' -p466 -aS'NM_001165964.2:c.233_242delinsGT MUST be fully validated prior to use in reports' -p467 
-aS'select_variants=NM_001165964.2:c.233_242delinsGT' -p468 -aS'RefSeqGene record not available' -p469 -asg9 -g4 -sg10 -(lp470 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 3, mRNA -p471 -sg14 -S'SCN1A' -p472 -sg16 -(dp473 -g18 -S'NP_001159436.1:p.(Glu78GlyfsTer7)' -p474 -sg20 -S'NP_001159436.1:p.(E78Gfs*7)' -p475 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001165964.1:c.233_242delinsGT' -p476 -sg28 -g4 -sg29 -(dp477 -g31 -(dp478 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p479 -sg35 -(dp480 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p481 -sg41 -S'166929890' -p482 -sg43 -VAC -p483 -sssg45 -(dp484 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p485 -sg35 -(dp486 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p487 -sg41 -S'166073380' -p488 -sg43 -VAC -p489 -sssg52 -(dp490 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p491 -sg35 -(dp492 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p493 -sg41 -S'166929890' -p494 -sg43 -g483 -sssg59 -(dp495 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p496 -sg35 -(dp497 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p498 -sg41 -S'166073380' -p499 -sg43 -g489 -sssssS'NM_001353960.1:c.233_242delinsGT' -p500 -(dp501 -g3 -g4 -sg5 -(lp502 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p503 -aS'RefSeqGene record not available' -p504 -asg9 -g4 -sg10 -(lp505 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 14, mRNA -p506 -sg14 -S'SCN1A' -p507 -sg16 -(dp508 -g18 -S'NP_001340889.1:p.(Glu78GlyfsTer7)' -p509 -sg20 -S'NP_001340889.1:p.(E78Gfs*7)' -p510 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353960.1:c.233_242delinsGT' -p511 -sg28 -g4 -sg29 -(dp512 -g31 -(dp513 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p514 -sg35 -(dp515 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p516 -sg41 -S'166929890' -p517 -sg43 -VAC -p518 -sssg45 -(dp519 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p520 -sg35 -(dp521 -g37 -g38 -sg39 
-S'TCCAGGTCCT' -p522 -sg41 -S'166073380' -p523 -sg43 -VAC -p524 -sssg52 -(dp525 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p526 -sg35 -(dp527 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p528 -sg41 -S'166929890' -p529 -sg43 -g518 -sssg59 -(dp530 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p531 -sg35 -(dp532 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p533 -sg41 -S'166073380' -p534 -sg43 -g524 -sssssS'NM_001202435.2:c.233_242delinsGT' -p535 -(dp536 -g3 -g4 -sg5 -(lp537 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p538 -aS'RefSeqGene record not available' -p539 -asg9 -g4 -sg10 -(lp540 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 4, mRNA -p541 -sg14 -S'SCN1A' -p542 -sg16 -(dp543 -g18 -S'NP_001189364.1:p.(Glu78GlyfsTer7)' -p544 -sg20 -S'NP_001189364.1:p.(E78Gfs*7)' -p545 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001202435.2:c.233_242delinsGT' -p546 -sg28 -g4 -sg29 -(dp547 -g31 -(dp548 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p549 -sg35 -(dp550 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p551 -sg41 -S'166929890' -p552 -sg43 -VAC -p553 -sssg45 -(dp554 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p555 -sg35 -(dp556 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p557 -sg41 -S'166073380' -p558 -sg43 -VAC -p559 -sssg52 -(dp560 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p561 -sg35 -(dp562 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p563 -sg41 -S'166929890' -p564 -sg43 -g553 -sssg59 -(dp565 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p566 -sg35 -(dp567 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p568 -sg41 -S'166073380' -p569 -sg43 -g559 -sssssS'NM_006920.5:c.233_242delinsGT' -p570 -(dp571 -g3 -g4 -sg5 -(lp572 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p573 -aS'RefSeqGene record not available' -p574 -asg9 -g4 -sg10 -(lp575 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 2, mRNA -p576 
-sg14 -S'SCN1A' -p577 -sg16 -(dp578 -g18 -S'NP_008851.3(LRG_8p1):p.(Glu78GlyfsTer7)' -p579 -sg20 -S'NP_008851.3:p.(E78Gfs*7)' -p580 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_006920.5:c.233_242delinsGT' -p581 -sg28 -g4 -sg29 -(dp582 -g31 -(dp583 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p584 -sg35 -(dp585 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p586 -sg41 -S'166929890' -p587 -sg43 -VAC -p588 -sssg45 -(dp589 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p590 -sg35 -(dp591 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p592 -sg41 -S'166073380' -p593 -sg43 -VAC -p594 -sssg52 -(dp595 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p596 -sg35 -(dp597 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p598 -sg41 -S'166929890' -p599 -sg43 -g588 -sssg59 -(dp600 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p601 -sg35 -(dp602 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p603 -sg41 -S'166073380' -p604 -sg43 -g594 -sssssS'NM_001165964.2:c.233_242delinsGT' -p605 -(dp606 -g3 -g4 -sg5 -(lp607 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p608 -aS'RefSeqGene record not available' -p609 -asg9 -g4 -sg10 -(lp610 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 3, mRNA -p611 -sg14 -S'SCN1A' -p612 -sg16 -(dp613 -g18 -S'NP_001159436.1:p.(Glu78GlyfsTer7)' -p614 -sg20 -S'NP_001159436.1:p.(E78Gfs*7)' -p615 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001165964.2:c.233_242delinsGT' -p616 -sg28 -g4 -sg29 -(dp617 -g31 -(dp618 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p619 -sg35 -(dp620 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p621 -sg41 -S'166929890' -p622 -sg43 -VAC -p623 -sssg45 -(dp624 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p625 -sg35 -(dp626 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p627 -sg41 -S'166073380' -p628 -sg43 -VAC -p629 -sssg52 -(dp630 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p631 -sg35 -(dp632 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p633 -sg41 -S'166929890' -p634 -sg43 -g623 -sssg59 -(dp635 
-g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p636 -sg35 -(dp637 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p638 -sg41 -S'166073380' -p639 -sg43 -g629 -sssssS'NM_001353961.1:c.-2193_-2184delinsGT' -p640 -(dp641 -g3 -g4 -sg5 -(lp642 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p643 -aS'RefSeqGene record not available' -p644 -asg9 -g4 -sg10 -(lp645 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 15, mRNA -p646 -sg14 -S'SCN1A' -p647 -sg16 -(dp648 -g18 -S'NP_001340890.1:p.?' -p649 -sg20 -S'NP_001340890.1:p.?' -p650 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353961.1:c.-2193_-2184delinsGT' -p651 -sg28 -g4 -sg29 -(dp652 -g31 -(dp653 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p654 -sg35 -(dp655 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p656 -sg41 -S'166929890' -p657 -sg43 -VAC -p658 -sssg45 -(dp659 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p660 -sg35 -(dp661 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p662 -sg41 -S'166073380' -p663 -sg43 -VAC -p664 -sssg52 -(dp665 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p666 -sg35 -(dp667 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p668 -sg41 -S'166929890' -p669 -sg43 -g658 -sssg59 -(dp670 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p671 -sg35 -(dp672 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p673 -sg41 -S'166073380' -p674 -sg43 -g664 -sssssS'NM_001353948.1:c.233_242delinsGT' -p675 -(dp676 -g3 -g4 -sg5 -(lp677 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p678 -aS'RefSeqGene record not available' -p679 -asg9 -g4 -sg10 -(lp680 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 5, mRNA -p681 -sg14 -S'SCN1A' -p682 -sg16 -(dp683 -g18 -S'NP_001340877.1:p.(Glu78GlyfsTer7)' -p684 -sg20 -S'NP_001340877.1:p.(E78Gfs*7)' -p685 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353948.1:c.233_242delinsGT' -p686 -sg28 -g4 -sg29 -(dp687 -g31 -(dp688 -g33 
-S'NC_000002.11:g.166929890_166929899delinsAC' -p689 -sg35 -(dp690 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p691 -sg41 -S'166929890' -p692 -sg43 -VAC -p693 -sssg45 -(dp694 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p695 -sg35 -(dp696 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p697 -sg41 -S'166073380' -p698 -sg43 -VAC -p699 -sssg52 -(dp700 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p701 -sg35 -(dp702 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p703 -sg41 -S'166929890' -p704 -sg43 -g693 -sssg59 -(dp705 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p706 -sg35 -(dp707 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p708 -sg41 -S'166073380' -p709 -sg43 -g699 -sssssS'NM_001353949.1:c.233_242delinsGT' -p710 -(dp711 -g3 -g4 -sg5 -(lp712 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p713 -aS'RefSeqGene record not available' -p714 -asg9 -g4 -sg10 -(lp715 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 6, mRNA -p716 -sg14 -S'SCN1A' -p717 -sg16 -(dp718 -g18 -S'NP_001340878.1:p.(Glu78GlyfsTer7)' -p719 -sg20 -S'NP_001340878.1:p.(E78Gfs*7)' -p720 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353949.1:c.233_242delinsGT' -p721 -sg28 -g4 -sg29 -(dp722 -g31 -(dp723 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p724 -sg35 -(dp725 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p726 -sg41 -S'166929890' -p727 -sg43 -VAC -p728 -sssg45 -(dp729 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p730 -sg35 -(dp731 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p732 -sg41 -S'166073380' -p733 -sg43 -VAC -p734 -sssg52 -(dp735 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p736 -sg35 -(dp737 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p738 -sg41 -S'166929890' -p739 -sg43 -g728 -sssg59 -(dp740 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p741 -sg35 -(dp742 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p743 -sg41 -S'166073380' -p744 -sg43 -g734 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant272.txt b/VariantValidator/testing/testOutputsMasterITS/variant272.txt deleted file mode 100644 index 92ba95aa..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant272.txt +++ /dev/null @@ -1,2260 +0,0 @@ -(dp0 -S'NM_001353950.1:c.233_240del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p7 -aS'RefSeqGene record not available' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 7, mRNA -p13 -sS'gene_symbol' -p14 -S'SCN1A' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001340879.1:p.(Glu78GlyfsTer7)' -p19 -sS'slr' -p20 -S'NP_001340879.1:p.(E78Gfs*7)' -p21 -ssS'submitted_variant' -p22 -S'2-166929891-CCAGGTCCT-C' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'HGVS_LRG_variant' -p25 -g4 -sS'HGVS_transcript_variant' -p26 -S'NM_001353950.1:c.233_240del' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000002.11:g.166929892_166929899del' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr2' -p38 -sS'ref' -p39 -S'CCAGGTCCT' -p40 -sS'pos' -p41 -S'166929891' -p42 -sS'alt' -p43 -S'C' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000002.12:g.166073382_166073389del' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'CCAGGTCCT' -p49 -sg41 -S'166073381' -p50 -sg43 -g44 -sssS'GRCh37' -p51 -(dp52 -g33 -S'NC_000002.11:g.166929892_166929899del' -p53 -sg35 -(dp54 -g37 -S'2' -p55 -sg39 -S'CCAGGTCCT' -p56 -sg41 -S'166929891' -p57 -sg43 -g44 -sssS'GRCh38' -p58 -(dp59 -g33 -S'NC_000002.12:g.166073382_166073389del' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -S'CCAGGTCCT' -p62 -sg41 -S'166073381' 
-p63 -sg43 -g44 -sssssS'NM_001353960.1:c.233_240del' -p64 -(dp65 -g3 -g4 -sg5 -(lp66 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p67 -aS'RefSeqGene record not available' -p68 -asg9 -g4 -sg10 -(lp69 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 14, mRNA -p70 -sg14 -S'SCN1A' -p71 -sg16 -(dp72 -g18 -S'NP_001340889.1:p.(Glu78GlyfsTer7)' -p73 -sg20 -S'NP_001340889.1:p.(E78Gfs*7)' -p74 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353960.1:c.233_240del' -p75 -sg28 -g4 -sg29 -(dp76 -g31 -(dp77 -g33 -S'NC_000002.11:g.166929892_166929899del' -p78 -sg35 -(dp79 -g37 -g38 -sg39 -S'CCAGGTCCT' -p80 -sg41 -S'166929891' -p81 -sg43 -g44 -sssg45 -(dp82 -g33 -S'NC_000002.12:g.166073382_166073389del' -p83 -sg35 -(dp84 -g37 -g38 -sg39 -S'CCAGGTCCT' -p85 -sg41 -S'166073381' -p86 -sg43 -g44 -sssg51 -(dp87 -g33 -S'NC_000002.11:g.166929892_166929899del' -p88 -sg35 -(dp89 -g37 -g55 -sg39 -S'CCAGGTCCT' -p90 -sg41 -S'166929891' -p91 -sg43 -g44 -sssg58 -(dp92 -g33 -S'NC_000002.12:g.166073382_166073389del' -p93 -sg35 -(dp94 -g37 -g55 -sg39 -S'CCAGGTCCT' -p95 -sg41 -S'166073381' -p96 -sg43 -g44 -sssssS'NM_006920.5:c.233_240del' -p97 -(dp98 -g3 -g4 -sg5 -(lp99 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p100 -aS'RefSeqGene record not available' -p101 -asg9 -g4 -sg10 -(lp102 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 2, mRNA -p103 -sg14 -S'SCN1A' -p104 -sg16 -(dp105 -g18 -S'NP_008851.3(LRG_8p1):p.(Glu78GlyfsTer7)' -p106 -sg20 -S'NP_008851.3:p.(E78Gfs*7)' -p107 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_006920.5:c.233_240del' -p108 -sg28 -g4 -sg29 -(dp109 -g31 -(dp110 -g33 -S'NC_000002.11:g.166929892_166929899del' -p111 -sg35 -(dp112 -g37 -g38 -sg39 -S'CCAGGTCCT' -p113 -sg41 -S'166929891' -p114 -sg43 -g44 -sssg45 -(dp115 -g33 -S'NC_000002.12:g.166073382_166073389del' -p116 -sg35 -(dp117 -g37 -g38 -sg39 
-S'CCAGGTCCT' -p118 -sg41 -S'166073381' -p119 -sg43 -g44 -sssg51 -(dp120 -g33 -S'NC_000002.11:g.166929892_166929899del' -p121 -sg35 -(dp122 -g37 -g55 -sg39 -S'CCAGGTCCT' -p123 -sg41 -S'166929891' -p124 -sg43 -g44 -sssg58 -(dp125 -g33 -S'NC_000002.12:g.166073382_166073389del' -p126 -sg35 -(dp127 -g37 -g55 -sg39 -S'CCAGGTCCT' -p128 -sg41 -S'166073381' -p129 -sg43 -g44 -sssssS'NM_006920.4:c.233_240del' -p130 -(dp131 -g3 -S'LRG_8t1:c.233_240del' -p132 -sg5 -(lp133 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p134 -aS'A more recent version of the selected reference sequence NM_006920.4 is available (NM_006920.5)' -p135 -aS'NM_006920.5:c.233_240del MUST be fully validated prior to use in reports' -p136 -aS'select_variants=NM_006920.5:c.233_240del' -p137 -asg9 -g4 -sg10 -(lp138 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 2, mRNA -p139 -sg14 -S'SCN1A' -p140 -sg16 -(dp141 -g18 -S'NP_008851.3(LRG_8p1):p.(Glu78GlyfsTer7)' -p142 -sg20 -S'NP_008851.3:p.(E78Gfs*7)' -p143 -ssg22 -g23 -sg24 -g4 -sg25 -S'LRG_8:g.5251_5258del' -p144 -sg26 -S'NM_006920.4:c.233_240del' -p145 -sg28 -S'NG_011906.1:g.5251_5258del' -p146 -sg29 -(dp147 -g31 -(dp148 -g33 -S'NC_000002.11:g.166929892_166929899del' -p149 -sg35 -(dp150 -g37 -g38 -sg39 -S'CCAGGTCCT' -p151 -sg41 -S'166929891' -p152 -sg43 -g44 -sssg45 -(dp153 -g33 -S'NC_000002.12:g.166073382_166073389del' -p154 -sg35 -(dp155 -g37 -g38 -sg39 -S'CCAGGTCCT' -p156 -sg41 -S'166073381' -p157 -sg43 -g44 -sssg51 -(dp158 -g33 -S'NC_000002.11:g.166929892_166929899del' -p159 -sg35 -(dp160 -g37 -g55 -sg39 -S'CCAGGTCCT' -p161 -sg41 -S'166929891' -p162 -sg43 -g44 -sssg58 -(dp163 -g33 -S'NC_000002.12:g.166073382_166073389del' -p164 -sg35 -(dp165 -g37 -g55 -sg39 -S'CCAGGTCCT' -p166 -sg41 -S'166073381' -p167 -sg43 -g44 -sssssS'NM_001353955.1:c.233_240del' -p168 -(dp169 -g3 -g4 -sg5 -(lp170 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to 
NC_000002.11:g.166929893_166929900del' -p171 -aS'RefSeqGene record not available' -p172 -asg9 -g4 -sg10 -(lp173 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 11, mRNA -p174 -sg14 -S'SCN1A' -p175 -sg16 -(dp176 -g18 -S'NP_001340884.1:p.(Glu78GlyfsTer7)' -p177 -sg20 -S'NP_001340884.1:p.(E78Gfs*7)' -p178 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353955.1:c.233_240del' -p179 -sg28 -g4 -sg29 -(dp180 -g31 -(dp181 -g33 -S'NC_000002.11:g.166929892_166929899del' -p182 -sg35 -(dp183 -g37 -g38 -sg39 -S'CCAGGTCCT' -p184 -sg41 -S'166929891' -p185 -sg43 -g44 -sssg45 -(dp186 -g33 -S'NC_000002.12:g.166073382_166073389del' -p187 -sg35 -(dp188 -g37 -g38 -sg39 -S'CCAGGTCCT' -p189 -sg41 -S'166073381' -p190 -sg43 -g44 -sssg51 -(dp191 -g33 -S'NC_000002.11:g.166929892_166929899del' -p192 -sg35 -(dp193 -g37 -g55 -sg39 -S'CCAGGTCCT' -p194 -sg41 -S'166929891' -p195 -sg43 -g44 -sssg58 -(dp196 -g33 -S'NC_000002.12:g.166073382_166073389del' -p197 -sg35 -(dp198 -g37 -g55 -sg39 -S'CCAGGTCCT' -p199 -sg41 -S'166073381' -p200 -sg43 -g44 -sssssS'NM_001165964.2:c.233_240del' -p201 -(dp202 -g3 -g4 -sg5 -(lp203 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p204 -aS'RefSeqGene record not available' -p205 -asg9 -g4 -sg10 -(lp206 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 3, mRNA -p207 -sg14 -S'SCN1A' -p208 -sg16 -(dp209 -g18 -S'NP_001159436.1:p.(Glu78GlyfsTer7)' -p210 -sg20 -S'NP_001159436.1:p.(E78Gfs*7)' -p211 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001165964.2:c.233_240del' -p212 -sg28 -g4 -sg29 -(dp213 -g31 -(dp214 -g33 -S'NC_000002.11:g.166929892_166929899del' -p215 -sg35 -(dp216 -g37 -g38 -sg39 -S'CCAGGTCCT' -p217 -sg41 -S'166929891' -p218 -sg43 -g44 -sssg45 -(dp219 -g33 -S'NC_000002.12:g.166073382_166073389del' -p220 -sg35 -(dp221 -g37 -g38 -sg39 -S'CCAGGTCCT' -p222 -sg41 -S'166073381' -p223 -sg43 -g44 -sssg51 -(dp224 -g33 
-S'NC_000002.11:g.166929892_166929899del' -p225 -sg35 -(dp226 -g37 -g55 -sg39 -S'CCAGGTCCT' -p227 -sg41 -S'166929891' -p228 -sg43 -g44 -sssg58 -(dp229 -g33 -S'NC_000002.12:g.166073382_166073389del' -p230 -sg35 -(dp231 -g37 -g55 -sg39 -S'CCAGGTCCT' -p232 -sg41 -S'166073381' -p233 -sg43 -g44 -sssssS'NM_001165963.2:c.233_240del' -p234 -(dp235 -g3 -g4 -sg5 -(lp236 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p237 -aS'RefSeqGene record not available' -p238 -asg9 -g4 -sg10 -(lp239 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 1, mRNA -p240 -sg14 -S'SCN1A' -p241 -sg16 -(dp242 -g18 -S'NP_001159435.1:p.(Glu78GlyfsTer7)' -p243 -sg20 -S'NP_001159435.1:p.(E78Gfs*7)' -p244 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001165963.2:c.233_240del' -p245 -sg28 -g4 -sg29 -(dp246 -g31 -(dp247 -g33 -S'NC_000002.11:g.166929892_166929899del' -p248 -sg35 -(dp249 -g37 -g38 -sg39 -S'CCAGGTCCT' -p250 -sg41 -S'166929891' -p251 -sg43 -g44 -sssg45 -(dp252 -g33 -S'NC_000002.12:g.166073382_166073389del' -p253 -sg35 -(dp254 -g37 -g38 -sg39 -S'CCAGGTCCT' -p255 -sg41 -S'166073381' -p256 -sg43 -g44 -sssg51 -(dp257 -g33 -S'NC_000002.11:g.166929892_166929899del' -p258 -sg35 -(dp259 -g37 -g55 -sg39 -S'CCAGGTCCT' -p260 -sg41 -S'166929891' -p261 -sg43 -g44 -sssg58 -(dp262 -g33 -S'NC_000002.12:g.166073382_166073389del' -p263 -sg35 -(dp264 -g37 -g55 -sg39 -S'CCAGGTCCT' -p265 -sg41 -S'166073381' -p266 -sg43 -g44 -sssssS'NM_001353957.1:c.233_240del' -p267 -(dp268 -g3 -g4 -sg5 -(lp269 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p270 -aS'RefSeqGene record not available' -p271 -asg9 -g4 -sg10 -(lp272 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 12, mRNA -p273 -sg14 -S'SCN1A' -p274 -sg16 -(dp275 -g18 -S'NP_001340886.1:p.(Glu78GlyfsTer7)' -p276 -sg20 -S'NP_001340886.1:p.(E78Gfs*7)' -p277 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 
-S'NM_001353957.1:c.233_240del' -p278 -sg28 -g4 -sg29 -(dp279 -g31 -(dp280 -g33 -S'NC_000002.11:g.166929892_166929899del' -p281 -sg35 -(dp282 -g37 -g38 -sg39 -S'CCAGGTCCT' -p283 -sg41 -S'166929891' -p284 -sg43 -g44 -sssg45 -(dp285 -g33 -S'NC_000002.12:g.166073382_166073389del' -p286 -sg35 -(dp287 -g37 -g38 -sg39 -S'CCAGGTCCT' -p288 -sg41 -S'166073381' -p289 -sg43 -g44 -sssg51 -(dp290 -g33 -S'NC_000002.11:g.166929892_166929899del' -p291 -sg35 -(dp292 -g37 -g55 -sg39 -S'CCAGGTCCT' -p293 -sg41 -S'166929891' -p294 -sg43 -g44 -sssg58 -(dp295 -g33 -S'NC_000002.12:g.166073382_166073389del' -p296 -sg35 -(dp297 -g37 -g55 -sg39 -S'CCAGGTCCT' -p298 -sg41 -S'166073381' -p299 -sg43 -g44 -sssssS'NM_001353951.1:c.233_240del' -p300 -(dp301 -g3 -g4 -sg5 -(lp302 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p303 -aS'RefSeqGene record not available' -p304 -asg9 -g4 -sg10 -(lp305 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 8, mRNA -p306 -sg14 -S'SCN1A' -p307 -sg16 -(dp308 -g18 -S'NP_001340880.1:p.(Glu78GlyfsTer7)' -p309 -sg20 -S'NP_001340880.1:p.(E78Gfs*7)' -p310 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353951.1:c.233_240del' -p311 -sg28 -g4 -sg29 -(dp312 -g31 -(dp313 -g33 -S'NC_000002.11:g.166929892_166929899del' -p314 -sg35 -(dp315 -g37 -g38 -sg39 -S'CCAGGTCCT' -p316 -sg41 -S'166929891' -p317 -sg43 -g44 -sssg45 -(dp318 -g33 -S'NC_000002.12:g.166073382_166073389del' -p319 -sg35 -(dp320 -g37 -g38 -sg39 -S'CCAGGTCCT' -p321 -sg41 -S'166073381' -p322 -sg43 -g44 -sssg51 -(dp323 -g33 -S'NC_000002.11:g.166929892_166929899del' -p324 -sg35 -(dp325 -g37 -g55 -sg39 -S'CCAGGTCCT' -p326 -sg41 -S'166929891' -p327 -sg43 -g44 -sssg58 -(dp328 -g33 -S'NC_000002.12:g.166073382_166073389del' -p329 -sg35 -(dp330 -g37 -g55 -sg39 -S'CCAGGTCCT' -p331 -sg41 -S'166073381' -p332 -sg43 -g44 -sssssS'NM_001353952.1:c.233_240del' -p333 -(dp334 -g3 -g4 -sg5 -(lp335 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped 
to NC_000002.11:g.166929893_166929900del' -p336 -aS'RefSeqGene record not available' -p337 -asg9 -g4 -sg10 -(lp338 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 9, mRNA -p339 -sg14 -S'SCN1A' -p340 -sg16 -(dp341 -g18 -S'NP_001340881.1:p.(Glu78GlyfsTer7)' -p342 -sg20 -S'NP_001340881.1:p.(E78Gfs*7)' -p343 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353952.1:c.233_240del' -p344 -sg28 -g4 -sg29 -(dp345 -g31 -(dp346 -g33 -S'NC_000002.11:g.166929892_166929899del' -p347 -sg35 -(dp348 -g37 -g38 -sg39 -S'CCAGGTCCT' -p349 -sg41 -S'166929891' -p350 -sg43 -g44 -sssg45 -(dp351 -g33 -S'NC_000002.12:g.166073382_166073389del' -p352 -sg35 -(dp353 -g37 -g38 -sg39 -S'CCAGGTCCT' -p354 -sg41 -S'166073381' -p355 -sg43 -g44 -sssg51 -(dp356 -g33 -S'NC_000002.11:g.166929892_166929899del' -p357 -sg35 -(dp358 -g37 -g55 -sg39 -S'CCAGGTCCT' -p359 -sg41 -S'166929891' -p360 -sg43 -g44 -sssg58 -(dp361 -g33 -S'NC_000002.12:g.166073382_166073389del' -p362 -sg35 -(dp363 -g37 -g55 -sg39 -S'CCAGGTCCT' -p364 -sg41 -S'166073381' -p365 -sg43 -g44 -sssssS'NM_001202435.1:c.233_240del' -p366 -(dp367 -g3 -g4 -sg5 -(lp368 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p369 -aS'A more recent version of the selected reference sequence NM_001202435.1 is available (NM_001202435.2)' -p370 -aS'NM_001202435.2:c.233_240del MUST be fully validated prior to use in reports' -p371 -aS'select_variants=NM_001202435.2:c.233_240del' -p372 -aS'RefSeqGene record not available' -p373 -asg9 -g4 -sg10 -(lp374 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 4, mRNA -p375 -sg14 -S'SCN1A' -p376 -sg16 -(dp377 -g18 -S'NP_001189364.1:p.(Glu78GlyfsTer7)' -p378 -sg20 -S'NP_001189364.1:p.(E78Gfs*7)' -p379 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001202435.1:c.233_240del' -p380 -sg28 -g4 -sg29 -(dp381 -g31 -(dp382 -g33 -S'NC_000002.11:g.166929892_166929899del' -p383 -sg35 -(dp384 -g37 -g38 -sg39 
-S'CCAGGTCCT' -p385 -sg41 -S'166929891' -p386 -sg43 -g44 -sssg45 -(dp387 -g33 -S'NC_000002.12:g.166073382_166073389del' -p388 -sg35 -(dp389 -g37 -g38 -sg39 -S'CCAGGTCCT' -p390 -sg41 -S'166073381' -p391 -sg43 -g44 -sssg51 -(dp392 -g33 -S'NC_000002.11:g.166929892_166929899del' -p393 -sg35 -(dp394 -g37 -g55 -sg39 -S'CCAGGTCCT' -p395 -sg41 -S'166929891' -p396 -sg43 -g44 -sssg58 -(dp397 -g33 -S'NC_000002.12:g.166073382_166073389del' -p398 -sg35 -(dp399 -g37 -g55 -sg39 -S'CCAGGTCCT' -p400 -sg41 -S'166073381' -p401 -sg43 -g44 -sssssS'NM_001353954.1:c.233_240del' -p402 -(dp403 -g3 -g4 -sg5 -(lp404 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p405 -aS'RefSeqGene record not available' -p406 -asg9 -g4 -sg10 -(lp407 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 10, mRNA -p408 -sg14 -S'SCN1A' -p409 -sg16 -(dp410 -g18 -S'NP_001340883.1:p.(Glu78GlyfsTer7)' -p411 -sg20 -S'NP_001340883.1:p.(E78Gfs*7)' -p412 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353954.1:c.233_240del' -p413 -sg28 -g4 -sg29 -(dp414 -g31 -(dp415 -g33 -S'NC_000002.11:g.166929892_166929899del' -p416 -sg35 -(dp417 -g37 -g38 -sg39 -S'CCAGGTCCT' -p418 -sg41 -S'166929891' -p419 -sg43 -g44 -sssg45 -(dp420 -g33 -S'NC_000002.12:g.166073382_166073389del' -p421 -sg35 -(dp422 -g37 -g38 -sg39 -S'CCAGGTCCT' -p423 -sg41 -S'166073381' -p424 -sg43 -g44 -sssg51 -(dp425 -g33 -S'NC_000002.11:g.166929892_166929899del' -p426 -sg35 -(dp427 -g37 -g55 -sg39 -S'CCAGGTCCT' -p428 -sg41 -S'166929891' -p429 -sg43 -g44 -sssg58 -(dp430 -g33 -S'NC_000002.12:g.166073382_166073389del' -p431 -sg35 -(dp432 -g37 -g55 -sg39 -S'CCAGGTCCT' -p433 -sg41 -S'166073381' -p434 -sg43 -g44 -sssssS'NM_001202435.2:c.233_240del' -p435 -(dp436 -g3 -g4 -sg5 -(lp437 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p438 -aS'RefSeqGene record not available' -p439 -asg9 -g4 -sg10 -(lp440 -sg12 -VHomo sapiens sodium voltage-gated 
channel alpha subunit 1 (SCN1A), transcript variant 4, mRNA -p441 -sg14 -S'SCN1A' -p442 -sg16 -(dp443 -g18 -S'NP_001189364.1:p.(Glu78GlyfsTer7)' -p444 -sg20 -S'NP_001189364.1:p.(E78Gfs*7)' -p445 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001202435.2:c.233_240del' -p446 -sg28 -g4 -sg29 -(dp447 -g31 -(dp448 -g33 -S'NC_000002.11:g.166929892_166929899del' -p449 -sg35 -(dp450 -g37 -g38 -sg39 -S'CCAGGTCCT' -p451 -sg41 -S'166929891' -p452 -sg43 -g44 -sssg45 -(dp453 -g33 -S'NC_000002.12:g.166073382_166073389del' -p454 -sg35 -(dp455 -g37 -g38 -sg39 -S'CCAGGTCCT' -p456 -sg41 -S'166073381' -p457 -sg43 -g44 -sssg51 -(dp458 -g33 -S'NC_000002.11:g.166929892_166929899del' -p459 -sg35 -(dp460 -g37 -g55 -sg39 -S'CCAGGTCCT' -p461 -sg41 -S'166929891' -p462 -sg43 -g44 -sssg58 -(dp463 -g33 -S'NC_000002.12:g.166073382_166073389del' -p464 -sg35 -(dp465 -g37 -g55 -sg39 -S'CCAGGTCCT' -p466 -sg41 -S'166073381' -p467 -sg43 -g44 -sssssS'flag' -p468 -S'gene_variant' -p469 -sS'NM_001353961.1:c.-2193_-2186del' -p470 -(dp471 -g3 -g4 -sg5 -(lp472 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p473 -aS'RefSeqGene record not available' -p474 -asg9 -g4 -sg10 -(lp475 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 15, mRNA -p476 -sg14 -S'SCN1A' -p477 -sg16 -(dp478 -g18 -S'NP_001340890.1:p.?' -p479 -sg20 -S'NP_001340890.1:p.?' 
-p480 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353961.1:c.-2193_-2186del' -p481 -sg28 -g4 -sg29 -(dp482 -g31 -(dp483 -g33 -S'NC_000002.11:g.166929892_166929899del' -p484 -sg35 -(dp485 -g37 -g38 -sg39 -S'CCAGGTCCT' -p486 -sg41 -S'166929891' -p487 -sg43 -g44 -sssg45 -(dp488 -g33 -S'NC_000002.12:g.166073382_166073389del' -p489 -sg35 -(dp490 -g37 -g38 -sg39 -S'CCAGGTCCT' -p491 -sg41 -S'166073381' -p492 -sg43 -g44 -sssg51 -(dp493 -g33 -S'NC_000002.11:g.166929892_166929899del' -p494 -sg35 -(dp495 -g37 -g55 -sg39 -S'CCAGGTCCT' -p496 -sg41 -S'166929891' -p497 -sg43 -g44 -sssg58 -(dp498 -g33 -S'NC_000002.12:g.166073382_166073389del' -p499 -sg35 -(dp500 -g37 -g55 -sg39 -S'CCAGGTCCT' -p501 -sg41 -S'166073381' -p502 -sg43 -g44 -sssssS'NM_001165963.1:c.233_240del' -p503 -(dp504 -g3 -g4 -sg5 -(lp505 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p506 -aS'A more recent version of the selected reference sequence NM_001165963.1 is available (NM_001165963.2)' -p507 -aS'NM_001165963.2:c.233_240del MUST be fully validated prior to use in reports' -p508 -aS'select_variants=NM_001165963.2:c.233_240del' -p509 -aS'RefSeqGene record not available' -p510 -asg9 -g4 -sg10 -(lp511 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 1, mRNA -p512 -sg14 -S'SCN1A' -p513 -sg16 -(dp514 -g18 -S'NP_001159435.1:p.(Glu78GlyfsTer7)' -p515 -sg20 -S'NP_001159435.1:p.(E78Gfs*7)' -p516 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001165963.1:c.233_240del' -p517 -sg28 -g4 -sg29 -(dp518 -g31 -(dp519 -g33 -S'NC_000002.11:g.166929892_166929899del' -p520 -sg35 -(dp521 -g37 -g38 -sg39 -S'CCAGGTCCT' -p522 -sg41 -S'166929891' -p523 -sg43 -g44 -sssg45 -(dp524 -g33 -S'NC_000002.12:g.166073382_166073389del' -p525 -sg35 -(dp526 -g37 -g38 -sg39 -S'CCAGGTCCT' -p527 -sg41 -S'166073381' -p528 -sg43 -g44 -sssg51 -(dp529 -g33 -S'NC_000002.11:g.166929892_166929899del' -p530 -sg35 -(dp531 -g37 -g55 -sg39 -S'CCAGGTCCT' -p532 -sg41 
-S'166929891' -p533 -sg43 -g44 -sssg58 -(dp534 -g33 -S'NC_000002.12:g.166073382_166073389del' -p535 -sg35 -(dp536 -g37 -g55 -sg39 -S'CCAGGTCCT' -p537 -sg41 -S'166073381' -p538 -sg43 -g44 -sssssS'NM_001165964.1:c.233_240del' -p539 -(dp540 -g3 -g4 -sg5 -(lp541 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p542 -aS'A more recent version of the selected reference sequence NM_001165964.1 is available (NM_001165964.2)' -p543 -aS'NM_001165964.2:c.233_240del MUST be fully validated prior to use in reports' -p544 -aS'select_variants=NM_001165964.2:c.233_240del' -p545 -aS'RefSeqGene record not available' -p546 -asg9 -g4 -sg10 -(lp547 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 3, mRNA -p548 -sg14 -S'SCN1A' -p549 -sg16 -(dp550 -g18 -S'NP_001159436.1:p.(Glu78GlyfsTer7)' -p551 -sg20 -S'NP_001159436.1:p.(E78Gfs*7)' -p552 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001165964.1:c.233_240del' -p553 -sg28 -g4 -sg29 -(dp554 -g31 -(dp555 -g33 -S'NC_000002.11:g.166929892_166929899del' -p556 -sg35 -(dp557 -g37 -g38 -sg39 -S'CCAGGTCCT' -p558 -sg41 -S'166929891' -p559 -sg43 -g44 -sssg45 -(dp560 -g33 -S'NC_000002.12:g.166073382_166073389del' -p561 -sg35 -(dp562 -g37 -g38 -sg39 -S'CCAGGTCCT' -p563 -sg41 -S'166073381' -p564 -sg43 -g44 -sssg51 -(dp565 -g33 -S'NC_000002.11:g.166929892_166929899del' -p566 -sg35 -(dp567 -g37 -g55 -sg39 -S'CCAGGTCCT' -p568 -sg41 -S'166929891' -p569 -sg43 -g44 -sssg58 -(dp570 -g33 -S'NC_000002.12:g.166073382_166073389del' -p571 -sg35 -(dp572 -g37 -g55 -sg39 -S'CCAGGTCCT' -p573 -sg41 -S'166073381' -p574 -sg43 -g44 -sssssS'NM_001353948.1:c.233_240del' -p575 -(dp576 -g3 -g4 -sg5 -(lp577 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p578 -aS'RefSeqGene record not available' -p579 -asg9 -g4 -sg10 -(lp580 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 5, mRNA -p581 -sg14 -S'SCN1A' 
-p582 -sg16 -(dp583 -g18 -S'NP_001340877.1:p.(Glu78GlyfsTer7)' -p584 -sg20 -S'NP_001340877.1:p.(E78Gfs*7)' -p585 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353948.1:c.233_240del' -p586 -sg28 -g4 -sg29 -(dp587 -g31 -(dp588 -g33 -S'NC_000002.11:g.166929892_166929899del' -p589 -sg35 -(dp590 -g37 -g38 -sg39 -S'CCAGGTCCT' -p591 -sg41 -S'166929891' -p592 -sg43 -g44 -sssg45 -(dp593 -g33 -S'NC_000002.12:g.166073382_166073389del' -p594 -sg35 -(dp595 -g37 -g38 -sg39 -S'CCAGGTCCT' -p596 -sg41 -S'166073381' -p597 -sg43 -g44 -sssg51 -(dp598 -g33 -S'NC_000002.11:g.166929892_166929899del' -p599 -sg35 -(dp600 -g37 -g55 -sg39 -S'CCAGGTCCT' -p601 -sg41 -S'166929891' -p602 -sg43 -g44 -sssg58 -(dp603 -g33 -S'NC_000002.12:g.166073382_166073389del' -p604 -sg35 -(dp605 -g37 -g55 -sg39 -S'CCAGGTCCT' -p606 -sg41 -S'166073381' -p607 -sg43 -g44 -sssssS'NM_001353958.1:c.233_240del' -p608 -(dp609 -g3 -g4 -sg5 -(lp610 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p611 -aS'RefSeqGene record not available' -p612 -asg9 -g4 -sg10 -(lp613 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 13, mRNA -p614 -sg14 -S'SCN1A' -p615 -sg16 -(dp616 -g18 -S'NP_001340887.1:p.(Glu78GlyfsTer7)' -p617 -sg20 -S'NP_001340887.1:p.(E78Gfs*7)' -p618 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353958.1:c.233_240del' -p619 -sg28 -g4 -sg29 -(dp620 -g31 -(dp621 -g33 -S'NC_000002.11:g.166929892_166929899del' -p622 -sg35 -(dp623 -g37 -g38 -sg39 -S'CCAGGTCCT' -p624 -sg41 -S'166929891' -p625 -sg43 -g44 -sssg45 -(dp626 -g33 -S'NC_000002.12:g.166073382_166073389del' -p627 -sg35 -(dp628 -g37 -g38 -sg39 -S'CCAGGTCCT' -p629 -sg41 -S'166073381' -p630 -sg43 -g44 -sssg51 -(dp631 -g33 -S'NC_000002.11:g.166929892_166929899del' -p632 -sg35 -(dp633 -g37 -g55 -sg39 -S'CCAGGTCCT' -p634 -sg41 -S'166929891' -p635 -sg43 -g44 -sssg58 -(dp636 -g33 -S'NC_000002.12:g.166073382_166073389del' -p637 -sg35 -(dp638 -g37 -g55 -sg39 -S'CCAGGTCCT' -p639 -sg41 
-S'166073381' -p640 -sg43 -g44 -sssssS'NR_148667.1:n.638_645del' -p641 -(dp642 -g3 -g4 -sg5 -(lp643 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p644 -aS'RefSeqGene record not available' -p645 -asg9 -g4 -sg10 -(lp646 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 16, non-coding RNA -p647 -sg14 -S'SCN1A' -p648 -sg16 -(dp649 -g18 -S'Non-coding :n.' -p650 -sg20 -g4 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NR_148667.1:n.638_645del' -p651 -sg28 -g4 -sg29 -(dp652 -g31 -(dp653 -g33 -S'NC_000002.11:g.166929892_166929899del' -p654 -sg35 -(dp655 -g37 -g38 -sg39 -S'CCAGGTCCT' -p656 -sg41 -S'166929891' -p657 -sg43 -g44 -sssg45 -(dp658 -g33 -S'NC_000002.12:g.166073382_166073389del' -p659 -sg35 -(dp660 -g37 -g38 -sg39 -S'CCAGGTCCT' -p661 -sg41 -S'166073381' -p662 -sg43 -g44 -sssg51 -(dp663 -g33 -S'NC_000002.11:g.166929892_166929899del' -p664 -sg35 -(dp665 -g37 -g55 -sg39 -S'CCAGGTCCT' -p666 -sg41 -S'166929891' -p667 -sg43 -g44 -sssg58 -(dp668 -g33 -S'NC_000002.12:g.166073382_166073389del' -p669 -sg35 -(dp670 -g37 -g55 -sg39 -S'CCAGGTCCT' -p671 -sg41 -S'166073381' -p672 -sg43 -g44 -sssssS'NM_001353949.1:c.233_240del' -p673 -(dp674 -g3 -g4 -sg5 -(lp675 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p676 -aS'RefSeqGene record not available' -p677 -asg9 -g4 -sg10 -(lp678 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 6, mRNA -p679 -sg14 -S'SCN1A' -p680 -sg16 -(dp681 -g18 -S'NP_001340878.1:p.(Glu78GlyfsTer7)' -p682 -sg20 -S'NP_001340878.1:p.(E78Gfs*7)' -p683 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353949.1:c.233_240del' -p684 -sg28 -g4 -sg29 -(dp685 -g31 -(dp686 -g33 -S'NC_000002.11:g.166929892_166929899del' -p687 -sg35 -(dp688 -g37 -g38 -sg39 -S'CCAGGTCCT' -p689 -sg41 -S'166929891' -p690 -sg43 -g44 -sssg45 -(dp691 -g33 -S'NC_000002.12:g.166073382_166073389del' -p692 -sg35 -(dp693 -g37 -g38 
-sg39 -S'CCAGGTCCT' -p694 -sg41 -S'166073381' -p695 -sg43 -g44 -sssg51 -(dp696 -g33 -S'NC_000002.11:g.166929892_166929899del' -p697 -sg35 -(dp698 -g37 -g55 -sg39 -S'CCAGGTCCT' -p699 -sg41 -S'166929891' -p700 -sg43 -g44 -sssg58 -(dp701 -g33 -S'NC_000002.12:g.166073382_166073389del' -p702 -sg35 -(dp703 -g37 -g55 -sg39 -S'CCAGGTCCT' -p704 -sg41 -S'166073381' -p705 -sg43 -g44 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant273.txt b/VariantValidator/testing/testOutputsMasterITS/variant273.txt deleted file mode 100644 index c229280e..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant273.txt +++ /dev/null @@ -1,1028 +0,0 @@ -(dp0 -S'NM_001256850.1:c.102051C>A' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens titin (TTN), transcript variant N2BA, mRNA -p12 -sS'gene_symbol' -p13 -S'TTN' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001243779.1:p.(Ser34017Arg)' -p18 -sS'slr' -p19 -S'NP_001243779.1:p.(S34017R)' -p20 -ssS'submitted_variant' -p21 -S'2-179393504-G-T' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_001256850.1:c.102051C>A' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000002.11:g.179393504G>T' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr2' -p37 -sS'ref' -p38 -VG -p39 -sS'pos' -p40 -S'179393504' -p41 -sS'alt' -p42 -VT -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000002.12:g.178528777G>T' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'178528777' -p48 -sg42 -g43 -sssS'GRCh37' -p49 -(dp50 -g32 -S'NC_000002.11:g.179393504G>T' -p51 -sg34 -(dp52 -g36 -S'2' -p53 -sg38 -g39 -sg40 -S'179393504' 
-p54 -sg42 -g43 -sssS'GRCh38' -p55 -(dp56 -g32 -S'NC_000002.12:g.178528777G>T' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'178528777' -p59 -sg42 -g43 -sssssS'NR_038271.1:n.446+5141G>T' -p60 -(dp61 -g3 -g4 -sg5 -(lp62 -S'RefSeqGene record not available' -p63 -asg8 -g4 -sg9 -(lp64 -sg11 -VHomo sapiens TTN antisense RNA 1 (TTN-AS1), transcript variant 2, long non-coding RNA -p65 -sg13 -S'TTN-AS1' -p66 -sg15 -(dp67 -g17 -S'Non-coding :n.' -p68 -sg19 -g4 -ssg21 -g22 -sg23 -S'NC_000002.11(NR_038271.1):c.446+5141G>T' -p69 -sg24 -g4 -sg25 -S'NR_038271.1:n.446+5141G>T' -p70 -sg27 -g4 -sg28 -(dp71 -g30 -(dp72 -g32 -S'NC_000002.11:g.179393504G>T' -p73 -sg34 -(dp74 -g36 -g37 -sg38 -S'G' -p75 -sg40 -S'179393504' -p76 -sg42 -S'T' -p77 -sssg44 -(dp78 -g32 -S'NC_000002.12:g.178528777G>T' -p79 -sg34 -(dp80 -g36 -g37 -sg38 -g75 -sg40 -S'178528777' -p81 -sg42 -g77 -sssg49 -(dp82 -g32 -S'NC_000002.11:g.179393504G>T' -p83 -sg34 -(dp84 -g36 -g53 -sg38 -g75 -sg40 -S'179393504' -p85 -sg42 -g77 -sssg55 -(dp86 -g32 -S'NC_000002.12:g.178528777G>T' -p87 -sg34 -(dp88 -g36 -g53 -sg38 -g75 -sg40 -S'178528777' -p89 -sg42 -g77 -sssssS'NM_133432.3:c.80154C>A' -p90 -(dp91 -g3 -g4 -sg5 -(lp92 -S'RefSeqGene record not available' -p93 -asg8 -g4 -sg9 -(lp94 -sg11 -VHomo sapiens titin (TTN), transcript variant novex-1, mRNA -p95 -sg13 -S'TTN' -p96 -sg15 -(dp97 -g17 -S'NP_597676.3:p.(Ser26718Arg)' -p98 -sg19 -S'NP_597676.3:p.(S26718R)' -p99 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_133432.3:c.80154C>A' -p100 -sg27 -g4 -sg28 -(dp101 -g30 -(dp102 -g32 -S'NC_000002.11:g.179393504G>T' -p103 -sg34 -(dp104 -g36 -g37 -sg38 -g39 -sg40 -S'179393504' -p105 -sg42 -g43 -sssg44 -(dp106 -g32 -S'NC_000002.12:g.178528777G>T' -p107 -sg34 -(dp108 -g36 -g37 -sg38 -g39 -sg40 -S'178528777' -p109 -sg42 -g43 -sssg49 -(dp110 -g32 -S'NC_000002.11:g.179393504G>T' -p111 -sg34 -(dp112 -g36 -g53 -sg38 -g39 -sg40 -S'179393504' -p113 -sg42 -g43 -sssg55 -(dp114 -g32 -S'NC_000002.12:g.178528777G>T' -p115 -sg34 -(dp116 -g36 -g53 
-sg38 -g39 -sg40 -S'178528777' -p117 -sg42 -g43 -sssssS'NM_001267550.1:c.106974C>A' -p118 -(dp119 -g3 -S'LRG_391t1:c.106974C>A' -p120 -sg5 -(lp121 -S'A more recent version of the selected reference sequence NM_001267550.1 is available (NM_001267550.2)' -p122 -aS'NM_001267550.2:c.106974C>A MUST be fully validated prior to use in reports' -p123 -aS'select_variants=NM_001267550.2:c.106974C>A' -p124 -asg8 -g4 -sg9 -(lp125 -sg11 -VHomo sapiens titin (TTN), transcript variant IC, mRNA -p126 -sg13 -S'TTN' -p127 -sg15 -(dp128 -g17 -S'NP_001254479.1:p.(Ser35658Arg)' -p129 -sg19 -S'NP_001254479.1:p.(S35658R)' -p130 -ssg21 -g22 -sg23 -g4 -sg24 -S'LRG_391:g.307026C>A' -p131 -sg25 -S'NM_001267550.1:c.106974C>A' -p132 -sg27 -S'NG_011618.3:g.307026C>A' -p133 -sg28 -(dp134 -g30 -(dp135 -g32 -S'NC_000002.11:g.179393504G>T' -p136 -sg34 -(dp137 -g36 -g37 -sg38 -g39 -sg40 -S'179393504' -p138 -sg42 -g43 -sssg49 -(dp139 -g32 -S'NC_000002.11:g.179393504G>T' -p140 -sg34 -(dp141 -g36 -g53 -sg38 -g39 -sg40 -S'179393504' -p142 -sg42 -g43 -sssssS'NR_038272.1:n.219+5141G>T' -p143 -(dp144 -g3 -g4 -sg5 -(lp145 -S'RefSeqGene record not available' -p146 -asg8 -g4 -sg9 -(lp147 -sg11 -VHomo sapiens TTN antisense RNA 1 (TTN-AS1), transcript variant 1, long non-coding RNA -p148 -sg13 -S'TTN-AS1' -p149 -sg15 -(dp150 -g17 -S'Non-coding :n.' 
-p151 -sg19 -g4 -ssg21 -g22 -sg23 -S'NC_000002.11(NR_038272.1):c.219+5141G>T' -p152 -sg24 -g4 -sg25 -S'NR_038272.1:n.219+5141G>T' -p153 -sg27 -g4 -sg28 -(dp154 -g30 -(dp155 -g32 -S'NC_000002.11:g.179393504G>T' -p156 -sg34 -(dp157 -g36 -g37 -sg38 -g75 -sg40 -S'179393504' -p158 -sg42 -g77 -sssg44 -(dp159 -g32 -S'NC_000002.12:g.178528777G>T' -p160 -sg34 -(dp161 -g36 -g37 -sg38 -g75 -sg40 -S'178528777' -p162 -sg42 -g77 -sssg49 -(dp163 -g32 -S'NC_000002.11:g.179393504G>T' -p164 -sg34 -(dp165 -g36 -g53 -sg38 -g75 -sg40 -S'179393504' -p166 -sg42 -g77 -sssg55 -(dp167 -g32 -S'NC_000002.12:g.178528777G>T' -p168 -sg34 -(dp169 -g36 -g53 -sg38 -g75 -sg40 -S'178528777' -p170 -sg42 -g77 -sssssS'NM_133437.4:c.80355C>A' -p171 -(dp172 -g3 -g4 -sg5 -(lp173 -S'RefSeqGene record not available' -p174 -asg8 -g4 -sg9 -(lp175 -sg11 -VHomo sapiens titin (TTN), transcript variant novex-2, mRNA -p176 -sg13 -S'TTN' -p177 -sg15 -(dp178 -g17 -S'NP_597681.4:p.(Ser26785Arg)' -p179 -sg19 -S'NP_597681.4:p.(S26785R)' -p180 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_133437.4:c.80355C>A' -p181 -sg27 -g4 -sg28 -(dp182 -g30 -(dp183 -g32 -S'NC_000002.11:g.179393504G>T' -p184 -sg34 -(dp185 -g36 -g37 -sg38 -g39 -sg40 -S'179393504' -p186 -sg42 -g43 -sssg44 -(dp187 -g32 -S'NC_000002.12:g.178528777G>T' -p188 -sg34 -(dp189 -g36 -g37 -sg38 -g39 -sg40 -S'178528777' -p190 -sg42 -g43 -sssg49 -(dp191 -g32 -S'NC_000002.11:g.179393504G>T' -p192 -sg34 -(dp193 -g36 -g53 -sg38 -g39 -sg40 -S'179393504' -p194 -sg42 -g43 -sssg55 -(dp195 -g32 -S'NC_000002.12:g.178528777G>T' -p196 -sg34 -(dp197 -g36 -g53 -sg38 -g39 -sg40 -S'178528777' -p198 -sg42 -g43 -sssssS'flag' -p199 -S'gene_variant' -p200 -sS'NM_133378.4:c.99270C>A' -p201 -(dp202 -g3 -g4 -sg5 -(lp203 -S'RefSeqGene record not available' -p204 -asg8 -g4 -sg9 -(lp205 -sg11 -VHomo sapiens titin (TTN), transcript variant N2-A, mRNA -p206 -sg13 -S'TTN' -p207 -sg15 -(dp208 -g17 -S'NP_596869.4:p.(Ser33090Arg)' -p209 -sg19 -S'NP_596869.4:p.(S33090R)' -p210 -ssg21 -g22 -sg23 -g4 
-sg24 -g4 -sg25 -S'NM_133378.4:c.99270C>A' -p211 -sg27 -g4 -sg28 -(dp212 -g30 -(dp213 -g32 -S'NC_000002.11:g.179393504G>T' -p214 -sg34 -(dp215 -g36 -g37 -sg38 -g39 -sg40 -S'179393504' -p216 -sg42 -g43 -sssg44 -(dp217 -g32 -S'NC_000002.12:g.178528777G>T' -p218 -sg34 -(dp219 -g36 -g37 -sg38 -g39 -sg40 -S'178528777' -p220 -sg42 -g43 -sssg49 -(dp221 -g32 -S'NC_000002.11:g.179393504G>T' -p222 -sg34 -(dp223 -g36 -g53 -sg38 -g39 -sg40 -S'179393504' -p224 -sg42 -g43 -sssg55 -(dp225 -g32 -S'NC_000002.12:g.178528777G>T' -p226 -sg34 -(dp227 -g36 -g53 -sg38 -g39 -sg40 -S'178528777' -p228 -sg42 -g43 -sssssS'NM_001267550.2:c.106974C>A' -p229 -(dp230 -g3 -g4 -sg5 -(lp231 -S'RefSeqGene record not available' -p232 -asg8 -g4 -sg9 -(lp233 -sg11 -VHomo sapiens titin (TTN), transcript variant IC, mRNA -p234 -sg13 -S'TTN' -p235 -sg15 -(dp236 -g17 -S'NP_001254479.2:p.(Ser35658Arg)' -p237 -sg19 -S'NP_001254479.2:p.(S35658R)' -p238 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001267550.2:c.106974C>A' -p239 -sg27 -g4 -sg28 -(dp240 -g30 -(dp241 -g32 -S'NC_000002.11:g.179393504G>T' -p242 -sg34 -(dp243 -g36 -g37 -sg38 -g39 -sg40 -S'179393504' -p244 -sg42 -g43 -sssg44 -(dp245 -g32 -S'NC_000002.12:g.178528777G>T' -p246 -sg34 -(dp247 -g36 -g37 -sg38 -g39 -sg40 -S'178528777' -p248 -sg42 -g43 -sssg49 -(dp249 -g32 -S'NC_000002.11:g.179393504G>T' -p250 -sg34 -(dp251 -g36 -g53 -sg38 -g39 -sg40 -S'179393504' -p252 -sg42 -g43 -sssg55 -(dp253 -g32 -S'NC_000002.12:g.178528777G>T' -p254 -sg34 -(dp255 -g36 -g53 -sg38 -g39 -sg40 -S'178528777' -p256 -sg42 -g43 -sssssS'NM_133437.3:c.80355C>A' -p257 -(dp258 -g3 -g4 -sg5 -(lp259 -S'A more recent version of the selected reference sequence NM_133437.3 is available (NM_133437.4)' -p260 -aS'NM_133437.4:c.80355C>A MUST be fully validated prior to use in reports' -p261 -aS'select_variants=NM_133437.4:c.80355C>A' -p262 -aS'RefSeqGene record not available' -p263 -asg8 -g4 -sg9 -(lp264 -sg11 -VHomo sapiens titin (TTN), transcript variant novex-2, mRNA -p265 -sg13 -S'TTN' 
-p266 -sg15 -(dp267 -g17 -S'NP_597681.3:p.(Ser26785Arg)' -p268 -sg19 -S'NP_597681.3:p.(S26785R)' -p269 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_133437.3:c.80355C>A' -p270 -sg27 -g4 -sg28 -(dp271 -g30 -(dp272 -g32 -S'NC_000002.11:g.179393504G>T' -p273 -sg34 -(dp274 -g36 -g37 -sg38 -g39 -sg40 -S'179393504' -p275 -sg42 -g43 -sssg49 -(dp276 -g32 -S'NC_000002.11:g.179393504G>T' -p277 -sg34 -(dp278 -g36 -g53 -sg38 -g39 -sg40 -S'179393504' -p279 -sg42 -g43 -sssssS'NM_003319.4:c.79779C>A' -p280 -(dp281 -g3 -g4 -sg5 -(lp282 -S'RefSeqGene record not available' -p283 -asg8 -g4 -sg9 -(lp284 -sg11 -VHomo sapiens titin (TTN), transcript variant N2-B, mRNA -p285 -sg13 -S'TTN' -p286 -sg15 -(dp287 -g17 -S'NP_003310.4:p.(Ser26593Arg)' -p288 -sg19 -S'NP_003310.4:p.(S26593R)' -p289 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_003319.4:c.79779C>A' -p290 -sg27 -g4 -sg28 -(dp291 -g30 -(dp292 -g32 -S'NC_000002.11:g.179393504G>T' -p293 -sg34 -(dp294 -g36 -g37 -sg38 -g39 -sg40 -S'179393504' -p295 -sg42 -g43 -sssg44 -(dp296 -g32 -S'NC_000002.12:g.178528777G>T' -p297 -sg34 -(dp298 -g36 -g37 -sg38 -g39 -sg40 -S'178528777' -p299 -sg42 -g43 -sssg49 -(dp300 -g32 -S'NC_000002.11:g.179393504G>T' -p301 -sg34 -(dp302 -g36 -g53 -sg38 -g39 -sg40 -S'179393504' -p303 -sg42 -g43 -sssg55 -(dp304 -g32 -S'NC_000002.12:g.178528777G>T' -p305 -sg34 -(dp306 -g36 -g53 -sg38 -g39 -sg40 -S'178528777' -p307 -sg42 -g43 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant274.txt b/VariantValidator/testing/testOutputsMasterITS/variant274.txt deleted file mode 100644 index 7797eba3..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant274.txt +++ /dev/null @@ -1,146 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_194250.1:c.3324_3347del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000002.11:g.185803444TGCAGCTGCTGCAGCTGCAGCTGCA>T automapped to NC_000002.11:g.185803447_185803470del' -p9 -aS'RefSeqGene record not available' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens zinc finger protein 804A (ZNF804A), mRNA -p15 -sS'gene_symbol' -p16 -S'ZNF804A' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_919226.1:p.(Ala1112_Ala1119del)' -p21 -sS'slr' -p22 -S'NP_919226.1:p.(A1112_A1119del)' -p23 -ssS'submitted_variant' -p24 -S'2-185803444-TGCAGCTGCTGCAGCTGCAGCTGCA-T' -p25 -sS'genome_context_intronic_sequence' -p26 -g6 -sS'HGVS_LRG_variant' -p27 -g6 -sS'HGVS_transcript_variant' -p28 -S'NM_194250.1:c.3324_3347del' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -g6 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000002.11:g.185803447_185803470del' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr2' -p40 -sS'ref' -p41 -S'TGCAGCTGCTGCAGCTGCAGCTGCA' -p42 -sS'pos' -p43 -S'185803444' -p44 -sS'alt' -p45 -S'T' -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000002.12:g.184938720_184938743del' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -S'TGCAGCTGCTGCAGCTGCAGCTGCA' -p51 -sg43 -S'184938717' -p52 -sg45 -g46 -sssS'GRCh37' -p53 -(dp54 -g35 -S'NC_000002.11:g.185803447_185803470del' -p55 -sg37 -(dp56 -g39 -S'2' -p57 -sg41 -S'TGCAGCTGCTGCAGCTGCAGCTGCA' -p58 -sg43 -S'185803444' -p59 -sg45 -g46 -sssS'GRCh38' -p60 -(dp61 -g35 
-S'NC_000002.12:g.184938720_184938743del' -p62 -sg37 -(dp63 -g39 -g57 -sg41 -S'TGCAGCTGCTGCAGCTGCAGCTGCA' -p64 -sg43 -S'184938717' -p65 -sg45 -g46 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant275.txt b/VariantValidator/testing/testOutputsMasterITS/variant275.txt deleted file mode 100644 index ebd12eb1..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant275.txt +++ /dev/null @@ -1,244 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_002491.2:c.208G>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -g6 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens NADH:ubiquinone oxidoreductase subunit B3 (NDUFB3), transcript variant 1, mRNA -p13 -sS'gene_symbol' -p14 -S'NDUFB3' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_002482.1:p.(Gly70Ter)' -p19 -sS'slr' -p20 -S'NP_002482.1:p.(G70*)' -p21 -ssS'submitted_variant' -p22 -S'2-201950249-G-T' -p23 -sS'genome_context_intronic_sequence' -p24 -g6 -sS'HGVS_LRG_variant' -p25 -g6 -sS'HGVS_transcript_variant' -p26 -S'NM_002491.2:c.208G>T' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -S'NG_032156.1:g.18788G>T' -p29 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000002.11:g.201950249G>T' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr2' -p39 -sS'ref' -p40 -S'G' -p41 -sS'pos' -p42 -S'201950249' -p43 -sS'alt' -p44 -S'T' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000002.12:g.201085526G>T' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'201085526' -p50 -sg44 -g45 -sssS'GRCh37' -p51 -(dp52 -g34 -S'NC_000002.11:g.201950249G>T' -p53 -sg36 -(dp54 -g38 -S'2' -p55 -sg40 -g41 -sg42 -S'201950249' -p56 -sg44 -g45 -sssS'GRCh38' -p57 -(dp58 -g34 -S'NC_000002.12:g.201085526G>T' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'201085526' -p61 -sg44 -g45 -sssssS'NM_001257102.1:c.208G>T' 
-p62 -(dp63 -g5 -g6 -sg7 -(lp64 -S'RefSeqGene record not available' -p65 -asg9 -g6 -sg10 -(lp66 -sg12 -VHomo sapiens NADH:ubiquinone oxidoreductase subunit B3 (NDUFB3), transcript variant 2, mRNA -p67 -sg14 -S'NDUFB3' -p68 -sg16 -(dp69 -g18 -S'NP_001244031.1:p.(Gly70Ter)' -p70 -sg20 -S'NP_001244031.1:p.(G70*)' -p71 -ssg22 -g23 -sg24 -g6 -sg25 -g6 -sg26 -S'NM_001257102.1:c.208G>T' -p72 -sg28 -g6 -sg30 -(dp73 -g32 -(dp74 -g34 -S'NC_000002.11:g.201950249G>T' -p75 -sg36 -(dp76 -g38 -g39 -sg40 -g41 -sg42 -S'201950249' -p77 -sg44 -g45 -sssg46 -(dp78 -g34 -S'NC_000002.12:g.201085526G>T' -p79 -sg36 -(dp80 -g38 -g39 -sg40 -g41 -sg42 -S'201085526' -p81 -sg44 -g45 -sssg51 -(dp82 -g34 -S'NC_000002.11:g.201950249G>T' -p83 -sg36 -(dp84 -g38 -g55 -sg40 -g41 -sg42 -S'201950249' -p85 -sg44 -g45 -sssg57 -(dp86 -g34 -S'NC_000002.12:g.201085526G>T' -p87 -sg36 -(dp88 -g38 -g55 -sg40 -g41 -sg42 -S'201085526' -p89 -sg44 -g45 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant276.txt b/VariantValidator/testing/testOutputsMasterITS/variant276.txt deleted file mode 100644 index 8b83c584..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant276.txt +++ /dev/null @@ -1,354 +0,0 @@ -(dp0 -S'NM_004369.3:c.6282+1G>T' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_473t1:c.6282+1G>T' -p4 -sS'validation_warnings' -p5 -(lp6 -sS'RefSeqGene_context_intronic_sequence' -p7 -S'NG_008676.1(NM_004369.3):c.6282+1G>T' -p8 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens collagen type VI alpha 3 chain (COL6A3), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'COL6A3' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_004360.2(LRG_473p1):p.?' -p18 -sS'slr' -p19 -S'NP_004360.2:p.?' 
-p20 -ssS'submitted_variant' -p21 -S'2-238268730-C-A' -p22 -sS'genome_context_intronic_sequence' -p23 -S'NC_000002.11(NM_004369.3):c.6282+1G>T' -p24 -sS'HGVS_LRG_variant' -p25 -S'LRG_473:g.59121G>T' -p26 -sS'HGVS_transcript_variant' -p27 -S'NM_004369.3:c.6282+1G>T' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -S'NG_008676.1:g.59121G>T' -p30 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000002.11:g.238268730C>A' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr2' -p40 -sS'ref' -p41 -VC -p42 -sS'pos' -p43 -S'238268730' -p44 -sS'alt' -p45 -VA -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000002.12:g.237360087C>A' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -g42 -sg43 -S'237360087' -p51 -sg45 -g46 -sssS'GRCh37' -p52 -(dp53 -g35 -S'NC_000002.11:g.238268730C>A' -p54 -sg37 -(dp55 -g39 -S'2' -p56 -sg41 -g42 -sg43 -S'238268730' -p57 -sg45 -g46 -sssS'GRCh38' -p58 -(dp59 -g35 -S'NC_000002.12:g.237360087C>A' -p60 -sg37 -(dp61 -g39 -g56 -sg41 -g42 -sg43 -S'237360087' -p62 -sg45 -g46 -sssssS'flag' -p63 -S'gene_variant' -p64 -sS'NM_057166.4:c.4461+1G>T' -p65 -(dp66 -g3 -S'' -p67 -sg5 -(lp68 -S'RefSeqGene record not available' -p69 -asg7 -g67 -sg9 -(lp70 -sg11 -VHomo sapiens collagen type VI alpha 3 chain (COL6A3), transcript variant 4, mRNA -p71 -sg13 -S'COL6A3' -p72 -sg15 -(dp73 -g17 -S'NP_476507.3:p.?' -p74 -sg19 -S'NP_476507.3:p.?' 
-p75 -ssg21 -g22 -sg23 -S'NC_000002.11(NM_057166.4):c.4461+1G>T' -p76 -sg25 -g67 -sg27 -S'NM_057166.4:c.4461+1G>T' -p77 -sg29 -g67 -sg31 -(dp78 -g33 -(dp79 -g35 -S'NC_000002.11:g.238268730C>A' -p80 -sg37 -(dp81 -g39 -g40 -sg41 -g42 -sg43 -S'238268730' -p82 -sg45 -g46 -sssg47 -(dp83 -g35 -S'NC_000002.12:g.237360087C>A' -p84 -sg37 -(dp85 -g39 -g40 -sg41 -g42 -sg43 -S'237360087' -p86 -sg45 -g46 -sssg52 -(dp87 -g35 -S'NC_000002.11:g.238268730C>A' -p88 -sg37 -(dp89 -g39 -g56 -sg41 -g42 -sg43 -S'238268730' -p90 -sg45 -g46 -sssg58 -(dp91 -g35 -S'NC_000002.12:g.237360087C>A' -p92 -sg37 -(dp93 -g39 -g56 -sg41 -g42 -sg43 -S'237360087' -p94 -sg45 -g46 -sssssS'NM_057167.3:c.5664+1G>T' -p95 -(dp96 -g3 -g67 -sg5 -(lp97 -S'RefSeqGene record not available' -p98 -asg7 -g67 -sg9 -(lp99 -sg11 -VHomo sapiens collagen type VI alpha 3 chain (COL6A3), transcript variant 5, mRNA -p100 -sg13 -S'COL6A3' -p101 -sg15 -(dp102 -g17 -S'NP_476508.2:p.?' -p103 -sg19 -S'NP_476508.2:p.?' -p104 -ssg21 -g22 -sg23 -S'NC_000002.11(NM_057167.3):c.5664+1G>T' -p105 -sg25 -g67 -sg27 -S'NM_057167.3:c.5664+1G>T' -p106 -sg29 -g67 -sg31 -(dp107 -g33 -(dp108 -g35 -S'NC_000002.11:g.238268730C>A' -p109 -sg37 -(dp110 -g39 -g40 -sg41 -g42 -sg43 -S'238268730' -p111 -sg45 -g46 -sssg47 -(dp112 -g35 -S'NC_000002.12:g.237360087C>A' -p113 -sg37 -(dp114 -g39 -g40 -sg41 -g42 -sg43 -S'237360087' -p115 -sg45 -g46 -sssg52 -(dp116 -g35 -S'NC_000002.11:g.238268730C>A' -p117 -sg37 -(dp118 -g39 -g56 -sg41 -g42 -sg43 -S'238268730' -p119 -sg45 -g46 -sssg58 -(dp120 -g35 -S'NC_000002.12:g.237360087C>A' -p121 -sg37 -(dp122 -g39 -g56 -sg41 -g42 -sg43 -S'237360087' -p123 -sg45 -g46 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant277.txt b/VariantValidator/testing/testOutputsMasterITS/variant277.txt deleted file mode 100644 index b877785f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant277.txt +++ /dev/null @@ -1,326 +0,0 @@ -(dp0 -S'NM_080860.2:c.727+5G>A' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'A more recent version of the selected reference sequence NM_080860.2 is available (NM_080860.3)' -p7 -aS'NM_080860.3:c.727+5G>A MUST be fully validated prior to use in reports' -p8 -aS'select_variants=NM_080860.3:c.727+5G>A' -p9 -aS'RefSeqGene record not available' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -g4 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens radial spoke head 1 homolog (Chlamydomonas) (RSPH1), mRNA -p15 -sS'gene_symbol' -p16 -S'RSPH1' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_543136.1:p.?' -p21 -sS'slr' -p22 -S'NP_543136.1:p.?' 
-p23 -ssS'submitted_variant' -p24 -S'21-43897396-C-T' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000021.8(NM_080860.2):c.727+5G>A' -p27 -sS'HGVS_LRG_variant' -p28 -g4 -sS'HGVS_transcript_variant' -p29 -S'NM_080860.2:c.727+5G>A' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000021.8:g.43897396C>T' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr21' -p41 -sS'ref' -p42 -VC -p43 -sS'pos' -p44 -S'43897396' -p45 -sS'alt' -p46 -VT -p47 -sssS'GRCh37' -p48 -(dp49 -g36 -S'NC_000021.8:g.43897396C>T' -p50 -sg38 -(dp51 -g40 -S'21' -p52 -sg42 -g43 -sg44 -S'43897396' -p53 -sg46 -g47 -sssssS'flag' -p54 -S'gene_variant' -p55 -sS'NM_080860.3:c.727+5G>A' -p56 -(dp57 -g3 -g4 -sg5 -(lp58 -S'RefSeqGene record not available' -p59 -asg11 -g4 -sg12 -(lp60 -sg14 -VHomo sapiens radial spoke head component 1 (RSPH1), transcript variant 1, mRNA -p61 -sg16 -S'RSPH1' -p62 -sg18 -(dp63 -g20 -S'NP_543136.1:p.?' -p64 -sg22 -S'NP_543136.1:p.?' -p65 -ssg24 -g25 -sg26 -S'NC_000021.8(NM_080860.3):c.727+5G>A' -p66 -sg28 -g4 -sg29 -S'NM_080860.3:c.727+5G>A' -p67 -sg31 -g4 -sg32 -(dp68 -g34 -(dp69 -g36 -S'NC_000021.8:g.43897396C>T' -p70 -sg38 -(dp71 -g40 -g41 -sg42 -g43 -sg44 -S'43897396' -p72 -sg46 -g47 -sssS'hg38' -p73 -(dp74 -g36 -S'NC_000021.9:g.42477286C>T' -p75 -sg38 -(dp76 -g40 -g41 -sg42 -g43 -sg44 -S'42477286' -p77 -sg46 -g47 -sssg48 -(dp78 -g36 -S'NC_000021.8:g.43897396C>T' -p79 -sg38 -(dp80 -g40 -g52 -sg42 -g43 -sg44 -S'43897396' -p81 -sg46 -g47 -sssS'GRCh38' -p82 -(dp83 -g36 -S'NC_000021.9:g.42477286C>T' -p84 -sg38 -(dp85 -g40 -g52 -sg42 -g43 -sg44 -S'42477286' -p86 -sg46 -g47 -sssssS'NM_001286506.1:c.613+5G>A' -p87 -(dp88 -g3 -g4 -sg5 -(lp89 -S'RefSeqGene record not available' -p90 -asg11 -g4 -sg12 -(lp91 -sg14 -VHomo sapiens radial spoke head component 1 (RSPH1), transcript variant 2, mRNA -p92 -sg16 -S'RSPH1' -p93 -sg18 -(dp94 -g20 -S'NP_001273435.1:p.?' -p95 -sg22 -S'NP_001273435.1:p.?' 
-p96 -ssg24 -g25 -sg26 -S'NC_000021.8(NM_001286506.1):c.613+5G>A' -p97 -sg28 -g4 -sg29 -S'NM_001286506.1:c.613+5G>A' -p98 -sg31 -g4 -sg32 -(dp99 -g34 -(dp100 -g36 -S'NC_000021.8:g.43897396C>T' -p101 -sg38 -(dp102 -g40 -g41 -sg42 -g43 -sg44 -S'43897396' -p103 -sg46 -g47 -sssg73 -(dp104 -g36 -S'NC_000021.9:g.42477286C>T' -p105 -sg38 -(dp106 -g40 -g41 -sg42 -g43 -sg44 -S'42477286' -p107 -sg46 -g47 -sssg48 -(dp108 -g36 -S'NC_000021.8:g.43897396C>T' -p109 -sg38 -(dp110 -g40 -g52 -sg42 -g43 -sg44 -S'43897396' -p111 -sg46 -g47 -sssg82 -(dp112 -g36 -S'NC_000021.9:g.42477286C>T' -p113 -sg38 -(dp114 -g40 -g52 -sg42 -g43 -sg44 -S'42477286' -p115 -sg46 -g47 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant278.txt b/VariantValidator/testing/testOutputsMasterITS/variant278.txt deleted file mode 100644 index 125504ac..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant278.txt +++ /dev/null @@ -1,1104 +0,0 @@ -(dp0 -S'NM_000268.3:c.924_925insCGACGC' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_511t1:c.924_925insCGACGC' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -S'' -p9 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens neurofibromin 2 (NF2), transcript variant 1, mRNA -p13 -sS'gene_symbol' -p14 -S'NF2' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_000259.1(LRG_511p1):p.(Arg310_Arg311dup)' -p19 -sS'slr' -p20 -S'NP_000259.1:p.(R310_R311dup)' -p21 -ssS'submitted_variant' -p22 -S'22-30064360-G-GCGACGC' -p23 -sS'genome_context_intronic_sequence' -p24 -g9 -sS'HGVS_LRG_variant' -p25 -S'LRG_511:g.69816_69817insCGACGC' -p26 -sS'HGVS_transcript_variant' -p27 -S'NM_000268.3:c.924_925insCGACGC' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -S'NG_009057.1:g.69816_69817insCGACGC' -p30 -sS'primary_assembly_loci' -p31 -(dp32 
-S'hg19' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr22' -p40 -sS'ref' -p41 -S'G' -p42 -sS'pos' -p43 -S'30064360' -p44 -sS'alt' -p45 -S'GCGACGC' -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -g42 -sg43 -S'29668371' -p51 -sg45 -S'GCGACGC' -p52 -sssS'GRCh37' -p53 -(dp54 -g35 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p55 -sg37 -(dp56 -g39 -S'22' -p57 -sg41 -g42 -sg43 -S'30064360' -p58 -sg45 -S'GCGACGC' -p59 -sssS'GRCh38' -p60 -(dp61 -g35 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p62 -sg37 -(dp63 -g39 -g57 -sg41 -g42 -sg43 -S'29668371' -p64 -sg45 -S'GCGACGC' -p65 -sssssS'NM_181828.2:c.798_799insCGACGC' -p66 -(dp67 -g3 -g9 -sg5 -(lp68 -S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' -p69 -aS'RefSeqGene record not available' -p70 -asg8 -g9 -sg10 -(lp71 -sg12 -VHomo sapiens neurofibromin 2 (NF2), transcript variant 5, mRNA -p72 -sg14 -S'NF2' -p73 -sg16 -(dp74 -g18 -S'NP_861966.1:p.(Arg268_Arg269dup)' -p75 -sg20 -S'NP_861966.1:p.(R268_R269dup)' -p76 -ssg22 -g23 -sg24 -g9 -sg25 -g9 -sg27 -S'NM_181828.2:c.798_799insCGACGC' -p77 -sg29 -g9 -sg31 -(dp78 -g33 -(dp79 -g35 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p80 -sg37 -(dp81 -g39 -g40 -sg41 -g42 -sg43 -S'30064360' -p82 -sg45 -S'GCGACGC' -p83 -sssg47 -(dp84 -g35 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p85 -sg37 -(dp86 -g39 -g40 -sg41 -g42 -sg43 -S'29668371' -p87 -sg45 -S'GCGACGC' -p88 -sssg53 -(dp89 -g35 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p90 -sg37 -(dp91 -g39 -g57 -sg41 -g42 -sg43 -S'30064360' -p92 -sg45 -S'GCGACGC' -p93 -sssg60 -(dp94 -g35 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p95 -sg37 -(dp96 -g39 -g57 -sg41 -g42 -sg43 -S'29668371' -p97 -sg45 -S'GCGACGC' -p98 -sssssS'NM_181830.2:c.675_676insCGACGC' -p99 -(dp100 -g3 -g9 -sg5 -(lp101 -S'NC_000022.10:g.30064360G>GCGACGC automapped 
to NC_000022.10:g.30064360_30064361insCGACGC' -p102 -aS'RefSeqGene record not available' -p103 -asg8 -g9 -sg10 -(lp104 -sg12 -VHomo sapiens neurofibromin 2 (NF2), transcript variant 7, mRNA -p105 -sg14 -S'NF2' -p106 -sg16 -(dp107 -g18 -S'NP_861968.1:p.(Arg227_Arg228dup)' -p108 -sg20 -S'NP_861968.1:p.(R227_R228dup)' -p109 -ssg22 -g23 -sg24 -g9 -sg25 -g9 -sg27 -S'NM_181830.2:c.675_676insCGACGC' -p110 -sg29 -g9 -sg31 -(dp111 -g33 -(dp112 -g35 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p113 -sg37 -(dp114 -g39 -g40 -sg41 -g42 -sg43 -S'30064360' -p115 -sg45 -S'GCGACGC' -p116 -sssg47 -(dp117 -g35 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p118 -sg37 -(dp119 -g39 -g40 -sg41 -g42 -sg43 -S'29668371' -p120 -sg45 -S'GCGACGC' -p121 -sssg53 -(dp122 -g35 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p123 -sg37 -(dp124 -g39 -g57 -sg41 -g42 -sg43 -S'30064360' -p125 -sg45 -S'GCGACGC' -p126 -sssg60 -(dp127 -g35 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p128 -sg37 -(dp129 -g39 -g57 -sg41 -g42 -sg43 -S'29668371' -p130 -sg45 -S'GCGACGC' -p131 -sssssS'NM_181825.2:c.924_925insCGACGC' -p132 -(dp133 -g3 -g9 -sg5 -(lp134 -S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' -p135 -aS'RefSeqGene record not available' -p136 -asg8 -g9 -sg10 -(lp137 -sg12 -VHomo sapiens neurofibromin 2 (NF2), transcript variant 12, mRNA -p138 -sg14 -S'NF2' -p139 -sg16 -(dp140 -g18 -S'NP_861546.1:p.(Arg310_Arg311dup)' -p141 -sg20 -S'NP_861546.1:p.(R310_R311dup)' -p142 -ssg22 -g23 -sg24 -g9 -sg25 -g9 -sg27 -S'NM_181825.2:c.924_925insCGACGC' -p143 -sg29 -g9 -sg31 -(dp144 -g33 -(dp145 -g35 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p146 -sg37 -(dp147 -g39 -g40 -sg41 -g42 -sg43 -S'30064360' -p148 -sg45 -S'GCGACGC' -p149 -sssg47 -(dp150 -g35 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p151 -sg37 -(dp152 -g39 -g40 -sg41 -g42 -sg43 -S'29668371' -p153 -sg45 -S'GCGACGC' -p154 -sssg53 -(dp155 -g35 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p156 -sg37 -(dp157 
-g39 -g57 -sg41 -g42 -sg43 -S'30064360' -p158 -sg45 -S'GCGACGC' -p159 -sssg60 -(dp160 -g35 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p161 -sg37 -(dp162 -g39 -g57 -sg41 -g42 -sg43 -S'29668371' -p163 -sg45 -S'GCGACGC' -p164 -sssssS'NM_181832.2:c.924_925insCGACGC' -p165 -(dp166 -g3 -g9 -sg5 -(lp167 -S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' -p168 -aS'RefSeqGene record not available' -p169 -asg8 -g9 -sg10 -(lp170 -sg12 -VHomo sapiens neurofibromin 2 (NF2), transcript variant 8, mRNA -p171 -sg14 -S'NF2' -p172 -sg16 -(dp173 -g18 -S'NP_861970.1:p.(Arg310_Arg311dup)' -p174 -sg20 -S'NP_861970.1:p.(R310_R311dup)' -p175 -ssg22 -g23 -sg24 -g9 -sg25 -g9 -sg27 -S'NM_181832.2:c.924_925insCGACGC' -p176 -sg29 -g9 -sg31 -(dp177 -g33 -(dp178 -g35 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p179 -sg37 -(dp180 -g39 -g40 -sg41 -g42 -sg43 -S'30064360' -p181 -sg45 -S'GCGACGC' -p182 -sssg47 -(dp183 -g35 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p184 -sg37 -(dp185 -g39 -g40 -sg41 -g42 -sg43 -S'29668371' -p186 -sg45 -S'GCGACGC' -p187 -sssg53 -(dp188 -g35 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p189 -sg37 -(dp190 -g39 -g57 -sg41 -g42 -sg43 -S'30064360' -p191 -sg45 -S'GCGACGC' -p192 -sssg60 -(dp193 -g35 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p194 -sg37 -(dp195 -g39 -g57 -sg41 -g42 -sg43 -S'29668371' -p196 -sg45 -S'GCGACGC' -p197 -sssssS'NM_181833.2:c.447+26086_447+26087insCGACGC' -p198 -(dp199 -g3 -g9 -sg5 -(lp200 -S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' -p201 -aS'RefSeqGene record not available' -p202 -asg8 -g9 -sg10 -(lp203 -sg12 -VHomo sapiens neurofibromin 2 (NF2), transcript variant 9, mRNA -p204 -sg14 -S'NF2' -p205 -sg16 -(dp206 -g18 -S'NP_861971.1:p.?' -p207 -sg20 -S'NP_861971.1:p.?' 
-p208 -ssg22 -g23 -sg24 -S'NC_000022.10(NM_181833.2):c.447+26086_447+26087insCGACGC' -p209 -sg25 -g9 -sg27 -S'NM_181833.2:c.447+26086_447+26087insCGACGC' -p210 -sg29 -g9 -sg31 -(dp211 -g33 -(dp212 -g35 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p213 -sg37 -(dp214 -g39 -g40 -sg41 -g42 -sg43 -S'30064360' -p215 -sg45 -S'GCGACGC' -p216 -sssg47 -(dp217 -g35 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p218 -sg37 -(dp219 -g39 -g40 -sg41 -g42 -sg43 -S'29668371' -p220 -sg45 -S'GCGACGC' -p221 -sssg53 -(dp222 -g35 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p223 -sg37 -(dp224 -g39 -g57 -sg41 -g42 -sg43 -S'30064360' -p225 -sg45 -S'GCGACGC' -p226 -sssg60 -(dp227 -g35 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p228 -sg37 -(dp229 -g39 -g57 -sg41 -g42 -sg43 -S'29668371' -p230 -sg45 -S'GCGACGC' -p231 -sssssS'NM_016418.5:c.924_925insCGACGC' -p232 -(dp233 -g3 -S'LRG_511t2:c.924_925insCGACGC' -p234 -sg5 -(lp235 -S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' -p236 -asg8 -g9 -sg10 -(lp237 -sg12 -VHomo sapiens neurofibromin 2 (NF2), transcript variant 2, mRNA -p238 -sg14 -S'NF2' -p239 -sg16 -(dp240 -g18 -S'NP_057502.2(LRG_511p2):p.(Arg310_Arg311dup)' -p241 -sg20 -S'NP_057502.2:p.(R310_R311dup)' -p242 -ssg22 -g23 -sg24 -g9 -sg25 -S'LRG_511:g.69816_69817insCGACGC' -p243 -sg27 -S'NM_016418.5:c.924_925insCGACGC' -p244 -sg29 -S'NG_009057.1:g.69816_69817insCGACGC' -p245 -sg31 -(dp246 -g33 -(dp247 -g35 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p248 -sg37 -(dp249 -g39 -g40 -sg41 -g42 -sg43 -S'30064360' -p250 -sg45 -S'GCGACGC' -p251 -sssg47 -(dp252 -g35 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p253 -sg37 -(dp254 -g39 -g40 -sg41 -g42 -sg43 -S'29668371' -p255 -sg45 -S'GCGACGC' -p256 -sssg53 -(dp257 -g35 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p258 -sg37 -(dp259 -g39 -g57 -sg41 -g42 -sg43 -S'30064360' -p260 -sg45 -S'GCGACGC' -p261 -sssg60 -(dp262 -g35 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p263 -sg37 -(dp264 -g39 
-g57 -sg41 -g42 -sg43 -S'29668371' -p265 -sg45 -S'GCGACGC' -p266 -sssssS'NM_181829.2:c.801_802insCGACGC' -p267 -(dp268 -g3 -g9 -sg5 -(lp269 -S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' -p270 -aS'RefSeqGene record not available' -p271 -asg8 -g9 -sg10 -(lp272 -sg12 -VHomo sapiens neurofibromin 2 (NF2), transcript variant 6, mRNA -p273 -sg14 -S'NF2' -p274 -sg16 -(dp275 -g18 -S'NP_861967.1:p.(Arg269_Arg270dup)' -p276 -sg20 -S'NP_861967.1:p.(R269_R270dup)' -p277 -ssg22 -g23 -sg24 -g9 -sg25 -g9 -sg27 -S'NM_181829.2:c.801_802insCGACGC' -p278 -sg29 -g9 -sg31 -(dp279 -g33 -(dp280 -g35 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p281 -sg37 -(dp282 -g39 -g40 -sg41 -g42 -sg43 -S'30064360' -p283 -sg45 -S'GCGACGC' -p284 -sssg47 -(dp285 -g35 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p286 -sg37 -(dp287 -g39 -g40 -sg41 -g42 -sg43 -S'29668371' -p288 -sg45 -S'GCGACGC' -p289 -sssg53 -(dp290 -g35 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p291 -sg37 -(dp292 -g39 -g57 -sg41 -g42 -sg43 -S'30064360' -p293 -sg45 -S'GCGACGC' -p294 -sssg60 -(dp295 -g35 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p296 -sg37 -(dp297 -g39 -g57 -sg41 -g42 -sg43 -S'29668371' -p298 -sg45 -S'GCGACGC' -p299 -sssssS'flag' -p300 -S'gene_variant' -p301 -sS'NR_156186.1:n.1483_1484insCGACGC' -p302 -(dp303 -g3 -g9 -sg5 -(lp304 -S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' -p305 -aS'RefSeqGene record not available' -p306 -asg8 -g9 -sg10 -(lp307 -sg12 -VHomo sapiens neurofibromin 2 (NF2), transcript variant 14, non-coding RNA -p308 -sg14 -S'NF2' -p309 -sg16 -(dp310 -g18 -S'Non-coding :n.' 
-p311 -sg20 -g9 -ssg22 -g23 -sg24 -g9 -sg25 -g9 -sg27 -S'NR_156186.1:n.1483_1484insCGACGC' -p312 -sg29 -g9 -sg31 -(dp313 -g33 -(dp314 -g35 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p315 -sg37 -(dp316 -g39 -g40 -sg41 -g42 -sg43 -S'30064360' -p317 -sg45 -S'GCGACGC' -p318 -sssg53 -(dp319 -g35 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p320 -sg37 -(dp321 -g39 -g57 -sg41 -g42 -sg43 -S'30064360' -p322 -sg45 -S'GCGACGC' -p323 -sssssS'NM_181831.2:c.675_676insCGACGC' -p324 -(dp325 -g3 -g9 -sg5 -(lp326 -S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' -p327 -aS'RefSeqGene record not available' -p328 -asg8 -g9 -sg10 -(lp329 -sg12 -VHomo sapiens neurofibromin 2 (NF2), transcript variant 13, mRNA -p330 -sg14 -S'NF2' -p331 -sg16 -(dp332 -g18 -S'NP_861969.1:p.(Arg227_Arg228dup)' -p333 -sg20 -S'NP_861969.1:p.(R227_R228dup)' -p334 -ssg22 -g23 -sg24 -g9 -sg25 -g9 -sg27 -S'NM_181831.2:c.675_676insCGACGC' -p335 -sg29 -g9 -sg31 -(dp336 -g33 -(dp337 -g35 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p338 -sg37 -(dp339 -g39 -g40 -sg41 -g42 -sg43 -S'30064360' -p340 -sg45 -S'GCGACGC' -p341 -sssg47 -(dp342 -g35 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p343 -sg37 -(dp344 -g39 -g40 -sg41 -g42 -sg43 -S'29668371' -p345 -sg45 -S'GCGACGC' -p346 -sssg53 -(dp347 -g35 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p348 -sg37 -(dp349 -g39 -g57 -sg41 -g42 -sg43 -S'30064360' -p350 -sg45 -S'GCGACGC' -p351 -sssg60 -(dp352 -g35 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p353 -sg37 -(dp354 -g39 -g57 -sg41 -g42 -sg43 -S'29668371' -p355 -sg45 -S'GCGACGC' -p356 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant279.txt b/VariantValidator/testing/testOutputsMasterITS/variant279.txt deleted file mode 100644 index 753ba2a1..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant279.txt +++ /dev/null @@ -1,371 +0,0 @@ -(dp0 -S'NM_198156.2:c.341-3280_341-3271del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000003.11:g.10188187TGTCCCGATAG>T automapped to NC_000003.11:g.10188191_10188200del' -p7 -aS'RefSeqGene record not available' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens von Hippel-Lindau tumor suppressor (VHL), transcript variant 2, mRNA -p13 -sS'gene_symbol' -p14 -S'VHL' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_937799.1:p.?' -p19 -sS'slr' -p20 -S'NP_937799.1:p.?' -p21 -ssS'submitted_variant' -p22 -S'3-10188187-TGTCCCGATAG-T' -p23 -sS'genome_context_intronic_sequence' -p24 -S'NC_000003.11(NM_198156.2):c.341-3280_341-3271del' -p25 -sS'HGVS_LRG_variant' -p26 -g4 -sS'HGVS_transcript_variant' -p27 -S'NM_198156.2:c.341-3280_341-3271del' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -g4 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000003.11:g.10188191_10188200del' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr3' -p39 -sS'ref' -p40 -S'TGTCCCGATAG' -p41 -sS'pos' -p42 -S'10188187' -p43 -sS'alt' -p44 -S'T' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000003.12:g.10146507_10146516del' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'TGTCCCGATAG' -p50 -sg42 -S'10146503' -p51 -sg44 -g45 -sssS'GRCh37' -p52 -(dp53 -g34 -S'NC_000003.11:g.10188191_10188200del' -p54 -sg36 -(dp55 -g38 -S'3' -p56 -sg40 -S'TGTCCCGATAG' -p57 -sg42 -S'10188187' -p58 -sg44 -g45 -sssS'GRCh38' -p59 -(dp60 -g34 -S'NC_000003.12:g.10146507_10146516del' -p61 -sg36 -(dp62 -g38 -g56 -sg40 -S'TGTCCCGATAG' -p63 
-sg42 -S'10146503' -p64 -sg44 -g45 -sssssS'flag' -p65 -S'gene_variant' -p66 -sS'NM_001354723.1:c.*18-3280_*18-3271del' -p67 -(dp68 -g3 -g4 -sg5 -(lp69 -S'NC_000003.11:g.10188187TGTCCCGATAG>T automapped to NC_000003.11:g.10188191_10188200del' -p70 -aS'RefSeqGene record not available' -p71 -asg9 -g4 -sg10 -(lp72 -sg12 -VHomo sapiens von Hippel-Lindau tumor suppressor (VHL), transcript variant 3, mRNA -p73 -sg14 -S'VHL' -p74 -sg16 -(dp75 -g18 -S'NP_001341652.1:p.?' -p76 -sg20 -S'NP_001341652.1:p.?' -p77 -ssg22 -g23 -sg24 -S'NC_000003.11(NM_001354723.1):c.*18-3280_*18-3271del' -p78 -sg26 -g4 -sg27 -S'NM_001354723.1:c.*18-3280_*18-3271del' -p79 -sg29 -g4 -sg30 -(dp80 -g32 -(dp81 -g34 -S'NC_000003.11:g.10188191_10188200del' -p82 -sg36 -(dp83 -g38 -g39 -sg40 -S'TGTCCCGATAG' -p84 -sg42 -S'10188187' -p85 -sg44 -g45 -sssg46 -(dp86 -g34 -S'NC_000003.12:g.10146507_10146516del' -p87 -sg36 -(dp88 -g38 -g39 -sg40 -S'TGTCCCGATAG' -p89 -sg42 -S'10146503' -p90 -sg44 -g45 -sssg52 -(dp91 -g34 -S'NC_000003.11:g.10188191_10188200del' -p92 -sg36 -(dp93 -g38 -g56 -sg40 -S'TGTCCCGATAG' -p94 -sg42 -S'10188187' -p95 -sg44 -g45 -sssg59 -(dp96 -g34 -S'NC_000003.12:g.10146507_10146516del' -p97 -sg36 -(dp98 -g38 -g56 -sg40 -S'TGTCCCGATAG' -p99 -sg42 -S'10146503' -p100 -sg44 -g45 -sssssS'NM_000551.3:c.341-7_343del' -p101 -(dp102 -g3 -S'LRG_322t1:c.341-7_343del' -p103 -sg5 -(lp104 -S'NC_000003.11:g.10188187TGTCCCGATAG>T automapped to NC_000003.11:g.10188191_10188200del' -p105 -asg9 -S'NG_008212.3(NM_000551.3):c.341-7_343del' -p106 -sg10 -(lp107 -sg12 -VHomo sapiens von Hippel-Lindau tumor suppressor (VHL), transcript variant 1, mRNA -p108 -sg14 -S'VHL' -p109 -sg16 -(dp110 -g18 -S'NP_000542.1(LRG_322p1):p.?' -p111 -sg20 -S'NP_000542.1:p.?' 
-p112 -ssg22 -g23 -sg24 -S'NC_000003.11(NM_000551.3):c.341-7_343del' -p113 -sg26 -S'LRG_322:g.9873_9882del' -p114 -sg27 -S'NM_000551.3:c.341-7_343del' -p115 -sg29 -S'NG_008212.3:g.9873_9882del' -p116 -sg30 -(dp117 -g32 -(dp118 -g34 -S'NC_000003.11:g.10188191_10188200del' -p119 -sg36 -(dp120 -g38 -g39 -sg40 -S'TGTCCCGATAG' -p121 -sg42 -S'10188187' -p122 -sg44 -g45 -sssg46 -(dp123 -g34 -S'NC_000003.12:g.10146507_10146516del' -p124 -sg36 -(dp125 -g38 -g39 -sg40 -S'TGTCCCGATAG' -p126 -sg42 -S'10146503' -p127 -sg44 -g45 -sssg52 -(dp128 -g34 -S'NC_000003.11:g.10188191_10188200del' -p129 -sg36 -(dp130 -g38 -g56 -sg40 -S'TGTCCCGATAG' -p131 -sg42 -S'10188187' -p132 -sg44 -g45 -sssg59 -(dp133 -g34 -S'NC_000003.12:g.10146507_10146516del' -p134 -sg36 -(dp135 -g38 -g56 -sg40 -S'TGTCCCGATAG' -p136 -sg42 -S'10146503' -p137 -sg44 -g45 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant28.txt b/VariantValidator/testing/testOutputsMasterITS/variant28.txt deleted file mode 100644 index ec785300..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant28.txt +++ /dev/null @@ -1,58 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use NC_000014.8:g.36942492_36989536=' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'HGVS_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NR_138595.1:n.-810_1071+1=' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'HGVS_LRG_variant' -p20 -g4 -sS'HGVS_transcript_variant' -p21 -g4 -sS'HGVS_RefSeqGene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -ssS'flag' -p25 -S'warning' -p26 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant280.txt b/VariantValidator/testing/testOutputsMasterITS/variant280.txt deleted file mode 100644 index efbfd51f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant280.txt +++ /dev/null @@ -1,793 +0,0 @@ -(dp0 -S'NM_001005505.2:c.3408A>C' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'CACNA2D2' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001005505.1:p.(Gln1136His)' -p18 -sS'slr' -p19 -S'NP_001005505.1:p.(Q1136H)' -p20 -ssS'submitted_variant' -p21 -S'3-50402127-T-G' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_001005505.2:c.3408A>C' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000003.11:g.50402127T>G' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr3' -p37 -sS'ref' -p38 -VT -p39 -sS'pos' -p40 -S'50402127' -p41 -sS'alt' -p42 -VG -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000003.12:g.50364696T>G' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'50364696' -p48 -sg42 -g43 -sssS'GRCh37' -p49 -(dp50 -g32 -S'NC_000003.11:g.50402127T>G' -p51 -sg34 -(dp52 -g36 -S'3' -p53 -sg38 -g39 -sg40 -S'50402127' -p54 -sg42 -g43 -sssS'GRCh38' -p55 -(dp56 -g32 -S'NC_000003.12:g.50364696T>G' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'50364696' -p59 -sg42 -g43 -sssssS'NM_006030.2:c.3402A>C' -p60 -(dp61 -g3 -g4 -sg5 -(lp62 -S'A more recent version of the selected reference sequence NM_006030.2 is available (NM_006030.3)' -p63 
-aS'NM_006030.3:c.3402A>C MUST be fully validated prior to use in reports' -p64 -aS'select_variants=NM_006030.3:c.3402A>C' -p65 -aS'RefSeqGene record not available' -p66 -asg8 -g4 -sg9 -(lp67 -sg11 -VHomo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 2, mRNA -p68 -sg13 -S'CACNA2D2' -p69 -sg15 -(dp70 -g17 -S'NP_006021.2:p.(Gln1134His)' -p71 -sg19 -S'NP_006021.2:p.(Q1134H)' -p72 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_006030.2:c.3402A>C' -p73 -sg27 -g4 -sg28 -(dp74 -g30 -(dp75 -g32 -S'NC_000003.11:g.50402127T>G' -p76 -sg34 -(dp77 -g36 -g37 -sg38 -g39 -sg40 -S'50402127' -p78 -sg42 -g43 -sssg49 -(dp79 -g32 -S'NC_000003.11:g.50402127T>G' -p80 -sg34 -(dp81 -g36 -g53 -sg38 -g39 -sg40 -S'50402127' -p82 -sg42 -g43 -sssssS'NM_001174051.1:c.3423A>C' -p83 -(dp84 -g3 -g4 -sg5 -(lp85 -S'A more recent version of the selected reference sequence NM_001174051.1 is available (NM_001174051.2)' -p86 -aS'NM_001174051.2:c.3423A>C MUST be fully validated prior to use in reports' -p87 -aS'select_variants=NM_001174051.2:c.3423A>C' -p88 -aS'RefSeqGene record not available' -p89 -asg8 -g4 -sg9 -(lp90 -sg11 -VHomo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 3, mRNA -p91 -sg13 -S'CACNA2D2' -p92 -sg15 -(dp93 -g17 -S'NP_001167522.1:p.(Gln1141His)' -p94 -sg19 -S'NP_001167522.1:p.(Q1141H)' -p95 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001174051.1:c.3423A>C' -p96 -sg27 -g4 -sg28 -(dp97 -g30 -(dp98 -g32 -S'NC_000003.11:g.50402127T>G' -p99 -sg34 -(dp100 -g36 -g37 -sg38 -g39 -sg40 -S'50402127' -p101 -sg42 -g43 -sssg49 -(dp102 -g32 -S'NC_000003.11:g.50402127T>G' -p103 -sg34 -(dp104 -g36 -g53 -sg38 -g39 -sg40 -S'50402127' -p105 -sg42 -g43 -sssssS'NM_001174051.2:c.3423A>C' -p106 -(dp107 -g3 -g4 -sg5 -(lp108 -S'RefSeqGene record not available' -p109 -asg8 -g4 -sg9 -(lp110 -sg11 -VHomo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 3, mRNA 
-p111 -sg13 -S'CACNA2D2' -p112 -sg15 -(dp113 -g17 -S'NP_001167522.1:p.(Gln1141His)' -p114 -sg19 -S'NP_001167522.1:p.(Q1141H)' -p115 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001174051.2:c.3423A>C' -p116 -sg27 -g4 -sg28 -(dp117 -g30 -(dp118 -g32 -S'NC_000003.11:g.50402127T>G' -p119 -sg34 -(dp120 -g36 -g37 -sg38 -g39 -sg40 -S'50402127' -p121 -sg42 -g43 -sssg44 -(dp122 -g32 -S'NC_000003.12:g.50364696T>G' -p123 -sg34 -(dp124 -g36 -g37 -sg38 -g39 -sg40 -S'50364696' -p125 -sg42 -g43 -sssg49 -(dp126 -g32 -S'NC_000003.11:g.50402127T>G' -p127 -sg34 -(dp128 -g36 -g53 -sg38 -g39 -sg40 -S'50402127' -p129 -sg42 -g43 -sssg55 -(dp130 -g32 -S'NC_000003.12:g.50364696T>G' -p131 -sg34 -(dp132 -g36 -g53 -sg38 -g39 -sg40 -S'50364696' -p133 -sg42 -g43 -sssssS'NM_006030.3:c.3402A>C' -p134 -(dp135 -g3 -g4 -sg5 -(lp136 -S'RefSeqGene record not available' -p137 -asg8 -g4 -sg9 -(lp138 -sg11 -VHomo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 2, mRNA -p139 -sg13 -S'CACNA2D2' -p140 -sg15 -(dp141 -g17 -S'NP_006021.2:p.(Gln1134His)' -p142 -sg19 -S'NP_006021.2:p.(Q1134H)' -p143 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_006030.3:c.3402A>C' -p144 -sg27 -g4 -sg28 -(dp145 -g30 -(dp146 -g32 -S'NC_000003.11:g.50402127T>G' -p147 -sg34 -(dp148 -g36 -g37 -sg38 -g39 -sg40 -S'50402127' -p149 -sg42 -g43 -sssg44 -(dp150 -g32 -S'NC_000003.12:g.50364696T>G' -p151 -sg34 -(dp152 -g36 -g37 -sg38 -g39 -sg40 -S'50364696' -p153 -sg42 -g43 -sssg49 -(dp154 -g32 -S'NC_000003.11:g.50402127T>G' -p155 -sg34 -(dp156 -g36 -g53 -sg38 -g39 -sg40 -S'50402127' -p157 -sg42 -g43 -sssg55 -(dp158 -g32 -S'NC_000003.12:g.50364696T>G' -p159 -sg34 -(dp160 -g36 -g53 -sg38 -g39 -sg40 -S'50364696' -p161 -sg42 -g43 -sssssS'NM_001291101.1:c.3201A>C' -p162 -(dp163 -g3 -g4 -sg5 -(lp164 -S'RefSeqGene record not available' -p165 -asg8 -g4 -sg9 -(lp166 -sg11 -VHomo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 4, mRNA -p167 -sg13 
-S'CACNA2D2' -p168 -sg15 -(dp169 -g17 -S'NP_001278030.1:p.(Gln1067His)' -p170 -sg19 -S'NP_001278030.1:p.(Q1067H)' -p171 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001291101.1:c.3201A>C' -p172 -sg27 -g4 -sg28 -(dp173 -g30 -(dp174 -g32 -S'NC_000003.11:g.50402127T>G' -p175 -sg34 -(dp176 -g36 -g37 -sg38 -g39 -sg40 -S'50402127' -p177 -sg42 -g43 -sssg44 -(dp178 -g32 -S'NC_000003.12:g.50364696T>G' -p179 -sg34 -(dp180 -g36 -g37 -sg38 -g39 -sg40 -S'50364696' -p181 -sg42 -g43 -sssg49 -(dp182 -g32 -S'NC_000003.11:g.50402127T>G' -p183 -sg34 -(dp184 -g36 -g53 -sg38 -g39 -sg40 -S'50402127' -p185 -sg42 -g43 -sssg55 -(dp186 -g32 -S'NC_000003.12:g.50364696T>G' -p187 -sg34 -(dp188 -g36 -g53 -sg38 -g39 -sg40 -S'50364696' -p189 -sg42 -g43 -sssssS'flag' -p190 -S'gene_variant' -p191 -sS'NR_111912.1:n.443-1601T>G' -p192 -(dp193 -g3 -g4 -sg5 -(lp194 -S'RefSeqGene record not available' -p195 -asg8 -g4 -sg9 -(lp196 -sg11 -VHomo sapiens cytochrome b561 family member D2 (CYB561D2), transcript variant 3, non-coding RNA -p197 -sg13 -S'CYB561D2' -p198 -sg15 -(dp199 -g17 -S'Non-coding :n.' 
-p200 -sg19 -g4 -ssg21 -g22 -sg23 -S'NC_000003.11(NR_111912.1):c.443-1601T>G' -p201 -sg24 -g4 -sg25 -S'NR_111912.1:n.443-1601T>G' -p202 -sg27 -g4 -sg28 -(dp203 -g30 -(dp204 -g32 -S'NC_000003.11:g.50402127T>G' -p205 -sg34 -(dp206 -g36 -g37 -sg38 -S'T' -p207 -sg40 -S'50402127' -p208 -sg42 -S'G' -p209 -sssg44 -(dp210 -g32 -S'NC_000003.12:g.50364696T>G' -p211 -sg34 -(dp212 -g36 -g37 -sg38 -g207 -sg40 -S'50364696' -p213 -sg42 -g209 -sssg49 -(dp214 -g32 -S'NC_000003.11:g.50402127T>G' -p215 -sg34 -(dp216 -g36 -g53 -sg38 -g207 -sg40 -S'50402127' -p217 -sg42 -g209 -sssg55 -(dp218 -g32 -S'NC_000003.12:g.50364696T>G' -p219 -sg34 -(dp220 -g36 -g53 -sg38 -g207 -sg40 -S'50364696' -p221 -sg42 -g209 -sssssS'NM_001005505.1:c.3408A>C' -p222 -(dp223 -g3 -g4 -sg5 -(lp224 -S'A more recent version of the selected reference sequence NM_001005505.1 is available (NM_001005505.2)' -p225 -aS'NM_001005505.2:c.3408A>C MUST be fully validated prior to use in reports' -p226 -aS'select_variants=NM_001005505.2:c.3408A>C' -p227 -aS'RefSeqGene record not available' -p228 -asg8 -g4 -sg9 -(lp229 -sg11 -VHomo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 1, mRNA -p230 -sg13 -S'CACNA2D2' -p231 -sg15 -(dp232 -g17 -S'NP_001005505.1:p.(Gln1136His)' -p233 -sg19 -S'NP_001005505.1:p.(Q1136H)' -p234 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001005505.1:c.3408A>C' -p235 -sg27 -g4 -sg28 -(dp236 -g30 -(dp237 -g32 -S'NC_000003.11:g.50402127T>G' -p238 -sg34 -(dp239 -g36 -g37 -sg38 -g39 -sg40 -S'50402127' -p240 -sg42 -g43 -sssg49 -(dp241 -g32 -S'NC_000003.11:g.50402127T>G' -p242 -sg34 -(dp243 -g36 -g53 -sg38 -g39 -sg40 -S'50402127' -p244 -sg42 -g43 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant281.txt b/VariantValidator/testing/testOutputsMasterITS/variant281.txt deleted file mode 100644 index eebdca78..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant281.txt +++ /dev/null @@ -1,999 +0,0 @@ -(dp0 -S'NR_111913.1:n.126G>A' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens cytochrome b561 family member D2 (CYB561D2), transcript variant 4, non-coding RNA -p12 -sS'gene_symbol' -p13 -S'CYB561D2' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'Non-coding :n.' -p18 -sS'slr' -p19 -g4 -ssS'submitted_variant' -p20 -S'3-50402890-G-A' -p21 -sS'genome_context_intronic_sequence' -p22 -g4 -sS'HGVS_LRG_variant' -p23 -g4 -sS'HGVS_transcript_variant' -p24 -S'NR_111913.1:n.126G>A' -p25 -sS'HGVS_RefSeqGene_variant' -p26 -g4 -sS'primary_assembly_loci' -p27 -(dp28 -S'hg19' -p29 -(dp30 -S'HGVS_genomic_description' -p31 -S'NC_000003.11:g.50402890G>A' -p32 -sS'vcf' -p33 -(dp34 -S'chr' -p35 -S'chr3' -p36 -sS'ref' -p37 -S'G' -p38 -sS'pos' -p39 -S'50402890' -p40 -sS'alt' -p41 -S'A' -p42 -sssS'hg38' -p43 -(dp44 -g31 -S'NC_000003.12:g.50365459G>A' -p45 -sg33 -(dp46 -g35 -g36 -sg37 -g38 -sg39 -S'50365459' -p47 -sg41 -g42 -sssS'GRCh37' -p48 -(dp49 -g31 -S'NC_000003.11:g.50402890G>A' -p50 -sg33 -(dp51 -g35 -S'3' -p52 -sg37 -g38 -sg39 -S'50402890' -p53 -sg41 -g42 -sssS'GRCh38' -p54 -(dp55 -g31 -S'NC_000003.12:g.50365459G>A' -p56 -sg33 -(dp57 -g35 -g52 -sg37 -g38 -sg39 -S'50365459' -p58 -sg41 -g42 -sssssS'NR_111912.1:n.443-838G>A' -p59 -(dp60 -g3 -g4 -sg5 -(lp61 -S'RefSeqGene record not available' -p62 -asg8 -g4 -sg9 -(lp63 -sg11 -VHomo sapiens cytochrome b561 family member D2 (CYB561D2), transcript variant 3, non-coding RNA -p64 -sg13 
-S'CYB561D2' -p65 -sg15 -(dp66 -g17 -S'Non-coding :n.' -p67 -sg19 -g4 -ssg20 -g21 -sg22 -S'NC_000003.11(NR_111912.1):c.443-838G>A' -p68 -sg23 -g4 -sg24 -S'NR_111912.1:n.443-838G>A' -p69 -sg26 -g4 -sg27 -(dp70 -g29 -(dp71 -g31 -S'NC_000003.11:g.50402890G>A' -p72 -sg33 -(dp73 -g35 -g36 -sg37 -g38 -sg39 -S'50402890' -p74 -sg41 -g42 -sssg43 -(dp75 -g31 -S'NC_000003.12:g.50365459G>A' -p76 -sg33 -(dp77 -g35 -g36 -sg37 -g38 -sg39 -S'50365459' -p78 -sg41 -g42 -sssg48 -(dp79 -g31 -S'NC_000003.11:g.50402890G>A' -p80 -sg33 -(dp81 -g35 -g52 -sg37 -g38 -sg39 -S'50402890' -p82 -sg41 -g42 -sssg54 -(dp83 -g31 -S'NC_000003.12:g.50365459G>A' -p84 -sg33 -(dp85 -g35 -g52 -sg37 -g38 -sg39 -S'50365459' -p86 -sg41 -g42 -sssssS'NM_001291101.1:c.2788C>T' -p87 -(dp88 -g3 -g4 -sg5 -(lp89 -S'RefSeqGene record not available' -p90 -asg8 -g4 -sg9 -(lp91 -sg11 -VHomo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 4, mRNA -p92 -sg13 -S'CACNA2D2' -p93 -sg15 -(dp94 -g17 -S'NP_001278030.1:p.(Pro930Ser)' -p95 -sg19 -S'NP_001278030.1:p.(P930S)' -p96 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_001291101.1:c.2788C>T' -p97 -sg26 -g4 -sg27 -(dp98 -g29 -(dp99 -g31 -S'NC_000003.11:g.50402890G>A' -p100 -sg33 -(dp101 -g35 -g36 -sg37 -VG -p102 -sg39 -S'50402890' -p103 -sg41 -VA -p104 -sssg43 -(dp105 -g31 -S'NC_000003.12:g.50365459G>A' -p106 -sg33 -(dp107 -g35 -g36 -sg37 -g102 -sg39 -S'50365459' -p108 -sg41 -g104 -sssg48 -(dp109 -g31 -S'NC_000003.11:g.50402890G>A' -p110 -sg33 -(dp111 -g35 -g52 -sg37 -g102 -sg39 -S'50402890' -p112 -sg41 -g104 -sssg54 -(dp113 -g31 -S'NC_000003.12:g.50365459G>A' -p114 -sg33 -(dp115 -g35 -g52 -sg37 -g102 -sg39 -S'50365459' -p116 -sg41 -g104 -sssssS'NM_006030.2:c.2995C>T' -p117 -(dp118 -g3 -g4 -sg5 -(lp119 -S'A more recent version of the selected reference sequence NM_006030.2 is available (NM_006030.3)' -p120 -aS'NM_006030.3:c.2995C>T MUST be fully validated prior to use in reports' -p121 
-aS'select_variants=NM_006030.3:c.2995C>T' -p122 -aS'RefSeqGene record not available' -p123 -asg8 -g4 -sg9 -(lp124 -sg11 -VHomo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 2, mRNA -p125 -sg13 -S'CACNA2D2' -p126 -sg15 -(dp127 -g17 -S'NP_006021.2:p.(Pro999Ser)' -p128 -sg19 -S'NP_006021.2:p.(P999S)' -p129 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_006030.2:c.2995C>T' -p130 -sg26 -g4 -sg27 -(dp131 -g29 -(dp132 -g31 -S'NC_000003.11:g.50402890G>A' -p133 -sg33 -(dp134 -g35 -g36 -sg37 -g102 -sg39 -S'50402890' -p135 -sg41 -g104 -sssg48 -(dp136 -g31 -S'NC_000003.11:g.50402890G>A' -p137 -sg33 -(dp138 -g35 -g52 -sg37 -g102 -sg39 -S'50402890' -p139 -sg41 -g104 -sssssS'NR_111914.1:n.126G>A' -p140 -(dp141 -g3 -g4 -sg5 -(lp142 -S'RefSeqGene record not available' -p143 -asg8 -g4 -sg9 -(lp144 -sg11 -VHomo sapiens cytochrome b561 family member D2 (CYB561D2), transcript variant 5, non-coding RNA -p145 -sg13 -S'CYB561D2' -p146 -sg15 -(dp147 -g17 -S'Non-coding :n.' 
-p148 -sg19 -g4 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NR_111914.1:n.126G>A' -p149 -sg26 -g4 -sg27 -(dp150 -g29 -(dp151 -g31 -S'NC_000003.11:g.50402890G>A' -p152 -sg33 -(dp153 -g35 -g36 -sg37 -g38 -sg39 -S'50402890' -p154 -sg41 -g42 -sssg43 -(dp155 -g31 -S'NC_000003.12:g.50365459G>A' -p156 -sg33 -(dp157 -g35 -g36 -sg37 -g38 -sg39 -S'50365459' -p158 -sg41 -g42 -sssg48 -(dp159 -g31 -S'NC_000003.11:g.50402890G>A' -p160 -sg33 -(dp161 -g35 -g52 -sg37 -g38 -sg39 -S'50402890' -p162 -sg41 -g42 -sssg54 -(dp163 -g31 -S'NC_000003.12:g.50365459G>A' -p164 -sg33 -(dp165 -g35 -g52 -sg37 -g38 -sg39 -S'50365459' -p166 -sg41 -g42 -sssssS'NM_001005505.2:c.2995C>T' -p167 -(dp168 -g3 -g4 -sg5 -(lp169 -S'RefSeqGene record not available' -p170 -asg8 -g4 -sg9 -(lp171 -sg11 -VHomo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 1, mRNA -p172 -sg13 -S'CACNA2D2' -p173 -sg15 -(dp174 -g17 -S'NP_001005505.1:p.(Pro999Ser)' -p175 -sg19 -S'NP_001005505.1:p.(P999S)' -p176 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_001005505.2:c.2995C>T' -p177 -sg26 -g4 -sg27 -(dp178 -g29 -(dp179 -g31 -S'NC_000003.11:g.50402890G>A' -p180 -sg33 -(dp181 -g35 -g36 -sg37 -g102 -sg39 -S'50402890' -p182 -sg41 -g104 -sssg43 -(dp183 -g31 -S'NC_000003.12:g.50365459G>A' -p184 -sg33 -(dp185 -g35 -g36 -sg37 -g102 -sg39 -S'50365459' -p186 -sg41 -g104 -sssg48 -(dp187 -g31 -S'NC_000003.11:g.50402890G>A' -p188 -sg33 -(dp189 -g35 -g52 -sg37 -g102 -sg39 -S'50402890' -p190 -sg41 -g104 -sssg54 -(dp191 -g31 -S'NC_000003.12:g.50365459G>A' -p192 -sg33 -(dp193 -g35 -g52 -sg37 -g102 -sg39 -S'50365459' -p194 -sg41 -g104 -sssssS'flag' -p195 -S'gene_variant' -p196 -sS'NM_001174051.1:c.3016C>T' -p197 -(dp198 -g3 -g4 -sg5 -(lp199 -S'A more recent version of the selected reference sequence NM_001174051.1 is available (NM_001174051.2)' -p200 -aS'NM_001174051.2:c.3016C>T MUST be fully validated prior to use in reports' -p201 -aS'select_variants=NM_001174051.2:c.3016C>T' -p202 -aS'RefSeqGene 
record not available' -p203 -asg8 -g4 -sg9 -(lp204 -sg11 -VHomo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 3, mRNA -p205 -sg13 -S'CACNA2D2' -p206 -sg15 -(dp207 -g17 -S'NP_001167522.1:p.(Pro1006Ser)' -p208 -sg19 -S'NP_001167522.1:p.(P1006S)' -p209 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_001174051.1:c.3016C>T' -p210 -sg26 -g4 -sg27 -(dp211 -g29 -(dp212 -g31 -S'NC_000003.11:g.50402890G>A' -p213 -sg33 -(dp214 -g35 -g36 -sg37 -g102 -sg39 -S'50402890' -p215 -sg41 -g104 -sssg48 -(dp216 -g31 -S'NC_000003.11:g.50402890G>A' -p217 -sg33 -(dp218 -g35 -g52 -sg37 -g102 -sg39 -S'50402890' -p219 -sg41 -g104 -sssssS'NM_001174051.2:c.3016C>T' -p220 -(dp221 -g3 -g4 -sg5 -(lp222 -S'RefSeqGene record not available' -p223 -asg8 -g4 -sg9 -(lp224 -sg11 -VHomo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 3, mRNA -p225 -sg13 -S'CACNA2D2' -p226 -sg15 -(dp227 -g17 -S'NP_001167522.1:p.(Pro1006Ser)' -p228 -sg19 -S'NP_001167522.1:p.(P1006S)' -p229 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_001174051.2:c.3016C>T' -p230 -sg26 -g4 -sg27 -(dp231 -g29 -(dp232 -g31 -S'NC_000003.11:g.50402890G>A' -p233 -sg33 -(dp234 -g35 -g36 -sg37 -g102 -sg39 -S'50402890' -p235 -sg41 -g104 -sssg43 -(dp236 -g31 -S'NC_000003.12:g.50365459G>A' -p237 -sg33 -(dp238 -g35 -g36 -sg37 -g102 -sg39 -S'50365459' -p239 -sg41 -g104 -sssg48 -(dp240 -g31 -S'NC_000003.11:g.50402890G>A' -p241 -sg33 -(dp242 -g35 -g52 -sg37 -g102 -sg39 -S'50402890' -p243 -sg41 -g104 -sssg54 -(dp244 -g31 -S'NC_000003.12:g.50365459G>A' -p245 -sg33 -(dp246 -g35 -g52 -sg37 -g102 -sg39 -S'50365459' -p247 -sg41 -g104 -sssssS'NM_006030.3:c.2995C>T' -p248 -(dp249 -g3 -g4 -sg5 -(lp250 -S'RefSeqGene record not available' -p251 -asg8 -g4 -sg9 -(lp252 -sg11 -VHomo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 2, mRNA -p253 -sg13 -S'CACNA2D2' -p254 -sg15 -(dp255 -g17 -S'NP_006021.2:p.(Pro999Ser)' 
-p256 -sg19 -S'NP_006021.2:p.(P999S)' -p257 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_006030.3:c.2995C>T' -p258 -sg26 -g4 -sg27 -(dp259 -g29 -(dp260 -g31 -S'NC_000003.11:g.50402890G>A' -p261 -sg33 -(dp262 -g35 -g36 -sg37 -g102 -sg39 -S'50402890' -p263 -sg41 -g104 -sssg43 -(dp264 -g31 -S'NC_000003.12:g.50365459G>A' -p265 -sg33 -(dp266 -g35 -g36 -sg37 -g102 -sg39 -S'50365459' -p267 -sg41 -g104 -sssg48 -(dp268 -g31 -S'NC_000003.11:g.50402890G>A' -p269 -sg33 -(dp270 -g35 -g52 -sg37 -g102 -sg39 -S'50402890' -p271 -sg41 -g104 -sssg54 -(dp272 -g31 -S'NC_000003.12:g.50365459G>A' -p273 -sg33 -(dp274 -g35 -g52 -sg37 -g102 -sg39 -S'50365459' -p275 -sg41 -g104 -sssssS'NM_001005505.1:c.2995C>T' -p276 -(dp277 -g3 -g4 -sg5 -(lp278 -S'A more recent version of the selected reference sequence NM_001005505.1 is available (NM_001005505.2)' -p279 -aS'NM_001005505.2:c.2995C>T MUST be fully validated prior to use in reports' -p280 -aS'select_variants=NM_001005505.2:c.2995C>T' -p281 -aS'RefSeqGene record not available' -p282 -asg8 -g4 -sg9 -(lp283 -sg11 -VHomo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 1, mRNA -p284 -sg13 -S'CACNA2D2' -p285 -sg15 -(dp286 -g17 -S'NP_001005505.1:p.(Pro999Ser)' -p287 -sg19 -S'NP_001005505.1:p.(P999S)' -p288 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_001005505.1:c.2995C>T' -p289 -sg26 -g4 -sg27 -(dp290 -g29 -(dp291 -g31 -S'NC_000003.11:g.50402890G>A' -p292 -sg33 -(dp293 -g35 -g36 -sg37 -g102 -sg39 -S'50402890' -p294 -sg41 -g104 -sssg48 -(dp295 -g31 -S'NC_000003.11:g.50402890G>A' -p296 -sg33 -(dp297 -g35 -g52 -sg37 -g102 -sg39 -S'50402890' -p298 -sg41 -g104 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant282.txt b/VariantValidator/testing/testOutputsMasterITS/variant282.txt deleted file mode 100644 index 705d39bd..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant282.txt +++ /dev/null @@ -1,563 +0,0 @@ -(dp0 -S'' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000003.11:g.57851007AG>A automapped to NC_000003.11:g.57851008delG' -p7 -aS'Unable to assign transcript identity records to NM_001304420.1, potentially an obsolete record :' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -g4 -sS'gene_symbol' -p13 -g4 -sS'HGVS_predicted_protein_consequence' -p14 -(dp15 -S'tlr' -p16 -g4 -sS'slr' -p17 -g4 -ssS'submitted_variant' -p18 -S'3-57851007-AG-A' -p19 -sS'genome_context_intronic_sequence' -p20 -g4 -sS'HGVS_LRG_variant' -p21 -g4 -sS'HGVS_transcript_variant' -p22 -g4 -sS'HGVS_RefSeqGene_variant' -p23 -g4 -sS'primary_assembly_loci' -p24 -(dp25 -ssS'NM_001304420.2:c.1186+424del' -p26 -(dp27 -g3 -g4 -sg5 -(lp28 -S'NC_000003.11:g.57851007AG>A automapped to NC_000003.11:g.57851008delG' -p29 -aS'RefSeqGene record not available' -p30 -asg9 -g4 -sg10 -(lp31 -sg12 -VHomo sapiens sarcolemma associated protein (SLMAP), transcript variant 1, mRNA -p32 -sg13 -S'SLMAP' -p33 -sg14 -(dp34 -g16 -S'NP_001291349.1:p.?' -p35 -sg17 -S'NP_001291349.1:p.?' 
-p36 -ssg18 -g19 -sg20 -S'NC_000003.11(NM_001304420.2):c.1186+424del' -p37 -sg21 -g4 -sg22 -S'NM_001304420.2:c.1186+424del' -p38 -sg23 -g4 -sg24 -(dp39 -S'hg19' -p40 -(dp41 -S'HGVS_genomic_description' -p42 -S'NC_000003.11:g.57851008del' -p43 -sS'vcf' -p44 -(dp45 -S'chr' -p46 -S'chr3' -p47 -sS'ref' -p48 -S'AG' -p49 -sS'pos' -p50 -S'57851007' -p51 -sS'alt' -p52 -S'A' -p53 -sssS'hg38' -p54 -(dp55 -g42 -S'NC_000003.12:g.57865281del' -p56 -sg44 -(dp57 -g46 -g47 -sg48 -S'AG' -p58 -sg50 -S'57865280' -p59 -sg52 -g53 -sssS'GRCh37' -p60 -(dp61 -g42 -S'NC_000003.11:g.57851008del' -p62 -sg44 -(dp63 -g46 -S'3' -p64 -sg48 -S'AG' -p65 -sg50 -S'57851007' -p66 -sg52 -g53 -sssS'GRCh38' -p67 -(dp68 -g42 -S'NC_000003.12:g.57865281del' -p69 -sg44 -(dp70 -g46 -g64 -sg48 -S'AG' -p71 -sg50 -S'57865280' -p72 -sg52 -g53 -sssssS' ' -p73 -(dp74 -g3 -g4 -sg5 -(lp75 -S'NC_000003.11:g.57851007AG>A automapped to NC_000003.11:g.57851008delG' -p76 -aS'Unable to assign transcript identity records to NM_001304421.1, potentially an obsolete record :' -p77 -asg9 -g4 -sg10 -(lp78 -sg12 -g4 -sg13 -g4 -sg14 -(dp79 -g16 -g4 -sg17 -g4 -ssg18 -g19 -sg20 -g4 -sg21 -g4 -sg22 -g4 -sg23 -g4 -sg24 -(dp80 -ssS' ' -p81 -(dp82 -g3 -g4 -sg5 -(lp83 -S'NC_000003.11:g.57851007AG>A automapped to NC_000003.11:g.57851008delG' -p84 -aS'Unable to assign transcript identity records to NM_007159.3, potentially an obsolete record :' -p85 -asg9 -g4 -sg10 -(lp86 -sg12 -g4 -sg13 -g4 -sg14 -(dp87 -g16 -g4 -sg17 -g4 -ssg18 -g19 -sg20 -g4 -sg21 -g4 -sg22 -g4 -sg23 -g4 -sg24 -(dp88 -ssS'NM_007159.4:c.1135+565del' -p89 -(dp90 -g3 -g4 -sg5 -(lp91 -S'NC_000003.11:g.57851007AG>A automapped to NC_000003.11:g.57851008delG' -p92 -aS'RefSeqGene record not available' -p93 -asg9 -g4 -sg10 -(lp94 -sg12 -VHomo sapiens sarcolemma associated protein (SLMAP), transcript variant 2, mRNA -p95 -sg13 -S'SLMAP' -p96 -sg14 -(dp97 -g16 -S'NP_009090.2:p.?' -p98 -sg17 -S'NP_009090.2:p.?' 
-p99 -ssg18 -g19 -sg20 -S'NC_000003.11(NM_007159.4):c.1135+565del' -p100 -sg21 -g4 -sg22 -S'NM_007159.4:c.1135+565del' -p101 -sg23 -g4 -sg24 -(dp102 -g40 -(dp103 -g42 -S'NC_000003.11:g.57851008del' -p104 -sg44 -(dp105 -g46 -g47 -sg48 -S'AG' -p106 -sg50 -S'57851007' -p107 -sg52 -g53 -sssg54 -(dp108 -g42 -S'NC_000003.12:g.57865281del' -p109 -sg44 -(dp110 -g46 -g47 -sg48 -S'AG' -p111 -sg50 -S'57865280' -p112 -sg52 -g53 -sssg60 -(dp113 -g42 -S'NC_000003.11:g.57851008del' -p114 -sg44 -(dp115 -g46 -g64 -sg48 -S'AG' -p116 -sg50 -S'57851007' -p117 -sg52 -g53 -sssg67 -(dp118 -g42 -S'NC_000003.12:g.57865281del' -p119 -sg44 -(dp120 -g46 -g64 -sg48 -S'AG' -p121 -sg50 -S'57865280' -p122 -sg52 -g53 -sssssS'NM_001304421.2:c.1135+565del' -p123 -(dp124 -g3 -g4 -sg5 -(lp125 -S'NC_000003.11:g.57851007AG>A automapped to NC_000003.11:g.57851008delG' -p126 -aS'RefSeqGene record not available' -p127 -asg9 -g4 -sg10 -(lp128 -sg12 -VHomo sapiens sarcolemma associated protein (SLMAP), transcript variant 3, mRNA -p129 -sg13 -S'SLMAP' -p130 -sg14 -(dp131 -g16 -S'NP_001291350.1:p.?' -p132 -sg17 -S'NP_001291350.1:p.?' 
-p133 -ssg18 -g19 -sg20 -S'NC_000003.11(NM_001304421.2):c.1135+565del' -p134 -sg21 -g4 -sg22 -S'NM_001304421.2:c.1135+565del' -p135 -sg23 -g4 -sg24 -(dp136 -g40 -(dp137 -g42 -S'NC_000003.11:g.57851008del' -p138 -sg44 -(dp139 -g46 -g47 -sg48 -S'AG' -p140 -sg50 -S'57851007' -p141 -sg52 -g53 -sssg54 -(dp142 -g42 -S'NC_000003.12:g.57865281del' -p143 -sg44 -(dp144 -g46 -g47 -sg48 -S'AG' -p145 -sg50 -S'57865280' -p146 -sg52 -g53 -sssg60 -(dp147 -g42 -S'NC_000003.11:g.57851008del' -p148 -sg44 -(dp149 -g46 -g64 -sg48 -S'AG' -p150 -sg50 -S'57851007' -p151 -sg52 -g53 -sssg67 -(dp152 -g42 -S'NC_000003.12:g.57865281del' -p153 -sg44 -(dp154 -g46 -g64 -sg48 -S'AG' -p155 -sg50 -S'57865280' -p156 -sg52 -g53 -sssssS'flag' -p157 -S'gene_variant' -p158 -sS'NM_007159.2:c.1135+565del' -p159 -(dp160 -g3 -g4 -sg5 -(lp161 -S'NC_000003.11:g.57851007AG>A automapped to NC_000003.11:g.57851008delG' -p162 -aS'A more recent version of the selected reference sequence NM_007159.2 is available (NM_007159.4)' -p163 -aS'NM_007159.4:c.1135+565delG MUST be fully validated prior to use in reports' -p164 -aS'select_variants=NM_007159.4:c.1135+565del' -p165 -aS'RefSeqGene record not available' -p166 -asg9 -g4 -sg10 -(lp167 -sg12 -VHomo sapiens sarcolemma associated protein (SLMAP), mRNA -p168 -sg13 -S'SLMAP' -p169 -sg14 -(dp170 -g16 -S'NP_009090.2:p.?' -p171 -sg17 -S'NP_009090.2:p.?' -p172 -ssg18 -g19 -sg20 -S'NC_000003.11(NM_007159.2):c.1135+565del' -p173 -sg21 -g4 -sg22 -S'NM_007159.2:c.1135+565del' -p174 -sg23 -g4 -sg24 -(dp175 -g40 -(dp176 -g42 -S'NC_000003.11:g.57851008del' -p177 -sg44 -(dp178 -g46 -g47 -sg48 -S'AG' -p179 -sg50 -S'57851007' -p180 -sg52 -g53 -sssg60 -(dp181 -g42 -S'NC_000003.11:g.57851008del' -p182 -sg44 -(dp183 -g46 -g64 -sg48 -S'AG' -p184 -sg50 -S'57851007' -p185 -sg52 -g53 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant283.txt b/VariantValidator/testing/testOutputsMasterITS/variant283.txt deleted file mode 100644 index 8e329f71..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant283.txt +++ /dev/null @@ -1,244 +0,0 @@ -(dp0 -S'NM_001178065.1:c.3061C=' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens calcium sensing receptor (CASR), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'CASR' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001171536.1:p.(Gln1021=)' -p18 -sS'slr' -p19 -S'NP_001171536.1:p.(Q1021=)' -p20 -ssS'submitted_variant' -p21 -S'3-122003832-G-C' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_001178065.1:c.3061C=' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000003.11:g.122003832G>C' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr3' -p37 -sS'ref' -p38 -S'G' -p39 -sS'pos' -p40 -S'122003832' -p41 -sS'alt' -p42 -S'C' -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000003.12:g.122284985G>C' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'122284985' -p48 -sg42 -g43 -sssS'GRCh37' -p49 -(dp50 -g32 -S'NC_000003.11:g.122003832G>C' -p51 -sg34 -(dp52 -g36 -S'3' -p53 -sg38 -g39 -sg40 -S'122003832' -p54 -sg42 -g43 -sssS'GRCh38' -p55 -(dp56 -g32 -S'NC_000003.12:g.122284985G>C' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'122284985' -p59 -sg42 -g43 -sssssS'flag' -p60 -S'gene_variant' -p61 -sS'NM_000388.3:c.3031C=' -p62 -(dp63 -g3 -g4 -sg5 -(lp64 -sg8 -g4 -sg9 -(lp65 -sg11 -VHomo sapiens calcium sensing receptor (CASR), transcript variant 2, mRNA -p66 -sg13 -S'CASR' 
-p67 -sg15 -(dp68 -g17 -S'NP_000379.2:p.(Gln1011=)' -p69 -sg19 -S'NP_000379.2:p.(Q1011=)' -p70 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_000388.3:c.3031C=' -p71 -sg27 -S'NG_009058.1:g.106303C=' -p72 -sg28 -(dp73 -g30 -(dp74 -g32 -S'NC_000003.11:g.122003832G>C' -p75 -sg34 -(dp76 -g36 -g37 -sg38 -g39 -sg40 -S'122003832' -p77 -sg42 -g43 -sssg44 -(dp78 -g32 -S'NC_000003.12:g.122284985G>C' -p79 -sg34 -(dp80 -g36 -g37 -sg38 -g39 -sg40 -S'122284985' -p81 -sg42 -g43 -sssg49 -(dp82 -g32 -S'NC_000003.11:g.122003832G>C' -p83 -sg34 -(dp84 -g36 -g53 -sg38 -g39 -sg40 -S'122003832' -p85 -sg42 -g43 -sssg55 -(dp86 -g32 -S'NC_000003.12:g.122284985G>C' -p87 -sg34 -(dp88 -g36 -g53 -sg38 -g39 -sg40 -S'122284985' -p89 -sg42 -g43 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant284.txt b/VariantValidator/testing/testOutputsMasterITS/variant284.txt deleted file mode 100644 index c0df247c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant284.txt +++ /dev/null @@ -1,448 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_001257069.1:c.45_46insCCT' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000004.11:g.153332910C>CAGG automapped to NC_000004.11:g.153332912_153332913insGAG' -p9 -aS'RefSeqGene record not available' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens F-box and WD repeat domain containing 7 (FBXW7), transcript variant 4, mRNA -p15 -sS'gene_symbol' -p16 -S'FBXW7' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_001243998.1:p.(Thr15_Gly16insPro)' -p21 -sS'slr' -p22 -S'NP_001243998.1:p.(T15_G16insP)' -p23 -ssS'submitted_variant' -p24 -S'4-153332910-C-CAGG' -p25 -sS'genome_context_intronic_sequence' -p26 -g6 -sS'HGVS_LRG_variant' -p27 -g6 -sS'HGVS_transcript_variant' -p28 -S'NM_001257069.1:c.45_46insCCT' -p29 -sS'HGVS_RefSeqGene_variant' -p30 
-g6 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000004.11:g.153332910_153332911insAGG' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr4' -p40 -sS'ref' -p41 -S'C' -p42 -sS'pos' -p43 -S'153332910' -p44 -sS'alt' -p45 -VCAGG -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000004.12:g.152411758_152411759insAGG' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -g42 -sg43 -S'152411758' -p51 -sg45 -VCAGG -p52 -sssS'GRCh37' -p53 -(dp54 -g35 -S'NC_000004.11:g.153332910_153332911insAGG' -p55 -sg37 -(dp56 -g39 -S'4' -p57 -sg41 -g42 -sg43 -S'153332910' -p58 -sg45 -VCAGG -p59 -sssS'GRCh38' -p60 -(dp61 -g35 -S'NC_000004.12:g.152411758_152411759insAGG' -p62 -sg37 -(dp63 -g39 -g57 -sg41 -g42 -sg43 -S'152411758' -p64 -sg45 -VCAGG -p65 -sssssS'NM_001349798.1:c.45_46insCCT' -p66 -(dp67 -g5 -g6 -sg7 -(lp68 -S'NC_000004.11:g.153332910C>CAGG automapped to NC_000004.11:g.153332912_153332913insGAG' -p69 -aS'A more recent version of the selected reference sequence NM_001349798.1 is available (NM_001349798.2)' -p70 -aS'NM_001349798.2:c.45_46insCCT MUST be fully validated prior to use in reports' -p71 -aS'select_variants=NM_001349798.2:c.45_46insCCT' -p72 -aS'RefSeqGene record not available' -p73 -asg11 -g6 -sg12 -(lp74 -sg14 -VHomo sapiens F-box and WD repeat domain containing 7 (FBXW7), transcript variant 5, mRNA -p75 -sg16 -S'FBXW7' -p76 -sg18 -(dp77 -g20 -S'NP_361014.1:p.(Thr15_Gly16insPro)' -p78 -sg22 -S'NP_361014.1:p.(T15_G16insP)' -p79 -ssg24 -g25 -sg26 -g6 -sg27 -g6 -sg28 -S'NM_001349798.1:c.45_46insCCT' -p80 -sg30 -g6 -sg31 -(dp81 -g33 -(dp82 -g35 -S'NC_000004.11:g.153332910_153332911insAGG' -p83 -sg37 -(dp84 -g39 -g40 -sg41 -g42 -sg43 -S'153332910' -p85 -sg45 -VCAGG -p86 -sssg47 -(dp87 -g35 -S'NC_000004.12:g.152411758_152411759insAGG' -p88 -sg37 -(dp89 -g39 -g40 -sg41 -g42 -sg43 -S'152411758' -p90 -sg45 -VCAGG -p91 -sssg53 -(dp92 -g35 -S'NC_000004.11:g.153332910_153332911insAGG' -p93 -sg37 -(dp94 -g39 -g57 -sg41 -g42 -sg43 -S'153332910' -p95 -sg45 
-VCAGG -p96 -sssg60 -(dp97 -g35 -S'NC_000004.12:g.152411758_152411759insAGG' -p98 -sg37 -(dp99 -g39 -g57 -sg41 -g42 -sg43 -S'152411758' -p100 -sg45 -VCAGG -p101 -sssssS'NM_001349798.2:c.45_46insCCT' -p102 -(dp103 -g5 -S'LRG_1141t1:c.45_46insCCT' -p104 -sg7 -(lp105 -S'NC_000004.11:g.153332910C>CAGG automapped to NC_000004.11:g.153332912_153332913insGAG' -p106 -aS'RefSeqGene record not available' -p107 -asg11 -g6 -sg12 -(lp108 -sg14 -VHomo sapiens F-box and WD repeat domain containing 7 (FBXW7), transcript variant 5, mRNA -p109 -sg16 -S'FBXW7' -p110 -sg18 -(dp111 -g20 -S'NP_001336727.1:p.(Thr15_Gly16insPro)' -p112 -sg22 -S'NP_001336727.1:p.(T15_G16insP)' -p113 -ssg24 -g25 -sg26 -g6 -sg27 -g6 -sg28 -S'NM_001349798.2:c.45_46insCCT' -p114 -sg30 -g6 -sg31 -(dp115 -g33 -(dp116 -g35 -S'NC_000004.11:g.153332910_153332911insAGG' -p117 -sg37 -(dp118 -g39 -g40 -sg41 -g42 -sg43 -S'153332910' -p119 -sg45 -VCAGG -p120 -sssg53 -(dp121 -g35 -S'NC_000004.11:g.153332910_153332911insAGG' -p122 -sg37 -(dp123 -g39 -g57 -sg41 -g42 -sg43 -S'153332910' -p124 -sg45 -VCAGG -p125 -sssssS'NM_033632.3:c.45_46insCCT' -p126 -(dp127 -g5 -g6 -sg7 -(lp128 -S'NC_000004.11:g.153332910C>CAGG automapped to NC_000004.11:g.153332912_153332913insGAG' -p129 -asg11 -g6 -sg12 -(lp130 -sg14 -VHomo sapiens F-box and WD repeat domain containing 7 (FBXW7), transcript variant 1, mRNA -p131 -sg16 -S'FBXW7' -p132 -sg18 -(dp133 -g20 -S'NP_361014.1:p.(Thr15_Gly16insPro)' -p134 -sg22 -S'NP_361014.1:p.(T15_G16insP)' -p135 -ssg24 -g25 -sg26 -g6 -sg27 -g6 -sg28 -S'NM_033632.3:c.45_46insCCT' -p136 -sg30 -S'NG_029466.1:g.128262_128263insCCT' -p137 -sg31 -(dp138 -g33 -(dp139 -g35 -S'NC_000004.11:g.153332910_153332911insAGG' -p140 -sg37 -(dp141 -g39 -g40 -sg41 -g42 -sg43 -S'153332910' -p142 -sg45 -VCAGG -p143 -sssg47 -(dp144 -g35 -S'NC_000004.12:g.152411758_152411759insAGG' -p145 -sg37 -(dp146 -g39 -g40 -sg41 -g42 -sg43 -S'152411758' -p147 -sg45 -VCAGG -p148 -sssg53 -(dp149 -g35 -S'NC_000004.11:g.153332910_153332911insAGG' 
-p150 -sg37 -(dp151 -g39 -g57 -sg41 -g42 -sg43 -S'153332910' -p152 -sg45 -VCAGG -p153 -sssg60 -(dp154 -g35 -S'NC_000004.12:g.152411758_152411759insAGG' -p155 -sg37 -(dp156 -g39 -g57 -sg41 -g42 -sg43 -S'152411758' -p157 -sg45 -VCAGG -p158 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant285.txt b/VariantValidator/testing/testOutputsMasterITS/variant285.txt deleted file mode 100644 index ef07e19c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant285.txt +++ /dev/null @@ -1,138 +0,0 @@ -(dp0 -S'flag' -p1 -S'intergenic' -p2 -sS'Intergenic_Variant_1' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'No transcripts found that fully overlap the described variation in the genomic sequence' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -g6 -sS'gene_symbol' -p14 -g6 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -g6 -sS'slr' -p18 -g6 -ssS'submitted_variant' -p19 -S'5-1295183-G-A' -p20 -sS'genome_context_intronic_sequence' -p21 -g6 -sS'HGVS_LRG_variant' -p22 -S'LRG_343:g.4980C>T' -p23 -sS'HGVS_transcript_variant' -p24 -g6 -sS'HGVS_RefSeqGene_variant' -p25 -S'NG_009265.1:g.4980C>T' -p26 -sS'primary_assembly_loci' -p27 -(dp28 -S'hg19' -p29 -(dp30 -S'HGVS_genomic_description' -p31 -S'NC_000005.9:g.1295183G>A' -p32 -sS'vcf' -p33 -(dp34 -S'chr' -p35 -S'chr5' -p36 -sS'ref' -p37 -S'G' -p38 -sS'pos' -p39 -S'1295183' -p40 -sS'alt' -p41 -S'A' -p42 -sssS'hg38' -p43 -(dp44 -g31 -S'NC_000005.10:g.1295068G>A' -p45 -sg33 -(dp46 -g35 -g36 -sg37 -g38 -sg39 -S'1295068' -p47 -sg41 -g42 -sssS'GRCh37' -p48 -(dp49 -g31 -S'NC_000005.9:g.1295183G>A' -p50 -sg33 -(dp51 -g35 -S'5' -p52 -sg37 -g38 -sg39 -S'1295183' -p53 -sg41 -g42 -sssS'GRCh38' -p54 -(dp55 -g31 -S'NC_000005.10:g.1295068G>A' -p56 -sg33 -(dp57 -g35 -g52 -sg37 -g38 -sg39 -S'1295068' -p58 -sg41 -g42 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant286.txt b/VariantValidator/testing/testOutputsMasterITS/variant286.txt deleted file mode 100644 index 842fd174..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant286.txt +++ /dev/null @@ -1,339 +0,0 @@ -(dp0 -S'NM_003664.4:c.2409_2411del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_170t1:c.2409_2411del' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000005.9:g.77396835TTTC>T automapped to NC_000005.9:g.77396838_77396840delCTT' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -S'' -p9 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens adaptor related protein complex 3 subunit beta 1 (AP3B1), transcript variant 1, mRNA -p13 -sS'gene_symbol' -p14 -S'AP3B1' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_003655.3(LRG_170p1):p.(Lys804del)' -p19 -sS'slr' -p20 -S'NP_003655.3:p.(K804del)' -p21 -ssS'submitted_variant' -p22 -S'5-77396835-TTTC-T' -p23 -sS'genome_context_intronic_sequence' -p24 -g9 -sS'HGVS_LRG_variant' -p25 -S'LRG_170:g.198691_198693del' -p26 -sS'HGVS_transcript_variant' -p27 -S'NM_003664.4:c.2409_2411del' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -S'NG_007268.1:g.198691_198693del' -p30 -sS'primary_assembly_loci' -p31 -(dp32 -S'GRCh38' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000005.10:g.78101012_78101014del' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'5' -p40 -sS'ref' -p41 -S'TTTC' -p42 -sS'pos' -p43 -S'78101011' -p44 -sS'alt' -p45 -S'T' -p46 -sssS'GRCh37' -p47 -(dp48 -g35 -S'NC_000005.9:g.77396836_77396838del' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -S'TTTC' -p51 -sg43 -S'77396835' -p52 -sg45 -g46 -sssS'hg38' -p53 -(dp54 -g35 -S'NC_000005.10:g.78101012_78101014del' -p55 -sg37 -(dp56 -g39 -S'chr5' -p57 -sg41 -S'TTTC' -p58 -sg43 -S'78101011' -p59 -sg45 -g46 -sssS'hg19' -p60 -(dp61 -g35 -S'NC_000005.9:g.77396836_77396838del' -p62 -sg37 -(dp63 -g39 -g57 -sg41 -S'TTTC' -p64 -sg43 
-S'77396835' -p65 -sg45 -g46 -sssssS'flag' -p66 -S'gene_variant' -p67 -sS'NM_003664.3:c.2409_2411del' -p68 -(dp69 -g3 -g9 -sg5 -(lp70 -S'NC_000005.9:g.77396835TTTC>T automapped to NC_000005.9:g.77396838_77396840delCTT' -p71 -aS'A more recent version of the selected reference sequence NM_003664.3 is available (NM_003664.4)' -p72 -aS'NM_003664.4:c.2409_2411delGAA MUST be fully validated prior to use in reports' -p73 -aS'select_variants=NM_003664.4:c.2409_2411del' -p74 -aS'RefSeqGene record not available' -p75 -asg8 -g9 -sg10 -(lp76 -sg12 -VHomo sapiens adaptor-related protein complex 3, beta 1 subunit (AP3B1), mRNA -p77 -sg14 -S'AP3B1' -p78 -sg16 -(dp79 -g18 -S'NP_003655.3(LRG_170p1):p.(Lys804del)' -p80 -sg20 -S'NP_003655.3:p.(K804del)' -p81 -ssg22 -g23 -sg24 -g9 -sg25 -g9 -sg27 -S'NM_003664.3:c.2409_2411del' -p82 -sg29 -g9 -sg31 -(dp83 -g60 -(dp84 -g35 -S'NC_000005.9:g.77396836_77396838del' -p85 -sg37 -(dp86 -g39 -g57 -sg41 -S'TTTC' -p87 -sg43 -S'77396835' -p88 -sg45 -g46 -sssg47 -(dp89 -g35 -S'NC_000005.9:g.77396836_77396838del' -p90 -sg37 -(dp91 -g39 -g40 -sg41 -S'TTTC' -p92 -sg43 -S'77396835' -p93 -sg45 -g46 -sssssS'NM_001271769.1:c.2262_2264del' -p94 -(dp95 -g3 -g9 -sg5 -(lp96 -S'NC_000005.9:g.77396835TTTC>T automapped to NC_000005.9:g.77396838_77396840delCTT' -p97 -aS'RefSeqGene record not available' -p98 -asg8 -g9 -sg10 -(lp99 -sg12 -VHomo sapiens adaptor related protein complex 3 subunit beta 1 (AP3B1), transcript variant 2, mRNA -p100 -sg14 -S'AP3B1' -p101 -sg16 -(dp102 -g18 -S'NP_001258698.1:p.(Lys755del)' -p103 -sg20 -S'NP_001258698.1:p.(K755del)' -p104 -ssg22 -g23 -sg24 -g9 -sg25 -g9 -sg27 -S'NM_001271769.1:c.2262_2264del' -p105 -sg29 -g9 -sg31 -(dp106 -g33 -(dp107 -g35 -S'NC_000005.10:g.78101012_78101014del' -p108 -sg37 -(dp109 -g39 -g40 -sg41 -S'TTTC' -p110 -sg43 -S'78101011' -p111 -sg45 -g46 -sssg47 -(dp112 -g35 -S'NC_000005.9:g.77396836_77396838del' -p113 -sg37 -(dp114 -g39 -g40 -sg41 -S'TTTC' -p115 -sg43 -S'77396835' -p116 -sg45 -g46 -sssg53 -(dp117 
-g35 -S'NC_000005.10:g.78101012_78101014del' -p118 -sg37 -(dp119 -g39 -g57 -sg41 -S'TTTC' -p120 -sg43 -S'78101011' -p121 -sg45 -g46 -sssg60 -(dp122 -g35 -S'NC_000005.9:g.77396836_77396838del' -p123 -sg37 -(dp124 -g39 -g57 -sg41 -S'TTTC' -p125 -sg43 -S'77396835' -p126 -sg45 -g46 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant287.txt b/VariantValidator/testing/testOutputsMasterITS/variant287.txt deleted file mode 100644 index 146e1b2e..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant287.txt +++ /dev/null @@ -1,674 +0,0 @@ -(dp0 -S'NM_000414.3:c.302+3_302+6del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000005.9:g.118811422GGTGA>G automapped to NC_000005.9:g.118811425_118811428del' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -S'NG_008182.1(NM_000414.3):c.302+3_302+6del' -p9 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 2, mRNA -p13 -sS'gene_symbol' -p14 -S'HSD17B4' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_000405.1:p.?' -p19 -sS'slr' -p20 -S'NP_000405.1:p.?' 
-p21 -ssS'submitted_variant' -p22 -S'5-118811422-GGTGA-G' -p23 -sS'genome_context_intronic_sequence' -p24 -S'NC_000005.9(NM_000414.3):c.302+3_302+6del' -p25 -sS'HGVS_LRG_variant' -p26 -g4 -sS'HGVS_transcript_variant' -p27 -S'NM_000414.3:c.302+3_302+6del' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -S'NG_008182.1:g.28278_28281del' -p30 -sS'primary_assembly_loci' -p31 -(dp32 -S'GRCh38' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000005.10:g.119475730_119475733del' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'5' -p40 -sS'ref' -p41 -S'GGTGA' -p42 -sS'pos' -p43 -S'119475727' -p44 -sS'alt' -p45 -S'G' -p46 -sssS'GRCh37' -p47 -(dp48 -g35 -S'NC_000005.9:g.118811425_118811428del' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -S'GGTGA' -p51 -sg43 -S'118811422' -p52 -sg45 -g46 -sssS'hg38' -p53 -(dp54 -g35 -S'NC_000005.10:g.119475730_119475733del' -p55 -sg37 -(dp56 -g39 -S'chr5' -p57 -sg41 -S'GGTGA' -p58 -sg43 -S'119475727' -p59 -sg45 -g46 -sssS'hg19' -p60 -(dp61 -g35 -S'NC_000005.9:g.118811425_118811428del' -p62 -sg37 -(dp63 -g39 -g57 -sg41 -S'GGTGA' -p64 -sg43 -S'118811422' -p65 -sg45 -g46 -sssssS'NM_001292028.1:c.-110+3_-110+6del' -p66 -(dp67 -g3 -g4 -sg5 -(lp68 -S'NC_000005.9:g.118811422GGTGA>G automapped to NC_000005.9:g.118811425_118811428del' -p69 -aS'RefSeqGene record not available' -p70 -asg8 -g4 -sg10 -(lp71 -sg12 -VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 5, mRNA -p72 -sg14 -S'HSD17B4' -p73 -sg16 -(dp74 -g18 -S'NP_001278957.1:p.?' -p75 -sg20 -S'NP_001278957.1:p.?' 
-p76 -ssg22 -g23 -sg24 -S'NC_000005.9(NM_001292028.1):c.-110+3_-110+6del' -p77 -sg26 -g4 -sg27 -S'NM_001292028.1:c.-110+3_-110+6del' -p78 -sg29 -g4 -sg31 -(dp79 -g33 -(dp80 -g35 -S'NC_000005.10:g.119475730_119475733del' -p81 -sg37 -(dp82 -g39 -g40 -sg41 -S'GGTGA' -p83 -sg43 -S'119475727' -p84 -sg45 -g46 -sssg47 -(dp85 -g35 -S'NC_000005.9:g.118811425_118811428del' -p86 -sg37 -(dp87 -g39 -g40 -sg41 -S'GGTGA' -p88 -sg43 -S'118811422' -p89 -sg45 -g46 -sssg53 -(dp90 -g35 -S'NC_000005.10:g.119475730_119475733del' -p91 -sg37 -(dp92 -g39 -g57 -sg41 -S'GGTGA' -p93 -sg43 -S'119475727' -p94 -sg45 -g46 -sssg60 -(dp95 -g35 -S'NC_000005.9:g.118811425_118811428del' -p96 -sg37 -(dp97 -g39 -g57 -sg41 -S'GGTGA' -p98 -sg43 -S'118811422' -p99 -sg45 -g46 -sssssS'NM_001199291.2:c.377+3_377+6del' -p100 -(dp101 -g3 -g4 -sg5 -(lp102 -S'NC_000005.9:g.118811422GGTGA>G automapped to NC_000005.9:g.118811425_118811428del' -p103 -aS'RefSeqGene record not available' -p104 -asg8 -g4 -sg10 -(lp105 -sg12 -VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 1, mRNA -p106 -sg14 -S'HSD17B4' -p107 -sg16 -(dp108 -g18 -S'NP_001186220.1:p.?' -p109 -sg20 -S'NP_001186220.1:p.?' 
-p110 -ssg22 -g23 -sg24 -S'NC_000005.9(NM_001199291.2):c.377+3_377+6del' -p111 -sg26 -g4 -sg27 -S'NM_001199291.2:c.377+3_377+6del' -p112 -sg29 -g4 -sg31 -(dp113 -g33 -(dp114 -g35 -S'NC_000005.10:g.119475730_119475733del' -p115 -sg37 -(dp116 -g39 -g40 -sg41 -S'GGTGA' -p117 -sg43 -S'119475727' -p118 -sg45 -g46 -sssg47 -(dp119 -g35 -S'NC_000005.9:g.118811425_118811428del' -p120 -sg37 -(dp121 -g39 -g40 -sg41 -S'GGTGA' -p122 -sg43 -S'118811422' -p123 -sg45 -g46 -sssg53 -(dp124 -g35 -S'NC_000005.10:g.119475730_119475733del' -p125 -sg37 -(dp126 -g39 -g57 -sg41 -S'GGTGA' -p127 -sg43 -S'119475727' -p128 -sg45 -g46 -sssg60 -(dp129 -g35 -S'NC_000005.9:g.118811425_118811428del' -p130 -sg37 -(dp131 -g39 -g57 -sg41 -S'GGTGA' -p132 -sg43 -S'118811422' -p133 -sg45 -g46 -sssssS'flag' -p134 -S'gene_variant' -p135 -sS'NM_001292027.1:c.230+3_230+6del' -p136 -(dp137 -g3 -g4 -sg5 -(lp138 -S'NC_000005.9:g.118811422GGTGA>G automapped to NC_000005.9:g.118811425_118811428del' -p139 -aS'RefSeqGene record not available' -p140 -asg8 -g4 -sg10 -(lp141 -sg12 -VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 4, mRNA -p142 -sg14 -S'HSD17B4' -p143 -sg16 -(dp144 -g18 -S'NP_001278956.1:p.?' -p145 -sg20 -S'NP_001278956.1:p.?' 
-p146 -ssg22 -g23 -sg24 -S'NC_000005.9(NM_001292027.1):c.230+3_230+6del' -p147 -sg26 -g4 -sg27 -S'NM_001292027.1:c.230+3_230+6del' -p148 -sg29 -g4 -sg31 -(dp149 -g33 -(dp150 -g35 -S'NC_000005.10:g.119475730_119475733del' -p151 -sg37 -(dp152 -g39 -g40 -sg41 -S'GGTGA' -p153 -sg43 -S'119475727' -p154 -sg45 -g46 -sssg47 -(dp155 -g35 -S'NC_000005.9:g.118811425_118811428del' -p156 -sg37 -(dp157 -g39 -g40 -sg41 -S'GGTGA' -p158 -sg43 -S'118811422' -p159 -sg45 -g46 -sssg53 -(dp160 -g35 -S'NC_000005.10:g.119475730_119475733del' -p161 -sg37 -(dp162 -g39 -g57 -sg41 -S'GGTGA' -p163 -sg43 -S'119475727' -p164 -sg45 -g46 -sssg60 -(dp165 -g35 -S'NC_000005.9:g.118811425_118811428del' -p166 -sg37 -(dp167 -g39 -g57 -sg41 -S'GGTGA' -p168 -sg43 -S'118811422' -p169 -sg45 -g46 -sssssS'NM_001199291.1:c.377+3_377+6del' -p170 -(dp171 -g3 -g4 -sg5 -(lp172 -S'NC_000005.9:g.118811422GGTGA>G automapped to NC_000005.9:g.118811425_118811428del' -p173 -aS'A more recent version of the selected reference sequence NM_001199291.1 is available (NM_001199291.2)' -p174 -aS'NM_001199291.2:c.377+3_377+6del MUST be fully validated prior to use in reports' -p175 -aS'select_variants=NM_001199291.2:c.377+3_377+6del' -p176 -aS'RefSeqGene record not available' -p177 -asg8 -g4 -sg10 -(lp178 -sg12 -VHomo sapiens hydroxysteroid (17-beta) dehydrogenase 4 (HSD17B4), transcript variant 1, mRNA -p179 -sg14 -S'HSD17B4' -p180 -sg16 -(dp181 -g18 -S'NP_001186220.1:p.?' -p182 -sg20 -S'NP_001186220.1:p.?' 
-p183 -ssg22 -g23 -sg24 -S'NC_000005.9(NM_001199291.1):c.377+3_377+6del' -p184 -sg26 -g4 -sg27 -S'NM_001199291.1:c.377+3_377+6del' -p185 -sg29 -g4 -sg31 -(dp186 -g60 -(dp187 -g35 -S'NC_000005.9:g.118811425_118811428del' -p188 -sg37 -(dp189 -g39 -g57 -sg41 -S'GGTGA' -p190 -sg43 -S'118811422' -p191 -sg45 -g46 -sssg47 -(dp192 -g35 -S'NC_000005.9:g.118811425_118811428del' -p193 -sg37 -(dp194 -g39 -g40 -sg41 -S'GGTGA' -p195 -sg43 -S'118811422' -p196 -sg45 -g46 -sssssS'NM_001199292.1:c.248+3_248+6del' -p197 -(dp198 -g3 -g4 -sg5 -(lp199 -S'NC_000005.9:g.118811422GGTGA>G automapped to NC_000005.9:g.118811425_118811428del' -p200 -aS'RefSeqGene record not available' -p201 -asg8 -g4 -sg10 -(lp202 -sg12 -VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 3, mRNA -p203 -sg14 -S'HSD17B4' -p204 -sg16 -(dp205 -g18 -S'NP_001186221.1:p.?' -p206 -sg20 -S'NP_001186221.1:p.?' -p207 -ssg22 -g23 -sg24 -S'NC_000005.9(NM_001199292.1):c.248+3_248+6del' -p208 -sg26 -g4 -sg27 -S'NM_001199292.1:c.248+3_248+6del' -p209 -sg29 -g4 -sg31 -(dp210 -g33 -(dp211 -g35 -S'NC_000005.10:g.119475730_119475733del' -p212 -sg37 -(dp213 -g39 -g40 -sg41 -S'GGTGA' -p214 -sg43 -S'119475727' -p215 -sg45 -g46 -sssg47 -(dp216 -g35 -S'NC_000005.9:g.118811425_118811428del' -p217 -sg37 -(dp218 -g39 -g40 -sg41 -S'GGTGA' -p219 -sg43 -S'118811422' -p220 -sg45 -g46 -sssg53 -(dp221 -g35 -S'NC_000005.10:g.119475730_119475733del' -p222 -sg37 -(dp223 -g39 -g57 -sg41 -S'GGTGA' -p224 -sg43 -S'119475727' -p225 -sg45 -g46 -sssg60 -(dp226 -g35 -S'NC_000005.9:g.118811425_118811428del' -p227 -sg37 -(dp228 -g39 -g57 -sg41 -S'GGTGA' -p229 -sg43 -S'118811422' -p230 -sg45 -g46 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant288.txt b/VariantValidator/testing/testOutputsMasterITS/variant288.txt deleted file mode 100644 index 00ac56a1..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant288.txt +++ /dev/null @@ -1,674 +0,0 @@ -(dp0 -S'NM_001292028.1:c.-110+1_-110+5del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000005.9:g.118811422GGTGAG>G automapped to NC_000005.9:g.118811423_118811427del' -p7 -aS'RefSeqGene record not available' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 5, mRNA -p13 -sS'gene_symbol' -p14 -S'HSD17B4' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001278957.1:p.?' -p19 -sS'slr' -p20 -S'NP_001278957.1:p.?' -p21 -ssS'submitted_variant' -p22 -S'5-118811422-GGTGAG-G' -p23 -sS'genome_context_intronic_sequence' -p24 -S'NC_000005.9(NM_001292028.1):c.-110+1_-110+5del' -p25 -sS'HGVS_LRG_variant' -p26 -g4 -sS'HGVS_transcript_variant' -p27 -S'NM_001292028.1:c.-110+1_-110+5del' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -g4 -sS'primary_assembly_loci' -p30 -(dp31 -S'GRCh38' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000005.10:g.119475728_119475732del' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'5' -p39 -sS'ref' -p40 -S'GGGTGA' -p41 -sS'pos' -p42 -S'119475726' -p43 -sS'alt' -p44 -S'G' -p45 -sssS'GRCh37' -p46 -(dp47 -g34 -S'NC_000005.9:g.118811423_118811427del' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'GGGTGA' -p50 -sg42 -S'118811421' -p51 -sg44 -g45 -sssS'hg38' -p52 -(dp53 -g34 -S'NC_000005.10:g.119475728_119475732del' -p54 -sg36 -(dp55 -g38 -S'chr5' -p56 -sg40 -S'GGGTGA' -p57 -sg42 -S'119475726' -p58 -sg44 -g45 -sssS'hg19' -p59 -(dp60 -g34 -S'NC_000005.9:g.118811423_118811427del' -p61 -sg36 -(dp62 -g38 -g56 -sg40 -S'GGGTGA' -p63 -sg42 
-S'118811421' -p64 -sg44 -g45 -sssssS'NM_000414.3:c.302+1_302+5del' -p65 -(dp66 -g3 -g4 -sg5 -(lp67 -S'NC_000005.9:g.118811422GGTGAG>G automapped to NC_000005.9:g.118811423_118811427del' -p68 -asg9 -S'NG_008182.1(NM_000414.3):c.302+1_302+5del' -p69 -sg10 -(lp70 -sg12 -VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 2, mRNA -p71 -sg14 -S'HSD17B4' -p72 -sg16 -(dp73 -g18 -S'NP_000405.1:p.?' -p74 -sg20 -S'NP_000405.1:p.?' -p75 -ssg22 -g23 -sg24 -S'NC_000005.9(NM_000414.3):c.302+1_302+5del' -p76 -sg26 -g4 -sg27 -S'NM_000414.3:c.302+1_302+5del' -p77 -sg29 -S'NG_008182.1:g.28276_28280del' -p78 -sg30 -(dp79 -g32 -(dp80 -g34 -S'NC_000005.10:g.119475728_119475732del' -p81 -sg36 -(dp82 -g38 -g39 -sg40 -S'GGGTGA' -p83 -sg42 -S'119475726' -p84 -sg44 -g45 -sssg46 -(dp85 -g34 -S'NC_000005.9:g.118811423_118811427del' -p86 -sg36 -(dp87 -g38 -g39 -sg40 -S'GGGTGA' -p88 -sg42 -S'118811421' -p89 -sg44 -g45 -sssg52 -(dp90 -g34 -S'NC_000005.10:g.119475728_119475732del' -p91 -sg36 -(dp92 -g38 -g56 -sg40 -S'GGGTGA' -p93 -sg42 -S'119475726' -p94 -sg44 -g45 -sssg59 -(dp95 -g34 -S'NC_000005.9:g.118811423_118811427del' -p96 -sg36 -(dp97 -g38 -g56 -sg40 -S'GGGTGA' -p98 -sg42 -S'118811421' -p99 -sg44 -g45 -sssssS'NM_001199291.2:c.377+1_377+5del' -p100 -(dp101 -g3 -g4 -sg5 -(lp102 -S'NC_000005.9:g.118811422GGTGAG>G automapped to NC_000005.9:g.118811423_118811427del' -p103 -aS'RefSeqGene record not available' -p104 -asg9 -g4 -sg10 -(lp105 -sg12 -VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 1, mRNA -p106 -sg14 -S'HSD17B4' -p107 -sg16 -(dp108 -g18 -S'NP_001186220.1:p.?' -p109 -sg20 -S'NP_001186220.1:p.?' 
-p110 -ssg22 -g23 -sg24 -S'NC_000005.9(NM_001199291.2):c.377+1_377+5del' -p111 -sg26 -g4 -sg27 -S'NM_001199291.2:c.377+1_377+5del' -p112 -sg29 -g4 -sg30 -(dp113 -g32 -(dp114 -g34 -S'NC_000005.10:g.119475728_119475732del' -p115 -sg36 -(dp116 -g38 -g39 -sg40 -S'GGGTGA' -p117 -sg42 -S'119475726' -p118 -sg44 -g45 -sssg46 -(dp119 -g34 -S'NC_000005.9:g.118811423_118811427del' -p120 -sg36 -(dp121 -g38 -g39 -sg40 -S'GGGTGA' -p122 -sg42 -S'118811421' -p123 -sg44 -g45 -sssg52 -(dp124 -g34 -S'NC_000005.10:g.119475728_119475732del' -p125 -sg36 -(dp126 -g38 -g56 -sg40 -S'GGGTGA' -p127 -sg42 -S'119475726' -p128 -sg44 -g45 -sssg59 -(dp129 -g34 -S'NC_000005.9:g.118811423_118811427del' -p130 -sg36 -(dp131 -g38 -g56 -sg40 -S'GGGTGA' -p132 -sg42 -S'118811421' -p133 -sg44 -g45 -sssssS'NM_001199292.1:c.248+1_248+5del' -p134 -(dp135 -g3 -g4 -sg5 -(lp136 -S'NC_000005.9:g.118811422GGTGAG>G automapped to NC_000005.9:g.118811423_118811427del' -p137 -aS'RefSeqGene record not available' -p138 -asg9 -g4 -sg10 -(lp139 -sg12 -VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 3, mRNA -p140 -sg14 -S'HSD17B4' -p141 -sg16 -(dp142 -g18 -S'NP_001186221.1:p.?' -p143 -sg20 -S'NP_001186221.1:p.?' 
-p144 -ssg22 -g23 -sg24 -S'NC_000005.9(NM_001199292.1):c.248+1_248+5del' -p145 -sg26 -g4 -sg27 -S'NM_001199292.1:c.248+1_248+5del' -p146 -sg29 -g4 -sg30 -(dp147 -g32 -(dp148 -g34 -S'NC_000005.10:g.119475728_119475732del' -p149 -sg36 -(dp150 -g38 -g39 -sg40 -S'GGGTGA' -p151 -sg42 -S'119475726' -p152 -sg44 -g45 -sssg46 -(dp153 -g34 -S'NC_000005.9:g.118811423_118811427del' -p154 -sg36 -(dp155 -g38 -g39 -sg40 -S'GGGTGA' -p156 -sg42 -S'118811421' -p157 -sg44 -g45 -sssg52 -(dp158 -g34 -S'NC_000005.10:g.119475728_119475732del' -p159 -sg36 -(dp160 -g38 -g56 -sg40 -S'GGGTGA' -p161 -sg42 -S'119475726' -p162 -sg44 -g45 -sssg59 -(dp163 -g34 -S'NC_000005.9:g.118811423_118811427del' -p164 -sg36 -(dp165 -g38 -g56 -sg40 -S'GGGTGA' -p166 -sg42 -S'118811421' -p167 -sg44 -g45 -sssssS'flag' -p168 -S'gene_variant' -p169 -sS'NM_001199291.1:c.377+1_377+5del' -p170 -(dp171 -g3 -g4 -sg5 -(lp172 -S'NC_000005.9:g.118811422GGTGAG>G automapped to NC_000005.9:g.118811423_118811427del' -p173 -aS'A more recent version of the selected reference sequence NM_001199291.1 is available (NM_001199291.2)' -p174 -aS'NM_001199291.2:c.377+1_377+5del MUST be fully validated prior to use in reports' -p175 -aS'select_variants=NM_001199291.2:c.377+1_377+5del' -p176 -aS'RefSeqGene record not available' -p177 -asg9 -g4 -sg10 -(lp178 -sg12 -VHomo sapiens hydroxysteroid (17-beta) dehydrogenase 4 (HSD17B4), transcript variant 1, mRNA -p179 -sg14 -S'HSD17B4' -p180 -sg16 -(dp181 -g18 -S'NP_001186220.1:p.?' -p182 -sg20 -S'NP_001186220.1:p.?' 
-p183 -ssg22 -g23 -sg24 -S'NC_000005.9(NM_001199291.1):c.377+1_377+5del' -p184 -sg26 -g4 -sg27 -S'NM_001199291.1:c.377+1_377+5del' -p185 -sg29 -g4 -sg30 -(dp186 -g59 -(dp187 -g34 -S'NC_000005.9:g.118811423_118811427del' -p188 -sg36 -(dp189 -g38 -g56 -sg40 -S'GGGTGA' -p190 -sg42 -S'118811421' -p191 -sg44 -g45 -sssg46 -(dp192 -g34 -S'NC_000005.9:g.118811423_118811427del' -p193 -sg36 -(dp194 -g38 -g39 -sg40 -S'GGGTGA' -p195 -sg42 -S'118811421' -p196 -sg44 -g45 -sssssS'NM_001292027.1:c.230+1_230+5del' -p197 -(dp198 -g3 -g4 -sg5 -(lp199 -S'NC_000005.9:g.118811422GGTGAG>G automapped to NC_000005.9:g.118811423_118811427del' -p200 -aS'RefSeqGene record not available' -p201 -asg9 -g4 -sg10 -(lp202 -sg12 -VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 4, mRNA -p203 -sg14 -S'HSD17B4' -p204 -sg16 -(dp205 -g18 -S'NP_001278956.1:p.?' -p206 -sg20 -S'NP_001278956.1:p.?' -p207 -ssg22 -g23 -sg24 -S'NC_000005.9(NM_001292027.1):c.230+1_230+5del' -p208 -sg26 -g4 -sg27 -S'NM_001292027.1:c.230+1_230+5del' -p209 -sg29 -g4 -sg30 -(dp210 -g32 -(dp211 -g34 -S'NC_000005.10:g.119475728_119475732del' -p212 -sg36 -(dp213 -g38 -g39 -sg40 -S'GGGTGA' -p214 -sg42 -S'119475726' -p215 -sg44 -g45 -sssg46 -(dp216 -g34 -S'NC_000005.9:g.118811423_118811427del' -p217 -sg36 -(dp218 -g38 -g39 -sg40 -S'GGGTGA' -p219 -sg42 -S'118811421' -p220 -sg44 -g45 -sssg52 -(dp221 -g34 -S'NC_000005.10:g.119475728_119475732del' -p222 -sg36 -(dp223 -g38 -g56 -sg40 -S'GGGTGA' -p224 -sg42 -S'119475726' -p225 -sg44 -g45 -sssg59 -(dp226 -g34 -S'NC_000005.9:g.118811423_118811427del' -p227 -sg36 -(dp228 -g38 -g56 -sg40 -S'GGGTGA' -p229 -sg42 -S'118811421' -p230 -sg44 -g45 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant289.txt b/VariantValidator/testing/testOutputsMasterITS/variant289.txt deleted file mode 100644 index 543b0e71..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant289.txt +++ /dev/null @@ -1,365 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NR_110997.1:n.21del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000005.9:g.131705587CG>C automapped to NC_000005.9:g.131705590delG' -p9 -aS'RefSeqGene record not available' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens MIR3936 host gene (MIR3936HG), long non-coding RNA -p15 -sS'gene_symbol' -p16 -S'MIR3936HG' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'Non-coding :n.' -p21 -sS'slr' -p22 -g6 -ssS'submitted_variant' -p23 -S'5-131705587-CG-C' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'HGVS_LRG_variant' -p26 -g6 -sS'HGVS_transcript_variant' -p27 -S'NR_110997.1:n.21del' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'GRCh38' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000005.10:g.132369898del' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'5' -p39 -sS'ref' -p40 -S'CG' -p41 -sS'pos' -p42 -S'132369895' -p43 -sS'alt' -p44 -S'C' -p45 -sssS'GRCh37' -p46 -(dp47 -g34 -S'NC_000005.9:g.131705590del' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'CG' -p50 -sg42 -S'131705587' -p51 -sg44 -g45 -sssS'hg38' -p52 -(dp53 -g34 -S'NC_000005.10:g.132369898del' -p54 -sg36 -(dp55 -g38 -S'chr5' -p56 -sg40 -S'CG' -p57 -sg42 -S'132369895' -p58 -sg44 -g45 -sssS'hg19' -p59 -(dp60 -g34 -S'NC_000005.9:g.131705590del' -p61 -sg36 -(dp62 -g38 -g56 -sg40 -S'CG' -p63 -sg42 -S'131705587' -p64 -sg44 -g45 -sssssS'NM_003060.3:c.-75del' -p65 -(dp66 -g5 -g6 -sg7 -(lp67 -S'NC_000005.9:g.131705587CG>C automapped to NC_000005.9:g.131705590delG' 
-p68 -aS'RefSeqGene record not available' -p69 -asg11 -g6 -sg12 -(lp70 -sg14 -VHomo sapiens solute carrier family 22 member 5 (SLC22A5), transcript variant 2, mRNA -p71 -sg16 -S'SLC22A5' -p72 -sg18 -(dp73 -g20 -S'NP_003051.1:p.?' -p74 -sg22 -S'NP_003051.1:p.?' -p75 -ssg23 -g24 -sg25 -g6 -sg26 -g6 -sg27 -S'NM_003060.3:c.-75del' -p76 -sg29 -g6 -sg30 -(dp77 -g32 -(dp78 -g34 -S'NC_000005.10:g.132369898del' -p79 -sg36 -(dp80 -g38 -g39 -sg40 -S'CG' -p81 -sg42 -S'132369895' -p82 -sg44 -g45 -sssg46 -(dp83 -g34 -S'NC_000005.9:g.131705590del' -p84 -sg36 -(dp85 -g38 -g39 -sg40 -S'CG' -p86 -sg42 -S'131705587' -p87 -sg44 -g45 -sssg52 -(dp88 -g34 -S'NC_000005.10:g.132369898del' -p89 -sg36 -(dp90 -g38 -g56 -sg40 -S'CG' -p91 -sg42 -S'132369895' -p92 -sg44 -g45 -sssg59 -(dp93 -g34 -S'NC_000005.9:g.131705590del' -p94 -sg36 -(dp95 -g38 -g56 -sg40 -S'CG' -p96 -sg42 -S'131705587' -p97 -sg44 -g45 -sssssS'NM_001308122.1:c.-75del' -p98 -(dp99 -g5 -g6 -sg7 -(lp100 -S'NC_000005.9:g.131705587CG>C automapped to NC_000005.9:g.131705590delG' -p101 -aS'RefSeqGene record not available' -p102 -asg11 -g6 -sg12 -(lp103 -sg14 -VHomo sapiens solute carrier family 22 member 5 (SLC22A5), transcript variant 1, mRNA -p104 -sg16 -S'SLC22A5' -p105 -sg18 -(dp106 -g20 -S'NP_001295051.1:p.?' -p107 -sg22 -S'NP_001295051.1:p.?' -p108 -ssg23 -g24 -sg25 -g6 -sg26 -g6 -sg27 -S'NM_001308122.1:c.-75del' -p109 -sg29 -g6 -sg30 -(dp110 -g32 -(dp111 -g34 -S'NC_000005.10:g.132369898del' -p112 -sg36 -(dp113 -g38 -g39 -sg40 -S'CG' -p114 -sg42 -S'132369895' -p115 -sg44 -g45 -sssg46 -(dp116 -g34 -S'NC_000005.9:g.131705590del' -p117 -sg36 -(dp118 -g38 -g39 -sg40 -S'CG' -p119 -sg42 -S'131705587' -p120 -sg44 -g45 -sssg52 -(dp121 -g34 -S'NC_000005.10:g.132369898del' -p122 -sg36 -(dp123 -g38 -g56 -sg40 -S'CG' -p124 -sg42 -S'132369895' -p125 -sg44 -g45 -sssg59 -(dp126 -g34 -S'NC_000005.9:g.131705590del' -p127 -sg36 -(dp128 -g38 -g56 -sg40 -S'CG' -p129 -sg42 -S'131705587' -p130 -sg44 -g45 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant29.txt b/VariantValidator/testing/testOutputsMasterITS/variant29.txt deleted file mode 100644 index eb710194..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant29.txt +++ /dev/null @@ -1,142 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.*1400_*1406=' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.*1400_*1406=' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2(LRG_1p1):p.?' -p20 -sS'slr' -p21 -S'NP_000079.2:p.?' -p22 -ssS'submitted_variant' -p23 -S'NC_000017.10:g.48261457_48261463TTATGTT=' -p24 -sS'genome_context_intronic_sequence' -p25 -g10 -sS'HGVS_LRG_variant' -p26 -S'LRG_1:g.22538_22544=' -p27 -sS'HGVS_transcript_variant' -p28 -S'NM_000088.3:c.*1400_*1406=' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_007400.1:g.22538_22544=' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000017.10:g.48261457_48261463=' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr17' -p41 -sS'ref' -p42 -VTTATGTT -p43 -sS'pos' -p44 -S'48261457' -p45 -sS'alt' -p46 -g43 -sssS'hg38' -p47 -(dp48 -g36 -S'NC_000017.11:g.50184096_50184102=' -p49 -sg38 -(dp50 -g40 -g41 -sg42 -VTTATGTT -p51 -sg44 -S'50184096' -p52 -sg46 -g51 -sssS'GRCh37' -p53 -(dp54 -g36 -S'NC_000017.10:g.48261457_48261463=' -p55 -sg38 -(dp56 -g40 -S'17' -p57 -sg42 -g43 -sg44 -S'48261457' -p58 -sg46 -g43 -sssS'GRCh38' -p59 -(dp60 -g36 -S'NC_000017.11:g.50184096_50184102=' -p61 -sg38 -(dp62 -g40 -g57 -sg42 -g51 -sg44 -S'50184096' -p63 -sg46 -g51 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant290.txt b/VariantValidator/testing/testOutputsMasterITS/variant290.txt deleted file mode 100644 index 1379656d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant290.txt +++ /dev/null @@ -1,142 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_024577.3:c.2813A>G' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_269t1:c.2813A>G' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens SH3 domain and tetratricopeptide repeats 2 (SH3TC2), mRNA -p14 -sS'gene_symbol' -p15 -S'SH3TC2' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_078853.2(LRG_269p1):p.(His938Arg)' -p20 -sS'slr' -p21 -S'NP_078853.2:p.(H938R)' -p22 -ssS'submitted_variant' -p23 -S'5-148406482-T-C' -p24 -sS'genome_context_intronic_sequence' -p25 -g10 -sS'HGVS_LRG_variant' -p26 -S'LRG_269:g.41256A>G' -p27 -sS'HGVS_transcript_variant' -p28 -S'NM_024577.3:c.2813A>G' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_007947.2:g.41256A>G' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'GRCh38' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000005.10:g.149026919T>C' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'5' -p41 -sS'ref' -p42 -VT -p43 -sS'pos' -p44 -S'149026919' -p45 -sS'alt' -p46 -VC -p47 -sssS'GRCh37' -p48 -(dp49 -g36 -S'NC_000005.9:g.148406482T>C' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -g43 -sg44 -S'148406482' -p52 -sg46 -g47 -sssS'hg38' -p53 -(dp54 -g36 -S'NC_000005.10:g.149026919T>C' -p55 -sg38 -(dp56 -g40 -S'chr5' -p57 -sg42 -g43 -sg44 -S'149026919' -p58 -sg46 -g47 -sssS'hg19' -p59 -(dp60 -g36 -S'NC_000005.9:g.148406482T>C' -p61 -sg38 -(dp62 -g40 -g57 -sg42 -g43 -sg44 -S'148406482' -p63 -sg46 -g47 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant291.txt b/VariantValidator/testing/testOutputsMasterITS/variant291.txt deleted file mode 100644 index 9a0043b6..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant291.txt +++ /dev/null @@ -1,147 +0,0 @@ -(dp0 -S'NM_014845.5:c.123_124insCAG' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_241t1:c.123_124insCAG' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000006.11:g.110036337T>TCAG automapped to NC_000006.11:g.110036337_110036338insCAG' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -S'' -p9 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens FIG4 phosphoinositide 5-phosphatase (FIG4), mRNA -p13 -sS'gene_symbol' -p14 -S'FIG4' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_055660.1(LRG_241p1):p.(Ile41_Asp42insGln)' -p19 -sS'slr' -p20 -S'NP_055660.1:p.(I41_D42insQ)' -p21 -ssS'submitted_variant' -p22 -S'6-110036337-T-TCAG' -p23 -sS'genome_context_intronic_sequence' -p24 -g9 -sS'HGVS_LRG_variant' -p25 -S'LRG_241:g.28914_28915insCAG' -p26 -sS'HGVS_transcript_variant' -p27 -S'NM_014845.5:c.123_124insCAG' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -S'NG_007977.1:g.28914_28915insCAG' -p30 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000006.11:g.110036337_110036338insCAG' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr6' -p40 -sS'ref' -p41 -S'T' -p42 -sS'pos' -p43 -S'110036337' -p44 -sS'alt' -p45 -S'TCAG' -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000006.12:g.109715134_109715135insCAG' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -g42 -sg43 -S'109715134' -p51 -sg45 -S'TCAG' -p52 -sssS'GRCh37' -p53 -(dp54 -g35 -S'NC_000006.11:g.110036337_110036338insCAG' -p55 -sg37 -(dp56 -g39 -S'6' -p57 -sg41 -g42 -sg43 -S'110036337' -p58 -sg45 -S'TCAG' -p59 -sssS'GRCh38' -p60 -(dp61 -g35 -S'NC_000006.12:g.109715134_109715135insCAG' -p62 -sg37 -(dp63 -g39 -g57 -sg41 -g42 -sg43 
-S'109715134' -p64 -sg45 -S'TCAG' -p65 -sssssS'flag' -p66 -S'gene_variant' -p67 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant292.txt b/VariantValidator/testing/testOutputsMasterITS/variant292.txt deleted file mode 100644 index 637d5c85..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant292.txt +++ /dev/null @@ -1,147 +0,0 @@ -(dp0 -S'NM_014845.5:c.124_126del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_241t1:c.124_126del' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000006.11:g.110036337TGAT>T automapped to NC_000006.11:g.110036338_110036340delGAT' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -S'' -p9 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens FIG4 phosphoinositide 5-phosphatase (FIG4), mRNA -p13 -sS'gene_symbol' -p14 -S'FIG4' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_055660.1(LRG_241p1):p.(Asp42del)' -p19 -sS'slr' -p20 -S'NP_055660.1:p.(D42del)' -p21 -ssS'submitted_variant' -p22 -S'6-110036337-TGAT-T' -p23 -sS'genome_context_intronic_sequence' -p24 -g9 -sS'HGVS_LRG_variant' -p25 -S'LRG_241:g.28915_28917del' -p26 -sS'HGVS_transcript_variant' -p27 -S'NM_014845.5:c.124_126del' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -S'NG_007977.1:g.28915_28917del' -p30 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000006.11:g.110036338_110036340del' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr6' -p40 -sS'ref' -p41 -S'TTGA' -p42 -sS'pos' -p43 -S'110036336' -p44 -sS'alt' -p45 -S'T' -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000006.12:g.109715135_109715137del' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -S'TTGA' -p51 -sg43 -S'109715133' -p52 -sg45 -g46 -sssS'GRCh37' -p53 -(dp54 -g35 -S'NC_000006.11:g.110036338_110036340del' -p55 -sg37 -(dp56 -g39 -S'6' -p57 -sg41 -S'TTGA' -p58 -sg43 -S'110036336' -p59 -sg45 -g46 -sssS'GRCh38' -p60 -(dp61 -g35 -S'NC_000006.12:g.109715135_109715137del' -p62 
-sg37 -(dp63 -g39 -g57 -sg41 -S'TTGA' -p64 -sg43 -S'109715133' -p65 -sg45 -g46 -sssssS'flag' -p66 -S'gene_variant' -p67 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant293.txt b/VariantValidator/testing/testOutputsMasterITS/variant293.txt deleted file mode 100644 index b0825550..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant293.txt +++ /dev/null @@ -1,251 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_182961.3:c.14018G>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_427t1:c.14018G>T' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The current status of LRG_427 is pending therefore changes may be made to the LRG reference sequence' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens spectrin repeat containing nuclear envelope protein 1 (SYNE1), transcript variant 1, mRNA -p15 -sS'gene_symbol' -p16 -S'SYNE1' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_892006.3(LRG_427p1):p.(Arg4673Leu)' -p21 -sS'slr' -p22 -S'NP_892006.3:p.(R4673L)' -p23 -ssS'submitted_variant' -p24 -S'6-152651802-C-A' -p25 -sS'genome_context_intronic_sequence' -p26 -g11 -sS'HGVS_LRG_variant' -p27 -S'LRG_427:g.311733G>T' -p28 -sS'HGVS_transcript_variant' -p29 -S'NM_182961.3:c.14018G>T' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_012855.1:g.311733G>T' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000006.11:g.152651802C>A' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr6' -p42 -sS'ref' -p43 -VC -p44 -sS'pos' -p45 -S'152651802' -p46 -sS'alt' -p47 -VA -p48 -sssS'hg38' -p49 -(dp50 -g37 -S'NC_000006.12:g.152330667C>A' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -g44 -sg45 -S'152330667' -p53 -sg47 -g48 -sssS'GRCh37' -p54 -(dp55 -g37 -S'NC_000006.11:g.152651802C>A' -p56 -sg39 -(dp57 -g41 -S'6' -p58 -sg43 -g44 -sg45 -S'152651802' -p59 -sg47 -g48 -sssS'GRCh38' 
-p60 -(dp61 -g37 -S'NC_000006.12:g.152330667C>A' -p62 -sg39 -(dp63 -g41 -g58 -sg43 -g44 -sg45 -S'152330667' -p64 -sg47 -g48 -sssssS'NM_033071.3:c.13805G>T' -p65 -(dp66 -g5 -S'LRG_427t2:c.13805G>T' -p67 -sg7 -(lp68 -S'The current status of LRG_427 is pending therefore changes may be made to the LRG reference sequence' -p69 -asg10 -g11 -sg12 -(lp70 -sg14 -VHomo sapiens spectrin repeat containing nuclear envelope protein 1 (SYNE1), transcript variant 2, mRNA -p71 -sg16 -S'SYNE1' -p72 -sg18 -(dp73 -g20 -S'NP_149062.1(LRG_427p2):p.(Arg4602Leu)' -p74 -sg22 -S'NP_149062.1:p.(R4602L)' -p75 -ssg24 -g25 -sg26 -g11 -sg27 -S'LRG_427:g.311733G>T' -p76 -sg29 -S'NM_033071.3:c.13805G>T' -p77 -sg31 -S'NG_012855.1:g.311733G>T' -p78 -sg33 -(dp79 -g35 -(dp80 -g37 -S'NC_000006.11:g.152651802C>A' -p81 -sg39 -(dp82 -g41 -g42 -sg43 -g44 -sg45 -S'152651802' -p83 -sg47 -g48 -sssg49 -(dp84 -g37 -S'NC_000006.12:g.152330667C>A' -p85 -sg39 -(dp86 -g41 -g42 -sg43 -g44 -sg45 -S'152330667' -p87 -sg47 -g48 -sssg54 -(dp88 -g37 -S'NC_000006.11:g.152651802C>A' -p89 -sg39 -(dp90 -g41 -g58 -sg43 -g44 -sg45 -S'152651802' -p91 -sg47 -g48 -sssg60 -(dp92 -g37 -S'NC_000006.12:g.152330667C>A' -p93 -sg39 -(dp94 -g41 -g58 -sg43 -g44 -sg45 -S'152330667' -p95 -sg47 -g48 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant294.txt b/VariantValidator/testing/testOutputsMasterITS/variant294.txt deleted file mode 100644 index de139c0c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant294.txt +++ /dev/null @@ -1,251 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_033071.3:c.5950G>C' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_427t2:c.5950G>C' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The current status of LRG_427 is pending therefore changes may be made to the LRG reference sequence' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens spectrin repeat containing nuclear envelope protein 1 (SYNE1), transcript variant 2, mRNA -p15 -sS'gene_symbol' -p16 -S'SYNE1' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_149062.1(LRG_427p2):p.(Ala1984Pro)' -p21 -sS'slr' -p22 -S'NP_149062.1:p.(A1984P)' -p23 -ssS'submitted_variant' -p24 -S'6-152737643-C-G' -p25 -sS'genome_context_intronic_sequence' -p26 -g11 -sS'HGVS_LRG_variant' -p27 -S'LRG_427:g.225892G>C' -p28 -sS'HGVS_transcript_variant' -p29 -S'NM_033071.3:c.5950G>C' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_012855.1:g.225892G>C' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000006.11:g.152737643C>G' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr6' -p42 -sS'ref' -p43 -VC -p44 -sS'pos' -p45 -S'152737643' -p46 -sS'alt' -p47 -VG -p48 -sssS'hg38' -p49 -(dp50 -g37 -S'NC_000006.12:g.152416508C>G' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -g44 -sg45 -S'152416508' -p53 -sg47 -g48 -sssS'GRCh37' -p54 -(dp55 -g37 -S'NC_000006.11:g.152737643C>G' -p56 -sg39 -(dp57 -g41 -S'6' -p58 -sg43 -g44 -sg45 -S'152737643' -p59 -sg47 -g48 -sssS'GRCh38' -p60 -(dp61 -g37 -S'NC_000006.12:g.152416508C>G' -p62 -sg39 -(dp63 -g41 -g58 -sg43 -g44 -sg45 -S'152416508' -p64 -sg47 -g48 
-sssssS'NM_182961.3:c.5929G>C' -p65 -(dp66 -g5 -S'LRG_427t1:c.5929G>C' -p67 -sg7 -(lp68 -S'The current status of LRG_427 is pending therefore changes may be made to the LRG reference sequence' -p69 -asg10 -g11 -sg12 -(lp70 -sg14 -VHomo sapiens spectrin repeat containing nuclear envelope protein 1 (SYNE1), transcript variant 1, mRNA -p71 -sg16 -S'SYNE1' -p72 -sg18 -(dp73 -g20 -S'NP_892006.3(LRG_427p1):p.(Ala1977Pro)' -p74 -sg22 -S'NP_892006.3:p.(A1977P)' -p75 -ssg24 -g25 -sg26 -g11 -sg27 -S'LRG_427:g.225892G>C' -p76 -sg29 -S'NM_182961.3:c.5929G>C' -p77 -sg31 -S'NG_012855.1:g.225892G>C' -p78 -sg33 -(dp79 -g35 -(dp80 -g37 -S'NC_000006.11:g.152737643C>G' -p81 -sg39 -(dp82 -g41 -g42 -sg43 -g44 -sg45 -S'152737643' -p83 -sg47 -g48 -sssg49 -(dp84 -g37 -S'NC_000006.12:g.152416508C>G' -p85 -sg39 -(dp86 -g41 -g42 -sg43 -g44 -sg45 -S'152416508' -p87 -sg47 -g48 -sssg54 -(dp88 -g37 -S'NC_000006.11:g.152737643C>G' -p89 -sg39 -(dp90 -g41 -g58 -sg43 -g44 -sg45 -S'152737643' -p91 -sg47 -g48 -sssg60 -(dp92 -g37 -S'NC_000006.12:g.152416508C>G' -p93 -sg39 -(dp94 -g41 -g58 -sg43 -g44 -sg45 -S'152416508' -p95 -sg47 -g48 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant295.txt b/VariantValidator/testing/testOutputsMasterITS/variant295.txt deleted file mode 100644 index 45ccdd1e..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant295.txt +++ /dev/null @@ -1,1747 +0,0 @@ -(dp0 -S'NM_001322012.1:c.688A>G' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 11, mRNA -p12 -sS'gene_symbol' -p13 -S'PMS2' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001308941.1:p.(Lys230Glu)' -p18 -sS'slr' -p19 -S'NP_001308941.1:p.(K230E)' -p20 -ssS'submitted_variant' -p21 -S'7-6026775-T-C' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_001322012.1:c.688A>G' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000007.13:g.6026775T>C' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr7' -p37 -sS'ref' -p38 -VT -p39 -sS'pos' -p40 -S'6026775' -p41 -sS'alt' -p42 -VC -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000007.14:g.5987144T>C' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p48 -sg42 -g43 -sssS'GRCh37' -p49 -(dp50 -g32 -S'NC_000007.13:g.6026775T>C' -p51 -sg34 -(dp52 -g36 -S'7' -p53 -sg38 -g39 -sg40 -S'6026775' -p54 -sg42 -g43 -sssS'GRCh38' -p55 -(dp56 -g32 -S'NC_000007.14:g.5987144T>C' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p59 -sg42 -g43 -sssssS'NM_001322010.1:c.1060A>G' -p60 -(dp61 -g3 -g4 -sg5 -(lp62 -S'RefSeqGene record not available' -p63 -asg8 -g4 -sg9 -(lp64 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), 
transcript variant 9, mRNA -p65 -sg13 -S'PMS2' -p66 -sg15 -(dp67 -g17 -S'NP_001308939.1:p.(Lys354Glu)' -p68 -sg19 -S'NP_001308939.1:p.(K354E)' -p69 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001322010.1:c.1060A>G' -p70 -sg27 -g4 -sg28 -(dp71 -g30 -(dp72 -g32 -S'NC_000007.13:g.6026775T>C' -p73 -sg34 -(dp74 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p75 -sg42 -g43 -sssg44 -(dp76 -g32 -S'NC_000007.14:g.5987144T>C' -p77 -sg34 -(dp78 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p79 -sg42 -g43 -sssg49 -(dp80 -g32 -S'NC_000007.13:g.6026775T>C' -p81 -sg34 -(dp82 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p83 -sg42 -g43 -sssg55 -(dp84 -g32 -S'NC_000007.14:g.5987144T>C' -p85 -sg34 -(dp86 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p87 -sg42 -g43 -sssssS'NM_001322015.1:c.1312A>G' -p88 -(dp89 -g3 -g4 -sg5 -(lp90 -S'RefSeqGene record not available' -p91 -asg8 -g4 -sg9 -(lp92 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 14, mRNA -p93 -sg13 -S'PMS2' -p94 -sg15 -(dp95 -g17 -S'NP_001308944.1:p.(Lys438Glu)' -p96 -sg19 -S'NP_001308944.1:p.(K438E)' -p97 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001322015.1:c.1312A>G' -p98 -sg27 -g4 -sg28 -(dp99 -g30 -(dp100 -g32 -S'NC_000007.13:g.6026775T>C' -p101 -sg34 -(dp102 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p103 -sg42 -g43 -sssg44 -(dp104 -g32 -S'NC_000007.14:g.5987144T>C' -p105 -sg34 -(dp106 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p107 -sg42 -g43 -sssg49 -(dp108 -g32 -S'NC_000007.13:g.6026775T>C' -p109 -sg34 -(dp110 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p111 -sg42 -g43 -sssg55 -(dp112 -g32 -S'NC_000007.14:g.5987144T>C' -p113 -sg34 -(dp114 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p115 -sg42 -g43 -sssssS'NM_001322003.1:c.1216A>G' -p116 -(dp117 -g3 -g4 -sg5 -(lp118 -S'RefSeqGene record not available' -p119 -asg8 -g4 -sg9 -(lp120 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 2, mRNA -p121 -sg13 -S'PMS2' -p122 -sg15 -(dp123 -g17 
-S'NP_001308932.1:p.(Lys406Glu)' -p124 -sg19 -S'NP_001308932.1:p.(K406E)' -p125 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001322003.1:c.1216A>G' -p126 -sg27 -g4 -sg28 -(dp127 -g30 -(dp128 -g32 -S'NC_000007.13:g.6026775T>C' -p129 -sg34 -(dp130 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p131 -sg42 -g43 -sssg44 -(dp132 -g32 -S'NC_000007.14:g.5987144T>C' -p133 -sg34 -(dp134 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p135 -sg42 -g43 -sssg49 -(dp136 -g32 -S'NC_000007.13:g.6026775T>C' -p137 -sg34 -(dp138 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p139 -sg42 -g43 -sssg55 -(dp140 -g32 -S'NC_000007.14:g.5987144T>C' -p141 -sg34 -(dp142 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p143 -sg42 -g43 -sssssS'NM_001322014.1:c.1621A>G' -p144 -(dp145 -g3 -g4 -sg5 -(lp146 -S'RefSeqGene record not available' -p147 -asg8 -g4 -sg9 -(lp148 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 13, mRNA -p149 -sg13 -S'PMS2' -p150 -sg15 -(dp151 -g17 -S'NP_001308943.1:p.(Lys541Glu)' -p152 -sg19 -S'NP_001308943.1:p.(K541E)' -p153 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001322014.1:c.1621A>G' -p154 -sg27 -g4 -sg28 -(dp155 -g30 -(dp156 -g32 -S'NC_000007.13:g.6026775T>C' -p157 -sg34 -(dp158 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p159 -sg42 -g43 -sssg44 -(dp160 -g32 -S'NC_000007.14:g.5987144T>C' -p161 -sg34 -(dp162 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p163 -sg42 -g43 -sssg49 -(dp164 -g32 -S'NC_000007.13:g.6026775T>C' -p165 -sg34 -(dp166 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p167 -sg42 -g43 -sssg55 -(dp168 -g32 -S'NC_000007.14:g.5987144T>C' -p169 -sg34 -(dp170 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p171 -sg42 -g43 -sssssS'NM_001322004.1:c.1216A>G' -p172 -(dp173 -g3 -g4 -sg5 -(lp174 -S'RefSeqGene record not available' -p175 -asg8 -g4 -sg9 -(lp176 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 3, mRNA -p177 -sg13 -S'PMS2' -p178 -sg15 -(dp179 -g17 -S'NP_001308933.1:p.(Lys406Glu)' -p180 -sg19 
-S'NP_001308933.1:p.(K406E)' -p181 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001322004.1:c.1216A>G' -p182 -sg27 -g4 -sg28 -(dp183 -g30 -(dp184 -g32 -S'NC_000007.13:g.6026775T>C' -p185 -sg34 -(dp186 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p187 -sg42 -g43 -sssg44 -(dp188 -g32 -S'NC_000007.14:g.5987144T>C' -p189 -sg34 -(dp190 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p191 -sg42 -g43 -sssg49 -(dp192 -g32 -S'NC_000007.13:g.6026775T>C' -p193 -sg34 -(dp194 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p195 -sg42 -g43 -sssg55 -(dp196 -g32 -S'NC_000007.14:g.5987144T>C' -p197 -sg34 -(dp198 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p199 -sg42 -g43 -sssssS'NM_001322008.1:c.1303A>G' -p200 -(dp201 -g3 -g4 -sg5 -(lp202 -S'RefSeqGene record not available' -p203 -asg8 -g4 -sg9 -(lp204 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 7, mRNA -p205 -sg13 -S'PMS2' -p206 -sg15 -(dp207 -g17 -S'NP_001308937.1:p.(Lys435Glu)' -p208 -sg19 -S'NP_001308937.1:p.(K435E)' -p209 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001322008.1:c.1303A>G' -p210 -sg27 -g4 -sg28 -(dp211 -g30 -(dp212 -g32 -S'NC_000007.13:g.6026775T>C' -p213 -sg34 -(dp214 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p215 -sg42 -g43 -sssg44 -(dp216 -g32 -S'NC_000007.14:g.5987144T>C' -p217 -sg34 -(dp218 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p219 -sg42 -g43 -sssg49 -(dp220 -g32 -S'NC_000007.13:g.6026775T>C' -p221 -sg34 -(dp222 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p223 -sg42 -g43 -sssg55 -(dp224 -g32 -S'NC_000007.14:g.5987144T>C' -p225 -sg34 -(dp226 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p227 -sg42 -g43 -sssssS'NM_001322006.1:c.1465A>G' -p228 -(dp229 -g3 -g4 -sg5 -(lp230 -S'RefSeqGene record not available' -p231 -asg8 -g4 -sg9 -(lp232 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 5, mRNA -p233 -sg13 -S'PMS2' -p234 -sg15 -(dp235 -g17 -S'NP_001308935.1:p.(Lys489Glu)' -p236 -sg19 -S'NP_001308935.1:p.(K489E)' -p237 -ssg21 -g22 -sg23 -g4 -sg24 
-g4 -sg25 -S'NM_001322006.1:c.1465A>G' -p238 -sg27 -g4 -sg28 -(dp239 -g30 -(dp240 -g32 -S'NC_000007.13:g.6026775T>C' -p241 -sg34 -(dp242 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p243 -sg42 -g43 -sssg44 -(dp244 -g32 -S'NC_000007.14:g.5987144T>C' -p245 -sg34 -(dp246 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p247 -sg42 -g43 -sssg49 -(dp248 -g32 -S'NC_000007.13:g.6026775T>C' -p249 -sg34 -(dp250 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p251 -sg42 -g43 -sssg55 -(dp252 -g32 -S'NC_000007.14:g.5987144T>C' -p253 -sg34 -(dp254 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p255 -sg42 -g43 -sssssS'NM_001322013.1:c.1048A>G' -p256 -(dp257 -g3 -g4 -sg5 -(lp258 -S'RefSeqGene record not available' -p259 -asg8 -g4 -sg9 -(lp260 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 12, mRNA -p261 -sg13 -S'PMS2' -p262 -sg15 -(dp263 -g17 -S'NP_001308942.1:p.(Lys350Glu)' -p264 -sg19 -S'NP_001308942.1:p.(K350E)' -p265 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001322013.1:c.1048A>G' -p266 -sg27 -g4 -sg28 -(dp267 -g30 -(dp268 -g32 -S'NC_000007.13:g.6026775T>C' -p269 -sg34 -(dp270 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p271 -sg42 -g43 -sssg44 -(dp272 -g32 -S'NC_000007.14:g.5987144T>C' -p273 -sg34 -(dp274 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p275 -sg42 -g43 -sssg49 -(dp276 -g32 -S'NC_000007.13:g.6026775T>C' -p277 -sg34 -(dp278 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p279 -sg42 -g43 -sssg55 -(dp280 -g32 -S'NC_000007.14:g.5987144T>C' -p281 -sg34 -(dp282 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p283 -sg42 -g43 -sssssS'NM_001322009.1:c.1216A>G' -p284 -(dp285 -g3 -g4 -sg5 -(lp286 -S'RefSeqGene record not available' -p287 -asg8 -g4 -sg9 -(lp288 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 8, mRNA -p289 -sg13 -S'PMS2' -p290 -sg15 -(dp291 -g17 -S'NP_001308938.1:p.(Lys406Glu)' -p292 -sg19 -S'NP_001308938.1:p.(K406E)' -p293 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001322009.1:c.1216A>G' -p294 -sg27 -g4 -sg28 
-(dp295 -g30 -(dp296 -g32 -S'NC_000007.13:g.6026775T>C' -p297 -sg34 -(dp298 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p299 -sg42 -g43 -sssg44 -(dp300 -g32 -S'NC_000007.14:g.5987144T>C' -p301 -sg34 -(dp302 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p303 -sg42 -g43 -sssg49 -(dp304 -g32 -S'NC_000007.13:g.6026775T>C' -p305 -sg34 -(dp306 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p307 -sg42 -g43 -sssg55 -(dp308 -g32 -S'NC_000007.14:g.5987144T>C' -p309 -sg34 -(dp310 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p311 -sg42 -g43 -sssssS'NR_003085.2:n.1703G=' -p312 -(dp313 -g3 -g4 -sg5 -(lp314 -S'RefSeqGene record not available' -p315 -asg8 -g4 -sg9 -(lp316 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 2, non-coding RNA -p317 -sg13 -S'PMS2' -p318 -sg15 -(dp319 -g17 -S'Non-coding :n.' -p320 -sg19 -g4 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NR_003085.2:n.1703G=' -p321 -sg27 -g4 -sg28 -(dp322 -g30 -(dp323 -g32 -S'NC_000007.13:g.6026775T>C' -p324 -sg34 -(dp325 -g36 -g37 -sg38 -S'T' -p326 -sg40 -S'6026775' -p327 -sg42 -g43 -sssg49 -(dp328 -g32 -S'NC_000007.13:g.6026775T>C' -p329 -sg34 -(dp330 -g36 -g53 -sg38 -g326 -sg40 -S'6026775' -p331 -sg42 -g43 -sssssS'flag' -p332 -S'gene_variant' -p333 -sS'NM_001322005.1:c.1216A>G' -p334 -(dp335 -g3 -g4 -sg5 -(lp336 -S'RefSeqGene record not available' -p337 -asg8 -g4 -sg9 -(lp338 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 4, mRNA -p339 -sg13 -S'PMS2' -p340 -sg15 -(dp341 -g17 -S'NP_001308934.1:p.(Lys406Glu)' -p342 -sg19 -S'NP_001308934.1:p.(K406E)' -p343 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001322005.1:c.1216A>G' -p344 -sg27 -g4 -sg28 -(dp345 -g30 -(dp346 -g32 -S'NC_000007.13:g.6026775T>C' -p347 -sg34 -(dp348 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p349 -sg42 -g43 -sssg44 -(dp350 -g32 -S'NC_000007.14:g.5987144T>C' -p351 -sg34 -(dp352 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p353 -sg42 -g43 -sssg49 -(dp354 -g32 -S'NC_000007.13:g.6026775T>C' -p355 
-sg34 -(dp356 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p357 -sg42 -g43 -sssg55 -(dp358 -g32 -S'NC_000007.14:g.5987144T>C' -p359 -sg34 -(dp360 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p361 -sg42 -g43 -sssssS'NM_001322007.1:c.1303A>G' -p362 -(dp363 -g3 -g4 -sg5 -(lp364 -S'RefSeqGene record not available' -p365 -asg8 -g4 -sg9 -(lp366 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 6, mRNA -p367 -sg13 -S'PMS2' -p368 -sg15 -(dp369 -g17 -S'NP_001308936.1:p.(Lys435Glu)' -p370 -sg19 -S'NP_001308936.1:p.(K435E)' -p371 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001322007.1:c.1303A>G' -p372 -sg27 -g4 -sg28 -(dp373 -g30 -(dp374 -g32 -S'NC_000007.13:g.6026775T>C' -p375 -sg34 -(dp376 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p377 -sg42 -g43 -sssg44 -(dp378 -g32 -S'NC_000007.14:g.5987144T>C' -p379 -sg34 -(dp380 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p381 -sg42 -g43 -sssg49 -(dp382 -g32 -S'NC_000007.13:g.6026775T>C' -p383 -sg34 -(dp384 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p385 -sg42 -g43 -sssg55 -(dp386 -g32 -S'NC_000007.14:g.5987144T>C' -p387 -sg34 -(dp388 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p389 -sg42 -g43 -sssssS'NM_000535.5:c.1621G=' -p390 -(dp391 -g3 -S'LRG_161t1:c.1621G=' -p392 -sg5 -(lp393 -S'A more recent version of the selected reference sequence NM_000535.5 is available (NM_000535.6)' -p394 -aS'NM_000535.6:c.1621A>G MUST be fully validated prior to use in reports' -p395 -aS'select_variants=NM_000535.6:c.1621A>G' -p396 -asg8 -g4 -sg9 -(lp397 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 1, mRNA -p398 -sg13 -S'PMS2' -p399 -sg15 -(dp400 -g17 -S'NP_000526.1(LRG_161p1):p.(Glu541=)' -p401 -sg19 -S'NP_000526.1:p.(E541=)' -p402 -ssg21 -g22 -sg23 -g4 -sg24 -S'LRG_161:g.26963G=' -p403 -sg25 -S'NM_000535.5:c.1621G=' -p404 -sg27 -S'NG_008466.1:g.26963G=' -p405 -sg28 -(dp406 -g30 -(dp407 -g32 -S'NC_000007.13:g.6026775T>C' -p408 -sg34 -(dp409 -g36 -g37 -sg38 -g326 -sg40 -S'6026775' 
-p410 -sg42 -g43 -sssg49 -(dp411 -g32 -S'NC_000007.13:g.6026775T>C' -p412 -sg34 -(dp413 -g36 -g53 -sg38 -g326 -sg40 -S'6026775' -p414 -sg42 -g43 -sssssS'NR_136154.1:n.1708A>G' -p415 -(dp416 -g3 -g4 -sg5 -(lp417 -S'RefSeqGene record not available' -p418 -asg8 -g4 -sg9 -(lp419 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 15, non-coding RNA -p420 -sg13 -S'PMS2' -p421 -sg15 -(dp422 -g17 -S'Non-coding :n.' -p423 -sg19 -g4 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NR_136154.1:n.1708A>G' -p424 -sg27 -g4 -sg28 -(dp425 -g30 -(dp426 -g32 -S'NC_000007.13:g.6026775T>C' -p427 -sg34 -(dp428 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p429 -sg42 -g43 -sssg44 -(dp430 -g32 -S'NC_000007.14:g.5987144T>C' -p431 -sg34 -(dp432 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p433 -sg42 -g43 -sssg49 -(dp434 -g32 -S'NC_000007.13:g.6026775T>C' -p435 -sg34 -(dp436 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p437 -sg42 -g43 -sssg55 -(dp438 -g32 -S'NC_000007.14:g.5987144T>C' -p439 -sg34 -(dp440 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p441 -sg42 -g43 -sssssS'NM_001322011.1:c.688A>G' -p442 -(dp443 -g3 -g4 -sg5 -(lp444 -S'RefSeqGene record not available' -p445 -asg8 -g4 -sg9 -(lp446 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 10, mRNA -p447 -sg13 -S'PMS2' -p448 -sg15 -(dp449 -g17 -S'NP_001308940.1:p.(Lys230Glu)' -p450 -sg19 -S'NP_001308940.1:p.(K230E)' -p451 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001322011.1:c.688A>G' -p452 -sg27 -g4 -sg28 -(dp453 -g30 -(dp454 -g32 -S'NC_000007.13:g.6026775T>C' -p455 -sg34 -(dp456 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p457 -sg42 -g43 -sssg44 -(dp458 -g32 -S'NC_000007.14:g.5987144T>C' -p459 -sg34 -(dp460 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p461 -sg42 -g43 -sssg49 -(dp462 -g32 -S'NC_000007.13:g.6026775T>C' -p463 -sg34 -(dp464 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p465 -sg42 -g43 -sssg55 -(dp466 -g32 -S'NC_000007.14:g.5987144T>C' -p467 -sg34 -(dp468 -g36 -g53 -sg38 
-g39 -sg40 -S'5987144' -p469 -sg42 -g43 -sssssS'NM_000535.6:c.1621A>G' -p470 -(dp471 -g3 -g4 -sg5 -(lp472 -S'RefSeqGene record not available' -p473 -asg8 -g4 -sg9 -(lp474 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 1, mRNA -p475 -sg13 -S'PMS2' -p476 -sg15 -(dp477 -g17 -S'NP_000526.2:p.(Lys541Glu)' -p478 -sg19 -S'NP_000526.2:p.(K541E)' -p479 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_000535.6:c.1621A>G' -p480 -sg27 -g4 -sg28 -(dp481 -g30 -(dp482 -g32 -S'NC_000007.13:g.6026775T>C' -p483 -sg34 -(dp484 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p485 -sg42 -g43 -sssg44 -(dp486 -g32 -S'NC_000007.14:g.5987144T>C' -p487 -sg34 -(dp488 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p489 -sg42 -g43 -sssg49 -(dp490 -g32 -S'NC_000007.13:g.6026775T>C' -p491 -sg34 -(dp492 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p493 -sg42 -g43 -sssg55 -(dp494 -g32 -S'NC_000007.14:g.5987144T>C' -p495 -sg34 -(dp496 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p497 -sg42 -g43 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant296.txt b/VariantValidator/testing/testOutputsMasterITS/variant296.txt deleted file mode 100644 index 100bb402..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant296.txt +++ /dev/null @@ -1,813 +0,0 @@ -(dp0 -S'NM_001346900.1:c.2077_2091del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000007.13:g.55242465GGAATTAAGAGAAGCA>G automapped to NC_000007.13:g.55242466_55242480del' -p7 -aS'RefSeqGene record not available' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 8, mRNA -p13 -sS'gene_symbol' -p14 -S'EGFR' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001333829.1:p.(Glu693_Ala697del)' -p19 -sS'slr' -p20 -S'NP_001333829.1:p.(E693_A697del)' -p21 -ssS'submitted_variant' -p22 -S'7-55242465-GGAATTAAGAGAAGCA-G' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'HGVS_LRG_variant' -p25 -g4 -sS'HGVS_transcript_variant' -p26 -S'NM_001346900.1:c.2077_2091del' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000007.13:g.55242466_55242480del' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr7' -p38 -sS'ref' -p39 -S'GGAATTAAGAGAAGCA' -p40 -sS'pos' -p41 -S'55242465' -p42 -sS'alt' -p43 -S'G' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000007.14:g.55174773_55174787del' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'GGAATTAAGAGAAGCA' -p49 -sg41 -S'55174772' -p50 -sg43 -g44 -sssS'GRCh37' -p51 -(dp52 -g33 -S'NC_000007.13:g.55242466_55242480del' -p53 -sg35 -(dp54 -g37 -S'7' -p55 -sg39 -S'GGAATTAAGAGAAGCA' -p56 -sg41 -S'55242465' -p57 -sg43 -g44 -sssS'GRCh38' -p58 -(dp59 -g33 -S'NC_000007.14:g.55174773_55174787del' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -S'GGAATTAAGAGAAGCA' -p62 
-sg41 -S'55174772' -p63 -sg43 -g44 -sssssS'NM_001346898.1:c.2236_2250del' -p64 -(dp65 -g3 -g4 -sg5 -(lp66 -S'NC_000007.13:g.55242465GGAATTAAGAGAAGCA>G automapped to NC_000007.13:g.55242466_55242480del' -p67 -aS'RefSeqGene record not available' -p68 -asg9 -g4 -sg10 -(lp69 -sg12 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 6, mRNA -p70 -sg14 -S'EGFR' -p71 -sg16 -(dp72 -g18 -S'NP_001333827.1:p.(Glu746_Ala750del)' -p73 -sg20 -S'NP_001333827.1:p.(E746_A750del)' -p74 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001346898.1:c.2236_2250del' -p75 -sg28 -g4 -sg29 -(dp76 -g31 -(dp77 -g33 -S'NC_000007.13:g.55242466_55242480del' -p78 -sg35 -(dp79 -g37 -g38 -sg39 -S'GGAATTAAGAGAAGCA' -p80 -sg41 -S'55242465' -p81 -sg43 -g44 -sssg45 -(dp82 -g33 -S'NC_000007.14:g.55174773_55174787del' -p83 -sg35 -(dp84 -g37 -g38 -sg39 -S'GGAATTAAGAGAAGCA' -p85 -sg41 -S'55174772' -p86 -sg43 -g44 -sssg51 -(dp87 -g33 -S'NC_000007.13:g.55242466_55242480del' -p88 -sg35 -(dp89 -g37 -g55 -sg39 -S'GGAATTAAGAGAAGCA' -p90 -sg41 -S'55242465' -p91 -sg43 -g44 -sssg58 -(dp92 -g33 -S'NC_000007.14:g.55174773_55174787del' -p93 -sg35 -(dp94 -g37 -g55 -sg39 -S'GGAATTAAGAGAAGCA' -p95 -sg41 -S'55174772' -p96 -sg43 -g44 -sssssS'NM_001346941.1:c.1435_1449del' -p97 -(dp98 -g3 -g4 -sg5 -(lp99 -S'NC_000007.13:g.55242465GGAATTAAGAGAAGCA>G automapped to NC_000007.13:g.55242466_55242480del' -p100 -aS'RefSeqGene record not available' -p101 -asg9 -g4 -sg10 -(lp102 -sg12 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant EGFRvIII, mRNA -p103 -sg14 -S'EGFR' -p104 -sg16 -(dp105 -g18 -S'NP_001333870.1:p.(Glu479_Ala483del)' -p106 -sg20 -S'NP_001333870.1:p.(E479_A483del)' -p107 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001346941.1:c.1435_1449del' -p108 -sg28 -g4 -sg29 -(dp109 -g31 -(dp110 -g33 -S'NC_000007.13:g.55242466_55242480del' -p111 -sg35 -(dp112 -g37 -g38 -sg39 -S'GGAATTAAGAGAAGCA' -p113 -sg41 -S'55242465' -p114 -sg43 -g44 -sssg45 -(dp115 -g33 
-S'NC_000007.14:g.55174773_55174787del' -p116 -sg35 -(dp117 -g37 -g38 -sg39 -S'GGAATTAAGAGAAGCA' -p118 -sg41 -S'55174772' -p119 -sg43 -g44 -sssg51 -(dp120 -g33 -S'NC_000007.13:g.55242466_55242480del' -p121 -sg35 -(dp122 -g37 -g55 -sg39 -S'GGAATTAAGAGAAGCA' -p123 -sg41 -S'55242465' -p124 -sg43 -g44 -sssg58 -(dp125 -g33 -S'NC_000007.14:g.55174773_55174787del' -p126 -sg35 -(dp127 -g37 -g55 -sg39 -S'GGAATTAAGAGAAGCA' -p128 -sg41 -S'55174772' -p129 -sg43 -g44 -sssssS'flag' -p130 -S'gene_variant' -p131 -sS'NM_001346899.1:c.2101_2115del' -p132 -(dp133 -g3 -g4 -sg5 -(lp134 -S'NC_000007.13:g.55242465GGAATTAAGAGAAGCA>G automapped to NC_000007.13:g.55242466_55242480del' -p135 -aS'RefSeqGene record not available' -p136 -asg9 -g4 -sg10 -(lp137 -sg12 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 7, mRNA -p138 -sg14 -S'EGFR' -p139 -sg16 -(dp140 -g18 -S'NP_001333828.1:p.(Glu701_Ala705del)' -p141 -sg20 -S'NP_001333828.1:p.(E701_A705del)' -p142 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001346899.1:c.2101_2115del' -p143 -sg28 -g4 -sg29 -(dp144 -g31 -(dp145 -g33 -S'NC_000007.13:g.55242466_55242480del' -p146 -sg35 -(dp147 -g37 -g38 -sg39 -S'GGAATTAAGAGAAGCA' -p148 -sg41 -S'55242465' -p149 -sg43 -g44 -sssg45 -(dp150 -g33 -S'NC_000007.14:g.55174773_55174787del' -p151 -sg35 -(dp152 -g37 -g38 -sg39 -S'GGAATTAAGAGAAGCA' -p153 -sg41 -S'55174772' -p154 -sg43 -g44 -sssg51 -(dp155 -g33 -S'NC_000007.13:g.55242466_55242480del' -p156 -sg35 -(dp157 -g37 -g55 -sg39 -S'GGAATTAAGAGAAGCA' -p158 -sg41 -S'55242465' -p159 -sg43 -g44 -sssg58 -(dp160 -g33 -S'NC_000007.14:g.55174773_55174787del' -p161 -sg35 -(dp162 -g37 -g55 -sg39 -S'GGAATTAAGAGAAGCA' -p163 -sg41 -S'55174772' -p164 -sg43 -g44 -sssssS'NM_001346897.1:c.2101_2115del' -p165 -(dp166 -g3 -g4 -sg5 -(lp167 -S'NC_000007.13:g.55242465GGAATTAAGAGAAGCA>G automapped to NC_000007.13:g.55242466_55242480del' -p168 -aS'RefSeqGene record not available' -p169 -asg9 -g4 -sg10 -(lp170 -sg12 -VHomo sapiens epidermal growth factor 
receptor (EGFR), transcript variant 5, mRNA -p171 -sg14 -S'EGFR' -p172 -sg16 -(dp173 -g18 -S'NP_001333826.1:p.(Glu701_Ala705del)' -p174 -sg20 -S'NP_001333826.1:p.(E701_A705del)' -p175 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001346897.1:c.2101_2115del' -p176 -sg28 -g4 -sg29 -(dp177 -g31 -(dp178 -g33 -S'NC_000007.13:g.55242466_55242480del' -p179 -sg35 -(dp180 -g37 -g38 -sg39 -S'GGAATTAAGAGAAGCA' -p181 -sg41 -S'55242465' -p182 -sg43 -g44 -sssg45 -(dp183 -g33 -S'NC_000007.14:g.55174773_55174787del' -p184 -sg35 -(dp185 -g37 -g38 -sg39 -S'GGAATTAAGAGAAGCA' -p186 -sg41 -S'55174772' -p187 -sg43 -g44 -sssg51 -(dp188 -g33 -S'NC_000007.13:g.55242466_55242480del' -p189 -sg35 -(dp190 -g37 -g55 -sg39 -S'GGAATTAAGAGAAGCA' -p191 -sg41 -S'55242465' -p192 -sg43 -g44 -sssg58 -(dp193 -g33 -S'NC_000007.14:g.55174773_55174787del' -p194 -sg35 -(dp195 -g37 -g55 -sg39 -S'GGAATTAAGAGAAGCA' -p196 -sg41 -S'55174772' -p197 -sg43 -g44 -sssssS'NM_005228.3:c.2236_2250del' -p198 -(dp199 -g3 -S'LRG_304t1:c.2236_2250del' -p200 -sg5 -(lp201 -S'NC_000007.13:g.55242465GGAATTAAGAGAAGCA>G automapped to NC_000007.13:g.55242466_55242480del' -p202 -aS'A more recent version of the selected reference sequence NM_005228.3 is available (NM_005228.4)' -p203 -aS'NM_005228.4:c.2236_2250del MUST be fully validated prior to use in reports' -p204 -aS'select_variants=NM_005228.4:c.2236_2250del' -p205 -asg9 -g4 -sg10 -(lp206 -sg12 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 1, mRNA -p207 -sg14 -S'EGFR' -p208 -sg16 -(dp209 -g18 -S'NP_005219.2(LRG_304p1):p.(Glu746_Ala750del)' -p210 -sg20 -S'NP_005219.2:p.(E746_A750del)' -p211 -ssg22 -g23 -sg24 -g4 -sg25 -S'LRG_304:g.160742_160756del' -p212 -sg26 -S'NM_005228.3:c.2236_2250del' -p213 -sg28 -S'NG_007726.3:g.160742_160756del' -p214 -sg29 -(dp215 -g31 -(dp216 -g33 -S'NC_000007.13:g.55242466_55242480del' -p217 -sg35 -(dp218 -g37 -g38 -sg39 -S'GGAATTAAGAGAAGCA' -p219 -sg41 -S'55242465' -p220 -sg43 -g44 -sssg45 -(dp221 -g33 
-S'NC_000007.14:g.55174773_55174787del' -p222 -sg35 -(dp223 -g37 -g38 -sg39 -S'GGAATTAAGAGAAGCA' -p224 -sg41 -S'55174772' -p225 -sg43 -g44 -sssg51 -(dp226 -g33 -S'NC_000007.13:g.55242466_55242480del' -p227 -sg35 -(dp228 -g37 -g55 -sg39 -S'GGAATTAAGAGAAGCA' -p229 -sg41 -S'55242465' -p230 -sg43 -g44 -sssg58 -(dp231 -g33 -S'NC_000007.14:g.55174773_55174787del' -p232 -sg35 -(dp233 -g37 -g55 -sg39 -S'GGAATTAAGAGAAGCA' -p234 -sg41 -S'55174772' -p235 -sg43 -g44 -sssssS'NM_005228.4:c.2236_2250del' -p236 -(dp237 -g3 -g4 -sg5 -(lp238 -S'NC_000007.13:g.55242465GGAATTAAGAGAAGCA>G automapped to NC_000007.13:g.55242466_55242480del' -p239 -aS'RefSeqGene record not available' -p240 -asg9 -g4 -sg10 -(lp241 -sg12 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 1, mRNA -p242 -sg14 -S'EGFR' -p243 -sg16 -(dp244 -g18 -S'NP_005219.2(LRG_304p1):p.(Glu746_Ala750del)' -p245 -sg20 -S'NP_005219.2:p.(E746_A750del)' -p246 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_005228.4:c.2236_2250del' -p247 -sg28 -g4 -sg29 -(dp248 -g31 -(dp249 -g33 -S'NC_000007.13:g.55242466_55242480del' -p250 -sg35 -(dp251 -g37 -g38 -sg39 -S'GGAATTAAGAGAAGCA' -p252 -sg41 -S'55242465' -p253 -sg43 -g44 -sssg45 -(dp254 -g33 -S'NC_000007.14:g.55174773_55174787del' -p255 -sg35 -(dp256 -g37 -g38 -sg39 -S'GGAATTAAGAGAAGCA' -p257 -sg41 -S'55174772' -p258 -sg43 -g44 -sssg51 -(dp259 -g33 -S'NC_000007.13:g.55242466_55242480del' -p260 -sg35 -(dp261 -g37 -g55 -sg39 -S'GGAATTAAGAGAAGCA' -p262 -sg41 -S'55242465' -p263 -sg43 -g44 -sssg58 -(dp264 -g33 -S'NC_000007.14:g.55174773_55174787del' -p265 -sg35 -(dp266 -g37 -g55 -sg39 -S'GGAATTAAGAGAAGCA' -p267 -sg41 -S'55174772' -p268 -sg43 -g44 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant297.txt b/VariantValidator/testing/testOutputsMasterITS/variant297.txt deleted file mode 100644 index 4f45c45d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant297.txt +++ /dev/null @@ -1,961 +0,0 @@ -(dp0 -S'NM_005228.3:c.2284-5_2290dup' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_304t1:c.2284-5_2290dup' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000007.13:g.55248992T>TTCCAGGAAGCCT automapped to NC_000007.13:g.55248981_55248992dupTCCAGGAAGCCT' -p7 -aS'A more recent version of the selected reference sequence NM_005228.3 is available (NM_005228.4)' -p8 -aS'NM_005228.4:c.2284-5_2290dupTCCAGGAAGCCT MUST be fully validated prior to use in reports' -p9 -aS'select_variants=NM_005228.4:c.2284-5_2290dup' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -S'NG_007726.3(NM_005228.3):c.2284-5_2290dup' -p12 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 1, mRNA -p16 -sS'gene_symbol' -p17 -S'EGFR' -p18 -sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_005219.2(LRG_304p1):p.?' -p22 -sS'slr' -p23 -S'NP_005219.2:p.?' 
-p24 -ssS'submitted_variant' -p25 -S'7-55248992-T-TTCCAGGAAGCCT' -p26 -sS'genome_context_intronic_sequence' -p27 -S'NC_000007.13(NM_005228.3):c.2284-5_2290dup' -p28 -sS'HGVS_LRG_variant' -p29 -S'LRG_304:g.167257_167268dup' -p30 -sS'HGVS_transcript_variant' -p31 -S'NM_005228.3:c.2284-5_2290dup' -p32 -sS'HGVS_RefSeqGene_variant' -p33 -S'NG_007726.3:g.167257_167268dup' -p34 -sS'primary_assembly_loci' -p35 -(dp36 -S'hg19' -p37 -(dp38 -S'HGVS_genomic_description' -p39 -S'NC_000007.13:g.55248981_55248992dup' -p40 -sS'vcf' -p41 -(dp42 -S'chr' -p43 -S'chr7' -p44 -sS'ref' -p45 -S'TCCAGGAAGCCT' -p46 -sS'pos' -p47 -S'55248981' -p48 -sS'alt' -p49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p50 -sssS'hg38' -p51 -(dp52 -g39 -S'NC_000007.14:g.55181288_55181299dup' -p53 -sg41 -(dp54 -g43 -g44 -sg45 -S'TCCAGGAAGCCT' -p55 -sg47 -S'55181288' -p56 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p57 -sssS'GRCh37' -p58 -(dp59 -g39 -S'NC_000007.13:g.55248981_55248992dup' -p60 -sg41 -(dp61 -g43 -S'7' -p62 -sg45 -S'TCCAGGAAGCCT' -p63 -sg47 -S'55248981' -p64 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p65 -sssS'GRCh38' -p66 -(dp67 -g39 -S'NC_000007.14:g.55181288_55181299dup' -p68 -sg41 -(dp69 -g43 -g62 -sg45 -S'TCCAGGAAGCCT' -p70 -sg47 -S'55181288' -p71 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p72 -sssssS'NM_001346899.1:c.2149-5_2155dup' -p73 -(dp74 -g3 -S'' -p75 -sg5 -(lp76 -S'NC_000007.13:g.55248992T>TTCCAGGAAGCCT automapped to NC_000007.13:g.55248981_55248992dupTCCAGGAAGCCT' -p77 -aS'RefSeqGene record not available' -p78 -asg11 -g75 -sg13 -(lp79 -sg15 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 7, mRNA -p80 -sg17 -S'EGFR' -p81 -sg19 -(dp82 -g21 -S'NP_001333828.1:p.?' -p83 -sg23 -S'NP_001333828.1:p.?' 
-p84 -ssg25 -g26 -sg27 -S'NC_000007.13(NM_001346899.1):c.2149-5_2155dup' -p85 -sg29 -g75 -sg31 -S'NM_001346899.1:c.2149-5_2155dup' -p86 -sg33 -g75 -sg35 -(dp87 -g37 -(dp88 -g39 -S'NC_000007.13:g.55248981_55248992dup' -p89 -sg41 -(dp90 -g43 -g44 -sg45 -S'TCCAGGAAGCCT' -p91 -sg47 -S'55248981' -p92 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p93 -sssg51 -(dp94 -g39 -S'NC_000007.14:g.55181288_55181299dup' -p95 -sg41 -(dp96 -g43 -g44 -sg45 -S'TCCAGGAAGCCT' -p97 -sg47 -S'55181288' -p98 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p99 -sssg58 -(dp100 -g39 -S'NC_000007.13:g.55248981_55248992dup' -p101 -sg41 -(dp102 -g43 -g62 -sg45 -S'TCCAGGAAGCCT' -p103 -sg47 -S'55248981' -p104 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p105 -sssg66 -(dp106 -g39 -S'NC_000007.14:g.55181288_55181299dup' -p107 -sg41 -(dp108 -g43 -g62 -sg45 -S'TCCAGGAAGCCT' -p109 -sg47 -S'55181288' -p110 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p111 -sssssS'NM_005228.4:c.2284-5_2290dup' -p112 -(dp113 -g3 -g75 -sg5 -(lp114 -S'NC_000007.13:g.55248992T>TTCCAGGAAGCCT automapped to NC_000007.13:g.55248981_55248992dupTCCAGGAAGCCT' -p115 -aS'RefSeqGene record not available' -p116 -asg11 -g75 -sg13 -(lp117 -sg15 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 1, mRNA -p118 -sg17 -S'EGFR' -p119 -sg19 -(dp120 -g21 -S'NP_005219.2(LRG_304p1):p.?' -p121 -sg23 -S'NP_005219.2:p.?' 
-p122 -ssg25 -g26 -sg27 -S'NC_000007.13(NM_005228.4):c.2284-5_2290dup' -p123 -sg29 -g75 -sg31 -S'NM_005228.4:c.2284-5_2290dup' -p124 -sg33 -g75 -sg35 -(dp125 -g37 -(dp126 -g39 -S'NC_000007.13:g.55248981_55248992dup' -p127 -sg41 -(dp128 -g43 -g44 -sg45 -S'TCCAGGAAGCCT' -p129 -sg47 -S'55248981' -p130 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p131 -sssg51 -(dp132 -g39 -S'NC_000007.14:g.55181288_55181299dup' -p133 -sg41 -(dp134 -g43 -g44 -sg45 -S'TCCAGGAAGCCT' -p135 -sg47 -S'55181288' -p136 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p137 -sssg58 -(dp138 -g39 -S'NC_000007.13:g.55248981_55248992dup' -p139 -sg41 -(dp140 -g43 -g62 -sg45 -S'TCCAGGAAGCCT' -p141 -sg47 -S'55248981' -p142 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p143 -sssg66 -(dp144 -g39 -S'NC_000007.14:g.55181288_55181299dup' -p145 -sg41 -(dp146 -g43 -g62 -sg45 -S'TCCAGGAAGCCT' -p147 -sg47 -S'55181288' -p148 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p149 -sssssS'NM_001346898.1:c.2284-5_2290dup' -p150 -(dp151 -g3 -g75 -sg5 -(lp152 -S'NC_000007.13:g.55248992T>TTCCAGGAAGCCT automapped to NC_000007.13:g.55248981_55248992dupTCCAGGAAGCCT' -p153 -aS'RefSeqGene record not available' -p154 -asg11 -g75 -sg13 -(lp155 -sg15 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 6, mRNA -p156 -sg17 -S'EGFR' -p157 -sg19 -(dp158 -g21 -S'NP_001333827.1:p.?' -p159 -sg23 -S'NP_001333827.1:p.?' 
-p160 -ssg25 -g26 -sg27 -S'NC_000007.13(NM_001346898.1):c.2284-5_2290dup' -p161 -sg29 -g75 -sg31 -S'NM_001346898.1:c.2284-5_2290dup' -p162 -sg33 -g75 -sg35 -(dp163 -g37 -(dp164 -g39 -S'NC_000007.13:g.55248981_55248992dup' -p165 -sg41 -(dp166 -g43 -g44 -sg45 -S'TCCAGGAAGCCT' -p167 -sg47 -S'55248981' -p168 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p169 -sssg51 -(dp170 -g39 -S'NC_000007.14:g.55181288_55181299dup' -p171 -sg41 -(dp172 -g43 -g44 -sg45 -S'TCCAGGAAGCCT' -p173 -sg47 -S'55181288' -p174 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p175 -sssg58 -(dp176 -g39 -S'NC_000007.13:g.55248981_55248992dup' -p177 -sg41 -(dp178 -g43 -g62 -sg45 -S'TCCAGGAAGCCT' -p179 -sg47 -S'55248981' -p180 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p181 -sssg66 -(dp182 -g39 -S'NC_000007.14:g.55181288_55181299dup' -p183 -sg41 -(dp184 -g43 -g62 -sg45 -S'TCCAGGAAGCCT' -p185 -sg47 -S'55181288' -p186 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p187 -sssssS'NM_001346941.1:c.1483-5_1489dup' -p188 -(dp189 -g3 -g75 -sg5 -(lp190 -S'NC_000007.13:g.55248992T>TTCCAGGAAGCCT automapped to NC_000007.13:g.55248981_55248992dupTCCAGGAAGCCT' -p191 -aS'RefSeqGene record not available' -p192 -asg11 -g75 -sg13 -(lp193 -sg15 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant EGFRvIII, mRNA -p194 -sg17 -S'EGFR' -p195 -sg19 -(dp196 -g21 -S'NP_001333870.1:p.?' -p197 -sg23 -S'NP_001333870.1:p.?' 
-p198 -ssg25 -g26 -sg27 -S'NC_000007.13(NM_001346941.1):c.1483-5_1489dup' -p199 -sg29 -g75 -sg31 -S'NM_001346941.1:c.1483-5_1489dup' -p200 -sg33 -g75 -sg35 -(dp201 -g37 -(dp202 -g39 -S'NC_000007.13:g.55248981_55248992dup' -p203 -sg41 -(dp204 -g43 -g44 -sg45 -S'TCCAGGAAGCCT' -p205 -sg47 -S'55248981' -p206 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p207 -sssg51 -(dp208 -g39 -S'NC_000007.14:g.55181288_55181299dup' -p209 -sg41 -(dp210 -g43 -g44 -sg45 -S'TCCAGGAAGCCT' -p211 -sg47 -S'55181288' -p212 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p213 -sssg58 -(dp214 -g39 -S'NC_000007.13:g.55248981_55248992dup' -p215 -sg41 -(dp216 -g43 -g62 -sg45 -S'TCCAGGAAGCCT' -p217 -sg47 -S'55248981' -p218 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p219 -sssg66 -(dp220 -g39 -S'NC_000007.14:g.55181288_55181299dup' -p221 -sg41 -(dp222 -g43 -g62 -sg45 -S'TCCAGGAAGCCT' -p223 -sg47 -S'55181288' -p224 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p225 -sssssS'flag' -p226 -S'gene_variant' -p227 -sS'NM_001346900.1:c.2125-5_2131dup' -p228 -(dp229 -g3 -g75 -sg5 -(lp230 -S'NC_000007.13:g.55248992T>TTCCAGGAAGCCT automapped to NC_000007.13:g.55248981_55248992dupTCCAGGAAGCCT' -p231 -aS'RefSeqGene record not available' -p232 -asg11 -g75 -sg13 -(lp233 -sg15 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 8, mRNA -p234 -sg17 -S'EGFR' -p235 -sg19 -(dp236 -g21 -S'NP_001333829.1:p.?' -p237 -sg23 -S'NP_001333829.1:p.?' 
-p238 -ssg25 -g26 -sg27 -S'NC_000007.13(NM_001346900.1):c.2125-5_2131dup' -p239 -sg29 -g75 -sg31 -S'NM_001346900.1:c.2125-5_2131dup' -p240 -sg33 -g75 -sg35 -(dp241 -g37 -(dp242 -g39 -S'NC_000007.13:g.55248981_55248992dup' -p243 -sg41 -(dp244 -g43 -g44 -sg45 -S'TCCAGGAAGCCT' -p245 -sg47 -S'55248981' -p246 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p247 -sssg51 -(dp248 -g39 -S'NC_000007.14:g.55181288_55181299dup' -p249 -sg41 -(dp250 -g43 -g44 -sg45 -S'TCCAGGAAGCCT' -p251 -sg47 -S'55181288' -p252 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p253 -sssg58 -(dp254 -g39 -S'NC_000007.13:g.55248981_55248992dup' -p255 -sg41 -(dp256 -g43 -g62 -sg45 -S'TCCAGGAAGCCT' -p257 -sg47 -S'55248981' -p258 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p259 -sssg66 -(dp260 -g39 -S'NC_000007.14:g.55181288_55181299dup' -p261 -sg41 -(dp262 -g43 -g62 -sg45 -S'TCCAGGAAGCCT' -p263 -sg47 -S'55181288' -p264 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p265 -sssssS'NR_047551.1:n.1272_1283dup' -p266 -(dp267 -g3 -g75 -sg5 -(lp268 -S'NC_000007.13:g.55248992T>TTCCAGGAAGCCT automapped to NC_000007.13:g.55248981_55248992dupTCCAGGAAGCCT' -p269 -aS'RefSeqGene record not available' -p270 -asg11 -g75 -sg13 -(lp271 -sg15 -VHomo sapiens EGFR antisense RNA 1 (EGFR-AS1), long non-coding RNA -p272 -sg17 -S'EGFR-AS1' -p273 -sg19 -(dp274 -g21 -S'Non-coding :n.' 
-p275 -sg23 -g75 -ssg25 -g26 -sg27 -g75 -sg29 -g75 -sg31 -S'NR_047551.1:n.1272_1283dup' -p276 -sg33 -g75 -sg35 -(dp277 -g37 -(dp278 -g39 -S'NC_000007.13:g.55248981_55248992dup' -p279 -sg41 -(dp280 -g43 -g44 -sg45 -S'TCCAGGAAGCCT' -p281 -sg47 -S'55248981' -p282 -sg49 -VTCCAGGAAGCCTTCCAGGAAGCCT -p283 -sssg51 -(dp284 -g39 -S'NC_000007.14:g.55181288_55181299dup' -p285 -sg41 -(dp286 -g43 -g44 -sg45 -S'TCCAGGAAGCCT' -p287 -sg47 -S'55181288' -p288 -sg49 -VTCCAGGAAGCCTTCCAGGAAGCCT -p289 -sssg58 -(dp290 -g39 -S'NC_000007.13:g.55248981_55248992dup' -p291 -sg41 -(dp292 -g43 -g62 -sg45 -S'TCCAGGAAGCCT' -p293 -sg47 -S'55248981' -p294 -sg49 -VTCCAGGAAGCCTTCCAGGAAGCCT -p295 -sssg66 -(dp296 -g39 -S'NC_000007.14:g.55181288_55181299dup' -p297 -sg41 -(dp298 -g43 -g62 -sg45 -S'TCCAGGAAGCCT' -p299 -sg47 -S'55181288' -p300 -sg49 -VTCCAGGAAGCCTTCCAGGAAGCCT -p301 -sssssS'NM_001346897.1:c.2149-5_2155dup' -p302 -(dp303 -g3 -g75 -sg5 -(lp304 -S'NC_000007.13:g.55248992T>TTCCAGGAAGCCT automapped to NC_000007.13:g.55248981_55248992dupTCCAGGAAGCCT' -p305 -aS'RefSeqGene record not available' -p306 -asg11 -g75 -sg13 -(lp307 -sg15 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 5, mRNA -p308 -sg17 -S'EGFR' -p309 -sg19 -(dp310 -g21 -S'NP_001333826.1:p.?' -p311 -sg23 -S'NP_001333826.1:p.?' 
-p312 -ssg25 -g26 -sg27 -S'NC_000007.13(NM_001346897.1):c.2149-5_2155dup' -p313 -sg29 -g75 -sg31 -S'NM_001346897.1:c.2149-5_2155dup' -p314 -sg33 -g75 -sg35 -(dp315 -g37 -(dp316 -g39 -S'NC_000007.13:g.55248981_55248992dup' -p317 -sg41 -(dp318 -g43 -g44 -sg45 -S'TCCAGGAAGCCT' -p319 -sg47 -S'55248981' -p320 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p321 -sssg51 -(dp322 -g39 -S'NC_000007.14:g.55181288_55181299dup' -p323 -sg41 -(dp324 -g43 -g44 -sg45 -S'TCCAGGAAGCCT' -p325 -sg47 -S'55181288' -p326 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p327 -sssg58 -(dp328 -g39 -S'NC_000007.13:g.55248981_55248992dup' -p329 -sg41 -(dp330 -g43 -g62 -sg45 -S'TCCAGGAAGCCT' -p331 -sg47 -S'55248981' -p332 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p333 -sssg66 -(dp334 -g39 -S'NC_000007.14:g.55181288_55181299dup' -p335 -sg41 -(dp336 -g43 -g62 -sg45 -S'TCCAGGAAGCCT' -p337 -sg47 -S'55181288' -p338 -sg49 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p339 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant298.txt b/VariantValidator/testing/testOutputsMasterITS/variant298.txt deleted file mode 100644 index e3549f9c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant298.txt +++ /dev/null @@ -1,252 +0,0 @@ -(dp0 -S'NM_001540.4:c.82C>A' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens heat shock protein family B (small) member 1 (HSPB1), mRNA -p12 -sS'gene_symbol' -p13 -S'HSPB1' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001531.1(LRG_248p1):p.(Leu28Ile)' -p18 -sS'slr' -p19 -S'NP_001531.1:p.(L28I)' -p20 -ssS'submitted_variant' -p21 -S'7-75932111-C-A' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_001540.4:c.82C>A' -p26 -sS'HGVS_RefSeqGene_variant' 
-p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000007.13:g.75932111C>A' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr7' -p37 -sS'ref' -p38 -S'C' -p39 -sS'pos' -p40 -S'75932111' -p41 -sS'alt' -p42 -S'A' -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000007.14:g.76302794C>A' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'76302794' -p48 -sg42 -g43 -sssS'GRCh37' -p49 -(dp50 -g32 -S'NC_000007.13:g.75932111C>A' -p51 -sg34 -(dp52 -g36 -S'7' -p53 -sg38 -g39 -sg40 -S'75932111' -p54 -sg42 -g43 -sssS'GRCh38' -p55 -(dp56 -g32 -S'NC_000007.14:g.76302794C>A' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'76302794' -p59 -sg42 -g43 -sssssS'flag' -p60 -S'gene_variant' -p61 -sS'NM_001540.3:c.82C>A' -p62 -(dp63 -g3 -S'LRG_248t1:c.82C>A' -p64 -sg5 -(lp65 -S'A more recent version of the selected reference sequence NM_001540.3 is available (NM_001540.4)' -p66 -aS'NM_001540.4:c.82C>A MUST be fully validated prior to use in reports' -p67 -aS'select_variants=NM_001540.4:c.82C>A' -p68 -asg8 -g4 -sg9 -(lp69 -sg11 -VHomo sapiens heat shock protein family B (small) member 1 (HSPB1), mRNA -p70 -sg13 -S'HSPB1' -p71 -sg15 -(dp72 -g17 -S'NP_001531.1(LRG_248p1):p.(Leu28Ile)' -p73 -sg19 -S'NP_001531.1:p.(L28I)' -p74 -ssg21 -g22 -sg23 -g4 -sg24 -S'LRG_248:g.5237C>A' -p75 -sg25 -S'NM_001540.3:c.82C>A' -p76 -sg27 -S'NG_008995.1:g.5237C>A' -p77 -sg28 -(dp78 -g30 -(dp79 -g32 -S'NC_000007.13:g.75932111C>A' -p80 -sg34 -(dp81 -g36 -g37 -sg38 -g39 -sg40 -S'75932111' -p82 -sg42 -g43 -sssg44 -(dp83 -g32 -S'NC_000007.14:g.76302794C>A' -p84 -sg34 -(dp85 -g36 -g37 -sg38 -g39 -sg40 -S'76302794' -p86 -sg42 -g43 -sssg49 -(dp87 -g32 -S'NC_000007.13:g.75932111C>A' -p88 -sg34 -(dp89 -g36 -g53 -sg38 -g39 -sg40 -S'75932111' -p90 -sg42 -g43 -sssg55 -(dp91 -g32 -S'NC_000007.14:g.76302794C>A' -p92 -sg34 -(dp93 -g36 -g53 -sg38 -g39 -sg40 -S'76302794' -p94 -sg42 -g43 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant299.txt b/VariantValidator/testing/testOutputsMasterITS/variant299.txt deleted file mode 100644 index 0317235a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant299.txt +++ /dev/null @@ -1,264 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_005751.4:c.4004_4006dup' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_331t1:c.4004_4006dup' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000007.13:g.91652178A>AAAC automapped to NC_000007.13:g.91652179_91652181dupAAC' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens A-kinase anchoring protein 9 (AKAP9), transcript variant 2, mRNA -p15 -sS'gene_symbol' -p16 -S'AKAP9' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_005742.4(LRG_331p1):p.(Lys1335_Leu1336insGln)' -p21 -sS'slr' -p22 -S'NP_005742.4:p.(K1335_L1336insQ)' -p23 -ssS'submitted_variant' -p24 -S'7-91652178-A-AAAC' -p25 -sS'genome_context_intronic_sequence' -p26 -g11 -sS'HGVS_LRG_variant' -p27 -S'LRG_331:g.86991_86993dup' -p28 -sS'HGVS_transcript_variant' -p29 -S'NM_005751.4:c.4004_4006dup' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_011623.1:g.86991_86993dup' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000007.13:g.91652179_91652181dup' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr7' -p42 -sS'ref' -p43 -S'AAC' -p44 -sS'pos' -p45 -S'91652179' -p46 -sS'alt' -p47 -S'AACAAC' -p48 -sssS'hg38' -p49 -(dp50 -g37 -S'NC_000007.14:g.92022865_92022867dup' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -S'AAC' -p53 -sg45 -S'92022865' -p54 -sg47 -S'AACAAC' -p55 -sssS'GRCh37' -p56 -(dp57 -g37 -S'NC_000007.13:g.91652179_91652181dup' -p58 -sg39 -(dp59 -g41 -S'7' -p60 -sg43 -S'AAC' -p61 -sg45 -S'91652179' -p62 -sg47 -S'AACAAC' -p63 -sssS'GRCh38' -p64 -(dp65 -g37 
-S'NC_000007.14:g.92022865_92022867dup' -p66 -sg39 -(dp67 -g41 -g60 -sg43 -S'AAC' -p68 -sg45 -S'92022865' -p69 -sg47 -S'AACAAC' -p70 -sssssS'NM_147185.2:c.4004_4006dup' -p71 -(dp72 -g5 -g11 -sg7 -(lp73 -S'NC_000007.13:g.91652178A>AAAC automapped to NC_000007.13:g.91652179_91652181dupAAC' -p74 -aS'RefSeqGene record not available' -p75 -asg10 -g11 -sg12 -(lp76 -sg14 -VHomo sapiens A-kinase anchoring protein 9 (AKAP9), transcript variant 3, mRNA -p77 -sg16 -S'AKAP9' -p78 -sg18 -(dp79 -g20 -S'NP_671714.1:p.(Lys1335_Leu1336insGln)' -p80 -sg22 -S'NP_671714.1:p.(K1335_L1336insQ)' -p81 -ssg24 -g25 -sg26 -g11 -sg27 -g11 -sg29 -S'NM_147185.2:c.4004_4006dup' -p82 -sg31 -g11 -sg33 -(dp83 -g35 -(dp84 -g37 -S'NC_000007.13:g.91652179_91652181dup' -p85 -sg39 -(dp86 -g41 -g42 -sg43 -S'AAC' -p87 -sg45 -S'91652179' -p88 -sg47 -S'AACAAC' -p89 -sssg49 -(dp90 -g37 -S'NC_000007.14:g.92022865_92022867dup' -p91 -sg39 -(dp92 -g41 -g42 -sg43 -S'AAC' -p93 -sg45 -S'92022865' -p94 -sg47 -S'AACAAC' -p95 -sssg56 -(dp96 -g37 -S'NC_000007.13:g.91652179_91652181dup' -p97 -sg39 -(dp98 -g41 -g60 -sg43 -S'AAC' -p99 -sg45 -S'91652179' -p100 -sg47 -S'AACAAC' -p101 -sssg64 -(dp102 -g37 -S'NC_000007.14:g.92022865_92022867dup' -p103 -sg39 -(dp104 -g41 -g60 -sg43 -S'AAC' -p105 -sg45 -S'92022865' -p106 -sg47 -S'AACAAC' -p107 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant3.txt b/VariantValidator/testing/testOutputsMasterITS/variant3.txt deleted file mode 100644 index a390f943..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant3.txt +++ /dev/null @@ -1,142 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_015120.4:c.34C>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_741t1:c.34C>T' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA -p14 -sS'gene_symbol' -p15 -S'ALMS1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_055935.4(LRG_741p1):p.(Leu12=)' -p20 -sS'slr' -p21 -S'NP_055935.4:p.(L12=)' -p22 -ssS'submitted_variant' -p23 -S'NC_000002.11:g.73613030C>T' -p24 -sS'genome_context_intronic_sequence' -p25 -g10 -sS'HGVS_LRG_variant' -p26 -S'LRG_741:g.5145C>T' -p27 -sS'HGVS_transcript_variant' -p28 -S'NM_015120.4:c.34C>T' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_011690.1:g.5145C>T' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000002.11:g.73613030C>T' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr2' -p41 -sS'ref' -p42 -VC -p43 -sS'pos' -p44 -S'73613030' -p45 -sS'alt' -p46 -VT -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000002.12:g.73385902C>T' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -g43 -sg44 -S'73385902' -p52 -sg46 -g47 -sssS'GRCh37' -p53 -(dp54 -g36 -S'NC_000002.11:g.73613030C>T' -p55 -sg38 -(dp56 -g40 -S'2' -p57 -sg42 -g43 -sg44 -S'73613030' -p58 -sg46 -g47 -sssS'GRCh38' -p59 -(dp60 -g36 -S'NC_000002.12:g.73385902C>T' -p61 -sg38 -(dp62 -g40 -g57 -sg42 -g43 -sg44 -S'73385902' -p63 -sg46 -g47 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant30.txt b/VariantValidator/testing/testOutputsMasterITS/variant30.txt deleted file mode 100644 index 9aadc5f5..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant30.txt +++ /dev/null @@ -1,142 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589G>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.589G>T' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2(LRG_1p1):p.(Gly197Cys)' -p20 -sS'slr' -p21 -S'NP_000079.2:p.(G197C)' -p22 -ssS'submitted_variant' -p23 -S'NC_000017.10:g.48275363C>A' -p24 -sS'genome_context_intronic_sequence' -p25 -g10 -sS'HGVS_LRG_variant' -p26 -S'LRG_1:g.8638G>T' -p27 -sS'HGVS_transcript_variant' -p28 -S'NM_000088.3:c.589G>T' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_007400.1:g.8638G>T' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000017.10:g.48275363C>A' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr17' -p41 -sS'ref' -p42 -VC -p43 -sS'pos' -p44 -S'48275363' -p45 -sS'alt' -p46 -VA -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000017.11:g.50198002C>A' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -g43 -sg44 -S'50198002' -p52 -sg46 -g47 -sssS'GRCh37' -p53 -(dp54 -g36 -S'NC_000017.10:g.48275363C>A' -p55 -sg38 -(dp56 -g40 -S'17' -p57 -sg42 -g43 -sg44 -S'48275363' -p58 -sg46 -g47 -sssS'GRCh38' -p59 -(dp60 -g36 -S'NC_000017.11:g.50198002C>A' -p61 -sg38 -(dp62 -g40 -g57 -sg42 -g43 -sg44 -S'50198002' -p63 -sg46 -g47 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant300.txt b/VariantValidator/testing/testOutputsMasterITS/variant300.txt deleted file mode 100644 index b767aa79..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant300.txt +++ /dev/null @@ -1,256 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NR_149084.1:n.221+1140_221+1142del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000007.13:g.117199644ATCT>A automapped to NC_000007.13:g.117199646_117199648delCTT' -p9 -aS'RefSeqGene record not available' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens CFTR antisense RNA 1 (CFTR-AS1), long non-coding RNA -p15 -sS'gene_symbol' -p16 -S'CFTR-AS1' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'Non-coding :n.' -p21 -sS'slr' -p22 -g6 -ssS'submitted_variant' -p23 -S'7-117199644-ATCT-A' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000007.13(NR_149084.1):c.221+1140_221+1142del' -p26 -sS'HGVS_LRG_variant' -p27 -g6 -sS'HGVS_transcript_variant' -p28 -S'NR_149084.1:n.221+1140_221+1142del' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -g6 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000007.13:g.117199645_117199647del' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr7' -p40 -sS'ref' -p41 -S'ATCT' -p42 -sS'pos' -p43 -S'117199644' -p44 -sS'alt' -p45 -S'A' -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000007.14:g.117559591_117559593del' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -S'ATCT' -p51 -sg43 -S'117559590' -p52 -sg45 -g46 -sssS'GRCh37' -p53 -(dp54 -g35 -S'NC_000007.13:g.117199645_117199647del' -p55 -sg37 -(dp56 -g39 -S'7' -p57 -sg41 -S'ATCT' -p58 -sg43 -S'117199644' -p59 -sg45 -g46 -sssS'GRCh38' -p60 -(dp61 -g35 -S'NC_000007.14:g.117559591_117559593del' -p62 -sg37 -(dp63 -g39 -g57 -sg41 -S'ATCT' -p64 -sg43 -S'117559590' 
-p65 -sg45 -g46 -sssssS'NM_000492.3:c.1521_1523del' -p66 -(dp67 -g5 -S'LRG_663t1:c.1521_1523del' -p68 -sg7 -(lp69 -S'NC_000007.13:g.117199644ATCT>A automapped to NC_000007.13:g.117199646_117199648delCTT' -p70 -asg11 -g6 -sg12 -(lp71 -sg14 -VHomo sapiens cystic fibrosis transmembrane conductance regulator (CFTR), mRNA -p72 -sg16 -S'CFTR' -p73 -sg18 -(dp74 -g20 -S'NP_000483.3(LRG_663p1):p.(Phe508del)' -p75 -sg22 -S'NP_000483.3:p.(F508del)' -p76 -ssg23 -g24 -sg25 -g6 -sg27 -g6 -sg28 -S'NM_000492.3:c.1521_1523del' -p77 -sg30 -S'NG_016465.3:g.98809_98811del' -p78 -sg31 -(dp79 -g33 -(dp80 -g35 -S'NC_000007.13:g.117199646_117199648del' -p81 -sg37 -(dp82 -g39 -g40 -sg41 -S'ATCT' -p83 -sg43 -S'117199644' -p84 -sg45 -g46 -sssg47 -(dp85 -g35 -S'NC_000007.14:g.117559592_117559594del' -p86 -sg37 -(dp87 -g39 -g40 -sg41 -S'ATCT' -p88 -sg43 -S'117559590' -p89 -sg45 -g46 -sssg53 -(dp90 -g35 -S'NC_000007.13:g.117199646_117199648del' -p91 -sg37 -(dp92 -g39 -g57 -sg41 -S'ATCT' -p93 -sg43 -S'117199644' -p94 -sg45 -g46 -sssg60 -(dp95 -g35 -S'NC_000007.14:g.117559592_117559594del' -p96 -sg37 -(dp97 -g39 -g57 -sg41 -S'ATCT' -p98 -sg43 -S'117559590' -p99 -sg45 -g46 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant301.txt b/VariantValidator/testing/testOutputsMasterITS/variant301.txt deleted file mode 100644 index fa1b7cbc..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant301.txt +++ /dev/null @@ -1,488 +0,0 @@ -(dp0 -S'NM_004333.4:c.1798_1799delinsAG' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_299t1:c.1798_1799delinsAG' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000007.13:g.140453136AC>CT automapped to NC_000007.13:g.140453136_140453137delACinsCT' -p7 -aS'A more recent version of the selected reference sequence NM_004333.4 is available (NM_004333.5)' -p8 -aS'NM_004333.5:c.1798_1799delGTinsAG MUST be fully validated prior to use in reports' -p9 -aS'select_variants=NM_004333.5:c.1798_1799delinsAG' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -S'' -p12 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), mRNA -p16 -sS'gene_symbol' -p17 -S'BRAF' -p18 -sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_004324.2(LRG_299p1):p.(Val600Arg)' -p22 -sS'slr' -p23 -S'NP_004324.2:p.(V600R)' -p24 -ssS'submitted_variant' -p25 -S'7-140453136-AC-CT' -p26 -sS'genome_context_intronic_sequence' -p27 -g12 -sS'HGVS_LRG_variant' -p28 -g12 -sS'HGVS_transcript_variant' -p29 -S'NM_004333.4:c.1798_1799delinsAG' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_007873.2:g.176428_176429delinsAG' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000007.13:g.140453136_140453137delinsCT' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr7' -p42 -sS'ref' -p43 -S'AC' -p44 -sS'pos' -p45 -S'140453136' -p46 -sS'alt' -p47 -VCT -p48 -sssS'hg38' -p49 -(dp50 -g37 -S'NC_000007.14:g.140753336_140753337delinsCT' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -S'AC' -p53 -sg45 -S'140753336' -p54 -sg47 -VCT -p55 -sssS'GRCh37' -p56 -(dp57 -g37 
-S'NC_000007.13:g.140453136_140453137delinsCT' -p58 -sg39 -(dp59 -g41 -S'7' -p60 -sg43 -S'AC' -p61 -sg45 -S'140453136' -p62 -sg47 -g48 -sssS'GRCh38' -p63 -(dp64 -g37 -S'NC_000007.14:g.140753336_140753337delinsCT' -p65 -sg39 -(dp66 -g41 -g60 -sg43 -S'AC' -p67 -sg45 -S'140753336' -p68 -sg47 -g55 -sssssS'NM_004333.5:c.1798_1799delinsAG' -p69 -(dp70 -g3 -g12 -sg5 -(lp71 -S'NC_000007.13:g.140453136AC>CT automapped to NC_000007.13:g.140453136_140453137delACinsCT' -p72 -aS'RefSeqGene record not available' -p73 -asg11 -g12 -sg13 -(lp74 -sg15 -VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 1, mRNA -p75 -sg17 -S'BRAF' -p76 -sg19 -(dp77 -g21 -S'NP_004324.2(LRG_299p1):p.(Val600Arg)' -p78 -sg23 -S'NP_004324.2:p.(V600R)' -p79 -ssg25 -g26 -sg27 -g12 -sg28 -g12 -sg29 -S'NM_004333.5:c.1798_1799delinsAG' -p80 -sg31 -g12 -sg33 -(dp81 -g35 -(dp82 -g37 -S'NC_000007.13:g.140453136_140453137delinsCT' -p83 -sg39 -(dp84 -g41 -g42 -sg43 -S'AC' -p85 -sg45 -S'140453136' -p86 -sg47 -VCT -p87 -sssg49 -(dp88 -g37 -S'NC_000007.14:g.140753336_140753337delinsCT' -p89 -sg39 -(dp90 -g41 -g42 -sg43 -S'AC' -p91 -sg45 -S'140753336' -p92 -sg47 -VCT -p93 -sssg56 -(dp94 -g37 -S'NC_000007.13:g.140453136_140453137delinsCT' -p95 -sg39 -(dp96 -g41 -g60 -sg43 -S'AC' -p97 -sg45 -S'140453136' -p98 -sg47 -g87 -sssg63 -(dp99 -g37 -S'NC_000007.14:g.140753336_140753337delinsCT' -p100 -sg39 -(dp101 -g41 -g60 -sg43 -S'AC' -p102 -sg45 -S'140753336' -p103 -sg47 -g93 -sssssS'flag' -p104 -S'gene_variant' -p105 -sS'NR_148928.1:n.2896_2897delinsAG' -p106 -(dp107 -g3 -g12 -sg5 -(lp108 -S'NC_000007.13:g.140453136AC>CT automapped to NC_000007.13:g.140453136_140453137delACinsCT' -p109 -aS'RefSeqGene record not available' -p110 -asg11 -g12 -sg13 -(lp111 -sg15 -VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 3, non-coding RNA -p112 -sg17 -S'BRAF' -p113 -sg19 -(dp114 -g21 -S'Non-coding :n.' 
-p115 -sg23 -g12 -ssg25 -g26 -sg27 -g12 -sg28 -g12 -sg29 -S'NR_148928.1:n.2896_2897delinsAG' -p116 -sg31 -g12 -sg33 -(dp117 -g35 -(dp118 -g37 -S'NC_000007.13:g.140453136_140453137delinsCT' -p119 -sg39 -(dp120 -g41 -g42 -sg43 -S'AC' -p121 -sg45 -S'140453136' -p122 -sg47 -VCT -p123 -sssg49 -(dp124 -g37 -S'NC_000007.14:g.140753336_140753337delinsCT' -p125 -sg39 -(dp126 -g41 -g42 -sg43 -S'AC' -p127 -sg45 -S'140753336' -p128 -sg47 -VCT -p129 -sssg56 -(dp130 -g37 -S'NC_000007.13:g.140453136_140453137delinsCT' -p131 -sg39 -(dp132 -g41 -g60 -sg43 -S'AC' -p133 -sg45 -S'140453136' -p134 -sg47 -g123 -sssg63 -(dp135 -g37 -S'NC_000007.14:g.140753336_140753337delinsCT' -p136 -sg39 -(dp137 -g41 -g60 -sg43 -S'AC' -p138 -sg45 -S'140753336' -p139 -sg47 -g129 -sssssS'NM_001354609.1:c.1798_1799delinsAG' -p140 -(dp141 -g3 -g12 -sg5 -(lp142 -S'NC_000007.13:g.140453136AC>CT automapped to NC_000007.13:g.140453136_140453137delACinsCT' -p143 -aS'RefSeqGene record not available' -p144 -asg11 -g12 -sg13 -(lp145 -sg15 -VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 2, mRNA -p146 -sg17 -S'BRAF' -p147 -sg19 -(dp148 -g21 -S'NP_001341538.1:p.(Val600Arg)' -p149 -sg23 -S'NP_001341538.1:p.(V600R)' -p150 -ssg25 -g26 -sg27 -g12 -sg28 -g12 -sg29 -S'NM_001354609.1:c.1798_1799delinsAG' -p151 -sg31 -g12 -sg33 -(dp152 -g35 -(dp153 -g37 -S'NC_000007.13:g.140453136_140453137delinsCT' -p154 -sg39 -(dp155 -g41 -g42 -sg43 -S'AC' -p156 -sg45 -S'140453136' -p157 -sg47 -VCT -p158 -sssg49 -(dp159 -g37 -S'NC_000007.14:g.140753336_140753337delinsCT' -p160 -sg39 -(dp161 -g41 -g42 -sg43 -S'AC' -p162 -sg45 -S'140753336' -p163 -sg47 -VCT -p164 -sssg56 -(dp165 -g37 -S'NC_000007.13:g.140453136_140453137delinsCT' -p166 -sg39 -(dp167 -g41 -g60 -sg43 -S'AC' -p168 -sg45 -S'140453136' -p169 -sg47 -g158 -sssg63 -(dp170 -g37 -S'NC_000007.14:g.140753336_140753337delinsCT' -p171 -sg39 -(dp172 -g41 -g60 -sg43 -S'AC' -p173 -sg45 -S'140753336' -p174 -sg47 -g164 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant302.txt b/VariantValidator/testing/testOutputsMasterITS/variant302.txt deleted file mode 100644 index 36fe16cb..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant302.txt +++ /dev/null @@ -1,458 +0,0 @@ -(dp0 -S'NM_001354609.1:c.1799T>A' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 2, mRNA -p12 -sS'gene_symbol' -p13 -S'BRAF' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001341538.1:p.(Val600Glu)' -p18 -sS'slr' -p19 -S'NP_001341538.1:p.(V600E)' -p20 -ssS'submitted_variant' -p21 -S'7-140453136-A-T' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_001354609.1:c.1799T>A' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000007.13:g.140453136A>T' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr7' -p37 -sS'ref' -p38 -VA -p39 -sS'pos' -p40 -S'140453136' -p41 -sS'alt' -p42 -VT -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000007.14:g.140753336A>T' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'140753336' -p48 -sg42 -g43 -sssS'GRCh37' -p49 -(dp50 -g32 -S'NC_000007.13:g.140453136A>T' -p51 -sg34 -(dp52 -g36 -S'7' -p53 -sg38 -g39 -sg40 -S'140453136' -p54 -sg42 -g43 -sssS'GRCh38' -p55 -(dp56 -g32 -S'NC_000007.14:g.140753336A>T' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'140753336' -p59 -sg42 -g43 -sssssS'flag' -p60 -S'gene_variant' -p61 -sS'NR_148928.1:n.2897T>A' -p62 -(dp63 -g3 -g4 -sg5 -(lp64 -S'RefSeqGene record not available' -p65 -asg8 -g4 -sg9 -(lp66 -sg11 -VHomo sapiens B-Raf 
proto-oncogene, serine/threonine kinase (BRAF), transcript variant 3, non-coding RNA -p67 -sg13 -S'BRAF' -p68 -sg15 -(dp69 -g17 -S'Non-coding :n.' -p70 -sg19 -g4 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NR_148928.1:n.2897T>A' -p71 -sg27 -g4 -sg28 -(dp72 -g30 -(dp73 -g32 -S'NC_000007.13:g.140453136A>T' -p74 -sg34 -(dp75 -g36 -g37 -sg38 -g39 -sg40 -S'140453136' -p76 -sg42 -g43 -sssg44 -(dp77 -g32 -S'NC_000007.14:g.140753336A>T' -p78 -sg34 -(dp79 -g36 -g37 -sg38 -g39 -sg40 -S'140753336' -p80 -sg42 -g43 -sssg49 -(dp81 -g32 -S'NC_000007.13:g.140453136A>T' -p82 -sg34 -(dp83 -g36 -g53 -sg38 -g39 -sg40 -S'140453136' -p84 -sg42 -g43 -sssg55 -(dp85 -g32 -S'NC_000007.14:g.140753336A>T' -p86 -sg34 -(dp87 -g36 -g53 -sg38 -g39 -sg40 -S'140753336' -p88 -sg42 -g43 -sssssS'NM_004333.4:c.1799T>A' -p89 -(dp90 -g3 -S'LRG_299t1:c.1799T>A' -p91 -sg5 -(lp92 -S'A more recent version of the selected reference sequence NM_004333.4 is available (NM_004333.5)' -p93 -aS'NM_004333.5:c.1799T>A MUST be fully validated prior to use in reports' -p94 -aS'select_variants=NM_004333.5:c.1799T>A' -p95 -asg8 -g4 -sg9 -(lp96 -sg11 -VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), mRNA -p97 -sg13 -S'BRAF' -p98 -sg15 -(dp99 -g17 -S'NP_004324.2(LRG_299p1):p.(Val600Glu)' -p100 -sg19 -S'NP_004324.2:p.(V600E)' -p101 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_004333.4:c.1799T>A' -p102 -sg27 -S'NG_007873.2:g.176429T>A' -p103 -sg28 -(dp104 -g30 -(dp105 -g32 -S'NC_000007.13:g.140453136A>T' -p106 -sg34 -(dp107 -g36 -g37 -sg38 -g39 -sg40 -S'140453136' -p108 -sg42 -g43 -sssg44 -(dp109 -g32 -S'NC_000007.14:g.140753336A>T' -p110 -sg34 -(dp111 -g36 -g37 -sg38 -g39 -sg40 -S'140753336' -p112 -sg42 -g43 -sssg49 -(dp113 -g32 -S'NC_000007.13:g.140453136A>T' -p114 -sg34 -(dp115 -g36 -g53 -sg38 -g39 -sg40 -S'140453136' -p116 -sg42 -g43 -sssg55 -(dp117 -g32 -S'NC_000007.14:g.140753336A>T' -p118 -sg34 -(dp119 -g36 -g53 -sg38 -g39 -sg40 -S'140753336' -p120 -sg42 -g43 -sssssS'NM_004333.5:c.1799T>A' -p121 -(dp122 
-g3 -g4 -sg5 -(lp123 -S'RefSeqGene record not available' -p124 -asg8 -g4 -sg9 -(lp125 -sg11 -VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 1, mRNA -p126 -sg13 -S'BRAF' -p127 -sg15 -(dp128 -g17 -S'NP_004324.2(LRG_299p1):p.(Val600Glu)' -p129 -sg19 -S'NP_004324.2:p.(V600E)' -p130 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_004333.5:c.1799T>A' -p131 -sg27 -g4 -sg28 -(dp132 -g30 -(dp133 -g32 -S'NC_000007.13:g.140453136A>T' -p134 -sg34 -(dp135 -g36 -g37 -sg38 -g39 -sg40 -S'140453136' -p136 -sg42 -g43 -sssg44 -(dp137 -g32 -S'NC_000007.14:g.140753336A>T' -p138 -sg34 -(dp139 -g36 -g37 -sg38 -g39 -sg40 -S'140753336' -p140 -sg42 -g43 -sssg49 -(dp141 -g32 -S'NC_000007.13:g.140453136A>T' -p142 -sg34 -(dp143 -g36 -g53 -sg38 -g39 -sg40 -S'140453136' -p144 -sg42 -g43 -sssg55 -(dp145 -g32 -S'NC_000007.14:g.140753336A>T' -p146 -sg34 -(dp147 -g36 -g53 -sg38 -g39 -sg40 -S'140753336' -p148 -sg42 -g43 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant303.txt b/VariantValidator/testing/testOutputsMasterITS/variant303.txt deleted file mode 100644 index 5a9b2267..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant303.txt +++ /dev/null @@ -1,458 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_004333.5:c.1798G>A' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 1, mRNA -p14 -sS'gene_symbol' -p15 -S'BRAF' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_004324.2(LRG_299p1):p.(Val600Met)' -p20 -sS'slr' -p21 -S'NP_004324.2:p.(V600M)' -p22 -ssS'submitted_variant' -p23 -S'7-140453137-C-T' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'HGVS_LRG_variant' -p26 -g6 
-sS'HGVS_transcript_variant' -p27 -S'NM_004333.5:c.1798G>A' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000007.13:g.140453137C>T' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr7' -p39 -sS'ref' -p40 -VC -p41 -sS'pos' -p42 -S'140453137' -p43 -sS'alt' -p44 -VT -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000007.14:g.140753337C>T' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'140753337' -p50 -sg44 -g45 -sssS'GRCh37' -p51 -(dp52 -g34 -S'NC_000007.13:g.140453137C>T' -p53 -sg36 -(dp54 -g38 -S'7' -p55 -sg40 -g41 -sg42 -S'140453137' -p56 -sg44 -g45 -sssS'GRCh38' -p57 -(dp58 -g34 -S'NC_000007.14:g.140753337C>T' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'140753337' -p61 -sg44 -g45 -sssssS'NR_148928.1:n.2896G>A' -p62 -(dp63 -g5 -g6 -sg7 -(lp64 -S'RefSeqGene record not available' -p65 -asg10 -g6 -sg11 -(lp66 -sg13 -VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 3, non-coding RNA -p67 -sg15 -S'BRAF' -p68 -sg17 -(dp69 -g19 -S'Non-coding :n.' 
-p70 -sg21 -g6 -ssg23 -g24 -sg25 -g6 -sg26 -g6 -sg27 -S'NR_148928.1:n.2896G>A' -p71 -sg29 -g6 -sg30 -(dp72 -g32 -(dp73 -g34 -S'NC_000007.13:g.140453137C>T' -p74 -sg36 -(dp75 -g38 -g39 -sg40 -g41 -sg42 -S'140453137' -p76 -sg44 -g45 -sssg46 -(dp77 -g34 -S'NC_000007.14:g.140753337C>T' -p78 -sg36 -(dp79 -g38 -g39 -sg40 -g41 -sg42 -S'140753337' -p80 -sg44 -g45 -sssg51 -(dp81 -g34 -S'NC_000007.13:g.140453137C>T' -p82 -sg36 -(dp83 -g38 -g55 -sg40 -g41 -sg42 -S'140453137' -p84 -sg44 -g45 -sssg57 -(dp85 -g34 -S'NC_000007.14:g.140753337C>T' -p86 -sg36 -(dp87 -g38 -g55 -sg40 -g41 -sg42 -S'140753337' -p88 -sg44 -g45 -sssssS'NM_001354609.1:c.1798G>A' -p89 -(dp90 -g5 -g6 -sg7 -(lp91 -S'RefSeqGene record not available' -p92 -asg10 -g6 -sg11 -(lp93 -sg13 -VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 2, mRNA -p94 -sg15 -S'BRAF' -p95 -sg17 -(dp96 -g19 -S'NP_001341538.1:p.(Val600Met)' -p97 -sg21 -S'NP_001341538.1:p.(V600M)' -p98 -ssg23 -g24 -sg25 -g6 -sg26 -g6 -sg27 -S'NM_001354609.1:c.1798G>A' -p99 -sg29 -g6 -sg30 -(dp100 -g32 -(dp101 -g34 -S'NC_000007.13:g.140453137C>T' -p102 -sg36 -(dp103 -g38 -g39 -sg40 -g41 -sg42 -S'140453137' -p104 -sg44 -g45 -sssg46 -(dp105 -g34 -S'NC_000007.14:g.140753337C>T' -p106 -sg36 -(dp107 -g38 -g39 -sg40 -g41 -sg42 -S'140753337' -p108 -sg44 -g45 -sssg51 -(dp109 -g34 -S'NC_000007.13:g.140453137C>T' -p110 -sg36 -(dp111 -g38 -g55 -sg40 -g41 -sg42 -S'140453137' -p112 -sg44 -g45 -sssg57 -(dp113 -g34 -S'NC_000007.14:g.140753337C>T' -p114 -sg36 -(dp115 -g38 -g55 -sg40 -g41 -sg42 -S'140753337' -p116 -sg44 -g45 -sssssS'NM_004333.4:c.1798G>A' -p117 -(dp118 -g5 -S'LRG_299t1:c.1798G>A' -p119 -sg7 -(lp120 -S'A more recent version of the selected reference sequence NM_004333.4 is available (NM_004333.5)' -p121 -aS'NM_004333.5:c.1798G>A MUST be fully validated prior to use in reports' -p122 -aS'select_variants=NM_004333.5:c.1798G>A' -p123 -asg10 -g6 -sg11 -(lp124 -sg13 -VHomo sapiens B-Raf proto-oncogene, serine/threonine 
kinase (BRAF), mRNA -p125 -sg15 -S'BRAF' -p126 -sg17 -(dp127 -g19 -S'NP_004324.2(LRG_299p1):p.(Val600Met)' -p128 -sg21 -S'NP_004324.2:p.(V600M)' -p129 -ssg23 -g24 -sg25 -g6 -sg26 -g6 -sg27 -S'NM_004333.4:c.1798G>A' -p130 -sg29 -S'NG_007873.2:g.176428G>A' -p131 -sg30 -(dp132 -g32 -(dp133 -g34 -S'NC_000007.13:g.140453137C>T' -p134 -sg36 -(dp135 -g38 -g39 -sg40 -g41 -sg42 -S'140453137' -p136 -sg44 -g45 -sssg46 -(dp137 -g34 -S'NC_000007.14:g.140753337C>T' -p138 -sg36 -(dp139 -g38 -g39 -sg40 -g41 -sg42 -S'140753337' -p140 -sg44 -g45 -sssg51 -(dp141 -g34 -S'NC_000007.13:g.140453137C>T' -p142 -sg36 -(dp143 -g38 -g55 -sg40 -g41 -sg42 -S'140453137' -p144 -sg44 -g45 -sssg57 -(dp145 -g34 -S'NC_000007.14:g.140753337C>T' -p146 -sg36 -(dp147 -g38 -g55 -sg40 -g41 -sg42 -S'140753337' -p148 -sg44 -g45 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant304.txt b/VariantValidator/testing/testOutputsMasterITS/variant304.txt deleted file mode 100644 index 0d1ff071..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant304.txt +++ /dev/null @@ -1,246 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000083.2:c.180+3A>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'NG_009815.1(NM_000083.2):c.180+3A>T' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 1, mRNA -p14 -sS'gene_symbol' -p15 -S'CLCN1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000074.2:p.?' -p20 -sS'slr' -p21 -S'NP_000074.2:p.?' 
-p22 -ssS'submitted_variant' -p23 -S'7-143013488-A-T' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000007.13(NM_000083.2):c.180+3A>T' -p26 -sS'HGVS_LRG_variant' -p27 -g6 -sS'HGVS_transcript_variant' -p28 -S'NM_000083.2:c.180+3A>T' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_009815.1:g.5270A>T' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000007.13:g.143013488A>T' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr7' -p41 -sS'ref' -p42 -S'A' -p43 -sS'pos' -p44 -S'143013488' -p45 -sS'alt' -p46 -S'T' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000007.14:g.143316395A>T' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -g43 -sg44 -S'143316395' -p52 -sg46 -g47 -sssS'GRCh37' -p53 -(dp54 -g36 -S'NC_000007.13:g.143013488A>T' -p55 -sg38 -(dp56 -g40 -S'7' -p57 -sg42 -g43 -sg44 -S'143013488' -p58 -sg46 -g47 -sssS'GRCh38' -p59 -(dp60 -g36 -S'NC_000007.14:g.143316395A>T' -p61 -sg38 -(dp62 -g40 -g57 -sg42 -g43 -sg44 -S'143316395' -p63 -sg46 -g47 -sssssS'NR_046453.1:n.267+3A>T' -p64 -(dp65 -g5 -g6 -sg7 -(lp66 -S'RefSeqGene record not available' -p67 -asg9 -g6 -sg11 -(lp68 -sg13 -VHomo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 2, non-coding RNA -p69 -sg15 -S'CLCN1' -p70 -sg17 -(dp71 -g19 -S'Non-coding :n.' -p72 -sg21 -g6 -ssg23 -g24 -sg25 -S'NC_000007.13(NR_046453.1):c.267+3A>T' -p73 -sg27 -g6 -sg28 -S'NR_046453.1:n.267+3A>T' -p74 -sg30 -g6 -sg32 -(dp75 -g34 -(dp76 -g36 -S'NC_000007.13:g.143013488A>T' -p77 -sg38 -(dp78 -g40 -g41 -sg42 -g43 -sg44 -S'143013488' -p79 -sg46 -g47 -sssg48 -(dp80 -g36 -S'NC_000007.14:g.143316395A>T' -p81 -sg38 -(dp82 -g40 -g41 -sg42 -g43 -sg44 -S'143316395' -p83 -sg46 -g47 -sssg53 -(dp84 -g36 -S'NC_000007.13:g.143013488A>T' -p85 -sg38 -(dp86 -g40 -g57 -sg42 -g43 -sg44 -S'143013488' -p87 -sg46 -g47 -sssg59 -(dp88 -g36 -S'NC_000007.14:g.143316395A>T' -p89 -sg38 -(dp90 -g40 -g57 -sg42 -g43 -sg44 -S'143316395' -p91 -sg46 -g47 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant305.txt b/VariantValidator/testing/testOutputsMasterITS/variant305.txt deleted file mode 100644 index 63449758..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant305.txt +++ /dev/null @@ -1,243 +0,0 @@ -(dp0 -S'NR_046453.1:n.776G>A' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 2, non-coding RNA -p12 -sS'gene_symbol' -p13 -S'CLCN1' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'Non-coding :n.' -p18 -sS'slr' -p19 -g4 -ssS'submitted_variant' -p20 -S'7-143018934-G-A' -p21 -sS'genome_context_intronic_sequence' -p22 -g4 -sS'HGVS_LRG_variant' -p23 -g4 -sS'HGVS_transcript_variant' -p24 -S'NR_046453.1:n.776G>A' -p25 -sS'HGVS_RefSeqGene_variant' -p26 -g4 -sS'primary_assembly_loci' -p27 -(dp28 -S'hg19' -p29 -(dp30 -S'HGVS_genomic_description' -p31 -S'NC_000007.13:g.143018934G>A' -p32 -sS'vcf' -p33 -(dp34 -S'chr' -p35 -S'chr7' -p36 -sS'ref' -p37 -S'G' -p38 -sS'pos' -p39 -S'143018934' -p40 -sS'alt' -p41 -S'A' -p42 -sssS'hg38' -p43 -(dp44 -g31 -S'NC_000007.14:g.143321841G>A' -p45 -sg33 -(dp46 -g35 -g36 -sg37 -g38 -sg39 -S'143321841' -p47 -sg41 -g42 -sssS'GRCh37' -p48 -(dp49 -g31 -S'NC_000007.13:g.143018934G>A' -p50 -sg33 -(dp51 -g35 -S'7' -p52 -sg37 -g38 -sg39 -S'143018934' -p53 -sg41 -g42 -sssS'GRCh38' -p54 -(dp55 -g31 -S'NC_000007.14:g.143321841G>A' -p56 -sg33 -(dp57 -g35 -g52 -sg37 -g38 -sg39 -S'143321841' -p58 -sg41 -g42 -sssssS'flag' -p59 -S'gene_variant' -p60 -sS'NM_000083.2:c.689G>A' -p61 -(dp62 -g3 -g4 -sg5 -(lp63 -sg8 -g4 -sg9 -(lp64 -sg11 -VHomo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 1, mRNA -p65 -sg13 -S'CLCN1' -p66 -sg15 -(dp67 
-g17 -S'NP_000074.2:p.(Gly230Glu)' -p68 -sg19 -S'NP_000074.2:p.(G230E)' -p69 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_000083.2:c.689G>A' -p70 -sg26 -S'NG_009815.1:g.10716G>A' -p71 -sg27 -(dp72 -g29 -(dp73 -g31 -S'NC_000007.13:g.143018934G>A' -p74 -sg33 -(dp75 -g35 -g36 -sg37 -g38 -sg39 -S'143018934' -p76 -sg41 -g42 -sssg43 -(dp77 -g31 -S'NC_000007.14:g.143321841G>A' -p78 -sg33 -(dp79 -g35 -g36 -sg37 -g38 -sg39 -S'143321841' -p80 -sg41 -g42 -sssg48 -(dp81 -g31 -S'NC_000007.13:g.143018934G>A' -p82 -sg33 -(dp83 -g35 -g52 -sg37 -g38 -sg39 -S'143018934' -p84 -sg41 -g42 -sssg54 -(dp85 -g31 -S'NC_000007.14:g.143321841G>A' -p86 -sg33 -(dp87 -g35 -g52 -sg37 -g38 -sg39 -S'143321841' -p88 -sg41 -g42 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant306.txt b/VariantValidator/testing/testOutputsMasterITS/variant306.txt deleted file mode 100644 index cdf85044..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant306.txt +++ /dev/null @@ -1,243 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NR_046453.1:n.2620C>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 2, non-coding RNA -p14 -sS'gene_symbol' -p15 -S'CLCN1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'Non-coding :n.' 
-p20 -sS'slr' -p21 -g6 -ssS'submitted_variant' -p22 -S'7-143048771-C-T' -p23 -sS'genome_context_intronic_sequence' -p24 -g6 -sS'HGVS_LRG_variant' -p25 -g6 -sS'HGVS_transcript_variant' -p26 -S'NR_046453.1:n.2620C>T' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -g6 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000007.13:g.143048771C>T' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr7' -p38 -sS'ref' -p39 -S'C' -p40 -sS'pos' -p41 -S'143048771' -p42 -sS'alt' -p43 -S'T' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000007.14:g.143351678C>T' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'143351678' -p49 -sg43 -g44 -sssS'GRCh37' -p50 -(dp51 -g33 -S'NC_000007.13:g.143048771C>T' -p52 -sg35 -(dp53 -g37 -S'7' -p54 -sg39 -g40 -sg41 -S'143048771' -p55 -sg43 -g44 -sssS'GRCh38' -p56 -(dp57 -g33 -S'NC_000007.14:g.143351678C>T' -p58 -sg35 -(dp59 -g37 -g54 -sg39 -g40 -sg41 -S'143351678' -p60 -sg43 -g44 -sssssS'NM_000083.2:c.2680C>T' -p61 -(dp62 -g5 -g6 -sg7 -(lp63 -sg10 -g6 -sg11 -(lp64 -sg13 -VHomo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 1, mRNA -p65 -sg15 -S'CLCN1' -p66 -sg17 -(dp67 -g19 -S'NP_000074.2:p.(Arg894Ter)' -p68 -sg21 -S'NP_000074.2:p.(R894*)' -p69 -ssg22 -g23 -sg24 -g6 -sg25 -g6 -sg26 -S'NM_000083.2:c.2680C>T' -p70 -sg28 -S'NG_009815.1:g.40553C>T' -p71 -sg29 -(dp72 -g31 -(dp73 -g33 -S'NC_000007.13:g.143048771C>T' -p74 -sg35 -(dp75 -g37 -g38 -sg39 -g40 -sg41 -S'143048771' -p76 -sg43 -g44 -sssg45 -(dp77 -g33 -S'NC_000007.14:g.143351678C>T' -p78 -sg35 -(dp79 -g37 -g38 -sg39 -g40 -sg41 -S'143351678' -p80 -sg43 -g44 -sssg50 -(dp81 -g33 -S'NC_000007.13:g.143048771C>T' -p82 -sg35 -(dp83 -g37 -g54 -sg39 -g40 -sg41 -S'143048771' -p84 -sg43 -g44 -sssg56 -(dp85 -g33 -S'NC_000007.14:g.143351678C>T' -p86 -sg35 -(dp87 -g37 -g54 -sg39 -g40 -sg41 -S'143351678' -p88 -sg43 -g44 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant307.txt b/VariantValidator/testing/testOutputsMasterITS/variant307.txt deleted file mode 100644 index fbab23f7..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant307.txt +++ /dev/null @@ -1,532 +0,0 @@ -(dp0 -S'NM_014629.2:c.2399C>T' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_234t1:c.2399C>T' -p4 -sS'validation_warnings' -p5 -(lp6 -S'A more recent version of the selected reference sequence NM_014629.2 is available (NM_014629.3)' -p7 -aS'NM_014629.3:c.2399C>T MUST be fully validated prior to use in reports' -p8 -aS'select_variants=NM_014629.3:c.2399C>T' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens Rho guanine nucleotide exchange factor (GEF) 10 (ARHGEF10), mRNA -p15 -sS'gene_symbol' -p16 -S'ARHGEF10' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_055444.2(LRG_234p1):p.(Pro800Leu)' -p21 -sS'slr' -p22 -S'NP_055444.2:p.(P800L)' -p23 -ssS'submitted_variant' -p24 -S'8-1871951-C-T' -p25 -sS'genome_context_intronic_sequence' -p26 -g11 -sS'HGVS_LRG_variant' -p27 -S'LRG_234:g.104803C>T' -p28 -sS'HGVS_transcript_variant' -p29 -S'NM_014629.2:c.2399C>T' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_008480.1:g.104803C>T' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000008.10:g.1871951C>T' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr8' -p42 -sS'ref' -p43 -S'C' -p44 -sS'pos' -p45 -S'1871951' -p46 -sS'alt' -p47 -S'T' -p48 -sssS'GRCh37' -p49 -(dp50 -g37 -S'NC_000008.10:g.1871951C>T' -p51 -sg39 -(dp52 -g41 -S'8' -p53 -sg43 -g44 -sg45 -S'1871951' -p54 -sg47 -g48 -sssssS'NM_014629.3:c.2399C>T' -p55 -(dp56 -g3 -g11 -sg5 -(lp57 -S'RefSeqGene record not available' -p58 -asg10 -g11 -sg12 -(lp59 -(dp60 -S'GRCh38' -p61 -(dp62 -g37 -S'NT_187576.1:g.107161C>T' -p63 -sg39 -(dp64 -g41 
-S'HSCHR8_8_CTG1' -p65 -sg43 -g44 -sg45 -S'107161' -p66 -sg47 -g48 -sssa(dp67 -S'hg38' -p68 -(dp69 -g37 -S'NT_187576.1:g.107161C>T' -p70 -sg39 -(dp71 -g41 -S'chr8_KI270821v1_alt' -p72 -sg43 -g44 -sg45 -S'107161' -p73 -sg47 -g48 -sssasg14 -VHomo sapiens Rho guanine nucleotide exchange factor 10 (ARHGEF10), transcript variant 1, mRNA -p74 -sg16 -S'ARHGEF10' -p75 -sg18 -(dp76 -g20 -S'NP_055444.2(LRG_234p1):p.(Pro800Leu)' -p77 -sg22 -S'NP_055444.2:p.(P800L)' -p78 -ssg24 -g25 -sg26 -g11 -sg27 -g11 -sg29 -S'NM_014629.3:c.2399C>T' -p79 -sg31 -g11 -sg33 -(dp80 -g35 -(dp81 -g37 -S'NC_000008.10:g.1871951C>T' -p82 -sg39 -(dp83 -g41 -g42 -sg43 -g44 -sg45 -S'1871951' -p84 -sg47 -g48 -sssg68 -(dp85 -g37 -S'NC_000008.11:g.1923785C>T' -p86 -sg39 -(dp87 -g41 -g42 -sg43 -g44 -sg45 -S'1923785' -p88 -sg47 -g48 -sssg49 -(dp89 -g37 -S'NC_000008.10:g.1871951C>T' -p90 -sg39 -(dp91 -g41 -g53 -sg43 -g44 -sg45 -S'1871951' -p92 -sg47 -g48 -sssg61 -(dp93 -g37 -S'NC_000008.11:g.1923785C>T' -p94 -sg39 -(dp95 -g41 -g53 -sg43 -g44 -sg45 -S'1923785' -p96 -sg47 -g48 -sssssS'flag' -p97 -S'gene_variant' -p98 -sS'NM_001308153.1:c.2471C>T' -p99 -(dp100 -g3 -g11 -sg5 -(lp101 -S'RefSeqGene record not available' -p102 -asg10 -g11 -sg12 -(lp103 -(dp104 -g61 -(dp105 -g37 -S'NT_187576.1:g.107161C>T' -p106 -sg39 -(dp107 -g41 -g65 -sg43 -g44 -sg45 -S'107161' -p108 -sg47 -g48 -sssa(dp109 -g68 -(dp110 -g37 -S'NT_187576.1:g.107161C>T' -p111 -sg39 -(dp112 -g41 -g72 -sg43 -g44 -sg45 -S'107161' -p113 -sg47 -g48 -sssasg14 -VHomo sapiens Rho guanine nucleotide exchange factor 10 (ARHGEF10), transcript variant 3, mRNA -p114 -sg16 -S'ARHGEF10' -p115 -sg18 -(dp116 -g20 -S'NP_001295082.1:p.(Pro824Leu)' -p117 -sg22 -S'NP_001295082.1:p.(P824L)' -p118 -ssg24 -g25 -sg26 -g11 -sg27 -g11 -sg29 -S'NM_001308153.1:c.2471C>T' -p119 -sg31 -g11 -sg33 -(dp120 -g35 -(dp121 -g37 -S'NC_000008.10:g.1871951C>T' -p122 -sg39 -(dp123 -g41 -g42 -sg43 -g44 -sg45 -S'1871951' -p124 -sg47 -g48 -sssg68 -(dp125 -g37 -S'NC_000008.11:g.1923785C>T' 
-p126 -sg39 -(dp127 -g41 -g42 -sg43 -g44 -sg45 -S'1923785' -p128 -sg47 -g48 -sssg49 -(dp129 -g37 -S'NC_000008.10:g.1871951C>T' -p130 -sg39 -(dp131 -g41 -g53 -sg43 -g44 -sg45 -S'1871951' -p132 -sg47 -g48 -sssg61 -(dp133 -g37 -S'NC_000008.11:g.1923785C>T' -p134 -sg39 -(dp135 -g41 -g53 -sg43 -g44 -sg45 -S'1923785' -p136 -sg47 -g48 -sssssS'NM_001308152.1:c.2285C>T' -p137 -(dp138 -g3 -g11 -sg5 -(lp139 -S'RefSeqGene record not available' -p140 -asg10 -g11 -sg12 -(lp141 -(dp142 -g61 -(dp143 -g37 -S'NT_187576.1:g.107161C>T' -p144 -sg39 -(dp145 -g41 -g65 -sg43 -g44 -sg45 -S'107161' -p146 -sg47 -g48 -sssa(dp147 -g68 -(dp148 -g37 -S'NT_187576.1:g.107161C>T' -p149 -sg39 -(dp150 -g41 -g72 -sg43 -g44 -sg45 -S'107161' -p151 -sg47 -g48 -sssasg14 -VHomo sapiens Rho guanine nucleotide exchange factor 10 (ARHGEF10), transcript variant 2, mRNA -p152 -sg16 -S'ARHGEF10' -p153 -sg18 -(dp154 -g20 -S'NP_001295081.1:p.(Pro762Leu)' -p155 -sg22 -S'NP_001295081.1:p.(P762L)' -p156 -ssg24 -g25 -sg26 -g11 -sg27 -g11 -sg29 -S'NM_001308152.1:c.2285C>T' -p157 -sg31 -g11 -sg33 -(dp158 -g35 -(dp159 -g37 -S'NC_000008.10:g.1871951C>T' -p160 -sg39 -(dp161 -g41 -g42 -sg43 -g44 -sg45 -S'1871951' -p162 -sg47 -g48 -sssg68 -(dp163 -g37 -S'NC_000008.11:g.1923785C>T' -p164 -sg39 -(dp165 -g41 -g42 -sg43 -g44 -sg45 -S'1923785' -p166 -sg47 -g48 -sssg49 -(dp167 -g37 -S'NC_000008.10:g.1871951C>T' -p168 -sg39 -(dp169 -g41 -g53 -sg43 -g44 -sg45 -S'1871951' -p170 -sg47 -g48 -sssg61 -(dp171 -g37 -S'NC_000008.11:g.1923785C>T' -p172 -sg39 -(dp173 -g41 -g53 -sg43 -g44 -sg45 -S'1923785' -p174 -sg47 -g48 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant308.txt b/VariantValidator/testing/testOutputsMasterITS/variant308.txt deleted file mode 100644 index 12fe64b1..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant308.txt +++ /dev/null @@ -1,476 +0,0 @@ -(dp0 -S'NM_001330637.1:c.5690dup' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000009.11:g.13112056T>TG automapped to NC_000009.11:g.13112059dupG' -p7 -aS'RefSeqGene record not available' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens multiple PDZ domain crumbs cell polarity complex component (MPDZ), transcript variant 4, mRNA -p13 -sS'gene_symbol' -p14 -S'MPDZ' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001317566.1:p.(Thr1898AsnfsTer15)' -p19 -sS'slr' -p20 -S'NP_001317566.1:p.(T1898Nfs*15)' -p21 -ssS'submitted_variant' -p22 -S'9-13112056-T-TG' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'HGVS_LRG_variant' -p25 -g4 -sS'HGVS_transcript_variant' -p26 -S'NM_001330637.1:c.5690dup' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000009.11:g.13112057dup' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr9' -p38 -sS'ref' -p39 -S'G' -p40 -sS'pos' -p41 -S'13112057' -p42 -sS'alt' -p43 -VGG -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000009.12:g.13112058dup' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'13112058' -p49 -sg43 -VGG -p50 -sssS'GRCh37' -p51 -(dp52 -g33 -S'NC_000009.11:g.13112057dup' -p53 -sg35 -(dp54 -g37 -S'9' -p55 -sg39 -g40 -sg41 -S'13112057' -p56 -sg43 -VGG -p57 -sssS'GRCh38' -p58 -(dp59 -g33 -S'NC_000009.12:g.13112058dup' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -g40 -sg41 -S'13112058' -p62 -sg43 -VGG -p63 -sssssS'flag' -p64 -S'gene_variant' -p65 -sS'NM_001261407.1:c.5504dup' -p66 -(dp67 
-g3 -g4 -sg5 -(lp68 -S'NC_000009.11:g.13112056T>TG automapped to NC_000009.11:g.13112059dupG' -p69 -aS'RefSeqGene record not available' -p70 -asg9 -g4 -sg10 -(lp71 -sg12 -VHomo sapiens multiple PDZ domain crumbs cell polarity complex component (MPDZ), transcript variant 3, mRNA -p72 -sg14 -S'MPDZ' -p73 -sg16 -(dp74 -g18 -S'NP_001248336.1:p.(Thr1836AsnfsTer15)' -p75 -sg20 -S'NP_001248336.1:p.(T1836Nfs*15)' -p76 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001261407.1:c.5504dup' -p77 -sg28 -g4 -sg29 -(dp78 -g31 -(dp79 -g33 -S'NC_000009.11:g.13112057dup' -p80 -sg35 -(dp81 -g37 -g38 -sg39 -g40 -sg41 -S'13112057' -p82 -sg43 -VGG -p83 -sssg45 -(dp84 -g33 -S'NC_000009.12:g.13112058dup' -p85 -sg35 -(dp86 -g37 -g38 -sg39 -g40 -sg41 -S'13112058' -p87 -sg43 -VGG -p88 -sssg51 -(dp89 -g33 -S'NC_000009.11:g.13112057dup' -p90 -sg35 -(dp91 -g37 -g55 -sg39 -g40 -sg41 -S'13112057' -p92 -sg43 -VGG -p93 -sssg58 -(dp94 -g33 -S'NC_000009.12:g.13112058dup' -p95 -sg35 -(dp96 -g37 -g55 -sg39 -g40 -sg41 -S'13112058' -p97 -sg43 -VGG -p98 -sssssS'NM_003829.4:c.5603dup' -p99 -(dp100 -g3 -g4 -sg5 -(lp101 -S'NC_000009.11:g.13112056T>TG automapped to NC_000009.11:g.13112059dupG' -p102 -aS'RefSeqGene record not available' -p103 -asg9 -g4 -sg10 -(lp104 -sg12 -VHomo sapiens multiple PDZ domain crumbs cell polarity complex component (MPDZ), transcript variant 1, mRNA -p105 -sg14 -S'MPDZ' -p106 -sg16 -(dp107 -g18 -S'NP_003820.2:p.(Thr1869AsnfsTer15)' -p108 -sg20 -S'NP_003820.2:p.(T1869Nfs*15)' -p109 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_003829.4:c.5603dup' -p110 -sg28 -g4 -sg29 -(dp111 -g31 -(dp112 -g33 -S'NC_000009.11:g.13112057dup' -p113 -sg35 -(dp114 -g37 -g38 -sg39 -g40 -sg41 -S'13112057' -p115 -sg43 -VGG -p116 -sssg45 -(dp117 -g33 -S'NC_000009.12:g.13112058dup' -p118 -sg35 -(dp119 -g37 -g38 -sg39 -g40 -sg41 -S'13112058' -p120 -sg43 -VGG -p121 -sssg51 -(dp122 -g33 -S'NC_000009.11:g.13112057dup' -p123 -sg35 -(dp124 -g37 -g55 -sg39 -g40 -sg41 -S'13112057' -p125 -sg43 -VGG -p126 -sssg58 -(dp127 
-g33 -S'NC_000009.12:g.13112058dup' -p128 -sg35 -(dp129 -g37 -g55 -sg39 -g40 -sg41 -S'13112058' -p130 -sg43 -VGG -p131 -sssssS'NM_001261406.1:c.5591dup' -p132 -(dp133 -g3 -g4 -sg5 -(lp134 -S'NC_000009.11:g.13112056T>TG automapped to NC_000009.11:g.13112059dupG' -p135 -aS'RefSeqGene record not available' -p136 -asg9 -g4 -sg10 -(lp137 -sg12 -VHomo sapiens multiple PDZ domain crumbs cell polarity complex component (MPDZ), transcript variant 2, mRNA -p138 -sg14 -S'MPDZ' -p139 -sg16 -(dp140 -g18 -S'NP_001248335.1:p.(Thr1865AsnfsTer15)' -p141 -sg20 -S'NP_001248335.1:p.(T1865Nfs*15)' -p142 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001261406.1:c.5591dup' -p143 -sg28 -g4 -sg29 -(dp144 -g31 -(dp145 -g33 -S'NC_000009.11:g.13112057dup' -p146 -sg35 -(dp147 -g37 -g38 -sg39 -g40 -sg41 -S'13112057' -p148 -sg43 -VGG -p149 -sssg45 -(dp150 -g33 -S'NC_000009.12:g.13112058dup' -p151 -sg35 -(dp152 -g37 -g38 -sg39 -g40 -sg41 -S'13112058' -p153 -sg43 -VGG -p154 -sssg51 -(dp155 -g33 -S'NC_000009.11:g.13112057dup' -p156 -sg35 -(dp157 -g37 -g55 -sg39 -g40 -sg41 -S'13112057' -p158 -sg43 -VGG -p159 -sssg58 -(dp160 -g33 -S'NC_000009.12:g.13112058dup' -p161 -sg35 -(dp162 -g37 -g55 -sg39 -g40 -sg41 -S'13112058' -p163 -sg43 -VGG -p164 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant309.txt b/VariantValidator/testing/testOutputsMasterITS/variant309.txt deleted file mode 100644 index f5b16672..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant309.txt +++ /dev/null @@ -1,534 +0,0 @@ -(dp0 -S'NM_058197.4:c.*74-1G>T' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens cyclin dependent kinase inhibitor 2A (CDKN2A), transcript variant 3, mRNA -p12 -sS'gene_symbol' -p13 -S'CDKN2A' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_478104.2:p.?' -p18 -sS'slr' -p19 -S'NP_478104.2:p.?' -p20 -ssS'submitted_variant' -p21 -S'9-21971208-C-A' -p22 -sS'genome_context_intronic_sequence' -p23 -S'NC_000009.11(NM_058197.4):c.*74-1G>T' -p24 -sS'HGVS_LRG_variant' -p25 -g4 -sS'HGVS_transcript_variant' -p26 -S'NM_058197.4:c.*74-1G>T' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000009.11:g.21971208C>A' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr9' -p38 -sS'ref' -p39 -VC -p40 -sS'pos' -p41 -S'21971208' -p42 -sS'alt' -p43 -VA -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000009.12:g.21971209C>A' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'21971209' -p49 -sg43 -g44 -sssS'GRCh37' -p50 -(dp51 -g33 -S'NC_000009.11:g.21971208C>A' -p52 -sg35 -(dp53 -g37 -S'9' -p54 -sg39 -g40 -sg41 -S'21971208' -p55 -sg43 -g44 -sssS'GRCh38' -p56 -(dp57 -g33 -S'NC_000009.12:g.21971209C>A' -p58 -sg35 -(dp59 -g37 -g54 -sg39 -g40 -sg41 -S'21971209' -p60 -sg43 -g44 -sssssS'NM_000077.4:c.151-1G>T' -p61 -(dp62 -g3 -S'LRG_11t1:c.151-1G>T' -p63 -sg5 -(lp64 -sg8 -S'NG_007485.1(NM_000077.4):c.151-1G>T' -p65 -sg9 -(lp66 -sg11 -VHomo sapiens cyclin dependent kinase 
inhibitor 2A (CDKN2A), transcript variant 1, mRNA -p67 -sg13 -S'CDKN2A' -p68 -sg15 -(dp69 -g17 -S'NP_000068.1(LRG_11p1):p.?' -p70 -sg19 -S'NP_000068.1:p.?' -p71 -ssg21 -g22 -sg23 -S'NC_000009.11(NM_000077.4):c.151-1G>T' -p72 -sg25 -S'LRG_11:g.28283G>T' -p73 -sg26 -S'NM_000077.4:c.151-1G>T' -p74 -sg28 -S'NG_007485.1:g.28283G>T' -p75 -sg29 -(dp76 -g31 -(dp77 -g33 -S'NC_000009.11:g.21971208C>A' -p78 -sg35 -(dp79 -g37 -g38 -sg39 -g40 -sg41 -S'21971208' -p80 -sg43 -g44 -sssg45 -(dp81 -g33 -S'NC_000009.12:g.21971209C>A' -p82 -sg35 -(dp83 -g37 -g38 -sg39 -g40 -sg41 -S'21971209' -p84 -sg43 -g44 -sssg50 -(dp85 -g33 -S'NC_000009.11:g.21971208C>A' -p86 -sg35 -(dp87 -g37 -g54 -sg39 -g40 -sg41 -S'21971208' -p88 -sg43 -g44 -sssg56 -(dp89 -g33 -S'NC_000009.12:g.21971209C>A' -p90 -sg35 -(dp91 -g37 -g54 -sg39 -g40 -sg41 -S'21971209' -p92 -sg43 -g44 -sssssS'NM_001363763.1:c.-3-1G>T' -p93 -(dp94 -g3 -g4 -sg5 -(lp95 -S'RefSeqGene record not available' -p96 -asg8 -g4 -sg9 -(lp97 -sg11 -VHomo sapiens cyclin dependent kinase inhibitor 2A (CDKN2A), transcript variant 6, mRNA -p98 -sg13 -S'CDKN2A' -p99 -sg15 -(dp100 -g17 -S'NP_001350692.1:p.?' -p101 -sg19 -S'NP_001350692.1:p.?' -p102 -ssg21 -g22 -sg23 -S'NC_000009.11(NM_001363763.1):c.-3-1G>T' -p103 -sg25 -g4 -sg26 -S'NM_001363763.1:c.-3-1G>T' -p104 -sg28 -g4 -sg29 -(dp105 -g31 -(dp106 -g33 -S'NC_000009.11:g.21971208C>A' -p107 -sg35 -(dp108 -g37 -g38 -sg39 -g40 -sg41 -S'21971208' -p109 -sg43 -g44 -sssg50 -(dp110 -g33 -S'NC_000009.11:g.21971208C>A' -p111 -sg35 -(dp112 -g37 -g54 -sg39 -g40 -sg41 -S'21971208' -p113 -sg43 -g44 -sssssS'NM_001195132.1:c.151-1G>T' -p114 -(dp115 -g3 -g4 -sg5 -(lp116 -S'RefSeqGene record not available' -p117 -asg8 -g4 -sg9 -(lp118 -sg11 -VHomo sapiens cyclin dependent kinase inhibitor 2A (CDKN2A), transcript variant 5, mRNA -p119 -sg13 -S'CDKN2A' -p120 -sg15 -(dp121 -g17 -S'NP_001182061.1:p.?' -p122 -sg19 -S'NP_001182061.1:p.?' 
-p123 -ssg21 -g22 -sg23 -S'NC_000009.11(NM_001195132.1):c.151-1G>T' -p124 -sg25 -g4 -sg26 -S'NM_001195132.1:c.151-1G>T' -p125 -sg28 -g4 -sg29 -(dp126 -g31 -(dp127 -g33 -S'NC_000009.11:g.21971208C>A' -p128 -sg35 -(dp129 -g37 -g38 -sg39 -g40 -sg41 -S'21971208' -p130 -sg43 -g44 -sssg45 -(dp131 -g33 -S'NC_000009.12:g.21971209C>A' -p132 -sg35 -(dp133 -g37 -g38 -sg39 -g40 -sg41 -S'21971209' -p134 -sg43 -g44 -sssg50 -(dp135 -g33 -S'NC_000009.11:g.21971208C>A' -p136 -sg35 -(dp137 -g37 -g54 -sg39 -g40 -sg41 -S'21971208' -p138 -sg43 -g44 -sssg56 -(dp139 -g33 -S'NC_000009.12:g.21971209C>A' -p140 -sg35 -(dp141 -g37 -g54 -sg39 -g40 -sg41 -S'21971209' -p142 -sg43 -g44 -sssssS'NM_058195.3:c.194-1G>T' -p143 -(dp144 -g3 -S'LRG_11t2:c.194-1G>T' -p145 -sg5 -(lp146 -sg8 -S'NG_007485.1(NM_058195.3):c.194-1G>T' -p147 -sg9 -(lp148 -sg11 -VHomo sapiens cyclin dependent kinase inhibitor 2A (CDKN2A), transcript variant 4, mRNA -p149 -sg13 -S'CDKN2A' -p150 -sg15 -(dp151 -g17 -S'NP_478102.2(LRG_11p2):p.?' -p152 -sg19 -S'NP_478102.2:p.?' -p153 -ssg21 -g22 -sg23 -S'NC_000009.11(NM_058195.3):c.194-1G>T' -p154 -sg25 -S'LRG_11:g.28283G>T' -p155 -sg26 -S'NM_058195.3:c.194-1G>T' -p156 -sg28 -S'NG_007485.1:g.28283G>T' -p157 -sg29 -(dp158 -g31 -(dp159 -g33 -S'NC_000009.11:g.21971208C>A' -p160 -sg35 -(dp161 -g37 -g38 -sg39 -g40 -sg41 -S'21971208' -p162 -sg43 -g44 -sssg45 -(dp163 -g33 -S'NC_000009.12:g.21971209C>A' -p164 -sg35 -(dp165 -g37 -g38 -sg39 -g40 -sg41 -S'21971209' -p166 -sg43 -g44 -sssg50 -(dp167 -g33 -S'NC_000009.11:g.21971208C>A' -p168 -sg35 -(dp169 -g37 -g54 -sg39 -g40 -sg41 -S'21971208' -p170 -sg43 -g44 -sssg56 -(dp171 -g33 -S'NC_000009.12:g.21971209C>A' -p172 -sg35 -(dp173 -g37 -g54 -sg39 -g40 -sg41 -S'21971209' -p174 -sg43 -g44 -sssssS'flag' -p175 -S'gene_variant' -p176 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant31.txt b/VariantValidator/testing/testOutputsMasterITS/variant31.txt deleted file mode 100644 index 64e8da44..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant31.txt +++ /dev/null @@ -1,143 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589-1G>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.589-1G>T' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'NG_007400.1(NM_000088.3):c.589-1G>T' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2(LRG_1p1):p.?' -p20 -sS'slr' -p21 -S'NP_000079.2:p.?' -p22 -ssS'submitted_variant' -p23 -S'NM_000088.3:c.589-1G>T' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000017.10(NM_000088.3):c.589-1G>T' -p26 -sS'HGVS_LRG_variant' -p27 -S'LRG_1:g.8637G>T' -p28 -sS'HGVS_transcript_variant' -p29 -S'NM_000088.3:c.589-1G>T' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_007400.1:g.8637G>T' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000017.10:g.48275364C>A' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr17' -p42 -sS'ref' -p43 -VC -p44 -sS'pos' -p45 -S'48275364' -p46 -sS'alt' -p47 -VA -p48 -sssS'hg38' -p49 -(dp50 -g37 -S'NC_000017.11:g.50198003C>A' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -g44 -sg45 -S'50198003' -p53 -sg47 -g48 -sssS'GRCh37' -p54 -(dp55 -g37 -S'NC_000017.10:g.48275364C>A' -p56 -sg39 -(dp57 -g41 -S'17' -p58 -sg43 -g44 -sg45 -S'48275364' -p59 -sg47 -g48 -sssS'GRCh38' -p60 -(dp61 -g37 -S'NC_000017.11:g.50198003C>A' -p62 -sg39 -(dp63 -g41 -g58 -sg43 -g44 -sg45 -S'50198003' -p64 -sg47 -g48 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant310.txt b/VariantValidator/testing/testOutputsMasterITS/variant310.txt deleted file mode 100644 index 012c28b8..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant310.txt +++ /dev/null @@ -1,484 +0,0 @@ -(dp0 -S'NM_003289.3:c.773-3dup' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_680t2:c.773-3dup' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000009.11:g.35683240T>TG automapped to NC_000009.11:g.35683248dupG' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -S'NG_011620.1(NM_003289.3):c.773-3dup' -p9 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens tropomyosin 2 (TPM2), transcript variant Tpm2.2, mRNA -p13 -sS'gene_symbol' -p14 -S'TPM2' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_003280.2(LRG_680p2):p.?' -p19 -sS'slr' -p20 -S'NP_003280.2:p.?' -p21 -ssS'submitted_variant' -p22 -S'9-35683240-T-TG' -p23 -sS'genome_context_intronic_sequence' -p24 -S'NC_000009.11(NM_003289.3):c.773-3dup' -p25 -sS'HGVS_LRG_variant' -p26 -S'LRG_680:g.11814dup' -p27 -sS'HGVS_transcript_variant' -p28 -S'NM_003289.3:c.773-3dup' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_011620.1:g.11814dup' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000009.11:g.35683241dup' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr9' -p41 -sS'ref' -p42 -S'G' -p43 -sS'pos' -p44 -S'35683241' -p45 -sS'alt' -p46 -S'GG' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000009.12:g.35683244dup' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -g43 -sg44 -S'35683244' -p52 -sg46 -S'GG' -p53 -sssS'GRCh37' -p54 -(dp55 -g36 -S'NC_000009.11:g.35683241dup' -p56 -sg38 -(dp57 -g40 -S'9' -p58 -sg42 -g43 -sg44 -S'35683241' -p59 -sg46 -S'GG' -p60 -sssS'GRCh38' -p61 -(dp62 -g36 -S'NC_000009.12:g.35683244dup' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -g43 -sg44 -S'35683244' -p65 -sg46 -S'GG' -p66 -sssssS'flag' -p67 
-S'gene_variant' -p68 -sS'NM_213674.1:c.772+1002dup' -p69 -(dp70 -g3 -S'LRG_680t1:c.772+1002dup' -p71 -sg5 -(lp72 -S'NC_000009.11:g.35683240T>TG automapped to NC_000009.11:g.35683248dupG' -p73 -asg8 -S'NG_011620.1(NM_213674.1):c.772+1002dup' -p74 -sg10 -(lp75 -sg12 -VHomo sapiens tropomyosin 2 (TPM2), transcript variant Tpm2.1, mRNA -p76 -sg14 -S'TPM2' -p77 -sg16 -(dp78 -g18 -S'NP_998839.1(LRG_680p1):p.?' -p79 -sg20 -S'NP_998839.1:p.?' -p80 -ssg22 -g23 -sg24 -S'NC_000009.11(NM_213674.1):c.772+1002dup' -p81 -sg26 -S'LRG_680:g.11814dup' -p82 -sg28 -S'NM_213674.1:c.772+1002dup' -p83 -sg30 -S'NG_011620.1:g.11814dup' -p84 -sg32 -(dp85 -g34 -(dp86 -g36 -S'NC_000009.11:g.35683241dup' -p87 -sg38 -(dp88 -g40 -g41 -sg42 -g43 -sg44 -S'35683241' -p89 -sg46 -S'GG' -p90 -sssg48 -(dp91 -g36 -S'NC_000009.12:g.35683244dup' -p92 -sg38 -(dp93 -g40 -g41 -sg42 -g43 -sg44 -S'35683244' -p94 -sg46 -S'GG' -p95 -sssg54 -(dp96 -g36 -S'NC_000009.11:g.35683241dup' -p97 -sg38 -(dp98 -g40 -g58 -sg42 -g43 -sg44 -S'35683241' -p99 -sg46 -S'GG' -p100 -sssg61 -(dp101 -g36 -S'NC_000009.12:g.35683244dup' -p102 -sg38 -(dp103 -g40 -g58 -sg42 -g43 -sg44 -S'35683244' -p104 -sg46 -S'GG' -p105 -sssssS'NM_001301226.1:c.772+1002dup' -p106 -(dp107 -g3 -S'' -p108 -sg5 -(lp109 -S'NC_000009.11:g.35683240T>TG automapped to NC_000009.11:g.35683248dupG' -p110 -aS'RefSeqGene record not available' -p111 -asg8 -g108 -sg10 -(lp112 -sg12 -VHomo sapiens tropomyosin 2 (TPM2), transcript variant Tpm2.3, mRNA -p113 -sg14 -S'TPM2' -p114 -sg16 -(dp115 -g18 -S'NP_001288155.1:p.?' -p116 -sg20 -S'NP_001288155.1:p.?' 
-p117 -ssg22 -g23 -sg24 -S'NC_000009.11(NM_001301226.1):c.772+1002dup' -p118 -sg26 -g108 -sg28 -S'NM_001301226.1:c.772+1002dup' -p119 -sg30 -g108 -sg32 -(dp120 -g34 -(dp121 -g36 -S'NC_000009.11:g.35683241dup' -p122 -sg38 -(dp123 -g40 -g41 -sg42 -g43 -sg44 -S'35683241' -p124 -sg46 -S'GG' -p125 -sssg48 -(dp126 -g36 -S'NC_000009.12:g.35683244dup' -p127 -sg38 -(dp128 -g40 -g41 -sg42 -g43 -sg44 -S'35683244' -p129 -sg46 -S'GG' -p130 -sssg54 -(dp131 -g36 -S'NC_000009.11:g.35683241dup' -p132 -sg38 -(dp133 -g40 -g58 -sg42 -g43 -sg44 -S'35683241' -p134 -sg46 -S'GG' -p135 -sssg61 -(dp136 -g36 -S'NC_000009.12:g.35683244dup' -p137 -sg38 -(dp138 -g40 -g58 -sg42 -g43 -sg44 -S'35683244' -p139 -sg46 -S'GG' -p140 -sssssS'NM_001301227.1:c.773-3dup' -p141 -(dp142 -g3 -g108 -sg5 -(lp143 -S'NC_000009.11:g.35683240T>TG automapped to NC_000009.11:g.35683248dupG' -p144 -aS'RefSeqGene record not available' -p145 -asg8 -g108 -sg10 -(lp146 -sg12 -VHomo sapiens tropomyosin 2 (TPM2), transcript variant Tpm2.4, mRNA -p147 -sg14 -S'TPM2' -p148 -sg16 -(dp149 -g18 -S'NP_001288156.1:p.?' -p150 -sg20 -S'NP_001288156.1:p.?' -p151 -ssg22 -g23 -sg24 -S'NC_000009.11(NM_001301227.1):c.773-3dup' -p152 -sg26 -g108 -sg28 -S'NM_001301227.1:c.773-3dup' -p153 -sg30 -g108 -sg32 -(dp154 -g34 -(dp155 -g36 -S'NC_000009.11:g.35683241dup' -p156 -sg38 -(dp157 -g40 -g41 -sg42 -g43 -sg44 -S'35683241' -p158 -sg46 -S'GG' -p159 -sssg48 -(dp160 -g36 -S'NC_000009.12:g.35683244dup' -p161 -sg38 -(dp162 -g40 -g41 -sg42 -g43 -sg44 -S'35683244' -p163 -sg46 -S'GG' -p164 -sssg54 -(dp165 -g36 -S'NC_000009.11:g.35683241dup' -p166 -sg38 -(dp167 -g40 -g58 -sg42 -g43 -sg44 -S'35683241' -p168 -sg46 -S'GG' -p169 -sssg61 -(dp170 -g36 -S'NC_000009.12:g.35683244dup' -p171 -sg38 -(dp172 -g40 -g58 -sg42 -g43 -sg44 -S'35683244' -p173 -sg46 -S'GG' -p174 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant311.txt b/VariantValidator/testing/testOutputsMasterITS/variant311.txt deleted file mode 100644 index ac56b4cd..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant311.txt +++ /dev/null @@ -1,454 +0,0 @@ -(dp0 -S'NM_000368.4:c.733C>T' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_486t1:c.733C>T' -p4 -sS'validation_warnings' -p5 -(lp6 -sS'RefSeqGene_context_intronic_sequence' -p7 -S'' -p8 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'TSC1' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_000359.1(LRG_486p1):p.(Arg245Ter)' -p18 -sS'slr' -p19 -S'NP_000359.1:p.(R245*)' -p20 -ssS'submitted_variant' -p21 -S'9-135796754-G-A' -p22 -sS'genome_context_intronic_sequence' -p23 -g8 -sS'HGVS_LRG_variant' -p24 -S'LRG_486:g.28267C>T' -p25 -sS'HGVS_transcript_variant' -p26 -S'NM_000368.4:c.733C>T' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -S'NG_012386.1:g.28267C>T' -p29 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000009.11:g.135796754G>A' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr9' -p39 -sS'ref' -p40 -VG -p41 -sS'pos' -p42 -S'135796754' -p43 -sS'alt' -p44 -VA -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000009.12:g.132921367G>A' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'132921367' -p50 -sg44 -g45 -sssS'GRCh37' -p51 -(dp52 -g34 -S'NC_000009.11:g.135796754G>A' -p53 -sg36 -(dp54 -g38 -S'9' -p55 -sg40 -g41 -sg42 -S'135796754' -p56 -sg44 -g45 -sssS'GRCh38' -p57 -(dp58 -g34 -S'NC_000009.12:g.132921367G>A' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'132921367' -p61 -sg44 -g45 -sssssS'NM_001162426.1:c.733C>T' -p62 -(dp63 -g3 -g8 -sg5 -(lp64 -S'RefSeqGene record not available' -p65 -asg7 -g8 -sg9 -(lp66 -sg11 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript 
variant 3, mRNA -p67 -sg13 -S'TSC1' -p68 -sg15 -(dp69 -g17 -S'NP_001155898.1:p.(Arg245Ter)' -p70 -sg19 -S'NP_001155898.1:p.(R245*)' -p71 -ssg21 -g22 -sg23 -g8 -sg24 -g8 -sg26 -S'NM_001162426.1:c.733C>T' -p72 -sg28 -g8 -sg30 -(dp73 -g32 -(dp74 -g34 -S'NC_000009.11:g.135796754G>A' -p75 -sg36 -(dp76 -g38 -g39 -sg40 -g41 -sg42 -S'135796754' -p77 -sg44 -g45 -sssg46 -(dp78 -g34 -S'NC_000009.12:g.132921367G>A' -p79 -sg36 -(dp80 -g38 -g39 -sg40 -g41 -sg42 -S'132921367' -p81 -sg44 -g45 -sssg51 -(dp82 -g34 -S'NC_000009.11:g.135796754G>A' -p83 -sg36 -(dp84 -g38 -g55 -sg40 -g41 -sg42 -S'135796754' -p85 -sg44 -g45 -sssg57 -(dp86 -g34 -S'NC_000009.12:g.132921367G>A' -p87 -sg36 -(dp88 -g38 -g55 -sg40 -g41 -sg42 -S'132921367' -p89 -sg44 -g45 -sssssS'flag' -p90 -S'gene_variant' -p91 -sS'NM_001362177.1:c.370C>T' -p92 -(dp93 -g3 -g8 -sg5 -(lp94 -S'RefSeqGene record not available' -p95 -asg7 -g8 -sg9 -(lp96 -sg11 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 5, mRNA -p97 -sg13 -S'TSC1' -p98 -sg15 -(dp99 -g17 -S'NP_001349106.1:p.(Arg124Ter)' -p100 -sg19 -S'NP_001349106.1:p.(R124*)' -p101 -ssg21 -g22 -sg23 -g8 -sg24 -g8 -sg26 -S'NM_001362177.1:c.370C>T' -p102 -sg28 -g8 -sg30 -(dp103 -g32 -(dp104 -g34 -S'NC_000009.11:g.135796754G>A' -p105 -sg36 -(dp106 -g38 -g39 -sg40 -g41 -sg42 -S'135796754' -p107 -sg44 -g45 -sssg46 -(dp108 -g34 -S'NC_000009.12:g.132921367G>A' -p109 -sg36 -(dp110 -g38 -g39 -sg40 -g41 -sg42 -S'132921367' -p111 -sg44 -g45 -sssg51 -(dp112 -g34 -S'NC_000009.11:g.135796754G>A' -p113 -sg36 -(dp114 -g38 -g55 -sg40 -g41 -sg42 -S'135796754' -p115 -sg44 -g45 -sssg57 -(dp116 -g34 -S'NC_000009.12:g.132921367G>A' -p117 -sg36 -(dp118 -g38 -g55 -sg40 -g41 -sg42 -S'132921367' -p119 -sg44 -g45 -sssssS'NM_001162427.1:c.580C>T' -p120 -(dp121 -g3 -g8 -sg5 -(lp122 -S'RefSeqGene record not available' -p123 -asg7 -g8 -sg9 -(lp124 -sg11 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 4, mRNA -p125 -sg13 -S'TSC1' -p126 -sg15 -(dp127 -g17 
-S'NP_001155899.1:p.(Arg194Ter)' -p128 -sg19 -S'NP_001155899.1:p.(R194*)' -p129 -ssg21 -g22 -sg23 -g8 -sg24 -g8 -sg26 -S'NM_001162427.1:c.580C>T' -p130 -sg28 -g8 -sg30 -(dp131 -g32 -(dp132 -g34 -S'NC_000009.11:g.135796754G>A' -p133 -sg36 -(dp134 -g38 -g39 -sg40 -g41 -sg42 -S'135796754' -p135 -sg44 -g45 -sssg46 -(dp136 -g34 -S'NC_000009.12:g.132921367G>A' -p137 -sg36 -(dp138 -g38 -g39 -sg40 -g41 -sg42 -S'132921367' -p139 -sg44 -g45 -sssg51 -(dp140 -g34 -S'NC_000009.11:g.135796754G>A' -p141 -sg36 -(dp142 -g38 -g55 -sg40 -g41 -sg42 -S'135796754' -p143 -sg44 -g45 -sssg57 -(dp144 -g34 -S'NC_000009.12:g.132921367G>A' -p145 -sg36 -(dp146 -g38 -g55 -sg40 -g41 -sg42 -S'132921367' -p147 -sg44 -g45 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant312.txt b/VariantValidator/testing/testOutputsMasterITS/variant312.txt deleted file mode 100644 index 86c8e310..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant312.txt +++ /dev/null @@ -1,164 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_005247.2:c.616del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NW_003571046.1:g.10391AC>A automapped to NW_003571046.1:g.10396delC' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -(dp13 -S'GRCh37' -p14 -(dp15 -S'HGVS_genomic_description' -p16 -S'NW_003571046.1:g.10392del' -p17 -sS'vcf' -p18 -(dp19 -S'chr' -p20 -S'HG536_PATCH' -p21 -sS'ref' -p22 -S'AC' -p23 -sS'pos' -p24 -S'10391' -p25 -sS'alt' -p26 -S'A' -p27 -sssasS'transcript_description' -p28 -VHomo sapiens fibroblast growth factor 3 (FGF3), mRNA -p29 -sS'gene_symbol' -p30 -S'FGF3' -p31 -sS'HGVS_predicted_protein_consequence' -p32 -(dp33 -S'tlr' -p34 -S'NP_005238.1:p.(Val206SerfsTer117)' -p35 -sS'slr' -p36 -S'NP_005238.1:p.(V206Sfs*117)' -p37 -ssS'submitted_variant' -p38 -S'HG536_PATCH-10391-AC-A' -p39 -sS'genome_context_intronic_sequence' -p40 -g6 -sS'HGVS_LRG_variant' -p41 
-g6 -sS'HGVS_transcript_variant' -p42 -S'NM_005247.2:c.616del' -p43 -sS'HGVS_RefSeqGene_variant' -p44 -S'NG_009016.1:g.14016del' -p45 -sS'primary_assembly_loci' -p46 -(dp47 -S'GRCh38' -p48 -(dp49 -g16 -S'NC_000011.10:g.69810409del' -p50 -sg18 -(dp51 -g20 -S'11' -p52 -sg22 -S'AC' -p53 -sg24 -S'69810408' -p54 -sg26 -g27 -sssg14 -(dp55 -g16 -S'NC_000011.9:g.69625177del' -p56 -sg18 -(dp57 -g20 -g52 -sg22 -S'AC' -p58 -sg24 -S'69625176' -p59 -sg26 -g27 -sssS'hg38' -p60 -(dp61 -g16 -S'NC_000011.10:g.69810409del' -p62 -sg18 -(dp63 -g20 -S'chr11' -p64 -sg22 -S'AC' -p65 -sg24 -S'69810408' -p66 -sg26 -g27 -sssS'hg19' -p67 -(dp68 -g16 -S'NC_000011.9:g.69625177del' -p69 -sg18 -(dp70 -g20 -g64 -sg22 -S'AC' -p71 -sg24 -S'69625176' -p72 -sg26 -g27 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant313.txt b/VariantValidator/testing/testOutputsMasterITS/variant313.txt deleted file mode 100644 index b0e372a0..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant313.txt +++ /dev/null @@ -1,496 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_133266.4:c.802C>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -(dp13 -S'GRCh37' -p14 -(dp15 -S'HGVS_genomic_description' -p16 -S'NW_004070871.1:g.33547G>A' -p17 -sS'vcf' -p18 -(dp19 -S'chr' -p20 -S'HG865_PATCH' -p21 -sS'ref' -p22 -VG -p23 -sS'pos' -p24 -S'33547' -p25 -sS'alt' -p26 -VA -p27 -sssasS'transcript_description' -p28 -VHomo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 2, mRNA -p29 -sS'gene_symbol' -p30 -S'SHANK2' -p31 -sS'HGVS_predicted_protein_consequence' -p32 -(dp33 -S'tlr' -p34 -S'NP_573573.2:p.(Leu268=)' -p35 -sS'slr' -p36 -S'NP_573573.2:p.(L268=)' -p37 -ssS'submitted_variant' -p38 -S'HG865_PATCH-33547-G-A' -p39 -sS'genome_context_intronic_sequence' -p40 -g6 
-sS'HGVS_LRG_variant' -p41 -g6 -sS'HGVS_transcript_variant' -p42 -S'NM_133266.4:c.802C>T' -p43 -sS'HGVS_RefSeqGene_variant' -p44 -g6 -sS'primary_assembly_loci' -p45 -(dp46 -S'GRCh38' -p47 -(dp48 -g16 -S'NC_000011.10:g.70489334G>A' -p49 -sg18 -(dp50 -g20 -S'11' -p51 -sg22 -g23 -sg24 -S'70489334' -p52 -sg26 -g27 -sssg14 -(dp53 -g16 -S'NC_000011.9:g.70335439G>A' -p54 -sg18 -(dp55 -g20 -g51 -sg22 -g23 -sg24 -S'70335439' -p56 -sg26 -g27 -sssS'hg38' -p57 -(dp58 -g16 -S'NC_000011.10:g.70489334G>A' -p59 -sg18 -(dp60 -g20 -S'chr11' -p61 -sg22 -g23 -sg24 -S'70489334' -p62 -sg26 -g27 -sssS'hg19' -p63 -(dp64 -g16 -S'NC_000011.9:g.70335439G>A' -p65 -sg18 -(dp66 -g20 -g61 -sg22 -g23 -sg24 -S'70335439' -p67 -sg26 -g27 -sssssS'NM_133266.3:c.802C>T' -p68 -(dp69 -g5 -g6 -sg7 -(lp70 -S'A more recent version of the selected reference sequence NM_133266.3 is available (NM_133266.4)' -p71 -aS'NM_133266.4:c.802C>T MUST be fully validated prior to use in reports' -p72 -aS'select_variants=NM_133266.4:c.802C>T' -p73 -aS'RefSeqGene record not available' -p74 -asg10 -g6 -sg11 -(lp75 -(dp76 -g14 -(dp77 -g16 -S'NW_004070871.1:g.33547G>A' -p78 -sg18 -(dp79 -g20 -g21 -sg22 -g23 -sg24 -S'33547' -p80 -sg26 -g27 -sssasg28 -VHomo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 2, mRNA -p81 -sg30 -S'SHANK2' -p82 -sg32 -(dp83 -g34 -S'NP_573573.2:p.(Leu268=)' -p84 -sg36 -S'NP_573573.2:p.(L268=)' -p85 -ssg38 -g39 -sg40 -g6 -sg41 -g6 -sg42 -S'NM_133266.3:c.802C>T' -p86 -sg44 -g6 -sg45 -(dp87 -g63 -(dp88 -g16 -S'NC_000011.9:g.70335439G>A' -p89 -sg18 -(dp90 -g20 -g61 -sg22 -g23 -sg24 -S'70335439' -p91 -sg26 -g27 -sssg14 -(dp92 -g16 -S'NC_000011.9:g.70335439G>A' -p93 -sg18 -(dp94 -g20 -g51 -sg22 -g23 -sg24 -S'70335439' -p95 -sg26 -g27 -sssssS'NR_110766.1:n.833+969C>T' -p96 -(dp97 -g5 -g6 -sg7 -(lp98 -S'RefSeqGene record not available' -p99 -asg10 -g6 -sg11 -(lp100 -(dp101 -g14 -(dp102 -g16 -S'NW_004070871.1:g.33547G>A' -p103 -sg18 -(dp104 -g20 -g21 -sg22 -g23 -sg24 -S'33547' 
-p105 -sg26 -g27 -sssasg28 -VHomo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 3, non-coding RNA -p106 -sg30 -S'SHANK2' -p107 -sg32 -(dp108 -g34 -S'Non-coding :n.' -p109 -sg36 -g6 -ssg38 -g39 -sg40 -S'NC_000011.9(NR_110766.1):c.833+969C>T' -p110 -sg41 -g6 -sg42 -S'NR_110766.1:n.833+969C>T' -p111 -sg44 -g6 -sg45 -(dp112 -g47 -(dp113 -g16 -S'NC_000011.10:g.70489334G>A' -p114 -sg18 -(dp115 -g20 -g51 -sg22 -g23 -sg24 -S'70489334' -p116 -sg26 -g27 -sssg14 -(dp117 -g16 -S'NC_000011.9:g.70335439G>A' -p118 -sg18 -(dp119 -g20 -g51 -sg22 -g23 -sg24 -S'70335439' -p120 -sg26 -g27 -sssg57 -(dp121 -g16 -S'NC_000011.10:g.70489334G>A' -p122 -sg18 -(dp123 -g20 -g61 -sg22 -g23 -sg24 -S'70489334' -p124 -sg26 -g27 -sssg63 -(dp125 -g16 -S'NC_000011.9:g.70335439G>A' -p126 -sg18 -(dp127 -g20 -g61 -sg22 -g23 -sg24 -S'70335439' -p128 -sg26 -g27 -sssssS'NM_012309.4:c.2566C>T' -p129 -(dp130 -g5 -g6 -sg7 -(lp131 -S'RefSeqGene record not available' -p132 -asg10 -g6 -sg11 -(lp133 -(dp134 -g14 -(dp135 -g16 -S'NW_004070871.1:g.33547G>A' -p136 -sg18 -(dp137 -g20 -g21 -sg22 -g23 -sg24 -S'33547' -p138 -sg26 -g27 -sssasg28 -VHomo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 1, mRNA -p139 -sg30 -S'SHANK2' -p140 -sg32 -(dp141 -g34 -S'NP_036441.2:p.(Leu856=)' -p142 -sg36 -S'NP_036441.2:p.(L856=)' -p143 -ssg38 -g39 -sg40 -g6 -sg41 -g6 -sg42 -S'NM_012309.4:c.2566C>T' -p144 -sg44 -g6 -sg45 -(dp145 -g47 -(dp146 -g16 -S'NC_000011.10:g.70489334G>A' -p147 -sg18 -(dp148 -g20 -g51 -sg22 -g23 -sg24 -S'70489334' -p149 -sg26 -g27 -sssg14 -(dp150 -g16 -S'NC_000011.9:g.70336423G>A' -p151 -sg18 -(dp152 -g20 -g51 -sg22 -g23 -sg24 -S'70336423' -p153 -sg26 -g27 -sssg57 -(dp154 -g16 -S'NC_000011.10:g.70489334G>A' -p155 -sg18 -(dp156 -g20 -g61 -sg22 -g23 -sg24 -S'70489334' -p157 -sg26 -g27 -sssg63 -(dp158 -g16 -S'NC_000011.9:g.70336423G>A' -p159 -sg18 -(dp160 -g20 -g61 -sg22 -g23 -sg24 -S'70336423' -p161 -sg26 -g27 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant314.txt b/VariantValidator/testing/testOutputsMasterITS/variant314.txt deleted file mode 100644 index 53201fa6..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant314.txt +++ /dev/null @@ -1,130 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_012309.4:c.960C>A' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -aS'NM_012309.4:c.960C>A cannot be mapped directly to genome build GRCh37' -p10 -aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -g6 -sS'alt_genomic_loci' -p13 -(lp14 -(dp15 -S'GRCh37' -p16 -(dp17 -S'HGVS_genomic_description' -p18 -S'NW_004070871.1:g.569441G>T' -p19 -sS'vcf' -p20 -(dp21 -S'chr' -p22 -S'HG865_PATCH' -p23 -sS'ref' -p24 -VG -p25 -sS'pos' -p26 -S'569441' -p27 -sS'alt' -p28 -VT -p29 -sssasS'transcript_description' -p30 -VHomo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 1, mRNA -p31 -sS'gene_symbol' -p32 -S'SHANK2' -p33 -sS'HGVS_predicted_protein_consequence' -p34 -(dp35 -S'tlr' -p36 -S'NP_036441.2:p.(Tyr320Ter)' -p37 -sS'slr' -p38 -S'NP_036441.2:p.(Y320*)' -p39 -ssS'submitted_variant' -p40 -S'HG865_PATCH-569441-G-T' -p41 -sS'genome_context_intronic_sequence' -p42 -g6 -sS'HGVS_LRG_variant' -p43 -g6 -sS'HGVS_transcript_variant' -p44 -S'NM_012309.4:c.960C>A' -p45 -sS'HGVS_RefSeqGene_variant' -p46 -g6 -sS'primary_assembly_loci' -p47 -(dp48 -S'GRCh38' -p49 -(dp50 -g18 -S'NC_000011.10:g.71075228G>T' -p51 -sg20 -(dp52 -g22 -S'11' -p53 -sg24 -g25 -sg26 -S'71075228' -p54 -sg28 -g29 -sssS'hg38' -p55 -(dp56 -g18 -S'NC_000011.10:g.71075228G>T' -p57 -sg20 -(dp58 -g22 -S'chr11' -p59 -sg24 -g25 -sg26 -S'71075228' -p60 -sg28 -g29 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant315.txt b/VariantValidator/testing/testOutputsMasterITS/variant315.txt deleted file mode 100644 index d0473f85..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant315.txt +++ /dev/null @@ -1,131 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_012309.4:c.913-5058G>A' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -aS'NM_012309.4:c.913-5058G>A cannot be mapped directly to genome build GRCh37' -p10 -aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -g6 -sS'alt_genomic_loci' -p13 -(lp14 -(dp15 -S'GRCh37' -p16 -(dp17 -S'HGVS_genomic_description' -p18 -S'NW_004070871.1:g.574546C>T' -p19 -sS'vcf' -p20 -(dp21 -S'chr' -p22 -S'HG865_PATCH' -p23 -sS'ref' -p24 -VC -p25 -sS'pos' -p26 -S'574546' -p27 -sS'alt' -p28 -VT -p29 -sssasS'transcript_description' -p30 -VHomo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 1, mRNA -p31 -sS'gene_symbol' -p32 -S'SHANK2' -p33 -sS'HGVS_predicted_protein_consequence' -p34 -(dp35 -S'tlr' -p36 -S'NP_036441.2:p.?' -p37 -sS'slr' -p38 -S'NP_036441.2:p.?' -p39 -ssS'submitted_variant' -p40 -S'HG865_PATCH-574546-C-T' -p41 -sS'genome_context_intronic_sequence' -p42 -S'NC_000011.10(NM_012309.4):c.913-5058G>A' -p43 -sS'HGVS_LRG_variant' -p44 -g6 -sS'HGVS_transcript_variant' -p45 -S'NM_012309.4:c.913-5058G>A' -p46 -sS'HGVS_RefSeqGene_variant' -p47 -g6 -sS'primary_assembly_loci' -p48 -(dp49 -S'GRCh38' -p50 -(dp51 -g18 -S'NC_000011.10:g.71080333C>T' -p52 -sg20 -(dp53 -g22 -S'11' -p54 -sg24 -g25 -sg26 -S'71080333' -p55 -sg28 -g29 -sssS'hg38' -p56 -(dp57 -g18 -S'NC_000011.10:g.71080333C>T' -p58 -sg20 -(dp59 -g22 -S'chr11' -p60 -sg24 -g25 -sg26 -S'71080333' -p61 -sg28 -g29 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant316.txt b/VariantValidator/testing/testOutputsMasterITS/variant316.txt deleted file mode 100644 index cfc9a5eb..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant316.txt +++ /dev/null @@ -1,152 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_020699.2:c.802_803insTT' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NW_003315905.1:g.133178TAG>T automapped to NW_003315905.1:g.133179_133180delAG' -p9 -aS'A more recent version of the selected reference sequence NM_020699.2 is available (NM_020699.3)' -p10 -aS'NM_020699.3:c.802_803insTT MUST be fully validated prior to use in reports' -p11 -aS'select_variants=NM_020699.3:c.802_803insTT' -p12 -aS'RefSeqGene record not available' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens GATA zinc finger domain containing 2B (GATAD2B), mRNA -p18 -sS'gene_symbol' -p19 -S'GATAD2B' -p20 -sS'HGVS_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_065750.1:p.(Pro268LeufsTer26)' -p24 -sS'slr' -p25 -S'NP_065750.1:p.(P268Lfs*26)' -p26 -ssS'submitted_variant' -p27 -S'HSCHR1_1_CTG31-133178-TAG-T' -p28 -sS'genome_context_intronic_sequence' -p29 -g6 -sS'HGVS_LRG_variant' -p30 -g6 -sS'HGVS_transcript_variant' -p31 -S'NM_020699.2:c.802_803insTT' -p32 -sS'HGVS_RefSeqGene_variant' -p33 -g6 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000001.10:g.153789945_153789946delinsGAAG' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chr1' -p43 -sS'ref' -p44 -S'G' -p45 -sS'pos' -p46 -S'153789945' -p47 -sS'alt' -p48 -VGAA -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000001.11:g.153817469_153817470insAA' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -g45 -sg46 -S'153817469' -p54 -sg48 -VGAA -p55 -sssS'GRCh37' -p56 -(dp57 -g38 
-S'NC_000001.10:g.153789945_153789946delinsGAAG' -p58 -sg40 -(dp59 -g42 -S'1' -p60 -sg44 -g45 -sg46 -S'153789945' -p61 -sg48 -VGAA -p62 -sssS'GRCh38' -p63 -(dp64 -g38 -S'NC_000001.11:g.153817469_153817470insAA' -p65 -sg40 -(dp66 -g42 -g60 -sg44 -g45 -sg46 -S'153817469' -p67 -sg48 -VGAA -p68 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant317.txt b/VariantValidator/testing/testOutputsMasterITS/variant317.txt deleted file mode 100644 index b696e1ea..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant317.txt +++ /dev/null @@ -1,257 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_021983.4:c.490G>C' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_021983.4:c.490G>C cannot be mapped directly to genome build GRCh37' -p9 -aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -(dp14 -S'GRCh37' -p15 -(dp16 -S'HGVS_genomic_description' -p17 -S'NT_167246.1:g.3848158T>G' -p18 -sS'vcf' -p19 -(dp20 -S'chr' -p21 -S'HSCHR6_MHC_MANN_CTG1' -p22 -sS'ref' -p23 -S'T' -p24 -sS'pos' -p25 -S'3848158' -p26 -sS'alt' -p27 -VG -p28 -sssa(dp29 -S'hg19' -p30 -(dp31 -g17 -S'NT_167246.1:g.3848158T>G' -p32 -sg19 -(dp33 -g21 -S'chr6_mann_hap4' -p34 -sg23 -g24 -sg25 -S'3848158' -p35 -sg27 -g28 -sssa(dp36 -S'GRCh38' -p37 -(dp38 -g17 -S'NT_167246.2:g.3842538T>G' -p39 -sg19 -(dp40 -g21 -g22 -sg23 -g24 -sg25 -S'3842538' -p41 -sg27 -g28 -sssa(dp42 -S'hg38' -p43 -(dp44 -g17 -S'NT_167246.2:g.3842538T>G' -p45 -sg19 -(dp46 -g21 -S'chr6_GL000253v2_alt' -p47 -sg23 -g24 -sg25 -S'3842538' -p48 -sg27 -g28 -sssa(dp49 -g15 -(dp50 -g17 -S'NT_167247.1:g.3884432C>G' -p51 -sg19 -(dp52 -g21 -S'HSCHR6_MHC_MCF_CTG1' -p53 -sg23 -VC -p54 -sg25 -S'3884432' -p55 -sg27 -g28 -sssa(dp56 -g30 -(dp57 -g17 -S'NT_167247.1:g.3884432C>G' -p58 -sg19 -(dp59 -g21 -S'chr6_mcf_hap5' -p60 
-sg23 -g54 -sg25 -S'3884432' -p61 -sg27 -g28 -sssa(dp62 -g15 -(dp63 -g17 -S'NT_167249.1:g.3852542C>G' -p64 -sg19 -(dp65 -g21 -S'HSCHR6_MHC_SSTO_CTG1' -p66 -sg23 -g54 -sg25 -S'3852542' -p67 -sg27 -g28 -sssa(dp68 -g30 -(dp69 -g17 -S'NT_167249.1:g.3852542C>G' -p70 -sg19 -(dp71 -g21 -S'chr6_ssto_hap7' -p72 -sg23 -g54 -sg25 -S'3852542' -p73 -sg27 -g28 -sssa(dp74 -g37 -(dp75 -g17 -S'NT_167249.2:g.3853244C>G' -p76 -sg19 -(dp77 -g21 -g66 -sg23 -g54 -sg25 -S'3853244' -p78 -sg27 -g28 -sssa(dp79 -g43 -(dp80 -g17 -S'NT_167249.2:g.3853244C>G' -p81 -sg19 -(dp82 -g21 -S'chr6_GL000256v2_alt' -p83 -sg23 -g54 -sg25 -S'3853244' -p84 -sg27 -g28 -sssasS'transcript_description' -p85 -VHomo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA -p86 -sS'gene_symbol' -p87 -S'HLA-DRB4' -p88 -sS'HGVS_predicted_protein_consequence' -p89 -(dp90 -S'tlr' -p91 -S'NP_068818.4:p.(Gly164Arg)' -p92 -sS'slr' -p93 -S'NP_068818.4:p.(G164R)' -p94 -ssS'submitted_variant' -p95 -S'HSCHR6_MHC_MANN_CTG1-3848158-T-G' -p96 -sS'genome_context_intronic_sequence' -p97 -g6 -sS'HGVS_LRG_variant' -p98 -g6 -sS'HGVS_transcript_variant' -p99 -S'NM_021983.4:c.490G>C' -p100 -sS'HGVS_RefSeqGene_variant' -p101 -S'NG_002433.1:g.5724C>G' -p102 -sS'primary_assembly_loci' -p103 -(dp104 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant318.txt b/VariantValidator/testing/testOutputsMasterITS/variant318.txt deleted file mode 100644 index 6e10f8bf..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant318.txt +++ /dev/null @@ -1,256 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_021983.4:c.346G>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_021983.4:c.346G>T cannot be mapped directly to genome build GRCh37' -p9 -aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -(dp14 -S'GRCh37' -p15 -(dp16 -S'HGVS_genomic_description' -p17 -S'NT_167246.1:g.3851043C>A' -p18 -sS'vcf' -p19 -(dp20 -S'chr' -p21 -S'HSCHR6_MHC_MANN_CTG1' -p22 -sS'ref' -p23 -VC -p24 -sS'pos' -p25 -S'3851043' -p26 -sS'alt' -p27 -VA -p28 -sssa(dp29 -S'hg19' -p30 -(dp31 -g17 -S'NT_167246.1:g.3851043C>A' -p32 -sg19 -(dp33 -g21 -S'chr6_mann_hap4' -p34 -sg23 -g24 -sg25 -S'3851043' -p35 -sg27 -g28 -sssa(dp36 -S'GRCh38' -p37 -(dp38 -g17 -S'NT_167246.2:g.3845423C>A' -p39 -sg19 -(dp40 -g21 -g22 -sg23 -g24 -sg25 -S'3845423' -p41 -sg27 -g28 -sssa(dp42 -S'hg38' -p43 -(dp44 -g17 -S'NT_167246.2:g.3845423C>A' -p45 -sg19 -(dp46 -g21 -S'chr6_GL000253v2_alt' -p47 -sg23 -g24 -sg25 -S'3845423' -p48 -sg27 -g28 -sssa(dp49 -g15 -(dp50 -g17 -S'NT_167247.1:g.3887313C>A' -p51 -sg19 -(dp52 -g21 -S'HSCHR6_MHC_MCF_CTG1' -p53 -sg23 -g24 -sg25 -S'3887313' -p54 -sg27 -g28 -sssa(dp55 -g30 -(dp56 -g17 -S'NT_167247.1:g.3887313C>A' -p57 -sg19 -(dp58 -g21 -S'chr6_mcf_hap5' -p59 -sg23 -g24 -sg25 -S'3887313' -p60 -sg27 -g28 -sssa(dp61 -g15 -(dp62 -g17 -S'NT_167249.1:g.3855423C>A' -p63 -sg19 -(dp64 -g21 -S'HSCHR6_MHC_SSTO_CTG1' -p65 -sg23 -g24 -sg25 -S'3855423' -p66 -sg27 -g28 -sssa(dp67 -g30 -(dp68 -g17 -S'NT_167249.1:g.3855423C>A' -p69 -sg19 -(dp70 -g21 -S'chr6_ssto_hap7' -p71 -sg23 
-g24 -sg25 -S'3855423' -p72 -sg27 -g28 -sssa(dp73 -g37 -(dp74 -g17 -S'NT_167249.2:g.3856125C>A' -p75 -sg19 -(dp76 -g21 -g65 -sg23 -g24 -sg25 -S'3856125' -p77 -sg27 -g28 -sssa(dp78 -g43 -(dp79 -g17 -S'NT_167249.2:g.3856125C>A' -p80 -sg19 -(dp81 -g21 -S'chr6_GL000256v2_alt' -p82 -sg23 -g24 -sg25 -S'3856125' -p83 -sg27 -g28 -sssasS'transcript_description' -p84 -VHomo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA -p85 -sS'gene_symbol' -p86 -S'HLA-DRB4' -p87 -sS'HGVS_predicted_protein_consequence' -p88 -(dp89 -S'tlr' -p90 -S'NP_068818.4:p.(Glu116Ter)' -p91 -sS'slr' -p92 -S'NP_068818.4:p.(E116*)' -p93 -ssS'submitted_variant' -p94 -S'HSCHR6_MHC_MANN_CTG1-3851043-C-A' -p95 -sS'genome_context_intronic_sequence' -p96 -g6 -sS'HGVS_LRG_variant' -p97 -g6 -sS'HGVS_transcript_variant' -p98 -S'NM_021983.4:c.346G>T' -p99 -sS'HGVS_RefSeqGene_variant' -p100 -S'NG_002433.1:g.8605C>A' -p101 -sS'primary_assembly_loci' -p102 -(dp103 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant319.txt b/VariantValidator/testing/testOutputsMasterITS/variant319.txt deleted file mode 100644 index fdacc432..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant319.txt +++ /dev/null @@ -1,248 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_001097642.2:c.-16-441C>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_245t1:c.-16-441C>T' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'NG_008357.1(NM_001097642.2):c.-16-441C>T' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens gap junction protein beta 1 (GJB1), transcript variant 1, mRNA -p14 -sS'gene_symbol' -p15 -S'GJB1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_001091111.1:p.?' -p20 -sS'slr' -p21 -S'NP_001091111.1:p.?' 
-p22 -ssS'submitted_variant' -p23 -S'X-70443101-C-T' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000023.10(NM_001097642.2):c.-16-441C>T' -p26 -sS'HGVS_LRG_variant' -p27 -S'LRG_245:g.13040C>T' -p28 -sS'HGVS_transcript_variant' -p29 -S'NM_001097642.2:c.-16-441C>T' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_008357.1:g.13040C>T' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000023.10:g.70443101C>T' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chrX' -p42 -sS'ref' -p43 -S'C' -p44 -sS'pos' -p45 -S'70443101' -p46 -sS'alt' -p47 -S'T' -p48 -sssS'hg38' -p49 -(dp50 -g37 -S'NC_000023.11:g.71223251C>T' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -g44 -sg45 -S'71223251' -p53 -sg47 -g48 -sssS'GRCh37' -p54 -(dp55 -g37 -S'NC_000023.10:g.70443101C>T' -p56 -sg39 -(dp57 -g41 -S'X' -p58 -sg43 -g44 -sg45 -S'70443101' -p59 -sg47 -g48 -sssS'GRCh38' -p60 -(dp61 -g37 -S'NC_000023.11:g.71223251C>T' -p62 -sg39 -(dp63 -g41 -g58 -sg43 -g44 -sg45 -S'71223251' -p64 -sg47 -g48 -sssssS'NM_000166.5:c.-101C>T' -p65 -(dp66 -g5 -S'' -p67 -sg7 -(lp68 -S'RefSeqGene record not available' -p69 -asg9 -g67 -sg11 -(lp70 -sg13 -VHomo sapiens gap junction protein beta 1 (GJB1), transcript variant 2, mRNA -p71 -sg15 -S'GJB1' -p72 -sg17 -(dp73 -g19 -S'NP_000157.1:p.?' -p74 -sg21 -S'NP_000157.1:p.?' -p75 -ssg23 -g24 -sg25 -g67 -sg27 -g67 -sg29 -S'NM_000166.5:c.-101C>T' -p76 -sg31 -g67 -sg33 -(dp77 -g35 -(dp78 -g37 -S'NC_000023.10:g.70443101C>T' -p79 -sg39 -(dp80 -g41 -g42 -sg43 -g44 -sg45 -S'70443101' -p81 -sg47 -g48 -sssg49 -(dp82 -g37 -S'NC_000023.11:g.71223251C>T' -p83 -sg39 -(dp84 -g41 -g42 -sg43 -g44 -sg45 -S'71223251' -p85 -sg47 -g48 -sssg54 -(dp86 -g37 -S'NC_000023.10:g.70443101C>T' -p87 -sg39 -(dp88 -g41 -g58 -sg43 -g44 -sg45 -S'70443101' -p89 -sg47 -g48 -sssg60 -(dp90 -g37 -S'NC_000023.11:g.71223251C>T' -p91 -sg39 -(dp92 -g41 -g58 -sg43 -g44 -sg45 -S'71223251' -p93 -sg47 -g48 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant32.txt b/VariantValidator/testing/testOutputsMasterITS/variant32.txt deleted file mode 100644 index b747274b..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant32.txt +++ /dev/null @@ -1,148 +0,0 @@ -(dp0 -S'NM_000088.3:c.591_593inv' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_1t1:c.591_593inv' -p4 -sS'validation_warnings' -p5 -(lp6 -sS'RefSeqGene_context_intronic_sequence' -p7 -S'' -p8 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p12 -sS'gene_symbol' -p13 -S'COL1A1' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_000079.2(LRG_1p1):p.(Pro198Asp)' -p18 -sS'slr' -p19 -S'NP_000079.2:p.(P198D)' -p20 -ssS'submitted_variant' -p21 -S'NM_000088.3:c.591_593inv' -p22 -sS'genome_context_intronic_sequence' -p23 -g8 -sS'HGVS_LRG_variant' -p24 -S'LRG_1:g.8640_8642inv' -p25 -sS'HGVS_transcript_variant' -p26 -S'NM_000088.3:c.591_593inv' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -S'NG_007400.1:g.8640_8642inv' -p29 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000017.10:g.48275359_48275361inv' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr17' -p39 -sS'ref' -p40 -S'GGA' -p41 -sS'pos' -p42 -S'48275359' -p43 -sS'alt' -p44 -S'TCC' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000017.11:g.50197998_50198000inv' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'GGA' -p50 -sg42 -S'50197998' -p51 -sg44 -S'TCC' -p52 -sssS'GRCh37' -p53 -(dp54 -g34 -S'NC_000017.10:g.48275359_48275361inv' -p55 -sg36 -(dp56 -g38 -S'17' -p57 -sg40 -S'GGA' -p58 -sg42 -S'48275359' -p59 -sg44 -S'TCC' -p60 -sssS'GRCh38' -p61 -(dp62 -g34 -S'NC_000017.11:g.50197998_50198000inv' -p63 -sg36 -(dp64 -g38 -g57 -sg40 -S'GGA' -p65 -sg42 -S'50197998' -p66 -sg44 -S'TCC' -p67 -sssssS'flag' -p68 -S'gene_variant' -p69 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant320.txt b/VariantValidator/testing/testOutputsMasterITS/variant320.txt deleted file mode 100644 index 11c33dc3..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant320.txt +++ /dev/null @@ -1,492 +0,0 @@ -(dp0 -S'NM_000495.4:c.2130_2135del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Multiple ALT sequences detected' -p7 -aS'auto-submitting all possible combinations' -p8 -aS'NC_000023.10:g.107845202GACCACC>G automapped to NC_000023.10:g.107845203_107845208del' -p9 -aS'RefSeqGene record not available' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -g4 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type IV alpha 5 chain (COL4A5), transcript variant 1, mRNA -p15 -sS'gene_symbol' -p16 -S'COL4A5' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000486.1(LRG_232p1):p.(Pro711_Pro712del)' -p21 -sS'slr' -p22 -S'NP_000486.1:p.(P711_P712del)' -p23 -ssS'submitted_variant' -p24 -S'X-107845202-GACCACC-GACC,G' -p25 -sS'genome_context_intronic_sequence' -p26 -g4 -sS'HGVS_LRG_variant' -p27 -g4 -sS'HGVS_transcript_variant' -p28 -S'NM_000495.4:c.2130_2135del' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -g4 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000023.10:g.107845203_107845208del' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chrX' -p40 -sS'ref' -p41 -S'GACCACC' -p42 -sS'pos' -p43 -S'107845202' -p44 -sS'alt' -p45 -S'G' -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000023.11:g.108601973_108601978del' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -S'GACCACC' -p51 -sg43 -S'108601972' -p52 -sg45 -g46 -sssS'GRCh37' -p53 -(dp54 -g35 -S'NC_000023.10:g.107845203_107845208del' -p55 -sg37 -(dp56 -g39 -S'X' -p57 -sg41 -S'GACCACC' -p58 -sg43 -S'107845202' -p59 -sg45 -g46 -sssS'GRCh38' -p60 -(dp61 -g35 
-S'NC_000023.11:g.108601973_108601978del' -p62 -sg37 -(dp63 -g39 -g57 -sg41 -S'GACCACC' -p64 -sg43 -S'108601972' -p65 -sg45 -g46 -sssssS'flag' -p66 -S'gene_variant' -p67 -sS'NM_033380.2:c.2130_2135del' -p68 -(dp69 -g3 -g4 -sg5 -(lp70 -S'Multiple ALT sequences detected' -p71 -aS'auto-submitting all possible combinations' -p72 -aS'NC_000023.10:g.107845202GACCACC>G automapped to NC_000023.10:g.107845203_107845208del' -p73 -aS'RefSeqGene record not available' -p74 -asg11 -g4 -sg12 -(lp75 -sg14 -VHomo sapiens collagen type IV alpha 5 chain (COL4A5), transcript variant 2, mRNA -p76 -sg16 -S'COL4A5' -p77 -sg18 -(dp78 -g20 -S'NP_203699.1(LRG_232p2):p.(Pro711_Pro712del)' -p79 -sg22 -S'NP_203699.1:p.(P711_P712del)' -p80 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_033380.2:c.2130_2135del' -p81 -sg30 -g4 -sg31 -(dp82 -g33 -(dp83 -g35 -S'NC_000023.10:g.107845203_107845208del' -p84 -sg37 -(dp85 -g39 -g40 -sg41 -S'GACCACC' -p86 -sg43 -S'107845202' -p87 -sg45 -g46 -sssg47 -(dp88 -g35 -S'NC_000023.11:g.108601973_108601978del' -p89 -sg37 -(dp90 -g39 -g40 -sg41 -S'GACCACC' -p91 -sg43 -S'108601972' -p92 -sg45 -g46 -sssg53 -(dp93 -g35 -S'NC_000023.10:g.107845203_107845208del' -p94 -sg37 -(dp95 -g39 -g57 -sg41 -S'GACCACC' -p96 -sg43 -S'107845202' -p97 -sg45 -g46 -sssg60 -(dp98 -g35 -S'NC_000023.11:g.108601973_108601978del' -p99 -sg37 -(dp100 -g39 -g57 -sg41 -S'GACCACC' -p101 -sg43 -S'108601972' -p102 -sg45 -g46 -sssssS'NM_033380.2:c.2133_2135del' -p103 -(dp104 -g3 -g4 -sg5 -(lp105 -S'Multiple ALT sequences detected' -p106 -aS'auto-submitting all possible combinations' -p107 -aS'NC_000023.10:g.107845202GACCACC>GACC automapped to NC_000023.10:g.107845206_107845208delACC' -p108 -aS'RefSeqGene record not available' -p109 -asg11 -g4 -sg12 -(lp110 -sg14 -VHomo sapiens collagen type IV alpha 5 chain (COL4A5), transcript variant 2, mRNA -p111 -sg16 -S'COL4A5' -p112 -sg18 -(dp113 -g20 -S'NP_203699.1(LRG_232p2):p.(Pro712del)' -p114 -sg22 -S'NP_203699.1:p.(P712del)' -p115 -ssg24 -g25 -sg26 -g4 
-sg27 -g4 -sg28 -S'NM_033380.2:c.2133_2135del' -p116 -sg30 -g4 -sg31 -(dp117 -g33 -(dp118 -g35 -S'NC_000023.10:g.107845206_107845208del' -p119 -sg37 -(dp120 -g39 -g40 -sg41 -S'GACC' -p121 -sg43 -S'107845202' -p122 -sg45 -g46 -sssg47 -(dp123 -g35 -S'NC_000023.11:g.108601976_108601978del' -p124 -sg37 -(dp125 -g39 -g40 -sg41 -S'GACC' -p126 -sg43 -S'108601972' -p127 -sg45 -g46 -sssg53 -(dp128 -g35 -S'NC_000023.10:g.107845206_107845208del' -p129 -sg37 -(dp130 -g39 -g57 -sg41 -S'GACC' -p131 -sg43 -S'107845202' -p132 -sg45 -g46 -sssg60 -(dp133 -g35 -S'NC_000023.11:g.108601976_108601978del' -p134 -sg37 -(dp135 -g39 -g57 -sg41 -S'GACC' -p136 -sg43 -S'108601972' -p137 -sg45 -g46 -sssssS'NM_000495.4:c.2133_2135del' -p138 -(dp139 -g3 -g4 -sg5 -(lp140 -S'Multiple ALT sequences detected' -p141 -aS'auto-submitting all possible combinations' -p142 -aS'NC_000023.10:g.107845202GACCACC>GACC automapped to NC_000023.10:g.107845206_107845208delACC' -p143 -aS'RefSeqGene record not available' -p144 -asg11 -g4 -sg12 -(lp145 -sg14 -VHomo sapiens collagen type IV alpha 5 chain (COL4A5), transcript variant 1, mRNA -p146 -sg16 -S'COL4A5' -p147 -sg18 -(dp148 -g20 -S'NP_000486.1(LRG_232p1):p.(Pro712del)' -p149 -sg22 -S'NP_000486.1:p.(P712del)' -p150 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_000495.4:c.2133_2135del' -p151 -sg30 -g4 -sg31 -(dp152 -g33 -(dp153 -g35 -S'NC_000023.10:g.107845206_107845208del' -p154 -sg37 -(dp155 -g39 -g40 -sg41 -S'GACC' -p156 -sg43 -S'107845202' -p157 -sg45 -g46 -sssg47 -(dp158 -g35 -S'NC_000023.11:g.108601976_108601978del' -p159 -sg37 -(dp160 -g39 -g40 -sg41 -S'GACC' -p161 -sg43 -S'108601972' -p162 -sg45 -g46 -sssg53 -(dp163 -g35 -S'NC_000023.10:g.107845206_107845208del' -p164 -sg37 -(dp165 -g39 -g57 -sg41 -S'GACC' -p166 -sg43 -S'107845202' -p167 -sg45 -g46 -sssg60 -(dp168 -g35 -S'NC_000023.11:g.108601976_108601978del' -p169 -sg37 -(dp170 -g39 -g57 -sg41 -S'GACC' -p171 -sg43 -S'108601972' -p172 -sg45 -g46 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant321.txt b/VariantValidator/testing/testOutputsMasterITS/variant321.txt deleted file mode 100644 index d81bf77c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant321.txt +++ /dev/null @@ -1,407 +0,0 @@ -(dp0 -S'NM_004992.3:c.502C>T' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_764t2:c.502C>T' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The current status of LRG_764 is pending therefore changes may be made to the LRG reference sequence' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -S'' -p9 -sS'alt_genomic_loci' -p10 -(lp11 -(dp12 -S'GRCh37' -p13 -(dp14 -S'HGVS_genomic_description' -p15 -S'NW_003871103.3:g.1465305G>A' -p16 -sS'vcf' -p17 -(dp18 -S'chr' -p19 -S'HG1497_PATCH' -p20 -sS'ref' -p21 -VG -p22 -sS'pos' -p23 -S'1465305' -p24 -sS'alt' -p25 -VA -p26 -sssasS'transcript_description' -p27 -VHomo sapiens methyl-CpG binding protein 2 (MECP2), transcript variant 1, mRNA -p28 -sS'gene_symbol' -p29 -S'MECP2' -p30 -sS'HGVS_predicted_protein_consequence' -p31 -(dp32 -S'tlr' -p33 -S'NP_004983.1(LRG_764p2):p.(Arg168Ter)' -p34 -sS'slr' -p35 -S'NP_004983.1:p.(R168*)' -p36 -ssS'submitted_variant' -p37 -S'X-153296777-G-A' -p38 -sS'genome_context_intronic_sequence' -p39 -g9 -sS'HGVS_LRG_variant' -p40 -S'LRG_764:g.110802C>T' -p41 -sS'HGVS_transcript_variant' -p42 -S'NM_004992.3:c.502C>T' -p43 -sS'HGVS_RefSeqGene_variant' -p44 -S'NG_007107.2:g.110802C>T' -p45 -sS'primary_assembly_loci' -p46 -(dp47 -S'hg19' -p48 -(dp49 -g15 -S'NC_000023.10:g.153296777G>A' -p50 -sg17 -(dp51 -g19 -S'chrX' -p52 -sg21 -g22 -sg23 -S'153296777' -p53 -sg25 -g26 -sssS'hg38' -p54 -(dp55 -g15 -S'NC_000023.11:g.154031326G>A' -p56 -sg17 -(dp57 -g19 -g52 -sg21 -g22 -sg23 -S'154031326' -p58 -sg25 -g26 -sssg13 -(dp59 -g15 -S'NC_000023.10:g.153296777G>A' -p60 -sg17 -(dp61 -g19 -S'X' -p62 -sg21 -g22 -sg23 -S'153296777' -p63 -sg25 -g26 -sssS'GRCh38' -p64 -(dp65 -g15 
-S'NC_000023.11:g.154031326G>A' -p66 -sg17 -(dp67 -g19 -g62 -sg21 -g22 -sg23 -S'154031326' -p68 -sg25 -g26 -sssssS'flag' -p69 -S'gene_variant' -p70 -sS'NM_001316337.1:c.223C>T' -p71 -(dp72 -g3 -g9 -sg5 -(lp73 -S'RefSeqGene record not available' -p74 -asg8 -g9 -sg10 -(lp75 -(dp76 -g13 -(dp77 -g15 -S'NW_003871103.3:g.1465305G>A' -p78 -sg17 -(dp79 -g19 -g20 -sg21 -g22 -sg23 -S'1465305' -p80 -sg25 -g26 -sssasg27 -VHomo sapiens methyl-CpG binding protein 2 (MECP2), transcript variant 3, mRNA -p81 -sg29 -S'MECP2' -p82 -sg31 -(dp83 -g33 -S'NP_001303266.1:p.(Arg75Ter)' -p84 -sg35 -S'NP_001303266.1:p.(R75*)' -p85 -ssg37 -g38 -sg39 -g9 -sg40 -g9 -sg42 -S'NM_001316337.1:c.223C>T' -p86 -sg44 -g9 -sg46 -(dp87 -g48 -(dp88 -g15 -S'NC_000023.10:g.153296777G>A' -p89 -sg17 -(dp90 -g19 -g52 -sg21 -g22 -sg23 -S'153296777' -p91 -sg25 -g26 -sssg54 -(dp92 -g15 -S'NC_000023.11:g.154031326G>A' -p93 -sg17 -(dp94 -g19 -g52 -sg21 -g22 -sg23 -S'154031326' -p95 -sg25 -g26 -sssg13 -(dp96 -g15 -S'NC_000023.10:g.153296777G>A' -p97 -sg17 -(dp98 -g19 -g62 -sg21 -g22 -sg23 -S'153296777' -p99 -sg25 -g26 -sssg64 -(dp100 -g15 -S'NC_000023.11:g.154031326G>A' -p101 -sg17 -(dp102 -g19 -g62 -sg21 -g22 -sg23 -S'154031326' -p103 -sg25 -g26 -sssssS'NM_001110792.1:c.538C>T' -p104 -(dp105 -g3 -S'LRG_764t1:c.538C>T' -p106 -sg5 -(lp107 -S'The current status of LRG_764 is pending therefore changes may be made to the LRG reference sequence' -p108 -asg8 -g9 -sg10 -(lp109 -(dp110 -g13 -(dp111 -g15 -S'NW_003871103.3:g.1465305G>A' -p112 -sg17 -(dp113 -g19 -g20 -sg21 -g22 -sg23 -S'1465305' -p114 -sg25 -g26 -sssasg27 -VHomo sapiens methyl-CpG binding protein 2 (MECP2), transcript variant 2, mRNA -p115 -sg29 -S'MECP2' -p116 -sg31 -(dp117 -g33 -S'NP_001104262.1:p.(Arg180Ter)' -p118 -sg35 -S'NP_001104262.1:p.(R180*)' -p119 -ssg37 -g38 -sg39 -g9 -sg40 -S'LRG_764:g.110802C>T' -p120 -sg42 -S'NM_001110792.1:c.538C>T' -p121 -sg44 -S'NG_007107.2:g.110802C>T' -p122 -sg46 -(dp123 -g48 -(dp124 -g15 -S'NC_000023.10:g.153296777G>A' 
-p125 -sg17 -(dp126 -g19 -g52 -sg21 -g22 -sg23 -S'153296777' -p127 -sg25 -g26 -sssg54 -(dp128 -g15 -S'NC_000023.11:g.154031326G>A' -p129 -sg17 -(dp130 -g19 -g52 -sg21 -g22 -sg23 -S'154031326' -p131 -sg25 -g26 -sssg13 -(dp132 -g15 -S'NC_000023.10:g.153296777G>A' -p133 -sg17 -(dp134 -g19 -g62 -sg21 -g22 -sg23 -S'153296777' -p135 -sg25 -g26 -sssg64 -(dp136 -g15 -S'NC_000023.11:g.154031326G>A' -p137 -sg17 -(dp138 -g19 -g62 -sg21 -g22 -sg23 -S'154031326' -p139 -sg25 -g26 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant322.txt b/VariantValidator/testing/testOutputsMasterITS/variant322.txt deleted file mode 100644 index 7327254e..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant322.txt +++ /dev/null @@ -1,146 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_198180.2:c.408_410del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'Cannot identify an in-frame Termination codon in the variant mRNA sequence' -p9 -aS'RefSeqGene record not available' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens pyroglutamylated RFamide peptide (QRFP), mRNA -p15 -sS'gene_symbol' -p16 -S'QRFP' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_937823.1:p.?' -p21 -sS'slr' -p22 -S'NP_937823.1:p.?' 
-p23 -ssS'submitted_variant' -p24 -S'NM_198180.2:c.408_410delGTG' -p25 -sS'genome_context_intronic_sequence' -p26 -g6 -sS'HGVS_LRG_variant' -p27 -g6 -sS'HGVS_transcript_variant' -p28 -S'NM_198180.2:c.408_410del' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -g6 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000009.11:g.133768816_133768818del' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr9' -p40 -sS'ref' -p41 -S'TCAC' -p42 -sS'pos' -p43 -S'133768815' -p44 -sS'alt' -p45 -S'T' -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000009.12:g.130893429_130893431del' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -S'TCAC' -p51 -sg43 -S'130893428' -p52 -sg45 -g46 -sssS'GRCh37' -p53 -(dp54 -g35 -S'NC_000009.11:g.133768816_133768818del' -p55 -sg37 -(dp56 -g39 -S'9' -p57 -sg41 -S'TCAC' -p58 -sg43 -S'133768815' -p59 -sg45 -g46 -sssS'GRCh38' -p60 -(dp61 -g35 -S'NC_000009.12:g.130893429_130893431del' -p62 -sg37 -(dp63 -g39 -g57 -sg41 -S'TCAC' -p64 -sg43 -S'130893428' -p65 -sg45 -g46 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant323.txt b/VariantValidator/testing/testOutputsMasterITS/variant323.txt deleted file mode 100644 index 7552fe54..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant323.txt +++ /dev/null @@ -1,144 +0,0 @@ -(dp0 -S'NM_080877.2:c.1733_1735delinsTTT' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens solute carrier family 34 member 3 (SLC34A3), transcript variant 3, mRNA -p12 -sS'gene_symbol' -p13 -S'SLC34A3' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_543153.1:p.(Pro578_Lys579delinsLeuTer)' -p18 -sS'slr' -p19 -S'NP_543153.1:p.(P578_K579delinsL*)' -p20 -ssS'submitted_variant' -p21 -S'NM_080877.2:c.1733_1735delinsTTT' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_080877.2:c.1733_1735delinsTTT' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000009.11:g.140130801_140130803delinsTTT' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr9' -p37 -sS'ref' -p38 -S'CGA' -p39 -sS'pos' -p40 -S'140130801' -p41 -sS'alt' -p42 -S'TTT' -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000009.12:g.137236349_137236351delinsTTT' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -S'CGA' -p48 -sg40 -S'137236349' -p49 -sg42 -g43 -sssS'GRCh37' -p50 -(dp51 -g32 -S'NC_000009.11:g.140130801_140130803delinsTTT' -p52 -sg34 -(dp53 -g36 -S'9' -p54 -sg38 -S'CGA' -p55 -sg40 -S'140130801' -p56 -sg42 -g43 -sssS'GRCh38' -p57 -(dp58 -g32 -S'NC_000009.12:g.137236349_137236351delinsTTT' -p59 -sg34 -(dp60 -g36 -g54 -sg38 -S'CGA' -p61 -sg40 -S'137236349' -p62 -sg42 -g43 -sssssS'flag' -p63 -S'gene_variant' -p64 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant324.txt b/VariantValidator/testing/testOutputsMasterITS/variant324.txt deleted file mode 100644 index 385e2d87..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant324.txt +++ /dev/null @@ -1,144 +0,0 @@ -(dp0 -S'NM_080877.2:c.1735_1737delinsTGA' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens solute carrier family 34 member 3 (SLC34A3), transcript variant 3, mRNA -p12 -sS'gene_symbol' -p13 -S'SLC34A3' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_543153.1:p.(Lys579Ter)' -p18 -sS'slr' -p19 -S'NP_543153.1:p.(K579*)' -p20 -ssS'submitted_variant' -p21 -S'NM_080877.2:c.1735_1737delinsTGA' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_080877.2:c.1735_1737delinsTGA' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000009.11:g.140130803_140130805delinsTGA' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr9' -p37 -sS'ref' -p38 -S'AAG' -p39 -sS'pos' -p40 -S'140130803' -p41 -sS'alt' -p42 -S'TGA' -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000009.12:g.137236351_137236353delinsTGA' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -S'AAG' -p48 -sg40 -S'137236351' -p49 -sg42 -g43 -sssS'GRCh37' -p50 -(dp51 -g32 -S'NC_000009.11:g.140130803_140130805delinsTGA' -p52 -sg34 -(dp53 -g36 -S'9' -p54 -sg38 -S'AAG' -p55 -sg40 -S'140130803' -p56 -sg42 -g43 -sssS'GRCh38' -p57 -(dp58 -g32 -S'NC_000009.12:g.137236351_137236353delinsTGA' -p59 -sg34 -(dp60 -g36 -g54 -sg38 -S'AAG' -p61 -sg40 -S'137236351' -p62 -sg42 -g43 -sssssS'flag' -p63 -S'gene_variant' -p64 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant325.txt b/VariantValidator/testing/testOutputsMasterITS/variant325.txt deleted file mode 100644 index 7eebab1f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant325.txt +++ /dev/null @@ -1,144 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_080877.2:c.1735_1737delinsTAATTGTTC' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens solute carrier family 34 member 3 (SLC34A3), transcript variant 3, mRNA -p14 -sS'gene_symbol' -p15 -S'SLC34A3' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_543153.1:p.(Lys579Ter)' -p20 -sS'slr' -p21 -S'NP_543153.1:p.(K579*)' -p22 -ssS'submitted_variant' -p23 -S'NM_080877.2:c.1735_1737delinsTAATTGTTC' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'HGVS_LRG_variant' -p26 -g6 -sS'HGVS_transcript_variant' -p27 -S'NM_080877.2:c.1735_1737delinsTAATTGTTC' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000009.11:g.140130803_140130805delinsTAATTGTTC' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr9' -p39 -sS'ref' -p40 -S'AAG' -p41 -sS'pos' -p42 -S'140130803' -p43 -sS'alt' -p44 -S'TAATTGTTC' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000009.12:g.137236351_137236353delinsTAATTGTTC' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'AAG' -p50 -sg42 -S'137236351' -p51 -sg44 -g45 -sssS'GRCh37' -p52 -(dp53 -g34 -S'NC_000009.11:g.140130803_140130805delinsTAATTGTTC' -p54 -sg36 -(dp55 -g38 -S'9' -p56 -sg40 -S'AAG' -p57 -sg42 -S'140130803' -p58 -sg44 -g45 -sssS'GRCh38' -p59 -(dp60 -g34 -S'NC_000009.12:g.137236351_137236353delinsTAATTGTTC' -p61 -sg36 -(dp62 -g38 -g56 -sg40 -S'AAG' -p63 -sg42 -S'137236351' -p64 -sg44 -g45 
-sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant326.txt b/VariantValidator/testing/testOutputsMasterITS/variant326.txt deleted file mode 100644 index 5eae03b3..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant326.txt +++ /dev/null @@ -1,141 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_080877.2:c.1737delinsATTGTTC' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens solute carrier family 34 member 3 (SLC34A3), transcript variant 3, mRNA -p14 -sS'gene_symbol' -p15 -S'SLC34A3' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_543153.1:p.(Lys579_Ala580insLeuPhe)' -p20 -sS'slr' -p21 -S'NP_543153.1:p.(K579_A580insLF)' -p22 -ssS'submitted_variant' -p23 -S'NM_080877.2:c.1737delinsATTGTTC' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'HGVS_LRG_variant' -p26 -g6 -sS'HGVS_transcript_variant' -p27 -S'NM_080877.2:c.1737delinsATTGTTC' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000009.11:g.140130805delinsATTGTTC' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr9' -p39 -sS'ref' -p40 -S'G' -p41 -sS'pos' -p42 -S'140130805' -p43 -sS'alt' -p44 -S'ATTGTTC' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000009.12:g.137236353delinsATTGTTC' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'137236353' -p50 -sg44 -g45 -sssS'GRCh37' -p51 -(dp52 -g34 -S'NC_000009.11:g.140130805delinsATTGTTC' -p53 -sg36 -(dp54 -g38 -S'9' -p55 -sg40 -g41 -sg42 -S'140130805' -p56 -sg44 -g45 -sssS'GRCh38' -p57 -(dp58 -g34 -S'NC_000009.12:g.137236353delinsATTGTTC' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'137236353' -p61 -sg44 -g45 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant327.txt b/VariantValidator/testing/testOutputsMasterITS/variant327.txt deleted file mode 100644 index 566e4e75..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant327.txt +++ /dev/null @@ -1,146 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.4392_*2delinsAGAG' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.4392_*2delinsAGAG' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2(LRG_1p1):p.(Ter1465GluextTer84)' -p20 -sS'slr' -p21 -S'NP_000079.2:p.(*1465Eext*84)' -p22 -ssS'submitted_variant' -p23 -S'NM_000088.3:c.4392_*2delinsAGAG' -p24 -sS'genome_context_intronic_sequence' -p25 -g10 -sS'HGVS_LRG_variant' -p26 -S'LRG_1:g.21135_21140delinsAGAG' -p27 -sS'HGVS_transcript_variant' -p28 -S'NM_000088.3:c.4392_*2delinsAGAG' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_007400.1:g.21135_21140delinsAGAG' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000017.10:g.48262861_48262866delinsCTCT' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr17' -p41 -sS'ref' -p42 -S'GTTTAC' -p43 -sS'pos' -p44 -S'48262861' -p45 -sS'alt' -p46 -VCTCT -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000017.11:g.50185500_50185505delinsCTCT' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'GTTTAC' -p52 -sg44 -S'50185500' -p53 -sg46 -VCTCT -p54 -sssS'GRCh37' -p55 -(dp56 -g36 -S'NC_000017.10:g.48262861_48262866delinsCTCT' -p57 -sg38 -(dp58 -g40 -S'17' -p59 -sg42 -S'GTTTAC' -p60 -sg44 -S'48262861' -p61 -sg46 -g47 -sssS'GRCh38' -p62 -(dp63 -g36 -S'NC_000017.11:g.50185500_50185505delinsCTCT' -p64 -sg38 -(dp65 -g40 -g59 -sg42 -S'GTTTAC' -p66 -sg44 
-S'50185500' -p67 -sg46 -g54 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant328.txt b/VariantValidator/testing/testOutputsMasterITS/variant328.txt deleted file mode 100644 index 5031f1cb..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant328.txt +++ /dev/null @@ -1,146 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589_591delinsAGAAGC' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.589_591delinsAGAAGC' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2(LRG_1p1):p.(Gly197delinsArgSer)' -p20 -sS'slr' -p21 -S'NP_000079.2:p.(G197delinsRS)' -p22 -ssS'submitted_variant' -p23 -S'NM_000088.3:c.589_591delinsAGAAGC' -p24 -sS'genome_context_intronic_sequence' -p25 -g10 -sS'HGVS_LRG_variant' -p26 -S'LRG_1:g.8638_8640delinsAGAAGC' -p27 -sS'HGVS_transcript_variant' -p28 -S'NM_000088.3:c.589_591delinsAGAAGC' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_007400.1:g.8638_8640delinsAGAAGC' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000017.10:g.48275361_48275363delinsGCTTCT' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr17' -p41 -sS'ref' -p42 -S'ACC' -p43 -sS'pos' -p44 -S'48275361' -p45 -sS'alt' -p46 -VGCTTCT -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000017.11:g.50198000_50198002delinsGCTTCT' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'ACC' -p52 -sg44 -S'50198000' -p53 -sg46 -VGCTTCT -p54 -sssS'GRCh37' -p55 -(dp56 -g36 -S'NC_000017.10:g.48275361_48275363delinsGCTTCT' -p57 -sg38 -(dp58 -g40 -S'17' -p59 -sg42 -S'ACC' -p60 -sg44 -S'48275361' -p61 -sg46 -g47 -sssS'GRCh38' -p62 -(dp63 -g36 -S'NC_000017.11:g.50198000_50198002delinsGCTTCT' -p64 -sg38 
-(dp65 -g40 -g59 -sg42 -S'ACC' -p66 -sg44 -S'50198000' -p67 -sg46 -g54 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant329.txt b/VariantValidator/testing/testOutputsMasterITS/variant329.txt deleted file mode 100644 index 4a69ed30..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant329.txt +++ /dev/null @@ -1,141 +0,0 @@ -(dp0 -S'NM_000885.5:c.*2536delinsAGAAAAATCA' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens integrin subunit alpha 4 (ITGA4), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'ITGA4' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_000876.3:p.?' -p18 -sS'slr' -p19 -S'NP_000876.3:p.?' -p20 -ssS'submitted_variant' -p21 -S'NM_000885.5:c.*2536delinsAGAAAAATCA' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_000885.5:c.*2536delinsAGAAAAATCA' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000002.11:g.182402790delinsAGAAAAATCA' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr2' -p37 -sS'ref' -p38 -S'G' -p39 -sS'pos' -p40 -S'182402790' -p41 -sS'alt' -p42 -S'AGAAAAATCA' -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000002.12:g.181538063delinsAGAAAAATCA' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'181538063' -p48 -sg42 -g43 -sssS'GRCh37' -p49 -(dp50 -g32 -S'NC_000002.11:g.182402790delinsAGAAAAATCA' -p51 -sg34 -(dp52 -g36 -S'2' -p53 -sg38 -g39 -sg40 -S'182402790' -p54 -sg42 -g43 -sssS'GRCh38' -p55 -(dp56 -g32 -S'NC_000002.12:g.181538063delinsAGAAAAATCA' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'181538063' -p59 -sg42 -g43 -sssssS'flag' -p60 -S'gene_variant' -p61 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant33.txt b/VariantValidator/testing/testOutputsMasterITS/variant33.txt deleted file mode 100644 index bad0b0cc..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant33.txt +++ /dev/null @@ -1,219 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000518.5:c.20A>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1232t1:c.20A>T' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens hemoglobin subunit beta (HBB), mRNA -p15 -sS'gene_symbol' -p16 -S'HBB' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000509.1(LRG_1232p1):p.(Glu7Val)' -p21 -sS'slr' -p22 -S'NP_000509.1:p.(E7V)' -p23 -ssS'submitted_variant' -p24 -S'11-5248232-T-A' -p25 -sS'genome_context_intronic_sequence' -p26 -g11 -sS'HGVS_LRG_variant' -p27 -g11 -sS'HGVS_transcript_variant' -p28 -S'NM_000518.5:c.20A>T' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -g11 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000011.9:g.5248232T>A' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr11' -p40 -sS'ref' -p41 -VT -p42 -sS'pos' -p43 -S'5248232' -p44 -sS'alt' -p45 -VA -p46 -sssS'GRCh37' -p47 -(dp48 -g35 -S'NC_000011.9:g.5248232T>A' -p49 -sg37 -(dp50 -g39 -S'11' -p51 -sg41 -g42 -sg43 -S'5248232' -p52 -sg45 -g46 -sssssS'NM_000518.4:c.20A>T' -p53 -(dp54 -g5 -g11 -sg7 -(lp55 -S'A more recent version of the selected reference sequence NM_000518.4 is available (NM_000518.5)' -p56 -aS'NM_000518.5:c.20A>T MUST be fully validated prior to use in reports' -p57 -aS'select_variants=NM_000518.5:c.20A>T' -p58 -asg10 -g11 -sg12 -(lp59 -sg14 -VHomo sapiens hemoglobin subunit beta (HBB), mRNA -p60 -sg16 -S'HBB' -p61 -sg18 -(dp62 -g20 -S'NP_000509.1(LRG_1232p1):p.(Glu7Val)' -p63 -sg22 
-S'NP_000509.1:p.(E7V)' -p64 -ssg24 -g25 -sg26 -g11 -sg27 -g11 -sg28 -S'NM_000518.4:c.20A>T' -p65 -sg30 -S'NG_000007.3:g.70614A>T' -p66 -sg31 -(dp67 -S'GRCh38' -p68 -(dp69 -g35 -S'NC_000011.10:g.5227002T>A' -p70 -sg37 -(dp71 -g39 -g51 -sg41 -g42 -sg43 -S'5227002' -p72 -sg45 -g46 -sssg47 -(dp73 -g35 -S'NC_000011.9:g.5248232T>A' -p74 -sg37 -(dp75 -g39 -g51 -sg41 -g42 -sg43 -S'5248232' -p76 -sg45 -g46 -sssS'hg38' -p77 -(dp78 -g35 -S'NC_000011.10:g.5227002T>A' -p79 -sg37 -(dp80 -g39 -g40 -sg41 -g42 -sg43 -S'5227002' -p81 -sg45 -g46 -sssg33 -(dp82 -g35 -S'NC_000011.9:g.5248232T>A' -p83 -sg37 -(dp84 -g39 -g40 -sg41 -g42 -sg43 -S'5248232' -p85 -sg45 -g46 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant330.txt b/VariantValidator/testing/testOutputsMasterITS/variant330.txt deleted file mode 100644 index 664077df..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant330.txt +++ /dev/null @@ -1,145 +0,0 @@ -(dp0 -S'NM_002693.2:c.-186_-185delinsCC' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_765t1:c.-186_-185delinsCC' -p4 -sS'validation_warnings' -p5 -(lp6 -sS'RefSeqGene_context_intronic_sequence' -p7 -S'' -p8 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens DNA polymerase gamma, catalytic subunit (POLG), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'POLG' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_002684.1(LRG_765p1):p.?' -p18 -sS'slr' -p19 -S'NP_002684.1:p.?' 
-p20 -ssS'submitted_variant' -p21 -S'NM_002693.2:c.-186_-185delinsCC' -p22 -sS'genome_context_intronic_sequence' -p23 -g8 -sS'HGVS_LRG_variant' -p24 -g8 -sS'HGVS_transcript_variant' -p25 -S'NM_002693.2:c.-186_-185delinsCC' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -S'NG_008218.1:g.5097_5098delinsCC' -p28 -sS'primary_assembly_loci' -p29 -(dp30 -S'GRCh38' -p31 -(dp32 -S'HGVS_genomic_description' -p33 -S'NC_000015.10:g.89334698_89334699delinsGG' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'15' -p38 -sS'ref' -p39 -S'CT' -p40 -sS'pos' -p41 -S'89334698' -p42 -sS'alt' -p43 -VGG -p44 -sssS'GRCh37' -p45 -(dp46 -g33 -S'NC_000015.9:g.89877929_89877930delinsGG' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'CT' -p49 -sg41 -S'89877929' -p50 -sg43 -VGG -p51 -sssS'hg38' -p52 -(dp53 -g33 -S'NC_000015.10:g.89334698_89334699delinsGG' -p54 -sg35 -(dp55 -g37 -S'chr15' -p56 -sg39 -S'CT' -p57 -sg41 -S'89334698' -p58 -sg43 -g44 -sssS'hg19' -p59 -(dp60 -g33 -S'NC_000015.9:g.89877929_89877930delinsGG' -p61 -sg35 -(dp62 -g37 -g56 -sg39 -S'CT' -p63 -sg41 -S'89877929' -p64 -sg43 -g51 -sssssS'flag' -p65 -S'gene_variant' -p66 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant331.txt b/VariantValidator/testing/testOutputsMasterITS/variant331.txt deleted file mode 100644 index 4a9fb178..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant331.txt +++ /dev/null @@ -1,420 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_001287344.1:c.690_690+1insCTACATAG' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -(dp13 -S'GRCh37' -p14 -(dp15 -S'HGVS_genomic_description' -p16 -S'NW_004070883.1:g.43848_43849insCTATGTAG' -p17 -sS'vcf' -p18 -(dp19 -S'chr' -p20 -S'HG1439_PATCH' -p21 -sS'ref' -p22 -S'C' -p23 -sS'pos' -p24 -S'43848' -p25 -sS'alt' -p26 -VCCTATGTAG -p27 -sssasS'transcript_description' -p28 -VHomo sapiens Bruton tyrosine kinase (BTK), transcript variant 3, mRNA -p29 -sS'gene_symbol' -p30 -S'BTK' -p31 -sS'HGVS_predicted_protein_consequence' -p32 -(dp33 -S'tlr' -p34 -S'NP_001274273.1:p.?' -p35 -sS'slr' -p36 -S'NP_001274273.1:p.?' 
-p37 -ssS'submitted_variant' -p38 -S'NG_009616.1:g.29052_29053insCTACATAG' -p39 -sS'genome_context_intronic_sequence' -p40 -S'NC_000023.10(NM_001287344.1):c.690_690+1insCTACATAG' -p41 -sS'HGVS_LRG_variant' -p42 -g6 -sS'HGVS_transcript_variant' -p43 -S'NM_001287344.1:c.690_690+1insCTACATAG' -p44 -sS'HGVS_RefSeqGene_variant' -p45 -g6 -sS'primary_assembly_loci' -p46 -(dp47 -S'hg19' -p48 -(dp49 -g16 -S'NC_000023.10:g.100617160_100617161insCTATGTAG' -p50 -sg18 -(dp51 -g20 -S'chrX' -p52 -sg22 -g23 -sg24 -S'100617160' -p53 -sg26 -VCCTATGTAG -p54 -sssS'hg38' -p55 -(dp56 -g16 -S'NC_000023.11:g.101362172_101362173insCTATGTAG' -p57 -sg18 -(dp58 -g20 -g52 -sg22 -g23 -sg24 -S'101362172' -p59 -sg26 -VCCTATGTAG -p60 -sssg14 -(dp61 -g16 -S'NC_000023.10:g.100617160_100617161insCTATGTAG' -p62 -sg18 -(dp63 -g20 -S'X' -p64 -sg22 -g23 -sg24 -S'100617160' -p65 -sg26 -VCCTATGTAG -p66 -sssS'GRCh38' -p67 -(dp68 -g16 -S'NC_000023.11:g.101362172_101362173insCTATGTAG' -p69 -sg18 -(dp70 -g20 -g64 -sg22 -g23 -sg24 -S'101362172' -p71 -sg26 -VCCTATGTAG -p72 -sssssS'NM_001287345.1:c.588_588+1insCTACATAG' -p73 -(dp74 -g5 -g6 -sg7 -(lp75 -S'RefSeqGene record not available' -p76 -asg10 -g6 -sg11 -(lp77 -(dp78 -g14 -(dp79 -g16 -S'NW_004070883.1:g.43848_43849insCTATGTAG' -p80 -sg18 -(dp81 -g20 -g21 -sg22 -g23 -sg24 -S'43848' -p82 -sg26 -VCCTATGTAG -p83 -sssasg28 -VHomo sapiens Bruton tyrosine kinase (BTK), transcript variant 2, mRNA -p84 -sg30 -S'BTK' -p85 -sg32 -(dp86 -g34 -S'NP_001274274.1:p.?' -p87 -sg36 -S'NP_001274274.1:p.?' 
-p88 -ssg38 -g39 -sg40 -S'NC_000023.10(NM_001287345.1):c.588_588+1insCTACATAG' -p89 -sg42 -g6 -sg43 -S'NM_001287345.1:c.588_588+1insCTACATAG' -p90 -sg45 -g6 -sg46 -(dp91 -g48 -(dp92 -g16 -S'NC_000023.10:g.100617160_100617161insCTATGTAG' -p93 -sg18 -(dp94 -g20 -g52 -sg22 -g23 -sg24 -S'100617160' -p95 -sg26 -VCCTATGTAG -p96 -sssg55 -(dp97 -g16 -S'NC_000023.11:g.101362172_101362173insCTATGTAG' -p98 -sg18 -(dp99 -g20 -g52 -sg22 -g23 -sg24 -S'101362172' -p100 -sg26 -VCCTATGTAG -p101 -sssg14 -(dp102 -g16 -S'NC_000023.10:g.100617160_100617161insCTATGTAG' -p103 -sg18 -(dp104 -g20 -g64 -sg22 -g23 -sg24 -S'100617160' -p105 -sg26 -VCCTATGTAG -p106 -sssg67 -(dp107 -g16 -S'NC_000023.11:g.101362172_101362173insCTATGTAG' -p108 -sg18 -(dp109 -g20 -g64 -sg22 -g23 -sg24 -S'101362172' -p110 -sg26 -VCCTATGTAG -p111 -sssssS'NM_000061.2:c.588_588+1insCTACATAG' -p112 -(dp113 -g5 -S'LRG_128t1:c.588_588+1insCTACATAG' -p114 -sg7 -(lp115 -sg10 -S'NG_009616.1(NM_000061.2):c.588_588+1insCTACATAG' -p116 -sg11 -(lp117 -(dp118 -g14 -(dp119 -g16 -S'NW_004070883.1:g.43848_43849insCTATGTAG' -p120 -sg18 -(dp121 -g20 -g21 -sg22 -g23 -sg24 -S'43848' -p122 -sg26 -VCCTATGTAG -p123 -sssasg28 -VHomo sapiens Bruton tyrosine kinase (BTK), transcript variant 1, mRNA -p124 -sg30 -S'BTK' -p125 -sg32 -(dp126 -g34 -S'NP_000052.1(LRG_128p1):p.?' -p127 -sg36 -S'NP_000052.1:p.?' 
-p128 -ssg38 -g39 -sg40 -S'NC_000023.10(NM_000061.2):c.588_588+1insCTACATAG' -p129 -sg42 -S'LRG_128:g.29052_29053insCTACATAG' -p130 -sg43 -S'NM_000061.2:c.588_588+1insCTACATAG' -p131 -sg45 -S'NG_009616.1:g.29052_29053insCTACATAG' -p132 -sg46 -(dp133 -g48 -(dp134 -g16 -S'NC_000023.10:g.100617160_100617161insCTATGTAG' -p135 -sg18 -(dp136 -g20 -g52 -sg22 -g23 -sg24 -S'100617160' -p137 -sg26 -VCCTATGTAG -p138 -sssg55 -(dp139 -g16 -S'NC_000023.11:g.101362172_101362173insCTATGTAG' -p140 -sg18 -(dp141 -g20 -g52 -sg22 -g23 -sg24 -S'101362172' -p142 -sg26 -VCCTATGTAG -p143 -sssg14 -(dp144 -g16 -S'NC_000023.10:g.100617160_100617161insCTATGTAG' -p145 -sg18 -(dp146 -g20 -g64 -sg22 -g23 -sg24 -S'100617160' -p147 -sg26 -VCCTATGTAG -p148 -sssg67 -(dp149 -g16 -S'NC_000023.11:g.101362172_101362173insCTATGTAG' -p150 -sg18 -(dp151 -g20 -g64 -sg22 -g23 -sg24 -S'101362172' -p152 -sg26 -VCCTATGTAG -p153 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant332.txt b/VariantValidator/testing/testOutputsMasterITS/variant332.txt deleted file mode 100644 index deb41ddc..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant332.txt +++ /dev/null @@ -1,165 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000061.2:c.588_588+1insCTACATAG' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_128t1:c.588_588+1insCTACATAG' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'NG_009616.1(NM_000061.2):c.588_588+1insCTACATAG' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -(dp13 -S'GRCh37' -p14 -(dp15 -S'HGVS_genomic_description' -p16 -S'NW_004070883.1:g.43848_43849insCTATGTAG' -p17 -sS'vcf' -p18 -(dp19 -S'chr' -p20 -S'HG1439_PATCH' -p21 -sS'ref' -p22 -S'C' -p23 -sS'pos' -p24 -S'43848' -p25 -sS'alt' -p26 -VCCTATGTAG -p27 -sssasS'transcript_description' -p28 -VHomo sapiens Bruton tyrosine kinase (BTK), transcript variant 1, mRNA -p29 -sS'gene_symbol' -p30 -S'BTK' -p31 -sS'HGVS_predicted_protein_consequence' 
-p32 -(dp33 -S'tlr' -p34 -S'NP_000052.1(LRG_128p1):p.?' -p35 -sS'slr' -p36 -S'NP_000052.1:p.?' -p37 -ssS'submitted_variant' -p38 -S'NM_000061.2:c.588_588+1insCTACATAG' -p39 -sS'genome_context_intronic_sequence' -p40 -S'NC_000023.10(NM_000061.2):c.588_588+1insCTACATAG' -p41 -sS'HGVS_LRG_variant' -p42 -S'LRG_128:g.29052_29053insCTACATAG' -p43 -sS'HGVS_transcript_variant' -p44 -S'NM_000061.2:c.588_588+1insCTACATAG' -p45 -sS'HGVS_RefSeqGene_variant' -p46 -S'NG_009616.1:g.29052_29053insCTACATAG' -p47 -sS'primary_assembly_loci' -p48 -(dp49 -S'hg19' -p50 -(dp51 -g16 -S'NC_000023.10:g.100617160_100617161insCTATGTAG' -p52 -sg18 -(dp53 -g20 -S'chrX' -p54 -sg22 -g23 -sg24 -S'100617160' -p55 -sg26 -VCCTATGTAG -p56 -sssS'hg38' -p57 -(dp58 -g16 -S'NC_000023.11:g.101362172_101362173insCTATGTAG' -p59 -sg18 -(dp60 -g20 -g54 -sg22 -g23 -sg24 -S'101362172' -p61 -sg26 -VCCTATGTAG -p62 -sssg14 -(dp63 -g16 -S'NC_000023.10:g.100617160_100617161insCTATGTAG' -p64 -sg18 -(dp65 -g20 -S'X' -p66 -sg22 -g23 -sg24 -S'100617160' -p67 -sg26 -VCCTATGTAG -p68 -sssS'GRCh38' -p69 -(dp70 -g16 -S'NC_000023.11:g.101362172_101362173insCTATGTAG' -p71 -sg18 -(dp72 -g20 -g66 -sg22 -g23 -sg24 -S'101362172' -p73 -sg26 -VCCTATGTAG -p74 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant333.txt b/VariantValidator/testing/testOutputsMasterITS/variant333.txt deleted file mode 100644 index 6b1d2687..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant333.txt +++ /dev/null @@ -1,113 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000061.2:c.588_589insCTACATAG' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_128t1:c.588_589insCTACATAG' -p6 -sS'validation_warnings' -p7 -(lp8 -S'This coding sequence variant description spans at least one intron' -p9 -aS'Use of the corresponding genomic sequence variant descriptions may be invalid. 
Please refer to https://www35.lamp.le.ac.uk/recommendations/' -p10 -aS'RefSeqGene record not available' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -S'' -p13 -sS'alt_genomic_loci' -p14 -(lp15 -sS'transcript_description' -p16 -VHomo sapiens Bruton tyrosine kinase (BTK), transcript variant 1, mRNA -p17 -sS'gene_symbol' -p18 -S'BTK' -p19 -sS'HGVS_predicted_protein_consequence' -p20 -(dp21 -S'tlr' -p22 -S'NP_000052.1(LRG_128p1):p.(Ile197LeufsTer5)' -p23 -sS'slr' -p24 -S'NP_000052.1:p.(I197Lfs*5)' -p25 -ssS'submitted_variant' -p26 -S'NM_000061.2:c.588_589insCTACATAG' -p27 -sS'genome_context_intronic_sequence' -p28 -g13 -sS'HGVS_LRG_variant' -p29 -g13 -sS'HGVS_transcript_variant' -p30 -S'NM_000061.2:c.588_589insCTACATAG' -p31 -sS'HGVS_RefSeqGene_variant' -p32 -g13 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000023.10:g.100615743_100617161delinsTCTATGTAGC' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chrX' -p42 -sS'ref' -p43 -S'GTTAGGAGAAAAGGTAGGAGGGTTTGTCAAGATACCAAGCACTCTTCTCTTCTCTCCCAACTCTCTGGCTTACTCAAGACACCCAAATCAGGCATACTAAAATATTACTCAGCAGTCATTCAACAACCATTTTTAAGCACCAGTGCAGGAGTTCTCAGCCTTGCACACATATAAAGACCATGTATGGAACTTTTAAATTCCAATGTACTTTCGGAGGCCAAGGCGGGCGGATCAGTTGAGGCCAGGAGTTCGAGACCAGCCTGGCCAACGTGACGAAACCCCATCTCTACTAAAAATGCAAAAATCAGCTGGGCATGGTAGTGTGTGCATATAGCCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATAACTTGACCCCAGGAGGCGGAGGTTGTAGTGAGCCAAGACCACGCCACTGCACTCCAGCCTGGGTGACAGAGTGAGACTGTCTCAAAAAAAAACCCACAAAAAACAAAAAACCAATTCCAATACCTAGTCAGTTTCCTCACAGACCAATTACATCAAAATCAAACTCTCAGGAATGGGACCCAAACATTACTATTTTTAAAGCTCACTAGACAAAAACCATTTATAGCTAAGGTCAGGAAACCGGCTTGGCACTAAACTTGTACGTGAATCTACTAAGTGGCTCAGAACCTTGGTTTCCTTCTTTGTAAAATGAGTATAATAATACCTGCTCTACTTACTTACAATATGTGAGAAAGGGCTTTCTAGCCCTCAAGAAGGAACCAAAAAAAAAAAAAAAACTTCTGAAGTGTTAGTGATAGGTGGTGTTAGTGCTAAGTGTTGAGTATGTTGGTATTAAGTGTTAAATTCTTCTAACTTTACTGTATGTTTGAAAATACTTTCCAGCTGGGCACGGTGGCTCACGCCTGTAATCCCAGCACTTTAGGAGGCCGAGGCGGGTTCGAGACCAGCCTGAACAACATGGTGACACCCCCATCTCTACTAAAAATACAAAAATTAGCTGGGCGTGGTGGTGC
ATGCCTGTAATCCCAGCTACTCAGGAGGCTGAGACAGGAGAATCACTTGAATCTGGGAGGCGGAGGTTGCAGTGAGCCGAGATCATGCCATTACACTCGAACCTGGGCGACAGAGCGAGACTCCGTCTCAAAACAAAAACAAAAGCAAAAACAAAACAACAACAAAAACTTTACATTAAAAAAATCAGGTTTTGTTCTAAACAGGTGATTGGATTACATGGTTGCTGAGAGCCTTCTATCTTTCCATCGAGGAGGAAATCCTAATTAGAAGAACAAATCCCCCATCTTAGCAAGAATACCAATTAACACTGCCAAGTCCCAGGGTAATTCTAAGACTCTAGTGTGTTCTTAGGGCTTGACTATAAGTTTCCATTTAAGCAGTGGCAGCACCCAGTTTCCCTGTATAC' -p44 -sS'pos' -p45 -S'100615751' -p46 -sS'alt' -p47 -S'G' -p48 -sssS'GRCh37' -p49 -(dp50 -g37 -S'NC_000023.10:g.100615743_100617161delinsTCTATGTAGC' -p51 -sg39 -(dp52 -g41 -S'X' -p53 -sg43 -S'GTTAGGAGAAAAGGTAGGAGGGTTTGTCAAGATACCAAGCACTCTTCTCTTCTCTCCCAACTCTCTGGCTTACTCAAGACACCCAAATCAGGCATACTAAAATATTACTCAGCAGTCATTCAACAACCATTTTTAAGCACCAGTGCAGGAGTTCTCAGCCTTGCACACATATAAAGACCATGTATGGAACTTTTAAATTCCAATGTACTTTCGGAGGCCAAGGCGGGCGGATCAGTTGAGGCCAGGAGTTCGAGACCAGCCTGGCCAACGTGACGAAACCCCATCTCTACTAAAAATGCAAAAATCAGCTGGGCATGGTAGTGTGTGCATATAGCCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATAACTTGACCCCAGGAGGCGGAGGTTGTAGTGAGCCAAGACCACGCCACTGCACTCCAGCCTGGGTGACAGAGTGAGACTGTCTCAAAAAAAAACCCACAAAAAACAAAAAACCAATTCCAATACCTAGTCAGTTTCCTCACAGACCAATTACATCAAAATCAAACTCTCAGGAATGGGACCCAAACATTACTATTTTTAAAGCTCACTAGACAAAAACCATTTATAGCTAAGGTCAGGAAACCGGCTTGGCACTAAACTTGTACGTGAATCTACTAAGTGGCTCAGAACCTTGGTTTCCTTCTTTGTAAAATGAGTATAATAATACCTGCTCTACTTACTTACAATATGTGAGAAAGGGCTTTCTAGCCCTCAAGAAGGAACCAAAAAAAAAAAAAAAACTTCTGAAGTGTTAGTGATAGGTGGTGTTAGTGCTAAGTGTTGAGTATGTTGGTATTAAGTGTTAAATTCTTCTAACTTTACTGTATGTTTGAAAATACTTTCCAGCTGGGCACGGTGGCTCACGCCTGTAATCCCAGCACTTTAGGAGGCCGAGGCGGGTTCGAGACCAGCCTGAACAACATGGTGACACCCCCATCTCTACTAAAAATACAAAAATTAGCTGGGCGTGGTGGTGCATGCCTGTAATCCCAGCTACTCAGGAGGCTGAGACAGGAGAATCACTTGAATCTGGGAGGCGGAGGTTGCAGTGAGCCGAGATCATGCCATTACACTCGAACCTGGGCGACAGAGCGAGACTCCGTCTCAAAACAAAAACAAAAGCAAAAACAAAACAACAACAAAAACTTTACATTAAAAAAATCAGGTTTTGTTCTAAACAGGTGATTGGATTACATGGTTGCTGAGAGCCTTCTATCTTTCCATCGAGGAGGAAATCCTAATTAGAAGAACAAATCCCCCATCTTAGCAAGAATACCAATTAACACTGCCAAGTCCCAGGGTAATTCTAAGACTCTAGTGTGTTCTTAGGGCTTGACTATAAGTTTCCATTTAAGCAGTGGCAGCACCCAGTT
TCCCTGTATAC' -p54 -sg45 -S'100615751' -p55 -sg47 -g48 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant34.txt b/VariantValidator/testing/testOutputsMasterITS/variant34.txt deleted file mode 100644 index ef921874..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant34.txt +++ /dev/null @@ -1,143 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589-1G>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.589-1G>T' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'NG_007400.1(NM_000088.3):c.589-1G>T' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2(LRG_1p1):p.?' -p20 -sS'slr' -p21 -S'NP_000079.2:p.?' -p22 -ssS'submitted_variant' -p23 -S'NG_007400.1(NM_000088.3):c.589-1G>T' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000017.10(NM_000088.3):c.589-1G>T' -p26 -sS'HGVS_LRG_variant' -p27 -S'LRG_1:g.8637G>T' -p28 -sS'HGVS_transcript_variant' -p29 -S'NM_000088.3:c.589-1G>T' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_007400.1:g.8637G>T' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000017.10:g.48275364C>A' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr17' -p42 -sS'ref' -p43 -VC -p44 -sS'pos' -p45 -S'48275364' -p46 -sS'alt' -p47 -VA -p48 -sssS'hg38' -p49 -(dp50 -g37 -S'NC_000017.11:g.50198003C>A' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -g44 -sg45 -S'50198003' -p53 -sg47 -g48 -sssS'GRCh37' -p54 -(dp55 -g37 -S'NC_000017.10:g.48275364C>A' -p56 -sg39 -(dp57 -g41 -S'17' -p58 -sg43 -g44 -sg45 -S'48275364' -p59 -sg47 -g48 -sssS'GRCh38' -p60 -(dp61 -g37 -S'NC_000017.11:g.50198003C>A' -p62 -sg39 -(dp63 -g41 -g58 -sg43 -g44 -sg45 -S'50198003' -p64 -sg47 -g48 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant35.txt b/VariantValidator/testing/testOutputsMasterITS/variant35.txt deleted file mode 100644 index 55b30da6..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant35.txt +++ /dev/null @@ -1,349 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_182763.2:c.688+403C>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 2, mRNA -p14 -sS'gene_symbol' -p15 -S'MCL1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_877495.1:p.?' -p20 -sS'slr' -p21 -S'NP_877495.1:p.?' -p22 -ssS'submitted_variant' -p23 -S'1:150550916G>A' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000001.10(NM_182763.2):c.688+403C>T' -p26 -sS'HGVS_LRG_variant' -p27 -g6 -sS'HGVS_transcript_variant' -p28 -S'NM_182763.2:c.688+403C>T' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -g6 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000001.10:g.150550916G>A' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr1' -p40 -sS'ref' -p41 -VG -p42 -sS'pos' -p43 -S'150550916' -p44 -sS'alt' -p45 -VA -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000001.11:g.150578440G>A' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -g42 -sg43 -S'150578440' -p51 -sg45 -g46 -sssS'GRCh37' -p52 -(dp53 -g35 -S'NC_000001.10:g.150550916G>A' -p54 -sg37 -(dp55 -g39 -S'1' -p56 -sg41 -g42 -sg43 -S'150550916' -p57 -sg45 -g46 -sssS'GRCh38' -p58 -(dp59 -g35 -S'NC_000001.11:g.150578440G>A' -p60 -sg37 -(dp61 -g39 -g56 -sg41 -g42 -sg43 -S'150578440' -p62 -sg45 -g46 -sssssS'NM_001197320.1:c.281C>T' -p63 -(dp64 -g5 -g6 -sg7 -(lp65 -S'RefSeqGene record not available' -p66 -asg10 -g6 -sg11 -(lp67 -sg13 -VHomo sapiens 
MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 3, mRNA -p68 -sg15 -S'MCL1' -p69 -sg17 -(dp70 -g19 -S'NP_001184249.1:p.(Ser94Phe)' -p71 -sg21 -S'NP_001184249.1:p.(S94F)' -p72 -ssg23 -g24 -sg25 -g6 -sg27 -g6 -sg28 -S'NM_001197320.1:c.281C>T' -p73 -sg30 -g6 -sg31 -(dp74 -g33 -(dp75 -g35 -S'NC_000001.10:g.150550916G>A' -p76 -sg37 -(dp77 -g39 -g40 -sg41 -g42 -sg43 -S'150550916' -p78 -sg45 -g46 -sssg47 -(dp79 -g35 -S'NC_000001.11:g.150578440G>A' -p80 -sg37 -(dp81 -g39 -g40 -sg41 -g42 -sg43 -S'150578440' -p82 -sg45 -g46 -sssg52 -(dp83 -g35 -S'NC_000001.10:g.150550916G>A' -p84 -sg37 -(dp85 -g39 -g56 -sg41 -g42 -sg43 -S'150550916' -p86 -sg45 -g46 -sssg58 -(dp87 -g35 -S'NC_000001.11:g.150578440G>A' -p88 -sg37 -(dp89 -g39 -g56 -sg41 -g42 -sg43 -S'150578440' -p90 -sg45 -g46 -sssssS'NM_021960.4:c.740C>T' -p91 -(dp92 -g5 -g6 -sg7 -(lp93 -sg10 -g6 -sg11 -(lp94 -sg13 -VHomo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 1, mRNA -p95 -sg15 -S'MCL1' -p96 -sg17 -(dp97 -g19 -S'NP_068779.1:p.(Ser247Phe)' -p98 -sg21 -S'NP_068779.1:p.(S247F)' -p99 -ssg23 -g24 -sg25 -g6 -sg27 -g6 -sg28 -S'NM_021960.4:c.740C>T' -p100 -sg30 -S'NG_029146.1:g.6299C>T' -p101 -sg31 -(dp102 -g33 -(dp103 -g35 -S'NC_000001.10:g.150550916G>A' -p104 -sg37 -(dp105 -g39 -g40 -sg41 -g42 -sg43 -S'150550916' -p106 -sg45 -g46 -sssg47 -(dp107 -g35 -S'NC_000001.11:g.150578440G>A' -p108 -sg37 -(dp109 -g39 -g40 -sg41 -g42 -sg43 -S'150578440' -p110 -sg45 -g46 -sssg52 -(dp111 -g35 -S'NC_000001.10:g.150550916G>A' -p112 -sg37 -(dp113 -g39 -g56 -sg41 -g42 -sg43 -S'150550916' -p114 -sg45 -g46 -sssg58 -(dp115 -g35 -S'NC_000001.11:g.150578440G>A' -p116 -sg37 -(dp117 -g39 -g56 -sg41 -g42 -sg43 -S'150578440' -p118 -sg45 -g46 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant36.txt b/VariantValidator/testing/testOutputsMasterITS/variant36.txt deleted file mode 100644 index 28e9e884..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant36.txt +++ /dev/null @@ -1,349 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_182763.2:c.688+403C>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 2, mRNA -p14 -sS'gene_symbol' -p15 -S'MCL1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_877495.1:p.?' -p20 -sS'slr' -p21 -S'NP_877495.1:p.?' -p22 -ssS'submitted_variant' -p23 -S'1-150550916-G-A' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000001.10(NM_182763.2):c.688+403C>T' -p26 -sS'HGVS_LRG_variant' -p27 -g6 -sS'HGVS_transcript_variant' -p28 -S'NM_182763.2:c.688+403C>T' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -g6 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000001.10:g.150550916G>A' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr1' -p40 -sS'ref' -p41 -VG -p42 -sS'pos' -p43 -S'150550916' -p44 -sS'alt' -p45 -VA -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000001.11:g.150578440G>A' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -g42 -sg43 -S'150578440' -p51 -sg45 -g46 -sssS'GRCh37' -p52 -(dp53 -g35 -S'NC_000001.10:g.150550916G>A' -p54 -sg37 -(dp55 -g39 -S'1' -p56 -sg41 -g42 -sg43 -S'150550916' -p57 -sg45 -g46 -sssS'GRCh38' -p58 -(dp59 -g35 -S'NC_000001.11:g.150578440G>A' -p60 -sg37 -(dp61 -g39 -g56 -sg41 -g42 -sg43 -S'150578440' -p62 -sg45 -g46 -sssssS'NM_001197320.1:c.281C>T' -p63 -(dp64 -g5 -g6 -sg7 -(lp65 -S'RefSeqGene record not available' -p66 -asg10 -g6 -sg11 -(lp67 -sg13 -VHomo sapiens 
MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 3, mRNA -p68 -sg15 -S'MCL1' -p69 -sg17 -(dp70 -g19 -S'NP_001184249.1:p.(Ser94Phe)' -p71 -sg21 -S'NP_001184249.1:p.(S94F)' -p72 -ssg23 -g24 -sg25 -g6 -sg27 -g6 -sg28 -S'NM_001197320.1:c.281C>T' -p73 -sg30 -g6 -sg31 -(dp74 -g33 -(dp75 -g35 -S'NC_000001.10:g.150550916G>A' -p76 -sg37 -(dp77 -g39 -g40 -sg41 -g42 -sg43 -S'150550916' -p78 -sg45 -g46 -sssg47 -(dp79 -g35 -S'NC_000001.11:g.150578440G>A' -p80 -sg37 -(dp81 -g39 -g40 -sg41 -g42 -sg43 -S'150578440' -p82 -sg45 -g46 -sssg52 -(dp83 -g35 -S'NC_000001.10:g.150550916G>A' -p84 -sg37 -(dp85 -g39 -g56 -sg41 -g42 -sg43 -S'150550916' -p86 -sg45 -g46 -sssg58 -(dp87 -g35 -S'NC_000001.11:g.150578440G>A' -p88 -sg37 -(dp89 -g39 -g56 -sg41 -g42 -sg43 -S'150578440' -p90 -sg45 -g46 -sssssS'NM_021960.4:c.740C>T' -p91 -(dp92 -g5 -g6 -sg7 -(lp93 -sg10 -g6 -sg11 -(lp94 -sg13 -VHomo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 1, mRNA -p95 -sg15 -S'MCL1' -p96 -sg17 -(dp97 -g19 -S'NP_068779.1:p.(Ser247Phe)' -p98 -sg21 -S'NP_068779.1:p.(S247F)' -p99 -ssg23 -g24 -sg25 -g6 -sg27 -g6 -sg28 -S'NM_021960.4:c.740C>T' -p100 -sg30 -S'NG_029146.1:g.6299C>T' -p101 -sg31 -(dp102 -g33 -(dp103 -g35 -S'NC_000001.10:g.150550916G>A' -p104 -sg37 -(dp105 -g39 -g40 -sg41 -g42 -sg43 -S'150550916' -p106 -sg45 -g46 -sssg47 -(dp107 -g35 -S'NC_000001.11:g.150578440G>A' -p108 -sg37 -(dp109 -g39 -g40 -sg41 -g42 -sg43 -S'150578440' -p110 -sg45 -g46 -sssg52 -(dp111 -g35 -S'NC_000001.10:g.150550916G>A' -p112 -sg37 -(dp113 -g39 -g56 -sg41 -g42 -sg43 -S'150550916' -p114 -sg45 -g46 -sssg58 -(dp115 -g35 -S'NC_000001.11:g.150578440G>A' -p116 -sg37 -(dp117 -g39 -g56 -sg41 -g42 -sg43 -S'150578440' -p118 -sg45 -g46 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant37.txt b/VariantValidator/testing/testOutputsMasterITS/variant37.txt deleted file mode 100644 index a1455cbf..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant37.txt +++ /dev/null @@ -1,60 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. NG_(NM_):c.PositionVariation' -p7 -aS'For additional assistance, submit NG_008123.1:c.2055+18G>A to VariantValidator' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -g4 -sS'gene_symbol' -p13 -g4 -sS'HGVS_predicted_protein_consequence' -p14 -(dp15 -S'tlr' -p16 -g4 -sS'slr' -p17 -g4 -ssS'submitted_variant' -p18 -S'NG_008123.1(LEPRE1_v003):c.2055+18G>A' -p19 -sS'genome_context_intronic_sequence' -p20 -g4 -sS'HGVS_LRG_variant' -p21 -g4 -sS'HGVS_transcript_variant' -p22 -g4 -sS'HGVS_RefSeqGene_variant' -p23 -g4 -sS'primary_assembly_loci' -p24 -(dp25 -ssS'flag' -p26 -S'warning' -p27 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant38.txt b/VariantValidator/testing/testOutputsMasterITS/variant38.txt deleted file mode 100644 index bdcc1949..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant38.txt +++ /dev/null @@ -1,60 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation. 
Re-submit NG_008123.1:c.2055+18G>A but also specify transcripts from the following' -p7 -aS'select_transcripts=NM_001146289.1|NM_022356.3|NM_001243246.1' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -g4 -sS'gene_symbol' -p13 -g4 -sS'HGVS_predicted_protein_consequence' -p14 -(dp15 -S'tlr' -p16 -g4 -sS'slr' -p17 -g4 -ssS'submitted_variant' -p18 -S'NG_008123.1:c.2055+18G>A' -p19 -sS'genome_context_intronic_sequence' -p20 -g4 -sS'HGVS_LRG_variant' -p21 -g4 -sS'HGVS_transcript_variant' -p22 -g4 -sS'HGVS_RefSeqGene_variant' -p23 -g4 -sS'primary_assembly_loci' -p24 -(dp25 -ssS'flag' -p26 -S'warning' -p27 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant39.txt b/VariantValidator/testing/testOutputsMasterITS/variant39.txt deleted file mode 100644 index d46d4ee0..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant39.txt +++ /dev/null @@ -1,143 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_022356.3:c.2055+18G>A' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_5t1:c.2055+18G>A' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'NG_008123.1(NM_022356.3):c.2055+18G>A' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens prolyl 3-hydroxylase 1 (P3H1), transcript variant 1, mRNA -p14 -sS'gene_symbol' -p15 -S'P3H1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_071751.3(LRG_5p1):p.?' -p20 -sS'slr' -p21 -S'NP_071751.3:p.?' 
-p22 -ssS'submitted_variant' -p23 -S'NG_008123.1(NM_022356.3):c.2055+18G>A' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000001.10(NM_022356.3):c.2055+18G>A' -p26 -sS'HGVS_LRG_variant' -p27 -S'LRG_5:g.24831G>A' -p28 -sS'HGVS_transcript_variant' -p29 -S'NM_022356.3:c.2055+18G>A' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_008123.1:g.24831G>A' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000001.10:g.43212925C>T' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr1' -p42 -sS'ref' -p43 -VC -p44 -sS'pos' -p45 -S'43212925' -p46 -sS'alt' -p47 -VT -p48 -sssS'hg38' -p49 -(dp50 -g37 -S'NC_000001.11:g.42747254C>T' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -g44 -sg45 -S'42747254' -p53 -sg47 -g48 -sssS'GRCh37' -p54 -(dp55 -g37 -S'NC_000001.10:g.43212925C>T' -p56 -sg39 -(dp57 -g41 -S'1' -p58 -sg43 -g44 -sg45 -S'43212925' -p59 -sg47 -g48 -sssS'GRCh38' -p60 -(dp61 -g37 -S'NC_000001.11:g.42747254C>T' -p62 -sg39 -(dp63 -g41 -g58 -sg43 -g44 -sg45 -S'42747254' -p64 -sg47 -g48 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant4.txt b/VariantValidator/testing/testOutputsMasterITS/variant4.txt deleted file mode 100644 index a0458fe8..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant4.txt +++ /dev/null @@ -1,247 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000109.3:c.7+127703T>A' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens dystrophin (DMD), transcript variant Dp427c, mRNA -p14 -sS'gene_symbol' -p15 -S'DMD' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000100.2:p.?' -p20 -sS'slr' -p21 -S'NP_000100.2:p.?' 
-p22 -ssS'submitted_variant' -p23 -S'NC_000023.10:g.33229673A>T' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000023.10(NM_000109.3):c.7+127703T>A' -p26 -sS'HGVS_LRG_variant' -p27 -g6 -sS'HGVS_transcript_variant' -p28 -S'NM_000109.3:c.7+127703T>A' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -g6 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'HGVS_genomic_description' -p35 -S'NC_000023.10:g.33229673A>T' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chrX' -p40 -sS'ref' -p41 -VA -p42 -sS'pos' -p43 -S'33229673' -p44 -sS'alt' -p45 -VT -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000023.11:g.33211556A>T' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -g42 -sg43 -S'33211556' -p51 -sg45 -g46 -sssS'GRCh37' -p52 -(dp53 -g35 -S'NC_000023.10:g.33229673A>T' -p54 -sg37 -(dp55 -g39 -S'X' -p56 -sg41 -g42 -sg43 -S'33229673' -p57 -sg45 -g46 -sssS'GRCh38' -p58 -(dp59 -g35 -S'NC_000023.11:g.33211556A>T' -p60 -sg37 -(dp61 -g39 -g56 -sg41 -g42 -sg43 -S'33211556' -p62 -sg45 -g46 -sssssS'NM_004006.2:c.-244T>A' -p63 -(dp64 -g5 -S'LRG_199t1:c.-244T>A' -p65 -sg7 -(lp66 -sg10 -g6 -sg11 -(lp67 -sg13 -VHomo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA -p68 -sg15 -S'DMD' -p69 -sg17 -(dp70 -g19 -S'NP_003997.1(LRG_199p1):p.?' -p71 -sg21 -S'NP_003997.1:p.?' -p72 -ssg23 -g24 -sg25 -g6 -sg27 -S'LRG_199:g.133054T>A' -p73 -sg28 -S'NM_004006.2:c.-244T>A' -p74 -sg30 -S'NG_012232.1:g.133054T>A' -p75 -sg31 -(dp76 -g33 -(dp77 -g35 -S'NC_000023.10:g.33229673A>T' -p78 -sg37 -(dp79 -g39 -g40 -sg41 -g42 -sg43 -S'33229673' -p80 -sg45 -g46 -sssg47 -(dp81 -g35 -S'NC_000023.11:g.33211556A>T' -p82 -sg37 -(dp83 -g39 -g40 -sg41 -g42 -sg43 -S'33211556' -p84 -sg45 -g46 -sssg52 -(dp85 -g35 -S'NC_000023.10:g.33229673A>T' -p86 -sg37 -(dp87 -g39 -g56 -sg41 -g42 -sg43 -S'33229673' -p88 -sg45 -g46 -sssg58 -(dp89 -g35 -S'NC_000023.11:g.33211556A>T' -p90 -sg37 -(dp91 -g39 -g56 -sg41 -g42 -sg43 -S'33211556' -p92 -sg45 -g46 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant40.txt b/VariantValidator/testing/testOutputsMasterITS/variant40.txt deleted file mode 100644 index 44cbb19f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant40.txt +++ /dev/null @@ -1,257 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_021983.4:c.490G>C' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_021983.4:c.490G>C cannot be mapped directly to genome build GRCh37' -p9 -aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -(dp14 -S'GRCh37' -p15 -(dp16 -S'HGVS_genomic_description' -p17 -S'NT_167246.1:g.3848158T>G' -p18 -sS'vcf' -p19 -(dp20 -S'chr' -p21 -S'HSCHR6_MHC_MANN_CTG1' -p22 -sS'ref' -p23 -S'T' -p24 -sS'pos' -p25 -S'3848158' -p26 -sS'alt' -p27 -VG -p28 -sssa(dp29 -S'hg19' -p30 -(dp31 -g17 -S'NT_167246.1:g.3848158T>G' -p32 -sg19 -(dp33 -g21 -S'chr6_mann_hap4' -p34 -sg23 -g24 -sg25 -S'3848158' -p35 -sg27 -g28 -sssa(dp36 -S'GRCh38' -p37 -(dp38 -g17 -S'NT_167246.2:g.3842538T>G' -p39 -sg19 -(dp40 -g21 -g22 -sg23 -g24 -sg25 -S'3842538' -p41 -sg27 -g28 -sssa(dp42 -S'hg38' -p43 -(dp44 -g17 -S'NT_167246.2:g.3842538T>G' -p45 -sg19 -(dp46 -g21 -S'chr6_GL000253v2_alt' -p47 -sg23 -g24 -sg25 -S'3842538' -p48 -sg27 -g28 -sssa(dp49 -g15 -(dp50 -g17 -S'NT_167247.1:g.3884432C>G' -p51 -sg19 -(dp52 -g21 -S'HSCHR6_MHC_MCF_CTG1' -p53 -sg23 -VC -p54 -sg25 -S'3884432' -p55 -sg27 -g28 -sssa(dp56 -g30 -(dp57 -g17 -S'NT_167247.1:g.3884432C>G' -p58 -sg19 -(dp59 -g21 -S'chr6_mcf_hap5' -p60 -sg23 -g54 -sg25 -S'3884432' -p61 -sg27 -g28 -sssa(dp62 -g15 -(dp63 -g17 -S'NT_167249.1:g.3852542C>G' -p64 -sg19 -(dp65 -g21 -S'HSCHR6_MHC_SSTO_CTG1' -p66 -sg23 -g54 -sg25 -S'3852542' -p67 -sg27 -g28 -sssa(dp68 -g30 -(dp69 -g17 -S'NT_167249.1:g.3852542C>G' -p70 -sg19 -(dp71 -g21 -S'chr6_ssto_hap7' -p72 
-sg23 -g54 -sg25 -S'3852542' -p73 -sg27 -g28 -sssa(dp74 -g37 -(dp75 -g17 -S'NT_167249.2:g.3853244C>G' -p76 -sg19 -(dp77 -g21 -g66 -sg23 -g54 -sg25 -S'3853244' -p78 -sg27 -g28 -sssa(dp79 -g43 -(dp80 -g17 -S'NT_167249.2:g.3853244C>G' -p81 -sg19 -(dp82 -g21 -S'chr6_GL000256v2_alt' -p83 -sg23 -g54 -sg25 -S'3853244' -p84 -sg27 -g28 -sssasS'transcript_description' -p85 -VHomo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA -p86 -sS'gene_symbol' -p87 -S'HLA-DRB4' -p88 -sS'HGVS_predicted_protein_consequence' -p89 -(dp90 -S'tlr' -p91 -S'NP_068818.4:p.(Gly164Arg)' -p92 -sS'slr' -p93 -S'NP_068818.4:p.(G164R)' -p94 -ssS'submitted_variant' -p95 -S'NM_021983.4:c.490G>C' -p96 -sS'genome_context_intronic_sequence' -p97 -g6 -sS'HGVS_LRG_variant' -p98 -g6 -sS'HGVS_transcript_variant' -p99 -S'NM_021983.4:c.490G>C' -p100 -sS'HGVS_RefSeqGene_variant' -p101 -S'NG_002433.1:g.5724C>G' -p102 -sS'primary_assembly_loci' -p103 -(dp104 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant41.txt b/VariantValidator/testing/testOutputsMasterITS/variant41.txt deleted file mode 100644 index 44660344..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant41.txt +++ /dev/null @@ -1,519 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_032470.3:c.4del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -(dp13 -S'GRCh37' -p14 -(dp15 -S'HGVS_genomic_description' -p16 -S'NT_113891.2:g.3483644del' -p17 -sS'vcf' -p18 -(dp19 -S'chr' -p20 -S'HSCHR6_MHC_COX_CTG1' -p21 -sS'ref' -p22 -S'CG' -p23 -sS'pos' -p24 -S'3483643' -p25 -sS'alt' -p26 -S'C' -p27 -sssa(dp28 -S'hg19' -p29 -(dp30 -g16 -S'NT_113891.2:g.3483644del' -p31 -sg18 -(dp32 -g20 -S'chr6_cox_hap2' -p33 -sg22 -S'CG' -p34 -sg24 -S'3483643' -p35 -sg26 -g27 -sssa(dp36 -S'GRCh38' -p37 -(dp38 -g16 
-S'NT_113891.3:g.3483538del' -p39 -sg18 -(dp40 -g20 -g21 -sg22 -S'CG' -p41 -sg24 -S'3483537' -p42 -sg26 -g27 -sssa(dp43 -S'hg38' -p44 -(dp45 -g16 -S'NT_113891.3:g.3483538del' -p46 -sg18 -(dp47 -g20 -S'chr6_GL000251v2_alt' -p48 -sg22 -S'CG' -p49 -sg24 -S'3483537' -p50 -sg26 -g27 -sssa(dp51 -g14 -(dp52 -g16 -S'NT_167245.1:g.3292210del' -p53 -sg18 -(dp54 -g20 -S'HSCHR6_MHC_DBB_CTG1' -p55 -sg22 -S'CG' -p56 -sg24 -S'3292209' -p57 -sg26 -g27 -sssa(dp58 -g29 -(dp59 -g16 -S'NT_167245.1:g.3292210del' -p60 -sg18 -(dp61 -g20 -S'chr6_dbb_hap3' -p62 -sg22 -S'CG' -p63 -sg24 -S'3292209' -p64 -sg26 -g27 -sssa(dp65 -g37 -(dp66 -g16 -S'NT_167245.2:g.3286625del' -p67 -sg18 -(dp68 -g20 -g55 -sg22 -S'CG' -p69 -sg24 -S'3286624' -p70 -sg26 -g27 -sssa(dp71 -g44 -(dp72 -g16 -S'NT_167245.2:g.3286625del' -p73 -sg18 -(dp74 -g20 -S'chr6_GL000252v2_alt' -p75 -sg22 -S'CG' -p76 -sg24 -S'3286624' -p77 -sg26 -g27 -sssa(dp78 -g14 -(dp79 -g16 -S'NT_167247.1:g.3392834del' -p80 -sg18 -(dp81 -g20 -S'HSCHR6_MHC_MCF_CTG1' -p82 -sg22 -S'CG' -p83 -sg24 -S'3392833' -p84 -sg26 -g27 -sssa(dp85 -g29 -(dp86 -g16 -S'NT_167247.1:g.3392834del' -p87 -sg18 -(dp88 -g20 -S'chr6_mcf_hap5' -p89 -sg22 -S'CG' -p90 -sg24 -S'3392833' -p91 -sg26 -g27 -sssa(dp92 -g37 -(dp93 -g16 -S'NT_167247.2:g.3387249del' -p94 -sg18 -(dp95 -g20 -g82 -sg22 -S'CG' -p96 -sg24 -S'3387248' -p97 -sg26 -g27 -sssa(dp98 -g44 -(dp99 -g16 -S'NT_167247.2:g.3387249del' -p100 -sg18 -(dp101 -g20 -S'chr6_GL000254v2_alt' -p102 -sg22 -S'CG' -p103 -sg24 -S'3387248' -p104 -sg26 -g27 -sssa(dp105 -g14 -(dp106 -g16 -S'NT_167248.1:g.3274047del' -p107 -sg18 -(dp108 -g20 -S'HSCHR6_MHC_QBL_CTG1' -p109 -sg22 -S'CG' -p110 -sg24 -S'3274046' -p111 -sg26 -g27 -sssa(dp112 -g29 -(dp113 -g16 -S'NT_167248.1:g.3274047del' -p114 -sg18 -(dp115 -g20 -S'chr6_qbl_hap6' -p116 -sg22 -S'CG' -p117 -sg24 -S'3274046' -p118 -sg26 -g27 -sssa(dp119 -g37 -(dp120 -g16 -S'NT_167248.2:g.3268451del' -p121 -sg18 -(dp122 -g20 -g109 -sg22 -S'CG' -p123 -sg24 -S'3268450' -p124 -sg26 -g27 -sssa(dp125 
-g44 -(dp126 -g16 -S'NT_167248.2:g.3268451del' -p127 -sg18 -(dp128 -g20 -S'chr6_GL000255v2_alt' -p129 -sg22 -S'CG' -p130 -sg24 -S'3268450' -p131 -sg26 -g27 -sssa(dp132 -g14 -(dp133 -g16 -S'NT_167249.1:g.3345701del' -p134 -sg18 -(dp135 -g20 -S'HSCHR6_MHC_SSTO_CTG1' -p136 -sg22 -S'CG' -p137 -sg24 -S'3345700' -p138 -sg26 -g27 -sssa(dp139 -g29 -(dp140 -g16 -S'NT_167249.1:g.3345701del' -p141 -sg18 -(dp142 -g20 -S'chr6_ssto_hap7' -p143 -sg22 -S'CG' -p144 -sg24 -S'3345700' -p145 -sg26 -g27 -sssa(dp146 -g37 -(dp147 -g16 -S'NT_167249.2:g.3346403del' -p148 -sg18 -(dp149 -g20 -g136 -sg22 -S'CG' -p150 -sg24 -S'3346402' -p151 -sg26 -g27 -sssa(dp152 -g44 -(dp153 -g16 -S'NT_167249.2:g.3346403del' -p154 -sg18 -(dp155 -g20 -S'chr6_GL000256v2_alt' -p156 -sg22 -S'CG' -p157 -sg24 -S'3346402' -p158 -sg26 -g27 -sssasS'transcript_description' -p159 -VHomo sapiens tenascin XB (TNXB), transcript variant XB-S, mRNA -p160 -sS'gene_symbol' -p161 -S'TNXB' -p162 -sS'HGVS_predicted_protein_consequence' -p163 -(dp164 -S'tlr' -p165 -S'NP_115859.2:p.(Arg2AlafsTer91)' -p166 -sS'slr' -p167 -S'NP_115859.2:p.(R2Afs*91)' -p168 -ssS'submitted_variant' -p169 -S'NM_032470.3:c.4del' -p170 -sS'genome_context_intronic_sequence' -p171 -g6 -sS'HGVS_LRG_variant' -p172 -g6 -sS'HGVS_transcript_variant' -p173 -S'NM_032470.3:c.4del' -p174 -sS'HGVS_RefSeqGene_variant' -p175 -g6 -sS'primary_assembly_loci' -p176 -(dp177 -g29 -(dp178 -g16 -S'NC_000006.11:g.32012993del' -p179 -sg18 -(dp180 -g20 -S'chr6' -p181 -sg22 -S'CG' -p182 -sg24 -S'32012992' -p183 -sg26 -g27 -sssg44 -(dp184 -g16 -S'NC_000006.12:g.32045216del' -p185 -sg18 -(dp186 -g20 -g181 -sg22 -S'CG' -p187 -sg24 -S'32045215' -p188 -sg26 -g27 -sssg14 -(dp189 -g16 -S'NC_000006.11:g.32012993del' -p190 -sg18 -(dp191 -g20 -S'6' -p192 -sg22 -S'CG' -p193 -sg24 -S'32012992' -p194 -sg26 -g27 -sssg37 -(dp195 -g16 -S'NC_000006.12:g.32045216del' -p196 -sg18 -(dp197 -g20 -g192 -sg22 -S'CG' -p198 -sg24 -S'32045215' -p199 -sg26 -g27 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant42.txt b/VariantValidator/testing/testOutputsMasterITS/variant42.txt deleted file mode 100644 index 2d7745e3..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant42.txt +++ /dev/null @@ -1,129 +0,0 @@ -(dp0 -S'NM_001194958.2:c.20C>A' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NM_001194958.2:c.20C>A cannot be mapped directly to genome build GRCh37' -p7 -aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -(dp12 -S'GRCh37' -p13 -(dp14 -S'HGVS_genomic_description' -p15 -S'NW_003315950.2:g.355171C>A' -p16 -sS'vcf' -p17 -(dp18 -S'chr' -p19 -S'HG987_PATCH' -p20 -sS'ref' -p21 -S'C' -p22 -sS'pos' -p23 -S'355171' -p24 -sS'alt' -p25 -S'A' -p26 -sssasS'transcript_description' -p27 -VHomo sapiens potassium voltage-gated channel subfamily J member 18 (KCNJ18), mRNA -p28 -sS'gene_symbol' -p29 -S'KCNJ18' -p30 -sS'HGVS_predicted_protein_consequence' -p31 -(dp32 -S'tlr' -p33 -S'NP_001181887.2:p.(Ala7Asp)' -p34 -sS'slr' -p35 -S'NP_001181887.2:p.(A7D)' -p36 -ssS'submitted_variant' -p37 -S'NM_001194958.2:c.20C>A' -p38 -sS'genome_context_intronic_sequence' -p39 -g4 -sS'HGVS_LRG_variant' -p40 -g4 -sS'HGVS_transcript_variant' -p41 -S'NM_001194958.2:c.20C>A' -p42 -sS'HGVS_RefSeqGene_variant' -p43 -S'NG_033093.1:g.15284C>A' -p44 -sS'primary_assembly_loci' -p45 -(dp46 -S'GRCh38' -p47 -(dp48 -g15 -S'NC_000017.11:g.21702806C>A' -p49 -sg17 -(dp50 -g19 -S'17' -p51 -sg21 -g22 -sg23 -S'21702806' -p52 -sg25 -g26 -sssS'hg38' -p53 -(dp54 -g15 -S'NC_000017.11:g.21702806C>A' -p55 -sg17 -(dp56 -g19 -S'chr17' -p57 -sg21 -g22 -sg23 -S'21702806' -p58 -sg25 -g26 -sssssS'flag' -p59 -S'gene_variant' -p60 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant43.txt b/VariantValidator/testing/testOutputsMasterITS/variant43.txt deleted file mode 100644 index 7bf48e25..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant43.txt +++ /dev/null @@ -1,114 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000022.2:c.534A>G' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_16t1:c.534A>G' -p6 -sS'validation_warnings' -p7 -(lp8 -S'A more recent version of the selected reference sequence NM_000022.2 is available (NM_000022.3)' -p9 -aS'NM_000022.3:c.534A>G MUST be fully validated prior to use in reports' -p10 -aS'select_variants=NM_000022.3:c.534A>G' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -S'' -p13 -sS'alt_genomic_loci' -p14 -(lp15 -sS'transcript_description' -p16 -VHomo sapiens adenosine deaminase (ADA), mRNA -p17 -sS'gene_symbol' -p18 -S'ADA' -p19 -sS'HGVS_predicted_protein_consequence' -p20 -(dp21 -S'tlr' -p22 -S'NP_000013.2(LRG_16p1):p.(Val178=)' -p23 -sS'slr' -p24 -S'NP_000013.2:p.(V178=)' -p25 -ssS'submitted_variant' -p26 -S'NM_000022.2:c.534A>G' -p27 -sS'genome_context_intronic_sequence' -p28 -g13 -sS'HGVS_LRG_variant' -p29 -S'LRG_16:g.32462A>G' -p30 -sS'HGVS_transcript_variant' -p31 -S'NM_000022.2:c.534A>G' -p32 -sS'HGVS_RefSeqGene_variant' -p33 -S'NG_007385.1:g.32462A>G' -p34 -sS'primary_assembly_loci' -p35 -(dp36 -S'hg19' -p37 -(dp38 -S'HGVS_genomic_description' -p39 -S'NC_000020.10:g.43252915T>C' -p40 -sS'vcf' -p41 -(dp42 -S'chr' -p43 -S'chr20' -p44 -sS'ref' -p45 -VT -p46 -sS'pos' -p47 -S'43252915' -p48 -sS'alt' -p49 -VC -p50 -sssS'GRCh37' -p51 -(dp52 -g39 -S'NC_000020.10:g.43252915T>C' -p53 -sg41 -(dp54 -g43 -S'20' -p55 -sg45 -g46 -sg47 -S'43252915' -p56 -sg49 -g50 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant44.txt b/VariantValidator/testing/testOutputsMasterITS/variant44.txt deleted file mode 100644 index 9567104c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant44.txt +++ /dev/null @@ -1,257 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_021983.4:c.490G>C' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_021983.4:c.490G>C cannot be mapped directly to genome build GRCh37' -p9 -aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -(dp14 -S'GRCh37' -p15 -(dp16 -S'HGVS_genomic_description' -p17 -S'NT_167246.1:g.3848158T>G' -p18 -sS'vcf' -p19 -(dp20 -S'chr' -p21 -S'HSCHR6_MHC_MANN_CTG1' -p22 -sS'ref' -p23 -S'T' -p24 -sS'pos' -p25 -S'3848158' -p26 -sS'alt' -p27 -VG -p28 -sssa(dp29 -S'hg19' -p30 -(dp31 -g17 -S'NT_167246.1:g.3848158T>G' -p32 -sg19 -(dp33 -g21 -S'chr6_mann_hap4' -p34 -sg23 -g24 -sg25 -S'3848158' -p35 -sg27 -g28 -sssa(dp36 -S'GRCh38' -p37 -(dp38 -g17 -S'NT_167246.2:g.3842538T>G' -p39 -sg19 -(dp40 -g21 -g22 -sg23 -g24 -sg25 -S'3842538' -p41 -sg27 -g28 -sssa(dp42 -S'hg38' -p43 -(dp44 -g17 -S'NT_167246.2:g.3842538T>G' -p45 -sg19 -(dp46 -g21 -S'chr6_GL000253v2_alt' -p47 -sg23 -g24 -sg25 -S'3842538' -p48 -sg27 -g28 -sssa(dp49 -g15 -(dp50 -g17 -S'NT_167247.1:g.3884432C>G' -p51 -sg19 -(dp52 -g21 -S'HSCHR6_MHC_MCF_CTG1' -p53 -sg23 -VC -p54 -sg25 -S'3884432' -p55 -sg27 -g28 -sssa(dp56 -g30 -(dp57 -g17 -S'NT_167247.1:g.3884432C>G' -p58 -sg19 -(dp59 -g21 -S'chr6_mcf_hap5' -p60 -sg23 -g54 -sg25 -S'3884432' -p61 -sg27 -g28 -sssa(dp62 -g15 -(dp63 -g17 -S'NT_167249.1:g.3852542C>G' -p64 -sg19 -(dp65 -g21 -S'HSCHR6_MHC_SSTO_CTG1' -p66 -sg23 -g54 -sg25 -S'3852542' -p67 -sg27 -g28 -sssa(dp68 -g30 -(dp69 -g17 -S'NT_167249.1:g.3852542C>G' -p70 -sg19 -(dp71 -g21 -S'chr6_ssto_hap7' -p72 
-sg23 -g54 -sg25 -S'3852542' -p73 -sg27 -g28 -sssa(dp74 -g37 -(dp75 -g17 -S'NT_167249.2:g.3853244C>G' -p76 -sg19 -(dp77 -g21 -g66 -sg23 -g54 -sg25 -S'3853244' -p78 -sg27 -g28 -sssa(dp79 -g43 -(dp80 -g17 -S'NT_167249.2:g.3853244C>G' -p81 -sg19 -(dp82 -g21 -S'chr6_GL000256v2_alt' -p83 -sg23 -g54 -sg25 -S'3853244' -p84 -sg27 -g28 -sssasS'transcript_description' -p85 -VHomo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA -p86 -sS'gene_symbol' -p87 -S'HLA-DRB4' -p88 -sS'HGVS_predicted_protein_consequence' -p89 -(dp90 -S'tlr' -p91 -S'NP_068818.4:p.(Gly164Arg)' -p92 -sS'slr' -p93 -S'NP_068818.4:p.(G164R)' -p94 -ssS'submitted_variant' -p95 -S'HSCHR6_MHC_SSTO_CTG1-3852542-C-G' -p96 -sS'genome_context_intronic_sequence' -p97 -g6 -sS'HGVS_LRG_variant' -p98 -g6 -sS'HGVS_transcript_variant' -p99 -S'NM_021983.4:c.490G>C' -p100 -sS'HGVS_RefSeqGene_variant' -p101 -S'NG_002433.1:g.5724C>G' -p102 -sS'primary_assembly_loci' -p103 -(dp104 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant45.txt b/VariantValidator/testing/testOutputsMasterITS/variant45.txt deleted file mode 100644 index 6a747e7d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant45.txt +++ /dev/null @@ -1,146 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000368.4:c.363+1dup' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_486t1:c.363+1dup' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'NG_012386.1(NM_000368.4):c.363+1dup' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA -p14 -sS'gene_symbol' -p15 -S'TSC1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000359.1(LRG_486p1):p.?' -p20 -sS'slr' -p21 -S'NP_000359.1:p.?' 
-p22 -ssS'submitted_variant' -p23 -S'NM_000368.4:c.363+1dupG' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000009.11(NM_000368.4):c.363+1dup' -p26 -sS'HGVS_LRG_variant' -p27 -S'LRG_486:g.24048dup' -p28 -sS'HGVS_transcript_variant' -p29 -S'NM_000368.4:c.363+1dup' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_012386.1:g.24048dup' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000009.11:g.135800973dup' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr9' -p42 -sS'ref' -p43 -S'C' -p44 -sS'pos' -p45 -S'135800973' -p46 -sS'alt' -p47 -S'CC' -p48 -sssS'hg38' -p49 -(dp50 -g37 -S'NC_000009.12:g.132925586dup' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -g44 -sg45 -S'132925586' -p53 -sg47 -S'CC' -p54 -sssS'GRCh37' -p55 -(dp56 -g37 -S'NC_000009.11:g.135800973dup' -p57 -sg39 -(dp58 -g41 -S'9' -p59 -sg43 -g44 -sg45 -S'135800973' -p60 -sg47 -S'CC' -p61 -sssS'GRCh38' -p62 -(dp63 -g37 -S'NC_000009.12:g.132925586dup' -p64 -sg39 -(dp65 -g41 -g59 -sg43 -g44 -sg45 -S'132925586' -p66 -sg47 -S'CC' -p67 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant46.txt b/VariantValidator/testing/testOutputsMasterITS/variant46.txt deleted file mode 100644 index 05aac70a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant46.txt +++ /dev/null @@ -1,148 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000368.4:c.363+1dup' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_486t1:c.363+1dup' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000368.4:c.363dup normalized to NM_000368.4:c.363+1dup' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'NG_012386.1(NM_000368.4):c.363+1dup' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA -p15 -sS'gene_symbol' -p16 -S'TSC1' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000359.1(LRG_486p1):p.(Met122AspfsTer4)' -p21 -sS'slr' -p22 -S'NP_000359.1:p.(M122Dfs*4)' -p23 -ssS'submitted_variant' -p24 -S'NM_000368.4:c.363dupG' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000009.11(NM_000368.4):c.363+1dup' -p27 -sS'HGVS_LRG_variant' -p28 -S'LRG_486:g.24048dup' -p29 -sS'HGVS_transcript_variant' -p30 -S'NM_000368.4:c.363+1dup' -p31 -sS'HGVS_RefSeqGene_variant' -p32 -S'NG_012386.1:g.24048dup' -p33 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000009.11:g.135800973dup' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chr9' -p43 -sS'ref' -p44 -S'C' -p45 -sS'pos' -p46 -S'135800973' -p47 -sS'alt' -p48 -S'CC' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000009.12:g.132925586dup' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -g45 -sg46 -S'132925586' -p54 -sg48 -S'CC' -p55 -sssS'GRCh37' -p56 -(dp57 -g38 -S'NC_000009.11:g.135800973dup' -p58 -sg40 -(dp59 -g42 -S'9' -p60 -sg44 -g45 -sg46 -S'135800973' -p61 -sg48 -S'CC' -p62 -sssS'GRCh38' -p63 -(dp64 -g38 -S'NC_000009.12:g.132925586dup' -p65 -sg40 -(dp66 -g42 -g60 -sg44 -g45 -sg46 
-S'132925586' -p67 -sg48 -S'CC' -p68 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant47.txt b/VariantValidator/testing/testOutputsMasterITS/variant47.txt deleted file mode 100644 index 4bb6f48a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant47.txt +++ /dev/null @@ -1,148 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000089.3:c.1035_1035+2del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_2t1:c.1035_1035+2del' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000089.3:c.1033_1035del normalized to NM_000089.3:c.1035_1035+2del' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'NG_007405.1(NM_000089.3):c.1035_1035+2del' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 2 chain (COL1A2), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A2' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000080.2(LRG_2p1):p.(Val345del)' -p21 -sS'slr' -p22 -S'NP_000080.2:p.(V345del)' -p23 -ssS'submitted_variant' -p24 -S'NM_000089.3:c.1033_1035delGTT' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000007.13(NM_000089.3):c.1035_1035+2del' -p27 -sS'HGVS_LRG_variant' -p28 -S'LRG_2:g.20261_20263del' -p29 -sS'HGVS_transcript_variant' -p30 -S'NM_000089.3:c.1035_1035+2del' -p31 -sS'HGVS_RefSeqGene_variant' -p32 -S'NG_007405.1:g.20261_20263del' -p33 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000007.13:g.94039133_94039135del' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chr7' -p43 -sS'ref' -p44 -S'CTTG' -p45 -sS'pos' -p46 -S'94039128' -p47 -sS'alt' -p48 -S'C' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000007.14:g.94409821_94409823del' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'CTTG' -p54 -sg46 -S'94409816' -p55 -sg48 -g49 -sssS'GRCh37' -p56 -(dp57 -g38 -S'NC_000007.13:g.94039133_94039135del' -p58 -sg40 -(dp59 -g42 -S'7' -p60 -sg44 -S'CTTG' -p61 -sg46 -S'94039128' -p62 -sg48 -g49 
-sssS'GRCh38' -p63 -(dp64 -g38 -S'NC_000007.14:g.94409821_94409823del' -p65 -sg40 -(dp66 -g42 -g60 -sg44 -S'CTTG' -p67 -sg46 -S'94409816' -p68 -sg48 -g49 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant48.txt b/VariantValidator/testing/testOutputsMasterITS/variant48.txt deleted file mode 100644 index fd12887f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant48.txt +++ /dev/null @@ -1,146 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000089.3:c.1035_1035+2del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_2t1:c.1035_1035+2del' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'NG_007405.1(NM_000089.3):c.1035_1035+2del' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 2 chain (COL1A2), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A2' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000080.2(LRG_2p1):p.(Val345del)' -p20 -sS'slr' -p21 -S'NP_000080.2:p.(V345del)' -p22 -ssS'submitted_variant' -p23 -S'NM_000089.3:c.1035_1035+2delTGT' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000007.13(NM_000089.3):c.1035_1035+2del' -p26 -sS'HGVS_LRG_variant' -p27 -S'LRG_2:g.20261_20263del' -p28 -sS'HGVS_transcript_variant' -p29 -S'NM_000089.3:c.1035_1035+2del' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_007405.1:g.20261_20263del' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000007.13:g.94039133_94039135del' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr7' -p42 -sS'ref' -p43 -S'CTTG' -p44 -sS'pos' -p45 -S'94039128' -p46 -sS'alt' -p47 -S'C' -p48 -sssS'hg38' -p49 -(dp50 -g37 -S'NC_000007.14:g.94409821_94409823del' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -S'CTTG' -p53 -sg45 -S'94409816' -p54 -sg47 -g48 -sssS'GRCh37' -p55 -(dp56 -g37 -S'NC_000007.13:g.94039133_94039135del' -p57 -sg39 -(dp58 -g41 -S'7' -p59 -sg43 -S'CTTG' 
-p60 -sg45 -S'94039128' -p61 -sg47 -g48 -sssS'GRCh38' -p62 -(dp63 -g37 -S'NC_000007.14:g.94409821_94409823del' -p64 -sg39 -(dp65 -g41 -g59 -sg43 -S'CTTG' -p66 -sg45 -S'94409816' -p67 -sg47 -g48 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant49.txt b/VariantValidator/testing/testOutputsMasterITS/variant49.txt deleted file mode 100644 index 9dc593b2..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant49.txt +++ /dev/null @@ -1,148 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.2024_2028+1del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.2024_2028+1del' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000088.3:c.2023_2028del normalized to NM_000088.3:c.2024_2028+1del' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'NG_007400.1(NM_000088.3):c.2024_2028+1del' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A1' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000079.2(LRG_1p1):p.(Ala675_Arg676del)' -p21 -sS'slr' -p22 -S'NP_000079.2:p.(A675_R676del)' -p23 -ssS'submitted_variant' -p24 -S'NM_000088.3:c.2023_2028delGCAAGA' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000017.10(NM_000088.3):c.2024_2028+1del' -p27 -sS'HGVS_LRG_variant' -p28 -S'LRG_1:g.14656_14661del' -p29 -sS'HGVS_transcript_variant' -p30 -S'NM_000088.3:c.2024_2028+1del' -p31 -sS'HGVS_RefSeqGene_variant' -p32 -S'NG_007400.1:g.14656_14661del' -p33 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000017.10:g.48269340_48269345del' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chr17' -p43 -sS'ref' -p44 -S'ACTCTTG' -p45 -sS'pos' -p46 -S'48269339' -p47 -sS'alt' -p48 -S'A' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000017.11:g.50191979_50191984del' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'ACTCTTG' -p54 -sg46 -S'50191978' 
-p55 -sg48 -g49 -sssS'GRCh37' -p56 -(dp57 -g38 -S'NC_000017.10:g.48269340_48269345del' -p58 -sg40 -(dp59 -g42 -S'17' -p60 -sg44 -S'ACTCTTG' -p61 -sg46 -S'48269339' -p62 -sg48 -g49 -sssS'GRCh38' -p63 -(dp64 -g38 -S'NC_000017.11:g.50191979_50191984del' -p65 -sg40 -(dp66 -g42 -g60 -sg44 -S'ACTCTTG' -p67 -sg46 -S'50191978' -p68 -sg48 -g49 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant5.txt b/VariantValidator/testing/testOutputsMasterITS/variant5.txt deleted file mode 100644 index 5bb6da1e..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant5.txt +++ /dev/null @@ -1,111 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_001145026.1:c.715A>G' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -aS'NM_001145026.1:c.715A>G cannot be mapped directly to genome build GRCh37' -p10 -aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -g6 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens protein tyrosine phosphatase, receptor type Q (PTPRQ), mRNA -p16 -sS'gene_symbol' -p17 -S'PTPRQ' -p18 -sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_001138498.1:p.(Arg239Gly)' -p22 -sS'slr' -p23 -S'NP_001138498.1:p.(R239G)' -p24 -ssS'submitted_variant' -p25 -S'NM_001145026.1:c.715A>G' -p26 -sS'genome_context_intronic_sequence' -p27 -g6 -sS'HGVS_LRG_variant' -p28 -g6 -sS'HGVS_transcript_variant' -p29 -S'NM_001145026.1:c.715A>G' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -g6 -sS'primary_assembly_loci' -p32 -(dp33 -S'GRCh38' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000012.12:g.80460707A>G' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'12' -p41 -sS'ref' -p42 -VA -p43 -sS'pos' -p44 -S'80460707' -p45 -sS'alt' -p46 -VG -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000012.12:g.80460707A>G' -p50 -sg38 -(dp51 
-g40 -S'chr12' -p52 -sg42 -g43 -sg44 -S'80460707' -p53 -sg46 -g47 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant50.txt b/VariantValidator/testing/testOutputsMasterITS/variant50.txt deleted file mode 100644 index b440e5d6..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant50.txt +++ /dev/null @@ -1,147 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000089.3:c.938del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_2t1:c.938del' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000089.3:c.938-1del automapped to NM_000089.3:c.938del' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 2 chain (COL1A2), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A2' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000080.2(LRG_2p1):p.(Gly313AlafsTer86)' -p21 -sS'slr' -p22 -S'NP_000080.2:p.(G313Afs*86)' -p23 -ssS'submitted_variant' -p24 -S'NM_000089.3:c.938-1delG' -p25 -sS'genome_context_intronic_sequence' -p26 -g11 -sS'HGVS_LRG_variant' -p27 -S'LRG_2:g.20164del' -p28 -sS'HGVS_transcript_variant' -p29 -S'NM_000089.3:c.938del' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_007405.1:g.20164del' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000007.13:g.94039036del' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr7' -p42 -sS'ref' -p43 -S'AG' -p44 -sS'pos' -p45 -S'94039033' -p46 -sS'alt' -p47 -S'A' -p48 -sssS'hg38' -p49 -(dp50 -g37 -S'NC_000007.14:g.94409724del' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -S'AG' -p53 -sg45 -S'94409721' -p54 -sg47 -g48 -sssS'GRCh37' -p55 -(dp56 -g37 -S'NC_000007.13:g.94039036del' -p57 -sg39 -(dp58 -g41 -S'7' -p59 -sg43 -S'AG' -p60 -sg45 -S'94039033' -p61 -sg47 -g48 -sssS'GRCh38' -p62 -(dp63 -g37 -S'NC_000007.14:g.94409724del' -p64 -sg39 -(dp65 -g41 -g59 -sg43 -S'AG' -p66 -sg45 -S'94409721' -p67 -sg47 
-g48 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant51.txt b/VariantValidator/testing/testOutputsMasterITS/variant51.txt deleted file mode 100644 index c83a5073..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant51.txt +++ /dev/null @@ -1,141 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589G=' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.589G=' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2(LRG_1p1):p.(Gly197=)' -p20 -sS'slr' -p21 -S'NP_000079.2:p.(G197=)' -p22 -ssS'submitted_variant' -p23 -S'NM_000088.3:c.589G=' -p24 -sS'genome_context_intronic_sequence' -p25 -g10 -sS'HGVS_LRG_variant' -p26 -S'LRG_1:g.8638G=' -p27 -sS'HGVS_transcript_variant' -p28 -S'NM_000088.3:c.589G=' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_007400.1:g.8638G=' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000017.10:g.48275363C=' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr17' -p41 -sS'ref' -p42 -VC -p43 -sS'pos' -p44 -S'48275363' -p45 -sS'alt' -p46 -g43 -sssS'hg38' -p47 -(dp48 -g36 -S'NC_000017.11:g.50198002C=' -p49 -sg38 -(dp50 -g40 -g41 -sg42 -g43 -sg44 -S'50198002' -p51 -sg46 -g43 -sssS'GRCh37' -p52 -(dp53 -g36 -S'NC_000017.10:g.48275363C=' -p54 -sg38 -(dp55 -g40 -S'17' -p56 -sg42 -g43 -sg44 -S'48275363' -p57 -sg46 -g43 -sssS'GRCh38' -p58 -(dp59 -g36 -S'NC_000017.11:g.50198002C=' -p60 -sg38 -(dp61 -g40 -g56 -sg42 -g43 -sg44 -S'50198002' -p62 -sg46 -g43 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant52.txt b/VariantValidator/testing/testOutputsMasterITS/variant52.txt deleted file mode 100644 index ffcf2255..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant52.txt +++ /dev/null @@ -1,141 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.642A=' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.642A=' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2(LRG_1p1):p.(Ser214=)' -p20 -sS'slr' -p21 -S'NP_000079.2:p.(S214=)' -p22 -ssS'submitted_variant' -p23 -S'NM_000088.3:c.642A=' -p24 -sS'genome_context_intronic_sequence' -p25 -g10 -sS'HGVS_LRG_variant' -p26 -S'LRG_1:g.8691A=' -p27 -sS'HGVS_transcript_variant' -p28 -S'NM_000088.3:c.642A=' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_007400.1:g.8691A=' -p31 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000017.10:g.48275310T=' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr17' -p41 -sS'ref' -p42 -VT -p43 -sS'pos' -p44 -S'48275310' -p45 -sS'alt' -p46 -g43 -sssS'hg38' -p47 -(dp48 -g36 -S'NC_000017.11:g.50197949T=' -p49 -sg38 -(dp50 -g40 -g41 -sg42 -g43 -sg44 -S'50197949' -p51 -sg46 -g43 -sssS'GRCh37' -p52 -(dp53 -g36 -S'NC_000017.10:g.48275310T=' -p54 -sg38 -(dp55 -g40 -S'17' -p56 -sg42 -g43 -sg44 -S'48275310' -p57 -sg46 -g43 -sssS'GRCh38' -p58 -(dp59 -g36 -S'NC_000017.11:g.50197949T=' -p60 -sg38 -(dp61 -g40 -g56 -sg42 -g43 -sg44 -S'50197949' -p62 -sg46 -g43 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant53.txt b/VariantValidator/testing/testOutputsMasterITS/variant53.txt deleted file mode 100644 index 4b8a42f8..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant53.txt +++ /dev/null @@ -1,62 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NM_000088.3:c.642+1GG>G automapped to NM_000088.3:c.642+1_642+2delGGinsG' -p7 -aS'NC_000017.10:g.48275308_48275309delCCinsC' -p8 -aS'Variant reference (CC) does not agree with reference sequence (AC)' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g4 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -g4 -sS'gene_symbol' -p14 -g4 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -g4 -sS'slr' -p18 -g4 -ssS'submitted_variant' -p19 -S'NM_000088.3:c.642+1GG>G' -p20 -sS'genome_context_intronic_sequence' -p21 -g4 -sS'HGVS_LRG_variant' -p22 -g4 -sS'HGVS_transcript_variant' -p23 -g4 -sS'HGVS_RefSeqGene_variant' -p24 -g4 -sS'primary_assembly_loci' -p25 -(dp26 -ssS'flag' -p27 -S'warning' -p28 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant54.txt b/VariantValidator/testing/testOutputsMasterITS/variant54.txt deleted file mode 100644 index 60599eea..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant54.txt +++ /dev/null @@ -1,62 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NM_000088.3:c.589-2GG>G automapped to NM_000088.3:c.589-2_589-1delGGinsG' -p7 -aS'NC_000017.10:g.48275364_48275365delCCinsC' -p8 -aS'Variant reference (CC) does not agree with reference sequence (CT)' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g4 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -g4 -sS'gene_symbol' -p14 -g4 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -g4 -sS'slr' -p18 -g4 -ssS'submitted_variant' -p19 -S'NM_000088.3:c.589-2GG>G' -p20 -sS'genome_context_intronic_sequence' -p21 -g4 -sS'HGVS_LRG_variant' -p22 -g4 -sS'HGVS_transcript_variant' -p23 -g4 -sS'HGVS_RefSeqGene_variant' -p24 -g4 -sS'primary_assembly_loci' -p25 -(dp26 -ssS'flag' -p27 -S'warning' -p28 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant55.txt b/VariantValidator/testing/testOutputsMasterITS/variant55.txt deleted file mode 100644 index e912b8a8..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant55.txt +++ /dev/null @@ -1,148 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589-5_589-4insTTTT' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.589-5_589-4insTTTT' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000088.3:c.589-6_589-5insTTTT normalized to NM_000088.3:c.589-5_589-4insTTTT' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'NG_007400.1(NM_000088.3):c.589-5_589-4insTTTT' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A1' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000079.2(LRG_1p1):p.?' -p21 -sS'slr' -p22 -S'NP_000079.2:p.?' -p23 -ssS'submitted_variant' -p24 -S'NM_000088.3:c.589-6_589-5insTTTT' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000017.10(NM_000088.3):c.589-5_589-4insTTTT' -p27 -sS'HGVS_LRG_variant' -p28 -S'LRG_1:g.8633_8634insTTTT' -p29 -sS'HGVS_transcript_variant' -p30 -S'NM_000088.3:c.589-5_589-4insTTTT' -p31 -sS'HGVS_RefSeqGene_variant' -p32 -S'NG_007400.1:g.8633_8634insTTTT' -p33 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000017.10:g.48275367_48275368insAAAA' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chr17' -p43 -sS'ref' -p44 -S'G' -p45 -sS'pos' -p46 -S'48275367' -p47 -sS'alt' -p48 -VGAAAA -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000017.11:g.50198006_50198007insAAAA' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -g45 -sg46 -S'50198006' -p54 -sg48 -VGAAAA -p55 -sssS'GRCh37' -p56 -(dp57 -g38 -S'NC_000017.10:g.48275367_48275368insAAAA' -p58 -sg40 -(dp59 -g42 -S'17' -p60 -sg44 -g45 -sg46 -S'48275367' -p61 -sg48 -VGAAAA -p62 -sssS'GRCh38' 
-p63 -(dp64 -g38 -S'NC_000017.11:g.50198006_50198007insAAAA' -p65 -sg40 -(dp66 -g42 -g60 -sg44 -g45 -sg46 -S'50198006' -p67 -sg48 -VGAAAA -p68 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant56.txt b/VariantValidator/testing/testOutputsMasterITS/variant56.txt deleted file mode 100644 index 30d44a7f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant56.txt +++ /dev/null @@ -1,148 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.642+4_642+5insAAAA' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.642+4_642+5insAAAA' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000088.3:c.642+3_642+4insAAAA normalized to NM_000088.3:c.642+4_642+5insAAAA' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'NG_007400.1(NM_000088.3):c.642+4_642+5insAAAA' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A1' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000079.2(LRG_1p1):p.?' -p21 -sS'slr' -p22 -S'NP_000079.2:p.?' 
-p23 -ssS'submitted_variant' -p24 -S'NM_000088.3:c.642+3_642+4insAAAA' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000017.10(NM_000088.3):c.642+4_642+5insAAAA' -p27 -sS'HGVS_LRG_variant' -p28 -S'LRG_1:g.8695_8696insAAAA' -p29 -sS'HGVS_transcript_variant' -p30 -S'NM_000088.3:c.642+4_642+5insAAAA' -p31 -sS'HGVS_RefSeqGene_variant' -p32 -S'NG_007400.1:g.8695_8696insAAAA' -p33 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000017.10:g.48275305_48275306insTTTT' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chr17' -p43 -sS'ref' -p44 -S'C' -p45 -sS'pos' -p46 -S'48275305' -p47 -sS'alt' -p48 -VCTTTT -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000017.11:g.50197944_50197945insTTTT' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -g45 -sg46 -S'50197944' -p54 -sg48 -VCTTTT -p55 -sssS'GRCh37' -p56 -(dp57 -g38 -S'NC_000017.10:g.48275305_48275306insTTTT' -p58 -sg40 -(dp59 -g42 -S'17' -p60 -sg44 -g45 -sg46 -S'48275305' -p61 -sg48 -VCTTTT -p62 -sssS'GRCh38' -p63 -(dp64 -g38 -S'NC_000017.11:g.50197944_50197945insTTTT' -p65 -sg40 -(dp66 -g42 -g60 -sg44 -g45 -sg46 -S'50197944' -p67 -sg48 -VCTTTT -p68 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant57.txt b/VariantValidator/testing/testOutputsMasterITS/variant57.txt deleted file mode 100644 index 0ad82ef6..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant57.txt +++ /dev/null @@ -1,146 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589-4_589-3insTT' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.589-4_589-3insTT' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -S'NG_007400.1(NM_000088.3):c.589-4_589-3insTT' -p10 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2(LRG_1p1):p.?' -p20 -sS'slr' -p21 -S'NP_000079.2:p.?' -p22 -ssS'submitted_variant' -p23 -S'NM_000088.3:c.589-4_589-3insTT' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000017.10(NM_000088.3):c.589-4_589-3insTT' -p26 -sS'HGVS_LRG_variant' -p27 -S'LRG_1:g.8634_8635insTT' -p28 -sS'HGVS_transcript_variant' -p29 -S'NM_000088.3:c.589-4_589-3insTT' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_007400.1:g.8634_8635insTT' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000017.10:g.48275366_48275367insAA' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr17' -p42 -sS'ref' -p43 -S'T' -p44 -sS'pos' -p45 -S'48275366' -p46 -sS'alt' -p47 -VTAA -p48 -sssS'hg38' -p49 -(dp50 -g37 -S'NC_000017.11:g.50198005_50198006insAA' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -g44 -sg45 -S'50198005' -p53 -sg47 -VTAA -p54 -sssS'GRCh37' -p55 -(dp56 -g37 -S'NC_000017.10:g.48275366_48275367insAA' -p57 -sg39 -(dp58 -g41 -S'17' -p59 -sg43 -g44 -sg45 -S'48275366' -p60 -sg47 -VTAA -p61 -sssS'GRCh38' -p62 -(dp63 -g37 -S'NC_000017.11:g.50198005_50198006insAA' -p64 -sg39 -(dp65 -g41 -g59 -sg43 -g44 -sg45 -S'50198005' -p66 
-sg47 -VTAA -p67 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant58.txt b/VariantValidator/testing/testOutputsMasterITS/variant58.txt deleted file mode 100644 index 434d968d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant58.txt +++ /dev/null @@ -1,148 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589-7del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.589-7del' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000088.3:c.589-8del normalized to NM_000088.3:c.589-7del' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'NG_007400.1(NM_000088.3):c.589-7del' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A1' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000079.2(LRG_1p1):p.?' -p21 -sS'slr' -p22 -S'NP_000079.2:p.?' -p23 -ssS'submitted_variant' -p24 -S'NM_000088.3:c.589-8del' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000017.10(NM_000088.3):c.589-7del' -p27 -sS'HGVS_LRG_variant' -p28 -S'LRG_1:g.8631del' -p29 -sS'HGVS_transcript_variant' -p30 -S'NM_000088.3:c.589-7del' -p31 -sS'HGVS_RefSeqGene_variant' -p32 -S'NG_007400.1:g.8631del' -p33 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000017.10:g.48275370del' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chr17' -p43 -sS'ref' -p44 -S'GA' -p45 -sS'pos' -p46 -S'48275369' -p47 -sS'alt' -p48 -S'G' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000017.11:g.50198009del' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'GA' -p54 -sg46 -S'50198008' -p55 -sg48 -g49 -sssS'GRCh37' -p56 -(dp57 -g38 -S'NC_000017.10:g.48275370del' -p58 -sg40 -(dp59 -g42 -S'17' -p60 -sg44 -S'GA' -p61 -sg46 -S'48275369' -p62 -sg48 -g49 -sssS'GRCh38' -p63 -(dp64 -g38 -S'NC_000017.11:g.50198009del' -p65 -sg40 -(dp66 -g42 -g60 -sg44 -S'GA' -p67 -sg46 -S'50198008' -p68 
-sg48 -g49 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant59.txt b/VariantValidator/testing/testOutputsMasterITS/variant59.txt deleted file mode 100644 index fc02edb8..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant59.txt +++ /dev/null @@ -1,145 +0,0 @@ -(dp0 -S'NM_000527.4:c.-187_-185del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_274t1:c.-187_-185del' -p4 -sS'validation_warnings' -p5 -(lp6 -sS'RefSeqGene_context_intronic_sequence' -p7 -S'' -p8 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens low density lipoprotein receptor (LDLR), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'LDLR' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_000518.1(LRG_274p1):p.?' -p18 -sS'slr' -p19 -S'NP_000518.1:p.?' -p20 -ssS'submitted_variant' -p21 -S'NM_000527.4:c.-187_-185delCTC' -p22 -sS'genome_context_intronic_sequence' -p23 -g8 -sS'HGVS_LRG_variant' -p24 -S'LRG_274:g.4982_4984del' -p25 -sS'HGVS_transcript_variant' -p26 -S'NM_000527.4:c.-187_-185del' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -S'NG_009060.1:g.4982_4984del' -p29 -sS'primary_assembly_loci' -p30 -(dp31 -S'GRCh38' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000019.10:g.11089362_11089364del' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'19' -p39 -sS'ref' -p40 -S'ACTC' -p41 -sS'pos' -p42 -S'11089355' -p43 -sS'alt' -p44 -S'A' -p45 -sssS'GRCh37' -p46 -(dp47 -g34 -S'NC_000019.9:g.11200038_11200040del' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'ACTC' -p50 -sg42 -S'11200031' -p51 -sg44 -g45 -sssS'hg38' -p52 -(dp53 -g34 -S'NC_000019.10:g.11089362_11089364del' -p54 -sg36 -(dp55 -g38 -S'chr19' -p56 -sg40 -S'ACTC' -p57 -sg42 -S'11089355' -p58 -sg44 -g45 -sssS'hg19' -p59 -(dp60 -g34 -S'NC_000019.9:g.11200038_11200040del' -p61 -sg36 -(dp62 -g38 -g56 -sg40 -S'ACTC' -p63 -sg42 -S'11200031' -p64 -sg44 -g45 -sssssS'flag' -p65 -S'gene_variant' -p66 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant6.txt b/VariantValidator/testing/testOutputsMasterITS/variant6.txt deleted file mode 100644 index 26f5d812..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant6.txt +++ /dev/null @@ -1,1218 +0,0 @@ -(dp0 -S'NM_000548.4:c.138+821del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' -p7 -aS'RefSeqGene record not available' -p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 1, mRNA -p13 -sS'gene_symbol' -p14 -S'TSC2' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_000539.2(LRG_487p1):p.?' -p19 -sS'slr' -p20 -S'NP_000539.2:p.?' -p21 -ssS'submitted_variant' -p22 -S'NC_000016.9:g.2099572TC>T' -p23 -sS'genome_context_intronic_sequence' -p24 -S'NC_000016.9(NM_000548.4):c.138+821del' -p25 -sS'HGVS_LRG_variant' -p26 -g4 -sS'HGVS_transcript_variant' -p27 -S'NM_000548.4:c.138+821del' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -g4 -sS'primary_assembly_loci' -p30 -(dp31 -S'GRCh38' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000016.10:g.2049574del' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'16' -p39 -sS'ref' -p40 -S'TC' -p41 -sS'pos' -p42 -S'2049571' -p43 -sS'alt' -p44 -S'T' -p45 -sssS'GRCh37' -p46 -(dp47 -g34 -S'NC_000016.9:g.2099575del' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'TC' -p50 -sg42 -S'2099572' -p51 -sg44 -g45 -sssS'hg38' -p52 -(dp53 -g34 -S'NC_000016.10:g.2049574del' -p54 -sg36 -(dp55 -g38 -S'chr16' -p56 -sg40 -S'TC' -p57 -sg42 -S'2049571' -p58 -sg44 -g45 -sssS'hg19' -p59 -(dp60 -g34 -S'NC_000016.9:g.2099575del' -p61 -sg36 -(dp62 -g38 -g56 -sg40 -S'TC' -p63 -sg42 -S'2099572' -p64 -sg44 -g45 -sssssS'NM_001077183.2:c.138+821del' -p65 -(dp66 -g3 -g4 -sg5 -(lp67 -S'NC_000016.9:g.2099572TC>T 
automapped to NC_000016.9:g.2099575delC' -p68 -aS'RefSeqGene record not available' -p69 -asg9 -g4 -sg10 -(lp70 -sg12 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 4, mRNA -p71 -sg14 -S'TSC2' -p72 -sg16 -(dp73 -g18 -S'NP_001070651.1:p.?' -p74 -sg20 -S'NP_001070651.1:p.?' -p75 -ssg22 -g23 -sg24 -S'NC_000016.9(NM_001077183.2):c.138+821del' -p76 -sg26 -g4 -sg27 -S'NM_001077183.2:c.138+821del' -p77 -sg29 -g4 -sg30 -(dp78 -g32 -(dp79 -g34 -S'NC_000016.10:g.2049574del' -p80 -sg36 -(dp81 -g38 -g39 -sg40 -S'TC' -p82 -sg42 -S'2049571' -p83 -sg44 -g45 -sssg46 -(dp84 -g34 -S'NC_000016.9:g.2099575del' -p85 -sg36 -(dp86 -g38 -g39 -sg40 -S'TC' -p87 -sg42 -S'2099572' -p88 -sg44 -g45 -sssg52 -(dp89 -g34 -S'NC_000016.10:g.2049574del' -p90 -sg36 -(dp91 -g38 -g56 -sg40 -S'TC' -p92 -sg42 -S'2049571' -p93 -sg44 -g45 -sssg59 -(dp94 -g34 -S'NC_000016.9:g.2099575del' -p95 -sg36 -(dp96 -g38 -g56 -sg40 -S'TC' -p97 -sg42 -S'2099572' -p98 -sg44 -g45 -sssssS'NM_001318831.1:c.-89+821del' -p99 -(dp100 -g3 -g4 -sg5 -(lp101 -S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' -p102 -aS'RefSeqGene record not available' -p103 -asg9 -g4 -sg10 -(lp104 -sg12 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 8, mRNA -p105 -sg14 -S'TSC2' -p106 -sg16 -(dp107 -g18 -S'NP_001305760.1:p.?' -p108 -sg20 -S'NP_001305760.1:p.?' 
-p109 -ssg22 -g23 -sg24 -S'NC_000016.9(NM_001318831.1):c.-89+821del' -p110 -sg26 -g4 -sg27 -S'NM_001318831.1:c.-89+821del' -p111 -sg29 -g4 -sg30 -(dp112 -g32 -(dp113 -g34 -S'NC_000016.10:g.2049574del' -p114 -sg36 -(dp115 -g38 -g39 -sg40 -S'TC' -p116 -sg42 -S'2049571' -p117 -sg44 -g45 -sssg46 -(dp118 -g34 -S'NC_000016.9:g.2099575del' -p119 -sg36 -(dp120 -g38 -g39 -sg40 -S'TC' -p121 -sg42 -S'2099572' -p122 -sg44 -g45 -sssg52 -(dp123 -g34 -S'NC_000016.10:g.2049574del' -p124 -sg36 -(dp125 -g38 -g56 -sg40 -S'TC' -p126 -sg42 -S'2049571' -p127 -sg44 -g45 -sssg59 -(dp128 -g34 -S'NC_000016.9:g.2099575del' -p129 -sg36 -(dp130 -g38 -g56 -sg40 -S'TC' -p131 -sg42 -S'2099572' -p132 -sg44 -g45 -sssssS'NM_000548.3:c.138+821del' -p133 -(dp134 -g3 -S'LRG_487t1:c.138+821del' -p135 -sg5 -(lp136 -S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' -p137 -aS'A more recent version of the selected reference sequence NM_000548.3 is available (NM_000548.4)' -p138 -aS'NM_000548.4:c.138+821delC MUST be fully validated prior to use in reports' -p139 -aS'select_variants=NM_000548.4:c.138+821del' -p140 -asg9 -S'NG_005895.1(NM_000548.3):c.138+821del' -p141 -sg10 -(lp142 -sg12 -VHomo sapiens tuberous sclerosis 2 (TSC2), transcript variant 1, mRNA -p143 -sg14 -S'TSC2' -p144 -sg16 -(dp145 -g18 -S'NP_000539.2(LRG_487p1):p.?' -p146 -sg20 -S'NP_000539.2:p.?' 
-p147 -ssg22 -g23 -sg24 -S'NC_000016.9(NM_000548.3):c.138+821del' -p148 -sg26 -S'LRG_487:g.5269del' -p149 -sg27 -S'NM_000548.3:c.138+821del' -p150 -sg29 -S'NG_005895.1:g.5269del' -p151 -sg30 -(dp152 -g59 -(dp153 -g34 -S'NC_000016.9:g.2099575del' -p154 -sg36 -(dp155 -g38 -g56 -sg40 -S'TC' -p156 -sg42 -S'2099572' -p157 -sg44 -g45 -sssg46 -(dp158 -g34 -S'NC_000016.9:g.2099575del' -p159 -sg36 -(dp160 -g38 -g39 -sg40 -S'TC' -p161 -sg42 -S'2099572' -p162 -sg44 -g45 -sssssS'NM_001114382.1:c.138+821del' -p163 -(dp164 -g3 -g4 -sg5 -(lp165 -S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' -p166 -aS'A more recent version of the selected reference sequence NM_001114382.1 is available (NM_001114382.2)' -p167 -aS'NM_001114382.2:c.138+821delC MUST be fully validated prior to use in reports' -p168 -aS'select_variants=NM_001114382.2:c.138+821del' -p169 -aS'RefSeqGene record not available' -p170 -asg9 -g4 -sg10 -(lp171 -sg12 -VHomo sapiens tuberous sclerosis 2 (TSC2), transcript variant 5, mRNA -p172 -sg14 -S'TSC2' -p173 -sg16 -(dp174 -g18 -S'NP_001107854.1:p.?' -p175 -sg20 -S'NP_001107854.1:p.?' -p176 -ssg22 -g23 -sg24 -S'NC_000016.9(NM_001114382.1):c.138+821del' -p177 -sg26 -g4 -sg27 -S'NM_001114382.1:c.138+821del' -p178 -sg29 -g4 -sg30 -(dp179 -g59 -(dp180 -g34 -S'NC_000016.9:g.2099575del' -p181 -sg36 -(dp182 -g38 -g56 -sg40 -S'TC' -p183 -sg42 -S'2099572' -p184 -sg44 -g45 -sssg46 -(dp185 -g34 -S'NC_000016.9:g.2099575del' -p186 -sg36 -(dp187 -g38 -g39 -sg40 -S'TC' -p188 -sg42 -S'2099572' -p189 -sg44 -g45 -sssssS'NM_001318832.1:c.171+821del' -p190 -(dp191 -g3 -g4 -sg5 -(lp192 -S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' -p193 -aS'RefSeqGene record not available' -p194 -asg9 -g4 -sg10 -(lp195 -sg12 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 9, mRNA -p196 -sg14 -S'TSC2' -p197 -sg16 -(dp198 -g18 -S'NP_001305761.1:p.?' -p199 -sg20 -S'NP_001305761.1:p.?' 
-p200 -ssg22 -g23 -sg24 -S'NC_000016.9(NM_001318832.1):c.171+821del' -p201 -sg26 -g4 -sg27 -S'NM_001318832.1:c.171+821del' -p202 -sg29 -g4 -sg30 -(dp203 -g32 -(dp204 -g34 -S'NC_000016.10:g.2049574del' -p205 -sg36 -(dp206 -g38 -g39 -sg40 -S'TC' -p207 -sg42 -S'2049571' -p208 -sg44 -g45 -sssg46 -(dp209 -g34 -S'NC_000016.9:g.2099575del' -p210 -sg36 -(dp211 -g38 -g39 -sg40 -S'TC' -p212 -sg42 -S'2099572' -p213 -sg44 -g45 -sssg52 -(dp214 -g34 -S'NC_000016.10:g.2049574del' -p215 -sg36 -(dp216 -g38 -g56 -sg40 -S'TC' -p217 -sg42 -S'2049571' -p218 -sg44 -g45 -sssg59 -(dp219 -g34 -S'NC_000016.9:g.2099575del' -p220 -sg36 -(dp221 -g38 -g56 -sg40 -S'TC' -p222 -sg42 -S'2099572' -p223 -sg44 -g45 -sssssS'NM_001363528.1:c.138+821del' -p224 -(dp225 -g3 -g4 -sg5 -(lp226 -S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' -p227 -aS'RefSeqGene record not available' -p228 -asg9 -g4 -sg10 -(lp229 -sg12 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 10, mRNA -p230 -sg14 -S'TSC2' -p231 -sg16 -(dp232 -g18 -S'NP_001350457.1:p.?' -p233 -sg20 -S'NP_001350457.1:p.?' -p234 -ssg22 -g23 -sg24 -S'NC_000016.9(NM_001363528.1):c.138+821del' -p235 -sg26 -g4 -sg27 -S'NM_001363528.1:c.138+821del' -p236 -sg29 -g4 -sg30 -(dp237 -g59 -(dp238 -g34 -S'NC_000016.9:g.2099575del' -p239 -sg36 -(dp240 -g38 -g56 -sg40 -S'TC' -p241 -sg42 -S'2099572' -p242 -sg44 -g45 -sssg46 -(dp243 -g34 -S'NC_000016.9:g.2099575del' -p244 -sg36 -(dp245 -g38 -g39 -sg40 -S'TC' -p246 -sg42 -S'2099572' -p247 -sg44 -g45 -sssssS'flag' -p248 -S'gene_variant' -p249 -sS'NM_021055.2:c.138+821del' -p250 -(dp251 -g3 -g4 -sg5 -(lp252 -S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' -p253 -aS'RefSeqGene record not available' -p254 -asg9 -g4 -sg10 -(lp255 -sg12 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 2, mRNA -p256 -sg14 -S'TSC2' -p257 -sg16 -(dp258 -g18 -S'NP_066399.2:p.?' -p259 -sg20 -S'NP_066399.2:p.?' 
-p260 -ssg22 -g23 -sg24 -S'NC_000016.9(NM_021055.2):c.138+821del' -p261 -sg26 -g4 -sg27 -S'NM_021055.2:c.138+821del' -p262 -sg29 -g4 -sg30 -(dp263 -g59 -(dp264 -g34 -S'NC_000016.9:g.2099575del' -p265 -sg36 -(dp266 -g38 -g56 -sg40 -S'TC' -p267 -sg42 -S'2099572' -p268 -sg44 -g45 -sssg46 -(dp269 -g34 -S'NC_000016.9:g.2099575del' -p270 -sg36 -(dp271 -g38 -g39 -sg40 -S'TC' -p272 -sg42 -S'2099572' -p273 -sg44 -g45 -sssssS'NM_001077183.1:c.138+821del' -p274 -(dp275 -g3 -g4 -sg5 -(lp276 -S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' -p277 -aS'A more recent version of the selected reference sequence NM_001077183.1 is available (NM_001077183.2)' -p278 -aS'NM_001077183.2:c.138+821delC MUST be fully validated prior to use in reports' -p279 -aS'select_variants=NM_001077183.2:c.138+821del' -p280 -aS'RefSeqGene record not available' -p281 -asg9 -g4 -sg10 -(lp282 -sg12 -VHomo sapiens tuberous sclerosis 2 (TSC2), transcript variant 4, mRNA -p283 -sg14 -S'TSC2' -p284 -sg16 -(dp285 -g18 -S'NP_001070651.1:p.?' -p286 -sg20 -S'NP_001070651.1:p.?' -p287 -ssg22 -g23 -sg24 -S'NC_000016.9(NM_001077183.1):c.138+821del' -p288 -sg26 -g4 -sg27 -S'NM_001077183.1:c.138+821del' -p289 -sg29 -g4 -sg30 -(dp290 -g59 -(dp291 -g34 -S'NC_000016.9:g.2099575del' -p292 -sg36 -(dp293 -g38 -g56 -sg40 -S'TC' -p294 -sg42 -S'2099572' -p295 -sg44 -g45 -sssg46 -(dp296 -g34 -S'NC_000016.9:g.2099575del' -p297 -sg36 -(dp298 -g38 -g39 -sg40 -S'TC' -p299 -sg42 -S'2099572' -p300 -sg44 -g45 -sssssS'NM_001318827.1:c.138+821del' -p301 -(dp302 -g3 -g4 -sg5 -(lp303 -S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' -p304 -aS'RefSeqGene record not available' -p305 -asg9 -g4 -sg10 -(lp306 -sg12 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 6, mRNA -p307 -sg14 -S'TSC2' -p308 -sg16 -(dp309 -g18 -S'NP_001305756.1:p.?' -p310 -sg20 -S'NP_001305756.1:p.?' 
-p311 -ssg22 -g23 -sg24 -S'NC_000016.9(NM_001318827.1):c.138+821del' -p312 -sg26 -g4 -sg27 -S'NM_001318827.1:c.138+821del' -p313 -sg29 -g4 -sg30 -(dp314 -g32 -(dp315 -g34 -S'NC_000016.10:g.2049574del' -p316 -sg36 -(dp317 -g38 -g39 -sg40 -S'TC' -p318 -sg42 -S'2049571' -p319 -sg44 -g45 -sssg46 -(dp320 -g34 -S'NC_000016.9:g.2099575del' -p321 -sg36 -(dp322 -g38 -g39 -sg40 -S'TC' -p323 -sg42 -S'2099572' -p324 -sg44 -g45 -sssg52 -(dp325 -g34 -S'NC_000016.10:g.2049574del' -p326 -sg36 -(dp327 -g38 -g56 -sg40 -S'TC' -p328 -sg42 -S'2049571' -p329 -sg44 -g45 -sssg59 -(dp330 -g34 -S'NC_000016.9:g.2099575del' -p331 -sg36 -(dp332 -g38 -g56 -sg40 -S'TC' -p333 -sg42 -S'2099572' -p334 -sg44 -g45 -sssssS'NM_001114382.2:c.138+821del' -p335 -(dp336 -g3 -g4 -sg5 -(lp337 -S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' -p338 -aS'RefSeqGene record not available' -p339 -asg9 -g4 -sg10 -(lp340 -sg12 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 5, mRNA -p341 -sg14 -S'TSC2' -p342 -sg16 -(dp343 -g18 -S'NP_001107854.1:p.?' -p344 -sg20 -S'NP_001107854.1:p.?' 
-p345 -ssg22 -g23 -sg24 -S'NC_000016.9(NM_001114382.2):c.138+821del' -p346 -sg26 -g4 -sg27 -S'NM_001114382.2:c.138+821del' -p347 -sg29 -g4 -sg30 -(dp348 -g32 -(dp349 -g34 -S'NC_000016.10:g.2049574del' -p350 -sg36 -(dp351 -g38 -g39 -sg40 -S'TC' -p352 -sg42 -S'2049571' -p353 -sg44 -g45 -sssg46 -(dp354 -g34 -S'NC_000016.9:g.2099575del' -p355 -sg36 -(dp356 -g38 -g39 -sg40 -S'TC' -p357 -sg42 -S'2099572' -p358 -sg44 -g45 -sssg52 -(dp359 -g34 -S'NC_000016.10:g.2049574del' -p360 -sg36 -(dp361 -g38 -g56 -sg40 -S'TC' -p362 -sg42 -S'2049571' -p363 -sg44 -g45 -sssg59 -(dp364 -g34 -S'NC_000016.9:g.2099575del' -p365 -sg36 -(dp366 -g38 -g56 -sg40 -S'TC' -p367 -sg42 -S'2099572' -p368 -sg44 -g45 -sssssS'NM_001318829.1:c.-9-826del' -p369 -(dp370 -g3 -g4 -sg5 -(lp371 -S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' -p372 -aS'RefSeqGene record not available' -p373 -asg9 -g4 -sg10 -(lp374 -sg12 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 7, mRNA -p375 -sg14 -S'TSC2' -p376 -sg16 -(dp377 -g18 -S'NP_001305758.1:p.?' -p378 -sg20 -S'NP_001305758.1:p.?' -p379 -ssg22 -g23 -sg24 -S'NC_000016.9(NM_001318829.1):c.-9-826del' -p380 -sg26 -g4 -sg27 -S'NM_001318829.1:c.-9-826del' -p381 -sg29 -g4 -sg30 -(dp382 -g32 -(dp383 -g34 -S'NC_000016.10:g.2049574del' -p384 -sg36 -(dp385 -g38 -g39 -sg40 -S'TC' -p386 -sg42 -S'2049571' -p387 -sg44 -g45 -sssg46 -(dp388 -g34 -S'NC_000016.9:g.2099575del' -p389 -sg36 -(dp390 -g38 -g39 -sg40 -S'TC' -p391 -sg42 -S'2099572' -p392 -sg44 -g45 -sssg52 -(dp393 -g34 -S'NC_000016.10:g.2049574del' -p394 -sg36 -(dp395 -g38 -g56 -sg40 -S'TC' -p396 -sg42 -S'2049571' -p397 -sg44 -g45 -sssg59 -(dp398 -g34 -S'NC_000016.9:g.2099575del' -p399 -sg36 -(dp400 -g38 -g56 -sg40 -S'TC' -p401 -sg42 -S'2099572' -p402 -sg44 -g45 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant60.txt b/VariantValidator/testing/testOutputsMasterITS/variant60.txt deleted file mode 100644 index 9d853afe..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant60.txt +++ /dev/null @@ -1,140 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_206933.2:c.6317C>G' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -sS'RefSeqGene_context_intronic_sequence' -p9 -g6 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens usherin (USH2A), transcript variant 2, mRNA -p13 -sS'gene_symbol' -p14 -S'USH2A' -p15 -sS'HGVS_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_996816.2:p.(Thr2106Arg)' -p19 -sS'slr' -p20 -S'NP_996816.2:p.(T2106R)' -p21 -ssS'submitted_variant' -p22 -S'NM_206933.2:c.6317C>G' -p23 -sS'genome_context_intronic_sequence' -p24 -g6 -sS'HGVS_LRG_variant' -p25 -g6 -sS'HGVS_transcript_variant' -p26 -S'NM_206933.2:c.6317C>G' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -S'NG_009497.1:g.381958C>G' -p29 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000001.10:g.216219781A>C' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr1' -p39 -sS'ref' -p40 -S'A' -p41 -sS'pos' -p42 -S'216219781' -p43 -sS'alt' -p44 -VC -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000001.11:g.216046439A>C' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'216046439' -p50 -sg44 -g45 -sssS'GRCh37' -p51 -(dp52 -g34 -S'NC_000001.10:g.216219781A>C' -p53 -sg36 -(dp54 -g38 -S'1' -p55 -sg40 -g41 -sg42 -S'216219781' -p56 -sg44 -g45 -sssS'GRCh38' -p57 -(dp58 -g34 -S'NC_000001.11:g.216046439A>C' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'216046439' -p61 -sg44 -g45 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant61.txt b/VariantValidator/testing/testOutputsMasterITS/variant61.txt deleted file mode 100644 index bd3bd4d9..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant61.txt +++ /dev/null @@ -1,142 +0,0 @@ -(dp0 -S'NM_000059.3:c.7397C=' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_293t1:c.7397C=' -p4 -sS'validation_warnings' -p5 -(lp6 -sS'RefSeqGene_context_intronic_sequence' -p7 -S'' -p8 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens BRCA2, DNA repair associated (BRCA2), mRNA -p12 -sS'gene_symbol' -p13 -S'BRCA2' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_000050.2(LRG_293p1):p.(Ala2466=)' -p18 -sS'slr' -p19 -S'NP_000050.2:p.(A2466=)' -p20 -ssS'submitted_variant' -p21 -S'NC_000013.10:g.32929387T>C' -p22 -sS'genome_context_intronic_sequence' -p23 -g8 -sS'HGVS_LRG_variant' -p24 -S'LRG_293:g.44771C=' -p25 -sS'HGVS_transcript_variant' -p26 -S'NM_000059.3:c.7397C=' -p27 -sS'HGVS_RefSeqGene_variant' -p28 -S'NG_012772.3:g.44771C=' -p29 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000013.10:g.32929387T>C' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr13' -p39 -sS'ref' -p40 -S'T' -p41 -sS'pos' -p42 -S'32929387' -p43 -sS'alt' -p44 -S'C' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000013.11:g.32355250T>C' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'32355250' -p50 -sg44 -g45 -sssS'GRCh37' -p51 -(dp52 -g34 -S'NC_000013.10:g.32929387T>C' -p53 -sg36 -(dp54 -g38 -S'13' -p55 -sg40 -g41 -sg42 -S'32929387' -p56 -sg44 -g45 -sssS'GRCh38' -p57 -(dp58 -g34 -S'NC_000013.11:g.32355250T>C' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'32355250' -p61 -sg44 -g45 -sssssS'flag' -p62 -S'gene_variant' -p63 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant62.txt b/VariantValidator/testing/testOutputsMasterITS/variant62.txt deleted file mode 100644 index c50cf113..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant62.txt +++ /dev/null @@ -1,114 +0,0 @@ -(dp0 -S'NM_015102.3:c.2818-2T>A' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'A more recent version of the selected reference sequence NM_015102.3 is available (NM_015102.4)' -p7 -aS'NM_015102.4:c.2818-2T>A MUST be fully validated prior to use in reports' -p8 -aS'select_variants=NM_015102.4:c.2818-2T>A' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'NG_011724.2(NM_015102.3):c.2818-2A=' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens nephronophthisis 4 (NPHP4), mRNA -p15 -sS'gene_symbol' -p16 -S'NPHP4' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_055917.1:p.?' -p21 -sS'slr' -p22 -S'NP_055917.1:p.?' -p23 -ssS'submitted_variant' -p24 -S'NM_015102.3:c.2818-2T>A' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000001.10(NM_015102.3):c.2818-2T>A' -p27 -sS'HGVS_LRG_variant' -p28 -g4 -sS'HGVS_transcript_variant' -p29 -S'NM_015102.3:c.2818-2T>A' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_011724.2:g.122370A=' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000001.10:g.5935162A>T' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr1' -p42 -sS'ref' -p43 -VA -p44 -sS'pos' -p45 -S'5935162' -p46 -sS'alt' -p47 -VT -p48 -sssS'GRCh37' -p49 -(dp50 -g37 -S'NC_000001.10:g.5935162A>T' -p51 -sg39 -(dp52 -g41 -S'1' -p53 -sg43 -g44 -sg45 -S'5935162' -p54 -sg47 -g48 -sssssS'flag' -p55 -S'gene_variant' -p56 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant63.txt b/VariantValidator/testing/testOutputsMasterITS/variant63.txt deleted file mode 100644 index cd8f6313..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant63.txt +++ /dev/null @@ -1,384 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_001042544.1:c.3233_3235=' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000019.9:g.41123094G>GG automapped to NC_000019.9:g.41123095dupG' -p9 -aS'The displayed variants may be artefacts of aligning NM_001042544.1 with genome build GRCh37' -p10 -aS'NM_001042544.1:c.3233_3235 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p11 -aS'Caution should be used when reporting the displayed variant descriptions' -p12 -aS'If you are unsure, please contact admin' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA -p18 -sS'gene_symbol' -p19 -S'LTBP4' -p20 -sS'HGVS_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_001036009.1:p.(Gln1078=)' -p24 -sS'slr' -p25 -S'NP_001036009.1:p.(Q1078=)' -p26 -ssS'submitted_variant' -p27 -S'19-41123094-G-GG' -p28 -sS'genome_context_intronic_sequence' -p29 -g6 -sS'HGVS_LRG_variant' -p30 -g6 -sS'HGVS_transcript_variant' -p31 -S'NM_001042544.1:c.3233_3235=' -p32 -sS'HGVS_RefSeqGene_variant' -p33 -S'NG_021201.1:g.29022_29024=' -p34 -sS'primary_assembly_loci' -p35 -(dp36 -S'GRCh38' -p37 -(dp38 -S'HGVS_genomic_description' -p39 -S'NC_000019.10:g.40617187_40617189=' -p40 -sS'vcf' -p41 -(dp42 -S'chr' -p43 -S'19' -p44 -sS'ref' -p45 -VAGG -p46 -sS'pos' -p47 -S'40617187' -p48 -sS'alt' -p49 -g46 -sssS'GRCh37' -p50 -(dp51 -g39 -S'NC_000019.9:g.41123095dup' -p52 -sg41 -(dp53 -g43 -g44 -sg45 -S'G' -p54 -sg47 -S'41123094' -p55 -sg49 -VGG -p56 
-sssS'hg38' -p57 -(dp58 -g39 -S'NC_000019.10:g.40617187_40617189=' -p59 -sg41 -(dp60 -g43 -S'chr19' -p61 -sg45 -g46 -sg47 -S'40617187' -p62 -sg49 -g46 -sssS'hg19' -p63 -(dp64 -g39 -S'NC_000019.9:g.41123095dup' -p65 -sg41 -(dp66 -g43 -g61 -sg45 -g54 -sg47 -S'41123094' -p67 -sg49 -VGG -p68 -sssssS'NM_001042545.1:c.3032_3034=' -p69 -(dp70 -g5 -g6 -sg7 -(lp71 -S'NC_000019.9:g.41123094G>GG automapped to NC_000019.9:g.41123095dupG' -p72 -aS'The displayed variants may be artefacts of aligning NM_001042545.1 with genome build GRCh37' -p73 -aS'NM_001042545.1:c.3032_3034 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p74 -aS'Caution should be used when reporting the displayed variant descriptions' -p75 -aS'If you are unsure, please contact admin' -p76 -asg14 -g6 -sg15 -(lp77 -sg17 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA -p78 -sg19 -S'LTBP4' -p79 -sg21 -(dp80 -g23 -S'NP_001036010.1:p.(Gln1011=)' -p81 -sg25 -S'NP_001036010.1:p.(Q1011=)' -p82 -ssg27 -g28 -sg29 -g6 -sg30 -g6 -sg31 -S'NM_001042545.1:c.3032_3034=' -p83 -sg33 -S'NG_021201.1:g.29022_29024=' -p84 -sg35 -(dp85 -g37 -(dp86 -g39 -S'NC_000019.10:g.40617187_40617189=' -p87 -sg41 -(dp88 -g43 -g44 -sg45 -VAGG -p89 -sg47 -S'40617187' -p90 -sg49 -g89 -sssg50 -(dp91 -g39 -S'NC_000019.9:g.41123095dup' -p92 -sg41 -(dp93 -g43 -g44 -sg45 -g54 -sg47 -S'41123094' -p94 -sg49 -VGG -p95 -sssg57 -(dp96 -g39 -S'NC_000019.10:g.40617187_40617189=' -p97 -sg41 -(dp98 -g43 -g61 -sg45 -g89 -sg47 -S'40617187' -p99 -sg49 -g89 -sssg63 -(dp100 -g39 -S'NC_000019.9:g.41123095dup' -p101 -sg41 -(dp102 -g43 -g61 -sg45 -g54 -sg47 -S'41123094' -p103 -sg49 -VGG -p104 -sssssS'NM_003573.2:c.3122_3124=' -p105 -(dp106 -g5 -g6 -sg7 -(lp107 -S'NC_000019.9:g.41123094G>GG automapped to NC_000019.9:g.41123095dupG' -p108 -aS'The displayed variants may be artefacts of aligning NM_003573.2 with genome build GRCh37' -p109 -aS'NM_003573.2:c.3122_3124 contains 1 transcript 
base(s) that fail to align to chromosome NC_000019.9' -p110 -aS'Caution should be used when reporting the displayed variant descriptions' -p111 -aS'If you are unsure, please contact admin' -p112 -asg14 -g6 -sg15 -(lp113 -sg17 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA -p114 -sg19 -S'LTBP4' -p115 -sg21 -(dp116 -g23 -S'NP_003564.2:p.(Gln1041=)' -p117 -sg25 -S'NP_003564.2:p.(Q1041=)' -p118 -ssg27 -g28 -sg29 -g6 -sg30 -g6 -sg31 -S'NM_003573.2:c.3122_3124=' -p119 -sg33 -S'NG_021201.1:g.29022_29024=' -p120 -sg35 -(dp121 -g37 -(dp122 -g39 -S'NC_000019.10:g.40617187_40617189=' -p123 -sg41 -(dp124 -g43 -g44 -sg45 -VAGG -p125 -sg47 -S'40617187' -p126 -sg49 -g125 -sssg50 -(dp127 -g39 -S'NC_000019.9:g.41123095dup' -p128 -sg41 -(dp129 -g43 -g44 -sg45 -g54 -sg47 -S'41123094' -p130 -sg49 -VGG -p131 -sssg57 -(dp132 -g39 -S'NC_000019.10:g.40617187_40617189=' -p133 -sg41 -(dp134 -g43 -g61 -sg45 -g125 -sg47 -S'40617187' -p135 -sg49 -g125 -sssg63 -(dp136 -g39 -S'NC_000019.9:g.41123095dup' -p137 -sg41 -(dp138 -g43 -g61 -sg45 -g54 -sg47 -S'41123094' -p139 -sg49 -VGG -p140 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant64.txt b/VariantValidator/testing/testOutputsMasterITS/variant64.txt deleted file mode 100644 index 88c3408f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant64.txt +++ /dev/null @@ -1,449 +0,0 @@ -(dp0 -S'NM_014249.2:c.946_949=' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC' -p7 -aS'The displayed variants may be artefacts of aligning NM_014249.2 with genome build GRCh37' -p8 -aS'NC_000015.9:g.72105928_72105929 contains 1 genomic base(s) that fail to align to transcript NM_014249.2' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -aS'A more recent version of the selected reference sequence NM_014249.2 is available (NM_014249.3)' -p12 -aS'NM_014249.3:c.946_949GACC= MUST be fully validated prior to use in reports' -p13 -aS'select_variants=NM_014249.3:c.946_949=' -p14 -aS'RefSeqGene record not available' -p15 -asS'RefSeqGene_context_intronic_sequence' -p16 -g4 -sS'alt_genomic_loci' -p17 -(lp18 -sS'transcript_description' -p19 -VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA -p20 -sS'gene_symbol' -p21 -S'NR2E3' -p22 -sS'HGVS_predicted_protein_consequence' -p23 -(dp24 -S'tlr' -p25 -S'NP_055064.1:p.(Asp316=)' -p26 -sS'slr' -p27 -S'NP_055064.1:p.(D316=)' -p28 -ssS'submitted_variant' -p29 -S'15-72105928-AC-A' -p30 -sS'genome_context_intronic_sequence' -p31 -g4 -sS'HGVS_LRG_variant' -p32 -g4 -sS'HGVS_transcript_variant' -p33 -S'NM_014249.2:c.946_949=' -p34 -sS'HGVS_RefSeqGene_variant' -p35 -g4 -sS'primary_assembly_loci' -p36 -(dp37 -S'hg19' -p38 -(dp39 -S'HGVS_genomic_description' -p40 -S'NC_000015.9:g.72105933del' -p41 -sS'vcf' -p42 -(dp43 -S'chr' -p44 -S'chr15' -p45 -sS'ref' -p46 -S'AC' -p47 -sS'pos' -p48 
-S'72105928' -p49 -sS'alt' -p50 -S'A' -p51 -sssS'GRCh37' -p52 -(dp53 -g40 -S'NC_000015.9:g.72105933del' -p54 -sg42 -(dp55 -g44 -S'15' -p56 -sg46 -S'AC' -p57 -sg48 -S'72105928' -p58 -sg50 -g51 -sssssS'flag' -p59 -S'gene_variant' -p60 -sS'NM_014249.3:c.946_949=' -p61 -(dp62 -g3 -g4 -sg5 -(lp63 -S'NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC' -p64 -aS'The displayed variants may be artefacts of aligning NM_014249.3 with genome build GRCh37' -p65 -aS'NC_000015.9:g.72105928_72105929 contains 1 genomic base(s) that fail to align to transcript NM_014249.3' -p66 -aS'Caution should be used when reporting the displayed variant descriptions' -p67 -aS'If you are unsure, please contact admin' -p68 -asg16 -g4 -sg17 -(lp69 -sg19 -VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA -p70 -sg21 -S'NR2E3' -p71 -sg23 -(dp72 -g25 -S'NP_055064.1:p.(Asp316=)' -p73 -sg27 -S'NP_055064.1:p.(D316=)' -p74 -ssg29 -g30 -sg31 -g4 -sg32 -g4 -sg33 -S'NM_014249.3:c.946_949=' -p75 -sg35 -S'NG_009113.1:g.8034_8037=' -p76 -sg36 -(dp77 -S'GRCh38' -p78 -(dp79 -g40 -S'NC_000015.10:g.71813586_71813591=' -p80 -sg42 -(dp81 -g44 -g56 -sg46 -VGGACCC -p82 -sg48 -S'71813586' -p83 -sg50 -g82 -sssg52 -(dp84 -g40 -S'NC_000015.9:g.72105933del' -p85 -sg42 -(dp86 -g44 -g56 -sg46 -S'AC' -p87 -sg48 -S'72105928' -p88 -sg50 -g51 -sssS'hg38' -p89 -(dp90 -g40 -S'NC_000015.10:g.71813586_71813591=' -p91 -sg42 -(dp92 -g44 -g45 -sg46 -g82 -sg48 -S'71813586' -p93 -sg50 -g82 -sssg38 -(dp94 -g40 -S'NC_000015.9:g.72105933del' -p95 -sg42 -(dp96 -g44 -g45 -sg46 -S'AC' -p97 -sg48 -S'72105928' -p98 -sg50 -g51 -sssssS'NM_016346.2:c.946_949=' -p99 -(dp100 -g3 -g4 -sg5 -(lp101 -S'NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC' -p102 -aS'The displayed variants may be artefacts of aligning NM_016346.2 with genome build GRCh37' -p103 -aS'NC_000015.9:g.72105928_72105929 contains 1 genomic base(s) that fail to align to transcript NM_016346.2' -p104 
-aS'Caution should be used when reporting the displayed variant descriptions' -p105 -aS'If you are unsure, please contact admin' -p106 -aS'A more recent version of the selected reference sequence NM_016346.2 is available (NM_016346.3)' -p107 -aS'NM_016346.3:c.946_949GACC= MUST be fully validated prior to use in reports' -p108 -aS'select_variants=NM_016346.3:c.946_949=' -p109 -aS'RefSeqGene record not available' -p110 -asg16 -g4 -sg17 -(lp111 -sg19 -VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA -p112 -sg21 -S'NR2E3' -p113 -sg23 -(dp114 -g25 -S'NP_057430.1:p.(Asp316=)' -p115 -sg27 -S'NP_057430.1:p.(D316=)' -p116 -ssg29 -g30 -sg31 -g4 -sg32 -g4 -sg33 -S'NM_016346.2:c.946_949=' -p117 -sg35 -g4 -sg36 -(dp118 -g38 -(dp119 -g40 -S'NC_000015.9:g.72105933del' -p120 -sg42 -(dp121 -g44 -g45 -sg46 -S'AC' -p122 -sg48 -S'72105928' -p123 -sg50 -g51 -sssg52 -(dp124 -g40 -S'NC_000015.9:g.72105933del' -p125 -sg42 -(dp126 -g44 -g56 -sg46 -S'AC' -p127 -sg48 -S'72105928' -p128 -sg50 -g51 -sssssS'NM_016346.3:c.946_949=' -p129 -(dp130 -g3 -g4 -sg5 -(lp131 -S'NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC' -p132 -aS'The displayed variants may be artefacts of aligning NM_016346.3 with genome build GRCh37' -p133 -aS'NC_000015.9:g.72105928_72105929 contains 1 genomic base(s) that fail to align to transcript NM_016346.3' -p134 -aS'Caution should be used when reporting the displayed variant descriptions' -p135 -aS'If you are unsure, please contact admin' -p136 -aS'RefSeqGene record not available' -p137 -asg16 -g4 -sg17 -(lp138 -sg19 -VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA -p139 -sg21 -S'NR2E3' -p140 -sg23 -(dp141 -g25 -S'NP_057430.1:p.(Asp316=)' -p142 -sg27 -S'NP_057430.1:p.(D316=)' -p143 -ssg29 -g30 -sg31 -g4 -sg32 -g4 -sg33 -S'NM_016346.3:c.946_949=' -p144 -sg35 -g4 -sg36 -(dp145 -g78 -(dp146 -g40 -S'NC_000015.10:g.71813587_71813590=' -p147 -sg42 -(dp148 -g44 -g56 
-sg46 -VGACC -p149 -sg48 -S'71813587' -p150 -sg50 -g149 -sssg52 -(dp151 -g40 -S'NC_000015.9:g.72105933del' -p152 -sg42 -(dp153 -g44 -g56 -sg46 -S'AC' -p154 -sg48 -S'72105928' -p155 -sg50 -g51 -sssg89 -(dp156 -g40 -S'NC_000015.10:g.71813587_71813590=' -p157 -sg42 -(dp158 -g44 -g45 -sg46 -g149 -sg48 -S'71813587' -p159 -sg50 -g149 -sssg38 -(dp160 -g40 -S'NC_000015.9:g.72105933del' -p161 -sg42 -(dp162 -g44 -g45 -sg46 -S'AC' -p163 -sg48 -S'72105928' -p164 -sg50 -g51 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant65.txt b/VariantValidator/testing/testOutputsMasterITS/variant65.txt deleted file mode 100644 index cf0970ce..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant65.txt +++ /dev/null @@ -1,173 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_032790.3:c.126_128=' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_93t1:c.126_128=' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000012.11:g.122064773CCCGCCA>C automapped to NC_000012.11:g.122064785_122064790del' -p9 -aS'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' -p10 -aS'Genome position NC_000012.11:g.122064780 aligns within a Requires Analysis-bp gap in transcript NM_032790.3 between positions c.126_127' -p11 -aS'Caution should be used when reporting the displayed variant descriptions' -p12 -aS'If you are unsure, please contact admin' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -S'' -p15 -sS'alt_genomic_loci' -p16 -(lp17 -(dp18 -S'GRCh37' -p19 -(dp20 -S'HGVS_genomic_description' -p21 -S'NW_004504303.2:g.302883_302888del' -p22 -sS'vcf' -p23 -(dp24 -S'chr' -p25 -S'HG1595_PATCH' -p26 -sS'ref' -p27 -S'CCCGCCA' -p28 -sS'pos' -p29 -S'302871' -p30 -sS'alt' -p31 -S'C' -p32 -sssasS'transcript_description' -p33 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p34 -sS'gene_symbol' -p35 -S'ORAI1' -p36 -sS'HGVS_predicted_protein_consequence' -p37 -(dp38 -S'tlr' -p39 
-S'NP_116179.2(LRG_93p1):p.(Ala42=)' -p40 -sS'slr' -p41 -S'NP_116179.2:p.(A42=)' -p42 -ssS'submitted_variant' -p43 -S'12-122064773-CCCGCCA-C' -p44 -sS'genome_context_intronic_sequence' -p45 -g15 -sS'HGVS_LRG_variant' -p46 -S'LRG_93:g.5299_5301=' -p47 -sS'HGVS_transcript_variant' -p48 -S'NM_032790.3:c.126_128=' -p49 -sS'HGVS_RefSeqGene_variant' -p50 -S'NG_007500.1:g.5299_5301=' -p51 -sS'primary_assembly_loci' -p52 -(dp53 -S'hg19' -p54 -(dp55 -g21 -S'NC_000012.11:g.122064785_122064790del' -p56 -sg23 -(dp57 -g25 -S'chr12' -p58 -sg27 -S'CCCGCCA' -p59 -sg29 -S'122064773' -p60 -sg31 -g32 -sssS'hg38' -p61 -(dp62 -g21 -S'NC_000012.12:g.121626873_121626875=' -p63 -sg23 -(dp64 -g25 -g58 -sg27 -VCCC -p65 -sg29 -S'121626873' -p66 -sg31 -g65 -sssg19 -(dp67 -g21 -S'NC_000012.11:g.122064785_122064790del' -p68 -sg23 -(dp69 -g25 -S'12' -p70 -sg27 -S'CCCGCCA' -p71 -sg29 -S'122064773' -p72 -sg31 -g32 -sssS'GRCh38' -p73 -(dp74 -g21 -S'NC_000012.12:g.121626873_121626875=' -p75 -sg23 -(dp76 -g25 -g70 -sg27 -g65 -sg29 -S'121626873' -p77 -sg31 -g65 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant66.txt b/VariantValidator/testing/testOutputsMasterITS/variant66.txt deleted file mode 100644 index ed5dfbd0..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant66.txt +++ /dev/null @@ -1,174 +0,0 @@ -(dp0 -S'NM_032790.3:c.132_137dup' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_93t1:c.132_137dup' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000012.11:g.122064774CCGCCA>CCGCCA automapped to NC_000012.11:g.122064774_122064779CCGCCA=' -p7 -aS'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' -p8 -aS'NC_000012.11:g.122064773_122064779 contains 6 genomic base(s) that fail to align to transcript NM_032790.3' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -S'' -p13 -sS'alt_genomic_loci' -p14 -(lp15 -(dp16 -S'GRCh37' -p17 -(dp18 -S'HGVS_genomic_description' -p19 -S'NW_004504303.2:g.302868_302887=' -p20 -sS'vcf' -p21 -(dp22 -S'chr' -p23 -S'HG1595_PATCH' -p24 -sS'ref' -p25 -S'GGCCCCGCCACCGCCACCGC' -p26 -sS'pos' -p27 -S'302868' -p28 -sS'alt' -p29 -g26 -sssasS'transcript_description' -p30 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p31 -sS'gene_symbol' -p32 -S'ORAI1' -p33 -sS'HGVS_predicted_protein_consequence' -p34 -(dp35 -S'tlr' -p36 -S'NP_116179.2(LRG_93p1):p.(Pro46_Pro47dup)' -p37 -sS'slr' -p38 -S'NP_116179.2:p.(P46_P47dup)' -p39 -ssS'submitted_variant' -p40 -S'12-122064774-CCGCCA-CCGCCA' -p41 -sS'genome_context_intronic_sequence' -p42 -g13 -sS'HGVS_LRG_variant' -p43 -S'LRG_93:g.5305_5310dup' -p44 -sS'HGVS_transcript_variant' -p45 -S'NM_032790.3:c.132_137dup' -p46 -sS'HGVS_RefSeqGene_variant' -p47 -S'NG_007500.1:g.5305_5310dup' -p48 -sS'primary_assembly_loci' -p49 -(dp50 -S'hg19' -p51 -(dp52 -g19 -S'NC_000012.11:g.122064770_122064789=' -p53 
-sg21 -(dp54 -g23 -S'chr12' -p55 -sg25 -S'GGCCCCGCCACCGCCACCGC' -p56 -sg27 -S'122064770' -p57 -sg29 -g56 -sssS'hg38' -p58 -(dp59 -g19 -S'NC_000012.12:g.121626879_121626884dup' -p60 -sg21 -(dp61 -g23 -g55 -sg25 -S'CCGCCA' -p62 -sg27 -S'121626874' -p63 -sg29 -VCCGCCACCGCCA -p64 -sssg17 -(dp65 -g19 -S'NC_000012.11:g.122064770_122064789=' -p66 -sg21 -(dp67 -g23 -S'12' -p68 -sg25 -g56 -sg27 -S'122064770' -p69 -sg29 -g56 -sssS'GRCh38' -p70 -(dp71 -g19 -S'NC_000012.12:g.121626879_121626884dup' -p72 -sg21 -(dp73 -g23 -g68 -sg25 -S'CCGCCA' -p74 -sg27 -S'121626874' -p75 -sg29 -VCCGCCACCGCCA -p76 -sssssS'flag' -p77 -S'gene_variant' -p78 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant67.txt b/VariantValidator/testing/testOutputsMasterITS/variant67.txt deleted file mode 100644 index 24044bb5..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant67.txt +++ /dev/null @@ -1,168 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_032790.3:c.132_135delinsGCCGT' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_93t1:c.132_135delinsGCCGT' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000012.11:g.122064773CCCGCCACCGCCACCGC>CCCGCCACCGCCGCCGTC automapped to NC_000012.11:g.122064785_122064788delinsGCCGT' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -(dp14 -S'GRCh37' -p15 -(dp16 -S'HGVS_genomic_description' -p17 -S'NW_004504303.2:g.302883_302886delinsGCCGT' -p18 -sS'vcf' -p19 -(dp20 -S'chr' -p21 -S'HG1595_PATCH' -p22 -sS'ref' -p23 -S'ACCG' -p24 -sS'pos' -p25 -S'302883' -p26 -sS'alt' -p27 -VGCCGT -p28 -sssasS'transcript_description' -p29 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p30 -sS'gene_symbol' -p31 -S'ORAI1' -p32 -sS'HGVS_predicted_protein_consequence' -p33 -(dp34 -S'tlr' -p35 -S'NP_116179.2(LRG_93p1):p.(Pro46SerfsTer42)' -p36 -sS'slr' -p37 -S'NP_116179.2:p.(P46Sfs*42)' -p38 -ssS'submitted_variant' -p39 
-S'12-122064773-CCCGCCACCGCCACCGC-CCCGCCACCGCCGCCGTC' -p40 -sS'genome_context_intronic_sequence' -p41 -g11 -sS'HGVS_LRG_variant' -p42 -S'LRG_93:g.5305_5308delinsGCCGT' -p43 -sS'HGVS_transcript_variant' -p44 -S'NM_032790.3:c.132_135delinsGCCGT' -p45 -sS'HGVS_RefSeqGene_variant' -p46 -S'NG_007500.1:g.5305_5308delinsGCCGT' -p47 -sS'primary_assembly_loci' -p48 -(dp49 -S'hg19' -p50 -(dp51 -g17 -S'NC_000012.11:g.122064785_122064788delinsGCCGT' -p52 -sg19 -(dp53 -g21 -S'chr12' -p54 -sg23 -S'ACCG' -p55 -sg25 -S'122064785' -p56 -sg27 -VGCCGT -p57 -sssS'hg38' -p58 -(dp59 -g17 -S'NC_000012.12:g.121626879_121626882delinsGCCGT' -p60 -sg19 -(dp61 -g21 -g54 -sg23 -S'ACCG' -p62 -sg25 -S'121626879' -p63 -sg27 -VGCCGT -p64 -sssg15 -(dp65 -g17 -S'NC_000012.11:g.122064785_122064788delinsGCCGT' -p66 -sg19 -(dp67 -g21 -S'12' -p68 -sg23 -S'ACCG' -p69 -sg25 -S'122064785' -p70 -sg27 -g57 -sssS'GRCh38' -p71 -(dp72 -g17 -S'NC_000012.12:g.121626879_121626882delinsGCCGT' -p73 -sg19 -(dp74 -g21 -g68 -sg23 -S'ACCG' -p75 -sg25 -S'121626879' -p76 -sg27 -g64 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant68.txt b/VariantValidator/testing/testOutputsMasterITS/variant68.txt deleted file mode 100644 index 6e828c28..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant68.txt +++ /dev/null @@ -1,170 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_032790.3:c.129_130insACACCG' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_93t1:c.129_130insACACCG' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' -p9 -aS'NC_000012.11:g.122064777 is one of 6 genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -asS'RefSeqGene_context_intronic_sequence' -p13 -S'' -p14 -sS'alt_genomic_loci' -p15 -(lp16 -(dp17 -S'GRCh37' -p18 -(dp19 -S'HGVS_genomic_description' -p20 -S'NW_004504303.2:g.302875C>A' -p21 -sS'vcf' -p22 -(dp23 -S'chr' -p24 -S'HG1595_PATCH' -p25 -sS'ref' -p26 -S'C' -p27 -sS'pos' -p28 -S'302875' -p29 -sS'alt' -p30 -S'A' -p31 -sssasS'transcript_description' -p32 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p33 -sS'gene_symbol' -p34 -S'ORAI1' -p35 -sS'HGVS_predicted_protein_consequence' -p36 -(dp37 -S'tlr' -p38 -S'NP_116179.2(LRG_93p1):p.(Pro43_Pro44insThrPro)' -p39 -sS'slr' -p40 -S'NP_116179.2:p.(P43_P44insTP)' -p41 -ssS'submitted_variant' -p42 -S'NC_000012.11:g.122064777C>A' -p43 -sS'genome_context_intronic_sequence' -p44 -g14 -sS'HGVS_LRG_variant' -p45 -S'LRG_93:g.5302_5303insACACCG' -p46 -sS'HGVS_transcript_variant' -p47 -S'NM_032790.3:c.129_130insACACCG' -p48 -sS'HGVS_RefSeqGene_variant' -p49 -S'NG_007500.1:g.5302_5303insACACCG' -p50 -sS'primary_assembly_loci' -p51 -(dp52 -S'hg19' -p53 -(dp54 -g20 -S'NC_000012.11:g.122064777C>A' -p55 -sg22 -(dp56 -g24 -S'chr12' -p57 -sg26 
-g27 -sg28 -S'122064777' -p58 -sg30 -g31 -sssS'hg38' -p59 -(dp60 -g20 -S'NC_000012.12:g.121626876_121626877insACACCG' -p61 -sg22 -(dp62 -g24 -g57 -sg26 -g27 -sg28 -S'121626873' -p63 -sg30 -VCCCGACA -p64 -sssg18 -(dp65 -g20 -S'NC_000012.11:g.122064777C>A' -p66 -sg22 -(dp67 -g24 -S'12' -p68 -sg26 -g27 -sg28 -S'122064777' -p69 -sg30 -g31 -sssS'GRCh38' -p70 -(dp71 -g20 -S'NC_000012.12:g.121626876_121626877insACACCG' -p72 -sg22 -(dp73 -g24 -g68 -sg26 -g27 -sg28 -S'121626873' -p74 -sg30 -VCCCGACA -p75 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant69.txt b/VariantValidator/testing/testOutputsMasterITS/variant69.txt deleted file mode 100644 index bf2ab3c4..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant69.txt +++ /dev/null @@ -1,174 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_032790.3:c.128_129insCCACC' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_93t1:c.128_129insCCACC' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' -p9 -aS'NC_000012.11:g.122064775 is one of 6 genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127' -p10 -aS'NC_000012.11:g.122064776 is one of Requires Analysis genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127' -p11 -aS'Caution should be used when reporting the displayed variant descriptions' -p12 -aS'If you are unsure, please contact admin' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -S'' -p15 -sS'alt_genomic_loci' -p16 -(lp17 -(dp18 -S'GRCh37' -p19 -(dp20 -S'HGVS_genomic_description' -p21 -S'NW_004504303.2:g.302874del' -p22 -sS'vcf' -p23 -(dp24 -S'chr' -p25 -S'HG1595_PATCH' -p26 -sS'ref' -p27 -S'CG' -p28 -sS'pos' -p29 -S'302873' -p30 -sS'alt' -p31 -S'C' -p32 -sssasS'transcript_description' -p33 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p34 -sS'gene_symbol' 
-p35 -S'ORAI1' -p36 -sS'HGVS_predicted_protein_consequence' -p37 -(dp38 -S'tlr' -p39 -S'NP_116179.2(LRG_93p1):p.(Pro44HisfsTer22)' -p40 -sS'slr' -p41 -S'NP_116179.2:p.(P44Hfs*22)' -p42 -ssS'submitted_variant' -p43 -S'NC_000012.11:g.122064776delG' -p44 -sS'genome_context_intronic_sequence' -p45 -g15 -sS'HGVS_LRG_variant' -p46 -S'LRG_93:g.5301_5302insCCACC' -p47 -sS'HGVS_transcript_variant' -p48 -S'NM_032790.3:c.128_129insCCACC' -p49 -sS'HGVS_RefSeqGene_variant' -p50 -S'NG_007500.1:g.5301_5302insCCACC' -p51 -sS'primary_assembly_loci' -p52 -(dp53 -S'hg19' -p54 -(dp55 -g21 -S'NC_000012.11:g.122064776del' -p56 -sg23 -(dp57 -g25 -S'chr12' -p58 -sg27 -S'CG' -p59 -sg29 -S'122064775' -p60 -sg31 -g32 -sssS'hg38' -p61 -(dp62 -g21 -S'NC_000012.12:g.121626875_121626876insCCACC' -p63 -sg23 -(dp64 -g25 -g58 -sg27 -g32 -sg29 -S'121626873' -p65 -sg31 -VCCCCCA -p66 -sssg19 -(dp67 -g21 -S'NC_000012.11:g.122064776del' -p68 -sg23 -(dp69 -g25 -S'12' -p70 -sg27 -S'CG' -p71 -sg29 -S'122064775' -p72 -sg31 -g32 -sssS'GRCh38' -p73 -(dp74 -g21 -S'NC_000012.12:g.121626875_121626876insCCACC' -p75 -sg23 -(dp76 -g25 -g70 -sg27 -g32 -sg29 -S'121626873' -p77 -sg31 -VCCCCCA -p78 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant7.txt b/VariantValidator/testing/testOutputsMasterITS/variant7.txt deleted file mode 100644 index 4ae1bfd1..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant7.txt +++ /dev/null @@ -1,148 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589_590delinsCT' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_1t1:c.589_590delinsCT' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000088.3:c.589GG>CT automapped to NM_000088.3:c.589_590delGGinsCT' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -S'' -p11 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A1' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000079.2(LRG_1p1):p.(Gly197Leu)' -p21 -sS'slr' -p22 -S'NP_000079.2:p.(G197L)' -p23 -ssS'submitted_variant' -p24 -S'NM_000088.3:c.589GG>CT' -p25 -sS'genome_context_intronic_sequence' -p26 -g11 -sS'HGVS_LRG_variant' -p27 -S'LRG_1:g.8638_8639delinsCT' -p28 -sS'HGVS_transcript_variant' -p29 -S'NM_000088.3:c.589_590delinsCT' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -S'NG_007400.1:g.8638_8639delinsCT' -p32 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'HGVS_genomic_description' -p37 -S'NC_000017.10:g.48275362_48275363delinsAG' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr17' -p42 -sS'ref' -p43 -S'CC' -p44 -sS'pos' -p45 -S'48275362' -p46 -sS'alt' -p47 -VAG -p48 -sssS'hg38' -p49 -(dp50 -g37 -S'NC_000017.11:g.50198001_50198002delinsAG' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -S'CC' -p53 -sg45 -S'50198001' -p54 -sg47 -VAG -p55 -sssS'GRCh37' -p56 -(dp57 -g37 -S'NC_000017.10:g.48275362_48275363delinsAG' -p58 -sg39 -(dp59 -g41 -S'17' -p60 -sg43 -S'CC' -p61 -sg45 -S'48275362' -p62 -sg47 -g48 -sssS'GRCh38' -p63 -(dp64 -g37 -S'NC_000017.11:g.50198001_50198002delinsAG' -p65 -sg39 -(dp66 -g41 -g60 -sg43 -S'CC' -p67 
-sg45 -S'50198001' -p68 -sg47 -g55 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant70.txt b/VariantValidator/testing/testOutputsMasterITS/variant70.txt deleted file mode 100644 index 146b61fa..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant70.txt +++ /dev/null @@ -1,175 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_032790.3:c.129_130insGCCACCG' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_93t1:c.129_130insGCCACCG' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' -p9 -aS'NC_000012.11:g.122064775 is one of 6 genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127' -p10 -aS'NC_000012.11:g.122064775 is one of Requires Analysis genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127' -p11 -aS'Caution should be used when reporting the displayed variant descriptions' -p12 -aS'If you are unsure, please contact admin' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -S'' -p15 -sS'alt_genomic_loci' -p16 -(lp17 -(dp18 -S'GRCh37' -p19 -(dp20 -S'HGVS_genomic_description' -p21 -S'NW_004504303.2:g.302874dup' -p22 -sS'vcf' -p23 -(dp24 -S'chr' -p25 -S'HG1595_PATCH' -p26 -sS'ref' -p27 -S'G' -p28 -sS'pos' -p29 -S'302874' -p30 -sS'alt' -p31 -S'GG' -p32 -sssasS'transcript_description' -p33 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p34 -sS'gene_symbol' -p35 -S'ORAI1' -p36 -sS'HGVS_predicted_protein_consequence' -p37 -(dp38 -S'tlr' -p39 -S'NP_116179.2(LRG_93p1):p.(Pro44AlafsTer46)' -p40 -sS'slr' -p41 -S'NP_116179.2:p.(P44Afs*46)' -p42 -ssS'submitted_variant' -p43 -S'NC_000012.11:g.122064776dupG' -p44 -sS'genome_context_intronic_sequence' -p45 -g15 -sS'HGVS_LRG_variant' -p46 -S'LRG_93:g.5302_5303insGCCACCG' -p47 -sS'HGVS_transcript_variant' -p48 -S'NM_032790.3:c.129_130insGCCACCG' -p49 
-sS'HGVS_RefSeqGene_variant' -p50 -S'NG_007500.1:g.5302_5303insGCCACCG' -p51 -sS'primary_assembly_loci' -p52 -(dp53 -S'hg19' -p54 -(dp55 -g21 -S'NC_000012.11:g.122064776dup' -p56 -sg23 -(dp57 -g25 -S'chr12' -p58 -sg27 -g28 -sg29 -S'122064776' -p59 -sg31 -S'GG' -p60 -sssS'hg38' -p61 -(dp62 -g21 -S'NC_000012.12:g.121626876_121626877insGCCACCG' -p63 -sg23 -(dp64 -g25 -g58 -sg27 -S'C' -p65 -sg29 -S'121626873' -p66 -sg31 -VCCCGGCCA -p67 -sssg19 -(dp68 -g21 -S'NC_000012.11:g.122064776dup' -p69 -sg23 -(dp70 -g25 -S'12' -p71 -sg27 -g28 -sg29 -S'122064776' -p72 -sg31 -S'GG' -p73 -sssS'GRCh38' -p74 -(dp75 -g21 -S'NC_000012.12:g.121626876_121626877insGCCACCG' -p76 -sg23 -(dp77 -g25 -g71 -sg27 -g65 -sg29 -S'121626873' -p78 -sg31 -VCCCGGCCA -p79 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant71.txt b/VariantValidator/testing/testOutputsMasterITS/variant71.txt deleted file mode 100644 index 918bcb3d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant71.txt +++ /dev/null @@ -1,173 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_032790.3:c.129_130insTTTCCACCG' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_93t1:c.129_130insTTTCCACCG' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' -p9 -aS'NC_000012.11:g.122064776 is one of 7 genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -asS'RefSeqGene_context_intronic_sequence' -p13 -S'' -p14 -sS'alt_genomic_loci' -p15 -(lp16 -(dp17 -S'GRCh37' -p18 -(dp19 -S'HGVS_genomic_description' -p20 -S'NW_004504303.2:g.302874_302875insTTT' -p21 -sS'vcf' -p22 -(dp23 -S'chr' -p24 -S'HG1595_PATCH' -p25 -sS'ref' -p26 -S'G' -p27 -sS'pos' -p28 -S'302874' -p29 -sS'alt' -p30 -S'GTTT' -p31 -sssasS'transcript_description' 
-p32 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p33 -sS'gene_symbol' -p34 -S'ORAI1' -p35 -sS'HGVS_predicted_protein_consequence' -p36 -(dp37 -S'tlr' -p38 -S'NP_116179.2(LRG_93p1):p.(Pro43_Pro44insPheProPro)' -p39 -sS'slr' -p40 -S'NP_116179.2:p.(P43_P44insFPP)' -p41 -ssS'submitted_variant' -p42 -S'NC_000012.11:g.122064776_122064777insTTT' -p43 -sS'genome_context_intronic_sequence' -p44 -g14 -sS'HGVS_LRG_variant' -p45 -S'LRG_93:g.5302_5303insTTTCCACCG' -p46 -sS'HGVS_transcript_variant' -p47 -S'NM_032790.3:c.129_130insTTTCCACCG' -p48 -sS'HGVS_RefSeqGene_variant' -p49 -S'NG_007500.1:g.5302_5303insTTTCCACCG' -p50 -sS'primary_assembly_loci' -p51 -(dp52 -S'hg19' -p53 -(dp54 -g20 -S'NC_000012.11:g.122064776_122064777insTTT' -p55 -sg22 -(dp56 -g24 -S'chr12' -p57 -sg26 -g27 -sg28 -S'122064776' -p58 -sg30 -S'GTTT' -p59 -sssS'hg38' -p60 -(dp61 -g20 -S'NC_000012.12:g.121626876_121626877insTTTCCACCG' -p62 -sg22 -(dp63 -g24 -g57 -sg26 -S'C' -p64 -sg28 -S'121626873' -p65 -sg30 -VCCCGTTTCCA -p66 -sssg18 -(dp67 -g20 -S'NC_000012.11:g.122064776_122064777insTTT' -p68 -sg22 -(dp69 -g24 -S'12' -p70 -sg26 -g27 -sg28 -S'122064776' -p71 -sg30 -S'GTTT' -p72 -sssS'GRCh38' -p73 -(dp74 -g20 -S'NC_000012.12:g.121626876_121626877insTTTCCACCG' -p75 -sg22 -(dp76 -g24 -g70 -sg26 -g64 -sg28 -S'121626873' -p77 -sg30 -VCCCGTTTCCA -p78 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant72.txt b/VariantValidator/testing/testOutputsMasterITS/variant72.txt deleted file mode 100644 index 7c20b67a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant72.txt +++ /dev/null @@ -1,173 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_032790.3:c.125_126delinsGCCA' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_93t1:c.125_126delinsGCCA' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' -p9 -aS'Genome position NC_000012.11:g.122064776 aligns within a Requires Analysis-bp gap in transcript NM_032790.3 between positions c.126_127' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -asS'RefSeqGene_context_intronic_sequence' -p13 -S'' -p14 -sS'alt_genomic_loci' -p15 -(lp16 -(dp17 -S'GRCh37' -p18 -(dp19 -S'HGVS_genomic_description' -p20 -S'NW_004504303.2:g.302870_302873del' -p21 -sS'vcf' -p22 -(dp23 -S'chr' -p24 -S'HG1595_PATCH' -p25 -sS'ref' -p26 -S'GCCCC' -p27 -sS'pos' -p28 -S'302869' -p29 -sS'alt' -p30 -S'G' -p31 -sssasS'transcript_description' -p32 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p33 -sS'gene_symbol' -p34 -S'ORAI1' -p35 -sS'HGVS_predicted_protein_consequence' -p36 -(dp37 -S'tlr' -p38 -S'NP_116179.2(LRG_93p1):p.(Ala42GlyfsTer23)' -p39 -sS'slr' -p40 -S'NP_116179.2:p.(A42Gfs*23)' -p41 -ssS'submitted_variant' -p42 -S'NC_000012.11:g.122064772_122064775del' -p43 -sS'genome_context_intronic_sequence' -p44 -g14 -sS'HGVS_LRG_variant' -p45 -S'LRG_93:g.5298_5299delinsGCCA' -p46 -sS'HGVS_transcript_variant' -p47 -S'NM_032790.3:c.125_126delinsGCCA' -p48 -sS'HGVS_RefSeqGene_variant' -p49 -S'NG_007500.1:g.5298_5299delinsGCCA' -p50 -sS'primary_assembly_loci' -p51 -(dp52 -S'hg19' -p53 -(dp54 -g20 -S'NC_000012.11:g.122064772_122064775del' -p55 
-sg22 -(dp56 -g24 -S'chr12' -p57 -sg26 -S'GCCCC' -p58 -sg28 -S'122064771' -p59 -sg30 -g31 -sssS'hg38' -p60 -(dp61 -g20 -S'NC_000012.12:g.121626867_121626873delinsGCCA' -p62 -sg22 -(dp63 -g24 -g57 -sg26 -S'CCCCGCC' -p64 -sg28 -S'121626867' -p65 -sg30 -S'GCCA' -p66 -sssg18 -(dp67 -g20 -S'NC_000012.11:g.122064772_122064775del' -p68 -sg22 -(dp69 -g24 -S'12' -p70 -sg26 -S'GCCCC' -p71 -sg28 -S'122064771' -p72 -sg30 -g31 -sssS'GRCh38' -p73 -(dp74 -g20 -S'NC_000012.12:g.121626867_121626873delinsGCCA' -p75 -sg22 -(dp76 -g24 -g70 -sg26 -S'CCCCGCC' -p77 -sg28 -S'121626867' -p78 -sg30 -g66 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant73.txt b/VariantValidator/testing/testOutputsMasterITS/variant73.txt deleted file mode 100644 index a089ce0a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant73.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_032790.3:c.128_129insCCCCGCCACC' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_93t1:c.128_129insCCCCGCCACC' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' -p9 -aS'Genome position NC_000012.11:g.122064782 aligns within a 6-bp gap in transcript NM_032790.3 between positions c.126_127' -p10 -aS'Genome position NC_000012.11:g.122064776 aligns within a 6-bp gap in transcript NM_032790.3 between positions c.126_127' -p11 -aS'Caution should be used when reporting the displayed variant descriptions' -p12 -aS'If you are unsure, please contact admin' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -S'' -p15 -sS'alt_genomic_loci' -p16 -(lp17 -(dp18 -S'GRCh37' -p19 -(dp20 -S'HGVS_genomic_description' -p21 -S'NW_004504303.2:g.302870_302873dup' -p22 -sS'vcf' -p23 -(dp24 -S'chr' -p25 -S'HG1595_PATCH' -p26 -sS'ref' -p27 -S'CCCC' -p28 -sS'pos' -p29 -S'302870' -p30 -sS'alt' -p31 -S'CCCCCCCC' -p32 -sssasS'transcript_description' -p33 -VHomo sapiens ORAI 
calcium release-activated calcium modulator 1 (ORAI1), mRNA -p34 -sS'gene_symbol' -p35 -S'ORAI1' -p36 -sS'HGVS_predicted_protein_consequence' -p37 -(dp38 -S'tlr' -p39 -S'NP_116179.2(LRG_93p1):p.(Pro45AlafsTer46)' -p40 -sS'slr' -p41 -S'NP_116179.2:p.(P45Afs*46)' -p42 -ssS'submitted_variant' -p43 -S'NC_000012.11:g.122064772_122064775dup' -p44 -sS'genome_context_intronic_sequence' -p45 -g15 -sS'HGVS_LRG_variant' -p46 -S'LRG_93:g.5301_5302insCCCCGCCACC' -p47 -sS'HGVS_transcript_variant' -p48 -S'NM_032790.3:c.128_129insCCCCGCCACC' -p49 -sS'HGVS_RefSeqGene_variant' -p50 -S'NG_007500.1:g.5301_5302insCCCCGCCACC' -p51 -sS'primary_assembly_loci' -p52 -(dp53 -S'hg19' -p54 -(dp55 -g21 -S'NC_000012.11:g.122064772_122064775dup' -p56 -sg23 -(dp57 -g25 -S'chr12' -p58 -sg27 -S'CCCC' -p59 -sg29 -S'122064772' -p60 -sg31 -S'CCCCCCCC' -p61 -sssS'hg38' -p62 -(dp63 -g21 -S'NC_000012.12:g.121626875_121626876insCCCCGCCACC' -p64 -sg23 -(dp65 -g25 -g58 -sg27 -S'C' -p66 -sg29 -S'121626873' -p67 -sg31 -VCCCCCCCGCCA -p68 -sssg19 -(dp69 -g21 -S'NC_000012.11:g.122064772_122064775dup' -p70 -sg23 -(dp71 -g25 -S'12' -p72 -sg27 -S'CCCC' -p73 -sg29 -S'122064772' -p74 -sg31 -S'CCCCCCCC' -p75 -sssS'GRCh38' -p76 -(dp77 -g21 -S'NC_000012.12:g.121626875_121626876insCCCCGCCACC' -p78 -sg23 -(dp79 -g25 -g72 -sg27 -g66 -sg29 -S'121626873' -p80 -sg31 -VCCCCCCCGCCA -p81 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant74.txt b/VariantValidator/testing/testOutputsMasterITS/variant74.txt deleted file mode 100644 index 43fb280b..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant74.txt +++ /dev/null @@ -1,172 +0,0 @@ -(dp0 -S'NM_032790.3:c.126_127insTTTTCCGCCA' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_93t1:c.126_127insTTTTCCGCCA' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' -p7 -aS'Genome position NC_000012.11:g.122064774 aligns within a Requires Analysis-bp gap in transcript NM_032790.3 between positions c.126_127' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -S'' -p12 -sS'alt_genomic_loci' -p13 -(lp14 -(dp15 -S'GRCh37' -p16 -(dp17 -S'HGVS_genomic_description' -p18 -S'NW_004504303.2:g.302871_302872insTTTT' -p19 -sS'vcf' -p20 -(dp21 -S'chr' -p22 -S'HG1595_PATCH' -p23 -sS'ref' -p24 -S'C' -p25 -sS'pos' -p26 -S'302871' -p27 -sS'alt' -p28 -S'CTTTT' -p29 -sssasS'transcript_description' -p30 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p31 -sS'gene_symbol' -p32 -S'ORAI1' -p33 -sS'HGVS_predicted_protein_consequence' -p34 -(dp35 -S'tlr' -p36 -S'NP_116179.2(LRG_93p1):p.(Pro43PhefsTer48)' -p37 -sS'slr' -p38 -S'NP_116179.2:p.(P43Ffs*48)' -p39 -ssS'submitted_variant' -p40 -S'NC_000012.11:g.122064773_122064774insTTTT' -p41 -sS'genome_context_intronic_sequence' -p42 -g12 -sS'HGVS_LRG_variant' -p43 -S'LRG_93:g.5299_5300insTTTTCCGCCA' -p44 -sS'HGVS_transcript_variant' -p45 -S'NM_032790.3:c.126_127insTTTTCCGCCA' -p46 -sS'HGVS_RefSeqGene_variant' -p47 -S'NG_007500.1:g.5299_5300insTTTTCCGCCA' -p48 -sS'primary_assembly_loci' -p49 -(dp50 -S'hg19' -p51 -(dp52 -g18 -S'NC_000012.11:g.122064773_122064774insTTTT' -p53 -sg20 
-(dp54 -g22 -S'chr12' -p55 -sg24 -g25 -sg26 -S'122064773' -p56 -sg28 -S'CTTTT' -p57 -sssS'hg38' -p58 -(dp59 -g18 -S'NC_000012.12:g.121626873_121626874insTTTTCCGCCA' -p60 -sg20 -(dp61 -g22 -g55 -sg24 -g25 -sg26 -S'121626873' -p62 -sg28 -VCTTTTCCGCCA -p63 -sssg16 -(dp64 -g18 -S'NC_000012.11:g.122064773_122064774insTTTT' -p65 -sg20 -(dp66 -g22 -S'12' -p67 -sg24 -g25 -sg26 -S'122064773' -p68 -sg28 -S'CTTTT' -p69 -sssS'GRCh38' -p70 -(dp71 -g18 -S'NC_000012.12:g.121626873_121626874insTTTTCCGCCA' -p72 -sg20 -(dp73 -g22 -g67 -sg24 -g25 -sg26 -S'121626873' -p74 -sg28 -VCTTTTCCGCCA -p75 -sssssS'flag' -p76 -S'gene_variant' -p77 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant75.txt b/VariantValidator/testing/testOutputsMasterITS/variant75.txt deleted file mode 100644 index ee8ea330..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant75.txt +++ /dev/null @@ -1,172 +0,0 @@ -(dp0 -S'NM_032790.3:c.126C>A' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_93t1:c.126C>A' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' -p7 -aS'Genome position NC_000012.11:g.122064778 aligns within a Requires Analysis-bp gap in transcript NM_032790.3 between positions c.126_127' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -S'' -p12 -sS'alt_genomic_loci' -p13 -(lp14 -(dp15 -S'GRCh37' -p16 -(dp17 -S'HGVS_genomic_description' -p18 -S'NW_004504303.2:g.302871_302876del' -p19 -sS'vcf' -p20 -(dp21 -S'chr' -p22 -S'HG1595_PATCH' -p23 -sS'ref' -p24 -S'GCCCCGC' -p25 -sS'pos' -p26 -S'302869' -p27 -sS'alt' -p28 -S'G' -p29 -sssasS'transcript_description' -p30 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p31 -sS'gene_symbol' -p32 -S'ORAI1' -p33 -sS'HGVS_predicted_protein_consequence' -p34 
-(dp35 -S'tlr' -p36 -S'NP_116179.2(LRG_93p1):p.(Ala42=)' -p37 -sS'slr' -p38 -S'NP_116179.2:p.(A42=)' -p39 -ssS'submitted_variant' -p40 -S'NC_000012.11:g.122064772_122064777del' -p41 -sS'genome_context_intronic_sequence' -p42 -g12 -sS'HGVS_LRG_variant' -p43 -S'LRG_93:g.5299C>A' -p44 -sS'HGVS_transcript_variant' -p45 -S'NM_032790.3:c.126C>A' -p46 -sS'HGVS_RefSeqGene_variant' -p47 -S'NG_007500.1:g.5299C>A' -p48 -sS'primary_assembly_loci' -p49 -(dp50 -S'hg19' -p51 -(dp52 -g18 -S'NC_000012.11:g.122064773_122064778del' -p53 -sg20 -(dp54 -g22 -S'chr12' -p55 -sg24 -S'GCCCCGC' -p56 -sg26 -S'122064771' -p57 -sg28 -g29 -sssS'hg38' -p58 -(dp59 -g18 -S'NC_000012.12:g.121626873C>A' -p60 -sg20 -(dp61 -g22 -g55 -sg24 -VC -p62 -sg26 -S'121626873' -p63 -sg28 -VA -p64 -sssg16 -(dp65 -g18 -S'NC_000012.11:g.122064773_122064778del' -p66 -sg20 -(dp67 -g22 -S'12' -p68 -sg24 -S'GCCCCGC' -p69 -sg26 -S'122064771' -p70 -sg28 -g29 -sssS'GRCh38' -p71 -(dp72 -g18 -S'NC_000012.12:g.121626873C>A' -p73 -sg20 -(dp74 -g22 -g68 -sg24 -g62 -sg26 -S'121626873' -p75 -sg28 -g64 -sssssS'flag' -p76 -S'gene_variant' -p77 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant76.txt b/VariantValidator/testing/testOutputsMasterITS/variant76.txt deleted file mode 100644 index ddd322e5..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant76.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_032790.3:c.131_132insCCCGCCACCGCC' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_93t1:c.131_132insCCCGCCACCGCC' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' -p9 -aS'Genome position NC_000012.11:g.122064778 aligns within a 6-bp gap in transcript NM_032790.3 between positions c.126_127' -p10 -aS'Genome position NC_000012.11:g.122064784 aligns within a 6-bp gap in transcript NM_032790.3 between positions c.126_127' -p11 -aS'Caution should be used when reporting the displayed variant descriptions' -p12 -aS'If you are unsure, please contact admin' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -S'' -p15 -sS'alt_genomic_loci' -p16 -(lp17 -(dp18 -S'GRCh37' -p19 -(dp20 -S'HGVS_genomic_description' -p21 -S'NW_004504303.2:g.302871_302876dup' -p22 -sS'vcf' -p23 -(dp24 -S'chr' -p25 -S'HG1595_PATCH' -p26 -sS'ref' -p27 -S'CCCCGC' -p28 -sS'pos' -p29 -S'302870' -p30 -sS'alt' -p31 -S'CCCCGCCCCCGC' -p32 -sssasS'transcript_description' -p33 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p34 -sS'gene_symbol' -p35 -S'ORAI1' -p36 -sS'HGVS_predicted_protein_consequence' -p37 -(dp38 -S'tlr' -p39 -S'NP_116179.2(LRG_93p1):p.(Pro44_Pro47dup)' -p40 -sS'slr' -p41 -S'NP_116179.2:p.(P44_P47dup)' -p42 -ssS'submitted_variant' -p43 -S'NC_000012.11:g.122064772_122064777dup' -p44 -sS'genome_context_intronic_sequence' -p45 -g15 -sS'HGVS_LRG_variant' -p46 -S'LRG_93:g.5304_5305insCCCGCCACCGCC' -p47 -sS'HGVS_transcript_variant' -p48 -S'NM_032790.3:c.131_132insCCCGCCACCGCC' -p49 -sS'HGVS_RefSeqGene_variant' -p50 
-S'NG_007500.1:g.5304_5305insCCCGCCACCGCC' -p51 -sS'primary_assembly_loci' -p52 -(dp53 -S'hg19' -p54 -(dp55 -g21 -S'NC_000012.11:g.122064773_122064778dup' -p56 -sg23 -(dp57 -g25 -S'chr12' -p58 -sg27 -S'CCCCGC' -p59 -sg29 -S'122064772' -p60 -sg31 -S'CCCCGCCCCCGC' -p61 -sssS'hg38' -p62 -(dp63 -g21 -S'NC_000012.12:g.121626878_121626879insCCCGCCACCGCC' -p64 -sg23 -(dp65 -g25 -g58 -sg27 -S'C' -p66 -sg29 -S'121626873' -p67 -sg31 -VCCCGCCCCCGCCA -p68 -sssg19 -(dp69 -g21 -S'NC_000012.11:g.122064773_122064778dup' -p70 -sg23 -(dp71 -g25 -S'12' -p72 -sg27 -S'CCCCGC' -p73 -sg29 -S'122064772' -p74 -sg31 -S'CCCCGCCCCCGC' -p75 -sssS'GRCh38' -p76 -(dp77 -g21 -S'NC_000012.12:g.121626878_121626879insCCCGCCACCGCC' -p78 -sg23 -(dp79 -g25 -g72 -sg27 -g66 -sg29 -S'121626873' -p80 -sg31 -VCCCGCCCCCGCCA -p81 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant77.txt b/VariantValidator/testing/testOutputsMasterITS/variant77.txt deleted file mode 100644 index c518e7f3..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant77.txt +++ /dev/null @@ -1,175 +0,0 @@ -(dp0 -S'NM_032790.3:c.135_136insACCGCCACCG' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_93t1:c.135_136insACCGCCACCG' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' -p7 -aS'NC_000012.11:g.122064778 is one of Requires Analysis genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -S'' -p12 -sS'alt_genomic_loci' -p13 -(lp14 -(dp15 -S'GRCh37' -p16 -(dp17 -S'HGVS_genomic_description' -p18 -S'NW_004504303.2:g.302877_302880dup' -p19 -sS'vcf' -p20 -(dp21 -S'chr' -p22 -S'HG1595_PATCH' -p23 -sS'ref' -p24 -S'ACCG' -p25 -sS'pos' -p26 -S'302877' -p27 -sS'alt' -p28 
-S'ACCGACCG' -p29 -sssasS'transcript_description' -p30 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p31 -sS'gene_symbol' -p32 -S'ORAI1' -p33 -sS'HGVS_predicted_protein_consequence' -p34 -(dp35 -S'tlr' -p36 -S'NP_116179.2(LRG_93p1):p.(Pro46ThrfsTer45)' -p37 -sS'slr' -p38 -S'NP_116179.2:p.(P46Tfs*45)' -p39 -ssS'submitted_variant' -p40 -S'NC_000012.11:g.122064779_122064782dup' -p41 -sS'genome_context_intronic_sequence' -p42 -g12 -sS'HGVS_LRG_variant' -p43 -S'LRG_93:g.5308_5309insACCGCCACCG' -p44 -sS'HGVS_transcript_variant' -p45 -S'NM_032790.3:c.135_136insACCGCCACCG' -p46 -sS'HGVS_RefSeqGene_variant' -p47 -S'NG_007500.1:g.5308_5309insACCGCCACCG' -p48 -sS'primary_assembly_loci' -p49 -(dp50 -S'hg19' -p51 -(dp52 -g18 -S'NC_000012.11:g.122064779_122064782dup' -p53 -sg20 -(dp54 -g22 -S'chr12' -p55 -sg24 -S'ACCG' -p56 -sg26 -S'122064779' -p57 -sg28 -S'ACCGACCG' -p58 -sssS'hg38' -p59 -(dp60 -g18 -S'NC_000012.12:g.121626882_121626883insACCGCCACCG' -p61 -sg20 -(dp62 -g22 -g55 -sg24 -S'C' -p63 -sg26 -S'121626873' -p64 -sg28 -VCCCGCCACCGA -p65 -sssg16 -(dp66 -g18 -S'NC_000012.11:g.122064779_122064782dup' -p67 -sg20 -(dp68 -g22 -S'12' -p69 -sg24 -S'ACCG' -p70 -sg26 -S'122064779' -p71 -sg28 -S'ACCGACCG' -p72 -sssS'GRCh38' -p73 -(dp74 -g18 -S'NC_000012.12:g.121626882_121626883insACCGCCACCG' -p75 -sg20 -(dp76 -g22 -g69 -sg24 -g63 -sg26 -S'121626873' -p77 -sg28 -VCCCGCCACCGA -p78 -sssssS'flag' -p79 -S'gene_variant' -p80 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant78.txt b/VariantValidator/testing/testOutputsMasterITS/variant78.txt deleted file mode 100644 index 0ee9f818..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant78.txt +++ /dev/null @@ -1,173 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_032790.3:c.126_127insA' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_93t1:c.126_127insA' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' -p9 -aS'Genome position NC_000012.11:g.122064776 aligns within a Requires Analysis-bp gap in transcript NM_032790.3 between positions c.126_127' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -asS'RefSeqGene_context_intronic_sequence' -p13 -S'' -p14 -sS'alt_genomic_loci' -p15 -(lp16 -(dp17 -S'GRCh37' -p18 -(dp19 -S'HGVS_genomic_description' -p20 -S'NW_004504303.2:g.302872_302876del' -p21 -sS'vcf' -p22 -(dp23 -S'chr' -p24 -S'HG1595_PATCH' -p25 -sS'ref' -p26 -S'GGCCCC' -p27 -sS'pos' -p28 -S'302868' -p29 -sS'alt' -p30 -S'G' -p31 -sssasS'transcript_description' -p32 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p33 -sS'gene_symbol' -p34 -S'ORAI1' -p35 -sS'HGVS_predicted_protein_consequence' -p36 -(dp37 -S'tlr' -p38 -S'NP_116179.2(LRG_93p1):p.(Pro43ThrfsTer45)' -p39 -sS'slr' -p40 -S'NP_116179.2:p.(P43Tfs*45)' -p41 -ssS'submitted_variant' -p42 -S'NC_000012.11:g.122064772_122064782del' -p43 -sS'genome_context_intronic_sequence' -p44 -g14 -sS'HGVS_LRG_variant' -p45 -S'LRG_93:g.5299_5300insA' -p46 -sS'HGVS_transcript_variant' -p47 -S'NM_032790.3:c.126_127insA' -p48 -sS'HGVS_RefSeqGene_variant' -p49 -S'NG_007500.1:g.5299_5300insA' -p50 -sS'primary_assembly_loci' -p51 -(dp52 -S'hg19' -p53 -(dp54 -g20 -S'NC_000012.11:g.122064774_122064778del' -p55 -sg22 -(dp56 -g24 -S'chr12' 
-p57 -sg26 -S'GGCCCC' -p58 -sg28 -S'122064770' -p59 -sg30 -g31 -sssS'hg38' -p60 -(dp61 -g20 -S'NC_000012.12:g.121626873_121626874insA' -p62 -sg22 -(dp63 -g24 -g57 -sg26 -S'C' -p64 -sg28 -S'121626873' -p65 -sg30 -VCA -p66 -sssg18 -(dp67 -g20 -S'NC_000012.11:g.122064774_122064778del' -p68 -sg22 -(dp69 -g24 -S'12' -p70 -sg26 -S'GGCCCC' -p71 -sg28 -S'122064770' -p72 -sg30 -g31 -sssS'GRCh38' -p73 -(dp74 -g20 -S'NC_000012.12:g.121626873_121626874insA' -p75 -sg22 -(dp76 -g24 -g70 -sg26 -g64 -sg28 -S'121626873' -p77 -sg30 -VCA -p78 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant79.txt b/VariantValidator/testing/testOutputsMasterITS/variant79.txt deleted file mode 100644 index 53f810a2..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant79.txt +++ /dev/null @@ -1,788 +0,0 @@ -(dp0 -S'NM_021088.3:c.471_473dup' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_021088.3 with genome build GRCh37' -p7 -aS'NC_000002.11:g.95847040_95847043 contains 3 genomic base(s) that fail to align to transcript NM_021088.3' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -g4 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens zinc finger protein 2 (ZNF2), transcript variant 1, mRNA -p15 -sS'gene_symbol' -p16 -S'ZNF2' -p17 -sS'HGVS_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_066574.2:p.(Arg159dup)' -p21 -sS'slr' -p22 -S'NP_066574.2:p.(R159dup)' -p23 -ssS'submitted_variant' -p24 -S'NC_000002.11:g.95847041_95847043GCG=' -p25 -sS'genome_context_intronic_sequence' -p26 -g4 -sS'HGVS_LRG_variant' -p27 -g4 -sS'HGVS_transcript_variant' -p28 -S'NM_021088.3:c.471_473dup' -p29 -sS'HGVS_RefSeqGene_variant' -p30 -S'NG_033798.1:g.20883_20885dup' -p31 
-sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000002.11:g.95847037_95847050=' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr2' -p41 -sS'ref' -p42 -S'GCTTGCGGCGGCGA' -p43 -sS'pos' -p44 -S'95847037' -p45 -sS'alt' -p46 -g43 -sssS'hg38' -p47 -(dp48 -g36 -S'NC_000002.12:g.95181299_95181301dup' -p49 -sg38 -(dp50 -g40 -g41 -sg42 -S'GCG' -p51 -sg44 -S'95181296' -p52 -sg46 -VGCGGCG -p53 -sssS'GRCh37' -p54 -(dp55 -g36 -S'NC_000002.11:g.95847037_95847050=' -p56 -sg38 -(dp57 -g40 -S'2' -p58 -sg42 -g43 -sg44 -S'95847037' -p59 -sg46 -g43 -sssS'GRCh38' -p60 -(dp61 -g36 -S'NC_000002.12:g.95181299_95181301dup' -p62 -sg38 -(dp63 -g40 -g58 -sg42 -S'GCG' -p64 -sg44 -S'95181296' -p65 -sg46 -VGCGGCG -p66 -sssssS'NM_001291605.1:c.510_512dup' -p67 -(dp68 -g3 -g4 -sg5 -(lp69 -S'The displayed variants may be artefacts of aligning NM_001291605.1 with genome build GRCh37' -p70 -aS'NC_000002.11:g.95847040_95847043 contains 3 genomic base(s) that fail to align to transcript NM_001291605.1' -p71 -aS'Caution should be used when reporting the displayed variant descriptions' -p72 -aS'If you are unsure, please contact admin' -p73 -aS'RefSeqGene record not available' -p74 -asg11 -g4 -sg12 -(lp75 -sg14 -VHomo sapiens zinc finger protein 2 (ZNF2), transcript variant 5, mRNA -p76 -sg16 -S'ZNF2' -p77 -sg18 -(dp78 -g20 -S'NP_001278534.1:p.(Arg172dup)' -p79 -sg22 -S'NP_001278534.1:p.(R172dup)' -p80 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_001291605.1:c.510_512dup' -p81 -sg30 -g4 -sg32 -(dp82 -g34 -(dp83 -g36 -S'NC_000002.11:g.95847037_95847050=' -p84 -sg38 -(dp85 -g40 -g41 -sg42 -g43 -sg44 -S'95847037' -p86 -sg46 -g43 -sssg47 -(dp87 -g36 -S'NC_000002.12:g.95181299_95181301dup' -p88 -sg38 -(dp89 -g40 -g41 -sg42 -S'GCG' -p90 -sg44 -S'95181296' -p91 -sg46 -VGCGGCG -p92 -sssg54 -(dp93 -g36 -S'NC_000002.11:g.95847037_95847050=' -p94 -sg38 -(dp95 -g40 -g58 -sg42 -g43 -sg44 -S'95847037' -p96 -sg46 -g43 -sssg60 -(dp97 -g36 
-S'NC_000002.12:g.95181299_95181301dup' -p98 -sg38 -(dp99 -g40 -g58 -sg42 -S'GCG' -p100 -sg44 -S'95181296' -p101 -sg46 -VGCGGCG -p102 -sssssS'NM_001017396.2:c.345_347dup' -p103 -(dp104 -g3 -g4 -sg5 -(lp105 -S'The displayed variants may be artefacts of aligning NM_001017396.2 with genome build GRCh37' -p106 -aS'NC_000002.11:g.95847040_95847043 contains 3 genomic base(s) that fail to align to transcript NM_001017396.2' -p107 -aS'Caution should be used when reporting the displayed variant descriptions' -p108 -aS'If you are unsure, please contact admin' -p109 -aS'RefSeqGene record not available' -p110 -asg11 -g4 -sg12 -(lp111 -sg14 -VHomo sapiens zinc finger protein 2 (ZNF2), transcript variant 2, mRNA -p112 -sg16 -S'ZNF2' -p113 -sg18 -(dp114 -g20 -S'NP_001017396.1:p.(Arg117dup)' -p115 -sg22 -S'NP_001017396.1:p.(R117dup)' -p116 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_001017396.2:c.345_347dup' -p117 -sg30 -g4 -sg32 -(dp118 -g34 -(dp119 -g36 -S'NC_000002.11:g.95847037_95847050=' -p120 -sg38 -(dp121 -g40 -g41 -sg42 -S'GCTTGCGGCGGCGA' -p122 -sg44 -S'95847037' -p123 -sg46 -g122 -sssg47 -(dp124 -g36 -S'NC_000002.12:g.95181299_95181301dup' -p125 -sg38 -(dp126 -g40 -g41 -sg42 -S'GCG' -p127 -sg44 -S'95181296' -p128 -sg46 -VGCGGCG -p129 -sssg54 -(dp130 -g36 -S'NC_000002.11:g.95847037_95847050=' -p131 -sg38 -(dp132 -g40 -g58 -sg42 -g122 -sg44 -S'95847037' -p133 -sg46 -g122 -sssg60 -(dp134 -g36 -S'NC_000002.12:g.95181299_95181301dup' -p135 -sg38 -(dp136 -g40 -g58 -sg42 -S'GCG' -p137 -sg44 -S'95181296' -p138 -sg46 -VGCGGCG -p139 -sssssS'NM_001282398.1:c.357_359dup' -p140 -(dp141 -g3 -g4 -sg5 -(lp142 -S'The displayed variants may be artefacts of aligning NM_001282398.1 with genome build GRCh37' -p143 -aS'NC_000002.11:g.95847040_95847043 contains 3 genomic base(s) that fail to align to transcript NM_001282398.1' -p144 -aS'Caution should be used when reporting the displayed variant descriptions' -p145 -aS'If you are unsure, please contact admin' -p146 -aS'RefSeqGene record not 
available' -p147 -asg11 -g4 -sg12 -(lp148 -sg14 -VHomo sapiens zinc finger protein 2 (ZNF2), transcript variant 3, mRNA -p149 -sg16 -S'ZNF2' -p150 -sg18 -(dp151 -g20 -S'NP_001269327.1:p.(Arg121dup)' -p152 -sg22 -S'NP_001269327.1:p.(R121dup)' -p153 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_001282398.1:c.357_359dup' -p154 -sg30 -g4 -sg32 -(dp155 -g34 -(dp156 -g36 -S'NC_000002.11:g.95847037_95847050=' -p157 -sg38 -(dp158 -g40 -g41 -sg42 -g122 -sg44 -S'95847037' -p159 -sg46 -g122 -sssg47 -(dp160 -g36 -S'NC_000002.12:g.95181299_95181301dup' -p161 -sg38 -(dp162 -g40 -g41 -sg42 -S'GCG' -p163 -sg44 -S'95181296' -p164 -sg46 -VGCGGCG -p165 -sssg54 -(dp166 -g36 -S'NC_000002.11:g.95847037_95847050=' -p167 -sg38 -(dp168 -g40 -g58 -sg42 -g122 -sg44 -S'95847037' -p169 -sg46 -g122 -sssg60 -(dp170 -g36 -S'NC_000002.12:g.95181299_95181301dup' -p171 -sg38 -(dp172 -g40 -g58 -sg42 -S'GCG' -p173 -sg44 -S'95181296' -p174 -sg46 -VGCGGCG -p175 -sssssS'flag' -p176 -S'gene_variant' -p177 -sS'NM_001291604.1:c.231_233dup' -p178 -(dp179 -g3 -g4 -sg5 -(lp180 -S'The displayed variants may be artefacts of aligning NM_001291604.1 with genome build GRCh37' -p181 -aS'NC_000002.11:g.95847040_95847043 contains 3 genomic base(s) that fail to align to transcript NM_001291604.1' -p182 -aS'Caution should be used when reporting the displayed variant descriptions' -p183 -aS'If you are unsure, please contact admin' -p184 -aS'RefSeqGene record not available' -p185 -asg11 -g4 -sg12 -(lp186 -sg14 -VHomo sapiens zinc finger protein 2 (ZNF2), transcript variant 4, mRNA -p187 -sg16 -S'ZNF2' -p188 -sg18 -(dp189 -g20 -S'NP_001278533.1:p.(Arg79dup)' -p190 -sg22 -S'NP_001278533.1:p.(R79dup)' -p191 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_001291604.1:c.231_233dup' -p192 -sg30 -g4 -sg32 -(dp193 -g34 -(dp194 -g36 -S'NC_000002.11:g.95847037_95847050=' -p195 -sg38 -(dp196 -g40 -g41 -sg42 -g43 -sg44 -S'95847037' -p197 -sg46 -g43 -sssg47 -(dp198 -g36 -S'NC_000002.12:g.95181299_95181301dup' -p199 -sg38 -(dp200 -g40 -g41 
-sg42 -S'GCG' -p201 -sg44 -S'95181296' -p202 -sg46 -VGCGGCG -p203 -sssg54 -(dp204 -g36 -S'NC_000002.11:g.95847037_95847050=' -p205 -sg38 -(dp206 -g40 -g58 -sg42 -g43 -sg44 -S'95847037' -p207 -sg46 -g43 -sssg60 -(dp208 -g36 -S'NC_000002.12:g.95181299_95181301dup' -p209 -sg38 -(dp210 -g40 -g58 -sg42 -S'GCG' -p211 -sg44 -S'95181296' -p212 -sg46 -VGCGGCG -p213 -sssssS'NM_021088.2:c.471_473dup' -p214 -(dp215 -g3 -g4 -sg5 -(lp216 -S'The displayed variants may be artefacts of aligning NM_021088.2 with genome build GRCh37' -p217 -aS'NC_000002.11:g.95847040_95847043 contains 3 genomic base(s) that fail to align to transcript NM_021088.2' -p218 -aS'Caution should be used when reporting the displayed variant descriptions' -p219 -aS'If you are unsure, please contact admin' -p220 -aS'A more recent version of the selected reference sequence NM_021088.2 is available (NM_021088.3)' -p221 -aS'NM_021088.3:c.471_473dupGCG MUST be fully validated prior to use in reports' -p222 -aS'select_variants=NM_021088.3:c.471_473dup' -p223 -aS'RefSeqGene record not available' -p224 -asg11 -g4 -sg12 -(lp225 -sg14 -VHomo sapiens zinc finger protein 2 (ZNF2), transcript variant 1, mRNA -p226 -sg16 -S'ZNF2' -p227 -sg18 -(dp228 -g20 -S'NP_066574.2:p.(Arg159dup)' -p229 -sg22 -S'NP_066574.2:p.(R159dup)' -p230 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_021088.2:c.471_473dup' -p231 -sg30 -g4 -sg32 -(dp232 -g34 -(dp233 -g36 -S'NC_000002.11:g.95847037_95847050=' -p234 -sg38 -(dp235 -g40 -g41 -sg42 -g43 -sg44 -S'95847037' -p236 -sg46 -g43 -sssg54 -(dp237 -g36 -S'NC_000002.11:g.95847037_95847050=' -p238 -sg38 -(dp239 -g40 -g58 -sg42 -g43 -sg44 -S'95847037' -p240 -sg46 -g43 -sssssS'NM_001017396.1:c.345_347dup' -p241 -(dp242 -g3 -g4 -sg5 -(lp243 -S'The displayed variants may be artefacts of aligning NM_001017396.1 with genome build GRCh37' -p244 -aS'NC_000002.11:g.95847040_95847043 contains 3 genomic base(s) that fail to align to transcript NM_001017396.1' -p245 -aS'Caution should be used when reporting the 
displayed variant descriptions' -p246 -aS'If you are unsure, please contact admin' -p247 -aS'A more recent version of the selected reference sequence NM_001017396.1 is available (NM_001017396.2)' -p248 -aS'NM_001017396.2:c.345_347dupGCG MUST be fully validated prior to use in reports' -p249 -aS'select_variants=NM_001017396.2:c.345_347dup' -p250 -aS'RefSeqGene record not available' -p251 -asg11 -g4 -sg12 -(lp252 -sg14 -VHomo sapiens zinc finger protein 2 (ZNF2), transcript variant 2, mRNA -p253 -sg16 -S'ZNF2' -p254 -sg18 -(dp255 -g20 -S'NP_001017396.1:p.(Arg117dup)' -p256 -sg22 -S'NP_001017396.1:p.(R117dup)' -p257 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_001017396.1:c.345_347dup' -p258 -sg30 -g4 -sg32 -(dp259 -g34 -(dp260 -g36 -S'NC_000002.11:g.95847037_95847050=' -p261 -sg38 -(dp262 -g40 -g41 -sg42 -g122 -sg44 -S'95847037' -p263 -sg46 -g122 -sssg54 -(dp264 -g36 -S'NC_000002.11:g.95847037_95847050=' -p265 -sg38 -(dp266 -g40 -g58 -sg42 -g122 -sg44 -S'95847037' -p267 -sg46 -g122 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant8.txt b/VariantValidator/testing/testOutputsMasterITS/variant8.txt deleted file mode 100644 index a9aa91a8..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant8.txt +++ /dev/null @@ -1,60 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'base start position must be <= end position' -p7 -aS'Did you mean NM_000094.3:c.6751-3_6751-2del?' 
-p8 -asS'RefSeqGene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -g4 -sS'gene_symbol' -p13 -g4 -sS'HGVS_predicted_protein_consequence' -p14 -(dp15 -S'tlr' -p16 -g4 -sS'slr' -p17 -g4 -ssS'submitted_variant' -p18 -S'NM_000094.3:c.6751-2_6751-3del' -p19 -sS'genome_context_intronic_sequence' -p20 -g4 -sS'HGVS_LRG_variant' -p21 -g4 -sS'HGVS_transcript_variant' -p22 -g4 -sS'HGVS_RefSeqGene_variant' -p23 -g4 -sS'primary_assembly_loci' -p24 -(dp25 -ssS'flag' -p26 -S'warning' -p27 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant80.txt b/VariantValidator/testing/testOutputsMasterITS/variant80.txt deleted file mode 100644 index fbb125f2..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant80.txt +++ /dev/null @@ -1,556 +0,0 @@ -(dp0 -S'NM_001083585.1:c.*344_*368dup' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_001083585.1 with genome build GRCh37' -p7 -aS'NC_000017.10:g.5286859_5286913 contains 25 genomic base(s) that fail to align to transcript NM_001083585.1' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'A more recent version of the selected reference sequence NM_001083585.1 is available (NM_001083585.2)' -p11 -aS'NM_001083585.2:c.*344_*368dupTAGTGTTTGGAATTTTCTGTTCATA MUST be fully validated prior to use in reports' -p12 -aS'select_variants=NM_001083585.2:c.*344_*368dup' -p13 -aS'RefSeqGene record not available' -p14 -asS'RefSeqGene_context_intronic_sequence' -p15 -g4 -sS'alt_genomic_loci' -p16 -(lp17 -sS'transcript_description' -p18 -VHomo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 2, mRNA -p19 -sS'gene_symbol' -p20 -S'RABEP1' -p21 -sS'HGVS_predicted_protein_consequence' -p22 -(dp23 -S'tlr' -p24 -S'NP_001077054.1:p.?' 
-p25 -sS'slr' -p26 -S'NP_001077054.1:p.?' -p27 -ssS'submitted_variant' -p28 -S'NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=' -p29 -sS'genome_context_intronic_sequence' -p30 -g4 -sS'HGVS_LRG_variant' -p31 -g4 -sS'HGVS_transcript_variant' -p32 -S'NM_001083585.1:c.*344_*368dup' -p33 -sS'HGVS_RefSeqGene_variant' -p34 -g4 -sS'primary_assembly_loci' -p35 -(dp36 -S'hg19' -p37 -(dp38 -S'HGVS_genomic_description' -p39 -S'NC_000017.10:g.5286857_5286915=' -p40 -sS'vcf' -p41 -(dp42 -S'chr' -p43 -S'chr17' -p44 -sS'ref' -p45 -S'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA' -p46 -sS'pos' -p47 -S'5286857' -p48 -sS'alt' -p49 -g46 -sssS'GRCh37' -p50 -(dp51 -g39 -S'NC_000017.10:g.5286857_5286915=' -p52 -sg41 -(dp53 -g43 -S'17' -p54 -sg45 -g46 -sg47 -S'5286857' -p55 -sg49 -g46 -sssssS'NM_004703.5:c.*344_*368dup' -p56 -(dp57 -g3 -g4 -sg5 -(lp58 -S'The displayed variants may be artefacts of aligning NM_004703.5 with genome build GRCh37' -p59 -aS'NC_000017.10:g.5286859_5286913 contains 25 genomic base(s) that fail to align to transcript NM_004703.5' -p60 -aS'Caution should be used when reporting the displayed variant descriptions' -p61 -aS'If you are unsure, please contact admin' -p62 -aS'RefSeqGene record not available' -p63 -asg15 -g4 -sg16 -(lp64 -sg18 -VHomo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 1, mRNA -p65 -sg20 -S'RABEP1' -p66 -sg22 -(dp67 -g24 -S'NP_004694.2:p.?' -p68 -sg26 -S'NP_004694.2:p.?' 
-p69 -ssg28 -g29 -sg30 -g4 -sg31 -g4 -sg32 -S'NM_004703.5:c.*344_*368dup' -p70 -sg34 -g4 -sg35 -(dp71 -g37 -(dp72 -g39 -S'NC_000017.10:g.5286857_5286915=' -p73 -sg41 -(dp74 -g43 -g44 -sg45 -g46 -sg47 -S'5286857' -p75 -sg49 -g46 -sssS'hg38' -p76 -(dp77 -g39 -S'NC_000017.11:g.5383567_5383591dup' -p78 -sg41 -(dp79 -g43 -g44 -sg45 -S'TAGTGTTTGGAATTTTCTGTTCATA' -p80 -sg47 -S'5383567' -p81 -sg49 -VTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATA -p82 -sssg50 -(dp83 -g39 -S'NC_000017.10:g.5286857_5286915=' -p84 -sg41 -(dp85 -g43 -g54 -sg45 -g46 -sg47 -S'5286857' -p86 -sg49 -g46 -sssS'GRCh38' -p87 -(dp88 -g39 -S'NC_000017.11:g.5383567_5383591dup' -p89 -sg41 -(dp90 -g43 -g54 -sg45 -S'TAGTGTTTGGAATTTTCTGTTCATA' -p91 -sg47 -S'5383567' -p92 -sg49 -VTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATA -p93 -sssssS'NM_004703.4:c.*344_*368dup' -p94 -(dp95 -g3 -g4 -sg5 -(lp96 -S'The displayed variants may be artefacts of aligning NM_004703.4 with genome build GRCh37' -p97 -aS'NC_000017.10:g.5286859_5286913 contains 25 genomic base(s) that fail to align to transcript NM_004703.4' -p98 -aS'Caution should be used when reporting the displayed variant descriptions' -p99 -aS'If you are unsure, please contact admin' -p100 -aS'A more recent version of the selected reference sequence NM_004703.4 is available (NM_004703.5)' -p101 -aS'NM_004703.5:c.*344_*368dupTAGTGTTTGGAATTTTCTGTTCATA MUST be fully validated prior to use in reports' -p102 -aS'select_variants=NM_004703.5:c.*344_*368dup' -p103 -aS'RefSeqGene record not available' -p104 -asg15 -g4 -sg16 -(lp105 -sg18 -VHomo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 1, mRNA -p106 -sg20 -S'RABEP1' -p107 -sg22 -(dp108 -g24 -S'NP_004694.2:p.?' -p109 -sg26 -S'NP_004694.2:p.?' 
-p110 -ssg28 -g29 -sg30 -g4 -sg31 -g4 -sg32 -S'NM_004703.4:c.*344_*368dup' -p111 -sg34 -g4 -sg35 -(dp112 -g37 -(dp113 -g39 -S'NC_000017.10:g.5286857_5286915=' -p114 -sg41 -(dp115 -g43 -g44 -sg45 -g46 -sg47 -S'5286857' -p116 -sg49 -g46 -sssg50 -(dp117 -g39 -S'NC_000017.10:g.5286857_5286915=' -p118 -sg41 -(dp119 -g43 -g54 -sg45 -g46 -sg47 -S'5286857' -p120 -sg49 -g46 -sssssS'flag' -p121 -S'gene_variant' -p122 -sS'NM_001291581.1:c.*344_*368dup' -p123 -(dp124 -g3 -g4 -sg5 -(lp125 -S'The displayed variants may be artefacts of aligning NM_001291581.1 with genome build GRCh37' -p126 -aS'NC_000017.10:g.5286859_5286913 contains 25 genomic base(s) that fail to align to transcript NM_001291581.1' -p127 -aS'Caution should be used when reporting the displayed variant descriptions' -p128 -aS'If you are unsure, please contact admin' -p129 -aS'RefSeqGene record not available' -p130 -asg15 -g4 -sg16 -(lp131 -sg18 -VHomo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 3, mRNA -p132 -sg20 -S'RABEP1' -p133 -sg22 -(dp134 -g24 -S'NP_001278510.1:p.?' -p135 -sg26 -S'NP_001278510.1:p.?' 
-p136 -ssg28 -g29 -sg30 -g4 -sg31 -g4 -sg32 -S'NM_001291581.1:c.*344_*368dup' -p137 -sg34 -g4 -sg35 -(dp138 -g37 -(dp139 -g39 -S'NC_000017.10:g.5286857_5286915=' -p140 -sg41 -(dp141 -g43 -g44 -sg45 -g46 -sg47 -S'5286857' -p142 -sg49 -g46 -sssg76 -(dp143 -g39 -S'NC_000017.11:g.5383567_5383591dup' -p144 -sg41 -(dp145 -g43 -g44 -sg45 -S'TAGTGTTTGGAATTTTCTGTTCATA' -p146 -sg47 -S'5383567' -p147 -sg49 -VTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATA -p148 -sssg50 -(dp149 -g39 -S'NC_000017.10:g.5286857_5286915=' -p150 -sg41 -(dp151 -g43 -g54 -sg45 -g46 -sg47 -S'5286857' -p152 -sg49 -g46 -sssg87 -(dp153 -g39 -S'NC_000017.11:g.5383567_5383591dup' -p154 -sg41 -(dp155 -g43 -g54 -sg45 -S'TAGTGTTTGGAATTTTCTGTTCATA' -p156 -sg47 -S'5383567' -p157 -sg49 -VTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATA -p158 -sssssS'NM_001083585.2:c.*344_*368dup' -p159 -(dp160 -g3 -g4 -sg5 -(lp161 -S'The displayed variants may be artefacts of aligning NM_001083585.2 with genome build GRCh37' -p162 -aS'NC_000017.10:g.5286859_5286913 contains 25 genomic base(s) that fail to align to transcript NM_001083585.2' -p163 -aS'Caution should be used when reporting the displayed variant descriptions' -p164 -aS'If you are unsure, please contact admin' -p165 -aS'RefSeqGene record not available' -p166 -asg15 -g4 -sg16 -(lp167 -sg18 -VHomo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 2, mRNA -p168 -sg20 -S'RABEP1' -p169 -sg22 -(dp170 -g24 -S'NP_001077054.1:p.?' -p171 -sg26 -S'NP_001077054.1:p.?' 
-p172 -ssg28 -g29 -sg30 -g4 -sg31 -g4 -sg32 -S'NM_001083585.2:c.*344_*368dup' -p173 -sg34 -g4 -sg35 -(dp174 -g37 -(dp175 -g39 -S'NC_000017.10:g.5286857_5286915=' -p176 -sg41 -(dp177 -g43 -g44 -sg45 -g46 -sg47 -S'5286857' -p178 -sg49 -g46 -sssg76 -(dp179 -g39 -S'NC_000017.11:g.5383567_5383591dup' -p180 -sg41 -(dp181 -g43 -g44 -sg45 -S'TAGTGTTTGGAATTTTCTGTTCATA' -p182 -sg47 -S'5383567' -p183 -sg49 -VTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATA -p184 -sssg50 -(dp185 -g39 -S'NC_000017.10:g.5286857_5286915=' -p186 -sg41 -(dp187 -g43 -g54 -sg45 -g46 -sg47 -S'5286857' -p188 -sg49 -g46 -sssg87 -(dp189 -g39 -S'NC_000017.11:g.5383567_5383591dup' -p190 -sg41 -(dp191 -g43 -g54 -sg45 -S'TAGTGTTTGGAATTTTCTGTTCATA' -p192 -sg47 -S'5383567' -p193 -sg49 -VTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATA -p194 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant81.txt b/VariantValidator/testing/testOutputsMasterITS/variant81.txt deleted file mode 100644 index beac0895..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant81.txt +++ /dev/null @@ -1,237 +0,0 @@ -(dp0 -S'NM_001080423.3:c.1020del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_001080423.3 with genome build GRCh37' -p7 -aS'NM_001080423.3:c.1019_1022 contains 1 transcript base(s) that fail to align to chromosome NC_000003.11' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'RefSeqGene record not available' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA -p16 -sS'gene_symbol' -p17 -S'GRIP2' -p18 -sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_001073892.3:p.(Ser341GlnfsTer4)' -p22 -sS'slr' -p23 
-S'NP_001073892.3:p.(S341Qfs*4)' -p24 -ssS'submitted_variant' -p25 -S'NC_000003.11:g.14561629_14561630GC=' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'HGVS_LRG_variant' -p28 -g4 -sS'HGVS_transcript_variant' -p29 -S'NM_001080423.3:c.1020del' -p30 -sS'HGVS_RefSeqGene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'HGVS_genomic_description' -p36 -S'NC_000003.11:g.14561624_14561630=' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr3' -p41 -sS'ref' -p42 -S'CTGAGGC' -p43 -sS'pos' -p44 -S'14561624' -p45 -sS'alt' -p46 -g43 -sssS'hg38' -p47 -(dp48 -g36 -S'NC_000003.12:g.14520122del' -p49 -sg38 -(dp50 -g40 -g41 -sg42 -S'AG' -p51 -sg44 -S'14520119' -p52 -sg46 -S'A' -p53 -sssS'GRCh37' -p54 -(dp55 -g36 -S'NC_000003.11:g.14561624_14561630=' -p56 -sg38 -(dp57 -g40 -S'3' -p58 -sg42 -g43 -sg44 -S'14561624' -p59 -sg46 -g43 -sssS'GRCh38' -p60 -(dp61 -g36 -S'NC_000003.12:g.14520122del' -p62 -sg38 -(dp63 -g40 -g58 -sg42 -S'AG' -p64 -sg44 -S'14520119' -p65 -sg46 -g53 -sssssS'flag' -p66 -S'gene_variant' -p67 -sS'NM_001080423.2:c.1311del' -p68 -(dp69 -g3 -g4 -sg5 -(lp70 -S'The displayed variants may be artefacts of aligning NM_001080423.2 with genome build GRCh37' -p71 -aS'NM_001080423.2:c.1310_1313 contains 1 transcript base(s) that fail to align to chromosome NC_000003.11' -p72 -aS'Caution should be used when reporting the displayed variant descriptions' -p73 -aS'If you are unsure, please contact admin' -p74 -aS'A more recent version of the selected reference sequence NM_001080423.2 is available (NM_001080423.3)' -p75 -aS'NM_001080423.3:c.1311delG MUST be fully validated prior to use in reports' -p76 -aS'select_variants=NM_001080423.3:c.1311del' -p77 -aS'RefSeqGene record not available' -p78 -asg12 -g4 -sg13 -(lp79 -sg15 -VHomo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA -p80 -sg17 -S'GRIP2' -p81 -sg19 -(dp82 -g21 -S'NP_001073892.2:p.(Ser438GlnfsTer4)' -p83 -sg23 -S'NP_001073892.2:p.(S438Qfs*4)' -p84 -ssg25 -g26 -sg27 -g4 
-sg28 -g4 -sg29 -S'NM_001080423.2:c.1311del' -p85 -sg31 -g4 -sg32 -(dp86 -g34 -(dp87 -g36 -S'NC_000003.11:g.14561624_14561630=' -p88 -sg38 -(dp89 -g40 -g41 -sg42 -g43 -sg44 -S'14561624' -p90 -sg46 -g43 -sssg54 -(dp91 -g36 -S'NC_000003.11:g.14561624_14561630=' -p92 -sg38 -(dp93 -g40 -g58 -sg42 -g43 -sg44 -S'14561624' -p94 -sg46 -g43 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant82.txt b/VariantValidator/testing/testOutputsMasterITS/variant82.txt deleted file mode 100644 index 4bbe08e3..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant82.txt +++ /dev/null @@ -1,223 +0,0 @@ -(dp0 -S'NM_001080423.3:c.1016_1020=' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA -p12 -sS'gene_symbol' -p13 -S'GRIP2' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001073892.3:p.(Arg339=)' -p18 -sS'slr' -p19 -S'NP_001073892.3:p.(R339=)' -p20 -ssS'submitted_variant' -p21 -S'NC_000003.11:g.14561629_14561630insG' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_001080423.3:c.1016_1020=' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000003.11:g.14561629dup' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr3' -p37 -sS'ref' -p38 -S'G' -p39 -sS'pos' -p40 -S'14561628' -p41 -sS'alt' -p42 -VGG -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000003.12:g.14520120_14520124=' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -VGGGCC -p48 -sg40 -S'14520120' -p49 -sg42 -g48 -sssS'GRCh37' -p50 -(dp51 -g32 -S'NC_000003.11:g.14561629dup' -p52 -sg34 -(dp53 -g36 -S'3' -p54 -sg38 -g39 -sg40 -S'14561628' -p55 
-sg42 -VGG -p56 -sssS'GRCh38' -p57 -(dp58 -g32 -S'NC_000003.12:g.14520120_14520124=' -p59 -sg34 -(dp60 -g36 -g54 -sg38 -g48 -sg40 -S'14520120' -p61 -sg42 -g48 -sssssS'flag' -p62 -S'gene_variant' -p63 -sS'NM_001080423.2:c.1307_1311=' -p64 -(dp65 -g3 -g4 -sg5 -(lp66 -S'A more recent version of the selected reference sequence NM_001080423.2 is available (NM_001080423.3)' -p67 -aS'NM_001080423.3:c.1307_1311delinsGGCCC MUST be fully validated prior to use in reports' -p68 -aS'select_variants=NM_001080423.3:c.1307_1311delinsGGCCC' -p69 -aS'RefSeqGene record not available' -p70 -asg8 -g4 -sg9 -(lp71 -sg11 -VHomo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA -p72 -sg13 -S'GRIP2' -p73 -sg15 -(dp74 -g17 -S'NP_001073892.2:p.(Arg436=)' -p75 -sg19 -S'NP_001073892.2:p.(R436=)' -p76 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001080423.2:c.1307_1311=' -p77 -sg27 -g4 -sg28 -(dp78 -g30 -(dp79 -g32 -S'NC_000003.11:g.14561629dup' -p80 -sg34 -(dp81 -g36 -g37 -sg38 -g39 -sg40 -S'14561628' -p82 -sg42 -VGG -p83 -sssg50 -(dp84 -g32 -S'NC_000003.11:g.14561629dup' -p85 -sg34 -(dp86 -g36 -g54 -sg38 -g39 -sg40 -S'14561628' -p87 -sg42 -VGG -p88 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant83.txt b/VariantValidator/testing/testOutputsMasterITS/variant83.txt deleted file mode 100644 index 619f514d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant83.txt +++ /dev/null @@ -1,232 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_018717.5:c.1515_1526del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA -p14 -sS'gene_symbol' -p15 -S'MAML3' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_061187.3:p.(Gln507_Gln510del)' -p20 -sS'slr' -p21 -S'NP_061187.3:p.(Q507_Q510del)' -p22 -ssS'submitted_variant' -p23 -S'NC_000004.11:g.140811111_140811122del' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'HGVS_LRG_variant' -p26 -g6 -sS'HGVS_transcript_variant' -p27 -S'NM_018717.5:c.1515_1526del' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000004.11:g.140811111_140811122del' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr4' -p39 -sS'ref' -p40 -S'TTGCTGCTGCTGC' -p41 -sS'pos' -p42 -S'140811063' -p43 -sS'alt' -p44 -S'T' -p45 -sssS'GRCh37' -p46 -(dp47 -g34 -S'NC_000004.11:g.140811111_140811122del' -p48 -sg36 -(dp49 -g38 -S'4' -p50 -sg40 -S'TTGCTGCTGCTGC' -p51 -sg42 -S'140811063' -p52 -sg44 -g45 -sssssS'NM_018717.4:c.1465_1469=' -p53 -(dp54 -g5 -g6 -sg7 -(lp55 -S'The displayed variants may be artefacts of aligning NM_018717.4 with genome build GRCh37' -p56 -aS'NC_000004.11:g.140811063_140811075 contains 12 genomic base(s) that fail to align to transcript NM_018717.4' -p57 -aS'Caution should be used when reporting the displayed variant descriptions' -p58 -aS'If 
you are unsure, please contact admin' -p59 -aS'A more recent version of the selected reference sequence NM_018717.4 is available (NM_018717.5)' -p60 -aS'NM_018717.5:c.1465_1469CAACA= MUST be fully validated prior to use in reports' -p61 -aS'select_variants=NM_018717.5:c.1465_1469=' -p62 -aS'RefSeqGene record not available' -p63 -asg10 -g6 -sg11 -(lp64 -sg13 -VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA -p65 -sg15 -S'MAML3' -p66 -sg17 -(dp67 -g19 -S'NP_061187.2:p.(Gln489=)' -p68 -sg21 -S'NP_061187.2:p.(Q489=)' -p69 -ssg23 -g24 -sg25 -g6 -sg26 -g6 -sg27 -S'NM_018717.4:c.1465_1469=' -p70 -sg29 -g6 -sg30 -(dp71 -g32 -(dp72 -g34 -S'NC_000004.11:g.140811111_140811122del' -p73 -sg36 -(dp74 -g38 -g39 -sg40 -S'TTGCTGCTGCTGC' -p75 -sg42 -S'140811063' -p76 -sg44 -g45 -sssS'hg38' -p77 -(dp78 -g34 -S'NC_000004.12:g.139889957_139889968del' -p79 -sg36 -(dp80 -g38 -g39 -sg40 -S'TTGCTGCTGCTGC' -p81 -sg42 -S'139889909' -p82 -sg44 -g45 -sssg46 -(dp83 -g34 -S'NC_000004.11:g.140811111_140811122del' -p84 -sg36 -(dp85 -g38 -g50 -sg40 -S'TTGCTGCTGCTGC' -p86 -sg42 -S'140811063' -p87 -sg44 -g45 -sssS'GRCh38' -p88 -(dp89 -g34 -S'NC_000004.12:g.139889957_139889968del' -p90 -sg36 -(dp91 -g38 -g50 -sg40 -S'TTGCTGCTGCTGC' -p92 -sg42 -S'139889909' -p93 -sg44 -g45 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant84.txt b/VariantValidator/testing/testOutputsMasterITS/variant84.txt deleted file mode 100644 index ed34c0ef..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant84.txt +++ /dev/null @@ -1,228 +0,0 @@ -(dp0 -S'NM_018717.5:c.1468_1479=' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA -p12 -sS'gene_symbol' -p13 -S'MAML3' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_061187.3:p.(Gln490=)' -p18 -sS'slr' -p19 -S'NP_061187.3:p.(Q490=)' -p20 -ssS'submitted_variant' -p21 -S'NC_000004.11:g.140811111_140811122CTGCTGCTGCTG=' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_018717.5:c.1468_1479=' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000004.11:g.140811111_140811122=' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr4' -p37 -sS'ref' -p38 -VCTGCTGCTGCTG -p39 -sS'pos' -p40 -S'140811111' -p41 -sS'alt' -p42 -g39 -sssS'GRCh37' -p43 -(dp44 -g32 -S'NC_000004.11:g.140811111_140811122=' -p45 -sg34 -(dp46 -g36 -S'4' -p47 -sg38 -g39 -sg40 -S'140811111' -p48 -sg42 -g39 -sssssS'flag' -p49 -S'gene_variant' -p50 -sS'NM_018717.4:c.1503_1514dup' -p51 -(dp52 -g3 -g4 -sg5 -(lp53 -S'The displayed variants may be artefacts of aligning NM_018717.4 with genome build GRCh37' -p54 -aS'NC_000004.11:g.140811063_140811075 contains 12 genomic base(s) that fail to align to transcript NM_018717.4' -p55 -aS'Caution should be used when reporting the displayed variant descriptions' -p56 -aS'If you are unsure, please contact admin' 
-p57 -aS'A more recent version of the selected reference sequence NM_018717.4 is available (NM_018717.5)' -p58 -aS'NM_018717.5:c.1503_1514dupGCAGCAGCAGCA MUST be fully validated prior to use in reports' -p59 -aS'select_variants=NM_018717.5:c.1503_1514dup' -p60 -aS'RefSeqGene record not available' -p61 -asg8 -g4 -sg9 -(lp62 -sg11 -VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA -p63 -sg13 -S'MAML3' -p64 -sg15 -(dp65 -g17 -S'NP_061187.2:p.(Gln503_Gln506dup)' -p66 -sg19 -S'NP_061187.2:p.(Q503_Q506dup)' -p67 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_018717.4:c.1503_1514dup' -p68 -sg27 -g4 -sg28 -(dp69 -g30 -(dp70 -g32 -S'NC_000004.11:g.140811095_140811128=' -p71 -sg34 -(dp72 -g36 -g37 -sg38 -S'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG' -p73 -sg40 -S'140811095' -p74 -sg42 -g73 -sssS'hg38' -p75 -(dp76 -g32 -S'NC_000004.12:g.139889941_139889974=' -p77 -sg34 -(dp78 -g36 -g37 -sg38 -S'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG' -p79 -sg40 -S'139889941' -p80 -sg42 -g79 -sssg43 -(dp81 -g32 -S'NC_000004.11:g.140811095_140811128=' -p82 -sg34 -(dp83 -g36 -g47 -sg38 -g73 -sg40 -S'140811095' -p84 -sg42 -g73 -sssS'GRCh38' -p85 -(dp86 -g32 -S'NC_000004.12:g.139889941_139889974=' -p87 -sg34 -(dp88 -g36 -g47 -sg38 -g79 -sg40 -S'139889941' -p89 -sg42 -g79 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant85.txt b/VariantValidator/testing/testOutputsMasterITS/variant85.txt deleted file mode 100644 index 4ef183ec..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant85.txt +++ /dev/null @@ -1,232 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_018717.5:c.1521_1526del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA -p14 -sS'gene_symbol' -p15 -S'MAML3' -p16 -sS'HGVS_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_061187.3:p.(Gln509_Gln510del)' -p20 -sS'slr' -p21 -S'NP_061187.3:p.(Q509_Q510del)' -p22 -ssS'submitted_variant' -p23 -S'NC_000004.11:g.140811117_140811122del' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'HGVS_LRG_variant' -p26 -g6 -sS'HGVS_transcript_variant' -p27 -S'NM_018717.5:c.1521_1526del' -p28 -sS'HGVS_RefSeqGene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'HGVS_genomic_description' -p34 -S'NC_000004.11:g.140811117_140811122del' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr4' -p39 -sS'ref' -p40 -S'TTGCTGC' -p41 -sS'pos' -p42 -S'140811063' -p43 -sS'alt' -p44 -S'T' -p45 -sssS'GRCh37' -p46 -(dp47 -g34 -S'NC_000004.11:g.140811117_140811122del' -p48 -sg36 -(dp49 -g38 -S'4' -p50 -sg40 -S'TTGCTGC' -p51 -sg42 -S'140811063' -p52 -sg44 -g45 -sssssS'NM_018717.4:c.1509_1514dup' -p53 -(dp54 -g5 -g6 -sg7 -(lp55 -S'The displayed variants may be artefacts of aligning NM_018717.4 with genome build GRCh37' -p56 -aS'NC_000004.11:g.140811063 is one of 12 genomic base(s) that fail to align to transcript NM_018717.4 between positions c.1467_1468' -p57 -aS'Caution should be used when reporting the displayed variant descriptions' 
-p58 -aS'If you are unsure, please contact admin' -p59 -aS'A more recent version of the selected reference sequence NM_018717.4 is available (NM_018717.5)' -p60 -aS'NM_018717.5:c.1509_1514dupGCAGCA MUST be fully validated prior to use in reports' -p61 -aS'select_variants=NM_018717.5:c.1509_1514dup' -p62 -aS'RefSeqGene record not available' -p63 -asg10 -g6 -sg11 -(lp64 -sg13 -VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA -p65 -sg15 -S'MAML3' -p66 -sg17 -(dp67 -g19 -S'NP_061187.2:p.(Gln505_Gln506dup)' -p68 -sg21 -S'NP_061187.2:p.(Q505_Q506dup)' -p69 -ssg23 -g24 -sg25 -g6 -sg26 -g6 -sg27 -S'NM_018717.4:c.1509_1514dup' -p70 -sg29 -g6 -sg30 -(dp71 -g32 -(dp72 -g34 -S'NC_000004.11:g.140811117_140811122del' -p73 -sg36 -(dp74 -g38 -g39 -sg40 -S'TTGCTGC' -p75 -sg42 -S'140811063' -p76 -sg44 -g45 -sssS'hg38' -p77 -(dp78 -g34 -S'NC_000004.12:g.139889963_139889968del' -p79 -sg36 -(dp80 -g38 -g39 -sg40 -S'TTGCTGC' -p81 -sg42 -S'139889909' -p82 -sg44 -g45 -sssg46 -(dp83 -g34 -S'NC_000004.11:g.140811117_140811122del' -p84 -sg36 -(dp85 -g38 -g50 -sg40 -S'TTGCTGC' -p86 -sg42 -S'140811063' -p87 -sg44 -g45 -sssS'GRCh38' -p88 -(dp89 -g34 -S'NC_000004.12:g.139889963_139889968del' -p90 -sg36 -(dp91 -g38 -g50 -sg40 -S'TTGCTGC' -p92 -sg42 -S'139889909' -p93 -sg44 -g45 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant86.txt b/VariantValidator/testing/testOutputsMasterITS/variant86.txt deleted file mode 100644 index 1196705d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant86.txt +++ /dev/null @@ -1,232 +0,0 @@ -(dp0 -S'NM_018717.5:c.1473_1479del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'RefSeqGene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA -p12 -sS'gene_symbol' -p13 -S'MAML3' -p14 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_061187.3:p.(Gln491HisfsTer29)' -p18 -sS'slr' -p19 -S'NP_061187.3:p.(Q491Hfs*29)' -p20 -ssS'submitted_variant' -p21 -S'NC_000004.11:g.140811111_140811117del' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'HGVS_LRG_variant' -p24 -g4 -sS'HGVS_transcript_variant' -p25 -S'NM_018717.5:c.1473_1479del' -p26 -sS'HGVS_RefSeqGene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'HGVS_genomic_description' -p32 -S'NC_000004.11:g.140811111_140811117del' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr4' -p37 -sS'ref' -p38 -S'GCTGCTGC' -p39 -sS'pos' -p40 -S'140811110' -p41 -sS'alt' -p42 -S'G' -p43 -sssS'GRCh37' -p44 -(dp45 -g32 -S'NC_000004.11:g.140811111_140811117del' -p46 -sg34 -(dp47 -g36 -S'4' -p48 -sg38 -S'GCTGCTGC' -p49 -sg40 -S'140811110' -p50 -sg42 -g43 -sssssS'flag' -p51 -S'gene_variant' -p52 -sS'NM_018717.4:c.1468_1472dup' -p53 -(dp54 -g3 -g4 -sg5 -(lp55 -S'The displayed variants may be artefacts of aligning NM_018717.4 with genome build GRCh37' -p56 -aS'NC_000004.11:g.140811110 is one of Requires Analysis genomic base(s) that fail to align to transcript NM_018717.4 between positions c.1467_1468' -p57 -aS'Caution should be used when reporting the displayed variant 
descriptions' -p58 -aS'If you are unsure, please contact admin' -p59 -aS'A more recent version of the selected reference sequence NM_018717.4 is available (NM_018717.5)' -p60 -aS'NM_018717.5:c.1468_1472dupCAGCA MUST be fully validated prior to use in reports' -p61 -aS'select_variants=NM_018717.5:c.1468_1472dup' -p62 -aS'RefSeqGene record not available' -p63 -asg8 -g4 -sg9 -(lp64 -sg11 -VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA -p65 -sg13 -S'MAML3' -p66 -sg15 -(dp67 -g17 -S'NP_061187.2:p.(Gln491HisfsTer29)' -p68 -sg19 -S'NP_061187.2:p.(Q491Hfs*29)' -p69 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_018717.4:c.1468_1472dup' -p70 -sg27 -g4 -sg28 -(dp71 -g30 -(dp72 -g32 -S'NC_000004.11:g.140811111_140811117del' -p73 -sg34 -(dp74 -g36 -g37 -sg38 -S'GCTGCTGC' -p75 -sg40 -S'140811110' -p76 -sg42 -g43 -sssS'hg38' -p77 -(dp78 -g32 -S'NC_000004.12:g.139889957_139889963del' -p79 -sg34 -(dp80 -g36 -g37 -sg38 -S'GCTGCTGC' -p81 -sg40 -S'139889956' -p82 -sg42 -g43 -sssg44 -(dp83 -g32 -S'NC_000004.11:g.140811111_140811117del' -p84 -sg34 -(dp85 -g36 -g48 -sg38 -S'GCTGCTGC' -p86 -sg40 -S'140811110' -p87 -sg42 -g43 -sssS'GRCh38' -p88 -(dp89 -g32 -S'NC_000004.12:g.139889957_139889963del' -p90 -sg34 -(dp91 -g36 -g48 -sg38 -S'GCTGCTGC' -p92 -sg40 -S'139889956' -p93 -sg42 -g43 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant87.txt b/VariantValidator/testing/testOutputsMasterITS/variant87.txt deleted file mode 100644 index e2d04e7a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant87.txt +++ /dev/null @@ -1,228 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_018717.4:c.1472_1473insTCAGCAGCAGCA' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_018717.4 with genome build GRCh37' -p9 -aS'NC_000004.11:g.140811117 is one of 12 genomic base(s) that fail to align to transcript NM_018717.4 between positions c.1467_1468' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'A more recent version of the selected reference sequence NM_018717.4 is available (NM_018717.5)' -p13 -aS'NM_018717.5:c.1472_1473insTCAGCAGCAGCA MUST be fully validated prior to use in reports' -p14 -aS'select_variants=NM_018717.5:c.1472_1473insTCAGCAGCAGCA' -p15 -aS'RefSeqGene record not available' -p16 -asS'RefSeqGene_context_intronic_sequence' -p17 -g6 -sS'alt_genomic_loci' -p18 -(lp19 -sS'transcript_description' -p20 -VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA -p21 -sS'gene_symbol' -p22 -S'MAML3' -p23 -sS'HGVS_predicted_protein_consequence' -p24 -(dp25 -S'tlr' -p26 -S'NP_061187.2:p.(Gln490_Gln491insHisGlnGlnGln)' -p27 -sS'slr' -p28 -S'NP_061187.2:p.(Q490_Q491insHQQQ)' -p29 -ssS'submitted_variant' -p30 -S'NC_000004.11:g.140811117C>A' -p31 -sS'genome_context_intronic_sequence' -p32 -g6 -sS'HGVS_LRG_variant' -p33 -g6 -sS'HGVS_transcript_variant' -p34 -S'NM_018717.4:c.1472_1473insTCAGCAGCAGCA' -p35 -sS'HGVS_RefSeqGene_variant' -p36 -g6 -sS'primary_assembly_loci' -p37 -(dp38 -S'hg19' -p39 -(dp40 -S'HGVS_genomic_description' -p41 -S'NC_000004.11:g.140811117C>A' -p42 -sS'vcf' -p43 -(dp44 -S'chr' 
-p45 -S'chr4' -p46 -sS'ref' -p47 -S'C' -p48 -sS'pos' -p49 -S'140811117' -p50 -sS'alt' -p51 -VA -p52 -sssS'hg38' -p53 -(dp54 -g41 -S'NC_000004.12:g.139889963C>A' -p55 -sg43 -(dp56 -g45 -g46 -sg47 -g48 -sg49 -S'139889963' -p57 -sg51 -g52 -sssS'GRCh37' -p58 -(dp59 -g41 -S'NC_000004.11:g.140811117C>A' -p60 -sg43 -(dp61 -g45 -S'4' -p62 -sg47 -g48 -sg49 -S'140811117' -p63 -sg51 -g52 -sssS'GRCh38' -p64 -(dp65 -g41 -S'NC_000004.12:g.139889963C>A' -p66 -sg43 -(dp67 -g45 -g62 -sg47 -g48 -sg49 -S'139889963' -p68 -sg51 -g52 -sssssS'NM_018717.5:c.1473G>T' -p69 -(dp70 -g5 -g6 -sg7 -(lp71 -S'RefSeqGene record not available' -p72 -asg17 -g6 -sg18 -(lp73 -sg20 -VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA -p74 -sg22 -S'MAML3' -p75 -sg24 -(dp76 -g26 -S'NP_061187.3:p.(Gln491His)' -p77 -sg28 -S'NP_061187.3:p.(Q491H)' -p78 -ssg30 -g31 -sg32 -g6 -sg33 -g6 -sg34 -S'NM_018717.5:c.1473G>T' -p79 -sg36 -g6 -sg37 -(dp80 -g39 -(dp81 -g41 -S'NC_000004.11:g.140811117C>A' -p82 -sg43 -(dp83 -g45 -g46 -sg47 -VC -p84 -sg49 -S'140811117' -p85 -sg51 -g52 -sssg58 -(dp86 -g41 -S'NC_000004.11:g.140811117C>A' -p87 -sg43 -(dp88 -g45 -g62 -sg47 -g84 -sg49 -S'140811117' -p89 -sg51 -g52 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant88.txt b/VariantValidator/testing/testOutputsMasterITS/variant88.txt deleted file mode 100644 index fff62e3f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant88.txt +++ /dev/null @@ -1,153 +0,0 @@ -(dp0 -S'NM_015120.4:c.1573_1579=' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_741t1:c.1573_1579=' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_015120.4 with genome build GRCh37' -p7 -aS'NM_015120.4:c.1573_1579 contains 3 transcript base(s) that fail to align to chromosome NC_000002.11' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -asS'RefSeqGene_context_intronic_sequence' -p11 -S'' -p12 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA -p16 -sS'gene_symbol' -p17 -S'ALMS1' -p18 -sS'HGVS_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_055935.4(LRG_741p1):p.(Ser525=)' -p22 -sS'slr' -p23 -S'NP_055935.4:p.(S525=)' -p24 -ssS'submitted_variant' -p25 -S'NC_000002.11:g.73675227_73675228insCTC' -p26 -sS'genome_context_intronic_sequence' -p27 -g12 -sS'HGVS_LRG_variant' -p28 -S'LRG_741:g.67345_67351=' -p29 -sS'HGVS_transcript_variant' -p30 -S'NM_015120.4:c.1573_1579=' -p31 -sS'HGVS_RefSeqGene_variant' -p32 -S'NG_011690.1:g.67345_67351=' -p33 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000002.11:g.73675228_73675230dup' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chr2' -p43 -sS'ref' -p44 -S'CTC' -p45 -sS'pos' -p46 -S'73675228' -p47 -sS'alt' -p48 -VCTCCTC -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000002.12:g.73448097_73448103=' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -VTCTCCTC -p54 -sg46 -S'73448097' -p55 -sg48 -g54 -sssS'GRCh37' -p56 -(dp57 -g38 
-S'NC_000002.11:g.73675228_73675230dup' -p58 -sg40 -(dp59 -g42 -S'2' -p60 -sg44 -S'CTC' -p61 -sg46 -S'73675228' -p62 -sg48 -VCTCCTC -p63 -sssS'GRCh38' -p64 -(dp65 -g38 -S'NC_000002.12:g.73448097_73448103=' -p66 -sg40 -(dp67 -g42 -g60 -sg44 -g54 -sg46 -S'73448097' -p68 -sg48 -g54 -sssssS'flag' -p69 -S'gene_variant' -p70 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant89.txt b/VariantValidator/testing/testOutputsMasterITS/variant89.txt deleted file mode 100644 index f0cd2bd9..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant89.txt +++ /dev/null @@ -1,213 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_020469.2:c.260_262=' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_792t1:c.260_262=' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000009.11:g.136132908T>TC automapped to NC_000009.11:g.136132908_136132909insC' -p9 -aS'The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37' -p10 -aS'NM_020469.2:c.260_262 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11' -p11 -aS'Caution should be used when reporting the displayed variant descriptions' -p12 -aS'If you are unsure, please contact admin' -p13 -aS'The current status of LRG_792 is pending therefore changes may be made to the LRG reference sequence' -p14 -asS'RefSeqGene_context_intronic_sequence' -p15 -S'' -p16 -sS'alt_genomic_loci' -p17 -(lp18 -(dp19 -S'GRCh37' -p20 -(dp21 -S'HGVS_genomic_description' -p22 -S'NW_003315925.1:g.83614_83616=' -p23 -sS'vcf' -p24 -(dp25 -S'chr' -p26 -S'HG79_PATCH' -p27 -sS'ref' -p28 -VTCA -p29 -sS'pos' -p30 -S'83614' -p31 -sS'alt' -p32 -g29 -sssa(dp33 -S'GRCh38' -p34 -(dp35 -g22 -S'NW_009646201.1:g.83614_83616=' -p36 -sg24 -(dp37 -g26 -S'HG2030_PATCH' -p38 -sg28 -VTCA -p39 -sg30 -S'83614' -p40 -sg32 -g39 -sssa(dp41 -S'hg38' -p42 -(dp43 -g22 -S'NW_009646201.1:g.83614_83616=' -p44 -sg24 -(dp45 -g26 -S'NW_009646201.1' -p46 -sg28 -g39 -sg30 -S'83614' -p47 -sg32 
-g39 -sssasS'transcript_description' -p48 -VHomo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA -p49 -sS'gene_symbol' -p50 -S'ABO' -p51 -sS'HGVS_predicted_protein_consequence' -p52 -(dp53 -S'tlr' -p54 -S'NP_065202.2(LRG_792p1):p.(Val87=)' -p55 -sS'slr' -p56 -S'NP_065202.2:p.(V87=)' -p57 -ssS'submitted_variant' -p58 -S'9-136132908-T-TC' -p59 -sS'genome_context_intronic_sequence' -p60 -g16 -sS'HGVS_LRG_variant' -p61 -S'LRG_792:g.20145_20147=' -p62 -sS'HGVS_transcript_variant' -p63 -S'NM_020469.2:c.260_262=' -p64 -sS'HGVS_RefSeqGene_variant' -p65 -S'NG_006669.1:g.20145_20147=' -p66 -sS'primary_assembly_loci' -p67 -(dp68 -S'hg19' -p69 -(dp70 -g22 -S'NC_000009.11:g.136132908_136132909insC' -p71 -sg24 -(dp72 -g26 -S'chr9' -p73 -sg28 -S'T' -p74 -sg30 -S'136132908' -p75 -sg32 -VTC -p76 -sssg42 -(dp77 -g22 -S'NC_000009.12:g.133257521_133257522insC' -p78 -sg24 -(dp79 -g26 -g73 -sg28 -g74 -sg30 -S'133257521' -p80 -sg32 -VTC -p81 -sssg20 -(dp82 -g22 -S'NC_000009.11:g.136132908_136132909insC' -p83 -sg24 -(dp84 -g26 -S'9' -p85 -sg28 -g74 -sg30 -S'136132908' -p86 -sg32 -VTC -p87 -sssg34 -(dp88 -g22 -S'NC_000009.12:g.133257521_133257522insC' -p89 -sg24 -(dp90 -g26 -g85 -sg28 -g74 -sg30 -S'133257521' -p91 -sg32 -VTC -p92 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant9.txt b/VariantValidator/testing/testOutputsMasterITS/variant9.txt deleted file mode 100644 index c9a58d71..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant9.txt +++ /dev/null @@ -1,62 +0,0 @@ -(dp0 -S'Validation_Warning_1' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'HGVS variant nomenclature does not allow the use of a gene symbol (COL5A1) in place of a valid reference sequence' -p7 -aS'Re-submit COL5A1:c.5071A>T and specify transcripts from the following' -p8 -aS'select_transcripts=NM_000093.4|NM_000093.3|NM_001278074.1' -p9 -asS'RefSeqGene_context_intronic_sequence' -p10 -g4 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -g4 -sS'gene_symbol' -p14 -g4 -sS'HGVS_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -g4 -sS'slr' -p18 -g4 -ssS'submitted_variant' -p19 -S'COL5A1:c.5071A>T' -p20 -sS'genome_context_intronic_sequence' -p21 -g4 -sS'HGVS_LRG_variant' -p22 -g4 -sS'HGVS_transcript_variant' -p23 -g4 -sS'HGVS_RefSeqGene_variant' -p24 -g4 -sS'primary_assembly_loci' -p25 -(dp26 -ssS'flag' -p27 -S'warning' -p28 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant90.txt b/VariantValidator/testing/testOutputsMasterITS/variant90.txt deleted file mode 100644 index 686f6bc2..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant90.txt +++ /dev/null @@ -1,216 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_020469.2:c.259del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_792t1:c.259del' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000009.11:g.136132908TAC>TCA automapped to NC_000009.11:g.136132909_136132910delACinsCA' -p9 -aS'The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37' -p10 -aS'NM_020469.2:c.258_261 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11' -p11 -aS'Caution should be used when reporting the displayed variant descriptions' -p12 -aS'If you are unsure, please contact admin' -p13 -aS'The current status of LRG_792 is pending therefore changes may be made to the LRG reference sequence' -p14 -asS'RefSeqGene_context_intronic_sequence' -p15 -S'' -p16 -sS'alt_genomic_loci' -p17 -(lp18 -(dp19 -S'GRCh37' -p20 -(dp21 -S'HGVS_genomic_description' -p22 -S'NW_003315925.1:g.83618del' -p23 -sS'vcf' -p24 -(dp25 -S'chr' -p26 -S'HG79_PATCH' -p27 -sS'ref' -p28 -S'AC' -p29 -sS'pos' -p30 -S'83616' -p31 -sS'alt' -p32 -S'A' -p33 -sssa(dp34 -S'GRCh38' -p35 -(dp36 -g22 -S'NW_009646201.1:g.83618del' -p37 -sg24 -(dp38 -g26 -S'HG2030_PATCH' -p39 -sg28 -S'AC' -p40 -sg30 -S'83616' -p41 -sg32 -g33 -sssa(dp42 -S'hg38' -p43 -(dp44 -g22 -S'NW_009646201.1:g.83618del' -p45 -sg24 -(dp46 -g26 -S'NW_009646201.1' -p47 -sg28 -S'AC' -p48 -sg30 -S'83616' -p49 -sg32 -g33 -sssasS'transcript_description' -p50 -VHomo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA -p51 -sS'gene_symbol' -p52 -S'ABO' -p53 -sS'HGVS_predicted_protein_consequence' -p54 -(dp55 -S'tlr' -p56 -S'NP_065202.2(LRG_792p1):p.(Val87Ter)' -p57 -sS'slr' 
-p58 -S'NP_065202.2:p.(V87*)' -p59 -ssS'submitted_variant' -p60 -S'9-136132908-TAC-TCA' -p61 -sS'genome_context_intronic_sequence' -p62 -g16 -sS'HGVS_LRG_variant' -p63 -S'LRG_792:g.20144del' -p64 -sS'HGVS_transcript_variant' -p65 -S'NM_020469.2:c.259del' -p66 -sS'HGVS_RefSeqGene_variant' -p67 -S'NG_006669.1:g.20144del' -p68 -sS'primary_assembly_loci' -p69 -(dp70 -S'hg19' -p71 -(dp72 -g22 -S'NC_000009.11:g.136132909_136132910delinsCA' -p73 -sg24 -(dp74 -g26 -S'chr9' -p75 -sg28 -S'AC' -p76 -sg30 -S'136132909' -p77 -sg32 -VCA -p78 -sssg43 -(dp79 -g22 -S'NC_000009.12:g.133257522_133257523delinsCA' -p80 -sg24 -(dp81 -g26 -g75 -sg28 -S'AC' -p82 -sg30 -S'133257522' -p83 -sg32 -VCA -p84 -sssg20 -(dp85 -g22 -S'NC_000009.11:g.136132909_136132910delinsCA' -p86 -sg24 -(dp87 -g26 -S'9' -p88 -sg28 -S'AC' -p89 -sg30 -S'136132909' -p90 -sg32 -g78 -sssg35 -(dp91 -g22 -S'NC_000009.12:g.133257522_133257523delinsCA' -p92 -sg24 -(dp93 -g26 -g88 -sg28 -S'AC' -p94 -sg30 -S'133257522' -p95 -sg32 -g84 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant91.txt b/VariantValidator/testing/testOutputsMasterITS/variant91.txt deleted file mode 100644 index 72b62aaa..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant91.txt +++ /dev/null @@ -1,212 +0,0 @@ -(dp0 -S'NM_020469.2:c.261del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_792t1:c.261del' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000009.11:g.136132908TA>TA automapped to NC_000009.11:g.136132908_136132909TA=' -p7 -aS'The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37' -p8 -aS'NM_020469.2:c.261 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -aS'The current status of LRG_792 is pending therefore changes may be made to the LRG reference sequence' -p12 
-asS'RefSeqGene_context_intronic_sequence' -p13 -S'' -p14 -sS'alt_genomic_loci' -p15 -(lp16 -(dp17 -S'GRCh37' -p18 -(dp19 -S'HGVS_genomic_description' -p20 -S'NW_003315925.1:g.83615del' -p21 -sS'vcf' -p22 -(dp23 -S'chr' -p24 -S'HG79_PATCH' -p25 -sS'ref' -p26 -S'TC' -p27 -sS'pos' -p28 -S'83614' -p29 -sS'alt' -p30 -S'T' -p31 -sssa(dp32 -S'GRCh38' -p33 -(dp34 -g20 -S'NW_009646201.1:g.83615del' -p35 -sg22 -(dp36 -g24 -S'HG2030_PATCH' -p37 -sg26 -S'TC' -p38 -sg28 -S'83614' -p39 -sg30 -g31 -sssa(dp40 -S'hg38' -p41 -(dp42 -g20 -S'NW_009646201.1:g.83615del' -p43 -sg22 -(dp44 -g24 -S'NW_009646201.1' -p45 -sg26 -S'TC' -p46 -sg28 -S'83614' -p47 -sg30 -g31 -sssasS'transcript_description' -p48 -VHomo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA -p49 -sS'gene_symbol' -p50 -S'ABO' -p51 -sS'HGVS_predicted_protein_consequence' -p52 -(dp53 -S'tlr' -p54 -S'NP_065202.2(LRG_792p1):p.(Thr88ProfsTer31)' -p55 -sS'slr' -p56 -S'NP_065202.2:p.(T88Pfs*31)' -p57 -ssS'submitted_variant' -p58 -S'9-136132908-TA-TA' -p59 -sS'genome_context_intronic_sequence' -p60 -g14 -sS'HGVS_LRG_variant' -p61 -S'LRG_792:g.20146del' -p62 -sS'HGVS_transcript_variant' -p63 -S'NM_020469.2:c.261del' -p64 -sS'HGVS_RefSeqGene_variant' -p65 -S'NG_006669.1:g.20146del' -p66 -sS'primary_assembly_loci' -p67 -(dp68 -S'hg19' -p69 -(dp70 -g20 -S'NC_000009.11:g.136132908_136132909=' -p71 -sg22 -(dp72 -g24 -S'chr9' -p73 -sg26 -S'TA' -p74 -sg28 -S'136132908' -p75 -sg30 -g74 -sssg41 -(dp76 -g20 -S'NC_000009.12:g.133257521_133257522=' -p77 -sg22 -(dp78 -g24 -g73 -sg26 -S'TA' -p79 -sg28 -S'133257521' -p80 -sg30 -g79 -sssg18 -(dp81 -g20 -S'NC_000009.11:g.136132908_136132909=' -p82 -sg22 -(dp83 -g24 -S'9' -p84 -sg26 -g74 -sg28 -S'136132908' -p85 -sg30 -g74 -sssg33 -(dp86 -g20 -S'NC_000009.12:g.133257521_133257522=' -p87 -sg22 -(dp88 -g24 -g84 -sg26 -g79 -sg28 -S'133257521' -p89 -sg30 -g79 -sssssS'flag' -p90 -S'gene_variant' -p91 -s. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant92.txt b/VariantValidator/testing/testOutputsMasterITS/variant92.txt deleted file mode 100644 index 7a8b06e5..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant92.txt +++ /dev/null @@ -1,216 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_020469.2:c.259del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_792t1:c.259del' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_020469.2:c.258delG automapped to NM_020469.2:c.259delG' -p9 -aS'The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37' -p10 -aS'NM_020469.2:c.258_261 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11' -p11 -aS'Caution should be used when reporting the displayed variant descriptions' -p12 -aS'If you are unsure, please contact admin' -p13 -aS'The current status of LRG_792 is pending therefore changes may be made to the LRG reference sequence' -p14 -asS'RefSeqGene_context_intronic_sequence' -p15 -S'' -p16 -sS'alt_genomic_loci' -p17 -(lp18 -(dp19 -S'GRCh37' -p20 -(dp21 -S'HGVS_genomic_description' -p22 -S'NW_003315925.1:g.83618del' -p23 -sS'vcf' -p24 -(dp25 -S'chr' -p26 -S'HG79_PATCH' -p27 -sS'ref' -p28 -S'AC' -p29 -sS'pos' -p30 -S'83616' -p31 -sS'alt' -p32 -S'A' -p33 -sssa(dp34 -S'GRCh38' -p35 -(dp36 -g22 -S'NW_009646201.1:g.83618del' -p37 -sg24 -(dp38 -g26 -S'HG2030_PATCH' -p39 -sg28 -S'AC' -p40 -sg30 -S'83616' -p41 -sg32 -g33 -sssa(dp42 -S'hg38' -p43 -(dp44 -g22 -S'NW_009646201.1:g.83618del' -p45 -sg24 -(dp46 -g26 -S'NW_009646201.1' -p47 -sg28 -S'AC' -p48 -sg30 -S'83616' -p49 -sg32 -g33 -sssasS'transcript_description' -p50 -VHomo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA -p51 -sS'gene_symbol' -p52 -S'ABO' -p53 -sS'HGVS_predicted_protein_consequence' -p54 -(dp55 -S'tlr' -p56 -S'NP_065202.2(LRG_792p1):p.(Val87Ter)' -p57 -sS'slr' -p58 -S'NP_065202.2:p.(V87*)' -p59 
-ssS'submitted_variant' -p60 -S'NM_020469.2:c.258delG' -p61 -sS'genome_context_intronic_sequence' -p62 -g16 -sS'HGVS_LRG_variant' -p63 -S'LRG_792:g.20144del' -p64 -sS'HGVS_transcript_variant' -p65 -S'NM_020469.2:c.259del' -p66 -sS'HGVS_RefSeqGene_variant' -p67 -S'NG_006669.1:g.20144del' -p68 -sS'primary_assembly_loci' -p69 -(dp70 -S'hg19' -p71 -(dp72 -g22 -S'NC_000009.11:g.136132909_136132910delinsCA' -p73 -sg24 -(dp74 -g26 -S'chr9' -p75 -sg28 -S'AC' -p76 -sg30 -S'136132909' -p77 -sg32 -VCA -p78 -sssg43 -(dp79 -g22 -S'NC_000009.12:g.133257522_133257523delinsCA' -p80 -sg24 -(dp81 -g26 -g75 -sg28 -S'AC' -p82 -sg30 -S'133257522' -p83 -sg32 -VCA -p84 -sssg20 -(dp85 -g22 -S'NC_000009.11:g.136132909_136132910delinsCA' -p86 -sg24 -(dp87 -g26 -S'9' -p88 -sg28 -S'AC' -p89 -sg30 -S'136132909' -p90 -sg32 -g78 -sssg35 -(dp91 -g22 -S'NC_000009.12:g.133257522_133257523delinsCA' -p92 -sg24 -(dp93 -g26 -g88 -sg28 -S'AC' -p94 -sg30 -S'133257522' -p95 -sg32 -g84 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant93.txt b/VariantValidator/testing/testOutputsMasterITS/variant93.txt deleted file mode 100644 index 6f6375f4..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant93.txt +++ /dev/null @@ -1,211 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_020469.2:c.260_262=' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_792t1:c.260_262=' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37' -p9 -aS'NM_020469.2:c.260_262 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'The current status of LRG_792 is pending therefore changes may be made to the LRG reference sequence' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -S'' -p15 -sS'alt_genomic_loci' -p16 -(lp17 
-(dp18 -S'GRCh37' -p19 -(dp20 -S'HGVS_genomic_description' -p21 -S'NW_003315925.1:g.83614_83616=' -p22 -sS'vcf' -p23 -(dp24 -S'chr' -p25 -S'HG79_PATCH' -p26 -sS'ref' -p27 -VTCA -p28 -sS'pos' -p29 -S'83614' -p30 -sS'alt' -p31 -g28 -sssa(dp32 -S'GRCh38' -p33 -(dp34 -g21 -S'NW_009646201.1:g.83614_83616=' -p35 -sg23 -(dp36 -g25 -S'HG2030_PATCH' -p37 -sg27 -VTCA -p38 -sg29 -S'83614' -p39 -sg31 -g38 -sssa(dp40 -S'hg38' -p41 -(dp42 -g21 -S'NW_009646201.1:g.83614_83616=' -p43 -sg23 -(dp44 -g25 -S'NW_009646201.1' -p45 -sg27 -g38 -sg29 -S'83614' -p46 -sg31 -g38 -sssasS'transcript_description' -p47 -VHomo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA -p48 -sS'gene_symbol' -p49 -S'ABO' -p50 -sS'HGVS_predicted_protein_consequence' -p51 -(dp52 -S'tlr' -p53 -S'NP_065202.2(LRG_792p1):p.(Val87=)' -p54 -sS'slr' -p55 -S'NP_065202.2:p.(V87=)' -p56 -ssS'submitted_variant' -p57 -S'NM_020469.2:c.260_262TGA=' -p58 -sS'genome_context_intronic_sequence' -p59 -g15 -sS'HGVS_LRG_variant' -p60 -S'LRG_792:g.20145_20147=' -p61 -sS'HGVS_transcript_variant' -p62 -S'NM_020469.2:c.260_262=' -p63 -sS'HGVS_RefSeqGene_variant' -p64 -S'NG_006669.1:g.20145_20147=' -p65 -sS'primary_assembly_loci' -p66 -(dp67 -S'hg19' -p68 -(dp69 -g21 -S'NC_000009.11:g.136132908_136132909insC' -p70 -sg23 -(dp71 -g25 -S'chr9' -p72 -sg27 -S'T' -p73 -sg29 -S'136132908' -p74 -sg31 -VTC -p75 -sssg41 -(dp76 -g21 -S'NC_000009.12:g.133257521_133257522insC' -p77 -sg23 -(dp78 -g25 -g72 -sg27 -g73 -sg29 -S'133257521' -p79 -sg31 -VTC -p80 -sssg19 -(dp81 -g21 -S'NC_000009.11:g.136132908_136132909insC' -p82 -sg23 -(dp83 -g25 -S'9' -p84 -sg27 -g73 -sg29 -S'136132908' -p85 -sg31 -VTC -p86 -sssg33 -(dp87 -g21 -S'NC_000009.12:g.133257521_133257522insC' -p88 -sg23 -(dp89 -g25 -g84 -sg27 -g73 -sg29 -S'133257521' -p90 -sg31 -VTC -p91 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant94.txt b/VariantValidator/testing/testOutputsMasterITS/variant94.txt deleted file mode 100644 index 7d495086..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant94.txt +++ /dev/null @@ -1,210 +0,0 @@ -(dp0 -S'NM_020469.2:c.261del' -p1 -(dp2 -S'HGVS_LRG_transcript_variant' -p3 -S'LRG_792t1:c.261del' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37' -p7 -aS'NM_020469.2:c.261 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'The current status of LRG_792 is pending therefore changes may be made to the LRG reference sequence' -p11 -asS'RefSeqGene_context_intronic_sequence' -p12 -S'' -p13 -sS'alt_genomic_loci' -p14 -(lp15 -(dp16 -S'GRCh37' -p17 -(dp18 -S'HGVS_genomic_description' -p19 -S'NW_003315925.1:g.83615del' -p20 -sS'vcf' -p21 -(dp22 -S'chr' -p23 -S'HG79_PATCH' -p24 -sS'ref' -p25 -S'TC' -p26 -sS'pos' -p27 -S'83614' -p28 -sS'alt' -p29 -S'T' -p30 -sssa(dp31 -S'GRCh38' -p32 -(dp33 -g19 -S'NW_009646201.1:g.83615del' -p34 -sg21 -(dp35 -g23 -S'HG2030_PATCH' -p36 -sg25 -S'TC' -p37 -sg27 -S'83614' -p38 -sg29 -g30 -sssa(dp39 -S'hg38' -p40 -(dp41 -g19 -S'NW_009646201.1:g.83615del' -p42 -sg21 -(dp43 -g23 -S'NW_009646201.1' -p44 -sg25 -S'TC' -p45 -sg27 -S'83614' -p46 -sg29 -g30 -sssasS'transcript_description' -p47 -VHomo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA -p48 -sS'gene_symbol' -p49 -S'ABO' -p50 -sS'HGVS_predicted_protein_consequence' -p51 -(dp52 -S'tlr' -p53 -S'NP_065202.2(LRG_792p1):p.(Thr88ProfsTer31)' -p54 -sS'slr' -p55 -S'NP_065202.2:p.(T88Pfs*31)' -p56 -ssS'submitted_variant' -p57 -S'NM_020469.2:c.261delG' -p58 -sS'genome_context_intronic_sequence' 
-p59 -g13 -sS'HGVS_LRG_variant' -p60 -S'LRG_792:g.20146del' -p61 -sS'HGVS_transcript_variant' -p62 -S'NM_020469.2:c.261del' -p63 -sS'HGVS_RefSeqGene_variant' -p64 -S'NG_006669.1:g.20146del' -p65 -sS'primary_assembly_loci' -p66 -(dp67 -S'hg19' -p68 -(dp69 -g19 -S'NC_000009.11:g.136132908_136132909=' -p70 -sg21 -(dp71 -g23 -S'chr9' -p72 -sg25 -S'TA' -p73 -sg27 -S'136132908' -p74 -sg29 -g73 -sssg40 -(dp75 -g19 -S'NC_000009.12:g.133257521_133257522=' -p76 -sg21 -(dp77 -g23 -g72 -sg25 -S'TA' -p78 -sg27 -S'133257521' -p79 -sg29 -g78 -sssg17 -(dp80 -g19 -S'NC_000009.11:g.136132908_136132909=' -p81 -sg21 -(dp82 -g23 -S'9' -p83 -sg25 -g73 -sg27 -S'136132908' -p84 -sg29 -g73 -sssg32 -(dp85 -g19 -S'NC_000009.12:g.133257521_133257522=' -p86 -sg21 -(dp87 -g23 -g83 -sg25 -g78 -sg27 -S'133257521' -p88 -sg29 -g78 -sssssS'flag' -p89 -S'gene_variant' -p90 -s. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant95.txt b/VariantValidator/testing/testOutputsMasterITS/variant95.txt deleted file mode 100644 index 10e71526..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant95.txt +++ /dev/null @@ -1,213 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_020469.2:c.261dup' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_792t1:c.261dup' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37' -p9 -aS'NM_020469.2:c.261 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'The current status of LRG_792 is pending therefore changes may be made to the LRG reference sequence' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -S'' -p15 -sS'alt_genomic_loci' -p16 -(lp17 -(dp18 -S'GRCh37' -p19 -(dp20 -S'HGVS_genomic_description' -p21 -S'NW_003315925.1:g.83615dup' -p22 -sS'vcf' -p23 -(dp24 
-S'chr' -p25 -S'HG79_PATCH' -p26 -sS'ref' -p27 -S'C' -p28 -sS'pos' -p29 -S'83615' -p30 -sS'alt' -p31 -VCC -p32 -sssa(dp33 -S'GRCh38' -p34 -(dp35 -g21 -S'NW_009646201.1:g.83615dup' -p36 -sg23 -(dp37 -g25 -S'HG2030_PATCH' -p38 -sg27 -g28 -sg29 -S'83615' -p39 -sg31 -VCC -p40 -sssa(dp41 -S'hg38' -p42 -(dp43 -g21 -S'NW_009646201.1:g.83615dup' -p44 -sg23 -(dp45 -g25 -S'NW_009646201.1' -p46 -sg27 -g28 -sg29 -S'83615' -p47 -sg31 -VCC -p48 -sssasS'transcript_description' -p49 -VHomo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA -p50 -sS'gene_symbol' -p51 -S'ABO' -p52 -sS'HGVS_predicted_protein_consequence' -p53 -(dp54 -S'tlr' -p55 -S'NP_065202.2(LRG_792p1):p.(Thr88AspfsTer107)' -p56 -sS'slr' -p57 -S'NP_065202.2:p.(T88Dfs*107)' -p58 -ssS'submitted_variant' -p59 -S'NM_020469.2:c.261dupG' -p60 -sS'genome_context_intronic_sequence' -p61 -g15 -sS'HGVS_LRG_variant' -p62 -S'LRG_792:g.20146dup' -p63 -sS'HGVS_transcript_variant' -p64 -S'NM_020469.2:c.261dup' -p65 -sS'HGVS_RefSeqGene_variant' -p66 -S'NG_006669.1:g.20146dup' -p67 -sS'primary_assembly_loci' -p68 -(dp69 -S'hg19' -p70 -(dp71 -g21 -S'NC_000009.11:g.136132908_136132909insCC' -p72 -sg23 -(dp73 -g25 -S'chr9' -p74 -sg27 -S'T' -p75 -sg29 -S'136132908' -p76 -sg31 -VTCC -p77 -sssg42 -(dp78 -g21 -S'NC_000009.12:g.133257521_133257522insCC' -p79 -sg23 -(dp80 -g25 -g74 -sg27 -g75 -sg29 -S'133257521' -p81 -sg31 -VTCC -p82 -sssg19 -(dp83 -g21 -S'NC_000009.11:g.136132908_136132909insCC' -p84 -sg23 -(dp85 -g25 -S'9' -p86 -sg27 -g75 -sg29 -S'136132908' -p87 -sg31 -VTCC -p88 -sssg34 -(dp89 -g21 -S'NC_000009.12:g.133257521_133257522insCC' -p90 -sg23 -(dp91 -g25 -g86 -sg27 -g75 -sg29 -S'133257521' -p92 -sg31 -VTCC -p93 -sssss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant96.txt b/VariantValidator/testing/testOutputsMasterITS/variant96.txt deleted file mode 100644 index 95c40c91..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant96.txt +++ /dev/null @@ -1,212 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_020469.2:c.261_262insTT' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'LRG_792t1:c.261_262insTT' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37' -p9 -aS'NM_020469.2:c.261_262 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'The current status of LRG_792 is pending therefore changes may be made to the LRG reference sequence' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -S'' -p15 -sS'alt_genomic_loci' -p16 -(lp17 -(dp18 -S'GRCh37' -p19 -(dp20 -S'HGVS_genomic_description' -p21 -S'NW_003315925.1:g.83614_83615insAA' -p22 -sS'vcf' -p23 -(dp24 -S'chr' -p25 -S'HG79_PATCH' -p26 -sS'ref' -p27 -S'T' -p28 -sS'pos' -p29 -S'83614' -p30 -sS'alt' -p31 -VTAA -p32 -sssa(dp33 -S'GRCh38' -p34 -(dp35 -g21 -S'NW_009646201.1:g.83614_83615insAA' -p36 -sg23 -(dp37 -g25 -S'HG2030_PATCH' -p38 -sg27 -g28 -sg29 -S'83614' -p39 -sg31 -VTAA -p40 -sssa(dp41 -S'hg38' -p42 -(dp43 -g21 -S'NW_009646201.1:g.83614_83615insAA' -p44 -sg23 -(dp45 -g25 -S'NW_009646201.1' -p46 -sg27 -g28 -sg29 -S'83614' -p47 -sg31 -VTAA -p48 -sssasS'transcript_description' -p49 -VHomo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA -p50 -sS'gene_symbol' -p51 -S'ABO' -p52 -sS'HGVS_predicted_protein_consequence' -p53 -(dp54 -S'tlr' -p55 -S'NP_065202.2(LRG_792p1):p.(Thr88LeufsTer32)' -p56 -sS'slr' -p57 -S'NP_065202.2:p.(T88Lfs*32)' -p58 -ssS'submitted_variant' 
-p59 -S'NM_020469.2:c.261_262insTT' -p60 -sS'genome_context_intronic_sequence' -p61 -g15 -sS'HGVS_LRG_variant' -p62 -S'LRG_792:g.20146_20147insTT' -p63 -sS'HGVS_transcript_variant' -p64 -S'NM_020469.2:c.261_262insTT' -p65 -sS'HGVS_RefSeqGene_variant' -p66 -S'NG_006669.1:g.20146_20147insTT' -p67 -sS'primary_assembly_loci' -p68 -(dp69 -S'hg19' -p70 -(dp71 -g21 -S'NC_000009.11:g.136132909_136132910insACA' -p72 -sg23 -(dp73 -g25 -S'chr9' -p74 -sg27 -g28 -sg29 -S'136132908' -p75 -sg31 -VTAAC -p76 -sssg42 -(dp77 -g21 -S'NC_000009.12:g.133257522_133257523insACA' -p78 -sg23 -(dp79 -g25 -g74 -sg27 -g28 -sg29 -S'133257521' -p80 -sg31 -VTAAC -p81 -sssg19 -(dp82 -g21 -S'NC_000009.11:g.136132909_136132910insACA' -p83 -sg23 -(dp84 -g25 -S'9' -p85 -sg27 -g28 -sg29 -S'136132908' -p86 -sg31 -VTAAC -p87 -sssg34 -(dp88 -g21 -S'NC_000009.12:g.133257522_133257523insACA' -p89 -sg23 -(dp90 -g25 -g85 -sg27 -g28 -sg29 -S'133257521' -p91 -sg31 -VTAAC -p92 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant97.txt b/VariantValidator/testing/testOutputsMasterITS/variant97.txt deleted file mode 100644 index 685543c0..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant97.txt +++ /dev/null @@ -1,268 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_007121.5:c.515A>T' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' -p9 -aS'NM_007121.5:c.514_515 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript 
variant 1, mRNA -p18 -sS'gene_symbol' -p19 -S'NR1H2' -p20 -sS'HGVS_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_009052.3:p.(Lys172Ile)' -p24 -sS'slr' -p25 -S'NP_009052.3:p.(K172I)' -p26 -ssS'submitted_variant' -p27 -S'NC_000019.10:g.50378563_50378564insTAC' -p28 -sS'genome_context_intronic_sequence' -p29 -g6 -sS'HGVS_LRG_variant' -p30 -g6 -sS'HGVS_transcript_variant' -p31 -S'NM_007121.5:c.515A>T' -p32 -sS'HGVS_RefSeqGene_variant' -p33 -g6 -sS'primary_assembly_loci' -p34 -(dp35 -S'GRCh38' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000019.10:g.50378563_50378564insTAC' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'19' -p43 -sS'ref' -p44 -S'A' -p45 -sS'pos' -p46 -S'50378563' -p47 -sS'alt' -p48 -S'ATAC' -p49 -sssS'GRCh37' -p50 -(dp51 -g38 -S'NC_000019.9:g.50881820_50881821insTAC' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -g45 -sg46 -S'50881820' -p54 -sg48 -S'ATAC' -p55 -sssS'hg38' -p56 -(dp57 -g38 -S'NC_000019.10:g.50378563_50378564insTAC' -p58 -sg40 -(dp59 -g42 -S'chr19' -p60 -sg44 -g45 -sg46 -S'50378563' -p61 -sg48 -S'ATAC' -p62 -sssS'hg19' -p63 -(dp64 -g38 -S'NC_000019.9:g.50881820_50881821insTAC' -p65 -sg40 -(dp66 -g42 -g60 -sg44 -g45 -sg46 -S'50881820' -p67 -sg48 -S'ATAC' -p68 -sssssS'NM_001256647.1:c.224A>T' -p69 -(dp70 -g5 -g6 -sg7 -(lp71 -S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' -p72 -aS'NM_001256647.1:c.223_224 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p73 -aS'Caution should be used when reporting the displayed variant descriptions' -p74 -aS'If you are unsure, please contact admin' -p75 -aS'RefSeqGene record not available' -p76 -asg14 -g6 -sg15 -(lp77 -sg17 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA -p78 -sg19 -S'NR1H2' -p79 -sg21 -(dp80 -g23 -S'NP_001243576.1:p.(Lys75Ile)' -p81 -sg25 -S'NP_001243576.1:p.(K75I)' -p82 -ssg27 -g28 -sg29 -g6 -sg30 -g6 -sg31 -S'NM_001256647.1:c.224A>T' -p83 -sg33 -g6 -sg34 
-(dp84 -g36 -(dp85 -g38 -S'NC_000019.10:g.50378563_50378564insTAC' -p86 -sg40 -(dp87 -g42 -g43 -sg44 -g45 -sg46 -S'50378563' -p88 -sg48 -S'ATAC' -p89 -sssg50 -(dp90 -g38 -S'NC_000019.9:g.50881820_50881821insTAC' -p91 -sg40 -(dp92 -g42 -g43 -sg44 -g45 -sg46 -S'50881820' -p93 -sg48 -S'ATAC' -p94 -sssg56 -(dp95 -g38 -S'NC_000019.10:g.50378563_50378564insTAC' -p96 -sg40 -(dp97 -g42 -g60 -sg44 -g45 -sg46 -S'50378563' -p98 -sg48 -S'ATAC' -p99 -sssg63 -(dp100 -g38 -S'NC_000019.9:g.50881820_50881821insTAC' -p101 -sg40 -(dp102 -g42 -g60 -sg44 -g45 -sg46 -S'50881820' -p103 -sg48 -S'ATAC' -p104 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant98.txt b/VariantValidator/testing/testOutputsMasterITS/variant98.txt deleted file mode 100644 index add0aa61..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant98.txt +++ /dev/null @@ -1,268 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_007121.5:c.515_516del' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' -p9 -aS'NM_007121.5:c.514_515 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA -p18 -sS'gene_symbol' -p19 -S'NR1H2' -p20 -sS'HGVS_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_009052.3:p.(Lys172ThrfsTer34)' -p24 -sS'slr' -p25 -S'NP_009052.3:p.(K172Tfs*34)' -p26 -ssS'submitted_variant' -p27 -S'NC_000019.10:g.50378563_50378564insC' -p28 -sS'genome_context_intronic_sequence' -p29 -g6 
-sS'HGVS_LRG_variant' -p30 -g6 -sS'HGVS_transcript_variant' -p31 -S'NM_007121.5:c.515_516del' -p32 -sS'HGVS_RefSeqGene_variant' -p33 -g6 -sS'primary_assembly_loci' -p34 -(dp35 -S'GRCh38' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000019.10:g.50378563_50378564insC' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'19' -p43 -sS'ref' -p44 -S'A' -p45 -sS'pos' -p46 -S'50378563' -p47 -sS'alt' -p48 -S'AC' -p49 -sssS'GRCh37' -p50 -(dp51 -g38 -S'NC_000019.9:g.50881820_50881821insC' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -g45 -sg46 -S'50881820' -p54 -sg48 -S'AC' -p55 -sssS'hg38' -p56 -(dp57 -g38 -S'NC_000019.10:g.50378563_50378564insC' -p58 -sg40 -(dp59 -g42 -S'chr19' -p60 -sg44 -g45 -sg46 -S'50378563' -p61 -sg48 -S'AC' -p62 -sssS'hg19' -p63 -(dp64 -g38 -S'NC_000019.9:g.50881820_50881821insC' -p65 -sg40 -(dp66 -g42 -g60 -sg44 -g45 -sg46 -S'50881820' -p67 -sg48 -S'AC' -p68 -sssssS'NM_001256647.1:c.224_225del' -p69 -(dp70 -g5 -g6 -sg7 -(lp71 -S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' -p72 -aS'NM_001256647.1:c.223_224 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p73 -aS'Caution should be used when reporting the displayed variant descriptions' -p74 -aS'If you are unsure, please contact admin' -p75 -aS'RefSeqGene record not available' -p76 -asg14 -g6 -sg15 -(lp77 -sg17 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA -p78 -sg19 -S'NR1H2' -p79 -sg21 -(dp80 -g23 -S'NP_001243576.1:p.(Lys75ThrfsTer34)' -p81 -sg25 -S'NP_001243576.1:p.(K75Tfs*34)' -p82 -ssg27 -g28 -sg29 -g6 -sg30 -g6 -sg31 -S'NM_001256647.1:c.224_225del' -p83 -sg33 -g6 -sg34 -(dp84 -g36 -(dp85 -g38 -S'NC_000019.10:g.50378563_50378564insC' -p86 -sg40 -(dp87 -g42 -g43 -sg44 -g45 -sg46 -S'50378563' -p88 -sg48 -S'AC' -p89 -sssg50 -(dp90 -g38 -S'NC_000019.9:g.50881820_50881821insC' -p91 -sg40 -(dp92 -g42 -g43 -sg44 -g45 -sg46 -S'50881820' -p93 -sg48 -S'AC' -p94 -sssg56 -(dp95 -g38 
-S'NC_000019.10:g.50378563_50378564insC' -p96 -sg40 -(dp97 -g42 -g60 -sg44 -g45 -sg46 -S'50378563' -p98 -sg48 -S'AC' -p99 -sssg63 -(dp100 -g38 -S'NC_000019.9:g.50881820_50881821insC' -p101 -sg40 -(dp102 -g42 -g60 -sg44 -g45 -sg46 -S'50881820' -p103 -sg48 -S'AC' -p104 -sssss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant99.txt b/VariantValidator/testing/testOutputsMasterITS/variant99.txt deleted file mode 100644 index 3af3520f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant99.txt +++ /dev/null @@ -1,268 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_007121.5:c.515_516insT' -p3 -(dp4 -S'HGVS_LRG_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' -p9 -aS'NM_007121.5:c.514_515 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'RefSeqGene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA -p18 -sS'gene_symbol' -p19 -S'NR1H2' -p20 -sS'HGVS_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_009052.3:p.(Lys172AsnfsTer35)' -p24 -sS'slr' -p25 -S'NP_009052.3:p.(K172Nfs*35)' -p26 -ssS'submitted_variant' -p27 -S'NC_000019.10:g.50378564_50378565insTACA' -p28 -sS'genome_context_intronic_sequence' -p29 -g6 -sS'HGVS_LRG_variant' -p30 -g6 -sS'HGVS_transcript_variant' -p31 -S'NM_007121.5:c.515_516insT' -p32 -sS'HGVS_RefSeqGene_variant' -p33 -g6 -sS'primary_assembly_loci' -p34 -(dp35 -S'GRCh38' -p36 -(dp37 -S'HGVS_genomic_description' -p38 -S'NC_000019.10:g.50378564_50378565insTACA' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'19' -p43 
-sS'ref' -p44 -S'A' -p45 -sS'pos' -p46 -S'50378563' -p47 -sS'alt' -p48 -S'AATAC' -p49 -sssS'GRCh37' -p50 -(dp51 -g38 -S'NC_000019.9:g.50881821_50881822insTACA' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -g45 -sg46 -S'50881820' -p54 -sg48 -S'AATAC' -p55 -sssS'hg38' -p56 -(dp57 -g38 -S'NC_000019.10:g.50378564_50378565insTACA' -p58 -sg40 -(dp59 -g42 -S'chr19' -p60 -sg44 -g45 -sg46 -S'50378563' -p61 -sg48 -S'AATAC' -p62 -sssS'hg19' -p63 -(dp64 -g38 -S'NC_000019.9:g.50881821_50881822insTACA' -p65 -sg40 -(dp66 -g42 -g60 -sg44 -g45 -sg46 -S'50881820' -p67 -sg48 -S'AATAC' -p68 -sssssS'NM_001256647.1:c.224_225insT' -p69 -(dp70 -g5 -g6 -sg7 -(lp71 -S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' -p72 -aS'NM_001256647.1:c.223_224 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p73 -aS'Caution should be used when reporting the displayed variant descriptions' -p74 -aS'If you are unsure, please contact admin' -p75 -aS'RefSeqGene record not available' -p76 -asg14 -g6 -sg15 -(lp77 -sg17 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA -p78 -sg19 -S'NR1H2' -p79 -sg21 -(dp80 -g23 -S'NP_001243576.1:p.(Lys75AsnfsTer35)' -p81 -sg25 -S'NP_001243576.1:p.(K75Nfs*35)' -p82 -ssg27 -g28 -sg29 -g6 -sg30 -g6 -sg31 -S'NM_001256647.1:c.224_225insT' -p83 -sg33 -g6 -sg34 -(dp84 -g36 -(dp85 -g38 -S'NC_000019.10:g.50378564_50378565insTACA' -p86 -sg40 -(dp87 -g42 -g43 -sg44 -g45 -sg46 -S'50378563' -p88 -sg48 -S'AATAC' -p89 -sssg50 -(dp90 -g38 -S'NC_000019.9:g.50881821_50881822insTACA' -p91 -sg40 -(dp92 -g42 -g43 -sg44 -g45 -sg46 -S'50881820' -p93 -sg48 -S'AATAC' -p94 -sssg56 -(dp95 -g38 -S'NC_000019.10:g.50378564_50378565insTACA' -p96 -sg40 -(dp97 -g42 -g60 -sg44 -g45 -sg46 -S'50378563' -p98 -sg48 -S'AATAC' -p99 -sssg63 -(dp100 -g38 -S'NC_000019.9:g.50881821_50881822insTACA' -p101 -sg40 -(dp102 -g42 -g60 -sg44 -g45 -sg46 -S'50881820' -p103 -sg48 -S'AATAC' -p104 -sssss. 
\ No newline at end of file From 0d6f4a56c707b56bf4edd8e6c79481769af4ba34 Mon Sep 17 00:00:00 2001 From: buran Date: Wed, 16 Jan 2019 14:07:19 +0000 Subject: [PATCH 006/223] pause to tag --- VariantValidator/modules/vvChromosomes.py | 566 ++++ VariantValidator/modules/vvConverters.py | 2225 ---------------- VariantValidator/modules/vvHGVS.py | 177 +- VariantValidator/modules/vvLiftover.py | 344 +++ VariantValidator/modules/vvMixinConverters.py | 2319 +++++++++++++++++ .../modules/{vvCore.py => vvMixinCore.py} | 336 +-- VariantValidator/modules/vvMixinInit.py | 468 ++++ VariantValidator/modules/vvObjects.py | 484 +--- VariantValidator/variantanalyser/functions.py | 3 +- .../variantanalyser/pseudo_vcf2hgvs.py | 6 +- 10 files changed, 4056 insertions(+), 2872 deletions(-) delete mode 100644 VariantValidator/modules/vvConverters.py create mode 100644 VariantValidator/modules/vvLiftover.py create mode 100644 VariantValidator/modules/vvMixinConverters.py rename VariantValidator/modules/{vvCore.py => vvMixinCore.py} (98%) create mode 100644 VariantValidator/modules/vvMixinInit.py diff --git a/VariantValidator/modules/vvChromosomes.py b/VariantValidator/modules/vvChromosomes.py index d7a49d5d..d9a03b26 100644 --- a/VariantValidator/modules/vvChromosomes.py +++ b/VariantValidator/modules/vvChromosomes.py @@ -2844,3 +2844,569 @@ def to_chr_num_refseq(accession, primary_assembly): except UnboundLocalError: chr_num = None return chr_num + +# from gap_genes +""" +Lists of genes for GRCh37 and GRCh38 which require a gap to be inserted into either the +transcript or the genome to maintain a perfect alignment +""" +def gap_black_list(symbol): + gapGene = { + "LPP": "", + "VPS13D": "", + "SSPO": "", + "HTT": "", + "PRKDC": "", + "RNA45SN4": "", + "RNA45SN1": "", + "RNA45SN2": "", + "RNA45SN3": "", + "ALMS1": "", + "ZNF141": "", + "PRLR": "", + "NBPF10": "", + "ACACA": "", + "ZMYM2": "", + "MIAT": "", + "WDFY4": "", + "CECR2": "", + "FAM30A": "", + "MYO15B": "", + "CELF2": "", + 
"JRK": "", + "PTEN": "", + "ZNF714": "", + "MGAT4C": "", + "SLITRK4": "", + "ZAN": "", + "COL19A1": "", + "CCDC144B": "", + "RAB11FIP4": "", + "ZNF516": "", + "ZNF518A": "", + "PROX1": "", + "HCG18": "", + "SON": "", + "ARMC9": "", + "CAMK1D": "", + "GRIP2": "", + "KLHL5": "", + "PPIP5K2": "", + "PKD1L2": "", + "SLC7A2": "", + "DGKK": "", + "IQSEC1": "", + "SYNM": "", + "SARM1": "", + "SMAD5": "", + "MAML3": "", + "CXorf40A": "", + "MAPT": "", + "ITIH5": "", + "NOTCH4": "", + "FER1L4": "", + "CNTNAP4": "", + "NLRC3": "", + "COL18A1": "", + "SLC6A6": "", + "DDX52": "", + "CDH4": "", + "SLC46A1": "", + "SLC35E2B": "", + "OCLN": "", + "DCAF7": "", + "SCAMP1": "", + "ATG13": "", + "SMAD3": "", + "DDX6": "", + "SLC25A53": "", + "ALG9": "", + "DCP1A": "", + "NCAM1": "", + "LINC00869": "", + "MYH7": "", + "DIXDC1": "", + "ZBTB4": "", + "RABEP1": "", + "PVR": "", + "POM121C": "", + "HOOK1": "", + "MAPK8IP2": "", + "ZNF280B": "", + "WASF2": "", + "PLEKHA2": "", + "PPP4R3B": "", + "FAM83H": "", + "SALL3": "", + "PHKG2": "", + "C18orf25": "", + "ZNF229": "", + "ZNF765-ZNF761": "", + "KANSL1": "", + "FAM102B": "", + "NOTCH2NL": "", + "YTHDF3": "", + "DPCR1": "", + "DACH1": "", + "PKD1L3": "", + "GRIA3": "", + "CYP1B1": "", + "LTBP4": "", + "SPON1": "", + "RNA28SN4": "", + "RNA28SN1": "", + "TRIL": "", + "RNA28SN3": "", + "RNA28SN2": "", + "XKR5": "", + "RBM8A": "", + "SALL2": "", + "JADE3": "", + "DHX57": "", + "PIGN": "", + "CPNE3": "", + "ANO1": "", + "NATD1": "", + "DKFZP434A062": "", + "TDRD9": "", + "BDNF": "", + "IVD": "", + "STIMATE": "", + "KCP": "", + "PRAG1": "", + "KLHL18": "", + "LYNX1": "", + "HYOU1": "", + "HLA-L": "", + "ATG9B": "", + "SLC6A14": "", + "PCSK6": "", + "MIR99AHG": "", + "TOX4": "", + "GABBR1": "", + "RABGEF1": "", + "PRR36": "", + "MAP3K14": "", + "PCDHB9": "", + "LOC102723753": "", + "MYO19": "", + "SRSF8": "", + "CTPS2": "", + "AHCYL1": "", + "UHRF1": "", + "MARCKS": "", + "ZMYM1": "", + "SENP3-EIF4A1": "", + "SEC14L2": "", + "RAPGEFL1": "", + 
"ZNF761": "", + "CNTROB": "", + "SSTR3": "", + "PAX2": "", + "GGA3": "", + "MCL1": "", + "EPS8": "", + "LINC02210": "", + "KRBA1": "", + "MSH5-SAPCD1": "", + "HLA-DPB1": "", + "PPP1R9B": "", + "OPLAH": "", + "UBXN4": "", + "ZNF2": "", + "EPHB6": "", + "LIX1L": "", + "RAPGEF4": "", + "MED22": "", + "POLR3C": "", + "DDR1": "", + "SIGLEC16": "", + "NEFL": "", + "ABCG4": "", + "BAG6": "", + "RECQL4": "", + "SPPL2B": "", + "RETREG3": "", + "FZD6": "", + "SCRT1": "", + "LSM14A": "", + "TAPBP": "", + "TWSG1": "", + "FRMD8": "", + "VPS26C": "", + "PNMA3": "", + "ZNF282": "", + "SP8": "", + "SRRM3": "", + "CCDC125": "", + "NPIPB3": "", + "FAM13C": "", + "GTF2IP1": "", + "ANKRD34A": "", + "PPP1R2": "", + "PHYHIPL": "", + "USH1G": "", + "LINC00461": "", + "ZNRD1ASP": "", + "TRIM10": "", + "SPIB": "", + "BCL6B": "", + "SCARF2": "", + "KIR3DX1": "", + "LOC400682": "", + "HLA-DOA": "", + "PLCD3": "", + "VPS11": "", + "FAM231D": "", + "TRIM52": "", + "ABCF1": "", + "ANP32E": "", + "COPG2IT1": "", + "TGIF2": "", + "LHX1": "", + "PIK3R6": "", + "APOL4": "", + "ZNF502": "", + "FGD5P1": "", + "LINC00624": "", + "ADRA2B": "", + "ZNF598": "", + "GNAZ": "", + "TMEM106A": "", + "SLC12A9": "", + "TCF19": "", + "CCDC3": "", + "EFHC2": "", + "KCNE1B": "", + "PBX2": "", + "PAMR1": "", + "GJA5": "", + "TYW1B": "", + "PLP1": "", + "ANKDD1A": "", + "GBE1": "", + "MAMDC2": "", + "PIGW": "", + "MOCOS": "", + "GRIPAP1": "", + "COL26A1": "", + "MAPT-IT1": "", + "SRRT": "", + "ZNF595": "", + "SEMA3B": "", + "C21orf58": "", + "RHBDF1": "", + "EGR2": "", + "ABRAXAS2": "", + "NPRL3": "", + "TXNIP": "", + "RYK": "", + "RXRB": "", + "LILRB2": "", + "SYT3": "", + "TRPV6": "", + "PARG": "", + "CSNK1G2": "", + "ARHGEF16": "", + "HSH2D": "", + "ALDH3B1": "", + "ZNF274": "", + "MUC13": "", + "LINC00842": "", + "AKT1": "", + "CHM": "", + "ZSCAN26": "", + "MAL2": "", + "PTH2R": "", + "GPANK1": "", + "LINC01623": "", + "CD86": "", + "RHBG": "", + "TMSB15B": "", + "ZCCHC3": "", + "TUBB": "", + "POLDIP2": "", + 
"PRMT3": "", + "PPT2-EGFL8": "", + "LINC02210-CRHR1": "", + "KIFC1": "", + "USP27X": "", + "HDGFL2": "", + "FOXI3": "", + "PAH": "", + "P3H3": "", + "CRHR1": "", + "LOC101927759": "", + "ARFRP1": "", + "C3orf38": "", + "DAXX": "", + "SLC37A4": "", + "IQCA1L": "", + "MMP28": "", + "LINC02197": "", + "NECAP1": "", + "CDSN": "", + "LOC440570": "", + "B3GNT6": "", + "AOAH": "", + "GAS2L1": "", + "MPIG6B": "", + "CDK11B": "", + "ASPN": "", + "HSPA1B": "", + "LOC100508631": "", + "MICB": "", + "LOC102724580": "", + "SENP3": "", + "RBM38": "", + "TMC4": "", + "LILRB5": "", + "C6orf47": "", + "RIOX1": "", + "BHLHE40-AS1": "", + "SRD5A2": "", + "TSEN34": "", + "EI24": "", + "PADI6": "", + "LINC00893": "", + "CYP2D7": "", + "LINC01622": "", + "LINC01879": "", + "REC8": "", + "UNC93B1": "", + "POU5F1": "", + "GPIHBP1": "", + "FOXD1": "", + "GPSM1": "", + "MICA": "", + "UGT2B15": "", + "KIZ": "", + "ARL17A": "", + "PRAMEF36P": "", + "HCG22": "", + "RNF39": "", + "BECN1": "", + "MOG": "", + "PROSER3": "", + "LINC01149": "", + "CYP21A2": "", + "PRAMEF18": "", + "TBC1D3G": "", + "NR2E3": "", + "NR1H2": "", + "VEGFC": "", + "TBC1D3F": "", + "C18orf65": "", + "HOXC11": "", + "TRY2P": "", + "LINC01138": "", + "LINC00243": "", + "HCG4": "", + "GBAP1": "", + "LYPD4": "", + "FAM226A": "", + "ZNF787": "", + "CYP11A1": "", + "EEF1A2": "", + "SLC38A5": "", + "MICB-DT": "", + "ZNF852": "", + "LOC441242": "", + "RNF115": "", + "SMA4": "", + "TAZ": "", + "LENG9": "", + "STRAP": "", + "CYP4F8": "", + "TSPAN10": "", + "KIR3DL1": "", + "HCP5B": "", + "MMP12": "", + "STAG3L2": "", + "GOLGA6L17P": "", + "ZBTB12": "", + "TREH": "", + "PMCHL2": "", + "LAGE3": "", + "ATRNL1": "", + "CEACAM20": "", + "ZG16": "", + "MIR3936HG": "", + "LOC102724562": "", + "INTS4P2": "", + "LINC00221": "", + "DHRS3": "", + "HCG27": "", + "CLTB": "", + "KLK6": "", + "HLA-H": "", + "SPANXA2-OT1": "", + "PRAMEF11": "", + "PPP1R11": "", + "NDUFA6-AS1": "", + "ECHDC3": "", + "HLA-DQB1": "", + "KIR2DS4": "", + "HLA-B": "", + 
"LOC102725121": "", + "CIB2": "", + "KIR2DL1": "", + "KIR2DL2": "", + "HLA-C": "", + "ABO": "", + "KRTAP10-7": "", + "HLA-G": "", + "CWC15": "", + "C17orf100": "", + "HLA-J": "", + "OR4K3": "", + "HLA-DQA1": "", + "LOC105379550": "", + "MRPS21": "", + "SIGLEC17P": "", + "LINC01115": "", + "NUDT18": "", + "ORAI1": "", + "PNLIPRP2": "", + "KLF14": "", + "SSX2B": "", + "CCL15-CCL14": "", + "UBXN8": "", + "IGFBP2": "", + "TMEM44-AS1": "", + "TEX13A": "", + "LCA10": "", + "SPANXN2": "", + "SYCE1": "", + "LILRA5": "", + "KRTAP5-4": "", + "FAM228B": "", + "OR12D1": "", + "SPC25": "", + "FCGR1CP": "", + "OR52E1": "", + "NOP16": "", + "EGFL8": "", + "PRAF2": "", + "LOC388282": "", + "CCNQ": "", + "VN1R3": "", + "HLA-V": "", + "SBK3": "", + "LOC100128594": "", + "KLRF1": "", + "EMG1": "", + "TARM1": "", + "UBE2NL": "", + "OR5AL1": "", + "TPSB2": "", + "PSORS1C2": "", + "HLA-DQA2": "", + "OR10AC1": "", + "OR2J1": "", + "OR10J4": "", + "CSNK2B": "", + "OR4Q2": "", + "LOC100507547": "", + "ZNF630-AS1": "", + "HLA-DMA": "", + "OR4E1": "", + "PRB3": "", + "CCL15": "", + "C8orf59": "", + "PSMB9": "", + "LINC01719": "", + "CT45A1": "", + "BST2": "", + "NCF4-AS1": "", + "FOLR3": "", + "KRTAP9-9": "", + "COPZ2": "", + "LYNX1-SLURP2": "", + "SAPCD1": "", + "PSORS1C1": "", + "ZNF793-AS1": "", + "ZNRD1": "", + "FRG1CP": "", + "LINC02362": "", + "KRTAP4-1": "", + "PICSAR": "", + "TWIST2": "", + "LINC01796": "", + "HCG25": "", + "KRTAP7-1": "", + "CRLF2": "", + "MDH2": "", + "HCG9": "", + "ATP5MC1": "", + "TTTY14": "", + "LOC100507384": "", + "PMS2P2": "", + "HCG23": "", + "LINC00226": "", + "RPP21": "", + "GPHB5": "", + "GAGE8": "", + "GAGE2E": "", + "LOC101928087": "", + "GAGE12B": "", + "GRIFIN": "", + "LOC102725193": "", + "HCG14": "", + "IFITM4P": "", + "SNORD48": "", + "MUC22": "", + "PTPRQ": "", + "HERC2": "", + "OTUD7A": "", + "LOC646214": "", + "TJP1": "", + "WDR81": "", + "KLF13": "", + "POLR2A": "", + "LOC100288637": "", + "GOLGA8N": "", + "GOLGA8J": "", + "GOLGA8K": "", + 
"GOLGA8R": "", + "MTMR10": "", + "SMIM10L1": "", + "KLLN": "", + "LINC02249": "", + "APBA2": "", + "CHRNA7": "", + "DBET": "", + "WNT3": "", + "GOLGA2P10": "", + "CHRFAM7A": "", + "RPH3AL": "", + "SORD2P": "", + "LINC00552": "", + "MPV17L": "", + "SLC22A18AS": "", + "C16orf45": "", + "GRK1": "", + "FRG2": "", + "LOC143666": "", + "FRG2EP": "", + "LOC105373100": "", + "GOLGA8Q": "", + "HERC2P7": "", + "SLC22A18": "", + "METRNL": "", + "BTNL2": "", + "ADAM18": "", + "PRSS22": "", + "C2orf27B": "", + "C2orf27A": "", + "LOC283710": "", + "LOC101928804": "", + "IFI27": "", + "ABCC6": "", + "LOC692247": "" + } + is_it_gapped = gapGene.get(symbol) + if is_it_gapped == '': + return True + else: + return False \ No newline at end of file diff --git a/VariantValidator/modules/vvConverters.py b/VariantValidator/modules/vvConverters.py deleted file mode 100644 index e2452ece..00000000 --- a/VariantValidator/modules/vvConverters.py +++ /dev/null @@ -1,2225 +0,0 @@ -import re -import os -import sys -import copy -from vvLogging import logger -import hgvs -import hgvs.exceptions -from hgvs.dataproviders import uta -from hgvs.dataproviders import seqfetcher -import hgvs.normalizer -import hgvs.validator -import hgvs.parser -import hgvs.variantmapper -import hgvs.sequencevariant - -#Error setup -from hgvs.exceptions import HGVSError, HGVSDataNotAvailableError, HGVSUnsupportedOperationError -class mergeHGVSerror(Exception): - pass -class alleleVariantError(Exception): - pass - - -""" -r_to_c -parses r. variant strings into hgvs object and maps to the c. equivalent. 
-""" -def r_to_c(variant, evm, hp): - # convert the input string into a hgvs object by parsing - var_r = hp.parse_hgvs_variant(variant) - # map to the coding sequence - var_c = evm.r_to_c(var_r) # coding level variant - variant = str(var_c) - c_from_r = {'variant': variant, 'type': ':c.'} - return c_from_r - -""" -Maps transcript variant descriptions onto specified RefSeqGene reference sequences -Return an hgvs object containing the genomic sequence variant relative to the RefSeqGene -acession -refseq_ac = RefSeqGene ac -""" - - -def refseq(variant, vm, refseq_ac, hp, evm, hdp, primary_assembly): - vr = hgvs.validator.Validator(hdp) - # parse the variant into hgvs object - var_c = hp.parse_hgvs_variant(variant) - # map to the genomic co-ordinates using the easy variant mapper set to alt_aln_method = alt_aln_method - var_g = myevm_t_to_g(var_c, evm, hdp, primary_assembly) - # Get overlapping transcripts - forcing a splign alignment - start_i = var_g.posedit.pos.start.base - end_i = var_g.posedit.pos.end.base - alt_ac = var_g.ac - alt_aln_method = 'splign' - transcripts = hdp.get_tx_for_region(alt_ac, alt_aln_method, start_i - 1, end_i) - # Take the first transcript - for trans in transcripts: - tx_ac = trans[0] - try: - ref_c = vm.g_to_t(var_g, tx_ac, alt_aln_method='splign') - except: - continue - else: - # map the variant co-ordinates to the refseq Gene accession using vm - ref_g_dict = { - 'ref_g': '', - 'error': 'false' - } - try: - ref_g_dict['ref_g'] = vm.t_to_g(ref_c, alt_ac=refseq_ac, alt_aln_method='splign') - except: - e = sys.exc_info()[0] - ref_g_dict['error'] = e - try: - vr.validate(ref_g_dict['ref_g']) - except: - e = sys.exc_info()[0] - ref_g_dict['error'] = e - if ref_g_dict['error'] == 'false': - return ref_g_dict - else: - continue - # Return as an error if all fail - return ref_g_dict - - -""" -Parses genomic variant strings into hgvs objects -Maps genomic hgvs object into a coding hgvs object if the c accession string is provided -returns a c. 
variant description string -""" - - -def g_to_c(var_g, tx_ac, hp, evm): - pat_g = re.compile("\:g\.") # Pattern looks for :g. - # If the :g. pattern is present in the input variant - if pat_g.search(var_g): - # convert the input string into a hgvs object by parsing - var_g = hp.parse_hgvs_variant(var_g) - # Map to coding variant - var_c = str(evm.g_to_c(var_g, tx_ac)) - return var_c - - -""" -Parses genomic variant strings into hgvs objects -Maps genomic hgvs object into a non-coding hgvs object if the n accession string is provided -returns a n. variant description string -""" - - -def g_to_n(var_g, tx_ac, hp, evm): - pat_g = re.compile("\:g\.") # Pattern looks for :g. - # If the :g. pattern is present in the input variant - if pat_g.search(var_g): - # convert the input string into a hgvs object by parsing - var_g = hp.parse_hgvs_variant(var_g) - # Map to coding variant - var_n = str(evm.g_to_n(var_g, tx_ac)) - return var_n - - -""" -Ensures variant strings are transcript c. or n. -returns parsed hgvs c. or n. object -""" - - -def coding(variant, hp): - # If the :c. pattern is present in the input variant - if re.search(':c.', variant) or re.search(':n.', variant): - # convert the input string into a hgvs object - var_c = hp.parse_hgvs_variant(variant) - return var_c - - -""" -Mapping transcript to genomic position -Ensures variant strings are transcript c. or n. -returns parsed hgvs g. object -""" - - -def genomic(variant, evm, hp, hdp, primary_assembly): - # Set regular expressions for if statements - pat_g = re.compile("\:g\.") # Pattern looks for :g. - pat_n = re.compile("\:n\.") - pat_c = re.compile("\:c\.") # Pattern looks for :c. - - # If the :c. 
pattern is present in the input variant - if pat_c.search(variant) or pat_n.search(variant): - error = 'false' - hgvs_var = hp.parse_hgvs_variant(variant) - try: - var_g = myevm_t_to_g(hgvs_var, evm, hdp, primary_assembly) # genomic level variant - except hgvs.exceptions.HGVSError as e: - error = e - if error != 'false': - var_g = 'error ' + str(e) - return var_g - - # If the :g. pattern is present in the input variant - elif (pat_g.search(variant)): # or (pat_n.search(variant)): - # convert the input string into a hgvs object - var_g = hp.parse_hgvs_variant(variant) - return var_g - - -""" -Mapping transcript to protein prediction -Ensures variant strings are transcript c. -returns parsed hgvs p. object -""" - - - - -""" -Function which takes a NORMALIZED hgvs Python transcript variant and maps to a specified protein reference sequence. A protein -level hgvs python object is returned. - -Note the function currently assumes that the transcript description is correctly normalized having come from the -previous g_to_t function -""" - - - - - -""" -Ensures variant strings are g. -returns parsed hgvs g. object -""" - - -def hgvs_genomic(variant, hp): - # Set regular expressions for if statements - pat_g = re.compile("\:g\.") # Pattern looks for :g. Note (gene) has been removed - # If the :g. pattern is present in the input variant - if pat_g.search(variant): - # convert the input string into a hgvs object - var_g = hp.parse_hgvs_variant(variant) - return var_g - - -""" -Enhanced transcript to genome position mapping function using evm -Deals with mapping from transcript positions that do not exist in the genomic sequence -i.e. the stated position aligns to a genomic gap! -Trys to ensure that a genomic position is always returned even if the c. or n. transcript -will not map to the specified genome build primary assembly. 
-Deals with transcript mapping to several genomic assemblies -Order -Map to a single NC_ for the specified genome build primary assembly -Map to a single NC_ for an alternate genome build primary assembly -Map to an NT_ from the specified genome build -Map to an NT_ from an alternative genome build -Map to an NW_ from the specified genome build -Map to an NW_ from an alternative genome buildRequires parsed c. or n. object -returns parsed hgvs g. object -""" - - -def myevm_t_to_g(hgvs_c, evm, hdp, primary_assembly): - # create no_norm_evm - if primary_assembly == 'GRCh38': - no_norm_evm = no_norm_evm_38 - elif primary_assembly == 'GRCh37': - no_norm_evm = no_norm_evm_37 - - # store the input - stored_hgvs_c = copy.deepcopy(hgvs_c) - expand_out = 'false' - utilise_gap_code = True - - # Gap gene black list - try: - gene_symbol = dbControls.data.get_gene_symbol_from_transcriptID(hgvs_c.ac) - except Exception: - utilise_gap_code = False - else: - # If the gene symbol is not in the list, the value False will be returned - utilise_gap_code = gap_genes.gap_black_list(gene_symbol) - # Warn gap code in use - logger.warning("gap_compensation_myevm = " + str(utilise_gap_code)) - - if utilise_gap_code is True and (hgvs_c.posedit.edit.type == 'identity' or hgvs_c.posedit.edit.type == 'del' or hgvs_c.posedit.edit.type =='delins' or hgvs_c.posedit.edit.type == 'dup' or hgvs_c.posedit.edit.type == 'sub' or hgvs_c.posedit.edit.type == 'ins' or hgvs_c.posedit.edit.type == 'inv'): - - # if NM_ need the n. 
position - if re.match('NM_', str(hgvs_c.ac)): - hgvs_c = no_norm_evm.c_to_n(hgvs_c) - - # Check for intronic - try: - hn.normalize(hgvs_c) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('intronic variant', error): - pass - elif re.search('Length implied by coordinates must equal sequence deletion length', error) and re.match( - 'NR_', hgvs_c.ac): - hgvs_c.posedit.pos.end.base = hgvs_c.posedit.pos.start.base + len(hgvs_c.posedit.edit.ref) - 1 - - # Check again before continuing - if re.search('\d+\+', str(hgvs_c.posedit.pos)) or re.search('\d+\-', str(hgvs_c.posedit.pos)) or re.search( - '\*\d+\+', str(hgvs_c.posedit.pos)) or re.search('\*\d+\-', str(hgvs_c.posedit.pos)): - pass - - else: - try: - # For non-intronic sequence - hgvs_t = copy.deepcopy(hgvs_c) - if hgvs_t.posedit.edit.type == 'inv': - inv_alt = revcomp(hgvs_t.posedit.edit.ref) - t_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base) + '_' + str(hgvs_t.posedit.pos.end.base) + 'del' + hgvs_t.posedit.edit.ref + 'ins' + inv_alt - hgvs_t_delins = hp.parse_hgvs_variant(t_delins) - pre_base = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.start.base-1) - post_base = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.end.base,hgvs_t.posedit.pos.end.base+1) - hgvs_t.posedit.edit.ref = pre_base + hgvs_t.posedit.edit.ref + post_base - inv_alt = pre_base + inv_alt + post_base - hgvs_t.posedit.pos.start.base = hgvs_t.posedit.pos.start.base - 1 - start = hgvs_t.posedit.pos.start.base - hgvs_t.posedit.pos.start.base = start + 1 - hgvs_t.posedit.pos.end.base = hgvs_t.posedit.pos.end.base + 1 - end = hgvs_t.posedit.pos.end.base - hgvs_t.posedit.pos.start.base = start - hgvs_t.posedit.pos.end.base = end - hgvs_str = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str(start) + '_' + str(end) + 'del' + hgvs_t.posedit.edit.ref + 'ins' + inv_alt - hgvs_t = hp.parse_hgvs_variant(hgvs_str) - elif hgvs_c.posedit.edit.type == 'dup': - pre_base = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.start.base-1) - post_base = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.end.base,hgvs_t.posedit.pos.end.base+1) - alt = pre_base + hgvs_t.posedit.edit.ref + hgvs_t.posedit.edit.ref + post_base - ref = pre_base + hgvs_t.posedit.edit.ref + post_base - dup_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base - 1) + '_' + str((hgvs_t.posedit.pos.start.base + len(ref)) -2) + 'del' + ref + 'ins' + alt - hgvs_t = hp.parse_hgvs_variant(dup_to_delins) - elif hgvs_c.posedit.edit.type == 'ins': - ins_ref = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.end.base+1) - ins_alt = ins_ref[:2] + hgvs_t.posedit.edit.alt + ins_ref[-2:] - ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base - 1) + '_' + str(hgvs_t.posedit.pos.end.base +1 ) + 'del' + ins_ref + 'ins' + ins_alt - hgvs_t = hp.parse_hgvs_variant(ins_to_delins) - else: - if str(hgvs_t.posedit.edit.alt) == 'None': - hgvs_t.posedit.edit.alt = '' - pre_base = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.start.base-1) - post_base = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.end.base,hgvs_t.posedit.pos.end.base+1) - hgvs_t.posedit.edit.ref = pre_base + hgvs_t.posedit.edit.ref + post_base - hgvs_t.posedit.edit.alt = pre_base + hgvs_t.posedit.edit.alt + post_base - hgvs_t.posedit.pos.start.base = hgvs_t.posedit.pos.start.base - 1 - start = hgvs_t.posedit.pos.start.base - hgvs_t.posedit.pos.start.base = start + 1 - hgvs_t.posedit.pos.end.base = hgvs_t.posedit.pos.end.base + 1 - end = hgvs_t.posedit.pos.end.base - hgvs_t.posedit.pos.start.base = start - hgvs_t.posedit.pos.end.base = end - hgvs_str = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str(start) + '_' + str(end) + str(hgvs_t.posedit.edit) - hgvs_t = hp.parse_hgvs_variant(hgvs_str) - hgvs_c = copy.deepcopy(hgvs_t) - - # Set expanded out test to true - expand_out = 'true' - - except Exception: - hgvs_c = hgvs_c - - if re.match('NM_', str(hgvs_c.ac)): - try: - hgvs_c = no_norm_evm.n_to_c(hgvs_c) - except hgvs.exceptions.HGVSError as e: - hgvs_c = copy.deepcopy(stored_hgvs_c) - - # Ensure the altered c. variant has not crossed intro exon boundaries - hgvs_check_boundaries = copy.deepcopy(hgvs_c) - try: - h_variant = hn.normalize(hgvs_check_boundaries) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error): - hgvs_c = copy.deepcopy(stored_hgvs_c) - # Catch identity at the exon/intron boundary by trying to normalize ref only - if hgvs_check_boundaries.posedit.edit.type == 'identity': - reform_ident = str(hgvs_c).split(':')[0] - reform_ident = reform_ident + ':' + stored_hgvs_c.type + '.' + str(hgvs_c.posedit.pos) + 'del' + str(hgvs_c.posedit.edit.ref)# + 'ins' + str(hgvs_c.posedit.edit.alt) - hgvs_reform_ident = hp.parse_hgvs_variant(reform_ident) - try: - hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error) or re.search('Normalization of intronic variants', error): - hgvs_c = copy.deepcopy(stored_hgvs_c) - try: - hgvs_genomic = no_norm_evm.t_to_g(hgvs_c) - hn.normalize(hgvs_genomic) # Check the validity of the mapping - # This will fail on multiple refs for NC_ - except hgvs.exceptions.HGVSError as e: - # Recover all available mapping options from UTA - mapping_options = hdp.get_tx_mapping_options(hgvs_c.ac) - - if mapping_options == []: - raise HGVSDataNotAvailableError("No alignment data between the specified transcript reference sequence and any GRCh37 and GRCh38 genomic reference sequences (including alternate chromosome assemblies, patches and RefSeqGenes) are available.") - - # Capture 
errors from attempted mappings - attempted_mapping_error = '' - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NC_', option[1]): - chr_num = supported_chromosome_builds.supported_for_mapping(str(option[1]), primary_assembly) - if chr_num != 'false': - try: - hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[1] + '~' - print e - continue - - # If not mapped, raise error - try: - hn.normalize(hgvs_genomic) - except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NC_', option[1]): - try: - hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - if re.search(option[1], attempted_mapping_error): - pass - else: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[1] + '~' - print e - continue - try: - hn.normalize(hgvs_genomic) - except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NT_', option[1]): - try: - hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[1] + '~' - print e - continue - try: - hn.normalize(hgvs_genomic) - except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NW_', option[1]): - try: - hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[1] + '~' - print e - continue - # Only a RefSeqGene available - try: - hn.normalize(hgvs_genomic) - except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NG_', option[1]): - try: - hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception 
as e: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[1] + '~' - print e - continue - # If not mapped, raise error - try: - hgvs_genomic - except Exception: - raise HGVSDataNotAvailableError(attempted_mapping_error) - - if hgvs_c.posedit.edit.type == 'identity' and hgvs_genomic.posedit.edit.type == 'delins' and hgvs_genomic.posedit.edit.alt == '' and expand_out != 'true': - hgvs_genomic.posedit.edit.alt = hgvs_genomic.posedit.edit.ref - if hgvs_genomic.posedit.edit.type == 'ins' and utilise_gap_code is True: - try: - hgvs_genomic = hn.normalize(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error == 'insertion length must be 1': - ref = sf.fetch_seq(str(hgvs_genomic.ac),hgvs_genomic.posedit.pos.start.base-1,hgvs_genomic.posedit.pos.end.base) - hgvs_genomic.posedit.edit.ref = ref - hgvs_genomic.posedit.edit.alt = ref[0:1] + hgvs_genomic.posedit.edit.alt + ref[-1:] - hgvs_genomic = hn.normalize(hgvs_genomic) - if error == 'base start position must be <= end position': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - hgvs_genomic = hn.normalize(hgvs_genomic) - - # Statements required to reformat the stored_hgvs_c into a useable synonym - if (stored_hgvs_c.posedit.edit.ref == '' or stored_hgvs_c.posedit.edit.ref is None) and expand_out != 'false': - if stored_hgvs_c.type == 'c': - stored_hgvs_n = vm.c_to_n(stored_hgvs_c) - else: - stored_hgvs_n = stored_hgvs_c - stored_ref = sf.fetch_seq(str(stored_hgvs_n.ac),stored_hgvs_n.posedit.pos.start.base-1,stored_hgvs_n.posedit.pos.end.base) - stored_hgvs_c.posedit.edit.ref = stored_ref - - if (hgvs_genomic.posedit.edit.ref == '' or hgvs_genomic.posedit.edit.ref is None) and expand_out != 'false': - if hgvs_genomic.posedit.edit.type == 'ins': - stored_ref = 
sf.fetch_seq(str(hgvs_genomic.ac),hgvs_genomic.posedit.pos.start.base-1,hgvs_genomic.posedit.pos.end.base) - stored_alt = stored_ref[:1] + hgvs_genomic.posedit.edit.alt + stored_ref[-1:] - hgvs_genomic.posedit.edit.ref = stored_ref - hgvs_genomic.posedit.edit.alt = stored_alt - - # First look for variants mapping to the flanks of gaps - # either in the gap or on the flank but not fully within the gap - if expand_out == 'true': - nr_genomic = nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) - try: - hn.normalize(nr_genomic) - except hgvs.exceptions.HGVSInvalidVariantError as e: - if re.match('Length implied by coordinates must equal sequence deletion length', str(e)) or str(e) == 'base start position must be <= end position': - # Effectively, this code is designed to handle variants that are directly proximal to - # gap BOUNDARIES, but in some cases the replace reference function of hgvs mapping has removed bases due to - # the deletion length being > the specified range. - - # Warn of variant location wrt the gap - if re.match('Length implied by coordinates must equal sequence deletion length', str(e)): - logger.warning('Variant is proximal to the flank of a genomic gap') - genomic_gap_variant = vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) - try: - hn.normalize(genomic_gap_variant) - except Exception: - pass - else: - genomic_gap_variant = nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) - - if str(e) == 'base start position must be <= end position': - logger.warning('Variant is fully within a genomic gap') - genomic_gap_variant = vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) - - # Logic - # We have checked that the variant does not cross boundaries, or is intronic - # So is likely mapping to a genomic gap - try: - hn.normalize(genomic_gap_variant) - except Exception as e: - if str(e) == 'base start position must be <= end position': - # This will only happen when the variant is fully within the gap - gap_start = genomic_gap_variant.posedit.pos.end.base - gap_end = 
genomic_gap_variant.posedit.pos.start.base - genomic_gap_variant.posedit.pos.start.base = gap_start - genomic_gap_variant.posedit.pos.end.base = gap_end - if re.match('Length implied by coordinates must equal sequence deletion length', str(e)): - # This will only happen if the variant is flanking the gap but is - # not inside the gap - logger.warning('Variant is on the flank of a genomic gap but not within the gap') - gap_start = genomic_gap_variant.posedit.pos.start.base - 1 - gap_end = genomic_gap_variant.posedit.pos.end.base + 1 - genomic_gap_variant.posedit.pos.start.base = gap_start - genomic_gap_variant.posedit.pos.end.base = gap_end - genomic_gap_variant.posedit.edit.ref = '' - stored_hgvs_c = copy.deepcopy(hgvs_c) - - # Remove alt - try: - genomic_gap_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - pass - - # Should be a delins so will normalize statically and replace the reference bases - genomic_gap_variant = hn.normalize(genomic_gap_variant) - # Static map to c. and static normalize - transcript_gap_variant = vm.g_to_t(genomic_gap_variant, hgvs_c.ac) - stored_transcript_gap_variant = transcript_gap_variant - if not re.match('Length implied by coordinates must equal sequence deletion length', str(e)): - transcript_gap_variant = hn.normalize(transcript_gap_variant) - - # if NM_ need the n. position - if re.match('NM_', str(hgvs_c.ac)): - transcript_gap_n = no_norm_evm.c_to_n(transcript_gap_variant) - transcript_gap_alt_n = no_norm_evm.c_to_n(stored_hgvs_c) - else: - transcript_gap_n = transcript_gap_variant - transcript_gap_alt_n = stored_hgvs_c - - # Ensure an ALT exists - try: - if transcript_gap_alt_n.posedit.edit.alt is None: - transcript_gap_alt_n.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - transcript_gap_n_delins_from_dup = transcript_gap_n.ac + ':' + transcript_gap_n.type + '.' 
+ str(transcript_gap_n.posedit.pos.start.base) + '_' + str(transcript_gap_n.posedit.pos.end.base) + 'del' + transcript_gap_n.posedit.edit.ref + 'ins' + transcript_gap_n.posedit.edit.ref + transcript_gap_n.posedit.edit.ref - transcript_gap_n = hp.parse_hgvs_variant(transcript_gap_n_delins_from_dup) - transcript_gap_alt_n_delins_from_dup = transcript_gap_alt_n.ac + ':' + transcript_gap_alt_n.type + '.' + str(transcript_gap_alt_n.posedit.pos.start.base) + '_' + str(transcript_gap_alt_n.posedit.pos.end.base) + 'del' + transcript_gap_alt_n.posedit.edit.ref + 'ins' + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n.posedit.edit.ref - transcript_gap_alt_n = hp.parse_hgvs_variant(transcript_gap_alt_n_delins_from_dup) - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(transcript_gap_n.posedit.edit.ref) - if transcript_gap_alt_n.posedit.edit.alt is not None: - alternate_bases = list(transcript_gap_alt_n.posedit.edit.alt) - else: - # Deletions with no ins - pre_alternate_bases = list(transcript_gap_alt_n.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = transcript_gap_n.posedit.pos.start.base - alt_start = transcript_gap_alt_n.posedit.pos.start.base - ref_base_dict = {} - for base in reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = ref_start + 1 - - alt_base_dict = {} - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for int in range(transcript_gap_alt_n.posedit.pos.start.base, transcript_gap_alt_n.posedit.pos.end.base+1, 1): - if int == alt_start: - alt_base_dict[int] = str(''.join(alternate_bases)) - else: - alt_base_dict[int] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base+1, 1): - if int in 
alt_base_dict.keys(): - alternate_sequence_bases.append(alt_base_dict[int]) - else: - alternate_sequence_bases.append(ref_base_dict[int]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Update variant, map to genome using vm and normalize - transcript_gap_n.posedit.edit.alt = alternate_sequence - - try: - transcript_gap_variant = vm.n_to_c(transcript_gap_n) - except: - transcript_gap_variant = transcript_gap_n - - try: - hgvs_genomic = vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) - hgvs_genomic = hn.normalize(hgvs_genomic) - except Exception as e: - if str(e) == "base start position must be <= end position": - # Expansion out is required to map back to the genomic position - pre_base = sf.fetch_seq(transcript_gap_n.ac,transcript_gap_n.posedit.pos.start.base-2,transcript_gap_n.posedit.pos.start.base-1) - post_base = sf.fetch_seq(transcript_gap_n.ac,transcript_gap_n.posedit.pos.end.base,transcript_gap_n.posedit.pos.end.base+1) - transcript_gap_n.posedit.pos.start.base = transcript_gap_n.posedit.pos.start.base - 1 - transcript_gap_n.posedit.pos.end.base = transcript_gap_n.posedit.pos.end.base + 1 - transcript_gap_n.posedit.edit.ref = pre_base + transcript_gap_n.posedit.edit.ref + post_base - transcript_gap_n.posedit.edit.alt = pre_base + transcript_gap_n.posedit.edit.alt + post_base - try: - transcript_gap_variant = vm.n_to_c(transcript_gap_n) - except: - transcript_gap_variant = transcript_gap_n - hgvs_genomic = vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) - hgvs_genomic = hn.normalize(hgvs_genomic) - - # Bypass the next bit of gap code - expand_out = 'false' - - else: - pass - # No map to the flank of a gap or within the gap - else: - pass - - - # CASCADING STATEMENTS WHICH CAPTURE t to g MAPPING OPTIONS - # Remove identity bases - if hgvs_c == stored_hgvs_c: - expand_out = 'false' - elif expand_out == 'false' or utilise_gap_code is False: - pass - # Correct expansion ref + 2 - 
elif expand_out == 'true' and ( - len(hgvs_genomic.posedit.edit.ref) == (len(stored_hgvs_c.posedit.edit.ref) + 2)): # >= 3: - hgvs_genomic.posedit.pos.start.base = hgvs_genomic.posedit.pos.start.base + 1 - hgvs_genomic.posedit.pos.end.base = hgvs_genomic.posedit.pos.end.base - 1 - hgvs_genomic.posedit.edit.ref = hgvs_genomic.posedit.edit.ref[1:-1] - if hgvs_genomic.posedit.edit.alt is not None: - hgvs_genomic.posedit.edit.alt = hgvs_genomic.posedit.edit.alt[1:-1] - elif expand_out == 'true' and ( - len(hgvs_genomic.posedit.edit.ref) != (len(stored_hgvs_c.posedit.edit.ref) + 2)): # >= 3: - if expand_out == 'true' and len(hgvs_genomic.posedit.edit.ref) == 2: - gn = hn.normalize(hgvs_genomic) - pass - - # Likely if the start or end position aligns to a gap in the genomic sequence - # Logic - # We have checked that the variant does not cross boundaries, or is intronic - # So is likely mapping to a genomic gap - elif expand_out == 'true' and len(hgvs_genomic.posedit.edit.ref) <= 1: - # Incorrect expansion, likely < ref + 2 - genomic_gap_variant = vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) - try: - hn.normalize(genomic_gap_variant) - except Exception as e: - if str(e) == 'base start position must be <= end position': - gap_start = genomic_gap_variant.posedit.pos.end.base - gap_end = genomic_gap_variant.posedit.pos.start.base - genomic_gap_variant.posedit.pos.start.base = gap_start - genomic_gap_variant.posedit.pos.end.base = gap_end - # Remove alt - try: - genomic_gap_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - pass - # Should be a delins so will normalize statically and replace the reference bases - genomic_gap_variant = hn.normalize(genomic_gap_variant) - # Static map to c. and static normalize - transcript_gap_variant = vm.g_to_t(genomic_gap_variant, hgvs_c.ac) - stored_transcript_gap_variant = transcript_gap_variant - transcript_gap_variant = hn.normalize(transcript_gap_variant) - # if NM_ need the n. 
position - if re.match('NM_', str(hgvs_c.ac)): - transcript_gap_n = no_norm_evm.c_to_n(transcript_gap_variant) - transcript_gap_alt_n = no_norm_evm.c_to_n(stored_hgvs_c) - else: - transcript_gap_n = transcript_gap_variant - transcript_gap_alt_n = stored_hgvs_c - - # Ensure an ALT exists - try: - if transcript_gap_alt_n.posedit.edit.alt is None: - transcript_gap_alt_n.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - transcript_gap_n_delins_from_dup = transcript_gap_n.ac + ':' + transcript_gap_n.type + '.' + str( - transcript_gap_n.posedit.pos.start.base) + '_' + str( - transcript_gap_n.posedit.pos.end.base) + 'del' + transcript_gap_n.posedit.edit.ref + 'ins' + transcript_gap_n.posedit.edit.ref + transcript_gap_n.posedit.edit.ref - transcript_gap_n = hp.parse_hgvs_variant(transcript_gap_n_delins_from_dup) - transcript_gap_alt_n_delins_from_dup = transcript_gap_alt_n.ac + ':' + transcript_gap_alt_n.type + '.' + str( - transcript_gap_alt_n.posedit.pos.start.base) + '_' + str( - transcript_gap_alt_n.posedit.pos.end.base) + 'del' + transcript_gap_alt_n.posedit.edit.ref + 'ins' + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n.posedit.edit.ref - transcript_gap_alt_n = hp.parse_hgvs_variant(transcript_gap_alt_n_delins_from_dup) - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(transcript_gap_n.posedit.edit.ref) - if transcript_gap_alt_n.posedit.edit.alt is not None: - alternate_bases = list(transcript_gap_alt_n.posedit.edit.alt) - else: - # Deletions with no ins - pre_alternate_bases = list(transcript_gap_alt_n.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = transcript_gap_n.posedit.pos.start.base - alt_start = transcript_gap_alt_n.posedit.pos.start.base - ref_base_dict = {} - for base in reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = 
ref_start + 1 - - alt_base_dict = {} - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for int in range(transcript_gap_alt_n.posedit.pos.start.base, - transcript_gap_alt_n.posedit.pos.end.base + 1, 1): - if int == alt_start: - alt_base_dict[int] = str(''.join(alternate_bases)) - else: - alt_base_dict[int] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base + 1, 1): - if int in alt_base_dict.keys(): - alternate_sequence_bases.append(alt_base_dict[int]) - else: - alternate_sequence_bases.append(ref_base_dict[int]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Update variant, map to genome using vm and normalize - transcript_gap_n.posedit.edit.alt = alternate_sequence - - try: - transcript_gap_variant = vm.n_to_c(transcript_gap_n) - except: - transcript_gap_variant = transcript_gap_n - - try: - hgvs_genomic = vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) - hgvs_genomic = hn.normalize(hgvs_genomic) - except Exception as e: - if str(e) == "base start position must be <= end position": - # Expansion out is required to map back to the genomic position - pre_base = sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.start.base - 2, - transcript_gap_n.posedit.pos.start.base - 1) - post_base = sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.end.base, - transcript_gap_n.posedit.pos.end.base + 1) - transcript_gap_n.posedit.pos.start.base = transcript_gap_n.posedit.pos.start.base - 1 - transcript_gap_n.posedit.pos.end.base = transcript_gap_n.posedit.pos.end.base + 1 - transcript_gap_n.posedit.edit.ref = pre_base + transcript_gap_n.posedit.edit.ref + post_base - transcript_gap_n.posedit.edit.alt = pre_base + transcript_gap_n.posedit.edit.alt + post_base - try: - transcript_gap_variant 
= vm.n_to_c(transcript_gap_n) - except: - transcript_gap_variant = transcript_gap_n - hgvs_genomic = vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) - hgvs_genomic = hn.normalize(hgvs_genomic) - - # Ins variants map badly - Especially between c. exon/exon boundary - if hgvs_c.posedit.edit.type == 'ins' and hgvs_c.posedit.pos.start.offset == 0 and hgvs_c.posedit.pos.end.offset == 0: - try: - hn.normalize(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error == 'insertion length must be 1': - if hgvs_c.type == 'c': - hgvs_t = vm.c_to_n(hgvs_c) - else: - hgvs_t = copy.copy(hgvs_c) - ins_ref = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-1,hgvs_t.posedit.pos.end.base) - ins_alt = ins_ref[:1] + hgvs_t.posedit.edit.alt + ins_ref[-1:] - ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base) + '_' + str(hgvs_t.posedit.pos.end.base) + 'del' + ins_ref + 'ins' + ins_alt - hgvs_t = hp.parse_hgvs_variant(ins_to_delins) - try: - hgvs_c = vm.n_to_c(hgvs_t) - except Exception: - hgvs_c = copy.copy(hgvs_t) - try: - hgvs_genomic = no_norm_evm.t_to_g(hgvs_c) - except Exception as e: - error = str(e) - logger.warning('Ins mapping error in myt_to_g ' + error) - - return hgvs_genomic - -""" -USE WITH MAPPER THAT DOES NOT REPLACE THE REFERENCE GENOMIC BASES AND DOED NOT NORMALIZE - -Enhanced transcript to genome position mapping function using evm -Trys to ensure that a genomic position is always returned even if the c. or n. transcript -will not map to the specified genome build primary assembly. -Deals with transcript mapping to several genomic assemblies -Order -Map to a single NC_ (or ALT) for the specified genome build -returns parsed hgvs g. 
object -""" - - -def noreplace_myevm_t_to_g(hgvs_c, evm, hdp, primary_assembly): - try: - hgvs_genomic = evm.t_to_g(hgvs_c) - hn.normalize(hgvs_genomic) - # This will fail on multiple refs for NC_ - except hgvs.exceptions.HGVSError as e: - # Recover all available mapping options from UTA - mapping_options = hdp.get_tx_mapping_options(hgvs_c.ac) - if mapping_options == []: - raise HGVSDataNotAvailableError("no g. mapping options available") - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NC_', option[1]): - chr_num = supported_chromosome_builds.supported_for_mapping(str(option[1]), primary_assembly) - if chr_num != 'false': - try: - hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - continue - try: - hn.normalize(hgvs_genomic) - except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NC_', option[1]): - chr_num = supported_chromosome_builds.supported_for_mapping(str(option[1]), primary_assembly) - try: - hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - continue - - try: - hn.normalize(hgvs_genomic) - except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NT_', option[1]): - chr_num = supported_chromosome_builds.supported_for_mapping(str(option[1]), primary_assembly) - if chr_num != 'false': - try: - hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - continue - try: - hn.normalize(hgvs_genomic) - except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NW_', option[1]): - chr_num = supported_chromosome_builds.supported_for_mapping(str(option[1]), - primary_assembly) - if chr_num != 'false': - try: - hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - continue - - # Only a RefSeqGene available - try: - hn.normalize(hgvs_genomic) - except: - for option in 
mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NG_', option[1]): - try: - hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) - break - except: - continue - try: - hgvs_genomic - except Exception: - - raise HGVSDataNotAvailableError('No available t_to_g liftover') - - # Ins variants map badly - Especially between c. exon/exon boundary - if hgvs_c.posedit.edit.type == 'ins' and hgvs_c.posedit.pos.start.offset == 0 and hgvs_c.posedit.pos.end.offset == 0: - try: - hn.normalize(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error == 'insertion length must be 1': - if hgvs_c.type == 'c': - hgvs_t = vm.c_to_n(hgvs_c) - else: - hgvs_t = copy.copy(hgvs_c) - ins_ref = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-1,hgvs_t.posedit.pos.end.base) - ins_alt = ins_ref[:1] + hgvs_t.posedit.edit.alt + ins_ref[-1:] - ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base) + '_' + str(hgvs_t.posedit.pos.end.base) + 'del' + ins_ref + 'ins' + ins_alt - hgvs_t = hp.parse_hgvs_variant(ins_to_delins) - try: - hgvs_c = vm.n_to_c(hgvs_t) - except Exception: - hgvs_c = copy.copy(hgvs_t) - try: - hgvs_genomic = no_norm_evm.t_to_g(hgvs_c) - except Exception as e: - error = str(e) - logger.warning('Ins mapping error in myt_to_g ' + error) - - return hgvs_genomic - - -""" -Enhanced transcript to genome position on a specified genomic reference using vm -Deals with mapping from transcript positions that do not exist in the genomic sequence -i.e. the stated position aligns to a genomic gap! -returns parsed hgvs g. 
object -""" - - -def myvm_t_to_g(hgvs_c, alt_chr, vm, hn, hdp, primary_assembly): - # create no_norm_evm - if primary_assembly == 'GRCh38': - no_norm_evm = no_norm_evm_38 - elif primary_assembly == 'GRCh37': - no_norm_evm = no_norm_evm_37 - - # store the input - stored_hgvs_c = copy.deepcopy(hgvs_c) - expand_out = 'false' - utilise_gap_code = True - - # Gap gene black list - try: - gene_symbol = dbControls.data.get_gene_symbol_from_transcriptID(hgvs_c.ac) - except Exception: - utilise_gap_code = False - else: - # If the gene symbol is not in the list, the value False will be returned - utilise_gap_code = gap_genes.gap_black_list(gene_symbol) - # Warn gap code in use - logger.warning("gap_compensation_mvm = " + str(utilise_gap_code)) - - if utilise_gap_code is True and (hgvs_c.posedit.edit.type == 'identity' or hgvs_c.posedit.edit.type == 'del' or hgvs_c.posedit.edit.type =='delins' or hgvs_c.posedit.edit.type == 'dup' or hgvs_c.posedit.edit.type == 'sub' or hgvs_c.posedit.edit.type == 'ins' or hgvs_c.posedit.edit.type == 'inv'): - - # if NM_ need the n. 
position - if re.match('NM_', str(hgvs_c.ac)): - hgvs_c = no_norm_evm.c_to_n(hgvs_c) - - # Check for intronic - try: - hn.normalize(hgvs_c) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('intronic variant', error): - pass - elif re.search('Length implied by coordinates must equal sequence deletion length', error) and re.match( - 'NR_', hgvs_c.ac): - hgvs_c.posedit.pos.end.base = hgvs_c.posedit.pos.start.base + len(hgvs_c.posedit.edit.ref) - 1 - - # Check again before continuing - if re.search('\d+\+', str(hgvs_c.posedit.pos)) or re.search('\d+\-', str(hgvs_c.posedit.pos)) or re.search('\*\d+\+', str(hgvs_c.posedit.pos)) or re.search('\*\d+\-', str(hgvs_c.posedit.pos)): - pass - - else: - try: - # For non-intronic sequence - hgvs_t = copy.deepcopy(hgvs_c) - # handle inversions - if hgvs_t.posedit.edit.type == 'inv': - inv_alt = revcomp(hgvs_t.posedit.edit.ref) - t_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base) + '_' + str(hgvs_t.posedit.pos.end.base) + 'del' + hgvs_t.posedit.edit.ref + 'ins' + inv_alt - hgvs_t_delins = hp.parse_hgvs_variant(t_delins) - pre_base = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.start.base-1) - post_base = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.end.base,hgvs_t.posedit.pos.end.base+1) - hgvs_t.posedit.edit.ref = pre_base + hgvs_t.posedit.edit.ref + post_base - inv_alt = pre_base + inv_alt + post_base - hgvs_t.posedit.pos.start.base = hgvs_t.posedit.pos.start.base - 1 - start = hgvs_t.posedit.pos.start.base - hgvs_t.posedit.pos.start.base = start + 1 - hgvs_t.posedit.pos.end.base = hgvs_t.posedit.pos.end.base + 1 - end = hgvs_t.posedit.pos.end.base - hgvs_t.posedit.pos.start.base = start - hgvs_t.posedit.pos.end.base = end - hgvs_str = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str(start) + '_' + str(end) + 'del' + hgvs_t.posedit.edit.ref + 'ins' + inv_alt - hgvs_t = hp.parse_hgvs_variant(hgvs_str) - if hgvs_c.posedit.edit.type == 'dup': - # hgvs_t = reverse_normalize.normalize(hgvs_t) - pre_base = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.start.base-1) - post_base = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.end.base,hgvs_t.posedit.pos.end.base+1) - alt = pre_base + hgvs_t.posedit.edit.ref + hgvs_t.posedit.edit.ref + post_base - ref = pre_base + hgvs_t.posedit.edit.ref + post_base - dup_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base - 1) + '_' + str((hgvs_t.posedit.pos.start.base + len(ref)) -2) + 'del' + ref + 'ins' + alt - hgvs_t = hp.parse_hgvs_variant(dup_to_delins) - elif hgvs_c.posedit.edit.type == 'ins': - ins_ref = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.end.base+1) - ins_alt = ins_ref[:2] + hgvs_t.posedit.edit.alt + ins_ref[-2:] - ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str(hgvs_t.posedit.pos.start.base - 1) + '_' + str(hgvs_t.posedit.pos.end.base +1 ) + 'del' + ins_ref + 'ins' + ins_alt - hgvs_t = hp.parse_hgvs_variant(ins_to_delins) - else: - if str(hgvs_t.posedit.edit.alt) == 'None': - hgvs_t.posedit.edit.alt = '' - pre_base = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.start.base-1) - post_base = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.end.base,hgvs_t.posedit.pos.end.base+1) - hgvs_t.posedit.edit.ref = pre_base + hgvs_t.posedit.edit.ref + post_base - hgvs_t.posedit.edit.alt = pre_base + hgvs_t.posedit.edit.alt + post_base - hgvs_t.posedit.pos.start.base = hgvs_t.posedit.pos.start.base - 1 - start = hgvs_t.posedit.pos.start.base - hgvs_t.posedit.pos.start.base = start + 1 - hgvs_t.posedit.pos.end.base = hgvs_t.posedit.pos.end.base + 1 - end = hgvs_t.posedit.pos.end.base - hgvs_t.posedit.pos.start.base = start - hgvs_t.posedit.pos.end.base = end - hgvs_str = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(start) + '_' + str(end) + str(hgvs_t.posedit.edit) - hgvs_t = hp.parse_hgvs_variant(hgvs_str) - hgvs_c = copy.deepcopy(hgvs_t) - - # Set expanded out test to true - expand_out = 'true' - - except Exception: - hgvs_c = hgvs_c - - if re.match('NM_', str(hgvs_c.ac)): - try: - hgvs_c = no_norm_evm.n_to_c(hgvs_c) - except hgvs.exceptions.HGVSError as e: - hgvs_c = copy.deepcopy(stored_hgvs_c) - - # Ensure the altered c. variant has not crossed intro exon boundaries - hgvs_check_boundaries = copy.deepcopy(hgvs_c) - try: - h_variant = hn.normalize(hgvs_check_boundaries) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error): - hgvs_c = copy.deepcopy(stored_hgvs_c) - # Catch identity at the exon/intron boundary by trying to normalize ref only - if hgvs_check_boundaries.posedit.edit.type == 'identity': - reform_ident = str(hgvs_c).split(':')[0] - reform_ident = reform_ident + ':' + stored_hgvs_c.type + '.' 
+ str(hgvs_c.posedit.pos) + 'del' + str(hgvs_c.posedit.edit.ref)# + 'ins' + str(hgvs_c.posedit.edit.alt) - hgvs_reform_ident = hp.parse_hgvs_variant(reform_ident) - try: - hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error) or re.search('Normalization of intronic variants', error): - hgvs_c = copy.deepcopy(stored_hgvs_c) - - hgvs_genomic = vm.t_to_g(hgvs_c, alt_chr) - if hgvs_c.posedit.edit.type == 'identity' and hgvs_genomic.posedit.edit.type == 'delins' and hgvs_genomic.posedit.edit.alt == '' and expand_out != 'true': - hgvs_genomic.posedit.edit.alt = hgvs_genomic.posedit.edit.ref - if hgvs_genomic.posedit.edit.type == 'ins' and utilise_gap_code is True: - try: - hgvs_genomic = hn.normalize(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error == 'insertion length must be 1': - ref = sf.fetch_seq(str(hgvs_genomic.ac),hgvs_genomic.posedit.pos.start.base-1,hgvs_genomic.posedit.pos.end.base) - hgvs_genomic.posedit.edit.ref = ref - hgvs_genomic.posedit.edit.alt = ref[0:1] + hgvs_genomic.posedit.edit.alt + ref[-1:] - hgvs_genomic = hn.normalize(hgvs_genomic) - if error == 'base start position must be <= end position': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - hgvs_genomic = hn.normalize(hgvs_genomic) - - # Statements required to reformat the stored_hgvs_c into a useable synonym - if (stored_hgvs_c.posedit.edit.ref == '' or stored_hgvs_c.posedit.edit.ref is None) and expand_out != 'false': - if stored_hgvs_c.type == 'c': - stored_hgvs_n = vm.c_to_n(stored_hgvs_c) - else: - stored_hgvs_n = stored_hgvs_c - stored_ref = sf.fetch_seq(str(stored_hgvs_n.ac),stored_hgvs_n.posedit.pos.start.base-1,stored_hgvs_n.posedit.pos.end.base) - stored_hgvs_c.posedit.edit.ref = stored_ref - - if (hgvs_genomic.posedit.edit.ref == 
'' or hgvs_genomic.posedit.edit.ref is None) and expand_out != 'false': - if hgvs_genomic.posedit.edit.type == 'ins': - stored_ref = sf.fetch_seq(str(hgvs_genomic.ac),hgvs_genomic.posedit.pos.start.base-1,hgvs_genomic.posedit.pos.end.base) - stored_alt = stored_ref[:1] + hgvs_genomic.posedit.edit.alt + stored_ref[-1:] - hgvs_genomic.posedit.edit.ref = stored_ref - hgvs_genomic.posedit.edit.alt = stored_alt - - # First look for variants mapping to the flanks of gaps - # either in the gap or on the flank but not fully within the gap - if expand_out == 'true': - nr_genomic = nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) - try: - hn.normalize(nr_genomic) - except hgvs.exceptions.HGVSInvalidVariantError as e: - if re.match('Length implied by coordinates must equal sequence deletion length', str(e)) or str(e) == 'base start position must be <= end position': - # Effectively, this code is designed to handle variants that are directly proximal to - # gap BOUNDARIES, but in some cases the replace reference function of hgvs mapping has removed bases due to - # the deletion length being > the specified range. 
- - # Warn of variant location wrt the gap - if re.match('Length implied by coordinates must equal sequence deletion length', str(e)): - logger.warning('Variant is proximal to the flank of a genomic gap') - genomic_gap_variant = vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) - try: - hn.normalize(genomic_gap_variant) - except Exception: - pass - else: - genomic_gap_variant = nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) - - if str(e) == 'base start position must be <= end position': - logger.warning('Variant is fully within a genomic gap') - genomic_gap_variant = vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) - - # Logic - # We have checked that the variant does not cross boundaries, or is intronic - # So is likely mapping to a genomic gap - try: - hn.normalize(genomic_gap_variant) - except Exception as e: - if str(e) == 'base start position must be <= end position': - # This will only happen when the variant is fully within the gap - gap_start = genomic_gap_variant.posedit.pos.end.base - gap_end = genomic_gap_variant.posedit.pos.start.base - genomic_gap_variant.posedit.pos.start.base = gap_start - genomic_gap_variant.posedit.pos.end.base = gap_end - if re.match('Length implied by coordinates must equal sequence deletion length', str(e)): - # This will only happen if the variant is flanking the gap but is - # not inside the gap - logger.warning('Variant is on the flank of a genomic gap but not within the gap') - gap_start = genomic_gap_variant.posedit.pos.start.base - 1 - gap_end = genomic_gap_variant.posedit.pos.end.base + 1 - genomic_gap_variant.posedit.pos.start.base = gap_start - genomic_gap_variant.posedit.pos.end.base = gap_end - genomic_gap_variant.posedit.edit.ref = '' - stored_hgvs_c = copy.deepcopy(hgvs_c) - - # Remove alt - try: - genomic_gap_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - pass - - # Should be a delins so will normalize statically and replace the reference bases - genomic_gap_variant = 
hn.normalize(genomic_gap_variant) - # Static map to c. and static normalize - transcript_gap_variant = vm.g_to_t(genomic_gap_variant, hgvs_c.ac) - stored_transcript_gap_variant = transcript_gap_variant - if not re.match('Length implied by coordinates must equal sequence deletion length', str(e)): - transcript_gap_variant = hn.normalize(transcript_gap_variant) - - # if NM_ need the n. position - if re.match('NM_', str(hgvs_c.ac)): - transcript_gap_n = no_norm_evm.c_to_n(transcript_gap_variant) - transcript_gap_alt_n = no_norm_evm.c_to_n(stored_hgvs_c) - else: - transcript_gap_n = transcript_gap_variant - transcript_gap_alt_n = stored_hgvs_c - - # Ensure an ALT exists - try: - if transcript_gap_alt_n.posedit.edit.alt is None: - transcript_gap_alt_n.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - transcript_gap_n_delins_from_dup = transcript_gap_n.ac + ':' + transcript_gap_n.type + '.' + str(transcript_gap_n.posedit.pos.start.base) + '_' + str(transcript_gap_n.posedit.pos.end.base) + 'del' + transcript_gap_n.posedit.edit.ref + 'ins' + transcript_gap_n.posedit.edit.ref + transcript_gap_n.posedit.edit.ref - transcript_gap_n = hp.parse_hgvs_variant(transcript_gap_n_delins_from_dup) - transcript_gap_alt_n_delins_from_dup = transcript_gap_alt_n.ac + ':' + transcript_gap_alt_n.type + '.' 
+ str(transcript_gap_alt_n.posedit.pos.start.base) + '_' + str(transcript_gap_alt_n.posedit.pos.end.base) + 'del' + transcript_gap_alt_n.posedit.edit.ref + 'ins' + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n.posedit.edit.ref - transcript_gap_alt_n = hp.parse_hgvs_variant(transcript_gap_alt_n_delins_from_dup) - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(transcript_gap_n.posedit.edit.ref) - if transcript_gap_alt_n.posedit.edit.alt is not None: - alternate_bases = list(transcript_gap_alt_n.posedit.edit.alt) - else: - # Deletions with no ins - pre_alternate_bases = list(transcript_gap_alt_n.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = transcript_gap_n.posedit.pos.start.base - alt_start = transcript_gap_alt_n.posedit.pos.start.base - ref_base_dict = {} - for base in reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = ref_start + 1 - - alt_base_dict = {} - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for int in range(transcript_gap_alt_n.posedit.pos.start.base, transcript_gap_alt_n.posedit.pos.end.base+1, 1): - if int == alt_start: - alt_base_dict[int] = str(''.join(alternate_bases)) - else: - alt_base_dict[int] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base+1, 1): - if int in alt_base_dict.keys(): - alternate_sequence_bases.append(alt_base_dict[int]) - else: - alternate_sequence_bases.append(ref_base_dict[int]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Update variant, map to genome using vm and normalize - transcript_gap_n.posedit.edit.alt = alternate_sequence - - try: - transcript_gap_variant = 
vm.n_to_c(transcript_gap_n) - except: - transcript_gap_variant = transcript_gap_n - - try: - hgvs_genomic = vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) - hgvs_genomic = hn.normalize(hgvs_genomic) - except Exception as e: - if str(e) == "base start position must be <= end position": - # Expansion out is required to map back to the genomic position - pre_base = sf.fetch_seq(transcript_gap_n.ac,transcript_gap_n.posedit.pos.start.base-2,transcript_gap_n.posedit.pos.start.base-1) - post_base = sf.fetch_seq(transcript_gap_n.ac,transcript_gap_n.posedit.pos.end.base,transcript_gap_n.posedit.pos.end.base+1) - transcript_gap_n.posedit.pos.start.base = transcript_gap_n.posedit.pos.start.base - 1 - transcript_gap_n.posedit.pos.end.base = transcript_gap_n.posedit.pos.end.base + 1 - transcript_gap_n.posedit.edit.ref = pre_base + transcript_gap_n.posedit.edit.ref + post_base - transcript_gap_n.posedit.edit.alt = pre_base + transcript_gap_n.posedit.edit.alt + post_base - try: - transcript_gap_variant = vm.n_to_c(transcript_gap_n) - except: - transcript_gap_variant = transcript_gap_n - hgvs_genomic = vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) - hgvs_genomic = hn.normalize(hgvs_genomic) - - # Bypass the next bit of gap code - expand_out = 'false' - - else: - pass - # No map to the flank of a gap or within the gap - else: - pass - - # CASCADING STATEMENTS WHICH CAPTURE t to g MAPPING OPTIONS - # Remove identity bases - if hgvs_c == stored_hgvs_c: - expand_out = 'false' - elif expand_out == 'false' or utilise_gap_code is False: - pass - # Correct expansion ref + 2 - elif expand_out == 'true' and ( - len(hgvs_genomic.posedit.edit.ref) == (len(stored_hgvs_c.posedit.edit.ref) + 2)): # >= 3: - hgvs_genomic.posedit.pos.start.base = hgvs_genomic.posedit.pos.start.base + 1 - hgvs_genomic.posedit.pos.end.base = hgvs_genomic.posedit.pos.end.base - 1 - hgvs_genomic.posedit.edit.ref = hgvs_genomic.posedit.edit.ref[1:-1] - if hgvs_genomic.posedit.edit.alt is not None: - 
hgvs_genomic.posedit.edit.alt = hgvs_genomic.posedit.edit.alt[1:-1] - elif expand_out == 'true' and ( - len(hgvs_genomic.posedit.edit.ref) != (len(stored_hgvs_c.posedit.edit.ref) + 2)): # >= 3: - if expand_out == 'true' and len(hgvs_genomic.posedit.edit.ref) == 2: - gn = hn.normalize(hgvs_genomic) - pass - - # Likely if the start or end position aligns to a gap in the genomic sequence - # Logic - # We have checked that the variant does not cross boundaries, or is intronic - # So is likely mapping to a genomic gap - elif expand_out == 'true' and len(hgvs_genomic.posedit.edit.ref) <= 1: - # Incorrect expansion, likely < ref + 2 - genomic_gap_variant = vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) - try: - hn.normalize(genomic_gap_variant) - except Exception as e: - if str(e) == 'base start position must be <= end position': - gap_start = genomic_gap_variant.posedit.pos.end.base - gap_end = genomic_gap_variant.posedit.pos.start.base - genomic_gap_variant.posedit.pos.start.base = gap_start - genomic_gap_variant.posedit.pos.end.base = gap_end - # Remove alt - try: - genomic_gap_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - pass - # Should be a delins so will normalize statically and replace the reference bases - genomic_gap_variant = hn.normalize(genomic_gap_variant) - # Static map to c. and static normalize - transcript_gap_variant = vm.g_to_t(genomic_gap_variant, hgvs_c.ac) - stored_transcript_gap_variant = transcript_gap_variant - transcript_gap_variant = hn.normalize(transcript_gap_variant) - # if NM_ need the n. 
position - if re.match('NM_', str(hgvs_c.ac)): - transcript_gap_n = no_norm_evm.c_to_n(transcript_gap_variant) - transcript_gap_alt_n = no_norm_evm.c_to_n(stored_hgvs_c) - else: - transcript_gap_n = transcript_gap_variant - transcript_gap_alt_n = stored_hgvs_c - - # Ensure an ALT exists - try: - if transcript_gap_alt_n.posedit.edit.alt is None: - transcript_gap_alt_n.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - transcript_gap_n_delins_from_dup = transcript_gap_n.ac + ':' + transcript_gap_n.type + '.' + str( - transcript_gap_n.posedit.pos.start.base) + '_' + str( - transcript_gap_n.posedit.pos.end.base) + 'del' + transcript_gap_n.posedit.edit.ref + 'ins' + transcript_gap_n.posedit.edit.ref + transcript_gap_n.posedit.edit.ref - transcript_gap_n = hp.parse_hgvs_variant(transcript_gap_n_delins_from_dup) - transcript_gap_alt_n_delins_from_dup = transcript_gap_alt_n.ac + ':' + transcript_gap_alt_n.type + '.' + str( - transcript_gap_alt_n.posedit.pos.start.base) + '_' + str( - transcript_gap_alt_n.posedit.pos.end.base) + 'del' + transcript_gap_alt_n.posedit.edit.ref + 'ins' + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n.posedit.edit.ref - transcript_gap_alt_n = hp.parse_hgvs_variant(transcript_gap_alt_n_delins_from_dup) - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(transcript_gap_n.posedit.edit.ref) - if transcript_gap_alt_n.posedit.edit.alt is not None: - alternate_bases = list(transcript_gap_alt_n.posedit.edit.alt) - else: - # Deletions with no ins - pre_alternate_bases = list(transcript_gap_alt_n.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = transcript_gap_n.posedit.pos.start.base - alt_start = transcript_gap_alt_n.posedit.pos.start.base - ref_base_dict = {} - for base in reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = 
ref_start + 1 - - alt_base_dict = {} - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for int in range(transcript_gap_alt_n.posedit.pos.start.base, - transcript_gap_alt_n.posedit.pos.end.base + 1, 1): - if int == alt_start: - alt_base_dict[int] = str(''.join(alternate_bases)) - else: - alt_base_dict[int] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base + 1, 1): - if int in alt_base_dict.keys(): - alternate_sequence_bases.append(alt_base_dict[int]) - else: - alternate_sequence_bases.append(ref_base_dict[int]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Update variant, map to genome using vm and normalize - transcript_gap_n.posedit.edit.alt = alternate_sequence - - try: - transcript_gap_variant = vm.n_to_c(transcript_gap_n) - except: - transcript_gap_variant = transcript_gap_n - - try: - hgvs_genomic = vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) - hgvs_genomic = hn.normalize(hgvs_genomic) - except Exception as e: - if str(e) == "base start position must be <= end position": - # Expansion out is required to map back to the genomic position - pre_base = sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.start.base - 2, - transcript_gap_n.posedit.pos.start.base - 1) - post_base = sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.end.base, - transcript_gap_n.posedit.pos.end.base + 1) - transcript_gap_n.posedit.pos.start.base = transcript_gap_n.posedit.pos.start.base - 1 - transcript_gap_n.posedit.pos.end.base = transcript_gap_n.posedit.pos.end.base + 1 - transcript_gap_n.posedit.edit.ref = pre_base + transcript_gap_n.posedit.edit.ref + post_base - transcript_gap_n.posedit.edit.alt = pre_base + transcript_gap_n.posedit.edit.alt + post_base - try: - transcript_gap_variant 
= vm.n_to_c(transcript_gap_n) - except: - transcript_gap_variant = transcript_gap_n - hgvs_genomic = vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) - hgvs_genomic = hn.normalize(hgvs_genomic) - - # Ins variants map badly - Especially between c. exon/exon boundary - if hgvs_c.posedit.edit.type == 'ins' and hgvs_c.posedit.pos.start.offset == 0 and hgvs_c.posedit.pos.end.offset == 0: - try: - hn.normalize(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error == 'insertion length must be 1': - if hgvs_c.type == 'c': - hgvs_t = vm.c_to_n(hgvs_c) - else: - hgvs_t = copy.copy(hgvs_c) - ins_ref = sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-1,hgvs_t.posedit.pos.end.base) - ins_alt = ins_ref[:1] + hgvs_t.posedit.edit.alt + ins_ref[-1:] - ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base) + '_' + str(hgvs_t.posedit.pos.end.base) + 'del' + ins_ref + 'ins' + ins_alt - hgvs_t = hp.parse_hgvs_variant(ins_to_delins) - try: - hgvs_c = vm.n_to_c(hgvs_t) - except Exception: - hgvs_c = copy.copy(hgvs_t) - try: - hgvs_genomic = no_norm_evm.t_to_g(hgvs_c) - except Exception as e: - error = str(e) - logger.warning('Ins mapping error in myt_to_g ' + error) - - return hgvs_genomic - - -""" -Simple hgvs g. to c. or n. mapping -returns parsed hgvs c. or n. object -""" - - -def myevm_g_to_t(hdp, evm, hgvs_genomic, alt_ac): - hgvs_t = evm.g_to_t(hgvs_genomic, alt_ac) - return hgvs_t - - -""" -parse p. strings into hgvs p. objects -""" - - -def hgvs_protein(variant, hp): - # Set regular expressions for if statements - pat_p = re.compile("\:p\.") # Pattern looks for :g. Note (gene) has been removed - # If the :p. pattern is present in the input variant - if pat_p.search(variant): - # convert the input string into a hgvs object - var_p = hp.parse_hgvs_variant(variant) - return var_p - - -""" -Convert r. into c. -""" - - -def hgvs_r_to_c(hgvs_object): - # check for LRG_t with r. 
- if re.match('LRG', hgvs_object.ac): - transcript_ac = dbControls.data.get_RefSeqTranscriptID_from_lrgTranscriptID(hgvs_object.ac) - if transcript_ac == 'none': - raise HGVSDataNotAvailableError('Unable to identify a relevant transcript for ' + hgvs_object.ac) - else: - hgvs_object.ac = transcript_ac - hgvs_object.type = 'c' - edit = str(hgvs_object.posedit.edit) - edit = edit.upper() - # lowercase the supported variant types - edit = edit.replace('DEL', 'del') - edit = edit.replace('INS', 'ins') - edit = edit.replace('INV', 'inv') - edit = edit.replace('DUP', 'dup') - # edit = edit.replace('CON', 'con') - # edit = edit.replace('TRA', 'tra') - edit = edit.replace('U', 'T') - hgvs_object.posedit.edit = edit - return hgvs_object - - -""" -Convert c. into r. -""" - - -def hgvs_c_to_r(hgvs_object): - hgvs_object.type = 'r' - edit = str(hgvs_object.posedit.edit) - edit = edit.lower() - edit = edit.replace('t', 'u') - hgvs_object.posedit.edit = edit - return hgvs_object - - -""" -Input c. r. n. variant string -Use uta.py (hdp) to return the identity information for the transcript variant -see hgvs.dataproviders.uta.py for details -""" - - -def tx_identity_info(variant, hdp): - # Set regular expressions for if statements - pat_c = re.compile("\:c\.") # Pattern looks for :c. Note (gene) has been removed - pat_n = re.compile("\:n\.") # Pattern looks for :c. Note (gene) has been removed - pat_r = re.compile("\:r\.") # Pattern looks for :c. Note (gene) has been removed - - # If the :c. pattern is present in the input variant - if pat_c.search(variant): - # Remove all text to the right and including pat_c - tx_ac = variant[:variant.index(':c.') + len(':c.')] - tx_ac = pat_c.sub('', tx_ac) - # Interface with the UTA database via get_tx_identity in uta.py - tx_id_info = hdp.get_tx_identity_info(tx_ac) - # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list - return tx_id_info - - # If the :n. 
pattern is present in the input variant - if pat_n.search(variant): - # Remove all text to the right and including pat_c - tx_ac = variant[:variant.index(':n.') + len(':n.')] - tx_ac = pat_n.sub('', tx_ac) - # Interface with the UTA database via get_tx_identity in uta.py - tx_id_info = hdp.get_tx_identity_info(tx_ac) - # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list - return tx_id_info - - # If the :r. pattern is present in the input variant - if pat_r.search(variant): - # Remove all text to the right and including pat_c - tx_ac = variant[:variant.index(':r.') + len(':r.')] - tx_ac = pat_r.sub('', tx_ac) - # Interface with the UTA database via get_tx_identity in uta.py - tx_id_info = hdp.get_tx_identity_info(tx_ac) - # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list - return tx_id_info - - -""" -Input c. r. nd accession string -Use uta.py (hdp) to return the identity information for the transcript variant -see hgvs.dataproviders.uta.py for details -""" - - -def tx_id_info(alt_ac, hdp): - tx_id_info = hdp.get_tx_identity_info(alt_ac) - # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list - return tx_id_info - - -""" -Use uta.py (hdp) to return the transcript information for a specified gene (HGNC SYMBOL) -see hgvs.dataproviders.uta.py for details -""" - - -def tx_for_gene(hgnc, hdp): - # Interface with the UTA database via get_tx_for_gene in uta.py - tx_for_gene = hdp.get_tx_for_gene(hgnc) - return tx_for_gene - - -""" -Extract RefSeqGene Accession from transcript information -see hgvs.dataproviders.uta.py for details -""" - - -def ng_extract(tx_for_gene): - # Set regular expressions for if statements - pat_NG = re.compile("^NG_") # Pattern looks for NG_ at beginning of a string - # For each list in the list of lists tx_for_gene - for list in tx_for_gene: - # If the pattern NG_ is found in element 4 - if pat_NG.search(list[4]): - # The gene accession is 
set to list element 4 - gene_ac = list[4] - return gene_ac - -""" -Returns exon information for a given transcript -e.g. how the exons align to the genomic reference -see hgvs.dataproviders.uta.py for details -""" - - -def tx_exons(tx_ac, alt_ac, alt_aln_method, hdp): - # Interface with the UTA database via get_tx_exons in uta.py - try: - tx_exons = hdp.get_tx_exons(tx_ac, alt_ac, alt_aln_method) - except hgvs.exceptions.HGVSError as e: - e - tx_exons = 'hgvs Exception: ' + str(e) - return tx_exons - try: - completion = tx_exons[0]['alt_strand'] - except TypeError: - tx_exons = 'error' - return tx_exons - # If on the reverse strand, reverse the order of elements - if tx_exons[0]['alt_strand'] == -1: - tx_exons = tx_exons[::-1] - return tx_exons - else: - return tx_exons - - -""" -Automatically maps genomic positions onto all overlapping transcripts -""" - - -def relevant_transcripts(hgvs_genomic, evm, hdp, alt_aln_method): - # Pass relevant transcripts for the input variant to rts - # Note, the evm method misses one end, the hdp. method misses the other. 
Combine both - rts_list = hdp.get_tx_for_region(hgvs_genomic.ac, alt_aln_method, hgvs_genomic.posedit.pos.start.base-1, hgvs_genomic.posedit.pos.end.base-1) - rts_dict = {} - for tx_dat in rts_list: - rts_dict[tx_dat[0]] = True - rts_list_2 = evm.relevant_transcripts(hgvs_genomic) - for tx_dat_2 in rts_list_2: - rts_dict[tx_dat_2] = True - rts = rts_dict.keys() - - # Project genomic variants to new transcripts - # and populate a code_var list - ############################################# - # Open a list to store relevant transcripts - code_var = [] - # Populate transcripts - The keys become the list elements from rel_trs - for x in rts: - y = x.rstrip() # Chomp any whitespace from the right of x ($_) - Assign to y - # Easy variant mapper used to map the input variant to the relevant transcripts - # Check for coding transcripts - try: - variant = evm.g_to_t(hgvs_genomic, y) - except hgvs.exceptions.HGVSError as e: - # Check for non-coding transcripts - try: - variant = evm.g_to_t(hgvs_genomic, y) - except hgvs.exceptions.HGVSError as e: - continue - except: - continue - - # Corrective Normalisation of intronic descriptions in the antisense oriemtation - pl = re.compile('\+') - mi = re.compile('\-') - ast = re.compile('\*') - if pl.search(str(variant)) or mi.search(str(variant)) or ast.search(str(variant)): - tx_ac = variant.ac - alt_ac = hgvs_genomic.ac - - # Interface with the UTA database via get_tx_exons in uta.py - try: - tx_exons = hdp.get_tx_exons(tx_ac, alt_ac, alt_aln_method) - except hgvs.exceptions.HGVSError as e: - e - tx_exons = 'hgvs Exception: ' + str(e) - return tx_exons - try: - completion = tx_exons[0]['alt_strand'] - except TypeError: - tx_exons = 'error' - return tx_exons - # If on the reverse strand, reverse the order of elements - if tx_exons[0]['alt_strand'] == -1: - tx_exons = tx_exons[::-1] - else: - pass - - # Gene orientation - if tx_exons[0]['alt_strand'] == -1: - antisense = 'true' - else: - antisense = 'false' - - # Pass if antisense = 
'false' - if antisense == 'false': - pass - else: - # Reverse normalize hgvs_genomic - rev_hgvs_genomic = reverse_hn.normalize(hgvs_genomic) - # map back to coding - variant = evm.g_to_t(rev_hgvs_genomic, tx_ac) - code_var.append(str(variant)) - return code_var - - -""" -Take HGVS string, parse into hgvs object and validate -""" - - -def validate(input, hp, vr): - hgvs_input = hp.parse_hgvs_variant(input) - g = re.compile(":g.") - p = re.compile(":p.") - if p.search(input): - if hasattr(hgvs_input.posedit.pos.start, 'offset'): - pass - else: - hgvs_input.posedit.pos.start.offset = 0 - if hasattr(hgvs_input.posedit.pos.end, 'offset'): - pass - else: - hgvs_input.posedit.pos.end.offset = 0 - if hasattr(hgvs_input.posedit.pos.start, 'datum'): - pass - else: - hgvs_input.posedit.pos.start.datum = 0 - if hasattr(hgvs_input.posedit.pos.end, 'datum'): - pass - else: - hgvs_input.posedit.pos.end.datum = 0 - if hasattr(hgvs_input.posedit.edit, 'ref_n'): - pass - else: - hgvs_input.posedit.edit.ref_n = hgvs_input.posedit.pos.end.base - hgvs_input.posedit.pos.start.base + 1 - - try: - vr.validate(hgvs_input) - except hgvs.exceptions.HGVSError as e: - - error = e - return error - - else: - error = 'false' - return error - -""" -Search HGNC rest -""" - - -def hgnc_rest(path): - data = { - 'record': '', - 'error': 'false' - } - # HGNC server - headers = { - 'Accept': 'application/json', - } - uri = 'http://rest.genenames.org' - target = urlparse(uri + path) - method = 'GET' - body = '' - h = http.Http() - # collect the response - response, content = h.request( - target.geturl(), - method, - body, - headers) - if response['status'] == '200': - # assume that content is a json reply - # parse content with the json module - data['record'] = json.loads(content) - else: - data['error'] = "Unable to contact the HGNC database: Please try again later" - return data - - -""" -Search Entrez databases with efetch and SeqIO -""" - - -def entrez_efetch(db, id, rettype, retmode): - # IMPORT 
Bio modules - # from Bio import Entrez - Entrez.email = ENTREZ_ID - # from Bio import SeqIO - handle = Entrez.efetch(db=db, id=id, rettype=rettype, retmode=retmode) - # Get record - record = SeqIO.read(handle, "gb") - # Place into text - # text = handle.read() - handle.close() - return record - - -""" -search Entrez databases with efetch and read -""" - - -def entrez_read(db, id, retmode): - # IMPORT Bio modules - # from Bio import Entrez - Entrez.email = ENTREZ_ID - # from Bio import SeqIO - handle = Entrez.efetch(db=db, id=id, retmode=retmode) - # Get record - record = Entrez.read(handle) - # Place into text - # text = handle.read() - handle.close() - return record - - -""" -Simple reverse complement function for nucleotide sequences -""" - - -def revcomp(bases): - l2 = [] - l = list(bases) - element = 0 - for base in l: - element = element + 1 - if base == 'G': - l2.append('C') - if base == 'C': - l2.append('G') - if base == 'A': - l2.append('T') - if base == 'T': - l2.append('A') - revcomp = ''.join(l2) - revcomp = revcomp[::-1] - return revcomp - - -""" -Function designed to merge multiple HGVS variants (hgvs objects) into a single delins -using 3 prime normalization -""" - - -def merge_hgvs_3pr(hgvs_variant_list): - # Ensure c. is mapped to the - h_list = [] - - # Sanity check and format the submitted variants - for hgvs_v in hgvs_variant_list: - # For testing include parser - try: - hgvs_v = hp.parse_hgvs_variant(hgvs_v) - except Exception as e: - print e - pass - - # Validate - vr.validate(hgvs_v) # Let hgvs errors deal with invalid variants and not hgvs objects - if hgvs_v.type == 'c': - try: - hgvs_v = vm.c_to_n(hgvs_v) - h_list.append(hgvs_v) - except: - raise mergeHGVSerror("Unable to map from c. 
position to absolute position") - elif hgvs_v.type == 'g': - h_list.append(hgvs_v) - if h_list != []: - hgvs_variant_list = copy.deepcopy(h_list) - - # Define accession and start/end positions - accession = None - merge_start_pos = None - merge_end_pos = None - type = None - full_list = [] - - # Loop through the submitted variants and gather the required info - for hgvs_v in hgvs_variant_list: - # No intronic positions - try: - if hgvs_v.posedit.pos.start.offset != 0: - raise mergeHGVSerror("Base-offset position submitted") - if hgvs_v.posedit.pos.end.offset != 0: - raise mergeHGVSerror("Base-offset position submitted") - except AttributeError: - pass - - # Normalize the variant (allow cross intron) which also adds the reference sequence (?) - hgvs_v = hn.normalize(hgvs_v) - - # Set the accession and ensure that multiple reference sequences have not been queried - if accession is None: - accession = hgvs_v.ac - type = hgvs_v.type - else: - if hgvs_v.ac != accession: - raise mergeHGVSerror("More than one reference sequence submitted") - else: - pass - - # Set initial start and end positions - if merge_start_pos is None: - merge_start_pos = hgvs_v.posedit.pos.start.base - merge_end_pos = hgvs_v.posedit.pos.end.base - # Append to the final list of variants - full_list.append(hgvs_v) - continue - # Ensure variants are in the correct order and not overlapping - else: - # ! hgvs_v.posedit.pos.start.base !> - if hgvs_v.posedit.pos.start.base <= merge_end_pos: - raise mergeHGVSerror("Submitted variants are out of order or their ranges overlap") - else: - # Create a fake variant to handle the missing sequence - ins_seq = sf.fetch_seq(hgvs_v.ac, merge_end_pos, hgvs_v.posedit.pos.start.base - 1) - gapping = hgvs_v.ac + ':' + hgvs_v.type + '.' 
+ str(merge_end_pos + 1) + '_' + str( - hgvs_v.posedit.pos.start.base - 1) + 'delins' + ins_seq - hgvs_gapping = hp.parse_hgvs_variant(gapping) - full_list.append(hgvs_gapping) - # update end_pos - merge_end_pos = hgvs_v.posedit.pos.end.base - # Append to the final list of variants - full_list.append(hgvs_v) - - # Generate the alt sequence - alt_sequence = '' - for hgvs_v in full_list: - ref_alt = hgvs2vcf.hgvs_ref_alt(hgvs_v) - alt_sequence = alt_sequence + ref_alt['alt'] - - # Fetch the reference sequence and copy it for the basis of the alt sequence - reference_sequence = sf.fetch_seq(accession, merge_start_pos - 1, merge_end_pos) - # Generate an hgvs_delins - if alt_sequence == '': - delins = accession + ':' + type + '.' + str(merge_start_pos) + '_' + str( - merge_end_pos) + 'del' + reference_sequence - else: - delins = accession + ':' + type + '.' + str(merge_start_pos) + '_' + str( - merge_end_pos) + 'del' + reference_sequence + 'ins' + alt_sequence - hgvs_delins = hp.parse_hgvs_variant(delins) - try: - hgvs_delins = vm.n_to_c(hgvs_delins) - except: - pass - # Normalize (allow variants crossing into different exons) - try: - hgvs_delins = hn.normalize(hgvs_delins) - except HGVSUnsupportedOperationError: - pass - return hgvs_delins - - -""" -Function designed to merge multiple HGVS variants (hgvs objects) into a single delins -using 5 prime normalization -""" - - -def merge_hgvs_5pr(hgvs_variant_list): - # Ensure c. is mapped to the - h_list = [] - - # Sanity check and format the submitted variants - for hgvs_v in hgvs_variant_list: - # For testing include parser - try: - hgvs_v = hp.parse_hgvs_variant(hgvs_v) - except: - pass - - # Validate - vr.validate(hgvs_v) # Let hgvs errors deal with invalid variants and not hgvs objects - if hgvs_v.type == 'c': - try: - hgvs_v = vm.c_to_n(hgvs_v) - h_list.append(hgvs_v) - except: - raise mergeHGVSerror("Unable to map from c. 
position to absolute position") - if h_list != []: - hgvs_variant_list = copy.deepcopy(h_list) - - # Define accession and start/end positions - accession = None - merge_start_pos = None - merge_end_pos = None - type = None - full_list = [] - - # Loop through the submitted variants and gather the required info - for hgvs_v in hgvs_variant_list: - try: - # No intronic positions - if hgvs_v.posedit.pos.start.offset != 0: - raise mergeHGVSerror("Base-offset position submitted") - if hgvs_v.posedit.pos.end.offset != 0: - raise mergeHGVSerror("Base-offset position submitted") - except AttributeError: - pass - - # Normalize the variant (allow cross intron) which also adds the reference sequence (?) - hgvs_v = reverse_hn.normalize(hgvs_v) - - # Set the accession and ensure that multiple reference sequences have not been queried - if accession is None: - accession = hgvs_v.ac - type = hgvs_v.type - else: - if hgvs_v.ac != accession: - raise mergeHGVSerror("More than one reference sequence submitted") - else: - pass - - # Set initial start and end positions - if merge_start_pos is None: - merge_start_pos = hgvs_v.posedit.pos.start.base - merge_end_pos = hgvs_v.posedit.pos.end.base - # Append to the final list of variants - full_list.append(hgvs_v) - continue - # Ensure variants are in the correct order and not overlapping - else: - # ! hgvs_v.posedit.pos.start.base !> - if hgvs_v.posedit.pos.start.base <= merge_end_pos: - raise mergeHGVSerror("Submitted variants are out of order or their ranges overlap") - else: - # Create a fake variant to handle the missing sequence - ins_seq = sf.fetch_seq(hgvs_v.ac, merge_end_pos, hgvs_v.posedit.pos.start.base - 1) - gapping = hgvs_v.ac + ':' + hgvs_v.type + '.' 
+ str(merge_end_pos + 1) + '_' + str( - hgvs_v.posedit.pos.start.base - 1) + 'delins' + ins_seq - hgvs_gapping = hp.parse_hgvs_variant(gapping) - full_list.append(hgvs_gapping) - # update end_pos - merge_end_pos = hgvs_v.posedit.pos.end.base - # Append to the final list of variants - full_list.append(hgvs_v) - - # Generate the alt sequence - alt_sequence = '' - for hgvs_v in full_list: - ref_alt = hgvs2vcf.hgvs_ref_alt(hgvs_v) - alt_sequence = alt_sequence + ref_alt['alt'] - - # Fetch the reference sequence and copy it for the basis of the alt sequence - reference_sequence = sf.fetch_seq(accession, merge_start_pos - 1, merge_end_pos) - - # Generate an hgvs_delins - if alt_sequence == '': - delins = accession + ':' + type + '.' + str(merge_start_pos) + '_' + str( - merge_end_pos) + 'del' + reference_sequence - else: - delins = accession + ':' + type + '.' + str(merge_start_pos) + '_' + str( - merge_end_pos) + 'del' + reference_sequence + 'ins' + alt_sequence - hgvs_delins = hp.parse_hgvs_variant(delins) - try: - hgvs_delins = vm.n_to_c(hgvs_delins) - except: - pass - # Normalize (allow variants crossing into different exons) - try: - hgvs_delins = reverse_hn.normalize(hgvs_delins) - except HGVSUnsupportedOperationError: - pass - return hgvs_delins - - -""" -Function designed to merge multiple pseudo VCF variants (strings) into a single HGVS delins -using 5 prime normalization then return a 3 prime normalized final HGVS object -""" - - -def merge_pseudo_vcf(vcf_list, genome_build): - hgvs_list = [] - # Convert pseudo_vcf list into a HGVS list - for call in vcf_list: - hgvs = pseudo_vcf2hgvs.pvcf_to_hgvs(call, genome_build, normalization_direction=5) - hgvs_list.append(hgvs) - # Merge - hgvs_delins = merge_hgvs_5pr(hgvs_list) - # normalize 3 prime - hgvs_delins = hn.normalize(hgvs_delins) - # return - return hgvs_delins - - -""" -HGVS allele handling function which takes a single HGVS allele description and -separates each allele into a list of HGVS variants -""" - - 
-def hgvs_alleles(variant_description): - try: - # Split up the description - accession, remainder = variant_description.split(':') - # Branch - if re.search('[gcn]\.\d+\[', remainder): - # NM_004006.2:c.2376[G>C];[(G>C)] - # if re.search('\(', remainder): - # raise alleleVariantError('Unsupported format ' + remainder) - # NM_004006.2:c.2376[G>C];[G>C] - type, remainder = remainder.split('.') - pos = re.match('\d+', remainder) - pos = pos.group(0) - remainder = remainder.replace(pos, '') - remainder = remainder[1:-1] - alleles = remainder.split('];[') - my_alleles = [] - for posedit in alleles: - if re.search('\(', posedit): - # NM_004006.2:c.2376[G>C];[(G>C)] - continue - posedit_list = [posedit] - current_allele = [] - for pe in posedit_list: - vrt = accession + ':' + type + '.' + str(pos) + pe - current_allele.append(vrt) - my_alleles.append(current_allele) - else: - type, remainder = remainder.split('.') - if re.search('\(;\)', remainder) and re.search('\];', remainder): - # NM_004006.2:c.[296T>G];[476T>C](;)1083A>C(;)1406del - pre_alleles = remainder.split('(;)') - pre_merges = [] - alleles = [] - for allele in pre_alleles: - if re.match('\[', allele): - pre_merges.append(allele) - else: - alleles.append(allele) - # Extract descriptions - my_alleles = [] - # First alleles - for posedits in alleles: - posedit_list = posedits.split(';') - current_allele = [] - for pe in posedit_list: - vrt = accession + ':' + type + '.' + pe - current_allele.append(vrt) - my_alleles.append(current_allele) - - # Then Merges - alleles = [] - remainder = ';'.join(pre_merges) - remainder = remainder[1:-1] # removes the first [ and the last ] - alleles = remainder.split('];[') - # now separate out the variants in each allele§ - for posedits in alleles: - posedit_list = posedits.split(';') - current_allele = [] - for pe in posedit_list: - vrt = accession + ':' + type + '.' 
+ pe - current_allele.append(vrt) - my_alleles.append(current_allele) - # Now merge the alleles into a single variant - merged_alleles = [] - for each_allele in my_alleles: - if re.search('\?', str(each_allele)): - # NM_004006.2:c.[2376G>C];[?] - continue - merge = [] - allele = str(merge_hgvs_3pr(each_allele)) - merge.append(allele) - merged_alleles.append(merge) - my_alleles = merged_alleles - - elif re.search('\(;\)', remainder): - # If statement for uncertainties - # NM_004006.2:c.[296T>G;476C>T];[476C>T](;)1083A>C - if re.search('\[', remainder): - raise alleleVariantError('Unsupported format ' + type + '.' + remainder) - # NM_004006.2:c.2376G>C(;)3103del - # NM_000548.3:c.3623_3647del(;)3745_3756dup - alleles = remainder.split('(;)') - # now separate out the variants in each allele§ - my_alleles = [] - for posedits in alleles: - posedit_list = posedits.split(';') - current_allele = [] - for pe in posedit_list: - vrt = accession + ':' + type + '.' + pe - current_allele.append(vrt) - my_alleles.append(current_allele) - else: - # If statement for uncertainties - if re.search('\(', remainder): - raise alleleVariantError('Unsupported format ' + type + '.' + remainder) - # NM_004006.2:c.[2376G>C];[3103del] - # NM_004006.2:c.[2376G>C];[3103del] - # NM_004006.2:c.[296T>G;476C>T;1083A>C];[296T>G;1083A>C] - # NM_000548.3:c.[4358_4359del;4361_4372del] - remainder = remainder[1:-1] # removes the first [ and the last ] - alleles = remainder.split('];[') - # now separate out the variants in each allele§ - my_alleles = [] - for posedits in alleles: - posedit_list = posedits.split(';') - current_allele = [] - for pe in posedit_list: - vrt = accession + ':' + type + '.' + pe - current_allele.append(vrt) - my_alleles.append(current_allele) - # Now merge the alleles into a single variant - merged_alleles = [] - - for each_allele in my_alleles: - print each_allele - if re.search('\?', str(each_allele)): - # NM_004006.2:c.[2376G>C];[?] 
- continue - merge = [] - allele = str(merge_hgvs_3pr(each_allele)) - merge.append(allele) - merged_alleles.append(merge) - my_alleles = merged_alleles - - # Extract alleles into strings - allele_strings = [] - for alleles_l in my_alleles: - for allele in alleles_l: - allele_strings.append(allele) - my_alleles = allele_strings - - # return - return my_alleles - except Exception as e: - import traceback - exc_type, exc_value, last_traceback = sys.exc_info() - te = traceback.format_exc() - raise alleleVariantError(str(e)) diff --git a/VariantValidator/modules/vvHGVS.py b/VariantValidator/modules/vvHGVS.py index 3fbc0373..a0c352d9 100644 --- a/VariantValidator/modules/vvHGVS.py +++ b/VariantValidator/modules/vvHGVS.py @@ -18,14 +18,177 @@ # Import modules import re import copy -import vvChromosomes as supportedChromosomeBuilds +import vvChromosomes # Import Biopython modules from Bio.Seq import Seq +import hgvs # Database connections and hgvs objects are now passed from VariantValidator.py + +# Error handling +class pseudoVCF2HGVSError(Exception): + pass +# pvcf is a pseudo_vcf string +# genome build is a build string e.g. GRCh37 hg19 +# normalization direction an integer, 5 or 3. 
+def pvcf_to_hgvs(input, selected_assembly, normalization_direction, reverse_normalizer, validator): + # Set normalizer + if normalization_direction == 3: + selected_normalizer = validator.hn + if normalization_direction == 5: + selected_normalizer = reverse_normalizer + + # Gel stye pVCF + input = input.replace(':', '-') + + # VCF type 1 + if re.search('-\d+-[GATC]+-[GATC]+', input): + pre_input = copy.deepcopy(input) + vcf_elements = pre_input.split('-') + input = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[3]) + elif re.search('-\d+-[GATC]+-', input): + pre_input = copy.deepcopy(input) + vcf_elements = pre_input.split('-') + input = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[2]) + else: + raise pseudoVCF2HGVSError('Unsupported format: VCF specification 4.1 or later') + + # Chr16:2099572TC>T + try: + pre_input = copy.deepcopy(input) + input_list = input.split(':') + pos_ref_alt = str(input_list[1]) + positionAndEdit = input_list[1] + if not re.match('N[CGWT]_', input) and not re.match('LRG_\d+$', input): + chr_num = str(input_list[0]) + chr_num = chr_num.upper() + chr_num = chr_num.strip() + if re.match('CHR', chr_num): + chr_num = chr_num.replace('CHR', '') + # Use selected assembly + accession = vvChromosomes.to_accession(chr_num, selected_assembly) + if accession is None: + error = chr_num + ' is not part of genome build ' + selected_assembly + ' or is not supported' + raise pseudoVCF2HGVSError(error) + else: + accession = input_list[0] + + # Assign reference sequence type + ref_type = ':g.' + if re.match('LRG_', accession): + accession = validator.db.get.get_RefSeqGeneID_from_lrgID(accession) + + # Reformat the variant + input = str(accession) + ref_type + str(positionAndEdit) + except Exception as e: + error = str(e) + raise pseudoVCF2HGVSError(error) + + # Find not_sub type in input e.g. 
GGGG>G + not_sub = copy.deepcopy(input) + not_sub_find = re.compile("([GATCgatc]+)>([GATCgatc]+)") + if not_sub_find.search(not_sub): + try: + # If the length of either side of the substitution delimer (>) is >1 + matches = not_sub_find.search(not_sub) + if len(matches.group(1)) > 1 or len(matches.group(2)) > 1 or re.search( + "([GATCgatc]+)>([GATCgatc]+),([GATCgatc]+)", input): + # Search for and remove range + range = re.compile("([0-9]+)_([0-9]+)") + if range.search(not_sub): + m = not_sub_find.search(not_sub) + start = m.group(1) + delete = m.group(2) + beginning_string, middle_string = not_sub.split(':') + middle_string = middle_string.split('_')[0] + end_string = start + '>' + delete + not_sub = beginning_string + ':' + middle_string + end_string + # Split description + split_colon = not_sub.split(':') + ref_ac = split_colon[0] + remainder = split_colon[1] + split_dot = remainder.split('.') + ref_type = split_dot[0] + remainder = split_dot[1] + posedit = remainder + split_greater = remainder.split('>') + insert = split_greater[1] + remainder = split_greater[0] + # Split remainder using matches + r = re.compile("([0-9]+)([GATCgatc]+)") + try: + m = r.search(remainder) + start = m.group(1) + delete = m.group(2) + starts = posedit.split(delete)[0] + re_try = ref_ac + ':' + ref_type + '.' + starts + 'del' + delete[0] + 'ins' + insert + hgvs_re_try = validator.hp.parse_hgvs_variant(re_try) + hgvs_re_try.posedit.edit.ref = delete + start_pos = str(hgvs_re_try.posedit.pos.start) + if re.search('\-', start_pos): + base, offset = start_pos.split('-') + new_offset = 0 - int(offset) + (len(delete)) + end_pos = int(base) + hgvs_re_try.posedit.pos.end.base = int(end_pos) + hgvs_re_try.posedit.pos.end.offset = int(new_offset) - 1 + not_delins = ref_ac + ':' + ref_type + '.' 
+ start_pos + '_' + str( + hgvs_re_try.posedit.pos.end) + 'del' + delete + 'ins' + insert + elif re.search('\+', start_pos): + base, offset = start_pos.split('+') + end_pos = int(base) + (len(delete) - int(offset) - 1) + new_offset = 0 + int(offset) + (len(delete) - 1) + hgvs_re_try.posedit.pos.end.base = int(end_pos) + hgvs_re_try.posedit.pos.end.offset = int(new_offset) + not_delins = ref_ac + ':' + ref_type + '.' + start_pos + '_' + str( + hgvs_re_try.posedit.pos.end) + 'del' + delete + 'ins' + insert + else: + end_pos = int(start_pos) + (len(delete) - 1) + not_delins = ref_ac + ':' + ref_type + '.' + start_pos + '_' + str( + end_pos) + 'del' + delete + 'ins' + insert + except: + not_delins = not_sub + # Parse into hgvs object + try: + hgvs_not_delins = validator.hp.parse_hgvs_variant(not_delins) + except hgvs.exceptions.HGVSError as e: + # Sort out multiple ALTS from VCF inputs + if re.search("([GATCgatc]+)>([GATCgatc]+),([GATCgatc]+)", not_delins): + # header,alts = not_delins.split('>') + # # Split up the alts into a list + # alt_list = alts.split(',') + # # Assemble and re-submit + # for alt in alt_list: + # validation['warnings'] = 'Multiple ALT sequences detected: auto-submitting all possible combinations' + # validation['write'] = 'false' + # refreshed_description = header + '>' + alt + # query = {'quibble' : refreshed_description, 'id' : validation['id'], 'warnings' : validation['warnings'], 'description' : '', 'coding' : '', 'coding_g' : '', 'genomic_r' : '', 'genomic_g' : '', 'protein' : '', 'write' : 'true', 'primary_assembly' : primary_assembly, 'order' : ordering} + # batch_list.append(query) + error = 'Multiple ALTs not supported by this function' + raise pseudoVCF2HGVSError(error) + else: + error = str(e) + raise pseudoVCF2HGVSError(error) + + # HGVS will deal with the errors + hgvs_object = hgvs_not_delins + else: + hgvs_object = validator.hp.parse_hgvs_variant(input) + + except Exception as e: + error = str(e) + raise pseudoVCF2HGVSError(error) 
+ else: + hgvs_object = validator.hp.parse_hgvs_variant(input) + + # Normalize + hgvs_object = selected_normalizer.normalize(hgvs_object) + # return + return hgvs_object + + def hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): hgvs_genomic_variant = hgvs_genomic # Reverse normalize hgvs_genomic_variant: NOTE will replace ref @@ -33,7 +196,7 @@ def hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): # hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) # Chr - chr = supportedChromosomeBuilds.to_chr_num_ucsc(reverse_normalized_hgvs_genomic.ac, primary_assembly) + chr = vvChromosomes.to_chr_num_ucsc(reverse_normalized_hgvs_genomic.ac, primary_assembly) if chr is not None: pass else: @@ -195,14 +358,14 @@ def report_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): grc_pa = 'GRCh38' # UCSC Chr - ucsc_chr = supportedChromosomeBuilds.to_chr_num_ucsc(reverse_normalized_hgvs_genomic.ac, ucsc_pa) + ucsc_chr = vvChromosomes.to_chr_num_ucsc(reverse_normalized_hgvs_genomic.ac, ucsc_pa) if ucsc_chr is not None: pass else: ucsc_chr = reverse_normalized_hgvs_genomic.ac # GRC Chr - grc_chr = supportedChromosomeBuilds.to_chr_num_refseq(reverse_normalized_hgvs_genomic.ac, grc_pa) + grc_chr = vvChromosomes.to_chr_num_refseq(reverse_normalized_hgvs_genomic.ac, grc_pa) if grc_chr is not None: pass else: @@ -348,7 +511,7 @@ def pos_lock_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): # hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) # Chr - chr = supportedChromosomeBuilds.to_chr_num_ucsc(reverse_normalized_hgvs_genomic.ac, primary_assembly) + chr = vvChromosomes.to_chr_num_ucsc(reverse_normalized_hgvs_genomic.ac, primary_assembly) if chr is not None: pass else: @@ -482,7 +645,7 @@ def hard_right_hgvs2vcf(hgvs_genomic, primary_assembly, hn, sf): normalized_hgvs_genomic = hn.normalize(hgvs_genomic_variant) # Chr - chr = supportedChromosomeBuilds.to_chr_num_ucsc(normalized_hgvs_genomic.ac, 
primary_assembly) + chr = vvChromosomes.to_chr_num_ucsc(normalized_hgvs_genomic.ac, primary_assembly) if chr is not None: pass else: @@ -627,7 +790,7 @@ def hard_left_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) # Chr - chr = supportedChromosomeBuilds.to_chr_num_ucsc(reverse_normalized_hgvs_genomic.ac, primary_assembly) + chr = vvChromosomes.to_chr_num_ucsc(reverse_normalized_hgvs_genomic.ac, primary_assembly) if chr is not None: pass else: diff --git a/VariantValidator/modules/vvLiftover.py b/VariantValidator/modules/vvLiftover.py new file mode 100644 index 00000000..6cf991b8 --- /dev/null +++ b/VariantValidator/modules/vvLiftover.py @@ -0,0 +1,344 @@ +# -*- coding: utf-8 -*- +""" +Liftover between genome builds is most accurate when mapping via a RefSeq transcript. +For intergenic regions, the process is more complex. +Lift position > Check bases > Lift back and confirm the original position +""" + +# import modules +import hgvs.exceptions +import hgvs.sequencevariant +import re +import os +import vvChromosomes +import vvHGVS +from pyliftover import LiftOver +import warnings +from Bio.Seq import Seq + +# Pre compile variables +hgvs.global_config.formatting.max_ref_length = 1000000 + +# Determine whether a liftover directory has been added to the environment +PYLIFTOVER_DIR = os.environ.get('PYLIFTOVER_DIR') + +def mystr(hgvs_nucleotide): + hgvs_nucleotide_refless = hgvs_nucleotide.format({'max_ref_length': 0}) + return hgvs_nucleotide_refless + +def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, reverse_normalizer, sf, evm): + + """ + :param hgvs_genomic: hgvs_object genomic description accession NC, NT, or NW. 
Not NG + :param build_from: + :param build_to: + :return: lifted {} + Step 1, attempt to liftover using a common RefSeq transcript + """ + + try: + hgvs_genomic = hp.parse_hgvs_variant(hgvs_genomic) + except TypeError: + pass + + # Create return dictionary + lifted_response = {} + + # Check genome build type + if re.match('GRC', build_from): + from_set = 'grc_chr' + alt_from_set = 'ucsc_chr' + if re.search('37', build_from): + lo_from = 'hg19' + alt_build_from = 'hg19' + if re.search('38', build_from): + lo_from = 'hg38' + alt_build_from = 'hg38' + + else: + from_set = 'ucsc_chr' + alt_from_set = 'grc_chr' + if re.search('19', build_from): + lo_from = 'hg19' + alt_build_from = 'GRCh37' + if re.search('38', build_from): + lo_from = 'hg38' + alt_build_from = 'GRCh38' + + if re.match('GRC', build_to): + to_set = 'grc_chr' + alt_to_set = 'ucsc_chr' + if re.search('37', build_to): + lo_to = 'hg19' + alt_build_to = 'hg19' + if re.search('38', build_to): + lo_to = 'hg38' + alt_build_to = 'hg38' + else: + to_set = 'ucsc_chr' + alt_to_set = 'grc_chr' + if re.search('19', build_to): + lo_to = 'hg19' + alt_build_to = 'GRCh37' + if re.search('38', build_to): + lo_to = 'hg38' + alt_build_to = 'GRCh38' + + # populate the variant from data + vcf = hgvs2vcf.report_hgvs2vcf(hgvs_genomic, build_from, reverse_normalizer, sf) + + # Create to and from dictionaries + lifted_response[build_from.lower()] = {} + lifted_response[build_from.lower()][hgvs_genomic.ac] = {'hgvs_genomic_description': mystr(hgvs_genomic), + 'vcf': { + 'chr': vcf[from_set], + 'pos': str(vcf['pos']), + 'ref': vcf['ref'], + 'alt': vcf['alt']} + } + lifted_response[alt_build_from.lower()] = {} + lifted_response[alt_build_from.lower()][hgvs_genomic.ac] = {'hgvs_genomic_description': mystr(hgvs_genomic), + 'vcf': { + 'chr': vcf[alt_from_set], + 'pos': str(vcf['pos']), + 'ref': vcf['ref'], + 'alt': vcf['alt']} + } + # From dictionary currently blank + lifted_response[build_to.lower()] = {} + 
lifted_response[alt_build_to.lower()] = {} + + # Get a list of overlapping RefSeq transcripts + # Note, due to 0 base positions in UTA (I think) occasionally tx will + rts_list = hdp.get_tx_for_region(hgvs_genomic.ac, 'splign', hgvs_genomic.posedit.pos.start.base - 1, + hgvs_genomic.posedit.pos.end.base - 1) + rts_dict = {} + tx_list = False + for tx_dat in rts_list: + rts_dict[tx_dat[0]] = True + rts_list_2 = evm.relevant_transcripts(hgvs_genomic) + for tx_dat_2 in rts_list_2: + rts_dict[tx_dat_2] = True + if rts_dict != {}: + tx_list = rts_dict.keys() + + # Try to liftover + if tx_list is not False: + selected = [] + for tx in tx_list: + # identify the first transcript if any + options = hdp.get_tx_mapping_options(tx) + for op in options: + if re.match('NC_', op[1]): + if re.match('GRC', build_to): + sfm = vvChromosomes.to_chr_num_refseq(op[1], build_to) + if re.match('hg', build_to): + sfm = vvChromosomes.to_chr_num_ucsc(op[1], build_to) + if sfm is not None: + selected.append([op[0], op[1]]) + for op in options: + if re.match('NT_', op[1]): + if re.match('GRC', build_to): + sfm = vvChromosomes.to_chr_num_refseq(op[1], build_to) + if re.match('hg', build_to): + sfm = vvChromosomes.to_chr_num_ucsc(op[1], build_to) + if sfm is not None: + selected.append([op[0], op[1]]) + for op in options: + if re.match('NW_', op[1]): + if re.match('GRC', build_to): + sfm = vvChromosomes.to_chr_num_refseq(op[1], build_to) + if re.match('hg', build_to): + sfm = vvChromosomes.to_chr_num_ucsc(op[1], build_to) + if sfm is not None: + selected.append([op[0], op[1]]) + + # remove duplicate chroms + filtered_1 = {} + if selected: + for chroms in selected: + if chroms[1] in filtered_1.keys(): + pass + else: + filtered_1[chroms[1]] = chroms[0] + added_data = False + for key, val in filtered_1.iteritems(): + try: + # Note, due to 0 base positions in UTA (I think) occasionally tx will + # be identified that cannot be mapped to. 
+ # In this instance, do not mark added data as True + hgvs_tx = vm.g_to_t(hgvs_genomic, val) + hgvs_alt_genomic = vm.t_to_g(hgvs_tx, key) + alt_vcf = vvHGVS.report_hgvs2vcf(hgvs_alt_genomic, build_to, reverse_normalizer, sf) + + # Add the to build dictionaries + lifted_response[build_to.lower()][hgvs_alt_genomic.ac] = { + 'hgvs_genomic_description': mystr(hgvs_alt_genomic), + 'vcf': { + 'chr': alt_vcf[to_set], + 'pos': str(alt_vcf['pos']), + 'ref': alt_vcf['ref'], + 'alt': alt_vcf['alt']} + } + lifted_response[alt_build_to.lower()][hgvs_alt_genomic.ac] = { + 'hgvs_genomic_description': mystr(hgvs_alt_genomic), + 'vcf': { + 'chr': alt_vcf[alt_to_set], + 'pos': str(alt_vcf['pos']), + 'ref': alt_vcf['ref'], + 'alt': alt_vcf['alt']} + } + added_data = True + except hgvs.exceptions.HGVSInvalidIntervalError as e: + continue + + if lifted_response != {} and added_data is not False: + return lifted_response + else: + pass + else: + # liftover has failed + pass + + """ + Step 2, attempt to liftover using PyLiftover. + Lift position > Check bases > Lift back and confirm the original position + """ + + # Note: pyliftover uses the UCSC liftOver tool. 
+ # https://pypi.org/project/pyliftover/ + # Once validated, download the UCSC liftover files from http://hgdownload.cse.ucsc.edu/goldenPath/hg38/liftOver/ + + # The structure of the following code comes from VV pymod, so need to create a list + genome_builds = [build_to] + + # Create liftover vcf + from_vcf = vvHGVS.report_hgvs2vcf(hgvs_genomic, lo_from, reverse_normalizer, sf) + + if PYLIFTOVER_DIR is not None: + lo_filename_to = PYLIFTOVER_DIR + "%sTo%s.over.chain" % (lo_from, lo_to) + lo_filename_to = str(lo_filename_to.replace('Tohg', 'ToHg')) + + lo = LiftOver(lo_filename_to) + else: + lo = LiftOver(lo_from, lo_to) + + # Fix the GRC CHR + if re.match('chr', from_vcf[from_set]): + liftover_list = lo.convert_coordinate(from_vcf[from_set], int(from_vcf['pos'])) + else: + my_chrom = 'chr' + from_vcf[from_set] + liftover_list = lo.convert_coordinate(my_chrom, int(from_vcf['pos'])) + + + # Create dictionary + primary_genomic_dicts = {} + for lifted in liftover_list: + chr = lifted[0] + pos = lifted[1] + orientated = lifted[2] + + lifted_ref_bases = from_vcf['ref'] + lifted_alt_bases = from_vcf['alt'] + + # Inverted sequence + if orientated != '+': + my_seq = Seq(lifted_ref_bases) + lifted_ref_bases = my_seq.reverse_complement() + your_seq = Seq(lifted_alt_bases) + lifted_alt_bases = your_seq.reverse_complement() + accession = vvChromosomes.to_accession(chr, lo_to) + if accession is None: + wrn = 'Unable to identify an equivalent %s chromosome ID for %s' % (str(lo_to), str(chr)) + warnings.warn(wrn) + continue + else: + not_delins = accession + ':g.' 
+ str(pos) + '_' + str( + (pos - 1) + len(lifted_ref_bases)) + 'del' + lifted_ref_bases + 'ins' + lifted_alt_bases + hgvs_not_delins = hp.parse_hgvs_variant(not_delins) + try: + vr.validate(hgvs_not_delins) + except hgvs.exceptions.HGVSError as e: + warnings.warn(str(e)) + # Most likely incorrect bases + continue + else: + hgvs_lifted = hn.normalize(hgvs_not_delins) + # Now try map back + if PYLIFTOVER_DIR is not None: + lo_filename_from = PYLIFTOVER_DIR + "%sTo%s.over.chain" % (lo_to, lo_from) + + lo_filename_from = str(lo_filename_from.replace('Tohg', 'ToHg')) + lo = LiftOver(lo_filename_from) + else: + lo = LiftOver(lo_to, lo_from) + + # Lift back + liftback_list = lo.convert_coordinate(chr, pos) + + for lifted_back in liftback_list: + # Pull out the good guys! + # Need to add chr to the from_set + if not re.match('chr', lifted_back[0]): + my_from_chr = 'chr' + lifted_back[0] + else: + my_from_chr = lifted_back[0] + + if lifted_back[0] == from_vcf[from_set] or lifted_back[0] == my_from_chr: + if lifted_back[1] == int(from_vcf['pos']): + for build in genome_builds: + vcf_dict = vvHGVS.report_hgvs2vcf(hgvs_lifted, build, reverse_normalizer, sf) + if re.match('GRC', build): + lifted_response[build_to.lower()][hgvs_lifted.ac] = { + 'hgvs_genomic_description': mystr(hgvs_lifted), + 'vcf': {'chr': vcf_dict['grc_chr'], + 'pos': str(vcf_dict['pos']), + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } + lifted_response[alt_build_to.lower()][hgvs_lifted.ac] = { + 'hgvs_genomic_description': mystr(hgvs_lifted), + 'vcf': {'chr': vcf_dict['ucsc_chr'], + 'pos': str(vcf_dict['pos']), + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } + else: + lifted_response[build_to.lower()][hgvs_lifted.ac] = { + 'hgvs_genomic_description': mystr(hgvs_lifted), + 'vcf': {'chr': vcf_dict['ucsc_chr'], + 'pos': str(vcf_dict['pos']), + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } + lifted_response[alt_build_to.lower()][hgvs_lifted.ac] = { + 'hgvs_genomic_description': 
mystr(hgvs_lifted), + 'vcf': {'chr': vcf_dict['grc_chr'], + 'pos': str(vcf_dict['pos']), + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } + return lifted_response + +# +# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# + + diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py new file mode 100644 index 00000000..04f52988 --- /dev/null +++ b/VariantValidator/modules/vvMixinConverters.py @@ -0,0 +1,2319 @@ +import re +import os +import sys +import copy +from vvLogging import logger +import hgvs +import hgvs.exceptions +from hgvs.dataproviders import uta +from hgvs.dataproviders import seqfetcher +import hgvs.normalizer +import hgvs.validator +import hgvs.parser +import hgvs.variantmapper +import hgvs.sequencevariant +import vvMixinInit +import vvChromosomes +import vvHGVS +from urlparse import urlparse +import httplib2 as http +import json +from Bio import Entrez,SeqIO + + + +#Error setup +from hgvs.exceptions import HGVSError, HGVSDataNotAvailableError, HGVSUnsupportedOperationError +class mergeHGVSerror(Exception): + pass +class alleleVariantError(Exception): + pass + + + +class Mixin(vvMixinInit.Mixin): + """ + r_to_c + parses r. variant strings into hgvs object and maps to the c. equivalent. 
+ """ + def r_to_c(self, variant, evm): + # convert the input string into a hgvs object by parsing + var_r = self.hp.parse_hgvs_variant(variant) + # map to the coding sequence + var_c = evm.r_to_c(var_r) # coding level variant + variant = str(var_c) + c_from_r = {'variant': variant, 'type': ':c.'} + return c_from_r + + """ + Maps transcript variant descriptions onto specified RefSeqGene reference sequences + Return an hgvs object containing the genomic sequence variant relative to the RefSeqGene + acession + refseq_ac = RefSeqGene ac + """ + + + def refseq(self, variant, vmOld, refseq_ac, hpOld, evm, hdpOld, primary_assembly): + vr = hgvs.validator.Validator(self.hdp) + # parse the variant into hgvs object + var_c = self.hp.parse_hgvs_variant(variant) + # map to the genomic co-ordinates using the easy variant mapper set to alt_aln_method = alt_aln_method + var_g = self.myevm_t_to_g(var_c, evm, self.hdp, primary_assembly) + # Get overlapping transcripts - forcing a splign alignment + start_i = var_g.posedit.pos.start.base + end_i = var_g.posedit.pos.end.base + alt_ac = var_g.ac + alt_aln_method = 'splign' + transcripts = self.hdp.get_tx_for_region(alt_ac, alt_aln_method, start_i - 1, end_i) + # Take the first transcript + for trans in transcripts: + tx_ac = trans[0] + try: + ref_c = self.vm.g_to_t(var_g, tx_ac, alt_aln_method='splign') + except: + continue + else: + # map the variant co-ordinates to the refseq Gene accession using vm + ref_g_dict = { + 'ref_g': '', + 'error': 'false' + } + try: + ref_g_dict['ref_g'] = self.vm.t_to_g(ref_c, alt_ac=refseq_ac, alt_aln_method='splign') + except: + e = sys.exc_info()[0] + ref_g_dict['error'] = e + try: + vr.validate(ref_g_dict['ref_g']) + except: + e = sys.exc_info()[0] + ref_g_dict['error'] = e + if ref_g_dict['error'] == 'false': + return ref_g_dict + else: + continue + # Return as an error if all fail + return ref_g_dict + + + """ + Parses genomic variant strings into hgvs objects + Maps genomic hgvs object into a 
coding hgvs object if the c accession string is provided + returns a c. variant description string + """ + + + def g_to_c(self, var_g, tx_ac, hpOld, evm): + pat_g = re.compile("\:g\.") # Pattern looks for :g. + # If the :g. pattern is present in the input variant + if pat_g.search(var_g): + # convert the input string into a hgvs object by parsing + var_g = self.hp.parse_hgvs_variant(var_g) + # Map to coding variant + var_c = str(evm.g_to_c(var_g, tx_ac)) + return var_c + + + """ + Parses genomic variant strings into hgvs objects + Maps genomic hgvs object into a non-coding hgvs object if the n accession string is provided + returns a n. variant description string + """ + + + def g_to_n(self, var_g, tx_ac, hpOld, evm): + pat_g = re.compile("\:g\.") # Pattern looks for :g. + # If the :g. pattern is present in the input variant + if pat_g.search(var_g): + # convert the input string into a hgvs object by parsing + var_g = self.hp.parse_hgvs_variant(var_g) + # Map to coding variant + var_n = str(evm.g_to_n(var_g, tx_ac)) + return var_n + + + """ + Ensures variant strings are transcript c. or n. + returns parsed hgvs c. or n. object + """ + + + def coding(self, variant, hpOld): + # If the :c. pattern is present in the input variant + if re.search(':c.', variant) or re.search(':n.', variant): + # convert the input string into a hgvs object + var_c = self.hp.parse_hgvs_variant(variant) + return var_c + + + """ + Mapping transcript to genomic position + Ensures variant strings are transcript c. or n. + returns parsed hgvs g. object + """ + + + def genomic(self, variant, evm, hpOld, hdpOld, primary_assembly): + # Set regular expressions for if statements + pat_g = re.compile("\:g\.") # Pattern looks for :g. + pat_n = re.compile("\:n\.") + pat_c = re.compile("\:c\.") # Pattern looks for :c. + + # If the :c. 
pattern is present in the input variant + if pat_c.search(variant) or pat_n.search(variant): + error = 'false' + hgvs_var = self.hp.parse_hgvs_variant(variant) + try: + var_g = self.myevm_t_to_g(hgvs_var, evm, self.hdp, primary_assembly) # genomic level variant + except hgvs.exceptions.HGVSError as e: + error = e + if error != 'false': + var_g = 'error ' + str(e) + return var_g + + # If the :g. pattern is present in the input variant + elif (pat_g.search(variant)): # or (pat_n.search(variant)): + # convert the input string into a hgvs object + var_g = self.hp.parse_hgvs_variant(variant) + return var_g + + + """ + Mapping transcript to protein prediction + Ensures variant strings are transcript c. + returns parsed hgvs p. object + """ + + + + + """ + Function which takes a NORMALIZED hgvs Python transcript variant and maps to a specified protein reference sequence. A protein + level hgvs python object is returned. + + Note the function currently assumes that the transcript description is correctly normalized having come from the + previous g_to_t function + """ + + + + + + """ + Ensures variant strings are g. + returns parsed hgvs g. object + """ + + + def hgvs_genomic(self, variant, hpOld): + # Set regular expressions for if statements + pat_g = re.compile("\:g\.") # Pattern looks for :g. Note (gene) has been removed + # If the :g. pattern is present in the input variant + if pat_g.search(variant): + # convert the input string into a hgvs object + var_g = self.hp.parse_hgvs_variant(variant) + return var_g + + + """ + Enhanced transcript to genome position mapping function using evm + Deals with mapping from transcript positions that do not exist in the genomic sequence + i.e. the stated position aligns to a genomic gap! + Trys to ensure that a genomic position is always returned even if the c. or n. transcript + will not map to the specified genome build primary assembly. 
+ Deals with transcript mapping to several genomic assemblies + Order + Map to a single NC_ for the specified genome build primary assembly + Map to a single NC_ for an alternate genome build primary assembly + Map to an NT_ from the specified genome build + Map to an NT_ from an alternative genome build + Map to an NW_ from the specified genome build + Map to an NW_ from an alternative genome buildRequires parsed c. or n. object + returns parsed hgvs g. object + """ + + + def myevm_t_to_g(self, hgvs_c, evm, hdpOld, primary_assembly): + # create no_norm_evm + if primary_assembly == 'GRCh38': + no_norm_evm = self.no_norm_evm_38 + elif primary_assembly == 'GRCh37': + no_norm_evm = self.no_norm_evm_37 + + # store the input + stored_hgvs_c = copy.deepcopy(hgvs_c) + expand_out = 'false' + utilise_gap_code = True + + # Gap gene black list + try: + gene_symbol = self.db.get.get_gene_symbol_from_transcriptID(hgvs_c.ac) + except Exception: + utilise_gap_code = False + else: + # If the gene symbol is not in the list, the value False will be returned + utilise_gap_code = vvChromosomes.gap_black_list(gene_symbol) + # Warn gap code in use + logger.warning("gap_compensation_myevm = " + str(utilise_gap_code)) + + if utilise_gap_code is True and (hgvs_c.posedit.edit.type == 'identity' or hgvs_c.posedit.edit.type == 'del' or hgvs_c.posedit.edit.type =='delins' or hgvs_c.posedit.edit.type == 'dup' or hgvs_c.posedit.edit.type == 'sub' or hgvs_c.posedit.edit.type == 'ins' or hgvs_c.posedit.edit.type == 'inv'): + + # if NM_ need the n. 
position + if re.match('NM_', str(hgvs_c.ac)): + hgvs_c = no_norm_evm.c_to_n(hgvs_c) + + # Check for intronic + try: + self.hn.normalize(hgvs_c) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('intronic variant', error): + pass + elif re.search('Length implied by coordinates must equal sequence deletion length', error) and re.match( + 'NR_', hgvs_c.ac): + hgvs_c.posedit.pos.end.base = hgvs_c.posedit.pos.start.base + len(hgvs_c.posedit.edit.ref) - 1 + + # Check again before continuing + if re.search('\d+\+', str(hgvs_c.posedit.pos)) or re.search('\d+\-', str(hgvs_c.posedit.pos)) or re.search( + '\*\d+\+', str(hgvs_c.posedit.pos)) or re.search('\*\d+\-', str(hgvs_c.posedit.pos)): + pass + + else: + try: + # For non-intronic sequence + hgvs_t = copy.deepcopy(hgvs_c) + if hgvs_t.posedit.edit.type == 'inv': + inv_alt = self.revcomp(hgvs_t.posedit.edit.ref) + t_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base) + '_' + str(hgvs_t.posedit.pos.end.base) + 'del' + hgvs_t.posedit.edit.ref + 'ins' + inv_alt + hgvs_t_delins = self.hp.parse_hgvs_variant(t_delins) + pre_base = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.start.base-1) + post_base = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.end.base,hgvs_t.posedit.pos.end.base+1) + hgvs_t.posedit.edit.ref = pre_base + hgvs_t.posedit.edit.ref + post_base + inv_alt = pre_base + inv_alt + post_base + hgvs_t.posedit.pos.start.base = hgvs_t.posedit.pos.start.base - 1 + start = hgvs_t.posedit.pos.start.base + hgvs_t.posedit.pos.start.base = start + 1 + hgvs_t.posedit.pos.end.base = hgvs_t.posedit.pos.end.base + 1 + end = hgvs_t.posedit.pos.end.base + hgvs_t.posedit.pos.start.base = start + hgvs_t.posedit.pos.end.base = end + hgvs_str = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str(start) + '_' + str(end) + 'del' + hgvs_t.posedit.edit.ref + 'ins' + inv_alt + hgvs_t = self.hp.parse_hgvs_variant(hgvs_str) + elif hgvs_c.posedit.edit.type == 'dup': + pre_base = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.start.base-1) + post_base = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.end.base,hgvs_t.posedit.pos.end.base+1) + alt = pre_base + hgvs_t.posedit.edit.ref + hgvs_t.posedit.edit.ref + post_base + ref = pre_base + hgvs_t.posedit.edit.ref + post_base + dup_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base - 1) + '_' + str((hgvs_t.posedit.pos.start.base + len(ref)) -2) + 'del' + ref + 'ins' + alt + hgvs_t = self.hp.parse_hgvs_variant(dup_to_delins) + elif hgvs_c.posedit.edit.type == 'ins': + ins_ref = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.end.base+1) + ins_alt = ins_ref[:2] + hgvs_t.posedit.edit.alt + ins_ref[-2:] + ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str(hgvs_t.posedit.pos.start.base - 1) + '_' + str(hgvs_t.posedit.pos.end.base +1 ) + 'del' + ins_ref + 'ins' + ins_alt + hgvs_t = self.hp.parse_hgvs_variant(ins_to_delins) + else: + if str(hgvs_t.posedit.edit.alt) == 'None': + hgvs_t.posedit.edit.alt = '' + pre_base = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.start.base-1) + post_base = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.end.base,hgvs_t.posedit.pos.end.base+1) + hgvs_t.posedit.edit.ref = pre_base + hgvs_t.posedit.edit.ref + post_base + hgvs_t.posedit.edit.alt = pre_base + hgvs_t.posedit.edit.alt + post_base + hgvs_t.posedit.pos.start.base = hgvs_t.posedit.pos.start.base - 1 + start = hgvs_t.posedit.pos.start.base + hgvs_t.posedit.pos.start.base = start + 1 + hgvs_t.posedit.pos.end.base = hgvs_t.posedit.pos.end.base + 1 + end = hgvs_t.posedit.pos.end.base + hgvs_t.posedit.pos.start.base = start + hgvs_t.posedit.pos.end.base = end + hgvs_str = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(start) + '_' + str(end) + str(hgvs_t.posedit.edit) + hgvs_t = self.hp.parse_hgvs_variant(hgvs_str) + hgvs_c = copy.deepcopy(hgvs_t) + + # Set expanded out test to true + expand_out = 'true' + + except Exception: + hgvs_c = hgvs_c + + if re.match('NM_', str(hgvs_c.ac)): + try: + hgvs_c = no_norm_evm.n_to_c(hgvs_c) + except hgvs.exceptions.HGVSError as e: + hgvs_c = copy.deepcopy(stored_hgvs_c) + + # Ensure the altered c. variant has not crossed intro exon boundaries + hgvs_check_boundaries = copy.deepcopy(hgvs_c) + try: + h_variant = self.hn.normalize(hgvs_check_boundaries) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error): + hgvs_c = copy.deepcopy(stored_hgvs_c) + # Catch identity at the exon/intron boundary by trying to normalize ref only + if hgvs_check_boundaries.posedit.edit.type == 'identity': + reform_ident = str(hgvs_c).split(':')[0] + reform_ident = reform_ident + ':' + stored_hgvs_c.type + '.' 
+ str(hgvs_c.posedit.pos) + 'del' + str(hgvs_c.posedit.edit.ref)# + 'ins' + str(hgvs_c.posedit.edit.alt) + hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) + try: + self.hn.normalize(hgvs_reform_ident) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error) or re.search('Normalization of intronic variants', error): + hgvs_c = copy.deepcopy(stored_hgvs_c) + try: + hgvs_genomic = no_norm_evm.t_to_g(hgvs_c) + self.hn.normalize(hgvs_genomic) # Check the validity of the mapping + # This will fail on multiple refs for NC_ + except hgvs.exceptions.HGVSError as e: + # Recover all available mapping options from UTA + mapping_options = self.hdp.get_tx_mapping_options(hgvs_c.ac) + + if mapping_options == []: + raise HGVSDataNotAvailableError("No alignment data between the specified transcript reference sequence and any GRCh37 and GRCh38 genomic reference sequences (including alternate chromosome assemblies, patches and RefSeqGenes) are available.") + + # Capture errors from attempted mappings + attempted_mapping_error = '' + for option in mapping_options: + if re.match('blat', option[2]): + continue + if re.match('NC_', option[1]): + chr_num = vvChromosomes.supported_for_mapping(str(option[1]), primary_assembly) + if chr_num != 'false': + try: + hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[1] + '~' + print e + continue + + # If not mapped, raise error + try: + self.hn.normalize(hgvs_genomic) + except: + for option in mapping_options: + if re.match('blat', option[2]): + continue + if re.match('NC_', option[1]): + try: + hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + if re.search(option[1], attempted_mapping_error): + pass + else: + attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[1] + '~' 
+ print e + continue + try: + self.hn.normalize(hgvs_genomic) + except: + for option in mapping_options: + if re.match('blat', option[2]): + continue + if re.match('NT_', option[1]): + try: + hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[1] + '~' + print e + continue + try: + self.hn.normalize(hgvs_genomic) + except: + for option in mapping_options: + if re.match('blat', option[2]): + continue + if re.match('NW_', option[1]): + try: + hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[1] + '~' + print e + continue + # Only a RefSeqGene available + try: + self.hn.normalize(hgvs_genomic) + except: + for option in mapping_options: + if re.match('blat', option[2]): + continue + if re.match('NG_', option[1]): + try: + hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[1] + '~' + print e + continue + # If not mapped, raise error + try: + hgvs_genomic + except Exception: + raise HGVSDataNotAvailableError(attempted_mapping_error) + + if hgvs_c.posedit.edit.type == 'identity' and hgvs_genomic.posedit.edit.type == 'delins' and hgvs_genomic.posedit.edit.alt == '' and expand_out != 'true': + hgvs_genomic.posedit.edit.alt = hgvs_genomic.posedit.edit.ref + if hgvs_genomic.posedit.edit.type == 'ins' and utilise_gap_code is True: + try: + hgvs_genomic = self.hn.normalize(hgvs_genomic) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error == 'insertion length must be 1': + ref = self.sf.fetch_seq(str(hgvs_genomic.ac),hgvs_genomic.posedit.pos.start.base-1,hgvs_genomic.posedit.pos.end.base) + hgvs_genomic.posedit.edit.ref = ref + hgvs_genomic.posedit.edit.alt = ref[0:1] + 
hgvs_genomic.posedit.edit.alt + ref[-1:] + hgvs_genomic = self.hn.normalize(hgvs_genomic) + if error == 'base start position must be <= end position': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + hgvs_genomic = self.hn.normalize(hgvs_genomic) + + # Statements required to reformat the stored_hgvs_c into a useable synonym + if (stored_hgvs_c.posedit.edit.ref == '' or stored_hgvs_c.posedit.edit.ref is None) and expand_out != 'false': + if stored_hgvs_c.type == 'c': + stored_hgvs_n = self.vm.c_to_n(stored_hgvs_c) + else: + stored_hgvs_n = stored_hgvs_c + stored_ref = self.sf.fetch_seq(str(stored_hgvs_n.ac),stored_hgvs_n.posedit.pos.start.base-1,stored_hgvs_n.posedit.pos.end.base) + stored_hgvs_c.posedit.edit.ref = stored_ref + + if (hgvs_genomic.posedit.edit.ref == '' or hgvs_genomic.posedit.edit.ref is None) and expand_out != 'false': + if hgvs_genomic.posedit.edit.type == 'ins': + stored_ref = self.sf.fetch_seq(str(hgvs_genomic.ac),hgvs_genomic.posedit.pos.start.base-1,hgvs_genomic.posedit.pos.end.base) + stored_alt = stored_ref[:1] + hgvs_genomic.posedit.edit.alt + stored_ref[-1:] + hgvs_genomic.posedit.edit.ref = stored_ref + hgvs_genomic.posedit.edit.alt = stored_alt + + # First look for variants mapping to the flanks of gaps + # either in the gap or on the flank but not fully within the gap + if expand_out == 'true': + nr_genomic = self.nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) + try: + self.hn.normalize(nr_genomic) + except hgvs.exceptions.HGVSInvalidVariantError as e: + if re.match('Length implied by coordinates must equal sequence deletion length', str(e)) or str(e) == 'base start position must be <= end position': + # Effectively, this code is designed to handle variants that are directly proximal to + # gap BOUNDARIES, but in some cases the replace reference function of hgvs mapping has removed bases due to + # the deletion length 
being > the specified range. + + # Warn of variant location wrt the gap + if re.match('Length implied by coordinates must equal sequence deletion length', str(e)): + logger.warning('Variant is proximal to the flank of a genomic gap') + genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) + try: + self.hn.normalize(genomic_gap_variant) + except Exception: + pass + else: + genomic_gap_variant = self.nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) + + if str(e) == 'base start position must be <= end position': + logger.warning('Variant is fully within a genomic gap') + genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) + + # Logic + # We have checked that the variant does not cross boundaries, or is intronic + # So is likely mapping to a genomic gap + try: + self.hn.normalize(genomic_gap_variant) + except Exception as e: + if str(e) == 'base start position must be <= end position': + # This will only happen when the variant is fully within the gap + gap_start = genomic_gap_variant.posedit.pos.end.base + gap_end = genomic_gap_variant.posedit.pos.start.base + genomic_gap_variant.posedit.pos.start.base = gap_start + genomic_gap_variant.posedit.pos.end.base = gap_end + if re.match('Length implied by coordinates must equal sequence deletion length', str(e)): + # This will only happen if the variant is flanking the gap but is + # not inside the gap + logger.warning('Variant is on the flank of a genomic gap but not within the gap') + gap_start = genomic_gap_variant.posedit.pos.start.base - 1 + gap_end = genomic_gap_variant.posedit.pos.end.base + 1 + genomic_gap_variant.posedit.pos.start.base = gap_start + genomic_gap_variant.posedit.pos.end.base = gap_end + genomic_gap_variant.posedit.edit.ref = '' + stored_hgvs_c = copy.deepcopy(hgvs_c) + + # Remove alt + try: + genomic_gap_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + pass + + # Should be a delins so will normalize statically and 
replace the reference bases + genomic_gap_variant = self.hn.normalize(genomic_gap_variant) + # Static map to c. and static normalize + transcript_gap_variant = self.vm.g_to_t(genomic_gap_variant, hgvs_c.ac) + stored_transcript_gap_variant = transcript_gap_variant + if not re.match('Length implied by coordinates must equal sequence deletion length', str(e)): + transcript_gap_variant = self.hn.normalize(transcript_gap_variant) + + # if NM_ need the n. position + if re.match('NM_', str(hgvs_c.ac)): + transcript_gap_n = no_norm_evm.c_to_n(transcript_gap_variant) + transcript_gap_alt_n = no_norm_evm.c_to_n(stored_hgvs_c) + else: + transcript_gap_n = transcript_gap_variant + transcript_gap_alt_n = stored_hgvs_c + + # Ensure an ALT exists + try: + if transcript_gap_alt_n.posedit.edit.alt is None: + transcript_gap_alt_n.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + transcript_gap_n_delins_from_dup = transcript_gap_n.ac + ':' + transcript_gap_n.type + '.' + str(transcript_gap_n.posedit.pos.start.base) + '_' + str(transcript_gap_n.posedit.pos.end.base) + 'del' + transcript_gap_n.posedit.edit.ref + 'ins' + transcript_gap_n.posedit.edit.ref + transcript_gap_n.posedit.edit.ref + transcript_gap_n = self.hp.parse_hgvs_variant(transcript_gap_n_delins_from_dup) + transcript_gap_alt_n_delins_from_dup = transcript_gap_alt_n.ac + ':' + transcript_gap_alt_n.type + '.' 
+ str(transcript_gap_alt_n.posedit.pos.start.base) + '_' + str(transcript_gap_alt_n.posedit.pos.end.base) + 'del' + transcript_gap_alt_n.posedit.edit.ref + 'ins' + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n = self.hp.parse_hgvs_variant(transcript_gap_alt_n_delins_from_dup) + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(transcript_gap_n.posedit.edit.ref) + if transcript_gap_alt_n.posedit.edit.alt is not None: + alternate_bases = list(transcript_gap_alt_n.posedit.edit.alt) + else: + # Deletions with no ins + pre_alternate_bases = list(transcript_gap_alt_n.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = transcript_gap_n.posedit.pos.start.base + alt_start = transcript_gap_alt_n.posedit.pos.start.base + ref_base_dict = {} + for base in reference_bases: + ref_base_dict[ref_start] = str(base) + ref_start = ref_start + 1 + + alt_base_dict = {} + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for int in range(transcript_gap_alt_n.posedit.pos.start.base, transcript_gap_alt_n.posedit.pos.end.base+1, 1): + if int == alt_start: + alt_base_dict[int] = str(''.join(alternate_bases)) + else: + alt_base_dict[int] = 'X' + + # Generate the alt sequence + alternate_sequence_bases = [] + for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base+1, 1): + if int in alt_base_dict.keys(): + alternate_sequence_bases.append(alt_base_dict[int]) + else: + alternate_sequence_bases.append(ref_base_dict[int]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Update variant, map to genome using vm and normalize + transcript_gap_n.posedit.edit.alt = alternate_sequence + + try: + transcript_gap_variant = 
self.vm.n_to_c(transcript_gap_n) + except: + transcript_gap_variant = transcript_gap_n + + try: + hgvs_genomic = self.vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) + hgvs_genomic = self.hn.normalize(hgvs_genomic) + except Exception as e: + if str(e) == "base start position must be <= end position": + # Expansion out is required to map back to the genomic position + pre_base = self.sf.fetch_seq(transcript_gap_n.ac,transcript_gap_n.posedit.pos.start.base-2,transcript_gap_n.posedit.pos.start.base-1) + post_base = self.sf.fetch_seq(transcript_gap_n.ac,transcript_gap_n.posedit.pos.end.base,transcript_gap_n.posedit.pos.end.base+1) + transcript_gap_n.posedit.pos.start.base = transcript_gap_n.posedit.pos.start.base - 1 + transcript_gap_n.posedit.pos.end.base = transcript_gap_n.posedit.pos.end.base + 1 + transcript_gap_n.posedit.edit.ref = pre_base + transcript_gap_n.posedit.edit.ref + post_base + transcript_gap_n.posedit.edit.alt = pre_base + transcript_gap_n.posedit.edit.alt + post_base + try: + transcript_gap_variant = self.vm.n_to_c(transcript_gap_n) + except: + transcript_gap_variant = transcript_gap_n + hgvs_genomic = self.vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) + hgvs_genomic = self.hn.normalize(hgvs_genomic) + + # Bypass the next bit of gap code + expand_out = 'false' + + else: + pass + # No map to the flank of a gap or within the gap + else: + pass + + + # CASCADING STATEMENTS WHICH CAPTURE t to g MAPPING OPTIONS + # Remove identity bases + if hgvs_c == stored_hgvs_c: + expand_out = 'false' + elif expand_out == 'false' or utilise_gap_code is False: + pass + # Correct expansion ref + 2 + elif expand_out == 'true' and ( + len(hgvs_genomic.posedit.edit.ref) == (len(stored_hgvs_c.posedit.edit.ref) + 2)): # >= 3: + hgvs_genomic.posedit.pos.start.base = hgvs_genomic.posedit.pos.start.base + 1 + hgvs_genomic.posedit.pos.end.base = hgvs_genomic.posedit.pos.end.base - 1 + hgvs_genomic.posedit.edit.ref = hgvs_genomic.posedit.edit.ref[1:-1] + if 
hgvs_genomic.posedit.edit.alt is not None: + hgvs_genomic.posedit.edit.alt = hgvs_genomic.posedit.edit.alt[1:-1] + elif expand_out == 'true' and ( + len(hgvs_genomic.posedit.edit.ref) != (len(stored_hgvs_c.posedit.edit.ref) + 2)): # >= 3: + if expand_out == 'true' and len(hgvs_genomic.posedit.edit.ref) == 2: + gn = self.hn.normalize(hgvs_genomic) + pass + + # Likely if the start or end position aligns to a gap in the genomic sequence + # Logic + # We have checked that the variant does not cross boundaries, or is intronic + # So is likely mapping to a genomic gap + elif expand_out == 'true' and len(hgvs_genomic.posedit.edit.ref) <= 1: + # Incorrect expansion, likely < ref + 2 + genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) + try: + self.hn.normalize(genomic_gap_variant) + except Exception as e: + if str(e) == 'base start position must be <= end position': + gap_start = genomic_gap_variant.posedit.pos.end.base + gap_end = genomic_gap_variant.posedit.pos.start.base + genomic_gap_variant.posedit.pos.start.base = gap_start + genomic_gap_variant.posedit.pos.end.base = gap_end + # Remove alt + try: + genomic_gap_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + pass + # Should be a delins so will normalize statically and replace the reference bases + genomic_gap_variant = self.hn.normalize(genomic_gap_variant) + # Static map to c. and static normalize + transcript_gap_variant = self.vm.g_to_t(genomic_gap_variant, hgvs_c.ac) + stored_transcript_gap_variant = transcript_gap_variant + transcript_gap_variant = self.hn.normalize(transcript_gap_variant) + # if NM_ need the n. 
position + if re.match('NM_', str(hgvs_c.ac)): + transcript_gap_n = no_norm_evm.c_to_n(transcript_gap_variant) + transcript_gap_alt_n = no_norm_evm.c_to_n(stored_hgvs_c) + else: + transcript_gap_n = transcript_gap_variant + transcript_gap_alt_n = stored_hgvs_c + + # Ensure an ALT exists + try: + if transcript_gap_alt_n.posedit.edit.alt is None: + transcript_gap_alt_n.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + transcript_gap_n_delins_from_dup = transcript_gap_n.ac + ':' + transcript_gap_n.type + '.' + str( + transcript_gap_n.posedit.pos.start.base) + '_' + str( + transcript_gap_n.posedit.pos.end.base) + 'del' + transcript_gap_n.posedit.edit.ref + 'ins' + transcript_gap_n.posedit.edit.ref + transcript_gap_n.posedit.edit.ref + transcript_gap_n = self.hp.parse_hgvs_variant(transcript_gap_n_delins_from_dup) + transcript_gap_alt_n_delins_from_dup = transcript_gap_alt_n.ac + ':' + transcript_gap_alt_n.type + '.' + str( + transcript_gap_alt_n.posedit.pos.start.base) + '_' + str( + transcript_gap_alt_n.posedit.pos.end.base) + 'del' + transcript_gap_alt_n.posedit.edit.ref + 'ins' + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n = self.hp.parse_hgvs_variant(transcript_gap_alt_n_delins_from_dup) + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(transcript_gap_n.posedit.edit.ref) + if transcript_gap_alt_n.posedit.edit.alt is not None: + alternate_bases = list(transcript_gap_alt_n.posedit.edit.alt) + else: + # Deletions with no ins + pre_alternate_bases = list(transcript_gap_alt_n.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = transcript_gap_n.posedit.pos.start.base + alt_start = transcript_gap_alt_n.posedit.pos.start.base + ref_base_dict = {} + for base in reference_bases: + ref_base_dict[ref_start] = str(base) + 
ref_start = ref_start + 1 + + alt_base_dict = {} + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for int in range(transcript_gap_alt_n.posedit.pos.start.base, + transcript_gap_alt_n.posedit.pos.end.base + 1, 1): + if int == alt_start: + alt_base_dict[int] = str(''.join(alternate_bases)) + else: + alt_base_dict[int] = 'X' + + # Generate the alt sequence + alternate_sequence_bases = [] + for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base + 1, 1): + if int in alt_base_dict.keys(): + alternate_sequence_bases.append(alt_base_dict[int]) + else: + alternate_sequence_bases.append(ref_base_dict[int]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Update variant, map to genome using vm and normalize + transcript_gap_n.posedit.edit.alt = alternate_sequence + + try: + transcript_gap_variant = self.vm.n_to_c(transcript_gap_n) + except: + transcript_gap_variant = transcript_gap_n + + try: + hgvs_genomic = self.vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) + hgvs_genomic = self.hn.normalize(hgvs_genomic) + except Exception as e: + if str(e) == "base start position must be <= end position": + # Expansion out is required to map back to the genomic position + pre_base = self.sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.start.base - 2, + transcript_gap_n.posedit.pos.start.base - 1) + post_base = self.sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.end.base, + transcript_gap_n.posedit.pos.end.base + 1) + transcript_gap_n.posedit.pos.start.base = transcript_gap_n.posedit.pos.start.base - 1 + transcript_gap_n.posedit.pos.end.base = transcript_gap_n.posedit.pos.end.base + 1 + transcript_gap_n.posedit.edit.ref = pre_base + transcript_gap_n.posedit.edit.ref + post_base + transcript_gap_n.posedit.edit.alt = pre_base + transcript_gap_n.posedit.edit.alt + 
post_base + try: + transcript_gap_variant = self.vm.n_to_c(transcript_gap_n) + except: + transcript_gap_variant = transcript_gap_n + hgvs_genomic = self.vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) + hgvs_genomic = self.hn.normalize(hgvs_genomic) + + # Ins variants map badly - Especially between c. exon/exon boundary + if hgvs_c.posedit.edit.type == 'ins' and hgvs_c.posedit.pos.start.offset == 0 and hgvs_c.posedit.pos.end.offset == 0: + try: + self.hn.normalize(hgvs_genomic) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error == 'insertion length must be 1': + if hgvs_c.type == 'c': + hgvs_t = self.vm.c_to_n(hgvs_c) + else: + hgvs_t = copy.copy(hgvs_c) + ins_ref = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-1,hgvs_t.posedit.pos.end.base) + ins_alt = ins_ref[:1] + hgvs_t.posedit.edit.alt + ins_ref[-1:] + ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base) + '_' + str(hgvs_t.posedit.pos.end.base) + 'del' + ins_ref + 'ins' + ins_alt + hgvs_t = self.hp.parse_hgvs_variant(ins_to_delins) + try: + hgvs_c = self.vm.n_to_c(hgvs_t) + except Exception: + hgvs_c = copy.copy(hgvs_t) + try: + hgvs_genomic = no_norm_evm.t_to_g(hgvs_c) + except Exception as e: + error = str(e) + logger.warning('Ins mapping error in myt_to_g ' + error) + + return hgvs_genomic + + """ + USE WITH MAPPER THAT DOES NOT REPLACE THE REFERENCE GENOMIC BASES AND DOED NOT NORMALIZE + + Enhanced transcript to genome position mapping function using evm + Trys to ensure that a genomic position is always returned even if the c. or n. transcript + will not map to the specified genome build primary assembly. + Deals with transcript mapping to several genomic assemblies + Order + Map to a single NC_ (or ALT) for the specified genome build + returns parsed hgvs g. 
object + """ + + def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hnOld, hpOld, sfOld, no_norm_evm): + try: + hgvs_genomic = evm.t_to_g(hgvs_c) + self.hn.normalize(hgvs_genomic) + # This will fail on multiple refs for NC_ + except hgvs.exceptions.HGVSError as e: + # Recover all available mapping options from UTA + mapping_options = self.hdp.get_tx_mapping_options(hgvs_c.ac) + + if mapping_options == []: + raise HGVSDataNotAvailableError("no g. mapping options available") + + for option in mapping_options: + if re.match('blat', option[2]): + continue + if re.match('NC_', option[1]): + chr_num = vvChromosomes.supported_for_mapping(str(option[1]), primary_assembly) + if chr_num != 'false': + try: + hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[ + 1] + '~' + print e + continue + + # If not mapped, raise error + try: + self.hn.normalize(hgvs_genomic) + except: + for option in mapping_options: + if re.match('blat', option[2]): + continue + if re.match('NC_', option[1]): + chr_num = vvChromosomes.supported_for_mapping(str(option[1]), primary_assembly) + if chr_num != 'false': + try: + hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[ + 1] + '~' + print e + continue + + # If not mapped, raise error + try: + self.hn.normalize(hgvs_genomic) + except: + for option in mapping_options: + if re.match('blat', option[2]): + continue + if re.match('NC_', option[1]): + chr_num = vvChromosomes.supported_for_mapping(str(option[1]), primary_assembly) + if chr_num == 'false': + try: + hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + if re.search(option[1], attempted_mapping_error): + pass + else: + attempted_mapping_error = attempted_mapping_error + str(e) 
+ "/" + hgvs_c.ac + "/" + \ + option[ + 1] + '~' + print e + continue + try: + self.hn.normalize(hgvs_genomic) + except: + for option in mapping_options: + if re.match('blat', option[2]): + continue + if re.match('NT_', option[1]): + chr_num = vvChromosomes.supported_for_mapping(str(option[1]), + primary_assembly) + if chr_num != 'false': + try: + hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + \ + option[ + 1] + '~' + print e + continue + try: + self.hn.normalize(hgvs_genomic) + except: + for option in mapping_options: + if re.match('blat', option[2]): + continue + if re.match('NT_', option[1]): + chr_num = vvChromosomes.supported_for_mapping(str(option[1]), + primary_assembly) + if chr_num == 'false': + try: + hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + attempted_mapping_error = attempted_mapping_error + str( + e) + "/" + hgvs_c.ac + "/" + \ + option[ + 1] + '~' + print e + continue + try: + self.hn.normalize(hgvs_genomic) + except: + for option in mapping_options: + if re.match('blat', option[2]): + continue + if re.match('NW_', option[1]): + chr_num = vvChromosomes.supported_for_mapping(str(option[1]), + primary_assembly) + if chr_num != 'false': + try: + hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + attempted_mapping_error = attempted_mapping_error + str( + e) + "/" + hgvs_c.ac + "/" + \ + option[1] + '~' + print e + continue + try: + self.hn.normalize(hgvs_genomic) + except: + for option in mapping_options: + if re.match('blat', option[2]): + continue + if re.match('NW_', option[1]): + chr_num = vvChromosomes.supported_for_mapping(str(option[1]), + primary_assembly) + if chr_num == 'false': + try: + hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + attempted_mapping_error = attempted_mapping_error + str( + e) + 
"/" + hgvs_c.ac + "/" + \ + option[1] + '~' + print e + continue + + # Only a RefSeqGene available + try: + self.hn.normalize(hgvs_genomic) + except: + for option in mapping_options: + if re.match('blat', option[2]): + continue + if re.match('NG_', option[1]): + try: + hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + attempted_mapping_error = attempted_mapping_error + str( + e) + "/" + hgvs_c.ac + "/" + \ + option[1] + '~' + print e + continue + try: + hgvs_genomic + except Exception: + raise HGVSDataNotAvailableError('No available t_to_g liftover') + + # Ins variants map badly - Especially between c. exon/exon boundary + if hgvs_c.posedit.edit.type == 'ins' and hgvs_c.posedit.pos.start.offset == 0 and hgvs_c.posedit.pos.end.offset == 0: + try: + self.hn.normalize(hgvs_genomic) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error == 'insertion length must be 1': + if hgvs_c.type == 'c': + hgvs_t = self.vm.c_to_n(hgvs_c) + else: + hgvs_t = copy.copy(hgvs_c) + ins_ref = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 1, hgvs_t.posedit.pos.end.base) + ins_alt = ins_ref[:1] + hgvs_t.posedit.edit.alt + ins_ref[-1:] + ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base) + '_' + str( + hgvs_t.posedit.pos.end.base) + 'del' + ins_ref + 'ins' + ins_alt + hgvs_t = self.hp.parse_hgvs_variant(ins_to_delins) + try: + hgvs_c = self.vm.n_to_c(hgvs_t) + except Exception: + hgvs_c = copy.copy(hgvs_t) + try: + hgvs_genomic = no_norm_evm.t_to_g(hgvs_c) + except Exception as e: + error = str(e) + logger.warning('Ins mapping error in myt_to_g ' + error) + + return hgvs_genomic + + """ + Enhanced transcript to genome position on a specified genomic reference using vm + Deals with mapping from transcript positions that do not exist in the genomic sequence + i.e. the stated position aligns to a genomic gap! + returns parsed hgvs g. 
object + """ + + + def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hnOld, hdpOld, primary_assembly): + # create no_norm_evm + if primary_assembly == 'GRCh38': + no_norm_evm = self.no_norm_evm_38 + elif primary_assembly == 'GRCh37': + no_norm_evm = self.no_norm_evm_37 + + # store the input + stored_hgvs_c = copy.deepcopy(hgvs_c) + expand_out = 'false' + utilise_gap_code = True + + # Gap gene black list + try: + gene_symbol = self.db.get.get_gene_symbol_from_transcriptID(hgvs_c.ac) + except Exception: + utilise_gap_code = False + else: + # If the gene symbol is not in the list, the value False will be returned + utilise_gap_code = vvChromosomes.gap_black_list(gene_symbol) + # Warn gap code in use + logger.warning("gap_compensation_mvm = " + str(utilise_gap_code)) + + if utilise_gap_code is True and (hgvs_c.posedit.edit.type == 'identity' or hgvs_c.posedit.edit.type == 'del' or hgvs_c.posedit.edit.type =='delins' or hgvs_c.posedit.edit.type == 'dup' or hgvs_c.posedit.edit.type == 'sub' or hgvs_c.posedit.edit.type == 'ins' or hgvs_c.posedit.edit.type == 'inv'): + + # if NM_ need the n. 
position + if re.match('NM_', str(hgvs_c.ac)): + hgvs_c = no_norm_evm.c_to_n(hgvs_c) + + # Check for intronic + try: + self.hn.normalize(hgvs_c) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('intronic variant', error): + pass + elif re.search('Length implied by coordinates must equal sequence deletion length', error) and re.match( + 'NR_', hgvs_c.ac): + hgvs_c.posedit.pos.end.base = hgvs_c.posedit.pos.start.base + len(hgvs_c.posedit.edit.ref) - 1 + + # Check again before continuing + if re.search('\d+\+', str(hgvs_c.posedit.pos)) or re.search('\d+\-', str(hgvs_c.posedit.pos)) or re.search('\*\d+\+', str(hgvs_c.posedit.pos)) or re.search('\*\d+\-', str(hgvs_c.posedit.pos)): + pass + + else: + try: + # For non-intronic sequence + hgvs_t = copy.deepcopy(hgvs_c) + # handle inversions + if hgvs_t.posedit.edit.type == 'inv': + inv_alt = self.revcomp(hgvs_t.posedit.edit.ref) + t_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base) + '_' + str(hgvs_t.posedit.pos.end.base) + 'del' + hgvs_t.posedit.edit.ref + 'ins' + inv_alt + hgvs_t_delins = self.hp.parse_hgvs_variant(t_delins) + pre_base = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.start.base-1) + post_base = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.end.base,hgvs_t.posedit.pos.end.base+1) + hgvs_t.posedit.edit.ref = pre_base + hgvs_t.posedit.edit.ref + post_base + inv_alt = pre_base + inv_alt + post_base + hgvs_t.posedit.pos.start.base = hgvs_t.posedit.pos.start.base - 1 + start = hgvs_t.posedit.pos.start.base + hgvs_t.posedit.pos.start.base = start + 1 + hgvs_t.posedit.pos.end.base = hgvs_t.posedit.pos.end.base + 1 + end = hgvs_t.posedit.pos.end.base + hgvs_t.posedit.pos.start.base = start + hgvs_t.posedit.pos.end.base = end + hgvs_str = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str(start) + '_' + str(end) + 'del' + hgvs_t.posedit.edit.ref + 'ins' + inv_alt + hgvs_t = self.hp.parse_hgvs_variant(hgvs_str) + if hgvs_c.posedit.edit.type == 'dup': + # hgvs_t = reverse_normalize.normalize(hgvs_t) + pre_base = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.start.base-1) + post_base = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.end.base,hgvs_t.posedit.pos.end.base+1) + alt = pre_base + hgvs_t.posedit.edit.ref + hgvs_t.posedit.edit.ref + post_base + ref = pre_base + hgvs_t.posedit.edit.ref + post_base + dup_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base - 1) + '_' + str((hgvs_t.posedit.pos.start.base + len(ref)) -2) + 'del' + ref + 'ins' + alt + hgvs_t = self.hp.parse_hgvs_variant(dup_to_delins) + elif hgvs_c.posedit.edit.type == 'ins': + ins_ref = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.end.base+1) + ins_alt = ins_ref[:2] + hgvs_t.posedit.edit.alt + ins_ref[-2:] + ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str(hgvs_t.posedit.pos.start.base - 1) + '_' + str(hgvs_t.posedit.pos.end.base +1 ) + 'del' + ins_ref + 'ins' + ins_alt + hgvs_t = self.hp.parse_hgvs_variant(ins_to_delins) + else: + if str(hgvs_t.posedit.edit.alt) == 'None': + hgvs_t.posedit.edit.alt = '' + pre_base = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.start.base-1) + post_base = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.end.base,hgvs_t.posedit.pos.end.base+1) + hgvs_t.posedit.edit.ref = pre_base + hgvs_t.posedit.edit.ref + post_base + hgvs_t.posedit.edit.alt = pre_base + hgvs_t.posedit.edit.alt + post_base + hgvs_t.posedit.pos.start.base = hgvs_t.posedit.pos.start.base - 1 + start = hgvs_t.posedit.pos.start.base + hgvs_t.posedit.pos.start.base = start + 1 + hgvs_t.posedit.pos.end.base = hgvs_t.posedit.pos.end.base + 1 + end = hgvs_t.posedit.pos.end.base + hgvs_t.posedit.pos.start.base = start + hgvs_t.posedit.pos.end.base = end + hgvs_str = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(start) + '_' + str(end) + str(hgvs_t.posedit.edit) + hgvs_t = self.hp.parse_hgvs_variant(hgvs_str) + hgvs_c = copy.deepcopy(hgvs_t) + + # Set expanded out test to true + expand_out = 'true' + + except Exception: + hgvs_c = hgvs_c + + if re.match('NM_', str(hgvs_c.ac)): + try: + hgvs_c = no_norm_evm.n_to_c(hgvs_c) + except hgvs.exceptions.HGVSError as e: + hgvs_c = copy.deepcopy(stored_hgvs_c) + + # Ensure the altered c. variant has not crossed intro exon boundaries + hgvs_check_boundaries = copy.deepcopy(hgvs_c) + try: + h_variant = self.hn.normalize(hgvs_check_boundaries) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error): + hgvs_c = copy.deepcopy(stored_hgvs_c) + # Catch identity at the exon/intron boundary by trying to normalize ref only + if hgvs_check_boundaries.posedit.edit.type == 'identity': + reform_ident = str(hgvs_c).split(':')[0] + reform_ident = reform_ident + ':' + stored_hgvs_c.type + '.' 
+ str(hgvs_c.posedit.pos) + 'del' + str(hgvs_c.posedit.edit.ref)# + 'ins' + str(hgvs_c.posedit.edit.alt) + hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) + try: + self.hn.normalize(hgvs_reform_ident) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error) or re.search('Normalization of intronic variants', error): + hgvs_c = copy.deepcopy(stored_hgvs_c) + + hgvs_genomic = self.vm.t_to_g(hgvs_c, alt_chr) + if hgvs_c.posedit.edit.type == 'identity' and hgvs_genomic.posedit.edit.type == 'delins' and hgvs_genomic.posedit.edit.alt == '' and expand_out != 'true': + hgvs_genomic.posedit.edit.alt = hgvs_genomic.posedit.edit.ref + if hgvs_genomic.posedit.edit.type == 'ins' and utilise_gap_code is True: + try: + hgvs_genomic = self.hn.normalize(hgvs_genomic) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error == 'insertion length must be 1': + ref = self.sf.fetch_seq(str(hgvs_genomic.ac),hgvs_genomic.posedit.pos.start.base-1,hgvs_genomic.posedit.pos.end.base) + hgvs_genomic.posedit.edit.ref = ref + hgvs_genomic.posedit.edit.alt = ref[0:1] + hgvs_genomic.posedit.edit.alt + ref[-1:] + hgvs_genomic = self.hn.normalize(hgvs_genomic) + if error == 'base start position must be <= end position': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + hgvs_genomic = self.hn.normalize(hgvs_genomic) + + # Statements required to reformat the stored_hgvs_c into a useable synonym + if (stored_hgvs_c.posedit.edit.ref == '' or stored_hgvs_c.posedit.edit.ref is None) and expand_out != 'false': + if stored_hgvs_c.type == 'c': + stored_hgvs_n = self.vm.c_to_n(stored_hgvs_c) + else: + stored_hgvs_n = stored_hgvs_c + stored_ref = self.sf.fetch_seq(str(stored_hgvs_n.ac),stored_hgvs_n.posedit.pos.start.base-1,stored_hgvs_n.posedit.pos.end.base) + stored_hgvs_c.posedit.edit.ref = 
stored_ref + + if (hgvs_genomic.posedit.edit.ref == '' or hgvs_genomic.posedit.edit.ref is None) and expand_out != 'false': + if hgvs_genomic.posedit.edit.type == 'ins': + stored_ref = self.sf.fetch_seq(str(hgvs_genomic.ac),hgvs_genomic.posedit.pos.start.base-1,hgvs_genomic.posedit.pos.end.base) + stored_alt = stored_ref[:1] + hgvs_genomic.posedit.edit.alt + stored_ref[-1:] + hgvs_genomic.posedit.edit.ref = stored_ref + hgvs_genomic.posedit.edit.alt = stored_alt + + # First look for variants mapping to the flanks of gaps + # either in the gap or on the flank but not fully within the gap + if expand_out == 'true': + nr_genomic = self.nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) + try: + self.hn.normalize(nr_genomic) + except hgvs.exceptions.HGVSInvalidVariantError as e: + if re.match('Length implied by coordinates must equal sequence deletion length', str(e)) or str(e) == 'base start position must be <= end position': + # Effectively, this code is designed to handle variants that are directly proximal to + # gap BOUNDARIES, but in some cases the replace reference function of hgvs mapping has removed bases due to + # the deletion length being > the specified range. 
+ + # Warn of variant location wrt the gap + if re.match('Length implied by coordinates must equal sequence deletion length', str(e)): + logger.warning('Variant is proximal to the flank of a genomic gap') + genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) + try: + self.hn.normalize(genomic_gap_variant) + except Exception: + pass + else: + genomic_gap_variant = self.nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) + + if str(e) == 'base start position must be <= end position': + logger.warning('Variant is fully within a genomic gap') + genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) + + # Logic + # We have checked that the variant does not cross boundaries, or is intronic + # So is likely mapping to a genomic gap + try: + self.hn.normalize(genomic_gap_variant) + except Exception as e: + if str(e) == 'base start position must be <= end position': + # This will only happen when the variant is fully within the gap + gap_start = genomic_gap_variant.posedit.pos.end.base + gap_end = genomic_gap_variant.posedit.pos.start.base + genomic_gap_variant.posedit.pos.start.base = gap_start + genomic_gap_variant.posedit.pos.end.base = gap_end + if re.match('Length implied by coordinates must equal sequence deletion length', str(e)): + # This will only happen if the variant is flanking the gap but is + # not inside the gap + logger.warning('Variant is on the flank of a genomic gap but not within the gap') + gap_start = genomic_gap_variant.posedit.pos.start.base - 1 + gap_end = genomic_gap_variant.posedit.pos.end.base + 1 + genomic_gap_variant.posedit.pos.start.base = gap_start + genomic_gap_variant.posedit.pos.end.base = gap_end + genomic_gap_variant.posedit.edit.ref = '' + stored_hgvs_c = copy.deepcopy(hgvs_c) + + # Remove alt + try: + genomic_gap_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + pass + + # Should be a delins so will normalize statically and replace the reference bases + 
genomic_gap_variant = self.hn.normalize(genomic_gap_variant) + # Static map to c. and static normalize + transcript_gap_variant = self.vm.g_to_t(genomic_gap_variant, hgvs_c.ac) + stored_transcript_gap_variant = transcript_gap_variant + if not re.match('Length implied by coordinates must equal sequence deletion length', str(e)): + transcript_gap_variant = self.hn.normalize(transcript_gap_variant) + + # if NM_ need the n. position + if re.match('NM_', str(hgvs_c.ac)): + transcript_gap_n = no_norm_evm.c_to_n(transcript_gap_variant) + transcript_gap_alt_n = no_norm_evm.c_to_n(stored_hgvs_c) + else: + transcript_gap_n = transcript_gap_variant + transcript_gap_alt_n = stored_hgvs_c + + # Ensure an ALT exists + try: + if transcript_gap_alt_n.posedit.edit.alt is None: + transcript_gap_alt_n.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + transcript_gap_n_delins_from_dup = transcript_gap_n.ac + ':' + transcript_gap_n.type + '.' + str(transcript_gap_n.posedit.pos.start.base) + '_' + str(transcript_gap_n.posedit.pos.end.base) + 'del' + transcript_gap_n.posedit.edit.ref + 'ins' + transcript_gap_n.posedit.edit.ref + transcript_gap_n.posedit.edit.ref + transcript_gap_n = self.hp.parse_hgvs_variant(transcript_gap_n_delins_from_dup) + transcript_gap_alt_n_delins_from_dup = transcript_gap_alt_n.ac + ':' + transcript_gap_alt_n.type + '.' 
+ str(transcript_gap_alt_n.posedit.pos.start.base) + '_' + str(transcript_gap_alt_n.posedit.pos.end.base) + 'del' + transcript_gap_alt_n.posedit.edit.ref + 'ins' + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n = self.hp.parse_hgvs_variant(transcript_gap_alt_n_delins_from_dup) + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(transcript_gap_n.posedit.edit.ref) + if transcript_gap_alt_n.posedit.edit.alt is not None: + alternate_bases = list(transcript_gap_alt_n.posedit.edit.alt) + else: + # Deletions with no ins + pre_alternate_bases = list(transcript_gap_alt_n.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = transcript_gap_n.posedit.pos.start.base + alt_start = transcript_gap_alt_n.posedit.pos.start.base + ref_base_dict = {} + for base in reference_bases: + ref_base_dict[ref_start] = str(base) + ref_start = ref_start + 1 + + alt_base_dict = {} + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for int in range(transcript_gap_alt_n.posedit.pos.start.base, transcript_gap_alt_n.posedit.pos.end.base+1, 1): + if int == alt_start: + alt_base_dict[int] = str(''.join(alternate_bases)) + else: + alt_base_dict[int] = 'X' + + # Generate the alt sequence + alternate_sequence_bases = [] + for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base+1, 1): + if int in alt_base_dict.keys(): + alternate_sequence_bases.append(alt_base_dict[int]) + else: + alternate_sequence_bases.append(ref_base_dict[int]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Update variant, map to genome using vm and normalize + transcript_gap_n.posedit.edit.alt = alternate_sequence + + try: + transcript_gap_variant = 
self.vm.n_to_c(transcript_gap_n) + except: + transcript_gap_variant = transcript_gap_n + + try: + hgvs_genomic = self.vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) + hgvs_genomic = self.hn.normalize(hgvs_genomic) + except Exception as e: + if str(e) == "base start position must be <= end position": + # Expansion out is required to map back to the genomic position + pre_base = self.sf.fetch_seq(transcript_gap_n.ac,transcript_gap_n.posedit.pos.start.base-2,transcript_gap_n.posedit.pos.start.base-1) + post_base = self.sf.fetch_seq(transcript_gap_n.ac,transcript_gap_n.posedit.pos.end.base,transcript_gap_n.posedit.pos.end.base+1) + transcript_gap_n.posedit.pos.start.base = transcript_gap_n.posedit.pos.start.base - 1 + transcript_gap_n.posedit.pos.end.base = transcript_gap_n.posedit.pos.end.base + 1 + transcript_gap_n.posedit.edit.ref = pre_base + transcript_gap_n.posedit.edit.ref + post_base + transcript_gap_n.posedit.edit.alt = pre_base + transcript_gap_n.posedit.edit.alt + post_base + try: + transcript_gap_variant = self.vm.n_to_c(transcript_gap_n) + except: + transcript_gap_variant = transcript_gap_n + hgvs_genomic = self.vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) + hgvs_genomic = self.hn.normalize(hgvs_genomic) + + # Bypass the next bit of gap code + expand_out = 'false' + + else: + pass + # No map to the flank of a gap or within the gap + else: + pass + + # CASCADING STATEMENTS WHICH CAPTURE t to g MAPPING OPTIONS + # Remove identity bases + if hgvs_c == stored_hgvs_c: + expand_out = 'false' + elif expand_out == 'false' or utilise_gap_code is False: + pass + # Correct expansion ref + 2 + elif expand_out == 'true' and ( + len(hgvs_genomic.posedit.edit.ref) == (len(stored_hgvs_c.posedit.edit.ref) + 2)): # >= 3: + hgvs_genomic.posedit.pos.start.base = hgvs_genomic.posedit.pos.start.base + 1 + hgvs_genomic.posedit.pos.end.base = hgvs_genomic.posedit.pos.end.base - 1 + hgvs_genomic.posedit.edit.ref = hgvs_genomic.posedit.edit.ref[1:-1] + if 
hgvs_genomic.posedit.edit.alt is not None: + hgvs_genomic.posedit.edit.alt = hgvs_genomic.posedit.edit.alt[1:-1] + elif expand_out == 'true' and ( + len(hgvs_genomic.posedit.edit.ref) != (len(stored_hgvs_c.posedit.edit.ref) + 2)): # >= 3: + if expand_out == 'true' and len(hgvs_genomic.posedit.edit.ref) == 2: + gn = self.hn.normalize(hgvs_genomic) + pass + + # Likely if the start or end position aligns to a gap in the genomic sequence + # Logic + # We have checked that the variant does not cross boundaries, or is intronic + # So is likely mapping to a genomic gap + elif expand_out == 'true' and len(hgvs_genomic.posedit.edit.ref) <= 1: + # Incorrect expansion, likely < ref + 2 + genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) + try: + self.hn.normalize(genomic_gap_variant) + except Exception as e: + if str(e) == 'base start position must be <= end position': + gap_start = genomic_gap_variant.posedit.pos.end.base + gap_end = genomic_gap_variant.posedit.pos.start.base + genomic_gap_variant.posedit.pos.start.base = gap_start + genomic_gap_variant.posedit.pos.end.base = gap_end + # Remove alt + try: + genomic_gap_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + pass + # Should be a delins so will normalize statically and replace the reference bases + genomic_gap_variant = self.hn.normalize(genomic_gap_variant) + # Static map to c. and static normalize + transcript_gap_variant = self.vm.g_to_t(genomic_gap_variant, hgvs_c.ac) + stored_transcript_gap_variant = transcript_gap_variant + transcript_gap_variant = self.hn.normalize(transcript_gap_variant) + # if NM_ need the n. 
position + if re.match('NM_', str(hgvs_c.ac)): + transcript_gap_n = no_norm_evm.c_to_n(transcript_gap_variant) + transcript_gap_alt_n = no_norm_evm.c_to_n(stored_hgvs_c) + else: + transcript_gap_n = transcript_gap_variant + transcript_gap_alt_n = stored_hgvs_c + + # Ensure an ALT exists + try: + if transcript_gap_alt_n.posedit.edit.alt is None: + transcript_gap_alt_n.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + transcript_gap_n_delins_from_dup = transcript_gap_n.ac + ':' + transcript_gap_n.type + '.' + str( + transcript_gap_n.posedit.pos.start.base) + '_' + str( + transcript_gap_n.posedit.pos.end.base) + 'del' + transcript_gap_n.posedit.edit.ref + 'ins' + transcript_gap_n.posedit.edit.ref + transcript_gap_n.posedit.edit.ref + transcript_gap_n = self.hp.parse_hgvs_variant(transcript_gap_n_delins_from_dup) + transcript_gap_alt_n_delins_from_dup = transcript_gap_alt_n.ac + ':' + transcript_gap_alt_n.type + '.' + str( + transcript_gap_alt_n.posedit.pos.start.base) + '_' + str( + transcript_gap_alt_n.posedit.pos.end.base) + 'del' + transcript_gap_alt_n.posedit.edit.ref + 'ins' + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n = self.hp.parse_hgvs_variant(transcript_gap_alt_n_delins_from_dup) + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(transcript_gap_n.posedit.edit.ref) + if transcript_gap_alt_n.posedit.edit.alt is not None: + alternate_bases = list(transcript_gap_alt_n.posedit.edit.alt) + else: + # Deletions with no ins + pre_alternate_bases = list(transcript_gap_alt_n.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = transcript_gap_n.posedit.pos.start.base + alt_start = transcript_gap_alt_n.posedit.pos.start.base + ref_base_dict = {} + for base in reference_bases: + ref_base_dict[ref_start] = str(base) + 
ref_start = ref_start + 1 + + alt_base_dict = {} + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for int in range(transcript_gap_alt_n.posedit.pos.start.base, + transcript_gap_alt_n.posedit.pos.end.base + 1, 1): + if int == alt_start: + alt_base_dict[int] = str(''.join(alternate_bases)) + else: + alt_base_dict[int] = 'X' + + # Generate the alt sequence + alternate_sequence_bases = [] + for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base + 1, 1): + if int in alt_base_dict.keys(): + alternate_sequence_bases.append(alt_base_dict[int]) + else: + alternate_sequence_bases.append(ref_base_dict[int]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Update variant, map to genome using vm and normalize + transcript_gap_n.posedit.edit.alt = alternate_sequence + + try: + transcript_gap_variant = self.vm.n_to_c(transcript_gap_n) + except: + transcript_gap_variant = transcript_gap_n + + try: + hgvs_genomic = self.vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) + hgvs_genomic = self.hn.normalize(hgvs_genomic) + except Exception as e: + if str(e) == "base start position must be <= end position": + # Expansion out is required to map back to the genomic position + pre_base = self.sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.start.base - 2, + transcript_gap_n.posedit.pos.start.base - 1) + post_base = self.sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.end.base, + transcript_gap_n.posedit.pos.end.base + 1) + transcript_gap_n.posedit.pos.start.base = transcript_gap_n.posedit.pos.start.base - 1 + transcript_gap_n.posedit.pos.end.base = transcript_gap_n.posedit.pos.end.base + 1 + transcript_gap_n.posedit.edit.ref = pre_base + transcript_gap_n.posedit.edit.ref + post_base + transcript_gap_n.posedit.edit.alt = pre_base + transcript_gap_n.posedit.edit.alt + 
def myevm_g_to_t(self, hdpOld, evm, hgvs_genomic, alt_ac):
    """Map a genomic (g.) hgvs object onto a transcript.

    :param hdpOld: unused; kept so existing callers keep working.
    :param evm: mapper object exposing ``g_to_t(variant, accession)``.
    :param hgvs_genomic: parsed genomic hgvs object.
    :param alt_ac: accession of the transcript to map onto.
    :return: whatever ``evm.g_to_t`` returns (a parsed c. or n. object).
    """
    return evm.g_to_t(hgvs_genomic, alt_ac)


def hgvs_protein(self, variant, hpOld):
    """Parse a protein (p.) variant string into an hgvs object.

    :param variant: variant description string.
    :param hpOld: unused; kept so existing callers keep working.
    :return: the object returned by ``self.hp.parse_hgvs_variant`` when the
        string contains ``:p.``, otherwise ``None``.
    """
    # A literal substring test replaces the former regex, whose non-raw
    # pattern used invalid escape sequences.
    if ':p.' in variant:
        return self.hp.parse_hgvs_variant(variant)
    return None


def hgvs_r_to_c(self, hgvs_object):
    """Convert an r. (RNA) description into a c. description, in place.

    LRG transcript accessions are first swapped for the RefSeq transcript
    accession looked up through ``self.db``.

    :param hgvs_object: parsed hgvs object; it is mutated and returned.
    :return: the same object with ``type`` set to ``'c'`` and
        ``posedit.edit`` replaced by the converted edit *string*.
    :raises HGVSDataNotAvailableError: if the lookup for an LRG accession
        returns the string ``'none'``.
    """
    # check for LRG_t with r.
    if re.match('LRG', hgvs_object.ac):
        transcript_ac = self.db.get.get_RefSeqTranscriptID_from_lrgTranscriptID(hgvs_object.ac)
        if transcript_ac == 'none':
            raise HGVSDataNotAvailableError(
                'Unable to identify a relevant transcript for ' + hgvs_object.ac)
        hgvs_object.ac = transcript_ac
    hgvs_object.type = 'c'
    # Upper-case everything (bases included), then restore the lower-case
    # keywords of the supported edit types before swapping U for T.
    edit = str(hgvs_object.posedit.edit).upper()
    for keyword in ('del', 'ins', 'inv', 'dup'):
        edit = edit.replace(keyword.upper(), keyword)
    edit = edit.replace('U', 'T')
    hgvs_object.posedit.edit = edit
    return hgvs_object


def hgvs_c_to_r(self, hgvs_object):
    """Convert a c. description into an r. (RNA) description, in place.

    :param hgvs_object: parsed hgvs object; it is mutated and returned.
    :return: the same object with ``type`` set to ``'r'`` and
        ``posedit.edit`` replaced by the lower-cased edit string in which
        every ``t`` has become ``u``.
    """
    hgvs_object.type = 'r'
    hgvs_object.posedit.edit = str(hgvs_object.posedit.edit).lower().replace('t', 'u')
    return hgvs_object


def tx_identity_info(self, variant, hdpOld):
    """Return transcript identity information for a c., n. or r. variant string.

    The accession is everything before the first ``:c.``, ``:n.`` or ``:r.``
    marker (tested in that order) and is passed to
    ``self.hdp.get_tx_identity_info``.

    :param variant: variant description string.
    :param hdpOld: unused; kept so existing callers keep working.
    :return: the data provider's identity record, or ``None`` when the
        string contains none of the three markers.
    """
    for marker in (':c.', ':n.', ':r.'):
        tx_ac, found, _ = variant.partition(marker)
        if found:
            # NOTE (from the original source): the hgnc id is the 6th element
            # of the returned list and tx_ac is the 0th element.
            return self.hdp.get_tx_identity_info(tx_ac)
    return None
def tx_id_info(self, alt_ac, hdpOld):
    """Return transcript identity information for an accession string.

    :param alt_ac: transcript accession passed to the data provider.
    :param hdpOld: unused; kept so existing callers keep working.
    :return: the result of ``self.hdp.get_tx_identity_info(alt_ac)``.
    """
    # NOTE (from the original source): the hgnc id is the 6th element of the
    # returned list and tx_ac is the 0th element.
    return self.hdp.get_tx_identity_info(alt_ac)


def tx_for_gene(self, hgnc, hdpOld):
    """Return the transcript records for a gene (HGNC symbol).

    :param hgnc: gene symbol passed to the data provider.
    :param hdpOld: unused; kept so existing callers keep working.
    :return: the result of ``self.hdp.get_tx_for_gene(hgnc)``.
    """
    return self.hdp.get_tx_for_gene(hgnc)


def ng_extract(self, tx_for_gene):
    """Extract the RefSeqGene (NG_) accession from transcript records.

    :param tx_for_gene: iterable of records in which element 4 is an
        accession string.
    :return: element 4 of the first record whose element 4 starts with
        ``NG_``, or ``None`` if no record qualifies.
    """
    for record in tx_for_gene:
        accession = record[4]
        # Records without an accession are skipped rather than raising.
        if accession is not None and accession.startswith('NG_'):
            return accession
    return None
def tx_exons(self, tx_ac, alt_ac, alt_aln_method, hdpOld):
    """Return exon alignment records for a transcript against a reference.

    :param tx_ac: transcript accession.
    :param alt_ac: accession of the sequence the transcript is aligned to.
    :param alt_aln_method: alignment method name passed to the provider.
    :param hdpOld: unused; kept so existing callers keep working.
    :return: the provider's exon list, reversed when the first record has
        ``alt_strand == -1``. On failure a *string* is returned instead:
        ``'hgvs Exception: <message>'`` when the provider raises an
        ``HGVSError``, or ``'error'`` when the result cannot be indexed.
    """
    try:
        exons = self.hdp.get_tx_exons(tx_ac, alt_ac, alt_aln_method)
    except hgvs.exceptions.HGVSError as e:
        return 'hgvs Exception: ' + str(e)
    try:
        strand = exons[0]['alt_strand']
    except TypeError:
        return 'error'
    # If on the reverse strand, reverse the order of elements
    if strand == -1:
        return exons[::-1]
    return exons


def relevant_transcripts(self, hgvs_genomic, evm, hdpOld, alt_aln_method):
    """Map a genomic variant onto every overlapping transcript.

    :param hgvs_genomic: parsed genomic hgvs object.
    :param evm: mapper exposing ``relevant_transcripts`` and ``g_to_t``.
    :param hdpOld: unused; kept so existing callers keep working.
    :param alt_aln_method: alignment method name passed to the provider.
    :return: list of transcript-level descriptions as strings. If exon data
        for an offset (intronic / UTR style) description cannot be obtained,
        the error string produced by ``tx_exons`` is returned instead.
    """
    # Note (from the original source): the evm method misses one end, the
    # hdp method misses the other, so the two result sets are combined.
    candidates = {}
    region_hits = self.hdp.get_tx_for_region(
        hgvs_genomic.ac, alt_aln_method,
        hgvs_genomic.posedit.pos.start.base - 1,
        hgvs_genomic.posedit.pos.end.base - 1)
    for tx_dat in region_hits:
        candidates[tx_dat[0]] = True
    for tx_ac in evm.relevant_transcripts(hgvs_genomic):
        candidates[tx_ac] = True

    code_var = []
    for candidate in candidates.keys():
        try:
            variant = evm.g_to_t(hgvs_genomic, candidate.rstrip())
        except Exception:
            # Transcripts that cannot be mapped are skipped. The original
            # retried the identical call once before skipping.
            continue

        # Corrective normalisation of offset descriptions in the antisense
        # orientation: descriptions containing '+', '-' or '*'.
        description = str(variant)
        if any(symbol in description for symbol in '+-*'):
            tx_ac = variant.ac
            exons = self.tx_exons(tx_ac, hgvs_genomic.ac, alt_aln_method, None)
            if isinstance(exons, str):
                # tx_exons reports failures as strings; pass them through
                # unchanged, as the original inline code did.
                return exons
            if exons[0]['alt_strand'] == -1:
                # Reverse normalize hgvs_genomic, then map back to coding
                rev_hgvs_genomic = self.reverse_hn.normalize(hgvs_genomic)
                variant = evm.g_to_t(rev_hgvs_genomic, tx_ac)
        code_var.append(str(variant))
    return code_var


def validate(self, input, hpOld, vrOld):
    """Parse an HGVS string and run it through the validator.

    For protein (``:p.``) descriptions the attributes the validator reads
    (``offset`` and ``datum`` on both positions, ``ref_n`` on the edit) are
    filled in when missing.

    :param input: HGVS description string.
    :param hpOld: unused; kept so existing callers keep working.
    :param vrOld: unused; kept so existing callers keep working.
    :return: the string ``'false'`` when validation passes, otherwise the
        caught ``HGVSError`` instance.
    """
    hgvs_input = self.hp.parse_hgvs_variant(input)
    # Literal test; the former regex ":p." left the dot unescaped.
    if ':p.' in input:
        pos = hgvs_input.posedit.pos
        for attribute in ('offset', 'datum'):
            for position in (pos.start, pos.end):
                if not hasattr(position, attribute):
                    setattr(position, attribute, 0)
        if not hasattr(hgvs_input.posedit.edit, 'ref_n'):
            hgvs_input.posedit.edit.ref_n = pos.end.base - pos.start.base + 1

    try:
        self.vr.validate(hgvs_input)
    except hgvs.exceptions.HGVSError as e:
        return e
    return 'false'


def hgnc_rest(self, path):
    """Query the HGNC REST service.

    :param path: path appended to ``http://rest.genenames.org``.
    :return: dict with keys ``'record'`` (decoded JSON on HTTP status 200,
        otherwise ``''``) and ``'error'`` (``'false'`` on success, otherwise
        a message).
    """
    data = {
        'record': '',
        'error': 'false'
    }
    headers = {
        'Accept': 'application/json',
    }
    target = urlparse('http://rest.genenames.org' + path)
    # collect the response
    response, content = http.Http().request(target.geturl(), 'GET', '', headers)
    if response['status'] == '200':
        # assume that content is a json reply
        data['record'] = json.loads(content)
    else:
        data['error'] = "Unable to contact the HGNC database: Please try again later"
    return data


def entrez_efetch(self, db, id, rettype, retmode):
    """Fetch one record from an Entrez database and parse it with SeqIO.

    :param db: Entrez database name.
    :param id: record identifier.
    :param rettype: Entrez return type.
    :param retmode: Entrez return mode.
    :return: the record produced by ``SeqIO.read(handle, "gb")``.
    """
    Entrez.email = self.entrezID
    handle = Entrez.efetch(db=db, id=id, rettype=rettype, retmode=retmode)
    try:
        return SeqIO.read(handle, "gb")
    finally:
        # Close the handle even when parsing fails.
        handle.close()


def entrez_read(self, db, id, retmode):
    """Fetch from an Entrez database and parse the reply with ``Entrez.read``.

    :param db: Entrez database name.
    :param id: record identifier.
    :param retmode: Entrez return mode.
    :return: the structure produced by ``Entrez.read``.
    """
    Entrez.email = self.entrezID
    handle = Entrez.efetch(db=db, id=id, retmode=retmode)
    try:
        return Entrez.read(handle)
    finally:
        # Close the handle even when parsing fails.
        handle.close()


def revcomp(self, bases):
    """Return the reverse complement of a nucleotide sequence.

    Only upper-case ``A``, ``C``, ``G`` and ``T`` are complemented; any
    other character is dropped from the result (as in the original
    implementation).

    :param bases: sequence string.
    :return: reverse-complemented string.
    """
    complement = {'G': 'C', 'C': 'G', 'A': 'T', 'T': 'A'}
    return ''.join(complement[base] for base in reversed(bases) if base in complement)


def _merge_hgvs(self, hgvs_variant_list, normalizer, keep_genomic):
    """Merge ordered, non-overlapping variants into one delins (shared helper).

    :param hgvs_variant_list: HGVS strings or parsed hgvs objects, in
        ascending position order on a single reference sequence.
    :param normalizer: object whose ``normalize`` method is applied to every
        variant and to the merged result.
    :param keep_genomic: when true, g. variants are collected alongside the
        c. variants that were converted to n. coordinates.
    :return: the merged (and, where supported, normalized) hgvs object.
    :raises mergeHGVSerror: on offset positions, mixed reference sequences,
        out-of-order or overlapping variants, or a failed c. to n. mapping.
    """
    # Sanity check and format the submitted variants
    prepared = []
    for hgvs_v in hgvs_variant_list:
        try:
            hgvs_v = self.hp.parse_hgvs_variant(hgvs_v)
        except Exception:
            # Best effort: anything the parser rejects is used as submitted.
            pass
        # Let hgvs errors deal with invalid variants and not hgvs objects
        self.vr.validate(hgvs_v)
        if hgvs_v.type == 'c':
            try:
                hgvs_v = self.vm.c_to_n(hgvs_v)
            except Exception:
                raise mergeHGVSerror("Unable to map from c. position to absolute position")
            prepared.append(hgvs_v)
        elif keep_genomic and hgvs_v.type == 'g':
            prepared.append(hgvs_v)
    if prepared:
        hgvs_variant_list = copy.deepcopy(prepared)

    accession = None
    ref_type = None
    merge_start_pos = None
    merge_end_pos = None
    full_list = []

    # Loop through the submitted variants and gather the required info
    for hgvs_v in hgvs_variant_list:
        # No intronic positions
        try:
            if hgvs_v.posedit.pos.start.offset != 0:
                raise mergeHGVSerror("Base-offset position submitted")
            if hgvs_v.posedit.pos.end.offset != 0:
                raise mergeHGVSerror("Base-offset position submitted")
        except AttributeError:
            # Positions without an offset attribute cannot be intronic.
            pass

        # Normalize the variant, which also fills in the reference sequence
        hgvs_v = normalizer.normalize(hgvs_v)

        # Ensure that multiple reference sequences have not been queried
        if accession is None:
            accession = hgvs_v.ac
            ref_type = hgvs_v.type
        elif hgvs_v.ac != accession:
            raise mergeHGVSerror("More than one reference sequence submitted")

        if merge_start_pos is None:
            # First variant sets the initial range
            merge_start_pos = hgvs_v.posedit.pos.start.base
            merge_end_pos = hgvs_v.posedit.pos.end.base
            full_list.append(hgvs_v)
            continue

        # Ensure variants are in the correct order and not overlapping
        if hgvs_v.posedit.pos.start.base <= merge_end_pos:
            raise mergeHGVSerror("Submitted variants are out of order or their ranges overlap")

        gap_start = merge_end_pos + 1
        gap_end = hgvs_v.posedit.pos.start.base - 1
        if gap_start <= gap_end:
            # Create a filler variant for the unchanged sequence in between.
            # Directly adjacent variants have no gap; an empty filler
            # ("N+1_Ndelins") cannot be parsed, so none is created.
            ins_seq = self.sf.fetch_seq(hgvs_v.ac, merge_end_pos, gap_end)
            gapping = hgvs_v.ac + ':' + hgvs_v.type + '.' + str(gap_start) + '_' + str(
                gap_end) + 'delins' + ins_seq
            full_list.append(self.hp.parse_hgvs_variant(gapping))
        merge_end_pos = hgvs_v.posedit.pos.end.base
        full_list.append(hgvs_v)

    # Generate the alt sequence
    alt_sequence = ''.join(vvHGVS.hgvs_ref_alt(hgvs_v)['alt'] for hgvs_v in full_list)

    # Fetch the reference sequence covered by the merged range
    reference_sequence = self.sf.fetch_seq(accession, merge_start_pos - 1, merge_end_pos)

    # Generate an hgvs_delins (a plain del when nothing is inserted)
    delins = accession + ':' + ref_type + '.' + str(merge_start_pos) + '_' + str(
        merge_end_pos) + 'del' + reference_sequence
    if alt_sequence != '':
        delins = delins + 'ins' + alt_sequence
    hgvs_delins = self.hp.parse_hgvs_variant(delins)
    try:
        hgvs_delins = self.vm.n_to_c(hgvs_delins)
    except Exception:
        # Best effort: descriptions that cannot be expressed as c. stay as is.
        pass
    # Normalize (allow variants crossing into different exons)
    try:
        hgvs_delins = normalizer.normalize(hgvs_delins)
    except HGVSUnsupportedOperationError:
        pass
    return hgvs_delins


def merge_hgvs_3pr(self, hgvs_variant_list):
    """Merge multiple HGVS variants into a single delins, normalizing with ``self.hn``.

    The original source describes this as 3 prime normalization. Both c.
    variants (converted to n.) and g. variants are collected for merging.

    :param hgvs_variant_list: HGVS strings or parsed hgvs objects, in
        ascending position order on one reference sequence.
    :return: merged hgvs object.
    :raises mergeHGVSerror: see ``_merge_hgvs``.
    """
    return self._merge_hgvs(hgvs_variant_list, self.hn, True)


def merge_hgvs_5pr(self, hgvs_variant_list):
    """Merge multiple HGVS variants into a single delins, normalizing with ``self.reverse_hn``.

    The original source describes this as 5 prime normalization. Only c.
    variants (converted to n.) are collected; when none are present the
    submitted list is merged as given.

    :param hgvs_variant_list: HGVS strings or parsed hgvs objects, in
        ascending position order on one reference sequence.
    :return: merged hgvs object.
    :raises mergeHGVSerror: see ``_merge_hgvs``.
    """
    return self._merge_hgvs(hgvs_variant_list, self.reverse_hn, False)


def merge_pseudo_vcf(self, vcf_list, genome_build):
    """Merge pseudo-VCF variant strings into one HGVS delins.

    Each call is converted with ``vvHGVS.pvcf_to_hgvs`` (normalization
    direction 5), the results are merged with ``merge_hgvs_5pr`` and the
    merged variant is finally normalized with ``self.hn``.

    :param vcf_list: pseudo-VCF description strings.
    :param genome_build: genome build passed to the converter.
    :return: merged, normalized hgvs object.
    """
    # Convert pseudo_vcf list into a HGVS list
    hgvs_list = [
        vvHGVS.pvcf_to_hgvs(call, genome_build, normalization_direction=5, validator=self)
        for call in vcf_list
    ]
    # Merge, then normalize with self.hn
    return self.hn.normalize(self.merge_hgvs_5pr(hgvs_list))
+ str(pos) + pe + current_allele.append(vrt) + my_alleles.append(current_allele) + else: + type, remainder = remainder.split('.') + if re.search('\(;\)', remainder) and re.search('\];', remainder): + # NM_004006.2:c.[296T>G];[476T>C](;)1083A>C(;)1406del + pre_alleles = remainder.split('(;)') + pre_merges = [] + alleles = [] + for allele in pre_alleles: + if re.match('\[', allele): + pre_merges.append(allele) + else: + alleles.append(allele) + # Extract descriptions + my_alleles = [] + # First alleles + for posedits in alleles: + posedit_list = posedits.split(';') + current_allele = [] + for pe in posedit_list: + vrt = accession + ':' + type + '.' + pe + current_allele.append(vrt) + my_alleles.append(current_allele) + + # Then Merges + alleles = [] + remainder = ';'.join(pre_merges) + remainder = remainder[1:-1] # removes the first [ and the last ] + alleles = remainder.split('];[') + # now separate out the variants in each allele§ + for posedits in alleles: + posedit_list = posedits.split(';') + current_allele = [] + for pe in posedit_list: + vrt = accession + ':' + type + '.' + pe + current_allele.append(vrt) + my_alleles.append(current_allele) + # Now merge the alleles into a single variant + merged_alleles = [] + for each_allele in my_alleles: + if re.search('\?', str(each_allele)): + # NM_004006.2:c.[2376G>C];[?] + continue + merge = [] + allele = str(self.merge_hgvs_3pr(each_allele)) + merge.append(allele) + merged_alleles.append(merge) + my_alleles = merged_alleles + + elif re.search('\(;\)', remainder): + # If statement for uncertainties + # NM_004006.2:c.[296T>G;476C>T];[476C>T](;)1083A>C + if re.search('\[', remainder): + raise alleleVariantError('Unsupported format ' + type + '.' 
+ remainder) + # NM_004006.2:c.2376G>C(;)3103del + # NM_000548.3:c.3623_3647del(;)3745_3756dup + alleles = remainder.split('(;)') + # now separate out the variants in each allele§ + my_alleles = [] + for posedits in alleles: + posedit_list = posedits.split(';') + current_allele = [] + for pe in posedit_list: + vrt = accession + ':' + type + '.' + pe + current_allele.append(vrt) + my_alleles.append(current_allele) + else: + # If statement for uncertainties + if re.search('\(', remainder): + raise alleleVariantError('Unsupported format ' + type + '.' + remainder) + # NM_004006.2:c.[2376G>C];[3103del] + # NM_004006.2:c.[2376G>C];[3103del] + # NM_004006.2:c.[296T>G;476C>T;1083A>C];[296T>G;1083A>C] + # NM_000548.3:c.[4358_4359del;4361_4372del] + remainder = remainder[1:-1] # removes the first [ and the last ] + alleles = remainder.split('];[') + # now separate out the variants in each allele§ + my_alleles = [] + for posedits in alleles: + posedit_list = posedits.split(';') + current_allele = [] + for pe in posedit_list: + vrt = accession + ':' + type + '.' + pe + current_allele.append(vrt) + my_alleles.append(current_allele) + # Now merge the alleles into a single variant + merged_alleles = [] + + for each_allele in my_alleles: + print each_allele + if re.search('\?', str(each_allele)): + # NM_004006.2:c.[2376G>C];[?] 
+ continue + merge = [] + allele = str(self.merge_hgvs_3pr(each_allele)) + merge.append(allele) + merged_alleles.append(merge) + my_alleles = merged_alleles + + # Extract alleles into strings + allele_strings = [] + for alleles_l in my_alleles: + for allele in alleles_l: + allele_strings.append(allele) + my_alleles = allele_strings + + # return + return my_alleles + except Exception as e: + import traceback + exc_type, exc_value, last_traceback = sys.exc_info() + te = traceback.format_exc() + raise alleleVariantError(str(e)) diff --git a/VariantValidator/modules/vvCore.py b/VariantValidator/modules/vvMixinCore.py similarity index 98% rename from VariantValidator/modules/vvCore.py rename to VariantValidator/modules/vvMixinCore.py index 0c9ce7da..33da2a30 100644 --- a/VariantValidator/modules/vvCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -47,14 +47,16 @@ #from variantanalyser import supported_chromosome_builds as va_scb #from variantanalyser import gap_genes as gapGenes #from variantanalyser.liftover import liftover as lift_over +from vvLiftover import liftover as lift_over #??? 
import vvFunctions as fn import vvDatabase import vvChromosomes +import vvMixinConverters from vvObjects import variantValidatorError -class mixin: +class Mixin(vvMixinConverters.Mixin): def validate(self, batch_variant, selected_assembly, select_transcripts, transcriptSet="refseq"): logger.info(batch_variant + ' : ' + selected_assembly) # Take start time @@ -62,7 +64,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Set pre defined variables # SeqFetcher - sf = hgvs.dataproviders.seqfetcher.SeqFetcher() + # sf = hgvs.dataproviders.seqfetcher.SeqFetcher() primary_assembly=None try: @@ -369,7 +371,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr old_ref, old_alt = pos_ref_alt.split('>') old_ref = old_ref.replace(position, '') position = int(position) - 1 - required_base = sf.fetch_seq(accession, start_i=position - 1, end_i=position) + required_base = self.sf.fetch_seq(accession, start_i=position - 1, end_i=position) ref = required_base + old_ref alt = required_base positionAndEdit = str(position) + ref + '>' + alt @@ -379,7 +381,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr old_ref, old_alt = pos_ref_alt.split('>') # old_ref = old_ref.replace(position, '') position = int(position) - 1 - required_base = sf.fetch_seq(accession, start_i=position - 1, end_i=position) + required_base = self.sf.fetch_seq(accession, start_i=position - 1, end_i=position) ref = required_base alt = required_base + old_alt positionAndEdit = str(position) + ref + '>' + alt @@ -631,7 +633,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr delete = m.group(2) starts = posedit.split(delete)[0] re_try = ref_ac + ':' + ref_type + '.' 
+ starts + 'del' + delete[0] + 'ins' + insert - hgvs_re_try = hp.parse_hgvs_variant(re_try) + hgvs_re_try = self.hp.parse_hgvs_variant(re_try) hgvs_re_try.posedit.edit.ref = delete start_pos = str(hgvs_re_try.posedit.pos.start) if re.search('\-', start_pos): @@ -659,7 +661,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr not_delins = not_sub # Parse into hgvs object try: - hgvs_not_delins = hp.parse_hgvs_variant(not_delins) + hgvs_not_delins = self.hp.parse_hgvs_variant(not_delins) except hgvs.exceptions.HGVSError as e: # Sort out multiple ALTS from VCF inputs if re.search("([GATCgatc]+)>([GATCgatc]+),([GATCgatc]+)", not_delins): @@ -738,7 +740,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr digits = digits.group(1) # Remove them so that the string SHOULD parse try: - hgvs_failed = hp.parse_hgvs_variant(failed) + hgvs_failed = self.hp.parse_hgvs_variant(failed) except hgvs.exceptions.HGVSError as e: error = str(e) error = 'The syntax of the input variant description is invalid ' @@ -748,10 +750,10 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr validation['warnings'] = validation['warnings'] + error logger.warning(error + " " + e) continue - hgvs_failed = hp.parse_hgvs_variant(failed) + hgvs_failed = self.hp.parse_hgvs_variant(failed) hgvs_failed.posedit.edit = str(hgvs_failed.posedit.edit).replace(digits, '') failed = str(hgvs_failed) - hgvs_failed = hp.parse_hgvs_variant(failed) + hgvs_failed = self.hp.parse_hgvs_variant(failed) automap = 'Non HGVS compliant variant description ' + input + ' automapped to ' + failed validation['warnings'] = validation['warnings'] + ': ' + automap logger.warning(automap) @@ -832,7 +834,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr pass try: # Submit to allele extraction function - alleles = va_func.hgvs_alleles(input, hp, vr, hn, vm, sf) + alleles = self.hgvs_alleles(input, self.hp, 
self.vr, self.hn, self.vm, self.sf) validation['warnings'] = validation[ 'warnings'] + ': ' + 'Automap has extracted possible variant descriptions' logger.resub('Automap has extracted possible variant descriptions, resubmitting') @@ -845,7 +847,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr batch_list.append(query) validation['write'] = 'false' continue - except va_func.alleleVariantError as e: + except fn.alleleVariantError as e: if re.search("Cannot validate sequence of an intronic variant", str(e)): validation['warnings'] = validation[ 'warnings'] + ': ' + 'Intronic positions not supported for HGVS Allele descriptions' @@ -862,7 +864,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr Returns a dictionary containing the formatted input string and the variant type Accepts c, g, n, r currently """ - formatted = va_func.user_input(input) + formatted = fn.user_input(input) # Validator specific variables, note, not all will be necessary for batch, but keep to ensure that batch works # vars = [] @@ -918,7 +920,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr variant = variant.replace('DUP', 'dup') try: - input_parses = hp.parse_hgvs_variant(variant) + input_parses = self.hp.parse_hgvs_variant(variant) except hgvs.exceptions.HGVSError as e: error = str(e) if error == 'false': @@ -976,7 +978,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # They initiate quickly, so no need to move them unnecessarily # Create easy variant mapper (over variant mapper) and splign locked evm - evm = hgvs.assemblymapper.AssemblyMapper(hdp, + evm = hgvs.assemblymapper.AssemblyMapper(self.hdp, assembly_name=primary_assembly, alt_aln_method=alt_aln_method, normalize=True, @@ -984,7 +986,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr ) # Setup a reverse normalize instance and non-normalize evm - no_norm_evm = 
hgvs.assemblymapper.AssemblyMapper(hdp, + no_norm_evm = hgvs.assemblymapper.AssemblyMapper(self.hdp, assembly_name=primary_assembly, alt_aln_method=alt_aln_method, normalize=False, @@ -992,7 +994,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr ) # Create a specific minimal evm with no normalizer and no replace_reference - min_evm = hgvs.assemblymapper.AssemblyMapper(hdp, + min_evm = hgvs.assemblymapper.AssemblyMapper(self.hdp, assembly_name=primary_assembly, alt_aln_method=alt_aln_method, normalize=False, @@ -1155,7 +1157,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr Primarily, this code filters out variants that cannot realistically be auto corrected and will cause the downstream functions to return errors """ - input_parses = hp.parse_hgvs_variant(input) + input_parses = self.hp.parse_hgvs_variant(input) if input_parses.type == 'g': if re.match('^NC_', input_parses.ac) or re.match('^NG_', input_parses.ac) or re.match('^NT_', input_parses.ac) or re.match( @@ -1167,7 +1169,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr logger.warning(error) continue try: - vr.validate(input_parses) + self.vr.validate(input_parses) except hgvs.exceptions.HGVSError as e: error = str(e) validation['warnings'] = validation['warnings'] + ': ' + str(error) @@ -1194,7 +1196,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Catch variation in UTRs # These should be in the sequence so can be directly validated. Need to pass to n. 
try: - vr.validate(input_parses) + self.vr.validate(input_parses) except hgvs.exceptions.HGVSError as e: error = str(e) if re.search('datums is ill-defined', error): @@ -1243,7 +1245,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr input_parses.posedit.pos.start)) and input_parses.posedit.pos.start.offset == 0: # downstream positions tot_end_pos = str(input_parses.posedit.pos.start).replace('*', '') - ts_seq = sf.fetch_seq(input_parses.ac) + ts_seq = self.sf.fetch_seq(input_parses.ac) boundary = len(ts_seq) - ref_end input_parses.posedit.pos.start.base = boundary offset = int(tot_end_pos) - int(boundary) @@ -1251,21 +1253,21 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if re.match('\*', str( input_parses.posedit.pos.end)) and input_parses.posedit.pos.end.offset == 0: tot_end_pos = str(input_parses.posedit.pos.end).replace('*', '') - ts_seq = sf.fetch_seq(input_parses.ac) + ts_seq = self.sf.fetch_seq(input_parses.ac) boundary = len(ts_seq) - ref_end input_parses.posedit.pos.end.base = boundary offset = int(tot_end_pos) - int(boundary) input_parses.posedit.pos.end.offset = offset # Create a lose vm instance - lose_vm = hgvs.variantmapper.VariantMapper(hdp, + lose_vm = hgvs.variantmapper.VariantMapper(self.hdp, replace_reference=True, prevalidation_level=None ) - report_gen = va_func.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, - primary_assembly, lose_vm, hp, hn, sf, nr_vm) + report_gen = vvConverters.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, + primary_assembly, lose_vm, self.hp, self.hn, self.sf, self.nr_vm) error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant: Instead use ' + fn.valstr( report_gen) except Exception as e: @@ -1289,8 +1291,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if re.search('n.1-', str(input_parses)): input_parses = 
evm.n_to_c(input_parses) error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' - genomic_position = va_func.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, - vm, hp, hn, sf, nr_vm) + genomic_position = vvConverters.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, + self.vm, self.hp, self.hn, self.sf, self.nr_vm) error = error + fn.valstr(genomic_position) validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) @@ -1306,7 +1308,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Can we go c-g-c try: to_genome = va_func.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, vm, - hp, hn, sf, nr_vm) + hp, hn, self.sf, nr_vm) to_tx = evm.g_to_t(to_genome, input_parses.ac) except hgvs.exceptions.HGVSInvalidIntervalError as e: error = str(e) @@ -1329,7 +1331,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if re.match('\*', str(input_parses.posedit.pos.start)): # downstream positions tot_end_pos = str(input_parses.posedit.pos.start).replace('*', '') - ts_seq = sf.fetch_seq(input_parses.ac) + ts_seq = self.sf.fetch_seq(input_parses.ac) boundary = len(ts_seq) - ref_end input_parses.posedit.pos.start.base = boundary te1, te2 = tot_end_pos.split('+') @@ -1338,7 +1340,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr input_parses.posedit.pos.start.offset = offset if re.match('\*', str(input_parses.posedit.pos.end)): tot_end_pos = str(input_parses.posedit.pos.end).replace('*', '') - ts_seq = sf.fetch_seq(input_parses.ac) + ts_seq = self.sf.fetch_seq(input_parses.ac) boundary = len(ts_seq) - ref_end input_parses.posedit.pos.end.base = boundary te1, te2 = tot_end_pos.split('+') @@ -1347,7 +1349,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr 
input_parses.posedit.pos.end.offset = offset report_gen = va_func.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, - primary_assembly, lose_vm, hp, hn, sf, nr_vm) + primary_assembly, lose_vm, hp, hn, self.sf, nr_vm) error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' + fn.valstr( report_gen) except Exception as e: @@ -1385,8 +1387,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = str(e) if re.search('bounds', error): try: - report_gen = va_func.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, - lose_vm, hp, hn, sf, nr_vm) + report_gen = self.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, + self.lose_vm, self.hp, self.hn, self.sf, self.nr_vm) except hgvs.exceptions.HGVSError as e: fn.exceptPass() else: @@ -1414,12 +1416,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Create a specific minimal evm with no normalizer and no replace_reference # Have to use this method due to potential multi chromosome error, note, normalizes but does not replace sequence try: - output = va_func.noreplace_myevm_t_to_g(input_parses, evm, self.hdp, primary_assembly, vm, hn, - hp, sf, no_norm_evm) + output = self.noreplace_myevm_t_to_g(input_parses, evm, self.hdp, primary_assembly, self.vm, self.hn, + self.hp, self.sf, no_norm_evm) except hgvs.exceptions.HGVSDataNotAvailableError as e: tx_ac = input_parses.ac try: - gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(tx_ac) + gene_symbol = self.db.get.get_gene_symbol_from_transcriptID(tx_ac) except: gene_symbol = 'none' if gene_symbol == 'none': @@ -1468,7 +1470,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue try: - vr.validate(output) + self.vr.validate(output) except hgvs.exceptions.HGVSError as e: error = str(e) validation['warnings'] = 
validation['warnings'] + ': ' + str(error) @@ -1478,7 +1480,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: # All other variation try: - vr.validate(input_parses) + self.vr.validate(input_parses) except hgvs.exceptions.HGVSUnsupportedOperationError: fn.exceptPass() except hgvs.exceptions.HGVSInvalidVariantError as e: @@ -1535,7 +1537,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Catch variation in UTRs # These should be in the sequence so can be directly validated. Need to pass to n. try: - vr.validate(input_parses) + self.vr.validate(input_parses) except hgvs.exceptions.HGVSError as e: error = str(e) if re.search('intronic variant', error): @@ -1570,8 +1572,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr remainder = remainder + 1 input_parses.posedit.pos.end.base = boundary input_parses.posedit.pos.end.offset = remainder - report_gen = va_func.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, - lose_vm, hp, hn, sf, nr_vm) + report_gen = self.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, + self.lose_vm, self.hp, self.hn, self.sf, self.nr_vm) error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' + fn.valstr( report_gen) except Exception as e: @@ -1586,8 +1588,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if re.search('n.1-', str(input_parses)): error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' - genomic_position = va_func.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, vm, - hp, hn, sf, nr_vm) + genomic_position = self.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, self.vm, + self.hp, self.hn, self.sf, self.nr_vm) error = error + fn.valstr(genomic_position) validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) @@ -1598,13 +1600,13 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if re.search('\d\-', str(input_parses)) or re.search('\d\+', str(input_parses)): # Quick look at syntax validation try: - vr.validate(input_parses) + self.vr.validate(input_parses) except hgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) if re.search('bounds', error): try: - report_gen = va_func.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, - lose_vm, hp, hn, sf, nr_vm) + report_gen = self.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, + self.lose_vm, self.hp, self.hn, self.sf, self.nr_vm) except hgvs.exceptions.HGVSError as e: fn.exceptPass() else: @@ -1630,14 +1632,14 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue elif re.search('Cannot validate sequence of an intronic variant', error): try: - test_g = va_func.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, vm, - hp, hn, sf, nr_vm) + test_g = self.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, self.vm, + self.hp, self.hn, self.sf, self.nr_vm) back_to_n = evm.g_to_t(test_g, input_parses.ac) except hgvs.exceptions.HGVSError as e: error = str(e) if re.search('bounds', error): - report_gen = va_func.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, - primary_assembly, lose_vm, hp, hn, sf, nr_vm) + report_gen = self.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, + primary_assembly, self.lose_vm, self.hp, self.hn, self.sf, self.nr_vm) error = 'Using a transcript 
reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' + fn.valstr( report_gen) validation['warnings'] = validation['warnings'] + ': ' + str(error) @@ -1649,12 +1651,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Create a specific minimal evm with no normalizer and no replace_reference # Have to use this method due to potential multi chromosome error, note, normalizes but does not replace sequence try: - output = va_func.noreplace_myevm_t_to_g(input_parses, evm, self.hdp, primary_assembly, vm, hn, - hp, sf, no_norm_evm) + output = self.noreplace_myevm_t_to_g(input_parses, evm, self.hdp, primary_assembly, self.vm, self.hn, + self.hp, self.sf, no_norm_evm) except hgvs.exceptions.HGVSDataNotAvailableError as e: tx_ac = input_parses.ac try: - gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(tx_ac) + gene_symbol = self.db.get.get_gene_symbol_from_transcriptID(tx_ac) except: gene_symbol = 'none' if gene_symbol == 'none': @@ -1689,7 +1691,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr logger.warning(str(error)) continue try: - vr.validate(output) + self.vr.validate(output) except hgvs.exceptions.HGVSError as e: error = str(e) validation['warnings'] = validation['warnings'] + ': ' + str(error) @@ -1699,7 +1701,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: # All other variation try: - vr.validate(input_parses) + self.vr.validate(input_parses) except hgvs.exceptions.HGVSUnsupportedOperationError: fn.exceptPass() @@ -1768,7 +1770,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_mito.type = 'm' caution = '' try: - vr.validate(hgvs_mito) + self.vr.validate(hgvs_mito) except hgvs.exceptions.HGVSError as e: error = caution + ': ' + str(e) validation['warnings'] = validation['warnings'] + ': ' + str(error) @@ -1781,7 +1783,7 @@ 
def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue else: # Any transcripts? - rel_var = va_func.relevant_transcripts(hgvs_mito, evm, self.hdp, alt_aln_method, reverse_normalizer) + rel_var = self.relevant_transcripts(hgvs_mito, evm, self.hdp, alt_aln_method, reverse_normalizer) hgvs_genomic = copy.deepcopy(hgvs_mito) if len(rel_var) == 0: validation['genomic_g'] = fn.valstr(hgvs_mito) @@ -2070,7 +2072,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_coding_variant = hp.parse_hgvs_variant(var) try: hgvs_genomic = va_func.myevm_t_to_g(hgvs_coding_variant, self.hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) + primary_assembly, vm, hp, hn, self.sf, nr_vm) except hgvs.exceptions.HGVSError as e: try_rel_var = [] else: @@ -2084,7 +2086,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Tripple check this assumption by querying the gene position database if len(rel_var) == 0: - vcf_dict = vvHGVS.hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf) + vcf_dict = vvHGVS.hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, self.sf) not_di = str(hgvs_genomic.ac) + ':g.' 
+ str(vcf_dict['pos']) + '_' + str( int(vcf_dict['pos']) + (len(vcf_dict['ref']) - 1)) + 'del' + vcf_dict['ref'] + 'ins' + \ vcf_dict['alt'] @@ -2187,7 +2189,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # VCF vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, - reverse_normalizer, sf) + reverse_normalizer, self.sf) chr = vcf_dict['chr'] pos = vcf_dict['pos'] ref = vcf_dict['ref'] @@ -2210,7 +2212,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr c.posedit.edit.ref = c.posedit.edit.ref.upper() if hasattr(c.posedit.edit, 'alt') and c.posedit.edit.alt is not None: c.posedit.edit.alt = c.posedit.edit.alt.upper() - stash_input = va_func.myevm_t_to_g(c, self.hdp, no_norm_evm, primary_assembly, vm, hp, hn, sf, + stash_input = va_func.myevm_t_to_g(c, self.hdp, no_norm_evm, primary_assembly, vm, hp, hn, self.sf, nr_vm) if re.match('NC_', str(stash_input)) or re.match('NT_', str(stash_input)) or re.match('NW_', str( @@ -2226,7 +2228,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr stash_ac = hgvs_stash.ac # MAKE A NO NORM HGVS2VCF - stash_dict = vvHGVS.pos_lock_hgvs2vcf(hgvs_stash, primary_assembly, reverse_normalizer, sf) + stash_dict = vvHGVS.pos_lock_hgvs2vcf(hgvs_stash, primary_assembly, reverse_normalizer, self.sf) stash_ac = hgvs_stash.ac stash_pos = int(stash_dict['pos']) stash_ref = stash_dict['ref'] @@ -2395,7 +2397,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) hgvs_not_delins.posedit.edit.ref = ref_bases hgvs_not_delins.posedit.edit.alt = ref_bases[ :1] + 
hgvs_not_delins.posedit.edit.alt[ @@ -2409,7 +2411,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) hgvs_not_delins.posedit.edit.ref = ref_bases hgvs_not_delins.posedit.edit.alt = ref_bases[ :1] + hgvs_not_delins.posedit.edit.alt[ @@ -2419,7 +2421,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) hgvs_not_delins.posedit.edit.ref = ref_bases hgvs_not_delins.posedit.edit.alt = ref_bases[ :1] + hgvs_not_delins.posedit.edit.alt[ @@ -2433,7 +2435,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) hgvs_not_delins.posedit.edit.ref = ref_bases hgvs_not_delins.posedit.edit.alt = ref_bases[ :1] + hgvs_not_delins.posedit.edit.alt[ @@ -2479,7 +2481,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) + primary_assembly, vm, hp, hn, self.sf, nr_vm) 
rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): @@ -2492,7 +2494,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) + primary_assembly, vm, hp, hn, self.sf, nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 @@ -2520,7 +2522,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Add the additional base to the ALT start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + ref_bases = self.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases if re.match('NM_', str(rn_tx_hgvs_not_delins)): test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) @@ -2528,7 +2530,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) + primary_assembly, vm, hp, hn, self.sf, nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): @@ -2542,7 +2544,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) + primary_assembly, vm, hp, 
hn, self.sf, nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 @@ -2699,7 +2701,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Correct insertion alts if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + append_ref = self.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, genomic_gap_fill_variant_alt.posedit.pos.end.base) genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ @@ -2998,7 +3000,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_stash = copy.deepcopy(stash_hgvs_not_delins) stash_ac = hgvs_stash.ac # Make a hard left and hard right not delins g. - stash_dict_right = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, sf) + stash_dict_right = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, self.sf) stash_pos_right = int(stash_dict_right['pos']) stash_ref_right = stash_dict_right['ref'] stash_alt_right = stash_dict_right['alt'] @@ -3007,7 +3009,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr stash_ac + ':' + hgvs_stash.type + '.' 
+ str( stash_pos_right) + '_' + stash_end_right + 'del' + stash_ref_right + 'ins' + stash_alt_right) stash_dict_left = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, primary_assembly, - reverse_normalizer, sf) + reverse_normalizer, self.sf) stash_pos_left = int(stash_dict_left['pos']) stash_ref_left = stash_dict_left['ref'] stash_alt_left = stash_dict_left['alt'] @@ -3155,7 +3157,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Se rec_var to '' so it can be updated later rec_var = '' try: - to_g = va_func.myevm_t_to_g(obj, self.hdp, no_norm_evm, primary_assembly, vm, hp, hn, sf, nr_vm) + to_g = va_func.myevm_t_to_g(obj, self.hdp, no_norm_evm, primary_assembly, vm, hp, hn, self.sf, nr_vm) genomic_ac = to_g.ac except hgvs.exceptions.HGVSDataNotAvailableError as e: if (re.search('~', str(e)) and re.search('Alignment is incomplete', str(e))) or re.match( @@ -3243,7 +3245,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr elif geno.search(input): if plus.search(variant) or minus.search(variant): - to_g = va_func.genomic(variant, no_norm_evm, hp, self.hdp, primary_assembly, vm, hn, sf, nr_vm) + to_g = va_func.genomic(variant, no_norm_evm, hp, self.hdp, primary_assembly, vm, hn, self.sf, nr_vm) es = re.compile('error') if es.search(str(to_g)): if alt_aln_method != 'genebuild': @@ -3356,7 +3358,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr coding = va_func.coding(variant, hp) trans_acc = coding.ac # c to Genome coordinates - Map the variant to the genome - pre_var = va_func.genomic(variant, no_norm_evm, hp, self.hdp, primary_assembly, vm, hn, sf, + pre_var = va_func.genomic(variant, no_norm_evm, hp, self.hdp, primary_assembly, vm, hn, self.sf, nr_vm) # genome back to C coordinates post_var = va_func.myevm_g_to_t(evm, pre_var, trans_acc) @@ -3459,7 +3461,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr pre_var = 
hp.parse_hgvs_variant(variant) try: pre_var = va_func.myevm_t_to_g(pre_var, self.hdp, no_norm_evm, primary_assembly, vm, hp, - hn, sf, nr_vm) + hn, self.sf, nr_vm) except: e = sys.exc_info()[1] error = str(e) @@ -3570,7 +3572,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr coding = va_func.coding(variant, hp) trans_acc = coding.ac # c to Genome coordinates - Map the variant to the genome - pre_var = va_func.genomic(variant, no_norm_evm, hp, self.hdp, primary_assembly, vm, hn, sf, + pre_var = va_func.genomic(variant, no_norm_evm, hp, self.hdp, primary_assembly, vm, hn, self.sf, nr_vm) # genome back to C coordinates post_var = va_func.myevm_g_to_t(evm, pre_var, trans_acc) @@ -3637,7 +3639,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr coding = va_func.coding(variant, hp) trans_acc = coding.ac # c to Genome coordinates - Map the variant to the genome - pre_var = va_func.genomic(variant, no_norm_evm, hp, self.hdp, primary_assembly, vm, hn, sf, + pre_var = va_func.genomic(variant, no_norm_evm, hp, self.hdp, primary_assembly, vm, hn, self.sf, nr_vm) # genome back to C coordinates post_var = va_func.myevm_g_to_t(evm, pre_var, trans_acc) @@ -3706,7 +3708,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr plus = re.compile("\d\+\d") # finds digit + digit minus = re.compile("\d\-\d") # finds digit - digit if plus.search(input) or minus.search(input): - to_g = va_func.genomic(inp, no_norm_evm, hp, self.hdp, primary_assembly, vm, hn, sf, nr_vm) + to_g = va_func.genomic(inp, no_norm_evm, hp, self.hdp, primary_assembly, vm, hn, self.sf, nr_vm) es = re.compile('error') if es.search(str(to_g)): if alt_aln_method != 'genebuild': @@ -3866,7 +3868,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if valid == 'false': error = 'false' genomic_validation = str( - va_func.genomic(input, no_norm_evm, hp, self.hdp, primary_assembly, vm, hn, sf, 
nr_vm)) + va_func.genomic(input, no_norm_evm, hp, self.hdp, primary_assembly, vm, hn, self.sf, nr_vm)) del_end = re.compile('\ddel$') delins = re.compile('delins') inv = re.compile('inv') @@ -3958,7 +3960,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Genomic sequence hgvs_genomic = va_func.myevm_t_to_g(hgvs_coding, self.hdp, no_norm_evm, primary_assembly, vm, hp, hn, - sf, nr_vm) + self.sf, nr_vm) final_hgvs_genomic = hgvs_genomic # genomic_possibilities @@ -3985,7 +3987,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr chromosome_normalized_hgvs_coding = hgvs_coding most_3pr_hgvs_genomic = va_func.myvm_t_to_g(chromosome_normalized_hgvs_coding, - hgvs_genomic.ac, no_norm_evm, vm, hp, hn, sf, + hgvs_genomic.ac, no_norm_evm, vm, hp, hn, self.sf, nr_vm) hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) @@ -3999,7 +4001,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr fn.exceptPass() try: stash_ac = hgvs_stash.ac - stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, sf) + stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, self.sf) stash_pos = int(stash_dict['pos']) stash_ref = stash_dict['ref'] stash_alt = stash_dict['alt'] @@ -4017,7 +4019,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) # stash_genomic = vm.t_to_g(test_stash_tx_right, hgvs_genomic.ac) stash_genomic = va_func.myvm_t_to_g(test_stash_tx_right, hgvs_genomic.ac, no_norm_evm, - vm, hp, hn, sf, nr_vm) + vm, hp, hn, self.sf, nr_vm) # Stash the outputs if required # test variants = NC_000006.11:g.90403795G= (causes double identity) # NC_000002.11:g.73675227_73675228insCTC (? 
incorrect assumed insertion position) @@ -4084,7 +4086,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr try: stash_ac = hgvs_stash.ac stash_dict = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, primary_assembly, reverse_normalizer, - sf) + self.sf) stash_pos = int(stash_dict['pos']) stash_ref = stash_dict['ref'] stash_alt = stash_dict['alt'] @@ -4102,7 +4104,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) # stash_genomic = vm.t_to_g(test_stash_tx_left, hgvs_genomic.ac) stash_genomic = va_func.myvm_t_to_g(test_stash_tx_left, hgvs_genomic.ac, no_norm_evm, - vm, hp, hn, sf, nr_vm) + vm, hp, hn, self.sf, nr_vm) # Stash the outputs if required # test variants = NC_000006.11:g.90403795G= (causes double identity) # NC_000002.11:g.73675227_73675228insCTC @@ -4172,9 +4174,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr n_3pr = most_3pr_hgvs_transcript_variant n_5pr = most_5pr_hgvs_transcript_variant # Make into a delins by adding the ref bases to the variant ref and alt - pr3_ref = sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, + pr3_ref = self.sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, n_3pr.posedit.pos.end.base) - pr5_ref = sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, + pr5_ref = self.sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, n_5pr.posedit.pos.end.base) most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref @@ -4317,8 +4319,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if hgvs_genomic.posedit.edit.type == 'delins': start = hgvs_genomic.posedit.pos.start.base end = hgvs_genomic.posedit.pos.end.base - lhb = sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + lhb = 
self.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) hgvs_genomic.posedit.edit.ref = lhb + rhb hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb hgvs_genomic.posedit.pos.start.base = end @@ -4327,8 +4329,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if hgvs_genomic.posedit.edit.type == 'del': start = hgvs_genomic.posedit.pos.start.base end = hgvs_genomic.posedit.pos.end.base - lhb = sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) hgvs_genomic.posedit.edit.ref = lhb + rhb hgvs_genomic.posedit.edit.alt = lhb + rhb hgvs_genomic.posedit.pos.start.base = end @@ -4338,9 +4340,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if hgvs_genomic.posedit.edit.type == 'ins': start = hgvs_genomic.posedit.pos.start.base end = hgvs_genomic.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) - lhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - rhb = sf.fetch_seq(str(hgvs_genomic.ac), start, end) + ref_bases = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) + lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start, end) hgvs_genomic.posedit.edit.ref = lhb + rhb hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic) @@ -4351,7 +4353,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Create VCF vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, - reverse_normalizer, sf) + reverse_normalizer, self.sf) chr = vcf_dict['chr'] pos = vcf_dict['pos'] ref = vcf_dict['ref'] 
@@ -4512,7 +4514,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) hgvs_not_delins.posedit.edit.ref = ref_bases hgvs_not_delins.posedit.edit.alt = ref_bases[ :1] + hgvs_not_delins.posedit.edit.alt[ @@ -4528,7 +4530,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) hgvs_not_delins.posedit.edit.ref = ref_bases hgvs_not_delins.posedit.edit.alt = ref_bases[ :1] + hgvs_not_delins.posedit.edit.alt[ @@ -4538,7 +4540,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) hgvs_not_delins.posedit.edit.ref = ref_bases hgvs_not_delins.posedit.edit.alt = ref_bases[ :1] + hgvs_not_delins.posedit.edit.alt[ @@ -4554,7 +4556,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + ref_bases = 
self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) hgvs_not_delins.posedit.edit.ref = ref_bases hgvs_not_delins.posedit.edit.alt = ref_bases[ :1] + hgvs_not_delins.posedit.edit.alt[ @@ -4598,7 +4600,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, + primary_assembly, vm, hp, hn, self.sf, nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, @@ -4615,7 +4617,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, + primary_assembly, vm, hp, hn, self.sf, nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) @@ -4644,7 +4646,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Add the additional base to the ALT start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + ref_bases = self.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases if re.match('NM_', str(rn_tx_hgvs_not_delins)): test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) @@ -4652,7 +4654,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, + primary_assembly, vm, hp, hn, self.sf, nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) @@ -4667,7 +4669,7 @@ def 
validate(self, batch_variant, selected_assembly, select_transcripts, transcr test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, + primary_assembly, vm, hp, hn, self.sf, nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) @@ -4698,7 +4700,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_t_possibility = vm.c_to_n(hgvs_t_possibility) except: fn.exceptPass() - ins_ref = sf.fetch_seq(hgvs_t_possibility.ac, + ins_ref = self.sf.fetch_seq(hgvs_t_possibility.ac, hgvs_t_possibility.posedit.pos.start.base - 1, hgvs_t_possibility.posedit.pos.start.base + 1) try: @@ -4710,7 +4712,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr 0] + hgvs_t_possibility.posedit.edit.alt + \ ins_ref[1] if internal_possibility.posedit.edit.type == 'ins': - ins_ref = sf.fetch_seq(internal_possibility.ac, + ins_ref = self.sf.fetch_seq(internal_possibility.ac, internal_possibility.posedit.pos.start.base - 1, internal_possibility.posedit.pos.end.base) internal_possibility.posedit.edit.ref = ins_ref @@ -4880,7 +4882,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Correct insertion alts if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + append_ref = self.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, genomic_gap_fill_variant_alt.posedit.pos.end.base) genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ @@ -5237,7 +5239,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue # Update hgvs_genomic hgvs_genomic = va_func.myvm_t_to_g(hgvs_refreshed_variant, hgvs_genomic.ac, - no_norm_evm, vm, hp, hn, sf, nr_vm) + no_norm_evm, vm, hp, hn, self.sf, nr_vm) if 
hgvs_genomic.posedit.edit.type == 'identity': re_c = vm.g_to_t(hgvs_genomic, hgvs_refreshed_variant.ac) if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): @@ -5288,8 +5290,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if hgvs_genomic.posedit.edit.type == 'delins': start = hgvs_genomic.posedit.pos.start.base end = hgvs_genomic.posedit.pos.end.base - lhb = sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) hgvs_genomic.posedit.edit.ref = lhb + rhb hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb hgvs_genomic.posedit.pos.start.base = end @@ -5298,8 +5300,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if hgvs_genomic.posedit.edit.type == 'del': start = hgvs_genomic.posedit.pos.start.base end = hgvs_genomic.posedit.pos.end.base - lhb = sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) hgvs_genomic.posedit.edit.ref = lhb + rhb hgvs_genomic.posedit.edit.alt = lhb + rhb hgvs_genomic.posedit.pos.start.base = end @@ -5323,7 +5325,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Create vcf vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, - reverse_normalizer, sf) + reverse_normalizer, self.sf) chr = vcf_dict['chr'] pos = vcf_dict['pos'] ref = vcf_dict['ref'] @@ -5438,7 +5440,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 end = 
hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) hgvs_not_delins.posedit.edit.ref = ref_bases hgvs_not_delins.posedit.edit.alt = ref_bases[ :1] + hgvs_not_delins.posedit.edit.alt[ @@ -5452,7 +5454,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) hgvs_not_delins.posedit.edit.ref = ref_bases hgvs_not_delins.posedit.edit.alt = ref_bases[ :1] + hgvs_not_delins.posedit.edit.alt[ @@ -5462,7 +5464,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) hgvs_not_delins.posedit.edit.ref = ref_bases hgvs_not_delins.posedit.edit.alt = ref_bases[ :1] + hgvs_not_delins.posedit.edit.alt[ @@ -5476,7 +5478,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) hgvs_not_delins.posedit.edit.ref = ref_bases hgvs_not_delins.posedit.edit.alt = ref_bases[ :1] + hgvs_not_delins.posedit.edit.alt[ @@ -5521,7 +5523,7 @@ def validate(self, batch_variant, 
selected_assembly, select_transcripts, transcr test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) + primary_assembly, vm, hp, hn, self.sf, nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): @@ -5534,7 +5536,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) + primary_assembly, vm, hp, hn, self.sf, nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 @@ -5563,7 +5565,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Add the additional base to the ALT start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + ref_bases = self.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases if re.match('NM_', str(rn_tx_hgvs_not_delins)): test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) @@ -5571,7 +5573,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) + primary_assembly, vm, hp, hn, self.sf, nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): @@ -5585,7 +5587,7 @@ def 
validate(self, batch_variant, selected_assembly, select_transcripts, transcr test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) + primary_assembly, vm, hp, hn, self.sf, nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 @@ -5616,7 +5618,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_t_possibility = vm.c_to_n(hgvs_t_possibility) except: fn.exceptPass() - ins_ref = sf.fetch_seq(hgvs_t_possibility.ac, + ins_ref = self.sf.fetch_seq(hgvs_t_possibility.ac, hgvs_t_possibility.posedit.pos.start.base - 1, hgvs_t_possibility.posedit.pos.start.base + 1) try: @@ -5628,7 +5630,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr 0] + hgvs_t_possibility.posedit.edit.alt + \ ins_ref[1] if internal_possibility.posedit.edit.type == 'ins': - ins_ref = sf.fetch_seq(internal_possibility.ac, + ins_ref = self.sf.fetch_seq(internal_possibility.ac, internal_possibility.posedit.pos.start.base - 1, internal_possibility.posedit.pos.end.base) internal_possibility.posedit.edit.ref = ins_ref @@ -5773,7 +5775,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Correct insertion alts if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + append_ref = self.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, genomic_gap_fill_variant_alt.posedit.pos.end.base) genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ @@ -6162,7 +6164,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_refseq_ac = 'RefSeqGene record not available' # Predicted effect on protein - protein_dict = va_func.myc_to_p(hgvs_coding, evm, 
self.hdp, hp, hn, vm, sf, re_to_p=False) + protein_dict = va_func.myc_to_p(hgvs_coding, evm, self.hdp, hp, hn, vm, self.sf, re_to_p=False) if protein_dict['error'] == '': hgvs_protein = protein_dict['hgvs_protein'] protein = str(hgvs_protein) @@ -6229,7 +6231,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr c_for_p = seek_var try: # Predicted effect on protein - protein_dict = va_func.myc_to_p(c_for_p, evm, self.hdp, hp, hn, vm, sf, re_to_p=False) + protein_dict = va_func.myc_to_p(c_for_p, evm, self.hdp, hp, hn, vm, self.sf, re_to_p=False) if protein_dict['error'] == '': hgvs_protein = protein_dict['hgvs_protein'] protein = str(hgvs_protein) @@ -6302,7 +6304,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr fn.exceptPass() else: # hgvs_protein = va_func.protein(str(c_for_p), evm, hp) - protein_dict = va_func.myc_to_p(c_for_p, evm, self.hdp, hp, hn, vm, sf, + protein_dict = va_func.myc_to_p(c_for_p, evm, self.hdp, hp, hn, vm, self.sf, re_to_p=False) if protein_dict['error'] == '': hgvs_protein = protein_dict['hgvs_protein'] @@ -6599,7 +6601,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr alt_aln_method=alt_aln_method, hdp=self.hdp) orientation = int(ori[0]['alt_strand']) hgvs_alt_genomic = va_func.myvm_t_to_g(hgvs_coding, alt_chr, no_norm_evm, vm, hp, hn, - sf, nr_vm) + self.sf, nr_vm) # Set hgvs_genomic accordingly hgvs_genomic = copy.deepcopy(hgvs_alt_genomic) @@ -6628,7 +6630,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr most_3pr_hgvs_genomic = va_func.myvm_t_to_g(chromosome_normalized_hgvs_coding, alt_chr, - no_norm_evm, vm, hp, hn, sf, nr_vm) + no_norm_evm, vm, hp, hn, self.sf, nr_vm) hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) # First to the right @@ -6639,7 +6641,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr fn.exceptPass() try: stash_ac = hgvs_stash.ac - 
stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, sf) + stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, self.sf) stash_pos = int(stash_dict['pos']) stash_ref = stash_dict['ref'] stash_alt = stash_dict['alt'] @@ -6656,7 +6658,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Store a tx copy for later use test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) stash_genomic = va_func.myvm_t_to_g(test_stash_tx_right, hgvs_alt_genomic.ac, - no_norm_evm, vm, hp, hn, sf, nr_vm) + no_norm_evm, vm, hp, hn, self.sf, nr_vm) # Stash the outputs if required # test variants = NC_000006.11:g.90403795G= (causes double identity) # NC_000002.11:g.73675227_73675228insCTC (? incorrect assumed insertion position) @@ -6720,7 +6722,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr try: stash_ac = hgvs_stash.ac stash_dict = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, primary_assembly, - reverse_normalizer, sf) + reverse_normalizer, self.sf) stash_pos = int(stash_dict['pos']) stash_ref = stash_dict['ref'] stash_alt = stash_dict['alt'] @@ -6737,7 +6739,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Store a tx copy for later use test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) stash_genomic = va_func.myvm_t_to_g(test_stash_tx_left, hgvs_alt_genomic.ac, - no_norm_evm, vm, hp, hn, sf, nr_vm) + no_norm_evm, vm, hp, hn, self.sf, nr_vm) # Stash the outputs if required # test variants = NC_000006.11:g.90403795G= (causes double identity) # NC_000002.11:g.73675227_73675228insCTC @@ -6804,9 +6806,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr n_3pr = most_3pr_hgvs_transcript_variant n_5pr = most_5pr_hgvs_transcript_variant # Make into a delins by adding the ref bases to the variant ref and alt - pr3_ref = sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, + pr3_ref = 
self.sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, n_3pr.posedit.pos.end.base) - pr5_ref = sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, + pr5_ref = self.sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, n_5pr.posedit.pos.end.base) most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref @@ -6943,8 +6945,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if hgvs_genomic.posedit.edit.type == 'delins': start = hgvs_genomic.posedit.pos.start.base end = hgvs_genomic.posedit.pos.end.base - lhb = sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) hgvs_genomic.posedit.edit.ref = lhb + rhb hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb hgvs_genomic.posedit.pos.start.base = end @@ -6954,8 +6956,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if hgvs_genomic.posedit.edit.type == 'del': start = hgvs_genomic.posedit.pos.start.base end = hgvs_genomic.posedit.pos.end.base - lhb = sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) hgvs_genomic.posedit.edit.ref = lhb + rhb hgvs_genomic.posedit.edit.alt = lhb + rhb hgvs_genomic.posedit.pos.start.base = end @@ -6966,9 +6968,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if hgvs_genomic.posedit.edit.type == 'ins': start = hgvs_genomic.posedit.pos.start.base end = hgvs_genomic.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) - lhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - 
rhb = sf.fetch_seq(str(hgvs_genomic.ac), start, end) + ref_bases = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) + lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start, end) hgvs_genomic.posedit.edit.ref = lhb + rhb hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( @@ -6980,7 +6982,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Make VCF vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, - reverse_normalizer, sf) + reverse_normalizer, self.sf) chr = vcf_dict['chr'] pos = vcf_dict['pos'] ref = vcf_dict['ref'] @@ -7126,7 +7128,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, + ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) hgvs_not_delins.posedit.edit.ref = ref_bases hgvs_not_delins.posedit.edit.alt = ref_bases[ @@ -7146,7 +7148,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, + ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) hgvs_not_delins.posedit.edit.ref = ref_bases hgvs_not_delins.posedit.edit.alt = ref_bases[ @@ -7158,7 +7160,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 end = 
hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, + ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) hgvs_not_delins.posedit.edit.ref = ref_bases hgvs_not_delins.posedit.edit.alt = ref_bases[ @@ -7178,7 +7180,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, + ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) hgvs_not_delins.posedit.edit.ref = ref_bases hgvs_not_delins.posedit.edit.alt = ref_bases[ @@ -7220,7 +7222,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx hgvs_not_delins = va_func.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, vm, hp, hn, sf, + no_norm_evm, vm, hp, hn, self.sf, nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str( @@ -7235,7 +7237,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx hgvs_not_delins = va_func.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, vm, hp, hn, sf, + no_norm_evm, vm, hp, hn, self.sf, nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str( @@ -7268,7 +7270,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Add the additional base to the ALT start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + ref_bases = self.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases if re.match('NM_', 
str(rn_tx_hgvs_not_delins)): test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) @@ -7276,7 +7278,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx hgvs_not_delins = va_func.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, vm, hp, hn, sf, + no_norm_evm, vm, hp, hn, self.sf, nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str( @@ -7292,7 +7294,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx hgvs_not_delins = va_func.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, vm, hp, hn, sf, + no_norm_evm, vm, hp, hn, self.sf, nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str( @@ -7325,7 +7327,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue if hgvs_t_possibility.posedit.pos.start.offset != 0 or hgvs_t_possibility.posedit.pos.end.offset != 0: continue - ins_ref = sf.fetch_seq(hgvs_t_possibility.ac, + ins_ref = self.sf.fetch_seq(hgvs_t_possibility.ac, hgvs_t_possibility.posedit.pos.start.base - 1, hgvs_t_possibility.posedit.pos.start.base + 1) try: @@ -7337,7 +7339,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr 0] + hgvs_t_possibility.posedit.edit.alt + \ ins_ref[1] if possibility.posedit.edit.type == 'ins': - ins_ref = sf.fetch_seq(possibility.ac, + ins_ref = self.sf.fetch_seq(possibility.ac, possibility.posedit.pos.start.base - 1, possibility.posedit.pos.end.base) possibility.posedit.edit.ref = ins_ref @@ -7508,7 +7510,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Correct insertion alts if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + append_ref = self.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, 
genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, genomic_gap_fill_variant_alt.posedit.pos.end.base) genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ @@ -7876,7 +7878,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Update hgvs_genomic hgvs_alt_genomic = va_func.myvm_t_to_g(hgvs_refreshed_variant, alt_chr, - no_norm_evm, vm, hp, hn, sf, nr_vm) + no_norm_evm, vm, hp, hn, self.sf, nr_vm) if hgvs_alt_genomic.posedit.edit.type == 'identity': re_c = vm.g_to_t(hgvs_alt_genomic, hgvs_refreshed_variant.ac) if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): @@ -7910,8 +7912,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if hgvs_alt_genomic.posedit.edit.type == 'delins': start = hgvs_alt_genomic.posedit.pos.start.base end = hgvs_alt_genomic.posedit.pos.end.base - lhb = sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) - rhb = sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) + lhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) + rhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) hgvs_alt_genomic.posedit.edit.ref = lhb + rhb hgvs_alt_genomic.posedit.edit.alt = lhb + hgvs_alt_genomic.posedit.edit.alt + rhb hgvs_alt_genomic.posedit.pos.start.base = end @@ -7920,8 +7922,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if hgvs_alt_genomic.posedit.edit.type == 'del': start = hgvs_alt_genomic.posedit.pos.start.base end = hgvs_alt_genomic.posedit.pos.end.base - lhb = sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) - rhb = sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) + lhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) + rhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) hgvs_alt_genomic.posedit.edit.ref = lhb + rhb hgvs_alt_genomic.posedit.edit.alt = lhb + rhb hgvs_alt_genomic.posedit.pos.start.base = end @@ -7971,7 +7973,7 @@ def validate(self, 
batch_variant, selected_assembly, select_transcripts, transcr test = vvChromosomes.supported_for_mapping(alt_gen_var.ac, build) if test == 'true': try: - vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, build, reverse_normalizer, sf) + vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, build, reverse_normalizer, self.sf) except hgvs.exceptions.HGVSInvalidVariantError as e: continue # Identify primary assembly positions @@ -7997,7 +7999,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr } if build == 'GRCh38': vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, 'hg38', reverse_normalizer, - sf) + self.sf) primary_genomic_dicts['hg38'] = { 'hgvs_genomic_description': fn.valstr(alt_gen_var), 'vcf': {'chr': vcf_dict['ucsc_chr'], @@ -8033,7 +8035,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if build == 'GRCh38': vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, 'hg38', reverse_normalizer, - sf) + self.sf) dict = {'hg38': {'hgvs_genomic_description': fn.valstr(alt_gen_var), 'vcf': {'chr': vcf_dict['ucsc_chr'], 'pos': vcf_dict['pos'], @@ -8200,7 +8202,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr build_from = 'GRCh37' # Liftover - lifted_response = lift_over(genomic_position_info[g_p_key]['hgvs_genomic_description'], build_from, build_to, hn, vm, vr, self.hdp, hp, reverse_normalizer, sf, evm) + lifted_response = lift_over(genomic_position_info[g_p_key]['hgvs_genomic_description'], build_from, build_to, hn, vm, vr, self.hdp, hp, reverse_normalizer, self.sf, evm) # Sort the respomse into primary assembly and ALT primary_assembly_loci = {} diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py new file mode 100644 index 00000000..00adf6f3 --- /dev/null +++ b/VariantValidator/modules/vvMixinInit.py @@ -0,0 +1,468 @@ +import os +from configparser import ConfigParser,RawConfigParser +import hgvs +import hgvs.parser +import 
hgvs.dataproviders.uta +import hgvs.dataproviders.seqfetcher +import hgvs.assemblymapper +import hgvs.variantmapper +import hgvs.sequencevariant +import hgvs.validator +import hgvs.exceptions +import hgvs.location +import hgvs.posedit +import hgvs.edit +import hgvs.normalizer +from Bio.Seq import Seq + +import re +import copy +#import io +from vvDatabase import vvDatabase +from vvLogging import logger +import vvFunctions as fn + + +''' +This file contains the validator object, which is instantiated in order to perform validator functions. +The validator contains configuration information and permanent copies of database links and the like. +Much of the validator's inner workings are stored in special one-off function container objects: +validator.db : The validator's MySQL database access functions + +The validator configuration is stored in ~/.config/VariantValidator/config.ini . This is loaded +when the validator object is initialized. + +Running variant validator should hopefully be as simple as writing a script like this: +import VariantValidator + +val=Validator() +val.validate("some kind of gene situation","the transcripts to use") + +''' + +''' + Renaming of variables : + 'seqrepo_directory': HGVS_SEQREPO_DIR, #self.seqrepoPath + 'uta_url': UTA_DB_URL, #self.utaPath + 'py_liftover_directory': PYLIFTOVER_DIR, #self.liftoverPath + 'variantvalidator_data_url': VALIDATOR_DB_URL, #self.db.path + 'entrez_id': ENTREZ_ID, #self.entrezID + 'variantvalidator_version': VERSION, #self.version + 'variantvalidator_hgvs_version': hgvs_version, #self.hgvsVersion + 'uta_schema': str(hdp.data_version()), #self.uta_schema + 'seqrepo_db': HGVS_SEQREPO_DIR.split('/')[-1] #self.seqrepoVersion +''' + + + +class Mixin(): + # This object contains configuration options for the validator, but it inherits the mixin + # class in vvCore that contains the enormous validator function. + def __init__(self): + # First load from the configuration file, if it exists. 
+ configName="config.ini" + homePath=os.path.expanduser("~") + configPath=os.path.join(homePath,".config","VariantValidator") + if not os.path.isdir(configPath): + os.makedirs(configPath) + # Now configpath points to the config file itself. + configPath=os.path.join(configPath,configName) + # Does the file exist? + if not os.path.exists(configPath): + self.createConfig(configPath) + + # Load the configuration file. + with open(configPath) as file: + lines=file.read() + config=RawConfigParser(allow_no_value=True) + config.read(configPath) + # The custom vvLogging module will set itself up using the VALDIATOR_DEBUG environment variable. + logString = config["logging"]['string'] + os.environ["VALIDATOR_DEBUG"] = logString + + # Handle databases + self.entrezID=config["EntrezID"]["entrezID"] + if config["seqrepo"]["location"]!=None: + self.seqrepoVersion=config["seqrepo"]["version"] + self.seqrepoPath=config["seqrepo"]["location"]+self.seqrepoVersion + os.environ['HGVS_SEQREPO_DIR']=self.seqrepoPath + else: + raise ValueError("The seqrepo location has not been set in ~/.config/VariantValidator/config.ini") + os.environ['UTA_DB_URL']=config["uta"]["location"]+config["uta"]["version"] + self.utaPath=config["uta"]["location"]+config["uta"]["version"] + self.dbConfig = { + 'user': config["mysql"]["user"], + 'password':config["mysql"]["password"], + 'host': config["mysql"]["host"], + 'database':config["mysql"]["database"], + 'raise_on_warnings': True + } + self.db=vvDatabase(self,self.dbConfig) + # Set up versions + __version__ = config["variantValidator"]['version'] + self.version=__version__ + if re.match('^\d+\.\d+\.\d+$', __version__) is not None: + self.releasedVersion=True + _is_released_version = True + else: + self.releasedVersion=False + self.hgvsVersion=hgvs.__version__ + + # Set up other configuration variables + self.liftoverPath=config["liftover"]["location"] + if not self.liftoverPath==None: + os.environ['PYLIFTOVER_DIR']=self.liftoverPath + 
self.entrezID=config["EntrezID"]['entrezid'] + + # Set up HGVS + # Configure hgvs package global settings + hgvs.global_config.uta.pool_max = 25 + hgvs.global_config.formatting.max_ref_length = 1000000 + # Create HGVS objects + self.hdp = hgvs.dataproviders.uta.connect(pooling=True) + self.hp = hgvs.parser.Parser() #Parser + self.vr = hgvs.validator.Validator(self.hdp) # Validator + self.vm = hgvs.variantmapper.VariantMapper(self.hdp) # Variant mapper + # Create a lose vm instance + self.lose_vm = hgvs.variantmapper.VariantMapper(self.hdp, + replace_reference=True, + prevalidation_level=None + ) + self.nr_vm = hgvs.variantmapper.VariantMapper(self.hdp, replace_reference=False) #No reverse variant mapper + self.sf = hgvs.dataproviders.seqfetcher.SeqFetcher() # Seqfetcher + # Set standard genome builds + self.genome_builds = ['GRCh37', 'hg19', 'GRCh38'] + self.uta_schema = str(self.hdp.data_version()) + + # Create normalizer + self.hn = hgvs.normalizer.Normalizer(self.hdp, + cross_boundaries=False, + shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, + alt_aln_method='splign' + ) + self.reverse_hn = hgvs.normalizer.Normalizer(self.hdp, + cross_boundaries=False, + shuffle_direction=5, + alt_aln_method='splign' + ) + + # Create normalizer + self.merge_normalizer = hgvs.normalizer.Normalizer(self.hdp, + cross_boundaries=False, + shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, + alt_aln_method='splign', + validate=False + ) + self.reverse_merge_normalizer = hgvs.normalizer.Normalizer(self.hdp, + cross_boundaries=False, + shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, + alt_aln_method='splign', + validate=False + ) + #create no_norm_evm + self.no_norm_evm_38 = hgvs.assemblymapper.AssemblyMapper(self.hdp, + assembly_name='GRCh38', + alt_aln_method='splign', + normalize=False, + replace_reference=True + ) + + self.no_norm_evm_37 = hgvs.assemblymapper.AssemblyMapper(self.hdp, + assembly_name='GRCh37', + 
alt_aln_method='splign', + normalize=False, + replace_reference=True + ) + + + + #def validate(self): # <-------------- this is imported from the mixin class in vvCore. + # pass + def createConfig(self,outPath): + # This function reads from the default configuration file stored in the same folder as this module, + # and transfers it to outPath. + # Outpath should include a filename. + lines=[] + inPath=os.path.join(os.path.dirname(os.path.realpath(__file__)),"defaultConfig.ini") +# print(os.path.join(inPath,"defaultConfig.ini")) + with open(inPath) as file: + for l in file: + lines.append(l) + with open(outPath, "w") as file: + for l in lines: + file.write(l) + def protein(self,variant, evm, hpUnused): + # Set regular expressions for if statements + pat_c = re.compile("\:c\.") # Pattern looks for :c. Note (gene) has been removed + + # If the :c. pattern is present in the input variant + if pat_c.search(variant): + # convert the input string into a hgvs object + var_c = self.hp.parse_hgvs_variant(variant) + # Does the edit affect the start codon? 
+ if ((var_c.posedit.pos.start.base >= 1 and var_c.posedit.pos.start.base <= 3 and var_c.posedit.pos.start.offset == 0) or ( + var_c.posedit.pos.end.base >= 1 and var_c.posedit.pos.end.base <= 3 and var_c.posedit.pos.end.offset == 0)) and not re.search('\*', str( + var_c.posedit.pos)): + ass_prot = self.hdp.get_pro_ac_for_tx_ac(var_c.ac) + if str(ass_prot) == 'None': + cod = str(var_c) + cod = cod.replace('inv', 'del') + cod = self.hp.parse_hgvs_variant(cod) + p = evm.c_to_p(cod) + ass_prot = p.ac + var_p = hgvs.sequencevariant.SequenceVariant(ac=ass_prot, type='p', posedit='(Met1?)') + else: + var_p = evm.c_to_p(var_c) + return var_p + if re.search(':n.', variant): + var_p = self.hp.parse_hgvs_variant(variant) + var_p.ac = 'Non-coding transcript' + var_p.posedit = '' + return var_p + def myc_to_p(self,hgvs_transcript, evm, re_to_p): + # Create dictionary to store the information + hgvs_transcript_to_hgvs_protein = {'error': '', 'hgvs_protein': '', 'ref_residues': ''} + + # Collect the associated protein + if hgvs_transcript.type == 'c': + associated_protein_accession = self.hdp.get_pro_ac_for_tx_ac(hgvs_transcript.ac) + # This method sometimes fails + if str(associated_protein_accession) == 'None': + cod = str(hgvs_transcript) + cod = cod.replace('inv', 'del') + cod = self.hp.parse_hgvs_variant(cod) + p = evm.c_to_p(cod) + associated_protein_accession = p.ac + else: + pass + + # Check for non-coding transcripts + if hgvs_transcript.type == 'c': + # Handle non inversions with simple c_to_p mapping + + if (hgvs_transcript.posedit.edit.type != 'inv') and (hgvs_transcript.posedit.edit.type != 'delins') and (re_to_p is False): + # Does the edit affect the start codon? 
+ if ((hgvs_transcript.posedit.pos.start.base >= 1 and hgvs_transcript.posedit.pos.start.base <= 3 and hgvs_transcript.posedit.pos.start.offset == 0) or ( + hgvs_transcript.posedit.pos.end.base >= 1 and hgvs_transcript.posedit.pos.end.base <= 3 and hgvs_transcript.posedit.pos.end.offset == 0)) \ + and not re.search('\*', str( + hgvs_transcript.posedit.pos)): + hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, + type='p', posedit='(Met1?)') + else: + try: + hgvs_protein = evm.c_to_p(hgvs_transcript) + except IndexError as e: + error = str(e) + if re.search('string index out of range', error) and re.search('dup', str(hgvs_transcript)): + hgvs_ins = self.hp.parse_hgvs_variant(str(hgvs_transcript)) + hgvs_ins = self.hn.normalize(hgvs_ins) + inst = hgvs_ins.ac + ':c.' + str(hgvs_ins.posedit.pos.start.base - 1) + '_' + str(hgvs_ins.posedit.pos.start.base) + 'ins' + hgvs_ins.posedit.edit.ref + hgvs_transcript = self.hp.parse_hgvs_variant(inst) + hgvs_protein = evm.c_to_p(hgvs_transcript) + + try: + hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein + return hgvs_transcript_to_hgvs_protein + except UnboundLocalError: + hgvs_transcript_to_hgvs_protein = self.myc_to_p(hgvs_transcript, evm, re_to_p = True) + return hgvs_transcript_to_hgvs_protein + + else: + # Additional code required to process inversions + # Note, this code was developed for VariantValidator and is not native to the biocommons hgvs Python package + # Convert positions to n. 
position + hgvs_naughty = self.vm.c_to_n(hgvs_transcript) + + # Collect the deleted sequence using fetch_seq + del_seq = self.sf.fetch_seq(str(hgvs_naughty.ac), start_i=hgvs_naughty.posedit.pos.start.base - 1, end_i=hgvs_naughty.posedit.pos.end.base) + + # Make the inverted sequence + my_seq = Seq(del_seq) + + if hgvs_transcript.posedit.edit.type == 'inv': + inv_seq = my_seq.reverse_complement() + else: + inv_seq = hgvs_transcript.posedit.edit.alt + if inv_seq is None: + inv_seq = '' + + # Look for p. delins or del + not_delins = True + if hgvs_transcript.posedit.edit.type != 'inv': + try: + shifts = evm.c_to_p(hgvs_transcript) + if re.search('del', shifts.posedit.edit.type): + not_delins = False + except Exception: + not_delins = False + else: + not_delins = False + + # Use inv delins code? + if not_delins == False: + # Collect the associated protein + associated_protein_accession = self.hdp.get_pro_ac_for_tx_ac(hgvs_transcript.ac) + + # Intronic inversions are marked as uncertain i.e. p.? 
+ if re.search('\d+\-', str(hgvs_transcript.posedit.pos)) or re.search('\d+\+', str(hgvs_transcript.posedit.pos)) or re.search('\*', str(hgvs_transcript.posedit.pos)) or re.search('[cn].\-', str(hgvs_transcript)): + if (( + hgvs_transcript.posedit.pos.start.base >= 1 and hgvs_transcript.posedit.pos.start.base <= 3 and hgvs_transcript.posedit.pos.start.offset == 0) + or + (hgvs_transcript.posedit.pos.end.base >= 1 and hgvs_transcript.posedit.pos.end.base <= 3 and hgvs_transcript.posedit.pos.end.offset == 0)) \ + and not re.search('\*', str(hgvs_transcript.posedit.pos)): + hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', + posedit='(Met1?)') + else: + # Make the variant + hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', posedit='?') + hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein + return hgvs_transcript_to_hgvs_protein + else: + # Need to obtain the cds_start + inf = self.hdp.get_tx_identity_info(hgvs_transcript.ac) + cds_start = inf[3] + + # Extract the reference coding sequence from SeqRepo + try: + ref_seq = self.sf.fetch_seq(str(hgvs_naughty.ac)) + except Exception as e: + error = str(e) + hgvs_transcript_to_hgvs_protein['error'] = error + return hgvs_transcript_to_hgvs_protein + + # Create the variant coding sequence + var_seq = fn.n_inversion(ref_seq, del_seq, inv_seq, + hgvs_naughty.posedit.pos.start.base, + hgvs_naughty.posedit.pos.end.base) + # Translate the reference and variant proteins + prot_ref_seq = fn.translate(ref_seq, cds_start) + + try: + prot_var_seq = fn.translate(var_seq, cds_start) + except IndexError: + hgvs_transcript_to_hgvs_protein['error'] = 'Cannot identify an in-frame Termination codon in the variant mRNA sequence' + hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', + posedit='?') + hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein + return hgvs_transcript_to_hgvs_protein 
+ + if prot_ref_seq == 'error': + error = 'Unable to generate protein variant description' + hgvs_transcript_to_hgvs_protein['error'] = error + return hgvs_transcript_to_hgvs_protein + elif prot_var_seq == 'error': + # Does the edit affect the start codon? + if (( + hgvs_transcript.posedit.pos.start.base >= 1 and hgvs_transcript.posedit.pos.start.base <= 3 and hgvs_transcript.posedit.pos.start.offset == 0) + or + (hgvs_transcript.posedit.pos.end.base >= 1 and hgvs_transcript.posedit.pos.end.base <= 3 and hgvs_transcript.posedit.pos.end.offset == 0)) \ + and not re.search('\*', str(hgvs_transcript.posedit.pos)): + hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', + posedit='(Met1?)') + + hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein + return hgvs_transcript_to_hgvs_protein + else: + error = 'Unable to generate protein variant description' + hgvs_transcript_to_hgvs_protein['error'] = error + return hgvs_transcript_to_hgvs_protein + else: + # Gather the required information regarding variant interval and sequences + if hgvs_transcript.posedit.edit.type != 'delins': + pro_inv_info = fn.pro_inv_info(prot_ref_seq, prot_var_seq) + else: + pro_inv_info = fn.pro_delins_info(prot_ref_seq, prot_var_seq) + + # Error has occurred + if pro_inv_info['error'] == 'true': + error = 'Translation error occurred, please contact admin' + hgvs_transcript_to_hgvs_protein['error'] = error + return hgvs_transcript_to_hgvs_protein + + # The Nucleotide variant has not affected the protein sequence i.e. synonymous + elif pro_inv_info['variant'] != 'true': + # Make the variant + hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', + posedit='=') + hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein + return hgvs_transcript_to_hgvs_protein + + else: + # Early termination i.e. 
stop gained + # if pro_inv_info['terminate'] == 'true': + # end = 'Ter' + str(pro_inv_info['ter_pos']) + # pro_inv_info['prot_ins_seq'].replace('*', end) + + # Complete variant description + # Recode the single letter del and ins sequences into three letter amino acid codes + del_thr = fn.one_to_three(pro_inv_info['prot_del_seq']) + ins_thr = fn.one_to_three(pro_inv_info['prot_ins_seq']) + + # Write the HGVS position and edit + del_len = len(del_thr) + from_aa = del_thr[0:3] + to_aa = del_thr[del_len - 3:] + + # Handle a range of amino acids + if pro_inv_info['edit_start'] != pro_inv_info['edit_end']: + if len(ins_thr) > 0: + if re.search('Ter', del_thr) and ins_thr[-3:] != 'Ter': + posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + '_' + to_aa + str( + pro_inv_info['edit_end']) + 'delins' + ins_thr + '?)' + else: + posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + '_' + to_aa + str( + pro_inv_info['edit_end']) + 'delins' + ins_thr + ')' + else: + if re.search('Ter', del_thr) and ins_thr[-3:] != 'Ter': + posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + '_' + to_aa + str( + pro_inv_info['edit_end']) + 'del?)' + else: + posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + '_' + to_aa + str( + pro_inv_info['edit_end']) + 'del)' + else: + # Handle extended proteins i.e. 
stop_lost + if del_thr == 'Ter' and (len(ins_thr) > len(del_thr)): + # Nucleotide variant range aligns to the Termination codon + if ins_thr[-3:] == 'Ter': + posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + str( + ins_thr[:3]) + 'ext' + str(ins_thr[-3:]) + str((len(ins_thr) / 3) - 1) + ')' + # Nucleotide variant range spans the Termination codon + else: + posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + str( + ins_thr[:3]) + 'ext?)' + + # Nucleotide variation has not affected the length of the protein thus substitution or del + else: + if len(ins_thr) == 3: + posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + ins_thr + ')' + elif len(ins_thr) == 0: + posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + 'del)' + else: + posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + 'delins' + ins_thr + ')' + + # Complete the variant + hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', + posedit=posedit) + + hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein + + else: + hgvs_transcript_to_hgvs_protein['hgvs_protein'] = shifts + + # Return + return hgvs_transcript_to_hgvs_protein + + + # Handle non-coding transcript and non transcript descriptions + elif hgvs_transcript.type == 'n': + # non-coding transcripts + hgvs_protein = copy.deepcopy(hgvs_transcript) + hgvs_protein.ac = 'Non-coding ' + hgvs_protein.posedit = '' + hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein + return hgvs_transcript_to_hgvs_protein + else: + hgvs_transcript_to_hgvs_protein['error'] = 'Unable to map %s to %s' % ( + hgvs_transcript.ac, associated_protein_accession) + return hgvs_transcript_to_hgvs_protein + diff --git a/VariantValidator/modules/vvObjects.py b/VariantValidator/modules/vvObjects.py index bd11b422..ff680278 100644 --- a/VariantValidator/modules/vvObjects.py +++ b/VariantValidator/modules/vvObjects.py @@ -1,474 +1,6 @@ -import os -from configparser import 
ConfigParser,RawConfigParser -import hgvs -import hgvs.parser -import hgvs.dataproviders.uta -import hgvs.dataproviders.seqfetcher -import hgvs.assemblymapper -import hgvs.variantmapper -import hgvs.sequencevariant -import hgvs.validator -import hgvs.exceptions -import hgvs.location -import hgvs.posedit -import hgvs.edit -import hgvs.normalizer -from Bio.Seq import Seq -import re -import copy -#import io -from vvDatabase import vvDatabase -from vvLogging import logger -import vvCore -import vvFunctions as fn -# Custom Exceptions -class variantValidatorError(Exception): - pass - -''' -This file contains the validator object, which is instantiated in order to perform validator functions. -The validator contains configuration information and permanent copies of database links and the like. -Much of the validator's inner workings are stored in special one-off function container objects: -validator.db : The validator's MySQL database access functions - -The validator configuration is stored in ~/.config/VariantValidator/config.ini . This is loaded -when the validator object is initialized. 
- -Running variant validator should hopefully be as simple as writing a script like this: -import VariantValidator - -val=Validator() -val.validate("some kind of gene situation","the transcripts to use") - -''' - -''' - Renaming of variables : - 'seqrepo_directory': HGVS_SEQREPO_DIR, #self.seqrepoPath - 'uta_url': UTA_DB_URL, #self.utaPath - 'py_liftover_directory': PYLIFTOVER_DIR, #self.liftoverPath - 'variantvalidator_data_url': VALIDATOR_DB_URL, #self.db.path - 'entrez_id': ENTREZ_ID, #self.entrezID - 'variantvalidator_version': VERSION, #self.version - 'variantvalidator_hgvs_version': hgvs_version, #self.hgvsVersion - 'uta_schema': str(hdp.data_version()), #self.uta_schema - 'seqrepo_db': HGVS_SEQREPO_DIR.split('/')[-1] #self.seqrepoVersion -''' - - - -class Validator(vvCore.mixin): - # This object contains configuration options for the validator, but it inherits the mixin - # class in vvCore that contains the enormous validator function. - def __init__(self): - # First load from the configuration file, if it exists. - configName="config.ini" - homePath=os.path.expanduser("~") - configPath=os.path.join(homePath,".config","VariantValidator") - if not os.path.isdir(configPath): - os.makedirs(configPath) - # Now configpath points to the config file itself. - configPath=os.path.join(configPath,configName) - # Does the file exist? - if not os.path.exists(configPath): - self.createConfig(configPath) - - # Load the configuration file. - with open(configPath) as file: - lines=file.read() - config=RawConfigParser(allow_no_value=True) - config.read(configPath) - # The custom vvLogging module will set itself up using the VALDIATOR_DEBUG environment variable. 
- logString = config["logging"]['string'] - os.environ["VALIDATOR_DEBUG"] = logString - - # Handle databases - self.entrezID=config["EntrezID"]["entrezID"] - if config["seqrepo"]["location"]!=None: - self.seqrepoVersion=config["seqrepo"]["version"] - self.seqrepoPath=config["seqrepo"]["location"]+self.seqrepoVersion - os.environ['HGVS_SEQREPO_DIR']=self.seqrepoPath - else: - raise ValueError("The seqrepo location has not been set in ~/.config/VariantValidator/config.ini") - os.environ['UTA_DB_URL']=config["uta"]["location"]+config["uta"]["version"] - self.utaPath=config["uta"]["location"]+config["uta"]["version"] - self.dbConfig = { - 'user': config["mysql"]["user"], - 'password':config["mysql"]["password"], - 'host': config["mysql"]["host"], - 'database':config["mysql"]["database"], - 'raise_on_warnings': True - } - self.db=vvDatabase(self,self.dbConfig) - # Set up versions - __version__ = config["variantValidator"]['version'] - self.version=__version__ - if re.match('^\d+\.\d+\.\d+$', __version__) is not None: - self.releasedVersion=True - _is_released_version = True - else: - self.releasedVersion=False - self.hgvsVersion=hgvs.__version__ - - # Set up other configuration variables - self.liftoverPath=config["liftover"]["location"] - if not self.liftoverPath==None: - os.environ['PYLIFTOVER_DIR']=self.liftoverPath - self.entrezID=config["EntrezID"]['entrezid'] - - # Set up HGVS - # Configure hgvs package global settings - hgvs.global_config.uta.pool_max = 25 - hgvs.global_config.formatting.max_ref_length = 1000000 - # Create HGVS objects - self.hdp = hgvs.dataproviders.uta.connect(pooling=True) - self.hp = hgvs.parser.Parser() #Parser - self.vr = hgvs.validator.Validator(self.hdp) # Validator - self.vm = hgvs.variantmapper.VariantMapper(self.hdp) # Variant mapper - # Create a lose vm instance - self.lose_vm = hgvs.variantmapper.VariantMapper(self.hdp, - replace_reference=True, - prevalidation_level=None - ) - self.nr_vm = hgvs.variantmapper.VariantMapper(self.hdp, 
replace_reference=False) #No reverse variant mapper - self.sf = hgvs.dataproviders.seqfetcher.SeqFetcher() # Seqfetcher - # Set standard genome builds - self.genome_builds = ['GRCh37', 'hg19', 'GRCh38'] - self.uta_schema = str(self.hdp.data_version()) - - # Create normalizer - self.hn = hgvs.normalizer.Normalizer(self.hdp, - cross_boundaries=False, - shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, - alt_aln_method='splign' - ) - self.reverse_hn = hgvs.normalizer.Normalizer(self.hdp, - cross_boundaries=False, - shuffle_direction=5, - alt_aln_method='splign' - ) - - # Create normalizer - self.merge_normalizer = hgvs.normalizer.Normalizer(self.hdp, - cross_boundaries=False, - shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, - alt_aln_method='splign', - validate=False - ) - self.reverse_merge_normalizer = hgvs.normalizer.Normalizer(self.hdp, - cross_boundaries=False, - shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, - alt_aln_method='splign', - validate=False - ) - #create no_norm_evm - self.no_norm_evm_38 = hgvs.assemblymapper.AssemblyMapper(self.hdp, - assembly_name='GRCh38', - alt_aln_method='splign', - normalize=False, - replace_reference=True - ) - - self.no_norm_evm_37 = hgvs.assemblymapper.AssemblyMapper(self.hdp, - assembly_name='GRCh37', - alt_aln_method='splign', - normalize=False, - replace_reference=True - ) - - - - #def validate(self): # <-------------- this is imported from the mixin class in vvCore. - # pass - def createConfig(self,outPath): - # This function reads from the default configuration file stored in the same folder as this module, - # and transfers it to outPath. - # Outpath should include a filename. 
- lines=[] - inPath=os.path.join(os.path.dirname(os.path.realpath(__file__)),"defaultConfig.ini") -# print(os.path.join(inPath,"defaultConfig.ini")) - with open(inPath) as file: - for l in file: - lines.append(l) - with open(outPath, "w") as file: - for l in lines: - file.write(l) - def protein(self,variant, evm, hp): - # Set regular expressions for if statements - pat_c = re.compile("\:c\.") # Pattern looks for :c. Note (gene) has been removed - - # If the :c. pattern is present in the input variant - if pat_c.search(variant): - # convert the input string into a hgvs object - var_c = hp.parse_hgvs_variant(variant) - # Does the edit affect the start codon? - if ((var_c.posedit.pos.start.base >= 1 and var_c.posedit.pos.start.base <= 3 and var_c.posedit.pos.start.offset == 0) or ( - var_c.posedit.pos.end.base >= 1 and var_c.posedit.pos.end.base <= 3 and var_c.posedit.pos.end.offset == 0)) and not re.search('\*', str( - var_c.posedit.pos)): - ass_prot = self.hdp.get_pro_ac_for_tx_ac(var_c.ac) - if str(ass_prot) == 'None': - cod = str(var_c) - cod = cod.replace('inv', 'del') - cod = hp.parse_hgvs_variant(cod) - p = evm.c_to_p(cod) - ass_prot = p.ac - var_p = hgvs.sequencevariant.SequenceVariant(ac=ass_prot, type='p', posedit='(Met1?)') - else: - var_p = evm.c_to_p(var_c) - return var_p - if re.search(':n.', variant): - var_p = hp.parse_hgvs_variant(variant) - var_p.ac = 'Non-coding transcript' - var_p.posedit = '' - return var_p - def myc_to_p(self,hgvs_transcript, evm, re_to_p): - # Create dictionary to store the information - hgvs_transcript_to_hgvs_protein = {'error': '', 'hgvs_protein': '', 'ref_residues': ''} - - # Collect the associated protein - if hgvs_transcript.type == 'c': - associated_protein_accession = self.hdp.get_pro_ac_for_tx_ac(hgvs_transcript.ac) - # This method sometimes fails - if str(associated_protein_accession) == 'None': - cod = str(hgvs_transcript) - cod = cod.replace('inv', 'del') - cod = self.hp.parse_hgvs_variant(cod) - p = evm.c_to_p(cod) 
- associated_protein_accession = p.ac - else: - pass - - # Check for non-coding transcripts - if hgvs_transcript.type == 'c': - # Handle non inversions with simple c_to_p mapping - - if (hgvs_transcript.posedit.edit.type != 'inv') and (hgvs_transcript.posedit.edit.type != 'delins') and (re_to_p is False): - # Does the edit affect the start codon? - if ((hgvs_transcript.posedit.pos.start.base >= 1 and hgvs_transcript.posedit.pos.start.base <= 3 and hgvs_transcript.posedit.pos.start.offset == 0) or ( - hgvs_transcript.posedit.pos.end.base >= 1 and hgvs_transcript.posedit.pos.end.base <= 3 and hgvs_transcript.posedit.pos.end.offset == 0)) \ - and not re.search('\*', str( - hgvs_transcript.posedit.pos)): - hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, - type='p', posedit='(Met1?)') - else: - try: - hgvs_protein = evm.c_to_p(hgvs_transcript) - except IndexError as e: - error = str(e) - if re.search('string index out of range', error) and re.search('dup', str(hgvs_transcript)): - hgvs_ins = self.hp.parse_hgvs_variant(str(hgvs_transcript)) - hgvs_ins = self.hn.normalize(hgvs_ins) - inst = hgvs_ins.ac + ':c.' + str(hgvs_ins.posedit.pos.start.base - 1) + '_' + str(hgvs_ins.posedit.pos.start.base) + 'ins' + hgvs_ins.posedit.edit.ref - hgvs_transcript = self.hp.parse_hgvs_variant(inst) - hgvs_protein = evm.c_to_p(hgvs_transcript) - - try: - hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein - return hgvs_transcript_to_hgvs_protein - except UnboundLocalError: - hgvs_transcript_to_hgvs_protein = self.myc_to_p(hgvs_transcript, evm, re_to_p = True) - return hgvs_transcript_to_hgvs_protein - - else: - # Additional code required to process inversions - # Note, this code was developed for VariantValidator and is not native to the biocommons hgvs Python package - # Convert positions to n. 
position - hgvs_naughty = self.vm.c_to_n(hgvs_transcript) - - # Collect the deleted sequence using fetch_seq - del_seq = self.sf.fetch_seq(str(hgvs_naughty.ac), start_i=hgvs_naughty.posedit.pos.start.base - 1, end_i=hgvs_naughty.posedit.pos.end.base) - - # Make the inverted sequence - my_seq = Seq(del_seq) - - if hgvs_transcript.posedit.edit.type == 'inv': - inv_seq = my_seq.reverse_complement() - else: - inv_seq = hgvs_transcript.posedit.edit.alt - if inv_seq is None: - inv_seq = '' - - # Look for p. delins or del - not_delins = True - if hgvs_transcript.posedit.edit.type != 'inv': - try: - shifts = evm.c_to_p(hgvs_transcript) - if re.search('del', shifts.posedit.edit.type): - not_delins = False - except Exception: - not_delins = False - else: - not_delins = False - - # Use inv delins code? - if not_delins == False: - # Collect the associated protein - associated_protein_accession = self.hdp.get_pro_ac_for_tx_ac(hgvs_transcript.ac) - - # Intronic inversions are marked as uncertain i.e. p.? 
- if re.search('\d+\-', str(hgvs_transcript.posedit.pos)) or re.search('\d+\+', str(hgvs_transcript.posedit.pos)) or re.search('\*', str(hgvs_transcript.posedit.pos)) or re.search('[cn].\-', str(hgvs_transcript)): - if (( - hgvs_transcript.posedit.pos.start.base >= 1 and hgvs_transcript.posedit.pos.start.base <= 3 and hgvs_transcript.posedit.pos.start.offset == 0) - or - (hgvs_transcript.posedit.pos.end.base >= 1 and hgvs_transcript.posedit.pos.end.base <= 3 and hgvs_transcript.posedit.pos.end.offset == 0)) \ - and not re.search('\*', str(hgvs_transcript.posedit.pos)): - hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', - posedit='(Met1?)') - else: - # Make the variant - hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', posedit='?') - hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein - return hgvs_transcript_to_hgvs_protein - else: - # Need to obtain the cds_start - inf = self.hdp.get_tx_identity_info(hgvs_transcript.ac) - cds_start = inf[3] - - # Extract the reference coding sequence from SeqRepo - try: - ref_seq = self.sf.fetch_seq(str(hgvs_naughty.ac)) - except Exception as e: - error = str(e) - hgvs_transcript_to_hgvs_protein['error'] = error - return hgvs_transcript_to_hgvs_protein - - # Create the variant coding sequence - var_seq = fn.n_inversion(ref_seq, del_seq, inv_seq, - hgvs_naughty.posedit.pos.start.base, - hgvs_naughty.posedit.pos.end.base) - # Translate the reference and variant proteins - prot_ref_seq = fn.translate(ref_seq, cds_start) - - try: - prot_var_seq = fn.translate(var_seq, cds_start) - except IndexError: - hgvs_transcript_to_hgvs_protein['error'] = 'Cannot identify an in-frame Termination codon in the variant mRNA sequence' - hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', - posedit='?') - hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein - return hgvs_transcript_to_hgvs_protein 
- - if prot_ref_seq == 'error': - error = 'Unable to generate protein variant description' - hgvs_transcript_to_hgvs_protein['error'] = error - return hgvs_transcript_to_hgvs_protein - elif prot_var_seq == 'error': - # Does the edit affect the start codon? - if (( - hgvs_transcript.posedit.pos.start.base >= 1 and hgvs_transcript.posedit.pos.start.base <= 3 and hgvs_transcript.posedit.pos.start.offset == 0) - or - (hgvs_transcript.posedit.pos.end.base >= 1 and hgvs_transcript.posedit.pos.end.base <= 3 and hgvs_transcript.posedit.pos.end.offset == 0)) \ - and not re.search('\*', str(hgvs_transcript.posedit.pos)): - hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', - posedit='(Met1?)') - - hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein - return hgvs_transcript_to_hgvs_protein - else: - error = 'Unable to generate protein variant description' - hgvs_transcript_to_hgvs_protein['error'] = error - return hgvs_transcript_to_hgvs_protein - else: - # Gather the required information regarding variant interval and sequences - if hgvs_transcript.posedit.edit.type != 'delins': - pro_inv_info = fn.pro_inv_info(prot_ref_seq, prot_var_seq) - else: - pro_inv_info = fn.pro_delins_info(prot_ref_seq, prot_var_seq) - - # Error has occurred - if pro_inv_info['error'] == 'true': - error = 'Translation error occurred, please contact admin' - hgvs_transcript_to_hgvs_protein['error'] = error - return hgvs_transcript_to_hgvs_protein - - # The Nucleotide variant has not affected the protein sequence i.e. synonymous - elif pro_inv_info['variant'] != 'true': - # Make the variant - hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', - posedit='=') - hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein - return hgvs_transcript_to_hgvs_protein - - else: - # Early termination i.e. 
stop gained - # if pro_inv_info['terminate'] == 'true': - # end = 'Ter' + str(pro_inv_info['ter_pos']) - # pro_inv_info['prot_ins_seq'].replace('*', end) - - # Complete variant description - # Recode the single letter del and ins sequences into three letter amino acid codes - del_thr = fn.one_to_three(pro_inv_info['prot_del_seq']) - ins_thr = fn.one_to_three(pro_inv_info['prot_ins_seq']) - - # Write the HGVS position and edit - del_len = len(del_thr) - from_aa = del_thr[0:3] - to_aa = del_thr[del_len - 3:] - - # Handle a range of amino acids - if pro_inv_info['edit_start'] != pro_inv_info['edit_end']: - if len(ins_thr) > 0: - if re.search('Ter', del_thr) and ins_thr[-3:] != 'Ter': - posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + '_' + to_aa + str( - pro_inv_info['edit_end']) + 'delins' + ins_thr + '?)' - else: - posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + '_' + to_aa + str( - pro_inv_info['edit_end']) + 'delins' + ins_thr + ')' - else: - if re.search('Ter', del_thr) and ins_thr[-3:] != 'Ter': - posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + '_' + to_aa + str( - pro_inv_info['edit_end']) + 'del?)' - else: - posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + '_' + to_aa + str( - pro_inv_info['edit_end']) + 'del)' - else: - # Handle extended proteins i.e. 
stop_lost - if del_thr == 'Ter' and (len(ins_thr) > len(del_thr)): - # Nucleotide variant range aligns to the Termination codon - if ins_thr[-3:] == 'Ter': - posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + str( - ins_thr[:3]) + 'ext' + str(ins_thr[-3:]) + str((len(ins_thr) / 3) - 1) + ')' - # Nucleotide variant range spans the Termination codon - else: - posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + str( - ins_thr[:3]) + 'ext?)' - - # Nucleotide variation has not affected the length of the protein thus substitution or del - else: - if len(ins_thr) == 3: - posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + ins_thr + ')' - elif len(ins_thr) == 0: - posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + 'del)' - else: - posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + 'delins' + ins_thr + ')' - - # Complete the variant - hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', - posedit=posedit) - - hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein - - else: - hgvs_transcript_to_hgvs_protein['hgvs_protein'] = shifts - - # Return - return hgvs_transcript_to_hgvs_protein - - - # Handle non-coding transcript and non transcript descriptions - elif hgvs_transcript.type == 'n': - # non-coding transcripts - hgvs_protein = copy.deepcopy(hgvs_transcript) - hgvs_protein.ac = 'Non-coding ' - hgvs_protein.posedit = '' - hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein - return hgvs_transcript_to_hgvs_protein - else: - hgvs_transcript_to_hgvs_protein['error'] = 'Unable to map %s to %s' % ( - hgvs_transcript.ac, associated_protein_accession) - return hgvs_transcript_to_hgvs_protein +import vvMixinCore class Validation(): #Validation objects contain a number of variant interpretations @@ -478,4 +10,18 @@ class ValOutput(): #This object contains a single possible interpretation of a variant pass +#Mixins are used to split this very large, complex object over multiple 
files. +#There is a logical chain to it, though: +# vvMixinInit +# v +# vvMixinConverters +# v +# vvMixinCore +# v +# Validator <- this object. +class Validator(vvMixinCore.Mixin): + pass +# Custom Exceptions +class variantValidatorError(Exception): + pass diff --git a/VariantValidator/variantanalyser/functions.py b/VariantValidator/variantanalyser/functions.py index f43e7048..7762fbb3 100644 --- a/VariantValidator/variantanalyser/functions.py +++ b/VariantValidator/variantanalyser/functions.py @@ -575,7 +575,8 @@ def myc_to_p(hgvs_transcript, evm, hdp, hp, hn, vm, sf, re_to_p): hgvs_transcript.posedit.pos.end.base >= 1 and hgvs_transcript.posedit.pos.end.base <= 3 and hgvs_transcript.posedit.pos.end.offset == 0)) \ and not re.search('\*', str(hgvs_transcript.posedit.pos)): residue_one = sf.fetch_seq(associated_protein_accession, start_i=1 - 1, end_i=1) - threed_residue_one # was (MET1?) = links.one_to_three(residue_one) + threed_residue_one = links.one_to_three(residue_one) + #threed_residue_one # was (MET1?) = links.one_to_three(residue_one) r_one_report = '(%s1?)' % threed_residue_one # was (MET1?) hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', posedit=r_one_report) diff --git a/VariantValidator/variantanalyser/pseudo_vcf2hgvs.py b/VariantValidator/variantanalyser/pseudo_vcf2hgvs.py index 33f1b1ff..4ca12c36 100644 --- a/VariantValidator/variantanalyser/pseudo_vcf2hgvs.py +++ b/VariantValidator/variantanalyser/pseudo_vcf2hgvs.py @@ -14,9 +14,6 @@ from dbControls import data as va_dbCrl -# Error handling -class pseudoVCF2HGVSError(Exception): - pass # Set variables @@ -42,6 +39,9 @@ class pseudoVCF2HGVSError(Exception): # sf = hgvs.dataproviders.seqfetcher.SeqFetcher() +# Error handling +class pseudoVCF2HGVSError(Exception): + pass # pvcf is a pseudo_vcf string # genome build is a build string e.g. GRCh37 hg19 # normalization direction an integer, 5 or 3. 
From 1d5a43683f84d729f97b0abb7e3c188cc948c914 Mon Sep 17 00:00:00 2001 From: buran Date: Thu, 17 Jan 2019 08:59:03 +0000 Subject: [PATCH 007/223] Nearly done with conversion --- VariantValidator/modules/vvDatabase.py | 12 +- VariantValidator/modules/vvFunctions.py | 9 +- VariantValidator/modules/vvMixinConverters.py | 309 +++++++++- VariantValidator/modules/vvMixinCore.py | 539 +++++++++--------- 4 files changed, 569 insertions(+), 300 deletions(-) diff --git a/VariantValidator/modules/vvDatabase.py b/VariantValidator/modules/vvDatabase.py index 2faa5372..79b246fc 100644 --- a/VariantValidator/modules/vvDatabase.py +++ b/VariantValidator/modules/vvDatabase.py @@ -39,7 +39,17 @@ def query_with_fetchone(self,entry, table): logger.debug("No data returned from query "+str(query)) return row # From data + # function for adding information to database + def data_add(self, input, alt_aln_method, accession, dbaction, hp, evm, hdp): + # Add accurate transcript descriptions to the database + # RefSeq databases + # Get the Entrez (GenBank) file + self.update_transcript_info_record(accession, hdp) + entry = self.in_entries(accession, 'transcript_info') + return entry + # Retrieve transcript information + def in_entries(self,entry, table): # Use dbquery.py to connect to mysql and return the necessary data data={} @@ -72,7 +82,7 @@ def update_transcript_info_record(self,accession, hdp): hgnc_symbol = previous_entry['hgnc_symbol'] uta_symbol = previous_entry['uta_symbol'] try: - record = fn.entrez_efetch(self.val,db="nucleotide", id=accession, rettype="gb", retmode="text") + record = self.val.entrez_efetch(db="nucleotide", id=accession, rettype="gb", retmode="text") version = record.id description = record.description variant = '0' diff --git a/VariantValidator/modules/vvFunctions.py b/VariantValidator/modules/vvFunctions.py index 6de9b662..85c6b83f 100644 --- a/VariantValidator/modules/vvFunctions.py +++ b/VariantValidator/modules/vvFunctions.py @@ -33,13 +33,6 @@ def 
wrapper(self,*args,**kwargs): raise return wrapper -def entrez_efetch(val, db, id, rettype, retmode): - Entrez.email = val.entrezID - handle = Entrez.efetch(db=db, id=id, rettype=rettype, retmode=retmode) - record = SeqIO.read(handle, "gb") - handle.close() - return record - def hgnc_rest(path): data = { 'record': '', @@ -458,3 +451,5 @@ def n_inversion(ref_seq, del_seq, inv_seq, interval_start, interval_end): else: sequence = 'error' return sequence + + diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index 04f52988..c73ed0a2 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -827,7 +827,7 @@ def myevm_t_to_g(self, hgvs_c, evm, hdpOld, primary_assembly): returns parsed hgvs g. object """ - def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hnOld, hpOld, sfOld, no_norm_evm): + def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn, hpOld, sfOld, no_norm_evm): try: hgvs_genomic = evm.t_to_g(hgvs_c) self.hn.normalize(hgvs_genomic) @@ -1031,7 +1031,7 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn """ - def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hnOld, hdpOld, primary_assembly): + def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hn, hdpOld, primary_assembly): # create no_norm_evm if primary_assembly == 'GRCh38': no_norm_evm = self.no_norm_evm_38 @@ -1143,7 +1143,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hnOld, hdpOld, primary_assembly): # Ensure the altered c. 
variant has not crossed intro exon boundaries hgvs_check_boundaries = copy.deepcopy(hgvs_c) try: - h_variant = self.hn.normalize(hgvs_check_boundaries) + h_variant = hn.normalize(hgvs_check_boundaries) except hgvs.exceptions.HGVSError as e: error = str(e) if re.search('spanning the exon-intron boundary', error): @@ -1154,7 +1154,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hnOld, hdpOld, primary_assembly): reform_ident = reform_ident + ':' + stored_hgvs_c.type + '.' + str(hgvs_c.posedit.pos) + 'del' + str(hgvs_c.posedit.edit.ref)# + 'ins' + str(hgvs_c.posedit.edit.alt) hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) try: - self.hn.normalize(hgvs_reform_ident) + hn.normalize(hgvs_reform_ident) except hgvs.exceptions.HGVSError as e: error = str(e) if re.search('spanning the exon-intron boundary', error) or re.search('Normalization of intronic variants', error): @@ -1165,20 +1165,20 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hnOld, hdpOld, primary_assembly): hgvs_genomic.posedit.edit.alt = hgvs_genomic.posedit.edit.ref if hgvs_genomic.posedit.edit.type == 'ins' and utilise_gap_code is True: try: - hgvs_genomic = self.hn.normalize(hgvs_genomic) + hgvs_genomic = hn.normalize(hgvs_genomic) except hgvs.exceptions.HGVSError as e: error = str(e) if error == 'insertion length must be 1': ref = self.sf.fetch_seq(str(hgvs_genomic.ac),hgvs_genomic.posedit.pos.start.base-1,hgvs_genomic.posedit.pos.end.base) hgvs_genomic.posedit.edit.ref = ref hgvs_genomic.posedit.edit.alt = ref[0:1] + hgvs_genomic.posedit.edit.alt + ref[-1:] - hgvs_genomic = self.hn.normalize(hgvs_genomic) + hgvs_genomic = hn.normalize(hgvs_genomic) if error == 'base start position must be <= end position': start = hgvs_genomic.posedit.pos.start.base end = hgvs_genomic.posedit.pos.end.base hgvs_genomic.posedit.pos.start.base = end hgvs_genomic.posedit.pos.end.base = start - hgvs_genomic = self.hn.normalize(hgvs_genomic) + hgvs_genomic = hn.normalize(hgvs_genomic) # Statements required to 
reformat the stored_hgvs_c into a useable synonym if (stored_hgvs_c.posedit.edit.ref == '' or stored_hgvs_c.posedit.edit.ref is None) and expand_out != 'false': @@ -1201,7 +1201,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hnOld, hdpOld, primary_assembly): if expand_out == 'true': nr_genomic = self.nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) try: - self.hn.normalize(nr_genomic) + hn.normalize(nr_genomic) except hgvs.exceptions.HGVSInvalidVariantError as e: if re.match('Length implied by coordinates must equal sequence deletion length', str(e)) or str(e) == 'base start position must be <= end position': # Effectively, this code is designed to handle variants that are directly proximal to @@ -1213,7 +1213,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hnOld, hdpOld, primary_assembly): logger.warning('Variant is proximal to the flank of a genomic gap') genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) try: - self.hn.normalize(genomic_gap_variant) + hn.normalize(genomic_gap_variant) except Exception: pass else: @@ -1227,7 +1227,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hnOld, hdpOld, primary_assembly): # We have checked that the variant does not cross boundaries, or is intronic # So is likely mapping to a genomic gap try: - self.hn.normalize(genomic_gap_variant) + hn.normalize(genomic_gap_variant) except Exception as e: if str(e) == 'base start position must be <= end position': # This will only happen when the variant is fully within the gap @@ -1254,12 +1254,12 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hnOld, hdpOld, primary_assembly): pass # Should be a delins so will normalize statically and replace the reference bases - genomic_gap_variant = self.hn.normalize(genomic_gap_variant) + genomic_gap_variant = hn.normalize(genomic_gap_variant) # Static map to c. 
and static normalize transcript_gap_variant = self.vm.g_to_t(genomic_gap_variant, hgvs_c.ac) stored_transcript_gap_variant = transcript_gap_variant if not re.match('Length implied by coordinates must equal sequence deletion length', str(e)): - transcript_gap_variant = self.hn.normalize(transcript_gap_variant) + transcript_gap_variant = hn.normalize(transcript_gap_variant) # if NM_ need the n. position if re.match('NM_', str(hgvs_c.ac)): @@ -1329,7 +1329,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hnOld, hdpOld, primary_assembly): try: hgvs_genomic = self.vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) - hgvs_genomic = self.hn.normalize(hgvs_genomic) + hgvs_genomic = hn.normalize(hgvs_genomic) except Exception as e: if str(e) == "base start position must be <= end position": # Expansion out is required to map back to the genomic position @@ -1344,7 +1344,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hnOld, hdpOld, primary_assembly): except: transcript_gap_variant = transcript_gap_n hgvs_genomic = self.vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) - hgvs_genomic = self.hn.normalize(hgvs_genomic) + hgvs_genomic = hn.normalize(hgvs_genomic) # Bypass the next bit of gap code expand_out = 'false' @@ -1372,7 +1372,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hnOld, hdpOld, primary_assembly): elif expand_out == 'true' and ( len(hgvs_genomic.posedit.edit.ref) != (len(stored_hgvs_c.posedit.edit.ref) + 2)): # >= 3: if expand_out == 'true' and len(hgvs_genomic.posedit.edit.ref) == 2: - gn = self.hn.normalize(hgvs_genomic) + gn = hn.normalize(hgvs_genomic) pass # Likely if the start or end position aligns to a gap in the genomic sequence @@ -1383,7 +1383,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hnOld, hdpOld, primary_assembly): # Incorrect expansion, likely < ref + 2 genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) try: - self.hn.normalize(genomic_gap_variant) + hn.normalize(genomic_gap_variant) except Exception as e: if 
str(e) == 'base start position must be <= end position': gap_start = genomic_gap_variant.posedit.pos.end.base @@ -1397,11 +1397,11 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hnOld, hdpOld, primary_assembly): if str(e) == "'Dup' object has no attribute 'alt'": pass # Should be a delins so will normalize statically and replace the reference bases - genomic_gap_variant = self.hn.normalize(genomic_gap_variant) + genomic_gap_variant = hn.normalize(genomic_gap_variant) # Static map to c. and static normalize transcript_gap_variant = self.vm.g_to_t(genomic_gap_variant, hgvs_c.ac) stored_transcript_gap_variant = transcript_gap_variant - transcript_gap_variant = self.hn.normalize(transcript_gap_variant) + transcript_gap_variant = hn.normalize(transcript_gap_variant) # if NM_ need the n. position if re.match('NM_', str(hgvs_c.ac)): transcript_gap_n = no_norm_evm.c_to_n(transcript_gap_variant) @@ -1475,7 +1475,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hnOld, hdpOld, primary_assembly): try: hgvs_genomic = self.vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) - hgvs_genomic = self.hn.normalize(hgvs_genomic) + hgvs_genomic = hn.normalize(hgvs_genomic) except Exception as e: if str(e) == "base start position must be <= end position": # Expansion out is required to map back to the genomic position @@ -1492,12 +1492,12 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hnOld, hdpOld, primary_assembly): except: transcript_gap_variant = transcript_gap_n hgvs_genomic = self.vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) - hgvs_genomic = self.hn.normalize(hgvs_genomic) + hgvs_genomic = hn.normalize(hgvs_genomic) # Ins variants map badly - Especially between c. 
exon/exon boundary if hgvs_c.posedit.edit.type == 'ins' and hgvs_c.posedit.pos.start.offset == 0 and hgvs_c.posedit.pos.end.offset == 0: try: - self.hn.normalize(hgvs_genomic) + hn.normalize(hgvs_genomic) except hgvs.exceptions.HGVSError as e: error = str(e) if error == 'insertion length must be 1': @@ -1791,7 +1791,7 @@ def relevant_transcripts(self, hgvs_genomic, evm, hdpOld, alt_aln_method): """ - def validate(self, input, hpOld, vrOld): + def validateHGVS(self, input, hpOld, vrOld): hgvs_input = self.hp.parse_hgvs_variant(input) g = re.compile(":g.") p = re.compile(":p.") @@ -2317,3 +2317,268 @@ def hgvs_alleles(self, variant_description): exc_type, exc_value, last_traceback = sys.exc_info() te = traceback.format_exc() raise alleleVariantError(str(e)) + + # Covert chromosomal HGVS description to RefSeqGene + def chr_to_rsg(self, hgvs_genomic, hn, vrOld): + # print 'chr_to_rsg triggered' + hgvs_genomic = hn.normalize(hgvs_genomic) + # split the description + # Accessions + chr_ac = hgvs_genomic.ac + # Positions + chr_start_pos = int(hgvs_genomic.posedit.pos.start.base) + chr_end_pos = int(hgvs_genomic.posedit.pos.end.base) + # edit + chr_edit = hgvs_genomic.posedit.edit + + # Pre set variable, note there could be several + rsg_data_set = [] + + # Recover table from MySql + all_info = self.db.get.get_g_to_g_info() + for line in all_info: + # Logic to identify the correct RefSeqGene + rsg_data = {} + if chr_ac == line[1] and chr_start_pos >= int(line[2]) and chr_end_pos <= int(line[3]): + # query = "SELECT refSeqGeneID, refSeqChromosomeID, startPos, endPos, orientation, hgncSymbol FROM refSeqGene_loci" + # (u'NG_034189.1', u'NC_000004.12', 190173122, 190177845, u'+', u'DUX4L1') + # Set the values of the data dictionary + rsg_data['rsg_ac'] = line[0] + rsg_data['chr_ac'] = line[1] + rsg_data['rsg_start'] = line[2] + rsg_data['rsg_end'] = line[3] + rsg_data['ori'] = line[4] + rsg_data['gene'] = line[5] + rsg_data_set.append(rsg_data) + else: + continue + + # Compile 
descriptions and validate + descriptions = [] + for rsg_data in rsg_data_set: + rsg_ac = rsg_data['rsg_ac'] + rsg_start = rsg_data['rsg_start'] + rsg_end = rsg_data['rsg_end'] + ori = rsg_data['ori'] + gene = rsg_data['gene'] + # String the description + if ori == '+': + rsg_description = rsg_ac + ':g.' + str(chr_start_pos - int(rsg_start) + 1) + '_' + str( + chr_end_pos - int(rsg_start) + 1) + str(chr_edit) + hgvs_refseqgene = self.hp.parse_hgvs_variant(rsg_description) + try: + hgvs_refseqgene = hn.normalize(hgvs_refseqgene) + except: + error = 'Not in SeqRepo' + data = {'hgvs_refseqgene': str(hgvs_refseqgene), 'gene': gene, 'valid': str(error)} + descriptions.append(data) + continue + try: + self.vr.validate(hgvs_refseqgene) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('does not agree with reference sequence', error): + match = re.findall('\(([GATC]+)\)', error) + new_ref = match[1] + hgvs_refseqgene.posedit.edit.ref = new_ref + error = 'true' + else: + pass + data = {'hgvs_refseqgene': str(hgvs_refseqgene), 'gene': gene, 'valid': str(error)} + else: + data = {'hgvs_refseqgene': str(hgvs_refseqgene), 'gene': gene, 'valid': 'true'} + descriptions.append(data) + if ori == '-': + # Reverse complement of bases may be required. 
Let normalizer do the lifting for strings of bases + # Look for scenarios with RC needed bases and extract the bases from the edit + if re.search(r"((del[GATCUgatcu]+))", str(chr_edit)): + bases = re.search(r"((del[GATCUgatcu]+))", str(chr_edit)) + bases = bases.group(1) + chr_edit = 'del' + str(chr_edit).replace(bases, '') + if re.search(r"((ins[GATCUgatcu]+))", str(chr_edit)): + bases = re.search(r"((ins[GATCUgatcu]+))", str(chr_edit)) + bases = bases.group(1) + ins_revcomp = self.revcomp(bases) + chr_edit = str(chr_edit).replace(bases, '') + 'ins' + ins_revcomp + if re.search(r"((dup[GATCUgatcu]+))", str(chr_edit)): + bases = re.search(r"((dup[GATCUgatcu]+))", str(chr_edit)) + bases = bases.group(1) + chr_edit = 'dup' + str(chr_edit).replace(bases, '') + if re.search(r"((inv[GATCUgatcu]+))", str(chr_edit)): + bases = re.search(r"((inv[GATCUgatcu]+))", str(chr_edit)) + bases = bases.group(1) + chr_edit = 'inv' + str(chr_edit).replace(bases, '') + if re.search('>', str(chr_edit)) or re.search('=', str(chr_edit)): + chr_edit = str(chr_edit) + chr_edit = chr_edit.replace('A>', 't>') + chr_edit = chr_edit.replace('T>', 'a>') + chr_edit = chr_edit.replace('G>', 'c>') + chr_edit = chr_edit.replace('C>', 'g>') + chr_edit = chr_edit.replace('>A', '>t') + chr_edit = chr_edit.replace('>T', '>a') + chr_edit = chr_edit.replace('>G', '>c') + chr_edit = chr_edit.replace('>C', '>g') + chr_edit = chr_edit.replace('C=', 'g=') + chr_edit = chr_edit.replace('G=', 'c=') + chr_edit = chr_edit.replace('A=', 't=') + chr_edit = chr_edit.replace('T=', 'a=') + chr_edit = chr_edit.upper() + + rsg_description = rsg_ac + ':g.' 
+ str( + (int(rsg_end) - int(rsg_start)) - (chr_end_pos - int(rsg_start)) + 1) + '_' + str( + (int(rsg_end) - int(rsg_start)) - (chr_start_pos - int(rsg_start)) + 1) + str(chr_edit) + hgvs_refseqgene = self.hp.parse_hgvs_variant(rsg_description) + try: + hgvs_refseqgene = hn.normalize(hgvs_refseqgene) + except: + error = 'Not in SeqRepo' + data = {'hgvs_refseqgene': str(hgvs_refseqgene), 'gene': gene, 'valid': str(error)} + descriptions.append(data) + continue + try: + self.vr.validate(hgvs_refseqgene) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('does not agree with reference sequence', error): + match = re.findall('\(([GATC]+)\)', error) + new_ref = match[1] + hgvs_refseqgene.posedit.edit.ref = new_ref + error = 'true' + else: + pass + data = {'hgvs_refseqgene': str(hgvs_refseqgene), 'gene': gene, 'valid': str(error)} + else: + data = {'hgvs_refseqgene': str(hgvs_refseqgene), 'gene': gene, 'valid': 'true'} + descriptions.append(data) + + # Return the required data. 
This is a dictionary containing the rsg description, validation status and gene ID + return descriptions + + + # Covert RefSeqGene HGVS description to Chromosomal + def rsg_to_chr(self, hgvs_refseqgene, primary_assembly, hn, vr): + # normalize + try: + hgvs_refseqgene = hn.normalize(hgvs_refseqgene) + except: + pass + # split the description + # Accessions + rsg_ac = hgvs_refseqgene.ac + # Positions + rsg_start_pos = int(hgvs_refseqgene.posedit.pos.start.base) + rsg_end_pos = int(hgvs_refseqgene.posedit.pos.end.base) + # edit + rsg_edit = hgvs_refseqgene.posedit.edit + + # Pre set variable, note there could be several + chr_data_set = [] + + # Recover table from MySql + all_info = self.db.get.get_g_to_g_info() + for line in all_info: + # Logic to identify the correct RefSeqGene + chr_data = {} + if rsg_ac == line[0] and primary_assembly == line[6]: + # query = "SELECT refSeqGeneID, refSeqChromosomeID, startPos, endPos, orientation, hgncSymbol FROM refSeqGene_loci" + # (u'NG_034189.1', u'NC_000004.12', 190173122, 190177845, u'+', u'DUX4L1') + # Set the values of the data dictionary + chr_data['rsg_ac'] = line[0] + chr_data['chr_ac'] = line[1] + chr_data['rsg_start'] = line[2] + chr_data['rsg_end'] = line[3] + chr_data['ori'] = line[4] + chr_data['gene'] = line[5] + chr_data_set.append(chr_data) + else: + continue + + # Compile descriptions and validate + descriptions = [] + for chr_data in chr_data_set: + chr_ac = chr_data['chr_ac'] + rsg_ac = chr_data['rsg_ac'] + chr_start = int(chr_data['rsg_start']) + chr_end = int(chr_data['rsg_end']) + ori = chr_data['ori'] + gene = chr_data['gene'] + # String the description + if ori == '+': + chr_description = chr_ac + ':g.' 
+ str(chr_start + rsg_start_pos - 1) + '_' + str( + chr_start + rsg_end_pos - 1) + str(rsg_edit) + hgvs_genomic = self.hp.parse_hgvs_variant(chr_description) + hgvs_genomic = hn.normalize(hgvs_genomic) + try: + vr.validate(hgvs_genomic) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('does not agree with reference sequence', error): + match = re.findall('\(([GATC]+)\)', error) + new_ref = match[1] + hgvs_genomic.posedit.edit.ref = new_ref + error = 'true' + else: + pass + # # print str(e) + '\n3.' + data = {'hgvs_genomic': str(hgvs_genomic), 'gene': gene, 'valid': str(error)} + else: + data = {'hgvs_genomic': str(hgvs_genomic), 'gene': gene, 'valid': 'true'} + descriptions.append(data) + if ori == '-': + # Reverse complement of bases may be required. Let normalizer do the lifting for strings of bases + # Look for scenarios with RC needed bases and extract the bases from the edit + if re.search(r"((del[GATCUgatcu]+))", str(rsg_edit)): + bases = re.search(r"((del[GATCUgatcu]+))", str(rsg_edit)) + bases = bases.group(1) + rsg_edit = 'del' + str(rsg_edit).replace(bases, '') + if re.search(r"((ins[GATCUgatcu]+))", str(rsg_edit)): + bases = re.search(r"((ins[GATCUgatcu]+))", str(rsg_edit)) + bases = bases.group(1) + ins_revcomp = self.revcomp(bases) + rsg_edit = str(rsg_edit).replace(bases, '') + 'ins' + ins_revcomp + if re.search(r"((dup[GATCUgatcu]+))", str(rsg_edit)): + bases = re.search(r"((dup[GATCUgatcu]+))", str(rsg_edit)) + bases = bases.group(1) + rsg_edit = 'dup' + str(rsg_edit).replace(bases, '') + if re.search(r"((inv[GATCUgatcu]+))", str(rsg_edit)): + bases = re.search(r"((inv[GATCUgatcu]+))", str(rsg_edit)) + bases = bases.group(1) + rsg_edit = 'inv' + str(rsg_edit).replace(bases, '') + if re.search('>', str(rsg_edit)) or re.search('=', str(rsg_edit)): + rsg_edit = str(rsg_edit) + rsg_edit = rsg_edit.replace('A>', 't>') + rsg_edit = rsg_edit.replace('T>', 'a>') + rsg_edit = rsg_edit.replace('G>', 'c>') + rsg_edit = 
rsg_edit.replace('C>', 'g>') + rsg_edit = rsg_edit.replace('>A', '>t') + rsg_edit = rsg_edit.replace('>T', '>a') + rsg_edit = rsg_edit.replace('>G', '>c') + rsg_edit = rsg_edit.replace('>C', '>g') + rsg_edit = rsg_edit.replace('C=', 'g=') + rsg_edit = rsg_edit.replace('G=', 'c=') + rsg_edit = rsg_edit.replace('A=', 't=') + rsg_edit = rsg_edit.replace('T=', 'a=') + rsg_edit = rsg_edit.upper() + + chr_description = chr_ac + ':g.' + str( + int(chr_start) + (int(chr_end) - int(chr_start)) - rsg_end_pos + 1) + '_' + str( + int(chr_start) + (int(chr_end) - int(chr_start)) - rsg_start_pos + 1) + str(rsg_edit) + + hgvs_genomic = self.hp.parse_hgvs_variant(chr_description) + hgvs_genomic = hn.normalize(hgvs_genomic) + try: + vr.validate(hgvs_genomic) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('does not agree with reference sequence', error): + match = re.findall('\(([GATC]+)\)', error) + new_ref = match[1] + hgvs_genomic.posedit.edit.ref = new_ref + error = 'true' + data = {'hgvs_genomic': str(hgvs_genomic), 'gene': gene, 'valid': str(error)} + else: + data = {'hgvs_genomic': str(hgvs_genomic), 'gene': gene, 'valid': 'true'} + descriptions.append(data) + + # Return the required data. This is a dictionary containing the rsg description, validation status and gene ID + return descriptions diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 33da2a30..b5b35e4f 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -45,7 +45,6 @@ #from variantanalyser import batch as va_btch #from variantanalyser import g_to_g as va_g2g #from variantanalyser import supported_chromosome_builds as va_scb -#from variantanalyser import gap_genes as gapGenes #from variantanalyser.liftover import liftover as lift_over from vvLiftover import liftover as lift_over #??? 
@@ -834,7 +833,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr pass try: # Submit to allele extraction function - alleles = self.hgvs_alleles(input, self.hp, self.vr, self.hn, self.vm, self.sf) + alleles = self.hgvs_alleles(input, self.hp, self.vr, hn, self.vm, self.sf) validation['warnings'] = validation[ 'warnings'] + ': ' + 'Automap has extracted possible variant descriptions' logger.resub('Automap has extracted possible variant descriptions, resubmitting') @@ -1266,8 +1265,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr ) - report_gen = vvConverters.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, - primary_assembly, lose_vm, self.hp, self.hn, self.sf, self.nr_vm) + report_gen = self.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, + primary_assembly, lose_vm, self.hp, hn, self.sf, self.nr_vm) error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant: Instead use ' + fn.valstr( report_gen) except Exception as e: @@ -1291,8 +1290,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if re.search('n.1-', str(input_parses)): input_parses = evm.n_to_c(input_parses) error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' - genomic_position = vvConverters.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, - self.vm, self.hp, self.hn, self.sf, self.nr_vm) + genomic_position = self.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, + self.vm, self.hp, hn, self.sf, self.nr_vm) error = error + fn.valstr(genomic_position) validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) @@ -1307,8 +1306,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if re.search('\d\-\d', str(input_parses)) or re.search('\d\+\d', str(input_parses)): # Can we go c-g-c try: - to_genome = va_func.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, vm, - hp, hn, self.sf, nr_vm) + to_genome = self.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, self.vm, + self.hp, hn, self.sf, self.nr_vm) to_tx = evm.g_to_t(to_genome, input_parses.ac) except hgvs.exceptions.HGVSInvalidIntervalError as e: error = str(e) @@ -1348,8 +1347,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr offset = int(tot_end_pos) - int(boundary) input_parses.posedit.pos.end.offset = offset - report_gen = va_func.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, - primary_assembly, lose_vm, hp, hn, self.sf, nr_vm) + report_gen = self.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, + primary_assembly, self.lose_vm, self.hp, hn, self.sf, self.nr_vm) error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + fn.valstr( report_gen) except Exception as e: @@ -1382,13 +1381,13 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr elif re.search('\d\-', str(input_parses)) or re.search('\d\+', str(input_parses)): # Quick look at syntax validation try: - vr.validate(input_parses) + self.vr.validate(input_parses) except hgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) if re.search('bounds', error): try: report_gen = self.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, - self.lose_vm, self.hp, self.hn, self.sf, self.nr_vm) + self.lose_vm, self.hp, hn, self.sf, self.nr_vm) except hgvs.exceptions.HGVSError as e: fn.exceptPass() else: @@ -1416,7 +1415,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Create a specific minimal evm with no normalizer and no replace_reference # Have to use this method due to potential multi chromosome error, note, normalizes but does not replace sequence try: - output = self.noreplace_myevm_t_to_g(input_parses, evm, self.hdp, primary_assembly, self.vm, self.hn, + output = self.noreplace_myevm_t_to_g(input_parses, evm, self.hdp, primary_assembly, self.vm, hn, self.hp, self.sf, no_norm_evm) except hgvs.exceptions.HGVSDataNotAvailableError as e: tx_ac = input_parses.ac @@ -1573,7 +1572,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr input_parses.posedit.pos.end.base = boundary input_parses.posedit.pos.end.offset = remainder report_gen = self.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, - self.lose_vm, self.hp, self.hn, self.sf, self.nr_vm) + self.lose_vm, self.hp, hn, self.sf, self.nr_vm) error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + fn.valstr( report_gen) except Exception as e: @@ -1589,7 +1588,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if re.search('n.1-', str(input_parses)): error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' genomic_position = self.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, self.vm, - self.hp, self.hn, self.sf, self.nr_vm) + self.hp, hn, self.sf, self.nr_vm) error = error + fn.valstr(genomic_position) validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) @@ -1606,7 +1605,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if re.search('bounds', error): try: report_gen = self.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, - self.lose_vm, self.hp, self.hn, self.sf, self.nr_vm) + self.lose_vm, self.hp, hn, self.sf, self.nr_vm) except hgvs.exceptions.HGVSError as e: fn.exceptPass() else: @@ -1633,13 +1632,13 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr elif re.search('Cannot validate sequence of an intronic variant', error): try: test_g = self.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, self.vm, - self.hp, self.hn, self.sf, self.nr_vm) + self.hp, hn, self.sf, self.nr_vm) back_to_n = evm.g_to_t(test_g, input_parses.ac) except hgvs.exceptions.HGVSError as e: error = str(e) if re.search('bounds', error): report_gen = self.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, - primary_assembly, self.lose_vm, self.hp, self.hn, self.sf, self.nr_vm) + primary_assembly, self.lose_vm, self.hp, hn, self.sf, self.nr_vm) error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + fn.valstr( report_gen) validation['warnings'] = validation['warnings'] + ': ' + str(error) @@ -1651,7 +1650,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Create a specific minimal evm with no normalizer and no replace_reference # Have to use this method due to potential multi chromosome error, note, normalizes but does not replace sequence try: - output = self.noreplace_myevm_t_to_g(input_parses, evm, self.hdp, primary_assembly, self.vm, self.hn, + output = self.noreplace_myevm_t_to_g(input_parses, evm, self.hdp, primary_assembly, self.vm, hn, self.hp, self.sf, no_norm_evm) except hgvs.exceptions.HGVSDataNotAvailableError as e: tx_ac = input_parses.ac @@ -1800,11 +1799,11 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = 'false' # Try to validate the variant try: - hgvs_object = hp.parse_hgvs_variant(variant) + hgvs_object = self.hp.parse_hgvs_variant(variant) except hgvs.exceptions.HGVSError as e: error = str(e) try: - vr.validate(hgvs_object) + self.vr.validate(hgvs_object) except hgvs.exceptions.HGVSError as e: error = str(e) if error != 'false': @@ -1817,17 +1816,17 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if alt_aln_method != 'genebuild': # Gene description - requires GenBank search to get all the required info, i.e. 
transcript variant ID # accession number - hgvs_object = hp.parse_hgvs_variant(variant) + hgvs_object = self.hp.parse_hgvs_variant(variant) accession = hgvs_object.ac # Look for the accession in our database # Connect to database and send request - record = va_func.entrez_efetch(db="nuccore", id=accession, rettype="gb", retmode="text") + record = self.entrez_efetch(db="nuccore", id=accession, rettype="gb", retmode="text") try: description = record.description except: description = 'Unable to recover the description of ' + accession + ' from Entrez' try: - vr.validate(hgvs_object) + self.vr.validate(hgvs_object) except hgvs.exceptions.HGVSError as e: error = str(e) else: @@ -1845,12 +1844,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr """ trapped_input = input if type == ':r.': - hgvs_input = hp.parse_hgvs_variant(input) # Traps the hgvs variant of r. for further use + hgvs_input = self.hp.parse_hgvs_variant(input) # Traps the hgvs variant of r. for further use # Change to coding variant type = ':c.' # Change input to reflect! 
try: - hgvs_c = va_func.hgvs_r_to_c(hgvs_input) + hgvs_c = self.va_func.hgvs_r_to_c(hgvs_input) except hgvs.exceptions.HGVSDataNotAvailableError as e: error = str(e) validation['warnings'] = validation['warnings'] + ': ' + str(error) @@ -1867,7 +1866,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr """ if (type != ':g.'): error = 'false' - hgvs_vt = hp.parse_hgvs_variant(variant) + hgvs_vt = self.hp.parse_hgvs_variant(variant) try: tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) except hgvs.exceptions.HGVSError as e: @@ -1888,8 +1887,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # ACCESS THE GENE INFORMATION RECORDS ON THE UTA DATABASE # Refseq accession - tx_for_gene = va_func.tx_for_gene(hgnc, self.hdp) - refseq_ac = va_func.ng_extract(tx_for_gene) + tx_for_gene = self.tx_for_gene(hgnc, self.hdp) + refseq_ac = self.ng_extract(tx_for_gene) # Additional gene info gene_info = self.hdp.get_gene_info(hgnc) @@ -1905,11 +1904,11 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if alt_aln_method != 'genebuild': # Gene description - requires GenBank search to get all the required info, i.e. 
transcript variant ID # accession number - hgvs_object = hp.parse_hgvs_variant(variant) + hgvs_object = self.hp.parse_hgvs_variant(variant) accession = hgvs_object.ac # Look for the accession in our database # Connect to database and send request - entry = va_dbCrl.data.in_entries(accession, 'transcript_info') + entry = self.db.in_entries(accession, 'transcript_info') # Analyse the returned data and take the necessary actions # If the error key exists @@ -1928,8 +1927,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if entry['expiry'] == 'true': dbaction = 'update' try: - entry = va_btch.data_add(input=input, alt_aln_method=alt_aln_method, - accession=accession, dbaction=dbaction, hp=hp, evm=evm, + entry = self.db.data_add(input=input, alt_aln_method=alt_aln_method, + accession=accession, dbaction=dbaction, hp=self.hp, evm=evm, hdp=self.hdp) except hgvs.exceptions.HGVSError as e: error = 'Transcript %s is not currently supported' % (accession) @@ -1948,8 +1947,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr elif 'none' in entry: dbaction = 'insert' try: - entry = va_btch.data_add(input=input, alt_aln_method=alt_aln_method, - accession=accession, dbaction=dbaction, hp=hp, evm=evm, + entry = self.db.data_add(input=input, alt_aln_method=alt_aln_method, + accession=accession, dbaction=dbaction, hp=self.hp, evm=evm, hdp=self.hdp) except Exception as e: logger.warning(str(e)) @@ -1971,11 +1970,11 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Ensembl databases else: # accession number - hgvs_object = hp.parse_hgvs_variant(variant) + hgvs_object = self.hp.parse_hgvs_variant(variant) accession = hgvs_object.ac # Look for the accession in our database # Connect to database and send request - entry = va_dbCrl.data.in_entries(accession, 'transcript_info') + entry = self.db.in_entries(accession, 'transcript_info') # Analyse the returned data and take the necessary 
actions # If the error key exists @@ -1993,8 +1992,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # If the current entry is too old if entry['expiry'] == 'true': dbaction = 'update' - entry = va_btch.data_add(input=input, alt_aln_method=alt_aln_method, - accession=accession, dbaction=dbaction, hp=hp, evm=evm, + entry = self.db.data_add(input=input, alt_aln_method=alt_aln_method, + accession=accession, dbaction=dbaction, hp=self.hp, evm=evm, hdp=self.hdp) hgnc_gene_info = entry['description'] else: @@ -2003,8 +2002,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr elif 'none' in entry: dbaction = 'insert' try: - entry = va_btch.data_add(input=input, alt_aln_method=alt_aln_method, - accession=accession, dbaction=dbaction, hp=hp, evm=evm, + entry = self.db.data_add(input=input, alt_aln_method=alt_aln_method, + accession=accession, dbaction=dbaction, hp=self.hp, evm=evm, hdp=self.hdp) except Exception as e: logger.warning(str(e)) @@ -2030,12 +2029,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr """ if (type == ':g.'): - g_query = hp.parse_hgvs_variant(variant) + g_query = self.hp.parse_hgvs_variant(variant) # Genomic coordinates can be validated immediately error = 'false' try: - vr.validate(g_query) + self.vr.validate(g_query) except hgvs.exceptions.HGVSError as e: error = str(e) except KeyError: @@ -2064,19 +2063,19 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr Initial simple projection from the provided g. 
position all overlapping transcripts """ - rel_var = va_func.relevant_transcripts(hgvs_genomic, evm, self.hdp, alt_aln_method, reverse_normalizer) + rel_var = self.relevant_transcripts(hgvs_genomic, evm, self.hdp, alt_aln_method, reverse_normalizer) # Double check rel_vars have not been missed when mapping from a RefSeqGene if len(rel_var) != 0 and re.match('NG_', str(hgvs_genomic.ac)): for var in rel_var: - hgvs_coding_variant = hp.parse_hgvs_variant(var) + hgvs_coding_variant = self.hp.parse_hgvs_variant(var) try: - hgvs_genomic = va_func.myevm_t_to_g(hgvs_coding_variant, self.hdp, no_norm_evm, - primary_assembly, vm, hp, hn, self.sf, nr_vm) + hgvs_genomic = self.myevm_t_to_g(hgvs_coding_variant, self.hdp, no_norm_evm, + primary_assembly, self.vm, self.hp, hn, self.sf, self.nr_vm) except hgvs.exceptions.HGVSError as e: try_rel_var = [] else: - try_rel_var = va_func.relevant_transcripts(hgvs_genomic, evm, self.hdp, alt_aln_method, + try_rel_var = self.relevant_transcripts(hgvs_genomic, evm, self.hdp, alt_aln_method, reverse_normalizer) if len(try_rel_var) > len(rel_var): rel_var = try_rel_var @@ -2090,8 +2089,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr not_di = str(hgvs_genomic.ac) + ':g.' 
+ str(vcf_dict['pos']) + '_' + str( int(vcf_dict['pos']) + (len(vcf_dict['ref']) - 1)) + 'del' + vcf_dict['ref'] + 'ins' + \ vcf_dict['alt'] - hgvs_not_di = hp.parse_hgvs_variant(not_di) - rel_var = va_func.relevant_transcripts(hgvs_not_di, evm, self.hdp, alt_aln_method, + hgvs_not_di = self.hp.parse_hgvs_variant(not_di) + rel_var = self.relevant_transcripts(hgvs_not_di, evm, self.hdp, alt_aln_method, reverse_normalizer) # list return statements @@ -2104,9 +2103,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr rsg = re.compile('^NG_') if rsg.search(variant): # parse - hgvs_refseqgene = hp.parse_hgvs_variant(variant) + hgvs_refseqgene = self.hp.parse_hgvs_variant(variant) # Convert to chromosomal position - refseqgene_data = va_g2g.rsg_to_chr(hgvs_refseqgene, primary_assembly, hn, vr) + refseqgene_data = self.rsg_to_chr(hgvs_refseqgene, primary_assembly, hn, self.vr) # There should only ever be one description returned refseqgene_data = refseqgene_data[0] @@ -2135,7 +2134,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr sfm = vvChromosomes.supported_for_mapping(hgvs_genomic.ac, primary_assembly) if sfm == 'true': try: - vr.validate(hgvs_genomic) + self.vr.validate(hgvs_genomic) except hgvs.exceptions.HGVSError as e: error = str(e) validation['warnings'] = validation['warnings'] + ': ' + str(error) @@ -2143,12 +2142,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue else: # Map to RefSeqGene if available - refseqgene_data = va_g2g.chr_to_rsg(hgvs_genomic, hn, vr) + refseqgene_data = self.chr_to_rsg(hgvs_genomic, hn, self.vr) rsg_data = '' # Example {'gene': 'NTHL1', 'hgvs_refseqgene': 'NG_008412.1:g.3455_3464delCAAACACACA', 'valid': 'true'} for data in refseqgene_data: if data['valid'] == 'true': - data['hgvs_refseqgene'] = hp.parse_hgvs_variant(data['hgvs_refseqgene']) + data['hgvs_refseqgene'] = self.hp.parse_hgvs_variant(data['hgvs_refseqgene']) 
data['hgvs_refseqgene'] = fn.valstr(data['hgvs_refseqgene']) rsg_data = rsg_data + data['hgvs_refseqgene'] + ' (' + data['gene'] + '), ' @@ -2207,18 +2206,18 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr stash_end = end # Re-Analyse genomic positions if re.match('NG_', str(stash_input)): - c = hp.parse_hgvs_variant(rel_var[0]) + c = self.hp.parse_hgvs_variant(rel_var[0]) if hasattr(c.posedit.edit, 'ref') and c.posedit.edit.ref is not None: c.posedit.edit.ref = c.posedit.edit.ref.upper() if hasattr(c.posedit.edit, 'alt') and c.posedit.edit.alt is not None: c.posedit.edit.alt = c.posedit.edit.alt.upper() - stash_input = va_func.myevm_t_to_g(c, self.hdp, no_norm_evm, primary_assembly, vm, hp, hn, self.sf, - nr_vm) + stash_input = self.myevm_t_to_g(c, self.hdp, no_norm_evm, primary_assembly, self.vm, self.hp, hn, self.sf, + self.nr_vm) if re.match('NC_', str(stash_input)) or re.match('NT_', str(stash_input)) or re.match('NW_', str( stash_input)): try: - hgvs_stash = hp.parse_hgvs_variant(stash_input) + hgvs_stash = self.hp.parse_hgvs_variant(stash_input) except: hgvs_stash = stash_input if hasattr(hgvs_stash.posedit.edit, 'ref') and hgvs_stash.posedit.edit.ref is not None: @@ -2237,9 +2236,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr stash_end = str(stash_pos + len(stash_ref) - 1) # Store a not real deletion insertion - stored_hgvs_not_delins = hp.parse_hgvs_variant(str( + stored_hgvs_not_delins = self.hp.parse_hgvs_variant(str( hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) - stash_hgvs_not_delins = hp.parse_hgvs_variant( + stash_hgvs_not_delins = self.hp.parse_hgvs_variant( stash_ac + ':' + hgvs_genomic_5pr.type + '.' 
+ str( stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) @@ -2252,7 +2251,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # loop through rel_var and amend where required for var in rel_var: # Store the current hgvs:c. description - saved_hgvs_coding = hp.parse_hgvs_variant(var) + saved_hgvs_coding = self.hp.parse_hgvs_variant(var) # Remove un-selected transcripts if select_transcripts != 'all': @@ -2265,7 +2264,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue # Get orientation of the gene wrt genome and a list of exons mapped to the genome - ori = va_func.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=hgvs_genomic_5pr.ac, + ori = self.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=hgvs_genomic_5pr.ac, alt_aln_method=alt_aln_method, hdp=self.hdp) orientation = int(ori[0]['alt_strand']) intronic_variant = 'false' @@ -2480,8 +2479,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, vm, hp, hn, self.sf, nr_vm) + hgvs_not_delins = self.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, + primary_assembly, self.vm, self.hp, hn, self.sf, self.nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): @@ -2493,8 +2492,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, vm, hp, hn, self.sf, nr_vm) + hgvs_not_delins = self.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, + primary_assembly, self.vm, self.hp, hn, self.sf, self.nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, 
str(saved_hgvs_coding.ac)) rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 @@ -2529,8 +2528,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, vm, hp, hn, self.sf, nr_vm) + hgvs_not_delins = self.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, + primary_assembly, self.vm, self.hp, hn, self.sf, self.nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): @@ -2543,8 +2542,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, vm, hp, hn, self.sf, nr_vm) + hgvs_not_delins = self.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, + primary_assembly, self.vm, self.hp, hn, self.sf, self.nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 @@ -2563,7 +2562,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr rn_tx_hgvs_not_delins.posedit.edit.ref) disparity_deletion_in = ['transcript', gap_length] else: - hgvs_stash_t = vm.g_to_t(stash_hgvs_not_delins, saved_hgvs_coding.ac) + hgvs_stash_t = self.vm.g_to_t(stash_hgvs_not_delins, saved_hgvs_coding.ac) if len(stash_hgvs_not_delins.posedit.edit.ref) > len( hgvs_stash_t.posedit.edit.ref): try: @@ -2575,14 +2574,14 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_stash_t.posedit.edit.ref) disparity_deletion_in = ['transcript', gap_length] try: - tx_hgvs_not_delins = vm.c_to_n(hgvs_stash_t) + tx_hgvs_not_delins = self.vm.c_to_n(hgvs_stash_t) except: tx_hgvs_not_delins = 
hgvs_stash_t hgvs_not_delins = stash_hgvs_not_delins elif hgvs_stash_t.posedit.pos.start.offset != 0 or hgvs_stash_t.posedit.pos.end.offset != 0: disparity_deletion_in = ['transcript', 'Requires Analysis'] try: - tx_hgvs_not_delins = vm.c_to_n(hgvs_stash_t) + tx_hgvs_not_delins = self.vm.c_to_n(hgvs_stash_t) except: tx_hgvs_not_delins = hgvs_stash_t hgvs_not_delins = stash_hgvs_not_delins @@ -2592,7 +2591,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Final sanity checks try: - vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + self.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) except Exception as e: if str(e) == 'start or end or both are beyond the bounds of transcript record': hgvs_not_delins = saved_hgvs_coding @@ -2624,7 +2623,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( tx_hgvs_not_delins.posedit.pos.start) + '_' + str( tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) + tx_hgvs_not_delins = self.hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) # GAP IN THE TRANSCRIPT DISPARITY DETECTED if disparity_deletion_in[0] == 'transcript': @@ -2650,7 +2649,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( tx_gap_fill_variant.posedit.pos.start) + '_' + str( tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = hp.parse_hgvs_variant( + tx_gap_fill_variant = self.hp.parse_hgvs_variant( tx_gap_fill_variant_delins_from_dup) # Identify which half of the NOT-intron the start position of the variant is in @@ -2668,18 +2667,18 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_gap_fill_variant.posedit.edit.ref = '' try: - tx_gap_fill_variant = vm.n_to_c(tx_gap_fill_variant) + tx_gap_fill_variant = self.vm.n_to_c(tx_gap_fill_variant) except: fn.exceptPass() - genomic_gap_fill_variant = vm.t_to_g(tx_gap_fill_variant, + genomic_gap_fill_variant = self.vm.t_to_g(tx_gap_fill_variant, reverse_normalized_hgvs_genomic.ac) genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref try: - c_tx_hgvs_not_delins = vm.n_to_c(tx_hgvs_not_delins) + c_tx_hgvs_not_delins = self.vm.n_to_c(tx_hgvs_not_delins) except Exception: c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = vm.t_to_g(c_tx_hgvs_not_delins, + genomic_gap_fill_variant_alt = self.vm.t_to_g(c_tx_hgvs_not_delins, hgvs_genomic_5pr.ac) # Ensure an ALT exists @@ -2691,12 +2690,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' 
+ str( genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = hp.parse_hgvs_variant( + genomic_gap_fill_variant = self.hp.parse_hgvs_variant( genomic_gap_fill_variant_delins_from_dup) genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = hp.parse_hgvs_variant( + genomic_gap_fill_variant_alt = self.hp.parse_hgvs_variant( genomic_gap_fill_variant_alt_delins_from_dup) # Correct insertion alts @@ -2754,7 +2753,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Add the new alt to the gap fill variant and generate transcript variant genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = vm.g_to_t(genomic_gap_fill_variant, + hgvs_refreshed_variant = self.vm.g_to_t(genomic_gap_fill_variant, tx_gap_fill_variant.ac) # Set warning @@ -2783,23 +2782,23 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # the transcript variant but do not have a position which actually hits the gap, # so the variant likely spans the gap, and is not picked up by an offset. 
try: - c1 = vm.n_to_c(tx_hgvs_not_delins) + c1 = self.vm.n_to_c(tx_hgvs_not_delins) except: c1 = tx_hgvs_not_delins - g1 = nr_vm.t_to_g(c1, hgvs_genomic.ac) - g3 = nr_vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g3 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) ng2 = hn.normalize(g2) g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( len(g3.posedit.edit.ref) - 1) try: - c2 = vm.g_to_t(g3, c1.ac) + c2 = self.vm.g_to_t(g3, c1.ac) if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: pass else: tx_hgvs_not_delins = c2 try: - tx_hgvs_not_delins = vm.c_to_n(tx_hgvs_not_delins) + tx_hgvs_not_delins = self.vm.c_to_n(tx_hgvs_not_delins) except hgvs.exceptions.HGVSError: fn.exceptPass() except hgvs.exceptions.HGVSInvalidVariantError: @@ -2814,7 +2813,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_hgvs_not_delins.ac) non_valid_caution = 'true' try: - c2 = vm.n_to_c(tx_hgvs_not_delins) + c2 = self.vm.n_to_c(tx_hgvs_not_delins) except: c2 = tx_hgvs_not_delins c1 = copy.deepcopy(c2) @@ -2824,12 +2823,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr c1.posedit.edit.ref = '' c1.posedit.edit.alt = '' if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) g1.posedit.edit.alt = g1.posedit.edit.ref else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) g2.posedit.edit.alt = g2.posedit.edit.ref reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] @@ -2837,7 +2836,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr g3.posedit.pos.end.base = 
g2.posedit.pos.end.base g3.posedit.edit.ref = reference g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) + c3 = self.vm.g_to_t(g3, c1.ac) hgvs_refreshed_variant = c3 # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) @@ -2858,7 +2857,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) non_valid_caution = 'true' try: - c1 = vm.n_to_c(tx_hgvs_not_delins) + c1 = self.vm.n_to_c(tx_hgvs_not_delins) except: c1 = tx_hgvs_not_delins c2 = copy.deepcopy(c1) @@ -2868,12 +2867,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr c2.posedit.edit.ref = '' c2.posedit.edit.alt = '' if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) g2.posedit.edit.alt = g2.posedit.edit.ref else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) g1.posedit.edit.alt = g1.posedit.edit.ref reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] @@ -2881,7 +2880,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr g3.posedit.pos.end.base = g2.posedit.pos.end.base g3.posedit.edit.ref = reference g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) + c3 = self.vm.g_to_t(g3, c1.ac) hgvs_refreshed_variant = c3 # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) @@ -2902,7 +2901,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_hgvs_not_delins.ac) non_valid_caution = 'true' try: - c2 = vm.n_to_c(tx_hgvs_not_delins) + c2 = self.vm.n_to_c(tx_hgvs_not_delins) except: c2 = tx_hgvs_not_delins c1 = copy.deepcopy(c2) @@ -2912,12 +2911,12 
@@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr c1.posedit.edit.ref = '' c1.posedit.edit.alt = '' if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) g1.posedit.edit.alt = g1.posedit.edit.ref else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) g2.posedit.edit.alt = g2.posedit.edit.ref reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] @@ -2925,7 +2924,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr g3.posedit.pos.end.base = g2.posedit.pos.end.base g3.posedit.edit.ref = reference g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) + c3 = self.vm.g_to_t(g3, c1.ac) hgvs_refreshed_variant = c3 # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) @@ -2946,7 +2945,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) non_valid_caution = 'true' try: - c1 = vm.n_to_c(tx_hgvs_not_delins) + c1 = self.vm.n_to_c(tx_hgvs_not_delins) except: c1 = tx_hgvs_not_delins c2 = copy.deepcopy(c1) @@ -2955,8 +2954,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr c2.posedit.pos.end.offset = 0 c2.posedit.edit.ref = '' c2.posedit.edit.alt = '' - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - c2 = vm.g_to_t(g2, c2.ac) + g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) + c2 = self.vm.g_to_t(g2, c2.ac) reference = c1.posedit.edit.ref + c2.posedit.edit.ref[1:] alternate = c1.posedit.edit.alt + c2.posedit.edit.ref[1:] c3 = copy.deepcopy(c1) @@ -3005,7 +3004,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr stash_ref_right = 
stash_dict_right['ref'] stash_alt_right = stash_dict_right['alt'] stash_end_right = str(stash_pos_right + len(stash_ref_right) - 1) - stash_hgvs_not_delins_right = hp.parse_hgvs_variant( + stash_hgvs_not_delins_right = self.hp.parse_hgvs_variant( stash_ac + ':' + hgvs_stash.type + '.' + str( stash_pos_right) + '_' + stash_end_right + 'del' + stash_ref_right + 'ins' + stash_alt_right) stash_dict_left = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, primary_assembly, @@ -3014,12 +3013,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr stash_ref_left = stash_dict_left['ref'] stash_alt_left = stash_dict_left['alt'] stash_end_left = str(stash_pos_left + len(stash_ref_left) - 1) - stash_hgvs_not_delins_left = hp.parse_hgvs_variant( + stash_hgvs_not_delins_left = self.hp.parse_hgvs_variant( stash_ac + ':' + hgvs_stash.type + '.' + str( stash_pos_left) + '_' + stash_end_left + 'del' + stash_ref_left + 'ins' + stash_alt_left) # Map in-situ to the transcript left and right try: - tx_hard_right = vm.g_to_t(stash_hgvs_not_delins_right, saved_hgvs_coding.ac) + tx_hard_right = self.vm.g_to_t(stash_hgvs_not_delins_right, saved_hgvs_coding.ac) except Exception as e: tx_hard_right = saved_hgvs_coding else: @@ -3027,7 +3026,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if str(normalize_stash_right.posedit) == str(stash_hgvs_not_delins.posedit): tx_hard_right = saved_hgvs_coding try: - tx_hard_left = vm.g_to_t(stash_hgvs_not_delins_left, saved_hgvs_coding.ac) + tx_hard_left = self.vm.g_to_t(stash_hgvs_not_delins_left, saved_hgvs_coding.ac) except Exception as e: tx_hard_left = saved_hgvs_coding else: @@ -3136,7 +3135,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Flag for validation valid = 'false' # Collect information for genomic level validation - obj = hp.parse_hgvs_variant(variant) + obj = self.hp.parse_hgvs_variant(variant) tx_ac = obj.ac @@ -3157,7 +3156,7 @@ def 
validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Se rec_var to '' so it can be updated later rec_var = '' try: - to_g = va_func.myevm_t_to_g(obj, self.hdp, no_norm_evm, primary_assembly, vm, hp, hn, self.sf, nr_vm) + to_g = self.myevm_t_to_g(obj, self.hdp, no_norm_evm, primary_assembly, self.vm, self.hp, hn, self.sf, self.nr_vm) genomic_ac = to_g.ac except hgvs.exceptions.HGVSDataNotAvailableError as e: if (re.search('~', str(e)) and re.search('Alignment is incomplete', str(e))) or re.match( @@ -3176,7 +3175,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr logger.warning(str(error)) continue try: - gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(tx_ac) + gene_symbol = self.db.get.get_gene_symbol_from_transcriptID(tx_ac) except: gene_symbol = 'none' if gene_symbol == 'none': @@ -3188,7 +3187,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue except TypeError as e: try: - gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(tx_ac) + gene_symbol = self.db.get.get_gene_symbol_from_transcriptID(tx_ac) except: gene_symbol = 'none' if gene_symbol == 'none': @@ -3200,7 +3199,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue # Get orientation of the gene wrt genome and a list of exons mapped to the genome - ori = va_func.tx_exons(tx_ac=tx_ac, alt_ac=genomic_ac, alt_aln_method=alt_aln_method, hdp=self.hdp) + ori = self.tx_exons(tx_ac=tx_ac, alt_ac=genomic_ac, alt_aln_method=alt_aln_method, hdp=self.hdp) orientation = int(ori[0]['alt_strand']) intronic_variant = 'false' @@ -3240,12 +3239,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: # Normalize was I believe to replace ref. 
Mapping does this anyway # to_g = hn.normalize(to_g) - variant = str(va_func.myevm_g_to_t(evm, to_g, tx_ac)) + variant = str(self.myevm_g_to_t(evm, to_g, tx_ac)) tx_ac = '' elif geno.search(input): if plus.search(variant) or minus.search(variant): - to_g = va_func.genomic(variant, no_norm_evm, hp, self.hdp, primary_assembly, vm, hn, self.sf, nr_vm) + to_g = self.genomic(variant, no_norm_evm, self.hp, self.hdp, primary_assembly, self.vm, hn, self.sf, self.nr_vm) es = re.compile('error') if es.search(str(to_g)): if alt_aln_method != 'genebuild': @@ -3272,7 +3271,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: # Normalize was I believe to replace ref. Mapping does this anyway # to_g = hn.normalize(to_g) - variant = str(va_func.myevm_g_to_t(evm, to_g, tx_ac)) + variant = str(self.myevm_g_to_t(evm, to_g, tx_ac)) tx_ac = '' else: @@ -3310,7 +3309,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if boundary == 'false': cross_variant = 'false' - error = va_func.validate(variant, hp=hp, vr=vr) + error = self.validateHGVS(variant, hp=self.hp, vr=self.vr) if error == 'false': valid = 'true' else: @@ -3355,15 +3354,15 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # RNA if pat_r.search(trapped_input): - coding = va_func.coding(variant, hp) + coding = self.coding(variant, self.hp) trans_acc = coding.ac # c to Genome coordinates - Map the variant to the genome - pre_var = va_func.genomic(variant, no_norm_evm, hp, self.hdp, primary_assembly, vm, hn, self.sf, - nr_vm) + pre_var = self.genomic(variant, no_norm_evm, self.hp, self.hdp, primary_assembly, self.vm, hn, self.sf, + self.nr_vm) # genome back to C coordinates - post_var = va_func.myevm_g_to_t(evm, pre_var, trans_acc) + post_var = self.myevm_g_to_t(evm, pre_var, trans_acc) - test = hp.parse_hgvs_variant(input) + test = self.hp.parse_hgvs_variant(input) if post_var.posedit.pos.start.base != 
test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' @@ -3385,7 +3384,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr del rel_var[:] for accessions in cp_rel: error = 'false' - hgvs_vt = hp.parse_hgvs_variant(str(accessions)) + hgvs_vt = self.hp.parse_hgvs_variant(str(accessions)) try: tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) except hgvs.exceptions.HGVSError as e: @@ -3396,7 +3395,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: # Get hgnc Gene name from command - data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) + data = self.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) if data['error'] != 'false': error = data['error'] validation['warnings'] = validation['warnings'] + ': ' + str(error) @@ -3416,7 +3415,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr del rel_var[:] for accessions in cp_rel: error = 'false' - hgvs_vt = hp.parse_hgvs_variant(str(accessions[1])) + hgvs_vt = self.hp.parse_hgvs_variant(str(accessions[1])) try: tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) except hgvs.exceptions.HGVSError as e: @@ -3426,7 +3425,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr rel_var.append(accessions) else: # Get hgnc Gene name from command - data = va_func.hgnc_rest( + data = self.hgnc_rest( path="/search/prev_symbol/" + tx_id_info[6]) if data['error'] != 'false': error = data['error'] @@ -3455,13 +3454,13 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Coding else: - coding = va_func.coding(variant, hp) + coding = self.coding(variant, self.hp) trans_acc = 
coding.ac # c to Genome coordinates - Map the variant to the genome - pre_var = hp.parse_hgvs_variant(variant) + pre_var = self.hp.parse_hgvs_variant(variant) try: - pre_var = va_func.myevm_t_to_g(pre_var, self.hdp, no_norm_evm, primary_assembly, vm, hp, - hn, self.sf, nr_vm) + pre_var = self.myevm_t_to_g(pre_var, self.hdp, no_norm_evm, primary_assembly, self.vm, self.hp, + hn, self.sf, self.nr_vm) except: e = sys.exc_info()[1] error = str(e) @@ -3476,13 +3475,13 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr fn.exceptPass() # genome back to C coordinates try: - post_var = va_func.myevm_g_to_t(evm, pre_var, trans_acc) + post_var = self.myevm_g_to_t(evm, pre_var, trans_acc) except hgvs.exceptions.HGVSError as error: validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue query = post_var - test = hp.parse_hgvs_variant(input) + test = self.hp.parse_hgvs_variant(input) if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' @@ -3498,7 +3497,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr del rel_var[:] for accessions in cp_rel: error = 'false' - hgvs_vt = hp.parse_hgvs_variant(str(accessions)) + hgvs_vt = self.hp.parse_hgvs_variant(str(accessions)) try: tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) except hgvs.exceptions.HGVSError as e: @@ -3508,7 +3507,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr rel_var.append(accessions) else: # Get hgnc Gene name from command - data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) + data = self.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) 
if data['error'] != 'false': error = data['error'] validation['warnings'] = validation['warnings'] + ': ' + str(error) @@ -3529,7 +3528,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr del rel_var[:] for accessions in cp_rel: error = 'false' - hgvs_vt = hp.parse_hgvs_variant(str(accessions[1])) + hgvs_vt = self.parse_hgvs_variant(str(accessions[1])) try: tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) except hgvs.exceptions.HGVSError as e: @@ -3539,7 +3538,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr rel_var.append(accessions) else: # Get hgnc Gene name from command - data = va_func.hgnc_rest( + data = self.hgnc_rest( path="/search/prev_symbol/" + tx_id_info[6]) if data['error'] != 'false': error = data['error'] @@ -3569,15 +3568,15 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: if pat_r.search(trapped_input): - coding = va_func.coding(variant, hp) + coding = self.coding(variant, self.hp) trans_acc = coding.ac # c to Genome coordinates - Map the variant to the genome - pre_var = va_func.genomic(variant, no_norm_evm, hp, self.hdp, primary_assembly, vm, hn, self.sf, - nr_vm) + pre_var = self.genomic(variant, no_norm_evm, self.hp, self.hdp, primary_assembly, self.vm, hn, self.sf, + self.nr_vm) # genome back to C coordinates - post_var = va_func.myevm_g_to_t(evm, pre_var, trans_acc) + post_var = self.myevm_g_to_t(evm, pre_var, trans_acc) - test = hp.parse_hgvs_variant(input) + test = self.hp.parse_hgvs_variant(input) if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' @@ -3599,7 +3598,7 @@ def validate(self, batch_variant, selected_assembly, 
select_transcripts, transcr del rel_var[:] for accessions in cp_rel: error = 'false' - hgvs_vt = hp.parse_hgvs_variant(str(accessions)) + hgvs_vt = self.hp.parse_hgvs_variant(str(accessions)) try: tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) except hgvs.exceptions.HGVSError as e: @@ -3609,7 +3608,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr rel_var.append(accessions) else: # Get hgnc Gene name from command - data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) + data = self.va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) if data['error'] != 'false': error = data['error'] validation['warnings'] = validation['warnings'] + ': ' + str(error) @@ -3636,15 +3635,15 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr batch_list.append(query) else: - coding = va_func.coding(variant, hp) + coding = self.coding(variant, self.hp) trans_acc = coding.ac # c to Genome coordinates - Map the variant to the genome - pre_var = va_func.genomic(variant, no_norm_evm, hp, self.hdp, primary_assembly, vm, hn, self.sf, - nr_vm) + pre_var = self.genomic(variant, no_norm_evm, self.hp, self.hdp, primary_assembly, self.vm, hn, self.sf, + self.nr_vm) # genome back to C coordinates - post_var = va_func.myevm_g_to_t(evm, pre_var, trans_acc) + post_var = self.myevm_g_to_t(evm, pre_var, trans_acc) - test = hp.parse_hgvs_variant(input) + test = self.hp.parse_hgvs_variant(input) if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' @@ -3660,7 +3659,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr del rel_var[:] for accessions in cp_rel: error = 'false' - 
hgvs_vt = hp.parse_hgvs_variant(str(accessions)) + hgvs_vt = self.hp.parse_hgvs_variant(str(accessions)) try: tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) except hgvs.exceptions.HGVSError as e: @@ -3670,7 +3669,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr rel_var.append(accessions) else: # Get hgnc Gene name from command - data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) + data = self.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) if data['error'] != 'false': reason = 'Cannot currently display the required information:' error = data['error'] @@ -3701,14 +3700,14 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # If cck not true elif pat_r.search(trapped_input): # set input hgvs object - hgvs_rna_input = hp.parse_hgvs_variant( + hgvs_rna_input = self.hp.parse_hgvs_variant( trapped_input) # Traps the hgvs variant of r. for further use - inp = str(va_func.hgvs_r_to_c(hgvs_rna_input)) + inp = str(self.hgvs_r_to_c(hgvs_rna_input)) # Regex plus = re.compile("\d\+\d") # finds digit + digit minus = re.compile("\d\-\d") # finds digit - digit if plus.search(input) or minus.search(input): - to_g = va_func.genomic(inp, no_norm_evm, hp, self.hdp, primary_assembly, vm, hn, self.sf, nr_vm) + to_g = self.genomic(inp, no_norm_evm, self.hp, self.hdp, primary_assembly, self.vm, hn, self.sf, self.nr_vm) es = re.compile('error') if es.search(str(to_g)): if alt_aln_method != 'genebuild': @@ -3729,13 +3728,13 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: # Set variants pre and post genomic norm - hgvs_inp = va_func.myevm_g_to_t(evm, to_g, tx_ac=obj.ac) + hgvs_inp = self.myevm_g_to_t(evm, to_g, tx_ac=obj.ac) to_g = hn.normalize(to_g) - hgvs_otp = va_func.myevm_g_to_t(evm, to_g, tx_ac=obj.ac) + hgvs_otp = self.myevm_g_to_t(evm, to_g, tx_ac=obj.ac) tx_ac = '' else: # Set variants pre and post RNA norm - hgvs_inp = 
hp.parse_hgvs_variant(inp) + hgvs_inp = self.hp.parse_hgvs_variant(inp) try: hgvs_otp = hn.normalize(hgvs_inp) except hgvs.exceptions.HGVSError as e: @@ -3769,7 +3768,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr del rel_var[:] for accessions in cp_rel: error = 'false' - hgvs_vt = hp.parse_hgvs_variant(str(accessions)) + hgvs_vt = self.hp.parse_hgvs_variant(str(accessions)) try: tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) except hgvs.exceptions.HGVSError as e: @@ -3779,7 +3778,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr rel_var.append(accessions) else: # Get hgnc Gene name from command - data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) + data = self.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) if data['error'] != 'false': error = data['error'] validation['warnings'] = validation['warnings'] + ': ' + str(error) @@ -3809,8 +3808,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr pass else: - query = hp.parse_hgvs_variant(variant) - test = hp.parse_hgvs_variant(input) + query = self.hp.parse_hgvs_variant(variant) + test = self.hp.parse_hgvs_variant(input) if query.posedit.pos != test.posedit.pos: caution = 'The variant description ' + input + ' requires alteration to comply with HGVS variant nomenclature:' automap = 'Automap has corrected the variant description' @@ -3825,7 +3824,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr del rel_var[:] for accessions in cp_rel: error = 'false' - hgvs_vt = hp.parse_hgvs_variant(str(accessions)) + hgvs_vt = self.hp.parse_hgvs_variant(str(accessions)) try: tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) except hgvs.exceptions.HGVSError as e: @@ -3835,7 +3834,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr rel_var.append(accessions) else: # Get hgnc Gene name from command - data = 
va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) + data = self.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) if data['error'] != 'false': reason = 'Cannot currently display the required information:' error = data['error'] @@ -3863,12 +3862,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr batch_list.append(query) # VALIDATION of intronic variants - pre_valid = hp.parse_hgvs_variant(input) - post_valid = hp.parse_hgvs_variant(variant) + pre_valid = self.hp.parse_hgvs_variant(input) + post_valid = self.hp.parse_hgvs_variant(variant) if valid == 'false': error = 'false' genomic_validation = str( - va_func.genomic(input, no_norm_evm, hp, self.hdp, primary_assembly, vm, hn, self.sf, nr_vm)) + self.genomic(input, no_norm_evm, self.hp, self.hdp, primary_assembly, self.vm, hn, self.sf, self.nr_vm)) del_end = re.compile('\ddel$') delins = re.compile('delins') inv = re.compile('inv') @@ -3886,7 +3885,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr pass # Apply validation to intronic variant descriptions (should be valid but make sure) - error = va_func.validate(genomic_validation, hp=hp, vr=vr) + error = self.validateHGVS(genomic_validation, hp=self.hp, vr=self.vr) if error == 'false': valid = 'true' else: @@ -3916,7 +3915,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr ############################## # Coding sequence - BASED ON NORMALIZED VARIANT IF EXONIC - hgvs_coding = va_func.coding(variant, hp) + hgvs_coding = self.coding(variant, self.hp) boundary = re.compile('exon-intron boundary') spanning = re.compile('exon/intron') @@ -3930,12 +3929,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Gap gene black list try: - gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(hgvs_coding.ac) + gene_symbol = self.db.get.get_gene_symbol_from_transcriptID(hgvs_coding.ac) except Exception: 
fn.exceptPass() else: # If the gene symbol is not in the list, the value False will be returned - gap_compensation = gapGenes.gap_black_list(gene_symbol) + gap_compensation = vvChromosomes.gap_black_list(gene_symbol) # Intron spanning variants if re.search('boundary', str(error)) or re.search('spanning', str(error)): @@ -3955,12 +3954,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # RNA sequence hgvs_rna = copy.deepcopy(hgvs_coding) - hgvs_rna = va_func.hgvs_c_to_r(hgvs_rna) + hgvs_rna = self.hgvs_c_to_r(hgvs_rna) rna = str(hgvs_rna) # Genomic sequence - hgvs_genomic = va_func.myevm_t_to_g(hgvs_coding, self.hdp, no_norm_evm, primary_assembly, vm, hp, hn, - self.sf, nr_vm) + hgvs_genomic = self.myevm_t_to_g(hgvs_coding, self.hdp, no_norm_evm, primary_assembly, self.vm, self.hp, hn, + self.sf, self.nr_vm) final_hgvs_genomic = hgvs_genomic # genomic_possibilities @@ -6532,14 +6531,14 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr predicted_protein_variant = valid['protein'] if re.match('NP_', predicted_protein_variant): rs_p, pred_prot_posedit = predicted_protein_variant.split(':') - lrg_p = va_dbCrl.data.get_lrgProteinID_from_RefSeqProteinID(rs_p) + lrg_p = self.db.get.get_lrgProteinID_from_RefSeqProteinID(rs_p) if re.match('LRG', lrg_p): predicted_protein_variant = rs_p + '(' + lrg_p + '):' + pred_prot_posedit # Gene if transcript_accession != '': try: - gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(transcript_accession) + gene_symbol = self.db.get.get_gene_symbol_from_transcriptID(transcript_accession) except: gene_symbol = 'Unable to verify gene symbol for ' + str(transcript_accession) else: @@ -6561,15 +6560,15 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Multiple genomic variants # multi_gen_vars = [] if tx_variant != '': - hgvs_coding = hp.parse_hgvs_variant(str(tx_variant)) + hgvs_coding = 
self.hp.parse_hgvs_variant(str(tx_variant)) # Gap gene black list try: - gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(hgvs_coding.ac) + gene_symbol = self.db.get.get_gene_symbol_from_transcriptID(hgvs_coding.ac) except Exception: fn.exceptPass() else: # If the gene symbol is not in the list, the value False will be returned - gap_compensation = gapGenes.gap_black_list(gene_symbol) + gap_compensation = vvChromosomes.gap_black_list(gene_symbol) # Look for variants spanning introns try: @@ -6597,11 +6596,11 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr for alt_chr in multi_list: try: # Re set ori - ori = va_func.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=alt_chr, + ori = self.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=alt_chr, alt_aln_method=alt_aln_method, hdp=self.hdp) orientation = int(ori[0]['alt_strand']) - hgvs_alt_genomic = va_func.myvm_t_to_g(hgvs_coding, alt_chr, no_norm_evm, vm, hp, hn, - self.sf, nr_vm) + hgvs_alt_genomic = self.myvm_t_to_g(hgvs_coding, alt_chr, no_norm_evm, self.vm, self.hp, hn, + self.sf, self.nr_vm) # Set hgvs_genomic accordingly hgvs_genomic = copy.deepcopy(hgvs_alt_genomic) @@ -6628,9 +6627,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = str(e) chromosome_normalized_hgvs_coding = hgvs_coding - most_3pr_hgvs_genomic = va_func.myvm_t_to_g(chromosome_normalized_hgvs_coding, + most_3pr_hgvs_genomic = self.myvm_t_to_g(chromosome_normalized_hgvs_coding, alt_chr, - no_norm_evm, vm, hp, hn, self.sf, nr_vm) + no_norm_evm, self.vm, self.hp, hn, self.sf, self.nr_vm) hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) # First to the right @@ -6648,7 +6647,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Generate an end position stash_end = str(stash_pos + len(stash_ref) - 1) # make a not real deletion insertion - stash_hgvs_not_delins = hp.parse_hgvs_variant( + stash_hgvs_not_delins = self.hp.parse_hgvs_variant( 
stash_ac + ':' + hgvs_stash.type + '.' + str( stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) try: @@ -6657,8 +6656,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr fn.exceptPass() # Store a tx copy for later use test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) - stash_genomic = va_func.myvm_t_to_g(test_stash_tx_right, hgvs_alt_genomic.ac, - no_norm_evm, vm, hp, hn, self.sf, nr_vm) + stash_genomic = self.myvm_t_to_g(test_stash_tx_right, hgvs_alt_genomic.ac, + no_norm_evm, self.vm, self.hp, hn, self.sf, self.nr_vm) # Stash the outputs if required # test variants = NC_000006.11:g.90403795G= (causes double identity) # NC_000002.11:g.73675227_73675228insCTC (? incorrect assumed insertion position) @@ -6689,7 +6688,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr reform_ident = str(test_stash_tx_right).split(':')[0] reform_ident = reform_ident + ':c.' + str(test_stash_tx_right.posedit.pos) + 'del' + str( test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) - hgvs_reform_ident = hp.parse_hgvs_variant(reform_ident) + hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) try: hn.normalize(hgvs_reform_ident) except hgvs.exceptions.HGVSError as e: @@ -6729,7 +6728,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Generate an end position stash_end = str(stash_pos + len(stash_ref) - 1) # make a not real deletion insertion - stash_hgvs_not_delins = hp.parse_hgvs_variant( + stash_hgvs_not_delins = self.hp.parse_hgvs_variant( stash_ac + ':' + hgvs_stash.type + '.' 
+ str( stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) try: @@ -6738,8 +6737,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr fn.exceptPass() # Store a tx copy for later use test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) - stash_genomic = va_func.myvm_t_to_g(test_stash_tx_left, hgvs_alt_genomic.ac, - no_norm_evm, vm, hp, hn, self.sf, nr_vm) + stash_genomic = self.myvm_t_to_g(test_stash_tx_left, hgvs_alt_genomic.ac, + no_norm_evm, self.vm, self.hp, hn, self.sf, self.nr_vm) # Stash the outputs if required # test variants = NC_000006.11:g.90403795G= (causes double identity) # NC_000002.11:g.73675227_73675228insCTC @@ -6770,7 +6769,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr reform_ident = str(test_stash_tx_left).split(':')[0] reform_ident = reform_ident + ':c.' + str(test_stash_tx_left.posedit.pos) + 'del' + str( test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) - hgvs_reform_ident = hp.parse_hgvs_variant(reform_ident) + hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) try: hn.normalize(hgvs_reform_ident) except hgvs.exceptions.HGVSError as e: @@ -6800,8 +6799,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) most_3pr_hgvs_transcript_variant = reverse_normalizer.normalize(hgvs_coding) try: - n_3pr = vm.c_to_n(most_3pr_hgvs_transcript_variant) - n_5pr = vm.c_to_n(most_5pr_hgvs_transcript_variant) + n_3pr = self.vm.c_to_n(most_3pr_hgvs_transcript_variant) + n_5pr = self.vm.c_to_n(most_5pr_hgvs_transcript_variant) except: n_3pr = most_3pr_hgvs_transcript_variant n_5pr = most_5pr_hgvs_transcript_variant @@ -6819,9 +6818,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr 0] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ pr5_ref[1] # Map to the genome - 
genomic_from_most_3pr_hgvs_transcript_variant = vm.t_to_g( + genomic_from_most_3pr_hgvs_transcript_variant = self.vm.t_to_g( most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) - genomic_from_most_5pr_hgvs_transcript_variant = vm.t_to_g( + genomic_from_most_5pr_hgvs_transcript_variant = self.vm.t_to_g( most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) # Normalize - If the variant spans a gap it should then form a static genomic variant @@ -6858,7 +6857,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' + str( genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_3pr_hgvs_transcript_variant = hp.parse_hgvs_variant( + genomic_from_most_3pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) try: @@ -6869,7 +6868,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' 
+ str( most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref - most_3pr_hgvs_transcript_variant = hp.parse_hgvs_variant( + most_3pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( most_3pr_hgvs_transcript_variant_delins_from_dup) try: @@ -6880,7 +6879,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' + str( genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_5pr_hgvs_transcript_variant = hp.parse_hgvs_variant( + genomic_from_most_5pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) try: @@ -6891,7 +6890,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' 
+ str( most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref - most_5pr_hgvs_transcript_variant = hp.parse_hgvs_variant( + most_5pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( most_5pr_hgvs_transcript_variant_delins_from_dup) if len( @@ -6996,7 +6995,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr pos = str(pos) # Store a not real deletion insertion to test for gapping - stored_hgvs_not_delins = hp.parse_hgvs_variant(str( + stored_hgvs_not_delins = self.hp.parse_hgvs_variant(str( hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) v = [chr, pos, ref, alt] @@ -7221,9 +7220,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = va_func.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, vm, hp, hn, self.sf, - nr_vm) + hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, + no_norm_evm, self.vm, self.hp, hn, self.sf, + self.nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str( saved_hgvs_coding.ac)) @@ -7236,9 +7235,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = va_func.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, vm, hp, hn, self.sf, - nr_vm) + hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, + no_norm_evm, self.vm, self.hp, hn, self.sf, + self.nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str( saved_hgvs_coding.ac)) @@ -7277,9 +7276,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = 
rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = va_func.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, vm, hp, hn, self.sf, - nr_vm) + hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, + no_norm_evm, self.vm, self.hp, hn, self.sf, + self.nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str( saved_hgvs_coding.ac)) @@ -7293,9 +7292,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = va_func.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, vm, hp, hn, self.sf, - nr_vm) + hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, + no_norm_evm, self.vm, self.hp, hn, self.sf, + self.nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str( saved_hgvs_coding.ac)) @@ -7319,10 +7318,10 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr for possibility in hgvs_genomic_possibilities: if possibility == '': continue - hgvs_t_possibility = vm.g_to_t(possibility, hgvs_coding.ac) + hgvs_t_possibility = self.vm.g_to_t(possibility, hgvs_coding.ac) if hgvs_t_possibility.posedit.edit.type == 'ins': try: - hgvs_t_possibility = vm.c_to_n(hgvs_t_possibility) + hgvs_t_possibility = self.vm.c_to_n(hgvs_t_possibility) except: continue if hgvs_t_possibility.posedit.pos.start.offset != 0 or hgvs_t_possibility.posedit.pos.end.offset != 0: @@ -7331,7 +7330,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_t_possibility.posedit.pos.start.base - 1, hgvs_t_possibility.posedit.pos.start.base + 1) try: - hgvs_t_possibility = vm.n_to_c(hgvs_t_possibility) + hgvs_t_possibility = self.vm.n_to_c(hgvs_t_possibility) except: continue hgvs_t_possibility.posedit.edit.ref = ins_ref @@ -7358,7 +7357,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if re_capture_tx_variant != []: try: - tx_hgvs_not_delins = 
vm.c_to_n(re_capture_tx_variant[2]) + tx_hgvs_not_delins = self.vm.c_to_n(re_capture_tx_variant[2]) except: tx_hgvs_not_delins = re_capture_tx_variant[2] disparity_deletion_in = re_capture_tx_variant[0:-1] @@ -7367,7 +7366,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Final sanity checks try: - vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + self.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) except Exception as e: if str( e) == 'start or end or both are beyond the bounds of transcript record': @@ -7398,9 +7397,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if disparity_deletion_in[0] == 'false' and ( possibility_counter == 3 or possibility_counter == 4): rg = reverse_normalizer.normalize(hgvs_not_delins) - rtx = vm.g_to_t(rg, tx_hgvs_not_delins.ac) + rtx = self.vm.g_to_t(rg, tx_hgvs_not_delins.ac) fg = hn.normalize(hgvs_not_delins) - ftx = vm.g_to_t(fg, tx_hgvs_not_delins.ac) + ftx = self.vm.g_to_t(fg, tx_hgvs_not_delins.ac) if ( rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): @@ -7416,7 +7415,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_genomic = fg hgvs_genomic_5pr = fg try: - tx_hgvs_not_delins = vm.c_to_n(ftx) + tx_hgvs_not_delins = self.vm.c_to_n(ftx) except Exception: tx_hgvs_not_delins = ftx disparity_deletion_in = ['transcript', 'Requires Analysis'] @@ -7430,7 +7429,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' 
+ str( tx_hgvs_not_delins.posedit.pos.start) + '_' + str( tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = hp.parse_hgvs_variant( + tx_hgvs_not_delins = self.hp.parse_hgvs_variant( tx_hgvs_not_delins_delins_from_dup) if disparity_deletion_in[0] == 'transcript': @@ -7455,7 +7454,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' + str( tx_gap_fill_variant.posedit.pos.start) + '_' + str( tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = hp.parse_hgvs_variant( + tx_gap_fill_variant = self.hp.parse_hgvs_variant( tx_gap_fill_variant_delins_from_dup) # Identify which half of the NOT-intron the start position of the variant is in @@ -7477,18 +7476,18 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_gap_fill_variant.posedit.edit.ref = '' try: - tx_gap_fill_variant = vm.n_to_c(tx_gap_fill_variant) + tx_gap_fill_variant = self.vm.n_to_c(tx_gap_fill_variant) except: fn.exceptPass() - genomic_gap_fill_variant = vm.t_to_g(tx_gap_fill_variant, + genomic_gap_fill_variant = self.vm.t_to_g(tx_gap_fill_variant, reverse_normalized_hgvs_genomic.ac) genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref try: - c_tx_hgvs_not_delins = vm.n_to_c(tx_hgvs_not_delins) + c_tx_hgvs_not_delins = self.vm.n_to_c(tx_hgvs_not_delins) except Exception: c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = vm.t_to_g(c_tx_hgvs_not_delins, + genomic_gap_fill_variant_alt = self.vm.t_to_g(c_tx_hgvs_not_delins, hgvs_genomic_5pr.ac) # Ensure an ALT exists @@ -7500,12 +7499,12 @@ def 
validate(self, batch_variant, selected_assembly, select_transcripts, transcr genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = hp.parse_hgvs_variant( + genomic_gap_fill_variant = self.hp.parse_hgvs_variant( genomic_gap_fill_variant_delins_from_dup) genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = hp.parse_hgvs_variant( + genomic_gap_fill_variant_alt = self.hp.parse_hgvs_variant( genomic_gap_fill_variant_alt_delins_from_dup) # Correct insertion alts @@ -7568,7 +7567,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Add the new alt to the gap fill variant and generate transcript variant genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = vm.g_to_t(genomic_gap_fill_variant, + hgvs_refreshed_variant = self.vm.g_to_t(genomic_gap_fill_variant, tx_gap_fill_variant.ac) # Set warning @@ -7600,23 +7599,23 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # the transcript variant but do not have a position which actually hits the gap, # so the variant likely spans the gap, and is not picked up by an offset. 
try: - c1 = vm.n_to_c(tx_hgvs_not_delins) + c1 = self.vm.n_to_c(tx_hgvs_not_delins) except: c1 = tx_hgvs_not_delins - g1 = nr_vm.t_to_g(c1, hgvs_genomic.ac) - g3 = nr_vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g3 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) ng2 = hn.normalize(g2) g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( len(g3.posedit.edit.ref) - 1) try: - c2 = vm.g_to_t(g3, c1.ac) + c2 = self.vm.g_to_t(g3, c1.ac) if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: pass else: tx_hgvs_not_delins = c2 try: - tx_hgvs_not_delins = vm.c_to_n(tx_hgvs_not_delins) + tx_hgvs_not_delins = self.vm.c_to_n(tx_hgvs_not_delins) except hgvs.exceptions.HGVSError: fn.exceptPass() except hgvs.exceptions.HGVSInvalidVariantError: @@ -7635,7 +7634,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_hgvs_not_delins.ac) non_valid_caution = 'true' try: - c2 = vm.n_to_c(tx_hgvs_not_delins) + c2 = self.vm.n_to_c(tx_hgvs_not_delins) except: c2 = tx_hgvs_not_delins c1 = copy.deepcopy(c2) @@ -7645,12 +7644,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr c1.posedit.edit.ref = '' c1.posedit.edit.alt = '' if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) g1.posedit.edit.alt = g1.posedit.edit.ref else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) g2.posedit.edit.alt = g2.posedit.edit.ref reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] @@ -7658,7 +7657,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr g3.posedit.pos.end.base = 
g2.posedit.pos.end.base g3.posedit.edit.ref = reference g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) + c3 = self.vm.g_to_t(g3, c1.ac) hgvs_refreshed_variant = c3 # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) @@ -7683,7 +7682,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_hgvs_not_delins.ac) non_valid_caution = 'true' try: - c1 = vm.n_to_c(tx_hgvs_not_delins) + c1 = self.vm.n_to_c(tx_hgvs_not_delins) except: c1 = tx_hgvs_not_delins c2 = copy.deepcopy(c1) @@ -7693,12 +7692,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr c2.posedit.edit.ref = '' c2.posedit.edit.alt = '' if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) g2.posedit.edit.alt = g2.posedit.edit.ref else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) g1.posedit.edit.alt = g1.posedit.edit.ref reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] @@ -7706,7 +7705,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr g3.posedit.pos.end.base = g2.posedit.pos.end.base g3.posedit.edit.ref = reference g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) + c3 = self.vm.g_to_t(g3, c1.ac) hgvs_refreshed_variant = c3 # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) @@ -7731,7 +7730,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_hgvs_not_delins.ac) non_valid_caution = 'true' try: - c2 = vm.n_to_c(tx_hgvs_not_delins) + c2 = self.vm.n_to_c(tx_hgvs_not_delins) except: c2 = tx_hgvs_not_delins c1 = copy.deepcopy(c2) @@ -7741,12 +7740,12 @@ def validate(self, batch_variant, 
selected_assembly, select_transcripts, transcr c1.posedit.edit.ref = '' c1.posedit.edit.alt = '' if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) g1.posedit.edit.alt = g1.posedit.edit.ref else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) g2.posedit.edit.alt = g2.posedit.edit.ref reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] @@ -7754,7 +7753,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr g3.posedit.pos.end.base = g2.posedit.pos.end.base g3.posedit.edit.ref = reference g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) + c3 = self.vm.g_to_t(g3, c1.ac) hgvs_refreshed_variant = c3 # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) @@ -7779,7 +7778,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_hgvs_not_delins.ac) non_valid_caution = 'true' try: - c1 = vm.n_to_c(tx_hgvs_not_delins) + c1 = self.vm.n_to_c(tx_hgvs_not_delins) except: c1 = tx_hgvs_not_delins c2 = copy.deepcopy(c1) @@ -7789,12 +7788,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr c2.posedit.edit.ref = '' c2.posedit.edit.alt = '' if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) g2.posedit.edit.alt = g2.posedit.edit.ref else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) g1.posedit.edit.alt = g1.posedit.edit.ref reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] alternate = 
g1.posedit.edit.alt + g2.posedit.edit.alt[1:] @@ -7802,7 +7801,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr g3.posedit.pos.end.base = g2.posedit.pos.end.base g3.posedit.edit.ref = reference g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) + c3 = self.vm.g_to_t(g3, c1.ac) hgvs_refreshed_variant = c3 # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) @@ -7877,10 +7876,10 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue # Update hgvs_genomic - hgvs_alt_genomic = va_func.myvm_t_to_g(hgvs_refreshed_variant, alt_chr, - no_norm_evm, vm, hp, hn, self.sf, nr_vm) + hgvs_alt_genomic = self.myvm_t_to_g(hgvs_refreshed_variant, alt_chr, + no_norm_evm, self.vm, self.hp, hn, self.sf, self.nr_vm) if hgvs_alt_genomic.posedit.edit.type == 'identity': - re_c = vm.g_to_t(hgvs_alt_genomic, hgvs_refreshed_variant.ac) + re_c = self.vm.g_to_t(hgvs_alt_genomic, hgvs_refreshed_variant.ac) if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): shuffle_left_g = copy.copy(hgvs_alt_genomic) shuffle_left_g.posedit.edit.ref = '' @@ -7888,7 +7887,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr shuffle_left_g.posedit.pos.start.base = shuffle_left_g.posedit.pos.start.base - 1 shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 shuffle_left_g = reverse_normalizer.normalize(shuffle_left_g) - re_c = vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) + re_c = self.vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): hgvs_alt_genomic = shuffle_left_g @@ -8202,7 +8201,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr build_from = 'GRCh37' # Liftover - lifted_response = lift_over(genomic_position_info[g_p_key]['hgvs_genomic_description'], build_from, build_to, hn, vm, vr, self.hdp, hp, reverse_normalizer, 
self.sf, evm) + lifted_response = lift_over(genomic_position_info[g_p_key]['hgvs_genomic_description'], build_from, build_to, hn, self.vm, self.vr, self.hdp, self.hp, reverse_normalizer, self.sf, evm) # Sort the respomse into primary assembly and ALT primary_assembly_loci = {} From 8312d35d4fc22c2e983c61d815957976813cc846 Mon Sep 17 00:00:00 2001 From: buran Date: Thu, 17 Jan 2019 11:01:55 +0000 Subject: [PATCH 008/223] Added some notes to the init function --- VariantValidator/modules/vvMixinInit.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index 00adf6f3..38adb5b9 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -178,6 +178,10 @@ def __init__(self): #def validate(self): # <-------------- this is imported from the mixin class in vvCore. # pass + def myConfig(self): + #Returns configuration: + #version, hgvs version, uta schema, seqrepo db. + def createConfig(self,outPath): # This function reads from the default configuration file stored in the same folder as this module, # and transfers it to outPath. From f41064ece4e368c813e4e69829f76ce5b5299efd Mon Sep 17 00:00:00 2001 From: buran Date: Thu, 17 Jan 2019 19:52:06 +0000 Subject: [PATCH 009/223] Finished the conversion, now to test... 
--- VariantValidator/modules/vvLiftover.py | 8 +- VariantValidator/modules/vvMixinCore.py | 325 ++++++++++++------------ VariantValidator/modules/vvMixinInit.py | 4 +- 3 files changed, 169 insertions(+), 168 deletions(-) diff --git a/VariantValidator/modules/vvLiftover.py b/VariantValidator/modules/vvLiftover.py index 6cf991b8..c21b1016 100644 --- a/VariantValidator/modules/vvLiftover.py +++ b/VariantValidator/modules/vvLiftover.py @@ -12,8 +12,8 @@ import os import vvChromosomes import vvHGVS +from vvLogging import logger from pyliftover import LiftOver -import warnings from Bio.Seq import Seq # Pre compile variables @@ -85,7 +85,7 @@ def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, reverse_no alt_build_to = 'GRCh38' # populate the variant from data - vcf = hgvs2vcf.report_hgvs2vcf(hgvs_genomic, build_from, reverse_normalizer, sf) + vcf = vvHGVS.report_hgvs2vcf(hgvs_genomic, build_from, reverse_normalizer, sf) # Create to and from dictionaries lifted_response[build_from.lower()] = {} @@ -250,7 +250,7 @@ def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, reverse_no accession = vvChromosomes.to_accession(chr, lo_to) if accession is None: wrn = 'Unable to identify an equivalent %s chromosome ID for %s' % (str(lo_to), str(chr)) - warnings.warn(wrn) + logger.warning(wrn) continue else: not_delins = accession + ':g.' 
+ str(pos) + '_' + str( @@ -259,7 +259,7 @@ def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, reverse_no try: vr.validate(hgvs_not_delins) except hgvs.exceptions.HGVSError as e: - warnings.warn(str(e)) + logger.warning(str(e)) # Most likely incorrect bases continue else: diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index b5b35e4f..ebb4f93c 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -19,17 +19,16 @@ # IMPORT PYTHON MODULES import re import time -import datetime +#import datetime import copy import os import sys -import warnings from operator import itemgetter -from pyliftover import LiftOver +#from pyliftover import LiftOver import traceback -from configparser import ConfigParser +#from configparser import ConfigParser -from Bio.Seq import Seq +#from Bio.Seq import Seq # Import variantanalyser and peripheral VV modules #import ref_seq_type @@ -3985,9 +3984,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = str(e) chromosome_normalized_hgvs_coding = hgvs_coding - most_3pr_hgvs_genomic = va_func.myvm_t_to_g(chromosome_normalized_hgvs_coding, - hgvs_genomic.ac, no_norm_evm, vm, hp, hn, self.sf, - nr_vm) + most_3pr_hgvs_genomic = self.myvm_t_to_g(chromosome_normalized_hgvs_coding, + hgvs_genomic.ac, no_norm_evm, self.vm, self.hp, hn, self.sf, + self.nr_vm) hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) # Push from side to side to try pick up odd placements @@ -4007,7 +4006,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Generate an end position stash_end = str(stash_pos + len(stash_ref) - 1) # make a not real deletion insertion - stash_hgvs_not_delins = hp.parse_hgvs_variant( + stash_hgvs_not_delins = self.hp.parse_hgvs_variant( stash_ac + ':' + hgvs_stash.type + '.' 
+ str( stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) try: @@ -4017,8 +4016,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Store a tx copy for later use test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) # stash_genomic = vm.t_to_g(test_stash_tx_right, hgvs_genomic.ac) - stash_genomic = va_func.myvm_t_to_g(test_stash_tx_right, hgvs_genomic.ac, no_norm_evm, - vm, hp, hn, self.sf, nr_vm) + stash_genomic = self.myvm_t_to_g(test_stash_tx_right, hgvs_genomic.ac, no_norm_evm, + self.vm, self.hp, hn, self.sf, self.nr_vm) # Stash the outputs if required # test variants = NC_000006.11:g.90403795G= (causes double identity) # NC_000002.11:g.73675227_73675228insCTC (? incorrect assumed insertion position) @@ -4049,7 +4048,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr reform_ident = str(test_stash_tx_right).split(':')[0] reform_ident = reform_ident + ':c.' + str(test_stash_tx_right.posedit.pos) + 'del' + str( test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) - hgvs_reform_ident = hp.parse_hgvs_variant(reform_ident) + hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) try: hn.normalize(hgvs_reform_ident) except hgvs.exceptions.HGVSError as e: @@ -4092,7 +4091,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Generate an end position stash_end = str(stash_pos + len(stash_ref) - 1) # make a not real deletion insertion - stash_hgvs_not_delins = hp.parse_hgvs_variant( + stash_hgvs_not_delins = self.hp.parse_hgvs_variant( stash_ac + ':' + hgvs_stash.type + '.' 
+ str( stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) try: @@ -4102,8 +4101,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Store a tx copy for later use test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) # stash_genomic = vm.t_to_g(test_stash_tx_left, hgvs_genomic.ac) - stash_genomic = va_func.myvm_t_to_g(test_stash_tx_left, hgvs_genomic.ac, no_norm_evm, - vm, hp, hn, self.sf, nr_vm) + stash_genomic = self.myvm_t_to_g(test_stash_tx_left, hgvs_genomic.ac, no_norm_evm, + self.vm, self.hp, hn, self.sf, self.nr_vm) # Stash the outputs if required # test variants = NC_000006.11:g.90403795G= (causes double identity) # NC_000002.11:g.73675227_73675228insCTC @@ -4135,7 +4134,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr reform_ident = str(test_stash_tx_left).split(':')[0] reform_ident = reform_ident + ':c.' + str(test_stash_tx_left.posedit.pos) + 'del' + str( test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) - hgvs_reform_ident = hp.parse_hgvs_variant(reform_ident) + hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) try: hn.normalize(hgvs_reform_ident) except hgvs.exceptions.HGVSError as e: @@ -4167,8 +4166,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) most_3pr_hgvs_transcript_variant = reverse_normalizer.normalize(hgvs_coding) try: - n_3pr = vm.c_to_n(most_3pr_hgvs_transcript_variant) - n_5pr = vm.c_to_n(most_5pr_hgvs_transcript_variant) + n_3pr = self.vm.c_to_n(most_3pr_hgvs_transcript_variant) + n_5pr = self.vm.c_to_n(most_5pr_hgvs_transcript_variant) except: n_3pr = most_3pr_hgvs_transcript_variant n_5pr = most_5pr_hgvs_transcript_variant @@ -4186,9 +4185,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr 0] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ pr5_ref[1] # 
Map to the genome - genomic_from_most_3pr_hgvs_transcript_variant = vm.t_to_g( + genomic_from_most_3pr_hgvs_transcript_variant = self.vm.t_to_g( most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) - genomic_from_most_5pr_hgvs_transcript_variant = vm.t_to_g( + genomic_from_most_5pr_hgvs_transcript_variant = self.vm.t_to_g( most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) # Normalize - If the variant spans a gap it should then form a static genomic variant try: @@ -4223,7 +4222,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' + str( genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_3pr_hgvs_transcript_variant = hp.parse_hgvs_variant( + genomic_from_most_3pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) try: @@ -4234,7 +4233,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' 
+ str( most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref - most_3pr_hgvs_transcript_variant = hp.parse_hgvs_variant( + most_3pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( most_3pr_hgvs_transcript_variant_delins_from_dup) try: @@ -4245,7 +4244,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' + str( genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_5pr_hgvs_transcript_variant = hp.parse_hgvs_variant( + genomic_from_most_5pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) try: @@ -4256,7 +4255,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' 
+ str( most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref - most_5pr_hgvs_transcript_variant = hp.parse_hgvs_variant( + most_5pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( most_5pr_hgvs_transcript_variant_delins_from_dup) if len(genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( @@ -4366,7 +4365,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr pos = str(pos) # Store a not real deletion insertion to test for gapping - stored_hgvs_not_delins = hp.parse_hgvs_variant(str( + stored_hgvs_not_delins = self.hp.parse_hgvs_variant(str( hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) v = [chr, pos, ref, alt] @@ -4598,9 +4597,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, vm, hp, hn, self.sf, - nr_vm) + hgvs_not_delins = self.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, + primary_assembly, self.vm, self.hp, hn, self.sf, + self.nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) @@ -4615,9 +4614,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, vm, hp, hn, self.sf, - nr_vm) + hgvs_not_delins = self.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, + primary_assembly, self.vm, self.hp, hn, self.sf, + self.nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, 
str(saved_hgvs_coding.ac)) rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 @@ -4652,9 +4651,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, vm, hp, hn, self.sf, - nr_vm) + hgvs_not_delins = self.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, + primary_assembly, self.vm, self.hp, hn, self.sf, + self.nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): @@ -4667,9 +4666,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, vm, hp, hn, self.sf, - nr_vm) + hgvs_not_delins = self.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, + primary_assembly, self.vm, self.hp, hn, self.sf, + self.nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 @@ -4693,17 +4692,17 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if internal_possibility == '': continue - hgvs_t_possibility = vm.g_to_t(internal_possibility, hgvs_coding.ac) + hgvs_t_possibility = self.vm.g_to_t(internal_possibility, hgvs_coding.ac) if hgvs_t_possibility.posedit.edit.type == 'ins': try: - hgvs_t_possibility = vm.c_to_n(hgvs_t_possibility) + hgvs_t_possibility = self.vm.c_to_n(hgvs_t_possibility) except: fn.exceptPass() ins_ref = self.sf.fetch_seq(hgvs_t_possibility.ac, hgvs_t_possibility.posedit.pos.start.base - 1, hgvs_t_possibility.posedit.pos.start.base + 1) try: - hgvs_t_possibility = vm.n_to_c(hgvs_t_possibility) + hgvs_t_possibility = self.vm.n_to_c(hgvs_t_possibility) except: 
fn.exceptPass() hgvs_t_possibility.posedit.edit.ref = ins_ref @@ -4731,7 +4730,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if re_capture_tx_variant != []: try: - tx_hgvs_not_delins = vm.c_to_n(re_capture_tx_variant[2]) + tx_hgvs_not_delins = self.vm.c_to_n(re_capture_tx_variant[2]) except: tx_hgvs_not_delins = re_capture_tx_variant[2] disparity_deletion_in = re_capture_tx_variant[0:-1] @@ -4741,7 +4740,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # 'At hgvs_genomic' # Final sanity checks try: - vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + self.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) except Exception as e: if str(e) == 'start or end or both are beyond the bounds of transcript record': continue @@ -4770,9 +4769,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if disparity_deletion_in[0] == 'false' and ( possibility_counter == 3 or possibility_counter == 4): rg = reverse_normalizer.normalize(hgvs_not_delins) - rtx = vm.g_to_t(rg, tx_hgvs_not_delins.ac) + rtx = self.vm.g_to_t(rg, tx_hgvs_not_delins.ac) fg = hn.normalize(hgvs_not_delins) - ftx = vm.g_to_t(fg, tx_hgvs_not_delins.ac) + ftx = self.vm.g_to_t(fg, tx_hgvs_not_delins.ac) if (rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): exons = self.hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, alt_aln_method) @@ -4786,7 +4785,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_genomic = fg hgvs_genomic_5pr = fg try: - tx_hgvs_not_delins = vm.c_to_n(ftx) + tx_hgvs_not_delins = self.vm.c_to_n(ftx) except Exception: tx_hgvs_not_delins = ftx disparity_deletion_in = ['transcript', 'Requires Analysis'] @@ -4800,7 +4799,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_hgvs_not_delins_delins_from_dup = 
tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( tx_hgvs_not_delins.posedit.pos.start) + '_' + str( tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = hp.parse_hgvs_variant( + tx_hgvs_not_delins = self.hp.parse_hgvs_variant( tx_hgvs_not_delins_delins_from_dup) # GAP IN THE TRANSCRIPT DISPARITY DETECTED @@ -4830,7 +4829,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' + str( tx_gap_fill_variant.posedit.pos.start) + '_' + str( tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = hp.parse_hgvs_variant( + tx_gap_fill_variant = self.hp.parse_hgvs_variant( tx_gap_fill_variant_delins_from_dup) # Identify which half of the NOT-intron the start position of the variant is in @@ -4848,18 +4847,18 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_gap_fill_variant.posedit.edit.ref = '' try: - tx_gap_fill_variant = vm.n_to_c(tx_gap_fill_variant) + tx_gap_fill_variant = self.vm.n_to_c(tx_gap_fill_variant) except: fn.exceptPass() - genomic_gap_fill_variant = vm.t_to_g(tx_gap_fill_variant, + genomic_gap_fill_variant = self.vm.t_to_g(tx_gap_fill_variant, reverse_normalized_hgvs_genomic.ac) genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref try: - c_tx_hgvs_not_delins = vm.n_to_c(tx_hgvs_not_delins) + c_tx_hgvs_not_delins = self.vm.n_to_c(tx_hgvs_not_delins) except Exception: c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = vm.t_to_g(c_tx_hgvs_not_delins, + genomic_gap_fill_variant_alt = self.vm.t_to_g(c_tx_hgvs_not_delins, hgvs_genomic_5pr.ac) # 
Ensure an ALT exists @@ -4871,12 +4870,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = hp.parse_hgvs_variant( + genomic_gap_fill_variant = self.hp.parse_hgvs_variant( genomic_gap_fill_variant_delins_from_dup) genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = hp.parse_hgvs_variant( + genomic_gap_fill_variant_alt = self.hp.parse_hgvs_variant( genomic_gap_fill_variant_alt_delins_from_dup) # Correct insertion alts @@ -4936,7 +4935,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Add the new alt to the gap fill variant and generate transcript variant genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = vm.g_to_t(genomic_gap_fill_variant, + hgvs_refreshed_variant = self.vm.g_to_t(genomic_gap_fill_variant, tx_gap_fill_variant.ac) # Set warning @@ -4966,23 +4965,23 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # the transcript variant but do not have a position which actually hits the gap, # so the variant likely spans the gap, and is not picked up by an offset. 
try: - c1 = vm.n_to_c(tx_hgvs_not_delins) + c1 = self.vm.n_to_c(tx_hgvs_not_delins) except: c1 = tx_hgvs_not_delins - g1 = nr_vm.t_to_g(c1, hgvs_genomic.ac) - g3 = nr_vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g3 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) ng2 = hn.normalize(g2) g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( len(g3.posedit.edit.ref) - 1) try: - c2 = vm.g_to_t(g3, c1.ac) + c2 = self.vm.g_to_t(g3, c1.ac) if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: pass else: tx_hgvs_not_delins = c2 try: - tx_hgvs_not_delins = vm.c_to_n(tx_hgvs_not_delins) + tx_hgvs_not_delins = self.vm.c_to_n(tx_hgvs_not_delins) except hgvs.exceptions.HGVSError: fn.exceptPass() except hgvs.exceptions.HGVSInvalidVariantError: @@ -4998,7 +4997,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_hgvs_not_delins.ac) non_valid_caution = 'true' try: - c2 = vm.n_to_c(tx_hgvs_not_delins) + c2 = self.vm.n_to_c(tx_hgvs_not_delins) except: c2 = tx_hgvs_not_delins c1 = copy.deepcopy(c2) @@ -5008,12 +5007,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr c1.posedit.edit.ref = '' c1.posedit.edit.alt = '' if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) g1.posedit.edit.alt = g1.posedit.edit.ref else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) g2.posedit.edit.alt = g2.posedit.edit.ref reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] @@ -5021,7 +5020,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr g3.posedit.pos.end.base = 
g2.posedit.pos.end.base g3.posedit.edit.ref = reference g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) + c3 = self.vm.g_to_t(g3, c1.ac) hgvs_refreshed_variant = c3 # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) @@ -5045,7 +5044,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_hgvs_not_delins.ac) non_valid_caution = 'true' try: - c1 = vm.n_to_c(tx_hgvs_not_delins) + c1 = self.vm.n_to_c(tx_hgvs_not_delins) except: c1 = tx_hgvs_not_delins c2 = copy.deepcopy(c1) @@ -5055,12 +5054,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr c2.posedit.edit.ref = '' c2.posedit.edit.alt = '' if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) g2.posedit.edit.alt = g2.posedit.edit.ref else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) g1.posedit.edit.alt = g1.posedit.edit.ref reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] @@ -5068,7 +5067,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr g3.posedit.pos.end.base = g2.posedit.pos.end.base g3.posedit.edit.ref = reference g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) + c3 = self.vm.g_to_t(g3, c1.ac) hgvs_refreshed_variant = c3 # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) @@ -5090,7 +5089,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_hgvs_not_delins.ac) non_valid_caution = 'true' try: - c2 = vm.n_to_c(tx_hgvs_not_delins) + c2 = self.vm.n_to_c(tx_hgvs_not_delins) except: c2 = tx_hgvs_not_delins c1 = copy.deepcopy(c2) @@ -5100,12 +5099,12 @@ def validate(self, batch_variant, 
selected_assembly, select_transcripts, transcr c1.posedit.edit.ref = '' c1.posedit.edit.alt = '' if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) g1.posedit.edit.alt = g1.posedit.edit.ref else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) g2.posedit.edit.alt = g2.posedit.edit.ref reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] @@ -5113,7 +5112,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr g3.posedit.pos.end.base = g2.posedit.pos.end.base g3.posedit.edit.ref = reference g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) + c3 = self.vm.g_to_t(g3, c1.ac) hgvs_refreshed_variant = c3 # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) @@ -5137,7 +5136,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_hgvs_not_delins.ac) non_valid_caution = 'true' try: - c1 = vm.n_to_c(tx_hgvs_not_delins) + c1 = self.vm.n_to_c(tx_hgvs_not_delins) except: c1 = tx_hgvs_not_delins c2 = copy.deepcopy(c1) @@ -5147,12 +5146,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr c2.posedit.edit.ref = '' c2.posedit.edit.alt = '' if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) g2.posedit.edit.alt = g2.posedit.edit.ref else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) g1.posedit.edit.alt = g1.posedit.edit.ref reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] alternate = 
g1.posedit.edit.alt + g2.posedit.edit.alt[1:] @@ -5160,7 +5159,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr g3.posedit.pos.end.base = g2.posedit.pos.end.base g3.posedit.edit.ref = reference g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) + c3 = self.vm.g_to_t(g3, c1.ac) hgvs_refreshed_variant = c3 # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) @@ -5237,10 +5236,10 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: continue # Update hgvs_genomic - hgvs_genomic = va_func.myvm_t_to_g(hgvs_refreshed_variant, hgvs_genomic.ac, - no_norm_evm, vm, hp, hn, self.sf, nr_vm) + hgvs_genomic = self.va_func.myvm_t_to_g(hgvs_refreshed_variant, hgvs_genomic.ac, + no_norm_evm, self.vm, self.hp, hn, self.sf, self.nr_vm) if hgvs_genomic.posedit.edit.type == 'identity': - re_c = vm.g_to_t(hgvs_genomic, hgvs_refreshed_variant.ac) + re_c = self.vm.g_to_t(hgvs_genomic, hgvs_refreshed_variant.ac) if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): shuffle_left_g = copy.copy(hgvs_genomic) shuffle_left_g.posedit.edit.ref = '' @@ -5248,7 +5247,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr shuffle_left_g.posedit.pos.start.base = shuffle_left_g.posedit.pos.start.base - 1 shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 shuffle_left_g = reverse_normalizer.normalize(shuffle_left_g) - re_c = vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) + re_c = self.vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): hgvs_genomic = shuffle_left_g @@ -5340,7 +5339,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr pos = str(pos) # DO NOT DELETE - stored_hgvs_not_delins = hp.parse_hgvs_variant(str( + stored_hgvs_not_delins = self.hp.parse_hgvs_variant(str( hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type 
+ '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) # Apply gap code to re-format hgvs_coding @@ -5348,7 +5347,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr saved_hgvs_coding = copy.deepcopy(hgvs_coding) # Get orientation of the gene wrt genome and a list of exons mapped to the genome - ori = va_func.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=hgvs_genomic_5pr.ac, + ori = self.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=hgvs_genomic_5pr.ac, alt_aln_method=alt_aln_method, hdp=self.hdp) orientation = int(ori[0]['alt_strand']) @@ -5521,8 +5520,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, vm, hp, hn, self.sf, nr_vm) + hgvs_not_delins = self.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, + primary_assembly, self.vm, self.hp, hn, self.sf, self.nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): @@ -5534,8 +5533,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, vm, hp, hn, self.sf, nr_vm) + hgvs_not_delins = self.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, + primary_assembly, self.vm, self.hp, hn, self.sf, self.nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 @@ -5571,8 +5570,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, vm, 
hp, hn, self.sf, nr_vm) + hgvs_not_delins = self.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, + primary_assembly, self.vm, self.hp, hn, self.sf, self.nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): @@ -5585,8 +5584,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, vm, hp, hn, self.sf, nr_vm) + hgvs_not_delins = self.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, + primary_assembly, self.vm, self.hp, hn, self.sf, self.nr_vm) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 @@ -5611,17 +5610,17 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if internal_possibility == '': continue - hgvs_t_possibility = vm.g_to_t(internal_possibility, hgvs_coding.ac) + hgvs_t_possibility = self.vm.g_to_t(internal_possibility, hgvs_coding.ac) if hgvs_t_possibility.posedit.edit.type == 'ins': try: - hgvs_t_possibility = vm.c_to_n(hgvs_t_possibility) + hgvs_t_possibility = self.vm.c_to_n(hgvs_t_possibility) except: fn.exceptPass() ins_ref = self.sf.fetch_seq(hgvs_t_possibility.ac, hgvs_t_possibility.posedit.pos.start.base - 1, hgvs_t_possibility.posedit.pos.start.base + 1) try: - hgvs_t_possibility = vm.n_to_c(hgvs_t_possibility) + hgvs_t_possibility = self.vm.n_to_c(hgvs_t_possibility) except: fn.exceptPass() hgvs_t_possibility.posedit.edit.ref = ins_ref @@ -5648,7 +5647,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if re_capture_tx_variant != []: try: - tx_hgvs_not_delins = vm.c_to_n(re_capture_tx_variant[2]) + tx_hgvs_not_delins = self.vm.c_to_n(re_capture_tx_variant[2]) except: tx_hgvs_not_delins = 
re_capture_tx_variant[2] disparity_deletion_in = re_capture_tx_variant[0:-1] @@ -5657,7 +5656,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Final sanity checks try: - vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + self.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) except Exception as e: if str(e) == 'start or end or both are beyond the bounds of transcript record': logger.warning(str(e)) @@ -5695,7 +5694,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( tx_hgvs_not_delins.posedit.pos.start) + '_' + str( tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) + tx_hgvs_not_delins = self.hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) # GAP IN THE TRANSCRIPT DISPARITY DETECTED @@ -5723,7 +5722,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( tx_gap_fill_variant.posedit.pos.start) + '_' + str( tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = hp.parse_hgvs_variant( + tx_gap_fill_variant = self.hp.parse_hgvs_variant( tx_gap_fill_variant_delins_from_dup) # Identify which half of the NOT-intron the start position of the variant is in @@ -5741,18 +5740,18 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_gap_fill_variant.posedit.edit.ref = '' try: - tx_gap_fill_variant = vm.n_to_c(tx_gap_fill_variant) + tx_gap_fill_variant = self.vm.n_to_c(tx_gap_fill_variant) except: fn.exceptPass() - genomic_gap_fill_variant = vm.t_to_g(tx_gap_fill_variant, + genomic_gap_fill_variant = self.vm.t_to_g(tx_gap_fill_variant, reverse_normalized_hgvs_genomic.ac) genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref try: - c_tx_hgvs_not_delins = vm.n_to_c(tx_hgvs_not_delins) + c_tx_hgvs_not_delins = self.vm.n_to_c(tx_hgvs_not_delins) except Exception: c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = vm.t_to_g(c_tx_hgvs_not_delins, + genomic_gap_fill_variant_alt = self.vm.t_to_g(c_tx_hgvs_not_delins, hgvs_genomic_5pr.ac) # Ensure an ALT exists @@ -5764,12 +5763,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' 
+ str( genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = hp.parse_hgvs_variant( + genomic_gap_fill_variant = self.hp.parse_hgvs_variant( genomic_gap_fill_variant_delins_from_dup) genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = hp.parse_hgvs_variant( + genomic_gap_fill_variant_alt = self.hp.parse_hgvs_variant( genomic_gap_fill_variant_alt_delins_from_dup) # Correct insertion alts @@ -5827,7 +5826,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Add the new alt to the gap fill variant and generate transcript variant genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = vm.g_to_t(genomic_gap_fill_variant, + hgvs_refreshed_variant = self.vm.g_to_t(genomic_gap_fill_variant, tx_gap_fill_variant.ac) # Set warning @@ -5857,23 +5856,23 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # the transcript variant but do not have a position which actually hits the gap, # so the variant likely spans the gap, and is not picked up by an offset. 
try: - c1 = vm.n_to_c(tx_hgvs_not_delins) + c1 = self.vm.n_to_c(tx_hgvs_not_delins) except: c1 = tx_hgvs_not_delins - g1 = nr_vm.t_to_g(c1, hgvs_genomic.ac) - g3 = nr_vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g3 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) ng2 = hn.normalize(g2) g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( len(g3.posedit.edit.ref) - 1) try: - c2 = vm.g_to_t(g3, c1.ac) + c2 = self.vm.g_to_t(g3, c1.ac) if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: pass else: tx_hgvs_not_delins = c2 try: - tx_hgvs_not_delins = vm.c_to_n(tx_hgvs_not_delins) + tx_hgvs_not_delins = self.vm.c_to_n(tx_hgvs_not_delins) except hgvs.exceptions.HGVSError: fn.exceptPass() except hgvs.exceptions.HGVSInvalidVariantError: @@ -5888,7 +5887,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_hgvs_not_delins.ac) non_valid_caution = 'true' try: - c2 = vm.n_to_c(tx_hgvs_not_delins) + c2 = self.vm.n_to_c(tx_hgvs_not_delins) except: c2 = tx_hgvs_not_delins c1 = copy.deepcopy(c2) @@ -5898,12 +5897,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr c1.posedit.edit.ref = '' c1.posedit.edit.alt = '' if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) g1.posedit.edit.alt = g1.posedit.edit.ref else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) g2.posedit.edit.alt = g2.posedit.edit.ref reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] @@ -5911,7 +5910,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr g3.posedit.pos.end.base = 
g2.posedit.pos.end.base g3.posedit.edit.ref = reference g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) + c3 = self.vm.g_to_t(g3, c1.ac) hgvs_refreshed_variant = c3 # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) @@ -5932,7 +5931,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) non_valid_caution = 'true' try: - c1 = vm.n_to_c(tx_hgvs_not_delins) + c1 = self.vm.n_to_c(tx_hgvs_not_delins) except: c1 = tx_hgvs_not_delins c2 = copy.deepcopy(c1) @@ -5942,12 +5941,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr c2.posedit.edit.ref = '' c2.posedit.edit.alt = '' if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) g2.posedit.edit.alt = g2.posedit.edit.ref else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) g1.posedit.edit.alt = g1.posedit.edit.ref reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] @@ -5955,7 +5954,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr g3.posedit.pos.end.base = g2.posedit.pos.end.base g3.posedit.edit.ref = reference g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) + c3 = self.vm.g_to_t(g3, c1.ac) hgvs_refreshed_variant = c3 # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) @@ -5976,7 +5975,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_hgvs_not_delins.ac) non_valid_caution = 'true' try: - c2 = vm.n_to_c(tx_hgvs_not_delins) + c2 = self.vm.n_to_c(tx_hgvs_not_delins) except: c2 = tx_hgvs_not_delins c1 = copy.deepcopy(c2) @@ -5986,12 +5985,12 
@@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr c1.posedit.edit.ref = '' c1.posedit.edit.alt = '' if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) g1.posedit.edit.alt = g1.posedit.edit.ref else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) g2.posedit.edit.alt = g2.posedit.edit.ref reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] @@ -5999,7 +5998,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr g3.posedit.pos.end.base = g2.posedit.pos.end.base g3.posedit.edit.ref = reference g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) + c3 = self.vm.g_to_t(g3, c1.ac) hgvs_refreshed_variant = c3 # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) @@ -6020,7 +6019,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) non_valid_caution = 'true' try: - c1 = vm.n_to_c(tx_hgvs_not_delins) + c1 = self.vm.n_to_c(tx_hgvs_not_delins) except: c1 = tx_hgvs_not_delins c2 = copy.deepcopy(c1) @@ -6030,12 +6029,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr c2.posedit.edit.ref = '' c2.posedit.edit.alt = '' if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) g2.posedit.edit.alt = g2.posedit.edit.ref else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) + g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) g1.posedit.edit.alt = 
g1.posedit.edit.ref reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] @@ -6043,7 +6042,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr g3.posedit.pos.end.base = g2.posedit.pos.end.base g3.posedit.edit.ref = reference g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) + c3 = self.vm.g_to_t(g3, c1.ac) hgvs_refreshed_variant = c3 # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) @@ -6144,7 +6143,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Given the difficulties with mapping to and from RefSeqGenes, we now solely rely on UTA if refseqgene_ac != '': - hgvs_refseq = vm.t_to_g(hgvs_coding, refseqgene_ac) + hgvs_refseq = self.vm.t_to_g(hgvs_coding, refseqgene_ac) # Normalize the RefSeqGene Variant to the correct position try: hgvs_refseq = hn.normalize(hgvs_refseq) @@ -6163,7 +6162,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_refseq_ac = 'RefSeqGene record not available' # Predicted effect on protein - protein_dict = va_func.myc_to_p(hgvs_coding, evm, self.hdp, hp, hn, vm, self.sf, re_to_p=False) + protein_dict = self.myc_to_p(hgvs_coding, evm, self.hdp, self.hp, hn, self.vm, self.sf, re_to_p=False) if protein_dict['error'] == '': hgvs_protein = protein_dict['hgvs_protein'] protein = str(hgvs_protein) @@ -6178,7 +6177,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue # Gene orientation wrt genome - ori = va_func.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=hgvs_genomic.ac, + ori = self.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=hgvs_genomic.ac, alt_aln_method=alt_aln_method, hdp=self.hdp) ori = int(ori[0]['alt_strand']) @@ -6225,12 +6224,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr except NotImplementedError: pass try: - c_for_p = vm.g_to_t(rng, hgvs_coding.ac) + c_for_p = 
self.vm.g_to_t(rng, hgvs_coding.ac) except hgvs.exceptions.HGVSInvalidIntervalError as e: c_for_p = seek_var try: # Predicted effect on protein - protein_dict = va_func.myc_to_p(c_for_p, evm, self.hdp, hp, hn, vm, self.sf, re_to_p=False) + protein_dict = self.myc_to_p(c_for_p, evm, self.hdp, self.hp, hn, self.vm, self.sf, re_to_p=False) if protein_dict['error'] == '': hgvs_protein = protein_dict['hgvs_protein'] protein = str(hgvs_protein) @@ -6296,14 +6295,14 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # 5 prime # < # Result, normalize of new variant will be happy - c_for_p = vm.g_to_t(rng, hgvs_coding.ac) + c_for_p = self.vm.g_to_t(rng, hgvs_coding.ac) try: hn.normalize(c_for_p) except hgvs.exceptions.HGVSError as e: fn.exceptPass() else: # hgvs_protein = va_func.protein(str(c_for_p), evm, hp) - protein_dict = va_func.myc_to_p(c_for_p, evm, self.hdp, hp, hn, vm, self.sf, + protein_dict = self.myc_to_p(c_for_p, evm, self.hdp, self.hp, hn, self.vm, self.sf, re_to_p=False) if protein_dict['error'] == '': hgvs_protein = protein_dict['hgvs_protein'] @@ -6341,7 +6340,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_updated = copy.deepcopy(hgvs_coding) hgvs_updated.ac = update try: - vr.validate(hgvs_updated) + self.vr.validate(hgvs_updated) # Updated reference sequence except hgvs.exceptions.HGVSError as e: error = str(e) @@ -6349,7 +6348,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr match = re.findall('\(([GATC]+)\)', error) new_ref = match[1] hgvs_updated.posedit.edit.ref = new_ref - vr.validate(hgvs_updated) + self.vr.validate(hgvs_updated) updated_transcript_variant = hgvs_updated else: pass @@ -6424,7 +6423,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # genomic accession if genomic_variant != '': - hgvs_genomic_variant = hp.parse_hgvs_variant(genomic_variant) + hgvs_genomic_variant = 
self.hp.parse_hgvs_variant(genomic_variant) genomic_variant = fn.valstr(hgvs_genomic_variant) genomic_accession = hgvs_genomic_variant.ac else: @@ -6439,8 +6438,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr lrg_variant = '' hgvs_refseqgene_variant = 'false' else: - hgvs_refseqgene_variant = hp.parse_hgvs_variant(refseqgene_variant) - rsg_ac = va_dbCrl.data.get_lrgID_from_RefSeqGeneID(str(hgvs_refseqgene_variant.ac)) + hgvs_refseqgene_variant = self.hp.parse_hgvs_variant(refseqgene_variant) + rsg_ac = self.db.get.get_lrgID_from_RefSeqGeneID(str(hgvs_refseqgene_variant.ac)) if rsg_ac[0] == 'none': lrg_variant = '' else: @@ -6461,14 +6460,14 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_variant = tx_variant.replace(')', '') # transcript accession - hgvs_tx_variant = hp.parse_hgvs_variant(tx_variant) + hgvs_tx_variant = self.hp.parse_hgvs_variant(tx_variant) tx_variant = fn.valstr(hgvs_tx_variant) - hgvs_transcript_variant = hp.parse_hgvs_variant(tx_variant) + hgvs_transcript_variant = self.hp.parse_hgvs_variant(tx_variant) transcript_accession = hgvs_transcript_variant.ac # Handle LRG lrg_status = 'public' - lrg_transcript = va_dbCrl.data.get_lrgTranscriptID_from_RefSeqTranscriptID(transcript_accession) + lrg_transcript = self.db.get.get_lrgTranscriptID_from_RefSeqTranscriptID(transcript_accession) if lrg_transcript == 'none': lrg_transcript_variant = '' else: @@ -6479,7 +6478,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # if not re.search('RefSeqGene', refseqgene_variant) or refseqgene_variant != '': # if hgvs_refseqgene_variant != 'RefSeqGene record not available' and hgvs_refseqgene_variant != 'false': try: - hgvs_lrg_t = vm.g_to_t(hgvs_refseqgene_variant, transcript_accession) + hgvs_lrg_t = self.vm.g_to_t(hgvs_refseqgene_variant, transcript_accession) hgvs_lrg_t.ac = lrg_transcript lrg_transcript_variant = fn.valstr(hgvs_lrg_t) except: @@ 
-6497,21 +6496,21 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if transcript_accession != '' and genomic_accession != '': # Remove del bases str_transcript = fn.valstr(hgvs_transcript_variant) - hgvs_transcript_variant = hp.parse_hgvs_variant(str_transcript) + hgvs_transcript_variant = self.hp.parse_hgvs_variant(str_transcript) try: - vr.validate(hgvs_transcript_variant) + self.vr.validate(hgvs_transcript_variant) except hgvs.exceptions.HGVSError as e: error = str(e) if re.search('intronic variant', error): genome_context_transcript_variant = genomic_accession + '(' + transcript_accession + '):c.' + str( hgvs_transcript_variant.posedit) if refseqgene_variant != '': - hgvs_refseqgene_variant = hp.parse_hgvs_variant(refseqgene_variant) + hgvs_refseqgene_variant = self.hp.parse_hgvs_variant(refseqgene_variant) refseqgene_accession = hgvs_refseqgene_variant.ac - hgvs_coding_from_refseqgene = vm.g_to_t(hgvs_refseqgene_variant, + hgvs_coding_from_refseqgene = self.vm.g_to_t(hgvs_refseqgene_variant, hgvs_transcript_variant.ac) hgvs_coding_from_refseqgene = fn.valstr(hgvs_coding_from_refseqgene) - hgvs_coding_from_refseqgene = hp.parse_hgvs_variant(hgvs_coding_from_refseqgene) + hgvs_coding_from_refseqgene = self.hp.parse_hgvs_variant(hgvs_coding_from_refseqgene) RefSeqGene_context_transcript_variant = refseqgene_accession + '(' + transcript_accession + '):c.' + str( hgvs_coding_from_refseqgene.posedit.pos) + str( hgvs_coding_from_refseqgene.posedit.edit) diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index 38adb5b9..b4f750a0 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -181,7 +181,9 @@ def __init__(self): def myConfig(self): #Returns configuration: #version, hgvs version, uta schema, seqrepo db. 
- + return { + "Seqrepo path":self.seqrepoPath + } def createConfig(self,outPath): # This function reads from the default configuration file stored in the same folder as this module, # and transfers it to outPath. From bab8e702e5b2e63e37b957f6f08abfb8c7ea09b4 Mon Sep 17 00:00:00 2001 From: buran Date: Thu, 17 Jan 2019 20:08:10 +0000 Subject: [PATCH 010/223] Corrected some code errors --- VariantValidator/modules/vvFunctions.py | 3 +++ VariantValidator/modules/vvMixinConverters.py | 6 +++--- VariantValidator/modules/vvMixinCore.py | 6 +++--- VariantValidator/modules/vvObjects.py | 3 --- VariantValidator/testing/vvTestSave.py | 19 ------------------- 5 files changed, 9 insertions(+), 28 deletions(-) diff --git a/VariantValidator/modules/vvFunctions.py b/VariantValidator/modules/vvFunctions.py index 85c6b83f..73a26b1a 100644 --- a/VariantValidator/modules/vvFunctions.py +++ b/VariantValidator/modules/vvFunctions.py @@ -453,3 +453,6 @@ def n_inversion(ref_seq, del_seq, inv_seq, interval_start, interval_end): return sequence +# Custom Exceptions +class VariantValidatorError(Exception): + pass diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index c73ed0a2..fbce2977 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -2233,7 +2233,7 @@ def hgvs_alleles(self, variant_description): remainder = ';'.join(pre_merges) remainder = remainder[1:-1] # removes the first [ and the last ] alleles = remainder.split('];[') - # now separate out the variants in each allele§ + # now separate out the variants in each allele | for posedits in alleles: posedit_list = posedits.split(';') current_allele = [] @@ -2261,7 +2261,7 @@ def hgvs_alleles(self, variant_description): # NM_004006.2:c.2376G>C(;)3103del # NM_000548.3:c.3623_3647del(;)3745_3756dup alleles = remainder.split('(;)') - # now separate out the variants in each allele§ + # now separate out the variants in each 
allele | my_alleles = [] for posedits in alleles: posedit_list = posedits.split(';') @@ -2280,7 +2280,7 @@ def hgvs_alleles(self, variant_description): # NM_000548.3:c.[4358_4359del;4361_4372del] remainder = remainder[1:-1] # removes the first [ and the last ] alleles = remainder.split('];[') - # now separate out the variants in each allele§ + # now separate out the variants in each allele | my_alleles = [] for posedits in alleles: posedit_list = posedits.split(';') diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index ebb4f93c..a3a036f1 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -51,7 +51,7 @@ import vvDatabase import vvChromosomes import vvMixinConverters -from vvObjects import variantValidatorError +from vvFunctions import VariantValidatorError class Mixin(vvMixinConverters.Mixin): @@ -852,7 +852,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr logger.warning('Intronic positions not supported for HGVS Allele descriptions') continue else: - raise variantValidatorError(str(e)) + raise VariantValidatorError(str(e)) logger.trace("HVGS String allele parsing pass 1 complete", validation) # INITIAL USER INPUT FORMATTING """ @@ -8267,7 +8267,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # tr = ''.join(traceback.format_stack()) tbk = [str(exc_type), str(exc_value), str(te)] er = '\n'.join(tbk) - # raise variantValidatorError('Validation error') + # raise VariantValidatorError('Validation error') # Return # return logger.critical(str(exc_type) + " " + str(exc_value)) diff --git a/VariantValidator/modules/vvObjects.py b/VariantValidator/modules/vvObjects.py index ff680278..20e64cb0 100644 --- a/VariantValidator/modules/vvObjects.py +++ b/VariantValidator/modules/vvObjects.py @@ -22,6 +22,3 @@ class ValOutput(): class Validator(vvMixinCore.Mixin): pass -# Custom Exceptions -class 
variantValidatorError(Exception): - pass diff --git a/VariantValidator/testing/vvTestSave.py b/VariantValidator/testing/vvTestSave.py index 6e014754..53f20893 100644 --- a/VariantValidator/testing/vvTestSave.py +++ b/VariantValidator/testing/vvTestSave.py @@ -1,27 +1,8 @@ #Saving script import vvTestFunctions as fn -import sys -from StringIO import StringIO -import sqlite3 import os -class vvHub(): - #Variant validator configuration hub object - def __init__(self): - seqrepo_current_version='2018-08-21' - HGVS_SEQREPO_DIR='/home/buran/documents/workspace/ITS/seqrepo/'+seqrepo_current_version - os.environ['HGVS_SEQREPO_DIR']=HGVS_SEQREPO_DIR - self.hvgsSeqrepoPath=HGVS_SEQREPO_DIR - uta_current_version='uta_20180821' - UTA_DB_URL='postgresql://uta_admin:uta_admin@127.0.0.1/uta/' + uta_current_version - os.environ['UTA_DB_URL']=UTA_DB_URL - self.utaPath=UTA_DB_URL - import VariantValidator.variantanalyser.vvLogging as vvLogging - self.logger=vvLogging.logger - from VariantValidator import variantValidator as vv - self.vv=vv - self.vv.my_config() hub=vvHub() From 26d1b94af8720c809740e92f124205446b8dc4a4 Mon Sep 17 00:00:00 2001 From: buran Date: Mon, 21 Jan 2019 10:23:59 +0000 Subject: [PATCH 011/223] Bugfixing --- VariantValidator/modules/vvFunctions.py | 18 +++++---------- VariantValidator/modules/vvMixinConverters.py | 6 ++--- VariantValidator/modules/vvMixinCore.py | 22 ++++++++++--------- VariantValidator/modules/vvMixinInit.py | 4 +--- VariantValidator/testing/vvTestSave.py | 3 ++- 5 files changed, 24 insertions(+), 29 deletions(-) diff --git a/VariantValidator/modules/vvFunctions.py b/VariantValidator/modules/vvFunctions.py index 73a26b1a..0ee5b480 100644 --- a/VariantValidator/modules/vvFunctions.py +++ b/VariantValidator/modules/vvFunctions.py @@ -18,19 +18,13 @@ def handleCursor(func): #Decorator function for handling opening and closing cursors. 
@functools.wraps(func) def wrapper(self,*args,**kwargs): - try: - self.cursor = self.conn.cursor(buffered=True) - out=func(*args,**kwargs) + self.connection=self.conn.get_connection() + self.cursor = self.connection.cursor(buffered=True) + out=func(self,*args,**kwargs) + if self.cursor: self.cursor.close() - self.cursor=None - return out - except: - try: - self.cursor.close() - self.cursor=None - except: - self.cursor=None - raise + #self.cursor=None + return out return wrapper def hgnc_rest(path): diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index fbce2977..f56013d8 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -1683,7 +1683,7 @@ def ng_extract(self, tx_for_gene): """ - def tx_exons(self, tx_ac, alt_ac, alt_aln_method, hdpOld): + def tx_exons(self, tx_ac, alt_ac, alt_aln_method): # Interface with the UTA database via get_tx_exons in uta.py try: tx_exons = self.hdp.get_tx_exons(tx_ac, alt_ac, alt_aln_method) @@ -1709,7 +1709,7 @@ def tx_exons(self, tx_ac, alt_ac, alt_aln_method, hdpOld): """ - def relevant_transcripts(self, hgvs_genomic, evm, hdpOld, alt_aln_method): + def relevant_transcripts(self, hgvs_genomic, evm, alt_aln_method,reverse_normalizer): # Pass relevant transcripts for the input variant to rts # Note, the evm method misses one end, the hdp. method misses the other. 
Combine both rts_list = self.hdp.get_tx_for_region(hgvs_genomic.ac, alt_aln_method, hgvs_genomic.posedit.pos.start.base-1, hgvs_genomic.posedit.pos.end.base-1) @@ -1779,7 +1779,7 @@ def relevant_transcripts(self, hgvs_genomic, evm, hdpOld, alt_aln_method): pass else: # Reverse normalize hgvs_genomic - rev_hgvs_genomic = self.reverse_hn.normalize(hgvs_genomic) + rev_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic) # map back to coding variant = evm.g_to_t(rev_hgvs_genomic, tx_ac) code_var.append(str(variant)) diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index a3a036f1..6e3fa173 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -1781,7 +1781,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue else: # Any transcripts? - rel_var = self.relevant_transcripts(hgvs_mito, evm, self.hdp, alt_aln_method, reverse_normalizer) + rel_var = self.relevant_transcripts(hgvs_mito, evm, alt_aln_method, reverse_normalizer) hgvs_genomic = copy.deepcopy(hgvs_mito) if len(rel_var) == 0: validation['genomic_g'] = fn.valstr(hgvs_mito) @@ -2062,7 +2062,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr Initial simple projection from the provided g. 
position all overlapping transcripts """ - rel_var = self.relevant_transcripts(hgvs_genomic, evm, self.hdp, alt_aln_method, reverse_normalizer) + rel_var = self.relevant_transcripts(hgvs_genomic, evm, alt_aln_method, reverse_normalizer) # Double check rel_vars have not been missed when mapping from a RefSeqGene if len(rel_var) != 0 and re.match('NG_', str(hgvs_genomic.ac)): @@ -2074,7 +2074,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr except hgvs.exceptions.HGVSError as e: try_rel_var = [] else: - try_rel_var = self.relevant_transcripts(hgvs_genomic, evm, self.hdp, alt_aln_method, + try_rel_var = self.relevant_transcripts(hgvs_genomic, evm, alt_aln_method, reverse_normalizer) if len(try_rel_var) > len(rel_var): rel_var = try_rel_var @@ -2089,7 +2089,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr int(vcf_dict['pos']) + (len(vcf_dict['ref']) - 1)) + 'del' + vcf_dict['ref'] + 'ins' + \ vcf_dict['alt'] hgvs_not_di = self.hp.parse_hgvs_variant(not_di) - rel_var = self.relevant_transcripts(hgvs_not_di, evm, self.hdp, alt_aln_method, + rel_var = self.relevant_transcripts(hgvs_not_di, evm, alt_aln_method, reverse_normalizer) # list return statements @@ -2264,7 +2264,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Get orientation of the gene wrt genome and a list of exons mapped to the genome ori = self.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=hgvs_genomic_5pr.ac, - alt_aln_method=alt_aln_method, hdp=self.hdp) + alt_aln_method=alt_aln_method) orientation = int(ori[0]['alt_strand']) intronic_variant = 'false' @@ -3198,7 +3198,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue # Get orientation of the gene wrt genome and a list of exons mapped to the genome - ori = self.tx_exons(tx_ac=tx_ac, alt_ac=genomic_ac, alt_aln_method=alt_aln_method, hdp=self.hdp) + ori = self.tx_exons(tx_ac=tx_ac, alt_ac=genomic_ac, 
alt_aln_method=alt_aln_method) orientation = int(ori[0]['alt_strand']) intronic_variant = 'false' @@ -5348,7 +5348,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Get orientation of the gene wrt genome and a list of exons mapped to the genome ori = self.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=hgvs_genomic_5pr.ac, - alt_aln_method=alt_aln_method, hdp=self.hdp) + alt_aln_method=alt_aln_method) orientation = int(ori[0]['alt_strand']) # Look for normalized variant options that do not match hgvs_coding @@ -6178,7 +6178,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Gene orientation wrt genome ori = self.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=hgvs_genomic.ac, - alt_aln_method=alt_aln_method, hdp=self.hdp) + alt_aln_method=alt_aln_method) ori = int(ori[0]['alt_strand']) # Look for normalized variant options that do not match hgvs_coding @@ -6384,7 +6384,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr er = str('\n'.join(tbk)) logger.error(str(exc_type) + " " + str(exc_value)) logger.debug(er) - + #debug + raise continue # Outside the for loop @@ -6596,7 +6597,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr try: # Re set ori ori = self.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=alt_chr, - alt_aln_method=alt_aln_method, hdp=self.hdp) + alt_aln_method=alt_aln_method) orientation = int(ori[0]['alt_strand']) hgvs_alt_genomic = self.myvm_t_to_g(hgvs_coding, alt_chr, no_norm_evm, self.vm, self.hp, hn, self.sf, self.nr_vm) @@ -8271,4 +8272,5 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Return # return logger.critical(str(exc_type) + " " + str(exc_value)) + raise logger.debug(str(er)) diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index b4f750a0..83a071f2 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ 
b/VariantValidator/modules/vvMixinInit.py @@ -132,7 +132,7 @@ def __init__(self): self.sf = hgvs.dataproviders.seqfetcher.SeqFetcher() # Seqfetcher # Set standard genome builds self.genome_builds = ['GRCh37', 'hg19', 'GRCh38'] - self.uta_schema = str(self.hdp.data_version()) + self.utaSchema = str(self.hdp.data_version()) # Create normalizer self.hn = hgvs.normalizer.Normalizer(self.hdp, @@ -176,8 +176,6 @@ def __init__(self): - #def validate(self): # <-------------- this is imported from the mixin class in vvCore. - # pass def myConfig(self): #Returns configuration: #version, hgvs version, uta schema, seqrepo db. diff --git a/VariantValidator/testing/vvTestSave.py b/VariantValidator/testing/vvTestSave.py index 53f20893..506f5aeb 100644 --- a/VariantValidator/testing/vvTestSave.py +++ b/VariantValidator/testing/vvTestSave.py @@ -6,4 +6,5 @@ hub=vvHub() -fn.generateTestFolder("testOutputs","inputVariants.txt",hub) \ No newline at end of file + +fn.generateTestFolder("testOutputsPreRework","inputVariants.txt",hub) \ No newline at end of file From ebbc891f21cd55e849194b24662acef76f8dda55 Mon Sep 17 00:00:00 2001 From: buran Date: Mon, 21 Jan 2019 12:01:11 +0000 Subject: [PATCH 012/223] Re-entered the myevm_h_to_g function, fixed database connections --- VariantValidator/modules/vvDBGet.py | 14 +- VariantValidator/modules/vvDBInsert.py | 45 ++-- VariantValidator/modules/vvDatabase.py | 9 +- VariantValidator/modules/vvFunctions.py | 16 +- VariantValidator/modules/vvMixinConverters.py | 222 ++++++++++-------- VariantValidator/modules/vvMixinCore.py | 155 ++++++------ VariantValidator/modules/vvMixinInit.py | 9 +- VariantValidator/testing/vvTestFunctions.py | 4 +- VariantValidator/testing/vvTestSave.py | 7 +- 9 files changed, 245 insertions(+), 236 deletions(-) diff --git a/VariantValidator/modules/vvDBGet.py b/VariantValidator/modules/vvDBGet.py index 4870cd21..b5d79632 100644 --- a/VariantValidator/modules/vvDBGet.py +++ b/VariantValidator/modules/vvDBGet.py @@ -2,23 
+2,21 @@ from vvLogging import logger class vvDBGet: - def __init__(self,conn,cursor): + def __init__(self,db): # These are inherited by reference from the vvDatabase object. - self.conn=conn - self.cursor=cursor - + self.db=db @handleCursor def execute(self,query): - self.cursor.execute(query) - row = self.cursor.fetchone() + self.db.cursor.execute(query) + row = self.db.cursor.fetchone() if row is None: logger.debug("No data returned from query "+str(query)) row = ['none', 'No data'] return row @handleCursor def executeAll(self,query): - self.cursor.execute(query) - rows = self.cursor.fetchone() + self.db.cursor.execute(query) + rows = self.db.cursor.fetchone() if rows==[]: logger.debug("No data returned from query "+str(query)) row = ['none', 'No data'] diff --git a/VariantValidator/modules/vvDBInsert.py b/VariantValidator/modules/vvDBInsert.py index 3bcd323b..6a551cc7 100644 --- a/VariantValidator/modules/vvDBInsert.py +++ b/VariantValidator/modules/vvDBInsert.py @@ -1,10 +1,9 @@ -from vvDatabase import handleCursor +from vvFunctions import handleCursor class vvDBInsert: - def __init__(self,conn,cursor): + def __init__(self,db): # These are inherited by reference from the vvDatabase object. 
- self.conn=conn - self.cursor=cursor + self.db=db # Add new entry def add_entry(self,entry, data, table): return self.insert(entry, data, table) @@ -23,65 +22,65 @@ def insert(self,entry, data, table): hgnc_symbol = data[4] uta_symbol = data[5] query = "INSERT INTO transcript_info(refSeqID, description, transcriptVariant, currentVersion, hgncSymbol, utaSymbol, updated) VALUES(%s,%s, %s, %s, %s, %s, NOW())" - self.cursor.execute(query, (accession, description, variant, version, hgnc_symbol, uta_symbol)) + self.db.cursor.execute(query, (accession, description, variant, version, hgnc_symbol, uta_symbol)) # Query report - if self.cursor.lastrowid: + if self.db.cursor.lastrowid: success = 'true' else: success = 'Unknown error' # Commit and close connection - self.conn.commit() + self.db.conn.commit() return success @handleCursor def insert_refSeqGene_data(self,rsg_data): query = "INSERT INTO refSeqGene_loci(refSeqGeneID, refSeqChromosomeID, genomeBuild, startPos, endPos, orientation, totalLength, chrPos, rsgPos, entrezID, hgncSymbol, updated) VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())" - self.cursor.execute(query, (rsg_data[0], rsg_data[1], rsg_data[2], rsg_data[3], rsg_data[4], rsg_data[5], rsg_data[6], rsg_data[7], rsg_data[8], rsg_data[9], rsg_data[10])) + self.db.cursor.execute(query, (rsg_data[0], rsg_data[1], rsg_data[2], rsg_data[3], rsg_data[4], rsg_data[5], rsg_data[6], rsg_data[7], rsg_data[8], rsg_data[9], rsg_data[10])) # Query report - if self.cursor.lastrowid: + if self.db.cursor.lastrowid: success = 'true' else: success = 'Unknown error' # Commit and close connection - self.conn.commit() + self.db.conn.commit() return success @handleCursor def insert_RefSeqGeneID_from_lrgID(self,lrg_rs_lookup): query = "INSERT INTO LRG_RSG_lookup(lrgID, hgncSymbol, RefSeqGeneID, status) VALUES(%s,%s,%s,%s)" - self.cursor.execute(query, (lrg_rs_lookup[0], lrg_rs_lookup[1], lrg_rs_lookup[2], lrg_rs_lookup[3])) + self.db.cursor.execute(query, 
(lrg_rs_lookup[0], lrg_rs_lookup[1], lrg_rs_lookup[2], lrg_rs_lookup[3])) # Query report - if self.cursor.lastrowid: + if self.db.cursor.lastrowid: success = 'true' else: success = 'Unknown error' # Commit and close connection - self.conn.commit() + self.db.conn.commit() return success @handleCursor def insert_LRG_transcript_data(self,lrgtx_to_rstID): query = "INSERT INTO LRG_transcripts(LRGtranscriptID, RefSeqTranscriptID) VALUES(%s,%s)" - self.cursor.execute(query, (lrgtx_to_rstID[0], lrgtx_to_rstID[1])) + self.db.cursor.execute(query, (lrgtx_to_rstID[0], lrgtx_to_rstID[1])) # Query report - if self.cursor.lastrowid: + if self.db.cursor.lastrowid: success = 'true' else: success = 'Unknown error' # Commit and close connection - self.conn.commit() + self.db.conn.commit() return success @handleCursor def insert_LRG_protein_data(self,lrg_p, rs_p): query = "INSERT INTO LRG_proteins(LRGproteinID, RefSeqProteinID) VALUES(%s,%s)" - self.cursor.execute(query, (lrg_p, rs_p)) + self.db.cursor.execute(query, (lrg_p, rs_p)) # Query report - if self.cursor.lastrowid: + if self.db.cursor.lastrowid: success = 'true' else: success = 'Unknown error' # Commit and close connection - self.conn.commit() + self.db.conn.commit() return success # from dbupdate @handleCursor @@ -95,17 +94,17 @@ def update(self,entry, data, table): hgnc_symbol = data[4] uta_symbol = data[5] query = "UPDATE transcript_info SET description=%s, transcriptVariant=%s, currentVersion=%s, hgncSymbol=%s, utaSymbol=%s, updated=NOW() WHERE refSeqID = %s" - self.cursor.execute(query, (description, variant, version, hgnc_symbol, uta_symbol, accession)) + self.db.cursor.execute(query, (description, variant, version, hgnc_symbol, uta_symbol, accession)) success = 'true' - self.conn.commit() + self.db.conn.commit() return success # 'true'??? check this. 
@handleCursor def update_refSeqGene_data(self,rsg_data): query = "UPDATE refSeqGene_loci SET hgncSymbol=%s, updated=NOW() WHERE refSeqGeneID=%s" - self.cursor.execute(query, (rsg_data[10], rsg_data[0])) + self.db.cursor.execute(query, (rsg_data[10], rsg_data[0])) success = 'true' - self.conn.commit() + self.db.conn.commit() return success # Update entries def update_entry(self,entry, data, table): diff --git a/VariantValidator/modules/vvDatabase.py b/VariantValidator/modules/vvDatabase.py index 79b246fc..011c8e4b 100644 --- a/VariantValidator/modules/vvDatabase.py +++ b/VariantValidator/modules/vvDatabase.py @@ -14,8 +14,8 @@ class vvDatabase: # This class contains and handles the mysql connections for the variant validator database. def __init__(self,val,dbConfig): - self.conn = mysql.connector.pooling.MySQLConnectionPool(pool_size=10, **dbConfig) - # self.cursor will be none UNLESS you're wrapping a function in @handlecursor, which automatically opens and + self.conn = None + # self.cursor will be none UNLESS you're wrapping a function in @handleCursor, which automatically opens and # closes connections for you. 
self.cursor=None self.dbConfig=dbConfig @@ -24,8 +24,9 @@ def __init__(self,val,dbConfig): self.path="mysqlx://"+dbConfig["user"]+":"+dbConfig["password"]+"@"+dbConfig["host"]+"/"+dbConfig["database"] os.environ["VALIDATOR_DB_URL"]=self.path self.val=val - self.insert = vvDBInsert(self.conn,self.cursor) # contains dbinsert, dbupdate - self.get = vvDBGet(self.conn,self.cursor) # contains dbfetchone, dbfetchall + self.insert = vvDBInsert(self) # contains dbinsert, dbupdate + self.get = vvDBGet(self) # contains dbfetchone, dbfetchall + self.db=self #needed to make handlecursor behave # from dbquery @handleCursor diff --git a/VariantValidator/modules/vvFunctions.py b/VariantValidator/modules/vvFunctions.py index 0ee5b480..a39fdb69 100644 --- a/VariantValidator/modules/vvFunctions.py +++ b/VariantValidator/modules/vvFunctions.py @@ -10,7 +10,7 @@ from vvLogging import logger import re import copy - +import mysql #from urllib.parse import urlparse #Python 3 @@ -18,15 +18,21 @@ def handleCursor(func): #Decorator function for handling opening and closing cursors. 
@functools.wraps(func) def wrapper(self,*args,**kwargs): - self.connection=self.conn.get_connection() - self.cursor = self.connection.cursor(buffered=True) + self.db.pool=mysql.connector.pooling.MySQLConnectionPool(pool_size=10, **self.db.dbConfig) + self.db.conn=self.db.pool.get_connection() + self.db.cursor = self.db.conn.cursor(buffered=True) out=func(self,*args,**kwargs) - if self.cursor: - self.cursor.close() + if self.db.cursor: + self.db.cursor.close() + if self.db.conn: + self.db.conn.close() #self.cursor=None return out return wrapper + + + def hgnc_rest(path): data = { 'record': '', diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index f56013d8..2c9fc705 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -233,7 +233,7 @@ def hgvs_genomic(self, variant, hpOld): """ - def myevm_t_to_g(self, hgvs_c, evm, hdpOld, primary_assembly): + def myevm_t_to_g(self, hgvs_c, evm, primary_assembly,hn): # create no_norm_evm if primary_assembly == 'GRCh38': no_norm_evm = self.no_norm_evm_38 @@ -264,7 +264,7 @@ def myevm_t_to_g(self, hgvs_c, evm, hdpOld, primary_assembly): # Check for intronic try: - self.hn.normalize(hgvs_c) + hn.normalize(hgvs_c) except hgvs.exceptions.HGVSError as e: error = str(e) if re.search('intronic variant', error): @@ -344,7 +344,7 @@ def myevm_t_to_g(self, hgvs_c, evm, hdpOld, primary_assembly): # Ensure the altered c. variant has not crossed intro exon boundaries hgvs_check_boundaries = copy.deepcopy(hgvs_c) try: - h_variant = self.hn.normalize(hgvs_check_boundaries) + h_variant = hn.normalize(hgvs_check_boundaries) except hgvs.exceptions.HGVSError as e: error = str(e) if re.search('spanning the exon-intron boundary', error): @@ -355,14 +355,14 @@ def myevm_t_to_g(self, hgvs_c, evm, hdpOld, primary_assembly): reform_ident = reform_ident + ':' + stored_hgvs_c.type + '.' 
+ str(hgvs_c.posedit.pos) + 'del' + str(hgvs_c.posedit.edit.ref)# + 'ins' + str(hgvs_c.posedit.edit.alt) hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) try: - self.hn.normalize(hgvs_reform_ident) + hn.normalize(hgvs_reform_ident) except hgvs.exceptions.HGVSError as e: error = str(e) if re.search('spanning the exon-intron boundary', error) or re.search('Normalization of intronic variants', error): hgvs_c = copy.deepcopy(stored_hgvs_c) try: hgvs_genomic = no_norm_evm.t_to_g(hgvs_c) - self.hn.normalize(hgvs_genomic) # Check the validity of the mapping + hn.normalize(hgvs_genomic) # Check the validity of the mapping # This will fail on multiple refs for NC_ except hgvs.exceptions.HGVSError as e: # Recover all available mapping options from UTA @@ -389,7 +389,7 @@ def myevm_t_to_g(self, hgvs_c, evm, hdpOld, primary_assembly): # If not mapped, raise error try: - self.hn.normalize(hgvs_genomic) + hn.normalize(hgvs_genomic) except: for option in mapping_options: if re.match('blat', option[2]): @@ -406,7 +406,7 @@ def myevm_t_to_g(self, hgvs_c, evm, hdpOld, primary_assembly): print e continue try: - self.hn.normalize(hgvs_genomic) + hn.normalize(hgvs_genomic) except: for option in mapping_options: if re.match('blat', option[2]): @@ -420,7 +420,7 @@ def myevm_t_to_g(self, hgvs_c, evm, hdpOld, primary_assembly): print e continue try: - self.hn.normalize(hgvs_genomic) + hn.normalize(hgvs_genomic) except: for option in mapping_options: if re.match('blat', option[2]): @@ -435,7 +435,7 @@ def myevm_t_to_g(self, hgvs_c, evm, hdpOld, primary_assembly): continue # Only a RefSeqGene available try: - self.hn.normalize(hgvs_genomic) + hn.normalize(hgvs_genomic) except: for option in mapping_options: if re.match('blat', option[2]): @@ -458,20 +458,20 @@ def myevm_t_to_g(self, hgvs_c, evm, hdpOld, primary_assembly): hgvs_genomic.posedit.edit.alt = hgvs_genomic.posedit.edit.ref if hgvs_genomic.posedit.edit.type == 'ins' and utilise_gap_code is True: try: - hgvs_genomic = 
self.hn.normalize(hgvs_genomic) + hgvs_genomic = hn.normalize(hgvs_genomic) except hgvs.exceptions.HGVSError as e: error = str(e) if error == 'insertion length must be 1': ref = self.sf.fetch_seq(str(hgvs_genomic.ac),hgvs_genomic.posedit.pos.start.base-1,hgvs_genomic.posedit.pos.end.base) hgvs_genomic.posedit.edit.ref = ref hgvs_genomic.posedit.edit.alt = ref[0:1] + hgvs_genomic.posedit.edit.alt + ref[-1:] - hgvs_genomic = self.hn.normalize(hgvs_genomic) + hgvs_genomic = hn.normalize(hgvs_genomic) if error == 'base start position must be <= end position': start = hgvs_genomic.posedit.pos.start.base end = hgvs_genomic.posedit.pos.end.base hgvs_genomic.posedit.pos.start.base = end hgvs_genomic.posedit.pos.end.base = start - hgvs_genomic = self.hn.normalize(hgvs_genomic) + hgvs_genomic = hn.normalize(hgvs_genomic) # Statements required to reformat the stored_hgvs_c into a useable synonym if (stored_hgvs_c.posedit.edit.ref == '' or stored_hgvs_c.posedit.edit.ref is None) and expand_out != 'false': @@ -494,7 +494,7 @@ def myevm_t_to_g(self, hgvs_c, evm, hdpOld, primary_assembly): if expand_out == 'true': nr_genomic = self.nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) try: - self.hn.normalize(nr_genomic) + hn.normalize(nr_genomic) except hgvs.exceptions.HGVSInvalidVariantError as e: if re.match('Length implied by coordinates must equal sequence deletion length', str(e)) or str(e) == 'base start position must be <= end position': # Effectively, this code is designed to handle variants that are directly proximal to @@ -506,7 +506,7 @@ def myevm_t_to_g(self, hgvs_c, evm, hdpOld, primary_assembly): logger.warning('Variant is proximal to the flank of a genomic gap') genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) try: - self.hn.normalize(genomic_gap_variant) + hn.normalize(genomic_gap_variant) except Exception: pass else: @@ -520,7 +520,7 @@ def myevm_t_to_g(self, hgvs_c, evm, hdpOld, primary_assembly): # We have checked that the variant does not cross 
boundaries, or is intronic # So is likely mapping to a genomic gap try: - self.hn.normalize(genomic_gap_variant) + hn.normalize(genomic_gap_variant) except Exception as e: if str(e) == 'base start position must be <= end position': # This will only happen when the variant is fully within the gap @@ -547,12 +547,12 @@ def myevm_t_to_g(self, hgvs_c, evm, hdpOld, primary_assembly): pass # Should be a delins so will normalize statically and replace the reference bases - genomic_gap_variant = self.hn.normalize(genomic_gap_variant) + genomic_gap_variant = hn.normalize(genomic_gap_variant) # Static map to c. and static normalize transcript_gap_variant = self.vm.g_to_t(genomic_gap_variant, hgvs_c.ac) stored_transcript_gap_variant = transcript_gap_variant if not re.match('Length implied by coordinates must equal sequence deletion length', str(e)): - transcript_gap_variant = self.hn.normalize(transcript_gap_variant) + transcript_gap_variant = hn.normalize(transcript_gap_variant) # if NM_ need the n. 
position if re.match('NM_', str(hgvs_c.ac)): @@ -622,7 +622,7 @@ def myevm_t_to_g(self, hgvs_c, evm, hdpOld, primary_assembly): try: hgvs_genomic = self.vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) - hgvs_genomic = self.hn.normalize(hgvs_genomic) + hgvs_genomic = hn.normalize(hgvs_genomic) except Exception as e: if str(e) == "base start position must be <= end position": # Expansion out is required to map back to the genomic position @@ -637,7 +637,7 @@ def myevm_t_to_g(self, hgvs_c, evm, hdpOld, primary_assembly): except: transcript_gap_variant = transcript_gap_n hgvs_genomic = self.vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) - hgvs_genomic = self.hn.normalize(hgvs_genomic) + hgvs_genomic = hn.normalize(hgvs_genomic) # Bypass the next bit of gap code expand_out = 'false' @@ -666,7 +666,7 @@ def myevm_t_to_g(self, hgvs_c, evm, hdpOld, primary_assembly): elif expand_out == 'true' and ( len(hgvs_genomic.posedit.edit.ref) != (len(stored_hgvs_c.posedit.edit.ref) + 2)): # >= 3: if expand_out == 'true' and len(hgvs_genomic.posedit.edit.ref) == 2: - gn = self.hn.normalize(hgvs_genomic) + gn = hn.normalize(hgvs_genomic) pass # Likely if the start or end position aligns to a gap in the genomic sequence @@ -677,7 +677,7 @@ def myevm_t_to_g(self, hgvs_c, evm, hdpOld, primary_assembly): # Incorrect expansion, likely < ref + 2 genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) try: - self.hn.normalize(genomic_gap_variant) + hn.normalize(genomic_gap_variant) except Exception as e: if str(e) == 'base start position must be <= end position': gap_start = genomic_gap_variant.posedit.pos.end.base @@ -691,11 +691,11 @@ def myevm_t_to_g(self, hgvs_c, evm, hdpOld, primary_assembly): if str(e) == "'Dup' object has no attribute 'alt'": pass # Should be a delins so will normalize statically and replace the reference bases - genomic_gap_variant = self.hn.normalize(genomic_gap_variant) + genomic_gap_variant = hn.normalize(genomic_gap_variant) # Static map to c. 
and static normalize transcript_gap_variant = self.vm.g_to_t(genomic_gap_variant, hgvs_c.ac) stored_transcript_gap_variant = transcript_gap_variant - transcript_gap_variant = self.hn.normalize(transcript_gap_variant) + transcript_gap_variant = hn.normalize(transcript_gap_variant) # if NM_ need the n. position if re.match('NM_', str(hgvs_c.ac)): transcript_gap_n = no_norm_evm.c_to_n(transcript_gap_variant) @@ -769,7 +769,7 @@ def myevm_t_to_g(self, hgvs_c, evm, hdpOld, primary_assembly): try: hgvs_genomic = self.vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) - hgvs_genomic = self.hn.normalize(hgvs_genomic) + hgvs_genomic = hn.normalize(hgvs_genomic) except Exception as e: if str(e) == "base start position must be <= end position": # Expansion out is required to map back to the genomic position @@ -786,12 +786,12 @@ def myevm_t_to_g(self, hgvs_c, evm, hdpOld, primary_assembly): except: transcript_gap_variant = transcript_gap_n hgvs_genomic = self.vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) - hgvs_genomic = self.hn.normalize(hgvs_genomic) + hgvs_genomic = hn.normalize(hgvs_genomic) # Ins variants map badly - Especially between c. 
exon/exon boundary if hgvs_c.posedit.edit.type == 'ins' and hgvs_c.posedit.pos.start.offset == 0 and hgvs_c.posedit.pos.end.offset == 0: try: - self.hn.normalize(hgvs_genomic) + hn.normalize(hgvs_genomic) except hgvs.exceptions.HGVSError as e: error = str(e) if error == 'insertion length must be 1': @@ -830,7 +830,7 @@ def myevm_t_to_g(self, hgvs_c, evm, hdpOld, primary_assembly): def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn, hpOld, sfOld, no_norm_evm): try: hgvs_genomic = evm.t_to_g(hgvs_c) - self.hn.normalize(hgvs_genomic) + hn.normalize(hgvs_genomic) # This will fail on multiple refs for NC_ except hgvs.exceptions.HGVSError as e: # Recover all available mapping options from UTA @@ -856,7 +856,7 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn # If not mapped, raise error try: - self.hn.normalize(hgvs_genomic) + hn.normalize(hgvs_genomic) except: for option in mapping_options: if re.match('blat', option[2]): @@ -875,7 +875,7 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn # If not mapped, raise error try: - self.hn.normalize(hgvs_genomic) + hn.normalize(hgvs_genomic) except: for option in mapping_options: if re.match('blat', option[2]): @@ -896,7 +896,7 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn print e continue try: - self.hn.normalize(hgvs_genomic) + hn.normalize(hgvs_genomic) except: for option in mapping_options: if re.match('blat', option[2]): @@ -915,7 +915,7 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn print e continue try: - self.hn.normalize(hgvs_genomic) + hn.normalize(hgvs_genomic) except: for option in mapping_options: if re.match('blat', option[2]): @@ -935,7 +935,7 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn print e continue try: - self.hn.normalize(hgvs_genomic) + hn.normalize(hgvs_genomic) except: for option in 
mapping_options: if re.match('blat', option[2]): @@ -954,7 +954,7 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn print e continue try: - self.hn.normalize(hgvs_genomic) + hn.normalize(hgvs_genomic) except: for option in mapping_options: if re.match('blat', option[2]): @@ -975,7 +975,7 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn # Only a RefSeqGene available try: - self.hn.normalize(hgvs_genomic) + hn.normalize(hgvs_genomic) except: for option in mapping_options: if re.match('blat', option[2]): @@ -998,7 +998,7 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn # Ins variants map badly - Especially between c. exon/exon boundary if hgvs_c.posedit.edit.type == 'ins' and hgvs_c.posedit.pos.start.offset == 0 and hgvs_c.posedit.pos.end.offset == 0: try: - self.hn.normalize(hgvs_genomic) + hn.normalize(hgvs_genomic) except hgvs.exceptions.HGVSError as e: error = str(e) if error == 'insertion length must be 1': @@ -1031,13 +1031,7 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn """ - def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hn, hdpOld, primary_assembly): - # create no_norm_evm - if primary_assembly == 'GRCh38': - no_norm_evm = self.no_norm_evm_38 - elif primary_assembly == 'GRCh37': - no_norm_evm = self.no_norm_evm_37 - + def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): # store the input stored_hgvs_c = copy.deepcopy(hgvs_c) expand_out = 'false' @@ -1054,7 +1048,8 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hn, hdpOld, primary_assembly): # Warn gap code in use logger.warning("gap_compensation_mvm = " + str(utilise_gap_code)) - if utilise_gap_code is True and (hgvs_c.posedit.edit.type == 'identity' or hgvs_c.posedit.edit.type == 'del' or hgvs_c.posedit.edit.type =='delins' or hgvs_c.posedit.edit.type == 'dup' or hgvs_c.posedit.edit.type == 'sub' or hgvs_c.posedit.edit.type == 'ins' or hgvs_c.posedit.edit.type == 
'inv'): + if utilise_gap_code is True and ( + hgvs_c.posedit.edit.type == 'identity' or hgvs_c.posedit.edit.type == 'del' or hgvs_c.posedit.edit.type == 'delins' or hgvs_c.posedit.edit.type == 'dup' or hgvs_c.posedit.edit.type == 'sub' or hgvs_c.posedit.edit.type == 'ins' or hgvs_c.posedit.edit.type == 'inv'): # if NM_ need the n. position if re.match('NM_', str(hgvs_c.ac)): @@ -1062,7 +1057,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hn, hdpOld, primary_assembly): # Check for intronic try: - self.hn.normalize(hgvs_c) + hn.normalize(hgvs_c) except hgvs.exceptions.HGVSError as e: error = str(e) if re.search('intronic variant', error): @@ -1072,7 +1067,8 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hn, hdpOld, primary_assembly): hgvs_c.posedit.pos.end.base = hgvs_c.posedit.pos.start.base + len(hgvs_c.posedit.edit.ref) - 1 # Check again before continuing - if re.search('\d+\+', str(hgvs_c.posedit.pos)) or re.search('\d+\-', str(hgvs_c.posedit.pos)) or re.search('\*\d+\+', str(hgvs_c.posedit.pos)) or re.search('\*\d+\-', str(hgvs_c.posedit.pos)): + if re.search('\d+\+', str(hgvs_c.posedit.pos)) or re.search('\d+\-', str(hgvs_c.posedit.pos)) or re.search( + '\*\d+\+', str(hgvs_c.posedit.pos)) or re.search('\*\d+\-', str(hgvs_c.posedit.pos)): pass else: @@ -1082,10 +1078,13 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hn, hdpOld, primary_assembly): # handle inversions if hgvs_t.posedit.edit.type == 'inv': inv_alt = self.revcomp(hgvs_t.posedit.edit.ref) - t_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base) + '_' + str(hgvs_t.posedit.pos.end.base) + 'del' + hgvs_t.posedit.edit.ref + 'ins' + inv_alt + t_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str(hgvs_t.posedit.pos.start.base) + '_' + str( + hgvs_t.posedit.pos.end.base) + 'del' + hgvs_t.posedit.edit.ref + 'ins' + inv_alt hgvs_t_delins = self.hp.parse_hgvs_variant(t_delins) - pre_base = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.start.base-1) - post_base = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.end.base,hgvs_t.posedit.pos.end.base+1) + pre_base = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 2, + hgvs_t.posedit.pos.start.base - 1) + post_base = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.end.base, + hgvs_t.posedit.pos.end.base + 1) hgvs_t.posedit.edit.ref = pre_base + hgvs_t.posedit.edit.ref + post_base inv_alt = pre_base + inv_alt + post_base hgvs_t.posedit.pos.start.base = hgvs_t.posedit.pos.start.base - 1 @@ -1095,26 +1094,36 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hn, hdpOld, primary_assembly): end = hgvs_t.posedit.pos.end.base hgvs_t.posedit.pos.start.base = start hgvs_t.posedit.pos.end.base = end - hgvs_str = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(start) + '_' + str(end) + 'del' + hgvs_t.posedit.edit.ref + 'ins' + inv_alt + hgvs_str = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str(start) + '_' + str( + end) + 'del' + hgvs_t.posedit.edit.ref + 'ins' + inv_alt hgvs_t = self.hp.parse_hgvs_variant(hgvs_str) if hgvs_c.posedit.edit.type == 'dup': # hgvs_t = reverse_normalize.normalize(hgvs_t) - pre_base = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.start.base-1) - post_base = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.end.base,hgvs_t.posedit.pos.end.base+1) + pre_base = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 2, + hgvs_t.posedit.pos.start.base - 1) + post_base = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.end.base, + hgvs_t.posedit.pos.end.base + 1) alt = pre_base + hgvs_t.posedit.edit.ref + hgvs_t.posedit.edit.ref + post_base ref = pre_base + hgvs_t.posedit.edit.ref + post_base - dup_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base - 1) + '_' + str((hgvs_t.posedit.pos.start.base + len(ref)) -2) + 'del' + ref + 'ins' + alt + dup_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str( + hgvs_t.posedit.pos.start.base - 1) + '_' + str( + (hgvs_t.posedit.pos.start.base + len(ref)) - 2) + 'del' + ref + 'ins' + alt hgvs_t = self.hp.parse_hgvs_variant(dup_to_delins) elif hgvs_c.posedit.edit.type == 'ins': - ins_ref = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.end.base+1) + ins_ref = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 2, + hgvs_t.posedit.pos.end.base + 1) ins_alt = ins_ref[:2] + hgvs_t.posedit.edit.alt + ins_ref[-2:] - ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base - 1) + '_' + str(hgvs_t.posedit.pos.end.base +1 ) + 'del' + ins_ref + 'ins' + ins_alt + ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str( + hgvs_t.posedit.pos.start.base - 1) + '_' + str( + hgvs_t.posedit.pos.end.base + 1) + 'del' + ins_ref + 'ins' + ins_alt hgvs_t = self.hp.parse_hgvs_variant(ins_to_delins) else: if str(hgvs_t.posedit.edit.alt) == 'None': hgvs_t.posedit.edit.alt = '' - pre_base = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.start.base-1) - post_base = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.end.base,hgvs_t.posedit.pos.end.base+1) + pre_base = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 2, + hgvs_t.posedit.pos.start.base - 1) + post_base = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.end.base, + hgvs_t.posedit.pos.end.base + 1) hgvs_t.posedit.edit.ref = pre_base + hgvs_t.posedit.edit.ref + post_base hgvs_t.posedit.edit.alt = pre_base + hgvs_t.posedit.edit.alt + post_base hgvs_t.posedit.pos.start.base = hgvs_t.posedit.pos.start.base - 1 @@ -1124,7 +1133,8 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hn, hdpOld, primary_assembly): end = hgvs_t.posedit.pos.end.base hgvs_t.posedit.pos.start.base = start hgvs_t.posedit.pos.end.base = end - hgvs_str = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(start) + '_' + str(end) + str(hgvs_t.posedit.edit) + hgvs_str = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(start) + '_' + str(end) + str( + hgvs_t.posedit.edit) hgvs_t = self.hp.parse_hgvs_variant(hgvs_str) hgvs_c = copy.deepcopy(hgvs_t) @@ -1151,13 +1161,15 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hn, hdpOld, primary_assembly): # Catch identity at the exon/intron boundary by trying to normalize ref only if hgvs_check_boundaries.posedit.edit.type == 'identity': reform_ident = str(hgvs_c).split(':')[0] - reform_ident = reform_ident + ':' + stored_hgvs_c.type + '.' + str(hgvs_c.posedit.pos) + 'del' + str(hgvs_c.posedit.edit.ref)# + 'ins' + str(hgvs_c.posedit.edit.alt) + reform_ident = reform_ident + ':' + stored_hgvs_c.type + '.' 
+ str(hgvs_c.posedit.pos) + 'del' + str( + hgvs_c.posedit.edit.ref) # + 'ins' + str(hgvs_c.posedit.edit.alt) hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) try: hn.normalize(hgvs_reform_ident) except hgvs.exceptions.HGVSError as e: error = str(e) - if re.search('spanning the exon-intron boundary', error) or re.search('Normalization of intronic variants', error): + if re.search('spanning the exon-intron boundary', error) or re.search( + 'Normalization of intronic variants', error): hgvs_c = copy.deepcopy(stored_hgvs_c) hgvs_genomic = self.vm.t_to_g(hgvs_c, alt_chr) @@ -1169,7 +1181,8 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hn, hdpOld, primary_assembly): except hgvs.exceptions.HGVSError as e: error = str(e) if error == 'insertion length must be 1': - ref = self.sf.fetch_seq(str(hgvs_genomic.ac),hgvs_genomic.posedit.pos.start.base-1,hgvs_genomic.posedit.pos.end.base) + ref = self.sf.fetch_seq(str(hgvs_genomic.ac), hgvs_genomic.posedit.pos.start.base - 1, + hgvs_genomic.posedit.pos.end.base) hgvs_genomic.posedit.edit.ref = ref hgvs_genomic.posedit.edit.alt = ref[0:1] + hgvs_genomic.posedit.edit.alt + ref[-1:] hgvs_genomic = hn.normalize(hgvs_genomic) @@ -1186,12 +1199,14 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hn, hdpOld, primary_assembly): stored_hgvs_n = self.vm.c_to_n(stored_hgvs_c) else: stored_hgvs_n = stored_hgvs_c - stored_ref = self.sf.fetch_seq(str(stored_hgvs_n.ac),stored_hgvs_n.posedit.pos.start.base-1,stored_hgvs_n.posedit.pos.end.base) + stored_ref = self.sf.fetch_seq(str(stored_hgvs_n.ac), stored_hgvs_n.posedit.pos.start.base - 1, + stored_hgvs_n.posedit.pos.end.base) stored_hgvs_c.posedit.edit.ref = stored_ref if (hgvs_genomic.posedit.edit.ref == '' or hgvs_genomic.posedit.edit.ref is None) and expand_out != 'false': if hgvs_genomic.posedit.edit.type == 'ins': - stored_ref = self.sf.fetch_seq(str(hgvs_genomic.ac),hgvs_genomic.posedit.pos.start.base-1,hgvs_genomic.posedit.pos.end.base) + stored_ref = 
self.sf.fetch_seq(str(hgvs_genomic.ac), hgvs_genomic.posedit.pos.start.base - 1, + hgvs_genomic.posedit.pos.end.base) stored_alt = stored_ref[:1] + hgvs_genomic.posedit.edit.alt + stored_ref[-1:] hgvs_genomic.posedit.edit.ref = stored_ref hgvs_genomic.posedit.edit.alt = stored_alt @@ -1203,10 +1218,12 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hn, hdpOld, primary_assembly): try: hn.normalize(nr_genomic) except hgvs.exceptions.HGVSInvalidVariantError as e: - if re.match('Length implied by coordinates must equal sequence deletion length', str(e)) or str(e) == 'base start position must be <= end position': + error_type_1 = str(e) + if re.match('Length implied by coordinates must equal sequence deletion length', str(e)) or str( + e) == 'base start position must be <= end position': # Effectively, this code is designed to handle variants that are directly proximal to - # gap BOUNDARIES, but in some cases the replace reference function of hgvs mapping has removed bases due to - # the deletion length being > the specified range. + # gap BOUNDARIES, but in some cases the replace reference function of hgvs mapping has removed bases + # due to the deletion length being > the specified range. 
# Warn of variant location wrt the gap if re.match('Length implied by coordinates must equal sequence deletion length', str(e)): @@ -1214,12 +1231,20 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hn, hdpOld, primary_assembly): genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) try: hn.normalize(genomic_gap_variant) - except Exception: - pass + # Still a problem + except hgvs.exceptions.HGVSInvalidVariantError as e: + if 'base start position must be <= end position' in str(e) and \ + 'Length implied by coordinates must equal' in error_type_1: + make_gen_var = copy.copy(nr_genomic) + make_gen_var.posedit.edit.ref = self.sf.fetch_seq(nr_genomic.ac, + nr_genomic.posedit.pos.start.base - 1, + nr_genomic.posedit.pos.end.base) + genomic_gap_variant = make_gen_var + error_type_1 = None else: genomic_gap_variant = self.nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) - if str(e) == 'base start position must be <= end position': + if error_type_1 == 'base start position must be <= end position': logger.warning('Variant is fully within a genomic gap') genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) @@ -1246,20 +1271,24 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hn, hdpOld, primary_assembly): genomic_gap_variant.posedit.edit.ref = '' stored_hgvs_c = copy.deepcopy(hgvs_c) - # Remove alt + # Remove alt try: genomic_gap_variant.posedit.edit.alt = '' except Exception as e: if str(e) == "'Dup' object has no attribute 'alt'": pass - # Should be a delins so will normalize statically and replace the reference bases + # Should be a delins so will normalize statically and replace the reference bases genomic_gap_variant = hn.normalize(genomic_gap_variant) # Static map to c. 
and static normalize transcript_gap_variant = self.vm.g_to_t(genomic_gap_variant, hgvs_c.ac) stored_transcript_gap_variant = transcript_gap_variant if not re.match('Length implied by coordinates must equal sequence deletion length', str(e)): - transcript_gap_variant = hn.normalize(transcript_gap_variant) + try: + transcript_gap_variant = hn.normalize(transcript_gap_variant) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + if ' Unsupported normalization of variants spanning the UTR-exon boundary' in str(e): + pass # if NM_ need the n. position if re.match('NM_', str(hgvs_c.ac)): @@ -1275,9 +1304,13 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hn, hdpOld, primary_assembly): transcript_gap_alt_n.posedit.edit.alt = 'X' except Exception as e: if str(e) == "'Dup' object has no attribute 'alt'": - transcript_gap_n_delins_from_dup = transcript_gap_n.ac + ':' + transcript_gap_n.type + '.' + str(transcript_gap_n.posedit.pos.start.base) + '_' + str(transcript_gap_n.posedit.pos.end.base) + 'del' + transcript_gap_n.posedit.edit.ref + 'ins' + transcript_gap_n.posedit.edit.ref + transcript_gap_n.posedit.edit.ref + transcript_gap_n_delins_from_dup = transcript_gap_n.ac + ':' + transcript_gap_n.type + '.' + str( + transcript_gap_n.posedit.pos.start.base) + '_' + str( + transcript_gap_n.posedit.pos.end.base) + 'del' + transcript_gap_n.posedit.edit.ref + 'ins' + transcript_gap_n.posedit.edit.ref + transcript_gap_n.posedit.edit.ref transcript_gap_n = self.hp.parse_hgvs_variant(transcript_gap_n_delins_from_dup) - transcript_gap_alt_n_delins_from_dup = transcript_gap_alt_n.ac + ':' + transcript_gap_alt_n.type + '.' + str(transcript_gap_alt_n.posedit.pos.start.base) + '_' + str(transcript_gap_alt_n.posedit.pos.end.base) + 'del' + transcript_gap_alt_n.posedit.edit.ref + 'ins' + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n_delins_from_dup = transcript_gap_alt_n.ac + ':' + transcript_gap_alt_n.type + '.' 
+ str( + transcript_gap_alt_n.posedit.pos.start.base) + '_' + str( + transcript_gap_alt_n.posedit.pos.end.base) + 'del' + transcript_gap_alt_n.posedit.edit.ref + 'ins' + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n.posedit.edit.ref transcript_gap_alt_n = self.hp.parse_hgvs_variant(transcript_gap_alt_n_delins_from_dup) # Split the reference and replacing alt sequence into a dictionary @@ -1303,7 +1336,8 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hn, hdpOld, primary_assembly): # Note, all variants will be forced into the format delete insert # Deleted bases in the ALT will be substituted for X - for int in range(transcript_gap_alt_n.posedit.pos.start.base, transcript_gap_alt_n.posedit.pos.end.base+1, 1): + for int in range(transcript_gap_alt_n.posedit.pos.start.base, + transcript_gap_alt_n.posedit.pos.end.base + 1, 1): if int == alt_start: alt_base_dict[int] = str(''.join(alternate_bases)) else: @@ -1311,7 +1345,8 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hn, hdpOld, primary_assembly): # Generate the alt sequence alternate_sequence_bases = [] - for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base+1, 1): + for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base + 1, + 1): if int in alt_base_dict.keys(): alternate_sequence_bases.append(alt_base_dict[int]) else: @@ -1333,8 +1368,10 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hn, hdpOld, primary_assembly): except Exception as e: if str(e) == "base start position must be <= end position": # Expansion out is required to map back to the genomic position - pre_base = self.sf.fetch_seq(transcript_gap_n.ac,transcript_gap_n.posedit.pos.start.base-2,transcript_gap_n.posedit.pos.start.base-1) - post_base = self.sf.fetch_seq(transcript_gap_n.ac,transcript_gap_n.posedit.pos.end.base,transcript_gap_n.posedit.pos.end.base+1) + pre_base = self.sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.start.base - 
2, + transcript_gap_n.posedit.pos.start.base - 1) + post_base = self.sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.end.base, + transcript_gap_n.posedit.pos.end.base + 1) transcript_gap_n.posedit.pos.start.base = transcript_gap_n.posedit.pos.start.base - 1 transcript_gap_n.posedit.pos.end.base = transcript_gap_n.posedit.pos.end.base + 1 transcript_gap_n.posedit.edit.ref = pre_base + transcript_gap_n.posedit.edit.ref + post_base @@ -1355,7 +1392,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hn, hdpOld, primary_assembly): else: pass - # CASCADING STATEMENTS WHICH CAPTURE t to g MAPPING OPTIONS + # CASCADING STATEMENTS WHICH CAPTURE t to g MAPPING OPTIONS # Remove identity bases if hgvs_c == stored_hgvs_c: expand_out = 'false' @@ -1381,7 +1418,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hn, hdpOld, primary_assembly): # So is likely mapping to a genomic gap elif expand_out == 'true' and len(hgvs_genomic.posedit.edit.ref) <= 1: # Incorrect expansion, likely < ref + 2 - genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) + genomic_gap_variant = self.self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) try: hn.normalize(genomic_gap_variant) except Exception as e: @@ -1505,9 +1542,10 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hn, hdpOld, primary_assembly): hgvs_t = self.vm.c_to_n(hgvs_c) else: hgvs_t = copy.copy(hgvs_c) - ins_ref = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-1,hgvs_t.posedit.pos.end.base) + ins_ref = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 1, hgvs_t.posedit.pos.end.base) ins_alt = ins_ref[:1] + hgvs_t.posedit.edit.alt + ins_ref[-1:] - ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base) + '_' + str(hgvs_t.posedit.pos.end.base) + 'del' + ins_ref + 'ins' + ins_alt + ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str(hgvs_t.posedit.pos.start.base) + '_' + str( + hgvs_t.posedit.pos.end.base) + 'del' + ins_ref + 'ins' + ins_alt hgvs_t = self.hp.parse_hgvs_variant(ins_to_delins) try: hgvs_c = self.vm.n_to_c(hgvs_t) @@ -1521,18 +1559,6 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, vmOld, hn, hdpOld, primary_assembly): return hgvs_genomic - - """ - Simple hgvs g. to c. or n. mapping - returns parsed hgvs c. or n. object - """ - - - def myevm_g_to_t(self, hdpOld, evm, hgvs_genomic, alt_ac): - hgvs_t = evm.g_to_t(hgvs_genomic, alt_ac) - return hgvs_t - - """ parse p. strings into hgvs p. objects """ @@ -1791,7 +1817,7 @@ def relevant_transcripts(self, hgvs_genomic, evm, alt_aln_method,reverse_normali """ - def validateHGVS(self, input, hpOld, vrOld): + def validateHGVS(self, input): hgvs_input = self.hp.parse_hgvs_variant(input) g = re.compile(":g.") p = re.compile(":p.") @@ -1975,7 +2001,7 @@ def merge_hgvs_3pr(self, hgvs_variant_list): pass # Normalize the variant (allow cross intron) which also adds the reference sequence (?) 
- hgvs_v = self.hn.normalize(hgvs_v) + hgvs_v = hn.normalize(hgvs_v) # Set the accession and ensure that multiple reference sequences have not been queried if accession is None: @@ -2033,7 +2059,7 @@ def merge_hgvs_3pr(self, hgvs_variant_list): pass # Normalize (allow variants crossing into different exons) try: - hgvs_delins = self.hn.normalize(hgvs_delins) + hgvs_delins = hn.normalize(hgvs_delins) except HGVSUnsupportedOperationError: pass return hgvs_delins @@ -2158,7 +2184,7 @@ def merge_hgvs_5pr(self, hgvs_variant_list): """ - def merge_pseudo_vcf(self, vcf_list, genome_build): + def merge_pseudo_vcf(self, vcf_list, genome_build, hn): hgvs_list = [] # Convert pseudo_vcf list into a HGVS list for call in vcf_list: @@ -2167,7 +2193,7 @@ def merge_pseudo_vcf(self, vcf_list, genome_build): # Merge hgvs_delins = self.merge_hgvs_5pr(hgvs_list) # normalize 3 prime - hgvs_delins = self.hn.normalize(hgvs_delins) + hgvs_delins = hn.normalize(hgvs_delins) # return return hgvs_delins diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 6e3fa173..788b542f 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -1264,8 +1264,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr ) - report_gen = self.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, - primary_assembly, lose_vm, self.hp, hn, self.sf, self.nr_vm) + report_gen = self.myevm_t_to_g(input_parses, no_norm_evm, + primary_assembly, hn) error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant: Instead use ' + fn.valstr( report_gen) except Exception as e: @@ -1289,8 +1289,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if re.search('n.1-', str(input_parses)): input_parses = evm.n_to_c(input_parses) error = 'Using a transcript reference sequence to specify a variant position 
that lies outside of the reference sequence is not HGVS-compliant. Instead use ' - genomic_position = self.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, - self.vm, self.hp, hn, self.sf, self.nr_vm) + genomic_position = self.myevm_t_to_g(input_parses, no_norm_evm, primary_assembly, + hn) error = error + fn.valstr(genomic_position) validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) @@ -1305,8 +1305,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if re.search('\d\-\d', str(input_parses)) or re.search('\d\+\d', str(input_parses)): # Can we go c-g-c try: - to_genome = self.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, self.vm, - self.hp, hn, self.sf, self.nr_vm) + to_genome = self.myevm_t_to_g(input_parses, no_norm_evm, primary_assembly, + hn) to_tx = evm.g_to_t(to_genome, input_parses.ac) except hgvs.exceptions.HGVSInvalidIntervalError as e: error = str(e) @@ -1346,8 +1346,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr offset = int(tot_end_pos) - int(boundary) input_parses.posedit.pos.end.offset = offset - report_gen = self.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, - primary_assembly, self.lose_vm, self.hp, hn, self.sf, self.nr_vm) + report_gen = self.myevm_t_to_g(input_parses, no_norm_evm, + primary_assembly, hn) error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + fn.valstr( report_gen) except Exception as e: @@ -1385,8 +1385,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = str(e) if re.search('bounds', error): try: - report_gen = self.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, - self.lose_vm, self.hp, hn, self.sf, self.nr_vm) + report_gen = self.myevm_t_to_g(input_parses, no_norm_evm, primary_assembly, + hn) except hgvs.exceptions.HGVSError as e: fn.exceptPass() else: @@ -1570,8 +1570,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr remainder = remainder + 1 input_parses.posedit.pos.end.base = boundary input_parses.posedit.pos.end.offset = remainder - report_gen = self.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, - self.lose_vm, self.hp, hn, self.sf, self.nr_vm) + report_gen = self.myevm_t_to_g(input_parses, no_norm_evm, primary_assembly, + hn) error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' + fn.valstr( report_gen) except Exception as e: @@ -1586,8 +1586,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if re.search('n.1-', str(input_parses)): error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' - genomic_position = self.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, self.vm, - self.hp, hn, self.sf, self.nr_vm) + genomic_position = self.myevm_t_to_g(input_parses, no_norm_evm, primary_assembly, + hn) error = error + fn.valstr(genomic_position) validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) @@ -1603,8 +1603,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = str(e) if re.search('bounds', error): try: - report_gen = self.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, - self.lose_vm, self.hp, hn, self.sf, self.nr_vm) + report_gen = self.myevm_t_to_g(input_parses, no_norm_evm, primary_assembly, + hn) except hgvs.exceptions.HGVSError as e: fn.exceptPass() else: @@ -1630,14 +1630,14 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue elif re.search('Cannot validate sequence of an intronic variant', error): try: - test_g = self.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, primary_assembly, self.vm, - self.hp, hn, self.sf, self.nr_vm) + test_g = self.myevm_t_to_g(input_parses, no_norm_evm, primary_assembly, + hn) back_to_n = evm.g_to_t(test_g, input_parses.ac) except hgvs.exceptions.HGVSError as e: error = str(e) if re.search('bounds', error): - report_gen = self.myevm_t_to_g(input_parses, self.hdp, no_norm_evm, - primary_assembly, self.lose_vm, self.hp, hn, self.sf, self.nr_vm) + report_gen = self.myevm_t_to_g(input_parses, no_norm_evm, + primary_assembly, hn) error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + fn.valstr( report_gen) validation['warnings'] = validation['warnings'] + ': ' + str(error) @@ -2069,8 +2069,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr for var in rel_var: hgvs_coding_variant = self.hp.parse_hgvs_variant(var) try: - hgvs_genomic = self.myevm_t_to_g(hgvs_coding_variant, self.hdp, no_norm_evm, - primary_assembly, self.vm, self.hp, hn, self.sf, self.nr_vm) + hgvs_genomic = self.myevm_t_to_g(hgvs_coding_variant, no_norm_evm, + primary_assembly, hn) except hgvs.exceptions.HGVSError as e: try_rel_var = [] else: @@ -2210,8 +2210,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr c.posedit.edit.ref = c.posedit.edit.ref.upper() if hasattr(c.posedit.edit, 'alt') and c.posedit.edit.alt is not None: c.posedit.edit.alt = c.posedit.edit.alt.upper() - stash_input = self.myevm_t_to_g(c, self.hdp, no_norm_evm, primary_assembly, self.vm, self.hp, hn, self.sf, - self.nr_vm) + stash_input = self.myevm_t_to_g(c, no_norm_evm, primary_assembly, hn) if re.match('NC_', str(stash_input)) or re.match('NT_', str(stash_input)) or re.match('NW_', str( stash_input)): @@ -2478,8 +2477,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = self.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, self.vm, self.hp, hn, self.sf, self.nr_vm) + hgvs_not_delins = self.myevm_t_to_g(test_tx_var, no_norm_evm, + primary_assembly, hn) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): @@ -2491,8 +2490,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = self.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, self.vm, self.hp, hn, 
self.sf, self.nr_vm) + hgvs_not_delins = self.myevm_t_to_g(test_tx_var, no_norm_evm, + primary_assembly, hn) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 @@ -2527,8 +2526,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = self.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, self.vm, self.hp, hn, self.sf, self.nr_vm) + hgvs_not_delins = self.myevm_t_to_g(test_tx_var, no_norm_evm, + primary_assembly, hn) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): @@ -2541,8 +2540,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = self.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, self.vm, self.hp, hn, self.sf, self.nr_vm) + hgvs_not_delins = self.myevm_t_to_g(test_tx_var, no_norm_evm, + primary_assembly, hn) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 @@ -3155,7 +3154,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Se rec_var to '' so it can be updated later rec_var = '' try: - to_g = self.myevm_t_to_g(obj, self.hdp, no_norm_evm, primary_assembly, self.vm, self.hp, hn, self.sf, self.nr_vm) + to_g = self.myevm_t_to_g(obj, no_norm_evm, primary_assembly, hn) genomic_ac = to_g.ac except hgvs.exceptions.HGVSDataNotAvailableError as e: if (re.search('~', str(e)) and re.search('Alignment is incomplete', str(e))) or re.match( @@ -3308,7 +3307,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if boundary == 'false': cross_variant = 'false' - 
error = self.validateHGVS(variant, hp=self.hp, vr=self.vr) + error = self.validateHGVS(variant) if error == 'false': valid = 'true' else: @@ -3458,8 +3457,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # c to Genome coordinates - Map the variant to the genome pre_var = self.hp.parse_hgvs_variant(variant) try: - pre_var = self.myevm_t_to_g(pre_var, self.hdp, no_norm_evm, primary_assembly, self.vm, self.hp, - hn, self.sf, self.nr_vm) + pre_var = self.myevm_t_to_g(pre_var, no_norm_evm, primary_assembly, + hn) except: e = sys.exc_info()[1] error = str(e) @@ -3884,7 +3883,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr pass # Apply validation to intronic variant descriptions (should be valid but make sure) - error = self.validateHGVS(genomic_validation, hp=self.hp, vr=self.vr) + error = self.validateHGVS(genomic_validation) if error == 'false': valid = 'true' else: @@ -3957,8 +3956,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr rna = str(hgvs_rna) # Genomic sequence - hgvs_genomic = self.myevm_t_to_g(hgvs_coding, self.hdp, no_norm_evm, primary_assembly, self.vm, self.hp, hn, - self.sf, self.nr_vm) + hgvs_genomic = self.myevm_t_to_g(hgvs_coding, no_norm_evm, primary_assembly, hn) final_hgvs_genomic = hgvs_genomic # genomic_possibilities @@ -3985,8 +3983,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr chromosome_normalized_hgvs_coding = hgvs_coding most_3pr_hgvs_genomic = self.myvm_t_to_g(chromosome_normalized_hgvs_coding, - hgvs_genomic.ac, no_norm_evm, self.vm, self.hp, hn, self.sf, - self.nr_vm) + no_norm_evm, hn) hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) # Push from side to side to try pick up odd placements @@ -4016,8 +4013,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Store a tx copy for later use test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) # 
stash_genomic = vm.t_to_g(test_stash_tx_right, hgvs_genomic.ac) - stash_genomic = self.myvm_t_to_g(test_stash_tx_right, hgvs_genomic.ac, no_norm_evm, - self.vm, self.hp, hn, self.sf, self.nr_vm) + stash_genomic = self.myvm_t_to_g(test_stash_tx_right, hgvs_genomic.ac, no_norm_evm, hn) # Stash the outputs if required # test variants = NC_000006.11:g.90403795G= (causes double identity) # NC_000002.11:g.73675227_73675228insCTC (? incorrect assumed insertion position) @@ -4101,8 +4097,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Store a tx copy for later use test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) # stash_genomic = vm.t_to_g(test_stash_tx_left, hgvs_genomic.ac) - stash_genomic = self.myvm_t_to_g(test_stash_tx_left, hgvs_genomic.ac, no_norm_evm, - self.vm, self.hp, hn, self.sf, self.nr_vm) + stash_genomic = self.myvm_t_to_g(test_stash_tx_left, hgvs_genomic.ac, no_norm_evm, hn) # Stash the outputs if required # test variants = NC_000006.11:g.90403795G= (causes double identity) # NC_000002.11:g.73675227_73675228insCTC @@ -4597,9 +4592,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = self.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, self.vm, self.hp, hn, self.sf, - self.nr_vm) + hgvs_not_delins = self.myevm_t_to_g(test_tx_var, no_norm_evm, + primary_assembly, hn) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) @@ -4614,9 +4608,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = self.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, self.vm, self.hp, hn, self.sf, - self.nr_vm) + hgvs_not_delins = self.myevm_t_to_g(test_tx_var, no_norm_evm, + primary_assembly, hn) rn_tx_hgvs_not_delins = 
no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 @@ -4651,9 +4644,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = self.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, self.vm, self.hp, hn, self.sf, - self.nr_vm) + hgvs_not_delins = self.myevm_t_to_g(test_tx_var, no_norm_evm, + primary_assembly, hn) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): @@ -4666,9 +4658,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = self.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, self.vm, self.hp, hn, self.sf, - self.nr_vm) + hgvs_not_delins = self.myevm_t_to_g(test_tx_var, no_norm_evm, + primary_assembly, hn) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 @@ -5236,8 +5227,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: continue # Update hgvs_genomic - hgvs_genomic = self.va_func.myvm_t_to_g(hgvs_refreshed_variant, hgvs_genomic.ac, - no_norm_evm, self.vm, self.hp, hn, self.sf, self.nr_vm) + hgvs_genomic = self.myvm_t_to_g(hgvs_refreshed_variant, hgvs_genomic.ac, + no_norm_evm,hn) if hgvs_genomic.posedit.edit.type == 'identity': re_c = self.vm.g_to_t(hgvs_genomic, hgvs_refreshed_variant.ac) if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): @@ -5520,8 +5511,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = self.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - 
primary_assembly, self.vm, self.hp, hn, self.sf, self.nr_vm) + hgvs_not_delins = self.myevm_t_to_g(test_tx_var, no_norm_evm, + primary_assembly, hn) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): @@ -5533,8 +5524,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = self.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, self.vm, self.hp, hn, self.sf, self.nr_vm) + hgvs_not_delins = self.myevm_t_to_g(test_tx_var, no_norm_evm, + primary_assembly, hn) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 @@ -5570,8 +5561,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = self.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, self.vm, self.hp, hn, self.sf, self.nr_vm) + hgvs_not_delins = self.myevm_t_to_g(test_tx_var, no_norm_evm, + primary_assembly, hn) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): @@ -5584,8 +5575,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = self.myevm_t_to_g(test_tx_var, self.hdp, no_norm_evm, - primary_assembly, self.vm, self.hp, hn, self.sf, self.nr_vm) + hgvs_not_delins = self.myevm_t_to_g(test_tx_var, no_norm_evm, + primary_assembly, hn) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 @@ -6162,7 +6153,7 @@ def validate(self, batch_variant, selected_assembly, 
select_transcripts, transcr hgvs_refseq_ac = 'RefSeqGene record not available' # Predicted effect on protein - protein_dict = self.myc_to_p(hgvs_coding, evm, self.hdp, self.hp, hn, self.vm, self.sf, re_to_p=False) + protein_dict = self.myc_to_p(hgvs_coding, evm, hn, re_to_p=False) if protein_dict['error'] == '': hgvs_protein = protein_dict['hgvs_protein'] protein = str(hgvs_protein) @@ -6229,7 +6220,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr c_for_p = seek_var try: # Predicted effect on protein - protein_dict = self.myc_to_p(c_for_p, evm, self.hdp, self.hp, hn, self.vm, self.sf, re_to_p=False) + protein_dict = self.myc_to_p(c_for_p, evm, hn, re_to_p=False) if protein_dict['error'] == '': hgvs_protein = protein_dict['hgvs_protein'] protein = str(hgvs_protein) @@ -6302,8 +6293,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr fn.exceptPass() else: # hgvs_protein = va_func.protein(str(c_for_p), evm, hp) - protein_dict = self.myc_to_p(c_for_p, evm, self.hdp, self.hp, hn, self.vm, self.sf, - re_to_p=False) + protein_dict = self.myc_to_p(c_for_p, evm, hn, re_to_p=False) if protein_dict['error'] == '': hgvs_protein = protein_dict['hgvs_protein'] protein = str(hgvs_protein) @@ -6599,8 +6589,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr ori = self.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=alt_chr, alt_aln_method=alt_aln_method) orientation = int(ori[0]['alt_strand']) - hgvs_alt_genomic = self.myvm_t_to_g(hgvs_coding, alt_chr, no_norm_evm, self.vm, self.hp, hn, - self.sf, self.nr_vm) + hgvs_alt_genomic = self.myvm_t_to_g(hgvs_coding, alt_chr, no_norm_evm, hn) # Set hgvs_genomic accordingly hgvs_genomic = copy.deepcopy(hgvs_alt_genomic) @@ -6629,7 +6618,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr most_3pr_hgvs_genomic = self.myvm_t_to_g(chromosome_normalized_hgvs_coding, alt_chr, - no_norm_evm, self.vm, self.hp, hn, 
self.sf, self.nr_vm) + no_norm_evm, hn) hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) # First to the right @@ -6657,7 +6646,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Store a tx copy for later use test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) stash_genomic = self.myvm_t_to_g(test_stash_tx_right, hgvs_alt_genomic.ac, - no_norm_evm, self.vm, self.hp, hn, self.sf, self.nr_vm) + no_norm_evm, hn) # Stash the outputs if required # test variants = NC_000006.11:g.90403795G= (causes double identity) # NC_000002.11:g.73675227_73675228insCTC (? incorrect assumed insertion position) @@ -6738,7 +6727,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Store a tx copy for later use test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) stash_genomic = self.myvm_t_to_g(test_stash_tx_left, hgvs_alt_genomic.ac, - no_norm_evm, self.vm, self.hp, hn, self.sf, self.nr_vm) + no_norm_evm, hn) # Stash the outputs if required # test variants = NC_000006.11:g.90403795G= (causes double identity) # NC_000002.11:g.73675227_73675228insCTC @@ -7221,8 +7210,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, self.vm, self.hp, hn, self.sf, - self.nr_vm) + no_norm_evm, hn) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str( saved_hgvs_coding.ac)) @@ -7236,8 +7224,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, self.vm, self.hp, hn, self.sf, - self.nr_vm) + no_norm_evm, hn) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str( saved_hgvs_coding.ac)) @@ -7277,8 +7264,7 @@ def validate(self, batch_variant, selected_assembly, 
select_transcripts, transcr test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, self.vm, self.hp, hn, self.sf, - self.nr_vm) + no_norm_evm, hn) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str( saved_hgvs_coding.ac)) @@ -7293,8 +7279,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, self.vm, self.hp, hn, self.sf, - self.nr_vm) + no_norm_evm, hn) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str( saved_hgvs_coding.ac)) @@ -7877,7 +7862,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Update hgvs_genomic hgvs_alt_genomic = self.myvm_t_to_g(hgvs_refreshed_variant, alt_chr, - no_norm_evm, self.vm, self.hp, hn, self.sf, self.nr_vm) + no_norm_evm, hn) if hgvs_alt_genomic.posedit.edit.type == 'identity': re_c = self.vm.g_to_t(hgvs_alt_genomic, hgvs_refreshed_variant.ac) if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index 83a071f2..3c471044 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -135,11 +135,6 @@ def __init__(self): self.utaSchema = str(self.hdp.data_version()) # Create normalizer - self.hn = hgvs.normalizer.Normalizer(self.hdp, - cross_boundaries=False, - shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, - alt_aln_method='splign' - ) self.reverse_hn = hgvs.normalizer.Normalizer(self.hdp, cross_boundaries=False, shuffle_direction=5, @@ -223,7 +218,7 @@ def protein(self,variant, evm, hpUnused): var_p.ac = 'Non-coding transcript' var_p.posedit = '' return var_p - def myc_to_p(self,hgvs_transcript, evm, re_to_p): + def myc_to_p(self,hgvs_transcript, evm, hn, re_to_p): # 
Create dictionary to store the information hgvs_transcript_to_hgvs_protein = {'error': '', 'hgvs_protein': '', 'ref_residues': ''} @@ -259,7 +254,7 @@ def myc_to_p(self,hgvs_transcript, evm, re_to_p): error = str(e) if re.search('string index out of range', error) and re.search('dup', str(hgvs_transcript)): hgvs_ins = self.hp.parse_hgvs_variant(str(hgvs_transcript)) - hgvs_ins = self.hn.normalize(hgvs_ins) + hgvs_ins = hn.normalize(hgvs_ins) inst = hgvs_ins.ac + ':c.' + str(hgvs_ins.posedit.pos.start.base - 1) + '_' + str(hgvs_ins.posedit.pos.start.base) + 'ins' + hgvs_ins.posedit.edit.ref hgvs_transcript = self.hp.parse_hgvs_variant(inst) hgvs_protein = evm.c_to_p(hgvs_transcript) diff --git a/VariantValidator/testing/vvTestFunctions.py b/VariantValidator/testing/vvTestFunctions.py index 6b278adf..0c550419 100644 --- a/VariantValidator/testing/vvTestFunctions.py +++ b/VariantValidator/testing/vvTestFunctions.py @@ -118,7 +118,7 @@ def loadValidations(path): #print(type(out[-1])) return out -def validateBatch(variantArray,validator): +def validateBatch(variantArray,val): #Returns an array of validations (themselves dictionary objects). 
out=[] selectTranscripts='all' @@ -126,7 +126,7 @@ def validateBatch(variantArray,validator): for i,v in enumerate(variantArray): print("VALIDATING Variant"+str(i)+" "+str(i+1)+"/"+str(len(variantArray))+" "+str(v)) try: - out.append(validator.vv.validator(v,selectedAssembly,selectTranscripts)) + out.append(val.validate(v,selectedAssembly,selectTranscripts)) except KeyboardInterrupt: print("Exiting...") sys.exit() diff --git a/VariantValidator/testing/vvTestSave.py b/VariantValidator/testing/vvTestSave.py index 506f5aeb..6b44a365 100644 --- a/VariantValidator/testing/vvTestSave.py +++ b/VariantValidator/testing/vvTestSave.py @@ -1,10 +1,9 @@ #Saving script import vvTestFunctions as fn +from vvObjects import Validator import os +val=Validator() - -hub=vvHub() - -fn.generateTestFolder("testOutputsPreRework","inputVariants.txt",hub) \ No newline at end of file +fn.generateTestFolder("testOutputsReworked","inputVariants.txt",val) \ No newline at end of file From fd6cb874724b3d98e2decdd3f8be5e16a4506e2b Mon Sep 17 00:00:00 2001 From: buran Date: Mon, 21 Jan 2019 13:07:30 +0000 Subject: [PATCH 013/223] Added testing code, fixed several crashes --- VariantValidator/modules/vvMixinConverters.py | 255 ++++++++++++------ VariantValidator/modules/vvMixinCore.py | 24 +- VariantValidator/modules/vvMixinInit.py | 2 +- VariantValidator/modules/vvTestFunctions.py | 204 ++++++++++++++ VariantValidator/modules/vvTestSave.py | 9 + 5 files changed, 401 insertions(+), 93 deletions(-) create mode 100644 VariantValidator/modules/vvTestFunctions.py create mode 100644 VariantValidator/modules/vvTestSave.py diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index 2c9fc705..b037e890 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -153,7 +153,7 @@ def coding(self, variant, hpOld): """ - def genomic(self, variant, evm, hpOld, hdpOld, primary_assembly): + def genomic(self, 
variant, evm, primary_assembly,hn): # Set regular expressions for if statements pat_g = re.compile("\:g\.") # Pattern looks for :g. pat_n = re.compile("\:n\.") @@ -164,7 +164,7 @@ def genomic(self, variant, evm, hpOld, hdpOld, primary_assembly): error = 'false' hgvs_var = self.hp.parse_hgvs_variant(variant) try: - var_g = self.myevm_t_to_g(hgvs_var, evm, self.hdp, primary_assembly) # genomic level variant + var_g = self.myevm_t_to_g(hgvs_var, evm, primary_assembly,hn) # genomic level variant except hgvs.exceptions.HGVSError as e: error = e if error != 'false': @@ -233,12 +233,7 @@ def hgvs_genomic(self, variant, hpOld): """ - def myevm_t_to_g(self, hgvs_c, evm, primary_assembly,hn): - # create no_norm_evm - if primary_assembly == 'GRCh38': - no_norm_evm = self.no_norm_evm_38 - elif primary_assembly == 'GRCh37': - no_norm_evm = self.no_norm_evm_37 + def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): # store the input stored_hgvs_c = copy.deepcopy(hgvs_c) @@ -256,7 +251,8 @@ def myevm_t_to_g(self, hgvs_c, evm, primary_assembly,hn): # Warn gap code in use logger.warning("gap_compensation_myevm = " + str(utilise_gap_code)) - if utilise_gap_code is True and (hgvs_c.posedit.edit.type == 'identity' or hgvs_c.posedit.edit.type == 'del' or hgvs_c.posedit.edit.type =='delins' or hgvs_c.posedit.edit.type == 'dup' or hgvs_c.posedit.edit.type == 'sub' or hgvs_c.posedit.edit.type == 'ins' or hgvs_c.posedit.edit.type == 'inv'): + if utilise_gap_code is True and ( + hgvs_c.posedit.edit.type == 'identity' or hgvs_c.posedit.edit.type == 'del' or hgvs_c.posedit.edit.type == 'delins' or hgvs_c.posedit.edit.type == 'dup' or hgvs_c.posedit.edit.type == 'sub' or hgvs_c.posedit.edit.type == 'ins' or hgvs_c.posedit.edit.type == 'inv'): # if NM_ need the n. 
position if re.match('NM_', str(hgvs_c.ac)): @@ -284,10 +280,13 @@ def myevm_t_to_g(self, hgvs_c, evm, primary_assembly,hn): hgvs_t = copy.deepcopy(hgvs_c) if hgvs_t.posedit.edit.type == 'inv': inv_alt = self.revcomp(hgvs_t.posedit.edit.ref) - t_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base) + '_' + str(hgvs_t.posedit.pos.end.base) + 'del' + hgvs_t.posedit.edit.ref + 'ins' + inv_alt + t_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base) + '_' + str( + hgvs_t.posedit.pos.end.base) + 'del' + hgvs_t.posedit.edit.ref + 'ins' + inv_alt hgvs_t_delins = self.hp.parse_hgvs_variant(t_delins) - pre_base = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.start.base-1) - post_base = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.end.base,hgvs_t.posedit.pos.end.base+1) + pre_base = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 2, + hgvs_t.posedit.pos.start.base - 1) + post_base = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.end.base, + hgvs_t.posedit.pos.end.base + 1) hgvs_t.posedit.edit.ref = pre_base + hgvs_t.posedit.edit.ref + post_base inv_alt = pre_base + inv_alt + post_base hgvs_t.posedit.pos.start.base = hgvs_t.posedit.pos.start.base - 1 @@ -297,25 +296,35 @@ def myevm_t_to_g(self, hgvs_c, evm, primary_assembly,hn): end = hgvs_t.posedit.pos.end.base hgvs_t.posedit.pos.start.base = start hgvs_t.posedit.pos.end.base = end - hgvs_str = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(start) + '_' + str(end) + 'del' + hgvs_t.posedit.edit.ref + 'ins' + inv_alt + hgvs_str = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str(start) + '_' + str( + end) + 'del' + hgvs_t.posedit.edit.ref + 'ins' + inv_alt hgvs_t = self.hp.parse_hgvs_variant(hgvs_str) elif hgvs_c.posedit.edit.type == 'dup': - pre_base = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.start.base-1) - post_base = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.end.base,hgvs_t.posedit.pos.end.base+1) + pre_base = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 2, + hgvs_t.posedit.pos.start.base - 1) + post_base = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.end.base, + hgvs_t.posedit.pos.end.base + 1) alt = pre_base + hgvs_t.posedit.edit.ref + hgvs_t.posedit.edit.ref + post_base ref = pre_base + hgvs_t.posedit.edit.ref + post_base - dup_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base - 1) + '_' + str((hgvs_t.posedit.pos.start.base + len(ref)) -2) + 'del' + ref + 'ins' + alt + dup_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str( + hgvs_t.posedit.pos.start.base - 1) + '_' + str( + (hgvs_t.posedit.pos.start.base + len(ref)) - 2) + 'del' + ref + 'ins' + alt hgvs_t = self.hp.parse_hgvs_variant(dup_to_delins) elif hgvs_c.posedit.edit.type == 'ins': - ins_ref = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.end.base+1) + ins_ref = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 2, + hgvs_t.posedit.pos.end.base + 1) ins_alt = ins_ref[:2] + hgvs_t.posedit.edit.alt + ins_ref[-2:] - ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base - 1) + '_' + str(hgvs_t.posedit.pos.end.base +1 ) + 'del' + ins_ref + 'ins' + ins_alt + ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str( + hgvs_t.posedit.pos.start.base - 1) + '_' + str( + hgvs_t.posedit.pos.end.base + 1) + 'del' + ins_ref + 'ins' + ins_alt hgvs_t = self.hp.parse_hgvs_variant(ins_to_delins) else: if str(hgvs_t.posedit.edit.alt) == 'None': hgvs_t.posedit.edit.alt = '' - pre_base = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-2,hgvs_t.posedit.pos.start.base-1) - post_base = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.end.base,hgvs_t.posedit.pos.end.base+1) + pre_base = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 2, + hgvs_t.posedit.pos.start.base - 1) + post_base = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.end.base, + hgvs_t.posedit.pos.end.base + 1) hgvs_t.posedit.edit.ref = pre_base + hgvs_t.posedit.edit.ref + post_base hgvs_t.posedit.edit.alt = pre_base + hgvs_t.posedit.edit.alt + post_base hgvs_t.posedit.pos.start.base = hgvs_t.posedit.pos.start.base - 1 @@ -325,7 +334,8 @@ def myevm_t_to_g(self, hgvs_c, evm, primary_assembly,hn): end = hgvs_t.posedit.pos.end.base hgvs_t.posedit.pos.start.base = start hgvs_t.posedit.pos.end.base = end - hgvs_str = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(start) + '_' + str(end) + str(hgvs_t.posedit.edit) + hgvs_str = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(start) + '_' + str(end) + str( + hgvs_t.posedit.edit) hgvs_t = self.hp.parse_hgvs_variant(hgvs_str) hgvs_c = copy.deepcopy(hgvs_t) @@ -352,27 +362,31 @@ def myevm_t_to_g(self, hgvs_c, evm, primary_assembly,hn): # Catch identity at the exon/intron boundary by trying to normalize ref only if hgvs_check_boundaries.posedit.edit.type == 'identity': reform_ident = str(hgvs_c).split(':')[0] - reform_ident = reform_ident + ':' + stored_hgvs_c.type + '.' + str(hgvs_c.posedit.pos) + 'del' + str(hgvs_c.posedit.edit.ref)# + 'ins' + str(hgvs_c.posedit.edit.alt) + reform_ident = reform_ident + ':' + stored_hgvs_c.type + '.' 
+ str(hgvs_c.posedit.pos) + 'del' + str( + hgvs_c.posedit.edit.ref) # + 'ins' + str(hgvs_c.posedit.edit.alt) hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) try: hn.normalize(hgvs_reform_ident) except hgvs.exceptions.HGVSError as e: error = str(e) - if re.search('spanning the exon-intron boundary', error) or re.search('Normalization of intronic variants', error): + if re.search('spanning the exon-intron boundary', error) or re.search( + 'Normalization of intronic variants', error): hgvs_c = copy.deepcopy(stored_hgvs_c) try: hgvs_genomic = no_norm_evm.t_to_g(hgvs_c) - hn.normalize(hgvs_genomic) # Check the validity of the mapping + hn.normalize(hgvs_genomic) # Check the validity of the mapping # This will fail on multiple refs for NC_ except hgvs.exceptions.HGVSError as e: # Recover all available mapping options from UTA mapping_options = self.hdp.get_tx_mapping_options(hgvs_c.ac) if mapping_options == []: - raise HGVSDataNotAvailableError("No alignment data between the specified transcript reference sequence and any GRCh37 and GRCh38 genomic reference sequences (including alternate chromosome assemblies, patches and RefSeqGenes) are available.") + raise HGVSDataNotAvailableError( + "No alignment data between the specified transcript reference sequence and any GRCh37 and GRCh38 genomic reference sequences (including alternate chromosome assemblies, patches and RefSeqGenes) are available.") # Capture errors from attempted mappings attempted_mapping_error = '' + for option in mapping_options: if re.match('blat', option[2]): continue @@ -383,7 +397,8 @@ def myevm_t_to_g(self, hgvs_c, evm, primary_assembly,hn): hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) break except Exception as e: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[1] + '~' + attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[ + 1] + '~' print e continue @@ -395,16 +410,19 @@ def 
myevm_t_to_g(self, hgvs_c, evm, primary_assembly,hn): if re.match('blat', option[2]): continue if re.match('NC_', option[1]): - try: - hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - if re.search(option[1], attempted_mapping_error): - pass - else: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[1] + '~' - print e - continue + chr_num = vvChromosomes.supported_for_mapping(str(option[1]), primary_assembly) + if chr_num == 'false': + try: + hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + if re.search(option[1], attempted_mapping_error): + pass + else: + attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[ + 1] + '~' + print e + continue try: hn.normalize(hgvs_genomic) except: @@ -412,42 +430,90 @@ def myevm_t_to_g(self, hgvs_c, evm, primary_assembly,hn): if re.match('blat', option[2]): continue if re.match('NT_', option[1]): - try: - hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[1] + '~' - print e - continue - try: - hn.normalize(hgvs_genomic) - except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NW_', option[1]): + chr_num = vvChromosomes.supported_for_mapping(str(option[1]), primary_assembly) + if chr_num != 'false': try: hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) break except Exception as e: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[1] + '~' + attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[ + 1] + '~' print e continue - # Only a RefSeqGene available + try: + hn.normalize(hgvs_genomic) + except: + for option in mapping_options: + if re.match('blat', option[2]): + continue + if re.match('NT_', option[1]): + chr_num 
= vvChromosomes.supported_for_mapping(str(option[1]), + primary_assembly) + if chr_num == 'false': + try: + hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + \ + option[ + 1] + '~' + print e + continue try: hn.normalize(hgvs_genomic) except: for option in mapping_options: if re.match('blat', option[2]): continue - if re.match('NG_', option[1]): - try: - hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[1] + '~' - print e + if re.match('NW_', option[1]): + chr_num = vvChromosomes.supported_for_mapping(str(option[1]), + primary_assembly) + if chr_num != 'false': + try: + hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + \ + option[1] + '~' + print e + continue + try: + hn.normalize(hgvs_genomic) + except: + for option in mapping_options: + if re.match('blat', option[2]): continue + if re.match('NW_', option[1]): + chr_num = vvChromosomes.supported_for_mapping(str(option[1]), + primary_assembly) + if chr_num == 'false': + try: + hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + attempted_mapping_error = attempted_mapping_error + str( + e) + "/" + hgvs_c.ac + "/" + \ + option[1] + '~' + print e + continue + + # Only a RefSeqGene available + try: + hn.normalize(hgvs_genomic) + except: + for option in mapping_options: + if re.match('blat', option[2]): + continue + if re.match('NG_', option[1]): + try: + hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + \ + option[1] + '~' + print e + continue + # If not mapped, raise error try: 
hgvs_genomic @@ -462,7 +528,8 @@ def myevm_t_to_g(self, hgvs_c, evm, primary_assembly,hn): except hgvs.exceptions.HGVSError as e: error = str(e) if error == 'insertion length must be 1': - ref = self.sf.fetch_seq(str(hgvs_genomic.ac),hgvs_genomic.posedit.pos.start.base-1,hgvs_genomic.posedit.pos.end.base) + ref = self.sf.fetch_seq(str(hgvs_genomic.ac), hgvs_genomic.posedit.pos.start.base - 1, + hgvs_genomic.posedit.pos.end.base) hgvs_genomic.posedit.edit.ref = ref hgvs_genomic.posedit.edit.alt = ref[0:1] + hgvs_genomic.posedit.edit.alt + ref[-1:] hgvs_genomic = hn.normalize(hgvs_genomic) @@ -479,12 +546,14 @@ def myevm_t_to_g(self, hgvs_c, evm, primary_assembly,hn): stored_hgvs_n = self.vm.c_to_n(stored_hgvs_c) else: stored_hgvs_n = stored_hgvs_c - stored_ref = self.sf.fetch_seq(str(stored_hgvs_n.ac),stored_hgvs_n.posedit.pos.start.base-1,stored_hgvs_n.posedit.pos.end.base) + stored_ref = self.sf.fetch_seq(str(stored_hgvs_n.ac), stored_hgvs_n.posedit.pos.start.base - 1, + stored_hgvs_n.posedit.pos.end.base) stored_hgvs_c.posedit.edit.ref = stored_ref if (hgvs_genomic.posedit.edit.ref == '' or hgvs_genomic.posedit.edit.ref is None) and expand_out != 'false': if hgvs_genomic.posedit.edit.type == 'ins': - stored_ref = self.sf.fetch_seq(str(hgvs_genomic.ac),hgvs_genomic.posedit.pos.start.base-1,hgvs_genomic.posedit.pos.end.base) + stored_ref = self.sf.fetch_seq(str(hgvs_genomic.ac), hgvs_genomic.posedit.pos.start.base - 1, + hgvs_genomic.posedit.pos.end.base) stored_alt = stored_ref[:1] + hgvs_genomic.posedit.edit.alt + stored_ref[-1:] hgvs_genomic.posedit.edit.ref = stored_ref hgvs_genomic.posedit.edit.alt = stored_alt @@ -492,11 +561,15 @@ def myevm_t_to_g(self, hgvs_c, evm, primary_assembly,hn): # First look for variants mapping to the flanks of gaps # either in the gap or on the flank but not fully within the gap if expand_out == 'true': + nr_genomic = self.nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) + try: hn.normalize(nr_genomic) except 
hgvs.exceptions.HGVSInvalidVariantError as e: - if re.match('Length implied by coordinates must equal sequence deletion length', str(e)) or str(e) == 'base start position must be <= end position': + error_type_1 = str(e) + if re.match('Length implied by coordinates must equal sequence deletion length', str(e)) or str( + e) == 'base start position must be <= end position': # Effectively, this code is designed to handle variants that are directly proximal to # gap BOUNDARIES, but in some cases the replace reference function of hgvs mapping has removed bases due to # the deletion length being > the specified range. @@ -507,12 +580,21 @@ def myevm_t_to_g(self, hgvs_c, evm, primary_assembly,hn): genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) try: hn.normalize(genomic_gap_variant) - except Exception: - pass + # Still a problem + except hgvs.exceptions.HGVSInvalidVariantError as e: + if 'base start position must be <= end position' in str(e) and \ + 'Length implied by coordinates must equal' in error_type_1: + make_gen_var = copy.copy(nr_genomic) + make_gen_var.posedit.edit.ref = self.sf.fetch_seq(nr_genomic.ac, + nr_genomic.posedit.pos.start.base - 1, + nr_genomic.posedit.pos.end.base) + genomic_gap_variant = make_gen_var + + error_type_1 = None else: genomic_gap_variant = self.nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) - if str(e) == 'base start position must be <= end position': + if error_type_1 == 'base start position must be <= end position': logger.warning('Variant is fully within a genomic gap') genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) @@ -551,8 +633,13 @@ def myevm_t_to_g(self, hgvs_c, evm, primary_assembly,hn): # Static map to c. 
and static normalize transcript_gap_variant = self.vm.g_to_t(genomic_gap_variant, hgvs_c.ac) stored_transcript_gap_variant = transcript_gap_variant + if not re.match('Length implied by coordinates must equal sequence deletion length', str(e)): - transcript_gap_variant = hn.normalize(transcript_gap_variant) + try: + transcript_gap_variant = hn.normalize(transcript_gap_variant) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + if ' Unsupported normalization of variants spanning the UTR-exon boundary' in str(e): + pass # if NM_ need the n. position if re.match('NM_', str(hgvs_c.ac)): @@ -568,9 +655,13 @@ def myevm_t_to_g(self, hgvs_c, evm, primary_assembly,hn): transcript_gap_alt_n.posedit.edit.alt = 'X' except Exception as e: if str(e) == "'Dup' object has no attribute 'alt'": - transcript_gap_n_delins_from_dup = transcript_gap_n.ac + ':' + transcript_gap_n.type + '.' + str(transcript_gap_n.posedit.pos.start.base) + '_' + str(transcript_gap_n.posedit.pos.end.base) + 'del' + transcript_gap_n.posedit.edit.ref + 'ins' + transcript_gap_n.posedit.edit.ref + transcript_gap_n.posedit.edit.ref + transcript_gap_n_delins_from_dup = transcript_gap_n.ac + ':' + transcript_gap_n.type + '.' + str( + transcript_gap_n.posedit.pos.start.base) + '_' + str( + transcript_gap_n.posedit.pos.end.base) + 'del' + transcript_gap_n.posedit.edit.ref + 'ins' + transcript_gap_n.posedit.edit.ref + transcript_gap_n.posedit.edit.ref transcript_gap_n = self.hp.parse_hgvs_variant(transcript_gap_n_delins_from_dup) - transcript_gap_alt_n_delins_from_dup = transcript_gap_alt_n.ac + ':' + transcript_gap_alt_n.type + '.' + str(transcript_gap_alt_n.posedit.pos.start.base) + '_' + str(transcript_gap_alt_n.posedit.pos.end.base) + 'del' + transcript_gap_alt_n.posedit.edit.ref + 'ins' + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n_delins_from_dup = transcript_gap_alt_n.ac + ':' + transcript_gap_alt_n.type + '.' 
+ str( + transcript_gap_alt_n.posedit.pos.start.base) + '_' + str( + transcript_gap_alt_n.posedit.pos.end.base) + 'del' + transcript_gap_alt_n.posedit.edit.ref + 'ins' + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n.posedit.edit.ref transcript_gap_alt_n = self.hp.parse_hgvs_variant(transcript_gap_alt_n_delins_from_dup) # Split the reference and replacing alt sequence into a dictionary @@ -596,15 +687,17 @@ def myevm_t_to_g(self, hgvs_c, evm, primary_assembly,hn): # Note, all variants will be forced into the format delete insert # Deleted bases in the ALT will be substituted for X - for int in range(transcript_gap_alt_n.posedit.pos.start.base, transcript_gap_alt_n.posedit.pos.end.base+1, 1): + for int in range(transcript_gap_alt_n.posedit.pos.start.base, + transcript_gap_alt_n.posedit.pos.end.base + 1, 1): if int == alt_start: alt_base_dict[int] = str(''.join(alternate_bases)) else: alt_base_dict[int] = 'X' - # Generate the alt sequence + # Generate the alt sequence alternate_sequence_bases = [] - for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base+1, 1): + for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base + 1, + 1): if int in alt_base_dict.keys(): alternate_sequence_bases.append(alt_base_dict[int]) else: @@ -626,8 +719,10 @@ def myevm_t_to_g(self, hgvs_c, evm, primary_assembly,hn): except Exception as e: if str(e) == "base start position must be <= end position": # Expansion out is required to map back to the genomic position - pre_base = self.sf.fetch_seq(transcript_gap_n.ac,transcript_gap_n.posedit.pos.start.base-2,transcript_gap_n.posedit.pos.start.base-1) - post_base = self.sf.fetch_seq(transcript_gap_n.ac,transcript_gap_n.posedit.pos.end.base,transcript_gap_n.posedit.pos.end.base+1) + pre_base = self.sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.start.base - 2, + transcript_gap_n.posedit.pos.start.base - 1) + post_base = 
self.sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.end.base, + transcript_gap_n.posedit.pos.end.base + 1) transcript_gap_n.posedit.pos.start.base = transcript_gap_n.posedit.pos.start.base - 1 transcript_gap_n.posedit.pos.end.base = transcript_gap_n.posedit.pos.end.base + 1 transcript_gap_n.posedit.edit.ref = pre_base + transcript_gap_n.posedit.edit.ref + post_base @@ -648,7 +743,6 @@ def myevm_t_to_g(self, hgvs_c, evm, primary_assembly,hn): else: pass - # CASCADING STATEMENTS WHICH CAPTURE t to g MAPPING OPTIONS # Remove identity bases if hgvs_c == stored_hgvs_c: @@ -799,9 +893,10 @@ def myevm_t_to_g(self, hgvs_c, evm, primary_assembly,hn): hgvs_t = self.vm.c_to_n(hgvs_c) else: hgvs_t = copy.copy(hgvs_c) - ins_ref = self.sf.fetch_seq(str(hgvs_t.ac),hgvs_t.posedit.pos.start.base-1,hgvs_t.posedit.pos.end.base) + ins_ref = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 1, hgvs_t.posedit.pos.end.base) ins_alt = ins_ref[:1] + hgvs_t.posedit.edit.alt + ins_ref[-1:] - ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base) + '_' + str(hgvs_t.posedit.pos.end.base) + 'del' + ins_ref + 'ins' + ins_alt + ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base) + '_' + str( + hgvs_t.posedit.pos.end.base) + 'del' + ins_ref + 'ins' + ins_alt hgvs_t = self.hp.parse_hgvs_variant(ins_to_delins) try: hgvs_c = self.vm.n_to_c(hgvs_t) @@ -1030,7 +1125,9 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn returns parsed hgvs g. object """ - + def myevm_g_to_t(self,evm, hgvs_genomic, alt_ac): + hgvs_t = evm.g_to_t(hgvs_genomic, alt_ac) + return hgvs_t def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): # store the input stored_hgvs_c = copy.deepcopy(hgvs_c) @@ -1956,7 +2053,7 @@ def revcomp(self, bases): """ - def merge_hgvs_3pr(self, hgvs_variant_list): + def merge_hgvs_3pr(self, hgvs_variant_list,hn): # Ensure c. 
is mapped to the h_list = [] diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 788b542f..dd86fe69 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -3242,7 +3242,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr elif geno.search(input): if plus.search(variant) or minus.search(variant): - to_g = self.genomic(variant, no_norm_evm, self.hp, self.hdp, primary_assembly, self.vm, hn, self.sf, self.nr_vm) + to_g = self.genomic(variant, no_norm_evm, primary_assembly,hn) es = re.compile('error') if es.search(str(to_g)): if alt_aln_method != 'genebuild': @@ -3355,8 +3355,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr coding = self.coding(variant, self.hp) trans_acc = coding.ac # c to Genome coordinates - Map the variant to the genome - pre_var = self.genomic(variant, no_norm_evm, self.hp, self.hdp, primary_assembly, self.vm, hn, self.sf, - self.nr_vm) + pre_var = self.genomic(variant, no_norm_evm, primary_assembly,hn) # genome back to C coordinates post_var = self.myevm_g_to_t(evm, pre_var, trans_acc) @@ -3569,8 +3568,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr coding = self.coding(variant, self.hp) trans_acc = coding.ac # c to Genome coordinates - Map the variant to the genome - pre_var = self.genomic(variant, no_norm_evm, self.hp, self.hdp, primary_assembly, self.vm, hn, self.sf, - self.nr_vm) + pre_var = self.genomic(variant, no_norm_evm, primary_assembly,hn) # genome back to C coordinates post_var = self.myevm_g_to_t(evm, pre_var, trans_acc) @@ -3636,8 +3634,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr coding = self.coding(variant, self.hp) trans_acc = coding.ac # c to Genome coordinates - Map the variant to the genome - pre_var = self.genomic(variant, no_norm_evm, self.hp, self.hdp, primary_assembly, self.vm, hn, 
self.sf, - self.nr_vm) + pre_var = self.genomic(variant, no_norm_evm, primary_assembly,hn) + # genome back to C coordinates post_var = self.myevm_g_to_t(evm, pre_var, trans_acc) @@ -3705,7 +3703,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr plus = re.compile("\d\+\d") # finds digit + digit minus = re.compile("\d\-\d") # finds digit - digit if plus.search(input) or minus.search(input): - to_g = self.genomic(inp, no_norm_evm, self.hp, self.hdp, primary_assembly, self.vm, hn, self.sf, self.nr_vm) + to_g = self.genomic(inp, no_norm_evm, primary_assembly,hn) es = re.compile('error') if es.search(str(to_g)): if alt_aln_method != 'genebuild': @@ -3865,7 +3863,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if valid == 'false': error = 'false' genomic_validation = str( - self.genomic(input, no_norm_evm, self.hp, self.hdp, primary_assembly, self.vm, hn, self.sf, self.nr_vm)) + self.genomic(input, no_norm_evm, primary_assembly,hn) ) del_end = re.compile('\ddel$') delins = re.compile('delins') inv = re.compile('inv') @@ -3982,7 +3980,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = str(e) chromosome_normalized_hgvs_coding = hgvs_coding - most_3pr_hgvs_genomic = self.myvm_t_to_g(chromosome_normalized_hgvs_coding, + most_3pr_hgvs_genomic = self.myvm_t_to_g(chromosome_normalized_hgvs_coding, hgvs_genomic.ac, no_norm_evm, hn) hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) @@ -6153,7 +6151,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_refseq_ac = 'RefSeqGene record not available' # Predicted effect on protein - protein_dict = self.myc_to_p(hgvs_coding, evm, hn, re_to_p=False) + protein_dict = self.myc_to_p(hgvs_coding, evm, re_to_p=False) if protein_dict['error'] == '': hgvs_protein = protein_dict['hgvs_protein'] protein = str(hgvs_protein) @@ -6220,7 +6218,7 @@ def validate(self, batch_variant, 
selected_assembly, select_transcripts, transcr c_for_p = seek_var try: # Predicted effect on protein - protein_dict = self.myc_to_p(c_for_p, evm, hn, re_to_p=False) + protein_dict = self.myc_to_p(c_for_p, evm, re_to_p=False) if protein_dict['error'] == '': hgvs_protein = protein_dict['hgvs_protein'] protein = str(hgvs_protein) @@ -6293,7 +6291,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr fn.exceptPass() else: # hgvs_protein = va_func.protein(str(c_for_p), evm, hp) - protein_dict = self.myc_to_p(c_for_p, evm, hn, re_to_p=False) + protein_dict = self.myc_to_p(c_for_p, evm, re_to_p=False) if protein_dict['error'] == '': hgvs_protein = protein_dict['hgvs_protein'] protein = str(hgvs_protein) diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index 3c471044..57cd5a91 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -218,7 +218,7 @@ def protein(self,variant, evm, hpUnused): var_p.ac = 'Non-coding transcript' var_p.posedit = '' return var_p - def myc_to_p(self,hgvs_transcript, evm, hn, re_to_p): + def myc_to_p(self,hgvs_transcript, evm, re_to_p): # Create dictionary to store the information hgvs_transcript_to_hgvs_protein = {'error': '', 'hgvs_protein': '', 'ref_residues': ''} diff --git a/VariantValidator/modules/vvTestFunctions.py b/VariantValidator/modules/vvTestFunctions.py new file mode 100644 index 00000000..154337e4 --- /dev/null +++ b/VariantValidator/modules/vvTestFunctions.py @@ -0,0 +1,204 @@ +#PJDP testing suite for variant validator + +#Run this test to validate all variants and SAVE the results for comparison with a different version. +#The input variants file should contain a bunch of variants on each line in quotes. Anything outside the +#quotes is discarded. 
+ +import os +import pickle +import json +import sys + +import sqlite3 +import logging + +logConsoleHandler = logging.StreamHandler() +logConsoleHandler.setLevel(logging.DEBUG) +#Debug +hl=logging.getLogger("hgvs.dataproviders.uta") +hl.addHandler(logConsoleHandler) + + +''' +try: + print("Configuring for personal linux") + seqrepo_current_version='2018-08-21' + HGVS_SEQREPO_DIR='/home/buran/documents/workspace/ITS/seqrepo/'+seqrepo_current_version + os.environ['HGVS_SEQREPO_DIR']=HGVS_SEQREPO_DIR + uta_current_version='uta_20180821' + UTA_DB_URL='postgresql://uta_admin:uta_admin@127.0.0.1/uta/' + uta_current_version + os.environ['UTA_DB_URL']=UTA_DB_URL + from VariantValidator import variantValidator as vv + vv.my_config() +except sqlite3.OperationalError: + print("Configuring for VM") + seqrepo_current_version = '2018-08-21' + HGVS_SEQREPO_DIR = '/Users/pjf9/variant_validator_data/seqrepo/' + seqrepo_current_version + os.environ['HGVS_SEQREPO_DIR'] = HGVS_SEQREPO_DIR + uta_current_version = 'uta_20180821' + UTA_DB_URL = 'postgresql://uta_admin:uta_admin@127.0.0.1/uta/' + uta_current_version + os.environ['UTA_DB_URL'] = UTA_DB_URL + os.environ['PYLIFTOVER_DIR'] = '/Users/pjf9/variant_validator_data/pyLiftover/' + from VariantValidator import variantValidator as vv + +''' + +def generateTestFolder(path, inputVariants, validator): + #Saves the results of running inputVariants to a folder given in saveDirectory. + if not os.path.isdir(path): + os.mkdir(path) + variantArray=loadVariantFile(inputVariants) + #Go through the variant array, validating, and save the results. + batch=validateBatch(variantArray,validator) + #Save copy of the resulting dictionary + saveValidationsAsFolder(path,batch) + +def generateTestJSON(path, inputVariants,sysOut): + variantArray=loadVariantFile(inputVariants) + #Go through the variant array, validating, and save the results. 
+ batch=validateBatch(variantArray) + #batch.append(sysOut.getvalue()) + #Save copy of the resulting dictionary + saveValidationsAsJSON(path,batch) + +def saveValidationsAsFolder(path, validations): + #Pickles validation dictionaries into the given folder. + for i,v in enumerate(validations): + with open(os.path.join(path,"variant"+str(i)+".txt") ,"w") as f: + pickle.dump(v,f) + +def saveValidationsAsJSON(path,validations): + #Saves a set of validations (v is a list of dictionaries) or a bunch of validations (v is a list of dictionaries) + #as the json given in path. The name of the file will be that of the input variant string. + jOut=json.dumps(validations) + with open(path,"w") as f: + f.write(jOut) + print("JSON saved to "+path) + +def loadVariantFile(path): + out=[] + #Load up the input variant file, should be passed in path.txt. Extra space, commas and quotes will be stripped. + with open(path) as f: + for l in f.readlines(): + l=l.strip() + if len(l)>3: + if l[-1]==",": + l=l[:-1] + if l[-1]=='"': + l=l[:-1] + if l[0]=='"': + l=l[1:] + out.append(l) + return out + +def saveVariantFile(path, variants): + #Saves a variant input array (a bunch of strings) into a new text file given by path. + with open(path,"w") as f: + for v in variants: + f.write(v+"\n") + +def mergeVariantList(variants1,variants2): + #Merges two lists of variants, avoiding duplicants. + out=[] + for v in variants1: + if not v in out: + out.append(v) + for v in variants2: + if not v in out: + out.append(v) + return out + +def loadValidations(path): + #Loads a set of validations from the folder given in path. + out=[] + for paths,dirs,files in os.walk(path): + for filePath in files: + with open(os.path.join(paths,filePath)) as f: + out.append(pickle.load(f)) + #print(type(out[-1])) + return out + +def validateBatch(variantArray,val): + #Returns an array of validations (themselves dictionary objects). 
+ out=[] + selectTranscripts='all' + selectedAssembly='GRCh37' + for i,v in enumerate(variantArray): + print("VALIDATING Variant"+str(i)+" "+str(i+1)+"/"+str(len(variantArray))+" "+str(v)) + try: + out.append(val.validate(v,selectedAssembly,selectTranscripts)) + except KeyboardInterrupt: + print("Exiting...") + sys.exit() + except Exception as e: + print("FATAL error processing variant: "+str(e)) + out.append({"ERROR":str(e)}) + #raise #debug - uncomment this line to ensure the test leaves a traceback and fails the first time there's a critical error. + return out + +def retrieveVariant(validation): + #Returns the variant string (if possible) from a validation. + out=None + for v in validation.values(): + try: + if type(v)==type({}) and "submitted_variant" in v.keys(): + out=v["submitted_variant"] + return out + except (KeyError, TypeError, AttributeError): + pass + raise AttributeError("Validation does not contain the original variant string") + +def compareValidations(v1,v2,id): + #print(v1,v2) + for vk in v1.keys(): + if not (vk in v2.keys()): +# print("tag "+vk+" : "+str(v1[vk])+" not found in second variant") + print("Variant "+str(id)+": Tag "+vk+" not found in second variant") + return False + for vk in v2.keys(): + if not (vk in v1.keys()): +# print("tag "+vk+" : "+str(v2[vk])+" not found in first variant") + print("Variant "+str(id)+": Tag "+vk+" not found in first variant") + return False + for vk in v1.keys(): + if not (v1[vk]==v2[vk]): + if type(v1[vk])==type(dict()) or type(v2[vk])==type(dict()): + print("Variant " + str(id) + ": Different tag values for key " + str(vk)) + else: + print("Variant "+str(id)+": Different tag values - "+str(vk)+" : "+str(v1[vk])+" vs. 
"+str(vk)+" : "+str(v2[vk])) + return False + return True + +def compareBatches(v1path,v2path): + #Loads all files in validations folder and compares them + outFlags=[] + passScore=0 + v1batch=loadValidations(v1path) + v2batch=loadValidations(v2path) + print("Comparing validation sets...") + for i,v in enumerate(v1batch): +# print("Comparing validation "+str(i)) + outFlags.append(compareValidations(v1batch[i],v2batch[i],i)) + if outFlags[-1]: + passScore+=1 + if passScore==len(v1batch): + #Test passed. + print("Validation sets are identical, "+str(passScore)+" passed") + return True + else: + print("Validation sets are NOT identical, passed " + str(passScore) + "/" + str(len(v1batch))) + #for i,v in enumerate(v1batch): + #if not outFlags[i]: + #print("Mismatch in validation "+str(i)) + #print(v1batch[i]) + #print("Verses") + #print(v2batch[i]) + return False + +if __name__=="__main__": + + inputVariants="inputVariants.txt" + #saveOut="testJSON.json" + + #fn.generateTestJSON(saveOut,inputVariants,sysOut) + generateTestFolder("testOutputs",inputVariants) diff --git a/VariantValidator/modules/vvTestSave.py b/VariantValidator/modules/vvTestSave.py new file mode 100644 index 00000000..881ab056 --- /dev/null +++ b/VariantValidator/modules/vvTestSave.py @@ -0,0 +1,9 @@ +#Saving script + +import vvTestFunctions as fn +from vvObjects import Validator +import os + +val=Validator() + +fn.generateTestFolder("testOutputsReworked","inputVariants.txt",val) From 6fce5aecd810b30016120c1a2a04df43ba0a1b9e Mon Sep 17 00:00:00 2001 From: buran Date: Mon, 21 Jan 2019 13:33:16 +0000 Subject: [PATCH 014/223] More bugfixes --- VariantValidator/modules/vvDBGet.py | 4 ++-- VariantValidator/modules/vvMixinConverters.py | 2 +- VariantValidator/modules/vvTestFunctions.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/VariantValidator/modules/vvDBGet.py b/VariantValidator/modules/vvDBGet.py index b5d79632..77559c71 100644 --- a/VariantValidator/modules/vvDBGet.py +++ 
b/VariantValidator/modules/vvDBGet.py @@ -16,10 +16,10 @@ def execute(self,query): @handleCursor def executeAll(self,query): self.db.cursor.execute(query) - rows = self.db.cursor.fetchone() + rows = self.db.cursor.fetchall() if rows==[]: logger.debug("No data returned from query "+str(query)) - row = ['none', 'No data'] + rows = ['none', 'No data'] return rows # from dbfetchone def get_utaSymbol(self,gene_symbol): diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index b037e890..37dde0de 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -1515,7 +1515,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): # So is likely mapping to a genomic gap elif expand_out == 'true' and len(hgvs_genomic.posedit.edit.ref) <= 1: # Incorrect expansion, likely < ref + 2 - genomic_gap_variant = self.self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) + genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) try: hn.normalize(genomic_gap_variant) except Exception as e: diff --git a/VariantValidator/modules/vvTestFunctions.py b/VariantValidator/modules/vvTestFunctions.py index 154337e4..4346046d 100644 --- a/VariantValidator/modules/vvTestFunctions.py +++ b/VariantValidator/modules/vvTestFunctions.py @@ -133,7 +133,7 @@ def validateBatch(variantArray,val): except Exception as e: print("FATAL error processing variant: "+str(e)) out.append({"ERROR":str(e)}) - #raise #debug - uncomment this line to ensure the test leaves a traceback and fails the first time there's a critical error. + raise #debug - uncomment this line to ensure the test leaves a traceback and fails the first time there's a critical error. 
return out def retrieveVariant(validation): From 0ebbe06aee4838784875840703c76081e7ca9f90 Mon Sep 17 00:00:00 2001 From: buran Date: Mon, 21 Jan 2019 13:49:36 +0000 Subject: [PATCH 015/223] Changed to not create pools constantly, seems to be working to solve frequent connection errors --- VariantValidator/modules/vvDatabase.py | 1 + VariantValidator/modules/vvFunctions.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/VariantValidator/modules/vvDatabase.py b/VariantValidator/modules/vvDatabase.py index 011c8e4b..747a412f 100644 --- a/VariantValidator/modules/vvDatabase.py +++ b/VariantValidator/modules/vvDatabase.py @@ -15,6 +15,7 @@ class vvDatabase: # This class contains and handles the mysql connections for the variant validator database. def __init__(self,val,dbConfig): self.conn = None + self.pool = None # self.cursor will be none UNLESS you're wrapping a function in @handleCursor, which automatically opens and # closes connections for you. self.cursor=None diff --git a/VariantValidator/modules/vvFunctions.py b/VariantValidator/modules/vvFunctions.py index a39fdb69..177ef747 100644 --- a/VariantValidator/modules/vvFunctions.py +++ b/VariantValidator/modules/vvFunctions.py @@ -18,7 +18,8 @@ def handleCursor(func): #Decorator function for handling opening and closing cursors. @functools.wraps(func) def wrapper(self,*args,**kwargs): - self.db.pool=mysql.connector.pooling.MySQLConnectionPool(pool_size=10, **self.db.dbConfig) + if self.db.pool==None: + self.db.pool=mysql.connector.pooling.MySQLConnectionPool(pool_size=10, **self.db.dbConfig) self.db.conn=self.db.pool.get_connection() self.db.cursor = self.db.conn.cursor(buffered=True) out=func(self,*args,**kwargs) From 743bff6f537e23ff793a98d95ae44c2146d06a87 Mon Sep 17 00:00:00 2001 From: buran Date: Mon, 21 Jan 2019 16:15:14 +0000 Subject: [PATCH 016/223] Validator no longer crashes. Restructuring primary directive achieved. 
--- VariantValidator/modules/vvDBGet.py | 1 + VariantValidator/modules/vvFunctions.py | 5 +++ VariantValidator/modules/vvMixinConverters.py | 41 ++++++++----------- VariantValidator/modules/vvMixinCore.py | 13 ++++-- 4 files changed, 33 insertions(+), 27 deletions(-) diff --git a/VariantValidator/modules/vvDBGet.py b/VariantValidator/modules/vvDBGet.py index 77559c71..dbb6ed80 100644 --- a/VariantValidator/modules/vvDBGet.py +++ b/VariantValidator/modules/vvDBGet.py @@ -40,6 +40,7 @@ def get_refSeqGene_data_by_refSeqGeneID(self,refSeqGeneID, genomeBuild): def get_gene_symbol_from_refSeqGeneID(self,refSeqGeneID): query = "SELECT hgncSymbol FROM refSeqGene_loci WHERE refSeqGeneID = '%s'" %(refSeqGeneID) return self.execute(query)[0] + #get_refseqgeneId_from_lrgID def get_RefSeqGeneID_from_lrgID(self,lrgID): query = "SELECT RefSeqGeneID FROM LRG_RSG_lookup WHERE lrgID = '%s'" %(lrgID) return self.execute(query)[0] diff --git a/VariantValidator/modules/vvFunctions.py b/VariantValidator/modules/vvFunctions.py index 177ef747..107b3932 100644 --- a/VariantValidator/modules/vvFunctions.py +++ b/VariantValidator/modules/vvFunctions.py @@ -457,3 +457,8 @@ def n_inversion(ref_seq, del_seq, inv_seq, interval_start, interval_end): # Custom Exceptions class VariantValidatorError(Exception): pass +class mergeHGVSerror(Exception): + pass +class alleleVariantError(Exception): + pass + diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index 37dde0de..c5d364b4 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -19,16 +19,11 @@ import httplib2 as http import json from Bio import Entrez,SeqIO - +import vvFunctions as fn #Error setup from hgvs.exceptions import HGVSError, HGVSDataNotAvailableError, HGVSUnsupportedOperationError -class mergeHGVSerror(Exception): - pass -class alleleVariantError(Exception): - pass - class Mixin(vvMixinInit.Mixin): @@ -2073,7 +2068,7 @@ 
def merge_hgvs_3pr(self, hgvs_variant_list,hn): hgvs_v = self.vm.c_to_n(hgvs_v) h_list.append(hgvs_v) except: - raise mergeHGVSerror("Unable to map from c. position to absolute position") + raise fn.mergeHGVSerror("Unable to map from c. position to absolute position") elif hgvs_v.type == 'g': h_list.append(hgvs_v) if h_list != []: @@ -2091,9 +2086,9 @@ def merge_hgvs_3pr(self, hgvs_variant_list,hn): # No intronic positions try: if hgvs_v.posedit.pos.start.offset != 0: - raise mergeHGVSerror("Base-offset position submitted") + raise fn.mergeHGVSerror("Base-offset position submitted") if hgvs_v.posedit.pos.end.offset != 0: - raise mergeHGVSerror("Base-offset position submitted") + raise fn.mergeHGVSerror("Base-offset position submitted") except AttributeError: pass @@ -2106,7 +2101,7 @@ def merge_hgvs_3pr(self, hgvs_variant_list,hn): type = hgvs_v.type else: if hgvs_v.ac != accession: - raise mergeHGVSerror("More than one reference sequence submitted") + raise fn.mergeHGVSerror("More than one reference sequence submitted") else: pass @@ -2121,7 +2116,7 @@ def merge_hgvs_3pr(self, hgvs_variant_list,hn): else: # ! hgvs_v.posedit.pos.start.base !> if hgvs_v.posedit.pos.start.base <= merge_end_pos: - raise mergeHGVSerror("Submitted variants are out of order or their ranges overlap") + raise fn.mergeHGVSerror("Submitted variants are out of order or their ranges overlap") else: # Create a fake variant to handle the missing sequence ins_seq = self.sf.fetch_seq(hgvs_v.ac, merge_end_pos, hgvs_v.posedit.pos.start.base - 1) @@ -2187,7 +2182,7 @@ def merge_hgvs_5pr(self, hgvs_variant_list): hgvs_v = self.vm.c_to_n(hgvs_v) h_list.append(hgvs_v) except: - raise mergeHGVSerror("Unable to map from c. position to absolute position") + raise fn.mergeHGVSerror("Unable to map from c. 
position to absolute position") if h_list != []: hgvs_variant_list = copy.deepcopy(h_list) @@ -2203,9 +2198,9 @@ def merge_hgvs_5pr(self, hgvs_variant_list): try: # No intronic positions if hgvs_v.posedit.pos.start.offset != 0: - raise mergeHGVSerror("Base-offset position submitted") + raise fn.mergeHGVSerror("Base-offset position submitted") if hgvs_v.posedit.pos.end.offset != 0: - raise mergeHGVSerror("Base-offset position submitted") + raise fn.mergeHGVSerror("Base-offset position submitted") except AttributeError: pass @@ -2218,7 +2213,7 @@ def merge_hgvs_5pr(self, hgvs_variant_list): type = hgvs_v.type else: if hgvs_v.ac != accession: - raise mergeHGVSerror("More than one reference sequence submitted") + raise fn.mergeHGVSerror("More than one reference sequence submitted") else: pass @@ -2233,7 +2228,7 @@ def merge_hgvs_5pr(self, hgvs_variant_list): else: # ! hgvs_v.posedit.pos.start.base !> if hgvs_v.posedit.pos.start.base <= merge_end_pos: - raise mergeHGVSerror("Submitted variants are out of order or their ranges overlap") + raise fn.mergeHGVSerror("Submitted variants are out of order or their ranges overlap") else: # Create a fake variant to handle the missing sequence ins_seq = self.sf.fetch_seq(hgvs_v.ac, merge_end_pos, hgvs_v.posedit.pos.start.base - 1) @@ -2301,7 +2296,7 @@ def merge_pseudo_vcf(self, vcf_list, genome_build, hn): """ - def hgvs_alleles(self, variant_description): + def hgvs_alleles(self, variant_description,hn): try: # Split up the description accession, remainder = variant_description.split(':') @@ -2309,7 +2304,7 @@ def hgvs_alleles(self, variant_description): if re.search('[gcn]\.\d+\[', remainder): # NM_004006.2:c.2376[G>C];[(G>C)] # if re.search('\(', remainder): - # raise alleleVariantError('Unsupported format ' + remainder) + # raise fn.alleleVariantError('Unsupported format ' + remainder) # NM_004006.2:c.2376[G>C];[G>C] type, remainder = remainder.split('.') pos = re.match('\d+', remainder) @@ -2371,7 +2366,7 @@ def 
hgvs_alleles(self, variant_description): # NM_004006.2:c.[2376G>C];[?] continue merge = [] - allele = str(self.merge_hgvs_3pr(each_allele)) + allele = str(self.merge_hgvs_3pr(each_allele,hn)) merge.append(allele) merged_alleles.append(merge) my_alleles = merged_alleles @@ -2380,7 +2375,7 @@ def hgvs_alleles(self, variant_description): # If statement for uncertainties # NM_004006.2:c.[296T>G;476C>T];[476C>T](;)1083A>C if re.search('\[', remainder): - raise alleleVariantError('Unsupported format ' + type + '.' + remainder) + raise fn.alleleVariantError('Unsupported format ' + type + '.' + remainder) # NM_004006.2:c.2376G>C(;)3103del # NM_000548.3:c.3623_3647del(;)3745_3756dup alleles = remainder.split('(;)') @@ -2396,7 +2391,7 @@ def hgvs_alleles(self, variant_description): else: # If statement for uncertainties if re.search('\(', remainder): - raise alleleVariantError('Unsupported format ' + type + '.' + remainder) + raise fn.alleleVariantError('Unsupported format ' + type + '.' + remainder) # NM_004006.2:c.[2376G>C];[3103del] # NM_004006.2:c.[2376G>C];[3103del] # NM_004006.2:c.[296T>G;476C>T;1083A>C];[296T>G;1083A>C] @@ -2421,7 +2416,7 @@ def hgvs_alleles(self, variant_description): # NM_004006.2:c.[2376G>C];[?] 
continue merge = [] - allele = str(self.merge_hgvs_3pr(each_allele)) + allele = str(self.merge_hgvs_3pr(each_allele,hn)) merge.append(allele) merged_alleles.append(merge) my_alleles = merged_alleles @@ -2439,7 +2434,7 @@ def hgvs_alleles(self, variant_description): import traceback exc_type, exc_value, last_traceback = sys.exc_info() te = traceback.format_exc() - raise alleleVariantError(str(e)) + raise fn.alleleVariantError(str(e)) # Covert chromosomal HGVS description to RefSeqGene def chr_to_rsg(self, hgvs_genomic, hn, vrOld): diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index dd86fe69..d9e2dcdf 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -387,7 +387,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr ref_type = self.db.ref_type_assign(accession) if re.match('LRG_', accession): if ref_type == ':g.': - accession = self.db.get.get_refseqgeneId_from_lrgID(accession) + accession = self.db.get.get_RefSeqGeneID_from_lrgID(accession) else: accession = self.db.get.get_RefSeqTranscriptID_from_lrgTranscriptID(accession) else: @@ -806,7 +806,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr input) or re.match( '^LRG_\d+:n.', input): lrg_reference, variation = input.split(':') - refseqgene_reference = self.db.get.get_refseqgeneId_from_lrgID(lrg_reference) + refseqgene_reference = self.db.get.get_RefSeqGeneID_from_lrgID(lrg_reference) if refseqgene_reference != 'none': input = refseqgene_reference + ':' + variation if caution == '': @@ -832,7 +832,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr pass try: # Submit to allele extraction function - alleles = self.hgvs_alleles(input, self.hp, self.vr, hn, self.vm, self.sf) + alleles = self.hgvs_alleles(input,hn) validation['warnings'] = validation[ 'warnings'] + ': ' + 'Automap has extracted possible variant 
descriptions' logger.resub('Automap has extracted possible variant descriptions, resubmitting') @@ -851,6 +851,11 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr 'warnings'] + ': ' + 'Intronic positions not supported for HGVS Allele descriptions' logger.warning('Intronic positions not supported for HGVS Allele descriptions') continue + elif re.search("No transcript definition for ",str(e)): + validation['warnings'] = validation[ + 'warnings'] + ': ' + str(e) + logger.warning(str(e)) + continue else: raise VariantValidatorError(str(e)) logger.trace("HVGS String allele parsing pass 1 complete", validation) @@ -1072,7 +1077,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr str(input_parses)) or re.match( '^LRG_\d+:c.', str(input_parses)) or re.match('^LRG_\d+:n.', str(input_parses)): lrg_reference, variation = str(input_parses).split(':') - refseqgene_reference = self.db.get.get_refseqgeneId_from_lrgID(lrg_reference) + refseqgene_reference = self.db.get.get_RefSeqGeneID_from_lrgID(lrg_reference) if refseqgene_reference != 'none': input_parses.ac = refseqgene_reference variant = str(input_parses) From 79fa4856b78ee6f05d8ffeebb7c05bf94bec1eb3 Mon Sep 17 00:00:00 2001 From: buran Date: Mon, 21 Jan 2019 16:17:21 +0000 Subject: [PATCH 017/223] Adjusted version in config --- VariantValidator/configuration/config.ini | 2 +- VariantValidator/modules/defaultConfig.ini | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/VariantValidator/configuration/config.ini b/VariantValidator/configuration/config.ini index ea820deb..5f6e4aa0 100644 --- a/VariantValidator/configuration/config.ini +++ b/VariantValidator/configuration/config.ini @@ -1,5 +1,5 @@ [variantValidator] -version = 0.1.0_dev_pre_a +version = 1.0 release_date = tbc [mysql] diff --git a/VariantValidator/modules/defaultConfig.ini b/VariantValidator/modules/defaultConfig.ini index c719865d..ddcf59a7 100644 --- 
a/VariantValidator/modules/defaultConfig.ini +++ b/VariantValidator/modules/defaultConfig.ini @@ -1,5 +1,5 @@ [variantValidator] -version = 0.1.0_dev_pre_a +version = 0.9 release_date = tbc [mysql] From a366550e38561a79e4c7009c44f053cd7412ff0c Mon Sep 17 00:00:00 2001 From: buran Date: Mon, 21 Jan 2019 16:18:49 +0000 Subject: [PATCH 018/223] Changed default config --- VariantValidator/modules/defaultConfig.ini | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/VariantValidator/modules/defaultConfig.ini b/VariantValidator/modules/defaultConfig.ini index ddcf59a7..c8ae0d47 100644 --- a/VariantValidator/modules/defaultConfig.ini +++ b/VariantValidator/modules/defaultConfig.ini @@ -17,7 +17,14 @@ version = uta_20180821 location = postgresql://uta_admin:uta_admin@127.0.0.1/uta/ [logging] -string = info console +#Levels control verbosity and can be set to "critical" "error" "warning" "info" or "debug". +level = info +#level = debug +console = true +# Beware - file logging has permission issues. +file = false +# Trace is used for debugging to track variants through the validator function +trace = false [EntrezID] entrezid = admin@variantvalidator.org From 9ea1147b74e71a7001cf0ced3f4f41399168af90 Mon Sep 17 00:00:00 2001 From: buran Date: Mon, 21 Jan 2019 16:23:45 +0000 Subject: [PATCH 019/223] Changed logstring setting --- VariantValidator/modules/vvMixinInit.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index 57cd5a91..9a01ace8 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -77,7 +77,17 @@ def __init__(self): config=RawConfigParser(allow_no_value=True) config.read(configPath) # The custom vvLogging module will set itself up using the VALDIATOR_DEBUG environment variable. 
- logString = config["logging"]['string'] + levelString = config["logging"]['level'] + consoleString = config["logging"]['console'] + if consoleString.lower()=="true": + consoleString="console" + fileString = config["logging"]['file'] + if fileString.lower()=="true": + fileString="file" + traceString = config["logging"]['trace'] + if traceString.lower()=="true": + traceString="trace" + logString = levelString+" "+consoleString+" "+fileString+" "+traceString os.environ["VALIDATOR_DEBUG"] = logString # Handle databases From 7164d6fa15df75d759c97bfe5107d4d2701436e9 Mon Sep 17 00:00:00 2001 From: buran Date: Mon, 21 Jan 2019 16:25:51 +0000 Subject: [PATCH 020/223] Fixed myConfig function in validator object --- VariantValidator/modules/vvMixinInit.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index 9a01ace8..4b56cdbf 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -185,7 +185,10 @@ def myConfig(self): #Returns configuration: #version, hgvs version, uta schema, seqrepo db. 
return { - "Seqrepo path":self.seqrepoPath + 'variantvalidator_version': self.version + 'variantvalidator_hgvs_version': self.hgvsVersion + 'uta_schema': self.utaSchema + 'seqrepo_db': self.seqrepoPath } def createConfig(self,outPath): # This function reads from the default configuration file stored in the same folder as this module, From 7ee40c3fc3961140f864ed672314c9a6e69121a1 Mon Sep 17 00:00:00 2001 From: buran Date: Mon, 21 Jan 2019 16:55:28 +0000 Subject: [PATCH 021/223] Added pytest script --- VariantValidator/modules/vvMixinCore.py | 18 +++--- VariantValidator/modules/vvMixinInit.py | 6 +- VariantValidator/modules/vvTestSave.py | 2 +- VariantValidator/testing/test_vv.py | 70 ----------------------- VariantValidator/testing/vvTestCompare.py | 11 ---- 5 files changed, 14 insertions(+), 93 deletions(-) delete mode 100644 VariantValidator/testing/test_vv.py delete mode 100644 VariantValidator/testing/vvTestCompare.py diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index d9e2dcdf..6b560456 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -8223,15 +8223,17 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr logString = logger.getString() for l in logger.getString().split("\n"): logs.append(l) - metadata["logs"] = logString + + if os.environ.get("ADD_LOGS")=="True": + metadata["logs"] = logs metadata["variant"] = batch_variant - metadata["assembly"] = selected_assembly - metadata["transcripts"] = select_transcripts - metadata['seqrepo_directory'] = self.seqrepoPath - metadata['uta_url'] = self.utaPath - metadata['py_liftover_directory'] = self.liftoverPath - metadata['variantvalidator_data_url'] = self.db.path - metadata['entrez_id'] = self.entrezID + #metadata["assembly"] = selected_assembly + #metadata["transcripts"] = select_transcripts + #metadata['seqrepo_directory'] = self.seqrepoPath + #metadata['uta_url'] = self.utaPath + 
#metadata['py_liftover_directory'] = self.liftoverPath + #metadata['variantvalidator_data_url'] = self.db.path + #metadata['entrez_id'] = self.entrezID metadata['variantvalidator_version'] = self.version metadata['variantvalidator_hgvs_version'] = self.hgvsVersion metadata['uta_schema'] = self.utaSchema diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index 4b56cdbf..eb311838 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -185,9 +185,9 @@ def myConfig(self): #Returns configuration: #version, hgvs version, uta schema, seqrepo db. return { - 'variantvalidator_version': self.version - 'variantvalidator_hgvs_version': self.hgvsVersion - 'uta_schema': self.utaSchema + 'variantvalidator_version': self.version, + 'variantvalidator_hgvs_version': self.hgvsVersion, + 'uta_schema': self.utaSchema, 'seqrepo_db': self.seqrepoPath } def createConfig(self,outPath): diff --git a/VariantValidator/modules/vvTestSave.py b/VariantValidator/modules/vvTestSave.py index 881ab056..a6e31d1f 100644 --- a/VariantValidator/modules/vvTestSave.py +++ b/VariantValidator/modules/vvTestSave.py @@ -5,5 +5,5 @@ import os val=Validator() - +os.environ["ADD_LOGS"]="True" fn.generateTestFolder("testOutputsReworked","inputVariants.txt",val) diff --git a/VariantValidator/testing/test_vv.py b/VariantValidator/testing/test_vv.py deleted file mode 100644 index d8860f6e..00000000 --- a/VariantValidator/testing/test_vv.py +++ /dev/null @@ -1,70 +0,0 @@ -#Why isn't this discovered I wonder. 
-import os -import pytest -import VariantValidator.testing.vvTestFunctions as fn - -class vvHub(): - #Variant validator configuration hub object - def __init__(self): - seqrepo_current_version='2018-08-21' - HGVS_SEQREPO_DIR='/home/buran/documents/workspace/ITS/seqrepo/'+seqrepo_current_version - os.environ['HGVS_SEQREPO_DIR']=HGVS_SEQREPO_DIR - self.hvgsSeqrepoPath=HGVS_SEQREPO_DIR - uta_current_version='uta_20180821' - UTA_DB_URL='postgresql://uta_admin:uta_admin@127.0.0.1/uta/' + uta_current_version - os.environ['UTA_DB_URL']=UTA_DB_URL - self.utaPath=UTA_DB_URL - import VariantValidator.variantanalyser.vvLogging as vvLogging - self.logger=vvLogging.logger - from VariantValidator import variantValidator as vv - self.vv=vv - self.vv.my_config() - -inputVariants=fn.loadVariantFile("VariantValidator/testing/inputVariants.txt") - -''' -print("Configuring for personal linux") -seqrepo_current_version='2018-08-21' -HGVS_SEQREPO_DIR='/home/buran/documents/workspace/ITS/seqrepo/'+seqrepo_current_version -os.environ['HGVS_SEQREPO_DIR']=HGVS_SEQREPO_DIR -uta_current_version='uta_20180821' -UTA_DB_URL='postgresql://uta_admin:uta_admin@127.0.0.1/uta/' + uta_current_version -os.environ['UTA_DB_URL']=UTA_DB_URL -from VariantValidator import variantValidator as vv -vv.my_config() -''' - -def constructHub(): - hub=vvHub() - return hub - -@pytest.fixture(params=["NC_000012.11:g.122064777C>A","NC_000012.11:g.122064776delG","NC_000012.11:g.122064776dupG"])#inputVariants[:]) -def constructValidation(request): - hub=constructHub() -# print request.param - selectTranscripts='all' - selectedAssembly='GRCh37' - return hub,hub.vv.validator(request.param,selectedAssembly,selectTranscripts) - -def test_validation_output(constructValidation): - hub,v=constructValidation - assert v!=None - -def test_validation_errors(constructValidation): - hub,v=constructValidation - logs=v["metadata"]["logs"].split("\n") - e=0 - for l in logs: - if "ERROR:" in l: - e+=1 - assert e==0 - -def 
test_validation_criticals(constructValidation): - hub,v=constructValidation - logs=v["metadata"]["logs"].split("\n") - c=0 - for l in logs: - if "CRIT:" in l: - c+=1 - assert c==0 - diff --git a/VariantValidator/testing/vvTestCompare.py b/VariantValidator/testing/vvTestCompare.py deleted file mode 100644 index 65992bef..00000000 --- a/VariantValidator/testing/vvTestCompare.py +++ /dev/null @@ -1,11 +0,0 @@ -#Saving script - -import vvTestFunctions as fn - -masterDirectory="testOutputsMasterITS" -testDirectories=["testOutputsBuran"] - -for d in testDirectories: - print("Comparing "+masterDirectory+" and "+d) - fn.compareBatches(masterDirectory,d) - From 244851f438675ad738a5814689f05ea00cedfe5d Mon Sep 17 00:00:00 2001 From: buran Date: Mon, 21 Jan 2019 16:56:07 +0000 Subject: [PATCH 022/223] For real this time --- VariantValidator/modules/inputVariants.txt | 334 +++++++++++++++++++++ VariantValidator/modules/test_vv.py | 54 ++++ VariantValidator/modules/vvTestCompare.py | 11 + 3 files changed, 399 insertions(+) create mode 100644 VariantValidator/modules/inputVariants.txt create mode 100644 VariantValidator/modules/test_vv.py create mode 100644 VariantValidator/modules/vvTestCompare.py diff --git a/VariantValidator/modules/inputVariants.txt b/VariantValidator/modules/inputVariants.txt new file mode 100644 index 00000000..8dd63cdc --- /dev/null +++ b/VariantValidator/modules/inputVariants.txt @@ -0,0 +1,334 @@ +NM_000088.3:c.589G>T +NM_015120.4:c.35T>C +NM_015120.4:c.39G>C +NM_015120.4:c.34C>T +NC_000002.11:g.73613030C>T +NC_000023.10:g.33229673A>T +NM_001145026.1:c.715A>G +NC_000016.9:g.2099572TC>T +NM_000088.3:c.589GG>CT +NM_000094.3:c.6751-2_6751-3del +COL5A1:c.5071A>T +NG_007400.1:c.5071A>T +chr16:15832508_15832509delinsAC +NM_000088.3:c.589-1GG>G +NM_000088.3:c.642+1GT>G +NM_000088.3:c.589-2AG>G +NC_000017.10:g.48279242G>T +NM_000500.7:c.-107-19C>T +NM_000518.4:c.-130C>T +NM_000518.4:c.-50-80C>T +NM_000518.4:c.316_*342delinsCTACTT +NM_000518.4:c.316_*100del 
+NM_000518.4:c.*2000C>T +NM_000518.4:c.*132+1868C>T +NM_000518.4:c.-130_*2000= +NM_000518.4:c.-50-80_*132+1868= +NR_138595.1:n.-810C>T +NR_138595.1:n.1-810C>T +NR_138595.1:n.1071+1A= +NR_138595.1:n.-810_1071+1= +NC_000017.10:g.48261457_48261463TTATGTT= +NC_000017.10:g.48275363C>A +NM_000088.3:c.589-1G>T +NM_000088.3:c.591_593inv +11-5248232-T-A +NG_007400.1(NM_000088.3):c.589-1G>T +1:150550916G>A +1-150550916-G-A +NG_008123.1(LEPRE1_v003):c.2055+18G>A +NG_008123.1:c.2055+18G>A +NG_008123.1(NM_022356.3):c.2055+18G>A +NM_021983.4:c.490G>C +NM_032470.3:c.4del +NM_001194958.2:c.20C>A +NM_000022.2:c.534A>G +HSCHR6_MHC_SSTO_CTG1-3852542-C-G +NM_000368.4:c.363+1dupG +NM_000368.4:c.363dupG +NM_000089.3:c.1033_1035delGTT +NM_000089.3:c.1035_1035+2delTGT +NM_000088.3:c.2023_2028delGCAAGA +NM_000089.3:c.938-1delG +NM_000088.3:c.589G= +NM_000088.3:c.642A= +NM_000088.3:c.642+1GG>G +NM_000088.3:c.589-2GG>G +NM_000088.3:c.589-6_589-5insTTTT +NM_000088.3:c.642+3_642+4insAAAA +NM_000088.3:c.589-4_589-3insTT +NM_000088.3:c.589-8del +NM_000527.4:c.-187_-185delCTC +NM_206933.2:c.6317C>G +NC_000013.10:g.32929387T>C +NM_015102.3:c.2818-2T>A +19-41123094-G-GG +15-72105928-AC-A +12-122064773-CCCGCCA-C +12-122064774-CCGCCA-CCGCCA +12-122064773-CCCGCCACCGCCACCGC-CCCGCCACCGCCGCCGTC +NC_000012.11:g.122064777C>A +NC_000012.11:g.122064776delG +NC_000012.11:g.122064776dupG +NC_000012.11:g.122064776_122064777insTTT +NC_000012.11:g.122064772_122064775del +NC_000012.11:g.122064772_122064775dup +NC_000012.11:g.122064773_122064774insTTTT +NC_000012.11:g.122064772_122064777del +NC_000012.11:g.122064772_122064777dup +NC_000012.11:g.122064779_122064782dup +NC_000012.11:g.122064772_122064782del +NC_000002.11:g.95847041_95847043GCG= +NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG= +NC_000003.11:g.14561629_14561630GC= +NC_000003.11:g.14561629_14561630insG +NC_000004.11:g.140811111_140811122del +NC_000004.11:g.140811111_140811122CTGCTGCTGCTG= +NC_000004.11:g.140811117_140811122del 
+NC_000004.11:g.140811111_140811117del +NC_000004.11:g.140811117C>A +NC_000002.11:g.73675227_73675228insCTC +9-136132908-T-TC +9-136132908-TAC-TCA +9-136132908-TA-TA +NM_020469.2:c.258delG +NM_020469.2:c.260_262TGA= +NM_020469.2:c.261delG +NM_020469.2:c.261dupG +NM_020469.2:c.261_262insTT +NC_000019.10:g.50378563_50378564insTAC +NC_000019.10:g.50378563_50378564insC +NC_000019.10:g.50378564_50378565insTACA +NC_000019.10:g.50378565_50378567dup +NC_000019.10:g.50378563_50378564= +NC_000019.10:g.50378563_50378564insTCGG +NC_000019.10:g.50378563delinsTTAC +NC_000019.10:g.50378563_50378564insTAAC +NC_000019.10:g.50378562_50378565del +NC_000019.10:g.50378562_50378565delinsTC +NC_000007.14:g.149779575_149779577delinsT +NC_000007.14:g.149779575_149779577= +NC_000007.14:g.149779576_149779578del +NC_000007.14:g.149779577del +NC_000007.14:g.149779573_149779579del +NC_000007.14:g.149779573_149779579delinsCA +NM_000088.3:c.590_591inv +NM_024989.3:c.1778_1779inv +NM_032815.3:c.555_556inv +NM_006138.4:c.3_4inv +NM_000038.5:c.3927_3928delAAinsTT +NM_001034853.1:c.2847_2848delAGinsCT +NM_000088.3:c.4392_*2inv +NM_000088.3:c.4392_*5inv +NM_000088.3:c.4390_*7inv +NM_005732.3:c.2923-5insT +NM_198283.1(EYS):c.*743120C>T +NM_133379.4(TTN):c.*265+26591C>T +NM_000088.3:c.589-2_589-1AG>G +NM_000088.3:c.642+1_642+2delGTinsG +NM_004415.3:c.1-1insA +NM_004415.3:c.-1_1insA +NM_000273.2:c.1-5028_253del +NM_002929.2:c.1006C>T +NR_125367.1:n.167+18165G>A +NM_006005.3:c.3071_3073delinsTTA +NM_000089.3:n.1504_1506del +NC_012920.1:m.1011C>T +NC_000006.11:g.90403795G= +1-169519049-T-. 
+NC_000005.9:g.35058667_35058668AG= +NM_000251.1:c.1296_1348del +NM_000088.3:c.2023_2028del +NM_000088.3:c.2024_2028+1del +ENST00000450616.1:n.31+1G>C +ENST00000491747:c.5071A>T +NG_007400.1:g.8638G>T +LRG_1:g.8638G>T +LRG_1t1:c.589G>T +chr16:g.15832508_15832509delinsAC +NG_012386.1:g.24048dupG +NM_033517.1:c.1307_1309delCGA +HG1311_PATCH-33720-CCGA-C +2-73675227-TCTC-TCTCCTC +2-73675227-TC-TC +3-14561627-AG-AGG +3-14561630-CC-CC +6-90403795-G-G +6-90403795-G-A +6-32012992-CG-C +17-48275363-C-A +17-48275364-C-A +17-48275359-GGA-TCC +7-94039128-CTTG-C +9-135800972-AC-ACC +1-43212925-C-T +HG987_PATCH-355171-C-A +20-43252915-T-C +1-216219781-A-C +2-209113113-G-A,C,T +NC_000005.9:g.35058665_35058666CA= +NC_000002.11:g.73675227_73675229delTCTinsTCTCTC +NM_000828.4:c.-2dupG +X-122318386-A-AGG +NM_000828.4:c.-2G>T +NM_000828.4:c.-2G= +X-122318386-A-AT +NM_000828.4:c.-2_-1insT +NM_000828.4:c.-3_-2insT +NM_000828.4:c.-2delGinsTT +NM_000828.4:c.-2_-1delGCinsTT +NM_000828.4:c.-3_-2delAGinsTT +15-72105929-C-C +15-72105928-AC-ATT +15-72105928-ACC-ATT +15-72105927-GACC-GTT +19-41123093-A-AG +19-41123093-A-AT +19-41123093-AG-A +19-41123093-AG-AG +NM_012309.4:c.913-5058G>A +LRG_199t1:c.2376[G>C];[G>C] +LRG_199t1:c.[2376G>C];[3103del] +LRG_199t1:c.[4358_4359del;4361_4372del] +LRG_199t1:c.2376G>C(;)3103del +LRG_199t1:c.2376[G>C];[(G>C)] +LRG_199t1:c.[2376G>C];[?] 
+LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C +LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del +LRG_199t1:c.[976-20T>A;976-17_976-1dup] +1-5935162-A-T +1-12065948-C-T +1-46655125-CTCAC-C +1-68912523-TGAGCCAGAG-T +1-68912526-GCCAGAG-G +1-109817590-G-T +1-145597475-GAAGT-G +1-153791300-CTG-C +1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC +1-156108541-G-GG +1-161279695-T-A +1-169519049-T-T +1-226125468-G-A +10-89623035-CGCA-C +11-62457852-C-A +11-108178710-A-AT +11-111735981-G-A +12-11023080-C-A +12-22018712-TC-T +12-52912946-T-C +12-103234292-TC-T +12-103311124-T-C +12-111064166-G-A +12-123738430-CA-C +13-31789169-CT-C +14-62187287-G-A +14-62188231-TT-GA +14-63174827-C-A +15-42680000-CA-C +15-42680000-CA-CAA +15-42703179-T-TTCA +15-42703179-TAG-TTCATCT +15-48782203-C-T +15-72105929-CC-C +15-89873415-G-A +16-2103394-C-T +16-3779300-C-G +16-5128843-C-G +16-74808559-C-T +16-89574804-C-A +16-89574826-A-C +16-89574914-G-GT +16-89574916-C-CGTC +16-89575009-G-A +16-89575040-C-A,CA +16-89576896-A-C +16-89576930-T-TA,TT +16-89576931-G-GTG +16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C +16-89613064-AGGAGAGGCG-AT +16-89613069-AGGCGGGAGA-AT +16-89613145-C-T +17-7578194-GCAC-G +17-7578523-T-TG +17-17119692-A-C +17-41197588-GGACA-G +17-41256884-C-G +17-42991428-C-A +17-48252809-A-T +17-62022709-G-GTC +17-62022711-C-CT +17-62023005-G-GGC +17-62023006-C-A +17-62034787-G-A +18-24128261-GTCCTCC-G +19-15291774-G-A +19-15311794-A-G +19-39076592-G-A +2-50149352-T-C +2-50847195-G-A +2-71825797-C-G +2-166179712-G-C +2-166183371-A-G +2-166929889-GTCCAGGTCCT-GAC +2-166929891-CCAGGTCCT-C +2-179393504-G-T +2-185803444-TGCAGCTGCTGCAGCTGCAGCTGCA-T +2-201950249-G-T +2-238268730-C-A +21-43897396-C-T +22-30064360-G-GCGACGC +3-10188187-TGTCCCGATAG-T +3-50402127-T-G +3-50402890-G-A +3-57851007-AG-A +3-122003832-G-C +4-153332910-C-CAGG +5-1295183-G-A +5-77396835-TTTC-T +5-118811422-GGTGA-G +5-118811422-GGTGAG-G +5-131705587-CG-C +5-148406482-T-C +6-110036337-T-TCAG +6-110036337-TGAT-T +6-152651802-C-A 
+6-152737643-C-G +7-6026775-T-C +7-55242465-GGAATTAAGAGAAGCA-G +7-55248992-T-TTCCAGGAAGCCT +7-75932111-C-A +7-91652178-A-AAAC +7-117199644-ATCT-A +7-140453136-AC-CT +7-140453136-A-T +7-140453137-C-T +7-143013488-A-T +7-143018934-G-A +7-143048771-C-T +8-1871951-C-T +9-13112056-T-TG +9-21971208-C-A +9-35683240-T-TG +9-135796754-G-A +HG536_PATCH-10391-AC-A +HG865_PATCH-33547-G-A +HG865_PATCH-569441-G-T +HG865_PATCH-574546-C-T +HSCHR1_1_CTG31-133178-TAG-T +HSCHR6_MHC_MANN_CTG1-3848158-T-G +HSCHR6_MHC_MANN_CTG1-3851043-C-A +X-70443101-C-T +X-107845202-GACCACC-GACC,G +X-153296777-G-A +NM_198180.2:c.408_410delGTG +NM_080877.2:c.1733_1735delinsTTT +NM_080877.2:c.1735_1737delinsTGA +NM_080877.2:c.1735_1737delinsTAATTGTTC +NM_080877.2:c.1737delinsATTGTTC +NM_000088.3:c.4392_*2delinsAGAG +NM_000088.3:c.589_591delinsAGAAGC +NM_000885.5:c.*2536delinsAGAAAAATCA +NM_002693.2:c.-186_-185delinsCC +NG_009616.1:g.29052_29053insCTACATAG +NM_000061.2:c.588_588+1insCTACATAG +NM_000061.2:c.588_589insCTACATAG diff --git a/VariantValidator/modules/test_vv.py b/VariantValidator/modules/test_vv.py new file mode 100644 index 00000000..fe67e290 --- /dev/null +++ b/VariantValidator/modules/test_vv.py @@ -0,0 +1,54 @@ +#Why isn't this discovered I wonder. 
+import os +import pytest +import vvTestFunctions as fn +from vvObjects import Validator + +inputVariants=fn.loadVariantFile("VariantValidator/modules/inputVariants.txt") + +''' +print("Configuring for personal linux") +seqrepo_current_version='2018-08-21' +HGVS_SEQREPO_DIR='/home/buran/documents/workspace/ITS/seqrepo/'+seqrepo_current_version +os.environ['HGVS_SEQREPO_DIR']=HGVS_SEQREPO_DIR +uta_current_version='uta_20180821' +UTA_DB_URL='postgresql://uta_admin:uta_admin@127.0.0.1/uta/' + uta_current_version +os.environ['UTA_DB_URL']=UTA_DB_URL +from VariantValidator import variantValidator as vv +vv.my_config() +''' + +def constructVal(): + val=Validator() + return val + +@pytest.fixture(params=inputVariants[:]) +def constructValidation(request): + val=constructVal() +# print request.param + selectTranscripts='all' + selectedAssembly='GRCh37' + return val,val.validate(request.param,selectedAssembly,selectTranscripts) + +def test_validation_output(constructValidation): + val,v=constructValidation + assert v!=None + +def test_validation_errors(constructValidation): + val,v=constructValidation + logs=v["metadata"]["logs"].split("\n") + e=0 + for l in logs: + if "ERROR:" in l: + e+=1 + assert e==0 + +def test_validation_criticals(constructValidation): + val,v=constructValidation + logs=v["metadata"]["logs"].split("\n") + c=0 + for l in logs: + if "CRIT:" in l: + c+=1 + assert c==0 + diff --git a/VariantValidator/modules/vvTestCompare.py b/VariantValidator/modules/vvTestCompare.py new file mode 100644 index 00000000..65992bef --- /dev/null +++ b/VariantValidator/modules/vvTestCompare.py @@ -0,0 +1,11 @@ +#Saving script + +import vvTestFunctions as fn + +masterDirectory="testOutputsMasterITS" +testDirectories=["testOutputsBuran"] + +for d in testDirectories: + print("Comparing "+masterDirectory+" and "+d) + fn.compareBatches(masterDirectory,d) + From 01d797801b4e434be0a21d89278869f2e2e021f0 Mon Sep 17 00:00:00 2001 From: buran Date: Mon, 21 Jan 2019 16:58:05 +0000 
Subject: [PATCH 023/223] Added a bunch of files to the disposal folder --- VariantValidator/__init__.py | 21 - VariantValidator/compile_lrg_data.py | 132 - VariantValidator/configuration/__init__.py | 27 - VariantValidator/configuration/config.ini | 33 - VariantValidator/external.py | 114 - .../mysql_refSeqGene_noMissmatch.py | 279 - VariantValidator/output_formatter.py | 49 - VariantValidator/ref_seq_type.py | 55 - VariantValidator/testing/__init__.py | 20 - VariantValidator/testing/inputVariants.txt | 334 - .../testing/mergeInputVariants.py | 15 - VariantValidator/testing/vvTestFunctions.py | 203 - VariantValidator/testing/vvTestSave.py | 9 - .../testing/vvTestSaveOutput101.txt | 552 -- VariantValidator/variantValidator.py | 8716 ----------------- VariantValidator/variantanalyser/__init__.py | 30 - VariantValidator/variantanalyser/batch.py | 39 - .../variantanalyser/dbControls/__init__.py | 19 - .../variantanalyser/dbControls/data.py | 293 - .../dbControls/dbConnection.py | 55 - .../variantanalyser/dbControls/dbconfig.py | 51 - .../variantanalyser/dbControls/dbfetchall.py | 62 - .../variantanalyser/dbControls/dbfetchone.py | 127 - .../variantanalyser/dbControls/dbinsert.py | 128 - .../variantanalyser/dbControls/dbquery.py | 55 - .../variantanalyser/dbControls/dbupdate.py | 109 - .../dbControls/mysql_error.txt | 0 VariantValidator/variantanalyser/functions.py | 3043 ------ VariantValidator/variantanalyser/g_to_g.py | 315 - VariantValidator/variantanalyser/gap_genes.py | 581 -- VariantValidator/variantanalyser/hgvs2vcf.py | 833 -- VariantValidator/variantanalyser/liftover.py | 344 - VariantValidator/variantanalyser/links.py | 1191 --- .../variantanalyser/loggingSetup.py | 59 - .../variantanalyser/mysql_error.txt | 0 .../variantanalyser/pseudo_vcf2hgvs.py | 229 - .../supported_chromosome_builds.py | 2866 ------ VariantValidator/variantanalyser/vvLogging.py | 144 - 38 files changed, 21132 deletions(-) delete mode 100644 VariantValidator/__init__.py delete mode 100644 
VariantValidator/compile_lrg_data.py delete mode 100644 VariantValidator/configuration/__init__.py delete mode 100644 VariantValidator/configuration/config.ini delete mode 100644 VariantValidator/external.py delete mode 100644 VariantValidator/mysql_refSeqGene_noMissmatch.py delete mode 100644 VariantValidator/output_formatter.py delete mode 100644 VariantValidator/ref_seq_type.py delete mode 100644 VariantValidator/testing/__init__.py delete mode 100644 VariantValidator/testing/inputVariants.txt delete mode 100644 VariantValidator/testing/mergeInputVariants.py delete mode 100644 VariantValidator/testing/vvTestFunctions.py delete mode 100644 VariantValidator/testing/vvTestSave.py delete mode 100644 VariantValidator/testing/vvTestSaveOutput101.txt delete mode 100644 VariantValidator/variantValidator.py delete mode 100644 VariantValidator/variantanalyser/__init__.py delete mode 100644 VariantValidator/variantanalyser/batch.py delete mode 100644 VariantValidator/variantanalyser/dbControls/__init__.py delete mode 100644 VariantValidator/variantanalyser/dbControls/data.py delete mode 100644 VariantValidator/variantanalyser/dbControls/dbConnection.py delete mode 100644 VariantValidator/variantanalyser/dbControls/dbconfig.py delete mode 100644 VariantValidator/variantanalyser/dbControls/dbfetchall.py delete mode 100644 VariantValidator/variantanalyser/dbControls/dbfetchone.py delete mode 100644 VariantValidator/variantanalyser/dbControls/dbinsert.py delete mode 100644 VariantValidator/variantanalyser/dbControls/dbquery.py delete mode 100644 VariantValidator/variantanalyser/dbControls/dbupdate.py delete mode 100644 VariantValidator/variantanalyser/dbControls/mysql_error.txt delete mode 100644 VariantValidator/variantanalyser/functions.py delete mode 100644 VariantValidator/variantanalyser/g_to_g.py delete mode 100644 VariantValidator/variantanalyser/gap_genes.py delete mode 100644 VariantValidator/variantanalyser/hgvs2vcf.py delete mode 100644 
VariantValidator/variantanalyser/liftover.py delete mode 100644 VariantValidator/variantanalyser/links.py delete mode 100644 VariantValidator/variantanalyser/loggingSetup.py delete mode 100644 VariantValidator/variantanalyser/mysql_error.txt delete mode 100644 VariantValidator/variantanalyser/pseudo_vcf2hgvs.py delete mode 100644 VariantValidator/variantanalyser/supported_chromosome_builds.py delete mode 100644 VariantValidator/variantanalyser/vvLogging.py diff --git a/VariantValidator/__init__.py b/VariantValidator/__init__.py deleted file mode 100644 index ae2d4523..00000000 --- a/VariantValidator/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -# -*- coding: utf-8 -*- -import configuration - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
-# - - diff --git a/VariantValidator/compile_lrg_data.py b/VariantValidator/compile_lrg_data.py deleted file mode 100644 index 1129bc9c..00000000 --- a/VariantValidator/compile_lrg_data.py +++ /dev/null @@ -1,132 +0,0 @@ -# -*- coding: utf-8 -*- - -import urllib2 -import re -import variantanalyser.dbControls.data as data - -def update(): - print 'Updating LRG lookup tables' - lr2rs_download = urllib2.Request('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_transcripts_xrefs.txt') - # Open and read - lr2rs_data = urllib2.urlopen(lr2rs_download) - lr2rs = lr2rs_data.read() - # List the data - lr2rs = lr2rs.strip() - lr2rs = lr2rs.split('\n') - - # Download - lrg_status_download = urllib2.Request('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_GRCh38.txt') - # Open and read - lrg_status_data = urllib2.urlopen(lrg_status_download) - lrg_status = lrg_status_data.read() - # List the data - lrg_status = lrg_status.strip() - lrg_status = lrg_status.split('\n') - - # Download - rs2lr_download = urllib2.Request('http://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/RefSeqGene/LRG_RefSeqGene') - # Open and read - rs2lr_data = urllib2.urlopen(rs2lr_download) - rs2lr = rs2lr_data.read() - # List the data - rs2lr = rs2lr.strip() - rs2lr = rs2lr.split('\n') - - # Download LRG transcript (_t) to LRG Protein (__p) data file - lr_t2p_downloaded = urllib2.Request('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_proteins_RefSeq.txt') - # Open and read - lr_t2p_data = urllib2.urlopen(lr_t2p_downloaded) - lr_t2p = lr_t2p_data.read() - # List the data - lr_t2p = lr_t2p.strip() - lr_t2p = lr_t2p.split('\n') - - # Dictionary the status by LRG_ID - lrg_status_dict = {} - # Compile dictionary - for line in lrg_status: - if re.search('^#', line): - continue - else: - list = line.split() - lrgID = list[0] - stat = list[2] - lrg_status_dict[lrgID] = stat - - # Required lookup tables - # LRG_ID GeneSymbol RefSeqGeneID status - # LRG_ID RefSeqTranscriptID - # LRG_T2LRG_P - - print 'Update LRG 
and LRG_transcript lookup tables' - # Populate lists lrg_rs_lookup (LRG to RefSeqGene) and lrg_t2nm_ (LRG Transcript to RefSeq Transcript) - for line in lr2rs: - if re.search('^#', line): - continue - else: - list = line.split() - # Assign objects - lrg_id = list[0] - symbol = list[1] - rsgid = list[2] - lrg_tx = str(list[0]) + str(list[3]) - rstid = list[4] - status = lrg_status_dict[lrg_id] - # pass data to relevant lists - # lrg_rs_lookup - lrg_rs_lookup = [lrg_id, symbol, rsgid, status] - - # update LRG to RefSeqGene database - data.update_lrg_rs_lookup(lrg_rs_lookup) - - # lrg_t2nm_ - lrgtx_to_rstID = [lrg_tx, rstid] - # update database - data.update_lrgt_rst(lrgtx_to_rstID) - - print 'Update LRG protein lookup table' - # Populate LRG protein RefSeqProtein lokup table - for line in lr_t2p: - if re.search('^#', line): - continue - else: - list = line.split() - # Assign objects - lrg_p = list[0] - rs_p = list[1] - # update LRG to RefSeqGene database - data.update_lrg_p_rs_p_lookup(lrg_p, rs_p) - - print 'LRG lookup tables updated' - return - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
-# - - - - - - - - - - - - - - diff --git a/VariantValidator/configuration/__init__.py b/VariantValidator/configuration/__init__.py deleted file mode 100644 index d04d865b..00000000 --- a/VariantValidator/configuration/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -Add the location of the config.ini as an environment varaible -""" - -import os -CONF_ROOT = os.path.dirname(os.path.abspath(__file__)) -os.environ['CONF_ROOT'] = CONF_ROOT - - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
-# diff --git a/VariantValidator/configuration/config.ini b/VariantValidator/configuration/config.ini deleted file mode 100644 index 5f6e4aa0..00000000 --- a/VariantValidator/configuration/config.ini +++ /dev/null @@ -1,33 +0,0 @@ -[variantValidator] -version = 1.0 -release_date = tbc - -[mysql] -host = 127.0.0.1 -database = validator -user = vvadmin -password = var1ant - -[logging] -# string = error file console trace -string = debug file console trace - -[EntrezID] -entrezid = admin@variantvalidator.org - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
-# diff --git a/VariantValidator/external.py b/VariantValidator/external.py deleted file mode 100644 index 503fc820..00000000 --- a/VariantValidator/external.py +++ /dev/null @@ -1,114 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -Fiunctions that return links to external data -""" - -import output_formatter -from variantanalyser import dbControls as va_dbCrl - - -""" -Provide direct links to reference sequence records -""" - -def get_urls(dict_out): - # Add urls - report_urls = {} - if 'NM_' in dict_out['hgvs_transcript_variant'] or 'NR_' in dict_out['hgvs_transcript_variant']: - report_urls['transcript'] = 'https://www.ncbi.nlm.nih.gov' \ - '/nuccore/%s' % dict_out['hgvs_transcript_variant'].split(':')[0] - if 'NP_' in dict_out['hgvs_predicted_protein_consequence']['slr']: - report_urls['protein'] = 'https://www.ncbi.nlm.nih.gov' \ - '/nuccore/%s' % str(dict_out['hgvs_predicted_protein_consequence']['slr']).split(':')[0] - if 'NG_' in dict_out['hgvs_refseqgene_variant']: - report_urls['refseqgene'] = 'https://www.ncbi.nlm.nih.gov' \ - '/nuccore/%s' % dict_out['hgvs_refseqgene_variant'].split(':')[0] - if 'LRG' in dict_out['hgvs_lrg_variant']: - lrg_id = dict_out['hgvs_lrg_variant'].split(':')[0] - lrg_data = va_dbCrl.data.get_LRG_data_from_LRGid(lrg_id) - lrg_status = str(lrg_data[4]) - if lrg_status == 'public': - report_urls['lrg'] = 'http://ftp.ebi.ac.uk/pub' \ - '/databases/lrgex/%s.xml' % dict_out['hgvs_lrg_variant'].split(':')[0] - else: - report_urls['lrg'] = 'http://ftp.ebi.ac.uk' \ - '/pub/databases/lrgex' \ - '/pending/%s.xml' % dict_out['hgvs_lrg_variant'].split(':')[0] - # Ensembl needs to be added at a later data - # "http://www.ensembl.org/id/" ? What about historic versions????? 
- - - return report_urls - - - -""" -Returns a link to the external Varsome Website -""" - -def get_varsome_links(vcf_dict, ucsc_build): - # Varsome and UCSC - varsome_url = None - - if not 'hg19' in primary_assembly or not 'hg38' in primary_assembly: - return {'error': 'Unsupported genome build: supported = hg19 and hg38'} - - if primary_assembly == 'hg19': - varsome = "https://varsome.com/variant/hg19/" # %s" %(coding.replace('dup', 'ins')) - if primary_assembly == 'hg38': - varsome = "https://varsome.com/variant/hg38/" # %s" %(coding.replace('dup', 'ins')) - - # Report VCF from hgvs - rp_vcf_component_list = [str(vcf_dict['vcf']['chr']), - str(vcf_dict['vcf']['pos']), - str(vcf_dict['vcf']['ref']), - str(vcf_dict['vcf']['alt']) - ] - vcf_varsome = '-'.join(rp_vcf_component_list) - varsome_external = varsome + vcf_varsome - varsome_url = varsome_external - return varsome_url - - - -""" -Creates a link to add the VV track to the UCSC genome browser -""" -def ucsc_link(pvcf_dashed, hgvs_genomic, ucsc_assembly, intragenic=False): - - if not 'hg19' in ucsc_assembly or not 'hg38' in ucsc_assembly: - return {'error': 'Unsupported genome build: supported = hg19 and hg38'} - - vcf_components = pvcf_dashed.split('-') - vcf_components[0] = ucsc_chromosome - - browser_start = str(final_hgvs_genomic.posedit.pos.start.base - 11) - browser_end = str(final_hgvs_genomic.posedit.pos.end.base + 11) - ucsc_browser_position = '%s:%s-%s' % (ucsc_chromosome, browser_start, browser_end) - remove_genomic_bases = str(output_formatter.remove_reference(hgvs_genomic)) - - ucsc_link = 'http://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=%s&hgt.customText=https://variantvalidator.org/bed/?variant=%s|%s|GRCh37|%s|%s' % ( - ucsc_assembly, ucsc_browser_position, coding, hgvs_genomic.ac, remove_genomic_bases, pvcf_dashed - ) - - return ucsc_link - - - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or 
modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# \ No newline at end of file diff --git a/VariantValidator/mysql_refSeqGene_noMissmatch.py b/VariantValidator/mysql_refSeqGene_noMissmatch.py deleted file mode 100644 index 6797f43b..00000000 --- a/VariantValidator/mysql_refSeqGene_noMissmatch.py +++ /dev/null @@ -1,279 +0,0 @@ -# -*- coding: utf-8 -*- - -import re -import os -import urllib2 -import copy -import variantanalyser -import variantanalyser.dbControls -import variantanalyser.dbControls.data as db_data - - -def update(): - print 'Updating RefSeqGene no Missmatch MySQL data' - # Set os path - # Set up os paths data and log folders - ROOT = os.path.dirname(os.path.abspath(__file__)) - - # Download data from RefSeqGene - # Download data - rsg = urllib2.Request('http://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/gene_RefSeqGene') - response = urllib2.urlopen(rsg) - rsg_file = response.read() - rsg_data_line = rsg_file.split('\n') - rsg_data = [] - for data in rsg_data_line: - rsg_data.append(data) - - # Download data - grch37 = urllib2.Request( - 'http://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/GCF_000001405.25_refseqgene_alignments.gff3') - response = urllib2.urlopen(grch37) - grch37_file = response.read() - grch37_data_line = grch37_file.split('\n') - grch37_align_data = [] - for data in grch37_data_line: - grch37_align_data.append(data) - - # Download data - grch38 = urllib2.Request( - 
'http://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/GCF_000001405.28_refseqgene_alignments.gff3') - response = urllib2.urlopen(grch38) - grch38_file = response.read() - grch38_data_line = grch38_file.split('\n') - grch38_align_data = [] - for data in grch38_data_line: - grch38_align_data.append(data) - - # Open Lists - # rsg_data = open(os.path.join(ROOT, 'gene_RefSeqGene'), 'r') - rsg_id_info = [] - # grch37_align_data = open(os.path.join(ROOT, 'GCF_000001405.25_refseqgene_alignments.gff3'), 'r') - grch37_align = [] - # grch38_align_data = open(os.path.join(ROOT, 'GCF_000001405.28_refseqgene_alignments.gff3'), 'r') - grch38_align = [] - - # Place the required data from each file into a dictionary - hash = re.compile('#') - for line in rsg_data: - if hash.search(line): - pass - else: - line = line.strip() - info = line.split() - if len(info) == 0: - pass - else: - dict = {'symbol': info[2], 'rsg_id': info[3], 'gene_id': info[1]} - rsg_id_info.append(dict) - - # Create dictionary to store RefSeqGene and gene symbol data NOTE RefSeqGene ID stored without version number! - rsg_to_symbol = {} - # Collect the data - for ent in rsg_id_info: - rsg_id = copy.deepcopy(ent['rsg_id']) - rsg_id = rsg_id.split('.')[0] - rsg_to_symbol[rsg_id] = {'symbol': ent['symbol'], 'gene_id': ent['gene_id']} - - # Count total number of NG to NC mappings - total_rsg_to_nc = 0 - total_rsg_to_nc_rejected = 0 - for line in grch37_align_data: - # Count NG_ to NC_ and remove the entries we don't care about! 
- if re.search('NC_', line) and re.search('NG_', line): - total_rsg_to_nc = total_rsg_to_nc + 1 - else: - continue - if hash.search(line): - pass - elif not re.search('gap_count=0', line): - if re.search('NC_', line) and re.search('NG_', line): - total_rsg_to_nc_rejected = total_rsg_to_nc_rejected + 1 - # print line - pass - else: - line = line.strip() - info = line.split('\t') - if len(info) != 9: - pass - else: - metrics = info[8].split(';') - id_ori = metrics[1].replace('Target=', '') - id_ori_list = id_ori.split() - dict = {'rsg_id': id_ori_list[0], 'chr_id': info[0], 'rsg_start': info[3], 'rsg_end': info[4], - 'ori': id_ori_list[3]} - grch37_align.append(dict) - - for line in grch38_align_data: - if re.search('NC_', line) and re.search('NG_', line): - total_rsg_to_nc = total_rsg_to_nc + 1 - else: - continue - if hash.search(line): - pass - elif not re.search('gap_count=0', line): - if re.search('NC_', line) and re.search('NG_', line): - total_rsg_to_nc_rejected = total_rsg_to_nc_rejected + 1 - # print line - pass - else: - line = line.strip() - info = line.split('\t') - if len(info) != 9: - pass - else: - metrics = info[8].split(';') - id_ori = metrics[1].replace('Target=', '') - id_ori_list = id_ori.split() - dict = {'rsg_id': id_ori_list[0], 'chr_id': info[0], 'rsg_start': info[3], 'rsg_end': info[4], - 'ori': id_ori_list[3]} - grch38_align.append(dict) - - # Create a data array containing the database - db = [] - # map line - for line in grch37_align: - ml = [] - link = line['rsg_id'] - ml.append(link) - ml.append(line['chr_id']) - ml.append('GRCh37') - ml.append(line['rsg_start']) - ml.append(line['rsg_end']) - ml.append(line['ori']) - # Add the additional data from rsg_id_info - for data in rsg_id_info: - if link == data['rsg_id']: - ml.append(data['symbol']) - ml.append(data['gene_id']) - else: - continue - # Create the entry and append to db - db.append(ml) - - for line in grch38_align: - ml = [] - link = line['rsg_id'] - ml.append(link) - 
ml.append(line['chr_id']) - ml.append('GRCh38') - ml.append(line['rsg_start']) - ml.append(line['rsg_end']) - ml.append(line['ori']) - # Add the additional data from rsg_id_info - for data in rsg_id_info: - if link == data['rsg_id']: - ml.append(data['symbol']) - ml.append(data['gene_id']) - else: - continue - # Create the entry and append to db - db.append(ml) - - # Known missing identifiers - known = { - 'NG_021289.1' : {'symbol' : 'CFAP47', 'gene_id' : '286464'}, - 'NG_027707.1' : {'symbol' : 'DUX4L1', 'gene_id' : '22947'}, - 'NG_033266.1' : {'symbol' : 'DSE', 'gene_id': '29940'}, - 'NG_061543.1' : {'symbol' : 'CYP1A2', 'gene_id': '1544'}, - 'NG_061374.1' : {'symbol' : 'CYP1A1', 'gene_id': '1543'}, - 'NG_059281.1' : {'symbol' : 'HBB', 'gene_id': '3043'}, - 'NG_012639.1' : {'symbol' : 'VHLL', 'gene_id': '391104'}, - 'NG_059186.1' : {'symbol' : 'HBA1', 'gene_id': '3040'}, - 'NG_059271.1' : {'symbol' : 'HBA2', 'gene_id': '3040'} - } - - # Known Obsolete identifiers - obsolete = { - 'NG_016553.1': 'OBSOLETE', - 'NG_012639.1': 'Removed due to questionable status' - } - - # Identify lines with missing data e.g. gene symbols - for line in db: - try: - line[6] - except IndexError: - try: - identifier = copy.deepcopy(line[0]) - identifier = identifier.split('.')[0] - line.append(rsg_to_symbol[identifier]['symbol']) - line.append(rsg_to_symbol[identifier]['gene_id']) - except KeyError: - try: - line.append(known[line[0]]['symbol']) - line.append(known[line[0]]['gene_id']) - except KeyError: - check = obsolete[line[0]] - print str(line[0]) + ' : ' + check - - # Open a text file to be used as a simple database and write the database - # rsg_db = open(os.path.join(ROOT, 'rsg_chr_db.txt'), 'w') - - to_mysql = [] - for line in db: - if line[0] in obsolete.keys(): - continue - # Only gap-less RefSeqGenes will have passed. 
The rest will be alternatively curated - write = [] - # Take the mapping data - write = copy.deepcopy(line[0:6]) - # add RSG ranges - write.append('1') - end_rsg = int(line[4]) - int(line[3]) + 1 - end_rsg = str(end_rsg) - write.append(end_rsg) - # Create block data chr then rsg - chr_block = str(line[3]) + '-' + str(line[4]) - write.append(chr_block) - rsg_block = str(write[6]) + '-' + str(write[7]) - write.append(rsg_block) - # Add gene ID and Gene symbol(s) - write.append(line[7]) - write.append(line[6]) - # write_me = '\t'.join(write) - # rsg_db.write(write_me + '\n') - del write[6] - to_mysql.append(write) - - # Set up code to write to database - for line in to_mysql: - current_symbol = db_data.get_gene_symbol_from_refSeqGeneID(line[0]) - if line[10] == current_symbol: - pass - else: - if current_symbol != 'none': - line[10] = current_symbol - else: - pass - db_data.update_refSeqGene_loci(line) - - # Close database - # rsg_db.close() - - print 'Total NG_ to NC_ alignments = ' + str(total_rsg_to_nc) - print 'Gapps within NG_ to NC_ alignments = ' + str(total_rsg_to_nc_rejected) - - print 'complete' - return - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
-# - - - - diff --git a/VariantValidator/output_formatter.py b/VariantValidator/output_formatter.py deleted file mode 100644 index a2382b16..00000000 --- a/VariantValidator/output_formatter.py +++ /dev/null @@ -1,49 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -Module that will be embedded into the valstr method and translate method - -Two functions, - -format nucleotide descriptions removes the ref bases from displayed descriptions - -format protein descriptions return the description using the single letter aa alphabet -""" - -import hgvs - -""" -format protein description into single letter aa code -""" - - -def single_letter_protein(hgvs_protein): - hgvs_protein_slc = hgvs_protein.format({'p_3_letter': False}) - return hgvs_protein_slc - - -""" -format nucleotide descriptions to not display reference base -""" - - -def remove_reference(hgvs_nucleotide): - hgvs_nucleotide_refless = hgvs_nucleotide.format({'max_ref_length': 0}) - return hgvs_nucleotide_refless - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
-# \ No newline at end of file diff --git a/VariantValidator/ref_seq_type.py b/VariantValidator/ref_seq_type.py deleted file mode 100644 index 7d0fc25d..00000000 --- a/VariantValidator/ref_seq_type.py +++ /dev/null @@ -1,55 +0,0 @@ -# -*- coding: utf-8 -*- -""" -ref_seq_type.py - -Simple function that assignes the correct reference sequence type (c., n., p., g.) to -reference sequences - -# Example -ref_type_assign(accession) -""" - -# Defining reference sequence type from accession -import re -from variantanalyser import dbControls - - -def ref_type_assign(accession): - if re.match('NC_', accession) or re.match('NG_', accession) or re.match('NT_', accession) or re.match('NW_', - accession): - ref_type = ':g.' - elif re.match('NM_', accession): - ref_type = ':c.' - elif re.match('NR_', accession): - ref_type = ':n.' - elif re.match('NP_', accession): - ref_type = ':p.' - elif re.match('LRG_', accession): - if re.search('t', accession): - refseqtranscript_reference = dbControls.data.get_RefSeqTranscriptID_from_lrgTranscriptID(accession) - if re.match('NM_', refseqtranscript_reference): - ref_type = ':c.' - else: - ref_type = ':n.' - elif re.search('_p', accession): - ref_type = ':p.' - else: - ref_type = ':g.' - return ref_type - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
-# diff --git a/VariantValidator/testing/__init__.py b/VariantValidator/testing/__init__.py deleted file mode 100644 index 2c21758b..00000000 --- a/VariantValidator/testing/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# -*- coding: utf-8 -*- - - - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# \ No newline at end of file diff --git a/VariantValidator/testing/inputVariants.txt b/VariantValidator/testing/inputVariants.txt deleted file mode 100644 index 8dd63cdc..00000000 --- a/VariantValidator/testing/inputVariants.txt +++ /dev/null @@ -1,334 +0,0 @@ -NM_000088.3:c.589G>T -NM_015120.4:c.35T>C -NM_015120.4:c.39G>C -NM_015120.4:c.34C>T -NC_000002.11:g.73613030C>T -NC_000023.10:g.33229673A>T -NM_001145026.1:c.715A>G -NC_000016.9:g.2099572TC>T -NM_000088.3:c.589GG>CT -NM_000094.3:c.6751-2_6751-3del -COL5A1:c.5071A>T -NG_007400.1:c.5071A>T -chr16:15832508_15832509delinsAC -NM_000088.3:c.589-1GG>G -NM_000088.3:c.642+1GT>G -NM_000088.3:c.589-2AG>G -NC_000017.10:g.48279242G>T -NM_000500.7:c.-107-19C>T -NM_000518.4:c.-130C>T -NM_000518.4:c.-50-80C>T -NM_000518.4:c.316_*342delinsCTACTT -NM_000518.4:c.316_*100del -NM_000518.4:c.*2000C>T -NM_000518.4:c.*132+1868C>T -NM_000518.4:c.-130_*2000= -NM_000518.4:c.-50-80_*132+1868= -NR_138595.1:n.-810C>T -NR_138595.1:n.1-810C>T -NR_138595.1:n.1071+1A= -NR_138595.1:n.-810_1071+1= 
-NC_000017.10:g.48261457_48261463TTATGTT= -NC_000017.10:g.48275363C>A -NM_000088.3:c.589-1G>T -NM_000088.3:c.591_593inv -11-5248232-T-A -NG_007400.1(NM_000088.3):c.589-1G>T -1:150550916G>A -1-150550916-G-A -NG_008123.1(LEPRE1_v003):c.2055+18G>A -NG_008123.1:c.2055+18G>A -NG_008123.1(NM_022356.3):c.2055+18G>A -NM_021983.4:c.490G>C -NM_032470.3:c.4del -NM_001194958.2:c.20C>A -NM_000022.2:c.534A>G -HSCHR6_MHC_SSTO_CTG1-3852542-C-G -NM_000368.4:c.363+1dupG -NM_000368.4:c.363dupG -NM_000089.3:c.1033_1035delGTT -NM_000089.3:c.1035_1035+2delTGT -NM_000088.3:c.2023_2028delGCAAGA -NM_000089.3:c.938-1delG -NM_000088.3:c.589G= -NM_000088.3:c.642A= -NM_000088.3:c.642+1GG>G -NM_000088.3:c.589-2GG>G -NM_000088.3:c.589-6_589-5insTTTT -NM_000088.3:c.642+3_642+4insAAAA -NM_000088.3:c.589-4_589-3insTT -NM_000088.3:c.589-8del -NM_000527.4:c.-187_-185delCTC -NM_206933.2:c.6317C>G -NC_000013.10:g.32929387T>C -NM_015102.3:c.2818-2T>A -19-41123094-G-GG -15-72105928-AC-A -12-122064773-CCCGCCA-C -12-122064774-CCGCCA-CCGCCA -12-122064773-CCCGCCACCGCCACCGC-CCCGCCACCGCCGCCGTC -NC_000012.11:g.122064777C>A -NC_000012.11:g.122064776delG -NC_000012.11:g.122064776dupG -NC_000012.11:g.122064776_122064777insTTT -NC_000012.11:g.122064772_122064775del -NC_000012.11:g.122064772_122064775dup -NC_000012.11:g.122064773_122064774insTTTT -NC_000012.11:g.122064772_122064777del -NC_000012.11:g.122064772_122064777dup -NC_000012.11:g.122064779_122064782dup -NC_000012.11:g.122064772_122064782del -NC_000002.11:g.95847041_95847043GCG= -NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG= -NC_000003.11:g.14561629_14561630GC= -NC_000003.11:g.14561629_14561630insG -NC_000004.11:g.140811111_140811122del -NC_000004.11:g.140811111_140811122CTGCTGCTGCTG= -NC_000004.11:g.140811117_140811122del -NC_000004.11:g.140811111_140811117del -NC_000004.11:g.140811117C>A -NC_000002.11:g.73675227_73675228insCTC -9-136132908-T-TC -9-136132908-TAC-TCA -9-136132908-TA-TA -NM_020469.2:c.258delG -NM_020469.2:c.260_262TGA= 
-NM_020469.2:c.261delG -NM_020469.2:c.261dupG -NM_020469.2:c.261_262insTT -NC_000019.10:g.50378563_50378564insTAC -NC_000019.10:g.50378563_50378564insC -NC_000019.10:g.50378564_50378565insTACA -NC_000019.10:g.50378565_50378567dup -NC_000019.10:g.50378563_50378564= -NC_000019.10:g.50378563_50378564insTCGG -NC_000019.10:g.50378563delinsTTAC -NC_000019.10:g.50378563_50378564insTAAC -NC_000019.10:g.50378562_50378565del -NC_000019.10:g.50378562_50378565delinsTC -NC_000007.14:g.149779575_149779577delinsT -NC_000007.14:g.149779575_149779577= -NC_000007.14:g.149779576_149779578del -NC_000007.14:g.149779577del -NC_000007.14:g.149779573_149779579del -NC_000007.14:g.149779573_149779579delinsCA -NM_000088.3:c.590_591inv -NM_024989.3:c.1778_1779inv -NM_032815.3:c.555_556inv -NM_006138.4:c.3_4inv -NM_000038.5:c.3927_3928delAAinsTT -NM_001034853.1:c.2847_2848delAGinsCT -NM_000088.3:c.4392_*2inv -NM_000088.3:c.4392_*5inv -NM_000088.3:c.4390_*7inv -NM_005732.3:c.2923-5insT -NM_198283.1(EYS):c.*743120C>T -NM_133379.4(TTN):c.*265+26591C>T -NM_000088.3:c.589-2_589-1AG>G -NM_000088.3:c.642+1_642+2delGTinsG -NM_004415.3:c.1-1insA -NM_004415.3:c.-1_1insA -NM_000273.2:c.1-5028_253del -NM_002929.2:c.1006C>T -NR_125367.1:n.167+18165G>A -NM_006005.3:c.3071_3073delinsTTA -NM_000089.3:n.1504_1506del -NC_012920.1:m.1011C>T -NC_000006.11:g.90403795G= -1-169519049-T-. 
-NC_000005.9:g.35058667_35058668AG= -NM_000251.1:c.1296_1348del -NM_000088.3:c.2023_2028del -NM_000088.3:c.2024_2028+1del -ENST00000450616.1:n.31+1G>C -ENST00000491747:c.5071A>T -NG_007400.1:g.8638G>T -LRG_1:g.8638G>T -LRG_1t1:c.589G>T -chr16:g.15832508_15832509delinsAC -NG_012386.1:g.24048dupG -NM_033517.1:c.1307_1309delCGA -HG1311_PATCH-33720-CCGA-C -2-73675227-TCTC-TCTCCTC -2-73675227-TC-TC -3-14561627-AG-AGG -3-14561630-CC-CC -6-90403795-G-G -6-90403795-G-A -6-32012992-CG-C -17-48275363-C-A -17-48275364-C-A -17-48275359-GGA-TCC -7-94039128-CTTG-C -9-135800972-AC-ACC -1-43212925-C-T -HG987_PATCH-355171-C-A -20-43252915-T-C -1-216219781-A-C -2-209113113-G-A,C,T -NC_000005.9:g.35058665_35058666CA= -NC_000002.11:g.73675227_73675229delTCTinsTCTCTC -NM_000828.4:c.-2dupG -X-122318386-A-AGG -NM_000828.4:c.-2G>T -NM_000828.4:c.-2G= -X-122318386-A-AT -NM_000828.4:c.-2_-1insT -NM_000828.4:c.-3_-2insT -NM_000828.4:c.-2delGinsTT -NM_000828.4:c.-2_-1delGCinsTT -NM_000828.4:c.-3_-2delAGinsTT -15-72105929-C-C -15-72105928-AC-ATT -15-72105928-ACC-ATT -15-72105927-GACC-GTT -19-41123093-A-AG -19-41123093-A-AT -19-41123093-AG-A -19-41123093-AG-AG -NM_012309.4:c.913-5058G>A -LRG_199t1:c.2376[G>C];[G>C] -LRG_199t1:c.[2376G>C];[3103del] -LRG_199t1:c.[4358_4359del;4361_4372del] -LRG_199t1:c.2376G>C(;)3103del -LRG_199t1:c.2376[G>C];[(G>C)] -LRG_199t1:c.[2376G>C];[?] 
-LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C -LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del -LRG_199t1:c.[976-20T>A;976-17_976-1dup] -1-5935162-A-T -1-12065948-C-T -1-46655125-CTCAC-C -1-68912523-TGAGCCAGAG-T -1-68912526-GCCAGAG-G -1-109817590-G-T -1-145597475-GAAGT-G -1-153791300-CTG-C -1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC -1-156108541-G-GG -1-161279695-T-A -1-169519049-T-T -1-226125468-G-A -10-89623035-CGCA-C -11-62457852-C-A -11-108178710-A-AT -11-111735981-G-A -12-11023080-C-A -12-22018712-TC-T -12-52912946-T-C -12-103234292-TC-T -12-103311124-T-C -12-111064166-G-A -12-123738430-CA-C -13-31789169-CT-C -14-62187287-G-A -14-62188231-TT-GA -14-63174827-C-A -15-42680000-CA-C -15-42680000-CA-CAA -15-42703179-T-TTCA -15-42703179-TAG-TTCATCT -15-48782203-C-T -15-72105929-CC-C -15-89873415-G-A -16-2103394-C-T -16-3779300-C-G -16-5128843-C-G -16-74808559-C-T -16-89574804-C-A -16-89574826-A-C -16-89574914-G-GT -16-89574916-C-CGTC -16-89575009-G-A -16-89575040-C-A,CA -16-89576896-A-C -16-89576930-T-TA,TT -16-89576931-G-GTG -16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C -16-89613064-AGGAGAGGCG-AT -16-89613069-AGGCGGGAGA-AT -16-89613145-C-T -17-7578194-GCAC-G -17-7578523-T-TG -17-17119692-A-C -17-41197588-GGACA-G -17-41256884-C-G -17-42991428-C-A -17-48252809-A-T -17-62022709-G-GTC -17-62022711-C-CT -17-62023005-G-GGC -17-62023006-C-A -17-62034787-G-A -18-24128261-GTCCTCC-G -19-15291774-G-A -19-15311794-A-G -19-39076592-G-A -2-50149352-T-C -2-50847195-G-A -2-71825797-C-G -2-166179712-G-C -2-166183371-A-G -2-166929889-GTCCAGGTCCT-GAC -2-166929891-CCAGGTCCT-C -2-179393504-G-T -2-185803444-TGCAGCTGCTGCAGCTGCAGCTGCA-T -2-201950249-G-T -2-238268730-C-A -21-43897396-C-T -22-30064360-G-GCGACGC -3-10188187-TGTCCCGATAG-T -3-50402127-T-G -3-50402890-G-A -3-57851007-AG-A -3-122003832-G-C -4-153332910-C-CAGG -5-1295183-G-A -5-77396835-TTTC-T -5-118811422-GGTGA-G -5-118811422-GGTGAG-G -5-131705587-CG-C -5-148406482-T-C -6-110036337-T-TCAG -6-110036337-TGAT-T -6-152651802-C-A 
-6-152737643-C-G -7-6026775-T-C -7-55242465-GGAATTAAGAGAAGCA-G -7-55248992-T-TTCCAGGAAGCCT -7-75932111-C-A -7-91652178-A-AAAC -7-117199644-ATCT-A -7-140453136-AC-CT -7-140453136-A-T -7-140453137-C-T -7-143013488-A-T -7-143018934-G-A -7-143048771-C-T -8-1871951-C-T -9-13112056-T-TG -9-21971208-C-A -9-35683240-T-TG -9-135796754-G-A -HG536_PATCH-10391-AC-A -HG865_PATCH-33547-G-A -HG865_PATCH-569441-G-T -HG865_PATCH-574546-C-T -HSCHR1_1_CTG31-133178-TAG-T -HSCHR6_MHC_MANN_CTG1-3848158-T-G -HSCHR6_MHC_MANN_CTG1-3851043-C-A -X-70443101-C-T -X-107845202-GACCACC-GACC,G -X-153296777-G-A -NM_198180.2:c.408_410delGTG -NM_080877.2:c.1733_1735delinsTTT -NM_080877.2:c.1735_1737delinsTGA -NM_080877.2:c.1735_1737delinsTAATTGTTC -NM_080877.2:c.1737delinsATTGTTC -NM_000088.3:c.4392_*2delinsAGAG -NM_000088.3:c.589_591delinsAGAAGC -NM_000885.5:c.*2536delinsAGAAAAATCA -NM_002693.2:c.-186_-185delinsCC -NG_009616.1:g.29052_29053insCTACATAG -NM_000061.2:c.588_588+1insCTACATAG -NM_000061.2:c.588_589insCTACATAG diff --git a/VariantValidator/testing/mergeInputVariants.py b/VariantValidator/testing/mergeInputVariants.py deleted file mode 100644 index 1683a330..00000000 --- a/VariantValidator/testing/mergeInputVariants.py +++ /dev/null @@ -1,15 +0,0 @@ -import sys -import vvTestFunctions as fn - -argv=sys.argv -#print(argv) -if len(argv)!=4: - print("Syntax: python mergeInputVariants.py path1 path2 pathOut") - print("Pass two paths (path1 and path2) to this script to merge the variant strings within") - print("and save the output to the file pathOut, removing duplicates in the process.") -else: - v1=fn.loadVariantFile(argv[1]) - v2=fn.loadVariantFile(argv[2]) - vOut=fn.mergeVariantList(v1,v2) - fn.saveVariantFile(argv[3],vOut) - print("Merged "+str(len(v1))+" + "+str(len(v2))+" into "+str(len(vOut))+" unique variants." 
) diff --git a/VariantValidator/testing/vvTestFunctions.py b/VariantValidator/testing/vvTestFunctions.py deleted file mode 100644 index 0c550419..00000000 --- a/VariantValidator/testing/vvTestFunctions.py +++ /dev/null @@ -1,203 +0,0 @@ -#PJDP testing suite for variant validator - -#Run this test to validate all variants and SAVE the results for comparison with a different version. -#The input variants file should contain a bunch of variants on each line in quotes. Anything outside the -#quotes is discarded. - -import os -import pickle -import json -import sys - -import sqlite3 -import logging - -logConsoleHandler = logging.StreamHandler() -logConsoleHandler.setLevel(logging.DEBUG) -#Debug -hl=logging.getLogger("hgvs.dataproviders.uta") -hl.addHandler(logConsoleHandler) - - -''' -try: - print("Configuring for personal linux") - seqrepo_current_version='2018-08-21' - HGVS_SEQREPO_DIR='/home/buran/documents/workspace/ITS/seqrepo/'+seqrepo_current_version - os.environ['HGVS_SEQREPO_DIR']=HGVS_SEQREPO_DIR - uta_current_version='uta_20180821' - UTA_DB_URL='postgresql://uta_admin:uta_admin@127.0.0.1/uta/' + uta_current_version - os.environ['UTA_DB_URL']=UTA_DB_URL - from VariantValidator import variantValidator as vv - vv.my_config() -except sqlite3.OperationalError: - print("Configuring for VM") - seqrepo_current_version = '2018-08-21' - HGVS_SEQREPO_DIR = '/Users/pjf9/variant_validator_data/seqrepo/' + seqrepo_current_version - os.environ['HGVS_SEQREPO_DIR'] = HGVS_SEQREPO_DIR - uta_current_version = 'uta_20180821' - UTA_DB_URL = 'postgresql://uta_admin:uta_admin@127.0.0.1/uta/' + uta_current_version - os.environ['UTA_DB_URL'] = UTA_DB_URL - os.environ['PYLIFTOVER_DIR'] = '/Users/pjf9/variant_validator_data/pyLiftover/' - from VariantValidator import variantValidator as vv - -''' - -def generateTestFolder(path, inputVariants, validator): - #Saves the results of running inputVariants to a folder given in saveDirectory. 
- if not os.path.isdir(path): - os.mkdir(path) - variantArray=loadVariantFile(inputVariants) - #Go through the variant array, validating, and save the results. - batch=validateBatch(variantArray,validator) - #Save copy of the resulting dictionary - saveValidationsAsFolder(path,batch) - -def generateTestJSON(path, inputVariants,sysOut): - variantArray=loadVariantFile(inputVariants) - #Go through the variant array, validating, and save the results. - batch=validateBatch(variantArray) - #batch.append(sysOut.getvalue()) - #Save copy of the resulting dictionary - saveValidationsAsJSON(path,batch) - -def saveValidationsAsFolder(path, validations): - #Pickles validation dictionaries into the given folder. - for i,v in enumerate(validations): - with open(os.path.join(path,"variant"+str(i)+".txt") ,"w") as f: - pickle.dump(v,f) - -def saveValidationsAsJSON(path,validations): - #Saves a set of validations (v is a list of dictionaries) or a bunch of validations (v is a list of dictionaries) - #as the json given in path. The name of the file will be that of the input variant string. - jOut=json.dumps(validations) - with open(path,"w") as f: - f.write(jOut) - print("JSON saved to "+path) - -def loadVariantFile(path): - out=[] - #Load up the input variant file, should be passed in path.txt. Extra space, commas and quotes will be stripped. - with open(path) as f: - for l in f.readlines(): - l=l.strip() - if len(l)>3: - if l[-1]==",": - l=l[:-1] - if l[-1]=='"': - l=l[:-1] - if l[0]=='"': - l=l[1:] - out.append(l) - return out - -def saveVariantFile(path, variants): - #Saves a variant input array (a bunch of strings) into a new text file given by path. - with open(path,"w") as f: - for v in variants: - f.write(v+"\n") - -def mergeVariantList(variants1,variants2): - #Merges two lists of variants, avoiding duplicants. 
- out=[] - for v in variants1: - if not v in out: - out.append(v) - for v in variants2: - if not v in out: - out.append(v) - return out - -def loadValidations(path): - #Loads a set of validations from the folder given in path. - out=[] - for paths,dirs,files in os.walk(path): - for filePath in files: - with open(os.path.join(paths,filePath)) as f: - out.append(pickle.load(f)) - #print(type(out[-1])) - return out - -def validateBatch(variantArray,val): - #Returns an array of validations (themselves dictionary objects). - out=[] - selectTranscripts='all' - selectedAssembly='GRCh37' - for i,v in enumerate(variantArray): - print("VALIDATING Variant"+str(i)+" "+str(i+1)+"/"+str(len(variantArray))+" "+str(v)) - try: - out.append(val.validate(v,selectedAssembly,selectTranscripts)) - except KeyboardInterrupt: - print("Exiting...") - sys.exit() - except Exception as e: - print("FATAL error processing variant: "+str(e)) - out.append({"ERROR":str(e)}) - return out - -def retrieveVariant(validation): - #Returns the variant string (if possible) from a validation. 
- out=None - for v in validation.values(): - try: - if type(v)==type({}) and "submitted_variant" in v.keys(): - out=v["submitted_variant"] - return out - except (KeyError, TypeError, AttributeError): - pass - raise AttributeError("Validation does not contain the original variant string") - -def compareValidations(v1,v2,id): - #print(v1,v2) - for vk in v1.keys(): - if not (vk in v2.keys()): -# print("tag "+vk+" : "+str(v1[vk])+" not found in second variant") - print("Variant "+str(id)+": Tag "+vk+" not found in second variant") - return False - for vk in v2.keys(): - if not (vk in v1.keys()): -# print("tag "+vk+" : "+str(v2[vk])+" not found in first variant") - print("Variant "+str(id)+": Tag "+vk+" not found in first variant") - return False - for vk in v1.keys(): - if not (v1[vk]==v2[vk]): - if type(v1[vk])==type(dict()) or type(v2[vk])==type(dict()): - print("Variant " + str(id) + ": Different tag values for key " + str(vk)) - else: - print("Variant "+str(id)+": Different tag values - "+str(vk)+" : "+str(v1[vk])+" vs. "+str(vk)+" : "+str(v2[vk])) - return False - return True - -def compareBatches(v1path,v2path): - #Loads all files in validations folder and compares them - outFlags=[] - passScore=0 - v1batch=loadValidations(v1path) - v2batch=loadValidations(v2path) - print("Comparing validation sets...") - for i,v in enumerate(v1batch): -# print("Comparing validation "+str(i)) - outFlags.append(compareValidations(v1batch[i],v2batch[i],i)) - if outFlags[-1]: - passScore+=1 - if passScore==len(v1batch): - #Test passed. 
- print("Validation sets are identical, "+str(passScore)+" passed") - return True - else: - print("Validation sets are NOT identical, passed " + str(passScore) + "/" + str(len(v1batch))) - #for i,v in enumerate(v1batch): - #if not outFlags[i]: - #print("Mismatch in validation "+str(i)) - #print(v1batch[i]) - #print("Verses") - #print(v2batch[i]) - return False - -if __name__=="__main__": - - inputVariants="inputVariants.txt" - #saveOut="testJSON.json" - - #fn.generateTestJSON(saveOut,inputVariants,sysOut) - generateTestFolder("testOutputs",inputVariants) diff --git a/VariantValidator/testing/vvTestSave.py b/VariantValidator/testing/vvTestSave.py deleted file mode 100644 index 6b44a365..00000000 --- a/VariantValidator/testing/vvTestSave.py +++ /dev/null @@ -1,9 +0,0 @@ -#Saving script - -import vvTestFunctions as fn -from vvObjects import Validator -import os - -val=Validator() - -fn.generateTestFolder("testOutputsReworked","inputVariants.txt",val) \ No newline at end of file diff --git a/VariantValidator/testing/vvTestSaveOutput101.txt b/VariantValidator/testing/vvTestSaveOutput101.txt deleted file mode 100644 index 90625fbf..00000000 --- a/VariantValidator/testing/vvTestSaveOutput101.txt +++ /dev/null @@ -1,552 +0,0 @@ -VALIDATING Variant0 1/334 NM_015120.4:c.35T>C -VALIDATING Variant1 2/334 NM_015120.4:c.39G>C -VALIDATING Variant2 3/334 NM_015120.4:c.34C>T -VALIDATING Variant3 4/334 NC_000002.11:g.73613030C>T -VALIDATING Variant4 5/334 NC_000023.10:g.33229673A>T -VALIDATING Variant5 6/334 NM_001145026.1:c.715A>G -VALIDATING Variant6 7/334 NC_000016.9:g.2099572TC>T -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -VALIDATING Variant7 8/334 NM_000088.3:c.589GG>CT -VALIDATING Variant8 9/334 NM_000094.3:c.6751-2_6751-3del -VALIDATING Variant9 10/334 COL5A1:c.5071A>T -VALIDATING Variant10 11/334 NG_007400.1:c.5071A>T -VALIDATING Variant11 12/334 chr16:15832508_15832509delinsAC -VALIDATING Variant12 13/334 
NM_000088.3:c.589-1GG>G -VALIDATING Variant13 14/334 NM_000088.3:c.642+1GT>G -VALIDATING Variant14 15/334 NM_000088.3:c.589-2AG>G -VALIDATING Variant15 16/334 NC_000017.10:g.48279242G>T -VALIDATING Variant16 17/334 NM_000500.7:c.-107-19C>T -Normalization of intronic variants is not supported -VALIDATING Variant17 18/334 NM_000518.4:c.-130C>T -The given coordinate is outside the bounds of the reference sequence. -VALIDATING Variant18 19/334 NM_000518.4:c.-50-80C>T -Normalization of intronic variants is not supported -VALIDATING Variant19 20/334 NM_000518.4:c.316_*342delinsCTACTT -The given coordinate is outside the bounds of the reference sequence. -VALIDATING Variant20 21/334 NM_000518.4:c.316_*100del -VALIDATING Variant21 22/334 NM_000518.4:c.*2000C>T -The given coordinate is outside the bounds of the reference sequence. -VALIDATING Variant22 23/334 NM_000518.4:c.*132+1868C>T -Normalization of intronic variants is not supported -VALIDATING Variant23 24/334 NM_000518.4:c.-130_*2000= -The given coordinate is outside the bounds of the reference sequence. 
-VALIDATING Variant24 25/334 NM_000518.4:c.-50-80_*132+1868= -VALIDATING Variant25 26/334 NR_138595.1:n.-810C>T -VALIDATING Variant26 27/334 NR_138595.1:n.1-810C>T -VALIDATING Variant27 28/334 NR_138595.1:n.1071+1A= -VALIDATING Variant28 29/334 NR_138595.1:n.-810_1071+1= -VALIDATING Variant29 30/334 NC_000017.10:g.48261457_48261463TTATGTT= -VALIDATING Variant30 31/334 NC_000017.10:g.48275363C>A -VALIDATING Variant31 32/334 NM_000088.3:c.589-1G>T -VALIDATING Variant32 33/334 NM_000088.3:c.591_593inv -VALIDATING Variant33 34/334 11-5248232-T-A -VALIDATING Variant34 35/334 NG_007400.1(NM_000088.3):c.589-1G>T -VALIDATING Variant35 36/334 1:150550916G>A -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -VALIDATING Variant36 37/334 1-150550916-G-A -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -VALIDATING Variant37 38/334 NG_008123.1(LEPRE1_v003):c.2055+18G>A -VALIDATING Variant38 39/334 NG_008123.1:c.2055+18G>A -VALIDATING Variant39 40/334 NG_008123.1(NM_022356.3):c.2055+18G>A -VALIDATING Variant40 41/334 NM_021983.4:c.490G>C -VALIDATING Variant41 42/334 NM_032470.3:c.4del -VALIDATING Variant42 43/334 NM_001194958.2:c.20C>A -VALIDATING Variant43 44/334 NM_000022.2:c.534A>G -VALIDATING Variant44 45/334 HSCHR6_MHC_SSTO_CTG1-3852542-C-G -VALIDATING Variant45 46/334 NM_000368.4:c.363+1dupG -VALIDATING Variant46 47/334 NM_000368.4:c.363dupG -VALIDATING Variant47 48/334 NM_000089.3:c.1033_1035delGTT -VALIDATING Variant48 49/334 NM_000089.3:c.1035_1035+2delTGT -VALIDATING Variant49 50/334 NM_000088.3:c.2023_2028delGCAAGA -VALIDATING Variant50 51/334 NM_000089.3:c.938-1delG -VALIDATING Variant51 52/334 NM_000088.3:c.589G= -VALIDATING Variant52 53/334 NM_000088.3:c.642A= -VALIDATING Variant53 54/334 NM_000088.3:c.642+1GG>G -VALIDATING Variant54 55/334 
NM_000088.3:c.589-2GG>G -VALIDATING Variant55 56/334 NM_000088.3:c.589-6_589-5insTTTT -VALIDATING Variant56 57/334 NM_000088.3:c.642+3_642+4insAAAA -VALIDATING Variant57 58/334 NM_000088.3:c.589-4_589-3insTT -VALIDATING Variant58 59/334 NM_000088.3:c.589-8del -VALIDATING Variant59 60/334 NM_000527.4:c.-187_-185delCTC -VALIDATING Variant60 61/334 NM_206933.2:c.6317C>G -VALIDATING Variant61 62/334 NC_000013.10:g.32929387T>C -VALIDATING Variant62 63/334 NM_015102.3:c.2818-2T>A -VALIDATING Variant63 64/334 19-41123094-G-GG -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -VALIDATING Variant64 65/334 15-72105928-AC-A -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -VALIDATING Variant65 66/334 12-122064773-CCCGCCA-C -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -VALIDATING Variant66 67/334 12-122064774-CCGCCA-CCGCCA -VALIDATING Variant67 68/334 12-122064773-CCCGCCACCGCCACCGC-CCCGCCACCGCCGCCGTC -VALIDATING Variant68 69/334 NC_000012.11:g.122064777C>A -VALIDATING Variant69 70/334 NC_000012.11:g.122064776delG -VALIDATING Variant70 71/334 NC_000012.11:g.122064776dupG -VALIDATING Variant71 72/334 NC_000012.11:g.122064776_122064777insTTT -VALIDATING Variant72 73/334 NC_000012.11:g.122064772_122064775del -object of type 'NoneType' has no len() -object of type 'NoneType' has no len() -VALIDATING Variant73 74/334 NC_000012.11:g.122064772_122064775dup -VALIDATING Variant74 75/334 NC_000012.11:g.122064773_122064774insTTTT -VALIDATING Variant75 76/334 NC_000012.11:g.122064772_122064777del -VALIDATING Variant76 77/334 NC_000012.11:g.122064772_122064777dup -VALIDATING Variant77 78/334 NC_000012.11:g.122064779_122064782dup -VALIDATING Variant78 
79/334 NC_000012.11:g.122064772_122064782del -VALIDATING Variant79 80/334 NC_000002.11:g.95847041_95847043GCG= -VALIDATING Variant80 81/334 NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG= -VALIDATING Variant81 82/334 NC_000003.11:g.14561629_14561630GC= -VALIDATING Variant82 83/334 NC_000003.11:g.14561629_14561630insG -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -VALIDATING Variant83 84/334 NC_000004.11:g.140811111_140811122del -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -VALIDATING Variant84 85/334 NC_000004.11:g.140811111_140811122CTGCTGCTGCTG= -VALIDATING Variant85 86/334 NC_000004.11:g.140811117_140811122del -VALIDATING Variant86 87/334 NC_000004.11:g.140811111_140811117del -object of type 'NoneType' has no len() -object of type 'NoneType' has no len() -VALIDATING Variant87 88/334 NC_000004.11:g.140811117C>A -VALIDATING Variant88 89/334 NC_000002.11:g.73675227_73675228insCTC -local variable 'hgvs_c' referenced before assignment -VALIDATING Variant89 90/334 9-136132908-T-TC -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -VALIDATING Variant90 91/334 9-136132908-TAC-TCA -VALIDATING Variant91 92/334 9-136132908-TA-TA -VALIDATING Variant92 93/334 NM_020469.2:c.258delG -VALIDATING Variant93 94/334 NM_020469.2:c.260_262TGA= -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -VALIDATING Variant94 95/334 NM_020469.2:c.261delG -VALIDATING Variant95 96/334 NM_020469.2:c.261dupG -VALIDATING Variant96 97/334 NM_020469.2:c.261_262insTT -VALIDATING Variant97 98/334 NC_000019.10:g.50378563_50378564insTAC -VALIDATING Variant98 99/334 NC_000019.10:g.50378563_50378564insC -VALIDATING Variant99 100/334 
NC_000019.10:g.50378564_50378565insTACA -VALIDATING Variant100 101/334 NC_000019.10:g.50378565_50378567dup -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -VALIDATING Variant101 102/334 NC_000019.10:g.50378563_50378564= -VALIDATING Variant102 103/334 NC_000019.10:g.50378563_50378564insTCGG -VALIDATING Variant103 104/334 NC_000019.10:g.50378563delinsTTAC -VALIDATING Variant104 105/334 NC_000019.10:g.50378563_50378564insTAAC -VALIDATING Variant105 106/334 NC_000019.10:g.50378562_50378565del -VALIDATING Variant106 107/334 NC_000019.10:g.50378562_50378565delinsTC -VALIDATING Variant107 108/334 NC_000007.14:g.149779575_149779577delinsT -VALIDATING Variant108 109/334 NC_000007.14:g.149779575_149779577= -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -VALIDATING Variant109 110/334 NC_000007.14:g.149779576_149779578del -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -VALIDATING Variant110 111/334 NC_000007.14:g.149779577del -VALIDATING Variant111 112/334 NC_000007.14:g.149779573_149779579del -object of type 'NoneType' has no len() -VALIDATING Variant112 113/334 NC_000007.14:g.149779573_149779579delinsCA -VALIDATING Variant113 114/334 NM_000088.3:c.590_591inv -VALIDATING Variant114 115/334 NM_024989.3:c.1778_1779inv -VALIDATING Variant115 116/334 NM_032815.3:c.555_556inv -VALIDATING Variant116 117/334 NM_006138.4:c.3_4inv -VALIDATING Variant117 118/334 NM_000038.5:c.3927_3928delAAinsTT -VALIDATING Variant118 119/334 NM_001034853.1:c.2847_2848delAGinsCT -VALIDATING Variant119 120/334 NM_000088.3:c.4392_*2inv -VALIDATING Variant120 121/334 NM_000088.3:c.4392_*5inv -VALIDATING Variant121 
122/334 NM_000088.3:c.4390_*7inv -VALIDATING Variant122 123/334 NM_005732.3:c.2923-5insT -VALIDATING Variant123 124/334 NM_198283.1(EYS):c.*743120C>T -The given coordinate is outside the bounds of the reference sequence. -The given coordinate is outside the bounds of the reference sequence. -The given coordinate is outside the bounds of the reference sequence. -The given coordinate is outside the bounds of the reference sequence. -The given coordinate is outside the bounds of the reference sequence. -VALIDATING Variant124 125/334 NM_133379.4(TTN):c.*265+26591C>T -Normalization of intronic variants is not supported -The given coordinate is outside the bounds of the reference sequence. -The given coordinate is outside the bounds of the reference sequence. -The given coordinate is outside the bounds of the reference sequence. -The given coordinate is outside the bounds of the reference sequence. -The given coordinate is outside the bounds of the reference sequence. -The given coordinate is outside the bounds of the reference sequence./NM_133379.4/NC_000002.11~The given coordinate is outside the bounds of the reference sequence./NM_133379.4/NC_018913.2~The given coordinate is outside the bounds of the reference sequence./NM_133379.4/NC_000002.12~The given coordinate is outside the bounds of the reference sequence./NM_133379.4/NG_011618.3~ -VALIDATING Variant125 126/334 NM_000088.3:c.589-2_589-1AG>G -VALIDATING Variant126 127/334 NM_000088.3:c.642+1_642+2delGTinsG -VALIDATING Variant127 128/334 NM_004415.3:c.1-1insA -VALIDATING Variant128 129/334 NM_004415.3:c.-1_1insA -VALIDATING Variant129 130/334 NM_000273.2:c.1-5028_253del -VALIDATING Variant130 131/334 NM_002929.2:c.1006C>T -The given coordinate is outside the bounds of the reference sequence. -The given coordinate is outside the bounds of the reference sequence. -The given coordinate is outside the bounds of the reference sequence. -The given coordinate is outside the bounds of the reference sequence. 
-The given coordinate is outside the bounds of the reference sequence. -The given coordinate is outside the bounds of the reference sequence. -VALIDATING Variant131 132/334 NR_125367.1:n.167+18165G>A -VALIDATING Variant132 133/334 NM_006005.3:c.3071_3073delinsTTA -VALIDATING Variant133 134/334 NM_000089.3:n.1504_1506del -VALIDATING Variant134 135/334 NC_012920.1:m.1011C>T -VALIDATING Variant135 136/334 NC_000006.11:g.90403795G= -VALIDATING Variant136 137/334 1-169519049-T-. -VALIDATING Variant137 138/334 NC_000005.9:g.35058667_35058668AG= -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -invalid literal for int() with base 10: '' -invalid literal for int() with base 10: '' -invalid literal for int() with base 10: '' -invalid literal for int() with base 10: '' -invalid literal for int() with base 10: '' -invalid literal for int() with base 10: '' -invalid literal for int() with base 10: '' -invalid literal for int() with base 10: '' -invalid literal for int() with base 10: '' -invalid literal for int() with base 10: '' -invalid literal for int() with base 10: '' -VALIDATING Variant138 139/334 NM_000251.1:c.1296_1348del -VALIDATING Variant139 140/334 NM_000088.3:c.2023_2028del -VALIDATING Variant140 141/334 NM_000088.3:c.2024_2028+1del -VALIDATING Variant141 142/334 ENST00000450616.1:n.31+1G>C -VALIDATING Variant142 143/334 ENST00000491747:c.5071A>T -VALIDATING Variant143 144/334 NM_000088.3:c.589G>T -VALIDATING Variant144 145/334 NG_007400.1:g.8638G>T -VALIDATING Variant145 146/334 LRG_1:g.8638G>T -VALIDATING Variant146 147/334 LRG_1t1:c.589G>T -VALIDATING Variant147 148/334 chr16:g.15832508_15832509delinsAC -VALIDATING Variant148 149/334 NG_012386.1:g.24048dupG -Normalization of intronic variants is not supported -VALIDATING Variant149 150/334 NM_033517.1:c.1307_1309delCGA -VALIDATING Variant150 151/334 HG1311_PATCH-33720-CCGA-C -VALIDATING Variant151 152/334 2-73675227-TCTC-TCTCCTC -local variable 'hgvs_c' 
referenced before assignment -VALIDATING Variant152 153/334 2-73675227-TC-TC -VALIDATING Variant153 154/334 3-14561627-AG-AGG -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -VALIDATING Variant154 155/334 3-14561630-CC-CC -VALIDATING Variant155 156/334 6-90403795-G-G -VALIDATING Variant156 157/334 6-90403795-G-A -VALIDATING Variant157 158/334 6-32012992-CG-C -VALIDATING Variant158 159/334 17-48275363-C-A -VALIDATING Variant159 160/334 17-48275364-C-A -VALIDATING Variant160 161/334 17-48275359-GGA-TCC -VALIDATING Variant161 162/334 7-94039128-CTTG-C -VALIDATING Variant162 163/334 9-135800972-AC-ACC -Normalization of intronic variants is not supported -VALIDATING Variant163 164/334 1-43212925-C-T -VALIDATING Variant164 165/334 HG987_PATCH-355171-C-A -VALIDATING Variant165 166/334 20-43252915-T-C -VALIDATING Variant166 167/334 1-216219781-A-C -VALIDATING Variant167 168/334 2-209113113-G-A,C,T -VALIDATING Variant168 169/334 NC_000005.9:g.35058665_35058666CA= -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -invalid literal for int() with base 10: '' -invalid literal for int() with base 10: '' -invalid literal for int() with base 10: '' -invalid literal for int() with base 10: '' -invalid literal for int() with base 10: '' -invalid literal for int() with base 10: '' -invalid literal for int() with base 10: '' -invalid literal for int() with base 10: '' -invalid literal for int() with base 10: '' -invalid literal for int() with base 10: '' -invalid literal for int() with base 10: '' -VALIDATING Variant169 170/334 NC_000002.11:g.73675227_73675229delTCTinsTCTCTC -VALIDATING Variant170 171/334 NM_000828.4:c.-2dupG -FATAL error processing variant: Validation error -VALIDATING Variant171 172/334 X-122318386-A-AGG -FATAL error processing variant: Validation error -VALIDATING Variant172 173/334 NM_000828.4:c.-2G>T -VALIDATING Variant173 174/334 
NM_000828.4:c.-2G= -local variable 'hgvs_c' referenced before assignment -VALIDATING Variant174 175/334 X-122318386-A-AT -VALIDATING Variant175 176/334 NM_000828.4:c.-2_-1insT -VALIDATING Variant176 177/334 NM_000828.4:c.-3_-2insT -VALIDATING Variant177 178/334 NM_000828.4:c.-2delGinsTT -VALIDATING Variant178 179/334 NM_000828.4:c.-2_-1delGCinsTT -VALIDATING Variant179 180/334 NM_000828.4:c.-3_-2delAGinsTT -VALIDATING Variant180 181/334 15-72105929-C-C -VALIDATING Variant181 182/334 15-72105928-AC-ATT -VALIDATING Variant182 183/334 15-72105928-ACC-ATT -VALIDATING Variant183 184/334 15-72105927-GACC-GTT -VALIDATING Variant184 185/334 19-41123093-A-AG -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -VALIDATING Variant185 186/334 19-41123093-A-AT -VALIDATING Variant186 187/334 19-41123093-AG-A -VALIDATING Variant187 188/334 19-41123093-AG-AG -VALIDATING Variant188 189/334 NM_012309.4:c.913-5058G>A -VALIDATING Variant189 190/334 LRG_199t1:c.2376[G>C];[G>C] -VALIDATING Variant190 191/334 LRG_199t1:c.[2376G>C];[3103del] -[u'NM_004006.2:c.2376G>C'] -[u'NM_004006.2:c.3103del'] -VALIDATING Variant191 192/334 LRG_199t1:c.[4358_4359del;4361_4372del] -[u'NM_004006.2:c.4358_4359del', u'NM_004006.2:c.4361_4372del'] -VALIDATING Variant192 193/334 LRG_199t1:c.2376G>C(;)3103del -VALIDATING Variant193 194/334 LRG_199t1:c.2376[G>C];[(G>C)] -VALIDATING Variant194 195/334 LRG_199t1:c.[2376G>C];[?] 
-[u'NM_004006.2:c.2376G>C'] -[u'NM_004006.2:c.?'] -VALIDATING Variant195 196/334 LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C -VALIDATING Variant196 197/334 LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del -VALIDATING Variant197 198/334 LRG_199t1:c.[976-20T>A;976-17_976-1dup] -[u'NM_004006.2:c.976-20T>A', u'NM_004006.2:c.976-17_976-1dup'] -VALIDATING Variant198 199/334 1-5935162-A-T -VALIDATING Variant199 200/334 1-12065948-C-T -VALIDATING Variant200 201/334 1-46655125-CTCAC-C -VALIDATING Variant201 202/334 1-68912523-TGAGCCAGAG-T -VALIDATING Variant202 203/334 1-68912526-GCCAGAG-G -VALIDATING Variant203 204/334 1-109817590-G-T -VALIDATING Variant204 205/334 1-145597475-GAAGT-G -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -VALIDATING Variant205 206/334 1-153791300-CTG-C -VALIDATING Variant206 207/334 1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC -VALIDATING Variant207 208/334 1-156108541-G-GG -VALIDATING Variant208 209/334 1-161279695-T-A -VALIDATING Variant209 210/334 1-169519049-T-T -VALIDATING Variant210 211/334 1-226125468-G-A -VALIDATING Variant211 212/334 10-89623035-CGCA-C -VALIDATING Variant212 213/334 11-62457852-C-A -VALIDATING Variant213 214/334 11-108178710-A-AT -VALIDATING Variant214 215/334 11-111735981-G-A -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported 
-Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is 
not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -VALIDATING Variant215 216/334 12-11023080-C-A -VALIDATING Variant216 217/334 12-22018712-TC-T -VALIDATING Variant217 218/334 
12-52912946-T-C -VALIDATING Variant218 219/334 12-103234292-TC-T -VALIDATING Variant219 220/334 12-103311124-T-C -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -VALIDATING Variant220 221/334 12-111064166-G-A -Normalization of intronic variants is not supported -VALIDATING Variant221 222/334 12-123738430-CA-C -VALIDATING Variant222 223/334 13-31789169-CT-C -VALIDATING Variant223 224/334 14-62187287-G-A -VALIDATING Variant224 225/334 14-62188231-TT-GA -VALIDATING Variant225 226/334 14-63174827-C-A -VALIDATING Variant226 227/334 15-42680000-CA-C -VALIDATING Variant227 228/334 15-42680000-CA-CAA -VALIDATING Variant228 229/334 15-42703179-T-TTCA -VALIDATING Variant229 230/334 15-42703179-TAG-TTCATCT -VALIDATING Variant230 231/334 15-48782203-C-T -VALIDATING Variant231 232/334 15-72105929-CC-C -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -local variable 'hgvs_c' referenced before assignment -VALIDATING Variant232 233/334 15-89873415-G-A -VALIDATING Variant233 234/334 16-2103394-C-T -Normalization of intronic variants is not supported -VALIDATING Variant234 235/334 16-3779300-C-G -VALIDATING Variant235 236/334 16-5128843-C-G -VALIDATING Variant236 237/334 16-74808559-C-T -VALIDATING Variant237 238/334 16-89574804-C-A -VALIDATING Variant238 239/334 16-89574826-A-C -VALIDATING Variant239 240/334 16-89574914-G-GT -VALIDATING Variant240 241/334 16-89574916-C-CGTC -VALIDATING Variant241 242/334 16-89575009-G-A -VALIDATING Variant242 243/334 16-89575040-C-A,CA -VALIDATING Variant243 244/334 16-89576896-A-C -VALIDATING Variant244 245/334 16-89576930-T-TA,TT -VALIDATING Variant245 246/334 16-89576931-G-GTG -VALIDATING Variant246 247/334 16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C -VALIDATING Variant247 248/334 16-89613064-AGGAGAGGCG-AT -VALIDATING 
Variant248 249/334 16-89613069-AGGCGGGAGA-AT -VALIDATING Variant249 250/334 16-89613145-C-T -VALIDATING Variant250 251/334 17-7578194-GCAC-G -VALIDATING Variant251 252/334 17-7578523-T-TG -VALIDATING Variant252 253/334 17-17119692-A-C -VALIDATING Variant253 254/334 17-41197588-GGACA-G -VALIDATING Variant254 255/334 17-41256884-C-G -VALIDATING Variant255 256/334 17-42991428-C-A -VALIDATING Variant256 257/334 17-48252809-A-T -VALIDATING Variant257 258/334 17-62022709-G-GTC -VALIDATING Variant258 259/334 17-62022711-C-CT -VALIDATING Variant259 260/334 17-62023005-G-GGC -VALIDATING Variant260 261/334 17-62023006-C-A -VALIDATING Variant261 262/334 17-62034787-G-A -VALIDATING Variant262 263/334 18-24128261-GTCCTCC-G -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -VALIDATING Variant263 264/334 19-15291774-G-A -VALIDATING Variant264 265/334 19-15311794-A-G -VALIDATING Variant265 266/334 19-39076592-G-A -VALIDATING Variant266 267/334 2-50149352-T-C -VALIDATING Variant267 268/334 2-50847195-G-A -VALIDATING Variant268 269/334 2-71825797-C-G -VALIDATING Variant269 270/334 2-166179712-G-C -VALIDATING Variant270 271/334 2-166183371-A-G -VALIDATING Variant271 272/334 2-166929889-GTCCAGGTCCT-GAC -VALIDATING Variant272 273/334 2-166929891-CCAGGTCCT-C -VALIDATING Variant273 274/334 2-179393504-G-T -VALIDATING Variant274 275/334 2-185803444-TGCAGCTGCTGCAGCTGCAGCTGCA-T -VALIDATING Variant275 276/334 2-201950249-G-T -VALIDATING Variant276 277/334 2-238268730-C-A -VALIDATING Variant277 278/334 21-43897396-C-T -VALIDATING Variant278 279/334 22-30064360-G-GCGACGC -VALIDATING Variant279 280/334 3-10188187-TGTCCCGATAG-T -Normalization of intronic variants is not supported -VALIDATING Variant280 281/334 3-50402127-T-G -VALIDATING Variant281 282/334 3-50402890-G-A -VALIDATING Variant282 283/334 3-57851007-AG-A -VALIDATING 
Variant283 284/334 3-122003832-G-C -VALIDATING Variant284 285/334 4-153332910-C-CAGG -VALIDATING Variant285 286/334 5-1295183-G-A -VALIDATING Variant286 287/334 5-77396835-TTTC-T -VALIDATING Variant287 288/334 5-118811422-GGTGA-G -Normalization of intronic variants is not supported -VALIDATING Variant288 289/334 5-118811422-GGTGAG-G -Normalization of intronic variants is not supported -VALIDATING Variant289 290/334 5-131705587-CG-C -VALIDATING Variant290 291/334 5-148406482-T-C -VALIDATING Variant291 292/334 6-110036337-T-TCAG -VALIDATING Variant292 293/334 6-110036337-TGAT-T -VALIDATING Variant293 294/334 6-152651802-C-A -VALIDATING Variant294 295/334 6-152737643-C-G -VALIDATING Variant295 296/334 7-6026775-T-C -VALIDATING Variant296 297/334 7-55242465-GGAATTAAGAGAAGCA-G -VALIDATING Variant297 298/334 7-55248992-T-TTCCAGGAAGCCT -VALIDATING Variant298 299/334 7-75932111-C-A -VALIDATING Variant299 300/334 7-91652178-A-AAAC -VALIDATING Variant300 301/334 7-117199644-ATCT-A -VALIDATING Variant301 302/334 7-140453136-AC-CT -VALIDATING Variant302 303/334 7-140453136-A-T -VALIDATING Variant303 304/334 7-140453137-C-T -VALIDATING Variant304 305/334 7-143013488-A-T -VALIDATING Variant305 306/334 7-143018934-G-A -VALIDATING Variant306 307/334 7-143048771-C-T -VALIDATING Variant307 308/334 8-1871951-C-T -VALIDATING Variant308 309/334 9-13112056-T-TG -VALIDATING Variant309 310/334 9-21971208-C-A -Normalization of intronic variants is not supported -Normalization of intronic variants is not supported -VALIDATING Variant310 311/334 9-35683240-T-TG -VALIDATING Variant311 312/334 9-135796754-G-A -VALIDATING Variant312 313/334 HG536_PATCH-10391-AC-A -VALIDATING Variant313 314/334 HG865_PATCH-33547-G-A -VALIDATING Variant314 315/334 HG865_PATCH-569441-G-T -VALIDATING Variant315 316/334 HG865_PATCH-574546-C-T -VALIDATING Variant316 317/334 HSCHR1_1_CTG31-133178-TAG-T -VALIDATING Variant317 318/334 HSCHR6_MHC_MANN_CTG1-3848158-T-G -VALIDATING Variant318 319/334 
HSCHR6_MHC_MANN_CTG1-3851043-C-A -VALIDATING Variant319 320/334 X-70443101-C-T -Normalization of intronic variants is not supported -VALIDATING Variant320 321/334 X-107845202-GACCACC-GACC,G -VALIDATING Variant321 322/334 X-153296777-G-A -VALIDATING Variant322 323/334 NM_198180.2:c.408_410delGTG -VALIDATING Variant323 324/334 NM_080877.2:c.1733_1735delinsTTT -VALIDATING Variant324 325/334 NM_080877.2:c.1735_1737delinsTGA -VALIDATING Variant325 326/334 NM_080877.2:c.1735_1737delinsTAATTGTTC -VALIDATING Variant326 327/334 NM_080877.2:c.1737delinsATTGTTC -VALIDATING Variant327 328/334 NM_000088.3:c.4392_*2delinsAGAG -VALIDATING Variant328 329/334 NM_000088.3:c.589_591delinsAGAAGC -VALIDATING Variant329 330/334 NM_000885.5:c.*2536delinsAGAAAAATCA -VALIDATING Variant330 331/334 NM_002693.2:c.-186_-185delinsCC -VALIDATING Variant331 332/334 NG_009616.1:g.29052_29053insCTACATAG -VALIDATING Variant332 333/334 NM_000061.2:c.588_588+1insCTACATAG -VALIDATING Variant333 334/334 NM_000061.2:c.588_589insCTACATAG diff --git a/VariantValidator/variantValidator.py b/VariantValidator/variantValidator.py deleted file mode 100644 index 23b4f62c..00000000 --- a/VariantValidator/variantValidator.py +++ /dev/null @@ -1,8716 +0,0 @@ -# -*- coding: utf-8 -*- -""" -VariantValidator.py -List of top level VariantValidator functions -This API is configured by reading the configuration information in the config.ini file -located in the configuration module contained the the root variantValidator directory. -These configurations can be over-ridden by setting environment variables, see -README.txt -This version of the VariantValidator API 0.1.0 contains the following functions: -1 my_config -my_config is a simple function that allows the user to determine whether VariantValidator is -correctly configured, i.e. is the tool searching in the correct locations for its data? -The function also returns version information -# Example -my_config() -2. 
validator -validator is the primary VariantValidator function which validates sequence variation -descriptions. validator uses sub functions in the variantanalyser module -contained the the root variantValidator directory, and functions priovided by the -hgvs Python package (https://github.com/biocommons/hgvs/) to "manipulate biological -sequence variants according to Human Genome Variation Society recommendations" -# Example -variant = ' NM_000088.3:c.589G>T' -selected_assembly = 'GRCh37' # or GRCh37, hg19, hg38 -select_transcripts = 'all' # Or a pipe delimited, white-space-less, string of transcript -IDs validation = validator(variant, selected_assembly, select_transcripts) -# Accepted input formats -NM_000088.3:c.589G>T -NC_000017.10:g.48275363C>A -NG_007400.1:g.8638G>T -LRG_1:g.8638G>T -LRG_1t1:c.589G>T -17-50198002-C-A (GRCh38) -chr17:50198002C>A (GRCh38) -3. gene2transcripts -This function is similar to the Gene to Transcripts function -https://variantvalidator.org/ref_finder/ except the data is returned within a structured -python object -# HGNC example -variantValidator.validator.gene2transcripts('HTT') -# RefSeq Transcript example -variantValidator.validator.gene2transcripts('NM_002111.8') -4. hgvs2ref -This function retuns the reference sequence with respect to HGVS variation descriptions -The function will only return REFERENCE SEQUENCE i.e. if a c. 
descriptions overlaps an -intron/exon boundary, only the exonic sequence will be returned -# Example -hgvs2ref('NM_000088.3:c.589_594del') -""" - -# IMPORT HGVS MODULES -import hgvs -import hgvs.parser -import hgvs.dataproviders.uta -import hgvs.dataproviders.seqfetcher -import hgvs.assemblymapper -import hgvs.variantmapper -import hgvs.sequencevariant -import hgvs.validator -import hgvs.exceptions -import hgvs.location -import hgvs.posedit -import hgvs.edit -import hgvs.normalizer - -# IMPORT PYTHON MODULES -import re -import time -import datetime -import copy -import os -import sys -import warnings -from operator import itemgetter -from pyliftover import LiftOver -import traceback -from configparser import ConfigParser - -from Bio.Seq import Seq - -# Import variantanalyser and peripheral VV modules -import ref_seq_type -import external -import output_formatter -import variantanalyser -from vvLogging import logger -from variantanalyser import functions as va_func -from variantanalyser import dbControls as va_dbCrl -from variantanalyser import hgvs2vcf as va_H2V -from variantanalyser import batch as va_btch -from variantanalyser import g_to_g as va_g2g -from variantanalyser import supported_chromosome_builds as va_scb -from variantanalyser import gap_genes as gapGenes -from variantanalyser.liftover import liftover as lift_over - -__version__ = None - -# Ensure configuration is on the OS -if os.environ.get('CONF_ROOT') is None: - import configuration - - CONF_ROOT = os.environ.get('CONF_ROOT') -else: - CONF_ROOT = os.environ.get('CONF_ROOT') -# Define global configuration variables -HGVS_SEQREPO_DIR = "Unspecified" -UTA_DB_URL = 'Unspecified' -VALIDATOR_DB_URL = 'Unspecified' -PYLIFTOVER_DIR = 'Unspecified' -ENTREZ_ID = 'Unspecified' -VERSION = 'Unspecified' -hgvs_version = 'Unspecified' - - -def exceptPass(validation=None): - exc_type, exc_value, last_traceback = sys.exc_info() - te = traceback.format_exc() - tbk = [str(exc_type), str(exc_value), str(te)] - er = 
str('\n'.join(tbk)) - if last_traceback: - logger.warning( - "Except pass for " + str(exc_type) + " " + str(exc_value) + " at line " + str(last_traceback.tb_lineno)) - else: - logger.warning("Except pass for " + str(exc_type) + " " + str(exc_value)) - logger.debug(er) - - -# Config Section Mapping function -def ConfigSectionMap(section, c): - dict1 = {} - options = c.options(section) - for option in options: - try: - dict1[option] = c.get(section, option) - if dict1[option] == -1: - logger.warning("skip: %s" % option) - except: - logger.warning("exception on %s!" % option) - dict1[option] = None - return dict1 - - -def loadConfigFile(): - # Set the version from the config.ini - global __version__ - Config = ConfigParser() - with open(os.path.join(CONF_ROOT, 'config.ini')) as f: - Config.read_file(f) - __version__ = ConfigSectionMap("variantValidator",Config)['version'] - if re.match('^\d+\.\d+\.\d+$', __version__) is not None: - _is_released_version = True - # Load database environments from config - - logString = ConfigSectionMap("logging", Config)['string'] - os.environ["VALIDATOR_DEBUG"] = logString - print "ac", os.environ["VALIDATOR_DEBUG"] - print("ls", logString) - - -# Custom Exceptions -class variantValidatorError(Exception): - pass - - -# PRE COMPILE VARIABLES -hgvs.global_config.uta.pool_max = 25 -hdp = hgvs.dataproviders.uta.connect(pooling=True) -# From the hgvs parser import, create an instance of hgvs.parser.Parser -hp = hgvs.parser.Parser() -# Configure hgvs package global settings -hgvs.global_config.formatting.max_ref_length = 1000000 -# Validator -vr = hgvs.validator.Validator(hdp) -# Variant mapper -vm = hgvs.variantmapper.VariantMapper(hdp) -# Create a lose vm instance -lose_vm = hgvs.variantmapper.VariantMapper(hdp, - replace_reference=True, - prevalidation_level=None - ) -nr_vm = hgvs.variantmapper.VariantMapper(hdp, replace_reference=False) -# Create seqfetcher object -sf = hgvs.dataproviders.seqfetcher.SeqFetcher() - -# Set current genome 
builds -genome_builds = ['GRCh37', 'hg19', 'GRCh38'] - -# Obtain environment variables needed within the top-level function -PYLIFTOVER_DIR = os.environ.get('PYLIFTOVER_DIR') - - -# method for final validation and stringifying parsed hgvs variants prior to printing/passing to html -def valstr(hgvs_variant): - """ - Function to ensure the required number of reference bases are displayed in descriptions - """ - cp_hgvs_variant = copy.deepcopy(hgvs_variant) - if cp_hgvs_variant.posedit.edit.type == 'identity': - if len(cp_hgvs_variant.posedit.edit.ref) > 1: - cp_hgvs_variant = output_formatter.remove_reference(cp_hgvs_variant) - cp_hgvs_variant = str(cp_hgvs_variant) - else: - cp_hgvs_variant = output_formatter.remove_reference(cp_hgvs_variant) - cp_hgvs_variant = str(cp_hgvs_variant) - return cp_hgvs_variant - - -# Check configuration variables -def my_config(): - loadConfigFile() - global HGVS_SEQREPO_DIR - global UTA_DB_URL - global VALIDATOR_DB_URL - global PYLIFTOVER_DIR - global ENTREZ_ID - global VERSION - global hgvs_version - try: - HGVS_SEQREPO_DIR = os.environ.get('HGVS_SEQREPO_DIR') - except Exception: - HGVS_SEQREPO_DIR = 'Unspecified' - try: - UTA_DB_URL = os.environ.get('UTA_DB_URL') - except Exception: - UTA_DB_URL = 'Unspecified' - VALIDATOR_DB_URL = os.environ.get('VALIDATOR_DB_URL') - try: - PYLIFTOVER_DIR = os.environ.get('PYLIFTOVER_DIR') - except Exception: - PYLIFTOVER_DIR = 'Unspecified' - ENTREZ_ID = os.environ.get('ENTREZ_ID') - VERSION = __version__, - VERSION = str(VERSION[0]) - hgvs_version = hgvs.__version__, - hgvs_version = str(hgvs_version[0]) - locate = { - 'seqrepo_directory': HGVS_SEQREPO_DIR, - 'uta_url': UTA_DB_URL, - 'py_liftover_directory': PYLIFTOVER_DIR, - 'variantvalidator_data_url': VALIDATOR_DB_URL, - 'entrez_id': ENTREZ_ID, - 'variantvalidator_version': VERSION, - 'variantvalidator_hgvs_version': hgvs_version, - 'uta_schema': str(hdp.data_version()), - 'seqrepo_db': HGVS_SEQREPO_DIR.split('/')[-1] - } - return locate - - 
-# Validator code -""" -This is the primary VariantValidator function -""" - - -def validator(batch_variant, selected_assembly, select_transcripts, transcriptSet="refseq"): - logger.info(batch_variant + ' : ' + selected_assembly) - # Take start time - start_time = time.time() - - # Set pre defined variables - # SeqFetcher - # sf = hgvs.dataproviders.seqfetcher.SeqFetcher() - - try: - # Validation - ############ - - # Create a dictionary of transcript ID : '' - if select_transcripts != 'all': - select_transcripts_list = select_transcripts.split('|') - select_transcripts_dict = {} - select_transcripts_dict_plus_version = {} - for id in select_transcripts_list: - id = id.strip() - if re.match('LRG', id): - id = va_dbCrl.data.get_RefSeqTranscriptID_from_lrgTranscriptID(id) - if id == 'none': - continue - select_transcripts_dict_plus_version[id] = '' - id = id.split('.')[0] - select_transcripts_dict[id] = '' - # Set up gene list dictionary - input_genes = {} - - # Remove genes if transcripts selected - # if select_transcripts != 'all': - - # split the batch queries into a list - batch_queries = batch_variant.split('|') - - # Turn each variant into a dictionary. 
The dictionary will be compiled during validation - batch_list = [] - for queries in batch_queries: - queries = queries.strip() - query = {'quibble': queries, 'id': queries, 'warnings': '', 'description': '', 'coding': '', 'coding_g': '', - 'genomic_r': '', 'genomic_g': '', 'protein': '', 'write': 'true', 'primary_assembly': 'false', - 'order': 'false'} - batch_list.append(query) - - # Create List to carry batch data - batch_out = [] - - # Ensure batch_list is pulled into the function so that it can be appended to - batch_list = batch_list - - # Enter the validation loop - ########################### - # Allow order by input - ordering = 0 - - """ - Set a flag to mark the final output type - flag : warning - flag : error - flag : intragenic - flag : gene - """ - set_output_type_flag = 'warning' - logger.debug("Batch list length " + str(len(batch_list))) - for validation in batch_list: - # Start timing - logger.traceStart(validation) - # Re-set cautions and automaps - - if transcriptSet == "refseq": - alt_aln_method = 'splign' - elif transcriptSet == "ensembl": - alt_aln_method = 'genebuild' - logger.warning("Ensembl is currently not supported") - validation['warnings'] += ': ' + "Ensembl is currently not supported" - continue - else: - logger.warning( - "The transcript set variable " + transcriptSet + " is invalid, it needs to be 'refseq' or 'ensembl'") - validation[ - 'warnings'] += ': ' + "The transcript set variable " + transcriptSet + " is invalid, it needs to be 'refseq' or 'ensembl'" - continue - - # Create Normalizers - hn = hgvs.normalizer.Normalizer(hdp, - cross_boundaries=False, - shuffle_direction=3, - alt_aln_method=alt_aln_method - ) - reverse_normalizer = hgvs.normalizer.Normalizer(hdp, - cross_boundaries=False, - shuffle_direction=5, - alt_aln_method=alt_aln_method - ) - - # Blank cautions - caution = '' - automap = '' - - # This will be used to order the final output - if str(validation['order']) == 'false': - ordering = ordering + 1 - 
validation['order'] = ordering - else: - pass - # Bug catcher - try: - # Note, ID is not touched. It is always the input variant description. Quibble will be altered but id will not if type = g. - input = validation['quibble'] - logger.trace("Commenced validation of " + str(input), validation) - - # Test for rich text unicode characters - try: - unicode_test = u"{}".format(input) - except UnicodeDecodeError as e: - # Format the trapped character into unicode for styled printing - my_unicode = e[1] - my_unicode = my_unicode.decode('utf-8') - - # Test for rich text unicode characters - try: - str(my_unicode) - except UnicodeEncodeError as e: - # Format the trapped character into unicode for styled printing - unicoded_it = e[1] - unicoded_it_list = unicoded_it.split() - for try_me in unicoded_it_list: - try: - str(try_me) - except UnicodeEncodeError as e: - found_unicode = try_me - found_error = str(e) - found_at = found_unicode.encode('raw_unicode_escape') - break - # Extract character from the error - unicode = re.findall("u'\\\\\w+'", found_error) - character = unicode[0] - search_term = character.replace("u'", '') - search_term = search_term.replace("'", '') - found_at_decoded = found_at.decode('raw_unicode_escape') - found_at = found_at_decoded.encode('raw_unicode_escape') - string_char = str(character) - # Create a human readable U+ representation - human_code = re.sub("u'\\\\\w", 'U+', string_char) - human_code = human_code.replace("'", "") - format_human = u"{}".format(human_code) - format_human = format_human.upper() - found_at = re.sub(search_term, u'<' + format_human + u'>', found_at) - slasher = re.compile("\\\\") - found_at = re.sub(slasher, '', found_at) - validation['id'] = found_at - error = u'Submitted variant description contains an invalid character which is represented by Unicode character ' + format_human + u' at position ' + found_at + u': Please remove this character and re-submit: A useful search function for Unicode characters can be found at 
https://unicode-search.net/' - validation['warnings'] = validation['warnings'] + ': ' + error - logger.warning(error) - continue - else: - pass - else: - pass - - # Remove whitespace - ws = copy.copy(input) - input = input.strip() - input = ''.join(input.split()) - if input != ws: - caution = 'Whitespace removed from variant description ' + str(ws) - validation['warnings'] = validation['warnings'] + ': ' + caution - logger.info(caution) - stash_input = copy.copy(input) - # Set the primary_assembly - if validation['primary_assembly'] == 'false': - if selected_assembly == 'hg19': - primary_assembly = 'GRCh37' - elif selected_assembly == 'hg38': - primary_assembly = 'GRCh38' - # Ensure genome build is correctly formatted - elif re.search('GRC', selected_assembly, re.IGNORECASE): - selected_assembly = selected_assembly.replace('g', 'G') - selected_assembly = selected_assembly.replace('r', 'R') - selected_assembly = selected_assembly.replace('c', 'C') - selected_assembly = selected_assembly.replace('H', 'h') - primary_assembly = selected_assembly - # Catch invalid genome build - valid_build = False - for genome_build in genome_builds: - if primary_assembly == genome_build: - valid_build = True - if valid_build is False: - primary_assembly = 'GRCh38' - validation['warnings'] = validation[ - 'warnings'] + ': Invalid genome build has been specified. Automap has selected the default build (GRCh38)' - logger.warning( - 'Invalid genome build has been specified. Automap has selected the default build ' + primary_assembly) - else: - validation['primary_assembly'] = primary_assembly - else: - primary_assembly = validation['primary_assembly'] - logger.trace("Completed string formatting", validation) - # Set variables that batch will not use but are required - crossing = 'false' - boundary = 'false' - - # VCF type 1 - """ - VCF2HGVS stage 1. 
converts chr-pos-ref-alt into chr:posRef>Alt - The output format is a common mistake caused by inaccurate conversion of - VCF variants into HGVS - hence the need for conversion step 2 - """ - if re.search('[-:]\d+[-:][GATC]+[-:][GATC]+', input): - input = input.replace(':', '-') - # Extract primary_assembly if provided - if re.match('GRCh3\d+-', input) or re.match('hg\d+-', input): - in_list = input.split('-') - selected_assembly = in_list[0] - input = '-'.join(in_list[1:]) - pre_input = copy.deepcopy(input) - vcf_elements = pre_input.split('-') - input = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[3]) - elif re.search('[-:]\d+[-:][GATC]+[-:]', input): - input = input.replace(':', '-') - # Extract primary_assembly if provided - if re.match('GRCh3\d+-', input) or re.match('hg\d+-', input): - in_list = input.split('-') - selected_assembly = in_list[0] - input = '-'.join(in_list[1:]) - pre_input = copy.deepcopy(input) - vcf_elements = pre_input.split('-') - validation[ - 'warnings'] = 'Not stating ALT bases is ambiguous because VCF specification 4.0 would treat ' + pre_input + ' as a deletion whereas VCF specification 4.1 onwards would treat ' + pre_input + ' as ALT = REF' - validation['warnings'] = validation['warnings'] + ': VariantValidator has output both alternatives' - logger.resub('Not stating ALT bases is ambiguous because VCF specification 4.0 would treat ' + - pre_input + ' as a deletion whereas VCF specification 4.1 onwards would treat ' + pre_input + - ' as ALT = REF. 
Validator will output both alternatives.') - validation['write'] = 'false' - input_A = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], 'del') - input_B = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[2]) - queryA = {'quibble': input_A, 'id': validation['id'], 'warnings': validation['warnings'], - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', - 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} - queryB = {'quibble': input_B, 'id': validation['id'], 'warnings': validation['warnings'], - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', - 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} - batch_list.append(queryA) - batch_list.append(queryB) - continue - elif re.search('[-:]\d+[-:][-:][GATC]+', input) or re.search('[-:]\d+[-:][.][-:][GATC]+', input): - input = input.replace(':', '-') - if re.search('-.-', input): - input = input.replace('-.-', '-ins-') - if re.search('--', input): - input = input.replace('--', '-ins-') - # Extract primary_assembly if provided - if re.match('GRCh3\d+-', input) or re.match('hg\d+-', input): - in_list = input.split('-') - selected_assembly = in_list[0] - input = '-'.join(in_list[1:]) - pre_input = copy.deepcopy(input) - vcf_elements = pre_input.split('-') - input = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[3]) - stash_input = input - logger.trace("Completed VCF-HVGS step 1", validation) - # API type non-HGVS - # e.g. Chr16:2099572TC>T - """ - VCF2HGVS conversion step 2 identifies the correct chromosomal reference - sequence based upon the non compliant identifier e.g. :2099572TC>T. - The data is currently stored in variantanalyser.supported_chromosome_builds. - Anticipated future builds will be transferred to MySQL which can be more - easily updated and maintained. 
- LRGs and LRG_ts also need to be assigned the correct reference sequence identifier. - The LRG ID data ia stored in the VariantValidator MySQL database. - The reference sequence type is also assigned. - """ - if re.search('\w+\:', input) and not re.search('\w+\:[gcnmrp]\.', input): - if re.search('\w+\:[gcnmrp]', input) and not re.search('\w+\:[gcnmrp]\.', input): - # Missing dot - pass - else: - try: - if re.search('GRCh37', input) or re.search('hg19', input): - primary_assembly = 'GRCh37' - elif re.search('GRCh38', input) or re.search('hg38', input): - primary_assembly = 'GRCh38' - pre_input = copy.deepcopy(input) - input_list = input.split(':') - pos_ref_alt = str(input_list[1]) - positionAndEdit = input_list[1] - if not re.match('N[CGTWMRP]_', input) and not re.match('LRG_', input): - chr_num = str(input_list[0]) - chr_num = chr_num.upper() - chr_num = chr_num.strip() - if re.match('CHR', chr_num): - chr_num = chr_num.replace('CHR', '') - # Use selected assembly - accession = va_scb.to_accession(chr_num, selected_assembly) - if accession is None: - validation['warnings'] = validation[ - 'warnings'] + ': ' + chr_num + \ - ' is not part of genome build ' + selected_assembly - logger.warning(chr_num + ' is not part of genome build ' + selected_assembly) - continue - else: - accession = input_list[0] - if re.search('>', pre_input): - if re.search('del', pre_input): - pos = re.match('\d+', pos_ref_alt) - position = pos.group(0) - old_ref, old_alt = pos_ref_alt.split('>') - old_ref = old_ref.replace(position, '') - position = int(position) - 1 - required_base = sf.fetch_seq(accession, start_i=position - 1, end_i=position) - ref = required_base + old_ref - alt = required_base - positionAndEdit = str(position) + ref + '>' + alt - elif re.search('ins', pre_input): - pos = re.match('\d+', pos_ref_alt) - position = pos.group(0) - old_ref, old_alt = pos_ref_alt.split('>') - # old_ref = old_ref.replace(position, '') - position = int(position) - 1 - required_base = 
sf.fetch_seq(accession, start_i=position - 1, end_i=position) - ref = required_base - alt = required_base + old_alt - positionAndEdit = str(position) + ref + '>' + alt - # Assign reference sequence type - ref_type = ref_seq_type.ref_type_assign(accession) - if re.match('LRG_', accession): - if ref_type == ':g.': - accession = va_dbCrl.data.get_RefSeqGeneID_from_lrgID(accession) - else: - accession = va_dbCrl.data.get_RefSeqTranscriptID_from_lrgTranscriptID(accession) - else: - accession = accession - input = str(accession) + ref_type + str(positionAndEdit) - stash_input = input - except: - exceptPass(validation) - - # Descriptions lacking the colon : - if re.search('[gcnmrp]\.', input) and not re.search(':[gcnmrp]\.', input): - error = 'Unable to identify a colon (:) in the variant description %s. A colon is required in HGVS variant descriptions to separate the reference accession from the reference type i.e. :. e.g. :c.' % ( - input) - validation['warnings'] = validation['warnings'] + ': ' + error - logger.warning(error) - continue - - # Ambiguous chr reference - logger.trace("Completed VCF-HVGS step 2", validation) - """ - VCF2HGVS conversion step 3 is similar to step 2 but handles - formats like Chr16:g.2099572TC>T which are provided by Alamut and other - software - """ - if re.search('\w+:[gcnmrp]\.', input) and not re.match('N[CGTWMRP]_', input): - # Take out lowercase Accession characters - lower_cased_list = input.split(':') - if re.search('LRG', lower_cased_list[0], re.IGNORECASE): - lower_case_accession = lower_cased_list[0] - lower_case_accession = lower_case_accession.replace('l', 'L') - lower_case_accession = lower_case_accession.replace('r', 'R') - lower_case_accession = lower_case_accession.replace('g', 'G') - else: - lower_case_accession = lower_cased_list[0] - lower_case_accession = lower_case_accession.upper() - input = ''.join(lower_cased_list[1:]) - input = lower_case_accession + ':' + input - if not re.match('LRG_', input) and not 
re.match('ENS', input) and not re.match('N[MRPC]_', input): - try: - if re.search('GRCh37', input) or re.search('hg19', input): - primary_assembly = 'GRCh37' - elif re.search('GRCh38', input) or re.search('hg38', input): - primary_assembly = 'GRCh38' - pre_input = copy.deepcopy(input) - input_list = input.split(':') - query_a_symbol = input_list[0] - is_it_a_gene = va_dbCrl.data.get_hgnc_symbol(query_a_symbol) - if is_it_a_gene == 'none': - pos_ref_alt = str(input_list[1]) - positionAndEdit = input_list[1] - chr_num = str(input_list[0]) - chr_num = chr_num.upper() - chr_num = chr_num.strip() - if re.match('CHR', chr_num): - chr_num = chr_num.replace('CHR', '') # Use selected assembly - accession = va_scb.to_accession(chr_num, selected_assembly) - if accession is None: - validation['warnings'] = validation['warnings'] + ': ' + chr_num + \ - ' is not part of genome build ' + selected_assembly - continue - input = str(accession) + ':' + str(positionAndEdit) - stash_input = input - else: - pass - except Exception as e: - exc_type, exc_value, last_traceback = sys.exc_info() - te = traceback.format_exc() - tbk = [str(exc_type), str(exc_value), str(te)] - er = str('\n'.join(tbk)) - logger.warning(str(exc_type) + " " + str(exc_value)) - logger.debug(er) - - # GENE_SYMBOL:c. n. types - logger.trace("Completed VCF-HGVS step 3", validation) - """ - Searches for gene symbols that have been used as reference sequence - identifiers. Provides a sufficiently repremanding warning, but also provides - correctly formatted variant descriptions with appropriate transcript - reference sequence identifiers i.e. NM_ .... 
- Note: the output from the function must be validated because VV has no way - of knowing which the users intended reference sequence was, and the exon - boundaries etc of the alternative transcript variants may not be equivalent - """ - if re.search('\w+\:[cn]\.', input): - try: - pre_input = copy.deepcopy(input) - query_a_symbol = pre_input.split(':')[0] - tx_edit = pre_input.split(':')[1] - is_it_a_gene = va_dbCrl.data.get_hgnc_symbol(query_a_symbol) - if is_it_a_gene != 'none': - uta_symbol = va_dbCrl.data.get_uta_symbol(is_it_a_gene) - available_transcripts = hdp.get_tx_for_gene(uta_symbol) - select_from_these_transcripts = {} - for tx in available_transcripts: - if re.match('NM_', tx[3]) or re.match('NR_', tx[3]): - if tx[3] not in select_from_these_transcripts.keys(): - select_from_these_transcripts[tx[3]] = '' - else: - continue - else: - continue - select_from_these_transcripts = '|'.join(select_from_these_transcripts.keys()) - if select_transcripts != 'all': - validation['write'] = 'false' - for transcript in select_transcripts_dict_plus_version.keys(): - validation[ - 'warnings'] = 'HGVS variant nomenclature does not allow the use of a gene symbol (' + \ - query_a_symbol + ') in place of a valid reference sequence' - refreshed_description = transcript + ':' + tx_edit - query = {'quibble': refreshed_description, 'id': validation['id'], - 'warnings': validation['warnings'], 'description': '', 'coding': '', - 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', - 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} - batch_list.append(query) - logger.resub('HGVS variant nomenclature does not allow the use of a gene symbol (' + \ - query_a_symbol + ') in place of a valid reference sequence') - else: - validation['warnings'] = validation['warnings'] + \ - ': ' + 'HGVS variant nomenclature does not allow the use of a gene symbol (' + \ - query_a_symbol + ') in place of a valid reference sequence: Re-submit ' + input + \ - ' and 
specify transcripts from the following: ' + 'select_transcripts=' + select_from_these_transcripts - logger.warning('HGVS variant nomenclature does not allow the use of a gene symbol (' + \ - query_a_symbol + ') in place of a valid reference sequence: Re-submit ' + input + \ - ' and specify transcripts from the following: ' + 'select_transcripts=' + select_from_these_transcripts) - continue - else: - pass - except: - exceptPass() - logger.trace("Gene symbol reference catching complete", validation) - - # NG_:c. or NC_:c. - """ - Similar to the GENE_SYMBOL:c. n. types function, but spots RefSeqGene or - Chromosomal reference sequence identifiers used in the context of c. variant - descriptions - """ - if re.search('\w+\:[cn]', input): - try: - if re.match('^NG_', input): - refSeqGeneID = input.split(':')[0] - tx_edit = input.split(':')[1] - gene_symbol = va_dbCrl.data.get_gene_symbol_from_refSeqGeneID(refSeqGeneID) - if gene_symbol != 'none': - uta_symbol = va_dbCrl.data.get_uta_symbol(gene_symbol) - available_transcripts = hdp.get_tx_for_gene(uta_symbol) - select_from_these_transcripts = {} - for tx in available_transcripts: - if re.match('NM_', tx[3]) or re.match('NR_', tx[3]): - if tx[3] not in select_from_these_transcripts.keys(): - select_from_these_transcripts[tx[3]] = '' - else: - continue - else: - continue - select_from_these_transcripts = '|'.join(select_from_these_transcripts.keys()) - if select_transcripts != 'all': - validation['write'] = 'false' - for transcript in select_transcripts_dict_plus_version.keys(): - validation[ - 'warnings'] = 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. 
NG_(NM_):c.PositionVariation' - refreshed_description = refSeqGeneID + '(' + transcript + ')' + ':' + tx_edit - query = {'quibble': refreshed_description, 'id': validation['id'], - 'warnings': validation['warnings'], 'description': '', 'coding': '', - 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', - 'write': 'true', 'primary_assembly': primary_assembly, - 'order': ordering} - logger.resub( - 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. NG_(NM_):c.PositionVariation. Resubmitting corrected version.') - batch_list.append(query) - else: - validation['warnings'] = validation[ - 'warnings'] + ': ' + 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation. Re-submit ' + input + ' but also specify transcripts from the following: ' + 'select_transcripts=' + select_from_these_transcripts - logger.warning( - + 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation. Re-submit ' + - str( - input) + ' but also specify transcripts from the following: ' + 'select_transcripts=' + str( - select_from_these_transcripts)) - continue - else: - validation['warnings'] = validation[ - 'warnings'] + ': ' + 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation' - logger.warning( - 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation') - continue - elif re.match('^NC_', input): - validation['warnings'] = validation[ - 'warnings'] + ': ' + 'A transcript reference sequence has not been provided e.g. NC_(NM_):c.PositionVariation. Unable to predict available transripts because chromosomal position is not specified' - logger.warning( - 'A transcript reference sequence has not been provided e.g. NC_(NM_):c.PositionVariation. 
Unable to predict available transripts because chromosomal position is not specified') - continue - else: - pass - except: - exceptPass() - - logger.trace("Chromosomal/RefSeqGene reference catching complete", validation) - # Find not_sub type in input e.g. GGGG>G - """ - VCF2HGVS conversion step 4 has two purposes - 1. VCF is frequently inappropriately converted into HGVS like descriptions - such as GGGG>G which is actually a delins, del or ins. The function assigns - the correct edit type - 2. Detects and extracts multiple ALT sequences into HGVS descriptions and - automatically submits them for validation - """ - not_sub = copy.deepcopy(input) - not_sub_find = re.compile("([GATCgatc]+)>([GATCgatc]+)") - if not_sub_find.search(not_sub): - try: - # If the length of either side of the substitution delimer (>) is >1 - matches = not_sub_find.search(not_sub) - if len(matches.group(1)) > 1 or len(matches.group(2)) > 1 or re.search( - "([GATCgatc]+)>([GATCgatc]+),([GATCgatc]+)", input): - # Search for and remove range - interval_range = re.compile("([0-9]+)_([0-9]+)") - if interval_range.search(not_sub): - m = not_sub_find.search(not_sub) - start = m.group(1) - delete = m.group(2) - beginning_string, middle_string = not_sub.split(':') - middle_string = middle_string.split('_')[0] - end_string = start + '>' + delete - not_sub = beginning_string + ':' + middle_string + end_string - # Split description - split_colon = not_sub.split(':') - ref_ac = split_colon[0] - remainder = split_colon[1] - split_dot = remainder.split('.') - ref_type = split_dot[0] - remainder = split_dot[1] - posedit = remainder - split_greater = remainder.split('>') - insert = split_greater[1] - remainder = split_greater[0] - # Split remainder using matches - r = re.compile("([0-9]+)([GATCgatc]+)") - try: - m = r.search(remainder) - start = m.group(1) - delete = m.group(2) - starts = posedit.split(delete)[0] - re_try = ref_ac + ':' + ref_type + '.' 
+ starts + 'del' + delete[0] + 'ins' + insert - hgvs_re_try = hp.parse_hgvs_variant(re_try) - hgvs_re_try.posedit.edit.ref = delete - start_pos = str(hgvs_re_try.posedit.pos.start) - if re.search('\-', start_pos): - base, offset = start_pos.split('-') - new_offset = 0 - int(offset) + (len(delete)) - end_pos = int(base) - hgvs_re_try.posedit.pos.end.base = int(end_pos) - hgvs_re_try.posedit.pos.end.offset = int(new_offset) - 1 - not_delins = ref_ac + ':' + ref_type + '.' + start_pos + '_' + str( - hgvs_re_try.posedit.pos.end) + 'del' + delete + 'ins' + insert - elif re.search('\+', start_pos): - base, offset = start_pos.split('+') - end_pos = int(base) + (len(delete) - int(offset) - 1) - new_offset = 0 + int(offset) + (len(delete) - 1) - hgvs_re_try.posedit.pos.end.base = int(end_pos) - hgvs_re_try.posedit.pos.end.offset = int(new_offset) - not_delins = ref_ac + ':' + ref_type + '.' + start_pos + '_' + str( - hgvs_re_try.posedit.pos.end) + 'del' + delete + 'ins' + insert - else: - end_pos = int(start_pos) + (len(delete) - 1) - not_delins = ref_ac + ':' + ref_type + '.' 
+ start_pos + '_' + str( - end_pos) + 'del' + delete + 'ins' + insert - except: - exceptPass() - not_delins = not_sub - # Parse into hgvs object - try: - hgvs_not_delins = hp.parse_hgvs_variant(not_delins) - except hgvs.exceptions.HGVSError as e: - # Sort out multiple ALTS from VCF inputs - if re.search("([GATCgatc]+)>([GATCgatc]+),([GATCgatc]+)", not_delins): - header, alts = not_delins.split('>') - # Split up the alts into a list - alt_list = alts.split(',') - # Assemble and re-submit - for alt in alt_list: - validation[ - 'warnings'] = 'Multiple ALT sequences detected: auto-submitting all possible combinations' - validation['write'] = 'false' - refreshed_description = header + '>' + alt - query = {'quibble': refreshed_description, 'id': validation['id'], - 'warnings': validation['warnings'], 'description': '', 'coding': '', - 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', - 'write': 'true', 'primary_assembly': primary_assembly, - 'order': ordering} - batch_list.append(query) - logger.resub( - 'Multiple ALT sequences detected. 
Auto-submitting all possible combinations.') - continue - else: - error = str(e) - issue_link = '' - validation['warnings'] = validation['warnings'] + ': ' + error - logger.warning(str(e)) - continue - - # Re-Stash the input as an HGVS - stash_input = copy.copy(hgvs_not_delins) - try: - not_delins = str(hn.normalize(hgvs_not_delins)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('Normalization of intronic variants is not supported', error): - not_delins = not_delins - else: - issue_link = '' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(e)) - continue - # Create warning - caution = 'Variant description ' + input + ' is not HGVS compliant' - automap = input + ' automapped to ' + not_delins - validation['warnings'] = validation['warnings'] + ': ' + automap - # Change input to normalized variant - input = not_delins - else: - pass - except: - exceptPass() - else: - pass - logger.trace("Completed VCF-HVGS step 4", validation) - - # Tackle edit1234 type - """ - Warns that descriptions such as c.ins12 or g.del69 are not HGVS compliant - Strips the trailing numbers and tries to parse the description into an - hgvs object. 
- If parses, provides a warning including links to the VarNomen web page, but - continues validation - If not, an error message is generated and the loop continues - """ - edit_pass = re.compile('_\d+$') - edit_fail = re.compile('\d+$') - if edit_fail.search(input): - if edit_pass.search(input): - pass - else: - error = 'false' - issue_link = 'false' - failed = copy.deepcopy(input) - # Catch the trailing digits - digits = re.search(r"(\d+$)", failed) - digits = digits.group(1) - # Remove them so that the string SHOULD parse - try: - hgvs_failed = hp.parse_hgvs_variant(failed) - except hgvs.exceptions.HGVSError as e: - error = str(e) - error = 'The syntax of the input variant description is invalid ' - if re.search('ins\d+', failed): - issue_link = 'http://varnomen.hgvs.org/recommendations/DNA/variant/insertion/' - error = error + ' please refer to ' + issue_link - validation['warnings'] = validation['warnings'] + error - logger.warning(error + " " + e) - continue - hgvs_failed = hp.parse_hgvs_variant(failed) - hgvs_failed.posedit.edit = str(hgvs_failed.posedit.edit).replace(digits, '') - failed = str(hgvs_failed) - hgvs_failed = hp.parse_hgvs_variant(failed) - automap = 'Non HGVS compliant variant description ' + input + ' automapped to ' + failed - validation['warnings'] = validation['warnings'] + ': ' + automap - logger.warning(automap) - input = failed - - logger.trace("Ins/Del reference catching complete", validation) - # Tackle compound variant descriptions NG or NC (NM_) i.e. correctly input NG/NC_(NM_):c. - """ - Fully HGVS compliant intronic variant descriptions take the format e.g - NG_007400.1(NM_000088.3):c.589-1G>T. However, hgvs cannot parse and map - these variant strings. - This function: - Removes the g. 
reference sequence - NG_007400.1(NM_000088.3):c.589-1G>T ---> (NM_000088.3):c.589-1G>T - Removes the parintheses - (NM_000088.3):c.589-1G>T ---> NM_000088.3:c.589-1G>T - hgvs can now parse the string into an hgvs variant object and manipulate it - """ - caution = '' - compounder = re.compile('\(NM_') - compounder_b = re.compile('\(ENST') - if compounder.search(input): - # Find pattern e.g. +0000 and assign to a variable - transy = re.search(r"(NM_.+)", input) - transy = transy.group(1) - transy = transy.replace(')', '') - input = transy - logger.trace("HVGS typesetting complete", validation) - # Extract variants from HGVS allele descriptions - # http://varnomen.hgvs.org/recommendations/DNA/variant/alleles/ - """ - HGVS allele string parsing function Occurance #1 - Takes a single HGVS allele description and separates each allele into a - list of HGVS variants. The variants are then automatically submitted for - validation. - Note: In this context, it is inappropriate to validate descriptions - containing intronic variant descriptions. 
In such instances, allele - descriptions should be re-submitted by the user at the gene or genome level - """ - if (re.search(':[gcnr].\[', input) and re.search('\;', input)) or ( - re.search(':[gcrn].\d+\[', input) and re.search('\;', input)) or (re.search('\(\;\)', input)): - # handle LRG inputs - if re.match('^LRG', input): - if re.match('^LRG\d+', input): - string, remainder = input.split(':') - reference = string.replace('LRG', 'LRG_') - input = reference + ':' + remainder - caution = string + ' updated to ' + reference - if not re.match('^LRG_\d+', input): - pass - elif re.match('^LRG_\d+:g.', input) or re.match('^LRG_\d+:p.', input) or re.match('^LRG_\d+:c.', - input) or re.match( - '^LRG_\d+:n.', input): - lrg_reference, variation = input.split(':') - refseqgene_reference = va_dbCrl.data.get_RefSeqGeneID_from_lrgID(lrg_reference) - if refseqgene_reference != 'none': - input = refseqgene_reference + ':' + variation - if caution == '': - caution = lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation - else: - caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation - validation['warnings'] = validation['warnings'] + ': ' + str(caution) - logger.warning(str(caution)) - elif re.match('^LRG_\d+t\d+:c.', input) or re.match('^LRG_\d+t\d+:n.', input) or re.match( - '^LRG_\d+t\d+:p.', input) or re.match('^LRG_\d+t\d+:g.', input): - lrg_reference, variation = input.split(':') - refseqtranscript_reference = va_dbCrl.data.get_RefSeqTranscriptID_from_lrgTranscriptID( - lrg_reference) - if refseqtranscript_reference != 'none': - input = refseqtranscript_reference + ':' + variation - if caution == '': - caution = lrg_reference + ':' + variation + ' automapped to ' + refseqtranscript_reference + ':' + variation - else: - caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + refseqtranscript_reference + ':' + variation - validation['warnings'] = 
validation['warnings'] + ': ' + str(caution) - logger.warning(str(caution)) - else: - pass - try: - # Submit to allele extraction function - alleles = va_func.hgvs_alleles(input, hp, vr, hn, vm, sf) - validation['warnings'] = validation[ - 'warnings'] + ': ' + 'Automap has extracted possible variant descriptions' - logger.resub('Automap has extracted possible variant descriptions, resubmitting') - for allele in alleles: - query = {'quibble': allele, 'id': validation['id'], 'warnings': validation['warnings'], - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', - 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, - 'order': ordering} - coding = 'intergenic' - batch_list.append(query) - validation['write'] = 'false' - continue - except va_func.alleleVariantError as e: - if re.search("Cannot validate sequence of an intronic variant", str(e)): - validation['warnings'] = validation[ - 'warnings'] + ': ' + 'Intronic positions not supported for HGVS Allele descriptions' - logger.warning('Intronic positions not supported for HGVS Allele descriptions') - continue - else: - raise variantValidatorError(str(e)) - logger.trace("HVGS String allele parsing pass 1 complete", validation) - # INITIAL USER INPUT FORMATTING - """ - Removes whitespace from the ends of the string - Removes anything in brackets - Identifies variant type - Returns a dictionary containing the formatted input string and the variant type - Accepts c, g, n, r currently - """ - formatted = va_func.user_input(input) - - # Validator specific variables, note, not all will be necessary for batch, but keep to ensure that batch works - # vars = [] - # refseq_gene = '' - # relevant = '' - warning = '' - automap = 'false' - # vmapped = 'false' - # coords = 'false' - # ensembl_gene = 'false' - hgnc_gene_info = 'false' - # issue_link = 'false' - # cr_available = 'false' - # rcmds_tab = 'false' - - # Check the initial validity of the input - if formatted == 'invalid': - 
if re.search('\w+\:[gcnmrp]', input) and not re.search('\w+\:[gcnmrp]\.', input): - error = 'Variant description ' + input + ' lacks the . character between and in the expected pattern :.' - else: - error = 'Variant description ' + input + ' is not in an accepted format' - validation['warnings'] = validation[ - 'warnings'] + ': ' + error - logger.warning(error) - continue - else: - variant = formatted['variant'] - input = formatted['variant'] - stash_input = formatted['variant'] - type = formatted['type'] - logger.trace("Variant input formatted, proceeding to validate.", validation) - # Conversions - """ - Conversions are not currently supported. The HGVS format for conversions - is rarely seen wrt genomic sequencing data and needs to be re-evaluated - """ - conversion = re.compile('con') - if conversion.search(variant): - validation['warnings'] = validation['warnings'] + ': ' + 'Gene conversions currently unsupported' - logger.warning('Gene conversions currently unsupported') - continue - - # Primary check that hgvs will accept the variant - error = 'false' - # Change RNA bases to upper case but nothing else - if type == ":r.": - variant = variant.upper() - variant = variant.replace(':R.', ':r.') - # lowercase the supported variant types - variant = variant.replace('DEL', 'del') - variant = variant.replace('INS', 'ins') - variant = variant.replace('INV', 'inv') - variant = variant.replace('DUP', 'dup') - - try: - input_parses = hp.parse_hgvs_variant(variant) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error == 'false': - input_parses.ac = input_parses.ac.upper() - if hasattr(input_parses.posedit.edit, 'alt'): - if input_parses.posedit.edit.alt is not None: - input_parses.posedit.edit.alt = input_parses.posedit.edit.alt.upper() - if hasattr(input_parses.posedit.edit, 'ref'): - if input_parses.posedit.edit.ref is not None: - input_parses.posedit.edit.ref = input_parses.posedit.edit.ref.upper() - variant = str(input_parses) - input = 
str(input_parses) - pass - else: - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(error) - continue - - """ - ENST support needs to be re-evaluated, but is very low priority - ENST not supported by ACMG and is under review by HGVS - """ - if re.match('^ENST', str(input_parses)): - trap_ens_in = str(input_parses) - sim_tx = hdp.get_similar_transcripts(input_parses.ac) - for line in sim_tx: - if str(line[2]) == 'True' and str(line[3]) == 'True' and str(line[4]) == 'True' and str( - line[5]) == 'True' and str(line[6]) == 'True': - input_parses.ac = (line[1]) - input = str(input_parses) - variant = input - break - if re.match('^ENST', str(input_parses)): - error = 'Unable to map ' + str(input_parses.ac) + ' to an equivalent RefSeq transcript' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - validation['warnings'] = validation['warnings'] + ': ' + str( - trap_ens_in) + ' automapped to equivalent RefSeq transcript ' + variant - logger.warning(str(trap_ens_in) + ' automapped to equivalent RefSeq transcript ' + variant) - logger.trace("HVGS acceptance test passed", validation) - # Check whether supported genome build is requested for non g. descriptions - historic_assembly = 'false' - mapable_assemblies = { - 'GRCh37': 'true', - 'GRCh38': 'true', - 'NCBI36': 'false' - } - is_mapable = mapable_assemblies.get(primary_assembly) - if is_mapable == 'true': - - # These objects cannot be moved outside of the main function because they gather data from the - # iuser input e.g. 
alignment method and genome build - # They initiate quickly, so no need to move them unnecessarily - - # Create easy variant mapper (over variant mapper) and splign locked evm - evm = hgvs.assemblymapper.AssemblyMapper(hdp, - assembly_name=primary_assembly, - alt_aln_method=alt_aln_method, - normalize=True, - replace_reference=True - ) - - # Setup a reverse normalize instance and non-normalize evm - no_norm_evm = hgvs.assemblymapper.AssemblyMapper(hdp, - assembly_name=primary_assembly, - alt_aln_method=alt_aln_method, - normalize=False, - replace_reference=True - ) - - # Create a specific minimal evm with no normalizer and no replace_reference - min_evm = hgvs.assemblymapper.AssemblyMapper(hdp, - assembly_name=primary_assembly, - alt_aln_method=alt_aln_method, - normalize=False, - replace_reference=False - ) - - else: - error = 'Mapping of ' + variant + ' to genome assembly ' + primary_assembly + ' is not supported' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - # Catch interval end > interval start - """ - hgvs did/does not handle 3' UTR position ordering well. This function - ensures that end pos is not > start pos wrt 3' UTRs. - Also identifies some variants which span into the downstream sequence - i.e. out of bounds - """ - astr = re.compile('\*') - if astr.search(str(input_parses.posedit)): - input_parses_copy = copy.deepcopy(input_parses) - input_parses_copy.type = "c" - # Map to n. 
position - # Create easy variant mapper (over variant mapper) and splign locked evm - try: - to_n = evm.c_to_n(input_parses_copy) - except hgvs.exceptions.HGVSError as e: - exceptPass() - else: - if to_n.posedit.pos.end.base < to_n.posedit.pos.start.base: - error = 'Interval end position < interval start position ' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - elif input_parses.posedit.pos.end.base < input_parses.posedit.pos.start.base: - error = 'Interval end position ' + str( - input_parses.posedit.pos.end.base) + ' < interval start position ' + str( - input_parses.posedit.pos.start.base) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - pass - - # Catch missing version number in refseq - ref_type = re.compile("^N\w\w\d") - is_version = re.compile("\d\.\d") - en_type = re.compile('^ENS') - lrg_type = re.compile('LRG') - if (ref_type.search(str(input_parses)) and is_version.search(str(input_parses))) or ( - en_type.search(str(input_parses))): - pass - else: - if lrg_type.search(str(input_parses)): - pass - if ref_type.search(str(input_parses)): - error = 'RefSeq variant accession numbers MUST include a version number' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - continue - logger.trace("HVGS interval/version mapping complete", validation) - - # handle LRG inputs - """ - LRG and LRG_t reference sequence identifiers need to be replaced with - equivalent RefSeq identifiers. 
The lookup data is stored in the - VariantValidator MySQL database - """ - if re.match('^LRG', str(input_parses)): - if re.match('^LRG\d+', str(input_parses.ac)): - string = str(input_parses.ac) - reference = string.replace('LRG', 'LRG_') - input_parses.ac = reference - caution = string + ' updated to ' + reference - if not re.match('^LRG_\d+', str(input_parses)): - pass - elif re.match('^LRG_\d+:g.', str(input_parses)) or re.match('^LRG_\d+:p.', - str(input_parses)) or re.match( - '^LRG_\d+:c.', str(input_parses)) or re.match('^LRG_\d+:n.', str(input_parses)): - lrg_reference, variation = str(input_parses).split(':') - refseqgene_reference = va_dbCrl.data.get_RefSeqGeneID_from_lrgID(lrg_reference) - if refseqgene_reference != 'none': - input_parses.ac = refseqgene_reference - variant = str(input_parses) - input = str(input_parses) - stash_input = input - if caution == '': - caution = lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation - else: - caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation - validation['warnings'] = validation['warnings'] + ': ' + str(caution) - logger.warning(str(caution)) - elif re.match('^LRG_\d+t\d+:c.', str(input_parses)) or re.match('^LRG_\d+t\d+:n.', - str(input_parses)) or re.match( - '^LRG_\d+t\d+:p.', str(input_parses)) or re.match('^LRG_\d+t\d+:g.', str(input_parses)): - lrg_reference, variation = str(input_parses).split(':') - refseqtranscript_reference = va_dbCrl.data.get_RefSeqTranscriptID_from_lrgTranscriptID( - lrg_reference) - if refseqtranscript_reference != 'none': - input_parses.ac = refseqtranscript_reference - variant = str(input_parses) - input = str(input_parses) - stash_input = input - if caution == '': - caution = lrg_reference + ':' + variation + ' automapped to ' + refseqtranscript_reference + ':' + variation - else: - caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + 
refseqtranscript_reference + ':' + variation - validation['warnings'] = validation['warnings'] + ': ' + str(caution) - logger.warning(str(caution)) - else: - pass - logger.trace("LRG check for conversion to refseq completed", validation) - # Additional Incorrectly input variant capture training - """ - Evolving list of common mistakes, see sections below - """ - # NM_ .g - if (re.search('^NM_', variant) or re.search('^NR_', variant)) and re.search(':g.', variant): - suggestion = input.replace(':g.', ':c.') - error = 'Transcript reference sequence input as genomic (g.) reference sequence. Did you mean ' + suggestion + '?' - validation['warnings'] = validation['warnings'] + ': ' + error - logger.warning(error) - continue - # NR_ c. - if re.search('^NR_', input) and re.search(':c.', input): - suggestion = input.replace(':c.', ':n.') - error = 'Non-coding transcript reference sequence input as coding (c.) reference sequence. Did you mean ' + suggestion + '?' - validation['warnings'] = validation['warnings'] + ': ' + error - logger.warning(error) - continue - # NM_ n. - if re.search('^NM_', input) and re.search(':n.', input): - suggestion = input.replace(':n.', ':c.') - error = 'Coding transcript reference sequence input as non-coding transcript (n.) reference sequence. Did you mean ' + suggestion + '?' - validation['warnings'] = validation['warnings'] + ': ' + error - logger.warning(error) - continue - - # NM_ NC_ NG_ NR_ p. - if (re.search('^NM_', variant) or re.search('^NR_', variant) or re.search('^NC_', variant) or re.search( - '^NG_', variant)) and re.search(':p.', variant): - issue_link = 'http://varnomen.hgvs.org/recommendations/protein/' - error = 'Using a nucleotide reference sequence (NM_ NR_ NG_ NC_) to specify protein-level (p.) variation is not HGVS compliant. Please select an appropriate protein reference sequence (NP_)' - validation['warnings'] = validation['warnings'] + ': ' + error - logger.warning(error) - continue - - # NG_ c or NC_c.. 
- if (re.search('^NG_', variant) or re.search('^NC_', variant)) and re.search(':c.', variant): - suggestion = ': For additional assistance, submit ' + str(variant) + ' to VariantValidator' - error = 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. NG_(NM_):c.PositionVariation' + suggestion - validation['warnings'] = validation['warnings'] + ': ' + error - logger.warning(error) - continue - - logger.trace("Passed 'common mistakes' catcher", validation) - # Primary validation of the input - """ - An evolving set of variant structure and content searches which identify - and warn users about inappropriate use of HGVS - Primarily, this code filters out variants that cannot realistically be - auto corrected and will cause the downstream functions to return errors - """ - input_parses = hp.parse_hgvs_variant(input) - if input_parses.type == 'g': - if re.match('^NC_', input_parses.ac) or re.match('^NG_', input_parses.ac) or re.match('^NT_', - input_parses.ac) or re.match( - '^NW_', input_parses.ac): - pass - else: - error = 'Invalid reference sequence identifier (' + input_parses.ac + ')' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(error) - continue - try: - vr.validate(input_parses) - except hgvs.exceptions.HGVSError as e: - error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(error) - continue - except Exception as e: - error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(error) - continue - # Additional test - try: - hn.normalize(input_parses) - except hgvs.exceptions.HGVSError as e: - error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(error) - continue - else: - exceptPass() - - elif input_parses.type == 'c': - if re.search('\*', str(input_parses)) or re.search('c.\-', str(input_parses)): - # Catch 
variation in UTRs - # These should be in the sequence so can be directly validated. Need to pass to n. - try: - vr.validate(input_parses) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('datums is ill-defined', error): - called_ref = input_parses.posedit.edit.ref - try: - to_n = evm.c_to_n(input_parses) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(error) - continue - actual_ref = to_n.posedit.edit.ref - if called_ref != actual_ref: - error = 'Variant reference (' + called_ref + ') does not agree with reference sequence (' + actual_ref + ')' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(error) - continue - else: - input_parses.posedit.edit.ref = '' - variant = str(input_parses) - else: - if re.search('bounds', error) or re.search('intronic variant', error): - try: - hn.normalize(input_parses) - except hgvs.exceptions.HGVSError as e: - exceptPass() - if re.search('bounds', str(e)): - try: - identity_info = hdp.get_tx_identity_info(input_parses.ac) - ref_start = identity_info[3] - ref_end = identity_info[4] - if re.match('-', str( - input_parses.posedit.pos.start)) and input_parses.posedit.pos.start.offset == 0: - # upstream positions - boundary = int('-' + str(ref_start)) - remainder = int(str(input_parses.posedit.pos.start)) - boundary - input_parses.posedit.pos.start.base = boundary - input_parses.posedit.pos.start.offset = remainder - if re.match('-', str( - input_parses.posedit.pos.end)) and input_parses.posedit.pos.end.offset == 0: - boundary = int('-' + str(ref_start)) - remainder = int(str(input_parses.posedit.pos.end)) - boundary - input_parses.posedit.pos.end.base = boundary - input_parses.posedit.pos.end.offset = remainder - if re.match('\*', str( - input_parses.posedit.pos.start)) and input_parses.posedit.pos.start.offset == 0: - # downstream positions - tot_end_pos = 
str(input_parses.posedit.pos.start).replace('*', '') - ts_seq = sf.fetch_seq(input_parses.ac) - boundary = len(ts_seq) - ref_end - input_parses.posedit.pos.start.base = boundary - offset = int(tot_end_pos) - int(boundary) - input_parses.posedit.pos.start.offset = offset - if re.match('\*', str( - input_parses.posedit.pos.end)) and input_parses.posedit.pos.end.offset == 0: - tot_end_pos = str(input_parses.posedit.pos.end).replace('*', '') - ts_seq = sf.fetch_seq(input_parses.ac) - boundary = len(ts_seq) - ref_end - input_parses.posedit.pos.end.base = boundary - offset = int(tot_end_pos) - int(boundary) - input_parses.posedit.pos.end.offset = offset - - # Create a lose vm instance - lose_vm = hgvs.variantmapper.VariantMapper(hdp, - replace_reference=True, - prevalidation_level=None - ) - - - report_gen = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, - primary_assembly, lose_vm, hp, hn, sf, nr_vm) - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant: Instead use ' + valstr( - report_gen) - except Exception as e: - exceptPass() - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - pass - else: - pass - - try: - input_parses = evm.c_to_n(input_parses) - except hgvs.exceptions.HGVSError as e: - error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(e)) - continue - - if re.search('n.1-', str(input_parses)): - input_parses = evm.n_to_c(input_parses) - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' - genomic_position = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, primary_assembly, - vm, hp, hn, sf, nr_vm) - error = error + valstr(genomic_position) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - pass - - # Re-map input_parses back to c. variant - input_parses = evm.n_to_c(input_parses) - - # Intronic positions in UTRs - if re.search('\d\-\d', str(input_parses)) or re.search('\d\+\d', str(input_parses)): - # Can we go c-g-c - try: - to_genome = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, primary_assembly, vm, - hp, hn, sf, nr_vm) - to_tx = evm.g_to_t(to_genome, input_parses.ac) - except hgvs.exceptions.HGVSInvalidIntervalError as e: - error = str(e) - if re.search('bounds', error): - try: - identity_info = hdp.get_tx_identity_info(input_parses.ac) - ref_start = identity_info[3] - ref_end = identity_info[4] - if re.match('-', str(input_parses.posedit.pos.start)): - # upstream positions - boundary = int('-' + str(ref_start)) - remainder = int(str(input_parses.posedit.pos.start)) - boundary - input_parses.posedit.pos.start.base = boundary - input_parses.posedit.pos.start.offset = remainder - if re.match('-', str(input_parses.posedit.pos.end)): - boundary = int('-' + str(ref_start)) - remainder = int(str(input_parses.posedit.pos.end)) - boundary - input_parses.posedit.pos.end.base = boundary - input_parses.posedit.pos.end.offset = remainder - if re.match('\*', str(input_parses.posedit.pos.start)): - # downstream positions - tot_end_pos = str(input_parses.posedit.pos.start).replace('*', '') - ts_seq = sf.fetch_seq(input_parses.ac) - boundary = len(ts_seq) - ref_end - input_parses.posedit.pos.start.base = boundary - te1, te2 = tot_end_pos.split('+') - tot_end_pos = int(te1) + int(te2) - offset = int(tot_end_pos) - int(boundary) - input_parses.posedit.pos.start.offset = offset - if re.match('\*', str(input_parses.posedit.pos.end)): - tot_end_pos = 
str(input_parses.posedit.pos.end).replace('*', '') - ts_seq = sf.fetch_seq(input_parses.ac) - boundary = len(ts_seq) - ref_end - input_parses.posedit.pos.end.base = boundary - te1, te2 = tot_end_pos.split('+') - tot_end_pos = int(te1) + int(te2) - offset = int(tot_end_pos) - int(boundary) - input_parses.posedit.pos.end.offset = offset - - report_gen = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, - primary_assembly, lose_vm, hp, hn, sf, nr_vm) - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' + valstr( - report_gen) - except Exception as e: - exceptPass() - else: - pass - validation['warnings'] = validation['warnings'] + ': ' + str( - error) - logger.warning(str(error)) - continue - - except hgvs.exceptions.HGVSDataNotAvailableError as e: - error = str(e) - if 'Alignment is incomplete' in error: - e_list = error.split('~') - gens = [] - for el in e_list: - el_l = el.split('/') - if el_l[-1] == '': - continue - gens.append(el_l[-1]) - acs = '; '.join(gens) - error = 'Cannot map ' + valstr( - input_parses) + ' to a genomic position. ' + input_parses.ac + ' can only be partially aligned to genomic reference sequences ' + acs - validation['warnings'] = validation['warnings'] + ': ' + str( - error) - logger.warning(str(error)) - continue - - elif re.search('\d\-', str(input_parses)) or re.search('\d\+', str(input_parses)): - # Quick look at syntax validation - try: - vr.validate(input_parses) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if re.search('bounds', error): - try: - report_gen = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, primary_assembly, - lose_vm, hp, hn, sf, nr_vm) - except hgvs.exceptions.HGVSError as e: - exceptPass() - else: - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + valstr( - report_gen) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - elif re.search('insertion length must be 1', error): - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - elif re.search('base start position must be <= end position', error): - correction = copy.deepcopy(input_parses) - st = input_parses.posedit.pos.start - ed = input_parses.posedit.pos.end - correction.posedit.pos.start = ed - correction.posedit.pos.end = st - error = error + ': Did you mean ' + str(correction) + '?' - # error = 'Interval start position ' + str(input_parses.posedit.pos.start) + ' > interval end position ' + str(input_parses.posedit.pos.end) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - - # Create a specific minimal evm with no normalizer and no replace_reference - # Have to use this method due to potential multi chromosome error, note, normalizes but does not replace sequence - try: - output = va_func.noreplace_myevm_t_to_g(input_parses, evm, hdp, primary_assembly, vm, hn, - hp, sf, no_norm_evm) - except hgvs.exceptions.HGVSDataNotAvailableError as e: - tx_ac = input_parses.ac - try: - gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(tx_ac) - except: - gene_symbol = 'none' - if gene_symbol == 'none': - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - else: - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - 
validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - except ValueError as e: - error = str(e) - if re.search('> end', error): - error = 'Interval start position ' + str( - input_parses.posedit.pos.start) + ' > interval end position ' + str( - input_parses.posedit.pos.end) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if re.search('base start position must be <= end position', error): - correction = copy.deepcopy(input_parses) - st = input_parses.posedit.pos.start - ed = input_parses.posedit.pos.end - correction.posedit.pos.start = ed - correction.posedit.pos.end = st - error = error + ': Did you mean ' + str(correction) + '?' - error = 'Interval start position ' + str( - input_parses.posedit.pos.start) + ' > interval end position ' + str( - input_parses.posedit.pos.end) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - - try: - evm.g_to_t(output, input_parses.ac) - except hgvs.exceptions.HGVSError as e: - error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - - try: - vr.validate(output) - except hgvs.exceptions.HGVSError as e: - error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - - else: - # All other variation - try: - vr.validate(input_parses) - except hgvs.exceptions.HGVSUnsupportedOperationError: - exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - """ - #Phil: Honestly not sure what the purpose of any of these is, we act the same regardless of what - #kind of error it is. 
- if re.search('Length implied by coordinates', error): - # Applies to del and inv - # NOTE, there has been no normalization at all so this error is valid here - validation['warnings'] = validation['warnings'] + ': ' + str(error) - # Will apply to > del and inv - if re.search('does not agree with reference sequence', error): - validation['warnings'] = validation['warnings'] + ': ' + str(error) - # ensures x_y for insertions - if re.search('insertion length must be 1', error): - validation['warnings'] = validation['warnings'] + ': ' + str(error) - # Boundary issue - if re.search('Variant coordinate is out of the bound of CDS region', error): - validation['warnings'] = validation['warnings'] + ': ' + str(error) - """ - # This catches errors in introns - if re.search('base start position must be <= end position', error): - correction = copy.deepcopy(input_parses) - st = input_parses.posedit.pos.start - ed = input_parses.posedit.pos.end - correction.posedit.pos.start = ed - correction.posedit.pos.end = st - error = error + ': Did you mean ' + str(correction) + '?' - error = 'Interval start position ' + str( - input_parses.posedit.pos.start) + ' > interval end position ' + str( - input_parses.posedit.pos.end) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - - except hgvs.exceptions.HGVSDataNotAvailableError as e: - error = e - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('bounds', error): - error = error + ' (' + input_parses.ac + ')' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - exceptPass() - - - elif input_parses.type == 'n': - if re.search('\+', str(input_parses)) or re.search('\-', str(input_parses)): - # Catch variation in UTRs - # These should be in the sequence so can be directly validated. 
Need to pass to n. - try: - vr.validate(input_parses) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('intronic variant', error): - pass - elif re.search('datums is ill-defined', error): - called_ref = input_parses.posedit.edit.ref - to_n = evm.c_to_n(input_parses) - actual_ref = to_n.posedit.edit.ref - if called_ref != actual_ref: - error = 'Variant reference (' + called_ref + ') does not agree with reference sequence (' + actual_ref + ')' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - input_parses.posedit.edit.ref = '' - variant = str(input_parses) - - elif re.search('base must be >=1 for datum = SEQ_START or CDS_END', error): - error = 'The given coordinate is outside the bounds of the reference sequence.' - - try: - if re.match('-', str(input_parses.posedit.pos.start)): - # upstream positions - boundary = 1 - remainder = int(str(input_parses.posedit.pos.start)) - boundary - remainder = remainder + 1 - input_parses.posedit.pos.start.base = boundary - input_parses.posedit.pos.start.offset = remainder - if re.match('-', str(input_parses.posedit.pos.end)): - boundary = 1 - remainder = int(str(input_parses.posedit.pos.end)) - boundary - remainder = remainder + 1 - input_parses.posedit.pos.end.base = boundary - input_parses.posedit.pos.end.offset = remainder - report_gen = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, primary_assembly, - lose_vm, hp, hn, sf, nr_vm) - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + valstr( - report_gen) - except Exception as e: - exceptPass() - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - - if re.search('n.1-', str(input_parses)): - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' - genomic_position = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, primary_assembly, vm, - hp, hn, sf, nr_vm) - error = error + valstr(genomic_position) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - pass - - if re.search('\d\-', str(input_parses)) or re.search('\d\+', str(input_parses)): - # Quick look at syntax validation - try: - vr.validate(input_parses) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if re.search('bounds', error): - try: - report_gen = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, primary_assembly, - lose_vm, hp, hn, sf, nr_vm) - except hgvs.exceptions.HGVSError as e: - exceptPass() - else: - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + valstr( - report_gen) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - elif re.search('insertion length must be 1', error): - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - elif re.search('base start position must be <= end position', error): - correction = copy.deepcopy(input_parses) - st = input_parses.posedit.pos.start - ed = input_parses.posedit.pos.end - correction.posedit.pos.start = ed - correction.posedit.pos.end = st - error = error + ': Did you mean ' + str(correction) + '?' - # error = 'Interval start position ' + str(input_parses.posedit.pos.start) + ' > interval end position ' + str(input_parses.posedit.pos.end) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - elif re.search('Cannot validate sequence of an intronic variant', error): - try: - test_g = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, primary_assembly, vm, - hp, hn, sf, nr_vm) - back_to_n = evm.g_to_t(test_g, input_parses.ac) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('bounds', error): - report_gen = va_func.myevm_t_to_g(input_parses, hdp, no_norm_evm, - primary_assembly, lose_vm, hp, hn, sf, nr_vm) - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + valstr( - report_gen) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - exceptPass() - - # Create a specific minimal evm with no normalizer and no replace_reference - # Have to use this method due to potential multi chromosome error, note, normalizes but does not replace sequence - try: - output = va_func.noreplace_myevm_t_to_g(input_parses, evm, hdp, primary_assembly, vm, hn, - hp, sf, no_norm_evm) - except hgvs.exceptions.HGVSDataNotAvailableError as e: - tx_ac = input_parses.ac - try: - gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(tx_ac) - except: - gene_symbol = 'none' - if gene_symbol == 'none': - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - else: - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - except ValueError as e: - error = str(e) - if re.search('> end', error): - error = 'Interval start position ' + str( - input_parses.posedit.pos.start) + ' > interval end position ' + str( - input_parses.posedit.pos.end) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if re.search('base start position must be <= end position', error): - correction = copy.deepcopy(input_parses) - st = input_parses.posedit.pos.start - ed = input_parses.posedit.pos.end - 
correction.posedit.pos.start = ed - correction.posedit.pos.end = st - error = error + ': Did you mean ' + str(correction) + '?' - error = 'Interval start position ' + str( - input_parses.posedit.pos.start) + ' > interval end position ' + str( - input_parses.posedit.pos.end) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - try: - vr.validate(output) - except hgvs.exceptions.HGVSError as e: - error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - - else: - # All other variation - try: - vr.validate(input_parses) - except hgvs.exceptions.HGVSUnsupportedOperationError: - - exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - """ - if re.search('Length implied by coordinates', error): - # Applies to del and inv - # NOTE, there has been no normalization at all so this error is valid here - validation['warnings'] = validation['warnings'] + ': ' + str(error) - continue - # Will apply to > del and inv - if re.search('does not agree with reference sequence', error): - validation['warnings'] = validation['warnings'] + ': ' + str(error) - continue - # ensures x_y for insertions - if re.search('insertion length must be 1', error): - validation['warnings'] = validation['warnings'] + ': ' + str(error) - continue - # Boundary issue - if re.search('Variant coordinate is out of the bound of CDS region', error): - validation['warnings'] = validation['warnings'] + ': ' + str(error) - continue - """ - # This catches errors in introns - if re.search('base start position must be <= end position', error): - correction = copy.deepcopy(input_parses) - st = input_parses.posedit.pos.start - ed = input_parses.posedit.pos.end - correction.posedit.pos.start = ed - correction.posedit.pos.end = st - error = error + ': Did you mean ' + str(correction) + '?' 
- error = 'Interval start position ' + str( - input_parses.posedit.pos.start) + ' > interval end position ' + str( - input_parses.posedit.pos.end) - logger.warning(str(error)) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - continue - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - except hgvs.exceptions.HGVSDataNotAvailableError as e: - error = e - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('bounds', error): - error = error + ' (' + input_parses.ac + ')' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - pass - logger.trace("Variant structure and contents searches passed", validation) - # Mitochondrial variants - """ - Reformat m. into the new HGVS standard which is now m again! - """ - if type == ':m.' or re.match('NC_012920.1', str(input_parses.ac)) or re.match('NC_001807.4', - str(input_parses.ac)): - hgvs_mito = copy.deepcopy(input_parses) - if (re.match('NC_012920.1', str(hgvs_mito.ac)) and hgvs_mito.type == 'g') or ( - re.match('NC_001807.4', str(hgvs_mito.ac)) and hgvs_mito.type == 'g'): - hgvs_mito.type = 'm' - caution = '' - try: - vr.validate(hgvs_mito) - except hgvs.exceptions.HGVSError as e: - error = caution + ': ' + str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - except KeyError as e: - error = caution + ': Currently unable to validate ' + hgvs_mito.ac + ' sequence variation' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - # Any transcripts? 
- rel_var = va_func.relevant_transcripts(hgvs_mito, evm, hdp, alt_aln_method, reverse_normalizer) - hgvs_genomic = copy.deepcopy(hgvs_mito) - if len(rel_var) == 0: - validation['genomic_g'] = valstr(hgvs_mito) - validation['description'] = 'Homo sapiens mitochondrion, complete genome' - logger.info('Homo sapiens mitochondrion, complete genome') - continue - # Currently we are not expecting this path to be activated because not m. transcripts seem to be NM_ - # This route may throw up errors in the future - else: - pass - - # handle :p. - if type == ':p.': - error = 'false' - # Try to validate the variant - try: - hgvs_object = hp.parse_hgvs_variant(variant) - except hgvs.exceptions.HGVSError as e: - error = str(e) - try: - vr.validate(hgvs_object) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - # Get accurate descriptions from the relevant databases - # RefSeq databases - if alt_aln_method != 'genebuild': - # Gene description - requires GenBank search to get all the required info, i.e. 
transcript variant ID - # accession number - hgvs_object = hp.parse_hgvs_variant(variant) - accession = hgvs_object.ac - # Look for the accession in our database - # Connect to database and send request - record = va_func.entrez_efetch(db="nuccore", id=accession, rettype="gb", retmode="text") - try: - description = record.description - except: - description = 'Unable to recover the description of ' + accession + ' from Entrez' - try: - vr.validate(hgvs_object) - except hgvs.exceptions.HGVSError as e: - error = str(e) - else: - error = str( - hgvs_object) + ' is HGVS compliant and contains a valid reference amino acid description' - reason = 'Protein level variant descriptions are not fully supported due to redundancy in the genetic code' - validation['warnings'] = validation['warnings'] + ': ' + str(reason) + ': ' + str(error) - validation['protein'] = str(hgvs_object) - logger.warning(str(reason) + ": " + str(error)) - continue - - # handle :r. - """ - convert r, into c. - """ - trapped_input = input - if type == ':r.': - hgvs_input = hp.parse_hgvs_variant(input) # Traps the hgvs variant of r. for further use - # Change to coding variant - type = ':c.' - # Change input to reflect! 
- try: - hgvs_c = va_func.hgvs_r_to_c(hgvs_input) - except hgvs.exceptions.HGVSDataNotAvailableError as e: - error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - input = str(hgvs_c) - variant = str(hgvs_c) - - # COLLECT gene symbol, name and ACCESSION INFORMATION - # Gene symbol - logger.trace("Handled mitochondrial variants", validation) - """ - Identifies the transcript reference sequence name and HGNC gene symbol - """ - if (type != ':g.'): - error = 'false' - hgvs_vt = hp.parse_hgvs_variant(variant) - try: - tx_id_info = hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - logger.warning(error) - if error != 'false': - error = 'Please inform UTA admin of the following error: ' + str(error) - issue_link = "https://bitbucket.org/biocommons/uta/issues?status=new&status=open" - reason = "VariantValidator cannot recover information for transcript " + str( - hgvs_vt.ac) + ' beacuse it is not available in the Universal Transcript Archive' - validation['warnings'] = validation['warnings'] + ': ' + str(reason) - logger.warning(str(reason) + ": " + str(error)) - continue - else: - # Get hgnc Gene name from command - hgnc = tx_id_info[6] - issue_link = 'false' - - # ACCESS THE GENE INFORMATION RECORDS ON THE UTA DATABASE - # Refseq accession - tx_for_gene = va_func.tx_for_gene(hgnc, hdp) - refseq_ac = va_func.ng_extract(tx_for_gene) - - # Additional gene info - gene_info = hdp.get_gene_info(hgnc) - # Chromosomal location - try: - maploc = gene_info[1] - except: - maploc = '' - chr_loc = ("Chromosome location: " + maploc) - - # Get accurate transcript descriptions from the relevant databases - # RefSeq databases - if alt_aln_method != 'genebuild': - # Gene description - requires GenBank search to get all the required info, i.e. 
transcript variant ID - # accession number - hgvs_object = hp.parse_hgvs_variant(variant) - accession = hgvs_object.ac - # Look for the accession in our database - # Connect to database and send request - entry = va_dbCrl.data.in_entries(accession, 'transcript_info') - - # Analyse the returned data and take the necessary actions - # If the error key exists - if 'error' in entry: - # Open a hgvs exception log file in append mode - error = entry['description'] - validation['warnings'] = validation['warnings'] + ': ' + str( - error) + ': A Database error occurred, please contact admin' - logger.warning(str(error) + ": A Database error occurred, please contact admin") - continue - - # If the accession key is found - elif 'accession' in entry: - description = entry['description'] - # If the current entry is too old - if entry['expiry'] == 'true': - dbaction = 'update' - try: - entry = va_btch.data_add(input=input, alt_aln_method=alt_aln_method, - accession=accession, dbaction=dbaction, hp=hp, evm=evm, - hdp=hdp) - except hgvs.exceptions.HGVSError as e: - error = 'Transcript %s is not currently supported' % (accession) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - except Exception as e: - error = 'Unable to assign transcript identity records to ' + accession + ', potentially an obsolete record :' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - hgnc_gene_info = entry['description'] - else: - hgnc_gene_info = entry['description'] - # If the none key is found add the description to the database - elif 'none' in entry: - dbaction = 'insert' - try: - entry = va_btch.data_add(input=input, alt_aln_method=alt_aln_method, - accession=accession, dbaction=dbaction, hp=hp, evm=evm, - hdp=hdp) - except Exception as e: - logger.warning(str(e)) - error = 'Unable to assign transcript identity records to ' + accession + ', potentially an obsolete record :' - 
validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - hgnc_gene_info = entry['description'] - - # If no correct keys are found - else: - # Open a hgvs exception log file in append mode - error = 'Unknown error type' - validation['warnings'] = validation['warnings'] + ': ' + str( - error) + ': A Database error occurred, please contact admin' - logger.warning(str(error)) - continue - - # Ensembl databases - else: - # accession number - hgvs_object = hp.parse_hgvs_variant(variant) - accession = hgvs_object.ac - # Look for the accession in our database - # Connect to database and send request - entry = va_dbCrl.data.in_entries(accession, 'transcript_info') - - # Analyse the returned data and take the necessary actions - # If the error key exists - if 'error' in entry: - # Open a hgvs exception log file in append mode - error = entry['description'] - validation['warnings'] = validation['warnings'] + ': ' + str( - error) + ': A Database error occurred, please contact admin' - logger.warning(str(error)) - continue - - # If the accession key is found - elif 'accession' in entry: - description = entry['description'] - # If the current entry is too old - if entry['expiry'] == 'true': - dbaction = 'update' - entry = va_btch.data_add(input=input, alt_aln_method=alt_aln_method, - accession=accession, dbaction=dbaction, hp=hp, evm=evm, - hdp=hdp) - hgnc_gene_info = entry['description'] - else: - hgnc_gene_info = entry['description'] - # If the none key is found add the description to the database - elif 'none' in entry: - dbaction = 'insert' - try: - entry = va_btch.data_add(input=input, alt_aln_method=alt_aln_method, - accession=accession, dbaction=dbaction, hp=hp, evm=evm, - hdp=hdp) - except Exception as e: - logger.warning(str(e)) - error = 'Unable to assign transcript identity records to ' + accession + ', potentially an obsolete record :' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - 
logger.warning(str(error)) - continue - hgnc_gene_info = entry['description'] - - # If no correct keys are found - else: - # Open a hgvs exception log file in append mode - error = 'Unknown error type' - validation['warnings'] = validation['warnings'] + ': ' + str( - error) + ': A Database error occurred, please contact admin' - logger.warning(str(error)) - continue - - # Genomic type variants will need to be mapped to transcripts - """ - The following section is used to project genomic variants accurately onto - all relevant transcripts - """ - - if (type == ':g.'): - g_query = hp.parse_hgvs_variant(variant) - - # Genomic coordinates can be validated immediately - error = 'false' - try: - vr.validate(g_query) - except hgvs.exceptions.HGVSError as e: - error = str(e) - except KeyError: - error = 'Reference sequence ' + hgvs_genomic.ac + ' is either not supported or does not exist' - if error != 'false': - reason = 'Invalid variant description' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - pass - - # Set test to see if Norm alters the coords - g_test = hn.normalize(g_query) - - # Perform test - if g_query.posedit.pos != g_test.posedit.pos: - # validation['warnings'] = validation['warnings'] + ': ' + 'Input variant description normalized to ' + str(g_test) - hgvs_genomic = g_test - else: - hgvs_genomic = g_query - - # Collect rel_var - # rel_var is a keyworded list of relevant transcripts with associated coding variants - """ - Initial simple projection from the provided g. 
position all overlapping - transcripts - """ - rel_var = va_func.relevant_transcripts(hgvs_genomic, evm, hdp, alt_aln_method, reverse_normalizer) - - # Double check rel_vars have not been missed when mapping from a RefSeqGene - if len(rel_var) != 0 and re.match('NG_', str(hgvs_genomic.ac)): - for var in rel_var: - hgvs_coding_variant = hp.parse_hgvs_variant(var) - try: - hgvs_genomic = va_func.myevm_t_to_g(hgvs_coding_variant, hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) - except hgvs.exceptions.HGVSError as e: - try_rel_var = [] - else: - try_rel_var = va_func.relevant_transcripts(hgvs_genomic, evm, hdp, alt_aln_method, - reverse_normalizer) - if len(try_rel_var) > len(rel_var): - rel_var = try_rel_var - break - else: - continue - - # Tripple check this assumption by querying the gene position database - if len(rel_var) == 0: - vcf_dict = va_H2V.hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf) - not_di = str(hgvs_genomic.ac) + ':g.' + str(vcf_dict['pos']) + '_' + str( - int(vcf_dict['pos']) + (len(vcf_dict['ref']) - 1)) + 'del' + vcf_dict['ref'] + 'ins' + \ - vcf_dict['alt'] - hgvs_not_di = hp.parse_hgvs_variant(not_di) - rel_var = va_func.relevant_transcripts(hgvs_not_di, evm, hdp, alt_aln_method, - reverse_normalizer) - - # list return statements - """ - If mapping to transcripts has been unsuccessful, provide relevant details - """ - if len(rel_var) == 0: - - # Check for NG_ - rsg = re.compile('^NG_') - if rsg.search(variant): - # parse - hgvs_refseqgene = hp.parse_hgvs_variant(variant) - # Convert to chromosomal position - refseqgene_data = va_g2g.rsg_to_chr(hgvs_refseqgene, primary_assembly, hn, vr) - # There should only ever be one description returned - refseqgene_data = refseqgene_data[0] - - # Extract data - if refseqgene_data['valid'] == 'true': - input = refseqgene_data['hgvs_genomic'] - # re_submit - # Tag the line so that it is not written out - validation['warnings'] = validation[ - 'warnings'] + ': ' + variant + ' 
automapped to genome position ' + str( - input) - query = {'quibble': input, 'id': validation['id'], 'warnings': validation['warnings'], - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', - 'genomic_g': '', 'protein': '', 'write': 'true', - 'primary_assembly': primary_assembly, 'order': ordering} - coding = 'intergenic' - batch_list.append(query) - else: - error = 'Mapping unavailable for RefSeqGene ' + variant + ' using alignment method = ' + alt_aln_method - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - - # Chromosome build is not supported or intergenic??? - else: - sfm = va_scb.supported_for_mapping(hgvs_genomic.ac, primary_assembly) - if sfm == 'true': - try: - vr.validate(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: - error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - # Map to RefSeqGene if available - refseqgene_data = va_g2g.chr_to_rsg(hgvs_genomic, hn, vr) - rsg_data = '' - # Example {'gene': 'NTHL1', 'hgvs_refseqgene': 'NG_008412.1:g.3455_3464delCAAACACACA', 'valid': 'true'} - for data in refseqgene_data: - if data['valid'] == 'true': - data['hgvs_refseqgene'] = hp.parse_hgvs_variant(data['hgvs_refseqgene']) - data['hgvs_refseqgene'] = valstr(data['hgvs_refseqgene']) - rsg_data = rsg_data + data['hgvs_refseqgene'] + ' (' + data['gene'] + '), ' - - error = 'No transcripts found that fully overlap the described variation in the genomic sequence' - # set output type flag - set_output_type_flag = 'intergenic' - # set genomic and where available RefSeqGene outputs - validation['warnings'] = validation['warnings'] + ': ' + str(error) - validation['genomic_g'] = valstr(hgvs_genomic) - validation['genomic_r'] = str(rsg_data.split('(')[0]) - logger.warning(str(error)) - continue - else: - error = 'Please ensure the requested chromosome version relates to a supported genome build. 
Supported genome builds are: GRCh37, GRCh38, hg19 and hg38' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - - else: - # Tag the line so that it is not written out - validation['write'] = 'false' - - """ - Gap aware projection from g. to c. - """ - - # Set variables for problem specific warnings - gapped_alignment_warning = '' - corrective_action_taken = '' - gapped_transcripts = '' - auto_info = '' - - # Create a pseudo VCF so that normalization can be applied and a delins can be generated - hgvs_genomic_variant = hgvs_genomic - # Reverse normalize hgvs_genomic_variant: NOTE will replace ref - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic_variant) - hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - - # VCF - vcf_dict = va_H2V.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, - reverse_normalizer, sf) - chr = vcf_dict['chr'] - pos = vcf_dict['pos'] - ref = vcf_dict['ref'] - alt = vcf_dict['alt'] - - # Generate an end position - end = str(int(pos) + len(ref) - 1) - pos = str(pos) - - # take a look at the input genomic variant for potential base salvage - stash_ac = vcf_dict['chr'] - stash_pos = int(vcf_dict['pos']) - stash_ref = vcf_dict['ref'] - stash_alt = vcf_dict['alt'] - stash_end = end - # Re-Analyse genomic positions - if re.match('NG_', str(stash_input)): - c = hp.parse_hgvs_variant(rel_var[0]) - if hasattr(c.posedit.edit, 'ref') and c.posedit.edit.ref is not None: - c.posedit.edit.ref = c.posedit.edit.ref.upper() - if hasattr(c.posedit.edit, 'alt') and c.posedit.edit.alt is not None: - c.posedit.edit.alt = c.posedit.edit.alt.upper() - stash_input = va_func.myevm_t_to_g(c, hdp, no_norm_evm, primary_assembly, vm, hp, hn, sf, - nr_vm) - if re.match('NC_', str(stash_input)) or re.match('NT_', str(stash_input)) or re.match('NW_', - str( - stash_input)): - try: - hgvs_stash = hp.parse_hgvs_variant(stash_input) - except: - hgvs_stash = 
stash_input - if hasattr(hgvs_stash.posedit.edit, 'ref') and hgvs_stash.posedit.edit.ref is not None: - hgvs_stash.posedit.edit.ref = hgvs_stash.posedit.edit.ref.upper() - if hasattr(hgvs_stash.posedit.edit, 'alt') and hgvs_stash.posedit.edit.alt is not None: - hgvs_stash.posedit.edit.alt = hgvs_stash.posedit.edit.alt.upper() - - stash_ac = hgvs_stash.ac - # MAKE A NO NORM HGVS2VCF - stash_dict = va_H2V.pos_lock_hgvs2vcf(hgvs_stash, primary_assembly, reverse_normalizer, sf) - stash_ac = hgvs_stash.ac - stash_pos = int(stash_dict['pos']) - stash_ref = stash_dict['ref'] - stash_alt = stash_dict['alt'] - # Generate an end position - stash_end = str(stash_pos + len(stash_ref) - 1) - - # Store a not real deletion insertion - stored_hgvs_not_delins = hp.parse_hgvs_variant(str( - hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) - stash_hgvs_not_delins = hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_genomic_5pr.type + '.' + str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) - - # Set non-valid caution to false - non_valid_caution = 'false' - - # make an empty rel_var - nw_rel_var = [] - - # loop through rel_var and amend where required - for var in rel_var: - # Store the current hgvs:c. description - saved_hgvs_coding = hp.parse_hgvs_variant(var) - - # Remove un-selected transcripts - if select_transcripts != 'all': - tx_ac = saved_hgvs_coding.ac - # If it's in the selected tx dict, keep it - if tx_ac.split('.')[0] in select_transcripts_dict.keys(): - pass - # If not get rid of it! 
- else: - continue - - # Get orientation of the gene wrt genome and a list of exons mapped to the genome - ori = va_func.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=hgvs_genomic_5pr.ac, - alt_aln_method=alt_aln_method, hdp=hdp) - orientation = int(ori[0]['alt_strand']) - intronic_variant = 'false' - - if orientation == -1: - # position genomic at its most 5 prime position - try: - query_genomic = reverse_normalizer.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript ant test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): - pass - else: - hgvs_seek_var = saved_hgvs_coding - - elif orientation != -1: - # position genomic at its most 3 prime position - try: - query_genomic = hn.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): - pass - else: - hgvs_seek_var = 
saved_hgvs_coding - - try: - intron_test = hn.normalize(hgvs_seek_var) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - intronic_variant = 'hard_fail' - else: - # Double check to see whether the variant is actually intronic? - for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' - - if intronic_variant != 'hard_fail': - if re.search('\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search('\d+\-', str( - hgvs_seek_var.posedit.pos)) or re.search('\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search('\*\d+\-', - str(hgvs_seek_var.posedit.pos)): - # Double check to see whether the variant is actually intronic? 
- for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' - - if re.search('\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search('\d+\-', str( - hgvs_seek_var.posedit.pos)) or re.search('\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search('\*\d+\-', str(hgvs_seek_var.posedit.pos)): - # Double check to see whether the variant is actually intronic? - for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' - - # If exonic, process - if intronic_variant != 'true': - # map form reverse normalized g. to c. - hgvs_from_5n_g = no_norm_evm.g_to_t(hgvs_genomic_5pr, saved_hgvs_coding.ac) - - # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths - disparity_deletion_in = ['false', 'false'] - if stored_hgvs_not_delins != '': - # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) - # This test will only occur in dup of single base, insertion or substitution - if not re.search('_', str(hgvs_not_delins.posedit.pos)): - if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', - hgvs_genomic_5pr.posedit.edit.type): - # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos - plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) - plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 - plussed_hgvs_not_delins.posedit.edit.ref = '' - transcript_variant = no_norm_evm.g_to_t(plussed_hgvs_not_delins, - str(saved_hgvs_coding.ac)) - if (( - transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( - hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - else: - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = 
hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - else: - pass - else: - pass - - try: - tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSInvalidIntervalError: - tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_genomic_5pr, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - tx_hgvs_not_delins = saved_hgvs_coding - - # Create normalized version of tx_hgvs_not_delins - rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) - # Check for +ve base and adjust - if (re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)) or re.search('\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) or re.search( - '\-', str(rn_tx_hgvs_not_delins.posedit.pos.end))): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - 
rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - exceptPass() - elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: - # pass - - # Check for -ve base and adjust - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search('\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = 
rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - exceptPass() - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base back to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # Delete the ref - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # Add the additional base to the ALT - start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) - rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - else: - pass - - # Logic - if len(hgvs_not_delins.posedit.edit.ref) < len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - 
gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( - hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['chromosome', gap_length] - elif len(hgvs_not_delins.posedit.edit.ref) > len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( - rn_tx_hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] - else: - hgvs_stash_t = vm.g_to_t(stash_hgvs_not_delins, saved_hgvs_coding.ac) - if len(stash_hgvs_not_delins.posedit.edit.ref) > len( - hgvs_stash_t.posedit.edit.ref): - try: - hn.normalize(hgvs_stash_t) - except: - exceptPass() - else: - gap_length = len(stash_hgvs_not_delins.posedit.edit.ref) - len( - hgvs_stash_t.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] - try: - tx_hgvs_not_delins = vm.c_to_n(hgvs_stash_t) - except: - tx_hgvs_not_delins = hgvs_stash_t - hgvs_not_delins = stash_hgvs_not_delins - elif hgvs_stash_t.posedit.pos.start.offset != 0 or hgvs_stash_t.posedit.pos.end.offset != 0: - disparity_deletion_in = ['transcript', 'Requires Analysis'] - try: - tx_hgvs_not_delins = vm.c_to_n(hgvs_stash_t) - except: - tx_hgvs_not_delins = hgvs_stash_t - hgvs_not_delins = stash_hgvs_not_delins - hgvs_genomic_5pr = stash_hgvs_not_delins - else: - pass - - # Final sanity checks - try: - vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) - except Exception as e: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - hgvs_not_delins = saved_hgvs_coding - disparity_deletion_in = ['false', 'false'] - logger.warning(str(e)) - try: - hn.normalize(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - 
hgvs_not_delins = saved_hgvs_coding - disparity_deletion_in = ['false', 'false'] - elif re.match('Normalization of intronic variants is not supported', error): - # We know that this cannot be because of an intronic variant, so must be aligned to tx gap - disparity_deletion_in = ['transcript', 'Requires Analysis'] - logger.warning(error) - # Pre-processing of tx_hgvs_not_delins - try: - if tx_hgvs_not_delins.posedit.edit.alt is None: - tx_hgvs_not_delins.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( - tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) - - # GAP IN THE TRANSCRIPT DISPARITY DETECTED - if disparity_deletion_in[0] == 'transcript': - gap_position = '' - gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + primary_assembly - - # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search('\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search('\-', - str( - tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search('\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search('\-', - str( - tx_hgvs_not_delins.posedit.pos.end))): - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - # Copy the current variant - tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) - try: - if tx_gap_fill_variant.posedit.edit.alt is None: - tx_gap_fill_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + 
tx_gap_fill_variant.type + '.' + str( - tx_gap_fill_variant.posedit.pos.start) + '_' + str( - tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = hp.parse_hgvs_variant( - tx_gap_fill_variant_delins_from_dup) - - # Identify which half of the NOT-intron the start position of the variant is in - if re.search('\-', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search('\+', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - - try: - tx_gap_fill_variant = vm.n_to_c(tx_gap_fill_variant) - except: - exceptPass() - genomic_gap_fill_variant = vm.t_to_g(tx_gap_fill_variant, - reverse_normalized_hgvs_genomic.ac) - genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - - try: - c_tx_hgvs_not_delins = vm.n_to_c(tx_hgvs_not_delins) - except Exception: - c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = vm.t_to_g(c_tx_hgvs_not_delins, - hgvs_genomic_5pr.ac) - - # Ensure an ALT exists - try: - if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_gap_fill_variant_delins_from_dup = 
genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( - genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = hp.parse_hgvs_variant( - genomic_gap_fill_variant_delins_from_dup) - genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( - genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = hp.parse_hgvs_variant( - genomic_gap_fill_variant_alt_delins_from_dup) - - # Correct insertion alts - if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - genomic_gap_fill_variant_alt.posedit.pos.end.base) - genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - append_ref[1] - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) - else: - # Deletions with no ins - pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = genomic_gap_fill_variant.posedit.pos.start.base - alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base - 
ref_base_dict = {} - for base in reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = ref_start + 1 - - alt_base_dict = {} - - # NEED TO SEARCH FOR RANGE = and replace with interval_range - # Need to search for int and replace with integer - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, - genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): - if integer == alt_start: - alt_base_dict[integer] = str(''.join(alternate_bases)) - else: - alt_base_dict[integer] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, - genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): - if integer in alt_base_dict.keys(): - alternate_sequence_bases.append(alt_base_dict[integer]) - else: - alternate_sequence_bases.append(ref_base_dict[integer]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Add the new alt to the gap fill variant and generate transcript variant - genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = vm.g_to_t(genomic_gap_fill_variant, - tx_gap_fill_variant.ac) - - # Set warning - gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - disparity_deletion_in[1] = [gap_size] - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - if re.match('\-', str(for_location_c.posedit.pos.start.offset)): - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - else: - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - auto_info = auto_info + '%s' % (gap_position) - else: - if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # In this instance, we have identified a transcript gap but the n. version of - # the transcript variant but do not have a position which actually hits the gap, - # so the variant likely spans the gap, and is not picked up by an offset. - try: - c1 = vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - g1 = nr_vm.t_to_g(c1, hgvs_genomic.ac) - g3 = nr_vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - ng2 = hn.normalize(g2) - g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - len(g3.posedit.edit.ref) - 1) - try: - c2 = vm.g_to_t(g3, c1.ac) - if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - pass - else: - tx_hgvs_not_delins = c2 - try: - tx_hgvs_not_delins = vm.c_to_n(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSError: - exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError: - exceptPass() - - if re.search('\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - '\+', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search('\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - '\+', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - gpe = for_location_c.posedit.pos.end.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search('\-', - str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - '\-', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search('\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - '\-', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - c2 = vm.g_to_t(g2, c2.ac) - reference = c1.posedit.edit.ref + c2.posedit.edit.ref[1:] - alternate = c1.posedit.edit.alt + c2.posedit.edit.ref[1:] - c3 = copy.deepcopy(c1) - c3.posedit.pos.end = c2.posedit.pos.end - c3.posedit.edit.ref = '' # reference - c3.posedit.edit.alt = alternate - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - 1 - gpe = for_location_c.posedit.pos.end.base - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - else: - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) + '\n' - hgvs_refreshed_variant = tx_hgvs_not_delins - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - - # GAP IN THE CHROMOSOME - elif disparity_deletion_in[0] == 'chromosome': - # Set warning variables - gap_position = '' - gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + primary_assembly - hgvs_refreshed_variant = tx_hgvs_not_delins - # Warn - auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( - hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(disparity_deletion_in[ - 1]) + ' transcript base(s) that fail to align to chromosome ' + str( - hgvs_genomic.ac) + '\n' - gapped_transcripts = gapped_transcripts + str(hgvs_refreshed_variant.ac) + ' ' - else: - # Try the push - hgvs_stash = copy.deepcopy(stash_hgvs_not_delins) - stash_ac = hgvs_stash.ac - # Make a hard left and hard right not delins g. - stash_dict_right = va_H2V.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, sf) - stash_pos_right = int(stash_dict_right['pos']) - stash_ref_right = stash_dict_right['ref'] - stash_alt_right = stash_dict_right['alt'] - stash_end_right = str(stash_pos_right + len(stash_ref_right) - 1) - stash_hgvs_not_delins_right = hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' 
+ str( - stash_pos_right) + '_' + stash_end_right + 'del' + stash_ref_right + 'ins' + stash_alt_right) - stash_dict_left = va_H2V.hard_left_hgvs2vcf(hgvs_stash, primary_assembly, - reverse_normalizer, sf) - stash_pos_left = int(stash_dict_left['pos']) - stash_ref_left = stash_dict_left['ref'] - stash_alt_left = stash_dict_left['alt'] - stash_end_left = str(stash_pos_left + len(stash_ref_left) - 1) - stash_hgvs_not_delins_left = hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' + str( - stash_pos_left) + '_' + stash_end_left + 'del' + stash_ref_left + 'ins' + stash_alt_left) - # Map in-situ to the transcript left and right - try: - tx_hard_right = vm.g_to_t(stash_hgvs_not_delins_right, saved_hgvs_coding.ac) - except Exception as e: - tx_hard_right = saved_hgvs_coding - else: - normalize_stash_right = hn.normalize(stash_hgvs_not_delins_right) - if str(normalize_stash_right.posedit) == str(stash_hgvs_not_delins.posedit): - tx_hard_right = saved_hgvs_coding - try: - tx_hard_left = vm.g_to_t(stash_hgvs_not_delins_left, saved_hgvs_coding.ac) - except Exception as e: - tx_hard_left = saved_hgvs_coding - else: - normalize_stash_left = hn.normalize(stash_hgvs_not_delins_left) - if str(normalize_stash_left.posedit) == str(stash_hgvs_not_delins.posedit): - tx_hard_left = saved_hgvs_coding - # The Logic - Currently limited to genome gaps - if len(stash_hgvs_not_delins_right.posedit.edit.ref) < len( - tx_hard_right.posedit.edit.ref): - tx_hard_right = hn.normalize(tx_hard_right) - gap_position = '' - gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' may be an artefact of aligning the transcripts listed below with genome build ' + primary_assembly - hgvs_refreshed_variant = tx_hard_right - gapped_transcripts = gapped_transcripts + str(tx_hard_right.ac) + ' ' - elif len(stash_hgvs_not_delins_left.posedit.edit.ref) < len( - tx_hard_left.posedit.edit.ref): - tx_hard_left = hn.normalize(tx_hard_left) - gap_position = '' - gapped_alignment_warning = str( - 
hgvs_genomic_5pr) + ' may be an artefact of aligning the transcripts listed below with genome build ' + primary_assembly - hgvs_refreshed_variant = tx_hard_left - gapped_transcripts = gapped_transcripts + str(tx_hard_left.ac) + ' ' - else: - # Keep the same by re-setting rel_var - hgvs_refreshed_variant = saved_hgvs_coding - - # Edit the output - if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( - hgvs_refreshed_variant.type)): - hgvs_refreshed_variant = evm.n_to_c(hgvs_refreshed_variant) - else: - pass - try: - hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) - if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - hgvs_refreshed_variant.posedit.edit.ref[-1] == \ - hgvs_refreshed_variant.posedit.edit.alt[-1]: - hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - 0:-1] - hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - 0:-1] - hgvs_refreshed_variant.posedit.pos.end.base = hgvs_refreshed_variant.posedit.pos.end.base - 1 - hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) - elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - hgvs_refreshed_variant.posedit.edit.ref[0] == \ - hgvs_refreshed_variant.posedit.edit.alt[0]: - hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - 1:] - hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - 1:] - hgvs_refreshed_variant.posedit.pos.start.base = hgvs_refreshed_variant.posedit.pos.start.base + 1 - hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) - except Exception as e: - error = str(e) - # Ensure the final variant is not intronic nor does it cross exon boundaries - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - hgvs_refreshed_variant = saved_hgvs_coding - else: - pass - exceptPass() 
- # Send to empty nw_rel_var - nw_rel_var.append(hgvs_refreshed_variant) - - # Otherwise these variants need to be set - else: - corrective_action_taken = '' - gapped_alignment_warning = '' - # Send to empty nw_rel_var - nw_rel_var.append(saved_hgvs_coding) - - # Warn the user that the g. description is not valid - if gapped_alignment_warning != '': - if disparity_deletion_in[0] == 'transcript': - corrective_action_taken = 'Automap has deleted ' + str( - disparity_deletion_in[1]) + ' bp from chromosomal reference sequence ' + str( - hgvs_genomic.ac) + ' to ensure perfect alignment with transcript reference sequence(s)' + gapped_transcripts - if disparity_deletion_in[0] == 'chromosome': - corrective_action_taken = 'Automap has added ' + str( - disparity_deletion_in[1]) + ' bp to chromosomal reference sequence ' + str( - hgvs_genomic.ac) + ' to ensure perfect alignment with transcript reference sequence(s) ' + gapped_transcripts - - # Add additional data to the front of automap - if auto_info != '': - automap = auto_info + '\n' + automap - - rel_var = copy.deepcopy(nw_rel_var) - - # Set the values and append to batch_list - for c_description in rel_var: - query = {'quibble': str(c_description), 'id': validation['id'], - 'warnings': validation['warnings'], 'description': '', 'coding': '', - 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', 'write': 'true', - 'primary_assembly': primary_assembly, 'order': ordering} - batch_list.append(query) - logger.warning("Continue reached when mapping transcript types to variants") - # Call next description - continue - # TYPE = :c. - - if type == ':c.' or type == ':n.': - - # Flag for validation - valid = 'false' - # Collect information for genomic level validation - obj = hp.parse_hgvs_variant(variant) - - tx_ac = obj.ac - - # Do we keep it? - if select_transcripts != 'all': - if tx_ac in select_transcripts_dict_plus_version.keys(): - pass - # If not get rid of it! 
- else: - # By marking it as Do Not Write and continuing through the validation loop - validation['write'] = 'false' - continue - else: - pass - - # Set a cross_variant object - cross_variant = 'false' - # Se rec_var to '' so it can be updated later - rec_var = '' - try: - to_g = va_func.myevm_t_to_g(obj, hdp, no_norm_evm, primary_assembly, vm, hp, hn, sf, nr_vm) - genomic_ac = to_g.ac - except hgvs.exceptions.HGVSDataNotAvailableError as e: - if (re.search('~', str(e)) and re.search('Alignment is incomplete', str(e))) or re.match( - "No relevant genomic mapping options available", str(e)): - reason = 'Unable to map the input variant onto a genomic position' - if (re.search('~', str(e)) and re.search('Alignment is incomplete', str(e))): - error_list = str(e).split('~')[:-1] - combos = [ - 'Full alignment data between the specified transcript reference sequence and all GRCh37 and GRCh38 genomic reference sequences (including alternate chromosome assemblies, patches and RefSeqGenes) are not available: Consequently the input variant description cannot be fully validated and is not supported: Use the Gene to Transcripts function to determine whether an updated transcript reference sequence is available'] # Partial alignment data is available for the following genomic reference sequences: '] - error = '; '.join(combos) - error = error.replace(': ;', ': ') - else: - error = str(e) - error = error + ': Consequently the input variant description cannot be fully validated and is not supported: Use the Gene to Transcripts function to determine whether an updated transcript reference sequence is available' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - try: - gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(tx_ac) - except: - gene_symbol = 'none' - if gene_symbol == 'none': - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an 
alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - else: - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - except TypeError as e: - try: - gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(tx_ac) - except: - gene_symbol = 'none' - if gene_symbol == 'none': - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - else: - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - - # Get orientation of the gene wrt genome and a list of exons mapped to the genome - ori = va_func.tx_exons(tx_ac=tx_ac, alt_ac=genomic_ac, alt_aln_method=alt_aln_method, hdp=hdp) - orientation = int(ori[0]['alt_strand']) - intronic_variant = 'false' - - # Collect variant sequence information via normalisation (normalizer) or if intronic via mapping - # INTRONIC OFFSETS - Required for Exon table - # Variable to collect offset to exon boundary - ex_offset = 0 - plus = re.compile("\d\+\d") # finds digit + digit - minus = re.compile("\d\-\d") # finds digit - digit - - geno = re.compile(':g.') - if 
plus.search(input) or minus.search(input): - es = re.compile('error') - if es.search(str(to_g)): - if alt_aln_method != 'genebuild': - error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g - reason = "An error has occurred" - excep = "%s -- %s -- %s\n" % (time.ctime(), reason, variant) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - - else: - error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g - reason = "An error has occurred" - excep = "%s -- %s -- %s\n" % (time.ctime(), reason, variant) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - - else: - # Insertions at exon boundaries are miss-handled by vm.g_to_t - if ( - obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset == 0 and obj.posedit.pos.end.offset != 0) or ( - obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset != 0 and obj.posedit.pos.end.offset == 0): - variant = str(obj) - else: - # Normalize was I believe to replace ref. 
Mapping does this anyway - # to_g = hn.normalize(to_g) - variant = str(va_func.myevm_g_to_t(evm, to_g, tx_ac)) - tx_ac = '' - - elif geno.search(input): - if plus.search(variant) or minus.search(variant): - to_g = va_func.genomic(variant, no_norm_evm, hp, hdp, primary_assembly, vm, hn, sf, nr_vm) - es = re.compile('error') - if es.search(str(to_g)): - if alt_aln_method != 'genebuild': - error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g - reason = "An error has occurred" - excep = "%s -- %s -- %s\n" % (time.ctime(), reason, variant) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - - else: - error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g - reason = "An error has occurred" - excep = "%s -- %s -- %s\n" % (time.ctime(), reason, variant) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - # Insertions at exon boundaries are miss-handled by vm.g_to_t - if ( - obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset == 0 and obj.posedit.pos.end.offset != 0) or ( - obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset != 0 and obj.posedit.pos.end.offset == 0): - variant = str(obj) - else: - # Normalize was I believe to replace ref. 
Mapping does this anyway - # to_g = hn.normalize(to_g) - variant = str(va_func.myevm_g_to_t(evm, to_g, tx_ac)) - tx_ac = '' - - else: - # Normalize the variant - error = 'false' - try: - h_variant = hn.normalize(obj) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Unsupported normalization of variants spanning the exon-intron boundary', - error): - h_variant = obj - variant = variant - caution = 'This coding sequence variant description spans at least one intron' - automap = 'Use of the corresponding genomic sequence variant descriptions may be invalid. Please refer to https://www35.lamp.le.ac.uk/recommendations/' - validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str( - automap) - logger.warning(str(caution) + ": " + str(automap)) - else: - variant = str(h_variant) - - tx_ac = '' - # Create a crosser (exon boundary crossed) variant - crossed_variant = str(evm._maybe_normalize(obj)) - if variant == crossed_variant: - cross_variant = 'false' - else: - hgvs_crossed_variant = evm._maybe_normalize(obj) - cross_variant = [ - "Coding sequence allowing for exon boundary crossing (default = no crossing)", - crossed_variant, hgvs_crossed_variant.ac] - cr_available = 'true' - - # control of cross_variant - if boundary == 'false': - cross_variant = 'false' - - error = va_func.validate(variant, hp=hp, vr=vr) - if error == 'false': - valid = 'true' - else: - excep = "%s -- %s -- %s\n" % (time.ctime(), error, variant) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - - # Tackle the plus intronic offset - cck = 'false' - if (plus.search(input)): - # Regular expression catches the start of the interval only based on .00+00 pattern - inv_start = re.compile("\.\d+\+\d") - if (inv_start.search(input)): - # Find pattern e.g. 
+0000 and assign to a variable - off_value = re.search(r"(\+\d+)", input) - off_value = off_value.group(1) - # Integerise the value and assign to ex_offset - ex_offset = int(off_value) - cck = 'true' - if (minus.search(input)): - # Regular expression catches the start of the interval only based on .00-00 pattern - inv_start = re.compile("\.\d+\-\d") - if (inv_start.search(input)): - # Find pattern e.g. -0000 and assign to a variable - off_value = re.search(r"(\-\d+)", input) - off_value = off_value.group(1) - # Integerise the value and assign to ex_offset - ex_offset = int(off_value) - cck = 'true' - - # COORDINATE CHECKER - # hgvs will handle incorrect coordinates so need to automap errors - # Make sure any input intronic coordinates are correct - # Get the desired transcript - pat_r = re.compile(':r.') - pat_g = re.compile(':g.') - if cck == 'true': - dl = re.compile('del') - # This should only ever hit coding and RNA variants - if dl.search(variant): - # RNA - if pat_r.search(trapped_input): - - coding = va_func.coding(variant, hp) - trans_acc = coding.ac - # c to Genome coordinates - Map the variant to the genome - pre_var = va_func.genomic(variant, no_norm_evm, hp, hdp, primary_assembly, vm, hn, sf, - nr_vm) - # genome back to C coordinates - post_var = va_func.myevm_g_to_t(evm, pre_var, trans_acc) - - test = hp.parse_hgvs_variant(input) - if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: - caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' - automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' - # automapping of variant completed - # Change to rna variant - posedit = query.posedit - posedit = posedit.lower() - query.posedit = posedit - query.type = 'r' - post_var = str(query) - automap = trapped_input + ' automapped to ' + str(post_var) - validation['warnings'] = 
validation['warnings'] + ': ' + str(caution) + ': ' + str( - automap) - relevant = "Select the automapped transcript and click Submit to analyse" - rel_var = [] - rel_var.append(post_var) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = hp.parse_hgvs_variant(str(accessions)) - try: - tx_id_info = hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - - else: - # Get hgnc Gene name from command - data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - error = data['error'] - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = hp.parse_hgvs_variant(str(accessions[1])) - try: - tx_id_info = hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = va_func.hgnc_rest( - path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - error = data['error'] - validation['warnings'] = validation['warnings'] + ': ' + str( - error) - logger.warning(str(error)) - continue - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current 
= tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - # Kill current line and append for re-submission - # Tag the line so that it is not written out - validation['write'] = 'false' - # Set the values and append to batch_list - query = {'quibble': valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', - 'genomic_g': '', 'protein': '', 'write': 'true', - 'primary_assembly': primary_assembly, 'order': ordering} - batch_list.append(query) - - # Coding - else: - coding = va_func.coding(variant, hp) - trans_acc = coding.ac - # c to Genome coordinates - Map the variant to the genome - pre_var = hp.parse_hgvs_variant(variant) - try: - pre_var = va_func.myevm_t_to_g(pre_var, hdp, no_norm_evm, primary_assembly, vm, hp, - hn, sf, nr_vm) - except: - e = sys.exc_info()[1] - error = str(e) - reason = 'Input coordinates may be invalid' - if error == 'expected from_start_i <= from_end_i': - error = 'Automap is unable to correct the input exon/intron boundary coordinates, please check your variant description' - validation['warnings'] = validation['warnings'] + ': ' + str(error) - continue - else: - exceptPass() - else: - exceptPass() - # genome back to C coordinates - try: - post_var = va_func.myevm_g_to_t(evm, pre_var, trans_acc) - except hgvs.exceptions.HGVSError as error: - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - query = post_var - test = hp.parse_hgvs_variant(input) - if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: - caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' - automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' - # 
automapping of variant completed - automap = trapped_input + ' automapped to ' + str(post_var) - validation['warnings'] = str(validation['warnings']) + str(caution) + ': ' + str( - automap) - relevant = "Select the automapped transcript and click Submit to analyse" - rel_var = [] - rel_var.append(post_var) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = hp.parse_hgvs_variant(str(accessions)) - try: - tx_id_info = hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - error = data['error'] - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = hp.parse_hgvs_variant(str(accessions[1])) - try: - tx_id_info = hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = va_func.hgnc_rest( - path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - error = data['error'] - validation['warnings'] = validation['warnings'] + ': ' + str( - error) - logger.warning(str(error)) - continue - - else: - # Set the hgnc name 
correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - # Kill current line and append for re-submission - # Tag the line so that it is not written out - validation['write'] = 'false' - # Set the values and append to batch_list - query = {'quibble': valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', - 'genomic_g': '', 'protein': '', 'write': 'true', - 'primary_assembly': primary_assembly, 'order': ordering} - batch_list.append(query) - - else: - if pat_r.search(trapped_input): - coding = va_func.coding(variant, hp) - trans_acc = coding.ac - # c to Genome coordinates - Map the variant to the genome - pre_var = va_func.genomic(variant, no_norm_evm, hp, hdp, primary_assembly, vm, hn, sf, - nr_vm) - # genome back to C coordinates - post_var = va_func.myevm_g_to_t(evm, pre_var, trans_acc) - - test = hp.parse_hgvs_variant(input) - if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: - caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' - automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' - # automapping of variant completed - # Change to rna variant - posedit = query.posedit - posedit = posedit.lower() - query.posedit = posedit - query.type = 'r' - post_var = str(query) - automap = input + ' automapped to ' + post_var - validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str( - automap) - relevant = "Select the automapped transcript and click Submit to analyse" - rel_var = [] - rel_var.append(post_var) - # Add gene symbols to the link - cp_rel = 
copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = hp.parse_hgvs_variant(str(accessions)) - try: - tx_id_info = hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - error = data['error'] - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - # Kill current line and append for re-submission - # Tag the line so that it is not written out - validation['write'] = 'false' - # Set the values and append to batch_list - query = {'quibble': valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', - 'genomic_g': '', 'protein': '', 'write': 'true', - 'primary_assembly': primary_assembly, 'order': ordering} - batch_list.append(query) - - else: - coding = va_func.coding(variant, hp) - trans_acc = coding.ac - # c to Genome coordinates - Map the variant to the genome - pre_var = va_func.genomic(variant, no_norm_evm, hp, hdp, primary_assembly, vm, hn, sf, - nr_vm) - # genome back to C coordinates - post_var = va_func.myevm_g_to_t(evm, pre_var, trans_acc) - - test = hp.parse_hgvs_variant(input) - if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: - caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected 
transcript:' - automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' - # automapping of variant completed - automap = str(trapped_input) + ' automapped to ' + str(post_var) - validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str( - automap) - relevant = "Select the automapped transcript and click Submit to analyse" - rel_var = [] - rel_var.append(post_var) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = hp.parse_hgvs_variant(str(accessions)) - try: - tx_id_info = hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - reason = 'Cannot currently display the required information:' - error = data['error'] - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - # Kill current line and append for re-submission - # Tag the line so that it is not written out - validation['write'] = 'false' - # Set the values and append to batch_list - query = {'quibble': valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', - 'genomic_g': '', 'protein': '', 'write': 'true', - 'primary_assembly': primary_assembly, 'order': ordering} - batch_list.append(query) - - - # If cck not true - elif 
pat_r.search(trapped_input): - # set input hgvs object - hgvs_rna_input = hp.parse_hgvs_variant( - trapped_input) # Traps the hgvs variant of r. for further use - inp = str(va_func.hgvs_r_to_c(hgvs_rna_input)) - # Regex - plus = re.compile("\d\+\d") # finds digit + digit - minus = re.compile("\d\-\d") # finds digit - digit - if plus.search(input) or minus.search(input): - to_g = va_func.genomic(inp, no_norm_evm, hp, hdp, primary_assembly, vm, hn, sf, nr_vm) - es = re.compile('error') - if es.search(str(to_g)): - if alt_aln_method != 'genebuild': - error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g - reason = "An error has occurred" - excep = "%s -- %s -- %s\n" % (time.ctime(), reason, variant) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - - else: - error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g - reason = "An error has occurred" - excep = "%s -- %s -- %s\n" % (time.ctime(), reason, variant) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - - else: - # Set variants pre and post genomic norm - hgvs_inp = va_func.myevm_g_to_t(evm, to_g, tx_ac=obj.ac) - to_g = hn.normalize(to_g) - hgvs_otp = va_func.myevm_g_to_t(evm, to_g, tx_ac=obj.ac) - tx_ac = '' - else: - # Set variants pre and post RNA norm - hgvs_inp = hp.parse_hgvs_variant(inp) - try: - hgvs_otp = hn.normalize(hgvs_inp) - except hgvs.exceptions.HGVSError as e: - hgvs_otp = hgvs_inp - tx_ac = '' - - # Set remaining variables - redit = str(hgvs_otp.posedit.edit) - redit = redit.lower() - hgvs_otp.posedit.edit = redit - otp = str(hgvs_otp) - query = str(hgvs_otp.posedit.pos) - test = str(hgvs_inp.posedit.pos) - query = query.replace('T', 'U') - query = query.replace('ENSU', 'ENST') - test = test.replace('T', 'U') - test = test.replace('ENSU', 
'ENST') - output = otp.replace(':c.', ':r.') - # Apply coordinates test - if query != test: - caution = 'The variant description ' + input + ' requires alteration to comply with HGVS variant nomenclature:' - automap = 'Automap has corrected the variant description' - # automapping of variant completed - automap = trapped_input + ' automapped to ' + output - validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str(automap) - relevant = "Select the automapped transcript and click Submit to analyse" - rel_var = [] - rel_var.append(output) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = hp.parse_hgvs_variant(str(accessions)) - try: - tx_id_info = hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - error = data['error'] - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - # Kill current line and append for re-submission - # Tag the line so that it is not written out - validation['write'] = 'false' - # Set the values and append to batch_list - query = {'quibble': valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', - 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, - 'order': ordering} - 
batch_list.append(query) - - elif pat_g.search(input): - pass - - else: - query = hp.parse_hgvs_variant(variant) - test = hp.parse_hgvs_variant(input) - if query.posedit.pos != test.posedit.pos: - caution = 'The variant description ' + input + ' requires alteration to comply with HGVS variant nomenclature:' - automap = 'Automap has corrected the variant description' - # automapping of variant completed - automap = str(test) + ' automapped to ' + str(query) - validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str(automap) - relevant = "Select the automapped transcript and click Submit to analyse" - rel_var = [] - rel_var.append(query) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = hp.parse_hgvs_variant(str(accessions)) - try: - tx_id_info = hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - reason = 'Cannot currently display the required information:' - error = data['error'] - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - # Kill current line and append for re-submission - # Tag the line so that it is not written out - validation['write'] = 'false' - # Set the values and append to batch_list - query = {'quibble': valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, - 'description': '', 
'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', - 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, - 'order': ordering} - batch_list.append(query) - - # VALIDATION of intronic variants - pre_valid = hp.parse_hgvs_variant(input) - post_valid = hp.parse_hgvs_variant(variant) - if valid == 'false': - error = 'false' - genomic_validation = str( - va_func.genomic(input, no_norm_evm, hp, hdp, primary_assembly, vm, hn, sf, nr_vm)) - del_end = re.compile('\ddel$') - delins = re.compile('delins') - inv = re.compile('inv') - if valstr(pre_valid) != valstr(post_valid): - if type != ':g.': - if caution == '': - caution = valstr(pre_valid) + ' automapped to ' + valstr(post_valid) - else: - pass - validation['warnings'] = validation['warnings'] + ': ' + str(caution) - logger.warning(str(caution)) - else: - pass - else: - pass - - # Apply validation to intronic variant descriptions (should be valid but make sure) - error = va_func.validate(genomic_validation, hp=hp, vr=vr) - if error == 'false': - valid = 'true' - else: - - excep = "%s -- %s -- %s\n" % (time.ctime(), error, variant) - validation['warnings'] = validation['warnings'] + ': ' + str(error) - continue - - if valid == 'true': - var_tab = 'true' - cores = "HGVS-compliant variant descriptions" + warning - - # v0.1a1 edit - if valstr(pre_valid) != valstr(post_valid): - if type == ':g.': - if caution == '': - caution = valstr(pre_valid) + ' automapped to ' + valstr(post_valid) - else: - pass - validation['warnings'] = validation['warnings'] + ': ' + str(caution) - else: - pass - else: - pass - - # COLLECT VARIANT DESCRIPTIONS - ############################## - - # Coding sequence - BASED ON NORMALIZED VARIANT IF EXONIC - hgvs_coding = va_func.coding(variant, hp) - boundary = re.compile('exon-intron boundary') - spanning = re.compile('exon/intron') - - try: - hgvs_coding = hn.normalize(hgvs_coding) - except hgvs.exceptions.HGVSError as e: - error = str(e) - - # Gap compensating code 
status - gap_compensation = True - - # Gap gene black list - try: - gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(hgvs_coding.ac) - except Exception: - exceptPass() - else: - # If the gene symbol is not in the list, the value False will be returned - gap_compensation = gapGenes.gap_black_list(gene_symbol) - - # Intron spanning variants - if re.search('boundary', str(error)) or re.search('spanning', str(error)): - try: - hgvs_coding = evm._maybe_normalize(hgvs_coding) - gap_compensation = False - except hgvs.exceptions.HGVSError as error: - validation['warnings'] = validation['warnings'] + ': ' + str(error) - logger.warning(str(error)) - continue - else: - pass - - # Warn status - logger.warning("gap_compensation_1 = " + str(gap_compensation)) - coding = valstr(hgvs_coding) - - # RNA sequence - hgvs_rna = copy.deepcopy(hgvs_coding) - hgvs_rna = va_func.hgvs_c_to_r(hgvs_rna) - rna = str(hgvs_rna) - - # Genomic sequence - hgvs_genomic = va_func.myevm_t_to_g(hgvs_coding, hdp, no_norm_evm, primary_assembly, vm, hp, hn, - sf, nr_vm) - final_hgvs_genomic = hgvs_genomic - - # genomic_possibilities - # 1. take the simple 3 pr normalized hgvs_genomic - # 2. Lock in hgvs_genomic at its most 5 prime position wrt genome - hgvs_genomic_possibilities = [] - - # Loop out gap finding code under these circumstances! 
- if gap_compensation is True: - logger.warning('g_to_t gap code 1 active') - rn_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic) - hgvs_genomic_possibilities.append(rn_hgvs_genomic) - if orientation != -1: - try: - chromosome_normalized_hgvs_coding = reverse_normalizer.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - chromosome_normalized_hgvs_coding = hgvs_coding - else: - try: - chromosome_normalized_hgvs_coding = hn.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - chromosome_normalized_hgvs_coding = hgvs_coding - - most_3pr_hgvs_genomic = va_func.myvm_t_to_g(chromosome_normalized_hgvs_coding, - hgvs_genomic.ac, no_norm_evm, vm, hp, hn, sf, - nr_vm) - hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) - - # Push from side to side to try pick up odd placements - # MAKE A NO NORM HGVS2VCF - # First to the right - hgvs_stash = copy.deepcopy(hgvs_coding) - try: - hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) - except: - exceptPass() - try: - stash_ac = hgvs_stash.ac - stash_dict = va_H2V.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, sf) - stash_pos = int(stash_dict['pos']) - stash_ref = stash_dict['ref'] - stash_alt = stash_dict['alt'] - # Generate an end position - stash_end = str(stash_pos + len(stash_ref) - 1) - # make a not real deletion insertion - stash_hgvs_not_delins = hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' 
+ str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) - try: - stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) - except: - exceptPass() - # Store a tx copy for later use - test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) - # stash_genomic = vm.t_to_g(test_stash_tx_right, hgvs_genomic.ac) - stash_genomic = va_func.myvm_t_to_g(test_stash_tx_right, hgvs_genomic.ac, no_norm_evm, - vm, hp, hn, sf, nr_vm) - # Stash the outputs if required - # test variants = NC_000006.11:g.90403795G= (causes double identity) - # NC_000002.11:g.73675227_73675228insCTC (? incorrect assumed insertion position) - # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) - # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': - # pass - if len(test_stash_tx_right.posedit.edit.ref) == (( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - stash_tx_right = test_stash_tx_right - if hasattr(test_stash_tx_right.posedit.edit, - 'alt') and test_stash_tx_right.posedit.edit.alt is not None: - alt = test_stash_tx_right.posedit.edit.alt - else: - alt = '' - if hasattr(stash_genomic.posedit.edit, - 'alt') and stash_genomic.posedit.edit.alt is not None: - g_alt = stash_genomic.posedit.edit.alt - else: - g_alt = '' - if (len(alt) - ( - test_stash_tx_right.posedit.pos.end.base - test_stash_tx_right.posedit.pos.start.base) + 1) != ( - len(g_alt) - ( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - hgvs_genomic_possibilities.append(stash_genomic) - else: - hgvs_genomic_possibilities.append('') - elif test_stash_tx_right.posedit.edit.type == 'identity': - reform_ident = str(test_stash_tx_right).split(':')[0] - reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_right.posedit.pos) + 'del' + str( - test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) - hgvs_reform_ident = hp.parse_hgvs_variant(reform_ident) - try: - hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error): - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append('') - else: - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append(stash_genomic) - else: - try: - hn.normalize(test_stash_tx_right) - except hgvs.exceptions.HGVSUnsupportedOperationError: - hgvs_genomic_possibilities.append('') - else: - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: - test_stash_tx_right = copy.deepcopy(hgvs_coding) - exceptPass() - # Intronic positions not supported. Will cause a Value Error - except ValueError: - test_stash_tx_right = copy.deepcopy(hgvs_coding) - exceptPass() - - # Then to the left - hgvs_stash = copy.deepcopy(hgvs_coding) - try: - hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) - except: - exceptPass() - try: - stash_ac = hgvs_stash.ac - stash_dict = va_H2V.hard_left_hgvs2vcf(hgvs_stash, primary_assembly, reverse_normalizer, - sf) - stash_pos = int(stash_dict['pos']) - stash_ref = stash_dict['ref'] - stash_alt = stash_dict['alt'] - # Generate an end position - stash_end = str(stash_pos + len(stash_ref) - 1) - # make a not real deletion insertion - stash_hgvs_not_delins = hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' 
+ str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) - try: - stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) - except: - exceptPass() - # Store a tx copy for later use - test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) - # stash_genomic = vm.t_to_g(test_stash_tx_left, hgvs_genomic.ac) - stash_genomic = va_func.myvm_t_to_g(test_stash_tx_left, hgvs_genomic.ac, no_norm_evm, - vm, hp, hn, sf, nr_vm) - # Stash the outputs if required - # test variants = NC_000006.11:g.90403795G= (causes double identity) - # NC_000002.11:g.73675227_73675228insCTC - # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) - # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': - # pass - if len(test_stash_tx_left.posedit.edit.ref) == (( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): - stash_tx_left = test_stash_tx_left - if hasattr(test_stash_tx_left.posedit.edit, - 'alt') and test_stash_tx_left.posedit.edit.alt is not None: - alt = test_stash_tx_left.posedit.edit.alt - else: - alt = '' - if hasattr(stash_genomic.posedit.edit, - 'alt') and stash_genomic.posedit.edit.alt is not None: - g_alt = stash_genomic.posedit.edit.alt - else: - g_alt = '' - - if (len(alt) - ( - test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( - len(g_alt) - ( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - hgvs_genomic_possibilities.append(stash_genomic) - else: - hgvs_genomic_possibilities.append('') - elif test_stash_tx_left.posedit.edit.type == 'identity': - reform_ident = str(test_stash_tx_left).split(':')[0] - reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_left.posedit.pos) + 'del' + str( - test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) - hgvs_reform_ident = hp.parse_hgvs_variant(reform_ident) - try: - hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error): - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append('') - else: - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append(stash_genomic) - else: - try: - hn.normalize(test_stash_tx_left) - except hgvs.exceptions.HGVSUnsupportedOperationError: - hgvs_genomic_possibilities.append('') - else: - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: - test_stash_tx_left = copy.deepcopy(hgvs_coding) - exceptPass() - except ValueError: - test_stash_tx_left = copy.deepcopy(hgvs_coding) - exceptPass() - - # direct mapping from reverse_normalized transcript insertions in the delins format - try: - if hgvs_coding.posedit.edit.type == 'ins': - most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) - most_3pr_hgvs_transcript_variant = reverse_normalizer.normalize(hgvs_coding) - try: - n_3pr = vm.c_to_n(most_3pr_hgvs_transcript_variant) - n_5pr = vm.c_to_n(most_5pr_hgvs_transcript_variant) - except: - n_3pr = most_3pr_hgvs_transcript_variant - n_5pr = most_5pr_hgvs_transcript_variant - # Make into a delins by adding the ref bases to the variant ref and alt - pr3_ref = sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, - n_3pr.posedit.pos.end.base) - pr5_ref = sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, - n_5pr.posedit.pos.end.base) - most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref - most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref - most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[ - 0] + most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ - 
pr3_ref[1] - most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[ - 0] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ - pr5_ref[1] - # Map to the genome - genomic_from_most_3pr_hgvs_transcript_variant = vm.t_to_g( - most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) - genomic_from_most_5pr_hgvs_transcript_variant = vm.t_to_g( - most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) - # Normalize - If the variant spans a gap it should then form a static genomic variant - try: - genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_3pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if error == 'base start position must be <= end position': - start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base - end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start - genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_3pr_hgvs_transcript_variant) - try: - genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_5pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if error == 'base start position must be <= end position': - start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base - end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start - genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_5pr_hgvs_transcript_variant) - try: - if genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - except 
Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' + str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_3pr_hgvs_transcript_variant = hp.parse_hgvs_variant( - genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) - - try: - if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' + str( - most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref - most_3pr_hgvs_transcript_variant = hp.parse_hgvs_variant( - most_3pr_hgvs_transcript_variant_delins_from_dup) - - try: - if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' 
+ str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_5pr_hgvs_transcript_variant = hp.parse_hgvs_variant( - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) - - try: - if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' + str( - most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref - most_5pr_hgvs_transcript_variant = hp.parse_hgvs_variant( - most_5pr_hgvs_transcript_variant_delins_from_dup) - - if len(genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( - most_3pr_hgvs_transcript_variant.posedit.edit.alt): - hgvs_genomic_possibilities.append(genomic_from_most_3pr_hgvs_transcript_variant) - if len(genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( - most_5pr_hgvs_transcript_variant.posedit.edit.alt): - hgvs_genomic_possibilities.append(genomic_from_most_5pr_hgvs_transcript_variant) - - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - pass - - 
logger.info('\nGENOMIC POSSIBILITIES') - for possibility in hgvs_genomic_possibilities: - if possibility == '': - logger.info('X') - else: - logger.info(valstr(possibility)) - - logger.info('\n') - - # Set variables for problem specific warnings - gapped_alignment_warning = '' - corrective_action_taken = '' - gapped_transcripts = '' - auto_info = '' - - # Mark as not disparity detected - disparity_deletion_in = ['false', 'false'] - - # Loop through to see if a gap can be located - # Set the variables required for corrective normalization - possibility_counter = 0 - suppress_c_normalization = 'false' # Applies to boundary crossing normalization - - # Copy a version of hgvs_genomic_possibilities - for possibility in hgvs_genomic_possibilities: - possibility_counter = possibility_counter + 1 - - # Loop out stash possibilities which will not spot gaps so are empty - if possibility == '': - continue - - # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps - hgvs_genomic_variant = copy.deepcopy(possibility) - stored_hgvs_genomic_variant = copy.deepcopy(hgvs_genomic_variant) - - # Reverse normalize hgvs_genomic_variant: NOTE will replace ref - try: - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic_variant) - except hgvs.exceptions.HGVSError as e: - # Strange error caused by gap in genomic - error = str(e) - if re.search('base start position must be <= end position', error): - if hgvs_genomic.posedit.edit.type == 'delins': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic) - 
if hgvs_genomic.posedit.edit.type == 'del': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic) - if re.search('insertion length must be 1', error): - if hgvs_genomic.posedit.edit.type == 'ins': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) - lhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - rhb = sf.fetch_seq(str(hgvs_genomic.ac), start, end) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic) - - hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - # Store a copy for later use - stored_hgvs_genomic_5pr = copy.deepcopy(hgvs_genomic_5pr) - - # Create VCF - vcf_dict = va_H2V.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, - reverse_normalizer, sf) - chr = vcf_dict['chr'] - pos = vcf_dict['pos'] - ref = vcf_dict['ref'] - alt = vcf_dict['alt'] - - # Look for exonic gaps within transcript or chromosome - no_normalized_c = 'false' # Mark true to not produce an additional normalization of c. - - # Generate an end position - end = str(int(pos) + len(ref) - 1) - pos = str(pos) - - # Store a not real deletion insertion to test for gapping - stored_hgvs_not_delins = hp.parse_hgvs_variant(str( - hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' 
+ pos + '_' + end + 'del' + ref + 'ins' + alt) - v = [chr, pos, ref, alt] - - # Detect intronic variation using normalization - intronic_variant = 'false' - - # Save a copy of current hgvs_coding - try: - saved_hgvs_coding = no_norm_evm.g_to_t(stored_hgvs_not_delins, hgvs_coding.ac) - except hgvs.exceptions.HGVSInvalidIntervalError as e: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - saved_hgvs_coding = hgvs_coding - intronic_variant = 'true' - continue - else: - saved_hgvs_coding = no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, - hgvs_coding.ac) - - # Look for normalized variant options that do not match hgvs_coding - if orientation == -1: - # position genomic at its most 5 prime position - try: - query_genomic = reverse_normalizer.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if ( - hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding - - elif orientation != -1: - # position genomic at its most 3 prime position - try: - query_genomic = hn.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript ant test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if ( - 
hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding - - try: - intron_test = hn.normalize(hgvs_seek_var) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - intronic_variant = 'hard_fail' - else: - # Double check to see whether the variant is actually intronic? - for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' - - if intronic_variant != 'hard_fail': - if re.search('\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search('\d+\-', str( - hgvs_seek_var.posedit.pos)) or re.search('\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search('\*\d+\-', - str(hgvs_seek_var.posedit.pos)): - # Double check to see whether the variant is actually intronic? 
- for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' - - if re.search('\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search('\d+\-', str( - hgvs_seek_var.posedit.pos)) or re.search('\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search('\*\d+\-', str(hgvs_seek_var.posedit.pos)): - # Double check to see whether the variant is actually intronic? - for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' - - if intronic_variant != 'true': - # Flag RefSeqGene for ammendment - # amend_RefSeqGene = 'false' - # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths - if stored_hgvs_not_delins != '': - # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) - # This test will only occur in dup of single base, insertion or substitution - if not re.search('_', str(hgvs_not_delins.posedit.pos)): - if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', - hgvs_genomic_5pr.posedit.edit.type): - # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos - plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) - plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 - plussed_hgvs_not_delins.posedit.edit.ref = '' - transcript_variant = no_norm_evm.g_to_t(plussed_hgvs_not_delins, - str(saved_hgvs_coding.ac)) - if (( - transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( - hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str( - hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - else: - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = 
hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str( - hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - else: - pass - else: - pass - try: - tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSInvalidIntervalError: - tx_hgvs_not_delins = no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, - saved_hgvs_coding.ac) - # Create normalized version of tx_hgvs_not_delins - rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) - - # Check for +1 base and adjust - if re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - '\+', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - pass - - elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end 
base to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, - nr_vm) - - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - - # tx_hgvs_not_delins = rn_tx_hgvs_not_delins - elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, - nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: - # pass - - # Check for -ve base and adjust - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - '\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - pass - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - 
rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # Delete the ref - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # Add the additional base to the ALT - start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) - rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, - nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, - nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - else: - pass - - # Logic - if len(hgvs_not_delins.posedit.edit.ref) < len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( - hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['chromosome', gap_length] - elif len(hgvs_not_delins.posedit.edit.ref) > len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = 
len(hgvs_not_delins.posedit.edit.ref) - len( - rn_tx_hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] - else: - re_capture_tx_variant = [] - for internal_possibility in hgvs_genomic_possibilities: - if internal_possibility == '': - continue - - hgvs_t_possibility = vm.g_to_t(internal_possibility, hgvs_coding.ac) - if hgvs_t_possibility.posedit.edit.type == 'ins': - try: - hgvs_t_possibility = vm.c_to_n(hgvs_t_possibility) - except: - exceptPass() - ins_ref = sf.fetch_seq(hgvs_t_possibility.ac, - hgvs_t_possibility.posedit.pos.start.base - 1, - hgvs_t_possibility.posedit.pos.start.base + 1) - try: - hgvs_t_possibility = vm.n_to_c(hgvs_t_possibility) - except: - exceptPass() - hgvs_t_possibility.posedit.edit.ref = ins_ref - hgvs_t_possibility.posedit.edit.alt = ins_ref[ - 0] + hgvs_t_possibility.posedit.edit.alt + \ - ins_ref[1] - if internal_possibility.posedit.edit.type == 'ins': - ins_ref = sf.fetch_seq(internal_possibility.ac, - internal_possibility.posedit.pos.start.base - 1, - internal_possibility.posedit.pos.end.base) - internal_possibility.posedit.edit.ref = ins_ref - internal_possibility.posedit.edit.alt = ins_ref[ - 0] + internal_possibility.posedit.edit.alt + \ - ins_ref[1] - - if len(hgvs_t_possibility.posedit.edit.ref) < len( - internal_possibility.posedit.edit.ref): - gap_length = len(internal_possibility.posedit.edit.ref) - len( - hgvs_t_possibility.posedit.edit.ref) - re_capture_tx_variant = ['transcript', gap_length, - hgvs_t_possibility] - hgvs_not_delins = internal_possibility - hgvs_genomic_5pr = internal_possibility - break - - if re_capture_tx_variant != []: - try: - tx_hgvs_not_delins = vm.c_to_n(re_capture_tx_variant[2]) - except: - tx_hgvs_not_delins = re_capture_tx_variant[2] - disparity_deletion_in = re_capture_tx_variant[0:-1] - else: - pass - - # 'At hgvs_genomic' - # Final sanity checks - try: - vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) - except Exception as e: - if str(e) == 'start or end or 
both are beyond the bounds of transcript record': - continue - try: - hn.normalize(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - continue - elif re.match('Normalization of intronic variants is not supported', error): - # We know that this cannot be because of an intronic variant, so must be aligned to tx gap - disparity_deletion_in = ['transcript', 'Requires Analysis'] - - # amend_RefSeqGene = 'false' - # Recreate hgvs_genomic - if disparity_deletion_in[0] == 'transcript': - hgvs_genomic = hgvs_not_delins - - # Find oddly placed gaps where the tx variant is encompassed in the gap - if disparity_deletion_in[0] == 'false' and ( - possibility_counter == 3 or possibility_counter == 4): - rg = reverse_normalizer.normalize(hgvs_not_delins) - rtx = vm.g_to_t(rg, tx_hgvs_not_delins.ac) - fg = hn.normalize(hgvs_not_delins) - ftx = vm.g_to_t(fg, tx_hgvs_not_delins.ac) - if (rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( - ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): - exons = hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, alt_aln_method) - exonic = False - for ex_test in exons: - if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ - 7]) and ftx.posedit.pos.end.base in range(ex_test[6], ex_test[7]): - exonic = True - if exonic is True: - hgvs_not_delins = fg - hgvs_genomic = fg - hgvs_genomic_5pr = fg - try: - tx_hgvs_not_delins = vm.c_to_n(ftx) - except Exception: - tx_hgvs_not_delins = ftx - disparity_deletion_in = ['transcript', 'Requires Analysis'] - - # Pre-processing of tx_hgvs_not_delins - try: - if tx_hgvs_not_delins.posedit.edit.alt is None: - 
tx_hgvs_not_delins.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( - tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = hp.parse_hgvs_variant( - tx_hgvs_not_delins_delins_from_dup) - - # GAP IN THE TRANSCRIPT DISPARITY DETECTED - if disparity_deletion_in[0] == 'transcript': - # Suppress intron boundary crossing due to non-intron intron based c. seq annotations - suppress_c_normalization = 'true' - # amend_RefSeqGene = 'true' - # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search('\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( - '\-', - str( - tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search('\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( - '\-', - str( - tx_hgvs_not_delins.posedit.pos.end))): - - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - - # Copy the current variant - tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) - try: - if tx_gap_fill_variant.posedit.edit.alt is None: - tx_gap_fill_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( - tx_gap_fill_variant.posedit.pos.start) + '_' + str( - tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = hp.parse_hgvs_variant( - tx_gap_fill_variant_delins_from_dup) - - # Identify which half of the NOT-intron the start position of the variant is in - if re.search('\-', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search('\+', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - - try: - tx_gap_fill_variant = vm.n_to_c(tx_gap_fill_variant) - except: - exceptPass() - genomic_gap_fill_variant = vm.t_to_g(tx_gap_fill_variant, - reverse_normalized_hgvs_genomic.ac) - genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - - try: - c_tx_hgvs_not_delins = vm.n_to_c(tx_hgvs_not_delins) - except Exception: - c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = vm.t_to_g(c_tx_hgvs_not_delins, - hgvs_genomic_5pr.ac) - - # Ensure an ALT exists - try: - if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + 
genomic_gap_fill_variant.type + '.' + str( - genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = hp.parse_hgvs_variant( - genomic_gap_fill_variant_delins_from_dup) - genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( - genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = hp.parse_hgvs_variant( - genomic_gap_fill_variant_alt_delins_from_dup) - - # Correct insertion alts - if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - genomic_gap_fill_variant_alt.posedit.pos.end.base) - genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - append_ref[1] - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) - else: - # Deletions with no ins - pre_alternate_bases = list( - genomic_gap_fill_variant_alt.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = genomic_gap_fill_variant.posedit.pos.start.base - alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base - ref_base_dict = {} - for base in 
reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = ref_start + 1 - - alt_base_dict = {} - - # NEED TO SEARCH FOR RANGE = and replace with interval_range - # Need to search for int and replace with integer - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, - genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, - 1): - if integer == alt_start: - alt_base_dict[integer] = str(''.join(alternate_bases)) - else: - alt_base_dict[integer] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, - genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): - if integer in alt_base_dict.keys(): - alternate_sequence_bases.append(alt_base_dict[integer]) - else: - alternate_sequence_bases.append(ref_base_dict[integer]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Add the new alt to the gap fill variant and generate transcript variant - genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = vm.g_to_t(genomic_gap_fill_variant, - tx_gap_fill_variant.ac) - - # Set warning - gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - disparity_deletion_in[1] = [gap_size] - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - if re.match('\-', str(for_location_c.posedit.pos.start.offset)): - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - else: - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - auto_info = auto_info + '%s' % (gap_position) - - else: - if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # In this instance, we have identified a transcript gap but the n. version of - # the transcript variant but do not have a position which actually hits the gap, - # so the variant likely spans the gap, and is not picked up by an offset. - try: - c1 = vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - g1 = nr_vm.t_to_g(c1, hgvs_genomic.ac) - g3 = nr_vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - ng2 = hn.normalize(g2) - g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - len(g3.posedit.edit.ref) - 1) - try: - c2 = vm.g_to_t(g3, c1.ac) - if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - pass - else: - tx_hgvs_not_delins = c2 - try: - tx_hgvs_not_delins = vm.c_to_n(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSError: - exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError: - exceptPass() - - if re.search('\+', - str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - '\+', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search('\+', - str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - '\+', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - gpe = for_location_c.posedit.pos.end.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search('\-', - str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - '\-', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search('\-', - str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - '\-', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - 1 - gpe = for_location_c.posedit.pos.end.base - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - else: - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) + '\n' - tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.start.base + len( - tx_hgvs_not_delins.posedit.edit.ref) - 1 - hgvs_refreshed_variant = tx_hgvs_not_delins - - # GAP IN THE CHROMOSOME - elif disparity_deletion_in[0] == 'chromosome': - suppress_c_normalization = 'true' - # amend_RefSeqGene = 'true' - if possibility_counter == 3: - hgvs_refreshed_variant = stash_tx_right - elif possibility_counter == 4: - hgvs_refreshed_variant = stash_tx_left - else: - hgvs_refreshed_variant = chromosome_normalized_hgvs_coding - # Warn - auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( - hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' transcript base(s) that fail to align to chromosome ' + str( - hgvs_genomic.ac) + '\n' - else: - # Keep the same by re-setting rel_var - hgvs_refreshed_variant = hgvs_coding - # amend_RefSeqGene = 'false' - - # Edit the output - if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( - hgvs_refreshed_variant.type)): - hgvs_refreshed_variant = no_norm_evm.n_to_c(hgvs_refreshed_variant) - else: - pass - - try: - hn.normalize(hgvs_refreshed_variant) - except Exception as e: - error = str(e) - - # Ensure the final variant is not intronic nor does it cross exon boundaries - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - hgvs_refreshed_variant = saved_hgvs_coding - else: - logger.warning(error) - continue - - # Quick check to make sure the coding variant has not changed - try: - to_test = hn.normalize(hgvs_refreshed_variant) - except: - to_test = hgvs_refreshed_variant - if str(to_test.posedit.edit) != 
str(hgvs_coding.posedit.edit): - # Try the next available genomic option - if hgvs_coding.posedit.edit.type == 'identity' and to_test.posedit.edit.type == 'identity': - hgvs_coding = to_test - else: - continue - # Update hgvs_genomic - hgvs_genomic = va_func.myvm_t_to_g(hgvs_refreshed_variant, hgvs_genomic.ac, - no_norm_evm, vm, hp, hn, sf, nr_vm) - if hgvs_genomic.posedit.edit.type == 'identity': - re_c = vm.g_to_t(hgvs_genomic, hgvs_refreshed_variant.ac) - if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): - shuffle_left_g = copy.copy(hgvs_genomic) - shuffle_left_g.posedit.edit.ref = '' - shuffle_left_g.posedit.edit.alt = '' - shuffle_left_g.posedit.pos.start.base = shuffle_left_g.posedit.pos.start.base - 1 - shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 - shuffle_left_g = reverse_normalizer.normalize(shuffle_left_g) - re_c = vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) - if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): - hgvs_genomic = shuffle_left_g - - # If it is intronic, these vairables will not have been set - else: - # amend_RefSeqGene = 'false' - no_normalized_c = 'false' - - # Break if gap has been detected - if disparity_deletion_in[0] != 'false': - break - - # Warn user about gapping - if auto_info != '': - info_lines = auto_info.split('\n') - info_keys = {} - for information in info_lines: - info_keys[information] = '' - info_out = [] - info_out.append( - 'The displayed variants may be artefacts of aligning ' + hgvs_coding.ac + ' with genome build ' + primary_assembly) - for ky in info_keys.keys(): - info_out.append(ky) - auto_info = '\n'.join(info_out) - auto_info = auto_info + '\nCaution should be used when reporting the displayed variant descriptions: If you are unsure, please contact admin' - auto_info = str(auto_info.replace('\n', ': ')) - validation['warnings'] = validation['warnings'] + ': ' + str(auto_info) - logger.warning(str(auto_info)) - # Normailse hgvs_genomic - 
try: - hgvs_genomic = hn.normalize(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: - # Strange error caused by gap in genomic - error = str(e) - - if re.search('base start position must be <= end position', error) and \ - disparity_deletion_in[0] == 'chromosome': - if hgvs_genomic.posedit.edit.type == 'delins': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - hgvs_genomic = hn.normalize(hgvs_genomic) - if hgvs_genomic.posedit.edit.type == 'del': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - hgvs_genomic = hn.normalize(hgvs_genomic) - genomic = valstr(hgvs_genomic) - - else: - stored_hgvs_genomic_variant = hgvs_genomic - suppress_c_normalization = 'false' - gapped_alignment_warning = '' - auto_info = '' - genomic = valstr(hgvs_genomic) - - # Create pseudo VCF based on amended hgvs_genomic - hgvs_genomic_variant = hgvs_genomic - # Reverse normalize hgvs_genomic_variant: NOTE will replace ref - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic_variant) - - hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - - # Create vcf - vcf_dict = va_H2V.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, - reverse_normalizer, sf) - chr = vcf_dict['chr'] - pos = vcf_dict['pos'] - ref = vcf_dict['ref'] - alt = vcf_dict['alt'] - - # 
Create a VCF call - vcf_component_list = [str(chr), str(pos), str(ref), (alt)] - vcf_genomic = '-'.join(vcf_component_list) - - # DO NOT DELETE - # Generate an end position - end = str(int(pos) + len(ref) - 1) - pos = str(pos) - - # DO NOT DELETE - stored_hgvs_not_delins = hp.parse_hgvs_variant(str( - hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) - - # Apply gap code to re-format hgvs_coding - # Store the current hgvs:c. description - saved_hgvs_coding = copy.deepcopy(hgvs_coding) - - # Get orientation of the gene wrt genome and a list of exons mapped to the genome - ori = va_func.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=hgvs_genomic_5pr.ac, - alt_aln_method=alt_aln_method, hdp=hdp) - orientation = int(ori[0]['alt_strand']) - - # Look for normalized variant options that do not match hgvs_coding - hgvs_genomic = copy.deepcopy(hgvs_genomic_variant) - if orientation == -1: - # position genomic at its most 5 prime position - try: - query_genomic = reverse_normalizer.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript ant test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding - - elif orientation != -1: - # position genomic at its most 3 prime position - try: - query_genomic = hn.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript ant test for movement - try: 
- hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): - pass - else: - hgvs_seek_var = saved_hgvs_coding - - # Loop out gap finding code under these circumstances! - logger.warning("gap_compensation_2 = " + str(gap_compensation)) - if gap_compensation is True: - logger.warning('g_to_t gap code 2 active') - # is it in an exon? - is_it_in_an_exon = 'no' - for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - # Take from stored copy - # hgvs_genomic_5pr = copy.deepcopy(stored_hgvs_genomic_5pr) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - is_it_in_an_exon = 'yes' - if is_it_in_an_exon == 'yes': - # map form reverse normalized g. to c. 
- hgvs_from_5n_g = no_norm_evm.g_to_t(hgvs_genomic_5pr, saved_hgvs_coding.ac) - - # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths - disparity_deletion_in = ['false', 'false'] - if stored_hgvs_not_delins != '': - # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) - # This test will only occur in dup of single base, insertion or substitution - if not re.search('_', str(hgvs_not_delins.posedit.pos)): - if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', - hgvs_genomic_5pr.posedit.edit.type): - # For gap in chr, map to t. - but becaouse we have pushed to 5 prime by norm, add 1 to end pos - plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) - plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 - plussed_hgvs_not_delins.posedit.edit.ref = '' - transcript_variant = no_norm_evm.g_to_t(plussed_hgvs_not_delins, - str(saved_hgvs_coding.ac)) - if (( - transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( - hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', 
str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - else: - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - else: - pass - else: - pass - - hard_fail = 'false' - try: - tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) - except Exception as e: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - tx_hgvs_not_delins = hgvs_coding - hard_fail = 'true' - - # Create normalized version of 
tx_hgvs_not_delins - rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) - # Check for +ve base and adjust - if re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search('\+', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - exceptPass() - - elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: - # pass 
- - # Check for -ve base and adjust - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search('\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - exceptPass() - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base back to next available non-offset base - # rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base - 1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # Delete the ref - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # Add the additional base to the ALT - start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) - rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = 
no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myevm_t_to_g(test_tx_var, hdp, no_norm_evm, - primary_assembly, vm, hp, hn, sf, nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - else: - pass - - # Logic - if len(hgvs_not_delins.posedit.edit.ref) < len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( - hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['chromosome', gap_length] - elif len(hgvs_not_delins.posedit.edit.ref) > len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( - rn_tx_hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] - else: - re_capture_tx_variant = [] - for internal_possibility in hgvs_genomic_possibilities: - - if internal_possibility == '': - continue - - hgvs_t_possibility = vm.g_to_t(internal_possibility, hgvs_coding.ac) - if hgvs_t_possibility.posedit.edit.type == 'ins': - try: - hgvs_t_possibility = vm.c_to_n(hgvs_t_possibility) - except: - exceptPass() - ins_ref = sf.fetch_seq(hgvs_t_possibility.ac, - hgvs_t_possibility.posedit.pos.start.base - 1, - hgvs_t_possibility.posedit.pos.start.base + 1) - try: - hgvs_t_possibility = vm.n_to_c(hgvs_t_possibility) - except: - exceptPass() - hgvs_t_possibility.posedit.edit.ref = ins_ref - hgvs_t_possibility.posedit.edit.alt = ins_ref[ - 0] + hgvs_t_possibility.posedit.edit.alt + \ - ins_ref[1] - if internal_possibility.posedit.edit.type == 'ins': - ins_ref = sf.fetch_seq(internal_possibility.ac, - internal_possibility.posedit.pos.start.base - 1, - internal_possibility.posedit.pos.end.base) - internal_possibility.posedit.edit.ref = ins_ref - internal_possibility.posedit.edit.alt = ins_ref[ - 0] + internal_possibility.posedit.edit.alt + \ - 
ins_ref[1] - - if len(hgvs_t_possibility.posedit.edit.ref) < len( - internal_possibility.posedit.edit.ref): - gap_length = len(internal_possibility.posedit.edit.ref) - len( - hgvs_t_possibility.posedit.edit.ref) - re_capture_tx_variant = ['transcript', gap_length, hgvs_t_possibility] - hgvs_not_delins = internal_possibility - hgvs_genomic_5pr = internal_possibility - break - - if re_capture_tx_variant != []: - try: - tx_hgvs_not_delins = vm.c_to_n(re_capture_tx_variant[2]) - except: - tx_hgvs_not_delins = re_capture_tx_variant[2] - disparity_deletion_in = re_capture_tx_variant[0:-1] - else: - pass - - # Final sanity checks - try: - vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) - except Exception as e: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - logger.warning(str(e)) - continue - try: - hn.normalize(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - logger.warning(error) - continue - elif re.match('Normalization of intronic variants is not supported', error): - # We know that this cannot be because of an intronic variant, so must be aligned to tx gap - disparity_deletion_in = ['transcript', 'Requires Analysis'] - - if hard_fail == 'true': - disparity_deletion_in = ['false', 'false'] - - # Recreate hgvs_genomic - if disparity_deletion_in[0] == 'transcript': - hgvs_genomic = hgvs_not_delins - - # Pre-processing of tx_hgvs_not_delins - try: - if tx_hgvs_not_delins.posedit.edit.alt is None: - tx_hgvs_not_delins.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + 
'.' + str( - tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) - - - # GAP IN THE TRANSCRIPT DISPARITY DETECTED - if disparity_deletion_in[0] == 'transcript': - gap_position = '' - gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + primary_assembly - - # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search('\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search('\-', - str( - tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search('\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search('\-', - str( - tx_hgvs_not_delins.posedit.pos.end))): - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - - # Copy the current variant - tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) - try: - if tx_gap_fill_variant.posedit.edit.alt is None: - tx_gap_fill_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( - tx_gap_fill_variant.posedit.pos.start) + '_' + str( - tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = hp.parse_hgvs_variant( - tx_gap_fill_variant_delins_from_dup) - - # Identify which half of the NOT-intron the start position of the variant is in - if re.search('\-', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search('\+', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - - try: - tx_gap_fill_variant = vm.n_to_c(tx_gap_fill_variant) - except: - exceptPass() - genomic_gap_fill_variant = vm.t_to_g(tx_gap_fill_variant, - reverse_normalized_hgvs_genomic.ac) - genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - - try: - c_tx_hgvs_not_delins = vm.n_to_c(tx_hgvs_not_delins) - except Exception: - c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = vm.t_to_g(c_tx_hgvs_not_delins, - hgvs_genomic_5pr.ac) - - # Ensure an ALT exists - try: - if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + 
genomic_gap_fill_variant.type + '.' + str( - genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = hp.parse_hgvs_variant( - genomic_gap_fill_variant_delins_from_dup) - genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( - genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = hp.parse_hgvs_variant( - genomic_gap_fill_variant_alt_delins_from_dup) - - # Correct insertion alts - if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - genomic_gap_fill_variant_alt.posedit.pos.end.base) - genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - append_ref[1] - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) - else: - # Deletions with no ins - pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = genomic_gap_fill_variant.posedit.pos.start.base - alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base - ref_base_dict = {} - for base in 
reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = ref_start + 1 - - alt_base_dict = {} - - # NEED TO SEARCH FOR RANGE = and replace with interval_range - # Need to search for int and replace with integer - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, - genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): - if integer == alt_start: - alt_base_dict[integer] = str(''.join(alternate_bases)) - else: - alt_base_dict[integer] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, - genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): - if integer in alt_base_dict.keys(): - alternate_sequence_bases.append(alt_base_dict[integer]) - else: - alternate_sequence_bases.append(ref_base_dict[integer]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Add the new alt to the gap fill variant and generate transcript variant - genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = vm.g_to_t(genomic_gap_fill_variant, - tx_gap_fill_variant.ac) - - # Set warning - gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - disparity_deletion_in[1] = [gap_size] - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - if re.match('\-', str(for_location_c.posedit.pos.start.offset)): - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - else: - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - auto_info = auto_info + '%s' % (gap_position) - - else: - if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # In this instance, we have identified a transcript gap but the n. version of - # the transcript variant but do not have a position which actually hits the gap, - # so the variant likely spans the gap, and is not picked up by an offset. - try: - c1 = vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - g1 = nr_vm.t_to_g(c1, hgvs_genomic.ac) - g3 = nr_vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - ng2 = hn.normalize(g2) - g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - len(g3.posedit.edit.ref) - 1) - try: - c2 = vm.g_to_t(g3, c1.ac) - if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - pass - else: - tx_hgvs_not_delins = c2 - try: - tx_hgvs_not_delins = vm.c_to_n(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSError: - exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError: - exceptPass() - - if re.search('\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - '\+', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search('\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - '\+', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - gpe = for_location_c.posedit.pos.end.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search('\-', - str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - '\-', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search('\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - '\-', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - 1 - gpe = for_location_c.posedit.pos.end.base - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - else: - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) + '\n' - hgvs_refreshed_variant = tx_hgvs_not_delins - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - - # GAP IN THE CHROMOSOME - - elif disparity_deletion_in[0] == 'chromosome': - # Set warning variables - gap_position = '' - gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + primary_assembly - hgvs_refreshed_variant = tx_hgvs_not_delins - # Warn - auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( - hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(disparity_deletion_in[ - 1]) + ' transcript base(s) that fail to align to chromosome ' + str( - hgvs_genomic.ac) + '\n' - gapped_transcripts = gapped_transcripts + str(hgvs_refreshed_variant.ac) + ' ' - else: - # Keep the same by re-setting rel_var - hgvs_refreshed_variant = saved_hgvs_coding - - # Edit the output - if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( - hgvs_refreshed_variant.type)): - hgvs_refreshed_variant = evm.n_to_c(hgvs_refreshed_variant) - else: - pass - try: - hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) - if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - hgvs_refreshed_variant.posedit.edit.ref[-1] == \ - hgvs_refreshed_variant.posedit.edit.alt[-1]: - hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - 0:-1] - hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - 0:-1] - hgvs_refreshed_variant.posedit.pos.end.base = hgvs_refreshed_variant.posedit.pos.end.base - 1 - hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) - elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - 
hgvs_refreshed_variant.posedit.edit.ref[0] == \ - hgvs_refreshed_variant.posedit.edit.alt[0]: - hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - 1:] - hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - 1:] - hgvs_refreshed_variant.posedit.pos.start.base = hgvs_refreshed_variant.posedit.pos.start.base + 1 - hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) - except Exception as e: - error = str(e) - # Ensure the final variant is not intronic nor does it cross exon boundaries - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - hgvs_refreshed_variant = saved_hgvs_coding - else: - pass - - # Sort out equality to equality c. events where the code will add 2 additional bases - if hgvs_coding.posedit.edit.type == 'identity' and hgvs_refreshed_variant.posedit.edit.type == 'identity': # and len(hgvs_refreshed_variant.posedit.edit.ref) == (len(hgvs_coding.posedit.edit.ref) + 2): - pass - else: - hgvs_coding = copy.deepcopy(hgvs_refreshed_variant) - coding = valstr(hgvs_coding) - variant = coding - - # OBTAIN THE RefSeqGene coordinates - # Attempt 1 = UTA - sequences_for_tx = hdp.get_tx_mapping_options(hgvs_coding.ac) - recovered_rsg = [] - - for sequence in sequences_for_tx: - if re.match('^NG_', sequence[1]): - recovered_rsg.append(sequence[1]) - recovered_rsg.sort() - recovered_rsg.reverse() - - if 'NG_' in recovered_rsg: - refseqgene_ac = recovered_rsg - else: - refseqgene_ac = '' - - # Given the difficulties with mapping to and from RefSeqGenes, we now solely rely on UTA - if refseqgene_ac != '': - hgvs_refseq = vm.t_to_g(hgvs_coding, refseqgene_ac) - # Normalize the RefSeqGene Variant to the correct position - try: - hgvs_refseq = hn.normalize(hgvs_refseq) - except Exception as e: - # if re.search('insertion length must be 1', error): - hgvs_refseq = 'RefSeqGene 
record not available' - refseq = 'RefSeqGene record not available' - hgvs_refseq_ac = 'RefSeqGene record not available' - pass - else: - refseq = valstr(hgvs_refseq) - hgvs_refseq_ac = hgvs_refseq.ac - else: - hgvs_refseq = 'RefSeqGene record not available' - refseq = 'RefSeqGene record not available' - hgvs_refseq_ac = 'RefSeqGene record not available' - - # Predicted effect on protein - protein_dict = va_func.myc_to_p(hgvs_coding, evm, hdp, hp, hn, vm, sf, re_to_p=False) - if protein_dict['error'] == '': - hgvs_protein = protein_dict['hgvs_protein'] - protein = str(hgvs_protein) - else: - error = protein_dict['error'] - validation['warnings'] = validation['warnings'] + ': ' + str(error) - if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': - hgvs_protein = protein_dict['hgvs_protein'] - protein = str(hgvs_protein) - else: - logger.error(error) - continue - - # Gene orientation wrt genome - ori = va_func.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=hgvs_genomic.ac, - alt_aln_method=alt_aln_method, hdp=hdp) - ori = int(ori[0]['alt_strand']) - - # Look for normalized variant options that do not match hgvs_coding - # boundary crossing normalization - # Re-Save the required variants - hgvs_seek_var = copy.deepcopy(hgvs_coding) - saved_hgvs_coding = copy.deepcopy(hgvs_coding) - - if ori == -1: - # position genomic at its most 5 prime position - try: - query_genomic = reverse_normalizer.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if saved_hgvs_coding.posedit.edit.type != hgvs_seek_var.posedit.edit.type: - rec_var = 'false' - hgvs_seek_var = saved_hgvs_coding - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - elif 
suppress_c_normalization == 'true': - rec_var = 'false' - hgvs_seek_var = saved_hgvs_coding - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - elif (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - try: - automap = valstr(saved_hgvs_coding) + ' normalized to ' + valstr(hgvs_seek_var) - hgvs_coding = hgvs_seek_var - coding = valstr(hgvs_coding) - validation['warnings'] = validation['warnings'] + ': ' + automap - rng = hn.normalize(query_genomic) - except NotImplementedError: - pass - try: - c_for_p = vm.g_to_t(rng, hgvs_coding.ac) - except hgvs.exceptions.HGVSInvalidIntervalError as e: - c_for_p = seek_var - try: - # Predicted effect on protein - protein_dict = va_func.myc_to_p(c_for_p, evm, hdp, hp, hn, vm, sf, re_to_p=False) - if protein_dict['error'] == '': - hgvs_protein = protein_dict['hgvs_protein'] - protein = str(hgvs_protein) - else: - error = protein_dict['error'] - if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': - hgvs_protein = protein_dict['hgvs_protein'] - validation['warnings'] = validation['warnings'] + ': ' + str(error) - # Replace protein description in vars table - protein = str(hgvs_protein) - except NotImplementedError: - exceptPass() - else: - # Double check protein position by normalize genomic, and normalize back to c. 
for normalize or not to normalize issue - coding = valstr(hgvs_coding) - - elif ori != -1: - # position genomic at its most 3 prime position - try: - query_genomic = hn.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if saved_hgvs_coding.posedit.edit.type != hgvs_seek_var.posedit.edit.type: - rec_var = 'false' - hgvs_seek_var = saved_hgvs_coding - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - elif suppress_c_normalization == 'true': - rec_var = 'false' - hgvs_seek_var = saved_hgvs_coding - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - elif (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - try: - automap = valstr(saved_hgvs_coding) + ' normalized to ' + valstr(hgvs_seek_var) - hgvs_coding = hgvs_seek_var - coding = valstr(hgvs_coding) - validation['warnings'] = validation['warnings'] + ': ' + automap - except NotImplementedError: - exceptPass() - else: - # Double check protein position by reverse_norm genomic, and normalize back to c. 
for normalize or not to normalize issue - coding = valstr(hgvs_coding) - rng = reverse_normalizer.normalize(query_genomic) - try: - # Diagram where - = intron and E = Exon - - # 3 prime - # ---------EEEEEEEEEEEEEEEEE----------- - # < - # Result, normalize of new variant will baulk at intronic - # 5 prime - # < - # Result, normalize of new variant will be happy - c_for_p = vm.g_to_t(rng, hgvs_coding.ac) - try: - hn.normalize(c_for_p) - except hgvs.exceptions.HGVSError as e: - exceptPass() - else: - # hgvs_protein = va_func.protein(str(c_for_p), evm, hp) - protein_dict = va_func.myc_to_p(c_for_p, evm, hdp, hp, hn, vm, sf, - re_to_p=False) - if protein_dict['error'] == '': - hgvs_protein = protein_dict['hgvs_protein'] - protein = str(hgvs_protein) - else: - error = protein_dict['error'] - if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': - hgvs_protein = protein_dict['hgvs_protein'] - validation['warnings'] = validation['warnings'] + ': ' + str(error) - # Replace protein description in vars table - protein = str(hgvs_protein) - except Exception: - exceptPass() - - # Check for up-to-date transcript version - updated_transcript_variant = 'None' - tx_id_info = hdp.get_tx_identity_info(hgvs_coding.ac) - uta_gene_symbol = tx_id_info[6] - tx_for_gene = hdp.get_tx_for_gene(uta_gene_symbol) - ac_root, ac_version = hgvs_coding.ac.split('.') - version_tracking = '0' - update = '' - for accession in tx_for_gene: - try: - if re.match(ac_root, accession[3]): - query_version = accession[3].split('.')[1] - if int(query_version) > int(ac_version) and int(query_version) > int( - version_tracking): - version_tracking = query_version - update = accession[3] - except ValueError: - exceptPass() - - if update != '': - hgvs_updated = copy.deepcopy(hgvs_coding) - hgvs_updated.ac = update - try: - vr.validate(hgvs_updated) - # Updated reference sequence - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('does not agree with 
reference sequence', str(error)): - match = re.findall('\(([GATC]+)\)', error) - new_ref = match[1] - hgvs_updated.posedit.edit.ref = new_ref - vr.validate(hgvs_updated) - updated_transcript_variant = hgvs_updated - else: - pass - updated_transcript_variant = hgvs_updated - validation['warnings'] = validation[ - 'warnings'] + ': ' + 'A more recent version of the selected reference sequence ' + hgvs_coding.ac + ' is available (' + updated_transcript_variant.ac + ')' + ': ' + str( - updated_transcript_variant) + ' MUST be fully validated prior to use in reports: select_variants=' + valstr( - updated_transcript_variant) - - # Set the data - set_output_type_flag = 'gene' - validation['description'] = hgnc_gene_info - validation['coding'] = str(hgvs_coding) - validation['genomic_r'] = str(hgvs_refseq) - validation['genomic_g'] = str(hgvs_genomic) - validation['protein'] = str(hgvs_protein) - validation['primary_assembly'] = primary_assembly - if gap_compensation is True: - validation['test_stash_tx_left'] = test_stash_tx_left - validation['test_stash_tx_right'] = test_stash_tx_right - # finish timing - logger.traceEnd(validation) - # Report errors to User and VV admin - except KeyboardInterrupt: - raise - except: - set_output_type_flag = 'error' - error = 'Validation error' - validation['warnings'] = str(error) - exc_type, exc_value, last_traceback = sys.exc_info() - te = traceback.format_exc() - tbk = [str(exc_type), str(exc_value), str(te)] - er = str('\n'.join(tbk)) - logger.error(str(exc_type) + " " + str(exc_value)) - logger.debug(er) - - continue - - # Outside the for loop - ###################### - logger.trace("End of for loop") - # order the rows - # from operator import itemgetter - by_order = sorted(batch_list, key=itemgetter('order')) - - for valid in by_order: - if 'write' in valid.keys(): - if valid['write'] == 'true': - # Blank VCF - # chr = '' - # pos = '' - # ref = '' - # alt = '' - - # Fromulate a json type response - dict_out = {} - - # Set gap 
compensation bool - gap_compensation = True - - # warngins - warnings = valid['warnings'] - warnings = re.sub('del[GATC][GATC][GATC][GATC]+', 'del', warnings) - warnings = re.sub('^: ', '', warnings) - warnings = re.sub('::', ':', warnings) - - # Submitted variant - submitted = valid['id'] - - # Genomic sequence variation - genomic_variant = valid['genomic_g'] - - # genomic accession - if genomic_variant != '': - hgvs_genomic_variant = hp.parse_hgvs_variant(genomic_variant) - genomic_variant = valstr(hgvs_genomic_variant) - genomic_accession = hgvs_genomic_variant.ac - else: - genomic_accession = '' - - # RefSeqGene variation - refseqgene_variant = valid['genomic_r'] - refseqgene_variant = refseqgene_variant.strip() - if re.search('RefSeqGene', refseqgene_variant) or refseqgene_variant == '': - warnings = warnings + ': ' + refseqgene_variant - refseqgene_variant = '' - lrg_variant = '' - hgvs_refseqgene_variant = 'false' - else: - hgvs_refseqgene_variant = hp.parse_hgvs_variant(refseqgene_variant) - rsg_ac = va_dbCrl.data.get_lrgID_from_RefSeqGeneID(str(hgvs_refseqgene_variant.ac)) - if rsg_ac[0] == 'none': - lrg_variant = '' - else: - hgvs_lrg = copy.deepcopy(hgvs_refseqgene_variant) - hgvs_lrg.ac = rsg_ac[0] - lrg_variant = valstr(hgvs_lrg) - if rsg_ac[1] == 'public': - pass - else: - warnings = warnings + ': The current status of ' + str( - hgvs_lrg.ac) + ' is pending therefore changes may be made to the LRG reference sequence' - - # Transcript sequence variation - tx_variant = valid['coding'] - if tx_variant != '': - if '(' in tx_variant and ')' in tx_variant: - tx_variant = tx_variant.split('(')[1] - tx_variant = tx_variant.replace(')', '') - - # transcript accession - hgvs_tx_variant = hp.parse_hgvs_variant(tx_variant) - tx_variant = valstr(hgvs_tx_variant) - hgvs_transcript_variant = hp.parse_hgvs_variant(tx_variant) - transcript_accession = hgvs_transcript_variant.ac - - # Handle LRG - lrg_status = 'public' - lrg_transcript = 
va_dbCrl.data.get_lrgTranscriptID_from_RefSeqTranscriptID(transcript_accession) - if lrg_transcript == 'none': - lrg_transcript_variant = '' - else: - # Note - LRG availability is dependant on UTA containing the data. In some - # instances we will be able to display the LRG_tx without being able to - # display the LRG gene data - - # if not re.search('RefSeqGene', refseqgene_variant) or refseqgene_variant != '': - # if hgvs_refseqgene_variant != 'RefSeqGene record not available' and hgvs_refseqgene_variant != 'false': - try: - hgvs_lrg_t = vm.g_to_t(hgvs_refseqgene_variant, transcript_accession) - hgvs_lrg_t.ac = lrg_transcript - lrg_transcript_variant = valstr(hgvs_lrg_t) - except: - if hgvs_transcript_variant.posedit.pos.start.offset == 0 and hgvs_transcript_variant.posedit.pos.end.offset == 0: - hgvs_lrg_t = copy.copy(hgvs_transcript_variant) - hgvs_lrg_t.ac = lrg_transcript - lrg_transcript_variant = valstr(hgvs_lrg_t) - else: - lrg_transcript_variant = '' - else: - transcript_accession = '' - lrg_transcript_variant = '' - - # Look for intronic variants - if transcript_accession != '' and genomic_accession != '': - # Remove del bases - str_transcript = valstr(hgvs_transcript_variant) - hgvs_transcript_variant = hp.parse_hgvs_variant(str_transcript) - try: - vr.validate(hgvs_transcript_variant) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('intronic variant', error): - genome_context_transcript_variant = genomic_accession + '(' + transcript_accession + '):c.' 
+ str( - hgvs_transcript_variant.posedit) - if refseqgene_variant != '': - hgvs_refseqgene_variant = hp.parse_hgvs_variant(refseqgene_variant) - refseqgene_accession = hgvs_refseqgene_variant.ac - hgvs_coding_from_refseqgene = vm.g_to_t(hgvs_refseqgene_variant, - hgvs_transcript_variant.ac) - hgvs_coding_from_refseqgene = valstr(hgvs_coding_from_refseqgene) - hgvs_coding_from_refseqgene = hp.parse_hgvs_variant(hgvs_coding_from_refseqgene) - RefSeqGene_context_transcript_variant = refseqgene_accession + '(' + transcript_accession + '):c.' + str( - hgvs_coding_from_refseqgene.posedit.pos) + str( - hgvs_coding_from_refseqgene.posedit.edit) - else: - RefSeqGene_context_transcript_variant = '' - else: - genome_context_transcript_variant = '' # transcript_variant - RefSeqGene_context_transcript_variant = '' - else: - genome_context_transcript_variant = '' # transcript_variant - RefSeqGene_context_transcript_variant = '' - else: - genome_context_transcript_variant = '' - RefSeqGene_context_transcript_variant = '' - - # Protein description - predicted_protein_variant = valid['protein'] - if re.match('NP_', predicted_protein_variant): - rs_p, pred_prot_posedit = predicted_protein_variant.split(':') - lrg_p = va_dbCrl.data.get_lrgProteinID_from_RefSeqProteinID(rs_p) - if re.match('LRG', lrg_p): - predicted_protein_variant = rs_p + '(' + lrg_p + '):' + pred_prot_posedit - - # Gene - if transcript_accession != '': - try: - gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(transcript_accession) - except: - gene_symbol = 'Unable to verify gene symbol for ' + str(transcript_accession) - else: - gene_symbol = '' - - # Transcript description - transcript_description = valid['description'] - - # Stashed variants - if 'test_stash_tx_left' not in validation: - pass - else: - test_stash_tx_left = validation['test_stash_tx_left'] - if 'test_stash_tx_right' not in validation: - pass - else: - test_stash_tx_right = validation['test_stash_tx_right'] - - # Multiple genomic 
variants - # multi_gen_vars = [] - if tx_variant != '': - hgvs_coding = hp.parse_hgvs_variant(str(tx_variant)) - # Gap gene black list - try: - gene_symbol = va_dbCrl.data.get_gene_symbol_from_transcriptID(hgvs_coding.ac) - except Exception: - exceptPass() - else: - # If the gene symbol is not in the list, the value False will be returned - gap_compensation = gapGenes.gap_black_list(gene_symbol) - - # Look for variants spanning introns - try: - hgvs_coding = hn.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.search('boundary', str(error)) or re.search('spanning', str(error)): - gap_compensation = False - else: - pass - except hgvs.exceptions.HGVSError: - exceptPass() - - # Warn gap code status - logger.warning("gap_compensation_3 = " + str(gap_compensation)) - multi_g = [] - multi_list = [] - mapping_options = hdp.get_tx_mapping_options(hgvs_coding.ac) - for alt_chr in mapping_options: - if (re.match('NC_', alt_chr[1]) or re.match('NT_', alt_chr[1]) or re.match('NW_', - alt_chr[1])) and \ - alt_chr[2] == alt_aln_method: - multi_list.append(alt_chr[1]) - - for alt_chr in multi_list: - try: - # Re set ori - ori = va_func.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=alt_chr, - alt_aln_method=alt_aln_method, hdp=hdp) - orientation = int(ori[0]['alt_strand']) - hgvs_alt_genomic = va_func.myvm_t_to_g(hgvs_coding, alt_chr, no_norm_evm, vm, hp, hn, - sf, nr_vm) - # Set hgvs_genomic accordingly - hgvs_genomic = copy.deepcopy(hgvs_alt_genomic) - - # genomic_possibilities - # 1. take the simple 3 pr normalized hgvs_genomic - # 2. Lock in hgvs_genomic at its most 5 prime position wrt genome - hgvs_genomic_possibilities = [] - - # Loop out gap code under these circumstances! 
- if gap_compensation is True: - logger.warning('g_to_t gap code 3 active') - rn_hgvs_genomic = reverse_normalizer.normalize(hgvs_alt_genomic) - hgvs_genomic_possibilities.append(rn_hgvs_genomic) - if orientation != -1: - try: - chromosome_normalized_hgvs_coding = reverse_normalizer.normalize( - hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - chromosome_normalized_hgvs_coding = hgvs_coding - else: - try: - chromosome_normalized_hgvs_coding = hn.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - chromosome_normalized_hgvs_coding = hgvs_coding - - most_3pr_hgvs_genomic = va_func.myvm_t_to_g(chromosome_normalized_hgvs_coding, - alt_chr, - no_norm_evm, vm, hp, hn, sf, nr_vm) - hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) - - # First to the right - hgvs_stash = copy.deepcopy(hgvs_coding) - try: - hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) - except: - exceptPass() - try: - stash_ac = hgvs_stash.ac - stash_dict = va_H2V.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, sf) - stash_pos = int(stash_dict['pos']) - stash_ref = stash_dict['ref'] - stash_alt = stash_dict['alt'] - # Generate an end position - stash_end = str(stash_pos + len(stash_ref) - 1) - # make a not real deletion insertion - stash_hgvs_not_delins = hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' + str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) - try: - stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) - except: - exceptPass() - # Store a tx copy for later use - test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) - stash_genomic = va_func.myvm_t_to_g(test_stash_tx_right, hgvs_alt_genomic.ac, - no_norm_evm, vm, hp, hn, sf, nr_vm) - # Stash the outputs if required - # test variants = NC_000006.11:g.90403795G= (causes double identity) - # NC_000002.11:g.73675227_73675228insCTC (? 
incorrect assumed insertion position) - # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) - # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': - # pass - if len(test_stash_tx_right.posedit.edit.ref) == (( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - stash_tx_right = test_stash_tx_right - if hasattr(test_stash_tx_right.posedit.edit, - 'alt') and test_stash_tx_right.posedit.edit.alt is not None: - alt = test_stash_tx_right.posedit.edit.alt - else: - alt = '' - if hasattr(stash_genomic.posedit.edit, - 'alt') and stash_genomic.posedit.edit.alt is not None: - g_alt = stash_genomic.posedit.edit.alt - else: - g_alt = '' - if (len(alt) - ( - test_stash_tx_right.posedit.pos.end.base - test_stash_tx_right.posedit.pos.start.base) + 1) != ( - len(g_alt) - ( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - hgvs_genomic_possibilities.append(stash_genomic) - else: - hgvs_genomic_possibilities.append('') - elif test_stash_tx_right.posedit.edit.type == 'identity': - reform_ident = str(test_stash_tx_right).split(':')[0] - reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_right.posedit.pos) + 'del' + str( - test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) - hgvs_reform_ident = hp.parse_hgvs_variant(reform_ident) - try: - hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error): - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append('') - else: - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append(stash_genomic) - else: - try: - hn.normalize(test_stash_tx_right) - except hgvs.exceptions.HGVSUnsupportedOperationError: - hgvs_genomic_possibilities.append('') - else: - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: - exceptPass() - except ValueError: - exceptPass() - - # Then to the left - hgvs_stash = copy.deepcopy(hgvs_coding) - try: - hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) - except: - exceptPass() - try: - stash_ac = hgvs_stash.ac - stash_dict = va_H2V.hard_left_hgvs2vcf(hgvs_stash, primary_assembly, - reverse_normalizer, sf) - stash_pos = int(stash_dict['pos']) - stash_ref = stash_dict['ref'] - stash_alt = stash_dict['alt'] - # Generate an end position - stash_end = str(stash_pos + len(stash_ref) - 1) - # make a not real deletion insertion - stash_hgvs_not_delins = hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' 
+ str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) - try: - stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) - except: - exceptPass() - # Store a tx copy for later use - test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) - stash_genomic = va_func.myvm_t_to_g(test_stash_tx_left, hgvs_alt_genomic.ac, - no_norm_evm, vm, hp, hn, sf, nr_vm) - # Stash the outputs if required - # test variants = NC_000006.11:g.90403795G= (causes double identity) - # NC_000002.11:g.73675227_73675228insCTC - # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) - # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': - # pass - if len(test_stash_tx_left.posedit.edit.ref) == (( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): - stash_tx_left = test_stash_tx_left - if hasattr(test_stash_tx_left.posedit.edit, - 'alt') and test_stash_tx_left.posedit.edit.alt is not None: - alt = test_stash_tx_left.posedit.edit.alt - else: - alt = '' - if hasattr(stash_genomic.posedit.edit, - 'alt') and stash_genomic.posedit.edit.alt is not None: - g_alt = stash_genomic.posedit.edit.alt - else: - g_alt = '' - if (len(alt) - ( - test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( - len(g_alt) - ( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - hgvs_genomic_possibilities.append(stash_genomic) - else: - hgvs_genomic_possibilities.append('') - elif test_stash_tx_left.posedit.edit.type == 'identity': - reform_ident = str(test_stash_tx_left).split(':')[0] - reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_left.posedit.pos) + 'del' + str( - test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) - hgvs_reform_ident = hp.parse_hgvs_variant(reform_ident) - try: - hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error): - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append('') - else: - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append(stash_genomic) - else: - try: - hn.normalize(test_stash_tx_left) - except hgvs.exceptions.HGVSUnsupportedOperationError: - hgvs_genomic_possibilities.append('') - else: - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: - exceptPass() - except ValueError: - exceptPass() - - # direct mapping from reverse_normalized transcript insertions in the delins format - try: - if hgvs_coding.posedit.edit.type == 'ins': - most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) - most_3pr_hgvs_transcript_variant = reverse_normalizer.normalize(hgvs_coding) - try: - n_3pr = vm.c_to_n(most_3pr_hgvs_transcript_variant) - n_5pr = vm.c_to_n(most_5pr_hgvs_transcript_variant) - except: - n_3pr = most_3pr_hgvs_transcript_variant - n_5pr = most_5pr_hgvs_transcript_variant - # Make into a delins by adding the ref bases to the variant ref and alt - pr3_ref = sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, - n_3pr.posedit.pos.end.base) - pr5_ref = sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, - n_5pr.posedit.pos.end.base) - most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref - most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref - most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[ - 0] + most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ - pr3_ref[1] - most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[ - 0] + 
most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ - pr5_ref[1] - # Map to the genome - genomic_from_most_3pr_hgvs_transcript_variant = vm.t_to_g( - most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) - genomic_from_most_5pr_hgvs_transcript_variant = vm.t_to_g( - most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) - - # Normalize - If the variant spans a gap it should then form a static genomic variant - try: - genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_3pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if error == 'base start position must be <= end position': - start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base - end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start - genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_3pr_hgvs_transcript_variant) - try: - genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_5pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if error == 'base start position must be <= end position': - start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base - end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start - genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_5pr_hgvs_transcript_variant) - - try: - if genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - 
genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' + str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_3pr_hgvs_transcript_variant = hp.parse_hgvs_variant( - genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) - - try: - if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' + str( - most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref - most_3pr_hgvs_transcript_variant = hp.parse_hgvs_variant( - most_3pr_hgvs_transcript_variant_delins_from_dup) - - try: - if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' 
+ str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_5pr_hgvs_transcript_variant = hp.parse_hgvs_variant( - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) - - try: - if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' + str( - most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref - most_5pr_hgvs_transcript_variant = hp.parse_hgvs_variant( - most_5pr_hgvs_transcript_variant_delins_from_dup) - - if len( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( - most_3pr_hgvs_transcript_variant.posedit.edit.alt): - hgvs_genomic_possibilities.append( - genomic_from_most_3pr_hgvs_transcript_variant) - if len( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( - most_5pr_hgvs_transcript_variant.posedit.edit.alt): - hgvs_genomic_possibilities.append( - genomic_from_most_5pr_hgvs_transcript_variant) - - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - pass - 
exceptPass() - - # Set variables for problem specific warnings - gapped_alignment_warning = '' - corrective_action_taken = '' - gapped_transcripts = '' - auto_info = '' - - # Mark as not disparity detected - disparity_deletion_in = ['false', 'false'] - # Loop through to see if a gap can be located - possibility_counter = 0 - for possibility in hgvs_genomic_possibilities: - possibility_counter = possibility_counter + 1 - # Loop out stash possibilities which will not spot gaps so are empty - if possibility == '': - continue - - # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps - hgvs_genomic_variant = possibility - stored_hgvs_genomic_variant = copy.deepcopy(hgvs_genomic_variant) - - # Reverse normalize hgvs_genomic_variant: NOTE will replace ref - try: - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( - hgvs_genomic_variant) - except hgvs.exceptions.HGVSError as e: - # Strange error caused by gap in genomic - error = str(e) - if re.search('base start position must be <= end position', error): - if hgvs_genomic.posedit.edit.type == 'delins': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( - hgvs_genomic) - if hgvs_genomic.posedit.edit.type == 'del': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + rhb - hgvs_genomic.posedit.pos.start.base = end - 
hgvs_genomic.posedit.pos.end.base = start - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( - hgvs_genomic) - if re.search('insertion length must be 1', error): - if hgvs_genomic.posedit.edit.type == 'ins': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) - lhb = sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - rhb = sf.fetch_seq(str(hgvs_genomic.ac), start, end) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( - hgvs_genomic) - - hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - # Store a copy for later use - stored_hgvs_genomic_5pr = copy.deepcopy(hgvs_genomic_5pr) - - # Make VCF - vcf_dict = va_H2V.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, - reverse_normalizer, sf) - chr = vcf_dict['chr'] - pos = vcf_dict['pos'] - ref = vcf_dict['ref'] - alt = vcf_dict['alt'] - - # Look for exonic gaps within transcript or chromosome - no_normalized_c = 'false' # Mark true to not produce an additional normalization of c. - - # Generate an end position - end = str(int(pos) + len(ref) - 1) - pos = str(pos) - - # Store a not real deletion insertion to test for gapping - stored_hgvs_not_delins = hp.parse_hgvs_variant(str( - hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' 
+ pos + '_' + end + 'del' + ref + 'ins' + alt) - v = [chr, pos, ref, alt] - - # Save a copy of current hgvs_coding - try: - saved_hgvs_coding = no_norm_evm.g_to_t(stored_hgvs_not_delins, - hgvs_coding.ac) - except Exception as e: - if str( - e) == 'start or end or both are beyond the bounds of transcript record': - saved_hgvs_coding = hgvs_coding - continue - - # Detect intronic variation using normalization - intronic_variant = 'false' - # Look for normalized variant options that do not match hgvs_coding - if orientation == -1: - # position genomic at its most 5 prime position - try: - query_genomic = reverse_normalizer.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript ant test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if ( - hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding - - elif orientation != -1: - # position genomic at its most 3 prime position - try: - query_genomic = hn.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - seek_var = valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if ( - hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - 
hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding - - try: - intron_test = hn.normalize(hgvs_seek_var) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - intronic_variant = 'hard_fail' - else: - # Double check to see whether the variant is actually intronic? - for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' - - if intronic_variant != 'hard_fail': - if re.search('\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search('\d+\-', - str( - hgvs_seek_var.posedit.pos)) or re.search( - '\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search('\*\d+\-', str( - hgvs_seek_var.posedit.pos)): - # Double check to see whether the variant is actually intronic? 
- for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' - - if intronic_variant != 'true': - # Flag RefSeqGene for ammendment - # amend_RefSeqGene = 'false' - # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths - if stored_hgvs_not_delins != '': - # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) - # This test will only occur in dup of single base, insertion or substitution - if not re.search('_', str(hgvs_not_delins.posedit.pos)): - if re.search('dup', - hgvs_genomic_5pr.posedit.edit.type) or re.search( - 'ins', hgvs_genomic_5pr.posedit.edit.type): - # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos - plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) - plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 - plussed_hgvs_not_delins.posedit.edit.ref = '' - transcript_variant = no_norm_evm.g_to_t(plussed_hgvs_not_delins, - str( - saved_hgvs_coding.ac)) - if (( - transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( - hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, - end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[ - 1:] - elif re.search('ins', str( - hgvs_genomic_5pr.posedit.edit)) and re.search('del', - str( - hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', str( - hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', - str( - hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, - end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[ - 1:] - else: - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = 
hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, - end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[ - 1:] - elif re.search('ins', str( - hgvs_genomic_5pr.posedit.edit)) and re.search('del', - str( - hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', str( - hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', - str( - hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(hgvs_not_delins.ac), start, - end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[ - 1:] - else: - pass - else: - pass - tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - saved_hgvs_coding.ac) - # Create normalized version of tx_hgvs_not_delins - rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) - # Check for +1 base and adjust - if re.search('\+', - str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - '\+', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - exceptPass() - - elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 - 
rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, vm, hp, hn, sf, - nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str( - saved_hgvs_coding.ac)) - elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, vm, hp, hn, sf, - nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str( - saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: - # pass - - # Check for -ve base and adjust - elif re.search('\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - '\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - exceptPass() - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base back to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # Delete the ref - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # Add the additional base to the ALT - start = 
rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) - rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, vm, hp, hn, sf, - nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str( - saved_hgvs_coding.ac)) - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = va_func.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, vm, hp, hn, sf, - nr_vm) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str( - saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - else: - exceptPass() - - # Logic - if len(hgvs_not_delins.posedit.edit.ref) < len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( - hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['chromosome', gap_length] - elif len(hgvs_not_delins.posedit.edit.ref) > len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( - rn_tx_hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] - else: - re_capture_tx_variant = [] - for possibility in 
hgvs_genomic_possibilities: - if possibility == '': - continue - hgvs_t_possibility = vm.g_to_t(possibility, hgvs_coding.ac) - if hgvs_t_possibility.posedit.edit.type == 'ins': - try: - hgvs_t_possibility = vm.c_to_n(hgvs_t_possibility) - except: - continue - if hgvs_t_possibility.posedit.pos.start.offset != 0 or hgvs_t_possibility.posedit.pos.end.offset != 0: - continue - ins_ref = sf.fetch_seq(hgvs_t_possibility.ac, - hgvs_t_possibility.posedit.pos.start.base - 1, - hgvs_t_possibility.posedit.pos.start.base + 1) - try: - hgvs_t_possibility = vm.n_to_c(hgvs_t_possibility) - except: - continue - hgvs_t_possibility.posedit.edit.ref = ins_ref - hgvs_t_possibility.posedit.edit.alt = ins_ref[ - 0] + hgvs_t_possibility.posedit.edit.alt + \ - ins_ref[1] - if possibility.posedit.edit.type == 'ins': - ins_ref = sf.fetch_seq(possibility.ac, - possibility.posedit.pos.start.base - 1, - possibility.posedit.pos.end.base) - possibility.posedit.edit.ref = ins_ref - possibility.posedit.edit.alt = ins_ref[ - 0] + possibility.posedit.edit.alt + \ - ins_ref[1] - if len(hgvs_t_possibility.posedit.edit.ref) < len( - possibility.posedit.edit.ref): - gap_length = len(possibility.posedit.edit.ref) - len( - hgvs_t_possibility.posedit.edit.ref) - re_capture_tx_variant = ['transcript', gap_length, - hgvs_t_possibility] - hgvs_not_delins = possibility - hgvs_genomic_5pr = possibility - break - - if re_capture_tx_variant != []: - try: - tx_hgvs_not_delins = vm.c_to_n(re_capture_tx_variant[2]) - except: - tx_hgvs_not_delins = re_capture_tx_variant[2] - disparity_deletion_in = re_capture_tx_variant[0:-1] - else: - pass - - # Final sanity checks - try: - vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) - except Exception as e: - if str( - e) == 'start or end or both are beyond the bounds of transcript record': - continue - try: - hn.normalize(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - - if re.match('Normalization of intronic variants is 
not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - continue - elif re.match('Normalization of intronic variants is not supported', - error): - # We know that this cannot be because of an intronic variant, so must be aligned to tx gap - disparity_deletion_in = ['transcript', 'Requires Analysis'] - - # Recreate hgvs_genomic - if disparity_deletion_in[0] == 'transcript': - hgvs_genomic = hgvs_not_delins - - # Find oddly placed gaps where the tx variant is encompassed in the gap - if disparity_deletion_in[0] == 'false' and ( - possibility_counter == 3 or possibility_counter == 4): - rg = reverse_normalizer.normalize(hgvs_not_delins) - rtx = vm.g_to_t(rg, tx_hgvs_not_delins.ac) - fg = hn.normalize(hgvs_not_delins) - ftx = vm.g_to_t(fg, tx_hgvs_not_delins.ac) - if ( - rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( - ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): - exons = hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, alt_aln_method) - exonic = False - for ex_test in exons: - if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ - 7]) and ftx.posedit.pos.end.base in range(ex_test[6], - ex_test[7]): - exonic = True - if exonic is True: - hgvs_not_delins = fg - hgvs_genomic = fg - hgvs_genomic_5pr = fg - try: - tx_hgvs_not_delins = vm.c_to_n(ftx) - except Exception: - tx_hgvs_not_delins = ftx - disparity_deletion_in = ['transcript', 'Requires Analysis'] - - # Pre-processing of tx_hgvs_not_delins - try: - if tx_hgvs_not_delins.posedit.edit.alt is None: - tx_hgvs_not_delins.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' 
+ str( - tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = hp.parse_hgvs_variant( - tx_hgvs_not_delins_delins_from_dup) - - if disparity_deletion_in[0] == 'transcript': - # amend_RefSeqGene = 'true' - # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search('\+', - str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( - '\-', str(tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search('\+', - str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( - '\-', str(tx_hgvs_not_delins.posedit.pos.end))): - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) - - # Copy the current variant - tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) - try: - if tx_gap_fill_variant.posedit.edit.alt is None: - tx_gap_fill_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( - tx_gap_fill_variant.posedit.pos.start) + '_' + str( - tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = hp.parse_hgvs_variant( - tx_gap_fill_variant_delins_from_dup) - - # Identify which half of the NOT-intron the start position of the variant is in - if re.search('\-', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - tx_gap_fill_variant.posedit.pos.start.offset = int( - '0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.offset = int( - '0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search('\+', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.offset = int( - '0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - tx_gap_fill_variant.posedit.pos.end.offset = int( - '0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - - try: - tx_gap_fill_variant = vm.n_to_c(tx_gap_fill_variant) - except: - exceptPass() - genomic_gap_fill_variant = vm.t_to_g(tx_gap_fill_variant, - reverse_normalized_hgvs_genomic.ac) - genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - - try: - c_tx_hgvs_not_delins = vm.n_to_c(tx_hgvs_not_delins) - except Exception: - c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = vm.t_to_g(c_tx_hgvs_not_delins, - hgvs_genomic_5pr.ac) - - # Ensure an ALT exists - try: - if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + 
genomic_gap_fill_variant.type + '.' + str( - genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = hp.parse_hgvs_variant( - genomic_gap_fill_variant_delins_from_dup) - genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( - genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = hp.parse_hgvs_variant( - genomic_gap_fill_variant_alt_delins_from_dup) - - # Correct insertion alts - if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - genomic_gap_fill_variant_alt.posedit.pos.end.base) - genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - append_ref[1] - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - alternate_bases = list( - genomic_gap_fill_variant_alt.posedit.edit.alt) - else: - # Deletions with no ins - pre_alternate_bases = list( - genomic_gap_fill_variant_alt.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = genomic_gap_fill_variant.posedit.pos.start.base - alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base - ref_base_dict = {} - for base in 
reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = ref_start + 1 - - alt_base_dict = {} - - # NEED TO SEARCH FOR RANGE = and replace with interval_range - # Need to search for int and replace with integer - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for integer in range( - genomic_gap_fill_variant_alt.posedit.pos.start.base, - genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): - if integer == alt_start: - alt_base_dict[integer] = str(''.join(alternate_bases)) - else: - alt_base_dict[integer] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for integer in range( - genomic_gap_fill_variant.posedit.pos.start.base, - genomic_gap_fill_variant.posedit.pos.end.base + 1, - 1): - if integer in alt_base_dict.keys(): - alternate_sequence_bases.append(alt_base_dict[integer]) - else: - alternate_sequence_bases.append(ref_base_dict[integer]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Add the new alt to the gap fill variant and generate transcript variant - genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = vm.g_to_t(genomic_gap_fill_variant, - tx_gap_fill_variant.ac) - - # Set warning - gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - disparity_deletion_in[1] = [gap_size] - auto_info = auto_info + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - if re.match('\-', str(for_location_c.posedit.pos.start.offset)): - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - else: - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - auto_info = auto_info + '%s' % (gap_position) - - else: - if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # In this instance, we have identified a transcript gap but the n. version of - # the transcript variant but do not have a position which actually hits the gap, - # so the variant likely spans the gap, and is not picked up by an offset. - try: - c1 = vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - g1 = nr_vm.t_to_g(c1, hgvs_genomic.ac) - g3 = nr_vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - ng2 = hn.normalize(g2) - g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - len(g3.posedit.edit.ref) - 1) - try: - c2 = vm.g_to_t(g3, c1.ac) - if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - pass - else: - tx_hgvs_not_delins = c2 - try: - tx_hgvs_not_delins = vm.c_to_n(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSError: - exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError: - exceptPass() - - if re.search('\+', str( - tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - '\+', - str( - tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search('\+', str( - tx_hgvs_not_delins.posedit.pos.end)) and not re.search('\+', - str( - tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - gpe = for_location_c.posedit.pos.end.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search('\-', str( - tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - '\-', - str( - tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search('\-', str( - tx_hgvs_not_delins.posedit.pos.end)) and not re.search('\-', - str( - tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = vm.t_to_g(c1, hgvs_genomic.ac) - g2 = vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = vm.t_to_g(c2, hgvs_genomic.ac) - g2 = vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - 1 - gpe = for_location_c.posedit.pos.end.base - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - else: - auto_info = auto_info + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) + '\n' - hgvs_refreshed_variant = tx_hgvs_not_delins - - # GAP IN THE CHROMOSOME - elif disparity_deletion_in[0] == 'chromosome': - # amend_RefSeqGene = 'true' - if possibility_counter == 3: - hgvs_refreshed_variant = stash_tx_right - elif possibility_counter == 4: - hgvs_refreshed_variant = stash_tx_left - else: - hgvs_refreshed_variant = chromosome_normalized_hgvs_coding - # Warn - auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( - hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' transcript base(s) that fail to align to chromosome ' + str( - hgvs_genomic.ac) + '\n' - else: - # Keep the same by re-setting rel_var - hgvs_refreshed_variant = hgvs_coding - # amend_RefSeqGene = 'false' - - # Edit the output - if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', - str( - hgvs_refreshed_variant.type)): - hgvs_refreshed_variant = no_norm_evm.n_to_c(hgvs_refreshed_variant) - else: - pass - - try: - hn.normalize(hgvs_refreshed_variant) - except Exception as e: - error = str(e) - # Ensure the final variant is not intronic nor does it cross exon boundaries - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - hgvs_refreshed_variant = saved_hgvs_coding - else: - continue - - # Quick check to make sure the coding variant has not changed - try: - to_test = hn.normalize(hgvs_refreshed_variant) - except: - to_test = hgvs_refreshed_variant - if str(to_test.posedit.edit) != str(hgvs_coding.posedit.edit): - # Try the next available genomic option - if hgvs_coding.posedit.edit.type == 'identity' and to_test.posedit.edit.type == 'identity': - hgvs_coding = to_test - else: - continue - - # Update 
hgvs_genomic - hgvs_alt_genomic = va_func.myvm_t_to_g(hgvs_refreshed_variant, alt_chr, - no_norm_evm, vm, hp, hn, sf, nr_vm) - if hgvs_alt_genomic.posedit.edit.type == 'identity': - re_c = vm.g_to_t(hgvs_alt_genomic, hgvs_refreshed_variant.ac) - if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): - shuffle_left_g = copy.copy(hgvs_alt_genomic) - shuffle_left_g.posedit.edit.ref = '' - shuffle_left_g.posedit.edit.alt = '' - shuffle_left_g.posedit.pos.start.base = shuffle_left_g.posedit.pos.start.base - 1 - shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 - shuffle_left_g = reverse_normalizer.normalize(shuffle_left_g) - re_c = vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) - if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): - hgvs_alt_genomic = shuffle_left_g - - # If it is intronic, these vairables will not have been set - else: - # amend_RefSeqGene = 'false' - no_normalized_c = 'false' - - # Break if gap has been detected - if disparity_deletion_in[0] != 'false': - break - - # Normailse hgvs_genomic - try: - hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) - except hgvs.exceptions.HGVSError as e: - # Strange error caused by gap in genomic - error = str(e) - if re.search('base start position must be <= end position', error) and \ - disparity_deletion_in[0] == 'chromosome': - if hgvs_alt_genomic.posedit.edit.type == 'delins': - start = hgvs_alt_genomic.posedit.pos.start.base - end = hgvs_alt_genomic.posedit.pos.end.base - lhb = sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) - rhb = sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) - hgvs_alt_genomic.posedit.edit.ref = lhb + rhb - hgvs_alt_genomic.posedit.edit.alt = lhb + hgvs_alt_genomic.posedit.edit.alt + rhb - hgvs_alt_genomic.posedit.pos.start.base = end - hgvs_alt_genomic.posedit.pos.end.base = start - hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) - if hgvs_alt_genomic.posedit.edit.type == 'del': - start = 
hgvs_alt_genomic.posedit.pos.start.base - end = hgvs_alt_genomic.posedit.pos.end.base - lhb = sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) - rhb = sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) - hgvs_alt_genomic.posedit.edit.ref = lhb + rhb - hgvs_alt_genomic.posedit.edit.alt = lhb + rhb - hgvs_alt_genomic.posedit.pos.start.base = end - hgvs_alt_genomic.posedit.pos.end.base = start - hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) - - # Refresh the :g. variant - multi_g.append(hgvs_alt_genomic) - else: - multi_g.append(hgvs_alt_genomic) - corrective_action_taken = 'false' - - # In this instance, the gap code has generally found an incomplete-alignment rather than a - # truly gapped alignment. - except KeyError: - warnings = warnings + ': Suspected incomplete alignment between transcript %s and ' \ - 'genomic reference sequence %s' % (hgvs_coding.ac, - alt_chr) - continue - except hgvs.exceptions.HGVSError as e: - exc_type, exc_value, last_traceback = sys.exc_info() - te = traceback.format_exc() - error = str(te) - logger.error(str(exc_type) + " " + str(exc_value)) - logger.debug(error) - continue - - if multi_g != []: - multi_g.sort() - multi_gen_vars = multi_g # '|'.join(multi_g) - else: - multi_gen_vars = [] - else: - # HGVS genomic in the absence of a transcript variant - if genomic_variant != '': - multi_gen_vars = [hgvs_genomic_variant] - else: - multi_gen_vars = [] - - # Dictionaries of genomic loci - alt_genomic_dicts = [] - primary_genomic_dicts = {} - - if len(multi_gen_vars) != 0: - for alt_gen_var in multi_gen_vars: - for build in genome_builds: - test = va_scb.supported_for_mapping(alt_gen_var.ac, build) - if test == 'true': - try: - vcf_dict = va_H2V.report_hgvs2vcf(alt_gen_var, build, reverse_normalizer, sf) - except hgvs.exceptions.HGVSInvalidVariantError as e: - continue - # Identify primary assembly positions - if re.match('NC_', alt_gen_var.ac): - if re.match('GRC', build): - primary_genomic_dicts[build.lower()] = { - 
'hgvs_genomic_description': valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['grc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } - - else: - primary_genomic_dicts[build.lower()] = { - 'hgvs_genomic_description': valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['ucsc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } - if build == 'GRCh38': - vcf_dict = va_H2V.report_hgvs2vcf(alt_gen_var, 'hg38', reverse_normalizer, - sf) - primary_genomic_dicts['hg38'] = { - 'hgvs_genomic_description': valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['ucsc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } - - continue - - else: - if re.match('GRC', build): - dict = {build.lower(): {'hgvs_genomic_description': valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['grc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } - } - else: - dict = {build.lower(): {'hgvs_genomic_description': valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['ucsc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } - } - # Append - alt_genomic_dicts.append(dict) - - if build == 'GRCh38': - vcf_dict = va_H2V.report_hgvs2vcf(alt_gen_var, 'hg38', reverse_normalizer, - sf) - dict = {'hg38': {'hgvs_genomic_description': valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['ucsc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } - } - # Append - alt_genomic_dicts.append(dict) - continue - else: - # May need to account for ALT NC_ - pass - - # Warn not directly mapped to specified genome build - if genomic_accession != '': - caution = '' - if primary_assembly.lower() not in primary_genomic_dicts.keys(): - warnings = warnings + ': ' + str( - hgvs_coding) + ' cannot be mapped directly to genome build ' + primary_assembly + ': See alternative genomic loci or alternative genome builds for aligned 
genomic positions' - - warn_list = warnings.split(': ') - warnings_out = [] - for warning in warn_list: - warning.strip() - warning = warning.replace("'", "") - if warning == '': - continue - warnings_out.append(warning) - # Remove duplicate elements but maintain the order - seen = {} - no_rep_list = [seen.setdefault(x, x) for x in warnings_out if x not in seen] - warnings_out = no_rep_list - - # Ensure Variants have had the refs removed. - # if not hasattr(posedit, refseqgene_variant): - if refseqgene_variant != '': - try: - refseqgene_variant = valstr(hgvs_refseqgene_variant) - except: - exceptPass() - - # Add single letter AA code to protein descriptions - predicted_protein_variant_dict = {"tlr": str(predicted_protein_variant), "slr": ''} - if predicted_protein_variant != '': - if not 'Non-coding :n.' in predicted_protein_variant: - try: - format_p = predicted_protein_variant - format_p = re.sub('\(LRG_.+?\)', '', format_p) - re_parse_protein = hp.parse_hgvs_variant(format_p) - re_parse_protein_singleAA = output_formatter.single_letter_protein(re_parse_protein) - predicted_protein_variant_dict["slr"] = str(re_parse_protein_singleAA) - except hgvs.exceptions.HGVSParseError: - exceptPass() - else: - predicted_protein_variant_dict["slr"] = str(predicted_protein_variant) - - # Populate the dictionary - dict_out['submitted_variant'] = submitted - dict_out['gene_symbol'] = gene_symbol - dict_out['transcript_description'] = transcript_description - dict_out['hgvs_transcript_variant'] = tx_variant - dict_out['genome_context_intronic_sequence'] = genome_context_transcript_variant - dict_out['refseqgene_context_intronic_sequence'] = RefSeqGene_context_transcript_variant - dict_out['hgvs_refseqgene_variant'] = refseqgene_variant - dict_out['hgvs_predicted_protein_consequence'] = predicted_protein_variant_dict - dict_out['validation_warnings'] = warnings_out - dict_out['hgvs_lrg_transcript_variant'] = lrg_transcript_variant - dict_out['hgvs_lrg_variant'] = lrg_variant - 
dict_out['alt_genomic_loci'] = alt_genomic_dicts - dict_out['primary_assembly_loci'] = primary_genomic_dicts - dict_out['reference_sequence_records'] = '' - - # Add links to reference_sequence_records - ref_records = external.get_urls(dict_out) - if ref_records != {}: - dict_out['reference_sequence_records'] = ref_records - - # Append to a list for return - batch_out.append(dict_out) - else: - continue - else: - continue - - """ - Structure the output into dictionaries rather than a list with descriptive keys - and a validation type flag - """ - logger.trace("Populating output dictionary") - # Create output dictionary - validation_output = {'flag': None} - - # For gene outputs, i.e. those that hit transcripts - # dotter = '' - if set_output_type_flag == 'gene': - validation_output['flag'] = 'gene_variant' - validation_error_counter = 0 - for valid_v in batch_out: - if valid_v['validation_warnings'] == ['Validation error']: - validation_error_counter = validation_error_counter + 1 - identification_key = 'Validation_Error_%s' % (str(validation_error_counter)) - else: - identification_key = '%s' % (str(valid_v['hgvs_transcript_variant'])) - - # if identification_key not in validation_output.keys(): - validation_output[identification_key] = valid_v - # else: - # dotter = dotter + ' ' - # validation_output[identification_key + dotter] = valid_v - - # For warning only outputs - # Should only ever be 1 output as an error or a warning of the following types - # Gene symbol as reference sequence - # Gene as transcript reference sequence - if set_output_type_flag == 'warning': - validation_output['flag'] = 'warning' - validation_error_counter = 0 - validation_warning_counter = 0 - if len(batch_out) == 0: - validation_output['flag'] = 'empty_result' - for valid_v in batch_out: - if valid_v['validation_warnings'] == ['Validation error']: - validation_error_counter = validation_error_counter + 1 - identification_key = 'validation_error_%s' % (str(validation_error_counter)) - 
else: - validation_warning_counter = validation_warning_counter + 1 - identification_key = 'validation_warning_%s' % (str(validation_warning_counter)) - validation_output[identification_key] = valid_v - - # Intergenic variants - validation_intergenic_counter = 0 - if set_output_type_flag == 'intergenic': - validation_output['flag'] = 'intergenic' - for valid_v in batch_out: - validation_intergenic_counter = validation_intergenic_counter + 1 - identification_key = 'Intergenic_Variant_%s' % (str(validation_intergenic_counter)) - - # Attempt to liftover between genome builds - # Note: pyliftover uses the UCSC liftOver tool. - # https://pypi.org/project/pyliftover/ - genomic_position_info = valid_v['primary_assembly_loci'] - for g_p_key in genomic_position_info.keys(): - - # Identify the current build and hgvs_genomic descripsion - if re.match('hg', g_p_key): - # incoming_vcf = genomic_position_info[g_p_key]['vcf'] - # set builds - if g_p_key == 'hg38': - build_to = 'hg19' - build_from = 'hg38' - if g_p_key == 'hg19': - build_to = 'hg38' - build_from = 'hg19' - elif re.match('grc', g_p_key): - # incoming_vcf = genomic_position_info[g_p_key]['vcf'] - # set builds - if g_p_key == 'grch38': - build_to = 'GRCh37' - build_from = 'GRCh38' - if g_p_key == 'grch37': - build_to = 'GRCh38' - build_from = 'GRCh37' - - # Liftover - lifted_response = lift_over(genomic_position_info[g_p_key]['hgvs_genomic_description'], build_from, build_to, hn, vm, vr, hdp, hp, reverse_normalizer, sf, evm) - - # Sort the respomse into primary assembly and ALT - primary_assembly_loci = {} - alt_genomic_loci = [] - for build_key, accession_dict in lifted_response.iteritems(): - try: - accession_key = accession_dict.keys()[0] - if re.match('NC_', accession_dict[accession_key]['hgvs_genomic_description']): - primary_assembly_loci[build_key.lower()] = accession_dict[accession_key] - else: - alt_genomic_loci.append({build_key.lower(): accession_dict[accession_key]}) - - # KeyError if the dicts are empty 
- except KeyError: - continue - - # Add the dictionaries from lifted response to the output - if primary_assembly_loci != {}: - valid_v['primary_assembly_loci'] = primary_assembly_loci - if alt_genomic_loci != []: - valid_v['alt_genomic_loci'] = alt_genomic_loci - - # Finalise the output dictionary - validation_output[identification_key] = valid_v - - # Add error strings to validation output - # ''' - metadata = {} - logger.info("Variant successfully validated") - logs = [] - logString = logger.getString() - for l in logger.getString().split("\n"): - logs.append(l) - metadata["logs"] = logString - metadata["variant"] = batch_variant - metadata["assembly"] = selected_assembly - metadata["transcripts"] = select_transcripts - metadata['seqrepo_directory'] = HGVS_SEQREPO_DIR - metadata['uta_url'] = UTA_DB_URL - metadata['py_liftover_directory'] = PYLIFTOVER_DIR - metadata['variantvalidator_data_url'] = VALIDATOR_DB_URL - metadata['entrez_id'] = ENTREZ_ID - metadata['variantvalidator_version'] = VERSION - metadata['variantvalidator_hgvs_version'] = hgvs_version - metadata['uta_schema'] = str(hdp.data_version()) - metadata['seqrepo_db'] = HGVS_SEQREPO_DIR.split('/')[-1] - validation_output["metadata"] = metadata - # ''' - # Measure time elapsed - time_now = time.time() - elapsed_time = time_now - start_time - logger.debug('validation time = ' + str(elapsed_time)) - - # return batch_out - return validation_output - - # Bug catcher - except KeyboardInterrupt: - raise - except BaseException as e: - # Debug mode - exc_type, exc_value, last_traceback = sys.exc_info() - te = traceback.format_exc() - # tr = ''.join(traceback.format_stack()) - tbk = [str(exc_type), str(exc_value), str(te)] - er = '\n'.join(tbk) - # raise variantValidatorError('Validation error') - # Return - # return - logger.critical(str(exc_type) + " " + str(exc_value)) - logger.debug(str(er)) - -# Generates a list of transcript (UTA supported) and transcript names from a gene symbol or RefSeq transcript ID 
-def gene2transcripts(query): - input = query - input = input.upper() - if re.search('\d+ORF\d+', input): - input = input.replace('ORF', 'orf') - # Quick check for blank form - if input == '': - caution = {'error': 'Please enter HGNC gene name or transcript identifier (NM_, NR_, or ENST)'} - return caution - else: - hgnc = input - if re.match('NM_', hgnc) or re.match('NR_', hgnc): # or re.match('ENST', hgnc): - try: - tx_info = hdp.get_tx_identity_info(hgnc) - hgnc = tx_info[6] - except hgvs.exceptions.HGVSError as e: - caution = {'error': str(e)} - return caution - - # First perform a search against the input gene symbol or the symbol inferred from UTA - initial = va_func.hgnc_rest(path="/fetch/symbol/" + hgnc) - # Check for a record - if str(initial['record']['response']['numFound']) != '0': - current_sym = hgnc - previous = initial - # No record found, is it a previous symbol? - else: - # Look up current name - current = va_func.hgnc_rest(path="/search/prev_symbol/" + hgnc) - # Look for historic names - # If historic names = 0 - if str(current['record']['response']['numFound']) == '0': - current_sym = hgnc - else: - current_sym = current['record']['response']['docs'][0]['symbol'] - # Look up previous symbols and gene name - # Re-set the previous variable - previous = va_func.hgnc_rest(path="/fetch/symbol/" + current_sym) - - # Extract the relevant data - try: - previous_sym = previous['record']['response']['docs'][0]['prev_symbol'][0] - except: - previous_sym = current_sym - - # Get gene name - try: - gene_name = previous['record']['response']['docs'][0]['name'] - except: - # error = current_sym + ' is not a valid HGNC gene symbol' - gene_name = 'Gene symbol %s not found in the HGNC database of human gene names www.genenames.org' % query - return {'error': gene_name} - - # Look up previous name - try: - previous_name = previous['record']['response']['docs'][0]['prev_name'][0] - except: - previous_name = gene_name - - # Get transcripts - tx_for_gene = 
hdp.get_tx_for_gene(current_sym) - if len(tx_for_gene) == 0: - tx_for_gene = hdp.get_tx_for_gene(previous_sym) - if len(tx_for_gene) == 0: - tx_for_gene = {'error': 'Unable to retrieve data from the UTA, please contact admin'} - return tx_for_gene - - # Loop through each transcript and get the relevant transcript description - genes_and_tx = [] - recovered_dict = {} - for line in tx_for_gene: - if re.match('^NM_', line[3]) or re.match('^NR_', line[3]): - # Transcript ID - tx = line[3] - tx_description = va_dbCrl.data.get_transcript_description(tx) - if tx_description == 'none': - va_dbCrl.data.update_transcript_info_record(tx, hdp) - tx_description = va_dbCrl.data.get_transcript_description(tx) - # Check for duplicates - if tx in recovered_dict.keys(): - continue - else: - try: - # Add to recovered_dict - recovered_dict[tx] = '' - genes_and_tx.append([tx, tx_description, line[1] + 1, line[2]]) - except: - # Add to recovered_dict - recovered_dict[tx] = '' - genes_and_tx.append([tx, tx_description, 'not applicable', 'not applicable']) - # LRG information - lrg_transcript = va_dbCrl.data.get_lrgTranscriptID_from_RefSeqTranscriptID(tx) - if lrg_transcript == 'none': - pass - else: - genes_and_tx.append([lrg_transcript, tx_description, line[1] + 1, line[2]]) - - cp_genes_and_tx = copy.deepcopy(genes_and_tx) - genes_and_tx = [] - for tx in cp_genes_and_tx: - tx_d = {'reference': tx[0], - 'description': tx[1], - 'coding_start': tx[2] + 1, - 'coding_end': tx[3] - } - genes_and_tx.append(tx_d) - - # Return data table - g2d_data = {'current_symbol': current_sym, - 'previous_symbol': previous_sym, - 'current_name': gene_name, - 'previous_name': previous_name, - 'transcripts': genes_and_tx - } - - return g2d_data - - -# Fetch reference sequence from a HGVS variant description -def hgvs2ref(query): - logger.info('Fetching reference sequence for ' + query) - # Dictionary to store the data - reference = {'variant': query, - 'start_position': '', - 'end_position': '', - 'warning': 
'', - 'sequence': '', - 'error': ''} - # Step 1: parse the query. Dictionary the parse error if parsing fails - try: - input_hgvs_query = hp.parse_hgvs_variant(query) - except Exception as e: - reference['error'] = str(e) - # Step 2: If the variant is a c., it needs to transferred to n. - try: - hgvs_query = vm.c_to_n(input_hgvs_query) - except: - hgvs_query = input_hgvs_query - - # For transcript reference sequences - if hgvs_query.type == 'c' or hgvs_query.type == 'n': - # Step 4: Check for intronic sequence - if hgvs_query.posedit.pos.start.offset != 0 and hgvs_query.posedit.pos.end.offset != 0: - reference['warning'] = 'Intronic sequence variation: Use genomic reference sequence' - elif hgvs_query.posedit.pos.start.offset != 0 or hgvs_query.posedit.pos.end.offset != 0: - reference['warning'] = 'Partial intronic sequence variation: Returning exonic and/or UTR sequence only' - - # Step 3: split the variant description into the parts required for seqfetching - accession = hgvs_query.ac - start = hgvs_query.posedit.pos.start.base - 1 - end = hgvs_query.posedit.pos.end.base - - # Step 5: try and fetch the sequence using SeqFetcher. 
Dictionary an error if this fails - try: - sequence = sf.fetch_seq(accession, start, end) - except Exception as e: - reference['error'] = str(e) - exc_type, exc_value, last_traceback = sys.exc_info() - te = traceback.format_exc() - # tr = ''.join(traceback.format_stack()) - tbk = [str(exc_type), str(exc_value), str(te)] - er = '\n'.join(tbk) - logger.info(str(exc_type) + " " + str(exc_value)) - logger.debug(er) - else: - reference['start_position'] = str(input_hgvs_query.posedit.pos.start.base) - reference['end_position'] = str(input_hgvs_query.posedit.pos.end.base) - reference['sequence'] = sequence - else: - # Step 3: split the variant description into the parts required for seqfetching - accession = hgvs_query.ac - start = hgvs_query.posedit.pos.start.base - 1 - end = hgvs_query.posedit.pos.end.base - - # Step 5: try and fetch the sequence using SeqFetcher. Dictionary an error if this fails - try: - sequence = sf.fetch_seq(accession, start, end) - except Exception as e: - reference['error'] = str(e) - exc_type, exc_value, last_traceback = sys.exc_info() - te = traceback.format_exc() - # tr = ''.join(traceback.format_stack()) - tbk = [str(exc_type), str(exc_value), str(te)] - er = '\n'.join(tbk) - logger.info(er) - else: - reference['start_position'] = str(input_hgvs_query.posedit.pos.start.base) - reference['end_position'] = str(input_hgvs_query.posedit.pos.end.base) - reference['sequence'] = sequence - - # Genomic reference sequence - elif hgvs_query.type == 'g' or hgvs_query.type == 'p': - # Step 3: split the variant description into the parts required for seqfetching - accession = hgvs_query.ac - start = hgvs_query.posedit.pos.start.base - 1 - end = hgvs_query.posedit.pos.end.base - - # Step 5: try and fetch the sequence using SeqFetcher. 
Dictionary an error if this fails - try: - sequence = sf.fetch_seq(accession, start, end) - except Exception as e: - reference['error'] = str(e) - exc_type, exc_value, last_traceback = sys.exc_info() - te = traceback.format_exc() - # tr = ''.join(traceback.format_stack()) - tbk = [str(exc_type), str(exc_value), str(te)] - er = '\n'.join(tbk) - logger.info(str(exc_type) + " " + str(exc_value)) - logger.debug(er) - else: - reference['start_position'] = str(input_hgvs_query.posedit.pos.start.base) - reference['end_position'] = str(input_hgvs_query.posedit.pos.end.base) - reference['sequence'] = sequence - - # Return the resulting reference sequence or error message - return reference - - -def update_vv_data(): - import sys - logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) - # import update modules - import mysql_refSeqGene_noMissmatch - import compile_lrg_data - # Update refSeqGene Primary assembly alignment data - mysql_refSeqGene_noMissmatch.update() - # Update LRG records - compile_lrg_data.update() - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
-# diff --git a/VariantValidator/variantanalyser/__init__.py b/VariantValidator/variantanalyser/__init__.py deleted file mode 100644 index e8b85207..00000000 --- a/VariantValidator/variantanalyser/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -# -*- coding: utf-8 -*- - -# Import functions from functions.py -from functions import * -from links import * -from supported_chromosome_builds import * -from g_to_g import * -import dbControls -import hgvs2vcf -from batch import * -from gap_genes import * -import pseudo_vcf2hgvs -from liftover import * - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
-# \ No newline at end of file diff --git a/VariantValidator/variantanalyser/batch.py b/VariantValidator/variantanalyser/batch.py deleted file mode 100644 index 14db6b58..00000000 --- a/VariantValidator/variantanalyser/batch.py +++ /dev/null @@ -1,39 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -batch.py - -Contains the link code required to update the transcript_info table when VariantValidator -identifies an out-of-date entry - -""" - -# Import validator functions -import dbControls.data - - -# function for adding information to database -def data_add(input, alt_aln_method, accession, dbaction, hp, evm, hdp): - # Add accurate transcript descriptions to the database - # RefSeq databases - # Get the Entrez (GenBank) file - dbControls.data.update_transcript_info_record(accession, hdp) - entry = dbControls.data.in_entries(accession, 'transcript_info') - return entry - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
-# diff --git a/VariantValidator/variantanalyser/dbControls/__init__.py b/VariantValidator/variantanalyser/dbControls/__init__.py deleted file mode 100644 index 10bcf9df..00000000 --- a/VariantValidator/variantanalyser/dbControls/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -*- coding: utf-8 -*- - - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
-# \ No newline at end of file diff --git a/VariantValidator/variantanalyser/dbControls/data.py b/VariantValidator/variantanalyser/dbControls/data.py deleted file mode 100644 index 877f28ad..00000000 --- a/VariantValidator/variantanalyser/dbControls/data.py +++ /dev/null @@ -1,293 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -data.py - -Contains all database functions - -Takes requests via the functions and makes the appropriate MySQL queries via the relevant -query type -""" - -# import database modules -import dbquery -import dbinsert -import dbupdate -import dbfetchone -import dbfetchall - -# Import python modules -import os -import re - -# Needs functions from variantanalyser - directory above, unless in a single directory -try: - import variantanalyser.functions as functions -except ImportError: - parentdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - os.sys.path.insert(0,parentdir) - import functions -except AttributeError: - parentdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - os.sys.path.insert(0,parentdir) - import functions - -# Retrieve transcript information -def in_entries(entry, table): - - # Use dbquery.py to connect to mysql and return the necessary data - if table == 'transcript_info': - row = dbquery.query_with_fetchone(entry, table) - - if row[0] == 'error': - data = { - 'error' : 'false', - 'description': 'false' - } - - data['error'] = row[0] - data['description'] = row[1] - - elif row[0] == 'none': - data = { - 'none' : 'false', - 'description': 'false' - } - - data['none'] = row[0] - data['description'] = row[1] - - else: - data = { - 'accession' : 'false', - 'description': 'false', - 'updated' : 'false', - 'expiry' : 'false' - } - data['accession'] = row[0] - data['description'] = row[1] - data['variant'] = row[2] - data['version'] = row[3] - data['hgnc_symbol'] = row[4] - data['uta_symbol'] = row[5] - data['updated'] = row[6] - data['expiry'] = row[7] - - return data - -# Add new entry -def 
add_entry(entry, data, table): - success = dbinsert.insert(entry, data, table) - return success - -def insert_transcript_loci(add_data, primary_assembly): - success = dbinsert.insert_transcript_loci(add_data, primary_assembly) - return success - - -# Update entries -def update_entry(entry, data, table): - success = dbupdate.update(entry, data, table) - return success - -def update_transcript_info_record(accession, hdp): - - # Search Entrez for corresponding record for the RefSeq ID - try: - record = functions.entrez_efetch(db="nucleotide", id=accession, rettype="gb", retmode="text") - version = record.id - description = record.description - variant = '0' - - if re.search('transcript variant', description): - tv = re.search('transcript variant \w+', description) - tv = str(tv.group(0)) - tv = tv.replace('transcript variant', '') - variant = tv.strip() - variant = variant.upper() # Some tv descriptions are a or A - else: - variant = '0' - - # Get information from UTA - try: - uta_info = hdp.get_tx_identity_info(version) - except: - version_ac_ver = version.split('.') - version = version_ac_ver[0] + '.' + str(int(version_ac_ver[1]) - 1) - uta_info = hdp.get_tx_identity_info(version) - - uta_symbol = str(uta_info[6]) - - # First perform a search against the input gene symbol or the symbol inferred from UTA - initial = functions.hgnc_rest(path = "/fetch/symbol/" + uta_symbol) - # Check for a record - if str(initial['record']['response']['numFound']) != '0': - hgnc_symbol = uta_symbol - # No record found, is it a previous symbol? 
- else: - # Search hgnc rest to see if symbol is out of date - rest_data = functions.hgnc_rest(path = "/search/prev_symbol/" + uta_symbol) - # If the name is correct no record will be found - if rest_data['error'] == 'false': - if int(rest_data['record']['response']['numFound']) == 0: - hgnc_symbol = uta_info[6] - else: - hgnc_symbol = rest_data['record']['response']['docs'][0]['symbol'] - else: - hgnc_symbol = 'unassigned' - - # List of connection error types. May need to be expanded. - # Outcome - Put off update for 3 months! - except Exception as e: - if str(e) == '': - # Issues with DNSSEC for the nih.gov - previous_entry = in_entries(accession, 'transcript_info') - accession = accession - description = previous_entry['description'] - variant = previous_entry['variant'] - version = previous_entry['version'] - hgnc_symbol = previous_entry['hgnc_symbol'] - uta_symbol = previous_entry['uta_symbol'] - - # Query information - # query_info = [accession, description, variant, version, hgnc_symbol, uta_symbol] - query_info = [version, description, variant, version, hgnc_symbol, uta_symbol] - table='transcript_info' - - # Update the transcript_info table (needs plugging in) - returned_data = in_entries(version, table) - # If the entry is not in the database add it - if 'none' in returned_data: - add_entry(version, query_info, table) - # If the data in the entry has changed, update it - else: - update_entry(version, query_info, table) - return - -def update_refSeqGene_loci(rsg_data): - # First query the database - # import dbfetchone - entry_exists = dbfetchone.get_refSeqGene_data_by_refSeqGeneID(rsg_data[0], rsg_data[2]) - if entry_exists[0] == 'none': - # import dbinsert - dbinsert.insert_refSeqGene_data(rsg_data) - else: - # import dbupdate - dbupdate.update_refSeqGene_data(rsg_data) - return - -def update_lrg_rs_lookup(lrg_rs_lookup): - # First query the database - rsgID = dbfetchone.get_RefSeqGeneID_from_lrgID(lrg_rs_lookup[0]) - if rsgID == 'none': - # import 
dbinsert - dbinsert.insert_RefSeqGeneID_from_lrgID(lrg_rs_lookup) - return - else: - return - -def update_lrgt_rst(lrgtx_to_rstID): - # First query the database - rstID = dbfetchone.get_RefSeqTranscriptID_from_lrgTranscriptID(lrgtx_to_rstID[0]) - if rstID == 'none': - # import dbinsert - dbinsert.insert_LRG_transcript_data(lrgtx_to_rstID) - return - else: - return - -def update_lrg_p_rs_p_lookup(lrg_p, rs_p): - # First query the database - rspID = dbfetchone.get_RefSeqProteinID_from_lrgProteinID(lrg_p) - if rspID == 'none': - # import dbinsert - dbinsert.insert_LRG_protein_data(lrg_p, rs_p) - return - else: - return - -# Direct methods (GET) -def get_transcript_info_for_gene(gene_symbol): - rows = dbfetchall.get_transcript_info_for_gene(gene_symbol) - return rows - -def get_uta_symbol(gene_symbol): - # returns the UTA gene symbol when HGNC gene symbol is input - utaSymbol = str(dbfetchone.get_utaSymbol(gene_symbol)[0]) - return utaSymbol - -def get_hgnc_symbol(gene_symbol): - # returns the HGNC gene symbol when UTA gene symbol is input - hgncSymbol = str(dbfetchone.get_hgncSymbol(gene_symbol)[0]) - return hgncSymbol - -def get_transcript_description(transcript_id): - # returns the transcript description for a given transcript - tx_description = dbfetchone.get_transcript_description(transcript_id) - return tx_description - -def get_gene_symbol_from_transcriptID(transcript_id): - # returns gene symbol for a given transcript ID - gene_symbol = dbfetchone.get_gene_symbol_from_transcriptID(transcript_id) - return gene_symbol - -def get_gene_symbol_from_refSeqGeneID(refSeqGeneID): - # Returns the databases most up-to-date gene symbol for a given NG_ ID - gene_symbol = dbfetchone.get_gene_symbol_from_refSeqGeneID(refSeqGeneID) - return gene_symbol - -def get_g_to_g_info(): - # Recovers the g_to_g data table - table = dbfetchall.get_g_to_g_info() - return table - -def get_all_transcriptID(): - # Returns a list of transcript IDs in our database - table = 
dbfetchall.get_all_transcriptID() - return table - -def get_RefSeqGeneID_from_lrgID(lrgID): - # Get the relevant RefSeqGeneID for a given LRG ID - rsgID = dbfetchone.get_RefSeqGeneID_from_lrgID(lrgID) - return rsgID - -def get_RefSeqTranscriptID_from_lrgTranscriptID(lrg_txID): - rstID = dbfetchone.get_RefSeqTranscriptID_from_lrgTranscriptID(lrg_txID) - return rstID - -def get_lrgTranscriptID_from_RefSeqTranscriptID(rstID): - lrg_tx = dbfetchone.get_lrgTranscriptID_from_RefSeqTranscriptID(rstID) - return lrg_tx - -def get_lrgProteinID_from_RefSeqProteinID(rs_p): - lrg_p = dbfetchone.get_lrgProteinID_from_RefSeqProteinID(rs_p) - return lrg_p - -def get_lrgID_from_RefSeqGeneID(rsgID): - lrgID = dbfetchone.get_lrgID_from_RefSeqGeneID(rsgID) - return lrgID - -def get_refseqgene_info(refseqgene_id, primary_assembly): - refseqgene_info = dbfetchone.get_refseqgene_info(refseqgene_id, primary_assembly) - return refseqgene_info - -def get_LRG_data_from_LRGid(lrg_id): - LRG_data = dbfetchone.get_LRG_data_from_LRGid(lrg_id) - return LRG_data - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
-# diff --git a/VariantValidator/variantanalyser/dbControls/dbConnection.py b/VariantValidator/variantanalyser/dbControls/dbConnection.py deleted file mode 100644 index fc524fce..00000000 --- a/VariantValidator/variantanalyser/dbControls/dbConnection.py +++ /dev/null @@ -1,55 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -dbconnect.py - -Connects to MySQL and returns a connection pool -""" - -import mysql.connector -from mysql.connector.pooling import MySQLConnectionPool -import dbconfig -from dbconfig import read_db_config -import os - -_connection_pool = None - -def get_connection(): - global _connection_pool - if not _connection_pool: - VALIDATOR_DB_URL = os.environ.get('VALIDATOR_DB_URL') - if VALIDATOR_DB_URL is not None: - configurations = VALIDATOR_DB_URL.replace('mysqlx://', '') - user_pass,host_database = configurations.split('@') - user,password = user_pass.split(':') - host,database = host_database.split('/') - db_config = { - 'user': user, - 'password': password, - 'host': host, - 'database': database, - 'raise_on_warnings': True, - } - else: - db_config = read_db_config() - _connection_pool = mysql.connector.pooling.MySQLConnectionPool(pool_size=10, **db_config) # MySQLConnection(**db_config) - return _connection_pool - -__all__ = [ 'getConnection' ] - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
-# \ No newline at end of file diff --git a/VariantValidator/variantanalyser/dbControls/dbconfig.py b/VariantValidator/variantanalyser/dbControls/dbconfig.py deleted file mode 100644 index 9c459ffa..00000000 --- a/VariantValidator/variantanalyser/dbControls/dbconfig.py +++ /dev/null @@ -1,51 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -dbconfig.py - -Configures the MySQL connection using the config.ini or environment variables -""" - -from configparser import ConfigParser -import os - -# Get the conf_root from the os -CONF_ROOT = os.environ.get('CONF_ROOT') -def read_db_config(filename=os.path.join(CONF_ROOT, 'config.ini'), section='mysql'): - """ Read database configuration file and return a dictionary object - :param filename: name of the configuration file - :param section: section of database configuration - :return: a dictionary of database parameters - """ - # create parser and read ini configuration file - parser = ConfigParser() - with open(filename) as f: - parser.read_file(f) - - # get section, default to mysql - db = {} - if parser.has_section(section): - items = parser.items(section) - for item in items: - db[item[0]] = item[1] - else: - raise Exception('{0} not found in the {1} file'.format(section, filename)) - - return db - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
-# \ No newline at end of file diff --git a/VariantValidator/variantanalyser/dbControls/dbfetchall.py b/VariantValidator/variantanalyser/dbControls/dbfetchall.py deleted file mode 100644 index 1664bb63..00000000 --- a/VariantValidator/variantanalyser/dbControls/dbfetchall.py +++ /dev/null @@ -1,62 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -dbfetchall.py - -Functions which make MySQL fetchall queries -""" - -import dbConnection - -def execute(query): - conn = dbConnection.get_connection().get_connection()# MySQLConnection(**db_config) - cursor = conn.cursor() - cursor.execute(query) - # Commit query - rows = [] - rows = cursor.fetchall() - # if rows is not None: - if rows != []: - pass - else: - rows = ['none', 'No data'] - cursor.close() - conn.close() - return rows - -# Methods - -def get_transcript_info_for_gene(gene_symbol): - query = "SELECT refSeqID, description, transcriptVariant, currentVersion, hgncSymbol, utaSymbol, updated, IF(updated < NOW() - INTERVAL 3 MONTH , 'true', 'false') FROM transcript_info WHERE hgncSymbol = '%s'" %(gene_symbol) - rows = execute(query) - return rows - -def get_g_to_g_info(): - query = "SELECT refSeqGeneID, refSeqChromosomeID, startPos, endPos, orientation, hgncSymbol, genomeBuild FROM refSeqGene_loci" - table = execute(query) - return table - -def get_all_transcriptID(): - query = "SELECT refSeqID FROM transcript_info" - table = execute(query) - return table - -if __name__ == '__main__': - query_with_fetchall() - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# diff --git a/VariantValidator/variantanalyser/dbControls/dbfetchone.py b/VariantValidator/variantanalyser/dbControls/dbfetchone.py deleted file mode 100644 index 05ed8cd4..00000000 --- a/VariantValidator/variantanalyser/dbControls/dbfetchone.py +++ /dev/null @@ -1,127 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -dbfetchone.py - -Functions which make MySQL fetchone queries -""" - -import dbConnection - -def execute(query): - conn = dbConnection.get_connection().get_connection() - cursor = conn.cursor(buffered=True) - cursor.execute(query) - - row = [] - row = cursor.fetchone() - - if row is not None: - pass - else: - # print('No Data...') - row = ['none', 'No data'] - cursor.close() - conn.close() - return row - -# Methods -def get_utaSymbol(gene_symbol): - query = "SELECT utaSymbol FROM transcript_info WHERE hgncSymbol = '%s'" %(gene_symbol) - row = execute(query) - return row - -def get_hgncSymbol(gene_symbol): - query = "SELECT hgncSymbol FROM transcript_info WHERE utaSymbol = '%s'" %(gene_symbol) - row = execute(query) - return row - -def get_transcript_description(transcript_id): - transcript_id = transcript_id - query = "SELECT description FROM transcript_info WHERE refSeqID = '%s'" %(transcript_id) - tx_description = str(execute(query)[0]) - return tx_description - -def get_gene_symbol_from_transcriptID(transcript_id): - transcript_id = transcript_id - query = "SELECT hgncSymbol FROM transcript_info WHERE refSeqID = '%s'" %(transcript_id) - gene_symbol = str(execute(query)[0]) - return gene_symbol - -def get_refSeqGene_data_by_refSeqGeneID(refSeqGeneID, genomeBuild): - query = "SELECT refSeqGeneID, refSeqChromosomeID, genomeBuild, startPos, endPos, orientation, totalLength, chrPos, rsgPos, entrezID, hgncSymbol FROM refSeqGene_loci WHERE refSeqGeneID = '%s' AND genomeBuild = '%s'" 
%(refSeqGeneID, genomeBuild) - refSeqGene_data = execute(query) - return refSeqGene_data - -def get_gene_symbol_from_refSeqGeneID(refSeqGeneID): - refseqgene_id = refSeqGeneID - query = "SELECT hgncSymbol FROM refSeqGene_loci WHERE refSeqGeneID = '%s'" %(refseqgene_id) - gene_symbol = str(execute(query)[0]) - return gene_symbol - -def get_RefSeqGeneID_from_lrgID(lrgID): - query = "SELECT RefSeqGeneID FROM LRG_RSG_lookup WHERE lrgID = '%s'" %(lrgID) - rsgID = execute(query) - rsgID = rsgID[0] - return rsgID - -def get_RefSeqTranscriptID_from_lrgTranscriptID(lrgtxID): - query = "SELECT RefSeqTranscriptID FROM LRG_transcripts WHERE LRGtranscriptID = '%s'" %(lrgtxID) - rstID = execute(query) - rstID = rstID[0] - return rstID - -def get_lrgTranscriptID_from_RefSeqTranscriptID(rstID): - query = "SELECT LRGtranscriptID FROM LRG_transcripts WHERE RefSeqTranscriptID = '%s'" %(rstID) - lrg_tx = execute(query) - lrg_tx = lrg_tx[0] - return lrg_tx - -def get_lrgID_from_RefSeqGeneID(rsgID): - query = "SELECT lrgID, status FROM LRG_RSG_lookup WHERE RefSeqGeneID = '%s'" %(rsgID) - lrgID = execute(query) - lrgID = lrgID - return lrgID - -def get_refseqgene_info(refseqgene_id, primary_assembly): - query = "SELECT refSeqGeneID, refSeqChromosomeID, genomeBuild, startPos, endPos FROM refSeqGene_loci WHERE refSeqGeneID = '%s' AND genomeBuild = '%s'" %(refseqgene_id, primary_assembly) - refseqgene_info = execute(query) - return refseqgene_info - -def get_RefSeqProteinID_from_lrgProteinID(lrg_p): - query = "SELECT RefSeqProteinID FROM LRG_proteins WHERE LRGproteinID = '%s'" %(lrg_p) - rspID = execute(query) - rspID = rspID[0] - return rspID - -def get_lrgProteinID_from_RefSeqProteinID(rs_p): - query = "SELECT LRGproteinID FROM LRG_proteins WHERE RefSeqProteinID = '%s'" %(rs_p) - lrpID = execute(query) - lrpID = lrpID[0] - return lrpID - -def get_LRG_data_from_LRGid(lrg_id): - query = "SELECT * FROM LRG_RSG_lookup WHERE lrgID = '%s'" %(lrg_id) - lrg_data = execute(query) - lrg_data = 
lrg_data - return lrg_data - -if __name__ == '__main__': - query_with_fetchone() - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# \ No newline at end of file diff --git a/VariantValidator/variantanalyser/dbControls/dbinsert.py b/VariantValidator/variantanalyser/dbControls/dbinsert.py deleted file mode 100644 index 2b621a53..00000000 --- a/VariantValidator/variantanalyser/dbControls/dbinsert.py +++ /dev/null @@ -1,128 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -dbinsert.py - -Functions which make MySQL insert statements -""" - -import os -import dbConnection - -# Set up os paths data and log folders -ROOT = os.path.dirname(os.path.abspath(__file__)) - -def insert(entry, data, table): - conn = dbConnection.get_connection().get_connection() # MySQLConnection(**db_config) - cursor = conn.cursor() - # MySQL queries - - if table == 'transcript_info': - accession = entry - description = data[1] - variant = data[2] - version = data[3] - hgnc_symbol = data[4] - uta_symbol = data[5] - query = "INSERT INTO transcript_info(refSeqID, description, transcriptVariant, currentVersion, hgncSymbol, utaSymbol, updated) VALUES(%s,%s, %s, %s, %s, %s, NOW())" - cursor.execute(query, (accession, description, variant, version, hgnc_symbol, uta_symbol)) - - # Query report - if cursor.lastrowid: - success = 'true' - else: - success = 'Unknown 
error' - - # Commit and close connection - conn.commit() - cursor.close() - conn.close() - return success - -def insert_refSeqGene_data(rsg_data): - query = "INSERT INTO refSeqGene_loci(refSeqGeneID, refSeqChromosomeID, genomeBuild, startPos, endPos, orientation, totalLength, chrPos, rsgPos, entrezID, hgncSymbol, updated) VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())" - conn = dbConnection.get_connection().get_connection() - cursor = conn.cursor() - cursor.execute(query, (rsg_data[0], rsg_data[1], rsg_data[2], rsg_data[3], rsg_data[4], rsg_data[5], rsg_data[6], rsg_data[7], rsg_data[8], rsg_data[9], rsg_data[10])) - # Query report - if cursor.lastrowid: - success = 'true' - else: - success = 'Unknown error' - - # Commit and close connection - conn.commit() - cursor.close() - conn.close() - return success - -def insert_RefSeqGeneID_from_lrgID(lrg_rs_lookup): - query = "INSERT INTO LRG_RSG_lookup(lrgID, hgncSymbol, RefSeqGeneID, status) VALUES(%s,%s,%s,%s)" - conn = dbConnection.get_connection().get_connection() - cursor = conn.cursor() - cursor.execute(query, (lrg_rs_lookup[0], lrg_rs_lookup[1], lrg_rs_lookup[2], lrg_rs_lookup[3])) - # Query report - if cursor.lastrowid: - success = 'true' - else: - success = 'Unknown error' - - # Commit and close connection - conn.commit() - cursor.close() - conn.close() - return success - -def insert_LRG_transcript_data(lrgtx_to_rstID): - query = "INSERT INTO LRG_transcripts(LRGtranscriptID, RefSeqTranscriptID) VALUES(%s,%s)" - conn = dbConnection.get_connection().get_connection() - cursor = conn.cursor() - cursor.execute(query, (lrgtx_to_rstID[0], lrgtx_to_rstID[1])) - # Query report - if cursor.lastrowid: - success = 'true' - else: - success = 'Unknown error' - - # Commit and close connection - conn.commit() - cursor.close() - conn.close() - return success - -def insert_LRG_protein_data(lrg_p, rs_p): - query = "INSERT INTO LRG_proteins(LRGproteinID, RefSeqProteinID) VALUES(%s,%s)" - conn = 
dbConnection.get_connection().get_connection() - cursor = conn.cursor() - cursor.execute(query, (lrg_p, rs_p)) - # Query report - if cursor.lastrowid: - success = 'true' - else: - success = 'Unknown error' - - # Commit and close connection - conn.commit() - cursor.close() - conn.close() - return success - -if __name__ == '__main__': - insert() - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
-# \ No newline at end of file diff --git a/VariantValidator/variantanalyser/dbControls/dbquery.py b/VariantValidator/variantanalyser/dbControls/dbquery.py deleted file mode 100644 index 2195da86..00000000 --- a/VariantValidator/variantanalyser/dbControls/dbquery.py +++ /dev/null @@ -1,55 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -dbquery.py - -Functions which fetch data from transcript_info -""" - -import dbConnection - -def query_with_fetchone(entry, table): - # """ Connect to MySQL database """ - - # MySQL queries - - if table == 'transcript_info': - query = "SELECT refSeqID, description, transcriptVariant, currentVersion, hgncSymbol, utaSymbol, updated, IF(updated < NOW() - INTERVAL 3 MONTH , 'true', 'false') FROM transcript_info WHERE refSeqID = '%s'" %(entry) - - conn = dbConnection.get_connection().get_connection() - cursor = conn.cursor(buffered=True) - cursor.execute(query) - - # Blank list for row - row = [] - row = cursor.fetchone() - - if row is not None: - pass - else: - # print('No Data...') - row = ['none', 'No data'] - cursor.close() - conn.close() - return row - - -if __name__ == '__main__': - query_with_fetchone() - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
-# diff --git a/VariantValidator/variantanalyser/dbControls/dbupdate.py b/VariantValidator/variantanalyser/dbControls/dbupdate.py deleted file mode 100644 index eaf0c962..00000000 --- a/VariantValidator/variantanalyser/dbControls/dbupdate.py +++ /dev/null @@ -1,109 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -dbupdate.py - -Functions which make MySQL update statements -""" -import os -import dbConnection - -# Set up os paths data and log folders -ROOT = os.path.dirname(os.path.abspath(__file__)) - -def update(entry, data, table): - # MySQL queries - -# if table == 'genePos37' or table == 'genePos38': -# query = "UPDATE " + table + " SET symbol=%s, name=%s, prevSymbol=%s, reference=%s, assembly=%s, chr=%s, start=%s, end=%s, refSeqTranscriptID=%s, refSeqGeneID=%s, updated=NOW() WHERE hgncID = %s" -# conn = dbConnection.get_connection().get_connection() -# cursor = conn.cursor() -# cursor.execute(query, (data['symbol'], -# data['name'], -# data['prevSymbol'], -# data['reference'], -# data['assembly'], -# data['chr'], -# data['start'], -# data['end'], -# data['refSeqTranscriptID'], -# data['refSeqGeneID'], -# data['hgncID'])) -# success = 'true' -# conn.commit() -# -# if table == 'transcript_id': -# accession = entry -# desc = data -# query = "UPDATE transcript_id SET description = %s, updated = NOW() WHERE accession = %s" -# conn = dbConnection.get_connection().get_connection() -# cursor = conn.cursor() -# cursor.execute(query, (desc, accession)) -# success = 'true' -# conn.commit() - - if table == 'transcript_info': - accession = entry - description = data[1] - variant = data[2] - version = data[3] - hgnc_symbol = data[4] - uta_symbol = data[5] - query = "UPDATE transcript_info SET description=%s, transcriptVariant=%s, currentVersion=%s, hgncSymbol=%s, utaSymbol=%s, updated=NOW() WHERE refSeqID = %s" - conn = dbConnection.get_connection().get_connection() - cursor = conn.cursor() - cursor.execute(query, (description, variant, version, hgnc_symbol, uta_symbol, accession)) 
- success = 'true' - conn.commit() - cursor.close() - conn.close() - return success - - -def update_refSeqGene_data(rsg_data): - query = "UPDATE refSeqGene_loci SET hgncSymbol=%s, updated=NOW() WHERE refSeqGeneID=%s" - conn = dbConnection.get_connection().get_connection() - cursor = conn.cursor() - cursor.execute(query, (rsg_data[10], rsg_data[0])) - success = 'true' - conn.commit() - cursor.close() - conn.close() - return success - -""" -mark for removal -""" -# def update_transcript_loci(update_data, primary_assembly): -# data = update_data -# data['name'] = str(data['name']).replace("'", "\'") -# table = 'genePos' + str(primary_assembly.replace('GRCh', '')) -# query = "UPDATE " + table + " SET hgncID=%s, symbol=%s, name=%s, prevSymbol=%s, reference=%s, assembly=%s, chr=%s, start=%s, end=%s, refSeqGeneID=%s, updated=NOW() WHERE refSeqTranscriptID=%s" -# conn = dbConnection.get_connection().get_connection() -# cursor = conn.cursor() -# cursor.execute(query, (data['hgncID'],data['symbol'],data['name'],data['prevSymbol'],data['reference'],data['assembly'],data['chr'],data['start'],data['end'],data['refSeqGeneID'],data['refSeqTranscriptID'])) -# success = 'true' -# conn.commit() -# cursor.close() -# conn.close() -# return success - -if __name__ == '__main__': - update() - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
-# \ No newline at end of file diff --git a/VariantValidator/variantanalyser/dbControls/mysql_error.txt b/VariantValidator/variantanalyser/dbControls/mysql_error.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/VariantValidator/variantanalyser/functions.py b/VariantValidator/variantanalyser/functions.py deleted file mode 100644 index 7762fbb3..00000000 --- a/VariantValidator/variantanalyser/functions.py +++ /dev/null @@ -1,3043 +0,0 @@ -# -*- coding: utf-8 -*- -""" -functions.py - -Module containing VariantValidator sub-functions. The majoirty of these functions require -hgvs Python package top-level functions or sub-functions contained in uta.py and -seqfetcher.py -""" - -# IMPORT REQUIRED PYTHON MODULES -import re -import os -import sys -import copy -from vvLogging import logger - -# Setup functions - -# Config Section Mapping function -def ConfigSectionMap(section): - dict1 = {} - options = Config.options(section) - for option in options: - try: - dict1[option] = Config.get(section, option) - if dict1[option] == -1: - logger.warning("skip: %s" % option) - except: - logger.warning("exception on %s!" 
% option) - dict1[option] = None - return dict1 - - -# Set up paths -# FUNCTIONS_ROOT = os.path.dirname(os.path.abspath(__file__)) -ENTREZ_ID = os.environ.get('ENTREZ_ID') -if ENTREZ_ID is None: - from configparser import ConfigParser - - CONF_ROOT = os.environ.get('CONF_ROOT') - Config = ConfigParser() - Config.read(os.path.join(CONF_ROOT, 'config.ini')) - ENTREZ_ID = ConfigSectionMap("EntrezID")['entrezid'] - -# IMPORT HGVS MODULES and create instances -import hgvs -import hgvs.exceptions -import hgvs.sequencevariant - -# Error types -from hgvs.exceptions import HGVSError, HGVSDataNotAvailableError, HGVSUnsupportedOperationError - - -class mergeHGVSerror(Exception): - pass - - -class alleleVariantError(Exception): - pass - -# # Connect to UTA -# hdp = hgvs.dataproviders.uta.connect(pooling=True) -# # Create normalizer -# hn = hgvs.normalizer.Normalizer(hdp, -# cross_boundaries=False, -# shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, -# alt_aln_method='splign' -# ) -# reverse_hn = hgvs.normalizer.Normalizer(hdp, -# cross_boundaries=False, -# shuffle_direction=5, -# alt_aln_method='splign' -# ) -# -# # Create normalizer -# merge_normalizer = hgvs.normalizer.Normalizer(hdp, -# cross_boundaries=False, -# shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, -# alt_aln_method='splign', -# validate=False -# ) -# reverse_merge_normalizer = hgvs.normalizer.Normalizer(hdp, -# cross_boundaries=False, -# shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, -# alt_aln_method='splign', -# validate=False -# ) -# -# # Validator -# vr = hgvs.validator.Validator(hdp) -# # parser -# hp = hgvs.parser.Parser() -# # Variantmapper -# vm = hgvs.variantmapper.VariantMapper(hdp, replace_reference=True) # , normalize=False) -# nr_vm = hgvs.variantmapper.VariantMapper(hdp, replace_reference=False) -# # SeqFetcher -# sf = hgvs.dataproviders.seqfetcher.SeqFetcher() -# -# #create no_norm_evm -# no_norm_evm_38 = 
hgvs.assemblymapper.AssemblyMapper(hdp, -# assembly_name='GRCh38', -# alt_aln_method='splign', -# normalize=False, -# replace_reference=True -# ) -# -# no_norm_evm_37 = hgvs.assemblymapper.AssemblyMapper(hdp, -# assembly_name='GRCh37', -# alt_aln_method='splign', -# normalize=False, -# replace_reference=True -# ) - -# variantanalyser modules -import dbControls -import supported_chromosome_builds -import hgvs2vcf -import pseudo_vcf2hgvs -import gap_genes -import links - -# BioPython -from Bio import Entrez -from Bio import SeqIO -from Bio.Seq import Seq - -# HGNC rest variables -import httplib2 as http -import json - -try: - from urlparse import urlparse -except ImportError: - from urllib.parse import urlparse - -""" -usr_input -collect the input from the form and convert to a hgvs readable string - Removes brackets and contained information -if given - Identifies variant type (p. c. etc) - Returns a dictionary containing a formated input string which is optimal for hgvs - parsing and the variant type - Accepts c, g, n, r currently. And now P also 15.07.15 -""" - - -def user_input(input): - raw_variant = input.strip() - - # Set regular expressions for if statements - pat_g = re.compile(":g\.") # Pattern looks for :g. - pat_gene = re.compile('\(.+?\)') # Pattern looks for (....) - pat_c = re.compile(":c\.") # Pattern looks for :c. - pat_r = re.compile(":r\.") # Pattern looks for :r. - pat_n = re.compile(":n\.") # Pattern looks for :n. - pat_p = re.compile(":p\.") # Pattern looks for :p. - pat_m = re.compile(":m\.") # Pattern looks for :m. - pat_est = re.compile("\d:\d") # Pattern looks for number:number - - # If statements - if pat_g.search(raw_variant): # If the :g. pattern is present in the raw_variant, g_in is linked to the raw_variant - if pat_gene.search(raw_variant): # If pat gene is present in the raw_variant - variant = pat_gene.sub('', - raw_variant) # variant is set to the raw_variant string with the pattern (...) 
substituted out - formated = {'variant': variant, 'type': ':g.'} - return formated - else: - variant = raw_variant # Otherwise it is set to raw_variant - formated = {'variant': variant, 'type': ':g.'} - return formated - - elif pat_r.search(raw_variant): - if pat_gene.search(raw_variant): - variant = pat_gene.sub('', raw_variant) - formated = {'variant': variant, 'type': ':r.'} - return formated - else: - variant = raw_variant - formated = {'variant': variant, 'type': ':r.'} - return formated - - elif pat_n.search(raw_variant): - if pat_gene.search(raw_variant): - variant = pat_gene.sub('', raw_variant) - formated = {'variant': variant, 'type': ':n.'} - return formated - else: - variant = raw_variant - formated = {'variant': variant, 'type': ':n.'} - return formated - - elif pat_c.search(raw_variant): - if pat_gene.search(raw_variant): - variant = pat_gene.sub('', raw_variant) - formated = {'variant': variant, 'type': ':c.'} - return formated - else: - variant = raw_variant - formated = {'variant': variant, 'type': ':c.'} - return formated - - elif pat_p.search(raw_variant): - variant = raw_variant - formated = {'variant': variant, 'type': ':p.'} - return formated - - elif pat_m.search(raw_variant): - variant = raw_variant - formated = {'variant': variant, 'type': ':m.'} - return formated - elif pat_est.search(raw_variant): - variant = raw_variant - formated = {'variant': variant, 'type': 'est'} - return formated - else: - formatted = 'invalid' - return formatted - - -""" -r_to_c -parses r. variant strings into hgvs object and maps to the c. equivalent. 
- -Marked for removal -""" - -# def r_to_c(variant, evm, hp): -# # convert the input string into a hgvs object by parsing -# var_r = hp.parse_hgvs_variant(variant) -# # map to the coding sequence -# var_c = evm.r_to_c(var_r) # coding level variant -# variant = str(var_c) -# c_from_r = {'variant': variant, 'type': ':c.'} -# return c_from_r - - -""" -Maps transcript variant descriptions onto specified RefSeqGene reference sequences -Return an hgvs object containing the genomic sequence variant relative to the RefSeqGene -acession -refseq_ac = RefSeqGene ac - -Marked for removal -""" - - -# def refseq(variant, vm, refseq_ac, hp, hdp, no_norm_evm, primary_assembly, vr, sf, nr_vm, hn): -# # parse the variant into hgvs object -# var_c = hp.parse_hgvs_variant(variant) -# # map to the genomic co-ordinates using the easy variant mapper set to alt_aln_method = alt_aln_method -# var_g = myevm_t_to_g(var_c, hdp, no_norm_evm, primary_assembly, vm, hp, hn, sf, nr_vm) -# # Get overlapping transcripts - forcing a splign alignment -# start_i = var_g.posedit.pos.start.base -# end_i = var_g.posedit.pos.end.base -# alt_ac = var_g.ac -# alt_aln_method = 'splign' -# transcripts = hdp.get_tx_for_region(alt_ac, alt_aln_method, start_i - 1, end_i) -# # Take the first transcript -# for trans in transcripts: -# tx_ac = trans[0] -# try: -# ref_c = vm.g_to_t(var_g, tx_ac, alt_aln_method='splign') -# except: -# continue -# else: -# # map the variant co-ordinates to the refseq Gene accession using vm -# ref_g_dict = { -# 'ref_g': '', -# 'error': 'false' -# } -# try: -# ref_g_dict['ref_g'] = vm.t_to_g(ref_c, alt_ac=refseq_ac, alt_aln_method='splign') -# except: -# e = sys.exc_info()[0] -# ref_g_dict['error'] = e -# try: -# vr.validate(ref_g_dict['ref_g']) -# except: -# e = sys.exc_info()[0] -# ref_g_dict['error'] = e -# if ref_g_dict['error'] == 'false': -# return ref_g_dict -# else: -# continue -# # Return as an error if all fail -# return ref_g_dict - - -""" -Parses genomic variant strings into 
hgvs objects -Maps genomic hgvs object into a coding hgvs object if the c accession string is provided -returns a c. variant description string - -Marked for removal -""" - -# def g_to_c(var_g, tx_ac, hp, evm): -# pat_g = re.compile(":g\.") # Pattern looks for :g. -# # If the :g. pattern is present in the input variant -# if pat_g.search(var_g): -# # convert the input string into a hgvs object by parsing -# var_g = hp.parse_hgvs_variant(var_g) -# # Map to coding variant -# var_c = str(evm.g_to_c(var_g, tx_ac)) -# return var_c - - -""" -Parses genomic variant strings into hgvs objects -Maps genomic hgvs object into a non-coding hgvs object if the n accession string is provided -returns a n. variant description string - -Marked for removal -""" - -# def g_to_n(var_g, tx_ac, hp, evm): -# pat_g = re.compile(":g\.") # Pattern looks for :g. -# # If the :g. pattern is present in the input variant -# if pat_g.search(var_g): -# # convert the input string into a hgvs object by parsing -# var_g = hp.parse_hgvs_variant(var_g) -# # Map to coding variant -# var_n = str(evm.g_to_n(var_g, tx_ac)) -# return var_n - - -""" -Ensures variant strings are transcript c. or n. -returns parsed hgvs c. or n. object -""" - - -def coding(variant, hp): - # If the :c. pattern is present in the input variant - if re.search(':c.', variant) or re.search(':n.', variant): - # convert the input string into a hgvs object - var_c = hp.parse_hgvs_variant(variant) - return var_c - - -""" -Mapping transcript to genomic position from a HGVS string rather than an hgvs (py) parsed object -Interfaces with myevm t_to_g -Ensures variant strings are transcript c. or n. -returns parsed hgvs g. object -""" - - -def genomic(variant, no_norm_evm, hp, hdp, primary_assembly, vm, hn, sf, nr_vm): - # Set regular expressions for if statements - pat_g = re.compile(":g\.") # Pattern looks for :g. - pat_n = re.compile(":n\.") - pat_c = re.compile(":c\.") # Pattern looks for :c. - - # If the :c. 
pattern is present in the input variant - if pat_c.search(variant) or pat_n.search(variant): - error = 'false' - hgvs_var = hp.parse_hgvs_variant(variant) - try: - var_g = myevm_t_to_g(hgvs_var, hdp, no_norm_evm, primary_assembly, vm, hp, hn, sf, nr_vm) - except hgvs.exceptions.HGVSError as e: - error = e - if error != 'false': - var_g = 'error ' + str(e) - return var_g - - # If the :g. pattern is present in the input variant - elif (pat_g.search(variant)): # or (pat_n.search(variant)): - # convert the input string into a hgvs object - var_g = hp.parse_hgvs_variant(variant) - return var_g - - -""" - - -Mapping transcript to protein prediction -Accepts a variant string rather than a parsed hgvs_object -Ensures variant strings are transcript c. -returns parsed hgvs p. object - -Replaced by myc_to_p and marked for removal -""" - -# def protein(variant, evm, hp, hdp): -# # Set regular expressions for if statements -# pat_c = re.compile(":c\.") # Pattern looks for :c. Note (gene) has been removed -# -# # If the :c. pattern is present in the input variant -# if pat_c.search(variant): -# # convert the input string into a hgvs object -# var_c = hp.parse_hgvs_variant(variant) -# # Does the edit affect the start codon? 
-# if ((var_c.posedit.pos.start.base >= 1 and var_c.posedit.pos.start.base <= 3 and var_c.posedit.pos.start.offset == 0) or ( -# var_c.posedit.pos.end.base >= 1 and var_c.posedit.pos.end.base <= 3 and var_c.posedit.pos.end.offset == 0)) and not re.search('\*', str( -# var_c.posedit.pos)): -# ass_prot = hdp.get_pro_ac_for_tx_ac(var_c.ac) -# if str(ass_prot) == 'None': -# cod = str(var_c) -# cod = cod.replace('inv', 'del') -# cod = hp.parse_hgvs_variant(cod) -# p = evm.c_to_p(cod) -# ass_prot = p.ac -# var_p = hgvs.sequencevariant.SequenceVariant(ac=ass_prot, type='p', posedit='(Met1?)') -# else: -# var_p = evm.c_to_p(var_c) -# return var_p -# if re.search(':n.', variant): -# var_p = hp.parse_hgvs_variant(variant) -# var_p.ac = 'Non-coding transcript' -# var_p.posedit = '' -# return var_p - -""" -Function which takes a NORMALIZED hgvs Python transcript variant and maps to a specified protein reference sequence. A protein -level hgvs python object is returned. - -Note the function currently assumes that the transcript description is correctly normalized having come from the -previous g_to_t function -""" - - -def myc_to_p(hgvs_transcript, evm, hdp, hp, hn, vm, sf, re_to_p): - # Create dictionary to store the information - hgvs_transcript_to_hgvs_protein = {'error': '', 'hgvs_protein': '', 'ref_residues': ''} - - # Collect the associated protein - if hgvs_transcript.type == 'c': - associated_protein_accession = hdp.get_pro_ac_for_tx_ac(hgvs_transcript.ac) - # This method sometimes fails - if str(associated_protein_accession) == 'None': - cod = str(hgvs_transcript) - cod = cod.replace('inv', 'del') - cod = hp.parse_hgvs_variant(cod) - p = evm.c_to_p(cod) - associated_protein_accession = p.ac - else: - pass - - # Check for non-coding transcripts - if hgvs_transcript.type == 'c': - # Handle non inversions with simple c_to_p mapping - - if (hgvs_transcript.posedit.edit.type != 'inv') and (hgvs_transcript.posedit.edit.type != 'delins') and ( - re_to_p is False): - # Does 
the edit affect the start codon? - if (( - hgvs_transcript.posedit.pos.start.base >= 1 and hgvs_transcript.posedit.pos.start.base <= 3 and hgvs_transcript.posedit.pos.start.offset == 0) or ( - hgvs_transcript.posedit.pos.end.base >= 1 and hgvs_transcript.posedit.pos.end.base <= 3 and hgvs_transcript.posedit.pos.end.offset == 0)) \ - and not re.search('\*', str( - hgvs_transcript.posedit.pos)): - residue_one = sf.fetch_seq(associated_protein_accession, start_i=1-1,end_i=1) - threed_residue_one = links.one_to_three(residue_one) - r_one_report = '(%s1?)' % threed_residue_one - hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, - type='p', posedit=r_one_report) - else: - try: - hgvs_protein = evm.c_to_p(hgvs_transcript) - except IndexError as e: - error = str(e) - if re.search('string index out of range', error) and re.search('dup', str(hgvs_transcript)): - hgvs_ins = hp.parse_hgvs_variant(str(hgvs_transcript)) - hgvs_ins = hn.normalize(hgvs_ins) - inst = hgvs_ins.ac + ':c.' + str(hgvs_ins.posedit.pos.start.base - 1) + '_' + str( - hgvs_ins.posedit.pos.start.base) + 'ins' + hgvs_ins.posedit.edit.ref - hgvs_transcript = hp.parse_hgvs_variant(inst) - hgvs_protein = evm.c_to_p(hgvs_transcript) - - try: - hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein - return hgvs_transcript_to_hgvs_protein - except UnboundLocalError: - hgvs_transcript_to_hgvs_protein = myc_to_p(hgvs_transcript, evm, hdp, hp, hn, vm, sf, re_to_p=True) - return hgvs_transcript_to_hgvs_protein - - else: - # Additional code required to process inversions - # Note, this code was developed for VariantValidator and is not native to the biocommons hgvs Python package - # Convert positions to n. 
position - hgvs_naughty = vm.c_to_n(hgvs_transcript) - - # Collect the deleted sequence using fetch_seq - del_seq = sf.fetch_seq(str(hgvs_naughty.ac), start_i=hgvs_naughty.posedit.pos.start.base - 1, - end_i=hgvs_naughty.posedit.pos.end.base) - - # Make the inverted sequence - my_seq = Seq(del_seq) - - if hgvs_transcript.posedit.edit.type == 'inv': - inv_seq = my_seq.reverse_complement() - else: - inv_seq = hgvs_transcript.posedit.edit.alt - if inv_seq is None: - inv_seq = '' - - # Look for p. delins or del - not_delins = True - if hgvs_transcript.posedit.edit.type != 'inv': - try: - shifts = evm.c_to_p(hgvs_transcript) - if re.search('del', shifts.posedit.edit.type): - not_delins = False - except Exception: - not_delins = False - else: - not_delins = False - - # Use inv delins code? - if not_delins == False: - # Collect the associated protein - associated_protein_accession = hdp.get_pro_ac_for_tx_ac(hgvs_transcript.ac) - - # Intronic inversions are marked as uncertain i.e. p.? - if re.search('\d+\-', str(hgvs_transcript.posedit.pos)) or re.search('\d+\+', str( - hgvs_transcript.posedit.pos)) or re.search('\*', str(hgvs_transcript.posedit.pos)) or re.search( - '[cn].\-', str(hgvs_transcript)): - if (( - hgvs_transcript.posedit.pos.start.base >= 1 and hgvs_transcript.posedit.pos.start.base <= 3 and hgvs_transcript.posedit.pos.start.offset == 0) - or - ( - hgvs_transcript.posedit.pos.end.base >= 1 and hgvs_transcript.posedit.pos.end.base <= 3 and hgvs_transcript.posedit.pos.end.offset == 0)) \ - and not re.search('\*', str(hgvs_transcript.posedit.pos)): - residue_one = sf.fetch_seq(associated_protein_accession, start_i=1 - 1, end_i=1) - threed_residue_one = links.one_to_three(residue_one) - r_one_report = '(%s1?)' % threed_residue_one # was (MET1?) 
- hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, - type='p', posedit=r_one_report) - else: - # Make the variant - hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', - posedit='?') - hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein - return hgvs_transcript_to_hgvs_protein - else: - # Need to obtain the cds_start - inf = hdp.get_tx_identity_info(hgvs_transcript.ac) - cds_start = inf[3] - - # Extract the reference coding sequence from SeqRepo - try: - ref_seq = sf.fetch_seq(str(hgvs_naughty.ac)) - except Exception as e: - error = str(e) - hgvs_transcript_to_hgvs_protein['error'] = error - return hgvs_transcript_to_hgvs_protein - - # Create the variant coding sequence - var_seq = links.n_inversion(ref_seq, del_seq, inv_seq, - hgvs_naughty.posedit.pos.start.base, - hgvs_naughty.posedit.pos.end.base) - # Translate the reference and variant proteins - prot_ref_seq = links.translate(ref_seq, cds_start) - - try: - prot_var_seq = links.translate(var_seq, cds_start) - except IndexError: - hgvs_transcript_to_hgvs_protein[ - 'error'] = 'Cannot identify an in-frame Termination codon in the variant mRNA sequence' - hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', - posedit='?') - hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein - return hgvs_transcript_to_hgvs_protein - - if prot_ref_seq == 'error': - error = 'Unable to generate protein variant description' - hgvs_transcript_to_hgvs_protein['error'] = error - return hgvs_transcript_to_hgvs_protein - elif prot_var_seq == 'error': - # Does the edit affect the start codon? 
- if (( - hgvs_transcript.posedit.pos.start.base >= 1 and hgvs_transcript.posedit.pos.start.base <= 3 and hgvs_transcript.posedit.pos.start.offset == 0) - or - ( - hgvs_transcript.posedit.pos.end.base >= 1 and hgvs_transcript.posedit.pos.end.base <= 3 and hgvs_transcript.posedit.pos.end.offset == 0)) \ - and not re.search('\*', str(hgvs_transcript.posedit.pos)): - residue_one = sf.fetch_seq(associated_protein_accession, start_i=1 - 1, end_i=1) - threed_residue_one = links.one_to_three(residue_one) - #threed_residue_one # was (MET1?) = links.one_to_three(residue_one) - r_one_report = '(%s1?)' % threed_residue_one # was (MET1?) - hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, - type='p', posedit=r_one_report) - - hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein - return hgvs_transcript_to_hgvs_protein - else: - error = 'Unable to generate protein variant description' - hgvs_transcript_to_hgvs_protein['error'] = error - return hgvs_transcript_to_hgvs_protein - else: - # Gather the required information regarding variant interval and sequences - if hgvs_transcript.posedit.edit.type != 'delins': - pro_inv_info = links.pro_inv_info(prot_ref_seq, prot_var_seq) - else: - pro_inv_info = links.pro_delins_info(prot_ref_seq, prot_var_seq) - - # Error has occurred - if pro_inv_info['error'] == 'true': - error = 'Translation error occurred, please contact admin' - hgvs_transcript_to_hgvs_protein['error'] = error - return hgvs_transcript_to_hgvs_protein - - # The Nucleotide variant has not affected the protein sequence i.e. synonymous - elif pro_inv_info['variant'] != 'true': - # Make the variant - hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, - type='p', - posedit='=') - hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein - return hgvs_transcript_to_hgvs_protein - - else: - # Early termination i.e. 
stop gained - # if pro_inv_info['terminate'] == 'true': - # end = 'Ter' + str(pro_inv_info['ter_pos']) - # pro_inv_info['prot_ins_seq'].replace('*', end) - - # Complete variant description - # Recode the single letter del and ins sequences into three letter amino acid codes - del_thr = links.one_to_three(pro_inv_info['prot_del_seq']) - ins_thr = links.one_to_three(pro_inv_info['prot_ins_seq']) - - # Write the HGVS position and edit - del_len = len(del_thr) - from_aa = del_thr[0:3] - to_aa = del_thr[del_len - 3:] - - # Handle a range of amino acids - if pro_inv_info['edit_start'] != pro_inv_info['edit_end']: - if len(ins_thr) > 0: - if re.search('Ter', del_thr) and ins_thr[-3:] != 'Ter': - posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + '_' + to_aa + str( - pro_inv_info['edit_end']) + 'delins' + ins_thr + '?)' - else: - posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + '_' + to_aa + str( - pro_inv_info['edit_end']) + 'delins' + ins_thr + ')' - else: - if re.search('Ter', del_thr) and ins_thr[-3:] != 'Ter': - posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + '_' + to_aa + str( - pro_inv_info['edit_end']) + 'del?)' - else: - posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + '_' + to_aa + str( - pro_inv_info['edit_end']) + 'del)' - else: - # Handle extended proteins i.e. 
stop_lost - if del_thr == 'Ter' and (len(ins_thr) > len(del_thr)): - # Nucleotide variant range aligns to the Termination codon - if ins_thr[-3:] == 'Ter': - posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + str( - ins_thr[:3]) + 'ext' + str(ins_thr[-3:]) + str((len(ins_thr) / 3) - 1) + ')' - # Nucleotide variant range spans the Termination codon - else: - posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + str( - ins_thr[:3]) + 'ext?)' - - # Nucleotide variation has not affected the length of the protein thus substitution or del - else: - if len(ins_thr) == 3: - posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + ins_thr + ')' - elif len(ins_thr) == 0: - posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + 'del)' - else: - posedit = '(' + from_aa + str( - pro_inv_info['edit_start']) + 'delins' + ins_thr + ')' - - # Complete the variant - hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, - type='p', - posedit=posedit) - - hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein - - else: - hgvs_transcript_to_hgvs_protein['hgvs_protein'] = shifts - - # Return - return hgvs_transcript_to_hgvs_protein - - - # Handle non-coding transcript and non transcript descriptions - elif hgvs_transcript.type == 'n': - # non-coding transcripts - hgvs_protein = copy.deepcopy(hgvs_transcript) - hgvs_protein.ac = 'Non-coding ' - hgvs_protein.posedit = '' - hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein - return hgvs_transcript_to_hgvs_protein - else: - hgvs_transcript_to_hgvs_protein['error'] = 'Unable to map %s to %s' % ( - hgvs_transcript.ac, associated_protein_accession) - return hgvs_transcript_to_hgvs_protein - - -""" -Marked for removal -""" -# Return an hgvs object containing the rna sequence variant -# def rna(variant, evm, hp): -# Set regular expressions for if statements -# pat_c = re.compile(":c\.") # Pattern looks for :c. Note (gene) has been removed -# If the :c. 
pattern is present in the input variant -# if pat_c.search(variant): -# convert the input string into a hgvs object -# var_c = hp.parse_hgvs_variant(variant) -# map to the genomic sequence -# var_r = evm.c_to_n(var_c) # rna level variant -# return var_r - -""" -Marked for removal -""" -# def hgvs_rna(variant, hp): -# # Set regular expressions for if statements -# pat_r = re.compile(":n\.") # Pattern looks for :n. Note (gene) has been removed -# # If the :r. pattern is present in the input variant -# if pat_r.search(variant): -# # convert the input string into a hgvs object -# var_r = hp.parse_hgvs_variant(variant) -# return var_r - - -""" -Ensures variant strings are g. -returns parsed hgvs g. object - -Marked for removal -""" - -# def hgvs_genomic(variant, hp): -# # Set regular expressions for if statements -# pat_g = re.compile(":g\.") # Pattern looks for :g. Note (gene) has been removed -# # If the :g. pattern is present in the input variant -# if pat_g.search(variant): -# # convert the input string into a hgvs object -# var_g = hp.parse_hgvs_variant(variant) -# return var_g - - -""" -Enhanced transcript to genome position mapping function using evm -Deals with mapping from transcript positions that do not exist in the genomic sequence -i.e. the stated position aligns to a genomic gap! -Trys to ensure that a genomic position is always returned even if the c. or n. transcript -will not map to the specified genome build primary assembly. -Deals with transcript mapping to several genomic assemblies -Order -Map to a single NC_ for the specified genome build primary assembly -Map to a single NC_ for an alternate genome build primary assembly -Map to an NT_ from the specified genome build -Map to an NT_ from an alternative genome build -Map to an NW_ from the specified genome build -Map to an NW_ from an alternative genome buildRequires parsed c. or n. object -returns parsed hgvs g. 
object -""" - - -def myevm_t_to_g(hgvs_c, hdp, no_norm_evm, primary_assembly, vm, hp, hn, sf, nr_vm): - - # store the input - stored_hgvs_c = copy.deepcopy(hgvs_c) - expand_out = 'false' - utilise_gap_code = True - - # Gap gene black list - try: - gene_symbol = dbControls.data.get_gene_symbol_from_transcriptID(hgvs_c.ac) - except Exception: - utilise_gap_code = False - else: - # If the gene symbol is not in the list, the value False will be returned - utilise_gap_code = gap_genes.gap_black_list(gene_symbol) - # Warn gap code in use - logger.warning("gap_compensation_myevm = " + str(utilise_gap_code)) - - if utilise_gap_code is True and ( - hgvs_c.posedit.edit.type == 'identity' or hgvs_c.posedit.edit.type == 'del' or hgvs_c.posedit.edit.type == 'delins' or hgvs_c.posedit.edit.type == 'dup' or hgvs_c.posedit.edit.type == 'sub' or hgvs_c.posedit.edit.type == 'ins' or hgvs_c.posedit.edit.type == 'inv'): - - # if NM_ need the n. position - if re.match('NM_', str(hgvs_c.ac)): - hgvs_c = no_norm_evm.c_to_n(hgvs_c) - - # Check for intronic - try: - hn.normalize(hgvs_c) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('intronic variant', error): - pass - elif re.search('Length implied by coordinates must equal sequence deletion length', error) and re.match( - 'NR_', hgvs_c.ac): - hgvs_c.posedit.pos.end.base = hgvs_c.posedit.pos.start.base + len(hgvs_c.posedit.edit.ref) - 1 - - # Check again before continuing - if re.search('\d+\+', str(hgvs_c.posedit.pos)) or re.search('\d+\-', str(hgvs_c.posedit.pos)) or re.search( - '\*\d+\+', str(hgvs_c.posedit.pos)) or re.search('\*\d+\-', str(hgvs_c.posedit.pos)): - pass - - else: - try: - # For non-intronic sequence - hgvs_t = copy.deepcopy(hgvs_c) - if hgvs_t.posedit.edit.type == 'inv': - inv_alt = revcomp(hgvs_t.posedit.edit.ref) - t_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str(hgvs_t.posedit.pos.start.base) + '_' + str( - hgvs_t.posedit.pos.end.base) + 'del' + hgvs_t.posedit.edit.ref + 'ins' + inv_alt - hgvs_t_delins = hp.parse_hgvs_variant(t_delins) - pre_base = sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 2, - hgvs_t.posedit.pos.start.base - 1) - post_base = sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.end.base, - hgvs_t.posedit.pos.end.base + 1) - hgvs_t.posedit.edit.ref = pre_base + hgvs_t.posedit.edit.ref + post_base - inv_alt = pre_base + inv_alt + post_base - hgvs_t.posedit.pos.start.base = hgvs_t.posedit.pos.start.base - 1 - start = hgvs_t.posedit.pos.start.base - hgvs_t.posedit.pos.start.base = start + 1 - hgvs_t.posedit.pos.end.base = hgvs_t.posedit.pos.end.base + 1 - end = hgvs_t.posedit.pos.end.base - hgvs_t.posedit.pos.start.base = start - hgvs_t.posedit.pos.end.base = end - hgvs_str = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(start) + '_' + str( - end) + 'del' + hgvs_t.posedit.edit.ref + 'ins' + inv_alt - hgvs_t = hp.parse_hgvs_variant(hgvs_str) - elif hgvs_c.posedit.edit.type == 'dup': - pre_base = sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 2, - hgvs_t.posedit.pos.start.base - 1) - post_base = sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.end.base, - hgvs_t.posedit.pos.end.base + 1) - alt = pre_base + hgvs_t.posedit.edit.ref + hgvs_t.posedit.edit.ref + post_base - ref = pre_base + hgvs_t.posedit.edit.ref + post_base - dup_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str( - hgvs_t.posedit.pos.start.base - 1) + '_' + str( - (hgvs_t.posedit.pos.start.base + len(ref)) - 2) + 'del' + ref + 'ins' + alt - hgvs_t = hp.parse_hgvs_variant(dup_to_delins) - elif hgvs_c.posedit.edit.type == 'ins': - ins_ref = sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 2, - hgvs_t.posedit.pos.end.base + 1) - ins_alt = ins_ref[:2] + hgvs_t.posedit.edit.alt + ins_ref[-2:] - ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str( - hgvs_t.posedit.pos.start.base - 1) + '_' + str( - hgvs_t.posedit.pos.end.base + 1) + 'del' + ins_ref + 'ins' + ins_alt - hgvs_t = hp.parse_hgvs_variant(ins_to_delins) - else: - if str(hgvs_t.posedit.edit.alt) == 'None': - hgvs_t.posedit.edit.alt = '' - pre_base = sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 2, - hgvs_t.posedit.pos.start.base - 1) - post_base = sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.end.base, - hgvs_t.posedit.pos.end.base + 1) - hgvs_t.posedit.edit.ref = pre_base + hgvs_t.posedit.edit.ref + post_base - hgvs_t.posedit.edit.alt = pre_base + hgvs_t.posedit.edit.alt + post_base - hgvs_t.posedit.pos.start.base = hgvs_t.posedit.pos.start.base - 1 - start = hgvs_t.posedit.pos.start.base - hgvs_t.posedit.pos.start.base = start + 1 - hgvs_t.posedit.pos.end.base = hgvs_t.posedit.pos.end.base + 1 - end = hgvs_t.posedit.pos.end.base - hgvs_t.posedit.pos.start.base = start - hgvs_t.posedit.pos.end.base = end - hgvs_str = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(start) + '_' + str(end) + str( - hgvs_t.posedit.edit) - hgvs_t = hp.parse_hgvs_variant(hgvs_str) - hgvs_c = copy.deepcopy(hgvs_t) - - # Set expanded out test to true - expand_out = 'true' - - except Exception: - hgvs_c = hgvs_c - - if re.match('NM_', str(hgvs_c.ac)): - try: - hgvs_c = no_norm_evm.n_to_c(hgvs_c) - except hgvs.exceptions.HGVSError as e: - hgvs_c = copy.deepcopy(stored_hgvs_c) - - # Ensure the altered c. variant has not crossed intro exon boundaries - hgvs_check_boundaries = copy.deepcopy(hgvs_c) - try: - h_variant = hn.normalize(hgvs_check_boundaries) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error): - hgvs_c = copy.deepcopy(stored_hgvs_c) - # Catch identity at the exon/intron boundary by trying to normalize ref only - if hgvs_check_boundaries.posedit.edit.type == 'identity': - reform_ident = str(hgvs_c).split(':')[0] - reform_ident = reform_ident + ':' + stored_hgvs_c.type + '.' 
+ str(hgvs_c.posedit.pos) + 'del' + str( - hgvs_c.posedit.edit.ref) # + 'ins' + str(hgvs_c.posedit.edit.alt) - hgvs_reform_ident = hp.parse_hgvs_variant(reform_ident) - try: - hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error) or re.search( - 'Normalization of intronic variants', error): - hgvs_c = copy.deepcopy(stored_hgvs_c) - try: - hgvs_genomic = no_norm_evm.t_to_g(hgvs_c) - hn.normalize(hgvs_genomic) # Check the validity of the mapping - # This will fail on multiple refs for NC_ - except hgvs.exceptions.HGVSError as e: - # Recover all available mapping options from UTA - mapping_options = hdp.get_tx_mapping_options(hgvs_c.ac) - - if mapping_options == []: - raise HGVSDataNotAvailableError( - "No alignment data between the specified transcript reference sequence and any GRCh37 and GRCh38 genomic reference sequences (including alternate chromosome assemblies, patches and RefSeqGenes) are available.") - - # Capture errors from attempted mappings - attempted_mapping_error = '' - - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NC_', option[1]): - chr_num = supported_chromosome_builds.supported_for_mapping(str(option[1]), primary_assembly) - if chr_num != 'false': - try: - hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[ - 1] + '~' - print e - continue - - # If not mapped, raise error - try: - hn.normalize(hgvs_genomic) - except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NC_', option[1]): - chr_num = supported_chromosome_builds.supported_for_mapping(str(option[1]), primary_assembly) - if chr_num == 'false': - try: - hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - if re.search(option[1], attempted_mapping_error): - 
pass - else: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[ - 1] + '~' - print e - continue - try: - hn.normalize(hgvs_genomic) - except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NT_', option[1]): - chr_num = supported_chromosome_builds.supported_for_mapping(str(option[1]), primary_assembly) - if chr_num != 'false': - try: - hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[ - 1] + '~' - print e - continue - try: - hn.normalize(hgvs_genomic) - except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NT_', option[1]): - chr_num = supported_chromosome_builds.supported_for_mapping(str(option[1]), - primary_assembly) - if chr_num == 'false': - try: - hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + \ - option[ - 1] + '~' - print e - continue - try: - hn.normalize(hgvs_genomic) - except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NW_', option[1]): - chr_num = supported_chromosome_builds.supported_for_mapping(str(option[1]), - primary_assembly) - if chr_num != 'false': - try: - hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + \ - option[1] + '~' - print e - continue - try: - hn.normalize(hgvs_genomic) - except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NW_', option[1]): - chr_num = supported_chromosome_builds.supported_for_mapping(str(option[1]), - primary_assembly) - if chr_num == 'false': - try: - hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) - break - except 
Exception as e: - attempted_mapping_error = attempted_mapping_error + str( - e) + "/" + hgvs_c.ac + "/" + \ - option[1] + '~' - print e - continue - - # Only a RefSeqGene available - try: - hn.normalize(hgvs_genomic) - except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NG_', option[1]): - try: - hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + \ - option[1] + '~' - print e - continue - - # If not mapped, raise error - try: - hgvs_genomic - except Exception: - raise HGVSDataNotAvailableError(attempted_mapping_error) - - if hgvs_c.posedit.edit.type == 'identity' and hgvs_genomic.posedit.edit.type == 'delins' and hgvs_genomic.posedit.edit.alt == '' and expand_out != 'true': - hgvs_genomic.posedit.edit.alt = hgvs_genomic.posedit.edit.ref - if hgvs_genomic.posedit.edit.type == 'ins' and utilise_gap_code is True: - try: - hgvs_genomic = hn.normalize(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error == 'insertion length must be 1': - ref = sf.fetch_seq(str(hgvs_genomic.ac), hgvs_genomic.posedit.pos.start.base - 1, - hgvs_genomic.posedit.pos.end.base) - hgvs_genomic.posedit.edit.ref = ref - hgvs_genomic.posedit.edit.alt = ref[0:1] + hgvs_genomic.posedit.edit.alt + ref[-1:] - hgvs_genomic = hn.normalize(hgvs_genomic) - if error == 'base start position must be <= end position': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - hgvs_genomic = hn.normalize(hgvs_genomic) - - # Statements required to reformat the stored_hgvs_c into a useable synonym - if (stored_hgvs_c.posedit.edit.ref == '' or stored_hgvs_c.posedit.edit.ref is None) and expand_out != 'false': - if stored_hgvs_c.type == 'c': - stored_hgvs_n = vm.c_to_n(stored_hgvs_c) - else: - 
stored_hgvs_n = stored_hgvs_c - stored_ref = sf.fetch_seq(str(stored_hgvs_n.ac), stored_hgvs_n.posedit.pos.start.base - 1, - stored_hgvs_n.posedit.pos.end.base) - stored_hgvs_c.posedit.edit.ref = stored_ref - - if (hgvs_genomic.posedit.edit.ref == '' or hgvs_genomic.posedit.edit.ref is None) and expand_out != 'false': - if hgvs_genomic.posedit.edit.type == 'ins': - stored_ref = sf.fetch_seq(str(hgvs_genomic.ac), hgvs_genomic.posedit.pos.start.base - 1, - hgvs_genomic.posedit.pos.end.base) - stored_alt = stored_ref[:1] + hgvs_genomic.posedit.edit.alt + stored_ref[-1:] - hgvs_genomic.posedit.edit.ref = stored_ref - hgvs_genomic.posedit.edit.alt = stored_alt - - # First look for variants mapping to the flanks of gaps - # either in the gap or on the flank but not fully within the gap - if expand_out == 'true': - - nr_genomic = nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) - - try: - hn.normalize(nr_genomic) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error_type_1 = str(e) - if re.match('Length implied by coordinates must equal sequence deletion length', str(e)) or str( - e) == 'base start position must be <= end position': - # Effectively, this code is designed to handle variants that are directly proximal to - # gap BOUNDARIES, but in some cases the replace reference function of hgvs mapping has removed bases due to - # the deletion length being > the specified range. 
- - # Warn of variant location wrt the gap - if re.match('Length implied by coordinates must equal sequence deletion length', str(e)): - logger.warning('Variant is proximal to the flank of a genomic gap') - genomic_gap_variant = vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) - try: - hn.normalize(genomic_gap_variant) - # Still a problem - except hgvs.exceptions.HGVSInvalidVariantError as e: - if 'base start position must be <= end position' in str(e) and \ - 'Length implied by coordinates must equal' in error_type_1: - make_gen_var = copy.copy(nr_genomic) - make_gen_var.posedit.edit.ref = sf.fetch_seq(nr_genomic.ac, - nr_genomic.posedit.pos.start.base - 1, - nr_genomic.posedit.pos.end.base) - genomic_gap_variant = make_gen_var - - error_type_1 = None - else: - genomic_gap_variant = nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) - - if error_type_1 == 'base start position must be <= end position': - logger.warning('Variant is fully within a genomic gap') - genomic_gap_variant = vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) - - # Logic - # We have checked that the variant does not cross boundaries, or is intronic - # So is likely mapping to a genomic gap - try: - hn.normalize(genomic_gap_variant) - except Exception as e: - if str(e) == 'base start position must be <= end position': - # This will only happen when the variant is fully within the gap - gap_start = genomic_gap_variant.posedit.pos.end.base - gap_end = genomic_gap_variant.posedit.pos.start.base - genomic_gap_variant.posedit.pos.start.base = gap_start - genomic_gap_variant.posedit.pos.end.base = gap_end - if re.match('Length implied by coordinates must equal sequence deletion length', str(e)): - # This will only happen if the variant is flanking the gap but is - # not inside the gap - logger.warning('Variant is on the flank of a genomic gap but not within the gap') - gap_start = genomic_gap_variant.posedit.pos.start.base - 1 - gap_end = genomic_gap_variant.posedit.pos.end.base + 1 - genomic_gap_variant.posedit.pos.start.base 
= gap_start - genomic_gap_variant.posedit.pos.end.base = gap_end - genomic_gap_variant.posedit.edit.ref = '' - stored_hgvs_c = copy.deepcopy(hgvs_c) - - # Remove alt - try: - genomic_gap_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - pass - - # Should be a delins so will normalize statically and replace the reference bases - genomic_gap_variant = hn.normalize(genomic_gap_variant) - # Static map to c. and static normalize - transcript_gap_variant = vm.g_to_t(genomic_gap_variant, hgvs_c.ac) - stored_transcript_gap_variant = transcript_gap_variant - - if not re.match('Length implied by coordinates must equal sequence deletion length', str(e)): - try: - transcript_gap_variant = hn.normalize(transcript_gap_variant) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - if ' Unsupported normalization of variants spanning the UTR-exon boundary' in str(e): - pass - - # if NM_ need the n. position - if re.match('NM_', str(hgvs_c.ac)): - transcript_gap_n = no_norm_evm.c_to_n(transcript_gap_variant) - transcript_gap_alt_n = no_norm_evm.c_to_n(stored_hgvs_c) - else: - transcript_gap_n = transcript_gap_variant - transcript_gap_alt_n = stored_hgvs_c - - # Ensure an ALT exists - try: - if transcript_gap_alt_n.posedit.edit.alt is None: - transcript_gap_alt_n.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - transcript_gap_n_delins_from_dup = transcript_gap_n.ac + ':' + transcript_gap_n.type + '.' + str( - transcript_gap_n.posedit.pos.start.base) + '_' + str( - transcript_gap_n.posedit.pos.end.base) + 'del' + transcript_gap_n.posedit.edit.ref + 'ins' + transcript_gap_n.posedit.edit.ref + transcript_gap_n.posedit.edit.ref - transcript_gap_n = hp.parse_hgvs_variant(transcript_gap_n_delins_from_dup) - transcript_gap_alt_n_delins_from_dup = transcript_gap_alt_n.ac + ':' + transcript_gap_alt_n.type + '.' 
+ str( - transcript_gap_alt_n.posedit.pos.start.base) + '_' + str( - transcript_gap_alt_n.posedit.pos.end.base) + 'del' + transcript_gap_alt_n.posedit.edit.ref + 'ins' + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n.posedit.edit.ref - transcript_gap_alt_n = hp.parse_hgvs_variant(transcript_gap_alt_n_delins_from_dup) - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(transcript_gap_n.posedit.edit.ref) - if transcript_gap_alt_n.posedit.edit.alt is not None: - alternate_bases = list(transcript_gap_alt_n.posedit.edit.alt) - else: - # Deletions with no ins - pre_alternate_bases = list(transcript_gap_alt_n.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = transcript_gap_n.posedit.pos.start.base - alt_start = transcript_gap_alt_n.posedit.pos.start.base - ref_base_dict = {} - for base in reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = ref_start + 1 - - alt_base_dict = {} - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for int in range(transcript_gap_alt_n.posedit.pos.start.base, - transcript_gap_alt_n.posedit.pos.end.base + 1, 1): - if int == alt_start: - alt_base_dict[int] = str(''.join(alternate_bases)) - else: - alt_base_dict[int] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base + 1, - 1): - if int in alt_base_dict.keys(): - alternate_sequence_bases.append(alt_base_dict[int]) - else: - alternate_sequence_bases.append(ref_base_dict[int]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Update variant, map to genome using vm and normalize - transcript_gap_n.posedit.edit.alt = alternate_sequence - - try: - transcript_gap_variant 
= vm.n_to_c(transcript_gap_n) - except: - transcript_gap_variant = transcript_gap_n - - try: - hgvs_genomic = vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) - hgvs_genomic = hn.normalize(hgvs_genomic) - except Exception as e: - if str(e) == "base start position must be <= end position": - # Expansion out is required to map back to the genomic position - pre_base = sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.start.base - 2, - transcript_gap_n.posedit.pos.start.base - 1) - post_base = sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.end.base, - transcript_gap_n.posedit.pos.end.base + 1) - transcript_gap_n.posedit.pos.start.base = transcript_gap_n.posedit.pos.start.base - 1 - transcript_gap_n.posedit.pos.end.base = transcript_gap_n.posedit.pos.end.base + 1 - transcript_gap_n.posedit.edit.ref = pre_base + transcript_gap_n.posedit.edit.ref + post_base - transcript_gap_n.posedit.edit.alt = pre_base + transcript_gap_n.posedit.edit.alt + post_base - try: - transcript_gap_variant = vm.n_to_c(transcript_gap_n) - except: - transcript_gap_variant = transcript_gap_n - hgvs_genomic = vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) - hgvs_genomic = hn.normalize(hgvs_genomic) - - # Bypass the next bit of gap code - expand_out = 'false' - - else: - pass - # No map to the flank of a gap or within the gap - else: - pass - - # CASCADING STATEMENTS WHICH CAPTURE t to g MAPPING OPTIONS - # Remove identity bases - if hgvs_c == stored_hgvs_c: - expand_out = 'false' - elif expand_out == 'false' or utilise_gap_code is False: - pass - # Correct expansion ref + 2 - elif expand_out == 'true' and ( - len(hgvs_genomic.posedit.edit.ref) == (len(stored_hgvs_c.posedit.edit.ref) + 2)): # >= 3: - hgvs_genomic.posedit.pos.start.base = hgvs_genomic.posedit.pos.start.base + 1 - hgvs_genomic.posedit.pos.end.base = hgvs_genomic.posedit.pos.end.base - 1 - hgvs_genomic.posedit.edit.ref = hgvs_genomic.posedit.edit.ref[1:-1] - if hgvs_genomic.posedit.edit.alt is not 
None: - hgvs_genomic.posedit.edit.alt = hgvs_genomic.posedit.edit.alt[1:-1] - elif expand_out == 'true' and ( - len(hgvs_genomic.posedit.edit.ref) != (len(stored_hgvs_c.posedit.edit.ref) + 2)): # >= 3: - if expand_out == 'true' and len(hgvs_genomic.posedit.edit.ref) == 2: - gn = hn.normalize(hgvs_genomic) - pass - - # Likely if the start or end position aligns to a gap in the genomic sequence - # Logic - # We have checked that the variant does not cross boundaries, or is intronic - # So is likely mapping to a genomic gap - elif expand_out == 'true' and len(hgvs_genomic.posedit.edit.ref) <= 1: - # Incorrect expansion, likely < ref + 2 - genomic_gap_variant = vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) - try: - hn.normalize(genomic_gap_variant) - except Exception as e: - if str(e) == 'base start position must be <= end position': - gap_start = genomic_gap_variant.posedit.pos.end.base - gap_end = genomic_gap_variant.posedit.pos.start.base - genomic_gap_variant.posedit.pos.start.base = gap_start - genomic_gap_variant.posedit.pos.end.base = gap_end - # Remove alt - try: - genomic_gap_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - pass - # Should be a delins so will normalize statically and replace the reference bases - genomic_gap_variant = hn.normalize(genomic_gap_variant) - # Static map to c. and static normalize - transcript_gap_variant = vm.g_to_t(genomic_gap_variant, hgvs_c.ac) - stored_transcript_gap_variant = transcript_gap_variant - transcript_gap_variant = hn.normalize(transcript_gap_variant) - # if NM_ need the n. 
position - if re.match('NM_', str(hgvs_c.ac)): - transcript_gap_n = no_norm_evm.c_to_n(transcript_gap_variant) - transcript_gap_alt_n = no_norm_evm.c_to_n(stored_hgvs_c) - else: - transcript_gap_n = transcript_gap_variant - transcript_gap_alt_n = stored_hgvs_c - - # Ensure an ALT exists - try: - if transcript_gap_alt_n.posedit.edit.alt is None: - transcript_gap_alt_n.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - transcript_gap_n_delins_from_dup = transcript_gap_n.ac + ':' + transcript_gap_n.type + '.' + str( - transcript_gap_n.posedit.pos.start.base) + '_' + str( - transcript_gap_n.posedit.pos.end.base) + 'del' + transcript_gap_n.posedit.edit.ref + 'ins' + transcript_gap_n.posedit.edit.ref + transcript_gap_n.posedit.edit.ref - transcript_gap_n = hp.parse_hgvs_variant(transcript_gap_n_delins_from_dup) - transcript_gap_alt_n_delins_from_dup = transcript_gap_alt_n.ac + ':' + transcript_gap_alt_n.type + '.' + str( - transcript_gap_alt_n.posedit.pos.start.base) + '_' + str( - transcript_gap_alt_n.posedit.pos.end.base) + 'del' + transcript_gap_alt_n.posedit.edit.ref + 'ins' + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n.posedit.edit.ref - transcript_gap_alt_n = hp.parse_hgvs_variant(transcript_gap_alt_n_delins_from_dup) - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(transcript_gap_n.posedit.edit.ref) - if transcript_gap_alt_n.posedit.edit.alt is not None: - alternate_bases = list(transcript_gap_alt_n.posedit.edit.alt) - else: - # Deletions with no ins - pre_alternate_bases = list(transcript_gap_alt_n.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = transcript_gap_n.posedit.pos.start.base - alt_start = transcript_gap_alt_n.posedit.pos.start.base - ref_base_dict = {} - for base in reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = 
ref_start + 1 - - alt_base_dict = {} - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for int in range(transcript_gap_alt_n.posedit.pos.start.base, - transcript_gap_alt_n.posedit.pos.end.base + 1, 1): - if int == alt_start: - alt_base_dict[int] = str(''.join(alternate_bases)) - else: - alt_base_dict[int] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base + 1, 1): - if int in alt_base_dict.keys(): - alternate_sequence_bases.append(alt_base_dict[int]) - else: - alternate_sequence_bases.append(ref_base_dict[int]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Update variant, map to genome using vm and normalize - transcript_gap_n.posedit.edit.alt = alternate_sequence - - try: - transcript_gap_variant = vm.n_to_c(transcript_gap_n) - except: - transcript_gap_variant = transcript_gap_n - - try: - hgvs_genomic = vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) - hgvs_genomic = hn.normalize(hgvs_genomic) - except Exception as e: - if str(e) == "base start position must be <= end position": - # Expansion out is required to map back to the genomic position - pre_base = sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.start.base - 2, - transcript_gap_n.posedit.pos.start.base - 1) - post_base = sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.end.base, - transcript_gap_n.posedit.pos.end.base + 1) - transcript_gap_n.posedit.pos.start.base = transcript_gap_n.posedit.pos.start.base - 1 - transcript_gap_n.posedit.pos.end.base = transcript_gap_n.posedit.pos.end.base + 1 - transcript_gap_n.posedit.edit.ref = pre_base + transcript_gap_n.posedit.edit.ref + post_base - transcript_gap_n.posedit.edit.alt = pre_base + transcript_gap_n.posedit.edit.alt + post_base - try: - transcript_gap_variant 
= vm.n_to_c(transcript_gap_n) - except: - transcript_gap_variant = transcript_gap_n - hgvs_genomic = vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) - hgvs_genomic = hn.normalize(hgvs_genomic) - - # Ins variants map badly - Especially between c. exon/exon boundary - if hgvs_c.posedit.edit.type == 'ins' and hgvs_c.posedit.pos.start.offset == 0 and hgvs_c.posedit.pos.end.offset == 0: - try: - hn.normalize(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error == 'insertion length must be 1': - if hgvs_c.type == 'c': - hgvs_t = vm.c_to_n(hgvs_c) - else: - hgvs_t = copy.copy(hgvs_c) - ins_ref = sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 1, hgvs_t.posedit.pos.end.base) - ins_alt = ins_ref[:1] + hgvs_t.posedit.edit.alt + ins_ref[-1:] - ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base) + '_' + str( - hgvs_t.posedit.pos.end.base) + 'del' + ins_ref + 'ins' + ins_alt - hgvs_t = hp.parse_hgvs_variant(ins_to_delins) - try: - hgvs_c = vm.n_to_c(hgvs_t) - except Exception: - hgvs_c = copy.copy(hgvs_t) - try: - hgvs_genomic = no_norm_evm.t_to_g(hgvs_c) - except Exception as e: - error = str(e) - logger.warning('Ins mapping error in myt_to_g ' + error) - - return hgvs_genomic - - -""" -USE WITH MAPPER THAT DOES NOT REPLACE THE REFERENCE GENOMIC BASES AND DOED NOT NORMALIZE - -Enhanced transcript to genome position mapping function using evm -Trys to ensure that a genomic position is always returned even if the c. or n. transcript -will not map to the specified genome build primary assembly. -Deals with transcript mapping to several genomic assemblies -Order -Map to a single NC_ (or ALT) for the specified genome build -returns parsed hgvs g. 
object -""" - - -def noreplace_myevm_t_to_g(hgvs_c, evm, hdp, primary_assembly, vm, hn, hp, sf, no_norm_evm): - try: - hgvs_genomic = evm.t_to_g(hgvs_c) - hn.normalize(hgvs_genomic) - # This will fail on multiple refs for NC_ - except hgvs.exceptions.HGVSError as e: - # Recover all available mapping options from UTA - mapping_options = hdp.get_tx_mapping_options(hgvs_c.ac) - - if mapping_options == []: - raise HGVSDataNotAvailableError("no g. mapping options available") - - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NC_', option[1]): - chr_num = supported_chromosome_builds.supported_for_mapping(str(option[1]), primary_assembly) - if chr_num != 'false': - try: - hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[ - 1] + '~' - print e - continue - - # If not mapped, raise error - try: - hn.normalize(hgvs_genomic) - except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NC_', option[1]): - chr_num = supported_chromosome_builds.supported_for_mapping(str(option[1]), primary_assembly) - if chr_num != 'false': - try: - hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[ - 1] + '~' - print e - continue - - # If not mapped, raise error - try: - hn.normalize(hgvs_genomic) - except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NC_', option[1]): - chr_num = supported_chromosome_builds.supported_for_mapping(str(option[1]), primary_assembly) - if chr_num == 'false': - try: - hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - if re.search(option[1], attempted_mapping_error): - pass - else: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + 
hgvs_c.ac + "/" + \ - option[ - 1] + '~' - print e - continue - try: - hn.normalize(hgvs_genomic) - except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NT_', option[1]): - chr_num = supported_chromosome_builds.supported_for_mapping(str(option[1]), - primary_assembly) - if chr_num != 'false': - try: - hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + \ - option[ - 1] + '~' - print e - continue - try: - hn.normalize(hgvs_genomic) - except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NT_', option[1]): - chr_num = supported_chromosome_builds.supported_for_mapping(str(option[1]), - primary_assembly) - if chr_num == 'false': - try: - hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str( - e) + "/" + hgvs_c.ac + "/" + \ - option[ - 1] + '~' - print e - continue - try: - hn.normalize(hgvs_genomic) - except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NW_', option[1]): - chr_num = supported_chromosome_builds.supported_for_mapping(str(option[1]), - primary_assembly) - if chr_num != 'false': - try: - hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str( - e) + "/" + hgvs_c.ac + "/" + \ - option[1] + '~' - print e - continue - try: - hn.normalize(hgvs_genomic) - except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NW_', option[1]): - chr_num = supported_chromosome_builds.supported_for_mapping(str(option[1]), - primary_assembly) - if chr_num == 'false': - try: - hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + 
str( - e) + "/" + hgvs_c.ac + "/" + \ - option[1] + '~' - print e - continue - - # Only a RefSeqGene available - try: - hn.normalize(hgvs_genomic) - except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NG_', option[1]): - try: - hgvs_genomic = vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str( - e) + "/" + hgvs_c.ac + "/" + \ - option[1] + '~' - print e - continue - try: - hgvs_genomic - except Exception: - raise HGVSDataNotAvailableError('No available t_to_g liftover') - - # Ins variants map badly - Especially between c. exon/exon boundary - if hgvs_c.posedit.edit.type == 'ins' and hgvs_c.posedit.pos.start.offset == 0 and hgvs_c.posedit.pos.end.offset == 0: - try: - hn.normalize(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error == 'insertion length must be 1': - if hgvs_c.type == 'c': - hgvs_t = vm.c_to_n(hgvs_c) - else: - hgvs_t = copy.copy(hgvs_c) - ins_ref = sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 1, hgvs_t.posedit.pos.end.base) - ins_alt = ins_ref[:1] + hgvs_t.posedit.edit.alt + ins_ref[-1:] - ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base) + '_' + str( - hgvs_t.posedit.pos.end.base) + 'del' + ins_ref + 'ins' + ins_alt - hgvs_t = hp.parse_hgvs_variant(ins_to_delins) - try: - hgvs_c = vm.n_to_c(hgvs_t) - except Exception: - hgvs_c = copy.copy(hgvs_t) - try: - hgvs_genomic = no_norm_evm.t_to_g(hgvs_c) - except Exception as e: - error = str(e) - logger.warning('Ins mapping error in myt_to_g ' + error) - - return hgvs_genomic - - -""" -Enhanced transcript to genome position on a specified genomic reference using vm -Deals with mapping from transcript positions that do not exist in the genomic sequence -i.e. the stated position aligns to a genomic gap! -returns parsed hgvs g. 
object -""" - - -def myvm_t_to_g(hgvs_c, alt_chr, no_norm_evm, vm, hp, hn, sf, nr_vm): - # store the input - stored_hgvs_c = copy.deepcopy(hgvs_c) - expand_out = 'false' - utilise_gap_code = True - - # Gap gene black list - try: - gene_symbol = dbControls.data.get_gene_symbol_from_transcriptID(hgvs_c.ac) - except Exception: - utilise_gap_code = False - else: - # If the gene symbol is not in the list, the value False will be returned - utilise_gap_code = gap_genes.gap_black_list(gene_symbol) - # Warn gap code in use - logger.warning("gap_compensation_mvm = " + str(utilise_gap_code)) - - if utilise_gap_code is True and ( - hgvs_c.posedit.edit.type == 'identity' or hgvs_c.posedit.edit.type == 'del' or hgvs_c.posedit.edit.type == 'delins' or hgvs_c.posedit.edit.type == 'dup' or hgvs_c.posedit.edit.type == 'sub' or hgvs_c.posedit.edit.type == 'ins' or hgvs_c.posedit.edit.type == 'inv'): - - # if NM_ need the n. position - if re.match('NM_', str(hgvs_c.ac)): - hgvs_c = no_norm_evm.c_to_n(hgvs_c) - - # Check for intronic - try: - hn.normalize(hgvs_c) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('intronic variant', error): - pass - elif re.search('Length implied by coordinates must equal sequence deletion length', error) and re.match( - 'NR_', hgvs_c.ac): - hgvs_c.posedit.pos.end.base = hgvs_c.posedit.pos.start.base + len(hgvs_c.posedit.edit.ref) - 1 - - # Check again before continuing - if re.search('\d+\+', str(hgvs_c.posedit.pos)) or re.search('\d+\-', str(hgvs_c.posedit.pos)) or re.search( - '\*\d+\+', str(hgvs_c.posedit.pos)) or re.search('\*\d+\-', str(hgvs_c.posedit.pos)): - pass - - else: - try: - # For non-intronic sequence - hgvs_t = copy.deepcopy(hgvs_c) - # handle inversions - if hgvs_t.posedit.edit.type == 'inv': - inv_alt = revcomp(hgvs_t.posedit.edit.ref) - t_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str(hgvs_t.posedit.pos.start.base) + '_' + str( - hgvs_t.posedit.pos.end.base) + 'del' + hgvs_t.posedit.edit.ref + 'ins' + inv_alt - hgvs_t_delins = hp.parse_hgvs_variant(t_delins) - pre_base = sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 2, - hgvs_t.posedit.pos.start.base - 1) - post_base = sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.end.base, - hgvs_t.posedit.pos.end.base + 1) - hgvs_t.posedit.edit.ref = pre_base + hgvs_t.posedit.edit.ref + post_base - inv_alt = pre_base + inv_alt + post_base - hgvs_t.posedit.pos.start.base = hgvs_t.posedit.pos.start.base - 1 - start = hgvs_t.posedit.pos.start.base - hgvs_t.posedit.pos.start.base = start + 1 - hgvs_t.posedit.pos.end.base = hgvs_t.posedit.pos.end.base + 1 - end = hgvs_t.posedit.pos.end.base - hgvs_t.posedit.pos.start.base = start - hgvs_t.posedit.pos.end.base = end - hgvs_str = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(start) + '_' + str( - end) + 'del' + hgvs_t.posedit.edit.ref + 'ins' + inv_alt - hgvs_t = hp.parse_hgvs_variant(hgvs_str) - if hgvs_c.posedit.edit.type == 'dup': - # hgvs_t = reverse_normalize.normalize(hgvs_t) - pre_base = sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 2, - hgvs_t.posedit.pos.start.base - 1) - post_base = sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.end.base, - hgvs_t.posedit.pos.end.base + 1) - alt = pre_base + hgvs_t.posedit.edit.ref + hgvs_t.posedit.edit.ref + post_base - ref = pre_base + hgvs_t.posedit.edit.ref + post_base - dup_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str( - hgvs_t.posedit.pos.start.base - 1) + '_' + str( - (hgvs_t.posedit.pos.start.base + len(ref)) - 2) + 'del' + ref + 'ins' + alt - hgvs_t = hp.parse_hgvs_variant(dup_to_delins) - elif hgvs_c.posedit.edit.type == 'ins': - ins_ref = sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 2, - hgvs_t.posedit.pos.end.base + 1) - ins_alt = ins_ref[:2] + hgvs_t.posedit.edit.alt + ins_ref[-2:] - ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str( - hgvs_t.posedit.pos.start.base - 1) + '_' + str( - hgvs_t.posedit.pos.end.base + 1) + 'del' + ins_ref + 'ins' + ins_alt - hgvs_t = hp.parse_hgvs_variant(ins_to_delins) - else: - if str(hgvs_t.posedit.edit.alt) == 'None': - hgvs_t.posedit.edit.alt = '' - pre_base = sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 2, - hgvs_t.posedit.pos.start.base - 1) - post_base = sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.end.base, - hgvs_t.posedit.pos.end.base + 1) - hgvs_t.posedit.edit.ref = pre_base + hgvs_t.posedit.edit.ref + post_base - hgvs_t.posedit.edit.alt = pre_base + hgvs_t.posedit.edit.alt + post_base - hgvs_t.posedit.pos.start.base = hgvs_t.posedit.pos.start.base - 1 - start = hgvs_t.posedit.pos.start.base - hgvs_t.posedit.pos.start.base = start + 1 - hgvs_t.posedit.pos.end.base = hgvs_t.posedit.pos.end.base + 1 - end = hgvs_t.posedit.pos.end.base - hgvs_t.posedit.pos.start.base = start - hgvs_t.posedit.pos.end.base = end - hgvs_str = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(start) + '_' + str(end) + str( - hgvs_t.posedit.edit) - hgvs_t = hp.parse_hgvs_variant(hgvs_str) - hgvs_c = copy.deepcopy(hgvs_t) - - # Set expanded out test to true - expand_out = 'true' - - except Exception: - hgvs_c = hgvs_c - - if re.match('NM_', str(hgvs_c.ac)): - try: - hgvs_c = no_norm_evm.n_to_c(hgvs_c) - except hgvs.exceptions.HGVSError as e: - hgvs_c = copy.deepcopy(stored_hgvs_c) - - # Ensure the altered c. variant has not crossed intro exon boundaries - hgvs_check_boundaries = copy.deepcopy(hgvs_c) - try: - h_variant = hn.normalize(hgvs_check_boundaries) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error): - hgvs_c = copy.deepcopy(stored_hgvs_c) - # Catch identity at the exon/intron boundary by trying to normalize ref only - if hgvs_check_boundaries.posedit.edit.type == 'identity': - reform_ident = str(hgvs_c).split(':')[0] - reform_ident = reform_ident + ':' + stored_hgvs_c.type + '.' 
+ str(hgvs_c.posedit.pos) + 'del' + str( - hgvs_c.posedit.edit.ref) # + 'ins' + str(hgvs_c.posedit.edit.alt) - hgvs_reform_ident = hp.parse_hgvs_variant(reform_ident) - try: - hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error) or re.search( - 'Normalization of intronic variants', error): - hgvs_c = copy.deepcopy(stored_hgvs_c) - - hgvs_genomic = vm.t_to_g(hgvs_c, alt_chr) - if hgvs_c.posedit.edit.type == 'identity' and hgvs_genomic.posedit.edit.type == 'delins' and hgvs_genomic.posedit.edit.alt == '' and expand_out != 'true': - hgvs_genomic.posedit.edit.alt = hgvs_genomic.posedit.edit.ref - if hgvs_genomic.posedit.edit.type == 'ins' and utilise_gap_code is True: - try: - hgvs_genomic = hn.normalize(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error == 'insertion length must be 1': - ref = sf.fetch_seq(str(hgvs_genomic.ac), hgvs_genomic.posedit.pos.start.base - 1, - hgvs_genomic.posedit.pos.end.base) - hgvs_genomic.posedit.edit.ref = ref - hgvs_genomic.posedit.edit.alt = ref[0:1] + hgvs_genomic.posedit.edit.alt + ref[-1:] - hgvs_genomic = hn.normalize(hgvs_genomic) - if error == 'base start position must be <= end position': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - hgvs_genomic = hn.normalize(hgvs_genomic) - - # Statements required to reformat the stored_hgvs_c into a useable synonym - if (stored_hgvs_c.posedit.edit.ref == '' or stored_hgvs_c.posedit.edit.ref is None) and expand_out != 'false': - if stored_hgvs_c.type == 'c': - stored_hgvs_n = vm.c_to_n(stored_hgvs_c) - else: - stored_hgvs_n = stored_hgvs_c - stored_ref = sf.fetch_seq(str(stored_hgvs_n.ac), stored_hgvs_n.posedit.pos.start.base - 1, - stored_hgvs_n.posedit.pos.end.base) - stored_hgvs_c.posedit.edit.ref = stored_ref - - if 
(hgvs_genomic.posedit.edit.ref == '' or hgvs_genomic.posedit.edit.ref is None) and expand_out != 'false': - if hgvs_genomic.posedit.edit.type == 'ins': - stored_ref = sf.fetch_seq(str(hgvs_genomic.ac), hgvs_genomic.posedit.pos.start.base - 1, - hgvs_genomic.posedit.pos.end.base) - stored_alt = stored_ref[:1] + hgvs_genomic.posedit.edit.alt + stored_ref[-1:] - hgvs_genomic.posedit.edit.ref = stored_ref - hgvs_genomic.posedit.edit.alt = stored_alt - - # First look for variants mapping to the flanks of gaps - # either in the gap or on the flank but not fully within the gap - if expand_out == 'true': - nr_genomic = nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) - try: - hn.normalize(nr_genomic) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error_type_1 = str(e) - if re.match('Length implied by coordinates must equal sequence deletion length', str(e)) or str( - e) == 'base start position must be <= end position': - # Effectively, this code is designed to handle variants that are directly proximal to - # gap BOUNDARIES, but in some cases the replace reference function of hgvs mapping has removed bases - # due to the deletion length being > the specified range. 
- - # Warn of variant location wrt the gap - if re.match('Length implied by coordinates must equal sequence deletion length', str(e)): - logger.warning('Variant is proximal to the flank of a genomic gap') - genomic_gap_variant = vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) - try: - hn.normalize(genomic_gap_variant) - # Still a problem - except hgvs.exceptions.HGVSInvalidVariantError as e: - if 'base start position must be <= end position' in str(e) and \ - 'Length implied by coordinates must equal' in error_type_1: - make_gen_var = copy.copy(nr_genomic) - make_gen_var.posedit.edit.ref = sf.fetch_seq(nr_genomic.ac, - nr_genomic.posedit.pos.start.base - 1, - nr_genomic.posedit.pos.end.base) - genomic_gap_variant = make_gen_var - error_type_1 = None - else: - genomic_gap_variant = nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) - - if error_type_1 == 'base start position must be <= end position': - logger.warning('Variant is fully within a genomic gap') - genomic_gap_variant = vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) - - # Logic - # We have checked that the variant does not cross boundaries, or is intronic - # So is likely mapping to a genomic gap - try: - hn.normalize(genomic_gap_variant) - except Exception as e: - if str(e) == 'base start position must be <= end position': - # This will only happen when the variant is fully within the gap - gap_start = genomic_gap_variant.posedit.pos.end.base - gap_end = genomic_gap_variant.posedit.pos.start.base - genomic_gap_variant.posedit.pos.start.base = gap_start - genomic_gap_variant.posedit.pos.end.base = gap_end - if re.match('Length implied by coordinates must equal sequence deletion length', str(e)): - # This will only happen if the variant is flanking the gap but is - # not inside the gap - logger.warning('Variant is on the flank of a genomic gap but not within the gap') - gap_start = genomic_gap_variant.posedit.pos.start.base - 1 - gap_end = genomic_gap_variant.posedit.pos.end.base + 1 - genomic_gap_variant.posedit.pos.start.base = 
gap_start - genomic_gap_variant.posedit.pos.end.base = gap_end - genomic_gap_variant.posedit.edit.ref = '' - stored_hgvs_c = copy.deepcopy(hgvs_c) - - # Remove alt - try: - genomic_gap_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - pass - - # Should be a delins so will normalize statically and replace the reference bases - genomic_gap_variant = hn.normalize(genomic_gap_variant) - # Static map to c. and static normalize - transcript_gap_variant = vm.g_to_t(genomic_gap_variant, hgvs_c.ac) - stored_transcript_gap_variant = transcript_gap_variant - if not re.match('Length implied by coordinates must equal sequence deletion length', str(e)): - try: - transcript_gap_variant = hn.normalize(transcript_gap_variant) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - if ' Unsupported normalization of variants spanning the UTR-exon boundary' in str(e): - pass - - # if NM_ need the n. position - if re.match('NM_', str(hgvs_c.ac)): - transcript_gap_n = no_norm_evm.c_to_n(transcript_gap_variant) - transcript_gap_alt_n = no_norm_evm.c_to_n(stored_hgvs_c) - else: - transcript_gap_n = transcript_gap_variant - transcript_gap_alt_n = stored_hgvs_c - - # Ensure an ALT exists - try: - if transcript_gap_alt_n.posedit.edit.alt is None: - transcript_gap_alt_n.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - transcript_gap_n_delins_from_dup = transcript_gap_n.ac + ':' + transcript_gap_n.type + '.' + str( - transcript_gap_n.posedit.pos.start.base) + '_' + str( - transcript_gap_n.posedit.pos.end.base) + 'del' + transcript_gap_n.posedit.edit.ref + 'ins' + transcript_gap_n.posedit.edit.ref + transcript_gap_n.posedit.edit.ref - transcript_gap_n = hp.parse_hgvs_variant(transcript_gap_n_delins_from_dup) - transcript_gap_alt_n_delins_from_dup = transcript_gap_alt_n.ac + ':' + transcript_gap_alt_n.type + '.' 
+ str( - transcript_gap_alt_n.posedit.pos.start.base) + '_' + str( - transcript_gap_alt_n.posedit.pos.end.base) + 'del' + transcript_gap_alt_n.posedit.edit.ref + 'ins' + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n.posedit.edit.ref - transcript_gap_alt_n = hp.parse_hgvs_variant(transcript_gap_alt_n_delins_from_dup) - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(transcript_gap_n.posedit.edit.ref) - if transcript_gap_alt_n.posedit.edit.alt is not None: - alternate_bases = list(transcript_gap_alt_n.posedit.edit.alt) - else: - # Deletions with no ins - pre_alternate_bases = list(transcript_gap_alt_n.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = transcript_gap_n.posedit.pos.start.base - alt_start = transcript_gap_alt_n.posedit.pos.start.base - ref_base_dict = {} - for base in reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = ref_start + 1 - - alt_base_dict = {} - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for int in range(transcript_gap_alt_n.posedit.pos.start.base, - transcript_gap_alt_n.posedit.pos.end.base + 1, 1): - if int == alt_start: - alt_base_dict[int] = str(''.join(alternate_bases)) - else: - alt_base_dict[int] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base + 1, - 1): - if int in alt_base_dict.keys(): - alternate_sequence_bases.append(alt_base_dict[int]) - else: - alternate_sequence_bases.append(ref_base_dict[int]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Update variant, map to genome using vm and normalize - transcript_gap_n.posedit.edit.alt = alternate_sequence - - try: - transcript_gap_variant 
= vm.n_to_c(transcript_gap_n) - except: - transcript_gap_variant = transcript_gap_n - - try: - hgvs_genomic = vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) - hgvs_genomic = hn.normalize(hgvs_genomic) - except Exception as e: - if str(e) == "base start position must be <= end position": - # Expansion out is required to map back to the genomic position - pre_base = sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.start.base - 2, - transcript_gap_n.posedit.pos.start.base - 1) - post_base = sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.end.base, - transcript_gap_n.posedit.pos.end.base + 1) - transcript_gap_n.posedit.pos.start.base = transcript_gap_n.posedit.pos.start.base - 1 - transcript_gap_n.posedit.pos.end.base = transcript_gap_n.posedit.pos.end.base + 1 - transcript_gap_n.posedit.edit.ref = pre_base + transcript_gap_n.posedit.edit.ref + post_base - transcript_gap_n.posedit.edit.alt = pre_base + transcript_gap_n.posedit.edit.alt + post_base - try: - transcript_gap_variant = vm.n_to_c(transcript_gap_n) - except: - transcript_gap_variant = transcript_gap_n - hgvs_genomic = vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) - hgvs_genomic = hn.normalize(hgvs_genomic) - - # Bypass the next bit of gap code - expand_out = 'false' - - else: - pass - # No map to the flank of a gap or within the gap - else: - pass - - # CASCADING STATEMENTS WHICH CAPTURE t to g MAPPING OPTIONS - # Remove identity bases - if hgvs_c == stored_hgvs_c: - expand_out = 'false' - elif expand_out == 'false' or utilise_gap_code is False: - pass - # Correct expansion ref + 2 - elif expand_out == 'true' and ( - len(hgvs_genomic.posedit.edit.ref) == (len(stored_hgvs_c.posedit.edit.ref) + 2)): # >= 3: - hgvs_genomic.posedit.pos.start.base = hgvs_genomic.posedit.pos.start.base + 1 - hgvs_genomic.posedit.pos.end.base = hgvs_genomic.posedit.pos.end.base - 1 - hgvs_genomic.posedit.edit.ref = hgvs_genomic.posedit.edit.ref[1:-1] - if hgvs_genomic.posedit.edit.alt is not 
None: - hgvs_genomic.posedit.edit.alt = hgvs_genomic.posedit.edit.alt[1:-1] - elif expand_out == 'true' and ( - len(hgvs_genomic.posedit.edit.ref) != (len(stored_hgvs_c.posedit.edit.ref) + 2)): # >= 3: - if expand_out == 'true' and len(hgvs_genomic.posedit.edit.ref) == 2: - gn = hn.normalize(hgvs_genomic) - pass - - # Likely if the start or end position aligns to a gap in the genomic sequence - # Logic - # We have checked that the variant does not cross boundaries, or is intronic - # So is likely mapping to a genomic gap - elif expand_out == 'true' and len(hgvs_genomic.posedit.edit.ref) <= 1: - # Incorrect expansion, likely < ref + 2 - genomic_gap_variant = vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) - try: - hn.normalize(genomic_gap_variant) - except Exception as e: - if str(e) == 'base start position must be <= end position': - gap_start = genomic_gap_variant.posedit.pos.end.base - gap_end = genomic_gap_variant.posedit.pos.start.base - genomic_gap_variant.posedit.pos.start.base = gap_start - genomic_gap_variant.posedit.pos.end.base = gap_end - # Remove alt - try: - genomic_gap_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - pass - # Should be a delins so will normalize statically and replace the reference bases - genomic_gap_variant = hn.normalize(genomic_gap_variant) - # Static map to c. and static normalize - transcript_gap_variant = vm.g_to_t(genomic_gap_variant, hgvs_c.ac) - stored_transcript_gap_variant = transcript_gap_variant - transcript_gap_variant = hn.normalize(transcript_gap_variant) - # if NM_ need the n. 
position - if re.match('NM_', str(hgvs_c.ac)): - transcript_gap_n = no_norm_evm.c_to_n(transcript_gap_variant) - transcript_gap_alt_n = no_norm_evm.c_to_n(stored_hgvs_c) - else: - transcript_gap_n = transcript_gap_variant - transcript_gap_alt_n = stored_hgvs_c - - # Ensure an ALT exists - try: - if transcript_gap_alt_n.posedit.edit.alt is None: - transcript_gap_alt_n.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - transcript_gap_n_delins_from_dup = transcript_gap_n.ac + ':' + transcript_gap_n.type + '.' + str( - transcript_gap_n.posedit.pos.start.base) + '_' + str( - transcript_gap_n.posedit.pos.end.base) + 'del' + transcript_gap_n.posedit.edit.ref + 'ins' + transcript_gap_n.posedit.edit.ref + transcript_gap_n.posedit.edit.ref - transcript_gap_n = hp.parse_hgvs_variant(transcript_gap_n_delins_from_dup) - transcript_gap_alt_n_delins_from_dup = transcript_gap_alt_n.ac + ':' + transcript_gap_alt_n.type + '.' + str( - transcript_gap_alt_n.posedit.pos.start.base) + '_' + str( - transcript_gap_alt_n.posedit.pos.end.base) + 'del' + transcript_gap_alt_n.posedit.edit.ref + 'ins' + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n.posedit.edit.ref - transcript_gap_alt_n = hp.parse_hgvs_variant(transcript_gap_alt_n_delins_from_dup) - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(transcript_gap_n.posedit.edit.ref) - if transcript_gap_alt_n.posedit.edit.alt is not None: - alternate_bases = list(transcript_gap_alt_n.posedit.edit.alt) - else: - # Deletions with no ins - pre_alternate_bases = list(transcript_gap_alt_n.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = transcript_gap_n.posedit.pos.start.base - alt_start = transcript_gap_alt_n.posedit.pos.start.base - ref_base_dict = {} - for base in reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = 
ref_start + 1 - - alt_base_dict = {} - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for int in range(transcript_gap_alt_n.posedit.pos.start.base, - transcript_gap_alt_n.posedit.pos.end.base + 1, 1): - if int == alt_start: - alt_base_dict[int] = str(''.join(alternate_bases)) - else: - alt_base_dict[int] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base + 1, 1): - if int in alt_base_dict.keys(): - alternate_sequence_bases.append(alt_base_dict[int]) - else: - alternate_sequence_bases.append(ref_base_dict[int]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Update variant, map to genome using vm and normalize - transcript_gap_n.posedit.edit.alt = alternate_sequence - - try: - transcript_gap_variant = vm.n_to_c(transcript_gap_n) - except: - transcript_gap_variant = transcript_gap_n - - try: - hgvs_genomic = vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) - hgvs_genomic = hn.normalize(hgvs_genomic) - except Exception as e: - if str(e) == "base start position must be <= end position": - # Expansion out is required to map back to the genomic position - pre_base = sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.start.base - 2, - transcript_gap_n.posedit.pos.start.base - 1) - post_base = sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.end.base, - transcript_gap_n.posedit.pos.end.base + 1) - transcript_gap_n.posedit.pos.start.base = transcript_gap_n.posedit.pos.start.base - 1 - transcript_gap_n.posedit.pos.end.base = transcript_gap_n.posedit.pos.end.base + 1 - transcript_gap_n.posedit.edit.ref = pre_base + transcript_gap_n.posedit.edit.ref + post_base - transcript_gap_n.posedit.edit.alt = pre_base + transcript_gap_n.posedit.edit.alt + post_base - try: - transcript_gap_variant 
= vm.n_to_c(transcript_gap_n) - except: - transcript_gap_variant = transcript_gap_n - hgvs_genomic = vm.t_to_g(transcript_gap_variant, hgvs_genomic.ac) - hgvs_genomic = hn.normalize(hgvs_genomic) - - # Ins variants map badly - Especially between c. exon/exon boundary - if hgvs_c.posedit.edit.type == 'ins' and hgvs_c.posedit.pos.start.offset == 0 and hgvs_c.posedit.pos.end.offset == 0: - try: - hn.normalize(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error == 'insertion length must be 1': - if hgvs_c.type == 'c': - hgvs_t = vm.c_to_n(hgvs_c) - else: - hgvs_t = copy.copy(hgvs_c) - ins_ref = sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 1, hgvs_t.posedit.pos.end.base) - ins_alt = ins_ref[:1] + hgvs_t.posedit.edit.alt + ins_ref[-1:] - ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base) + '_' + str( - hgvs_t.posedit.pos.end.base) + 'del' + ins_ref + 'ins' + ins_alt - hgvs_t = hp.parse_hgvs_variant(ins_to_delins) - try: - hgvs_c = vm.n_to_c(hgvs_t) - except Exception: - hgvs_c = copy.copy(hgvs_t) - try: - hgvs_genomic = no_norm_evm.t_to_g(hgvs_c) - except Exception as e: - error = str(e) - logger.warning('Ins mapping error in myt_to_g ' + error) - - return hgvs_genomic - - -""" -Simple hgvs g. to c. or n. mapping -returns parsed hgvs c. or n. object -""" - - -def myevm_g_to_t(evm, hgvs_genomic, alt_ac): - hgvs_t = evm.g_to_t(hgvs_genomic, alt_ac) - return hgvs_t - - -""" -parse p. strings into hgvs p. objects - -MARKED FOR REMOVAL -""" - -# def hgvs_protein(variant, hp): -# # Set regular expressions for if statements -# pat_p = re.compile(":p\.") # Pattern looks for :g. Note (gene) has been removed -# # If the :p. pattern is present in the input variant -# if pat_p.search(variant): -# # convert the input string into a hgvs object -# var_p = hp.parse_hgvs_variant(variant) -# return var_p - - -""" -Convert r. into c. -""" - - -def hgvs_r_to_c(hgvs_object): - # check for LRG_t with r. 
- if re.match('LRG', hgvs_object.ac): - transcript_ac = dbControls.data.get_RefSeqTranscriptID_from_lrgTranscriptID(hgvs_object.ac) - if transcript_ac == 'none': - raise HGVSDataNotAvailableError('Unable to identify a relevant transcript for ' + hgvs_object.ac) - else: - hgvs_object.ac = transcript_ac - hgvs_object.type = 'c' - edit = str(hgvs_object.posedit.edit) - edit = edit.upper() - # lowercase the supported variant types - edit = edit.replace('DEL', 'del') - edit = edit.replace('INS', 'ins') - edit = edit.replace('INV', 'inv') - edit = edit.replace('DUP', 'dup') - # edit = edit.replace('CON', 'con') - # edit = edit.replace('TRA', 'tra') - edit = edit.replace('U', 'T') - hgvs_object.posedit.edit = edit - return hgvs_object - - -""" -Convert c. into r. -""" - - -def hgvs_c_to_r(hgvs_object): - hgvs_object.type = 'r' - edit = str(hgvs_object.posedit.edit) - edit = edit.lower() - edit = edit.replace('t', 'u') - hgvs_object.posedit.edit = edit - return hgvs_object - - -""" -Input c. r. n. variant string -Use uta.py (hdp) to return the identity information for the transcript variant -see hgvs.dataproviders.uta.py for details - -MARKED FOR REMOVAL -""" - -# def tx_identity_info(variant, hdp): -# # Set regular expressions for if statements -# pat_c = re.compile(":c\.") # Pattern looks for :c. Note (gene) has been removed -# pat_n = re.compile(":n\.") # Pattern looks for :c. Note (gene) has been removed -# pat_r = re.compile(":r\.") # Pattern looks for :c. Note (gene) has been removed -# -# # If the :c. pattern is present in the input variant -# if pat_c.search(variant): -# # Remove all text to the right and including pat_c -# tx_ac = variant[:variant.index(':c.') + len(':c.')] -# tx_ac = pat_c.sub('', tx_ac) -# # Interface with the UTA database via get_tx_identity in uta.py -# tx_id_info = hdp.get_tx_identity_info(tx_ac) -# # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list -# return tx_id_info -# -# # If the :n. 
pattern is present in the input variant -# if pat_n.search(variant): -# # Remove all text to the right and including pat_c -# tx_ac = variant[:variant.index(':n.') + len(':n.')] -# tx_ac = pat_n.sub('', tx_ac) -# # Interface with the UTA database via get_tx_identity in uta.py -# tx_id_info = hdp.get_tx_identity_info(tx_ac) -# # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list -# return tx_id_info -# -# # If the :r. pattern is present in the input variant -# if pat_r.search(variant): -# # Remove all text to the right and including pat_c -# tx_ac = variant[:variant.index(':r.') + len(':r.')] -# tx_ac = pat_r.sub('', tx_ac) -# # Interface with the UTA database via get_tx_identity in uta.py -# tx_id_info = hdp.get_tx_identity_info(tx_ac) -# # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list -# return tx_id_info - - -""" -Input c. r. nd accession string -Use uta.py (hdp) to return the identity information for the transcript variant -see hgvs.dataproviders.uta.py for details - -MARKED FOR REMOVAL -""" - -# def tx_id_info(alt_ac, hdp): -# tx_id_info = hdp.get_tx_identity_info(alt_ac) -# # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list -# return tx_id_info - - -""" -Use uta.py (hdp) to return the transcript information for a specified gene (HGNC SYMBOL) -see hgvs.dataproviders.uta.py for details - - -marked for removal -""" - - -def tx_for_gene(hgnc, hdp): - # Interface with the UTA database via get_tx_for_gene in uta.py - tx_for_gene = hdp.get_tx_for_gene(hgnc) - return tx_for_gene - - -""" -Extract RefSeqGene Accession from transcript information -see hgvs.dataproviders.uta.py for details -""" - - -def ng_extract(tx_for_gene): - # Set regular expressions for if statements - pat_NG = re.compile("^NG_") # Pattern looks for NG_ at beginning of a string - # For each list in the list of lists tx_for_gene - for list in tx_for_gene: - # If the pattern NG_ is found in 
element 4 - if pat_NG.search(list[4]): - # The gene accession is set to list element 4 - gene_ac = list[4] - return gene_ac - - -""" -marked for removal -""" -# def int_start(var_g): -# start = var_g.posedit.pos.start -# # Stringify to get start co-ords -# start = str(start) -# # Make into an integer -# int_start = int(start) -# return int_start - -""" -marked for removal -""" -# def int_end(var_g): -# end = var_g.posedit.pos.end -# # Stringify to get start co-ords -# end = str(end) -# # Make into an integer -# int_end = int(end) -# return int_end - -""" -Returns exon information for a given transcript -e.g. how the exons align to the genomic reference -see hgvs.dataproviders.uta.py for details -""" - - -def tx_exons(tx_ac, alt_ac, alt_aln_method, hdp): - # Interface with the UTA database via get_tx_exons in uta.py - try: - tx_exons = hdp.get_tx_exons(tx_ac, alt_ac, alt_aln_method) - except hgvs.exceptions.HGVSError as e: - tx_exons = 'hgvs Exception: ' + str(e) - return tx_exons - try: - tx_exons[0]['alt_strand'] - except TypeError: - tx_exons = 'error' - return tx_exons - # If on the reverse strand, reverse the order of elements - if tx_exons[0]['alt_strand'] == -1: - tx_exons = tx_exons[::-1] - return tx_exons - else: - return tx_exons - - -""" -Automatically maps genomic positions onto all overlapping transcripts -""" - - -def relevant_transcripts(hgvs_genomic, evm, hdp, alt_aln_method, reverse_normalizer): - reverse_hn = reverse_normalizer - # Pass relevant transcripts for the input variant to rts - # Note, the evm method misses one end, the hdp. method misses the other. 
Combine both - rts_list = hdp.get_tx_for_region(hgvs_genomic.ac, alt_aln_method, hgvs_genomic.posedit.pos.start.base - 1, - hgvs_genomic.posedit.pos.end.base - 1) - rts_dict = {} - for tx_dat in rts_list: - rts_dict[tx_dat[0]] = True - rts_list_2 = evm.relevant_transcripts(hgvs_genomic) - for tx_dat_2 in rts_list_2: - rts_dict[tx_dat_2] = True - rts = rts_dict.keys() - - # Project genomic variants to new transcripts - # and populate a code_var list - ############################################# - # Open a list to store relevant transcripts - code_var = [] - # Populate transcripts - The keys become the list elements from rel_trs - for x in rts: - y = x.rstrip() # Chomp any whitespace from the right of x ($_) - Assign to y - # Easy variant mapper used to map the input variant to the relevant transcripts - # Check for coding transcripts - try: - variant = evm.g_to_t(hgvs_genomic, y) - except hgvs.exceptions.HGVSError as e: - # Check for non-coding transcripts - try: - variant = evm.g_to_t(hgvs_genomic, y) - except hgvs.exceptions.HGVSError as e: - continue - except: - continue - - # Corrective Normalisation of intronic descriptions in the antisense oriemtation - pl = re.compile('\+') - mi = re.compile('\-') - ast = re.compile('\*') - if pl.search(str(variant)) or mi.search(str(variant)) or ast.search(str(variant)): - tx_ac = variant.ac - alt_ac = hgvs_genomic.ac - - # Interface with the UTA database via get_tx_exons in uta.py - try: - tx_exons = hdp.get_tx_exons(tx_ac, alt_ac, alt_aln_method) - except hgvs.exceptions.HGVSError as e: - e - tx_exons = 'hgvs Exception: ' + str(e) - return tx_exons - try: - completion = tx_exons[0]['alt_strand'] - except TypeError: - tx_exons = 'error' - return tx_exons - # If on the reverse strand, reverse the order of elements - if tx_exons[0]['alt_strand'] == -1: - tx_exons = tx_exons[::-1] - else: - pass - - # Gene orientation - if tx_exons[0]['alt_strand'] == -1: - antisense = 'true' - else: - antisense = 'false' - - # Pass if 
antisense = 'false' - if antisense == 'false': - pass - else: - # Reverse normalize hgvs_genomic - rev_hgvs_genomic = reverse_hn.normalize(hgvs_genomic) - # map back to coding - variant = evm.g_to_t(rev_hgvs_genomic, tx_ac) - code_var.append(str(variant)) - return code_var - - -""" -Take HGVS string, parse into hgvs object and validate -""" - - -def validate(input, hp, vr): - hgvs_input = hp.parse_hgvs_variant(input) - g = re.compile(":g.") - p = re.compile(":p.") - if p.search(input): - if hasattr(hgvs_input.posedit.pos.start, 'offset'): - pass - else: - hgvs_input.posedit.pos.start.offset = 0 - if hasattr(hgvs_input.posedit.pos.end, 'offset'): - pass - else: - hgvs_input.posedit.pos.end.offset = 0 - if hasattr(hgvs_input.posedit.pos.start, 'datum'): - pass - else: - hgvs_input.posedit.pos.start.datum = 0 - if hasattr(hgvs_input.posedit.pos.end, 'datum'): - pass - else: - hgvs_input.posedit.pos.end.datum = 0 - if hasattr(hgvs_input.posedit.edit, 'ref_n'): - pass - else: - hgvs_input.posedit.edit.ref_n = hgvs_input.posedit.pos.end.base - hgvs_input.posedit.pos.start.base + 1 - - try: - vr.validate(hgvs_input) - except hgvs.exceptions.HGVSError as e: - - error = e - return error - - else: - error = 'false' - return error - - -""" -marked for removal -""" -# def sequence_extractor(ac, hdp): -# ac_seq = hdp.get_tx_seq(ac) -# return ac_seq - -""" -marked for removal -""" -# def ref_replace(e, hgvs_variant): -# error = str(e) -# match = re.findall('\(([GATC]+)\)', error) -# new_ref = match[1] -# hgvs_variant.posedit.edit.ref = new_ref -# return hgvs_variant - -""" -Search HGNC rest -""" - - -def hgnc_rest(path): - data = { - 'record': '', - 'error': 'false' - } - # HGNC server - headers = { - 'Accept': 'application/json', - } - uri = 'http://rest.genenames.org' - target = urlparse(uri + path) - method = 'GET' - body = '' - h = http.Http() - # collect the response - response, content = h.request( - target.geturl(), - method, - body, - headers) - if response['status'] == 
'200': - # assume that content is a json reply - # parse content with the json module - data['record'] = json.loads(content) - else: - data['error'] = "Unable to contact the HGNC database: Please try again later" - return data - - -""" -Search Entrez databases with efetch and SeqIO -""" - - -def entrez_efetch(db, id, rettype, retmode): - # IMPORT Bio modules - # from Bio import Entrez - Entrez.email = ENTREZ_ID - # from Bio import SeqIO - handle = Entrez.efetch(db=db, id=id, rettype=rettype, retmode=retmode) - # Get record - record = SeqIO.read(handle, "gb") - # Place into text - # text = handle.read() - handle.close() - return record - - -""" -search Entrez databases with efetch and read -""" - - -def entrez_read(db, id, retmode): - # IMPORT Bio modules - # from Bio import Entrez - Entrez.email = ENTREZ_ID - # from Bio import SeqIO - handle = Entrez.efetch(db=db, id=id, retmode=retmode) - # Get record - record = Entrez.read(handle) - # Place into text - # text = handle.read() - handle.close() - return record - - -""" -Simple reverse complement function for nucleotide sequences -""" - - -def revcomp(bases): - l2 = [] - l = list(bases) - element = 0 - for base in l: - element = element + 1 - if base == 'G': - l2.append('C') - if base == 'C': - l2.append('G') - if base == 'A': - l2.append('T') - if base == 'T': - l2.append('A') - revcomp = ''.join(l2) - revcomp = revcomp[::-1] - return revcomp - - -""" -Function designed to merge multiple HGVS variants (hgvs objects) into a single delins -using 3 prime normalization -""" - - -def merge_hgvs_3pr(hgvs_variant_list, hp, vr, hn, vm, sf): - # Ensure c. 
is mapped to the - h_list = [] - - # Sanity check and format the submitted variants - for hgvs_v in hgvs_variant_list: - # For testing include parser - try: - hgvs_v = hp.parse_hgvs_variant(hgvs_v) - except Exception as e: - print e - pass - - # Validate - vr.validate(hgvs_v) # Let hgvs errors deal with invalid variants and not hgvs objects - if hgvs_v.type == 'c': - try: - hgvs_v = vm.c_to_n(hgvs_v) - h_list.append(hgvs_v) - except: - raise mergeHGVSerror("Unable to map from c. position to absolute position") - elif hgvs_v.type == 'g': - h_list.append(hgvs_v) - if h_list != []: - hgvs_variant_list = copy.deepcopy(h_list) - - # Define accession and start/end positions - accession = None - merge_start_pos = None - merge_end_pos = None - type = None - full_list = [] - - # Loop through the submitted variants and gather the required info - for hgvs_v in hgvs_variant_list: - # No intronic positions - try: - if hgvs_v.posedit.pos.start.offset != 0: - raise mergeHGVSerror("Base-offset position submitted") - if hgvs_v.posedit.pos.end.offset != 0: - raise mergeHGVSerror("Base-offset position submitted") - except AttributeError: - pass - - # Normalize the variant (allow cross intron) which also adds the reference sequence (?) - hgvs_v = hn.normalize(hgvs_v) - - # Set the accession and ensure that multiple reference sequences have not been queried - if accession is None: - accession = hgvs_v.ac - type = hgvs_v.type - else: - if hgvs_v.ac != accession: - raise mergeHGVSerror("More than one reference sequence submitted") - else: - pass - - # Set initial start and end positions - if merge_start_pos is None: - merge_start_pos = hgvs_v.posedit.pos.start.base - merge_end_pos = hgvs_v.posedit.pos.end.base - # Append to the final list of variants - full_list.append(hgvs_v) - continue - # Ensure variants are in the correct order and not overlapping - else: - # ! 
hgvs_v.posedit.pos.start.base !> - if hgvs_v.posedit.pos.start.base <= merge_end_pos: - raise mergeHGVSerror("Submitted variants are out of order or their ranges overlap") - else: - # Create a fake variant to handle the missing sequence - ins_seq = sf.fetch_seq(hgvs_v.ac, merge_end_pos, hgvs_v.posedit.pos.start.base - 1) - gapping = hgvs_v.ac + ':' + hgvs_v.type + '.' + str(merge_end_pos + 1) + '_' + str( - hgvs_v.posedit.pos.start.base - 1) + 'delins' + ins_seq - hgvs_gapping = hp.parse_hgvs_variant(gapping) - full_list.append(hgvs_gapping) - # update end_pos - merge_end_pos = hgvs_v.posedit.pos.end.base - # Append to the final list of variants - full_list.append(hgvs_v) - - # Generate the alt sequence - alt_sequence = '' - for hgvs_v in full_list: - ref_alt = hgvs2vcf.hgvs_ref_alt(hgvs_v, sf) - alt_sequence = alt_sequence + ref_alt['alt'] - - # Fetch the reference sequence and copy it for the basis of the alt sequence - reference_sequence = sf.fetch_seq(accession, merge_start_pos - 1, merge_end_pos) - # Generate an hgvs_delins - if alt_sequence == '': - delins = accession + ':' + type + '.' + str(merge_start_pos) + '_' + str( - merge_end_pos) + 'del' + reference_sequence - else: - delins = accession + ':' + type + '.' + str(merge_start_pos) + '_' + str( - merge_end_pos) + 'del' + reference_sequence + 'ins' + alt_sequence - hgvs_delins = hp.parse_hgvs_variant(delins) - try: - hgvs_delins = vm.n_to_c(hgvs_delins) - except: - pass - # Normalize (allow variants crossing into different exons) - try: - hgvs_delins = hn.normalize(hgvs_delins) - except HGVSUnsupportedOperationError: - pass - return hgvs_delins - - -""" -Function designed to merge multiple HGVS variants (hgvs objects) into a single delins -using 5 prime normalization -""" - - -def merge_hgvs_5pr(hgvs_variant_list, hp, vr, reverse_normalizer, vm, sf): - reverse_hn = reverse_normalizer - - # Ensure c. 
is mapped to the - h_list = [] - - # Sanity check and format the submitted variants - for hgvs_v in hgvs_variant_list: - # For testing include parser - try: - hgvs_v = hp.parse_hgvs_variant(hgvs_v) - except: - pass - - # Validate - vr.validate(hgvs_v) # Let hgvs errors deal with invalid variants and not hgvs objects - if hgvs_v.type == 'c': - try: - hgvs_v = vm.c_to_n(hgvs_v) - h_list.append(hgvs_v) - except: - raise mergeHGVSerror("Unable to map from c. position to absolute position") - if h_list != []: - hgvs_variant_list = copy.deepcopy(h_list) - - # Define accession and start/end positions - accession = None - merge_start_pos = None - merge_end_pos = None - type = None - full_list = [] - - # Loop through the submitted variants and gather the required info - for hgvs_v in hgvs_variant_list: - try: - # No intronic positions - if hgvs_v.posedit.pos.start.offset != 0: - raise mergeHGVSerror("Base-offset position submitted") - if hgvs_v.posedit.pos.end.offset != 0: - raise mergeHGVSerror("Base-offset position submitted") - except AttributeError: - pass - - # Normalize the variant (allow cross intron) which also adds the reference sequence (?) - hgvs_v = reverse_hn.normalize(hgvs_v) - - # Set the accession and ensure that multiple reference sequences have not been queried - if accession is None: - accession = hgvs_v.ac - type = hgvs_v.type - else: - if hgvs_v.ac != accession: - raise mergeHGVSerror("More than one reference sequence submitted") - else: - pass - - # Set initial start and end positions - if merge_start_pos is None: - merge_start_pos = hgvs_v.posedit.pos.start.base - merge_end_pos = hgvs_v.posedit.pos.end.base - # Append to the final list of variants - full_list.append(hgvs_v) - continue - # Ensure variants are in the correct order and not overlapping - else: - # ! 
hgvs_v.posedit.pos.start.base !> - if hgvs_v.posedit.pos.start.base <= merge_end_pos: - raise mergeHGVSerror("Submitted variants are out of order or their ranges overlap") - else: - # Create a fake variant to handle the missing sequence - ins_seq = sf.fetch_seq(hgvs_v.ac, merge_end_pos, hgvs_v.posedit.pos.start.base - 1) - gapping = hgvs_v.ac + ':' + hgvs_v.type + '.' + str(merge_end_pos + 1) + '_' + str( - hgvs_v.posedit.pos.start.base - 1) + 'delins' + ins_seq - hgvs_gapping = hp.parse_hgvs_variant(gapping) - full_list.append(hgvs_gapping) - # update end_pos - merge_end_pos = hgvs_v.posedit.pos.end.base - # Append to the final list of variants - full_list.append(hgvs_v) - - # Generate the alt sequence - alt_sequence = '' - for hgvs_v in full_list: - ref_alt = hgvs2vcf.hgvs_ref_alt(hgvs_v, sf) - alt_sequence = alt_sequence + ref_alt['alt'] - - # Fetch the reference sequence and copy it for the basis of the alt sequence - reference_sequence = sf.fetch_seq(accession, merge_start_pos - 1, merge_end_pos) - - # Generate an hgvs_delins - if alt_sequence == '': - delins = accession + ':' + type + '.' + str(merge_start_pos) + '_' + str( - merge_end_pos) + 'del' + reference_sequence - else: - delins = accession + ':' + type + '.' 
+ str(merge_start_pos) + '_' + str( - merge_end_pos) + 'del' + reference_sequence + 'ins' + alt_sequence - hgvs_delins = hp.parse_hgvs_variant(delins) - try: - hgvs_delins = vm.n_to_c(hgvs_delins) - except: - pass - # Normalize (allow variants crossing into different exons) - try: - hgvs_delins = reverse_hn.normalize(hgvs_delins) - except HGVSUnsupportedOperationError: - pass - return hgvs_delins - - -""" -Function designed to merge multiple pseudo VCF variants (strings) into a single HGVS delins -using 5 prime normalization then return a 3 prime normalized final HGVS object -""" - - -def merge_pseudo_vcf(vcf_list, genome_build, reverse_normalizer, hn, hp): - hgvs_list = [] - # Convert pseudo_vcf list into a HGVS list - normalization_direction = 5 - for call in vcf_list: - hgvs = pseudo_vcf2hgvs.pvcf_to_hgvs(call, genome_build, normalization_direction, reverse_normalizer, hn, hp) - hgvs_list.append(hgvs) - # Merge - hgvs_delins = merge_hgvs_5pr(hgvs_list) - # normalize 3 prime - hgvs_delins = hn.normalize(hgvs_delins) - # return - return hgvs_delins - - -""" -HGVS allele handling function which takes a single HGVS allele description and -separates each allele into a list of HGVS variants -""" - - -def hgvs_alleles(variant_description, hp, vr, hn, vm, sf): - try: - # Split up the description - accession, remainder = variant_description.split(':') - # Branch - if re.search('[gcn]\.\d+\[', remainder): - # NM_004006.2:c.2376[G>C];[(G>C)] - # if re.search('\(', remainder): - # raise alleleVariantError('Unsupported format ' + remainder) - # NM_004006.2:c.2376[G>C];[G>C] - type, remainder = remainder.split('.') - pos = re.match('\d+', remainder) - pos = pos.group(0) - remainder = remainder.replace(pos, '') - remainder = remainder[1:-1] - alleles = remainder.split('];[') - my_alleles = [] - for posedit in alleles: - if re.search('\(', posedit): - # NM_004006.2:c.2376[G>C];[(G>C)] - continue - posedit_list = [posedit] - current_allele = [] - for pe in posedit_list: - vrt = 
accession + ':' + type + '.' + str(pos) + pe - current_allele.append(vrt) - my_alleles.append(current_allele) - else: - type, remainder = remainder.split('.') - if re.search('\(;\)', remainder) and re.search('\];', remainder): - # NM_004006.2:c.[296T>G];[476T>C](;)1083A>C(;)1406del - pre_alleles = remainder.split('(;)') - pre_merges = [] - alleles = [] - for allele in pre_alleles: - if re.match('\[', allele): - pre_merges.append(allele) - else: - alleles.append(allele) - # Extract descriptions - my_alleles = [] - # First alleles - for posedits in alleles: - posedit_list = posedits.split(';') - current_allele = [] - for pe in posedit_list: - vrt = accession + ':' + type + '.' + pe - current_allele.append(vrt) - my_alleles.append(current_allele) - - # Then Merges - alleles = [] - remainder = ';'.join(pre_merges) - remainder = remainder[1:-1] # removes the first [ and the last ] - alleles = remainder.split('];[') - # now separate out the variants in each allele§ - for posedits in alleles: - posedit_list = posedits.split(';') - current_allele = [] - for pe in posedit_list: - vrt = accession + ':' + type + '.' + pe - current_allele.append(vrt) - my_alleles.append(current_allele) - # Now merge the alleles into a single variant - merged_alleles = [] - for each_allele in my_alleles: - if re.search('\?', str(each_allele)): - # NM_004006.2:c.[2376G>C];[?] - continue - merge = [] - allele = str(merge_hgvs_3pr(each_allele, hp, vr, hn, vm, sf)) - merge.append(allele) - merged_alleles.append(merge) - my_alleles = merged_alleles - - elif re.search('\(;\)', remainder): - # If statement for uncertainties - # NM_004006.2:c.[296T>G;476C>T];[476C>T](;)1083A>C - if re.search('\[', remainder): - raise alleleVariantError('Unsupported format ' + type + '.' 
+ remainder) - # NM_004006.2:c.2376G>C(;)3103del - # NM_000548.3:c.3623_3647del(;)3745_3756dup - alleles = remainder.split('(;)') - # now separate out the variants in each allele§ - my_alleles = [] - for posedits in alleles: - posedit_list = posedits.split(';') - current_allele = [] - for pe in posedit_list: - vrt = accession + ':' + type + '.' + pe - current_allele.append(vrt) - my_alleles.append(current_allele) - else: - # If statement for uncertainties - if re.search('\(', remainder): - raise alleleVariantError('Unsupported format ' + type + '.' + remainder) - # NM_004006.2:c.[2376G>C];[3103del] - # NM_004006.2:c.[2376G>C];[3103del] - # NM_004006.2:c.[296T>G;476C>T;1083A>C];[296T>G;1083A>C] - # NM_000548.3:c.[4358_4359del;4361_4372del] - remainder = remainder[1:-1] # removes the first [ and the last ] - alleles = remainder.split('];[') - # now separate out the variants in each allele§ - my_alleles = [] - for posedits in alleles: - posedit_list = posedits.split(';') - current_allele = [] - for pe in posedit_list: - vrt = accession + ':' + type + '.' + pe - current_allele.append(vrt) - my_alleles.append(current_allele) - # Now merge the alleles into a single variant - merged_alleles = [] - - for each_allele in my_alleles: - if re.search('\?', str(each_allele)): - # NM_004006.2:c.[2376G>C];[?] 
- continue - merge = [] - allele = str(merge_hgvs_3pr(each_allele, hp, vr, hn, vm, sf)) - merge.append(allele) - merged_alleles.append(merge) - my_alleles = merged_alleles - - # Extract alleles into strings - allele_strings = [] - for alleles_l in my_alleles: - for allele in alleles_l: - allele_strings.append(allele) - my_alleles = allele_strings - - # return - return my_alleles - except Exception as e: - import traceback - exc_type, exc_value, last_traceback = sys.exc_info() - te = traceback.format_exc() - raise alleleVariantError(str(e)) - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# diff --git a/VariantValidator/variantanalyser/g_to_g.py b/VariantValidator/variantanalyser/g_to_g.py deleted file mode 100644 index 4f2f8189..00000000 --- a/VariantValidator/variantanalyser/g_to_g.py +++ /dev/null @@ -1,315 +0,0 @@ -# -*- coding: utf-8 -*- -""" -g_to_g.py - -1. chr_to_rsg -maps genomic variants directly to RefSeqGene variants for RefSeqGenes that perfectly align -with the genome - -2. 
rsg_to_chr -maps RefSeqGene variants directly to genomic variants for RefSeqGenes that perfectly align -with the genome -""" - -import os -import re -import functions -import hgvs -import hgvs.parser -import hgvs.normalizer -import hgvs.validator -import hgvs.exceptions -import dbControls.data as database_data - -hp = hgvs.parser.Parser() -alt_aln_method = 'splign' - -# From the hgvs parser import, create an instance of hgvs.parser.Parser -hp = hgvs.parser.Parser() - -# Set file root -# Set up os paths data and log folders -FILE_ROOT = os.path.dirname(os.path.abspath(__file__)) - - -# Covert chromosomal HGVS description to RefSeqGene -def chr_to_rsg(hgvs_genomic, hn, vr): - # print 'chr_to_rsg triggered' - hgvs_genomic = hn.normalize(hgvs_genomic) - # split the description - # Accessions - chr_ac = hgvs_genomic.ac - # Positions - chr_start_pos = int(hgvs_genomic.posedit.pos.start.base) - chr_end_pos = int(hgvs_genomic.posedit.pos.end.base) - # edit - chr_edit = hgvs_genomic.posedit.edit - - # Pre set variable, note there could be several - rsg_data_set = [] - - # Recover table from MySql - all_info = database_data.get_g_to_g_info() - for line in all_info: - # Logic to identify the correct RefSeqGene - rsg_data = {} - if chr_ac == line[1] and chr_start_pos >= int(line[2]) and chr_end_pos <= int(line[3]): - # query = "SELECT refSeqGeneID, refSeqChromosomeID, startPos, endPos, orientation, hgncSymbol FROM refSeqGene_loci" - # (u'NG_034189.1', u'NC_000004.12', 190173122, 190177845, u'+', u'DUX4L1') - # Set the values of the data dictionary - rsg_data['rsg_ac'] = line[0] - rsg_data['chr_ac'] = line[1] - rsg_data['rsg_start'] = line[2] - rsg_data['rsg_end'] = line[3] - rsg_data['ori'] = line[4] - rsg_data['gene'] = line[5] - rsg_data_set.append(rsg_data) - else: - continue - - # Compile descriptions and validate - descriptions = [] - for rsg_data in rsg_data_set: - rsg_ac = rsg_data['rsg_ac'] - rsg_start = rsg_data['rsg_start'] - rsg_end = rsg_data['rsg_end'] - ori = 
rsg_data['ori'] - gene = rsg_data['gene'] - # String the description - if ori == '+': - rsg_description = rsg_ac + ':g.' + str(chr_start_pos - int(rsg_start) + 1) + '_' + str( - chr_end_pos - int(rsg_start) + 1) + str(chr_edit) - hgvs_refseqgene = hp.parse_hgvs_variant(rsg_description) - try: - hgvs_refseqgene = hn.normalize(hgvs_refseqgene) - except: - error = 'Not in SeqRepo' - data = {'hgvs_refseqgene': str(hgvs_refseqgene), 'gene': gene, 'valid': str(error)} - descriptions.append(data) - continue - try: - vr.validate(hgvs_refseqgene) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('does not agree with reference sequence', error): - match = re.findall('\(([GATC]+)\)', error) - new_ref = match[1] - hgvs_refseqgene.posedit.edit.ref = new_ref - error = 'true' - else: - pass - data = {'hgvs_refseqgene': str(hgvs_refseqgene), 'gene': gene, 'valid': str(error)} - else: - data = {'hgvs_refseqgene': str(hgvs_refseqgene), 'gene': gene, 'valid': 'true'} - descriptions.append(data) - if ori == '-': - # Reverse complement of bases may be required. 
Let normalizer do the lifting for strings of bases - # Look for scenarios with RC needed bases and extract the bases from the edit - if re.search(r"((del[GATCUgatcu]+))", str(chr_edit)): - bases = re.search(r"((del[GATCUgatcu]+))", str(chr_edit)) - bases = bases.group(1) - chr_edit = 'del' + str(chr_edit).replace(bases, '') - if re.search(r"((ins[GATCUgatcu]+))", str(chr_edit)): - bases = re.search(r"((ins[GATCUgatcu]+))", str(chr_edit)) - bases = bases.group(1) - ins_revcomp = functions.revcomp(bases) - chr_edit = str(chr_edit).replace(bases, '') + 'ins' + ins_revcomp - if re.search(r"((dup[GATCUgatcu]+))", str(chr_edit)): - bases = re.search(r"((dup[GATCUgatcu]+))", str(chr_edit)) - bases = bases.group(1) - chr_edit = 'dup' + str(chr_edit).replace(bases, '') - if re.search(r"((inv[GATCUgatcu]+))", str(chr_edit)): - bases = re.search(r"((inv[GATCUgatcu]+))", str(chr_edit)) - bases = bases.group(1) - chr_edit = 'inv' + str(chr_edit).replace(bases, '') - if re.search('>', str(chr_edit)) or re.search('=', str(chr_edit)): - chr_edit = str(chr_edit) - chr_edit = chr_edit.replace('A>', 't>') - chr_edit = chr_edit.replace('T>', 'a>') - chr_edit = chr_edit.replace('G>', 'c>') - chr_edit = chr_edit.replace('C>', 'g>') - chr_edit = chr_edit.replace('>A', '>t') - chr_edit = chr_edit.replace('>T', '>a') - chr_edit = chr_edit.replace('>G', '>c') - chr_edit = chr_edit.replace('>C', '>g') - chr_edit = chr_edit.replace('C=', 'g=') - chr_edit = chr_edit.replace('G=', 'c=') - chr_edit = chr_edit.replace('A=', 't=') - chr_edit = chr_edit.replace('T=', 'a=') - chr_edit = chr_edit.upper() - - rsg_description = rsg_ac + ':g.' 
+ str( - (int(rsg_end) - int(rsg_start)) - (chr_end_pos - int(rsg_start)) + 1) + '_' + str( - (int(rsg_end) - int(rsg_start)) - (chr_start_pos - int(rsg_start)) + 1) + str(chr_edit) - hgvs_refseqgene = hp.parse_hgvs_variant(rsg_description) - try: - hgvs_refseqgene = hn.normalize(hgvs_refseqgene) - except: - error = 'Not in SeqRepo' - data = {'hgvs_refseqgene': str(hgvs_refseqgene), 'gene': gene, 'valid': str(error)} - descriptions.append(data) - continue - try: - vr.validate(hgvs_refseqgene) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('does not agree with reference sequence', error): - match = re.findall('\(([GATC]+)\)', error) - new_ref = match[1] - hgvs_refseqgene.posedit.edit.ref = new_ref - error = 'true' - else: - pass - data = {'hgvs_refseqgene': str(hgvs_refseqgene), 'gene': gene, 'valid': str(error)} - else: - data = {'hgvs_refseqgene': str(hgvs_refseqgene), 'gene': gene, 'valid': 'true'} - descriptions.append(data) - - # Return the required data. This is a dictionary containing the rsg description, validation status and gene ID - return descriptions - - -# Covert RefSeqGene HGVS description to Chromosomal -def rsg_to_chr(hgvs_refseqgene, primary_assembly, hn, vr): - # normalize - try: - hgvs_refseqgene = hn.normalize(hgvs_refseqgene) - except: - pass - # split the description - # Accessions - rsg_ac = hgvs_refseqgene.ac - # Positions - rsg_start_pos = int(hgvs_refseqgene.posedit.pos.start.base) - rsg_end_pos = int(hgvs_refseqgene.posedit.pos.end.base) - # edit - rsg_edit = hgvs_refseqgene.posedit.edit - - # Pre set variable, note there could be several - chr_data_set = [] - - # Recover table from MySql - all_info = database_data.get_g_to_g_info() - for line in all_info: - # Logic to identify the correct RefSeqGene - chr_data = {} - if rsg_ac == line[0] and primary_assembly == line[6]: - # query = "SELECT refSeqGeneID, refSeqChromosomeID, startPos, endPos, orientation, hgncSymbol FROM refSeqGene_loci" - # (u'NG_034189.1', 
u'NC_000004.12', 190173122, 190177845, u'+', u'DUX4L1') - # Set the values of the data dictionary - chr_data['rsg_ac'] = line[0] - chr_data['chr_ac'] = line[1] - chr_data['rsg_start'] = line[2] - chr_data['rsg_end'] = line[3] - chr_data['ori'] = line[4] - chr_data['gene'] = line[5] - chr_data_set.append(chr_data) - else: - continue - - # Compile descriptions and validate - descriptions = [] - for chr_data in chr_data_set: - chr_ac = chr_data['chr_ac'] - rsg_ac = chr_data['rsg_ac'] - chr_start = int(chr_data['rsg_start']) - chr_end = int(chr_data['rsg_end']) - ori = chr_data['ori'] - gene = chr_data['gene'] - # String the description - if ori == '+': - chr_description = chr_ac + ':g.' + str(chr_start + rsg_start_pos - 1) + '_' + str( - chr_start + rsg_end_pos - 1) + str(rsg_edit) - hgvs_genomic = hp.parse_hgvs_variant(chr_description) - hgvs_genomic = hn.normalize(hgvs_genomic) - try: - vr.validate(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('does not agree with reference sequence', error): - match = re.findall('\(([GATC]+)\)', error) - new_ref = match[1] - hgvs_genomic.posedit.edit.ref = new_ref - error = 'true' - else: - pass - # # print str(e) + '\n3.' - data = {'hgvs_genomic': str(hgvs_genomic), 'gene': gene, 'valid': str(error)} - else: - data = {'hgvs_genomic': str(hgvs_genomic), 'gene': gene, 'valid': 'true'} - descriptions.append(data) - if ori == '-': - # Reverse complement of bases may be required. 
Let normalizer do the lifting for strings of bases - # Look for scenarios with RC needed bases and extract the bases from the edit - if re.search(r"((del[GATCUgatcu]+))", str(rsg_edit)): - bases = re.search(r"((del[GATCUgatcu]+))", str(rsg_edit)) - bases = bases.group(1) - rsg_edit = 'del' + str(rsg_edit).replace(bases, '') - if re.search(r"((ins[GATCUgatcu]+))", str(rsg_edit)): - bases = re.search(r"((ins[GATCUgatcu]+))", str(rsg_edit)) - bases = bases.group(1) - ins_revcomp = functions.revcomp(bases) - rsg_edit = str(rsg_edit).replace(bases, '') + 'ins' + ins_revcomp - if re.search(r"((dup[GATCUgatcu]+))", str(rsg_edit)): - bases = re.search(r"((dup[GATCUgatcu]+))", str(rsg_edit)) - bases = bases.group(1) - rsg_edit = 'dup' + str(rsg_edit).replace(bases, '') - if re.search(r"((inv[GATCUgatcu]+))", str(rsg_edit)): - bases = re.search(r"((inv[GATCUgatcu]+))", str(rsg_edit)) - bases = bases.group(1) - rsg_edit = 'inv' + str(rsg_edit).replace(bases, '') - if re.search('>', str(rsg_edit)) or re.search('=', str(rsg_edit)): - rsg_edit = str(rsg_edit) - rsg_edit = rsg_edit.replace('A>', 't>') - rsg_edit = rsg_edit.replace('T>', 'a>') - rsg_edit = rsg_edit.replace('G>', 'c>') - rsg_edit = rsg_edit.replace('C>', 'g>') - rsg_edit = rsg_edit.replace('>A', '>t') - rsg_edit = rsg_edit.replace('>T', '>a') - rsg_edit = rsg_edit.replace('>G', '>c') - rsg_edit = rsg_edit.replace('>C', '>g') - rsg_edit = rsg_edit.replace('C=', 'g=') - rsg_edit = rsg_edit.replace('G=', 'c=') - rsg_edit = rsg_edit.replace('A=', 't=') - rsg_edit = rsg_edit.replace('T=', 'a=') - rsg_edit = rsg_edit.upper() - - chr_description = chr_ac + ':g.' 
+ str( - int(chr_start) + (int(chr_end) - int(chr_start)) - rsg_end_pos + 1) + '_' + str( - int(chr_start) + (int(chr_end) - int(chr_start)) - rsg_start_pos + 1) + str(rsg_edit) - - hgvs_genomic = hp.parse_hgvs_variant(chr_description) - hgvs_genomic = hn.normalize(hgvs_genomic) - try: - vr.validate(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('does not agree with reference sequence', error): - match = re.findall('\(([GATC]+)\)', error) - new_ref = match[1] - hgvs_genomic.posedit.edit.ref = new_ref - error = 'true' - data = {'hgvs_genomic': str(hgvs_genomic), 'gene': gene, 'valid': str(error)} - else: - data = {'hgvs_genomic': str(hgvs_genomic), 'gene': gene, 'valid': 'true'} - descriptions.append(data) - - # Return the required data. This is a dictionary containing the rsg description, validation status and gene ID - return descriptions - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
-# \ No newline at end of file diff --git a/VariantValidator/variantanalyser/gap_genes.py b/VariantValidator/variantanalyser/gap_genes.py deleted file mode 100644 index 2b9ea320..00000000 --- a/VariantValidator/variantanalyser/gap_genes.py +++ /dev/null @@ -1,581 +0,0 @@ -""" -Lists of genes for GRCh37 and GRCh38 which require a gap to be inserted into either the -transcript or the genome to maintain a perfect alignment -""" -def gap_black_list(symbol): - gapGene = { - "LPP": "", - "VPS13D": "", - "SSPO": "", - "HTT": "", - "PRKDC": "", - "RNA45SN4": "", - "RNA45SN1": "", - "RNA45SN2": "", - "RNA45SN3": "", - "ALMS1": "", - "ZNF141": "", - "PRLR": "", - "NBPF10": "", - "ACACA": "", - "ZMYM2": "", - "MIAT": "", - "WDFY4": "", - "CECR2": "", - "FAM30A": "", - "MYO15B": "", - "CELF2": "", - "JRK": "", - "PTEN": "", - "ZNF714": "", - "MGAT4C": "", - "SLITRK4": "", - "ZAN": "", - "COL19A1": "", - "CCDC144B": "", - "RAB11FIP4": "", - "ZNF516": "", - "ZNF518A": "", - "PROX1": "", - "HCG18": "", - "SON": "", - "ARMC9": "", - "CAMK1D": "", - "GRIP2": "", - "KLHL5": "", - "PPIP5K2": "", - "PKD1L2": "", - "SLC7A2": "", - "DGKK": "", - "IQSEC1": "", - "SYNM": "", - "SARM1": "", - "SMAD5": "", - "MAML3": "", - "CXorf40A": "", - "MAPT": "", - "ITIH5": "", - "NOTCH4": "", - "FER1L4": "", - "CNTNAP4": "", - "NLRC3": "", - "COL18A1": "", - "SLC6A6": "", - "DDX52": "", - "CDH4": "", - "SLC46A1": "", - "SLC35E2B": "", - "OCLN": "", - "DCAF7": "", - "SCAMP1": "", - "ATG13": "", - "SMAD3": "", - "DDX6": "", - "SLC25A53": "", - "ALG9": "", - "DCP1A": "", - "NCAM1": "", - "LINC00869": "", - "MYH7": "", - "DIXDC1": "", - "ZBTB4": "", - "RABEP1": "", - "PVR": "", - "POM121C": "", - "HOOK1": "", - "MAPK8IP2": "", - "ZNF280B": "", - "WASF2": "", - "PLEKHA2": "", - "PPP4R3B": "", - "FAM83H": "", - "SALL3": "", - "PHKG2": "", - "C18orf25": "", - "ZNF229": "", - "ZNF765-ZNF761": "", - "KANSL1": "", - "FAM102B": "", - "NOTCH2NL": "", - "YTHDF3": "", - "DPCR1": "", - "DACH1": "", - "PKD1L3": "", 
- "GRIA3": "", - "CYP1B1": "", - "LTBP4": "", - "SPON1": "", - "RNA28SN4": "", - "RNA28SN1": "", - "TRIL": "", - "RNA28SN3": "", - "RNA28SN2": "", - "XKR5": "", - "RBM8A": "", - "SALL2": "", - "JADE3": "", - "DHX57": "", - "PIGN": "", - "CPNE3": "", - "ANO1": "", - "NATD1": "", - "DKFZP434A062": "", - "TDRD9": "", - "BDNF": "", - "IVD": "", - "STIMATE": "", - "KCP": "", - "PRAG1": "", - "KLHL18": "", - "LYNX1": "", - "HYOU1": "", - "HLA-L": "", - "ATG9B": "", - "SLC6A14": "", - "PCSK6": "", - "MIR99AHG": "", - "TOX4": "", - "GABBR1": "", - "RABGEF1": "", - "PRR36": "", - "MAP3K14": "", - "PCDHB9": "", - "LOC102723753": "", - "MYO19": "", - "SRSF8": "", - "CTPS2": "", - "AHCYL1": "", - "UHRF1": "", - "MARCKS": "", - "ZMYM1": "", - "SENP3-EIF4A1": "", - "SEC14L2": "", - "RAPGEFL1": "", - "ZNF761": "", - "CNTROB": "", - "SSTR3": "", - "PAX2": "", - "GGA3": "", - "MCL1": "", - "EPS8": "", - "LINC02210": "", - "KRBA1": "", - "MSH5-SAPCD1": "", - "HLA-DPB1": "", - "PPP1R9B": "", - "OPLAH": "", - "UBXN4": "", - "ZNF2": "", - "EPHB6": "", - "LIX1L": "", - "RAPGEF4": "", - "MED22": "", - "POLR3C": "", - "DDR1": "", - "SIGLEC16": "", - "NEFL": "", - "ABCG4": "", - "BAG6": "", - "RECQL4": "", - "SPPL2B": "", - "RETREG3": "", - "FZD6": "", - "SCRT1": "", - "LSM14A": "", - "TAPBP": "", - "TWSG1": "", - "FRMD8": "", - "VPS26C": "", - "PNMA3": "", - "ZNF282": "", - "SP8": "", - "SRRM3": "", - "CCDC125": "", - "NPIPB3": "", - "FAM13C": "", - "GTF2IP1": "", - "ANKRD34A": "", - "PPP1R2": "", - "PHYHIPL": "", - "USH1G": "", - "LINC00461": "", - "ZNRD1ASP": "", - "TRIM10": "", - "SPIB": "", - "BCL6B": "", - "SCARF2": "", - "KIR3DX1": "", - "LOC400682": "", - "HLA-DOA": "", - "PLCD3": "", - "VPS11": "", - "FAM231D": "", - "TRIM52": "", - "ABCF1": "", - "ANP32E": "", - "COPG2IT1": "", - "TGIF2": "", - "LHX1": "", - "PIK3R6": "", - "APOL4": "", - "ZNF502": "", - "FGD5P1": "", - "LINC00624": "", - "ADRA2B": "", - "ZNF598": "", - "GNAZ": "", - "TMEM106A": "", - "SLC12A9": "", - "TCF19": 
"", - "CCDC3": "", - "EFHC2": "", - "KCNE1B": "", - "PBX2": "", - "PAMR1": "", - "GJA5": "", - "TYW1B": "", - "PLP1": "", - "ANKDD1A": "", - "GBE1": "", - "MAMDC2": "", - "PIGW": "", - "MOCOS": "", - "GRIPAP1": "", - "COL26A1": "", - "MAPT-IT1": "", - "SRRT": "", - "ZNF595": "", - "SEMA3B": "", - "C21orf58": "", - "RHBDF1": "", - "EGR2": "", - "ABRAXAS2": "", - "NPRL3": "", - "TXNIP": "", - "RYK": "", - "RXRB": "", - "LILRB2": "", - "SYT3": "", - "TRPV6": "", - "PARG": "", - "CSNK1G2": "", - "ARHGEF16": "", - "HSH2D": "", - "ALDH3B1": "", - "ZNF274": "", - "MUC13": "", - "LINC00842": "", - "AKT1": "", - "CHM": "", - "ZSCAN26": "", - "MAL2": "", - "PTH2R": "", - "GPANK1": "", - "LINC01623": "", - "CD86": "", - "RHBG": "", - "TMSB15B": "", - "ZCCHC3": "", - "TUBB": "", - "POLDIP2": "", - "PRMT3": "", - "PPT2-EGFL8": "", - "LINC02210-CRHR1": "", - "KIFC1": "", - "USP27X": "", - "HDGFL2": "", - "FOXI3": "", - "PAH": "", - "P3H3": "", - "CRHR1": "", - "LOC101927759": "", - "ARFRP1": "", - "C3orf38": "", - "DAXX": "", - "SLC37A4": "", - "IQCA1L": "", - "MMP28": "", - "LINC02197": "", - "NECAP1": "", - "CDSN": "", - "LOC440570": "", - "B3GNT6": "", - "AOAH": "", - "GAS2L1": "", - "MPIG6B": "", - "CDK11B": "", - "ASPN": "", - "HSPA1B": "", - "LOC100508631": "", - "MICB": "", - "LOC102724580": "", - "SENP3": "", - "RBM38": "", - "TMC4": "", - "LILRB5": "", - "C6orf47": "", - "RIOX1": "", - "BHLHE40-AS1": "", - "SRD5A2": "", - "TSEN34": "", - "EI24": "", - "PADI6": "", - "LINC00893": "", - "CYP2D7": "", - "LINC01622": "", - "LINC01879": "", - "REC8": "", - "UNC93B1": "", - "POU5F1": "", - "GPIHBP1": "", - "FOXD1": "", - "GPSM1": "", - "MICA": "", - "UGT2B15": "", - "KIZ": "", - "ARL17A": "", - "PRAMEF36P": "", - "HCG22": "", - "RNF39": "", - "BECN1": "", - "MOG": "", - "PROSER3": "", - "LINC01149": "", - "CYP21A2": "", - "PRAMEF18": "", - "TBC1D3G": "", - "NR2E3": "", - "NR1H2": "", - "VEGFC": "", - "TBC1D3F": "", - "C18orf65": "", - "HOXC11": "", - "TRY2P": "", - 
"LINC01138": "", - "LINC00243": "", - "HCG4": "", - "GBAP1": "", - "LYPD4": "", - "FAM226A": "", - "ZNF787": "", - "CYP11A1": "", - "EEF1A2": "", - "SLC38A5": "", - "MICB-DT": "", - "ZNF852": "", - "LOC441242": "", - "RNF115": "", - "SMA4": "", - "TAZ": "", - "LENG9": "", - "STRAP": "", - "CYP4F8": "", - "TSPAN10": "", - "KIR3DL1": "", - "HCP5B": "", - "MMP12": "", - "STAG3L2": "", - "GOLGA6L17P": "", - "ZBTB12": "", - "TREH": "", - "PMCHL2": "", - "LAGE3": "", - "ATRNL1": "", - "CEACAM20": "", - "ZG16": "", - "MIR3936HG": "", - "LOC102724562": "", - "INTS4P2": "", - "LINC00221": "", - "DHRS3": "", - "HCG27": "", - "CLTB": "", - "KLK6": "", - "HLA-H": "", - "SPANXA2-OT1": "", - "PRAMEF11": "", - "PPP1R11": "", - "NDUFA6-AS1": "", - "ECHDC3": "", - "HLA-DQB1": "", - "KIR2DS4": "", - "HLA-B": "", - "LOC102725121": "", - "CIB2": "", - "KIR2DL1": "", - "KIR2DL2": "", - "HLA-C": "", - "ABO": "", - "KRTAP10-7": "", - "HLA-G": "", - "CWC15": "", - "C17orf100": "", - "HLA-J": "", - "OR4K3": "", - "HLA-DQA1": "", - "LOC105379550": "", - "MRPS21": "", - "SIGLEC17P": "", - "LINC01115": "", - "NUDT18": "", - "ORAI1": "", - "PNLIPRP2": "", - "KLF14": "", - "SSX2B": "", - "CCL15-CCL14": "", - "UBXN8": "", - "IGFBP2": "", - "TMEM44-AS1": "", - "TEX13A": "", - "LCA10": "", - "SPANXN2": "", - "SYCE1": "", - "LILRA5": "", - "KRTAP5-4": "", - "FAM228B": "", - "OR12D1": "", - "SPC25": "", - "FCGR1CP": "", - "OR52E1": "", - "NOP16": "", - "EGFL8": "", - "PRAF2": "", - "LOC388282": "", - "CCNQ": "", - "VN1R3": "", - "HLA-V": "", - "SBK3": "", - "LOC100128594": "", - "KLRF1": "", - "EMG1": "", - "TARM1": "", - "UBE2NL": "", - "OR5AL1": "", - "TPSB2": "", - "PSORS1C2": "", - "HLA-DQA2": "", - "OR10AC1": "", - "OR2J1": "", - "OR10J4": "", - "CSNK2B": "", - "OR4Q2": "", - "LOC100507547": "", - "ZNF630-AS1": "", - "HLA-DMA": "", - "OR4E1": "", - "PRB3": "", - "CCL15": "", - "C8orf59": "", - "PSMB9": "", - "LINC01719": "", - "CT45A1": "", - "BST2": "", - "NCF4-AS1": "", - "FOLR3": "", - 
"KRTAP9-9": "", - "COPZ2": "", - "LYNX1-SLURP2": "", - "SAPCD1": "", - "PSORS1C1": "", - "ZNF793-AS1": "", - "ZNRD1": "", - "FRG1CP": "", - "LINC02362": "", - "KRTAP4-1": "", - "PICSAR": "", - "TWIST2": "", - "LINC01796": "", - "HCG25": "", - "KRTAP7-1": "", - "CRLF2": "", - "MDH2": "", - "HCG9": "", - "ATP5MC1": "", - "TTTY14": "", - "LOC100507384": "", - "PMS2P2": "", - "HCG23": "", - "LINC00226": "", - "RPP21": "", - "GPHB5": "", - "GAGE8": "", - "GAGE2E": "", - "LOC101928087": "", - "GAGE12B": "", - "GRIFIN": "", - "LOC102725193": "", - "HCG14": "", - "IFITM4P": "", - "SNORD48": "", - "MUC22": "", - "PTPRQ": "", - "HERC2": "", - "OTUD7A": "", - "LOC646214": "", - "TJP1": "", - "WDR81": "", - "KLF13": "", - "POLR2A": "", - "LOC100288637": "", - "GOLGA8N": "", - "GOLGA8J": "", - "GOLGA8K": "", - "GOLGA8R": "", - "MTMR10": "", - "SMIM10L1": "", - "KLLN": "", - "LINC02249": "", - "APBA2": "", - "CHRNA7": "", - "DBET": "", - "WNT3": "", - "GOLGA2P10": "", - "CHRFAM7A": "", - "RPH3AL": "", - "SORD2P": "", - "LINC00552": "", - "MPV17L": "", - "SLC22A18AS": "", - "C16orf45": "", - "GRK1": "", - "FRG2": "", - "LOC143666": "", - "FRG2EP": "", - "LOC105373100": "", - "GOLGA8Q": "", - "HERC2P7": "", - "SLC22A18": "", - "METRNL": "", - "BTNL2": "", - "ADAM18": "", - "PRSS22": "", - "C2orf27B": "", - "C2orf27A": "", - "LOC283710": "", - "LOC101928804": "", - "IFI27": "", - "ABCC6": "", - "LOC692247": "" - } - is_it_gapped = gapGene.get(symbol) - if is_it_gapped == '': - return True - else: - return False - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# \ No newline at end of file diff --git a/VariantValidator/variantanalyser/hgvs2vcf.py b/VariantValidator/variantanalyser/hgvs2vcf.py deleted file mode 100644 index 502982f3..00000000 --- a/VariantValidator/variantanalyser/hgvs2vcf.py +++ /dev/null @@ -1,833 +0,0 @@ -# -*- coding: utf-8 -*- -""" -hgvs2vcf.py -A variety of functions that convert parder hgvs objects into VCF component parts -Each function has a slightly difference emphasis -1. hgvs2vcf -Simple conversionwhich ensures identity is as 5 prime as possible by adding an extra 5 -prime base. Necessary for most gap handling situations -2. report_hgvs2vcf -Used to report the Most true representation of the VCF i.e. 5 prime normalized but no -additional bases added. NOTE: no gap handling capabilities -3. pos_lock_hgvs2vcf -No normalization at all. No additional bases added. Simply returns an in-situ VCF -4. hard_right_hgvs2vcf and hard_left_hgvs2vcf -Designed specifically for gap handling. 
-hard left pushes as 5 prime as possible and adds additional bases -hard right pushes as 3 prime as possible and adds additional bases -""" - -# Import modules -import re -import copy -import supported_chromosome_builds as supportedChromosomeBuilds - -# Import Biopython modules -from Bio.Seq import Seq - - -# Database connections and hgvs objects are now passed from VariantValidator.py - -def hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): - hgvs_genomic_variant = hgvs_genomic - # Reverse normalize hgvs_genomic_variant: NOTE will replace ref - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic_variant) - # hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - - # Chr - chr = supportedChromosomeBuilds.to_chr_num_ucsc(reverse_normalized_hgvs_genomic.ac, primary_assembly) - if chr is not None: - pass - else: - chr = reverse_normalized_hgvs_genomic.ac - - if re.search('[GATC]+\=', str(reverse_normalized_hgvs_genomic.posedit)): - pos = str(reverse_normalized_hgvs_genomic.posedit.pos.start) - ref = reverse_normalized_hgvs_genomic.posedit.edit.ref - alt = reverse_normalized_hgvs_genomic.posedit.edit.ref - - # Insertions - elif (re.search('ins', str(reverse_normalized_hgvs_genomic.posedit)) and not re.search('del', str( - reverse_normalized_hgvs_genomic.posedit))): - end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) - start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) - alt_start = start - 1 # - # Recover sequences - ref_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), alt_start, end - 1) - ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt - # Assemble - pos = start - ref = ref_seq - alt = ref_seq + ins_seq - - # Substitutions - elif re.search('>', str(reverse_normalized_hgvs_genomic.posedit)): - ref = reverse_normalized_hgvs_genomic.posedit.edit.ref - alt = reverse_normalized_hgvs_genomic.posedit.edit.alt - pos = str(reverse_normalized_hgvs_genomic.posedit.pos) - - 
# Deletions - elif re.search('del', str(reverse_normalized_hgvs_genomic.posedit)) and not re.search('ins', str( - reverse_normalized_hgvs_genomic.posedit)): - end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) - start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) - adj_start = start - 2 - start = start - 1 - try: - ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt - except: - ins_seq = '' - else: - if str(ins_seq) == 'None': - ins_seq = '' - # Recover sequences - hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) - pre_base = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, start) - # Assemble - pos = str(start) - ref = pre_base + hgvs_del_seq - alt = pre_base - - - # inv - elif re.search('inv', str(reverse_normalized_hgvs_genomic.posedit)): - end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) - start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) - adj_start = start - 1 - start = start - try: - ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt - except: - ins_seq = '' - else: - if str(ins_seq) == 'None': - ins_seq = '' - # Recover sequences - hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) - vcf_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) - bs = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start - 1, adj_start) - # Assemble - pos = str(start) - ref = vcf_del_seq - alt = ins_seq - if re.search('inv', str(reverse_normalized_hgvs_genomic.posedit)): - my_seq = Seq(vcf_del_seq) - # alt = bs + str(my_seq.reverse_complement()) - alt = str(my_seq.reverse_complement()) - - - # Delins - elif (re.search('del', str(reverse_normalized_hgvs_genomic.posedit)) and re.search('ins', str( - reverse_normalized_hgvs_genomic.posedit))): - end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) - start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base - 1) - adj_start 
= start - 1 - start = start - try: - ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt - except: - ins_seq = '' - else: - if str(ins_seq) == 'None': - ins_seq = '' - # Recover sequences - hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) - vcf_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) - # Assemble - pos = str(start) - ref = vcf_del_seq - alt = vcf_del_seq[:1] + ins_seq - - # Duplications - elif (re.search('dup', str(reverse_normalized_hgvs_genomic.posedit))): - end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) # - start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) - adj_start = start - 2 # - start = start - 1 # - # Recover sequences - dup_seq = reverse_normalized_hgvs_genomic.posedit.edit.ref - vcf_ref_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) - # Assemble - pos = str(start) - ref = vcf_ref_seq - alt = vcf_ref_seq + dup_seq - else: - chr = '' - ref = '' - alt = '' - pos = '' - - # ensure as 5' as possible - if chr != '' and pos != '' and ref != '' and alt != '': - if len(ref) > 1: - rsb = list(str(ref)) - if reverse_normalized_hgvs_genomic.posedit.edit.type == 'identity': - pos = int(pos) - 1 - prev = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), pos - 1, pos) - pos = str(pos) - ref = prev + ref - alt = prev + alt - - # Dictionary the VCF - vcf_dict = {'chr': chr, 'pos': pos, 'ref': ref, 'alt': alt, 'normalized_hgvs': reverse_normalized_hgvs_genomic} - return vcf_dict - - -def report_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): - hgvs_genomic_variant = hgvs_genomic - - # Reverse normalize hgvs_genomic_variant: NOTE will replace ref - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic_variant) - # hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - - # Sort the primary assemblies - if re.match('GRC', primary_assembly): - if re.search('37', 
primary_assembly): - ucsc_pa = 'hg19' - grc_pa = primary_assembly - if re.search('38', primary_assembly): - ucsc_pa = 'hg38' - grc_pa = primary_assembly - else: - if re.search('19', primary_assembly): - ucsc_pa = primary_assembly - grc_pa = 'GRCh37' - if re.search('38', primary_assembly): - ucsc_pa = primary_assembly - grc_pa = 'GRCh38' - - # UCSC Chr - ucsc_chr = supportedChromosomeBuilds.to_chr_num_ucsc(reverse_normalized_hgvs_genomic.ac, ucsc_pa) - if ucsc_chr is not None: - pass - else: - ucsc_chr = reverse_normalized_hgvs_genomic.ac - - # GRC Chr - grc_chr = supportedChromosomeBuilds.to_chr_num_refseq(reverse_normalized_hgvs_genomic.ac, grc_pa) - if grc_chr is not None: - pass - else: - grc_chr = reverse_normalized_hgvs_genomic.ac - - if re.search('[GATC]+\=', str(reverse_normalized_hgvs_genomic.posedit)): - pos = str(reverse_normalized_hgvs_genomic.posedit.pos.start) - ref = reverse_normalized_hgvs_genomic.posedit.edit.ref - alt = reverse_normalized_hgvs_genomic.posedit.edit.ref - - # Insertions - elif (re.search('ins', str(reverse_normalized_hgvs_genomic.posedit)) and not re.search('del', str( - reverse_normalized_hgvs_genomic.posedit))): - end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) - start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) - alt_start = start - 1 # - # Recover sequences - ref_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), alt_start, end - 1) - ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt - # Assemble - pos = start - ref = ref_seq - alt = ref_seq + ins_seq - - # Substitutions - elif re.search('>', str(reverse_normalized_hgvs_genomic.posedit)): - ref = reverse_normalized_hgvs_genomic.posedit.edit.ref - alt = reverse_normalized_hgvs_genomic.posedit.edit.alt - pos = str(reverse_normalized_hgvs_genomic.posedit.pos) - - # Deletions - elif re.search('del', str(reverse_normalized_hgvs_genomic.posedit)) and not re.search('ins', str( - reverse_normalized_hgvs_genomic.posedit)): - end = 
int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) - start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) - adj_start = start - 2 - start = start - 1 - try: - ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt - except: - ins_seq = '' - else: - if str(ins_seq) == 'None': - ins_seq = '' - # Recover sequences - hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) - pre_base = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, start) - # Assemble - pos = str(start) - ref = pre_base + hgvs_del_seq - alt = pre_base - - - # inv - elif re.search('inv', str(reverse_normalized_hgvs_genomic.posedit)): - end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) - start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) - adj_start = start - 1 - start = start - try: - ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt - except: - ins_seq = '' - else: - if str(ins_seq) == 'None': - ins_seq = '' - # Recover sequences - hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) - vcf_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) - bs = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start - 1, adj_start) - # Assemble - pos = str(start) - # pos = str(start-1) - # ref = bs + vcf_del_seq - ref = vcf_del_seq - alt = ins_seq - if re.search('inv', str(reverse_normalized_hgvs_genomic.posedit)): - my_seq = Seq(vcf_del_seq) - # alt = bs + str(my_seq.reverse_complement()) - alt = str(my_seq.reverse_complement()) - - # Delins - elif (re.search('del', str(reverse_normalized_hgvs_genomic.posedit)) and re.search('ins', str( - reverse_normalized_hgvs_genomic.posedit))): - end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) - start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base - 1) - adj_start = start - 1 - start = start - try: - ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt - except: - ins_seq = 
'' - else: - if str(ins_seq) == 'None': - ins_seq = '' - # Recover sequences - hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) - vcf_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) - # Assemble - # pos = str(start) - # ref = vcf_del_seq - # alt = vcf_del_seq[:1] + ins_seq - pos = str(start + 1) - ref = vcf_del_seq[1:] - alt = ins_seq - - # Duplications - elif (re.search('dup', str(reverse_normalized_hgvs_genomic.posedit))): - end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) # - start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) - adj_start = start - 2 # - start = start - 1 # - # Recover sequences - dup_seq = reverse_normalized_hgvs_genomic.posedit.edit.ref - vcf_ref_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) - # Assemble - pos = str(start + 1) - ref = vcf_ref_seq[1:] - alt = vcf_ref_seq[1:] + dup_seq - else: - chr = '' - ref = '' - alt = '' - pos = '' - - # Dictionary the VCF - vcf_dict = {'pos': str(pos), 'ref': ref, 'alt': alt, 'ucsc_chr': ucsc_chr, 'grc_chr': grc_chr, - 'normalized_hgvs': reverse_normalized_hgvs_genomic} - return vcf_dict - - -def pos_lock_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): - # Replace reference manually - if hgvs_genomic.posedit.edit.ref == '': - hgvs_genomic.posedit.edit.ref = sf.fetch_seq(str(hgvs_genomic.ac), hgvs_genomic.posedit.pos.start.base - 1, - hgvs_genomic.posedit.pos.end.base) - - reverse_normalized_hgvs_genomic = hgvs_genomic - if reverse_normalized_hgvs_genomic.posedit.edit.type == 'identity' and len( - reverse_normalized_hgvs_genomic.posedit.edit.ref) == 0: - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(reverse_normalized_hgvs_genomic) - - # hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - - # Chr - chr = supportedChromosomeBuilds.to_chr_num_ucsc(reverse_normalized_hgvs_genomic.ac, primary_assembly) - if chr is not None: - pass - else: 
- chr = reverse_normalized_hgvs_genomic.ac - - if re.search('[GATC]+\=', str(reverse_normalized_hgvs_genomic.posedit)): - pos = str(reverse_normalized_hgvs_genomic.posedit.pos.start) - ref = reverse_normalized_hgvs_genomic.posedit.edit.ref - alt = reverse_normalized_hgvs_genomic.posedit.edit.ref - - # Insertions - elif (re.search('ins', str(reverse_normalized_hgvs_genomic.posedit)) and not re.search('del', str( - reverse_normalized_hgvs_genomic.posedit))): - end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) - start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) - alt_start = start - 1 # - # Recover sequences - ref_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), alt_start, end - 1) - ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt - # Assemble - pos = start - ref = ref_seq - alt = ref_seq + ins_seq - - # Substitutions - elif re.search('>', str(reverse_normalized_hgvs_genomic.posedit)): - ref = reverse_normalized_hgvs_genomic.posedit.edit.ref - alt = reverse_normalized_hgvs_genomic.posedit.edit.alt - pos = str(reverse_normalized_hgvs_genomic.posedit.pos) - - # Deletions - elif re.search('del', str(reverse_normalized_hgvs_genomic.posedit)) and not re.search('ins', str( - reverse_normalized_hgvs_genomic.posedit)): - end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) - start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) - adj_start = start - 2 - start = start - 1 - try: - ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt - except: - ins_seq = '' - else: - if str(ins_seq) == 'None': - ins_seq = '' - # Recover sequences - hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) - pre_base = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, start) - # Assemble - pos = str(start) - ref = pre_base + hgvs_del_seq - alt = pre_base - - - # inv - elif re.search('inv', str(reverse_normalized_hgvs_genomic.posedit)): - end = 
int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) - start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) - adj_start = start - 1 - start = start - try: - ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt - except: - ins_seq = '' - else: - if str(ins_seq) == 'None': - ins_seq = '' - # Recover sequences - hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) - vcf_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) - bs = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start - 1, adj_start) - # Assemble - pos = str(start) - # pos = str(start-1) - # ref = bs + vcf_del_seq - ref = vcf_del_seq - alt = ins_seq - if re.search('inv', str(reverse_normalized_hgvs_genomic.posedit)): - my_seq = Seq(vcf_del_seq) - # alt = bs + str(my_seq.reverse_complement()) - alt = str(my_seq.reverse_complement()) - - # Delins - elif (re.search('del', str(reverse_normalized_hgvs_genomic.posedit)) and re.search('ins', str( - reverse_normalized_hgvs_genomic.posedit))): - end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) - start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base - 1) - adj_start = start - 1 - start = start - try: - ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt - except: - ins_seq = '' - else: - if str(ins_seq) == 'None': - ins_seq = '' - # Recover sequences - hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) - vcf_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) - # Assemble - pos = str(start) - ref = vcf_del_seq - alt = vcf_del_seq[:1] + ins_seq - - - # Duplications - elif (re.search('dup', str(reverse_normalized_hgvs_genomic.posedit))): - end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) # - start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) - adj_start = start - 2 # - start = start - 1 # - # Recover sequences - dup_seq = 
reverse_normalized_hgvs_genomic.posedit.edit.ref - vcf_ref_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) - # Assemble - pos = str(start) - ref = vcf_ref_seq - alt = vcf_ref_seq + dup_seq - else: - chr = '' - ref = '' - alt = '' - pos = '' - - vcf_dict = {'chr': chr, 'pos': pos, 'ref': ref, 'alt': alt, 'normalized_hgvs': reverse_normalized_hgvs_genomic} - return vcf_dict - - -def hard_right_hgvs2vcf(hgvs_genomic, primary_assembly, hn, sf): - hgvs_genomic_variant = hgvs_genomic - # Reverse normalize hgvs_genomic_variant: NOTE will replace ref - normalized_hgvs_genomic = hn.normalize(hgvs_genomic_variant) - - # Chr - chr = supportedChromosomeBuilds.to_chr_num_ucsc(normalized_hgvs_genomic.ac, primary_assembly) - if chr is not None: - pass - else: - chr = normalized_hgvs_genomic.ac - - if re.search('[GATC]+\=', str(normalized_hgvs_genomic.posedit)): - pos = str(normalized_hgvs_genomic.posedit.pos.start) - ref = normalized_hgvs_genomic.posedit.edit.ref - alt = normalized_hgvs_genomic.posedit.edit.ref - - # Insertions - elif (re.search('ins', str(normalized_hgvs_genomic.posedit)) and not re.search('del', str( - normalized_hgvs_genomic.posedit))): - end = int(normalized_hgvs_genomic.posedit.pos.end.base) - start = int(normalized_hgvs_genomic.posedit.pos.start.base) - alt_start = start - 1 # - # Recover sequences - ref_seq = sf.fetch_seq(str(normalized_hgvs_genomic.ac), alt_start, end - 1) - ins_seq = normalized_hgvs_genomic.posedit.edit.alt - # Assemble - pos = start - ref = ref_seq - alt = ref_seq + ins_seq - - # Substitutions - elif re.search('>', str(normalized_hgvs_genomic.posedit)): - ref = normalized_hgvs_genomic.posedit.edit.ref - alt = normalized_hgvs_genomic.posedit.edit.alt - pos = str(normalized_hgvs_genomic.posedit.pos) - - # Deletions - elif re.search('del', str(normalized_hgvs_genomic.posedit)) and not re.search('ins', - str(normalized_hgvs_genomic.posedit)): - end = int(normalized_hgvs_genomic.posedit.pos.end.base) - start = 
int(normalized_hgvs_genomic.posedit.pos.start.base) - adj_start = start - 2 - start = start - 1 - try: - ins_seq = normalized_hgvs_genomic.posedit.edit.alt - except: - ins_seq = '' - else: - if str(ins_seq) == 'None': - ins_seq = '' - # Recover sequences - hgvs_del_seq = sf.fetch_seq(str(normalized_hgvs_genomic.ac), start, end) - pre_base = sf.fetch_seq(str(normalized_hgvs_genomic.ac), adj_start, start) - # Assemble - pos = str(start) - ref = pre_base + hgvs_del_seq - alt = pre_base - - # inv - elif re.search('inv', str(normalized_hgvs_genomic.posedit)): - end = int(normalized_hgvs_genomic.posedit.pos.end.base) - start = int(normalized_hgvs_genomic.posedit.pos.start.base) - adj_start = start - 1 - start = start - try: - ins_seq = normalized_hgvs_genomic.posedit.edit.alt - except: - ins_seq = '' - else: - if str(ins_seq) == 'None': - ins_seq = '' - # Recover sequences - hgvs_del_seq = sf.fetch_seq(str(normalized_hgvs_genomic.ac), start, end) - vcf_del_seq = sf.fetch_seq(str(normalized_hgvs_genomic.ac), adj_start, end) - bs = sf.fetch_seq(str(normalized_hgvs_genomic.ac), adj_start - 1, adj_start) - # Assemble - pos = str(start) - # pos = str(start-1) - # ref = bs + vcf_del_seq - ref = vcf_del_seq - alt = ins_seq - if re.search('inv', str(normalized_hgvs_genomic.posedit)): - my_seq = Seq(vcf_del_seq) - # alt = bs + str(my_seq.reverse_complement()) - alt = str(my_seq.reverse_complement()) - - # Delins - elif (re.search('del', str(normalized_hgvs_genomic.posedit)) and re.search('ins', - str(normalized_hgvs_genomic.posedit))): - end = int(normalized_hgvs_genomic.posedit.pos.end.base) - start = int(normalized_hgvs_genomic.posedit.pos.start.base - 1) - adj_start = start - 1 - start = start - try: - ins_seq = normalized_hgvs_genomic.posedit.edit.alt - except: - ins_seq = '' - else: - if str(ins_seq) == 'None': - ins_seq = '' - # Recover sequences - hgvs_del_seq = sf.fetch_seq(str(normalized_hgvs_genomic.ac), start, end) - vcf_del_seq = 
sf.fetch_seq(str(normalized_hgvs_genomic.ac), adj_start, end) - # Assemble - pos = str(start) - ref = vcf_del_seq - alt = vcf_del_seq[:1] + ins_seq - - - # Duplications - elif (re.search('dup', str(normalized_hgvs_genomic.posedit))): - end = int(normalized_hgvs_genomic.posedit.pos.end.base) # - start = int(normalized_hgvs_genomic.posedit.pos.start.base) - adj_start = start - 2 # - start = start - 1 # - # Recover sequences - dup_seq = normalized_hgvs_genomic.posedit.edit.ref - vcf_ref_seq = sf.fetch_seq(str(normalized_hgvs_genomic.ac), adj_start, end) - # Assemble - pos = str(start) - ref = vcf_ref_seq - alt = vcf_ref_seq + dup_seq - else: - chr = '' - ref = '' - alt = '' - pos = '' - - # ADD SURROUNDING BASES - if chr != '' and pos != '' and ref != '' and alt != '': - # Add 2 post bases - pos = int(pos) - pre_end_pos = pos + len(ref) - end_pos = pre_end_pos + 1 - post = sf.fetch_seq(str(normalized_hgvs_genomic.ac), pre_end_pos - 1, end_pos) - ref = ref + post - alt = alt + post - - # Dictionary the VCF - vcf_dict = {'chr': chr, 'pos': pos, 'ref': ref, 'alt': alt, 'normalized_hgvs': normalized_hgvs_genomic} - return vcf_dict - - -def hard_left_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): - hgvs_genomic_variant = hgvs_genomic - # Reverse normalize hgvs_genomic_variant: NOTE will replace ref - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic_variant) - hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - - # Chr - chr = supportedChromosomeBuilds.to_chr_num_ucsc(reverse_normalized_hgvs_genomic.ac, primary_assembly) - if chr is not None: - pass - else: - chr = reverse_normalized_hgvs_genomic.ac - - if re.search('[GATC]+\=', str(reverse_normalized_hgvs_genomic.posedit)): - pos = str(reverse_normalized_hgvs_genomic.posedit.pos.start) - ref = reverse_normalized_hgvs_genomic.posedit.edit.ref - alt = reverse_normalized_hgvs_genomic.posedit.edit.ref - - # Insertions - elif (re.search('ins', 
str(reverse_normalized_hgvs_genomic.posedit)) and not re.search('del', str( - reverse_normalized_hgvs_genomic.posedit))): - end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) - start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) - alt_start = start - 1 # - # Recover sequences - ref_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), alt_start, end - 1) - ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt - # Assemble - pos = start - ref = ref_seq - alt = ref_seq + ins_seq - - # Substitutions - elif re.search('>', str(reverse_normalized_hgvs_genomic.posedit)): - ref = reverse_normalized_hgvs_genomic.posedit.edit.ref - alt = reverse_normalized_hgvs_genomic.posedit.edit.alt - pos = str(reverse_normalized_hgvs_genomic.posedit.pos) - - # Deletions - elif re.search('del', str(reverse_normalized_hgvs_genomic.posedit)) and not re.search('ins', str( - reverse_normalized_hgvs_genomic.posedit)): - end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) - start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) - adj_start = start - 2 - start = start - 1 - try: - ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt - except: - ins_seq = '' - else: - if str(ins_seq) == 'None': - ins_seq = '' - # Recover sequences - hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) - pre_base = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, start) - # Assemble - pos = str(start) - ref = pre_base + hgvs_del_seq - alt = pre_base - - - # inv - elif re.search('inv', str(reverse_normalized_hgvs_genomic.posedit)): - end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) - start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) - adj_start = start - 1 - start = start - try: - ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt - except: - ins_seq = '' - else: - if str(ins_seq) == 'None': - ins_seq = '' - # Recover sequences - hgvs_del_seq = 
sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) - vcf_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) - bs = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start - 1, adj_start) - # Assemble - pos = str(start) - # pos = str(start-1) - # ref = bs + vcf_del_seq - ref = vcf_del_seq - alt = ins_seq - if re.search('inv', str(reverse_normalized_hgvs_genomic.posedit)): - my_seq = Seq(vcf_del_seq) - # alt = bs + str(my_seq.reverse_complement()) - alt = str(my_seq.reverse_complement()) - - # Delins - elif (re.search('del', str(reverse_normalized_hgvs_genomic.posedit)) and re.search('ins', str( - reverse_normalized_hgvs_genomic.posedit))): - end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) - start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base - 1) - adj_start = start - 1 - start = start - try: - ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt - except: - ins_seq = '' - else: - if str(ins_seq) == 'None': - ins_seq = '' - # Recover sequences - hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) - vcf_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) - # Assemble - pos = str(start) - ref = vcf_del_seq - alt = vcf_del_seq[:1] + ins_seq - - - # Duplications - elif (re.search('dup', str(reverse_normalized_hgvs_genomic.posedit))): - end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) # - start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) - adj_start = start - 2 # - start = start - 1 # - # Recover sequences - dup_seq = reverse_normalized_hgvs_genomic.posedit.edit.ref - vcf_ref_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) - # Assemble - pos = str(start) - ref = vcf_ref_seq - alt = vcf_ref_seq + dup_seq - else: - chr = '' - ref = '' - alt = '' - pos = '' - - # ADD SURROUNDING BASES - if chr != '' and pos != '' and ref != '' and alt != '': - pre_pos = int(pos) - 1 - 
pre_pos - prev = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), pre_pos - 1, pre_pos) - pos = str(pre_pos) - ref = prev + ref - alt = prev + alt - - # Dictionary the VCF - vcf_dict = {'chr': chr, 'pos': pos, 'ref': ref, 'alt': alt, 'normalized_hgvs': reverse_normalized_hgvs_genomic} - return vcf_dict - - -def hgvs_ref_alt(hgvs_variant, sf): - if re.search('[GATC]+\=', str(hgvs_variant.posedit)): - ref = hgvs_variant.posedit.edit.ref - alt = hgvs_variant.posedit.edit.ref - - # Insertions - elif (re.search('ins', str(hgvs_variant.posedit)) and not re.search('del', str(hgvs_variant.posedit))): - end = int(hgvs_variant.posedit.pos.end.base) - start = int(hgvs_variant.posedit.pos.start.base) - alt_start = start - 1 # - # Recover sequences - ref_seq = sf.fetch_seq(str(hgvs_variant.ac), alt_start, end) - ins_seq = hgvs_variant.posedit.edit.alt - # Assemble - ref = ref_seq - alt = ref_seq[:1] + ins_seq + ref_seq[-1:] - - # Substitutions - elif re.search('>', str(hgvs_variant.posedit)): - ref = hgvs_variant.posedit.edit.ref - alt = hgvs_variant.posedit.edit.alt - - # Deletions - elif re.search('del', str(hgvs_variant.posedit)) and not re.search('ins', str(hgvs_variant.posedit)): - ref = hgvs_variant.posedit.edit.ref - alt = '' - - # inv - elif re.search('inv', str(hgvs_variant.posedit)): - ref = hgvs_variant.posedit - my_seq = Seq(ref) - alt = str(my_seq.reverse_complement()) - - # Delins - elif (re.search('del', str(hgvs_variant.posedit)) and re.search('ins', str(hgvs_variant.posedit))): - ref = hgvs_variant.posedit.edit.ref - alt = hgvs_variant.posedit.edit.alt - - # Duplications - elif (re.search('dup', str(hgvs_variant.posedit))): - ref = hgvs_variant.posedit.edit.ref - alt = hgvs_variant.posedit.edit.ref + hgvs_variant.posedit.edit.ref - else: - ref = '' - alt = '' - - ref_alt_dict = {'ref': ref, 'alt': alt} - return ref_alt_dict - -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it 
and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# \ No newline at end of file diff --git a/VariantValidator/variantanalyser/liftover.py b/VariantValidator/variantanalyser/liftover.py deleted file mode 100644 index 3f17619e..00000000 --- a/VariantValidator/variantanalyser/liftover.py +++ /dev/null @@ -1,344 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Liftover between genome builds is most accurate when mapping via a RefSeq transcript. -For intergenic regions, the process is more complex. -Lift position > Check bases > Lift back and confirm the original position -""" - -# import modules -import hgvs.exceptions -import hgvs.sequencevariant -import re -import os -import supported_chromosome_builds as scb -import hgvs2vcf -from pyliftover import LiftOver -import warnings -from Bio.Seq import Seq - -# Pre compile variables -hgvs.global_config.formatting.max_ref_length = 1000000 - -# Determine whether a liftover directory has been added to the environment -PYLIFTOVER_DIR = os.environ.get('PYLIFTOVER_DIR') - -def mystr(hgvs_nucleotide): - hgvs_nucleotide_refless = hgvs_nucleotide.format({'max_ref_length': 0}) - return hgvs_nucleotide_refless - -def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, reverse_normalizer, sf, evm): - - """ - :param hgvs_genomic: hgvs_object genomic description accession NC, NT, or NW. 
Not NG - :param build_from: - :param build_to: - :return: lifted {} - Step 1, attempt to liftover using a common RefSeq transcript - """ - - try: - hgvs_genomic = hp.parse_hgvs_variant(hgvs_genomic) - except TypeError: - pass - - # Create return dictionary - lifted_response = {} - - # Check genome build type - if re.match('GRC', build_from): - from_set = 'grc_chr' - alt_from_set = 'ucsc_chr' - if re.search('37', build_from): - lo_from = 'hg19' - alt_build_from = 'hg19' - if re.search('38', build_from): - lo_from = 'hg38' - alt_build_from = 'hg38' - - else: - from_set = 'ucsc_chr' - alt_from_set = 'grc_chr' - if re.search('19', build_from): - lo_from = 'hg19' - alt_build_from = 'GRCh37' - if re.search('38', build_from): - lo_from = 'hg38' - alt_build_from = 'GRCh38' - - if re.match('GRC', build_to): - to_set = 'grc_chr' - alt_to_set = 'ucsc_chr' - if re.search('37', build_to): - lo_to = 'hg19' - alt_build_to = 'hg19' - if re.search('38', build_to): - lo_to = 'hg38' - alt_build_to = 'hg38' - else: - to_set = 'ucsc_chr' - alt_to_set = 'grc_chr' - if re.search('19', build_to): - lo_to = 'hg19' - alt_build_to = 'GRCh37' - if re.search('38', build_to): - lo_to = 'hg38' - alt_build_to = 'GRCh38' - - # populate the variant from data - vcf = hgvs2vcf.report_hgvs2vcf(hgvs_genomic, build_from, reverse_normalizer, sf) - - # Create to and from dictionaries - lifted_response[build_from.lower()] = {} - lifted_response[build_from.lower()][hgvs_genomic.ac] = {'hgvs_genomic_description': mystr(hgvs_genomic), - 'vcf': { - 'chr': vcf[from_set], - 'pos': str(vcf['pos']), - 'ref': vcf['ref'], - 'alt': vcf['alt']} - } - lifted_response[alt_build_from.lower()] = {} - lifted_response[alt_build_from.lower()][hgvs_genomic.ac] = {'hgvs_genomic_description': mystr(hgvs_genomic), - 'vcf': { - 'chr': vcf[alt_from_set], - 'pos': str(vcf['pos']), - 'ref': vcf['ref'], - 'alt': vcf['alt']} - } - # From dictionary currently blank - lifted_response[build_to.lower()] = {} - 
lifted_response[alt_build_to.lower()] = {} - - # Get a list of overlapping RefSeq transcripts - # Note, due to 0 base positions in UTA (I think) occasionally tx will - rts_list = hdp.get_tx_for_region(hgvs_genomic.ac, 'splign', hgvs_genomic.posedit.pos.start.base - 1, - hgvs_genomic.posedit.pos.end.base - 1) - rts_dict = {} - tx_list = False - for tx_dat in rts_list: - rts_dict[tx_dat[0]] = True - rts_list_2 = evm.relevant_transcripts(hgvs_genomic) - for tx_dat_2 in rts_list_2: - rts_dict[tx_dat_2] = True - if rts_dict != {}: - tx_list = rts_dict.keys() - - # Try to liftover - if tx_list is not False: - selected = [] - for tx in tx_list: - # identify the first transcript if any - options = hdp.get_tx_mapping_options(tx) - for op in options: - if re.match('NC_', op[1]): - if re.match('GRC', build_to): - sfm = scb.to_chr_num_refseq(op[1], build_to) - if re.match('hg', build_to): - sfm = scb.to_chr_num_ucsc(op[1], build_to) - if sfm is not None: - selected.append([op[0], op[1]]) - for op in options: - if re.match('NT_', op[1]): - if re.match('GRC', build_to): - sfm = scb.to_chr_num_refseq(op[1], build_to) - if re.match('hg', build_to): - sfm = scb.to_chr_num_ucsc(op[1], build_to) - if sfm is not None: - selected.append([op[0], op[1]]) - for op in options: - if re.match('NW_', op[1]): - if re.match('GRC', build_to): - sfm = scb.to_chr_num_refseq(op[1], build_to) - if re.match('hg', build_to): - sfm = scb.to_chr_num_ucsc(op[1], build_to) - if sfm is not None: - selected.append([op[0], op[1]]) - - # remove duplicate chroms - filtered_1 = {} - if selected: - for chroms in selected: - if chroms[1] in filtered_1.keys(): - pass - else: - filtered_1[chroms[1]] = chroms[0] - added_data = False - for key, val in filtered_1.iteritems(): - try: - # Note, due to 0 base positions in UTA (I think) occasionally tx will - # be identified that cannot be mapped to. 
- # In this instance, do not mark added data as True - hgvs_tx = vm.g_to_t(hgvs_genomic, val) - hgvs_alt_genomic = vm.t_to_g(hgvs_tx, key) - alt_vcf = hgvs2vcf.report_hgvs2vcf(hgvs_alt_genomic, build_to, reverse_normalizer, sf) - - # Add the to build dictionaries - lifted_response[build_to.lower()][hgvs_alt_genomic.ac] = { - 'hgvs_genomic_description': mystr(hgvs_alt_genomic), - 'vcf': { - 'chr': alt_vcf[to_set], - 'pos': str(alt_vcf['pos']), - 'ref': alt_vcf['ref'], - 'alt': alt_vcf['alt']} - } - lifted_response[alt_build_to.lower()][hgvs_alt_genomic.ac] = { - 'hgvs_genomic_description': mystr(hgvs_alt_genomic), - 'vcf': { - 'chr': alt_vcf[alt_to_set], - 'pos': str(alt_vcf['pos']), - 'ref': alt_vcf['ref'], - 'alt': alt_vcf['alt']} - } - added_data = True - except hgvs.exceptions.HGVSInvalidIntervalError as e: - continue - - if lifted_response != {} and added_data is not False: - return lifted_response - else: - pass - else: - # liftover has failed - pass - - """ - Step 2, attempt to liftover using PyLiftover. - Lift position > Check bases > Lift back and confirm the original position - """ - - # Note: pyliftover uses the UCSC liftOver tool. 
- # https://pypi.org/project/pyliftover/ - # Once validated, download the UCSC liftover files from http://hgdownload.cse.ucsc.edu/goldenPath/hg38/liftOver/ - - # The structure of the following code comes from VV pymod, so need to create a list - genome_builds = [build_to] - - # Create liftover vcf - from_vcf = hgvs2vcf.report_hgvs2vcf(hgvs_genomic, lo_from, reverse_normalizer, sf) - - if PYLIFTOVER_DIR is not None: - lo_filename_to = PYLIFTOVER_DIR + "%sTo%s.over.chain" % (lo_from, lo_to) - lo_filename_to = str(lo_filename_to.replace('Tohg', 'ToHg')) - - lo = LiftOver(lo_filename_to) - else: - lo = LiftOver(lo_from, lo_to) - - # Fix the GRC CHR - if re.match('chr', from_vcf[from_set]): - liftover_list = lo.convert_coordinate(from_vcf[from_set], int(from_vcf['pos'])) - else: - my_chrom = 'chr' + from_vcf[from_set] - liftover_list = lo.convert_coordinate(my_chrom, int(from_vcf['pos'])) - - - # Create dictionary - primary_genomic_dicts = {} - for lifted in liftover_list: - chr = lifted[0] - pos = lifted[1] - orientated = lifted[2] - - lifted_ref_bases = from_vcf['ref'] - lifted_alt_bases = from_vcf['alt'] - - # Inverted sequence - if orientated != '+': - my_seq = Seq(lifted_ref_bases) - lifted_ref_bases = my_seq.reverse_complement() - your_seq = Seq(lifted_alt_bases) - lifted_alt_bases = your_seq.reverse_complement() - accession = scb.to_accession(chr, lo_to) - if accession is None: - wrn = 'Unable to identify an equivalent %s chromosome ID for %s' % (str(lo_to), str(chr)) - warnings.warn(wrn) - continue - else: - not_delins = accession + ':g.' 
+ str(pos) + '_' + str( - (pos - 1) + len(lifted_ref_bases)) + 'del' + lifted_ref_bases + 'ins' + lifted_alt_bases - hgvs_not_delins = hp.parse_hgvs_variant(not_delins) - try: - vr.validate(hgvs_not_delins) - except hgvs.exceptions.HGVSError as e: - warnings.warn(str(e)) - # Most likely incorrect bases - continue - else: - hgvs_lifted = hn.normalize(hgvs_not_delins) - # Now try map back - if PYLIFTOVER_DIR is not None: - lo_filename_from = PYLIFTOVER_DIR + "%sTo%s.over.chain" % (lo_to, lo_from) - - lo_filename_from = str(lo_filename_from.replace('Tohg', 'ToHg')) - lo = LiftOver(lo_filename_from) - else: - lo = LiftOver(lo_to, lo_from) - - # Lift back - liftback_list = lo.convert_coordinate(chr, pos) - - for lifted_back in liftback_list: - # Pull out the good guys! - # Need to add chr to the from_set - if not re.match('chr', lifted_back[0]): - my_from_chr = 'chr' + lifted_back[0] - else: - my_from_chr = lifted_back[0] - - if lifted_back[0] == from_vcf[from_set] or lifted_back[0] == my_from_chr: - if lifted_back[1] == int(from_vcf['pos']): - for build in genome_builds: - vcf_dict = hgvs2vcf.report_hgvs2vcf(hgvs_lifted, build, reverse_normalizer, sf) - if re.match('GRC', build): - lifted_response[build_to.lower()][hgvs_lifted.ac] = { - 'hgvs_genomic_description': mystr(hgvs_lifted), - 'vcf': {'chr': vcf_dict['grc_chr'], - 'pos': str(vcf_dict['pos']), - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } - lifted_response[alt_build_to.lower()][hgvs_lifted.ac] = { - 'hgvs_genomic_description': mystr(hgvs_lifted), - 'vcf': {'chr': vcf_dict['ucsc_chr'], - 'pos': str(vcf_dict['pos']), - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } - else: - lifted_response[build_to.lower()][hgvs_lifted.ac] = { - 'hgvs_genomic_description': mystr(hgvs_lifted), - 'vcf': {'chr': vcf_dict['ucsc_chr'], - 'pos': str(vcf_dict['pos']), - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } - lifted_response[alt_build_to.lower()][hgvs_lifted.ac] = { - 
'hgvs_genomic_description': mystr(hgvs_lifted), - 'vcf': {'chr': vcf_dict['grc_chr'], - 'pos': str(vcf_dict['pos']), - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } - return lifted_response - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# - - diff --git a/VariantValidator/variantanalyser/links.py b/VariantValidator/variantanalyser/links.py deleted file mode 100644 index 8a225d9f..00000000 --- a/VariantValidator/variantanalyser/links.py +++ /dev/null @@ -1,1191 +0,0 @@ -# -*- coding: utf-8 -*- -""" -links.py - -is an extension module of functions.py. It was ofiginally built to provide necesary -functions for compiling reference sequence alignments - -The module contains additional VariantValidator sub-functions. The majoirty of these functions require -hgvs Python package top-level functions or sub-functions contained in uta.py and -seqfetcher.py - -""" - -# IMPORT REQUIRED PYTHON MODULES -import re - -# BioPython modules -from Bio.Seq import Seq -from Bio.Alphabet import IUPAC - -""" -Function which predicts the protein effect of c. 
inversions -""" - - -def pro_inv_info(prot_ref_seq, prot_var_seq): - info = { - 'variant': 'true', - 'prot_del_seq': '', - 'prot_ins_seq': '', - 'edit_start': 0, - 'edit_end': 0, - 'terminate': 'false', - 'ter_pos': 0, - 'error': 'false' - } - - # Is there actually any variation? - if prot_ref_seq == prot_var_seq: - info['variant'] = 'false' - else: - # Deal with terminations - term = re.compile("\*") - if term.search(prot_var_seq): - # Set the termination reporter to true - info['terminate'] = 'true' - # The termination position will be equal to the length of the variant sequence because it's a TERMINATOR!!! - info['ter_pos'] = len(prot_var_seq) - # cut the ref sequence to == size - prot_ref_seq = prot_ref_seq[0:info['ter_pos']] - prot_var_seq = prot_var_seq[0:info['ter_pos']] - - # Whether terminated or not, the sequences should now be the same length - # Unless the termination codon has been disrupted - if len(prot_var_seq) < len(prot_ref_seq): - info['error'] = 'true' - return info - else: - # Set the counter - aa_counter = 0 - - # Make list copies of the sequences to gather the required info - ref = list(prot_ref_seq) - var = list(prot_var_seq) - - # Loop through ref list to find the first missmatch position - for aa in ref: - if ref[aa_counter] == var[aa_counter]: - aa_counter = aa_counter + 1 - else: - break - - # Enter the start position - info['edit_start'] = aa_counter + 1 - # Remove those elements form the list - del ref[0:aa_counter] - del var[0:aa_counter] - - # the sequences should now be the same length - # Except if the termination codon was removed - if len(ref) > len(var): - info['error'] = 'true' - return info - else: - # Reset the aa_counter but to go backwards - aa_counter = 0 - # reverse the lists - ref = ref[::-1] - var = var[::-1] - # Reverse loop through ref list to find the first missmatch position - for aa in ref: - if var[aa_counter] == '\*': - break - if aa == var[aa_counter]: - aa_counter = aa_counter + 1 - else: - break - # Remove 
those elements form the list - del ref[0:aa_counter] - del var[0:aa_counter] - # re-reverse the lists - ref = ref[::-1] - var = var[::-1] - - # If the var is > ref, the ter has been removed, need to re-add ter to each - if len(ref) < len(var): - ref.append('*') - if prot_var_seq[-1] == '*': - var.append('*') - # the sequences should now be the same length - # Except if the ter was removed - if len(ref) > len(var): - info['error'] = 'true' - return info - else: - # Enter the sequences - info['prot_del_seq'] = ''.join(ref) - info['prot_ins_seq'] = ''.join(var) - info['edit_end'] = info['edit_start'] + len(ref) - 1 - return info - -def pro_delins_info(prot_ref_seq, prot_var_seq): - info = { - 'variant' : 'true', - 'prot_del_seq' : '', - 'prot_ins_seq' : '', - 'edit_start' : 0, - 'edit_end' : 0, - 'terminate' : 'false', - 'ter_pos' : 0, - 'error' : 'false' - } - - # Is there actually any variation? - if prot_ref_seq == prot_var_seq: - info['variant'] = 'false' - else: - # Deal with terminations - term = re.compile("\*") - if term.search(prot_var_seq): - # Set the termination reporter to true - info['terminate'] = 'true' - # The termination position will be equal to the length of the variant sequence because it's a TERMINATOR!!! 
- info['ter_pos'] = len(prot_var_seq) - # cut the ref sequence to == size - prot_ref_seq = prot_ref_seq[0:info['ter_pos']] - prot_var_seq = prot_var_seq[0:info['ter_pos']] - - # Whether terminated or not, the sequences should now be the same length - # Unless the termination codon has been disrupted - if len(prot_var_seq) < len(prot_ref_seq): - info['error'] = 'true' - return info - else: - # Set the counter - aa_counter = 0 - - # Make list copies of the sequences to gather the required info - ref = list(prot_ref_seq) - var = list(prot_var_seq) - - # Loop through ref list to find the first missmatch position - for aa in ref: - if ref[aa_counter] == var[aa_counter]: - aa_counter = aa_counter + 1 - else: - break - - # Enter the start position - info['edit_start'] = aa_counter + 1 - # Remove those elements form the list - del ref[0:aa_counter] - del var[0:aa_counter] - - # the sequences should now be the same length - # Except if the termination codon was removed - if len(ref) > len(var): - info['error'] = 'true' - return info - else: - # Reset the aa_counter but to go backwards - aa_counter = 0 - # reverse the lists - ref = ref[::-1] - var = var[::-1] - # Reverse loop through ref list to find the first missmatch position - for aa in ref: - if var[aa_counter] == '\*': - break - if aa == var[aa_counter]: - aa_counter = aa_counter + 1 - else: - break - # Remove those elements form the list - del ref[0:aa_counter] - del var[0:aa_counter] - # re-reverse the lists - ref = ref[::-1] - var = var[::-1] - - # If the var is > ref, the ter has been removed, need to re-add ter to each -# if len(ref) < len(var): -# ref.append('*') -# if prot_var_seq[-1] == '*': -# var.append('*') - - # the sequences should now be the same length - # Except if the ter was removed - if len(ref) > len(var): - info['error'] = 'true' - return info - else: - # Enter the sequences - info['prot_del_seq'] = ''.join(ref) - info['prot_ins_seq'] = ''.join(var) - info['edit_end'] = info['edit_start'] + 
len(ref) -1 - return info - - - -""" -Translate c. reference sequences, including those that have been modified -must have the CDS in the specified position -""" - - -def translate(ed_seq, cds_start): - # ed_seq = ed_seq.replace('\n', '') - ed_seq = ed_seq.strip() - # Ensure the starting codon is in the correct position - met = ed_seq[cds_start:cds_start + 3] - if (met == 'ATG') or (met == 'atg'): - # Remove the 5 prime UTR - sequence = ed_seq[cds_start:] - coding_dna = Seq(str(sequence), IUPAC.unambiguous_dna) - # Translate - trans = coding_dna.translate() - aain = list(trans) - aaout = [] - count = 0 - while aain: - if aain[count] != '*': - aaout.append(aain[count]) - count = count + 1 - else: - aaout.append(aain[count]) - break - translation = ''.join(aaout) - # Apply a width of 60 characters to the string output - # translation = textwrap.fill(translation, width=60) - return translation - else: - translation = 'error' - return translation - - -""" -Convert single letter amino acid code to 3 letter code -""" - - -def one_to_three(seq): - aacode = { - 'A': 'Ala', 'C': 'Cys', 'D': 'Asp', 'E': 'Glu', - 'F': 'Phe', 'G': 'Gly', 'H': 'His', 'I': 'Ile', - 'K': 'Lys', 'L': 'Leu', 'M': 'Met', 'N': 'Asn', - 'P': 'Pro', 'Q': 'Gln', 'R': 'Arg', 'S': 'Ser', - 'T': 'Thr', 'V': 'Val', 'W': 'Trp', 'Y': 'Tyr', - '*': 'Ter'} - - oned = list(seq) - out = [] - for aa in oned: - get_value = aacode.get(aa) - out.append(get_value) - - threed_up = ''.join(out) - - return threed_up - - -""" -Takes a reference sequence and inverts the specified position -""" - - -# n. Inversions - This comes from VariantValidator, not validation!!!! 
-def n_inversion(ref_seq, del_seq, inv_seq, interval_start, interval_end): - # Open a list to store the fasta file - sequence = '' - - # Use string indexing to check whether the sequences are the same - test = ref_seq[interval_start - 1:interval_end] - - if test == del_seq: - sequence = ref_seq[0:interval_start - 1] + inv_seq + ref_seq[interval_end:] - return sequence - else: - sequence = 'error' - return sequence - - -# - -# - - -""" -The following section contains old functions which may be recycled -Do not delete -""" - -""" -mark for removal -""" -# Coding Inversions - This comes from VariantValidator, not validation!!!! -# def coding_inversion(ref_seq, del_seq, inv_seq, interval_start, interval_end): -# -# # Open a list to store the fasta file -# sequence = '' -# -# # Use string indexing to check whether the sequences are the same -# test = ref_seq[interval_start-1:interval_end] -# # return test -# if test == del_seq: -# sequence = ref_seq[0:interval_start-1] + inv_seq + ref_seq[interval_end:] -# return sequence -# else: -# sequence = 'error' -# return sequence - - -""" -Legacy function, May be recycled -""" -# SIMPLE DELETIONS -# def sim_del(tx_ac_fasta_title, ac_seq, interval_start, interval_end, variant): -# -# # Open a list to store the fasta file -# tx_ac_fasta = [] -# -# # Intronic positions need to be dealt with -# pl = re.compile('\+') -# mi = re.compile('\-') -# if (pl.search(variant)) or (mi.search(variant)): -# if interval_start == interval_end: -# # Append the title line -# tx_ac_fasta.append(tx_ac_fasta_title) -# # Append the sequence -# tx_ac_fasta.append(ac_seq) -# return tx_ac_fasta -# else: -# pass -# -# # Append the title line -# tx_ac_fasta.append(tx_ac_fasta_title) -# -# # Remove any white space from the string (tabs or spaces) -# # ac_text = textwrap.dedent(ac_seq).strip() -# ac_text = ac_seq -# -# # Assign the original sequence to a list -# ac_list = list(ac_text) -# -# # Delete the required bases (elements) -# del ac_list[interval_start 
- 1 : interval_end] -# -# # Join the sequence back together -# seq_text = ''.join(ac_list) -# -# # Apply a width of 60 characters to the string output -# #seq_text = textwrap.fill(seq_text, width=60) -# -# # Make edits DNA -# seq_text = seq_text.replace("U", "T") -# seq_text = seq_text.replace("u", "t") -# -# # Append the sequence -# tx_ac_fasta.append(seq_text) -# -# return tx_ac_fasta - -""" -legacy function, May be recycled -""" -# SIMPLE INSERTIONS -# def sim_ins(tx_ac_fasta_title, ac_seq, interval_start, interval_end, edit, variant): -# -# # Open a dictionary to store the fasta file -# tx_ac_fasta_dict = { -# 'tx_ac_fasta' : [], -# 'edit_len' : 0 -# } -# -# # Intronic positions need to be dealt with -# pl = re.compile('\+') -# mi = re.compile('\-') -# if (pl.search(variant)) or (mi.search(variant)): -# # Append the title line -# tx_ac_fasta_dict['tx_ac_fasta'].append(tx_ac_fasta_title) -# # Append the sequence -# tx_ac_fasta_dict['tx_ac_fasta'].append(ac_seq) -# return tx_ac_fasta_dict -# else: -# pass -# -# # Append the title line -# tx_ac_fasta_dict['tx_ac_fasta'].append(tx_ac_fasta_title) -# -# # Remove any white space from the string (tabs or spaces) -# # ac_text = textwrap.dedent(ac_seq).strip() -# ac_text = ac_seq -# -# # Extract inserted sequence and change into a list -# ins_seq = re.search(r"(([GATCUgatcu]+)$)", edit) -# ins_seq = ins_seq.group(1) -# -# # Add the edit length to the dictionary -# tx_ac_fasta_dict['edit_len'] = len(ins_seq) -# -# # Assign the original sequence to a list -# ac_list = list(ac_text) -# -# # Assign the bases up to the edit to the edit list -# edit_list = ac_list[0:interval_start] -# -# # Delete the completed bases from the sequence list -# del ac_list[0:interval_start] -# -# # Append the insertion and the remaining sequence -# for base in ins_list: -# edit_list.append(base) -# -# for base in ac_list: -# edit_list.append(base) -# -# # Join the sequence back together -# seq_text = ''.join(edit_list) -# -# # Make edits DNA -# 
seq_text = seq_text.replace("U", "T") -# seq_text = seq_text.replace("u", "t") -# -# # Apply a width of 60 characters to the string output -# # seq_text = textwrap.fill(seq_text, width=60) -# -# # Append the sequence -# tx_ac_fasta_dict['tx_ac_fasta'].append(seq_text) -# -# return tx_ac_fasta_dict - - -""" -legacy function, May be recycled -""" -# DELETION INSERTIONS -# def delins(tx_ac_fasta_title, ac_seq, interval_start, interval_end, edit, variant, type): -# # Open a dictionary to store the fasta file -# tx_ac_fasta_dict = { -# 'tx_ac_fasta' : [], -# 'edit_len' : 0 -# } -# -# # Intronic positions need to be dealt with -# pl = re.compile('\+') -# mi = re.compile('\-') -# if (pl.search(variant)) or (mi.search(variant)): -# if interval_start == interval_end: -# # Append the title line -# tx_ac_fasta_dict['tx_ac_fasta'].append(tx_ac_fasta_title) -# # Append the sequence -# tx_ac_fasta_dict['tx_ac_fasta'].append(ac_seq) -# # Append the insert length as zero -# tx_ac_fasta_dict['edit_len'] = 0 -# return tx_ac_fasta_dict -# else: -# pass -# -# # Append the title line -# tx_ac_fasta_dict['tx_ac_fasta'].append(tx_ac_fasta_title) -# -# # Remove any white space from the string (tabs or spaces) -# # ac_text = textwrap.dedent(ac_seq).strip() -# ac_text = ac_seq -# -# # Extract inserted sequence and change into a list -# ins_seq = re.search(r"(([GATCUgatcu]+)$)", edit) -# ins_seq = ins_seq.group(1) -# # Calculate the edit length -# edit_len = len(ins_seq) -# # Balance out the insertions at exon_intron boundaries -# if type != ':g.': -# if mi.search(variant): -# if edit_len > (interval_end - interval_start +1): -# edit_len = (interval_end - interval_start +1) -# ins_seq = ins_seq[-edit_len:] -# else: -# pass -# if pl.search(variant): -# if edit_len > (interval_end - interval_start +1): -# edit_len = (interval_end - interval_start +1) -# ins_seq = ins_seq[0:edit_len] -# else: -# pass -# else: -# pass -# -# # Add the edit length to the dictionary -# tx_ac_fasta_dict['edit_len'] 
= edit_len -# -# # List the insert -# ins_list = list(ins_seq) -# -# # Assign the original sequence to a list -# ac_list = list(ac_text) -# -# # Delete the required bases (elements) and add the insert list -# ac_list[interval_start - 1 : interval_end] = ins_list -# -# # Join the sequence back together -# seq_text = ''.join(ac_list) -# -# # Make edits DNA -# seq_text = seq_text.replace("U", "T") -# seq_text = seq_text.replace("u", "t") -# -# # Apply a width of 60 characters to the string output -# # seq_text = textwrap.fill(seq_text, width=60) -# -# # Append the sequence -# tx_ac_fasta_dict['tx_ac_fasta'].append(seq_text) -# -# return tx_ac_fasta_dict - -""" -legacy function, May be recycled -""" -# POINT MUTATIONS -# def point(tx_ac_fasta_title, ac_seq, interval_start, edit, variant): -# # Open a dictionary to store the fasta file -# tx_ac_fasta_dict = { -# 'tx_ac_fasta' : [], -# 'flag' : '' -# } -# -# # Intronic positions need to be dealt with -# pl = re.compile('\+') -# mi = re.compile('\-') -# if (pl.search(variant)) or (mi.search(variant)): -# # Append the title line -# tx_ac_fasta_dict['tx_ac_fasta'].append(tx_ac_fasta_title) -# # Append the sequence -# tx_ac_fasta_dict['tx_ac_fasta'].append(ac_seq) -# return tx_ac_fasta_dict -# else: -# pass -# -# # Append the title line -# tx_ac_fasta_dict['tx_ac_fasta'].append(tx_ac_fasta_title) -# -# start_base = re.search(r"(^([GATCUgatcu]+))", edit) -# start_base = start_base.group(1) -# -# end_base = re.search(r"(([GATCUgatcu]+)$)", edit) -# end_base = end_base.group(1) -# -# if end_base == "U": -# end_base = end_base.replace("U", "T") -# if end_base == "u": -# end_base = end_base.replace("u", "t") -# if start_base == "U": -# start_base = start_base.replace("U", "T") -# if start_base == "u": -# start_base = start_base.replace("u", "t") -# -# # Remove any white space from the string (tabs or spaces) -# # ac_text = textwrap.dedent(ac_seq).strip() -# ac_text = ac_seq -# -# # Assign the original sequence to a list -# 
ac_list = list(ac_text) -# -# # Search the list at the correct location for the edit base -# if ac_list[interval_start -1] == start_base: -# # Make the edit -# ac_list[interval_start -1] = end_base -# # Join the sequence back together -# seq_text = ''.join(ac_list) -# # Make edits DNA -# seq_text = seq_text.replace("U", "T") -# seq_text = seq_text.replace("u", "t") -# # Apply a width of 60 characters to the string output -# # seq_text = textwrap.fill(seq_text, width=60) -# # Append the sequence -# tx_ac_fasta_dict['tx_ac_fasta'].append(seq_text) -# return tx_ac_fasta_dict -# -# # Search the list at the correct location for the edit base -# if ac_list[interval_start -1] == start_base.lower(): -# # Make the edit -# ac_list[interval_start -1] = end_base.lower() -# # Join the sequence back together -# seq_text = ''.join(ac_list) -# # Make edits DNA -# seq_text = seq_text.replace("U", "T") -# seq_text = seq_text.replace("u", "t") -# # Apply a width of 60 characters to the string output -# #seq_text = textwrap.fill(seq_text, width=60) -# # Append the sequence -# tx_ac_fasta_dict['tx_ac_fasta'].append(seq_text) -# return tx_ac_fasta_dict -# -# # Search the list at the correct location for the edit base -# else: -# # Make the edit -# ac_list[interval_start -1] = end_base -# # Join the sequence back together -# seq_text = ''.join(ac_list) -# # Make edits DNA -# seq_text = seq_text.replace("U", "T") -# seq_text = seq_text.replace("u", "t") -# # Apply a width of 60 characters to the string output -# # seq_text = textwrap.fill(seq_text, width=60) -# # Append the sequence -# tx_ac_fasta_dict['tx_ac_fasta'].append(seq_text) -# if (pl.search(variant)) or (mi.search(variant)): -# tx_ac_fasta_dict['flag'] = '' -# else: -# tx_ac_fasta_dict['flag'] = 'Warning: Variant sequence does not agree with reference sequence' -# #tx_ac_fasta_dict['flag'] = ac_seq -# return tx_ac_fasta_dict - -""" -legacy function, May be recycled -""" -# DUPLICATIONS -# def dupn(tx_ac_fasta_title, ac_seq, 
interval_start, interval_end, edit, variant): -# # Open a dictionary to store the fasta file -# tx_ac_fasta_dict = { -# 'tx_ac_fasta' : [], -# 'edit_len' : 0 -# } -# -# # Intronic positions need to be dealt with -# pl = re.compile('\+') -# mi = re.compile('\-') -# if (pl.search(variant)) or (mi.search(variant)): -# # Append the title line -# tx_ac_fasta_dict['tx_ac_fasta'].append(tx_ac_fasta_title) -# # Append the sequence -# tx_ac_fasta_dict['tx_ac_fasta'].append(ac_seq) -# return tx_ac_fasta_dict -# else: -# pass -# -# # Append the title line -# tx_ac_fasta_dict['tx_ac_fasta'].append(tx_ac_fasta_title) -# -# # Extract duplicated bases if sequence available -# bases = re.compile('[GATCUgatcu]+$') -# if bases.search(edit): -# dup_base = re.search(r"(([GATCUgatcu]+)$)", edit) -# dup_base = dup_base.group(1) -# else: -# # The duplicated sequence needs to be extracted based on coordinates, -# dup_base = ac_seq[interval_start -1:interval_end] -# -# # Append the duplicated sequence length -# tx_ac_fasta_dict['edit_len'] = len(dup_base) -# -# # Remove any white space from the string (tabs or spaces) -# # ac_text = textwrap.dedent(ac_seq).strip() -# ac_text = ac_seq -# -# # Assign the original sequence to a list -# ac_list = list(ac_text) -# -# # Search the list at the correct location for the edit base -# if ac_list[interval_start -1] == dup_base[0]: -# #return 'Giraffes' -# # Make the edit -# edit_list = ac_list[0:interval_start -1] -# # Remove the processed bases -# del ac_list[0:interval_start -1] -# # List the insert and append the bases -# ins_list = list(dup_base) -# for base in ins_list: -# edit_list.append(base) -# # Append the remaining sequence -# for base in ac_list: -# edit_list.append(base) -# -# # Join the sequence back together -# seq_text = ''.join(edit_list) -# #return seq_text -# -# # Make edits DNA -# seq_text = seq_text.replace("U", "T") -# seq_text = seq_text.replace("u", "t") -# -# # Apply a width of 60 characters to the string output -# # seq_text 
= textwrap.fill(seq_text, width=60) -# -# # Append the sequence -# tx_ac_fasta_dict['tx_ac_fasta'].append(seq_text) -# -# return tx_ac_fasta_dict - - -""" -legacy function, May be recycled -""" -# CHUNKING -# Split a string into chunks of a given length and return a list -# def nsplit(s, n): -# return [s[k:k+n] for k in xrange(0, len(s), n)] - - -""" -legacy function, May be recycled -""" -# PERFORM ALIGNING OF TWO KNOWN SEQUENCES -# Takes sequence strings and creates an alignment list -# def format_alignment(align1, align2, edit_type, interval_start, interval_end, begin, end, ins_len, type, frame): -# """format_alignment(align1, align2, score, begin, end) -> string -# Format the alignment prettily into a string. -# """ -# s = [] -# l1 = [] -# l2 = [] -# l3 = [] -# -# al_l1 = list(align1) -# al_l2 = list(align2) -# element = 0 -# -# -# # Simple alignments -# ################### -# if edit_type == 'sim_align': -# del_l1 = [] -# del_l2 = [] -# # Handle the start of the sequence where the bases should align -# # Set the counter -# count = 0 -# -# for base in al_l2: -# if base != al_l1[count]: -# break -# else: -# del_l2.append(al_l2[count]) -# del_l1.append(al_l1[count]) -# count = count + 1 -# -# # ASSEMBLE THE ALIGNMENT -# # Sequences should be equal length at this stage -# if len(del_l1) == len(del_l2): -# for elements in del_l2: -# l1.append(del_l1[element]) -# l3.append(del_l2[element]) -# -# if del_l1[element] == del_l2[element]: -# l2.append("|") -# else: -# l2.append(" ") -# -# # Add 1 to the element counter -# element = element +1 -# -# -# # Select for out of frame protein variants -# ########################################## -# if edit_type == 'sim_prot': -# # Select for out of frame protein variants -# if frame == 'false': -# -# #return 'girffe' -# # Basic alignment -# del_l1 = [] -# del_l2 = [] -# -# # Handle the start of the sequence where the bases should align -# # Set the counter -# count = 0 -# while al_l2: -# if al_l2[count] != al_l1[count]: -# 
break -# else: -# del_l2.append(al_l2[count]) -# del_l1.append(al_l1[count]) -# count = count + 1 -# -# add1 = count -# add2 = count -# -# while add1 < len(al_l1): -# del_l1.append(al_l1[add1]) -# add1 = add1 + 1 -# -# while add2 < len(al_l2): -# del_l2.append(al_l2[add2]) -# add2 = add2 + 1 -# -# #return del_l1 -# #return del_l2 -# -# # ASSEMBLE THE ALIGNMENT -# # Sequences should be equal length at this stage -# if len(del_l1) >= len(del_l2): -# element = 0 -# for elements in del_l2: -# l1.append(del_l1[element]) -# l3.append(del_l2[element]) -# -# if del_l1[element] == del_l2[element]: -# l2.append("|") -# else: -# l2.append(" ") -# -# # Add 1 to the element counter -# element = element +1 -# else: -# element = 0 -# # return 'look, another giraffe!' -# for elements in del_l1: -# l1.append(del_l1[element]) -# l3.append(del_l2[element]) -# -# if del_l1[element] == del_l2[element]: -# l2.append("|") -# else: -# l2.append(" ") -# -# # Add 1 to the element counter -# element = element +1 -# -# # Align point mutations -# ####################### -# # Simple alignment which need no alterations accounting for protein changes -# if edit_type == 'point': -# -# # Sequences should be equal length at this stage -# if len(al_l1) == len(al_l2): -# for elements in al_l1: -# l1.append(al_l1[element]) -# l3.append(al_l2[element]) -# -# if al_l1[element] == al_l2[element]: -# l2.append("|") -# else: -# l2.append(" ") -# -# # Add 1 to the element coounter -# element = element +1 -# -# # Join together the elements into alignment strings -# s1 = ''.join(l1) -# s2 = ''.join(l2) -# s3 = ''.join(l3) -# -# -# # Align simple deletions -# ######################## -# if edit_type == 'sim_del': -# -# # Select for in frame protein or non protein variants -# if (frame == 'true') or (type == ':g.') or (type == ':c.') or (type == ':n.'): -# -# # List of the sequence with the deletion -# del_l1 = [] -# del_l2 = [] -# -# # The gap has been set to end - start then + 1 (so 23 - 22 = 1, but gap 
actually - 2 so plus 1) -# gaps = interval_end - interval_start + 1 -# # Set the counter -# count = 0 -# while count < interval_start - begin -1: -# if al_l2[count] != al_l1[count]: -# break -# else: -# del_l2.append(al_l2[count]) -# del_l1.append(al_l1[count]) -# count = count + 1 -# -# #return align2 -# -# # reset the counter -# count = 0 -# # Fill the gap with - and append -# while count < gaps: -# base = "-" -# del_l1.append(base) -# count = count +1 -# -# # To keep the next loop simple, we need to remove the extraneous 5 prime sequence -# # We cut off the length of the current edit list -# cut = len(del_l1) -# -# # Remove the processed section from each list -# del al_l1[0:cut - gaps] -# del_l2 = al_l2[:] -# del del_l2[0:cut] -# -# # Now repeat the loop on the truncated sequences to complete the alignments -# count = 0 -# for base in al_l1: -# if al_l1[count] != del_l2[count]: -# break -# else: -# del_l1.append(al_l1[count]) -# count = count +1 -# -# # Sequences should be equal length at this stage -# if len(del_l1) == len(al_l2): -# for elements in del_l1: -# l1.append(del_l1[element]) -# l3.append(al_l2[element]) -# -# if del_l1[element] == al_l2[element]: -# l2.append("|") -# else: -# l2.append(" ") -# -# # Add 1 to the element counter -# element = element +1 -# #else: -# #return del_l2 -# #l1.append('Giraffes') -# #l1.append(del_l1) -# -# -# # Align simple insertions and duplications -# ########################################## -# if edit_type == 'sim_ins' or edit_type == 'dupn': -# -# # Select for in frame protein or non protein variants -# if (frame == 'true') or (type == ':g.') or (type == ':c.') or (type == ':r.'): -# -# # List of the sequence with the deletion -# ins_l2 = [] -# ins_l1 = al_l1[:] -# -# # Set the counter and gap length -# count = 0 -# gaps = ins_len -# -# #return str(ins_len) -# -# if edit_type == 'dupn': -# # While the bases align, append the element to the edited list -# while count < interval_end - begin: -# if al_l1[count] != 
al_l2[count]: -# break -# else: -# ins_l2.append(al_l2[count]) -# count = count +1 -# else: -# # While the bases align, append the element to the edited list -# while count < interval_start - begin: -# if al_l1[count] != al_l2[count]: -# break -# else: -# ins_l2.append(al_l2[count]) -# count = count +1 -# -# # reset the counter -# count = 0 -# # Fill the gap with - and append -# while count < gaps: -# base = "-" -# ins_l2.append(base) -# count = count +1 -# -# # To keep the next loop simple, we need to remove the extraneous 5 prime sequence -# # We cut off the length of the current edit list -# cut = len(ins_l2) -# -# # Remove the processed section from each list -# del al_l2[0:cut - gaps] -# del al_l1[0:cut] -# -# # Now repeat the loop on the truncated sequences to complete the alignments -# count = 0 -# for base in al_l2: -# if al_l1[count] != al_l2[count]: -# break -# else: -# ins_l2.append(al_l2[count]) -# count = count +1 -# -# #return ins_l2 -# -# # Sequences should be equal length at this stage -# if len(ins_l1) == len(ins_l2): -# for elements in ins_l1: -# l1.append(ins_l1[element]) -# l3.append(ins_l2[element]) -# -# if ins_l1[element] == ins_l2[element]: -# l2.append("|") -# else: -# l2.append(" ") -# -# # Add 1 to the element counter -# element = element +1 -# #else: -# #return del_l2 -# #l1.append('Giraffes') -# #l1.append(del_l1) -# -# -# # Align delins mutations -# ######################## -# if edit_type == 'delins': -# # List of the sequence with the deletion -# del_l1 = [] -# del_l2 = [] -# -# # return al_l2 -# -# # Set the total gap length -# gaps = interval_end - interval_start + 1 -# insertion = ins_len -# -# # return str(ins_len) -# -# # Select for in frame protein or non protein variants -# if (frame == 'true') or (type == ':g.') or (type == ':c.') or (type == ':r.'): -# # Handle the start of the sequence where the bases should align -# # Set the counter -# count = 0 -# while count < interval_start - begin -1: -# if al_l2[count] != 
al_l1[count]: -# break -# else: -# del_l2.append(al_l2[count]) -# del_l1.append(al_l1[count]) -# count = count + 1 -# -# # Handle the deletion -# # append the deleted section to the reference list -# # blank out the variant base -# count = 0 -# done = len(del_l2) -# while count < gaps: -# base = '-' -# del_l1.append(base) -# del_l2.append(al_l2[done]) -# count = count + 1 -# done = done + 1 -# -# # Handle the insertion -# # reset the counter -# count = 0 -# done = len(del_l2) -# while count < ins_len: -# base = '-' -# del_l2.append(base) -# del_l1.append(al_l1[done - gaps]) -# count = count + 1 -# done = done + 1 -# -# # Remove the processed section from each list -# # Lists are of equal length -# done = len(del_l1) -# del al_l1[0:done - gaps] -# del al_l2[0:done - ins_len] -# -# # Now repeat the loop on the truncated sequences to complete the alignments -# count = 0 -# for base in al_l1: -# if del_l1[count] != del_l2[count]: -# break -# else: -# del_l1.append(al_l1[count]) -# del_l2.append(al_l2[count]) -# count = count +1 -# -# # ASSEMBLE THE ALIGNMENT -# # Sequences should be equal length at this stage -# if len(del_l2) == len(del_l1): -# for elements in del_l2: -# l1.append(del_l1[element]) -# l3.append(del_l2[element]) -# -# if del_l1[element] == del_l2[element]: -# l2.append("|") -# else: -# l2.append(" ") -# -# # Add 1 to the element counter -# element = element +1 -# -# # Select for out of frame protein variants -# if frame == 'false': -# # Basic alignment -# -# # Handle the start of the sequence where the bases should align -# # Set the counter -# count = 0 -# while al_l2: -# if al_l2[count] != al_l1[count]: -# break -# else: -# del_l2.append(al_l2[count]) -# del_l1.append(al_l1[count]) -# count = count + 1 -# -# add1 = count -# add2 = count -# -# while add1 < len(al_l1): -# del_l1.append(al_l1[add1]) -# add1 = add1 + 1 -# -# while add2 < len(al_l2): -# del_l2.append(al_l2[add2]) -# add2 = add2 + 1 -# -# # ASSEMBLE THE ALIGNMENT -# if len(del_l1) >= 
len(del_l2): -# for elements in del_l2: -# l1.append(del_l1[element]) -# l3.append(del_l2[element]) -# -# if del_l1[element] == del_l2[element]: -# l2.append("|") -# else: -# l2.append(" ") -# -# # Add 1 to the element counter -# element = element +1 -# else: -# for elements in del_l1: -# l1.append(del_l1[element]) -# l3.append(del_l2[element]) -# -# if del_l1[element] == del_l2[element]: -# l2.append("|") -# else: -# l2.append(" ") -# -# # Add 1 to the element counter -# element = element +1 -# -# -# # String the alignment lists -# ############################ -# -# # Join together the elements into alignment strings -# s1 = ''.join(l1) -# s2 = ''.join(l2) -# s3 = ''.join(l3) -# -# -# # Append the alignment strings into a list (s) -# s.append(s1) -# s.append(s2) -# s.append(s3) -# -# return s - - - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
-# - - - - - - - - - - - diff --git a/VariantValidator/variantanalyser/loggingSetup.py b/VariantValidator/variantanalyser/loggingSetup.py deleted file mode 100644 index 87ce2719..00000000 --- a/VariantValidator/variantanalyser/loggingSetup.py +++ /dev/null @@ -1,59 +0,0 @@ - -# Import python diagnostic tools -import logging -from StringIO import StringIO -import traceback - - -# Set up logging -def loggingSetup(): - print("THIS CODE IS CALLED FUCKING ONCE") - if "VV" in logging.Logger.manager.loggerDict: - return - - VALIDATOR_DEBUG = os.environ.get('VALIDATOR_DEBUG') - if VALIDATOR_DEBUG is None: - VALIDATOR_DEBUG="info console" #Set default value - logger=logging.getLogger("VV") - #Set logging urgency levels. - if "debug" in VALIDATOR_DEBUG: - logLevel =logging.DEBUG - elif "warning" in VALIDATOR_DEBUG: - logLevel =logging.WARNING - elif "info" in VALIDATOR_DEBUG: - logLevel =logging.INFO - elif "error" in VALIDATOR_DEBUG: - logLevel =logging.ERROR - elif "critical" in VALIDATOR_DEBUG: - logLevel =logging.CRITICAL - - if "file" in VALIDATOR_DEBUG: - logFileHandler=logging.FileHandler("VV-log.txt") - logFileHandler.setLevel(logLevel) - logger.addHandler(logFileHandler) - if "console" in VALIDATOR_DEBUG: - logConsoleHandler=logging.StreamHandler() - logConsoleHandler.setLevel(logLevel) - logger.addHandler(logConsoleHandler) - #Create a log string to add to validations. - logString=StringIO() - logStringHandler=logging.StreamHandler(logString) - #We want the validation metadata to not contain debug info which may change with program operation - logStringHandler.setLevel(logging.INFO) - logger.addHandler(logStringHandler) - logger.setLevel(logging.DEBUG) #The logger itself must be set with an appropriate level of urgency. 
- - #print(logger.handers) - logger.propagate=False - -loggingSetup() - -#Test -#logger.debug("Message D") -#logger.info("Message I") -#logger.warning("Message W") -#logger.error("Message E") -#logger.critical("Message C")# - -#print("TEST "+logString.getvalue()) - diff --git a/VariantValidator/variantanalyser/mysql_error.txt b/VariantValidator/variantanalyser/mysql_error.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/VariantValidator/variantanalyser/pseudo_vcf2hgvs.py b/VariantValidator/variantanalyser/pseudo_vcf2hgvs.py deleted file mode 100644 index 4ca12c36..00000000 --- a/VariantValidator/variantanalyser/pseudo_vcf2hgvs.py +++ /dev/null @@ -1,229 +0,0 @@ -""" -psuedo_vcf2hgvs is a stripped down version of VariantValidator's vcf2hgvs functionality -The tool is used to convert only the pseudo VCF format e.g. chr-pos-ref-alt into hgvs -python objects. The two variations on the function provide the HGVS output as either a -3 prime normalized HGVS description or a 5 prime normalized HGVS description. 5 prime -normalization is primarily used in the process of merging several VCF calls into a single -HGVS description -""" -# Import modules -import re -import copy -import hgvs.exceptions -import supported_chromosome_builds as va_scb -from dbControls import data as va_dbCrl - - - - -# Set variables -# hdp = hgvs.dataproviders.uta.connect(pooling=True) -# -# # Reverse normalizer (5 prime) -# reverse_normalize = hgvs.normalizer.Normalizer(hdp, -# cross_boundaries=False, -# shuffle_direction=5, -# alt_aln_method='splign' -# ) -# -# # normalizer (3 prime) -# normalize = hgvs.normalizer.Normalizer(hdp, -# cross_boundaries=False, -# shuffle_direction=3, -# alt_aln_method='splign' -# ) - -# parser -# hp = hgvs.parser.Parser() -# SeqFetcher -# sf = hgvs.dataproviders.seqfetcher.SeqFetcher() - - -# Error handling -class pseudoVCF2HGVSError(Exception): - pass -# pvcf is a pseudo_vcf string -# genome build is a build string e.g. 
GRCh37 hg19 -# normalization direction an integer, 5 or 3. -def pvcf_to_hgvs(input, selected_assembly, normalization_direction, reverse_normalizer, hn, hp): - # Set normalizer - if normalization_direction == 3: - selected_normalizer = hn - if normalization_direction == 5: - selected_normalizer = reverse_normalizer - - # Gel stye pVCF - input = input.replace(':', '-') - - # VCF type 1 - if re.search('-\d+-[GATC]+-[GATC]+', input): - pre_input = copy.deepcopy(input) - vcf_elements = pre_input.split('-') - input = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[3]) - elif re.search('-\d+-[GATC]+-', input): - pre_input = copy.deepcopy(input) - vcf_elements = pre_input.split('-') - input = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[2]) - else: - raise pseudoVCF2HGVSError('Unsupported format: VCF specification 4.1 or later') - - # Chr16:2099572TC>T - try: - pre_input = copy.deepcopy(input) - input_list = input.split(':') - pos_ref_alt = str(input_list[1]) - positionAndEdit = input_list[1] - if not re.match('N[CGWT]_', input) and not re.match('LRG_\d+$', input): - chr_num = str(input_list[0]) - chr_num = chr_num.upper() - chr_num = chr_num.strip() - if re.match('CHR', chr_num): - chr_num = chr_num.replace('CHR', '') - # Use selected assembly - accession = va_scb.to_accession(chr_num, selected_assembly) - if accession is None: - error = chr_num + ' is not part of genome build ' + selected_assembly + ' or is not supported' - raise pseudoVCF2HGVSError(error) - else: - accession = input_list[0] - - # Assign reference sequence type - ref_type = ':g.' - if re.match('LRG_', accession): - accession = va_dbCrl.get_RefSeqGeneID_from_lrgID(accession) - - # Reformat the variant - input = str(accession) + ref_type + str(positionAndEdit) - except Exception as e: - error = str(e) - raise pseudoVCF2HGVSError(error) - - # Find not_sub type in input e.g. 
GGGG>G - not_sub = copy.deepcopy(input) - not_sub_find = re.compile("([GATCgatc]+)>([GATCgatc]+)") - if not_sub_find.search(not_sub): - try: - # If the length of either side of the substitution delimer (>) is >1 - matches = not_sub_find.search(not_sub) - if len(matches.group(1)) > 1 or len(matches.group(2)) > 1 or re.search( - "([GATCgatc]+)>([GATCgatc]+),([GATCgatc]+)", input): - # Search for and remove range - range = re.compile("([0-9]+)_([0-9]+)") - if range.search(not_sub): - m = not_sub_find.search(not_sub) - start = m.group(1) - delete = m.group(2) - beginning_string, middle_string = not_sub.split(':') - middle_string = middle_string.split('_')[0] - end_string = start + '>' + delete - not_sub = beginning_string + ':' + middle_string + end_string - # Split description - split_colon = not_sub.split(':') - ref_ac = split_colon[0] - remainder = split_colon[1] - split_dot = remainder.split('.') - ref_type = split_dot[0] - remainder = split_dot[1] - posedit = remainder - split_greater = remainder.split('>') - insert = split_greater[1] - remainder = split_greater[0] - # Split remainder using matches - r = re.compile("([0-9]+)([GATCgatc]+)") - try: - m = r.search(remainder) - start = m.group(1) - delete = m.group(2) - starts = posedit.split(delete)[0] - re_try = ref_ac + ':' + ref_type + '.' + starts + 'del' + delete[0] + 'ins' + insert - hgvs_re_try = hp.parse_hgvs_variant(re_try) - hgvs_re_try.posedit.edit.ref = delete - start_pos = str(hgvs_re_try.posedit.pos.start) - if re.search('\-', start_pos): - base, offset = start_pos.split('-') - new_offset = 0 - int(offset) + (len(delete)) - end_pos = int(base) - hgvs_re_try.posedit.pos.end.base = int(end_pos) - hgvs_re_try.posedit.pos.end.offset = int(new_offset) - 1 - not_delins = ref_ac + ':' + ref_type + '.' 
+ start_pos + '_' + str( - hgvs_re_try.posedit.pos.end) + 'del' + delete + 'ins' + insert - elif re.search('\+', start_pos): - base, offset = start_pos.split('+') - end_pos = int(base) + (len(delete) - int(offset) - 1) - new_offset = 0 + int(offset) + (len(delete) - 1) - hgvs_re_try.posedit.pos.end.base = int(end_pos) - hgvs_re_try.posedit.pos.end.offset = int(new_offset) - not_delins = ref_ac + ':' + ref_type + '.' + start_pos + '_' + str( - hgvs_re_try.posedit.pos.end) + 'del' + delete + 'ins' + insert - else: - end_pos = int(start_pos) + (len(delete) - 1) - not_delins = ref_ac + ':' + ref_type + '.' + start_pos + '_' + str( - end_pos) + 'del' + delete + 'ins' + insert - except: - not_delins = not_sub - # Parse into hgvs object - try: - hgvs_not_delins = hp.parse_hgvs_variant(not_delins) - except hgvs.exceptions.HGVSError as e: - # Sort out multiple ALTS from VCF inputs - if re.search("([GATCgatc]+)>([GATCgatc]+),([GATCgatc]+)", not_delins): - # header,alts = not_delins.split('>') - # # Split up the alts into a list - # alt_list = alts.split(',') - # # Assemble and re-submit - # for alt in alt_list: - # validation['warnings'] = 'Multiple ALT sequences detected: auto-submitting all possible combinations' - # validation['write'] = 'false' - # refreshed_description = header + '>' + alt - # query = {'quibble' : refreshed_description, 'id' : validation['id'], 'warnings' : validation['warnings'], 'description' : '', 'coding' : '', 'coding_g' : '', 'genomic_r' : '', 'genomic_g' : '', 'protein' : '', 'write' : 'true', 'primary_assembly' : primary_assembly, 'order' : ordering} - # batch_list.append(query) - error = 'Multiple ALTs not supported by this function' - raise pseudoVCF2HGVSError(error) - else: - error = str(e) - raise pseudoVCF2HGVSError(error) - - # HGVS will deal with the errors - hgvs_object = hgvs_not_delins - else: - hgvs_object = hp.parse_hgvs_variant(input) - - except Exception as e: - error = str(e) - raise pseudoVCF2HGVSError(error) - else: - 
hgvs_object = hp.parse_hgvs_variant(input) - - # Normalize - hgvs_object = selected_normalizer.normalize(hgvs_object) - # return - return hgvs_object - - - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# - - - - - - - - - - diff --git a/VariantValidator/variantanalyser/supported_chromosome_builds.py b/VariantValidator/variantanalyser/supported_chromosome_builds.py deleted file mode 100644 index e433ecb8..00000000 --- a/VariantValidator/variantanalyser/supported_chromosome_builds.py +++ /dev/null @@ -1,2866 +0,0 @@ -# -*- coding: utf-8 -*- -""" -supported chromosome builds.py - -Contains the necessary functions for matching RefSeq accessions with chromosome numbers -or alt loci names based on genome builds -""" - -# Python modules -import re - - -def supported_for_mapping(ac, primary_assembly): - sfm = 'false' - test_grc = to_chr_num_refseq(ac, primary_assembly) - if test_grc is not None: - sfm = 'true' - test_ucsc = to_chr_num_ucsc(ac, primary_assembly) - if test_ucsc is not None: - sfm = 'true' - return sfm - - -def to_accession(chr_num, primary_assembly): - # Available genome builds - GRCh37 = { - "1": "NC_000001.10", - "2": "NC_000002.11", - "3": "NC_000003.11", - "4": "NC_000004.11", - "5": "NC_000005.9", - "6": "NC_000006.11", - "7": "NC_000007.13", - "8": "NC_000008.10", - "9": "NC_000009.11", - "10": 
"NC_000010.10", - "11": "NC_000011.9", - "12": "NC_000012.11", - "13": "NC_000013.10", - "14": "NC_000014.8", - "15": "NC_000015.9", - "16": "NC_000016.9", - "17": "NC_000017.10", - "18": "NC_000018.9", - "19": "NC_000019.9", - "20": "NC_000020.10", - "21": "NC_000021.8", - "22": "NC_000022.10", - "23": "NC_000023.10", - "24": "NC_000024.9", - "x": "NC_000023.10", - "y": "NC_000024.9", - "X": "NC_000023.10", - "Y": "NC_000024.9", - "M": "NC_012920.1", - "m": "NC_012920.1", - - # UCSC alts - "11_GL000202_RANDOM": "NT_113921.2", - "17_CTG5_HAP1": "NT_167251.1", - "17_GL000203_RANDOM": "NT_113941.1", - "17_GL000204_RANDOM": "NT_113943.1", - "17_GL000205_RANDOM": "NT_113930.1", - "17_GL000206_RANDOM": "NT_113945.1", - "18_GL000207_RANDOM": "NT_113947.1", - "19_GL000208_RANDOM": "NT_113948.1", - "19_GL000209_RANDOM": "NT_113949.1", - "1_GL000191_RANDOM": "NT_113878.1", - "1_GL000192_RANDOM": "NT_167207.1", - "21_GL000210_RANDOM": "NT_113950.2", - "4_CTG9_HAP1": "NT_167250.1", - "4_GL000193_RANDOM": "NT_113885.1", - "4_GL000194_RANDOM": "NT_113888.1", - "6_APD_HAP1": "NT_167244.1", - "6_COX_HAP2": "NT_113891.2", - "6_DBB_HAP3": "NT_167245.1", - "6_MANN_HAP4": "NT_167246.1", - "6_MCF_HAP5": "NT_167247.1", - "6_QBL_HAP6": "NT_167248.1", - "6_SSTO_HAP7": "NT_167249.1", - "7_GL000195_RANDOM": "NT_113901.1", - "8_GL000196_RANDOM": "NT_113909.1", - "8_GL000197_RANDOM": "NT_113907.1", - "9_GL000198_RANDOM": "NT_113914.1", - "9_GL000199_RANDOM": "NT_113916.2", - "9_GL000200_RANDOM": "NT_113915.1", - "9_GL000201_RANDOM": "NT_113911.1", - "UN_GL000211": "NT_113961.1", - "UN_GL000212": "NT_113923.1", - "UN_GL000213": "NT_167208.1", - "UN_GL000214": "NT_167209.1", - "UN_GL000215": "NT_167210.1", - "UN_GL000216": "NT_167211.1", - "UN_GL000217": "NT_167212.1", - "UN_GL000218": "NT_113889.1", - "UN_GL000219": "NT_167213.1", - "UN_GL000220": "NT_167214.1", - "UN_GL000221": "NT_167215.1", - "UN_GL000222": "NT_167216.1", - "UN_GL000223": "NT_167217.1", - "UN_GL000224": "NT_167218.1", - 
"UN_GL000225": "NT_167219.1", - "UN_GL000226": "NT_167220.1", - "UN_GL000227": "NT_167221.1", - "UN_GL000228": "NT_167222.1", - "UN_GL000229": "NT_167223.1", - "UN_GL000230": "NT_167224.1", - "UN_GL000231": "NT_167225.1", - "UN_GL000232": "NT_167226.1", - "UN_GL000233": "NT_167227.1", - "UN_GL000234": "NT_167228.1", - "UN_GL000235": "NT_167229.1", - "UN_GL000236": "NT_167230.1", - "UN_GL000237": "NT_167231.1", - "UN_GL000238": "NT_167232.1", - "UN_GL000239": "NT_167233.1", - "UN_GL000240": "NT_167234.1", - "UN_GL000241": "NT_167235.1", - "UN_GL000242": "NT_167236.1", - "UN_GL000243": "NT_167237.1", - "UN_GL000244": "NT_167238.1", - "UN_GL000245": "NT_167239.1", - "UN_GL000246": "NT_167240.1", - "UN_GL000247": "NT_167241.1", - "UN_GL000248": "NT_167242.1", - "UN_GL000249": "NT_167243.1", - - # GRC Alts - 'HG1472_PATCH': 'NW_004070864.2', - 'HG989_PATCH': 'NW_003571030.1', - 'HG1292_PATCH': 'NW_003871056.3', - 'HG1287_PATCH': 'NW_003871055.3', - 'HSCHR1_1_CTG31': 'NW_003315905.1', - 'HSCHR1_2_CTG31': 'NW_003315906.1', - 'HSCHR1_3_CTG31': 'NW_003315907.1', - 'HG1471_PATCH': 'NW_004070863.1', - 'HG1293_PATCH': 'NW_003871057.1', - 'HG1473_PATCH': 'NW_004070865.1', - 'HG999_1_PATCH': 'NW_003315903.1', - 'HG999_2_PATCH': 'NW_003315904.1', - 'HSCHR2_1_CTG1': 'NW_003315908.1', - 'HG953_PATCH': 'NW_004504299.1', - 'HG686_PATCH': 'NW_003571032.1', - 'HSCHR2_2_CTG12': 'NW_003571033.2', - 'HSCHR2_1_CTG12': 'NW_003315909.1', - 'HG1007_PATCH': 'NW_003571031.1', - 'HSCHR3_1_CTG1': 'NW_003871060.1', - 'HG325_PATCH': 'NW_003871059.1', - 'HG186_PATCH': 'NW_003315910.1', - 'HG957_PATCH': 'NW_004775426.1', - 'HG280_PATCH': 'NW_003315911.1', - 'HG1091_PATCH': 'NW_003871058.1', - 'HG991_PATCH': 'NW_003315912.1', - 'HSCHR3_1_CTG2_1': 'NW_003315913.1', - 'HG174_HG254_PATCH': 'NW_004775427.1', - 'HSCHR4_1_CTG6': 'NW_003315915.1', - 'HSCHR4_2_CTG9': 'NW_003315916.1', - 'HG706_PATCH': 'NW_003571035.1', - 'HSCHR4_1_CTG12': 'NW_003315914.1', - 'HG1032_PATCH': 'NW_003571034.1', - 
'HSCHR5_2_CTG1': 'NW_003315920.1', - 'HSCHR5_3_CTG1': 'NW_003571036.1', - 'HSCHR5_1_CTG1': 'NW_003315917.2', - 'HSCHR5_1_CTG2': 'NW_003315918.1', - 'HG1063_PATCH': 'NW_003871061.1', - 'HG1082_HG167_PATCH': 'NW_004775428.1', - 'HSCHR5_1_CTG5': 'NW_003315919.1', - 'HG27_PATCH': 'NW_004070866.1', - 'HG1322_PATCH': 'NW_003871063.1', - 'HSCHR6_1_CTG5': 'NW_003315921.1', - 'HG357_PATCH': 'NW_004504300.1', - 'HG1304_PATCH': 'NW_003871062.1', - 'HG193_PATCH': 'NW_004775429.1', - 'HSCHR6_2_CTG5': 'NW_004166862.1', - 'HG736_PATCH': 'NW_003571039.1', - 'HG14_PATCH': 'NW_003571038.1', - 'HG444_PATCH': 'NW_004775430.1', - 'HG1257_PATCH': 'NW_003871064.1', - 'HG946_PATCH': 'NW_003571041.1', - 'HG115_PATCH': 'NW_003571037.1', - 'HG1308_PATCH': 'NW_003871065.1', - 'HSCHR7_1_CTG6': 'NW_003315922.2', - 'HG7_PATCH': 'NW_003571040.1', - 'HG19_PATCH': 'NW_003571042.1', - 'HG1699_PATCH': 'NW_004775431.1', - 'HG418_PATCH': 'NW_003871066.2', - 'HG104_HG975_PATCH': 'NW_003315923.1', - 'HG243_PATCH': 'NW_003315924.1', - 'HSCHR9_1_CTG1': 'NW_003315928.1', - 'HG962_PATCH': 'NW_003871067.1', - 'HSCHR9_1_CTG35': 'NW_003315929.1', - 'HSCHR9_2_CTG35': 'NW_003315930.1', - 'HSCHR9_3_CTG35': 'NW_003315931.1', - 'HG50_PATCH': 'NW_004504301.1', - 'HG1502_PATCH': 'NW_004070869.1', - 'HG79_PATCH': 'NW_003315925.1', - 'HG1500_PATCH': 'NW_004070867.1', - 'HG1501_PATCH': 'NW_004070868.1', - 'HG998_1_PATCH': 'NW_003315926.1', - 'HG998_2_PATCH': 'NW_003315927.1', - 'HG905_PATCH': 'NW_003571043.1', - 'HG871_PATCH': 'NW_003871071.1', - 'HG544_PATCH': 'NW_003315932.1', - 'HSCHR10_1_CTG2': 'NW_003315934.1', - 'HSCHR10_1_CTG5': 'NW_003315935.1', - 'HG1211_PATCH': 'NW_003871068.1', - 'HG1074_PATCH': 'NW_004504302.1', - 'HG339_PATCH': 'NW_003871070.1', - 'HG979_PATCH': 'NW_004775432.1', - 'HG311_PATCH': 'NW_003871069.1', - 'HG995_PATCH': 'NW_003315933.1', - 'HG1479_PATCH': 'NW_004070870.1', - 'HG256_PATCH': 'NW_003871075.1', - 'HG873_PATCH': 'NW_003871082.1', - 'HSCHR11_1_CTG1_1': 'NW_003315936.1', - 'HG281_PATCH': 
'NW_003571045.1', - 'HG142_HG150_NOVEL_TEST': 'NW_003871073.1', - 'HG151_NOVEL_TEST': 'NW_003871074.1', - 'HG536_PATCH': 'NW_003571046.1', - 'HG865_PATCH': 'NW_004070871.1', - 'HG414_PATCH': 'NW_003871081.1', - 'HG348_PATCH': 'NW_003871079.1', - 'HG305_PATCH': 'NW_003871077.1', - 'HG388_HG400_PATCH': 'NW_003871080.1', - 'HG306_PATCH': 'NW_003871078.1', - 'HG122_PATCH': 'NW_003871072.2', - 'HG299_PATCH': 'NW_003871076.1', - 'HG858_PATCH': 'NW_003571048.1', - 'HSCHR12_1_CTG1': 'NW_003571049.1', - 'HG344_PATCH': 'NW_003871083.2', - 'HG1133_PATCH': 'NW_003571047.1', - 'HSCHR12_2_CTG2': 'NW_003571050.1', - 'HSCHR12_1_CTG2': 'NW_003315938.1', - 'HSCHR12_1_CTG2_1': 'NW_003315939.1', - 'HSCHR12_2_CTG2_1': 'NW_003315941.1', - 'HSCHR12_3_CTG2_1': 'NW_003315942.2', - 'HG1595_PATCH': 'NW_004504303.2', - 'HSCHR12_1_CTG5': 'NW_003315940.1', - 'HG996_PATCH': 'NW_003315937.1', - 'HG531_PATCH': 'NW_003571051.1', - 'HG1592_PATCH': 'NW_004166863.1', - 'HSCHR15_1_CTG4': 'NW_003315943.1', - 'HSCHR15_1_CTG8': 'NW_003315944.1', - 'HG971_PATCH': 'NW_003871084.1', - 'HSCHR16_1_CTG3_1': 'NW_003315945.1', - 'HG1208_PATCH': 'NW_003871085.1', - 'HSCHR16_2_CTG3_1': 'NW_003315946.1', - 'HG417_PATCH': 'NW_004070872.2', - 'HSCHR17_1_CTG1': 'NW_003315952.2', - 'HG990_PATCH': 'NW_003315951.1', - 'HG987_PATCH': 'NW_003315950.2', - 'HG1591_PATCH': 'NW_004775433.1', - 'HG883_PATCH': 'NW_003871090.1', - 'HG385_PATCH': 'NW_004166864.2', - 'HG75_PATCH': 'NW_003315949.1', - 'HG745_PATCH': 'NW_003315948.2', - 'HSCHR17_4_CTG4': 'NW_003871091.1', - 'HSCHR17_6_CTG4': 'NW_003871093.1', - 'HSCHR17_5_CTG4': 'NW_003871092.1', - 'HSCHR17_1_CTG4': 'NW_003315953.1', - 'HG185_PATCH': 'NW_003571052.1', - 'HG1146_PATCH': 'NW_003871086.1', - 'HG183_PATCH': 'NW_003315947.1', - 'HG747_PATCH': 'NW_003871088.1', - 'HSCHR17_2_CTG4': 'NW_003315954.1', - 'HSCHR17_3_CTG4': 'NW_003315955.1', - 'HG748_PATCH': 'NW_003871089.1', - 'HG271_PATCH': 'NW_003871087.1', - 'HSCHR18_1_CTG1_1': 'NW_003315956.1', - 'HSCHR18_2_CTG1_1': 
'NW_003315959.1', - 'HSCHR18_2_CTG2': 'NW_003315960.1', - 'HSCHR18_1_CTG2': 'NW_003315957.1', - 'HSCHR18_1_CTG2_1': 'NW_003315958.1', - 'HSCHR18_2_CTG2_1': 'NW_003315961.1', - 'HG729_PATCH': 'NW_003871094.1', - 'HG730_PATCH': 'NW_003571053.2', - 'HSCHR19_1_CTG3': 'NW_003315962.1', - 'HSCHR19_2_CTG3': 'NW_003315964.2', - 'HSCHR19_3_CTG3': 'NW_003315965.1', - 'HSCHR19_1_CTG3_1': 'NW_003315963.1', - 'HG1350_HG959_PATCH': 'NW_004775434.1', - 'HG1079_PATCH': 'NW_004166865.1', - 'HSCHR19LRC_COX1_CTG1': 'NW_003571054.1', - 'HSCHR19LRC_COX2_CTG1': 'NW_003571055.1', - 'HSCHR19LRC_LRC_I_CTG1': 'NW_003571056.1', - 'HSCHR19LRC_LRC_J_CTG1': 'NW_003571057.1', - 'HSCHR19LRC_LRC_S_CTG1': 'NW_003571058.1', - 'HSCHR19LRC_LRC_T_CTG1': 'NW_003571059.1', - 'HSCHR19LRC_PGF1_CTG1': 'NW_003571060.1', - 'HSCHR19LRC_PGF2_CTG1': 'NW_003571061.1', - 'HSCHR20_1_CTG1': 'NW_003315966.1', - 'HG144_PATCH': 'NW_003871095.1', - 'HG944_PATCH': 'NW_004504304.1', - 'HG506_HG507_HG1000_PATCH': 'NW_003571063.2', - 'HSCHR21_1_CTG1_1': 'NW_003315967.1', - 'HSCHR21_2_CTG1_1': 'NW_003315968.1', - 'HSCHR21_3_CTG1_1': 'NW_003315969.1', - 'HSCHR21_4_CTG1_1': 'NW_003315970.1', - 'HG237_PATCH': 'NW_004775435.1', - 'HG1487_PATCH': 'NW_004070874.1', - 'HG1486_PATCH': 'NW_004070873.1', - 'HG1488_PATCH': 'NW_004070875.1', - 'HG329_PATCH': 'NW_003871096.1', - 'HSCHR22_1_CTG2': 'NW_003315972.1', - 'HSCHR22_1_CTG1': 'NW_003315971.2', - 'HSCHR22_2_CTG1': 'NW_004504305.1', - 'HG497_PATCH': 'NW_004070876.1', - 'HG480_HG481_PATCH': 'NW_003571064.2', - 'HG1423_PATCH': 'NW_003871098.1', - 'HG1424_PATCH': 'NW_003871099.1', - 'HG1435_PATCH': 'NW_004070879.1', - 'HG29_PATCH': 'NW_004166866.1', - 'HG1436_HG1432_PATCH': 'NW_004070880.2', - 'HG1433_PATCH': 'NW_004070877.1', - 'HG1437_PATCH': 'NW_004070881.1', - 'HG1438_PATCH': 'NW_004070882.1', - 'HG1425_PATCH': 'NW_003871100.1', - 'HG1426_PATCH': 'NW_003871101.3', - 'HG1439_PATCH': 'NW_004070883.1', - 'HG1440_PATCH': 'NW_004070884.1', - 'HG1441_PATCH': 'NW_004070885.1', - 
'HG375_PATCH': 'NW_003871102.1', - 'HG1434_PATCH': 'NW_004070878.1', - 'HG1462_PATCH': 'NW_004070891.1', - 'HG1463_PATCH': 'NW_004070892.1', - 'HG1490_PATCH': 'NW_004070893.1', - 'HG1442_PATCH': 'NW_004070886.1', - 'HG1443_HG1444_PATCH': 'NW_004070887.1', - 'HG1453_PATCH': 'NW_004070888.1', - 'HG1458_PATCH': 'NW_004070889.1', - 'HG1459_PATCH': 'NW_004070890.2', - 'HG1497_PATCH': 'NW_003871103.3', - 'HSCHR6_MHC_APD_CTG1': 'NT_167244.1', - 'HSCHR6_MHC_COX_CTG1': 'NT_113891.2', - 'HSCHR6_MHC_DBB_CTG1': 'NT_167245.1', - 'HSCHR6_MHC_MANN_CTG1': 'NT_167246.1', - 'HSCHR6_MHC_MCF_CTG1': 'NT_167247.1', - 'HSCHR6_MHC_QBL_CTG1': 'NT_167248.1', - 'HSCHR6_MHC_SSTO_CTG1': 'NT_167249.1', - 'HSCHR4_1_CTG9': 'NT_167250.1', - 'HSCHR17_1_CTG5': 'NT_167251.1' - } - - hg19 = { - "1": "NC_000001.10", - "2": "NC_000002.11", - "3": "NC_000003.11", - "4": "NC_000004.11", - "5": "NC_000005.9", - "6": "NC_000006.11", - "7": "NC_000007.13", - "8": "NC_000008.10", - "9": "NC_000009.11", - "10": "NC_000010.10", - "11": "NC_000011.9", - "12": "NC_000012.11", - "13": "NC_000013.10", - "14": "NC_000014.8", - "15": "NC_000015.9", - "16": "NC_000016.9", - "17": "NC_000017.10", - "18": "NC_000018.9", - "19": "NC_000019.9", - "20": "NC_000020.10", - "21": "NC_000021.8", - "22": "NC_000022.10", - "23": "NC_000023.10", - "24": "NC_000024.9", - "x": "NC_000023.10", - "y": "NC_000024.9", - "X": "NC_000023.10", - "Y": "NC_000024.9", - "M": "NC_001807.4", - "m": "NC_001807.4", - - # UCSC alts - "11_GL000202_RANDOM": "NT_113921.2", - "17_CTG5_HAP1": "NT_167251.1", - "17_GL000203_RANDOM": "NT_113941.1", - "17_GL000204_RANDOM": "NT_113943.1", - "17_GL000205_RANDOM": "NT_113930.1", - "17_GL000206_RANDOM": "NT_113945.1", - "18_GL000207_RANDOM": "NT_113947.1", - "19_GL000208_RANDOM": "NT_113948.1", - "19_GL000209_RANDOM": "NT_113949.1", - "1_GL000191_RANDOM": "NT_113878.1", - "1_GL000192_RANDOM": "NT_167207.1", - "21_GL000210_RANDOM": "NT_113950.2", - "4_CTG9_HAP1": "NT_167250.1", - "4_GL000193_RANDOM": 
"NT_113885.1", - "4_GL000194_RANDOM": "NT_113888.1", - "6_APD_HAP1": "NT_167244.1", - "6_COX_HAP2": "NT_113891.2", - "6_DBB_HAP3": "NT_167245.1", - "6_MANN_HAP4": "NT_167246.1", - "6_MCF_HAP5": "NT_167247.1", - "6_QBL_HAP6": "NT_167248.1", - "6_SSTO_HAP7": "NT_167249.1", - "7_GL000195_RANDOM": "NT_113901.1", - "8_GL000196_RANDOM": "NT_113909.1", - "8_GL000197_RANDOM": "NT_113907.1", - "9_GL000198_RANDOM": "NT_113914.1", - "9_GL000199_RANDOM": "NT_113916.2", - "9_GL000200_RANDOM": "NT_113915.1", - "9_GL000201_RANDOM": "NT_113911.1", - "UN_GL000211": "NT_113961.1", - "UN_GL000212": "NT_113923.1", - "UN_GL000213": "NT_167208.1", - "UN_GL000214": "NT_167209.1", - "UN_GL000215": "NT_167210.1", - "UN_GL000216": "NT_167211.1", - "UN_GL000217": "NT_167212.1", - "UN_GL000218": "NT_113889.1", - "UN_GL000219": "NT_167213.1", - "UN_GL000220": "NT_167214.1", - "UN_GL000221": "NT_167215.1", - "UN_GL000222": "NT_167216.1", - "UN_GL000223": "NT_167217.1", - "UN_GL000224": "NT_167218.1", - "UN_GL000225": "NT_167219.1", - "UN_GL000226": "NT_167220.1", - "UN_GL000227": "NT_167221.1", - "UN_GL000228": "NT_167222.1", - "UN_GL000229": "NT_167223.1", - "UN_GL000230": "NT_167224.1", - "UN_GL000231": "NT_167225.1", - "UN_GL000232": "NT_167226.1", - "UN_GL000233": "NT_167227.1", - "UN_GL000234": "NT_167228.1", - "UN_GL000235": "NT_167229.1", - "UN_GL000236": "NT_167230.1", - "UN_GL000237": "NT_167231.1", - "UN_GL000238": "NT_167232.1", - "UN_GL000239": "NT_167233.1", - "UN_GL000240": "NT_167234.1", - "UN_GL000241": "NT_167235.1", - "UN_GL000242": "NT_167236.1", - "UN_GL000243": "NT_167237.1", - "UN_GL000244": "NT_167238.1", - "UN_GL000245": "NT_167239.1", - "UN_GL000246": "NT_167240.1", - "UN_GL000247": "NT_167241.1", - "UN_GL000248": "NT_167242.1", - "UN_GL000249": "NT_167243.1", - - # GRC Alts - 'HG1472_PATCH': 'NW_004070864.2', - 'HG989_PATCH': 'NW_003571030.1', - 'HG1292_PATCH': 'NW_003871056.3', - 'HG1287_PATCH': 'NW_003871055.3', - 'HSCHR1_1_CTG31': 'NW_003315905.1', - 
'HSCHR1_2_CTG31': 'NW_003315906.1', - 'HSCHR1_3_CTG31': 'NW_003315907.1', - 'HG1471_PATCH': 'NW_004070863.1', - 'HG1293_PATCH': 'NW_003871057.1', - 'HG1473_PATCH': 'NW_004070865.1', - 'HG999_1_PATCH': 'NW_003315903.1', - 'HG999_2_PATCH': 'NW_003315904.1', - 'HSCHR2_1_CTG1': 'NW_003315908.1', - 'HG953_PATCH': 'NW_004504299.1', - 'HG686_PATCH': 'NW_003571032.1', - 'HSCHR2_2_CTG12': 'NW_003571033.2', - 'HSCHR2_1_CTG12': 'NW_003315909.1', - 'HG1007_PATCH': 'NW_003571031.1', - 'HSCHR3_1_CTG1': 'NW_003871060.1', - 'HG325_PATCH': 'NW_003871059.1', - 'HG186_PATCH': 'NW_003315910.1', - 'HG957_PATCH': 'NW_004775426.1', - 'HG280_PATCH': 'NW_003315911.1', - 'HG1091_PATCH': 'NW_003871058.1', - 'HG991_PATCH': 'NW_003315912.1', - 'HSCHR3_1_CTG2_1': 'NW_003315913.1', - 'HG174_HG254_PATCH': 'NW_004775427.1', - 'HSCHR4_1_CTG6': 'NW_003315915.1', - 'HSCHR4_2_CTG9': 'NW_003315916.1', - 'HG706_PATCH': 'NW_003571035.1', - 'HSCHR4_1_CTG12': 'NW_003315914.1', - 'HG1032_PATCH': 'NW_003571034.1', - 'HSCHR5_2_CTG1': 'NW_003315920.1', - 'HSCHR5_3_CTG1': 'NW_003571036.1', - 'HSCHR5_1_CTG1': 'NW_003315917.2', - 'HSCHR5_1_CTG2': 'NW_003315918.1', - 'HG1063_PATCH': 'NW_003871061.1', - 'HG1082_HG167_PATCH': 'NW_004775428.1', - 'HSCHR5_1_CTG5': 'NW_003315919.1', - 'HG27_PATCH': 'NW_004070866.1', - 'HG1322_PATCH': 'NW_003871063.1', - 'HSCHR6_1_CTG5': 'NW_003315921.1', - 'HG357_PATCH': 'NW_004504300.1', - 'HG1304_PATCH': 'NW_003871062.1', - 'HG193_PATCH': 'NW_004775429.1', - 'HSCHR6_2_CTG5': 'NW_004166862.1', - 'HG736_PATCH': 'NW_003571039.1', - 'HG14_PATCH': 'NW_003571038.1', - 'HG444_PATCH': 'NW_004775430.1', - 'HG1257_PATCH': 'NW_003871064.1', - 'HG946_PATCH': 'NW_003571041.1', - 'HG115_PATCH': 'NW_003571037.1', - 'HG1308_PATCH': 'NW_003871065.1', - 'HSCHR7_1_CTG6': 'NW_003315922.2', - 'HG7_PATCH': 'NW_003571040.1', - 'HG19_PATCH': 'NW_003571042.1', - 'HG1699_PATCH': 'NW_004775431.1', - 'HG418_PATCH': 'NW_003871066.2', - 'HG104_HG975_PATCH': 'NW_003315923.1', - 'HG243_PATCH': 'NW_003315924.1', - 
'HSCHR9_1_CTG1': 'NW_003315928.1', - 'HG962_PATCH': 'NW_003871067.1', - 'HSCHR9_1_CTG35': 'NW_003315929.1', - 'HSCHR9_2_CTG35': 'NW_003315930.1', - 'HSCHR9_3_CTG35': 'NW_003315931.1', - 'HG50_PATCH': 'NW_004504301.1', - 'HG1502_PATCH': 'NW_004070869.1', - 'HG79_PATCH': 'NW_003315925.1', - 'HG1500_PATCH': 'NW_004070867.1', - 'HG1501_PATCH': 'NW_004070868.1', - 'HG998_1_PATCH': 'NW_003315926.1', - 'HG998_2_PATCH': 'NW_003315927.1', - 'HG905_PATCH': 'NW_003571043.1', - 'HG871_PATCH': 'NW_003871071.1', - 'HG544_PATCH': 'NW_003315932.1', - 'HSCHR10_1_CTG2': 'NW_003315934.1', - 'HSCHR10_1_CTG5': 'NW_003315935.1', - 'HG1211_PATCH': 'NW_003871068.1', - 'HG1074_PATCH': 'NW_004504302.1', - 'HG339_PATCH': 'NW_003871070.1', - 'HG979_PATCH': 'NW_004775432.1', - 'HG311_PATCH': 'NW_003871069.1', - 'HG995_PATCH': 'NW_003315933.1', - 'HG1479_PATCH': 'NW_004070870.1', - 'HG256_PATCH': 'NW_003871075.1', - 'HG873_PATCH': 'NW_003871082.1', - 'HSCHR11_1_CTG1_1': 'NW_003315936.1', - 'HG281_PATCH': 'NW_003571045.1', - 'HG142_HG150_NOVEL_TEST': 'NW_003871073.1', - 'HG151_NOVEL_TEST': 'NW_003871074.1', - 'HG536_PATCH': 'NW_003571046.1', - 'HG865_PATCH': 'NW_004070871.1', - 'HG414_PATCH': 'NW_003871081.1', - 'HG348_PATCH': 'NW_003871079.1', - 'HG305_PATCH': 'NW_003871077.1', - 'HG388_HG400_PATCH': 'NW_003871080.1', - 'HG306_PATCH': 'NW_003871078.1', - 'HG122_PATCH': 'NW_003871072.2', - 'HG299_PATCH': 'NW_003871076.1', - 'HG858_PATCH': 'NW_003571048.1', - 'HSCHR12_1_CTG1': 'NW_003571049.1', - 'HG344_PATCH': 'NW_003871083.2', - 'HG1133_PATCH': 'NW_003571047.1', - 'HSCHR12_2_CTG2': 'NW_003571050.1', - 'HSCHR12_1_CTG2': 'NW_003315938.1', - 'HSCHR12_1_CTG2_1': 'NW_003315939.1', - 'HSCHR12_2_CTG2_1': 'NW_003315941.1', - 'HSCHR12_3_CTG2_1': 'NW_003315942.2', - 'HG1595_PATCH': 'NW_004504303.2', - 'HSCHR12_1_CTG5': 'NW_003315940.1', - 'HG996_PATCH': 'NW_003315937.1', - 'HG531_PATCH': 'NW_003571051.1', - 'HG1592_PATCH': 'NW_004166863.1', - 'HSCHR15_1_CTG4': 'NW_003315943.1', - 'HSCHR15_1_CTG8': 
'NW_003315944.1', - 'HG971_PATCH': 'NW_003871084.1', - 'HSCHR16_1_CTG3_1': 'NW_003315945.1', - 'HG1208_PATCH': 'NW_003871085.1', - 'HSCHR16_2_CTG3_1': 'NW_003315946.1', - 'HG417_PATCH': 'NW_004070872.2', - 'HSCHR17_1_CTG1': 'NW_003315952.2', - 'HG990_PATCH': 'NW_003315951.1', - 'HG987_PATCH': 'NW_003315950.2', - 'HG1591_PATCH': 'NW_004775433.1', - 'HG883_PATCH': 'NW_003871090.1', - 'HG385_PATCH': 'NW_004166864.2', - 'HG75_PATCH': 'NW_003315949.1', - 'HG745_PATCH': 'NW_003315948.2', - 'HSCHR17_4_CTG4': 'NW_003871091.1', - 'HSCHR17_6_CTG4': 'NW_003871093.1', - 'HSCHR17_5_CTG4': 'NW_003871092.1', - 'HSCHR17_1_CTG4': 'NW_003315953.1', - 'HG185_PATCH': 'NW_003571052.1', - 'HG1146_PATCH': 'NW_003871086.1', - 'HG183_PATCH': 'NW_003315947.1', - 'HG747_PATCH': 'NW_003871088.1', - 'HSCHR17_2_CTG4': 'NW_003315954.1', - 'HSCHR17_3_CTG4': 'NW_003315955.1', - 'HG748_PATCH': 'NW_003871089.1', - 'HG271_PATCH': 'NW_003871087.1', - 'HSCHR18_1_CTG1_1': 'NW_003315956.1', - 'HSCHR18_2_CTG1_1': 'NW_003315959.1', - 'HSCHR18_2_CTG2': 'NW_003315960.1', - 'HSCHR18_1_CTG2': 'NW_003315957.1', - 'HSCHR18_1_CTG2_1': 'NW_003315958.1', - 'HSCHR18_2_CTG2_1': 'NW_003315961.1', - 'HG729_PATCH': 'NW_003871094.1', - 'HG730_PATCH': 'NW_003571053.2', - 'HSCHR19_1_CTG3': 'NW_003315962.1', - 'HSCHR19_2_CTG3': 'NW_003315964.2', - 'HSCHR19_3_CTG3': 'NW_003315965.1', - 'HSCHR19_1_CTG3_1': 'NW_003315963.1', - 'HG1350_HG959_PATCH': 'NW_004775434.1', - 'HG1079_PATCH': 'NW_004166865.1', - 'HSCHR19LRC_COX1_CTG1': 'NW_003571054.1', - 'HSCHR19LRC_COX2_CTG1': 'NW_003571055.1', - 'HSCHR19LRC_LRC_I_CTG1': 'NW_003571056.1', - 'HSCHR19LRC_LRC_J_CTG1': 'NW_003571057.1', - 'HSCHR19LRC_LRC_S_CTG1': 'NW_003571058.1', - 'HSCHR19LRC_LRC_T_CTG1': 'NW_003571059.1', - 'HSCHR19LRC_PGF1_CTG1': 'NW_003571060.1', - 'HSCHR19LRC_PGF2_CTG1': 'NW_003571061.1', - 'HSCHR20_1_CTG1': 'NW_003315966.1', - 'HG144_PATCH': 'NW_003871095.1', - 'HG944_PATCH': 'NW_004504304.1', - 'HG506_HG507_HG1000_PATCH': 'NW_003571063.2', - 'HSCHR21_1_CTG1_1': 
'NW_003315967.1', - 'HSCHR21_2_CTG1_1': 'NW_003315968.1', - 'HSCHR21_3_CTG1_1': 'NW_003315969.1', - 'HSCHR21_4_CTG1_1': 'NW_003315970.1', - 'HG237_PATCH': 'NW_004775435.1', - 'HG1487_PATCH': 'NW_004070874.1', - 'HG1486_PATCH': 'NW_004070873.1', - 'HG1488_PATCH': 'NW_004070875.1', - 'HG329_PATCH': 'NW_003871096.1', - 'HSCHR22_1_CTG2': 'NW_003315972.1', - 'HSCHR22_1_CTG1': 'NW_003315971.2', - 'HSCHR22_2_CTG1': 'NW_004504305.1', - 'HG497_PATCH': 'NW_004070876.1', - 'HG480_HG481_PATCH': 'NW_003571064.2', - 'HG1423_PATCH': 'NW_003871098.1', - 'HG1424_PATCH': 'NW_003871099.1', - 'HG1435_PATCH': 'NW_004070879.1', - 'HG29_PATCH': 'NW_004166866.1', - 'HG1436_HG1432_PATCH': 'NW_004070880.2', - 'HG1433_PATCH': 'NW_004070877.1', - 'HG1437_PATCH': 'NW_004070881.1', - 'HG1438_PATCH': 'NW_004070882.1', - 'HG1425_PATCH': 'NW_003871100.1', - 'HG1426_PATCH': 'NW_003871101.3', - 'HG1439_PATCH': 'NW_004070883.1', - 'HG1440_PATCH': 'NW_004070884.1', - 'HG1441_PATCH': 'NW_004070885.1', - 'HG375_PATCH': 'NW_003871102.1', - 'HG1434_PATCH': 'NW_004070878.1', - 'HG1462_PATCH': 'NW_004070891.1', - 'HG1463_PATCH': 'NW_004070892.1', - 'HG1490_PATCH': 'NW_004070893.1', - 'HG1442_PATCH': 'NW_004070886.1', - 'HG1443_HG1444_PATCH': 'NW_004070887.1', - 'HG1453_PATCH': 'NW_004070888.1', - 'HG1458_PATCH': 'NW_004070889.1', - 'HG1459_PATCH': 'NW_004070890.2', - 'HG1497_PATCH': 'NW_003871103.3', - 'HSCHR6_MHC_APD_CTG1': 'NT_167244.1', - 'HSCHR6_MHC_COX_CTG1': 'NT_113891.2', - 'HSCHR6_MHC_DBB_CTG1': 'NT_167245.1', - 'HSCHR6_MHC_MANN_CTG1': 'NT_167246.1', - 'HSCHR6_MHC_MCF_CTG1': 'NT_167247.1', - 'HSCHR6_MHC_QBL_CTG1': 'NT_167248.1', - 'HSCHR6_MHC_SSTO_CTG1': 'NT_167249.1', - 'HSCHR4_1_CTG9': 'NT_167250.1', - 'HSCHR17_1_CTG5': 'NT_167251.1' - } - - GRCh38 = { - "1": "NC_000001.11", - "2": "NC_000002.12", - "3": "NC_000003.12", - "4": "NC_000004.12", - "5": "NC_000005.10", - "6": "NC_000006.12", - "7": "NC_000007.14", - "8": "NC_000008.11", - "9": "NC_000009.12", - "10": "NC_000010.11", - "11": 
"NC_000011.10", - "12": "NC_000012.12", - "13": "NC_000013.11", - "14": "NC_000014.9", - "15": "NC_000015.10", - "16": "NC_000016.10", - "17": "NC_000017.11", - "18": "NC_000018.10", - "19": "NC_000019.10", - "20": "NC_000020.11", - "21": "NC_000021.9", - "22": "NC_000022.11", - "23": "NC_000023.11", - "24": "NC_000024.10", - "x": "NC_000023.11", - "y": "NC_000024.10", - "X": "NC_000023.11", - "Y": "NC_000024.10", - "M": "NC_012920.1", - "m": "NC_012920.1", - - # UCSC Alts - "10_GL383545V1_ALT": "NW_003315934.1", - "10_GL383546V1_ALT": "NW_003315935.1", - "10_KI270824V1_ALT": "NT_187579.1", - "10_KI270825V1_ALT": "NT_187580.1", - "11_GL383547V1_ALT": "NW_003315936.1", - "11_JH159136V1_ALT": "NW_003871073.1", - "11_JH159137V1_ALT": "NW_003871074.1", - "11_KI270721V1_RANDOM": "NT_187376.1", - "11_KI270826V1_ALT": "NT_187581.1", - "11_KI270827V1_ALT": "NT_187582.1", - "11_KI270829V1_ALT": "NT_187583.1", - "11_KI270830V1_ALT": "NT_187584.1", - "11_KI270831V1_ALT": "NT_187585.1", - "11_KI270832V1_ALT": "NT_187586.1", - "11_KI270902V1_ALT": "NT_187656.1", - "11_KI270903V1_ALT": "NT_187657.1", - "11_KI270927V1_ALT": "NT_187681.1", - "12_GL383549V1_ALT": "NW_003315938.1", - "12_GL383550V2_ALT": "NW_003315939.2", - "12_GL383551V1_ALT": "NW_003315940.1", - "12_GL383552V1_ALT": "NW_003315941.1", - "12_GL383553V2_ALT": "NW_003315942.2", - "12_GL877875V1_ALT": "NW_003571049.1", - "12_GL877876V1_ALT": "NW_003571050.1", - "12_KI270833V1_ALT": "NT_187589.1", - "12_KI270834V1_ALT": "NT_187590.1", - "12_KI270835V1_ALT": "NT_187587.1", - "12_KI270836V1_ALT": "NT_187591.1", - "12_KI270837V1_ALT": "NT_187588.1", - "12_KI270904V1_ALT": "NT_187658.1", - "13_KI270838V1_ALT": "NT_187592.1", - "13_KI270839V1_ALT": "NT_187593.1", - "13_KI270840V1_ALT": "NT_187594.1", - "13_KI270841V1_ALT": "NT_187595.1", - "13_KI270842V1_ALT": "NT_187596.1", - "13_KI270843V1_ALT": "NT_187597.1", - "14_GL000009V2_RANDOM": "NT_113796.3", - "14_GL000194V1_RANDOM": "NT_113888.1", - "14_GL000225V1_RANDOM": 
"NT_167219.1", - "14_KI270722V1_RANDOM": "NT_187377.1", - "14_KI270723V1_RANDOM": "NT_187378.1", - "14_KI270724V1_RANDOM": "NT_187379.1", - "14_KI270725V1_RANDOM": "NT_187380.1", - "14_KI270726V1_RANDOM": "NT_187381.1", - "14_KI270844V1_ALT": "NT_187598.1", - "14_KI270845V1_ALT": "NT_187599.1", - "14_KI270846V1_ALT": "NT_187600.1", - "14_KI270847V1_ALT": "NT_187601.1", - "15_GL383554V1_ALT": "NW_003315943.1", - "15_GL383555V2_ALT": "NW_003315944.2", - "15_KI270727V1_RANDOM": "NT_187382.1", - "15_KI270848V1_ALT": "NT_187603.1", - "15_KI270849V1_ALT": "NT_187605.1", - "15_KI270850V1_ALT": "NT_187606.1", - "15_KI270851V1_ALT": "NT_187604.1", - "15_KI270852V1_ALT": "NT_187602.1", - "15_KI270905V1_ALT": "NT_187660.1", - "15_KI270906V1_ALT": "NT_187659.1", - "16_GL383556V1_ALT": "NW_003315945.1", - "16_GL383557V1_ALT": "NW_003315946.1", - "16_KI270728V1_RANDOM": "NT_187383.1", - "16_KI270853V1_ALT": "NT_187607.1", - "16_KI270854V1_ALT": "NT_187610.1", - "16_KI270855V1_ALT": "NT_187608.1", - "16_KI270856V1_ALT": "NT_187609.1", - "17_GL000205V2_RANDOM": "NT_113930.2", - "17_GL000258V2_ALT": "NT_167251.2", - "17_GL383563V3_ALT": "NW_003315952.3", - "17_GL383564V2_ALT": "NW_003315953.2", - "17_GL383565V1_ALT": "NW_003315954.1", - "17_GL383566V1_ALT": "NW_003315955.1", - "17_JH159146V1_ALT": "NW_003871091.1", - "17_JH159147V1_ALT": "NW_003871092.1", - "17_JH159148V1_ALT": "NW_003871093.1", - "17_KI270729V1_RANDOM": "NT_187384.1", - "17_KI270730V1_RANDOM": "NT_187385.1", - "17_KI270857V1_ALT": "NT_187614.1", - "17_KI270858V1_ALT": "NT_187615.1", - "17_KI270859V1_ALT": "NT_187616.1", - "17_KI270860V1_ALT": "NT_187612.1", - "17_KI270861V1_ALT": "NT_187611.1", - "17_KI270862V1_ALT": "NT_187613.1", - "17_KI270907V1_ALT": "NT_187662.1", - "17_KI270908V1_ALT": "NT_187663.1", - "17_KI270909V1_ALT": "NT_187661.1", - "17_KI270910V1_ALT": "NT_187664.1", - "18_GL383567V1_ALT": "NW_003315956.1", - "18_GL383568V1_ALT": "NW_003315957.1", - "18_GL383569V1_ALT": "NW_003315958.1", - 
"18_GL383570V1_ALT": "NW_003315959.1", - "18_GL383571V1_ALT": "NW_003315960.1", - "18_GL383572V1_ALT": "NW_003315961.1", - "18_KI270863V1_ALT": "NT_187617.1", - "18_KI270864V1_ALT": "NT_187618.1", - "18_KI270911V1_ALT": "NT_187666.1", - "18_KI270912V1_ALT": "NT_187665.1", - "19_GL000209V2_ALT": "NT_113949.2", - "19_GL383573V1_ALT": "NW_003315962.1", - "19_GL383574V1_ALT": "NW_003315963.1", - "19_GL383575V2_ALT": "NW_003315964.2", - "19_GL383576V1_ALT": "NW_003315965.1", - "19_GL949746V1_ALT": "NW_003571054.1", - "19_GL949747V2_ALT": "NW_003571055.2", - "19_GL949748V2_ALT": "NW_003571056.2", - "19_GL949749V2_ALT": "NW_003571057.2", - "19_GL949750V2_ALT": "NW_003571058.2", - "19_GL949751V2_ALT": "NW_003571059.2", - "19_GL949752V1_ALT": "NW_003571060.1", - "19_GL949753V2_ALT": "NW_003571061.2", - "19_KI270865V1_ALT": "NT_187621.1", - "19_KI270866V1_ALT": "NT_187619.1", - "19_KI270867V1_ALT": "NT_187620.1", - "19_KI270868V1_ALT": "NT_187622.1", - "19_KI270882V1_ALT": "NT_187636.1", - "19_KI270883V1_ALT": "NT_187637.1", - "19_KI270884V1_ALT": "NT_187638.1", - "19_KI270885V1_ALT": "NT_187639.1", - "19_KI270886V1_ALT": "NT_187640.1", - "19_KI270887V1_ALT": "NT_187641.1", - "19_KI270888V1_ALT": "NT_187642.1", - "19_KI270889V1_ALT": "NT_187643.1", - "19_KI270890V1_ALT": "NT_187644.1", - "19_KI270891V1_ALT": "NT_187645.1", - "19_KI270914V1_ALT": "NT_187668.1", - "19_KI270915V1_ALT": "NT_187669.1", - "19_KI270916V1_ALT": "NT_187670.1", - "19_KI270917V1_ALT": "NT_187671.1", - "19_KI270918V1_ALT": "NT_187672.1", - "19_KI270919V1_ALT": "NT_187673.1", - "19_KI270920V1_ALT": "NT_187674.1", - "19_KI270921V1_ALT": "NT_187675.1", - "19_KI270922V1_ALT": "NT_187676.1", - "19_KI270923V1_ALT": "NT_187677.1", - "19_KI270929V1_ALT": "NT_187683.1", - "19_KI270930V1_ALT": "NT_187684.1", - "19_KI270931V1_ALT": "NT_187685.1", - "19_KI270932V1_ALT": "NT_187686.1", - "19_KI270933V1_ALT": "NT_187687.1", - "19_KI270938V1_ALT": "NT_187693.1", - "1_GL383518V1_ALT": "NW_003315905.1", - 
"1_GL383519V1_ALT": "NW_003315906.1", - "1_GL383520V2_ALT": "NW_003315907.2", - "1_KI270706V1_RANDOM": "NT_187361.1", - "1_KI270707V1_RANDOM": "NT_187362.1", - "1_KI270708V1_RANDOM": "NT_187363.1", - "1_KI270709V1_RANDOM": "NT_187364.1", - "1_KI270710V1_RANDOM": "NT_187365.1", - "1_KI270711V1_RANDOM": "NT_187366.1", - "1_KI270712V1_RANDOM": "NT_187367.1", - "1_KI270713V1_RANDOM": "NT_187368.1", - "1_KI270714V1_RANDOM": "NT_187369.1", - "1_KI270759V1_ALT": "NT_187516.1", - "1_KI270760V1_ALT": "NT_187514.1", - "1_KI270761V1_ALT": "NT_187518.1", - "1_KI270762V1_ALT": "NT_187515.1", - "1_KI270763V1_ALT": "NT_187519.1", - "1_KI270764V1_ALT": "NT_187521.1", - "1_KI270765V1_ALT": "NT_187520.1", - "1_KI270766V1_ALT": "NT_187517.1", - "1_KI270892V1_ALT": "NT_187646.1", - "20_GL383577V2_ALT": "NW_003315966.2", - "20_KI270869V1_ALT": "NT_187623.1", - "20_KI270870V1_ALT": "NT_187624.1", - "20_KI270871V1_ALT": "NT_187625.1", - "21_GL383578V2_ALT": "NW_003315967.2", - "21_GL383579V2_ALT": "NW_003315968.2", - "21_GL383580V2_ALT": "NW_003315969.2", - "21_GL383581V2_ALT": "NW_003315970.2", - "21_KI270872V1_ALT": "NT_187626.1", - "21_KI270873V1_ALT": "NT_187627.1", - "21_KI270874V1_ALT": "NT_187628.1", - "22_GL383582V2_ALT": "NW_003315971.2", - "22_GL383583V2_ALT": "NW_003315972.2", - "22_KB663609V1_ALT": "NW_004504305.1", - "22_KI270731V1_RANDOM": "NT_187386.1", - "22_KI270732V1_RANDOM": "NT_187387.1", - "22_KI270733V1_RANDOM": "NT_187388.1", - "22_KI270734V1_RANDOM": "NT_187389.1", - "22_KI270735V1_RANDOM": "NT_187390.1", - "22_KI270736V1_RANDOM": "NT_187391.1", - "22_KI270737V1_RANDOM": "NT_187392.1", - "22_KI270738V1_RANDOM": "NT_187393.1", - "22_KI270739V1_RANDOM": "NT_187394.1", - "22_KI270875V1_ALT": "NT_187629.1", - "22_KI270876V1_ALT": "NT_187630.1", - "22_KI270877V1_ALT": "NT_187631.1", - "22_KI270878V1_ALT": "NT_187632.1", - "22_KI270879V1_ALT": "NT_187633.1", - "22_KI270928V1_ALT": "NT_187682.1", - "2_GL383521V1_ALT": "NW_003315908.1", - "2_GL383522V1_ALT": 
"NW_003315909.1", - "2_GL582966V2_ALT": "NW_003571033.2", - "2_KI270715V1_RANDOM": "NT_187370.1", - "2_KI270716V1_RANDOM": "NT_187371.1", - "2_KI270767V1_ALT": "NT_187523.1", - "2_KI270768V1_ALT": "NT_187528.1", - "2_KI270769V1_ALT": "NT_187522.1", - "2_KI270770V1_ALT": "NT_187525.1", - "2_KI270771V1_ALT": "NT_187530.1", - "2_KI270772V1_ALT": "NT_187524.1", - "2_KI270773V1_ALT": "NT_187526.1", - "2_KI270774V1_ALT": "NT_187529.1", - "2_KI270775V1_ALT": "NT_187531.1", - "2_KI270776V1_ALT": "NT_187527.1", - "2_KI270893V1_ALT": "NT_187647.1", - "2_KI270894V1_ALT": "NT_187648.1", - "3_GL000221V1_RANDOM": "NT_167215.1", - "3_GL383526V1_ALT": "NW_003315913.1", - "3_JH636055V2_ALT": "NW_003871060.2", - "3_KI270777V1_ALT": "NT_187533.1", - "3_KI270778V1_ALT": "NT_187536.1", - "3_KI270779V1_ALT": "NT_187532.1", - "3_KI270780V1_ALT": "NT_187537.1", - "3_KI270781V1_ALT": "NT_187538.1", - "3_KI270782V1_ALT": "NT_187534.1", - "3_KI270783V1_ALT": "NT_187535.1", - "3_KI270784V1_ALT": "NT_187539.1", - "3_KI270895V1_ALT": "NT_187649.1", - "3_KI270924V1_ALT": "NT_187678.1", - "3_KI270934V1_ALT": "NT_187688.1", - "3_KI270935V1_ALT": "NT_187689.1", - "3_KI270936V1_ALT": "NT_187690.1", - "3_KI270937V1_ALT": "NT_187691.1", - "4_GL000008V2_RANDOM": "NT_113793.3", - "4_GL000257V2_ALT": "NT_167250.2", - "4_GL383527V1_ALT": "NW_003315914.1", - "4_GL383528V1_ALT": "NW_003315915.1", - "4_KI270785V1_ALT": "NT_187542.1", - "4_KI270786V1_ALT": "NT_187543.1", - "4_KI270787V1_ALT": "NT_187541.1", - "4_KI270788V1_ALT": "NT_187544.1", - "4_KI270789V1_ALT": "NT_187545.1", - "4_KI270790V1_ALT": "NT_187540.1", - "4_KI270896V1_ALT": "NT_187650.1", - "4_KI270925V1_ALT": "NT_187679.1", - "5_GL000208V1_RANDOM": "NT_113948.1", - "5_GL339449V2_ALT": "NW_003315917.2", - "5_GL383530V1_ALT": "NW_003315918.1", - "5_GL383531V1_ALT": "NW_003315919.1", - "5_GL383532V1_ALT": "NW_003315920.1", - "5_GL949742V1_ALT": "NW_003571036.1", - "5_KI270791V1_ALT": "NT_187547.1", - "5_KI270792V1_ALT": "NT_187548.1", - 
"5_KI270793V1_ALT": "NT_187550.1", - "5_KI270794V1_ALT": "NT_187551.1", - "5_KI270795V1_ALT": "NT_187546.1", - "5_KI270796V1_ALT": "NT_187549.1", - "5_KI270897V1_ALT": "NT_187651.1", - "5_KI270898V1_ALT": "NT_187652.1", - "6_GL000250V2_ALT": "NT_167244.2", - "6_GL000251V2_ALT": "NT_113891.3", - "6_GL000252V2_ALT": "NT_167245.2", - "6_GL000253V2_ALT": "NT_167246.2", - "6_GL000254V2_ALT": "NT_167247.2", - "6_GL000255V2_ALT": "NT_167248.2", - "6_GL000256V2_ALT": "NT_167249.2", - "6_GL383533V1_ALT": "NW_003315921.1", - "6_KB021644V2_ALT": "NW_004166862.2", - "6_KI270758V1_ALT": "NT_187692.1", - "6_KI270797V1_ALT": "NT_187552.1", - "6_KI270798V1_ALT": "NT_187553.1", - "6_KI270799V1_ALT": "NT_187554.1", - "6_KI270800V1_ALT": "NT_187555.1", - "6_KI270801V1_ALT": "NT_187556.1", - "6_KI270802V1_ALT": "NT_187557.1", - "7_GL383534V2_ALT": "NW_003315922.2", - "7_KI270803V1_ALT": "NT_187562.1", - "7_KI270804V1_ALT": "NT_187558.1", - "7_KI270805V1_ALT": "NT_187560.1", - "7_KI270806V1_ALT": "NT_187559.1", - "7_KI270807V1_ALT": "NT_187563.1", - "7_KI270808V1_ALT": "NT_187564.1", - "7_KI270809V1_ALT": "NT_187561.1", - "7_KI270899V1_ALT": "NT_187653.1", - "8_KI270810V1_ALT": "NT_187567.1", - "8_KI270811V1_ALT": "NT_187565.1", - "8_KI270812V1_ALT": "NT_187568.1", - "8_KI270813V1_ALT": "NT_187570.1", - "8_KI270814V1_ALT": "NT_187566.1", - "8_KI270815V1_ALT": "NT_187569.1", - "8_KI270816V1_ALT": "NT_187571.1", - "8_KI270817V1_ALT": "NT_187573.1", - "8_KI270818V1_ALT": "NT_187572.1", - "8_KI270819V1_ALT": "NT_187574.1", - "8_KI270820V1_ALT": "NT_187575.1", - "8_KI270821V1_ALT": "NT_187576.1", - "8_KI270822V1_ALT": "NT_187577.1", - "8_KI270900V1_ALT": "NT_187654.1", - "8_KI270901V1_ALT": "NT_187655.1", - "8_KI270926V1_ALT": "NT_187680.1", - "9_GL383539V1_ALT": "NW_003315928.1", - "9_GL383540V1_ALT": "NW_003315929.1", - "9_GL383541V1_ALT": "NW_003315930.1", - "9_GL383542V1_ALT": "NW_003315931.1", - "9_KI270717V1_RANDOM": "NT_187372.1", - "9_KI270718V1_RANDOM": "NT_187373.1", - 
"9_KI270719V1_RANDOM": "NT_187374.1", - "9_KI270720V1_RANDOM": "NT_187375.1", - "9_KI270823V1_ALT": "NT_187578.1", - "UN_GL000195V1": "NT_113901.1", - "UN_GL000213V1": "NT_167208.1", - "UN_GL000214V1": "NT_167209.1", - "UN_GL000216V2": "NT_167211.2", - "UN_GL000218V1": "NT_113889.1", - "UN_GL000219V1": "NT_167213.1", - "UN_GL000220V1": "NT_167214.1", - "UN_GL000224V1": "NT_167218.1", - "UN_GL000226V1": "NT_167220.1", - "UN_KI270302V1": "NT_187396.1", - "UN_KI270303V1": "NT_187398.1", - "UN_KI270304V1": "NT_187397.1", - "UN_KI270305V1": "NT_187399.1", - "UN_KI270310V1": "NT_187402.1", - "UN_KI270311V1": "NT_187406.1", - "UN_KI270312V1": "NT_187405.1", - "UN_KI270315V1": "NT_187404.1", - "UN_KI270316V1": "NT_187403.1", - "UN_KI270317V1": "NT_187407.1", - "UN_KI270320V1": "NT_187401.1", - "UN_KI270322V1": "NT_187400.1", - "UN_KI270329V1": "NT_187459.1", - "UN_KI270330V1": "NT_187458.1", - "UN_KI270333V1": "NT_187461.1", - "UN_KI270334V1": "NT_187460.1", - "UN_KI270335V1": "NT_187462.1", - "UN_KI270336V1": "NT_187465.1", - "UN_KI270337V1": "NT_187466.1", - "UN_KI270338V1": "NT_187463.1", - "UN_KI270340V1": "NT_187464.1", - "UN_KI270362V1": "NT_187469.1", - "UN_KI270363V1": "NT_187467.1", - "UN_KI270364V1": "NT_187468.1", - "UN_KI270366V1": "NT_187470.1", - "UN_KI270371V1": "NT_187494.1", - "UN_KI270372V1": "NT_187491.1", - "UN_KI270373V1": "NT_187492.1", - "UN_KI270374V1": "NT_187490.1", - "UN_KI270375V1": "NT_187493.1", - "UN_KI270376V1": "NT_187489.1", - "UN_KI270378V1": "NT_187471.1", - "UN_KI270379V1": "NT_187472.1", - "UN_KI270381V1": "NT_187486.1", - "UN_KI270382V1": "NT_187488.1", - "UN_KI270383V1": "NT_187482.1", - "UN_KI270384V1": "NT_187484.1", - "UN_KI270385V1": "NT_187487.1", - "UN_KI270386V1": "NT_187480.1", - "UN_KI270387V1": "NT_187475.1", - "UN_KI270388V1": "NT_187478.1", - "UN_KI270389V1": "NT_187473.1", - "UN_KI270390V1": "NT_187474.1", - "UN_KI270391V1": "NT_187481.1", - "UN_KI270392V1": "NT_187485.1", - "UN_KI270393V1": "NT_187483.1", - 
"UN_KI270394V1": "NT_187479.1", - "UN_KI270395V1": "NT_187476.1", - "UN_KI270396V1": "NT_187477.1", - "UN_KI270411V1": "NT_187409.1", - "UN_KI270412V1": "NT_187408.1", - "UN_KI270414V1": "NT_187410.1", - "UN_KI270417V1": "NT_187415.1", - "UN_KI270418V1": "NT_187412.1", - "UN_KI270419V1": "NT_187411.1", - "UN_KI270420V1": "NT_187413.1", - "UN_KI270422V1": "NT_187416.1", - "UN_KI270423V1": "NT_187417.1", - "UN_KI270424V1": "NT_187414.1", - "UN_KI270425V1": "NT_187418.1", - "UN_KI270429V1": "NT_187419.1", - "UN_KI270435V1": "NT_187424.1", - "UN_KI270438V1": "NT_187425.1", - "UN_KI270442V1": "NT_187420.1", - "UN_KI270448V1": "NT_187495.1", - "UN_KI270465V1": "NT_187422.1", - "UN_KI270466V1": "NT_187421.1", - "UN_KI270467V1": "NT_187423.1", - "UN_KI270468V1": "NT_187426.1", - "UN_KI270507V1": "NT_187437.1", - "UN_KI270508V1": "NT_187430.1", - "UN_KI270509V1": "NT_187428.1", - "UN_KI270510V1": "NT_187427.1", - "UN_KI270511V1": "NT_187435.1", - "UN_KI270512V1": "NT_187432.1", - "UN_KI270515V1": "NT_187436.1", - "UN_KI270516V1": "NT_187431.1", - "UN_KI270517V1": "NT_187438.1", - "UN_KI270518V1": "NT_187429.1", - "UN_KI270519V1": "NT_187433.1", - "UN_KI270521V1": "NT_187496.1", - "UN_KI270522V1": "NT_187434.1", - "UN_KI270528V1": "NT_187440.1", - "UN_KI270529V1": "NT_187439.1", - "UN_KI270530V1": "NT_187441.1", - "UN_KI270538V1": "NT_187443.1", - "UN_KI270539V1": "NT_187442.1", - "UN_KI270544V1": "NT_187444.1", - "UN_KI270548V1": "NT_187445.1", - "UN_KI270579V1": "NT_187450.1", - "UN_KI270580V1": "NT_187448.1", - "UN_KI270581V1": "NT_187449.1", - "UN_KI270582V1": "NT_187454.1", - "UN_KI270583V1": "NT_187446.1", - "UN_KI270584V1": "NT_187453.1", - "UN_KI270587V1": "NT_187447.1", - "UN_KI270588V1": "NT_187455.1", - "UN_KI270589V1": "NT_187451.1", - "UN_KI270590V1": "NT_187452.1", - "UN_KI270591V1": "NT_187457.1", - "UN_KI270593V1": "NT_187456.1", - "UN_KI270741V1": "NT_187497.1", - "UN_KI270742V1": "NT_187513.1", - "UN_KI270743V1": "NT_187498.1", - "UN_KI270744V1": 
"NT_187499.1", - "UN_KI270745V1": "NT_187500.1", - "UN_KI270746V1": "NT_187501.1", - "UN_KI270747V1": "NT_187502.1", - "UN_KI270748V1": "NT_187503.1", - "UN_KI270749V1": "NT_187504.1", - "UN_KI270750V1": "NT_187505.1", - "UN_KI270751V1": "NT_187506.1", - "UN_KI270752V1": "NT_187507.1", - "UN_KI270753V1": "NT_187508.1", - "UN_KI270754V1": "NT_187509.1", - "UN_KI270755V1": "NT_187510.1", - "UN_KI270756V1": "NT_187511.1", - "UN_KI270757V1": "NT_187512.1", - "X_KI270880V1_ALT": "NT_187634.1", - "X_KI270881V1_ALT": "NT_187635.1", - "X_KI270913V1_ALT": "NT_187667.1", - "Y_KI270740V1_RANDOM": "NT_187395.1", - - # GRC Alts - 'HG1342_HG2282_PATCH': 'NW_012132914.1', - 'HSCHR1_5_CTG3': 'NW_015495298.1', - 'HG2095_PATCH': 'NW_011332688.1', - 'HSCHR1_4_CTG3': 'NW_014040926.1', - 'HG2058_PATCH': 'NW_009646195.1', - 'HSCHR1_8_CTG3': 'NW_018654706.1', - 'HG460_PATCH': 'NW_019805487.1', - 'HG986_PATCH': 'NW_009646194.1', - 'HSCHR1_9_CTG3': 'NW_018654707.1', - 'HSCHR1_3_CTG3': 'NW_014040925.1', - 'HSCHR1_6_CTG3': 'NW_017852928.1', - 'HG2104_PATCH': 'NW_009646196.1', - 'HG1832_PATCH': 'NW_011332687.1', - 'HG2002_PATCH': 'NW_018654708.1', - 'HSCHR1_5_CTG32_1': 'NW_014040927.1', - 'HG2290_PATCH': 'NW_012132915.1', - 'HSCHR2_7_CTG7_2': 'NW_018654709.1', - 'HSCHR2_6_CTG7_2': 'NW_015495299.1', - 'HSCHR2_8_CTG7_2': 'NW_018654710.1', - 'HG2232_PATCH': 'NW_011332690.1', - 'HG2233_PATCH': 'NW_011332689.1', - 'HG2236_PATCH': 'NW_017363813.1', - 'HG2066_PATCH': 'NW_009646197.1', - 'HG2235_PATCH': 'NW_012132916.1', - 'HG126_PATCH': 'NW_011332691.1', - 'HSCHR3_4_CTG1': 'NW_018654711.1', - 'HG2237_PATCH': 'NW_012132917.1', - 'HG2022_PATCH': 'NW_009646198.1', - 'HG2133_PATCH': 'NW_019805491.1', - 'HSCHR3_6_CTG2_1': 'NW_019805492.1', - 'HSCHR3_9_CTG2_1': 'NW_019805490.1', - 'HSCHR3_8_CTG2_1': 'NW_019805489.1', - 'HSCHR3_7_CTG2_1': 'NW_019805488.1', - 'HSCHR4_2_CTG4': 'NW_013171799.1', - 'HSCHR4_8_CTG12': 'NW_013171800.1', - 'HSCHR4_9_CTG12': 'NW_013171801.1', - 'HSCHR4_12_CTG12': 'NW_017363814.1', 
- 'HG2023_PATCH': 'NW_015495300.1', - 'HSCHR4_11_CTG12': 'NW_015495301.1', - 'HSCHR5_9_CTG1': 'NW_018654712.1', - 'HSCHR5_7_CTG1': 'NW_009646199.1', - 'HSCHR5_8_CTG1': 'NW_016107297.1', - 'HG30_PATCH': 'NW_016107298.1', - 'HG2057_PATCH': 'NW_018654713.1', - 'HSCHR6_1_CTG10': 'NW_013171803.1', - 'HG1651_PATCH': 'NW_012132918.1', - 'HG2128_PATCH': 'NW_009646200.1', - 'HG2072_PATCH': 'NW_013171802.1', - 'HG2121_PATCH': 'NW_017363815.1', - 'HSCHR7_3_CTG1': 'NW_019805493.1', - 'HG2088_PATCH': 'NW_017852929.1', - 'HG2266_PATCH': 'NW_017852930.1', - 'HG708_PATCH': 'NW_018654714.1', - 'HSCHR7_3_CTG4_4': 'NW_018654715.1', - 'HG2239_PATCH': 'NW_012132919.1', - 'HG76_PATCH': 'NW_018654717.1', - 'HG2068_PATCH': 'NW_017852932.1', - 'HG2067_PATCH': 'NW_017852931.1', - 'HSCHR8_7_CTG7': 'NW_019805494.1', - 'HG2419_PATCH': 'NW_018654716.1', - 'HSCHR9_1_CTG6': 'NW_013171804.1', - 'HSCHR9_1_CTG7': 'NW_013171805.1', - 'HG2030_PATCH': 'NW_009646201.1', - 'HG2244_HG2245_PATCH': 'NW_011332694.1', - 'HSCHR10_1_CTG6': 'NW_013171806.1', - 'HG2191_PATCH': 'NW_009646202.1', - 'HG2334_PATCH': 'NW_013171807.1', - 'HG2242_HG2243_PATCH': 'NW_011332693.1', - 'HG2241_PATCH': 'NW_011332692.1', - 'HG107_PATCH': 'NW_015148966.1', - 'HSCHR11_1_CTG1_2': 'NW_011332695.1', - 'HG2114_PATCH': 'NW_019805496.1', - 'HG2060_PATCH': 'NW_019805495.1', - 'HG1708_PATCH': 'NW_017363816.1', - 'HSCHR11_1_CTG3_1': 'NW_019805498.1', - 'HSCHR11_2_CTG8': 'NW_019805497.1', - 'HG2116_PATCH': 'NW_013171808.1', - 'HG2217_PATCH': 'NW_009646203.1', - 'HSCHR12_2_CTG1': 'NW_013171809.1', - 'HG1815_PATCH': 'NW_018654718.1', - 'HG1362_PATCH': 'NW_011332696.1', - 'HG23_PATCH': 'NW_009646204.1', - 'HSCHR12_8_CTG2_1': 'NW_018654720.1', - 'HG2063_PATCH': 'NW_015148967.1', - 'HG2047_PATCH': 'NW_018654719.1', - 'HG2247_PATCH': 'NW_011332697.1', - 'HSCHR12_9_CTG2_1': 'NW_019805499.1', - 'HG2291_PATCH': 'NW_011332699.1', - 'HSCHR13_1_CTG7': 'NW_013171810.1', - 'HG2216_PATCH': 'NW_009646205.1', - 'HG2249_PATCH': 'NW_011332700.1', - 
'HSCHR13_1_CTG8': 'NW_013171811.1', - 'HG2288_HG2289_PATCH': 'NW_011332698.1', - 'HG1_PATCH': 'NW_018654722.1', - 'HSCHR14_8_CTG1': 'NW_018654721.1', - 'HG2139_PATCH': 'NW_011332701.1', - 'HSCHR15_6_CTG8': 'NW_012132920.1', - 'HSCHR16_5_CTG1': 'NW_013171812.1', - 'HG2263_PATCH': 'NW_019805500.1', - 'HG926_PATCH': 'NW_017852933.1', - 'HSCHR16_4_CTG3_1': 'NW_013171813.1', - 'HSCHR16_5_CTG3_1': 'NW_018654723.1', - 'HSCHR16_3_CTG3_1': 'NW_012132921.1', - 'HG2285_HG106_HG2252_PATCH': 'NW_017363817.1', - 'HG2046_PATCH': 'NW_016107299.1', - 'HSCHR17_3_CTG1': 'NW_017363819.1', - 'HSCHR17_11_CTG4': 'NW_017363818.1', - 'HSCHR17_12_CTG4': 'NW_019805501.1', - 'HSCHR18_1_CTG1': 'NW_019805503.1', - 'HSCHR18_5_CTG1_1': 'NW_014040928.1', - 'HG2412_PATCH': 'NW_019805502.1', - 'HG2213_PATCH': 'NW_013171814.1', - 'HG2442_PATCH': 'NW_018654724.1', - 'HG26_PATCH': 'NW_014040929.1', - 'HG2021_PATCH': 'NW_009646206.1', - 'HSCHR19KIR_0019-4656-A_CTG3_1': 'NW_016107300.1', - 'HSCHR19KIR_CA01-TA01_1_CTG3_1': 'NW_016107301.1', - 'HSCHR19KIR_CA01-TA01_2_CTG3_1': 'NW_016107302.1', - 'HSCHR19KIR_CA01-TB04_CTG3_1': 'NW_016107303.1', - 'HSCHR19KIR_CA01-TB01_CTG3_1': 'NW_016107304.1', - 'HSCHR19KIR_HG2394_CTG3_1': 'NW_016107305.1', - 'HSCHR19KIR_502960008-2_CTG3_1': 'NW_016107306.1', - 'HSCHR19KIR_502960008-1_CTG3_1': 'NW_016107307.1', - 'HSCHR19KIR_0010-5217-AB_CTG3_1': 'NW_016107308.1', - 'HSCHR19KIR_7191059-1_CTG3_1': 'NW_016107309.1', - 'HSCHR19KIR_0019-4656-B_CTG3_1': 'NW_016107310.1', - 'HSCHR19KIR_CA04_CTG3_1': 'NW_016107311.1', - 'HSCHR19KIR_7191059-2_CTG3_1': 'NW_016107313.1', - 'HSCHR19KIR_HG2396_CTG3_1': 'NW_016107314.1', - 'HSCHR19KIR_HG2393_CTG3_1': 'NW_016107312.1', - 'HSCHR22_4_CTG1': 'NW_009646207.1', - 'HSCHR22_6_CTG1': 'NW_014040930.1', - 'HSCHR22_7_CTG1': 'NW_014040931.1', - 'HSCHR22_5_CTG1': 'NW_009646208.1', - 'HSCHR22_8_CTG1': 'NW_015148968.1', - 'HG1311_PATCH': 'NW_015148969.1', - 'HSCHRX_3_CTG7': 'NW_017363820.1', - 'HG1531_PATCH': 'NW_018654725.1', - 'HG1535_PATCH': 
'NW_018654726.1', - 'HG2062_PATCH': 'NW_009646209.1', - 'HSCHR1_1_CTG3': 'NT_187515.1', - 'HSCHR1_2_CTG3': 'NT_187517.1', - 'HSCHR1_1_CTG11': 'NT_187514.1', - 'HSCHR1_4_CTG31': 'NT_187520.1', - 'HSCHR1_1_CTG31': 'NW_003315905.1', - 'HSCHR1_2_CTG31': 'NW_003315906.1', - 'HSCHR1_3_CTG31': 'NW_003315907.2', - 'HSCHR1_4_CTG32_1': 'NT_187521.1', - 'HSCHR1_3_CTG32_1': 'NT_187519.1', - 'HSCHR1_1_CTG32_1': 'NT_187516.1', - 'HSCHR1_2_CTG32_1': 'NT_187518.1', - 'HSCHR2_2_CTG1': 'NT_187525.1', - 'HSCHR2_3_CTG1': 'NT_187526.1', - 'HSCHR2_4_CTG1': 'NT_187529.1', - 'HSCHR2_1_CTG1': 'NT_187522.1', - 'HSCHR2_1_CTG5': 'NW_003315908.1', - 'HSCHR2_1_CTG7': 'NT_187524.1', - 'HSCHR2_5_CTG7_2': 'NT_187531.1', - 'HSCHR2_4_CTG7_2': 'NT_187530.1', - 'HSCHR2_3_CTG7_2': 'NT_187528.1', - 'HSCHR2_2_CTG7_2': 'NW_003571033.2', - 'HSCHR2_1_CTG7_2': 'NW_003315909.1', - 'HSCHR2_3_CTG15': 'NT_187527.1', - 'HSCHR2_1_CTG15': 'NT_187523.1', - 'HSCHR3_1_CTG1': 'NW_003871060.2', - 'HSCHR3_3_CTG1': 'NT_187535.1', - 'HSCHR3_4_CTG2_1': 'NT_187537.1', - 'HSCHR3_1_CTG2_1': 'NW_003315913.1', - 'HSCHR3_2_CTG2_1': 'NT_187533.1', - 'HSCHR3_3_CTG2_1': 'NT_187536.1', - 'HSCHR3_5_CTG2_1': 'NT_187538.1', - 'HSCHR3_1_CTG3': 'NT_187532.1', - 'HSCHR3_2_CTG3': 'NT_187534.1', - 'HSCHR3_9_CTG3': 'NT_187539.1', - 'HSCHR4_1_CTG4': 'NT_187540.1', - 'HSCHR4_1_CTG6': 'NW_003315915.1', - 'HSCHR4_1_CTG8_1': 'NT_187541.1', - 'HSCHR4_1_CTG9': 'NT_167250.2', - 'HSCHR4_4_CTG12': 'NT_187544.1', - 'HSCHR4_1_CTG12': 'NW_003315914.1', - 'HSCHR4_2_CTG12': 'NT_187542.1', - 'HSCHR4_5_CTG12': 'NT_187545.1', - 'HSCHR4_3_CTG12': 'NT_187543.1', - 'HSCHR5_5_CTG1': 'NT_187550.1', - 'HSCHR5_4_CTG1': 'NT_187548.1', - 'HSCHR5_3_CTG1': 'NT_187547.1', - 'HSCHR5_1_CTG1': 'NW_003315920.1', - 'HSCHR5_2_CTG1': 'NW_003571036.1', - 'HSCHR5_6_CTG1': 'NT_187551.1', - 'HSCHR5_2_CTG1_1': 'NW_003315917.2', - 'HSCHR5_3_CTG1_1': 'NW_003315918.1', - 'HSCHR5_4_CTG1_1': 'NT_187549.1', - 'HSCHR5_1_CTG5': 'NW_003315919.1', - 'HSCHR5_2_CTG5': 'NT_187546.1', - 
'HSCHR6_MHC_APD_CTG1': 'NT_167244.2', - 'HSCHR6_1_CTG7': 'NT_187555.1', - 'HSCHR6_1_CTG6': 'NT_187554.1', - 'HSCHR6_1_CTG2': 'NW_003315921.1', - 'HSCHR6_1_CTG8': 'NT_187556.1', - 'HSCHR6_1_CTG9': 'NT_187557.1', - 'HSCHR6_1_CTG3': 'NW_004166862.2', - 'HSCHR6_1_CTG4': 'NT_187552.1', - 'HSCHR6_1_CTG5': 'NT_187553.1', - 'HSCHR7_1_CTG1': 'NT_187558.1', - 'HSCHR7_2_CTG4_4': 'NT_187561.1', - 'HSCHR7_1_CTG4_4': 'NT_187559.1', - 'HSCHR7_1_CTG6': 'NW_003315922.2', - 'HSCHR7_2_CTG6': 'NT_187562.1', - 'HSCHR7_3_CTG6': 'NT_187564.1', - 'HSCHR7_2_CTG7': 'NT_187563.1', - 'HSCHR7_1_CTG7': 'NT_187560.1', - 'HSCHR8_4_CTG1': 'NT_187572.1', - 'HSCHR8_2_CTG1': 'NT_187568.1', - 'HSCHR8_1_CTG1': 'NT_187565.1', - 'HSCHR8_8_CTG1': 'NT_187576.1', - 'HSCHR8_3_CTG1': 'NT_187570.1', - 'HSCHR8_9_CTG1': 'NT_187577.1', - 'HSCHR8_1_CTG6': 'NT_187566.1', - 'HSCHR8_1_CTG7': 'NT_187567.1', - 'HSCHR8_5_CTG7': 'NT_187574.1', - 'HSCHR8_6_CTG7': 'NT_187575.1', - 'HSCHR8_4_CTG7': 'NT_187573.1', - 'HSCHR8_3_CTG7': 'NT_187571.1', - 'HSCHR8_2_CTG7': 'NT_187569.1', - 'HSCHR9_1_CTG1': 'NW_003315928.1', - 'HSCHR9_1_CTG2': 'NW_003315929.1', - 'HSCHR9_1_CTG3': 'NW_003315930.1', - 'HSCHR9_1_CTG4': 'NW_003315931.1', - 'HSCHR9_1_CTG5': 'NT_187578.1', - 'HSCHR10_1_CTG1': 'NW_003315934.1', - 'HSCHR10_1_CTG3': 'NT_187579.1', - 'HSCHR10_1_CTG2': 'NW_003315935.1', - 'HSCHR10_1_CTG4': 'NT_187580.1', - 'HSCHR11_1_CTG8': 'NT_187586.1', - 'HSCHR11_1_CTG6': 'NT_187584.1', - 'HSCHR11_1_CTG7': 'NT_187585.1', - 'HSCHR11_1_CTG5': 'NT_187583.1', - 'HSCHR11_1_CTG1_1': 'NW_003315936.1', - 'HG142_HG150_NOVEL_TEST': 'NW_003871073.1', - 'HG151_NOVEL_TEST': 'NW_003871074.1', - 'HSCHR11_1_CTG3': 'NT_187582.1', - 'HSCHR11_1_CTG2': 'NT_187581.1', - 'HSCHR12_1_CTG1': 'NW_003571049.1', - 'HSCHR12_2_CTG2': 'NW_003571050.1', - 'HSCHR12_5_CTG2': 'NT_187588.1', - 'HSCHR12_1_CTG2': 'NW_003315938.1', - 'HSCHR12_4_CTG2': 'NT_187587.1', - 'HSCHR12_1_CTG2_1': 'NW_003315939.2', - 'HSCHR12_2_CTG2_1': 'NW_003315941.1', - 'HSCHR12_3_CTG2_1': 
'NW_003315942.2', - 'HSCHR12_6_CTG2_1': 'NT_187590.1', - 'HSCHR12_4_CTG2_1': 'NW_003315940.1', - 'HSCHR12_5_CTG2_1': 'NT_187589.1', - 'HSCHR12_7_CTG2_1': 'NT_187591.1', - 'HSCHR13_1_CTG3': 'NT_187594.1', - 'HSCHR13_1_CTG2': 'NT_187593.1', - 'HSCHR13_1_CTG6': 'NT_187597.1', - 'HSCHR13_1_CTG4': 'NT_187595.1', - 'HSCHR13_1_CTG1': 'NT_187592.1', - 'HSCHR13_1_CTG5': 'NT_187596.1', - 'HSCHR14_1_CTG1': 'NT_187598.1', - 'HSCHR14_7_CTG1': 'NT_187601.1', - 'HSCHR14_2_CTG1': 'NT_187599.1', - 'HSCHR14_3_CTG1': 'NT_187600.1', - 'HSCHR15_1_CTG1': 'NT_187602.1', - 'HSCHR15_3_CTG3': 'NT_187604.1', - 'HSCHR15_1_CTG3': 'NT_187603.1', - 'HSCHR15_1_CTG8': 'NW_003315943.1', - 'HSCHR15_3_CTG8': 'NT_187605.1', - 'HSCHR15_2_CTG8': 'NW_003315944.2', - 'HSCHR15_5_CTG8': 'NT_187606.1', - 'HSCHR16_CTG2': 'NT_187610.1', - 'HSCHR16_4_CTG1': 'NT_187609.1', - 'HSCHR16_3_CTG1': 'NT_187608.1', - 'HSCHR16_1_CTG1': 'NT_187607.1', - 'HSCHR16_1_CTG3_1': 'NW_003315945.1', - 'HSCHR16_2_CTG3_1': 'NW_003315946.1', - 'HSCHR17_1_CTG1': 'NW_003315952.3', - 'HSCHR17_2_CTG2': 'NT_187613.1', - 'HSCHR17_1_CTG2': 'NT_187611.1', - 'HSCHR17_7_CTG4': 'NT_187614.1', - 'HSCHR17_4_CTG4': 'NW_003871091.1', - 'HSCHR17_5_CTG4': 'NW_003871092.1', - 'HSCHR17_1_CTG4': 'NW_003315953.2', - 'HSCHR17_1_CTG5': 'NT_167251.2', - 'HSCHR17_2_CTG4': 'NW_003315954.1', - 'HSCHR17_8_CTG4': 'NT_187615.1', - 'HSCHR17_9_CTG4': 'NT_187616.1', - 'HSCHR17_3_CTG4': 'NW_003315955.1', - 'HSCHR17_1_CTG9': 'NT_187612.1', - 'HSCHR18_4_CTG1_1': 'NT_187618.1', - 'HSCHR18_1_CTG1_1': 'NW_003315956.1', - 'HSCHR18_2_CTG1_1': 'NW_003315959.1', - 'HSCHR18_2_CTG2': 'NW_003315960.1', - 'HSCHR18_1_CTG2': 'NW_003315957.1', - 'HSCHR18_1_CTG2_1': 'NW_003315958.1', - 'HSCHR18_2_CTG2_1': 'NW_003315961.1', - 'HSCHR18_3_CTG2_1': 'NT_187617.1', - 'HSCHR19_5_CTG2': 'NT_187622.1', - 'HSCHR19_4_CTG2': 'NT_187621.1', - 'HSCHR19_1_CTG2': 'NW_003315962.1', - 'HSCHR19_2_CTG2': 'NW_003315964.2', - 'HSCHR19_3_CTG2': 'NW_003315965.1', - 'HSCHR19_1_CTG3_1': 'NW_003315963.1', - 
'HSCHR19_2_CTG3_1': 'NT_187619.1', - 'HSCHR19_3_CTG3_1': 'NT_187620.1', - 'HSCHR19LRC_COX1_CTG3_1': 'NW_003571054.1', - 'HSCHR20_1_CTG1': 'NW_003315966.2', - 'HSCHR20_1_CTG2': 'NT_187623.1', - 'HSCHR20_1_CTG4': 'NT_187625.1', - 'HSCHR20_1_CTG3': 'NT_187624.1', - 'HSCHR21_1_CTG1_1': 'NW_003315967.2', - 'HSCHR21_8_CTG1_1': 'NT_187628.1', - 'HSCHR21_6_CTG1_1': 'NT_187627.1', - 'HSCHR21_2_CTG1_1': 'NW_003315968.2', - 'HSCHR21_3_CTG1_1': 'NW_003315969.2', - 'HSCHR21_4_CTG1_1': 'NW_003315970.2', - 'HSCHR21_5_CTG2': 'NT_187626.1', - 'HSCHR22_1_CTG3': 'NT_187629.1', - 'HSCHR22_1_CTG6': 'NT_187632.1', - 'HSCHR22_1_CTG7': 'NT_187633.1', - 'HSCHR22_1_CTG4': 'NT_187630.1', - 'HSCHR22_1_CTG5': 'NT_187631.1', - 'HSCHR22_1_CTG2': 'NW_003315972.2', - 'HSCHR22_1_CTG1': 'NW_003315971.2', - 'HSCHRX_1_CTG3': 'NT_187634.1', - 'HSCHRX_2_CTG12': 'NT_187635.1', - 'HSCHR1_ALT2_1_CTG32_1': 'NT_187646.1', - 'HSCHR2_2_CTG7': 'NT_187648.1', - 'HSCHR2_2_CTG15': 'NT_187647.1', - 'HSCHR3_3_CTG3': 'NT_187649.1', - 'HSCHR4_6_CTG12': 'NT_187650.1', - 'HSCHR5_1_CTG1_1': 'NT_187651.1', - 'HSCHR5_3_CTG5': 'NT_187652.1', - 'HSCHR6_MHC_COX_CTG1': 'NT_113891.3', - 'HSCHR7_2_CTG1': 'NT_187653.1', - 'HSCHR8_6_CTG1': 'NT_187655.1', - 'HSCHR8_5_CTG1': 'NT_187654.1', - 'HSCHR11_2_CTG1': 'NT_187656.1', - 'HSCHR11_2_CTG1_1': 'NT_187657.1', - 'HSCHR12_3_CTG2': 'NT_187658.1', - 'HSCHR15_2_CTG3': 'NT_187659.1', - 'HSCHR15_4_CTG8': 'NT_187660.1', - 'HSCHR17_2_CTG1': 'NT_187662.1', - 'HSCHR17_3_CTG2': 'NT_187664.1', - 'HSCHR17_10_CTG4': 'NT_187661.1', - 'HSCHR17_6_CTG4': 'NW_003871093.1', - 'HSCHR17_2_CTG5': 'NT_187663.1', - 'HSCHR18_ALT21_CTG2_1': 'NT_187665.1', - 'HSCHR18_ALT2_CTG2_1': 'NT_187666.1', - 'HSCHR19LRC_COX2_CTG3_1': 'NW_003571055.2', - 'HSCHR22_2_CTG1': 'NW_004504305.1', - 'HSCHRX_2_CTG3': 'NT_187667.1', - 'HSCHR3_4_CTG3': 'NT_187678.1', - 'HSCHR4_7_CTG12': 'NT_187679.1', - 'HSCHR6_MHC_DBB_CTG1': 'NT_167245.2', - 'HSCHR8_7_CTG1': 'NT_187680.1', - 'HSCHR11_3_CTG1': 'NT_187681.1', - 
'HSCHR19LRC_LRC_I_CTG3_1': 'NW_003571056.2', - 'HSCHR22_3_CTG1': 'NT_187682.1', - 'HSCHR3_5_CTG3': 'NT_187688.1', - 'HSCHR6_MHC_MANN_CTG1': 'NT_167246.2', - 'HSCHR19LRC_LRC_J_CTG3_1': 'NW_003571057.2', - 'HSCHR3_6_CTG3': 'NT_187689.1', - 'HSCHR6_MHC_MCF_CTG1': 'NT_167247.2', - 'HSCHR19LRC_LRC_S_CTG3_1': 'NW_003571058.2', - 'HSCHR3_7_CTG3': 'NT_187690.1', - 'HSCHR6_MHC_QBL_CTG1': 'NT_167248.2', - 'HSCHR19LRC_LRC_T_CTG3_1': 'NW_003571059.2', - 'HSCHR3_8_CTG3': 'NT_187691.1', - 'HSCHR6_MHC_SSTO_CTG1': 'NT_167249.2', - 'HSCHR19LRC_PGF1_CTG3_1': 'NW_003571060.1', - 'HSCHR6_8_CTG1': 'NT_187692.1', - 'HSCHR19LRC_PGF2_CTG3_1': 'NW_003571061.2', - 'HSCHR19_4_CTG3_1': 'NT_187693.1', - 'HSCHR19KIR_FH15_B_HAP_CTG3_1': 'NT_187636.1', - 'HSCHR19KIR_G085_A_HAP_CTG3_1': 'NT_187637.1', - 'HSCHR19KIR_G085_BA1_HAP_CTG3_1': 'NT_187638.1', - 'HSCHR19KIR_G248_A_HAP_CTG3_1': 'NT_187639.1', - 'HSCHR19KIR_G248_BA2_HAP_CTG3_1': 'NT_187640.1', - 'HSCHR19KIR_GRC212_AB_HAP_CTG3_1': 'NT_187641.1', - 'HSCHR19KIR_GRC212_BA1_HAP_CTG3_1': 'NT_187642.1', - 'HSCHR19KIR_LUCE_A_HAP_CTG3_1': 'NT_187643.1', - 'HSCHR19KIR_LUCE_BDEL_HAP_CTG3_1': 'NT_187644.1', - 'HSCHR19KIR_RSH_A_HAP_CTG3_1': 'NT_187645.1', - 'HSCHR19KIR_RSH_BA2_HAP_CTG3_1': 'NT_187668.1', - 'HSCHR19KIR_T7526_A_HAP_CTG3_1': 'NT_187669.1', - 'HSCHR19KIR_T7526_BDEL_HAP_CTG3_1': 'NT_187670.1', - 'HSCHR19KIR_ABC08_A1_HAP_CTG3_1': 'NT_187671.1', - 'HSCHR19KIR_ABC08_AB_HAP_C_P_CTG3_1': 'NT_187672.1', - 'HSCHR19KIR_ABC08_AB_HAP_T_P_CTG3_1': 'NT_187673.1', - 'HSCHR19KIR_FH05_A_HAP_CTG3_1': 'NT_187674.1', - 'HSCHR19KIR_FH05_B_HAP_CTG3_1': 'NT_187675.1', - 'HSCHR19KIR_FH06_A_HAP_CTG3_1': 'NT_187676.1', - 'HSCHR19KIR_FH06_BA1_HAP_CTG3_1': 'NT_187677.1', - 'HSCHR19KIR_FH08_A_HAP_CTG3_1': 'NT_187683.1', - 'HSCHR19KIR_FH08_BAX_HAP_CTG3_1': 'NT_187684.1', - 'HSCHR19KIR_FH13_A_HAP_CTG3_1': 'NT_187685.1', - 'HSCHR19KIR_FH13_BA2_HAP_CTG3_1': 'NT_187686.1', - 'HSCHR19KIR_FH15_A_HAP_CTG3_1': 'NT_187687.1', - 'HSCHR19KIR_RP5_B_HAP_CTG3_1': 'NT_113949.2', - 
'HSCHR22_CTG1_3': 'NT_167235.1' - } - # Convert call line to rs line - chr_num = chr_num.upper() - if re.match('CHR', chr_num): - chr_num = chr_num[3:] - if primary_assembly == 'GRCh37': - chr_accession = GRCh37.get(chr_num) - if primary_assembly == 'GRCh38' or primary_assembly == 'hg38': - chr_accession = GRCh38.get(chr_num) - if primary_assembly == 'hg19': - chr_accession = hg19.get(chr_num) - return chr_accession - -""" -Mark for removal at testing -""" -# def to_chr_num(accession): -# # Available genome builds - Primary assembly only, Otherwise leave the RefSeq accession in place -# chr_num_convert = { -# "NC_000001": "1", -# "NC_000002": "2", -# "NC_000003": "3", -# "NC_000004": "4", -# "NC_000005": "5", -# "NC_000006": "6", -# "NC_000007": "7", -# "NC_000008": "8", -# "NC_000009": "9", -# "NC_000010": "10", -# "NC_000011": "11", -# "NC_000012": "12", -# "NC_000013": "13", -# "NC_000014": "14", -# "NC_000015": "15", -# "NC_000016": "16", -# "NC_000017": "17", -# "NC_000018": "18", -# "NC_000019": "19", -# "NC_000020": "20", -# "NC_000021": "21", -# "NC_000022": "22", -# "NC_000023": "X", -# "NC_000024": "Y" -# } -# accession = accession.split('.')[0] -# chr_num = chr_num_convert.get(accession) -# return chr_num - - -""" -Simple dictionary lookup function that takes the RefSeq chromosome identifier and returns the -UCSC genome build formatted VCF identifier. 
- -Note, UCSC and GenBank have different aliases for the ALT and Patch identifiers -""" - - -def to_chr_num_ucsc(accession, primary_assembly): - # Available genome builds - chr_num_convert_37 = { - "NC_000001.10": "chr1", - "NC_000002.11": "chr2", - "NC_000003.11": "chr3", - "NC_000004.11": "chr4", - "NC_000005.9": "chr5", - "NC_000006.11": "chr6", - "NC_000007.13": "chr7", - "NC_000008.10": "chr8", - "NC_000009.11": "chr9", - "NC_000010.10": "chr10", - "NC_000011.9": "chr11", - "NC_000012.11": "chr12", - "NC_000013.10": "chr13", - "NC_000014.8": "chr14", - "NC_000015.9": "chr15", - "NC_000016.9": "chr16", - "NC_000017.10": "chr17", - "NC_000018.9": "chr18", - "NC_000019.9": "chr19", - "NC_000020.10": "chr20", - "NC_000021.8": "chr21", - "NC_000022.10": "chr22", - "NC_000023.10": "chrX", - "NC_000024.9": "chrY", - "NC_012920.1": "chrM", # Cambridge revised mitochondrial - "NC_001807.1": "chrM", # hg19 mitochondrial - - # UCSC hg19 ALTS - "NT_113921.2": "chr11_gl000202_random", - "NT_167251.1": "chr17_ctg5_hap1", - "NT_113941.1": "chr17_gl000203_random", - "NT_113943.1": "chr17_gl000204_random", - "NT_113930.1": "chr17_gl000205_random", - "NT_113945.1": "chr17_gl000206_random", - "NT_113947.1": "chr18_gl000207_random", - "NT_113948.1": "chr19_gl000208_random", - "NT_113949.1": "chr19_gl000209_random", - "NT_113878.1": "chr1_gl000191_random", - "NT_167207.1": "chr1_gl000192_random", - "NT_113950.2": "chr21_gl000210_random", - "NT_167250.1": "chr4_ctg9_hap1", - "NT_113885.1": "chr4_gl000193_random", - "NT_113888.1": "chr4_gl000194_random", - "NT_167244.1": "chr6_apd_hap1", - "NT_113891.2": "chr6_cox_hap2", - "NT_167245.1": "chr6_dbb_hap3", - "NT_167246.1": "chr6_mann_hap4", - "NT_167247.1": "chr6_mcf_hap5", - "NT_167248.1": "chr6_qbl_hap6", - "NT_167249.1": "chr6_ssto_hap7", - "NT_113901.1": "chr7_gl000195_random", - "NT_113909.1": "chr8_gl000196_random", - "NT_113907.1": "chr8_gl000197_random", - "NT_113914.1": "chr9_gl000198_random", - "NT_113916.2": 
"chr9_gl000199_random", - "NT_113915.1": "chr9_gl000200_random", - "NT_113911.1": "chr9_gl000201_random", - "NT_113961.1": "chrUn_gl000211", - "NT_113923.1": "chrUn_gl000212", - "NT_167208.1": "chrUn_gl000213", - "NT_167209.1": "chrUn_gl000214", - "NT_167210.1": "chrUn_gl000215", - "NT_167211.1": "chrUn_gl000216", - "NT_167212.1": "chrUn_gl000217", - "NT_113889.1": "chrUn_gl000218", - "NT_167213.1": "chrUn_gl000219", - "NT_167214.1": "chrUn_gl000220", - "NT_167215.1": "chrUn_gl000221", - "NT_167216.1": "chrUn_gl000222", - "NT_167217.1": "chrUn_gl000223", - "NT_167218.1": "chrUn_gl000224", - "NT_167219.1": "chrUn_gl000225", - "NT_167220.1": "chrUn_gl000226", - "NT_167221.1": "chrUn_gl000227", - "NT_167222.1": "chrUn_gl000228", - "NT_167223.1": "chrUn_gl000229", - "NT_167224.1": "chrUn_gl000230", - "NT_167225.1": "chrUn_gl000231", - "NT_167226.1": "chrUn_gl000232", - "NT_167227.1": "chrUn_gl000233", - "NT_167228.1": "chrUn_gl000234", - "NT_167229.1": "chrUn_gl000235", - "NT_167230.1": "chrUn_gl000236", - "NT_167231.1": "chrUn_gl000237", - "NT_167232.1": "chrUn_gl000238", - "NT_167233.1": "chrUn_gl000239", - "NT_167234.1": "chrUn_gl000240", - "NT_167235.1": "chrUn_gl000241", - "NT_167236.1": "chrUn_gl000242", - "NT_167237.1": "chrUn_gl000243", - "NT_167238.1": "chrUn_gl000244", - "NT_167239.1": "chrUn_gl000245", - "NT_167240.1": "chrUn_gl000246", - "NT_167241.1": "chrUn_gl000247", - "NT_167242.1": "chrUn_gl000248", - "NT_167243.1": "chrUn_gl000249" - } - - chr_num_convert_38 = { - "NC_000001.11": "chr1", - "NC_000002.12": "chr2", - "NC_000003.12": "chr3", - "NC_000004.12": "chr4", - "NC_000005.10": "chr5", - "NC_000006.12": "chr6", - "NC_000007.14": "chr7", - "NC_000008.11": "chr8", - "NC_000009.12": "chr9", - "NC_000010.11": "chr10", - "NC_000011.10": "chr11", - "NC_000012.12": "chr12", - "NC_000013.11": "chr13", - "NC_000014.9": "chr14", - "NC_000015.10": "chr15", - "NC_000016.10": "chr16", - "NC_000017.11": "chr17", - "NC_000018.10": "chr18", - "NC_000019.10": 
"chr19", - "NC_000020.11": "chr20", - "NC_000021.9": "chr21", - "NC_000022.11": "chr22", - "NC_000023.11": "chrX", - "NC_000024.10": "chrY", - "NC_012920.1": "chrM", - - # UCSC hg38 Alts - "NW_003315934.1": "chr10_GL383545v1_alt", - "NW_003315935.1": "chr10_GL383546v1_alt", - "NT_187579.1": "chr10_KI270824v1_alt", - "NT_187580.1": "chr10_KI270825v1_alt", - "NW_003315936.1": "chr11_GL383547v1_alt", - "NW_003871073.1": "chr11_JH159136v1_alt", - "NW_003871074.1": "chr11_JH159137v1_alt", - "NT_187376.1": "chr11_KI270721v1_random", - "NT_187581.1": "chr11_KI270826v1_alt", - "NT_187582.1": "chr11_KI270827v1_alt", - "NT_187583.1": "chr11_KI270829v1_alt", - "NT_187584.1": "chr11_KI270830v1_alt", - "NT_187585.1": "chr11_KI270831v1_alt", - "NT_187586.1": "chr11_KI270832v1_alt", - "NT_187656.1": "chr11_KI270902v1_alt", - "NT_187657.1": "chr11_KI270903v1_alt", - "NT_187681.1": "chr11_KI270927v1_alt", - "NW_003315938.1": "chr12_GL383549v1_alt", - "NW_003315939.2": "chr12_GL383550v2_alt", - "NW_003315940.1": "chr12_GL383551v1_alt", - "NW_003315941.1": "chr12_GL383552v1_alt", - "NW_003315942.2": "chr12_GL383553v2_alt", - "NW_003571049.1": "chr12_GL877875v1_alt", - "NW_003571050.1": "chr12_GL877876v1_alt", - "NT_187589.1": "chr12_KI270833v1_alt", - "NT_187590.1": "chr12_KI270834v1_alt", - "NT_187587.1": "chr12_KI270835v1_alt", - "NT_187591.1": "chr12_KI270836v1_alt", - "NT_187588.1": "chr12_KI270837v1_alt", - "NT_187658.1": "chr12_KI270904v1_alt", - "NT_187592.1": "chr13_KI270838v1_alt", - "NT_187593.1": "chr13_KI270839v1_alt", - "NT_187594.1": "chr13_KI270840v1_alt", - "NT_187595.1": "chr13_KI270841v1_alt", - "NT_187596.1": "chr13_KI270842v1_alt", - "NT_187597.1": "chr13_KI270843v1_alt", - "NT_113796.3": "chr14_GL000009v2_random", - "NT_113888.1": "chr14_GL000194v1_random", - "NT_167219.1": "chr14_GL000225v1_random", - "NT_187377.1": "chr14_KI270722v1_random", - "NT_187378.1": "chr14_KI270723v1_random", - "NT_187379.1": "chr14_KI270724v1_random", - "NT_187380.1": 
"chr14_KI270725v1_random", - "NT_187381.1": "chr14_KI270726v1_random", - "NT_187598.1": "chr14_KI270844v1_alt", - "NT_187599.1": "chr14_KI270845v1_alt", - "NT_187600.1": "chr14_KI270846v1_alt", - "NT_187601.1": "chr14_KI270847v1_alt", - "NW_003315943.1": "chr15_GL383554v1_alt", - "NW_003315944.2": "chr15_GL383555v2_alt", - "NT_187382.1": "chr15_KI270727v1_random", - "NT_187603.1": "chr15_KI270848v1_alt", - "NT_187605.1": "chr15_KI270849v1_alt", - "NT_187606.1": "chr15_KI270850v1_alt", - "NT_187604.1": "chr15_KI270851v1_alt", - "NT_187602.1": "chr15_KI270852v1_alt", - "NT_187660.1": "chr15_KI270905v1_alt", - "NT_187659.1": "chr15_KI270906v1_alt", - "NW_003315945.1": "chr16_GL383556v1_alt", - "NW_003315946.1": "chr16_GL383557v1_alt", - "NT_187383.1": "chr16_KI270728v1_random", - "NT_187607.1": "chr16_KI270853v1_alt", - "NT_187610.1": "chr16_KI270854v1_alt", - "NT_187608.1": "chr16_KI270855v1_alt", - "NT_187609.1": "chr16_KI270856v1_alt", - "NT_113930.2": "chr17_GL000205v2_random", - "NT_167251.2": "chr17_GL000258v2_alt", - "NW_003315952.3": "chr17_GL383563v3_alt", - "NW_003315953.2": "chr17_GL383564v2_alt", - "NW_003315954.1": "chr17_GL383565v1_alt", - "NW_003315955.1": "chr17_GL383566v1_alt", - "NW_003871091.1": "chr17_JH159146v1_alt", - "NW_003871092.1": "chr17_JH159147v1_alt", - "NW_003871093.1": "chr17_JH159148v1_alt", - "NT_187384.1": "chr17_KI270729v1_random", - "NT_187385.1": "chr17_KI270730v1_random", - "NT_187614.1": "chr17_KI270857v1_alt", - "NT_187615.1": "chr17_KI270858v1_alt", - "NT_187616.1": "chr17_KI270859v1_alt", - "NT_187612.1": "chr17_KI270860v1_alt", - "NT_187611.1": "chr17_KI270861v1_alt", - "NT_187613.1": "chr17_KI270862v1_alt", - "NT_187662.1": "chr17_KI270907v1_alt", - "NT_187663.1": "chr17_KI270908v1_alt", - "NT_187661.1": "chr17_KI270909v1_alt", - "NT_187664.1": "chr17_KI270910v1_alt", - "NW_003315956.1": "chr18_GL383567v1_alt", - "NW_003315957.1": "chr18_GL383568v1_alt", - "NW_003315958.1": "chr18_GL383569v1_alt", - "NW_003315959.1": 
"chr18_GL383570v1_alt", - "NW_003315960.1": "chr18_GL383571v1_alt", - "NW_003315961.1": "chr18_GL383572v1_alt", - "NT_187617.1": "chr18_KI270863v1_alt", - "NT_187618.1": "chr18_KI270864v1_alt", - "NT_187666.1": "chr18_KI270911v1_alt", - "NT_187665.1": "chr18_KI270912v1_alt", - "NT_113949.2": "chr19_GL000209v2_alt", - "NW_003315962.1": "chr19_GL383573v1_alt", - "NW_003315963.1": "chr19_GL383574v1_alt", - "NW_003315964.2": "chr19_GL383575v2_alt", - "NW_003315965.1": "chr19_GL383576v1_alt", - "NW_003571054.1": "chr19_GL949746v1_alt", - "NW_003571055.2": "chr19_GL949747v2_alt", - "NW_003571056.2": "chr19_GL949748v2_alt", - "NW_003571057.2": "chr19_GL949749v2_alt", - "NW_003571058.2": "chr19_GL949750v2_alt", - "NW_003571059.2": "chr19_GL949751v2_alt", - "NW_003571060.1": "chr19_GL949752v1_alt", - "NW_003571061.2": "chr19_GL949753v2_alt", - "NT_187621.1": "chr19_KI270865v1_alt", - "NT_187619.1": "chr19_KI270866v1_alt", - "NT_187620.1": "chr19_KI270867v1_alt", - "NT_187622.1": "chr19_KI270868v1_alt", - "NT_187636.1": "chr19_KI270882v1_alt", - "NT_187637.1": "chr19_KI270883v1_alt", - "NT_187638.1": "chr19_KI270884v1_alt", - "NT_187639.1": "chr19_KI270885v1_alt", - "NT_187640.1": "chr19_KI270886v1_alt", - "NT_187641.1": "chr19_KI270887v1_alt", - "NT_187642.1": "chr19_KI270888v1_alt", - "NT_187643.1": "chr19_KI270889v1_alt", - "NT_187644.1": "chr19_KI270890v1_alt", - "NT_187645.1": "chr19_KI270891v1_alt", - "NT_187668.1": "chr19_KI270914v1_alt", - "NT_187669.1": "chr19_KI270915v1_alt", - "NT_187670.1": "chr19_KI270916v1_alt", - "NT_187671.1": "chr19_KI270917v1_alt", - "NT_187672.1": "chr19_KI270918v1_alt", - "NT_187673.1": "chr19_KI270919v1_alt", - "NT_187674.1": "chr19_KI270920v1_alt", - "NT_187675.1": "chr19_KI270921v1_alt", - "NT_187676.1": "chr19_KI270922v1_alt", - "NT_187677.1": "chr19_KI270923v1_alt", - "NT_187683.1": "chr19_KI270929v1_alt", - "NT_187684.1": "chr19_KI270930v1_alt", - "NT_187685.1": "chr19_KI270931v1_alt", - "NT_187686.1": "chr19_KI270932v1_alt", - 
"NT_187687.1": "chr19_KI270933v1_alt", - "NT_187693.1": "chr19_KI270938v1_alt", - "NW_003315905.1": "chr1_GL383518v1_alt", - "NW_003315906.1": "chr1_GL383519v1_alt", - "NW_003315907.2": "chr1_GL383520v2_alt", - "NT_187361.1": "chr1_KI270706v1_random", - "NT_187362.1": "chr1_KI270707v1_random", - "NT_187363.1": "chr1_KI270708v1_random", - "NT_187364.1": "chr1_KI270709v1_random", - "NT_187365.1": "chr1_KI270710v1_random", - "NT_187366.1": "chr1_KI270711v1_random", - "NT_187367.1": "chr1_KI270712v1_random", - "NT_187368.1": "chr1_KI270713v1_random", - "NT_187369.1": "chr1_KI270714v1_random", - "NT_187516.1": "chr1_KI270759v1_alt", - "NT_187514.1": "chr1_KI270760v1_alt", - "NT_187518.1": "chr1_KI270761v1_alt", - "NT_187515.1": "chr1_KI270762v1_alt", - "NT_187519.1": "chr1_KI270763v1_alt", - "NT_187521.1": "chr1_KI270764v1_alt", - "NT_187520.1": "chr1_KI270765v1_alt", - "NT_187517.1": "chr1_KI270766v1_alt", - "NT_187646.1": "chr1_KI270892v1_alt", - "NW_003315966.2": "chr20_GL383577v2_alt", - "NT_187623.1": "chr20_KI270869v1_alt", - "NT_187624.1": "chr20_KI270870v1_alt", - "NT_187625.1": "chr20_KI270871v1_alt", - "NW_003315967.2": "chr21_GL383578v2_alt", - "NW_003315968.2": "chr21_GL383579v2_alt", - "NW_003315969.2": "chr21_GL383580v2_alt", - "NW_003315970.2": "chr21_GL383581v2_alt", - "NT_187626.1": "chr21_KI270872v1_alt", - "NT_187627.1": "chr21_KI270873v1_alt", - "NT_187628.1": "chr21_KI270874v1_alt", - "NW_003315971.2": "chr22_GL383582v2_alt", - "NW_003315972.2": "chr22_GL383583v2_alt", - "NW_004504305.1": "chr22_KB663609v1_alt", - "NT_187386.1": "chr22_KI270731v1_random", - "NT_187387.1": "chr22_KI270732v1_random", - "NT_187388.1": "chr22_KI270733v1_random", - "NT_187389.1": "chr22_KI270734v1_random", - "NT_187390.1": "chr22_KI270735v1_random", - "NT_187391.1": "chr22_KI270736v1_random", - "NT_187392.1": "chr22_KI270737v1_random", - "NT_187393.1": "chr22_KI270738v1_random", - "NT_187394.1": "chr22_KI270739v1_random", - "NT_187629.1": "chr22_KI270875v1_alt", - 
"NT_187630.1": "chr22_KI270876v1_alt", - "NT_187631.1": "chr22_KI270877v1_alt", - "NT_187632.1": "chr22_KI270878v1_alt", - "NT_187633.1": "chr22_KI270879v1_alt", - "NT_187682.1": "chr22_KI270928v1_alt", - "NW_003315908.1": "chr2_GL383521v1_alt", - "NW_003315909.1": "chr2_GL383522v1_alt", - "NW_003571033.2": "chr2_GL582966v2_alt", - "NT_187370.1": "chr2_KI270715v1_random", - "NT_187371.1": "chr2_KI270716v1_random", - "NT_187523.1": "chr2_KI270767v1_alt", - "NT_187528.1": "chr2_KI270768v1_alt", - "NT_187522.1": "chr2_KI270769v1_alt", - "NT_187525.1": "chr2_KI270770v1_alt", - "NT_187530.1": "chr2_KI270771v1_alt", - "NT_187524.1": "chr2_KI270772v1_alt", - "NT_187526.1": "chr2_KI270773v1_alt", - "NT_187529.1": "chr2_KI270774v1_alt", - "NT_187531.1": "chr2_KI270775v1_alt", - "NT_187527.1": "chr2_KI270776v1_alt", - "NT_187647.1": "chr2_KI270893v1_alt", - "NT_187648.1": "chr2_KI270894v1_alt", - "NT_167215.1": "chr3_GL000221v1_random", - "NW_003315913.1": "chr3_GL383526v1_alt", - "NW_003871060.2": "chr3_JH636055v2_alt", - "NT_187533.1": "chr3_KI270777v1_alt", - "NT_187536.1": "chr3_KI270778v1_alt", - "NT_187532.1": "chr3_KI270779v1_alt", - "NT_187537.1": "chr3_KI270780v1_alt", - "NT_187538.1": "chr3_KI270781v1_alt", - "NT_187534.1": "chr3_KI270782v1_alt", - "NT_187535.1": "chr3_KI270783v1_alt", - "NT_187539.1": "chr3_KI270784v1_alt", - "NT_187649.1": "chr3_KI270895v1_alt", - "NT_187678.1": "chr3_KI270924v1_alt", - "NT_187688.1": "chr3_KI270934v1_alt", - "NT_187689.1": "chr3_KI270935v1_alt", - "NT_187690.1": "chr3_KI270936v1_alt", - "NT_187691.1": "chr3_KI270937v1_alt", - "NT_113793.3": "chr4_GL000008v2_random", - "NT_167250.2": "chr4_GL000257v2_alt", - "NW_003315914.1": "chr4_GL383527v1_alt", - "NW_003315915.1": "chr4_GL383528v1_alt", - "NT_187542.1": "chr4_KI270785v1_alt", - "NT_187543.1": "chr4_KI270786v1_alt", - "NT_187541.1": "chr4_KI270787v1_alt", - "NT_187544.1": "chr4_KI270788v1_alt", - "NT_187545.1": "chr4_KI270789v1_alt", - "NT_187540.1": "chr4_KI270790v1_alt", - 
"NT_187650.1": "chr4_KI270896v1_alt", - "NT_187679.1": "chr4_KI270925v1_alt", - "NT_113948.1": "chr5_GL000208v1_random", - "NW_003315917.2": "chr5_GL339449v2_alt", - "NW_003315918.1": "chr5_GL383530v1_alt", - "NW_003315919.1": "chr5_GL383531v1_alt", - "NW_003315920.1": "chr5_GL383532v1_alt", - "NW_003571036.1": "chr5_GL949742v1_alt", - "NT_187547.1": "chr5_KI270791v1_alt", - "NT_187548.1": "chr5_KI270792v1_alt", - "NT_187550.1": "chr5_KI270793v1_alt", - "NT_187551.1": "chr5_KI270794v1_alt", - "NT_187546.1": "chr5_KI270795v1_alt", - "NT_187549.1": "chr5_KI270796v1_alt", - "NT_187651.1": "chr5_KI270897v1_alt", - "NT_187652.1": "chr5_KI270898v1_alt", - "NT_167244.2": "chr6_GL000250v2_alt", - "NT_113891.3": "chr6_GL000251v2_alt", - "NT_167245.2": "chr6_GL000252v2_alt", - "NT_167246.2": "chr6_GL000253v2_alt", - "NT_167247.2": "chr6_GL000254v2_alt", - "NT_167248.2": "chr6_GL000255v2_alt", - "NT_167249.2": "chr6_GL000256v2_alt", - "NW_003315921.1": "chr6_GL383533v1_alt", - "NW_004166862.2": "chr6_KB021644v2_alt", - "NT_187692.1": "chr6_KI270758v1_alt", - "NT_187552.1": "chr6_KI270797v1_alt", - "NT_187553.1": "chr6_KI270798v1_alt", - "NT_187554.1": "chr6_KI270799v1_alt", - "NT_187555.1": "chr6_KI270800v1_alt", - "NT_187556.1": "chr6_KI270801v1_alt", - "NT_187557.1": "chr6_KI270802v1_alt", - "NW_003315922.2": "chr7_GL383534v2_alt", - "NT_187562.1": "chr7_KI270803v1_alt", - "NT_187558.1": "chr7_KI270804v1_alt", - "NT_187560.1": "chr7_KI270805v1_alt", - "NT_187559.1": "chr7_KI270806v1_alt", - "NT_187563.1": "chr7_KI270807v1_alt", - "NT_187564.1": "chr7_KI270808v1_alt", - "NT_187561.1": "chr7_KI270809v1_alt", - "NT_187653.1": "chr7_KI270899v1_alt", - "NT_187567.1": "chr8_KI270810v1_alt", - "NT_187565.1": "chr8_KI270811v1_alt", - "NT_187568.1": "chr8_KI270812v1_alt", - "NT_187570.1": "chr8_KI270813v1_alt", - "NT_187566.1": "chr8_KI270814v1_alt", - "NT_187569.1": "chr8_KI270815v1_alt", - "NT_187571.1": "chr8_KI270816v1_alt", - "NT_187573.1": "chr8_KI270817v1_alt", - 
"NT_187572.1": "chr8_KI270818v1_alt", - "NT_187574.1": "chr8_KI270819v1_alt", - "NT_187575.1": "chr8_KI270820v1_alt", - "NT_187576.1": "chr8_KI270821v1_alt", - "NT_187577.1": "chr8_KI270822v1_alt", - "NT_187654.1": "chr8_KI270900v1_alt", - "NT_187655.1": "chr8_KI270901v1_alt", - "NT_187680.1": "chr8_KI270926v1_alt", - "NW_003315928.1": "chr9_GL383539v1_alt", - "NW_003315929.1": "chr9_GL383540v1_alt", - "NW_003315930.1": "chr9_GL383541v1_alt", - "NW_003315931.1": "chr9_GL383542v1_alt", - "NT_187372.1": "chr9_KI270717v1_random", - "NT_187373.1": "chr9_KI270718v1_random", - "NT_187374.1": "chr9_KI270719v1_random", - "NT_187375.1": "chr9_KI270720v1_random", - "NT_187578.1": "chr9_KI270823v1_alt", - "NT_113901.1": "chrUn_GL000195v1", - "NT_167208.1": "chrUn_GL000213v1", - "NT_167209.1": "chrUn_GL000214v1", - "NT_167211.2": "chrUn_GL000216v2", - "NT_113889.1": "chrUn_GL000218v1", - "NT_167213.1": "chrUn_GL000219v1", - "NT_167214.1": "chrUn_GL000220v1", - "NT_167218.1": "chrUn_GL000224v1", - "NT_167220.1": "chrUn_GL000226v1", - "NT_187396.1": "chrUn_KI270302v1", - "NT_187398.1": "chrUn_KI270303v1", - "NT_187397.1": "chrUn_KI270304v1", - "NT_187399.1": "chrUn_KI270305v1", - "NT_187402.1": "chrUn_KI270310v1", - "NT_187406.1": "chrUn_KI270311v1", - "NT_187405.1": "chrUn_KI270312v1", - "NT_187404.1": "chrUn_KI270315v1", - "NT_187403.1": "chrUn_KI270316v1", - "NT_187407.1": "chrUn_KI270317v1", - "NT_187401.1": "chrUn_KI270320v1", - "NT_187400.1": "chrUn_KI270322v1", - "NT_187459.1": "chrUn_KI270329v1", - "NT_187458.1": "chrUn_KI270330v1", - "NT_187461.1": "chrUn_KI270333v1", - "NT_187460.1": "chrUn_KI270334v1", - "NT_187462.1": "chrUn_KI270335v1", - "NT_187465.1": "chrUn_KI270336v1", - "NT_187466.1": "chrUn_KI270337v1", - "NT_187463.1": "chrUn_KI270338v1", - "NT_187464.1": "chrUn_KI270340v1", - "NT_187469.1": "chrUn_KI270362v1", - "NT_187467.1": "chrUn_KI270363v1", - "NT_187468.1": "chrUn_KI270364v1", - "NT_187470.1": "chrUn_KI270366v1", - "NT_187494.1": "chrUn_KI270371v1", - 
"NT_187491.1": "chrUn_KI270372v1", - "NT_187492.1": "chrUn_KI270373v1", - "NT_187490.1": "chrUn_KI270374v1", - "NT_187493.1": "chrUn_KI270375v1", - "NT_187489.1": "chrUn_KI270376v1", - "NT_187471.1": "chrUn_KI270378v1", - "NT_187472.1": "chrUn_KI270379v1", - "NT_187486.1": "chrUn_KI270381v1", - "NT_187488.1": "chrUn_KI270382v1", - "NT_187482.1": "chrUn_KI270383v1", - "NT_187484.1": "chrUn_KI270384v1", - "NT_187487.1": "chrUn_KI270385v1", - "NT_187480.1": "chrUn_KI270386v1", - "NT_187475.1": "chrUn_KI270387v1", - "NT_187478.1": "chrUn_KI270388v1", - "NT_187473.1": "chrUn_KI270389v1", - "NT_187474.1": "chrUn_KI270390v1", - "NT_187481.1": "chrUn_KI270391v1", - "NT_187485.1": "chrUn_KI270392v1", - "NT_187483.1": "chrUn_KI270393v1", - "NT_187479.1": "chrUn_KI270394v1", - "NT_187476.1": "chrUn_KI270395v1", - "NT_187477.1": "chrUn_KI270396v1", - "NT_187409.1": "chrUn_KI270411v1", - "NT_187408.1": "chrUn_KI270412v1", - "NT_187410.1": "chrUn_KI270414v1", - "NT_187415.1": "chrUn_KI270417v1", - "NT_187412.1": "chrUn_KI270418v1", - "NT_187411.1": "chrUn_KI270419v1", - "NT_187413.1": "chrUn_KI270420v1", - "NT_187416.1": "chrUn_KI270422v1", - "NT_187417.1": "chrUn_KI270423v1", - "NT_187414.1": "chrUn_KI270424v1", - "NT_187418.1": "chrUn_KI270425v1", - "NT_187419.1": "chrUn_KI270429v1", - "NT_187424.1": "chrUn_KI270435v1", - "NT_187425.1": "chrUn_KI270438v1", - "NT_187420.1": "chrUn_KI270442v1", - "NT_187495.1": "chrUn_KI270448v1", - "NT_187422.1": "chrUn_KI270465v1", - "NT_187421.1": "chrUn_KI270466v1", - "NT_187423.1": "chrUn_KI270467v1", - "NT_187426.1": "chrUn_KI270468v1", - "NT_187437.1": "chrUn_KI270507v1", - "NT_187430.1": "chrUn_KI270508v1", - "NT_187428.1": "chrUn_KI270509v1", - "NT_187427.1": "chrUn_KI270510v1", - "NT_187435.1": "chrUn_KI270511v1", - "NT_187432.1": "chrUn_KI270512v1", - "NT_187436.1": "chrUn_KI270515v1", - "NT_187431.1": "chrUn_KI270516v1", - "NT_187438.1": "chrUn_KI270517v1", - "NT_187429.1": "chrUn_KI270518v1", - "NT_187433.1": "chrUn_KI270519v1", - 
"NT_187496.1": "chrUn_KI270521v1", - "NT_187434.1": "chrUn_KI270522v1", - "NT_187440.1": "chrUn_KI270528v1", - "NT_187439.1": "chrUn_KI270529v1", - "NT_187441.1": "chrUn_KI270530v1", - "NT_187443.1": "chrUn_KI270538v1", - "NT_187442.1": "chrUn_KI270539v1", - "NT_187444.1": "chrUn_KI270544v1", - "NT_187445.1": "chrUn_KI270548v1", - "NT_187450.1": "chrUn_KI270579v1", - "NT_187448.1": "chrUn_KI270580v1", - "NT_187449.1": "chrUn_KI270581v1", - "NT_187454.1": "chrUn_KI270582v1", - "NT_187446.1": "chrUn_KI270583v1", - "NT_187453.1": "chrUn_KI270584v1", - "NT_187447.1": "chrUn_KI270587v1", - "NT_187455.1": "chrUn_KI270588v1", - "NT_187451.1": "chrUn_KI270589v1", - "NT_187452.1": "chrUn_KI270590v1", - "NT_187457.1": "chrUn_KI270591v1", - "NT_187456.1": "chrUn_KI270593v1", - "NT_187497.1": "chrUn_KI270741v1", - "NT_187513.1": "chrUn_KI270742v1", - "NT_187498.1": "chrUn_KI270743v1", - "NT_187499.1": "chrUn_KI270744v1", - "NT_187500.1": "chrUn_KI270745v1", - "NT_187501.1": "chrUn_KI270746v1", - "NT_187502.1": "chrUn_KI270747v1", - "NT_187503.1": "chrUn_KI270748v1", - "NT_187504.1": "chrUn_KI270749v1", - "NT_187505.1": "chrUn_KI270750v1", - "NT_187506.1": "chrUn_KI270751v1", - "NT_187507.1": "chrUn_KI270752v1", - "NT_187508.1": "chrUn_KI270753v1", - "NT_187509.1": "chrUn_KI270754v1", - "NT_187510.1": "chrUn_KI270755v1", - "NT_187511.1": "chrUn_KI270756v1", - "NT_187512.1": "chrUn_KI270757v1", - "NT_187634.1": "chrX_KI270880v1_alt", - "NT_187635.1": "chrX_KI270881v1_alt", - "NT_187667.1": "chrX_KI270913v1_alt", - "NT_187395.1": "chrY_KI270740v1_random" - } - if primary_assembly == 'hg38': - chr_num = chr_num_convert_38.get(accession) - if primary_assembly == 'hg19': - chr_num = chr_num_convert_37.get(accession) - try: - return chr_num - except UnboundLocalError: - chr_num = None - return chr_num - - -""" -Simple dictionary lookup function that takes the RefSeq chromosome identifier and returns the -Genbank genome build formatted VCF identifier. 
- -Note, UCSC and GenBank have different aliases for the ALT and Patch identifiers -""" - - -def to_chr_num_refseq(accession, primary_assembly): - # Available genome builds - chr_num_convert_37 = { - "NC_000001.10": "1", - "NC_000002.11": "2", - "NC_000003.11": "3", - "NC_000004.11": "4", - "NC_000005.9": "5", - "NC_000006.11": "6", - "NC_000007.13": "7", - "NC_000008.10": "8", - "NC_000009.11": "9", - "NC_000010.10": "10", - "NC_000011.9": "11", - "NC_000012.11": "12", - "NC_000013.10": "13", - "NC_000014.8": "14", - "NC_000015.9": "15", - "NC_000016.9": "16", - "NC_000017.10": "17", - "NC_000018.9": "18", - "NC_000019.9": "19", - "NC_000020.10": "20", - "NC_000021.8": "21", - "NC_000022.10": "22", - "NC_000023.10": "X", - "NC_000024.9": "Y", - "NC_012920.1": "M", - - # GRC GRCh37 alts - 'NW_004070864.2': 'HG1472_PATCH', - 'NW_003571030.1': 'HG989_PATCH', - 'NW_003871056.3': 'HG1292_PATCH', - 'NW_003871055.3': 'HG1287_PATCH', - 'NW_003315905.1': 'HSCHR1_1_CTG31', - 'NW_003315906.1': 'HSCHR1_2_CTG31', - 'NW_003315907.1': 'HSCHR1_3_CTG31', - 'NW_004070863.1': 'HG1471_PATCH', - 'NW_003871057.1': 'HG1293_PATCH', - 'NW_004070865.1': 'HG1473_PATCH', - 'NW_003315903.1': 'HG999_1_PATCH', - 'NW_003315904.1': 'HG999_2_PATCH', - 'NW_003315908.1': 'HSCHR2_1_CTG1', - 'NW_004504299.1': 'HG953_PATCH', - 'NW_003571032.1': 'HG686_PATCH', - 'NW_003571033.2': 'HSCHR2_2_CTG12', - 'NW_003315909.1': 'HSCHR2_1_CTG12', - 'NW_003571031.1': 'HG1007_PATCH', - 'NW_003871060.1': 'HSCHR3_1_CTG1', - 'NW_003871059.1': 'HG325_PATCH', - 'NW_003315910.1': 'HG186_PATCH', - 'NW_004775426.1': 'HG957_PATCH', - 'NW_003315911.1': 'HG280_PATCH', - 'NW_003871058.1': 'HG1091_PATCH', - 'NW_003315912.1': 'HG991_PATCH', - 'NW_003315913.1': 'HSCHR3_1_CTG2_1', - 'NW_004775427.1': 'HG174_HG254_PATCH', - 'NW_003315915.1': 'HSCHR4_1_CTG6', - 'NW_003315916.1': 'HSCHR4_2_CTG9', - 'NW_003571035.1': 'HG706_PATCH', - 'NW_003315914.1': 'HSCHR4_1_CTG12', - 'NW_003571034.1': 'HG1032_PATCH', - 'NW_003315920.1': 
'HSCHR5_2_CTG1', - 'NW_003571036.1': 'HSCHR5_3_CTG1', - 'NW_003315917.2': 'HSCHR5_1_CTG1', - 'NW_003315918.1': 'HSCHR5_1_CTG2', - 'NW_003871061.1': 'HG1063_PATCH', - 'NW_004775428.1': 'HG1082_HG167_PATCH', - 'NW_003315919.1': 'HSCHR5_1_CTG5', - 'NW_004070866.1': 'HG27_PATCH', - 'NW_003871063.1': 'HG1322_PATCH', - 'NW_003315921.1': 'HSCHR6_1_CTG5', - 'NW_004504300.1': 'HG357_PATCH', - 'NW_003871062.1': 'HG1304_PATCH', - 'NW_004775429.1': 'HG193_PATCH', - 'NW_004166862.1': 'HSCHR6_2_CTG5', - 'NW_003571039.1': 'HG736_PATCH', - 'NW_003571038.1': 'HG14_PATCH', - 'NW_004775430.1': 'HG444_PATCH', - 'NW_003871064.1': 'HG1257_PATCH', - 'NW_003571041.1': 'HG946_PATCH', - 'NW_003571037.1': 'HG115_PATCH', - 'NW_003871065.1': 'HG1308_PATCH', - 'NW_003315922.2': 'HSCHR7_1_CTG6', - 'NW_003571040.1': 'HG7_PATCH', - 'NW_003571042.1': 'HG19_PATCH', - 'NW_004775431.1': 'HG1699_PATCH', - 'NW_003871066.2': 'HG418_PATCH', - 'NW_003315923.1': 'HG104_HG975_PATCH', - 'NW_003315924.1': 'HG243_PATCH', - 'NW_003315928.1': 'HSCHR9_1_CTG1', - 'NW_003871067.1': 'HG962_PATCH', - 'NW_003315929.1': 'HSCHR9_1_CTG35', - 'NW_003315930.1': 'HSCHR9_2_CTG35', - 'NW_003315931.1': 'HSCHR9_3_CTG35', - 'NW_004504301.1': 'HG50_PATCH', - 'NW_004070869.1': 'HG1502_PATCH', - 'NW_003315925.1': 'HG79_PATCH', - 'NW_004070867.1': 'HG1500_PATCH', - 'NW_004070868.1': 'HG1501_PATCH', - 'NW_003315926.1': 'HG998_1_PATCH', - 'NW_003315927.1': 'HG998_2_PATCH', - 'NW_003571043.1': 'HG905_PATCH', - 'NW_003871071.1': 'HG871_PATCH', - 'NW_003315932.1': 'HG544_PATCH', - 'NW_003315934.1': 'HSCHR10_1_CTG2', - 'NW_003315935.1': 'HSCHR10_1_CTG5', - 'NW_003871068.1': 'HG1211_PATCH', - 'NW_004504302.1': 'HG1074_PATCH', - 'NW_003871070.1': 'HG339_PATCH', - 'NW_004775432.1': 'HG979_PATCH', - 'NW_003871069.1': 'HG311_PATCH', - 'NW_003315933.1': 'HG995_PATCH', - 'NW_004070870.1': 'HG1479_PATCH', - 'NW_003871075.1': 'HG256_PATCH', - 'NW_003871082.1': 'HG873_PATCH', - 'NW_003315936.1': 'HSCHR11_1_CTG1_1', - 'NW_003571045.1': 'HG281_PATCH', 
- 'NW_003871073.1': 'HG142_HG150_NOVEL_TEST', - 'NW_003871074.1': 'HG151_NOVEL_TEST', - 'NW_003571046.1': 'HG536_PATCH', - 'NW_004070871.1': 'HG865_PATCH', - 'NW_003871081.1': 'HG414_PATCH', - 'NW_003871079.1': 'HG348_PATCH', - 'NW_003871077.1': 'HG305_PATCH', - 'NW_003871080.1': 'HG388_HG400_PATCH', - 'NW_003871078.1': 'HG306_PATCH', - 'NW_003871072.2': 'HG122_PATCH', - 'NW_003871076.1': 'HG299_PATCH', - 'NW_003571048.1': 'HG858_PATCH', - 'NW_003571049.1': 'HSCHR12_1_CTG1', - 'NW_003871083.2': 'HG344_PATCH', - 'NW_003571047.1': 'HG1133_PATCH', - 'NW_003571050.1': 'HSCHR12_2_CTG2', - 'NW_003315938.1': 'HSCHR12_1_CTG2', - 'NW_003315939.1': 'HSCHR12_1_CTG2_1', - 'NW_003315941.1': 'HSCHR12_2_CTG2_1', - 'NW_003315942.2': 'HSCHR12_3_CTG2_1', - 'NW_004504303.2': 'HG1595_PATCH', - 'NW_003315940.1': 'HSCHR12_1_CTG5', - 'NW_003315937.1': 'HG996_PATCH', - 'NW_003571051.1': 'HG531_PATCH', - 'NW_004166863.1': 'HG1592_PATCH', - 'NW_003315943.1': 'HSCHR15_1_CTG4', - 'NW_003315944.1': 'HSCHR15_1_CTG8', - 'NW_003871084.1': 'HG971_PATCH', - 'NW_003315945.1': 'HSCHR16_1_CTG3_1', - 'NW_003871085.1': 'HG1208_PATCH', - 'NW_003315946.1': 'HSCHR16_2_CTG3_1', - 'NW_004070872.2': 'HG417_PATCH', - 'NW_003315952.2': 'HSCHR17_1_CTG1', - 'NW_003315951.1': 'HG990_PATCH', - 'NW_003315950.2': 'HG987_PATCH', - 'NW_004775433.1': 'HG1591_PATCH', - 'NW_003871090.1': 'HG883_PATCH', - 'NW_004166864.2': 'HG385_PATCH', - 'NW_003315949.1': 'HG75_PATCH', - 'NW_003315948.2': 'HG745_PATCH', - 'NW_003871091.1': 'HSCHR17_4_CTG4', - 'NW_003871093.1': 'HSCHR17_6_CTG4', - 'NW_003871092.1': 'HSCHR17_5_CTG4', - 'NW_003315953.1': 'HSCHR17_1_CTG4', - 'NW_003571052.1': 'HG185_PATCH', - 'NW_003871086.1': 'HG1146_PATCH', - 'NW_003315947.1': 'HG183_PATCH', - 'NW_003871088.1': 'HG747_PATCH', - 'NW_003315954.1': 'HSCHR17_2_CTG4', - 'NW_003315955.1': 'HSCHR17_3_CTG4', - 'NW_003871089.1': 'HG748_PATCH', - 'NW_003871087.1': 'HG271_PATCH', - 'NW_003315956.1': 'HSCHR18_1_CTG1_1', - 'NW_003315959.1': 'HSCHR18_2_CTG1_1', - 
'NW_003315960.1': 'HSCHR18_2_CTG2', - 'NW_003315957.1': 'HSCHR18_1_CTG2', - 'NW_003315958.1': 'HSCHR18_1_CTG2_1', - 'NW_003315961.1': 'HSCHR18_2_CTG2_1', - 'NW_003871094.1': 'HG729_PATCH', - 'NW_003571053.2': 'HG730_PATCH', - 'NW_003315962.1': 'HSCHR19_1_CTG3', - 'NW_003315964.2': 'HSCHR19_2_CTG3', - 'NW_003315965.1': 'HSCHR19_3_CTG3', - 'NW_003315963.1': 'HSCHR19_1_CTG3_1', - 'NW_004775434.1': 'HG1350_HG959_PATCH', - 'NW_004166865.1': 'HG1079_PATCH', - 'NW_003571054.1': 'HSCHR19LRC_COX1_CTG1', - 'NW_003571055.1': 'HSCHR19LRC_COX2_CTG1', - 'NW_003571056.1': 'HSCHR19LRC_LRC_I_CTG1', - 'NW_003571057.1': 'HSCHR19LRC_LRC_J_CTG1', - 'NW_003571058.1': 'HSCHR19LRC_LRC_S_CTG1', - 'NW_003571059.1': 'HSCHR19LRC_LRC_T_CTG1', - 'NW_003571060.1': 'HSCHR19LRC_PGF1_CTG1', - 'NW_003571061.1': 'HSCHR19LRC_PGF2_CTG1', - 'NW_003315966.1': 'HSCHR20_1_CTG1', - 'NW_003871095.1': 'HG144_PATCH', - 'NW_004504304.1': 'HG944_PATCH', - 'NW_003571063.2': 'HG506_HG507_HG1000_PATCH', - 'NW_003315967.1': 'HSCHR21_1_CTG1_1', - 'NW_003315968.1': 'HSCHR21_2_CTG1_1', - 'NW_003315969.1': 'HSCHR21_3_CTG1_1', - 'NW_003315970.1': 'HSCHR21_4_CTG1_1', - 'NW_004775435.1': 'HG237_PATCH', - 'NW_004070874.1': 'HG1487_PATCH', - 'NW_004070873.1': 'HG1486_PATCH', - 'NW_004070875.1': 'HG1488_PATCH', - 'NW_003871096.1': 'HG329_PATCH', - 'NW_003315972.1': 'HSCHR22_1_CTG2', - 'NW_003315971.2': 'HSCHR22_1_CTG1', - 'NW_004504305.1': 'HSCHR22_2_CTG1', - 'NW_004070876.1': 'HG497_PATCH', - 'NW_003571064.2': 'HG480_HG481_PATCH', - 'NW_003871098.1': 'HG1423_PATCH', - 'NW_003871099.1': 'HG1424_PATCH', - 'NW_004070879.1': 'HG1435_PATCH', - 'NW_004166866.1': 'HG29_PATCH', - 'NW_004070880.2': 'HG1436_HG1432_PATCH', - 'NW_004070877.1': 'HG1433_PATCH', - 'NW_004070881.1': 'HG1437_PATCH', - 'NW_004070882.1': 'HG1438_PATCH', - 'NW_003871100.1': 'HG1425_PATCH', - 'NW_003871101.3': 'HG1426_PATCH', - 'NW_004070883.1': 'HG1439_PATCH', - 'NW_004070884.1': 'HG1440_PATCH', - 'NW_004070885.1': 'HG1441_PATCH', - 'NW_003871102.1': 
'HG375_PATCH', - 'NW_004070878.1': 'HG1434_PATCH', - 'NW_004070891.1': 'HG1462_PATCH', - 'NW_004070892.1': 'HG1463_PATCH', - 'NW_004070893.1': 'HG1490_PATCH', - 'NW_004070886.1': 'HG1442_PATCH', - 'NW_004070887.1': 'HG1443_HG1444_PATCH', - 'NW_004070888.1': 'HG1453_PATCH', - 'NW_004070889.1': 'HG1458_PATCH', - 'NW_004070890.2': 'HG1459_PATCH', - 'NW_003871103.3': 'HG1497_PATCH', - 'NT_167244.1': 'HSCHR6_MHC_APD_CTG1', - 'NT_113891.2': 'HSCHR6_MHC_COX_CTG1', - 'NT_167245.1': 'HSCHR6_MHC_DBB_CTG1', - 'NT_167246.1': 'HSCHR6_MHC_MANN_CTG1', - 'NT_167247.1': 'HSCHR6_MHC_MCF_CTG1', - 'NT_167248.1': 'HSCHR6_MHC_QBL_CTG1', - 'NT_167249.1': 'HSCHR6_MHC_SSTO_CTG1', - 'NT_167250.1': 'HSCHR4_1_CTG9', - 'NT_167251.1': 'HSCHR17_1_CTG5' - } - - chr_num_convert_38 = { - "NC_000001.11": "1", - "NC_000002.12": "2", - "NC_000003.12": "3", - "NC_000004.12": "4", - "NC_000005.10": "5", - "NC_000006.12": "6", - "NC_000007.14": "7", - "NC_000008.11": "8", - "NC_000009.12": "9", - "NC_000010.11": "10", - "NC_000011.10": "11", - "NC_000012.12": "12", - "NC_000013.11": "13", - "NC_000014.9": "14", - "NC_000015.10": "15", - "NC_000016.10": "16", - "NC_000017.11": "17", - "NC_000018.10": "18", - "NC_000019.10": "19", - "NC_000020.11": "20", - "NC_000021.9": "21", - "NC_000022.11": "22", - "NC_000023.11": "X", - "NC_000024.10": "Y", - "NC_012920.1": "M", - - # GRCh38 alts - 'NW_012132914.1': 'HG1342_HG2282_PATCH', - 'NW_015495298.1': 'HSCHR1_5_CTG3', - 'NW_011332688.1': 'HG2095_PATCH', - 'NW_014040926.1': 'HSCHR1_4_CTG3', - 'NW_009646195.1': 'HG2058_PATCH', - 'NW_018654706.1': 'HSCHR1_8_CTG3', - 'NW_019805487.1': 'HG460_PATCH', - 'NW_009646194.1': 'HG986_PATCH', - 'NW_018654707.1': 'HSCHR1_9_CTG3', - 'NW_014040925.1': 'HSCHR1_3_CTG3', - 'NW_017852928.1': 'HSCHR1_6_CTG3', - 'NW_009646196.1': 'HG2104_PATCH', - 'NW_011332687.1': 'HG1832_PATCH', - 'NW_018654708.1': 'HG2002_PATCH', - 'NW_014040927.1': 'HSCHR1_5_CTG32_1', - 'NW_012132915.1': 'HG2290_PATCH', - 'NW_018654709.1': 'HSCHR2_7_CTG7_2', - 
'NW_015495299.1': 'HSCHR2_6_CTG7_2', - 'NW_018654710.1': 'HSCHR2_8_CTG7_2', - 'NW_011332690.1': 'HG2232_PATCH', - 'NW_011332689.1': 'HG2233_PATCH', - 'NW_017363813.1': 'HG2236_PATCH', - 'NW_009646197.1': 'HG2066_PATCH', - 'NW_012132916.1': 'HG2235_PATCH', - 'NW_011332691.1': 'HG126_PATCH', - 'NW_018654711.1': 'HSCHR3_4_CTG1', - 'NW_012132917.1': 'HG2237_PATCH', - 'NW_009646198.1': 'HG2022_PATCH', - 'NW_019805491.1': 'HG2133_PATCH', - 'NW_019805492.1': 'HSCHR3_6_CTG2_1', - 'NW_019805490.1': 'HSCHR3_9_CTG2_1', - 'NW_019805489.1': 'HSCHR3_8_CTG2_1', - 'NW_019805488.1': 'HSCHR3_7_CTG2_1', - 'NW_013171799.1': 'HSCHR4_2_CTG4', - 'NW_013171800.1': 'HSCHR4_8_CTG12', - 'NW_013171801.1': 'HSCHR4_9_CTG12', - 'NW_017363814.1': 'HSCHR4_12_CTG12', - 'NW_015495300.1': 'HG2023_PATCH', - 'NW_015495301.1': 'HSCHR4_11_CTG12', - 'NW_018654712.1': 'HSCHR5_9_CTG1', - 'NW_009646199.1': 'HSCHR5_7_CTG1', - 'NW_016107297.1': 'HSCHR5_8_CTG1', - 'NW_016107298.1': 'HG30_PATCH', - 'NW_018654713.1': 'HG2057_PATCH', - 'NW_013171803.1': 'HSCHR6_1_CTG10', - 'NW_012132918.1': 'HG1651_PATCH', - 'NW_009646200.1': 'HG2128_PATCH', - 'NW_013171802.1': 'HG2072_PATCH', - 'NW_017363815.1': 'HG2121_PATCH', - 'NW_019805493.1': 'HSCHR7_3_CTG1', - 'NW_017852929.1': 'HG2088_PATCH', - 'NW_017852930.1': 'HG2266_PATCH', - 'NW_018654714.1': 'HG708_PATCH', - 'NW_018654715.1': 'HSCHR7_3_CTG4_4', - 'NW_012132919.1': 'HG2239_PATCH', - 'NW_018654717.1': 'HG76_PATCH', - 'NW_017852932.1': 'HG2068_PATCH', - 'NW_017852931.1': 'HG2067_PATCH', - 'NW_019805494.1': 'HSCHR8_7_CTG7', - 'NW_018654716.1': 'HG2419_PATCH', - 'NW_013171804.1': 'HSCHR9_1_CTG6', - 'NW_013171805.1': 'HSCHR9_1_CTG7', - 'NW_009646201.1': 'HG2030_PATCH', - 'NW_011332694.1': 'HG2244_HG2245_PATCH', - 'NW_013171806.1': 'HSCHR10_1_CTG6', - 'NW_009646202.1': 'HG2191_PATCH', - 'NW_013171807.1': 'HG2334_PATCH', - 'NW_011332693.1': 'HG2242_HG2243_PATCH', - 'NW_011332692.1': 'HG2241_PATCH', - 'NW_015148966.1': 'HG107_PATCH', - 'NW_011332695.1': 'HSCHR11_1_CTG1_2', - 
'NW_019805496.1': 'HG2114_PATCH', - 'NW_019805495.1': 'HG2060_PATCH', - 'NW_017363816.1': 'HG1708_PATCH', - 'NW_019805498.1': 'HSCHR11_1_CTG3_1', - 'NW_019805497.1': 'HSCHR11_2_CTG8', - 'NW_013171808.1': 'HG2116_PATCH', - 'NW_009646203.1': 'HG2217_PATCH', - 'NW_013171809.1': 'HSCHR12_2_CTG1', - 'NW_018654718.1': 'HG1815_PATCH', - 'NW_011332696.1': 'HG1362_PATCH', - 'NW_009646204.1': 'HG23_PATCH', - 'NW_018654720.1': 'HSCHR12_8_CTG2_1', - 'NW_015148967.1': 'HG2063_PATCH', - 'NW_018654719.1': 'HG2047_PATCH', - 'NW_011332697.1': 'HG2247_PATCH', - 'NW_019805499.1': 'HSCHR12_9_CTG2_1', - 'NW_011332699.1': 'HG2291_PATCH', - 'NW_013171810.1': 'HSCHR13_1_CTG7', - 'NW_009646205.1': 'HG2216_PATCH', - 'NW_011332700.1': 'HG2249_PATCH', - 'NW_013171811.1': 'HSCHR13_1_CTG8', - 'NW_011332698.1': 'HG2288_HG2289_PATCH', - 'NW_018654722.1': 'HG1_PATCH', - 'NW_018654721.1': 'HSCHR14_8_CTG1', - 'NW_011332701.1': 'HG2139_PATCH', - 'NW_012132920.1': 'HSCHR15_6_CTG8', - 'NW_013171812.1': 'HSCHR16_5_CTG1', - 'NW_019805500.1': 'HG2263_PATCH', - 'NW_017852933.1': 'HG926_PATCH', - 'NW_013171813.1': 'HSCHR16_4_CTG3_1', - 'NW_018654723.1': 'HSCHR16_5_CTG3_1', - 'NW_012132921.1': 'HSCHR16_3_CTG3_1', - 'NW_017363817.1': 'HG2285_HG106_HG2252_PATCH', - 'NW_016107299.1': 'HG2046_PATCH', - 'NW_017363819.1': 'HSCHR17_3_CTG1', - 'NW_017363818.1': 'HSCHR17_11_CTG4', - 'NW_019805501.1': 'HSCHR17_12_CTG4', - 'NW_019805503.1': 'HSCHR18_1_CTG1', - 'NW_014040928.1': 'HSCHR18_5_CTG1_1', - 'NW_019805502.1': 'HG2412_PATCH', - 'NW_013171814.1': 'HG2213_PATCH', - 'NW_018654724.1': 'HG2442_PATCH', - 'NW_014040929.1': 'HG26_PATCH', - 'NW_009646206.1': 'HG2021_PATCH', - 'NW_016107300.1': 'HSCHR19KIR_0019-4656-A_CTG3_1', - 'NW_016107301.1': 'HSCHR19KIR_CA01-TA01_1_CTG3_1', - 'NW_016107302.1': 'HSCHR19KIR_CA01-TA01_2_CTG3_1', - 'NW_016107303.1': 'HSCHR19KIR_CA01-TB04_CTG3_1', - 'NW_016107304.1': 'HSCHR19KIR_CA01-TB01_CTG3_1', - 'NW_016107305.1': 'HSCHR19KIR_HG2394_CTG3_1', - 'NW_016107306.1': 
'HSCHR19KIR_502960008-2_CTG3_1', - 'NW_016107307.1': 'HSCHR19KIR_502960008-1_CTG3_1', - 'NW_016107308.1': 'HSCHR19KIR_0010-5217-AB_CTG3_1', - 'NW_016107309.1': 'HSCHR19KIR_7191059-1_CTG3_1', - 'NW_016107310.1': 'HSCHR19KIR_0019-4656-B_CTG3_1', - 'NW_016107311.1': 'HSCHR19KIR_CA04_CTG3_1', - 'NW_016107313.1': 'HSCHR19KIR_7191059-2_CTG3_1', - 'NW_016107314.1': 'HSCHR19KIR_HG2396_CTG3_1', - 'NW_016107312.1': 'HSCHR19KIR_HG2393_CTG3_1', - 'NW_009646207.1': 'HSCHR22_4_CTG1', - 'NW_014040930.1': 'HSCHR22_6_CTG1', - 'NW_014040931.1': 'HSCHR22_7_CTG1', - 'NW_009646208.1': 'HSCHR22_5_CTG1', - 'NW_015148968.1': 'HSCHR22_8_CTG1', - 'NW_015148969.1': 'HG1311_PATCH', - 'NW_017363820.1': 'HSCHRX_3_CTG7', - 'NW_018654725.1': 'HG1531_PATCH', - 'NW_018654726.1': 'HG1535_PATCH', - 'NW_009646209.1': 'HG2062_PATCH', - 'NT_187515.1': 'HSCHR1_1_CTG3', - 'NT_187517.1': 'HSCHR1_2_CTG3', - 'NT_187514.1': 'HSCHR1_1_CTG11', - 'NT_187520.1': 'HSCHR1_4_CTG31', - 'NW_003315905.1': 'HSCHR1_1_CTG31', - 'NW_003315906.1': 'HSCHR1_2_CTG31', - 'NW_003315907.2': 'HSCHR1_3_CTG31', - 'NT_187521.1': 'HSCHR1_4_CTG32_1', - 'NT_187519.1': 'HSCHR1_3_CTG32_1', - 'NT_187516.1': 'HSCHR1_1_CTG32_1', - 'NT_187518.1': 'HSCHR1_2_CTG32_1', - 'NT_187525.1': 'HSCHR2_2_CTG1', - 'NT_187526.1': 'HSCHR2_3_CTG1', - 'NT_187529.1': 'HSCHR2_4_CTG1', - 'NT_187522.1': 'HSCHR2_1_CTG1', - 'NW_003315908.1': 'HSCHR2_1_CTG5', - 'NT_187524.1': 'HSCHR2_1_CTG7', - 'NT_187531.1': 'HSCHR2_5_CTG7_2', - 'NT_187530.1': 'HSCHR2_4_CTG7_2', - 'NT_187528.1': 'HSCHR2_3_CTG7_2', - 'NW_003571033.2': 'HSCHR2_2_CTG7_2', - 'NW_003315909.1': 'HSCHR2_1_CTG7_2', - 'NT_187527.1': 'HSCHR2_3_CTG15', - 'NT_187523.1': 'HSCHR2_1_CTG15', - 'NW_003871060.2': 'HSCHR3_1_CTG1', - 'NT_187535.1': 'HSCHR3_3_CTG1', - 'NT_187537.1': 'HSCHR3_4_CTG2_1', - 'NW_003315913.1': 'HSCHR3_1_CTG2_1', - 'NT_187533.1': 'HSCHR3_2_CTG2_1', - 'NT_187536.1': 'HSCHR3_3_CTG2_1', - 'NT_187538.1': 'HSCHR3_5_CTG2_1', - 'NT_187532.1': 'HSCHR3_1_CTG3', - 'NT_187534.1': 'HSCHR3_2_CTG3', - 
'NT_187539.1': 'HSCHR3_9_CTG3', - 'NT_187540.1': 'HSCHR4_1_CTG4', - 'NW_003315915.1': 'HSCHR4_1_CTG6', - 'NT_187541.1': 'HSCHR4_1_CTG8_1', - 'NT_167250.2': 'HSCHR4_1_CTG9', - 'NT_187544.1': 'HSCHR4_4_CTG12', - 'NW_003315914.1': 'HSCHR4_1_CTG12', - 'NT_187542.1': 'HSCHR4_2_CTG12', - 'NT_187545.1': 'HSCHR4_5_CTG12', - 'NT_187543.1': 'HSCHR4_3_CTG12', - 'NT_187550.1': 'HSCHR5_5_CTG1', - 'NT_187548.1': 'HSCHR5_4_CTG1', - 'NT_187547.1': 'HSCHR5_3_CTG1', - 'NW_003315920.1': 'HSCHR5_1_CTG1', - 'NW_003571036.1': 'HSCHR5_2_CTG1', - 'NT_187551.1': 'HSCHR5_6_CTG1', - 'NW_003315917.2': 'HSCHR5_2_CTG1_1', - 'NW_003315918.1': 'HSCHR5_3_CTG1_1', - 'NT_187549.1': 'HSCHR5_4_CTG1_1', - 'NW_003315919.1': 'HSCHR5_1_CTG5', - 'NT_187546.1': 'HSCHR5_2_CTG5', - 'NT_167244.2': 'HSCHR6_MHC_APD_CTG1', - 'NT_187555.1': 'HSCHR6_1_CTG7', - 'NT_187554.1': 'HSCHR6_1_CTG6', - 'NW_003315921.1': 'HSCHR6_1_CTG2', - 'NT_187556.1': 'HSCHR6_1_CTG8', - 'NT_187557.1': 'HSCHR6_1_CTG9', - 'NW_004166862.2': 'HSCHR6_1_CTG3', - 'NT_187552.1': 'HSCHR6_1_CTG4', - 'NT_187553.1': 'HSCHR6_1_CTG5', - 'NT_187558.1': 'HSCHR7_1_CTG1', - 'NT_187561.1': 'HSCHR7_2_CTG4_4', - 'NT_187559.1': 'HSCHR7_1_CTG4_4', - 'NW_003315922.2': 'HSCHR7_1_CTG6', - 'NT_187562.1': 'HSCHR7_2_CTG6', - 'NT_187564.1': 'HSCHR7_3_CTG6', - 'NT_187563.1': 'HSCHR7_2_CTG7', - 'NT_187560.1': 'HSCHR7_1_CTG7', - 'NT_187572.1': 'HSCHR8_4_CTG1', - 'NT_187568.1': 'HSCHR8_2_CTG1', - 'NT_187565.1': 'HSCHR8_1_CTG1', - 'NT_187576.1': 'HSCHR8_8_CTG1', - 'NT_187570.1': 'HSCHR8_3_CTG1', - 'NT_187577.1': 'HSCHR8_9_CTG1', - 'NT_187566.1': 'HSCHR8_1_CTG6', - 'NT_187567.1': 'HSCHR8_1_CTG7', - 'NT_187574.1': 'HSCHR8_5_CTG7', - 'NT_187575.1': 'HSCHR8_6_CTG7', - 'NT_187573.1': 'HSCHR8_4_CTG7', - 'NT_187571.1': 'HSCHR8_3_CTG7', - 'NT_187569.1': 'HSCHR8_2_CTG7', - 'NW_003315928.1': 'HSCHR9_1_CTG1', - 'NW_003315929.1': 'HSCHR9_1_CTG2', - 'NW_003315930.1': 'HSCHR9_1_CTG3', - 'NW_003315931.1': 'HSCHR9_1_CTG4', - 'NT_187578.1': 'HSCHR9_1_CTG5', - 'NW_003315934.1': 
'HSCHR10_1_CTG1', - 'NT_187579.1': 'HSCHR10_1_CTG3', - 'NW_003315935.1': 'HSCHR10_1_CTG2', - 'NT_187580.1': 'HSCHR10_1_CTG4', - 'NT_187586.1': 'HSCHR11_1_CTG8', - 'NT_187584.1': 'HSCHR11_1_CTG6', - 'NT_187585.1': 'HSCHR11_1_CTG7', - 'NT_187583.1': 'HSCHR11_1_CTG5', - 'NW_003315936.1': 'HSCHR11_1_CTG1_1', - 'NW_003871073.1': 'HG142_HG150_NOVEL_TEST', - 'NW_003871074.1': 'HG151_NOVEL_TEST', - 'NT_187582.1': 'HSCHR11_1_CTG3', - 'NT_187581.1': 'HSCHR11_1_CTG2', - 'NW_003571049.1': 'HSCHR12_1_CTG1', - 'NW_003571050.1': 'HSCHR12_2_CTG2', - 'NT_187588.1': 'HSCHR12_5_CTG2', - 'NW_003315938.1': 'HSCHR12_1_CTG2', - 'NT_187587.1': 'HSCHR12_4_CTG2', - 'NW_003315939.2': 'HSCHR12_1_CTG2_1', - 'NW_003315941.1': 'HSCHR12_2_CTG2_1', - 'NW_003315942.2': 'HSCHR12_3_CTG2_1', - 'NT_187590.1': 'HSCHR12_6_CTG2_1', - 'NW_003315940.1': 'HSCHR12_4_CTG2_1', - 'NT_187589.1': 'HSCHR12_5_CTG2_1', - 'NT_187591.1': 'HSCHR12_7_CTG2_1', - 'NT_187594.1': 'HSCHR13_1_CTG3', - 'NT_187593.1': 'HSCHR13_1_CTG2', - 'NT_187597.1': 'HSCHR13_1_CTG6', - 'NT_187595.1': 'HSCHR13_1_CTG4', - 'NT_187592.1': 'HSCHR13_1_CTG1', - 'NT_187596.1': 'HSCHR13_1_CTG5', - 'NT_187598.1': 'HSCHR14_1_CTG1', - 'NT_187601.1': 'HSCHR14_7_CTG1', - 'NT_187599.1': 'HSCHR14_2_CTG1', - 'NT_187600.1': 'HSCHR14_3_CTG1', - 'NT_187602.1': 'HSCHR15_1_CTG1', - 'NT_187604.1': 'HSCHR15_3_CTG3', - 'NT_187603.1': 'HSCHR15_1_CTG3', - 'NW_003315943.1': 'HSCHR15_1_CTG8', - 'NT_187605.1': 'HSCHR15_3_CTG8', - 'NW_003315944.2': 'HSCHR15_2_CTG8', - 'NT_187606.1': 'HSCHR15_5_CTG8', - 'NT_187610.1': 'HSCHR16_CTG2', - 'NT_187609.1': 'HSCHR16_4_CTG1', - 'NT_187608.1': 'HSCHR16_3_CTG1', - 'NT_187607.1': 'HSCHR16_1_CTG1', - 'NW_003315945.1': 'HSCHR16_1_CTG3_1', - 'NW_003315946.1': 'HSCHR16_2_CTG3_1', - 'NW_003315952.3': 'HSCHR17_1_CTG1', - 'NT_187613.1': 'HSCHR17_2_CTG2', - 'NT_187611.1': 'HSCHR17_1_CTG2', - 'NT_187614.1': 'HSCHR17_7_CTG4', - 'NW_003871091.1': 'HSCHR17_4_CTG4', - 'NW_003871092.1': 'HSCHR17_5_CTG4', - 'NW_003315953.2': 'HSCHR17_1_CTG4', - 
'NT_167251.2': 'HSCHR17_1_CTG5', - 'NW_003315954.1': 'HSCHR17_2_CTG4', - 'NT_187615.1': 'HSCHR17_8_CTG4', - 'NT_187616.1': 'HSCHR17_9_CTG4', - 'NW_003315955.1': 'HSCHR17_3_CTG4', - 'NT_187612.1': 'HSCHR17_1_CTG9', - 'NT_187618.1': 'HSCHR18_4_CTG1_1', - 'NW_003315956.1': 'HSCHR18_1_CTG1_1', - 'NW_003315959.1': 'HSCHR18_2_CTG1_1', - 'NW_003315960.1': 'HSCHR18_2_CTG2', - 'NW_003315957.1': 'HSCHR18_1_CTG2', - 'NW_003315958.1': 'HSCHR18_1_CTG2_1', - 'NW_003315961.1': 'HSCHR18_2_CTG2_1', - 'NT_187617.1': 'HSCHR18_3_CTG2_1', - 'NT_187622.1': 'HSCHR19_5_CTG2', - 'NT_187621.1': 'HSCHR19_4_CTG2', - 'NW_003315962.1': 'HSCHR19_1_CTG2', - 'NW_003315964.2': 'HSCHR19_2_CTG2', - 'NW_003315965.1': 'HSCHR19_3_CTG2', - 'NW_003315963.1': 'HSCHR19_1_CTG3_1', - 'NT_187619.1': 'HSCHR19_2_CTG3_1', - 'NT_187620.1': 'HSCHR19_3_CTG3_1', - 'NW_003571054.1': 'HSCHR19LRC_COX1_CTG3_1', - 'NW_003315966.2': 'HSCHR20_1_CTG1', - 'NT_187623.1': 'HSCHR20_1_CTG2', - 'NT_187625.1': 'HSCHR20_1_CTG4', - 'NT_187624.1': 'HSCHR20_1_CTG3', - 'NW_003315967.2': 'HSCHR21_1_CTG1_1', - 'NT_187628.1': 'HSCHR21_8_CTG1_1', - 'NT_187627.1': 'HSCHR21_6_CTG1_1', - 'NW_003315968.2': 'HSCHR21_2_CTG1_1', - 'NW_003315969.2': 'HSCHR21_3_CTG1_1', - 'NW_003315970.2': 'HSCHR21_4_CTG1_1', - 'NT_187626.1': 'HSCHR21_5_CTG2', - 'NT_187629.1': 'HSCHR22_1_CTG3', - 'NT_187632.1': 'HSCHR22_1_CTG6', - 'NT_187633.1': 'HSCHR22_1_CTG7', - 'NT_187630.1': 'HSCHR22_1_CTG4', - 'NT_187631.1': 'HSCHR22_1_CTG5', - 'NW_003315972.2': 'HSCHR22_1_CTG2', - 'NW_003315971.2': 'HSCHR22_1_CTG1', - 'NT_187634.1': 'HSCHRX_1_CTG3', - 'NT_187635.1': 'HSCHRX_2_CTG12', - 'NT_187646.1': 'HSCHR1_ALT2_1_CTG32_1', - 'NT_187648.1': 'HSCHR2_2_CTG7', - 'NT_187647.1': 'HSCHR2_2_CTG15', - 'NT_187649.1': 'HSCHR3_3_CTG3', - 'NT_187650.1': 'HSCHR4_6_CTG12', - 'NT_187651.1': 'HSCHR5_1_CTG1_1', - 'NT_187652.1': 'HSCHR5_3_CTG5', - 'NT_113891.3': 'HSCHR6_MHC_COX_CTG1', - 'NT_187653.1': 'HSCHR7_2_CTG1', - 'NT_187655.1': 'HSCHR8_6_CTG1', - 'NT_187654.1': 'HSCHR8_5_CTG1', - 
'NT_187656.1': 'HSCHR11_2_CTG1', - 'NT_187657.1': 'HSCHR11_2_CTG1_1', - 'NT_187658.1': 'HSCHR12_3_CTG2', - 'NT_187659.1': 'HSCHR15_2_CTG3', - 'NT_187660.1': 'HSCHR15_4_CTG8', - 'NT_187662.1': 'HSCHR17_2_CTG1', - 'NT_187664.1': 'HSCHR17_3_CTG2', - 'NT_187661.1': 'HSCHR17_10_CTG4', - 'NW_003871093.1': 'HSCHR17_6_CTG4', - 'NT_187663.1': 'HSCHR17_2_CTG5', - 'NT_187665.1': 'HSCHR18_ALT21_CTG2_1', - 'NT_187666.1': 'HSCHR18_ALT2_CTG2_1', - 'NW_003571055.2': 'HSCHR19LRC_COX2_CTG3_1', - 'NW_004504305.1': 'HSCHR22_2_CTG1', - 'NT_187667.1': 'HSCHRX_2_CTG3', - 'NT_187678.1': 'HSCHR3_4_CTG3', - 'NT_187679.1': 'HSCHR4_7_CTG12', - 'NT_167245.2': 'HSCHR6_MHC_DBB_CTG1', - 'NT_187680.1': 'HSCHR8_7_CTG1', - 'NT_187681.1': 'HSCHR11_3_CTG1', - 'NW_003571056.2': 'HSCHR19LRC_LRC_I_CTG3_1', - 'NT_187682.1': 'HSCHR22_3_CTG1', - 'NT_187688.1': 'HSCHR3_5_CTG3', - 'NT_167246.2': 'HSCHR6_MHC_MANN_CTG1', - 'NW_003571057.2': 'HSCHR19LRC_LRC_J_CTG3_1', - 'NT_187689.1': 'HSCHR3_6_CTG3', - 'NT_167247.2': 'HSCHR6_MHC_MCF_CTG1', - 'NW_003571058.2': 'HSCHR19LRC_LRC_S_CTG3_1', - 'NT_187690.1': 'HSCHR3_7_CTG3', - 'NT_167248.2': 'HSCHR6_MHC_QBL_CTG1', - 'NW_003571059.2': 'HSCHR19LRC_LRC_T_CTG3_1', - 'NT_187691.1': 'HSCHR3_8_CTG3', - 'NT_167249.2': 'HSCHR6_MHC_SSTO_CTG1', - 'NW_003571060.1': 'HSCHR19LRC_PGF1_CTG3_1', - 'NT_187692.1': 'HSCHR6_8_CTG1', - 'NW_003571061.2': 'HSCHR19LRC_PGF2_CTG3_1', - 'NT_187693.1': 'HSCHR19_4_CTG3_1', - 'NT_187636.1': 'HSCHR19KIR_FH15_B_HAP_CTG3_1', - 'NT_187637.1': 'HSCHR19KIR_G085_A_HAP_CTG3_1', - 'NT_187638.1': 'HSCHR19KIR_G085_BA1_HAP_CTG3_1', - 'NT_187639.1': 'HSCHR19KIR_G248_A_HAP_CTG3_1', - 'NT_187640.1': 'HSCHR19KIR_G248_BA2_HAP_CTG3_1', - 'NT_187641.1': 'HSCHR19KIR_GRC212_AB_HAP_CTG3_1', - 'NT_187642.1': 'HSCHR19KIR_GRC212_BA1_HAP_CTG3_1', - 'NT_187643.1': 'HSCHR19KIR_LUCE_A_HAP_CTG3_1', - 'NT_187644.1': 'HSCHR19KIR_LUCE_BDEL_HAP_CTG3_1', - 'NT_187645.1': 'HSCHR19KIR_RSH_A_HAP_CTG3_1', - 'NT_187668.1': 'HSCHR19KIR_RSH_BA2_HAP_CTG3_1', - 'NT_187669.1': 
'HSCHR19KIR_T7526_A_HAP_CTG3_1', - 'NT_187670.1': 'HSCHR19KIR_T7526_BDEL_HAP_CTG3_1', - 'NT_187671.1': 'HSCHR19KIR_ABC08_A1_HAP_CTG3_1', - 'NT_187672.1': 'HSCHR19KIR_ABC08_AB_HAP_C_P_CTG3_1', - 'NT_187673.1': 'HSCHR19KIR_ABC08_AB_HAP_T_P_CTG3_1', - 'NT_187674.1': 'HSCHR19KIR_FH05_A_HAP_CTG3_1', - 'NT_187675.1': 'HSCHR19KIR_FH05_B_HAP_CTG3_1', - 'NT_187676.1': 'HSCHR19KIR_FH06_A_HAP_CTG3_1', - 'NT_187677.1': 'HSCHR19KIR_FH06_BA1_HAP_CTG3_1', - 'NT_187683.1': 'HSCHR19KIR_FH08_A_HAP_CTG3_1', - 'NT_187684.1': 'HSCHR19KIR_FH08_BAX_HAP_CTG3_1', - 'NT_187685.1': 'HSCHR19KIR_FH13_A_HAP_CTG3_1', - 'NT_187686.1': 'HSCHR19KIR_FH13_BA2_HAP_CTG3_1', - 'NT_187687.1': 'HSCHR19KIR_FH15_A_HAP_CTG3_1', - 'NT_113949.2': 'HSCHR19KIR_RP5_B_HAP_CTG3_1', - 'NT_167235.1': 'HSCHR22_CTG1_3' - } - if primary_assembly == 'GRCh38' or primary_assembly == 'hg38': - chr_num = chr_num_convert_38.get(accession) - if primary_assembly == 'GRCh37' or primary_assembly == 'hg19': - chr_num = chr_num_convert_37.get(accession) - try: - return chr_num - except UnboundLocalError: - chr_num = None - return chr_num - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
-# diff --git a/VariantValidator/variantanalyser/vvLogging.py b/VariantValidator/variantanalyser/vvLogging.py deleted file mode 100644 index 6f66d7ff..00000000 --- a/VariantValidator/variantanalyser/vvLogging.py +++ /dev/null @@ -1,144 +0,0 @@ - -import logging -import datetime -import os -from StringIO import StringIO - -VALIDATOR_DEBUG=os.environ.get('VALIDATOR_DEBUG') - -class logger(): - #Grand unified variant validator logging static class. - #logString=StringIO() - @staticmethod - def loggingSetup(): - # Set up logging - # I need to use the VVObfuscator in the logger global dictionary - # becuase it's a global variable tied to the logger module - # Modules are singletons, but their variables are not. Consequently - # this is the only sensible way to ensure that the logging setup is called - # once. If another programmer has any better ideas that leave these functions - # with a configured VV logger object that only has its handlers added once, - # feel free to fix it up. - #print("Entering setup") - #The logger must be at the very least drawn from the logging library's dictionary - #for every time this module is imported. - logger.logger = logging.getLogger("VV") - if "VVObfuscator" in logging.Logger.manager.loggerDict: - return - logging.getLogger("VVObfuscator") - #print("Engaging setup") - - global VALIDATOR_DEBUG - # Check envrionment variables - VALIDATOR_DEBUG=os.environ.get('VALIDATOR_DEBUG') - #print("VD",os.environ.get('VALIDATOR_DEBUG')) - - if VALIDATOR_DEBUG is None: - VALIDATOR_DEBUG = "info console" # Set default value - # Set logging urgency levels. 
- if "debug" in VALIDATOR_DEBUG: - logLevel = logging.DEBUG - elif "warning" in VALIDATOR_DEBUG: - logLevel = logging.WARNING - elif "info" in VALIDATOR_DEBUG: - logLevel = logging.INFO - elif "error" in VALIDATOR_DEBUG: - logLevel = logging.ERROR - elif "critical" in VALIDATOR_DEBUG: - logLevel = logging.CRITICAL - - if "file" in VALIDATOR_DEBUG: - logFileHandler = logging.FileHandler("VV-log.txt") - logFileHandler.setLevel(logLevel) - logger.logger.addHandler(logFileHandler) - if "console" in VALIDATOR_DEBUG: - logConsoleHandler = logging.StreamHandler() - logConsoleHandler.setLevel(logLevel) - logger.logger.addHandler(logConsoleHandler) - # Create a log string to add to validations. - # Since it has to survive multiple imports, I'm stuffing it into the logger dictionary. - # Feel free to amend this coding monstrosity without my knowledge. - logging.Logger.manager.loggerDict["VVLogString"]=StringIO() - logStringHandler = logging.StreamHandler(logging.Logger.manager.loggerDict["VVLogString"]) - # We want the validation metadata to not contain debug info which may change with program operation - logStringHandler.setLevel(logging.INFO) - logger.logger.addHandler(logStringHandler) - logger.logger.setLevel(logging.DEBUG) # The logger itself must be set with an appropriate level of urgency. - - logger.logger.propagate = False - @staticmethod - def debug(s): - logger.loggingSetup() - logger.logger.debug("DEBUG: "+s) - @staticmethod - def info(s): - logger.loggingSetup() - logger.logger.info("INFO : "+s) - @staticmethod - def warning(s): - logger.loggingSetup() - logger.logger.warning("WARN : "+s) - @staticmethod - def error(s): - logger.loggingSetup() - logger.logger.error("ERROR: "+s) - @staticmethod - def critical(s): - logger.loggingSetup() - logger.logger.critical("CRIT : "+s) - @staticmethod - def trace(s,v=None): - #v should be a dictionary with a 'timing' key. 
- #global VALIDATOR_DEBUG - #print(VALIDATOR_DEBUG) - #if "trace" in VALIDATOR_DEBUG: - # logger.loggingSetup() - if not v: - logger.logger.debug("TRACE: "+s) - else: - logger.logger.debug("TRACE: "+s) - v['timing']['traceLabels'].append(s) - v['timing']['traceTimes'].append(str((datetime.datetime.now()-v['timing']['checkDT']).microseconds//1000)) - v['timing']['checkDT']=datetime.datetime.now() - @staticmethod - def resub(s): - #Resubmit one or multiple variants - logger.loggingSetup() - logger.logger.warning("RESUB: "+s) - @staticmethod - def getString(): - logger.loggingSetup() - #print("RETURNING:") - #print(logging.Logger.manager.loggerDict["VVLogString"].getvalue()) - return logging.Logger.manager.loggerDict["VVLogString"].getvalue() - @staticmethod - def traceStart(v): - logger.loggingSetup() -# global VALIDATOR_DEBUG -# if "trace" in VALIDATOR_DEBUG: - if True: - v['timing']={} - v['timing']['traceLabels']=[] - v['timing']['traceTimes']=[] - v['timing']['startDT']=datetime.datetime.now() - v['timing']['checkDT']=datetime.datetime.now() - @staticmethod - def traceEnd(v): - logger.loggingSetup() - #global VALIDATOR_DEBUG - #if "trace" in VALIDATOR_DEBUG: - if True: - v['timing']['traceLabels'].append("complete") - v['timing']['traceTimes'].append((datetime.datetime.now()-v['timing']['startDT']).microseconds//1000) - del v['timing']['startDT'] - del v['timing']['checkDT'] - -#Test -#logger.debug("Message D") -#logger.info("Message I") -#logger.warning("Message W") -#logger.error("Message E") -#logger.critical("Message C")# - -#print("TEST "+logString.getvalue()) - From 082087b24efe687b72822b4062d343e7da11ae33 Mon Sep 17 00:00:00 2001 From: buran Date: Tue, 22 Jan 2019 10:59:23 +0000 Subject: [PATCH 024/223] Added as many docstrings as I could --- VariantValidator/modules/test_vv.py | 16 +- VariantValidator/modules/vvChromosomes.py | 80 ++-- VariantValidator/modules/vvDBGet.py | 3 + VariantValidator/modules/vvDBInsert.py | 3 + 
VariantValidator/modules/vvDatabase.py | 36 +- VariantValidator/modules/vvFunctions.py | 65 +-- VariantValidator/modules/vvHGVS.py | 66 ++- VariantValidator/modules/vvLogging.py | 4 + VariantValidator/modules/vvMixinConverters.py | 377 +++++++----------- VariantValidator/modules/vvMixinCore.py | 25 +- VariantValidator/modules/vvMixinInit.py | 53 +-- 11 files changed, 338 insertions(+), 390 deletions(-) diff --git a/VariantValidator/modules/test_vv.py b/VariantValidator/modules/test_vv.py index fe67e290..6e96570e 100644 --- a/VariantValidator/modules/test_vv.py +++ b/VariantValidator/modules/test_vv.py @@ -18,24 +18,24 @@ vv.my_config() ''' -def constructVal(): - val=Validator() - return val @pytest.fixture(params=inputVariants[:]) def constructValidation(request): - val=constructVal() + val=Validator() # print request.param selectTranscripts='all' selectedAssembly='GRCh37' - return val,val.validate(request.param,selectedAssembly,selectTranscripts) + out=val.validate(request.param,selectedAssembly,selectTranscripts) + del val.db + del val + return out def test_validation_output(constructValidation): - val,v=constructValidation + v=constructValidation assert v!=None def test_validation_errors(constructValidation): - val,v=constructValidation + v=constructValidation logs=v["metadata"]["logs"].split("\n") e=0 for l in logs: @@ -44,7 +44,7 @@ def test_validation_errors(constructValidation): assert e==0 def test_validation_criticals(constructValidation): - val,v=constructValidation + v=constructValidation logs=v["metadata"]["logs"].split("\n") c=0 for l in logs: diff --git a/VariantValidator/modules/vvChromosomes.py b/VariantValidator/modules/vvChromosomes.py index d9a03b26..d92b0c54 100644 --- a/VariantValidator/modules/vvChromosomes.py +++ b/VariantValidator/modules/vvChromosomes.py @@ -19,7 +19,13 @@ def supported_for_mapping(ac, primary_assembly): def to_accession(chr_num, primary_assembly): - # Available genome builds + ''' + Available genome builds + + :param 
chr_num: + :param primary_assembly: + :return: + ''' GRCh37 = { "1": "NC_000001.10", "2": "NC_000002.11", @@ -1536,51 +1542,14 @@ def to_accession(chr_num, primary_assembly): chr_accession = hg19.get(chr_num) return chr_accession -""" -Mark for removal at testing -""" -# def to_chr_num(accession): -# # Available genome builds - Primary assembly only, Otherwise leave the RefSeq accession in place -# chr_num_convert = { -# "NC_000001": "1", -# "NC_000002": "2", -# "NC_000003": "3", -# "NC_000004": "4", -# "NC_000005": "5", -# "NC_000006": "6", -# "NC_000007": "7", -# "NC_000008": "8", -# "NC_000009": "9", -# "NC_000010": "10", -# "NC_000011": "11", -# "NC_000012": "12", -# "NC_000013": "13", -# "NC_000014": "14", -# "NC_000015": "15", -# "NC_000016": "16", -# "NC_000017": "17", -# "NC_000018": "18", -# "NC_000019": "19", -# "NC_000020": "20", -# "NC_000021": "21", -# "NC_000022": "22", -# "NC_000023": "X", -# "NC_000024": "Y" -# } -# accession = accession.split('.')[0] -# chr_num = chr_num_convert.get(accession) -# return chr_num - - -""" -Simple dictionary lookup function that takes the RefSeq chromosome identifier and returns the -UCSC genome build formatted VCF identifier. - -Note, UCSC and GenBank have different aliases for the ALT and Patch identifiers -""" - def to_chr_num_ucsc(accession, primary_assembly): + """ + Simple dictionary lookup function that takes the RefSeq chromosome identifier and returns the + UCSC genome build formatted VCF identifier. + + Note, UCSC and GenBank have different aliases for the ALT and Patch identifiers + """ # Available genome builds chr_num_convert_37 = { "NC_000001.10": "chr1", @@ -2150,16 +2119,13 @@ def to_chr_num_ucsc(accession, primary_assembly): chr_num = None return chr_num - -""" -Simple dictionary lookup function that takes the RefSeq chromosome identifier and returns the -Genbank genome build formatted VCF identifier. 
- -Note, UCSC and GenBank have different aliases for the ALT and Patch identifiers -""" - - def to_chr_num_refseq(accession, primary_assembly): + """ + Simple dictionary lookup function that takes the RefSeq chromosome identifier and returns the + Genbank genome build formatted VCF identifier. + + Note, UCSC and GenBank have different aliases for the ALT and Patch identifiers + """ # Available genome builds chr_num_convert_37 = { "NC_000001.10": "1", @@ -2846,11 +2812,11 @@ def to_chr_num_refseq(accession, primary_assembly): return chr_num # from gap_genes -""" -Lists of genes for GRCh37 and GRCh38 which require a gap to be inserted into either the -transcript or the genome to maintain a perfect alignment -""" def gap_black_list(symbol): + """ + Lists of genes for GRCh37 and GRCh38 which require a gap to be inserted into either the + transcript or the genome to maintain a perfect alignment + """ gapGene = { "LPP": "", "VPS13D": "", diff --git a/VariantValidator/modules/vvDBGet.py b/VariantValidator/modules/vvDBGet.py index dbb6ed80..b891610e 100644 --- a/VariantValidator/modules/vvDBGet.py +++ b/VariantValidator/modules/vvDBGet.py @@ -2,6 +2,9 @@ from vvLogging import logger class vvDBGet: + ''' + Most of the functions in DBGet generate queries for retrieving data from the databases. + ''' def __init__(self,db): # These are inherited by reference from the vvDatabase object. self.db=db diff --git a/VariantValidator/modules/vvDBInsert.py b/VariantValidator/modules/vvDBInsert.py index 6a551cc7..c006bc00 100644 --- a/VariantValidator/modules/vvDBInsert.py +++ b/VariantValidator/modules/vvDBInsert.py @@ -1,6 +1,9 @@ from vvFunctions import handleCursor class vvDBInsert: + ''' + This object is a function container for inserting objects into the database. + ''' def __init__(self,db): # These are inherited by reference from the vvDatabase object. 
self.db=db diff --git a/VariantValidator/modules/vvDatabase.py b/VariantValidator/modules/vvDatabase.py index 747a412f..b711b81f 100644 --- a/VariantValidator/modules/vvDatabase.py +++ b/VariantValidator/modules/vvDatabase.py @@ -12,10 +12,11 @@ import os class vvDatabase: - # This class contains and handles the mysql connections for the variant validator database. + ''' + This class contains and handles the mysql connections for the variant validator database. + ''' def __init__(self,val,dbConfig): self.conn = None - self.pool = None # self.cursor will be none UNLESS you're wrapping a function in @handleCursor, which automatically opens and # closes connections for you. self.cursor=None @@ -28,7 +29,14 @@ def __init__(self,val,dbConfig): self.insert = vvDBInsert(self) # contains dbinsert, dbupdate self.get = vvDBGet(self) # contains dbfetchone, dbfetchall self.db=self #needed to make handlecursor behave - + self.pool=mysql.connector.pooling.MySQLConnectionPool(pool_size=10, **self.dbConfig) + def __del__(self): + if self.conn: + self.conn.close() + if self.pool: + self.pool.close() + if self.cursor: + self.cursor.close() # from dbquery @handleCursor def query_with_fetchone(self,entry, table): @@ -41,19 +49,23 @@ def query_with_fetchone(self,entry, table): logger.debug("No data returned from query "+str(query)) return row # From data - # function for adding information to database - def data_add(self, input, alt_aln_method, accession, dbaction, hp, evm, hdp): + def data_add(self, accession): + ''' # Add accurate transcript descriptions to the database - # RefSeq databases - # Get the Entrez (GenBank) file - self.update_transcript_info_record(accession, hdp) + :param accession: + :return: + ''' + self.update_transcript_info_record(accession, self.val.hdp) entry = self.in_entries(accession, 'transcript_info') return entry - # Retrieve transcript information - def in_entries(self,entry, table): - # Use dbquery.py to connect to mysql and return the necessary data + ''' 
+ Retrieve transcript information + :param entry: + :param table: + :return: + ''' data={} if table == 'transcript_info': row = self.query_with_fetchone(entry, table) @@ -74,7 +86,9 @@ def in_entries(self,entry, table): data['expiry'] = row[7] return data def update_transcript_info_record(self,accession, hdp): + ''' # Search Entrez for corresponding record for the RefSeq ID + ''' # Prime these entries, just in case. previous_entry = self.in_entries(accession, 'transcript_info') accession = accession diff --git a/VariantValidator/modules/vvFunctions.py b/VariantValidator/modules/vvFunctions.py index 107b3932..ce314778 100644 --- a/VariantValidator/modules/vvFunctions.py +++ b/VariantValidator/modules/vvFunctions.py @@ -15,11 +15,13 @@ #from urllib.parse import urlparse #Python 3 def handleCursor(func): - #Decorator function for handling opening and closing cursors. + ''' + Decorator function for handling opening and closing cursors. + ''' @functools.wraps(func) def wrapper(self,*args,**kwargs): - if self.db.pool==None: - self.db.pool=mysql.connector.pooling.MySQLConnectionPool(pool_size=10, **self.db.dbConfig) +# if self.db.pool==None: +# self.db.pool=mysql.connector.pooling.MySQLConnectionPool(pool_size=10, **self.db.dbConfig) self.db.conn=self.db.pool.get_connection() self.db.cursor = self.db.conn.cursor(buffered=True) out=func(self,*args,**kwargs) @@ -78,15 +80,15 @@ def valstr(hgvs_variant): return cp_hgvs_variant # From output_formatter -""" -format protein description into single letter aa code -""" def single_letter_protein(hgvs_protein): + """ + format protein description into single letter aa code + """ return hgvs_protein.format({'p_3_letter': False}) -""" -format nucleotide descriptions to not display reference base -""" def remove_reference(hgvs_nucleotide): + """ + format nucleotide descriptions to not display reference base + """ hgvs_nucleotide_refless = hgvs_nucleotide.format({'max_ref_length': 0}) return hgvs_nucleotide_refless @@ -103,16 +105,16 @@ 
def exceptPass(validation=None): logger.debug(er) # From functions.py -""" -user_input -collect the input from the form and convert to a hgvs readable string - Removes brackets and contained information -if given - Identifies variant type (p. c. etc) - Returns a dictionary containing a formated input string which is optimal for hgvs - parsing and the variant type - Accepts c, g, n, r currently. And now P also 15.07.15 -""" def user_input(input): + """ + user_input + collect the input from the form and convert to a hgvs readable string + Removes brackets and contained information -if given + Identifies variant type (p. c. etc) + Returns a dictionary containing a formated input string which is optimal for hgvs + parsing and the variant type + Accepts c, g, n, r currently. And now P also 15.07.15 + """ raw_variant = input.strip() # Set regular expressions for if statements @@ -185,11 +187,10 @@ def user_input(input): return formatted # From links.py -""" -Function which predicts the protein effect of c. inversions -""" - def pro_inv_info(prot_ref_seq, prot_var_seq): + """ + Function which predicts the protein effect of c. inversions + """ info = { 'variant': 'true', 'prot_del_seq': '', @@ -382,11 +383,11 @@ def pro_delins_info(prot_ref_seq, prot_var_seq): info['edit_end'] = info['edit_start'] + len(ref) -1 return info -""" -Translate c. reference sequences, including those that have been modified -must have the CDS in the specified position -""" def translate(ed_seq, cds_start): + """ + Translate c. 
reference sequences, including those that have been modified + must have the CDS in the specified position + """ # ed_seq = ed_seq.replace('\n', '') ed_seq = ed_seq.strip() # Ensure the starting codon is in the correct position @@ -415,10 +416,10 @@ def translate(ed_seq, cds_start): translation = 'error' return translation -""" -Convert single letter amino acid code to 3 letter code -""" def one_to_three(seq): + """ + Convert single letter amino acid code to 3 letter code + """ aacode = { 'A': 'Ala', 'C': 'Cys', 'D': 'Asp', 'E': 'Glu', 'F': 'Phe', 'G': 'Gly', 'H': 'His', 'I': 'Ile', @@ -438,11 +439,11 @@ def one_to_three(seq): return threed_up -""" -Takes a reference sequence and inverts the specified position -""" # n. Inversions - This comes from VariantValidator, not validation!!!! def n_inversion(ref_seq, del_seq, inv_seq, interval_start, interval_end): + """ + Takes a reference sequence and inverts the specified position + """ sequence = '' # Use string indexing to check whether the sequences are the same test = ref_seq[interval_start - 1:interval_end] diff --git a/VariantValidator/modules/vvHGVS.py b/VariantValidator/modules/vvHGVS.py index a0c352d9..5e364439 100644 --- a/VariantValidator/modules/vvHGVS.py +++ b/VariantValidator/modules/vvHGVS.py @@ -1,18 +1,6 @@ """ A variety of functions that convert parder hgvs objects into VCF component parts Each function has a slightly difference emphasis -1. hgvs2vcf -Simple conversionwhich ensures identity is as 5 prime as possible by adding an extra 5 -prime base. Necessary for most gap handling situations -2. report_hgvs2vcf -Used to report the Most true representation of the VCF i.e. 5 prime normalized but no -additional bases added. NOTE: no gap handling capabilities -3. pos_lock_hgvs2vcf -No normalization at all. No additional bases added. Simply returns an in-situ VCF -4. hard_right_hgvs2vcf and hard_left_hgvs2vcf -Designed specifically for gap handling. 
-hard left pushes as 5 prime as possible and adds additional bases -hard right pushes as 3 prime as possible and adds additional bases """ # Import modules @@ -31,10 +19,15 @@ # Error handling class pseudoVCF2HGVSError(Exception): pass -# pvcf is a pseudo_vcf string -# genome build is a build string e.g. GRCh37 hg19 -# normalization direction an integer, 5 or 3. def pvcf_to_hgvs(input, selected_assembly, normalization_direction, reverse_normalizer, validator): + ''' + :param input: pseudo_vcf string + :param selected_assembly: + :param normalization_direction: normalization direction an integer, 5 or 3. + :param reverse_normalizer: + :param validator: + :return: + ''' # Set normalizer if normalization_direction == 3: selected_normalizer = validator.hn @@ -190,6 +183,16 @@ def pvcf_to_hgvs(input, selected_assembly, normalization_direction, reverse_norm def hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): + ''' + Simple conversionwhich ensures identity is as 5 prime as possible by adding an extra 5 + prime base. Necessary for most gap handling situations + + :param hgvs_genomic: + :param primary_assembly: + :param reverse_normalizer: + :param sf: + :return: + ''' hgvs_genomic_variant = hgvs_genomic # Reverse normalize hgvs_genomic_variant: NOTE will replace ref reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic_variant) @@ -335,6 +338,16 @@ def hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): def report_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): + ''' + Used to report the Most true representation of the VCF i.e. 5 prime normalized but no + additional bases added. 
NOTE: no gap handling capabilities + + :param hgvs_genomic: + :param primary_assembly: + :param reverse_normalizer: + :param sf: + :return: + ''' hgvs_genomic_variant = hgvs_genomic # Reverse normalize hgvs_genomic_variant: NOTE will replace ref @@ -498,6 +511,15 @@ def report_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): def pos_lock_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): + ''' + No normalization at all. No additional bases added. Simply returns an in-situ VCF + + :param hgvs_genomic: + :param primary_assembly: + :param reverse_normalizer: + :param sf: + :return: + ''' # Replace reference manually if hgvs_genomic.posedit.edit.ref == '': hgvs_genomic.posedit.edit.ref = sf.fetch_seq(str(hgvs_genomic.ac), hgvs_genomic.posedit.pos.start.base - 1, @@ -640,6 +662,10 @@ def pos_lock_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): def hard_right_hgvs2vcf(hgvs_genomic, primary_assembly, hn, sf): + ''' + Designed specifically for gap handling. + hard right pushes as 3 prime as possible and adds additional bases + ''' hgvs_genomic_variant = hgvs_genomic # Reverse normalize hgvs_genomic_variant: NOTE will replace ref normalized_hgvs_genomic = hn.normalize(hgvs_genomic_variant) @@ -784,6 +810,16 @@ def hard_right_hgvs2vcf(hgvs_genomic, primary_assembly, hn, sf): def hard_left_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): + ''' + Designed specifically for gap handling. 
+ hard left pushes as 5 prime as possible and adds additional bases + + :param hgvs_genomic: + :param primary_assembly: + :param reverse_normalizer: + :param sf: + :return: + ''' hgvs_genomic_variant = hgvs_genomic # Reverse normalize hgvs_genomic_variant: NOTE will replace ref reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic_variant) diff --git a/VariantValidator/modules/vvLogging.py b/VariantValidator/modules/vvLogging.py index 6f66d7ff..6901b6a3 100644 --- a/VariantValidator/modules/vvLogging.py +++ b/VariantValidator/modules/vvLogging.py @@ -7,10 +7,13 @@ VALIDATOR_DEBUG=os.environ.get('VALIDATOR_DEBUG') class logger(): + ''' #Grand unified variant validator logging static class. + ''' #logString=StringIO() @staticmethod def loggingSetup(): + ''' # Set up logging # I need to use the VVObfuscator in the logger global dictionary # becuase it's a global variable tied to the logger module @@ -19,6 +22,7 @@ def loggingSetup(): # once. If another programmer has any better ideas that leave these functions # with a configured VV logger object that only has its handlers added once, # feel free to fix it up. + ''' #print("Entering setup") #The logger must be at the very least drawn from the logging library's dictionary #for every time this module is imported. diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index c5d364b4..0552380d 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -27,11 +27,15 @@ class Mixin(vvMixinInit.Mixin): - """ - r_to_c - parses r. variant strings into hgvs object and maps to the c. equivalent. - """ + ''' + This mixin contains converters that use the validator's configuration information. + + ''' def r_to_c(self, variant, evm): + """ + r_to_c + parses r. variant strings into hgvs object and maps to the c. equivalent. 
+ """ # convert the input string into a hgvs object by parsing var_r = self.hp.parse_hgvs_variant(variant) # map to the coding sequence @@ -40,15 +44,13 @@ def r_to_c(self, variant, evm): c_from_r = {'variant': variant, 'type': ':c.'} return c_from_r - """ - Maps transcript variant descriptions onto specified RefSeqGene reference sequences - Return an hgvs object containing the genomic sequence variant relative to the RefSeqGene - acession - refseq_ac = RefSeqGene ac - """ - - def refseq(self, variant, vmOld, refseq_ac, hpOld, evm, hdpOld, primary_assembly): + """ + Maps transcript variant descriptions onto specified RefSeqGene reference sequences + Return an hgvs object containing the genomic sequence variant relative to the RefSeqGene + acession + refseq_ac = RefSeqGene ac + """ vr = hgvs.validator.Validator(self.hdp) # parse the variant into hgvs object var_c = self.hp.parse_hgvs_variant(variant) @@ -90,15 +92,12 @@ def refseq(self, variant, vmOld, refseq_ac, hpOld, evm, hdpOld, primary_assembly # Return as an error if all fail return ref_g_dict - - """ - Parses genomic variant strings into hgvs objects - Maps genomic hgvs object into a coding hgvs object if the c accession string is provided - returns a c. variant description string - """ - - def g_to_c(self, var_g, tx_ac, hpOld, evm): + """ + Parses genomic variant strings into hgvs objects + Maps genomic hgvs object into a coding hgvs object if the c accession string is provided + returns a c. variant description string + """ pat_g = re.compile("\:g\.") # Pattern looks for :g. # If the :g. pattern is present in the input variant if pat_g.search(var_g): @@ -109,14 +108,12 @@ def g_to_c(self, var_g, tx_ac, hpOld, evm): return var_c - """ - Parses genomic variant strings into hgvs objects - Maps genomic hgvs object into a non-coding hgvs object if the n accession string is provided - returns a n. 
variant description string - """ - - def g_to_n(self, var_g, tx_ac, hpOld, evm): + """ + Parses genomic variant strings into hgvs objects + Maps genomic hgvs object into a non-coding hgvs object if the n accession string is provided + returns a n. variant description string + """ pat_g = re.compile("\:g\.") # Pattern looks for :g. # If the :g. pattern is present in the input variant if pat_g.search(var_g): @@ -126,29 +123,23 @@ def g_to_n(self, var_g, tx_ac, hpOld, evm): var_n = str(evm.g_to_n(var_g, tx_ac)) return var_n - - """ - Ensures variant strings are transcript c. or n. - returns parsed hgvs c. or n. object - """ - - def coding(self, variant, hpOld): + """ + Ensures variant strings are transcript c. or n. + returns parsed hgvs c. or n. object + """ # If the :c. pattern is present in the input variant if re.search(':c.', variant) or re.search(':n.', variant): # convert the input string into a hgvs object var_c = self.hp.parse_hgvs_variant(variant) return var_c - - """ - Mapping transcript to genomic position - Ensures variant strings are transcript c. or n. - returns parsed hgvs g. object - """ - - def genomic(self, variant, evm, primary_assembly,hn): + """ + Mapping transcript to genomic position + Ensures variant strings are transcript c. or n. + returns parsed hgvs g. object + """ # Set regular expressions for if statements pat_g = re.compile("\:g\.") # Pattern looks for :g. pat_n = re.compile("\:n\.") @@ -172,35 +163,11 @@ def genomic(self, variant, evm, primary_assembly,hn): var_g = self.hp.parse_hgvs_variant(variant) return var_g - - """ - Mapping transcript to protein prediction - Ensures variant strings are transcript c. - returns parsed hgvs p. object - """ - - - - - """ - Function which takes a NORMALIZED hgvs Python transcript variant and maps to a specified protein reference sequence. A protein - level hgvs python object is returned. 
- - Note the function currently assumes that the transcript description is correctly normalized having come from the - previous g_to_t function - """ - - - - - - """ - Ensures variant strings are g. - returns parsed hgvs g. object - """ - - def hgvs_genomic(self, variant, hpOld): + """ + Ensures variant strings are g. + returns parsed hgvs g. object + """ # Set regular expressions for if statements pat_g = re.compile("\:g\.") # Pattern looks for :g. Note (gene) has been removed # If the :g. pattern is present in the input variant @@ -209,27 +176,23 @@ def hgvs_genomic(self, variant, hpOld): var_g = self.hp.parse_hgvs_variant(variant) return var_g - - """ - Enhanced transcript to genome position mapping function using evm - Deals with mapping from transcript positions that do not exist in the genomic sequence - i.e. the stated position aligns to a genomic gap! - Trys to ensure that a genomic position is always returned even if the c. or n. transcript - will not map to the specified genome build primary assembly. - Deals with transcript mapping to several genomic assemblies - Order - Map to a single NC_ for the specified genome build primary assembly - Map to a single NC_ for an alternate genome build primary assembly - Map to an NT_ from the specified genome build - Map to an NT_ from an alternative genome build - Map to an NW_ from the specified genome build - Map to an NW_ from an alternative genome buildRequires parsed c. or n. object - returns parsed hgvs g. object - """ - - def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): - + """ + Enhanced transcript to genome position mapping function using evm + Deals with mapping from transcript positions that do not exist in the genomic sequence + i.e. the stated position aligns to a genomic gap! + Trys to ensure that a genomic position is always returned even if the c. or n. transcript + will not map to the specified genome build primary assembly. 
+ Deals with transcript mapping to several genomic assemblies + Order + Map to a single NC_ for the specified genome build primary assembly + Map to a single NC_ for an alternate genome build primary assembly + Map to an NT_ from the specified genome build + Map to an NT_ from an alternative genome build + Map to an NW_ from the specified genome build + Map to an NW_ from an alternative genome buildRequires parsed c. or n. object + returns parsed hgvs g. object + """ # store the input stored_hgvs_c = copy.deepcopy(hgvs_c) expand_out = 'false' @@ -905,19 +868,18 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): return hgvs_genomic - """ - USE WITH MAPPER THAT DOES NOT REPLACE THE REFERENCE GENOMIC BASES AND DOED NOT NORMALIZE - - Enhanced transcript to genome position mapping function using evm - Trys to ensure that a genomic position is always returned even if the c. or n. transcript - will not map to the specified genome build primary assembly. - Deals with transcript mapping to several genomic assemblies - Order - Map to a single NC_ (or ALT) for the specified genome build - returns parsed hgvs g. object - """ - def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn, hpOld, sfOld, no_norm_evm): + """ + USE WITH MAPPER THAT DOES NOT REPLACE THE REFERENCE GENOMIC BASES AND DOED NOT NORMALIZE + + Enhanced transcript to genome position mapping function using evm + Trys to ensure that a genomic position is always returned even if the c. or n. transcript + will not map to the specified genome build primary assembly. + Deals with transcript mapping to several genomic assemblies + Order + Map to a single NC_ (or ALT) for the specified genome build + returns parsed hgvs g. 
object + """ try: hgvs_genomic = evm.t_to_g(hgvs_c) hn.normalize(hgvs_genomic) @@ -1113,14 +1075,13 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn return hgvs_genomic - """ - Enhanced transcript to genome position on a specified genomic reference using vm - Deals with mapping from transcript positions that do not exist in the genomic sequence - i.e. the stated position aligns to a genomic gap! - returns parsed hgvs g. object - """ - def myevm_g_to_t(self,evm, hgvs_genomic, alt_ac): + """ + Enhanced transcript to genome position on a specified genomic reference using vm + Deals with mapping from transcript positions that do not exist in the genomic sequence + i.e. the stated position aligns to a genomic gap! + returns parsed hgvs g. object + """ hgvs_t = evm.g_to_t(hgvs_genomic, alt_ac) return hgvs_t def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): @@ -1651,12 +1612,10 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): return hgvs_genomic - """ - parse p. strings into hgvs p. objects - """ - - def hgvs_protein(self, variant, hpOld): + """ + parse p. strings into hgvs p. objects + """ # Set regular expressions for if statements pat_p = re.compile("\:p\.") # Pattern looks for :g. Note (gene) has been removed # If the :p. pattern is present in the input variant @@ -1665,13 +1624,10 @@ def hgvs_protein(self, variant, hpOld): var_p = self.hp.parse_hgvs_variant(variant) return var_p - - """ - Convert r. into c. - """ - - def hgvs_r_to_c(self, hgvs_object): + """ + Convert r. into c. + """ # check for LRG_t with r. if re.match('LRG', hgvs_object.ac): transcript_ac = self.db.get.get_RefSeqTranscriptID_from_lrgTranscriptID(hgvs_object.ac) @@ -1693,13 +1649,10 @@ def hgvs_r_to_c(self, hgvs_object): hgvs_object.posedit.edit = edit return hgvs_object - - """ - Convert c. into r. - """ - - def hgvs_c_to_r(self, hgvs_object): + """ + Convert c. into r. 
+ """ hgvs_object.type = 'r' edit = str(hgvs_object.posedit.edit) edit = edit.lower() @@ -1707,15 +1660,12 @@ def hgvs_c_to_r(self, hgvs_object): hgvs_object.posedit.edit = edit return hgvs_object - - """ - Input c. r. n. variant string - Use uta.py (hdp) to return the identity information for the transcript variant - see hgvs.dataproviders.uta.py for details - """ - - def tx_identity_info(self, variant, hdpOld): + """ + Input c. r. n. variant string + Use uta.py (hdp) to return the identity information for the transcript variant + see hgvs.dataproviders.uta.py for details + """ # Set regular expressions for if statements pat_c = re.compile("\:c\.") # Pattern looks for :c. Note (gene) has been removed pat_n = re.compile("\:n\.") # Pattern looks for :c. Note (gene) has been removed @@ -1751,39 +1701,30 @@ def tx_identity_info(self, variant, hdpOld): # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list return tx_id_info - - """ - Input c. r. nd accession string - Use uta.py (hdp) to return the identity information for the transcript variant - see hgvs.dataproviders.uta.py for details - """ - - def tx_id_info(self, alt_ac, hdpOld): + """ + Input c. r. 
nd accession string + Use uta.py (hdp) to return the identity information for the transcript variant + see hgvs.dataproviders.uta.py for details + """ tx_id_info = self.hdp.get_tx_identity_info(alt_ac) # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list return tx_id_info - - """ - Use uta.py (hdp) to return the transcript information for a specified gene (HGNC SYMBOL) - see hgvs.dataproviders.uta.py for details - """ - - def tx_for_gene(self, hgnc, hdpOld): + """ + Use uta.py (hdp) to return the transcript information for a specified gene (HGNC SYMBOL) + see hgvs.dataproviders.uta.py for details + """ # Interface with the UTA database via get_tx_for_gene in uta.py tx_for_gene = self.hdp.get_tx_for_gene(hgnc) return tx_for_gene - - """ - Extract RefSeqGene Accession from transcript information - see hgvs.dataproviders.uta.py for details - """ - - def ng_extract(self, tx_for_gene): + """ + Extract RefSeqGene Accession from transcript information + see hgvs.dataproviders.uta.py for details + """ # Set regular expressions for if statements pat_NG = re.compile("^NG_") # Pattern looks for NG_ at beginning of a string # For each list in the list of lists tx_for_gene @@ -1794,14 +1735,12 @@ def ng_extract(self, tx_for_gene): gene_ac = list[4] return gene_ac - """ - Returns exon information for a given transcript - e.g. how the exons align to the genomic reference - see hgvs.dataproviders.uta.py for details - """ - - def tx_exons(self, tx_ac, alt_ac, alt_aln_method): + """ + Returns exon information for a given transcript + e.g. 
how the exons align to the genomic reference + see hgvs.dataproviders.uta.py for details + """ # Interface with the UTA database via get_tx_exons in uta.py try: tx_exons = self.hdp.get_tx_exons(tx_ac, alt_ac, alt_aln_method) @@ -1821,13 +1760,10 @@ def tx_exons(self, tx_ac, alt_ac, alt_aln_method): else: return tx_exons - - """ - Automatically maps genomic positions onto all overlapping transcripts - """ - - def relevant_transcripts(self, hgvs_genomic, evm, alt_aln_method,reverse_normalizer): + """ + Automatically maps genomic positions onto all overlapping transcripts + """ # Pass relevant transcripts for the input variant to rts # Note, the evm method misses one end, the hdp. method misses the other. Combine both rts_list = self.hdp.get_tx_for_region(hgvs_genomic.ac, alt_aln_method, hgvs_genomic.posedit.pos.start.base-1, hgvs_genomic.posedit.pos.end.base-1) @@ -1903,13 +1839,10 @@ def relevant_transcripts(self, hgvs_genomic, evm, alt_aln_method,reverse_normali code_var.append(str(variant)) return code_var - - """ - Take HGVS string, parse into hgvs object and validate - """ - - def validateHGVS(self, input): + """ + Take HGVS string, parse into hgvs object and validate + """ hgvs_input = self.hp.parse_hgvs_variant(input) g = re.compile(":g.") p = re.compile(":p.") @@ -1946,12 +1879,10 @@ def validateHGVS(self, input): error = 'false' return error - """ - Search HGNC rest - """ - - def hgnc_rest(self, path): + """ + Search HGNC rest + """ data = { 'record': '', 'error': 'false' @@ -1979,14 +1910,10 @@ def hgnc_rest(self, path): data['error'] = "Unable to contact the HGNC database: Please try again later" return data - - """ - Search Entrez databases with efetch and SeqIO - """ - - def entrez_efetch(self, db, id, rettype, retmode): - # IMPORT Bio modules + """ + Search Entrez databases with efetch and SeqIO + """ # from Bio import Entrez Entrez.email = self.entrezID # from Bio import SeqIO @@ -1998,13 +1925,10 @@ def entrez_efetch(self, db, id, rettype, retmode): 
handle.close() return record - - """ - search Entrez databases with efetch and read - """ - - def entrez_read(self,db, id, retmode): + """ + search Entrez databases with efetch and read + """ # IMPORT Bio modules # from Bio import Entrez Entrez.email = self.entrezID @@ -2017,13 +1941,10 @@ def entrez_read(self,db, id, retmode): handle.close() return record - - """ - Simple reverse complement function for nucleotide sequences - """ - - def revcomp(self, bases): + """ + Simple reverse complement function for nucleotide sequences + """ l2 = [] l = list(bases) element = 0 @@ -2041,14 +1962,11 @@ def revcomp(self, bases): revcomp = revcomp[::-1] return revcomp - - """ - Function designed to merge multiple HGVS variants (hgvs objects) into a single delins - using 3 prime normalization - """ - - def merge_hgvs_3pr(self, hgvs_variant_list,hn): + """ + Function designed to merge multiple HGVS variants (hgvs objects) into a single delins + using 3 prime normalization + """ # Ensure c. is mapped to the h_list = [] @@ -2156,14 +2074,11 @@ def merge_hgvs_3pr(self, hgvs_variant_list,hn): pass return hgvs_delins - - """ - Function designed to merge multiple HGVS variants (hgvs objects) into a single delins - using 5 prime normalization - """ - - def merge_hgvs_5pr(self, hgvs_variant_list): + """ + Function designed to merge multiple HGVS variants (hgvs objects) into a single delins + using 5 prime normalization + """ # Ensure c. 
is mapped to the h_list = [] @@ -2269,14 +2184,11 @@ def merge_hgvs_5pr(self, hgvs_variant_list): pass return hgvs_delins - - """ - Function designed to merge multiple pseudo VCF variants (strings) into a single HGVS delins - using 5 prime normalization then return a 3 prime normalized final HGVS object - """ - - def merge_pseudo_vcf(self, vcf_list, genome_build, hn): + """ + Function designed to merge multiple pseudo VCF variants (strings) into a single HGVS delins + using 5 prime normalization then return a 3 prime normalized final HGVS object + """ hgvs_list = [] # Convert pseudo_vcf list into a HGVS list for call in vcf_list: @@ -2289,14 +2201,11 @@ def merge_pseudo_vcf(self, vcf_list, genome_build, hn): # return return hgvs_delins - - """ - HGVS allele handling function which takes a single HGVS allele description and - separates each allele into a list of HGVS variants - """ - - def hgvs_alleles(self, variant_description,hn): + """ + HGVS allele handling function which takes a single HGVS allele description and + separates each allele into a list of HGVS variants + """ try: # Split up the description accession, remainder = variant_description.split(':') @@ -2436,8 +2345,10 @@ def hgvs_alleles(self, variant_description,hn): te = traceback.format_exc() raise fn.alleleVariantError(str(e)) - # Covert chromosomal HGVS description to RefSeqGene def chr_to_rsg(self, hgvs_genomic, hn, vrOld): + ''' + # Covert chromosomal HGVS description to RefSeqGene + ''' # print 'chr_to_rsg triggered' hgvs_genomic = hn.normalize(hgvs_genomic) # split the description @@ -2573,8 +2484,16 @@ def chr_to_rsg(self, hgvs_genomic, hn, vrOld): return descriptions - # Covert RefSeqGene HGVS description to Chromosomal def rsg_to_chr(self, hgvs_refseqgene, primary_assembly, hn, vr): + ''' + # Covert RefSeqGene HGVS description to Chromosomal + + :param hgvs_refseqgene: + :param primary_assembly: + :param hn: HGVS Normalizer + :param vr: + :return: + ''' # normalize try: hgvs_refseqgene = 
hn.normalize(hgvs_refseqgene) diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 6b560456..06ef9650 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -56,6 +56,15 @@ class Mixin(vvMixinConverters.Mixin): def validate(self, batch_variant, selected_assembly, select_transcripts, transcriptSet="refseq"): + ''' + This is the main validator function. + :param batch_variant: A string containing the variant to be validated + :param selected_assembly: The version of the genome assembly to use. + :param select_transcripts: Can be an array of different transcripts, or 'all' + Selecting multiple transcripts will lead to a multiple variant outputs. + :param transcriptSet: + :return: + ''' logger.info(batch_variant + ' : ' + selected_assembly) # Take start time start_time = time.time() @@ -1931,9 +1940,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if entry['expiry'] == 'true': dbaction = 'update' try: - entry = self.db.data_add(input=input, alt_aln_method=alt_aln_method, - accession=accession, dbaction=dbaction, hp=self.hp, evm=evm, - hdp=self.hdp) + entry = self.db.data_add(accession=accession) except hgvs.exceptions.HGVSError as e: error = 'Transcript %s is not currently supported' % (accession) validation['warnings'] = validation['warnings'] + ': ' + str(error) @@ -1951,9 +1958,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr elif 'none' in entry: dbaction = 'insert' try: - entry = self.db.data_add(input=input, alt_aln_method=alt_aln_method, - accession=accession, dbaction=dbaction, hp=self.hp, evm=evm, - hdp=self.hdp) + entry = self.db.data_add(accession=accession) except Exception as e: logger.warning(str(e)) error = 'Unable to assign transcript identity records to ' + accession + ', potentially an obsolete record :' @@ -1996,9 +2001,7 @@ def validate(self, batch_variant, selected_assembly, 
select_transcripts, transcr # If the current entry is too old if entry['expiry'] == 'true': dbaction = 'update' - entry = self.db.data_add(input=input, alt_aln_method=alt_aln_method, - accession=accession, dbaction=dbaction, hp=self.hp, evm=evm, - hdp=self.hdp) + entry = self.db.data_add(accession=accession) hgnc_gene_info = entry['description'] else: hgnc_gene_info = entry['description'] @@ -2006,9 +2009,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr elif 'none' in entry: dbaction = 'insert' try: - entry = self.db.data_add(input=input, alt_aln_method=alt_aln_method, - accession=accession, dbaction=dbaction, hp=self.hp, evm=evm, - hdp=self.hdp) + entry = self.db.data_add(accession=accession) except Exception as e: logger.warning(str(e)) error = 'Unable to assign transcript identity records to ' + accession + ', potentially an obsolete record :' diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index eb311838..3839c941 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -23,24 +23,29 @@ import vvFunctions as fn -''' -This file contains the validator object, which is instantiated in order to perform validator functions. -The validator contains configuration information and permanent copies of database links and the like. -Much of the validator's inner workings are stored in special one-off function container objects: -validator.db : The validator's MySQL database access functions -The validator configuration is stored in ~/.config/VariantValidator/config.ini . This is loaded -when the validator object is initialized. +class Mixin(): + ''' + # This object contains configuration options for the validator, but it inherits the mixin + # class in vvCore that contains the enormous validator function. + + This mixin is the first for the validator object, which is instantiated in order to perform validator functions. 
+ The validator contains configuration information and permanent copies of database links and the like. + Much of the validator's inner workings are stored in special one-off function container objects: + validator.db : The validator's MySQL database access functions -Running variant validator should hopefully be as simple as writing a script like this: -import VariantValidator + The validator configuration is stored in ~/.config/VariantValidator/config.ini . This is loaded + when the validator object is initialized. -val=Validator() -val.validate("some kind of gene situation","the transcripts to use") + Running variant validator should hopefully be as simple as writing a script like this: + import VariantValidator -''' + val=Validator() + val.validate("some kind of gene situation","The genome version","the transcripts to use") -''' + ''' + def __init__(self): + ''' Renaming of variables : 'seqrepo_directory': HGVS_SEQREPO_DIR, #self.seqrepoPath 'uta_url': UTA_DB_URL, #self.utaPath @@ -51,14 +56,7 @@ 'variantvalidator_hgvs_version': hgvs_version, #self.hgvsVersion 'uta_schema': str(hdp.data_version()), #self.uta_schema 'seqrepo_db': HGVS_SEQREPO_DIR.split('/')[-1] #self.seqrepoVersion -''' - - - -class Mixin(): - # This object contains configuration options for the validator, but it inherits the mixin - # class in vvCore that contains the enormous validator function. - def __init__(self): + ''' # First load from the configuration file, if it exists. configName="config.ini" homePath=os.path.expanduser("~") @@ -72,10 +70,9 @@ def __init__(self): self.createConfig(configPath) # Load the configuration file. - with open(configPath) as file: - lines=file.read() config=RawConfigParser(allow_no_value=True) - config.read(configPath) + with open(configPath) as file: + config.read_file(file) # The custom vvLogging module will set itself up using the VALDIATOR_DEBUG environment variable. 
levelString = config["logging"]['level'] consoleString = config["logging"]['console'] @@ -179,11 +176,13 @@ def __init__(self): replace_reference=True ) - - + def __del__(self): + del self.db def myConfig(self): + ''' #Returns configuration: #version, hgvs version, uta schema, seqrepo db. + ''' return { 'variantvalidator_version': self.version, 'variantvalidator_hgvs_version': self.hgvsVersion, @@ -191,9 +190,11 @@ def myConfig(self): 'seqrepo_db': self.seqrepoPath } def createConfig(self,outPath): + ''' # This function reads from the default configuration file stored in the same folder as this module, # and transfers it to outPath. # Outpath should include a filename. + ''' lines=[] inPath=os.path.join(os.path.dirname(os.path.realpath(__file__)),"defaultConfig.ini") # print(os.path.join(inPath,"defaultConfig.ini")) From 92494cefbdd9ee5099198eb7c2825df2b1f5f553 Mon Sep 17 00:00:00 2001 From: buran Date: Tue, 22 Jan 2019 12:23:10 +0000 Subject: [PATCH 025/223] Tried making module with seperate testing folder --- VariantValidator/__init__.py | 1 + VariantValidator/modules/vvObjects.py | 24 ---------------- .../{modules => testing}/inputVariants.txt | 0 .../{modules => testing}/test_vv.py | 2 +- .../{modules => testing}/vvTestCompare.py | 0 .../{modules => testing}/vvTestFunctions.py | 0 .../{modules => testing}/vvTestSave.py | 2 +- VariantValidator/variantValidator.py | 28 +++++++++++++++++++ 8 files changed, 31 insertions(+), 26 deletions(-) create mode 100644 VariantValidator/__init__.py delete mode 100644 VariantValidator/modules/vvObjects.py rename VariantValidator/{modules => testing}/inputVariants.txt (100%) rename VariantValidator/{modules => testing}/test_vv.py (97%) rename VariantValidator/{modules => testing}/vvTestCompare.py (100%) rename VariantValidator/{modules => testing}/vvTestFunctions.py (100%) rename VariantValidator/{modules => testing}/vvTestSave.py (81%) create mode 100644 VariantValidator/variantValidator.py diff --git 
a/VariantValidator/__init__.py b/VariantValidator/__init__.py new file mode 100644 index 00000000..2ae28399 --- /dev/null +++ b/VariantValidator/__init__.py @@ -0,0 +1 @@ +pass diff --git a/VariantValidator/modules/vvObjects.py b/VariantValidator/modules/vvObjects.py deleted file mode 100644 index 20e64cb0..00000000 --- a/VariantValidator/modules/vvObjects.py +++ /dev/null @@ -1,24 +0,0 @@ - - -import vvMixinCore - -class Validation(): - #Validation objects contain a number of variant interpretations - pass - -class ValOutput(): - #This object contains a single possible interpretation of a variant - pass - -#Mixins are used to split this very large, complex object over multiple files. -#There is a logical chain to it, though: -# vvMixinInit -# v -# vvMixinConverters -# v -# vvMixinCore -# v -# Validator <- this object. -class Validator(vvMixinCore.Mixin): - pass - diff --git a/VariantValidator/modules/inputVariants.txt b/VariantValidator/testing/inputVariants.txt similarity index 100% rename from VariantValidator/modules/inputVariants.txt rename to VariantValidator/testing/inputVariants.txt diff --git a/VariantValidator/modules/test_vv.py b/VariantValidator/testing/test_vv.py similarity index 97% rename from VariantValidator/modules/test_vv.py rename to VariantValidator/testing/test_vv.py index 6e96570e..54b6819d 100644 --- a/VariantValidator/modules/test_vv.py +++ b/VariantValidator/testing/test_vv.py @@ -2,7 +2,7 @@ import os import pytest import vvTestFunctions as fn -from vvObjects import Validator +from VariantValidator import Validator inputVariants=fn.loadVariantFile("VariantValidator/modules/inputVariants.txt") diff --git a/VariantValidator/modules/vvTestCompare.py b/VariantValidator/testing/vvTestCompare.py similarity index 100% rename from VariantValidator/modules/vvTestCompare.py rename to VariantValidator/testing/vvTestCompare.py diff --git a/VariantValidator/modules/vvTestFunctions.py b/VariantValidator/testing/vvTestFunctions.py similarity index 100% 
rename from VariantValidator/modules/vvTestFunctions.py rename to VariantValidator/testing/vvTestFunctions.py diff --git a/VariantValidator/modules/vvTestSave.py b/VariantValidator/testing/vvTestSave.py similarity index 81% rename from VariantValidator/modules/vvTestSave.py rename to VariantValidator/testing/vvTestSave.py index a6e31d1f..94a706b4 100644 --- a/VariantValidator/modules/vvTestSave.py +++ b/VariantValidator/testing/vvTestSave.py @@ -1,7 +1,7 @@ #Saving script import vvTestFunctions as fn -from vvObjects import Validator +from VariantValidator import Validator import os val=Validator() diff --git a/VariantValidator/variantValidator.py b/VariantValidator/variantValidator.py new file mode 100644 index 00000000..0e98bd87 --- /dev/null +++ b/VariantValidator/variantValidator.py @@ -0,0 +1,28 @@ +import modules.vvMixinCore as vvMixinCore + +class Validation(): + ''' + #Validation objects contain a number of variant interpretations + ''' + pass + +class ValOutput(): + ''' + #This object contains a single possible interpretation of a variant + ''' + pass + +class Validator(vvMixinCore.Mixin): + ''' + #Mixins are used to split this very large, complex object over multiple files. + #There is a logical chain to it, though: + # vvMixinInit + # v + # vvMixinConverters + # v + # vvMixinCore + # v + # Validator <- this object. 
+ ''' + pass + From 22acf71f771aaa9d7d2e037ee5189e1f04b807d4 Mon Sep 17 00:00:00 2001 From: buran Date: Tue, 22 Jan 2019 15:55:47 +0000 Subject: [PATCH 026/223] Rearranged everything, modified setup.py, combuncted everything --- VariantValidator/__init__.py | 4 +++- VariantValidator/modules/vvMixinCore.py | 2 +- VariantValidator/testing/vvTestSave.py | 7 ++++--- VariantValidator/variantValidator.py | 2 ++ setup.py | 9 ++++----- 5 files changed, 14 insertions(+), 10 deletions(-) diff --git a/VariantValidator/__init__.py b/VariantValidator/__init__.py index 2ae28399..a6016d48 100644 --- a/VariantValidator/__init__.py +++ b/VariantValidator/__init__.py @@ -1 +1,3 @@ -pass +from variantValidator import * + +__all__=["Validator","Validation"] diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 06ef9650..24c9ebc4 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -2001,7 +2001,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # If the current entry is too old if entry['expiry'] == 'true': dbaction = 'update' - entry = self.db.data_add(accession=accession) + entry = self.db.data_add(accession=accession) hgnc_gene_info = entry['description'] else: hgnc_gene_info = entry['description'] diff --git a/VariantValidator/testing/vvTestSave.py b/VariantValidator/testing/vvTestSave.py index 94a706b4..7228505c 100644 --- a/VariantValidator/testing/vvTestSave.py +++ b/VariantValidator/testing/vvTestSave.py @@ -1,9 +1,10 @@ #Saving script import vvTestFunctions as fn -from VariantValidator import Validator +#from VariantValidator import Validator +import VariantValidator as vv import os -val=Validator() +val=vv.Validator() os.environ["ADD_LOGS"]="True" -fn.generateTestFolder("testOutputsReworked","inputVariants.txt",val) +fn.generateTestFolder("testOutputs","inputVariants.txt",val) diff --git a/VariantValidator/variantValidator.py 
b/VariantValidator/variantValidator.py index 0e98bd87..9e69723a 100644 --- a/VariantValidator/variantValidator.py +++ b/VariantValidator/variantValidator.py @@ -26,3 +26,5 @@ class Validator(vvMixinCore.Mixin): ''' pass + + diff --git a/setup.py b/setup.py index 72e34b65..7cdfdd5e 100644 --- a/setup.py +++ b/setup.py @@ -5,14 +5,13 @@ setup( name='VariantValidator', - version='0.1.0_dev_pre_a', + version='0.9', description='API for accurate, mapping and formatting of sequence variants using HGVS nomenclature', - long_description=open('README.txt').read(), + long_description=open('README.md').read(), url='', author='Peter J. Causey-Freeman', author_email='pjf9@leicester.ac.uk', - package_data={"VariantValidator": ["configuration/*.ini"],}, - packages=find_packages(), + packages=['VariantValidator','VariantValidator.modules'], include_package_data=True, license="GNU AFFERO GENERAL PUBLIC LICENSE, Version 3 (https://www.gnu.org/licenses/agpl-3.0.en.html)", # See https://pypi.python.org/pypi?%3Aaction=list_classifiers @@ -73,4 +72,4 @@ # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
-# \ No newline at end of file +# From 0c250a72f93588f418f6e659e101c0b7257e6c7a Mon Sep 17 00:00:00 2001 From: buran Date: Wed, 23 Jan 2019 09:52:08 +0000 Subject: [PATCH 027/223] Changed the vvTestCompare script to ignore metadata differences --- VariantValidator/testing/test_vv.py | 2 +- VariantValidator/testing/vvTestFunctions.py | 21 ++++++++++++++++----- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/VariantValidator/testing/test_vv.py b/VariantValidator/testing/test_vv.py index 54b6819d..b63b2b32 100644 --- a/VariantValidator/testing/test_vv.py +++ b/VariantValidator/testing/test_vv.py @@ -4,7 +4,7 @@ import vvTestFunctions as fn from VariantValidator import Validator -inputVariants=fn.loadVariantFile("VariantValidator/modules/inputVariants.txt") +inputVariants=fn.loadVariantFile("inputVariants.txt") ''' print("Configuring for personal linux") diff --git a/VariantValidator/testing/vvTestFunctions.py b/VariantValidator/testing/vvTestFunctions.py index 4346046d..25f4ea27 100644 --- a/VariantValidator/testing/vvTestFunctions.py +++ b/VariantValidator/testing/vvTestFunctions.py @@ -150,17 +150,28 @@ def retrieveVariant(validation): def compareValidations(v1,v2,id): #print(v1,v2) - for vk in v1.keys(): - if not (vk in v2.keys()): + #Remove metadata + v1Keys=v1.keys() + if "metadata" in v1Keys: + v1Keys.remove("metadata") + else: + print("Variant "+str(id)+": metadata not found in first variant") + v2Keys=v2.keys() + if "metadata" in v2Keys: + v2Keys.remove("metadata") + else: + print("Variant "+str(id)+": metadata not found in second variant") + for vk in v1Keys: + if not (vk in v2Keys): # print("tag "+vk+" : "+str(v1[vk])+" not found in second variant") print("Variant "+str(id)+": Tag "+vk+" not found in second variant") return False - for vk in v2.keys(): - if not (vk in v1.keys()): + for vk in v2Keys: + if not (vk in v1Keys): # print("tag "+vk+" : "+str(v2[vk])+" not found in first variant") print("Variant "+str(id)+": Tag "+vk+" not found in 
first variant") return False - for vk in v1.keys(): + for vk in v1Keys: if not (v1[vk]==v2[vk]): if type(v1[vk])==type(dict()) or type(v2[vk])==type(dict()): print("Variant " + str(id) + ": Different tag values for key " + str(vk)) From 58564c9c35f23487a11e3a5c0f260f1025c3e397 Mon Sep 17 00:00:00 2001 From: buran Date: Wed, 23 Jan 2019 09:55:31 +0000 Subject: [PATCH 028/223] Master test outputs added --- .../testing/testOutputsMasterITS/variant0.txt | 171 + .../testing/testOutputsMasterITS/variant1.txt | 172 + .../testOutputsMasterITS/variant10.txt | 80 + .../testOutputsMasterITS/variant100.txt | 309 ++ .../testOutputsMasterITS/variant101.txt | 316 ++ .../testOutputsMasterITS/variant102.txt | 302 ++ .../testOutputsMasterITS/variant103.txt | 309 ++ .../testOutputsMasterITS/variant104.txt | 289 ++ .../testOutputsMasterITS/variant105.txt | 309 ++ .../testOutputsMasterITS/variant106.txt | 309 ++ .../testOutputsMasterITS/variant107.txt | 312 ++ .../testOutputsMasterITS/variant108.txt | 183 + .../testOutputsMasterITS/variant109.txt | 183 + .../testOutputsMasterITS/variant11.txt | 80 + .../testOutputsMasterITS/variant110.txt | 183 + .../testOutputsMasterITS/variant111.txt | 182 + .../testOutputsMasterITS/variant112.txt | 182 + .../testOutputsMasterITS/variant113.txt | 183 + .../testOutputsMasterITS/variant114.txt | 177 + .../testOutputsMasterITS/variant115.txt | 177 + .../testOutputsMasterITS/variant116.txt | 177 + .../testOutputsMasterITS/variant117.txt | 23 + .../testOutputsMasterITS/variant118.txt | 177 + .../testOutputsMasterITS/variant119.txt | 177 + .../testOutputsMasterITS/variant12.txt | 704 ++++ .../testOutputsMasterITS/variant120.txt | 179 + .../testOutputsMasterITS/variant121.txt | 177 + .../testOutputsMasterITS/variant122.txt | 177 + .../testOutputsMasterITS/variant123.txt | 80 + .../testOutputsMasterITS/variant124.txt | 80 + .../testOutputsMasterITS/variant125.txt | 80 + .../testOutputsMasterITS/variant126.txt | 177 + .../testOutputsMasterITS/variant127.txt | 
175 + .../testOutputsMasterITS/variant128.txt | 80 + .../testOutputsMasterITS/variant129.txt | 174 + .../testOutputsMasterITS/variant13.txt | 177 + .../testOutputsMasterITS/variant130.txt | 80 + .../testOutputsMasterITS/variant131.txt | 141 + .../testOutputsMasterITS/variant132.txt | 167 + .../testOutputsMasterITS/variant133.txt | 82 + .../testOutputsMasterITS/variant134.txt | 80 + .../testOutputsMasterITS/variant135.txt | 157 + .../testOutputsMasterITS/variant136.txt | 259 ++ .../testOutputsMasterITS/variant137.txt | 304 ++ .../testOutputsMasterITS/variant138.txt | 947 +++++ .../testOutputsMasterITS/variant139.txt | 80 + .../testOutputsMasterITS/variant14.txt | 177 + .../testOutputsMasterITS/variant140.txt | 177 + .../testOutputsMasterITS/variant141.txt | 175 + .../testOutputsMasterITS/variant142.txt | 80 + .../testOutputsMasterITS/variant143.txt | 80 + .../testOutputsMasterITS/variant144.txt | 171 + .../testOutputsMasterITS/variant145.txt | 80 + .../testOutputsMasterITS/variant146.txt | 80 + .../testOutputsMasterITS/variant147.txt | 704 ++++ .../testOutputsMasterITS/variant148.txt | 535 +++ .../testOutputsMasterITS/variant149.txt | 144 + .../testOutputsMasterITS/variant15.txt | 177 + .../testOutputsMasterITS/variant150.txt | 80 + .../testOutputsMasterITS/variant151.txt | 184 + .../testOutputsMasterITS/variant152.txt | 183 + .../testOutputsMasterITS/variant153.txt | 267 ++ .../testOutputsMasterITS/variant154.txt | 281 ++ .../testOutputsMasterITS/variant155.txt | 259 ++ .../testOutputsMasterITS/variant156.txt | 260 ++ .../testOutputsMasterITS/variant157.txt | 1357 +++++++ .../testOutputsMasterITS/variant158.txt | 171 + .../testOutputsMasterITS/variant159.txt | 172 + .../testOutputsMasterITS/variant16.txt | 156 + .../testOutputsMasterITS/variant160.txt | 179 + .../testOutputsMasterITS/variant161.txt | 177 + .../testOutputsMasterITS/variant162.txt | 547 +++ .../testOutputsMasterITS/variant163.txt | 402 +++ .../testOutputsMasterITS/variant164.txt | 179 + 
.../testOutputsMasterITS/variant165.txt | 600 ++++ .../testOutputsMasterITS/variant166.txt | 171 + .../testOutputsMasterITS/variant167.txt | 1405 ++++++++ .../testOutputsMasterITS/variant168.txt | 947 +++++ .../testOutputsMasterITS/variant169.txt | 174 + .../testOutputsMasterITS/variant17.txt | 80 + .../testOutputsMasterITS/variant170.txt | 183 + .../testOutputsMasterITS/variant171.txt | 443 +++ .../testOutputsMasterITS/variant172.txt | 182 + .../testOutputsMasterITS/variant173.txt | 181 + .../testOutputsMasterITS/variant174.txt | 438 +++ .../testOutputsMasterITS/variant175.txt | 183 + .../testOutputsMasterITS/variant176.txt | 182 + .../testOutputsMasterITS/variant177.txt | 182 + .../testOutputsMasterITS/variant178.txt | 182 + .../testOutputsMasterITS/variant179.txt | 174 + .../testOutputsMasterITS/variant18.txt | 82 + .../testOutputsMasterITS/variant180.txt | 506 +++ .../testOutputsMasterITS/variant181.txt | 510 +++ .../testOutputsMasterITS/variant182.txt | 510 +++ .../testOutputsMasterITS/variant183.txt | 515 +++ .../testOutputsMasterITS/variant184.txt | 439 +++ .../testOutputsMasterITS/variant185.txt | 438 +++ .../testOutputsMasterITS/variant186.txt | 442 +++ .../testOutputsMasterITS/variant187.txt | 437 +++ .../testOutputsMasterITS/variant188.txt | 180 + .../testOutputsMasterITS/variant189.txt | 121 + .../testOutputsMasterITS/variant19.txt | 80 + .../testOutputsMasterITS/variant190.txt | 23 + .../testOutputsMasterITS/variant191.txt | 23 + .../testOutputsMasterITS/variant192.txt | 121 + .../testOutputsMasterITS/variant193.txt | 82 + .../testOutputsMasterITS/variant194.txt | 23 + .../testOutputsMasterITS/variant195.txt | 23 + .../testOutputsMasterITS/variant196.txt | 23 + .../testOutputsMasterITS/variant197.txt | 80 + .../testOutputsMasterITS/variant198.txt | 606 ++++ .../testOutputsMasterITS/variant199.txt | 286 ++ .../testing/testOutputsMasterITS/variant2.txt | 174 + .../testOutputsMasterITS/variant20.txt | 82 + .../testOutputsMasterITS/variant200.txt | 543 +++ 
.../testOutputsMasterITS/variant201.txt | 176 + .../testOutputsMasterITS/variant202.txt | 176 + .../testOutputsMasterITS/variant203.txt | 171 + .../testOutputsMasterITS/variant204.txt | 511 +++ .../testOutputsMasterITS/variant205.txt | 303 ++ .../testOutputsMasterITS/variant206.txt | 1001 ++++++ .../testOutputsMasterITS/variant207.txt | 632 ++++ .../testOutputsMasterITS/variant208.txt | 374 ++ .../testOutputsMasterITS/variant209.txt | 172 + .../testOutputsMasterITS/variant21.txt | 180 + .../testOutputsMasterITS/variant210.txt | 528 +++ .../testOutputsMasterITS/variant211.txt | 215 ++ .../testOutputsMasterITS/variant212.txt | 734 ++++ .../testOutputsMasterITS/variant213.txt | 297 ++ .../testOutputsMasterITS/variant214.txt | 3113 +++++++++++++++++ .../testOutputsMasterITS/variant215.txt | 316 ++ .../testOutputsMasterITS/variant216.txt | 485 +++ .../testOutputsMasterITS/variant217.txt | 172 + .../testOutputsMasterITS/variant218.txt | 424 +++ .../testOutputsMasterITS/variant219.txt | 408 +++ .../testOutputsMasterITS/variant22.txt | 82 + .../testOutputsMasterITS/variant220.txt | 1185 +++++++ .../testOutputsMasterITS/variant221.txt | 418 +++ .../testOutputsMasterITS/variant222.txt | 177 + .../testOutputsMasterITS/variant223.txt | 515 +++ .../testOutputsMasterITS/variant224.txt | 540 +++ .../testOutputsMasterITS/variant225.txt | 462 +++ .../testOutputsMasterITS/variant226.txt | 418 +++ .../testOutputsMasterITS/variant227.txt | 418 +++ .../testOutputsMasterITS/variant228.txt | 781 +++++ .../testOutputsMasterITS/variant229.txt | 786 +++++ .../testOutputsMasterITS/variant23.txt | 80 + .../testOutputsMasterITS/variant230.txt | 171 + .../testOutputsMasterITS/variant231.txt | 511 +++ .../testOutputsMasterITS/variant232.txt | 286 ++ .../testOutputsMasterITS/variant233.txt | 1291 +++++++ .../testOutputsMasterITS/variant234.txt | 286 ++ .../testOutputsMasterITS/variant235.txt | 286 ++ .../testOutputsMasterITS/variant236.txt | 171 + .../testOutputsMasterITS/variant237.txt | 368 ++ 
.../testOutputsMasterITS/variant238.txt | 544 +++ .../testOutputsMasterITS/variant239.txt | 567 +++ .../testOutputsMasterITS/variant24.txt | 82 + .../testOutputsMasterITS/variant240.txt | 580 +++ .../testOutputsMasterITS/variant241.txt | 549 +++ .../testOutputsMasterITS/variant242.txt | 1106 ++++++ .../testOutputsMasterITS/variant243.txt | 549 +++ .../testOutputsMasterITS/variant244.txt | 1119 ++++++ .../testOutputsMasterITS/variant245.txt | 580 +++ .../testOutputsMasterITS/variant246.txt | 567 +++ .../testOutputsMasterITS/variant247.txt | 357 ++ .../testOutputsMasterITS/variant248.txt | 354 ++ .../testOutputsMasterITS/variant249.txt | 341 ++ .../testOutputsMasterITS/variant25.txt | 80 + .../testOutputsMasterITS/variant250.txt | 1870 ++++++++++ .../testOutputsMasterITS/variant251.txt | 1870 ++++++++++ .../testOutputsMasterITS/variant252.txt | 642 ++++ .../testOutputsMasterITS/variant253.txt | 777 ++++ .../testOutputsMasterITS/variant254.txt | 748 ++++ .../testOutputsMasterITS/variant255.txt | 483 +++ .../testOutputsMasterITS/variant256.txt | 573 +++ .../testOutputsMasterITS/variant257.txt | 180 + .../testOutputsMasterITS/variant258.txt | 177 + .../testOutputsMasterITS/variant259.txt | 177 + .../testOutputsMasterITS/variant26.txt | 80 + .../testOutputsMasterITS/variant260.txt | 172 + .../testOutputsMasterITS/variant261.txt | 171 + .../testOutputsMasterITS/variant262.txt | 914 +++++ .../testOutputsMasterITS/variant263.txt | 171 + .../testOutputsMasterITS/variant264.txt | 156 + .../testOutputsMasterITS/variant265.txt | 286 ++ .../testOutputsMasterITS/variant266.txt | 3076 ++++++++++++++++ .../testOutputsMasterITS/variant267.txt | 2023 +++++++++++ .../testOutputsMasterITS/variant268.txt | 1666 +++++++++ .../testOutputsMasterITS/variant269.txt | 401 +++ .../testOutputsMasterITS/variant27.txt | 80 + .../testOutputsMasterITS/variant270.txt | 401 +++ .../testOutputsMasterITS/variant271.txt | 2534 ++++++++++++++ .../testOutputsMasterITS/variant272.txt | 2495 +++++++++++++ 
.../testOutputsMasterITS/variant273.txt | 1148 ++++++ .../testOutputsMasterITS/variant274.txt | 176 + .../testOutputsMasterITS/variant275.txt | 286 ++ .../testOutputsMasterITS/variant276.txt | 404 +++ .../testOutputsMasterITS/variant277.txt | 377 ++ .../testOutputsMasterITS/variant278.txt | 1227 +++++++ .../testOutputsMasterITS/variant279.txt | 421 +++ .../testOutputsMasterITS/variant28.txt | 23 + .../testOutputsMasterITS/variant280.txt | 894 +++++ .../testOutputsMasterITS/variant281.txt | 1116 ++++++ .../testOutputsMasterITS/variant282.txt | 553 +++ .../testOutputsMasterITS/variant283.txt | 286 ++ .../testOutputsMasterITS/variant284.txt | 510 +++ .../testOutputsMasterITS/variant285.txt | 156 + .../testOutputsMasterITS/variant286.txt | 389 ++ .../testOutputsMasterITS/variant287.txt | 758 ++++ .../testOutputsMasterITS/variant288.txt | 758 ++++ .../testOutputsMasterITS/variant289.txt | 414 +++ .../testOutputsMasterITS/variant29.txt | 80 + .../testOutputsMasterITS/variant290.txt | 171 + .../testOutputsMasterITS/variant291.txt | 176 + .../testOutputsMasterITS/variant292.txt | 176 + .../testOutputsMasterITS/variant293.txt | 286 ++ .../testOutputsMasterITS/variant294.txt | 286 ++ .../testOutputsMasterITS/variant295.txt | 1944 ++++++++++ .../testOutputsMasterITS/variant296.txt | 908 +++++ .../testOutputsMasterITS/variant297.txt | 1063 ++++++ .../testOutputsMasterITS/variant298.txt | 292 ++ .../testOutputsMasterITS/variant299.txt | 304 ++ .../testing/testOutputsMasterITS/variant3.txt | 171 + .../testOutputsMasterITS/variant30.txt | 171 + .../testOutputsMasterITS/variant300.txt | 294 ++ .../testOutputsMasterITS/variant301.txt | 548 +++ .../testOutputsMasterITS/variant302.txt | 518 +++ .../testOutputsMasterITS/variant303.txt | 518 +++ .../testOutputsMasterITS/variant304.txt | 284 ++ .../testOutputsMasterITS/variant305.txt | 282 ++ .../testOutputsMasterITS/variant306.txt | 282 ++ .../testOutputsMasterITS/variant307.txt | 596 ++++ .../testOutputsMasterITS/variant308.txt | 539 
+++ .../testOutputsMasterITS/variant309.txt | 603 ++++ .../testOutputsMasterITS/variant31.txt | 171 + .../testOutputsMasterITS/variant310.txt | 543 +++ .../testOutputsMasterITS/variant311.txt | 516 +++ .../testOutputsMasterITS/variant312.txt | 216 ++ .../testOutputsMasterITS/variant313.txt | 635 ++++ .../testOutputsMasterITS/variant314.txt | 179 + .../testOutputsMasterITS/variant315.txt | 180 + .../testOutputsMasterITS/variant316.txt | 182 + .../testOutputsMasterITS/variant317.txt | 293 ++ .../testOutputsMasterITS/variant318.txt | 292 ++ .../testOutputsMasterITS/variant319.txt | 287 ++ .../testOutputsMasterITS/variant32.txt | 172 + .../testOutputsMasterITS/variant320.txt | 555 +++ .../testOutputsMasterITS/variant321.txt | 513 +++ .../testOutputsMasterITS/variant322.txt | 176 + .../testOutputsMasterITS/variant323.txt | 174 + .../testOutputsMasterITS/variant324.txt | 174 + .../testOutputsMasterITS/variant325.txt | 174 + .../testOutputsMasterITS/variant326.txt | 171 + .../testOutputsMasterITS/variant327.txt | 175 + .../testOutputsMasterITS/variant328.txt | 175 + .../testOutputsMasterITS/variant329.txt | 171 + .../testOutputsMasterITS/variant33.txt | 177 + .../testOutputsMasterITS/variant330.txt | 175 + .../testOutputsMasterITS/variant331.txt | 533 +++ .../testOutputsMasterITS/variant332.txt | 215 ++ .../testOutputsMasterITS/variant333.txt | 142 + .../testOutputsMasterITS/variant34.txt | 259 ++ .../testOutputsMasterITS/variant35.txt | 172 + .../testOutputsMasterITS/variant36.txt | 402 +++ .../testOutputsMasterITS/variant37.txt | 402 +++ .../testOutputsMasterITS/variant38.txt | 82 + .../testOutputsMasterITS/variant39.txt | 80 + .../testing/testOutputsMasterITS/variant4.txt | 171 + .../testOutputsMasterITS/variant40.txt | 172 + .../testOutputsMasterITS/variant41.txt | 293 ++ .../testOutputsMasterITS/variant42.txt | 564 +++ .../testOutputsMasterITS/variant43.txt | 179 + .../testOutputsMasterITS/variant44.txt | 143 + .../testOutputsMasterITS/variant45.txt | 293 ++ 
.../testOutputsMasterITS/variant46.txt | 175 + .../testOutputsMasterITS/variant47.txt | 177 + .../testOutputsMasterITS/variant48.txt | 177 + .../testOutputsMasterITS/variant49.txt | 175 + .../testing/testOutputsMasterITS/variant5.txt | 287 ++ .../testOutputsMasterITS/variant50.txt | 177 + .../testOutputsMasterITS/variant51.txt | 176 + .../testOutputsMasterITS/variant52.txt | 170 + .../testOutputsMasterITS/variant53.txt | 170 + .../testOutputsMasterITS/variant54.txt | 177 + .../testOutputsMasterITS/variant55.txt | 177 + .../testOutputsMasterITS/variant56.txt | 177 + .../testOutputsMasterITS/variant57.txt | 177 + .../testOutputsMasterITS/variant58.txt | 175 + .../testOutputsMasterITS/variant59.txt | 177 + .../testing/testOutputsMasterITS/variant6.txt | 143 + .../testOutputsMasterITS/variant60.txt | 174 + .../testOutputsMasterITS/variant61.txt | 171 + .../testOutputsMasterITS/variant62.txt | 171 + .../testOutputsMasterITS/variant63.txt | 144 + .../testOutputsMasterITS/variant64.txt | 439 +++ .../testOutputsMasterITS/variant65.txt | 511 +++ .../testOutputsMasterITS/variant66.txt | 223 ++ .../testOutputsMasterITS/variant67.txt | 223 ++ .../testOutputsMasterITS/variant68.txt | 218 ++ .../testOutputsMasterITS/variant69.txt | 219 ++ .../testing/testOutputsMasterITS/variant7.txt | 1362 ++++++++ .../testOutputsMasterITS/variant70.txt | 224 ++ .../testOutputsMasterITS/variant71.txt | 225 ++ .../testOutputsMasterITS/variant72.txt | 223 ++ .../testOutputsMasterITS/variant73.txt | 223 ++ .../testOutputsMasterITS/variant74.txt | 228 ++ .../testOutputsMasterITS/variant75.txt | 222 ++ .../testOutputsMasterITS/variant76.txt | 222 ++ .../testOutputsMasterITS/variant77.txt | 228 ++ .../testOutputsMasterITS/variant78.txt | 226 ++ .../testOutputsMasterITS/variant79.txt | 223 ++ .../testing/testOutputsMasterITS/variant8.txt | 177 + .../testOutputsMasterITS/variant80.txt | 882 +++++ .../testOutputsMasterITS/variant81.txt | 628 ++++ .../testOutputsMasterITS/variant82.txt | 277 ++ 
.../testOutputsMasterITS/variant83.txt | 263 ++ .../testOutputsMasterITS/variant84.txt | 272 ++ .../testOutputsMasterITS/variant85.txt | 268 ++ .../testOutputsMasterITS/variant86.txt | 272 ++ .../testOutputsMasterITS/variant87.txt | 272 ++ .../testOutputsMasterITS/variant88.txt | 268 ++ .../testOutputsMasterITS/variant89.txt | 182 + .../testing/testOutputsMasterITS/variant9.txt | 82 + .../testOutputsMasterITS/variant90.txt | 261 ++ .../testOutputsMasterITS/variant91.txt | 265 ++ .../testOutputsMasterITS/variant92.txt | 261 ++ .../testOutputsMasterITS/variant93.txt | 265 ++ .../testOutputsMasterITS/variant94.txt | 259 ++ .../testOutputsMasterITS/variant95.txt | 259 ++ .../testOutputsMasterITS/variant96.txt | 262 ++ .../testOutputsMasterITS/variant97.txt | 261 ++ .../testOutputsMasterITS/variant98.txt | 309 ++ .../testOutputsMasterITS/variant99.txt | 309 ++ VariantValidator/testing/vvTestCompare.py | 2 +- 335 files changed, 124634 insertions(+), 1 deletion(-) create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant0.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant1.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant10.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant100.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant101.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant102.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant103.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant104.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant105.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant106.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant107.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant108.txt create mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant109.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant11.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant110.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant111.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant112.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant113.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant114.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant115.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant116.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant117.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant118.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant119.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant12.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant120.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant121.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant122.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant123.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant124.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant125.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant126.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant127.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant128.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant129.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant13.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant130.txt create mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant131.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant132.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant133.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant134.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant135.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant136.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant137.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant138.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant139.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant14.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant140.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant141.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant142.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant143.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant144.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant145.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant146.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant147.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant148.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant149.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant15.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant150.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant151.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant152.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant153.txt create mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant154.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant155.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant156.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant157.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant158.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant159.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant16.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant160.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant161.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant162.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant163.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant164.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant165.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant166.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant167.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant168.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant169.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant17.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant170.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant171.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant172.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant173.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant174.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant175.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant176.txt create mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant177.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant178.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant179.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant18.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant180.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant181.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant182.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant183.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant184.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant185.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant186.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant187.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant188.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant189.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant19.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant190.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant191.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant192.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant193.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant194.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant195.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant196.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant197.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant198.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant199.txt create mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant2.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant20.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant200.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant201.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant202.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant203.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant204.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant205.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant206.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant207.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant208.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant209.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant21.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant210.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant211.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant212.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant213.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant214.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant215.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant216.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant217.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant218.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant219.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant22.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant220.txt create mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant221.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant222.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant223.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant224.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant225.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant226.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant227.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant228.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant229.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant23.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant230.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant231.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant232.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant233.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant234.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant235.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant236.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant237.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant238.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant239.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant24.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant240.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant241.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant242.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant243.txt create mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant244.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant245.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant246.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant247.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant248.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant249.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant25.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant250.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant251.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant252.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant253.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant254.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant255.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant256.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant257.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant258.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant259.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant26.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant260.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant261.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant262.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant263.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant264.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant265.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant266.txt create mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant267.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant268.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant269.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant27.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant270.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant271.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant272.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant273.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant274.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant275.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant276.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant277.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant278.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant279.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant28.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant280.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant281.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant282.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant283.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant284.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant285.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant286.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant287.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant288.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant289.txt create mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant29.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant290.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant291.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant292.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant293.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant294.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant295.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant296.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant297.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant298.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant299.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant3.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant30.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant300.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant301.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant302.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant303.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant304.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant305.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant306.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant307.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant308.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant309.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant31.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant310.txt create mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant311.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant312.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant313.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant314.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant315.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant316.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant317.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant318.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant319.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant32.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant320.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant321.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant322.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant323.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant324.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant325.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant326.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant327.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant328.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant329.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant33.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant330.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant331.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant332.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant333.txt create mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant34.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant35.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant36.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant37.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant38.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant39.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant4.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant40.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant41.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant42.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant43.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant44.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant45.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant46.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant47.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant48.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant49.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant5.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant50.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant51.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant52.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant53.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant54.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant55.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant56.txt create mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant57.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant58.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant59.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant6.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant60.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant61.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant62.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant63.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant64.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant65.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant66.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant67.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant68.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant69.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant7.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant70.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant71.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant72.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant73.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant74.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant75.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant76.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant77.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant78.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant79.txt create mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant8.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant80.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant81.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant82.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant83.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant84.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant85.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant86.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant87.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant88.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant89.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant9.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant90.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant91.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant92.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant93.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant94.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant95.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant96.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant97.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant98.txt create mode 100644 VariantValidator/testing/testOutputsMasterITS/variant99.txt diff --git a/VariantValidator/testing/testOutputsMasterITS/variant0.txt b/VariantValidator/testing/testOutputsMasterITS/variant0.txt new file mode 100644 index 00000000..4d5d0a22 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant0.txt @@ -0,0 +1,171 @@ +(dp0 
+S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.589G>T' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p14 +sS'gene_symbol' +p15 +S'COL1A1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_000079.2:p.(Gly197Cys)' +p20 +sS'slr' +p21 +S'NP_000079.2:p.(G197C)' +p22 +ssS'submitted_variant' +p23 +S'NM_000088.3:c.589G>T' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_000088.3:c.589G>T' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000017.10:g.48275363C>A' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr17' +p39 +sS'ref' +p40 +VC +p41 +sS'pos' +p42 +S'48275363' +p43 +sS'alt' +p44 +VA +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000017.11:g.50198002C>A' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +g41 +sg42 +S'50198002' +p50 +sg44 +g45 +sssS'grch37' +p51 +(dp52 +g34 +S'NC_000017.10:g.48275363C>A' +p53 +sg36 +(dp54 +g38 +S'17' +p55 +sg40 +g41 +sg42 +S'48275363' +p56 +sg44 +g45 +sssS'grch38' +p57 +(dp58 +g34 +S'NC_000017.11:g.50198002C>A' +p59 +sg36 +(dp60 +g38 +g55 +sg40 +g41 +sg42 +S'50198002' +p61 +sg44 +g45 +ssssS'reference_sequence_records' +p62 +(dp63 +S'protein' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p65 +sS'transcript' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p67 +sssS'metadata' +p68 +(dp69 +S'variantvalidator_hgvs_version' +p70 +S'1.1.3' +p71 +sS'uta_schema' +p72 +S'uta_20180821' +p73 +sS'seqrepo_db' +p74 +S'2018-08-21' +p75 +sS'variantvalidator_version' +p76 +S'v0.2' +p77 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant1.txt b/VariantValidator/testing/testOutputsMasterITS/variant1.txt new file mode 100644 index 00000000..c55d48e9 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant1.txt @@ -0,0 +1,172 @@ +(dp0 +S'NM_015120.4:c.35T>C' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA +p12 +sS'gene_symbol' +p13 +S'ALMS1' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_055935.4:p.(Leu12Pro)' +p18 +sS'slr' +p19 +S'NP_055935.4:p.(L12P)' +p20 +ssS'submitted_variant' +p21 +S'NM_015120.4:c.35T>C' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_015120.4:c.35T>C' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000002.11:g.73613031delinsCGGA' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr2' +p37 +sS'ref' +p38 +S'T' +p39 +sS'pos' +p40 +S'73613031' +p41 +sS'alt' +p42 +S'CGGA' +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000002.12:g.73385903delinsCGGA' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +g39 +sg40 +S'73385903' +p48 +sg42 +S'CGGA' +p49 +sssS'grch37' +p50 +(dp51 +g32 +S'NC_000002.11:g.73613031delinsCGGA' +p52 +sg34 +(dp53 +g36 +S'2' +p54 +sg38 +g39 +sg40 +S'73613031' +p55 +sg42 +g43 +sssS'grch38' +p56 +(dp57 +g32 +S'NC_000002.12:g.73385903delinsCGGA' +p58 +sg34 +(dp59 +g36 +g54 +sg38 +g39 +sg40 +S'73385903' +p60 +sg42 +g49 +ssssS'reference_sequence_records' +p61 +(dp62 +S'protein' +p63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4' +p64 +sS'transcript' +p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4' +p66 
+sssS'flag' +p67 +S'gene_variant' +p68 +sS'metadata' +p69 +(dp70 +S'variantvalidator_hgvs_version' +p71 +S'1.1.3' +p72 +sS'uta_schema' +p73 +S'uta_20180821' +p74 +sS'seqrepo_db' +p75 +S'2018-08-21' +p76 +sS'variantvalidator_version' +p77 +S'v0.2' +p78 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant10.txt b/VariantValidator/testing/testOutputsMasterITS/variant10.txt new file mode 100644 index 00000000..7c89309a --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant10.txt @@ -0,0 +1,80 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'COL5A1 is not part of genome build GRCh37' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +g4 +sS'gene_symbol' +p12 +g4 +sS'hgvs_predicted_protein_consequence' +p13 +(dp14 +S'tlr' +p15 +g4 +sS'slr' +p16 +g4 +ssS'submitted_variant' +p17 +S'COL5A1:c.5071A>T' +p18 +sS'genome_context_intronic_sequence' +p19 +g4 +sS'hgvs_lrg_variant' +p20 +g4 +sS'hgvs_transcript_variant' +p21 +g4 +sS'hgvs_refseqgene_variant' +p22 +g4 +sS'primary_assembly_loci' +p23 +(dp24 +sS'reference_sequence_records' +p25 +g4 +ssS'flag' +p26 +S'warning' +p27 +sS'metadata' +p28 +(dp29 +S'variantvalidator_hgvs_version' +p30 +S'1.1.3' +p31 +sS'uta_schema' +p32 +S'uta_20180821' +p33 +sS'seqrepo_db' +p34 +S'2018-08-21' +p35 +sS'variantvalidator_version' +p36 +S'v0.2' +p37 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant100.txt b/VariantValidator/testing/testOutputsMasterITS/variant100.txt new file mode 100644 index 00000000..8d42226f --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant100.txt @@ -0,0 +1,309 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_007121.5:c.515_516insT' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' +p19 +aS'NM_007121.5:c.515_517 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' +p20 +aS'Caution should be used when reporting the displayed variant descriptions' +p21 +aS'If you are unsure, please contact admin' +p22 +aS'RefSeqGene record not available' +p23 +asS'refseqgene_context_intronic_sequence' +p24 +g16 +sS'alt_genomic_loci' +p25 +(lp26 +sS'transcript_description' +p27 +VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA +p28 +sS'gene_symbol' +p29 +S'NR1H2' +p30 +sS'hgvs_predicted_protein_consequence' +p31 +(dp32 +S'tlr' +p33 +S'NP_009052.3:p.(Lys172AsnfsTer35)' +p34 +sS'slr' +p35 +S'NP_009052.3:p.(K172Nfs*35)' +p36 +ssS'submitted_variant' +p37 +S'NC_000019.10:g.50378564_50378565insTACA' +p38 +sS'genome_context_intronic_sequence' +p39 +g16 +sS'hgvs_lrg_variant' +p40 +g16 +sS'hgvs_transcript_variant' +p41 +S'NM_007121.5:c.515_516insT' +p42 +sS'hgvs_refseqgene_variant' +p43 +g16 +sS'primary_assembly_loci' +p44 +(dp45 +S'grch38' +p46 +(dp47 +S'hgvs_genomic_description' +p48 +S'NC_000019.10:g.50378564_50378565insTACA' +p49 +sS'vcf' +p50 +(dp51 +S'chr' +p52 +S'19' +p53 +sS'ref' +p54 +S'A' +p55 +sS'pos' +p56 +S'50378563' +p57 
+sS'alt' +p58 +VAATAC +p59 +sssS'grch37' +p60 +(dp61 +g48 +S'NC_000019.9:g.50881821_50881822insTACA' +p62 +sg50 +(dp63 +g52 +g53 +sg54 +g55 +sg56 +S'50881820' +p64 +sg58 +VAATAC +p65 +sssS'hg38' +p66 +(dp67 +g48 +S'NC_000019.10:g.50378564_50378565insTACA' +p68 +sg50 +(dp69 +g52 +S'chr19' +p70 +sg54 +g55 +sg56 +S'50378563' +p71 +sg58 +VAATAC +p72 +sssS'hg19' +p73 +(dp74 +g48 +S'NC_000019.9:g.50881821_50881822insTACA' +p75 +sg50 +(dp76 +g52 +g70 +sg54 +g55 +sg56 +S'50881820' +p77 +sg58 +VAATAC +p78 +ssssS'reference_sequence_records' +p79 +(dp80 +S'protein' +p81 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3' +p82 +sS'transcript' +p83 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5' +p84 +sssS'NM_001256647.1:c.224_225insT' +p85 +(dp86 +g15 +g16 +sg17 +(lp87 +S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' +p88 +aS'NM_001256647.1:c.224_226 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' +p89 +aS'Caution should be used when reporting the displayed variant descriptions' +p90 +aS'If you are unsure, please contact admin' +p91 +aS'RefSeqGene record not available' +p92 +asg24 +g16 +sg25 +(lp93 +sg27 +VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA +p94 +sg29 +S'NR1H2' +p95 +sg31 +(dp96 +g33 +S'NP_001243576.1:p.(Lys75AsnfsTer35)' +p97 +sg35 +S'NP_001243576.1:p.(K75Nfs*35)' +p98 +ssg37 +g38 +sg39 +g16 +sg40 +g16 +sg41 +S'NM_001256647.1:c.224_225insT' +p99 +sg43 +g16 +sg44 +(dp100 +S'grch38' +p101 +(dp102 +g48 +S'NC_000019.10:g.50378564_50378565insTACA' +p103 +sg50 +(dp104 +g52 +g53 +sg54 +g55 +sg56 +S'50378563' +p105 +sg58 +VAATAC +p106 +sssS'grch37' +p107 +(dp108 +g48 +S'NC_000019.9:g.50881821_50881822insTACA' +p109 +sg50 +(dp110 +g52 +g53 +sg54 +g55 +sg56 +S'50881820' +p111 +sg58 +VAATAC +p112 +sssg66 +(dp113 +g48 +S'NC_000019.10:g.50378564_50378565insTACA' +p114 +sg50 +(dp115 +g52 +g70 +sg54 +g55 +sg56 +S'50378563' +p116 +sg58 +VAATAC +p117 
+sssS'hg19' +p118 +(dp119 +g48 +S'NC_000019.9:g.50881821_50881822insTACA' +p120 +sg50 +(dp121 +g52 +g70 +sg54 +g55 +sg56 +S'50881820' +p122 +sg58 +VAATAC +p123 +ssssg79 +(dp124 +g81 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1' +p125 +sg83 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1' +p126 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant101.txt b/VariantValidator/testing/testOutputsMasterITS/variant101.txt new file mode 100644 index 00000000..58964b7c --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant101.txt @@ -0,0 +1,316 @@ +(dp0 +S'NM_007121.5:c.514_520=' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' +p7 +aS'NM_007121.5:c.514_520 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' +p8 +aS'Caution should be used when reporting the displayed variant descriptions' +p9 +aS'If you are unsure, please contact admin' +p10 +aS'RefSeqGene record not available' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g4 +sS'alt_genomic_loci' +p13 +(lp14 +sS'transcript_description' +p15 +VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA +p16 +sS'gene_symbol' +p17 +S'NR1H2' +p18 +sS'hgvs_predicted_protein_consequence' +p19 +(dp20 +S'tlr' +p21 +S'NP_009052.3:p.(Lys172=)' +p22 +sS'slr' +p23 +S'NP_009052.3:p.(K172=)' +p24 +ssS'submitted_variant' +p25 +S'NC_000019.10:g.50378565_50378567dup' +p26 +sS'genome_context_intronic_sequence' +p27 +g4 +sS'hgvs_lrg_variant' +p28 +g4 +sS'hgvs_transcript_variant' +p29 +S'NM_007121.5:c.514_520=' +p30 +sS'hgvs_refseqgene_variant' +p31 +g4 +sS'primary_assembly_loci' +p32 +(dp33 +S'grch38' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000019.10:g.50378565_50378567dup' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'19' +p41 +sS'ref' +p42 +S'AAC' +p43 
+sS'pos' +p44 +S'50378564' +p45 +sS'alt' +p46 +VAACAAC +p47 +sssS'grch37' +p48 +(dp49 +g36 +S'NC_000019.9:g.50881822_50881824dup' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +S'AAC' +p52 +sg44 +S'50881821' +p53 +sg46 +VAACAAC +p54 +sssS'hg38' +p55 +(dp56 +g36 +S'NC_000019.10:g.50378565_50378567dup' +p57 +sg38 +(dp58 +g40 +S'chr19' +p59 +sg42 +S'AAC' +p60 +sg44 +S'50378564' +p61 +sg46 +VAACAAC +p62 +sssS'hg19' +p63 +(dp64 +g36 +S'NC_000019.9:g.50881822_50881824dup' +p65 +sg38 +(dp66 +g40 +g59 +sg42 +S'AAC' +p67 +sg44 +S'50881821' +p68 +sg46 +VAACAAC +p69 +ssssS'reference_sequence_records' +p70 +(dp71 +S'protein' +p72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3' +p73 +sS'transcript' +p74 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5' +p75 +sssS'NM_001256647.1:c.223_229=' +p76 +(dp77 +g3 +g4 +sg5 +(lp78 +S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' +p79 +aS'NM_001256647.1:c.223_229 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' +p80 +aS'Caution should be used when reporting the displayed variant descriptions' +p81 +aS'If you are unsure, please contact admin' +p82 +aS'RefSeqGene record not available' +p83 +asg12 +g4 +sg13 +(lp84 +sg15 +VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA +p85 +sg17 +S'NR1H2' +p86 +sg19 +(dp87 +g21 +S'NP_001243576.1:p.(Lys75=)' +p88 +sg23 +S'NP_001243576.1:p.(K75=)' +p89 +ssg25 +g26 +sg27 +g4 +sg28 +g4 +sg29 +S'NM_001256647.1:c.223_229=' +p90 +sg31 +g4 +sg32 +(dp91 +S'grch38' +p92 +(dp93 +g36 +S'NC_000019.10:g.50378565_50378567dup' +p94 +sg38 +(dp95 +g40 +g41 +sg42 +S'AAC' +p96 +sg44 +S'50378564' +p97 +sg46 +VAACAAC +p98 +sssS'grch37' +p99 +(dp100 +g36 +S'NC_000019.9:g.50881822_50881824dup' +p101 +sg38 +(dp102 +g40 +g41 +sg42 +S'AAC' +p103 +sg44 +S'50881821' +p104 +sg46 +VAACAAC +p105 +sssg55 +(dp106 +g36 +S'NC_000019.10:g.50378565_50378567dup' +p107 +sg38 +(dp108 +g40 +g59 +sg42 +S'AAC' +p109 +sg44 +S'50378564' 
+p110 +sg46 +VAACAAC +p111 +sssS'hg19' +p112 +(dp113 +g36 +S'NC_000019.9:g.50881822_50881824dup' +p114 +sg38 +(dp115 +g40 +g59 +sg42 +S'AAC' +p116 +sg44 +S'50881821' +p117 +sg46 +VAACAAC +p118 +ssssg70 +(dp119 +g72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1' +p120 +sg74 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1' +p121 +sssS'flag' +p122 +S'gene_variant' +p123 +sS'metadata' +p124 +(dp125 +S'variantvalidator_hgvs_version' +p126 +S'1.1.3' +p127 +sS'uta_schema' +p128 +S'uta_20180821' +p129 +sS'seqrepo_db' +p130 +S'2018-08-21' +p131 +sS'variantvalidator_version' +p132 +S'v0.2' +p133 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant102.txt b/VariantValidator/testing/testOutputsMasterITS/variant102.txt new file mode 100644 index 00000000..788c4620 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant102.txt @@ -0,0 +1,302 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_007121.5:c.519_521del' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' +p19 +aS'NM_007121.5:c.515_517 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' +p20 +aS'Caution should be used when reporting the displayed variant descriptions' +p21 +aS'If you are unsure, please contact admin' +p22 +aS'RefSeqGene record not available' +p23 +asS'refseqgene_context_intronic_sequence' +p24 +g16 +sS'alt_genomic_loci' +p25 +(lp26 +sS'transcript_description' +p27 +VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA +p28 +sS'gene_symbol' +p29 +S'NR1H2' +p30 +sS'hgvs_predicted_protein_consequence' +p31 +(dp32 +S'tlr' 
+p33 +S'NP_009052.3:p.(Gln176del)' +p34 +sS'slr' +p35 +S'NP_009052.3:p.(Q176del)' +p36 +ssS'submitted_variant' +p37 +S'NC_000019.10:g.50378563_50378564=' +p38 +sS'genome_context_intronic_sequence' +p39 +g16 +sS'hgvs_lrg_variant' +p40 +g16 +sS'hgvs_transcript_variant' +p41 +S'NM_007121.5:c.519_521del' +p42 +sS'hgvs_refseqgene_variant' +p43 +g16 +sS'primary_assembly_loci' +p44 +(dp45 +S'grch38' +p46 +(dp47 +S'hgvs_genomic_description' +p48 +S'NC_000019.10:g.50378563_50378564=' +p49 +sS'vcf' +p50 +(dp51 +S'chr' +p52 +S'19' +p53 +sS'ref' +p54 +S'AA' +p55 +sS'pos' +p56 +S'50378563' +p57 +sS'alt' +p58 +g55 +sssS'grch37' +p59 +(dp60 +g48 +S'NC_000019.9:g.50881820_50881821=' +p61 +sg50 +(dp62 +g52 +g53 +sg54 +S'AA' +p63 +sg56 +S'50881820' +p64 +sg58 +g63 +sssS'hg38' +p65 +(dp66 +g48 +S'NC_000019.10:g.50378563_50378564=' +p67 +sg50 +(dp68 +g52 +S'chr19' +p69 +sg54 +g55 +sg56 +S'50378563' +p70 +sg58 +g55 +sssS'hg19' +p71 +(dp72 +g48 +S'NC_000019.9:g.50881820_50881821=' +p73 +sg50 +(dp74 +g52 +g69 +sg54 +g63 +sg56 +S'50881820' +p75 +sg58 +g63 +ssssS'reference_sequence_records' +p76 +(dp77 +S'protein' +p78 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3' +p79 +sS'transcript' +p80 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5' +p81 +sssS'NM_001256647.1:c.228_230del' +p82 +(dp83 +g15 +g16 +sg17 +(lp84 +S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' +p85 +aS'NM_001256647.1:c.224_226 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' +p86 +aS'Caution should be used when reporting the displayed variant descriptions' +p87 +aS'If you are unsure, please contact admin' +p88 +aS'RefSeqGene record not available' +p89 +asg24 +g16 +sg25 +(lp90 +sg27 +VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA +p91 +sg29 +S'NR1H2' +p92 +sg31 +(dp93 +g33 +S'NP_001243576.1:p.(Gln79del)' +p94 +sg35 +S'NP_001243576.1:p.(Q79del)' +p95 +ssg37 +g38 +sg39 +g16 +sg40 +g16 +sg41 
+S'NM_001256647.1:c.228_230del' +p96 +sg43 +g16 +sg44 +(dp97 +S'grch38' +p98 +(dp99 +g48 +S'NC_000019.10:g.50378563_50378564=' +p100 +sg50 +(dp101 +g52 +g53 +sg54 +g55 +sg56 +S'50378563' +p102 +sg58 +g55 +sssS'grch37' +p103 +(dp104 +g48 +S'NC_000019.9:g.50881820_50881821=' +p105 +sg50 +(dp106 +g52 +g53 +sg54 +g63 +sg56 +S'50881820' +p107 +sg58 +g63 +sssg65 +(dp108 +g48 +S'NC_000019.10:g.50378563_50378564=' +p109 +sg50 +(dp110 +g52 +g69 +sg54 +g55 +sg56 +S'50378563' +p111 +sg58 +g55 +sssS'hg19' +p112 +(dp113 +g48 +S'NC_000019.9:g.50881820_50881821=' +p114 +sg50 +(dp115 +g52 +g69 +sg54 +g63 +sg56 +S'50881820' +p116 +sg58 +g63 +ssssg76 +(dp117 +g78 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1' +p118 +sg80 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1' +p119 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant103.txt b/VariantValidator/testing/testOutputsMasterITS/variant103.txt new file mode 100644 index 00000000..69998578 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant103.txt @@ -0,0 +1,309 @@ +(dp0 +S'NM_001256647.1:c.224_226delinsTCGG' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' +p7 +aS'NM_001256647.1:c.224_226 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' +p8 +aS'Caution should be used when reporting the displayed variant descriptions' +p9 +aS'If you are unsure, please contact admin' +p10 +aS'RefSeqGene record not available' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g4 +sS'alt_genomic_loci' +p13 +(lp14 +sS'transcript_description' +p15 +VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA +p16 +sS'gene_symbol' +p17 +S'NR1H2' +p18 +sS'hgvs_predicted_protein_consequence' +p19 +(dp20 +S'tlr' +p21 +S'NP_001243576.1:p.(Lys75IlefsTer35)' +p22 +sS'slr' +p23 
+S'NP_001243576.1:p.(K75Ifs*35)' +p24 +ssS'submitted_variant' +p25 +S'NC_000019.10:g.50378563_50378564insTCGG' +p26 +sS'genome_context_intronic_sequence' +p27 +g4 +sS'hgvs_lrg_variant' +p28 +g4 +sS'hgvs_transcript_variant' +p29 +S'NM_001256647.1:c.224_226delinsTCGG' +p30 +sS'hgvs_refseqgene_variant' +p31 +g4 +sS'primary_assembly_loci' +p32 +(dp33 +S'grch38' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000019.10:g.50378563_50378564insTCGG' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'19' +p41 +sS'ref' +p42 +S'A' +p43 +sS'pos' +p44 +S'50378563' +p45 +sS'alt' +p46 +VATCGG +p47 +sssS'grch37' +p48 +(dp49 +g36 +S'NC_000019.9:g.50881820_50881821insTCGG' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +g43 +sg44 +S'50881820' +p52 +sg46 +VATCGG +p53 +sssS'hg38' +p54 +(dp55 +g36 +S'NC_000019.10:g.50378563_50378564insTCGG' +p56 +sg38 +(dp57 +g40 +S'chr19' +p58 +sg42 +g43 +sg44 +S'50378563' +p59 +sg46 +VATCGG +p60 +sssS'hg19' +p61 +(dp62 +g36 +S'NC_000019.9:g.50881820_50881821insTCGG' +p63 +sg38 +(dp64 +g40 +g58 +sg42 +g43 +sg44 +S'50881820' +p65 +sg46 +VATCGG +p66 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1' +p72 +sssS'flag' +p73 +S'gene_variant' +p74 +sS'NM_007121.5:c.515_517delinsTCGG' +p75 +(dp76 +g3 +g4 +sg5 +(lp77 +S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' +p78 +aS'NM_007121.5:c.515_517 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' +p79 +aS'Caution should be used when reporting the displayed variant descriptions' +p80 +aS'If you are unsure, please contact admin' +p81 +aS'RefSeqGene record not available' +p82 +asg12 +g4 +sg13 +(lp83 +sg15 +VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA +p84 +sg17 +S'NR1H2' +p85 +sg19 +(dp86 +g21 +S'NP_009052.3:p.(Lys172IlefsTer35)' +p87 +sg23 
+S'NP_009052.3:p.(K172Ifs*35)' +p88 +ssg25 +g26 +sg27 +g4 +sg28 +g4 +sg29 +S'NM_007121.5:c.515_517delinsTCGG' +p89 +sg31 +g4 +sg32 +(dp90 +S'grch38' +p91 +(dp92 +g36 +S'NC_000019.10:g.50378563_50378564insTCGG' +p93 +sg38 +(dp94 +g40 +g41 +sg42 +g43 +sg44 +S'50378563' +p95 +sg46 +VATCGG +p96 +sssS'grch37' +p97 +(dp98 +g36 +S'NC_000019.9:g.50881820_50881821insTCGG' +p99 +sg38 +(dp100 +g40 +g41 +sg42 +g43 +sg44 +S'50881820' +p101 +sg46 +VATCGG +p102 +sssg54 +(dp103 +g36 +S'NC_000019.10:g.50378563_50378564insTCGG' +p104 +sg38 +(dp105 +g40 +g58 +sg42 +g43 +sg44 +S'50378563' +p106 +sg46 +VATCGG +p107 +sssS'hg19' +p108 +(dp109 +g36 +S'NC_000019.9:g.50881820_50881821insTCGG' +p110 +sg38 +(dp111 +g40 +g58 +sg42 +g43 +sg44 +S'50881820' +p112 +sg46 +VATCGG +p113 +ssssg67 +(dp114 +g69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3' +p115 +sg71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5' +p116 +sssS'metadata' +p117 +(dp118 +S'variantvalidator_hgvs_version' +p119 +S'1.1.3' +p120 +sS'uta_schema' +p121 +S'uta_20180821' +p122 +sS'seqrepo_db' +p123 +S'2018-08-21' +p124 +sS'variantvalidator_version' +p125 +S'v0.2' +p126 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant104.txt b/VariantValidator/testing/testOutputsMasterITS/variant104.txt new file mode 100644 index 00000000..e23d6793 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant104.txt @@ -0,0 +1,289 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_007121.5:c.514_515inv' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'RefSeqGene record not available' +p19 +asS'refseqgene_context_intronic_sequence' +p20 +g16 +sS'alt_genomic_loci' +p21 +(lp22 +sS'transcript_description' +p23 +VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA +p24 +sS'gene_symbol' +p25 +S'NR1H2' +p26 +sS'hgvs_predicted_protein_consequence' +p27 +(dp28 +S'tlr' +p29 +S'NP_009052.3:p.(Lys172Leu)' +p30 +sS'slr' +p31 +S'NP_009052.3:p.(K172L)' +p32 +ssS'submitted_variant' +p33 +S'NC_000019.10:g.50378563delinsTTAC' +p34 +sS'genome_context_intronic_sequence' +p35 +g16 +sS'hgvs_lrg_variant' +p36 +g16 +sS'hgvs_transcript_variant' +p37 +S'NM_007121.5:c.514_515inv' +p38 +sS'hgvs_refseqgene_variant' +p39 +g16 +sS'primary_assembly_loci' +p40 +(dp41 +S'grch38' +p42 +(dp43 +S'hgvs_genomic_description' +p44 +S'NC_000019.10:g.50378563delinsTTAC' +p45 +sS'vcf' +p46 +(dp47 +S'chr' +p48 +S'19' +p49 +sS'ref' +p50 +S'A' +p51 +sS'pos' +p52 +S'50378563' +p53 +sS'alt' +p54 +S'TTAC' +p55 +sssS'grch37' +p56 +(dp57 +g44 +S'NC_000019.9:g.50881820delinsTTAC' +p58 +sg46 +(dp59 +g48 +g49 +sg50 +g51 +sg52 +S'50881820' +p60 +sg54 +S'TTAC' +p61 +sssS'hg38' +p62 +(dp63 +g44 +S'NC_000019.10:g.50378563delinsTTAC' +p64 +sg46 +(dp65 +g48 +S'chr19' +p66 +sg50 +g51 +sg52 +S'50378563' +p67 +sg54 +g55 +sssS'hg19' +p68 +(dp69 +g44 
+S'NC_000019.9:g.50881820delinsTTAC' +p70 +sg46 +(dp71 +g48 +g66 +sg50 +g51 +sg52 +S'50881820' +p72 +sg54 +g61 +ssssS'reference_sequence_records' +p73 +(dp74 +S'protein' +p75 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3' +p76 +sS'transcript' +p77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5' +p78 +sssS'NM_001256647.1:c.223_224inv' +p79 +(dp80 +g15 +g16 +sg17 +(lp81 +S'RefSeqGene record not available' +p82 +asg20 +g16 +sg21 +(lp83 +sg23 +VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA +p84 +sg25 +S'NR1H2' +p85 +sg27 +(dp86 +g29 +S'NP_001243576.1:p.(Lys75Leu)' +p87 +sg31 +S'NP_001243576.1:p.(K75L)' +p88 +ssg33 +g34 +sg35 +g16 +sg36 +g16 +sg37 +S'NM_001256647.1:c.223_224inv' +p89 +sg39 +g16 +sg40 +(dp90 +S'grch38' +p91 +(dp92 +g44 +S'NC_000019.10:g.50378563delinsTTAC' +p93 +sg46 +(dp94 +g48 +g49 +sg50 +g51 +sg52 +S'50378563' +p95 +sg54 +S'TTAC' +p96 +sssS'grch37' +p97 +(dp98 +g44 +S'NC_000019.9:g.50881820delinsTTAC' +p99 +sg46 +(dp100 +g48 +g49 +sg50 +g51 +sg52 +S'50881820' +p101 +sg54 +S'TTAC' +p102 +sssg62 +(dp103 +g44 +S'NC_000019.10:g.50378563delinsTTAC' +p104 +sg46 +(dp105 +g48 +g66 +sg50 +g51 +sg52 +S'50378563' +p106 +sg54 +g96 +sssS'hg19' +p107 +(dp108 +g44 +S'NC_000019.9:g.50881820delinsTTAC' +p109 +sg46 +(dp110 +g48 +g66 +sg50 +g51 +sg52 +S'50881820' +p111 +sg54 +g102 +ssssg73 +(dp112 +g75 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1' +p113 +sg77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1' +p114 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant105.txt b/VariantValidator/testing/testOutputsMasterITS/variant105.txt new file mode 100644 index 00000000..4fc3fb9a --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant105.txt @@ -0,0 +1,309 @@ +(dp0 +S'NM_007121.5:c.514_515insT' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' +p7 +aS'NM_007121.5:c.514_515 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' +p8 +aS'Caution should be used when reporting the displayed variant descriptions' +p9 +aS'If you are unsure, please contact admin' +p10 +aS'RefSeqGene record not available' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g4 +sS'alt_genomic_loci' +p13 +(lp14 +sS'transcript_description' +p15 +VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA +p16 +sS'gene_symbol' +p17 +S'NR1H2' +p18 +sS'hgvs_predicted_protein_consequence' +p19 +(dp20 +S'tlr' +p21 +S'NP_009052.3:p.(Lys172IlefsTer35)' +p22 +sS'slr' +p23 +S'NP_009052.3:p.(K172Ifs*35)' +p24 +ssS'submitted_variant' +p25 +S'NC_000019.10:g.50378563_50378564insTAAC' +p26 +sS'genome_context_intronic_sequence' +p27 +g4 +sS'hgvs_lrg_variant' +p28 +g4 +sS'hgvs_transcript_variant' +p29 +S'NM_007121.5:c.514_515insT' +p30 +sS'hgvs_refseqgene_variant' +p31 +g4 +sS'primary_assembly_loci' +p32 +(dp33 +S'grch38' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000019.10:g.50378563_50378564insTAAC' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'19' +p41 +sS'ref' +p42 +S'A' +p43 +sS'pos' +p44 +S'50378563' +p45 +sS'alt' +p46 +S'ATAAC' +p47 +sssS'grch37' +p48 +(dp49 +g36 +S'NC_000019.9:g.50881820_50881821insTAAC' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +g43 +sg44 +S'50881820' +p52 +sg46 +S'ATAAC' +p53 +sssS'hg38' +p54 +(dp55 +g36 +S'NC_000019.10:g.50378563_50378564insTAAC' 
+p56 +sg38 +(dp57 +g40 +S'chr19' +p58 +sg42 +g43 +sg44 +S'50378563' +p59 +sg46 +S'ATAAC' +p60 +sssS'hg19' +p61 +(dp62 +g36 +S'NC_000019.9:g.50881820_50881821insTAAC' +p63 +sg38 +(dp64 +g40 +g58 +sg42 +g43 +sg44 +S'50881820' +p65 +sg46 +S'ATAAC' +p66 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5' +p72 +sssS'NM_001256647.1:c.223_224insT' +p73 +(dp74 +g3 +g4 +sg5 +(lp75 +S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' +p76 +aS'NM_001256647.1:c.223_224 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' +p77 +aS'Caution should be used when reporting the displayed variant descriptions' +p78 +aS'If you are unsure, please contact admin' +p79 +aS'RefSeqGene record not available' +p80 +asg12 +g4 +sg13 +(lp81 +sg15 +VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA +p82 +sg17 +S'NR1H2' +p83 +sg19 +(dp84 +g21 +S'NP_001243576.1:p.(Lys75IlefsTer35)' +p85 +sg23 +S'NP_001243576.1:p.(K75Ifs*35)' +p86 +ssg25 +g26 +sg27 +g4 +sg28 +g4 +sg29 +S'NM_001256647.1:c.223_224insT' +p87 +sg31 +g4 +sg32 +(dp88 +S'grch38' +p89 +(dp90 +g36 +S'NC_000019.10:g.50378563_50378564insTAAC' +p91 +sg38 +(dp92 +g40 +g41 +sg42 +g43 +sg44 +S'50378563' +p93 +sg46 +S'ATAAC' +p94 +sssS'grch37' +p95 +(dp96 +g36 +S'NC_000019.9:g.50881820_50881821insTAAC' +p97 +sg38 +(dp98 +g40 +g41 +sg42 +g43 +sg44 +S'50881820' +p99 +sg46 +S'ATAAC' +p100 +sssg54 +(dp101 +g36 +S'NC_000019.10:g.50378563_50378564insTAAC' +p102 +sg38 +(dp103 +g40 +g58 +sg42 +g43 +sg44 +S'50378563' +p104 +sg46 +S'ATAAC' +p105 +sssS'hg19' +p106 +(dp107 +g36 +S'NC_000019.9:g.50881820_50881821insTAAC' +p108 +sg38 +(dp109 +g40 +g58 +sg42 +g43 +sg44 +S'50881820' +p110 +sg46 +S'ATAAC' +p111 +ssssg67 +(dp112 +g69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1' +p113 +sg71 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1' +p114 +sssS'flag' +p115 +S'gene_variant' +p116 +sS'metadata' +p117 +(dp118 +S'variantvalidator_hgvs_version' +p119 +S'1.1.3' +p120 +sS'uta_schema' +p121 +S'uta_20180821' +p122 +sS'seqrepo_db' +p123 +S'2018-08-21' +p124 +sS'variantvalidator_version' +p125 +S'v0.2' +p126 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant106.txt b/VariantValidator/testing/testOutputsMasterITS/variant106.txt new file mode 100644 index 00000000..829c9c64 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant106.txt @@ -0,0 +1,309 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_001256647.1:c.222_228del' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' +p9 +aS'NM_001256647.1:c.222_228 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' +p10 +aS'Caution should be used when reporting the displayed variant descriptions' +p11 +aS'If you are unsure, please contact admin' +p12 +aS'RefSeqGene record not available' +p13 +asS'refseqgene_context_intronic_sequence' +p14 +g6 +sS'alt_genomic_loci' +p15 +(lp16 +sS'transcript_description' +p17 +VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA +p18 +sS'gene_symbol' +p19 +S'NR1H2' +p20 +sS'hgvs_predicted_protein_consequence' +p21 +(dp22 +S'tlr' +p23 +S'NP_001243576.1:p.(Lys75SerfsTer47)' +p24 +sS'slr' +p25 +S'NP_001243576.1:p.(K75Sfs*47)' +p26 +ssS'submitted_variant' +p27 +S'NC_000019.10:g.50378562_50378565del' +p28 +sS'genome_context_intronic_sequence' +p29 +g6 +sS'hgvs_lrg_variant' +p30 +g6 +sS'hgvs_transcript_variant' +p31 +S'NM_001256647.1:c.222_228del' +p32 +sS'hgvs_refseqgene_variant' +p33 +g6 +sS'primary_assembly_loci' +p34 +(dp35 +S'grch38' +p36 +(dp37 +S'hgvs_genomic_description' +p38 
+S'NC_000019.10:g.50378562_50378565del' +p39 +sS'vcf' +p40 +(dp41 +S'chr' +p42 +S'19' +p43 +sS'ref' +p44 +S'GGAAA' +p45 +sS'pos' +p46 +S'50378561' +p47 +sS'alt' +p48 +S'G' +p49 +sssS'grch37' +p50 +(dp51 +g38 +S'NC_000019.9:g.50881819_50881822del' +p52 +sg40 +(dp53 +g42 +g43 +sg44 +S'GGAAA' +p54 +sg46 +S'50881818' +p55 +sg48 +g49 +sssS'hg38' +p56 +(dp57 +g38 +S'NC_000019.10:g.50378562_50378565del' +p58 +sg40 +(dp59 +g42 +S'chr19' +p60 +sg44 +S'GGAAA' +p61 +sg46 +S'50378561' +p62 +sg48 +g49 +sssS'hg19' +p63 +(dp64 +g38 +S'NC_000019.9:g.50881819_50881822del' +p65 +sg40 +(dp66 +g42 +g60 +sg44 +S'GGAAA' +p67 +sg46 +S'50881818' +p68 +sg48 +g49 +ssssS'reference_sequence_records' +p69 +(dp70 +S'protein' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1' +p72 +sS'transcript' +p73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1' +p74 +sssS'NM_007121.5:c.513_519del' +p75 +(dp76 +g5 +g6 +sg7 +(lp77 +S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' +p78 +aS'NM_007121.5:c.513_519 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' +p79 +aS'Caution should be used when reporting the displayed variant descriptions' +p80 +aS'If you are unsure, please contact admin' +p81 +aS'RefSeqGene record not available' +p82 +asg14 +g6 +sg15 +(lp83 +sg17 +VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA +p84 +sg19 +S'NR1H2' +p85 +sg21 +(dp86 +g23 +S'NP_009052.3:p.(Lys172SerfsTer47)' +p87 +sg25 +S'NP_009052.3:p.(K172Sfs*47)' +p88 +ssg27 +g28 +sg29 +g6 +sg30 +g6 +sg31 +S'NM_007121.5:c.513_519del' +p89 +sg33 +g6 +sg34 +(dp90 +S'grch38' +p91 +(dp92 +g38 +S'NC_000019.10:g.50378562_50378565del' +p93 +sg40 +(dp94 +g42 +g43 +sg44 +S'GGAAA' +p95 +sg46 +S'50378561' +p96 +sg48 +g49 +sssS'grch37' +p97 +(dp98 +g38 +S'NC_000019.9:g.50881819_50881822del' +p99 +sg40 +(dp100 +g42 +g43 +sg44 +S'GGAAA' +p101 +sg46 +S'50881818' +p102 +sg48 +g49 +sssg56 +(dp103 +g38 
+S'NC_000019.10:g.50378562_50378565del' +p104 +sg40 +(dp105 +g42 +g60 +sg44 +S'GGAAA' +p106 +sg46 +S'50378561' +p107 +sg48 +g49 +sssS'hg19' +p108 +(dp109 +g38 +S'NC_000019.9:g.50881819_50881822del' +p110 +sg40 +(dp111 +g42 +g60 +sg44 +S'GGAAA' +p112 +sg46 +S'50881818' +p113 +sg48 +g49 +ssssg69 +(dp114 +g71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3' +p115 +sg73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5' +p116 +sssS'metadata' +p117 +(dp118 +S'variantvalidator_hgvs_version' +p119 +S'1.1.3' +p120 +sS'uta_schema' +p121 +S'uta_20180821' +p122 +sS'seqrepo_db' +p123 +S'2018-08-21' +p124 +sS'variantvalidator_version' +p125 +S'v0.2' +p126 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant107.txt b/VariantValidator/testing/testOutputsMasterITS/variant107.txt new file mode 100644 index 00000000..e625cb1a --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant107.txt @@ -0,0 +1,312 @@ +(dp0 +S'NM_001256647.1:c.222_228delinsTC' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' +p7 +aS'NM_001256647.1:c.222_228 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' +p8 +aS'Caution should be used when reporting the displayed variant descriptions' +p9 +aS'If you are unsure, please contact admin' +p10 +aS'RefSeqGene record not available' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g4 +sS'alt_genomic_loci' +p13 +(lp14 +sS'transcript_description' +p15 +VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA +p16 +sS'gene_symbol' +p17 +S'NR1H2' +p18 +sS'hgvs_predicted_protein_consequence' +p19 +(dp20 +S'tlr' +p21 +S'NP_001243576.1:p.(Lys75ProfsTer33)' +p22 +sS'slr' +p23 +S'NP_001243576.1:p.(K75Pfs*33)' +p24 +ssS'submitted_variant' +p25 +S'NC_000019.10:g.50378562_50378565delinsTC' +p26 
+sS'genome_context_intronic_sequence' +p27 +g4 +sS'hgvs_lrg_variant' +p28 +g4 +sS'hgvs_transcript_variant' +p29 +S'NM_001256647.1:c.222_228delinsTC' +p30 +sS'hgvs_refseqgene_variant' +p31 +g4 +sS'primary_assembly_loci' +p32 +(dp33 +S'grch38' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000019.10:g.50378562_50378565delinsTC' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'19' +p41 +sS'ref' +p42 +S'GAAA' +p43 +sS'pos' +p44 +S'50378562' +p45 +sS'alt' +p46 +S'TC' +p47 +sssS'grch37' +p48 +(dp49 +g36 +S'NC_000019.9:g.50881819_50881822delinsTC' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +S'GAAA' +p52 +sg44 +S'50881819' +p53 +sg46 +S'TC' +p54 +sssS'hg38' +p55 +(dp56 +g36 +S'NC_000019.10:g.50378562_50378565delinsTC' +p57 +sg38 +(dp58 +g40 +S'chr19' +p59 +sg42 +S'GAAA' +p60 +sg44 +S'50378562' +p61 +sg46 +g47 +sssS'hg19' +p62 +(dp63 +g36 +S'NC_000019.9:g.50881819_50881822delinsTC' +p64 +sg38 +(dp65 +g40 +g59 +sg42 +S'GAAA' +p66 +sg44 +S'50881819' +p67 +sg46 +g54 +ssssS'reference_sequence_records' +p68 +(dp69 +S'protein' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1' +p71 +sS'transcript' +p72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1' +p73 +sssS'flag' +p74 +S'gene_variant' +p75 +sS'NM_007121.5:c.513_519delinsTC' +p76 +(dp77 +g3 +g4 +sg5 +(lp78 +S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' +p79 +aS'NM_007121.5:c.513_519 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' +p80 +aS'Caution should be used when reporting the displayed variant descriptions' +p81 +aS'If you are unsure, please contact admin' +p82 +aS'RefSeqGene record not available' +p83 +asg12 +g4 +sg13 +(lp84 +sg15 +VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA +p85 +sg17 +S'NR1H2' +p86 +sg19 +(dp87 +g21 +S'NP_009052.3:p.(Lys172ProfsTer33)' +p88 +sg23 +S'NP_009052.3:p.(K172Pfs*33)' +p89 +ssg25 +g26 +sg27 +g4 +sg28 +g4 +sg29 +S'NM_007121.5:c.513_519delinsTC' +p90 +sg31 +g4 +sg32 
+(dp91 +S'grch38' +p92 +(dp93 +g36 +S'NC_000019.10:g.50378562_50378565delinsTC' +p94 +sg38 +(dp95 +g40 +g41 +sg42 +S'GAAA' +p96 +sg44 +S'50378562' +p97 +sg46 +S'TC' +p98 +sssS'grch37' +p99 +(dp100 +g36 +S'NC_000019.9:g.50881819_50881822delinsTC' +p101 +sg38 +(dp102 +g40 +g41 +sg42 +S'GAAA' +p103 +sg44 +S'50881819' +p104 +sg46 +S'TC' +p105 +sssg55 +(dp106 +g36 +S'NC_000019.10:g.50378562_50378565delinsTC' +p107 +sg38 +(dp108 +g40 +g59 +sg42 +S'GAAA' +p109 +sg44 +S'50378562' +p110 +sg46 +g98 +sssS'hg19' +p111 +(dp112 +g36 +S'NC_000019.9:g.50881819_50881822delinsTC' +p113 +sg38 +(dp114 +g40 +g59 +sg42 +S'GAAA' +p115 +sg44 +S'50881819' +p116 +sg46 +g105 +ssssg68 +(dp117 +g70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3' +p118 +sg72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5' +p119 +sssS'metadata' +p120 +(dp121 +S'variantvalidator_hgvs_version' +p122 +S'1.1.3' +p123 +sS'uta_schema' +p124 +S'uta_20180821' +p125 +sS'seqrepo_db' +p126 +S'2018-08-21' +p127 +sS'variantvalidator_version' +p128 +S'v0.2' +p129 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant108.txt b/VariantValidator/testing/testOutputsMasterITS/variant108.txt new file mode 100644 index 00000000..3d361f21 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant108.txt @@ -0,0 +1,183 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_198455.2:c.1115_1116insT' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'The displayed variants may be artefacts of aligning NM_198455.2 with genome build GRCh37' +p9 +aS'Genome position NC_000007.13:g.149476667 aligns within a Requires Analysis-bp gap in transcript NM_198455.2 between positions c.1116_1117' +p10 +aS'Caution should be used when reporting the displayed variant descriptions' +p11 +aS'If you are unsure, please contact admin' +p12 +aS'RefSeqGene record not available' +p13 +asS'refseqgene_context_intronic_sequence' +p14 +g6 +sS'alt_genomic_loci' +p15 +(lp16 +sS'transcript_description' +p17 +VHomo sapiens SCO-spondin (SSPO), mRNA +p18 +sS'gene_symbol' +p19 +S'SSPO' +p20 +sS'hgvs_predicted_protein_consequence' +p21 +(dp22 +S'tlr' +p23 +S'NP_940857.2:p.(Leu374ProfsTer16)' +p24 +sS'slr' +p25 +S'NP_940857.2:p.(L374Pfs*16)' +p26 +ssS'submitted_variant' +p27 +S'NC_000007.14:g.149779575_149779577delinsT' +p28 +sS'genome_context_intronic_sequence' +p29 +g6 +sS'hgvs_lrg_variant' +p30 +g6 +sS'hgvs_transcript_variant' +p31 +S'NM_198455.2:c.1115_1116insT' +p32 +sS'hgvs_refseqgene_variant' +p33 +g6 +sS'primary_assembly_loci' +p34 +(dp35 +S'hg19' +p36 +(dp37 +S'hgvs_genomic_description' +p38 +S'NC_000007.13:g.149476664_149476666delinsTC' +p39 +sS'vcf' +p40 +(dp41 +S'chr' +p42 +S'chr7' +p43 +sS'ref' +p44 +S'CAG' +p45 +sS'pos' +p46 +S'149476664' +p47 +sS'alt' +p48 +S'TC' +p49 +sssS'hg38' +p50 +(dp51 +g38 +S'NC_000007.14:g.149779575_149779577delinsT' +p52 +sg40 +(dp53 +g42 +g43 +sg44 +S'CAG' +p54 +sg46 +S'149779575' +p55 +sg48 +S'T' +p56 +sssS'grch37' +p57 +(dp58 +g38 
+S'NC_000007.13:g.149476664_149476666delinsTC' +p59 +sg40 +(dp60 +g42 +S'7' +p61 +sg44 +S'CAG' +p62 +sg46 +S'149476664' +p63 +sg48 +g49 +sssS'grch38' +p64 +(dp65 +g38 +S'NC_000007.14:g.149779575_149779577delinsT' +p66 +sg40 +(dp67 +g42 +g61 +sg44 +S'CAG' +p68 +sg46 +S'149779575' +p69 +sg48 +g56 +ssssS'reference_sequence_records' +p70 +(dp71 +S'protein' +p72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2' +p73 +sS'transcript' +p74 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2' +p75 +sssS'metadata' +p76 +(dp77 +S'variantvalidator_hgvs_version' +p78 +S'1.1.3' +p79 +sS'uta_schema' +p80 +S'uta_20180821' +p81 +sS'seqrepo_db' +p82 +S'2018-08-21' +p83 +sS'variantvalidator_version' +p84 +S'v0.2' +p85 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant109.txt b/VariantValidator/testing/testOutputsMasterITS/variant109.txt new file mode 100644 index 00000000..6997f57a --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant109.txt @@ -0,0 +1,183 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_198455.2:c.1116_1118=' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'The displayed variants may be artefacts of aligning NM_198455.2 with genome build GRCh37' +p9 +aS'Genome position NC_000007.13:g.149476667 aligns within a Requires Analysis-bp gap in transcript NM_198455.2 between positions c.1116_1117' +p10 +aS'Caution should be used when reporting the displayed variant descriptions' +p11 +aS'If you are unsure, please contact admin' +p12 +aS'RefSeqGene record not available' +p13 +asS'refseqgene_context_intronic_sequence' +p14 +g6 +sS'alt_genomic_loci' +p15 +(lp16 +sS'transcript_description' +p17 +VHomo sapiens SCO-spondin (SSPO), mRNA +p18 +sS'gene_symbol' +p19 +S'SSPO' +p20 +sS'hgvs_predicted_protein_consequence' +p21 +(dp22 +S'tlr' +p23 +S'NP_940857.2:p.(Asp372=)' +p24 +sS'slr' +p25 +S'NP_940857.2:p.(D372=)' +p26 +ssS'submitted_variant' +p27 
+S'NC_000007.14:g.149779575_149779577=' +p28 +sS'genome_context_intronic_sequence' +p29 +g6 +sS'hgvs_lrg_variant' +p30 +g6 +sS'hgvs_transcript_variant' +p31 +S'NM_198455.2:c.1116_1118=' +p32 +sS'hgvs_refseqgene_variant' +p33 +g6 +sS'primary_assembly_loci' +p34 +(dp35 +S'hg19' +p36 +(dp37 +S'hgvs_genomic_description' +p38 +S'NC_000007.13:g.149476665_149476666del' +p39 +sS'vcf' +p40 +(dp41 +S'chr' +p42 +S'chr7' +p43 +sS'ref' +p44 +S'CAG' +p45 +sS'pos' +p46 +S'149476664' +p47 +sS'alt' +p48 +S'C' +p49 +sssS'hg38' +p50 +(dp51 +g38 +S'NC_000007.14:g.149779576_149779578del' +p52 +sg40 +(dp53 +g42 +g43 +sg44 +S'ACAG' +p54 +sg46 +S'149779574' +p55 +sg48 +S'A' +p56 +sssS'grch37' +p57 +(dp58 +g38 +S'NC_000007.13:g.149476665_149476666del' +p59 +sg40 +(dp60 +g42 +S'7' +p61 +sg44 +S'CAG' +p62 +sg46 +S'149476664' +p63 +sg48 +g49 +sssS'grch38' +p64 +(dp65 +g38 +S'NC_000007.14:g.149779576_149779578del' +p66 +sg40 +(dp67 +g42 +g61 +sg44 +S'ACAG' +p68 +sg46 +S'149779574' +p69 +sg48 +g56 +ssssS'reference_sequence_records' +p70 +(dp71 +S'protein' +p72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2' +p73 +sS'transcript' +p74 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2' +p75 +sssS'metadata' +p76 +(dp77 +S'variantvalidator_hgvs_version' +p78 +S'1.1.3' +p79 +sS'uta_schema' +p80 +S'uta_20180821' +p81 +sS'seqrepo_db' +p82 +S'2018-08-21' +p83 +sS'variantvalidator_version' +p84 +S'v0.2' +p85 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant11.txt b/VariantValidator/testing/testOutputsMasterITS/variant11.txt new file mode 100644 index 00000000..bd208042 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant11.txt @@ -0,0 +1,80 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'A transcript reference sequence has not been provided e.g. 
NG_(NM_):c.PositionVariation' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +g4 +sS'gene_symbol' +p12 +g4 +sS'hgvs_predicted_protein_consequence' +p13 +(dp14 +S'tlr' +p15 +g4 +sS'slr' +p16 +g4 +ssS'submitted_variant' +p17 +S'NG_007400.1:c.5071A>T' +p18 +sS'genome_context_intronic_sequence' +p19 +g4 +sS'hgvs_lrg_variant' +p20 +g4 +sS'hgvs_transcript_variant' +p21 +g4 +sS'hgvs_refseqgene_variant' +p22 +g4 +sS'primary_assembly_loci' +p23 +(dp24 +sS'reference_sequence_records' +p25 +g4 +ssS'flag' +p26 +S'warning' +p27 +sS'metadata' +p28 +(dp29 +S'variantvalidator_hgvs_version' +p30 +S'1.1.3' +p31 +sS'uta_schema' +p32 +S'uta_20180821' +p33 +sS'seqrepo_db' +p34 +S'2018-08-21' +p35 +sS'variantvalidator_version' +p36 +S'v0.2' +p37 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant110.txt b/VariantValidator/testing/testOutputsMasterITS/variant110.txt new file mode 100644 index 00000000..9b453b66 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant110.txt @@ -0,0 +1,183 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_198455.2:c.1116_1118=' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'The displayed variants may be artefacts of aligning NM_198455.2 with genome build GRCh37' +p9 +aS'Genome position NC_000007.13:g.149476667 aligns within a Requires Analysis-bp gap in transcript NM_198455.2 between positions c.1116_1117' +p10 +aS'Caution should be used when reporting the displayed variant descriptions' +p11 +aS'If you are unsure, please contact admin' +p12 +aS'RefSeqGene record not available' +p13 +asS'refseqgene_context_intronic_sequence' +p14 +g6 +sS'alt_genomic_loci' +p15 +(lp16 +sS'transcript_description' +p17 +VHomo sapiens SCO-spondin (SSPO), mRNA +p18 +sS'gene_symbol' +p19 +S'SSPO' +p20 +sS'hgvs_predicted_protein_consequence' +p21 +(dp22 +S'tlr' +p23 +S'NP_940857.2:p.(Asp372=)' 
+p24 +sS'slr' +p25 +S'NP_940857.2:p.(D372=)' +p26 +ssS'submitted_variant' +p27 +S'NC_000007.14:g.149779576_149779578del' +p28 +sS'genome_context_intronic_sequence' +p29 +g6 +sS'hgvs_lrg_variant' +p30 +g6 +sS'hgvs_transcript_variant' +p31 +S'NM_198455.2:c.1116_1118=' +p32 +sS'hgvs_refseqgene_variant' +p33 +g6 +sS'primary_assembly_loci' +p34 +(dp35 +S'hg19' +p36 +(dp37 +S'hgvs_genomic_description' +p38 +S'NC_000007.13:g.149476665_149476666del' +p39 +sS'vcf' +p40 +(dp41 +S'chr' +p42 +S'chr7' +p43 +sS'ref' +p44 +S'CAG' +p45 +sS'pos' +p46 +S'149476664' +p47 +sS'alt' +p48 +S'C' +p49 +sssS'hg38' +p50 +(dp51 +g38 +S'NC_000007.14:g.149779576_149779578del' +p52 +sg40 +(dp53 +g42 +g43 +sg44 +S'ACAG' +p54 +sg46 +S'149779574' +p55 +sg48 +S'A' +p56 +sssS'grch37' +p57 +(dp58 +g38 +S'NC_000007.13:g.149476665_149476666del' +p59 +sg40 +(dp60 +g42 +S'7' +p61 +sg44 +S'CAG' +p62 +sg46 +S'149476664' +p63 +sg48 +g49 +sssS'grch38' +p64 +(dp65 +g38 +S'NC_000007.14:g.149779576_149779578del' +p66 +sg40 +(dp67 +g42 +g61 +sg44 +S'ACAG' +p68 +sg46 +S'149779574' +p69 +sg48 +g56 +ssssS'reference_sequence_records' +p70 +(dp71 +S'protein' +p72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2' +p73 +sS'transcript' +p74 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2' +p75 +sssS'metadata' +p76 +(dp77 +S'variantvalidator_hgvs_version' +p78 +S'1.1.3' +p79 +sS'uta_schema' +p80 +S'uta_20180821' +p81 +sS'seqrepo_db' +p82 +S'2018-08-21' +p83 +sS'variantvalidator_version' +p84 +S'v0.2' +p85 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant111.txt b/VariantValidator/testing/testOutputsMasterITS/variant111.txt new file mode 100644 index 00000000..d2cc31c7 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant111.txt @@ -0,0 +1,182 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_198455.2:c.1115_1116dup' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'The displayed variants may be artefacts of aligning NM_198455.2 with genome build GRCh37' +p9 +aS'NC_000007.13:g.149476666 is one of 2 genomic base(s) that fail to align to transcript NM_198455.2 between positions c.1116_1117' +p10 +aS'Caution should be used when reporting the displayed variant descriptions' +p11 +aS'If you are unsure, please contact admin' +p12 +aS'RefSeqGene record not available' +p13 +asS'refseqgene_context_intronic_sequence' +p14 +g6 +sS'alt_genomic_loci' +p15 +(lp16 +sS'transcript_description' +p17 +VHomo sapiens SCO-spondin (SSPO), mRNA +p18 +sS'gene_symbol' +p19 +S'SSPO' +p20 +sS'hgvs_predicted_protein_consequence' +p21 +(dp22 +S'tlr' +p23 +S'NP_940857.2:p.(Pro373ThrfsTer6)' +p24 +sS'slr' +p25 +S'NP_940857.2:p.(P373Tfs*6)' +p26 +ssS'submitted_variant' +p27 +S'NC_000007.14:g.149779577del' +p28 +sS'genome_context_intronic_sequence' +p29 +g6 +sS'hgvs_lrg_variant' +p30 +g6 +sS'hgvs_transcript_variant' +p31 +S'NM_198455.2:c.1115_1116dup' +p32 +sS'hgvs_refseqgene_variant' +p33 +g6 +sS'primary_assembly_loci' +p34 +(dp35 +S'hg19' +p36 +(dp37 +S'hgvs_genomic_description' +p38 +S'NC_000007.13:g.149476666G>C' +p39 +sS'vcf' +p40 +(dp41 +S'chr' +p42 +S'chr7' +p43 +sS'ref' +p44 +S'G' +p45 +sS'pos' +p46 +S'149476666' +p47 +sS'alt' +p48 +S'C' +p49 +sssS'hg38' +p50 +(dp51 +g38 +S'NC_000007.14:g.149779577del' +p52 +sg40 +(dp53 +g42 +g43 +sg44 +S'AG' +p54 +sg46 +S'149779576' +p55 +sg48 +S'A' +p56 +sssS'grch37' +p57 +(dp58 +g38 +S'NC_000007.13:g.149476666G>C' +p59 +sg40 +(dp60 +g42 +S'7' +p61 
+sg44 +g45 +sg46 +S'149476666' +p62 +sg48 +g49 +sssS'grch38' +p63 +(dp64 +g38 +S'NC_000007.14:g.149779577del' +p65 +sg40 +(dp66 +g42 +g61 +sg44 +S'AG' +p67 +sg46 +S'149779576' +p68 +sg48 +g56 +ssssS'reference_sequence_records' +p69 +(dp70 +S'protein' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2' +p72 +sS'transcript' +p73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2' +p74 +sssS'metadata' +p75 +(dp76 +S'variantvalidator_hgvs_version' +p77 +S'1.1.3' +p78 +sS'uta_schema' +p79 +S'uta_20180821' +p80 +sS'seqrepo_db' +p81 +S'2018-08-21' +p82 +sS'variantvalidator_version' +p83 +S'v0.2' +p84 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant112.txt b/VariantValidator/testing/testOutputsMasterITS/variant112.txt new file mode 100644 index 00000000..3e53ec68 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant112.txt @@ -0,0 +1,182 @@ +(dp0 +S'NM_198455.2:c.1114_1117del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'The displayed variants may be artefacts of aligning NM_198455.2 with genome build GRCh37' +p7 +aS'NC_000007.13:g.149476661_149476667 contains 2 genomic base(s) that fail to align to transcript NM_198455.2' +p8 +aS'Caution should be used when reporting the displayed variant descriptions' +p9 +aS'If you are unsure, please contact admin' +p10 +aS'RefSeqGene record not available' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g4 +sS'alt_genomic_loci' +p13 +(lp14 +sS'transcript_description' +p15 +VHomo sapiens SCO-spondin (SSPO), mRNA +p16 +sS'gene_symbol' +p17 +S'SSPO' +p18 +sS'hgvs_predicted_protein_consequence' +p19 +(dp20 +S'tlr' +p21 +S'NP_940857.2:p.(Asp372ProfsTer5)' +p22 +sS'slr' +p23 +S'NP_940857.2:p.(D372Pfs*5)' +p24 +ssS'submitted_variant' +p25 +S'NC_000007.14:g.149779573_149779579del' +p26 +sS'genome_context_intronic_sequence' +p27 +g4 +sS'hgvs_lrg_variant' +p28 +g4 +sS'hgvs_transcript_variant' +p29 
+S'NM_198455.2:c.1114_1117del' +p30 +sS'hgvs_refseqgene_variant' +p31 +g4 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000007.13:g.149476662_149476667del' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr7' +p41 +sS'ref' +p42 +S'TGACAGC' +p43 +sS'pos' +p44 +S'149476661' +p45 +sS'alt' +p46 +S'T' +p47 +sssS'hg38' +p48 +(dp49 +g36 +S'NC_000007.14:g.149779573_149779579del' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +S'TGACAGCC' +p52 +sg44 +S'149779572' +p53 +sg46 +g47 +sssS'grch37' +p54 +(dp55 +g36 +S'NC_000007.13:g.149476662_149476667del' +p56 +sg38 +(dp57 +g40 +S'7' +p58 +sg42 +S'TGACAGC' +p59 +sg44 +S'149476661' +p60 +sg46 +g47 +sssS'grch38' +p61 +(dp62 +g36 +S'NC_000007.14:g.149779573_149779579del' +p63 +sg38 +(dp64 +g40 +g58 +sg42 +S'TGACAGCC' +p65 +sg44 +S'149779572' +p66 +sg46 +g47 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2' +p72 +sssS'flag' +p73 +S'gene_variant' +p74 +sS'metadata' +p75 +(dp76 +S'variantvalidator_hgvs_version' +p77 +S'1.1.3' +p78 +sS'uta_schema' +p79 +S'uta_20180821' +p80 +sS'seqrepo_db' +p81 +S'2018-08-21' +p82 +sS'variantvalidator_version' +p83 +S'v0.2' +p84 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant113.txt b/VariantValidator/testing/testOutputsMasterITS/variant113.txt new file mode 100644 index 00000000..83c5d575 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant113.txt @@ -0,0 +1,183 @@ +(dp0 +S'NM_198455.2:c.1114_1117delinsCA' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'The displayed variants may be artefacts of aligning NM_198455.2 with genome build GRCh37' +p7 +aS'NC_000007.13:g.149476661_149476667 contains 2 genomic base(s) that fail to align to transcript NM_198455.2' +p8 +aS'Caution should be used when reporting the displayed variant descriptions' +p9 +aS'If you are unsure, please contact admin' +p10 +aS'RefSeqGene record not available' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g4 +sS'alt_genomic_loci' +p13 +(lp14 +sS'transcript_description' +p15 +VHomo sapiens SCO-spondin (SSPO), mRNA +p16 +sS'gene_symbol' +p17 +S'SSPO' +p18 +sS'hgvs_predicted_protein_consequence' +p19 +(dp20 +S'tlr' +p21 +S'NP_940857.2:p.(Asp372HisfsTer17)' +p22 +sS'slr' +p23 +S'NP_940857.2:p.(D372Hfs*17)' +p24 +ssS'submitted_variant' +p25 +S'NC_000007.14:g.149779573_149779579delinsCA' +p26 +sS'genome_context_intronic_sequence' +p27 +g4 +sS'hgvs_lrg_variant' +p28 +g4 +sS'hgvs_transcript_variant' +p29 +S'NM_198455.2:c.1114_1117delinsCA' +p30 +sS'hgvs_refseqgene_variant' +p31 +g4 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000007.13:g.149476662_149476667delinsCA' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr7' +p41 +sS'ref' +p42 +S'GACAGC' +p43 +sS'pos' +p44 +S'149476662' +p45 +sS'alt' +p46 +S'CA' +p47 +sssS'hg38' +p48 +(dp49 +g36 +S'NC_000007.14:g.149779573_149779579delinsCA' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +S'GACAGCC' +p52 +sg44 +S'149779573' +p53 +sg46 +S'CA' +p54 +sssS'grch37' +p55 +(dp56 +g36 +S'NC_000007.13:g.149476662_149476667delinsCA' +p57 +sg38 
+(dp58 +g40 +S'7' +p59 +sg42 +S'GACAGC' +p60 +sg44 +S'149476662' +p61 +sg46 +g47 +sssS'grch38' +p62 +(dp63 +g36 +S'NC_000007.14:g.149779573_149779579delinsCA' +p64 +sg38 +(dp65 +g40 +g59 +sg42 +S'GACAGCC' +p66 +sg44 +S'149779573' +p67 +sg46 +g54 +ssssS'reference_sequence_records' +p68 +(dp69 +S'protein' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2' +p71 +sS'transcript' +p72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2' +p73 +sssS'flag' +p74 +S'gene_variant' +p75 +sS'metadata' +p76 +(dp77 +S'variantvalidator_hgvs_version' +p78 +S'1.1.3' +p79 +sS'uta_schema' +p80 +S'uta_20180821' +p81 +sS'seqrepo_db' +p82 +S'2018-08-21' +p83 +sS'variantvalidator_version' +p84 +S'v0.2' +p85 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant114.txt b/VariantValidator/testing/testOutputsMasterITS/variant114.txt new file mode 100644 index 00000000..2dfbb5a9 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant114.txt @@ -0,0 +1,177 @@ +(dp0 +S'NM_000088.3:c.590_591inv' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p12 +sS'gene_symbol' +p13 +S'COL1A1' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_000079.2:p.(Gly197Asp)' +p18 +sS'slr' +p19 +S'NP_000079.2:p.(G197D)' +p20 +ssS'submitted_variant' +p21 +S'NM_000088.3:c.590_591inv' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_000088.3:c.590_591inv' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000017.10:g.48275361_48275362inv' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr17' +p37 +sS'ref' +p38 +S'AC' 
+p39 +sS'pos' +p40 +S'48275361' +p41 +sS'alt' +p42 +S'GT' +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000017.11:g.50198000_50198001inv' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +S'AC' +p48 +sg40 +S'50198000' +p49 +sg42 +S'GT' +p50 +sssS'grch37' +p51 +(dp52 +g32 +S'NC_000017.10:g.48275361_48275362inv' +p53 +sg34 +(dp54 +g36 +S'17' +p55 +sg38 +S'AC' +p56 +sg40 +S'48275361' +p57 +sg42 +S'GT' +p58 +sssS'grch38' +p59 +(dp60 +g32 +S'NC_000017.11:g.50198000_50198001inv' +p61 +sg34 +(dp62 +g36 +g55 +sg38 +S'AC' +p63 +sg40 +S'50198000' +p64 +sg42 +S'GT' +p65 +ssssS'reference_sequence_records' +p66 +(dp67 +S'protein' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p69 +sS'transcript' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p71 +sssS'flag' +p72 +S'gene_variant' +p73 +sS'metadata' +p74 +(dp75 +S'variantvalidator_hgvs_version' +p76 +S'1.1.3' +p77 +sS'uta_schema' +p78 +S'uta_20180821' +p79 +sS'seqrepo_db' +p80 +S'2018-08-21' +p81 +sS'variantvalidator_version' +p82 +S'v0.2' +p83 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant115.txt b/VariantValidator/testing/testOutputsMasterITS/variant115.txt new file mode 100644 index 00000000..67f43ad9 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant115.txt @@ -0,0 +1,177 @@ +(dp0 +S'NM_024989.3:c.1778_1779inv' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens post-GPI attachment to proteins 1 (PGAP1), transcript variant 1, mRNA +p12 +sS'gene_symbol' +p13 +S'PGAP1' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_079265.2:p.(Phe593Ter)' +p18 +sS'slr' +p19 +S'NP_079265.2:p.(F593*)' +p20 +ssS'submitted_variant' +p21 +S'NM_024989.3:c.1778_1779inv' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_024989.3:c.1778_1779inv' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000002.11:g.197729793_197729794inv' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr2' +p37 +sS'ref' +p38 +S'AA' +p39 +sS'pos' +p40 +S'197729793' +p41 +sS'alt' +p42 +S'TT' +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000002.12:g.196865069_196865070inv' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +S'AA' +p48 +sg40 +S'196865069' +p49 +sg42 +S'TT' +p50 +sssS'grch37' +p51 +(dp52 +g32 +S'NC_000002.11:g.197729793_197729794inv' +p53 +sg34 +(dp54 +g36 +S'2' +p55 +sg38 +S'AA' +p56 +sg40 +S'197729793' +p57 +sg42 +S'TT' +p58 +sssS'grch38' +p59 +(dp60 +g32 +S'NC_000002.12:g.196865069_196865070inv' +p61 +sg34 +(dp62 +g36 +g55 +sg38 +S'AA' +p63 +sg40 +S'196865069' +p64 +sg42 +S'TT' +p65 +ssssS'reference_sequence_records' +p66 +(dp67 +S'protein' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_079265.2' 
+p69 +sS'transcript' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_024989.3' +p71 +sssS'flag' +p72 +S'gene_variant' +p73 +sS'metadata' +p74 +(dp75 +S'variantvalidator_hgvs_version' +p76 +S'1.1.3' +p77 +sS'uta_schema' +p78 +S'uta_20180821' +p79 +sS'seqrepo_db' +p80 +S'2018-08-21' +p81 +sS'variantvalidator_version' +p82 +S'v0.2' +p83 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant116.txt b/VariantValidator/testing/testOutputsMasterITS/variant116.txt new file mode 100644 index 00000000..b4fed108 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant116.txt @@ -0,0 +1,177 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_032815.3:c.555_556inv' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens nuclear factor of activated T cells 2 interacting protein (NFATC2IP), mRNA +p14 +sS'gene_symbol' +p15 +S'NFATC2IP' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_116204.3:p.(Glu185_Glu186delinsAspTer)' +p20 +sS'slr' +p21 +S'NP_116204.3:p.(E185_E186delinsD*)' +p22 +ssS'submitted_variant' +p23 +S'NM_032815.3:c.555_556inv' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_032815.3:c.555_556inv' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'grch38' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000016.10:g.28954659_28954660inv' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'16' +p39 +sS'ref' +p40 +S'AG' +p41 +sS'pos' +p42 +S'28954659' +p43 +sS'alt' +p44 +S'CT' +p45 +sssS'grch37' +p46 +(dp47 +g34 +S'NC_000016.9:g.28965980_28965981inv' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +S'AG' +p50 +sg42 +S'28965980' +p51 +sg44 +S'CT' +p52 +sssS'hg38' +p53 +(dp54 +g34 
+S'NC_000016.10:g.28954659_28954660inv' +p55 +sg36 +(dp56 +g38 +S'chr16' +p57 +sg40 +S'AG' +p58 +sg42 +S'28954659' +p59 +sg44 +S'CT' +p60 +sssS'hg19' +p61 +(dp62 +g34 +S'NC_000016.9:g.28965980_28965981inv' +p63 +sg36 +(dp64 +g38 +g57 +sg40 +S'AG' +p65 +sg42 +S'28965980' +p66 +sg44 +S'CT' +p67 +ssssS'reference_sequence_records' +p68 +(dp69 +S'protein' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116204.3' +p71 +sS'transcript' +p72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032815.3' +p73 +sssS'metadata' +p74 +(dp75 +S'variantvalidator_hgvs_version' +p76 +S'1.1.3' +p77 +sS'uta_schema' +p78 +S'uta_20180821' +p79 +sS'seqrepo_db' +p80 +S'2018-08-21' +p81 +sS'variantvalidator_version' +p82 +S'v0.2' +p83 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant117.txt b/VariantValidator/testing/testOutputsMasterITS/variant117.txt new file mode 100644 index 00000000..406b2fbf --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant117.txt @@ -0,0 +1,23 @@ +(dp0 +S'flag' +p1 +NsS'metadata' +p2 +(dp3 +S'variantvalidator_hgvs_version' +p4 +S'1.1.3' +p5 +sS'uta_schema' +p6 +S'uta_20180821' +p7 +sS'seqrepo_db' +p8 +S'2018-08-21' +p9 +sS'variantvalidator_version' +p10 +S'v0.2' +p11 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant118.txt b/VariantValidator/testing/testOutputsMasterITS/variant118.txt new file mode 100644 index 00000000..c5baab8b --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant118.txt @@ -0,0 +1,177 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000038.5:c.3927_3928inv' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens APC, WNT signaling pathway regulator (APC), transcript variant 3, mRNA +p14 +sS'gene_symbol' +p15 +S'APC' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_000029.2:p.(Glu1309_Lys1310delinsAspTer)' +p20 +sS'slr' +p21 +S'NP_000029.2:p.(E1309_K1310delinsD*)' +p22 +ssS'submitted_variant' +p23 +S'NM_000038.5:c.3927_3928delAAinsTT' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_000038.5:c.3927_3928inv' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'grch38' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000005.10:g.112839521_112839522inv' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'5' +p39 +sS'ref' +p40 +S'AA' +p41 +sS'pos' +p42 +S'112839521' +p43 +sS'alt' +p44 +S'TT' +p45 +sssS'grch37' +p46 +(dp47 +g34 +S'NC_000005.9:g.112175218_112175219inv' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +S'AA' +p50 +sg42 +S'112175218' +p51 +sg44 +S'TT' +p52 +sssS'hg38' +p53 +(dp54 +g34 +S'NC_000005.10:g.112839521_112839522inv' +p55 +sg36 +(dp56 +g38 +S'chr5' +p57 +sg40 +S'AA' +p58 +sg42 +S'112839521' +p59 +sg44 +S'TT' +p60 +sssS'hg19' +p61 +(dp62 +g34 +S'NC_000005.9:g.112175218_112175219inv' +p63 +sg36 +(dp64 +g38 +g57 +sg40 +S'AA' +p65 +sg42 +S'112175218' +p66 +sg44 +S'TT' +p67 +ssssS'reference_sequence_records' +p68 
+(dp69 +S'protein' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000029.2' +p71 +sS'transcript' +p72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000038.5' +p73 +sssS'metadata' +p74 +(dp75 +S'variantvalidator_hgvs_version' +p76 +S'1.1.3' +p77 +sS'uta_schema' +p78 +S'uta_20180821' +p79 +sS'seqrepo_db' +p80 +S'2018-08-21' +p81 +sS'variantvalidator_version' +p82 +S'v0.2' +p83 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant119.txt b/VariantValidator/testing/testOutputsMasterITS/variant119.txt new file mode 100644 index 00000000..83cfa2e6 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant119.txt @@ -0,0 +1,177 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_001034853.1:c.2847_2848inv' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens retinitis pigmentosa GTPase regulator (RPGR), transcript variant C, mRNA +p14 +sS'gene_symbol' +p15 +S'RPGR' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_001030025.1:p.(Glu949_Glu950delinsAspTer)' +p20 +sS'slr' +p21 +S'NP_001030025.1:p.(E949_E950delinsD*)' +p22 +ssS'submitted_variant' +p23 +S'NM_001034853.1:c.2847_2848delAGinsCT' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_001034853.1:c.2847_2848inv' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000023.10:g.38145404_38145405inv' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chrX' +p39 +sS'ref' +p40 +S'CT' +p41 +sS'pos' +p42 +S'38145404' +p43 +sS'alt' +p44 +S'AG' +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000023.11:g.38286151_38286152inv' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +S'CT' +p50 +sg42 +S'38286151' +p51 
+sg44 +S'AG' +p52 +sssS'grch37' +p53 +(dp54 +g34 +S'NC_000023.10:g.38145404_38145405inv' +p55 +sg36 +(dp56 +g38 +S'X' +p57 +sg40 +S'CT' +p58 +sg42 +S'38145404' +p59 +sg44 +S'AG' +p60 +sssS'grch38' +p61 +(dp62 +g34 +S'NC_000023.11:g.38286151_38286152inv' +p63 +sg36 +(dp64 +g38 +g57 +sg40 +S'CT' +p65 +sg42 +S'38286151' +p66 +sg44 +S'AG' +p67 +ssssS'reference_sequence_records' +p68 +(dp69 +S'protein' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001030025.1' +p71 +sS'transcript' +p72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001034853.1' +p73 +sssS'metadata' +p74 +(dp75 +S'variantvalidator_hgvs_version' +p76 +S'1.1.3' +p77 +sS'uta_schema' +p78 +S'uta_20180821' +p79 +sS'seqrepo_db' +p80 +S'2018-08-21' +p81 +sS'variantvalidator_version' +p82 +S'v0.2' +p83 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant12.txt b/VariantValidator/testing/testOutputsMasterITS/variant12.txt new file mode 100644 index 00000000..d6eccd94 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant12.txt @@ -0,0 +1,704 @@ +(dp0 +S'NM_002474.2:c.3034_3035inv' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +(dp11 +S'grch38' +p12 +(dp13 +S'hgvs_genomic_description' +p14 +S'NT_187607.1:g.1396662_1396663inv' +p15 +sS'vcf' +p16 +(dp17 +S'chr' +p18 +S'HSCHR16_1_CTG1' +p19 +sS'ref' +p20 +S'GT' +p21 +sS'pos' +p22 +S'1396662' +p23 +sS'alt' +p24 +S'AC' +p25 +sssa(dp26 +S'hg38' +p27 +(dp28 +g14 +S'NT_187607.1:g.1396662_1396663inv' +p29 +sg16 +(dp30 +g18 +S'chr16_KI270853v1_alt' +p31 +sg20 +S'GT' +p32 +sg22 +S'1396662' +p33 +sg24 +S'AC' +p34 +sssasS'transcript_description' +p35 +VHomo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1A, mRNA +p36 +sS'gene_symbol' +p37 +S'MYH11' +p38 +sS'hgvs_predicted_protein_consequence' +p39 +(dp40 +S'tlr' +p41 
+S'NP_002465.1:p.(Thr1012Val)' +p42 +sS'slr' +p43 +S'NP_002465.1:p.(T1012V)' +p44 +ssS'submitted_variant' +p45 +S'chr16:15832508_15832509delinsAC' +p46 +sS'genome_context_intronic_sequence' +p47 +g4 +sS'hgvs_lrg_variant' +p48 +g4 +sS'hgvs_transcript_variant' +p49 +S'NM_002474.2:c.3034_3035inv' +p50 +sS'hgvs_refseqgene_variant' +p51 +g4 +sS'primary_assembly_loci' +p52 +(dp53 +S'grch38' +p54 +(dp55 +g14 +S'NC_000016.10:g.15738651_15738652inv' +p56 +sg16 +(dp57 +g18 +S'16' +p58 +sg20 +S'GT' +p59 +sg22 +S'15738651' +p60 +sg24 +S'AC' +p61 +sssS'grch37' +p62 +(dp63 +g14 +S'NC_000016.9:g.15832508_15832509inv' +p64 +sg16 +(dp65 +g18 +g58 +sg20 +S'GT' +p66 +sg22 +S'15832508' +p67 +sg24 +S'AC' +p68 +sssg27 +(dp69 +g14 +S'NC_000016.10:g.15738651_15738652inv' +p70 +sg16 +(dp71 +g18 +S'chr16' +p72 +sg20 +S'GT' +p73 +sg22 +S'15738651' +p74 +sg24 +S'AC' +p75 +sssS'hg19' +p76 +(dp77 +g14 +S'NC_000016.9:g.15832508_15832509inv' +p78 +sg16 +(dp79 +g18 +g72 +sg20 +S'GT' +p80 +sg22 +S'15832508' +p81 +sg24 +S'AC' +p82 +ssssS'reference_sequence_records' +p83 +(dp84 +S'protein' +p85 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_002465.1' +p86 +sS'transcript' +p87 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_002474.2' +p88 +sssS'NM_022844.2:c.3034_3035inv' +p89 +(dp90 +g3 +g4 +sg5 +(lp91 +S'RefSeqGene record not available' +p92 +asg8 +g4 +sg9 +(lp93 +(dp94 +S'grch38' +p95 +(dp96 +g14 +S'NT_187607.1:g.1396662_1396663inv' +p97 +sg16 +(dp98 +g18 +g19 +sg20 +S'GT' +p99 +sg22 +S'1396662' +p100 +sg24 +S'AC' +p101 +sssa(dp102 +g27 +(dp103 +g14 +S'NT_187607.1:g.1396662_1396663inv' +p104 +sg16 +(dp105 +g18 +g31 +sg20 +S'GT' +p106 +sg22 +S'1396662' +p107 +sg24 +S'AC' +p108 +sssasg35 +VHomo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2A, mRNA +p109 +sg37 +S'MYH11' +p110 +sg39 +(dp111 +g41 +S'NP_074035.1:p.(Thr1012Val)' +p112 +sg43 +S'NP_074035.1:p.(T1012V)' +p113 +ssg45 +g46 +sg47 +g4 +sg48 +g4 +sg49 +S'NM_022844.2:c.3034_3035inv' +p114 +sg51 +g4 +sg52 +(dp115 +S'grch38' +p116 +(dp117 +g14 
+S'NC_000016.10:g.15738651_15738652inv' +p118 +sg16 +(dp119 +g18 +g58 +sg20 +S'GT' +p120 +sg22 +S'15738651' +p121 +sg24 +S'AC' +p122 +sssS'grch37' +p123 +(dp124 +g14 +S'NC_000016.9:g.15832508_15832509inv' +p125 +sg16 +(dp126 +g18 +g58 +sg20 +S'GT' +p127 +sg22 +S'15832508' +p128 +sg24 +S'AC' +p129 +sssg27 +(dp130 +g14 +S'NC_000016.10:g.15738651_15738652inv' +p131 +sg16 +(dp132 +g18 +g72 +sg20 +S'GT' +p133 +sg22 +S'15738651' +p134 +sg24 +S'AC' +p135 +sssS'hg19' +p136 +(dp137 +g14 +S'NC_000016.9:g.15832508_15832509inv' +p138 +sg16 +(dp139 +g18 +g72 +sg20 +S'GT' +p140 +sg22 +S'15832508' +p141 +sg24 +S'AC' +p142 +ssssg83 +(dp143 +g85 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_074035.1' +p144 +sg87 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_022844.2' +p145 +sssS'NM_001040114.1:c.3055_3056inv' +p146 +(dp147 +g3 +g4 +sg5 +(lp148 +S'RefSeqGene record not available' +p149 +asg8 +g4 +sg9 +(lp150 +(dp151 +S'grch38' +p152 +(dp153 +g14 +S'NT_187607.1:g.1396662_1396663inv' +p154 +sg16 +(dp155 +g18 +g19 +sg20 +S'GT' +p156 +sg22 +S'1396662' +p157 +sg24 +S'AC' +p158 +sssa(dp159 +g27 +(dp160 +g14 +S'NT_187607.1:g.1396662_1396663inv' +p161 +sg16 +(dp162 +g18 +g31 +sg20 +S'GT' +p163 +sg22 +S'1396662' +p164 +sg24 +S'AC' +p165 +sssasg35 +VHomo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1B, mRNA +p166 +sg37 +S'MYH11' +p167 +sg39 +(dp168 +g41 +S'NP_001035203.1:p.(Thr1019Val)' +p169 +sg43 +S'NP_001035203.1:p.(T1019V)' +p170 +ssg45 +g46 +sg47 +g4 +sg48 +g4 +sg49 +S'NM_001040114.1:c.3055_3056inv' +p171 +sg51 +g4 +sg52 +(dp172 +S'grch38' +p173 +(dp174 +g14 +S'NC_000016.10:g.15738651_15738652inv' +p175 +sg16 +(dp176 +g18 +g58 +sg20 +S'GT' +p177 +sg22 +S'15738651' +p178 +sg24 +S'AC' +p179 +sssS'grch37' +p180 +(dp181 +g14 +S'NC_000016.9:g.15832508_15832509inv' +p182 +sg16 +(dp183 +g18 +g58 +sg20 +S'GT' +p184 +sg22 +S'15832508' +p185 +sg24 +S'AC' +p186 +sssg27 +(dp187 +g14 +S'NC_000016.10:g.15738651_15738652inv' +p188 +sg16 +(dp189 +g18 +g72 +sg20 +S'GT' +p190 +sg22 +S'15738651' 
+p191 +sg24 +S'AC' +p192 +sssS'hg19' +p193 +(dp194 +g14 +S'NC_000016.9:g.15832508_15832509inv' +p195 +sg16 +(dp196 +g18 +g72 +sg20 +S'GT' +p197 +sg22 +S'15832508' +p198 +sg24 +S'AC' +p199 +ssssg83 +(dp200 +g85 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035203.1' +p201 +sg87 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040114.1' +p202 +sssS'flag' +p203 +S'gene_variant' +p204 +sS'NM_001040113.1:c.3055_3056inv' +p205 +(dp206 +g3 +g4 +sg5 +(lp207 +S'RefSeqGene record not available' +p208 +asg8 +g4 +sg9 +(lp209 +(dp210 +S'grch38' +p211 +(dp212 +g14 +S'NT_187607.1:g.1396662_1396663inv' +p213 +sg16 +(dp214 +g18 +g19 +sg20 +S'GT' +p215 +sg22 +S'1396662' +p216 +sg24 +S'AC' +p217 +sssa(dp218 +g27 +(dp219 +g14 +S'NT_187607.1:g.1396662_1396663inv' +p220 +sg16 +(dp221 +g18 +g31 +sg20 +S'GT' +p222 +sg22 +S'1396662' +p223 +sg24 +S'AC' +p224 +sssasg35 +VHomo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2B, mRNA +p225 +sg37 +S'MYH11' +p226 +sg39 +(dp227 +g41 +S'NP_001035202.1:p.(Thr1019Val)' +p228 +sg43 +S'NP_001035202.1:p.(T1019V)' +p229 +ssg45 +g46 +sg47 +g4 +sg48 +g4 +sg49 +S'NM_001040113.1:c.3055_3056inv' +p230 +sg51 +g4 +sg52 +(dp231 +S'grch38' +p232 +(dp233 +g14 +S'NC_000016.10:g.15738651_15738652inv' +p234 +sg16 +(dp235 +g18 +g58 +sg20 +S'GT' +p236 +sg22 +S'15738651' +p237 +sg24 +S'AC' +p238 +sssS'grch37' +p239 +(dp240 +g14 +S'NC_000016.9:g.15832508_15832509inv' +p241 +sg16 +(dp242 +g18 +g58 +sg20 +S'GT' +p243 +sg22 +S'15832508' +p244 +sg24 +S'AC' +p245 +sssg27 +(dp246 +g14 +S'NC_000016.10:g.15738651_15738652inv' +p247 +sg16 +(dp248 +g18 +g72 +sg20 +S'GT' +p249 +sg22 +S'15738651' +p250 +sg24 +S'AC' +p251 +sssS'hg19' +p252 +(dp253 +g14 +S'NC_000016.9:g.15832508_15832509inv' +p254 +sg16 +(dp255 +g18 +g72 +sg20 +S'GT' +p256 +sg22 +S'15832508' +p257 +sg24 +S'AC' +p258 +ssssg83 +(dp259 +g85 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035202.1' +p260 +sg87 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040113.1' +p261 +sssS'metadata' +p262 +(dp263 
+S'variantvalidator_hgvs_version' +p264 +S'1.1.3' +p265 +sS'uta_schema' +p266 +S'uta_20180821' +p267 +sS'seqrepo_db' +p268 +S'2018-08-21' +p269 +sS'variantvalidator_version' +p270 +S'v0.2' +p271 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant120.txt b/VariantValidator/testing/testOutputsMasterITS/variant120.txt new file mode 100644 index 00000000..725d40b4 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant120.txt @@ -0,0 +1,179 @@ +(dp0 +S'NM_000088.3:c.4394_4395inv' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NM_000088.3:c.4392_*2inv normalized to NM_000088.3:c.4394_4395inv' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p13 +sS'gene_symbol' +p14 +S'COL1A1' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_000079.2:p.(Ter1465PheextTer27)' +p19 +sS'slr' +p20 +S'NP_000079.2:p.(*1465Fext*27)' +p21 +ssS'submitted_variant' +p22 +S'NM_000088.3:c.4392_*2inv' +p23 +sS'genome_context_intronic_sequence' +p24 +g4 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_000088.3:c.4394_4395inv' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000017.10:g.48262863_48262864inv' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'chr17' +p38 +sS'ref' +p39 +S'TT' +p40 +sS'pos' +p41 +S'48262863' +p42 +sS'alt' +p43 +S'AA' +p44 +sssS'hg38' +p45 +(dp46 +g33 +S'NC_000017.11:g.50185502_50185503inv' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +S'TT' +p49 +sg41 +S'50185502' +p50 +sg43 +S'AA' +p51 +sssS'grch37' +p52 +(dp53 +g33 +S'NC_000017.10:g.48262863_48262864inv' +p54 +sg35 +(dp55 +g37 +S'17' +p56 +sg39 +S'TT' +p57 +sg41 +S'48262863' +p58 +sg43 +S'AA' +p59 +sssS'grch38' +p60 
+(dp61 +g33 +S'NC_000017.11:g.50185502_50185503inv' +p62 +sg35 +(dp63 +g37 +g56 +sg39 +S'TT' +p64 +sg41 +S'50185502' +p65 +sg43 +S'AA' +p66 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p72 +sssS'flag' +p73 +S'gene_variant' +p74 +sS'metadata' +p75 +(dp76 +S'variantvalidator_hgvs_version' +p77 +S'1.1.3' +p78 +sS'uta_schema' +p79 +S'uta_20180821' +p80 +sS'seqrepo_db' +p81 +S'2018-08-21' +p82 +sS'variantvalidator_version' +p83 +S'v0.2' +p84 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant121.txt b/VariantValidator/testing/testOutputsMasterITS/variant121.txt new file mode 100644 index 00000000..85118fd0 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant121.txt @@ -0,0 +1,177 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_000088.3:c.4392_*5inv' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'RefSeqGene record not available' +p19 +asS'refseqgene_context_intronic_sequence' +p20 +g16 +sS'alt_genomic_loci' +p21 +(lp22 +sS'transcript_description' +p23 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p24 +sS'gene_symbol' +p25 +S'COL1A1' +p26 +sS'hgvs_predicted_protein_consequence' +p27 +(dp28 +S'tlr' +p29 +S'NP_000079.2:p.?' +p30 +sS'slr' +p31 +S'NP_000079.2:p.?' 
+p32 +ssS'submitted_variant' +p33 +S'NM_000088.3:c.4392_*5inv' +p34 +sS'genome_context_intronic_sequence' +p35 +g16 +sS'hgvs_lrg_variant' +p36 +g16 +sS'hgvs_transcript_variant' +p37 +S'NM_000088.3:c.4392_*5inv' +p38 +sS'hgvs_refseqgene_variant' +p39 +g16 +sS'primary_assembly_loci' +p40 +(dp41 +S'hg19' +p42 +(dp43 +S'hgvs_genomic_description' +p44 +S'NC_000017.10:g.48262858_48262866inv' +p45 +sS'vcf' +p46 +(dp47 +S'chr' +p48 +S'chr17' +p49 +sS'ref' +p50 +S'GAGTTTA' +p51 +sS'pos' +p52 +S'48262859' +p53 +sS'alt' +p54 +S'TAAACTC' +p55 +sssS'hg38' +p56 +(dp57 +g44 +S'NC_000017.11:g.50185497_50185505inv' +p58 +sg46 +(dp59 +g48 +g49 +sg50 +S'GAGTTTA' +p60 +sg52 +S'50185498' +p61 +sg54 +S'TAAACTC' +p62 +sssS'grch37' +p63 +(dp64 +g44 +S'NC_000017.10:g.48262858_48262866inv' +p65 +sg46 +(dp66 +g48 +S'17' +p67 +sg50 +S'GAGTTTA' +p68 +sg52 +S'48262859' +p69 +sg54 +S'TAAACTC' +p70 +sssS'grch38' +p71 +(dp72 +g44 +S'NC_000017.11:g.50185497_50185505inv' +p73 +sg46 +(dp74 +g48 +g67 +sg50 +S'GAGTTTA' +p75 +sg52 +S'50185498' +p76 +sg54 +S'TAAACTC' +p77 +ssssS'reference_sequence_records' +p78 +(dp79 +S'protein' +p80 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p81 +sS'transcript' +p82 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p83 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant122.txt b/VariantValidator/testing/testOutputsMasterITS/variant122.txt new file mode 100644 index 00000000..732ad2d7 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant122.txt @@ -0,0 +1,177 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.4390_*7inv' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p14 +sS'gene_symbol' +p15 +S'COL1A1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_000079.2:p.?' +p20 +sS'slr' +p21 +S'NP_000079.2:p.?' +p22 +ssS'submitted_variant' +p23 +S'NM_000088.3:c.4390_*7inv' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_000088.3:c.4390_*7inv' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000017.10:g.48262856_48262868inv' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr17' +p39 +sS'ref' +p40 +S'AGGGAGTTTACAG' +p41 +sS'pos' +p42 +S'48262856' +p43 +sS'alt' +p44 +S'CTGTAAACTCCCT' +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000017.11:g.50185495_50185507inv' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +S'AGGGAGTTTACAG' +p50 +sg42 +S'50185495' +p51 +sg44 +S'CTGTAAACTCCCT' +p52 +sssS'grch37' +p53 +(dp54 +g34 +S'NC_000017.10:g.48262856_48262868inv' +p55 +sg36 +(dp56 +g38 +S'17' +p57 +sg40 +S'AGGGAGTTTACAG' +p58 +sg42 +S'48262856' +p59 +sg44 +S'CTGTAAACTCCCT' +p60 +sssS'grch38' +p61 +(dp62 +g34 +S'NC_000017.11:g.50185495_50185507inv' +p63 +sg36 +(dp64 +g38 +g57 +sg40 +S'AGGGAGTTTACAG' +p65 +sg42 +S'50185495' +p66 +sg44 +S'CTGTAAACTCCCT' +p67 +ssssS'reference_sequence_records' +p68 +(dp69 
+S'protein' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p71 +sS'transcript' +p72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p73 +sssS'metadata' +p74 +(dp75 +S'variantvalidator_hgvs_version' +p76 +S'1.1.3' +p77 +sS'uta_schema' +p78 +S'uta_20180821' +p79 +sS'seqrepo_db' +p80 +S'2018-08-21' +p81 +sS'variantvalidator_version' +p82 +S'v0.2' +p83 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant123.txt b/VariantValidator/testing/testOutputsMasterITS/variant123.txt new file mode 100644 index 00000000..0da4596a --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant123.txt @@ -0,0 +1,80 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'insertion length must be 1' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +g4 +sS'gene_symbol' +p12 +g4 +sS'hgvs_predicted_protein_consequence' +p13 +(dp14 +S'tlr' +p15 +g4 +sS'slr' +p16 +g4 +ssS'submitted_variant' +p17 +S'NM_005732.3:c.2923-5insT' +p18 +sS'genome_context_intronic_sequence' +p19 +g4 +sS'hgvs_lrg_variant' +p20 +g4 +sS'hgvs_transcript_variant' +p21 +g4 +sS'hgvs_refseqgene_variant' +p22 +g4 +sS'primary_assembly_loci' +p23 +(dp24 +sS'reference_sequence_records' +p25 +g4 +ssS'flag' +p26 +S'warning' +p27 +sS'metadata' +p28 +(dp29 +S'variantvalidator_hgvs_version' +p30 +S'1.1.3' +p31 +sS'uta_schema' +p32 +S'uta_20180821' +p33 +sS'seqrepo_db' +p34 +S'2018-08-21' +p35 +sS'variantvalidator_version' +p36 +S'v0.2' +p37 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant124.txt b/VariantValidator/testing/testOutputsMasterITS/variant124.txt new file mode 100644 index 00000000..bab14006 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant124.txt @@ -0,0 +1,80 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'The given coordinate is outside the bounds of the reference sequence.' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +g4 +sS'gene_symbol' +p12 +g4 +sS'hgvs_predicted_protein_consequence' +p13 +(dp14 +S'tlr' +p15 +g4 +sS'slr' +p16 +g4 +ssS'submitted_variant' +p17 +S'NM_198283.1(EYS):c.*743120C>T' +p18 +sS'genome_context_intronic_sequence' +p19 +g4 +sS'hgvs_lrg_variant' +p20 +g4 +sS'hgvs_transcript_variant' +p21 +g4 +sS'hgvs_refseqgene_variant' +p22 +g4 +sS'primary_assembly_loci' +p23 +(dp24 +sS'reference_sequence_records' +p25 +g4 +ssS'flag' +p26 +S'warning' +p27 +sS'metadata' +p28 +(dp29 +S'variantvalidator_hgvs_version' +p30 +S'1.1.3' +p31 +sS'uta_schema' +p32 +S'uta_20180821' +p33 +sS'seqrepo_db' +p34 +S'2018-08-21' +p35 +sS'variantvalidator_version' +p36 +S'v0.2' +p37 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant125.txt b/VariantValidator/testing/testOutputsMasterITS/variant125.txt new file mode 100644 index 00000000..612a82b3 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant125.txt @@ -0,0 +1,80 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'start or end or both are beyond the bounds of transcript record' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +g4 +sS'gene_symbol' +p12 +g4 +sS'hgvs_predicted_protein_consequence' +p13 +(dp14 +S'tlr' +p15 +g4 +sS'slr' +p16 +g4 +ssS'submitted_variant' +p17 +S'NM_133379.4(TTN):c.*265+26591C>T' +p18 +sS'genome_context_intronic_sequence' +p19 +g4 +sS'hgvs_lrg_variant' +p20 +g4 +sS'hgvs_transcript_variant' +p21 +g4 +sS'hgvs_refseqgene_variant' +p22 +g4 +sS'primary_assembly_loci' +p23 +(dp24 +sS'reference_sequence_records' +p25 +g4 +ssS'flag' +p26 +S'warning' +p27 +sS'metadata' +p28 +(dp29 +S'variantvalidator_hgvs_version' +p30 +S'1.1.3' +p31 +sS'uta_schema' +p32 +S'uta_20180821' +p33 +sS'seqrepo_db' +p34 +S'2018-08-21' +p35 +sS'variantvalidator_version' +p36 +S'v0.2' +p37 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant126.txt b/VariantValidator/testing/testOutputsMasterITS/variant126.txt new file mode 100644 index 00000000..a93ce03c --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant126.txt @@ -0,0 +1,177 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.589-2_589-1delinsG' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NM_000088.3:c.589-2_589-1AG>G automapped to NM_000088.3:c.589-2_589-1delAGinsG' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g6 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p15 +sS'gene_symbol' +p16 +S'COL1A1' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_000079.2:p.?' +p21 +sS'slr' +p22 +S'NP_000079.2:p.?' +p23 +ssS'submitted_variant' +p24 +S'NM_000088.3:c.589-2_589-1AG>G' +p25 +sS'genome_context_intronic_sequence' +p26 +S'NC_000017.10(NM_000088.3):c.589-2_589-1delinsG' +p27 +sS'hgvs_lrg_variant' +p28 +g6 +sS'hgvs_transcript_variant' +p29 +S'NM_000088.3:c.589-2_589-1delinsG' +p30 +sS'hgvs_refseqgene_variant' +p31 +g6 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000017.10:g.48275364_48275365delinsC' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr17' +p41 +sS'ref' +p42 +S'CT' +p43 +sS'pos' +p44 +S'48275364' +p45 +sS'alt' +p46 +S'C' +p47 +sssS'hg38' +p48 +(dp49 +g36 +S'NC_000017.11:g.50198003_50198004delinsC' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +S'CT' +p52 +sg44 +S'50198003' +p53 +sg46 +g47 +sssS'grch37' +p54 +(dp55 +g36 +S'NC_000017.10:g.48275364_48275365delinsC' +p56 +sg38 +(dp57 +g40 +S'17' +p58 +sg42 +S'CT' +p59 +sg44 +S'48275364' +p60 +sg46 +g47 +sssS'grch38' +p61 +(dp62 +g36 +S'NC_000017.11:g.50198003_50198004delinsC' +p63 +sg38 +(dp64 +g40 +g58 +sg42 +S'CT' +p65 +sg44 
+S'50198003' +p66 +sg46 +g47 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p72 +sssS'metadata' +p73 +(dp74 +S'variantvalidator_hgvs_version' +p75 +S'1.1.3' +p76 +sS'uta_schema' +p77 +S'uta_20180821' +p78 +sS'seqrepo_db' +p79 +S'2018-08-21' +p80 +sS'variantvalidator_version' +p81 +S'v0.2' +p82 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant127.txt b/VariantValidator/testing/testOutputsMasterITS/variant127.txt new file mode 100644 index 00000000..7921b40d --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant127.txt @@ -0,0 +1,175 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.642+1_642+2delinsG' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p14 +sS'gene_symbol' +p15 +S'COL1A1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_000079.2:p.?' +p20 +sS'slr' +p21 +S'NP_000079.2:p.?' 
+p22 +ssS'submitted_variant' +p23 +S'NM_000088.3:c.642+1_642+2delGTinsG' +p24 +sS'genome_context_intronic_sequence' +p25 +S'NC_000017.10(NM_000088.3):c.642+1_642+2delinsG' +p26 +sS'hgvs_lrg_variant' +p27 +g6 +sS'hgvs_transcript_variant' +p28 +S'NM_000088.3:c.642+1_642+2delinsG' +p29 +sS'hgvs_refseqgene_variant' +p30 +g6 +sS'primary_assembly_loci' +p31 +(dp32 +S'hg19' +p33 +(dp34 +S'hgvs_genomic_description' +p35 +S'NC_000017.10:g.48275308_48275309delinsC' +p36 +sS'vcf' +p37 +(dp38 +S'chr' +p39 +S'chr17' +p40 +sS'ref' +p41 +S'TA' +p42 +sS'pos' +p43 +S'48275307' +p44 +sS'alt' +p45 +S'T' +p46 +sssS'hg38' +p47 +(dp48 +g35 +S'NC_000017.11:g.50197947_50197948delinsC' +p49 +sg37 +(dp50 +g39 +g40 +sg41 +S'TA' +p51 +sg43 +S'50197946' +p52 +sg45 +g46 +sssS'grch37' +p53 +(dp54 +g35 +S'NC_000017.10:g.48275308_48275309delinsC' +p55 +sg37 +(dp56 +g39 +S'17' +p57 +sg41 +S'TA' +p58 +sg43 +S'48275307' +p59 +sg45 +g46 +sssS'grch38' +p60 +(dp61 +g35 +S'NC_000017.11:g.50197947_50197948delinsC' +p62 +sg37 +(dp63 +g39 +g57 +sg41 +S'TA' +p64 +sg43 +S'50197946' +p65 +sg45 +g46 +ssssS'reference_sequence_records' +p66 +(dp67 +S'protein' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p69 +sS'transcript' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p71 +sssS'metadata' +p72 +(dp73 +S'variantvalidator_hgvs_version' +p74 +S'1.1.3' +p75 +sS'uta_schema' +p76 +S'uta_20180821' +p77 +sS'seqrepo_db' +p78 +S'2018-08-21' +p79 +sS'variantvalidator_version' +p80 +S'v0.2' +p81 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant128.txt b/VariantValidator/testing/testOutputsMasterITS/variant128.txt new file mode 100644 index 00000000..06085851 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant128.txt @@ -0,0 +1,80 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'insertion length must be 1' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +g4 +sS'gene_symbol' +p12 +g4 +sS'hgvs_predicted_protein_consequence' +p13 +(dp14 +S'tlr' +p15 +g4 +sS'slr' +p16 +g4 +ssS'submitted_variant' +p17 +S'NM_004415.3:c.1-1insA' +p18 +sS'genome_context_intronic_sequence' +p19 +g4 +sS'hgvs_lrg_variant' +p20 +g4 +sS'hgvs_transcript_variant' +p21 +g4 +sS'hgvs_refseqgene_variant' +p22 +g4 +sS'primary_assembly_loci' +p23 +(dp24 +sS'reference_sequence_records' +p25 +g4 +ssS'flag' +p26 +S'warning' +p27 +sS'metadata' +p28 +(dp29 +S'variantvalidator_hgvs_version' +p30 +S'1.1.3' +p31 +sS'uta_schema' +p32 +S'uta_20180821' +p33 +sS'seqrepo_db' +p34 +S'2018-08-21' +p35 +sS'variantvalidator_version' +p36 +S'v0.2' +p37 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant129.txt b/VariantValidator/testing/testOutputsMasterITS/variant129.txt new file mode 100644 index 00000000..8f7b95f8 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant129.txt @@ -0,0 +1,174 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_004415.3:c.-1_1insA' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens desmoplakin (DSP), transcript variant 1, mRNA +p14 +sS'gene_symbol' +p15 +S'DSP' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_004406.2:p.(Met1?)' +p20 +sS'slr' +p21 +S'NP_004406.2:p.(M1?)' +p22 +ssS'submitted_variant' +p23 +S'NM_004415.3:c.-1_1insA' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_004415.3:c.-1_1insA' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000006.11:g.7542148_7542149insA' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr6' +p39 +sS'ref' +p40 +S'A' +p41 +sS'pos' +p42 +S'7542149' +p43 +sS'alt' +p44 +S'AA' +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000006.12:g.7541915_7541916insA' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +g41 +sg42 +S'7541916' +p50 +sg44 +S'AA' +p51 +sssS'grch37' +p52 +(dp53 +g34 +S'NC_000006.11:g.7542148_7542149insA' +p54 +sg36 +(dp55 +g38 +S'6' +p56 +sg40 +g41 +sg42 +S'7542149' +p57 +sg44 +S'AA' +p58 +sssS'grch38' +p59 +(dp60 +g34 +S'NC_000006.12:g.7541915_7541916insA' +p61 +sg36 +(dp62 +g38 +g56 +sg40 +g41 +sg42 +S'7541916' +p63 +sg44 +S'AA' +p64 +ssssS'reference_sequence_records' +p65 +(dp66 +S'protein' +p67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004406.2' +p68 +sS'transcript' +p69 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004415.3' +p70 +sssS'metadata' +p71 +(dp72 +S'variantvalidator_hgvs_version' +p73 +S'1.1.3' +p74 +sS'uta_schema' +p75 +S'uta_20180821' +p76 +sS'seqrepo_db' +p77 +S'2018-08-21' +p78 +sS'variantvalidator_version' +p79 +S'v0.2' +p80 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant13.txt b/VariantValidator/testing/testOutputsMasterITS/variant13.txt new file mode 100644 index 00000000..e91d2065 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant13.txt @@ -0,0 +1,177 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.589-1_589delinsG' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NM_000088.3:c.589-1GG>G automapped to NM_000088.3:c.589-1_589delGGinsG' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g6 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p15 +sS'gene_symbol' +p16 +S'COL1A1' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_000079.2:p.?' +p21 +sS'slr' +p22 +S'NP_000079.2:p.?' 
+p23 +ssS'submitted_variant' +p24 +S'NM_000088.3:c.589-1GG>G' +p25 +sS'genome_context_intronic_sequence' +p26 +S'NC_000017.10(NM_000088.3):c.589-1_589delinsG' +p27 +sS'hgvs_lrg_variant' +p28 +g6 +sS'hgvs_transcript_variant' +p29 +S'NM_000088.3:c.589-1_589delinsG' +p30 +sS'hgvs_refseqgene_variant' +p31 +g6 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000017.10:g.48275363_48275364delinsC' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr17' +p41 +sS'ref' +p42 +S'AC' +p43 +sS'pos' +p44 +S'48275361' +p45 +sS'alt' +p46 +S'A' +p47 +sssS'hg38' +p48 +(dp49 +g36 +S'NC_000017.11:g.50198002_50198003delinsC' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +S'AC' +p52 +sg44 +S'50198000' +p53 +sg46 +g47 +sssS'grch37' +p54 +(dp55 +g36 +S'NC_000017.10:g.48275363_48275364delinsC' +p56 +sg38 +(dp57 +g40 +S'17' +p58 +sg42 +S'AC' +p59 +sg44 +S'48275361' +p60 +sg46 +g47 +sssS'grch38' +p61 +(dp62 +g36 +S'NC_000017.11:g.50198002_50198003delinsC' +p63 +sg38 +(dp64 +g40 +g58 +sg42 +S'AC' +p65 +sg44 +S'50198000' +p66 +sg46 +g47 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p72 +sssS'metadata' +p73 +(dp74 +S'variantvalidator_hgvs_version' +p75 +S'1.1.3' +p76 +sS'uta_schema' +p77 +S'uta_20180821' +p78 +sS'seqrepo_db' +p79 +S'2018-08-21' +p80 +sS'variantvalidator_version' +p81 +S'v0.2' +p82 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant130.txt b/VariantValidator/testing/testOutputsMasterITS/variant130.txt new file mode 100644 index 00000000..1efeccff --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant130.txt @@ -0,0 +1,80 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'start or end or both are beyond the bounds of transcript record' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +g4 +sS'gene_symbol' +p12 +g4 +sS'hgvs_predicted_protein_consequence' +p13 +(dp14 +S'tlr' +p15 +g4 +sS'slr' +p16 +g4 +ssS'submitted_variant' +p17 +S'NM_000273.2:c.1-5028_253del' +p18 +sS'genome_context_intronic_sequence' +p19 +g4 +sS'hgvs_lrg_variant' +p20 +g4 +sS'hgvs_transcript_variant' +p21 +g4 +sS'hgvs_refseqgene_variant' +p22 +g4 +sS'primary_assembly_loci' +p23 +(dp24 +sS'reference_sequence_records' +p25 +g4 +ssS'flag' +p26 +S'warning' +p27 +sS'metadata' +p28 +(dp29 +S'variantvalidator_hgvs_version' +p30 +S'1.1.3' +p31 +sS'uta_schema' +p32 +S'uta_20180821' +p33 +sS'seqrepo_db' +p34 +S'2018-08-21' +p35 +sS'variantvalidator_version' +p36 +S'v0.2' +p37 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant131.txt b/VariantValidator/testing/testOutputsMasterITS/variant131.txt new file mode 100644 index 00000000..9b95a8f7 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant131.txt @@ -0,0 +1,141 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_002929.2:c.1006C>T' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +aS'NM_002929.2:c.1006C>T cannot be mapped directly to genome build GRCh37' +p10 +aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g6 +sS'alt_genomic_loci' +p13 +(lp14 +sS'transcript_description' +p15 +VHomo sapiens G protein-coupled receptor kinase 1 (GRK1), mRNA +p16 +sS'gene_symbol' +p17 +S'GRK1' +p18 +sS'hgvs_predicted_protein_consequence' +p19 +(dp20 +S'tlr' +p21 +S'NP_002920.1:p.(Leu336Phe)' +p22 +sS'slr' +p23 +S'NP_002920.1:p.(L336F)' +p24 +ssS'submitted_variant' +p25 +S'NM_002929.2:c.1006C>T' +p26 +sS'genome_context_intronic_sequence' +p27 +g6 +sS'hgvs_lrg_variant' +p28 +g6 +sS'hgvs_transcript_variant' +p29 +S'NM_002929.2:c.1006C>T' +p30 +sS'hgvs_refseqgene_variant' +p31 +g6 +sS'primary_assembly_loci' +p32 +(dp33 +S'grch38' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000013.11:g.113723094C>T' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'13' +p41 +sS'ref' +p42 +VC +p43 +sS'pos' +p44 +S'113723094' +p45 +sS'alt' +p46 +VT +p47 +sssS'hg38' +p48 +(dp49 +g36 +S'NC_000013.11:g.113723094C>T' +p50 +sg38 +(dp51 +g40 +S'chr13' +p52 +sg42 +g43 +sg44 +S'113723094' +p53 +sg46 +g47 +ssssS'reference_sequence_records' +p54 +(dp55 +S'protein' +p56 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_002920.1' +p57 +sS'transcript' +p58 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_002929.2' +p59 +sssS'metadata' +p60 +(dp61 +S'variantvalidator_hgvs_version' +p62 +S'1.1.3' +p63 +sS'uta_schema' 
+p64 +S'uta_20180821' +p65 +sS'seqrepo_db' +p66 +S'2018-08-21' +p67 +sS'variantvalidator_version' +p68 +S'v0.2' +p69 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant132.txt b/VariantValidator/testing/testOutputsMasterITS/variant132.txt new file mode 100644 index 00000000..7fac199b --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant132.txt @@ -0,0 +1,167 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NR_125367.1:n.167+18165G>A' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens myosin heavy chain gene cluster antisense RNA (MYHAS), long non-coding RNA +p14 +sS'gene_symbol' +p15 +S'MYHAS' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'Non-coding :n.' +p20 +sS'slr' +p21 +g20 +ssS'submitted_variant' +p22 +S'NR_125367.1:n.167+18165G>A' +p23 +sS'genome_context_intronic_sequence' +p24 +S'NC_000017.10(NR_125367.1):c.167+18165G>A' +p25 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NR_125367.1:n.167+18165G>A' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000017.10:g.10327720G>A' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr17' +p39 +sS'ref' +p40 +S'G' +p41 +sS'pos' +p42 +S'10327720' +p43 +sS'alt' +p44 +S'A' +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000017.11:g.10424403G>A' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +g41 +sg42 +S'10424403' +p50 +sg44 +g45 +sssS'grch37' +p51 +(dp52 +g34 +S'NC_000017.10:g.10327720G>A' +p53 +sg36 +(dp54 +g38 +S'17' +p55 +sg40 +g41 +sg42 +S'10327720' +p56 +sg44 +g45 +sssS'grch38' +p57 +(dp58 +g34 +S'NC_000017.11:g.10424403G>A' +p59 +sg36 +(dp60 +g38 +g55 +sg40 +g41 +sg42 +S'10424403' +p61 +sg44 +g45 +ssssS'reference_sequence_records' 
+p62 +(dp63 +S'transcript' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_125367.1' +p65 +sssS'metadata' +p66 +(dp67 +S'variantvalidator_hgvs_version' +p68 +S'1.1.3' +p69 +sS'uta_schema' +p70 +S'uta_20180821' +p71 +sS'seqrepo_db' +p72 +S'2018-08-21' +p73 +sS'variantvalidator_version' +p74 +S'v0.2' +p75 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant133.txt b/VariantValidator/testing/testOutputsMasterITS/variant133.txt new file mode 100644 index 00000000..b680c64b --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant133.txt @@ -0,0 +1,82 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'Variant coordinate is out of the bound of CDS region (CDS length ' +p7 +aS'2673)' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +g4 +sS'gene_symbol' +p13 +g4 +sS'hgvs_predicted_protein_consequence' +p14 +(dp15 +S'tlr' +p16 +g4 +sS'slr' +p17 +g4 +ssS'submitted_variant' +p18 +S'NM_006005.3:c.3071_3073delinsTTA' +p19 +sS'genome_context_intronic_sequence' +p20 +g4 +sS'hgvs_lrg_variant' +p21 +g4 +sS'hgvs_transcript_variant' +p22 +g4 +sS'hgvs_refseqgene_variant' +p23 +g4 +sS'primary_assembly_loci' +p24 +(dp25 +sS'reference_sequence_records' +p26 +g4 +ssS'flag' +p27 +S'warning' +p28 +sS'metadata' +p29 +(dp30 +S'variantvalidator_hgvs_version' +p31 +S'1.1.3' +p32 +sS'uta_schema' +p33 +S'uta_20180821' +p34 +sS'seqrepo_db' +p35 +S'2018-08-21' +p36 +sS'variantvalidator_version' +p37 +S'v0.2' +p38 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant134.txt b/VariantValidator/testing/testOutputsMasterITS/variant134.txt new file mode 100644 index 00000000..c82897c8 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant134.txt @@ -0,0 +1,80 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'Coding transcript reference sequence input as non-coding transcript (n.) reference sequence. Did you mean NM_000089.3:c.1504_1506del?' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +g4 +sS'gene_symbol' +p12 +g4 +sS'hgvs_predicted_protein_consequence' +p13 +(dp14 +S'tlr' +p15 +g4 +sS'slr' +p16 +g4 +ssS'submitted_variant' +p17 +S'NM_000089.3:n.1504_1506del' +p18 +sS'genome_context_intronic_sequence' +p19 +g4 +sS'hgvs_lrg_variant' +p20 +g4 +sS'hgvs_transcript_variant' +p21 +g4 +sS'hgvs_refseqgene_variant' +p22 +g4 +sS'primary_assembly_loci' +p23 +(dp24 +sS'reference_sequence_records' +p25 +g4 +ssS'flag' +p26 +S'warning' +p27 +sS'metadata' +p28 +(dp29 +S'variantvalidator_hgvs_version' +p30 +S'1.1.3' +p31 +sS'uta_schema' +p32 +S'uta_20180821' +p33 +sS'seqrepo_db' +p34 +S'2018-08-21' +p35 +sS'variantvalidator_version' +p36 +S'v0.2' +p37 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant135.txt b/VariantValidator/testing/testOutputsMasterITS/variant135.txt new file mode 100644 index 00000000..91a17310 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant135.txt @@ -0,0 +1,157 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +sS'refseqgene_context_intronic_sequence' +p7 +g4 +sS'alt_genomic_loci' +p8 +(lp9 +sS'transcript_description' +p10 +S'Homo sapiens mitochondrion, complete genome' +p11 +sS'gene_symbol' +p12 +g4 +sS'hgvs_predicted_protein_consequence' +p13 +(dp14 +S'tlr' +p15 +g4 +sS'slr' +p16 +g4 +ssS'submitted_variant' +p17 +S'NC_012920.1:m.1011C>T' +p18 +sS'genome_context_intronic_sequence' +p19 +g4 +sS'hgvs_lrg_variant' +p20 +g4 +sS'hgvs_transcript_variant' +p21 +g4 +sS'hgvs_refseqgene_variant' +p22 +g4 +sS'primary_assembly_loci' +p23 +(dp24 +S'hg19' +p25 +(dp26 +S'hgvs_genomic_description' +p27 +S'NC_012920.1:m.1011C>T' +p28 +sS'vcf' +p29 +(dp30 +S'chr' +p31 +S'chrM' +p32 +sS'ref' +p33 +S'C' +p34 +sS'pos' +p35 +S'1011' +p36 +sS'alt' +p37 +S'T' +p38 +sssS'hg38' +p39 +(dp40 +g27 +S'NC_012920.1:m.1011C>T' +p41 +sg29 +(dp42 +g31 +g32 +sg33 +g34 +sg35 +S'1011' +p43 +sg37 +g38 +sssS'grch37' +p44 +(dp45 +g27 +S'NC_012920.1:m.1011C>T' +p46 +sg29 +(dp47 +g31 +S'M' +p48 +sg33 +g34 +sg35 +S'1011' +p49 +sg37 +g38 +sssS'grch38' +p50 +(dp51 +g27 +S'NC_012920.1:m.1011C>T' +p52 +sg29 +(dp53 +g31 +g48 +sg33 +g34 +sg35 +S'1011' +p54 +sg37 +g38 +ssssS'reference_sequence_records' +p55 +g4 +ssS'flag' +p56 +S'warning' +p57 +sS'metadata' +p58 +(dp59 +S'variantvalidator_hgvs_version' +p60 +S'1.1.3' +p61 +sS'uta_schema' +p62 +S'uta_20180821' +p63 +sS'seqrepo_db' +p64 +S'2018-08-21' +p65 +sS'variantvalidator_version' +p66 +S'v0.2' +p67 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant136.txt b/VariantValidator/testing/testOutputsMasterITS/variant136.txt new file mode 100644 index 00000000..ffd271ae --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant136.txt @@ -0,0 +1,259 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_014611.1:c.9879T>C' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'A more recent version of the selected reference sequence NM_014611.1 is available (NM_014611.2)' +p19 +aS'NM_014611.2:c.9879C= MUST be fully validated prior to use in reports' +p20 +aS'select_variants=NM_014611.2:c.9879C=' +p21 +aS'RefSeqGene record not available' +p22 +asS'refseqgene_context_intronic_sequence' +p23 +g16 +sS'alt_genomic_loci' +p24 +(lp25 +sS'transcript_description' +p26 +VHomo sapiens MDN1, midasin homolog (yeast) (MDN1), mRNA +p27 +sS'gene_symbol' +p28 +S'MDN1' +p29 +sS'hgvs_predicted_protein_consequence' +p30 +(dp31 +S'tlr' +p32 +S'NP_055426.1:p.(Val3293=)' +p33 +sS'slr' +p34 +S'NP_055426.1:p.(V3293=)' +p35 +ssS'submitted_variant' +p36 +S'NC_000006.11:g.90403795G=' +p37 +sS'genome_context_intronic_sequence' +p38 +g16 +sS'hgvs_lrg_variant' +p39 +g16 +sS'hgvs_transcript_variant' +p40 +S'NM_014611.1:c.9879T>C' +p41 +sS'hgvs_refseqgene_variant' +p42 +g16 +sS'primary_assembly_loci' +p43 +(dp44 +S'hg19' +p45 +(dp46 +S'hgvs_genomic_description' +p47 +S'NC_000006.11:g.90403795G=' +p48 +sS'vcf' +p49 +(dp50 +S'chr' +p51 +S'chr6' +p52 +sS'ref' +p53 +S'G' +p54 +sS'pos' +p55 +S'90403795' +p56 +sS'alt' +p57 +g54 +sssS'grch37' +p58 +(dp59 +g47 +S'NC_000006.11:g.90403795G=' +p60 +sg49 +(dp61 +g51 +S'6' +p62 +sg53 +g54 +sg55 +S'90403795' +p63 +sg57 +g54 +ssssS'reference_sequence_records' +p64 
+(dp65 +S'protein' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1' +p67 +sS'transcript' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.1' +p69 +sssS'NM_014611.2:c.9879C=' +p70 +(dp71 +g15 +g16 +sg17 +(lp72 +S'RefSeqGene record not available' +p73 +asg23 +g16 +sg24 +(lp74 +sg26 +VHomo sapiens midasin AAA ATPase 1 (MDN1), mRNA +p75 +sg28 +S'MDN1' +p76 +sg30 +(dp77 +g32 +S'NP_055426.1:p.(Val3293=)' +p78 +sg34 +S'NP_055426.1:p.(V3293=)' +p79 +ssg36 +g37 +sg38 +g16 +sg39 +g16 +sg40 +S'NM_014611.2:c.9879C=' +p80 +sg42 +g16 +sg43 +(dp81 +S'hg19' +p82 +(dp83 +g47 +S'NC_000006.11:g.90403795G=' +p84 +sg49 +(dp85 +g51 +g52 +sg53 +VG +p86 +sg55 +S'90403795' +p87 +sg57 +g86 +sssS'hg38' +p88 +(dp89 +g47 +S'NC_000006.12:g.89694076G=' +p90 +sg49 +(dp91 +g51 +g52 +sg53 +g86 +sg55 +S'89694076' +p92 +sg57 +g86 +sssS'grch37' +p93 +(dp94 +g47 +S'NC_000006.11:g.90403795G=' +p95 +sg49 +(dp96 +g51 +g62 +sg53 +g86 +sg55 +S'90403795' +p97 +sg57 +g86 +sssS'grch38' +p98 +(dp99 +g47 +S'NC_000006.12:g.89694076G=' +p100 +sg49 +(dp101 +g51 +g62 +sg53 +g86 +sg55 +S'89694076' +p102 +sg57 +g86 +ssssg64 +(dp103 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1' +p104 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.2' +p105 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant137.txt b/VariantValidator/testing/testOutputsMasterITS/variant137.txt new file mode 100644 index 00000000..3c3c9142 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant137.txt @@ -0,0 +1,304 @@ +(dp0 +S'NM_000130.4:c.1602del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'Not stating ALT bases is ambiguous because VCF specification 4.0 would treat 1-169519049-T-. as a deletion whereas VCF specification 4.1 onwards would treat 1-169519049-T-. 
as ALT = REF' +p7 +aS'VariantValidator has output both alternatives' +p8 +aS'NC_000001.10:g.169519048TT>T automapped to NC_000001.10:g.169519049delT' +p9 +aS'NM_000130.4:c.1601del normalized to NM_000130.4:c.1602del' +p10 +aS'RefSeqGene record not available' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g4 +sS'alt_genomic_loci' +p13 +(lp14 +sS'transcript_description' +p15 +VHomo sapiens coagulation factor V (F5), mRNA +p16 +sS'gene_symbol' +p17 +S'F5' +p18 +sS'hgvs_predicted_protein_consequence' +p19 +(dp20 +S'tlr' +p21 +S'NP_000121.2:p.(Arg534GlnfsTer40)' +p22 +sS'slr' +p23 +S'NP_000121.2:p.(R534Qfs*40)' +p24 +ssS'submitted_variant' +p25 +S'1-169519049-T-.' +p26 +sS'genome_context_intronic_sequence' +p27 +g4 +sS'hgvs_lrg_variant' +p28 +g4 +sS'hgvs_transcript_variant' +p29 +S'NM_000130.4:c.1602del' +p30 +sS'hgvs_refseqgene_variant' +p31 +g4 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000001.10:g.169519048del' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr1' +p41 +sS'ref' +p42 +S'CT' +p43 +sS'pos' +p44 +S'169519047' +p45 +sS'alt' +p46 +S'C' +p47 +sssS'hg38' +p48 +(dp49 +g36 +S'NC_000001.11:g.169549810del' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +S'CT' +p52 +sg44 +S'169549809' +p53 +sg46 +g47 +sssS'grch37' +p54 +(dp55 +g36 +S'NC_000001.10:g.169519048del' +p56 +sg38 +(dp57 +g40 +S'1' +p58 +sg42 +S'CT' +p59 +sg44 +S'169519047' +p60 +sg46 +g47 +sssS'grch38' +p61 +(dp62 +g36 +S'NC_000001.11:g.169549810del' +p63 +sg38 +(dp64 +g40 +g58 +sg42 +S'CT' +p65 +sg44 +S'169549809' +p66 +sg46 +g47 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000121.2' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000130.4' +p72 +sssS'flag' +p73 +S'gene_variant' +p74 +sS'NM_000130.4:c.1601G>A' +p75 +(dp76 +g3 +g4 +sg5 +(lp77 +S'Not stating ALT bases is ambiguous because VCF specification 4.0 would treat 1-169519049-T-. 
as a deletion whereas VCF specification 4.1 onwards would treat 1-169519049-T-. as ALT = REF' +p78 +aS'VariantValidator has output both alternatives' +p79 +aS'RefSeqGene record not available' +p80 +asg12 +g4 +sg13 +(lp81 +sg15 +VHomo sapiens coagulation factor V (F5), mRNA +p82 +sg17 +S'F5' +p83 +sg19 +(dp84 +g21 +S'NP_000121.2:p.(Arg534Gln)' +p85 +sg23 +S'NP_000121.2:p.(R534Q)' +p86 +ssg25 +g26 +sg27 +g4 +sg28 +g4 +sg29 +S'NM_000130.4:c.1601G>A' +p87 +sg31 +g4 +sg32 +(dp88 +S'hg19' +p89 +(dp90 +g36 +S'NC_000001.10:g.169519049T=' +p91 +sg38 +(dp92 +g40 +g41 +sg42 +S'T' +p93 +sg44 +S'169519049' +p94 +sg46 +g93 +sssg48 +(dp95 +g36 +S'NC_000001.11:g.169549811C>T' +p96 +sg38 +(dp97 +g40 +g41 +sg42 +VC +p98 +sg44 +S'169549811' +p99 +sg46 +VT +p100 +sssS'grch37' +p101 +(dp102 +g36 +S'NC_000001.10:g.169519049T=' +p103 +sg38 +(dp104 +g40 +g58 +sg42 +g93 +sg44 +S'169519049' +p105 +sg46 +g93 +sssS'grch38' +p106 +(dp107 +g36 +S'NC_000001.11:g.169549811C>T' +p108 +sg38 +(dp109 +g40 +g58 +sg42 +g98 +sg44 +S'169549811' +p110 +sg46 +g100 +ssssg67 +(dp111 +g69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000121.2' +p112 +sg71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000130.4' +p113 +sssS'metadata' +p114 +(dp115 +S'variantvalidator_hgvs_version' +p116 +S'1.1.3' +p117 +sS'uta_schema' +p118 +S'uta_20180821' +p119 +sS'seqrepo_db' +p120 +S'2018-08-21' +p121 +sS'variantvalidator_version' +p122 +S'v0.2' +p123 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant138.txt b/VariantValidator/testing/testOutputsMasterITS/variant138.txt new file mode 100644 index 00000000..d2442906 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant138.txt @@ -0,0 +1,947 @@ +(dp0 +S'NM_001204317.1:c.856-9155_856-9154=' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens prolactin receptor (PRLR), transcript variant 4, mRNA +p12 +sS'gene_symbol' +p13 +S'PRLR' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_001191246.1:p.?' +p18 +sS'slr' +p19 +S'NP_001191246.1:p.?' +p20 +ssS'submitted_variant' +p21 +S'NC_000005.9:g.35058667_35058668AG=' +p22 +sS'genome_context_intronic_sequence' +p23 +S'NC_000005.9(NM_001204317.1):c.856-9155_856-9154=' +p24 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_001204317.1:c.856-9155_856-9154=' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'grch38' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000005.10:g.35058562_35058563=' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'5' +p38 +sS'ref' +p39 +S'AA' +p40 +sS'pos' +p41 +S'35058562' +p42 +sS'alt' +p43 +g40 +sssS'grch37' +p44 +(dp45 +g33 +S'NC_000005.9:g.35058667_35058668=' +p46 +sg35 +(dp47 +g37 +g38 +sg39 +S'AG' +p48 +sg41 +S'35058667' +p49 +sg43 +g48 +sssS'hg38' +p50 +(dp51 +g33 +S'NC_000005.10:g.35058562_35058563=' +p52 +sg35 +(dp53 +g37 +S'chr5' +p54 +sg39 +g40 +sg41 +S'35058562' +p55 +sg43 +g40 +sssS'hg19' +p56 +(dp57 +g33 +S'NC_000005.9:g.35058667_35058668=' +p58 +sg35 +(dp59 +g37 +g54 +sg39 +g48 +sg41 +S'35058667' +p60 +sg43 +g48 +ssssS'reference_sequence_records' +p61 +(dp62 +S'protein' +p63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191246.1' +p64 
+sS'transcript' +p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204317.1' +p66 +sssS'NM_001204316.1:c.1009+7383_1009+7384=' +p67 +(dp68 +g3 +g4 +sg5 +(lp69 +S'RefSeqGene record not available' +p70 +asg8 +g4 +sg9 +(lp71 +sg11 +VHomo sapiens prolactin receptor (PRLR), transcript variant 3, mRNA +p72 +sg13 +S'PRLR' +p73 +sg15 +(dp74 +g17 +S'NP_001191245.1:p.?' +p75 +sg19 +S'NP_001191245.1:p.?' +p76 +ssg21 +g22 +sg23 +S'NC_000005.9(NM_001204316.1):c.1009+7383_1009+7384=' +p77 +sg25 +g4 +sg26 +S'NM_001204316.1:c.1009+7383_1009+7384=' +p78 +sg28 +g4 +sg29 +(dp79 +S'grch38' +p80 +(dp81 +g33 +S'NC_000005.10:g.35058565_35058566=' +p82 +sg35 +(dp83 +g37 +g38 +sg39 +S'AT' +p84 +sg41 +S'35058565' +p85 +sg43 +g84 +sssS'grch37' +p86 +(dp87 +g33 +S'NC_000005.9:g.35058667_35058668=' +p88 +sg35 +(dp89 +g37 +g38 +sg39 +g48 +sg41 +S'35058667' +p90 +sg43 +g48 +sssg50 +(dp91 +g33 +S'NC_000005.10:g.35058565_35058566=' +p92 +sg35 +(dp93 +g37 +g54 +sg39 +g84 +sg41 +S'35058565' +p94 +sg43 +g84 +sssS'hg19' +p95 +(dp96 +g33 +S'NC_000005.9:g.35058667_35058668=' +p97 +sg35 +(dp98 +g37 +g54 +sg39 +g48 +sg41 +S'35058667' +p99 +sg43 +g48 +ssssg61 +(dp100 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191245.1' +p101 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204316.1' +p102 +sssS'NM_001204314.2:c.*6528del' +p103 +(dp104 +g3 +g4 +sg5 +(lp105 +S'The displayed variants may be artefacts of aligning NM_001204314.2 with genome build GRCh37' +p106 +aS'NM_001204314.2:c.*6527_*6530 contains 1 transcript base(s) that fail to align to chromosome NC_000005.9' +p107 +aS'Caution should be used when reporting the displayed variant descriptions' +p108 +aS'If you are unsure, please contact admin' +p109 +aS'RefSeqGene record not available' +p110 +asg8 +g4 +sg9 +(lp111 +sg11 +VHomo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA +p112 +sg13 +S'PRLR' +p113 +sg15 +(dp114 +g17 +S'NP_001191243.1:p.?' +p115 +sg19 +S'NP_001191243.1:p.?' 
+p116 +ssg21 +g22 +sg23 +g4 +sg25 +g4 +sg26 +S'NM_001204314.2:c.*6528del' +p117 +sg28 +g4 +sg29 +(dp118 +S'grch38' +p119 +(dp120 +g33 +S'NC_000005.10:g.35058563del' +p121 +sg35 +(dp122 +g37 +g38 +sg39 +S'CA' +p123 +sg41 +S'35058560' +p124 +sg43 +S'C' +p125 +sssS'grch37' +p126 +(dp127 +g33 +S'NC_000005.9:g.35058662_35058668=' +p128 +sg35 +(dp129 +g37 +g38 +sg39 +S'AGACAAG' +p130 +sg41 +S'35058662' +p131 +sg43 +g130 +sssg50 +(dp132 +g33 +S'NC_000005.10:g.35058563del' +p133 +sg35 +(dp134 +g37 +g54 +sg39 +S'CA' +p135 +sg41 +S'35058560' +p136 +sg43 +g125 +sssS'hg19' +p137 +(dp138 +g33 +S'NC_000005.9:g.35058662_35058668=' +p139 +sg35 +(dp140 +g37 +g54 +sg39 +g130 +sg41 +S'35058662' +p141 +sg43 +g130 +ssssg61 +(dp142 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191243.1' +p143 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204314.2' +p144 +sssS'NM_001204318.1:c.686-9155_686-9154=' +p145 +(dp146 +g3 +g4 +sg5 +(lp147 +S'RefSeqGene record not available' +p148 +asg8 +g4 +sg9 +(lp149 +sg11 +VHomo sapiens prolactin receptor (PRLR), transcript variant 5, mRNA +p150 +sg13 +S'PRLR' +p151 +sg15 +(dp152 +g17 +S'NP_001191247.1:p.?' +p153 +sg19 +S'NP_001191247.1:p.?' 
+p154 +ssg21 +g22 +sg23 +S'NC_000005.9(NM_001204318.1):c.686-9155_686-9154=' +p155 +sg25 +g4 +sg26 +S'NM_001204318.1:c.686-9155_686-9154=' +p156 +sg28 +g4 +sg29 +(dp157 +S'grch38' +p158 +(dp159 +g33 +S'NC_000005.10:g.35058562_35058563=' +p160 +sg35 +(dp161 +g37 +g38 +sg39 +g40 +sg41 +S'35058562' +p162 +sg43 +g40 +sssS'grch37' +p163 +(dp164 +g33 +S'NC_000005.9:g.35058667_35058668=' +p165 +sg35 +(dp166 +g37 +g38 +sg39 +g48 +sg41 +S'35058667' +p167 +sg43 +g48 +sssg50 +(dp168 +g33 +S'NC_000005.10:g.35058562_35058563=' +p169 +sg35 +(dp170 +g37 +g54 +sg39 +g40 +sg41 +S'35058562' +p171 +sg43 +g40 +sssS'hg19' +p172 +(dp173 +g33 +S'NC_000005.9:g.35058667_35058668=' +p174 +sg35 +(dp175 +g37 +g54 +sg39 +g48 +sg41 +S'35058667' +p176 +sg43 +g48 +ssssg61 +(dp177 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191247.1' +p178 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204318.1' +p179 +sssS'NR_037910.1:n.828-9155_828-9154=' +p180 +(dp181 +g3 +g4 +sg5 +(lp182 +S'RefSeqGene record not available' +p183 +asg8 +g4 +sg9 +(lp184 +sg11 +VHomo sapiens prolactin receptor (PRLR), transcript variant 7, non-coding RNA +p185 +sg13 +S'PRLR' +p186 +sg15 +(dp187 +g17 +S'Non-coding :n.' 
+p188 +sg19 +g188 +ssg21 +g22 +sg23 +S'NC_000005.9(NR_037910.1):c.828-9155_828-9154=' +p189 +sg25 +g4 +sg26 +S'NR_037910.1:n.828-9155_828-9154=' +p190 +sg28 +g4 +sg29 +(dp191 +S'grch38' +p192 +(dp193 +g33 +S'NC_000005.10:g.35058562_35058563=' +p194 +sg35 +(dp195 +g37 +g38 +sg39 +g40 +sg41 +S'35058562' +p196 +sg43 +g40 +sssS'grch37' +p197 +(dp198 +g33 +S'NC_000005.9:g.35058667_35058668=' +p199 +sg35 +(dp200 +g37 +g38 +sg39 +g48 +sg41 +S'35058667' +p201 +sg43 +g48 +sssg50 +(dp202 +g33 +S'NC_000005.10:g.35058562_35058563=' +p203 +sg35 +(dp204 +g37 +g54 +sg39 +g40 +sg41 +S'35058562' +p205 +sg43 +g40 +sssS'hg19' +p206 +(dp207 +g33 +S'NC_000005.9:g.35058667_35058668=' +p208 +sg35 +(dp209 +g37 +g54 +sg39 +g48 +sg41 +S'35058667' +p210 +sg43 +g48 +ssssg61 +(dp211 +g65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_037910.1' +p212 +sssS'metadata' +p213 +(dp214 +S'variantvalidator_hgvs_version' +p215 +S'1.1.3' +p216 +sS'uta_schema' +p217 +S'uta_20180821' +p218 +sS'seqrepo_db' +p219 +S'2018-08-21' +p220 +sS'variantvalidator_version' +p221 +S'v0.2' +p222 +ssS'flag' +p223 +S'gene_variant' +p224 +sS'NM_000949.5:c.*6523_*6524=' +p225 +(dp226 +g3 +g4 +sg5 +(lp227 +S'A more recent version of the selected reference sequence NM_000949.5 is available (NM_000949.6)' +p228 +aS'NM_000949.6:c.*6523_*6524delATinsCT MUST be fully validated prior to use in reports' +p229 +aS'select_variants=NM_000949.6:c.*6523_*6524delinsCT' +p230 +aS'RefSeqGene record not available' +p231 +asg8 +g4 +sg9 +(lp232 +sg11 +VHomo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA +p233 +sg13 +S'PRLR' +p234 +sg15 +(dp235 +g17 +S'NP_000940.1:p.?' +p236 +sg19 +S'NP_000940.1:p.?' 
+p237 +ssg21 +g22 +sg23 +g4 +sg25 +g4 +sg26 +S'NM_000949.5:c.*6523_*6524=' +p238 +sg28 +g4 +sg29 +(dp239 +S'hg19' +p240 +(dp241 +g33 +S'NC_000005.9:g.35058667_35058668=' +p242 +sg35 +(dp243 +g37 +g54 +sg39 +VAG +p244 +sg41 +S'35058667' +p245 +sg43 +g244 +sssS'grch37' +p246 +(dp247 +g33 +S'NC_000005.9:g.35058667_35058668=' +p248 +sg35 +(dp249 +g37 +g38 +sg39 +g244 +sg41 +S'35058667' +p250 +sg43 +g244 +ssssg61 +(dp251 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000940.1' +p252 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000949.5' +p253 +sssS'NM_001204314.1:c.*6523_*6524=' +p254 +(dp255 +g3 +g4 +sg5 +(lp256 +S'A more recent version of the selected reference sequence NM_001204314.1 is available (NM_001204314.2)' +p257 +aS'NM_001204314.2:c.*6523_*6524delATinsCT MUST be fully validated prior to use in reports' +p258 +aS'select_variants=NM_001204314.2:c.*6523_*6524delinsCT' +p259 +aS'RefSeqGene record not available' +p260 +asg8 +g4 +sg9 +(lp261 +sg11 +VHomo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA +p262 +sg13 +S'PRLR' +p263 +sg15 +(dp264 +g17 +S'NP_001191243.1:p.?' +p265 +sg19 +S'NP_001191243.1:p.?' 
+p266 +ssg21 +g22 +sg23 +g4 +sg25 +g4 +sg26 +S'NM_001204314.1:c.*6523_*6524=' +p267 +sg28 +g4 +sg29 +(dp268 +S'hg19' +p269 +(dp270 +g33 +S'NC_000005.9:g.35058667_35058668=' +p271 +sg35 +(dp272 +g37 +g54 +sg39 +VAG +p273 +sg41 +S'35058667' +p274 +sg43 +g273 +sssS'grch37' +p275 +(dp276 +g33 +S'NC_000005.9:g.35058667_35058668=' +p277 +sg35 +(dp278 +g37 +g38 +sg39 +g273 +sg41 +S'35058667' +p279 +sg43 +g273 +ssssg61 +(dp280 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191243.1' +p281 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204314.1' +p282 +sssS'NM_000949.6:c.*6528del' +p283 +(dp284 +g3 +g4 +sg5 +(lp285 +S'The displayed variants may be artefacts of aligning NM_000949.6 with genome build GRCh37' +p286 +aS'NM_000949.6:c.*6527_*6530 contains 1 transcript base(s) that fail to align to chromosome NC_000005.9' +p287 +aS'Caution should be used when reporting the displayed variant descriptions' +p288 +aS'If you are unsure, please contact admin' +p289 +aS'RefSeqGene record not available' +p290 +asg8 +g4 +sg9 +(lp291 +sg11 +VHomo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA +p292 +sg13 +S'PRLR' +p293 +sg15 +(dp294 +g17 +S'NP_000940.1:p.?' +p295 +sg19 +S'NP_000940.1:p.?' 
+p296 +ssg21 +g22 +sg23 +g4 +sg25 +g4 +sg26 +S'NM_000949.6:c.*6528del' +p297 +sg28 +g4 +sg29 +(dp298 +S'grch38' +p299 +(dp300 +g33 +S'NC_000005.10:g.35058563del' +p301 +sg35 +(dp302 +g37 +g38 +sg39 +S'CA' +p303 +sg41 +S'35058560' +p304 +sg43 +g125 +sssS'grch37' +p305 +(dp306 +g33 +S'NC_000005.9:g.35058662_35058668=' +p307 +sg35 +(dp308 +g37 +g38 +sg39 +g130 +sg41 +S'35058662' +p309 +sg43 +g130 +sssg50 +(dp310 +g33 +S'NC_000005.10:g.35058563del' +p311 +sg35 +(dp312 +g37 +g54 +sg39 +S'CA' +p313 +sg41 +S'35058560' +p314 +sg43 +g125 +sssS'hg19' +p315 +(dp316 +g33 +S'NC_000005.9:g.35058662_35058668=' +p317 +sg35 +(dp318 +g37 +g54 +sg39 +g130 +sg41 +S'35058662' +p319 +sg43 +g130 +ssssg61 +(dp320 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000940.1' +p321 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000949.6' +p322 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant139.txt b/VariantValidator/testing/testOutputsMasterITS/variant139.txt new file mode 100644 index 00000000..1b7cf83d --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant139.txt @@ -0,0 +1,80 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'Required information for NM_000251.1 is missing from the Universal Transcript Archive, please select an alternative version of NM_000251.1 by submitting NM_000251.1 or MSH2 to https://variantvalidator.org/ref_finder/, or select an alternative genome build' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +g4 +sS'gene_symbol' +p12 +g4 +sS'hgvs_predicted_protein_consequence' +p13 +(dp14 +S'tlr' +p15 +g4 +sS'slr' +p16 +g4 +ssS'submitted_variant' +p17 +S'NM_000251.1:c.1296_1348del' +p18 +sS'genome_context_intronic_sequence' +p19 +g4 +sS'hgvs_lrg_variant' +p20 +g4 +sS'hgvs_transcript_variant' +p21 +g4 +sS'hgvs_refseqgene_variant' +p22 +g4 
+sS'primary_assembly_loci' +p23 +(dp24 +sS'reference_sequence_records' +p25 +g4 +ssS'flag' +p26 +S'warning' +p27 +sS'metadata' +p28 +(dp29 +S'variantvalidator_hgvs_version' +p30 +S'1.1.3' +p31 +sS'uta_schema' +p32 +S'uta_20180821' +p33 +sS'seqrepo_db' +p34 +S'2018-08-21' +p35 +sS'variantvalidator_version' +p36 +S'v0.2' +p37 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant14.txt b/VariantValidator/testing/testOutputsMasterITS/variant14.txt new file mode 100644 index 00000000..fbd6f6c2 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant14.txt @@ -0,0 +1,177 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.642+1_642+2delinsG' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NM_000088.3:c.642+1GT>G automapped to NM_000088.3:c.642+1_642+2delGTinsG' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g6 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p15 +sS'gene_symbol' +p16 +S'COL1A1' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_000079.2:p.?' +p21 +sS'slr' +p22 +S'NP_000079.2:p.?' 
+p23 +ssS'submitted_variant' +p24 +S'NM_000088.3:c.642+1GT>G' +p25 +sS'genome_context_intronic_sequence' +p26 +S'NC_000017.10(NM_000088.3):c.642+1_642+2delinsG' +p27 +sS'hgvs_lrg_variant' +p28 +g6 +sS'hgvs_transcript_variant' +p29 +S'NM_000088.3:c.642+1_642+2delinsG' +p30 +sS'hgvs_refseqgene_variant' +p31 +g6 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000017.10:g.48275308_48275309delinsC' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr17' +p41 +sS'ref' +p42 +S'TA' +p43 +sS'pos' +p44 +S'48275307' +p45 +sS'alt' +p46 +S'T' +p47 +sssS'hg38' +p48 +(dp49 +g36 +S'NC_000017.11:g.50197947_50197948delinsC' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +S'TA' +p52 +sg44 +S'50197946' +p53 +sg46 +g47 +sssS'grch37' +p54 +(dp55 +g36 +S'NC_000017.10:g.48275308_48275309delinsC' +p56 +sg38 +(dp57 +g40 +S'17' +p58 +sg42 +S'TA' +p59 +sg44 +S'48275307' +p60 +sg46 +g47 +sssS'grch38' +p61 +(dp62 +g36 +S'NC_000017.11:g.50197947_50197948delinsC' +p63 +sg38 +(dp64 +g40 +g58 +sg42 +S'TA' +p65 +sg44 +S'50197946' +p66 +sg46 +g47 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p72 +sssS'metadata' +p73 +(dp74 +S'variantvalidator_hgvs_version' +p75 +S'1.1.3' +p76 +sS'uta_schema' +p77 +S'uta_20180821' +p78 +sS'seqrepo_db' +p79 +S'2018-08-21' +p80 +sS'variantvalidator_version' +p81 +S'v0.2' +p82 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant140.txt b/VariantValidator/testing/testOutputsMasterITS/variant140.txt new file mode 100644 index 00000000..25517962 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant140.txt @@ -0,0 +1,177 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.2024_2028+1del' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NM_000088.3:c.2023_2028del normalized to NM_000088.3:c.2024_2028+1del' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g6 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p15 +sS'gene_symbol' +p16 +S'COL1A1' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_000079.2:p.(Ala675_Arg676del)' +p21 +sS'slr' +p22 +S'NP_000079.2:p.(A675_R676del)' +p23 +ssS'submitted_variant' +p24 +S'NM_000088.3:c.2023_2028del' +p25 +sS'genome_context_intronic_sequence' +p26 +S'NC_000017.10(NM_000088.3):c.2024_2028+1del' +p27 +sS'hgvs_lrg_variant' +p28 +g6 +sS'hgvs_transcript_variant' +p29 +S'NM_000088.3:c.2024_2028+1del' +p30 +sS'hgvs_refseqgene_variant' +p31 +g6 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000017.10:g.48269340_48269345del' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr17' +p41 +sS'ref' +p42 +S'ACTCTTG' +p43 +sS'pos' +p44 +S'48269339' +p45 +sS'alt' +p46 +S'A' +p47 +sssS'hg38' +p48 +(dp49 +g36 +S'NC_000017.11:g.50191979_50191984del' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +S'ACTCTTG' +p52 +sg44 +S'50191978' +p53 +sg46 +g47 +sssS'grch37' +p54 +(dp55 +g36 +S'NC_000017.10:g.48269340_48269345del' +p56 +sg38 +(dp57 +g40 +S'17' +p58 +sg42 +S'ACTCTTG' +p59 +sg44 +S'48269339' +p60 +sg46 +g47 +sssS'grch38' +p61 +(dp62 +g36 +S'NC_000017.11:g.50191979_50191984del' +p63 +sg38 +(dp64 +g40 +g58 +sg42 +S'ACTCTTG' +p65 +sg44 
+S'50191978' +p66 +sg46 +g47 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p72 +sssS'metadata' +p73 +(dp74 +S'variantvalidator_hgvs_version' +p75 +S'1.1.3' +p76 +sS'uta_schema' +p77 +S'uta_20180821' +p78 +sS'seqrepo_db' +p79 +S'2018-08-21' +p80 +sS'variantvalidator_version' +p81 +S'v0.2' +p82 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant141.txt b/VariantValidator/testing/testOutputsMasterITS/variant141.txt new file mode 100644 index 00000000..aa04dcaf --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant141.txt @@ -0,0 +1,175 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.2024_2028+1del' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p14 +sS'gene_symbol' +p15 +S'COL1A1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_000079.2:p.?' +p20 +sS'slr' +p21 +S'NP_000079.2:p.?' 
+p22 +ssS'submitted_variant' +p23 +S'NM_000088.3:c.2024_2028+1del' +p24 +sS'genome_context_intronic_sequence' +p25 +S'NC_000017.10(NM_000088.3):c.2024_2028+1del' +p26 +sS'hgvs_lrg_variant' +p27 +g6 +sS'hgvs_transcript_variant' +p28 +S'NM_000088.3:c.2024_2028+1del' +p29 +sS'hgvs_refseqgene_variant' +p30 +g6 +sS'primary_assembly_loci' +p31 +(dp32 +S'hg19' +p33 +(dp34 +S'hgvs_genomic_description' +p35 +S'NC_000017.10:g.48269340_48269345del' +p36 +sS'vcf' +p37 +(dp38 +S'chr' +p39 +S'chr17' +p40 +sS'ref' +p41 +S'ACTCTTG' +p42 +sS'pos' +p43 +S'48269339' +p44 +sS'alt' +p45 +S'A' +p46 +sssS'hg38' +p47 +(dp48 +g35 +S'NC_000017.11:g.50191979_50191984del' +p49 +sg37 +(dp50 +g39 +g40 +sg41 +S'ACTCTTG' +p51 +sg43 +S'50191978' +p52 +sg45 +g46 +sssS'grch37' +p53 +(dp54 +g35 +S'NC_000017.10:g.48269340_48269345del' +p55 +sg37 +(dp56 +g39 +S'17' +p57 +sg41 +S'ACTCTTG' +p58 +sg43 +S'48269339' +p59 +sg45 +g46 +sssS'grch38' +p60 +(dp61 +g35 +S'NC_000017.11:g.50191979_50191984del' +p62 +sg37 +(dp63 +g39 +g57 +sg41 +S'ACTCTTG' +p64 +sg43 +S'50191978' +p65 +sg45 +g46 +ssssS'reference_sequence_records' +p66 +(dp67 +S'protein' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p69 +sS'transcript' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p71 +sssS'metadata' +p72 +(dp73 +S'variantvalidator_hgvs_version' +p74 +S'1.1.3' +p75 +sS'uta_schema' +p76 +S'uta_20180821' +p77 +sS'seqrepo_db' +p78 +S'2018-08-21' +p79 +sS'variantvalidator_version' +p80 +S'v0.2' +p81 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant142.txt b/VariantValidator/testing/testOutputsMasterITS/variant142.txt new file mode 100644 index 00000000..09b8d9ec --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant142.txt @@ -0,0 +1,80 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'Unable to map ENST00000450616.1 to an equivalent RefSeq transcript' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +g4 +sS'gene_symbol' +p12 +g4 +sS'hgvs_predicted_protein_consequence' +p13 +(dp14 +S'tlr' +p15 +g4 +sS'slr' +p16 +g4 +ssS'submitted_variant' +p17 +S'ENST00000450616.1:n.31+1G>C' +p18 +sS'genome_context_intronic_sequence' +p19 +g4 +sS'hgvs_lrg_variant' +p20 +g4 +sS'hgvs_transcript_variant' +p21 +g4 +sS'hgvs_refseqgene_variant' +p22 +g4 +sS'primary_assembly_loci' +p23 +(dp24 +sS'reference_sequence_records' +p25 +g4 +ssS'flag' +p26 +S'warning' +p27 +sS'metadata' +p28 +(dp29 +S'variantvalidator_hgvs_version' +p30 +S'1.1.3' +p31 +sS'uta_schema' +p32 +S'uta_20180821' +p33 +sS'seqrepo_db' +p34 +S'2018-08-21' +p35 +sS'variantvalidator_version' +p36 +S'v0.2' +p37 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant143.txt b/VariantValidator/testing/testOutputsMasterITS/variant143.txt new file mode 100644 index 00000000..0c77bfde --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant143.txt @@ -0,0 +1,80 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'Unable to map ENST00000491747 to an equivalent RefSeq transcript' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +g4 +sS'gene_symbol' +p12 +g4 +sS'hgvs_predicted_protein_consequence' +p13 +(dp14 +S'tlr' +p15 +g4 +sS'slr' +p16 +g4 +ssS'submitted_variant' +p17 +S'ENST00000491747:c.5071A>T' +p18 +sS'genome_context_intronic_sequence' +p19 +g4 +sS'hgvs_lrg_variant' +p20 +g4 +sS'hgvs_transcript_variant' +p21 +g4 +sS'hgvs_refseqgene_variant' +p22 +g4 +sS'primary_assembly_loci' +p23 +(dp24 +sS'reference_sequence_records' +p25 +g4 +ssS'flag' +p26 +S'warning' +p27 +sS'metadata' +p28 +(dp29 +S'variantvalidator_hgvs_version' +p30 +S'1.1.3' +p31 +sS'uta_schema' +p32 +S'uta_20180821' +p33 +sS'seqrepo_db' +p34 +S'2018-08-21' +p35 +sS'variantvalidator_version' +p36 +S'v0.2' +p37 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant144.txt b/VariantValidator/testing/testOutputsMasterITS/variant144.txt new file mode 100644 index 00000000..54077e93 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant144.txt @@ -0,0 +1,171 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.589G>T' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p14 +sS'gene_symbol' +p15 +S'COL1A1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_000079.2:p.(Gly197Cys)' +p20 +sS'slr' +p21 +S'NP_000079.2:p.(G197C)' +p22 +ssS'submitted_variant' +p23 +S'NG_007400.1:g.8638G>T' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_000088.3:c.589G>T' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000017.10:g.48275363C>A' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr17' +p39 +sS'ref' +p40 +VC +p41 +sS'pos' +p42 +S'48275363' +p43 +sS'alt' +p44 +VA +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000017.11:g.50198002C>A' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +g41 +sg42 +S'50198002' +p50 +sg44 +g45 +sssS'grch37' +p51 +(dp52 +g34 +S'NC_000017.10:g.48275363C>A' +p53 +sg36 +(dp54 +g38 +S'17' +p55 +sg40 +g41 +sg42 +S'48275363' +p56 +sg44 +g45 +sssS'grch38' +p57 +(dp58 +g34 +S'NC_000017.11:g.50198002C>A' +p59 +sg36 +(dp60 +g38 +g55 +sg40 +g41 +sg42 +S'50198002' +p61 +sg44 +g45 +ssssS'reference_sequence_records' +p62 +(dp63 +S'protein' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p65 +sS'transcript' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p67 +sssS'metadata' 
+p68 +(dp69 +S'variantvalidator_hgvs_version' +p70 +S'1.1.3' +p71 +sS'uta_schema' +p72 +S'uta_20180821' +p73 +sS'seqrepo_db' +p74 +S'2018-08-21' +p75 +sS'variantvalidator_version' +p76 +S'v0.2' +p77 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant145.txt b/VariantValidator/testing/testOutputsMasterITS/variant145.txt new file mode 100644 index 00000000..a857e60f --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant145.txt @@ -0,0 +1,80 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'Invalid reference sequence identifier (LRG_1)' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +g4 +sS'gene_symbol' +p12 +g4 +sS'hgvs_predicted_protein_consequence' +p13 +(dp14 +S'tlr' +p15 +g4 +sS'slr' +p16 +g4 +ssS'submitted_variant' +p17 +S'LRG_1:g.8638G>T' +p18 +sS'genome_context_intronic_sequence' +p19 +g4 +sS'hgvs_lrg_variant' +p20 +g4 +sS'hgvs_transcript_variant' +p21 +g4 +sS'hgvs_refseqgene_variant' +p22 +g4 +sS'primary_assembly_loci' +p23 +(dp24 +sS'reference_sequence_records' +p25 +g4 +ssS'flag' +p26 +S'warning' +p27 +sS'metadata' +p28 +(dp29 +S'variantvalidator_hgvs_version' +p30 +S'1.1.3' +p31 +sS'uta_schema' +p32 +S'uta_20180821' +p33 +sS'seqrepo_db' +p34 +S'2018-08-21' +p35 +sS'variantvalidator_version' +p36 +S'v0.2' +p37 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant146.txt b/VariantValidator/testing/testOutputsMasterITS/variant146.txt new file mode 100644 index 00000000..73f553f0 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant146.txt @@ -0,0 +1,80 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'No transcript definition for (tx_ac=LRG_1t1)' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +g4 +sS'gene_symbol' +p12 +g4 +sS'hgvs_predicted_protein_consequence' +p13 +(dp14 +S'tlr' +p15 +g4 +sS'slr' +p16 +g4 +ssS'submitted_variant' +p17 +S'LRG_1t1:c.589G>T' +p18 +sS'genome_context_intronic_sequence' +p19 +g4 +sS'hgvs_lrg_variant' +p20 +g4 +sS'hgvs_transcript_variant' +p21 +g4 +sS'hgvs_refseqgene_variant' +p22 +g4 +sS'primary_assembly_loci' +p23 +(dp24 +sS'reference_sequence_records' +p25 +g4 +ssS'flag' +p26 +S'warning' +p27 +sS'metadata' +p28 +(dp29 +S'variantvalidator_hgvs_version' +p30 +S'1.1.3' +p31 +sS'uta_schema' +p32 +S'uta_20180821' +p33 +sS'seqrepo_db' +p34 +S'2018-08-21' +p35 +sS'variantvalidator_version' +p36 +S'v0.2' +p37 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant147.txt b/VariantValidator/testing/testOutputsMasterITS/variant147.txt new file mode 100644 index 00000000..d1af95d0 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant147.txt @@ -0,0 +1,704 @@ +(dp0 +S'NM_002474.2:c.3034_3035inv' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +(dp11 +S'grch38' +p12 +(dp13 +S'hgvs_genomic_description' +p14 +S'NT_187607.1:g.1396662_1396663inv' +p15 +sS'vcf' +p16 +(dp17 +S'chr' +p18 +S'HSCHR16_1_CTG1' +p19 +sS'ref' +p20 +S'GT' +p21 +sS'pos' +p22 +S'1396662' +p23 +sS'alt' +p24 +S'AC' +p25 +sssa(dp26 +S'hg38' +p27 +(dp28 +g14 +S'NT_187607.1:g.1396662_1396663inv' +p29 +sg16 +(dp30 +g18 +S'chr16_KI270853v1_alt' +p31 +sg20 +S'GT' +p32 +sg22 +S'1396662' +p33 +sg24 +S'AC' +p34 +sssasS'transcript_description' +p35 +VHomo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1A, mRNA +p36 +sS'gene_symbol' +p37 +S'MYH11' +p38 +sS'hgvs_predicted_protein_consequence' +p39 +(dp40 +S'tlr' +p41 +S'NP_002465.1:p.(Thr1012Val)' +p42 +sS'slr' +p43 +S'NP_002465.1:p.(T1012V)' +p44 +ssS'submitted_variant' +p45 +S'chr16:g.15832508_15832509delinsAC' +p46 +sS'genome_context_intronic_sequence' +p47 +g4 +sS'hgvs_lrg_variant' +p48 +g4 +sS'hgvs_transcript_variant' +p49 +S'NM_002474.2:c.3034_3035inv' +p50 +sS'hgvs_refseqgene_variant' +p51 +g4 +sS'primary_assembly_loci' +p52 +(dp53 +S'grch38' +p54 +(dp55 +g14 +S'NC_000016.10:g.15738651_15738652inv' +p56 +sg16 +(dp57 +g18 +S'16' +p58 +sg20 +S'GT' +p59 +sg22 +S'15738651' +p60 +sg24 +S'AC' +p61 +sssS'grch37' +p62 +(dp63 +g14 +S'NC_000016.9:g.15832508_15832509inv' +p64 +sg16 +(dp65 +g18 +g58 +sg20 +S'GT' +p66 +sg22 +S'15832508' +p67 +sg24 +S'AC' +p68 +sssg27 +(dp69 +g14 +S'NC_000016.10:g.15738651_15738652inv' +p70 +sg16 +(dp71 +g18 
+S'chr16' +p72 +sg20 +S'GT' +p73 +sg22 +S'15738651' +p74 +sg24 +S'AC' +p75 +sssS'hg19' +p76 +(dp77 +g14 +S'NC_000016.9:g.15832508_15832509inv' +p78 +sg16 +(dp79 +g18 +g72 +sg20 +S'GT' +p80 +sg22 +S'15832508' +p81 +sg24 +S'AC' +p82 +ssssS'reference_sequence_records' +p83 +(dp84 +S'protein' +p85 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_002465.1' +p86 +sS'transcript' +p87 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_002474.2' +p88 +sssS'NM_022844.2:c.3034_3035inv' +p89 +(dp90 +g3 +g4 +sg5 +(lp91 +S'RefSeqGene record not available' +p92 +asg8 +g4 +sg9 +(lp93 +(dp94 +S'grch38' +p95 +(dp96 +g14 +S'NT_187607.1:g.1396662_1396663inv' +p97 +sg16 +(dp98 +g18 +g19 +sg20 +S'GT' +p99 +sg22 +S'1396662' +p100 +sg24 +S'AC' +p101 +sssa(dp102 +g27 +(dp103 +g14 +S'NT_187607.1:g.1396662_1396663inv' +p104 +sg16 +(dp105 +g18 +g31 +sg20 +S'GT' +p106 +sg22 +S'1396662' +p107 +sg24 +S'AC' +p108 +sssasg35 +VHomo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2A, mRNA +p109 +sg37 +S'MYH11' +p110 +sg39 +(dp111 +g41 +S'NP_074035.1:p.(Thr1012Val)' +p112 +sg43 +S'NP_074035.1:p.(T1012V)' +p113 +ssg45 +g46 +sg47 +g4 +sg48 +g4 +sg49 +S'NM_022844.2:c.3034_3035inv' +p114 +sg51 +g4 +sg52 +(dp115 +S'grch38' +p116 +(dp117 +g14 +S'NC_000016.10:g.15738651_15738652inv' +p118 +sg16 +(dp119 +g18 +g58 +sg20 +S'GT' +p120 +sg22 +S'15738651' +p121 +sg24 +S'AC' +p122 +sssS'grch37' +p123 +(dp124 +g14 +S'NC_000016.9:g.15832508_15832509inv' +p125 +sg16 +(dp126 +g18 +g58 +sg20 +S'GT' +p127 +sg22 +S'15832508' +p128 +sg24 +S'AC' +p129 +sssg27 +(dp130 +g14 +S'NC_000016.10:g.15738651_15738652inv' +p131 +sg16 +(dp132 +g18 +g72 +sg20 +S'GT' +p133 +sg22 +S'15738651' +p134 +sg24 +S'AC' +p135 +sssS'hg19' +p136 +(dp137 +g14 +S'NC_000016.9:g.15832508_15832509inv' +p138 +sg16 +(dp139 +g18 +g72 +sg20 +S'GT' +p140 +sg22 +S'15832508' +p141 +sg24 +S'AC' +p142 +ssssg83 +(dp143 +g85 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_074035.1' +p144 +sg87 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_022844.2' +p145 
+sssS'NM_001040114.1:c.3055_3056inv' +p146 +(dp147 +g3 +g4 +sg5 +(lp148 +S'RefSeqGene record not available' +p149 +asg8 +g4 +sg9 +(lp150 +(dp151 +S'grch38' +p152 +(dp153 +g14 +S'NT_187607.1:g.1396662_1396663inv' +p154 +sg16 +(dp155 +g18 +g19 +sg20 +S'GT' +p156 +sg22 +S'1396662' +p157 +sg24 +S'AC' +p158 +sssa(dp159 +g27 +(dp160 +g14 +S'NT_187607.1:g.1396662_1396663inv' +p161 +sg16 +(dp162 +g18 +g31 +sg20 +S'GT' +p163 +sg22 +S'1396662' +p164 +sg24 +S'AC' +p165 +sssasg35 +VHomo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1B, mRNA +p166 +sg37 +S'MYH11' +p167 +sg39 +(dp168 +g41 +S'NP_001035203.1:p.(Thr1019Val)' +p169 +sg43 +S'NP_001035203.1:p.(T1019V)' +p170 +ssg45 +g46 +sg47 +g4 +sg48 +g4 +sg49 +S'NM_001040114.1:c.3055_3056inv' +p171 +sg51 +g4 +sg52 +(dp172 +S'grch38' +p173 +(dp174 +g14 +S'NC_000016.10:g.15738651_15738652inv' +p175 +sg16 +(dp176 +g18 +g58 +sg20 +S'GT' +p177 +sg22 +S'15738651' +p178 +sg24 +S'AC' +p179 +sssS'grch37' +p180 +(dp181 +g14 +S'NC_000016.9:g.15832508_15832509inv' +p182 +sg16 +(dp183 +g18 +g58 +sg20 +S'GT' +p184 +sg22 +S'15832508' +p185 +sg24 +S'AC' +p186 +sssg27 +(dp187 +g14 +S'NC_000016.10:g.15738651_15738652inv' +p188 +sg16 +(dp189 +g18 +g72 +sg20 +S'GT' +p190 +sg22 +S'15738651' +p191 +sg24 +S'AC' +p192 +sssS'hg19' +p193 +(dp194 +g14 +S'NC_000016.9:g.15832508_15832509inv' +p195 +sg16 +(dp196 +g18 +g72 +sg20 +S'GT' +p197 +sg22 +S'15832508' +p198 +sg24 +S'AC' +p199 +ssssg83 +(dp200 +g85 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035203.1' +p201 +sg87 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040114.1' +p202 +sssS'flag' +p203 +S'gene_variant' +p204 +sS'NM_001040113.1:c.3055_3056inv' +p205 +(dp206 +g3 +g4 +sg5 +(lp207 +S'RefSeqGene record not available' +p208 +asg8 +g4 +sg9 +(lp209 +(dp210 +S'grch38' +p211 +(dp212 +g14 +S'NT_187607.1:g.1396662_1396663inv' +p213 +sg16 +(dp214 +g18 +g19 +sg20 +S'GT' +p215 +sg22 +S'1396662' +p216 +sg24 +S'AC' +p217 +sssa(dp218 +g27 +(dp219 +g14 +S'NT_187607.1:g.1396662_1396663inv' +p220 +sg16 
+(dp221 +g18 +g31 +sg20 +S'GT' +p222 +sg22 +S'1396662' +p223 +sg24 +S'AC' +p224 +sssasg35 +VHomo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2B, mRNA +p225 +sg37 +S'MYH11' +p226 +sg39 +(dp227 +g41 +S'NP_001035202.1:p.(Thr1019Val)' +p228 +sg43 +S'NP_001035202.1:p.(T1019V)' +p229 +ssg45 +g46 +sg47 +g4 +sg48 +g4 +sg49 +S'NM_001040113.1:c.3055_3056inv' +p230 +sg51 +g4 +sg52 +(dp231 +S'grch38' +p232 +(dp233 +g14 +S'NC_000016.10:g.15738651_15738652inv' +p234 +sg16 +(dp235 +g18 +g58 +sg20 +S'GT' +p236 +sg22 +S'15738651' +p237 +sg24 +S'AC' +p238 +sssS'grch37' +p239 +(dp240 +g14 +S'NC_000016.9:g.15832508_15832509inv' +p241 +sg16 +(dp242 +g18 +g58 +sg20 +S'GT' +p243 +sg22 +S'15832508' +p244 +sg24 +S'AC' +p245 +sssg27 +(dp246 +g14 +S'NC_000016.10:g.15738651_15738652inv' +p247 +sg16 +(dp248 +g18 +g72 +sg20 +S'GT' +p249 +sg22 +S'15738651' +p250 +sg24 +S'AC' +p251 +sssS'hg19' +p252 +(dp253 +g14 +S'NC_000016.9:g.15832508_15832509inv' +p254 +sg16 +(dp255 +g18 +g72 +sg20 +S'GT' +p256 +sg22 +S'15832508' +p257 +sg24 +S'AC' +p258 +ssssg83 +(dp259 +g85 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035202.1' +p260 +sg87 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040113.1' +p261 +sssS'metadata' +p262 +(dp263 +S'variantvalidator_hgvs_version' +p264 +S'1.1.3' +p265 +sS'uta_schema' +p266 +S'uta_20180821' +p267 +sS'seqrepo_db' +p268 +S'2018-08-21' +p269 +sS'variantvalidator_version' +p270 +S'v0.2' +p271 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant148.txt b/VariantValidator/testing/testOutputsMasterITS/variant148.txt new file mode 100644 index 00000000..79a095b8 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant148.txt @@ -0,0 +1,535 @@ +(dp0 +S'NM_001162427.1:c.210+1615dup' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 4, mRNA +p12 +sS'gene_symbol' +p13 +S'TSC1' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_001155899.1:p.?' +p18 +sS'slr' +p19 +S'NP_001155899.1:p.?' +p20 +ssS'submitted_variant' +p21 +S'NG_012386.1:g.24048dupG' +p22 +sS'genome_context_intronic_sequence' +p23 +S'NC_000009.11(NM_001162427.1):c.210+1615dup' +p24 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_001162427.1:c.210+1615dup' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000009.11:g.135800973dup' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'chr9' +p38 +sS'ref' +p39 +S'C' +p40 +sS'pos' +p41 +S'135800973' +p42 +sS'alt' +p43 +S'CC' +p44 +sssS'hg38' +p45 +(dp46 +g33 +S'NC_000009.12:g.132925586dup' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +g40 +sg41 +S'132925586' +p49 +sg43 +S'CC' +p50 +sssS'grch37' +p51 +(dp52 +g33 +S'NC_000009.11:g.135800973dup' +p53 +sg35 +(dp54 +g37 +S'9' +p55 +sg39 +g40 +sg41 +S'135800973' +p56 +sg43 +S'CC' +p57 +sssS'grch38' +p58 +(dp59 +g33 +S'NC_000009.12:g.132925586dup' +p60 +sg35 +(dp61 +g37 +g55 +sg39 +g40 +sg41 +S'132925586' +p62 +sg43 +S'CC' +p63 +ssssS'reference_sequence_records' +p64 +(dp65 +S'protein' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155899.1' +p67 +sS'transcript' +p68 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162427.1' +p69 +sssS'NM_001162426.1:c.363+1dup' +p70 +(dp71 +g3 +g4 +sg5 +(lp72 +S'RefSeqGene record not available' +p73 +asg8 +g4 +sg9 +(lp74 +sg11 +VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 3, mRNA +p75 +sg13 +S'TSC1' +p76 +sg15 +(dp77 +g17 +S'NP_001155898.1:p.?' +p78 +sg19 +S'NP_001155898.1:p.?' +p79 +ssg21 +g22 +sg23 +S'NC_000009.11(NM_001162426.1):c.363+1dup' +p80 +sg25 +g4 +sg26 +S'NM_001162426.1:c.363+1dup' +p81 +sg28 +g4 +sg29 +(dp82 +S'hg19' +p83 +(dp84 +g33 +S'NC_000009.11:g.135800973dup' +p85 +sg35 +(dp86 +g37 +g38 +sg39 +g40 +sg41 +S'135800973' +p87 +sg43 +S'CC' +p88 +sssg45 +(dp89 +g33 +S'NC_000009.12:g.132925586dup' +p90 +sg35 +(dp91 +g37 +g38 +sg39 +g40 +sg41 +S'132925586' +p92 +sg43 +S'CC' +p93 +sssS'grch37' +p94 +(dp95 +g33 +S'NC_000009.11:g.135800973dup' +p96 +sg35 +(dp97 +g37 +g55 +sg39 +g40 +sg41 +S'135800973' +p98 +sg43 +S'CC' +p99 +sssS'grch38' +p100 +(dp101 +g33 +S'NC_000009.12:g.132925586dup' +p102 +sg35 +(dp103 +g37 +g55 +sg39 +g40 +sg41 +S'132925586' +p104 +sg43 +S'CC' +p105 +ssssg64 +(dp106 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155898.1' +p107 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162426.1' +p108 +sssS'flag' +p109 +S'gene_variant' +p110 +sS'NM_001362177.1:c.-1+1dup' +p111 +(dp112 +g3 +g4 +sg5 +(lp113 +S'RefSeqGene record not available' +p114 +asg8 +g4 +sg9 +(lp115 +sg11 +VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 5, mRNA +p116 +sg13 +S'TSC1' +p117 +sg15 +(dp118 +g17 +S'NP_001349106.1:p.?' +p119 +sg19 +S'NP_001349106.1:p.?' 
+p120 +ssg21 +g22 +sg23 +S'NC_000009.11(NM_001362177.1):c.-1+1dup' +p121 +sg25 +g4 +sg26 +S'NM_001362177.1:c.-1+1dup' +p122 +sg28 +g4 +sg29 +(dp123 +S'hg19' +p124 +(dp125 +g33 +S'NC_000009.11:g.135800973dup' +p126 +sg35 +(dp127 +g37 +g38 +sg39 +g40 +sg41 +S'135800973' +p128 +sg43 +S'CC' +p129 +sssg45 +(dp130 +g33 +S'NC_000009.12:g.132925586dup' +p131 +sg35 +(dp132 +g37 +g38 +sg39 +g40 +sg41 +S'132925586' +p133 +sg43 +S'CC' +p134 +sssS'grch37' +p135 +(dp136 +g33 +S'NC_000009.11:g.135800973dup' +p137 +sg35 +(dp138 +g37 +g55 +sg39 +g40 +sg41 +S'135800973' +p139 +sg43 +S'CC' +p140 +sssS'grch38' +p141 +(dp142 +g33 +S'NC_000009.12:g.132925586dup' +p143 +sg35 +(dp144 +g37 +g55 +sg39 +g40 +sg41 +S'132925586' +p145 +sg43 +S'CC' +p146 +ssssg64 +(dp147 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001349106.1' +p148 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001362177.1' +p149 +sssS'NM_000368.4:c.363+1dup' +p150 +(dp151 +g3 +g4 +sg5 +(lp152 +S'RefSeqGene record not available' +p153 +asg8 +g4 +sg9 +(lp154 +sg11 +VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA +p155 +sg13 +S'TSC1' +p156 +sg15 +(dp157 +g17 +S'NP_000359.1:p.?' +p158 +sg19 +S'NP_000359.1:p.?' 
+p159 +ssg21 +g22 +sg23 +S'NC_000009.11(NM_000368.4):c.363+1dup' +p160 +sg25 +g4 +sg26 +S'NM_000368.4:c.363+1dup' +p161 +sg28 +g4 +sg29 +(dp162 +S'hg19' +p163 +(dp164 +g33 +S'NC_000009.11:g.135800973dup' +p165 +sg35 +(dp166 +g37 +g38 +sg39 +g40 +sg41 +S'135800973' +p167 +sg43 +S'CC' +p168 +sssg45 +(dp169 +g33 +S'NC_000009.12:g.132925586dup' +p170 +sg35 +(dp171 +g37 +g38 +sg39 +g40 +sg41 +S'132925586' +p172 +sg43 +S'CC' +p173 +sssS'grch37' +p174 +(dp175 +g33 +S'NC_000009.11:g.135800973dup' +p176 +sg35 +(dp177 +g37 +g55 +sg39 +g40 +sg41 +S'135800973' +p178 +sg43 +S'CC' +p179 +sssS'grch38' +p180 +(dp181 +g33 +S'NC_000009.12:g.132925586dup' +p182 +sg35 +(dp183 +g37 +g55 +sg39 +g40 +sg41 +S'132925586' +p184 +sg43 +S'CC' +p185 +ssssg64 +(dp186 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1' +p187 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4' +p188 +sssS'metadata' +p189 +(dp190 +S'variantvalidator_hgvs_version' +p191 +S'1.1.3' +p192 +sS'uta_schema' +p193 +S'uta_20180821' +p194 +sS'seqrepo_db' +p195 +S'2018-08-21' +p196 +sS'variantvalidator_version' +p197 +S'v0.2' +p198 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant149.txt b/VariantValidator/testing/testOutputsMasterITS/variant149.txt new file mode 100644 index 00000000..6b333171 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant149.txt @@ -0,0 +1,144 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_033517.1:c.1307_1309del' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'RefSeqGene record not available' +p19 +aS'NM_033517.1:c.1307_1309delCGA cannot be mapped directly to genome build GRCh37' +p20 +aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' +p21 +asS'refseqgene_context_intronic_sequence' +p22 +g16 +sS'alt_genomic_loci' +p23 +(lp24 +(dp25 +S'grch38' +p26 +(dp27 +S'hgvs_genomic_description' +p28 +S'NW_015148969.1:g.33721_33723del' +p29 +sS'vcf' +p30 +(dp31 +S'chr' +p32 +S'HG1311_PATCH' +p33 +sS'ref' +p34 +S'CCGA' +p35 +sS'pos' +p36 +S'33720' +p37 +sS'alt' +p38 +S'C' +p39 +sssa(dp40 +S'hg38' +p41 +(dp42 +g28 +S'NW_015148969.1:g.33721_33723del' +p43 +sg30 +(dp44 +g32 +S'NW_015148969.1' +p45 +sg34 +S'CCGA' +p46 +sg36 +S'33720' +p47 +sg38 +g39 +sssasS'transcript_description' +p48 +VHomo sapiens SH3 and multiple ankyrin repeat domains 3 (SHANK3), mRNA +p49 +sS'gene_symbol' +p50 +S'SHANK3' +p51 +sS'hgvs_predicted_protein_consequence' +p52 +(dp53 +S'tlr' +p54 +S'NP_277052.1:p.(Pro436_Ser437delinsArg)' +p55 +sS'slr' +p56 +S'NP_277052.1:p.(P436_S437delinsR)' +p57 +ssS'submitted_variant' +p58 +S'NM_033517.1:c.1307_1309delCGA' +p59 +sS'genome_context_intronic_sequence' +p60 +g16 +sS'hgvs_lrg_variant' +p61 +g16 +sS'hgvs_transcript_variant' +p62 +S'NM_033517.1:c.1307_1309del' +p63 +sS'hgvs_refseqgene_variant' +p64 +g16 
+sS'primary_assembly_loci' +p65 +(dp66 +sS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_277052.1' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_033517.1' +p72 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant15.txt b/VariantValidator/testing/testOutputsMasterITS/variant15.txt new file mode 100644 index 00000000..f8cb933b --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant15.txt @@ -0,0 +1,177 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.589-2_589-1delinsG' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NM_000088.3:c.589-2AG>G automapped to NM_000088.3:c.589-2_589-1delAGinsG' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g6 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p15 +sS'gene_symbol' +p16 +S'COL1A1' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_000079.2:p.?' +p21 +sS'slr' +p22 +S'NP_000079.2:p.?' 
+p23 +ssS'submitted_variant' +p24 +S'NM_000088.3:c.589-2AG>G' +p25 +sS'genome_context_intronic_sequence' +p26 +S'NC_000017.10(NM_000088.3):c.589-2_589-1delinsG' +p27 +sS'hgvs_lrg_variant' +p28 +g6 +sS'hgvs_transcript_variant' +p29 +S'NM_000088.3:c.589-2_589-1delinsG' +p30 +sS'hgvs_refseqgene_variant' +p31 +g6 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000017.10:g.48275364_48275365delinsC' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr17' +p41 +sS'ref' +p42 +S'CT' +p43 +sS'pos' +p44 +S'48275364' +p45 +sS'alt' +p46 +S'C' +p47 +sssS'hg38' +p48 +(dp49 +g36 +S'NC_000017.11:g.50198003_50198004delinsC' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +S'CT' +p52 +sg44 +S'50198003' +p53 +sg46 +g47 +sssS'grch37' +p54 +(dp55 +g36 +S'NC_000017.10:g.48275364_48275365delinsC' +p56 +sg38 +(dp57 +g40 +S'17' +p58 +sg42 +S'CT' +p59 +sg44 +S'48275364' +p60 +sg46 +g47 +sssS'grch38' +p61 +(dp62 +g36 +S'NC_000017.11:g.50198003_50198004delinsC' +p63 +sg38 +(dp64 +g40 +g58 +sg42 +S'CT' +p65 +sg44 +S'50198003' +p66 +sg46 +g47 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p72 +sssS'metadata' +p73 +(dp74 +S'variantvalidator_hgvs_version' +p75 +S'1.1.3' +p76 +sS'uta_schema' +p77 +S'uta_20180821' +p78 +sS'seqrepo_db' +p79 +S'2018-08-21' +p80 +sS'variantvalidator_version' +p81 +S'v0.2' +p82 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant150.txt b/VariantValidator/testing/testOutputsMasterITS/variant150.txt new file mode 100644 index 00000000..6487afa1 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant150.txt @@ -0,0 +1,80 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'HG1311_PATCH is not part of genome build GRCh37' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +g4 +sS'gene_symbol' +p12 +g4 +sS'hgvs_predicted_protein_consequence' +p13 +(dp14 +S'tlr' +p15 +g4 +sS'slr' +p16 +g4 +ssS'submitted_variant' +p17 +S'HG1311_PATCH-33720-CCGA-C' +p18 +sS'genome_context_intronic_sequence' +p19 +g4 +sS'hgvs_lrg_variant' +p20 +g4 +sS'hgvs_transcript_variant' +p21 +g4 +sS'hgvs_refseqgene_variant' +p22 +g4 +sS'primary_assembly_loci' +p23 +(dp24 +sS'reference_sequence_records' +p25 +g4 +ssS'flag' +p26 +S'warning' +p27 +sS'metadata' +p28 +(dp29 +S'variantvalidator_hgvs_version' +p30 +S'1.1.3' +p31 +sS'uta_schema' +p32 +S'uta_20180821' +p33 +sS'seqrepo_db' +p34 +S'2018-08-21' +p35 +sS'variantvalidator_version' +p36 +S'v0.2' +p37 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant151.txt b/VariantValidator/testing/testOutputsMasterITS/variant151.txt new file mode 100644 index 00000000..c900fb4d --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant151.txt @@ -0,0 +1,184 @@ +(dp0 +S'NM_015120.4:c.1573_1579=' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000002.11:g.73675227TCTC>TCTCCTC automapped to NC_000002.11:g.73675228_73675230dupCTC' +p7 +aS'The displayed variants may be artefacts of aligning NM_015120.4 with genome build GRCh37' +p8 +aS'NM_015120.4:c.1573_1579 contains 3 transcript base(s) that fail to align to chromosome NC_000002.11' +p9 +aS'Caution should be used when reporting the displayed variant descriptions' +p10 +aS'If you are unsure, please contact admin' +p11 +aS'RefSeqGene record not available' +p12 +asS'refseqgene_context_intronic_sequence' +p13 +g4 +sS'alt_genomic_loci' +p14 +(lp15 +sS'transcript_description' +p16 +VHomo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA +p17 +sS'gene_symbol' +p18 +S'ALMS1' +p19 +sS'hgvs_predicted_protein_consequence' +p20 +(dp21 +S'tlr' +p22 +S'NP_055935.4:p.(Ser525=)' +p23 +sS'slr' +p24 +S'NP_055935.4:p.(S525=)' +p25 +ssS'submitted_variant' +p26 +S'2-73675227-TCTC-TCTCCTC' +p27 +sS'genome_context_intronic_sequence' +p28 +g4 +sS'hgvs_lrg_variant' +p29 +g4 +sS'hgvs_transcript_variant' +p30 +S'NM_015120.4:c.1573_1579=' +p31 +sS'hgvs_refseqgene_variant' +p32 +g4 +sS'primary_assembly_loci' +p33 +(dp34 +S'hg19' +p35 +(dp36 +S'hgvs_genomic_description' +p37 +S'NC_000002.11:g.73675228_73675230dup' +p38 +sS'vcf' +p39 +(dp40 +S'chr' +p41 +S'chr2' +p42 +sS'ref' +p43 +S'CTC' +p44 +sS'pos' +p45 +S'73675228' +p46 +sS'alt' +p47 +VCTCCTC +p48 +sssS'hg38' +p49 +(dp50 +g37 +S'NC_000002.12:g.73448097_73448103=' +p51 +sg39 +(dp52 +g41 +g42 +sg43 +VTCTCCTC +p53 +sg45 +S'73448097' +p54 +sg47 +g53 +sssS'grch37' +p55 +(dp56 
+g37 +S'NC_000002.11:g.73675228_73675230dup' +p57 +sg39 +(dp58 +g41 +S'2' +p59 +sg43 +S'CTC' +p60 +sg45 +S'73675228' +p61 +sg47 +VCTCCTC +p62 +sssS'grch38' +p63 +(dp64 +g37 +S'NC_000002.12:g.73448097_73448103=' +p65 +sg39 +(dp66 +g41 +g59 +sg43 +g53 +sg45 +S'73448097' +p67 +sg47 +g53 +ssssS'reference_sequence_records' +p68 +(dp69 +S'protein' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4' +p71 +sS'transcript' +p72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4' +p73 +sssS'flag' +p74 +S'gene_variant' +p75 +sS'metadata' +p76 +(dp77 +S'variantvalidator_hgvs_version' +p78 +S'1.1.3' +p79 +sS'uta_schema' +p80 +S'uta_20180821' +p81 +sS'seqrepo_db' +p82 +S'2018-08-21' +p83 +sS'variantvalidator_version' +p84 +S'v0.2' +p85 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant152.txt b/VariantValidator/testing/testOutputsMasterITS/variant152.txt new file mode 100644 index 00000000..be63fc92 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant152.txt @@ -0,0 +1,183 @@ +(dp0 +S'NM_015120.4:c.1577_1579del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000002.11:g.73675227TC>TC automapped to NC_000002.11:g.73675227_73675228TC=' +p7 +aS'The displayed variants may be artefacts of aligning NM_015120.4 with genome build GRCh37' +p8 +aS'NM_015120.4:c.1574_1576 contains 3 transcript base(s) that fail to align to chromosome NC_000002.11' +p9 +aS'Caution should be used when reporting the displayed variant descriptions' +p10 +aS'If you are unsure, please contact admin' +p11 +aS'RefSeqGene record not available' +p12 +asS'refseqgene_context_intronic_sequence' +p13 +g4 +sS'alt_genomic_loci' +p14 +(lp15 +sS'transcript_description' +p16 +VHomo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA +p17 +sS'gene_symbol' +p18 +S'ALMS1' +p19 +sS'hgvs_predicted_protein_consequence' +p20 +(dp21 +S'tlr' +p22 +S'NP_055935.4:p.(Pro526del)' +p23 +sS'slr' 
+p24 +S'NP_055935.4:p.(P526del)' +p25 +ssS'submitted_variant' +p26 +S'2-73675227-TC-TC' +p27 +sS'genome_context_intronic_sequence' +p28 +g4 +sS'hgvs_lrg_variant' +p29 +g4 +sS'hgvs_transcript_variant' +p30 +S'NM_015120.4:c.1577_1579del' +p31 +sS'hgvs_refseqgene_variant' +p32 +g4 +sS'primary_assembly_loci' +p33 +(dp34 +S'hg19' +p35 +(dp36 +S'hgvs_genomic_description' +p37 +S'NC_000002.11:g.73675227_73675228=' +p38 +sS'vcf' +p39 +(dp40 +S'chr' +p41 +S'chr2' +p42 +sS'ref' +p43 +S'TC' +p44 +sS'pos' +p45 +S'73675227' +p46 +sS'alt' +p47 +g44 +sssS'hg38' +p48 +(dp49 +g37 +S'NC_000002.12:g.73448101_73448103del' +p50 +sg39 +(dp51 +g41 +g42 +sg43 +S'TCTC' +p52 +sg45 +S'73448097' +p53 +sg47 +S'T' +p54 +sssS'grch37' +p55 +(dp56 +g37 +S'NC_000002.11:g.73675227_73675228=' +p57 +sg39 +(dp58 +g41 +S'2' +p59 +sg43 +g44 +sg45 +S'73675227' +p60 +sg47 +g44 +sssS'grch38' +p61 +(dp62 +g37 +S'NC_000002.12:g.73448101_73448103del' +p63 +sg39 +(dp64 +g41 +g59 +sg43 +S'TCTC' +p65 +sg45 +S'73448097' +p66 +sg47 +g54 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4' +p72 +sssS'flag' +p73 +S'gene_variant' +p74 +sS'metadata' +p75 +(dp76 +S'variantvalidator_hgvs_version' +p77 +S'1.1.3' +p78 +sS'uta_schema' +p79 +S'uta_20180821' +p80 +sS'seqrepo_db' +p81 +S'2018-08-21' +p82 +sS'variantvalidator_version' +p83 +S'v0.2' +p84 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant153.txt b/VariantValidator/testing/testOutputsMasterITS/variant153.txt new file mode 100644 index 00000000..ec8b6ddb --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant153.txt @@ -0,0 +1,267 @@ +(dp0 +S'NM_001080423.3:c.1016_1020=' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000003.11:g.14561627AG>AGG automapped to NC_000003.11:g.14561629dupG' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA +p13 +sS'gene_symbol' +p14 +S'GRIP2' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_001073892.3:p.(Arg339=)' +p19 +sS'slr' +p20 +S'NP_001073892.3:p.(R339=)' +p21 +ssS'submitted_variant' +p22 +S'3-14561627-AG-AGG' +p23 +sS'genome_context_intronic_sequence' +p24 +g4 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_001080423.3:c.1016_1020=' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000003.11:g.14561629dup' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'chr3' +p38 +sS'ref' +p39 +S'G' +p40 +sS'pos' +p41 +S'14561628' +p42 +sS'alt' +p43 +VGG +p44 +sssS'hg38' +p45 +(dp46 +g33 +S'NC_000003.12:g.14520120_14520124=' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +VGGGCC +p49 +sg41 +S'14520120' +p50 +sg43 +g49 +sssS'grch37' +p51 +(dp52 +g33 +S'NC_000003.11:g.14561629dup' +p53 +sg35 +(dp54 +g37 +S'3' +p55 +sg39 +g40 +sg41 +S'14561628' +p56 +sg43 +VGG +p57 +sssS'grch38' +p58 +(dp59 +g33 +S'NC_000003.12:g.14520120_14520124=' +p60 +sg35 +(dp61 +g37 +g55 +sg39 +g49 +sg41 +S'14520120' +p62 +sg43 +g49 +ssssS'reference_sequence_records' +p63 +(dp64 +S'protein' +p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.3' 
+p66 +sS'transcript' +p67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.3' +p68 +sssS'flag' +p69 +S'gene_variant' +p70 +sS'NM_001080423.2:c.1307_1311=' +p71 +(dp72 +g3 +g4 +sg5 +(lp73 +S'NC_000003.11:g.14561627AG>AGG automapped to NC_000003.11:g.14561629dupG' +p74 +aS'A more recent version of the selected reference sequence NM_001080423.2 is available (NM_001080423.3)' +p75 +aS'NM_001080423.3:c.1307_1311delinsGGCCC MUST be fully validated prior to use in reports' +p76 +aS'select_variants=NM_001080423.3:c.1307_1311delinsGGCCC' +p77 +aS'RefSeqGene record not available' +p78 +asg9 +g4 +sg10 +(lp79 +sg12 +VHomo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA +p80 +sg14 +S'GRIP2' +p81 +sg16 +(dp82 +g18 +S'NP_001073892.2:p.(Arg436=)' +p83 +sg20 +S'NP_001073892.2:p.(R436=)' +p84 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001080423.2:c.1307_1311=' +p85 +sg28 +g4 +sg29 +(dp86 +S'hg19' +p87 +(dp88 +g33 +S'NC_000003.11:g.14561629dup' +p89 +sg35 +(dp90 +g37 +g38 +sg39 +g40 +sg41 +S'14561628' +p91 +sg43 +VGG +p92 +sssS'grch37' +p93 +(dp94 +g33 +S'NC_000003.11:g.14561629dup' +p95 +sg35 +(dp96 +g37 +g55 +sg39 +g40 +sg41 +S'14561628' +p97 +sg43 +VGG +p98 +ssssg63 +(dp99 +g65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.2' +p100 +sg67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.2' +p101 +sssS'metadata' +p102 +(dp103 +S'variantvalidator_hgvs_version' +p104 +S'1.1.3' +p105 +sS'uta_schema' +p106 +S'uta_20180821' +p107 +sS'seqrepo_db' +p108 +S'2018-08-21' +p109 +sS'variantvalidator_version' +p110 +S'v0.2' +p111 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant154.txt b/VariantValidator/testing/testOutputsMasterITS/variant154.txt new file mode 100644 index 00000000..4ffeb83e --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant154.txt @@ -0,0 +1,281 @@ +(dp0 +S'NM_001080423.3:c.1020del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000003.11:g.14561630CC>CC automapped to NC_000003.11:g.14561630_14561631CC=' +p7 +aS'The displayed variants may be artefacts of aligning NM_001080423.3 with genome build GRCh37' +p8 +aS'NM_001080423.3:c.1019_1022 contains 1 transcript base(s) that fail to align to chromosome NC_000003.11' +p9 +aS'Caution should be used when reporting the displayed variant descriptions' +p10 +aS'If you are unsure, please contact admin' +p11 +aS'RefSeqGene record not available' +p12 +asS'refseqgene_context_intronic_sequence' +p13 +g4 +sS'alt_genomic_loci' +p14 +(lp15 +sS'transcript_description' +p16 +VHomo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA +p17 +sS'gene_symbol' +p18 +S'GRIP2' +p19 +sS'hgvs_predicted_protein_consequence' +p20 +(dp21 +S'tlr' +p22 +S'NP_001073892.3:p.(Ser341GlnfsTer4)' +p23 +sS'slr' +p24 +S'NP_001073892.3:p.(S341Qfs*4)' +p25 +ssS'submitted_variant' +p26 +S'3-14561630-CC-CC' +p27 +sS'genome_context_intronic_sequence' +p28 +g4 +sS'hgvs_lrg_variant' +p29 +g4 +sS'hgvs_transcript_variant' +p30 +S'NM_001080423.3:c.1020del' +p31 +sS'hgvs_refseqgene_variant' +p32 +g4 +sS'primary_assembly_loci' +p33 +(dp34 +S'hg19' +p35 +(dp36 +S'hgvs_genomic_description' +p37 +S'NC_000003.11:g.14561624_14561630=' +p38 +sS'vcf' +p39 +(dp40 +S'chr' +p41 +S'chr3' +p42 +sS'ref' +p43 +S'CTGAGGC' +p44 +sS'pos' +p45 +S'14561624' +p46 +sS'alt' +p47 +g44 +sssS'hg38' +p48 +(dp49 +g37 +S'NC_000003.12:g.14520122del' +p50 +sg39 +(dp51 +g41 +g42 +sg43 +S'AG' +p52 +sg45 +S'14520119' +p53 +sg47 +S'A' +p54 +sssS'grch37' +p55 +(dp56 +g37 
+S'NC_000003.11:g.14561624_14561630=' +p57 +sg39 +(dp58 +g41 +S'3' +p59 +sg43 +g44 +sg45 +S'14561624' +p60 +sg47 +g44 +sssS'grch38' +p61 +(dp62 +g37 +S'NC_000003.12:g.14520122del' +p63 +sg39 +(dp64 +g41 +g59 +sg43 +S'AG' +p65 +sg45 +S'14520119' +p66 +sg47 +g54 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.3' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.3' +p72 +sssS'flag' +p73 +S'gene_variant' +p74 +sS'NM_001080423.2:c.1311del' +p75 +(dp76 +g3 +g4 +sg5 +(lp77 +S'NC_000003.11:g.14561630CC>CC automapped to NC_000003.11:g.14561630_14561631CC=' +p78 +aS'The displayed variants may be artefacts of aligning NM_001080423.2 with genome build GRCh37' +p79 +aS'NM_001080423.2:c.1310_1313 contains 1 transcript base(s) that fail to align to chromosome NC_000003.11' +p80 +aS'Caution should be used when reporting the displayed variant descriptions' +p81 +aS'If you are unsure, please contact admin' +p82 +aS'A more recent version of the selected reference sequence NM_001080423.2 is available (NM_001080423.3)' +p83 +aS'NM_001080423.3:c.1311delG MUST be fully validated prior to use in reports' +p84 +aS'select_variants=NM_001080423.3:c.1311del' +p85 +aS'RefSeqGene record not available' +p86 +asg13 +g4 +sg14 +(lp87 +sg16 +VHomo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA +p88 +sg18 +S'GRIP2' +p89 +sg20 +(dp90 +g22 +S'NP_001073892.2:p.(Ser438GlnfsTer4)' +p91 +sg24 +S'NP_001073892.2:p.(S438Qfs*4)' +p92 +ssg26 +g27 +sg28 +g4 +sg29 +g4 +sg30 +S'NM_001080423.2:c.1311del' +p93 +sg32 +g4 +sg33 +(dp94 +S'hg19' +p95 +(dp96 +g37 +S'NC_000003.11:g.14561624_14561630=' +p97 +sg39 +(dp98 +g41 +g42 +sg43 +g44 +sg45 +S'14561624' +p99 +sg47 +g44 +sssS'grch37' +p100 +(dp101 +g37 +S'NC_000003.11:g.14561624_14561630=' +p102 +sg39 +(dp103 +g41 +g59 +sg43 +g44 +sg45 +S'14561624' +p104 +sg47 +g44 +ssssg67 +(dp105 +g69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.2' +p106 +sg71 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.2' +p107 +sssS'metadata' +p108 +(dp109 +S'variantvalidator_hgvs_version' +p110 +S'1.1.3' +p111 +sS'uta_schema' +p112 +S'uta_20180821' +p113 +sS'seqrepo_db' +p114 +S'2018-08-21' +p115 +sS'variantvalidator_version' +p116 +S'v0.2' +p117 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant155.txt b/VariantValidator/testing/testOutputsMasterITS/variant155.txt new file mode 100644 index 00000000..c2441ec0 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant155.txt @@ -0,0 +1,259 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_014611.1:c.9879T>C' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'A more recent version of the selected reference sequence NM_014611.1 is available (NM_014611.2)' +p19 +aS'NM_014611.2:c.9879C= MUST be fully validated prior to use in reports' +p20 +aS'select_variants=NM_014611.2:c.9879C=' +p21 +aS'RefSeqGene record not available' +p22 +asS'refseqgene_context_intronic_sequence' +p23 +g16 +sS'alt_genomic_loci' +p24 +(lp25 +sS'transcript_description' +p26 +VHomo sapiens MDN1, midasin homolog (yeast) (MDN1), mRNA +p27 +sS'gene_symbol' +p28 +S'MDN1' +p29 +sS'hgvs_predicted_protein_consequence' +p30 +(dp31 +S'tlr' +p32 +S'NP_055426.1:p.(Val3293=)' +p33 +sS'slr' +p34 +S'NP_055426.1:p.(V3293=)' +p35 +ssS'submitted_variant' +p36 +S'6-90403795-G-G' +p37 +sS'genome_context_intronic_sequence' +p38 +g16 +sS'hgvs_lrg_variant' +p39 +g16 +sS'hgvs_transcript_variant' +p40 +S'NM_014611.1:c.9879T>C' +p41 +sS'hgvs_refseqgene_variant' +p42 +g16 +sS'primary_assembly_loci' +p43 +(dp44 +S'hg19' +p45 +(dp46 +S'hgvs_genomic_description' +p47 +S'NC_000006.11:g.90403795G=' +p48 +sS'vcf' +p49 +(dp50 
+S'chr' +p51 +S'chr6' +p52 +sS'ref' +p53 +S'G' +p54 +sS'pos' +p55 +S'90403795' +p56 +sS'alt' +p57 +g54 +sssS'grch37' +p58 +(dp59 +g47 +S'NC_000006.11:g.90403795G=' +p60 +sg49 +(dp61 +g51 +S'6' +p62 +sg53 +g54 +sg55 +S'90403795' +p63 +sg57 +g54 +ssssS'reference_sequence_records' +p64 +(dp65 +S'protein' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1' +p67 +sS'transcript' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.1' +p69 +sssS'NM_014611.2:c.9879C=' +p70 +(dp71 +g15 +g16 +sg17 +(lp72 +S'RefSeqGene record not available' +p73 +asg23 +g16 +sg24 +(lp74 +sg26 +VHomo sapiens midasin AAA ATPase 1 (MDN1), mRNA +p75 +sg28 +S'MDN1' +p76 +sg30 +(dp77 +g32 +S'NP_055426.1:p.(Val3293=)' +p78 +sg34 +S'NP_055426.1:p.(V3293=)' +p79 +ssg36 +g37 +sg38 +g16 +sg39 +g16 +sg40 +S'NM_014611.2:c.9879C=' +p80 +sg42 +g16 +sg43 +(dp81 +S'hg19' +p82 +(dp83 +g47 +S'NC_000006.11:g.90403795G=' +p84 +sg49 +(dp85 +g51 +g52 +sg53 +VG +p86 +sg55 +S'90403795' +p87 +sg57 +g86 +sssS'hg38' +p88 +(dp89 +g47 +S'NC_000006.12:g.89694076G=' +p90 +sg49 +(dp91 +g51 +g52 +sg53 +g86 +sg55 +S'89694076' +p92 +sg57 +g86 +sssS'grch37' +p93 +(dp94 +g47 +S'NC_000006.11:g.90403795G=' +p95 +sg49 +(dp96 +g51 +g62 +sg53 +g86 +sg55 +S'90403795' +p97 +sg57 +g86 +sssS'grch38' +p98 +(dp99 +g47 +S'NC_000006.12:g.89694076G=' +p100 +sg49 +(dp101 +g51 +g62 +sg53 +g86 +sg55 +S'89694076' +p102 +sg57 +g86 +ssssg64 +(dp103 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1' +p104 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.2' +p105 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant156.txt b/VariantValidator/testing/testOutputsMasterITS/variant156.txt new file mode 100644 index 00000000..1ff95bea --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant156.txt @@ -0,0 +1,260 @@ +(dp0 +S'NM_014611.2:c.9879C>T' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens midasin AAA ATPase 1 (MDN1), mRNA +p12 +sS'gene_symbol' +p13 +S'MDN1' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_055426.1:p.(Val3293=)' +p18 +sS'slr' +p19 +S'NP_055426.1:p.(V3293=)' +p20 +ssS'submitted_variant' +p21 +S'6-90403795-G-A' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_014611.2:c.9879C>T' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000006.11:g.90403795G>A' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr6' +p37 +sS'ref' +p38 +VG +p39 +sS'pos' +p40 +S'90403795' +p41 +sS'alt' +p42 +VA +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000006.12:g.89694076G>A' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +g39 +sg40 +S'89694076' +p48 +sg42 +g43 +sssS'grch37' +p49 +(dp50 +g32 +S'NC_000006.11:g.90403795G>A' +p51 +sg34 +(dp52 +g36 +S'6' +p53 +sg38 +g39 +sg40 +S'90403795' +p54 +sg42 +g43 +sssS'grch38' +p55 +(dp56 +g32 +S'NC_000006.12:g.89694076G>A' +p57 +sg34 +(dp58 +g36 +g53 +sg38 +g39 +sg40 +S'89694076' +p59 +sg42 +g43 +ssssS'reference_sequence_records' +p60 +(dp61 +S'protein' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1' +p63 +sS'transcript' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.2' +p65 +sssS'flag' +p66 +S'gene_variant' +p67 +sS'NM_014611.1:c.9879T=' +p68 
+(dp69 +g3 +g4 +sg5 +(lp70 +S'A more recent version of the selected reference sequence NM_014611.1 is available (NM_014611.2)' +p71 +aS'NM_014611.2:c.9879C>T MUST be fully validated prior to use in reports' +p72 +aS'select_variants=NM_014611.2:c.9879C>T' +p73 +aS'RefSeqGene record not available' +p74 +asg8 +g4 +sg9 +(lp75 +sg11 +VHomo sapiens MDN1, midasin homolog (yeast) (MDN1), mRNA +p76 +sg13 +S'MDN1' +p77 +sg15 +(dp78 +g17 +S'NP_055426.1:p.(Val3293=)' +p79 +sg19 +S'NP_055426.1:p.(V3293=)' +p80 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_014611.1:c.9879T=' +p81 +sg27 +g4 +sg28 +(dp82 +S'hg19' +p83 +(dp84 +g32 +S'NC_000006.11:g.90403795G>A' +p85 +sg34 +(dp86 +g36 +g37 +sg38 +S'G' +p87 +sg40 +S'90403795' +p88 +sg42 +g43 +sssS'grch37' +p89 +(dp90 +g32 +S'NC_000006.11:g.90403795G>A' +p91 +sg34 +(dp92 +g36 +g53 +sg38 +g87 +sg40 +S'90403795' +p93 +sg42 +g43 +ssssg60 +(dp94 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1' +p95 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.1' +p96 +sssS'metadata' +p97 +(dp98 +S'variantvalidator_hgvs_version' +p99 +S'1.1.3' +p100 +sS'uta_schema' +p101 +S'uta_20180821' +p102 +sS'seqrepo_db' +p103 +S'2018-08-21' +p104 +sS'variantvalidator_version' +p105 +S'v0.2' +p106 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant157.txt b/VariantValidator/testing/testOutputsMasterITS/variant157.txt new file mode 100644 index 00000000..3cbb7339 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant157.txt @@ -0,0 +1,1357 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_032470.3:c.4del' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NC_000006.11:g.32012992CG>C automapped to NC_000006.11:g.32012993delG' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g6 +sS'alt_genomic_loci' +p12 +(lp13 +(dp14 +S'grch37' +p15 +(dp16 +S'hgvs_genomic_description' +p17 +S'NT_113891.2:g.3483644del' +p18 +sS'vcf' +p19 +(dp20 +S'chr' +p21 +S'HSCHR6_MHC_COX_CTG1' +p22 +sS'ref' +p23 +S'CG' +p24 +sS'pos' +p25 +S'3483643' +p26 +sS'alt' +p27 +S'C' +p28 +sssa(dp29 +S'hg19' +p30 +(dp31 +g17 +S'NT_113891.2:g.3483644del' +p32 +sg19 +(dp33 +g21 +S'chr6_cox_hap2' +p34 +sg23 +S'CG' +p35 +sg25 +S'3483643' +p36 +sg27 +g28 +sssa(dp37 +S'grch38' +p38 +(dp39 +g17 +S'NT_113891.3:g.3483538del' +p40 +sg19 +(dp41 +g21 +g22 +sg23 +S'CG' +p42 +sg25 +S'3483537' +p43 +sg27 +g28 +sssa(dp44 +S'hg38' +p45 +(dp46 +g17 +S'NT_113891.3:g.3483538del' +p47 +sg19 +(dp48 +g21 +S'chr6_GL000251v2_alt' +p49 +sg23 +S'CG' +p50 +sg25 +S'3483537' +p51 +sg27 +g28 +sssa(dp52 +S'grch37' +p53 +(dp54 +g17 +S'NT_167245.1:g.3292210del' +p55 +sg19 +(dp56 +g21 +S'HSCHR6_MHC_DBB_CTG1' +p57 +sg23 +S'CG' +p58 +sg25 +S'3292209' +p59 +sg27 +g28 +sssa(dp60 +S'hg19' +p61 +(dp62 +g17 +S'NT_167245.1:g.3292210del' +p63 +sg19 +(dp64 +g21 +S'chr6_dbb_hap3' +p65 +sg23 +S'CG' +p66 +sg25 +S'3292209' +p67 +sg27 +g28 +sssa(dp68 +S'grch38' +p69 +(dp70 +g17 +S'NT_167245.2:g.3286625del' +p71 +sg19 +(dp72 +g21 +g57 +sg23 +S'CG' +p73 +sg25 +S'3286624' +p74 +sg27 +g28 +sssa(dp75 +g45 +(dp76 +g17 +S'NT_167245.2:g.3286625del' +p77 +sg19 +(dp78 +g21 +S'chr6_GL000252v2_alt' +p79 +sg23 
+S'CG' +p80 +sg25 +S'3286624' +p81 +sg27 +g28 +sssa(dp82 +S'grch37' +p83 +(dp84 +g17 +S'NT_167247.1:g.3392834del' +p85 +sg19 +(dp86 +g21 +S'HSCHR6_MHC_MCF_CTG1' +p87 +sg23 +S'CG' +p88 +sg25 +S'3392833' +p89 +sg27 +g28 +sssa(dp90 +S'hg19' +p91 +(dp92 +g17 +S'NT_167247.1:g.3392834del' +p93 +sg19 +(dp94 +g21 +S'chr6_mcf_hap5' +p95 +sg23 +S'CG' +p96 +sg25 +S'3392833' +p97 +sg27 +g28 +sssa(dp98 +S'grch38' +p99 +(dp100 +g17 +S'NT_167247.2:g.3387249del' +p101 +sg19 +(dp102 +g21 +g87 +sg23 +S'CG' +p103 +sg25 +S'3387248' +p104 +sg27 +g28 +sssa(dp105 +g45 +(dp106 +g17 +S'NT_167247.2:g.3387249del' +p107 +sg19 +(dp108 +g21 +S'chr6_GL000254v2_alt' +p109 +sg23 +S'CG' +p110 +sg25 +S'3387248' +p111 +sg27 +g28 +sssa(dp112 +S'grch37' +p113 +(dp114 +g17 +S'NT_167248.1:g.3274047del' +p115 +sg19 +(dp116 +g21 +S'HSCHR6_MHC_QBL_CTG1' +p117 +sg23 +S'CG' +p118 +sg25 +S'3274046' +p119 +sg27 +g28 +sssa(dp120 +S'hg19' +p121 +(dp122 +g17 +S'NT_167248.1:g.3274047del' +p123 +sg19 +(dp124 +g21 +S'chr6_qbl_hap6' +p125 +sg23 +S'CG' +p126 +sg25 +S'3274046' +p127 +sg27 +g28 +sssa(dp128 +S'grch38' +p129 +(dp130 +g17 +S'NT_167248.2:g.3268451del' +p131 +sg19 +(dp132 +g21 +g117 +sg23 +S'CG' +p133 +sg25 +S'3268450' +p134 +sg27 +g28 +sssa(dp135 +g45 +(dp136 +g17 +S'NT_167248.2:g.3268451del' +p137 +sg19 +(dp138 +g21 +S'chr6_GL000255v2_alt' +p139 +sg23 +S'CG' +p140 +sg25 +S'3268450' +p141 +sg27 +g28 +sssa(dp142 +S'grch37' +p143 +(dp144 +g17 +S'NT_167249.1:g.3345701del' +p145 +sg19 +(dp146 +g21 +S'HSCHR6_MHC_SSTO_CTG1' +p147 +sg23 +S'CG' +p148 +sg25 +S'3345700' +p149 +sg27 +g28 +sssa(dp150 +S'hg19' +p151 +(dp152 +g17 +S'NT_167249.1:g.3345701del' +p153 +sg19 +(dp154 +g21 +S'chr6_ssto_hap7' +p155 +sg23 +S'CG' +p156 +sg25 +S'3345700' +p157 +sg27 +g28 +sssa(dp158 +S'grch38' +p159 +(dp160 +g17 +S'NT_167249.2:g.3346403del' +p161 +sg19 +(dp162 +g21 +g147 +sg23 +S'CG' +p163 +sg25 +S'3346402' +p164 +sg27 +g28 +sssa(dp165 +g45 +(dp166 +g17 +S'NT_167249.2:g.3346403del' +p167 +sg19 +(dp168 +g21 +S'chr6_GL000256v2_alt' 
+p169 +sg23 +S'CG' +p170 +sg25 +S'3346402' +p171 +sg27 +g28 +sssasS'transcript_description' +p172 +VHomo sapiens tenascin XB (TNXB), transcript variant XB-S, mRNA +p173 +sS'gene_symbol' +p174 +S'TNXB' +p175 +sS'hgvs_predicted_protein_consequence' +p176 +(dp177 +S'tlr' +p178 +S'NP_115859.2:p.(Arg2AlafsTer91)' +p179 +sS'slr' +p180 +S'NP_115859.2:p.(R2Afs*91)' +p181 +ssS'submitted_variant' +p182 +S'6-32012992-CG-C' +p183 +sS'genome_context_intronic_sequence' +p184 +g6 +sS'hgvs_lrg_variant' +p185 +g6 +sS'hgvs_transcript_variant' +p186 +S'NM_032470.3:c.4del' +p187 +sS'hgvs_refseqgene_variant' +p188 +g6 +sS'primary_assembly_loci' +p189 +(dp190 +S'hg19' +p191 +(dp192 +g17 +S'NC_000006.11:g.32012993del' +p193 +sg19 +(dp194 +g21 +S'chr6' +p195 +sg23 +S'CG' +p196 +sg25 +S'32012992' +p197 +sg27 +g28 +sssg45 +(dp198 +g17 +S'NC_000006.12:g.32045216del' +p199 +sg19 +(dp200 +g21 +g195 +sg23 +S'CG' +p201 +sg25 +S'32045215' +p202 +sg27 +g28 +sssS'grch37' +p203 +(dp204 +g17 +S'NC_000006.11:g.32012993del' +p205 +sg19 +(dp206 +g21 +S'6' +p207 +sg23 +S'CG' +p208 +sg25 +S'32012992' +p209 +sg27 +g28 +sssS'grch38' +p210 +(dp211 +g17 +S'NC_000006.12:g.32045216del' +p212 +sg19 +(dp213 +g21 +g207 +sg23 +S'CG' +p214 +sg25 +S'32045215' +p215 +sg27 +g28 +ssssS'reference_sequence_records' +p216 +(dp217 +S'protein' +p218 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_115859.2' +p219 +sS'transcript' +p220 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032470.3' +p221 +sssS'NM_001365276.1:c.10717del' +p222 +(dp223 +g5 +g6 +sg7 +(lp224 +S'NC_000006.11:g.32012992CG>C automapped to NC_000006.11:g.32012993delG' +p225 +aS'RefSeqGene record not available' +p226 +asg11 +g6 +sg12 +(lp227 +(dp228 +S'grch37' +p229 +(dp230 +g17 +S'NT_113891.2:g.3483644del' +p231 +sg19 +(dp232 +g21 +g22 +sg23 +S'CG' +p233 +sg25 +S'3483643' +p234 +sg27 +g28 +sssa(dp235 +S'hg19' +p236 +(dp237 +g17 +S'NT_113891.2:g.3483644del' +p238 +sg19 +(dp239 +g21 +g34 +sg23 +S'CG' +p240 +sg25 +S'3483643' +p241 +sg27 +g28 +sssa(dp242 +S'grch37' +p243 
+(dp244 +g17 +S'NT_167245.1:g.3292210del' +p245 +sg19 +(dp246 +g21 +g57 +sg23 +S'CG' +p247 +sg25 +S'3292209' +p248 +sg27 +g28 +sssa(dp249 +S'hg19' +p250 +(dp251 +g17 +S'NT_167245.1:g.3292210del' +p252 +sg19 +(dp253 +g21 +g65 +sg23 +S'CG' +p254 +sg25 +S'3292209' +p255 +sg27 +g28 +sssa(dp256 +S'grch37' +p257 +(dp258 +g17 +S'NT_167247.1:g.3392834del' +p259 +sg19 +(dp260 +g21 +g87 +sg23 +S'CG' +p261 +sg25 +S'3392833' +p262 +sg27 +g28 +sssa(dp263 +S'hg19' +p264 +(dp265 +g17 +S'NT_167247.1:g.3392834del' +p266 +sg19 +(dp267 +g21 +g95 +sg23 +S'CG' +p268 +sg25 +S'3392833' +p269 +sg27 +g28 +sssasg172 +VHomo sapiens tenascin XB (TNXB), transcript variant 3, mRNA +p270 +sg174 +S'TNXB' +p271 +sg176 +(dp272 +g178 +S'NP_001352205.1:p.(Arg3573AlafsTer91)' +p273 +sg180 +S'NP_001352205.1:p.(R3573Afs*91)' +p274 +ssg182 +g183 +sg184 +g6 +sg185 +g6 +sg186 +S'NM_001365276.1:c.10717del' +p275 +sg188 +g6 +sg189 +(dp276 +S'hg19' +p277 +(dp278 +g17 +S'NC_000006.11:g.32012993del' +p279 +sg19 +(dp280 +g21 +g195 +sg23 +S'CG' +p281 +sg25 +S'32012992' +p282 +sg27 +g28 +sssS'grch37' +p283 +(dp284 +g17 +S'NC_000006.11:g.32012993del' +p285 +sg19 +(dp286 +g21 +g207 +sg23 +S'CG' +p287 +sg25 +S'32012992' +p288 +sg27 +g28 +ssssg216 +(dp289 +g218 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001352205.1' +p290 +sg220 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001365276.1' +p291 +sssS'NM_019105.7:c.10711del' +p292 +(dp293 +g5 +g6 +sg7 +(lp294 +S'NC_000006.11:g.32012992CG>C automapped to NC_000006.11:g.32012993delG' +p295 +aS'RefSeqGene record not available' +p296 +asg11 +g6 +sg12 +(lp297 +(dp298 +S'grch37' +p299 +(dp300 +g17 +S'NT_113891.2:g.3483644del' +p301 +sg19 +(dp302 +g21 +g22 +sg23 +S'CG' +p303 +sg25 +S'3483643' +p304 +sg27 +g28 +sssa(dp305 +S'hg19' +p306 +(dp307 +g17 +S'NT_113891.2:g.3483644del' +p308 +sg19 +(dp309 +g21 +g34 +sg23 +S'CG' +p310 +sg25 +S'3483643' +p311 +sg27 +g28 +sssa(dp312 +S'grch37' +p313 +(dp314 +g17 +S'NT_167245.1:g.3292210del' +p315 +sg19 +(dp316 +g21 +g57 +sg23 +S'CG' +p317 
+sg25 +S'3292209' +p318 +sg27 +g28 +sssa(dp319 +S'hg19' +p320 +(dp321 +g17 +S'NT_167245.1:g.3292210del' +p322 +sg19 +(dp323 +g21 +g65 +sg23 +S'CG' +p324 +sg25 +S'3292209' +p325 +sg27 +g28 +sssa(dp326 +S'grch37' +p327 +(dp328 +g17 +S'NT_167247.1:g.3392834del' +p329 +sg19 +(dp330 +g21 +g87 +sg23 +S'CG' +p331 +sg25 +S'3392833' +p332 +sg27 +g28 +sssa(dp333 +S'hg19' +p334 +(dp335 +g17 +S'NT_167247.1:g.3392834del' +p336 +sg19 +(dp337 +g21 +g95 +sg23 +S'CG' +p338 +sg25 +S'3392833' +p339 +sg27 +g28 +sssasg172 +VHomo sapiens tenascin XB (TNXB), transcript variant XB, mRNA +p340 +sg174 +S'TNXB' +p341 +sg176 +(dp342 +g178 +S'NP_061978.6:p.(Arg3571AlafsTer91)' +p343 +sg180 +S'NP_061978.6:p.(R3571Afs*91)' +p344 +ssg182 +g183 +sg184 +g6 +sg185 +g6 +sg186 +S'NM_019105.7:c.10711del' +p345 +sg188 +g6 +sg189 +(dp346 +S'hg19' +p347 +(dp348 +g17 +S'NC_000006.11:g.32012993del' +p349 +sg19 +(dp350 +g21 +g195 +sg23 +S'CG' +p351 +sg25 +S'32012992' +p352 +sg27 +g28 +sssS'grch37' +p353 +(dp354 +g17 +S'NC_000006.11:g.32012993del' +p355 +sg19 +(dp356 +g21 +g207 +sg23 +S'CG' +p357 +sg25 +S'32012992' +p358 +sg27 +g28 +ssssg216 +(dp359 +g218 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_061978.6' +p360 +sg220 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_019105.7' +p361 +sssS'NM_019105.6:c.10711del' +p362 +(dp363 +g5 +g6 +sg7 +(lp364 +S'NC_000006.11:g.32012992CG>C automapped to NC_000006.11:g.32012993delG' +p365 +aS'A more recent version of the selected reference sequence NM_019105.6 is available (NM_019105.7)' +p366 +aS'NM_019105.7:c.10711delC MUST be fully validated prior to use in reports' +p367 +aS'select_variants=NM_019105.7:c.10711del' +p368 +aS'RefSeqGene record not available' +p369 +asg11 +g6 +sg12 +(lp370 +(dp371 +S'grch37' +p372 +(dp373 +g17 +S'NT_113891.2:g.3483644del' +p374 +sg19 +(dp375 +g21 +g22 +sg23 +S'CG' +p376 +sg25 +S'3483643' +p377 +sg27 +g28 +sssa(dp378 +S'hg19' +p379 +(dp380 +g17 +S'NT_113891.2:g.3483644del' +p381 +sg19 +(dp382 +g21 +g34 +sg23 +S'CG' +p383 +sg25 +S'3483643' +p384 
+sg27 +g28 +sssa(dp385 +S'grch38' +p386 +(dp387 +g17 +S'NT_113891.3:g.3483538del' +p388 +sg19 +(dp389 +g21 +g22 +sg23 +S'CG' +p390 +sg25 +S'3483537' +p391 +sg27 +g28 +sssa(dp392 +g45 +(dp393 +g17 +S'NT_113891.3:g.3483538del' +p394 +sg19 +(dp395 +g21 +g49 +sg23 +S'CG' +p396 +sg25 +S'3483537' +p397 +sg27 +g28 +sssa(dp398 +S'grch37' +p399 +(dp400 +g17 +S'NT_167245.1:g.3292210del' +p401 +sg19 +(dp402 +g21 +g57 +sg23 +S'CG' +p403 +sg25 +S'3292209' +p404 +sg27 +g28 +sssa(dp405 +S'hg19' +p406 +(dp407 +g17 +S'NT_167245.1:g.3292210del' +p408 +sg19 +(dp409 +g21 +g65 +sg23 +S'CG' +p410 +sg25 +S'3292209' +p411 +sg27 +g28 +sssa(dp412 +S'grch38' +p413 +(dp414 +g17 +S'NT_167245.2:g.3286625del' +p415 +sg19 +(dp416 +g21 +g57 +sg23 +S'CG' +p417 +sg25 +S'3286624' +p418 +sg27 +g28 +sssa(dp419 +g45 +(dp420 +g17 +S'NT_167245.2:g.3286625del' +p421 +sg19 +(dp422 +g21 +g79 +sg23 +S'CG' +p423 +sg25 +S'3286624' +p424 +sg27 +g28 +sssa(dp425 +S'grch37' +p426 +(dp427 +g17 +S'NT_167247.1:g.3392834del' +p428 +sg19 +(dp429 +g21 +g87 +sg23 +S'CG' +p430 +sg25 +S'3392833' +p431 +sg27 +g28 +sssa(dp432 +S'hg19' +p433 +(dp434 +g17 +S'NT_167247.1:g.3392834del' +p435 +sg19 +(dp436 +g21 +g95 +sg23 +S'CG' +p437 +sg25 +S'3392833' +p438 +sg27 +g28 +sssa(dp439 +S'grch38' +p440 +(dp441 +g17 +S'NT_167247.2:g.3387249del' +p442 +sg19 +(dp443 +g21 +g87 +sg23 +S'CG' +p444 +sg25 +S'3387248' +p445 +sg27 +g28 +sssa(dp446 +g45 +(dp447 +g17 +S'NT_167247.2:g.3387249del' +p448 +sg19 +(dp449 +g21 +g109 +sg23 +S'CG' +p450 +sg25 +S'3387248' +p451 +sg27 +g28 +sssa(dp452 +S'grch37' +p453 +(dp454 +g17 +S'NT_167248.1:g.3271861del' +p455 +sg19 +(dp456 +g21 +g117 +sg23 +S'AG' +p457 +sg25 +S'3271858' +p458 +sg27 +S'A' +p459 +sssa(dp460 +S'hg19' +p461 +(dp462 +g17 +S'NT_167248.1:g.3271861del' +p463 +sg19 +(dp464 +g21 +g125 +sg23 +S'AG' +p465 +sg25 +S'3271858' +p466 +sg27 +g459 +sssasg172 +VHomo sapiens tenascin XB (TNXB), transcript variant XB, mRNA +p467 +sg174 +S'TNXB' +p468 +sg176 +(dp469 +g178 
+S'NP_061978.6:p.(Arg3571AlafsTer91)' +p470 +sg180 +S'NP_061978.6:p.(R3571Afs*91)' +p471 +ssg182 +g183 +sg184 +g6 +sg185 +g6 +sg186 +S'NM_019105.6:c.10711del' +p472 +sg188 +g6 +sg189 +(dp473 +S'hg19' +p474 +(dp475 +g17 +S'NC_000006.11:g.32012993del' +p476 +sg19 +(dp477 +g21 +g195 +sg23 +S'CG' +p478 +sg25 +S'32012992' +p479 +sg27 +g28 +sssg45 +(dp480 +g17 +S'NC_000006.12:g.32045216del' +p481 +sg19 +(dp482 +g21 +g195 +sg23 +S'CG' +p483 +sg25 +S'32045215' +p484 +sg27 +g28 +sssS'grch37' +p485 +(dp486 +g17 +S'NC_000006.11:g.32012993del' +p487 +sg19 +(dp488 +g21 +g207 +sg23 +S'CG' +p489 +sg25 +S'32012992' +p490 +sg27 +g28 +sssS'grch38' +p491 +(dp492 +g17 +S'NC_000006.12:g.32045216del' +p493 +sg19 +(dp494 +g21 +g207 +sg23 +S'CG' +p495 +sg25 +S'32045215' +p496 +sg27 +g28 +ssssg216 +(dp497 +g218 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_061978.6' +p498 +sg220 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_019105.6' +p499 +sssS'metadata' +p500 +(dp501 +S'variantvalidator_hgvs_version' +p502 +S'1.1.3' +p503 +sS'uta_schema' +p504 +S'uta_20180821' +p505 +sS'seqrepo_db' +p506 +S'2018-08-21' +p507 +sS'variantvalidator_version' +p508 +S'v0.2' +p509 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant158.txt b/VariantValidator/testing/testOutputsMasterITS/variant158.txt new file mode 100644 index 00000000..5508fb91 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant158.txt @@ -0,0 +1,171 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.589G>T' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p14 +sS'gene_symbol' +p15 +S'COL1A1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_000079.2:p.(Gly197Cys)' +p20 +sS'slr' +p21 +S'NP_000079.2:p.(G197C)' +p22 +ssS'submitted_variant' +p23 +S'17-48275363-C-A' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_000088.3:c.589G>T' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000017.10:g.48275363C>A' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr17' +p39 +sS'ref' +p40 +VC +p41 +sS'pos' +p42 +S'48275363' +p43 +sS'alt' +p44 +VA +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000017.11:g.50198002C>A' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +g41 +sg42 +S'50198002' +p50 +sg44 +g45 +sssS'grch37' +p51 +(dp52 +g34 +S'NC_000017.10:g.48275363C>A' +p53 +sg36 +(dp54 +g38 +S'17' +p55 +sg40 +g41 +sg42 +S'48275363' +p56 +sg44 +g45 +sssS'grch38' +p57 +(dp58 +g34 +S'NC_000017.11:g.50198002C>A' +p59 +sg36 +(dp60 +g38 +g55 +sg40 +g41 +sg42 +S'50198002' +p61 +sg44 +g45 +ssssS'reference_sequence_records' +p62 +(dp63 +S'protein' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p65 +sS'transcript' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p67 +sssS'metadata' +p68 
+(dp69 +S'variantvalidator_hgvs_version' +p70 +S'1.1.3' +p71 +sS'uta_schema' +p72 +S'uta_20180821' +p73 +sS'seqrepo_db' +p74 +S'2018-08-21' +p75 +sS'variantvalidator_version' +p76 +S'v0.2' +p77 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant159.txt b/VariantValidator/testing/testOutputsMasterITS/variant159.txt new file mode 100644 index 00000000..475bc1f7 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant159.txt @@ -0,0 +1,172 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.589-1G>T' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p14 +sS'gene_symbol' +p15 +S'COL1A1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_000079.2:p.?' +p20 +sS'slr' +p21 +S'NP_000079.2:p.?' 
+p22 +ssS'submitted_variant' +p23 +S'17-48275364-C-A' +p24 +sS'genome_context_intronic_sequence' +p25 +S'NC_000017.10(NM_000088.3):c.589-1G>T' +p26 +sS'hgvs_lrg_variant' +p27 +g6 +sS'hgvs_transcript_variant' +p28 +S'NM_000088.3:c.589-1G>T' +p29 +sS'hgvs_refseqgene_variant' +p30 +g6 +sS'primary_assembly_loci' +p31 +(dp32 +S'hg19' +p33 +(dp34 +S'hgvs_genomic_description' +p35 +S'NC_000017.10:g.48275364C>A' +p36 +sS'vcf' +p37 +(dp38 +S'chr' +p39 +S'chr17' +p40 +sS'ref' +p41 +VC +p42 +sS'pos' +p43 +S'48275364' +p44 +sS'alt' +p45 +VA +p46 +sssS'hg38' +p47 +(dp48 +g35 +S'NC_000017.11:g.50198003C>A' +p49 +sg37 +(dp50 +g39 +g40 +sg41 +g42 +sg43 +S'50198003' +p51 +sg45 +g46 +sssS'grch37' +p52 +(dp53 +g35 +S'NC_000017.10:g.48275364C>A' +p54 +sg37 +(dp55 +g39 +S'17' +p56 +sg41 +g42 +sg43 +S'48275364' +p57 +sg45 +g46 +sssS'grch38' +p58 +(dp59 +g35 +S'NC_000017.11:g.50198003C>A' +p60 +sg37 +(dp61 +g39 +g56 +sg41 +g42 +sg43 +S'50198003' +p62 +sg45 +g46 +ssssS'reference_sequence_records' +p63 +(dp64 +S'protein' +p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p66 +sS'transcript' +p67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p68 +sssS'metadata' +p69 +(dp70 +S'variantvalidator_hgvs_version' +p71 +S'1.1.3' +p72 +sS'uta_schema' +p73 +S'uta_20180821' +p74 +sS'seqrepo_db' +p75 +S'2018-08-21' +p76 +sS'variantvalidator_version' +p77 +S'v0.2' +p78 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant16.txt b/VariantValidator/testing/testOutputsMasterITS/variant16.txt new file mode 100644 index 00000000..eb14f0ff --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant16.txt @@ -0,0 +1,156 @@ +(dp0 +S'flag' +p1 +S'intergenic' +p2 +sS'Intergenic_Variant_1' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'No transcripts found that fully overlap the described variation in the genomic sequence' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +g6 +sS'gene_symbol' +p14 +g6 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +g6 +sS'slr' +p18 +g6 +ssS'submitted_variant' +p19 +S'NC_000017.10:g.48279242G>T' +p20 +sS'genome_context_intronic_sequence' +p21 +g6 +sS'hgvs_lrg_variant' +p22 +g6 +sS'hgvs_transcript_variant' +p23 +g6 +sS'hgvs_refseqgene_variant' +p24 +g6 +sS'primary_assembly_loci' +p25 +(dp26 +S'hg19' +p27 +(dp28 +S'hgvs_genomic_description' +p29 +VNC_000017.10:g.48279242G>T +p30 +sS'vcf' +p31 +(dp32 +S'chr' +p33 +S'chr17' +p34 +sS'ref' +p35 +S'G' +p36 +sS'pos' +p37 +S'48279242' +p38 +sS'alt' +p39 +S'T' +p40 +sssS'grch37' +p41 +(dp42 +g29 +VNC_000017.10:g.48279242G>T +p43 +sg31 +(dp44 +g33 +S'17' +p45 +sg35 +g36 +sg37 +g38 +sg39 +g40 +sssS'hg38' +p46 +(dp47 +g29 +VNC_000017.11:g.50201881G>T +p48 +sg31 +(dp49 +g33 +g34 +sg35 +g36 +sg37 +S'50201881' +p50 +sg39 +g40 +sssS'grch38' +p51 +(dp52 +g29 +VNC_000017.11:g.50201881G>T +p53 +sg31 +(dp54 +g33 +g45 +sg35 +g36 +sg37 +g50 +sg39 +g40 +ssssS'reference_sequence_records' +p55 +g6 +ssS'metadata' +p56 +(dp57 +S'variantvalidator_hgvs_version' +p58 +S'1.1.3' +p59 +sS'uta_schema' +p60 +S'uta_20180821' +p61 +sS'seqrepo_db' +p62 +S'2018-08-21' +p63 +sS'variantvalidator_version' +p64 +S'v0.2' +p65 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant160.txt b/VariantValidator/testing/testOutputsMasterITS/variant160.txt new file mode 100644 index 00000000..54995ddf --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant160.txt @@ -0,0 +1,179 @@ +(dp0 +S'NM_000088.3:c.591_593inv' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000017.10:g.48275359GGA>TCC automapped to NC_000017.10:g.48275359_48275361inv' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p13 +sS'gene_symbol' +p14 +S'COL1A1' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_000079.2:p.(Pro198Asp)' +p19 +sS'slr' +p20 +S'NP_000079.2:p.(P198D)' +p21 +ssS'submitted_variant' +p22 +S'17-48275359-GGA-TCC' +p23 +sS'genome_context_intronic_sequence' +p24 +g4 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_000088.3:c.591_593inv' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000017.10:g.48275359_48275361inv' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'chr17' +p38 +sS'ref' +p39 +S'GGA' +p40 +sS'pos' +p41 +S'48275359' +p42 +sS'alt' +p43 +S'TCC' +p44 +sssS'hg38' +p45 +(dp46 +g33 +S'NC_000017.11:g.50197998_50198000inv' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +S'GGA' +p49 +sg41 +S'50197998' +p50 +sg43 +S'TCC' +p51 +sssS'grch37' +p52 +(dp53 +g33 +S'NC_000017.10:g.48275359_48275361inv' +p54 +sg35 +(dp55 +g37 +S'17' +p56 +sg39 +S'GGA' +p57 +sg41 +S'48275359' +p58 +sg43 +S'TCC' +p59 +sssS'grch38' +p60 +(dp61 +g33 +S'NC_000017.11:g.50197998_50198000inv' +p62 +sg35 +(dp63 +g37 +g56 +sg39 +S'GGA' +p64 +sg41 +S'50197998' +p65 +sg43 +S'TCC' +p66 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p72 +sssS'flag' +p73 +S'gene_variant' +p74 +sS'metadata' +p75 +(dp76 +S'variantvalidator_hgvs_version' +p77 +S'1.1.3' +p78 +sS'uta_schema' +p79 +S'uta_20180821' +p80 +sS'seqrepo_db' +p81 +S'2018-08-21' +p82 +sS'variantvalidator_version' +p83 +S'v0.2' +p84 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant161.txt b/VariantValidator/testing/testOutputsMasterITS/variant161.txt new file mode 100644 index 00000000..b707940c --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant161.txt @@ -0,0 +1,177 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000089.3:c.1035_1035+2del' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NC_000007.13:g.94039128CTTG>C automapped to NC_000007.13:g.94039133_94039135delTGT' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g6 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens collagen type I alpha 2 chain (COL1A2), mRNA +p15 +sS'gene_symbol' +p16 +S'COL1A2' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_000080.2:p.(Val345del)' +p21 +sS'slr' +p22 +S'NP_000080.2:p.(V345del)' +p23 +ssS'submitted_variant' +p24 +S'7-94039128-CTTG-C' +p25 +sS'genome_context_intronic_sequence' +p26 +S'NC_000007.13(NM_000089.3):c.1035_1035+2del' +p27 +sS'hgvs_lrg_variant' +p28 +g6 +sS'hgvs_transcript_variant' +p29 +S'NM_000089.3:c.1035_1035+2del' +p30 +sS'hgvs_refseqgene_variant' +p31 +g6 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000007.13:g.94039133_94039135del' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr7' +p41 +sS'ref' +p42 +S'CTTG' +p43 +sS'pos' +p44 +S'94039128' +p45 +sS'alt' +p46 +S'C' +p47 +sssS'hg38' +p48 +(dp49 +g36 
+S'NC_000007.14:g.94409821_94409823del' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +S'CTTG' +p52 +sg44 +S'94409816' +p53 +sg46 +g47 +sssS'grch37' +p54 +(dp55 +g36 +S'NC_000007.13:g.94039133_94039135del' +p56 +sg38 +(dp57 +g40 +S'7' +p58 +sg42 +S'CTTG' +p59 +sg44 +S'94039128' +p60 +sg46 +g47 +sssS'grch38' +p61 +(dp62 +g36 +S'NC_000007.14:g.94409821_94409823del' +p63 +sg38 +(dp64 +g40 +g58 +sg42 +S'CTTG' +p65 +sg44 +S'94409816' +p66 +sg46 +g47 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000080.2' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000089.3' +p72 +sssS'metadata' +p73 +(dp74 +S'variantvalidator_hgvs_version' +p75 +S'1.1.3' +p76 +sS'uta_schema' +p77 +S'uta_20180821' +p78 +sS'seqrepo_db' +p79 +S'2018-08-21' +p80 +sS'variantvalidator_version' +p81 +S'v0.2' +p82 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant162.txt b/VariantValidator/testing/testOutputsMasterITS/variant162.txt new file mode 100644 index 00000000..bdf685d0 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant162.txt @@ -0,0 +1,547 @@ +(dp0 +S'NM_001162427.1:c.210+1615dup' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000009.11:g.135800972AC>ACC automapped to NC_000009.11:g.135800974dupC' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 4, mRNA +p13 +sS'gene_symbol' +p14 +S'TSC1' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_001155899.1:p.?' +p19 +sS'slr' +p20 +S'NP_001155899.1:p.?' 
+p21 +ssS'submitted_variant' +p22 +S'9-135800972-AC-ACC' +p23 +sS'genome_context_intronic_sequence' +p24 +S'NC_000009.11(NM_001162427.1):c.210+1615dup' +p25 +sS'hgvs_lrg_variant' +p26 +g4 +sS'hgvs_transcript_variant' +p27 +S'NM_001162427.1:c.210+1615dup' +p28 +sS'hgvs_refseqgene_variant' +p29 +g4 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000009.11:g.135800973dup' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr9' +p39 +sS'ref' +p40 +S'C' +p41 +sS'pos' +p42 +S'135800973' +p43 +sS'alt' +p44 +S'CC' +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000009.12:g.132925586dup' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +g41 +sg42 +S'132925586' +p50 +sg44 +S'CC' +p51 +sssS'grch37' +p52 +(dp53 +g34 +S'NC_000009.11:g.135800973dup' +p54 +sg36 +(dp55 +g38 +S'9' +p56 +sg40 +g41 +sg42 +S'135800973' +p57 +sg44 +S'CC' +p58 +sssS'grch38' +p59 +(dp60 +g34 +S'NC_000009.12:g.132925586dup' +p61 +sg36 +(dp62 +g38 +g56 +sg40 +g41 +sg42 +S'132925586' +p63 +sg44 +S'CC' +p64 +ssssS'reference_sequence_records' +p65 +(dp66 +S'protein' +p67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155899.1' +p68 +sS'transcript' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162427.1' +p70 +sssS'NM_001162426.1:c.363+1dup' +p71 +(dp72 +g3 +g4 +sg5 +(lp73 +S'NC_000009.11:g.135800972AC>ACC automapped to NC_000009.11:g.135800974dupC' +p74 +aS'NM_001162426.1:c.363dup normalized to NM_001162426.1:c.363+1dup' +p75 +aS'RefSeqGene record not available' +p76 +asg9 +g4 +sg10 +(lp77 +sg12 +VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 3, mRNA +p78 +sg14 +S'TSC1' +p79 +sg16 +(dp80 +g18 +S'NP_001155898.1:p.(Met122AspfsTer4)' +p81 +sg20 +S'NP_001155898.1:p.(M122Dfs*4)' +p82 +ssg22 +g23 +sg24 +S'NC_000009.11(NM_001162426.1):c.363+1dup' +p83 +sg26 +g4 +sg27 +S'NM_001162426.1:c.363+1dup' +p84 +sg29 +g4 +sg30 +(dp85 +S'hg19' +p86 +(dp87 +g34 +S'NC_000009.11:g.135800973dup' +p88 +sg36 +(dp89 +g38 +g39 +sg40 +g41 +sg42 +S'135800973' +p90 +sg44 +S'CC' +p91 +sssg46 +(dp92 
+g34 +S'NC_000009.12:g.132925586dup' +p93 +sg36 +(dp94 +g38 +g39 +sg40 +g41 +sg42 +S'132925586' +p95 +sg44 +S'CC' +p96 +sssS'grch37' +p97 +(dp98 +g34 +S'NC_000009.11:g.135800973dup' +p99 +sg36 +(dp100 +g38 +g56 +sg40 +g41 +sg42 +S'135800973' +p101 +sg44 +S'CC' +p102 +sssS'grch38' +p103 +(dp104 +g34 +S'NC_000009.12:g.132925586dup' +p105 +sg36 +(dp106 +g38 +g56 +sg40 +g41 +sg42 +S'132925586' +p107 +sg44 +S'CC' +p108 +ssssg65 +(dp109 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155898.1' +p110 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162426.1' +p111 +sssS'flag' +p112 +S'gene_variant' +p113 +sS'NM_001362177.1:c.-1+1dup' +p114 +(dp115 +g3 +g4 +sg5 +(lp116 +S'NC_000009.11:g.135800972AC>ACC automapped to NC_000009.11:g.135800974dupC' +p117 +aS'RefSeqGene record not available' +p118 +asg9 +g4 +sg10 +(lp119 +sg12 +VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 5, mRNA +p120 +sg14 +S'TSC1' +p121 +sg16 +(dp122 +g18 +S'NP_001349106.1:p.?' +p123 +sg20 +S'NP_001349106.1:p.?' 
+p124 +ssg22 +g23 +sg24 +S'NC_000009.11(NM_001362177.1):c.-1+1dup' +p125 +sg26 +g4 +sg27 +S'NM_001362177.1:c.-1+1dup' +p126 +sg29 +g4 +sg30 +(dp127 +S'hg19' +p128 +(dp129 +g34 +S'NC_000009.11:g.135800973dup' +p130 +sg36 +(dp131 +g38 +g39 +sg40 +g41 +sg42 +S'135800973' +p132 +sg44 +S'CC' +p133 +sssg46 +(dp134 +g34 +S'NC_000009.12:g.132925586dup' +p135 +sg36 +(dp136 +g38 +g39 +sg40 +g41 +sg42 +S'132925586' +p137 +sg44 +S'CC' +p138 +sssS'grch37' +p139 +(dp140 +g34 +S'NC_000009.11:g.135800973dup' +p141 +sg36 +(dp142 +g38 +g56 +sg40 +g41 +sg42 +S'135800973' +p143 +sg44 +S'CC' +p144 +sssS'grch38' +p145 +(dp146 +g34 +S'NC_000009.12:g.132925586dup' +p147 +sg36 +(dp148 +g38 +g56 +sg40 +g41 +sg42 +S'132925586' +p149 +sg44 +S'CC' +p150 +ssssg65 +(dp151 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001349106.1' +p152 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001362177.1' +p153 +sssS'NM_000368.4:c.363+1dup' +p154 +(dp155 +g3 +g4 +sg5 +(lp156 +S'NC_000009.11:g.135800972AC>ACC automapped to NC_000009.11:g.135800974dupC' +p157 +aS'NM_000368.4:c.363dup normalized to NM_000368.4:c.363+1dup' +p158 +aS'RefSeqGene record not available' +p159 +asg9 +g4 +sg10 +(lp160 +sg12 +VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA +p161 +sg14 +S'TSC1' +p162 +sg16 +(dp163 +g18 +S'NP_000359.1:p.(Met122AspfsTer4)' +p164 +sg20 +S'NP_000359.1:p.(M122Dfs*4)' +p165 +ssg22 +g23 +sg24 +S'NC_000009.11(NM_000368.4):c.363+1dup' +p166 +sg26 +g4 +sg27 +S'NM_000368.4:c.363+1dup' +p167 +sg29 +g4 +sg30 +(dp168 +S'hg19' +p169 +(dp170 +g34 +S'NC_000009.11:g.135800973dup' +p171 +sg36 +(dp172 +g38 +g39 +sg40 +g41 +sg42 +S'135800973' +p173 +sg44 +S'CC' +p174 +sssg46 +(dp175 +g34 +S'NC_000009.12:g.132925586dup' +p176 +sg36 +(dp177 +g38 +g39 +sg40 +g41 +sg42 +S'132925586' +p178 +sg44 +S'CC' +p179 +sssS'grch37' +p180 +(dp181 +g34 +S'NC_000009.11:g.135800973dup' +p182 +sg36 +(dp183 +g38 +g56 +sg40 +g41 +sg42 +S'135800973' +p184 +sg44 +S'CC' +p185 +sssS'grch38' +p186 +(dp187 +g34 
+S'NC_000009.12:g.132925586dup' +p188 +sg36 +(dp189 +g38 +g56 +sg40 +g41 +sg42 +S'132925586' +p190 +sg44 +S'CC' +p191 +ssssg65 +(dp192 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1' +p193 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4' +p194 +sssS'metadata' +p195 +(dp196 +S'variantvalidator_hgvs_version' +p197 +S'1.1.3' +p198 +sS'uta_schema' +p199 +S'uta_20180821' +p200 +sS'seqrepo_db' +p201 +S'2018-08-21' +p202 +sS'variantvalidator_version' +p203 +S'v0.2' +p204 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant163.txt b/VariantValidator/testing/testOutputsMasterITS/variant163.txt new file mode 100644 index 00000000..8b0df0fb --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant163.txt @@ -0,0 +1,402 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_001243246.1:c.2073G>A' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens prolyl 3-hydroxylase 1 (P3H1), transcript variant 3, mRNA +p14 +sS'gene_symbol' +p15 +S'P3H1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_001230175.1:p.(Ala691=)' +p20 +sS'slr' +p21 +S'NP_001230175.1:p.(A691=)' +p22 +ssS'submitted_variant' +p23 +S'1-43212925-C-T' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_001243246.1:c.2073G>A' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000001.10:g.43212925C>T' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr1' +p39 +sS'ref' +p40 +VC +p41 +sS'pos' +p42 +S'43212925' +p43 +sS'alt' +p44 +VT +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000001.11:g.42747254C>T' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +g41 +sg42 +S'42747254' 
+p50 +sg44 +g45 +sssS'grch37' +p51 +(dp52 +g34 +S'NC_000001.10:g.43212925C>T' +p53 +sg36 +(dp54 +g38 +S'1' +p55 +sg40 +g41 +sg42 +S'43212925' +p56 +sg44 +g45 +sssS'grch38' +p57 +(dp58 +g34 +S'NC_000001.11:g.42747254C>T' +p59 +sg36 +(dp60 +g38 +g55 +sg40 +g41 +sg42 +S'42747254' +p61 +sg44 +g45 +ssssS'reference_sequence_records' +p62 +(dp63 +S'protein' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001230175.1' +p65 +sS'transcript' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001243246.1' +p67 +sssS'NM_001146289.1:c.2073G>A' +p68 +(dp69 +g5 +g6 +sg7 +(lp70 +S'RefSeqGene record not available' +p71 +asg10 +g6 +sg11 +(lp72 +sg13 +VHomo sapiens prolyl 3-hydroxylase 1 (P3H1), transcript variant 2, mRNA +p73 +sg15 +S'P3H1' +p74 +sg17 +(dp75 +g19 +S'NP_001139761.1:p.(Ala691=)' +p76 +sg21 +S'NP_001139761.1:p.(A691=)' +p77 +ssg23 +g24 +sg25 +g6 +sg26 +g6 +sg27 +S'NM_001146289.1:c.2073G>A' +p78 +sg29 +g6 +sg30 +(dp79 +S'hg19' +p80 +(dp81 +g34 +S'NC_000001.10:g.43212925C>T' +p82 +sg36 +(dp83 +g38 +g39 +sg40 +g41 +sg42 +S'43212925' +p84 +sg44 +g45 +sssg46 +(dp85 +g34 +S'NC_000001.11:g.42747254C>T' +p86 +sg36 +(dp87 +g38 +g39 +sg40 +g41 +sg42 +S'42747254' +p88 +sg44 +g45 +sssS'grch37' +p89 +(dp90 +g34 +S'NC_000001.10:g.43212925C>T' +p91 +sg36 +(dp92 +g38 +g55 +sg40 +g41 +sg42 +S'43212925' +p93 +sg44 +g45 +sssS'grch38' +p94 +(dp95 +g34 +S'NC_000001.11:g.42747254C>T' +p96 +sg36 +(dp97 +g38 +g55 +sg40 +g41 +sg42 +S'42747254' +p98 +sg44 +g45 +ssssg62 +(dp99 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001139761.1' +p100 +sg66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001146289.1' +p101 +sssS'NM_022356.3:c.2055+18G>A' +p102 +(dp103 +g5 +g6 +sg7 +(lp104 +S'RefSeqGene record not available' +p105 +asg10 +g6 +sg11 +(lp106 +sg13 +VHomo sapiens prolyl 3-hydroxylase 1 (P3H1), transcript variant 1, mRNA +p107 +sg15 +S'P3H1' +p108 +sg17 +(dp109 +g19 +S'NP_071751.3:p.?' +p110 +sg21 +S'NP_071751.3:p.?' 
+p111 +ssg23 +g24 +sg25 +S'NC_000001.10(NM_022356.3):c.2055+18G>A' +p112 +sg26 +g6 +sg27 +S'NM_022356.3:c.2055+18G>A' +p113 +sg29 +g6 +sg30 +(dp114 +S'hg19' +p115 +(dp116 +g34 +S'NC_000001.10:g.43212925C>T' +p117 +sg36 +(dp118 +g38 +g39 +sg40 +g41 +sg42 +S'43212925' +p119 +sg44 +g45 +sssg46 +(dp120 +g34 +S'NC_000001.11:g.42747254C>T' +p121 +sg36 +(dp122 +g38 +g39 +sg40 +g41 +sg42 +S'42747254' +p123 +sg44 +g45 +sssS'grch37' +p124 +(dp125 +g34 +S'NC_000001.10:g.43212925C>T' +p126 +sg36 +(dp127 +g38 +g55 +sg40 +g41 +sg42 +S'43212925' +p128 +sg44 +g45 +sssS'grch38' +p129 +(dp130 +g34 +S'NC_000001.11:g.42747254C>T' +p131 +sg36 +(dp132 +g38 +g55 +sg40 +g41 +sg42 +S'42747254' +p133 +sg44 +g45 +ssssg62 +(dp134 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_071751.3' +p135 +sg66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_022356.3' +p136 +sssS'metadata' +p137 +(dp138 +S'variantvalidator_hgvs_version' +p139 +S'1.1.3' +p140 +sS'uta_schema' +p141 +S'uta_20180821' +p142 +sS'seqrepo_db' +p143 +S'2018-08-21' +p144 +sS'variantvalidator_version' +p145 +S'v0.2' +p146 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant164.txt b/VariantValidator/testing/testOutputsMasterITS/variant164.txt new file mode 100644 index 00000000..668cb668 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant164.txt @@ -0,0 +1,179 @@ +(dp0 +S'NM_001194958.2:c.20C>A' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +aS'NM_001194958.2:c.20C>A cannot be mapped directly to genome build GRCh37' +p8 +aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g4 +sS'alt_genomic_loci' +p11 +(lp12 +(dp13 +S'grch37' +p14 +(dp15 +S'hgvs_genomic_description' +p16 +S'NW_003315950.2:g.355171C>A' +p17 +sS'vcf' +p18 +(dp19 +S'chr' +p20 +S'HG987_PATCH' +p21 +sS'ref' +p22 +S'C' +p23 +sS'pos' +p24 +S'355171' +p25 +sS'alt' +p26 +S'A' +p27 +sssa(dp28 +S'hg19' +p29 +(dp30 +g16 +S'NW_003315950.2:g.355171C>A' +p31 +sg18 +(dp32 +g20 +S'NW_003315950.2' +p33 +sg22 +g23 +sg24 +S'355171' +p34 +sg26 +g27 +sssasS'transcript_description' +p35 +VHomo sapiens potassium voltage-gated channel subfamily J member 18 (KCNJ18), mRNA +p36 +sS'gene_symbol' +p37 +S'KCNJ18' +p38 +sS'hgvs_predicted_protein_consequence' +p39 +(dp40 +S'tlr' +p41 +S'NP_001181887.2:p.(Ala7Asp)' +p42 +sS'slr' +p43 +S'NP_001181887.2:p.(A7D)' +p44 +ssS'submitted_variant' +p45 +S'HG987_PATCH-355171-C-A' +p46 +sS'genome_context_intronic_sequence' +p47 +g4 +sS'hgvs_lrg_variant' +p48 +g4 +sS'hgvs_transcript_variant' +p49 +S'NM_001194958.2:c.20C>A' +p50 +sS'hgvs_refseqgene_variant' +p51 +g4 +sS'primary_assembly_loci' +p52 +(dp53 +S'grch38' +p54 +(dp55 +g16 +S'NC_000017.11:g.21702806C>A' +p56 +sg18 +(dp57 +g20 +S'17' +p58 +sg22 +g23 +sg24 +S'21702806' +p59 +sg26 +g27 +sssS'hg38' +p60 +(dp61 +g16 +S'NC_000017.11:g.21702806C>A' +p62 +sg18 +(dp63 +g20 +S'chr17' +p64 +sg22 +g23 +sg24 +S'21702806' +p65 
+sg26 +g27 +ssssS'reference_sequence_records' +p66 +(dp67 +S'protein' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001181887.2' +p69 +sS'transcript' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001194958.2' +p71 +sssS'flag' +p72 +S'gene_variant' +p73 +sS'metadata' +p74 +(dp75 +S'variantvalidator_hgvs_version' +p76 +S'1.1.3' +p77 +sS'uta_schema' +p78 +S'uta_20180821' +p79 +sS'seqrepo_db' +p80 +S'2018-08-21' +p81 +sS'variantvalidator_version' +p82 +S'v0.2' +p83 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant165.txt b/VariantValidator/testing/testOutputsMasterITS/variant165.txt new file mode 100644 index 00000000..7594875c --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant165.txt @@ -0,0 +1,600 @@ +(dp0 +S'NM_000022.3:c.534A>G' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens adenosine deaminase (ADA), transcript variant 1, mRNA +p12 +sS'gene_symbol' +p13 +S'ADA' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_000013.2:p.(Val178=)' +p18 +sS'slr' +p19 +S'NP_000013.2:p.(V178=)' +p20 +ssS'submitted_variant' +p21 +S'20-43252915-T-C' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_000022.3:c.534A>G' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000020.10:g.43252915T>C' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr20' +p37 +sS'ref' +p38 +VT +p39 +sS'pos' +p40 +S'43252915' +p41 +sS'alt' +p42 +VC +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000020.11:g.44624274T>C' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +g39 +sg40 +S'44624274' +p48 +sg42 +g43 +sssS'grch37' +p49 +(dp50 +g32 
+S'NC_000020.10:g.43252915T>C' +p51 +sg34 +(dp52 +g36 +S'20' +p53 +sg38 +g39 +sg40 +S'43252915' +p54 +sg42 +g43 +sssS'grch38' +p55 +(dp56 +g32 +S'NC_000020.11:g.44624274T>C' +p57 +sg34 +(dp58 +g36 +g53 +sg38 +g39 +sg40 +S'44624274' +p59 +sg42 +g43 +ssssS'reference_sequence_records' +p60 +(dp61 +S'protein' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000013.2' +p63 +sS'transcript' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000022.3' +p65 +sssS'NM_001322051.1:c.534A>G' +p66 +(dp67 +g3 +g4 +sg5 +(lp68 +S'RefSeqGene record not available' +p69 +asg8 +g4 +sg9 +(lp70 +sg11 +VHomo sapiens adenosine deaminase (ADA), transcript variant 3, mRNA +p71 +sg13 +S'ADA' +p72 +sg15 +(dp73 +g17 +S'NP_001308980.1:p.(Val178=)' +p74 +sg19 +S'NP_001308980.1:p.(V178=)' +p75 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001322051.1:c.534A>G' +p76 +sg27 +g4 +sg28 +(dp77 +S'hg19' +p78 +(dp79 +g32 +S'NC_000020.10:g.43252915T>C' +p80 +sg34 +(dp81 +g36 +g37 +sg38 +g39 +sg40 +S'43252915' +p82 +sg42 +g43 +sssg44 +(dp83 +g32 +S'NC_000020.11:g.44624274T>C' +p84 +sg34 +(dp85 +g36 +g37 +sg38 +g39 +sg40 +S'44624274' +p86 +sg42 +g43 +sssS'grch37' +p87 +(dp88 +g32 +S'NC_000020.10:g.43252915T>C' +p89 +sg34 +(dp90 +g36 +g53 +sg38 +g39 +sg40 +S'43252915' +p91 +sg42 +g43 +sssS'grch38' +p92 +(dp93 +g32 +S'NC_000020.11:g.44624274T>C' +p94 +sg34 +(dp95 +g36 +g53 +sg38 +g39 +sg40 +S'44624274' +p96 +sg42 +g43 +ssssg60 +(dp97 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308980.1' +p98 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322051.1' +p99 +sssS'NM_000022.2:c.534A>G' +p100 +(dp101 +g3 +g4 +sg5 +(lp102 +S'A more recent version of the selected reference sequence NM_000022.2 is available (NM_000022.3)' +p103 +aS'NM_000022.3:c.534A>G MUST be fully validated prior to use in reports' +p104 +aS'select_variants=NM_000022.3:c.534A>G' +p105 +aS'RefSeqGene record not available' +p106 +asg8 +g4 +sg9 +(lp107 +sg11 +VHomo sapiens adenosine deaminase (ADA), mRNA +p108 +sg13 +S'ADA' +p109 +sg15 +(dp110 +g17 
+S'NP_000013.2:p.(Val178=)' +p111 +sg19 +S'NP_000013.2:p.(V178=)' +p112 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_000022.2:c.534A>G' +p113 +sg27 +g4 +sg28 +(dp114 +S'hg19' +p115 +(dp116 +g32 +S'NC_000020.10:g.43252915T>C' +p117 +sg34 +(dp118 +g36 +g37 +sg38 +g39 +sg40 +S'43252915' +p119 +sg42 +g43 +sssS'grch37' +p120 +(dp121 +g32 +S'NC_000020.10:g.43252915T>C' +p122 +sg34 +(dp123 +g36 +g53 +sg38 +g39 +sg40 +S'43252915' +p124 +sg42 +g43 +ssssg60 +(dp125 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000013.2' +p126 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000022.2' +p127 +sssS'flag' +p128 +S'gene_variant' +p129 +sS'NM_001322050.1:c.129A>G' +p130 +(dp131 +g3 +g4 +sg5 +(lp132 +S'RefSeqGene record not available' +p133 +asg8 +g4 +sg9 +(lp134 +sg11 +VHomo sapiens adenosine deaminase (ADA), transcript variant 2, mRNA +p135 +sg13 +S'ADA' +p136 +sg15 +(dp137 +g17 +S'NP_001308979.1:p.(Val43=)' +p138 +sg19 +S'NP_001308979.1:p.(V43=)' +p139 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001322050.1:c.129A>G' +p140 +sg27 +g4 +sg28 +(dp141 +S'hg19' +p142 +(dp143 +g32 +S'NC_000020.10:g.43252915T>C' +p144 +sg34 +(dp145 +g36 +g37 +sg38 +g39 +sg40 +S'43252915' +p146 +sg42 +g43 +sssg44 +(dp147 +g32 +S'NC_000020.11:g.44624274T>C' +p148 +sg34 +(dp149 +g36 +g37 +sg38 +g39 +sg40 +S'44624274' +p150 +sg42 +g43 +sssS'grch37' +p151 +(dp152 +g32 +S'NC_000020.10:g.43252915T>C' +p153 +sg34 +(dp154 +g36 +g53 +sg38 +g39 +sg40 +S'43252915' +p155 +sg42 +g43 +sssS'grch38' +p156 +(dp157 +g32 +S'NC_000020.11:g.44624274T>C' +p158 +sg34 +(dp159 +g36 +g53 +sg38 +g39 +sg40 +S'44624274' +p160 +sg42 +g43 +ssssg60 +(dp161 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308979.1' +p162 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322050.1' +p163 +sssS'NR_136160.1:n.685A>G' +p164 +(dp165 +g3 +g4 +sg5 +(lp166 +S'RefSeqGene record not available' +p167 +asg8 +g4 +sg9 +(lp168 +sg11 +VHomo sapiens adenosine deaminase (ADA), transcript variant 4, non-coding RNA +p169 +sg13 +S'ADA' +p170 +sg15 
+(dp171 +g17 +S'Non-coding :n.' +p172 +sg19 +g172 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NR_136160.1:n.685A>G' +p173 +sg27 +g4 +sg28 +(dp174 +S'hg19' +p175 +(dp176 +g32 +S'NC_000020.10:g.43252915T>C' +p177 +sg34 +(dp178 +g36 +g37 +sg38 +g39 +sg40 +S'43252915' +p179 +sg42 +g43 +sssg44 +(dp180 +g32 +S'NC_000020.11:g.44624274T>C' +p181 +sg34 +(dp182 +g36 +g37 +sg38 +g39 +sg40 +S'44624274' +p183 +sg42 +g43 +sssS'grch37' +p184 +(dp185 +g32 +S'NC_000020.10:g.43252915T>C' +p186 +sg34 +(dp187 +g36 +g53 +sg38 +g39 +sg40 +S'43252915' +p188 +sg42 +g43 +sssS'grch38' +p189 +(dp190 +g32 +S'NC_000020.11:g.44624274T>C' +p191 +sg34 +(dp192 +g36 +g53 +sg38 +g39 +sg40 +S'44624274' +p193 +sg42 +g43 +ssssg60 +(dp194 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_136160.1' +p195 +sssS'metadata' +p196 +(dp197 +S'variantvalidator_hgvs_version' +p198 +S'1.1.3' +p199 +sS'uta_schema' +p200 +S'uta_20180821' +p201 +sS'seqrepo_db' +p202 +S'2018-08-21' +p203 +sS'variantvalidator_version' +p204 +S'v0.2' +p205 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant166.txt b/VariantValidator/testing/testOutputsMasterITS/variant166.txt new file mode 100644 index 00000000..e30960a5 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant166.txt @@ -0,0 +1,171 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_206933.2:c.6317C>G' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens usherin (USH2A), transcript variant 2, mRNA +p14 +sS'gene_symbol' +p15 +S'USH2A' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_996816.2:p.(Thr2106Arg)' +p20 +sS'slr' +p21 +S'NP_996816.2:p.(T2106R)' +p22 +ssS'submitted_variant' +p23 +S'1-216219781-A-C' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' 
+p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_206933.2:c.6317C>G' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000001.10:g.216219781A>C' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr1' +p39 +sS'ref' +p40 +S'A' +p41 +sS'pos' +p42 +S'216219781' +p43 +sS'alt' +p44 +VC +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000001.11:g.216046439A>C' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +g41 +sg42 +S'216046439' +p50 +sg44 +g45 +sssS'grch37' +p51 +(dp52 +g34 +S'NC_000001.10:g.216219781A>C' +p53 +sg36 +(dp54 +g38 +S'1' +p55 +sg40 +g41 +sg42 +S'216219781' +p56 +sg44 +g45 +sssS'grch38' +p57 +(dp58 +g34 +S'NC_000001.11:g.216046439A>C' +p59 +sg36 +(dp60 +g38 +g55 +sg40 +g41 +sg42 +S'216046439' +p61 +sg44 +g45 +ssssS'reference_sequence_records' +p62 +(dp63 +S'protein' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_996816.2' +p65 +sS'transcript' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_206933.2' +p67 +sssS'metadata' +p68 +(dp69 +S'variantvalidator_hgvs_version' +p70 +S'1.1.3' +p71 +sS'uta_schema' +p72 +S'uta_20180821' +p73 +sS'seqrepo_db' +p74 +S'2018-08-21' +p75 +sS'variantvalidator_version' +p76 +S'v0.2' +p77 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant167.txt b/VariantValidator/testing/testOutputsMasterITS/variant167.txt new file mode 100644 index 00000000..b1fff138 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant167.txt @@ -0,0 +1,1405 @@ +(dp0 +S'NM_005896.3:c.394C>G' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'Multiple ALT sequences detected' +p7 +aS'auto-submitting all possible combinations' +p8 +aS'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g4 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 1, mRNA +p14 +sS'gene_symbol' +p15 +S'IDH1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_005887.2:p.(Arg132Gly)' +p20 +sS'slr' +p21 +S'NP_005887.2:p.(R132G)' +p22 +ssS'submitted_variant' +p23 +S'2-209113113-G-A,C,T' +p24 +sS'genome_context_intronic_sequence' +p25 +g4 +sS'hgvs_lrg_variant' +p26 +g4 +sS'hgvs_transcript_variant' +p27 +S'NM_005896.3:c.394C>G' +p28 +sS'hgvs_refseqgene_variant' +p29 +g4 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000002.11:g.209113113G>C' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr2' +p39 +sS'ref' +p40 +VG +p41 +sS'pos' +p42 +S'209113113' +p43 +sS'alt' +p44 +VC +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000002.12:g.208248389G>C' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +g41 +sg42 +S'208248389' +p50 +sg44 +g45 +sssS'grch37' +p51 +(dp52 +g34 +S'NC_000002.11:g.209113113G>C' +p53 +sg36 +(dp54 +g38 +S'2' +p55 +sg40 +g41 +sg42 +S'209113113' +p56 +sg44 +g45 +sssS'grch38' +p57 +(dp58 +g34 +S'NC_000002.12:g.208248389G>C' +p59 +sg36 +(dp60 +g38 +g55 +sg40 +g41 +sg42 +S'208248389' +p61 +sg44 +g45 +ssssS'reference_sequence_records' +p62 +(dp63 +S'protein' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2' 
+p65 +sS'transcript' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.3' +p67 +sssS'NM_001282387.1:c.394C>G' +p68 +(dp69 +g3 +g4 +sg5 +(lp70 +S'Multiple ALT sequences detected' +p71 +aS'auto-submitting all possible combinations' +p72 +aS'RefSeqGene record not available' +p73 +asg10 +g4 +sg11 +(lp74 +sg13 +VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 3, mRNA +p75 +sg15 +S'IDH1' +p76 +sg17 +(dp77 +g19 +S'NP_001269316.1:p.(Arg132Gly)' +p78 +sg21 +S'NP_001269316.1:p.(R132G)' +p79 +ssg23 +g24 +sg25 +g4 +sg26 +g4 +sg27 +S'NM_001282387.1:c.394C>G' +p80 +sg29 +g4 +sg30 +(dp81 +S'hg19' +p82 +(dp83 +g34 +S'NC_000002.11:g.209113113G>C' +p84 +sg36 +(dp85 +g38 +g39 +sg40 +g41 +sg42 +S'209113113' +p86 +sg44 +g45 +sssg46 +(dp87 +g34 +S'NC_000002.12:g.208248389G>C' +p88 +sg36 +(dp89 +g38 +g39 +sg40 +g41 +sg42 +S'208248389' +p90 +sg44 +g45 +sssS'grch37' +p91 +(dp92 +g34 +S'NC_000002.11:g.209113113G>C' +p93 +sg36 +(dp94 +g38 +g55 +sg40 +g41 +sg42 +S'209113113' +p95 +sg44 +g45 +sssS'grch38' +p96 +(dp97 +g34 +S'NC_000002.12:g.208248389G>C' +p98 +sg36 +(dp99 +g38 +g55 +sg40 +g41 +sg42 +S'208248389' +p100 +sg44 +g45 +ssssg62 +(dp101 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269316.1' +p102 +sg66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282387.1' +p103 +sssS'NM_001282387.1:c.394C>A' +p104 +(dp105 +g3 +g4 +sg5 +(lp106 +S'Multiple ALT sequences detected' +p107 +aS'auto-submitting all possible combinations' +p108 +aS'RefSeqGene record not available' +p109 +asg10 +g4 +sg11 +(lp110 +sg13 +VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 3, mRNA +p111 +sg15 +S'IDH1' +p112 +sg17 +(dp113 +g19 +S'NP_001269316.1:p.(Arg132Ser)' +p114 +sg21 +S'NP_001269316.1:p.(R132S)' +p115 +ssg23 +g24 +sg25 +g4 +sg26 +g4 +sg27 +S'NM_001282387.1:c.394C>A' +p116 +sg29 +g4 +sg30 +(dp117 +S'hg19' +p118 +(dp119 +g34 +S'NC_000002.11:g.209113113G>T' +p120 +sg36 +(dp121 +g38 +g39 +sg40 +g41 +sg42 +S'209113113' +p122 
+sg44 +VT +p123 +sssg46 +(dp124 +g34 +S'NC_000002.12:g.208248389G>T' +p125 +sg36 +(dp126 +g38 +g39 +sg40 +g41 +sg42 +S'208248389' +p127 +sg44 +g123 +sssS'grch37' +p128 +(dp129 +g34 +S'NC_000002.11:g.209113113G>T' +p130 +sg36 +(dp131 +g38 +g55 +sg40 +g41 +sg42 +S'209113113' +p132 +sg44 +g123 +sssS'grch38' +p133 +(dp134 +g34 +S'NC_000002.12:g.208248389G>T' +p135 +sg36 +(dp136 +g38 +g55 +sg40 +g41 +sg42 +S'208248389' +p137 +sg44 +g123 +ssssg62 +(dp138 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269316.1' +p139 +sg66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282387.1' +p140 +sssS'NM_005896.3:c.394C>A' +p141 +(dp142 +g3 +g4 +sg5 +(lp143 +S'Multiple ALT sequences detected' +p144 +aS'auto-submitting all possible combinations' +p145 +aS'RefSeqGene record not available' +p146 +asg10 +g4 +sg11 +(lp147 +sg13 +VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 1, mRNA +p148 +sg15 +S'IDH1' +p149 +sg17 +(dp150 +g19 +S'NP_005887.2:p.(Arg132Ser)' +p151 +sg21 +S'NP_005887.2:p.(R132S)' +p152 +ssg23 +g24 +sg25 +g4 +sg26 +g4 +sg27 +S'NM_005896.3:c.394C>A' +p153 +sg29 +g4 +sg30 +(dp154 +S'hg19' +p155 +(dp156 +g34 +S'NC_000002.11:g.209113113G>T' +p157 +sg36 +(dp158 +g38 +g39 +sg40 +g41 +sg42 +S'209113113' +p159 +sg44 +g123 +sssg46 +(dp160 +g34 +S'NC_000002.12:g.208248389G>T' +p161 +sg36 +(dp162 +g38 +g39 +sg40 +g41 +sg42 +S'208248389' +p163 +sg44 +g123 +sssS'grch37' +p164 +(dp165 +g34 +S'NC_000002.11:g.209113113G>T' +p166 +sg36 +(dp167 +g38 +g55 +sg40 +g41 +sg42 +S'209113113' +p168 +sg44 +g123 +sssS'grch38' +p169 +(dp170 +g34 +S'NC_000002.12:g.208248389G>T' +p171 +sg36 +(dp172 +g38 +g55 +sg40 +g41 +sg42 +S'208248389' +p173 +sg44 +g123 +ssssg62 +(dp174 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2' +p175 +sg66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.3' +p176 +sssS'NM_001282386.1:c.394C>T' +p177 +(dp178 +g3 +g4 +sg5 +(lp179 +S'Multiple ALT sequences detected' +p180 +aS'auto-submitting all possible combinations' +p181 
+aS'RefSeqGene record not available' +p182 +asg10 +g4 +sg11 +(lp183 +sg13 +VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 2, mRNA +p184 +sg15 +S'IDH1' +p185 +sg17 +(dp186 +g19 +S'NP_001269315.1:p.(Arg132Cys)' +p187 +sg21 +S'NP_001269315.1:p.(R132C)' +p188 +ssg23 +g24 +sg25 +g4 +sg26 +g4 +sg27 +S'NM_001282386.1:c.394C>T' +p189 +sg29 +g4 +sg30 +(dp190 +S'hg19' +p191 +(dp192 +g34 +S'NC_000002.11:g.209113113G>A' +p193 +sg36 +(dp194 +g38 +g39 +sg40 +g41 +sg42 +S'209113113' +p195 +sg44 +VA +p196 +sssg46 +(dp197 +g34 +S'NC_000002.12:g.208248389G>A' +p198 +sg36 +(dp199 +g38 +g39 +sg40 +g41 +sg42 +S'208248389' +p200 +sg44 +g196 +sssS'grch37' +p201 +(dp202 +g34 +S'NC_000002.11:g.209113113G>A' +p203 +sg36 +(dp204 +g38 +g55 +sg40 +g41 +sg42 +S'209113113' +p205 +sg44 +g196 +sssS'grch38' +p206 +(dp207 +g34 +S'NC_000002.12:g.208248389G>A' +p208 +sg36 +(dp209 +g38 +g55 +sg40 +g41 +sg42 +S'208248389' +p210 +sg44 +g196 +ssssg62 +(dp211 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269315.1' +p212 +sg66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282386.1' +p213 +sssS'NM_005896.2:c.394C>A' +p214 +(dp215 +g3 +g4 +sg5 +(lp216 +S'Multiple ALT sequences detected' +p217 +aS'auto-submitting all possible combinations' +p218 +aS'A more recent version of the selected reference sequence NM_005896.2 is available (NM_005896.3)' +p219 +aS'NM_005896.3:c.394C>A MUST be fully validated prior to use in reports' +p220 +aS'select_variants=NM_005896.3:c.394C>A' +p221 +aS'RefSeqGene record not available' +p222 +asg10 +g4 +sg11 +(lp223 +sg13 +VHomo sapiens isocitrate dehydrogenase 1 (NADP+), soluble (IDH1), mRNA +p224 +sg15 +S'IDH1' +p225 +sg17 +(dp226 +g19 +S'NP_005887.2:p.(Arg132Ser)' +p227 +sg21 +S'NP_005887.2:p.(R132S)' +p228 +ssg23 +g24 +sg25 +g4 +sg26 +g4 +sg27 +S'NM_005896.2:c.394C>A' +p229 +sg29 +g4 +sg30 +(dp230 +S'hg19' +p231 +(dp232 +g34 +S'NC_000002.11:g.209113113G>T' +p233 +sg36 +(dp234 +g38 +g39 +sg40 +g41 +sg42 +S'209113113' +p235 +sg44 +g123 
+sssS'grch37' +p236 +(dp237 +g34 +S'NC_000002.11:g.209113113G>T' +p238 +sg36 +(dp239 +g38 +g55 +sg40 +g41 +sg42 +S'209113113' +p240 +sg44 +g123 +ssssg62 +(dp241 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2' +p242 +sg66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.2' +p243 +sssS'NM_005896.2:c.394C>G' +p244 +(dp245 +g3 +g4 +sg5 +(lp246 +S'Multiple ALT sequences detected' +p247 +aS'auto-submitting all possible combinations' +p248 +aS'A more recent version of the selected reference sequence NM_005896.2 is available (NM_005896.3)' +p249 +aS'NM_005896.3:c.394C>G MUST be fully validated prior to use in reports' +p250 +aS'select_variants=NM_005896.3:c.394C>G' +p251 +aS'RefSeqGene record not available' +p252 +asg10 +g4 +sg11 +(lp253 +sg13 +VHomo sapiens isocitrate dehydrogenase 1 (NADP+), soluble (IDH1), mRNA +p254 +sg15 +S'IDH1' +p255 +sg17 +(dp256 +g19 +S'NP_005887.2:p.(Arg132Gly)' +p257 +sg21 +S'NP_005887.2:p.(R132G)' +p258 +ssg23 +g24 +sg25 +g4 +sg26 +g4 +sg27 +S'NM_005896.2:c.394C>G' +p259 +sg29 +g4 +sg30 +(dp260 +S'hg19' +p261 +(dp262 +g34 +S'NC_000002.11:g.209113113G>C' +p263 +sg36 +(dp264 +g38 +g39 +sg40 +g41 +sg42 +S'209113113' +p265 +sg44 +g45 +sssS'grch37' +p266 +(dp267 +g34 +S'NC_000002.11:g.209113113G>C' +p268 +sg36 +(dp269 +g38 +g55 +sg40 +g41 +sg42 +S'209113113' +p270 +sg44 +g45 +ssssg62 +(dp271 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2' +p272 +sg66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.2' +p273 +sssS'flag' +p274 +S'gene_variant' +p275 +sS'NM_005896.3:c.394C>T' +p276 +(dp277 +g3 +g4 +sg5 +(lp278 +S'Multiple ALT sequences detected' +p279 +aS'auto-submitting all possible combinations' +p280 +aS'RefSeqGene record not available' +p281 +asg10 +g4 +sg11 +(lp282 +sg13 +VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 1, mRNA +p283 +sg15 +S'IDH1' +p284 +sg17 +(dp285 +g19 +S'NP_005887.2:p.(Arg132Cys)' +p286 +sg21 +S'NP_005887.2:p.(R132C)' +p287 +ssg23 +g24 +sg25 +g4 +sg26 +g4 +sg27 
+S'NM_005896.3:c.394C>T' +p288 +sg29 +g4 +sg30 +(dp289 +S'hg19' +p290 +(dp291 +g34 +S'NC_000002.11:g.209113113G>A' +p292 +sg36 +(dp293 +g38 +g39 +sg40 +g41 +sg42 +S'209113113' +p294 +sg44 +g196 +sssg46 +(dp295 +g34 +S'NC_000002.12:g.208248389G>A' +p296 +sg36 +(dp297 +g38 +g39 +sg40 +g41 +sg42 +S'208248389' +p298 +sg44 +g196 +sssS'grch37' +p299 +(dp300 +g34 +S'NC_000002.11:g.209113113G>A' +p301 +sg36 +(dp302 +g38 +g55 +sg40 +g41 +sg42 +S'209113113' +p303 +sg44 +g196 +sssS'grch38' +p304 +(dp305 +g34 +S'NC_000002.12:g.208248389G>A' +p306 +sg36 +(dp307 +g38 +g55 +sg40 +g41 +sg42 +S'208248389' +p308 +sg44 +g196 +ssssg62 +(dp309 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2' +p310 +sg66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.3' +p311 +sssS'NM_001282387.1:c.394C>T' +p312 +(dp313 +g3 +g4 +sg5 +(lp314 +S'Multiple ALT sequences detected' +p315 +aS'auto-submitting all possible combinations' +p316 +aS'RefSeqGene record not available' +p317 +asg10 +g4 +sg11 +(lp318 +sg13 +VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 3, mRNA +p319 +sg15 +S'IDH1' +p320 +sg17 +(dp321 +g19 +S'NP_001269316.1:p.(Arg132Cys)' +p322 +sg21 +S'NP_001269316.1:p.(R132C)' +p323 +ssg23 +g24 +sg25 +g4 +sg26 +g4 +sg27 +S'NM_001282387.1:c.394C>T' +p324 +sg29 +g4 +sg30 +(dp325 +S'hg19' +p326 +(dp327 +g34 +S'NC_000002.11:g.209113113G>A' +p328 +sg36 +(dp329 +g38 +g39 +sg40 +g41 +sg42 +S'209113113' +p330 +sg44 +g196 +sssg46 +(dp331 +g34 +S'NC_000002.12:g.208248389G>A' +p332 +sg36 +(dp333 +g38 +g39 +sg40 +g41 +sg42 +S'208248389' +p334 +sg44 +g196 +sssS'grch37' +p335 +(dp336 +g34 +S'NC_000002.11:g.209113113G>A' +p337 +sg36 +(dp338 +g38 +g55 +sg40 +g41 +sg42 +S'209113113' +p339 +sg44 +g196 +sssS'grch38' +p340 +(dp341 +g34 +S'NC_000002.12:g.208248389G>A' +p342 +sg36 +(dp343 +g38 +g55 +sg40 +g41 +sg42 +S'208248389' +p344 +sg44 +g196 +ssssg62 +(dp345 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269316.1' +p346 +sg66 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282387.1' +p347 +sssS'NM_001282386.1:c.394C>G' +p348 +(dp349 +g3 +g4 +sg5 +(lp350 +S'Multiple ALT sequences detected' +p351 +aS'auto-submitting all possible combinations' +p352 +aS'RefSeqGene record not available' +p353 +asg10 +g4 +sg11 +(lp354 +sg13 +VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 2, mRNA +p355 +sg15 +S'IDH1' +p356 +sg17 +(dp357 +g19 +S'NP_001269315.1:p.(Arg132Gly)' +p358 +sg21 +S'NP_001269315.1:p.(R132G)' +p359 +ssg23 +g24 +sg25 +g4 +sg26 +g4 +sg27 +S'NM_001282386.1:c.394C>G' +p360 +sg29 +g4 +sg30 +(dp361 +S'hg19' +p362 +(dp363 +g34 +S'NC_000002.11:g.209113113G>C' +p364 +sg36 +(dp365 +g38 +g39 +sg40 +g41 +sg42 +S'209113113' +p366 +sg44 +g45 +sssg46 +(dp367 +g34 +S'NC_000002.12:g.208248389G>C' +p368 +sg36 +(dp369 +g38 +g39 +sg40 +g41 +sg42 +S'208248389' +p370 +sg44 +g45 +sssS'grch37' +p371 +(dp372 +g34 +S'NC_000002.11:g.209113113G>C' +p373 +sg36 +(dp374 +g38 +g55 +sg40 +g41 +sg42 +S'209113113' +p375 +sg44 +g45 +sssS'grch38' +p376 +(dp377 +g34 +S'NC_000002.12:g.208248389G>C' +p378 +sg36 +(dp379 +g38 +g55 +sg40 +g41 +sg42 +S'208248389' +p380 +sg44 +g45 +ssssg62 +(dp381 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269315.1' +p382 +sg66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282386.1' +p383 +sssS'NM_005896.2:c.394C>T' +p384 +(dp385 +g3 +g4 +sg5 +(lp386 +S'Multiple ALT sequences detected' +p387 +aS'auto-submitting all possible combinations' +p388 +aS'A more recent version of the selected reference sequence NM_005896.2 is available (NM_005896.3)' +p389 +aS'NM_005896.3:c.394C>T MUST be fully validated prior to use in reports' +p390 +aS'select_variants=NM_005896.3:c.394C>T' +p391 +aS'RefSeqGene record not available' +p392 +asg10 +g4 +sg11 +(lp393 +sg13 +VHomo sapiens isocitrate dehydrogenase 1 (NADP+), soluble (IDH1), mRNA +p394 +sg15 +S'IDH1' +p395 +sg17 +(dp396 +g19 +S'NP_005887.2:p.(Arg132Cys)' +p397 +sg21 +S'NP_005887.2:p.(R132C)' +p398 +ssg23 +g24 
+sg25 +g4 +sg26 +g4 +sg27 +S'NM_005896.2:c.394C>T' +p399 +sg29 +g4 +sg30 +(dp400 +S'hg19' +p401 +(dp402 +g34 +S'NC_000002.11:g.209113113G>A' +p403 +sg36 +(dp404 +g38 +g39 +sg40 +g41 +sg42 +S'209113113' +p405 +sg44 +g196 +sssS'grch37' +p406 +(dp407 +g34 +S'NC_000002.11:g.209113113G>A' +p408 +sg36 +(dp409 +g38 +g55 +sg40 +g41 +sg42 +S'209113113' +p410 +sg44 +g196 +ssssg62 +(dp411 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2' +p412 +sg66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.2' +p413 +sssS'NM_001282386.1:c.394C>A' +p414 +(dp415 +g3 +g4 +sg5 +(lp416 +S'Multiple ALT sequences detected' +p417 +aS'auto-submitting all possible combinations' +p418 +aS'RefSeqGene record not available' +p419 +asg10 +g4 +sg11 +(lp420 +sg13 +VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 2, mRNA +p421 +sg15 +S'IDH1' +p422 +sg17 +(dp423 +g19 +S'NP_001269315.1:p.(Arg132Ser)' +p424 +sg21 +S'NP_001269315.1:p.(R132S)' +p425 +ssg23 +g24 +sg25 +g4 +sg26 +g4 +sg27 +S'NM_001282386.1:c.394C>A' +p426 +sg29 +g4 +sg30 +(dp427 +S'hg19' +p428 +(dp429 +g34 +S'NC_000002.11:g.209113113G>T' +p430 +sg36 +(dp431 +g38 +g39 +sg40 +g41 +sg42 +S'209113113' +p432 +sg44 +g123 +sssg46 +(dp433 +g34 +S'NC_000002.12:g.208248389G>T' +p434 +sg36 +(dp435 +g38 +g39 +sg40 +g41 +sg42 +S'208248389' +p436 +sg44 +g123 +sssS'grch37' +p437 +(dp438 +g34 +S'NC_000002.11:g.209113113G>T' +p439 +sg36 +(dp440 +g38 +g55 +sg40 +g41 +sg42 +S'209113113' +p441 +sg44 +g123 +sssS'grch38' +p442 +(dp443 +g34 +S'NC_000002.12:g.208248389G>T' +p444 +sg36 +(dp445 +g38 +g55 +sg40 +g41 +sg42 +S'208248389' +p446 +sg44 +g123 +ssssg62 +(dp447 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269315.1' +p448 +sg66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282386.1' +p449 +sssS'metadata' +p450 +(dp451 +S'variantvalidator_hgvs_version' +p452 +S'1.1.3' +p453 +sS'uta_schema' +p454 +S'uta_20180821' +p455 +sS'seqrepo_db' +p456 +S'2018-08-21' +p457 +sS'variantvalidator_version' +p458 +S'v0.2' 
+p459 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant168.txt b/VariantValidator/testing/testOutputsMasterITS/variant168.txt new file mode 100644 index 00000000..9af81dc2 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant168.txt @@ -0,0 +1,947 @@ +(dp0 +S'NM_001204314.1:c.*6525_*6526=' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'A more recent version of the selected reference sequence NM_001204314.1 is available (NM_001204314.2)' +p7 +aS'NM_001204314.2:c.*6525_*6526delCTinsTG MUST be fully validated prior to use in reports' +p8 +aS'select_variants=NM_001204314.2:c.*6525_*6526delinsTG' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g4 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA +p15 +sS'gene_symbol' +p16 +S'PRLR' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_001191243.1:p.?' +p21 +sS'slr' +p22 +S'NP_001191243.1:p.?' 
+p23 +ssS'submitted_variant' +p24 +S'NC_000005.9:g.35058665_35058666CA=' +p25 +sS'genome_context_intronic_sequence' +p26 +g4 +sS'hgvs_lrg_variant' +p27 +g4 +sS'hgvs_transcript_variant' +p28 +S'NM_001204314.1:c.*6525_*6526=' +p29 +sS'hgvs_refseqgene_variant' +p30 +g4 +sS'primary_assembly_loci' +p31 +(dp32 +S'hg19' +p33 +(dp34 +S'hgvs_genomic_description' +p35 +S'NC_000005.9:g.35058665_35058666=' +p36 +sS'vcf' +p37 +(dp38 +S'chr' +p39 +S'chr5' +p40 +sS'ref' +p41 +VCA +p42 +sS'pos' +p43 +S'35058665' +p44 +sS'alt' +p45 +g42 +sssS'grch37' +p46 +(dp47 +g35 +S'NC_000005.9:g.35058665_35058666=' +p48 +sg37 +(dp49 +g39 +S'5' +p50 +sg41 +g42 +sg43 +S'35058665' +p51 +sg45 +g42 +ssssS'reference_sequence_records' +p52 +(dp53 +S'protein' +p54 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191243.1' +p55 +sS'transcript' +p56 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204314.1' +p57 +sssS'NM_001204314.2:c.*6528del' +p58 +(dp59 +g3 +g4 +sg5 +(lp60 +S'The displayed variants may be artefacts of aligning NM_001204314.2 with genome build GRCh37' +p61 +aS'NM_001204314.2:c.*6527_*6530 contains 1 transcript base(s) that fail to align to chromosome NC_000005.9' +p62 +aS'Caution should be used when reporting the displayed variant descriptions' +p63 +aS'If you are unsure, please contact admin' +p64 +aS'RefSeqGene record not available' +p65 +asg11 +g4 +sg12 +(lp66 +sg14 +VHomo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA +p67 +sg16 +S'PRLR' +p68 +sg18 +(dp69 +g20 +S'NP_001191243.1:p.?' +p70 +sg22 +S'NP_001191243.1:p.?' 
+p71 +ssg24 +g25 +sg26 +g4 +sg27 +g4 +sg28 +S'NM_001204314.2:c.*6528del' +p72 +sg30 +g4 +sg31 +(dp73 +S'grch38' +p74 +(dp75 +g35 +S'NC_000005.10:g.35058563del' +p76 +sg37 +(dp77 +g39 +g50 +sg41 +S'CA' +p78 +sg43 +S'35058560' +p79 +sg45 +S'C' +p80 +sssS'grch37' +p81 +(dp82 +g35 +S'NC_000005.9:g.35058662_35058668=' +p83 +sg37 +(dp84 +g39 +g50 +sg41 +S'AGACAAG' +p85 +sg43 +S'35058662' +p86 +sg45 +g85 +sssS'hg38' +p87 +(dp88 +g35 +S'NC_000005.10:g.35058563del' +p89 +sg37 +(dp90 +g39 +g40 +sg41 +S'CA' +p91 +sg43 +S'35058560' +p92 +sg45 +g80 +sssS'hg19' +p93 +(dp94 +g35 +S'NC_000005.9:g.35058662_35058668=' +p95 +sg37 +(dp96 +g39 +g40 +sg41 +g85 +sg43 +S'35058662' +p97 +sg45 +g85 +ssssg52 +(dp98 +g54 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191243.1' +p99 +sg56 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204314.2' +p100 +sssS'NM_001204317.1:c.856-9153_856-9152=' +p101 +(dp102 +g3 +g4 +sg5 +(lp103 +S'RefSeqGene record not available' +p104 +asg11 +g4 +sg12 +(lp105 +sg14 +VHomo sapiens prolactin receptor (PRLR), transcript variant 4, mRNA +p106 +sg16 +S'PRLR' +p107 +sg18 +(dp108 +g20 +S'NP_001191246.1:p.?' +p109 +sg22 +S'NP_001191246.1:p.?' 
+p110 +ssg24 +g25 +sg26 +S'NC_000005.9(NM_001204317.1):c.856-9153_856-9152=' +p111 +sg27 +g4 +sg28 +S'NM_001204317.1:c.856-9153_856-9152=' +p112 +sg30 +g4 +sg31 +(dp113 +S'grch38' +p114 +(dp115 +g35 +S'NC_000005.10:g.35058560_35058561=' +p116 +sg37 +(dp117 +g39 +g50 +sg41 +S'CA' +p118 +sg43 +S'35058560' +p119 +sg45 +g118 +sssS'grch37' +p120 +(dp121 +g35 +S'NC_000005.9:g.35058665_35058666=' +p122 +sg37 +(dp123 +g39 +g50 +sg41 +S'CA' +p124 +sg43 +S'35058665' +p125 +sg45 +g124 +sssg87 +(dp126 +g35 +S'NC_000005.10:g.35058560_35058561=' +p127 +sg37 +(dp128 +g39 +g40 +sg41 +g118 +sg43 +S'35058560' +p129 +sg45 +g118 +sssS'hg19' +p130 +(dp131 +g35 +S'NC_000005.9:g.35058665_35058666=' +p132 +sg37 +(dp133 +g39 +g40 +sg41 +g124 +sg43 +S'35058665' +p134 +sg45 +g124 +ssssg52 +(dp135 +g54 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191246.1' +p136 +sg56 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204317.1' +p137 +sssS'NM_001204316.1:c.1009+7385_1009+7386=' +p138 +(dp139 +g3 +g4 +sg5 +(lp140 +S'RefSeqGene record not available' +p141 +asg11 +g4 +sg12 +(lp142 +sg14 +VHomo sapiens prolactin receptor (PRLR), transcript variant 3, mRNA +p143 +sg16 +S'PRLR' +p144 +sg18 +(dp145 +g20 +S'NP_001191245.1:p.?' +p146 +sg22 +S'NP_001191245.1:p.?' 
+p147 +ssg24 +g25 +sg26 +S'NC_000005.9(NM_001204316.1):c.1009+7385_1009+7386=' +p148 +sg27 +g4 +sg28 +S'NM_001204316.1:c.1009+7385_1009+7386=' +p149 +sg30 +g4 +sg31 +(dp150 +S'grch38' +p151 +(dp152 +g35 +S'NC_000005.10:g.35058563_35058564=' +p153 +sg37 +(dp154 +g39 +g50 +sg41 +S'AG' +p155 +sg43 +S'35058563' +p156 +sg45 +g155 +sssS'grch37' +p157 +(dp158 +g35 +S'NC_000005.9:g.35058665_35058666=' +p159 +sg37 +(dp160 +g39 +g50 +sg41 +g124 +sg43 +S'35058665' +p161 +sg45 +g124 +sssg87 +(dp162 +g35 +S'NC_000005.10:g.35058563_35058564=' +p163 +sg37 +(dp164 +g39 +g40 +sg41 +g155 +sg43 +S'35058563' +p165 +sg45 +g155 +sssS'hg19' +p166 +(dp167 +g35 +S'NC_000005.9:g.35058665_35058666=' +p168 +sg37 +(dp169 +g39 +g40 +sg41 +g124 +sg43 +S'35058665' +p170 +sg45 +g124 +ssssg52 +(dp171 +g54 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191245.1' +p172 +sg56 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204316.1' +p173 +sssS'metadata' +p174 +(dp175 +S'variantvalidator_hgvs_version' +p176 +S'1.1.3' +p177 +sS'uta_schema' +p178 +S'uta_20180821' +p179 +sS'seqrepo_db' +p180 +S'2018-08-21' +p181 +sS'variantvalidator_version' +p182 +S'v0.2' +p183 +ssS'flag' +p184 +S'gene_variant' +p185 +sS'NR_037910.1:n.828-9153_828-9152=' +p186 +(dp187 +g3 +g4 +sg5 +(lp188 +S'RefSeqGene record not available' +p189 +asg11 +g4 +sg12 +(lp190 +sg14 +VHomo sapiens prolactin receptor (PRLR), transcript variant 7, non-coding RNA +p191 +sg16 +S'PRLR' +p192 +sg18 +(dp193 +g20 +S'Non-coding :n.' 
+p194 +sg22 +g194 +ssg24 +g25 +sg26 +S'NC_000005.9(NR_037910.1):c.828-9153_828-9152=' +p195 +sg27 +g4 +sg28 +S'NR_037910.1:n.828-9153_828-9152=' +p196 +sg30 +g4 +sg31 +(dp197 +S'grch38' +p198 +(dp199 +g35 +S'NC_000005.10:g.35058560_35058561=' +p200 +sg37 +(dp201 +g39 +g50 +sg41 +g118 +sg43 +S'35058560' +p202 +sg45 +g118 +sssS'grch37' +p203 +(dp204 +g35 +S'NC_000005.9:g.35058665_35058666=' +p205 +sg37 +(dp206 +g39 +g50 +sg41 +g124 +sg43 +S'35058665' +p207 +sg45 +g124 +sssg87 +(dp208 +g35 +S'NC_000005.10:g.35058560_35058561=' +p209 +sg37 +(dp210 +g39 +g40 +sg41 +g118 +sg43 +S'35058560' +p211 +sg45 +g118 +sssS'hg19' +p212 +(dp213 +g35 +S'NC_000005.9:g.35058665_35058666=' +p214 +sg37 +(dp215 +g39 +g40 +sg41 +g124 +sg43 +S'35058665' +p216 +sg45 +g124 +ssssg52 +(dp217 +g56 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_037910.1' +p218 +sssS'NM_001204318.1:c.686-9153_686-9152=' +p219 +(dp220 +g3 +g4 +sg5 +(lp221 +S'RefSeqGene record not available' +p222 +asg11 +g4 +sg12 +(lp223 +sg14 +VHomo sapiens prolactin receptor (PRLR), transcript variant 5, mRNA +p224 +sg16 +S'PRLR' +p225 +sg18 +(dp226 +g20 +S'NP_001191247.1:p.?' +p227 +sg22 +S'NP_001191247.1:p.?' 
+p228 +ssg24 +g25 +sg26 +S'NC_000005.9(NM_001204318.1):c.686-9153_686-9152=' +p229 +sg27 +g4 +sg28 +S'NM_001204318.1:c.686-9153_686-9152=' +p230 +sg30 +g4 +sg31 +(dp231 +S'grch38' +p232 +(dp233 +g35 +S'NC_000005.10:g.35058560_35058561=' +p234 +sg37 +(dp235 +g39 +g50 +sg41 +g118 +sg43 +S'35058560' +p236 +sg45 +g118 +sssS'grch37' +p237 +(dp238 +g35 +S'NC_000005.9:g.35058665_35058666=' +p239 +sg37 +(dp240 +g39 +g50 +sg41 +g124 +sg43 +S'35058665' +p241 +sg45 +g124 +sssg87 +(dp242 +g35 +S'NC_000005.10:g.35058560_35058561=' +p243 +sg37 +(dp244 +g39 +g40 +sg41 +g118 +sg43 +S'35058560' +p245 +sg45 +g118 +sssS'hg19' +p246 +(dp247 +g35 +S'NC_000005.9:g.35058665_35058666=' +p248 +sg37 +(dp249 +g39 +g40 +sg41 +g124 +sg43 +S'35058665' +p250 +sg45 +g124 +ssssg52 +(dp251 +g54 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191247.1' +p252 +sg56 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204318.1' +p253 +sssS'NM_000949.5:c.*6525_*6526=' +p254 +(dp255 +g3 +g4 +sg5 +(lp256 +S'A more recent version of the selected reference sequence NM_000949.5 is available (NM_000949.6)' +p257 +aS'NM_000949.6:c.*6525_*6526delCTinsTG MUST be fully validated prior to use in reports' +p258 +aS'select_variants=NM_000949.6:c.*6525_*6526delinsTG' +p259 +aS'RefSeqGene record not available' +p260 +asg11 +g4 +sg12 +(lp261 +sg14 +VHomo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA +p262 +sg16 +S'PRLR' +p263 +sg18 +(dp264 +g20 +S'NP_000940.1:p.?' +p265 +sg22 +S'NP_000940.1:p.?' 
+p266 +ssg24 +g25 +sg26 +g4 +sg27 +g4 +sg28 +S'NM_000949.5:c.*6525_*6526=' +p267 +sg30 +g4 +sg31 +(dp268 +S'hg19' +p269 +(dp270 +g35 +S'NC_000005.9:g.35058665_35058666=' +p271 +sg37 +(dp272 +g39 +g40 +sg41 +VCA +p273 +sg43 +S'35058665' +p274 +sg45 +g273 +sssS'grch37' +p275 +(dp276 +g35 +S'NC_000005.9:g.35058665_35058666=' +p277 +sg37 +(dp278 +g39 +g50 +sg41 +g273 +sg43 +S'35058665' +p279 +sg45 +g273 +ssssg52 +(dp280 +g54 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000940.1' +p281 +sg56 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000949.5' +p282 +sssS'NM_000949.6:c.*6528del' +p283 +(dp284 +g3 +g4 +sg5 +(lp285 +S'The displayed variants may be artefacts of aligning NM_000949.6 with genome build GRCh37' +p286 +aS'NM_000949.6:c.*6527_*6530 contains 1 transcript base(s) that fail to align to chromosome NC_000005.9' +p287 +aS'Caution should be used when reporting the displayed variant descriptions' +p288 +aS'If you are unsure, please contact admin' +p289 +aS'RefSeqGene record not available' +p290 +asg11 +g4 +sg12 +(lp291 +sg14 +VHomo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA +p292 +sg16 +S'PRLR' +p293 +sg18 +(dp294 +g20 +S'NP_000940.1:p.?' +p295 +sg22 +S'NP_000940.1:p.?' 
+p296 +ssg24 +g25 +sg26 +g4 +sg27 +g4 +sg28 +S'NM_000949.6:c.*6528del' +p297 +sg30 +g4 +sg31 +(dp298 +S'grch38' +p299 +(dp300 +g35 +S'NC_000005.10:g.35058563del' +p301 +sg37 +(dp302 +g39 +g50 +sg41 +S'CA' +p303 +sg43 +S'35058560' +p304 +sg45 +g80 +sssS'grch37' +p305 +(dp306 +g35 +S'NC_000005.9:g.35058662_35058668=' +p307 +sg37 +(dp308 +g39 +g50 +sg41 +g85 +sg43 +S'35058662' +p309 +sg45 +g85 +sssg87 +(dp310 +g35 +S'NC_000005.10:g.35058563del' +p311 +sg37 +(dp312 +g39 +g40 +sg41 +S'CA' +p313 +sg43 +S'35058560' +p314 +sg45 +g80 +sssS'hg19' +p315 +(dp316 +g35 +S'NC_000005.9:g.35058662_35058668=' +p317 +sg37 +(dp318 +g39 +g40 +sg41 +g85 +sg43 +S'35058662' +p319 +sg45 +g85 +ssssg52 +(dp320 +g54 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000940.1' +p321 +sg56 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000949.6' +p322 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant169.txt b/VariantValidator/testing/testOutputsMasterITS/variant169.txt new file mode 100644 index 00000000..e1b7c706 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant169.txt @@ -0,0 +1,174 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_015120.4:c.1580_1581insCCT' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA +p14 +sS'gene_symbol' +p15 +S'ALMS1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_055935.4:p.(Leu527dup)' +p20 +sS'slr' +p21 +S'NP_055935.4:p.(L527dup)' +p22 +ssS'submitted_variant' +p23 +S'NC_000002.11:g.73675227_73675229delTCTinsTCTCTC' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_015120.4:c.1580_1581insCCT' +p28 +sS'hgvs_refseqgene_variant' 
+p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000002.11:g.73675231_73675232insCCT' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr2' +p39 +sS'ref' +p40 +S'T' +p41 +sS'pos' +p42 +S'73675229' +p43 +sS'alt' +p44 +VTCTC +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000002.12:g.73448104_73448105insCCT' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +g41 +sg42 +S'73448102' +p50 +sg44 +VTCTC +p51 +sssS'grch37' +p52 +(dp53 +g34 +S'NC_000002.11:g.73675231_73675232insCCT' +p54 +sg36 +(dp55 +g38 +S'2' +p56 +sg40 +g41 +sg42 +S'73675229' +p57 +sg44 +VTCTC +p58 +sssS'grch38' +p59 +(dp60 +g34 +S'NC_000002.12:g.73448104_73448105insCCT' +p61 +sg36 +(dp62 +g38 +g56 +sg40 +g41 +sg42 +S'73448102' +p63 +sg44 +VTCTC +p64 +ssssS'reference_sequence_records' +p65 +(dp66 +S'protein' +p67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4' +p68 +sS'transcript' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4' +p70 +sssS'metadata' +p71 +(dp72 +S'variantvalidator_hgvs_version' +p73 +S'1.1.3' +p74 +sS'uta_schema' +p75 +S'uta_20180821' +p76 +sS'seqrepo_db' +p77 +S'2018-08-21' +p78 +sS'variantvalidator_version' +p79 +S'v0.2' +p80 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant17.txt b/VariantValidator/testing/testOutputsMasterITS/variant17.txt new file mode 100644 index 00000000..f5670b1d --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant17.txt @@ -0,0 +1,80 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use NC_000006.11:g.32006074C>T' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +g4 +sS'gene_symbol' +p12 +g4 +sS'hgvs_predicted_protein_consequence' +p13 +(dp14 +S'tlr' +p15 +g4 +sS'slr' +p16 +g4 +ssS'submitted_variant' +p17 +S'NM_000500.7:c.-107-19C>T' +p18 +sS'genome_context_intronic_sequence' +p19 +g4 +sS'hgvs_lrg_variant' +p20 +g4 +sS'hgvs_transcript_variant' +p21 +g4 +sS'hgvs_refseqgene_variant' +p22 +g4 +sS'primary_assembly_loci' +p23 +(dp24 +sS'reference_sequence_records' +p25 +g4 +ssS'flag' +p26 +S'warning' +p27 +sS'metadata' +p28 +(dp29 +S'variantvalidator_hgvs_version' +p30 +S'1.1.3' +p31 +sS'uta_schema' +p32 +S'uta_20180821' +p33 +sS'seqrepo_db' +p34 +S'2018-08-21' +p35 +sS'variantvalidator_version' +p36 +S'v0.2' +p37 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant170.txt b/VariantValidator/testing/testOutputsMasterITS/variant170.txt new file mode 100644 index 00000000..57a83ec5 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant170.txt @@ -0,0 +1,183 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000828.4:c.-2dup' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37' +p9 +aS'NM_000828.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' +p10 +aS'Caution should be used when reporting the displayed variant descriptions' +p11 +aS'If you are unsure, please contact admin' +p12 +aS'RefSeqGene record not available' +p13 +asS'refseqgene_context_intronic_sequence' +p14 +g6 +sS'alt_genomic_loci' +p15 +(lp16 +sS'transcript_description' +p17 +VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA +p18 +sS'gene_symbol' +p19 +S'GRIA3' +p20 +sS'hgvs_predicted_protein_consequence' +p21 +(dp22 +S'tlr' +p23 
+S'NP_000819.3:p.?' +p24 +sS'slr' +p25 +S'NP_000819.3:p.?' +p26 +ssS'submitted_variant' +p27 +S'NM_000828.4:c.-2dupG' +p28 +sS'genome_context_intronic_sequence' +p29 +g6 +sS'hgvs_lrg_variant' +p30 +g6 +sS'hgvs_transcript_variant' +p31 +S'NM_000828.4:c.-2dup' +p32 +sS'hgvs_refseqgene_variant' +p33 +g6 +sS'primary_assembly_loci' +p34 +(dp35 +S'hg19' +p36 +(dp37 +S'hgvs_genomic_description' +p38 +S'NC_000023.10:g.122318386_122318387insGG' +p39 +sS'vcf' +p40 +(dp41 +S'chr' +p42 +S'chrX' +p43 +sS'ref' +p44 +S'A' +p45 +sS'pos' +p46 +S'122318386' +p47 +sS'alt' +p48 +VAGG +p49 +sssS'hg38' +p50 +(dp51 +g38 +S'NC_000023.11:g.123184534dup' +p52 +sg40 +(dp53 +g42 +g43 +sg44 +S'G' +p54 +sg46 +S'123184534' +p55 +sg48 +VGG +p56 +sssS'grch37' +p57 +(dp58 +g38 +S'NC_000023.10:g.122318386_122318387insGG' +p59 +sg40 +(dp60 +g42 +S'X' +p61 +sg44 +g45 +sg46 +S'122318386' +p62 +sg48 +VAGG +p63 +sssS'grch38' +p64 +(dp65 +g38 +S'NC_000023.11:g.123184534dup' +p66 +sg40 +(dp67 +g42 +g61 +sg44 +g54 +sg46 +S'123184534' +p68 +sg48 +VGG +p69 +ssssS'reference_sequence_records' +p70 +(dp71 +S'protein' +p72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3' +p73 +sS'transcript' +p74 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4' +p75 +sssS'metadata' +p76 +(dp77 +S'variantvalidator_hgvs_version' +p78 +S'1.1.3' +p79 +sS'uta_schema' +p80 +S'uta_20180821' +p81 +sS'seqrepo_db' +p82 +S'2018-08-21' +p83 +sS'variantvalidator_version' +p84 +S'v0.2' +p85 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant171.txt b/VariantValidator/testing/testOutputsMasterITS/variant171.txt new file mode 100644 index 00000000..6ae08973 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant171.txt @@ -0,0 +1,443 @@ +(dp0 +S'NM_007325.4:c.-2dup' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000023.10:g.122318386A>AGG automapped to NC_000023.10:g.122318386_122318387insGG' +p7 +aS'The displayed variants may be artefacts of aligning NM_007325.4 with genome build GRCh37' +p8 +aS'NM_007325.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' +p9 +aS'Caution should be used when reporting the displayed variant descriptions' +p10 +aS'If you are unsure, please contact admin' +p11 +aS'RefSeqGene record not available' +p12 +asS'refseqgene_context_intronic_sequence' +p13 +g4 +sS'alt_genomic_loci' +p14 +(lp15 +sS'transcript_description' +p16 +VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 1, mRNA +p17 +sS'gene_symbol' +p18 +S'GRIA3' +p19 +sS'hgvs_predicted_protein_consequence' +p20 +(dp21 +S'tlr' +p22 +S'NP_015564.4:p.?' +p23 +sS'slr' +p24 +S'NP_015564.4:p.?' 
+p25 +ssS'submitted_variant' +p26 +S'X-122318386-A-AGG' +p27 +sS'genome_context_intronic_sequence' +p28 +g4 +sS'hgvs_lrg_variant' +p29 +g4 +sS'hgvs_transcript_variant' +p30 +S'NM_007325.4:c.-2dup' +p31 +sS'hgvs_refseqgene_variant' +p32 +g4 +sS'primary_assembly_loci' +p33 +(dp34 +S'hg19' +p35 +(dp36 +S'hgvs_genomic_description' +p37 +S'NC_000023.10:g.122318386_122318387insGG' +p38 +sS'vcf' +p39 +(dp40 +S'chr' +p41 +S'chrX' +p42 +sS'ref' +p43 +S'A' +p44 +sS'pos' +p45 +S'122318386' +p46 +sS'alt' +p47 +VAGG +p48 +sssS'hg38' +p49 +(dp50 +g37 +S'NC_000023.11:g.123184534dup' +p51 +sg39 +(dp52 +g41 +g42 +sg43 +S'G' +p53 +sg45 +S'123184534' +p54 +sg47 +VGG +p55 +sssS'grch37' +p56 +(dp57 +g37 +S'NC_000023.10:g.122318386_122318387insGG' +p58 +sg39 +(dp59 +g41 +S'X' +p60 +sg43 +g44 +sg45 +S'122318386' +p61 +sg47 +VAGG +p62 +sssS'grch38' +p63 +(dp64 +g37 +S'NC_000023.11:g.123184534dup' +p65 +sg39 +(dp66 +g41 +g60 +sg43 +g53 +sg45 +S'123184534' +p67 +sg47 +VGG +p68 +ssssS'reference_sequence_records' +p69 +(dp70 +S'protein' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_015564.4' +p72 +sS'transcript' +p73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007325.4' +p74 +sssS'flag' +p75 +S'gene_variant' +p76 +sS'NM_001256743.1:c.-2dup' +p77 +(dp78 +g3 +g4 +sg5 +(lp79 +S'NC_000023.10:g.122318386A>AGG automapped to NC_000023.10:g.122318386_122318387insGG' +p80 +aS'The displayed variants may be artefacts of aligning NM_001256743.1 with genome build GRCh37' +p81 +aS'NM_001256743.1:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' +p82 +aS'Caution should be used when reporting the displayed variant descriptions' +p83 +aS'If you are unsure, please contact admin' +p84 +aS'RefSeqGene record not available' +p85 +asg13 +g4 +sg14 +(lp86 +sg16 +VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 3, mRNA +p87 +sg18 +S'GRIA3' +p88 +sg20 +(dp89 +g22 +S'NP_001243672.1:p.?' +p90 +sg24 +S'NP_001243672.1:p.?' 
+p91 +ssg26 +g27 +sg28 +g4 +sg29 +g4 +sg30 +S'NM_001256743.1:c.-2dup' +p92 +sg32 +g4 +sg33 +(dp93 +S'hg19' +p94 +(dp95 +g37 +S'NC_000023.10:g.122318386_122318387insGG' +p96 +sg39 +(dp97 +g41 +g42 +sg43 +g44 +sg45 +S'122318386' +p98 +sg47 +VAGG +p99 +sssg49 +(dp100 +g37 +S'NC_000023.11:g.123184534dup' +p101 +sg39 +(dp102 +g41 +g42 +sg43 +g53 +sg45 +S'123184534' +p103 +sg47 +VGG +p104 +sssS'grch37' +p105 +(dp106 +g37 +S'NC_000023.10:g.122318386_122318387insGG' +p107 +sg39 +(dp108 +g41 +g60 +sg43 +g44 +sg45 +S'122318386' +p109 +sg47 +VAGG +p110 +sssS'grch38' +p111 +(dp112 +g37 +S'NC_000023.11:g.123184534dup' +p113 +sg39 +(dp114 +g41 +g60 +sg43 +g53 +sg45 +S'123184534' +p115 +sg47 +VGG +p116 +ssssg69 +(dp117 +g71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243672.1' +p118 +sg73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256743.1' +p119 +sssS'NM_000828.4:c.-2dup' +p120 +(dp121 +g3 +g4 +sg5 +(lp122 +S'NC_000023.10:g.122318386A>AGG automapped to NC_000023.10:g.122318386_122318387insGG' +p123 +aS'The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37' +p124 +aS'NM_000828.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' +p125 +aS'Caution should be used when reporting the displayed variant descriptions' +p126 +aS'If you are unsure, please contact admin' +p127 +aS'RefSeqGene record not available' +p128 +asg13 +g4 +sg14 +(lp129 +sg16 +VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA +p130 +sg18 +S'GRIA3' +p131 +sg20 +(dp132 +g22 +S'NP_000819.3:p.?' +p133 +sg24 +S'NP_000819.3:p.?' 
+p134 +ssg26 +g27 +sg28 +g4 +sg29 +g4 +sg30 +S'NM_000828.4:c.-2dup' +p135 +sg32 +g4 +sg33 +(dp136 +S'hg19' +p137 +(dp138 +g37 +S'NC_000023.10:g.122318386_122318387insGG' +p139 +sg39 +(dp140 +g41 +g42 +sg43 +g44 +sg45 +S'122318386' +p141 +sg47 +VAGG +p142 +sssg49 +(dp143 +g37 +S'NC_000023.11:g.123184534dup' +p144 +sg39 +(dp145 +g41 +g42 +sg43 +g53 +sg45 +S'123184534' +p146 +sg47 +VGG +p147 +sssS'grch37' +p148 +(dp149 +g37 +S'NC_000023.10:g.122318386_122318387insGG' +p150 +sg39 +(dp151 +g41 +g60 +sg43 +g44 +sg45 +S'122318386' +p152 +sg47 +VAGG +p153 +sssS'grch38' +p154 +(dp155 +g37 +S'NC_000023.11:g.123184534dup' +p156 +sg39 +(dp157 +g41 +g60 +sg43 +g53 +sg45 +S'123184534' +p158 +sg47 +VGG +p159 +ssssg69 +(dp160 +g71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3' +p161 +sg73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4' +p162 +sssS'metadata' +p163 +(dp164 +S'variantvalidator_hgvs_version' +p165 +S'1.1.3' +p166 +sS'uta_schema' +p167 +S'uta_20180821' +p168 +sS'seqrepo_db' +p169 +S'2018-08-21' +p170 +sS'variantvalidator_version' +p171 +S'v0.2' +p172 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant172.txt b/VariantValidator/testing/testOutputsMasterITS/variant172.txt new file mode 100644 index 00000000..06ff7fd8 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant172.txt @@ -0,0 +1,182 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000828.4:c.-2G>T' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37' +p9 +aS'NM_000828.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' +p10 +aS'Caution should be used when reporting the displayed variant descriptions' +p11 +aS'If you are unsure, please contact admin' +p12 +aS'RefSeqGene record not available' +p13 +asS'refseqgene_context_intronic_sequence' +p14 +g6 +sS'alt_genomic_loci' +p15 +(lp16 +sS'transcript_description' +p17 +VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA +p18 +sS'gene_symbol' +p19 +S'GRIA3' +p20 +sS'hgvs_predicted_protein_consequence' +p21 +(dp22 +S'tlr' +p23 +S'NP_000819.3:p.?' +p24 +sS'slr' +p25 +S'NP_000819.3:p.?' 
+p26 +ssS'submitted_variant' +p27 +S'NM_000828.4:c.-2G>T' +p28 +sS'genome_context_intronic_sequence' +p29 +g6 +sS'hgvs_lrg_variant' +p30 +g6 +sS'hgvs_transcript_variant' +p31 +S'NM_000828.4:c.-2G>T' +p32 +sS'hgvs_refseqgene_variant' +p33 +g6 +sS'primary_assembly_loci' +p34 +(dp35 +S'hg19' +p36 +(dp37 +S'hgvs_genomic_description' +p38 +S'NC_000023.10:g.122318386_122318387insT' +p39 +sS'vcf' +p40 +(dp41 +S'chr' +p42 +S'chrX' +p43 +sS'ref' +p44 +S'A' +p45 +sS'pos' +p46 +S'122318386' +p47 +sS'alt' +p48 +VAT +p49 +sssS'hg38' +p50 +(dp51 +g38 +S'NC_000023.11:g.123184534G>T' +p52 +sg40 +(dp53 +g42 +g43 +sg44 +VG +p54 +sg46 +S'123184534' +p55 +sg48 +VT +p56 +sssS'grch37' +p57 +(dp58 +g38 +S'NC_000023.10:g.122318386_122318387insT' +p59 +sg40 +(dp60 +g42 +S'X' +p61 +sg44 +g45 +sg46 +S'122318386' +p62 +sg48 +VAT +p63 +sssS'grch38' +p64 +(dp65 +g38 +S'NC_000023.11:g.123184534G>T' +p66 +sg40 +(dp67 +g42 +g61 +sg44 +g54 +sg46 +S'123184534' +p68 +sg48 +g56 +ssssS'reference_sequence_records' +p69 +(dp70 +S'protein' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3' +p72 +sS'transcript' +p73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4' +p74 +sssS'metadata' +p75 +(dp76 +S'variantvalidator_hgvs_version' +p77 +S'1.1.3' +p78 +sS'uta_schema' +p79 +S'uta_20180821' +p80 +sS'seqrepo_db' +p81 +S'2018-08-21' +p82 +sS'variantvalidator_version' +p83 +S'v0.2' +p84 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant173.txt b/VariantValidator/testing/testOutputsMasterITS/variant173.txt new file mode 100644 index 00000000..82ee8c2e --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant173.txt @@ -0,0 +1,181 @@ +(dp0 +S'NM_000828.4:c.-2G=' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37' +p7 +aS'NM_000828.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' +p8 +aS'Caution should be used when reporting the displayed variant descriptions' +p9 +aS'If you are unsure, please contact admin' +p10 +aS'RefSeqGene record not available' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g4 +sS'alt_genomic_loci' +p13 +(lp14 +sS'transcript_description' +p15 +VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA +p16 +sS'gene_symbol' +p17 +S'GRIA3' +p18 +sS'hgvs_predicted_protein_consequence' +p19 +(dp20 +S'tlr' +p21 +S'NP_000819.3:p.?' +p22 +sS'slr' +p23 +S'NP_000819.3:p.?' 
+p24 +ssS'submitted_variant' +p25 +S'NM_000828.4:c.-2G=' +p26 +sS'genome_context_intronic_sequence' +p27 +g4 +sS'hgvs_lrg_variant' +p28 +g4 +sS'hgvs_transcript_variant' +p29 +S'NM_000828.4:c.-2G=' +p30 +sS'hgvs_refseqgene_variant' +p31 +g4 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000023.10:g.122318386_122318387insG' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chrX' +p41 +sS'ref' +p42 +S'A' +p43 +sS'pos' +p44 +S'122318386' +p45 +sS'alt' +p46 +VAG +p47 +sssS'hg38' +p48 +(dp49 +g36 +S'NC_000023.11:g.123184534G=' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +VG +p52 +sg44 +S'123184534' +p53 +sg46 +g52 +sssS'grch37' +p54 +(dp55 +g36 +S'NC_000023.10:g.122318386_122318387insG' +p56 +sg38 +(dp57 +g40 +S'X' +p58 +sg42 +g43 +sg44 +S'122318386' +p59 +sg46 +VAG +p60 +sssS'grch38' +p61 +(dp62 +g36 +S'NC_000023.11:g.123184534G=' +p63 +sg38 +(dp64 +g40 +g58 +sg42 +g52 +sg44 +S'123184534' +p65 +sg46 +g52 +ssssS'reference_sequence_records' +p66 +(dp67 +S'protein' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3' +p69 +sS'transcript' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4' +p71 +sssS'flag' +p72 +S'gene_variant' +p73 +sS'metadata' +p74 +(dp75 +S'variantvalidator_hgvs_version' +p76 +S'1.1.3' +p77 +sS'uta_schema' +p78 +S'uta_20180821' +p79 +sS'seqrepo_db' +p80 +S'2018-08-21' +p81 +sS'variantvalidator_version' +p82 +S'v0.2' +p83 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant174.txt b/VariantValidator/testing/testOutputsMasterITS/variant174.txt new file mode 100644 index 00000000..6f2fbe91 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant174.txt @@ -0,0 +1,438 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_000828.4:c.-2G>T' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'NC_000023.10:g.122318386A>AT automapped to NC_000023.10:g.122318386_122318387insT' +p19 +aS'The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37' +p20 +aS'NM_000828.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' +p21 +aS'Caution should be used when reporting the displayed variant descriptions' +p22 +aS'If you are unsure, please contact admin' +p23 +aS'RefSeqGene record not available' +p24 +asS'refseqgene_context_intronic_sequence' +p25 +g16 +sS'alt_genomic_loci' +p26 +(lp27 +sS'transcript_description' +p28 +VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA +p29 +sS'gene_symbol' +p30 +S'GRIA3' +p31 +sS'hgvs_predicted_protein_consequence' +p32 +(dp33 +S'tlr' +p34 +S'NP_000819.3:p.?' +p35 +sS'slr' +p36 +S'NP_000819.3:p.?' 
+p37 +ssS'submitted_variant' +p38 +S'X-122318386-A-AT' +p39 +sS'genome_context_intronic_sequence' +p40 +g16 +sS'hgvs_lrg_variant' +p41 +g16 +sS'hgvs_transcript_variant' +p42 +S'NM_000828.4:c.-2G>T' +p43 +sS'hgvs_refseqgene_variant' +p44 +g16 +sS'primary_assembly_loci' +p45 +(dp46 +S'hg19' +p47 +(dp48 +S'hgvs_genomic_description' +p49 +S'NC_000023.10:g.122318386_122318387insT' +p50 +sS'vcf' +p51 +(dp52 +S'chr' +p53 +S'chrX' +p54 +sS'ref' +p55 +S'A' +p56 +sS'pos' +p57 +S'122318386' +p58 +sS'alt' +p59 +VAT +p60 +sssS'hg38' +p61 +(dp62 +g49 +S'NC_000023.11:g.123184534G>T' +p63 +sg51 +(dp64 +g53 +g54 +sg55 +VG +p65 +sg57 +S'123184534' +p66 +sg59 +VT +p67 +sssS'grch37' +p68 +(dp69 +g49 +S'NC_000023.10:g.122318386_122318387insT' +p70 +sg51 +(dp71 +g53 +S'X' +p72 +sg55 +g56 +sg57 +S'122318386' +p73 +sg59 +VAT +p74 +sssS'grch38' +p75 +(dp76 +g49 +S'NC_000023.11:g.123184534G>T' +p77 +sg51 +(dp78 +g53 +g72 +sg55 +g65 +sg57 +S'123184534' +p79 +sg59 +g67 +ssssS'reference_sequence_records' +p80 +(dp81 +S'protein' +p82 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3' +p83 +sS'transcript' +p84 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4' +p85 +sssS'NM_001256743.1:c.-2G>T' +p86 +(dp87 +g15 +g16 +sg17 +(lp88 +S'NC_000023.10:g.122318386A>AT automapped to NC_000023.10:g.122318386_122318387insT' +p89 +aS'The displayed variants may be artefacts of aligning NM_001256743.1 with genome build GRCh37' +p90 +aS'NM_001256743.1:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' +p91 +aS'Caution should be used when reporting the displayed variant descriptions' +p92 +aS'If you are unsure, please contact admin' +p93 +aS'RefSeqGene record not available' +p94 +asg25 +g16 +sg26 +(lp95 +sg28 +VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 3, mRNA +p96 +sg30 +S'GRIA3' +p97 +sg32 +(dp98 +g34 +S'NP_001243672.1:p.?' +p99 +sg36 +S'NP_001243672.1:p.?' 
+p100 +ssg38 +g39 +sg40 +g16 +sg41 +g16 +sg42 +S'NM_001256743.1:c.-2G>T' +p101 +sg44 +g16 +sg45 +(dp102 +S'hg19' +p103 +(dp104 +g49 +S'NC_000023.10:g.122318386_122318387insT' +p105 +sg51 +(dp106 +g53 +g54 +sg55 +g56 +sg57 +S'122318386' +p107 +sg59 +VAT +p108 +sssg61 +(dp109 +g49 +S'NC_000023.11:g.123184534G>T' +p110 +sg51 +(dp111 +g53 +g54 +sg55 +g65 +sg57 +S'123184534' +p112 +sg59 +g67 +sssS'grch37' +p113 +(dp114 +g49 +S'NC_000023.10:g.122318386_122318387insT' +p115 +sg51 +(dp116 +g53 +g72 +sg55 +g56 +sg57 +S'122318386' +p117 +sg59 +VAT +p118 +sssS'grch38' +p119 +(dp120 +g49 +S'NC_000023.11:g.123184534G>T' +p121 +sg51 +(dp122 +g53 +g72 +sg55 +g65 +sg57 +S'123184534' +p123 +sg59 +g67 +ssssg80 +(dp124 +g82 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243672.1' +p125 +sg84 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256743.1' +p126 +sssS'NM_007325.4:c.-2G>T' +p127 +(dp128 +g15 +g16 +sg17 +(lp129 +S'NC_000023.10:g.122318386A>AT automapped to NC_000023.10:g.122318386_122318387insT' +p130 +aS'The displayed variants may be artefacts of aligning NM_007325.4 with genome build GRCh37' +p131 +aS'NM_007325.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' +p132 +aS'Caution should be used when reporting the displayed variant descriptions' +p133 +aS'If you are unsure, please contact admin' +p134 +aS'RefSeqGene record not available' +p135 +asg25 +g16 +sg26 +(lp136 +sg28 +VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 1, mRNA +p137 +sg30 +S'GRIA3' +p138 +sg32 +(dp139 +g34 +S'NP_015564.4:p.?' +p140 +sg36 +S'NP_015564.4:p.?' 
+p141 +ssg38 +g39 +sg40 +g16 +sg41 +g16 +sg42 +S'NM_007325.4:c.-2G>T' +p142 +sg44 +g16 +sg45 +(dp143 +S'hg19' +p144 +(dp145 +g49 +S'NC_000023.10:g.122318386_122318387insT' +p146 +sg51 +(dp147 +g53 +g54 +sg55 +g56 +sg57 +S'122318386' +p148 +sg59 +VAT +p149 +sssg61 +(dp150 +g49 +S'NC_000023.11:g.123184534G>T' +p151 +sg51 +(dp152 +g53 +g54 +sg55 +g65 +sg57 +S'123184534' +p153 +sg59 +g67 +sssS'grch37' +p154 +(dp155 +g49 +S'NC_000023.10:g.122318386_122318387insT' +p156 +sg51 +(dp157 +g53 +g72 +sg55 +g56 +sg57 +S'122318386' +p158 +sg59 +VAT +p159 +sssS'grch38' +p160 +(dp161 +g49 +S'NC_000023.11:g.123184534G>T' +p162 +sg51 +(dp163 +g53 +g72 +sg55 +g65 +sg57 +S'123184534' +p164 +sg59 +g67 +ssssg80 +(dp165 +g82 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_015564.4' +p166 +sg84 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007325.4' +p167 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant175.txt b/VariantValidator/testing/testOutputsMasterITS/variant175.txt new file mode 100644 index 00000000..fefdaa7b --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant175.txt @@ -0,0 +1,183 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000828.4:c.-2_-1insT' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37' +p9 +aS'NM_000828.4:c.-2_-1 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' +p10 +aS'Caution should be used when reporting the displayed variant descriptions' +p11 +aS'If you are unsure, please contact admin' +p12 +aS'RefSeqGene record not available' +p13 +asS'refseqgene_context_intronic_sequence' +p14 +g6 +sS'alt_genomic_loci' +p15 +(lp16 +sS'transcript_description' +p17 +VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA +p18 +sS'gene_symbol' +p19 +S'GRIA3' +p20 +sS'hgvs_predicted_protein_consequence' +p21 
+(dp22 +S'tlr' +p23 +S'NP_000819.3:p.?' +p24 +sS'slr' +p25 +S'NP_000819.3:p.?' +p26 +ssS'submitted_variant' +p27 +S'NM_000828.4:c.-2_-1insT' +p28 +sS'genome_context_intronic_sequence' +p29 +g6 +sS'hgvs_lrg_variant' +p30 +g6 +sS'hgvs_transcript_variant' +p31 +S'NM_000828.4:c.-2_-1insT' +p32 +sS'hgvs_refseqgene_variant' +p33 +g6 +sS'primary_assembly_loci' +p34 +(dp35 +S'hg19' +p36 +(dp37 +S'hgvs_genomic_description' +p38 +S'NC_000023.10:g.122318386_122318387insGT' +p39 +sS'vcf' +p40 +(dp41 +S'chr' +p42 +S'chrX' +p43 +sS'ref' +p44 +S'A' +p45 +sS'pos' +p46 +S'122318386' +p47 +sS'alt' +p48 +S'AGT' +p49 +sssS'hg38' +p50 +(dp51 +g38 +S'NC_000023.11:g.123184534_123184535insT' +p52 +sg40 +(dp53 +g42 +g43 +sg44 +S'G' +p54 +sg46 +S'123184534' +p55 +sg48 +VGT +p56 +sssS'grch37' +p57 +(dp58 +g38 +S'NC_000023.10:g.122318386_122318387insGT' +p59 +sg40 +(dp60 +g42 +S'X' +p61 +sg44 +g45 +sg46 +S'122318386' +p62 +sg48 +S'AGT' +p63 +sssS'grch38' +p64 +(dp65 +g38 +S'NC_000023.11:g.123184534_123184535insT' +p66 +sg40 +(dp67 +g42 +g61 +sg44 +g54 +sg46 +S'123184534' +p68 +sg48 +VGT +p69 +ssssS'reference_sequence_records' +p70 +(dp71 +S'protein' +p72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3' +p73 +sS'transcript' +p74 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4' +p75 +sssS'metadata' +p76 +(dp77 +S'variantvalidator_hgvs_version' +p78 +S'1.1.3' +p79 +sS'uta_schema' +p80 +S'uta_20180821' +p81 +sS'seqrepo_db' +p82 +S'2018-08-21' +p83 +sS'variantvalidator_version' +p84 +S'v0.2' +p85 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant176.txt b/VariantValidator/testing/testOutputsMasterITS/variant176.txt new file mode 100644 index 00000000..2b17950e --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant176.txt @@ -0,0 +1,182 @@ +(dp0 +S'NM_000828.4:c.-3_-2insT' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37' +p7 +aS'NM_000828.4:c.-3_-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' +p8 +aS'Caution should be used when reporting the displayed variant descriptions' +p9 +aS'If you are unsure, please contact admin' +p10 +aS'RefSeqGene record not available' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g4 +sS'alt_genomic_loci' +p13 +(lp14 +sS'transcript_description' +p15 +VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA +p16 +sS'gene_symbol' +p17 +S'GRIA3' +p18 +sS'hgvs_predicted_protein_consequence' +p19 +(dp20 +S'tlr' +p21 +S'NP_000819.3:p.?' +p22 +sS'slr' +p23 +S'NP_000819.3:p.?' 
+p24 +ssS'submitted_variant' +p25 +S'NM_000828.4:c.-3_-2insT' +p26 +sS'genome_context_intronic_sequence' +p27 +g4 +sS'hgvs_lrg_variant' +p28 +g4 +sS'hgvs_transcript_variant' +p29 +S'NM_000828.4:c.-3_-2insT' +p30 +sS'hgvs_refseqgene_variant' +p31 +g4 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000023.10:g.122318386_122318387insTG' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chrX' +p41 +sS'ref' +p42 +S'A' +p43 +sS'pos' +p44 +S'122318386' +p45 +sS'alt' +p46 +S'ATG' +p47 +sssS'hg38' +p48 +(dp49 +g36 +S'NC_000023.11:g.123184533_123184534insT' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +g43 +sg44 +S'123184533' +p52 +sg46 +VAT +p53 +sssS'grch37' +p54 +(dp55 +g36 +S'NC_000023.10:g.122318386_122318387insTG' +p56 +sg38 +(dp57 +g40 +S'X' +p58 +sg42 +g43 +sg44 +S'122318386' +p59 +sg46 +S'ATG' +p60 +sssS'grch38' +p61 +(dp62 +g36 +S'NC_000023.11:g.123184533_123184534insT' +p63 +sg38 +(dp64 +g40 +g58 +sg42 +g43 +sg44 +S'123184533' +p65 +sg46 +VAT +p66 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4' +p72 +sssS'flag' +p73 +S'gene_variant' +p74 +sS'metadata' +p75 +(dp76 +S'variantvalidator_hgvs_version' +p77 +S'1.1.3' +p78 +sS'uta_schema' +p79 +S'uta_20180821' +p80 +sS'seqrepo_db' +p81 +S'2018-08-21' +p82 +sS'variantvalidator_version' +p83 +S'v0.2' +p84 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant177.txt b/VariantValidator/testing/testOutputsMasterITS/variant177.txt new file mode 100644 index 00000000..b0359a87 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant177.txt @@ -0,0 +1,182 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000828.4:c.-2delinsTT' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37' +p9 +aS'NM_000828.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' +p10 +aS'Caution should be used when reporting the displayed variant descriptions' +p11 +aS'If you are unsure, please contact admin' +p12 +aS'RefSeqGene record not available' +p13 +asS'refseqgene_context_intronic_sequence' +p14 +g6 +sS'alt_genomic_loci' +p15 +(lp16 +sS'transcript_description' +p17 +VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA +p18 +sS'gene_symbol' +p19 +S'GRIA3' +p20 +sS'hgvs_predicted_protein_consequence' +p21 +(dp22 +S'tlr' +p23 +S'NP_000819.3:p.?' +p24 +sS'slr' +p25 +S'NP_000819.3:p.?' 
+p26 +ssS'submitted_variant' +p27 +S'NM_000828.4:c.-2delGinsTT' +p28 +sS'genome_context_intronic_sequence' +p29 +g6 +sS'hgvs_lrg_variant' +p30 +g6 +sS'hgvs_transcript_variant' +p31 +S'NM_000828.4:c.-2delinsTT' +p32 +sS'hgvs_refseqgene_variant' +p33 +g6 +sS'primary_assembly_loci' +p34 +(dp35 +S'hg19' +p36 +(dp37 +S'hgvs_genomic_description' +p38 +S'NC_000023.10:g.122318386_122318387insTT' +p39 +sS'vcf' +p40 +(dp41 +S'chr' +p42 +S'chrX' +p43 +sS'ref' +p44 +S'A' +p45 +sS'pos' +p46 +S'122318386' +p47 +sS'alt' +p48 +VATT +p49 +sssS'hg38' +p50 +(dp51 +g38 +S'NC_000023.11:g.123184534delinsTT' +p52 +sg40 +(dp53 +g42 +g43 +sg44 +S'G' +p54 +sg46 +S'123184534' +p55 +sg48 +VTT +p56 +sssS'grch37' +p57 +(dp58 +g38 +S'NC_000023.10:g.122318386_122318387insTT' +p59 +sg40 +(dp60 +g42 +S'X' +p61 +sg44 +g45 +sg46 +S'122318386' +p62 +sg48 +VATT +p63 +sssS'grch38' +p64 +(dp65 +g38 +S'NC_000023.11:g.123184534delinsTT' +p66 +sg40 +(dp67 +g42 +g61 +sg44 +g54 +sg46 +S'123184534' +p68 +sg48 +g56 +ssssS'reference_sequence_records' +p69 +(dp70 +S'protein' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3' +p72 +sS'transcript' +p73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4' +p74 +sssS'metadata' +p75 +(dp76 +S'variantvalidator_hgvs_version' +p77 +S'1.1.3' +p78 +sS'uta_schema' +p79 +S'uta_20180821' +p80 +sS'seqrepo_db' +p81 +S'2018-08-21' +p82 +sS'variantvalidator_version' +p83 +S'v0.2' +p84 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant178.txt b/VariantValidator/testing/testOutputsMasterITS/variant178.txt new file mode 100644 index 00000000..f6fee1b0 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant178.txt @@ -0,0 +1,182 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000828.4:c.-2_-1delinsTT' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37' +p9 +aS'NM_000828.4:c.-2_-1 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' +p10 +aS'Caution should be used when reporting the displayed variant descriptions' +p11 +aS'If you are unsure, please contact admin' +p12 +aS'RefSeqGene record not available' +p13 +asS'refseqgene_context_intronic_sequence' +p14 +g6 +sS'alt_genomic_loci' +p15 +(lp16 +sS'transcript_description' +p17 +VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA +p18 +sS'gene_symbol' +p19 +S'GRIA3' +p20 +sS'hgvs_predicted_protein_consequence' +p21 +(dp22 +S'tlr' +p23 +S'NP_000819.3:p.?' +p24 +sS'slr' +p25 +S'NP_000819.3:p.?' 
+p26 +ssS'submitted_variant' +p27 +S'NM_000828.4:c.-2_-1delGCinsTT' +p28 +sS'genome_context_intronic_sequence' +p29 +g6 +sS'hgvs_lrg_variant' +p30 +g6 +sS'hgvs_transcript_variant' +p31 +S'NM_000828.4:c.-2_-1delinsTT' +p32 +sS'hgvs_refseqgene_variant' +p33 +g6 +sS'primary_assembly_loci' +p34 +(dp35 +S'hg19' +p36 +(dp37 +S'hgvs_genomic_description' +p38 +S'NC_000023.10:g.122318387delinsTT' +p39 +sS'vcf' +p40 +(dp41 +S'chr' +p42 +S'chrX' +p43 +sS'ref' +p44 +S'C' +p45 +sS'pos' +p46 +S'122318387' +p47 +sS'alt' +p48 +S'TT' +p49 +sssS'hg38' +p50 +(dp51 +g38 +S'NC_000023.11:g.123184534_123184535delinsTT' +p52 +sg40 +(dp53 +g42 +g43 +sg44 +S'GC' +p54 +sg46 +S'123184534' +p55 +sg48 +VTT +p56 +sssS'grch37' +p57 +(dp58 +g38 +S'NC_000023.10:g.122318387delinsTT' +p59 +sg40 +(dp60 +g42 +S'X' +p61 +sg44 +g45 +sg46 +S'122318387' +p62 +sg48 +g49 +sssS'grch38' +p63 +(dp64 +g38 +S'NC_000023.11:g.123184534_123184535delinsTT' +p65 +sg40 +(dp66 +g42 +g61 +sg44 +S'GC' +p67 +sg46 +S'123184534' +p68 +sg48 +g56 +ssssS'reference_sequence_records' +p69 +(dp70 +S'protein' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3' +p72 +sS'transcript' +p73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4' +p74 +sssS'metadata' +p75 +(dp76 +S'variantvalidator_hgvs_version' +p77 +S'1.1.3' +p78 +sS'uta_schema' +p79 +S'uta_20180821' +p80 +sS'seqrepo_db' +p81 +S'2018-08-21' +p82 +sS'variantvalidator_version' +p83 +S'v0.2' +p84 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant179.txt b/VariantValidator/testing/testOutputsMasterITS/variant179.txt new file mode 100644 index 00000000..1a7d2161 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant179.txt @@ -0,0 +1,174 @@ +(dp0 +S'NM_000828.4:c.-3_-2delinsTT' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA +p12 +sS'gene_symbol' +p13 +S'GRIA3' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_000819.3:p.?' +p18 +sS'slr' +p19 +S'NP_000819.3:p.?' +p20 +ssS'submitted_variant' +p21 +S'NM_000828.4:c.-3_-2delAGinsTT' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_000828.4:c.-3_-2delinsTT' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000023.10:g.122318386delinsTT' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chrX' +p37 +sS'ref' +p38 +S'A' +p39 +sS'pos' +p40 +S'122318386' +p41 +sS'alt' +p42 +S'TT' +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000023.11:g.123184533_123184534delinsTT' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +S'AG' +p48 +sg40 +S'123184533' +p49 +sg42 +VTT +p50 +sssS'grch37' +p51 +(dp52 +g32 +S'NC_000023.10:g.122318386delinsTT' +p53 +sg34 +(dp54 +g36 +S'X' +p55 +sg38 +g39 +sg40 +S'122318386' +p56 +sg42 +g43 +sssS'grch38' +p57 +(dp58 +g32 +S'NC_000023.11:g.123184533_123184534delinsTT' +p59 +sg34 +(dp60 +g36 +g55 +sg38 +S'AG' +p61 +sg40 +S'123184533' +p62 +sg42 +g50 +ssssS'reference_sequence_records' +p63 +(dp64 +S'protein' +p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3' +p66 +sS'transcript' 
+p67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4' +p68 +sssS'flag' +p69 +S'gene_variant' +p70 +sS'metadata' +p71 +(dp72 +S'variantvalidator_hgvs_version' +p73 +S'1.1.3' +p74 +sS'uta_schema' +p75 +S'uta_20180821' +p76 +sS'seqrepo_db' +p77 +S'2018-08-21' +p78 +sS'variantvalidator_version' +p79 +S'v0.2' +p80 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant18.txt b/VariantValidator/testing/testOutputsMasterITS/variant18.txt new file mode 100644 index 00000000..ae1a7d0d --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant18.txt @@ -0,0 +1,82 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant' +p7 +aS'Instead use NC_000011.9:g.5248381A=' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +g4 +sS'gene_symbol' +p13 +g4 +sS'hgvs_predicted_protein_consequence' +p14 +(dp15 +S'tlr' +p16 +g4 +sS'slr' +p17 +g4 +ssS'submitted_variant' +p18 +S'NM_000518.4:c.-130C>T' +p19 +sS'genome_context_intronic_sequence' +p20 +g4 +sS'hgvs_lrg_variant' +p21 +g4 +sS'hgvs_transcript_variant' +p22 +g4 +sS'hgvs_refseqgene_variant' +p23 +g4 +sS'primary_assembly_loci' +p24 +(dp25 +sS'reference_sequence_records' +p26 +g4 +ssS'flag' +p27 +S'warning' +p28 +sS'metadata' +p29 +(dp30 +S'variantvalidator_hgvs_version' +p31 +S'1.1.3' +p32 +sS'uta_schema' +p33 +S'uta_20180821' +p34 +sS'seqrepo_db' +p35 +S'2018-08-21' +p36 +sS'variantvalidator_version' +p37 +S'v0.2' +p38 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant180.txt b/VariantValidator/testing/testOutputsMasterITS/variant180.txt new file mode 100644 index 00000000..a3671f32 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant180.txt @@ -0,0 +1,506 @@ +(dp0 +S'NM_014249.3:c.951dup' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'The displayed variants may be artefacts of aligning NM_014249.3 with genome build GRCh37' +p7 +aS'NC_000015.9:g.72105926_72105932 contains 1 genomic base(s) that fail to align to transcript NM_014249.3' +p8 +aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_014249.3 between positions c.947_948' +p9 +aS'Caution should be used when reporting the displayed variant descriptions' +p10 +aS'If you are unsure, please contact admin' +p11 +aS'RefSeqGene record not available' +p12 +asS'refseqgene_context_intronic_sequence' +p13 +g4 +sS'alt_genomic_loci' +p14 +(lp15 +sS'transcript_description' +p16 +VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA +p17 +sS'gene_symbol' +p18 +S'NR2E3' +p19 +sS'hgvs_predicted_protein_consequence' +p20 +(dp21 +S'tlr' +p22 +S'NP_055064.1:p.(Thr318HisfsTer23)' +p23 +sS'slr' +p24 +S'NP_055064.1:p.(T318Hfs*23)' +p25 +ssS'submitted_variant' +p26 +S'15-72105929-C-C' +p27 +sS'genome_context_intronic_sequence' +p28 +g4 +sS'hgvs_lrg_variant' +p29 +g4 +sS'hgvs_transcript_variant' +p30 +S'NM_014249.3:c.951dup' +p31 +sS'hgvs_refseqgene_variant' +p32 +g4 +sS'primary_assembly_loci' +p33 +(dp34 +S'grch38' +p35 +(dp36 +S'hgvs_genomic_description' +p37 +S'NC_000015.10:g.71813592dup' +p38 +sS'vcf' +p39 +(dp40 +S'chr' +p41 +S'15' +p42 +sS'ref' +p43 +S'C' +p44 +sS'pos' +p45 +S'71813589' +p46 +sS'alt' +p47 +VCC +p48 +sssS'grch37' +p49 +(dp50 +g37 +S'NC_000015.9:g.72105924_72105934=' +p51 +sg39 +(dp52 +g41 +g42 +sg43 +S'GTGGACCCCCA' +p53 +sg45 
+S'72105924' +p54 +sg47 +g53 +sssS'hg38' +p55 +(dp56 +g37 +S'NC_000015.10:g.71813592dup' +p57 +sg39 +(dp58 +g41 +S'chr15' +p59 +sg43 +g44 +sg45 +S'71813589' +p60 +sg47 +VCC +p61 +sssS'hg19' +p62 +(dp63 +g37 +S'NC_000015.9:g.72105924_72105934=' +p64 +sg39 +(dp65 +g41 +g59 +sg43 +g53 +sg45 +S'72105924' +p66 +sg47 +g53 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3' +p72 +sssS'NM_014249.2:c.951dup' +p73 +(dp74 +g3 +g4 +sg5 +(lp75 +S'The displayed variants may be artefacts of aligning NM_014249.2 with genome build GRCh37' +p76 +aS'NC_000015.9:g.72105926_72105932 contains 1 genomic base(s) that fail to align to transcript NM_014249.2' +p77 +aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_014249.2 between positions c.947_948' +p78 +aS'Caution should be used when reporting the displayed variant descriptions' +p79 +aS'If you are unsure, please contact admin' +p80 +aS'A more recent version of the selected reference sequence NM_014249.2 is available (NM_014249.3)' +p81 +aS'NM_014249.3:c.951dupC MUST be fully validated prior to use in reports' +p82 +aS'select_variants=NM_014249.3:c.951dup' +p83 +aS'RefSeqGene record not available' +p84 +asg13 +g4 +sg14 +(lp85 +sg16 +VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA +p86 +sg18 +S'NR2E3' +p87 +sg20 +(dp88 +g22 +S'NP_055064.1:p.(Thr318HisfsTer23)' +p89 +sg24 +S'NP_055064.1:p.(T318Hfs*23)' +p90 +ssg26 +g27 +sg28 +g4 +sg29 +g4 +sg30 +S'NM_014249.2:c.951dup' +p91 +sg32 +g4 +sg33 +(dp92 +S'hg19' +p93 +(dp94 +g37 +S'NC_000015.9:g.72105924_72105934=' +p95 +sg39 +(dp96 +g41 +g59 +sg43 +g53 +sg45 +S'72105924' +p97 +sg47 +g53 +sssS'grch37' +p98 +(dp99 +g37 +S'NC_000015.9:g.72105924_72105934=' +p100 +sg39 +(dp101 +g41 +g42 +sg43 +g53 +sg45 +S'72105924' +p102 +sg47 +g53 +ssssg67 +(dp103 +g69 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1' +p104 +sg71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2' +p105 +sssS'flag' +p106 +S'gene_variant' +p107 +sS'NM_016346.3:c.951dup' +p108 +(dp109 +g3 +g4 +sg5 +(lp110 +S'The displayed variants may be artefacts of aligning NM_016346.3 with genome build GRCh37' +p111 +aS'NC_000015.9:g.72105926_72105932 contains 1 genomic base(s) that fail to align to transcript NM_016346.3' +p112 +aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_016346.3 between positions c.947_948' +p113 +aS'Caution should be used when reporting the displayed variant descriptions' +p114 +aS'If you are unsure, please contact admin' +p115 +aS'RefSeqGene record not available' +p116 +asg13 +g4 +sg14 +(lp117 +sg16 +VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA +p118 +sg18 +S'NR2E3' +p119 +sg20 +(dp120 +g22 +S'NP_057430.1:p.(Thr318HisfsTer23)' +p121 +sg24 +S'NP_057430.1:p.(T318Hfs*23)' +p122 +ssg26 +g27 +sg28 +g4 +sg29 +g4 +sg30 +S'NM_016346.3:c.951dup' +p123 +sg32 +g4 +sg33 +(dp124 +S'grch38' +p125 +(dp126 +g37 +S'NC_000015.10:g.71813592dup' +p127 +sg39 +(dp128 +g41 +g42 +sg43 +g44 +sg45 +S'71813589' +p129 +sg47 +VCC +p130 +sssS'grch37' +p131 +(dp132 +g37 +S'NC_000015.9:g.72105924_72105934=' +p133 +sg39 +(dp134 +g41 +g42 +sg43 +g53 +sg45 +S'72105924' +p135 +sg47 +g53 +sssg55 +(dp136 +g37 +S'NC_000015.10:g.71813592dup' +p137 +sg39 +(dp138 +g41 +g59 +sg43 +g44 +sg45 +S'71813589' +p139 +sg47 +VCC +p140 +sssS'hg19' +p141 +(dp142 +g37 +S'NC_000015.9:g.72105924_72105934=' +p143 +sg39 +(dp144 +g41 +g59 +sg43 +g53 +sg45 +S'72105924' +p145 +sg47 +g53 +ssssg67 +(dp146 +g69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1' +p147 +sg71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3' +p148 +sssS'NM_016346.2:c.951dup' +p149 +(dp150 +g3 +g4 +sg5 +(lp151 +S'The displayed variants may be artefacts of aligning NM_016346.2 with genome build GRCh37' +p152 
+aS'NC_000015.9:g.72105926_72105932 contains 1 genomic base(s) that fail to align to transcript NM_016346.2' +p153 +aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_016346.2 between positions c.947_948' +p154 +aS'Caution should be used when reporting the displayed variant descriptions' +p155 +aS'If you are unsure, please contact admin' +p156 +aS'A more recent version of the selected reference sequence NM_016346.2 is available (NM_016346.3)' +p157 +aS'NM_016346.3:c.951dupC MUST be fully validated prior to use in reports' +p158 +aS'select_variants=NM_016346.3:c.951dup' +p159 +aS'RefSeqGene record not available' +p160 +asg13 +g4 +sg14 +(lp161 +sg16 +VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA +p162 +sg18 +S'NR2E3' +p163 +sg20 +(dp164 +g22 +S'NP_057430.1:p.(Thr318HisfsTer23)' +p165 +sg24 +S'NP_057430.1:p.(T318Hfs*23)' +p166 +ssg26 +g27 +sg28 +g4 +sg29 +g4 +sg30 +S'NM_016346.2:c.951dup' +p167 +sg32 +g4 +sg33 +(dp168 +S'hg19' +p169 +(dp170 +g37 +S'NC_000015.9:g.72105924_72105934=' +p171 +sg39 +(dp172 +g41 +g59 +sg43 +g53 +sg45 +S'72105924' +p173 +sg47 +g53 +sssS'grch37' +p174 +(dp175 +g37 +S'NC_000015.9:g.72105924_72105934=' +p176 +sg39 +(dp177 +g41 +g42 +sg43 +g53 +sg45 +S'72105924' +p178 +sg47 +g53 +ssssg67 +(dp179 +g69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1' +p180 +sg71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2' +p181 +sssS'metadata' +p182 +(dp183 +S'variantvalidator_hgvs_version' +p184 +S'1.1.3' +p185 +sS'uta_schema' +p186 +S'uta_20180821' +p187 +sS'seqrepo_db' +p188 +S'2018-08-21' +p189 +sS'variantvalidator_version' +p190 +S'v0.2' +p191 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant181.txt b/VariantValidator/testing/testOutputsMasterITS/variant181.txt new file mode 100644 index 00000000..5a30b469 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant181.txt @@ -0,0 +1,510 @@ +(dp0 +S'NM_014249.2:c.947_948insTT' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000015.9:g.72105928AC>ATT automapped to NC_000015.9:g.72105929delCinsTT' +p7 +aS'The displayed variants may be artefacts of aligning NM_014249.2 with genome build GRCh37' +p8 +aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_014249.2 between positions c.947_948' +p9 +aS'Caution should be used when reporting the displayed variant descriptions' +p10 +aS'If you are unsure, please contact admin' +p11 +aS'A more recent version of the selected reference sequence NM_014249.2 is available (NM_014249.3)' +p12 +aS'NM_014249.3:c.947_948insTT MUST be fully validated prior to use in reports' +p13 +aS'select_variants=NM_014249.3:c.947_948insTT' +p14 +aS'RefSeqGene record not available' +p15 +asS'refseqgene_context_intronic_sequence' +p16 +g4 +sS'alt_genomic_loci' +p17 +(lp18 +sS'transcript_description' +p19 +VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA +p20 +sS'gene_symbol' +p21 +S'NR2E3' +p22 +sS'hgvs_predicted_protein_consequence' +p23 +(dp24 +S'tlr' +p25 +S'NP_055064.1:p.(Pro317SerfsTer8)' +p26 +sS'slr' +p27 +S'NP_055064.1:p.(P317Sfs*8)' +p28 +ssS'submitted_variant' +p29 +S'15-72105928-AC-ATT' +p30 +sS'genome_context_intronic_sequence' +p31 +g4 +sS'hgvs_lrg_variant' +p32 +g4 +sS'hgvs_transcript_variant' +p33 +S'NM_014249.2:c.947_948insTT' +p34 +sS'hgvs_refseqgene_variant' +p35 +g4 +sS'primary_assembly_loci' +p36 +(dp37 +S'hg19' +p38 +(dp39 +S'hgvs_genomic_description' +p40 +S'NC_000015.9:g.72105929delinsTT' +p41 +sS'vcf' +p42 +(dp43 
+S'chr' +p44 +S'chr15' +p45 +sS'ref' +p46 +S'C' +p47 +sS'pos' +p48 +S'72105929' +p49 +sS'alt' +p50 +S'TT' +p51 +sssS'grch37' +p52 +(dp53 +g40 +S'NC_000015.9:g.72105929delinsTT' +p54 +sg42 +(dp55 +g44 +S'15' +p56 +sg46 +g47 +sg48 +S'72105929' +p57 +sg50 +g51 +ssssS'reference_sequence_records' +p58 +(dp59 +S'protein' +p60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1' +p61 +sS'transcript' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2' +p63 +sssS'NM_016346.3:c.947_948insTT' +p64 +(dp65 +g3 +g4 +sg5 +(lp66 +S'NC_000015.9:g.72105928AC>ATT automapped to NC_000015.9:g.72105929delCinsTT' +p67 +aS'The displayed variants may be artefacts of aligning NM_016346.3 with genome build GRCh37' +p68 +aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_016346.3 between positions c.947_948' +p69 +aS'Caution should be used when reporting the displayed variant descriptions' +p70 +aS'If you are unsure, please contact admin' +p71 +aS'RefSeqGene record not available' +p72 +asg16 +g4 +sg17 +(lp73 +sg19 +VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA +p74 +sg21 +S'NR2E3' +p75 +sg23 +(dp76 +g25 +S'NP_057430.1:p.(Pro317SerfsTer8)' +p77 +sg27 +S'NP_057430.1:p.(P317Sfs*8)' +p78 +ssg29 +g30 +sg31 +g4 +sg32 +g4 +sg33 +S'NM_016346.3:c.947_948insTT' +p79 +sg35 +g4 +sg36 +(dp80 +S'grch38' +p81 +(dp82 +g40 +S'NC_000015.10:g.71813588_71813589insTT' +p83 +sg42 +(dp84 +g44 +g56 +sg46 +S'A' +p85 +sg48 +S'71813588' +p86 +sg50 +VATT +p87 +sssS'grch37' +p88 +(dp89 +g40 +S'NC_000015.9:g.72105929delinsTT' +p90 +sg42 +(dp91 +g44 +g56 +sg46 +g47 +sg48 +S'72105929' +p92 +sg50 +S'TT' +p93 +sssS'hg38' +p94 +(dp95 +g40 +S'NC_000015.10:g.71813588_71813589insTT' +p96 +sg42 +(dp97 +g44 +g45 +sg46 +g85 +sg48 +S'71813588' +p98 +sg50 +VATT +p99 +sssS'hg19' +p100 +(dp101 +g40 +S'NC_000015.9:g.72105929delinsTT' +p102 +sg42 +(dp103 +g44 +g45 +sg46 +g47 +sg48 +S'72105929' +p104 +sg50 +g93 +ssssg58 +(dp105 +g60 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1' +p106 +sg62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3' +p107 +sssS'flag' +p108 +S'gene_variant' +p109 +sS'NM_016346.2:c.947_948insTT' +p110 +(dp111 +g3 +g4 +sg5 +(lp112 +S'NC_000015.9:g.72105928AC>ATT automapped to NC_000015.9:g.72105929delCinsTT' +p113 +aS'The displayed variants may be artefacts of aligning NM_016346.2 with genome build GRCh37' +p114 +aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_016346.2 between positions c.947_948' +p115 +aS'Caution should be used when reporting the displayed variant descriptions' +p116 +aS'If you are unsure, please contact admin' +p117 +aS'A more recent version of the selected reference sequence NM_016346.2 is available (NM_016346.3)' +p118 +aS'NM_016346.3:c.947_948insTT MUST be fully validated prior to use in reports' +p119 +aS'select_variants=NM_016346.3:c.947_948insTT' +p120 +aS'RefSeqGene record not available' +p121 +asg16 +g4 +sg17 +(lp122 +sg19 +VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA +p123 +sg21 +S'NR2E3' +p124 +sg23 +(dp125 +g25 +S'NP_057430.1:p.(Pro317SerfsTer8)' +p126 +sg27 +S'NP_057430.1:p.(P317Sfs*8)' +p127 +ssg29 +g30 +sg31 +g4 +sg32 +g4 +sg33 +S'NM_016346.2:c.947_948insTT' +p128 +sg35 +g4 +sg36 +(dp129 +S'hg19' +p130 +(dp131 +g40 +S'NC_000015.9:g.72105929delinsTT' +p132 +sg42 +(dp133 +g44 +g45 +sg46 +g47 +sg48 +S'72105929' +p134 +sg50 +S'TT' +p135 +sssS'grch37' +p136 +(dp137 +g40 +S'NC_000015.9:g.72105929delinsTT' +p138 +sg42 +(dp139 +g44 +g56 +sg46 +g47 +sg48 +S'72105929' +p140 +sg50 +g135 +ssssg58 +(dp141 +g60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1' +p142 +sg62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2' +p143 +sssS'NM_014249.3:c.947_948insTT' +p144 +(dp145 +g3 +g4 +sg5 +(lp146 +S'NC_000015.9:g.72105928AC>ATT automapped to NC_000015.9:g.72105929delCinsTT' +p147 +aS'The displayed variants may be artefacts of aligning 
NM_014249.3 with genome build GRCh37' +p148 +aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_014249.3 between positions c.947_948' +p149 +aS'Caution should be used when reporting the displayed variant descriptions' +p150 +aS'If you are unsure, please contact admin' +p151 +aS'RefSeqGene record not available' +p152 +asg16 +g4 +sg17 +(lp153 +sg19 +VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA +p154 +sg21 +S'NR2E3' +p155 +sg23 +(dp156 +g25 +S'NP_055064.1:p.(Pro317SerfsTer8)' +p157 +sg27 +S'NP_055064.1:p.(P317Sfs*8)' +p158 +ssg29 +g30 +sg31 +g4 +sg32 +g4 +sg33 +S'NM_014249.3:c.947_948insTT' +p159 +sg35 +g4 +sg36 +(dp160 +S'grch38' +p161 +(dp162 +g40 +S'NC_000015.10:g.71813588_71813589insTT' +p163 +sg42 +(dp164 +g44 +g56 +sg46 +g85 +sg48 +S'71813588' +p165 +sg50 +VATT +p166 +sssS'grch37' +p167 +(dp168 +g40 +S'NC_000015.9:g.72105929delinsTT' +p169 +sg42 +(dp170 +g44 +g56 +sg46 +g47 +sg48 +S'72105929' +p171 +sg50 +S'TT' +p172 +sssg94 +(dp173 +g40 +S'NC_000015.10:g.71813588_71813589insTT' +p174 +sg42 +(dp175 +g44 +g45 +sg46 +g85 +sg48 +S'71813588' +p176 +sg50 +VATT +p177 +sssS'hg19' +p178 +(dp179 +g40 +S'NC_000015.9:g.72105929delinsTT' +p180 +sg42 +(dp181 +g44 +g45 +sg46 +g47 +sg48 +S'72105929' +p182 +sg50 +g172 +ssssg58 +(dp183 +g60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1' +p184 +sg62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3' +p185 +sssS'metadata' +p186 +(dp187 +S'variantvalidator_hgvs_version' +p188 +S'1.1.3' +p189 +sS'uta_schema' +p190 +S'uta_20180821' +p191 +sS'seqrepo_db' +p192 +S'2018-08-21' +p193 +sS'variantvalidator_version' +p194 +S'v0.2' +p195 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant182.txt b/VariantValidator/testing/testOutputsMasterITS/variant182.txt new file mode 100644 index 00000000..14c7d00c --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant182.txt @@ -0,0 +1,510 @@ +(dp0 +S'NM_014249.2:c.947_948insTT' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000015.9:g.72105928ACC>ATT automapped to NC_000015.9:g.72105929_72105930delCCinsTT' +p7 +aS'The displayed variants may be artefacts of aligning NM_014249.2 with genome build GRCh37' +p8 +aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_014249.2 between positions c.947_948' +p9 +aS'Caution should be used when reporting the displayed variant descriptions' +p10 +aS'If you are unsure, please contact admin' +p11 +aS'A more recent version of the selected reference sequence NM_014249.2 is available (NM_014249.3)' +p12 +aS'NM_014249.3:c.947_948insTT MUST be fully validated prior to use in reports' +p13 +aS'select_variants=NM_014249.3:c.947_948insTT' +p14 +aS'RefSeqGene record not available' +p15 +asS'refseqgene_context_intronic_sequence' +p16 +g4 +sS'alt_genomic_loci' +p17 +(lp18 +sS'transcript_description' +p19 +VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA +p20 +sS'gene_symbol' +p21 +S'NR2E3' +p22 +sS'hgvs_predicted_protein_consequence' +p23 +(dp24 +S'tlr' +p25 +S'NP_055064.1:p.(Pro317SerfsTer8)' +p26 +sS'slr' +p27 +S'NP_055064.1:p.(P317Sfs*8)' +p28 +ssS'submitted_variant' +p29 +S'15-72105928-ACC-ATT' +p30 +sS'genome_context_intronic_sequence' +p31 +g4 +sS'hgvs_lrg_variant' +p32 +g4 +sS'hgvs_transcript_variant' +p33 +S'NM_014249.2:c.947_948insTT' +p34 +sS'hgvs_refseqgene_variant' +p35 +g4 +sS'primary_assembly_loci' +p36 +(dp37 +S'hg19' +p38 +(dp39 +S'hgvs_genomic_description' +p40 +S'NC_000015.9:g.72105929delinsTT' +p41 +sS'vcf' +p42 
+(dp43 +S'chr' +p44 +S'chr15' +p45 +sS'ref' +p46 +S'C' +p47 +sS'pos' +p48 +S'72105929' +p49 +sS'alt' +p50 +S'TT' +p51 +sssS'grch37' +p52 +(dp53 +g40 +S'NC_000015.9:g.72105929delinsTT' +p54 +sg42 +(dp55 +g44 +S'15' +p56 +sg46 +g47 +sg48 +S'72105929' +p57 +sg50 +g51 +ssssS'reference_sequence_records' +p58 +(dp59 +S'protein' +p60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1' +p61 +sS'transcript' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2' +p63 +sssS'NM_016346.3:c.947_948insTT' +p64 +(dp65 +g3 +g4 +sg5 +(lp66 +S'NC_000015.9:g.72105928ACC>ATT automapped to NC_000015.9:g.72105929_72105930delCCinsTT' +p67 +aS'The displayed variants may be artefacts of aligning NM_016346.3 with genome build GRCh37' +p68 +aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_016346.3 between positions c.947_948' +p69 +aS'Caution should be used when reporting the displayed variant descriptions' +p70 +aS'If you are unsure, please contact admin' +p71 +aS'RefSeqGene record not available' +p72 +asg16 +g4 +sg17 +(lp73 +sg19 +VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA +p74 +sg21 +S'NR2E3' +p75 +sg23 +(dp76 +g25 +S'NP_057430.1:p.(Pro317SerfsTer8)' +p77 +sg27 +S'NP_057430.1:p.(P317Sfs*8)' +p78 +ssg29 +g30 +sg31 +g4 +sg32 +g4 +sg33 +S'NM_016346.3:c.947_948insTT' +p79 +sg35 +g4 +sg36 +(dp80 +S'grch38' +p81 +(dp82 +g40 +S'NC_000015.10:g.71813588_71813589insTT' +p83 +sg42 +(dp84 +g44 +g56 +sg46 +S'A' +p85 +sg48 +S'71813588' +p86 +sg50 +VATT +p87 +sssS'grch37' +p88 +(dp89 +g40 +S'NC_000015.9:g.72105929delinsTT' +p90 +sg42 +(dp91 +g44 +g56 +sg46 +g47 +sg48 +S'72105929' +p92 +sg50 +S'TT' +p93 +sssS'hg38' +p94 +(dp95 +g40 +S'NC_000015.10:g.71813588_71813589insTT' +p96 +sg42 +(dp97 +g44 +g45 +sg46 +g85 +sg48 +S'71813588' +p98 +sg50 +VATT +p99 +sssS'hg19' +p100 +(dp101 +g40 +S'NC_000015.9:g.72105929delinsTT' +p102 +sg42 +(dp103 +g44 +g45 +sg46 +g47 +sg48 +S'72105929' +p104 +sg50 +g93 
+ssssg58 +(dp105 +g60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1' +p106 +sg62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3' +p107 +sssS'flag' +p108 +S'gene_variant' +p109 +sS'NM_016346.2:c.947_948insTT' +p110 +(dp111 +g3 +g4 +sg5 +(lp112 +S'NC_000015.9:g.72105928ACC>ATT automapped to NC_000015.9:g.72105929_72105930delCCinsTT' +p113 +aS'The displayed variants may be artefacts of aligning NM_016346.2 with genome build GRCh37' +p114 +aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_016346.2 between positions c.947_948' +p115 +aS'Caution should be used when reporting the displayed variant descriptions' +p116 +aS'If you are unsure, please contact admin' +p117 +aS'A more recent version of the selected reference sequence NM_016346.2 is available (NM_016346.3)' +p118 +aS'NM_016346.3:c.947_948insTT MUST be fully validated prior to use in reports' +p119 +aS'select_variants=NM_016346.3:c.947_948insTT' +p120 +aS'RefSeqGene record not available' +p121 +asg16 +g4 +sg17 +(lp122 +sg19 +VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA +p123 +sg21 +S'NR2E3' +p124 +sg23 +(dp125 +g25 +S'NP_057430.1:p.(Pro317SerfsTer8)' +p126 +sg27 +S'NP_057430.1:p.(P317Sfs*8)' +p127 +ssg29 +g30 +sg31 +g4 +sg32 +g4 +sg33 +S'NM_016346.2:c.947_948insTT' +p128 +sg35 +g4 +sg36 +(dp129 +S'hg19' +p130 +(dp131 +g40 +S'NC_000015.9:g.72105929delinsTT' +p132 +sg42 +(dp133 +g44 +g45 +sg46 +g47 +sg48 +S'72105929' +p134 +sg50 +S'TT' +p135 +sssS'grch37' +p136 +(dp137 +g40 +S'NC_000015.9:g.72105929delinsTT' +p138 +sg42 +(dp139 +g44 +g56 +sg46 +g47 +sg48 +S'72105929' +p140 +sg50 +g135 +ssssg58 +(dp141 +g60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1' +p142 +sg62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2' +p143 +sssS'NM_014249.3:c.947_948insTT' +p144 +(dp145 +g3 +g4 +sg5 +(lp146 +S'NC_000015.9:g.72105928ACC>ATT automapped to NC_000015.9:g.72105929_72105930delCCinsTT' +p147 +aS'The 
displayed variants may be artefacts of aligning NM_014249.3 with genome build GRCh37' +p148 +aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_014249.3 between positions c.947_948' +p149 +aS'Caution should be used when reporting the displayed variant descriptions' +p150 +aS'If you are unsure, please contact admin' +p151 +aS'RefSeqGene record not available' +p152 +asg16 +g4 +sg17 +(lp153 +sg19 +VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA +p154 +sg21 +S'NR2E3' +p155 +sg23 +(dp156 +g25 +S'NP_055064.1:p.(Pro317SerfsTer8)' +p157 +sg27 +S'NP_055064.1:p.(P317Sfs*8)' +p158 +ssg29 +g30 +sg31 +g4 +sg32 +g4 +sg33 +S'NM_014249.3:c.947_948insTT' +p159 +sg35 +g4 +sg36 +(dp160 +S'grch38' +p161 +(dp162 +g40 +S'NC_000015.10:g.71813588_71813589insTT' +p163 +sg42 +(dp164 +g44 +g56 +sg46 +g85 +sg48 +S'71813588' +p165 +sg50 +VATT +p166 +sssS'grch37' +p167 +(dp168 +g40 +S'NC_000015.9:g.72105929delinsTT' +p169 +sg42 +(dp170 +g44 +g56 +sg46 +g47 +sg48 +S'72105929' +p171 +sg50 +S'TT' +p172 +sssg94 +(dp173 +g40 +S'NC_000015.10:g.71813588_71813589insTT' +p174 +sg42 +(dp175 +g44 +g45 +sg46 +g85 +sg48 +S'71813588' +p176 +sg50 +VATT +p177 +sssS'hg19' +p178 +(dp179 +g40 +S'NC_000015.9:g.72105929delinsTT' +p180 +sg42 +(dp181 +g44 +g45 +sg46 +g47 +sg48 +S'72105929' +p182 +sg50 +g172 +ssssg58 +(dp183 +g60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1' +p184 +sg62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3' +p185 +sssS'metadata' +p186 +(dp187 +S'variantvalidator_hgvs_version' +p188 +S'1.1.3' +p189 +sS'uta_schema' +p190 +S'uta_20180821' +p191 +sS'seqrepo_db' +p192 +S'2018-08-21' +p193 +sS'variantvalidator_version' +p194 +S'v0.2' +p195 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant183.txt b/VariantValidator/testing/testOutputsMasterITS/variant183.txt new file mode 100644 index 00000000..5a9c75dd --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant183.txt @@ -0,0 +1,515 @@ +(dp0 +S'NM_014249.3:c.947delinsTT' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000015.9:g.72105927GACC>GTT automapped to NC_000015.9:g.72105928_72105930delACCinsTT' +p7 +aS'The displayed variants may be artefacts of aligning NM_014249.3 with genome build GRCh37' +p8 +aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_014249.3 between positions c.947_948' +p9 +aS'Caution should be used when reporting the displayed variant descriptions' +p10 +aS'If you are unsure, please contact admin' +p11 +aS'RefSeqGene record not available' +p12 +asS'refseqgene_context_intronic_sequence' +p13 +g4 +sS'alt_genomic_loci' +p14 +(lp15 +sS'transcript_description' +p16 +VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA +p17 +sS'gene_symbol' +p18 +S'NR2E3' +p19 +sS'hgvs_predicted_protein_consequence' +p20 +(dp21 +S'tlr' +p22 +S'NP_055064.1:p.(Asp316ValfsTer25)' +p23 +sS'slr' +p24 +S'NP_055064.1:p.(D316Vfs*25)' +p25 +ssS'submitted_variant' +p26 +S'15-72105927-GACC-GTT' +p27 +sS'genome_context_intronic_sequence' +p28 +g4 +sS'hgvs_lrg_variant' +p29 +g4 +sS'hgvs_transcript_variant' +p30 +S'NM_014249.3:c.947delinsTT' +p31 +sS'hgvs_refseqgene_variant' +p32 +g4 +sS'primary_assembly_loci' +p33 +(dp34 +S'grch38' +p35 +(dp36 +S'hgvs_genomic_description' +p37 +S'NC_000015.10:g.71813588delinsTT' +p38 +sS'vcf' +p39 +(dp40 +S'chr' +p41 +S'15' +p42 +sS'ref' +p43 +S'A' +p44 +sS'pos' +p45 +S'71813588' +p46 +sS'alt' +p47 +VTT +p48 +sssS'grch37' +p49 +(dp50 +g37 +S'NC_000015.9:g.72105928_72105929delinsTT' +p51 +sg39 +(dp52 +g41 +g42 +sg43 +S'AC' +p53 +sg45 
+S'72105928' +p54 +sg47 +S'TT' +p55 +sssS'hg38' +p56 +(dp57 +g37 +S'NC_000015.10:g.71813588delinsTT' +p58 +sg39 +(dp59 +g41 +S'chr15' +p60 +sg43 +g44 +sg45 +S'71813588' +p61 +sg47 +g48 +sssS'hg19' +p62 +(dp63 +g37 +S'NC_000015.9:g.72105928_72105929delinsTT' +p64 +sg39 +(dp65 +g41 +g60 +sg43 +S'AC' +p66 +sg45 +S'72105928' +p67 +sg47 +g55 +ssssS'reference_sequence_records' +p68 +(dp69 +S'protein' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1' +p71 +sS'transcript' +p72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3' +p73 +sssS'NM_016346.2:c.947delinsTT' +p74 +(dp75 +g3 +g4 +sg5 +(lp76 +S'NC_000015.9:g.72105927GACC>GTT automapped to NC_000015.9:g.72105928_72105930delACCinsTT' +p77 +aS'The displayed variants may be artefacts of aligning NM_016346.2 with genome build GRCh37' +p78 +aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_016346.2 between positions c.947_948' +p79 +aS'Caution should be used when reporting the displayed variant descriptions' +p80 +aS'If you are unsure, please contact admin' +p81 +aS'A more recent version of the selected reference sequence NM_016346.2 is available (NM_016346.3)' +p82 +aS'NM_016346.3:c.947delAinsTT MUST be fully validated prior to use in reports' +p83 +aS'select_variants=NM_016346.3:c.947delinsTT' +p84 +aS'RefSeqGene record not available' +p85 +asg13 +g4 +sg14 +(lp86 +sg16 +VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA +p87 +sg18 +S'NR2E3' +p88 +sg20 +(dp89 +g22 +S'NP_057430.1:p.(Asp316ValfsTer25)' +p90 +sg24 +S'NP_057430.1:p.(D316Vfs*25)' +p91 +ssg26 +g27 +sg28 +g4 +sg29 +g4 +sg30 +S'NM_016346.2:c.947delinsTT' +p92 +sg32 +g4 +sg33 +(dp93 +S'hg19' +p94 +(dp95 +g37 +S'NC_000015.9:g.72105928_72105929delinsTT' +p96 +sg39 +(dp97 +g41 +g60 +sg43 +S'AC' +p98 +sg45 +S'72105928' +p99 +sg47 +S'TT' +p100 +sssS'grch37' +p101 +(dp102 +g37 +S'NC_000015.9:g.72105928_72105929delinsTT' +p103 +sg39 +(dp104 +g41 +g42 +sg43 +S'AC' 
+p105 +sg45 +S'72105928' +p106 +sg47 +g100 +ssssg68 +(dp107 +g70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1' +p108 +sg72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2' +p109 +sssS'NM_014249.2:c.947delinsTT' +p110 +(dp111 +g3 +g4 +sg5 +(lp112 +S'NC_000015.9:g.72105927GACC>GTT automapped to NC_000015.9:g.72105928_72105930delACCinsTT' +p113 +aS'The displayed variants may be artefacts of aligning NM_014249.2 with genome build GRCh37' +p114 +aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_014249.2 between positions c.947_948' +p115 +aS'Caution should be used when reporting the displayed variant descriptions' +p116 +aS'If you are unsure, please contact admin' +p117 +aS'A more recent version of the selected reference sequence NM_014249.2 is available (NM_014249.3)' +p118 +aS'NM_014249.3:c.947delAinsTT MUST be fully validated prior to use in reports' +p119 +aS'select_variants=NM_014249.3:c.947delinsTT' +p120 +aS'RefSeqGene record not available' +p121 +asg13 +g4 +sg14 +(lp122 +sg16 +VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA +p123 +sg18 +S'NR2E3' +p124 +sg20 +(dp125 +g22 +S'NP_055064.1:p.(Asp316ValfsTer25)' +p126 +sg24 +S'NP_055064.1:p.(D316Vfs*25)' +p127 +ssg26 +g27 +sg28 +g4 +sg29 +g4 +sg30 +S'NM_014249.2:c.947delinsTT' +p128 +sg32 +g4 +sg33 +(dp129 +S'hg19' +p130 +(dp131 +g37 +S'NC_000015.9:g.72105928_72105929delinsTT' +p132 +sg39 +(dp133 +g41 +g60 +sg43 +S'AC' +p134 +sg45 +S'72105928' +p135 +sg47 +S'TT' +p136 +sssS'grch37' +p137 +(dp138 +g37 +S'NC_000015.9:g.72105928_72105929delinsTT' +p139 +sg39 +(dp140 +g41 +g42 +sg43 +S'AC' +p141 +sg45 +S'72105928' +p142 +sg47 +g136 +ssssg68 +(dp143 +g70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1' +p144 +sg72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2' +p145 +sssS'flag' +p146 +S'gene_variant' +p147 +sS'NM_016346.3:c.947delinsTT' +p148 +(dp149 +g3 +g4 +sg5 +(lp150 +S'NC_000015.9:g.72105927GACC>GTT 
automapped to NC_000015.9:g.72105928_72105930delACCinsTT' +p151 +aS'The displayed variants may be artefacts of aligning NM_016346.3 with genome build GRCh37' +p152 +aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_016346.3 between positions c.947_948' +p153 +aS'Caution should be used when reporting the displayed variant descriptions' +p154 +aS'If you are unsure, please contact admin' +p155 +aS'RefSeqGene record not available' +p156 +asg13 +g4 +sg14 +(lp157 +sg16 +VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA +p158 +sg18 +S'NR2E3' +p159 +sg20 +(dp160 +g22 +S'NP_057430.1:p.(Asp316ValfsTer25)' +p161 +sg24 +S'NP_057430.1:p.(D316Vfs*25)' +p162 +ssg26 +g27 +sg28 +g4 +sg29 +g4 +sg30 +S'NM_016346.3:c.947delinsTT' +p163 +sg32 +g4 +sg33 +(dp164 +S'grch38' +p165 +(dp166 +g37 +S'NC_000015.10:g.71813588delinsTT' +p167 +sg39 +(dp168 +g41 +g42 +sg43 +g44 +sg45 +S'71813588' +p169 +sg47 +VTT +p170 +sssS'grch37' +p171 +(dp172 +g37 +S'NC_000015.9:g.72105928_72105929delinsTT' +p173 +sg39 +(dp174 +g41 +g42 +sg43 +S'AC' +p175 +sg45 +S'72105928' +p176 +sg47 +S'TT' +p177 +sssg56 +(dp178 +g37 +S'NC_000015.10:g.71813588delinsTT' +p179 +sg39 +(dp180 +g41 +g60 +sg43 +g44 +sg45 +S'71813588' +p181 +sg47 +g170 +sssS'hg19' +p182 +(dp183 +g37 +S'NC_000015.9:g.72105928_72105929delinsTT' +p184 +sg39 +(dp185 +g41 +g60 +sg43 +S'AC' +p186 +sg45 +S'72105928' +p187 +sg47 +g177 +ssssg68 +(dp188 +g70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1' +p189 +sg72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3' +p190 +sssS'metadata' +p191 +(dp192 +S'variantvalidator_hgvs_version' +p193 +S'1.1.3' +p194 +sS'uta_schema' +p195 +S'uta_20180821' +p196 +sS'seqrepo_db' +p197 +S'2018-08-21' +p198 +sS'variantvalidator_version' +p199 +S'v0.2' +p200 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant184.txt b/VariantValidator/testing/testOutputsMasterITS/variant184.txt new file mode 100644 index 00000000..c8f4469f --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant184.txt @@ -0,0 +1,439 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_001042544.1:c.3233_3235=' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NC_000019.9:g.41123093A>AG automapped to NC_000019.9:g.41123095dupG' +p9 +aS'The displayed variants may be artefacts of aligning NM_001042544.1 with genome build GRCh37' +p10 +aS'NM_001042544.1:c.3233_3235 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' +p11 +aS'Caution should be used when reporting the displayed variant descriptions' +p12 +aS'If you are unsure, please contact admin' +p13 +aS'RefSeqGene record not available' +p14 +asS'refseqgene_context_intronic_sequence' +p15 +g6 +sS'alt_genomic_loci' +p16 +(lp17 +sS'transcript_description' +p18 +VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA +p19 +sS'gene_symbol' +p20 +S'LTBP4' +p21 +sS'hgvs_predicted_protein_consequence' +p22 +(dp23 +S'tlr' +p24 +S'NP_001036009.1:p.(Gln1078=)' +p25 +sS'slr' +p26 +S'NP_001036009.1:p.(Q1078=)' +p27 +ssS'submitted_variant' +p28 +S'19-41123093-A-AG' +p29 +sS'genome_context_intronic_sequence' +p30 +g6 +sS'hgvs_lrg_variant' +p31 +g6 +sS'hgvs_transcript_variant' +p32 +S'NM_001042544.1:c.3233_3235=' +p33 +sS'hgvs_refseqgene_variant' +p34 +g6 +sS'primary_assembly_loci' +p35 +(dp36 +S'grch38' +p37 +(dp38 +S'hgvs_genomic_description' +p39 +S'NC_000019.10:g.40617187_40617189=' +p40 +sS'vcf' +p41 +(dp42 +S'chr' +p43 +S'19' +p44 +sS'ref' +p45 +VAGG +p46 +sS'pos' +p47 +S'40617187' +p48 +sS'alt' +p49 +g46 +sssS'grch37' +p50 +(dp51 +g39 +S'NC_000019.9:g.41123095dup' +p52 +sg41 +(dp53 +g43 +g44 +sg45 +S'G' +p54 +sg47 +S'41123094' +p55 +sg49 
+VGG +p56 +sssS'hg38' +p57 +(dp58 +g39 +S'NC_000019.10:g.40617187_40617189=' +p59 +sg41 +(dp60 +g43 +S'chr19' +p61 +sg45 +g46 +sg47 +S'40617187' +p62 +sg49 +g46 +sssS'hg19' +p63 +(dp64 +g39 +S'NC_000019.9:g.41123095dup' +p65 +sg41 +(dp66 +g43 +g61 +sg45 +g54 +sg47 +S'41123094' +p67 +sg49 +VGG +p68 +ssssS'reference_sequence_records' +p69 +(dp70 +S'protein' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1' +p72 +sS'transcript' +p73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1' +p74 +sssS'metadata' +p75 +(dp76 +S'variantvalidator_hgvs_version' +p77 +S'1.1.3' +p78 +sS'uta_schema' +p79 +S'uta_20180821' +p80 +sS'seqrepo_db' +p81 +S'2018-08-21' +p82 +sS'variantvalidator_version' +p83 +S'v0.2' +p84 +ssS'NM_001042545.1:c.3032_3034=' +p85 +(dp86 +g5 +g6 +sg7 +(lp87 +S'NC_000019.9:g.41123093A>AG automapped to NC_000019.9:g.41123095dupG' +p88 +aS'The displayed variants may be artefacts of aligning NM_001042545.1 with genome build GRCh37' +p89 +aS'NM_001042545.1:c.3032_3034 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' +p90 +aS'Caution should be used when reporting the displayed variant descriptions' +p91 +aS'If you are unsure, please contact admin' +p92 +aS'RefSeqGene record not available' +p93 +asg15 +g6 +sg16 +(lp94 +sg18 +VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA +p95 +sg20 +S'LTBP4' +p96 +sg22 +(dp97 +g24 +S'NP_001036010.1:p.(Gln1011=)' +p98 +sg26 +S'NP_001036010.1:p.(Q1011=)' +p99 +ssg28 +g29 +sg30 +g6 +sg31 +g6 +sg32 +S'NM_001042545.1:c.3032_3034=' +p100 +sg34 +g6 +sg35 +(dp101 +S'grch38' +p102 +(dp103 +g39 +S'NC_000019.10:g.40617187_40617189=' +p104 +sg41 +(dp105 +g43 +g44 +sg45 +VAGG +p106 +sg47 +S'40617187' +p107 +sg49 +g106 +sssS'grch37' +p108 +(dp109 +g39 +S'NC_000019.9:g.41123095dup' +p110 +sg41 +(dp111 +g43 +g44 +sg45 +g54 +sg47 +S'41123094' +p112 +sg49 +VGG +p113 +sssg57 +(dp114 +g39 +S'NC_000019.10:g.40617187_40617189=' +p115 +sg41 +(dp116 +g43 
+g61 +sg45 +g106 +sg47 +S'40617187' +p117 +sg49 +g106 +sssS'hg19' +p118 +(dp119 +g39 +S'NC_000019.9:g.41123095dup' +p120 +sg41 +(dp121 +g43 +g61 +sg45 +g54 +sg47 +S'41123094' +p122 +sg49 +VGG +p123 +ssssg69 +(dp124 +g71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1' +p125 +sg73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1' +p126 +sssS'NM_003573.2:c.3122_3124=' +p127 +(dp128 +g5 +g6 +sg7 +(lp129 +S'NC_000019.9:g.41123093A>AG automapped to NC_000019.9:g.41123095dupG' +p130 +aS'The displayed variants may be artefacts of aligning NM_003573.2 with genome build GRCh37' +p131 +aS'NM_003573.2:c.3122_3124 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' +p132 +aS'Caution should be used when reporting the displayed variant descriptions' +p133 +aS'If you are unsure, please contact admin' +p134 +aS'RefSeqGene record not available' +p135 +asg15 +g6 +sg16 +(lp136 +sg18 +VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA +p137 +sg20 +S'LTBP4' +p138 +sg22 +(dp139 +g24 +S'NP_003564.2:p.(Gln1041=)' +p140 +sg26 +S'NP_003564.2:p.(Q1041=)' +p141 +ssg28 +g29 +sg30 +g6 +sg31 +g6 +sg32 +S'NM_003573.2:c.3122_3124=' +p142 +sg34 +g6 +sg35 +(dp143 +S'grch38' +p144 +(dp145 +g39 +S'NC_000019.10:g.40617187_40617189=' +p146 +sg41 +(dp147 +g43 +g44 +sg45 +VAGG +p148 +sg47 +S'40617187' +p149 +sg49 +g148 +sssS'grch37' +p150 +(dp151 +g39 +S'NC_000019.9:g.41123095dup' +p152 +sg41 +(dp153 +g43 +g44 +sg45 +g54 +sg47 +S'41123094' +p154 +sg49 +VGG +p155 +sssg57 +(dp156 +g39 +S'NC_000019.10:g.40617187_40617189=' +p157 +sg41 +(dp158 +g43 +g61 +sg45 +g148 +sg47 +S'40617187' +p159 +sg49 +g148 +sssS'hg19' +p160 +(dp161 +g39 +S'NC_000019.9:g.41123095dup' +p162 +sg41 +(dp163 +g43 +g61 +sg45 +g54 +sg47 +S'41123094' +p164 +sg49 +VGG +p165 +ssssg69 +(dp166 +g71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2' +p167 +sg73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2' +p168 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant185.txt b/VariantValidator/testing/testOutputsMasterITS/variant185.txt new file mode 100644 index 00000000..ee61fa54 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant185.txt @@ -0,0 +1,438 @@ +(dp0 +S'NM_003573.2:c.3123G>T' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000019.9:g.41123093A>AT automapped to NC_000019.9:g.41123093_41123094insT' +p7 +aS'The displayed variants may be artefacts of aligning NM_003573.2 with genome build GRCh37' +p8 +aS'NM_003573.2:c.3123 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' +p9 +aS'Caution should be used when reporting the displayed variant descriptions' +p10 +aS'If you are unsure, please contact admin' +p11 +aS'RefSeqGene record not available' +p12 +asS'refseqgene_context_intronic_sequence' +p13 +g4 +sS'alt_genomic_loci' +p14 +(lp15 +sS'transcript_description' +p16 +VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA +p17 +sS'gene_symbol' +p18 +S'LTBP4' +p19 +sS'hgvs_predicted_protein_consequence' +p20 +(dp21 +S'tlr' +p22 +S'NP_003564.2:p.(Gln1041His)' +p23 +sS'slr' +p24 +S'NP_003564.2:p.(Q1041H)' +p25 +ssS'submitted_variant' +p26 +S'19-41123093-A-AT' +p27 +sS'genome_context_intronic_sequence' +p28 +g4 +sS'hgvs_lrg_variant' +p29 +g4 +sS'hgvs_transcript_variant' +p30 +S'NM_003573.2:c.3123G>T' +p31 +sS'hgvs_refseqgene_variant' +p32 +g4 +sS'primary_assembly_loci' +p33 +(dp34 +S'grch38' +p35 +(dp36 +S'hgvs_genomic_description' +p37 +S'NC_000019.10:g.40617188G>T' +p38 +sS'vcf' +p39 +(dp40 +S'chr' +p41 +S'19' +p42 +sS'ref' +p43 +VG +p44 +sS'pos' +p45 +S'40617188' +p46 +sS'alt' +p47 +VT +p48 +sssS'grch37' +p49 +(dp50 +g37 +S'NC_000019.9:g.41123093_41123094insT' +p51 +sg39 +(dp52 +g41 +g42 +sg43 +S'A' +p53 +sg45 +S'41123093' +p54 +sg47 +VAT +p55 +sssS'hg38' +p56 +(dp57 +g37 
+S'NC_000019.10:g.40617188G>T' +p58 +sg39 +(dp59 +g41 +S'chr19' +p60 +sg43 +g44 +sg45 +S'40617188' +p61 +sg47 +g48 +sssS'hg19' +p62 +(dp63 +g37 +S'NC_000019.9:g.41123093_41123094insT' +p64 +sg39 +(dp65 +g41 +g60 +sg43 +g53 +sg45 +S'41123093' +p66 +sg47 +VAT +p67 +ssssS'reference_sequence_records' +p68 +(dp69 +S'protein' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2' +p71 +sS'transcript' +p72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2' +p73 +sssS'flag' +p74 +S'gene_variant' +p75 +sS'metadata' +p76 +(dp77 +S'variantvalidator_hgvs_version' +p78 +S'1.1.3' +p79 +sS'uta_schema' +p80 +S'uta_20180821' +p81 +sS'seqrepo_db' +p82 +S'2018-08-21' +p83 +sS'variantvalidator_version' +p84 +S'v0.2' +p85 +ssS'NM_001042545.1:c.3033G>T' +p86 +(dp87 +g3 +g4 +sg5 +(lp88 +S'NC_000019.9:g.41123093A>AT automapped to NC_000019.9:g.41123093_41123094insT' +p89 +aS'The displayed variants may be artefacts of aligning NM_001042545.1 with genome build GRCh37' +p90 +aS'NM_001042545.1:c.3033 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' +p91 +aS'Caution should be used when reporting the displayed variant descriptions' +p92 +aS'If you are unsure, please contact admin' +p93 +aS'RefSeqGene record not available' +p94 +asg13 +g4 +sg14 +(lp95 +sg16 +VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA +p96 +sg18 +S'LTBP4' +p97 +sg20 +(dp98 +g22 +S'NP_001036010.1:p.(Gln1011His)' +p99 +sg24 +S'NP_001036010.1:p.(Q1011H)' +p100 +ssg26 +g27 +sg28 +g4 +sg29 +g4 +sg30 +S'NM_001042545.1:c.3033G>T' +p101 +sg32 +g4 +sg33 +(dp102 +S'grch38' +p103 +(dp104 +g37 +S'NC_000019.10:g.40617188G>T' +p105 +sg39 +(dp106 +g41 +g42 +sg43 +g44 +sg45 +S'40617188' +p107 +sg47 +g48 +sssS'grch37' +p108 +(dp109 +g37 +S'NC_000019.9:g.41123093_41123094insT' +p110 +sg39 +(dp111 +g41 +g42 +sg43 +g53 +sg45 +S'41123093' +p112 +sg47 +VAT +p113 +sssg56 +(dp114 +g37 +S'NC_000019.10:g.40617188G>T' +p115 +sg39 +(dp116 +g41 +g60 +sg43 +g44 
+sg45 +S'40617188' +p117 +sg47 +g48 +sssS'hg19' +p118 +(dp119 +g37 +S'NC_000019.9:g.41123093_41123094insT' +p120 +sg39 +(dp121 +g41 +g60 +sg43 +g53 +sg45 +S'41123093' +p122 +sg47 +VAT +p123 +ssssg68 +(dp124 +g70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1' +p125 +sg72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1' +p126 +sssS'NM_001042544.1:c.3234G>T' +p127 +(dp128 +g3 +g4 +sg5 +(lp129 +S'NC_000019.9:g.41123093A>AT automapped to NC_000019.9:g.41123093_41123094insT' +p130 +aS'The displayed variants may be artefacts of aligning NM_001042544.1 with genome build GRCh37' +p131 +aS'NM_001042544.1:c.3234 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' +p132 +aS'Caution should be used when reporting the displayed variant descriptions' +p133 +aS'If you are unsure, please contact admin' +p134 +aS'RefSeqGene record not available' +p135 +asg13 +g4 +sg14 +(lp136 +sg16 +VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA +p137 +sg18 +S'LTBP4' +p138 +sg20 +(dp139 +g22 +S'NP_001036009.1:p.(Gln1078His)' +p140 +sg24 +S'NP_001036009.1:p.(Q1078H)' +p141 +ssg26 +g27 +sg28 +g4 +sg29 +g4 +sg30 +S'NM_001042544.1:c.3234G>T' +p142 +sg32 +g4 +sg33 +(dp143 +S'grch38' +p144 +(dp145 +g37 +S'NC_000019.10:g.40617188G>T' +p146 +sg39 +(dp147 +g41 +g42 +sg43 +g44 +sg45 +S'40617188' +p148 +sg47 +g48 +sssS'grch37' +p149 +(dp150 +g37 +S'NC_000019.9:g.41123093_41123094insT' +p151 +sg39 +(dp152 +g41 +g42 +sg43 +g53 +sg45 +S'41123093' +p153 +sg47 +VAT +p154 +sssg56 +(dp155 +g37 +S'NC_000019.10:g.40617188G>T' +p156 +sg39 +(dp157 +g41 +g60 +sg43 +g44 +sg45 +S'40617188' +p158 +sg47 +g48 +sssS'hg19' +p159 +(dp160 +g37 +S'NC_000019.9:g.41123093_41123094insT' +p161 +sg39 +(dp162 +g41 +g60 +sg43 +g53 +sg45 +S'41123093' +p163 +sg47 +VAT +p164 +ssssg68 +(dp165 +g70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1' +p166 +sg72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1' +p167 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant186.txt b/VariantValidator/testing/testOutputsMasterITS/variant186.txt new file mode 100644 index 00000000..9dd11dde --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant186.txt @@ -0,0 +1,442 @@ +(dp0 +S'NM_001042544.1:c.3235_3236del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000019.9:g.41123093AG>A automapped to NC_000019.9:g.41123095delG' +p7 +aS'The displayed variants may be artefacts of aligning NM_001042544.1 with genome build GRCh37' +p8 +aS'NM_001042544.1:c.3234_3235 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' +p9 +aS'Caution should be used when reporting the displayed variant descriptions' +p10 +aS'If you are unsure, please contact admin' +p11 +aS'RefSeqGene record not available' +p12 +asS'refseqgene_context_intronic_sequence' +p13 +g4 +sS'alt_genomic_loci' +p14 +(lp15 +sS'transcript_description' +p16 +VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA +p17 +sS'gene_symbol' +p18 +S'LTBP4' +p19 +sS'hgvs_predicted_protein_consequence' +p20 +(dp21 +S'tlr' +p22 +S'NP_001036009.1:p.(Gly1079LeufsTer17)' +p23 +sS'slr' +p24 +S'NP_001036009.1:p.(G1079Lfs*17)' +p25 +ssS'submitted_variant' +p26 +S'19-41123093-AG-A' +p27 +sS'genome_context_intronic_sequence' +p28 +g4 +sS'hgvs_lrg_variant' +p29 +g4 +sS'hgvs_transcript_variant' +p30 +S'NM_001042544.1:c.3235_3236del' +p31 +sS'hgvs_refseqgene_variant' +p32 +g4 +sS'primary_assembly_loci' +p33 +(dp34 +S'grch38' +p35 +(dp36 +S'hgvs_genomic_description' +p37 +S'NC_000019.10:g.40617189_40617190del' +p38 +sS'vcf' +p39 +(dp40 +S'chr' +p41 +S'19' +p42 +sS'ref' +p43 +S'AGG' +p44 +sS'pos' +p45 +S'40617187' +p46 +sS'alt' +p47 +S'A' +p48 +sssS'grch37' +p49 +(dp50 +g37 +S'NC_000019.9:g.41123095del' +p51 +sg39 +(dp52 +g41 +g42 +sg43 +S'AG' +p53 +sg45 +S'41123093' +p54 +sg47 +g48 
+sssS'hg38' +p55 +(dp56 +g37 +S'NC_000019.10:g.40617189_40617190del' +p57 +sg39 +(dp58 +g41 +S'chr19' +p59 +sg43 +S'AGG' +p60 +sg45 +S'40617187' +p61 +sg47 +g48 +sssS'hg19' +p62 +(dp63 +g37 +S'NC_000019.9:g.41123095del' +p64 +sg39 +(dp65 +g41 +g59 +sg43 +S'AG' +p66 +sg45 +S'41123093' +p67 +sg47 +g48 +ssssS'reference_sequence_records' +p68 +(dp69 +S'protein' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1' +p71 +sS'transcript' +p72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1' +p73 +sssS'flag' +p74 +S'gene_variant' +p75 +sS'NM_001042545.1:c.3034_3035del' +p76 +(dp77 +g3 +g4 +sg5 +(lp78 +S'NC_000019.9:g.41123093AG>A automapped to NC_000019.9:g.41123095delG' +p79 +aS'The displayed variants may be artefacts of aligning NM_001042545.1 with genome build GRCh37' +p80 +aS'NM_001042545.1:c.3033_3034 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' +p81 +aS'Caution should be used when reporting the displayed variant descriptions' +p82 +aS'If you are unsure, please contact admin' +p83 +aS'RefSeqGene record not available' +p84 +asg13 +g4 +sg14 +(lp85 +sg16 +VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA +p86 +sg18 +S'LTBP4' +p87 +sg20 +(dp88 +g22 +S'NP_001036010.1:p.(Gly1012LeufsTer17)' +p89 +sg24 +S'NP_001036010.1:p.(G1012Lfs*17)' +p90 +ssg26 +g27 +sg28 +g4 +sg29 +g4 +sg30 +S'NM_001042545.1:c.3034_3035del' +p91 +sg32 +g4 +sg33 +(dp92 +S'grch38' +p93 +(dp94 +g37 +S'NC_000019.10:g.40617189_40617190del' +p95 +sg39 +(dp96 +g41 +g42 +sg43 +S'AGG' +p97 +sg45 +S'40617187' +p98 +sg47 +g48 +sssS'grch37' +p99 +(dp100 +g37 +S'NC_000019.9:g.41123095del' +p101 +sg39 +(dp102 +g41 +g42 +sg43 +S'AG' +p103 +sg45 +S'41123093' +p104 +sg47 +g48 +sssg55 +(dp105 +g37 +S'NC_000019.10:g.40617189_40617190del' +p106 +sg39 +(dp107 +g41 +g59 +sg43 +S'AGG' +p108 +sg45 +S'40617187' +p109 +sg47 +g48 +sssS'hg19' +p110 +(dp111 +g37 +S'NC_000019.9:g.41123095del' +p112 +sg39 +(dp113 +g41 +g59 +sg43 
+S'AG' +p114 +sg45 +S'41123093' +p115 +sg47 +g48 +ssssg68 +(dp116 +g70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1' +p117 +sg72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1' +p118 +sssS'NM_003573.2:c.3124_3125del' +p119 +(dp120 +g3 +g4 +sg5 +(lp121 +S'NC_000019.9:g.41123093AG>A automapped to NC_000019.9:g.41123095delG' +p122 +aS'The displayed variants may be artefacts of aligning NM_003573.2 with genome build GRCh37' +p123 +aS'NM_003573.2:c.3123_3124 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' +p124 +aS'Caution should be used when reporting the displayed variant descriptions' +p125 +aS'If you are unsure, please contact admin' +p126 +aS'RefSeqGene record not available' +p127 +asg13 +g4 +sg14 +(lp128 +sg16 +VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA +p129 +sg18 +S'LTBP4' +p130 +sg20 +(dp131 +g22 +S'NP_003564.2:p.(Gly1042LeufsTer17)' +p132 +sg24 +S'NP_003564.2:p.(G1042Lfs*17)' +p133 +ssg26 +g27 +sg28 +g4 +sg29 +g4 +sg30 +S'NM_003573.2:c.3124_3125del' +p134 +sg32 +g4 +sg33 +(dp135 +S'grch38' +p136 +(dp137 +g37 +S'NC_000019.10:g.40617189_40617190del' +p138 +sg39 +(dp139 +g41 +g42 +sg43 +S'AGG' +p140 +sg45 +S'40617187' +p141 +sg47 +g48 +sssS'grch37' +p142 +(dp143 +g37 +S'NC_000019.9:g.41123095del' +p144 +sg39 +(dp145 +g41 +g42 +sg43 +S'AG' +p146 +sg45 +S'41123093' +p147 +sg47 +g48 +sssg55 +(dp148 +g37 +S'NC_000019.10:g.40617189_40617190del' +p149 +sg39 +(dp150 +g41 +g59 +sg43 +S'AGG' +p151 +sg45 +S'40617187' +p152 +sg47 +g48 +sssS'hg19' +p153 +(dp154 +g37 +S'NC_000019.9:g.41123095del' +p155 +sg39 +(dp156 +g41 +g59 +sg43 +S'AG' +p157 +sg45 +S'41123093' +p158 +sg47 +g48 +ssssg68 +(dp159 +g70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2' +p160 +sg72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2' +p161 +sssS'metadata' +p162 +(dp163 +S'variantvalidator_hgvs_version' +p164 +S'1.1.3' +p165 +sS'uta_schema' +p166 +S'uta_20180821' +p167 +sS'seqrepo_db' 
+p168 +S'2018-08-21' +p169 +sS'variantvalidator_version' +p170 +S'v0.2' +p171 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant187.txt b/VariantValidator/testing/testOutputsMasterITS/variant187.txt new file mode 100644 index 00000000..01c0951e --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant187.txt @@ -0,0 +1,437 @@ +(dp0 +S'NM_001042545.1:c.3035del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000019.9:g.41123093AG>AG automapped to NC_000019.9:g.41123093_41123094AG=' +p7 +aS'The displayed variants may be artefacts of aligning NM_001042545.1 with genome build GRCh37' +p8 +aS'NM_001042545.1:c.3033 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' +p9 +aS'Caution should be used when reporting the displayed variant descriptions' +p10 +aS'If you are unsure, please contact admin' +p11 +aS'RefSeqGene record not available' +p12 +asS'refseqgene_context_intronic_sequence' +p13 +g4 +sS'alt_genomic_loci' +p14 +(lp15 +sS'transcript_description' +p16 +VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA +p17 +sS'gene_symbol' +p18 +S'LTBP4' +p19 +sS'hgvs_predicted_protein_consequence' +p20 +(dp21 +S'tlr' +p22 +S'NP_001036010.1:p.(Gly1012ValfsTer14)' +p23 +sS'slr' +p24 +S'NP_001036010.1:p.(G1012Vfs*14)' +p25 +ssS'submitted_variant' +p26 +S'19-41123093-AG-AG' +p27 +sS'genome_context_intronic_sequence' +p28 +g4 +sS'hgvs_lrg_variant' +p29 +g4 +sS'hgvs_transcript_variant' +p30 +S'NM_001042545.1:c.3035del' +p31 +sS'hgvs_refseqgene_variant' +p32 +g4 +sS'primary_assembly_loci' +p33 +(dp34 +S'grch38' +p35 +(dp36 +S'hgvs_genomic_description' +p37 +S'NC_000019.10:g.40617190del' +p38 +sS'vcf' +p39 +(dp40 +S'chr' +p41 +S'19' +p42 +sS'ref' +p43 +S'AG' +p44 +sS'pos' +p45 +S'40617187' +p46 +sS'alt' +p47 +S'A' +p48 +sssS'grch37' +p49 +(dp50 +g37 +S'NC_000019.9:g.41123093_41123094=' +p51 +sg39 
+(dp52 +g41 +g42 +sg43 +S'AG' +p53 +sg45 +S'41123093' +p54 +sg47 +g53 +sssS'hg38' +p55 +(dp56 +g37 +S'NC_000019.10:g.40617190del' +p57 +sg39 +(dp58 +g41 +S'chr19' +p59 +sg43 +S'AG' +p60 +sg45 +S'40617187' +p61 +sg47 +g48 +sssS'hg19' +p62 +(dp63 +g37 +S'NC_000019.9:g.41123093_41123094=' +p64 +sg39 +(dp65 +g41 +g59 +sg43 +g53 +sg45 +S'41123093' +p66 +sg47 +g53 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1' +p72 +sssS'flag' +p73 +S'gene_variant' +p74 +sS'NM_001042544.1:c.3236del' +p75 +(dp76 +g3 +g4 +sg5 +(lp77 +S'NC_000019.9:g.41123093AG>AG automapped to NC_000019.9:g.41123093_41123094AG=' +p78 +aS'The displayed variants may be artefacts of aligning NM_001042544.1 with genome build GRCh37' +p79 +aS'NM_001042544.1:c.3234 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' +p80 +aS'Caution should be used when reporting the displayed variant descriptions' +p81 +aS'If you are unsure, please contact admin' +p82 +aS'RefSeqGene record not available' +p83 +asg13 +g4 +sg14 +(lp84 +sg16 +VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA +p85 +sg18 +S'LTBP4' +p86 +sg20 +(dp87 +g22 +S'NP_001036009.1:p.(Gly1079ValfsTer14)' +p88 +sg24 +S'NP_001036009.1:p.(G1079Vfs*14)' +p89 +ssg26 +g27 +sg28 +g4 +sg29 +g4 +sg30 +S'NM_001042544.1:c.3236del' +p90 +sg32 +g4 +sg33 +(dp91 +S'grch38' +p92 +(dp93 +g37 +S'NC_000019.10:g.40617190del' +p94 +sg39 +(dp95 +g41 +g42 +sg43 +S'AG' +p96 +sg45 +S'40617187' +p97 +sg47 +g48 +sssS'grch37' +p98 +(dp99 +g37 +S'NC_000019.9:g.41123093_41123094=' +p100 +sg39 +(dp101 +g41 +g42 +sg43 +g53 +sg45 +S'41123093' +p102 +sg47 +g53 +sssg55 +(dp103 +g37 +S'NC_000019.10:g.40617190del' +p104 +sg39 +(dp105 +g41 +g59 +sg43 +S'AG' +p106 +sg45 +S'40617187' +p107 +sg47 +g48 +sssS'hg19' +p108 +(dp109 +g37 +S'NC_000019.9:g.41123093_41123094=' 
+p110 +sg39 +(dp111 +g41 +g59 +sg43 +g53 +sg45 +S'41123093' +p112 +sg47 +g53 +ssssg67 +(dp113 +g69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1' +p114 +sg71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1' +p115 +sssS'NM_003573.2:c.3125del' +p116 +(dp117 +g3 +g4 +sg5 +(lp118 +S'NC_000019.9:g.41123093AG>AG automapped to NC_000019.9:g.41123093_41123094AG=' +p119 +aS'The displayed variants may be artefacts of aligning NM_003573.2 with genome build GRCh37' +p120 +aS'NM_003573.2:c.3123 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' +p121 +aS'Caution should be used when reporting the displayed variant descriptions' +p122 +aS'If you are unsure, please contact admin' +p123 +aS'RefSeqGene record not available' +p124 +asg13 +g4 +sg14 +(lp125 +sg16 +VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA +p126 +sg18 +S'LTBP4' +p127 +sg20 +(dp128 +g22 +S'NP_003564.2:p.(Gly1042ValfsTer14)' +p129 +sg24 +S'NP_003564.2:p.(G1042Vfs*14)' +p130 +ssg26 +g27 +sg28 +g4 +sg29 +g4 +sg30 +S'NM_003573.2:c.3125del' +p131 +sg32 +g4 +sg33 +(dp132 +S'grch38' +p133 +(dp134 +g37 +S'NC_000019.10:g.40617190del' +p135 +sg39 +(dp136 +g41 +g42 +sg43 +S'AG' +p137 +sg45 +S'40617187' +p138 +sg47 +g48 +sssS'grch37' +p139 +(dp140 +g37 +S'NC_000019.9:g.41123093_41123094=' +p141 +sg39 +(dp142 +g41 +g42 +sg43 +g53 +sg45 +S'41123093' +p143 +sg47 +g53 +sssg55 +(dp144 +g37 +S'NC_000019.10:g.40617190del' +p145 +sg39 +(dp146 +g41 +g59 +sg43 +S'AG' +p147 +sg45 +S'40617187' +p148 +sg47 +g48 +sssS'hg19' +p149 +(dp150 +g37 +S'NC_000019.9:g.41123093_41123094=' +p151 +sg39 +(dp152 +g41 +g59 +sg43 +g53 +sg45 +S'41123093' +p153 +sg47 +g53 +ssssg67 +(dp154 +g69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2' +p155 +sg71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2' +p156 +sssS'metadata' +p157 +(dp158 +S'variantvalidator_hgvs_version' +p159 +S'1.1.3' +p160 +sS'uta_schema' +p161 +S'uta_20180821' +p162 +sS'seqrepo_db' 
+p163 +S'2018-08-21' +p164 +sS'variantvalidator_version' +p165 +S'v0.2' +p166 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant188.txt b/VariantValidator/testing/testOutputsMasterITS/variant188.txt new file mode 100644 index 00000000..7c94f8d1 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant188.txt @@ -0,0 +1,180 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_012309.4:c.913-5058G>A' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +aS'NM_012309.4:c.913-5058G>A cannot be mapped directly to genome build GRCh37' +p10 +aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g6 +sS'alt_genomic_loci' +p13 +(lp14 +(dp15 +S'grch37' +p16 +(dp17 +S'hgvs_genomic_description' +p18 +S'NW_004070871.1:g.574546C>T' +p19 +sS'vcf' +p20 +(dp21 +S'chr' +p22 +S'HG865_PATCH' +p23 +sS'ref' +p24 +VC +p25 +sS'pos' +p26 +S'574546' +p27 +sS'alt' +p28 +VT +p29 +sssa(dp30 +S'hg19' +p31 +(dp32 +g18 +S'NW_004070871.1:g.574546C>T' +p33 +sg20 +(dp34 +g22 +S'NW_004070871.1' +p35 +sg24 +g25 +sg26 +S'574546' +p36 +sg28 +g29 +sssasS'transcript_description' +p37 +VHomo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 1, mRNA +p38 +sS'gene_symbol' +p39 +S'SHANK2' +p40 +sS'hgvs_predicted_protein_consequence' +p41 +(dp42 +S'tlr' +p43 +S'NP_036441.2:p.?' +p44 +sS'slr' +p45 +S'NP_036441.2:p.?' 
+p46 +ssS'submitted_variant' +p47 +S'NM_012309.4:c.913-5058G>A' +p48 +sS'genome_context_intronic_sequence' +p49 +S'NC_000011.10(NM_012309.4):c.913-5058G>A' +p50 +sS'hgvs_lrg_variant' +p51 +g6 +sS'hgvs_transcript_variant' +p52 +S'NM_012309.4:c.913-5058G>A' +p53 +sS'hgvs_refseqgene_variant' +p54 +g6 +sS'primary_assembly_loci' +p55 +(dp56 +S'grch38' +p57 +(dp58 +g18 +S'NC_000011.10:g.71080333C>T' +p59 +sg20 +(dp60 +g22 +S'11' +p61 +sg24 +g25 +sg26 +S'71080333' +p62 +sg28 +g29 +sssS'hg38' +p63 +(dp64 +g18 +S'NC_000011.10:g.71080333C>T' +p65 +sg20 +(dp66 +g22 +S'chr11' +p67 +sg24 +g25 +sg26 +S'71080333' +p68 +sg28 +g29 +ssssS'reference_sequence_records' +p69 +(dp70 +S'protein' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_036441.2' +p72 +sS'transcript' +p73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_012309.4' +p74 +sssS'metadata' +p75 +(dp76 +S'variantvalidator_hgvs_version' +p77 +S'1.1.3' +p78 +sS'uta_schema' +p79 +S'uta_20180821' +p80 +sS'seqrepo_db' +p81 +S'2018-08-21' +p82 +sS'variantvalidator_version' +p83 +S'v0.2' +p84 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant189.txt b/VariantValidator/testing/testOutputsMasterITS/variant189.txt new file mode 100644 index 00000000..75351fd0 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant189.txt @@ -0,0 +1,121 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'Automap has extracted possible variant descriptions' +p7 +aS'No transcript definition for (tx_ac=LRG_199t1)' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +g4 +sS'gene_symbol' +p13 +g4 +sS'hgvs_predicted_protein_consequence' +p14 +(dp15 +S'tlr' +p16 +g4 +sS'slr' +p17 +g4 +ssS'submitted_variant' +p18 +S'LRG_199t1:c.2376[G>C];[G>C]' +p19 +sS'genome_context_intronic_sequence' +p20 +g4 +sS'hgvs_lrg_variant' +p21 +g4 +sS'hgvs_transcript_variant' +p22 +g4 +sS'hgvs_refseqgene_variant' +p23 +g4 +sS'primary_assembly_loci' +p24 +(dp25 +sS'reference_sequence_records' +p26 +g4 +ssS'flag' +p27 +S'warning' +p28 +sS'validation_warning_2' +p29 +(dp30 +g3 +g4 +sg5 +(lp31 +S'Automap has extracted possible variant descriptions' +p32 +aS'No transcript definition for (tx_ac=LRG_199t1)' +p33 +asg9 +g4 +sg10 +(lp34 +sg12 +g4 +sg13 +g4 +sg14 +(dp35 +g16 +g4 +sg17 +g4 +ssg18 +g19 +sg20 +g4 +sg21 +g4 +sg22 +g4 +sg23 +g4 +sg24 +(dp36 +sg26 +g4 +ssS'metadata' +p37 +(dp38 +S'variantvalidator_hgvs_version' +p39 +S'1.1.3' +p40 +sS'uta_schema' +p41 +S'uta_20180821' +p42 +sS'seqrepo_db' +p43 +S'2018-08-21' +p44 +sS'variantvalidator_version' +p45 +S'v0.2' +p46 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant19.txt b/VariantValidator/testing/testOutputsMasterITS/variant19.txt new file mode 100644 index 00000000..45468d9c --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant19.txt @@ -0,0 +1,80 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use NC_000011.9:g.5248381A=' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +g4 +sS'gene_symbol' +p12 +g4 +sS'hgvs_predicted_protein_consequence' +p13 +(dp14 +S'tlr' +p15 +g4 +sS'slr' +p16 +g4 +ssS'submitted_variant' +p17 +S'NM_000518.4:c.-50-80C>T' +p18 +sS'genome_context_intronic_sequence' +p19 +g4 +sS'hgvs_lrg_variant' +p20 +g4 +sS'hgvs_transcript_variant' +p21 +g4 +sS'hgvs_refseqgene_variant' +p22 +g4 +sS'primary_assembly_loci' +p23 +(dp24 +sS'reference_sequence_records' +p25 +g4 +ssS'flag' +p26 +S'warning' +p27 +sS'metadata' +p28 +(dp29 +S'variantvalidator_hgvs_version' +p30 +S'1.1.3' +p31 +sS'uta_schema' +p32 +S'uta_20180821' +p33 +sS'seqrepo_db' +p34 +S'2018-08-21' +p35 +sS'variantvalidator_version' +p36 +S'v0.2' +p37 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant190.txt b/VariantValidator/testing/testOutputsMasterITS/variant190.txt new file mode 100644 index 00000000..406b2fbf --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant190.txt @@ -0,0 +1,23 @@ +(dp0 +S'flag' +p1 +NsS'metadata' +p2 +(dp3 +S'variantvalidator_hgvs_version' +p4 +S'1.1.3' +p5 +sS'uta_schema' +p6 +S'uta_20180821' +p7 +sS'seqrepo_db' +p8 +S'2018-08-21' +p9 +sS'variantvalidator_version' +p10 +S'v0.2' +p11 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant191.txt b/VariantValidator/testing/testOutputsMasterITS/variant191.txt new file mode 100644 index 00000000..406b2fbf --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant191.txt @@ -0,0 +1,23 @@ +(dp0 +S'flag' +p1 +NsS'metadata' +p2 +(dp3 +S'variantvalidator_hgvs_version' +p4 +S'1.1.3' +p5 +sS'uta_schema' +p6 +S'uta_20180821' +p7 +sS'seqrepo_db' +p8 +S'2018-08-21' +p9 +sS'variantvalidator_version' +p10 +S'v0.2' +p11 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant192.txt b/VariantValidator/testing/testOutputsMasterITS/variant192.txt new file mode 100644 index 00000000..6241f0af --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant192.txt @@ -0,0 +1,121 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'Automap has extracted possible variant descriptions' +p7 +aS'No transcript definition for (tx_ac=LRG_199t1)' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +g4 +sS'gene_symbol' +p13 +g4 +sS'hgvs_predicted_protein_consequence' +p14 +(dp15 +S'tlr' +p16 +g4 +sS'slr' +p17 +g4 +ssS'submitted_variant' +p18 +S'LRG_199t1:c.2376G>C(;)3103del' +p19 +sS'genome_context_intronic_sequence' +p20 +g4 +sS'hgvs_lrg_variant' +p21 +g4 +sS'hgvs_transcript_variant' +p22 +g4 +sS'hgvs_refseqgene_variant' +p23 +g4 +sS'primary_assembly_loci' +p24 +(dp25 +sS'reference_sequence_records' +p26 +g4 +ssS'flag' +p27 +S'warning' +p28 +sS'validation_warning_2' +p29 +(dp30 +g3 +g4 +sg5 +(lp31 +S'Automap has extracted possible variant descriptions' +p32 +aS'No transcript definition for (tx_ac=LRG_199t1)' +p33 +asg9 +g4 +sg10 +(lp34 +sg12 +g4 +sg13 +g4 +sg14 +(dp35 +g16 +g4 +sg17 +g4 +ssg18 +g19 +sg20 +g4 +sg21 +g4 +sg22 +g4 +sg23 +g4 +sg24 +(dp36 +sg26 +g4 +ssS'metadata' 
+p37 +(dp38 +S'variantvalidator_hgvs_version' +p39 +S'1.1.3' +p40 +sS'uta_schema' +p41 +S'uta_20180821' +p42 +sS'seqrepo_db' +p43 +S'2018-08-21' +p44 +sS'variantvalidator_version' +p45 +S'v0.2' +p46 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant193.txt b/VariantValidator/testing/testOutputsMasterITS/variant193.txt new file mode 100644 index 00000000..a6a49aff --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant193.txt @@ -0,0 +1,82 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'Automap has extracted possible variant descriptions' +p7 +aS'No transcript definition for (tx_ac=LRG_199t1)' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +g4 +sS'gene_symbol' +p13 +g4 +sS'hgvs_predicted_protein_consequence' +p14 +(dp15 +S'tlr' +p16 +g4 +sS'slr' +p17 +g4 +ssS'submitted_variant' +p18 +S'LRG_199t1:c.2376[G>C];[(G>C)]' +p19 +sS'genome_context_intronic_sequence' +p20 +g4 +sS'hgvs_lrg_variant' +p21 +g4 +sS'hgvs_transcript_variant' +p22 +g4 +sS'hgvs_refseqgene_variant' +p23 +g4 +sS'primary_assembly_loci' +p24 +(dp25 +sS'reference_sequence_records' +p26 +g4 +ssS'flag' +p27 +S'warning' +p28 +sS'metadata' +p29 +(dp30 +S'variantvalidator_hgvs_version' +p31 +S'1.1.3' +p32 +sS'uta_schema' +p33 +S'uta_20180821' +p34 +sS'seqrepo_db' +p35 +S'2018-08-21' +p36 +sS'variantvalidator_version' +p37 +S'v0.2' +p38 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant194.txt b/VariantValidator/testing/testOutputsMasterITS/variant194.txt new file mode 100644 index 00000000..406b2fbf --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant194.txt @@ -0,0 +1,23 @@ +(dp0 +S'flag' +p1 +NsS'metadata' +p2 +(dp3 +S'variantvalidator_hgvs_version' +p4 +S'1.1.3' +p5 +sS'uta_schema' +p6 +S'uta_20180821' +p7 +sS'seqrepo_db' +p8 +S'2018-08-21' +p9 +sS'variantvalidator_version' +p10 +S'v0.2' +p11 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant195.txt b/VariantValidator/testing/testOutputsMasterITS/variant195.txt new file mode 100644 index 00000000..406b2fbf --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant195.txt @@ -0,0 +1,23 @@ +(dp0 +S'flag' +p1 +NsS'metadata' +p2 +(dp3 +S'variantvalidator_hgvs_version' +p4 +S'1.1.3' +p5 +sS'uta_schema' +p6 +S'uta_20180821' +p7 +sS'seqrepo_db' +p8 +S'2018-08-21' +p9 +sS'variantvalidator_version' +p10 +S'v0.2' +p11 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant196.txt b/VariantValidator/testing/testOutputsMasterITS/variant196.txt new file mode 100644 index 00000000..406b2fbf --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant196.txt @@ -0,0 +1,23 @@ +(dp0 +S'flag' +p1 +NsS'metadata' +p2 +(dp3 +S'variantvalidator_hgvs_version' +p4 +S'1.1.3' +p5 +sS'uta_schema' +p6 +S'uta_20180821' +p7 +sS'seqrepo_db' +p8 +S'2018-08-21' +p9 +sS'variantvalidator_version' +p10 +S'v0.2' +p11 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant197.txt b/VariantValidator/testing/testOutputsMasterITS/variant197.txt new file mode 100644 index 00000000..e1b8214b --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant197.txt @@ -0,0 +1,80 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'Intronic positions not supported for HGVS Allele descriptions' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +g4 +sS'gene_symbol' +p12 +g4 +sS'hgvs_predicted_protein_consequence' +p13 +(dp14 +S'tlr' +p15 +g4 +sS'slr' +p16 +g4 +ssS'submitted_variant' +p17 +S'LRG_199t1:c.[976-20T>A;976-17_976-1dup]' +p18 +sS'genome_context_intronic_sequence' +p19 +g4 +sS'hgvs_lrg_variant' +p20 +g4 +sS'hgvs_transcript_variant' +p21 +g4 +sS'hgvs_refseqgene_variant' +p22 +g4 +sS'primary_assembly_loci' +p23 +(dp24 +sS'reference_sequence_records' +p25 +g4 +ssS'flag' +p26 +S'warning' +p27 +sS'metadata' +p28 +(dp29 +S'variantvalidator_hgvs_version' +p30 +S'1.1.3' +p31 +sS'uta_schema' +p32 +S'uta_20180821' +p33 +sS'seqrepo_db' +p34 +S'2018-08-21' +p35 +sS'variantvalidator_version' +p36 +S'v0.2' +p37 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant198.txt b/VariantValidator/testing/testOutputsMasterITS/variant198.txt new file mode 100644 index 00000000..e445129d --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant198.txt @@ -0,0 +1,606 @@ +(dp0 +S'NM_015102.3:c.2818-2T>A' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'A more recent version of the selected reference sequence NM_015102.3 is available (NM_015102.4)' +p7 +aS'NM_015102.4:c.2818-2T>A MUST be fully validated prior to use in reports' +p8 +aS'select_variants=NM_015102.4:c.2818-2T>A' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g4 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens nephronophthisis 4 (NPHP4), mRNA +p15 +sS'gene_symbol' +p16 +S'NPHP4' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_055917.1:p.?' +p21 +sS'slr' +p22 +S'NP_055917.1:p.?' 
+p23 +ssS'submitted_variant' +p24 +S'1-5935162-A-T' +p25 +sS'genome_context_intronic_sequence' +p26 +S'NC_000001.10(NM_015102.3):c.2818-2T>A' +p27 +sS'hgvs_lrg_variant' +p28 +g4 +sS'hgvs_transcript_variant' +p29 +S'NM_015102.3:c.2818-2T>A' +p30 +sS'hgvs_refseqgene_variant' +p31 +g4 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000001.10:g.5935162A>T' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr1' +p41 +sS'ref' +p42 +VA +p43 +sS'pos' +p44 +S'5935162' +p45 +sS'alt' +p46 +VT +p47 +sssS'grch37' +p48 +(dp49 +g36 +S'NC_000001.10:g.5935162A>T' +p50 +sg38 +(dp51 +g40 +S'1' +p52 +sg42 +g43 +sg44 +S'5935162' +p53 +sg46 +g47 +ssssS'reference_sequence_records' +p54 +(dp55 +S'protein' +p56 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055917.1' +p57 +sS'transcript' +p58 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_015102.3' +p59 +sssS'NM_001291593.1:c.1279-2T>A' +p60 +(dp61 +g3 +g4 +sg5 +(lp62 +S'RefSeqGene record not available' +p63 +asg11 +g4 +sg12 +(lp64 +sg14 +VHomo sapiens nephrocystin 4 (NPHP4), transcript variant 2, mRNA +p65 +sg16 +S'NPHP4' +p66 +sg18 +(dp67 +g20 +S'NP_001278522.1:p.?' +p68 +sg22 +S'NP_001278522.1:p.?' 
+p69 +ssg24 +g25 +sg26 +S'NC_000001.10(NM_001291593.1):c.1279-2T>A' +p70 +sg28 +g4 +sg29 +S'NM_001291593.1:c.1279-2T>A' +p71 +sg31 +g4 +sg32 +(dp72 +S'hg19' +p73 +(dp74 +g36 +S'NC_000001.10:g.5935162A>T' +p75 +sg38 +(dp76 +g40 +g41 +sg42 +g43 +sg44 +S'5935162' +p77 +sg46 +g47 +sssS'hg38' +p78 +(dp79 +g36 +S'NC_000001.11:g.5875102T=' +p80 +sg38 +(dp81 +g40 +g41 +sg42 +S'T' +p82 +sg44 +S'5875102' +p83 +sg46 +g82 +sssS'grch37' +p84 +(dp85 +g36 +S'NC_000001.10:g.5935162A>T' +p86 +sg38 +(dp87 +g40 +g52 +sg42 +g43 +sg44 +S'5935162' +p88 +sg46 +g47 +sssS'grch38' +p89 +(dp90 +g36 +S'NC_000001.11:g.5875102T=' +p91 +sg38 +(dp92 +g40 +g52 +sg42 +g82 +sg44 +S'5875102' +p93 +sg46 +g82 +ssssg54 +(dp94 +g56 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278522.1' +p95 +sg58 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291593.1' +p96 +sssS'NM_015102.4:c.2818-2T>A' +p97 +(dp98 +g3 +g4 +sg5 +(lp99 +S'RefSeqGene record not available' +p100 +asg11 +g4 +sg12 +(lp101 +sg14 +VHomo sapiens nephrocystin 4 (NPHP4), transcript variant 1, mRNA +p102 +sg16 +S'NPHP4' +p103 +sg18 +(dp104 +g20 +S'NP_055917.1:p.?' +p105 +sg22 +S'NP_055917.1:p.?' 
+p106 +ssg24 +g25 +sg26 +S'NC_000001.10(NM_015102.4):c.2818-2T>A' +p107 +sg28 +g4 +sg29 +S'NM_015102.4:c.2818-2T>A' +p108 +sg31 +g4 +sg32 +(dp109 +S'hg19' +p110 +(dp111 +g36 +S'NC_000001.10:g.5935162A>T' +p112 +sg38 +(dp113 +g40 +g41 +sg42 +g43 +sg44 +S'5935162' +p114 +sg46 +g47 +sssg78 +(dp115 +g36 +S'NC_000001.11:g.5875102T=' +p116 +sg38 +(dp117 +g40 +g41 +sg42 +g82 +sg44 +S'5875102' +p118 +sg46 +g82 +sssS'grch37' +p119 +(dp120 +g36 +S'NC_000001.10:g.5935162A>T' +p121 +sg38 +(dp122 +g40 +g52 +sg42 +g43 +sg44 +S'5935162' +p123 +sg46 +g47 +sssS'grch38' +p124 +(dp125 +g36 +S'NC_000001.11:g.5875102T=' +p126 +sg38 +(dp127 +g40 +g52 +sg42 +g82 +sg44 +S'5875102' +p128 +sg46 +g82 +ssssg54 +(dp129 +g56 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055917.1' +p130 +sg58 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_015102.4' +p131 +sssS'NM_001291594.1:c.1282-2T>A' +p132 +(dp133 +g3 +g4 +sg5 +(lp134 +S'RefSeqGene record not available' +p135 +asg11 +g4 +sg12 +(lp136 +sg14 +VHomo sapiens nephrocystin 4 (NPHP4), transcript variant 3, mRNA +p137 +sg16 +S'NPHP4' +p138 +sg18 +(dp139 +g20 +S'NP_001278523.1:p.?' +p140 +sg22 +S'NP_001278523.1:p.?' 
+p141 +ssg24 +g25 +sg26 +S'NC_000001.10(NM_001291594.1):c.1282-2T>A' +p142 +sg28 +g4 +sg29 +S'NM_001291594.1:c.1282-2T>A' +p143 +sg31 +g4 +sg32 +(dp144 +S'hg19' +p145 +(dp146 +g36 +S'NC_000001.10:g.5935162A>T' +p147 +sg38 +(dp148 +g40 +g41 +sg42 +g43 +sg44 +S'5935162' +p149 +sg46 +g47 +sssg78 +(dp150 +g36 +S'NC_000001.11:g.5875102T=' +p151 +sg38 +(dp152 +g40 +g41 +sg42 +g82 +sg44 +S'5875102' +p153 +sg46 +g82 +sssS'grch37' +p154 +(dp155 +g36 +S'NC_000001.10:g.5935162A>T' +p156 +sg38 +(dp157 +g40 +g52 +sg42 +g43 +sg44 +S'5935162' +p158 +sg46 +g47 +sssS'grch38' +p159 +(dp160 +g36 +S'NC_000001.11:g.5875102T=' +p161 +sg38 +(dp162 +g40 +g52 +sg42 +g82 +sg44 +S'5875102' +p163 +sg46 +g82 +ssssg54 +(dp164 +g56 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278523.1' +p165 +sg58 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291594.1' +p166 +sssS'flag' +p167 +S'gene_variant' +p168 +sS'NR_111987.1:n.3633-2T>A' +p169 +(dp170 +g3 +g4 +sg5 +(lp171 +S'RefSeqGene record not available' +p172 +asg11 +g4 +sg12 +(lp173 +sg14 +VHomo sapiens nephrocystin 4 (NPHP4), transcript variant 4, non-coding RNA +p174 +sg16 +S'NPHP4' +p175 +sg18 +(dp176 +g20 +S'Non-coding :n.' 
+p177 +sg22 +g177 +ssg24 +g25 +sg26 +S'NC_000001.10(NR_111987.1):c.3633-2T>A' +p178 +sg28 +g4 +sg29 +S'NR_111987.1:n.3633-2T>A' +p179 +sg31 +g4 +sg32 +(dp180 +S'hg19' +p181 +(dp182 +g36 +S'NC_000001.10:g.5935162A>T' +p183 +sg38 +(dp184 +g40 +g41 +sg42 +g43 +sg44 +S'5935162' +p185 +sg46 +g47 +sssg78 +(dp186 +g36 +S'NC_000001.11:g.5875102T=' +p187 +sg38 +(dp188 +g40 +g41 +sg42 +g82 +sg44 +S'5875102' +p189 +sg46 +g82 +sssS'grch37' +p190 +(dp191 +g36 +S'NC_000001.10:g.5935162A>T' +p192 +sg38 +(dp193 +g40 +g52 +sg42 +g43 +sg44 +S'5935162' +p194 +sg46 +g47 +sssS'grch38' +p195 +(dp196 +g36 +S'NC_000001.11:g.5875102T=' +p197 +sg38 +(dp198 +g40 +g52 +sg42 +g82 +sg44 +S'5875102' +p199 +sg46 +g82 +ssssg54 +(dp200 +g58 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_111987.1' +p201 +sssS'metadata' +p202 +(dp203 +S'variantvalidator_hgvs_version' +p204 +S'1.1.3' +p205 +sS'uta_schema' +p206 +S'uta_20180821' +p207 +sS'seqrepo_db' +p208 +S'2018-08-21' +p209 +sS'variantvalidator_version' +p210 +S'v0.2' +p211 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant199.txt b/VariantValidator/testing/testOutputsMasterITS/variant199.txt new file mode 100644 index 00000000..7ff7a9a6 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant199.txt @@ -0,0 +1,286 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_001127660.1:c.1676C>T' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens mitofusin 2 (MFN2), transcript variant 2, mRNA +p14 +sS'gene_symbol' +p15 +S'MFN2' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_001121132.1:p.(Pro559Leu)' +p20 +sS'slr' +p21 +S'NP_001121132.1:p.(P559L)' +p22 +ssS'submitted_variant' +p23 +S'1-12065948-C-T' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_001127660.1:c.1676C>T' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000001.10:g.12065948C>T' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr1' +p39 +sS'ref' +p40 +S'C' +p41 +sS'pos' +p42 +S'12065948' +p43 +sS'alt' +p44 +S'T' +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000001.11:g.12005891C>T' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +g41 +sg42 +S'12005891' +p50 +sg44 +g45 +sssS'grch37' +p51 +(dp52 +g34 +S'NC_000001.10:g.12065948C>T' +p53 +sg36 +(dp54 +g38 +S'1' +p55 +sg40 +g41 +sg42 +S'12065948' +p56 +sg44 +g45 +sssS'grch38' +p57 +(dp58 +g34 +S'NC_000001.11:g.12005891C>T' +p59 +sg36 +(dp60 +g38 +g55 +sg40 +g41 +sg42 +S'12005891' +p61 +sg44 +g45 +ssssS'reference_sequence_records' +p62 +(dp63 +S'protein' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001121132.1' +p65 +sS'transcript' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001127660.1' +p67 
+sssS'NM_014874.3:c.1676C>T' +p68 +(dp69 +g5 +g6 +sg7 +(lp70 +S'RefSeqGene record not available' +p71 +asg10 +g6 +sg11 +(lp72 +sg13 +VHomo sapiens mitofusin 2 (MFN2), transcript variant 1, mRNA +p73 +sg15 +S'MFN2' +p74 +sg17 +(dp75 +g19 +S'NP_055689.1:p.(Pro559Leu)' +p76 +sg21 +S'NP_055689.1:p.(P559L)' +p77 +ssg23 +g24 +sg25 +g6 +sg26 +g6 +sg27 +S'NM_014874.3:c.1676C>T' +p78 +sg29 +g6 +sg30 +(dp79 +S'hg19' +p80 +(dp81 +g34 +S'NC_000001.10:g.12065948C>T' +p82 +sg36 +(dp83 +g38 +g39 +sg40 +g41 +sg42 +S'12065948' +p84 +sg44 +g45 +sssg46 +(dp85 +g34 +S'NC_000001.11:g.12005891C>T' +p86 +sg36 +(dp87 +g38 +g39 +sg40 +g41 +sg42 +S'12005891' +p88 +sg44 +g45 +sssS'grch37' +p89 +(dp90 +g34 +S'NC_000001.10:g.12065948C>T' +p91 +sg36 +(dp92 +g38 +g55 +sg40 +g41 +sg42 +S'12065948' +p93 +sg44 +g45 +sssS'grch38' +p94 +(dp95 +g34 +S'NC_000001.11:g.12005891C>T' +p96 +sg36 +(dp97 +g38 +g55 +sg40 +g41 +sg42 +S'12005891' +p98 +sg44 +g45 +ssssg62 +(dp99 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055689.1' +p100 +sg66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014874.3' +p101 +sssS'metadata' +p102 +(dp103 +S'variantvalidator_hgvs_version' +p104 +S'1.1.3' +p105 +sS'uta_schema' +p106 +S'uta_20180821' +p107 +sS'seqrepo_db' +p108 +S'2018-08-21' +p109 +sS'variantvalidator_version' +p110 +S'v0.2' +p111 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant2.txt b/VariantValidator/testing/testOutputsMasterITS/variant2.txt new file mode 100644 index 00000000..b4c0701c --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant2.txt @@ -0,0 +1,174 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_015120.4:c.39G>C' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA +p14 +sS'gene_symbol' +p15 +S'ALMS1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_055935.4:p.(Glu13Asp)' +p20 +sS'slr' +p21 +S'NP_055935.4:p.(E13D)' +p22 +ssS'submitted_variant' +p23 +S'NM_015120.4:c.39G>C' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_015120.4:c.39G>C' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000002.11:g.73613034_73613035insCGA' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr2' +p39 +sS'ref' +p40 +S'G' +p41 +sS'pos' +p42 +S'73613032' +p43 +sS'alt' +p44 +S'GGAC' +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000002.12:g.73385906_73385907insCGA' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +g41 +sg42 +S'73385904' +p50 +sg44 +S'GGAC' +p51 +sssS'grch37' +p52 +(dp53 +g34 +S'NC_000002.11:g.73613034_73613035insCGA' +p54 +sg36 +(dp55 +g38 +S'2' +p56 +sg40 +g41 +sg42 +S'73613032' +p57 +sg44 +S'GGAC' +p58 +sssS'grch38' +p59 +(dp60 +g34 +S'NC_000002.12:g.73385906_73385907insCGA' +p61 +sg36 +(dp62 +g38 +g56 +sg40 +g41 +sg42 +S'73385904' +p63 +sg44 +S'GGAC' +p64 +ssssS'reference_sequence_records' +p65 +(dp66 +S'protein' +p67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4' +p68 
+sS'transcript' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4' +p70 +sssS'metadata' +p71 +(dp72 +S'variantvalidator_hgvs_version' +p73 +S'1.1.3' +p74 +sS'uta_schema' +p75 +S'uta_20180821' +p76 +sS'seqrepo_db' +p77 +S'2018-08-21' +p78 +sS'variantvalidator_version' +p79 +S'v0.2' +p80 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant20.txt b/VariantValidator/testing/testOutputsMasterITS/variant20.txt new file mode 100644 index 00000000..0a044332 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant20.txt @@ -0,0 +1,82 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant' +p7 +aS'Instead use NC_000011.9:g.5246486_5246956delinsAAGTAG' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +g4 +sS'gene_symbol' +p13 +g4 +sS'hgvs_predicted_protein_consequence' +p14 +(dp15 +S'tlr' +p16 +g4 +sS'slr' +p17 +g4 +ssS'submitted_variant' +p18 +S'NM_000518.4:c.316_*342delinsCTACTT' +p19 +sS'genome_context_intronic_sequence' +p20 +g4 +sS'hgvs_lrg_variant' +p21 +g4 +sS'hgvs_transcript_variant' +p22 +g4 +sS'hgvs_refseqgene_variant' +p23 +g4 +sS'primary_assembly_loci' +p24 +(dp25 +sS'reference_sequence_records' +p26 +g4 +ssS'flag' +p27 +S'warning' +p28 +sS'metadata' +p29 +(dp30 +S'variantvalidator_hgvs_version' +p31 +S'1.1.3' +p32 +sS'uta_schema' +p33 +S'uta_20180821' +p34 +sS'seqrepo_db' +p35 +S'2018-08-21' +p36 +sS'variantvalidator_version' +p37 +S'v0.2' +p38 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant200.txt b/VariantValidator/testing/testOutputsMasterITS/variant200.txt new file mode 100644 index 00000000..8f017a82 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant200.txt @@ -0,0 +1,543 @@ +(dp0 +S'NM_001290129.1:c.1829+5_1829+8del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000001.10:g.46655125CTCAC>C automapped to NC_000001.10:g.46655126_46655129del' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens protein O-linked mannose N-acetylglucosaminyltransferase 1 (beta 1,2-) (POMGNT1), transcript variant 3, mRNA +p13 +sS'gene_symbol' +p14 +S'POMGNT1' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_001277058.1:p.?' +p19 +sS'slr' +p20 +S'NP_001277058.1:p.?' +p21 +ssS'submitted_variant' +p22 +S'1-46655125-CTCAC-C' +p23 +sS'genome_context_intronic_sequence' +p24 +S'NC_000001.10(NM_001290129.1):c.1829+5_1829+8del' +p25 +sS'hgvs_lrg_variant' +p26 +g4 +sS'hgvs_transcript_variant' +p27 +S'NM_001290129.1:c.1829+5_1829+8del' +p28 +sS'hgvs_refseqgene_variant' +p29 +g4 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000001.10:g.46655122_46655125del' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr1' +p39 +sS'ref' +p40 +S'GTCAC' +p41 +sS'pos' +p42 +S'46655121' +p43 +sS'alt' +p44 +S'G' +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000001.11:g.46189450_46189453del' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +S'GTCAC' +p50 +sg42 +S'46189449' +p51 +sg44 +g45 +sssS'grch37' +p52 +(dp53 +g34 +S'NC_000001.10:g.46655122_46655125del' +p54 +sg36 +(dp55 +g38 +S'1' +p56 +sg40 +S'GTCAC' +p57 +sg42 +S'46655121' +p58 +sg44 +g45 +sssS'grch38' +p59 +(dp60 +g34 +S'NC_000001.11:g.46189450_46189453del' +p61 +sg36 +(dp62 +g38 +g56 +sg40 +S'GTCAC' 
+p63 +sg42 +S'46189449' +p64 +sg44 +g45 +ssssS'reference_sequence_records' +p65 +(dp66 +S'protein' +p67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001277058.1' +p68 +sS'transcript' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001290129.1' +p70 +sssS'NM_001290130.1:c.1466+5_1466+8del' +p71 +(dp72 +g3 +g4 +sg5 +(lp73 +S'NC_000001.10:g.46655125CTCAC>C automapped to NC_000001.10:g.46655126_46655129del' +p74 +aS'RefSeqGene record not available' +p75 +asg9 +g4 +sg10 +(lp76 +sg12 +VHomo sapiens protein O-linked mannose N-acetylglucosaminyltransferase 1 (beta 1,2-) (POMGNT1), transcript variant 4, mRNA +p77 +sg14 +S'POMGNT1' +p78 +sg16 +(dp79 +g18 +S'NP_001277059.1:p.?' +p80 +sg20 +S'NP_001277059.1:p.?' +p81 +ssg22 +g23 +sg24 +S'NC_000001.10(NM_001290130.1):c.1466+5_1466+8del' +p82 +sg26 +g4 +sg27 +S'NM_001290130.1:c.1466+5_1466+8del' +p83 +sg29 +g4 +sg30 +(dp84 +S'hg19' +p85 +(dp86 +g34 +S'NC_000001.10:g.46655122_46655125del' +p87 +sg36 +(dp88 +g38 +g39 +sg40 +S'GTCAC' +p89 +sg42 +S'46655121' +p90 +sg44 +g45 +sssg46 +(dp91 +g34 +S'NC_000001.11:g.46189450_46189453del' +p92 +sg36 +(dp93 +g38 +g39 +sg40 +S'GTCAC' +p94 +sg42 +S'46189449' +p95 +sg44 +g45 +sssS'grch37' +p96 +(dp97 +g34 +S'NC_000001.10:g.46655122_46655125del' +p98 +sg36 +(dp99 +g38 +g56 +sg40 +S'GTCAC' +p100 +sg42 +S'46655121' +p101 +sg44 +g45 +sssS'grch38' +p102 +(dp103 +g34 +S'NC_000001.11:g.46189450_46189453del' +p104 +sg36 +(dp105 +g38 +g56 +sg40 +S'GTCAC' +p106 +sg42 +S'46189449' +p107 +sg44 +g45 +ssssg65 +(dp108 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001277059.1' +p109 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001290130.1' +p110 +sssS'NM_017739.3:c.1895+5_1895+8del' +p111 +(dp112 +g3 +g4 +sg5 +(lp113 +S'NC_000001.10:g.46655125CTCAC>C automapped to NC_000001.10:g.46655126_46655129del' +p114 +aS'RefSeqGene record not available' +p115 +asg9 +g4 +sg10 +(lp116 +sg12 +VHomo sapiens protein O-linked mannose N-acetylglucosaminyltransferase 1 (beta 1,2-) (POMGNT1), transcript variant 1, mRNA 
+p117 +sg14 +S'POMGNT1' +p118 +sg16 +(dp119 +g18 +S'NP_060209.3:p.?' +p120 +sg20 +S'NP_060209.3:p.?' +p121 +ssg22 +g23 +sg24 +S'NC_000001.10(NM_017739.3):c.1895+5_1895+8del' +p122 +sg26 +g4 +sg27 +S'NM_017739.3:c.1895+5_1895+8del' +p123 +sg29 +g4 +sg30 +(dp124 +S'hg19' +p125 +(dp126 +g34 +S'NC_000001.10:g.46655122_46655125del' +p127 +sg36 +(dp128 +g38 +g39 +sg40 +S'GTCAC' +p129 +sg42 +S'46655121' +p130 +sg44 +g45 +sssg46 +(dp131 +g34 +S'NC_000001.11:g.46189450_46189453del' +p132 +sg36 +(dp133 +g38 +g39 +sg40 +S'GTCAC' +p134 +sg42 +S'46189449' +p135 +sg44 +g45 +sssS'grch37' +p136 +(dp137 +g34 +S'NC_000001.10:g.46655122_46655125del' +p138 +sg36 +(dp139 +g38 +g56 +sg40 +S'GTCAC' +p140 +sg42 +S'46655121' +p141 +sg44 +g45 +sssS'grch38' +p142 +(dp143 +g34 +S'NC_000001.11:g.46189450_46189453del' +p144 +sg36 +(dp145 +g38 +g56 +sg40 +S'GTCAC' +p146 +sg42 +S'46189449' +p147 +sg44 +g45 +ssssg65 +(dp148 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_060209.3' +p149 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_017739.3' +p150 +sssS'NM_001243766.1:c.1869+31_1869+34del' +p151 +(dp152 +g3 +g4 +sg5 +(lp153 +S'NC_000001.10:g.46655125CTCAC>C automapped to NC_000001.10:g.46655126_46655129del' +p154 +aS'RefSeqGene record not available' +p155 +asg9 +g4 +sg10 +(lp156 +sg12 +VHomo sapiens protein O-linked mannose N-acetylglucosaminyltransferase 1 (beta 1,2-) (POMGNT1), transcript variant 2, mRNA +p157 +sg14 +S'POMGNT1' +p158 +sg16 +(dp159 +g18 +S'NP_001230695.1:p.?' +p160 +sg20 +S'NP_001230695.1:p.?' 
+p161 +ssg22 +g23 +sg24 +S'NC_000001.10(NM_001243766.1):c.1869+31_1869+34del' +p162 +sg26 +g4 +sg27 +S'NM_001243766.1:c.1869+31_1869+34del' +p163 +sg29 +g4 +sg30 +(dp164 +S'hg19' +p165 +(dp166 +g34 +S'NC_000001.10:g.46655122_46655125del' +p167 +sg36 +(dp168 +g38 +g39 +sg40 +S'GTCAC' +p169 +sg42 +S'46655121' +p170 +sg44 +g45 +sssg46 +(dp171 +g34 +S'NC_000001.11:g.46189450_46189453del' +p172 +sg36 +(dp173 +g38 +g39 +sg40 +S'GTCAC' +p174 +sg42 +S'46189449' +p175 +sg44 +g45 +sssS'grch37' +p176 +(dp177 +g34 +S'NC_000001.10:g.46655122_46655125del' +p178 +sg36 +(dp179 +g38 +g56 +sg40 +S'GTCAC' +p180 +sg42 +S'46655121' +p181 +sg44 +g45 +sssS'grch38' +p182 +(dp183 +g34 +S'NC_000001.11:g.46189450_46189453del' +p184 +sg36 +(dp185 +g38 +g56 +sg40 +S'GTCAC' +p186 +sg42 +S'46189449' +p187 +sg44 +g45 +ssssg65 +(dp188 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001230695.1' +p189 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001243766.1' +p190 +sssS'flag' +p191 +S'gene_variant' +p192 +sS'metadata' +p193 +(dp194 +S'variantvalidator_hgvs_version' +p195 +S'1.1.3' +p196 +sS'uta_schema' +p197 +S'uta_20180821' +p198 +sS'seqrepo_db' +p199 +S'2018-08-21' +p200 +sS'variantvalidator_version' +p201 +S'v0.2' +p202 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant201.txt b/VariantValidator/testing/testOutputsMasterITS/variant201.txt new file mode 100644 index 00000000..7ea368a7 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant201.txt @@ -0,0 +1,176 @@ +(dp0 +S'NM_000329.2:c.106_114del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000001.10:g.68912523TGAGCCAGAG>T automapped to NC_000001.10:g.68912525_68912533del' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens RPE65, retinoid isomerohydrolase (RPE65), mRNA +p13 +sS'gene_symbol' +p14 +S'RPE65' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_000320.1:p.(Leu36_Leu38del)' +p19 +sS'slr' +p20 +S'NP_000320.1:p.(L36_L38del)' +p21 +ssS'submitted_variant' +p22 +S'1-68912523-TGAGCCAGAG-T' +p23 +sS'genome_context_intronic_sequence' +p24 +g4 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_000329.2:c.106_114del' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000001.10:g.68912524_68912532del' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'chr1' +p38 +sS'ref' +p39 +S'TGAGCCAGAG' +p40 +sS'pos' +p41 +S'68912523' +p42 +sS'alt' +p43 +S'T' +p44 +sssS'hg38' +p45 +(dp46 +g33 +S'NC_000001.11:g.68446841_68446849del' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +S'TGAGCCAGAG' +p49 +sg41 +S'68446840' +p50 +sg43 +g44 +sssS'grch37' +p51 +(dp52 +g33 +S'NC_000001.10:g.68912524_68912532del' +p53 +sg35 +(dp54 +g37 +S'1' +p55 +sg39 +S'TGAGCCAGAG' +p56 +sg41 +S'68912523' +p57 +sg43 +g44 +sssS'grch38' +p58 +(dp59 +g33 +S'NC_000001.11:g.68446841_68446849del' +p60 +sg35 +(dp61 +g37 +g55 +sg39 +S'TGAGCCAGAG' +p62 +sg41 +S'68446840' +p63 +sg43 +g44 +ssssS'reference_sequence_records' +p64 
+(dp65 +S'protein' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000320.1' +p67 +sS'transcript' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000329.2' +p69 +sssS'flag' +p70 +S'gene_variant' +p71 +sS'metadata' +p72 +(dp73 +S'variantvalidator_hgvs_version' +p74 +S'1.1.3' +p75 +sS'uta_schema' +p76 +S'uta_20180821' +p77 +sS'seqrepo_db' +p78 +S'2018-08-21' +p79 +sS'variantvalidator_version' +p80 +S'v0.2' +p81 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant202.txt b/VariantValidator/testing/testOutputsMasterITS/variant202.txt new file mode 100644 index 00000000..7c710760 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant202.txt @@ -0,0 +1,176 @@ +(dp0 +S'NM_000329.2:c.109_114del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000001.10:g.68912526GCCAGAG>G automapped to NC_000001.10:g.68912527_68912532del' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens RPE65, retinoid isomerohydrolase (RPE65), mRNA +p13 +sS'gene_symbol' +p14 +S'RPE65' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_000320.1:p.(Trp37_Leu38del)' +p19 +sS'slr' +p20 +S'NP_000320.1:p.(W37_L38del)' +p21 +ssS'submitted_variant' +p22 +S'1-68912526-GCCAGAG-G' +p23 +sS'genome_context_intronic_sequence' +p24 +g4 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_000329.2:c.109_114del' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000001.10:g.68912524_68912529del' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'chr1' +p38 +sS'ref' +p39 +S'TGAGCCA' +p40 +sS'pos' +p41 +S'68912523' +p42 +sS'alt' +p43 +S'T' +p44 +sssS'hg38' +p45 +(dp46 +g33 +S'NC_000001.11:g.68446841_68446846del' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +S'TGAGCCA' +p49 
+sg41 +S'68446840' +p50 +sg43 +g44 +sssS'grch37' +p51 +(dp52 +g33 +S'NC_000001.10:g.68912524_68912529del' +p53 +sg35 +(dp54 +g37 +S'1' +p55 +sg39 +S'TGAGCCA' +p56 +sg41 +S'68912523' +p57 +sg43 +g44 +sssS'grch38' +p58 +(dp59 +g33 +S'NC_000001.11:g.68446841_68446846del' +p60 +sg35 +(dp61 +g37 +g55 +sg39 +S'TGAGCCA' +p62 +sg41 +S'68446840' +p63 +sg43 +g44 +ssssS'reference_sequence_records' +p64 +(dp65 +S'protein' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000320.1' +p67 +sS'transcript' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000329.2' +p69 +sssS'flag' +p70 +S'gene_variant' +p71 +sS'metadata' +p72 +(dp73 +S'variantvalidator_hgvs_version' +p74 +S'1.1.3' +p75 +sS'uta_schema' +p76 +S'uta_20180821' +p77 +sS'seqrepo_db' +p78 +S'2018-08-21' +p79 +sS'variantvalidator_version' +p80 +S'v0.2' +p81 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant203.txt b/VariantValidator/testing/testOutputsMasterITS/variant203.txt new file mode 100644 index 00000000..e99fdc71 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant203.txt @@ -0,0 +1,171 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_001408.2:c.*919G>T' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens cadherin EGF LAG seven-pass G-type receptor 2 (CELSR2), mRNA +p14 +sS'gene_symbol' +p15 +S'CELSR2' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_001399.1:p.?' +p20 +sS'slr' +p21 +S'NP_001399.1:p.?' 
+p22 +ssS'submitted_variant' +p23 +S'1-109817590-G-T' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_001408.2:c.*919G>T' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000001.10:g.109817590G>T' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr1' +p39 +sS'ref' +p40 +S'G' +p41 +sS'pos' +p42 +S'109817590' +p43 +sS'alt' +p44 +S'T' +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000001.11:g.109274968G>T' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +g41 +sg42 +S'109274968' +p50 +sg44 +g45 +sssS'grch37' +p51 +(dp52 +g34 +S'NC_000001.10:g.109817590G>T' +p53 +sg36 +(dp54 +g38 +S'1' +p55 +sg40 +g41 +sg42 +S'109817590' +p56 +sg44 +g45 +sssS'grch38' +p57 +(dp58 +g34 +S'NC_000001.11:g.109274968G>T' +p59 +sg36 +(dp60 +g38 +g55 +sg40 +g41 +sg42 +S'109274968' +p61 +sg44 +g45 +ssssS'reference_sequence_records' +p62 +(dp63 +S'protein' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001399.1' +p65 +sS'transcript' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001408.2' +p67 +sssS'metadata' +p68 +(dp69 +S'variantvalidator_hgvs_version' +p70 +S'1.1.3' +p71 +sS'uta_schema' +p72 +S'uta_20180821' +p73 +sS'seqrepo_db' +p74 +S'2018-08-21' +p75 +sS'variantvalidator_version' +p76 +S'v0.2' +p77 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant204.txt b/VariantValidator/testing/testOutputsMasterITS/variant204.txt new file mode 100644 index 00000000..2f2f8599 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant204.txt @@ -0,0 +1,511 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_006468.6:c.1070+35_1070+38del' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'NC_000001.10:g.145597475GAAGT>G automapped to NC_000001.10:g.145597477_145597480del' +p19 +aS'A more recent version of the selected reference sequence NM_006468.6 is available (NM_006468.7)' +p20 +aS'NM_006468.7:c.1070+35_1070+38del MUST be fully validated prior to use in reports' +p21 +aS'select_variants=NM_006468.7:c.1070+35_1070+38del' +p22 +aS'RefSeqGene record not available' +p23 +asS'refseqgene_context_intronic_sequence' +p24 +g16 +sS'alt_genomic_loci' +p25 +(lp26 +(dp27 +S'grch37' +p28 +(dp29 +S'hgvs_genomic_description' +p30 +S'NW_003871055.3:g.2653044_2653047del' +p31 +sS'vcf' +p32 +(dp33 +S'chr' +p34 +S'HG1287_PATCH' +p35 +sS'ref' +p36 +S'ATACT' +p37 +sS'pos' +p38 +S'2653042' +p39 +sS'alt' +p40 +S'A' +p41 +sssa(dp42 +S'hg19' +p43 +(dp44 +g30 +S'NW_003871055.3:g.2653044_2653047del' +p45 +sg32 +(dp46 +g34 +S'NW_003871055.3' +p47 +sg36 +S'ATACT' +p48 +sg38 +S'2653042' +p49 +sg40 +g41 +sssasS'transcript_description' +p50 +VHomo sapiens polymerase (RNA) III (DNA directed) polypeptide C (62kD) (POLR3C), mRNA +p51 +sS'gene_symbol' +p52 +S'POLR3C' +p53 +sS'hgvs_predicted_protein_consequence' +p54 +(dp55 +S'tlr' +p56 +S'NP_006459.3:p.?' +p57 +sS'slr' +p58 +S'NP_006459.3:p.?' 
+p59 +ssS'submitted_variant' +p60 +S'1-145597475-GAAGT-G' +p61 +sS'genome_context_intronic_sequence' +p62 +S'NC_000001.10(NM_006468.6):c.1070+35_1070+38del' +p63 +sS'hgvs_lrg_variant' +p64 +g16 +sS'hgvs_transcript_variant' +p65 +S'NM_006468.6:c.1070+35_1070+38del' +p66 +sS'hgvs_refseqgene_variant' +p67 +g16 +sS'primary_assembly_loci' +p68 +(dp69 +S'hg19' +p70 +(dp71 +g30 +S'NC_000001.10:g.145597477_145597480del' +p72 +sg32 +(dp73 +g34 +S'chr1' +p74 +sg36 +S'GAAGT' +p75 +sg38 +S'145597475' +p76 +sg40 +S'G' +p77 +sssS'grch37' +p78 +(dp79 +g30 +S'NC_000001.10:g.145597477_145597480del' +p80 +sg32 +(dp81 +g34 +S'1' +p82 +sg36 +S'GAAGT' +p83 +sg38 +S'145597475' +p84 +sg40 +g77 +ssssS'reference_sequence_records' +p85 +(dp86 +S'protein' +p87 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_006459.3' +p88 +sS'transcript' +p89 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_006468.6' +p90 +sssS'NM_001303456.1:c.1109+35_1109+38del' +p91 +(dp92 +g15 +g16 +sg17 +(lp93 +S'NC_000001.10:g.145597475GAAGT>G automapped to NC_000001.10:g.145597477_145597480del' +p94 +aS'RefSeqGene record not available' +p95 +asg24 +g16 +sg25 +(lp96 +(dp97 +S'grch37' +p98 +(dp99 +g30 +S'NW_003871055.3:g.2653044_2653047del' +p100 +sg32 +(dp101 +g34 +g35 +sg36 +S'ATACT' +p102 +sg38 +S'2653042' +p103 +sg40 +g41 +sssa(dp104 +S'hg19' +p105 +(dp106 +g30 +S'NW_003871055.3:g.2653044_2653047del' +p107 +sg32 +(dp108 +g34 +S'NW_003871055.3' +p109 +sg36 +S'ATACT' +p110 +sg38 +S'2653042' +p111 +sg40 +g41 +sssasg50 +VHomo sapiens RNA polymerase III subunit C (POLR3C), transcript variant 2, mRNA +p112 +sg52 +S'POLR3C' +p113 +sg54 +(dp114 +g56 +S'NP_001290385.1:p.?' +p115 +sg58 +S'NP_001290385.1:p.?' 
+p116 +ssg60 +g61 +sg62 +S'NC_000001.10(NM_001303456.1):c.1109+35_1109+38del' +p117 +sg64 +g16 +sg65 +S'NM_001303456.1:c.1109+35_1109+38del' +p118 +sg67 +g16 +sg68 +(dp119 +S'hg19' +p120 +(dp121 +g30 +S'NC_000001.10:g.145597477_145597480del' +p122 +sg32 +(dp123 +g34 +g74 +sg36 +S'GAAGT' +p124 +sg38 +S'145597475' +p125 +sg40 +g77 +sssS'hg38' +p126 +(dp127 +g30 +S'NC_000001.11:g.145837631_145837634del' +p128 +sg32 +(dp129 +g34 +g74 +sg36 +S'ATACT' +p130 +sg38 +S'145837629' +p131 +sg40 +g41 +sssS'grch37' +p132 +(dp133 +g30 +S'NC_000001.10:g.145597477_145597480del' +p134 +sg32 +(dp135 +g34 +g82 +sg36 +S'GAAGT' +p136 +sg38 +S'145597475' +p137 +sg40 +g77 +sssS'grch38' +p138 +(dp139 +g30 +S'NC_000001.11:g.145837631_145837634del' +p140 +sg32 +(dp141 +g34 +g82 +sg36 +S'ATACT' +p142 +sg38 +S'145837629' +p143 +sg40 +g41 +ssssg85 +(dp144 +g87 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001290385.1' +p145 +sg89 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001303456.1' +p146 +sssS'NM_006468.7:c.1070+35_1070+38del' +p147 +(dp148 +g15 +g16 +sg17 +(lp149 +S'NC_000001.10:g.145597475GAAGT>G automapped to NC_000001.10:g.145597477_145597480del' +p150 +aS'RefSeqGene record not available' +p151 +asg24 +g16 +sg25 +(lp152 +(dp153 +S'grch37' +p154 +(dp155 +g30 +S'NW_003871055.3:g.2653044_2653047del' +p156 +sg32 +(dp157 +g34 +g35 +sg36 +S'ATACT' +p158 +sg38 +S'2653042' +p159 +sg40 +g41 +sssa(dp160 +S'hg19' +p161 +(dp162 +g30 +S'NW_003871055.3:g.2653044_2653047del' +p163 +sg32 +(dp164 +g34 +S'NW_003871055.3' +p165 +sg36 +S'ATACT' +p166 +sg38 +S'2653042' +p167 +sg40 +g41 +sssasg50 +VHomo sapiens RNA polymerase III subunit C (POLR3C), transcript variant 1, mRNA +p168 +sg52 +S'POLR3C' +p169 +sg54 +(dp170 +g56 +S'NP_006459.3:p.?' +p171 +sg58 +S'NP_006459.3:p.?' 
+p172 +ssg60 +g61 +sg62 +S'NC_000001.10(NM_006468.7):c.1070+35_1070+38del' +p173 +sg64 +g16 +sg65 +S'NM_006468.7:c.1070+35_1070+38del' +p174 +sg67 +g16 +sg68 +(dp175 +S'hg19' +p176 +(dp177 +g30 +S'NC_000001.10:g.145597477_145597480del' +p178 +sg32 +(dp179 +g34 +g74 +sg36 +S'GAAGT' +p180 +sg38 +S'145597475' +p181 +sg40 +g77 +sssg126 +(dp182 +g30 +S'NC_000001.11:g.145837631_145837634del' +p183 +sg32 +(dp184 +g34 +g74 +sg36 +S'ATACT' +p185 +sg38 +S'145837629' +p186 +sg40 +g41 +sssS'grch37' +p187 +(dp188 +g30 +S'NC_000001.10:g.145597477_145597480del' +p189 +sg32 +(dp190 +g34 +g82 +sg36 +S'GAAGT' +p191 +sg38 +S'145597475' +p192 +sg40 +g77 +sssS'grch38' +p193 +(dp194 +g30 +S'NC_000001.11:g.145837631_145837634del' +p195 +sg32 +(dp196 +g34 +g82 +sg36 +S'ATACT' +p197 +sg38 +S'145837629' +p198 +sg40 +g41 +ssssg85 +(dp199 +g87 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_006459.3' +p200 +sg89 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_006468.7' +p201 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant205.txt b/VariantValidator/testing/testOutputsMasterITS/variant205.txt new file mode 100644 index 00000000..f6eebae5 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant205.txt @@ -0,0 +1,303 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_020699.2:c.562_563del' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NC_000001.10:g.153791300CTG>C automapped to NC_000001.10:g.153791302_153791303delGT' +p9 +aS'A more recent version of the selected reference sequence NM_020699.2 is available (NM_020699.3)' +p10 +aS'NM_020699.3:c.562_563delCA MUST be fully validated prior to use in reports' +p11 +aS'select_variants=NM_020699.3:c.562_563del' +p12 +aS'RefSeqGene record not available' +p13 +asS'refseqgene_context_intronic_sequence' +p14 +g6 +sS'alt_genomic_loci' +p15 +(lp16 +sS'transcript_description' +p17 +VHomo sapiens GATA zinc finger domain containing 2B (GATAD2B), mRNA +p18 
+sS'gene_symbol' +p19 +S'GATAD2B' +p20 +sS'hgvs_predicted_protein_consequence' +p21 +(dp22 +S'tlr' +p23 +S'NP_065750.1:p.(Gln188GlufsTer36)' +p24 +sS'slr' +p25 +S'NP_065750.1:p.(Q188Efs*36)' +p26 +ssS'submitted_variant' +p27 +S'1-153791300-CTG-C' +p28 +sS'genome_context_intronic_sequence' +p29 +g6 +sS'hgvs_lrg_variant' +p30 +g6 +sS'hgvs_transcript_variant' +p31 +S'NM_020699.2:c.562_563del' +p32 +sS'hgvs_refseqgene_variant' +p33 +g6 +sS'primary_assembly_loci' +p34 +(dp35 +S'hg19' +p36 +(dp37 +S'hgvs_genomic_description' +p38 +S'NC_000001.10:g.153791301_153791302del' +p39 +sS'vcf' +p40 +(dp41 +S'chr' +p42 +S'chr1' +p43 +sS'ref' +p44 +S'CTG' +p45 +sS'pos' +p46 +S'153791300' +p47 +sS'alt' +p48 +S'C' +p49 +sssS'hg38' +p50 +(dp51 +g38 +S'NC_000001.11:g.153818825_153818826del' +p52 +sg40 +(dp53 +g42 +g43 +sg44 +S'CTG' +p54 +sg46 +S'153818824' +p55 +sg48 +g49 +sssS'grch37' +p56 +(dp57 +g38 +S'NC_000001.10:g.153791301_153791302del' +p58 +sg40 +(dp59 +g42 +S'1' +p60 +sg44 +S'CTG' +p61 +sg46 +S'153791300' +p62 +sg48 +g49 +sssS'grch38' +p63 +(dp64 +g38 +S'NC_000001.11:g.153818825_153818826del' +p65 +sg40 +(dp66 +g42 +g60 +sg44 +S'CTG' +p67 +sg46 +S'153818824' +p68 +sg48 +g49 +ssssS'reference_sequence_records' +p69 +(dp70 +S'protein' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_065750.1' +p72 +sS'transcript' +p73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_020699.2' +p74 +sssS'NM_020699.3:c.562_563del' +p75 +(dp76 +g5 +g6 +sg7 +(lp77 +S'NC_000001.10:g.153791300CTG>C automapped to NC_000001.10:g.153791302_153791303delGT' +p78 +aS'RefSeqGene record not available' +p79 +asg14 +g6 +sg15 +(lp80 +sg17 +VHomo sapiens GATA zinc finger domain containing 2B (GATAD2B), mRNA +p81 +sg19 +S'GATAD2B' +p82 +sg21 +(dp83 +g23 +S'NP_065750.1:p.(Gln188GlufsTer36)' +p84 +sg25 +S'NP_065750.1:p.(Q188Efs*36)' +p85 +ssg27 +g28 +sg29 +g6 +sg30 +g6 +sg31 +S'NM_020699.3:c.562_563del' +p86 +sg33 +g6 +sg34 +(dp87 +S'hg19' +p88 +(dp89 +g38 +S'NC_000001.10:g.153791301_153791302del' +p90 +sg40 +(dp91 +g42 
+g43 +sg44 +S'CTG' +p92 +sg46 +S'153791300' +p93 +sg48 +g49 +sssg50 +(dp94 +g38 +S'NC_000001.11:g.153818825_153818826del' +p95 +sg40 +(dp96 +g42 +g43 +sg44 +S'CTG' +p97 +sg46 +S'153818824' +p98 +sg48 +g49 +sssS'grch37' +p99 +(dp100 +g38 +S'NC_000001.10:g.153791301_153791302del' +p101 +sg40 +(dp102 +g42 +g60 +sg44 +S'CTG' +p103 +sg46 +S'153791300' +p104 +sg48 +g49 +sssS'grch38' +p105 +(dp106 +g38 +S'NC_000001.11:g.153818825_153818826del' +p107 +sg40 +(dp108 +g42 +g60 +sg44 +S'CTG' +p109 +sg46 +S'153818824' +p110 +sg48 +g49 +ssssg69 +(dp111 +g71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_065750.1' +p112 +sg73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_020699.3' +p113 +sssS'metadata' +p114 +(dp115 +S'variantvalidator_hgvs_version' +p116 +S'1.1.3' +p117 +sS'uta_schema' +p118 +S'uta_20180821' +p119 +sS'seqrepo_db' +p120 +S'2018-08-21' +p121 +sS'variantvalidator_version' +p122 +S'v0.2' +p123 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant206.txt b/VariantValidator/testing/testOutputsMasterITS/variant206.txt new file mode 100644 index 00000000..3ba748e3 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant206.txt @@ -0,0 +1,1001 @@ +(dp0 +S'NM_005572.3:c.711_734delinsCCCC' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000001.10:g.156104666TTGAGAGCCGGCTGGCGGATGCGCT>TCCCC automapped to NC_000001.10:g.156104667_156104690delinsCCCC' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens lamin A/C (LMNA), transcript variant 2, mRNA +p13 +sS'gene_symbol' +p14 +S'LMNA' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_005563.1:p.(Glu238ProfsTer9)' +p19 +sS'slr' +p20 +S'NP_005563.1:p.(E238Pfs*9)' +p21 +ssS'submitted_variant' +p22 +S'1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' +p23 
+sS'genome_context_intronic_sequence' +p24 +g4 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_005572.3:c.711_734delinsCCCC' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000001.10:g.156104667_156104690delinsCCCC' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'chr1' +p38 +sS'ref' +p39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p40 +sS'pos' +p41 +S'156104667' +p42 +sS'alt' +p43 +S'CCCC' +p44 +sssS'hg38' +p45 +(dp46 +g33 +S'NC_000001.11:g.156134876_156134899delinsCCCC' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p49 +sg41 +S'156134876' +p50 +sg43 +g44 +sssS'grch37' +p51 +(dp52 +g33 +S'NC_000001.10:g.156104667_156104690delinsCCCC' +p53 +sg35 +(dp54 +g37 +S'1' +p55 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p56 +sg41 +S'156104667' +p57 +sg43 +g44 +sssS'grch38' +p58 +(dp59 +g33 +S'NC_000001.11:g.156134876_156134899delinsCCCC' +p60 +sg35 +(dp61 +g37 +g55 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p62 +sg41 +S'156134876' +p63 +sg43 +g44 +ssssS'reference_sequence_records' +p64 +(dp65 +S'protein' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005563.1' +p67 +sS'transcript' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005572.3' +p69 +sssS'NM_001257374.1:c.375_398delinsCCCC' +p70 +(dp71 +g3 +g4 +sg5 +(lp72 +S'NC_000001.10:g.156104666TTGAGAGCCGGCTGGCGGATGCGCT>TCCCC automapped to NC_000001.10:g.156104667_156104690delinsCCCC' +p73 +aS'A more recent version of the selected reference sequence NM_001257374.1 is available (NM_001257374.2)' +p74 +aS'NM_001257374.2:c.375_398delinsCCCC MUST be fully validated prior to use in reports' +p75 +aS'select_variants=NM_001257374.2:c.375_398delinsCCCC' +p76 +aS'RefSeqGene record not available' +p77 +asg9 +g4 +sg10 +(lp78 +sg12 +VHomo sapiens lamin A/C (LMNA), transcript variant 4, mRNA +p79 +sg14 +S'LMNA' +p80 +sg16 +(dp81 +g18 +S'NP_001244303.1:p.(Glu126ProfsTer9)' +p82 +sg20 +S'NP_001244303.1:p.(E126Pfs*9)' +p83 +ssg22 +g23 +sg24 
+g4 +sg25 +g4 +sg26 +S'NM_001257374.1:c.375_398delinsCCCC' +p84 +sg28 +g4 +sg29 +(dp85 +S'hg19' +p86 +(dp87 +g33 +S'NC_000001.10:g.156104667_156104690delinsCCCC' +p88 +sg35 +(dp89 +g37 +g38 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p90 +sg41 +S'156104667' +p91 +sg43 +S'CCCC' +p92 +sssS'grch37' +p93 +(dp94 +g33 +S'NC_000001.10:g.156104667_156104690delinsCCCC' +p95 +sg35 +(dp96 +g37 +g55 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p97 +sg41 +S'156104667' +p98 +sg43 +g92 +ssssg64 +(dp99 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244303.1' +p100 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257374.1' +p101 +sssS'NM_001257374.2:c.375_398delinsCCCC' +p102 +(dp103 +g3 +g4 +sg5 +(lp104 +S'NC_000001.10:g.156104666TTGAGAGCCGGCTGGCGGATGCGCT>TCCCC automapped to NC_000001.10:g.156104667_156104690delinsCCCC' +p105 +aS'RefSeqGene record not available' +p106 +asg9 +g4 +sg10 +(lp107 +sg12 +VHomo sapiens lamin A/C (LMNA), transcript variant 4, mRNA +p108 +sg14 +S'LMNA' +p109 +sg16 +(dp110 +g18 +S'NP_001244303.1:p.(Glu126ProfsTer9)' +p111 +sg20 +S'NP_001244303.1:p.(E126Pfs*9)' +p112 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001257374.2:c.375_398delinsCCCC' +p113 +sg28 +g4 +sg29 +(dp114 +S'hg19' +p115 +(dp116 +g33 +S'NC_000001.10:g.156104667_156104690delinsCCCC' +p117 +sg35 +(dp118 +g37 +g38 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p119 +sg41 +S'156104667' +p120 +sg43 +S'CCCC' +p121 +sssg45 +(dp122 +g33 +S'NC_000001.11:g.156134876_156134899delinsCCCC' +p123 +sg35 +(dp124 +g37 +g38 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p125 +sg41 +S'156134876' +p126 +sg43 +g121 +sssS'grch37' +p127 +(dp128 +g33 +S'NC_000001.10:g.156104667_156104690delinsCCCC' +p129 +sg35 +(dp130 +g37 +g55 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p131 +sg41 +S'156104667' +p132 +sg43 +g121 +sssS'grch38' +p133 +(dp134 +g33 +S'NC_000001.11:g.156134876_156134899delinsCCCC' +p135 +sg35 +(dp136 +g37 +g55 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p137 +sg41 +S'156134876' +p138 +sg43 +g121 +ssssg64 +(dp139 +g66 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244303.1' +p140 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257374.2' +p141 +sssS'NM_001282624.1:c.468_491delinsCCCC' +p142 +(dp143 +g3 +g4 +sg5 +(lp144 +S'NC_000001.10:g.156104666TTGAGAGCCGGCTGGCGGATGCGCT>TCCCC automapped to NC_000001.10:g.156104667_156104690delinsCCCC' +p145 +aS'RefSeqGene record not available' +p146 +asg9 +g4 +sg10 +(lp147 +sg12 +VHomo sapiens lamin A/C (LMNA), transcript variant 5, mRNA +p148 +sg14 +S'LMNA' +p149 +sg16 +(dp150 +g18 +S'NP_001269553.1:p.(Glu157ProfsTer9)' +p151 +sg20 +S'NP_001269553.1:p.(E157Pfs*9)' +p152 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001282624.1:c.468_491delinsCCCC' +p153 +sg28 +g4 +sg29 +(dp154 +S'hg19' +p155 +(dp156 +g33 +S'NC_000001.10:g.156104667_156104690delinsCCCC' +p157 +sg35 +(dp158 +g37 +g38 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p159 +sg41 +S'156104667' +p160 +sg43 +S'CCCC' +p161 +sssg45 +(dp162 +g33 +S'NC_000001.11:g.156134876_156134899delinsCCCC' +p163 +sg35 +(dp164 +g37 +g38 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p165 +sg41 +S'156134876' +p166 +sg43 +g161 +sssS'grch37' +p167 +(dp168 +g33 +S'NC_000001.10:g.156104667_156104690delinsCCCC' +p169 +sg35 +(dp170 +g37 +g55 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p171 +sg41 +S'156104667' +p172 +sg43 +g161 +sssS'grch38' +p173 +(dp174 +g33 +S'NC_000001.11:g.156134876_156134899delinsCCCC' +p175 +sg35 +(dp176 +g37 +g55 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p177 +sg41 +S'156134876' +p178 +sg43 +g161 +ssssg64 +(dp179 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269553.1' +p180 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282624.1' +p181 +sssS'flag' +p182 +S'gene_variant' +p183 +sS'NM_170708.3:c.711_734delinsCCCC' +p184 +(dp185 +g3 +g4 +sg5 +(lp186 +S'NC_000001.10:g.156104666TTGAGAGCCGGCTGGCGGATGCGCT>TCCCC automapped to NC_000001.10:g.156104667_156104690delinsCCCC' +p187 +aS'RefSeqGene record not available' +p188 +asg9 +g4 +sg10 +(lp189 +sg12 +VHomo sapiens lamin A/C (LMNA), transcript variant 3, mRNA +p190 +sg14 
+S'LMNA' +p191 +sg16 +(dp192 +g18 +S'NP_733822.1:p.(Glu238ProfsTer9)' +p193 +sg20 +S'NP_733822.1:p.(E238Pfs*9)' +p194 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_170708.3:c.711_734delinsCCCC' +p195 +sg28 +g4 +sg29 +(dp196 +S'hg19' +p197 +(dp198 +g33 +S'NC_000001.10:g.156104667_156104690delinsCCCC' +p199 +sg35 +(dp200 +g37 +g38 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p201 +sg41 +S'156104667' +p202 +sg43 +S'CCCC' +p203 +sssg45 +(dp204 +g33 +S'NC_000001.11:g.156134876_156134899delinsCCCC' +p205 +sg35 +(dp206 +g37 +g38 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p207 +sg41 +S'156134876' +p208 +sg43 +g203 +sssS'grch37' +p209 +(dp210 +g33 +S'NC_000001.10:g.156104667_156104690delinsCCCC' +p211 +sg35 +(dp212 +g37 +g55 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p213 +sg41 +S'156104667' +p214 +sg43 +g203 +sssS'grch38' +p215 +(dp216 +g33 +S'NC_000001.11:g.156134876_156134899delinsCCCC' +p217 +sg35 +(dp218 +g37 +g55 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p219 +sg41 +S'156134876' +p220 +sg43 +g203 +ssssg64 +(dp221 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_733822.1' +p222 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_170708.3' +p223 +sssS'NM_170707.3:c.711_734delinsCCCC' +p224 +(dp225 +g3 +g4 +sg5 +(lp226 +S'NC_000001.10:g.156104666TTGAGAGCCGGCTGGCGGATGCGCT>TCCCC automapped to NC_000001.10:g.156104667_156104690delinsCCCC' +p227 +aS'RefSeqGene record not available' +p228 +asg9 +g4 +sg10 +(lp229 +sg12 +VHomo sapiens lamin A/C (LMNA), transcript variant 1, mRNA +p230 +sg14 +S'LMNA' +p231 +sg16 +(dp232 +g18 +S'NP_733821.1:p.(Glu238ProfsTer9)' +p233 +sg20 +S'NP_733821.1:p.(E238Pfs*9)' +p234 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_170707.3:c.711_734delinsCCCC' +p235 +sg28 +g4 +sg29 +(dp236 +S'hg19' +p237 +(dp238 +g33 +S'NC_000001.10:g.156104667_156104690delinsCCCC' +p239 +sg35 +(dp240 +g37 +g38 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p241 +sg41 +S'156104667' +p242 +sg43 +S'CCCC' +p243 +sssg45 +(dp244 +g33 +S'NC_000001.11:g.156134876_156134899delinsCCCC' +p245 +sg35 +(dp246 +g37 +g38 +sg39 
+S'TGAGAGCCGGCTGGCGGATGCGCT' +p247 +sg41 +S'156134876' +p248 +sg43 +g243 +sssS'grch37' +p249 +(dp250 +g33 +S'NC_000001.10:g.156104667_156104690delinsCCCC' +p251 +sg35 +(dp252 +g37 +g55 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p253 +sg41 +S'156104667' +p254 +sg43 +g243 +sssS'grch38' +p255 +(dp256 +g33 +S'NC_000001.11:g.156134876_156134899delinsCCCC' +p257 +sg35 +(dp258 +g37 +g55 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p259 +sg41 +S'156134876' +p260 +sg43 +g243 +ssssg64 +(dp261 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_733821.1' +p262 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_170707.3' +p263 +sssS'metadata' +p264 +(dp265 +S'variantvalidator_hgvs_version' +p266 +S'1.1.3' +p267 +sS'uta_schema' +p268 +S'uta_20180821' +p269 +sS'seqrepo_db' +p270 +S'2018-08-21' +p271 +sS'variantvalidator_version' +p272 +S'v0.2' +p273 +ssS'NM_001282626.1:c.711_734delinsCCCC' +p274 +(dp275 +g3 +g4 +sg5 +(lp276 +S'NC_000001.10:g.156104666TTGAGAGCCGGCTGGCGGATGCGCT>TCCCC automapped to NC_000001.10:g.156104667_156104690delinsCCCC' +p277 +aS'RefSeqGene record not available' +p278 +asg9 +g4 +sg10 +(lp279 +sg12 +VHomo sapiens lamin A/C (LMNA), transcript variant 7, mRNA +p280 +sg14 +S'LMNA' +p281 +sg16 +(dp282 +g18 +S'NP_001269555.1:p.(Glu238ProfsTer9)' +p283 +sg20 +S'NP_001269555.1:p.(E238Pfs*9)' +p284 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001282626.1:c.711_734delinsCCCC' +p285 +sg28 +g4 +sg29 +(dp286 +S'hg19' +p287 +(dp288 +g33 +S'NC_000001.10:g.156104667_156104690delinsCCCC' +p289 +sg35 +(dp290 +g37 +g38 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p291 +sg41 +S'156104667' +p292 +sg43 +S'CCCC' +p293 +sssg45 +(dp294 +g33 +S'NC_000001.11:g.156134876_156134899delinsCCCC' +p295 +sg35 +(dp296 +g37 +g38 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p297 +sg41 +S'156134876' +p298 +sg43 +g293 +sssS'grch37' +p299 +(dp300 +g33 +S'NC_000001.10:g.156104667_156104690delinsCCCC' +p301 +sg35 +(dp302 +g37 +g55 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p303 +sg41 +S'156104667' +p304 +sg43 +g293 +sssS'grch38' +p305 +(dp306 
+g33 +S'NC_000001.11:g.156134876_156134899delinsCCCC' +p307 +sg35 +(dp308 +g37 +g55 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p309 +sg41 +S'156134876' +p310 +sg43 +g293 +ssssg64 +(dp311 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269555.1' +p312 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282626.1' +p313 +sssS'NM_001282625.1:c.711_734delinsCCCC' +p314 +(dp315 +g3 +g4 +sg5 +(lp316 +S'NC_000001.10:g.156104666TTGAGAGCCGGCTGGCGGATGCGCT>TCCCC automapped to NC_000001.10:g.156104667_156104690delinsCCCC' +p317 +aS'RefSeqGene record not available' +p318 +asg9 +g4 +sg10 +(lp319 +sg12 +VHomo sapiens lamin A/C (LMNA), transcript variant 6, mRNA +p320 +sg14 +S'LMNA' +p321 +sg16 +(dp322 +g18 +S'NP_001269554.1:p.(Glu238ProfsTer9)' +p323 +sg20 +S'NP_001269554.1:p.(E238Pfs*9)' +p324 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001282625.1:c.711_734delinsCCCC' +p325 +sg28 +g4 +sg29 +(dp326 +S'hg19' +p327 +(dp328 +g33 +S'NC_000001.10:g.156104667_156104690delinsCCCC' +p329 +sg35 +(dp330 +g37 +g38 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p331 +sg41 +S'156104667' +p332 +sg43 +S'CCCC' +p333 +sssg45 +(dp334 +g33 +S'NC_000001.11:g.156134876_156134899delinsCCCC' +p335 +sg35 +(dp336 +g37 +g38 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p337 +sg41 +S'156134876' +p338 +sg43 +g333 +sssS'grch37' +p339 +(dp340 +g33 +S'NC_000001.10:g.156104667_156104690delinsCCCC' +p341 +sg35 +(dp342 +g37 +g55 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p343 +sg41 +S'156104667' +p344 +sg43 +g333 +sssS'grch38' +p345 +(dp346 +g33 +S'NC_000001.11:g.156134876_156134899delinsCCCC' +p347 +sg35 +(dp348 +g37 +g55 +sg39 +S'TGAGAGCCGGCTGGCGGATGCGCT' +p349 +sg41 +S'156134876' +p350 +sg43 +g333 +ssssg64 +(dp351 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269554.1' +p352 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282625.1' +p353 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant207.txt b/VariantValidator/testing/testOutputsMasterITS/variant207.txt new file mode 100644 index 00000000..b96d4ce1 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant207.txt @@ -0,0 +1,632 @@ +(dp0 +S'NM_170707.3:c.1961dup' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000001.10:g.156108541G>GG automapped to NC_000001.10:g.156108541dupG' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens lamin A/C (LMNA), transcript variant 1, mRNA +p13 +sS'gene_symbol' +p14 +S'LMNA' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_733821.1:p.(Thr655AsnfsTer49)' +p19 +sS'slr' +p20 +S'NP_733821.1:p.(T655Nfs*49)' +p21 +ssS'submitted_variant' +p22 +S'1-156108541-G-GG' +p23 +sS'genome_context_intronic_sequence' +p24 +g4 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_170707.3:c.1961dup' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000001.10:g.156108541dup' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'chr1' +p38 +sS'ref' +p39 +S'G' +p40 +sS'pos' +p41 +S'156108541' +p42 +sS'alt' +p43 +S'GG' +p44 +sssS'hg38' +p45 +(dp46 +g33 +S'NC_000001.11:g.156138750dup' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +g40 +sg41 +S'156138750' +p49 +sg43 +S'GG' +p50 +sssS'grch37' +p51 +(dp52 +g33 +S'NC_000001.10:g.156108541dup' +p53 +sg35 +(dp54 +g37 +S'1' +p55 +sg39 +g40 +sg41 +S'156108541' +p56 +sg43 +S'GG' +p57 +sssS'grch38' +p58 +(dp59 +g33 +S'NC_000001.11:g.156138750dup' +p60 +sg35 +(dp61 +g37 +g55 +sg39 +g40 +sg41 +S'156138750' +p62 +sg43 +S'GG' +p63 +ssssS'reference_sequence_records' +p64 +(dp65 +S'protein' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_733821.1' +p67 
+sS'transcript' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_170707.3' +p69 +sssS'NM_001282626.1:c.1818+143dup' +p70 +(dp71 +g3 +g4 +sg5 +(lp72 +S'NC_000001.10:g.156108541G>GG automapped to NC_000001.10:g.156108541dupG' +p73 +aS'RefSeqGene record not available' +p74 +asg9 +g4 +sg10 +(lp75 +sg12 +VHomo sapiens lamin A/C (LMNA), transcript variant 7, mRNA +p76 +sg14 +S'LMNA' +p77 +sg16 +(dp78 +g18 +S'NP_001269555.1:p.?' +p79 +sg20 +S'NP_001269555.1:p.?' +p80 +ssg22 +g23 +sg24 +S'NC_000001.10(NM_001282626.1):c.1818+143dup' +p81 +sg25 +g4 +sg26 +S'NM_001282626.1:c.1818+143dup' +p82 +sg28 +g4 +sg29 +(dp83 +S'hg19' +p84 +(dp85 +g33 +S'NC_000001.10:g.156108541dup' +p86 +sg35 +(dp87 +g37 +g38 +sg39 +g40 +sg41 +S'156108541' +p88 +sg43 +S'GG' +p89 +sssg45 +(dp90 +g33 +S'NC_000001.11:g.156138750dup' +p91 +sg35 +(dp92 +g37 +g38 +sg39 +g40 +sg41 +S'156138750' +p93 +sg43 +S'GG' +p94 +sssS'grch37' +p95 +(dp96 +g33 +S'NC_000001.10:g.156108541dup' +p97 +sg35 +(dp98 +g37 +g55 +sg39 +g40 +sg41 +S'156108541' +p99 +sg43 +S'GG' +p100 +sssS'grch38' +p101 +(dp102 +g33 +S'NC_000001.11:g.156138750dup' +p103 +sg35 +(dp104 +g37 +g55 +sg39 +g40 +sg41 +S'156138750' +p105 +sg43 +S'GG' +p106 +ssssg64 +(dp107 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269555.1' +p108 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282626.1' +p109 +sssS'flag' +p110 +S'gene_variant' +p111 +sS'NM_001257374.2:c.1625dup' +p112 +(dp113 +g3 +g4 +sg5 +(lp114 +S'NC_000001.10:g.156108541G>GG automapped to NC_000001.10:g.156108541dupG' +p115 +aS'RefSeqGene record not available' +p116 +asg9 +g4 +sg10 +(lp117 +sg12 +VHomo sapiens lamin A/C (LMNA), transcript variant 4, mRNA +p118 +sg14 +S'LMNA' +p119 +sg16 +(dp120 +g18 +S'NP_001244303.1:p.(Thr543AsnfsTer90)' +p121 +sg20 +S'NP_001244303.1:p.(T543Nfs*90)' +p122 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001257374.2:c.1625dup' +p123 +sg28 +g4 +sg29 +(dp124 +S'hg19' +p125 +(dp126 +g33 +S'NC_000001.10:g.156108541dup' +p127 +sg35 +(dp128 +g37 +g38 +sg39 +g40 +sg41 
+S'156108541' +p129 +sg43 +S'GG' +p130 +sssg45 +(dp131 +g33 +S'NC_000001.11:g.156138750dup' +p132 +sg35 +(dp133 +g37 +g38 +sg39 +g40 +sg41 +S'156138750' +p134 +sg43 +S'GG' +p135 +sssS'grch37' +p136 +(dp137 +g33 +S'NC_000001.10:g.156108541dup' +p138 +sg35 +(dp139 +g37 +g55 +sg39 +g40 +sg41 +S'156108541' +p140 +sg43 +S'GG' +p141 +sssS'grch38' +p142 +(dp143 +g33 +S'NC_000001.11:g.156138750dup' +p144 +sg35 +(dp145 +g37 +g55 +sg39 +g40 +sg41 +S'156138750' +p146 +sg43 +S'GG' +p147 +ssssg64 +(dp148 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244303.1' +p149 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257374.2' +p150 +sssS'NM_170708.3:c.1871dup' +p151 +(dp152 +g3 +g4 +sg5 +(lp153 +S'NC_000001.10:g.156108541G>GG automapped to NC_000001.10:g.156108541dupG' +p154 +aS'RefSeqGene record not available' +p155 +asg9 +g4 +sg10 +(lp156 +sg12 +VHomo sapiens lamin A/C (LMNA), transcript variant 3, mRNA +p157 +sg14 +S'LMNA' +p158 +sg16 +(dp159 +g18 +S'NP_733822.1:p.(Thr625AsnfsTer49)' +p160 +sg20 +S'NP_733822.1:p.(T625Nfs*49)' +p161 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_170708.3:c.1871dup' +p162 +sg28 +g4 +sg29 +(dp163 +S'hg19' +p164 +(dp165 +g33 +S'NC_000001.10:g.156108541dup' +p166 +sg35 +(dp167 +g37 +g38 +sg39 +g40 +sg41 +S'156108541' +p168 +sg43 +S'GG' +p169 +sssg45 +(dp170 +g33 +S'NC_000001.11:g.156138750dup' +p171 +sg35 +(dp172 +g37 +g38 +sg39 +g40 +sg41 +S'156138750' +p173 +sg43 +S'GG' +p174 +sssS'grch37' +p175 +(dp176 +g33 +S'NC_000001.10:g.156108541dup' +p177 +sg35 +(dp178 +g37 +g55 +sg39 +g40 +sg41 +S'156108541' +p179 +sg43 +S'GG' +p180 +sssS'grch38' +p181 +(dp182 +g33 +S'NC_000001.11:g.156138750dup' +p183 +sg35 +(dp184 +g37 +g55 +sg39 +g40 +sg41 +S'156138750' +p185 +sg43 +S'GG' +p186 +ssssg64 +(dp187 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_733822.1' +p188 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_170708.3' +p189 +sssS'NM_001257374.1:c.1625dup' +p190 +(dp191 +g3 +g4 +sg5 +(lp192 +S'NC_000001.10:g.156108541G>GG automapped to 
NC_000001.10:g.156108541dupG' +p193 +aS'A more recent version of the selected reference sequence NM_001257374.1 is available (NM_001257374.2)' +p194 +aS'NM_001257374.2:c.1625dupG MUST be fully validated prior to use in reports' +p195 +aS'select_variants=NM_001257374.2:c.1625dup' +p196 +aS'RefSeqGene record not available' +p197 +asg9 +g4 +sg10 +(lp198 +sg12 +VHomo sapiens lamin A/C (LMNA), transcript variant 4, mRNA +p199 +sg14 +S'LMNA' +p200 +sg16 +(dp201 +g18 +S'NP_001244303.1:p.(Thr543AsnfsTer90)' +p202 +sg20 +S'NP_001244303.1:p.(T543Nfs*90)' +p203 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001257374.1:c.1625dup' +p204 +sg28 +g4 +sg29 +(dp205 +S'hg19' +p206 +(dp207 +g33 +S'NC_000001.10:g.156108541dup' +p208 +sg35 +(dp209 +g37 +g38 +sg39 +g40 +sg41 +S'156108541' +p210 +sg43 +S'GG' +p211 +sssS'grch37' +p212 +(dp213 +g33 +S'NC_000001.10:g.156108541dup' +p214 +sg35 +(dp215 +g37 +g55 +sg39 +g40 +sg41 +S'156108541' +p216 +sg43 +S'GG' +p217 +ssssg64 +(dp218 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244303.1' +p219 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257374.1' +p220 +sssS'metadata' +p221 +(dp222 +S'variantvalidator_hgvs_version' +p223 +S'1.1.3' +p224 +sS'uta_schema' +p225 +S'uta_20180821' +p226 +sS'seqrepo_db' +p227 +S'2018-08-21' +p228 +sS'variantvalidator_version' +p229 +S'v0.2' +p230 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant208.txt b/VariantValidator/testing/testOutputsMasterITS/variant208.txt new file mode 100644 index 00000000..a236c2ce --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant208.txt @@ -0,0 +1,374 @@ +(dp0 +S'metadata' +p1 +(dp2 +S'variantvalidator_hgvs_version' +p3 +S'1.1.3' +p4 +sS'uta_schema' +p5 +S'uta_20180821' +p6 +sS'seqrepo_db' +p7 +S'2018-08-21' +p8 +sS'variantvalidator_version' +p9 +S'v0.2' +p10 +ssS'flag' +p11 +S'gene_variant' +p12 +sS'NM_001315491.1:c.1A>T' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'RefSeqGene record not available' +p19 +asS'refseqgene_context_intronic_sequence' +p20 +g16 +sS'alt_genomic_loci' +p21 +(lp22 +sS'transcript_description' +p23 +VHomo sapiens myelin protein zero (MPZ), transcript variant 1, mRNA +p24 +sS'gene_symbol' +p25 +S'MPZ' +p26 +sS'hgvs_predicted_protein_consequence' +p27 +(dp28 +S'tlr' +p29 +S'NP_001302420.1:p.(Met1?)' +p30 +sS'slr' +p31 +S'NP_001302420.1:p.(M1?)' +p32 +ssS'submitted_variant' +p33 +S'1-161279695-T-A' +p34 +sS'genome_context_intronic_sequence' +p35 +g16 +sS'hgvs_lrg_variant' +p36 +g16 +sS'hgvs_transcript_variant' +p37 +S'NM_001315491.1:c.1A>T' +p38 +sS'hgvs_refseqgene_variant' +p39 +g16 +sS'primary_assembly_loci' +p40 +(dp41 +S'hg19' +p42 +(dp43 +S'hgvs_genomic_description' +p44 +S'NC_000001.10:g.161279695T>A' +p45 +sS'vcf' +p46 +(dp47 +S'chr' +p48 +S'chr1' +p49 +sS'ref' +p50 +VT +p51 +sS'pos' +p52 +S'161279695' +p53 +sS'alt' +p54 +VA +p55 +sssS'hg38' +p56 +(dp57 +g44 +S'NC_000001.11:g.161309905T>A' +p58 +sg46 +(dp59 +g48 +g49 +sg50 +g51 +sg52 +S'161309905' +p60 +sg54 +g55 +sssS'grch37' +p61 +(dp62 +g44 +S'NC_000001.10:g.161279695T>A' +p63 +sg46 +(dp64 +g48 +S'1' +p65 +sg50 +g51 +sg52 +S'161279695' +p66 +sg54 +g55 +sssS'grch38' +p67 +(dp68 +g44 +S'NC_000001.11:g.161309905T>A' +p69 +sg46 +(dp70 +g48 +g65 +sg50 +g51 +sg52 +S'161309905' +p71 
+sg54 +g55 +ssssS'reference_sequence_records' +p72 +(dp73 +S'protein' +p74 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001302420.1' +p75 +sS'transcript' +p76 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001315491.1' +p77 +sssS'NM_000530.7:c.1A>T' +p78 +(dp79 +g15 +g16 +sg17 +(lp80 +S'RefSeqGene record not available' +p81 +asg20 +g16 +sg21 +(lp82 +sg23 +VHomo sapiens myelin protein zero (MPZ), transcript variant 1, mRNA +p83 +sg25 +S'MPZ' +p84 +sg27 +(dp85 +g29 +S'NP_000521.2:p.(Met1?)' +p86 +sg31 +S'NP_000521.2:p.(M1?)' +p87 +ssg33 +g34 +sg35 +g16 +sg36 +g16 +sg37 +S'NM_000530.7:c.1A>T' +p88 +sg39 +g16 +sg40 +(dp89 +S'hg19' +p90 +(dp91 +g44 +S'NC_000001.10:g.161279695T>A' +p92 +sg46 +(dp93 +g48 +g49 +sg50 +g51 +sg52 +S'161279695' +p94 +sg54 +g55 +sssg56 +(dp95 +g44 +S'NC_000001.11:g.161309905T>A' +p96 +sg46 +(dp97 +g48 +g49 +sg50 +g51 +sg52 +S'161309905' +p98 +sg54 +g55 +sssS'grch37' +p99 +(dp100 +g44 +S'NC_000001.10:g.161279695T>A' +p101 +sg46 +(dp102 +g48 +g65 +sg50 +g51 +sg52 +S'161279695' +p103 +sg54 +g55 +sssS'grch38' +p104 +(dp105 +g44 +S'NC_000001.11:g.161309905T>A' +p106 +sg46 +(dp107 +g48 +g65 +sg50 +g51 +sg52 +S'161309905' +p108 +sg54 +g55 +ssssg72 +(dp109 +g74 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000521.2' +p110 +sg76 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000530.7' +p111 +sssS'NM_000530.6:c.1A>T' +p112 +(dp113 +g15 +g16 +sg17 +(lp114 +S'A more recent version of the selected reference sequence NM_000530.6 is available (NM_000530.7)' +p115 +aS'NM_000530.7:c.1A>T MUST be fully validated prior to use in reports' +p116 +aS'select_variants=NM_000530.7:c.1A>T' +p117 +aS'RefSeqGene record not available' +p118 +asg20 +g16 +sg21 +(lp119 +sg23 +VHomo sapiens myelin protein zero (MPZ), mRNA +p120 +sg25 +S'MPZ' +p121 +sg27 +(dp122 +g29 +S'NP_000521.2:p.(Met1?)' +p123 +sg31 +S'NP_000521.2:p.(M1?)' +p124 +ssg33 +g34 +sg35 +g16 +sg36 +g16 +sg37 +S'NM_000530.6:c.1A>T' +p125 +sg39 +g16 +sg40 +(dp126 +S'hg19' +p127 +(dp128 +g44 +S'NC_000001.10:g.161279695T>A' +p129 
+sg46 +(dp130 +g48 +g49 +sg50 +g51 +sg52 +S'161279695' +p131 +sg54 +g55 +sssS'grch37' +p132 +(dp133 +g44 +S'NC_000001.10:g.161279695T>A' +p134 +sg46 +(dp135 +g48 +g65 +sg50 +g51 +sg52 +S'161279695' +p136 +sg54 +g55 +ssssg72 +(dp137 +g74 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000521.2' +p138 +sg76 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000530.6' +p139 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant209.txt b/VariantValidator/testing/testOutputsMasterITS/variant209.txt new file mode 100644 index 00000000..f96f2257 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant209.txt @@ -0,0 +1,172 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000130.4:c.1601G>A' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens coagulation factor V (F5), mRNA +p14 +sS'gene_symbol' +p15 +S'F5' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_000121.2:p.(Arg534Gln)' +p20 +sS'slr' +p21 +S'NP_000121.2:p.(R534Q)' +p22 +ssS'submitted_variant' +p23 +S'1-169519049-T-T' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_000130.4:c.1601G>A' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000001.10:g.169519049T=' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr1' +p39 +sS'ref' +p40 +S'T' +p41 +sS'pos' +p42 +S'169519049' +p43 +sS'alt' +p44 +g41 +sssS'hg38' +p45 +(dp46 +g34 +S'NC_000001.11:g.169549811C>T' +p47 +sg36 +(dp48 +g38 +g39 +sg40 +VC +p49 +sg42 +S'169549811' +p50 +sg44 +VT +p51 +sssS'grch37' +p52 +(dp53 +g34 +S'NC_000001.10:g.169519049T=' +p54 +sg36 +(dp55 +g38 +S'1' +p56 +sg40 +g41 +sg42 +S'169519049' +p57 +sg44 
+g41 +sssS'grch38' +p58 +(dp59 +g34 +S'NC_000001.11:g.169549811C>T' +p60 +sg36 +(dp61 +g38 +g56 +sg40 +g49 +sg42 +S'169549811' +p62 +sg44 +g51 +ssssS'reference_sequence_records' +p63 +(dp64 +S'protein' +p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000121.2' +p66 +sS'transcript' +p67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000130.4' +p68 +sssS'metadata' +p69 +(dp70 +S'variantvalidator_hgvs_version' +p71 +S'1.1.3' +p72 +sS'uta_schema' +p73 +S'uta_20180821' +p74 +sS'seqrepo_db' +p75 +S'2018-08-21' +p76 +sS'variantvalidator_version' +p77 +S'v0.2' +p78 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant21.txt b/VariantValidator/testing/testOutputsMasterITS/variant21.txt new file mode 100644 index 00000000..134f0811 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant21.txt @@ -0,0 +1,180 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000518.4:c.316_*100del' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'A more recent version of the selected reference sequence NM_000518.4 is available (NM_000518.5)' +p9 +aS'NM_000518.5:c.316_*100del MUST be fully validated prior to use in reports' +p10 +aS'select_variants=NM_000518.5:c.316_*100del' +p11 +aS'RefSeqGene record not available' +p12 +asS'refseqgene_context_intronic_sequence' +p13 +g6 +sS'alt_genomic_loci' +p14 +(lp15 +sS'transcript_description' +p16 +VHomo sapiens hemoglobin subunit beta (HBB), mRNA +p17 +sS'gene_symbol' +p18 +S'HBB' +p19 +sS'hgvs_predicted_protein_consequence' +p20 +(dp21 +S'tlr' +p22 +S'NP_000509.1:p.(Leu106SerfsTer3)' +p23 +sS'slr' +p24 +S'NP_000509.1:p.(L106Sfs*3)' +p25 +ssS'submitted_variant' +p26 +S'NM_000518.4:c.316_*100del' +p27 +sS'genome_context_intronic_sequence' +p28 +g6 +sS'hgvs_lrg_variant' +p29 +g6 +sS'hgvs_transcript_variant' +p30 +S'NM_000518.4:c.316_*100del' +p31 +sS'hgvs_refseqgene_variant' +p32 +g6 +sS'primary_assembly_loci' +p33 +(dp34 +S'grch38' +p35 +(dp36 
+S'hgvs_genomic_description' +p37 +S'NC_000011.10:g.5225498_5225726del' +p38 +sS'vcf' +p39 +(dp40 +S'chr' +p41 +S'11' +p42 +sS'ref' +p43 +S'AATCCAGATGCTCAAGGCCCTTCATAATATCCCCCAGTTTAGTAGTTGGACTTAGGGAACAAAGGAACCTTTAATAGAAATTGGACAGCAAGAAAGCGAGCTTAGTGATACTTGTGGGCCAGGGCATTAGCCACACCAGCCACCACTTTCTGATAGGCAGCCTGCACTGGTGGGGTGAATTCTTTGCCAAAGTGATGGGCCAGCACACAGACCAGCACGTTGCCCAGGAG' +p44 +sS'pos' +p45 +S'5225497' +p46 +sS'alt' +p47 +S'A' +p48 +sssS'grch37' +p49 +(dp50 +g37 +S'NC_000011.9:g.5246728_5246956del' +p51 +sg39 +(dp52 +g41 +g42 +sg43 +S'AATCCAGATGCTCAAGGCCCTTCATAATATCCCCCAGTTTAGTAGTTGGACTTAGGGAACAAAGGAACCTTTAATAGAAATTGGACAGCAAGAAAGCGAGCTTAGTGATACTTGTGGGCCAGGGCATTAGCCACACCAGCCACCACTTTCTGATAGGCAGCCTGCACTGGTGGGGTGAATTCTTTGCCAAAGTGATGGGCCAGCACACAGACCAGCACGTTGCCCAGGAG' +p53 +sg45 +S'5246727' +p54 +sg47 +g48 +sssS'hg38' +p55 +(dp56 +g37 +S'NC_000011.10:g.5225498_5225726del' +p57 +sg39 +(dp58 +g41 +S'chr11' +p59 +sg43 +S'AATCCAGATGCTCAAGGCCCTTCATAATATCCCCCAGTTTAGTAGTTGGACTTAGGGAACAAAGGAACCTTTAATAGAAATTGGACAGCAAGAAAGCGAGCTTAGTGATACTTGTGGGCCAGGGCATTAGCCACACCAGCCACCACTTTCTGATAGGCAGCCTGCACTGGTGGGGTGAATTCTTTGCCAAAGTGATGGGCCAGCACACAGACCAGCACGTTGCCCAGGAG' +p60 +sg45 +S'5225497' +p61 +sg47 +g48 +sssS'hg19' +p62 +(dp63 +g37 +S'NC_000011.9:g.5246728_5246956del' +p64 +sg39 +(dp65 +g41 +g59 +sg43 +S'AATCCAGATGCTCAAGGCCCTTCATAATATCCCCCAGTTTAGTAGTTGGACTTAGGGAACAAAGGAACCTTTAATAGAAATTGGACAGCAAGAAAGCGAGCTTAGTGATACTTGTGGGCCAGGGCATTAGCCACACCAGCCACCACTTTCTGATAGGCAGCCTGCACTGGTGGGGTGAATTCTTTGCCAAAGTGATGGGCCAGCACACAGACCAGCACGTTGCCCAGGAG' +p66 +sg45 +S'5246727' +p67 +sg47 +g48 +ssssS'reference_sequence_records' +p68 +(dp69 +S'protein' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000509.1' +p71 +sS'transcript' +p72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000518.4' +p73 +sssS'metadata' +p74 +(dp75 +S'variantvalidator_hgvs_version' +p76 +S'1.1.3' +p77 +sS'uta_schema' +p78 +S'uta_20180821' +p79 +sS'seqrepo_db' +p80 +S'2018-08-21' +p81 +sS'variantvalidator_version' +p82 +S'v0.2' +p83 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant210.txt b/VariantValidator/testing/testOutputsMasterITS/variant210.txt new file mode 100644 index 00000000..bde23683 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant210.txt @@ -0,0 +1,528 @@ +(dp0 +S'NM_003240.4:c.774C>T' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens left-right determination factor 2 (LEFTY2), transcript variant 1, mRNA +p12 +sS'gene_symbol' +p13 +S'LEFTY2' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_003231.2:p.(Thr258=)' +p18 +sS'slr' +p19 +S'NP_003231.2:p.(T258=)' +p20 +ssS'submitted_variant' +p21 +S'1-226125468-G-A' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_003240.4:c.774C>T' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000001.10:g.226125468G>A' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr1' +p37 +sS'ref' +p38 +VG +p39 +sS'pos' +p40 +S'226125468' +p41 +sS'alt' +p42 +VA +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000001.11:g.225937768G>A' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +g39 +sg40 +S'225937768' +p48 +sg42 +g43 +sssS'grch37' +p49 +(dp50 +g32 +S'NC_000001.10:g.226125468G>A' +p51 +sg34 +(dp52 +g36 +S'1' +p53 +sg38 +g39 +sg40 +S'226125468' +p54 +sg42 +g43 +sssS'grch38' +p55 +(dp56 +g32 +S'NC_000001.11:g.225937768G>A' +p57 +sg34 +(dp58 +g36 +g53 +sg38 +g39 +sg40 +S'225937768' +p59 +sg42 +g43 +ssssS'reference_sequence_records' +p60 +(dp61 +S'protein' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003231.2' +p63 +sS'transcript' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003240.4' +p65 +sssS'NM_003240.3:c.774C>T' 
+p66 +(dp67 +g3 +g4 +sg5 +(lp68 +S'A more recent version of the selected reference sequence NM_003240.3 is available (NM_003240.4)' +p69 +aS'NM_003240.4:c.774C>T MUST be fully validated prior to use in reports' +p70 +aS'select_variants=NM_003240.4:c.774C>T' +p71 +aS'RefSeqGene record not available' +p72 +asg8 +g4 +sg9 +(lp73 +sg11 +VHomo sapiens left-right determination factor 2 (LEFTY2), transcript variant 1, mRNA +p74 +sg13 +S'LEFTY2' +p75 +sg15 +(dp76 +g17 +S'NP_003231.2:p.(Thr258=)' +p77 +sg19 +S'NP_003231.2:p.(T258=)' +p78 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_003240.3:c.774C>T' +p79 +sg27 +g4 +sg28 +(dp80 +S'hg19' +p81 +(dp82 +g32 +S'NC_000001.10:g.226125468G>A' +p83 +sg34 +(dp84 +g36 +g37 +sg38 +g39 +sg40 +S'226125468' +p85 +sg42 +g43 +sssg44 +(dp86 +g32 +S'NC_000001.11:g.225937768G>A' +p87 +sg34 +(dp88 +g36 +g37 +sg38 +g39 +sg40 +S'225937768' +p89 +sg42 +g43 +sssS'grch37' +p90 +(dp91 +g32 +S'NC_000001.10:g.226125468G>A' +p92 +sg34 +(dp93 +g36 +g53 +sg38 +g39 +sg40 +S'226125468' +p94 +sg42 +g43 +sssS'grch38' +p95 +(dp96 +g32 +S'NC_000001.11:g.225937768G>A' +p97 +sg34 +(dp98 +g36 +g53 +sg38 +g39 +sg40 +S'225937768' +p99 +sg42 +g43 +ssssg60 +(dp100 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003231.2' +p101 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003240.3' +p102 +sssS'NM_001172425.1:c.672C>T' +p103 +(dp104 +g3 +g4 +sg5 +(lp105 +S'A more recent version of the selected reference sequence NM_001172425.1 is available (NM_001172425.2)' +p106 +aS'NM_001172425.2:c.672C>T MUST be fully validated prior to use in reports' +p107 +aS'select_variants=NM_001172425.2:c.672C>T' +p108 +aS'RefSeqGene record not available' +p109 +asg8 +g4 +sg9 +(lp110 +sg11 +VHomo sapiens left-right determination factor 2 (LEFTY2), transcript variant 2, mRNA +p111 +sg13 +S'LEFTY2' +p112 +sg15 +(dp113 +g17 +S'NP_001165896.1:p.(Thr224=)' +p114 +sg19 +S'NP_001165896.1:p.(T224=)' +p115 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001172425.1:c.672C>T' +p116 +sg27 +g4 +sg28 +(dp117 
+S'hg19' +p118 +(dp119 +g32 +S'NC_000001.10:g.226125468G>A' +p120 +sg34 +(dp121 +g36 +g37 +sg38 +g39 +sg40 +S'226125468' +p122 +sg42 +g43 +sssg44 +(dp123 +g32 +S'NC_000001.11:g.225937768G>A' +p124 +sg34 +(dp125 +g36 +g37 +sg38 +g39 +sg40 +S'225937768' +p126 +sg42 +g43 +sssS'grch37' +p127 +(dp128 +g32 +S'NC_000001.10:g.226125468G>A' +p129 +sg34 +(dp130 +g36 +g53 +sg38 +g39 +sg40 +S'226125468' +p131 +sg42 +g43 +sssS'grch38' +p132 +(dp133 +g32 +S'NC_000001.11:g.225937768G>A' +p134 +sg34 +(dp135 +g36 +g53 +sg38 +g39 +sg40 +S'225937768' +p136 +sg42 +g43 +ssssg60 +(dp137 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001165896.1' +p138 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001172425.1' +p139 +sssS'NM_001172425.2:c.672C>T' +p140 +(dp141 +g3 +g4 +sg5 +(lp142 +S'RefSeqGene record not available' +p143 +asg8 +g4 +sg9 +(lp144 +sg11 +VHomo sapiens left-right determination factor 2 (LEFTY2), transcript variant 2, mRNA +p145 +sg13 +S'LEFTY2' +p146 +sg15 +(dp147 +g17 +S'NP_001165896.1:p.(Thr224=)' +p148 +sg19 +S'NP_001165896.1:p.(T224=)' +p149 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001172425.2:c.672C>T' +p150 +sg27 +g4 +sg28 +(dp151 +S'hg19' +p152 +(dp153 +g32 +S'NC_000001.10:g.226125468G>A' +p154 +sg34 +(dp155 +g36 +g37 +sg38 +g39 +sg40 +S'226125468' +p156 +sg42 +g43 +sssg44 +(dp157 +g32 +S'NC_000001.11:g.225937768G>A' +p158 +sg34 +(dp159 +g36 +g37 +sg38 +g39 +sg40 +S'225937768' +p160 +sg42 +g43 +sssS'grch37' +p161 +(dp162 +g32 +S'NC_000001.10:g.226125468G>A' +p163 +sg34 +(dp164 +g36 +g53 +sg38 +g39 +sg40 +S'226125468' +p165 +sg42 +g43 +sssS'grch38' +p166 +(dp167 +g32 +S'NC_000001.11:g.225937768G>A' +p168 +sg34 +(dp169 +g36 +g53 +sg38 +g39 +sg40 +S'225937768' +p170 +sg42 +g43 +ssssg60 +(dp171 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001165896.1' +p172 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001172425.2' +p173 +sssS'flag' +p174 +S'gene_variant' +p175 +sS'metadata' +p176 +(dp177 +S'variantvalidator_hgvs_version' +p178 +S'1.1.3' +p179 +sS'uta_schema' 
+p180 +S'uta_20180821' +p181 +sS'seqrepo_db' +p182 +S'2018-08-21' +p183 +sS'variantvalidator_version' +p184 +S'v0.2' +p185 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant211.txt b/VariantValidator/testing/testOutputsMasterITS/variant211.txt new file mode 100644 index 00000000..29b689a2 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant211.txt @@ -0,0 +1,215 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_001126049.1:c.-794_-792del' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NC_000010.10:g.89623035CGCA>C automapped to NC_000010.10:g.89623039_89623041delGCA' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g6 +sS'alt_genomic_loci' +p12 +(lp13 +(dp14 +S'grch38' +p15 +(dp16 +S'hgvs_genomic_description' +p17 +S'NW_013171807.1:g.79106_79108del' +p18 +sS'vcf' +p19 +(dp20 +S'chr' +p21 +S'HG2334_PATCH' +p22 +sS'ref' +p23 +S'CGCA' +p24 +sS'pos' +p25 +S'79102' +p26 +sS'alt' +p27 +S'C' +p28 +sssa(dp29 +S'hg38' +p30 +(dp31 +g17 +S'NW_013171807.1:g.79106_79108del' +p32 +sg19 +(dp33 +g21 +S'NW_013171807.1' +p34 +sg23 +S'CGCA' +p35 +sg25 +S'79102' +p36 +sg27 +g28 +sssasS'transcript_description' +p37 +VHomo sapiens killin, p53 regulated DNA replication inhibitor (KLLN), mRNA +p38 +sS'gene_symbol' +p39 +S'KLLN' +p40 +sS'hgvs_predicted_protein_consequence' +p41 +(dp42 +S'tlr' +p43 +S'NP_001119521.1:p.?' +p44 +sS'slr' +p45 +S'NP_001119521.1:p.?' 
+p46 +ssS'submitted_variant' +p47 +S'10-89623035-CGCA-C' +p48 +sS'genome_context_intronic_sequence' +p49 +g6 +sS'hgvs_lrg_variant' +p50 +g6 +sS'hgvs_transcript_variant' +p51 +S'NM_001126049.1:c.-794_-792del' +p52 +sS'hgvs_refseqgene_variant' +p53 +g6 +sS'primary_assembly_loci' +p54 +(dp55 +S'hg19' +p56 +(dp57 +g17 +S'NC_000010.10:g.89623039_89623041del' +p58 +sg19 +(dp59 +g21 +S'chr10' +p60 +sg23 +S'CGCA' +p61 +sg25 +S'89623035' +p62 +sg27 +g28 +sssg30 +(dp63 +g17 +S'NC_000010.11:g.87863282_87863284del' +p64 +sg19 +(dp65 +g21 +g60 +sg23 +S'CGCA' +p66 +sg25 +S'87863278' +p67 +sg27 +g28 +sssS'grch37' +p68 +(dp69 +g17 +S'NC_000010.10:g.89623039_89623041del' +p70 +sg19 +(dp71 +g21 +S'10' +p72 +sg23 +S'CGCA' +p73 +sg25 +S'89623035' +p74 +sg27 +g28 +sssS'grch38' +p75 +(dp76 +g17 +S'NC_000010.11:g.87863282_87863284del' +p77 +sg19 +(dp78 +g21 +g72 +sg23 +S'CGCA' +p79 +sg25 +S'87863278' +p80 +sg27 +g28 +ssssS'reference_sequence_records' +p81 +(dp82 +S'protein' +p83 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119521.1' +p84 +sS'transcript' +p85 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126049.1' +p86 +sssS'metadata' +p87 +(dp88 +S'variantvalidator_hgvs_version' +p89 +S'1.1.3' +p90 +sS'uta_schema' +p91 +S'uta_20180821' +p92 +sS'seqrepo_db' +p93 +S'2018-08-21' +p94 +sS'variantvalidator_version' +p95 +S'v0.2' +p96 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant212.txt b/VariantValidator/testing/testOutputsMasterITS/variant212.txt new file mode 100644 index 00000000..da687b88 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant212.txt @@ -0,0 +1,734 @@ +(dp0 +S'NR_037946.1:n.3896G>T' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens HNRNPUL2-BSCL2 readthrough (NMD candidate) (HNRNPUL2-BSCL2), long non-coding RNA +p12 +sS'gene_symbol' +p13 +S'HNRNPUL2-BSCL2' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'Non-coding :n.' +p18 +sS'slr' +p19 +g18 +ssS'submitted_variant' +p20 +S'11-62457852-C-A' +p21 +sS'genome_context_intronic_sequence' +p22 +g4 +sS'hgvs_lrg_variant' +p23 +g4 +sS'hgvs_transcript_variant' +p24 +S'NR_037946.1:n.3896G>T' +p25 +sS'hgvs_refseqgene_variant' +p26 +g4 +sS'primary_assembly_loci' +p27 +(dp28 +S'grch38' +p29 +(dp30 +S'hgvs_genomic_description' +p31 +S'NC_000011.10:g.62690380C>A' +p32 +sS'vcf' +p33 +(dp34 +S'chr' +p35 +S'11' +p36 +sS'ref' +p37 +VC +p38 +sS'pos' +p39 +S'62690380' +p40 +sS'alt' +p41 +VA +p42 +sssS'grch37' +p43 +(dp44 +g31 +S'NC_000011.9:g.62457852C>A' +p45 +sg33 +(dp46 +g35 +g36 +sg37 +g38 +sg39 +S'62457852' +p47 +sg41 +g42 +sssS'hg38' +p48 +(dp49 +g31 +S'NC_000011.10:g.62690380C>A' +p50 +sg33 +(dp51 +g35 +S'chr11' +p52 +sg37 +g38 +sg39 +S'62690380' +p53 +sg41 +g42 +sssS'hg19' +p54 +(dp55 +g31 +S'NC_000011.9:g.62457852C>A' +p56 +sg33 +(dp57 +g35 +g52 +sg37 +g38 +sg39 +S'62457852' +p58 +sg41 +g42 +ssssS'reference_sequence_records' +p59 +(dp60 +S'transcript' +p61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_037946.1' +p62 +sssS'NM_032667.6:c.1184G>T' +p63 +(dp64 +g3 +g4 +sg5 +(lp65 +S'RefSeqGene record not available' +p66 +asg8 +g4 +sg9 +(lp67 
+sg11 +VHomo sapiens BSCL2, seipin lipid droplet biogenesis associated (BSCL2), transcript variant 2, mRNA +p68 +sg13 +S'BSCL2' +p69 +sg15 +(dp70 +g17 +S'NP_116056.3:p.(Cys395Phe)' +p71 +sg19 +S'NP_116056.3:p.(C395F)' +p72 +ssg20 +g21 +sg22 +g4 +sg23 +g4 +sg24 +S'NM_032667.6:c.1184G>T' +p73 +sg26 +g4 +sg27 +(dp74 +S'grch38' +p75 +(dp76 +g31 +S'NC_000011.10:g.62690380C>A' +p77 +sg33 +(dp78 +g35 +g36 +sg37 +g38 +sg39 +S'62690380' +p79 +sg41 +g42 +sssS'grch37' +p80 +(dp81 +g31 +S'NC_000011.9:g.62457852C>A' +p82 +sg33 +(dp83 +g35 +g36 +sg37 +g38 +sg39 +S'62457852' +p84 +sg41 +g42 +sssg48 +(dp85 +g31 +S'NC_000011.10:g.62690380C>A' +p86 +sg33 +(dp87 +g35 +g52 +sg37 +g38 +sg39 +S'62690380' +p88 +sg41 +g42 +sssS'hg19' +p89 +(dp90 +g31 +S'NC_000011.9:g.62457852C>A' +p91 +sg33 +(dp92 +g35 +g52 +sg37 +g38 +sg39 +S'62457852' +p93 +sg41 +g42 +ssssg59 +(dp94 +S'protein' +p95 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116056.3' +p96 +sg61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032667.6' +p97 +sssS'NR_037949.1:n.1984G>T' +p98 +(dp99 +g3 +g4 +sg5 +(lp100 +S'RefSeqGene record not available' +p101 +asg8 +g4 +sg9 +(lp102 +sg11 +VHomo sapiens BSCL2, seipin lipid droplet biogenesis associated (BSCL2), transcript variant 5, non-coding RNA +p103 +sg13 +S'BSCL2' +p104 +sg15 +(dp105 +g17 +S'Non-coding :n.' 
+p106 +sg19 +g106 +ssg20 +g21 +sg22 +g4 +sg23 +g4 +sg24 +S'NR_037949.1:n.1984G>T' +p107 +sg26 +g4 +sg27 +(dp108 +S'grch38' +p109 +(dp110 +g31 +S'NC_000011.10:g.62690380C>A' +p111 +sg33 +(dp112 +g35 +g36 +sg37 +g38 +sg39 +S'62690380' +p113 +sg41 +g42 +sssS'grch37' +p114 +(dp115 +g31 +S'NC_000011.9:g.62457852C>A' +p116 +sg33 +(dp117 +g35 +g36 +sg37 +g38 +sg39 +S'62457852' +p118 +sg41 +g42 +sssg48 +(dp119 +g31 +S'NC_000011.10:g.62690380C>A' +p120 +sg33 +(dp121 +g35 +g52 +sg37 +g38 +sg39 +S'62690380' +p122 +sg41 +g42 +sssS'hg19' +p123 +(dp124 +g31 +S'NC_000011.9:g.62457852C>A' +p125 +sg33 +(dp126 +g35 +g52 +sg37 +g38 +sg39 +S'62457852' +p127 +sg41 +g42 +ssssg59 +(dp128 +g61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_037949.1' +p129 +sssS'NR_037948.1:n.1978G>T' +p130 +(dp131 +g3 +g4 +sg5 +(lp132 +S'RefSeqGene record not available' +p133 +asg8 +g4 +sg9 +(lp134 +sg11 +VHomo sapiens BSCL2, seipin lipid droplet biogenesis associated (BSCL2), transcript variant 4, non-coding RNA +p135 +sg13 +S'BSCL2' +p136 +sg15 +(dp137 +g17 +S'Non-coding :n.' 
+p138 +sg19 +g138 +ssg20 +g21 +sg22 +g4 +sg23 +g4 +sg24 +S'NR_037948.1:n.1978G>T' +p139 +sg26 +g4 +sg27 +(dp140 +S'grch38' +p141 +(dp142 +g31 +S'NC_000011.10:g.62690380C>A' +p143 +sg33 +(dp144 +g35 +g36 +sg37 +g38 +sg39 +S'62690380' +p145 +sg41 +g42 +sssS'grch37' +p146 +(dp147 +g31 +S'NC_000011.9:g.62457852C>A' +p148 +sg33 +(dp149 +g35 +g36 +sg37 +g38 +sg39 +S'62457852' +p150 +sg41 +g42 +sssg48 +(dp151 +g31 +S'NC_000011.10:g.62690380C>A' +p152 +sg33 +(dp153 +g35 +g52 +sg37 +g38 +sg39 +S'62690380' +p154 +sg41 +g42 +sssS'hg19' +p155 +(dp156 +g31 +S'NC_000011.9:g.62457852C>A' +p157 +sg33 +(dp158 +g35 +g52 +sg37 +g38 +sg39 +S'62457852' +p159 +sg41 +g42 +ssssg59 +(dp160 +g61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_037948.1' +p161 +sssS'NM_001122955.3:c.1376G>T' +p162 +(dp163 +g3 +g4 +sg5 +(lp164 +S'RefSeqGene record not available' +p165 +asg8 +g4 +sg9 +(lp166 +sg11 +VHomo sapiens BSCL2, seipin lipid droplet biogenesis associated (BSCL2), transcript variant 1, mRNA +p167 +sg13 +S'BSCL2' +p168 +sg15 +(dp169 +g17 +S'NP_001116427.1:p.(Cys459Phe)' +p170 +sg19 +S'NP_001116427.1:p.(C459F)' +p171 +ssg20 +g21 +sg22 +g4 +sg23 +g4 +sg24 +S'NM_001122955.3:c.1376G>T' +p172 +sg26 +g4 +sg27 +(dp173 +S'grch38' +p174 +(dp175 +g31 +S'NC_000011.10:g.62690380C>A' +p176 +sg33 +(dp177 +g35 +g36 +sg37 +g38 +sg39 +S'62690380' +p178 +sg41 +g42 +sssS'grch37' +p179 +(dp180 +g31 +S'NC_000011.9:g.62457852C>A' +p181 +sg33 +(dp182 +g35 +g36 +sg37 +g38 +sg39 +S'62457852' +p183 +sg41 +g42 +sssg48 +(dp184 +g31 +S'NC_000011.10:g.62690380C>A' +p185 +sg33 +(dp186 +g35 +g52 +sg37 +g38 +sg39 +S'62690380' +p187 +sg41 +g42 +sssS'hg19' +p188 +(dp189 +g31 +S'NC_000011.9:g.62457852C>A' +p190 +sg33 +(dp191 +g35 +g52 +sg37 +g38 +sg39 +S'62457852' +p192 +sg41 +g42 +ssssg59 +(dp193 +g95 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001116427.1' +p194 +sg61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001122955.3' +p195 +sssS'flag' +p196 +S'gene_variant' +p197 +sS'NM_001130702.2:c.*178G>T' +p198 +(dp199 +g3 +g4 +sg5 
+(lp200 +S'RefSeqGene record not available' +p201 +asg8 +g4 +sg9 +(lp202 +sg11 +VHomo sapiens BSCL2, seipin lipid droplet biogenesis associated (BSCL2), transcript variant 3, mRNA +p203 +sg13 +S'BSCL2' +p204 +sg15 +(dp205 +g17 +S'NP_001124174.2:p.?' +p206 +sg19 +S'NP_001124174.2:p.?' +p207 +ssg20 +g21 +sg22 +g4 +sg23 +g4 +sg24 +S'NM_001130702.2:c.*178G>T' +p208 +sg26 +g4 +sg27 +(dp209 +S'grch38' +p210 +(dp211 +g31 +S'NC_000011.10:g.62690380C>A' +p212 +sg33 +(dp213 +g35 +g36 +sg37 +g38 +sg39 +S'62690380' +p214 +sg41 +g42 +sssS'grch37' +p215 +(dp216 +g31 +S'NC_000011.9:g.62457852C>A' +p217 +sg33 +(dp218 +g35 +g36 +sg37 +g38 +sg39 +S'62457852' +p219 +sg41 +g42 +sssg48 +(dp220 +g31 +S'NC_000011.10:g.62690380C>A' +p221 +sg33 +(dp222 +g35 +g52 +sg37 +g38 +sg39 +S'62690380' +p223 +sg41 +g42 +sssS'hg19' +p224 +(dp225 +g31 +S'NC_000011.9:g.62457852C>A' +p226 +sg33 +(dp227 +g35 +g52 +sg37 +g38 +sg39 +S'62457852' +p228 +sg41 +g42 +ssssg59 +(dp229 +g95 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124174.2' +p230 +sg61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130702.2' +p231 +sssS'metadata' +p232 +(dp233 +S'variantvalidator_hgvs_version' +p234 +S'1.1.3' +p235 +sS'uta_schema' +p236 +S'uta_20180821' +p237 +sS'seqrepo_db' +p238 +S'2018-08-21' +p239 +sS'variantvalidator_version' +p240 +S'v0.2' +p241 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant213.txt b/VariantValidator/testing/testOutputsMasterITS/variant213.txt new file mode 100644 index 00000000..f0758558 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant213.txt @@ -0,0 +1,297 @@ +(dp0 +S'NM_001351834.1:c.5761_5762insT' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000011.9:g.108178710A>AT automapped to NC_000011.9:g.108178710_108178711insT' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens ATM serine/threonine kinase (ATM), transcript variant 1, mRNA +p13 +sS'gene_symbol' +p14 +S'ATM' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_001338763.1:p.(Arg1921MetfsTer9)' +p19 +sS'slr' +p20 +S'NP_001338763.1:p.(R1921Mfs*9)' +p21 +ssS'submitted_variant' +p22 +S'11-108178710-A-AT' +p23 +sS'genome_context_intronic_sequence' +p24 +g4 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_001351834.1:c.5761_5762insT' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'grch38' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000011.10:g.108307983_108307984insT' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'11' +p38 +sS'ref' +p39 +S'A' +p40 +sS'pos' +p41 +S'108307983' +p42 +sS'alt' +p43 +S'AT' +p44 +sssS'grch37' +p45 +(dp46 +g33 +S'NC_000011.9:g.108178710_108178711insT' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +g40 +sg41 +S'108178710' +p49 +sg43 +S'AT' +p50 +sssS'hg38' +p51 +(dp52 +g33 +S'NC_000011.10:g.108307983_108307984insT' +p53 +sg35 +(dp54 +g37 +S'chr11' +p55 +sg39 +g40 +sg41 +S'108307983' +p56 +sg43 +S'AT' +p57 +sssS'hg19' +p58 +(dp59 +g33 +S'NC_000011.9:g.108178710_108178711insT' +p60 +sg35 +(dp61 +g37 +g55 +sg39 +g40 +sg41 +S'108178710' +p62 +sg43 +S'AT' +p63 +ssssS'reference_sequence_records' +p64 
+(dp65 +S'protein' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001338763.1' +p67 +sS'transcript' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001351834.1' +p69 +sssS'flag' +p70 +S'gene_variant' +p71 +sS'metadata' +p72 +(dp73 +S'variantvalidator_hgvs_version' +p74 +S'1.1.3' +p75 +sS'uta_schema' +p76 +S'uta_20180821' +p77 +sS'seqrepo_db' +p78 +S'2018-08-21' +p79 +sS'variantvalidator_version' +p80 +S'v0.2' +p81 +ssS'NM_000051.3:c.5761_5762insT' +p82 +(dp83 +g3 +g4 +sg5 +(lp84 +S'NC_000011.9:g.108178710A>AT automapped to NC_000011.9:g.108178710_108178711insT' +p85 +aS'RefSeqGene record not available' +p86 +asg9 +g4 +sg10 +(lp87 +sg12 +VHomo sapiens ATM serine/threonine kinase (ATM), transcript variant 2, mRNA +p88 +sg14 +S'ATM' +p89 +sg16 +(dp90 +g18 +S'NP_000042.3:p.(Arg1921MetfsTer9)' +p91 +sg20 +S'NP_000042.3:p.(R1921Mfs*9)' +p92 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_000051.3:c.5761_5762insT' +p93 +sg28 +g4 +sg29 +(dp94 +S'grch38' +p95 +(dp96 +g33 +S'NC_000011.10:g.108307983_108307984insT' +p97 +sg35 +(dp98 +g37 +g38 +sg39 +g40 +sg41 +S'108307983' +p99 +sg43 +S'AT' +p100 +sssS'grch37' +p101 +(dp102 +g33 +S'NC_000011.9:g.108178710_108178711insT' +p103 +sg35 +(dp104 +g37 +g38 +sg39 +g40 +sg41 +S'108178710' +p105 +sg43 +S'AT' +p106 +sssg51 +(dp107 +g33 +S'NC_000011.10:g.108307983_108307984insT' +p108 +sg35 +(dp109 +g37 +g55 +sg39 +g40 +sg41 +S'108307983' +p110 +sg43 +S'AT' +p111 +sssS'hg19' +p112 +(dp113 +g33 +S'NC_000011.9:g.108178710_108178711insT' +p114 +sg35 +(dp115 +g37 +g55 +sg39 +g40 +sg41 +S'108178710' +p116 +sg43 +S'AT' +p117 +ssssg64 +(dp118 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000042.3' +p119 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000051.3' +p120 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant214.txt b/VariantValidator/testing/testOutputsMasterITS/variant214.txt new file mode 100644 index 00000000..fc1fd9f6 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant214.txt @@ -0,0 +1,3113 @@ +(dp0 +S'NM_001352419.1:c.-108-7C>T' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +(dp11 +S'grch37' +p12 +(dp13 +S'hgvs_genomic_description' +p14 +S'NW_003871080.1:g.117249G>A' +p15 +sS'vcf' +p16 +(dp17 +S'chr' +p18 +S'HG388_HG400_PATCH' +p19 +sS'ref' +p20 +VG +p21 +sS'pos' +p22 +S'117249' +p23 +sS'alt' +p24 +VA +p25 +sssa(dp26 +S'hg19' +p27 +(dp28 +g14 +S'NW_003871080.1:g.117249G>A' +p29 +sg16 +(dp30 +g18 +S'NW_003871080.1' +p31 +sg20 +g21 +sg22 +S'117249' +p32 +sg24 +g25 +sssasS'transcript_description' +p33 +VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 15, mRNA +p34 +sS'gene_symbol' +p35 +S'ALG9' +p36 +sS'hgvs_predicted_protein_consequence' +p37 +(dp38 +S'tlr' +p39 +S'NP_001339348.1:p.?' +p40 +sS'slr' +p41 +S'NP_001339348.1:p.?' 
+p42 +ssS'submitted_variant' +p43 +S'11-111735981-G-A' +p44 +sS'genome_context_intronic_sequence' +p45 +S'NC_000011.9(NM_001352419.1):c.-108-7C>T' +p46 +sS'hgvs_lrg_variant' +p47 +g4 +sS'hgvs_transcript_variant' +p48 +S'NM_001352419.1:c.-108-7C>T' +p49 +sS'hgvs_refseqgene_variant' +p50 +g4 +sS'primary_assembly_loci' +p51 +(dp52 +S'grch38' +p53 +(dp54 +g14 +S'NC_000011.10:g.111865258G>A' +p55 +sg16 +(dp56 +g18 +S'11' +p57 +sg20 +g21 +sg22 +S'111865258' +p58 +sg24 +g25 +sssS'grch37' +p59 +(dp60 +g14 +S'NC_000011.9:g.111735981G>A' +p61 +sg16 +(dp62 +g18 +g57 +sg20 +g21 +sg22 +S'111735981' +p63 +sg24 +g25 +sssS'hg38' +p64 +(dp65 +g14 +S'NC_000011.10:g.111865258G>A' +p66 +sg16 +(dp67 +g18 +S'chr11' +p68 +sg20 +g21 +sg22 +S'111865258' +p69 +sg24 +g25 +sssS'hg19' +p70 +(dp71 +g14 +S'NC_000011.9:g.111735981G>A' +p72 +sg16 +(dp73 +g18 +g68 +sg20 +g21 +sg22 +S'111735981' +p74 +sg24 +g25 +ssssS'reference_sequence_records' +p75 +(dp76 +S'protein' +p77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339348.1' +p78 +sS'transcript' +p79 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352419.1' +p80 +sssS'NM_001352412.1:c.-108-7C>T' +p81 +(dp82 +g3 +g4 +sg5 +(lp83 +S'RefSeqGene record not available' +p84 +asg8 +g4 +sg9 +(lp85 +(dp86 +S'grch37' +p87 +(dp88 +g14 +S'NW_003871080.1:g.117249G>A' +p89 +sg16 +(dp90 +g18 +g19 +sg20 +g21 +sg22 +S'117249' +p91 +sg24 +g25 +sssa(dp92 +S'hg19' +p93 +(dp94 +g14 +S'NW_003871080.1:g.117249G>A' +p95 +sg16 +(dp96 +g18 +S'NW_003871080.1' +p97 +sg20 +g21 +sg22 +S'117249' +p98 +sg24 +g25 +sssasg33 +VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 8, mRNA +p99 +sg35 +S'ALG9' +p100 +sg37 +(dp101 +g39 +S'NP_001339341.1:p.?' +p102 +sg41 +S'NP_001339341.1:p.?' 
+p103 +ssg43 +g44 +sg45 +S'NC_000011.9(NM_001352412.1):c.-108-7C>T' +p104 +sg47 +g4 +sg48 +S'NM_001352412.1:c.-108-7C>T' +p105 +sg50 +g4 +sg51 +(dp106 +S'grch38' +p107 +(dp108 +g14 +S'NC_000011.10:g.111865258G>A' +p109 +sg16 +(dp110 +g18 +g57 +sg20 +g21 +sg22 +S'111865258' +p111 +sg24 +g25 +sssS'grch37' +p112 +(dp113 +g14 +S'NC_000011.9:g.111735981G>A' +p114 +sg16 +(dp115 +g18 +g57 +sg20 +g21 +sg22 +S'111735981' +p116 +sg24 +g25 +sssg64 +(dp117 +g14 +S'NC_000011.10:g.111865258G>A' +p118 +sg16 +(dp119 +g18 +g68 +sg20 +g21 +sg22 +S'111865258' +p120 +sg24 +g25 +sssS'hg19' +p121 +(dp122 +g14 +S'NC_000011.9:g.111735981G>A' +p123 +sg16 +(dp124 +g18 +g68 +sg20 +g21 +sg22 +S'111735981' +p125 +sg24 +g25 +ssssg75 +(dp126 +g77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339341.1' +p127 +sg79 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352412.1' +p128 +sssS'NM_001077692.1:c.-108-7C>T' +p129 +(dp130 +g3 +g4 +sg5 +(lp131 +S'RefSeqGene record not available' +p132 +asg8 +g4 +sg9 +(lp133 +(dp134 +S'grch37' +p135 +(dp136 +g14 +S'NW_003871080.1:g.117249G>A' +p137 +sg16 +(dp138 +g18 +g19 +sg20 +g21 +sg22 +S'117249' +p139 +sg24 +g25 +sssa(dp140 +S'hg19' +p141 +(dp142 +g14 +S'NW_003871080.1:g.117249G>A' +p143 +sg16 +(dp144 +g18 +S'NW_003871080.1' +p145 +sg20 +g21 +sg22 +S'117249' +p146 +sg24 +g25 +sssasg33 +VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 4, mRNA +p147 +sg35 +S'ALG9' +p148 +sg37 +(dp149 +g39 +S'NP_001071160.1:p.?' +p150 +sg41 +S'NP_001071160.1:p.?' 
+p151 +ssg43 +g44 +sg45 +S'NC_000011.9(NM_001077692.1):c.-108-7C>T' +p152 +sg47 +g4 +sg48 +S'NM_001077692.1:c.-108-7C>T' +p153 +sg50 +g4 +sg51 +(dp154 +S'grch38' +p155 +(dp156 +g14 +S'NC_000011.10:g.111865258G>A' +p157 +sg16 +(dp158 +g18 +g57 +sg20 +g21 +sg22 +S'111865258' +p159 +sg24 +g25 +sssS'grch37' +p160 +(dp161 +g14 +S'NC_000011.9:g.111735981G>A' +p162 +sg16 +(dp163 +g18 +g57 +sg20 +g21 +sg22 +S'111735981' +p164 +sg24 +g25 +sssg64 +(dp165 +g14 +S'NC_000011.10:g.111865258G>A' +p166 +sg16 +(dp167 +g18 +g68 +sg20 +g21 +sg22 +S'111865258' +p168 +sg24 +g25 +sssS'hg19' +p169 +(dp170 +g14 +S'NC_000011.9:g.111735981G>A' +p171 +sg16 +(dp172 +g18 +g68 +sg20 +g21 +sg22 +S'111735981' +p173 +sg24 +g25 +ssssg75 +(dp174 +g77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001071160.1' +p175 +sg79 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077692.1' +p176 +sssS'NM_001352418.1:c.406-7C>T' +p177 +(dp178 +g3 +g4 +sg5 +(lp179 +S'RefSeqGene record not available' +p180 +asg8 +g4 +sg9 +(lp181 +(dp182 +S'grch37' +p183 +(dp184 +g14 +S'NW_003871080.1:g.117249G>A' +p185 +sg16 +(dp186 +g18 +g19 +sg20 +g21 +sg22 +S'117249' +p187 +sg24 +g25 +sssa(dp188 +S'hg19' +p189 +(dp190 +g14 +S'NW_003871080.1:g.117249G>A' +p191 +sg16 +(dp192 +g18 +S'NW_003871080.1' +p193 +sg20 +g21 +sg22 +S'117249' +p194 +sg24 +g25 +sssasg33 +VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 14, mRNA +p195 +sg35 +S'ALG9' +p196 +sg37 +(dp197 +g39 +S'NP_001339347.1:p.?' +p198 +sg41 +S'NP_001339347.1:p.?' 
+p199 +ssg43 +g44 +sg45 +S'NC_000011.9(NM_001352418.1):c.406-7C>T' +p200 +sg47 +g4 +sg48 +S'NM_001352418.1:c.406-7C>T' +p201 +sg50 +g4 +sg51 +(dp202 +S'grch38' +p203 +(dp204 +g14 +S'NC_000011.10:g.111865258G>A' +p205 +sg16 +(dp206 +g18 +g57 +sg20 +g21 +sg22 +S'111865258' +p207 +sg24 +g25 +sssS'grch37' +p208 +(dp209 +g14 +S'NC_000011.9:g.111735981G>A' +p210 +sg16 +(dp211 +g18 +g57 +sg20 +g21 +sg22 +S'111735981' +p212 +sg24 +g25 +sssg64 +(dp213 +g14 +S'NC_000011.10:g.111865258G>A' +p214 +sg16 +(dp215 +g18 +g68 +sg20 +g21 +sg22 +S'111865258' +p216 +sg24 +g25 +sssS'hg19' +p217 +(dp218 +g14 +S'NC_000011.9:g.111735981G>A' +p219 +sg16 +(dp220 +g18 +g68 +sg20 +g21 +sg22 +S'111735981' +p221 +sg24 +g25 +ssssg75 +(dp222 +g77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339347.1' +p223 +sg79 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352418.1' +p224 +sssS'NM_001352423.1:c.-108-7C>T' +p225 +(dp226 +g3 +g4 +sg5 +(lp227 +S'RefSeqGene record not available' +p228 +asg8 +g4 +sg9 +(lp229 +(dp230 +S'grch37' +p231 +(dp232 +g14 +S'NW_003871080.1:g.117249G>A' +p233 +sg16 +(dp234 +g18 +g19 +sg20 +g21 +sg22 +S'117249' +p235 +sg24 +g25 +sssa(dp236 +S'hg19' +p237 +(dp238 +g14 +S'NW_003871080.1:g.117249G>A' +p239 +sg16 +(dp240 +g18 +S'NW_003871080.1' +p241 +sg20 +g21 +sg22 +S'117249' +p242 +sg24 +g25 +sssasg33 +VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 19, mRNA +p243 +sg35 +S'ALG9' +p244 +sg37 +(dp245 +g39 +S'NP_001339352.1:p.?' +p246 +sg41 +S'NP_001339352.1:p.?' 
+p247 +ssg43 +g44 +sg45 +S'NC_000011.9(NM_001352423.1):c.-108-7C>T' +p248 +sg47 +g4 +sg48 +S'NM_001352423.1:c.-108-7C>T' +p249 +sg50 +g4 +sg51 +(dp250 +S'grch38' +p251 +(dp252 +g14 +S'NC_000011.10:g.111865258G>A' +p253 +sg16 +(dp254 +g18 +g57 +sg20 +g21 +sg22 +S'111865258' +p255 +sg24 +g25 +sssS'grch37' +p256 +(dp257 +g14 +S'NC_000011.9:g.111735981G>A' +p258 +sg16 +(dp259 +g18 +g57 +sg20 +g21 +sg22 +S'111735981' +p260 +sg24 +g25 +sssg64 +(dp261 +g14 +S'NC_000011.10:g.111865258G>A' +p262 +sg16 +(dp263 +g18 +g68 +sg20 +g21 +sg22 +S'111865258' +p264 +sg24 +g25 +sssS'hg19' +p265 +(dp266 +g14 +S'NC_000011.9:g.111735981G>A' +p267 +sg16 +(dp268 +g18 +g68 +sg20 +g21 +sg22 +S'111735981' +p269 +sg24 +g25 +ssssg75 +(dp270 +g77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339352.1' +p271 +sg79 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352423.1' +p272 +sssS'NM_001352415.1:c.-108-7C>T' +p273 +(dp274 +g3 +g4 +sg5 +(lp275 +S'RefSeqGene record not available' +p276 +asg8 +g4 +sg9 +(lp277 +(dp278 +S'grch37' +p279 +(dp280 +g14 +S'NW_003871080.1:g.117249G>A' +p281 +sg16 +(dp282 +g18 +g19 +sg20 +g21 +sg22 +S'117249' +p283 +sg24 +g25 +sssa(dp284 +S'hg19' +p285 +(dp286 +g14 +S'NW_003871080.1:g.117249G>A' +p287 +sg16 +(dp288 +g18 +S'NW_003871080.1' +p289 +sg20 +g21 +sg22 +S'117249' +p290 +sg24 +g25 +sssasg33 +VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 11, mRNA +p291 +sg35 +S'ALG9' +p292 +sg37 +(dp293 +g39 +S'NP_001339344.1:p.?' +p294 +sg41 +S'NP_001339344.1:p.?' 
+p295 +ssg43 +g44 +sg45 +S'NC_000011.9(NM_001352415.1):c.-108-7C>T' +p296 +sg47 +g4 +sg48 +S'NM_001352415.1:c.-108-7C>T' +p297 +sg50 +g4 +sg51 +(dp298 +S'grch38' +p299 +(dp300 +g14 +S'NC_000011.10:g.111865258G>A' +p301 +sg16 +(dp302 +g18 +g57 +sg20 +g21 +sg22 +S'111865258' +p303 +sg24 +g25 +sssS'grch37' +p304 +(dp305 +g14 +S'NC_000011.9:g.111735981G>A' +p306 +sg16 +(dp307 +g18 +g57 +sg20 +g21 +sg22 +S'111735981' +p308 +sg24 +g25 +sssg64 +(dp309 +g14 +S'NC_000011.10:g.111865258G>A' +p310 +sg16 +(dp311 +g18 +g68 +sg20 +g21 +sg22 +S'111865258' +p312 +sg24 +g25 +sssS'hg19' +p313 +(dp314 +g14 +S'NC_000011.9:g.111735981G>A' +p315 +sg16 +(dp316 +g18 +g68 +sg20 +g21 +sg22 +S'111735981' +p317 +sg24 +g25 +ssssg75 +(dp318 +g77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339344.1' +p319 +sg79 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352415.1' +p320 +sssS'NM_001352421.1:c.-108-7C>T' +p321 +(dp322 +g3 +g4 +sg5 +(lp323 +S'RefSeqGene record not available' +p324 +asg8 +g4 +sg9 +(lp325 +(dp326 +S'grch37' +p327 +(dp328 +g14 +S'NW_003871080.1:g.117249G>A' +p329 +sg16 +(dp330 +g18 +g19 +sg20 +g21 +sg22 +S'117249' +p331 +sg24 +g25 +sssa(dp332 +S'hg19' +p333 +(dp334 +g14 +S'NW_003871080.1:g.117249G>A' +p335 +sg16 +(dp336 +g18 +S'NW_003871080.1' +p337 +sg20 +g21 +sg22 +S'117249' +p338 +sg24 +g25 +sssasg33 +VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 17, mRNA +p339 +sg35 +S'ALG9' +p340 +sg37 +(dp341 +g39 +S'NP_001339350.1:p.?' +p342 +sg41 +S'NP_001339350.1:p.?' 
+p343 +ssg43 +g44 +sg45 +S'NC_000011.9(NM_001352421.1):c.-108-7C>T' +p344 +sg47 +g4 +sg48 +S'NM_001352421.1:c.-108-7C>T' +p345 +sg50 +g4 +sg51 +(dp346 +S'grch38' +p347 +(dp348 +g14 +S'NC_000011.10:g.111865258G>A' +p349 +sg16 +(dp350 +g18 +g57 +sg20 +g21 +sg22 +S'111865258' +p351 +sg24 +g25 +sssS'grch37' +p352 +(dp353 +g14 +S'NC_000011.9:g.111735981G>A' +p354 +sg16 +(dp355 +g18 +g57 +sg20 +g21 +sg22 +S'111735981' +p356 +sg24 +g25 +sssg64 +(dp357 +g14 +S'NC_000011.10:g.111865258G>A' +p358 +sg16 +(dp359 +g18 +g68 +sg20 +g21 +sg22 +S'111865258' +p360 +sg24 +g25 +sssS'hg19' +p361 +(dp362 +g14 +S'NC_000011.9:g.111735981G>A' +p363 +sg16 +(dp364 +g18 +g68 +sg20 +g21 +sg22 +S'111735981' +p365 +sg24 +g25 +ssssg75 +(dp366 +g77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339350.1' +p367 +sg79 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352421.1' +p368 +sssS'NM_001352411.1:c.-108-7C>T' +p369 +(dp370 +g3 +g4 +sg5 +(lp371 +S'RefSeqGene record not available' +p372 +asg8 +g4 +sg9 +(lp373 +(dp374 +S'grch37' +p375 +(dp376 +g14 +S'NW_003871080.1:g.117249G>A' +p377 +sg16 +(dp378 +g18 +g19 +sg20 +g21 +sg22 +S'117249' +p379 +sg24 +g25 +sssa(dp380 +S'hg19' +p381 +(dp382 +g14 +S'NW_003871080.1:g.117249G>A' +p383 +sg16 +(dp384 +g18 +S'NW_003871080.1' +p385 +sg20 +g21 +sg22 +S'117249' +p386 +sg24 +g25 +sssasg33 +VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 7, mRNA +p387 +sg35 +S'ALG9' +p388 +sg37 +(dp389 +g39 +S'NP_001339340.1:p.?' +p390 +sg41 +S'NP_001339340.1:p.?' 
+p391 +ssg43 +g44 +sg45 +S'NC_000011.9(NM_001352411.1):c.-108-7C>T' +p392 +sg47 +g4 +sg48 +S'NM_001352411.1:c.-108-7C>T' +p393 +sg50 +g4 +sg51 +(dp394 +S'grch38' +p395 +(dp396 +g14 +S'NC_000011.10:g.111865258G>A' +p397 +sg16 +(dp398 +g18 +g57 +sg20 +g21 +sg22 +S'111865258' +p399 +sg24 +g25 +sssS'grch37' +p400 +(dp401 +g14 +S'NC_000011.9:g.111735981G>A' +p402 +sg16 +(dp403 +g18 +g57 +sg20 +g21 +sg22 +S'111735981' +p404 +sg24 +g25 +sssg64 +(dp405 +g14 +S'NC_000011.10:g.111865258G>A' +p406 +sg16 +(dp407 +g18 +g68 +sg20 +g21 +sg22 +S'111865258' +p408 +sg24 +g25 +sssS'hg19' +p409 +(dp410 +g14 +S'NC_000011.9:g.111735981G>A' +p411 +sg16 +(dp412 +g18 +g68 +sg20 +g21 +sg22 +S'111735981' +p413 +sg24 +g25 +ssssg75 +(dp414 +g77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339340.1' +p415 +sg79 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352411.1' +p416 +sssS'NR_147984.1:n.782-7C>T' +p417 +(dp418 +g3 +g4 +sg5 +(lp419 +S'RefSeqGene record not available' +p420 +asg8 +g4 +sg9 +(lp421 +(dp422 +S'grch37' +p423 +(dp424 +g14 +S'NW_003871080.1:g.117249G>A' +p425 +sg16 +(dp426 +g18 +g19 +sg20 +g21 +sg22 +S'117249' +p427 +sg24 +g25 +sssa(dp428 +S'hg19' +p429 +(dp430 +g14 +S'NW_003871080.1:g.117249G>A' +p431 +sg16 +(dp432 +g18 +S'NW_003871080.1' +p433 +sg20 +g21 +sg22 +S'117249' +p434 +sg24 +g25 +sssasg33 +VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 20, non-coding RNA +p435 +sg35 +S'ALG9' +p436 +sg37 +(dp437 +g39 +S'Non-coding :n.' 
+p438 +sg41 +g438 +ssg43 +g44 +sg45 +S'NC_000011.9(NR_147984.1):c.782-7C>T' +p439 +sg47 +g4 +sg48 +S'NR_147984.1:n.782-7C>T' +p440 +sg50 +g4 +sg51 +(dp441 +S'grch38' +p442 +(dp443 +g14 +S'NC_000011.10:g.111865258G>A' +p444 +sg16 +(dp445 +g18 +g57 +sg20 +g21 +sg22 +S'111865258' +p446 +sg24 +g25 +sssS'grch37' +p447 +(dp448 +g14 +S'NC_000011.9:g.111735981G>A' +p449 +sg16 +(dp450 +g18 +g57 +sg20 +g21 +sg22 +S'111735981' +p451 +sg24 +g25 +sssg64 +(dp452 +g14 +S'NC_000011.10:g.111865258G>A' +p453 +sg16 +(dp454 +g18 +g68 +sg20 +g21 +sg22 +S'111865258' +p455 +sg24 +g25 +sssS'hg19' +p456 +(dp457 +g14 +S'NC_000011.9:g.111735981G>A' +p458 +sg16 +(dp459 +g18 +g68 +sg20 +g21 +sg22 +S'111735981' +p460 +sg24 +g25 +ssssg75 +(dp461 +g79 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_147984.1' +p462 +sssS'NM_001077691.1:c.-108-7C>T' +p463 +(dp464 +g3 +g4 +sg5 +(lp465 +S'RefSeqGene record not available' +p466 +asg8 +g4 +sg9 +(lp467 +(dp468 +S'grch37' +p469 +(dp470 +g14 +S'NW_003871080.1:g.117249G>A' +p471 +sg16 +(dp472 +g18 +g19 +sg20 +g21 +sg22 +S'117249' +p473 +sg24 +g25 +sssa(dp474 +S'hg19' +p475 +(dp476 +g14 +S'NW_003871080.1:g.117249G>A' +p477 +sg16 +(dp478 +g18 +S'NW_003871080.1' +p479 +sg20 +g21 +sg22 +S'117249' +p480 +sg24 +g25 +sssasg33 +VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 3, mRNA +p481 +sg35 +S'ALG9' +p482 +sg37 +(dp483 +g39 +S'NP_001071159.1:p.?' +p484 +sg41 +S'NP_001071159.1:p.?' 
+p485 +ssg43 +g44 +sg45 +S'NC_000011.9(NM_001077691.1):c.-108-7C>T' +p486 +sg47 +g4 +sg48 +S'NM_001077691.1:c.-108-7C>T' +p487 +sg50 +g4 +sg51 +(dp488 +S'grch38' +p489 +(dp490 +g14 +S'NC_000011.10:g.111865258G>A' +p491 +sg16 +(dp492 +g18 +g57 +sg20 +g21 +sg22 +S'111865258' +p493 +sg24 +g25 +sssS'grch37' +p494 +(dp495 +g14 +S'NC_000011.9:g.111735981G>A' +p496 +sg16 +(dp497 +g18 +g57 +sg20 +g21 +sg22 +S'111735981' +p498 +sg24 +g25 +sssg64 +(dp499 +g14 +S'NC_000011.10:g.111865258G>A' +p500 +sg16 +(dp501 +g18 +g68 +sg20 +g21 +sg22 +S'111865258' +p502 +sg24 +g25 +sssS'hg19' +p503 +(dp504 +g14 +S'NC_000011.9:g.111735981G>A' +p505 +sg16 +(dp506 +g18 +g68 +sg20 +g21 +sg22 +S'111735981' +p507 +sg24 +g25 +ssssg75 +(dp508 +g77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001071159.1' +p509 +sg79 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077691.1' +p510 +sssS'metadata' +p511 +(dp512 +S'variantvalidator_hgvs_version' +p513 +S'1.1.3' +p514 +sS'uta_schema' +p515 +S'uta_20180821' +p516 +sS'seqrepo_db' +p517 +S'2018-08-21' +p518 +sS'variantvalidator_version' +p519 +S'v0.2' +p520 +ssS'NM_001352410.1:c.-108-7C>T' +p521 +(dp522 +g3 +g4 +sg5 +(lp523 +S'RefSeqGene record not available' +p524 +asg8 +g4 +sg9 +(lp525 +(dp526 +S'grch37' +p527 +(dp528 +g14 +S'NW_003871080.1:g.117249G>A' +p529 +sg16 +(dp530 +g18 +g19 +sg20 +g21 +sg22 +S'117249' +p531 +sg24 +g25 +sssa(dp532 +S'hg19' +p533 +(dp534 +g14 +S'NW_003871080.1:g.117249G>A' +p535 +sg16 +(dp536 +g18 +S'NW_003871080.1' +p537 +sg20 +g21 +sg22 +S'117249' +p538 +sg24 +g25 +sssasg33 +VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 6, mRNA +p539 +sg35 +S'ALG9' +p540 +sg37 +(dp541 +g39 +S'NP_001339339.1:p.?' +p542 +sg41 +S'NP_001339339.1:p.?' 
+p543 +ssg43 +g44 +sg45 +S'NC_000011.9(NM_001352410.1):c.-108-7C>T' +p544 +sg47 +g4 +sg48 +S'NM_001352410.1:c.-108-7C>T' +p545 +sg50 +g4 +sg51 +(dp546 +S'grch38' +p547 +(dp548 +g14 +S'NC_000011.10:g.111865258G>A' +p549 +sg16 +(dp550 +g18 +g57 +sg20 +g21 +sg22 +S'111865258' +p551 +sg24 +g25 +sssS'grch37' +p552 +(dp553 +g14 +S'NC_000011.9:g.111735981G>A' +p554 +sg16 +(dp555 +g18 +g57 +sg20 +g21 +sg22 +S'111735981' +p556 +sg24 +g25 +sssg64 +(dp557 +g14 +S'NC_000011.10:g.111865258G>A' +p558 +sg16 +(dp559 +g18 +g68 +sg20 +g21 +sg22 +S'111865258' +p560 +sg24 +g25 +sssS'hg19' +p561 +(dp562 +g14 +S'NC_000011.9:g.111735981G>A' +p563 +sg16 +(dp564 +g18 +g68 +sg20 +g21 +sg22 +S'111735981' +p565 +sg24 +g25 +ssssg75 +(dp566 +g77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339339.1' +p567 +sg79 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352410.1' +p568 +sssS'NM_001077690.1:c.406-7C>T' +p569 +(dp570 +g3 +g4 +sg5 +(lp571 +S'RefSeqGene record not available' +p572 +asg8 +g4 +sg9 +(lp573 +(dp574 +S'grch37' +p575 +(dp576 +g14 +S'NW_003871080.1:g.117249G>A' +p577 +sg16 +(dp578 +g18 +g19 +sg20 +g21 +sg22 +S'117249' +p579 +sg24 +g25 +sssa(dp580 +S'hg19' +p581 +(dp582 +g14 +S'NW_003871080.1:g.117249G>A' +p583 +sg16 +(dp584 +g18 +S'NW_003871080.1' +p585 +sg20 +g21 +sg22 +S'117249' +p586 +sg24 +g25 +sssasg33 +VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 2, mRNA +p587 +sg35 +S'ALG9' +p588 +sg37 +(dp589 +g39 +S'NP_001071158.1:p.?' +p590 +sg41 +S'NP_001071158.1:p.?' 
+p591 +ssg43 +g44 +sg45 +S'NC_000011.9(NM_001077690.1):c.406-7C>T' +p592 +sg47 +g4 +sg48 +S'NM_001077690.1:c.406-7C>T' +p593 +sg50 +g4 +sg51 +(dp594 +S'grch38' +p595 +(dp596 +g14 +S'NC_000011.10:g.111865258G>A' +p597 +sg16 +(dp598 +g18 +g57 +sg20 +g21 +sg22 +S'111865258' +p599 +sg24 +g25 +sssS'grch37' +p600 +(dp601 +g14 +S'NC_000011.9:g.111735981G>A' +p602 +sg16 +(dp603 +g18 +g57 +sg20 +g21 +sg22 +S'111735981' +p604 +sg24 +g25 +sssg64 +(dp605 +g14 +S'NC_000011.10:g.111865258G>A' +p606 +sg16 +(dp607 +g18 +g68 +sg20 +g21 +sg22 +S'111865258' +p608 +sg24 +g25 +sssS'hg19' +p609 +(dp610 +g14 +S'NC_000011.9:g.111735981G>A' +p611 +sg16 +(dp612 +g18 +g68 +sg20 +g21 +sg22 +S'111735981' +p613 +sg24 +g25 +ssssg75 +(dp614 +g77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001071158.1' +p615 +sg79 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077690.1' +p616 +sssS'flag' +p617 +S'gene_variant' +p618 +sS'NM_001352422.1:c.-326-7C>T' +p619 +(dp620 +g3 +g4 +sg5 +(lp621 +S'RefSeqGene record not available' +p622 +asg8 +g4 +sg9 +(lp623 +(dp624 +S'grch37' +p625 +(dp626 +g14 +S'NW_003871080.1:g.117249G>A' +p627 +sg16 +(dp628 +g18 +g19 +sg20 +g21 +sg22 +S'117249' +p629 +sg24 +g25 +sssa(dp630 +S'hg19' +p631 +(dp632 +g14 +S'NW_003871080.1:g.117249G>A' +p633 +sg16 +(dp634 +g18 +S'NW_003871080.1' +p635 +sg20 +g21 +sg22 +S'117249' +p636 +sg24 +g25 +sssasg33 +VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 18, mRNA +p637 +sg35 +S'ALG9' +p638 +sg37 +(dp639 +g39 +S'NP_001339351.1:p.?' +p640 +sg41 +S'NP_001339351.1:p.?' 
+p641 +ssg43 +g44 +sg45 +S'NC_000011.9(NM_001352422.1):c.-326-7C>T' +p642 +sg47 +g4 +sg48 +S'NM_001352422.1:c.-326-7C>T' +p643 +sg50 +g4 +sg51 +(dp644 +S'grch38' +p645 +(dp646 +g14 +S'NC_000011.10:g.111865258G>A' +p647 +sg16 +(dp648 +g18 +g57 +sg20 +g21 +sg22 +S'111865258' +p649 +sg24 +g25 +sssS'grch37' +p650 +(dp651 +g14 +S'NC_000011.9:g.111735981G>A' +p652 +sg16 +(dp653 +g18 +g57 +sg20 +g21 +sg22 +S'111735981' +p654 +sg24 +g25 +sssg64 +(dp655 +g14 +S'NC_000011.10:g.111865258G>A' +p656 +sg16 +(dp657 +g18 +g68 +sg20 +g21 +sg22 +S'111865258' +p658 +sg24 +g25 +sssS'hg19' +p659 +(dp660 +g14 +S'NC_000011.9:g.111735981G>A' +p661 +sg16 +(dp662 +g18 +g68 +sg20 +g21 +sg22 +S'111735981' +p663 +sg24 +g25 +ssssg75 +(dp664 +g77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339351.1' +p665 +sg79 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352422.1' +p666 +sssS'NM_001352416.1:c.-108-7C>T' +p667 +(dp668 +g3 +g4 +sg5 +(lp669 +S'RefSeqGene record not available' +p670 +asg8 +g4 +sg9 +(lp671 +(dp672 +S'grch37' +p673 +(dp674 +g14 +S'NW_003871080.1:g.117249G>A' +p675 +sg16 +(dp676 +g18 +g19 +sg20 +g21 +sg22 +S'117249' +p677 +sg24 +g25 +sssa(dp678 +S'hg19' +p679 +(dp680 +g14 +S'NW_003871080.1:g.117249G>A' +p681 +sg16 +(dp682 +g18 +S'NW_003871080.1' +p683 +sg20 +g21 +sg22 +S'117249' +p684 +sg24 +g25 +sssasg33 +VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 12, mRNA +p685 +sg35 +S'ALG9' +p686 +sg37 +(dp687 +g39 +S'NP_001339345.1:p.?' +p688 +sg41 +S'NP_001339345.1:p.?' 
+p689 +ssg43 +g44 +sg45 +S'NC_000011.9(NM_001352416.1):c.-108-7C>T' +p690 +sg47 +g4 +sg48 +S'NM_001352416.1:c.-108-7C>T' +p691 +sg50 +g4 +sg51 +(dp692 +S'grch38' +p693 +(dp694 +g14 +S'NC_000011.10:g.111865258G>A' +p695 +sg16 +(dp696 +g18 +g57 +sg20 +g21 +sg22 +S'111865258' +p697 +sg24 +g25 +sssS'grch37' +p698 +(dp699 +g14 +S'NC_000011.9:g.111735981G>A' +p700 +sg16 +(dp701 +g18 +g57 +sg20 +g21 +sg22 +S'111735981' +p702 +sg24 +g25 +sssg64 +(dp703 +g14 +S'NC_000011.10:g.111865258G>A' +p704 +sg16 +(dp705 +g18 +g68 +sg20 +g21 +sg22 +S'111865258' +p706 +sg24 +g25 +sssS'hg19' +p707 +(dp708 +g14 +S'NC_000011.9:g.111735981G>A' +p709 +sg16 +(dp710 +g18 +g68 +sg20 +g21 +sg22 +S'111735981' +p711 +sg24 +g25 +ssssg75 +(dp712 +g77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339345.1' +p713 +sg79 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352416.1' +p714 +sssS'NM_001352420.1:c.-108-7C>T' +p715 +(dp716 +g3 +g4 +sg5 +(lp717 +S'RefSeqGene record not available' +p718 +asg8 +g4 +sg9 +(lp719 +(dp720 +S'grch37' +p721 +(dp722 +g14 +S'NW_003871080.1:g.117249G>A' +p723 +sg16 +(dp724 +g18 +g19 +sg20 +g21 +sg22 +S'117249' +p725 +sg24 +g25 +sssa(dp726 +S'hg19' +p727 +(dp728 +g14 +S'NW_003871080.1:g.117249G>A' +p729 +sg16 +(dp730 +g18 +S'NW_003871080.1' +p731 +sg20 +g21 +sg22 +S'117249' +p732 +sg24 +g25 +sssasg33 +VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 16, mRNA +p733 +sg35 +S'ALG9' +p734 +sg37 +(dp735 +g39 +S'NP_001339349.1:p.?' +p736 +sg41 +S'NP_001339349.1:p.?' 
+p737 +ssg43 +g44 +sg45 +S'NC_000011.9(NM_001352420.1):c.-108-7C>T' +p738 +sg47 +g4 +sg48 +S'NM_001352420.1:c.-108-7C>T' +p739 +sg50 +g4 +sg51 +(dp740 +S'grch38' +p741 +(dp742 +g14 +S'NC_000011.10:g.111865258G>A' +p743 +sg16 +(dp744 +g18 +g57 +sg20 +g21 +sg22 +S'111865258' +p745 +sg24 +g25 +sssS'grch37' +p746 +(dp747 +g14 +S'NC_000011.9:g.111735981G>A' +p748 +sg16 +(dp749 +g18 +g57 +sg20 +g21 +sg22 +S'111735981' +p750 +sg24 +g25 +sssg64 +(dp751 +g14 +S'NC_000011.10:g.111865258G>A' +p752 +sg16 +(dp753 +g18 +g68 +sg20 +g21 +sg22 +S'111865258' +p754 +sg24 +g25 +sssS'hg19' +p755 +(dp756 +g14 +S'NC_000011.9:g.111735981G>A' +p757 +sg16 +(dp758 +g18 +g68 +sg20 +g21 +sg22 +S'111735981' +p759 +sg24 +g25 +ssssg75 +(dp760 +g77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339349.1' +p761 +sg79 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352420.1' +p762 +sssS'NM_024740.2:c.406-7C>T' +p763 +(dp764 +g3 +g4 +sg5 +(lp765 +S'RefSeqGene record not available' +p766 +asg8 +g4 +sg9 +(lp767 +(dp768 +S'grch37' +p769 +(dp770 +g14 +S'NW_003871080.1:g.117249G>A' +p771 +sg16 +(dp772 +g18 +g19 +sg20 +g21 +sg22 +S'117249' +p773 +sg24 +g25 +sssa(dp774 +S'hg19' +p775 +(dp776 +g14 +S'NW_003871080.1:g.117249G>A' +p777 +sg16 +(dp778 +g18 +S'NW_003871080.1' +p779 +sg20 +g21 +sg22 +S'117249' +p780 +sg24 +g25 +sssasg33 +VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 1, mRNA +p781 +sg35 +S'ALG9' +p782 +sg37 +(dp783 +g39 +S'NP_079016.2:p.?' +p784 +sg41 +S'NP_079016.2:p.?' 
+p785 +ssg43 +g44 +sg45 +S'NC_000011.9(NM_024740.2):c.406-7C>T' +p786 +sg47 +g4 +sg48 +S'NM_024740.2:c.406-7C>T' +p787 +sg50 +g4 +sg51 +(dp788 +S'grch38' +p789 +(dp790 +g14 +S'NC_000011.10:g.111865258G>A' +p791 +sg16 +(dp792 +g18 +g57 +sg20 +g21 +sg22 +S'111865258' +p793 +sg24 +g25 +sssS'grch37' +p794 +(dp795 +g14 +S'NC_000011.9:g.111735981G>A' +p796 +sg16 +(dp797 +g18 +g57 +sg20 +g21 +sg22 +S'111735981' +p798 +sg24 +g25 +sssg64 +(dp799 +g14 +S'NC_000011.10:g.111865258G>A' +p800 +sg16 +(dp801 +g18 +g68 +sg20 +g21 +sg22 +S'111865258' +p802 +sg24 +g25 +sssS'hg19' +p803 +(dp804 +g14 +S'NC_000011.9:g.111735981G>A' +p805 +sg16 +(dp806 +g18 +g68 +sg20 +g21 +sg22 +S'111735981' +p807 +sg24 +g25 +ssssg75 +(dp808 +g77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_079016.2' +p809 +sg79 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_024740.2' +p810 +sssS'NM_001352414.1:c.-108-7C>T' +p811 +(dp812 +g3 +g4 +sg5 +(lp813 +S'RefSeqGene record not available' +p814 +asg8 +g4 +sg9 +(lp815 +(dp816 +S'grch37' +p817 +(dp818 +g14 +S'NW_003871080.1:g.117249G>A' +p819 +sg16 +(dp820 +g18 +g19 +sg20 +g21 +sg22 +S'117249' +p821 +sg24 +g25 +sssa(dp822 +S'hg19' +p823 +(dp824 +g14 +S'NW_003871080.1:g.117249G>A' +p825 +sg16 +(dp826 +g18 +S'NW_003871080.1' +p827 +sg20 +g21 +sg22 +S'117249' +p828 +sg24 +g25 +sssasg33 +VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 10, mRNA +p829 +sg35 +S'ALG9' +p830 +sg37 +(dp831 +g39 +S'NP_001339343.1:p.?' +p832 +sg41 +S'NP_001339343.1:p.?' 
+p833 +ssg43 +g44 +sg45 +S'NC_000011.9(NM_001352414.1):c.-108-7C>T' +p834 +sg47 +g4 +sg48 +S'NM_001352414.1:c.-108-7C>T' +p835 +sg50 +g4 +sg51 +(dp836 +S'grch38' +p837 +(dp838 +g14 +S'NC_000011.10:g.111865258G>A' +p839 +sg16 +(dp840 +g18 +g57 +sg20 +g21 +sg22 +S'111865258' +p841 +sg24 +g25 +sssS'grch37' +p842 +(dp843 +g14 +S'NC_000011.9:g.111735981G>A' +p844 +sg16 +(dp845 +g18 +g57 +sg20 +g21 +sg22 +S'111735981' +p846 +sg24 +g25 +sssg64 +(dp847 +g14 +S'NC_000011.10:g.111865258G>A' +p848 +sg16 +(dp849 +g18 +g68 +sg20 +g21 +sg22 +S'111865258' +p850 +sg24 +g25 +sssS'hg19' +p851 +(dp852 +g14 +S'NC_000011.9:g.111735981G>A' +p853 +sg16 +(dp854 +g18 +g68 +sg20 +g21 +sg22 +S'111735981' +p855 +sg24 +g25 +ssssg75 +(dp856 +g77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339343.1' +p857 +sg79 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352414.1' +p858 +sssS'NM_001352417.1:c.406-7C>T' +p859 +(dp860 +g3 +g4 +sg5 +(lp861 +S'RefSeqGene record not available' +p862 +asg8 +g4 +sg9 +(lp863 +(dp864 +S'grch37' +p865 +(dp866 +g14 +S'NW_003871080.1:g.117249G>A' +p867 +sg16 +(dp868 +g18 +g19 +sg20 +g21 +sg22 +S'117249' +p869 +sg24 +g25 +sssa(dp870 +S'hg19' +p871 +(dp872 +g14 +S'NW_003871080.1:g.117249G>A' +p873 +sg16 +(dp874 +g18 +S'NW_003871080.1' +p875 +sg20 +g21 +sg22 +S'117249' +p876 +sg24 +g25 +sssasg33 +VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 13, mRNA +p877 +sg35 +S'ALG9' +p878 +sg37 +(dp879 +g39 +S'NP_001339346.1:p.?' +p880 +sg41 +S'NP_001339346.1:p.?' 
+p881 +ssg43 +g44 +sg45 +S'NC_000011.9(NM_001352417.1):c.406-7C>T' +p882 +sg47 +g4 +sg48 +S'NM_001352417.1:c.406-7C>T' +p883 +sg50 +g4 +sg51 +(dp884 +S'grch38' +p885 +(dp886 +g14 +S'NC_000011.10:g.111865258G>A' +p887 +sg16 +(dp888 +g18 +g57 +sg20 +g21 +sg22 +S'111865258' +p889 +sg24 +g25 +sssS'grch37' +p890 +(dp891 +g14 +S'NC_000011.9:g.111735981G>A' +p892 +sg16 +(dp893 +g18 +g57 +sg20 +g21 +sg22 +S'111735981' +p894 +sg24 +g25 +sssg64 +(dp895 +g14 +S'NC_000011.10:g.111865258G>A' +p896 +sg16 +(dp897 +g18 +g68 +sg20 +g21 +sg22 +S'111865258' +p898 +sg24 +g25 +sssS'hg19' +p899 +(dp900 +g14 +S'NC_000011.9:g.111735981G>A' +p901 +sg16 +(dp902 +g18 +g68 +sg20 +g21 +sg22 +S'111735981' +p903 +sg24 +g25 +ssssg75 +(dp904 +g77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339346.1' +p905 +sg79 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352417.1' +p906 +sssS'NM_001352409.1:c.-108-7C>T' +p907 +(dp908 +g3 +g4 +sg5 +(lp909 +S'RefSeqGene record not available' +p910 +asg8 +g4 +sg9 +(lp911 +(dp912 +S'grch37' +p913 +(dp914 +g14 +S'NW_003871080.1:g.117249G>A' +p915 +sg16 +(dp916 +g18 +g19 +sg20 +g21 +sg22 +S'117249' +p917 +sg24 +g25 +sssa(dp918 +S'hg19' +p919 +(dp920 +g14 +S'NW_003871080.1:g.117249G>A' +p921 +sg16 +(dp922 +g18 +S'NW_003871080.1' +p923 +sg20 +g21 +sg22 +S'117249' +p924 +sg24 +g25 +sssasg33 +VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 5, mRNA +p925 +sg35 +S'ALG9' +p926 +sg37 +(dp927 +g39 +S'NP_001339338.1:p.?' +p928 +sg41 +S'NP_001339338.1:p.?' 
+p929 +ssg43 +g44 +sg45 +S'NC_000011.9(NM_001352409.1):c.-108-7C>T' +p930 +sg47 +g4 +sg48 +S'NM_001352409.1:c.-108-7C>T' +p931 +sg50 +g4 +sg51 +(dp932 +S'grch38' +p933 +(dp934 +g14 +S'NC_000011.10:g.111865258G>A' +p935 +sg16 +(dp936 +g18 +g57 +sg20 +g21 +sg22 +S'111865258' +p937 +sg24 +g25 +sssS'grch37' +p938 +(dp939 +g14 +S'NC_000011.9:g.111735981G>A' +p940 +sg16 +(dp941 +g18 +g57 +sg20 +g21 +sg22 +S'111735981' +p942 +sg24 +g25 +sssg64 +(dp943 +g14 +S'NC_000011.10:g.111865258G>A' +p944 +sg16 +(dp945 +g18 +g68 +sg20 +g21 +sg22 +S'111865258' +p946 +sg24 +g25 +sssS'hg19' +p947 +(dp948 +g14 +S'NC_000011.9:g.111735981G>A' +p949 +sg16 +(dp950 +g18 +g68 +sg20 +g21 +sg22 +S'111735981' +p951 +sg24 +g25 +ssssg75 +(dp952 +g77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339338.1' +p953 +sg79 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352409.1' +p954 +sssS'NM_001352413.1:c.-108-7C>T' +p955 +(dp956 +g3 +g4 +sg5 +(lp957 +S'RefSeqGene record not available' +p958 +asg8 +g4 +sg9 +(lp959 +(dp960 +S'grch37' +p961 +(dp962 +g14 +S'NW_003871080.1:g.117249G>A' +p963 +sg16 +(dp964 +g18 +g19 +sg20 +g21 +sg22 +S'117249' +p965 +sg24 +g25 +sssa(dp966 +S'hg19' +p967 +(dp968 +g14 +S'NW_003871080.1:g.117249G>A' +p969 +sg16 +(dp970 +g18 +S'NW_003871080.1' +p971 +sg20 +g21 +sg22 +S'117249' +p972 +sg24 +g25 +sssasg33 +VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 9, mRNA +p973 +sg35 +S'ALG9' +p974 +sg37 +(dp975 +g39 +S'NP_001339342.1:p.?' +p976 +sg41 +S'NP_001339342.1:p.?' 
+p977 +ssg43 +g44 +sg45 +S'NC_000011.9(NM_001352413.1):c.-108-7C>T' +p978 +sg47 +g4 +sg48 +S'NM_001352413.1:c.-108-7C>T' +p979 +sg50 +g4 +sg51 +(dp980 +S'grch38' +p981 +(dp982 +g14 +S'NC_000011.10:g.111865258G>A' +p983 +sg16 +(dp984 +g18 +g57 +sg20 +g21 +sg22 +S'111865258' +p985 +sg24 +g25 +sssS'grch37' +p986 +(dp987 +g14 +S'NC_000011.9:g.111735981G>A' +p988 +sg16 +(dp989 +g18 +g57 +sg20 +g21 +sg22 +S'111735981' +p990 +sg24 +g25 +sssg64 +(dp991 +g14 +S'NC_000011.10:g.111865258G>A' +p992 +sg16 +(dp993 +g18 +g68 +sg20 +g21 +sg22 +S'111865258' +p994 +sg24 +g25 +sssS'hg19' +p995 +(dp996 +g14 +S'NC_000011.9:g.111735981G>A' +p997 +sg16 +(dp998 +g18 +g68 +sg20 +g21 +sg22 +S'111735981' +p999 +sg24 +g25 +ssssg75 +(dp1000 +g77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339342.1' +p1001 +sg79 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352413.1' +p1002 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant215.txt b/VariantValidator/testing/testOutputsMasterITS/variant215.txt new file mode 100644 index 00000000..8941cc1c --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant215.txt @@ -0,0 +1,316 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NR_037918.2:n.1184+11736G>T' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +(dp13 +S'grch38' +p14 +(dp15 +S'hgvs_genomic_description' +p16 +S'NT_187658.1:g.69187C>A' +p17 +sS'vcf' +p18 +(dp19 +S'chr' +p20 +S'HSCHR12_3_CTG2' +p21 +sS'ref' +p22 +VC +p23 +sS'pos' +p24 +S'69187' +p25 +sS'alt' +p26 +VA +p27 +sssa(dp28 +S'hg38' +p29 +(dp30 +g16 +S'NT_187658.1:g.69187C>A' +p31 +sg18 +(dp32 +g20 +S'chr12_KI270904v1_alt' +p33 +sg22 +g23 +sg24 +S'69187' +p34 +sg26 +g27 +sssa(dp35 +S'grch37' +p36 +(dp37 +g16 +S'NW_003571047.1:g.69187C>A' +p38 +sg18 +(dp39 +g20 +S'HG1133_PATCH' +p40 +sg22 +g23 +sg24 +S'69187' +p41 
+sg26 +g27 +sssa(dp42 +S'hg19' +p43 +(dp44 +g16 +S'NW_003571047.1:g.69187C>A' +p45 +sg18 +(dp46 +g20 +S'NW_003571047.1' +p47 +sg22 +g23 +sg24 +S'69187' +p48 +sg26 +g27 +sssa(dp49 +S'grch37' +p50 +(dp51 +g16 +S'NW_003571050.1:g.69187C>A' +p52 +sg18 +(dp53 +g20 +S'HSCHR12_2_CTG2' +p54 +sg22 +g23 +sg24 +S'69187' +p55 +sg26 +g27 +sssa(dp56 +S'hg19' +p57 +(dp58 +g16 +S'NW_003571050.1:g.69187C>A' +p59 +sg18 +(dp60 +g20 +S'NW_003571050.1' +p61 +sg22 +g23 +sg24 +S'69187' +p62 +sg26 +g27 +sssa(dp63 +S'grch38' +p64 +(dp65 +g16 +S'NW_003571050.1:g.69187C>A' +p66 +sg18 +(dp67 +g20 +g54 +sg22 +g23 +sg24 +S'69187' +p68 +sg26 +g27 +sssa(dp69 +g29 +(dp70 +g16 +S'NW_003571050.1:g.69187C>A' +p71 +sg18 +(dp72 +g20 +S'chr12_GL877876v1_alt' +p73 +sg22 +g23 +sg24 +S'69187' +p74 +sg26 +g27 +sssasS'transcript_description' +p75 +VHomo sapiens PRH1-PRR4 readthrough (PRH1-PRR4), long non-coding RNA +p76 +sS'gene_symbol' +p77 +S'PRH1-PRR4' +p78 +sS'hgvs_predicted_protein_consequence' +p79 +(dp80 +S'tlr' +p81 +S'Non-coding :n.' 
+p82 +sS'slr' +p83 +g82 +ssS'submitted_variant' +p84 +S'12-11023080-C-A' +p85 +sS'genome_context_intronic_sequence' +p86 +S'NC_000012.11(NR_037918.2):c.1184+11736G>T' +p87 +sS'hgvs_lrg_variant' +p88 +g6 +sS'hgvs_transcript_variant' +p89 +S'NR_037918.2:n.1184+11736G>T' +p90 +sS'hgvs_refseqgene_variant' +p91 +g6 +sS'primary_assembly_loci' +p92 +(dp93 +S'hg19' +p94 +(dp95 +g16 +S'NC_000012.11:g.11023080C>A' +p96 +sg18 +(dp97 +g20 +S'chr12' +p98 +sg22 +g23 +sg24 +S'11023080' +p99 +sg26 +g27 +sssg29 +(dp100 +g16 +S'NC_000012.12:g.10870481C>A' +p101 +sg18 +(dp102 +g20 +g98 +sg22 +g23 +sg24 +S'10870481' +p103 +sg26 +g27 +sssS'grch37' +p104 +(dp105 +g16 +S'NC_000012.11:g.11023080C>A' +p106 +sg18 +(dp107 +g20 +S'12' +p108 +sg22 +g23 +sg24 +S'11023080' +p109 +sg26 +g27 +sssS'grch38' +p110 +(dp111 +g16 +S'NC_000012.12:g.10870481C>A' +p112 +sg18 +(dp113 +g20 +g108 +sg22 +g23 +sg24 +S'10870481' +p114 +sg26 +g27 +ssssS'reference_sequence_records' +p115 +(dp116 +S'transcript' +p117 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_037918.2' +p118 +sssS'metadata' +p119 +(dp120 +S'variantvalidator_hgvs_version' +p121 +S'1.1.3' +p122 +sS'uta_schema' +p123 +S'uta_20180821' +p124 +sS'seqrepo_db' +p125 +S'2018-08-21' +p126 +sS'variantvalidator_version' +p127 +S'v0.2' +p128 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant216.txt b/VariantValidator/testing/testOutputsMasterITS/variant216.txt new file mode 100644 index 00000000..aa954e68 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant216.txt @@ -0,0 +1,485 @@ +(dp0 +S'NM_020297.3:c.2199-1302del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000012.11:g.22018712TC>T automapped to NC_000012.11:g.22018713delC' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens ATP binding cassette subfamily C member 9 (ABCC9), transcript variant SUR2B, mRNA +p13 +sS'gene_symbol' +p14 +S'ABCC9' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_064693.2:p.?' +p19 +sS'slr' +p20 +S'NP_064693.2:p.?' +p21 +ssS'submitted_variant' +p22 +S'12-22018712-TC-T' +p23 +sS'genome_context_intronic_sequence' +p24 +S'NC_000012.11(NM_020297.3):c.2199-1302del' +p25 +sS'hgvs_lrg_variant' +p26 +g4 +sS'hgvs_transcript_variant' +p27 +S'NM_020297.3:c.2199-1302del' +p28 +sS'hgvs_refseqgene_variant' +p29 +g4 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000012.11:g.22018713del' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr12' +p39 +sS'ref' +p40 +S'TC' +p41 +sS'pos' +p42 +S'22018712' +p43 +sS'alt' +p44 +S'T' +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000012.12:g.21865779del' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +S'TC' +p50 +sg42 +S'21865778' +p51 +sg44 +g45 +sssS'grch37' +p52 +(dp53 +g34 +S'NC_000012.11:g.22018713del' +p54 +sg36 +(dp55 +g38 +S'12' +p56 +sg40 +S'TC' +p57 +sg42 +S'22018712' +p58 +sg44 +g45 +sssS'grch38' +p59 +(dp60 +g34 +S'NC_000012.12:g.21865779del' +p61 +sg36 +(dp62 +g38 +g56 +sg40 +S'TC' +p63 +sg42 +S'21865778' +p64 +sg44 +g45 +ssssS'reference_sequence_records' +p65 +(dp66 +S'protein' +p67 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NP_064693.2' +p68 +sS'transcript' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_020297.3' +p70 +sssS'NM_005691.3:c.2199-1302del' +p71 +(dp72 +g3 +g4 +sg5 +(lp73 +S'NC_000012.11:g.22018712TC>T automapped to NC_000012.11:g.22018713delC' +p74 +aS'RefSeqGene record not available' +p75 +asg9 +g4 +sg10 +(lp76 +sg12 +VHomo sapiens ATP binding cassette subfamily C member 9 (ABCC9), transcript variant SUR2A, mRNA +p77 +sg14 +S'ABCC9' +p78 +sg16 +(dp79 +g18 +S'NP_005682.2:p.?' +p80 +sg20 +S'NP_005682.2:p.?' +p81 +ssg22 +g23 +sg24 +S'NC_000012.11(NM_005691.3):c.2199-1302del' +p82 +sg26 +g4 +sg27 +S'NM_005691.3:c.2199-1302del' +p83 +sg29 +g4 +sg30 +(dp84 +S'hg19' +p85 +(dp86 +g34 +S'NC_000012.11:g.22018713del' +p87 +sg36 +(dp88 +g38 +g39 +sg40 +S'TC' +p89 +sg42 +S'22018712' +p90 +sg44 +g45 +sssg46 +(dp91 +g34 +S'NC_000012.12:g.21865779del' +p92 +sg36 +(dp93 +g38 +g39 +sg40 +S'TC' +p94 +sg42 +S'21865778' +p95 +sg44 +g45 +sssS'grch37' +p96 +(dp97 +g34 +S'NC_000012.11:g.22018713del' +p98 +sg36 +(dp99 +g38 +g56 +sg40 +S'TC' +p100 +sg42 +S'22018712' +p101 +sg44 +g45 +sssS'grch38' +p102 +(dp103 +g34 +S'NC_000012.12:g.21865779del' +p104 +sg36 +(dp105 +g38 +g56 +sg40 +S'TC' +p106 +sg42 +S'21865778' +p107 +sg44 +g45 +ssssg65 +(dp108 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005682.2' +p109 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005691.3' +p110 +sssS'NM_020297.2:c.2199-1302del' +p111 +(dp112 +g3 +g4 +sg5 +(lp113 +S'NC_000012.11:g.22018712TC>T automapped to NC_000012.11:g.22018713delC' +p114 +aS'A more recent version of the selected reference sequence NM_020297.2 is available (NM_020297.3)' +p115 +aS'NM_020297.3:c.2199-1302delG MUST be fully validated prior to use in reports' +p116 +aS'select_variants=NM_020297.3:c.2199-1302del' +p117 +aS'RefSeqGene record not available' +p118 +asg9 +g4 +sg10 +(lp119 +sg12 +VHomo sapiens ATP-binding cassette, sub-family C (CFTR/MRP), member 9 (ABCC9), transcript variant SUR2B, mRNA +p120 +sg14 
+S'ABCC9' +p121 +sg16 +(dp122 +g18 +S'NP_064693.2:p.?' +p123 +sg20 +S'NP_064693.2:p.?' +p124 +ssg22 +g23 +sg24 +S'NC_000012.11(NM_020297.2):c.2199-1302del' +p125 +sg26 +g4 +sg27 +S'NM_020297.2:c.2199-1302del' +p126 +sg29 +g4 +sg30 +(dp127 +S'hg19' +p128 +(dp129 +g34 +S'NC_000012.11:g.22018713del' +p130 +sg36 +(dp131 +g38 +g39 +sg40 +S'TC' +p132 +sg42 +S'22018712' +p133 +sg44 +g45 +sssS'grch37' +p134 +(dp135 +g34 +S'NC_000012.11:g.22018713del' +p136 +sg36 +(dp137 +g38 +g56 +sg40 +S'TC' +p138 +sg42 +S'22018712' +p139 +sg44 +g45 +ssssg65 +(dp140 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_064693.2' +p141 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_020297.2' +p142 +sssS'flag' +p143 +S'gene_variant' +p144 +sS'NM_005691.2:c.2199-1302del' +p145 +(dp146 +g3 +g4 +sg5 +(lp147 +S'NC_000012.11:g.22018712TC>T automapped to NC_000012.11:g.22018713delC' +p148 +aS'A more recent version of the selected reference sequence NM_005691.2 is available (NM_005691.3)' +p149 +aS'NM_005691.3:c.2199-1302delG MUST be fully validated prior to use in reports' +p150 +aS'select_variants=NM_005691.3:c.2199-1302del' +p151 +aS'RefSeqGene record not available' +p152 +asg9 +g4 +sg10 +(lp153 +sg12 +VHomo sapiens ATP-binding cassette, sub-family C (CFTR/MRP), member 9 (ABCC9), transcript variant SUR2A, mRNA +p154 +sg14 +S'ABCC9' +p155 +sg16 +(dp156 +g18 +S'NP_005682.2:p.?' +p157 +sg20 +S'NP_005682.2:p.?' 
+p158 +ssg22 +g23 +sg24 +S'NC_000012.11(NM_005691.2):c.2199-1302del' +p159 +sg26 +g4 +sg27 +S'NM_005691.2:c.2199-1302del' +p160 +sg29 +g4 +sg30 +(dp161 +S'hg19' +p162 +(dp163 +g34 +S'NC_000012.11:g.22018713del' +p164 +sg36 +(dp165 +g38 +g39 +sg40 +S'TC' +p166 +sg42 +S'22018712' +p167 +sg44 +g45 +sssS'grch37' +p168 +(dp169 +g34 +S'NC_000012.11:g.22018713del' +p170 +sg36 +(dp171 +g38 +g56 +sg40 +S'TC' +p172 +sg42 +S'22018712' +p173 +sg44 +g45 +ssssg65 +(dp174 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005682.2' +p175 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005691.2' +p176 +sssS'metadata' +p177 +(dp178 +S'variantvalidator_hgvs_version' +p179 +S'1.1.3' +p180 +sS'uta_schema' +p181 +S'uta_20180821' +p182 +sS'seqrepo_db' +p183 +S'2018-08-21' +p184 +sS'variantvalidator_version' +p185 +S'v0.2' +p186 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant217.txt b/VariantValidator/testing/testOutputsMasterITS/variant217.txt new file mode 100644 index 00000000..97bc3f0a --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant217.txt @@ -0,0 +1,172 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000424.3:c.556-2A>G' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens keratin 5 (KRT5), mRNA +p14 +sS'gene_symbol' +p15 +S'KRT5' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_000415.2:p.?' +p20 +sS'slr' +p21 +S'NP_000415.2:p.?' 
+p22 +ssS'submitted_variant' +p23 +S'12-52912946-T-C' +p24 +sS'genome_context_intronic_sequence' +p25 +S'NC_000012.11(NM_000424.3):c.556-2A>G' +p26 +sS'hgvs_lrg_variant' +p27 +g6 +sS'hgvs_transcript_variant' +p28 +S'NM_000424.3:c.556-2A>G' +p29 +sS'hgvs_refseqgene_variant' +p30 +g6 +sS'primary_assembly_loci' +p31 +(dp32 +S'hg19' +p33 +(dp34 +S'hgvs_genomic_description' +p35 +S'NC_000012.11:g.52912946T>C' +p36 +sS'vcf' +p37 +(dp38 +S'chr' +p39 +S'chr12' +p40 +sS'ref' +p41 +VT +p42 +sS'pos' +p43 +S'52912946' +p44 +sS'alt' +p45 +VC +p46 +sssS'hg38' +p47 +(dp48 +g35 +S'NC_000012.12:g.52519162T>C' +p49 +sg37 +(dp50 +g39 +g40 +sg41 +g42 +sg43 +S'52519162' +p51 +sg45 +g46 +sssS'grch37' +p52 +(dp53 +g35 +S'NC_000012.11:g.52912946T>C' +p54 +sg37 +(dp55 +g39 +S'12' +p56 +sg41 +g42 +sg43 +S'52912946' +p57 +sg45 +g46 +sssS'grch38' +p58 +(dp59 +g35 +S'NC_000012.12:g.52519162T>C' +p60 +sg37 +(dp61 +g39 +g56 +sg41 +g42 +sg43 +S'52519162' +p62 +sg45 +g46 +ssssS'reference_sequence_records' +p63 +(dp64 +S'protein' +p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000415.2' +p66 +sS'transcript' +p67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000424.3' +p68 +sssS'metadata' +p69 +(dp70 +S'variantvalidator_hgvs_version' +p71 +S'1.1.3' +p72 +sS'uta_schema' +p73 +S'uta_20180821' +p74 +sS'seqrepo_db' +p75 +S'2018-08-21' +p76 +sS'variantvalidator_version' +p77 +S'v0.2' +p78 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant218.txt b/VariantValidator/testing/testOutputsMasterITS/variant218.txt new file mode 100644 index 00000000..9a542b87 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant218.txt @@ -0,0 +1,424 @@ +(dp0 +S'NM_000277.2:c.1200del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000012.11:g.103234292TC>T automapped to NC_000012.11:g.103234294delC' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens phenylalanine hydroxylase (PAH), transcript variant 1, mRNA +p13 +sS'gene_symbol' +p14 +S'PAH' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_000268.1:p.(Asn401ThrfsTer51)' +p19 +sS'slr' +p20 +S'NP_000268.1:p.(N401Tfs*51)' +p21 +ssS'submitted_variant' +p22 +S'12-103234292-TC-T' +p23 +sS'genome_context_intronic_sequence' +p24 +g4 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_000277.2:c.1200del' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000012.11:g.103234294del' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'chr12' +p38 +sS'ref' +p39 +S'TC' +p40 +sS'pos' +p41 +S'103234292' +p42 +sS'alt' +p43 +S'T' +p44 +sssS'hg38' +p45 +(dp46 +g33 +S'NC_000012.12:g.102840516del' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +S'TC' +p49 +sg41 +S'102840514' +p50 +sg43 +g44 +sssS'grch37' +p51 +(dp52 +g33 +S'NC_000012.11:g.103234294del' +p53 +sg35 +(dp54 +g37 +S'12' +p55 +sg39 +S'TC' +p56 +sg41 +S'103234292' +p57 +sg43 +g44 +sssS'grch38' +p58 +(dp59 +g33 +S'NC_000012.12:g.102840516del' +p60 +sg35 +(dp61 +g37 +g55 +sg39 +S'TC' +p62 +sg41 +S'102840514' +p63 +sg43 +g44 +ssssS'reference_sequence_records' +p64 +(dp65 +S'protein' +p66 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000268.1' +p67 +sS'transcript' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000277.2' +p69 +sssS'NM_001354304.1:c.1200del' +p70 +(dp71 +g3 +g4 +sg5 +(lp72 +S'NC_000012.11:g.103234292TC>T automapped to NC_000012.11:g.103234294delC' +p73 +aS'RefSeqGene record not available' +p74 +asg9 +g4 +sg10 +(lp75 +sg12 +VHomo sapiens phenylalanine hydroxylase (PAH), transcript variant 2, mRNA +p76 +sg14 +S'PAH' +p77 +sg16 +(dp78 +g18 +S'NP_001341233.1:p.(Asn401ThrfsTer51)' +p79 +sg20 +S'NP_001341233.1:p.(N401Tfs*51)' +p80 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001354304.1:c.1200del' +p81 +sg28 +g4 +sg29 +(dp82 +S'hg19' +p83 +(dp84 +g33 +S'NC_000012.11:g.103234294del' +p85 +sg35 +(dp86 +g37 +g38 +sg39 +S'TC' +p87 +sg41 +S'103234292' +p88 +sg43 +g44 +sssg45 +(dp89 +g33 +S'NC_000012.12:g.102840516del' +p90 +sg35 +(dp91 +g37 +g38 +sg39 +S'TC' +p92 +sg41 +S'102840514' +p93 +sg43 +g44 +sssS'grch37' +p94 +(dp95 +g33 +S'NC_000012.11:g.103234294del' +p96 +sg35 +(dp97 +g37 +g55 +sg39 +S'TC' +p98 +sg41 +S'103234292' +p99 +sg43 +g44 +sssS'grch38' +p100 +(dp101 +g33 +S'NC_000012.12:g.102840516del' +p102 +sg35 +(dp103 +g37 +g55 +sg39 +S'TC' +p104 +sg41 +S'102840514' +p105 +sg43 +g44 +ssssg64 +(dp106 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341233.1' +p107 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354304.1' +p108 +sssS'flag' +p109 +S'gene_variant' +p110 +sS'metadata' +p111 +(dp112 +S'variantvalidator_hgvs_version' +p113 +S'1.1.3' +p114 +sS'uta_schema' +p115 +S'uta_20180821' +p116 +sS'seqrepo_db' +p117 +S'2018-08-21' +p118 +sS'variantvalidator_version' +p119 +S'v0.2' +p120 +ssS'NM_000277.1:c.1200del' +p121 +(dp122 +g3 +g4 +sg5 +(lp123 +S'NC_000012.11:g.103234292TC>T automapped to NC_000012.11:g.103234294delC' +p124 +aS'A more recent version of the selected reference sequence NM_000277.1 is available (NM_000277.2)' +p125 +aS'NM_000277.2:c.1200delG MUST be fully validated prior to use in reports' +p126 
+aS'select_variants=NM_000277.2:c.1200del' +p127 +aS'RefSeqGene record not available' +p128 +asg9 +g4 +sg10 +(lp129 +sg12 +VHomo sapiens phenylalanine hydroxylase (PAH), mRNA +p130 +sg14 +S'PAH' +p131 +sg16 +(dp132 +g18 +S'NP_000268.1:p.(Asn401ThrfsTer51)' +p133 +sg20 +S'NP_000268.1:p.(N401Tfs*51)' +p134 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_000277.1:c.1200del' +p135 +sg28 +g4 +sg29 +(dp136 +S'hg19' +p137 +(dp138 +g33 +S'NC_000012.11:g.103234294del' +p139 +sg35 +(dp140 +g37 +g38 +sg39 +S'TC' +p141 +sg41 +S'103234292' +p142 +sg43 +g44 +sssg45 +(dp143 +g33 +S'NC_000012.12:g.102840516del' +p144 +sg35 +(dp145 +g37 +g38 +sg39 +S'TC' +p146 +sg41 +S'102840514' +p147 +sg43 +g44 +sssS'grch37' +p148 +(dp149 +g33 +S'NC_000012.11:g.103234294del' +p150 +sg35 +(dp151 +g37 +g55 +sg39 +S'TC' +p152 +sg41 +S'103234292' +p153 +sg43 +g44 +sssS'grch38' +p154 +(dp155 +g33 +S'NC_000012.12:g.102840516del' +p156 +sg35 +(dp157 +g37 +g55 +sg39 +S'TC' +p158 +sg41 +S'102840514' +p159 +sg43 +g44 +ssssg64 +(dp160 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000268.1' +p161 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000277.1' +p162 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant219.txt b/VariantValidator/testing/testOutputsMasterITS/variant219.txt new file mode 100644 index 00000000..932fee13 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant219.txt @@ -0,0 +1,408 @@ +(dp0 +S'NM_001354304.1:c.-95-121A>G' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens phenylalanine hydroxylase (PAH), transcript variant 2, mRNA +p12 +sS'gene_symbol' +p13 +S'PAH' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_001341233.1:p.?' +p18 +sS'slr' +p19 +S'NP_001341233.1:p.?' 
+p20 +ssS'submitted_variant' +p21 +S'12-103311124-T-C' +p22 +sS'genome_context_intronic_sequence' +p23 +S'NC_000012.11(NM_001354304.1):c.-95-121A>G' +p24 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_001354304.1:c.-95-121A>G' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000012.11:g.103311124T>C' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'chr12' +p38 +sS'ref' +p39 +VT +p40 +sS'pos' +p41 +S'103311124' +p42 +sS'alt' +p43 +VC +p44 +sssS'hg38' +p45 +(dp46 +g33 +S'NC_000012.12:g.102917346T>C' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +g40 +sg41 +S'102917346' +p49 +sg43 +g44 +sssS'grch37' +p50 +(dp51 +g33 +S'NC_000012.11:g.103311124T>C' +p52 +sg35 +(dp53 +g37 +S'12' +p54 +sg39 +g40 +sg41 +S'103311124' +p55 +sg43 +g44 +sssS'grch38' +p56 +(dp57 +g33 +S'NC_000012.12:g.102917346T>C' +p58 +sg35 +(dp59 +g37 +g54 +sg39 +g40 +sg41 +S'102917346' +p60 +sg43 +g44 +ssssS'reference_sequence_records' +p61 +(dp62 +S'protein' +p63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341233.1' +p64 +sS'transcript' +p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354304.1' +p66 +sssS'flag' +p67 +S'gene_variant' +p68 +sS'NM_000277.2:c.-216A>G' +p69 +(dp70 +g3 +g4 +sg5 +(lp71 +S'RefSeqGene record not available' +p72 +asg8 +g4 +sg9 +(lp73 +sg11 +VHomo sapiens phenylalanine hydroxylase (PAH), transcript variant 1, mRNA +p74 +sg13 +S'PAH' +p75 +sg15 +(dp76 +g17 +S'NP_000268.1:p.?' +p77 +sg19 +S'NP_000268.1:p.?' 
+p78 +ssg21 +g22 +sg23 +g4 +sg25 +g4 +sg26 +S'NM_000277.2:c.-216A>G' +p79 +sg28 +g4 +sg29 +(dp80 +S'hg19' +p81 +(dp82 +g33 +S'NC_000012.11:g.103311124T>C' +p83 +sg35 +(dp84 +g37 +g38 +sg39 +g40 +sg41 +S'103311124' +p85 +sg43 +g44 +sssg45 +(dp86 +g33 +S'NC_000012.12:g.102917346T>C' +p87 +sg35 +(dp88 +g37 +g38 +sg39 +g40 +sg41 +S'102917346' +p89 +sg43 +g44 +sssS'grch37' +p90 +(dp91 +g33 +S'NC_000012.11:g.103311124T>C' +p92 +sg35 +(dp93 +g37 +g54 +sg39 +g40 +sg41 +S'103311124' +p94 +sg43 +g44 +sssS'grch38' +p95 +(dp96 +g33 +S'NC_000012.12:g.102917346T>C' +p97 +sg35 +(dp98 +g37 +g54 +sg39 +g40 +sg41 +S'102917346' +p99 +sg43 +g44 +ssssg61 +(dp100 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000268.1' +p101 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000277.2' +p102 +sssS'NM_000277.1:c.-215A>G' +p103 +(dp104 +g3 +g4 +sg5 +(lp105 +S'A more recent version of the selected reference sequence NM_000277.1 is available (NM_000277.2)' +p106 +aS'NM_000277.2:c.-215C>G MUST be fully validated prior to use in reports' +p107 +aS'select_variants=NM_000277.2:c.-215C>G' +p108 +aS'RefSeqGene record not available' +p109 +asg8 +g4 +sg9 +(lp110 +sg11 +VHomo sapiens phenylalanine hydroxylase (PAH), mRNA +p111 +sg13 +S'PAH' +p112 +sg15 +(dp113 +g17 +S'NP_000268.1:p.?' +p114 +sg19 +S'NP_000268.1:p.?' 
+p115 +ssg21 +g22 +sg23 +g4 +sg25 +g4 +sg26 +S'NM_000277.1:c.-215A>G' +p116 +sg28 +g4 +sg29 +(dp117 +S'hg19' +p118 +(dp119 +g33 +S'NC_000012.11:g.103311124T>C' +p120 +sg35 +(dp121 +g37 +g38 +sg39 +g40 +sg41 +S'103311124' +p122 +sg43 +g44 +sssg45 +(dp123 +g33 +S'NC_000012.12:g.102917346T>C' +p124 +sg35 +(dp125 +g37 +g38 +sg39 +g40 +sg41 +S'102917346' +p126 +sg43 +g44 +sssS'grch37' +p127 +(dp128 +g33 +S'NC_000012.11:g.103311124T>C' +p129 +sg35 +(dp130 +g37 +g54 +sg39 +g40 +sg41 +S'103311124' +p131 +sg43 +g44 +sssS'grch38' +p132 +(dp133 +g33 +S'NC_000012.12:g.102917346T>C' +p134 +sg35 +(dp135 +g37 +g54 +sg39 +g40 +sg41 +S'102917346' +p136 +sg43 +g44 +ssssg61 +(dp137 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000268.1' +p138 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000277.1' +p139 +sssS'metadata' +p140 +(dp141 +S'variantvalidator_hgvs_version' +p142 +S'1.1.3' +p143 +sS'uta_schema' +p144 +S'uta_20180821' +p145 +sS'seqrepo_db' +p146 +S'2018-08-21' +p147 +sS'variantvalidator_version' +p148 +S'v0.2' +p149 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant22.txt b/VariantValidator/testing/testOutputsMasterITS/variant22.txt new file mode 100644 index 00000000..b100322f --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant22.txt @@ -0,0 +1,82 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant' +p7 +aS'Instead use NC_000011.9:g.5244828A=' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +g4 +sS'gene_symbol' +p13 +g4 +sS'hgvs_predicted_protein_consequence' +p14 +(dp15 +S'tlr' +p16 +g4 +sS'slr' +p17 +g4 +ssS'submitted_variant' +p18 +S'NM_000518.4:c.*2000C>T' +p19 +sS'genome_context_intronic_sequence' +p20 +g4 +sS'hgvs_lrg_variant' +p21 +g4 +sS'hgvs_transcript_variant' +p22 +g4 +sS'hgvs_refseqgene_variant' +p23 +g4 +sS'primary_assembly_loci' +p24 +(dp25 +sS'reference_sequence_records' +p26 +g4 +ssS'flag' +p27 +S'warning' +p28 +sS'metadata' +p29 +(dp30 +S'variantvalidator_hgvs_version' +p31 +S'1.1.3' +p32 +sS'uta_schema' +p33 +S'uta_20180821' +p34 +sS'seqrepo_db' +p35 +S'2018-08-21' +p36 +sS'variantvalidator_version' +p37 +S'v0.2' +p38 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant220.txt b/VariantValidator/testing/testOutputsMasterITS/variant220.txt new file mode 100644 index 00000000..b7154ffc --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant220.txt @@ -0,0 +1,1185 @@ +(dp0 +S'NM_001319681.1:c.-366-1G>A' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 7, mRNA +p12 +sS'gene_symbol' +p13 +S'TCTN1' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_001306610.1:p.?' +p18 +sS'slr' +p19 +S'NP_001306610.1:p.?' +p20 +ssS'submitted_variant' +p21 +S'12-111064166-G-A' +p22 +sS'genome_context_intronic_sequence' +p23 +S'NC_000012.11(NM_001319681.1):c.-366-1G>A' +p24 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_001319681.1:c.-366-1G>A' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000012.11:g.111064166G>A' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'chr12' +p38 +sS'ref' +p39 +S'G' +p40 +sS'pos' +p41 +S'111064166' +p42 +sS'alt' +p43 +S'A' +p44 +sssS'hg38' +p45 +(dp46 +g33 +S'NC_000012.12:g.110626361G>A' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +g40 +sg41 +S'110626361' +p49 +sg43 +g44 +sssS'grch37' +p50 +(dp51 +g33 +S'NC_000012.11:g.111064166G>A' +p52 +sg35 +(dp53 +g37 +S'12' +p54 +sg39 +g40 +sg41 +S'111064166' +p55 +sg43 +g44 +sssS'grch38' +p56 +(dp57 +g33 +S'NC_000012.12:g.110626361G>A' +p58 +sg35 +(dp59 +g37 +g54 +sg39 +g40 +sg41 +S'110626361' +p60 +sg43 +g44 +ssssS'reference_sequence_records' +p61 +(dp62 +S'protein' +p63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001306610.1' +p64 +sS'transcript' +p65 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001319681.1' +p66 +sssS'NM_001319680.1:c.342-1G>A' +p67 +(dp68 +g3 +g4 +sg5 +(lp69 +S'RefSeqGene record not available' +p70 +asg8 +g4 +sg9 +(lp71 +sg11 +VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 6, mRNA +p72 +sg13 +S'TCTN1' +p73 +sg15 +(dp74 +g17 +S'NP_001306609.1:p.?' +p75 +sg19 +S'NP_001306609.1:p.?' +p76 +ssg21 +g22 +sg23 +S'NC_000012.11(NM_001319680.1):c.342-1G>A' +p77 +sg25 +g4 +sg26 +S'NM_001319680.1:c.342-1G>A' +p78 +sg28 +g4 +sg29 +(dp79 +S'hg19' +p80 +(dp81 +g33 +S'NC_000012.11:g.111064166G>A' +p82 +sg35 +(dp83 +g37 +g38 +sg39 +g40 +sg41 +S'111064166' +p84 +sg43 +g44 +sssg45 +(dp85 +g33 +S'NC_000012.12:g.110626361G>A' +p86 +sg35 +(dp87 +g37 +g38 +sg39 +g40 +sg41 +S'110626361' +p88 +sg43 +g44 +sssS'grch37' +p89 +(dp90 +g33 +S'NC_000012.11:g.111064166G>A' +p91 +sg35 +(dp92 +g37 +g54 +sg39 +g40 +sg41 +S'111064166' +p93 +sg43 +g44 +sssS'grch38' +p94 +(dp95 +g33 +S'NC_000012.12:g.110626361G>A' +p96 +sg35 +(dp97 +g37 +g54 +sg39 +g40 +sg41 +S'110626361' +p98 +sg43 +g44 +ssssg61 +(dp99 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001306609.1' +p100 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001319680.1' +p101 +sssS'NM_001082538.2:c.342-1G>A' +p102 +(dp103 +g3 +g4 +sg5 +(lp104 +S'RefSeqGene record not available' +p105 +asg8 +g4 +sg9 +(lp106 +sg11 +VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 1, mRNA +p107 +sg13 +S'TCTN1' +p108 +sg15 +(dp109 +g17 +S'NP_001076007.1:p.?' +p110 +sg19 +S'NP_001076007.1:p.?' 
+p111 +ssg21 +g22 +sg23 +S'NC_000012.11(NM_001082538.2):c.342-1G>A' +p112 +sg25 +g4 +sg26 +S'NM_001082538.2:c.342-1G>A' +p113 +sg28 +g4 +sg29 +(dp114 +S'hg19' +p115 +(dp116 +g33 +S'NC_000012.11:g.111064166G>A' +p117 +sg35 +(dp118 +g37 +g38 +sg39 +g40 +sg41 +S'111064166' +p119 +sg43 +g44 +sssg45 +(dp120 +g33 +S'NC_000012.12:g.110626361G>A' +p121 +sg35 +(dp122 +g37 +g38 +sg39 +g40 +sg41 +S'110626361' +p123 +sg43 +g44 +sssS'grch37' +p124 +(dp125 +g33 +S'NC_000012.11:g.111064166G>A' +p126 +sg35 +(dp127 +g37 +g54 +sg39 +g40 +sg41 +S'111064166' +p128 +sg43 +g44 +sssS'grch38' +p129 +(dp130 +g33 +S'NC_000012.12:g.110626361G>A' +p131 +sg35 +(dp132 +g37 +g54 +sg39 +g40 +sg41 +S'110626361' +p133 +sg43 +g44 +ssssg61 +(dp134 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001076007.1' +p135 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001082538.2' +p136 +sssS'metadata' +p137 +(dp138 +S'variantvalidator_hgvs_version' +p139 +S'1.1.3' +p140 +sS'uta_schema' +p141 +S'uta_20180821' +p142 +sS'seqrepo_db' +p143 +S'2018-08-21' +p144 +sS'variantvalidator_version' +p145 +S'v0.2' +p146 +ssS'NM_001173976.1:c.162-1G>A' +p147 +(dp148 +g3 +g4 +sg5 +(lp149 +S'RefSeqGene record not available' +p150 +asg8 +g4 +sg9 +(lp151 +sg11 +VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 5, mRNA +p152 +sg13 +S'TCTN1' +p153 +sg15 +(dp154 +g17 +S'NP_001167447.1:p.?' +p155 +sg19 +S'NP_001167447.1:p.?' 
+p156 +ssg21 +g22 +sg23 +S'NC_000012.11(NM_001173976.1):c.162-1G>A' +p157 +sg25 +g4 +sg26 +S'NM_001173976.1:c.162-1G>A' +p158 +sg28 +g4 +sg29 +(dp159 +S'hg19' +p160 +(dp161 +g33 +S'NC_000012.11:g.111064166G>A' +p162 +sg35 +(dp163 +g37 +g38 +sg39 +g40 +sg41 +S'111064166' +p164 +sg43 +g44 +sssg45 +(dp165 +g33 +S'NC_000012.12:g.110626361G>A' +p166 +sg35 +(dp167 +g37 +g38 +sg39 +g40 +sg41 +S'110626361' +p168 +sg43 +g44 +sssS'grch37' +p169 +(dp170 +g33 +S'NC_000012.11:g.111064166G>A' +p171 +sg35 +(dp172 +g37 +g54 +sg39 +g40 +sg41 +S'111064166' +p173 +sg43 +g44 +sssS'grch38' +p174 +(dp175 +g33 +S'NC_000012.12:g.110626361G>A' +p176 +sg35 +(dp177 +g37 +g54 +sg39 +g40 +sg41 +S'110626361' +p178 +sg43 +g44 +ssssg61 +(dp179 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167447.1' +p180 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001173976.1' +p181 +sssS'flag' +p182 +S'gene_variant' +p183 +sS'NM_001082537.2:c.342-1G>A' +p184 +(dp185 +g3 +g4 +sg5 +(lp186 +S'RefSeqGene record not available' +p187 +asg8 +g4 +sg9 +(lp188 +sg11 +VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 2, mRNA +p189 +sg13 +S'TCTN1' +p190 +sg15 +(dp191 +g17 +S'NP_001076006.1:p.?' +p192 +sg19 +S'NP_001076006.1:p.?' 
+p193 +ssg21 +g22 +sg23 +S'NC_000012.11(NM_001082537.2):c.342-1G>A' +p194 +sg25 +g4 +sg26 +S'NM_001082537.2:c.342-1G>A' +p195 +sg28 +g4 +sg29 +(dp196 +S'hg19' +p197 +(dp198 +g33 +S'NC_000012.11:g.111064166G>A' +p199 +sg35 +(dp200 +g37 +g38 +sg39 +g40 +sg41 +S'111064166' +p201 +sg43 +g44 +sssg45 +(dp202 +g33 +S'NC_000012.12:g.110626361G>A' +p203 +sg35 +(dp204 +g37 +g38 +sg39 +g40 +sg41 +S'110626361' +p205 +sg43 +g44 +sssS'grch37' +p206 +(dp207 +g33 +S'NC_000012.11:g.111064166G>A' +p208 +sg35 +(dp209 +g37 +g54 +sg39 +g40 +sg41 +S'111064166' +p210 +sg43 +g44 +sssS'grch38' +p211 +(dp212 +g33 +S'NC_000012.12:g.110626361G>A' +p213 +sg35 +(dp214 +g37 +g54 +sg39 +g40 +sg41 +S'110626361' +p215 +sg43 +g44 +ssssg61 +(dp216 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001076006.1' +p217 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001082537.2' +p218 +sssS'NR_135088.1:n.559-1G>A' +p219 +(dp220 +g3 +g4 +sg5 +(lp221 +S'RefSeqGene record not available' +p222 +asg8 +g4 +sg9 +(lp223 +sg11 +VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 9, non-coding RNA +p224 +sg13 +S'TCTN1' +p225 +sg15 +(dp226 +g17 +S'Non-coding :n.' 
+p227 +sg19 +g227 +ssg21 +g22 +sg23 +S'NC_000012.11(NR_135088.1):c.559-1G>A' +p228 +sg25 +g4 +sg26 +S'NR_135088.1:n.559-1G>A' +p229 +sg28 +g4 +sg29 +(dp230 +S'hg19' +p231 +(dp232 +g33 +S'NC_000012.11:g.111064166G>A' +p233 +sg35 +(dp234 +g37 +g38 +sg39 +g40 +sg41 +S'111064166' +p235 +sg43 +g44 +sssg45 +(dp236 +g33 +S'NC_000012.12:g.110626361G>A' +p237 +sg35 +(dp238 +g37 +g38 +sg39 +g40 +sg41 +S'110626361' +p239 +sg43 +g44 +sssS'grch37' +p240 +(dp241 +g33 +S'NC_000012.11:g.111064166G>A' +p242 +sg35 +(dp243 +g37 +g54 +sg39 +g40 +sg41 +S'111064166' +p244 +sg43 +g44 +sssS'grch38' +p245 +(dp246 +g33 +S'NC_000012.12:g.110626361G>A' +p247 +sg35 +(dp248 +g37 +g54 +sg39 +g40 +sg41 +S'110626361' +p249 +sg43 +g44 +ssssg61 +(dp250 +g65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_135088.1' +p251 +sssS'NM_024549.5:c.342-1G>A' +p252 +(dp253 +g3 +g4 +sg5 +(lp254 +S'RefSeqGene record not available' +p255 +asg8 +g4 +sg9 +(lp256 +sg11 +VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 3, mRNA +p257 +sg13 +S'TCTN1' +p258 +sg15 +(dp259 +g17 +S'NP_078825.2:p.?' +p260 +sg19 +S'NP_078825.2:p.?' 
+p261 +ssg21 +g22 +sg23 +S'NC_000012.11(NM_024549.5):c.342-1G>A' +p262 +sg25 +g4 +sg26 +S'NM_024549.5:c.342-1G>A' +p263 +sg28 +g4 +sg29 +(dp264 +S'hg19' +p265 +(dp266 +g33 +S'NC_000012.11:g.111064166G>A' +p267 +sg35 +(dp268 +g37 +g38 +sg39 +g40 +sg41 +S'111064166' +p269 +sg43 +g44 +sssg45 +(dp270 +g33 +S'NC_000012.12:g.110626361G>A' +p271 +sg35 +(dp272 +g37 +g38 +sg39 +g40 +sg41 +S'110626361' +p273 +sg43 +g44 +sssS'grch37' +p274 +(dp275 +g33 +S'NC_000012.11:g.111064166G>A' +p276 +sg35 +(dp277 +g37 +g54 +sg39 +g40 +sg41 +S'111064166' +p278 +sg43 +g44 +sssS'grch38' +p279 +(dp280 +g33 +S'NC_000012.12:g.110626361G>A' +p281 +sg35 +(dp282 +g37 +g54 +sg39 +g40 +sg41 +S'110626361' +p283 +sg43 +g44 +ssssg61 +(dp284 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_078825.2' +p285 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_024549.5' +p286 +sssS'NM_001319682.1:c.174-1G>A' +p287 +(dp288 +g3 +g4 +sg5 +(lp289 +S'RefSeqGene record not available' +p290 +asg8 +g4 +sg9 +(lp291 +sg11 +VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 8, mRNA +p292 +sg13 +S'TCTN1' +p293 +sg15 +(dp294 +g17 +S'NP_001306611.1:p.?' +p295 +sg19 +S'NP_001306611.1:p.?' 
+p296 +ssg21 +g22 +sg23 +S'NC_000012.11(NM_001319682.1):c.174-1G>A' +p297 +sg25 +g4 +sg26 +S'NM_001319682.1:c.174-1G>A' +p298 +sg28 +g4 +sg29 +(dp299 +S'hg19' +p300 +(dp301 +g33 +S'NC_000012.11:g.111064166G>A' +p302 +sg35 +(dp303 +g37 +g38 +sg39 +g40 +sg41 +S'111064166' +p304 +sg43 +g44 +sssg45 +(dp305 +g33 +S'NC_000012.12:g.110626361G>A' +p306 +sg35 +(dp307 +g37 +g38 +sg39 +g40 +sg41 +S'110626361' +p308 +sg43 +g44 +sssS'grch37' +p309 +(dp310 +g33 +S'NC_000012.11:g.111064166G>A' +p311 +sg35 +(dp312 +g37 +g54 +sg39 +g40 +sg41 +S'111064166' +p313 +sg43 +g44 +sssS'grch38' +p314 +(dp315 +g33 +S'NC_000012.12:g.110626361G>A' +p316 +sg35 +(dp317 +g37 +g54 +sg39 +g40 +sg41 +S'110626361' +p318 +sg43 +g44 +ssssg61 +(dp319 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001306611.1' +p320 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001319682.1' +p321 +sssS'NM_001173975.1:c.174-1G>A' +p322 +(dp323 +g3 +g4 +sg5 +(lp324 +S'A more recent version of the selected reference sequence NM_001173975.1 is available (NM_001173975.2)' +p325 +aS'NM_001173975.2:c.174-1G>A MUST be fully validated prior to use in reports' +p326 +aS'select_variants=NM_001173975.2:c.174-1G>A' +p327 +aS'RefSeqGene record not available' +p328 +asg8 +g4 +sg9 +(lp329 +sg11 +VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 4, mRNA +p330 +sg13 +S'TCTN1' +p331 +sg15 +(dp332 +g17 +S'NP_001167446.1:p.?' +p333 +sg19 +S'NP_001167446.1:p.?' 
+p334 +ssg21 +g22 +sg23 +S'NC_000012.11(NM_001173975.1):c.174-1G>A' +p335 +sg25 +g4 +sg26 +S'NM_001173975.1:c.174-1G>A' +p336 +sg28 +g4 +sg29 +(dp337 +S'hg19' +p338 +(dp339 +g33 +S'NC_000012.11:g.111064166G>A' +p340 +sg35 +(dp341 +g37 +g38 +sg39 +g40 +sg41 +S'111064166' +p342 +sg43 +g44 +sssS'grch37' +p343 +(dp344 +g33 +S'NC_000012.11:g.111064166G>A' +p345 +sg35 +(dp346 +g37 +g54 +sg39 +g40 +sg41 +S'111064166' +p347 +sg43 +g44 +ssssg61 +(dp348 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167446.1' +p349 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001173975.1' +p350 +sssS'NM_001173975.2:c.174-1G>A' +p351 +(dp352 +g3 +g4 +sg5 +(lp353 +S'RefSeqGene record not available' +p354 +asg8 +g4 +sg9 +(lp355 +sg11 +VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 4, mRNA +p356 +sg13 +S'TCTN1' +p357 +sg15 +(dp358 +g17 +S'NP_001167446.1:p.?' +p359 +sg19 +S'NP_001167446.1:p.?' +p360 +ssg21 +g22 +sg23 +S'NC_000012.11(NM_001173975.2):c.174-1G>A' +p361 +sg25 +g4 +sg26 +S'NM_001173975.2:c.174-1G>A' +p362 +sg28 +g4 +sg29 +(dp363 +S'hg19' +p364 +(dp365 +g33 +S'NC_000012.11:g.111064166G>A' +p366 +sg35 +(dp367 +g37 +g38 +sg39 +g40 +sg41 +S'111064166' +p368 +sg43 +g44 +sssg45 +(dp369 +g33 +S'NC_000012.12:g.110626361G>A' +p370 +sg35 +(dp371 +g37 +g38 +sg39 +g40 +sg41 +S'110626361' +p372 +sg43 +g44 +sssS'grch37' +p373 +(dp374 +g33 +S'NC_000012.11:g.111064166G>A' +p375 +sg35 +(dp376 +g37 +g54 +sg39 +g40 +sg41 +S'111064166' +p377 +sg43 +g44 +sssS'grch38' +p378 +(dp379 +g33 +S'NC_000012.12:g.110626361G>A' +p380 +sg35 +(dp381 +g37 +g54 +sg39 +g40 +sg41 +S'110626361' +p382 +sg43 +g44 +ssssg61 +(dp383 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167446.1' +p384 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001173975.2' +p385 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant221.txt b/VariantValidator/testing/testOutputsMasterITS/variant221.txt new file mode 100644 index 00000000..c712e68d --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant221.txt @@ -0,0 +1,418 @@ +(dp0 +S'NM_001194995.1:c.210del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000012.11:g.123738430CA>C automapped to NC_000012.11:g.123738431delA' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens chromosome 12 open reading frame 65 (C12orf65), transcript variant 3, mRNA +p13 +sS'gene_symbol' +p14 +S'C12orf65' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_001181924.1:p.(Gly72AlafsTer13)' +p19 +sS'slr' +p20 +S'NP_001181924.1:p.(G72Afs*13)' +p21 +ssS'submitted_variant' +p22 +S'12-123738430-CA-C' +p23 +sS'genome_context_intronic_sequence' +p24 +g4 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_001194995.1:c.210del' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000012.11:g.123738431del' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'chr12' +p38 +sS'ref' +p39 +S'CA' +p40 +sS'pos' +p41 +S'123738430' +p42 +sS'alt' +p43 +S'C' +p44 +sssS'hg38' +p45 +(dp46 +g33 +S'NC_000012.12:g.123253884del' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +S'CA' +p49 +sg41 +S'123253883' +p50 +sg43 +g44 +sssS'grch37' +p51 +(dp52 +g33 +S'NC_000012.11:g.123738431del' +p53 +sg35 +(dp54 +g37 +S'12' +p55 +sg39 +S'CA' +p56 +sg41 +S'123738430' +p57 +sg43 +g44 +sssS'grch38' +p58 +(dp59 +g33 +S'NC_000012.12:g.123253884del' +p60 +sg35 +(dp61 +g37 +g55 +sg39 +S'CA' +p62 +sg41 +S'123253883' +p63 +sg43 +g44 +ssssS'reference_sequence_records' +p64 +(dp65 +S'protein' +p66 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001181924.1' +p67 +sS'transcript' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001194995.1' +p69 +sssS'flag' +p70 +S'gene_variant' +p71 +sS'metadata' +p72 +(dp73 +S'variantvalidator_hgvs_version' +p74 +S'1.1.3' +p75 +sS'uta_schema' +p76 +S'uta_20180821' +p77 +sS'seqrepo_db' +p78 +S'2018-08-21' +p79 +sS'variantvalidator_version' +p80 +S'v0.2' +p81 +ssS'NM_152269.4:c.210del' +p82 +(dp83 +g3 +g4 +sg5 +(lp84 +S'NC_000012.11:g.123738430CA>C automapped to NC_000012.11:g.123738431delA' +p85 +aS'RefSeqGene record not available' +p86 +asg9 +g4 +sg10 +(lp87 +sg12 +VHomo sapiens chromosome 12 open reading frame 65 (C12orf65), transcript variant 1, mRNA +p88 +sg14 +S'C12orf65' +p89 +sg16 +(dp90 +g18 +S'NP_689482.1:p.(Gly72AlafsTer13)' +p91 +sg20 +S'NP_689482.1:p.(G72Afs*13)' +p92 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_152269.4:c.210del' +p93 +sg28 +g4 +sg29 +(dp94 +S'hg19' +p95 +(dp96 +g33 +S'NC_000012.11:g.123738431del' +p97 +sg35 +(dp98 +g37 +g38 +sg39 +S'CA' +p99 +sg41 +S'123738430' +p100 +sg43 +g44 +sssg45 +(dp101 +g33 +S'NC_000012.12:g.123253884del' +p102 +sg35 +(dp103 +g37 +g38 +sg39 +S'CA' +p104 +sg41 +S'123253883' +p105 +sg43 +g44 +sssS'grch37' +p106 +(dp107 +g33 +S'NC_000012.11:g.123738431del' +p108 +sg35 +(dp109 +g37 +g55 +sg39 +S'CA' +p110 +sg41 +S'123738430' +p111 +sg43 +g44 +sssS'grch38' +p112 +(dp113 +g33 +S'NC_000012.12:g.123253884del' +p114 +sg35 +(dp115 +g37 +g55 +sg39 +S'CA' +p116 +sg41 +S'123253883' +p117 +sg43 +g44 +ssssg64 +(dp118 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_689482.1' +p119 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_152269.4' +p120 +sssS'NM_001143905.2:c.210del' +p121 +(dp122 +g3 +g4 +sg5 +(lp123 +S'NC_000012.11:g.123738430CA>C automapped to NC_000012.11:g.123738431delA' +p124 +aS'RefSeqGene record not available' +p125 +asg9 +g4 +sg10 +(lp126 +sg12 +VHomo sapiens chromosome 12 open reading frame 65 (C12orf65), transcript variant 2, mRNA +p127 +sg14 +S'C12orf65' +p128 +sg16 +(dp129 
+g18 +S'NP_001137377.1:p.(Gly72AlafsTer13)' +p130 +sg20 +S'NP_001137377.1:p.(G72Afs*13)' +p131 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001143905.2:c.210del' +p132 +sg28 +g4 +sg29 +(dp133 +S'hg19' +p134 +(dp135 +g33 +S'NC_000012.11:g.123738431del' +p136 +sg35 +(dp137 +g37 +g38 +sg39 +S'CA' +p138 +sg41 +S'123738430' +p139 +sg43 +g44 +sssg45 +(dp140 +g33 +S'NC_000012.12:g.123253884del' +p141 +sg35 +(dp142 +g37 +g38 +sg39 +S'CA' +p143 +sg41 +S'123253883' +p144 +sg43 +g44 +sssS'grch37' +p145 +(dp146 +g33 +S'NC_000012.11:g.123738431del' +p147 +sg35 +(dp148 +g37 +g55 +sg39 +S'CA' +p149 +sg41 +S'123738430' +p150 +sg43 +g44 +sssS'grch38' +p151 +(dp152 +g33 +S'NC_000012.12:g.123253884del' +p153 +sg35 +(dp154 +g37 +g55 +sg39 +S'CA' +p155 +sg41 +S'123253883' +p156 +sg43 +g44 +ssssg64 +(dp157 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001137377.1' +p158 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001143905.2' +p159 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant222.txt b/VariantValidator/testing/testOutputsMasterITS/variant222.txt new file mode 100644 index 00000000..f7b7223b --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant222.txt @@ -0,0 +1,177 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_194318.3:c.71-5del' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NC_000013.10:g.31789169CT>C automapped to NC_000013.10:g.31789183delT' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g6 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens beta 3-glucosyltransferase (B3GLCT), mRNA +p15 +sS'gene_symbol' +p16 +S'B3GLCT' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_919299.3:p.?' +p21 +sS'slr' +p22 +S'NP_919299.3:p.?' 
+p23 +ssS'submitted_variant' +p24 +S'13-31789169-CT-C' +p25 +sS'genome_context_intronic_sequence' +p26 +S'NC_000013.10(NM_194318.3):c.71-5del' +p27 +sS'hgvs_lrg_variant' +p28 +g6 +sS'hgvs_transcript_variant' +p29 +S'NM_194318.3:c.71-5del' +p30 +sS'hgvs_refseqgene_variant' +p31 +g6 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000013.10:g.31789183del' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr13' +p41 +sS'ref' +p42 +S'CT' +p43 +sS'pos' +p44 +S'31789169' +p45 +sS'alt' +p46 +S'C' +p47 +sssS'hg38' +p48 +(dp49 +g36 +S'NC_000013.11:g.31215046del' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +S'CT' +p52 +sg44 +S'31215032' +p53 +sg46 +g47 +sssS'grch37' +p54 +(dp55 +g36 +S'NC_000013.10:g.31789183del' +p56 +sg38 +(dp57 +g40 +S'13' +p58 +sg42 +S'CT' +p59 +sg44 +S'31789169' +p60 +sg46 +g47 +sssS'grch38' +p61 +(dp62 +g36 +S'NC_000013.11:g.31215046del' +p63 +sg38 +(dp64 +g40 +g58 +sg42 +S'CT' +p65 +sg44 +S'31215032' +p66 +sg46 +g47 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_919299.3' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_194318.3' +p72 +sssS'metadata' +p73 +(dp74 +S'variantvalidator_hgvs_version' +p75 +S'1.1.3' +p76 +sS'uta_schema' +p77 +S'uta_20180821' +p78 +sS'seqrepo_db' +p79 +S'2018-08-21' +p80 +sS'variantvalidator_version' +p81 +S'v0.2' +p82 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant223.txt b/VariantValidator/testing/testOutputsMasterITS/variant223.txt new file mode 100644 index 00000000..451a92b5 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant223.txt @@ -0,0 +1,515 @@ +(dp0 +S'NR_144368.1:n.214-3552C>T' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens uncharacterized LOC105370526 (LOC105370526), long non-coding RNA +p12 +sS'gene_symbol' +p13 +S'LOC105370526' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'Non-coding :n.' +p18 +sS'slr' +p19 +g18 +ssS'submitted_variant' +p20 +S'14-62187287-G-A' +p21 +sS'genome_context_intronic_sequence' +p22 +S'NC_000014.8(NR_144368.1):c.214-3552C>T' +p23 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NR_144368.1:n.214-3552C>T' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000014.8:g.62187287G>A' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr14' +p37 +sS'ref' +p38 +VG +p39 +sS'pos' +p40 +S'62187287' +p41 +sS'alt' +p42 +VA +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000014.9:g.61720569G>A' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +g39 +sg40 +S'61720569' +p48 +sg42 +g43 +sssS'grch37' +p49 +(dp50 +g32 +S'NC_000014.8:g.62187287G>A' +p51 +sg34 +(dp52 +g36 +S'14' +p53 +sg38 +g39 +sg40 +S'62187287' +p54 +sg42 +g43 +sssS'grch38' +p55 +(dp56 +g32 +S'NC_000014.9:g.61720569G>A' +p57 +sg34 +(dp58 +g36 +g53 +sg38 +g39 +sg40 +S'61720569' +p59 +sg42 +g43 +ssssS'reference_sequence_records' +p60 +(dp61 +S'transcript' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_144368.1' +p63 +sssS'NM_181054.2:c.223G>A' +p64 +(dp65 +g3 +g4 +sg5 +(lp66 +S'RefSeqGene record not available' 
+p67 +asg8 +g4 +sg9 +(lp68 +sg11 +VHomo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 2, mRNA +p69 +sg13 +S'HIF1A' +p70 +sg15 +(dp71 +g17 +S'NP_851397.1:p.(Ala75Thr)' +p72 +sg19 +S'NP_851397.1:p.(A75T)' +p73 +ssg20 +g21 +sg22 +g4 +sg24 +g4 +sg25 +S'NM_181054.2:c.223G>A' +p74 +sg27 +g4 +sg28 +(dp75 +S'hg19' +p76 +(dp77 +g32 +S'NC_000014.8:g.62187287G>A' +p78 +sg34 +(dp79 +g36 +g37 +sg38 +S'G' +p80 +sg40 +S'62187287' +p81 +sg42 +S'A' +p82 +sssg44 +(dp83 +g32 +S'NC_000014.9:g.61720569G>A' +p84 +sg34 +(dp85 +g36 +g37 +sg38 +g80 +sg40 +S'61720569' +p86 +sg42 +g82 +sssS'grch37' +p87 +(dp88 +g32 +S'NC_000014.8:g.62187287G>A' +p89 +sg34 +(dp90 +g36 +g53 +sg38 +g80 +sg40 +S'62187287' +p91 +sg42 +g82 +sssS'grch38' +p92 +(dp93 +g32 +S'NC_000014.9:g.61720569G>A' +p94 +sg34 +(dp95 +g36 +g53 +sg38 +g80 +sg40 +S'61720569' +p96 +sg42 +g82 +ssssg60 +(dp97 +S'protein' +p98 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_851397.1' +p99 +sg62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_181054.2' +p100 +sssS'flag' +p101 +S'gene_variant' +p102 +sS'NM_001243084.1:c.295G>A' +p103 +(dp104 +g3 +g4 +sg5 +(lp105 +S'RefSeqGene record not available' +p106 +asg8 +g4 +sg9 +(lp107 +sg11 +VHomo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 3, mRNA +p108 +sg13 +S'HIF1A' +p109 +sg15 +(dp110 +g17 +S'NP_001230013.1:p.(Ala99Thr)' +p111 +sg19 +S'NP_001230013.1:p.(A99T)' +p112 +ssg20 +g21 +sg22 +g4 +sg24 +g4 +sg25 +S'NM_001243084.1:c.295G>A' +p113 +sg27 +g4 +sg28 +(dp114 +S'hg19' +p115 +(dp116 +g32 +S'NC_000014.8:g.62187287G>A' +p117 +sg34 +(dp118 +g36 +g37 +sg38 +g80 +sg40 +S'62187287' +p119 +sg42 +g82 +sssg44 +(dp120 +g32 +S'NC_000014.9:g.61720569G>A' +p121 +sg34 +(dp122 +g36 +g37 +sg38 +g80 +sg40 +S'61720569' +p123 +sg42 +g82 +sssS'grch37' +p124 +(dp125 +g32 +S'NC_000014.8:g.62187287G>A' +p126 +sg34 +(dp127 +g36 +g53 +sg38 +g80 +sg40 +S'62187287' +p128 +sg42 +g82 +sssS'grch38' +p129 +(dp130 +g32 +S'NC_000014.9:g.61720569G>A' +p131 +sg34 +(dp132 
+g36 +g53 +sg38 +g80 +sg40 +S'61720569' +p133 +sg42 +g82 +ssssg60 +(dp134 +g98 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001230013.1' +p135 +sg62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001243084.1' +p136 +sssS'NM_001530.3:c.223G>A' +p137 +(dp138 +g3 +g4 +sg5 +(lp139 +S'RefSeqGene record not available' +p140 +asg8 +g4 +sg9 +(lp141 +sg11 +VHomo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 1, mRNA +p142 +sg13 +S'HIF1A' +p143 +sg15 +(dp144 +g17 +S'NP_001521.1:p.(Ala75Thr)' +p145 +sg19 +S'NP_001521.1:p.(A75T)' +p146 +ssg20 +g21 +sg22 +g4 +sg24 +g4 +sg25 +S'NM_001530.3:c.223G>A' +p147 +sg27 +g4 +sg28 +(dp148 +S'hg19' +p149 +(dp150 +g32 +S'NC_000014.8:g.62187287G>A' +p151 +sg34 +(dp152 +g36 +g37 +sg38 +g80 +sg40 +S'62187287' +p153 +sg42 +g82 +sssg44 +(dp154 +g32 +S'NC_000014.9:g.61720569G>A' +p155 +sg34 +(dp156 +g36 +g37 +sg38 +g80 +sg40 +S'61720569' +p157 +sg42 +g82 +sssS'grch37' +p158 +(dp159 +g32 +S'NC_000014.8:g.62187287G>A' +p160 +sg34 +(dp161 +g36 +g53 +sg38 +g80 +sg40 +S'62187287' +p162 +sg42 +g82 +sssS'grch38' +p163 +(dp164 +g32 +S'NC_000014.9:g.61720569G>A' +p165 +sg34 +(dp166 +g36 +g53 +sg38 +g80 +sg40 +S'61720569' +p167 +sg42 +g82 +ssssg60 +(dp168 +g98 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001521.1' +p169 +sg62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001530.3' +p170 +sssS'metadata' +p171 +(dp172 +S'variantvalidator_hgvs_version' +p173 +S'1.1.3' +p174 +sS'uta_schema' +p175 +S'uta_20180821' +p176 +sS'seqrepo_db' +p177 +S'2018-08-21' +p178 +sS'variantvalidator_version' +p179 +S'v0.2' +p180 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant224.txt b/VariantValidator/testing/testOutputsMasterITS/variant224.txt new file mode 100644 index 00000000..90c4acf7 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant224.txt @@ -0,0 +1,540 @@ +(dp0 +S'NR_144368.1:n.214-4497_214-4496delinsTC' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000014.8:g.62188231TT>GA automapped to NC_000014.8:g.62188231_62188232delTTinsGA' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens uncharacterized LOC105370526 (LOC105370526), long non-coding RNA +p13 +sS'gene_symbol' +p14 +S'LOC105370526' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'Non-coding :n.' +p19 +sS'slr' +p20 +g19 +ssS'submitted_variant' +p21 +S'14-62188231-TT-GA' +p22 +sS'genome_context_intronic_sequence' +p23 +S'NC_000014.8(NR_144368.1):c.214-4497_214-4496delinsTC' +p24 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NR_144368.1:n.214-4497_214-4496delinsTC' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000014.8:g.62188231_62188232delinsGA' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'chr14' +p38 +sS'ref' +p39 +S'TT' +p40 +sS'pos' +p41 +S'62188231' +p42 +sS'alt' +p43 +VGA +p44 +sssS'hg38' +p45 +(dp46 +g33 +S'NC_000014.9:g.61721513_61721514delinsGA' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +S'TT' +p49 +sg41 +S'61721513' +p50 +sg43 +VGA +p51 +sssS'grch37' +p52 +(dp53 +g33 +S'NC_000014.8:g.62188231_62188232delinsGA' +p54 +sg35 +(dp55 +g37 +S'14' +p56 +sg39 +S'TT' +p57 +sg41 +S'62188231' +p58 +sg43 +g44 +sssS'grch38' +p59 +(dp60 +g33 +S'NC_000014.9:g.61721513_61721514delinsGA' +p61 +sg35 +(dp62 +g37 +g56 +sg39 +S'TT' +p63 +sg41 +S'61721513' +p64 +sg43 
+g51 +ssssS'reference_sequence_records' +p65 +(dp66 +S'transcript' +p67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_144368.1' +p68 +sssS'NM_001530.3:c.231_232delinsGA' +p69 +(dp70 +g3 +g4 +sg5 +(lp71 +S'NC_000014.8:g.62188231TT>GA automapped to NC_000014.8:g.62188231_62188232delTTinsGA' +p72 +aS'RefSeqGene record not available' +p73 +asg9 +g4 +sg10 +(lp74 +sg12 +VHomo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 1, mRNA +p75 +sg14 +S'HIF1A' +p76 +sg16 +(dp77 +g18 +S'NP_001521.1:p.(Asp77_Leu78delinsGluMet)' +p78 +sg20 +S'NP_001521.1:p.(D77_L78delinsEM)' +p79 +ssg21 +g22 +sg23 +g4 +sg25 +g4 +sg26 +S'NM_001530.3:c.231_232delinsGA' +p80 +sg28 +g4 +sg29 +(dp81 +S'hg19' +p82 +(dp83 +g33 +S'NC_000014.8:g.62188231_62188232delinsGA' +p84 +sg35 +(dp85 +g37 +g38 +sg39 +S'TT' +p86 +sg41 +S'62188231' +p87 +sg43 +S'GA' +p88 +sssg45 +(dp89 +g33 +S'NC_000014.9:g.61721513_61721514delinsGA' +p90 +sg35 +(dp91 +g37 +g38 +sg39 +S'TT' +p92 +sg41 +S'61721513' +p93 +sg43 +g88 +sssS'grch37' +p94 +(dp95 +g33 +S'NC_000014.8:g.62188231_62188232delinsGA' +p96 +sg35 +(dp97 +g37 +g56 +sg39 +S'TT' +p98 +sg41 +S'62188231' +p99 +sg43 +g88 +sssS'grch38' +p100 +(dp101 +g33 +S'NC_000014.9:g.61721513_61721514delinsGA' +p102 +sg35 +(dp103 +g37 +g56 +sg39 +S'TT' +p104 +sg41 +S'61721513' +p105 +sg43 +g88 +ssssg65 +(dp106 +S'protein' +p107 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001521.1' +p108 +sg67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001530.3' +p109 +sssS'flag' +p110 +S'gene_variant' +p111 +sS'NM_001243084.1:c.303_304delinsGA' +p112 +(dp113 +g3 +g4 +sg5 +(lp114 +S'NC_000014.8:g.62188231TT>GA automapped to NC_000014.8:g.62188231_62188232delTTinsGA' +p115 +aS'RefSeqGene record not available' +p116 +asg9 +g4 +sg10 +(lp117 +sg12 +VHomo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 3, mRNA +p118 +sg14 +S'HIF1A' +p119 +sg16 +(dp120 +g18 +S'NP_001230013.1:p.(Asp101_Leu102delinsGluMet)' +p121 +sg20 +S'NP_001230013.1:p.(D101_L102delinsEM)' 
+p122 +ssg21 +g22 +sg23 +g4 +sg25 +g4 +sg26 +S'NM_001243084.1:c.303_304delinsGA' +p123 +sg28 +g4 +sg29 +(dp124 +S'hg19' +p125 +(dp126 +g33 +S'NC_000014.8:g.62188231_62188232delinsGA' +p127 +sg35 +(dp128 +g37 +g38 +sg39 +S'TT' +p129 +sg41 +S'62188231' +p130 +sg43 +S'GA' +p131 +sssg45 +(dp132 +g33 +S'NC_000014.9:g.61721513_61721514delinsGA' +p133 +sg35 +(dp134 +g37 +g38 +sg39 +S'TT' +p135 +sg41 +S'61721513' +p136 +sg43 +g131 +sssS'grch37' +p137 +(dp138 +g33 +S'NC_000014.8:g.62188231_62188232delinsGA' +p139 +sg35 +(dp140 +g37 +g56 +sg39 +S'TT' +p141 +sg41 +S'62188231' +p142 +sg43 +g131 +sssS'grch38' +p143 +(dp144 +g33 +S'NC_000014.9:g.61721513_61721514delinsGA' +p145 +sg35 +(dp146 +g37 +g56 +sg39 +S'TT' +p147 +sg41 +S'61721513' +p148 +sg43 +g131 +ssssg65 +(dp149 +g107 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001230013.1' +p150 +sg67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001243084.1' +p151 +sssS'NM_181054.2:c.231_232delinsGA' +p152 +(dp153 +g3 +g4 +sg5 +(lp154 +S'NC_000014.8:g.62188231TT>GA automapped to NC_000014.8:g.62188231_62188232delTTinsGA' +p155 +aS'RefSeqGene record not available' +p156 +asg9 +g4 +sg10 +(lp157 +sg12 +VHomo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 2, mRNA +p158 +sg14 +S'HIF1A' +p159 +sg16 +(dp160 +g18 +S'NP_851397.1:p.(Asp77_Leu78delinsGluMet)' +p161 +sg20 +S'NP_851397.1:p.(D77_L78delinsEM)' +p162 +ssg21 +g22 +sg23 +g4 +sg25 +g4 +sg26 +S'NM_181054.2:c.231_232delinsGA' +p163 +sg28 +g4 +sg29 +(dp164 +S'hg19' +p165 +(dp166 +g33 +S'NC_000014.8:g.62188231_62188232delinsGA' +p167 +sg35 +(dp168 +g37 +g38 +sg39 +S'TT' +p169 +sg41 +S'62188231' +p170 +sg43 +S'GA' +p171 +sssg45 +(dp172 +g33 +S'NC_000014.9:g.61721513_61721514delinsGA' +p173 +sg35 +(dp174 +g37 +g38 +sg39 +S'TT' +p175 +sg41 +S'61721513' +p176 +sg43 +g171 +sssS'grch37' +p177 +(dp178 +g33 +S'NC_000014.8:g.62188231_62188232delinsGA' +p179 +sg35 +(dp180 +g37 +g56 +sg39 +S'TT' +p181 +sg41 +S'62188231' +p182 +sg43 +g171 +sssS'grch38' +p183 +(dp184 +g33 
+S'NC_000014.9:g.61721513_61721514delinsGA' +p185 +sg35 +(dp186 +g37 +g56 +sg39 +S'TT' +p187 +sg41 +S'61721513' +p188 +sg43 +g171 +ssssg65 +(dp189 +g107 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_851397.1' +p190 +sg67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_181054.2' +p191 +sssS'metadata' +p192 +(dp193 +S'variantvalidator_hgvs_version' +p194 +S'1.1.3' +p195 +sS'uta_schema' +p196 +S'uta_20180821' +p197 +sS'seqrepo_db' +p198 +S'2018-08-21' +p199 +sS'variantvalidator_version' +p200 +S'v0.2' +p201 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant225.txt b/VariantValidator/testing/testOutputsMasterITS/variant225.txt new file mode 100644 index 00000000..69eaacd9 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant225.txt @@ -0,0 +1,462 @@ +(dp0 +S'NM_139318.3:c.2366G>T' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'A more recent version of the selected reference sequence NM_139318.3 is available (NM_139318.4)' +p7 +aS'NM_139318.4:c.2366G>T MUST be fully validated prior to use in reports' +p8 +aS'select_variants=NM_139318.4:c.2366G>T' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g4 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens potassium voltage-gated channel, subfamily H (eag-related), member 5 (KCNH5), transcript variant 1, mRNA +p15 +sS'gene_symbol' +p16 +S'KCNH5' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_647479.2:p.(Gly789Val)' +p21 +sS'slr' +p22 +S'NP_647479.2:p.(G789V)' +p23 +ssS'submitted_variant' +p24 +S'14-63174827-C-A' +p25 +sS'genome_context_intronic_sequence' +p26 +g4 +sS'hgvs_lrg_variant' +p27 +g4 +sS'hgvs_transcript_variant' +p28 +S'NM_139318.3:c.2366G>T' +p29 +sS'hgvs_refseqgene_variant' +p30 +g4 +sS'primary_assembly_loci' +p31 +(dp32 +S'hg19' +p33 +(dp34 +S'hgvs_genomic_description' +p35 +S'NC_000014.8:g.63174827C>A' 
+p36 +sS'vcf' +p37 +(dp38 +S'chr' +p39 +S'chr14' +p40 +sS'ref' +p41 +VC +p42 +sS'pos' +p43 +S'63174827' +p44 +sS'alt' +p45 +VA +p46 +sssS'grch37' +p47 +(dp48 +g35 +S'NC_000014.8:g.63174827C>A' +p49 +sg37 +(dp50 +g39 +S'14' +p51 +sg41 +g42 +sg43 +S'63174827' +p52 +sg45 +g46 +ssssS'reference_sequence_records' +p53 +(dp54 +S'protein' +p55 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_647479.2' +p56 +sS'transcript' +p57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_139318.3' +p58 +sssS'NM_172375.1:c.*333G>T' +p59 +(dp60 +g3 +g4 +sg5 +(lp61 +S'A more recent version of the selected reference sequence NM_172375.1 is available (NM_172375.2)' +p62 +aS'NM_172375.2:c.*333G>T MUST be fully validated prior to use in reports' +p63 +aS'select_variants=NM_172375.2:c.*333G>T' +p64 +aS'RefSeqGene record not available' +p65 +asg11 +g4 +sg12 +(lp66 +sg14 +VHomo sapiens potassium voltage-gated channel, subfamily H (eag-related), member 5 (KCNH5), transcript variant 3, mRNA +p67 +sg16 +S'KCNH5' +p68 +sg18 +(dp69 +g20 +S'NP_758963.1:p.?' +p70 +sg22 +S'NP_758963.1:p.?' +p71 +ssg24 +g25 +sg26 +g4 +sg27 +g4 +sg28 +S'NM_172375.1:c.*333G>T' +p72 +sg30 +g4 +sg31 +(dp73 +S'hg19' +p74 +(dp75 +g35 +S'NC_000014.8:g.63174827C>A' +p76 +sg37 +(dp77 +g39 +g40 +sg41 +g42 +sg43 +S'63174827' +p78 +sg45 +g46 +sssS'grch37' +p79 +(dp80 +g35 +S'NC_000014.8:g.63174827C>A' +p81 +sg37 +(dp82 +g39 +g51 +sg41 +g42 +sg43 +S'63174827' +p83 +sg45 +g46 +ssssg53 +(dp84 +g55 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_758963.1' +p85 +sg57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_172375.1' +p86 +sssS'NM_172375.2:c.*333G>T' +p87 +(dp88 +g3 +g4 +sg5 +(lp89 +S'RefSeqGene record not available' +p90 +asg11 +g4 +sg12 +(lp91 +sg14 +VHomo sapiens potassium voltage-gated channel subfamily H member 5 (KCNH5), transcript variant 3, mRNA +p92 +sg16 +S'KCNH5' +p93 +sg18 +(dp94 +g20 +S'NP_758963.1:p.?' +p95 +sg22 +S'NP_758963.1:p.?' 
+p96 +ssg24 +g25 +sg26 +g4 +sg27 +g4 +sg28 +S'NM_172375.2:c.*333G>T' +p97 +sg30 +g4 +sg31 +(dp98 +S'hg19' +p99 +(dp100 +g35 +S'NC_000014.8:g.63174827C>A' +p101 +sg37 +(dp102 +g39 +g40 +sg41 +g42 +sg43 +S'63174827' +p103 +sg45 +g46 +sssS'hg38' +p104 +(dp105 +g35 +S'NC_000014.9:g.62708109C>A' +p106 +sg37 +(dp107 +g39 +g40 +sg41 +g42 +sg43 +S'62708109' +p108 +sg45 +g46 +sssS'grch37' +p109 +(dp110 +g35 +S'NC_000014.8:g.63174827C>A' +p111 +sg37 +(dp112 +g39 +g51 +sg41 +g42 +sg43 +S'63174827' +p113 +sg45 +g46 +sssS'grch38' +p114 +(dp115 +g35 +S'NC_000014.9:g.62708109C>A' +p116 +sg37 +(dp117 +g39 +g51 +sg41 +g42 +sg43 +S'62708109' +p118 +sg45 +g46 +ssssg53 +(dp119 +g55 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_758963.1' +p120 +sg57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_172375.2' +p121 +sssS'flag' +p122 +S'gene_variant' +p123 +sS'NM_139318.4:c.2366G>T' +p124 +(dp125 +g3 +g4 +sg5 +(lp126 +S'RefSeqGene record not available' +p127 +asg11 +g4 +sg12 +(lp128 +sg14 +VHomo sapiens potassium voltage-gated channel subfamily H member 5 (KCNH5), transcript variant 1, mRNA +p129 +sg16 +S'KCNH5' +p130 +sg18 +(dp131 +g20 +S'NP_647479.2:p.(Gly789Val)' +p132 +sg22 +S'NP_647479.2:p.(G789V)' +p133 +ssg24 +g25 +sg26 +g4 +sg27 +g4 +sg28 +S'NM_139318.4:c.2366G>T' +p134 +sg30 +g4 +sg31 +(dp135 +S'hg19' +p136 +(dp137 +g35 +S'NC_000014.8:g.63174827C>A' +p138 +sg37 +(dp139 +g39 +g40 +sg41 +g42 +sg43 +S'63174827' +p140 +sg45 +g46 +sssg104 +(dp141 +g35 +S'NC_000014.9:g.62708109C>A' +p142 +sg37 +(dp143 +g39 +g40 +sg41 +g42 +sg43 +S'62708109' +p144 +sg45 +g46 +sssS'grch37' +p145 +(dp146 +g35 +S'NC_000014.8:g.63174827C>A' +p147 +sg37 +(dp148 +g39 +g51 +sg41 +g42 +sg43 +S'63174827' +p149 +sg45 +g46 +sssS'grch38' +p150 +(dp151 +g35 +S'NC_000014.9:g.62708109C>A' +p152 +sg37 +(dp153 +g39 +g51 +sg41 +g42 +sg43 +S'62708109' +p154 +sg45 +g46 +ssssg53 +(dp155 +g55 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_647479.2' +p156 +sg57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_139318.4' +p157 +sssS'metadata' 
+p158 +(dp159 +S'variantvalidator_hgvs_version' +p160 +S'1.1.3' +p161 +sS'uta_schema' +p162 +S'uta_20180821' +p163 +sS'seqrepo_db' +p164 +S'2018-08-21' +p165 +sS'variantvalidator_version' +p166 +S'v0.2' +p167 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant226.txt b/VariantValidator/testing/testOutputsMasterITS/variant226.txt new file mode 100644 index 00000000..1d9712ce --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant226.txt @@ -0,0 +1,418 @@ +(dp0 +S'NM_000070.2:c.550del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000015.9:g.42680000CA>C automapped to NC_000015.9:g.42680002delA' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens calpain 3 (CAPN3), transcript variant 1, mRNA +p13 +sS'gene_symbol' +p14 +S'CAPN3' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_000061.1:p.(Thr184ArgfsTer36)' +p19 +sS'slr' +p20 +S'NP_000061.1:p.(T184Rfs*36)' +p21 +ssS'submitted_variant' +p22 +S'15-42680000-CA-C' +p23 +sS'genome_context_intronic_sequence' +p24 +g4 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_000070.2:c.550del' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'grch38' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000015.10:g.42387804del' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'15' +p38 +sS'ref' +p39 +S'CA' +p40 +sS'pos' +p41 +S'42387802' +p42 +sS'alt' +p43 +S'C' +p44 +sssS'grch37' +p45 +(dp46 +g33 +S'NC_000015.9:g.42680002del' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +S'CA' +p49 +sg41 +S'42680000' +p50 +sg43 +g44 +sssS'hg38' +p51 +(dp52 +g33 +S'NC_000015.10:g.42387804del' +p53 +sg35 +(dp54 +g37 +S'chr15' +p55 +sg39 +S'CA' +p56 +sg41 +S'42387802' +p57 +sg43 +g44 +sssS'hg19' +p58 +(dp59 +g33 +S'NC_000015.9:g.42680002del' +p60 +sg35 
+(dp61 +g37 +g55 +sg39 +S'CA' +p62 +sg41 +S'42680000' +p63 +sg43 +g44 +ssssS'reference_sequence_records' +p64 +(dp65 +S'protein' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000061.1' +p67 +sS'transcript' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000070.2' +p69 +sssS'flag' +p70 +S'gene_variant' +p71 +sS'NM_024344.1:c.550del' +p72 +(dp73 +g3 +g4 +sg5 +(lp74 +S'NC_000015.9:g.42680000CA>C automapped to NC_000015.9:g.42680002delA' +p75 +aS'RefSeqGene record not available' +p76 +asg9 +g4 +sg10 +(lp77 +sg12 +VHomo sapiens calpain 3 (CAPN3), transcript variant 2, mRNA +p78 +sg14 +S'CAPN3' +p79 +sg16 +(dp80 +g18 +S'NP_077320.1:p.(Thr184ArgfsTer36)' +p81 +sg20 +S'NP_077320.1:p.(T184Rfs*36)' +p82 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_024344.1:c.550del' +p83 +sg28 +g4 +sg29 +(dp84 +S'grch38' +p85 +(dp86 +g33 +S'NC_000015.10:g.42387804del' +p87 +sg35 +(dp88 +g37 +g38 +sg39 +S'CA' +p89 +sg41 +S'42387802' +p90 +sg43 +g44 +sssS'grch37' +p91 +(dp92 +g33 +S'NC_000015.9:g.42680002del' +p93 +sg35 +(dp94 +g37 +g38 +sg39 +S'CA' +p95 +sg41 +S'42680000' +p96 +sg43 +g44 +sssg51 +(dp97 +g33 +S'NC_000015.10:g.42387804del' +p98 +sg35 +(dp99 +g37 +g55 +sg39 +S'CA' +p100 +sg41 +S'42387802' +p101 +sg43 +g44 +sssS'hg19' +p102 +(dp103 +g33 +S'NC_000015.9:g.42680002del' +p104 +sg35 +(dp105 +g37 +g55 +sg39 +S'CA' +p106 +sg41 +S'42680000' +p107 +sg43 +g44 +ssssg64 +(dp108 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_077320.1' +p109 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_024344.1' +p110 +sssS'NM_173087.1:c.550del' +p111 +(dp112 +g3 +g4 +sg5 +(lp113 +S'NC_000015.9:g.42680000CA>C automapped to NC_000015.9:g.42680002delA' +p114 +aS'RefSeqGene record not available' +p115 +asg9 +g4 +sg10 +(lp116 +sg12 +VHomo sapiens calpain 3 (CAPN3), transcript variant 3, mRNA +p117 +sg14 +S'CAPN3' +p118 +sg16 +(dp119 +g18 +S'NP_775110.1:p.(Thr184ArgfsTer36)' +p120 +sg20 +S'NP_775110.1:p.(T184Rfs*36)' +p121 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_173087.1:c.550del' +p122 +sg28 +g4 +sg29 
+(dp123 +S'grch38' +p124 +(dp125 +g33 +S'NC_000015.10:g.42387804del' +p126 +sg35 +(dp127 +g37 +g38 +sg39 +S'CA' +p128 +sg41 +S'42387802' +p129 +sg43 +g44 +sssS'grch37' +p130 +(dp131 +g33 +S'NC_000015.9:g.42680002del' +p132 +sg35 +(dp133 +g37 +g38 +sg39 +S'CA' +p134 +sg41 +S'42680000' +p135 +sg43 +g44 +sssg51 +(dp136 +g33 +S'NC_000015.10:g.42387804del' +p137 +sg35 +(dp138 +g37 +g55 +sg39 +S'CA' +p139 +sg41 +S'42387802' +p140 +sg43 +g44 +sssS'hg19' +p141 +(dp142 +g33 +S'NC_000015.9:g.42680002del' +p143 +sg35 +(dp144 +g37 +g55 +sg39 +S'CA' +p145 +sg41 +S'42680000' +p146 +sg43 +g44 +ssssg64 +(dp147 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_775110.1' +p148 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_173087.1' +p149 +sssS'metadata' +p150 +(dp151 +S'variantvalidator_hgvs_version' +p152 +S'1.1.3' +p153 +sS'uta_schema' +p154 +S'uta_20180821' +p155 +sS'seqrepo_db' +p156 +S'2018-08-21' +p157 +sS'variantvalidator_version' +p158 +S'v0.2' +p159 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant227.txt b/VariantValidator/testing/testOutputsMasterITS/variant227.txt new file mode 100644 index 00000000..234808b3 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant227.txt @@ -0,0 +1,418 @@ +(dp0 +S'NM_024344.1:c.550dup' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000015.9:g.42680000CA>CAA automapped to NC_000015.9:g.42680002dupA' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens calpain 3 (CAPN3), transcript variant 2, mRNA +p13 +sS'gene_symbol' +p14 +S'CAPN3' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_077320.1:p.(Thr184AsnfsTer16)' +p19 +sS'slr' +p20 +S'NP_077320.1:p.(T184Nfs*16)' +p21 +ssS'submitted_variant' +p22 +S'15-42680000-CA-CAA' +p23 +sS'genome_context_intronic_sequence' +p24 +g4 
+sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_024344.1:c.550dup' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'grch38' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000015.10:g.42387804dup' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'15' +p38 +sS'ref' +p39 +S'A' +p40 +sS'pos' +p41 +S'42387803' +p42 +sS'alt' +p43 +S'AA' +p44 +sssS'grch37' +p45 +(dp46 +g33 +S'NC_000015.9:g.42680002dup' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +g40 +sg41 +S'42680001' +p49 +sg43 +S'AA' +p50 +sssS'hg38' +p51 +(dp52 +g33 +S'NC_000015.10:g.42387804dup' +p53 +sg35 +(dp54 +g37 +S'chr15' +p55 +sg39 +g40 +sg41 +S'42387803' +p56 +sg43 +S'AA' +p57 +sssS'hg19' +p58 +(dp59 +g33 +S'NC_000015.9:g.42680002dup' +p60 +sg35 +(dp61 +g37 +g55 +sg39 +g40 +sg41 +S'42680001' +p62 +sg43 +S'AA' +p63 +ssssS'reference_sequence_records' +p64 +(dp65 +S'protein' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_077320.1' +p67 +sS'transcript' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_024344.1' +p69 +sssS'NM_173087.1:c.550dup' +p70 +(dp71 +g3 +g4 +sg5 +(lp72 +S'NC_000015.9:g.42680000CA>CAA automapped to NC_000015.9:g.42680002dupA' +p73 +aS'RefSeqGene record not available' +p74 +asg9 +g4 +sg10 +(lp75 +sg12 +VHomo sapiens calpain 3 (CAPN3), transcript variant 3, mRNA +p76 +sg14 +S'CAPN3' +p77 +sg16 +(dp78 +g18 +S'NP_775110.1:p.(Thr184AsnfsTer16)' +p79 +sg20 +S'NP_775110.1:p.(T184Nfs*16)' +p80 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_173087.1:c.550dup' +p81 +sg28 +g4 +sg29 +(dp82 +S'grch38' +p83 +(dp84 +g33 +S'NC_000015.10:g.42387804dup' +p85 +sg35 +(dp86 +g37 +g38 +sg39 +g40 +sg41 +S'42387803' +p87 +sg43 +S'AA' +p88 +sssS'grch37' +p89 +(dp90 +g33 +S'NC_000015.9:g.42680002dup' +p91 +sg35 +(dp92 +g37 +g38 +sg39 +g40 +sg41 +S'42680001' +p93 +sg43 +S'AA' +p94 +sssg51 +(dp95 +g33 +S'NC_000015.10:g.42387804dup' +p96 +sg35 +(dp97 +g37 +g55 +sg39 +g40 +sg41 +S'42387803' +p98 +sg43 +S'AA' +p99 +sssS'hg19' +p100 +(dp101 +g33 +S'NC_000015.9:g.42680002dup' +p102 
+sg35 +(dp103 +g37 +g55 +sg39 +g40 +sg41 +S'42680001' +p104 +sg43 +S'AA' +p105 +ssssg64 +(dp106 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_775110.1' +p107 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_173087.1' +p108 +sssS'flag' +p109 +S'gene_variant' +p110 +sS'NM_000070.2:c.550dup' +p111 +(dp112 +g3 +g4 +sg5 +(lp113 +S'NC_000015.9:g.42680000CA>CAA automapped to NC_000015.9:g.42680002dupA' +p114 +aS'RefSeqGene record not available' +p115 +asg9 +g4 +sg10 +(lp116 +sg12 +VHomo sapiens calpain 3 (CAPN3), transcript variant 1, mRNA +p117 +sg14 +S'CAPN3' +p118 +sg16 +(dp119 +g18 +S'NP_000061.1:p.(Thr184AsnfsTer16)' +p120 +sg20 +S'NP_000061.1:p.(T184Nfs*16)' +p121 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_000070.2:c.550dup' +p122 +sg28 +g4 +sg29 +(dp123 +S'grch38' +p124 +(dp125 +g33 +S'NC_000015.10:g.42387804dup' +p126 +sg35 +(dp127 +g37 +g38 +sg39 +g40 +sg41 +S'42387803' +p128 +sg43 +S'AA' +p129 +sssS'grch37' +p130 +(dp131 +g33 +S'NC_000015.9:g.42680002dup' +p132 +sg35 +(dp133 +g37 +g38 +sg39 +g40 +sg41 +S'42680001' +p134 +sg43 +S'AA' +p135 +sssg51 +(dp136 +g33 +S'NC_000015.10:g.42387804dup' +p137 +sg35 +(dp138 +g37 +g55 +sg39 +g40 +sg41 +S'42387803' +p139 +sg43 +S'AA' +p140 +sssS'hg19' +p141 +(dp142 +g33 +S'NC_000015.9:g.42680002dup' +p143 +sg35 +(dp144 +g37 +g55 +sg39 +g40 +sg41 +S'42680001' +p145 +sg43 +S'AA' +p146 +ssssg64 +(dp147 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000061.1' +p148 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000070.2' +p149 +sssS'metadata' +p150 +(dp151 +S'variantvalidator_hgvs_version' +p152 +S'1.1.3' +p153 +sS'uta_schema' +p154 +S'uta_20180821' +p155 +sS'seqrepo_db' +p156 +S'2018-08-21' +p157 +sS'variantvalidator_version' +p158 +S'v0.2' +p159 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant228.txt b/VariantValidator/testing/testOutputsMasterITS/variant228.txt new file mode 100644 index 00000000..23e460bb --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant228.txt @@ -0,0 +1,781 @@ +(dp0 +S'NM_173088.1:c.825_826insTCA' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000015.9:g.42703179T>TTCA automapped to NC_000015.9:g.42703179_42703180insTCA' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens calpain 3 (CAPN3), transcript variant 4, mRNA +p13 +sS'gene_symbol' +p14 +S'CAPN3' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_775111.1:p.(Val275_Arg276insSer)' +p19 +sS'slr' +p20 +S'NP_775111.1:p.(V275_R276insS)' +p21 +ssS'submitted_variant' +p22 +S'15-42703179-T-TTCA' +p23 +sS'genome_context_intronic_sequence' +p24 +g4 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_173088.1:c.825_826insTCA' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'grch38' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000015.10:g.42410981_42410982insTCA' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'15' +p38 +sS'ref' +p39 +S'T' +p40 +sS'pos' +p41 +S'42410981' +p42 +sS'alt' +p43 +S'TTCA' +p44 +sssS'grch37' +p45 +(dp46 +g33 +S'NC_000015.9:g.42703179_42703180insTCA' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +g40 +sg41 +S'42703179' +p49 +sg43 +S'TTCA' +p50 +sssS'hg38' +p51 +(dp52 +g33 +S'NC_000015.10:g.42410981_42410982insTCA' +p53 +sg35 +(dp54 +g37 +S'chr15' +p55 +sg39 +g40 +sg41 +S'42410981' +p56 +sg43 +S'TTCA' +p57 +sssS'hg19' +p58 +(dp59 +g33 +S'NC_000015.9:g.42703179_42703180insTCA' +p60 +sg35 +(dp61 +g37 +g55 +sg39 +g40 +sg41 +S'42703179' +p62 +sg43 +S'TTCA' +p63 +ssssS'reference_sequence_records' +p64 +(dp65 
+S'protein' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_775111.1' +p67 +sS'transcript' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_173088.1' +p69 +sssS'NM_173090.1:c.366_367insTCA' +p70 +(dp71 +g3 +g4 +sg5 +(lp72 +S'NC_000015.9:g.42703179T>TTCA automapped to NC_000015.9:g.42703179_42703180insTCA' +p73 +aS'RefSeqGene record not available' +p74 +asg9 +g4 +sg10 +(lp75 +sg12 +VHomo sapiens calpain 3 (CAPN3), transcript variant 6, mRNA +p76 +sg14 +S'CAPN3' +p77 +sg16 +(dp78 +g18 +S'NP_775113.1:p.(Val122_Arg123insSer)' +p79 +sg20 +S'NP_775113.1:p.(V122_R123insS)' +p80 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_173090.1:c.366_367insTCA' +p81 +sg28 +g4 +sg29 +(dp82 +S'grch38' +p83 +(dp84 +g33 +S'NC_000015.10:g.42410981_42410982insTCA' +p85 +sg35 +(dp86 +g37 +g38 +sg39 +g40 +sg41 +S'42410981' +p87 +sg43 +S'TTCA' +p88 +sssS'grch37' +p89 +(dp90 +g33 +S'NC_000015.9:g.42703179_42703180insTCA' +p91 +sg35 +(dp92 +g37 +g38 +sg39 +g40 +sg41 +S'42703179' +p93 +sg43 +S'TTCA' +p94 +sssg51 +(dp95 +g33 +S'NC_000015.10:g.42410981_42410982insTCA' +p96 +sg35 +(dp97 +g37 +g55 +sg39 +g40 +sg41 +S'42410981' +p98 +sg43 +S'TTCA' +p99 +sssS'hg19' +p100 +(dp101 +g33 +S'NC_000015.9:g.42703179_42703180insTCA' +p102 +sg35 +(dp103 +g37 +g55 +sg39 +g40 +sg41 +S'42703179' +p104 +sg43 +S'TTCA' +p105 +ssssg64 +(dp106 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_775113.1' +p107 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_173090.1' +p108 +sssS'NM_173089.1:c.366_367insTCA' +p109 +(dp110 +g3 +g4 +sg5 +(lp111 +S'NC_000015.9:g.42703179T>TTCA automapped to NC_000015.9:g.42703179_42703180insTCA' +p112 +aS'RefSeqGene record not available' +p113 +asg9 +g4 +sg10 +(lp114 +sg12 +VHomo sapiens calpain 3 (CAPN3), transcript variant 5, mRNA +p115 +sg14 +S'CAPN3' +p116 +sg16 +(dp117 +g18 +S'NP_775112.1:p.(Val122_Arg123insSer)' +p118 +sg20 +S'NP_775112.1:p.(V122_R123insS)' +p119 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_173089.1:c.366_367insTCA' +p120 +sg28 +g4 +sg29 +(dp121 +S'grch38' +p122 +(dp123 
+g33 +S'NC_000015.10:g.42410981_42410982insTCA' +p124 +sg35 +(dp125 +g37 +g38 +sg39 +g40 +sg41 +S'42410981' +p126 +sg43 +S'TTCA' +p127 +sssS'grch37' +p128 +(dp129 +g33 +S'NC_000015.9:g.42703179_42703180insTCA' +p130 +sg35 +(dp131 +g37 +g38 +sg39 +g40 +sg41 +S'42703179' +p132 +sg43 +S'TTCA' +p133 +sssg51 +(dp134 +g33 +S'NC_000015.10:g.42410981_42410982insTCA' +p135 +sg35 +(dp136 +g37 +g55 +sg39 +g40 +sg41 +S'42410981' +p137 +sg43 +S'TTCA' +p138 +sssS'hg19' +p139 +(dp140 +g33 +S'NC_000015.9:g.42703179_42703180insTCA' +p141 +sg35 +(dp142 +g37 +g55 +sg39 +g40 +sg41 +S'42703179' +p143 +sg43 +S'TTCA' +p144 +ssssg64 +(dp145 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_775112.1' +p146 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_173089.1' +p147 +sssS'NM_173087.1:c.2085_2086insTCA' +p148 +(dp149 +g3 +g4 +sg5 +(lp150 +S'NC_000015.9:g.42703179T>TTCA automapped to NC_000015.9:g.42703179_42703180insTCA' +p151 +aS'RefSeqGene record not available' +p152 +asg9 +g4 +sg10 +(lp153 +sg12 +VHomo sapiens calpain 3 (CAPN3), transcript variant 3, mRNA +p154 +sg14 +S'CAPN3' +p155 +sg16 +(dp156 +g18 +S'NP_775110.1:p.(Val695_Arg696insSer)' +p157 +sg20 +S'NP_775110.1:p.(V695_R696insS)' +p158 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_173087.1:c.2085_2086insTCA' +p159 +sg28 +g4 +sg29 +(dp160 +S'grch38' +p161 +(dp162 +g33 +S'NC_000015.10:g.42410981_42410982insTCA' +p163 +sg35 +(dp164 +g37 +g38 +sg39 +g40 +sg41 +S'42410981' +p165 +sg43 +S'TTCA' +p166 +sssS'grch37' +p167 +(dp168 +g33 +S'NC_000015.9:g.42703179_42703180insTCA' +p169 +sg35 +(dp170 +g37 +g38 +sg39 +g40 +sg41 +S'42703179' +p171 +sg43 +S'TTCA' +p172 +sssg51 +(dp173 +g33 +S'NC_000015.10:g.42410981_42410982insTCA' +p174 +sg35 +(dp175 +g37 +g55 +sg39 +g40 +sg41 +S'42410981' +p176 +sg43 +S'TTCA' +p177 +sssS'hg19' +p178 +(dp179 +g33 +S'NC_000015.9:g.42703179_42703180insTCA' +p180 +sg35 +(dp181 +g37 +g55 +sg39 +g40 +sg41 +S'42703179' +p182 +sg43 +S'TTCA' +p183 +ssssg64 +(dp184 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_775110.1' 
+p185 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_173087.1' +p186 +sssS'flag' +p187 +S'gene_variant' +p188 +sS'NM_000070.2:c.2361_2362insTCA' +p189 +(dp190 +g3 +g4 +sg5 +(lp191 +S'NC_000015.9:g.42703179T>TTCA automapped to NC_000015.9:g.42703179_42703180insTCA' +p192 +aS'RefSeqGene record not available' +p193 +asg9 +g4 +sg10 +(lp194 +sg12 +VHomo sapiens calpain 3 (CAPN3), transcript variant 1, mRNA +p195 +sg14 +S'CAPN3' +p196 +sg16 +(dp197 +g18 +S'NP_000061.1:p.(Val787_Arg788insSer)' +p198 +sg20 +S'NP_000061.1:p.(V787_R788insS)' +p199 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_000070.2:c.2361_2362insTCA' +p200 +sg28 +g4 +sg29 +(dp201 +S'grch38' +p202 +(dp203 +g33 +S'NC_000015.10:g.42410981_42410982insTCA' +p204 +sg35 +(dp205 +g37 +g38 +sg39 +g40 +sg41 +S'42410981' +p206 +sg43 +S'TTCA' +p207 +sssS'grch37' +p208 +(dp209 +g33 +S'NC_000015.9:g.42703179_42703180insTCA' +p210 +sg35 +(dp211 +g37 +g38 +sg39 +g40 +sg41 +S'42703179' +p212 +sg43 +S'TTCA' +p213 +sssg51 +(dp214 +g33 +S'NC_000015.10:g.42410981_42410982insTCA' +p215 +sg35 +(dp216 +g37 +g55 +sg39 +g40 +sg41 +S'42410981' +p217 +sg43 +S'TTCA' +p218 +sssS'hg19' +p219 +(dp220 +g33 +S'NC_000015.9:g.42703179_42703180insTCA' +p221 +sg35 +(dp222 +g37 +g55 +sg39 +g40 +sg41 +S'42703179' +p223 +sg43 +S'TTCA' +p224 +ssssg64 +(dp225 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000061.1' +p226 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000070.2' +p227 +sssS'NM_024344.1:c.2343_2344insTCA' +p228 +(dp229 +g3 +g4 +sg5 +(lp230 +S'NC_000015.9:g.42703179T>TTCA automapped to NC_000015.9:g.42703179_42703180insTCA' +p231 +aS'RefSeqGene record not available' +p232 +asg9 +g4 +sg10 +(lp233 +sg12 +VHomo sapiens calpain 3 (CAPN3), transcript variant 2, mRNA +p234 +sg14 +S'CAPN3' +p235 +sg16 +(dp236 +g18 +S'NP_077320.1:p.(Val781_Arg782insSer)' +p237 +sg20 +S'NP_077320.1:p.(V781_R782insS)' +p238 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_024344.1:c.2343_2344insTCA' +p239 +sg28 +g4 +sg29 +(dp240 +S'grch38' +p241 +(dp242 +g33 
+S'NC_000015.10:g.42410981_42410982insTCA' +p243 +sg35 +(dp244 +g37 +g38 +sg39 +g40 +sg41 +S'42410981' +p245 +sg43 +S'TTCA' +p246 +sssS'grch37' +p247 +(dp248 +g33 +S'NC_000015.9:g.42703179_42703180insTCA' +p249 +sg35 +(dp250 +g37 +g38 +sg39 +g40 +sg41 +S'42703179' +p251 +sg43 +S'TTCA' +p252 +sssg51 +(dp253 +g33 +S'NC_000015.10:g.42410981_42410982insTCA' +p254 +sg35 +(dp255 +g37 +g55 +sg39 +g40 +sg41 +S'42410981' +p256 +sg43 +S'TTCA' +p257 +sssS'hg19' +p258 +(dp259 +g33 +S'NC_000015.9:g.42703179_42703180insTCA' +p260 +sg35 +(dp261 +g37 +g55 +sg39 +g40 +sg41 +S'42703179' +p262 +sg43 +S'TTCA' +p263 +ssssg64 +(dp264 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_077320.1' +p265 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_024344.1' +p266 +sssS'metadata' +p267 +(dp268 +S'variantvalidator_hgvs_version' +p269 +S'1.1.3' +p270 +sS'uta_schema' +p271 +S'uta_20180821' +p272 +sS'seqrepo_db' +p273 +S'2018-08-21' +p274 +sS'variantvalidator_version' +p275 +S'v0.2' +p276 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant229.txt b/VariantValidator/testing/testOutputsMasterITS/variant229.txt new file mode 100644 index 00000000..dcc6a4e6 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant229.txt @@ -0,0 +1,786 @@ +(dp0 +S'NM_024344.1:c.2344_2345delinsTCATCT' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000015.9:g.42703179TAG>TTCATCT automapped to NC_000015.9:g.42703180_42703181delAGinsTCATCT' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens calpain 3 (CAPN3), transcript variant 2, mRNA +p13 +sS'gene_symbol' +p14 +S'CAPN3' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_077320.1:p.(Arg782SerfsTer14)' +p19 +sS'slr' +p20 +S'NP_077320.1:p.(R782Sfs*14)' +p21 +ssS'submitted_variant' +p22 
+S'15-42703179-TAG-TTCATCT' +p23 +sS'genome_context_intronic_sequence' +p24 +g4 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_024344.1:c.2344_2345delinsTCATCT' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'grch38' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000015.10:g.42410982_42410983delinsTCATCT' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'15' +p38 +sS'ref' +p39 +S'AG' +p40 +sS'pos' +p41 +S'42410982' +p42 +sS'alt' +p43 +S'TCATCT' +p44 +sssS'grch37' +p45 +(dp46 +g33 +S'NC_000015.9:g.42703180_42703181delinsTCATCT' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +S'AG' +p49 +sg41 +S'42703180' +p50 +sg43 +g44 +sssS'hg38' +p51 +(dp52 +g33 +S'NC_000015.10:g.42410982_42410983delinsTCATCT' +p53 +sg35 +(dp54 +g37 +S'chr15' +p55 +sg39 +S'AG' +p56 +sg41 +S'42410982' +p57 +sg43 +g44 +sssS'hg19' +p58 +(dp59 +g33 +S'NC_000015.9:g.42703180_42703181delinsTCATCT' +p60 +sg35 +(dp61 +g37 +g55 +sg39 +S'AG' +p62 +sg41 +S'42703180' +p63 +sg43 +g44 +ssssS'reference_sequence_records' +p64 +(dp65 +S'protein' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_077320.1' +p67 +sS'transcript' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_024344.1' +p69 +sssS'NM_173090.1:c.367_368delinsTCATCT' +p70 +(dp71 +g3 +g4 +sg5 +(lp72 +S'NC_000015.9:g.42703179TAG>TTCATCT automapped to NC_000015.9:g.42703180_42703181delAGinsTCATCT' +p73 +aS'RefSeqGene record not available' +p74 +asg9 +g4 +sg10 +(lp75 +sg12 +VHomo sapiens calpain 3 (CAPN3), transcript variant 6, mRNA +p76 +sg14 +S'CAPN3' +p77 +sg16 +(dp78 +g18 +S'NP_775113.1:p.(Arg123SerfsTer14)' +p79 +sg20 +S'NP_775113.1:p.(R123Sfs*14)' +p80 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_173090.1:c.367_368delinsTCATCT' +p81 +sg28 +g4 +sg29 +(dp82 +S'grch38' +p83 +(dp84 +g33 +S'NC_000015.10:g.42410982_42410983delinsTCATCT' +p85 +sg35 +(dp86 +g37 +g38 +sg39 +S'AG' +p87 +sg41 +S'42410982' +p88 +sg43 +S'TCATCT' +p89 +sssS'grch37' +p90 +(dp91 +g33 +S'NC_000015.9:g.42703180_42703181delinsTCATCT' +p92 +sg35 
+(dp93 +g37 +g38 +sg39 +S'AG' +p94 +sg41 +S'42703180' +p95 +sg43 +g89 +sssg51 +(dp96 +g33 +S'NC_000015.10:g.42410982_42410983delinsTCATCT' +p97 +sg35 +(dp98 +g37 +g55 +sg39 +S'AG' +p99 +sg41 +S'42410982' +p100 +sg43 +g89 +sssS'hg19' +p101 +(dp102 +g33 +S'NC_000015.9:g.42703180_42703181delinsTCATCT' +p103 +sg35 +(dp104 +g37 +g55 +sg39 +S'AG' +p105 +sg41 +S'42703180' +p106 +sg43 +g89 +ssssg64 +(dp107 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_775113.1' +p108 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_173090.1' +p109 +sssS'flag' +p110 +S'gene_variant' +p111 +sS'NM_000070.2:c.2362_2363delinsTCATCT' +p112 +(dp113 +g3 +g4 +sg5 +(lp114 +S'NC_000015.9:g.42703179TAG>TTCATCT automapped to NC_000015.9:g.42703180_42703181delAGinsTCATCT' +p115 +aS'RefSeqGene record not available' +p116 +asg9 +g4 +sg10 +(lp117 +sg12 +VHomo sapiens calpain 3 (CAPN3), transcript variant 1, mRNA +p118 +sg14 +S'CAPN3' +p119 +sg16 +(dp120 +g18 +S'NP_000061.1:p.(Arg788SerfsTer14)' +p121 +sg20 +S'NP_000061.1:p.(R788Sfs*14)' +p122 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_000070.2:c.2362_2363delinsTCATCT' +p123 +sg28 +g4 +sg29 +(dp124 +S'grch38' +p125 +(dp126 +g33 +S'NC_000015.10:g.42410982_42410983delinsTCATCT' +p127 +sg35 +(dp128 +g37 +g38 +sg39 +S'AG' +p129 +sg41 +S'42410982' +p130 +sg43 +S'TCATCT' +p131 +sssS'grch37' +p132 +(dp133 +g33 +S'NC_000015.9:g.42703180_42703181delinsTCATCT' +p134 +sg35 +(dp135 +g37 +g38 +sg39 +S'AG' +p136 +sg41 +S'42703180' +p137 +sg43 +g131 +sssg51 +(dp138 +g33 +S'NC_000015.10:g.42410982_42410983delinsTCATCT' +p139 +sg35 +(dp140 +g37 +g55 +sg39 +S'AG' +p141 +sg41 +S'42410982' +p142 +sg43 +g131 +sssS'hg19' +p143 +(dp144 +g33 +S'NC_000015.9:g.42703180_42703181delinsTCATCT' +p145 +sg35 +(dp146 +g37 +g55 +sg39 +S'AG' +p147 +sg41 +S'42703180' +p148 +sg43 +g131 +ssssg64 +(dp149 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000061.1' +p150 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000070.2' +p151 +sssS'NM_173088.1:c.826_827delinsTCATCT' +p152 +(dp153 +g3 +g4 
+sg5 +(lp154 +S'NC_000015.9:g.42703179TAG>TTCATCT automapped to NC_000015.9:g.42703180_42703181delAGinsTCATCT' +p155 +aS'RefSeqGene record not available' +p156 +asg9 +g4 +sg10 +(lp157 +sg12 +VHomo sapiens calpain 3 (CAPN3), transcript variant 4, mRNA +p158 +sg14 +S'CAPN3' +p159 +sg16 +(dp160 +g18 +S'NP_775111.1:p.(Arg276SerfsTer14)' +p161 +sg20 +S'NP_775111.1:p.(R276Sfs*14)' +p162 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_173088.1:c.826_827delinsTCATCT' +p163 +sg28 +g4 +sg29 +(dp164 +S'grch38' +p165 +(dp166 +g33 +S'NC_000015.10:g.42410982_42410983delinsTCATCT' +p167 +sg35 +(dp168 +g37 +g38 +sg39 +S'AG' +p169 +sg41 +S'42410982' +p170 +sg43 +S'TCATCT' +p171 +sssS'grch37' +p172 +(dp173 +g33 +S'NC_000015.9:g.42703180_42703181delinsTCATCT' +p174 +sg35 +(dp175 +g37 +g38 +sg39 +S'AG' +p176 +sg41 +S'42703180' +p177 +sg43 +g171 +sssg51 +(dp178 +g33 +S'NC_000015.10:g.42410982_42410983delinsTCATCT' +p179 +sg35 +(dp180 +g37 +g55 +sg39 +S'AG' +p181 +sg41 +S'42410982' +p182 +sg43 +g171 +sssS'hg19' +p183 +(dp184 +g33 +S'NC_000015.9:g.42703180_42703181delinsTCATCT' +p185 +sg35 +(dp186 +g37 +g55 +sg39 +S'AG' +p187 +sg41 +S'42703180' +p188 +sg43 +g171 +ssssg64 +(dp189 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_775111.1' +p190 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_173088.1' +p191 +sssS'NM_173089.1:c.367_368delinsTCATCT' +p192 +(dp193 +g3 +g4 +sg5 +(lp194 +S'NC_000015.9:g.42703179TAG>TTCATCT automapped to NC_000015.9:g.42703180_42703181delAGinsTCATCT' +p195 +aS'RefSeqGene record not available' +p196 +asg9 +g4 +sg10 +(lp197 +sg12 +VHomo sapiens calpain 3 (CAPN3), transcript variant 5, mRNA +p198 +sg14 +S'CAPN3' +p199 +sg16 +(dp200 +g18 +S'NP_775112.1:p.(Arg123SerfsTer14)' +p201 +sg20 +S'NP_775112.1:p.(R123Sfs*14)' +p202 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_173089.1:c.367_368delinsTCATCT' +p203 +sg28 +g4 +sg29 +(dp204 +S'grch38' +p205 +(dp206 +g33 +S'NC_000015.10:g.42410982_42410983delinsTCATCT' +p207 +sg35 +(dp208 +g37 +g38 +sg39 +S'AG' +p209 +sg41 +S'42410982' 
+p210 +sg43 +S'TCATCT' +p211 +sssS'grch37' +p212 +(dp213 +g33 +S'NC_000015.9:g.42703180_42703181delinsTCATCT' +p214 +sg35 +(dp215 +g37 +g38 +sg39 +S'AG' +p216 +sg41 +S'42703180' +p217 +sg43 +g211 +sssg51 +(dp218 +g33 +S'NC_000015.10:g.42410982_42410983delinsTCATCT' +p219 +sg35 +(dp220 +g37 +g55 +sg39 +S'AG' +p221 +sg41 +S'42410982' +p222 +sg43 +g211 +sssS'hg19' +p223 +(dp224 +g33 +S'NC_000015.9:g.42703180_42703181delinsTCATCT' +p225 +sg35 +(dp226 +g37 +g55 +sg39 +S'AG' +p227 +sg41 +S'42703180' +p228 +sg43 +g211 +ssssg64 +(dp229 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_775112.1' +p230 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_173089.1' +p231 +sssS'NM_173087.1:c.2086_2087delinsTCATCT' +p232 +(dp233 +g3 +g4 +sg5 +(lp234 +S'NC_000015.9:g.42703179TAG>TTCATCT automapped to NC_000015.9:g.42703180_42703181delAGinsTCATCT' +p235 +aS'RefSeqGene record not available' +p236 +asg9 +g4 +sg10 +(lp237 +sg12 +VHomo sapiens calpain 3 (CAPN3), transcript variant 3, mRNA +p238 +sg14 +S'CAPN3' +p239 +sg16 +(dp240 +g18 +S'NP_775110.1:p.(Arg696SerfsTer14)' +p241 +sg20 +S'NP_775110.1:p.(R696Sfs*14)' +p242 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_173087.1:c.2086_2087delinsTCATCT' +p243 +sg28 +g4 +sg29 +(dp244 +S'grch38' +p245 +(dp246 +g33 +S'NC_000015.10:g.42410982_42410983delinsTCATCT' +p247 +sg35 +(dp248 +g37 +g38 +sg39 +S'AG' +p249 +sg41 +S'42410982' +p250 +sg43 +S'TCATCT' +p251 +sssS'grch37' +p252 +(dp253 +g33 +S'NC_000015.9:g.42703180_42703181delinsTCATCT' +p254 +sg35 +(dp255 +g37 +g38 +sg39 +S'AG' +p256 +sg41 +S'42703180' +p257 +sg43 +g251 +sssg51 +(dp258 +g33 +S'NC_000015.10:g.42410982_42410983delinsTCATCT' +p259 +sg35 +(dp260 +g37 +g55 +sg39 +S'AG' +p261 +sg41 +S'42410982' +p262 +sg43 +g251 +sssS'hg19' +p263 +(dp264 +g33 +S'NC_000015.9:g.42703180_42703181delinsTCATCT' +p265 +sg35 +(dp266 +g37 +g55 +sg39 +S'AG' +p267 +sg41 +S'42703180' +p268 +sg43 +g251 +ssssg64 +(dp269 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_775110.1' +p270 +sg68 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NM_173087.1' +p271 +sssS'metadata' +p272 +(dp273 +S'variantvalidator_hgvs_version' +p274 +S'1.1.3' +p275 +sS'uta_schema' +p276 +S'uta_20180821' +p277 +sS'seqrepo_db' +p278 +S'2018-08-21' +p279 +sS'variantvalidator_version' +p280 +S'v0.2' +p281 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant23.txt b/VariantValidator/testing/testOutputsMasterITS/variant23.txt new file mode 100644 index 00000000..924bec35 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant23.txt @@ -0,0 +1,80 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'start or end or both are beyond the bounds of transcript record' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +g4 +sS'gene_symbol' +p12 +g4 +sS'hgvs_predicted_protein_consequence' +p13 +(dp14 +S'tlr' +p15 +g4 +sS'slr' +p16 +g4 +ssS'submitted_variant' +p17 +S'NM_000518.4:c.*132+1868C>T' +p18 +sS'genome_context_intronic_sequence' +p19 +g4 +sS'hgvs_lrg_variant' +p20 +g4 +sS'hgvs_transcript_variant' +p21 +g4 +sS'hgvs_refseqgene_variant' +p22 +g4 +sS'primary_assembly_loci' +p23 +(dp24 +sS'reference_sequence_records' +p25 +g4 +ssS'flag' +p26 +S'warning' +p27 +sS'metadata' +p28 +(dp29 +S'variantvalidator_hgvs_version' +p30 +S'1.1.3' +p31 +sS'uta_schema' +p32 +S'uta_20180821' +p33 +sS'seqrepo_db' +p34 +S'2018-08-21' +p35 +sS'variantvalidator_version' +p36 +S'v0.2' +p37 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant230.txt b/VariantValidator/testing/testOutputsMasterITS/variant230.txt new file mode 100644 index 00000000..ebbb67d3 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant230.txt @@ -0,0 +1,171 @@ +(dp0 +S'NM_000138.4:c.2927G>A' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens fibrillin 1 (FBN1), mRNA +p12 +sS'gene_symbol' +p13 +S'FBN1' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_000129.3:p.(Arg976His)' +p18 +sS'slr' +p19 +S'NP_000129.3:p.(R976H)' +p20 +ssS'submitted_variant' +p21 +S'15-48782203-C-T' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_000138.4:c.2927G>A' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'grch38' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000015.10:g.48490006C>T' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'15' +p37 +sS'ref' +p38 +VC +p39 +sS'pos' +p40 +S'48490006' +p41 +sS'alt' +p42 +VT +p43 +sssS'grch37' +p44 +(dp45 +g32 +S'NC_000015.9:g.48782203C>T' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +g39 +sg40 +S'48782203' +p48 +sg42 +g43 +sssS'hg38' +p49 +(dp50 +g32 +S'NC_000015.10:g.48490006C>T' +p51 +sg34 +(dp52 +g36 +S'chr15' +p53 +sg38 +g39 +sg40 +S'48490006' +p54 +sg42 +g43 +sssS'hg19' +p55 +(dp56 +g32 +S'NC_000015.9:g.48782203C>T' +p57 +sg34 +(dp58 +g36 +g53 +sg38 +g39 +sg40 +S'48782203' +p59 +sg42 +g43 +ssssS'reference_sequence_records' +p60 +(dp61 +S'protein' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000129.3' +p63 +sS'transcript' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000138.4' +p65 +sssS'flag' +p66 +S'gene_variant' +p67 +sS'metadata' +p68 +(dp69 
+S'variantvalidator_hgvs_version' +p70 +S'1.1.3' +p71 +sS'uta_schema' +p72 +S'uta_20180821' +p73 +sS'seqrepo_db' +p74 +S'2018-08-21' +p75 +sS'variantvalidator_version' +p76 +S'v0.2' +p77 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant231.txt b/VariantValidator/testing/testOutputsMasterITS/variant231.txt new file mode 100644 index 00000000..fc5a7dda --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant231.txt @@ -0,0 +1,511 @@ +(dp0 +S'NM_014249.2:c.946_949=' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000015.9:g.72105929CC>C automapped to NC_000015.9:g.72105933delC' +p7 +aS'The displayed variants may be artefacts of aligning NM_014249.2 with genome build GRCh37' +p8 +aS'NC_000015.9:g.72105928_72105929 contains 1 genomic base(s) that fail to align to transcript NM_014249.2' +p9 +aS'Caution should be used when reporting the displayed variant descriptions' +p10 +aS'If you are unsure, please contact admin' +p11 +aS'A more recent version of the selected reference sequence NM_014249.2 is available (NM_014249.3)' +p12 +aS'NM_014249.3:c.946_949GACC= MUST be fully validated prior to use in reports' +p13 +aS'select_variants=NM_014249.3:c.946_949=' +p14 +aS'RefSeqGene record not available' +p15 +asS'refseqgene_context_intronic_sequence' +p16 +g4 +sS'alt_genomic_loci' +p17 +(lp18 +sS'transcript_description' +p19 +VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA +p20 +sS'gene_symbol' +p21 +S'NR2E3' +p22 +sS'hgvs_predicted_protein_consequence' +p23 +(dp24 +S'tlr' +p25 +S'NP_055064.1:p.(Asp316=)' +p26 +sS'slr' +p27 +S'NP_055064.1:p.(D316=)' +p28 +ssS'submitted_variant' +p29 +S'15-72105929-CC-C' +p30 +sS'genome_context_intronic_sequence' +p31 +g4 +sS'hgvs_lrg_variant' +p32 +g4 +sS'hgvs_transcript_variant' +p33 +S'NM_014249.2:c.946_949=' +p34 +sS'hgvs_refseqgene_variant' +p35 +g4 +sS'primary_assembly_loci' +p36 
+(dp37 +S'hg19' +p38 +(dp39 +S'hgvs_genomic_description' +p40 +S'NC_000015.9:g.72105933del' +p41 +sS'vcf' +p42 +(dp43 +S'chr' +p44 +S'chr15' +p45 +sS'ref' +p46 +S'AC' +p47 +sS'pos' +p48 +S'72105928' +p49 +sS'alt' +p50 +S'A' +p51 +sssS'grch37' +p52 +(dp53 +g40 +S'NC_000015.9:g.72105933del' +p54 +sg42 +(dp55 +g44 +S'15' +p56 +sg46 +S'AC' +p57 +sg48 +S'72105928' +p58 +sg50 +g51 +ssssS'reference_sequence_records' +p59 +(dp60 +S'protein' +p61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1' +p62 +sS'transcript' +p63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2' +p64 +sssS'NM_016346.3:c.946_949=' +p65 +(dp66 +g3 +g4 +sg5 +(lp67 +S'NC_000015.9:g.72105929CC>C automapped to NC_000015.9:g.72105933delC' +p68 +aS'The displayed variants may be artefacts of aligning NM_016346.3 with genome build GRCh37' +p69 +aS'NC_000015.9:g.72105928_72105929 contains 1 genomic base(s) that fail to align to transcript NM_016346.3' +p70 +aS'Caution should be used when reporting the displayed variant descriptions' +p71 +aS'If you are unsure, please contact admin' +p72 +aS'RefSeqGene record not available' +p73 +asg16 +g4 +sg17 +(lp74 +sg19 +VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA +p75 +sg21 +S'NR2E3' +p76 +sg23 +(dp77 +g25 +S'NP_057430.1:p.(Asp316=)' +p78 +sg27 +S'NP_057430.1:p.(D316=)' +p79 +ssg29 +g30 +sg31 +g4 +sg32 +g4 +sg33 +S'NM_016346.3:c.946_949=' +p80 +sg35 +g4 +sg36 +(dp81 +S'grch38' +p82 +(dp83 +g40 +S'NC_000015.10:g.71813587_71813590=' +p84 +sg42 +(dp85 +g44 +g56 +sg46 +VGACC +p86 +sg48 +S'71813587' +p87 +sg50 +g86 +sssS'grch37' +p88 +(dp89 +g40 +S'NC_000015.9:g.72105933del' +p90 +sg42 +(dp91 +g44 +g56 +sg46 +S'AC' +p92 +sg48 +S'72105928' +p93 +sg50 +g51 +sssS'hg38' +p94 +(dp95 +g40 +S'NC_000015.10:g.71813587_71813590=' +p96 +sg42 +(dp97 +g44 +g45 +sg46 +g86 +sg48 +S'71813587' +p98 +sg50 +g86 +sssS'hg19' +p99 +(dp100 +g40 +S'NC_000015.9:g.72105933del' +p101 +sg42 +(dp102 +g44 +g45 +sg46 +S'AC' +p103 +sg48 +S'72105928' 
+p104 +sg50 +g51 +ssssg59 +(dp105 +g61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1' +p106 +sg63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3' +p107 +sssS'flag' +p108 +S'gene_variant' +p109 +sS'NM_014249.3:c.946_949=' +p110 +(dp111 +g3 +g4 +sg5 +(lp112 +S'NC_000015.9:g.72105929CC>C automapped to NC_000015.9:g.72105933delC' +p113 +aS'The displayed variants may be artefacts of aligning NM_014249.3 with genome build GRCh37' +p114 +aS'NC_000015.9:g.72105928_72105929 contains 1 genomic base(s) that fail to align to transcript NM_014249.3' +p115 +aS'Caution should be used when reporting the displayed variant descriptions' +p116 +aS'If you are unsure, please contact admin' +p117 +aS'RefSeqGene record not available' +p118 +asg16 +g4 +sg17 +(lp119 +sg19 +VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA +p120 +sg21 +S'NR2E3' +p121 +sg23 +(dp122 +g25 +S'NP_055064.1:p.(Asp316=)' +p123 +sg27 +S'NP_055064.1:p.(D316=)' +p124 +ssg29 +g30 +sg31 +g4 +sg32 +g4 +sg33 +S'NM_014249.3:c.946_949=' +p125 +sg35 +g4 +sg36 +(dp126 +S'grch38' +p127 +(dp128 +g40 +S'NC_000015.10:g.71813587_71813590=' +p129 +sg42 +(dp130 +g44 +g56 +sg46 +VGACC +p131 +sg48 +S'71813587' +p132 +sg50 +g131 +sssS'grch37' +p133 +(dp134 +g40 +S'NC_000015.9:g.72105933del' +p135 +sg42 +(dp136 +g44 +g56 +sg46 +S'AC' +p137 +sg48 +S'72105928' +p138 +sg50 +g51 +sssg94 +(dp139 +g40 +S'NC_000015.10:g.71813587_71813590=' +p140 +sg42 +(dp141 +g44 +g45 +sg46 +g131 +sg48 +S'71813587' +p142 +sg50 +g131 +sssS'hg19' +p143 +(dp144 +g40 +S'NC_000015.9:g.72105933del' +p145 +sg42 +(dp146 +g44 +g45 +sg46 +S'AC' +p147 +sg48 +S'72105928' +p148 +sg50 +g51 +ssssg59 +(dp149 +g61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1' +p150 +sg63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3' +p151 +sssS'NM_016346.2:c.946_949=' +p152 +(dp153 +g3 +g4 +sg5 +(lp154 +S'NC_000015.9:g.72105929CC>C automapped to NC_000015.9:g.72105933delC' +p155 +aS'The displayed variants may be artefacts 
of aligning NM_016346.2 with genome build GRCh37' +p156 +aS'NC_000015.9:g.72105928_72105929 contains 1 genomic base(s) that fail to align to transcript NM_016346.2' +p157 +aS'Caution should be used when reporting the displayed variant descriptions' +p158 +aS'If you are unsure, please contact admin' +p159 +aS'A more recent version of the selected reference sequence NM_016346.2 is available (NM_016346.3)' +p160 +aS'NM_016346.3:c.946_949GACC= MUST be fully validated prior to use in reports' +p161 +aS'select_variants=NM_016346.3:c.946_949=' +p162 +aS'RefSeqGene record not available' +p163 +asg16 +g4 +sg17 +(lp164 +sg19 +VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA +p165 +sg21 +S'NR2E3' +p166 +sg23 +(dp167 +g25 +S'NP_057430.1:p.(Asp316=)' +p168 +sg27 +S'NP_057430.1:p.(D316=)' +p169 +ssg29 +g30 +sg31 +g4 +sg32 +g4 +sg33 +S'NM_016346.2:c.946_949=' +p170 +sg35 +g4 +sg36 +(dp171 +S'hg19' +p172 +(dp173 +g40 +S'NC_000015.9:g.72105933del' +p174 +sg42 +(dp175 +g44 +g45 +sg46 +S'AC' +p176 +sg48 +S'72105928' +p177 +sg50 +g51 +sssS'grch37' +p178 +(dp179 +g40 +S'NC_000015.9:g.72105933del' +p180 +sg42 +(dp181 +g44 +g56 +sg46 +S'AC' +p182 +sg48 +S'72105928' +p183 +sg50 +g51 +ssssg59 +(dp184 +g61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1' +p185 +sg63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2' +p186 +sssS'metadata' +p187 +(dp188 +S'variantvalidator_hgvs_version' +p189 +S'1.1.3' +p190 +sS'uta_schema' +p191 +S'uta_20180821' +p192 +sS'seqrepo_db' +p193 +S'2018-08-21' +p194 +sS'variantvalidator_version' +p195 +S'v0.2' +p196 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant232.txt b/VariantValidator/testing/testOutputsMasterITS/variant232.txt new file mode 100644 index 00000000..cac71aaa --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant232.txt @@ -0,0 +1,286 @@ +(dp0 +S'NM_002693.2:c.752C>T' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens DNA polymerase gamma, catalytic subunit (POLG), transcript variant 1, mRNA +p12 +sS'gene_symbol' +p13 +S'POLG' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_002684.1:p.(Thr251Ile)' +p18 +sS'slr' +p19 +S'NP_002684.1:p.(T251I)' +p20 +ssS'submitted_variant' +p21 +S'15-89873415-G-A' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_002693.2:c.752C>T' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'grch38' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000015.10:g.89330184G>A' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'15' +p37 +sS'ref' +p38 +VG +p39 +sS'pos' +p40 +S'89330184' +p41 +sS'alt' +p42 +VA +p43 +sssS'grch37' +p44 +(dp45 +g32 +S'NC_000015.9:g.89873415G>A' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +g39 +sg40 +S'89873415' +p48 +sg42 +g43 +sssS'hg38' +p49 +(dp50 +g32 +S'NC_000015.10:g.89330184G>A' +p51 +sg34 +(dp52 +g36 +S'chr15' +p53 +sg38 +g39 +sg40 +S'89330184' +p54 +sg42 +g43 +sssS'hg19' +p55 +(dp56 +g32 +S'NC_000015.9:g.89873415G>A' +p57 +sg34 +(dp58 +g36 +g53 +sg38 +g39 +sg40 +S'89873415' +p59 +sg42 +g43 +ssssS'reference_sequence_records' +p60 +(dp61 +S'protein' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_002684.1' +p63 +sS'transcript' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_002693.2' +p65 +sssS'flag' +p66 
+S'gene_variant' +p67 +sS'metadata' +p68 +(dp69 +S'variantvalidator_hgvs_version' +p70 +S'1.1.3' +p71 +sS'uta_schema' +p72 +S'uta_20180821' +p73 +sS'seqrepo_db' +p74 +S'2018-08-21' +p75 +sS'variantvalidator_version' +p76 +S'v0.2' +p77 +ssS'NM_001126131.1:c.752C>T' +p78 +(dp79 +g3 +g4 +sg5 +(lp80 +S'RefSeqGene record not available' +p81 +asg8 +g4 +sg9 +(lp82 +sg11 +VHomo sapiens DNA polymerase gamma, catalytic subunit (POLG), transcript variant 2, mRNA +p83 +sg13 +S'POLG' +p84 +sg15 +(dp85 +g17 +S'NP_001119603.1:p.(Thr251Ile)' +p86 +sg19 +S'NP_001119603.1:p.(T251I)' +p87 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001126131.1:c.752C>T' +p88 +sg27 +g4 +sg28 +(dp89 +S'grch38' +p90 +(dp91 +g32 +S'NC_000015.10:g.89330184G>A' +p92 +sg34 +(dp93 +g36 +g37 +sg38 +g39 +sg40 +S'89330184' +p94 +sg42 +g43 +sssS'grch37' +p95 +(dp96 +g32 +S'NC_000015.9:g.89873415G>A' +p97 +sg34 +(dp98 +g36 +g37 +sg38 +g39 +sg40 +S'89873415' +p99 +sg42 +g43 +sssg49 +(dp100 +g32 +S'NC_000015.10:g.89330184G>A' +p101 +sg34 +(dp102 +g36 +g53 +sg38 +g39 +sg40 +S'89330184' +p103 +sg42 +g43 +sssS'hg19' +p104 +(dp105 +g32 +S'NC_000015.9:g.89873415G>A' +p106 +sg34 +(dp107 +g36 +g53 +sg38 +g39 +sg40 +S'89873415' +p108 +sg42 +g43 +ssssg60 +(dp109 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119603.1' +p110 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126131.1' +p111 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant233.txt b/VariantValidator/testing/testOutputsMasterITS/variant233.txt new file mode 100644 index 00000000..0ef11703 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant233.txt @@ -0,0 +1,1291 @@ +(dp0 +S'NM_001318832.1:c.310C>T' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 9, mRNA +p12 +sS'gene_symbol' +p13 +S'TSC2' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_001305761.1:p.(Arg104Trp)' +p18 +sS'slr' +p19 +S'NP_001305761.1:p.(R104W)' +p20 +ssS'submitted_variant' +p21 +S'16-2103394-C-T' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_001318832.1:c.310C>T' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'grch38' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000016.10:g.2053393C>T' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'16' +p37 +sS'ref' +p38 +S'C' +p39 +sS'pos' +p40 +S'2053393' +p41 +sS'alt' +p42 +S'T' +p43 +sssS'grch37' +p44 +(dp45 +g32 +S'NC_000016.9:g.2103394C>T' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +g39 +sg40 +S'2103394' +p48 +sg42 +g43 +sssS'hg38' +p49 +(dp50 +g32 +S'NC_000016.10:g.2053393C>T' +p51 +sg34 +(dp52 +g36 +S'chr16' +p53 +sg38 +g39 +sg40 +S'2053393' +p54 +sg42 +g43 +sssS'hg19' +p55 +(dp56 +g32 +S'NC_000016.9:g.2103394C>T' +p57 +sg34 +(dp58 +g36 +g53 +sg38 +g39 +sg40 +S'2103394' +p59 +sg42 +g43 +ssssS'reference_sequence_records' +p60 +(dp61 +S'protein' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305761.1' +p63 +sS'transcript' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318832.1' +p65 +sssS'NM_000548.4:c.277C>T' +p66 
+(dp67 +g3 +g4 +sg5 +(lp68 +S'RefSeqGene record not available' +p69 +asg8 +g4 +sg9 +(lp70 +sg11 +VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 1, mRNA +p71 +sg13 +S'TSC2' +p72 +sg15 +(dp73 +g17 +S'NP_000539.2:p.(Arg93Trp)' +p74 +sg19 +S'NP_000539.2:p.(R93W)' +p75 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_000548.4:c.277C>T' +p76 +sg27 +g4 +sg28 +(dp77 +S'grch38' +p78 +(dp79 +g32 +S'NC_000016.10:g.2053393C>T' +p80 +sg34 +(dp81 +g36 +g37 +sg38 +g39 +sg40 +S'2053393' +p82 +sg42 +g43 +sssS'grch37' +p83 +(dp84 +g32 +S'NC_000016.9:g.2103394C>T' +p85 +sg34 +(dp86 +g36 +g37 +sg38 +g39 +sg40 +S'2103394' +p87 +sg42 +g43 +sssg49 +(dp88 +g32 +S'NC_000016.10:g.2053393C>T' +p89 +sg34 +(dp90 +g36 +g53 +sg38 +g39 +sg40 +S'2053393' +p91 +sg42 +g43 +sssS'hg19' +p92 +(dp93 +g32 +S'NC_000016.9:g.2103394C>T' +p94 +sg34 +(dp95 +g36 +g53 +sg38 +g39 +sg40 +S'2103394' +p96 +sg42 +g43 +ssssg60 +(dp97 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000539.2' +p98 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000548.4' +p99 +sssS'NM_001318829.1:c.130C>T' +p100 +(dp101 +g3 +g4 +sg5 +(lp102 +S'RefSeqGene record not available' +p103 +asg8 +g4 +sg9 +(lp104 +sg11 +VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 7, mRNA +p105 +sg13 +S'TSC2' +p106 +sg15 +(dp107 +g17 +S'NP_001305758.1:p.(Arg44Trp)' +p108 +sg19 +S'NP_001305758.1:p.(R44W)' +p109 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001318829.1:c.130C>T' +p110 +sg27 +g4 +sg28 +(dp111 +S'grch38' +p112 +(dp113 +g32 +S'NC_000016.10:g.2053393C>T' +p114 +sg34 +(dp115 +g36 +g37 +sg38 +g39 +sg40 +S'2053393' +p116 +sg42 +g43 +sssS'grch37' +p117 +(dp118 +g32 +S'NC_000016.9:g.2103394C>T' +p119 +sg34 +(dp120 +g36 +g37 +sg38 +g39 +sg40 +S'2103394' +p121 +sg42 +g43 +sssg49 +(dp122 +g32 +S'NC_000016.10:g.2053393C>T' +p123 +sg34 +(dp124 +g36 +g53 +sg38 +g39 +sg40 +S'2053393' +p125 +sg42 +g43 +sssS'hg19' +p126 +(dp127 +g32 +S'NC_000016.9:g.2103394C>T' +p128 +sg34 +(dp129 +g36 +g53 +sg38 +g39 +sg40 +S'2103394' +p130 +sg42 +g43 
+ssssg60 +(dp131 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305758.1' +p132 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318829.1' +p133 +sssS'NM_001077183.2:c.277C>T' +p134 +(dp135 +g3 +g4 +sg5 +(lp136 +S'RefSeqGene record not available' +p137 +asg8 +g4 +sg9 +(lp138 +sg11 +VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 4, mRNA +p139 +sg13 +S'TSC2' +p140 +sg15 +(dp141 +g17 +S'NP_001070651.1:p.(Arg93Trp)' +p142 +sg19 +S'NP_001070651.1:p.(R93W)' +p143 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001077183.2:c.277C>T' +p144 +sg27 +g4 +sg28 +(dp145 +S'grch38' +p146 +(dp147 +g32 +S'NC_000016.10:g.2053393C>T' +p148 +sg34 +(dp149 +g36 +g37 +sg38 +g39 +sg40 +S'2053393' +p150 +sg42 +g43 +sssS'grch37' +p151 +(dp152 +g32 +S'NC_000016.9:g.2103394C>T' +p153 +sg34 +(dp154 +g36 +g37 +sg38 +g39 +sg40 +S'2103394' +p155 +sg42 +g43 +sssg49 +(dp156 +g32 +S'NC_000016.10:g.2053393C>T' +p157 +sg34 +(dp158 +g36 +g53 +sg38 +g39 +sg40 +S'2053393' +p159 +sg42 +g43 +sssS'hg19' +p160 +(dp161 +g32 +S'NC_000016.9:g.2103394C>T' +p162 +sg34 +(dp163 +g36 +g53 +sg38 +g39 +sg40 +S'2103394' +p164 +sg42 +g43 +ssssg60 +(dp165 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001070651.1' +p166 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077183.2' +p167 +sssS'NM_001114382.1:c.277C>T' +p168 +(dp169 +g3 +g4 +sg5 +(lp170 +S'A more recent version of the selected reference sequence NM_001114382.1 is available (NM_001114382.2)' +p171 +aS'NM_001114382.2:c.277C>T MUST be fully validated prior to use in reports' +p172 +aS'select_variants=NM_001114382.2:c.277C>T' +p173 +aS'RefSeqGene record not available' +p174 +asg8 +g4 +sg9 +(lp175 +sg11 +VHomo sapiens tuberous sclerosis 2 (TSC2), transcript variant 5, mRNA +p176 +sg13 +S'TSC2' +p177 +sg15 +(dp178 +g17 +S'NP_001107854.1:p.(Arg93Trp)' +p179 +sg19 +S'NP_001107854.1:p.(R93W)' +p180 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001114382.1:c.277C>T' +p181 +sg27 +g4 +sg28 +(dp182 +S'hg19' +p183 +(dp184 +g32 
+S'NC_000016.9:g.2103394C>T' +p185 +sg34 +(dp186 +g36 +g53 +sg38 +g39 +sg40 +S'2103394' +p187 +sg42 +g43 +sssS'grch37' +p188 +(dp189 +g32 +S'NC_000016.9:g.2103394C>T' +p190 +sg34 +(dp191 +g36 +g37 +sg38 +g39 +sg40 +S'2103394' +p192 +sg42 +g43 +ssssg60 +(dp193 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001107854.1' +p194 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001114382.1' +p195 +sssS'NM_001077183.1:c.277C>T' +p196 +(dp197 +g3 +g4 +sg5 +(lp198 +S'A more recent version of the selected reference sequence NM_001077183.1 is available (NM_001077183.2)' +p199 +aS'NM_001077183.2:c.277C>T MUST be fully validated prior to use in reports' +p200 +aS'select_variants=NM_001077183.2:c.277C>T' +p201 +aS'RefSeqGene record not available' +p202 +asg8 +g4 +sg9 +(lp203 +sg11 +VHomo sapiens tuberous sclerosis 2 (TSC2), transcript variant 4, mRNA +p204 +sg13 +S'TSC2' +p205 +sg15 +(dp206 +g17 +S'NP_001070651.1:p.(Arg93Trp)' +p207 +sg19 +S'NP_001070651.1:p.(R93W)' +p208 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001077183.1:c.277C>T' +p209 +sg27 +g4 +sg28 +(dp210 +S'hg19' +p211 +(dp212 +g32 +S'NC_000016.9:g.2103394C>T' +p213 +sg34 +(dp214 +g36 +g53 +sg38 +g39 +sg40 +S'2103394' +p215 +sg42 +g43 +sssS'grch37' +p216 +(dp217 +g32 +S'NC_000016.9:g.2103394C>T' +p218 +sg34 +(dp219 +g36 +g37 +sg38 +g39 +sg40 +S'2103394' +p220 +sg42 +g43 +ssssg60 +(dp221 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001070651.1' +p222 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077183.1' +p223 +sssS'NM_001318827.1:c.226-903C>T' +p224 +(dp225 +g3 +g4 +sg5 +(lp226 +S'RefSeqGene record not available' +p227 +asg8 +g4 +sg9 +(lp228 +sg11 +VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 6, mRNA +p229 +sg13 +S'TSC2' +p230 +sg15 +(dp231 +g17 +S'NP_001305756.1:p.?' +p232 +sg19 +S'NP_001305756.1:p.?' 
+p233 +ssg21 +g22 +sg23 +S'NC_000016.9(NM_001318827.1):c.226-903C>T' +p234 +sg24 +g4 +sg25 +S'NM_001318827.1:c.226-903C>T' +p235 +sg27 +g4 +sg28 +(dp236 +S'grch38' +p237 +(dp238 +g32 +S'NC_000016.10:g.2053393C>T' +p239 +sg34 +(dp240 +g36 +g37 +sg38 +g39 +sg40 +S'2053393' +p241 +sg42 +g43 +sssS'grch37' +p242 +(dp243 +g32 +S'NC_000016.9:g.2103394C>T' +p244 +sg34 +(dp245 +g36 +g37 +sg38 +g39 +sg40 +S'2103394' +p246 +sg42 +g43 +sssg49 +(dp247 +g32 +S'NC_000016.10:g.2053393C>T' +p248 +sg34 +(dp249 +g36 +g53 +sg38 +g39 +sg40 +S'2053393' +p250 +sg42 +g43 +sssS'hg19' +p251 +(dp252 +g32 +S'NC_000016.9:g.2103394C>T' +p253 +sg34 +(dp254 +g36 +g53 +sg38 +g39 +sg40 +S'2103394' +p255 +sg42 +g43 +ssssg60 +(dp256 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305756.1' +p257 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318827.1' +p258 +sssS'flag' +p259 +S'gene_variant' +p260 +sS'NM_001114382.2:c.277C>T' +p261 +(dp262 +g3 +g4 +sg5 +(lp263 +S'RefSeqGene record not available' +p264 +asg8 +g4 +sg9 +(lp265 +sg11 +VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 5, mRNA +p266 +sg13 +S'TSC2' +p267 +sg15 +(dp268 +g17 +S'NP_001107854.1:p.(Arg93Trp)' +p269 +sg19 +S'NP_001107854.1:p.(R93W)' +p270 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001114382.2:c.277C>T' +p271 +sg27 +g4 +sg28 +(dp272 +S'grch38' +p273 +(dp274 +g32 +S'NC_000016.10:g.2053393C>T' +p275 +sg34 +(dp276 +g36 +g37 +sg38 +g39 +sg40 +S'2053393' +p277 +sg42 +g43 +sssS'grch37' +p278 +(dp279 +g32 +S'NC_000016.9:g.2103394C>T' +p280 +sg34 +(dp281 +g36 +g37 +sg38 +g39 +sg40 +S'2103394' +p282 +sg42 +g43 +sssg49 +(dp283 +g32 +S'NC_000016.10:g.2053393C>T' +p284 +sg34 +(dp285 +g36 +g53 +sg38 +g39 +sg40 +S'2053393' +p286 +sg42 +g43 +sssS'hg19' +p287 +(dp288 +g32 +S'NC_000016.9:g.2103394C>T' +p289 +sg34 +(dp290 +g36 +g53 +sg38 +g39 +sg40 +S'2103394' +p291 +sg42 +g43 +ssssg60 +(dp292 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001107854.1' +p293 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001114382.2' +p294 
+sssS'NM_001363528.1:c.277C>T' +p295 +(dp296 +g3 +g4 +sg5 +(lp297 +S'RefSeqGene record not available' +p298 +asg8 +g4 +sg9 +(lp299 +sg11 +VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 10, mRNA +p300 +sg13 +S'TSC2' +p301 +sg15 +(dp302 +g17 +S'NP_001350457.1:p.(Arg93Trp)' +p303 +sg19 +S'NP_001350457.1:p.(R93W)' +p304 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001363528.1:c.277C>T' +p305 +sg27 +g4 +sg28 +(dp306 +S'hg19' +p307 +(dp308 +g32 +S'NC_000016.9:g.2103394C>T' +p309 +sg34 +(dp310 +g36 +g53 +sg38 +g39 +sg40 +S'2103394' +p311 +sg42 +g43 +sssS'grch37' +p312 +(dp313 +g32 +S'NC_000016.9:g.2103394C>T' +p314 +sg34 +(dp315 +g36 +g37 +sg38 +g39 +sg40 +S'2103394' +p316 +sg42 +g43 +ssssg60 +(dp317 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350457.1' +p318 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363528.1' +p319 +sssS'NM_021055.2:c.277C>T' +p320 +(dp321 +g3 +g4 +sg5 +(lp322 +S'RefSeqGene record not available' +p323 +asg8 +g4 +sg9 +(lp324 +sg11 +VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 2, mRNA +p325 +sg13 +S'TSC2' +p326 +sg15 +(dp327 +g17 +S'NP_066399.2:p.(Arg93Trp)' +p328 +sg19 +S'NP_066399.2:p.(R93W)' +p329 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_021055.2:c.277C>T' +p330 +sg27 +g4 +sg28 +(dp331 +S'hg19' +p332 +(dp333 +g32 +S'NC_000016.9:g.2103394C>T' +p334 +sg34 +(dp335 +g36 +g53 +sg38 +g39 +sg40 +S'2103394' +p336 +sg42 +g43 +sssS'grch37' +p337 +(dp338 +g32 +S'NC_000016.9:g.2103394C>T' +p339 +sg34 +(dp340 +g36 +g37 +sg38 +g39 +sg40 +S'2103394' +p341 +sg42 +g43 +ssssg60 +(dp342 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_066399.2' +p343 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_021055.2' +p344 +sssS'NM_000548.3:c.277C>T' +p345 +(dp346 +g3 +g4 +sg5 +(lp347 +S'A more recent version of the selected reference sequence NM_000548.3 is available (NM_000548.4)' +p348 +aS'NM_000548.4:c.277C>T MUST be fully validated prior to use in reports' +p349 +aS'select_variants=NM_000548.4:c.277C>T' +p350 +aS'RefSeqGene 
record not available' +p351 +asg8 +g4 +sg9 +(lp352 +sg11 +VHomo sapiens tuberous sclerosis 2 (TSC2), transcript variant 1, mRNA +p353 +sg13 +S'TSC2' +p354 +sg15 +(dp355 +g17 +S'NP_000539.2:p.(Arg93Trp)' +p356 +sg19 +S'NP_000539.2:p.(R93W)' +p357 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_000548.3:c.277C>T' +p358 +sg27 +g4 +sg28 +(dp359 +S'hg19' +p360 +(dp361 +g32 +S'NC_000016.9:g.2103394C>T' +p362 +sg34 +(dp363 +g36 +g53 +sg38 +g39 +sg40 +S'2103394' +p364 +sg42 +g43 +sssS'grch37' +p365 +(dp366 +g32 +S'NC_000016.9:g.2103394C>T' +p367 +sg34 +(dp368 +g36 +g37 +sg38 +g39 +sg40 +S'2103394' +p369 +sg42 +g43 +ssssg60 +(dp370 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000539.2' +p371 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000548.3' +p372 +sssS'NM_001318831.1:c.-1-2803C>T' +p373 +(dp374 +g3 +g4 +sg5 +(lp375 +S'RefSeqGene record not available' +p376 +asg8 +g4 +sg9 +(lp377 +sg11 +VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 8, mRNA +p378 +sg13 +S'TSC2' +p379 +sg15 +(dp380 +g17 +S'NP_001305760.1:p.?' +p381 +sg19 +S'NP_001305760.1:p.?' 
+p382 +ssg21 +g22 +sg23 +S'NC_000016.9(NM_001318831.1):c.-1-2803C>T' +p383 +sg24 +g4 +sg25 +S'NM_001318831.1:c.-1-2803C>T' +p384 +sg27 +g4 +sg28 +(dp385 +S'grch38' +p386 +(dp387 +g32 +S'NC_000016.10:g.2053393C>T' +p388 +sg34 +(dp389 +g36 +g37 +sg38 +g39 +sg40 +S'2053393' +p390 +sg42 +g43 +sssS'grch37' +p391 +(dp392 +g32 +S'NC_000016.9:g.2103394C>T' +p393 +sg34 +(dp394 +g36 +g37 +sg38 +g39 +sg40 +S'2103394' +p395 +sg42 +g43 +sssg49 +(dp396 +g32 +S'NC_000016.10:g.2053393C>T' +p397 +sg34 +(dp398 +g36 +g53 +sg38 +g39 +sg40 +S'2053393' +p399 +sg42 +g43 +sssS'hg19' +p400 +(dp401 +g32 +S'NC_000016.9:g.2103394C>T' +p402 +sg34 +(dp403 +g36 +g53 +sg38 +g39 +sg40 +S'2103394' +p404 +sg42 +g43 +ssssg60 +(dp405 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305760.1' +p406 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318831.1' +p407 +sssS'metadata' +p408 +(dp409 +S'variantvalidator_hgvs_version' +p410 +S'1.1.3' +p411 +sS'uta_schema' +p412 +S'uta_20180821' +p413 +sS'seqrepo_db' +p414 +S'2018-08-21' +p415 +sS'variantvalidator_version' +p416 +S'v0.2' +p417 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant234.txt b/VariantValidator/testing/testOutputsMasterITS/variant234.txt new file mode 100644 index 00000000..3c5da003 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant234.txt @@ -0,0 +1,286 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_001079846.1:c.5634G>C' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens CREB binding protein (CREBBP), transcript variant 2, mRNA +p14 +sS'gene_symbol' +p15 +S'CREBBP' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_001073315.1:p.(Met1878Ile)' +p20 +sS'slr' +p21 +S'NP_001073315.1:p.(M1878I)' +p22 +ssS'submitted_variant' +p23 +S'16-3779300-C-G' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_001079846.1:c.5634G>C' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'grch38' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000016.10:g.3729299C>G' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'16' +p39 +sS'ref' +p40 +VC +p41 +sS'pos' +p42 +S'3729299' +p43 +sS'alt' +p44 +VG +p45 +sssS'grch37' +p46 +(dp47 +g34 +S'NC_000016.9:g.3779300C>G' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +g41 +sg42 +S'3779300' +p50 +sg44 +g45 +sssS'hg38' +p51 +(dp52 +g34 +S'NC_000016.10:g.3729299C>G' +p53 +sg36 +(dp54 +g38 +S'chr16' +p55 +sg40 +g41 +sg42 +S'3729299' +p56 +sg44 +g45 +sssS'hg19' +p57 +(dp58 +g34 +S'NC_000016.9:g.3779300C>G' +p59 +sg36 +(dp60 +g38 +g55 +sg40 +g41 +sg42 +S'3779300' +p61 +sg44 +g45 +ssssS'reference_sequence_records' +p62 +(dp63 +S'protein' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073315.1' +p65 +sS'transcript' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001079846.1' 
+p67 +sssS'NM_004380.2:c.5748G>C' +p68 +(dp69 +g5 +g6 +sg7 +(lp70 +S'RefSeqGene record not available' +p71 +asg10 +g6 +sg11 +(lp72 +sg13 +VHomo sapiens CREB binding protein (CREBBP), transcript variant 1, mRNA +p73 +sg15 +S'CREBBP' +p74 +sg17 +(dp75 +g19 +S'NP_004371.2:p.(Met1916Ile)' +p76 +sg21 +S'NP_004371.2:p.(M1916I)' +p77 +ssg23 +g24 +sg25 +g6 +sg26 +g6 +sg27 +S'NM_004380.2:c.5748G>C' +p78 +sg29 +g6 +sg30 +(dp79 +S'grch38' +p80 +(dp81 +g34 +S'NC_000016.10:g.3729299C>G' +p82 +sg36 +(dp83 +g38 +g39 +sg40 +g41 +sg42 +S'3729299' +p84 +sg44 +g45 +sssS'grch37' +p85 +(dp86 +g34 +S'NC_000016.9:g.3779300C>G' +p87 +sg36 +(dp88 +g38 +g39 +sg40 +g41 +sg42 +S'3779300' +p89 +sg44 +g45 +sssg51 +(dp90 +g34 +S'NC_000016.10:g.3729299C>G' +p91 +sg36 +(dp92 +g38 +g55 +sg40 +g41 +sg42 +S'3729299' +p93 +sg44 +g45 +sssS'hg19' +p94 +(dp95 +g34 +S'NC_000016.9:g.3779300C>G' +p96 +sg36 +(dp97 +g38 +g55 +sg40 +g41 +sg42 +S'3779300' +p98 +sg44 +g45 +ssssg62 +(dp99 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004371.2' +p100 +sg66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004380.2' +p101 +sssS'metadata' +p102 +(dp103 +S'variantvalidator_hgvs_version' +p104 +S'1.1.3' +p105 +sS'uta_schema' +p106 +S'uta_20180821' +p107 +sS'seqrepo_db' +p108 +S'2018-08-21' +p109 +sS'variantvalidator_version' +p110 +S'v0.2' +p111 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant235.txt b/VariantValidator/testing/testOutputsMasterITS/variant235.txt new file mode 100644 index 00000000..b637011e --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant235.txt @@ -0,0 +1,286 @@ +(dp0 +S'NM_019109.4:c.826C>G' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens ALG1, chitobiosyldiphosphodolichol beta-mannosyltransferase (ALG1), transcript variant 1, mRNA +p12 +sS'gene_symbol' +p13 +S'ALG1' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_061982.3:p.(Arg276Gly)' +p18 +sS'slr' +p19 +S'NP_061982.3:p.(R276G)' +p20 +ssS'submitted_variant' +p21 +S'16-5128843-C-G' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_019109.4:c.826C>G' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'grch38' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000016.10:g.5078842C>G' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'16' +p37 +sS'ref' +p38 +S'C' +p39 +sS'pos' +p40 +S'5078842' +p41 +sS'alt' +p42 +S'G' +p43 +sssS'grch37' +p44 +(dp45 +g32 +S'NC_000016.9:g.5128843C>G' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +g39 +sg40 +S'5128843' +p48 +sg42 +g43 +sssS'hg38' +p49 +(dp50 +g32 +S'NC_000016.10:g.5078842C>G' +p51 +sg34 +(dp52 +g36 +S'chr16' +p53 +sg38 +g39 +sg40 +S'5078842' +p54 +sg42 +g43 +sssS'hg19' +p55 +(dp56 +g32 +S'NC_000016.9:g.5128843C>G' +p57 +sg34 +(dp58 +g36 +g53 +sg38 +g39 +sg40 +S'5128843' +p59 +sg42 +g43 +ssssS'reference_sequence_records' +p60 +(dp61 +S'protein' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_061982.3' +p63 +sS'transcript' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_019109.4' +p65 +sssS'flag' +p66 
+S'gene_variant' +p67 +sS'metadata' +p68 +(dp69 +S'variantvalidator_hgvs_version' +p70 +S'1.1.3' +p71 +sS'uta_schema' +p72 +S'uta_20180821' +p73 +sS'seqrepo_db' +p74 +S'2018-08-21' +p75 +sS'variantvalidator_version' +p76 +S'v0.2' +p77 +ssS'NM_001330504.1:c.493C>G' +p78 +(dp79 +g3 +g4 +sg5 +(lp80 +S'RefSeqGene record not available' +p81 +asg8 +g4 +sg9 +(lp82 +sg11 +VHomo sapiens ALG1, chitobiosyldiphosphodolichol beta-mannosyltransferase (ALG1), transcript variant 2, mRNA +p83 +sg13 +S'ALG1' +p84 +sg15 +(dp85 +g17 +S'NP_001317433.1:p.(Arg165Gly)' +p86 +sg19 +S'NP_001317433.1:p.(R165G)' +p87 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330504.1:c.493C>G' +p88 +sg27 +g4 +sg28 +(dp89 +S'grch38' +p90 +(dp91 +g32 +S'NC_000016.10:g.5078842C>G' +p92 +sg34 +(dp93 +g36 +g37 +sg38 +g39 +sg40 +S'5078842' +p94 +sg42 +g43 +sssS'grch37' +p95 +(dp96 +g32 +S'NC_000016.9:g.5128843C>G' +p97 +sg34 +(dp98 +g36 +g37 +sg38 +g39 +sg40 +S'5128843' +p99 +sg42 +g43 +sssg49 +(dp100 +g32 +S'NC_000016.10:g.5078842C>G' +p101 +sg34 +(dp102 +g36 +g53 +sg38 +g39 +sg40 +S'5078842' +p103 +sg42 +g43 +sssS'hg19' +p104 +(dp105 +g32 +S'NC_000016.9:g.5128843C>G' +p106 +sg34 +(dp107 +g36 +g53 +sg38 +g39 +sg40 +S'5128843' +p108 +sg42 +g43 +ssssg60 +(dp109 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317433.1' +p110 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330504.1' +p111 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant236.txt b/VariantValidator/testing/testOutputsMasterITS/variant236.txt new file mode 100644 index 00000000..d1db4531 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant236.txt @@ -0,0 +1,171 @@ +(dp0 +S'NM_024306.4:c.95G>A' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens fatty acid 2-hydroxylase (FA2H), mRNA +p12 +sS'gene_symbol' +p13 +S'FA2H' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_077282.3:p.(Arg32His)' +p18 +sS'slr' +p19 +S'NP_077282.3:p.(R32H)' +p20 +ssS'submitted_variant' +p21 +S'16-74808559-C-T' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_024306.4:c.95G>A' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'grch38' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000016.10:g.74774661C>T' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'16' +p37 +sS'ref' +p38 +VC +p39 +sS'pos' +p40 +S'74774661' +p41 +sS'alt' +p42 +VT +p43 +sssS'grch37' +p44 +(dp45 +g32 +S'NC_000016.9:g.74808559C>T' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +g39 +sg40 +S'74808559' +p48 +sg42 +g43 +sssS'hg38' +p49 +(dp50 +g32 +S'NC_000016.10:g.74774661C>T' +p51 +sg34 +(dp52 +g36 +S'chr16' +p53 +sg38 +g39 +sg40 +S'74774661' +p54 +sg42 +g43 +sssS'hg19' +p55 +(dp56 +g32 +S'NC_000016.9:g.74808559C>T' +p57 +sg34 +(dp58 +g36 +g53 +sg38 +g39 +sg40 +S'74808559' +p59 +sg42 +g43 +ssssS'reference_sequence_records' +p60 +(dp61 +S'protein' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_077282.3' +p63 +sS'transcript' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_024306.4' +p65 +sssS'flag' +p66 +S'gene_variant' +p67 +sS'metadata' +p68 +(dp69 
+S'variantvalidator_hgvs_version' +p70 +S'1.1.3' +p71 +sS'uta_schema' +p72 +S'uta_20180821' +p73 +sS'seqrepo_db' +p74 +S'2018-08-21' +p75 +sS'variantvalidator_version' +p76 +S'v0.2' +p77 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant237.txt b/VariantValidator/testing/testOutputsMasterITS/variant237.txt new file mode 100644 index 00000000..31669e82 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant237.txt @@ -0,0 +1,368 @@ +(dp0 +S'NM_003119.3:c.-22C>A' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA +p12 +sS'gene_symbol' +p13 +S'SPG7' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_003110.1:p.?' +p18 +sS'slr' +p19 +S'NP_003110.1:p.?' 
+p20 +ssS'submitted_variant' +p21 +S'16-89574804-C-A' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_003119.3:c.-22C>A' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'grch38' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000016.10:g.89508396C>A' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'16' +p37 +sS'ref' +p38 +S'C' +p39 +sS'pos' +p40 +S'89508396' +p41 +sS'alt' +p42 +S'A' +p43 +sssS'grch37' +p44 +(dp45 +g32 +S'NC_000016.9:g.89574804C>A' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +g39 +sg40 +S'89574804' +p48 +sg42 +g43 +sssS'hg38' +p49 +(dp50 +g32 +S'NC_000016.10:g.89508396C>A' +p51 +sg34 +(dp52 +g36 +S'chr16' +p53 +sg38 +g39 +sg40 +S'89508396' +p54 +sg42 +g43 +sssS'hg19' +p55 +(dp56 +g32 +S'NC_000016.9:g.89574804C>A' +p57 +sg34 +(dp58 +g36 +g53 +sg38 +g39 +sg40 +S'89574804' +p59 +sg42 +g43 +ssssS'reference_sequence_records' +p60 +(dp61 +S'protein' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p63 +sS'transcript' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' +p65 +sssS'flag' +p66 +S'gene_variant' +p67 +sS'NM_199367.2:c.-22C>A' +p68 +(dp69 +g3 +g4 +sg5 +(lp70 +S'RefSeqGene record not available' +p71 +asg8 +g4 +sg9 +(lp72 +sg11 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA +p73 +sg13 +S'SPG7' +p74 +sg15 +(dp75 +g17 +S'NP_955399.1:p.?' +p76 +sg19 +S'NP_955399.1:p.?' 
+p77 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_199367.2:c.-22C>A' +p78 +sg27 +g4 +sg28 +(dp79 +S'grch38' +p80 +(dp81 +g32 +S'NC_000016.10:g.89508396C>A' +p82 +sg34 +(dp83 +g36 +g37 +sg38 +g39 +sg40 +S'89508396' +p84 +sg42 +g43 +sssS'grch37' +p85 +(dp86 +g32 +S'NC_000016.9:g.89574804C>A' +p87 +sg34 +(dp88 +g36 +g37 +sg38 +g39 +sg40 +S'89574804' +p89 +sg42 +g43 +sssg49 +(dp90 +g32 +S'NC_000016.10:g.89508396C>A' +p91 +sg34 +(dp92 +g36 +g53 +sg38 +g39 +sg40 +S'89508396' +p93 +sg42 +g43 +sssS'hg19' +p94 +(dp95 +g32 +S'NC_000016.9:g.89574804C>A' +p96 +sg34 +(dp97 +g36 +g53 +sg38 +g39 +sg40 +S'89574804' +p98 +sg42 +g43 +ssssg60 +(dp99 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' +p100 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2' +p101 +sssS'NM_001363850.1:c.-22C>A' +p102 +(dp103 +g3 +g4 +sg5 +(lp104 +S'RefSeqGene record not available' +p105 +asg8 +g4 +sg9 +(lp106 +sg11 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA +p107 +sg13 +S'SPG7' +p108 +sg15 +(dp109 +g17 +S'NP_001350779.1:p.?' +p110 +sg19 +S'NP_001350779.1:p.?' +p111 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001363850.1:c.-22C>A' +p112 +sg27 +g4 +sg28 +(dp113 +S'hg19' +p114 +(dp115 +g32 +S'NC_000016.9:g.89574804C>A' +p116 +sg34 +(dp117 +g36 +g53 +sg38 +g39 +sg40 +S'89574804' +p118 +sg42 +g43 +sssS'grch37' +p119 +(dp120 +g32 +S'NC_000016.9:g.89574804C>A' +p121 +sg34 +(dp122 +g36 +g37 +sg38 +g39 +sg40 +S'89574804' +p123 +sg42 +g43 +ssssg60 +(dp124 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' +p125 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' +p126 +sssS'metadata' +p127 +(dp128 +S'variantvalidator_hgvs_version' +p129 +S'1.1.3' +p130 +sS'uta_schema' +p131 +S'uta_20180821' +p132 +sS'seqrepo_db' +p133 +S'2018-08-21' +p134 +sS'variantvalidator_version' +p135 +S'v0.2' +p136 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant238.txt b/VariantValidator/testing/testOutputsMasterITS/variant238.txt new file mode 100644 index 00000000..756d1d36 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant238.txt @@ -0,0 +1,544 @@ +(dp0 +S'NM_003119.2:c.1A>C' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' +p7 +aS'NM_003119.3:c.1A>C MUST be fully validated prior to use in reports' +p8 +aS'select_variants=NM_003119.3:c.1A>C' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g4 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA +p15 +sS'gene_symbol' +p16 +S'SPG7' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_003110.1:p.(Met1?)' +p21 +sS'slr' +p22 +S'NP_003110.1:p.(M1?)' +p23 +ssS'submitted_variant' +p24 +S'16-89574826-A-C' +p25 +sS'genome_context_intronic_sequence' +p26 +g4 +sS'hgvs_lrg_variant' +p27 +g4 +sS'hgvs_transcript_variant' +p28 +S'NM_003119.2:c.1A>C' +p29 +sS'hgvs_refseqgene_variant' +p30 +g4 +sS'primary_assembly_loci' +p31 +(dp32 +S'hg19' +p33 +(dp34 +S'hgvs_genomic_description' +p35 +S'NC_000016.9:g.89574826A>C' +p36 +sS'vcf' +p37 +(dp38 +S'chr' +p39 +S'chr16' +p40 +sS'ref' +p41 +S'A' +p42 +sS'pos' +p43 +S'89574826' +p44 +sS'alt' +p45 +S'C' +p46 +sssS'grch37' +p47 +(dp48 +g35 +S'NC_000016.9:g.89574826A>C' +p49 +sg37 +(dp50 +g39 +S'16' +p51 +sg41 +g42 +sg43 +S'89574826' +p52 +sg45 +g46 +ssssS'reference_sequence_records' +p53 +(dp54 +S'protein' +p55 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p56 +sS'transcript' +p57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' +p58 +sssS'NM_199367.1:c.1A>C' +p59 +(dp60 +g3 +g4 +sg5 
+(lp61 +S'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' +p62 +aS'NM_199367.2:c.1A>C MUST be fully validated prior to use in reports' +p63 +aS'select_variants=NM_199367.2:c.1A>C' +p64 +aS'RefSeqGene record not available' +p65 +asg11 +g4 +sg12 +(lp66 +sg14 +VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA +p67 +sg16 +S'SPG7' +p68 +sg18 +(dp69 +g20 +S'NP_955399.1:p.(Met1?)' +p70 +sg22 +S'NP_955399.1:p.(M1?)' +p71 +ssg24 +g25 +sg26 +g4 +sg27 +g4 +sg28 +S'NM_199367.1:c.1A>C' +p72 +sg30 +g4 +sg31 +(dp73 +S'hg19' +p74 +(dp75 +g35 +S'NC_000016.9:g.89574826A>C' +p76 +sg37 +(dp77 +g39 +g40 +sg41 +g42 +sg43 +S'89574826' +p78 +sg45 +g46 +sssS'grch37' +p79 +(dp80 +g35 +S'NC_000016.9:g.89574826A>C' +p81 +sg37 +(dp82 +g39 +g51 +sg41 +g42 +sg43 +S'89574826' +p83 +sg45 +g46 +ssssg53 +(dp84 +g55 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' +p85 +sg57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1' +p86 +sssS'NM_001363850.1:c.1A>C' +p87 +(dp88 +g3 +g4 +sg5 +(lp89 +S'RefSeqGene record not available' +p90 +asg11 +g4 +sg12 +(lp91 +sg14 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA +p92 +sg16 +S'SPG7' +p93 +sg18 +(dp94 +g20 +S'NP_001350779.1:p.(Met1?)' +p95 +sg22 +S'NP_001350779.1:p.(M1?)' +p96 +ssg24 +g25 +sg26 +g4 +sg27 +g4 +sg28 +S'NM_001363850.1:c.1A>C' +p97 +sg30 +g4 +sg31 +(dp98 +S'hg19' +p99 +(dp100 +g35 +S'NC_000016.9:g.89574826A>C' +p101 +sg37 +(dp102 +g39 +g40 +sg41 +g42 +sg43 +S'89574826' +p103 +sg45 +g46 +sssS'grch37' +p104 +(dp105 +g35 +S'NC_000016.9:g.89574826A>C' +p106 +sg37 +(dp107 +g39 +g51 +sg41 +g42 +sg43 +S'89574826' +p108 +sg45 +g46 +ssssg53 +(dp109 +g55 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' +p110 +sg57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' +p111 +sssS'NM_199367.2:c.1A>C' +p112 +(dp113 +g3 +g4 +sg5 +(lp114 +S'RefSeqGene record not available' +p115 +asg11 
+g4 +sg12 +(lp116 +sg14 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA +p117 +sg16 +S'SPG7' +p118 +sg18 +(dp119 +g20 +S'NP_955399.1:p.(Met1?)' +p120 +sg22 +S'NP_955399.1:p.(M1?)' +p121 +ssg24 +g25 +sg26 +g4 +sg27 +g4 +sg28 +S'NM_199367.2:c.1A>C' +p122 +sg30 +g4 +sg31 +(dp123 +S'grch38' +p124 +(dp125 +g35 +S'NC_000016.10:g.89508418A>C' +p126 +sg37 +(dp127 +g39 +g51 +sg41 +g42 +sg43 +S'89508418' +p128 +sg45 +g46 +sssS'grch37' +p129 +(dp130 +g35 +S'NC_000016.9:g.89574826A>C' +p131 +sg37 +(dp132 +g39 +g51 +sg41 +g42 +sg43 +S'89574826' +p133 +sg45 +g46 +sssS'hg38' +p134 +(dp135 +g35 +S'NC_000016.10:g.89508418A>C' +p136 +sg37 +(dp137 +g39 +g40 +sg41 +g42 +sg43 +S'89508418' +p138 +sg45 +g46 +sssS'hg19' +p139 +(dp140 +g35 +S'NC_000016.9:g.89574826A>C' +p141 +sg37 +(dp142 +g39 +g40 +sg41 +g42 +sg43 +S'89574826' +p143 +sg45 +g46 +ssssg53 +(dp144 +g55 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' +p145 +sg57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2' +p146 +sssS'flag' +p147 +S'gene_variant' +p148 +sS'NM_003119.3:c.1A>C' +p149 +(dp150 +g3 +g4 +sg5 +(lp151 +S'RefSeqGene record not available' +p152 +asg11 +g4 +sg12 +(lp153 +sg14 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA +p154 +sg16 +S'SPG7' +p155 +sg18 +(dp156 +g20 +S'NP_003110.1:p.(Met1?)' +p157 +sg22 +S'NP_003110.1:p.(M1?)' +p158 +ssg24 +g25 +sg26 +g4 +sg27 +g4 +sg28 +S'NM_003119.3:c.1A>C' +p159 +sg30 +g4 +sg31 +(dp160 +S'grch38' +p161 +(dp162 +g35 +S'NC_000016.10:g.89508418A>C' +p163 +sg37 +(dp164 +g39 +g51 +sg41 +g42 +sg43 +S'89508418' +p165 +sg45 +g46 +sssS'grch37' +p166 +(dp167 +g35 +S'NC_000016.9:g.89574826A>C' +p168 +sg37 +(dp169 +g39 +g51 +sg41 +g42 +sg43 +S'89574826' +p170 +sg45 +g46 +sssg134 +(dp171 +g35 +S'NC_000016.10:g.89508418A>C' +p172 +sg37 +(dp173 +g39 +g40 +sg41 +g42 +sg43 +S'89508418' +p174 +sg45 +g46 +sssS'hg19' +p175 +(dp176 +g35 +S'NC_000016.9:g.89574826A>C' +p177 +sg37 +(dp178 +g39 +g40 
+sg41 +g42 +sg43 +S'89574826' +p179 +sg45 +g46 +ssssg53 +(dp180 +g55 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p181 +sg57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' +p182 +sssS'metadata' +p183 +(dp184 +S'variantvalidator_hgvs_version' +p185 +S'1.1.3' +p186 +sS'uta_schema' +p187 +S'uta_20180821' +p188 +sS'seqrepo_db' +p189 +S'2018-08-21' +p190 +sS'variantvalidator_version' +p191 +S'v0.2' +p192 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant239.txt b/VariantValidator/testing/testOutputsMasterITS/variant239.txt new file mode 100644 index 00000000..f9178588 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant239.txt @@ -0,0 +1,567 @@ +(dp0 +S'NM_001363850.1:c.90dup' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000016.9:g.89574914G>GT automapped to NC_000016.9:g.89574915dupT' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA +p13 +sS'gene_symbol' +p14 +S'SPG7' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_001350779.1:p.(Pro31SerfsTer43)' +p19 +sS'slr' +p20 +S'NP_001350779.1:p.(P31Sfs*43)' +p21 +ssS'submitted_variant' +p22 +S'16-89574914-G-GT' +p23 +sS'genome_context_intronic_sequence' +p24 +g4 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_001363850.1:c.90dup' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000016.9:g.89574915dup' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'chr16' +p38 +sS'ref' +p39 +S'T' +p40 +sS'pos' +p41 +S'89574915' +p42 +sS'alt' +p43 +S'TT' +p44 +sssS'grch37' +p45 +(dp46 +g33 +S'NC_000016.9:g.89574915dup' +p47 +sg35 +(dp48 +g37 +S'16' +p49 +sg39 +g40 +sg41 
+S'89574915' +p50 +sg43 +S'TT' +p51 +ssssS'reference_sequence_records' +p52 +(dp53 +S'protein' +p54 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' +p55 +sS'transcript' +p56 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' +p57 +sssS'NM_199367.1:c.90dup' +p58 +(dp59 +g3 +g4 +sg5 +(lp60 +S'NC_000016.9:g.89574914G>GT automapped to NC_000016.9:g.89574915dupT' +p61 +aS'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' +p62 +aS'NM_199367.2:c.90dupT MUST be fully validated prior to use in reports' +p63 +aS'select_variants=NM_199367.2:c.90dup' +p64 +aS'RefSeqGene record not available' +p65 +asg9 +g4 +sg10 +(lp66 +sg12 +VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA +p67 +sg14 +S'SPG7' +p68 +sg16 +(dp69 +g18 +S'NP_955399.1:p.(Pro31SerfsTer43)' +p70 +sg20 +S'NP_955399.1:p.(P31Sfs*43)' +p71 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_199367.1:c.90dup' +p72 +sg28 +g4 +sg29 +(dp73 +S'hg19' +p74 +(dp75 +g33 +S'NC_000016.9:g.89574915dup' +p76 +sg35 +(dp77 +g37 +g38 +sg39 +g40 +sg41 +S'89574915' +p78 +sg43 +S'TT' +p79 +sssS'grch37' +p80 +(dp81 +g33 +S'NC_000016.9:g.89574915dup' +p82 +sg35 +(dp83 +g37 +g49 +sg39 +g40 +sg41 +S'89574915' +p84 +sg43 +S'TT' +p85 +ssssg52 +(dp86 +g54 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' +p87 +sg56 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1' +p88 +sssS'NM_003119.2:c.90dup' +p89 +(dp90 +g3 +g4 +sg5 +(lp91 +S'NC_000016.9:g.89574914G>GT automapped to NC_000016.9:g.89574915dupT' +p92 +aS'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' +p93 +aS'NM_003119.3:c.90dupT MUST be fully validated prior to use in reports' +p94 +aS'select_variants=NM_003119.3:c.90dup' +p95 +aS'RefSeqGene record not available' +p96 +asg9 +g4 +sg10 +(lp97 +sg12 +VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA +p98 +sg14 
+S'SPG7' +p99 +sg16 +(dp100 +g18 +S'NP_003110.1:p.(Pro31SerfsTer43)' +p101 +sg20 +S'NP_003110.1:p.(P31Sfs*43)' +p102 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_003119.2:c.90dup' +p103 +sg28 +g4 +sg29 +(dp104 +S'hg19' +p105 +(dp106 +g33 +S'NC_000016.9:g.89574915dup' +p107 +sg35 +(dp108 +g37 +g38 +sg39 +g40 +sg41 +S'89574915' +p109 +sg43 +S'TT' +p110 +sssS'grch37' +p111 +(dp112 +g33 +S'NC_000016.9:g.89574915dup' +p113 +sg35 +(dp114 +g37 +g49 +sg39 +g40 +sg41 +S'89574915' +p115 +sg43 +S'TT' +p116 +ssssg52 +(dp117 +g54 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p118 +sg56 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' +p119 +sssS'NM_199367.2:c.90dup' +p120 +(dp121 +g3 +g4 +sg5 +(lp122 +S'NC_000016.9:g.89574914G>GT automapped to NC_000016.9:g.89574915dupT' +p123 +aS'RefSeqGene record not available' +p124 +asg9 +g4 +sg10 +(lp125 +sg12 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA +p126 +sg14 +S'SPG7' +p127 +sg16 +(dp128 +g18 +S'NP_955399.1:p.(Pro31SerfsTer43)' +p129 +sg20 +S'NP_955399.1:p.(P31Sfs*43)' +p130 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_199367.2:c.90dup' +p131 +sg28 +g4 +sg29 +(dp132 +S'grch38' +p133 +(dp134 +g33 +S'NC_000016.10:g.89508507dup' +p135 +sg35 +(dp136 +g37 +g49 +sg39 +g40 +sg41 +S'89508507' +p137 +sg43 +S'TT' +p138 +sssS'grch37' +p139 +(dp140 +g33 +S'NC_000016.9:g.89574915dup' +p141 +sg35 +(dp142 +g37 +g49 +sg39 +g40 +sg41 +S'89574915' +p143 +sg43 +S'TT' +p144 +sssS'hg38' +p145 +(dp146 +g33 +S'NC_000016.10:g.89508507dup' +p147 +sg35 +(dp148 +g37 +g38 +sg39 +g40 +sg41 +S'89508507' +p149 +sg43 +S'TT' +p150 +sssS'hg19' +p151 +(dp152 +g33 +S'NC_000016.9:g.89574915dup' +p153 +sg35 +(dp154 +g37 +g38 +sg39 +g40 +sg41 +S'89574915' +p155 +sg43 +S'TT' +p156 +ssssg52 +(dp157 +g54 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' +p158 +sg56 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2' +p159 +sssS'flag' +p160 +S'gene_variant' +p161 +sS'NM_003119.3:c.90dup' +p162 +(dp163 +g3 +g4 
+sg5 +(lp164 +S'NC_000016.9:g.89574914G>GT automapped to NC_000016.9:g.89574915dupT' +p165 +aS'RefSeqGene record not available' +p166 +asg9 +g4 +sg10 +(lp167 +sg12 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA +p168 +sg14 +S'SPG7' +p169 +sg16 +(dp170 +g18 +S'NP_003110.1:p.(Pro31SerfsTer43)' +p171 +sg20 +S'NP_003110.1:p.(P31Sfs*43)' +p172 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_003119.3:c.90dup' +p173 +sg28 +g4 +sg29 +(dp174 +S'grch38' +p175 +(dp176 +g33 +S'NC_000016.10:g.89508507dup' +p177 +sg35 +(dp178 +g37 +g49 +sg39 +g40 +sg41 +S'89508507' +p179 +sg43 +S'TT' +p180 +sssS'grch37' +p181 +(dp182 +g33 +S'NC_000016.9:g.89574915dup' +p183 +sg35 +(dp184 +g37 +g49 +sg39 +g40 +sg41 +S'89574915' +p185 +sg43 +S'TT' +p186 +sssg145 +(dp187 +g33 +S'NC_000016.10:g.89508507dup' +p188 +sg35 +(dp189 +g37 +g38 +sg39 +g40 +sg41 +S'89508507' +p190 +sg43 +S'TT' +p191 +sssS'hg19' +p192 +(dp193 +g33 +S'NC_000016.9:g.89574915dup' +p194 +sg35 +(dp195 +g37 +g38 +sg39 +g40 +sg41 +S'89574915' +p196 +sg43 +S'TT' +p197 +ssssg52 +(dp198 +g54 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p199 +sg56 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' +p200 +sssS'metadata' +p201 +(dp202 +S'variantvalidator_hgvs_version' +p203 +S'1.1.3' +p204 +sS'uta_schema' +p205 +S'uta_20180821' +p206 +sS'seqrepo_db' +p207 +S'2018-08-21' +p208 +sS'variantvalidator_version' +p209 +S'v0.2' +p210 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant24.txt b/VariantValidator/testing/testOutputsMasterITS/variant24.txt new file mode 100644 index 00000000..1e278cbd --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant24.txt @@ -0,0 +1,82 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant' +p7 +aS'Instead use NC_000011.9:g.5244828_5248381=' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +g4 +sS'gene_symbol' +p13 +g4 +sS'hgvs_predicted_protein_consequence' +p14 +(dp15 +S'tlr' +p16 +g4 +sS'slr' +p17 +g4 +ssS'submitted_variant' +p18 +S'NM_000518.4:c.-130_*2000=' +p19 +sS'genome_context_intronic_sequence' +p20 +g4 +sS'hgvs_lrg_variant' +p21 +g4 +sS'hgvs_transcript_variant' +p22 +g4 +sS'hgvs_refseqgene_variant' +p23 +g4 +sS'primary_assembly_loci' +p24 +(dp25 +sS'reference_sequence_records' +p26 +g4 +ssS'flag' +p27 +S'warning' +p28 +sS'metadata' +p29 +(dp30 +S'variantvalidator_hgvs_version' +p31 +S'1.1.3' +p32 +sS'uta_schema' +p33 +S'uta_20180821' +p34 +sS'seqrepo_db' +p35 +S'2018-08-21' +p36 +sS'variantvalidator_version' +p37 +S'v0.2' +p38 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant240.txt b/VariantValidator/testing/testOutputsMasterITS/variant240.txt new file mode 100644 index 00000000..3f8bc1a3 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant240.txt @@ -0,0 +1,580 @@ +(dp0 +S'NM_199367.2:c.89_91dup' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000016.9:g.89574916C>CGTC automapped to NC_000016.9:g.89574914_89574916dupGTC' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA +p13 +sS'gene_symbol' +p14 +S'SPG7' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_955399.1:p.(Ser30_Pro31insArg)' +p19 +sS'slr' +p20 +S'NP_955399.1:p.(S30_P31insR)' +p21 +ssS'submitted_variant' +p22 +S'16-89574916-C-CGTC' +p23 +sS'genome_context_intronic_sequence' +p24 +g4 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_199367.2:c.89_91dup' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'grch38' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000016.10:g.89508506_89508508dup' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'16' +p38 +sS'ref' +p39 +S'GTC' +p40 +sS'pos' +p41 +S'89508506' +p42 +sS'alt' +p43 +S'GTCGTC' +p44 +sssS'grch37' +p45 +(dp46 +g33 +S'NC_000016.9:g.89574914_89574916dup' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +S'GTC' +p49 +sg41 +S'89574914' +p50 +sg43 +S'GTCGTC' +p51 +sssS'hg38' +p52 +(dp53 +g33 +S'NC_000016.10:g.89508506_89508508dup' +p54 +sg35 +(dp55 +g37 +S'chr16' +p56 +sg39 +S'GTC' +p57 +sg41 +S'89508506' +p58 +sg43 +S'GTCGTC' +p59 +sssS'hg19' +p60 +(dp61 +g33 +S'NC_000016.9:g.89574914_89574916dup' +p62 +sg35 +(dp63 +g37 +g56 +sg39 +S'GTC' +p64 +sg41 +S'89574914' +p65 +sg43 +S'GTCGTC' +p66 
+ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2' +p72 +sssS'NM_003119.3:c.89_91dup' +p73 +(dp74 +g3 +g4 +sg5 +(lp75 +S'NC_000016.9:g.89574916C>CGTC automapped to NC_000016.9:g.89574914_89574916dupGTC' +p76 +aS'RefSeqGene record not available' +p77 +asg9 +g4 +sg10 +(lp78 +sg12 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA +p79 +sg14 +S'SPG7' +p80 +sg16 +(dp81 +g18 +S'NP_003110.1:p.(Ser30_Pro31insArg)' +p82 +sg20 +S'NP_003110.1:p.(S30_P31insR)' +p83 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_003119.3:c.89_91dup' +p84 +sg28 +g4 +sg29 +(dp85 +S'grch38' +p86 +(dp87 +g33 +S'NC_000016.10:g.89508506_89508508dup' +p88 +sg35 +(dp89 +g37 +g38 +sg39 +S'GTC' +p90 +sg41 +S'89508506' +p91 +sg43 +S'GTCGTC' +p92 +sssS'grch37' +p93 +(dp94 +g33 +S'NC_000016.9:g.89574914_89574916dup' +p95 +sg35 +(dp96 +g37 +g38 +sg39 +S'GTC' +p97 +sg41 +S'89574914' +p98 +sg43 +S'GTCGTC' +p99 +sssg52 +(dp100 +g33 +S'NC_000016.10:g.89508506_89508508dup' +p101 +sg35 +(dp102 +g37 +g56 +sg39 +S'GTC' +p103 +sg41 +S'89508506' +p104 +sg43 +S'GTCGTC' +p105 +sssS'hg19' +p106 +(dp107 +g33 +S'NC_000016.9:g.89574914_89574916dup' +p108 +sg35 +(dp109 +g37 +g56 +sg39 +S'GTC' +p110 +sg41 +S'89574914' +p111 +sg43 +S'GTCGTC' +p112 +ssssg67 +(dp113 +g69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p114 +sg71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' +p115 +sssS'NM_001363850.1:c.89_91dup' +p116 +(dp117 +g3 +g4 +sg5 +(lp118 +S'NC_000016.9:g.89574916C>CGTC automapped to NC_000016.9:g.89574914_89574916dupGTC' +p119 +aS'RefSeqGene record not available' +p120 +asg9 +g4 +sg10 +(lp121 +sg12 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA +p122 +sg14 +S'SPG7' +p123 +sg16 +(dp124 +g18 +S'NP_001350779.1:p.(Ser30_Pro31insArg)' +p125 +sg20 
+S'NP_001350779.1:p.(S30_P31insR)' +p126 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001363850.1:c.89_91dup' +p127 +sg28 +g4 +sg29 +(dp128 +S'hg19' +p129 +(dp130 +g33 +S'NC_000016.9:g.89574914_89574916dup' +p131 +sg35 +(dp132 +g37 +g56 +sg39 +S'GTC' +p133 +sg41 +S'89574914' +p134 +sg43 +S'GTCGTC' +p135 +sssS'grch37' +p136 +(dp137 +g33 +S'NC_000016.9:g.89574914_89574916dup' +p138 +sg35 +(dp139 +g37 +g38 +sg39 +S'GTC' +p140 +sg41 +S'89574914' +p141 +sg43 +S'GTCGTC' +p142 +ssssg67 +(dp143 +g69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' +p144 +sg71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' +p145 +sssS'flag' +p146 +S'gene_variant' +p147 +sS'NM_199367.1:c.89_91dup' +p148 +(dp149 +g3 +g4 +sg5 +(lp150 +S'NC_000016.9:g.89574916C>CGTC automapped to NC_000016.9:g.89574914_89574916dupGTC' +p151 +aS'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' +p152 +aS'NM_199367.2:c.89_91dupGTC MUST be fully validated prior to use in reports' +p153 +aS'select_variants=NM_199367.2:c.89_91dup' +p154 +aS'RefSeqGene record not available' +p155 +asg9 +g4 +sg10 +(lp156 +sg12 +VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA +p157 +sg14 +S'SPG7' +p158 +sg16 +(dp159 +g18 +S'NP_955399.1:p.(Ser30_Pro31insArg)' +p160 +sg20 +S'NP_955399.1:p.(S30_P31insR)' +p161 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_199367.1:c.89_91dup' +p162 +sg28 +g4 +sg29 +(dp163 +S'hg19' +p164 +(dp165 +g33 +S'NC_000016.9:g.89574914_89574916dup' +p166 +sg35 +(dp167 +g37 +g56 +sg39 +S'GTC' +p168 +sg41 +S'89574914' +p169 +sg43 +S'GTCGTC' +p170 +sssS'grch37' +p171 +(dp172 +g33 +S'NC_000016.9:g.89574914_89574916dup' +p173 +sg35 +(dp174 +g37 +g38 +sg39 +S'GTC' +p175 +sg41 +S'89574914' +p176 +sg43 +S'GTCGTC' +p177 +ssssg67 +(dp178 +g69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' +p179 +sg71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1' +p180 +sssS'NM_003119.2:c.89_91dup' +p181 
+(dp182 +g3 +g4 +sg5 +(lp183 +S'NC_000016.9:g.89574916C>CGTC automapped to NC_000016.9:g.89574914_89574916dupGTC' +p184 +aS'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' +p185 +aS'NM_003119.3:c.89_91dupGTC MUST be fully validated prior to use in reports' +p186 +aS'select_variants=NM_003119.3:c.89_91dup' +p187 +aS'RefSeqGene record not available' +p188 +asg9 +g4 +sg10 +(lp189 +sg12 +VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA +p190 +sg14 +S'SPG7' +p191 +sg16 +(dp192 +g18 +S'NP_003110.1:p.(Ser30_Pro31insArg)' +p193 +sg20 +S'NP_003110.1:p.(S30_P31insR)' +p194 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_003119.2:c.89_91dup' +p195 +sg28 +g4 +sg29 +(dp196 +S'hg19' +p197 +(dp198 +g33 +S'NC_000016.9:g.89574914_89574916dup' +p199 +sg35 +(dp200 +g37 +g56 +sg39 +S'GTC' +p201 +sg41 +S'89574914' +p202 +sg43 +S'GTCGTC' +p203 +sssS'grch37' +p204 +(dp205 +g33 +S'NC_000016.9:g.89574914_89574916dup' +p206 +sg35 +(dp207 +g37 +g38 +sg39 +S'GTC' +p208 +sg41 +S'89574914' +p209 +sg43 +S'GTCGTC' +p210 +ssssg67 +(dp211 +g69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p212 +sg71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' +p213 +sssS'metadata' +p214 +(dp215 +S'variantvalidator_hgvs_version' +p216 +S'1.1.3' +p217 +sS'uta_schema' +p218 +S'uta_20180821' +p219 +sS'seqrepo_db' +p220 +S'2018-08-21' +p221 +sS'variantvalidator_version' +p222 +S'v0.2' +p223 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant241.txt b/VariantValidator/testing/testOutputsMasterITS/variant241.txt new file mode 100644 index 00000000..bb3c2985 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant241.txt @@ -0,0 +1,549 @@ +(dp0 +S'NM_199367.2:c.183+1G>A' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA +p12 +sS'gene_symbol' +p13 +S'SPG7' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_955399.1:p.?' +p18 +sS'slr' +p19 +S'NP_955399.1:p.?' +p20 +ssS'submitted_variant' +p21 +S'16-89575009-G-A' +p22 +sS'genome_context_intronic_sequence' +p23 +S'NC_000016.9(NM_199367.2):c.183+1G>A' +p24 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_199367.2:c.183+1G>A' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'grch38' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000016.10:g.89508601G>A' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'16' +p38 +sS'ref' +p39 +S'G' +p40 +sS'pos' +p41 +S'89508601' +p42 +sS'alt' +p43 +S'A' +p44 +sssS'grch37' +p45 +(dp46 +g33 +S'NC_000016.9:g.89575009G>A' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +g40 +sg41 +S'89575009' +p49 +sg43 +g44 +sssS'hg38' +p50 +(dp51 +g33 +S'NC_000016.10:g.89508601G>A' +p52 +sg35 +(dp53 +g37 +S'chr16' +p54 +sg39 +g40 +sg41 +S'89508601' +p55 +sg43 +g44 +sssS'hg19' +p56 +(dp57 +g33 +S'NC_000016.9:g.89575009G>A' +p58 +sg35 +(dp59 +g37 +g54 +sg39 +g40 +sg41 +S'89575009' +p60 +sg43 +g44 +ssssS'reference_sequence_records' +p61 +(dp62 +S'protein' +p63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' +p64 +sS'transcript' +p65 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2' +p66 +sssS'NM_003119.2:c.183+1G>A' +p67 +(dp68 +g3 +g4 +sg5 +(lp69 +S'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' +p70 +aS'NM_003119.3:c.183+1G>A MUST be fully validated prior to use in reports' +p71 +aS'select_variants=NM_003119.3:c.183+1G>A' +p72 +aS'RefSeqGene record not available' +p73 +asg8 +g4 +sg9 +(lp74 +sg11 +VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA +p75 +sg13 +S'SPG7' +p76 +sg15 +(dp77 +g17 +S'NP_003110.1:p.?' +p78 +sg19 +S'NP_003110.1:p.?' +p79 +ssg21 +g22 +sg23 +S'NC_000016.9(NM_003119.2):c.183+1G>A' +p80 +sg25 +g4 +sg26 +S'NM_003119.2:c.183+1G>A' +p81 +sg28 +g4 +sg29 +(dp82 +S'hg19' +p83 +(dp84 +g33 +S'NC_000016.9:g.89575009G>A' +p85 +sg35 +(dp86 +g37 +g54 +sg39 +g40 +sg41 +S'89575009' +p87 +sg43 +g44 +sssS'grch37' +p88 +(dp89 +g33 +S'NC_000016.9:g.89575009G>A' +p90 +sg35 +(dp91 +g37 +g38 +sg39 +g40 +sg41 +S'89575009' +p92 +sg43 +g44 +ssssg61 +(dp93 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p94 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' +p95 +sssS'flag' +p96 +S'gene_variant' +p97 +sS'NM_199367.1:c.183+1G>A' +p98 +(dp99 +g3 +g4 +sg5 +(lp100 +S'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' +p101 +aS'NM_199367.2:c.183+1G>A MUST be fully validated prior to use in reports' +p102 +aS'select_variants=NM_199367.2:c.183+1G>A' +p103 +aS'RefSeqGene record not available' +p104 +asg8 +g4 +sg9 +(lp105 +sg11 +VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA +p106 +sg13 +S'SPG7' +p107 +sg15 +(dp108 +g17 +S'NP_955399.1:p.?' +p109 +sg19 +S'NP_955399.1:p.?' 
+p110 +ssg21 +g22 +sg23 +S'NC_000016.9(NM_199367.1):c.183+1G>A' +p111 +sg25 +g4 +sg26 +S'NM_199367.1:c.183+1G>A' +p112 +sg28 +g4 +sg29 +(dp113 +S'hg19' +p114 +(dp115 +g33 +S'NC_000016.9:g.89575009G>A' +p116 +sg35 +(dp117 +g37 +g54 +sg39 +g40 +sg41 +S'89575009' +p118 +sg43 +g44 +sssS'grch37' +p119 +(dp120 +g33 +S'NC_000016.9:g.89575009G>A' +p121 +sg35 +(dp122 +g37 +g38 +sg39 +g40 +sg41 +S'89575009' +p123 +sg43 +g44 +ssssg61 +(dp124 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' +p125 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1' +p126 +sssS'NM_001363850.1:c.183+1G>A' +p127 +(dp128 +g3 +g4 +sg5 +(lp129 +S'RefSeqGene record not available' +p130 +asg8 +g4 +sg9 +(lp131 +sg11 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA +p132 +sg13 +S'SPG7' +p133 +sg15 +(dp134 +g17 +S'NP_001350779.1:p.?' +p135 +sg19 +S'NP_001350779.1:p.?' +p136 +ssg21 +g22 +sg23 +S'NC_000016.9(NM_001363850.1):c.183+1G>A' +p137 +sg25 +g4 +sg26 +S'NM_001363850.1:c.183+1G>A' +p138 +sg28 +g4 +sg29 +(dp139 +S'hg19' +p140 +(dp141 +g33 +S'NC_000016.9:g.89575009G>A' +p142 +sg35 +(dp143 +g37 +g54 +sg39 +g40 +sg41 +S'89575009' +p144 +sg43 +g44 +sssS'grch37' +p145 +(dp146 +g33 +S'NC_000016.9:g.89575009G>A' +p147 +sg35 +(dp148 +g37 +g38 +sg39 +g40 +sg41 +S'89575009' +p149 +sg43 +g44 +ssssg61 +(dp150 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' +p151 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' +p152 +sssS'NM_003119.3:c.183+1G>A' +p153 +(dp154 +g3 +g4 +sg5 +(lp155 +S'RefSeqGene record not available' +p156 +asg8 +g4 +sg9 +(lp157 +sg11 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA +p158 +sg13 +S'SPG7' +p159 +sg15 +(dp160 +g17 +S'NP_003110.1:p.?' +p161 +sg19 +S'NP_003110.1:p.?' 
+p162 +ssg21 +g22 +sg23 +S'NC_000016.9(NM_003119.3):c.183+1G>A' +p163 +sg25 +g4 +sg26 +S'NM_003119.3:c.183+1G>A' +p164 +sg28 +g4 +sg29 +(dp165 +S'grch38' +p166 +(dp167 +g33 +S'NC_000016.10:g.89508601G>A' +p168 +sg35 +(dp169 +g37 +g38 +sg39 +g40 +sg41 +S'89508601' +p170 +sg43 +g44 +sssS'grch37' +p171 +(dp172 +g33 +S'NC_000016.9:g.89575009G>A' +p173 +sg35 +(dp174 +g37 +g38 +sg39 +g40 +sg41 +S'89575009' +p175 +sg43 +g44 +sssg50 +(dp176 +g33 +S'NC_000016.10:g.89508601G>A' +p177 +sg35 +(dp178 +g37 +g54 +sg39 +g40 +sg41 +S'89508601' +p179 +sg43 +g44 +sssS'hg19' +p180 +(dp181 +g33 +S'NC_000016.9:g.89575009G>A' +p182 +sg35 +(dp183 +g37 +g54 +sg39 +g40 +sg41 +S'89575009' +p184 +sg43 +g44 +ssssg61 +(dp185 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p186 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' +p187 +sssS'metadata' +p188 +(dp189 +S'variantvalidator_hgvs_version' +p190 +S'1.1.3' +p191 +sS'uta_schema' +p192 +S'uta_20180821' +p193 +sS'seqrepo_db' +p194 +S'2018-08-21' +p195 +sS'variantvalidator_version' +p196 +S'v0.2' +p197 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant242.txt b/VariantValidator/testing/testOutputsMasterITS/variant242.txt new file mode 100644 index 00000000..59175b57 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant242.txt @@ -0,0 +1,1106 @@ +(dp0 +S'NM_199367.1:c.183+32_183+33insA' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'Multiple ALT sequences detected' +p7 +aS'auto-submitting all possible combinations' +p8 +aS'NC_000016.9:g.89575040C>CA automapped to NC_000016.9:g.89575040_89575041insA' +p9 +aS'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' +p10 +aS'NM_199367.2:c.183+32_183+33insA MUST be fully validated prior to use in reports' +p11 +aS'select_variants=NM_199367.2:c.183+32_183+33insA' +p12 +aS'RefSeqGene record not available' +p13 +asS'refseqgene_context_intronic_sequence' +p14 +g4 +sS'alt_genomic_loci' +p15 +(lp16 +sS'transcript_description' +p17 +VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA +p18 +sS'gene_symbol' +p19 +S'SPG7' +p20 +sS'hgvs_predicted_protein_consequence' +p21 +(dp22 +S'tlr' +p23 +S'NP_955399.1:p.?' +p24 +sS'slr' +p25 +S'NP_955399.1:p.?' 
+p26 +ssS'submitted_variant' +p27 +S'16-89575040-C-A,CA' +p28 +sS'genome_context_intronic_sequence' +p29 +S'NC_000016.9(NM_199367.1):c.183+32_183+33insA' +p30 +sS'hgvs_lrg_variant' +p31 +g4 +sS'hgvs_transcript_variant' +p32 +S'NM_199367.1:c.183+32_183+33insA' +p33 +sS'hgvs_refseqgene_variant' +p34 +g4 +sS'primary_assembly_loci' +p35 +(dp36 +S'hg19' +p37 +(dp38 +S'hgvs_genomic_description' +p39 +S'NC_000016.9:g.89575040_89575041insA' +p40 +sS'vcf' +p41 +(dp42 +S'chr' +p43 +S'chr16' +p44 +sS'ref' +p45 +S'C' +p46 +sS'pos' +p47 +S'89575040' +p48 +sS'alt' +p49 +S'CA' +p50 +sssS'grch37' +p51 +(dp52 +g39 +S'NC_000016.9:g.89575040_89575041insA' +p53 +sg41 +(dp54 +g43 +S'16' +p55 +sg45 +g46 +sg47 +S'89575040' +p56 +sg49 +S'CA' +p57 +ssssS'reference_sequence_records' +p58 +(dp59 +S'protein' +p60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' +p61 +sS'transcript' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1' +p63 +sssS'NM_001363850.1:c.183+32C>A' +p64 +(dp65 +g3 +g4 +sg5 +(lp66 +S'Multiple ALT sequences detected' +p67 +aS'auto-submitting all possible combinations' +p68 +aS'RefSeqGene record not available' +p69 +asg14 +g4 +sg15 +(lp70 +sg17 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA +p71 +sg19 +S'SPG7' +p72 +sg21 +(dp73 +g23 +S'NP_001350779.1:p.?' +p74 +sg25 +S'NP_001350779.1:p.?' 
+p75 +ssg27 +g28 +sg29 +S'NC_000016.9(NM_001363850.1):c.183+32C>A' +p76 +sg31 +g4 +sg32 +S'NM_001363850.1:c.183+32C>A' +p77 +sg34 +g4 +sg35 +(dp78 +S'hg19' +p79 +(dp80 +g39 +S'NC_000016.9:g.89575040C>A' +p81 +sg41 +(dp82 +g43 +g44 +sg45 +g46 +sg47 +S'89575040' +p83 +sg49 +S'A' +p84 +sssS'grch37' +p85 +(dp86 +g39 +S'NC_000016.9:g.89575040C>A' +p87 +sg41 +(dp88 +g43 +g55 +sg45 +g46 +sg47 +S'89575040' +p89 +sg49 +g84 +ssssg58 +(dp90 +g60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' +p91 +sg62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' +p92 +sssS'NM_001363850.1:c.183+32_183+33insA' +p93 +(dp94 +g3 +g4 +sg5 +(lp95 +S'Multiple ALT sequences detected' +p96 +aS'auto-submitting all possible combinations' +p97 +aS'NC_000016.9:g.89575040C>CA automapped to NC_000016.9:g.89575040_89575041insA' +p98 +aS'RefSeqGene record not available' +p99 +asg14 +g4 +sg15 +(lp100 +sg17 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA +p101 +sg19 +S'SPG7' +p102 +sg21 +(dp103 +g23 +S'NP_001350779.1:p.?' +p104 +sg25 +S'NP_001350779.1:p.?' 
+p105 +ssg27 +g28 +sg29 +S'NC_000016.9(NM_001363850.1):c.183+32_183+33insA' +p106 +sg31 +g4 +sg32 +S'NM_001363850.1:c.183+32_183+33insA' +p107 +sg34 +g4 +sg35 +(dp108 +S'hg19' +p109 +(dp110 +g39 +S'NC_000016.9:g.89575040_89575041insA' +p111 +sg41 +(dp112 +g43 +g44 +sg45 +g46 +sg47 +S'89575040' +p113 +sg49 +S'CA' +p114 +sssS'grch37' +p115 +(dp116 +g39 +S'NC_000016.9:g.89575040_89575041insA' +p117 +sg41 +(dp118 +g43 +g55 +sg45 +g46 +sg47 +S'89575040' +p119 +sg49 +S'CA' +p120 +ssssg58 +(dp121 +g60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' +p122 +sg62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' +p123 +sssS'NM_199367.2:c.183+32C>A' +p124 +(dp125 +g3 +g4 +sg5 +(lp126 +S'Multiple ALT sequences detected' +p127 +aS'auto-submitting all possible combinations' +p128 +aS'RefSeqGene record not available' +p129 +asg14 +g4 +sg15 +(lp130 +sg17 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA +p131 +sg19 +S'SPG7' +p132 +sg21 +(dp133 +g23 +S'NP_955399.1:p.?' +p134 +sg25 +S'NP_955399.1:p.?' 
+p135 +ssg27 +g28 +sg29 +S'NC_000016.9(NM_199367.2):c.183+32C>A' +p136 +sg31 +g4 +sg32 +S'NM_199367.2:c.183+32C>A' +p137 +sg34 +g4 +sg35 +(dp138 +S'grch38' +p139 +(dp140 +g39 +S'NC_000016.10:g.89508632C>A' +p141 +sg41 +(dp142 +g43 +g55 +sg45 +g46 +sg47 +S'89508632' +p143 +sg49 +g84 +sssS'grch37' +p144 +(dp145 +g39 +S'NC_000016.9:g.89575040C>A' +p146 +sg41 +(dp147 +g43 +g55 +sg45 +g46 +sg47 +S'89575040' +p148 +sg49 +g84 +sssS'hg38' +p149 +(dp150 +g39 +S'NC_000016.10:g.89508632C>A' +p151 +sg41 +(dp152 +g43 +g44 +sg45 +g46 +sg47 +S'89508632' +p153 +sg49 +g84 +sssS'hg19' +p154 +(dp155 +g39 +S'NC_000016.9:g.89575040C>A' +p156 +sg41 +(dp157 +g43 +g44 +sg45 +g46 +sg47 +S'89575040' +p158 +sg49 +g84 +ssssg58 +(dp159 +g60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' +p160 +sg62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2' +p161 +sssS'NM_003119.3:c.183+32_183+33insA' +p162 +(dp163 +g3 +g4 +sg5 +(lp164 +S'Multiple ALT sequences detected' +p165 +aS'auto-submitting all possible combinations' +p166 +aS'NC_000016.9:g.89575040C>CA automapped to NC_000016.9:g.89575040_89575041insA' +p167 +aS'RefSeqGene record not available' +p168 +asg14 +g4 +sg15 +(lp169 +sg17 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA +p170 +sg19 +S'SPG7' +p171 +sg21 +(dp172 +g23 +S'NP_003110.1:p.?' +p173 +sg25 +S'NP_003110.1:p.?' 
+p174 +ssg27 +g28 +sg29 +S'NC_000016.9(NM_003119.3):c.183+32_183+33insA' +p175 +sg31 +g4 +sg32 +S'NM_003119.3:c.183+32_183+33insA' +p176 +sg34 +g4 +sg35 +(dp177 +S'grch38' +p178 +(dp179 +g39 +S'NC_000016.10:g.89508632_89508633insA' +p180 +sg41 +(dp181 +g43 +g55 +sg45 +g46 +sg47 +S'89508632' +p182 +sg49 +S'CA' +p183 +sssS'grch37' +p184 +(dp185 +g39 +S'NC_000016.9:g.89575040_89575041insA' +p186 +sg41 +(dp187 +g43 +g55 +sg45 +g46 +sg47 +S'89575040' +p188 +sg49 +S'CA' +p189 +sssg149 +(dp190 +g39 +S'NC_000016.10:g.89508632_89508633insA' +p191 +sg41 +(dp192 +g43 +g44 +sg45 +g46 +sg47 +S'89508632' +p193 +sg49 +S'CA' +p194 +sssS'hg19' +p195 +(dp196 +g39 +S'NC_000016.9:g.89575040_89575041insA' +p197 +sg41 +(dp198 +g43 +g44 +sg45 +g46 +sg47 +S'89575040' +p199 +sg49 +S'CA' +p200 +ssssg58 +(dp201 +g60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p202 +sg62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' +p203 +sssS'flag' +p204 +S'gene_variant' +p205 +sS'NM_003119.2:c.183+32_183+33insA' +p206 +(dp207 +g3 +g4 +sg5 +(lp208 +S'Multiple ALT sequences detected' +p209 +aS'auto-submitting all possible combinations' +p210 +aS'NC_000016.9:g.89575040C>CA automapped to NC_000016.9:g.89575040_89575041insA' +p211 +aS'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' +p212 +aS'NM_003119.3:c.183+32_183+33insA MUST be fully validated prior to use in reports' +p213 +aS'select_variants=NM_003119.3:c.183+32_183+33insA' +p214 +aS'RefSeqGene record not available' +p215 +asg14 +g4 +sg15 +(lp216 +sg17 +VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA +p217 +sg19 +S'SPG7' +p218 +sg21 +(dp219 +g23 +S'NP_003110.1:p.?' +p220 +sg25 +S'NP_003110.1:p.?' 
+p221 +ssg27 +g28 +sg29 +S'NC_000016.9(NM_003119.2):c.183+32_183+33insA' +p222 +sg31 +g4 +sg32 +S'NM_003119.2:c.183+32_183+33insA' +p223 +sg34 +g4 +sg35 +(dp224 +S'hg19' +p225 +(dp226 +g39 +S'NC_000016.9:g.89575040_89575041insA' +p227 +sg41 +(dp228 +g43 +g44 +sg45 +g46 +sg47 +S'89575040' +p229 +sg49 +S'CA' +p230 +sssS'grch37' +p231 +(dp232 +g39 +S'NC_000016.9:g.89575040_89575041insA' +p233 +sg41 +(dp234 +g43 +g55 +sg45 +g46 +sg47 +S'89575040' +p235 +sg49 +S'CA' +p236 +ssssg58 +(dp237 +g60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p238 +sg62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' +p239 +sssS'NM_199367.1:c.183+32C>A' +p240 +(dp241 +g3 +g4 +sg5 +(lp242 +S'Multiple ALT sequences detected' +p243 +aS'auto-submitting all possible combinations' +p244 +aS'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' +p245 +aS'NM_199367.2:c.183+32C>A MUST be fully validated prior to use in reports' +p246 +aS'select_variants=NM_199367.2:c.183+32C>A' +p247 +aS'RefSeqGene record not available' +p248 +asg14 +g4 +sg15 +(lp249 +sg17 +VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA +p250 +sg19 +S'SPG7' +p251 +sg21 +(dp252 +g23 +S'NP_955399.1:p.?' +p253 +sg25 +S'NP_955399.1:p.?' 
+p254 +ssg27 +g28 +sg29 +S'NC_000016.9(NM_199367.1):c.183+32C>A' +p255 +sg31 +g4 +sg32 +S'NM_199367.1:c.183+32C>A' +p256 +sg34 +g4 +sg35 +(dp257 +S'hg19' +p258 +(dp259 +g39 +S'NC_000016.9:g.89575040C>A' +p260 +sg41 +(dp261 +g43 +g44 +sg45 +g46 +sg47 +S'89575040' +p262 +sg49 +g84 +sssS'grch37' +p263 +(dp264 +g39 +S'NC_000016.9:g.89575040C>A' +p265 +sg41 +(dp266 +g43 +g55 +sg45 +g46 +sg47 +S'89575040' +p267 +sg49 +g84 +ssssg58 +(dp268 +g60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' +p269 +sg62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1' +p270 +sssS'NM_003119.3:c.183+32C>A' +p271 +(dp272 +g3 +g4 +sg5 +(lp273 +S'Multiple ALT sequences detected' +p274 +aS'auto-submitting all possible combinations' +p275 +aS'RefSeqGene record not available' +p276 +asg14 +g4 +sg15 +(lp277 +sg17 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA +p278 +sg19 +S'SPG7' +p279 +sg21 +(dp280 +g23 +S'NP_003110.1:p.?' +p281 +sg25 +S'NP_003110.1:p.?' +p282 +ssg27 +g28 +sg29 +S'NC_000016.9(NM_003119.3):c.183+32C>A' +p283 +sg31 +g4 +sg32 +S'NM_003119.3:c.183+32C>A' +p284 +sg34 +g4 +sg35 +(dp285 +S'grch38' +p286 +(dp287 +g39 +S'NC_000016.10:g.89508632C>A' +p288 +sg41 +(dp289 +g43 +g55 +sg45 +g46 +sg47 +S'89508632' +p290 +sg49 +g84 +sssS'grch37' +p291 +(dp292 +g39 +S'NC_000016.9:g.89575040C>A' +p293 +sg41 +(dp294 +g43 +g55 +sg45 +g46 +sg47 +S'89575040' +p295 +sg49 +g84 +sssg149 +(dp296 +g39 +S'NC_000016.10:g.89508632C>A' +p297 +sg41 +(dp298 +g43 +g44 +sg45 +g46 +sg47 +S'89508632' +p299 +sg49 +g84 +sssS'hg19' +p300 +(dp301 +g39 +S'NC_000016.9:g.89575040C>A' +p302 +sg41 +(dp303 +g43 +g44 +sg45 +g46 +sg47 +S'89575040' +p304 +sg49 +g84 +ssssg58 +(dp305 +g60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p306 +sg62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' +p307 +sssS'NM_199367.2:c.183+32_183+33insA' +p308 +(dp309 +g3 +g4 +sg5 +(lp310 +S'Multiple ALT sequences detected' +p311 +aS'auto-submitting all possible combinations' 
+p312 +aS'NC_000016.9:g.89575040C>CA automapped to NC_000016.9:g.89575040_89575041insA' +p313 +aS'RefSeqGene record not available' +p314 +asg14 +g4 +sg15 +(lp315 +sg17 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA +p316 +sg19 +S'SPG7' +p317 +sg21 +(dp318 +g23 +S'NP_955399.1:p.?' +p319 +sg25 +S'NP_955399.1:p.?' +p320 +ssg27 +g28 +sg29 +S'NC_000016.9(NM_199367.2):c.183+32_183+33insA' +p321 +sg31 +g4 +sg32 +S'NM_199367.2:c.183+32_183+33insA' +p322 +sg34 +g4 +sg35 +(dp323 +S'grch38' +p324 +(dp325 +g39 +S'NC_000016.10:g.89508632_89508633insA' +p326 +sg41 +(dp327 +g43 +g55 +sg45 +g46 +sg47 +S'89508632' +p328 +sg49 +S'CA' +p329 +sssS'grch37' +p330 +(dp331 +g39 +S'NC_000016.9:g.89575040_89575041insA' +p332 +sg41 +(dp333 +g43 +g55 +sg45 +g46 +sg47 +S'89575040' +p334 +sg49 +S'CA' +p335 +sssg149 +(dp336 +g39 +S'NC_000016.10:g.89508632_89508633insA' +p337 +sg41 +(dp338 +g43 +g44 +sg45 +g46 +sg47 +S'89508632' +p339 +sg49 +S'CA' +p340 +sssS'hg19' +p341 +(dp342 +g39 +S'NC_000016.9:g.89575040_89575041insA' +p343 +sg41 +(dp344 +g43 +g44 +sg45 +g46 +sg47 +S'89575040' +p345 +sg49 +S'CA' +p346 +ssssg58 +(dp347 +g60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' +p348 +sg62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2' +p349 +sssS'NM_003119.2:c.183+32C>A' +p350 +(dp351 +g3 +g4 +sg5 +(lp352 +S'Multiple ALT sequences detected' +p353 +aS'auto-submitting all possible combinations' +p354 +aS'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' +p355 +aS'NM_003119.3:c.183+32C>A MUST be fully validated prior to use in reports' +p356 +aS'select_variants=NM_003119.3:c.183+32C>A' +p357 +aS'RefSeqGene record not available' +p358 +asg14 +g4 +sg15 +(lp359 +sg17 +VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA +p360 +sg19 +S'SPG7' +p361 +sg21 +(dp362 +g23 +S'NP_003110.1:p.?' +p363 +sg25 +S'NP_003110.1:p.?' 
+p364 +ssg27 +g28 +sg29 +S'NC_000016.9(NM_003119.2):c.183+32C>A' +p365 +sg31 +g4 +sg32 +S'NM_003119.2:c.183+32C>A' +p366 +sg34 +g4 +sg35 +(dp367 +S'hg19' +p368 +(dp369 +g39 +S'NC_000016.9:g.89575040C>A' +p370 +sg41 +(dp371 +g43 +g44 +sg45 +g46 +sg47 +S'89575040' +p372 +sg49 +g84 +sssS'grch37' +p373 +(dp374 +g39 +S'NC_000016.9:g.89575040C>A' +p375 +sg41 +(dp376 +g43 +g55 +sg45 +g46 +sg47 +S'89575040' +p377 +sg49 +g84 +ssssg58 +(dp378 +g60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p379 +sg62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' +p380 +sssS'metadata' +p381 +(dp382 +S'variantvalidator_hgvs_version' +p383 +S'1.1.3' +p384 +sS'uta_schema' +p385 +S'uta_20180821' +p386 +sS'seqrepo_db' +p387 +S'2018-08-21' +p388 +sS'variantvalidator_version' +p389 +S'v0.2' +p390 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant243.txt b/VariantValidator/testing/testOutputsMasterITS/variant243.txt new file mode 100644 index 00000000..8f98830f --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant243.txt @@ -0,0 +1,549 @@ +(dp0 +S'NM_199367.2:c.184-2A>C' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA +p12 +sS'gene_symbol' +p13 +S'SPG7' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_955399.1:p.?' +p18 +sS'slr' +p19 +S'NP_955399.1:p.?' 
+p20 +ssS'submitted_variant' +p21 +S'16-89576896-A-C' +p22 +sS'genome_context_intronic_sequence' +p23 +S'NC_000016.9(NM_199367.2):c.184-2A>C' +p24 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_199367.2:c.184-2A>C' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'grch38' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000016.10:g.89510488A>C' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'16' +p38 +sS'ref' +p39 +S'A' +p40 +sS'pos' +p41 +S'89510488' +p42 +sS'alt' +p43 +S'C' +p44 +sssS'grch37' +p45 +(dp46 +g33 +S'NC_000016.9:g.89576896A>C' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +g40 +sg41 +S'89576896' +p49 +sg43 +g44 +sssS'hg38' +p50 +(dp51 +g33 +S'NC_000016.10:g.89510488A>C' +p52 +sg35 +(dp53 +g37 +S'chr16' +p54 +sg39 +g40 +sg41 +S'89510488' +p55 +sg43 +g44 +sssS'hg19' +p56 +(dp57 +g33 +S'NC_000016.9:g.89576896A>C' +p58 +sg35 +(dp59 +g37 +g54 +sg39 +g40 +sg41 +S'89576896' +p60 +sg43 +g44 +ssssS'reference_sequence_records' +p61 +(dp62 +S'protein' +p63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' +p64 +sS'transcript' +p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2' +p66 +sssS'NM_003119.2:c.184-2A>C' +p67 +(dp68 +g3 +g4 +sg5 +(lp69 +S'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' +p70 +aS'NM_003119.3:c.184-2A>C MUST be fully validated prior to use in reports' +p71 +aS'select_variants=NM_003119.3:c.184-2A>C' +p72 +aS'RefSeqGene record not available' +p73 +asg8 +g4 +sg9 +(lp74 +sg11 +VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA +p75 +sg13 +S'SPG7' +p76 +sg15 +(dp77 +g17 +S'NP_003110.1:p.?' +p78 +sg19 +S'NP_003110.1:p.?' 
+p79 +ssg21 +g22 +sg23 +S'NC_000016.9(NM_003119.2):c.184-2A>C' +p80 +sg25 +g4 +sg26 +S'NM_003119.2:c.184-2A>C' +p81 +sg28 +g4 +sg29 +(dp82 +S'hg19' +p83 +(dp84 +g33 +S'NC_000016.9:g.89576896A>C' +p85 +sg35 +(dp86 +g37 +g54 +sg39 +g40 +sg41 +S'89576896' +p87 +sg43 +g44 +sssS'grch37' +p88 +(dp89 +g33 +S'NC_000016.9:g.89576896A>C' +p90 +sg35 +(dp91 +g37 +g38 +sg39 +g40 +sg41 +S'89576896' +p92 +sg43 +g44 +ssssg61 +(dp93 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p94 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' +p95 +sssS'NM_003119.3:c.184-2A>C' +p96 +(dp97 +g3 +g4 +sg5 +(lp98 +S'RefSeqGene record not available' +p99 +asg8 +g4 +sg9 +(lp100 +sg11 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA +p101 +sg13 +S'SPG7' +p102 +sg15 +(dp103 +g17 +S'NP_003110.1:p.?' +p104 +sg19 +S'NP_003110.1:p.?' +p105 +ssg21 +g22 +sg23 +S'NC_000016.9(NM_003119.3):c.184-2A>C' +p106 +sg25 +g4 +sg26 +S'NM_003119.3:c.184-2A>C' +p107 +sg28 +g4 +sg29 +(dp108 +S'grch38' +p109 +(dp110 +g33 +S'NC_000016.10:g.89510488A>C' +p111 +sg35 +(dp112 +g37 +g38 +sg39 +g40 +sg41 +S'89510488' +p113 +sg43 +g44 +sssS'grch37' +p114 +(dp115 +g33 +S'NC_000016.9:g.89576896A>C' +p116 +sg35 +(dp117 +g37 +g38 +sg39 +g40 +sg41 +S'89576896' +p118 +sg43 +g44 +sssg50 +(dp119 +g33 +S'NC_000016.10:g.89510488A>C' +p120 +sg35 +(dp121 +g37 +g54 +sg39 +g40 +sg41 +S'89510488' +p122 +sg43 +g44 +sssS'hg19' +p123 +(dp124 +g33 +S'NC_000016.9:g.89576896A>C' +p125 +sg35 +(dp126 +g37 +g54 +sg39 +g40 +sg41 +S'89576896' +p127 +sg43 +g44 +ssssg61 +(dp128 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p129 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' +p130 +sssS'NM_001363850.1:c.184-2A>C' +p131 +(dp132 +g3 +g4 +sg5 +(lp133 +S'RefSeqGene record not available' +p134 +asg8 +g4 +sg9 +(lp135 +sg11 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA +p136 +sg13 +S'SPG7' +p137 +sg15 +(dp138 +g17 
+S'NP_001350779.1:p.?' +p139 +sg19 +S'NP_001350779.1:p.?' +p140 +ssg21 +g22 +sg23 +S'NC_000016.9(NM_001363850.1):c.184-2A>C' +p141 +sg25 +g4 +sg26 +S'NM_001363850.1:c.184-2A>C' +p142 +sg28 +g4 +sg29 +(dp143 +S'hg19' +p144 +(dp145 +g33 +S'NC_000016.9:g.89576896A>C' +p146 +sg35 +(dp147 +g37 +g54 +sg39 +g40 +sg41 +S'89576896' +p148 +sg43 +g44 +sssS'grch37' +p149 +(dp150 +g33 +S'NC_000016.9:g.89576896A>C' +p151 +sg35 +(dp152 +g37 +g38 +sg39 +g40 +sg41 +S'89576896' +p153 +sg43 +g44 +ssssg61 +(dp154 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' +p155 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' +p156 +sssS'flag' +p157 +S'gene_variant' +p158 +sS'NM_199367.1:c.184-2A>C' +p159 +(dp160 +g3 +g4 +sg5 +(lp161 +S'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' +p162 +aS'NM_199367.2:c.184-2A>C MUST be fully validated prior to use in reports' +p163 +aS'select_variants=NM_199367.2:c.184-2A>C' +p164 +aS'RefSeqGene record not available' +p165 +asg8 +g4 +sg9 +(lp166 +sg11 +VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA +p167 +sg13 +S'SPG7' +p168 +sg15 +(dp169 +g17 +S'NP_955399.1:p.?' +p170 +sg19 +S'NP_955399.1:p.?' 
+p171 +ssg21 +g22 +sg23 +S'NC_000016.9(NM_199367.1):c.184-2A>C' +p172 +sg25 +g4 +sg26 +S'NM_199367.1:c.184-2A>C' +p173 +sg28 +g4 +sg29 +(dp174 +S'hg19' +p175 +(dp176 +g33 +S'NC_000016.9:g.89576896A>C' +p177 +sg35 +(dp178 +g37 +g54 +sg39 +g40 +sg41 +S'89576896' +p179 +sg43 +g44 +sssS'grch37' +p180 +(dp181 +g33 +S'NC_000016.9:g.89576896A>C' +p182 +sg35 +(dp183 +g37 +g38 +sg39 +g40 +sg41 +S'89576896' +p184 +sg43 +g44 +ssssg61 +(dp185 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' +p186 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1' +p187 +sssS'metadata' +p188 +(dp189 +S'variantvalidator_hgvs_version' +p190 +S'1.1.3' +p191 +sS'uta_schema' +p192 +S'uta_20180821' +p193 +sS'seqrepo_db' +p194 +S'2018-08-21' +p195 +sS'variantvalidator_version' +p196 +S'v0.2' +p197 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant244.txt b/VariantValidator/testing/testOutputsMasterITS/variant244.txt new file mode 100644 index 00000000..779881ba --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant244.txt @@ -0,0 +1,1119 @@ +(dp0 +S'NM_003119.3:c.216dup' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'Multiple ALT sequences detected' +p7 +aS'auto-submitting all possible combinations' +p8 +aS'NC_000016.9:g.89576930T>TT automapped to NC_000016.9:g.89576930dupT' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g4 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA +p15 +sS'gene_symbol' +p16 +S'SPG7' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_003110.1:p.(Glu73Ter)' +p21 +sS'slr' +p22 +S'NP_003110.1:p.(E73*)' +p23 +ssS'submitted_variant' +p24 +S'16-89576930-T-TA,TT' +p25 +sS'genome_context_intronic_sequence' +p26 +g4 +sS'hgvs_lrg_variant' +p27 +g4 +sS'hgvs_transcript_variant' 
+p28 +S'NM_003119.3:c.216dup' +p29 +sS'hgvs_refseqgene_variant' +p30 +g4 +sS'primary_assembly_loci' +p31 +(dp32 +S'grch38' +p33 +(dp34 +S'hgvs_genomic_description' +p35 +S'NC_000016.10:g.89510522dup' +p36 +sS'vcf' +p37 +(dp38 +S'chr' +p39 +S'16' +p40 +sS'ref' +p41 +S'T' +p42 +sS'pos' +p43 +S'89510520' +p44 +sS'alt' +p45 +S'TT' +p46 +sssS'grch37' +p47 +(dp48 +g35 +S'NC_000016.9:g.89576930dup' +p49 +sg37 +(dp50 +g39 +g40 +sg41 +g42 +sg43 +S'89576928' +p51 +sg45 +S'TT' +p52 +sssS'hg38' +p53 +(dp54 +g35 +S'NC_000016.10:g.89510522dup' +p55 +sg37 +(dp56 +g39 +S'chr16' +p57 +sg41 +g42 +sg43 +S'89510520' +p58 +sg45 +S'TT' +p59 +sssS'hg19' +p60 +(dp61 +g35 +S'NC_000016.9:g.89576930dup' +p62 +sg37 +(dp63 +g39 +g57 +sg41 +g42 +sg43 +S'89576928' +p64 +sg45 +S'TT' +p65 +ssssS'reference_sequence_records' +p66 +(dp67 +S'protein' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p69 +sS'transcript' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' +p71 +sssS'NM_003119.2:c.216_217insA' +p72 +(dp73 +g3 +g4 +sg5 +(lp74 +S'Multiple ALT sequences detected' +p75 +aS'auto-submitting all possible combinations' +p76 +aS'NC_000016.9:g.89576930T>TA automapped to NC_000016.9:g.89576930_89576931insA' +p77 +aS'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' +p78 +aS'NM_003119.3:c.216_217insA MUST be fully validated prior to use in reports' +p79 +aS'select_variants=NM_003119.3:c.216_217insA' +p80 +aS'RefSeqGene record not available' +p81 +asg11 +g4 +sg12 +(lp82 +sg14 +VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA +p83 +sg16 +S'SPG7' +p84 +sg18 +(dp85 +g20 +S'NP_003110.1:p.(Glu73ArgfsTer30)' +p86 +sg22 +S'NP_003110.1:p.(E73Rfs*30)' +p87 +ssg24 +g25 +sg26 +g4 +sg27 +g4 +sg28 +S'NM_003119.2:c.216_217insA' +p88 +sg30 +g4 +sg31 +(dp89 +S'hg19' +p90 +(dp91 +g35 +S'NC_000016.9:g.89576930_89576931insA' +p92 +sg37 +(dp93 +g39 +g57 +sg41 +g42 +sg43 +S'89576930' +p94 +sg45 
+S'TA' +p95 +sssS'grch37' +p96 +(dp97 +g35 +S'NC_000016.9:g.89576930_89576931insA' +p98 +sg37 +(dp99 +g39 +g40 +sg41 +g42 +sg43 +S'89576930' +p100 +sg45 +S'TA' +p101 +ssssg66 +(dp102 +g68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p103 +sg70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' +p104 +sssS'NM_199367.2:c.216dup' +p105 +(dp106 +g3 +g4 +sg5 +(lp107 +S'Multiple ALT sequences detected' +p108 +aS'auto-submitting all possible combinations' +p109 +aS'NC_000016.9:g.89576930T>TT automapped to NC_000016.9:g.89576930dupT' +p110 +aS'RefSeqGene record not available' +p111 +asg11 +g4 +sg12 +(lp112 +sg14 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA +p113 +sg16 +S'SPG7' +p114 +sg18 +(dp115 +g20 +S'NP_955399.1:p.(Glu73Ter)' +p116 +sg22 +S'NP_955399.1:p.(E73*)' +p117 +ssg24 +g25 +sg26 +g4 +sg27 +g4 +sg28 +S'NM_199367.2:c.216dup' +p118 +sg30 +g4 +sg31 +(dp119 +S'grch38' +p120 +(dp121 +g35 +S'NC_000016.10:g.89510522dup' +p122 +sg37 +(dp123 +g39 +g40 +sg41 +g42 +sg43 +S'89510520' +p124 +sg45 +S'TT' +p125 +sssS'grch37' +p126 +(dp127 +g35 +S'NC_000016.9:g.89576930dup' +p128 +sg37 +(dp129 +g39 +g40 +sg41 +g42 +sg43 +S'89576928' +p130 +sg45 +S'TT' +p131 +sssg53 +(dp132 +g35 +S'NC_000016.10:g.89510522dup' +p133 +sg37 +(dp134 +g39 +g57 +sg41 +g42 +sg43 +S'89510520' +p135 +sg45 +S'TT' +p136 +sssS'hg19' +p137 +(dp138 +g35 +S'NC_000016.9:g.89576930dup' +p139 +sg37 +(dp140 +g39 +g57 +sg41 +g42 +sg43 +S'89576928' +p141 +sg45 +S'TT' +p142 +ssssg66 +(dp143 +g68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' +p144 +sg70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2' +p145 +sssS'NM_199367.2:c.216_217insA' +p146 +(dp147 +g3 +g4 +sg5 +(lp148 +S'Multiple ALT sequences detected' +p149 +aS'auto-submitting all possible combinations' +p150 +aS'NC_000016.9:g.89576930T>TA automapped to NC_000016.9:g.89576930_89576931insA' +p151 +aS'RefSeqGene record not available' +p152 +asg11 +g4 +sg12 +(lp153 +sg14 +VHomo sapiens SPG7, 
paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA +p154 +sg16 +S'SPG7' +p155 +sg18 +(dp156 +g20 +S'NP_955399.1:p.(Glu73ArgfsTer30)' +p157 +sg22 +S'NP_955399.1:p.(E73Rfs*30)' +p158 +ssg24 +g25 +sg26 +g4 +sg27 +g4 +sg28 +S'NM_199367.2:c.216_217insA' +p159 +sg30 +g4 +sg31 +(dp160 +S'grch38' +p161 +(dp162 +g35 +S'NC_000016.10:g.89510522_89510523insA' +p163 +sg37 +(dp164 +g39 +g40 +sg41 +g42 +sg43 +S'89510522' +p165 +sg45 +S'TA' +p166 +sssS'grch37' +p167 +(dp168 +g35 +S'NC_000016.9:g.89576930_89576931insA' +p169 +sg37 +(dp170 +g39 +g40 +sg41 +g42 +sg43 +S'89576930' +p171 +sg45 +S'TA' +p172 +sssg53 +(dp173 +g35 +S'NC_000016.10:g.89510522_89510523insA' +p174 +sg37 +(dp175 +g39 +g57 +sg41 +g42 +sg43 +S'89510522' +p176 +sg45 +S'TA' +p177 +sssS'hg19' +p178 +(dp179 +g35 +S'NC_000016.9:g.89576930_89576931insA' +p180 +sg37 +(dp181 +g39 +g57 +sg41 +g42 +sg43 +S'89576930' +p182 +sg45 +S'TA' +p183 +ssssg66 +(dp184 +g68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' +p185 +sg70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2' +p186 +sssS'NM_001363850.1:c.216dup' +p187 +(dp188 +g3 +g4 +sg5 +(lp189 +S'Multiple ALT sequences detected' +p190 +aS'auto-submitting all possible combinations' +p191 +aS'NC_000016.9:g.89576930T>TT automapped to NC_000016.9:g.89576930dupT' +p192 +aS'RefSeqGene record not available' +p193 +asg11 +g4 +sg12 +(lp194 +sg14 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA +p195 +sg16 +S'SPG7' +p196 +sg18 +(dp197 +g20 +S'NP_001350779.1:p.(Glu73Ter)' +p198 +sg22 +S'NP_001350779.1:p.(E73*)' +p199 +ssg24 +g25 +sg26 +g4 +sg27 +g4 +sg28 +S'NM_001363850.1:c.216dup' +p200 +sg30 +g4 +sg31 +(dp201 +S'hg19' +p202 +(dp203 +g35 +S'NC_000016.9:g.89576930dup' +p204 +sg37 +(dp205 +g39 +g57 +sg41 +g42 +sg43 +S'89576928' +p206 +sg45 +S'TT' +p207 +sssS'grch37' +p208 +(dp209 +g35 +S'NC_000016.9:g.89576930dup' +p210 +sg37 +(dp211 +g39 +g40 +sg41 +g42 +sg43 +S'89576928' +p212 +sg45 +S'TT' +p213 +ssssg66 
+(dp214 +g68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' +p215 +sg70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' +p216 +sssS'flag' +p217 +S'gene_variant' +p218 +sS'NM_001363850.1:c.216_217insA' +p219 +(dp220 +g3 +g4 +sg5 +(lp221 +S'Multiple ALT sequences detected' +p222 +aS'auto-submitting all possible combinations' +p223 +aS'NC_000016.9:g.89576930T>TA automapped to NC_000016.9:g.89576930_89576931insA' +p224 +aS'RefSeqGene record not available' +p225 +asg11 +g4 +sg12 +(lp226 +sg14 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA +p227 +sg16 +S'SPG7' +p228 +sg18 +(dp229 +g20 +S'NP_001350779.1:p.(Glu73ArgfsTer30)' +p230 +sg22 +S'NP_001350779.1:p.(E73Rfs*30)' +p231 +ssg24 +g25 +sg26 +g4 +sg27 +g4 +sg28 +S'NM_001363850.1:c.216_217insA' +p232 +sg30 +g4 +sg31 +(dp233 +S'hg19' +p234 +(dp235 +g35 +S'NC_000016.9:g.89576930_89576931insA' +p236 +sg37 +(dp237 +g39 +g57 +sg41 +g42 +sg43 +S'89576930' +p238 +sg45 +S'TA' +p239 +sssS'grch37' +p240 +(dp241 +g35 +S'NC_000016.9:g.89576930_89576931insA' +p242 +sg37 +(dp243 +g39 +g40 +sg41 +g42 +sg43 +S'89576930' +p244 +sg45 +S'TA' +p245 +ssssg66 +(dp246 +g68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' +p247 +sg70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' +p248 +sssS'NM_199367.1:c.216_217insA' +p249 +(dp250 +g3 +g4 +sg5 +(lp251 +S'Multiple ALT sequences detected' +p252 +aS'auto-submitting all possible combinations' +p253 +aS'NC_000016.9:g.89576930T>TA automapped to NC_000016.9:g.89576930_89576931insA' +p254 +aS'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' +p255 +aS'NM_199367.2:c.216_217insA MUST be fully validated prior to use in reports' +p256 +aS'select_variants=NM_199367.2:c.216_217insA' +p257 +aS'RefSeqGene record not available' +p258 +asg11 +g4 +sg12 +(lp259 +sg14 +VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA +p260 
+sg16 +S'SPG7' +p261 +sg18 +(dp262 +g20 +S'NP_955399.1:p.(Glu73ArgfsTer30)' +p263 +sg22 +S'NP_955399.1:p.(E73Rfs*30)' +p264 +ssg24 +g25 +sg26 +g4 +sg27 +g4 +sg28 +S'NM_199367.1:c.216_217insA' +p265 +sg30 +g4 +sg31 +(dp266 +S'hg19' +p267 +(dp268 +g35 +S'NC_000016.9:g.89576930_89576931insA' +p269 +sg37 +(dp270 +g39 +g57 +sg41 +g42 +sg43 +S'89576930' +p271 +sg45 +S'TA' +p272 +sssS'grch37' +p273 +(dp274 +g35 +S'NC_000016.9:g.89576930_89576931insA' +p275 +sg37 +(dp276 +g39 +g40 +sg41 +g42 +sg43 +S'89576930' +p277 +sg45 +S'TA' +p278 +ssssg66 +(dp279 +g68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' +p280 +sg70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1' +p281 +sssS'metadata' +p282 +(dp283 +S'variantvalidator_hgvs_version' +p284 +S'1.1.3' +p285 +sS'uta_schema' +p286 +S'uta_20180821' +p287 +sS'seqrepo_db' +p288 +S'2018-08-21' +p289 +sS'variantvalidator_version' +p290 +S'v0.2' +p291 +ssS'NM_199367.1:c.216dup' +p292 +(dp293 +g3 +g4 +sg5 +(lp294 +S'Multiple ALT sequences detected' +p295 +aS'auto-submitting all possible combinations' +p296 +aS'NC_000016.9:g.89576930T>TT automapped to NC_000016.9:g.89576930dupT' +p297 +aS'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' +p298 +aS'NM_199367.2:c.216dupT MUST be fully validated prior to use in reports' +p299 +aS'select_variants=NM_199367.2:c.216dup' +p300 +aS'RefSeqGene record not available' +p301 +asg11 +g4 +sg12 +(lp302 +sg14 +VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA +p303 +sg16 +S'SPG7' +p304 +sg18 +(dp305 +g20 +S'NP_955399.1:p.(Glu73Ter)' +p306 +sg22 +S'NP_955399.1:p.(E73*)' +p307 +ssg24 +g25 +sg26 +g4 +sg27 +g4 +sg28 +S'NM_199367.1:c.216dup' +p308 +sg30 +g4 +sg31 +(dp309 +S'hg19' +p310 +(dp311 +g35 +S'NC_000016.9:g.89576930dup' +p312 +sg37 +(dp313 +g39 +g57 +sg41 +g42 +sg43 +S'89576928' +p314 +sg45 +S'TT' +p315 +sssS'grch37' +p316 +(dp317 +g35 +S'NC_000016.9:g.89576930dup' +p318 +sg37 
+(dp319 +g39 +g40 +sg41 +g42 +sg43 +S'89576928' +p320 +sg45 +S'TT' +p321 +ssssg66 +(dp322 +g68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' +p323 +sg70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1' +p324 +sssS'NM_003119.3:c.216_217insA' +p325 +(dp326 +g3 +g4 +sg5 +(lp327 +S'Multiple ALT sequences detected' +p328 +aS'auto-submitting all possible combinations' +p329 +aS'NC_000016.9:g.89576930T>TA automapped to NC_000016.9:g.89576930_89576931insA' +p330 +aS'RefSeqGene record not available' +p331 +asg11 +g4 +sg12 +(lp332 +sg14 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA +p333 +sg16 +S'SPG7' +p334 +sg18 +(dp335 +g20 +S'NP_003110.1:p.(Glu73ArgfsTer30)' +p336 +sg22 +S'NP_003110.1:p.(E73Rfs*30)' +p337 +ssg24 +g25 +sg26 +g4 +sg27 +g4 +sg28 +S'NM_003119.3:c.216_217insA' +p338 +sg30 +g4 +sg31 +(dp339 +S'grch38' +p340 +(dp341 +g35 +S'NC_000016.10:g.89510522_89510523insA' +p342 +sg37 +(dp343 +g39 +g40 +sg41 +g42 +sg43 +S'89510522' +p344 +sg45 +S'TA' +p345 +sssS'grch37' +p346 +(dp347 +g35 +S'NC_000016.9:g.89576930_89576931insA' +p348 +sg37 +(dp349 +g39 +g40 +sg41 +g42 +sg43 +S'89576930' +p350 +sg45 +S'TA' +p351 +sssg53 +(dp352 +g35 +S'NC_000016.10:g.89510522_89510523insA' +p353 +sg37 +(dp354 +g39 +g57 +sg41 +g42 +sg43 +S'89510522' +p355 +sg45 +S'TA' +p356 +sssS'hg19' +p357 +(dp358 +g35 +S'NC_000016.9:g.89576930_89576931insA' +p359 +sg37 +(dp360 +g39 +g57 +sg41 +g42 +sg43 +S'89576930' +p361 +sg45 +S'TA' +p362 +ssssg66 +(dp363 +g68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p364 +sg70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' +p365 +sssS'NM_003119.2:c.216dup' +p366 +(dp367 +g3 +g4 +sg5 +(lp368 +S'Multiple ALT sequences detected' +p369 +aS'auto-submitting all possible combinations' +p370 +aS'NC_000016.9:g.89576930T>TT automapped to NC_000016.9:g.89576930dupT' +p371 +aS'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' +p372 
+aS'NM_003119.3:c.216dupT MUST be fully validated prior to use in reports' +p373 +aS'select_variants=NM_003119.3:c.216dup' +p374 +aS'RefSeqGene record not available' +p375 +asg11 +g4 +sg12 +(lp376 +sg14 +VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA +p377 +sg16 +S'SPG7' +p378 +sg18 +(dp379 +g20 +S'NP_003110.1:p.(Glu73Ter)' +p380 +sg22 +S'NP_003110.1:p.(E73*)' +p381 +ssg24 +g25 +sg26 +g4 +sg27 +g4 +sg28 +S'NM_003119.2:c.216dup' +p382 +sg30 +g4 +sg31 +(dp383 +S'hg19' +p384 +(dp385 +g35 +S'NC_000016.9:g.89576930dup' +p386 +sg37 +(dp387 +g39 +g57 +sg41 +g42 +sg43 +S'89576928' +p388 +sg45 +S'TT' +p389 +sssS'grch37' +p390 +(dp391 +g35 +S'NC_000016.9:g.89576930dup' +p392 +sg37 +(dp393 +g39 +g40 +sg41 +g42 +sg43 +S'89576928' +p394 +sg45 +S'TT' +p395 +ssssg66 +(dp396 +g68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p397 +sg70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' +p398 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant245.txt b/VariantValidator/testing/testOutputsMasterITS/variant245.txt new file mode 100644 index 00000000..ab31d77a --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant245.txt @@ -0,0 +1,580 @@ +(dp0 +S'NM_199367.1:c.216_217dup' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000016.9:g.89576931G>GTG automapped to NC_000016.9:g.89576930_89576931dupTG' +p7 +aS'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' +p8 +aS'NM_199367.2:c.216_217dupTG MUST be fully validated prior to use in reports' +p9 +aS'select_variants=NM_199367.2:c.216_217dup' +p10 +aS'RefSeqGene record not available' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g4 +sS'alt_genomic_loci' +p13 +(lp14 +sS'transcript_description' +p15 +VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 
2, mRNA +p16 +sS'gene_symbol' +p17 +S'SPG7' +p18 +sS'hgvs_predicted_protein_consequence' +p19 +(dp20 +S'tlr' +p21 +S'NP_955399.1:p.(Glu73ValfsTer9)' +p22 +sS'slr' +p23 +S'NP_955399.1:p.(E73Vfs*9)' +p24 +ssS'submitted_variant' +p25 +S'16-89576931-G-GTG' +p26 +sS'genome_context_intronic_sequence' +p27 +g4 +sS'hgvs_lrg_variant' +p28 +g4 +sS'hgvs_transcript_variant' +p29 +S'NM_199367.1:c.216_217dup' +p30 +sS'hgvs_refseqgene_variant' +p31 +g4 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000016.9:g.89576930_89576931dup' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr16' +p41 +sS'ref' +p42 +S'TG' +p43 +sS'pos' +p44 +S'89576930' +p45 +sS'alt' +p46 +S'TGTG' +p47 +sssS'grch37' +p48 +(dp49 +g36 +S'NC_000016.9:g.89576930_89576931dup' +p50 +sg38 +(dp51 +g40 +S'16' +p52 +sg42 +S'TG' +p53 +sg44 +S'89576930' +p54 +sg46 +S'TGTG' +p55 +ssssS'reference_sequence_records' +p56 +(dp57 +S'protein' +p58 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' +p59 +sS'transcript' +p60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1' +p61 +sssS'NM_003119.3:c.216_217dup' +p62 +(dp63 +g3 +g4 +sg5 +(lp64 +S'NC_000016.9:g.89576931G>GTG automapped to NC_000016.9:g.89576930_89576931dupTG' +p65 +aS'RefSeqGene record not available' +p66 +asg12 +g4 +sg13 +(lp67 +sg15 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA +p68 +sg17 +S'SPG7' +p69 +sg19 +(dp70 +g21 +S'NP_003110.1:p.(Glu73ValfsTer9)' +p71 +sg23 +S'NP_003110.1:p.(E73Vfs*9)' +p72 +ssg25 +g26 +sg27 +g4 +sg28 +g4 +sg29 +S'NM_003119.3:c.216_217dup' +p73 +sg31 +g4 +sg32 +(dp74 +S'grch38' +p75 +(dp76 +g36 +S'NC_000016.10:g.89510522_89510523dup' +p77 +sg38 +(dp78 +g40 +g52 +sg42 +S'TG' +p79 +sg44 +S'89510522' +p80 +sg46 +S'TGTG' +p81 +sssS'grch37' +p82 +(dp83 +g36 +S'NC_000016.9:g.89576930_89576931dup' +p84 +sg38 +(dp85 +g40 +g52 +sg42 +S'TG' +p86 +sg44 +S'89576930' +p87 +sg46 +S'TGTG' +p88 +sssS'hg38' +p89 +(dp90 +g36 
+S'NC_000016.10:g.89510522_89510523dup' +p91 +sg38 +(dp92 +g40 +g41 +sg42 +S'TG' +p93 +sg44 +S'89510522' +p94 +sg46 +S'TGTG' +p95 +sssS'hg19' +p96 +(dp97 +g36 +S'NC_000016.9:g.89576930_89576931dup' +p98 +sg38 +(dp99 +g40 +g41 +sg42 +S'TG' +p100 +sg44 +S'89576930' +p101 +sg46 +S'TGTG' +p102 +ssssg56 +(dp103 +g58 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p104 +sg60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' +p105 +sssS'NM_199367.2:c.216_217dup' +p106 +(dp107 +g3 +g4 +sg5 +(lp108 +S'NC_000016.9:g.89576931G>GTG automapped to NC_000016.9:g.89576930_89576931dupTG' +p109 +aS'RefSeqGene record not available' +p110 +asg12 +g4 +sg13 +(lp111 +sg15 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA +p112 +sg17 +S'SPG7' +p113 +sg19 +(dp114 +g21 +S'NP_955399.1:p.(Glu73ValfsTer9)' +p115 +sg23 +S'NP_955399.1:p.(E73Vfs*9)' +p116 +ssg25 +g26 +sg27 +g4 +sg28 +g4 +sg29 +S'NM_199367.2:c.216_217dup' +p117 +sg31 +g4 +sg32 +(dp118 +S'grch38' +p119 +(dp120 +g36 +S'NC_000016.10:g.89510522_89510523dup' +p121 +sg38 +(dp122 +g40 +g52 +sg42 +S'TG' +p123 +sg44 +S'89510522' +p124 +sg46 +S'TGTG' +p125 +sssS'grch37' +p126 +(dp127 +g36 +S'NC_000016.9:g.89576930_89576931dup' +p128 +sg38 +(dp129 +g40 +g52 +sg42 +S'TG' +p130 +sg44 +S'89576930' +p131 +sg46 +S'TGTG' +p132 +sssg89 +(dp133 +g36 +S'NC_000016.10:g.89510522_89510523dup' +p134 +sg38 +(dp135 +g40 +g41 +sg42 +S'TG' +p136 +sg44 +S'89510522' +p137 +sg46 +S'TGTG' +p138 +sssS'hg19' +p139 +(dp140 +g36 +S'NC_000016.9:g.89576930_89576931dup' +p141 +sg38 +(dp142 +g40 +g41 +sg42 +S'TG' +p143 +sg44 +S'89576930' +p144 +sg46 +S'TGTG' +p145 +ssssg56 +(dp146 +g58 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' +p147 +sg60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2' +p148 +sssS'NM_003119.2:c.216_217dup' +p149 +(dp150 +g3 +g4 +sg5 +(lp151 +S'NC_000016.9:g.89576931G>GTG automapped to NC_000016.9:g.89576930_89576931dupTG' +p152 +aS'A more recent version of the selected reference 
sequence NM_003119.2 is available (NM_003119.3)' +p153 +aS'NM_003119.3:c.216_217dupTG MUST be fully validated prior to use in reports' +p154 +aS'select_variants=NM_003119.3:c.216_217dup' +p155 +aS'RefSeqGene record not available' +p156 +asg12 +g4 +sg13 +(lp157 +sg15 +VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA +p158 +sg17 +S'SPG7' +p159 +sg19 +(dp160 +g21 +S'NP_003110.1:p.(Glu73ValfsTer9)' +p161 +sg23 +S'NP_003110.1:p.(E73Vfs*9)' +p162 +ssg25 +g26 +sg27 +g4 +sg28 +g4 +sg29 +S'NM_003119.2:c.216_217dup' +p163 +sg31 +g4 +sg32 +(dp164 +S'hg19' +p165 +(dp166 +g36 +S'NC_000016.9:g.89576930_89576931dup' +p167 +sg38 +(dp168 +g40 +g41 +sg42 +S'TG' +p169 +sg44 +S'89576930' +p170 +sg46 +S'TGTG' +p171 +sssS'grch37' +p172 +(dp173 +g36 +S'NC_000016.9:g.89576930_89576931dup' +p174 +sg38 +(dp175 +g40 +g52 +sg42 +S'TG' +p176 +sg44 +S'89576930' +p177 +sg46 +S'TGTG' +p178 +ssssg56 +(dp179 +g58 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p180 +sg60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' +p181 +sssS'flag' +p182 +S'gene_variant' +p183 +sS'NM_001363850.1:c.216_217dup' +p184 +(dp185 +g3 +g4 +sg5 +(lp186 +S'NC_000016.9:g.89576931G>GTG automapped to NC_000016.9:g.89576930_89576931dupTG' +p187 +aS'RefSeqGene record not available' +p188 +asg12 +g4 +sg13 +(lp189 +sg15 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA +p190 +sg17 +S'SPG7' +p191 +sg19 +(dp192 +g21 +S'NP_001350779.1:p.(Glu73ValfsTer9)' +p193 +sg23 +S'NP_001350779.1:p.(E73Vfs*9)' +p194 +ssg25 +g26 +sg27 +g4 +sg28 +g4 +sg29 +S'NM_001363850.1:c.216_217dup' +p195 +sg31 +g4 +sg32 +(dp196 +S'hg19' +p197 +(dp198 +g36 +S'NC_000016.9:g.89576930_89576931dup' +p199 +sg38 +(dp200 +g40 +g41 +sg42 +S'TG' +p201 +sg44 +S'89576930' +p202 +sg46 +S'TGTG' +p203 +sssS'grch37' +p204 +(dp205 +g36 +S'NC_000016.9:g.89576930_89576931dup' +p206 +sg38 +(dp207 +g40 +g52 +sg42 +S'TG' +p208 +sg44 +S'89576930' +p209 +sg46 
+S'TGTG' +p210 +ssssg56 +(dp211 +g58 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' +p212 +sg60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' +p213 +sssS'metadata' +p214 +(dp215 +S'variantvalidator_hgvs_version' +p216 +S'1.1.3' +p217 +sS'uta_schema' +p218 +S'uta_20180821' +p219 +sS'seqrepo_db' +p220 +S'2018-08-21' +p221 +sS'variantvalidator_version' +p222 +S'v0.2' +p223 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant246.txt b/VariantValidator/testing/testOutputsMasterITS/variant246.txt new file mode 100644 index 00000000..6a45a8ca --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant246.txt @@ -0,0 +1,567 @@ +(dp0 +S'NM_199367.1:c.1046_1071del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000016.9:g.89598368CGGCCCCCCCGGCTGTGGGAAGACGCT>C automapped to NC_000016.9:g.89598370_89598395del' +p7 +aS'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' +p8 +aS'NM_199367.2:c.1046_1071del MUST be fully validated prior to use in reports' +p9 +aS'select_variants=NM_199367.2:c.1046_1071del' +p10 +aS'RefSeqGene record not available' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g4 +sS'alt_genomic_loci' +p13 +(lp14 +sS'transcript_description' +p15 +VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA +p16 +sS'gene_symbol' +p17 +S'SPG7' +p18 +sS'hgvs_predicted_protein_consequence' +p19 +(dp20 +S'tlr' +p21 +S'NP_955399.1:p.(Gly349AlafsTer38)' +p22 +sS'slr' +p23 +S'NP_955399.1:p.(G349Afs*38)' +p24 +ssS'submitted_variant' +p25 +S'16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C' +p26 +sS'genome_context_intronic_sequence' +p27 +g4 +sS'hgvs_lrg_variant' +p28 +g4 +sS'hgvs_transcript_variant' +p29 +S'NM_199367.1:c.1046_1071del' +p30 +sS'hgvs_refseqgene_variant' +p31 +g4 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 
+S'hgvs_genomic_description' +p36 +S'NC_000016.9:g.89598370_89598395del' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr16' +p41 +sS'ref' +p42 +S'CGGCCCCCCCGGCTGTGGGAAGACGCT' +p43 +sS'pos' +p44 +S'89598368' +p45 +sS'alt' +p46 +S'C' +p47 +sssS'grch37' +p48 +(dp49 +g36 +S'NC_000016.9:g.89598370_89598395del' +p50 +sg38 +(dp51 +g40 +S'16' +p52 +sg42 +S'CGGCCCCCCCGGCTGTGGGAAGACGCT' +p53 +sg44 +S'89598368' +p54 +sg46 +g47 +ssssS'reference_sequence_records' +p55 +(dp56 +S'protein' +p57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' +p58 +sS'transcript' +p59 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1' +p60 +sssS'NM_001363850.1:c.1046_1071del' +p61 +(dp62 +g3 +g4 +sg5 +(lp63 +S'NC_000016.9:g.89598368CGGCCCCCCCGGCTGTGGGAAGACGCT>C automapped to NC_000016.9:g.89598370_89598395del' +p64 +aS'RefSeqGene record not available' +p65 +asg12 +g4 +sg13 +(lp66 +sg15 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA +p67 +sg17 +S'SPG7' +p68 +sg19 +(dp69 +g21 +S'NP_001350779.1:p.(Gly349AlafsTer38)' +p70 +sg23 +S'NP_001350779.1:p.(G349Afs*38)' +p71 +ssg25 +g26 +sg27 +g4 +sg28 +g4 +sg29 +S'NM_001363850.1:c.1046_1071del' +p72 +sg31 +g4 +sg32 +(dp73 +S'hg19' +p74 +(dp75 +g36 +S'NC_000016.9:g.89598370_89598395del' +p76 +sg38 +(dp77 +g40 +g41 +sg42 +S'CGGCCCCCCCGGCTGTGGGAAGACGCT' +p78 +sg44 +S'89598368' +p79 +sg46 +g47 +sssS'grch37' +p80 +(dp81 +g36 +S'NC_000016.9:g.89598370_89598395del' +p82 +sg38 +(dp83 +g40 +g52 +sg42 +S'CGGCCCCCCCGGCTGTGGGAAGACGCT' +p84 +sg44 +S'89598368' +p85 +sg46 +g47 +ssssg55 +(dp86 +g57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' +p87 +sg59 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' +p88 +sssS'NM_199367.2:c.1046_1071del' +p89 +(dp90 +g3 +g4 +sg5 +(lp91 +S'NC_000016.9:g.89598368CGGCCCCCCCGGCTGTGGGAAGACGCT>C automapped to NC_000016.9:g.89598370_89598395del' +p92 +aS'RefSeqGene record not available' +p93 +asg12 +g4 +sg13 +(lp94 +sg15 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase 
subunit (SPG7), transcript variant 2, mRNA +p95 +sg17 +S'SPG7' +p96 +sg19 +(dp97 +g21 +S'NP_955399.1:p.(Gly349AlafsTer38)' +p98 +sg23 +S'NP_955399.1:p.(G349Afs*38)' +p99 +ssg25 +g26 +sg27 +g4 +sg28 +g4 +sg29 +S'NM_199367.2:c.1046_1071del' +p100 +sg31 +g4 +sg32 +(dp101 +S'grch38' +p102 +(dp103 +g36 +S'NC_000016.10:g.89531962_89531987del' +p104 +sg38 +(dp105 +g40 +g52 +sg42 +S'CGGCCCCCCCGGCTGTGGGAAGACGCT' +p106 +sg44 +S'89531960' +p107 +sg46 +g47 +sssS'grch37' +p108 +(dp109 +g36 +S'NC_000016.9:g.89598370_89598395del' +p110 +sg38 +(dp111 +g40 +g52 +sg42 +S'CGGCCCCCCCGGCTGTGGGAAGACGCT' +p112 +sg44 +S'89598368' +p113 +sg46 +g47 +sssS'hg38' +p114 +(dp115 +g36 +S'NC_000016.10:g.89531962_89531987del' +p116 +sg38 +(dp117 +g40 +g41 +sg42 +S'CGGCCCCCCCGGCTGTGGGAAGACGCT' +p118 +sg44 +S'89531960' +p119 +sg46 +g47 +sssS'hg19' +p120 +(dp121 +g36 +S'NC_000016.9:g.89598370_89598395del' +p122 +sg38 +(dp123 +g40 +g41 +sg42 +S'CGGCCCCCCCGGCTGTGGGAAGACGCT' +p124 +sg44 +S'89598368' +p125 +sg46 +g47 +ssssg55 +(dp126 +g57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' +p127 +sg59 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2' +p128 +sssS'flag' +p129 +S'gene_variant' +p130 +sS'NM_003119.2:c.1046_1071del' +p131 +(dp132 +g3 +g4 +sg5 +(lp133 +S'NC_000016.9:g.89598368CGGCCCCCCCGGCTGTGGGAAGACGCT>C automapped to NC_000016.9:g.89598370_89598395del' +p134 +aS'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' +p135 +aS'NM_003119.3:c.1046_1071del MUST be fully validated prior to use in reports' +p136 +aS'select_variants=NM_003119.3:c.1046_1071del' +p137 +aS'RefSeqGene record not available' +p138 +asg12 +g4 +sg13 +(lp139 +sg15 +VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA +p140 +sg17 +S'SPG7' +p141 +sg19 +(dp142 +g21 +S'NP_003110.1:p.(Gly349AlafsTer38)' +p143 +sg23 +S'NP_003110.1:p.(G349Afs*38)' +p144 +ssg25 +g26 +sg27 +g4 +sg28 +g4 +sg29 +S'NM_003119.2:c.1046_1071del' +p145 
+sg31 +g4 +sg32 +(dp146 +S'hg19' +p147 +(dp148 +g36 +S'NC_000016.9:g.89598370_89598395del' +p149 +sg38 +(dp150 +g40 +g41 +sg42 +S'CGGCCCCCCCGGCTGTGGGAAGACGCT' +p151 +sg44 +S'89598368' +p152 +sg46 +g47 +sssS'grch37' +p153 +(dp154 +g36 +S'NC_000016.9:g.89598370_89598395del' +p155 +sg38 +(dp156 +g40 +g52 +sg42 +S'CGGCCCCCCCGGCTGTGGGAAGACGCT' +p157 +sg44 +S'89598368' +p158 +sg46 +g47 +ssssg55 +(dp159 +g57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p160 +sg59 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' +p161 +sssS'NM_003119.3:c.1046_1071del' +p162 +(dp163 +g3 +g4 +sg5 +(lp164 +S'NC_000016.9:g.89598368CGGCCCCCCCGGCTGTGGGAAGACGCT>C automapped to NC_000016.9:g.89598370_89598395del' +p165 +aS'RefSeqGene record not available' +p166 +asg12 +g4 +sg13 +(lp167 +sg15 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA +p168 +sg17 +S'SPG7' +p169 +sg19 +(dp170 +g21 +S'NP_003110.1:p.(Gly349AlafsTer38)' +p171 +sg23 +S'NP_003110.1:p.(G349Afs*38)' +p172 +ssg25 +g26 +sg27 +g4 +sg28 +g4 +sg29 +S'NM_003119.3:c.1046_1071del' +p173 +sg31 +g4 +sg32 +(dp174 +S'grch38' +p175 +(dp176 +g36 +S'NC_000016.10:g.89531962_89531987del' +p177 +sg38 +(dp178 +g40 +g52 +sg42 +S'CGGCCCCCCCGGCTGTGGGAAGACGCT' +p179 +sg44 +S'89531960' +p180 +sg46 +g47 +sssS'grch37' +p181 +(dp182 +g36 +S'NC_000016.9:g.89598370_89598395del' +p183 +sg38 +(dp184 +g40 +g52 +sg42 +S'CGGCCCCCCCGGCTGTGGGAAGACGCT' +p185 +sg44 +S'89598368' +p186 +sg46 +g47 +sssg114 +(dp187 +g36 +S'NC_000016.10:g.89531962_89531987del' +p188 +sg38 +(dp189 +g40 +g41 +sg42 +S'CGGCCCCCCCGGCTGTGGGAAGACGCT' +p190 +sg44 +S'89531960' +p191 +sg46 +g47 +sssS'hg19' +p192 +(dp193 +g36 +S'NC_000016.9:g.89598370_89598395del' +p194 +sg38 +(dp195 +g40 +g41 +sg42 +S'CGGCCCCCCCGGCTGTGGGAAGACGCT' +p196 +sg44 +S'89598368' +p197 +sg46 +g47 +ssssg55 +(dp198 +g57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p199 +sg59 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' +p200 +sssS'metadata' +p201 +(dp202 
+S'variantvalidator_hgvs_version' +p203 +S'1.1.3' +p204 +sS'uta_schema' +p205 +S'uta_20180821' +p206 +sS'seqrepo_db' +p207 +S'2018-08-21' +p208 +sS'variantvalidator_version' +p209 +S'v0.2' +p210 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant247.txt b/VariantValidator/testing/testOutputsMasterITS/variant247.txt new file mode 100644 index 00000000..85637161 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant247.txt @@ -0,0 +1,357 @@ +(dp0 +S'NM_001363850.1:c.1450-1_1457delinsT' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000016.9:g.89613064AGGAGAGGCG>AT automapped to NC_000016.9:g.89613065_89613073delinsT' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA +p13 +sS'gene_symbol' +p14 +S'SPG7' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_001350779.1:p.?' +p19 +sS'slr' +p20 +S'NP_001350779.1:p.?' 
+p21 +ssS'submitted_variant' +p22 +S'16-89613064-AGGAGAGGCG-AT' +p23 +sS'genome_context_intronic_sequence' +p24 +S'NC_000016.9(NM_001363850.1):c.1450-1_1457delinsT' +p25 +sS'hgvs_lrg_variant' +p26 +g4 +sS'hgvs_transcript_variant' +p27 +S'NM_001363850.1:c.1450-1_1457delinsT' +p28 +sS'hgvs_refseqgene_variant' +p29 +g4 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000016.9:g.89613065_89613073delinsT' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr16' +p39 +sS'ref' +p40 +S'GGAGAGGCG' +p41 +sS'pos' +p42 +S'89613065' +p43 +sS'alt' +p44 +S'T' +p45 +sssS'grch37' +p46 +(dp47 +g34 +S'NC_000016.9:g.89613065_89613073delinsT' +p48 +sg36 +(dp49 +g38 +S'16' +p50 +sg40 +S'GGAGAGGCG' +p51 +sg42 +S'89613065' +p52 +sg44 +g45 +ssssS'reference_sequence_records' +p53 +(dp54 +S'protein' +p55 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' +p56 +sS'transcript' +p57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' +p58 +sssS'flag' +p59 +S'gene_variant' +p60 +sS'metadata' +p61 +(dp62 +S'variantvalidator_hgvs_version' +p63 +S'1.1.3' +p64 +sS'uta_schema' +p65 +S'uta_20180821' +p66 +sS'seqrepo_db' +p67 +S'2018-08-21' +p68 +sS'variantvalidator_version' +p69 +S'v0.2' +p70 +ssS'NM_003119.2:c.1450-1_1457delinsT' +p71 +(dp72 +g3 +g4 +sg5 +(lp73 +S'NC_000016.9:g.89613064AGGAGAGGCG>AT automapped to NC_000016.9:g.89613065_89613073delinsT' +p74 +aS'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' +p75 +aS'NM_003119.3:c.1450-1_1457delinsT MUST be fully validated prior to use in reports' +p76 +aS'select_variants=NM_003119.3:c.1450-1_1457delinsT' +p77 +aS'RefSeqGene record not available' +p78 +asg9 +g4 +sg10 +(lp79 +sg12 +VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA +p80 +sg14 +S'SPG7' +p81 +sg16 +(dp82 +g18 +S'NP_003110.1:p.?' +p83 +sg20 +S'NP_003110.1:p.?' 
+p84 +ssg22 +g23 +sg24 +S'NC_000016.9(NM_003119.2):c.1450-1_1457delinsT' +p85 +sg26 +g4 +sg27 +S'NM_003119.2:c.1450-1_1457delinsT' +p86 +sg29 +g4 +sg30 +(dp87 +S'hg19' +p88 +(dp89 +g34 +S'NC_000016.9:g.89613065_89613073delinsT' +p90 +sg36 +(dp91 +g38 +g39 +sg40 +S'GGAGAGGCG' +p92 +sg42 +S'89613065' +p93 +sg44 +g45 +sssS'grch37' +p94 +(dp95 +g34 +S'NC_000016.9:g.89613065_89613073delinsT' +p96 +sg36 +(dp97 +g38 +g50 +sg40 +S'GGAGAGGCG' +p98 +sg42 +S'89613065' +p99 +sg44 +g45 +ssssg53 +(dp100 +g55 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p101 +sg57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' +p102 +sssS'NM_003119.3:c.1450-1_1457delinsT' +p103 +(dp104 +g3 +g4 +sg5 +(lp105 +S'NC_000016.9:g.89613064AGGAGAGGCG>AT automapped to NC_000016.9:g.89613065_89613073delinsT' +p106 +aS'RefSeqGene record not available' +p107 +asg9 +g4 +sg10 +(lp108 +sg12 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA +p109 +sg14 +S'SPG7' +p110 +sg16 +(dp111 +g18 +S'NP_003110.1:p.?' +p112 +sg20 +S'NP_003110.1:p.?' 
+p113 +ssg22 +g23 +sg24 +S'NC_000016.9(NM_003119.3):c.1450-1_1457delinsT' +p114 +sg26 +g4 +sg27 +S'NM_003119.3:c.1450-1_1457delinsT' +p115 +sg29 +g4 +sg30 +(dp116 +S'grch38' +p117 +(dp118 +g34 +S'NC_000016.10:g.89546657_89546665delinsT' +p119 +sg36 +(dp120 +g38 +g50 +sg40 +S'GGAGAGGCG' +p121 +sg42 +S'89546657' +p122 +sg44 +g45 +sssS'grch37' +p123 +(dp124 +g34 +S'NC_000016.9:g.89613065_89613073delinsT' +p125 +sg36 +(dp126 +g38 +g50 +sg40 +S'GGAGAGGCG' +p127 +sg42 +S'89613065' +p128 +sg44 +g45 +sssS'hg38' +p129 +(dp130 +g34 +S'NC_000016.10:g.89546657_89546665delinsT' +p131 +sg36 +(dp132 +g38 +g39 +sg40 +S'GGAGAGGCG' +p133 +sg42 +S'89546657' +p134 +sg44 +g45 +sssS'hg19' +p135 +(dp136 +g34 +S'NC_000016.9:g.89613065_89613073delinsT' +p137 +sg36 +(dp138 +g38 +g39 +sg40 +S'GGAGAGGCG' +p139 +sg42 +S'89613065' +p140 +sg44 +g45 +ssssg53 +(dp141 +g55 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p142 +sg57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' +p143 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant248.txt b/VariantValidator/testing/testOutputsMasterITS/variant248.txt new file mode 100644 index 00000000..43bdb4bb --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant248.txt @@ -0,0 +1,354 @@ +(dp0 +S'NM_003119.2:c.1454_1462delinsT' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000016.9:g.89613069AGGCGGGAGA>AT automapped to NC_000016.9:g.89613070_89613078delinsT' +p7 +aS'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' +p8 +aS'NM_003119.3:c.1454_1462delinsT MUST be fully validated prior to use in reports' +p9 +aS'select_variants=NM_003119.3:c.1454_1462delinsT' +p10 +aS'RefSeqGene record not available' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g4 +sS'alt_genomic_loci' +p13 +(lp14 +sS'transcript_description' +p15 +VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal 
recessive) (SPG7), transcript variant 1, mRNA +p16 +sS'gene_symbol' +p17 +S'SPG7' +p18 +sS'hgvs_predicted_protein_consequence' +p19 +(dp20 +S'tlr' +p21 +S'NP_003110.1:p.(Arg485IlefsTer3)' +p22 +sS'slr' +p23 +S'NP_003110.1:p.(R485Ifs*3)' +p24 +ssS'submitted_variant' +p25 +S'16-89613069-AGGCGGGAGA-AT' +p26 +sS'genome_context_intronic_sequence' +p27 +g4 +sS'hgvs_lrg_variant' +p28 +g4 +sS'hgvs_transcript_variant' +p29 +S'NM_003119.2:c.1454_1462delinsT' +p30 +sS'hgvs_refseqgene_variant' +p31 +g4 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000016.9:g.89613070_89613078delinsT' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr16' +p41 +sS'ref' +p42 +S'GGCGGGAGA' +p43 +sS'pos' +p44 +S'89613070' +p45 +sS'alt' +p46 +S'T' +p47 +sssS'grch37' +p48 +(dp49 +g36 +S'NC_000016.9:g.89613070_89613078delinsT' +p50 +sg38 +(dp51 +g40 +S'16' +p52 +sg42 +S'GGCGGGAGA' +p53 +sg44 +S'89613070' +p54 +sg46 +g47 +ssssS'reference_sequence_records' +p55 +(dp56 +S'protein' +p57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p58 +sS'transcript' +p59 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' +p60 +sssS'flag' +p61 +S'gene_variant' +p62 +sS'metadata' +p63 +(dp64 +S'variantvalidator_hgvs_version' +p65 +S'1.1.3' +p66 +sS'uta_schema' +p67 +S'uta_20180821' +p68 +sS'seqrepo_db' +p69 +S'2018-08-21' +p70 +sS'variantvalidator_version' +p71 +S'v0.2' +p72 +ssS'NM_001363850.1:c.1454_1462delinsT' +p73 +(dp74 +g3 +g4 +sg5 +(lp75 +S'NC_000016.9:g.89613069AGGCGGGAGA>AT automapped to NC_000016.9:g.89613070_89613078delinsT' +p76 +aS'RefSeqGene record not available' +p77 +asg12 +g4 +sg13 +(lp78 +sg15 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA +p79 +sg17 +S'SPG7' +p80 +sg19 +(dp81 +g21 +S'NP_001350779.1:p.(Arg485IlefsTer3)' +p82 +sg23 +S'NP_001350779.1:p.(R485Ifs*3)' +p83 +ssg25 +g26 +sg27 +g4 +sg28 +g4 +sg29 +S'NM_001363850.1:c.1454_1462delinsT' +p84 +sg31 +g4 +sg32 +(dp85 +S'hg19' +p86 +(dp87 +g36 
+S'NC_000016.9:g.89613070_89613078delinsT' +p88 +sg38 +(dp89 +g40 +g41 +sg42 +S'GGCGGGAGA' +p90 +sg44 +S'89613070' +p91 +sg46 +g47 +sssS'grch37' +p92 +(dp93 +g36 +S'NC_000016.9:g.89613070_89613078delinsT' +p94 +sg38 +(dp95 +g40 +g52 +sg42 +S'GGCGGGAGA' +p96 +sg44 +S'89613070' +p97 +sg46 +g47 +ssssg55 +(dp98 +g57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' +p99 +sg59 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' +p100 +sssS'NM_003119.3:c.1454_1462delinsT' +p101 +(dp102 +g3 +g4 +sg5 +(lp103 +S'NC_000016.9:g.89613069AGGCGGGAGA>AT automapped to NC_000016.9:g.89613070_89613078delinsT' +p104 +aS'RefSeqGene record not available' +p105 +asg12 +g4 +sg13 +(lp106 +sg15 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA +p107 +sg17 +S'SPG7' +p108 +sg19 +(dp109 +g21 +S'NP_003110.1:p.(Arg485IlefsTer3)' +p110 +sg23 +S'NP_003110.1:p.(R485Ifs*3)' +p111 +ssg25 +g26 +sg27 +g4 +sg28 +g4 +sg29 +S'NM_003119.3:c.1454_1462delinsT' +p112 +sg31 +g4 +sg32 +(dp113 +S'grch38' +p114 +(dp115 +g36 +S'NC_000016.10:g.89546662_89546670delinsT' +p116 +sg38 +(dp117 +g40 +g52 +sg42 +S'GGCGGGAGA' +p118 +sg44 +S'89546662' +p119 +sg46 +g47 +sssS'grch37' +p120 +(dp121 +g36 +S'NC_000016.9:g.89613070_89613078delinsT' +p122 +sg38 +(dp123 +g40 +g52 +sg42 +S'GGCGGGAGA' +p124 +sg44 +S'89613070' +p125 +sg46 +g47 +sssS'hg38' +p126 +(dp127 +g36 +S'NC_000016.10:g.89546662_89546670delinsT' +p128 +sg38 +(dp129 +g40 +g41 +sg42 +S'GGCGGGAGA' +p130 +sg44 +S'89546662' +p131 +sg46 +g47 +sssS'hg19' +p132 +(dp133 +g36 +S'NC_000016.9:g.89613070_89613078delinsT' +p134 +sg38 +(dp135 +g40 +g41 +sg42 +S'GGCGGGAGA' +p136 +sg44 +S'89613070' +p137 +sg46 +g47 +ssssg55 +(dp138 +g57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p139 +sg59 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' +p140 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant249.txt b/VariantValidator/testing/testOutputsMasterITS/variant249.txt new file mode 100644 index 00000000..49d55a90 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant249.txt @@ -0,0 +1,341 @@ +(dp0 +S'NM_001363850.1:c.1529C>T' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA +p12 +sS'gene_symbol' +p13 +S'SPG7' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_001350779.1:p.(Ala510Val)' +p18 +sS'slr' +p19 +S'NP_001350779.1:p.(A510V)' +p20 +ssS'submitted_variant' +p21 +S'16-89613145-C-T' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_001363850.1:c.1529C>T' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000016.9:g.89613145C>T' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr16' +p37 +sS'ref' +p38 +S'C' +p39 +sS'pos' +p40 +S'89613145' +p41 +sS'alt' +p42 +S'T' +p43 +sssS'grch37' +p44 +(dp45 +g32 +S'NC_000016.9:g.89613145C>T' +p46 +sg34 +(dp47 +g36 +S'16' +p48 +sg38 +g39 +sg40 +S'89613145' +p49 +sg42 +g43 +ssssS'reference_sequence_records' +p50 +(dp51 +S'protein' +p52 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' +p53 +sS'transcript' +p54 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' +p55 +sssS'NM_003119.3:c.1529C>T' +p56 +(dp57 +g3 +g4 +sg5 +(lp58 +S'RefSeqGene record not available' +p59 +asg8 +g4 +sg9 +(lp60 +sg11 +VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA +p61 +sg13 +S'SPG7' +p62 +sg15 +(dp63 +g17 
+S'NP_003110.1:p.(Ala510Val)' +p64 +sg19 +S'NP_003110.1:p.(A510V)' +p65 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_003119.3:c.1529C>T' +p66 +sg27 +g4 +sg28 +(dp67 +S'grch38' +p68 +(dp69 +g32 +S'NC_000016.10:g.89546737C>T' +p70 +sg34 +(dp71 +g36 +g48 +sg38 +g39 +sg40 +S'89546737' +p72 +sg42 +g43 +sssS'grch37' +p73 +(dp74 +g32 +S'NC_000016.9:g.89613145C>T' +p75 +sg34 +(dp76 +g36 +g48 +sg38 +g39 +sg40 +S'89613145' +p77 +sg42 +g43 +sssS'hg38' +p78 +(dp79 +g32 +S'NC_000016.10:g.89546737C>T' +p80 +sg34 +(dp81 +g36 +g37 +sg38 +g39 +sg40 +S'89546737' +p82 +sg42 +g43 +sssS'hg19' +p83 +(dp84 +g32 +S'NC_000016.9:g.89613145C>T' +p85 +sg34 +(dp86 +g36 +g37 +sg38 +g39 +sg40 +S'89613145' +p87 +sg42 +g43 +ssssg50 +(dp88 +g52 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p89 +sg54 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' +p90 +sssS'flag' +p91 +S'gene_variant' +p92 +sS'NM_003119.2:c.1529C>T' +p93 +(dp94 +g3 +g4 +sg5 +(lp95 +S'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' +p96 +aS'NM_003119.3:c.1529C>T MUST be fully validated prior to use in reports' +p97 +aS'select_variants=NM_003119.3:c.1529C>T' +p98 +aS'RefSeqGene record not available' +p99 +asg8 +g4 +sg9 +(lp100 +sg11 +VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA +p101 +sg13 +S'SPG7' +p102 +sg15 +(dp103 +g17 +S'NP_003110.1:p.(Ala510Val)' +p104 +sg19 +S'NP_003110.1:p.(A510V)' +p105 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_003119.2:c.1529C>T' +p106 +sg27 +g4 +sg28 +(dp107 +S'hg19' +p108 +(dp109 +g32 +S'NC_000016.9:g.89613145C>T' +p110 +sg34 +(dp111 +g36 +g37 +sg38 +g39 +sg40 +S'89613145' +p112 +sg42 +g43 +sssS'grch37' +p113 +(dp114 +g32 +S'NC_000016.9:g.89613145C>T' +p115 +sg34 +(dp116 +g36 +g48 +sg38 +g39 +sg40 +S'89613145' +p117 +sg42 +g43 +ssssg50 +(dp118 +g52 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' +p119 +sg54 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' +p120 
+sssS'metadata' +p121 +(dp122 +S'variantvalidator_hgvs_version' +p123 +S'1.1.3' +p124 +sS'uta_schema' +p125 +S'uta_20180821' +p126 +sS'seqrepo_db' +p127 +S'2018-08-21' +p128 +sS'variantvalidator_version' +p129 +S'v0.2' +p130 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant25.txt b/VariantValidator/testing/testOutputsMasterITS/variant25.txt new file mode 100644 index 00000000..1bf85b0a --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant25.txt @@ -0,0 +1,80 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use NC_000011.9:g.5244828_5248381=' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +g4 +sS'gene_symbol' +p12 +g4 +sS'hgvs_predicted_protein_consequence' +p13 +(dp14 +S'tlr' +p15 +g4 +sS'slr' +p16 +g4 +ssS'submitted_variant' +p17 +S'NM_000518.4:c.-50-80_*132+1868=' +p18 +sS'genome_context_intronic_sequence' +p19 +g4 +sS'hgvs_lrg_variant' +p20 +g4 +sS'hgvs_transcript_variant' +p21 +g4 +sS'hgvs_refseqgene_variant' +p22 +g4 +sS'primary_assembly_loci' +p23 +(dp24 +sS'reference_sequence_records' +p25 +g4 +ssS'flag' +p26 +S'warning' +p27 +sS'metadata' +p28 +(dp29 +S'variantvalidator_hgvs_version' +p30 +S'1.1.3' +p31 +sS'uta_schema' +p32 +S'uta_20180821' +p33 +sS'seqrepo_db' +p34 +S'2018-08-21' +p35 +sS'variantvalidator_version' +p36 +S'v0.2' +p37 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant250.txt b/VariantValidator/testing/testOutputsMasterITS/variant250.txt new file mode 100644 index 00000000..b886e7b1 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant250.txt @@ -0,0 +1,1870 @@ +(dp0 +S'NM_001276695.1:c.535_537del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 4, mRNA +p13 +sS'gene_symbol' +p14 +S'TP53' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_001263624.1:p.(Val179del)' +p19 +sS'slr' +p20 +S'NP_001263624.1:p.(V179del)' +p21 +ssS'submitted_variant' +p22 +S'17-7578194-GCAC-G' +p23 +sS'genome_context_intronic_sequence' +p24 +g4 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_001276695.1:c.535_537del' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000017.10:g.7578195_7578197del' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'chr17' +p38 +sS'ref' +p39 +S'GCAC' +p40 +sS'pos' +p41 +S'7578194' +p42 +sS'alt' +p43 +S'G' +p44 +sssS'hg38' +p45 +(dp46 +g33 +S'NC_000017.11:g.7674877_7674879del' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +S'GCAC' +p49 +sg41 +S'7674876' +p50 +sg43 +g44 +sssS'grch37' +p51 +(dp52 +g33 +S'NC_000017.10:g.7578195_7578197del' +p53 +sg35 +(dp54 +g37 +S'17' +p55 +sg39 +S'GCAC' +p56 +sg41 +S'7578194' +p57 +sg43 +g44 +sssS'grch38' +p58 +(dp59 +g33 +S'NC_000017.11:g.7674877_7674879del' +p60 +sg35 +(dp61 +g37 +g55 +sg39 +S'GCAC' +p62 +sg41 +S'7674876' +p63 +sg43 +g44 +ssssS'reference_sequence_records' +p64 +(dp65 +S'protein' +p66 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263624.1' +p67 +sS'transcript' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276695.1' +p69 +sssS'NM_001126113.2:c.652_654del' +p70 +(dp71 +g3 +g4 +sg5 +(lp72 +S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' +p73 +aS'RefSeqGene record not available' +p74 +asg9 +g4 +sg10 +(lp75 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 4, mRNA +p76 +sg14 +S'TP53' +p77 +sg16 +(dp78 +g18 +S'NP_001119585.1:p.(Val218del)' +p79 +sg20 +S'NP_001119585.1:p.(V218del)' +p80 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001126113.2:c.652_654del' +p81 +sg28 +g4 +sg29 +(dp82 +S'hg19' +p83 +(dp84 +g33 +S'NC_000017.10:g.7578195_7578197del' +p85 +sg35 +(dp86 +g37 +g38 +sg39 +S'GCAC' +p87 +sg41 +S'7578194' +p88 +sg43 +g44 +sssg45 +(dp89 +g33 +S'NC_000017.11:g.7674877_7674879del' +p90 +sg35 +(dp91 +g37 +g38 +sg39 +S'GCAC' +p92 +sg41 +S'7674876' +p93 +sg43 +g44 +sssS'grch37' +p94 +(dp95 +g33 +S'NC_000017.10:g.7578195_7578197del' +p96 +sg35 +(dp97 +g37 +g55 +sg39 +S'GCAC' +p98 +sg41 +S'7578194' +p99 +sg43 +g44 +sssS'grch38' +p100 +(dp101 +g33 +S'NC_000017.11:g.7674877_7674879del' +p102 +sg35 +(dp103 +g37 +g55 +sg39 +S'GCAC' +p104 +sg41 +S'7674876' +p105 +sg43 +g44 +ssssg64 +(dp106 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119585.1' +p107 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126113.2' +p108 +sssS'NM_001126118.1:c.535_537del' +p109 +(dp110 +g3 +g4 +sg5 +(lp111 +S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' +p112 +aS'RefSeqGene record not available' +p113 +asg9 +g4 +sg10 +(lp114 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 8, mRNA +p115 +sg14 +S'TP53' +p116 +sg16 +(dp117 +g18 +S'NP_001119590.1:p.(Val179del)' +p118 +sg20 +S'NP_001119590.1:p.(V179del)' +p119 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001126118.1:c.535_537del' +p120 +sg28 +g4 +sg29 +(dp121 +S'hg19' +p122 +(dp123 +g33 +S'NC_000017.10:g.7578195_7578197del' 
+p124 +sg35 +(dp125 +g37 +g38 +sg39 +S'GCAC' +p126 +sg41 +S'7578194' +p127 +sg43 +g44 +sssg45 +(dp128 +g33 +S'NC_000017.11:g.7674877_7674879del' +p129 +sg35 +(dp130 +g37 +g38 +sg39 +S'GCAC' +p131 +sg41 +S'7674876' +p132 +sg43 +g44 +sssS'grch37' +p133 +(dp134 +g33 +S'NC_000017.10:g.7578195_7578197del' +p135 +sg35 +(dp136 +g37 +g55 +sg39 +S'GCAC' +p137 +sg41 +S'7578194' +p138 +sg43 +g44 +sssS'grch38' +p139 +(dp140 +g33 +S'NC_000017.11:g.7674877_7674879del' +p141 +sg35 +(dp142 +g37 +g55 +sg39 +S'GCAC' +p143 +sg41 +S'7674876' +p144 +sg43 +g44 +ssssg64 +(dp145 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119590.1' +p146 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126118.1' +p147 +sssS'NM_001126116.1:c.256_258del' +p148 +(dp149 +g3 +g4 +sg5 +(lp150 +S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' +p151 +aS'RefSeqGene record not available' +p152 +asg9 +g4 +sg10 +(lp153 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 6, mRNA +p154 +sg14 +S'TP53' +p155 +sg16 +(dp156 +g18 +S'NP_001119588.1:p.(Val86del)' +p157 +sg20 +S'NP_001119588.1:p.(V86del)' +p158 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001126116.1:c.256_258del' +p159 +sg28 +g4 +sg29 +(dp160 +S'hg19' +p161 +(dp162 +g33 +S'NC_000017.10:g.7578195_7578197del' +p163 +sg35 +(dp164 +g37 +g38 +sg39 +S'GCAC' +p165 +sg41 +S'7578194' +p166 +sg43 +g44 +sssg45 +(dp167 +g33 +S'NC_000017.11:g.7674877_7674879del' +p168 +sg35 +(dp169 +g37 +g38 +sg39 +S'GCAC' +p170 +sg41 +S'7674876' +p171 +sg43 +g44 +sssS'grch37' +p172 +(dp173 +g33 +S'NC_000017.10:g.7578195_7578197del' +p174 +sg35 +(dp175 +g37 +g55 +sg39 +S'GCAC' +p176 +sg41 +S'7578194' +p177 +sg43 +g44 +sssS'grch38' +p178 +(dp179 +g33 +S'NC_000017.11:g.7674877_7674879del' +p180 +sg35 +(dp181 +g37 +g55 +sg39 +S'GCAC' +p182 +sg41 +S'7674876' +p183 +sg43 +g44 +ssssg64 +(dp184 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119588.1' +p185 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126116.1' +p186 
+sssS'NM_001126117.1:c.256_258del' +p187 +(dp188 +g3 +g4 +sg5 +(lp189 +S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' +p190 +aS'RefSeqGene record not available' +p191 +asg9 +g4 +sg10 +(lp192 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 7, mRNA +p193 +sg14 +S'TP53' +p194 +sg16 +(dp195 +g18 +S'NP_001119589.1:p.(Val86del)' +p196 +sg20 +S'NP_001119589.1:p.(V86del)' +p197 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001126117.1:c.256_258del' +p198 +sg28 +g4 +sg29 +(dp199 +S'hg19' +p200 +(dp201 +g33 +S'NC_000017.10:g.7578195_7578197del' +p202 +sg35 +(dp203 +g37 +g38 +sg39 +S'GCAC' +p204 +sg41 +S'7578194' +p205 +sg43 +g44 +sssg45 +(dp206 +g33 +S'NC_000017.11:g.7674877_7674879del' +p207 +sg35 +(dp208 +g37 +g38 +sg39 +S'GCAC' +p209 +sg41 +S'7674876' +p210 +sg43 +g44 +sssS'grch37' +p211 +(dp212 +g33 +S'NC_000017.10:g.7578195_7578197del' +p213 +sg35 +(dp214 +g37 +g55 +sg39 +S'GCAC' +p215 +sg41 +S'7578194' +p216 +sg43 +g44 +sssS'grch38' +p217 +(dp218 +g33 +S'NC_000017.11:g.7674877_7674879del' +p219 +sg35 +(dp220 +g37 +g55 +sg39 +S'GCAC' +p221 +sg41 +S'7674876' +p222 +sg43 +g44 +ssssg64 +(dp223 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119589.1' +p224 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126117.1' +p225 +sssS'NM_001276761.1:c.535_537del' +p226 +(dp227 +g3 +g4 +sg5 +(lp228 +S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' +p229 +aS'RefSeqGene record not available' +p230 +asg9 +g4 +sg10 +(lp231 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 2, mRNA +p232 +sg14 +S'TP53' +p233 +sg16 +(dp234 +g18 +S'NP_001263690.1:p.(Val179del)' +p235 +sg20 +S'NP_001263690.1:p.(V179del)' +p236 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001276761.1:c.535_537del' +p237 +sg28 +g4 +sg29 +(dp238 +S'hg19' +p239 +(dp240 +g33 +S'NC_000017.10:g.7578195_7578197del' +p241 +sg35 +(dp242 +g37 +g38 +sg39 +S'GCAC' +p243 +sg41 +S'7578194' +p244 +sg43 +g44 +sssg45 +(dp245 +g33 
+S'NC_000017.11:g.7674877_7674879del' +p246 +sg35 +(dp247 +g37 +g38 +sg39 +S'GCAC' +p248 +sg41 +S'7674876' +p249 +sg43 +g44 +sssS'grch37' +p250 +(dp251 +g33 +S'NC_000017.10:g.7578195_7578197del' +p252 +sg35 +(dp253 +g37 +g55 +sg39 +S'GCAC' +p254 +sg41 +S'7578194' +p255 +sg43 +g44 +sssS'grch38' +p256 +(dp257 +g33 +S'NC_000017.11:g.7674877_7674879del' +p258 +sg35 +(dp259 +g37 +g55 +sg39 +S'GCAC' +p260 +sg41 +S'7674876' +p261 +sg43 +g44 +ssssg64 +(dp262 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263690.1' +p263 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276761.1' +p264 +sssS'NM_001126112.2:c.652_654del' +p265 +(dp266 +g3 +g4 +sg5 +(lp267 +S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' +p268 +aS'RefSeqGene record not available' +p269 +asg9 +g4 +sg10 +(lp270 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 2, mRNA +p271 +sg14 +S'TP53' +p272 +sg16 +(dp273 +g18 +S'NP_001119584.1:p.(Val218del)' +p274 +sg20 +S'NP_001119584.1:p.(V218del)' +p275 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001126112.2:c.652_654del' +p276 +sg28 +g4 +sg29 +(dp277 +S'hg19' +p278 +(dp279 +g33 +S'NC_000017.10:g.7578195_7578197del' +p280 +sg35 +(dp281 +g37 +g38 +sg39 +S'GCAC' +p282 +sg41 +S'7578194' +p283 +sg43 +g44 +sssg45 +(dp284 +g33 +S'NC_000017.11:g.7674877_7674879del' +p285 +sg35 +(dp286 +g37 +g38 +sg39 +S'GCAC' +p287 +sg41 +S'7674876' +p288 +sg43 +g44 +sssS'grch37' +p289 +(dp290 +g33 +S'NC_000017.10:g.7578195_7578197del' +p291 +sg35 +(dp292 +g37 +g55 +sg39 +S'GCAC' +p293 +sg41 +S'7578194' +p294 +sg43 +g44 +sssS'grch38' +p295 +(dp296 +g33 +S'NC_000017.11:g.7674877_7674879del' +p297 +sg35 +(dp298 +g37 +g55 +sg39 +S'GCAC' +p299 +sg41 +S'7674876' +p300 +sg43 +g44 +ssssg64 +(dp301 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119584.1' +p302 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126112.2' +p303 +sssS'flag' +p304 +S'gene_variant' +p305 +sS'NM_001276697.1:c.175_177del' +p306 +(dp307 +g3 +g4 +sg5 +(lp308 
+S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' +p309 +aS'RefSeqGene record not available' +p310 +asg9 +g4 +sg10 +(lp311 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 5, mRNA +p312 +sg14 +S'TP53' +p313 +sg16 +(dp314 +g18 +S'NP_001263626.1:p.(Val59del)' +p315 +sg20 +S'NP_001263626.1:p.(V59del)' +p316 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001276697.1:c.175_177del' +p317 +sg28 +g4 +sg29 +(dp318 +S'hg19' +p319 +(dp320 +g33 +S'NC_000017.10:g.7578195_7578197del' +p321 +sg35 +(dp322 +g37 +g38 +sg39 +S'GCAC' +p323 +sg41 +S'7578194' +p324 +sg43 +g44 +sssg45 +(dp325 +g33 +S'NC_000017.11:g.7674877_7674879del' +p326 +sg35 +(dp327 +g37 +g38 +sg39 +S'GCAC' +p328 +sg41 +S'7674876' +p329 +sg43 +g44 +sssS'grch37' +p330 +(dp331 +g33 +S'NC_000017.10:g.7578195_7578197del' +p332 +sg35 +(dp333 +g37 +g55 +sg39 +S'GCAC' +p334 +sg41 +S'7578194' +p335 +sg43 +g44 +sssS'grch38' +p336 +(dp337 +g33 +S'NC_000017.11:g.7674877_7674879del' +p338 +sg35 +(dp339 +g37 +g55 +sg39 +S'GCAC' +p340 +sg41 +S'7674876' +p341 +sg43 +g44 +ssssg64 +(dp342 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263626.1' +p343 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276697.1' +p344 +sssS'NM_001276696.1:c.535_537del' +p345 +(dp346 +g3 +g4 +sg5 +(lp347 +S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' +p348 +aS'RefSeqGene record not available' +p349 +asg9 +g4 +sg10 +(lp350 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 3, mRNA +p351 +sg14 +S'TP53' +p352 +sg16 +(dp353 +g18 +S'NP_001263625.1:p.(Val179del)' +p354 +sg20 +S'NP_001263625.1:p.(V179del)' +p355 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001276696.1:c.535_537del' +p356 +sg28 +g4 +sg29 +(dp357 +S'hg19' +p358 +(dp359 +g33 +S'NC_000017.10:g.7578195_7578197del' +p360 +sg35 +(dp361 +g37 +g38 +sg39 +S'GCAC' +p362 +sg41 +S'7578194' +p363 +sg43 +g44 +sssg45 +(dp364 +g33 +S'NC_000017.11:g.7674877_7674879del' +p365 +sg35 +(dp366 +g37 +g38 +sg39 
+S'GCAC' +p367 +sg41 +S'7674876' +p368 +sg43 +g44 +sssS'grch37' +p369 +(dp370 +g33 +S'NC_000017.10:g.7578195_7578197del' +p371 +sg35 +(dp372 +g37 +g55 +sg39 +S'GCAC' +p373 +sg41 +S'7578194' +p374 +sg43 +g44 +sssS'grch38' +p375 +(dp376 +g33 +S'NC_000017.11:g.7674877_7674879del' +p377 +sg35 +(dp378 +g37 +g55 +sg39 +S'GCAC' +p379 +sg41 +S'7674876' +p380 +sg43 +g44 +ssssg64 +(dp381 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263625.1' +p382 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276696.1' +p383 +sssS'NM_001276698.1:c.175_177del' +p384 +(dp385 +g3 +g4 +sg5 +(lp386 +S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' +p387 +aS'RefSeqGene record not available' +p388 +asg9 +g4 +sg10 +(lp389 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 6, mRNA +p390 +sg14 +S'TP53' +p391 +sg16 +(dp392 +g18 +S'NP_001263627.1:p.(Val59del)' +p393 +sg20 +S'NP_001263627.1:p.(V59del)' +p394 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001276698.1:c.175_177del' +p395 +sg28 +g4 +sg29 +(dp396 +S'hg19' +p397 +(dp398 +g33 +S'NC_000017.10:g.7578195_7578197del' +p399 +sg35 +(dp400 +g37 +g38 +sg39 +S'GCAC' +p401 +sg41 +S'7578194' +p402 +sg43 +g44 +sssg45 +(dp403 +g33 +S'NC_000017.11:g.7674877_7674879del' +p404 +sg35 +(dp405 +g37 +g38 +sg39 +S'GCAC' +p406 +sg41 +S'7674876' +p407 +sg43 +g44 +sssS'grch37' +p408 +(dp409 +g33 +S'NC_000017.10:g.7578195_7578197del' +p410 +sg35 +(dp411 +g37 +g55 +sg39 +S'GCAC' +p412 +sg41 +S'7578194' +p413 +sg43 +g44 +sssS'grch38' +p414 +(dp415 +g33 +S'NC_000017.11:g.7674877_7674879del' +p416 +sg35 +(dp417 +g37 +g55 +sg39 +S'GCAC' +p418 +sg41 +S'7674876' +p419 +sg43 +g44 +ssssg64 +(dp420 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263627.1' +p421 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276698.1' +p422 +sssS'NM_001126115.1:c.256_258del' +p423 +(dp424 +g3 +g4 +sg5 +(lp425 +S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' +p426 +aS'RefSeqGene record not 
available' +p427 +asg9 +g4 +sg10 +(lp428 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 5, mRNA +p429 +sg14 +S'TP53' +p430 +sg16 +(dp431 +g18 +S'NP_001119587.1:p.(Val86del)' +p432 +sg20 +S'NP_001119587.1:p.(V86del)' +p433 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001126115.1:c.256_258del' +p434 +sg28 +g4 +sg29 +(dp435 +S'hg19' +p436 +(dp437 +g33 +S'NC_000017.10:g.7578195_7578197del' +p438 +sg35 +(dp439 +g37 +g38 +sg39 +S'GCAC' +p440 +sg41 +S'7578194' +p441 +sg43 +g44 +sssg45 +(dp442 +g33 +S'NC_000017.11:g.7674877_7674879del' +p443 +sg35 +(dp444 +g37 +g38 +sg39 +S'GCAC' +p445 +sg41 +S'7674876' +p446 +sg43 +g44 +sssS'grch37' +p447 +(dp448 +g33 +S'NC_000017.10:g.7578195_7578197del' +p449 +sg35 +(dp450 +g37 +g55 +sg39 +S'GCAC' +p451 +sg41 +S'7578194' +p452 +sg43 +g44 +sssS'grch38' +p453 +(dp454 +g33 +S'NC_000017.11:g.7674877_7674879del' +p455 +sg35 +(dp456 +g37 +g55 +sg39 +S'GCAC' +p457 +sg41 +S'7674876' +p458 +sg43 +g44 +ssssg64 +(dp459 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119587.1' +p460 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126115.1' +p461 +sssS'NM_001126114.2:c.652_654del' +p462 +(dp463 +g3 +g4 +sg5 +(lp464 +S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' +p465 +aS'RefSeqGene record not available' +p466 +asg9 +g4 +sg10 +(lp467 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 3, mRNA +p468 +sg14 +S'TP53' +p469 +sg16 +(dp470 +g18 +S'NP_001119586.1:p.(Val218del)' +p471 +sg20 +S'NP_001119586.1:p.(V218del)' +p472 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001126114.2:c.652_654del' +p473 +sg28 +g4 +sg29 +(dp474 +S'hg19' +p475 +(dp476 +g33 +S'NC_000017.10:g.7578195_7578197del' +p477 +sg35 +(dp478 +g37 +g38 +sg39 +S'GCAC' +p479 +sg41 +S'7578194' +p480 +sg43 +g44 +sssg45 +(dp481 +g33 +S'NC_000017.11:g.7674877_7674879del' +p482 +sg35 +(dp483 +g37 +g38 +sg39 +S'GCAC' +p484 +sg41 +S'7674876' +p485 +sg43 +g44 +sssS'grch37' +p486 +(dp487 +g33 
+S'NC_000017.10:g.7578195_7578197del' +p488 +sg35 +(dp489 +g37 +g55 +sg39 +S'GCAC' +p490 +sg41 +S'7578194' +p491 +sg43 +g44 +sssS'grch38' +p492 +(dp493 +g33 +S'NC_000017.11:g.7674877_7674879del' +p494 +sg35 +(dp495 +g37 +g55 +sg39 +S'GCAC' +p496 +sg41 +S'7674876' +p497 +sg43 +g44 +ssssg64 +(dp498 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119586.1' +p499 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126114.2' +p500 +sssS'metadata' +p501 +(dp502 +S'variantvalidator_hgvs_version' +p503 +S'1.1.3' +p504 +sS'uta_schema' +p505 +S'uta_20180821' +p506 +sS'seqrepo_db' +p507 +S'2018-08-21' +p508 +sS'variantvalidator_version' +p509 +S'v0.2' +p510 +ssS'NM_001276699.1:c.175_177del' +p511 +(dp512 +g3 +g4 +sg5 +(lp513 +S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' +p514 +aS'RefSeqGene record not available' +p515 +asg9 +g4 +sg10 +(lp516 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 7, mRNA +p517 +sg14 +S'TP53' +p518 +sg16 +(dp519 +g18 +S'NP_001263628.1:p.(Val59del)' +p520 +sg20 +S'NP_001263628.1:p.(V59del)' +p521 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001276699.1:c.175_177del' +p522 +sg28 +g4 +sg29 +(dp523 +S'hg19' +p524 +(dp525 +g33 +S'NC_000017.10:g.7578195_7578197del' +p526 +sg35 +(dp527 +g37 +g38 +sg39 +S'GCAC' +p528 +sg41 +S'7578194' +p529 +sg43 +g44 +sssg45 +(dp530 +g33 +S'NC_000017.11:g.7674877_7674879del' +p531 +sg35 +(dp532 +g37 +g38 +sg39 +S'GCAC' +p533 +sg41 +S'7674876' +p534 +sg43 +g44 +sssS'grch37' +p535 +(dp536 +g33 +S'NC_000017.10:g.7578195_7578197del' +p537 +sg35 +(dp538 +g37 +g55 +sg39 +S'GCAC' +p539 +sg41 +S'7578194' +p540 +sg43 +g44 +sssS'grch38' +p541 +(dp542 +g33 +S'NC_000017.11:g.7674877_7674879del' +p543 +sg35 +(dp544 +g37 +g55 +sg39 +S'GCAC' +p545 +sg41 +S'7674876' +p546 +sg43 +g44 +ssssg64 +(dp547 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263628.1' +p548 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276699.1' +p549 +sssS'NM_001276760.1:c.535_537del' +p550 +(dp551 +g3 
+g4 +sg5 +(lp552 +S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' +p553 +aS'RefSeqGene record not available' +p554 +asg9 +g4 +sg10 +(lp555 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 1, mRNA +p556 +sg14 +S'TP53' +p557 +sg16 +(dp558 +g18 +S'NP_001263689.1:p.(Val179del)' +p559 +sg20 +S'NP_001263689.1:p.(V179del)' +p560 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001276760.1:c.535_537del' +p561 +sg28 +g4 +sg29 +(dp562 +S'hg19' +p563 +(dp564 +g33 +S'NC_000017.10:g.7578195_7578197del' +p565 +sg35 +(dp566 +g37 +g38 +sg39 +S'GCAC' +p567 +sg41 +S'7578194' +p568 +sg43 +g44 +sssg45 +(dp569 +g33 +S'NC_000017.11:g.7674877_7674879del' +p570 +sg35 +(dp571 +g37 +g38 +sg39 +S'GCAC' +p572 +sg41 +S'7674876' +p573 +sg43 +g44 +sssS'grch37' +p574 +(dp575 +g33 +S'NC_000017.10:g.7578195_7578197del' +p576 +sg35 +(dp577 +g37 +g55 +sg39 +S'GCAC' +p578 +sg41 +S'7578194' +p579 +sg43 +g44 +sssS'grch38' +p580 +(dp581 +g33 +S'NC_000017.11:g.7674877_7674879del' +p582 +sg35 +(dp583 +g37 +g55 +sg39 +S'GCAC' +p584 +sg41 +S'7674876' +p585 +sg43 +g44 +ssssg64 +(dp586 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263689.1' +p587 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276760.1' +p588 +sssS'NM_000546.5:c.652_654del' +p589 +(dp590 +g3 +g4 +sg5 +(lp591 +S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' +p592 +aS'RefSeqGene record not available' +p593 +asg9 +g4 +sg10 +(lp594 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 1, mRNA +p595 +sg14 +S'TP53' +p596 +sg16 +(dp597 +g18 +S'NP_000537.3:p.(Val218del)' +p598 +sg20 +S'NP_000537.3:p.(V218del)' +p599 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_000546.5:c.652_654del' +p600 +sg28 +g4 +sg29 +(dp601 +S'hg19' +p602 +(dp603 +g33 +S'NC_000017.10:g.7578195_7578197del' +p604 +sg35 +(dp605 +g37 +g38 +sg39 +S'GCAC' +p606 +sg41 +S'7578194' +p607 +sg43 +g44 +sssg45 +(dp608 +g33 +S'NC_000017.11:g.7674877_7674879del' +p609 +sg35 +(dp610 +g37 +g38 
+sg39 +S'GCAC' +p611 +sg41 +S'7674876' +p612 +sg43 +g44 +sssS'grch37' +p613 +(dp614 +g33 +S'NC_000017.10:g.7578195_7578197del' +p615 +sg35 +(dp616 +g37 +g55 +sg39 +S'GCAC' +p617 +sg41 +S'7578194' +p618 +sg43 +g44 +sssS'grch38' +p619 +(dp620 +g33 +S'NC_000017.11:g.7674877_7674879del' +p621 +sg35 +(dp622 +g37 +g55 +sg39 +S'GCAC' +p623 +sg41 +S'7674876' +p624 +sg43 +g44 +ssssg64 +(dp625 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000537.3' +p626 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000546.5' +p627 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant251.txt b/VariantValidator/testing/testOutputsMasterITS/variant251.txt new file mode 100644 index 00000000..67533096 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant251.txt @@ -0,0 +1,1870 @@ +(dp0 +S'NM_001276760.1:c.289dup' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 1, mRNA +p13 +sS'gene_symbol' +p14 +S'TP53' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_001263689.1:p.(Gln97ProfsTer13)' +p19 +sS'slr' +p20 +S'NP_001263689.1:p.(Q97Pfs*13)' +p21 +ssS'submitted_variant' +p22 +S'17-7578523-T-TG' +p23 +sS'genome_context_intronic_sequence' +p24 +g4 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_001276760.1:c.289dup' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000017.10:g.7578524dup' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'chr17' +p38 +sS'ref' +p39 +S'G' +p40 +sS'pos' +p41 +S'7578524' +p42 +sS'alt' +p43 +VGG +p44 +sssS'hg38' +p45 +(dp46 +g33 
+S'NC_000017.11:g.7675206dup' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +g40 +sg41 +S'7675206' +p49 +sg43 +VGG +p50 +sssS'grch37' +p51 +(dp52 +g33 +S'NC_000017.10:g.7578524dup' +p53 +sg35 +(dp54 +g37 +S'17' +p55 +sg39 +g40 +sg41 +S'7578524' +p56 +sg43 +VGG +p57 +sssS'grch38' +p58 +(dp59 +g33 +S'NC_000017.11:g.7675206dup' +p60 +sg35 +(dp61 +g37 +g55 +sg39 +g40 +sg41 +S'7675206' +p62 +sg43 +VGG +p63 +ssssS'reference_sequence_records' +p64 +(dp65 +S'protein' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263689.1' +p67 +sS'transcript' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276760.1' +p69 +sssS'NM_001126118.1:c.289dup' +p70 +(dp71 +g3 +g4 +sg5 +(lp72 +S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' +p73 +aS'RefSeqGene record not available' +p74 +asg9 +g4 +sg10 +(lp75 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 8, mRNA +p76 +sg14 +S'TP53' +p77 +sg16 +(dp78 +g18 +S'NP_001119590.1:p.(Gln97ProfsTer13)' +p79 +sg20 +S'NP_001119590.1:p.(Q97Pfs*13)' +p80 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001126118.1:c.289dup' +p81 +sg28 +g4 +sg29 +(dp82 +S'hg19' +p83 +(dp84 +g33 +S'NC_000017.10:g.7578524dup' +p85 +sg35 +(dp86 +g37 +g38 +sg39 +g40 +sg41 +S'7578524' +p87 +sg43 +VGG +p88 +sssg45 +(dp89 +g33 +S'NC_000017.11:g.7675206dup' +p90 +sg35 +(dp91 +g37 +g38 +sg39 +g40 +sg41 +S'7675206' +p92 +sg43 +VGG +p93 +sssS'grch37' +p94 +(dp95 +g33 +S'NC_000017.10:g.7578524dup' +p96 +sg35 +(dp97 +g37 +g55 +sg39 +g40 +sg41 +S'7578524' +p98 +sg43 +VGG +p99 +sssS'grch38' +p100 +(dp101 +g33 +S'NC_000017.11:g.7675206dup' +p102 +sg35 +(dp103 +g37 +g55 +sg39 +g40 +sg41 +S'7675206' +p104 +sg43 +VGG +p105 +ssssg64 +(dp106 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119590.1' +p107 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126118.1' +p108 +sssS'NM_001276695.1:c.289dup' +p109 +(dp110 +g3 +g4 +sg5 +(lp111 +S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' +p112 +aS'RefSeqGene record not available' +p113 +asg9 +g4 
+sg10 +(lp114 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 4, mRNA +p115 +sg14 +S'TP53' +p116 +sg16 +(dp117 +g18 +S'NP_001263624.1:p.(Gln97ProfsTer13)' +p118 +sg20 +S'NP_001263624.1:p.(Q97Pfs*13)' +p119 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001276695.1:c.289dup' +p120 +sg28 +g4 +sg29 +(dp121 +S'hg19' +p122 +(dp123 +g33 +S'NC_000017.10:g.7578524dup' +p124 +sg35 +(dp125 +g37 +g38 +sg39 +g40 +sg41 +S'7578524' +p126 +sg43 +VGG +p127 +sssg45 +(dp128 +g33 +S'NC_000017.11:g.7675206dup' +p129 +sg35 +(dp130 +g37 +g38 +sg39 +g40 +sg41 +S'7675206' +p131 +sg43 +VGG +p132 +sssS'grch37' +p133 +(dp134 +g33 +S'NC_000017.10:g.7578524dup' +p135 +sg35 +(dp136 +g37 +g55 +sg39 +g40 +sg41 +S'7578524' +p137 +sg43 +VGG +p138 +sssS'grch38' +p139 +(dp140 +g33 +S'NC_000017.11:g.7675206dup' +p141 +sg35 +(dp142 +g37 +g55 +sg39 +g40 +sg41 +S'7675206' +p143 +sg43 +VGG +p144 +ssssg64 +(dp145 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263624.1' +p146 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276695.1' +p147 +sssS'NM_001276699.1:c.-72dup' +p148 +(dp149 +g3 +g4 +sg5 +(lp150 +S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' +p151 +aS'RefSeqGene record not available' +p152 +asg9 +g4 +sg10 +(lp153 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 7, mRNA +p154 +sg14 +S'TP53' +p155 +sg16 +(dp156 +g18 +S'NP_001263628.1:p.?' +p157 +sg20 +S'NP_001263628.1:p.?' 
+p158 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001276699.1:c.-72dup' +p159 +sg28 +g4 +sg29 +(dp160 +S'hg19' +p161 +(dp162 +g33 +S'NC_000017.10:g.7578524dup' +p163 +sg35 +(dp164 +g37 +g38 +sg39 +g40 +sg41 +S'7578524' +p165 +sg43 +VGG +p166 +sssg45 +(dp167 +g33 +S'NC_000017.11:g.7675206dup' +p168 +sg35 +(dp169 +g37 +g38 +sg39 +g40 +sg41 +S'7675206' +p170 +sg43 +VGG +p171 +sssS'grch37' +p172 +(dp173 +g33 +S'NC_000017.10:g.7578524dup' +p174 +sg35 +(dp175 +g37 +g55 +sg39 +g40 +sg41 +S'7578524' +p176 +sg43 +VGG +p177 +sssS'grch38' +p178 +(dp179 +g33 +S'NC_000017.11:g.7675206dup' +p180 +sg35 +(dp181 +g37 +g55 +sg39 +g40 +sg41 +S'7675206' +p182 +sg43 +VGG +p183 +ssssg64 +(dp184 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263628.1' +p185 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276699.1' +p186 +sssS'NM_001126115.1:c.10dup' +p187 +(dp188 +g3 +g4 +sg5 +(lp189 +S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' +p190 +aS'RefSeqGene record not available' +p191 +asg9 +g4 +sg10 +(lp192 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 5, mRNA +p193 +sg14 +S'TP53' +p194 +sg16 +(dp195 +g18 +S'NP_001119587.1:p.(Gln4ProfsTer13)' +p196 +sg20 +S'NP_001119587.1:p.(Q4Pfs*13)' +p197 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001126115.1:c.10dup' +p198 +sg28 +g4 +sg29 +(dp199 +S'hg19' +p200 +(dp201 +g33 +S'NC_000017.10:g.7578524dup' +p202 +sg35 +(dp203 +g37 +g38 +sg39 +g40 +sg41 +S'7578524' +p204 +sg43 +VGG +p205 +sssg45 +(dp206 +g33 +S'NC_000017.11:g.7675206dup' +p207 +sg35 +(dp208 +g37 +g38 +sg39 +g40 +sg41 +S'7675206' +p209 +sg43 +VGG +p210 +sssS'grch37' +p211 +(dp212 +g33 +S'NC_000017.10:g.7578524dup' +p213 +sg35 +(dp214 +g37 +g55 +sg39 +g40 +sg41 +S'7578524' +p215 +sg43 +VGG +p216 +sssS'grch38' +p217 +(dp218 +g33 +S'NC_000017.11:g.7675206dup' +p219 +sg35 +(dp220 +g37 +g55 +sg39 +g40 +sg41 +S'7675206' +p221 +sg43 +VGG +p222 +ssssg64 +(dp223 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119587.1' +p224 +sg68 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126115.1' +p225 +sssS'NM_001276697.1:c.-72dup' +p226 +(dp227 +g3 +g4 +sg5 +(lp228 +S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' +p229 +aS'RefSeqGene record not available' +p230 +asg9 +g4 +sg10 +(lp231 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 5, mRNA +p232 +sg14 +S'TP53' +p233 +sg16 +(dp234 +g18 +S'NP_001263626.1:p.?' +p235 +sg20 +S'NP_001263626.1:p.?' +p236 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001276697.1:c.-72dup' +p237 +sg28 +g4 +sg29 +(dp238 +S'hg19' +p239 +(dp240 +g33 +S'NC_000017.10:g.7578524dup' +p241 +sg35 +(dp242 +g37 +g38 +sg39 +g40 +sg41 +S'7578524' +p243 +sg43 +VGG +p244 +sssg45 +(dp245 +g33 +S'NC_000017.11:g.7675206dup' +p246 +sg35 +(dp247 +g37 +g38 +sg39 +g40 +sg41 +S'7675206' +p248 +sg43 +VGG +p249 +sssS'grch37' +p250 +(dp251 +g33 +S'NC_000017.10:g.7578524dup' +p252 +sg35 +(dp253 +g37 +g55 +sg39 +g40 +sg41 +S'7578524' +p254 +sg43 +VGG +p255 +sssS'grch38' +p256 +(dp257 +g33 +S'NC_000017.11:g.7675206dup' +p258 +sg35 +(dp259 +g37 +g55 +sg39 +g40 +sg41 +S'7675206' +p260 +sg43 +VGG +p261 +ssssg64 +(dp262 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263626.1' +p263 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276697.1' +p264 +sssS'NM_001126117.1:c.10dup' +p265 +(dp266 +g3 +g4 +sg5 +(lp267 +S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' +p268 +aS'RefSeqGene record not available' +p269 +asg9 +g4 +sg10 +(lp270 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 7, mRNA +p271 +sg14 +S'TP53' +p272 +sg16 +(dp273 +g18 +S'NP_001119589.1:p.(Gln4ProfsTer13)' +p274 +sg20 +S'NP_001119589.1:p.(Q4Pfs*13)' +p275 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001126117.1:c.10dup' +p276 +sg28 +g4 +sg29 +(dp277 +S'hg19' +p278 +(dp279 +g33 +S'NC_000017.10:g.7578524dup' +p280 +sg35 +(dp281 +g37 +g38 +sg39 +g40 +sg41 +S'7578524' +p282 +sg43 +VGG +p283 +sssg45 +(dp284 +g33 +S'NC_000017.11:g.7675206dup' +p285 +sg35 +(dp286 +g37 
+g38 +sg39 +g40 +sg41 +S'7675206' +p287 +sg43 +VGG +p288 +sssS'grch37' +p289 +(dp290 +g33 +S'NC_000017.10:g.7578524dup' +p291 +sg35 +(dp292 +g37 +g55 +sg39 +g40 +sg41 +S'7578524' +p293 +sg43 +VGG +p294 +sssS'grch38' +p295 +(dp296 +g33 +S'NC_000017.11:g.7675206dup' +p297 +sg35 +(dp298 +g37 +g55 +sg39 +g40 +sg41 +S'7675206' +p299 +sg43 +VGG +p300 +ssssg64 +(dp301 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119589.1' +p302 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126117.1' +p303 +sssS'NM_000546.5:c.406dup' +p304 +(dp305 +g3 +g4 +sg5 +(lp306 +S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' +p307 +aS'RefSeqGene record not available' +p308 +asg9 +g4 +sg10 +(lp309 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 1, mRNA +p310 +sg14 +S'TP53' +p311 +sg16 +(dp312 +g18 +S'NP_000537.3:p.(Gln136ProfsTer13)' +p313 +sg20 +S'NP_000537.3:p.(Q136Pfs*13)' +p314 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_000546.5:c.406dup' +p315 +sg28 +g4 +sg29 +(dp316 +S'hg19' +p317 +(dp318 +g33 +S'NC_000017.10:g.7578524dup' +p319 +sg35 +(dp320 +g37 +g38 +sg39 +g40 +sg41 +S'7578524' +p321 +sg43 +VGG +p322 +sssg45 +(dp323 +g33 +S'NC_000017.11:g.7675206dup' +p324 +sg35 +(dp325 +g37 +g38 +sg39 +g40 +sg41 +S'7675206' +p326 +sg43 +VGG +p327 +sssS'grch37' +p328 +(dp329 +g33 +S'NC_000017.10:g.7578524dup' +p330 +sg35 +(dp331 +g37 +g55 +sg39 +g40 +sg41 +S'7578524' +p332 +sg43 +VGG +p333 +sssS'grch38' +p334 +(dp335 +g33 +S'NC_000017.11:g.7675206dup' +p336 +sg35 +(dp337 +g37 +g55 +sg39 +g40 +sg41 +S'7675206' +p338 +sg43 +VGG +p339 +ssssg64 +(dp340 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000537.3' +p341 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000546.5' +p342 +sssS'flag' +p343 +S'gene_variant' +p344 +sS'NM_001276696.1:c.289dup' +p345 +(dp346 +g3 +g4 +sg5 +(lp347 +S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' +p348 +aS'RefSeqGene record not available' +p349 +asg9 +g4 +sg10 +(lp350 +sg12 +VHomo sapiens tumor protein 
p53 (TP53), transcript variant 3, mRNA +p351 +sg14 +S'TP53' +p352 +sg16 +(dp353 +g18 +S'NP_001263625.1:p.(Gln97ProfsTer13)' +p354 +sg20 +S'NP_001263625.1:p.(Q97Pfs*13)' +p355 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001276696.1:c.289dup' +p356 +sg28 +g4 +sg29 +(dp357 +S'hg19' +p358 +(dp359 +g33 +S'NC_000017.10:g.7578524dup' +p360 +sg35 +(dp361 +g37 +g38 +sg39 +g40 +sg41 +S'7578524' +p362 +sg43 +VGG +p363 +sssg45 +(dp364 +g33 +S'NC_000017.11:g.7675206dup' +p365 +sg35 +(dp366 +g37 +g38 +sg39 +g40 +sg41 +S'7675206' +p367 +sg43 +VGG +p368 +sssS'grch37' +p369 +(dp370 +g33 +S'NC_000017.10:g.7578524dup' +p371 +sg35 +(dp372 +g37 +g55 +sg39 +g40 +sg41 +S'7578524' +p373 +sg43 +VGG +p374 +sssS'grch38' +p375 +(dp376 +g33 +S'NC_000017.11:g.7675206dup' +p377 +sg35 +(dp378 +g37 +g55 +sg39 +g40 +sg41 +S'7675206' +p379 +sg43 +VGG +p380 +ssssg64 +(dp381 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263625.1' +p382 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276696.1' +p383 +sssS'NM_001276698.1:c.-72dup' +p384 +(dp385 +g3 +g4 +sg5 +(lp386 +S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' +p387 +aS'RefSeqGene record not available' +p388 +asg9 +g4 +sg10 +(lp389 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 6, mRNA +p390 +sg14 +S'TP53' +p391 +sg16 +(dp392 +g18 +S'NP_001263627.1:p.?' +p393 +sg20 +S'NP_001263627.1:p.?' 
+p394 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001276698.1:c.-72dup' +p395 +sg28 +g4 +sg29 +(dp396 +S'hg19' +p397 +(dp398 +g33 +S'NC_000017.10:g.7578524dup' +p399 +sg35 +(dp400 +g37 +g38 +sg39 +g40 +sg41 +S'7578524' +p401 +sg43 +VGG +p402 +sssg45 +(dp403 +g33 +S'NC_000017.11:g.7675206dup' +p404 +sg35 +(dp405 +g37 +g38 +sg39 +g40 +sg41 +S'7675206' +p406 +sg43 +VGG +p407 +sssS'grch37' +p408 +(dp409 +g33 +S'NC_000017.10:g.7578524dup' +p410 +sg35 +(dp411 +g37 +g55 +sg39 +g40 +sg41 +S'7578524' +p412 +sg43 +VGG +p413 +sssS'grch38' +p414 +(dp415 +g33 +S'NC_000017.11:g.7675206dup' +p416 +sg35 +(dp417 +g37 +g55 +sg39 +g40 +sg41 +S'7675206' +p418 +sg43 +VGG +p419 +ssssg64 +(dp420 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263627.1' +p421 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276698.1' +p422 +sssS'NM_001126114.2:c.406dup' +p423 +(dp424 +g3 +g4 +sg5 +(lp425 +S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' +p426 +aS'RefSeqGene record not available' +p427 +asg9 +g4 +sg10 +(lp428 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 3, mRNA +p429 +sg14 +S'TP53' +p430 +sg16 +(dp431 +g18 +S'NP_001119586.1:p.(Gln136ProfsTer13)' +p432 +sg20 +S'NP_001119586.1:p.(Q136Pfs*13)' +p433 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001126114.2:c.406dup' +p434 +sg28 +g4 +sg29 +(dp435 +S'hg19' +p436 +(dp437 +g33 +S'NC_000017.10:g.7578524dup' +p438 +sg35 +(dp439 +g37 +g38 +sg39 +g40 +sg41 +S'7578524' +p440 +sg43 +VGG +p441 +sssg45 +(dp442 +g33 +S'NC_000017.11:g.7675206dup' +p443 +sg35 +(dp444 +g37 +g38 +sg39 +g40 +sg41 +S'7675206' +p445 +sg43 +VGG +p446 +sssS'grch37' +p447 +(dp448 +g33 +S'NC_000017.10:g.7578524dup' +p449 +sg35 +(dp450 +g37 +g55 +sg39 +g40 +sg41 +S'7578524' +p451 +sg43 +VGG +p452 +sssS'grch38' +p453 +(dp454 +g33 +S'NC_000017.11:g.7675206dup' +p455 +sg35 +(dp456 +g37 +g55 +sg39 +g40 +sg41 +S'7675206' +p457 +sg43 +VGG +p458 +ssssg64 +(dp459 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119586.1' +p460 +sg68 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126114.2' +p461 +sssS'NM_001276761.1:c.289dup' +p462 +(dp463 +g3 +g4 +sg5 +(lp464 +S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' +p465 +aS'RefSeqGene record not available' +p466 +asg9 +g4 +sg10 +(lp467 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 2, mRNA +p468 +sg14 +S'TP53' +p469 +sg16 +(dp470 +g18 +S'NP_001263690.1:p.(Gln97ProfsTer13)' +p471 +sg20 +S'NP_001263690.1:p.(Q97Pfs*13)' +p472 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001276761.1:c.289dup' +p473 +sg28 +g4 +sg29 +(dp474 +S'hg19' +p475 +(dp476 +g33 +S'NC_000017.10:g.7578524dup' +p477 +sg35 +(dp478 +g37 +g38 +sg39 +g40 +sg41 +S'7578524' +p479 +sg43 +VGG +p480 +sssg45 +(dp481 +g33 +S'NC_000017.11:g.7675206dup' +p482 +sg35 +(dp483 +g37 +g38 +sg39 +g40 +sg41 +S'7675206' +p484 +sg43 +VGG +p485 +sssS'grch37' +p486 +(dp487 +g33 +S'NC_000017.10:g.7578524dup' +p488 +sg35 +(dp489 +g37 +g55 +sg39 +g40 +sg41 +S'7578524' +p490 +sg43 +VGG +p491 +sssS'grch38' +p492 +(dp493 +g33 +S'NC_000017.11:g.7675206dup' +p494 +sg35 +(dp495 +g37 +g55 +sg39 +g40 +sg41 +S'7675206' +p496 +sg43 +VGG +p497 +ssssg64 +(dp498 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263690.1' +p499 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276761.1' +p500 +sssS'NM_001126113.2:c.406dup' +p501 +(dp502 +g3 +g4 +sg5 +(lp503 +S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' +p504 +aS'RefSeqGene record not available' +p505 +asg9 +g4 +sg10 +(lp506 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 4, mRNA +p507 +sg14 +S'TP53' +p508 +sg16 +(dp509 +g18 +S'NP_001119585.1:p.(Gln136ProfsTer13)' +p510 +sg20 +S'NP_001119585.1:p.(Q136Pfs*13)' +p511 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001126113.2:c.406dup' +p512 +sg28 +g4 +sg29 +(dp513 +S'hg19' +p514 +(dp515 +g33 +S'NC_000017.10:g.7578524dup' +p516 +sg35 +(dp517 +g37 +g38 +sg39 +g40 +sg41 +S'7578524' +p518 +sg43 +VGG +p519 +sssg45 +(dp520 +g33 
+S'NC_000017.11:g.7675206dup' +p521 +sg35 +(dp522 +g37 +g38 +sg39 +g40 +sg41 +S'7675206' +p523 +sg43 +VGG +p524 +sssS'grch37' +p525 +(dp526 +g33 +S'NC_000017.10:g.7578524dup' +p527 +sg35 +(dp528 +g37 +g55 +sg39 +g40 +sg41 +S'7578524' +p529 +sg43 +VGG +p530 +sssS'grch38' +p531 +(dp532 +g33 +S'NC_000017.11:g.7675206dup' +p533 +sg35 +(dp534 +g37 +g55 +sg39 +g40 +sg41 +S'7675206' +p535 +sg43 +VGG +p536 +ssssg64 +(dp537 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119585.1' +p538 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126113.2' +p539 +sssS'NM_001126116.1:c.10dup' +p540 +(dp541 +g3 +g4 +sg5 +(lp542 +S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' +p543 +aS'RefSeqGene record not available' +p544 +asg9 +g4 +sg10 +(lp545 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 6, mRNA +p546 +sg14 +S'TP53' +p547 +sg16 +(dp548 +g18 +S'NP_001119588.1:p.(Gln4ProfsTer13)' +p549 +sg20 +S'NP_001119588.1:p.(Q4Pfs*13)' +p550 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001126116.1:c.10dup' +p551 +sg28 +g4 +sg29 +(dp552 +S'hg19' +p553 +(dp554 +g33 +S'NC_000017.10:g.7578524dup' +p555 +sg35 +(dp556 +g37 +g38 +sg39 +g40 +sg41 +S'7578524' +p557 +sg43 +VGG +p558 +sssg45 +(dp559 +g33 +S'NC_000017.11:g.7675206dup' +p560 +sg35 +(dp561 +g37 +g38 +sg39 +g40 +sg41 +S'7675206' +p562 +sg43 +VGG +p563 +sssS'grch37' +p564 +(dp565 +g33 +S'NC_000017.10:g.7578524dup' +p566 +sg35 +(dp567 +g37 +g55 +sg39 +g40 +sg41 +S'7578524' +p568 +sg43 +VGG +p569 +sssS'grch38' +p570 +(dp571 +g33 +S'NC_000017.11:g.7675206dup' +p572 +sg35 +(dp573 +g37 +g55 +sg39 +g40 +sg41 +S'7675206' +p574 +sg43 +VGG +p575 +ssssg64 +(dp576 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119588.1' +p577 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126116.1' +p578 +sssS'metadata' +p579 +(dp580 +S'variantvalidator_hgvs_version' +p581 +S'1.1.3' +p582 +sS'uta_schema' +p583 +S'uta_20180821' +p584 +sS'seqrepo_db' +p585 +S'2018-08-21' +p586 +sS'variantvalidator_version' +p587 
+S'v0.2' +p588 +ssS'NM_001126112.2:c.406dup' +p589 +(dp590 +g3 +g4 +sg5 +(lp591 +S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' +p592 +aS'RefSeqGene record not available' +p593 +asg9 +g4 +sg10 +(lp594 +sg12 +VHomo sapiens tumor protein p53 (TP53), transcript variant 2, mRNA +p595 +sg14 +S'TP53' +p596 +sg16 +(dp597 +g18 +S'NP_001119584.1:p.(Gln136ProfsTer13)' +p598 +sg20 +S'NP_001119584.1:p.(Q136Pfs*13)' +p599 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001126112.2:c.406dup' +p600 +sg28 +g4 +sg29 +(dp601 +S'hg19' +p602 +(dp603 +g33 +S'NC_000017.10:g.7578524dup' +p604 +sg35 +(dp605 +g37 +g38 +sg39 +g40 +sg41 +S'7578524' +p606 +sg43 +VGG +p607 +sssg45 +(dp608 +g33 +S'NC_000017.11:g.7675206dup' +p609 +sg35 +(dp610 +g37 +g38 +sg39 +g40 +sg41 +S'7675206' +p611 +sg43 +VGG +p612 +sssS'grch37' +p613 +(dp614 +g33 +S'NC_000017.10:g.7578524dup' +p615 +sg35 +(dp616 +g37 +g55 +sg39 +g40 +sg41 +S'7578524' +p617 +sg43 +VGG +p618 +sssS'grch38' +p619 +(dp620 +g33 +S'NC_000017.11:g.7675206dup' +p621 +sg35 +(dp622 +g37 +g55 +sg39 +g40 +sg41 +S'7675206' +p623 +sg43 +VGG +p624 +ssssg64 +(dp625 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119584.1' +p626 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126112.2' +p627 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant252.txt b/VariantValidator/testing/testOutputsMasterITS/variant252.txt new file mode 100644 index 00000000..86e02399 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant252.txt @@ -0,0 +1,642 @@ +(dp0 +S'NM_144997.6:c.1300+2T>G' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens folliculin (FLCN), transcript variant 1, mRNA +p12 +sS'gene_symbol' +p13 +S'FLCN' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_659434.2:p.?' +p18 +sS'slr' +p19 +S'NP_659434.2:p.?' +p20 +ssS'submitted_variant' +p21 +S'17-17119692-A-C' +p22 +sS'genome_context_intronic_sequence' +p23 +S'NC_000017.10(NM_144997.6):c.1300+2T>G' +p24 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_144997.6:c.1300+2T>G' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000017.10:g.17119692A>C' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'chr17' +p38 +sS'ref' +p39 +VA +p40 +sS'pos' +p41 +S'17119692' +p42 +sS'alt' +p43 +VC +p44 +sssS'hg38' +p45 +(dp46 +g33 +S'NC_000017.11:g.17216378A>C' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +g40 +sg41 +S'17216378' +p49 +sg43 +g44 +sssS'grch37' +p50 +(dp51 +g33 +S'NC_000017.10:g.17119692A>C' +p52 +sg35 +(dp53 +g37 +S'17' +p54 +sg39 +g40 +sg41 +S'17119692' +p55 +sg43 +g44 +sssS'grch38' +p56 +(dp57 +g33 +S'NC_000017.11:g.17216378A>C' +p58 +sg35 +(dp59 +g37 +g54 +sg39 +g40 +sg41 +S'17216378' +p60 +sg43 +g44 +ssssS'reference_sequence_records' +p61 +(dp62 +S'protein' +p63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_659434.2' +p64 +sS'transcript' +p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_144997.6' +p66 
+sssS'NM_001353230.1:c.1300+2T>G' +p67 +(dp68 +g3 +g4 +sg5 +(lp69 +S'RefSeqGene record not available' +p70 +asg8 +g4 +sg9 +(lp71 +sg11 +VHomo sapiens folliculin (FLCN), transcript variant 4, mRNA +p72 +sg13 +S'FLCN' +p73 +sg15 +(dp74 +g17 +S'NP_001340159.1:p.?' +p75 +sg19 +S'NP_001340159.1:p.?' +p76 +ssg21 +g22 +sg23 +S'NC_000017.10(NM_001353230.1):c.1300+2T>G' +p77 +sg25 +g4 +sg26 +S'NM_001353230.1:c.1300+2T>G' +p78 +sg28 +g4 +sg29 +(dp79 +S'hg19' +p80 +(dp81 +g33 +S'NC_000017.10:g.17119692A>C' +p82 +sg35 +(dp83 +g37 +g38 +sg39 +g40 +sg41 +S'17119692' +p84 +sg43 +g44 +sssg45 +(dp85 +g33 +S'NC_000017.11:g.17216378A>C' +p86 +sg35 +(dp87 +g37 +g38 +sg39 +g40 +sg41 +S'17216378' +p88 +sg43 +g44 +sssS'grch37' +p89 +(dp90 +g33 +S'NC_000017.10:g.17119692A>C' +p91 +sg35 +(dp92 +g37 +g54 +sg39 +g40 +sg41 +S'17119692' +p93 +sg43 +g44 +sssS'grch38' +p94 +(dp95 +g33 +S'NC_000017.11:g.17216378A>C' +p96 +sg35 +(dp97 +g37 +g54 +sg39 +g40 +sg41 +S'17216378' +p98 +sg43 +g44 +ssssg61 +(dp99 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340159.1' +p100 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353230.1' +p101 +sssS'NM_001353229.1:c.1354+2T>G' +p102 +(dp103 +g3 +g4 +sg5 +(lp104 +S'RefSeqGene record not available' +p105 +asg8 +g4 +sg9 +(lp106 +sg11 +VHomo sapiens folliculin (FLCN), transcript variant 3, mRNA +p107 +sg13 +S'FLCN' +p108 +sg15 +(dp109 +g17 +S'NP_001340158.1:p.?' +p110 +sg19 +S'NP_001340158.1:p.?' 
+p111 +ssg21 +g22 +sg23 +S'NC_000017.10(NM_001353229.1):c.1354+2T>G' +p112 +sg25 +g4 +sg26 +S'NM_001353229.1:c.1354+2T>G' +p113 +sg28 +g4 +sg29 +(dp114 +S'hg19' +p115 +(dp116 +g33 +S'NC_000017.10:g.17119692A>C' +p117 +sg35 +(dp118 +g37 +g38 +sg39 +g40 +sg41 +S'17119692' +p119 +sg43 +g44 +sssg45 +(dp120 +g33 +S'NC_000017.11:g.17216378A>C' +p121 +sg35 +(dp122 +g37 +g38 +sg39 +g40 +sg41 +S'17216378' +p123 +sg43 +g44 +sssS'grch37' +p124 +(dp125 +g33 +S'NC_000017.10:g.17119692A>C' +p126 +sg35 +(dp127 +g37 +g54 +sg39 +g40 +sg41 +S'17119692' +p128 +sg43 +g44 +sssS'grch38' +p129 +(dp130 +g33 +S'NC_000017.11:g.17216378A>C' +p131 +sg35 +(dp132 +g37 +g54 +sg39 +g40 +sg41 +S'17216378' +p133 +sg43 +g44 +ssssg61 +(dp134 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340158.1' +p135 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353229.1' +p136 +sssS'flag' +p137 +S'gene_variant' +p138 +sS'NM_144997.5:c.1300+2T>G' +p139 +(dp140 +g3 +g4 +sg5 +(lp141 +S'A more recent version of the selected reference sequence NM_144997.5 is available (NM_144997.6)' +p142 +aS'NM_144997.6:c.1300+2T>G MUST be fully validated prior to use in reports' +p143 +aS'select_variants=NM_144997.6:c.1300+2T>G' +p144 +aS'RefSeqGene record not available' +p145 +asg8 +g4 +sg9 +(lp146 +sg11 +VHomo sapiens folliculin (FLCN), transcript variant 1, mRNA +p147 +sg13 +S'FLCN' +p148 +sg15 +(dp149 +g17 +S'NP_659434.2:p.?' +p150 +sg19 +S'NP_659434.2:p.?' 
+p151 +ssg21 +g22 +sg23 +S'NC_000017.10(NM_144997.5):c.1300+2T>G' +p152 +sg25 +g4 +sg26 +S'NM_144997.5:c.1300+2T>G' +p153 +sg28 +g4 +sg29 +(dp154 +S'hg19' +p155 +(dp156 +g33 +S'NC_000017.10:g.17119692A>C' +p157 +sg35 +(dp158 +g37 +g38 +sg39 +g40 +sg41 +S'17119692' +p159 +sg43 +g44 +sssg45 +(dp160 +g33 +S'NC_000017.11:g.17216378A>C' +p161 +sg35 +(dp162 +g37 +g38 +sg39 +g40 +sg41 +S'17216378' +p163 +sg43 +g44 +sssS'grch37' +p164 +(dp165 +g33 +S'NC_000017.10:g.17119692A>C' +p166 +sg35 +(dp167 +g37 +g54 +sg39 +g40 +sg41 +S'17119692' +p168 +sg43 +g44 +sssS'grch38' +p169 +(dp170 +g33 +S'NC_000017.11:g.17216378A>C' +p171 +sg35 +(dp172 +g37 +g54 +sg39 +g40 +sg41 +S'17216378' +p173 +sg43 +g44 +ssssg61 +(dp174 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_659434.2' +p175 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_144997.5' +p176 +sssS'NM_001353231.1:c.1300+2T>G' +p177 +(dp178 +g3 +g4 +sg5 +(lp179 +S'RefSeqGene record not available' +p180 +asg8 +g4 +sg9 +(lp181 +sg11 +VHomo sapiens folliculin (FLCN), transcript variant 5, mRNA +p182 +sg13 +S'FLCN' +p183 +sg15 +(dp184 +g17 +S'NP_001340160.1:p.?' +p185 +sg19 +S'NP_001340160.1:p.?' 
+p186 +ssg21 +g22 +sg23 +S'NC_000017.10(NM_001353231.1):c.1300+2T>G' +p187 +sg25 +g4 +sg26 +S'NM_001353231.1:c.1300+2T>G' +p188 +sg28 +g4 +sg29 +(dp189 +S'hg19' +p190 +(dp191 +g33 +S'NC_000017.10:g.17119692A>C' +p192 +sg35 +(dp193 +g37 +g38 +sg39 +g40 +sg41 +S'17119692' +p194 +sg43 +g44 +sssg45 +(dp195 +g33 +S'NC_000017.11:g.17216378A>C' +p196 +sg35 +(dp197 +g37 +g38 +sg39 +g40 +sg41 +S'17216378' +p198 +sg43 +g44 +sssS'grch37' +p199 +(dp200 +g33 +S'NC_000017.10:g.17119692A>C' +p201 +sg35 +(dp202 +g37 +g54 +sg39 +g40 +sg41 +S'17119692' +p203 +sg43 +g44 +sssS'grch38' +p204 +(dp205 +g33 +S'NC_000017.11:g.17216378A>C' +p206 +sg35 +(dp207 +g37 +g54 +sg39 +g40 +sg41 +S'17216378' +p208 +sg43 +g44 +ssssg61 +(dp209 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340160.1' +p210 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353231.1' +p211 +sssS'metadata' +p212 +(dp213 +S'variantvalidator_hgvs_version' +p214 +S'1.1.3' +p215 +sS'uta_schema' +p216 +S'uta_20180821' +p217 +sS'seqrepo_db' +p218 +S'2018-08-21' +p219 +sS'variantvalidator_version' +p220 +S'v0.2' +p221 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant253.txt b/VariantValidator/testing/testOutputsMasterITS/variant253.txt new file mode 100644 index 00000000..d9d8f736 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant253.txt @@ -0,0 +1,777 @@ +(dp0 +S'NM_007294.3:c.*103_*106del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000017.10:g.41197588GGACA>G automapped to NC_000017.10:g.41197590_41197593del' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 1, mRNA +p13 +sS'gene_symbol' +p14 +S'BRCA1' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_009225.1:p.?' 
+p19 +sS'slr' +p20 +S'NP_009225.1:p.?' +p21 +ssS'submitted_variant' +p22 +S'17-41197588-GGACA-G' +p23 +sS'genome_context_intronic_sequence' +p24 +g4 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_007294.3:c.*103_*106del' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000017.10:g.41197589_41197592del' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'chr17' +p38 +sS'ref' +p39 +S'GGACA' +p40 +sS'pos' +p41 +S'41197588' +p42 +sS'alt' +p43 +S'G' +p44 +sssS'hg38' +p45 +(dp46 +g33 +S'NC_000017.11:g.43045572_43045575del' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +S'GGACA' +p49 +sg41 +S'43045571' +p50 +sg43 +g44 +sssS'grch37' +p51 +(dp52 +g33 +S'NC_000017.10:g.41197589_41197592del' +p53 +sg35 +(dp54 +g37 +S'17' +p55 +sg39 +S'GGACA' +p56 +sg41 +S'41197588' +p57 +sg43 +g44 +sssS'grch38' +p58 +(dp59 +g33 +S'NC_000017.11:g.43045572_43045575del' +p60 +sg35 +(dp61 +g37 +g55 +sg39 +S'GGACA' +p62 +sg41 +S'43045571' +p63 +sg43 +g44 +ssssS'reference_sequence_records' +p64 +(dp65 +S'protein' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009225.1' +p67 +sS'transcript' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007294.3' +p69 +sssS'NM_007297.3:c.*103_*106del' +p70 +(dp71 +g3 +g4 +sg5 +(lp72 +S'NC_000017.10:g.41197588GGACA>G automapped to NC_000017.10:g.41197590_41197593del' +p73 +aS'RefSeqGene record not available' +p74 +asg9 +g4 +sg10 +(lp75 +sg12 +VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 3, mRNA +p76 +sg14 +S'BRCA1' +p77 +sg16 +(dp78 +g18 +S'NP_009228.2:p.?' +p79 +sg20 +S'NP_009228.2:p.?' 
+p80 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_007297.3:c.*103_*106del' +p81 +sg28 +g4 +sg29 +(dp82 +S'hg19' +p83 +(dp84 +g33 +S'NC_000017.10:g.41197589_41197592del' +p85 +sg35 +(dp86 +g37 +g38 +sg39 +S'GGACA' +p87 +sg41 +S'41197588' +p88 +sg43 +g44 +sssg45 +(dp89 +g33 +S'NC_000017.11:g.43045572_43045575del' +p90 +sg35 +(dp91 +g37 +g38 +sg39 +S'GGACA' +p92 +sg41 +S'43045571' +p93 +sg43 +g44 +sssS'grch37' +p94 +(dp95 +g33 +S'NC_000017.10:g.41197589_41197592del' +p96 +sg35 +(dp97 +g37 +g55 +sg39 +S'GGACA' +p98 +sg41 +S'41197588' +p99 +sg43 +g44 +sssS'grch38' +p100 +(dp101 +g33 +S'NC_000017.11:g.43045572_43045575del' +p102 +sg35 +(dp103 +g37 +g55 +sg39 +S'GGACA' +p104 +sg41 +S'43045571' +p105 +sg43 +g44 +ssssg64 +(dp106 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009228.2' +p107 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007297.3' +p108 +sssS'NR_027676.1:n.5831_5834del' +p109 +(dp110 +g3 +g4 +sg5 +(lp111 +S'NC_000017.10:g.41197588GGACA>G automapped to NC_000017.10:g.41197590_41197593del' +p112 +aS'RefSeqGene record not available' +p113 +asg9 +g4 +sg10 +(lp114 +sg12 +VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 6, non-coding RNA +p115 +sg14 +S'BRCA1' +p116 +sg16 +(dp117 +g18 +S'Non-coding :n.' 
+p118 +sg20 +g118 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NR_027676.1:n.5831_5834del' +p119 +sg28 +g4 +sg29 +(dp120 +S'hg19' +p121 +(dp122 +g33 +S'NC_000017.10:g.41197589_41197592del' +p123 +sg35 +(dp124 +g37 +g38 +sg39 +S'GGACA' +p125 +sg41 +S'41197588' +p126 +sg43 +g44 +sssg45 +(dp127 +g33 +S'NC_000017.11:g.43045572_43045575del' +p128 +sg35 +(dp129 +g37 +g38 +sg39 +S'GGACA' +p130 +sg41 +S'43045571' +p131 +sg43 +g44 +sssS'grch37' +p132 +(dp133 +g33 +S'NC_000017.10:g.41197589_41197592del' +p134 +sg35 +(dp135 +g37 +g55 +sg39 +S'GGACA' +p136 +sg41 +S'41197588' +p137 +sg43 +g44 +sssS'grch38' +p138 +(dp139 +g33 +S'NC_000017.11:g.43045572_43045575del' +p140 +sg35 +(dp141 +g37 +g55 +sg39 +S'GGACA' +p142 +sg41 +S'43045571' +p143 +sg43 +g44 +ssssg64 +(dp144 +g68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_027676.1' +p145 +sssS'NM_007300.3:c.*103_*106del' +p146 +(dp147 +g3 +g4 +sg5 +(lp148 +S'NC_000017.10:g.41197588GGACA>G automapped to NC_000017.10:g.41197590_41197593del' +p149 +aS'RefSeqGene record not available' +p150 +asg9 +g4 +sg10 +(lp151 +sg12 +VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 2, mRNA +p152 +sg14 +S'BRCA1' +p153 +sg16 +(dp154 +g18 +S'NP_009231.2:p.?' +p155 +sg20 +S'NP_009231.2:p.?' 
+p156 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_007300.3:c.*103_*106del' +p157 +sg28 +g4 +sg29 +(dp158 +S'hg19' +p159 +(dp160 +g33 +S'NC_000017.10:g.41197589_41197592del' +p161 +sg35 +(dp162 +g37 +g38 +sg39 +S'GGACA' +p163 +sg41 +S'41197588' +p164 +sg43 +g44 +sssg45 +(dp165 +g33 +S'NC_000017.11:g.43045572_43045575del' +p166 +sg35 +(dp167 +g37 +g38 +sg39 +S'GGACA' +p168 +sg41 +S'43045571' +p169 +sg43 +g44 +sssS'grch37' +p170 +(dp171 +g33 +S'NC_000017.10:g.41197589_41197592del' +p172 +sg35 +(dp173 +g37 +g55 +sg39 +S'GGACA' +p174 +sg41 +S'41197588' +p175 +sg43 +g44 +sssS'grch38' +p176 +(dp177 +g33 +S'NC_000017.11:g.43045572_43045575del' +p178 +sg35 +(dp179 +g37 +g55 +sg39 +S'GGACA' +p180 +sg41 +S'43045571' +p181 +sg43 +g44 +ssssg64 +(dp182 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009231.2' +p183 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007300.3' +p184 +sssS'flag' +p185 +S'gene_variant' +p186 +sS'NM_007299.3:c.*209_*212del' +p187 +(dp188 +g3 +g4 +sg5 +(lp189 +S'NC_000017.10:g.41197588GGACA>G automapped to NC_000017.10:g.41197590_41197593del' +p190 +aS'RefSeqGene record not available' +p191 +asg9 +g4 +sg10 +(lp192 +sg12 +VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 5, mRNA +p193 +sg14 +S'BRCA1' +p194 +sg16 +(dp195 +g18 +S'NP_009230.2:p.?' +p196 +sg20 +S'NP_009230.2:p.?' 
+p197 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_007299.3:c.*209_*212del' +p198 +sg28 +g4 +sg29 +(dp199 +S'hg19' +p200 +(dp201 +g33 +S'NC_000017.10:g.41197589_41197592del' +p202 +sg35 +(dp203 +g37 +g38 +sg39 +S'GGACA' +p204 +sg41 +S'41197588' +p205 +sg43 +g44 +sssg45 +(dp206 +g33 +S'NC_000017.11:g.43045572_43045575del' +p207 +sg35 +(dp208 +g37 +g38 +sg39 +S'GGACA' +p209 +sg41 +S'43045571' +p210 +sg43 +g44 +sssS'grch37' +p211 +(dp212 +g33 +S'NC_000017.10:g.41197589_41197592del' +p213 +sg35 +(dp214 +g37 +g55 +sg39 +S'GGACA' +p215 +sg41 +S'41197588' +p216 +sg43 +g44 +sssS'grch38' +p217 +(dp218 +g33 +S'NC_000017.11:g.43045572_43045575del' +p219 +sg35 +(dp220 +g37 +g55 +sg39 +S'GGACA' +p221 +sg41 +S'43045571' +p222 +sg43 +g44 +ssssg64 +(dp223 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009230.2' +p224 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007299.3' +p225 +sssS'metadata' +p226 +(dp227 +S'variantvalidator_hgvs_version' +p228 +S'1.1.3' +p229 +sS'uta_schema' +p230 +S'uta_20180821' +p231 +sS'seqrepo_db' +p232 +S'2018-08-21' +p233 +sS'variantvalidator_version' +p234 +S'v0.2' +p235 +ssS'NM_007298.3:c.*103_*106del' +p236 +(dp237 +g3 +g4 +sg5 +(lp238 +S'NC_000017.10:g.41197588GGACA>G automapped to NC_000017.10:g.41197590_41197593del' +p239 +aS'RefSeqGene record not available' +p240 +asg9 +g4 +sg10 +(lp241 +sg12 +VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 4, mRNA +p242 +sg14 +S'BRCA1' +p243 +sg16 +(dp244 +g18 +S'NP_009229.2:p.?' +p245 +sg20 +S'NP_009229.2:p.?' 
+p246 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_007298.3:c.*103_*106del' +p247 +sg28 +g4 +sg29 +(dp248 +S'hg19' +p249 +(dp250 +g33 +S'NC_000017.10:g.41197589_41197592del' +p251 +sg35 +(dp252 +g37 +g38 +sg39 +S'GGACA' +p253 +sg41 +S'41197588' +p254 +sg43 +g44 +sssg45 +(dp255 +g33 +S'NC_000017.11:g.43045572_43045575del' +p256 +sg35 +(dp257 +g37 +g38 +sg39 +S'GGACA' +p258 +sg41 +S'43045571' +p259 +sg43 +g44 +sssS'grch37' +p260 +(dp261 +g33 +S'NC_000017.10:g.41197589_41197592del' +p262 +sg35 +(dp263 +g37 +g55 +sg39 +S'GGACA' +p264 +sg41 +S'41197588' +p265 +sg43 +g44 +sssS'grch38' +p266 +(dp267 +g33 +S'NC_000017.11:g.43045572_43045575del' +p268 +sg35 +(dp269 +g37 +g55 +sg39 +S'GGACA' +p270 +sg41 +S'43045571' +p271 +sg43 +g44 +ssssg64 +(dp272 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009229.2' +p273 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007298.3' +p274 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant254.txt b/VariantValidator/testing/testOutputsMasterITS/variant254.txt new file mode 100644 index 00000000..58431335 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant254.txt @@ -0,0 +1,748 @@ +(dp0 +S'NM_007299.3:c.301+1G>C' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 5, mRNA +p12 +sS'gene_symbol' +p13 +S'BRCA1' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_009230.2:p.?' +p18 +sS'slr' +p19 +S'NP_009230.2:p.?' 
+p20 +ssS'submitted_variant' +p21 +S'17-41256884-C-G' +p22 +sS'genome_context_intronic_sequence' +p23 +S'NC_000017.10(NM_007299.3):c.301+1G>C' +p24 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_007299.3:c.301+1G>C' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000017.10:g.41256884C>G' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'chr17' +p38 +sS'ref' +p39 +VC +p40 +sS'pos' +p41 +S'41256884' +p42 +sS'alt' +p43 +VG +p44 +sssS'hg38' +p45 +(dp46 +g33 +S'NC_000017.11:g.43104867C>G' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +g40 +sg41 +S'43104867' +p49 +sg43 +g44 +sssS'grch37' +p50 +(dp51 +g33 +S'NC_000017.10:g.41256884C>G' +p52 +sg35 +(dp53 +g37 +S'17' +p54 +sg39 +g40 +sg41 +S'41256884' +p55 +sg43 +g44 +sssS'grch38' +p56 +(dp57 +g33 +S'NC_000017.11:g.43104867C>G' +p58 +sg35 +(dp59 +g37 +g54 +sg39 +g40 +sg41 +S'43104867' +p60 +sg43 +g44 +ssssS'reference_sequence_records' +p61 +(dp62 +S'protein' +p63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009230.2' +p64 +sS'transcript' +p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007299.3' +p66 +sssS'NR_027676.1:n.440+1G>C' +p67 +(dp68 +g3 +g4 +sg5 +(lp69 +S'RefSeqGene record not available' +p70 +asg8 +g4 +sg9 +(lp71 +sg11 +VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 6, non-coding RNA +p72 +sg13 +S'BRCA1' +p73 +sg15 +(dp74 +g17 +S'Non-coding :n.' 
+p75 +sg19 +g75 +ssg21 +g22 +sg23 +S'NC_000017.10(NR_027676.1):c.440+1G>C' +p76 +sg25 +g4 +sg26 +S'NR_027676.1:n.440+1G>C' +p77 +sg28 +g4 +sg29 +(dp78 +S'hg19' +p79 +(dp80 +g33 +S'NC_000017.10:g.41256884C>G' +p81 +sg35 +(dp82 +g37 +g38 +sg39 +g40 +sg41 +S'41256884' +p83 +sg43 +g44 +sssg45 +(dp84 +g33 +S'NC_000017.11:g.43104867C>G' +p85 +sg35 +(dp86 +g37 +g38 +sg39 +g40 +sg41 +S'43104867' +p87 +sg43 +g44 +sssS'grch37' +p88 +(dp89 +g33 +S'NC_000017.10:g.41256884C>G' +p90 +sg35 +(dp91 +g37 +g54 +sg39 +g40 +sg41 +S'41256884' +p92 +sg43 +g44 +sssS'grch38' +p93 +(dp94 +g33 +S'NC_000017.11:g.43104867C>G' +p95 +sg35 +(dp96 +g37 +g54 +sg39 +g40 +sg41 +S'43104867' +p97 +sg43 +g44 +ssssg61 +(dp98 +g65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_027676.1' +p99 +sssS'NM_007300.3:c.301+1G>C' +p100 +(dp101 +g3 +g4 +sg5 +(lp102 +S'RefSeqGene record not available' +p103 +asg8 +g4 +sg9 +(lp104 +sg11 +VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 2, mRNA +p105 +sg13 +S'BRCA1' +p106 +sg15 +(dp107 +g17 +S'NP_009231.2:p.?' +p108 +sg19 +S'NP_009231.2:p.?' 
+p109 +ssg21 +g22 +sg23 +S'NC_000017.10(NM_007300.3):c.301+1G>C' +p110 +sg25 +g4 +sg26 +S'NM_007300.3:c.301+1G>C' +p111 +sg28 +g4 +sg29 +(dp112 +S'hg19' +p113 +(dp114 +g33 +S'NC_000017.10:g.41256884C>G' +p115 +sg35 +(dp116 +g37 +g38 +sg39 +g40 +sg41 +S'41256884' +p117 +sg43 +g44 +sssg45 +(dp118 +g33 +S'NC_000017.11:g.43104867C>G' +p119 +sg35 +(dp120 +g37 +g38 +sg39 +g40 +sg41 +S'43104867' +p121 +sg43 +g44 +sssS'grch37' +p122 +(dp123 +g33 +S'NC_000017.10:g.41256884C>G' +p124 +sg35 +(dp125 +g37 +g54 +sg39 +g40 +sg41 +S'41256884' +p126 +sg43 +g44 +sssS'grch38' +p127 +(dp128 +g33 +S'NC_000017.11:g.43104867C>G' +p129 +sg35 +(dp130 +g37 +g54 +sg39 +g40 +sg41 +S'43104867' +p131 +sg43 +g44 +ssssg61 +(dp132 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009231.2' +p133 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007300.3' +p134 +sssS'NM_007298.3:c.301+1G>C' +p135 +(dp136 +g3 +g4 +sg5 +(lp137 +S'RefSeqGene record not available' +p138 +asg8 +g4 +sg9 +(lp139 +sg11 +VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 4, mRNA +p140 +sg13 +S'BRCA1' +p141 +sg15 +(dp142 +g17 +S'NP_009229.2:p.?' +p143 +sg19 +S'NP_009229.2:p.?' 
+p144 +ssg21 +g22 +sg23 +S'NC_000017.10(NM_007298.3):c.301+1G>C' +p145 +sg25 +g4 +sg26 +S'NM_007298.3:c.301+1G>C' +p146 +sg28 +g4 +sg29 +(dp147 +S'hg19' +p148 +(dp149 +g33 +S'NC_000017.10:g.41256884C>G' +p150 +sg35 +(dp151 +g37 +g38 +sg39 +g40 +sg41 +S'41256884' +p152 +sg43 +g44 +sssg45 +(dp153 +g33 +S'NC_000017.11:g.43104867C>G' +p154 +sg35 +(dp155 +g37 +g38 +sg39 +g40 +sg41 +S'43104867' +p156 +sg43 +g44 +sssS'grch37' +p157 +(dp158 +g33 +S'NC_000017.10:g.41256884C>G' +p159 +sg35 +(dp160 +g37 +g54 +sg39 +g40 +sg41 +S'41256884' +p161 +sg43 +g44 +sssS'grch38' +p162 +(dp163 +g33 +S'NC_000017.11:g.43104867C>G' +p164 +sg35 +(dp165 +g37 +g54 +sg39 +g40 +sg41 +S'43104867' +p166 +sg43 +g44 +ssssg61 +(dp167 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009229.2' +p168 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007298.3' +p169 +sssS'NM_007297.3:c.160+1G>C' +p170 +(dp171 +g3 +g4 +sg5 +(lp172 +S'RefSeqGene record not available' +p173 +asg8 +g4 +sg9 +(lp174 +sg11 +VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 3, mRNA +p175 +sg13 +S'BRCA1' +p176 +sg15 +(dp177 +g17 +S'NP_009228.2:p.?' +p178 +sg19 +S'NP_009228.2:p.?' 
+p179 +ssg21 +g22 +sg23 +S'NC_000017.10(NM_007297.3):c.160+1G>C' +p180 +sg25 +g4 +sg26 +S'NM_007297.3:c.160+1G>C' +p181 +sg28 +g4 +sg29 +(dp182 +S'hg19' +p183 +(dp184 +g33 +S'NC_000017.10:g.41256884C>G' +p185 +sg35 +(dp186 +g37 +g38 +sg39 +g40 +sg41 +S'41256884' +p187 +sg43 +g44 +sssg45 +(dp188 +g33 +S'NC_000017.11:g.43104867C>G' +p189 +sg35 +(dp190 +g37 +g38 +sg39 +g40 +sg41 +S'43104867' +p191 +sg43 +g44 +sssS'grch37' +p192 +(dp193 +g33 +S'NC_000017.10:g.41256884C>G' +p194 +sg35 +(dp195 +g37 +g54 +sg39 +g40 +sg41 +S'41256884' +p196 +sg43 +g44 +sssS'grch38' +p197 +(dp198 +g33 +S'NC_000017.11:g.43104867C>G' +p199 +sg35 +(dp200 +g37 +g54 +sg39 +g40 +sg41 +S'43104867' +p201 +sg43 +g44 +ssssg61 +(dp202 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009228.2' +p203 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007297.3' +p204 +sssS'flag' +p205 +S'gene_variant' +p206 +sS'NM_007294.3:c.301+1G>C' +p207 +(dp208 +g3 +g4 +sg5 +(lp209 +S'RefSeqGene record not available' +p210 +asg8 +g4 +sg9 +(lp211 +sg11 +VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 1, mRNA +p212 +sg13 +S'BRCA1' +p213 +sg15 +(dp214 +g17 +S'NP_009225.1:p.?' +p215 +sg19 +S'NP_009225.1:p.?' 
+p216 +ssg21 +g22 +sg23 +S'NC_000017.10(NM_007294.3):c.301+1G>C' +p217 +sg25 +g4 +sg26 +S'NM_007294.3:c.301+1G>C' +p218 +sg28 +g4 +sg29 +(dp219 +S'hg19' +p220 +(dp221 +g33 +S'NC_000017.10:g.41256884C>G' +p222 +sg35 +(dp223 +g37 +g38 +sg39 +g40 +sg41 +S'41256884' +p224 +sg43 +g44 +sssg45 +(dp225 +g33 +S'NC_000017.11:g.43104867C>G' +p226 +sg35 +(dp227 +g37 +g38 +sg39 +g40 +sg41 +S'43104867' +p228 +sg43 +g44 +sssS'grch37' +p229 +(dp230 +g33 +S'NC_000017.10:g.41256884C>G' +p231 +sg35 +(dp232 +g37 +g54 +sg39 +g40 +sg41 +S'41256884' +p233 +sg43 +g44 +sssS'grch38' +p234 +(dp235 +g33 +S'NC_000017.11:g.43104867C>G' +p236 +sg35 +(dp237 +g37 +g54 +sg39 +g40 +sg41 +S'43104867' +p238 +sg43 +g44 +ssssg61 +(dp239 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009225.1' +p240 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007294.3' +p241 +sssS'metadata' +p242 +(dp243 +S'variantvalidator_hgvs_version' +p244 +S'1.1.3' +p245 +sS'uta_schema' +p246 +S'uta_20180821' +p247 +sS'seqrepo_db' +p248 +S'2018-08-21' +p249 +sS'variantvalidator_version' +p250 +S'v0.2' +p251 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant255.txt b/VariantValidator/testing/testOutputsMasterITS/variant255.txt new file mode 100644 index 00000000..cdbcd27b --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant255.txt @@ -0,0 +1,483 @@ +(dp0 +S'NM_001363846.1:c.490G>T' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens glial fibrillary acidic protein (GFAP), transcript variant 4, mRNA +p12 +sS'gene_symbol' +p13 +S'GFAP' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_001350775.1:p.(Glu164Ter)' +p18 +sS'slr' +p19 +S'NP_001350775.1:p.(E164*)' +p20 +ssS'submitted_variant' +p21 +S'17-42991428-C-A' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_001363846.1:c.490G>T' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000017.10:g.42991428C>A' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr17' +p37 +sS'ref' +p38 +VC +p39 +sS'pos' +p40 +S'42991428' +p41 +sS'alt' +p42 +VA +p43 +sssS'grch37' +p44 +(dp45 +g32 +S'NC_000017.10:g.42991428C>A' +p46 +sg34 +(dp47 +g36 +S'17' +p48 +sg38 +g39 +sg40 +S'42991428' +p49 +sg42 +g43 +ssssS'reference_sequence_records' +p50 +(dp51 +S'protein' +p52 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350775.1' +p53 +sS'transcript' +p54 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363846.1' +p55 +sssS'NM_001131019.2:c.490G>T' +p56 +(dp57 +g3 +g4 +sg5 +(lp58 +S'RefSeqGene record not available' +p59 +asg8 +g4 +sg9 +(lp60 +sg11 +VHomo sapiens glial fibrillary acidic protein (GFAP), transcript variant 2, mRNA +p61 +sg13 +S'GFAP' +p62 +sg15 +(dp63 +g17 +S'NP_001124491.1:p.(Glu164Ter)' 
+p64 +sg19 +S'NP_001124491.1:p.(E164*)' +p65 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001131019.2:c.490G>T' +p66 +sg27 +g4 +sg28 +(dp67 +S'hg19' +p68 +(dp69 +g32 +S'NC_000017.10:g.42991428C>A' +p70 +sg34 +(dp71 +g36 +g37 +sg38 +g39 +sg40 +S'42991428' +p72 +sg42 +g43 +sssS'hg38' +p73 +(dp74 +g32 +S'NC_000017.11:g.44914060C>A' +p75 +sg34 +(dp76 +g36 +g37 +sg38 +g39 +sg40 +S'44914060' +p77 +sg42 +g43 +sssS'grch37' +p78 +(dp79 +g32 +S'NC_000017.10:g.42991428C>A' +p80 +sg34 +(dp81 +g36 +g48 +sg38 +g39 +sg40 +S'42991428' +p82 +sg42 +g43 +sssS'grch38' +p83 +(dp84 +g32 +S'NC_000017.11:g.44914060C>A' +p85 +sg34 +(dp86 +g36 +g48 +sg38 +g39 +sg40 +S'44914060' +p87 +sg42 +g43 +ssssg50 +(dp88 +g52 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124491.1' +p89 +sg54 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001131019.2' +p90 +sssS'flag' +p91 +S'gene_variant' +p92 +sS'NM_001242376.1:c.490G>T' +p93 +(dp94 +g3 +g4 +sg5 +(lp95 +S'RefSeqGene record not available' +p96 +asg8 +g4 +sg9 +(lp97 +sg11 +VHomo sapiens glial fibrillary acidic protein (GFAP), transcript variant 3, mRNA +p98 +sg13 +S'GFAP' +p99 +sg15 +(dp100 +g17 +S'NP_001229305.1:p.(Glu164Ter)' +p101 +sg19 +S'NP_001229305.1:p.(E164*)' +p102 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001242376.1:c.490G>T' +p103 +sg27 +g4 +sg28 +(dp104 +S'hg19' +p105 +(dp106 +g32 +S'NC_000017.10:g.42991428C>A' +p107 +sg34 +(dp108 +g36 +g37 +sg38 +g39 +sg40 +S'42991428' +p109 +sg42 +g43 +sssg73 +(dp110 +g32 +S'NC_000017.11:g.44914060C>A' +p111 +sg34 +(dp112 +g36 +g37 +sg38 +g39 +sg40 +S'44914060' +p113 +sg42 +g43 +sssS'grch37' +p114 +(dp115 +g32 +S'NC_000017.10:g.42991428C>A' +p116 +sg34 +(dp117 +g36 +g48 +sg38 +g39 +sg40 +S'42991428' +p118 +sg42 +g43 +sssS'grch38' +p119 +(dp120 +g32 +S'NC_000017.11:g.44914060C>A' +p121 +sg34 +(dp122 +g36 +g48 +sg38 +g39 +sg40 +S'44914060' +p123 +sg42 +g43 +ssssg50 +(dp124 +g52 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001229305.1' +p125 +sg54 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001242376.1' +p126 
+sssS'NM_002055.4:c.490G>T' +p127 +(dp128 +g3 +g4 +sg5 +(lp129 +S'RefSeqGene record not available' +p130 +asg8 +g4 +sg9 +(lp131 +sg11 +VHomo sapiens glial fibrillary acidic protein (GFAP), transcript variant 1, mRNA +p132 +sg13 +S'GFAP' +p133 +sg15 +(dp134 +g17 +S'NP_002046.1:p.(Glu164Ter)' +p135 +sg19 +S'NP_002046.1:p.(E164*)' +p136 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_002055.4:c.490G>T' +p137 +sg27 +g4 +sg28 +(dp138 +S'hg19' +p139 +(dp140 +g32 +S'NC_000017.10:g.42991428C>A' +p141 +sg34 +(dp142 +g36 +g37 +sg38 +g39 +sg40 +S'42991428' +p143 +sg42 +g43 +sssg73 +(dp144 +g32 +S'NC_000017.11:g.44914060C>A' +p145 +sg34 +(dp146 +g36 +g37 +sg38 +g39 +sg40 +S'44914060' +p147 +sg42 +g43 +sssS'grch37' +p148 +(dp149 +g32 +S'NC_000017.10:g.42991428C>A' +p150 +sg34 +(dp151 +g36 +g48 +sg38 +g39 +sg40 +S'42991428' +p152 +sg42 +g43 +sssS'grch38' +p153 +(dp154 +g32 +S'NC_000017.11:g.44914060C>A' +p155 +sg34 +(dp156 +g36 +g48 +sg38 +g39 +sg40 +S'44914060' +p157 +sg42 +g43 +ssssg50 +(dp158 +g52 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_002046.1' +p159 +sg54 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_002055.4' +p160 +sssS'metadata' +p161 +(dp162 +S'variantvalidator_hgvs_version' +p163 +S'1.1.3' +p164 +sS'uta_schema' +p165 +S'uta_20180821' +p166 +sS'seqrepo_db' +p167 +S'2018-08-21' +p168 +sS'variantvalidator_version' +p169 +S'v0.2' +p170 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant256.txt b/VariantValidator/testing/testOutputsMasterITS/variant256.txt new file mode 100644 index 00000000..f30fee0b --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant256.txt @@ -0,0 +1,573 @@ +(dp0 +S'NM_001135697.1:c.*11A>T' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'A more recent version of the selected reference sequence NM_001135697.1 is available (NM_001135697.2)' +p7 +aS'NM_001135697.2:c.*11A>T MUST be fully validated prior to use in reports' +p8 +aS'select_variants=NM_001135697.2:c.*11A>T' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g4 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens sarcoglycan alpha (SGCA), transcript variant 2, mRNA +p15 +sS'gene_symbol' +p16 +S'SGCA' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_001129169.1:p.?' +p21 +sS'slr' +p22 +S'NP_001129169.1:p.?' 
+p23 +ssS'submitted_variant' +p24 +S'17-48252809-A-T' +p25 +sS'genome_context_intronic_sequence' +p26 +g4 +sS'hgvs_lrg_variant' +p27 +g4 +sS'hgvs_transcript_variant' +p28 +S'NM_001135697.1:c.*11A>T' +p29 +sS'hgvs_refseqgene_variant' +p30 +g4 +sS'primary_assembly_loci' +p31 +(dp32 +S'hg19' +p33 +(dp34 +S'hgvs_genomic_description' +p35 +S'NC_000017.10:g.48252809A>T' +p36 +sS'vcf' +p37 +(dp38 +S'chr' +p39 +S'chr17' +p40 +sS'ref' +p41 +S'A' +p42 +sS'pos' +p43 +S'48252809' +p44 +sS'alt' +p45 +S'T' +p46 +sssS'grch37' +p47 +(dp48 +g35 +S'NC_000017.10:g.48252809A>T' +p49 +sg37 +(dp50 +g39 +S'17' +p51 +sg41 +g42 +sg43 +S'48252809' +p52 +sg45 +g46 +ssssS'reference_sequence_records' +p53 +(dp54 +S'protein' +p55 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129169.1' +p56 +sS'transcript' +p57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135697.1' +p58 +sssS'flag' +p59 +S'gene_variant' +p60 +sS'NR_135553.1:n.1022A>T' +p61 +(dp62 +g3 +g4 +sg5 +(lp63 +S'RefSeqGene record not available' +p64 +asg11 +g4 +sg12 +(lp65 +sg14 +VHomo sapiens sarcoglycan alpha (SGCA), transcript variant 3, non-coding RNA +p66 +sg16 +S'SGCA' +p67 +sg18 +(dp68 +g20 +S'Non-coding :n.' 
+p69 +sg22 +g69 +ssg24 +g25 +sg26 +g4 +sg27 +g4 +sg28 +S'NR_135553.1:n.1022A>T' +p70 +sg30 +g4 +sg31 +(dp71 +S'hg19' +p72 +(dp73 +g35 +S'NC_000017.10:g.48252809A>T' +p74 +sg37 +(dp75 +g39 +g40 +sg41 +g42 +sg43 +S'48252809' +p76 +sg45 +g46 +sssS'hg38' +p77 +(dp78 +g35 +S'NC_000017.11:g.50175448A>T' +p79 +sg37 +(dp80 +g39 +g40 +sg41 +g42 +sg43 +S'50175448' +p81 +sg45 +g46 +sssS'grch37' +p82 +(dp83 +g35 +S'NC_000017.10:g.48252809A>T' +p84 +sg37 +(dp85 +g39 +g51 +sg41 +g42 +sg43 +S'48252809' +p86 +sg45 +g46 +sssS'grch38' +p87 +(dp88 +g35 +S'NC_000017.11:g.50175448A>T' +p89 +sg37 +(dp90 +g39 +g51 +sg41 +g42 +sg43 +S'50175448' +p91 +sg45 +g46 +ssssg53 +(dp92 +g57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_135553.1' +p93 +sssS'NM_001135697.2:c.*11A>T' +p94 +(dp95 +g3 +g4 +sg5 +(lp96 +S'RefSeqGene record not available' +p97 +asg11 +g4 +sg12 +(lp98 +sg14 +VHomo sapiens sarcoglycan alpha (SGCA), transcript variant 2, mRNA +p99 +sg16 +S'SGCA' +p100 +sg18 +(dp101 +g20 +S'NP_001129169.1:p.?' +p102 +sg22 +S'NP_001129169.1:p.?' 
+p103 +ssg24 +g25 +sg26 +g4 +sg27 +g4 +sg28 +S'NM_001135697.2:c.*11A>T' +p104 +sg30 +g4 +sg31 +(dp105 +S'hg19' +p106 +(dp107 +g35 +S'NC_000017.10:g.48252809A>T' +p108 +sg37 +(dp109 +g39 +g40 +sg41 +g42 +sg43 +S'48252809' +p110 +sg45 +g46 +sssg77 +(dp111 +g35 +S'NC_000017.11:g.50175448A>T' +p112 +sg37 +(dp113 +g39 +g40 +sg41 +g42 +sg43 +S'50175448' +p114 +sg45 +g46 +sssS'grch37' +p115 +(dp116 +g35 +S'NC_000017.10:g.48252809A>T' +p117 +sg37 +(dp118 +g39 +g51 +sg41 +g42 +sg43 +S'48252809' +p119 +sg45 +g46 +sssS'grch38' +p120 +(dp121 +g35 +S'NC_000017.11:g.50175448A>T' +p122 +sg37 +(dp123 +g39 +g51 +sg41 +g42 +sg43 +S'50175448' +p124 +sg45 +g46 +ssssg53 +(dp125 +g55 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129169.1' +p126 +sg57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135697.2' +p127 +sssS'NM_000023.3:c.*11A>T' +p128 +(dp129 +g3 +g4 +sg5 +(lp130 +S'RefSeqGene record not available' +p131 +asg11 +g4 +sg12 +(lp132 +sg14 +VHomo sapiens sarcoglycan alpha (SGCA), transcript variant 1, mRNA +p133 +sg16 +S'SGCA' +p134 +sg18 +(dp135 +g20 +S'NP_000014.1:p.?' +p136 +sg22 +S'NP_000014.1:p.?' 
+p137 +ssg24 +g25 +sg26 +g4 +sg27 +g4 +sg28 +S'NM_000023.3:c.*11A>T' +p138 +sg30 +g4 +sg31 +(dp139 +S'hg19' +p140 +(dp141 +g35 +S'NC_000017.10:g.48252809A>T' +p142 +sg37 +(dp143 +g39 +g40 +sg41 +g42 +sg43 +S'48252809' +p144 +sg45 +g46 +sssg77 +(dp145 +g35 +S'NC_000017.11:g.50175448A>T' +p146 +sg37 +(dp147 +g39 +g40 +sg41 +g42 +sg43 +S'50175448' +p148 +sg45 +g46 +sssS'grch37' +p149 +(dp150 +g35 +S'NC_000017.10:g.48252809A>T' +p151 +sg37 +(dp152 +g39 +g51 +sg41 +g42 +sg43 +S'48252809' +p153 +sg45 +g46 +sssS'grch38' +p154 +(dp155 +g35 +S'NC_000017.11:g.50175448A>T' +p156 +sg37 +(dp157 +g39 +g51 +sg41 +g42 +sg43 +S'50175448' +p158 +sg45 +g46 +ssssg53 +(dp159 +g55 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000014.1' +p160 +sg57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000023.3' +p161 +sssS'NM_000023.2:c.*11A>T' +p162 +(dp163 +g3 +g4 +sg5 +(lp164 +S'A more recent version of the selected reference sequence NM_000023.2 is available (NM_000023.3)' +p165 +aS'NM_000023.3:c.*11A>T MUST be fully validated prior to use in reports' +p166 +aS'select_variants=NM_000023.3:c.*11A>T' +p167 +aS'RefSeqGene record not available' +p168 +asg11 +g4 +sg12 +(lp169 +sg14 +VHomo sapiens sarcoglycan alpha (SGCA), transcript variant 1, mRNA +p170 +sg16 +S'SGCA' +p171 +sg18 +(dp172 +g20 +S'NP_000014.1:p.?' +p173 +sg22 +S'NP_000014.1:p.?' 
+p174 +ssg24 +g25 +sg26 +g4 +sg27 +g4 +sg28 +S'NM_000023.2:c.*11A>T' +p175 +sg30 +g4 +sg31 +(dp176 +S'hg19' +p177 +(dp178 +g35 +S'NC_000017.10:g.48252809A>T' +p179 +sg37 +(dp180 +g39 +g40 +sg41 +g42 +sg43 +S'48252809' +p181 +sg45 +g46 +sssS'grch37' +p182 +(dp183 +g35 +S'NC_000017.10:g.48252809A>T' +p184 +sg37 +(dp185 +g39 +g51 +sg41 +g42 +sg43 +S'48252809' +p186 +sg45 +g46 +ssssg53 +(dp187 +g55 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000014.1' +p188 +sg57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000023.2' +p189 +sssS'metadata' +p190 +(dp191 +S'variantvalidator_hgvs_version' +p192 +S'1.1.3' +p193 +sS'uta_schema' +p194 +S'uta_20180821' +p195 +sS'seqrepo_db' +p196 +S'2018-08-21' +p197 +sS'variantvalidator_version' +p198 +S'v0.2' +p199 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant257.txt b/VariantValidator/testing/testOutputsMasterITS/variant257.txt new file mode 100644 index 00000000..34d5b066 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant257.txt @@ -0,0 +1,180 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000334.4:c.3720+9_3720+10dup' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NC_000017.10:g.62022709G>GTC automapped to NC_000017.10:g.62022710_62022711dupTC' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g6 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens sodium voltage-gated channel alpha subunit 4 (SCN4A), mRNA +p15 +sS'gene_symbol' +p16 +S'SCN4A' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_000325.4:p.?' +p21 +sS'slr' +p22 +S'NP_000325.4:p.?' 
+p23 +ssS'submitted_variant' +p24 +S'17-62022709-G-GTC' +p25 +sS'genome_context_intronic_sequence' +p26 +S'NC_000017.10(NM_000334.4):c.3720+9_3720+10dup' +p27 +sS'hgvs_lrg_variant' +p28 +g6 +sS'hgvs_transcript_variant' +p29 +S'NM_000334.4:c.3720+9_3720+10dup' +p30 +sS'hgvs_refseqgene_variant' +p31 +g6 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000017.10:g.62022710_62022711dup' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr17' +p41 +sS'ref' +p42 +S'TC' +p43 +sS'pos' +p44 +S'62022710' +p45 +sS'alt' +p46 +S'TCTC' +p47 +sssS'hg38' +p48 +(dp49 +g36 +S'NC_000017.11:g.63945350_63945351dup' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +S'TC' +p52 +sg44 +S'63945350' +p53 +sg46 +S'TCTC' +p54 +sssS'grch37' +p55 +(dp56 +g36 +S'NC_000017.10:g.62022710_62022711dup' +p57 +sg38 +(dp58 +g40 +S'17' +p59 +sg42 +S'TC' +p60 +sg44 +S'62022710' +p61 +sg46 +S'TCTC' +p62 +sssS'grch38' +p63 +(dp64 +g36 +S'NC_000017.11:g.63945350_63945351dup' +p65 +sg38 +(dp66 +g40 +g59 +sg42 +S'TC' +p67 +sg44 +S'63945350' +p68 +sg46 +S'TCTC' +p69 +ssssS'reference_sequence_records' +p70 +(dp71 +S'protein' +p72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4' +p73 +sS'transcript' +p74 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4' +p75 +sssS'metadata' +p76 +(dp77 +S'variantvalidator_hgvs_version' +p78 +S'1.1.3' +p79 +sS'uta_schema' +p80 +S'uta_20180821' +p81 +sS'seqrepo_db' +p82 +S'2018-08-21' +p83 +sS'variantvalidator_version' +p84 +S'v0.2' +p85 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant258.txt b/VariantValidator/testing/testOutputsMasterITS/variant258.txt new file mode 100644 index 00000000..1ee1bcfb --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant258.txt @@ -0,0 +1,177 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_000334.4:c.3720+8_3720+9insA' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'NC_000017.10:g.62022711C>CT automapped to NC_000017.10:g.62022711_62022712insT' +p19 +aS'RefSeqGene record not available' +p20 +asS'refseqgene_context_intronic_sequence' +p21 +g16 +sS'alt_genomic_loci' +p22 +(lp23 +sS'transcript_description' +p24 +VHomo sapiens sodium voltage-gated channel alpha subunit 4 (SCN4A), mRNA +p25 +sS'gene_symbol' +p26 +S'SCN4A' +p27 +sS'hgvs_predicted_protein_consequence' +p28 +(dp29 +S'tlr' +p30 +S'NP_000325.4:p.?' +p31 +sS'slr' +p32 +S'NP_000325.4:p.?' 
+p33 +ssS'submitted_variant' +p34 +S'17-62022711-C-CT' +p35 +sS'genome_context_intronic_sequence' +p36 +S'NC_000017.10(NM_000334.4):c.3720+8_3720+9insA' +p37 +sS'hgvs_lrg_variant' +p38 +g16 +sS'hgvs_transcript_variant' +p39 +S'NM_000334.4:c.3720+8_3720+9insA' +p40 +sS'hgvs_refseqgene_variant' +p41 +g16 +sS'primary_assembly_loci' +p42 +(dp43 +S'hg19' +p44 +(dp45 +S'hgvs_genomic_description' +p46 +S'NC_000017.10:g.62022711_62022712insT' +p47 +sS'vcf' +p48 +(dp49 +S'chr' +p50 +S'chr17' +p51 +sS'ref' +p52 +S'C' +p53 +sS'pos' +p54 +S'62022711' +p55 +sS'alt' +p56 +VCT +p57 +sssS'hg38' +p58 +(dp59 +g46 +S'NC_000017.11:g.63945351_63945352insT' +p60 +sg48 +(dp61 +g50 +g51 +sg52 +g53 +sg54 +S'63945351' +p62 +sg56 +VCT +p63 +sssS'grch37' +p64 +(dp65 +g46 +S'NC_000017.10:g.62022711_62022712insT' +p66 +sg48 +(dp67 +g50 +S'17' +p68 +sg52 +g53 +sg54 +S'62022711' +p69 +sg56 +VCT +p70 +sssS'grch38' +p71 +(dp72 +g46 +S'NC_000017.11:g.63945351_63945352insT' +p73 +sg48 +(dp74 +g50 +g68 +sg52 +g53 +sg54 +S'63945351' +p75 +sg56 +VCT +p76 +ssssS'reference_sequence_records' +p77 +(dp78 +S'protein' +p79 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4' +p80 +sS'transcript' +p81 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4' +p82 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant259.txt b/VariantValidator/testing/testOutputsMasterITS/variant259.txt new file mode 100644 index 00000000..9ec16ffc --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant259.txt @@ -0,0 +1,177 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000334.4:c.3442-8_3442-7insGC' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NC_000017.10:g.62023005G>GGC automapped to NC_000017.10:g.62023005_62023006insGC' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g6 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens sodium voltage-gated channel alpha subunit 4 (SCN4A), mRNA +p15 +sS'gene_symbol' +p16 +S'SCN4A' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_000325.4:p.?' +p21 +sS'slr' +p22 +S'NP_000325.4:p.?' +p23 +ssS'submitted_variant' +p24 +S'17-62023005-G-GGC' +p25 +sS'genome_context_intronic_sequence' +p26 +S'NC_000017.10(NM_000334.4):c.3442-8_3442-7insGC' +p27 +sS'hgvs_lrg_variant' +p28 +g6 +sS'hgvs_transcript_variant' +p29 +S'NM_000334.4:c.3442-8_3442-7insGC' +p30 +sS'hgvs_refseqgene_variant' +p31 +g6 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000017.10:g.62023005_62023006insGC' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr17' +p41 +sS'ref' +p42 +S'G' +p43 +sS'pos' +p44 +S'62023005' +p45 +sS'alt' +p46 +VGGC +p47 +sssS'hg38' +p48 +(dp49 +g36 +S'NC_000017.11:g.63945645_63945646insGC' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +g43 +sg44 +S'63945645' +p52 +sg46 +VGGC +p53 +sssS'grch37' +p54 +(dp55 +g36 +S'NC_000017.10:g.62023005_62023006insGC' +p56 +sg38 +(dp57 +g40 +S'17' +p58 +sg42 +g43 +sg44 +S'62023005' +p59 +sg46 +VGGC +p60 +sssS'grch38' +p61 +(dp62 +g36 +S'NC_000017.11:g.63945645_63945646insGC' +p63 +sg38 +(dp64 +g40 +g58 +sg42 +g43 +sg44 +S'63945645' +p65 +sg46 
+VGGC +p66 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4' +p72 +sssS'metadata' +p73 +(dp74 +S'variantvalidator_hgvs_version' +p75 +S'1.1.3' +p76 +sS'uta_schema' +p77 +S'uta_20180821' +p78 +sS'seqrepo_db' +p79 +S'2018-08-21' +p80 +sS'variantvalidator_version' +p81 +S'v0.2' +p82 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant26.txt b/VariantValidator/testing/testOutputsMasterITS/variant26.txt new file mode 100644 index 00000000..dcc46545 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant26.txt @@ -0,0 +1,80 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'The given coordinate is outside the bounds of the reference sequence.' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +g4 +sS'gene_symbol' +p12 +g4 +sS'hgvs_predicted_protein_consequence' +p13 +(dp14 +S'tlr' +p15 +g4 +sS'slr' +p16 +g4 +ssS'submitted_variant' +p17 +S'NR_138595.1:n.-810C>T' +p18 +sS'genome_context_intronic_sequence' +p19 +g4 +sS'hgvs_lrg_variant' +p20 +g4 +sS'hgvs_transcript_variant' +p21 +g4 +sS'hgvs_refseqgene_variant' +p22 +g4 +sS'primary_assembly_loci' +p23 +(dp24 +sS'reference_sequence_records' +p25 +g4 +ssS'flag' +p26 +S'warning' +p27 +sS'metadata' +p28 +(dp29 +S'variantvalidator_hgvs_version' +p30 +S'1.1.3' +p31 +sS'uta_schema' +p32 +S'uta_20180821' +p33 +sS'seqrepo_db' +p34 +S'2018-08-21' +p35 +sS'variantvalidator_version' +p36 +S'v0.2' +p37 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant260.txt b/VariantValidator/testing/testOutputsMasterITS/variant260.txt new file mode 100644 index 00000000..7945a902 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant260.txt @@ -0,0 +1,172 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_000334.4:c.3442-8G>T' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'RefSeqGene record not available' +p19 +asS'refseqgene_context_intronic_sequence' +p20 +g16 +sS'alt_genomic_loci' +p21 +(lp22 +sS'transcript_description' +p23 +VHomo sapiens sodium voltage-gated channel alpha subunit 4 (SCN4A), mRNA +p24 +sS'gene_symbol' +p25 +S'SCN4A' +p26 +sS'hgvs_predicted_protein_consequence' +p27 +(dp28 +S'tlr' +p29 +S'NP_000325.4:p.?' +p30 +sS'slr' +p31 +S'NP_000325.4:p.?' 
+p32 +ssS'submitted_variant' +p33 +S'17-62023006-C-A' +p34 +sS'genome_context_intronic_sequence' +p35 +S'NC_000017.10(NM_000334.4):c.3442-8G>T' +p36 +sS'hgvs_lrg_variant' +p37 +g16 +sS'hgvs_transcript_variant' +p38 +S'NM_000334.4:c.3442-8G>T' +p39 +sS'hgvs_refseqgene_variant' +p40 +g16 +sS'primary_assembly_loci' +p41 +(dp42 +S'hg19' +p43 +(dp44 +S'hgvs_genomic_description' +p45 +S'NC_000017.10:g.62023006C>A' +p46 +sS'vcf' +p47 +(dp48 +S'chr' +p49 +S'chr17' +p50 +sS'ref' +p51 +VC +p52 +sS'pos' +p53 +S'62023006' +p54 +sS'alt' +p55 +VA +p56 +sssS'hg38' +p57 +(dp58 +g45 +S'NC_000017.11:g.63945646C>A' +p59 +sg47 +(dp60 +g49 +g50 +sg51 +g52 +sg53 +S'63945646' +p61 +sg55 +g56 +sssS'grch37' +p62 +(dp63 +g45 +S'NC_000017.10:g.62023006C>A' +p64 +sg47 +(dp65 +g49 +S'17' +p66 +sg51 +g52 +sg53 +S'62023006' +p67 +sg55 +g56 +sssS'grch38' +p68 +(dp69 +g45 +S'NC_000017.11:g.63945646C>A' +p70 +sg47 +(dp71 +g49 +g66 +sg51 +g52 +sg53 +S'63945646' +p72 +sg55 +g56 +ssssS'reference_sequence_records' +p73 +(dp74 +S'protein' +p75 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4' +p76 +sS'transcript' +p77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4' +p78 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant261.txt b/VariantValidator/testing/testOutputsMasterITS/variant261.txt new file mode 100644 index 00000000..9384931b --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant261.txt @@ -0,0 +1,171 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000334.4:c.2111C>T' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens sodium voltage-gated channel alpha subunit 4 (SCN4A), mRNA +p14 +sS'gene_symbol' +p15 +S'SCN4A' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_000325.4:p.(Thr704Met)' +p20 +sS'slr' +p21 +S'NP_000325.4:p.(T704M)' +p22 +ssS'submitted_variant' +p23 +S'17-62034787-G-A' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_000334.4:c.2111C>T' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000017.10:g.62034787G>A' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr17' +p39 +sS'ref' +p40 +VG +p41 +sS'pos' +p42 +S'62034787' +p43 +sS'alt' +p44 +VA +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000017.11:g.63957427G>A' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +g41 +sg42 +S'63957427' +p50 +sg44 +g45 +sssS'grch37' +p51 +(dp52 +g34 +S'NC_000017.10:g.62034787G>A' +p53 +sg36 +(dp54 +g38 +S'17' +p55 +sg40 +g41 +sg42 +S'62034787' +p56 +sg44 +g45 +sssS'grch38' +p57 +(dp58 +g34 +S'NC_000017.11:g.63957427G>A' +p59 +sg36 +(dp60 +g38 +g55 +sg40 +g41 +sg42 +S'63957427' +p61 +sg44 +g45 +ssssS'reference_sequence_records' +p62 +(dp63 +S'protein' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4' +p65 +sS'transcript' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4' +p67 
+sssS'metadata' +p68 +(dp69 +S'variantvalidator_hgvs_version' +p70 +S'1.1.3' +p71 +sS'uta_schema' +p72 +S'uta_20180821' +p73 +sS'seqrepo_db' +p74 +S'2018-08-21' +p75 +sS'variantvalidator_version' +p76 +S'v0.2' +p77 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant262.txt b/VariantValidator/testing/testOutputsMasterITS/variant262.txt new file mode 100644 index 00000000..517193d1 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant262.txt @@ -0,0 +1,914 @@ +(dp0 +S'NM_001351443.1:c.-16+941_-16+946del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000018.9:g.24128261GTCCTCC>G automapped to NC_000018.9:g.24128273_24128278del' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 6, mRNA +p13 +sS'gene_symbol' +p14 +S'KCTD1' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_001338372.1:p.?' +p19 +sS'slr' +p20 +S'NP_001338372.1:p.?' 
+p21 +ssS'submitted_variant' +p22 +S'18-24128261-GTCCTCC-G' +p23 +sS'genome_context_intronic_sequence' +p24 +S'NC_000018.9(NM_001351443.1):c.-16+941_-16+946del' +p25 +sS'hgvs_lrg_variant' +p26 +g4 +sS'hgvs_transcript_variant' +p27 +S'NM_001351443.1:c.-16+941_-16+946del' +p28 +sS'hgvs_refseqgene_variant' +p29 +g4 +sS'primary_assembly_loci' +p30 +(dp31 +S'grch38' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000018.10:g.26548298_26548303del' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'18' +p39 +sS'ref' +p40 +S'GTCCTCC' +p41 +sS'pos' +p42 +S'26548297' +p43 +sS'alt' +p44 +S'G' +p45 +sssS'grch37' +p46 +(dp47 +g34 +S'NC_000018.9:g.24128262_24128267del' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +S'GTCCTCC' +p50 +sg42 +S'24128261' +p51 +sg44 +g45 +sssS'hg38' +p52 +(dp53 +g34 +S'NC_000018.10:g.26548298_26548303del' +p54 +sg36 +(dp55 +g38 +S'chr18' +p56 +sg40 +S'GTCCTCC' +p57 +sg42 +S'26548297' +p58 +sg44 +g45 +sssS'hg19' +p59 +(dp60 +g34 +S'NC_000018.9:g.24128262_24128267del' +p61 +sg36 +(dp62 +g38 +g56 +sg40 +S'GTCCTCC' +p63 +sg42 +S'24128261' +p64 +sg44 +g45 +ssssS'reference_sequence_records' +p65 +(dp66 +S'protein' +p67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001338372.1' +p68 +sS'transcript' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001351443.1' +p70 +sssS'NM_001258222.1:c.10-47053_10-47048del' +p71 +(dp72 +g3 +g4 +sg5 +(lp73 +S'NC_000018.9:g.24128261GTCCTCC>G automapped to NC_000018.9:g.24128273_24128278del' +p74 +aS'A more recent version of the selected reference sequence NM_001258222.1 is available (NM_001258222.2)' +p75 +aS'NM_001258222.2:c.10-47053_10-47048del MUST be fully validated prior to use in reports' +p76 +aS'select_variants=NM_001258222.2:c.10-47053_10-47048del' +p77 +aS'RefSeqGene record not available' +p78 +asg9 +g4 +sg10 +(lp79 +sg12 +VHomo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 5, mRNA +p80 +sg14 +S'KCTD1' +p81 +sg16 +(dp82 +g18 +S'NP_001245151.1:p.?' +p83 +sg20 +S'NP_001245151.1:p.?' 
+p84 +ssg22 +g23 +sg24 +S'NC_000018.9(NM_001258222.1):c.10-47053_10-47048del' +p85 +sg26 +g4 +sg27 +S'NM_001258222.1:c.10-47053_10-47048del' +p86 +sg29 +g4 +sg30 +(dp87 +S'grch38' +p88 +(dp89 +g34 +S'NC_000018.10:g.26548298_26548303del' +p90 +sg36 +(dp91 +g38 +g39 +sg40 +S'GTCCTCC' +p92 +sg42 +S'26548297' +p93 +sg44 +g45 +sssS'grch37' +p94 +(dp95 +g34 +S'NC_000018.9:g.24128262_24128267del' +p96 +sg36 +(dp97 +g38 +g39 +sg40 +S'GTCCTCC' +p98 +sg42 +S'24128261' +p99 +sg44 +g45 +sssg52 +(dp100 +g34 +S'NC_000018.10:g.26548298_26548303del' +p101 +sg36 +(dp102 +g38 +g56 +sg40 +S'GTCCTCC' +p103 +sg42 +S'26548297' +p104 +sg44 +g45 +sssS'hg19' +p105 +(dp106 +g34 +S'NC_000018.9:g.24128262_24128267del' +p107 +sg36 +(dp108 +g38 +g56 +sg40 +S'GTCCTCC' +p109 +sg42 +S'24128261' +p110 +sg44 +g45 +ssssg65 +(dp111 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001245151.1' +p112 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001258222.1' +p113 +sssS'NM_001258221.1:c.-16+1426_-16+1431del' +p114 +(dp115 +g3 +g4 +sg5 +(lp116 +S'NC_000018.9:g.24128261GTCCTCC>G automapped to NC_000018.9:g.24128273_24128278del' +p117 +aS'RefSeqGene record not available' +p118 +asg9 +g4 +sg10 +(lp119 +sg12 +VHomo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 4, mRNA +p120 +sg14 +S'KCTD1' +p121 +sg16 +(dp122 +g18 +S'NP_001245150.1:p.?' +p123 +sg20 +S'NP_001245150.1:p.?' 
+p124 +ssg22 +g23 +sg24 +S'NC_000018.9(NM_001258221.1):c.-16+1426_-16+1431del' +p125 +sg26 +g4 +sg27 +S'NM_001258221.1:c.-16+1426_-16+1431del' +p126 +sg29 +g4 +sg30 +(dp127 +S'grch38' +p128 +(dp129 +g34 +S'NC_000018.10:g.26548298_26548303del' +p130 +sg36 +(dp131 +g38 +g39 +sg40 +S'GTCCTCC' +p132 +sg42 +S'26548297' +p133 +sg44 +g45 +sssS'grch37' +p134 +(dp135 +g34 +S'NC_000018.9:g.24128262_24128267del' +p136 +sg36 +(dp137 +g38 +g39 +sg40 +S'GTCCTCC' +p138 +sg42 +S'24128261' +p139 +sg44 +g45 +sssg52 +(dp140 +g34 +S'NC_000018.10:g.26548298_26548303del' +p141 +sg36 +(dp142 +g38 +g56 +sg40 +S'GTCCTCC' +p143 +sg42 +S'26548297' +p144 +sg44 +g45 +sssS'hg19' +p145 +(dp146 +g34 +S'NC_000018.9:g.24128262_24128267del' +p147 +sg36 +(dp148 +g38 +g56 +sg40 +S'GTCCTCC' +p149 +sg42 +S'24128261' +p150 +sg44 +g45 +ssssg65 +(dp151 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001245150.1' +p152 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001258221.1' +p153 +sssS'NM_001258222.2:c.10-47053_10-47048del' +p154 +(dp155 +g3 +g4 +sg5 +(lp156 +S'NC_000018.9:g.24128261GTCCTCC>G automapped to NC_000018.9:g.24128273_24128278del' +p157 +aS'RefSeqGene record not available' +p158 +asg9 +g4 +sg10 +(lp159 +sg12 +VHomo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 5, mRNA +p160 +sg14 +S'KCTD1' +p161 +sg16 +(dp162 +g18 +S'NP_001245151.1:p.?' +p163 +sg20 +S'NP_001245151.1:p.?' 
+p164 +ssg22 +g23 +sg24 +S'NC_000018.9(NM_001258222.2):c.10-47053_10-47048del' +p165 +sg26 +g4 +sg27 +S'NM_001258222.2:c.10-47053_10-47048del' +p166 +sg29 +g4 +sg30 +(dp167 +S'grch38' +p168 +(dp169 +g34 +S'NC_000018.10:g.26548298_26548303del' +p170 +sg36 +(dp171 +g38 +g39 +sg40 +S'GTCCTCC' +p172 +sg42 +S'26548297' +p173 +sg44 +g45 +sssS'grch37' +p174 +(dp175 +g34 +S'NC_000018.9:g.24128262_24128267del' +p176 +sg36 +(dp177 +g38 +g39 +sg40 +S'GTCCTCC' +p178 +sg42 +S'24128261' +p179 +sg44 +g45 +sssg52 +(dp180 +g34 +S'NC_000018.10:g.26548298_26548303del' +p181 +sg36 +(dp182 +g38 +g56 +sg40 +S'GTCCTCC' +p183 +sg42 +S'26548297' +p184 +sg44 +g45 +sssS'hg19' +p185 +(dp186 +g34 +S'NC_000018.9:g.24128262_24128267del' +p187 +sg36 +(dp188 +g38 +g56 +sg40 +S'GTCCTCC' +p189 +sg42 +S'24128261' +p190 +sg44 +g45 +ssssg65 +(dp191 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001245151.1' +p192 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001258222.2' +p193 +sssS'flag' +p194 +S'gene_variant' +p195 +sS'NM_001136205.2:c.-16+588_-16+593del' +p196 +(dp197 +g3 +g4 +sg5 +(lp198 +S'NC_000018.9:g.24128261GTCCTCC>G automapped to NC_000018.9:g.24128273_24128278del' +p199 +aS'RefSeqGene record not available' +p200 +asg9 +g4 +sg10 +(lp201 +sg12 +VHomo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 1, mRNA +p202 +sg14 +S'KCTD1' +p203 +sg16 +(dp204 +g18 +S'NP_001129677.1:p.?' +p205 +sg20 +S'NP_001129677.1:p.?' 
+p206 +ssg22 +g23 +sg24 +S'NC_000018.9(NM_001136205.2):c.-16+588_-16+593del' +p207 +sg26 +g4 +sg27 +S'NM_001136205.2:c.-16+588_-16+593del' +p208 +sg29 +g4 +sg30 +(dp209 +S'grch38' +p210 +(dp211 +g34 +S'NC_000018.10:g.26548298_26548303del' +p212 +sg36 +(dp213 +g38 +g39 +sg40 +S'GTCCTCC' +p214 +sg42 +S'26548297' +p215 +sg44 +g45 +sssS'grch37' +p216 +(dp217 +g34 +S'NC_000018.9:g.24128262_24128267del' +p218 +sg36 +(dp219 +g38 +g39 +sg40 +S'GTCCTCC' +p220 +sg42 +S'24128261' +p221 +sg44 +g45 +sssg52 +(dp222 +g34 +S'NC_000018.10:g.26548298_26548303del' +p223 +sg36 +(dp224 +g38 +g56 +sg40 +S'GTCCTCC' +p225 +sg42 +S'26548297' +p226 +sg44 +g45 +sssS'hg19' +p227 +(dp228 +g34 +S'NC_000018.9:g.24128262_24128267del' +p229 +sg36 +(dp230 +g38 +g56 +sg40 +S'GTCCTCC' +p231 +sg42 +S'24128261' +p232 +sg44 +g45 +ssssg65 +(dp233 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129677.1' +p234 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001136205.2' +p235 +sssS'NM_198991.3:c.-15-47053_-15-47048del' +p236 +(dp237 +g3 +g4 +sg5 +(lp238 +S'NC_000018.9:g.24128261GTCCTCC>G automapped to NC_000018.9:g.24128273_24128278del' +p239 +aS'RefSeqGene record not available' +p240 +asg9 +g4 +sg10 +(lp241 +sg12 +VHomo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 2, mRNA +p242 +sg14 +S'KCTD1' +p243 +sg16 +(dp244 +g18 +S'NP_945342.1:p.?' +p245 +sg20 +S'NP_945342.1:p.?' 
+p246 +ssg22 +g23 +sg24 +S'NC_000018.9(NM_198991.3):c.-15-47053_-15-47048del' +p247 +sg26 +g4 +sg27 +S'NM_198991.3:c.-15-47053_-15-47048del' +p248 +sg29 +g4 +sg30 +(dp249 +S'grch38' +p250 +(dp251 +g34 +S'NC_000018.10:g.26548298_26548303del' +p252 +sg36 +(dp253 +g38 +g39 +sg40 +S'GTCCTCC' +p254 +sg42 +S'26548297' +p255 +sg44 +g45 +sssS'grch37' +p256 +(dp257 +g34 +S'NC_000018.9:g.24128262_24128267del' +p258 +sg36 +(dp259 +g38 +g39 +sg40 +S'GTCCTCC' +p260 +sg42 +S'24128261' +p261 +sg44 +g45 +sssg52 +(dp262 +g34 +S'NC_000018.10:g.26548298_26548303del' +p263 +sg36 +(dp264 +g38 +g56 +sg40 +S'GTCCTCC' +p265 +sg42 +S'26548297' +p266 +sg44 +g45 +sssS'hg19' +p267 +(dp268 +g34 +S'NC_000018.9:g.24128262_24128267del' +p269 +sg36 +(dp270 +g38 +g56 +sg40 +S'GTCCTCC' +p271 +sg42 +S'24128261' +p272 +sg44 +g45 +ssssg65 +(dp273 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_945342.1' +p274 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_198991.3' +p275 +sssS'NM_001142730.2:c.234_239del' +p276 +(dp277 +g3 +g4 +sg5 +(lp278 +S'NC_000018.9:g.24128261GTCCTCC>G automapped to NC_000018.9:g.24128273_24128278del' +p279 +aS'RefSeqGene record not available' +p280 +asg9 +g4 +sg10 +(lp281 +sg12 +VHomo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 3, mRNA +p282 +sg14 +S'KCTD1' +p283 +sg16 +(dp284 +g18 +S'NP_001136202.1:p.(Glu78_Glu79del)' +p285 +sg20 +S'NP_001136202.1:p.(E78_E79del)' +p286 +ssg22 +g23 +sg24 +g4 +sg26 +g4 +sg27 +S'NM_001142730.2:c.234_239del' +p287 +sg29 +g4 +sg30 +(dp288 +S'grch38' +p289 +(dp290 +g34 +S'NC_000018.10:g.26548298_26548303del' +p291 +sg36 +(dp292 +g38 +g39 +sg40 +S'GTCCTCC' +p293 +sg42 +S'26548297' +p294 +sg44 +g45 +sssS'grch37' +p295 +(dp296 +g34 +S'NC_000018.9:g.24128262_24128267del' +p297 +sg36 +(dp298 +g38 +g39 +sg40 +S'GTCCTCC' +p299 +sg42 +S'24128261' +p300 +sg44 +g45 +sssg52 +(dp301 +g34 +S'NC_000018.10:g.26548298_26548303del' +p302 +sg36 +(dp303 +g38 +g56 +sg40 +S'GTCCTCC' +p304 +sg42 +S'26548297' +p305 +sg44 +g45 
+sssS'hg19' +p306 +(dp307 +g34 +S'NC_000018.9:g.24128262_24128267del' +p308 +sg36 +(dp309 +g38 +g56 +sg40 +S'GTCCTCC' +p310 +sg42 +S'24128261' +p311 +sg44 +g45 +ssssg65 +(dp312 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001136202.1' +p313 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001142730.2' +p314 +sssS'metadata' +p315 +(dp316 +S'variantvalidator_hgvs_version' +p317 +S'1.1.3' +p318 +sS'uta_schema' +p319 +S'uta_20180821' +p320 +sS'seqrepo_db' +p321 +S'2018-08-21' +p322 +sS'variantvalidator_version' +p323 +S'v0.2' +p324 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant263.txt b/VariantValidator/testing/testOutputsMasterITS/variant263.txt new file mode 100644 index 00000000..56a73641 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant263.txt @@ -0,0 +1,171 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_000435.2:c.2992C>T' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'RefSeqGene record not available' +p19 +asS'refseqgene_context_intronic_sequence' +p20 +g16 +sS'alt_genomic_loci' +p21 +(lp22 +sS'transcript_description' +p23 +VHomo sapiens notch 3 (NOTCH3), mRNA +p24 +sS'gene_symbol' +p25 +S'NOTCH3' +p26 +sS'hgvs_predicted_protein_consequence' +p27 +(dp28 +S'tlr' +p29 +S'NP_000426.2:p.(Gln998Ter)' +p30 +sS'slr' +p31 +S'NP_000426.2:p.(Q998*)' +p32 +ssS'submitted_variant' +p33 +S'19-15291774-G-A' +p34 +sS'genome_context_intronic_sequence' +p35 +g16 +sS'hgvs_lrg_variant' +p36 +g16 +sS'hgvs_transcript_variant' +p37 +S'NM_000435.2:c.2992C>T' +p38 +sS'hgvs_refseqgene_variant' +p39 +g16 +sS'primary_assembly_loci' +p40 +(dp41 +S'grch38' +p42 +(dp43 +S'hgvs_genomic_description' +p44 +S'NC_000019.10:g.15180963G>A' +p45 +sS'vcf' +p46 +(dp47 
+S'chr' +p48 +S'19' +p49 +sS'ref' +p50 +VG +p51 +sS'pos' +p52 +S'15180963' +p53 +sS'alt' +p54 +VA +p55 +sssS'grch37' +p56 +(dp57 +g44 +S'NC_000019.9:g.15291774G>A' +p58 +sg46 +(dp59 +g48 +g49 +sg50 +g51 +sg52 +S'15291774' +p60 +sg54 +g55 +sssS'hg38' +p61 +(dp62 +g44 +S'NC_000019.10:g.15180963G>A' +p63 +sg46 +(dp64 +g48 +S'chr19' +p65 +sg50 +g51 +sg52 +S'15180963' +p66 +sg54 +g55 +sssS'hg19' +p67 +(dp68 +g44 +S'NC_000019.9:g.15291774G>A' +p69 +sg46 +(dp70 +g48 +g65 +sg50 +g51 +sg52 +S'15291774' +p71 +sg54 +g55 +ssssS'reference_sequence_records' +p72 +(dp73 +S'protein' +p74 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000426.2' +p75 +sS'transcript' +p76 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000435.2' +p77 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant264.txt b/VariantValidator/testing/testOutputsMasterITS/variant264.txt new file mode 100644 index 00000000..d197c6d9 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant264.txt @@ -0,0 +1,156 @@ +(dp0 +S'flag' +p1 +S'intergenic' +p2 +sS'Intergenic_Variant_1' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'No transcripts found that fully overlap the described variation in the genomic sequence' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +g6 +sS'gene_symbol' +p14 +g6 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +g6 +sS'slr' +p18 +g6 +ssS'submitted_variant' +p19 +S'19-15311794-A-G' +p20 +sS'genome_context_intronic_sequence' +p21 +g6 +sS'hgvs_lrg_variant' +p22 +g6 +sS'hgvs_transcript_variant' +p23 +g6 +sS'hgvs_refseqgene_variant' +p24 +g6 +sS'primary_assembly_loci' +p25 +(dp26 +S'hg19' +p27 +(dp28 +S'hgvs_genomic_description' +p29 +VNC_000019.9:g.15311794A>G +p30 +sS'vcf' +p31 +(dp32 +S'chr' +p33 +S'chr19' +p34 +sS'ref' +p35 +S'A' +p36 +sS'pos' +p37 +S'15311794' +p38 +sS'alt' +p39 +S'G' +p40 +sssS'grch37' 
+p41 +(dp42 +g29 +VNC_000019.9:g.15311794A>G +p43 +sg31 +(dp44 +g33 +S'19' +p45 +sg35 +g36 +sg37 +g38 +sg39 +g40 +sssS'hg38' +p46 +(dp47 +g29 +VNC_000019.10:g.15200983A>G +p48 +sg31 +(dp49 +g33 +g34 +sg35 +g36 +sg37 +S'15200983' +p50 +sg39 +g40 +sssS'grch38' +p51 +(dp52 +g29 +VNC_000019.10:g.15200983A>G +p53 +sg31 +(dp54 +g33 +g45 +sg35 +g36 +sg37 +g50 +sg39 +g40 +ssssS'reference_sequence_records' +p55 +g6 +ssS'metadata' +p56 +(dp57 +S'variantvalidator_hgvs_version' +p58 +S'1.1.3' +p59 +sS'uta_schema' +p60 +S'uta_20180821' +p61 +sS'seqrepo_db' +p62 +S'2018-08-21' +p63 +sS'variantvalidator_version' +p64 +S'v0.2' +p65 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant265.txt b/VariantValidator/testing/testOutputsMasterITS/variant265.txt new file mode 100644 index 00000000..a7e72c16 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant265.txt @@ -0,0 +1,286 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000540.2:c.14818G>A' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens ryanodine receptor 1 (RYR1), transcript variant 1, mRNA +p14 +sS'gene_symbol' +p15 +S'RYR1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_000531.2:p.(Ala4940Thr)' +p20 +sS'slr' +p21 +S'NP_000531.2:p.(A4940T)' +p22 +ssS'submitted_variant' +p23 +S'19-39076592-G-A' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_000540.2:c.14818G>A' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'grch38' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000019.10:g.38585952G>A' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'19' +p39 +sS'ref' +p40 +S'G' +p41 +sS'pos' +p42 +S'38585952' +p43 +sS'alt' +p44 
+S'A' +p45 +sssS'grch37' +p46 +(dp47 +g34 +S'NC_000019.9:g.39076592G>A' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +g41 +sg42 +S'39076592' +p50 +sg44 +g45 +sssS'hg38' +p51 +(dp52 +g34 +S'NC_000019.10:g.38585952G>A' +p53 +sg36 +(dp54 +g38 +S'chr19' +p55 +sg40 +g41 +sg42 +S'38585952' +p56 +sg44 +g45 +sssS'hg19' +p57 +(dp58 +g34 +S'NC_000019.9:g.39076592G>A' +p59 +sg36 +(dp60 +g38 +g55 +sg40 +g41 +sg42 +S'39076592' +p61 +sg44 +g45 +ssssS'reference_sequence_records' +p62 +(dp63 +S'protein' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000531.2' +p65 +sS'transcript' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000540.2' +p67 +sssS'NM_001042723.1:c.14803G>A' +p68 +(dp69 +g5 +g6 +sg7 +(lp70 +S'RefSeqGene record not available' +p71 +asg10 +g6 +sg11 +(lp72 +sg13 +VHomo sapiens ryanodine receptor 1 (RYR1), transcript variant 2, mRNA +p73 +sg15 +S'RYR1' +p74 +sg17 +(dp75 +g19 +S'NP_001036188.1:p.(Ala4935Thr)' +p76 +sg21 +S'NP_001036188.1:p.(A4935T)' +p77 +ssg23 +g24 +sg25 +g6 +sg26 +g6 +sg27 +S'NM_001042723.1:c.14803G>A' +p78 +sg29 +g6 +sg30 +(dp79 +S'grch38' +p80 +(dp81 +g34 +S'NC_000019.10:g.38585952G>A' +p82 +sg36 +(dp83 +g38 +g39 +sg40 +g41 +sg42 +S'38585952' +p84 +sg44 +g45 +sssS'grch37' +p85 +(dp86 +g34 +S'NC_000019.9:g.39076592G>A' +p87 +sg36 +(dp88 +g38 +g39 +sg40 +g41 +sg42 +S'39076592' +p89 +sg44 +g45 +sssg51 +(dp90 +g34 +S'NC_000019.10:g.38585952G>A' +p91 +sg36 +(dp92 +g38 +g55 +sg40 +g41 +sg42 +S'38585952' +p93 +sg44 +g45 +sssS'hg19' +p94 +(dp95 +g34 +S'NC_000019.9:g.39076592G>A' +p96 +sg36 +(dp97 +g38 +g55 +sg40 +g41 +sg42 +S'39076592' +p98 +sg44 +g45 +ssssg62 +(dp99 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036188.1' +p100 +sg66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042723.1' +p101 +sssS'metadata' +p102 +(dp103 +S'variantvalidator_hgvs_version' +p104 +S'1.1.3' +p105 +sS'uta_schema' +p106 +S'uta_20180821' +p107 +sS'seqrepo_db' +p108 +S'2018-08-21' +p109 +sS'variantvalidator_version' +p110 +S'v0.2' +p111 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant266.txt b/VariantValidator/testing/testOutputsMasterITS/variant266.txt new file mode 100644 index 00000000..fc9a1170 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant266.txt @@ -0,0 +1,3076 @@ +(dp0 +S'NM_001330086.1:c.4245A>G' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha9, mRNA +p12 +sS'gene_symbol' +p13 +S'NRXN1' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_001317015.1:p.(Pro1415=)' +p18 +sS'slr' +p19 +S'NP_001317015.1:p.(P1415=)' +p20 +ssS'submitted_variant' +p21 +S'2-50149352-T-C' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_001330086.1:c.4245A>G' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000002.11:g.50149352T>C' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr2' +p37 +sS'ref' +p38 +VT +p39 +sS'pos' +p40 +S'50149352' +p41 +sS'alt' +p42 +VC +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000002.12:g.49922214T>C' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +g39 +sg40 +S'49922214' +p48 +sg42 +g43 +sssS'grch37' +p49 +(dp50 +g32 +S'NC_000002.11:g.50149352T>C' +p51 +sg34 +(dp52 +g36 +S'2' +p53 +sg38 +g39 +sg40 +S'50149352' +p54 +sg42 +g43 +sssS'grch38' +p55 +(dp56 +g32 +S'NC_000002.12:g.49922214T>C' +p57 +sg34 +(dp58 +g36 +g53 +sg38 +g39 +sg40 +S'49922214' +p59 +sg42 +g43 +ssssS'reference_sequence_records' +p60 +(dp61 +S'protein' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317015.1' +p63 +sS'transcript' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330086.1' +p65 +sssS'NM_001330083.1:c.4089A>G' 
+p66 +(dp67 +g3 +g4 +sg5 +(lp68 +S'RefSeqGene record not available' +p69 +asg8 +g4 +sg9 +(lp70 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha6, mRNA +p71 +sg13 +S'NRXN1' +p72 +sg15 +(dp73 +g17 +S'NP_001317012.1:p.(Pro1363=)' +p74 +sg19 +S'NP_001317012.1:p.(P1363=)' +p75 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330083.1:c.4089A>G' +p76 +sg27 +g4 +sg28 +(dp77 +S'hg19' +p78 +(dp79 +g32 +S'NC_000002.11:g.50149352T>C' +p80 +sg34 +(dp81 +g36 +g37 +sg38 +g39 +sg40 +S'50149352' +p82 +sg42 +g43 +sssg44 +(dp83 +g32 +S'NC_000002.12:g.49922214T>C' +p84 +sg34 +(dp85 +g36 +g37 +sg38 +g39 +sg40 +S'49922214' +p86 +sg42 +g43 +sssS'grch37' +p87 +(dp88 +g32 +S'NC_000002.11:g.50149352T>C' +p89 +sg34 +(dp90 +g36 +g53 +sg38 +g39 +sg40 +S'50149352' +p91 +sg42 +g43 +sssS'grch38' +p92 +(dp93 +g32 +S'NC_000002.12:g.49922214T>C' +p94 +sg34 +(dp95 +g36 +g53 +sg38 +g39 +sg40 +S'49922214' +p96 +sg42 +g43 +ssssg60 +(dp97 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317012.1' +p98 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330083.1' +p99 +sssS'NM_001330095.1:c.4113A>G' +p100 +(dp101 +g3 +g4 +sg5 +(lp102 +S'RefSeqGene record not available' +p103 +asg8 +g4 +sg9 +(lp104 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha14, mRNA +p105 +sg13 +S'NRXN1' +p106 +sg15 +(dp107 +g17 +S'NP_001317024.1:p.(Pro1371=)' +p108 +sg19 +S'NP_001317024.1:p.(P1371=)' +p109 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330095.1:c.4113A>G' +p110 +sg27 +g4 +sg28 +(dp111 +S'hg19' +p112 +(dp113 +g32 +S'NC_000002.11:g.50149352T>C' +p114 +sg34 +(dp115 +g36 +g37 +sg38 +g39 +sg40 +S'50149352' +p116 +sg42 +g43 +sssg44 +(dp117 +g32 +S'NC_000002.12:g.49922214T>C' +p118 +sg34 +(dp119 +g36 +g37 +sg38 +g39 +sg40 +S'49922214' +p120 +sg42 +g43 +sssS'grch37' +p121 +(dp122 +g32 +S'NC_000002.11:g.50149352T>C' +p123 +sg34 +(dp124 +g36 +g53 +sg38 +g39 +sg40 +S'50149352' +p125 +sg42 +g43 +sssS'grch38' +p126 +(dp127 +g32 +S'NC_000002.12:g.49922214T>C' +p128 +sg34 +(dp129 +g36 +g53 +sg38 
+g39 +sg40 +S'49922214' +p130 +sg42 +g43 +ssssg60 +(dp131 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317024.1' +p132 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330095.1' +p133 +sssS'NM_138735.2:c.1059A>G' +p134 +(dp135 +g3 +g4 +sg5 +(lp136 +S'A more recent version of the selected reference sequence NM_138735.2 is available (NM_138735.4)' +p137 +aS'NM_138735.4:c.1059A>G MUST be fully validated prior to use in reports' +p138 +aS'select_variants=NM_138735.4:c.1059A>G' +p139 +aS'RefSeqGene record not available' +p140 +asg8 +g4 +sg9 +(lp141 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant beta, mRNA +p142 +sg13 +S'NRXN1' +p143 +sg15 +(dp144 +g17 +S'NP_620072.1:p.(Pro353=)' +p145 +sg19 +S'NP_620072.1:p.(P353=)' +p146 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_138735.2:c.1059A>G' +p147 +sg27 +g4 +sg28 +(dp148 +S'hg19' +p149 +(dp150 +g32 +S'NC_000002.11:g.50149352T>C' +p151 +sg34 +(dp152 +g36 +g37 +sg38 +g39 +sg40 +S'50149352' +p153 +sg42 +g43 +sssg44 +(dp154 +g32 +S'NC_000002.12:g.49922214T>C' +p155 +sg34 +(dp156 +g36 +g37 +sg38 +g39 +sg40 +S'49922214' +p157 +sg42 +g43 +sssS'grch37' +p158 +(dp159 +g32 +S'NC_000002.11:g.50149352T>C' +p160 +sg34 +(dp161 +g36 +g53 +sg38 +g39 +sg40 +S'50149352' +p162 +sg42 +g43 +sssS'grch38' +p163 +(dp164 +g32 +S'NC_000002.12:g.49922214T>C' +p165 +sg34 +(dp166 +g36 +g53 +sg38 +g39 +sg40 +S'49922214' +p167 +sg42 +g43 +ssssg60 +(dp168 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_620072.1' +p169 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_138735.2' +p170 +sssS'NM_001330078.1:c.4254A>G' +p171 +(dp172 +g3 +g4 +sg5 +(lp173 +S'RefSeqGene record not available' +p174 +asg8 +g4 +sg9 +(lp175 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha4, mRNA +p176 +sg13 +S'NRXN1' +p177 +sg15 +(dp178 +g17 +S'NP_001317007.1:p.(Pro1418=)' +p179 +sg19 +S'NP_001317007.1:p.(P1418=)' +p180 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330078.1:c.4254A>G' +p181 +sg27 +g4 +sg28 +(dp182 +S'hg19' +p183 +(dp184 +g32 
+S'NC_000002.11:g.50149352T>C' +p185 +sg34 +(dp186 +g36 +g37 +sg38 +g39 +sg40 +S'50149352' +p187 +sg42 +g43 +sssg44 +(dp188 +g32 +S'NC_000002.12:g.49922214T>C' +p189 +sg34 +(dp190 +g36 +g37 +sg38 +g39 +sg40 +S'49922214' +p191 +sg42 +g43 +sssS'grch37' +p192 +(dp193 +g32 +S'NC_000002.11:g.50149352T>C' +p194 +sg34 +(dp195 +g36 +g53 +sg38 +g39 +sg40 +S'50149352' +p196 +sg42 +g43 +sssS'grch38' +p197 +(dp198 +g32 +S'NC_000002.12:g.49922214T>C' +p199 +sg34 +(dp200 +g36 +g53 +sg38 +g39 +sg40 +S'49922214' +p201 +sg42 +g43 +ssssg60 +(dp202 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317007.1' +p203 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330078.1' +p204 +sssS'NM_001330094.1:c.4233A>G' +p205 +(dp206 +g3 +g4 +sg5 +(lp207 +S'RefSeqGene record not available' +p208 +asg8 +g4 +sg9 +(lp209 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha13, mRNA +p210 +sg13 +S'NRXN1' +p211 +sg15 +(dp212 +g17 +S'NP_001317023.1:p.(Pro1411=)' +p213 +sg19 +S'NP_001317023.1:p.(P1411=)' +p214 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330094.1:c.4233A>G' +p215 +sg27 +g4 +sg28 +(dp216 +S'hg19' +p217 +(dp218 +g32 +S'NC_000002.11:g.50149352T>C' +p219 +sg34 +(dp220 +g36 +g37 +sg38 +g39 +sg40 +S'50149352' +p221 +sg42 +g43 +sssg44 +(dp222 +g32 +S'NC_000002.12:g.49922214T>C' +p223 +sg34 +(dp224 +g36 +g37 +sg38 +g39 +sg40 +S'49922214' +p225 +sg42 +g43 +sssS'grch37' +p226 +(dp227 +g32 +S'NC_000002.11:g.50149352T>C' +p228 +sg34 +(dp229 +g36 +g53 +sg38 +g39 +sg40 +S'50149352' +p230 +sg42 +g43 +sssS'grch38' +p231 +(dp232 +g32 +S'NC_000002.12:g.49922214T>C' +p233 +sg34 +(dp234 +g36 +g53 +sg38 +g39 +sg40 +S'49922214' +p235 +sg42 +g43 +ssssg60 +(dp236 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317023.1' +p237 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330094.1' +p238 +sssS'NM_001320157.3:c.150A>G' +p239 +(dp240 +g3 +g4 +sg5 +(lp241 +S'RefSeqGene record not available' +p242 +asg8 +g4 +sg9 +(lp243 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant gamma2, 
mRNA +p244 +sg13 +S'NRXN1' +p245 +sg15 +(dp246 +g17 +S'NP_001307086.1:p.(Pro50=)' +p247 +sg19 +S'NP_001307086.1:p.(P50=)' +p248 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001320157.3:c.150A>G' +p249 +sg27 +g4 +sg28 +(dp250 +S'hg19' +p251 +(dp252 +g32 +S'NC_000002.11:g.50149352T>C' +p253 +sg34 +(dp254 +g36 +g37 +sg38 +g39 +sg40 +S'50149352' +p255 +sg42 +g43 +sssg44 +(dp256 +g32 +S'NC_000002.12:g.49922214T>C' +p257 +sg34 +(dp258 +g36 +g37 +sg38 +g39 +sg40 +S'49922214' +p259 +sg42 +g43 +sssS'grch37' +p260 +(dp261 +g32 +S'NC_000002.11:g.50149352T>C' +p262 +sg34 +(dp263 +g36 +g53 +sg38 +g39 +sg40 +S'50149352' +p264 +sg42 +g43 +sssS'grch38' +p265 +(dp266 +g32 +S'NC_000002.12:g.49922214T>C' +p267 +sg34 +(dp268 +g36 +g53 +sg38 +g39 +sg40 +S'49922214' +p269 +sg42 +g43 +ssssg60 +(dp270 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001307086.1' +p271 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001320157.3' +p272 +sssS'NM_001330088.1:c.4074A>G' +p273 +(dp274 +g3 +g4 +sg5 +(lp275 +S'RefSeqGene record not available' +p276 +asg8 +g4 +sg9 +(lp277 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha11, mRNA +p278 +sg13 +S'NRXN1' +p279 +sg15 +(dp280 +g17 +S'NP_001317017.1:p.(Pro1358=)' +p281 +sg19 +S'NP_001317017.1:p.(P1358=)' +p282 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330088.1:c.4074A>G' +p283 +sg27 +g4 +sg28 +(dp284 +S'hg19' +p285 +(dp286 +g32 +S'NC_000002.11:g.50149352T>C' +p287 +sg34 +(dp288 +g36 +g37 +sg38 +g39 +sg40 +S'50149352' +p289 +sg42 +g43 +sssg44 +(dp290 +g32 +S'NC_000002.12:g.49922214T>C' +p291 +sg34 +(dp292 +g36 +g37 +sg38 +g39 +sg40 +S'49922214' +p293 +sg42 +g43 +sssS'grch37' +p294 +(dp295 +g32 +S'NC_000002.11:g.50149352T>C' +p296 +sg34 +(dp297 +g36 +g53 +sg38 +g39 +sg40 +S'50149352' +p298 +sg42 +g43 +sssS'grch38' +p299 +(dp300 +g32 +S'NC_000002.12:g.49922214T>C' +p301 +sg34 +(dp302 +g36 +g53 +sg38 +g39 +sg40 +S'49922214' +p303 +sg42 +g43 +ssssg60 +(dp304 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317017.1' +p305 +sg64 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330088.1' +p306 +sssS'NM_001330092.1:c.1149A>G' +p307 +(dp308 +g3 +g4 +sg5 +(lp309 +S'RefSeqGene record not available' +p310 +asg8 +g4 +sg9 +(lp311 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant beta3, mRNA +p312 +sg13 +S'NRXN1' +p313 +sg15 +(dp314 +g17 +S'NP_001317021.1:p.(Pro383=)' +p315 +sg19 +S'NP_001317021.1:p.(P383=)' +p316 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330092.1:c.1149A>G' +p317 +sg27 +g4 +sg28 +(dp318 +S'hg19' +p319 +(dp320 +g32 +S'NC_000002.11:g.50149352T>C' +p321 +sg34 +(dp322 +g36 +g37 +sg38 +g39 +sg40 +S'50149352' +p323 +sg42 +g43 +sssg44 +(dp324 +g32 +S'NC_000002.12:g.49922214T>C' +p325 +sg34 +(dp326 +g36 +g37 +sg38 +g39 +sg40 +S'49922214' +p327 +sg42 +g43 +sssS'grch37' +p328 +(dp329 +g32 +S'NC_000002.11:g.50149352T>C' +p330 +sg34 +(dp331 +g36 +g53 +sg38 +g39 +sg40 +S'50149352' +p332 +sg42 +g43 +sssS'grch38' +p333 +(dp334 +g32 +S'NC_000002.12:g.49922214T>C' +p335 +sg34 +(dp336 +g36 +g53 +sg38 +g39 +sg40 +S'49922214' +p337 +sg42 +g43 +ssssg60 +(dp338 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317021.1' +p339 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330092.1' +p340 +sssS'NM_138735.4:c.1059A>G' +p341 +(dp342 +g3 +g4 +sg5 +(lp343 +S'RefSeqGene record not available' +p344 +asg8 +g4 +sg9 +(lp345 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant beta1, mRNA +p346 +sg13 +S'NRXN1' +p347 +sg15 +(dp348 +g17 +S'NP_620072.1:p.(Pro353=)' +p349 +sg19 +S'NP_620072.1:p.(P353=)' +p350 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_138735.4:c.1059A>G' +p351 +sg27 +g4 +sg28 +(dp352 +S'hg19' +p353 +(dp354 +g32 +S'NC_000002.11:g.50149352T>C' +p355 +sg34 +(dp356 +g36 +g37 +sg38 +g39 +sg40 +S'50149352' +p357 +sg42 +g43 +sssg44 +(dp358 +g32 +S'NC_000002.12:g.49922214T>C' +p359 +sg34 +(dp360 +g36 +g37 +sg38 +g39 +sg40 +S'49922214' +p361 +sg42 +g43 +sssS'grch37' +p362 +(dp363 +g32 +S'NC_000002.11:g.50149352T>C' +p364 +sg34 +(dp365 +g36 +g53 +sg38 +g39 +sg40 +S'50149352' +p366 
+sg42 +g43 +sssS'grch38' +p367 +(dp368 +g32 +S'NC_000002.12:g.49922214T>C' +p369 +sg34 +(dp370 +g36 +g53 +sg38 +g39 +sg40 +S'49922214' +p371 +sg42 +g43 +ssssg60 +(dp372 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_620072.1' +p373 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_138735.4' +p374 +sssS'NM_001330096.1:c.4044A>G' +p375 +(dp376 +g3 +g4 +sg5 +(lp377 +S'RefSeqGene record not available' +p378 +asg8 +g4 +sg9 +(lp379 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha15, mRNA +p380 +sg13 +S'NRXN1' +p381 +sg15 +(dp382 +g17 +S'NP_001317025.1:p.(Pro1348=)' +p383 +sg19 +S'NP_001317025.1:p.(P1348=)' +p384 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330096.1:c.4044A>G' +p385 +sg27 +g4 +sg28 +(dp386 +S'hg19' +p387 +(dp388 +g32 +S'NC_000002.11:g.50149352T>C' +p389 +sg34 +(dp390 +g36 +g37 +sg38 +g39 +sg40 +S'50149352' +p391 +sg42 +g43 +sssg44 +(dp392 +g32 +S'NC_000002.12:g.49922214T>C' +p393 +sg34 +(dp394 +g36 +g37 +sg38 +g39 +sg40 +S'49922214' +p395 +sg42 +g43 +sssS'grch37' +p396 +(dp397 +g32 +S'NC_000002.11:g.50149352T>C' +p398 +sg34 +(dp399 +g36 +g53 +sg38 +g39 +sg40 +S'50149352' +p400 +sg42 +g43 +sssS'grch38' +p401 +(dp402 +g32 +S'NC_000002.12:g.49922214T>C' +p403 +sg34 +(dp404 +g36 +g53 +sg38 +g39 +sg40 +S'49922214' +p405 +sg42 +g43 +ssssg60 +(dp406 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317025.1' +p407 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330096.1' +p408 +sssS'NM_001135659.2:c.4374A>G' +p409 +(dp410 +g3 +g4 +sg5 +(lp411 +S'RefSeqGene record not available' +p412 +asg8 +g4 +sg9 +(lp413 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha2, mRNA +p414 +sg13 +S'NRXN1' +p415 +sg15 +(dp416 +g17 +S'NP_001129131.1:p.(Pro1458=)' +p417 +sg19 +S'NP_001129131.1:p.(P1458=)' +p418 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001135659.2:c.4374A>G' +p419 +sg27 +g4 +sg28 +(dp420 +S'hg19' +p421 +(dp422 +g32 +S'NC_000002.11:g.50149352T>C' +p423 +sg34 +(dp424 +g36 +g37 +sg38 +g39 +sg40 +S'50149352' +p425 +sg42 +g43 +sssg44 
+(dp426 +g32 +S'NC_000002.12:g.49922214T>C' +p427 +sg34 +(dp428 +g36 +g37 +sg38 +g39 +sg40 +S'49922214' +p429 +sg42 +g43 +sssS'grch37' +p430 +(dp431 +g32 +S'NC_000002.11:g.50149352T>C' +p432 +sg34 +(dp433 +g36 +g53 +sg38 +g39 +sg40 +S'50149352' +p434 +sg42 +g43 +sssS'grch38' +p435 +(dp436 +g32 +S'NC_000002.12:g.49922214T>C' +p437 +sg34 +(dp438 +g36 +g53 +sg38 +g39 +sg40 +S'49922214' +p439 +sg42 +g43 +ssssg60 +(dp440 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129131.1' +p441 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135659.2' +p442 +sssS'NM_001330085.1:c.4227A>G' +p443 +(dp444 +g3 +g4 +sg5 +(lp445 +S'RefSeqGene record not available' +p446 +asg8 +g4 +sg9 +(lp447 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha8, mRNA +p448 +sg13 +S'NRXN1' +p449 +sg15 +(dp450 +g17 +S'NP_001317014.1:p.(Pro1409=)' +p451 +sg19 +S'NP_001317014.1:p.(P1409=)' +p452 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330085.1:c.4227A>G' +p453 +sg27 +g4 +sg28 +(dp454 +S'hg19' +p455 +(dp456 +g32 +S'NC_000002.11:g.50149352T>C' +p457 +sg34 +(dp458 +g36 +g37 +sg38 +g39 +sg40 +S'50149352' +p459 +sg42 +g43 +sssg44 +(dp460 +g32 +S'NC_000002.12:g.49922214T>C' +p461 +sg34 +(dp462 +g36 +g37 +sg38 +g39 +sg40 +S'49922214' +p463 +sg42 +g43 +sssS'grch37' +p464 +(dp465 +g32 +S'NC_000002.11:g.50149352T>C' +p466 +sg34 +(dp467 +g36 +g53 +sg38 +g39 +sg40 +S'50149352' +p468 +sg42 +g43 +sssS'grch38' +p469 +(dp470 +g32 +S'NC_000002.12:g.49922214T>C' +p471 +sg34 +(dp472 +g36 +g53 +sg38 +g39 +sg40 +S'49922214' +p473 +sg42 +g43 +ssssg60 +(dp474 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317014.1' +p475 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330085.1' +p476 +sssS'metadata' +p477 +(dp478 +S'variantvalidator_hgvs_version' +p479 +S'1.1.3' +p480 +sS'uta_schema' +p481 +S'uta_20180821' +p482 +sS'seqrepo_db' +p483 +S'2018-08-21' +p484 +sS'variantvalidator_version' +p485 +S'v0.2' +p486 +ssS'NM_001320156.1:c.159A>G' +p487 +(dp488 +g3 +g4 +sg5 +(lp489 +S'A more recent version 
of the selected reference sequence NM_001320156.1 is available (NM_001320156.3)' +p490 +aS'NM_001320156.3:c.159A>G MUST be fully validated prior to use in reports' +p491 +aS'select_variants=NM_001320156.3:c.159A>G' +p492 +aS'RefSeqGene record not available' +p493 +asg8 +g4 +sg9 +(lp494 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant gamma1, mRNA +p495 +sg13 +S'NRXN1' +p496 +sg15 +(dp497 +g17 +S'NP_001307085.1:p.(Pro53=)' +p498 +sg19 +S'NP_001307085.1:p.(P53=)' +p499 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001320156.1:c.159A>G' +p500 +sg27 +g4 +sg28 +(dp501 +S'hg19' +p502 +(dp503 +g32 +S'NC_000002.11:g.50149352T>C' +p504 +sg34 +(dp505 +g36 +g37 +sg38 +g39 +sg40 +S'50149352' +p506 +sg42 +g43 +sssg44 +(dp507 +g32 +S'NC_000002.12:g.49922214T>C' +p508 +sg34 +(dp509 +g36 +g37 +sg38 +g39 +sg40 +S'49922214' +p510 +sg42 +g43 +sssS'grch37' +p511 +(dp512 +g32 +S'NC_000002.11:g.50149352T>C' +p513 +sg34 +(dp514 +g36 +g53 +sg38 +g39 +sg40 +S'50149352' +p515 +sg42 +g43 +sssS'grch38' +p516 +(dp517 +g32 +S'NC_000002.12:g.49922214T>C' +p518 +sg34 +(dp519 +g36 +g53 +sg38 +g39 +sg40 +S'49922214' +p520 +sg42 +g43 +ssssg60 +(dp521 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001307085.1' +p522 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001320156.1' +p523 +sssS'NM_001330077.1:c.4230A>G' +p524 +(dp525 +g3 +g4 +sg5 +(lp526 +S'RefSeqGene record not available' +p527 +asg8 +g4 +sg9 +(lp528 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha3, mRNA +p529 +sg13 +S'NRXN1' +p530 +sg15 +(dp531 +g17 +S'NP_001317006.1:p.(Pro1410=)' +p532 +sg19 +S'NP_001317006.1:p.(P1410=)' +p533 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330077.1:c.4230A>G' +p534 +sg27 +g4 +sg28 +(dp535 +S'hg19' +p536 +(dp537 +g32 +S'NC_000002.11:g.50149352T>C' +p538 +sg34 +(dp539 +g36 +g37 +sg38 +g39 +sg40 +S'50149352' +p540 +sg42 +g43 +sssg44 +(dp541 +g32 +S'NC_000002.12:g.49922214T>C' +p542 +sg34 +(dp543 +g36 +g37 +sg38 +g39 +sg40 +S'49922214' +p544 +sg42 +g43 +sssS'grch37' +p545 +(dp546 
+g32 +S'NC_000002.11:g.50149352T>C' +p547 +sg34 +(dp548 +g36 +g53 +sg38 +g39 +sg40 +S'50149352' +p549 +sg42 +g43 +sssS'grch38' +p550 +(dp551 +g32 +S'NC_000002.12:g.49922214T>C' +p552 +sg34 +(dp553 +g36 +g53 +sg38 +g39 +sg40 +S'49922214' +p554 +sg42 +g43 +ssssg60 +(dp555 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317006.1' +p556 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330077.1' +p557 +sssS'NM_001330093.1:c.4251A>G' +p558 +(dp559 +g3 +g4 +sg5 +(lp560 +S'RefSeqGene record not available' +p561 +asg8 +g4 +sg9 +(lp562 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha12, mRNA +p563 +sg13 +S'NRXN1' +p564 +sg15 +(dp565 +g17 +S'NP_001317022.1:p.(Pro1417=)' +p566 +sg19 +S'NP_001317022.1:p.(P1417=)' +p567 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330093.1:c.4251A>G' +p568 +sg27 +g4 +sg28 +(dp569 +S'hg19' +p570 +(dp571 +g32 +S'NC_000002.11:g.50149352T>C' +p572 +sg34 +(dp573 +g36 +g37 +sg38 +g39 +sg40 +S'50149352' +p574 +sg42 +g43 +sssg44 +(dp575 +g32 +S'NC_000002.12:g.49922214T>C' +p576 +sg34 +(dp577 +g36 +g37 +sg38 +g39 +sg40 +S'49922214' +p578 +sg42 +g43 +sssS'grch37' +p579 +(dp580 +g32 +S'NC_000002.11:g.50149352T>C' +p581 +sg34 +(dp582 +g36 +g53 +sg38 +g39 +sg40 +S'50149352' +p583 +sg42 +g43 +sssS'grch38' +p584 +(dp585 +g32 +S'NC_000002.12:g.49922214T>C' +p586 +sg34 +(dp587 +g36 +g53 +sg38 +g39 +sg40 +S'49922214' +p588 +sg42 +g43 +ssssg60 +(dp589 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317022.1' +p590 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330093.1' +p591 +sssS'NM_001135659.1:c.4374A>G' +p592 +(dp593 +g3 +g4 +sg5 +(lp594 +S'A more recent version of the selected reference sequence NM_001135659.1 is available (NM_001135659.2)' +p595 +aS'NM_001135659.2:c.4374A>G MUST be fully validated prior to use in reports' +p596 +aS'select_variants=NM_001135659.2:c.4374A>G' +p597 +aS'RefSeqGene record not available' +p598 +asg8 +g4 +sg9 +(lp599 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha2, mRNA +p600 
+sg13 +S'NRXN1' +p601 +sg15 +(dp602 +g17 +S'NP_001129131.1:p.(Pro1458=)' +p603 +sg19 +S'NP_001129131.1:p.(P1458=)' +p604 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001135659.1:c.4374A>G' +p605 +sg27 +g4 +sg28 +(dp606 +S'hg19' +p607 +(dp608 +g32 +S'NC_000002.11:g.50149352T>C' +p609 +sg34 +(dp610 +g36 +g37 +sg38 +g39 +sg40 +S'50149352' +p611 +sg42 +g43 +sssg44 +(dp612 +g32 +S'NC_000002.12:g.49922214T>C' +p613 +sg34 +(dp614 +g36 +g37 +sg38 +g39 +sg40 +S'49922214' +p615 +sg42 +g43 +sssS'grch37' +p616 +(dp617 +g32 +S'NC_000002.11:g.50149352T>C' +p618 +sg34 +(dp619 +g36 +g53 +sg38 +g39 +sg40 +S'50149352' +p620 +sg42 +g43 +sssS'grch38' +p621 +(dp622 +g32 +S'NC_000002.12:g.49922214T>C' +p623 +sg34 +(dp624 +g36 +g53 +sg38 +g39 +sg40 +S'49922214' +p625 +sg42 +g43 +ssssg60 +(dp626 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129131.1' +p627 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135659.1' +p628 +sssS'NM_001320157.1:c.150A>G' +p629 +(dp630 +g3 +g4 +sg5 +(lp631 +S'A more recent version of the selected reference sequence NM_001320157.1 is available (NM_001320157.3)' +p632 +aS'NM_001320157.3:c.150A>G MUST be fully validated prior to use in reports' +p633 +aS'select_variants=NM_001320157.3:c.150A>G' +p634 +aS'RefSeqGene record not available' +p635 +asg8 +g4 +sg9 +(lp636 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant gamma2, mRNA +p637 +sg13 +S'NRXN1' +p638 +sg15 +(dp639 +g17 +S'NP_001307086.1:p.(Pro50=)' +p640 +sg19 +S'NP_001307086.1:p.(P50=)' +p641 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001320157.1:c.150A>G' +p642 +sg27 +g4 +sg28 +(dp643 +S'hg19' +p644 +(dp645 +g32 +S'NC_000002.11:g.50149352T>C' +p646 +sg34 +(dp647 +g36 +g37 +sg38 +g39 +sg40 +S'50149352' +p648 +sg42 +g43 +sssg44 +(dp649 +g32 +S'NC_000002.12:g.49922214T>C' +p650 +sg34 +(dp651 +g36 +g37 +sg38 +g39 +sg40 +S'49922214' +p652 +sg42 +g43 +sssS'grch37' +p653 +(dp654 +g32 +S'NC_000002.11:g.50149352T>C' +p655 +sg34 +(dp656 +g36 +g53 +sg38 +g39 +sg40 +S'50149352' +p657 +sg42 +g43 
+sssS'grch38' +p658 +(dp659 +g32 +S'NC_000002.12:g.49922214T>C' +p660 +sg34 +(dp661 +g36 +g53 +sg38 +g39 +sg40 +S'49922214' +p662 +sg42 +g43 +ssssg60 +(dp663 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001307086.1' +p664 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001320157.1' +p665 +sssS'NM_001330084.1:c.4188A>G' +p666 +(dp667 +g3 +g4 +sg5 +(lp668 +S'RefSeqGene record not available' +p669 +asg8 +g4 +sg9 +(lp670 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha7, mRNA +p671 +sg13 +S'NRXN1' +p672 +sg15 +(dp673 +g17 +S'NP_001317013.1:p.(Pro1396=)' +p674 +sg19 +S'NP_001317013.1:p.(P1396=)' +p675 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330084.1:c.4188A>G' +p676 +sg27 +g4 +sg28 +(dp677 +S'hg19' +p678 +(dp679 +g32 +S'NC_000002.11:g.50149352T>C' +p680 +sg34 +(dp681 +g36 +g37 +sg38 +g39 +sg40 +S'50149352' +p682 +sg42 +g43 +sssg44 +(dp683 +g32 +S'NC_000002.12:g.49922214T>C' +p684 +sg34 +(dp685 +g36 +g37 +sg38 +g39 +sg40 +S'49922214' +p686 +sg42 +g43 +sssS'grch37' +p687 +(dp688 +g32 +S'NC_000002.11:g.50149352T>C' +p689 +sg34 +(dp690 +g36 +g53 +sg38 +g39 +sg40 +S'50149352' +p691 +sg42 +g43 +sssS'grch38' +p692 +(dp693 +g32 +S'NC_000002.12:g.49922214T>C' +p694 +sg34 +(dp695 +g36 +g53 +sg38 +g39 +sg40 +S'49922214' +p696 +sg42 +g43 +ssssg60 +(dp697 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317013.1' +p698 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330084.1' +p699 +sssS'NM_004801.4:c.4164A>G' +p700 +(dp701 +g3 +g4 +sg5 +(lp702 +S'A more recent version of the selected reference sequence NM_004801.4 is available (NM_004801.5)' +p703 +aS'NM_004801.5:c.4164A>G MUST be fully validated prior to use in reports' +p704 +aS'select_variants=NM_004801.5:c.4164A>G' +p705 +aS'RefSeqGene record not available' +p706 +asg8 +g4 +sg9 +(lp707 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha1, mRNA +p708 +sg13 +S'NRXN1' +p709 +sg15 +(dp710 +g17 +S'NP_004792.1:p.(Pro1388=)' +p711 +sg19 +S'NP_004792.1:p.(P1388=)' +p712 +ssg21 +g22 +sg23 
+g4 +sg24 +g4 +sg25 +S'NM_004801.4:c.4164A>G' +p713 +sg27 +g4 +sg28 +(dp714 +S'hg19' +p715 +(dp716 +g32 +S'NC_000002.11:g.50149352T>C' +p717 +sg34 +(dp718 +g36 +g37 +sg38 +g39 +sg40 +S'50149352' +p719 +sg42 +g43 +sssg44 +(dp720 +g32 +S'NC_000002.12:g.49922214T>C' +p721 +sg34 +(dp722 +g36 +g37 +sg38 +g39 +sg40 +S'49922214' +p723 +sg42 +g43 +sssS'grch37' +p724 +(dp725 +g32 +S'NC_000002.11:g.50149352T>C' +p726 +sg34 +(dp727 +g36 +g53 +sg38 +g39 +sg40 +S'50149352' +p728 +sg42 +g43 +sssS'grch38' +p729 +(dp730 +g32 +S'NC_000002.12:g.49922214T>C' +p731 +sg34 +(dp732 +g36 +g53 +sg38 +g39 +sg40 +S'49922214' +p733 +sg42 +g43 +ssssg60 +(dp734 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004792.1' +p735 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004801.4' +p736 +sssS'NM_001330082.1:c.4221A>G' +p737 +(dp738 +g3 +g4 +sg5 +(lp739 +S'RefSeqGene record not available' +p740 +asg8 +g4 +sg9 +(lp741 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha5, mRNA +p742 +sg13 +S'NRXN1' +p743 +sg15 +(dp744 +g17 +S'NP_001317011.1:p.(Pro1407=)' +p745 +sg19 +S'NP_001317011.1:p.(P1407=)' +p746 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330082.1:c.4221A>G' +p747 +sg27 +g4 +sg28 +(dp748 +S'hg19' +p749 +(dp750 +g32 +S'NC_000002.11:g.50149352T>C' +p751 +sg34 +(dp752 +g36 +g37 +sg38 +g39 +sg40 +S'50149352' +p753 +sg42 +g43 +sssg44 +(dp754 +g32 +S'NC_000002.12:g.49922214T>C' +p755 +sg34 +(dp756 +g36 +g37 +sg38 +g39 +sg40 +S'49922214' +p757 +sg42 +g43 +sssS'grch37' +p758 +(dp759 +g32 +S'NC_000002.11:g.50149352T>C' +p760 +sg34 +(dp761 +g36 +g53 +sg38 +g39 +sg40 +S'50149352' +p762 +sg42 +g43 +sssS'grch38' +p763 +(dp764 +g32 +S'NC_000002.12:g.49922214T>C' +p765 +sg34 +(dp766 +g36 +g53 +sg38 +g39 +sg40 +S'49922214' +p767 +sg42 +g43 +ssssg60 +(dp768 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317011.1' +p769 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330082.1' +p770 +sssS'flag' +p771 +S'gene_variant' +p772 +sS'NM_001330091.1:c.1140A>G' +p773 +(dp774 +g3 +g4 +sg5 
+(lp775 +S'RefSeqGene record not available' +p776 +asg8 +g4 +sg9 +(lp777 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant beta2, mRNA +p778 +sg13 +S'NRXN1' +p779 +sg15 +(dp780 +g17 +S'NP_001317020.1:p.(Pro380=)' +p781 +sg19 +S'NP_001317020.1:p.(P380=)' +p782 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330091.1:c.1140A>G' +p783 +sg27 +g4 +sg28 +(dp784 +S'hg19' +p785 +(dp786 +g32 +S'NC_000002.11:g.50149352T>C' +p787 +sg34 +(dp788 +g36 +g37 +sg38 +g39 +sg40 +S'50149352' +p789 +sg42 +g43 +sssg44 +(dp790 +g32 +S'NC_000002.12:g.49922214T>C' +p791 +sg34 +(dp792 +g36 +g37 +sg38 +g39 +sg40 +S'49922214' +p793 +sg42 +g43 +sssS'grch37' +p794 +(dp795 +g32 +S'NC_000002.11:g.50149352T>C' +p796 +sg34 +(dp797 +g36 +g53 +sg38 +g39 +sg40 +S'50149352' +p798 +sg42 +g43 +sssS'grch38' +p799 +(dp800 +g32 +S'NC_000002.12:g.49922214T>C' +p801 +sg34 +(dp802 +g36 +g53 +sg38 +g39 +sg40 +S'49922214' +p803 +sg42 +g43 +ssssg60 +(dp804 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317020.1' +p805 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330091.1' +p806 +sssS'NM_001320156.3:c.159A>G' +p807 +(dp808 +g3 +g4 +sg5 +(lp809 +S'RefSeqGene record not available' +p810 +asg8 +g4 +sg9 +(lp811 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant gamma1, mRNA +p812 +sg13 +S'NRXN1' +p813 +sg15 +(dp814 +g17 +S'NP_001307085.1:p.(Pro53=)' +p815 +sg19 +S'NP_001307085.1:p.(P53=)' +p816 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001320156.3:c.159A>G' +p817 +sg27 +g4 +sg28 +(dp818 +S'hg19' +p819 +(dp820 +g32 +S'NC_000002.11:g.50149352T>C' +p821 +sg34 +(dp822 +g36 +g37 +sg38 +g39 +sg40 +S'50149352' +p823 +sg42 +g43 +sssg44 +(dp824 +g32 +S'NC_000002.12:g.49922214T>C' +p825 +sg34 +(dp826 +g36 +g37 +sg38 +g39 +sg40 +S'49922214' +p827 +sg42 +g43 +sssS'grch37' +p828 +(dp829 +g32 +S'NC_000002.11:g.50149352T>C' +p830 +sg34 +(dp831 +g36 +g53 +sg38 +g39 +sg40 +S'50149352' +p832 +sg42 +g43 +sssS'grch38' +p833 +(dp834 +g32 +S'NC_000002.12:g.49922214T>C' +p835 +sg34 +(dp836 +g36 +g53 +sg38 +g39 
+sg40 +S'49922214' +p837 +sg42 +g43 +ssssg60 +(dp838 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001307085.1' +p839 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001320156.3' +p840 +sssS'NM_001330087.1:c.4053A>G' +p841 +(dp842 +g3 +g4 +sg5 +(lp843 +S'RefSeqGene record not available' +p844 +asg8 +g4 +sg9 +(lp845 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha10, mRNA +p846 +sg13 +S'NRXN1' +p847 +sg15 +(dp848 +g17 +S'NP_001317016.1:p.(Pro1351=)' +p849 +sg19 +S'NP_001317016.1:p.(P1351=)' +p850 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330087.1:c.4053A>G' +p851 +sg27 +g4 +sg28 +(dp852 +S'hg19' +p853 +(dp854 +g32 +S'NC_000002.11:g.50149352T>C' +p855 +sg34 +(dp856 +g36 +g37 +sg38 +g39 +sg40 +S'50149352' +p857 +sg42 +g43 +sssg44 +(dp858 +g32 +S'NC_000002.12:g.49922214T>C' +p859 +sg34 +(dp860 +g36 +g37 +sg38 +g39 +sg40 +S'49922214' +p861 +sg42 +g43 +sssS'grch37' +p862 +(dp863 +g32 +S'NC_000002.11:g.50149352T>C' +p864 +sg34 +(dp865 +g36 +g53 +sg38 +g39 +sg40 +S'50149352' +p866 +sg42 +g43 +sssS'grch38' +p867 +(dp868 +g32 +S'NC_000002.12:g.49922214T>C' +p869 +sg34 +(dp870 +g36 +g53 +sg38 +g39 +sg40 +S'49922214' +p871 +sg42 +g43 +ssssg60 +(dp872 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317016.1' +p873 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330087.1' +p874 +sssS'NM_001330097.1:c.1050A>G' +p875 +(dp876 +g3 +g4 +sg5 +(lp877 +S'RefSeqGene record not available' +p878 +asg8 +g4 +sg9 +(lp879 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant beta4, mRNA +p880 +sg13 +S'NRXN1' +p881 +sg15 +(dp882 +g17 +S'NP_001317026.1:p.(Pro350=)' +p883 +sg19 +S'NP_001317026.1:p.(P350=)' +p884 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330097.1:c.1050A>G' +p885 +sg27 +g4 +sg28 +(dp886 +S'hg19' +p887 +(dp888 +g32 +S'NC_000002.11:g.50149352T>C' +p889 +sg34 +(dp890 +g36 +g37 +sg38 +g39 +sg40 +S'50149352' +p891 +sg42 +g43 +sssg44 +(dp892 +g32 +S'NC_000002.12:g.49922214T>C' +p893 +sg34 +(dp894 +g36 +g37 +sg38 +g39 +sg40 +S'49922214' +p895 
+sg42 +g43 +sssS'grch37' +p896 +(dp897 +g32 +S'NC_000002.11:g.50149352T>C' +p898 +sg34 +(dp899 +g36 +g53 +sg38 +g39 +sg40 +S'50149352' +p900 +sg42 +g43 +sssS'grch38' +p901 +(dp902 +g32 +S'NC_000002.12:g.49922214T>C' +p903 +sg34 +(dp904 +g36 +g53 +sg38 +g39 +sg40 +S'49922214' +p905 +sg42 +g43 +ssssg60 +(dp906 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317026.1' +p907 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330097.1' +p908 +sssS'NM_004801.5:c.4164A>G' +p909 +(dp910 +g3 +g4 +sg5 +(lp911 +S'RefSeqGene record not available' +p912 +asg8 +g4 +sg9 +(lp913 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha1, mRNA +p914 +sg13 +S'NRXN1' +p915 +sg15 +(dp916 +g17 +S'NP_004792.1:p.(Pro1388=)' +p917 +sg19 +S'NP_004792.1:p.(P1388=)' +p918 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_004801.5:c.4164A>G' +p919 +sg27 +g4 +sg28 +(dp920 +S'hg19' +p921 +(dp922 +g32 +S'NC_000002.11:g.50149352T>C' +p923 +sg34 +(dp924 +g36 +g37 +sg38 +g39 +sg40 +S'50149352' +p925 +sg42 +g43 +sssg44 +(dp926 +g32 +S'NC_000002.12:g.49922214T>C' +p927 +sg34 +(dp928 +g36 +g37 +sg38 +g39 +sg40 +S'49922214' +p929 +sg42 +g43 +sssS'grch37' +p930 +(dp931 +g32 +S'NC_000002.11:g.50149352T>C' +p932 +sg34 +(dp933 +g36 +g53 +sg38 +g39 +sg40 +S'50149352' +p934 +sg42 +g43 +sssS'grch38' +p935 +(dp936 +g32 +S'NC_000002.12:g.49922214T>C' +p937 +sg34 +(dp938 +g36 +g53 +sg38 +g39 +sg40 +S'49922214' +p939 +sg42 +g43 +ssssg60 +(dp940 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004792.1' +p941 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004801.5' +p942 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant267.txt b/VariantValidator/testing/testOutputsMasterITS/variant267.txt new file mode 100644 index 00000000..742727c3 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant267.txt @@ -0,0 +1,2023 @@ +(dp0 +S'NM_001330096.1:c.1201C>T' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha15, mRNA +p12 +sS'gene_symbol' +p13 +S'NRXN1' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_001317025.1:p.(Pro401Ser)' +p18 +sS'slr' +p19 +S'NP_001317025.1:p.(P401S)' +p20 +ssS'submitted_variant' +p21 +S'2-50847195-G-A' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_001330096.1:c.1201C>T' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000002.11:g.50847195G>A' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr2' +p37 +sS'ref' +p38 +VG +p39 +sS'pos' +p40 +S'50847195' +p41 +sS'alt' +p42 +VA +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000002.12:g.50620057G>A' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +g39 +sg40 +S'50620057' +p48 +sg42 +g43 +sssS'grch37' +p49 +(dp50 +g32 +S'NC_000002.11:g.50847195G>A' +p51 +sg34 +(dp52 +g36 +S'2' +p53 +sg38 +g39 +sg40 +S'50847195' +p54 +sg42 +g43 +sssS'grch38' +p55 +(dp56 +g32 +S'NC_000002.12:g.50620057G>A' +p57 +sg34 +(dp58 +g36 +g53 +sg38 +g39 +sg40 +S'50620057' +p59 +sg42 +g43 +ssssS'reference_sequence_records' +p60 +(dp61 +S'protein' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317025.1' +p63 +sS'transcript' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330096.1' +p65 +sssS'NM_001330084.1:c.1246C>T' 
+p66 +(dp67 +g3 +g4 +sg5 +(lp68 +S'RefSeqGene record not available' +p69 +asg8 +g4 +sg9 +(lp70 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha7, mRNA +p71 +sg13 +S'NRXN1' +p72 +sg15 +(dp73 +g17 +S'NP_001317013.1:p.(Pro416Ser)' +p74 +sg19 +S'NP_001317013.1:p.(P416S)' +p75 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330084.1:c.1246C>T' +p76 +sg27 +g4 +sg28 +(dp77 +S'hg19' +p78 +(dp79 +g32 +S'NC_000002.11:g.50847195G>A' +p80 +sg34 +(dp81 +g36 +g37 +sg38 +g39 +sg40 +S'50847195' +p82 +sg42 +g43 +sssg44 +(dp83 +g32 +S'NC_000002.12:g.50620057G>A' +p84 +sg34 +(dp85 +g36 +g37 +sg38 +g39 +sg40 +S'50620057' +p86 +sg42 +g43 +sssS'grch37' +p87 +(dp88 +g32 +S'NC_000002.11:g.50847195G>A' +p89 +sg34 +(dp90 +g36 +g53 +sg38 +g39 +sg40 +S'50847195' +p91 +sg42 +g43 +sssS'grch38' +p92 +(dp93 +g32 +S'NC_000002.12:g.50620057G>A' +p94 +sg34 +(dp95 +g36 +g53 +sg38 +g39 +sg40 +S'50620057' +p96 +sg42 +g43 +ssssg60 +(dp97 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317013.1' +p98 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330084.1' +p99 +sssS'NM_001330077.1:c.1261C>T' +p100 +(dp101 +g3 +g4 +sg5 +(lp102 +S'RefSeqGene record not available' +p103 +asg8 +g4 +sg9 +(lp104 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha3, mRNA +p105 +sg13 +S'NRXN1' +p106 +sg15 +(dp107 +g17 +S'NP_001317006.1:p.(Pro421Ser)' +p108 +sg19 +S'NP_001317006.1:p.(P421S)' +p109 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330077.1:c.1261C>T' +p110 +sg27 +g4 +sg28 +(dp111 +S'hg19' +p112 +(dp113 +g32 +S'NC_000002.11:g.50847195G>A' +p114 +sg34 +(dp115 +g36 +g37 +sg38 +g39 +sg40 +S'50847195' +p116 +sg42 +g43 +sssg44 +(dp117 +g32 +S'NC_000002.12:g.50620057G>A' +p118 +sg34 +(dp119 +g36 +g37 +sg38 +g39 +sg40 +S'50620057' +p120 +sg42 +g43 +sssS'grch37' +p121 +(dp122 +g32 +S'NC_000002.11:g.50847195G>A' +p123 +sg34 +(dp124 +g36 +g53 +sg38 +g39 +sg40 +S'50847195' +p125 +sg42 +g43 +sssS'grch38' +p126 +(dp127 +g32 +S'NC_000002.12:g.50620057G>A' +p128 +sg34 +(dp129 +g36 +g53 +sg38 
+g39 +sg40 +S'50620057' +p130 +sg42 +g43 +ssssg60 +(dp131 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317006.1' +p132 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330077.1' +p133 +sssS'NM_001330086.1:c.1285C>T' +p134 +(dp135 +g3 +g4 +sg5 +(lp136 +S'RefSeqGene record not available' +p137 +asg8 +g4 +sg9 +(lp138 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha9, mRNA +p139 +sg13 +S'NRXN1' +p140 +sg15 +(dp141 +g17 +S'NP_001317015.1:p.(Pro429Ser)' +p142 +sg19 +S'NP_001317015.1:p.(P429S)' +p143 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330086.1:c.1285C>T' +p144 +sg27 +g4 +sg28 +(dp145 +S'hg19' +p146 +(dp147 +g32 +S'NC_000002.11:g.50847195G>A' +p148 +sg34 +(dp149 +g36 +g37 +sg38 +g39 +sg40 +S'50847195' +p150 +sg42 +g43 +sssg44 +(dp151 +g32 +S'NC_000002.12:g.50620057G>A' +p152 +sg34 +(dp153 +g36 +g37 +sg38 +g39 +sg40 +S'50620057' +p154 +sg42 +g43 +sssS'grch37' +p155 +(dp156 +g32 +S'NC_000002.11:g.50847195G>A' +p157 +sg34 +(dp158 +g36 +g53 +sg38 +g39 +sg40 +S'50847195' +p159 +sg42 +g43 +sssS'grch38' +p160 +(dp161 +g32 +S'NC_000002.12:g.50620057G>A' +p162 +sg34 +(dp163 +g36 +g53 +sg38 +g39 +sg40 +S'50620057' +p164 +sg42 +g43 +ssssg60 +(dp165 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317015.1' +p166 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330086.1' +p167 +sssS'NM_001330088.1:c.1231C>T' +p168 +(dp169 +g3 +g4 +sg5 +(lp170 +S'RefSeqGene record not available' +p171 +asg8 +g4 +sg9 +(lp172 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha11, mRNA +p173 +sg13 +S'NRXN1' +p174 +sg15 +(dp175 +g17 +S'NP_001317017.1:p.(Pro411Ser)' +p176 +sg19 +S'NP_001317017.1:p.(P411S)' +p177 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330088.1:c.1231C>T' +p178 +sg27 +g4 +sg28 +(dp179 +S'hg19' +p180 +(dp181 +g32 +S'NC_000002.11:g.50847195G>A' +p182 +sg34 +(dp183 +g36 +g37 +sg38 +g39 +sg40 +S'50847195' +p184 +sg42 +g43 +sssg44 +(dp185 +g32 +S'NC_000002.12:g.50620057G>A' +p186 +sg34 +(dp187 +g36 +g37 +sg38 +g39 +sg40 +S'50620057' 
+p188 +sg42 +g43 +sssS'grch37' +p189 +(dp190 +g32 +S'NC_000002.11:g.50847195G>A' +p191 +sg34 +(dp192 +g36 +g53 +sg38 +g39 +sg40 +S'50847195' +p193 +sg42 +g43 +sssS'grch38' +p194 +(dp195 +g32 +S'NC_000002.12:g.50620057G>A' +p196 +sg34 +(dp197 +g36 +g53 +sg38 +g39 +sg40 +S'50620057' +p198 +sg42 +g43 +ssssg60 +(dp199 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317017.1' +p200 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330088.1' +p201 +sssS'NM_001330093.1:c.1282C>T' +p202 +(dp203 +g3 +g4 +sg5 +(lp204 +S'RefSeqGene record not available' +p205 +asg8 +g4 +sg9 +(lp206 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha12, mRNA +p207 +sg13 +S'NRXN1' +p208 +sg15 +(dp209 +g17 +S'NP_001317022.1:p.(Pro428Ser)' +p210 +sg19 +S'NP_001317022.1:p.(P428S)' +p211 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330093.1:c.1282C>T' +p212 +sg27 +g4 +sg28 +(dp213 +S'hg19' +p214 +(dp215 +g32 +S'NC_000002.11:g.50847195G>A' +p216 +sg34 +(dp217 +g36 +g37 +sg38 +g39 +sg40 +S'50847195' +p218 +sg42 +g43 +sssg44 +(dp219 +g32 +S'NC_000002.12:g.50620057G>A' +p220 +sg34 +(dp221 +g36 +g37 +sg38 +g39 +sg40 +S'50620057' +p222 +sg42 +g43 +sssS'grch37' +p223 +(dp224 +g32 +S'NC_000002.11:g.50847195G>A' +p225 +sg34 +(dp226 +g36 +g53 +sg38 +g39 +sg40 +S'50847195' +p227 +sg42 +g43 +sssS'grch38' +p228 +(dp229 +g32 +S'NC_000002.12:g.50620057G>A' +p230 +sg34 +(dp231 +g36 +g53 +sg38 +g39 +sg40 +S'50620057' +p232 +sg42 +g43 +ssssg60 +(dp233 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317022.1' +p234 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330093.1' +p235 +sssS'NM_001330087.1:c.1201C>T' +p236 +(dp237 +g3 +g4 +sg5 +(lp238 +S'RefSeqGene record not available' +p239 +asg8 +g4 +sg9 +(lp240 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha10, mRNA +p241 +sg13 +S'NRXN1' +p242 +sg15 +(dp243 +g17 +S'NP_001317016.1:p.(Pro401Ser)' +p244 +sg19 +S'NP_001317016.1:p.(P401S)' +p245 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330087.1:c.1201C>T' +p246 +sg27 +g4 
+sg28 +(dp247 +S'hg19' +p248 +(dp249 +g32 +S'NC_000002.11:g.50847195G>A' +p250 +sg34 +(dp251 +g36 +g37 +sg38 +g39 +sg40 +S'50847195' +p252 +sg42 +g43 +sssg44 +(dp253 +g32 +S'NC_000002.12:g.50620057G>A' +p254 +sg34 +(dp255 +g36 +g37 +sg38 +g39 +sg40 +S'50620057' +p256 +sg42 +g43 +sssS'grch37' +p257 +(dp258 +g32 +S'NC_000002.11:g.50847195G>A' +p259 +sg34 +(dp260 +g36 +g53 +sg38 +g39 +sg40 +S'50847195' +p261 +sg42 +g43 +sssS'grch38' +p262 +(dp263 +g32 +S'NC_000002.12:g.50620057G>A' +p264 +sg34 +(dp265 +g36 +g53 +sg38 +g39 +sg40 +S'50620057' +p266 +sg42 +g43 +ssssg60 +(dp267 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317016.1' +p268 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330087.1' +p269 +sssS'NM_001330082.1:c.1261C>T' +p270 +(dp271 +g3 +g4 +sg5 +(lp272 +S'RefSeqGene record not available' +p273 +asg8 +g4 +sg9 +(lp274 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha5, mRNA +p275 +sg13 +S'NRXN1' +p276 +sg15 +(dp277 +g17 +S'NP_001317011.1:p.(Pro421Ser)' +p278 +sg19 +S'NP_001317011.1:p.(P421S)' +p279 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330082.1:c.1261C>T' +p280 +sg27 +g4 +sg28 +(dp281 +S'hg19' +p282 +(dp283 +g32 +S'NC_000002.11:g.50847195G>A' +p284 +sg34 +(dp285 +g36 +g37 +sg38 +g39 +sg40 +S'50847195' +p286 +sg42 +g43 +sssg44 +(dp287 +g32 +S'NC_000002.12:g.50620057G>A' +p288 +sg34 +(dp289 +g36 +g37 +sg38 +g39 +sg40 +S'50620057' +p290 +sg42 +g43 +sssS'grch37' +p291 +(dp292 +g32 +S'NC_000002.11:g.50847195G>A' +p293 +sg34 +(dp294 +g36 +g53 +sg38 +g39 +sg40 +S'50847195' +p295 +sg42 +g43 +sssS'grch38' +p296 +(dp297 +g32 +S'NC_000002.12:g.50620057G>A' +p298 +sg34 +(dp299 +g36 +g53 +sg38 +g39 +sg40 +S'50620057' +p300 +sg42 +g43 +ssssg60 +(dp301 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317011.1' +p302 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330082.1' +p303 +sssS'NM_001330078.1:c.1285C>T' +p304 +(dp305 +g3 +g4 +sg5 +(lp306 +S'RefSeqGene record not available' +p307 +asg8 +g4 +sg9 +(lp308 +sg11 +VHomo sapiens 
neurexin 1 (NRXN1), transcript variant alpha4, mRNA +p309 +sg13 +S'NRXN1' +p310 +sg15 +(dp311 +g17 +S'NP_001317007.1:p.(Pro429Ser)' +p312 +sg19 +S'NP_001317007.1:p.(P429S)' +p313 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330078.1:c.1285C>T' +p314 +sg27 +g4 +sg28 +(dp315 +S'hg19' +p316 +(dp317 +g32 +S'NC_000002.11:g.50847195G>A' +p318 +sg34 +(dp319 +g36 +g37 +sg38 +g39 +sg40 +S'50847195' +p320 +sg42 +g43 +sssg44 +(dp321 +g32 +S'NC_000002.12:g.50620057G>A' +p322 +sg34 +(dp323 +g36 +g37 +sg38 +g39 +sg40 +S'50620057' +p324 +sg42 +g43 +sssS'grch37' +p325 +(dp326 +g32 +S'NC_000002.11:g.50847195G>A' +p327 +sg34 +(dp328 +g36 +g53 +sg38 +g39 +sg40 +S'50847195' +p329 +sg42 +g43 +sssS'grch38' +p330 +(dp331 +g32 +S'NC_000002.12:g.50620057G>A' +p332 +sg34 +(dp333 +g36 +g53 +sg38 +g39 +sg40 +S'50620057' +p334 +sg42 +g43 +ssssg60 +(dp335 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317007.1' +p336 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330078.1' +p337 +sssS'NM_001330094.1:c.1273C>T' +p338 +(dp339 +g3 +g4 +sg5 +(lp340 +S'RefSeqGene record not available' +p341 +asg8 +g4 +sg9 +(lp342 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha13, mRNA +p343 +sg13 +S'NRXN1' +p344 +sg15 +(dp345 +g17 +S'NP_001317023.1:p.(Pro425Ser)' +p346 +sg19 +S'NP_001317023.1:p.(P425S)' +p347 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330094.1:c.1273C>T' +p348 +sg27 +g4 +sg28 +(dp349 +S'hg19' +p350 +(dp351 +g32 +S'NC_000002.11:g.50847195G>A' +p352 +sg34 +(dp353 +g36 +g37 +sg38 +g39 +sg40 +S'50847195' +p354 +sg42 +g43 +sssg44 +(dp355 +g32 +S'NC_000002.12:g.50620057G>A' +p356 +sg34 +(dp357 +g36 +g37 +sg38 +g39 +sg40 +S'50620057' +p358 +sg42 +g43 +sssS'grch37' +p359 +(dp360 +g32 +S'NC_000002.11:g.50847195G>A' +p361 +sg34 +(dp362 +g36 +g53 +sg38 +g39 +sg40 +S'50847195' +p363 +sg42 +g43 +sssS'grch38' +p364 +(dp365 +g32 +S'NC_000002.12:g.50620057G>A' +p366 +sg34 +(dp367 +g36 +g53 +sg38 +g39 +sg40 +S'50620057' +p368 +sg42 +g43 +ssssg60 +(dp369 +g62 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317023.1' +p370 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330094.1' +p371 +sssS'flag' +p372 +S'gene_variant' +p373 +sS'NM_001135659.2:c.1405C>T' +p374 +(dp375 +g3 +g4 +sg5 +(lp376 +S'RefSeqGene record not available' +p377 +asg8 +g4 +sg9 +(lp378 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha2, mRNA +p379 +sg13 +S'NRXN1' +p380 +sg15 +(dp381 +g17 +S'NP_001129131.1:p.(Pro469Ser)' +p382 +sg19 +S'NP_001129131.1:p.(P469S)' +p383 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001135659.2:c.1405C>T' +p384 +sg27 +g4 +sg28 +(dp385 +S'hg19' +p386 +(dp387 +g32 +S'NC_000002.11:g.50847195G>A' +p388 +sg34 +(dp389 +g36 +g37 +sg38 +g39 +sg40 +S'50847195' +p390 +sg42 +g43 +sssg44 +(dp391 +g32 +S'NC_000002.12:g.50620057G>A' +p392 +sg34 +(dp393 +g36 +g37 +sg38 +g39 +sg40 +S'50620057' +p394 +sg42 +g43 +sssS'grch37' +p395 +(dp396 +g32 +S'NC_000002.11:g.50847195G>A' +p397 +sg34 +(dp398 +g36 +g53 +sg38 +g39 +sg40 +S'50847195' +p399 +sg42 +g43 +sssS'grch38' +p400 +(dp401 +g32 +S'NC_000002.12:g.50620057G>A' +p402 +sg34 +(dp403 +g36 +g53 +sg38 +g39 +sg40 +S'50620057' +p404 +sg42 +g43 +ssssg60 +(dp405 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129131.1' +p406 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135659.2' +p407 +sssS'NM_001330083.1:c.1246C>T' +p408 +(dp409 +g3 +g4 +sg5 +(lp410 +S'RefSeqGene record not available' +p411 +asg8 +g4 +sg9 +(lp412 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha6, mRNA +p413 +sg13 +S'NRXN1' +p414 +sg15 +(dp415 +g17 +S'NP_001317012.1:p.(Pro416Ser)' +p416 +sg19 +S'NP_001317012.1:p.(P416S)' +p417 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330083.1:c.1246C>T' +p418 +sg27 +g4 +sg28 +(dp419 +S'hg19' +p420 +(dp421 +g32 +S'NC_000002.11:g.50847195G>A' +p422 +sg34 +(dp423 +g36 +g37 +sg38 +g39 +sg40 +S'50847195' +p424 +sg42 +g43 +sssg44 +(dp425 +g32 +S'NC_000002.12:g.50620057G>A' +p426 +sg34 +(dp427 +g36 +g37 +sg38 +g39 +sg40 +S'50620057' +p428 +sg42 +g43 
+sssS'grch37' +p429 +(dp430 +g32 +S'NC_000002.11:g.50847195G>A' +p431 +sg34 +(dp432 +g36 +g53 +sg38 +g39 +sg40 +S'50847195' +p433 +sg42 +g43 +sssS'grch38' +p434 +(dp435 +g32 +S'NC_000002.12:g.50620057G>A' +p436 +sg34 +(dp437 +g36 +g53 +sg38 +g39 +sg40 +S'50620057' +p438 +sg42 +g43 +ssssg60 +(dp439 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317012.1' +p440 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330083.1' +p441 +sssS'NM_004801.5:c.1285C>T' +p442 +(dp443 +g3 +g4 +sg5 +(lp444 +S'RefSeqGene record not available' +p445 +asg8 +g4 +sg9 +(lp446 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha1, mRNA +p447 +sg13 +S'NRXN1' +p448 +sg15 +(dp449 +g17 +S'NP_004792.1:p.(Pro429Ser)' +p450 +sg19 +S'NP_004792.1:p.(P429S)' +p451 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_004801.5:c.1285C>T' +p452 +sg27 +g4 +sg28 +(dp453 +S'hg19' +p454 +(dp455 +g32 +S'NC_000002.11:g.50847195G>A' +p456 +sg34 +(dp457 +g36 +g37 +sg38 +g39 +sg40 +S'50847195' +p458 +sg42 +g43 +sssg44 +(dp459 +g32 +S'NC_000002.12:g.50620057G>A' +p460 +sg34 +(dp461 +g36 +g37 +sg38 +g39 +sg40 +S'50620057' +p462 +sg42 +g43 +sssS'grch37' +p463 +(dp464 +g32 +S'NC_000002.11:g.50847195G>A' +p465 +sg34 +(dp466 +g36 +g53 +sg38 +g39 +sg40 +S'50847195' +p467 +sg42 +g43 +sssS'grch38' +p468 +(dp469 +g32 +S'NC_000002.12:g.50620057G>A' +p470 +sg34 +(dp471 +g36 +g53 +sg38 +g39 +sg40 +S'50620057' +p472 +sg42 +g43 +ssssg60 +(dp473 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004792.1' +p474 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004801.5' +p475 +sssS'NM_001330085.1:c.1285C>T' +p476 +(dp477 +g3 +g4 +sg5 +(lp478 +S'RefSeqGene record not available' +p479 +asg8 +g4 +sg9 +(lp480 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha8, mRNA +p481 +sg13 +S'NRXN1' +p482 +sg15 +(dp483 +g17 +S'NP_001317014.1:p.(Pro429Ser)' +p484 +sg19 +S'NP_001317014.1:p.(P429S)' +p485 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330085.1:c.1285C>T' +p486 +sg27 +g4 +sg28 +(dp487 +S'hg19' +p488 +(dp489 
+g32 +S'NC_000002.11:g.50847195G>A' +p490 +sg34 +(dp491 +g36 +g37 +sg38 +g39 +sg40 +S'50847195' +p492 +sg42 +g43 +sssg44 +(dp493 +g32 +S'NC_000002.12:g.50620057G>A' +p494 +sg34 +(dp495 +g36 +g37 +sg38 +g39 +sg40 +S'50620057' +p496 +sg42 +g43 +sssS'grch37' +p497 +(dp498 +g32 +S'NC_000002.11:g.50847195G>A' +p499 +sg34 +(dp500 +g36 +g53 +sg38 +g39 +sg40 +S'50847195' +p501 +sg42 +g43 +sssS'grch38' +p502 +(dp503 +g32 +S'NC_000002.12:g.50620057G>A' +p504 +sg34 +(dp505 +g36 +g53 +sg38 +g39 +sg40 +S'50620057' +p506 +sg42 +g43 +ssssg60 +(dp507 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317014.1' +p508 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330085.1' +p509 +sssS'NM_001330095.1:c.1261C>T' +p510 +(dp511 +g3 +g4 +sg5 +(lp512 +S'RefSeqGene record not available' +p513 +asg8 +g4 +sg9 +(lp514 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha14, mRNA +p515 +sg13 +S'NRXN1' +p516 +sg15 +(dp517 +g17 +S'NP_001317024.1:p.(Pro421Ser)' +p518 +sg19 +S'NP_001317024.1:p.(P421S)' +p519 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001330095.1:c.1261C>T' +p520 +sg27 +g4 +sg28 +(dp521 +S'hg19' +p522 +(dp523 +g32 +S'NC_000002.11:g.50847195G>A' +p524 +sg34 +(dp525 +g36 +g37 +sg38 +g39 +sg40 +S'50847195' +p526 +sg42 +g43 +sssg44 +(dp527 +g32 +S'NC_000002.12:g.50620057G>A' +p528 +sg34 +(dp529 +g36 +g37 +sg38 +g39 +sg40 +S'50620057' +p530 +sg42 +g43 +sssS'grch37' +p531 +(dp532 +g32 +S'NC_000002.11:g.50847195G>A' +p533 +sg34 +(dp534 +g36 +g53 +sg38 +g39 +sg40 +S'50847195' +p535 +sg42 +g43 +sssS'grch38' +p536 +(dp537 +g32 +S'NC_000002.12:g.50620057G>A' +p538 +sg34 +(dp539 +g36 +g53 +sg38 +g39 +sg40 +S'50620057' +p540 +sg42 +g43 +ssssg60 +(dp541 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317024.1' +p542 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330095.1' +p543 +sssS'NM_004801.4:c.1285C>T' +p544 +(dp545 +g3 +g4 +sg5 +(lp546 +S'A more recent version of the selected reference sequence NM_004801.4 is available (NM_004801.5)' +p547 
+aS'NM_004801.5:c.1285C>T MUST be fully validated prior to use in reports' +p548 +aS'select_variants=NM_004801.5:c.1285C>T' +p549 +aS'RefSeqGene record not available' +p550 +asg8 +g4 +sg9 +(lp551 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha1, mRNA +p552 +sg13 +S'NRXN1' +p553 +sg15 +(dp554 +g17 +S'NP_004792.1:p.(Pro429Ser)' +p555 +sg19 +S'NP_004792.1:p.(P429S)' +p556 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_004801.4:c.1285C>T' +p557 +sg27 +g4 +sg28 +(dp558 +S'hg19' +p559 +(dp560 +g32 +S'NC_000002.11:g.50847195G>A' +p561 +sg34 +(dp562 +g36 +g37 +sg38 +g39 +sg40 +S'50847195' +p563 +sg42 +g43 +sssg44 +(dp564 +g32 +S'NC_000002.12:g.50620057G>A' +p565 +sg34 +(dp566 +g36 +g37 +sg38 +g39 +sg40 +S'50620057' +p567 +sg42 +g43 +sssS'grch37' +p568 +(dp569 +g32 +S'NC_000002.11:g.50847195G>A' +p570 +sg34 +(dp571 +g36 +g53 +sg38 +g39 +sg40 +S'50847195' +p572 +sg42 +g43 +sssS'grch38' +p573 +(dp574 +g32 +S'NC_000002.12:g.50620057G>A' +p575 +sg34 +(dp576 +g36 +g53 +sg38 +g39 +sg40 +S'50620057' +p577 +sg42 +g43 +ssssg60 +(dp578 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004792.1' +p579 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004801.4' +p580 +sssS'NM_001135659.1:c.1405C>T' +p581 +(dp582 +g3 +g4 +sg5 +(lp583 +S'A more recent version of the selected reference sequence NM_001135659.1 is available (NM_001135659.2)' +p584 +aS'NM_001135659.2:c.1405C>T MUST be fully validated prior to use in reports' +p585 +aS'select_variants=NM_001135659.2:c.1405C>T' +p586 +aS'RefSeqGene record not available' +p587 +asg8 +g4 +sg9 +(lp588 +sg11 +VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha2, mRNA +p589 +sg13 +S'NRXN1' +p590 +sg15 +(dp591 +g17 +S'NP_001129131.1:p.(Pro469Ser)' +p592 +sg19 +S'NP_001129131.1:p.(P469S)' +p593 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001135659.1:c.1405C>T' +p594 +sg27 +g4 +sg28 +(dp595 +S'hg19' +p596 +(dp597 +g32 +S'NC_000002.11:g.50847195G>A' +p598 +sg34 +(dp599 +g36 +g37 +sg38 +g39 +sg40 +S'50847195' +p600 +sg42 +g43 +sssg44 
+(dp601 +g32 +S'NC_000002.12:g.50620057G>A' +p602 +sg34 +(dp603 +g36 +g37 +sg38 +g39 +sg40 +S'50620057' +p604 +sg42 +g43 +sssS'grch37' +p605 +(dp606 +g32 +S'NC_000002.11:g.50847195G>A' +p607 +sg34 +(dp608 +g36 +g53 +sg38 +g39 +sg40 +S'50847195' +p609 +sg42 +g43 +sssS'grch38' +p610 +(dp611 +g32 +S'NC_000002.12:g.50620057G>A' +p612 +sg34 +(dp613 +g36 +g53 +sg38 +g39 +sg40 +S'50620057' +p614 +sg42 +g43 +ssssg60 +(dp615 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129131.1' +p616 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135659.1' +p617 +sssS'metadata' +p618 +(dp619 +S'variantvalidator_hgvs_version' +p620 +S'1.1.3' +p621 +sS'uta_schema' +p622 +S'uta_20180821' +p623 +sS'seqrepo_db' +p624 +S'2018-08-21' +p625 +sS'variantvalidator_version' +p626 +S'v0.2' +p627 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant268.txt b/VariantValidator/testing/testOutputsMasterITS/variant268.txt new file mode 100644 index 00000000..31ebad6d --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant268.txt @@ -0,0 +1,1666 @@ +(dp0 +S'NM_001130986.1:c.3585C>G' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens dysferlin (DYSF), transcript variant 3, mRNA +p12 +sS'gene_symbol' +p13 +S'DYSF' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_001124458.1:p.(Ile1195Met)' +p18 +sS'slr' +p19 +S'NP_001124458.1:p.(I1195M)' +p20 +ssS'submitted_variant' +p21 +S'2-71825797-C-G' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_001130986.1:c.3585C>G' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000002.11:g.71825797C>G' +p33 
+sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr2' +p37 +sS'ref' +p38 +S'C' +p39 +sS'pos' +p40 +S'71825797' +p41 +sS'alt' +p42 +S'G' +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000002.12:g.71598667C>G' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +g39 +sg40 +S'71598667' +p48 +sg42 +g43 +sssS'grch37' +p49 +(dp50 +g32 +S'NC_000002.11:g.71825797C>G' +p51 +sg34 +(dp52 +g36 +S'2' +p53 +sg38 +g39 +sg40 +S'71825797' +p54 +sg42 +g43 +sssS'grch38' +p55 +(dp56 +g32 +S'NC_000002.12:g.71598667C>G' +p57 +sg34 +(dp58 +g36 +g53 +sg38 +g39 +sg40 +S'71598667' +p59 +sg42 +g43 +ssssS'reference_sequence_records' +p60 +(dp61 +S'protein' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124458.1' +p63 +sS'transcript' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130986.1' +p65 +sssS'NM_001130976.1:c.3582C>G' +p66 +(dp67 +g3 +g4 +sg5 +(lp68 +S'RefSeqGene record not available' +p69 +asg8 +g4 +sg9 +(lp70 +sg11 +VHomo sapiens dysferlin (DYSF), transcript variant 9, mRNA +p71 +sg13 +S'DYSF' +p72 +sg15 +(dp73 +g17 +S'NP_001124448.1:p.(Ile1194Met)' +p74 +sg19 +S'NP_001124448.1:p.(I1194M)' +p75 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001130976.1:c.3582C>G' +p76 +sg27 +g4 +sg28 +(dp77 +S'hg19' +p78 +(dp79 +g32 +S'NC_000002.11:g.71825797C>G' +p80 +sg34 +(dp81 +g36 +g37 +sg38 +g39 +sg40 +S'71825797' +p82 +sg42 +g43 +sssg44 +(dp83 +g32 +S'NC_000002.12:g.71598667C>G' +p84 +sg34 +(dp85 +g36 +g37 +sg38 +g39 +sg40 +S'71598667' +p86 +sg42 +g43 +sssS'grch37' +p87 +(dp88 +g32 +S'NC_000002.11:g.71825797C>G' +p89 +sg34 +(dp90 +g36 +g53 +sg38 +g39 +sg40 +S'71825797' +p91 +sg42 +g43 +sssS'grch38' +p92 +(dp93 +g32 +S'NC_000002.12:g.71598667C>G' +p94 +sg34 +(dp95 +g36 +g53 +sg38 +g39 +sg40 +S'71598667' +p96 +sg42 +g43 +ssssg60 +(dp97 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124448.1' +p98 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130976.1' +p99 +sssS'NM_001130981.1:c.3675C>G' +p100 +(dp101 +g3 +g4 +sg5 +(lp102 +S'RefSeqGene record not available' +p103 +asg8 +g4 +sg9 +(lp104 +sg11 +VHomo sapiens dysferlin 
(DYSF), transcript variant 14, mRNA +p105 +sg13 +S'DYSF' +p106 +sg15 +(dp107 +g17 +S'NP_001124453.1:p.(Ile1225Met)' +p108 +sg19 +S'NP_001124453.1:p.(I1225M)' +p109 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001130981.1:c.3675C>G' +p110 +sg27 +g4 +sg28 +(dp111 +S'hg19' +p112 +(dp113 +g32 +S'NC_000002.11:g.71825797C>G' +p114 +sg34 +(dp115 +g36 +g37 +sg38 +g39 +sg40 +S'71825797' +p116 +sg42 +g43 +sssg44 +(dp117 +g32 +S'NC_000002.12:g.71598667C>G' +p118 +sg34 +(dp119 +g36 +g37 +sg38 +g39 +sg40 +S'71598667' +p120 +sg42 +g43 +sssS'grch37' +p121 +(dp122 +g32 +S'NC_000002.11:g.71825797C>G' +p123 +sg34 +(dp124 +g36 +g53 +sg38 +g39 +sg40 +S'71825797' +p125 +sg42 +g43 +sssS'grch38' +p126 +(dp127 +g32 +S'NC_000002.12:g.71598667C>G' +p128 +sg34 +(dp129 +g36 +g53 +sg38 +g39 +sg40 +S'71598667' +p130 +sg42 +g43 +ssssg60 +(dp131 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124453.1' +p132 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130981.1' +p133 +sssS'NM_003494.3:c.3624C>G' +p134 +(dp135 +g3 +g4 +sg5 +(lp136 +S'RefSeqGene record not available' +p137 +asg8 +g4 +sg9 +(lp138 +sg11 +VHomo sapiens dysferlin (DYSF), transcript variant 8, mRNA +p139 +sg13 +S'DYSF' +p140 +sg15 +(dp141 +g17 +S'NP_003485.1:p.(Ile1208Met)' +p142 +sg19 +S'NP_003485.1:p.(I1208M)' +p143 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_003494.3:c.3624C>G' +p144 +sg27 +g4 +sg28 +(dp145 +S'hg19' +p146 +(dp147 +g32 +S'NC_000002.11:g.71825797C>G' +p148 +sg34 +(dp149 +g36 +g37 +sg38 +g39 +sg40 +S'71825797' +p150 +sg42 +g43 +sssg44 +(dp151 +g32 +S'NC_000002.12:g.71598667C>G' +p152 +sg34 +(dp153 +g36 +g37 +sg38 +g39 +sg40 +S'71598667' +p154 +sg42 +g43 +sssS'grch37' +p155 +(dp156 +g32 +S'NC_000002.11:g.71825797C>G' +p157 +sg34 +(dp158 +g36 +g53 +sg38 +g39 +sg40 +S'71825797' +p159 +sg42 +g43 +sssS'grch38' +p160 +(dp161 +g32 +S'NC_000002.12:g.71598667C>G' +p162 +sg34 +(dp163 +g36 +g53 +sg38 +g39 +sg40 +S'71598667' +p164 +sg42 +g43 +ssssg60 +(dp165 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003485.1' +p166 
+sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003494.3' +p167 +sssS'NM_001130985.1:c.3678C>G' +p168 +(dp169 +g3 +g4 +sg5 +(lp170 +S'RefSeqGene record not available' +p171 +asg8 +g4 +sg9 +(lp172 +sg11 +VHomo sapiens dysferlin (DYSF), transcript variant 4, mRNA +p173 +sg13 +S'DYSF' +p174 +sg15 +(dp175 +g17 +S'NP_001124457.1:p.(Ile1226Met)' +p176 +sg19 +S'NP_001124457.1:p.(I1226M)' +p177 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001130985.1:c.3678C>G' +p178 +sg27 +g4 +sg28 +(dp179 +S'hg19' +p180 +(dp181 +g32 +S'NC_000002.11:g.71825797C>G' +p182 +sg34 +(dp183 +g36 +g37 +sg38 +g39 +sg40 +S'71825797' +p184 +sg42 +g43 +sssg44 +(dp185 +g32 +S'NC_000002.12:g.71598667C>G' +p186 +sg34 +(dp187 +g36 +g37 +sg38 +g39 +sg40 +S'71598667' +p188 +sg42 +g43 +sssS'grch37' +p189 +(dp190 +g32 +S'NC_000002.11:g.71825797C>G' +p191 +sg34 +(dp192 +g36 +g53 +sg38 +g39 +sg40 +S'71825797' +p193 +sg42 +g43 +sssS'grch38' +p194 +(dp195 +g32 +S'NC_000002.12:g.71598667C>G' +p196 +sg34 +(dp197 +g36 +g53 +sg38 +g39 +sg40 +S'71598667' +p198 +sg42 +g43 +ssssg60 +(dp199 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124457.1' +p200 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130985.1' +p201 +sssS'NM_001130983.1:c.3627C>G' +p202 +(dp203 +g3 +g4 +sg5 +(lp204 +S'RefSeqGene record not available' +p205 +asg8 +g4 +sg9 +(lp206 +sg11 +VHomo sapiens dysferlin (DYSF), transcript variant 6, mRNA +p207 +sg13 +S'DYSF' +p208 +sg15 +(dp209 +g17 +S'NP_001124455.1:p.(Ile1209Met)' +p210 +sg19 +S'NP_001124455.1:p.(I1209M)' +p211 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001130983.1:c.3627C>G' +p212 +sg27 +g4 +sg28 +(dp213 +S'hg19' +p214 +(dp215 +g32 +S'NC_000002.11:g.71825797C>G' +p216 +sg34 +(dp217 +g36 +g37 +sg38 +g39 +sg40 +S'71825797' +p218 +sg42 +g43 +sssg44 +(dp219 +g32 +S'NC_000002.12:g.71598667C>G' +p220 +sg34 +(dp221 +g36 +g37 +sg38 +g39 +sg40 +S'71598667' +p222 +sg42 +g43 +sssS'grch37' +p223 +(dp224 +g32 +S'NC_000002.11:g.71825797C>G' +p225 +sg34 +(dp226 +g36 +g53 +sg38 +g39 +sg40 +S'71825797' 
+p227 +sg42 +g43 +sssS'grch38' +p228 +(dp229 +g32 +S'NC_000002.12:g.71598667C>G' +p230 +sg34 +(dp231 +g36 +g53 +sg38 +g39 +sg40 +S'71598667' +p232 +sg42 +g43 +ssssg60 +(dp233 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124455.1' +p234 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130983.1' +p235 +sssS'NM_001130987.1:c.3678C>G' +p236 +(dp237 +g3 +g4 +sg5 +(lp238 +S'RefSeqGene record not available' +p239 +asg8 +g4 +sg9 +(lp240 +sg11 +VHomo sapiens dysferlin (DYSF), transcript variant 1, mRNA +p241 +sg13 +S'DYSF' +p242 +sg15 +(dp243 +g17 +S'NP_001124459.1:p.(Ile1226Met)' +p244 +sg19 +S'NP_001124459.1:p.(I1226M)' +p245 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001130987.1:c.3678C>G' +p246 +sg27 +g4 +sg28 +(dp247 +S'hg19' +p248 +(dp249 +g32 +S'NC_000002.11:g.71825797C>G' +p250 +sg34 +(dp251 +g36 +g37 +sg38 +g39 +sg40 +S'71825797' +p252 +sg42 +g43 +sssg44 +(dp253 +g32 +S'NC_000002.12:g.71598667C>G' +p254 +sg34 +(dp255 +g36 +g37 +sg38 +g39 +sg40 +S'71598667' +p256 +sg42 +g43 +sssS'grch37' +p257 +(dp258 +g32 +S'NC_000002.11:g.71825797C>G' +p259 +sg34 +(dp260 +g36 +g53 +sg38 +g39 +sg40 +S'71825797' +p261 +sg42 +g43 +sssS'grch38' +p262 +(dp263 +g32 +S'NC_000002.12:g.71598667C>G' +p264 +sg34 +(dp265 +g36 +g53 +sg38 +g39 +sg40 +S'71598667' +p266 +sg42 +g43 +ssssg60 +(dp267 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124459.1' +p268 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130987.1' +p269 +sssS'flag' +p270 +S'gene_variant' +p271 +sS'NM_001130980.1:c.3675C>G' +p272 +(dp273 +g3 +g4 +sg5 +(lp274 +S'RefSeqGene record not available' +p275 +asg8 +g4 +sg9 +(lp276 +sg11 +VHomo sapiens dysferlin (DYSF), transcript variant 13, mRNA +p277 +sg13 +S'DYSF' +p278 +sg15 +(dp279 +g17 +S'NP_001124452.1:p.(Ile1225Met)' +p280 +sg19 +S'NP_001124452.1:p.(I1225M)' +p281 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001130980.1:c.3675C>G' +p282 +sg27 +g4 +sg28 +(dp283 +S'hg19' +p284 +(dp285 +g32 +S'NC_000002.11:g.71825797C>G' +p286 +sg34 +(dp287 +g36 +g37 +sg38 +g39 
+sg40 +S'71825797' +p288 +sg42 +g43 +sssg44 +(dp289 +g32 +S'NC_000002.12:g.71598667C>G' +p290 +sg34 +(dp291 +g36 +g37 +sg38 +g39 +sg40 +S'71598667' +p292 +sg42 +g43 +sssS'grch37' +p293 +(dp294 +g32 +S'NC_000002.11:g.71825797C>G' +p295 +sg34 +(dp296 +g36 +g53 +sg38 +g39 +sg40 +S'71825797' +p297 +sg42 +g43 +sssS'grch38' +p298 +(dp299 +g32 +S'NC_000002.12:g.71598667C>G' +p300 +sg34 +(dp301 +g36 +g53 +sg38 +g39 +sg40 +S'71598667' +p302 +sg42 +g43 +ssssg60 +(dp303 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124452.1' +p304 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130980.1' +p305 +sssS'NM_001130979.1:c.3717C>G' +p306 +(dp307 +g3 +g4 +sg5 +(lp308 +S'RefSeqGene record not available' +p309 +asg8 +g4 +sg9 +(lp310 +sg11 +VHomo sapiens dysferlin (DYSF), transcript variant 12, mRNA +p311 +sg13 +S'DYSF' +p312 +sg15 +(dp313 +g17 +S'NP_001124451.1:p.(Ile1239Met)' +p314 +sg19 +S'NP_001124451.1:p.(I1239M)' +p315 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001130979.1:c.3717C>G' +p316 +sg27 +g4 +sg28 +(dp317 +S'hg19' +p318 +(dp319 +g32 +S'NC_000002.11:g.71825797C>G' +p320 +sg34 +(dp321 +g36 +g37 +sg38 +g39 +sg40 +S'71825797' +p322 +sg42 +g43 +sssg44 +(dp323 +g32 +S'NC_000002.12:g.71598667C>G' +p324 +sg34 +(dp325 +g36 +g37 +sg38 +g39 +sg40 +S'71598667' +p326 +sg42 +g43 +sssS'grch37' +p327 +(dp328 +g32 +S'NC_000002.11:g.71825797C>G' +p329 +sg34 +(dp330 +g36 +g53 +sg38 +g39 +sg40 +S'71825797' +p331 +sg42 +g43 +sssS'grch38' +p332 +(dp333 +g32 +S'NC_000002.12:g.71598667C>G' +p334 +sg34 +(dp335 +g36 +g53 +sg38 +g39 +sg40 +S'71598667' +p336 +sg42 +g43 +ssssg60 +(dp337 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124451.1' +p338 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130979.1' +p339 +sssS'NM_001130984.1:c.3585C>G' +p340 +(dp341 +g3 +g4 +sg5 +(lp342 +S'RefSeqGene record not available' +p343 +asg8 +g4 +sg9 +(lp344 +sg11 +VHomo sapiens dysferlin (DYSF), transcript variant 5, mRNA +p345 +sg13 +S'DYSF' +p346 +sg15 +(dp347 +g17 +S'NP_001124456.1:p.(Ile1195Met)' 
+p348 +sg19 +S'NP_001124456.1:p.(I1195M)' +p349 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001130984.1:c.3585C>G' +p350 +sg27 +g4 +sg28 +(dp351 +S'hg19' +p352 +(dp353 +g32 +S'NC_000002.11:g.71825797C>G' +p354 +sg34 +(dp355 +g36 +g37 +sg38 +g39 +sg40 +S'71825797' +p356 +sg42 +g43 +sssg44 +(dp357 +g32 +S'NC_000002.12:g.71598667C>G' +p358 +sg34 +(dp359 +g36 +g37 +sg38 +g39 +sg40 +S'71598667' +p360 +sg42 +g43 +sssS'grch37' +p361 +(dp362 +g32 +S'NC_000002.11:g.71825797C>G' +p363 +sg34 +(dp364 +g36 +g53 +sg38 +g39 +sg40 +S'71825797' +p365 +sg42 +g43 +sssS'grch38' +p366 +(dp367 +g32 +S'NC_000002.12:g.71598667C>G' +p368 +sg34 +(dp369 +g36 +g53 +sg38 +g39 +sg40 +S'71598667' +p370 +sg42 +g43 +ssssg60 +(dp371 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124456.1' +p372 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130984.1' +p373 +sssS'NM_001130977.1:c.3582C>G' +p374 +(dp375 +g3 +g4 +sg5 +(lp376 +S'RefSeqGene record not available' +p377 +asg8 +g4 +sg9 +(lp378 +sg11 +VHomo sapiens dysferlin (DYSF), transcript variant 10, mRNA +p379 +sg13 +S'DYSF' +p380 +sg15 +(dp381 +g17 +S'NP_001124449.1:p.(Ile1194Met)' +p382 +sg19 +S'NP_001124449.1:p.(I1194M)' +p383 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001130977.1:c.3582C>G' +p384 +sg27 +g4 +sg28 +(dp385 +S'hg19' +p386 +(dp387 +g32 +S'NC_000002.11:g.71825797C>G' +p388 +sg34 +(dp389 +g36 +g37 +sg38 +g39 +sg40 +S'71825797' +p390 +sg42 +g43 +sssg44 +(dp391 +g32 +S'NC_000002.12:g.71598667C>G' +p392 +sg34 +(dp393 +g36 +g37 +sg38 +g39 +sg40 +S'71598667' +p394 +sg42 +g43 +sssS'grch37' +p395 +(dp396 +g32 +S'NC_000002.11:g.71825797C>G' +p397 +sg34 +(dp398 +g36 +g53 +sg38 +g39 +sg40 +S'71825797' +p399 +sg42 +g43 +sssS'grch38' +p400 +(dp401 +g32 +S'NC_000002.12:g.71598667C>G' +p402 +sg34 +(dp403 +g36 +g53 +sg38 +g39 +sg40 +S'71598667' +p404 +sg42 +g43 +ssssg60 +(dp405 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124449.1' +p406 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130977.1' +p407 +sssS'NM_001130455.1:c.3627C>G' 
+p408 +(dp409 +g3 +g4 +sg5 +(lp410 +S'RefSeqGene record not available' +p411 +asg8 +g4 +sg9 +(lp412 +sg11 +VHomo sapiens dysferlin (DYSF), transcript variant 2, mRNA +p413 +sg13 +S'DYSF' +p414 +sg15 +(dp415 +g17 +S'NP_001123927.1:p.(Ile1209Met)' +p416 +sg19 +S'NP_001123927.1:p.(I1209M)' +p417 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001130455.1:c.3627C>G' +p418 +sg27 +g4 +sg28 +(dp419 +S'hg19' +p420 +(dp421 +g32 +S'NC_000002.11:g.71825797C>G' +p422 +sg34 +(dp423 +g36 +g37 +sg38 +g39 +sg40 +S'71825797' +p424 +sg42 +g43 +sssg44 +(dp425 +g32 +S'NC_000002.12:g.71598667C>G' +p426 +sg34 +(dp427 +g36 +g37 +sg38 +g39 +sg40 +S'71598667' +p428 +sg42 +g43 +sssS'grch37' +p429 +(dp430 +g32 +S'NC_000002.11:g.71825797C>G' +p431 +sg34 +(dp432 +g36 +g53 +sg38 +g39 +sg40 +S'71825797' +p433 +sg42 +g43 +sssS'grch38' +p434 +(dp435 +g32 +S'NC_000002.12:g.71598667C>G' +p436 +sg34 +(dp437 +g36 +g53 +sg38 +g39 +sg40 +S'71598667' +p438 +sg42 +g43 +ssssg60 +(dp439 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001123927.1' +p440 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130455.1' +p441 +sssS'metadata' +p442 +(dp443 +S'variantvalidator_hgvs_version' +p444 +S'1.1.3' +p445 +sS'uta_schema' +p446 +S'uta_20180821' +p447 +sS'seqrepo_db' +p448 +S'2018-08-21' +p449 +sS'variantvalidator_version' +p450 +S'v0.2' +p451 +ssS'NM_001130982.1:c.3720C>G' +p452 +(dp453 +g3 +g4 +sg5 +(lp454 +S'RefSeqGene record not available' +p455 +asg8 +g4 +sg9 +(lp456 +sg11 +VHomo sapiens dysferlin (DYSF), transcript variant 7, mRNA +p457 +sg13 +S'DYSF' +p458 +sg15 +(dp459 +g17 +S'NP_001124454.1:p.(Ile1240Met)' +p460 +sg19 +S'NP_001124454.1:p.(I1240M)' +p461 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001130982.1:c.3720C>G' +p462 +sg27 +g4 +sg28 +(dp463 +S'hg19' +p464 +(dp465 +g32 +S'NC_000002.11:g.71825797C>G' +p466 +sg34 +(dp467 +g36 +g37 +sg38 +g39 +sg40 +S'71825797' +p468 +sg42 +g43 +sssg44 +(dp469 +g32 +S'NC_000002.12:g.71598667C>G' +p470 +sg34 +(dp471 +g36 +g37 +sg38 +g39 +sg40 +S'71598667' +p472 +sg42 
+g43 +sssS'grch37' +p473 +(dp474 +g32 +S'NC_000002.11:g.71825797C>G' +p475 +sg34 +(dp476 +g36 +g53 +sg38 +g39 +sg40 +S'71825797' +p477 +sg42 +g43 +sssS'grch38' +p478 +(dp479 +g32 +S'NC_000002.12:g.71598667C>G' +p480 +sg34 +(dp481 +g36 +g53 +sg38 +g39 +sg40 +S'71598667' +p482 +sg42 +g43 +ssssg60 +(dp483 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124454.1' +p484 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130982.1' +p485 +sssS'NM_001130978.1:c.3624C>G' +p486 +(dp487 +g3 +g4 +sg5 +(lp488 +S'RefSeqGene record not available' +p489 +asg8 +g4 +sg9 +(lp490 +sg11 +VHomo sapiens dysferlin (DYSF), transcript variant 11, mRNA +p491 +sg13 +S'DYSF' +p492 +sg15 +(dp493 +g17 +S'NP_001124450.1:p.(Ile1208Met)' +p494 +sg19 +S'NP_001124450.1:p.(I1208M)' +p495 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001130978.1:c.3624C>G' +p496 +sg27 +g4 +sg28 +(dp497 +S'hg19' +p498 +(dp499 +g32 +S'NC_000002.11:g.71825797C>G' +p500 +sg34 +(dp501 +g36 +g37 +sg38 +g39 +sg40 +S'71825797' +p502 +sg42 +g43 +sssg44 +(dp503 +g32 +S'NC_000002.12:g.71598667C>G' +p504 +sg34 +(dp505 +g36 +g37 +sg38 +g39 +sg40 +S'71598667' +p506 +sg42 +g43 +sssS'grch37' +p507 +(dp508 +g32 +S'NC_000002.11:g.71825797C>G' +p509 +sg34 +(dp510 +g36 +g53 +sg38 +g39 +sg40 +S'71825797' +p511 +sg42 +g43 +sssS'grch38' +p512 +(dp513 +g32 +S'NC_000002.12:g.71598667C>G' +p514 +sg34 +(dp515 +g36 +g53 +sg38 +g39 +sg40 +S'71598667' +p516 +sg42 +g43 +ssssg60 +(dp517 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124450.1' +p518 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130978.1' +p519 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant269.txt b/VariantValidator/testing/testOutputsMasterITS/variant269.txt new file mode 100644 index 00000000..44271a5c --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant269.txt @@ -0,0 +1,401 @@ +(dp0 +S'NM_021007.2:c.1718G>C' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 1, mRNA +p12 +sS'gene_symbol' +p13 +S'SCN2A' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_066287.2:p.(Ser573Thr)' +p18 +sS'slr' +p19 +S'NP_066287.2:p.(S573T)' +p20 +ssS'submitted_variant' +p21 +S'2-166179712-G-C' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_021007.2:c.1718G>C' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000002.11:g.166179712G>C' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr2' +p37 +sS'ref' +p38 +S'G' +p39 +sS'pos' +p40 +S'166179712' +p41 +sS'alt' +p42 +S'C' +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000002.12:g.165323202G>C' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +g39 +sg40 +S'165323202' +p48 +sg42 +g43 +sssS'grch37' +p49 +(dp50 +g32 +S'NC_000002.11:g.166179712G>C' +p51 +sg34 +(dp52 +g36 +S'2' +p53 +sg38 +g39 +sg40 +S'166179712' +p54 +sg42 +g43 +sssS'grch38' +p55 +(dp56 +g32 +S'NC_000002.12:g.165323202G>C' +p57 +sg34 +(dp58 +g36 +g53 +sg38 +g39 +sg40 +S'165323202' +p59 +sg42 +g43 +ssssS'reference_sequence_records' +p60 +(dp61 +S'protein' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_066287.2' +p63 +sS'transcript' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_021007.2' +p65 +sssS'flag' 
+p66 +S'gene_variant' +p67 +sS'metadata' +p68 +(dp69 +S'variantvalidator_hgvs_version' +p70 +S'1.1.3' +p71 +sS'uta_schema' +p72 +S'uta_20180821' +p73 +sS'seqrepo_db' +p74 +S'2018-08-21' +p75 +sS'variantvalidator_version' +p76 +S'v0.2' +p77 +ssS'NM_001040143.1:c.1718G>C' +p78 +(dp79 +g3 +g4 +sg5 +(lp80 +S'RefSeqGene record not available' +p81 +asg8 +g4 +sg9 +(lp82 +sg11 +VHomo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 3, mRNA +p83 +sg13 +S'SCN2A' +p84 +sg15 +(dp85 +g17 +S'NP_001035233.1:p.(Ser573Thr)' +p86 +sg19 +S'NP_001035233.1:p.(S573T)' +p87 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001040143.1:c.1718G>C' +p88 +sg27 +g4 +sg28 +(dp89 +S'hg19' +p90 +(dp91 +g32 +S'NC_000002.11:g.166179712G>C' +p92 +sg34 +(dp93 +g36 +g37 +sg38 +g39 +sg40 +S'166179712' +p94 +sg42 +g43 +sssg44 +(dp95 +g32 +S'NC_000002.12:g.165323202G>C' +p96 +sg34 +(dp97 +g36 +g37 +sg38 +g39 +sg40 +S'165323202' +p98 +sg42 +g43 +sssS'grch37' +p99 +(dp100 +g32 +S'NC_000002.11:g.166179712G>C' +p101 +sg34 +(dp102 +g36 +g53 +sg38 +g39 +sg40 +S'166179712' +p103 +sg42 +g43 +sssS'grch38' +p104 +(dp105 +g32 +S'NC_000002.12:g.165323202G>C' +p106 +sg34 +(dp107 +g36 +g53 +sg38 +g39 +sg40 +S'165323202' +p108 +sg42 +g43 +ssssg60 +(dp109 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035233.1' +p110 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040143.1' +p111 +sssS'NM_001040142.1:c.1718G>C' +p112 +(dp113 +g3 +g4 +sg5 +(lp114 +S'RefSeqGene record not available' +p115 +asg8 +g4 +sg9 +(lp116 +sg11 +VHomo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 2, mRNA +p117 +sg13 +S'SCN2A' +p118 +sg15 +(dp119 +g17 +S'NP_001035232.1:p.(Ser573Thr)' +p120 +sg19 +S'NP_001035232.1:p.(S573T)' +p121 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001040142.1:c.1718G>C' +p122 +sg27 +g4 +sg28 +(dp123 +S'hg19' +p124 +(dp125 +g32 +S'NC_000002.11:g.166179712G>C' +p126 +sg34 +(dp127 +g36 +g37 +sg38 +g39 +sg40 +S'166179712' +p128 +sg42 +g43 +sssg44 +(dp129 +g32 
+S'NC_000002.12:g.165323202G>C' +p130 +sg34 +(dp131 +g36 +g37 +sg38 +g39 +sg40 +S'165323202' +p132 +sg42 +g43 +sssS'grch37' +p133 +(dp134 +g32 +S'NC_000002.11:g.166179712G>C' +p135 +sg34 +(dp136 +g36 +g53 +sg38 +g39 +sg40 +S'166179712' +p137 +sg42 +g43 +sssS'grch38' +p138 +(dp139 +g32 +S'NC_000002.12:g.165323202G>C' +p140 +sg34 +(dp141 +g36 +g53 +sg38 +g39 +sg40 +S'165323202' +p142 +sg42 +g43 +ssssg60 +(dp143 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035232.1' +p144 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040142.1' +p145 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant27.txt b/VariantValidator/testing/testOutputsMasterITS/variant27.txt new file mode 100644 index 00000000..da9bb27b --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant27.txt @@ -0,0 +1,80 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use NC_000014.8:g.36989536G>A' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +g4 +sS'gene_symbol' +p12 +g4 +sS'hgvs_predicted_protein_consequence' +p13 +(dp14 +S'tlr' +p15 +g4 +sS'slr' +p16 +g4 +ssS'submitted_variant' +p17 +S'NR_138595.1:n.1-810C>T' +p18 +sS'genome_context_intronic_sequence' +p19 +g4 +sS'hgvs_lrg_variant' +p20 +g4 +sS'hgvs_transcript_variant' +p21 +g4 +sS'hgvs_refseqgene_variant' +p22 +g4 +sS'primary_assembly_loci' +p23 +(dp24 +sS'reference_sequence_records' +p25 +g4 +ssS'flag' +p26 +S'warning' +p27 +sS'metadata' +p28 +(dp29 +S'variantvalidator_hgvs_version' +p30 +S'1.1.3' +p31 +sS'uta_schema' +p32 +S'uta_20180821' +p33 +sS'seqrepo_db' +p34 +S'2018-08-21' +p35 +sS'variantvalidator_version' +p36 +S'v0.2' +p37 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant270.txt b/VariantValidator/testing/testOutputsMasterITS/variant270.txt new file mode 100644 index 00000000..5a0d9e7b --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant270.txt @@ -0,0 +1,401 @@ +(dp0 +S'NM_021007.2:c.2026A>G' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 1, mRNA +p12 +sS'gene_symbol' +p13 +S'SCN2A' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_066287.2:p.(Thr676Ala)' +p18 +sS'slr' +p19 +S'NP_066287.2:p.(T676A)' +p20 +ssS'submitted_variant' +p21 +S'2-166183371-A-G' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_021007.2:c.2026A>G' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000002.11:g.166183371A>G' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr2' +p37 +sS'ref' +p38 +S'A' +p39 +sS'pos' +p40 +S'166183371' +p41 +sS'alt' +p42 +S'G' +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000002.12:g.165326861A>G' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +g39 +sg40 +S'165326861' +p48 +sg42 +g43 +sssS'grch37' +p49 +(dp50 +g32 +S'NC_000002.11:g.166183371A>G' +p51 +sg34 +(dp52 +g36 +S'2' +p53 +sg38 +g39 +sg40 +S'166183371' +p54 +sg42 +g43 +sssS'grch38' +p55 +(dp56 +g32 +S'NC_000002.12:g.165326861A>G' +p57 +sg34 +(dp58 +g36 +g53 +sg38 +g39 +sg40 +S'165326861' +p59 +sg42 +g43 +ssssS'reference_sequence_records' +p60 +(dp61 +S'protein' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_066287.2' +p63 +sS'transcript' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_021007.2' +p65 +sssS'flag' 
+p66 +S'gene_variant' +p67 +sS'NM_001040143.1:c.2026A>G' +p68 +(dp69 +g3 +g4 +sg5 +(lp70 +S'RefSeqGene record not available' +p71 +asg8 +g4 +sg9 +(lp72 +sg11 +VHomo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 3, mRNA +p73 +sg13 +S'SCN2A' +p74 +sg15 +(dp75 +g17 +S'NP_001035233.1:p.(Thr676Ala)' +p76 +sg19 +S'NP_001035233.1:p.(T676A)' +p77 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001040143.1:c.2026A>G' +p78 +sg27 +g4 +sg28 +(dp79 +S'hg19' +p80 +(dp81 +g32 +S'NC_000002.11:g.166183371A>G' +p82 +sg34 +(dp83 +g36 +g37 +sg38 +g39 +sg40 +S'166183371' +p84 +sg42 +g43 +sssg44 +(dp85 +g32 +S'NC_000002.12:g.165326861A>G' +p86 +sg34 +(dp87 +g36 +g37 +sg38 +g39 +sg40 +S'165326861' +p88 +sg42 +g43 +sssS'grch37' +p89 +(dp90 +g32 +S'NC_000002.11:g.166183371A>G' +p91 +sg34 +(dp92 +g36 +g53 +sg38 +g39 +sg40 +S'166183371' +p93 +sg42 +g43 +sssS'grch38' +p94 +(dp95 +g32 +S'NC_000002.12:g.165326861A>G' +p96 +sg34 +(dp97 +g36 +g53 +sg38 +g39 +sg40 +S'165326861' +p98 +sg42 +g43 +ssssg60 +(dp99 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035233.1' +p100 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040143.1' +p101 +sssS'NM_001040142.1:c.2026A>G' +p102 +(dp103 +g3 +g4 +sg5 +(lp104 +S'RefSeqGene record not available' +p105 +asg8 +g4 +sg9 +(lp106 +sg11 +VHomo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 2, mRNA +p107 +sg13 +S'SCN2A' +p108 +sg15 +(dp109 +g17 +S'NP_001035232.1:p.(Thr676Ala)' +p110 +sg19 +S'NP_001035232.1:p.(T676A)' +p111 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001040142.1:c.2026A>G' +p112 +sg27 +g4 +sg28 +(dp113 +S'hg19' +p114 +(dp115 +g32 +S'NC_000002.11:g.166183371A>G' +p116 +sg34 +(dp117 +g36 +g37 +sg38 +g39 +sg40 +S'166183371' +p118 +sg42 +g43 +sssg44 +(dp119 +g32 +S'NC_000002.12:g.165326861A>G' +p120 +sg34 +(dp121 +g36 +g37 +sg38 +g39 +sg40 +S'165326861' +p122 +sg42 +g43 +sssS'grch37' +p123 +(dp124 +g32 +S'NC_000002.11:g.166183371A>G' +p125 +sg34 +(dp126 +g36 +g53 +sg38 +g39 +sg40 
+S'166183371' +p127 +sg42 +g43 +sssS'grch38' +p128 +(dp129 +g32 +S'NC_000002.12:g.165326861A>G' +p130 +sg34 +(dp131 +g36 +g53 +sg38 +g39 +sg40 +S'165326861' +p132 +sg42 +g43 +ssssg60 +(dp133 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035232.1' +p134 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040142.1' +p135 +sssS'metadata' +p136 +(dp137 +S'variantvalidator_hgvs_version' +p138 +S'1.1.3' +p139 +sS'uta_schema' +p140 +S'uta_20180821' +p141 +sS'seqrepo_db' +p142 +S'2018-08-21' +p143 +sS'variantvalidator_version' +p144 +S'v0.2' +p145 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant271.txt b/VariantValidator/testing/testOutputsMasterITS/variant271.txt new file mode 100644 index 00000000..636623b9 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant271.txt @@ -0,0 +1,2534 @@ +(dp0 +S'NM_001353951.1:c.233_242delinsGT' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 8, mRNA +p13 +sS'gene_symbol' +p14 +S'SCN1A' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_001340880.1:p.(Glu78GlyfsTer7)' +p19 +sS'slr' +p20 +S'NP_001340880.1:p.(E78Gfs*7)' +p21 +ssS'submitted_variant' +p22 +S'2-166929889-GTCCAGGTCCT-GAC' +p23 +sS'genome_context_intronic_sequence' +p24 +g4 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_001353951.1:c.233_242delinsGT' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 
+S'chr2' +p38 +sS'ref' +p39 +S'TCCAGGTCCT' +p40 +sS'pos' +p41 +S'166929890' +p42 +sS'alt' +p43 +VAC +p44 +sssS'hg38' +p45 +(dp46 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p49 +sg41 +S'166073380' +p50 +sg43 +VAC +p51 +sssS'grch37' +p52 +(dp53 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p54 +sg35 +(dp55 +g37 +S'2' +p56 +sg39 +S'TCCAGGTCCT' +p57 +sg41 +S'166929890' +p58 +sg43 +g44 +sssS'grch38' +p59 +(dp60 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p61 +sg35 +(dp62 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p63 +sg41 +S'166073380' +p64 +sg43 +g51 +ssssS'reference_sequence_records' +p65 +(dp66 +S'protein' +p67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340880.1' +p68 +sS'transcript' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353951.1' +p70 +sssS'NM_001353958.1:c.233_242delinsGT' +p71 +(dp72 +g3 +g4 +sg5 +(lp73 +S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' +p74 +aS'RefSeqGene record not available' +p75 +asg9 +g4 +sg10 +(lp76 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 13, mRNA +p77 +sg14 +S'SCN1A' +p78 +sg16 +(dp79 +g18 +S'NP_001340887.1:p.(Glu78GlyfsTer7)' +p80 +sg20 +S'NP_001340887.1:p.(E78Gfs*7)' +p81 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001353958.1:c.233_242delinsGT' +p82 +sg28 +g4 +sg29 +(dp83 +S'hg19' +p84 +(dp85 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p86 +sg35 +(dp87 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p88 +sg41 +S'166929890' +p89 +sg43 +VAC +p90 +sssg45 +(dp91 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p92 +sg35 +(dp93 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p94 +sg41 +S'166073380' +p95 +sg43 +VAC +p96 +sssS'grch37' +p97 +(dp98 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p99 +sg35 +(dp100 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p101 +sg41 +S'166929890' +p102 +sg43 +g90 +sssS'grch38' +p103 +(dp104 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p105 +sg35 
+(dp106 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p107 +sg41 +S'166073380' +p108 +sg43 +g96 +ssssg65 +(dp109 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340887.1' +p110 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353958.1' +p111 +sssS'NM_001202435.1:c.233_242delinsGT' +p112 +(dp113 +g3 +g4 +sg5 +(lp114 +S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' +p115 +aS'A more recent version of the selected reference sequence NM_001202435.1 is available (NM_001202435.2)' +p116 +aS'NM_001202435.2:c.233_242delinsGT MUST be fully validated prior to use in reports' +p117 +aS'select_variants=NM_001202435.2:c.233_242delinsGT' +p118 +aS'RefSeqGene record not available' +p119 +asg9 +g4 +sg10 +(lp120 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 4, mRNA +p121 +sg14 +S'SCN1A' +p122 +sg16 +(dp123 +g18 +S'NP_001189364.1:p.(Glu78GlyfsTer7)' +p124 +sg20 +S'NP_001189364.1:p.(E78Gfs*7)' +p125 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001202435.1:c.233_242delinsGT' +p126 +sg28 +g4 +sg29 +(dp127 +S'hg19' +p128 +(dp129 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p130 +sg35 +(dp131 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p132 +sg41 +S'166929890' +p133 +sg43 +VAC +p134 +sssg45 +(dp135 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p136 +sg35 +(dp137 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p138 +sg41 +S'166073380' +p139 +sg43 +VAC +p140 +sssS'grch37' +p141 +(dp142 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p143 +sg35 +(dp144 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p145 +sg41 +S'166929890' +p146 +sg43 +g134 +sssS'grch38' +p147 +(dp148 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p149 +sg35 +(dp150 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p151 +sg41 +S'166073380' +p152 +sg43 +g140 +ssssg65 +(dp153 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001189364.1' +p154 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001202435.1' +p155 +sssS'NR_148667.1:n.638_647delinsGT' +p156 +(dp157 +g3 +g4 +sg5 
+(lp158 +S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' +p159 +aS'RefSeqGene record not available' +p160 +asg9 +g4 +sg10 +(lp161 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 16, non-coding RNA +p162 +sg14 +S'SCN1A' +p163 +sg16 +(dp164 +g18 +S'Non-coding :n.' +p165 +sg20 +g165 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NR_148667.1:n.638_647delinsGT' +p166 +sg28 +g4 +sg29 +(dp167 +S'hg19' +p168 +(dp169 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p170 +sg35 +(dp171 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p172 +sg41 +S'166929890' +p173 +sg43 +VAC +p174 +sssg45 +(dp175 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p176 +sg35 +(dp177 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p178 +sg41 +S'166073380' +p179 +sg43 +VAC +p180 +sssS'grch37' +p181 +(dp182 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p183 +sg35 +(dp184 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p185 +sg41 +S'166929890' +p186 +sg43 +g174 +sssS'grch38' +p187 +(dp188 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p189 +sg35 +(dp190 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p191 +sg41 +S'166073380' +p192 +sg43 +g180 +ssssg65 +(dp193 +g69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_148667.1' +p194 +sssS'NM_001165964.1:c.233_242delinsGT' +p195 +(dp196 +g3 +g4 +sg5 +(lp197 +S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' +p198 +aS'A more recent version of the selected reference sequence NM_001165964.1 is available (NM_001165964.2)' +p199 +aS'NM_001165964.2:c.233_242delinsGT MUST be fully validated prior to use in reports' +p200 +aS'select_variants=NM_001165964.2:c.233_242delinsGT' +p201 +aS'RefSeqGene record not available' +p202 +asg9 +g4 +sg10 +(lp203 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 3, mRNA +p204 +sg14 +S'SCN1A' +p205 +sg16 +(dp206 +g18 +S'NP_001159436.1:p.(Glu78GlyfsTer7)' +p207 +sg20 +S'NP_001159436.1:p.(E78Gfs*7)' 
+p208 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001165964.1:c.233_242delinsGT' +p209 +sg28 +g4 +sg29 +(dp210 +S'hg19' +p211 +(dp212 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p213 +sg35 +(dp214 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p215 +sg41 +S'166929890' +p216 +sg43 +VAC +p217 +sssg45 +(dp218 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p219 +sg35 +(dp220 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p221 +sg41 +S'166073380' +p222 +sg43 +VAC +p223 +sssS'grch37' +p224 +(dp225 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p226 +sg35 +(dp227 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p228 +sg41 +S'166929890' +p229 +sg43 +g217 +sssS'grch38' +p230 +(dp231 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p232 +sg35 +(dp233 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p234 +sg41 +S'166073380' +p235 +sg43 +g223 +ssssg65 +(dp236 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159436.1' +p237 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165964.1' +p238 +sssS'NM_001202435.2:c.233_242delinsGT' +p239 +(dp240 +g3 +g4 +sg5 +(lp241 +S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' +p242 +aS'RefSeqGene record not available' +p243 +asg9 +g4 +sg10 +(lp244 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 4, mRNA +p245 +sg14 +S'SCN1A' +p246 +sg16 +(dp247 +g18 +S'NP_001189364.1:p.(Glu78GlyfsTer7)' +p248 +sg20 +S'NP_001189364.1:p.(E78Gfs*7)' +p249 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001202435.2:c.233_242delinsGT' +p250 +sg28 +g4 +sg29 +(dp251 +S'hg19' +p252 +(dp253 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p254 +sg35 +(dp255 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p256 +sg41 +S'166929890' +p257 +sg43 +VAC +p258 +sssg45 +(dp259 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p260 +sg35 +(dp261 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p262 +sg41 +S'166073380' +p263 +sg43 +VAC +p264 +sssS'grch37' +p265 +(dp266 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p267 +sg35 +(dp268 +g37 +g56 
+sg39 +S'TCCAGGTCCT' +p269 +sg41 +S'166929890' +p270 +sg43 +g258 +sssS'grch38' +p271 +(dp272 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p273 +sg35 +(dp274 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p275 +sg41 +S'166073380' +p276 +sg43 +g264 +ssssg65 +(dp277 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001189364.1' +p278 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001202435.2' +p279 +sssS'NM_006920.5:c.233_242delinsGT' +p280 +(dp281 +g3 +g4 +sg5 +(lp282 +S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' +p283 +aS'RefSeqGene record not available' +p284 +asg9 +g4 +sg10 +(lp285 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 2, mRNA +p286 +sg14 +S'SCN1A' +p287 +sg16 +(dp288 +g18 +S'NP_008851.3:p.(Glu78GlyfsTer7)' +p289 +sg20 +S'NP_008851.3:p.(E78Gfs*7)' +p290 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_006920.5:c.233_242delinsGT' +p291 +sg28 +g4 +sg29 +(dp292 +S'hg19' +p293 +(dp294 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p295 +sg35 +(dp296 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p297 +sg41 +S'166929890' +p298 +sg43 +VAC +p299 +sssg45 +(dp300 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p301 +sg35 +(dp302 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p303 +sg41 +S'166073380' +p304 +sg43 +VAC +p305 +sssS'grch37' +p306 +(dp307 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p308 +sg35 +(dp309 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p310 +sg41 +S'166929890' +p311 +sg43 +g299 +sssS'grch38' +p312 +(dp313 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p314 +sg35 +(dp315 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p316 +sg41 +S'166073380' +p317 +sg43 +g305 +ssssg65 +(dp318 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_008851.3' +p319 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_006920.5' +p320 +sssS'NM_001165963.1:c.233_242delinsGT' +p321 +(dp322 +g3 +g4 +sg5 +(lp323 +S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' +p324 +aS'A 
more recent version of the selected reference sequence NM_001165963.1 is available (NM_001165963.2)' +p325 +aS'NM_001165963.2:c.233_242delinsGT MUST be fully validated prior to use in reports' +p326 +aS'select_variants=NM_001165963.2:c.233_242delinsGT' +p327 +aS'RefSeqGene record not available' +p328 +asg9 +g4 +sg10 +(lp329 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 1, mRNA +p330 +sg14 +S'SCN1A' +p331 +sg16 +(dp332 +g18 +S'NP_001159435.1:p.(Glu78GlyfsTer7)' +p333 +sg20 +S'NP_001159435.1:p.(E78Gfs*7)' +p334 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001165963.1:c.233_242delinsGT' +p335 +sg28 +g4 +sg29 +(dp336 +S'hg19' +p337 +(dp338 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p339 +sg35 +(dp340 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p341 +sg41 +S'166929890' +p342 +sg43 +VAC +p343 +sssg45 +(dp344 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p345 +sg35 +(dp346 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p347 +sg41 +S'166073380' +p348 +sg43 +VAC +p349 +sssS'grch37' +p350 +(dp351 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p352 +sg35 +(dp353 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p354 +sg41 +S'166929890' +p355 +sg43 +g343 +sssS'grch38' +p356 +(dp357 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p358 +sg35 +(dp359 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p360 +sg41 +S'166073380' +p361 +sg43 +g349 +ssssg65 +(dp362 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159435.1' +p363 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165963.1' +p364 +sssS'NM_001353955.1:c.233_242delinsGT' +p365 +(dp366 +g3 +g4 +sg5 +(lp367 +S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' +p368 +aS'RefSeqGene record not available' +p369 +asg9 +g4 +sg10 +(lp370 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 11, mRNA +p371 +sg14 +S'SCN1A' +p372 +sg16 +(dp373 +g18 +S'NP_001340884.1:p.(Glu78GlyfsTer7)' +p374 +sg20 +S'NP_001340884.1:p.(E78Gfs*7)' +p375 
+ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001353955.1:c.233_242delinsGT' +p376 +sg28 +g4 +sg29 +(dp377 +S'hg19' +p378 +(dp379 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p380 +sg35 +(dp381 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p382 +sg41 +S'166929890' +p383 +sg43 +VAC +p384 +sssg45 +(dp385 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p386 +sg35 +(dp387 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p388 +sg41 +S'166073380' +p389 +sg43 +VAC +p390 +sssS'grch37' +p391 +(dp392 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p393 +sg35 +(dp394 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p395 +sg41 +S'166929890' +p396 +sg43 +g384 +sssS'grch38' +p397 +(dp398 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p399 +sg35 +(dp400 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p401 +sg41 +S'166073380' +p402 +sg43 +g390 +ssssg65 +(dp403 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340884.1' +p404 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353955.1' +p405 +sssS'NM_001353961.1:c.-2193_-2184delinsGT' +p406 +(dp407 +g3 +g4 +sg5 +(lp408 +S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' +p409 +aS'RefSeqGene record not available' +p410 +asg9 +g4 +sg10 +(lp411 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 15, mRNA +p412 +sg14 +S'SCN1A' +p413 +sg16 +(dp414 +g18 +S'NP_001340890.1:p.?' +p415 +sg20 +S'NP_001340890.1:p.?' 
+p416 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001353961.1:c.-2193_-2184delinsGT' +p417 +sg28 +g4 +sg29 +(dp418 +S'hg19' +p419 +(dp420 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p421 +sg35 +(dp422 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p423 +sg41 +S'166929890' +p424 +sg43 +VAC +p425 +sssg45 +(dp426 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p427 +sg35 +(dp428 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p429 +sg41 +S'166073380' +p430 +sg43 +VAC +p431 +sssS'grch37' +p432 +(dp433 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p434 +sg35 +(dp435 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p436 +sg41 +S'166929890' +p437 +sg43 +g425 +sssS'grch38' +p438 +(dp439 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p440 +sg35 +(dp441 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p442 +sg41 +S'166073380' +p443 +sg43 +g431 +ssssg65 +(dp444 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340890.1' +p445 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353961.1' +p446 +sssS'metadata' +p447 +(dp448 +S'variantvalidator_hgvs_version' +p449 +S'1.1.3' +p450 +sS'uta_schema' +p451 +S'uta_20180821' +p452 +sS'seqrepo_db' +p453 +S'2018-08-21' +p454 +sS'variantvalidator_version' +p455 +S'v0.2' +p456 +ssS'NM_001165963.2:c.233_242delinsGT' +p457 +(dp458 +g3 +g4 +sg5 +(lp459 +S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' +p460 +aS'RefSeqGene record not available' +p461 +asg9 +g4 +sg10 +(lp462 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 1, mRNA +p463 +sg14 +S'SCN1A' +p464 +sg16 +(dp465 +g18 +S'NP_001159435.1:p.(Glu78GlyfsTer7)' +p466 +sg20 +S'NP_001159435.1:p.(E78Gfs*7)' +p467 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001165963.2:c.233_242delinsGT' +p468 +sg28 +g4 +sg29 +(dp469 +S'hg19' +p470 +(dp471 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p472 +sg35 +(dp473 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p474 +sg41 +S'166929890' +p475 +sg43 +VAC +p476 +sssg45 +(dp477 +g33 
+S'NC_000002.12:g.166073380_166073389delinsAC' +p478 +sg35 +(dp479 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p480 +sg41 +S'166073380' +p481 +sg43 +VAC +p482 +sssS'grch37' +p483 +(dp484 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p485 +sg35 +(dp486 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p487 +sg41 +S'166929890' +p488 +sg43 +g476 +sssS'grch38' +p489 +(dp490 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p491 +sg35 +(dp492 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p493 +sg41 +S'166073380' +p494 +sg43 +g482 +ssssg65 +(dp495 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159435.1' +p496 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165963.2' +p497 +sssS'NM_001353950.1:c.233_242delinsGT' +p498 +(dp499 +g3 +g4 +sg5 +(lp500 +S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' +p501 +aS'RefSeqGene record not available' +p502 +asg9 +g4 +sg10 +(lp503 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 7, mRNA +p504 +sg14 +S'SCN1A' +p505 +sg16 +(dp506 +g18 +S'NP_001340879.1:p.(Glu78GlyfsTer7)' +p507 +sg20 +S'NP_001340879.1:p.(E78Gfs*7)' +p508 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001353950.1:c.233_242delinsGT' +p509 +sg28 +g4 +sg29 +(dp510 +S'hg19' +p511 +(dp512 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p513 +sg35 +(dp514 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p515 +sg41 +S'166929890' +p516 +sg43 +VAC +p517 +sssg45 +(dp518 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p519 +sg35 +(dp520 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p521 +sg41 +S'166073380' +p522 +sg43 +VAC +p523 +sssS'grch37' +p524 +(dp525 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p526 +sg35 +(dp527 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p528 +sg41 +S'166929890' +p529 +sg43 +g517 +sssS'grch38' +p530 +(dp531 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p532 +sg35 +(dp533 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p534 +sg41 +S'166073380' +p535 +sg43 +g523 +ssssg65 +(dp536 +g67 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340879.1' +p537 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353950.1' +p538 +sssS'flag' +p539 +S'gene_variant' +p540 +sS'NM_001353948.1:c.233_242delinsGT' +p541 +(dp542 +g3 +g4 +sg5 +(lp543 +S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' +p544 +aS'RefSeqGene record not available' +p545 +asg9 +g4 +sg10 +(lp546 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 5, mRNA +p547 +sg14 +S'SCN1A' +p548 +sg16 +(dp549 +g18 +S'NP_001340877.1:p.(Glu78GlyfsTer7)' +p550 +sg20 +S'NP_001340877.1:p.(E78Gfs*7)' +p551 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001353948.1:c.233_242delinsGT' +p552 +sg28 +g4 +sg29 +(dp553 +S'hg19' +p554 +(dp555 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p556 +sg35 +(dp557 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p558 +sg41 +S'166929890' +p559 +sg43 +VAC +p560 +sssg45 +(dp561 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p562 +sg35 +(dp563 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p564 +sg41 +S'166073380' +p565 +sg43 +VAC +p566 +sssS'grch37' +p567 +(dp568 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p569 +sg35 +(dp570 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p571 +sg41 +S'166929890' +p572 +sg43 +g560 +sssS'grch38' +p573 +(dp574 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p575 +sg35 +(dp576 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p577 +sg41 +S'166073380' +p578 +sg43 +g566 +ssssg65 +(dp579 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340877.1' +p580 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353948.1' +p581 +sssS'NM_001353949.1:c.233_242delinsGT' +p582 +(dp583 +g3 +g4 +sg5 +(lp584 +S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' +p585 +aS'RefSeqGene record not available' +p586 +asg9 +g4 +sg10 +(lp587 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 6, mRNA +p588 +sg14 +S'SCN1A' +p589 +sg16 +(dp590 
+g18 +S'NP_001340878.1:p.(Glu78GlyfsTer7)' +p591 +sg20 +S'NP_001340878.1:p.(E78Gfs*7)' +p592 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001353949.1:c.233_242delinsGT' +p593 +sg28 +g4 +sg29 +(dp594 +S'hg19' +p595 +(dp596 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p597 +sg35 +(dp598 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p599 +sg41 +S'166929890' +p600 +sg43 +VAC +p601 +sssg45 +(dp602 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p603 +sg35 +(dp604 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p605 +sg41 +S'166073380' +p606 +sg43 +VAC +p607 +sssS'grch37' +p608 +(dp609 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p610 +sg35 +(dp611 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p612 +sg41 +S'166929890' +p613 +sg43 +g601 +sssS'grch38' +p614 +(dp615 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p616 +sg35 +(dp617 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p618 +sg41 +S'166073380' +p619 +sg43 +g607 +ssssg65 +(dp620 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340878.1' +p621 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353949.1' +p622 +sssS'NM_001353957.1:c.233_242delinsGT' +p623 +(dp624 +g3 +g4 +sg5 +(lp625 +S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' +p626 +aS'RefSeqGene record not available' +p627 +asg9 +g4 +sg10 +(lp628 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 12, mRNA +p629 +sg14 +S'SCN1A' +p630 +sg16 +(dp631 +g18 +S'NP_001340886.1:p.(Glu78GlyfsTer7)' +p632 +sg20 +S'NP_001340886.1:p.(E78Gfs*7)' +p633 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001353957.1:c.233_242delinsGT' +p634 +sg28 +g4 +sg29 +(dp635 +S'hg19' +p636 +(dp637 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p638 +sg35 +(dp639 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p640 +sg41 +S'166929890' +p641 +sg43 +VAC +p642 +sssg45 +(dp643 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p644 +sg35 +(dp645 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p646 +sg41 +S'166073380' +p647 +sg43 +VAC +p648 +sssS'grch37' +p649 
+(dp650 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p651 +sg35 +(dp652 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p653 +sg41 +S'166929890' +p654 +sg43 +g642 +sssS'grch38' +p655 +(dp656 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p657 +sg35 +(dp658 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p659 +sg41 +S'166073380' +p660 +sg43 +g648 +ssssg65 +(dp661 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340886.1' +p662 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353957.1' +p663 +sssS'NM_001353952.1:c.233_242delinsGT' +p664 +(dp665 +g3 +g4 +sg5 +(lp666 +S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' +p667 +aS'RefSeqGene record not available' +p668 +asg9 +g4 +sg10 +(lp669 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 9, mRNA +p670 +sg14 +S'SCN1A' +p671 +sg16 +(dp672 +g18 +S'NP_001340881.1:p.(Glu78GlyfsTer7)' +p673 +sg20 +S'NP_001340881.1:p.(E78Gfs*7)' +p674 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001353952.1:c.233_242delinsGT' +p675 +sg28 +g4 +sg29 +(dp676 +S'hg19' +p677 +(dp678 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p679 +sg35 +(dp680 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p681 +sg41 +S'166929890' +p682 +sg43 +VAC +p683 +sssg45 +(dp684 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p685 +sg35 +(dp686 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p687 +sg41 +S'166073380' +p688 +sg43 +VAC +p689 +sssS'grch37' +p690 +(dp691 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p692 +sg35 +(dp693 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p694 +sg41 +S'166929890' +p695 +sg43 +g683 +sssS'grch38' +p696 +(dp697 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p698 +sg35 +(dp699 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p700 +sg41 +S'166073380' +p701 +sg43 +g689 +ssssg65 +(dp702 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340881.1' +p703 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353952.1' +p704 +sssS'NM_001353954.1:c.233_242delinsGT' +p705 +(dp706 +g3 +g4 +sg5 +(lp707 
+S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' +p708 +aS'RefSeqGene record not available' +p709 +asg9 +g4 +sg10 +(lp710 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 10, mRNA +p711 +sg14 +S'SCN1A' +p712 +sg16 +(dp713 +g18 +S'NP_001340883.1:p.(Glu78GlyfsTer7)' +p714 +sg20 +S'NP_001340883.1:p.(E78Gfs*7)' +p715 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001353954.1:c.233_242delinsGT' +p716 +sg28 +g4 +sg29 +(dp717 +S'hg19' +p718 +(dp719 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p720 +sg35 +(dp721 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p722 +sg41 +S'166929890' +p723 +sg43 +VAC +p724 +sssg45 +(dp725 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p726 +sg35 +(dp727 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p728 +sg41 +S'166073380' +p729 +sg43 +VAC +p730 +sssS'grch37' +p731 +(dp732 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p733 +sg35 +(dp734 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p735 +sg41 +S'166929890' +p736 +sg43 +g724 +sssS'grch38' +p737 +(dp738 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p739 +sg35 +(dp740 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p741 +sg41 +S'166073380' +p742 +sg43 +g730 +ssssg65 +(dp743 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340883.1' +p744 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353954.1' +p745 +sssS'NM_006920.4:c.233_242delinsGT' +p746 +(dp747 +g3 +g4 +sg5 +(lp748 +S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' +p749 +aS'A more recent version of the selected reference sequence NM_006920.4 is available (NM_006920.5)' +p750 +aS'NM_006920.5:c.233_242delinsGT MUST be fully validated prior to use in reports' +p751 +aS'select_variants=NM_006920.5:c.233_242delinsGT' +p752 +aS'RefSeqGene record not available' +p753 +asg9 +g4 +sg10 +(lp754 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 2, mRNA +p755 +sg14 +S'SCN1A' +p756 +sg16 
+(dp757 +g18 +S'NP_008851.3:p.(Glu78GlyfsTer7)' +p758 +sg20 +S'NP_008851.3:p.(E78Gfs*7)' +p759 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_006920.4:c.233_242delinsGT' +p760 +sg28 +g4 +sg29 +(dp761 +S'hg19' +p762 +(dp763 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p764 +sg35 +(dp765 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p766 +sg41 +S'166929890' +p767 +sg43 +VAC +p768 +sssg45 +(dp769 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p770 +sg35 +(dp771 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p772 +sg41 +S'166073380' +p773 +sg43 +VAC +p774 +sssS'grch37' +p775 +(dp776 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p777 +sg35 +(dp778 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p779 +sg41 +S'166929890' +p780 +sg43 +g768 +sssS'grch38' +p781 +(dp782 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p783 +sg35 +(dp784 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p785 +sg41 +S'166073380' +p786 +sg43 +g774 +ssssg65 +(dp787 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_008851.3' +p788 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_006920.4' +p789 +sssS'NM_001353960.1:c.233_242delinsGT' +p790 +(dp791 +g3 +g4 +sg5 +(lp792 +S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' +p793 +aS'RefSeqGene record not available' +p794 +asg9 +g4 +sg10 +(lp795 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 14, mRNA +p796 +sg14 +S'SCN1A' +p797 +sg16 +(dp798 +g18 +S'NP_001340889.1:p.(Glu78GlyfsTer7)' +p799 +sg20 +S'NP_001340889.1:p.(E78Gfs*7)' +p800 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001353960.1:c.233_242delinsGT' +p801 +sg28 +g4 +sg29 +(dp802 +S'hg19' +p803 +(dp804 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p805 +sg35 +(dp806 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p807 +sg41 +S'166929890' +p808 +sg43 +VAC +p809 +sssg45 +(dp810 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p811 +sg35 +(dp812 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p813 +sg41 +S'166073380' +p814 +sg43 +VAC +p815 +sssS'grch37' +p816 +(dp817 
+g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p818 +sg35 +(dp819 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p820 +sg41 +S'166929890' +p821 +sg43 +g809 +sssS'grch38' +p822 +(dp823 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p824 +sg35 +(dp825 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p826 +sg41 +S'166073380' +p827 +sg43 +g815 +ssssg65 +(dp828 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340889.1' +p829 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353960.1' +p830 +sssS'NM_001165964.2:c.233_242delinsGT' +p831 +(dp832 +g3 +g4 +sg5 +(lp833 +S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' +p834 +aS'RefSeqGene record not available' +p835 +asg9 +g4 +sg10 +(lp836 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 3, mRNA +p837 +sg14 +S'SCN1A' +p838 +sg16 +(dp839 +g18 +S'NP_001159436.1:p.(Glu78GlyfsTer7)' +p840 +sg20 +S'NP_001159436.1:p.(E78Gfs*7)' +p841 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001165964.2:c.233_242delinsGT' +p842 +sg28 +g4 +sg29 +(dp843 +S'hg19' +p844 +(dp845 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p846 +sg35 +(dp847 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p848 +sg41 +S'166929890' +p849 +sg43 +VAC +p850 +sssg45 +(dp851 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p852 +sg35 +(dp853 +g37 +g38 +sg39 +S'TCCAGGTCCT' +p854 +sg41 +S'166073380' +p855 +sg43 +VAC +p856 +sssS'grch37' +p857 +(dp858 +g33 +S'NC_000002.11:g.166929890_166929899delinsAC' +p859 +sg35 +(dp860 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p861 +sg41 +S'166929890' +p862 +sg43 +g850 +sssS'grch38' +p863 +(dp864 +g33 +S'NC_000002.12:g.166073380_166073389delinsAC' +p865 +sg35 +(dp866 +g37 +g56 +sg39 +S'TCCAGGTCCT' +p867 +sg41 +S'166073380' +p868 +sg43 +g856 +ssssg65 +(dp869 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159436.1' +p870 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165964.2' +p871 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant272.txt b/VariantValidator/testing/testOutputsMasterITS/variant272.txt new file mode 100644 index 00000000..4a1c9595 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant272.txt @@ -0,0 +1,2495 @@ +(dp0 +S'NR_148667.1:n.638_645del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 16, non-coding RNA +p13 +sS'gene_symbol' +p14 +S'SCN1A' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'Non-coding :n.' +p19 +sS'slr' +p20 +g19 +ssS'submitted_variant' +p21 +S'2-166929891-CCAGGTCCT-C' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NR_148667.1:n.638_645del' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000002.11:g.166929892_166929899del' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr2' +p37 +sS'ref' +p38 +S'CCAGGTCCT' +p39 +sS'pos' +p40 +S'166929891' +p41 +sS'alt' +p42 +S'C' +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000002.12:g.166073382_166073389del' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +S'CCAGGTCCT' +p48 +sg40 +S'166073381' +p49 +sg42 +g43 +sssS'grch37' +p50 +(dp51 +g32 +S'NC_000002.11:g.166929892_166929899del' +p52 +sg34 +(dp53 +g36 +S'2' +p54 +sg38 +S'CCAGGTCCT' +p55 +sg40 +S'166929891' +p56 +sg42 +g43 +sssS'grch38' +p57 +(dp58 +g32 +S'NC_000002.12:g.166073382_166073389del' +p59 +sg34 +(dp60 +g36 +g54 +sg38 +S'CCAGGTCCT' +p61 +sg40 +S'166073381' +p62 +sg42 +g43 +ssssS'reference_sequence_records' 
+p63 +(dp64 +S'transcript' +p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_148667.1' +p66 +sssS'NM_001165964.2:c.233_240del' +p67 +(dp68 +g3 +g4 +sg5 +(lp69 +S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' +p70 +aS'RefSeqGene record not available' +p71 +asg9 +g4 +sg10 +(lp72 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 3, mRNA +p73 +sg14 +S'SCN1A' +p74 +sg16 +(dp75 +g18 +S'NP_001159436.1:p.(Glu78GlyfsTer7)' +p76 +sg20 +S'NP_001159436.1:p.(E78Gfs*7)' +p77 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001165964.2:c.233_240del' +p78 +sg27 +g4 +sg28 +(dp79 +S'hg19' +p80 +(dp81 +g32 +S'NC_000002.11:g.166929892_166929899del' +p82 +sg34 +(dp83 +g36 +g37 +sg38 +S'CCAGGTCCT' +p84 +sg40 +S'166929891' +p85 +sg42 +g43 +sssg44 +(dp86 +g32 +S'NC_000002.12:g.166073382_166073389del' +p87 +sg34 +(dp88 +g36 +g37 +sg38 +S'CCAGGTCCT' +p89 +sg40 +S'166073381' +p90 +sg42 +g43 +sssS'grch37' +p91 +(dp92 +g32 +S'NC_000002.11:g.166929892_166929899del' +p93 +sg34 +(dp94 +g36 +g54 +sg38 +S'CCAGGTCCT' +p95 +sg40 +S'166929891' +p96 +sg42 +g43 +sssS'grch38' +p97 +(dp98 +g32 +S'NC_000002.12:g.166073382_166073389del' +p99 +sg34 +(dp100 +g36 +g54 +sg38 +S'CCAGGTCCT' +p101 +sg40 +S'166073381' +p102 +sg42 +g43 +ssssg63 +(dp103 +S'protein' +p104 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159436.1' +p105 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165964.2' +p106 +sssS'NM_001353951.1:c.233_240del' +p107 +(dp108 +g3 +g4 +sg5 +(lp109 +S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' +p110 +aS'RefSeqGene record not available' +p111 +asg9 +g4 +sg10 +(lp112 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 8, mRNA +p113 +sg14 +S'SCN1A' +p114 +sg16 +(dp115 +g18 +S'NP_001340880.1:p.(Glu78GlyfsTer7)' +p116 +sg20 +S'NP_001340880.1:p.(E78Gfs*7)' +p117 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001353951.1:c.233_240del' +p118 
+sg27 +g4 +sg28 +(dp119 +S'hg19' +p120 +(dp121 +g32 +S'NC_000002.11:g.166929892_166929899del' +p122 +sg34 +(dp123 +g36 +g37 +sg38 +S'CCAGGTCCT' +p124 +sg40 +S'166929891' +p125 +sg42 +g43 +sssg44 +(dp126 +g32 +S'NC_000002.12:g.166073382_166073389del' +p127 +sg34 +(dp128 +g36 +g37 +sg38 +S'CCAGGTCCT' +p129 +sg40 +S'166073381' +p130 +sg42 +g43 +sssS'grch37' +p131 +(dp132 +g32 +S'NC_000002.11:g.166929892_166929899del' +p133 +sg34 +(dp134 +g36 +g54 +sg38 +S'CCAGGTCCT' +p135 +sg40 +S'166929891' +p136 +sg42 +g43 +sssS'grch38' +p137 +(dp138 +g32 +S'NC_000002.12:g.166073382_166073389del' +p139 +sg34 +(dp140 +g36 +g54 +sg38 +S'CCAGGTCCT' +p141 +sg40 +S'166073381' +p142 +sg42 +g43 +ssssg63 +(dp143 +g104 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340880.1' +p144 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353951.1' +p145 +sssS'NM_001353954.1:c.233_240del' +p146 +(dp147 +g3 +g4 +sg5 +(lp148 +S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' +p149 +aS'RefSeqGene record not available' +p150 +asg9 +g4 +sg10 +(lp151 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 10, mRNA +p152 +sg14 +S'SCN1A' +p153 +sg16 +(dp154 +g18 +S'NP_001340883.1:p.(Glu78GlyfsTer7)' +p155 +sg20 +S'NP_001340883.1:p.(E78Gfs*7)' +p156 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001353954.1:c.233_240del' +p157 +sg27 +g4 +sg28 +(dp158 +S'hg19' +p159 +(dp160 +g32 +S'NC_000002.11:g.166929892_166929899del' +p161 +sg34 +(dp162 +g36 +g37 +sg38 +S'CCAGGTCCT' +p163 +sg40 +S'166929891' +p164 +sg42 +g43 +sssg44 +(dp165 +g32 +S'NC_000002.12:g.166073382_166073389del' +p166 +sg34 +(dp167 +g36 +g37 +sg38 +S'CCAGGTCCT' +p168 +sg40 +S'166073381' +p169 +sg42 +g43 +sssS'grch37' +p170 +(dp171 +g32 +S'NC_000002.11:g.166929892_166929899del' +p172 +sg34 +(dp173 +g36 +g54 +sg38 +S'CCAGGTCCT' +p174 +sg40 +S'166929891' +p175 +sg42 +g43 +sssS'grch38' +p176 +(dp177 +g32 +S'NC_000002.12:g.166073382_166073389del' +p178 +sg34 +(dp179 +g36 +g54 
+sg38 +S'CCAGGTCCT' +p180 +sg40 +S'166073381' +p181 +sg42 +g43 +ssssg63 +(dp182 +g104 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340883.1' +p183 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353954.1' +p184 +sssS'NM_001353961.1:c.-2193_-2186del' +p185 +(dp186 +g3 +g4 +sg5 +(lp187 +S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' +p188 +aS'RefSeqGene record not available' +p189 +asg9 +g4 +sg10 +(lp190 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 15, mRNA +p191 +sg14 +S'SCN1A' +p192 +sg16 +(dp193 +g18 +S'NP_001340890.1:p.?' +p194 +sg20 +S'NP_001340890.1:p.?' +p195 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001353961.1:c.-2193_-2186del' +p196 +sg27 +g4 +sg28 +(dp197 +S'hg19' +p198 +(dp199 +g32 +S'NC_000002.11:g.166929892_166929899del' +p200 +sg34 +(dp201 +g36 +g37 +sg38 +S'CCAGGTCCT' +p202 +sg40 +S'166929891' +p203 +sg42 +g43 +sssg44 +(dp204 +g32 +S'NC_000002.12:g.166073382_166073389del' +p205 +sg34 +(dp206 +g36 +g37 +sg38 +S'CCAGGTCCT' +p207 +sg40 +S'166073381' +p208 +sg42 +g43 +sssS'grch37' +p209 +(dp210 +g32 +S'NC_000002.11:g.166929892_166929899del' +p211 +sg34 +(dp212 +g36 +g54 +sg38 +S'CCAGGTCCT' +p213 +sg40 +S'166929891' +p214 +sg42 +g43 +sssS'grch38' +p215 +(dp216 +g32 +S'NC_000002.12:g.166073382_166073389del' +p217 +sg34 +(dp218 +g36 +g54 +sg38 +S'CCAGGTCCT' +p219 +sg40 +S'166073381' +p220 +sg42 +g43 +ssssg63 +(dp221 +g104 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340890.1' +p222 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353961.1' +p223 +sssS'NM_001353948.1:c.233_240del' +p224 +(dp225 +g3 +g4 +sg5 +(lp226 +S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' +p227 +aS'RefSeqGene record not available' +p228 +asg9 +g4 +sg10 +(lp229 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 5, mRNA +p230 +sg14 +S'SCN1A' +p231 +sg16 +(dp232 +g18 
+S'NP_001340877.1:p.(Glu78GlyfsTer7)' +p233 +sg20 +S'NP_001340877.1:p.(E78Gfs*7)' +p234 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001353948.1:c.233_240del' +p235 +sg27 +g4 +sg28 +(dp236 +S'hg19' +p237 +(dp238 +g32 +S'NC_000002.11:g.166929892_166929899del' +p239 +sg34 +(dp240 +g36 +g37 +sg38 +S'CCAGGTCCT' +p241 +sg40 +S'166929891' +p242 +sg42 +g43 +sssg44 +(dp243 +g32 +S'NC_000002.12:g.166073382_166073389del' +p244 +sg34 +(dp245 +g36 +g37 +sg38 +S'CCAGGTCCT' +p246 +sg40 +S'166073381' +p247 +sg42 +g43 +sssS'grch37' +p248 +(dp249 +g32 +S'NC_000002.11:g.166929892_166929899del' +p250 +sg34 +(dp251 +g36 +g54 +sg38 +S'CCAGGTCCT' +p252 +sg40 +S'166929891' +p253 +sg42 +g43 +sssS'grch38' +p254 +(dp255 +g32 +S'NC_000002.12:g.166073382_166073389del' +p256 +sg34 +(dp257 +g36 +g54 +sg38 +S'CCAGGTCCT' +p258 +sg40 +S'166073381' +p259 +sg42 +g43 +ssssg63 +(dp260 +g104 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340877.1' +p261 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353948.1' +p262 +sssS'NM_001353960.1:c.233_240del' +p263 +(dp264 +g3 +g4 +sg5 +(lp265 +S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' +p266 +aS'RefSeqGene record not available' +p267 +asg9 +g4 +sg10 +(lp268 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 14, mRNA +p269 +sg14 +S'SCN1A' +p270 +sg16 +(dp271 +g18 +S'NP_001340889.1:p.(Glu78GlyfsTer7)' +p272 +sg20 +S'NP_001340889.1:p.(E78Gfs*7)' +p273 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001353960.1:c.233_240del' +p274 +sg27 +g4 +sg28 +(dp275 +S'hg19' +p276 +(dp277 +g32 +S'NC_000002.11:g.166929892_166929899del' +p278 +sg34 +(dp279 +g36 +g37 +sg38 +S'CCAGGTCCT' +p280 +sg40 +S'166929891' +p281 +sg42 +g43 +sssg44 +(dp282 +g32 +S'NC_000002.12:g.166073382_166073389del' +p283 +sg34 +(dp284 +g36 +g37 +sg38 +S'CCAGGTCCT' +p285 +sg40 +S'166073381' +p286 +sg42 +g43 +sssS'grch37' +p287 +(dp288 +g32 +S'NC_000002.11:g.166929892_166929899del' +p289 +sg34 +(dp290 +g36 +g54 +sg38 
+S'CCAGGTCCT' +p291 +sg40 +S'166929891' +p292 +sg42 +g43 +sssS'grch38' +p293 +(dp294 +g32 +S'NC_000002.12:g.166073382_166073389del' +p295 +sg34 +(dp296 +g36 +g54 +sg38 +S'CCAGGTCCT' +p297 +sg40 +S'166073381' +p298 +sg42 +g43 +ssssg63 +(dp299 +g104 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340889.1' +p300 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353960.1' +p301 +sssS'NM_001202435.1:c.233_240del' +p302 +(dp303 +g3 +g4 +sg5 +(lp304 +S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' +p305 +aS'A more recent version of the selected reference sequence NM_001202435.1 is available (NM_001202435.2)' +p306 +aS'NM_001202435.2:c.233_240del MUST be fully validated prior to use in reports' +p307 +aS'select_variants=NM_001202435.2:c.233_240del' +p308 +aS'RefSeqGene record not available' +p309 +asg9 +g4 +sg10 +(lp310 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 4, mRNA +p311 +sg14 +S'SCN1A' +p312 +sg16 +(dp313 +g18 +S'NP_001189364.1:p.(Glu78GlyfsTer7)' +p314 +sg20 +S'NP_001189364.1:p.(E78Gfs*7)' +p315 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001202435.1:c.233_240del' +p316 +sg27 +g4 +sg28 +(dp317 +S'hg19' +p318 +(dp319 +g32 +S'NC_000002.11:g.166929892_166929899del' +p320 +sg34 +(dp321 +g36 +g37 +sg38 +S'CCAGGTCCT' +p322 +sg40 +S'166929891' +p323 +sg42 +g43 +sssg44 +(dp324 +g32 +S'NC_000002.12:g.166073382_166073389del' +p325 +sg34 +(dp326 +g36 +g37 +sg38 +S'CCAGGTCCT' +p327 +sg40 +S'166073381' +p328 +sg42 +g43 +sssS'grch37' +p329 +(dp330 +g32 +S'NC_000002.11:g.166929892_166929899del' +p331 +sg34 +(dp332 +g36 +g54 +sg38 +S'CCAGGTCCT' +p333 +sg40 +S'166929891' +p334 +sg42 +g43 +sssS'grch38' +p335 +(dp336 +g32 +S'NC_000002.12:g.166073382_166073389del' +p337 +sg34 +(dp338 +g36 +g54 +sg38 +S'CCAGGTCCT' +p339 +sg40 +S'166073381' +p340 +sg42 +g43 +ssssg63 +(dp341 +g104 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001189364.1' +p342 +sg65 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001202435.1' +p343 +sssS'NM_001202435.2:c.233_240del' +p344 +(dp345 +g3 +g4 +sg5 +(lp346 +S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' +p347 +aS'RefSeqGene record not available' +p348 +asg9 +g4 +sg10 +(lp349 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 4, mRNA +p350 +sg14 +S'SCN1A' +p351 +sg16 +(dp352 +g18 +S'NP_001189364.1:p.(Glu78GlyfsTer7)' +p353 +sg20 +S'NP_001189364.1:p.(E78Gfs*7)' +p354 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001202435.2:c.233_240del' +p355 +sg27 +g4 +sg28 +(dp356 +S'hg19' +p357 +(dp358 +g32 +S'NC_000002.11:g.166929892_166929899del' +p359 +sg34 +(dp360 +g36 +g37 +sg38 +S'CCAGGTCCT' +p361 +sg40 +S'166929891' +p362 +sg42 +g43 +sssg44 +(dp363 +g32 +S'NC_000002.12:g.166073382_166073389del' +p364 +sg34 +(dp365 +g36 +g37 +sg38 +S'CCAGGTCCT' +p366 +sg40 +S'166073381' +p367 +sg42 +g43 +sssS'grch37' +p368 +(dp369 +g32 +S'NC_000002.11:g.166929892_166929899del' +p370 +sg34 +(dp371 +g36 +g54 +sg38 +S'CCAGGTCCT' +p372 +sg40 +S'166929891' +p373 +sg42 +g43 +sssS'grch38' +p374 +(dp375 +g32 +S'NC_000002.12:g.166073382_166073389del' +p376 +sg34 +(dp377 +g36 +g54 +sg38 +S'CCAGGTCCT' +p378 +sg40 +S'166073381' +p379 +sg42 +g43 +ssssg63 +(dp380 +g104 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001189364.1' +p381 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001202435.2' +p382 +sssS'metadata' +p383 +(dp384 +S'variantvalidator_hgvs_version' +p385 +S'1.1.3' +p386 +sS'uta_schema' +p387 +S'uta_20180821' +p388 +sS'seqrepo_db' +p389 +S'2018-08-21' +p390 +sS'variantvalidator_version' +p391 +S'v0.2' +p392 +ssS'NM_006920.5:c.233_240del' +p393 +(dp394 +g3 +g4 +sg5 +(lp395 +S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' +p396 +aS'RefSeqGene record not available' +p397 +asg9 +g4 +sg10 +(lp398 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 2, mRNA 
+p399 +sg14 +S'SCN1A' +p400 +sg16 +(dp401 +g18 +S'NP_008851.3:p.(Glu78GlyfsTer7)' +p402 +sg20 +S'NP_008851.3:p.(E78Gfs*7)' +p403 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_006920.5:c.233_240del' +p404 +sg27 +g4 +sg28 +(dp405 +S'hg19' +p406 +(dp407 +g32 +S'NC_000002.11:g.166929892_166929899del' +p408 +sg34 +(dp409 +g36 +g37 +sg38 +S'CCAGGTCCT' +p410 +sg40 +S'166929891' +p411 +sg42 +g43 +sssg44 +(dp412 +g32 +S'NC_000002.12:g.166073382_166073389del' +p413 +sg34 +(dp414 +g36 +g37 +sg38 +S'CCAGGTCCT' +p415 +sg40 +S'166073381' +p416 +sg42 +g43 +sssS'grch37' +p417 +(dp418 +g32 +S'NC_000002.11:g.166929892_166929899del' +p419 +sg34 +(dp420 +g36 +g54 +sg38 +S'CCAGGTCCT' +p421 +sg40 +S'166929891' +p422 +sg42 +g43 +sssS'grch38' +p423 +(dp424 +g32 +S'NC_000002.12:g.166073382_166073389del' +p425 +sg34 +(dp426 +g36 +g54 +sg38 +S'CCAGGTCCT' +p427 +sg40 +S'166073381' +p428 +sg42 +g43 +ssssg63 +(dp429 +g104 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_008851.3' +p430 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_006920.5' +p431 +sssS'NM_001353955.1:c.233_240del' +p432 +(dp433 +g3 +g4 +sg5 +(lp434 +S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' +p435 +aS'RefSeqGene record not available' +p436 +asg9 +g4 +sg10 +(lp437 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 11, mRNA +p438 +sg14 +S'SCN1A' +p439 +sg16 +(dp440 +g18 +S'NP_001340884.1:p.(Glu78GlyfsTer7)' +p441 +sg20 +S'NP_001340884.1:p.(E78Gfs*7)' +p442 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001353955.1:c.233_240del' +p443 +sg27 +g4 +sg28 +(dp444 +S'hg19' +p445 +(dp446 +g32 +S'NC_000002.11:g.166929892_166929899del' +p447 +sg34 +(dp448 +g36 +g37 +sg38 +S'CCAGGTCCT' +p449 +sg40 +S'166929891' +p450 +sg42 +g43 +sssg44 +(dp451 +g32 +S'NC_000002.12:g.166073382_166073389del' +p452 +sg34 +(dp453 +g36 +g37 +sg38 +S'CCAGGTCCT' +p454 +sg40 +S'166073381' +p455 +sg42 +g43 +sssS'grch37' +p456 +(dp457 +g32 +S'NC_000002.11:g.166929892_166929899del' 
+p458 +sg34 +(dp459 +g36 +g54 +sg38 +S'CCAGGTCCT' +p460 +sg40 +S'166929891' +p461 +sg42 +g43 +sssS'grch38' +p462 +(dp463 +g32 +S'NC_000002.12:g.166073382_166073389del' +p464 +sg34 +(dp465 +g36 +g54 +sg38 +S'CCAGGTCCT' +p466 +sg40 +S'166073381' +p467 +sg42 +g43 +ssssg63 +(dp468 +g104 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340884.1' +p469 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353955.1' +p470 +sssS'NM_001353952.1:c.233_240del' +p471 +(dp472 +g3 +g4 +sg5 +(lp473 +S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' +p474 +aS'RefSeqGene record not available' +p475 +asg9 +g4 +sg10 +(lp476 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 9, mRNA +p477 +sg14 +S'SCN1A' +p478 +sg16 +(dp479 +g18 +S'NP_001340881.1:p.(Glu78GlyfsTer7)' +p480 +sg20 +S'NP_001340881.1:p.(E78Gfs*7)' +p481 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001353952.1:c.233_240del' +p482 +sg27 +g4 +sg28 +(dp483 +S'hg19' +p484 +(dp485 +g32 +S'NC_000002.11:g.166929892_166929899del' +p486 +sg34 +(dp487 +g36 +g37 +sg38 +S'CCAGGTCCT' +p488 +sg40 +S'166929891' +p489 +sg42 +g43 +sssg44 +(dp490 +g32 +S'NC_000002.12:g.166073382_166073389del' +p491 +sg34 +(dp492 +g36 +g37 +sg38 +S'CCAGGTCCT' +p493 +sg40 +S'166073381' +p494 +sg42 +g43 +sssS'grch37' +p495 +(dp496 +g32 +S'NC_000002.11:g.166929892_166929899del' +p497 +sg34 +(dp498 +g36 +g54 +sg38 +S'CCAGGTCCT' +p499 +sg40 +S'166929891' +p500 +sg42 +g43 +sssS'grch38' +p501 +(dp502 +g32 +S'NC_000002.12:g.166073382_166073389del' +p503 +sg34 +(dp504 +g36 +g54 +sg38 +S'CCAGGTCCT' +p505 +sg40 +S'166073381' +p506 +sg42 +g43 +ssssg63 +(dp507 +g104 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340881.1' +p508 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353952.1' +p509 +sssS'NM_001353957.1:c.233_240del' +p510 +(dp511 +g3 +g4 +sg5 +(lp512 +S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' +p513 +aS'RefSeqGene record not available' 
+p514 +asg9 +g4 +sg10 +(lp515 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 12, mRNA +p516 +sg14 +S'SCN1A' +p517 +sg16 +(dp518 +g18 +S'NP_001340886.1:p.(Glu78GlyfsTer7)' +p519 +sg20 +S'NP_001340886.1:p.(E78Gfs*7)' +p520 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001353957.1:c.233_240del' +p521 +sg27 +g4 +sg28 +(dp522 +S'hg19' +p523 +(dp524 +g32 +S'NC_000002.11:g.166929892_166929899del' +p525 +sg34 +(dp526 +g36 +g37 +sg38 +S'CCAGGTCCT' +p527 +sg40 +S'166929891' +p528 +sg42 +g43 +sssg44 +(dp529 +g32 +S'NC_000002.12:g.166073382_166073389del' +p530 +sg34 +(dp531 +g36 +g37 +sg38 +S'CCAGGTCCT' +p532 +sg40 +S'166073381' +p533 +sg42 +g43 +sssS'grch37' +p534 +(dp535 +g32 +S'NC_000002.11:g.166929892_166929899del' +p536 +sg34 +(dp537 +g36 +g54 +sg38 +S'CCAGGTCCT' +p538 +sg40 +S'166929891' +p539 +sg42 +g43 +sssS'grch38' +p540 +(dp541 +g32 +S'NC_000002.12:g.166073382_166073389del' +p542 +sg34 +(dp543 +g36 +g54 +sg38 +S'CCAGGTCCT' +p544 +sg40 +S'166073381' +p545 +sg42 +g43 +ssssg63 +(dp546 +g104 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340886.1' +p547 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353957.1' +p548 +sssS'flag' +p549 +S'gene_variant' +p550 +sS'NM_006920.4:c.233_240del' +p551 +(dp552 +g3 +g4 +sg5 +(lp553 +S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' +p554 +aS'A more recent version of the selected reference sequence NM_006920.4 is available (NM_006920.5)' +p555 +aS'NM_006920.5:c.233_240del MUST be fully validated prior to use in reports' +p556 +aS'select_variants=NM_006920.5:c.233_240del' +p557 +aS'RefSeqGene record not available' +p558 +asg9 +g4 +sg10 +(lp559 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 2, mRNA +p560 +sg14 +S'SCN1A' +p561 +sg16 +(dp562 +g18 +S'NP_008851.3:p.(Glu78GlyfsTer7)' +p563 +sg20 +S'NP_008851.3:p.(E78Gfs*7)' +p564 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_006920.4:c.233_240del' +p565 +sg27 
+g4 +sg28 +(dp566 +S'hg19' +p567 +(dp568 +g32 +S'NC_000002.11:g.166929892_166929899del' +p569 +sg34 +(dp570 +g36 +g37 +sg38 +S'CCAGGTCCT' +p571 +sg40 +S'166929891' +p572 +sg42 +g43 +sssg44 +(dp573 +g32 +S'NC_000002.12:g.166073382_166073389del' +p574 +sg34 +(dp575 +g36 +g37 +sg38 +S'CCAGGTCCT' +p576 +sg40 +S'166073381' +p577 +sg42 +g43 +sssS'grch37' +p578 +(dp579 +g32 +S'NC_000002.11:g.166929892_166929899del' +p580 +sg34 +(dp581 +g36 +g54 +sg38 +S'CCAGGTCCT' +p582 +sg40 +S'166929891' +p583 +sg42 +g43 +sssS'grch38' +p584 +(dp585 +g32 +S'NC_000002.12:g.166073382_166073389del' +p586 +sg34 +(dp587 +g36 +g54 +sg38 +S'CCAGGTCCT' +p588 +sg40 +S'166073381' +p589 +sg42 +g43 +ssssg63 +(dp590 +g104 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_008851.3' +p591 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_006920.4' +p592 +sssS'NM_001353950.1:c.233_240del' +p593 +(dp594 +g3 +g4 +sg5 +(lp595 +S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' +p596 +aS'RefSeqGene record not available' +p597 +asg9 +g4 +sg10 +(lp598 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 7, mRNA +p599 +sg14 +S'SCN1A' +p600 +sg16 +(dp601 +g18 +S'NP_001340879.1:p.(Glu78GlyfsTer7)' +p602 +sg20 +S'NP_001340879.1:p.(E78Gfs*7)' +p603 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001353950.1:c.233_240del' +p604 +sg27 +g4 +sg28 +(dp605 +S'hg19' +p606 +(dp607 +g32 +S'NC_000002.11:g.166929892_166929899del' +p608 +sg34 +(dp609 +g36 +g37 +sg38 +S'CCAGGTCCT' +p610 +sg40 +S'166929891' +p611 +sg42 +g43 +sssg44 +(dp612 +g32 +S'NC_000002.12:g.166073382_166073389del' +p613 +sg34 +(dp614 +g36 +g37 +sg38 +S'CCAGGTCCT' +p615 +sg40 +S'166073381' +p616 +sg42 +g43 +sssS'grch37' +p617 +(dp618 +g32 +S'NC_000002.11:g.166929892_166929899del' +p619 +sg34 +(dp620 +g36 +g54 +sg38 +S'CCAGGTCCT' +p621 +sg40 +S'166929891' +p622 +sg42 +g43 +sssS'grch38' +p623 +(dp624 +g32 +S'NC_000002.12:g.166073382_166073389del' +p625 +sg34 +(dp626 +g36 +g54 +sg38 
+S'CCAGGTCCT' +p627 +sg40 +S'166073381' +p628 +sg42 +g43 +ssssg63 +(dp629 +g104 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340879.1' +p630 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353950.1' +p631 +sssS'NM_001165963.2:c.233_240del' +p632 +(dp633 +g3 +g4 +sg5 +(lp634 +S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' +p635 +aS'RefSeqGene record not available' +p636 +asg9 +g4 +sg10 +(lp637 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 1, mRNA +p638 +sg14 +S'SCN1A' +p639 +sg16 +(dp640 +g18 +S'NP_001159435.1:p.(Glu78GlyfsTer7)' +p641 +sg20 +S'NP_001159435.1:p.(E78Gfs*7)' +p642 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001165963.2:c.233_240del' +p643 +sg27 +g4 +sg28 +(dp644 +S'hg19' +p645 +(dp646 +g32 +S'NC_000002.11:g.166929892_166929899del' +p647 +sg34 +(dp648 +g36 +g37 +sg38 +S'CCAGGTCCT' +p649 +sg40 +S'166929891' +p650 +sg42 +g43 +sssg44 +(dp651 +g32 +S'NC_000002.12:g.166073382_166073389del' +p652 +sg34 +(dp653 +g36 +g37 +sg38 +S'CCAGGTCCT' +p654 +sg40 +S'166073381' +p655 +sg42 +g43 +sssS'grch37' +p656 +(dp657 +g32 +S'NC_000002.11:g.166929892_166929899del' +p658 +sg34 +(dp659 +g36 +g54 +sg38 +S'CCAGGTCCT' +p660 +sg40 +S'166929891' +p661 +sg42 +g43 +sssS'grch38' +p662 +(dp663 +g32 +S'NC_000002.12:g.166073382_166073389del' +p664 +sg34 +(dp665 +g36 +g54 +sg38 +S'CCAGGTCCT' +p666 +sg40 +S'166073381' +p667 +sg42 +g43 +ssssg63 +(dp668 +g104 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159435.1' +p669 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165963.2' +p670 +sssS'NM_001165963.1:c.233_240del' +p671 +(dp672 +g3 +g4 +sg5 +(lp673 +S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' +p674 +aS'A more recent version of the selected reference sequence NM_001165963.1 is available (NM_001165963.2)' +p675 +aS'NM_001165963.2:c.233_240del MUST be fully validated prior to use in reports' +p676 
+aS'select_variants=NM_001165963.2:c.233_240del' +p677 +aS'RefSeqGene record not available' +p678 +asg9 +g4 +sg10 +(lp679 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 1, mRNA +p680 +sg14 +S'SCN1A' +p681 +sg16 +(dp682 +g18 +S'NP_001159435.1:p.(Glu78GlyfsTer7)' +p683 +sg20 +S'NP_001159435.1:p.(E78Gfs*7)' +p684 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001165963.1:c.233_240del' +p685 +sg27 +g4 +sg28 +(dp686 +S'hg19' +p687 +(dp688 +g32 +S'NC_000002.11:g.166929892_166929899del' +p689 +sg34 +(dp690 +g36 +g37 +sg38 +S'CCAGGTCCT' +p691 +sg40 +S'166929891' +p692 +sg42 +g43 +sssg44 +(dp693 +g32 +S'NC_000002.12:g.166073382_166073389del' +p694 +sg34 +(dp695 +g36 +g37 +sg38 +S'CCAGGTCCT' +p696 +sg40 +S'166073381' +p697 +sg42 +g43 +sssS'grch37' +p698 +(dp699 +g32 +S'NC_000002.11:g.166929892_166929899del' +p700 +sg34 +(dp701 +g36 +g54 +sg38 +S'CCAGGTCCT' +p702 +sg40 +S'166929891' +p703 +sg42 +g43 +sssS'grch38' +p704 +(dp705 +g32 +S'NC_000002.12:g.166073382_166073389del' +p706 +sg34 +(dp707 +g36 +g54 +sg38 +S'CCAGGTCCT' +p708 +sg40 +S'166073381' +p709 +sg42 +g43 +ssssg63 +(dp710 +g104 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159435.1' +p711 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165963.1' +p712 +sssS'NM_001165964.1:c.233_240del' +p713 +(dp714 +g3 +g4 +sg5 +(lp715 +S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' +p716 +aS'A more recent version of the selected reference sequence NM_001165964.1 is available (NM_001165964.2)' +p717 +aS'NM_001165964.2:c.233_240del MUST be fully validated prior to use in reports' +p718 +aS'select_variants=NM_001165964.2:c.233_240del' +p719 +aS'RefSeqGene record not available' +p720 +asg9 +g4 +sg10 +(lp721 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 3, mRNA +p722 +sg14 +S'SCN1A' +p723 +sg16 +(dp724 +g18 +S'NP_001159436.1:p.(Glu78GlyfsTer7)' +p725 +sg20 +S'NP_001159436.1:p.(E78Gfs*7)' +p726 
+ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001165964.1:c.233_240del' +p727 +sg27 +g4 +sg28 +(dp728 +S'hg19' +p729 +(dp730 +g32 +S'NC_000002.11:g.166929892_166929899del' +p731 +sg34 +(dp732 +g36 +g37 +sg38 +S'CCAGGTCCT' +p733 +sg40 +S'166929891' +p734 +sg42 +g43 +sssg44 +(dp735 +g32 +S'NC_000002.12:g.166073382_166073389del' +p736 +sg34 +(dp737 +g36 +g37 +sg38 +S'CCAGGTCCT' +p738 +sg40 +S'166073381' +p739 +sg42 +g43 +sssS'grch37' +p740 +(dp741 +g32 +S'NC_000002.11:g.166929892_166929899del' +p742 +sg34 +(dp743 +g36 +g54 +sg38 +S'CCAGGTCCT' +p744 +sg40 +S'166929891' +p745 +sg42 +g43 +sssS'grch38' +p746 +(dp747 +g32 +S'NC_000002.12:g.166073382_166073389del' +p748 +sg34 +(dp749 +g36 +g54 +sg38 +S'CCAGGTCCT' +p750 +sg40 +S'166073381' +p751 +sg42 +g43 +ssssg63 +(dp752 +g104 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159436.1' +p753 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165964.1' +p754 +sssS'NM_001353958.1:c.233_240del' +p755 +(dp756 +g3 +g4 +sg5 +(lp757 +S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' +p758 +aS'RefSeqGene record not available' +p759 +asg9 +g4 +sg10 +(lp760 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 13, mRNA +p761 +sg14 +S'SCN1A' +p762 +sg16 +(dp763 +g18 +S'NP_001340887.1:p.(Glu78GlyfsTer7)' +p764 +sg20 +S'NP_001340887.1:p.(E78Gfs*7)' +p765 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001353958.1:c.233_240del' +p766 +sg27 +g4 +sg28 +(dp767 +S'hg19' +p768 +(dp769 +g32 +S'NC_000002.11:g.166929892_166929899del' +p770 +sg34 +(dp771 +g36 +g37 +sg38 +S'CCAGGTCCT' +p772 +sg40 +S'166929891' +p773 +sg42 +g43 +sssg44 +(dp774 +g32 +S'NC_000002.12:g.166073382_166073389del' +p775 +sg34 +(dp776 +g36 +g37 +sg38 +S'CCAGGTCCT' +p777 +sg40 +S'166073381' +p778 +sg42 +g43 +sssS'grch37' +p779 +(dp780 +g32 +S'NC_000002.11:g.166929892_166929899del' +p781 +sg34 +(dp782 +g36 +g54 +sg38 +S'CCAGGTCCT' +p783 +sg40 +S'166929891' +p784 +sg42 +g43 +sssS'grch38' +p785 +(dp786 
+g32 +S'NC_000002.12:g.166073382_166073389del' +p787 +sg34 +(dp788 +g36 +g54 +sg38 +S'CCAGGTCCT' +p789 +sg40 +S'166073381' +p790 +sg42 +g43 +ssssg63 +(dp791 +g104 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340887.1' +p792 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353958.1' +p793 +sssS'NM_001353949.1:c.233_240del' +p794 +(dp795 +g3 +g4 +sg5 +(lp796 +S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' +p797 +aS'RefSeqGene record not available' +p798 +asg9 +g4 +sg10 +(lp799 +sg12 +VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 6, mRNA +p800 +sg14 +S'SCN1A' +p801 +sg16 +(dp802 +g18 +S'NP_001340878.1:p.(Glu78GlyfsTer7)' +p803 +sg20 +S'NP_001340878.1:p.(E78Gfs*7)' +p804 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001353949.1:c.233_240del' +p805 +sg27 +g4 +sg28 +(dp806 +S'hg19' +p807 +(dp808 +g32 +S'NC_000002.11:g.166929892_166929899del' +p809 +sg34 +(dp810 +g36 +g37 +sg38 +S'CCAGGTCCT' +p811 +sg40 +S'166929891' +p812 +sg42 +g43 +sssg44 +(dp813 +g32 +S'NC_000002.12:g.166073382_166073389del' +p814 +sg34 +(dp815 +g36 +g37 +sg38 +S'CCAGGTCCT' +p816 +sg40 +S'166073381' +p817 +sg42 +g43 +sssS'grch37' +p818 +(dp819 +g32 +S'NC_000002.11:g.166929892_166929899del' +p820 +sg34 +(dp821 +g36 +g54 +sg38 +S'CCAGGTCCT' +p822 +sg40 +S'166929891' +p823 +sg42 +g43 +sssS'grch38' +p824 +(dp825 +g32 +S'NC_000002.12:g.166073382_166073389del' +p826 +sg34 +(dp827 +g36 +g54 +sg38 +S'CCAGGTCCT' +p828 +sg40 +S'166073381' +p829 +sg42 +g43 +ssssg63 +(dp830 +g104 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340878.1' +p831 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353949.1' +p832 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant273.txt b/VariantValidator/testing/testOutputsMasterITS/variant273.txt new file mode 100644 index 00000000..7433d100 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant273.txt @@ -0,0 +1,1148 @@ +(dp0 +S'NM_001256850.1:c.102051C>A' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens titin (TTN), transcript variant N2BA, mRNA +p12 +sS'gene_symbol' +p13 +S'TTN' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_001243779.1:p.(Ser34017Arg)' +p18 +sS'slr' +p19 +S'NP_001243779.1:p.(S34017R)' +p20 +ssS'submitted_variant' +p21 +S'2-179393504-G-T' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_001256850.1:c.102051C>A' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000002.11:g.179393504G>T' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr2' +p37 +sS'ref' +p38 +VG +p39 +sS'pos' +p40 +S'179393504' +p41 +sS'alt' +p42 +VT +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000002.12:g.178528777G>T' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +g39 +sg40 +S'178528777' +p48 +sg42 +g43 +sssS'grch37' +p49 +(dp50 +g32 +S'NC_000002.11:g.179393504G>T' +p51 +sg34 +(dp52 +g36 +S'2' +p53 +sg38 +g39 +sg40 +S'179393504' +p54 +sg42 +g43 +sssS'grch38' +p55 +(dp56 +g32 +S'NC_000002.12:g.178528777G>T' +p57 +sg34 +(dp58 +g36 +g53 +sg38 +g39 +sg40 +S'178528777' +p59 +sg42 +g43 +ssssS'reference_sequence_records' +p60 +(dp61 +S'protein' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243779.1' +p63 +sS'transcript' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256850.1' +p65 
+sssS'NM_133378.4:c.99270C>A' +p66 +(dp67 +g3 +g4 +sg5 +(lp68 +S'RefSeqGene record not available' +p69 +asg8 +g4 +sg9 +(lp70 +sg11 +VHomo sapiens titin (TTN), transcript variant N2-A, mRNA +p71 +sg13 +S'TTN' +p72 +sg15 +(dp73 +g17 +S'NP_596869.4:p.(Ser33090Arg)' +p74 +sg19 +S'NP_596869.4:p.(S33090R)' +p75 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_133378.4:c.99270C>A' +p76 +sg27 +g4 +sg28 +(dp77 +S'hg19' +p78 +(dp79 +g32 +S'NC_000002.11:g.179393504G>T' +p80 +sg34 +(dp81 +g36 +g37 +sg38 +g39 +sg40 +S'179393504' +p82 +sg42 +g43 +sssg44 +(dp83 +g32 +S'NC_000002.12:g.178528777G>T' +p84 +sg34 +(dp85 +g36 +g37 +sg38 +g39 +sg40 +S'178528777' +p86 +sg42 +g43 +sssS'grch37' +p87 +(dp88 +g32 +S'NC_000002.11:g.179393504G>T' +p89 +sg34 +(dp90 +g36 +g53 +sg38 +g39 +sg40 +S'179393504' +p91 +sg42 +g43 +sssS'grch38' +p92 +(dp93 +g32 +S'NC_000002.12:g.178528777G>T' +p94 +sg34 +(dp95 +g36 +g53 +sg38 +g39 +sg40 +S'178528777' +p96 +sg42 +g43 +ssssg60 +(dp97 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_596869.4' +p98 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_133378.4' +p99 +sssS'NM_133432.3:c.80154C>A' +p100 +(dp101 +g3 +g4 +sg5 +(lp102 +S'RefSeqGene record not available' +p103 +asg8 +g4 +sg9 +(lp104 +sg11 +VHomo sapiens titin (TTN), transcript variant novex-1, mRNA +p105 +sg13 +S'TTN' +p106 +sg15 +(dp107 +g17 +S'NP_597676.3:p.(Ser26718Arg)' +p108 +sg19 +S'NP_597676.3:p.(S26718R)' +p109 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_133432.3:c.80154C>A' +p110 +sg27 +g4 +sg28 +(dp111 +S'hg19' +p112 +(dp113 +g32 +S'NC_000002.11:g.179393504G>T' +p114 +sg34 +(dp115 +g36 +g37 +sg38 +g39 +sg40 +S'179393504' +p116 +sg42 +g43 +sssg44 +(dp117 +g32 +S'NC_000002.12:g.178528777G>T' +p118 +sg34 +(dp119 +g36 +g37 +sg38 +g39 +sg40 +S'178528777' +p120 +sg42 +g43 +sssS'grch37' +p121 +(dp122 +g32 +S'NC_000002.11:g.179393504G>T' +p123 +sg34 +(dp124 +g36 +g53 +sg38 +g39 +sg40 +S'179393504' +p125 +sg42 +g43 +sssS'grch38' +p126 +(dp127 +g32 +S'NC_000002.12:g.178528777G>T' +p128 +sg34 +(dp129 +g36 
+g53 +sg38 +g39 +sg40 +S'178528777' +p130 +sg42 +g43 +ssssg60 +(dp131 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_597676.3' +p132 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_133432.3' +p133 +sssS'NM_001267550.1:c.106974C>A' +p134 +(dp135 +g3 +g4 +sg5 +(lp136 +S'A more recent version of the selected reference sequence NM_001267550.1 is available (NM_001267550.2)' +p137 +aS'NM_001267550.2:c.106974C>A MUST be fully validated prior to use in reports' +p138 +aS'select_variants=NM_001267550.2:c.106974C>A' +p139 +aS'RefSeqGene record not available' +p140 +asg8 +g4 +sg9 +(lp141 +sg11 +VHomo sapiens titin (TTN), transcript variant IC, mRNA +p142 +sg13 +S'TTN' +p143 +sg15 +(dp144 +g17 +S'NP_001254479.1:p.(Ser35658Arg)' +p145 +sg19 +S'NP_001254479.1:p.(S35658R)' +p146 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001267550.1:c.106974C>A' +p147 +sg27 +g4 +sg28 +(dp148 +S'hg19' +p149 +(dp150 +g32 +S'NC_000002.11:g.179393504G>T' +p151 +sg34 +(dp152 +g36 +g37 +sg38 +g39 +sg40 +S'179393504' +p153 +sg42 +g43 +sssS'grch37' +p154 +(dp155 +g32 +S'NC_000002.11:g.179393504G>T' +p156 +sg34 +(dp157 +g36 +g53 +sg38 +g39 +sg40 +S'179393504' +p158 +sg42 +g43 +ssssg60 +(dp159 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001254479.1' +p160 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001267550.1' +p161 +sssS'NR_038272.1:n.219+5141G>T' +p162 +(dp163 +g3 +g4 +sg5 +(lp164 +S'RefSeqGene record not available' +p165 +asg8 +g4 +sg9 +(lp166 +sg11 +VHomo sapiens TTN antisense RNA 1 (TTN-AS1), transcript variant 1, long non-coding RNA +p167 +sg13 +S'TTN-AS1' +p168 +sg15 +(dp169 +g17 +S'Non-coding :n.' 
+p170 +sg19 +g170 +ssg21 +g22 +sg23 +S'NC_000002.11(NR_038272.1):c.219+5141G>T' +p171 +sg24 +g4 +sg25 +S'NR_038272.1:n.219+5141G>T' +p172 +sg27 +g4 +sg28 +(dp173 +S'hg19' +p174 +(dp175 +g32 +S'NC_000002.11:g.179393504G>T' +p176 +sg34 +(dp177 +g36 +g37 +sg38 +S'G' +p178 +sg40 +S'179393504' +p179 +sg42 +S'T' +p180 +sssg44 +(dp181 +g32 +S'NC_000002.12:g.178528777G>T' +p182 +sg34 +(dp183 +g36 +g37 +sg38 +g178 +sg40 +S'178528777' +p184 +sg42 +g180 +sssS'grch37' +p185 +(dp186 +g32 +S'NC_000002.11:g.179393504G>T' +p187 +sg34 +(dp188 +g36 +g53 +sg38 +g178 +sg40 +S'179393504' +p189 +sg42 +g180 +sssS'grch38' +p190 +(dp191 +g32 +S'NC_000002.12:g.178528777G>T' +p192 +sg34 +(dp193 +g36 +g53 +sg38 +g178 +sg40 +S'178528777' +p194 +sg42 +g180 +ssssg60 +(dp195 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_038272.1' +p196 +sssS'NM_133437.4:c.80355C>A' +p197 +(dp198 +g3 +g4 +sg5 +(lp199 +S'RefSeqGene record not available' +p200 +asg8 +g4 +sg9 +(lp201 +sg11 +VHomo sapiens titin (TTN), transcript variant novex-2, mRNA +p202 +sg13 +S'TTN' +p203 +sg15 +(dp204 +g17 +S'NP_597681.4:p.(Ser26785Arg)' +p205 +sg19 +S'NP_597681.4:p.(S26785R)' +p206 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_133437.4:c.80355C>A' +p207 +sg27 +g4 +sg28 +(dp208 +S'hg19' +p209 +(dp210 +g32 +S'NC_000002.11:g.179393504G>T' +p211 +sg34 +(dp212 +g36 +g37 +sg38 +g39 +sg40 +S'179393504' +p213 +sg42 +g43 +sssg44 +(dp214 +g32 +S'NC_000002.12:g.178528777G>T' +p215 +sg34 +(dp216 +g36 +g37 +sg38 +g39 +sg40 +S'178528777' +p217 +sg42 +g43 +sssS'grch37' +p218 +(dp219 +g32 +S'NC_000002.11:g.179393504G>T' +p220 +sg34 +(dp221 +g36 +g53 +sg38 +g39 +sg40 +S'179393504' +p222 +sg42 +g43 +sssS'grch38' +p223 +(dp224 +g32 +S'NC_000002.12:g.178528777G>T' +p225 +sg34 +(dp226 +g36 +g53 +sg38 +g39 +sg40 +S'178528777' +p227 +sg42 +g43 +ssssg60 +(dp228 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_597681.4' +p229 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_133437.4' +p230 +sssS'flag' +p231 +S'gene_variant' +p232 
+sS'NR_038271.1:n.446+5141G>T' +p233 +(dp234 +g3 +g4 +sg5 +(lp235 +S'RefSeqGene record not available' +p236 +asg8 +g4 +sg9 +(lp237 +sg11 +VHomo sapiens TTN antisense RNA 1 (TTN-AS1), transcript variant 2, long non-coding RNA +p238 +sg13 +S'TTN-AS1' +p239 +sg15 +(dp240 +g17 +S'Non-coding :n.' +p241 +sg19 +g241 +ssg21 +g22 +sg23 +S'NC_000002.11(NR_038271.1):c.446+5141G>T' +p242 +sg24 +g4 +sg25 +S'NR_038271.1:n.446+5141G>T' +p243 +sg27 +g4 +sg28 +(dp244 +S'hg19' +p245 +(dp246 +g32 +S'NC_000002.11:g.179393504G>T' +p247 +sg34 +(dp248 +g36 +g37 +sg38 +g178 +sg40 +S'179393504' +p249 +sg42 +g180 +sssg44 +(dp250 +g32 +S'NC_000002.12:g.178528777G>T' +p251 +sg34 +(dp252 +g36 +g37 +sg38 +g178 +sg40 +S'178528777' +p253 +sg42 +g180 +sssS'grch37' +p254 +(dp255 +g32 +S'NC_000002.11:g.179393504G>T' +p256 +sg34 +(dp257 +g36 +g53 +sg38 +g178 +sg40 +S'179393504' +p258 +sg42 +g180 +sssS'grch38' +p259 +(dp260 +g32 +S'NC_000002.12:g.178528777G>T' +p261 +sg34 +(dp262 +g36 +g53 +sg38 +g178 +sg40 +S'178528777' +p263 +sg42 +g180 +ssssg60 +(dp264 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_038271.1' +p265 +sssS'NM_001267550.2:c.106974C>A' +p266 +(dp267 +g3 +g4 +sg5 +(lp268 +S'RefSeqGene record not available' +p269 +asg8 +g4 +sg9 +(lp270 +sg11 +VHomo sapiens titin (TTN), transcript variant IC, mRNA +p271 +sg13 +S'TTN' +p272 +sg15 +(dp273 +g17 +S'NP_001254479.2:p.(Ser35658Arg)' +p274 +sg19 +S'NP_001254479.2:p.(S35658R)' +p275 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001267550.2:c.106974C>A' +p276 +sg27 +g4 +sg28 +(dp277 +S'hg19' +p278 +(dp279 +g32 +S'NC_000002.11:g.179393504G>T' +p280 +sg34 +(dp281 +g36 +g37 +sg38 +g39 +sg40 +S'179393504' +p282 +sg42 +g43 +sssg44 +(dp283 +g32 +S'NC_000002.12:g.178528777G>T' +p284 +sg34 +(dp285 +g36 +g37 +sg38 +g39 +sg40 +S'178528777' +p286 +sg42 +g43 +sssS'grch37' +p287 +(dp288 +g32 +S'NC_000002.11:g.179393504G>T' +p289 +sg34 +(dp290 +g36 +g53 +sg38 +g39 +sg40 +S'179393504' +p291 +sg42 +g43 +sssS'grch38' +p292 +(dp293 +g32 +S'NC_000002.12:g.178528777G>T' 
+p294 +sg34 +(dp295 +g36 +g53 +sg38 +g39 +sg40 +S'178528777' +p296 +sg42 +g43 +ssssg60 +(dp297 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001254479.2' +p298 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001267550.2' +p299 +sssS'NM_133437.3:c.80355C>A' +p300 +(dp301 +g3 +g4 +sg5 +(lp302 +S'A more recent version of the selected reference sequence NM_133437.3 is available (NM_133437.4)' +p303 +aS'NM_133437.4:c.80355C>A MUST be fully validated prior to use in reports' +p304 +aS'select_variants=NM_133437.4:c.80355C>A' +p305 +aS'RefSeqGene record not available' +p306 +asg8 +g4 +sg9 +(lp307 +sg11 +VHomo sapiens titin (TTN), transcript variant novex-2, mRNA +p308 +sg13 +S'TTN' +p309 +sg15 +(dp310 +g17 +S'NP_597681.3:p.(Ser26785Arg)' +p311 +sg19 +S'NP_597681.3:p.(S26785R)' +p312 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_133437.3:c.80355C>A' +p313 +sg27 +g4 +sg28 +(dp314 +S'hg19' +p315 +(dp316 +g32 +S'NC_000002.11:g.179393504G>T' +p317 +sg34 +(dp318 +g36 +g37 +sg38 +g39 +sg40 +S'179393504' +p319 +sg42 +g43 +sssS'grch37' +p320 +(dp321 +g32 +S'NC_000002.11:g.179393504G>T' +p322 +sg34 +(dp323 +g36 +g53 +sg38 +g39 +sg40 +S'179393504' +p324 +sg42 +g43 +ssssg60 +(dp325 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_597681.3' +p326 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_133437.3' +p327 +sssS'NM_003319.4:c.79779C>A' +p328 +(dp329 +g3 +g4 +sg5 +(lp330 +S'RefSeqGene record not available' +p331 +asg8 +g4 +sg9 +(lp332 +sg11 +VHomo sapiens titin (TTN), transcript variant N2-B, mRNA +p333 +sg13 +S'TTN' +p334 +sg15 +(dp335 +g17 +S'NP_003310.4:p.(Ser26593Arg)' +p336 +sg19 +S'NP_003310.4:p.(S26593R)' +p337 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_003319.4:c.79779C>A' +p338 +sg27 +g4 +sg28 +(dp339 +S'hg19' +p340 +(dp341 +g32 +S'NC_000002.11:g.179393504G>T' +p342 +sg34 +(dp343 +g36 +g37 +sg38 +g39 +sg40 +S'179393504' +p344 +sg42 +g43 +sssg44 +(dp345 +g32 +S'NC_000002.12:g.178528777G>T' +p346 +sg34 +(dp347 +g36 +g37 +sg38 +g39 +sg40 +S'178528777' +p348 +sg42 +g43 
+sssS'grch37' +p349 +(dp350 +g32 +S'NC_000002.11:g.179393504G>T' +p351 +sg34 +(dp352 +g36 +g53 +sg38 +g39 +sg40 +S'179393504' +p353 +sg42 +g43 +sssS'grch38' +p354 +(dp355 +g32 +S'NC_000002.12:g.178528777G>T' +p356 +sg34 +(dp357 +g36 +g53 +sg38 +g39 +sg40 +S'178528777' +p358 +sg42 +g43 +ssssg60 +(dp359 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003310.4' +p360 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003319.4' +p361 +sssS'metadata' +p362 +(dp363 +S'variantvalidator_hgvs_version' +p364 +S'1.1.3' +p365 +sS'uta_schema' +p366 +S'uta_20180821' +p367 +sS'seqrepo_db' +p368 +S'2018-08-21' +p369 +sS'variantvalidator_version' +p370 +S'v0.2' +p371 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant274.txt b/VariantValidator/testing/testOutputsMasterITS/variant274.txt new file mode 100644 index 00000000..132314ce --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant274.txt @@ -0,0 +1,176 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_194250.1:c.3324_3347del' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NC_000002.11:g.185803444TGCAGCTGCTGCAGCTGCAGCTGCA>T automapped to NC_000002.11:g.185803447_185803470del' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g6 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens zinc finger protein 804A (ZNF804A), mRNA +p15 +sS'gene_symbol' +p16 +S'ZNF804A' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_919226.1:p.(Ala1112_Ala1119del)' +p21 +sS'slr' +p22 +S'NP_919226.1:p.(A1112_A1119del)' +p23 +ssS'submitted_variant' +p24 +S'2-185803444-TGCAGCTGCTGCAGCTGCAGCTGCA-T' +p25 +sS'genome_context_intronic_sequence' +p26 +g6 +sS'hgvs_lrg_variant' +p27 +g6 +sS'hgvs_transcript_variant' +p28 +S'NM_194250.1:c.3324_3347del' +p29 +sS'hgvs_refseqgene_variant' +p30 +g6 +sS'primary_assembly_loci' +p31 +(dp32 +S'hg19' +p33 +(dp34 
+S'hgvs_genomic_description' +p35 +S'NC_000002.11:g.185803447_185803470del' +p36 +sS'vcf' +p37 +(dp38 +S'chr' +p39 +S'chr2' +p40 +sS'ref' +p41 +S'TGCAGCTGCTGCAGCTGCAGCTGCA' +p42 +sS'pos' +p43 +S'185803444' +p44 +sS'alt' +p45 +S'T' +p46 +sssS'hg38' +p47 +(dp48 +g35 +S'NC_000002.12:g.184938720_184938743del' +p49 +sg37 +(dp50 +g39 +g40 +sg41 +S'TGCAGCTGCTGCAGCTGCAGCTGCA' +p51 +sg43 +S'184938717' +p52 +sg45 +g46 +sssS'grch37' +p53 +(dp54 +g35 +S'NC_000002.11:g.185803447_185803470del' +p55 +sg37 +(dp56 +g39 +S'2' +p57 +sg41 +S'TGCAGCTGCTGCAGCTGCAGCTGCA' +p58 +sg43 +S'185803444' +p59 +sg45 +g46 +sssS'grch38' +p60 +(dp61 +g35 +S'NC_000002.12:g.184938720_184938743del' +p62 +sg37 +(dp63 +g39 +g57 +sg41 +S'TGCAGCTGCTGCAGCTGCAGCTGCA' +p64 +sg43 +S'184938717' +p65 +sg45 +g46 +ssssS'reference_sequence_records' +p66 +(dp67 +S'protein' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_919226.1' +p69 +sS'transcript' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_194250.1' +p71 +sssS'metadata' +p72 +(dp73 +S'variantvalidator_hgvs_version' +p74 +S'1.1.3' +p75 +sS'uta_schema' +p76 +S'uta_20180821' +p77 +sS'seqrepo_db' +p78 +S'2018-08-21' +p79 +sS'variantvalidator_version' +p80 +S'v0.2' +p81 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant275.txt b/VariantValidator/testing/testOutputsMasterITS/variant275.txt new file mode 100644 index 00000000..2d7b7e0e --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant275.txt @@ -0,0 +1,286 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_002491.2:c.208G>T' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens NADH:ubiquinone oxidoreductase subunit B3 (NDUFB3), transcript variant 1, mRNA +p14 +sS'gene_symbol' +p15 +S'NDUFB3' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_002482.1:p.(Gly70Ter)' +p20 +sS'slr' +p21 +S'NP_002482.1:p.(G70*)' +p22 +ssS'submitted_variant' +p23 +S'2-201950249-G-T' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_002491.2:c.208G>T' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000002.11:g.201950249G>T' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr2' +p39 +sS'ref' +p40 +S'G' +p41 +sS'pos' +p42 +S'201950249' +p43 +sS'alt' +p44 +S'T' +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000002.12:g.201085526G>T' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +g41 +sg42 +S'201085526' +p50 +sg44 +g45 +sssS'grch37' +p51 +(dp52 +g34 +S'NC_000002.11:g.201950249G>T' +p53 +sg36 +(dp54 +g38 +S'2' +p55 +sg40 +g41 +sg42 +S'201950249' +p56 +sg44 +g45 +sssS'grch38' +p57 +(dp58 +g34 +S'NC_000002.12:g.201085526G>T' +p59 +sg36 +(dp60 +g38 +g55 +sg40 +g41 +sg42 +S'201085526' +p61 +sg44 +g45 +ssssS'reference_sequence_records' +p62 +(dp63 +S'protein' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_002482.1' +p65 +sS'transcript' +p66 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NM_002491.2' +p67 +sssS'NM_001257102.1:c.208G>T' +p68 +(dp69 +g5 +g6 +sg7 +(lp70 +S'RefSeqGene record not available' +p71 +asg10 +g6 +sg11 +(lp72 +sg13 +VHomo sapiens NADH:ubiquinone oxidoreductase subunit B3 (NDUFB3), transcript variant 2, mRNA +p73 +sg15 +S'NDUFB3' +p74 +sg17 +(dp75 +g19 +S'NP_001244031.1:p.(Gly70Ter)' +p76 +sg21 +S'NP_001244031.1:p.(G70*)' +p77 +ssg23 +g24 +sg25 +g6 +sg26 +g6 +sg27 +S'NM_001257102.1:c.208G>T' +p78 +sg29 +g6 +sg30 +(dp79 +S'hg19' +p80 +(dp81 +g34 +S'NC_000002.11:g.201950249G>T' +p82 +sg36 +(dp83 +g38 +g39 +sg40 +g41 +sg42 +S'201950249' +p84 +sg44 +g45 +sssg46 +(dp85 +g34 +S'NC_000002.12:g.201085526G>T' +p86 +sg36 +(dp87 +g38 +g39 +sg40 +g41 +sg42 +S'201085526' +p88 +sg44 +g45 +sssS'grch37' +p89 +(dp90 +g34 +S'NC_000002.11:g.201950249G>T' +p91 +sg36 +(dp92 +g38 +g55 +sg40 +g41 +sg42 +S'201950249' +p93 +sg44 +g45 +sssS'grch38' +p94 +(dp95 +g34 +S'NC_000002.12:g.201085526G>T' +p96 +sg36 +(dp97 +g38 +g55 +sg40 +g41 +sg42 +S'201085526' +p98 +sg44 +g45 +ssssg62 +(dp99 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244031.1' +p100 +sg66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257102.1' +p101 +sssS'metadata' +p102 +(dp103 +S'variantvalidator_hgvs_version' +p104 +S'1.1.3' +p105 +sS'uta_schema' +p106 +S'uta_20180821' +p107 +sS'seqrepo_db' +p108 +S'2018-08-21' +p109 +sS'variantvalidator_version' +p110 +S'v0.2' +p111 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant276.txt b/VariantValidator/testing/testOutputsMasterITS/variant276.txt new file mode 100644 index 00000000..c3103bb5 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant276.txt @@ -0,0 +1,404 @@ +(dp0 +S'NM_004369.3:c.6282+1G>T' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens collagen type VI alpha 3 chain (COL6A3), transcript variant 1, mRNA +p12 +sS'gene_symbol' +p13 +S'COL6A3' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_004360.2:p.?' +p18 +sS'slr' +p19 +S'NP_004360.2:p.?' +p20 +ssS'submitted_variant' +p21 +S'2-238268730-C-A' +p22 +sS'genome_context_intronic_sequence' +p23 +S'NC_000002.11(NM_004369.3):c.6282+1G>T' +p24 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_004369.3:c.6282+1G>T' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000002.11:g.238268730C>A' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'chr2' +p38 +sS'ref' +p39 +VC +p40 +sS'pos' +p41 +S'238268730' +p42 +sS'alt' +p43 +VA +p44 +sssS'hg38' +p45 +(dp46 +g33 +S'NC_000002.12:g.237360087C>A' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +g40 +sg41 +S'237360087' +p49 +sg43 +g44 +sssS'grch37' +p50 +(dp51 +g33 +S'NC_000002.11:g.238268730C>A' +p52 +sg35 +(dp53 +g37 +S'2' +p54 +sg39 +g40 +sg41 +S'238268730' +p55 +sg43 +g44 +sssS'grch38' +p56 +(dp57 +g33 +S'NC_000002.12:g.237360087C>A' +p58 +sg35 +(dp59 +g37 +g54 +sg39 +g40 +sg41 +S'237360087' +p60 +sg43 +g44 +ssssS'reference_sequence_records' +p61 +(dp62 +S'protein' +p63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004360.2' +p64 +sS'transcript' +p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004369.3' 
+p66 +sssS'flag' +p67 +S'gene_variant' +p68 +sS'metadata' +p69 +(dp70 +S'variantvalidator_hgvs_version' +p71 +S'1.1.3' +p72 +sS'uta_schema' +p73 +S'uta_20180821' +p74 +sS'seqrepo_db' +p75 +S'2018-08-21' +p76 +sS'variantvalidator_version' +p77 +S'v0.2' +p78 +ssS'NM_057166.4:c.4461+1G>T' +p79 +(dp80 +g3 +g4 +sg5 +(lp81 +S'RefSeqGene record not available' +p82 +asg8 +g4 +sg9 +(lp83 +sg11 +VHomo sapiens collagen type VI alpha 3 chain (COL6A3), transcript variant 4, mRNA +p84 +sg13 +S'COL6A3' +p85 +sg15 +(dp86 +g17 +S'NP_476507.3:p.?' +p87 +sg19 +S'NP_476507.3:p.?' +p88 +ssg21 +g22 +sg23 +S'NC_000002.11(NM_057166.4):c.4461+1G>T' +p89 +sg25 +g4 +sg26 +S'NM_057166.4:c.4461+1G>T' +p90 +sg28 +g4 +sg29 +(dp91 +S'hg19' +p92 +(dp93 +g33 +S'NC_000002.11:g.238268730C>A' +p94 +sg35 +(dp95 +g37 +g38 +sg39 +g40 +sg41 +S'238268730' +p96 +sg43 +g44 +sssg45 +(dp97 +g33 +S'NC_000002.12:g.237360087C>A' +p98 +sg35 +(dp99 +g37 +g38 +sg39 +g40 +sg41 +S'237360087' +p100 +sg43 +g44 +sssS'grch37' +p101 +(dp102 +g33 +S'NC_000002.11:g.238268730C>A' +p103 +sg35 +(dp104 +g37 +g54 +sg39 +g40 +sg41 +S'238268730' +p105 +sg43 +g44 +sssS'grch38' +p106 +(dp107 +g33 +S'NC_000002.12:g.237360087C>A' +p108 +sg35 +(dp109 +g37 +g54 +sg39 +g40 +sg41 +S'237360087' +p110 +sg43 +g44 +ssssg61 +(dp111 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_476507.3' +p112 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_057166.4' +p113 +sssS'NM_057167.3:c.5664+1G>T' +p114 +(dp115 +g3 +g4 +sg5 +(lp116 +S'RefSeqGene record not available' +p117 +asg8 +g4 +sg9 +(lp118 +sg11 +VHomo sapiens collagen type VI alpha 3 chain (COL6A3), transcript variant 5, mRNA +p119 +sg13 +S'COL6A3' +p120 +sg15 +(dp121 +g17 +S'NP_476508.2:p.?' +p122 +sg19 +S'NP_476508.2:p.?' 
+p123 +ssg21 +g22 +sg23 +S'NC_000002.11(NM_057167.3):c.5664+1G>T' +p124 +sg25 +g4 +sg26 +S'NM_057167.3:c.5664+1G>T' +p125 +sg28 +g4 +sg29 +(dp126 +S'hg19' +p127 +(dp128 +g33 +S'NC_000002.11:g.238268730C>A' +p129 +sg35 +(dp130 +g37 +g38 +sg39 +g40 +sg41 +S'238268730' +p131 +sg43 +g44 +sssg45 +(dp132 +g33 +S'NC_000002.12:g.237360087C>A' +p133 +sg35 +(dp134 +g37 +g38 +sg39 +g40 +sg41 +S'237360087' +p135 +sg43 +g44 +sssS'grch37' +p136 +(dp137 +g33 +S'NC_000002.11:g.238268730C>A' +p138 +sg35 +(dp139 +g37 +g54 +sg39 +g40 +sg41 +S'238268730' +p140 +sg43 +g44 +sssS'grch38' +p141 +(dp142 +g33 +S'NC_000002.12:g.237360087C>A' +p143 +sg35 +(dp144 +g37 +g54 +sg39 +g40 +sg41 +S'237360087' +p145 +sg43 +g44 +ssssg61 +(dp146 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_476508.2' +p147 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_057167.3' +p148 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant277.txt b/VariantValidator/testing/testOutputsMasterITS/variant277.txt new file mode 100644 index 00000000..f13649e5 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant277.txt @@ -0,0 +1,377 @@ +(dp0 +S'NM_080860.2:c.727+5G>A' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'A more recent version of the selected reference sequence NM_080860.2 is available (NM_080860.3)' +p7 +aS'NM_080860.3:c.727+5G>A MUST be fully validated prior to use in reports' +p8 +aS'select_variants=NM_080860.3:c.727+5G>A' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g4 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens radial spoke head 1 homolog (Chlamydomonas) (RSPH1), mRNA +p15 +sS'gene_symbol' +p16 +S'RSPH1' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_543136.1:p.?' +p21 +sS'slr' +p22 +S'NP_543136.1:p.?' 
+p23 +ssS'submitted_variant' +p24 +S'21-43897396-C-T' +p25 +sS'genome_context_intronic_sequence' +p26 +S'NC_000021.8(NM_080860.2):c.727+5G>A' +p27 +sS'hgvs_lrg_variant' +p28 +g4 +sS'hgvs_transcript_variant' +p29 +S'NM_080860.2:c.727+5G>A' +p30 +sS'hgvs_refseqgene_variant' +p31 +g4 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000021.8:g.43897396C>T' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr21' +p41 +sS'ref' +p42 +VC +p43 +sS'pos' +p44 +S'43897396' +p45 +sS'alt' +p46 +VT +p47 +sssS'grch37' +p48 +(dp49 +g36 +S'NC_000021.8:g.43897396C>T' +p50 +sg38 +(dp51 +g40 +S'21' +p52 +sg42 +g43 +sg44 +S'43897396' +p53 +sg46 +g47 +ssssS'reference_sequence_records' +p54 +(dp55 +S'protein' +p56 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_543136.1' +p57 +sS'transcript' +p58 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_080860.2' +p59 +sssS'flag' +p60 +S'gene_variant' +p61 +sS'metadata' +p62 +(dp63 +S'variantvalidator_hgvs_version' +p64 +S'1.1.3' +p65 +sS'uta_schema' +p66 +S'uta_20180821' +p67 +sS'seqrepo_db' +p68 +S'2018-08-21' +p69 +sS'variantvalidator_version' +p70 +S'v0.2' +p71 +ssS'NM_080860.3:c.727+5G>A' +p72 +(dp73 +g3 +g4 +sg5 +(lp74 +S'RefSeqGene record not available' +p75 +asg11 +g4 +sg12 +(lp76 +sg14 +VHomo sapiens radial spoke head component 1 (RSPH1), transcript variant 1, mRNA +p77 +sg16 +S'RSPH1' +p78 +sg18 +(dp79 +g20 +S'NP_543136.1:p.?' +p80 +sg22 +S'NP_543136.1:p.?' 
+p81 +ssg24 +g25 +sg26 +S'NC_000021.8(NM_080860.3):c.727+5G>A' +p82 +sg28 +g4 +sg29 +S'NM_080860.3:c.727+5G>A' +p83 +sg31 +g4 +sg32 +(dp84 +S'hg19' +p85 +(dp86 +g36 +S'NC_000021.8:g.43897396C>T' +p87 +sg38 +(dp88 +g40 +g41 +sg42 +g43 +sg44 +S'43897396' +p89 +sg46 +g47 +sssS'hg38' +p90 +(dp91 +g36 +S'NC_000021.9:g.42477286C>T' +p92 +sg38 +(dp93 +g40 +g41 +sg42 +g43 +sg44 +S'42477286' +p94 +sg46 +g47 +sssS'grch37' +p95 +(dp96 +g36 +S'NC_000021.8:g.43897396C>T' +p97 +sg38 +(dp98 +g40 +g52 +sg42 +g43 +sg44 +S'43897396' +p99 +sg46 +g47 +sssS'grch38' +p100 +(dp101 +g36 +S'NC_000021.9:g.42477286C>T' +p102 +sg38 +(dp103 +g40 +g52 +sg42 +g43 +sg44 +S'42477286' +p104 +sg46 +g47 +ssssg54 +(dp105 +g56 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_543136.1' +p106 +sg58 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_080860.3' +p107 +sssS'NM_001286506.1:c.613+5G>A' +p108 +(dp109 +g3 +g4 +sg5 +(lp110 +S'RefSeqGene record not available' +p111 +asg11 +g4 +sg12 +(lp112 +sg14 +VHomo sapiens radial spoke head component 1 (RSPH1), transcript variant 2, mRNA +p113 +sg16 +S'RSPH1' +p114 +sg18 +(dp115 +g20 +S'NP_001273435.1:p.?' +p116 +sg22 +S'NP_001273435.1:p.?' +p117 +ssg24 +g25 +sg26 +S'NC_000021.8(NM_001286506.1):c.613+5G>A' +p118 +sg28 +g4 +sg29 +S'NM_001286506.1:c.613+5G>A' +p119 +sg31 +g4 +sg32 +(dp120 +S'hg19' +p121 +(dp122 +g36 +S'NC_000021.8:g.43897396C>T' +p123 +sg38 +(dp124 +g40 +g41 +sg42 +g43 +sg44 +S'43897396' +p125 +sg46 +g47 +sssg90 +(dp126 +g36 +S'NC_000021.9:g.42477286C>T' +p127 +sg38 +(dp128 +g40 +g41 +sg42 +g43 +sg44 +S'42477286' +p129 +sg46 +g47 +sssS'grch37' +p130 +(dp131 +g36 +S'NC_000021.8:g.43897396C>T' +p132 +sg38 +(dp133 +g40 +g52 +sg42 +g43 +sg44 +S'43897396' +p134 +sg46 +g47 +sssS'grch38' +p135 +(dp136 +g36 +S'NC_000021.9:g.42477286C>T' +p137 +sg38 +(dp138 +g40 +g52 +sg42 +g43 +sg44 +S'42477286' +p139 +sg46 +g47 +ssssg54 +(dp140 +g56 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001273435.1' +p141 +sg58 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001286506.1' +p142 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant278.txt b/VariantValidator/testing/testOutputsMasterITS/variant278.txt new file mode 100644 index 00000000..b15685b4 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant278.txt @@ -0,0 +1,1227 @@ +(dp0 +S'NM_000268.3:c.924_925insCGACGC' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens neurofibromin 2 (NF2), transcript variant 1, mRNA +p13 +sS'gene_symbol' +p14 +S'NF2' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_000259.1:p.(Arg310_Arg311dup)' +p19 +sS'slr' +p20 +S'NP_000259.1:p.(R310_R311dup)' +p21 +ssS'submitted_variant' +p22 +S'22-30064360-G-GCGACGC' +p23 +sS'genome_context_intronic_sequence' +p24 +g4 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_000268.3:c.924_925insCGACGC' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000022.10:g.30064360_30064361insCGACGC' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'chr22' +p38 +sS'ref' +p39 +S'G' +p40 +sS'pos' +p41 +S'30064360' +p42 +sS'alt' +p43 +S'GCGACGC' +p44 +sssS'hg38' +p45 +(dp46 +g33 +S'NC_000022.11:g.29668371_29668372insCGACGC' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +g40 +sg41 +S'29668371' +p49 +sg43 +S'GCGACGC' +p50 +sssS'grch37' +p51 +(dp52 +g33 +S'NC_000022.10:g.30064360_30064361insCGACGC' +p53 +sg35 +(dp54 +g37 +S'22' +p55 +sg39 +g40 +sg41 +S'30064360' +p56 +sg43 +S'GCGACGC' +p57 +sssS'grch38' +p58 +(dp59 +g33 +S'NC_000022.11:g.29668371_29668372insCGACGC' +p60 +sg35 +(dp61 +g37 +g55 +sg39 +g40 +sg41 +S'29668371' +p62 +sg43 +S'GCGACGC' +p63 
+ssssS'reference_sequence_records' +p64 +(dp65 +S'protein' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000259.1' +p67 +sS'transcript' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000268.3' +p69 +sssS'NM_181828.2:c.798_799insCGACGC' +p70 +(dp71 +g3 +g4 +sg5 +(lp72 +S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' +p73 +aS'RefSeqGene record not available' +p74 +asg9 +g4 +sg10 +(lp75 +sg12 +VHomo sapiens neurofibromin 2 (NF2), transcript variant 5, mRNA +p76 +sg14 +S'NF2' +p77 +sg16 +(dp78 +g18 +S'NP_861966.1:p.(Arg268_Arg269dup)' +p79 +sg20 +S'NP_861966.1:p.(R268_R269dup)' +p80 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_181828.2:c.798_799insCGACGC' +p81 +sg28 +g4 +sg29 +(dp82 +S'hg19' +p83 +(dp84 +g33 +S'NC_000022.10:g.30064360_30064361insCGACGC' +p85 +sg35 +(dp86 +g37 +g38 +sg39 +g40 +sg41 +S'30064360' +p87 +sg43 +S'GCGACGC' +p88 +sssg45 +(dp89 +g33 +S'NC_000022.11:g.29668371_29668372insCGACGC' +p90 +sg35 +(dp91 +g37 +g38 +sg39 +g40 +sg41 +S'29668371' +p92 +sg43 +S'GCGACGC' +p93 +sssS'grch37' +p94 +(dp95 +g33 +S'NC_000022.10:g.30064360_30064361insCGACGC' +p96 +sg35 +(dp97 +g37 +g55 +sg39 +g40 +sg41 +S'30064360' +p98 +sg43 +S'GCGACGC' +p99 +sssS'grch38' +p100 +(dp101 +g33 +S'NC_000022.11:g.29668371_29668372insCGACGC' +p102 +sg35 +(dp103 +g37 +g55 +sg39 +g40 +sg41 +S'29668371' +p104 +sg43 +S'GCGACGC' +p105 +ssssg64 +(dp106 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_861966.1' +p107 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_181828.2' +p108 +sssS'NM_181830.2:c.675_676insCGACGC' +p109 +(dp110 +g3 +g4 +sg5 +(lp111 +S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' +p112 +aS'RefSeqGene record not available' +p113 +asg9 +g4 +sg10 +(lp114 +sg12 +VHomo sapiens neurofibromin 2 (NF2), transcript variant 7, mRNA +p115 +sg14 +S'NF2' +p116 +sg16 +(dp117 +g18 +S'NP_861968.1:p.(Arg227_Arg228dup)' +p118 +sg20 +S'NP_861968.1:p.(R227_R228dup)' +p119 +ssg22 +g23 +sg24 +g4 +sg25 +g4 
+sg26 +S'NM_181830.2:c.675_676insCGACGC' +p120 +sg28 +g4 +sg29 +(dp121 +S'hg19' +p122 +(dp123 +g33 +S'NC_000022.10:g.30064360_30064361insCGACGC' +p124 +sg35 +(dp125 +g37 +g38 +sg39 +g40 +sg41 +S'30064360' +p126 +sg43 +S'GCGACGC' +p127 +sssg45 +(dp128 +g33 +S'NC_000022.11:g.29668371_29668372insCGACGC' +p129 +sg35 +(dp130 +g37 +g38 +sg39 +g40 +sg41 +S'29668371' +p131 +sg43 +S'GCGACGC' +p132 +sssS'grch37' +p133 +(dp134 +g33 +S'NC_000022.10:g.30064360_30064361insCGACGC' +p135 +sg35 +(dp136 +g37 +g55 +sg39 +g40 +sg41 +S'30064360' +p137 +sg43 +S'GCGACGC' +p138 +sssS'grch38' +p139 +(dp140 +g33 +S'NC_000022.11:g.29668371_29668372insCGACGC' +p141 +sg35 +(dp142 +g37 +g55 +sg39 +g40 +sg41 +S'29668371' +p143 +sg43 +S'GCGACGC' +p144 +ssssg64 +(dp145 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_861968.1' +p146 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_181830.2' +p147 +sssS'NM_181825.2:c.924_925insCGACGC' +p148 +(dp149 +g3 +g4 +sg5 +(lp150 +S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' +p151 +aS'RefSeqGene record not available' +p152 +asg9 +g4 +sg10 +(lp153 +sg12 +VHomo sapiens neurofibromin 2 (NF2), transcript variant 12, mRNA +p154 +sg14 +S'NF2' +p155 +sg16 +(dp156 +g18 +S'NP_861546.1:p.(Arg310_Arg311dup)' +p157 +sg20 +S'NP_861546.1:p.(R310_R311dup)' +p158 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_181825.2:c.924_925insCGACGC' +p159 +sg28 +g4 +sg29 +(dp160 +S'hg19' +p161 +(dp162 +g33 +S'NC_000022.10:g.30064360_30064361insCGACGC' +p163 +sg35 +(dp164 +g37 +g38 +sg39 +g40 +sg41 +S'30064360' +p165 +sg43 +S'GCGACGC' +p166 +sssg45 +(dp167 +g33 +S'NC_000022.11:g.29668371_29668372insCGACGC' +p168 +sg35 +(dp169 +g37 +g38 +sg39 +g40 +sg41 +S'29668371' +p170 +sg43 +S'GCGACGC' +p171 +sssS'grch37' +p172 +(dp173 +g33 +S'NC_000022.10:g.30064360_30064361insCGACGC' +p174 +sg35 +(dp175 +g37 +g55 +sg39 +g40 +sg41 +S'30064360' +p176 +sg43 +S'GCGACGC' +p177 +sssS'grch38' +p178 +(dp179 +g33 +S'NC_000022.11:g.29668371_29668372insCGACGC' +p180 
+sg35 +(dp181 +g37 +g55 +sg39 +g40 +sg41 +S'29668371' +p182 +sg43 +S'GCGACGC' +p183 +ssssg64 +(dp184 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_861546.1' +p185 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_181825.2' +p186 +sssS'NM_181832.2:c.924_925insCGACGC' +p187 +(dp188 +g3 +g4 +sg5 +(lp189 +S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' +p190 +aS'RefSeqGene record not available' +p191 +asg9 +g4 +sg10 +(lp192 +sg12 +VHomo sapiens neurofibromin 2 (NF2), transcript variant 8, mRNA +p193 +sg14 +S'NF2' +p194 +sg16 +(dp195 +g18 +S'NP_861970.1:p.(Arg310_Arg311dup)' +p196 +sg20 +S'NP_861970.1:p.(R310_R311dup)' +p197 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_181832.2:c.924_925insCGACGC' +p198 +sg28 +g4 +sg29 +(dp199 +S'hg19' +p200 +(dp201 +g33 +S'NC_000022.10:g.30064360_30064361insCGACGC' +p202 +sg35 +(dp203 +g37 +g38 +sg39 +g40 +sg41 +S'30064360' +p204 +sg43 +S'GCGACGC' +p205 +sssg45 +(dp206 +g33 +S'NC_000022.11:g.29668371_29668372insCGACGC' +p207 +sg35 +(dp208 +g37 +g38 +sg39 +g40 +sg41 +S'29668371' +p209 +sg43 +S'GCGACGC' +p210 +sssS'grch37' +p211 +(dp212 +g33 +S'NC_000022.10:g.30064360_30064361insCGACGC' +p213 +sg35 +(dp214 +g37 +g55 +sg39 +g40 +sg41 +S'30064360' +p215 +sg43 +S'GCGACGC' +p216 +sssS'grch38' +p217 +(dp218 +g33 +S'NC_000022.11:g.29668371_29668372insCGACGC' +p219 +sg35 +(dp220 +g37 +g55 +sg39 +g40 +sg41 +S'29668371' +p221 +sg43 +S'GCGACGC' +p222 +ssssg64 +(dp223 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_861970.1' +p224 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_181832.2' +p225 +sssS'NM_181833.2:c.447+26086_447+26087insCGACGC' +p226 +(dp227 +g3 +g4 +sg5 +(lp228 +S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' +p229 +aS'RefSeqGene record not available' +p230 +asg9 +g4 +sg10 +(lp231 +sg12 +VHomo sapiens neurofibromin 2 (NF2), transcript variant 9, mRNA +p232 +sg14 +S'NF2' +p233 +sg16 +(dp234 +g18 +S'NP_861971.1:p.?' +p235 +sg20 +S'NP_861971.1:p.?' 
+p236 +ssg22 +g23 +sg24 +S'NC_000022.10(NM_181833.2):c.447+26086_447+26087insCGACGC' +p237 +sg25 +g4 +sg26 +S'NM_181833.2:c.447+26086_447+26087insCGACGC' +p238 +sg28 +g4 +sg29 +(dp239 +S'hg19' +p240 +(dp241 +g33 +S'NC_000022.10:g.30064360_30064361insCGACGC' +p242 +sg35 +(dp243 +g37 +g38 +sg39 +g40 +sg41 +S'30064360' +p244 +sg43 +S'GCGACGC' +p245 +sssg45 +(dp246 +g33 +S'NC_000022.11:g.29668371_29668372insCGACGC' +p247 +sg35 +(dp248 +g37 +g38 +sg39 +g40 +sg41 +S'29668371' +p249 +sg43 +S'GCGACGC' +p250 +sssS'grch37' +p251 +(dp252 +g33 +S'NC_000022.10:g.30064360_30064361insCGACGC' +p253 +sg35 +(dp254 +g37 +g55 +sg39 +g40 +sg41 +S'30064360' +p255 +sg43 +S'GCGACGC' +p256 +sssS'grch38' +p257 +(dp258 +g33 +S'NC_000022.11:g.29668371_29668372insCGACGC' +p259 +sg35 +(dp260 +g37 +g55 +sg39 +g40 +sg41 +S'29668371' +p261 +sg43 +S'GCGACGC' +p262 +ssssg64 +(dp263 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_861971.1' +p264 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_181833.2' +p265 +sssS'NM_016418.5:c.924_925insCGACGC' +p266 +(dp267 +g3 +g4 +sg5 +(lp268 +S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' +p269 +aS'RefSeqGene record not available' +p270 +asg9 +g4 +sg10 +(lp271 +sg12 +VHomo sapiens neurofibromin 2 (NF2), transcript variant 2, mRNA +p272 +sg14 +S'NF2' +p273 +sg16 +(dp274 +g18 +S'NP_057502.2:p.(Arg310_Arg311dup)' +p275 +sg20 +S'NP_057502.2:p.(R310_R311dup)' +p276 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_016418.5:c.924_925insCGACGC' +p277 +sg28 +g4 +sg29 +(dp278 +S'hg19' +p279 +(dp280 +g33 +S'NC_000022.10:g.30064360_30064361insCGACGC' +p281 +sg35 +(dp282 +g37 +g38 +sg39 +g40 +sg41 +S'30064360' +p283 +sg43 +S'GCGACGC' +p284 +sssg45 +(dp285 +g33 +S'NC_000022.11:g.29668371_29668372insCGACGC' +p286 +sg35 +(dp287 +g37 +g38 +sg39 +g40 +sg41 +S'29668371' +p288 +sg43 +S'GCGACGC' +p289 +sssS'grch37' +p290 +(dp291 +g33 +S'NC_000022.10:g.30064360_30064361insCGACGC' +p292 +sg35 +(dp293 +g37 +g55 +sg39 +g40 +sg41 +S'30064360' 
+p294 +sg43 +S'GCGACGC' +p295 +sssS'grch38' +p296 +(dp297 +g33 +S'NC_000022.11:g.29668371_29668372insCGACGC' +p298 +sg35 +(dp299 +g37 +g55 +sg39 +g40 +sg41 +S'29668371' +p300 +sg43 +S'GCGACGC' +p301 +ssssg64 +(dp302 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_057502.2' +p303 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_016418.5' +p304 +sssS'NM_181829.2:c.801_802insCGACGC' +p305 +(dp306 +g3 +g4 +sg5 +(lp307 +S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' +p308 +aS'RefSeqGene record not available' +p309 +asg9 +g4 +sg10 +(lp310 +sg12 +VHomo sapiens neurofibromin 2 (NF2), transcript variant 6, mRNA +p311 +sg14 +S'NF2' +p312 +sg16 +(dp313 +g18 +S'NP_861967.1:p.(Arg269_Arg270dup)' +p314 +sg20 +S'NP_861967.1:p.(R269_R270dup)' +p315 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_181829.2:c.801_802insCGACGC' +p316 +sg28 +g4 +sg29 +(dp317 +S'hg19' +p318 +(dp319 +g33 +S'NC_000022.10:g.30064360_30064361insCGACGC' +p320 +sg35 +(dp321 +g37 +g38 +sg39 +g40 +sg41 +S'30064360' +p322 +sg43 +S'GCGACGC' +p323 +sssg45 +(dp324 +g33 +S'NC_000022.11:g.29668371_29668372insCGACGC' +p325 +sg35 +(dp326 +g37 +g38 +sg39 +g40 +sg41 +S'29668371' +p327 +sg43 +S'GCGACGC' +p328 +sssS'grch37' +p329 +(dp330 +g33 +S'NC_000022.10:g.30064360_30064361insCGACGC' +p331 +sg35 +(dp332 +g37 +g55 +sg39 +g40 +sg41 +S'30064360' +p333 +sg43 +S'GCGACGC' +p334 +sssS'grch38' +p335 +(dp336 +g33 +S'NC_000022.11:g.29668371_29668372insCGACGC' +p337 +sg35 +(dp338 +g37 +g55 +sg39 +g40 +sg41 +S'29668371' +p339 +sg43 +S'GCGACGC' +p340 +ssssg64 +(dp341 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_861967.1' +p342 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_181829.2' +p343 +sssS'flag' +p344 +S'gene_variant' +p345 +sS'NR_156186.1:n.1483_1484insCGACGC' +p346 +(dp347 +g3 +g4 +sg5 +(lp348 +S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' +p349 +aS'RefSeqGene record not available' +p350 +asg9 +g4 +sg10 +(lp351 +sg12 +VHomo sapiens 
neurofibromin 2 (NF2), transcript variant 14, non-coding RNA +p352 +sg14 +S'NF2' +p353 +sg16 +(dp354 +g18 +S'Non-coding :n.' +p355 +sg20 +g355 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NR_156186.1:n.1483_1484insCGACGC' +p356 +sg28 +g4 +sg29 +(dp357 +S'hg19' +p358 +(dp359 +g33 +S'NC_000022.10:g.30064360_30064361insCGACGC' +p360 +sg35 +(dp361 +g37 +g38 +sg39 +g40 +sg41 +S'30064360' +p362 +sg43 +S'GCGACGC' +p363 +sssS'grch37' +p364 +(dp365 +g33 +S'NC_000022.10:g.30064360_30064361insCGACGC' +p366 +sg35 +(dp367 +g37 +g55 +sg39 +g40 +sg41 +S'30064360' +p368 +sg43 +S'GCGACGC' +p369 +ssssg64 +(dp370 +g68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_156186.1' +p371 +sssS'NM_181831.2:c.675_676insCGACGC' +p372 +(dp373 +g3 +g4 +sg5 +(lp374 +S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' +p375 +aS'RefSeqGene record not available' +p376 +asg9 +g4 +sg10 +(lp377 +sg12 +VHomo sapiens neurofibromin 2 (NF2), transcript variant 13, mRNA +p378 +sg14 +S'NF2' +p379 +sg16 +(dp380 +g18 +S'NP_861969.1:p.(Arg227_Arg228dup)' +p381 +sg20 +S'NP_861969.1:p.(R227_R228dup)' +p382 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_181831.2:c.675_676insCGACGC' +p383 +sg28 +g4 +sg29 +(dp384 +S'hg19' +p385 +(dp386 +g33 +S'NC_000022.10:g.30064360_30064361insCGACGC' +p387 +sg35 +(dp388 +g37 +g38 +sg39 +g40 +sg41 +S'30064360' +p389 +sg43 +S'GCGACGC' +p390 +sssg45 +(dp391 +g33 +S'NC_000022.11:g.29668371_29668372insCGACGC' +p392 +sg35 +(dp393 +g37 +g38 +sg39 +g40 +sg41 +S'29668371' +p394 +sg43 +S'GCGACGC' +p395 +sssS'grch37' +p396 +(dp397 +g33 +S'NC_000022.10:g.30064360_30064361insCGACGC' +p398 +sg35 +(dp399 +g37 +g55 +sg39 +g40 +sg41 +S'30064360' +p400 +sg43 +S'GCGACGC' +p401 +sssS'grch38' +p402 +(dp403 +g33 +S'NC_000022.11:g.29668371_29668372insCGACGC' +p404 +sg35 +(dp405 +g37 +g55 +sg39 +g40 +sg41 +S'29668371' +p406 +sg43 +S'GCGACGC' +p407 +ssssg64 +(dp408 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_861969.1' +p409 +sg68 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NM_181831.2' +p410 +sssS'metadata' +p411 +(dp412 +S'variantvalidator_hgvs_version' +p413 +S'1.1.3' +p414 +sS'uta_schema' +p415 +S'uta_20180821' +p416 +sS'seqrepo_db' +p417 +S'2018-08-21' +p418 +sS'variantvalidator_version' +p419 +S'v0.2' +p420 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant279.txt b/VariantValidator/testing/testOutputsMasterITS/variant279.txt new file mode 100644 index 00000000..9f4a65d8 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant279.txt @@ -0,0 +1,421 @@ +(dp0 +S'NM_198156.2:c.341-3280_341-3271del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000003.11:g.10188187TGTCCCGATAG>T automapped to NC_000003.11:g.10188191_10188200del' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens von Hippel-Lindau tumor suppressor (VHL), transcript variant 2, mRNA +p13 +sS'gene_symbol' +p14 +S'VHL' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_937799.1:p.?' +p19 +sS'slr' +p20 +S'NP_937799.1:p.?' 
+p21 +ssS'submitted_variant' +p22 +S'3-10188187-TGTCCCGATAG-T' +p23 +sS'genome_context_intronic_sequence' +p24 +S'NC_000003.11(NM_198156.2):c.341-3280_341-3271del' +p25 +sS'hgvs_lrg_variant' +p26 +g4 +sS'hgvs_transcript_variant' +p27 +S'NM_198156.2:c.341-3280_341-3271del' +p28 +sS'hgvs_refseqgene_variant' +p29 +g4 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000003.11:g.10188191_10188200del' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr3' +p39 +sS'ref' +p40 +S'TGTCCCGATAG' +p41 +sS'pos' +p42 +S'10188187' +p43 +sS'alt' +p44 +S'T' +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000003.12:g.10146507_10146516del' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +S'TGTCCCGATAG' +p50 +sg42 +S'10146503' +p51 +sg44 +g45 +sssS'grch37' +p52 +(dp53 +g34 +S'NC_000003.11:g.10188191_10188200del' +p54 +sg36 +(dp55 +g38 +S'3' +p56 +sg40 +S'TGTCCCGATAG' +p57 +sg42 +S'10188187' +p58 +sg44 +g45 +sssS'grch38' +p59 +(dp60 +g34 +S'NC_000003.12:g.10146507_10146516del' +p61 +sg36 +(dp62 +g38 +g56 +sg40 +S'TGTCCCGATAG' +p63 +sg42 +S'10146503' +p64 +sg44 +g45 +ssssS'reference_sequence_records' +p65 +(dp66 +S'protein' +p67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_937799.1' +p68 +sS'transcript' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_198156.2' +p70 +sssS'flag' +p71 +S'gene_variant' +p72 +sS'metadata' +p73 +(dp74 +S'variantvalidator_hgvs_version' +p75 +S'1.1.3' +p76 +sS'uta_schema' +p77 +S'uta_20180821' +p78 +sS'seqrepo_db' +p79 +S'2018-08-21' +p80 +sS'variantvalidator_version' +p81 +S'v0.2' +p82 +ssS'NM_001354723.1:c.*18-3280_*18-3271del' +p83 +(dp84 +g3 +g4 +sg5 +(lp85 +S'NC_000003.11:g.10188187TGTCCCGATAG>T automapped to NC_000003.11:g.10188191_10188200del' +p86 +aS'RefSeqGene record not available' +p87 +asg9 +g4 +sg10 +(lp88 +sg12 +VHomo sapiens von Hippel-Lindau tumor suppressor (VHL), transcript variant 3, mRNA +p89 +sg14 +S'VHL' +p90 +sg16 +(dp91 +g18 +S'NP_001341652.1:p.?' +p92 +sg20 +S'NP_001341652.1:p.?' 
+p93 +ssg22 +g23 +sg24 +S'NC_000003.11(NM_001354723.1):c.*18-3280_*18-3271del' +p94 +sg26 +g4 +sg27 +S'NM_001354723.1:c.*18-3280_*18-3271del' +p95 +sg29 +g4 +sg30 +(dp96 +S'hg19' +p97 +(dp98 +g34 +S'NC_000003.11:g.10188191_10188200del' +p99 +sg36 +(dp100 +g38 +g39 +sg40 +S'TGTCCCGATAG' +p101 +sg42 +S'10188187' +p102 +sg44 +g45 +sssg46 +(dp103 +g34 +S'NC_000003.12:g.10146507_10146516del' +p104 +sg36 +(dp105 +g38 +g39 +sg40 +S'TGTCCCGATAG' +p106 +sg42 +S'10146503' +p107 +sg44 +g45 +sssS'grch37' +p108 +(dp109 +g34 +S'NC_000003.11:g.10188191_10188200del' +p110 +sg36 +(dp111 +g38 +g56 +sg40 +S'TGTCCCGATAG' +p112 +sg42 +S'10188187' +p113 +sg44 +g45 +sssS'grch38' +p114 +(dp115 +g34 +S'NC_000003.12:g.10146507_10146516del' +p116 +sg36 +(dp117 +g38 +g56 +sg40 +S'TGTCCCGATAG' +p118 +sg42 +S'10146503' +p119 +sg44 +g45 +ssssg65 +(dp120 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341652.1' +p121 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354723.1' +p122 +sssS'NM_000551.3:c.341-7_343del' +p123 +(dp124 +g3 +g4 +sg5 +(lp125 +S'NC_000003.11:g.10188187TGTCCCGATAG>T automapped to NC_000003.11:g.10188191_10188200del' +p126 +aS'RefSeqGene record not available' +p127 +asg9 +g4 +sg10 +(lp128 +sg12 +VHomo sapiens von Hippel-Lindau tumor suppressor (VHL), transcript variant 1, mRNA +p129 +sg14 +S'VHL' +p130 +sg16 +(dp131 +g18 +S'NP_000542.1:p.?' +p132 +sg20 +S'NP_000542.1:p.?' 
+p133 +ssg22 +g23 +sg24 +S'NC_000003.11(NM_000551.3):c.341-7_343del' +p134 +sg26 +g4 +sg27 +S'NM_000551.3:c.341-7_343del' +p135 +sg29 +g4 +sg30 +(dp136 +S'hg19' +p137 +(dp138 +g34 +S'NC_000003.11:g.10188191_10188200del' +p139 +sg36 +(dp140 +g38 +g39 +sg40 +S'TGTCCCGATAG' +p141 +sg42 +S'10188187' +p142 +sg44 +g45 +sssg46 +(dp143 +g34 +S'NC_000003.12:g.10146507_10146516del' +p144 +sg36 +(dp145 +g38 +g39 +sg40 +S'TGTCCCGATAG' +p146 +sg42 +S'10146503' +p147 +sg44 +g45 +sssS'grch37' +p148 +(dp149 +g34 +S'NC_000003.11:g.10188191_10188200del' +p150 +sg36 +(dp151 +g38 +g56 +sg40 +S'TGTCCCGATAG' +p152 +sg42 +S'10188187' +p153 +sg44 +g45 +sssS'grch38' +p154 +(dp155 +g34 +S'NC_000003.12:g.10146507_10146516del' +p156 +sg36 +(dp157 +g38 +g56 +sg40 +S'TGTCCCGATAG' +p158 +sg42 +S'10146503' +p159 +sg44 +g45 +ssssg65 +(dp160 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000542.1' +p161 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000551.3' +p162 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant28.txt b/VariantValidator/testing/testOutputsMasterITS/variant28.txt new file mode 100644 index 00000000..406b2fbf --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant28.txt @@ -0,0 +1,23 @@ +(dp0 +S'flag' +p1 +NsS'metadata' +p2 +(dp3 +S'variantvalidator_hgvs_version' +p4 +S'1.1.3' +p5 +sS'uta_schema' +p6 +S'uta_20180821' +p7 +sS'seqrepo_db' +p8 +S'2018-08-21' +p9 +sS'variantvalidator_version' +p10 +S'v0.2' +p11 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant280.txt b/VariantValidator/testing/testOutputsMasterITS/variant280.txt new file mode 100644 index 00000000..7853a80a --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant280.txt @@ -0,0 +1,894 @@ +(dp0 +S'NM_001005505.2:c.3408A>C' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 1, mRNA +p12 +sS'gene_symbol' +p13 +S'CACNA2D2' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_001005505.1:p.(Gln1136His)' +p18 +sS'slr' +p19 +S'NP_001005505.1:p.(Q1136H)' +p20 +ssS'submitted_variant' +p21 +S'3-50402127-T-G' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_001005505.2:c.3408A>C' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000003.11:g.50402127T>G' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr3' +p37 +sS'ref' +p38 +VT +p39 +sS'pos' +p40 +S'50402127' +p41 +sS'alt' +p42 +VG +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000003.12:g.50364696T>G' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +g39 +sg40 +S'50364696' +p48 +sg42 +g43 +sssS'grch37' +p49 +(dp50 +g32 +S'NC_000003.11:g.50402127T>G' +p51 +sg34 +(dp52 +g36 +S'3' +p53 +sg38 +g39 +sg40 +S'50402127' +p54 +sg42 +g43 +sssS'grch38' +p55 +(dp56 +g32 +S'NC_000003.12:g.50364696T>G' +p57 +sg34 +(dp58 +g36 +g53 +sg38 +g39 +sg40 +S'50364696' +p59 +sg42 +g43 +ssssS'reference_sequence_records' +p60 +(dp61 +S'protein' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001005505.1' +p63 +sS'transcript' +p64 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001005505.2' +p65 +sssS'NM_006030.2:c.3402A>C' +p66 +(dp67 +g3 +g4 +sg5 +(lp68 +S'A more recent version of the selected reference sequence NM_006030.2 is available (NM_006030.3)' +p69 +aS'NM_006030.3:c.3402A>C MUST be fully validated prior to use in reports' +p70 +aS'select_variants=NM_006030.3:c.3402A>C' +p71 +aS'RefSeqGene record not available' +p72 +asg8 +g4 +sg9 +(lp73 +sg11 +VHomo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 2, mRNA +p74 +sg13 +S'CACNA2D2' +p75 +sg15 +(dp76 +g17 +S'NP_006021.2:p.(Gln1134His)' +p77 +sg19 +S'NP_006021.2:p.(Q1134H)' +p78 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_006030.2:c.3402A>C' +p79 +sg27 +g4 +sg28 +(dp80 +S'hg19' +p81 +(dp82 +g32 +S'NC_000003.11:g.50402127T>G' +p83 +sg34 +(dp84 +g36 +g37 +sg38 +g39 +sg40 +S'50402127' +p85 +sg42 +g43 +sssS'grch37' +p86 +(dp87 +g32 +S'NC_000003.11:g.50402127T>G' +p88 +sg34 +(dp89 +g36 +g53 +sg38 +g39 +sg40 +S'50402127' +p90 +sg42 +g43 +ssssg60 +(dp91 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_006021.2' +p92 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_006030.2' +p93 +sssS'NM_001174051.1:c.3423A>C' +p94 +(dp95 +g3 +g4 +sg5 +(lp96 +S'A more recent version of the selected reference sequence NM_001174051.1 is available (NM_001174051.2)' +p97 +aS'NM_001174051.2:c.3423A>C MUST be fully validated prior to use in reports' +p98 +aS'select_variants=NM_001174051.2:c.3423A>C' +p99 +aS'RefSeqGene record not available' +p100 +asg8 +g4 +sg9 +(lp101 +sg11 +VHomo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 3, mRNA +p102 +sg13 +S'CACNA2D2' +p103 +sg15 +(dp104 +g17 +S'NP_001167522.1:p.(Gln1141His)' +p105 +sg19 +S'NP_001167522.1:p.(Q1141H)' +p106 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001174051.1:c.3423A>C' +p107 +sg27 +g4 +sg28 +(dp108 +S'hg19' +p109 +(dp110 +g32 +S'NC_000003.11:g.50402127T>G' +p111 +sg34 +(dp112 +g36 +g37 +sg38 +g39 +sg40 
+S'50402127' +p113 +sg42 +g43 +sssS'grch37' +p114 +(dp115 +g32 +S'NC_000003.11:g.50402127T>G' +p116 +sg34 +(dp117 +g36 +g53 +sg38 +g39 +sg40 +S'50402127' +p118 +sg42 +g43 +ssssg60 +(dp119 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167522.1' +p120 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001174051.1' +p121 +sssS'NM_001174051.2:c.3423A>C' +p122 +(dp123 +g3 +g4 +sg5 +(lp124 +S'RefSeqGene record not available' +p125 +asg8 +g4 +sg9 +(lp126 +sg11 +VHomo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 3, mRNA +p127 +sg13 +S'CACNA2D2' +p128 +sg15 +(dp129 +g17 +S'NP_001167522.1:p.(Gln1141His)' +p130 +sg19 +S'NP_001167522.1:p.(Q1141H)' +p131 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001174051.2:c.3423A>C' +p132 +sg27 +g4 +sg28 +(dp133 +S'hg19' +p134 +(dp135 +g32 +S'NC_000003.11:g.50402127T>G' +p136 +sg34 +(dp137 +g36 +g37 +sg38 +g39 +sg40 +S'50402127' +p138 +sg42 +g43 +sssg44 +(dp139 +g32 +S'NC_000003.12:g.50364696T>G' +p140 +sg34 +(dp141 +g36 +g37 +sg38 +g39 +sg40 +S'50364696' +p142 +sg42 +g43 +sssS'grch37' +p143 +(dp144 +g32 +S'NC_000003.11:g.50402127T>G' +p145 +sg34 +(dp146 +g36 +g53 +sg38 +g39 +sg40 +S'50402127' +p147 +sg42 +g43 +sssS'grch38' +p148 +(dp149 +g32 +S'NC_000003.12:g.50364696T>G' +p150 +sg34 +(dp151 +g36 +g53 +sg38 +g39 +sg40 +S'50364696' +p152 +sg42 +g43 +ssssg60 +(dp153 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167522.1' +p154 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001174051.2' +p155 +sssS'metadata' +p156 +(dp157 +S'variantvalidator_hgvs_version' +p158 +S'1.1.3' +p159 +sS'uta_schema' +p160 +S'uta_20180821' +p161 +sS'seqrepo_db' +p162 +S'2018-08-21' +p163 +sS'variantvalidator_version' +p164 +S'v0.2' +p165 +ssS'NM_006030.3:c.3402A>C' +p166 +(dp167 +g3 +g4 +sg5 +(lp168 +S'RefSeqGene record not available' +p169 +asg8 +g4 +sg9 +(lp170 +sg11 +VHomo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 2, mRNA +p171 +sg13 
+S'CACNA2D2' +p172 +sg15 +(dp173 +g17 +S'NP_006021.2:p.(Gln1134His)' +p174 +sg19 +S'NP_006021.2:p.(Q1134H)' +p175 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_006030.3:c.3402A>C' +p176 +sg27 +g4 +sg28 +(dp177 +S'hg19' +p178 +(dp179 +g32 +S'NC_000003.11:g.50402127T>G' +p180 +sg34 +(dp181 +g36 +g37 +sg38 +g39 +sg40 +S'50402127' +p182 +sg42 +g43 +sssg44 +(dp183 +g32 +S'NC_000003.12:g.50364696T>G' +p184 +sg34 +(dp185 +g36 +g37 +sg38 +g39 +sg40 +S'50364696' +p186 +sg42 +g43 +sssS'grch37' +p187 +(dp188 +g32 +S'NC_000003.11:g.50402127T>G' +p189 +sg34 +(dp190 +g36 +g53 +sg38 +g39 +sg40 +S'50402127' +p191 +sg42 +g43 +sssS'grch38' +p192 +(dp193 +g32 +S'NC_000003.12:g.50364696T>G' +p194 +sg34 +(dp195 +g36 +g53 +sg38 +g39 +sg40 +S'50364696' +p196 +sg42 +g43 +ssssg60 +(dp197 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_006021.2' +p198 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_006030.3' +p199 +sssS'NM_001291101.1:c.3201A>C' +p200 +(dp201 +g3 +g4 +sg5 +(lp202 +S'RefSeqGene record not available' +p203 +asg8 +g4 +sg9 +(lp204 +sg11 +VHomo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 4, mRNA +p205 +sg13 +S'CACNA2D2' +p206 +sg15 +(dp207 +g17 +S'NP_001278030.1:p.(Gln1067His)' +p208 +sg19 +S'NP_001278030.1:p.(Q1067H)' +p209 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001291101.1:c.3201A>C' +p210 +sg27 +g4 +sg28 +(dp211 +S'hg19' +p212 +(dp213 +g32 +S'NC_000003.11:g.50402127T>G' +p214 +sg34 +(dp215 +g36 +g37 +sg38 +g39 +sg40 +S'50402127' +p216 +sg42 +g43 +sssg44 +(dp217 +g32 +S'NC_000003.12:g.50364696T>G' +p218 +sg34 +(dp219 +g36 +g37 +sg38 +g39 +sg40 +S'50364696' +p220 +sg42 +g43 +sssS'grch37' +p221 +(dp222 +g32 +S'NC_000003.11:g.50402127T>G' +p223 +sg34 +(dp224 +g36 +g53 +sg38 +g39 +sg40 +S'50402127' +p225 +sg42 +g43 +sssS'grch38' +p226 +(dp227 +g32 +S'NC_000003.12:g.50364696T>G' +p228 +sg34 +(dp229 +g36 +g53 +sg38 +g39 +sg40 +S'50364696' +p230 +sg42 +g43 +ssssg60 +(dp231 +g62 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278030.1' +p232 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291101.1' +p233 +sssS'flag' +p234 +S'gene_variant' +p235 +sS'NR_111912.1:n.443-1601T>G' +p236 +(dp237 +g3 +g4 +sg5 +(lp238 +S'RefSeqGene record not available' +p239 +asg8 +g4 +sg9 +(lp240 +sg11 +VHomo sapiens cytochrome b561 family member D2 (CYB561D2), transcript variant 3, non-coding RNA +p241 +sg13 +S'CYB561D2' +p242 +sg15 +(dp243 +g17 +S'Non-coding :n.' +p244 +sg19 +g244 +ssg21 +g22 +sg23 +S'NC_000003.11(NR_111912.1):c.443-1601T>G' +p245 +sg24 +g4 +sg25 +S'NR_111912.1:n.443-1601T>G' +p246 +sg27 +g4 +sg28 +(dp247 +S'hg19' +p248 +(dp249 +g32 +S'NC_000003.11:g.50402127T>G' +p250 +sg34 +(dp251 +g36 +g37 +sg38 +S'T' +p252 +sg40 +S'50402127' +p253 +sg42 +S'G' +p254 +sssg44 +(dp255 +g32 +S'NC_000003.12:g.50364696T>G' +p256 +sg34 +(dp257 +g36 +g37 +sg38 +g252 +sg40 +S'50364696' +p258 +sg42 +g254 +sssS'grch37' +p259 +(dp260 +g32 +S'NC_000003.11:g.50402127T>G' +p261 +sg34 +(dp262 +g36 +g53 +sg38 +g252 +sg40 +S'50402127' +p263 +sg42 +g254 +sssS'grch38' +p264 +(dp265 +g32 +S'NC_000003.12:g.50364696T>G' +p266 +sg34 +(dp267 +g36 +g53 +sg38 +g252 +sg40 +S'50364696' +p268 +sg42 +g254 +ssssg60 +(dp269 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_111912.1' +p270 +sssS'NM_001005505.1:c.3408A>C' +p271 +(dp272 +g3 +g4 +sg5 +(lp273 +S'A more recent version of the selected reference sequence NM_001005505.1 is available (NM_001005505.2)' +p274 +aS'NM_001005505.2:c.3408A>C MUST be fully validated prior to use in reports' +p275 +aS'select_variants=NM_001005505.2:c.3408A>C' +p276 +aS'RefSeqGene record not available' +p277 +asg8 +g4 +sg9 +(lp278 +sg11 +VHomo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 1, mRNA +p279 +sg13 +S'CACNA2D2' +p280 +sg15 +(dp281 +g17 +S'NP_001005505.1:p.(Gln1136His)' +p282 +sg19 +S'NP_001005505.1:p.(Q1136H)' +p283 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001005505.1:c.3408A>C' +p284 +sg27 +g4 
+sg28 +(dp285 +S'hg19' +p286 +(dp287 +g32 +S'NC_000003.11:g.50402127T>G' +p288 +sg34 +(dp289 +g36 +g37 +sg38 +g39 +sg40 +S'50402127' +p290 +sg42 +g43 +sssS'grch37' +p291 +(dp292 +g32 +S'NC_000003.11:g.50402127T>G' +p293 +sg34 +(dp294 +g36 +g53 +sg38 +g39 +sg40 +S'50402127' +p295 +sg42 +g43 +ssssg60 +(dp296 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001005505.1' +p297 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001005505.1' +p298 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant281.txt b/VariantValidator/testing/testOutputsMasterITS/variant281.txt new file mode 100644 index 00000000..5d933d13 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant281.txt @@ -0,0 +1,1116 @@ +(dp0 +S'NR_111913.1:n.126G>A' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens cytochrome b561 family member D2 (CYB561D2), transcript variant 4, non-coding RNA +p12 +sS'gene_symbol' +p13 +S'CYB561D2' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'Non-coding :n.' 
+p18 +sS'slr' +p19 +g18 +ssS'submitted_variant' +p20 +S'3-50402890-G-A' +p21 +sS'genome_context_intronic_sequence' +p22 +g4 +sS'hgvs_lrg_variant' +p23 +g4 +sS'hgvs_transcript_variant' +p24 +S'NR_111913.1:n.126G>A' +p25 +sS'hgvs_refseqgene_variant' +p26 +g4 +sS'primary_assembly_loci' +p27 +(dp28 +S'hg19' +p29 +(dp30 +S'hgvs_genomic_description' +p31 +S'NC_000003.11:g.50402890G>A' +p32 +sS'vcf' +p33 +(dp34 +S'chr' +p35 +S'chr3' +p36 +sS'ref' +p37 +S'G' +p38 +sS'pos' +p39 +S'50402890' +p40 +sS'alt' +p41 +S'A' +p42 +sssS'hg38' +p43 +(dp44 +g31 +S'NC_000003.12:g.50365459G>A' +p45 +sg33 +(dp46 +g35 +g36 +sg37 +g38 +sg39 +S'50365459' +p47 +sg41 +g42 +sssS'grch37' +p48 +(dp49 +g31 +S'NC_000003.11:g.50402890G>A' +p50 +sg33 +(dp51 +g35 +S'3' +p52 +sg37 +g38 +sg39 +S'50402890' +p53 +sg41 +g42 +sssS'grch38' +p54 +(dp55 +g31 +S'NC_000003.12:g.50365459G>A' +p56 +sg33 +(dp57 +g35 +g52 +sg37 +g38 +sg39 +S'50365459' +p58 +sg41 +g42 +ssssS'reference_sequence_records' +p59 +(dp60 +S'transcript' +p61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_111913.1' +p62 +sssS'NR_111912.1:n.443-838G>A' +p63 +(dp64 +g3 +g4 +sg5 +(lp65 +S'RefSeqGene record not available' +p66 +asg8 +g4 +sg9 +(lp67 +sg11 +VHomo sapiens cytochrome b561 family member D2 (CYB561D2), transcript variant 3, non-coding RNA +p68 +sg13 +S'CYB561D2' +p69 +sg15 +(dp70 +g17 +S'Non-coding :n.' 
+p71 +sg19 +g71 +ssg20 +g21 +sg22 +S'NC_000003.11(NR_111912.1):c.443-838G>A' +p72 +sg23 +g4 +sg24 +S'NR_111912.1:n.443-838G>A' +p73 +sg26 +g4 +sg27 +(dp74 +S'hg19' +p75 +(dp76 +g31 +S'NC_000003.11:g.50402890G>A' +p77 +sg33 +(dp78 +g35 +g36 +sg37 +g38 +sg39 +S'50402890' +p79 +sg41 +g42 +sssg43 +(dp80 +g31 +S'NC_000003.12:g.50365459G>A' +p81 +sg33 +(dp82 +g35 +g36 +sg37 +g38 +sg39 +S'50365459' +p83 +sg41 +g42 +sssS'grch37' +p84 +(dp85 +g31 +S'NC_000003.11:g.50402890G>A' +p86 +sg33 +(dp87 +g35 +g52 +sg37 +g38 +sg39 +S'50402890' +p88 +sg41 +g42 +sssS'grch38' +p89 +(dp90 +g31 +S'NC_000003.12:g.50365459G>A' +p91 +sg33 +(dp92 +g35 +g52 +sg37 +g38 +sg39 +S'50365459' +p93 +sg41 +g42 +ssssg59 +(dp94 +g61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_111912.1' +p95 +sssS'NM_001291101.1:c.2788C>T' +p96 +(dp97 +g3 +g4 +sg5 +(lp98 +S'RefSeqGene record not available' +p99 +asg8 +g4 +sg9 +(lp100 +sg11 +VHomo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 4, mRNA +p101 +sg13 +S'CACNA2D2' +p102 +sg15 +(dp103 +g17 +S'NP_001278030.1:p.(Pro930Ser)' +p104 +sg19 +S'NP_001278030.1:p.(P930S)' +p105 +ssg20 +g21 +sg22 +g4 +sg23 +g4 +sg24 +S'NM_001291101.1:c.2788C>T' +p106 +sg26 +g4 +sg27 +(dp107 +S'hg19' +p108 +(dp109 +g31 +S'NC_000003.11:g.50402890G>A' +p110 +sg33 +(dp111 +g35 +g36 +sg37 +VG +p112 +sg39 +S'50402890' +p113 +sg41 +VA +p114 +sssg43 +(dp115 +g31 +S'NC_000003.12:g.50365459G>A' +p116 +sg33 +(dp117 +g35 +g36 +sg37 +g112 +sg39 +S'50365459' +p118 +sg41 +g114 +sssS'grch37' +p119 +(dp120 +g31 +S'NC_000003.11:g.50402890G>A' +p121 +sg33 +(dp122 +g35 +g52 +sg37 +g112 +sg39 +S'50402890' +p123 +sg41 +g114 +sssS'grch38' +p124 +(dp125 +g31 +S'NC_000003.12:g.50365459G>A' +p126 +sg33 +(dp127 +g35 +g52 +sg37 +g112 +sg39 +S'50365459' +p128 +sg41 +g114 +ssssg59 +(dp129 +S'protein' +p130 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278030.1' +p131 +sg61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291101.1' +p132 +sssS'NM_006030.2:c.2995C>T' 
+p133 +(dp134 +g3 +g4 +sg5 +(lp135 +S'A more recent version of the selected reference sequence NM_006030.2 is available (NM_006030.3)' +p136 +aS'NM_006030.3:c.2995C>T MUST be fully validated prior to use in reports' +p137 +aS'select_variants=NM_006030.3:c.2995C>T' +p138 +aS'RefSeqGene record not available' +p139 +asg8 +g4 +sg9 +(lp140 +sg11 +VHomo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 2, mRNA +p141 +sg13 +S'CACNA2D2' +p142 +sg15 +(dp143 +g17 +S'NP_006021.2:p.(Pro999Ser)' +p144 +sg19 +S'NP_006021.2:p.(P999S)' +p145 +ssg20 +g21 +sg22 +g4 +sg23 +g4 +sg24 +S'NM_006030.2:c.2995C>T' +p146 +sg26 +g4 +sg27 +(dp147 +S'hg19' +p148 +(dp149 +g31 +S'NC_000003.11:g.50402890G>A' +p150 +sg33 +(dp151 +g35 +g36 +sg37 +g112 +sg39 +S'50402890' +p152 +sg41 +g114 +sssS'grch37' +p153 +(dp154 +g31 +S'NC_000003.11:g.50402890G>A' +p155 +sg33 +(dp156 +g35 +g52 +sg37 +g112 +sg39 +S'50402890' +p157 +sg41 +g114 +ssssg59 +(dp158 +g130 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_006021.2' +p159 +sg61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_006030.2' +p160 +sssS'NR_111914.1:n.126G>A' +p161 +(dp162 +g3 +g4 +sg5 +(lp163 +S'RefSeqGene record not available' +p164 +asg8 +g4 +sg9 +(lp165 +sg11 +VHomo sapiens cytochrome b561 family member D2 (CYB561D2), transcript variant 5, non-coding RNA +p166 +sg13 +S'CYB561D2' +p167 +sg15 +(dp168 +g17 +S'Non-coding :n.' 
+p169 +sg19 +g169 +ssg20 +g21 +sg22 +g4 +sg23 +g4 +sg24 +S'NR_111914.1:n.126G>A' +p170 +sg26 +g4 +sg27 +(dp171 +S'hg19' +p172 +(dp173 +g31 +S'NC_000003.11:g.50402890G>A' +p174 +sg33 +(dp175 +g35 +g36 +sg37 +g38 +sg39 +S'50402890' +p176 +sg41 +g42 +sssg43 +(dp177 +g31 +S'NC_000003.12:g.50365459G>A' +p178 +sg33 +(dp179 +g35 +g36 +sg37 +g38 +sg39 +S'50365459' +p180 +sg41 +g42 +sssS'grch37' +p181 +(dp182 +g31 +S'NC_000003.11:g.50402890G>A' +p183 +sg33 +(dp184 +g35 +g52 +sg37 +g38 +sg39 +S'50402890' +p185 +sg41 +g42 +sssS'grch38' +p186 +(dp187 +g31 +S'NC_000003.12:g.50365459G>A' +p188 +sg33 +(dp189 +g35 +g52 +sg37 +g38 +sg39 +S'50365459' +p190 +sg41 +g42 +ssssg59 +(dp191 +g61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_111914.1' +p192 +sssS'NM_001005505.2:c.2995C>T' +p193 +(dp194 +g3 +g4 +sg5 +(lp195 +S'RefSeqGene record not available' +p196 +asg8 +g4 +sg9 +(lp197 +sg11 +VHomo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 1, mRNA +p198 +sg13 +S'CACNA2D2' +p199 +sg15 +(dp200 +g17 +S'NP_001005505.1:p.(Pro999Ser)' +p201 +sg19 +S'NP_001005505.1:p.(P999S)' +p202 +ssg20 +g21 +sg22 +g4 +sg23 +g4 +sg24 +S'NM_001005505.2:c.2995C>T' +p203 +sg26 +g4 +sg27 +(dp204 +S'hg19' +p205 +(dp206 +g31 +S'NC_000003.11:g.50402890G>A' +p207 +sg33 +(dp208 +g35 +g36 +sg37 +g112 +sg39 +S'50402890' +p209 +sg41 +g114 +sssg43 +(dp210 +g31 +S'NC_000003.12:g.50365459G>A' +p211 +sg33 +(dp212 +g35 +g36 +sg37 +g112 +sg39 +S'50365459' +p213 +sg41 +g114 +sssS'grch37' +p214 +(dp215 +g31 +S'NC_000003.11:g.50402890G>A' +p216 +sg33 +(dp217 +g35 +g52 +sg37 +g112 +sg39 +S'50402890' +p218 +sg41 +g114 +sssS'grch38' +p219 +(dp220 +g31 +S'NC_000003.12:g.50365459G>A' +p221 +sg33 +(dp222 +g35 +g52 +sg37 +g112 +sg39 +S'50365459' +p223 +sg41 +g114 +ssssg59 +(dp224 +g130 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001005505.1' +p225 +sg61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001005505.2' +p226 +sssS'flag' +p227 +S'gene_variant' +p228 +sS'NM_001174051.1:c.3016C>T' 
+p229 +(dp230 +g3 +g4 +sg5 +(lp231 +S'A more recent version of the selected reference sequence NM_001174051.1 is available (NM_001174051.2)' +p232 +aS'NM_001174051.2:c.3016C>T MUST be fully validated prior to use in reports' +p233 +aS'select_variants=NM_001174051.2:c.3016C>T' +p234 +aS'RefSeqGene record not available' +p235 +asg8 +g4 +sg9 +(lp236 +sg11 +VHomo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 3, mRNA +p237 +sg13 +S'CACNA2D2' +p238 +sg15 +(dp239 +g17 +S'NP_001167522.1:p.(Pro1006Ser)' +p240 +sg19 +S'NP_001167522.1:p.(P1006S)' +p241 +ssg20 +g21 +sg22 +g4 +sg23 +g4 +sg24 +S'NM_001174051.1:c.3016C>T' +p242 +sg26 +g4 +sg27 +(dp243 +S'hg19' +p244 +(dp245 +g31 +S'NC_000003.11:g.50402890G>A' +p246 +sg33 +(dp247 +g35 +g36 +sg37 +g112 +sg39 +S'50402890' +p248 +sg41 +g114 +sssS'grch37' +p249 +(dp250 +g31 +S'NC_000003.11:g.50402890G>A' +p251 +sg33 +(dp252 +g35 +g52 +sg37 +g112 +sg39 +S'50402890' +p253 +sg41 +g114 +ssssg59 +(dp254 +g130 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167522.1' +p255 +sg61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001174051.1' +p256 +sssS'NM_001174051.2:c.3016C>T' +p257 +(dp258 +g3 +g4 +sg5 +(lp259 +S'RefSeqGene record not available' +p260 +asg8 +g4 +sg9 +(lp261 +sg11 +VHomo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 3, mRNA +p262 +sg13 +S'CACNA2D2' +p263 +sg15 +(dp264 +g17 +S'NP_001167522.1:p.(Pro1006Ser)' +p265 +sg19 +S'NP_001167522.1:p.(P1006S)' +p266 +ssg20 +g21 +sg22 +g4 +sg23 +g4 +sg24 +S'NM_001174051.2:c.3016C>T' +p267 +sg26 +g4 +sg27 +(dp268 +S'hg19' +p269 +(dp270 +g31 +S'NC_000003.11:g.50402890G>A' +p271 +sg33 +(dp272 +g35 +g36 +sg37 +g112 +sg39 +S'50402890' +p273 +sg41 +g114 +sssg43 +(dp274 +g31 +S'NC_000003.12:g.50365459G>A' +p275 +sg33 +(dp276 +g35 +g36 +sg37 +g112 +sg39 +S'50365459' +p277 +sg41 +g114 +sssS'grch37' +p278 +(dp279 +g31 +S'NC_000003.11:g.50402890G>A' +p280 +sg33 +(dp281 +g35 +g52 +sg37 +g112 +sg39 
+S'50402890' +p282 +sg41 +g114 +sssS'grch38' +p283 +(dp284 +g31 +S'NC_000003.12:g.50365459G>A' +p285 +sg33 +(dp286 +g35 +g52 +sg37 +g112 +sg39 +S'50365459' +p287 +sg41 +g114 +ssssg59 +(dp288 +g130 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167522.1' +p289 +sg61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001174051.2' +p290 +sssS'NM_006030.3:c.2995C>T' +p291 +(dp292 +g3 +g4 +sg5 +(lp293 +S'RefSeqGene record not available' +p294 +asg8 +g4 +sg9 +(lp295 +sg11 +VHomo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 2, mRNA +p296 +sg13 +S'CACNA2D2' +p297 +sg15 +(dp298 +g17 +S'NP_006021.2:p.(Pro999Ser)' +p299 +sg19 +S'NP_006021.2:p.(P999S)' +p300 +ssg20 +g21 +sg22 +g4 +sg23 +g4 +sg24 +S'NM_006030.3:c.2995C>T' +p301 +sg26 +g4 +sg27 +(dp302 +S'hg19' +p303 +(dp304 +g31 +S'NC_000003.11:g.50402890G>A' +p305 +sg33 +(dp306 +g35 +g36 +sg37 +g112 +sg39 +S'50402890' +p307 +sg41 +g114 +sssg43 +(dp308 +g31 +S'NC_000003.12:g.50365459G>A' +p309 +sg33 +(dp310 +g35 +g36 +sg37 +g112 +sg39 +S'50365459' +p311 +sg41 +g114 +sssS'grch37' +p312 +(dp313 +g31 +S'NC_000003.11:g.50402890G>A' +p314 +sg33 +(dp315 +g35 +g52 +sg37 +g112 +sg39 +S'50402890' +p316 +sg41 +g114 +sssS'grch38' +p317 +(dp318 +g31 +S'NC_000003.12:g.50365459G>A' +p319 +sg33 +(dp320 +g35 +g52 +sg37 +g112 +sg39 +S'50365459' +p321 +sg41 +g114 +ssssg59 +(dp322 +g130 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_006021.2' +p323 +sg61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_006030.3' +p324 +sssS'NM_001005505.1:c.2995C>T' +p325 +(dp326 +g3 +g4 +sg5 +(lp327 +S'A more recent version of the selected reference sequence NM_001005505.1 is available (NM_001005505.2)' +p328 +aS'NM_001005505.2:c.2995C>T MUST be fully validated prior to use in reports' +p329 +aS'select_variants=NM_001005505.2:c.2995C>T' +p330 +aS'RefSeqGene record not available' +p331 +asg8 +g4 +sg9 +(lp332 +sg11 +VHomo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 1, mRNA 
+p333 +sg13 +S'CACNA2D2' +p334 +sg15 +(dp335 +g17 +S'NP_001005505.1:p.(Pro999Ser)' +p336 +sg19 +S'NP_001005505.1:p.(P999S)' +p337 +ssg20 +g21 +sg22 +g4 +sg23 +g4 +sg24 +S'NM_001005505.1:c.2995C>T' +p338 +sg26 +g4 +sg27 +(dp339 +S'hg19' +p340 +(dp341 +g31 +S'NC_000003.11:g.50402890G>A' +p342 +sg33 +(dp343 +g35 +g36 +sg37 +g112 +sg39 +S'50402890' +p344 +sg41 +g114 +sssS'grch37' +p345 +(dp346 +g31 +S'NC_000003.11:g.50402890G>A' +p347 +sg33 +(dp348 +g35 +g52 +sg37 +g112 +sg39 +S'50402890' +p349 +sg41 +g114 +ssssg59 +(dp350 +g130 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001005505.1' +p351 +sg61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001005505.1' +p352 +sssS'metadata' +p353 +(dp354 +S'variantvalidator_hgvs_version' +p355 +S'1.1.3' +p356 +sS'uta_schema' +p357 +S'uta_20180821' +p358 +sS'seqrepo_db' +p359 +S'2018-08-21' +p360 +sS'variantvalidator_version' +p361 +S'v0.2' +p362 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant282.txt b/VariantValidator/testing/testOutputsMasterITS/variant282.txt new file mode 100644 index 00000000..bfa4b9ad --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant282.txt @@ -0,0 +1,553 @@ +(dp0 +S'NM_007159.4:c.1135+565del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000003.11:g.57851007AG>A automapped to NC_000003.11:g.57851008delG' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens sarcolemma associated protein (SLMAP), transcript variant 2, mRNA +p13 +sS'gene_symbol' +p14 +S'SLMAP' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_009090.2:p.?' +p19 +sS'slr' +p20 +S'NP_009090.2:p.?' 
+p21 +ssS'submitted_variant' +p22 +S'3-57851007-AG-A' +p23 +sS'genome_context_intronic_sequence' +p24 +S'NC_000003.11(NM_007159.4):c.1135+565del' +p25 +sS'hgvs_lrg_variant' +p26 +g4 +sS'hgvs_transcript_variant' +p27 +S'NM_007159.4:c.1135+565del' +p28 +sS'hgvs_refseqgene_variant' +p29 +g4 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000003.11:g.57851008del' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr3' +p39 +sS'ref' +p40 +S'AG' +p41 +sS'pos' +p42 +S'57851007' +p43 +sS'alt' +p44 +S'A' +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000003.12:g.57865281del' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +S'AG' +p50 +sg42 +S'57865280' +p51 +sg44 +g45 +sssS'grch37' +p52 +(dp53 +g34 +S'NC_000003.11:g.57851008del' +p54 +sg36 +(dp55 +g38 +S'3' +p56 +sg40 +S'AG' +p57 +sg42 +S'57851007' +p58 +sg44 +g45 +sssS'grch38' +p59 +(dp60 +g34 +S'NC_000003.12:g.57865281del' +p61 +sg36 +(dp62 +g38 +g56 +sg40 +S'AG' +p63 +sg42 +S'57865280' +p64 +sg44 +g45 +ssssS'reference_sequence_records' +p65 +(dp66 +S'protein' +p67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009090.2' +p68 +sS'transcript' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007159.4' +p70 +sssS'' +p71 +(dp72 +g3 +g4 +sg5 +(lp73 +S'NC_000003.11:g.57851007AG>A automapped to NC_000003.11:g.57851008delG' +p74 +aS'Unable to assign transcript identity records to NM_001304421.1, potentially an obsolete record :' +p75 +asg9 +g4 +sg10 +(lp76 +sg12 +g4 +sg14 +g4 +sg16 +(dp77 +g18 +g4 +sg20 +g4 +ssg22 +g23 +sg24 +g4 +sg26 +g4 +sg27 +g4 +sg29 +g4 +sg30 +(dp78 +sg65 +g4 +ssS'NM_001304421.2:c.1135+565del' +p79 +(dp80 +g3 +g4 +sg5 +(lp81 +S'NC_000003.11:g.57851007AG>A automapped to NC_000003.11:g.57851008delG' +p82 +aS'RefSeqGene record not available' +p83 +asg9 +g4 +sg10 +(lp84 +sg12 +VHomo sapiens sarcolemma associated protein (SLMAP), transcript variant 3, mRNA +p85 +sg14 +S'SLMAP' +p86 +sg16 +(dp87 +g18 +S'NP_001291350.1:p.?' +p88 +sg20 +S'NP_001291350.1:p.?' 
+p89 +ssg22 +g23 +sg24 +S'NC_000003.11(NM_001304421.2):c.1135+565del' +p90 +sg26 +g4 +sg27 +S'NM_001304421.2:c.1135+565del' +p91 +sg29 +g4 +sg30 +(dp92 +S'hg19' +p93 +(dp94 +g34 +S'NC_000003.11:g.57851008del' +p95 +sg36 +(dp96 +g38 +g39 +sg40 +S'AG' +p97 +sg42 +S'57851007' +p98 +sg44 +g45 +sssg46 +(dp99 +g34 +S'NC_000003.12:g.57865281del' +p100 +sg36 +(dp101 +g38 +g39 +sg40 +S'AG' +p102 +sg42 +S'57865280' +p103 +sg44 +g45 +sssS'grch37' +p104 +(dp105 +g34 +S'NC_000003.11:g.57851008del' +p106 +sg36 +(dp107 +g38 +g56 +sg40 +S'AG' +p108 +sg42 +S'57851007' +p109 +sg44 +g45 +sssS'grch38' +p110 +(dp111 +g34 +S'NC_000003.12:g.57865281del' +p112 +sg36 +(dp113 +g38 +g56 +sg40 +S'AG' +p114 +sg42 +S'57865280' +p115 +sg44 +g45 +ssssg65 +(dp116 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001291350.1' +p117 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001304421.2' +p118 +sssS'NM_001304420.2:c.1186+424del' +p119 +(dp120 +g3 +g4 +sg5 +(lp121 +S'NC_000003.11:g.57851007AG>A automapped to NC_000003.11:g.57851008delG' +p122 +aS'RefSeqGene record not available' +p123 +asg9 +g4 +sg10 +(lp124 +sg12 +VHomo sapiens sarcolemma associated protein (SLMAP), transcript variant 1, mRNA +p125 +sg14 +S'SLMAP' +p126 +sg16 +(dp127 +g18 +S'NP_001291349.1:p.?' +p128 +sg20 +S'NP_001291349.1:p.?' 
+p129 +ssg22 +g23 +sg24 +S'NC_000003.11(NM_001304420.2):c.1186+424del' +p130 +sg26 +g4 +sg27 +S'NM_001304420.2:c.1186+424del' +p131 +sg29 +g4 +sg30 +(dp132 +S'hg19' +p133 +(dp134 +g34 +S'NC_000003.11:g.57851008del' +p135 +sg36 +(dp136 +g38 +g39 +sg40 +S'AG' +p137 +sg42 +S'57851007' +p138 +sg44 +g45 +sssg46 +(dp139 +g34 +S'NC_000003.12:g.57865281del' +p140 +sg36 +(dp141 +g38 +g39 +sg40 +S'AG' +p142 +sg42 +S'57865280' +p143 +sg44 +g45 +sssS'grch37' +p144 +(dp145 +g34 +S'NC_000003.11:g.57851008del' +p146 +sg36 +(dp147 +g38 +g56 +sg40 +S'AG' +p148 +sg42 +S'57851007' +p149 +sg44 +g45 +sssS'grch38' +p150 +(dp151 +g34 +S'NC_000003.12:g.57865281del' +p152 +sg36 +(dp153 +g38 +g56 +sg40 +S'AG' +p154 +sg42 +S'57865280' +p155 +sg44 +g45 +ssssg65 +(dp156 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001291349.1' +p157 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001304420.2' +p158 +sssS'flag' +p159 +S'gene_variant' +p160 +sS'NM_007159.2:c.1135+565del' +p161 +(dp162 +g3 +g4 +sg5 +(lp163 +S'NC_000003.11:g.57851007AG>A automapped to NC_000003.11:g.57851008delG' +p164 +aS'A more recent version of the selected reference sequence NM_007159.2 is available (NM_007159.4)' +p165 +aS'NM_007159.4:c.1135+565delG MUST be fully validated prior to use in reports' +p166 +aS'select_variants=NM_007159.4:c.1135+565del' +p167 +aS'RefSeqGene record not available' +p168 +asg9 +g4 +sg10 +(lp169 +sg12 +VHomo sapiens sarcolemma associated protein (SLMAP), mRNA +p170 +sg14 +S'SLMAP' +p171 +sg16 +(dp172 +g18 +S'NP_009090.2:p.?' +p173 +sg20 +S'NP_009090.2:p.?' 
+p174 +ssg22 +g23 +sg24 +S'NC_000003.11(NM_007159.2):c.1135+565del' +p175 +sg26 +g4 +sg27 +S'NM_007159.2:c.1135+565del' +p176 +sg29 +g4 +sg30 +(dp177 +S'hg19' +p178 +(dp179 +g34 +S'NC_000003.11:g.57851008del' +p180 +sg36 +(dp181 +g38 +g39 +sg40 +S'AG' +p182 +sg42 +S'57851007' +p183 +sg44 +g45 +sssS'grch37' +p184 +(dp185 +g34 +S'NC_000003.11:g.57851008del' +p186 +sg36 +(dp187 +g38 +g56 +sg40 +S'AG' +p188 +sg42 +S'57851007' +p189 +sg44 +g45 +ssssg65 +(dp190 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009090.2' +p191 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007159.2' +p192 +sssS'metadata' +p193 +(dp194 +S'variantvalidator_hgvs_version' +p195 +S'1.1.3' +p196 +sS'uta_schema' +p197 +S'uta_20180821' +p198 +sS'seqrepo_db' +p199 +S'2018-08-21' +p200 +sS'variantvalidator_version' +p201 +S'v0.2' +p202 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant283.txt b/VariantValidator/testing/testOutputsMasterITS/variant283.txt new file mode 100644 index 00000000..3416aa30 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant283.txt @@ -0,0 +1,286 @@ +(dp0 +S'NM_001178065.1:c.3061C=' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens calcium sensing receptor (CASR), transcript variant 1, mRNA +p12 +sS'gene_symbol' +p13 +S'CASR' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_001171536.1:p.(Gln1021=)' +p18 +sS'slr' +p19 +S'NP_001171536.1:p.(Q1021=)' +p20 +ssS'submitted_variant' +p21 +S'3-122003832-G-C' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_001178065.1:c.3061C=' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 
+S'hgvs_genomic_description' +p32 +S'NC_000003.11:g.122003832G>C' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr3' +p37 +sS'ref' +p38 +S'G' +p39 +sS'pos' +p40 +S'122003832' +p41 +sS'alt' +p42 +S'C' +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000003.12:g.122284985G>C' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +g39 +sg40 +S'122284985' +p48 +sg42 +g43 +sssS'grch37' +p49 +(dp50 +g32 +S'NC_000003.11:g.122003832G>C' +p51 +sg34 +(dp52 +g36 +S'3' +p53 +sg38 +g39 +sg40 +S'122003832' +p54 +sg42 +g43 +sssS'grch38' +p55 +(dp56 +g32 +S'NC_000003.12:g.122284985G>C' +p57 +sg34 +(dp58 +g36 +g53 +sg38 +g39 +sg40 +S'122284985' +p59 +sg42 +g43 +ssssS'reference_sequence_records' +p60 +(dp61 +S'protein' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001171536.1' +p63 +sS'transcript' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001178065.1' +p65 +sssS'flag' +p66 +S'gene_variant' +p67 +sS'NM_000388.3:c.3031C=' +p68 +(dp69 +g3 +g4 +sg5 +(lp70 +S'RefSeqGene record not available' +p71 +asg8 +g4 +sg9 +(lp72 +sg11 +VHomo sapiens calcium sensing receptor (CASR), transcript variant 2, mRNA +p73 +sg13 +S'CASR' +p74 +sg15 +(dp75 +g17 +S'NP_000379.2:p.(Gln1011=)' +p76 +sg19 +S'NP_000379.2:p.(Q1011=)' +p77 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_000388.3:c.3031C=' +p78 +sg27 +g4 +sg28 +(dp79 +S'hg19' +p80 +(dp81 +g32 +S'NC_000003.11:g.122003832G>C' +p82 +sg34 +(dp83 +g36 +g37 +sg38 +g39 +sg40 +S'122003832' +p84 +sg42 +g43 +sssg44 +(dp85 +g32 +S'NC_000003.12:g.122284985G>C' +p86 +sg34 +(dp87 +g36 +g37 +sg38 +g39 +sg40 +S'122284985' +p88 +sg42 +g43 +sssS'grch37' +p89 +(dp90 +g32 +S'NC_000003.11:g.122003832G>C' +p91 +sg34 +(dp92 +g36 +g53 +sg38 +g39 +sg40 +S'122003832' +p93 +sg42 +g43 +sssS'grch38' +p94 +(dp95 +g32 +S'NC_000003.12:g.122284985G>C' +p96 +sg34 +(dp97 +g36 +g53 +sg38 +g39 +sg40 +S'122284985' +p98 +sg42 +g43 +ssssg60 +(dp99 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000379.2' +p100 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000388.3' +p101 +sssS'metadata' +p102 +(dp103 
+S'variantvalidator_hgvs_version' +p104 +S'1.1.3' +p105 +sS'uta_schema' +p106 +S'uta_20180821' +p107 +sS'seqrepo_db' +p108 +S'2018-08-21' +p109 +sS'variantvalidator_version' +p110 +S'v0.2' +p111 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant284.txt b/VariantValidator/testing/testOutputsMasterITS/variant284.txt new file mode 100644 index 00000000..bd3204c6 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant284.txt @@ -0,0 +1,510 @@ +(dp0 +S'NM_001349798.1:c.45_46insCCT' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000004.11:g.153332910C>CAGG automapped to NC_000004.11:g.153332912_153332913insGAG' +p7 +aS'A more recent version of the selected reference sequence NM_001349798.1 is available (NM_001349798.2)' +p8 +aS'NM_001349798.2:c.45_46insCCT MUST be fully validated prior to use in reports' +p9 +aS'select_variants=NM_001349798.2:c.45_46insCCT' +p10 +aS'RefSeqGene record not available' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g4 +sS'alt_genomic_loci' +p13 +(lp14 +sS'transcript_description' +p15 +VHomo sapiens F-box and WD repeat domain containing 7 (FBXW7), transcript variant 5, mRNA +p16 +sS'gene_symbol' +p17 +S'FBXW7' +p18 +sS'hgvs_predicted_protein_consequence' +p19 +(dp20 +S'tlr' +p21 +S'NP_361014.1:p.(Thr15_Gly16insPro)' +p22 +sS'slr' +p23 +S'NP_361014.1:p.(T15_G16insP)' +p24 +ssS'submitted_variant' +p25 +S'4-153332910-C-CAGG' +p26 +sS'genome_context_intronic_sequence' +p27 +g4 +sS'hgvs_lrg_variant' +p28 +g4 +sS'hgvs_transcript_variant' +p29 +S'NM_001349798.1:c.45_46insCCT' +p30 +sS'hgvs_refseqgene_variant' +p31 +g4 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000004.11:g.153332910_153332911insAGG' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr4' +p41 +sS'ref' +p42 +S'C' +p43 +sS'pos' +p44 +S'153332910' +p45 +sS'alt' +p46 +VCAGG +p47 +sssS'hg38' +p48 +(dp49 +g36 
+S'NC_000004.12:g.152411758_152411759insAGG' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +g43 +sg44 +S'152411758' +p52 +sg46 +VCAGG +p53 +sssS'grch37' +p54 +(dp55 +g36 +S'NC_000004.11:g.153332910_153332911insAGG' +p56 +sg38 +(dp57 +g40 +S'4' +p58 +sg42 +g43 +sg44 +S'153332910' +p59 +sg46 +VCAGG +p60 +sssS'grch38' +p61 +(dp62 +g36 +S'NC_000004.12:g.152411758_152411759insAGG' +p63 +sg38 +(dp64 +g40 +g58 +sg42 +g43 +sg44 +S'152411758' +p65 +sg46 +VCAGG +p66 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_361014.1' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001349798.1' +p72 +sssS'NM_033632.3:c.45_46insCCT' +p73 +(dp74 +g3 +g4 +sg5 +(lp75 +S'NC_000004.11:g.153332910C>CAGG automapped to NC_000004.11:g.153332912_153332913insGAG' +p76 +aS'RefSeqGene record not available' +p77 +asg12 +g4 +sg13 +(lp78 +sg15 +VHomo sapiens F-box and WD repeat domain containing 7 (FBXW7), transcript variant 1, mRNA +p79 +sg17 +S'FBXW7' +p80 +sg19 +(dp81 +g21 +S'NP_361014.1:p.(Thr15_Gly16insPro)' +p82 +sg23 +S'NP_361014.1:p.(T15_G16insP)' +p83 +ssg25 +g26 +sg27 +g4 +sg28 +g4 +sg29 +S'NM_033632.3:c.45_46insCCT' +p84 +sg31 +g4 +sg32 +(dp85 +S'hg19' +p86 +(dp87 +g36 +S'NC_000004.11:g.153332910_153332911insAGG' +p88 +sg38 +(dp89 +g40 +g41 +sg42 +g43 +sg44 +S'153332910' +p90 +sg46 +VCAGG +p91 +sssg48 +(dp92 +g36 +S'NC_000004.12:g.152411758_152411759insAGG' +p93 +sg38 +(dp94 +g40 +g41 +sg42 +g43 +sg44 +S'152411758' +p95 +sg46 +VCAGG +p96 +sssS'grch37' +p97 +(dp98 +g36 +S'NC_000004.11:g.153332910_153332911insAGG' +p99 +sg38 +(dp100 +g40 +g58 +sg42 +g43 +sg44 +S'153332910' +p101 +sg46 +VCAGG +p102 +sssS'grch38' +p103 +(dp104 +g36 +S'NC_000004.12:g.152411758_152411759insAGG' +p105 +sg38 +(dp106 +g40 +g58 +sg42 +g43 +sg44 +S'152411758' +p107 +sg46 +VCAGG +p108 +ssssg67 +(dp109 +g69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_361014.1' +p110 +sg71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_033632.3' +p111 
+sssS'NM_001257069.1:c.45_46insCCT' +p112 +(dp113 +g3 +g4 +sg5 +(lp114 +S'NC_000004.11:g.153332910C>CAGG automapped to NC_000004.11:g.153332912_153332913insGAG' +p115 +aS'RefSeqGene record not available' +p116 +asg12 +g4 +sg13 +(lp117 +sg15 +VHomo sapiens F-box and WD repeat domain containing 7 (FBXW7), transcript variant 4, mRNA +p118 +sg17 +S'FBXW7' +p119 +sg19 +(dp120 +g21 +S'NP_001243998.1:p.(Thr15_Gly16insPro)' +p121 +sg23 +S'NP_001243998.1:p.(T15_G16insP)' +p122 +ssg25 +g26 +sg27 +g4 +sg28 +g4 +sg29 +S'NM_001257069.1:c.45_46insCCT' +p123 +sg31 +g4 +sg32 +(dp124 +S'hg19' +p125 +(dp126 +g36 +S'NC_000004.11:g.153332910_153332911insAGG' +p127 +sg38 +(dp128 +g40 +g41 +sg42 +g43 +sg44 +S'153332910' +p129 +sg46 +VCAGG +p130 +sssg48 +(dp131 +g36 +S'NC_000004.12:g.152411758_152411759insAGG' +p132 +sg38 +(dp133 +g40 +g41 +sg42 +g43 +sg44 +S'152411758' +p134 +sg46 +VCAGG +p135 +sssS'grch37' +p136 +(dp137 +g36 +S'NC_000004.11:g.153332910_153332911insAGG' +p138 +sg38 +(dp139 +g40 +g58 +sg42 +g43 +sg44 +S'153332910' +p140 +sg46 +VCAGG +p141 +sssS'grch38' +p142 +(dp143 +g36 +S'NC_000004.12:g.152411758_152411759insAGG' +p144 +sg38 +(dp145 +g40 +g58 +sg42 +g43 +sg44 +S'152411758' +p146 +sg46 +VCAGG +p147 +ssssg67 +(dp148 +g69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243998.1' +p149 +sg71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257069.1' +p150 +sssS'flag' +p151 +S'gene_variant' +p152 +sS'NM_001349798.2:c.45_46insCCT' +p153 +(dp154 +g3 +g4 +sg5 +(lp155 +S'NC_000004.11:g.153332910C>CAGG automapped to NC_000004.11:g.153332912_153332913insGAG' +p156 +aS'RefSeqGene record not available' +p157 +asg12 +g4 +sg13 +(lp158 +sg15 +VHomo sapiens F-box and WD repeat domain containing 7 (FBXW7), transcript variant 5, mRNA +p159 +sg17 +S'FBXW7' +p160 +sg19 +(dp161 +g21 +S'NP_001336727.1:p.(Thr15_Gly16insPro)' +p162 +sg23 +S'NP_001336727.1:p.(T15_G16insP)' +p163 +ssg25 +g26 +sg27 +g4 +sg28 +g4 +sg29 +S'NM_001349798.2:c.45_46insCCT' +p164 +sg31 +g4 +sg32 +(dp165 +S'hg19' +p166 
+(dp167 +g36 +S'NC_000004.11:g.153332910_153332911insAGG' +p168 +sg38 +(dp169 +g40 +g41 +sg42 +g43 +sg44 +S'153332910' +p170 +sg46 +VCAGG +p171 +sssS'grch37' +p172 +(dp173 +g36 +S'NC_000004.11:g.153332910_153332911insAGG' +p174 +sg38 +(dp175 +g40 +g58 +sg42 +g43 +sg44 +S'153332910' +p176 +sg46 +VCAGG +p177 +ssssg67 +(dp178 +g69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001336727.1' +p179 +sg71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001349798.2' +p180 +sssS'metadata' +p181 +(dp182 +S'variantvalidator_hgvs_version' +p183 +S'1.1.3' +p184 +sS'uta_schema' +p185 +S'uta_20180821' +p186 +sS'seqrepo_db' +p187 +S'2018-08-21' +p188 +sS'variantvalidator_version' +p189 +S'v0.2' +p190 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant285.txt b/VariantValidator/testing/testOutputsMasterITS/variant285.txt new file mode 100644 index 00000000..345bc094 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant285.txt @@ -0,0 +1,156 @@ +(dp0 +S'flag' +p1 +S'intergenic' +p2 +sS'Intergenic_Variant_1' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'No transcripts found that fully overlap the described variation in the genomic sequence' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +g6 +sS'gene_symbol' +p14 +g6 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +g6 +sS'slr' +p18 +g6 +ssS'submitted_variant' +p19 +S'5-1295183-G-A' +p20 +sS'genome_context_intronic_sequence' +p21 +g6 +sS'hgvs_lrg_variant' +p22 +g6 +sS'hgvs_transcript_variant' +p23 +g6 +sS'hgvs_refseqgene_variant' +p24 +g6 +sS'primary_assembly_loci' +p25 +(dp26 +S'hg19' +p27 +(dp28 +S'hgvs_genomic_description' +p29 +VNC_000005.9:g.1295183G>A +p30 +sS'vcf' +p31 +(dp32 +S'chr' +p33 +S'chr5' +p34 +sS'ref' +p35 +S'G' +p36 +sS'pos' +p37 +S'1295183' +p38 +sS'alt' +p39 +S'A' +p40 +sssS'grch37' +p41 +(dp42 +g29 
+VNC_000005.9:g.1295183G>A +p43 +sg31 +(dp44 +g33 +S'5' +p45 +sg35 +g36 +sg37 +g38 +sg39 +g40 +sssS'hg38' +p46 +(dp47 +g29 +VNC_000005.10:g.1295068G>A +p48 +sg31 +(dp49 +g33 +g34 +sg35 +g36 +sg37 +S'1295068' +p50 +sg39 +g40 +sssS'grch38' +p51 +(dp52 +g29 +VNC_000005.10:g.1295068G>A +p53 +sg31 +(dp54 +g33 +g45 +sg35 +g36 +sg37 +g50 +sg39 +g40 +ssssS'reference_sequence_records' +p55 +g6 +ssS'metadata' +p56 +(dp57 +S'variantvalidator_hgvs_version' +p58 +S'1.1.3' +p59 +sS'uta_schema' +p60 +S'uta_20180821' +p61 +sS'seqrepo_db' +p62 +S'2018-08-21' +p63 +sS'variantvalidator_version' +p64 +S'v0.2' +p65 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant286.txt b/VariantValidator/testing/testOutputsMasterITS/variant286.txt new file mode 100644 index 00000000..ebc21b1a --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant286.txt @@ -0,0 +1,389 @@ +(dp0 +S'NM_003664.4:c.2409_2411del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000005.9:g.77396835TTTC>T automapped to NC_000005.9:g.77396838_77396840delCTT' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens adaptor related protein complex 3 subunit beta 1 (AP3B1), transcript variant 1, mRNA +p13 +sS'gene_symbol' +p14 +S'AP3B1' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_003655.3:p.(Lys804del)' +p19 +sS'slr' +p20 +S'NP_003655.3:p.(K804del)' +p21 +ssS'submitted_variant' +p22 +S'5-77396835-TTTC-T' +p23 +sS'genome_context_intronic_sequence' +p24 +g4 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_003664.4:c.2409_2411del' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'grch38' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000005.10:g.78101012_78101014del' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 
+S'5' +p38 +sS'ref' +p39 +S'TTTC' +p40 +sS'pos' +p41 +S'78101011' +p42 +sS'alt' +p43 +S'T' +p44 +sssS'grch37' +p45 +(dp46 +g33 +S'NC_000005.9:g.77396836_77396838del' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +S'TTTC' +p49 +sg41 +S'77396835' +p50 +sg43 +g44 +sssS'hg38' +p51 +(dp52 +g33 +S'NC_000005.10:g.78101012_78101014del' +p53 +sg35 +(dp54 +g37 +S'chr5' +p55 +sg39 +S'TTTC' +p56 +sg41 +S'78101011' +p57 +sg43 +g44 +sssS'hg19' +p58 +(dp59 +g33 +S'NC_000005.9:g.77396836_77396838del' +p60 +sg35 +(dp61 +g37 +g55 +sg39 +S'TTTC' +p62 +sg41 +S'77396835' +p63 +sg43 +g44 +ssssS'reference_sequence_records' +p64 +(dp65 +S'protein' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003655.3' +p67 +sS'transcript' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003664.4' +p69 +sssS'flag' +p70 +S'gene_variant' +p71 +sS'NM_003664.3:c.2409_2411del' +p72 +(dp73 +g3 +g4 +sg5 +(lp74 +S'NC_000005.9:g.77396835TTTC>T automapped to NC_000005.9:g.77396838_77396840delCTT' +p75 +aS'A more recent version of the selected reference sequence NM_003664.3 is available (NM_003664.4)' +p76 +aS'NM_003664.4:c.2409_2411delGAA MUST be fully validated prior to use in reports' +p77 +aS'select_variants=NM_003664.4:c.2409_2411del' +p78 +aS'RefSeqGene record not available' +p79 +asg9 +g4 +sg10 +(lp80 +sg12 +VHomo sapiens adaptor-related protein complex 3, beta 1 subunit (AP3B1), mRNA +p81 +sg14 +S'AP3B1' +p82 +sg16 +(dp83 +g18 +S'NP_003655.3:p.(Lys804del)' +p84 +sg20 +S'NP_003655.3:p.(K804del)' +p85 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_003664.3:c.2409_2411del' +p86 +sg28 +g4 +sg29 +(dp87 +S'hg19' +p88 +(dp89 +g33 +S'NC_000005.9:g.77396836_77396838del' +p90 +sg35 +(dp91 +g37 +g55 +sg39 +S'TTTC' +p92 +sg41 +S'77396835' +p93 +sg43 +g44 +sssS'grch37' +p94 +(dp95 +g33 +S'NC_000005.9:g.77396836_77396838del' +p96 +sg35 +(dp97 +g37 +g38 +sg39 +S'TTTC' +p98 +sg41 +S'77396835' +p99 +sg43 +g44 +ssssg64 +(dp100 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003655.3' +p101 +sg68 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003664.3' +p102 +sssS'NM_001271769.1:c.2262_2264del' +p103 +(dp104 +g3 +g4 +sg5 +(lp105 +S'NC_000005.9:g.77396835TTTC>T automapped to NC_000005.9:g.77396838_77396840delCTT' +p106 +aS'RefSeqGene record not available' +p107 +asg9 +g4 +sg10 +(lp108 +sg12 +VHomo sapiens adaptor related protein complex 3 subunit beta 1 (AP3B1), transcript variant 2, mRNA +p109 +sg14 +S'AP3B1' +p110 +sg16 +(dp111 +g18 +S'NP_001258698.1:p.(Lys755del)' +p112 +sg20 +S'NP_001258698.1:p.(K755del)' +p113 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001271769.1:c.2262_2264del' +p114 +sg28 +g4 +sg29 +(dp115 +S'grch38' +p116 +(dp117 +g33 +S'NC_000005.10:g.78101012_78101014del' +p118 +sg35 +(dp119 +g37 +g38 +sg39 +S'TTTC' +p120 +sg41 +S'78101011' +p121 +sg43 +g44 +sssS'grch37' +p122 +(dp123 +g33 +S'NC_000005.9:g.77396836_77396838del' +p124 +sg35 +(dp125 +g37 +g38 +sg39 +S'TTTC' +p126 +sg41 +S'77396835' +p127 +sg43 +g44 +sssg51 +(dp128 +g33 +S'NC_000005.10:g.78101012_78101014del' +p129 +sg35 +(dp130 +g37 +g55 +sg39 +S'TTTC' +p131 +sg41 +S'78101011' +p132 +sg43 +g44 +sssS'hg19' +p133 +(dp134 +g33 +S'NC_000005.9:g.77396836_77396838del' +p135 +sg35 +(dp136 +g37 +g55 +sg39 +S'TTTC' +p137 +sg41 +S'77396835' +p138 +sg43 +g44 +ssssg64 +(dp139 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001258698.1' +p140 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001271769.1' +p141 +sssS'metadata' +p142 +(dp143 +S'variantvalidator_hgvs_version' +p144 +S'1.1.3' +p145 +sS'uta_schema' +p146 +S'uta_20180821' +p147 +sS'seqrepo_db' +p148 +S'2018-08-21' +p149 +sS'variantvalidator_version' +p150 +S'v0.2' +p151 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant287.txt b/VariantValidator/testing/testOutputsMasterITS/variant287.txt new file mode 100644 index 00000000..30ff8d6b --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant287.txt @@ -0,0 +1,758 @@ +(dp0 +S'NM_000414.3:c.302+3_302+6del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000005.9:g.118811422GGTGA>G automapped to NC_000005.9:g.118811425_118811428del' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 2, mRNA +p13 +sS'gene_symbol' +p14 +S'HSD17B4' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_000405.1:p.?' +p19 +sS'slr' +p20 +S'NP_000405.1:p.?' +p21 +ssS'submitted_variant' +p22 +S'5-118811422-GGTGA-G' +p23 +sS'genome_context_intronic_sequence' +p24 +S'NC_000005.9(NM_000414.3):c.302+3_302+6del' +p25 +sS'hgvs_lrg_variant' +p26 +g4 +sS'hgvs_transcript_variant' +p27 +S'NM_000414.3:c.302+3_302+6del' +p28 +sS'hgvs_refseqgene_variant' +p29 +g4 +sS'primary_assembly_loci' +p30 +(dp31 +S'grch38' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000005.10:g.119475730_119475733del' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'5' +p39 +sS'ref' +p40 +S'GGTGA' +p41 +sS'pos' +p42 +S'119475727' +p43 +sS'alt' +p44 +S'G' +p45 +sssS'grch37' +p46 +(dp47 +g34 +S'NC_000005.9:g.118811425_118811428del' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +S'GGTGA' +p50 +sg42 +S'118811422' +p51 +sg44 +g45 +sssS'hg38' +p52 +(dp53 +g34 +S'NC_000005.10:g.119475730_119475733del' +p54 +sg36 +(dp55 +g38 +S'chr5' +p56 +sg40 +S'GGTGA' +p57 +sg42 +S'119475727' +p58 +sg44 +g45 +sssS'hg19' +p59 +(dp60 +g34 +S'NC_000005.9:g.118811425_118811428del' +p61 +sg36 +(dp62 +g38 +g56 +sg40 +S'GGTGA' +p63 +sg42 +S'118811422' +p64 +sg44 +g45 
+ssssS'reference_sequence_records' +p65 +(dp66 +S'protein' +p67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000405.1' +p68 +sS'transcript' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000414.3' +p70 +sssS'NM_001292028.1:c.-110+3_-110+6del' +p71 +(dp72 +g3 +g4 +sg5 +(lp73 +S'NC_000005.9:g.118811422GGTGA>G automapped to NC_000005.9:g.118811425_118811428del' +p74 +aS'RefSeqGene record not available' +p75 +asg9 +g4 +sg10 +(lp76 +sg12 +VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 5, mRNA +p77 +sg14 +S'HSD17B4' +p78 +sg16 +(dp79 +g18 +S'NP_001278957.1:p.?' +p80 +sg20 +S'NP_001278957.1:p.?' +p81 +ssg22 +g23 +sg24 +S'NC_000005.9(NM_001292028.1):c.-110+3_-110+6del' +p82 +sg26 +g4 +sg27 +S'NM_001292028.1:c.-110+3_-110+6del' +p83 +sg29 +g4 +sg30 +(dp84 +S'grch38' +p85 +(dp86 +g34 +S'NC_000005.10:g.119475730_119475733del' +p87 +sg36 +(dp88 +g38 +g39 +sg40 +S'GGTGA' +p89 +sg42 +S'119475727' +p90 +sg44 +g45 +sssS'grch37' +p91 +(dp92 +g34 +S'NC_000005.9:g.118811425_118811428del' +p93 +sg36 +(dp94 +g38 +g39 +sg40 +S'GGTGA' +p95 +sg42 +S'118811422' +p96 +sg44 +g45 +sssg52 +(dp97 +g34 +S'NC_000005.10:g.119475730_119475733del' +p98 +sg36 +(dp99 +g38 +g56 +sg40 +S'GGTGA' +p100 +sg42 +S'119475727' +p101 +sg44 +g45 +sssS'hg19' +p102 +(dp103 +g34 +S'NC_000005.9:g.118811425_118811428del' +p104 +sg36 +(dp105 +g38 +g56 +sg40 +S'GGTGA' +p106 +sg42 +S'118811422' +p107 +sg44 +g45 +ssssg65 +(dp108 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278957.1' +p109 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001292028.1' +p110 +sssS'NM_001199291.2:c.377+3_377+6del' +p111 +(dp112 +g3 +g4 +sg5 +(lp113 +S'NC_000005.9:g.118811422GGTGA>G automapped to NC_000005.9:g.118811425_118811428del' +p114 +aS'RefSeqGene record not available' +p115 +asg9 +g4 +sg10 +(lp116 +sg12 +VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 1, mRNA +p117 +sg14 +S'HSD17B4' +p118 +sg16 +(dp119 +g18 +S'NP_001186220.1:p.?' 
+p120 +sg20 +S'NP_001186220.1:p.?' +p121 +ssg22 +g23 +sg24 +S'NC_000005.9(NM_001199291.2):c.377+3_377+6del' +p122 +sg26 +g4 +sg27 +S'NM_001199291.2:c.377+3_377+6del' +p123 +sg29 +g4 +sg30 +(dp124 +S'grch38' +p125 +(dp126 +g34 +S'NC_000005.10:g.119475730_119475733del' +p127 +sg36 +(dp128 +g38 +g39 +sg40 +S'GGTGA' +p129 +sg42 +S'119475727' +p130 +sg44 +g45 +sssS'grch37' +p131 +(dp132 +g34 +S'NC_000005.9:g.118811425_118811428del' +p133 +sg36 +(dp134 +g38 +g39 +sg40 +S'GGTGA' +p135 +sg42 +S'118811422' +p136 +sg44 +g45 +sssg52 +(dp137 +g34 +S'NC_000005.10:g.119475730_119475733del' +p138 +sg36 +(dp139 +g38 +g56 +sg40 +S'GGTGA' +p140 +sg42 +S'119475727' +p141 +sg44 +g45 +sssS'hg19' +p142 +(dp143 +g34 +S'NC_000005.9:g.118811425_118811428del' +p144 +sg36 +(dp145 +g38 +g56 +sg40 +S'GGTGA' +p146 +sg42 +S'118811422' +p147 +sg44 +g45 +ssssg65 +(dp148 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186220.1' +p149 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199291.2' +p150 +sssS'flag' +p151 +S'gene_variant' +p152 +sS'NM_001292027.1:c.230+3_230+6del' +p153 +(dp154 +g3 +g4 +sg5 +(lp155 +S'NC_000005.9:g.118811422GGTGA>G automapped to NC_000005.9:g.118811425_118811428del' +p156 +aS'RefSeqGene record not available' +p157 +asg9 +g4 +sg10 +(lp158 +sg12 +VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 4, mRNA +p159 +sg14 +S'HSD17B4' +p160 +sg16 +(dp161 +g18 +S'NP_001278956.1:p.?' +p162 +sg20 +S'NP_001278956.1:p.?' 
+p163 +ssg22 +g23 +sg24 +S'NC_000005.9(NM_001292027.1):c.230+3_230+6del' +p164 +sg26 +g4 +sg27 +S'NM_001292027.1:c.230+3_230+6del' +p165 +sg29 +g4 +sg30 +(dp166 +S'grch38' +p167 +(dp168 +g34 +S'NC_000005.10:g.119475730_119475733del' +p169 +sg36 +(dp170 +g38 +g39 +sg40 +S'GGTGA' +p171 +sg42 +S'119475727' +p172 +sg44 +g45 +sssS'grch37' +p173 +(dp174 +g34 +S'NC_000005.9:g.118811425_118811428del' +p175 +sg36 +(dp176 +g38 +g39 +sg40 +S'GGTGA' +p177 +sg42 +S'118811422' +p178 +sg44 +g45 +sssg52 +(dp179 +g34 +S'NC_000005.10:g.119475730_119475733del' +p180 +sg36 +(dp181 +g38 +g56 +sg40 +S'GGTGA' +p182 +sg42 +S'119475727' +p183 +sg44 +g45 +sssS'hg19' +p184 +(dp185 +g34 +S'NC_000005.9:g.118811425_118811428del' +p186 +sg36 +(dp187 +g38 +g56 +sg40 +S'GGTGA' +p188 +sg42 +S'118811422' +p189 +sg44 +g45 +ssssg65 +(dp190 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278956.1' +p191 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001292027.1' +p192 +sssS'NM_001199291.1:c.377+3_377+6del' +p193 +(dp194 +g3 +g4 +sg5 +(lp195 +S'NC_000005.9:g.118811422GGTGA>G automapped to NC_000005.9:g.118811425_118811428del' +p196 +aS'A more recent version of the selected reference sequence NM_001199291.1 is available (NM_001199291.2)' +p197 +aS'NM_001199291.2:c.377+3_377+6del MUST be fully validated prior to use in reports' +p198 +aS'select_variants=NM_001199291.2:c.377+3_377+6del' +p199 +aS'RefSeqGene record not available' +p200 +asg9 +g4 +sg10 +(lp201 +sg12 +VHomo sapiens hydroxysteroid (17-beta) dehydrogenase 4 (HSD17B4), transcript variant 1, mRNA +p202 +sg14 +S'HSD17B4' +p203 +sg16 +(dp204 +g18 +S'NP_001186220.1:p.?' +p205 +sg20 +S'NP_001186220.1:p.?' 
+p206 +ssg22 +g23 +sg24 +S'NC_000005.9(NM_001199291.1):c.377+3_377+6del' +p207 +sg26 +g4 +sg27 +S'NM_001199291.1:c.377+3_377+6del' +p208 +sg29 +g4 +sg30 +(dp209 +S'hg19' +p210 +(dp211 +g34 +S'NC_000005.9:g.118811425_118811428del' +p212 +sg36 +(dp213 +g38 +g56 +sg40 +S'GGTGA' +p214 +sg42 +S'118811422' +p215 +sg44 +g45 +sssS'grch37' +p216 +(dp217 +g34 +S'NC_000005.9:g.118811425_118811428del' +p218 +sg36 +(dp219 +g38 +g39 +sg40 +S'GGTGA' +p220 +sg42 +S'118811422' +p221 +sg44 +g45 +ssssg65 +(dp222 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186220.1' +p223 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199291.1' +p224 +sssS'metadata' +p225 +(dp226 +S'variantvalidator_hgvs_version' +p227 +S'1.1.3' +p228 +sS'uta_schema' +p229 +S'uta_20180821' +p230 +sS'seqrepo_db' +p231 +S'2018-08-21' +p232 +sS'variantvalidator_version' +p233 +S'v0.2' +p234 +ssS'NM_001199292.1:c.248+3_248+6del' +p235 +(dp236 +g3 +g4 +sg5 +(lp237 +S'NC_000005.9:g.118811422GGTGA>G automapped to NC_000005.9:g.118811425_118811428del' +p238 +aS'RefSeqGene record not available' +p239 +asg9 +g4 +sg10 +(lp240 +sg12 +VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 3, mRNA +p241 +sg14 +S'HSD17B4' +p242 +sg16 +(dp243 +g18 +S'NP_001186221.1:p.?' +p244 +sg20 +S'NP_001186221.1:p.?' 
+p245 +ssg22 +g23 +sg24 +S'NC_000005.9(NM_001199292.1):c.248+3_248+6del' +p246 +sg26 +g4 +sg27 +S'NM_001199292.1:c.248+3_248+6del' +p247 +sg29 +g4 +sg30 +(dp248 +S'grch38' +p249 +(dp250 +g34 +S'NC_000005.10:g.119475730_119475733del' +p251 +sg36 +(dp252 +g38 +g39 +sg40 +S'GGTGA' +p253 +sg42 +S'119475727' +p254 +sg44 +g45 +sssS'grch37' +p255 +(dp256 +g34 +S'NC_000005.9:g.118811425_118811428del' +p257 +sg36 +(dp258 +g38 +g39 +sg40 +S'GGTGA' +p259 +sg42 +S'118811422' +p260 +sg44 +g45 +sssg52 +(dp261 +g34 +S'NC_000005.10:g.119475730_119475733del' +p262 +sg36 +(dp263 +g38 +g56 +sg40 +S'GGTGA' +p264 +sg42 +S'119475727' +p265 +sg44 +g45 +sssS'hg19' +p266 +(dp267 +g34 +S'NC_000005.9:g.118811425_118811428del' +p268 +sg36 +(dp269 +g38 +g56 +sg40 +S'GGTGA' +p270 +sg42 +S'118811422' +p271 +sg44 +g45 +ssssg65 +(dp272 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186221.1' +p273 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199292.1' +p274 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant288.txt b/VariantValidator/testing/testOutputsMasterITS/variant288.txt new file mode 100644 index 00000000..b855ac72 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant288.txt @@ -0,0 +1,758 @@ +(dp0 +S'NM_001292028.1:c.-110+1_-110+5del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000005.9:g.118811422GGTGAG>G automapped to NC_000005.9:g.118811423_118811427del' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 5, mRNA +p13 +sS'gene_symbol' +p14 +S'HSD17B4' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_001278957.1:p.?' +p19 +sS'slr' +p20 +S'NP_001278957.1:p.?' 
+p21 +ssS'submitted_variant' +p22 +S'5-118811422-GGTGAG-G' +p23 +sS'genome_context_intronic_sequence' +p24 +S'NC_000005.9(NM_001292028.1):c.-110+1_-110+5del' +p25 +sS'hgvs_lrg_variant' +p26 +g4 +sS'hgvs_transcript_variant' +p27 +S'NM_001292028.1:c.-110+1_-110+5del' +p28 +sS'hgvs_refseqgene_variant' +p29 +g4 +sS'primary_assembly_loci' +p30 +(dp31 +S'grch38' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000005.10:g.119475728_119475732del' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'5' +p39 +sS'ref' +p40 +S'GGGTGA' +p41 +sS'pos' +p42 +S'119475726' +p43 +sS'alt' +p44 +S'G' +p45 +sssS'grch37' +p46 +(dp47 +g34 +S'NC_000005.9:g.118811423_118811427del' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +S'GGGTGA' +p50 +sg42 +S'118811421' +p51 +sg44 +g45 +sssS'hg38' +p52 +(dp53 +g34 +S'NC_000005.10:g.119475728_119475732del' +p54 +sg36 +(dp55 +g38 +S'chr5' +p56 +sg40 +S'GGGTGA' +p57 +sg42 +S'119475726' +p58 +sg44 +g45 +sssS'hg19' +p59 +(dp60 +g34 +S'NC_000005.9:g.118811423_118811427del' +p61 +sg36 +(dp62 +g38 +g56 +sg40 +S'GGGTGA' +p63 +sg42 +S'118811421' +p64 +sg44 +g45 +ssssS'reference_sequence_records' +p65 +(dp66 +S'protein' +p67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278957.1' +p68 +sS'transcript' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001292028.1' +p70 +sssS'NM_000414.3:c.302+1_302+5del' +p71 +(dp72 +g3 +g4 +sg5 +(lp73 +S'NC_000005.9:g.118811422GGTGAG>G automapped to NC_000005.9:g.118811423_118811427del' +p74 +aS'RefSeqGene record not available' +p75 +asg9 +g4 +sg10 +(lp76 +sg12 +VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 2, mRNA +p77 +sg14 +S'HSD17B4' +p78 +sg16 +(dp79 +g18 +S'NP_000405.1:p.?' +p80 +sg20 +S'NP_000405.1:p.?' 
+p81 +ssg22 +g23 +sg24 +S'NC_000005.9(NM_000414.3):c.302+1_302+5del' +p82 +sg26 +g4 +sg27 +S'NM_000414.3:c.302+1_302+5del' +p83 +sg29 +g4 +sg30 +(dp84 +S'grch38' +p85 +(dp86 +g34 +S'NC_000005.10:g.119475728_119475732del' +p87 +sg36 +(dp88 +g38 +g39 +sg40 +S'GGGTGA' +p89 +sg42 +S'119475726' +p90 +sg44 +g45 +sssS'grch37' +p91 +(dp92 +g34 +S'NC_000005.9:g.118811423_118811427del' +p93 +sg36 +(dp94 +g38 +g39 +sg40 +S'GGGTGA' +p95 +sg42 +S'118811421' +p96 +sg44 +g45 +sssg52 +(dp97 +g34 +S'NC_000005.10:g.119475728_119475732del' +p98 +sg36 +(dp99 +g38 +g56 +sg40 +S'GGGTGA' +p100 +sg42 +S'119475726' +p101 +sg44 +g45 +sssS'hg19' +p102 +(dp103 +g34 +S'NC_000005.9:g.118811423_118811427del' +p104 +sg36 +(dp105 +g38 +g56 +sg40 +S'GGGTGA' +p106 +sg42 +S'118811421' +p107 +sg44 +g45 +ssssg65 +(dp108 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000405.1' +p109 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000414.3' +p110 +sssS'NM_001199291.2:c.377+1_377+5del' +p111 +(dp112 +g3 +g4 +sg5 +(lp113 +S'NC_000005.9:g.118811422GGTGAG>G automapped to NC_000005.9:g.118811423_118811427del' +p114 +aS'RefSeqGene record not available' +p115 +asg9 +g4 +sg10 +(lp116 +sg12 +VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 1, mRNA +p117 +sg14 +S'HSD17B4' +p118 +sg16 +(dp119 +g18 +S'NP_001186220.1:p.?' +p120 +sg20 +S'NP_001186220.1:p.?' 
+p121 +ssg22 +g23 +sg24 +S'NC_000005.9(NM_001199291.2):c.377+1_377+5del' +p122 +sg26 +g4 +sg27 +S'NM_001199291.2:c.377+1_377+5del' +p123 +sg29 +g4 +sg30 +(dp124 +S'grch38' +p125 +(dp126 +g34 +S'NC_000005.10:g.119475728_119475732del' +p127 +sg36 +(dp128 +g38 +g39 +sg40 +S'GGGTGA' +p129 +sg42 +S'119475726' +p130 +sg44 +g45 +sssS'grch37' +p131 +(dp132 +g34 +S'NC_000005.9:g.118811423_118811427del' +p133 +sg36 +(dp134 +g38 +g39 +sg40 +S'GGGTGA' +p135 +sg42 +S'118811421' +p136 +sg44 +g45 +sssg52 +(dp137 +g34 +S'NC_000005.10:g.119475728_119475732del' +p138 +sg36 +(dp139 +g38 +g56 +sg40 +S'GGGTGA' +p140 +sg42 +S'119475726' +p141 +sg44 +g45 +sssS'hg19' +p142 +(dp143 +g34 +S'NC_000005.9:g.118811423_118811427del' +p144 +sg36 +(dp145 +g38 +g56 +sg40 +S'GGGTGA' +p146 +sg42 +S'118811421' +p147 +sg44 +g45 +ssssg65 +(dp148 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186220.1' +p149 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199291.2' +p150 +sssS'NM_001199292.1:c.248+1_248+5del' +p151 +(dp152 +g3 +g4 +sg5 +(lp153 +S'NC_000005.9:g.118811422GGTGAG>G automapped to NC_000005.9:g.118811423_118811427del' +p154 +aS'RefSeqGene record not available' +p155 +asg9 +g4 +sg10 +(lp156 +sg12 +VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 3, mRNA +p157 +sg14 +S'HSD17B4' +p158 +sg16 +(dp159 +g18 +S'NP_001186221.1:p.?' +p160 +sg20 +S'NP_001186221.1:p.?' 
+p161 +ssg22 +g23 +sg24 +S'NC_000005.9(NM_001199292.1):c.248+1_248+5del' +p162 +sg26 +g4 +sg27 +S'NM_001199292.1:c.248+1_248+5del' +p163 +sg29 +g4 +sg30 +(dp164 +S'grch38' +p165 +(dp166 +g34 +S'NC_000005.10:g.119475728_119475732del' +p167 +sg36 +(dp168 +g38 +g39 +sg40 +S'GGGTGA' +p169 +sg42 +S'119475726' +p170 +sg44 +g45 +sssS'grch37' +p171 +(dp172 +g34 +S'NC_000005.9:g.118811423_118811427del' +p173 +sg36 +(dp174 +g38 +g39 +sg40 +S'GGGTGA' +p175 +sg42 +S'118811421' +p176 +sg44 +g45 +sssg52 +(dp177 +g34 +S'NC_000005.10:g.119475728_119475732del' +p178 +sg36 +(dp179 +g38 +g56 +sg40 +S'GGGTGA' +p180 +sg42 +S'119475726' +p181 +sg44 +g45 +sssS'hg19' +p182 +(dp183 +g34 +S'NC_000005.9:g.118811423_118811427del' +p184 +sg36 +(dp185 +g38 +g56 +sg40 +S'GGGTGA' +p186 +sg42 +S'118811421' +p187 +sg44 +g45 +ssssg65 +(dp188 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186221.1' +p189 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199292.1' +p190 +sssS'flag' +p191 +S'gene_variant' +p192 +sS'NM_001199291.1:c.377+1_377+5del' +p193 +(dp194 +g3 +g4 +sg5 +(lp195 +S'NC_000005.9:g.118811422GGTGAG>G automapped to NC_000005.9:g.118811423_118811427del' +p196 +aS'A more recent version of the selected reference sequence NM_001199291.1 is available (NM_001199291.2)' +p197 +aS'NM_001199291.2:c.377+1_377+5del MUST be fully validated prior to use in reports' +p198 +aS'select_variants=NM_001199291.2:c.377+1_377+5del' +p199 +aS'RefSeqGene record not available' +p200 +asg9 +g4 +sg10 +(lp201 +sg12 +VHomo sapiens hydroxysteroid (17-beta) dehydrogenase 4 (HSD17B4), transcript variant 1, mRNA +p202 +sg14 +S'HSD17B4' +p203 +sg16 +(dp204 +g18 +S'NP_001186220.1:p.?' +p205 +sg20 +S'NP_001186220.1:p.?' 
+p206 +ssg22 +g23 +sg24 +S'NC_000005.9(NM_001199291.1):c.377+1_377+5del' +p207 +sg26 +g4 +sg27 +S'NM_001199291.1:c.377+1_377+5del' +p208 +sg29 +g4 +sg30 +(dp209 +S'hg19' +p210 +(dp211 +g34 +S'NC_000005.9:g.118811423_118811427del' +p212 +sg36 +(dp213 +g38 +g56 +sg40 +S'GGGTGA' +p214 +sg42 +S'118811421' +p215 +sg44 +g45 +sssS'grch37' +p216 +(dp217 +g34 +S'NC_000005.9:g.118811423_118811427del' +p218 +sg36 +(dp219 +g38 +g39 +sg40 +S'GGGTGA' +p220 +sg42 +S'118811421' +p221 +sg44 +g45 +ssssg65 +(dp222 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186220.1' +p223 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199291.1' +p224 +sssS'NM_001292027.1:c.230+1_230+5del' +p225 +(dp226 +g3 +g4 +sg5 +(lp227 +S'NC_000005.9:g.118811422GGTGAG>G automapped to NC_000005.9:g.118811423_118811427del' +p228 +aS'RefSeqGene record not available' +p229 +asg9 +g4 +sg10 +(lp230 +sg12 +VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 4, mRNA +p231 +sg14 +S'HSD17B4' +p232 +sg16 +(dp233 +g18 +S'NP_001278956.1:p.?' +p234 +sg20 +S'NP_001278956.1:p.?' 
+p235 +ssg22 +g23 +sg24 +S'NC_000005.9(NM_001292027.1):c.230+1_230+5del' +p236 +sg26 +g4 +sg27 +S'NM_001292027.1:c.230+1_230+5del' +p237 +sg29 +g4 +sg30 +(dp238 +S'grch38' +p239 +(dp240 +g34 +S'NC_000005.10:g.119475728_119475732del' +p241 +sg36 +(dp242 +g38 +g39 +sg40 +S'GGGTGA' +p243 +sg42 +S'119475726' +p244 +sg44 +g45 +sssS'grch37' +p245 +(dp246 +g34 +S'NC_000005.9:g.118811423_118811427del' +p247 +sg36 +(dp248 +g38 +g39 +sg40 +S'GGGTGA' +p249 +sg42 +S'118811421' +p250 +sg44 +g45 +sssg52 +(dp251 +g34 +S'NC_000005.10:g.119475728_119475732del' +p252 +sg36 +(dp253 +g38 +g56 +sg40 +S'GGGTGA' +p254 +sg42 +S'119475726' +p255 +sg44 +g45 +sssS'hg19' +p256 +(dp257 +g34 +S'NC_000005.9:g.118811423_118811427del' +p258 +sg36 +(dp259 +g38 +g56 +sg40 +S'GGGTGA' +p260 +sg42 +S'118811421' +p261 +sg44 +g45 +ssssg65 +(dp262 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278956.1' +p263 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001292027.1' +p264 +sssS'metadata' +p265 +(dp266 +S'variantvalidator_hgvs_version' +p267 +S'1.1.3' +p268 +sS'uta_schema' +p269 +S'uta_20180821' +p270 +sS'seqrepo_db' +p271 +S'2018-08-21' +p272 +sS'variantvalidator_version' +p273 +S'v0.2' +p274 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant289.txt b/VariantValidator/testing/testOutputsMasterITS/variant289.txt new file mode 100644 index 00000000..0415f3d6 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant289.txt @@ -0,0 +1,414 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NR_110997.1:n.21del' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NC_000005.9:g.131705587CG>C automapped to NC_000005.9:g.131705590delG' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g6 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens MIR3936 host gene (MIR3936HG), long non-coding RNA +p15 +sS'gene_symbol' +p16 +S'MIR3936HG' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'Non-coding :n.' +p21 +sS'slr' +p22 +g21 +ssS'submitted_variant' +p23 +S'5-131705587-CG-C' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NR_110997.1:n.21del' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'grch38' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000005.10:g.132369898del' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'5' +p39 +sS'ref' +p40 +S'CG' +p41 +sS'pos' +p42 +S'132369895' +p43 +sS'alt' +p44 +S'C' +p45 +sssS'grch37' +p46 +(dp47 +g34 +S'NC_000005.9:g.131705590del' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +S'CG' +p50 +sg42 +S'131705587' +p51 +sg44 +g45 +sssS'hg38' +p52 +(dp53 +g34 +S'NC_000005.10:g.132369898del' +p54 +sg36 +(dp55 +g38 +S'chr5' +p56 +sg40 +S'CG' +p57 +sg42 +S'132369895' +p58 +sg44 +g45 +sssS'hg19' +p59 +(dp60 +g34 +S'NC_000005.9:g.131705590del' +p61 +sg36 +(dp62 +g38 +g56 +sg40 +S'CG' +p63 +sg42 +S'131705587' +p64 +sg44 +g45 +ssssS'reference_sequence_records' +p65 +(dp66 +S'transcript' +p67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_110997.1' +p68 
+sssS'NM_003060.3:c.-75del' +p69 +(dp70 +g5 +g6 +sg7 +(lp71 +S'NC_000005.9:g.131705587CG>C automapped to NC_000005.9:g.131705590delG' +p72 +aS'RefSeqGene record not available' +p73 +asg11 +g6 +sg12 +(lp74 +sg14 +VHomo sapiens solute carrier family 22 member 5 (SLC22A5), transcript variant 2, mRNA +p75 +sg16 +S'SLC22A5' +p76 +sg18 +(dp77 +g20 +S'NP_003051.1:p.?' +p78 +sg22 +S'NP_003051.1:p.?' +p79 +ssg23 +g24 +sg25 +g6 +sg26 +g6 +sg27 +S'NM_003060.3:c.-75del' +p80 +sg29 +g6 +sg30 +(dp81 +S'grch38' +p82 +(dp83 +g34 +S'NC_000005.10:g.132369898del' +p84 +sg36 +(dp85 +g38 +g39 +sg40 +S'CG' +p86 +sg42 +S'132369895' +p87 +sg44 +g45 +sssS'grch37' +p88 +(dp89 +g34 +S'NC_000005.9:g.131705590del' +p90 +sg36 +(dp91 +g38 +g39 +sg40 +S'CG' +p92 +sg42 +S'131705587' +p93 +sg44 +g45 +sssg52 +(dp94 +g34 +S'NC_000005.10:g.132369898del' +p95 +sg36 +(dp96 +g38 +g56 +sg40 +S'CG' +p97 +sg42 +S'132369895' +p98 +sg44 +g45 +sssS'hg19' +p99 +(dp100 +g34 +S'NC_000005.9:g.131705590del' +p101 +sg36 +(dp102 +g38 +g56 +sg40 +S'CG' +p103 +sg42 +S'131705587' +p104 +sg44 +g45 +ssssg65 +(dp105 +S'protein' +p106 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003051.1' +p107 +sg67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003060.3' +p108 +sssS'NM_001308122.1:c.-75del' +p109 +(dp110 +g5 +g6 +sg7 +(lp111 +S'NC_000005.9:g.131705587CG>C automapped to NC_000005.9:g.131705590delG' +p112 +aS'RefSeqGene record not available' +p113 +asg11 +g6 +sg12 +(lp114 +sg14 +VHomo sapiens solute carrier family 22 member 5 (SLC22A5), transcript variant 1, mRNA +p115 +sg16 +S'SLC22A5' +p116 +sg18 +(dp117 +g20 +S'NP_001295051.1:p.?' +p118 +sg22 +S'NP_001295051.1:p.?' 
+p119 +ssg23 +g24 +sg25 +g6 +sg26 +g6 +sg27 +S'NM_001308122.1:c.-75del' +p120 +sg29 +g6 +sg30 +(dp121 +S'grch38' +p122 +(dp123 +g34 +S'NC_000005.10:g.132369898del' +p124 +sg36 +(dp125 +g38 +g39 +sg40 +S'CG' +p126 +sg42 +S'132369895' +p127 +sg44 +g45 +sssS'grch37' +p128 +(dp129 +g34 +S'NC_000005.9:g.131705590del' +p130 +sg36 +(dp131 +g38 +g39 +sg40 +S'CG' +p132 +sg42 +S'131705587' +p133 +sg44 +g45 +sssg52 +(dp134 +g34 +S'NC_000005.10:g.132369898del' +p135 +sg36 +(dp136 +g38 +g56 +sg40 +S'CG' +p137 +sg42 +S'132369895' +p138 +sg44 +g45 +sssS'hg19' +p139 +(dp140 +g34 +S'NC_000005.9:g.131705590del' +p141 +sg36 +(dp142 +g38 +g56 +sg40 +S'CG' +p143 +sg42 +S'131705587' +p144 +sg44 +g45 +ssssg65 +(dp145 +g106 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001295051.1' +p146 +sg67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001308122.1' +p147 +sssS'metadata' +p148 +(dp149 +S'variantvalidator_hgvs_version' +p150 +S'1.1.3' +p151 +sS'uta_schema' +p152 +S'uta_20180821' +p153 +sS'seqrepo_db' +p154 +S'2018-08-21' +p155 +sS'variantvalidator_version' +p156 +S'v0.2' +p157 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant29.txt b/VariantValidator/testing/testOutputsMasterITS/variant29.txt new file mode 100644 index 00000000..a385ce0a --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant29.txt @@ -0,0 +1,80 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'The given coordinate is outside the bounds of the reference sequence.' 
+p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +g4 +sS'gene_symbol' +p12 +g4 +sS'hgvs_predicted_protein_consequence' +p13 +(dp14 +S'tlr' +p15 +g4 +sS'slr' +p16 +g4 +ssS'submitted_variant' +p17 +S'NR_138595.1:n.-810_1071+1=' +p18 +sS'genome_context_intronic_sequence' +p19 +g4 +sS'hgvs_lrg_variant' +p20 +g4 +sS'hgvs_transcript_variant' +p21 +g4 +sS'hgvs_refseqgene_variant' +p22 +g4 +sS'primary_assembly_loci' +p23 +(dp24 +sS'reference_sequence_records' +p25 +g4 +ssS'flag' +p26 +S'warning' +p27 +sS'metadata' +p28 +(dp29 +S'variantvalidator_hgvs_version' +p30 +S'1.1.3' +p31 +sS'uta_schema' +p32 +S'uta_20180821' +p33 +sS'seqrepo_db' +p34 +S'2018-08-21' +p35 +sS'variantvalidator_version' +p36 +S'v0.2' +p37 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant290.txt b/VariantValidator/testing/testOutputsMasterITS/variant290.txt new file mode 100644 index 00000000..e7d08e3b --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant290.txt @@ -0,0 +1,171 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_024577.3:c.2813A>G' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens SH3 domain and tetratricopeptide repeats 2 (SH3TC2), mRNA +p14 +sS'gene_symbol' +p15 +S'SH3TC2' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_078853.2:p.(His938Arg)' +p20 +sS'slr' +p21 +S'NP_078853.2:p.(H938R)' +p22 +ssS'submitted_variant' +p23 +S'5-148406482-T-C' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_024577.3:c.2813A>G' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'grch38' +p32 +(dp33 
+S'hgvs_genomic_description' +p34 +S'NC_000005.10:g.149026919T>C' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'5' +p39 +sS'ref' +p40 +VT +p41 +sS'pos' +p42 +S'149026919' +p43 +sS'alt' +p44 +VC +p45 +sssS'grch37' +p46 +(dp47 +g34 +S'NC_000005.9:g.148406482T>C' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +g41 +sg42 +S'148406482' +p50 +sg44 +g45 +sssS'hg38' +p51 +(dp52 +g34 +S'NC_000005.10:g.149026919T>C' +p53 +sg36 +(dp54 +g38 +S'chr5' +p55 +sg40 +g41 +sg42 +S'149026919' +p56 +sg44 +g45 +sssS'hg19' +p57 +(dp58 +g34 +S'NC_000005.9:g.148406482T>C' +p59 +sg36 +(dp60 +g38 +g55 +sg40 +g41 +sg42 +S'148406482' +p61 +sg44 +g45 +ssssS'reference_sequence_records' +p62 +(dp63 +S'protein' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_078853.2' +p65 +sS'transcript' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_024577.3' +p67 +sssS'metadata' +p68 +(dp69 +S'variantvalidator_hgvs_version' +p70 +S'1.1.3' +p71 +sS'uta_schema' +p72 +S'uta_20180821' +p73 +sS'seqrepo_db' +p74 +S'2018-08-21' +p75 +sS'variantvalidator_version' +p76 +S'v0.2' +p77 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant291.txt b/VariantValidator/testing/testOutputsMasterITS/variant291.txt new file mode 100644 index 00000000..6fb68886 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant291.txt @@ -0,0 +1,176 @@ +(dp0 +S'NM_014845.5:c.123_124insCAG' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000006.11:g.110036337T>TCAG automapped to NC_000006.11:g.110036337_110036338insCAG' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens FIG4 phosphoinositide 5-phosphatase (FIG4), mRNA +p13 +sS'gene_symbol' +p14 +S'FIG4' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_055660.1:p.(Ile41_Asp42insGln)' +p19 +sS'slr' +p20 +S'NP_055660.1:p.(I41_D42insQ)' +p21 +ssS'submitted_variant' +p22 +S'6-110036337-T-TCAG' +p23 +sS'genome_context_intronic_sequence' +p24 +g4 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_014845.5:c.123_124insCAG' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000006.11:g.110036337_110036338insCAG' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'chr6' +p38 +sS'ref' +p39 +S'T' +p40 +sS'pos' +p41 +S'110036337' +p42 +sS'alt' +p43 +S'TCAG' +p44 +sssS'hg38' +p45 +(dp46 +g33 +S'NC_000006.12:g.109715134_109715135insCAG' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +g40 +sg41 +S'109715134' +p49 +sg43 +S'TCAG' +p50 +sssS'grch37' +p51 +(dp52 +g33 +S'NC_000006.11:g.110036337_110036338insCAG' +p53 +sg35 +(dp54 +g37 +S'6' +p55 +sg39 +g40 +sg41 +S'110036337' +p56 +sg43 +S'TCAG' +p57 +sssS'grch38' +p58 +(dp59 +g33 +S'NC_000006.12:g.109715134_109715135insCAG' +p60 +sg35 +(dp61 +g37 +g55 +sg39 +g40 +sg41 +S'109715134' +p62 +sg43 +S'TCAG' +p63 +ssssS'reference_sequence_records' 
+p64 +(dp65 +S'protein' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055660.1' +p67 +sS'transcript' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014845.5' +p69 +sssS'flag' +p70 +S'gene_variant' +p71 +sS'metadata' +p72 +(dp73 +S'variantvalidator_hgvs_version' +p74 +S'1.1.3' +p75 +sS'uta_schema' +p76 +S'uta_20180821' +p77 +sS'seqrepo_db' +p78 +S'2018-08-21' +p79 +sS'variantvalidator_version' +p80 +S'v0.2' +p81 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant292.txt b/VariantValidator/testing/testOutputsMasterITS/variant292.txt new file mode 100644 index 00000000..bf03b80b --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant292.txt @@ -0,0 +1,176 @@ +(dp0 +S'NM_014845.5:c.124_126del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000006.11:g.110036337TGAT>T automapped to NC_000006.11:g.110036338_110036340delGAT' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens FIG4 phosphoinositide 5-phosphatase (FIG4), mRNA +p13 +sS'gene_symbol' +p14 +S'FIG4' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_055660.1:p.(Asp42del)' +p19 +sS'slr' +p20 +S'NP_055660.1:p.(D42del)' +p21 +ssS'submitted_variant' +p22 +S'6-110036337-TGAT-T' +p23 +sS'genome_context_intronic_sequence' +p24 +g4 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_014845.5:c.124_126del' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000006.11:g.110036338_110036340del' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'chr6' +p38 +sS'ref' +p39 +S'TTGA' +p40 +sS'pos' +p41 +S'110036336' +p42 +sS'alt' +p43 +S'T' +p44 +sssS'hg38' +p45 +(dp46 +g33 +S'NC_000006.12:g.109715135_109715137del' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +S'TTGA' +p49 
+sg41 +S'109715133' +p50 +sg43 +g44 +sssS'grch37' +p51 +(dp52 +g33 +S'NC_000006.11:g.110036338_110036340del' +p53 +sg35 +(dp54 +g37 +S'6' +p55 +sg39 +S'TTGA' +p56 +sg41 +S'110036336' +p57 +sg43 +g44 +sssS'grch38' +p58 +(dp59 +g33 +S'NC_000006.12:g.109715135_109715137del' +p60 +sg35 +(dp61 +g37 +g55 +sg39 +S'TTGA' +p62 +sg41 +S'109715133' +p63 +sg43 +g44 +ssssS'reference_sequence_records' +p64 +(dp65 +S'protein' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055660.1' +p67 +sS'transcript' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014845.5' +p69 +sssS'flag' +p70 +S'gene_variant' +p71 +sS'metadata' +p72 +(dp73 +S'variantvalidator_hgvs_version' +p74 +S'1.1.3' +p75 +sS'uta_schema' +p76 +S'uta_20180821' +p77 +sS'seqrepo_db' +p78 +S'2018-08-21' +p79 +sS'variantvalidator_version' +p80 +S'v0.2' +p81 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant293.txt b/VariantValidator/testing/testOutputsMasterITS/variant293.txt new file mode 100644 index 00000000..0540b605 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant293.txt @@ -0,0 +1,286 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_182961.3:c.14018G>T' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens spectrin repeat containing nuclear envelope protein 1 (SYNE1), transcript variant 1, mRNA +p14 +sS'gene_symbol' +p15 +S'SYNE1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_892006.3:p.(Arg4673Leu)' +p20 +sS'slr' +p21 +S'NP_892006.3:p.(R4673L)' +p22 +ssS'submitted_variant' +p23 +S'6-152651802-C-A' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_182961.3:c.14018G>T' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' 
+p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000006.11:g.152651802C>A' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr6' +p39 +sS'ref' +p40 +VC +p41 +sS'pos' +p42 +S'152651802' +p43 +sS'alt' +p44 +VA +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000006.12:g.152330667C>A' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +g41 +sg42 +S'152330667' +p50 +sg44 +g45 +sssS'grch37' +p51 +(dp52 +g34 +S'NC_000006.11:g.152651802C>A' +p53 +sg36 +(dp54 +g38 +S'6' +p55 +sg40 +g41 +sg42 +S'152651802' +p56 +sg44 +g45 +sssS'grch38' +p57 +(dp58 +g34 +S'NC_000006.12:g.152330667C>A' +p59 +sg36 +(dp60 +g38 +g55 +sg40 +g41 +sg42 +S'152330667' +p61 +sg44 +g45 +ssssS'reference_sequence_records' +p62 +(dp63 +S'protein' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_892006.3' +p65 +sS'transcript' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_182961.3' +p67 +sssS'NM_033071.3:c.13805G>T' +p68 +(dp69 +g5 +g6 +sg7 +(lp70 +S'RefSeqGene record not available' +p71 +asg10 +g6 +sg11 +(lp72 +sg13 +VHomo sapiens spectrin repeat containing nuclear envelope protein 1 (SYNE1), transcript variant 2, mRNA +p73 +sg15 +S'SYNE1' +p74 +sg17 +(dp75 +g19 +S'NP_149062.1:p.(Arg4602Leu)' +p76 +sg21 +S'NP_149062.1:p.(R4602L)' +p77 +ssg23 +g24 +sg25 +g6 +sg26 +g6 +sg27 +S'NM_033071.3:c.13805G>T' +p78 +sg29 +g6 +sg30 +(dp79 +S'hg19' +p80 +(dp81 +g34 +S'NC_000006.11:g.152651802C>A' +p82 +sg36 +(dp83 +g38 +g39 +sg40 +g41 +sg42 +S'152651802' +p84 +sg44 +g45 +sssg46 +(dp85 +g34 +S'NC_000006.12:g.152330667C>A' +p86 +sg36 +(dp87 +g38 +g39 +sg40 +g41 +sg42 +S'152330667' +p88 +sg44 +g45 +sssS'grch37' +p89 +(dp90 +g34 +S'NC_000006.11:g.152651802C>A' +p91 +sg36 +(dp92 +g38 +g55 +sg40 +g41 +sg42 +S'152651802' +p93 +sg44 +g45 +sssS'grch38' +p94 +(dp95 +g34 +S'NC_000006.12:g.152330667C>A' +p96 +sg36 +(dp97 +g38 +g55 +sg40 +g41 +sg42 +S'152330667' +p98 +sg44 +g45 +ssssg62 +(dp99 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_149062.1' +p100 +sg66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_033071.3' +p101 +sssS'metadata' 
+p102 +(dp103 +S'variantvalidator_hgvs_version' +p104 +S'1.1.3' +p105 +sS'uta_schema' +p106 +S'uta_20180821' +p107 +sS'seqrepo_db' +p108 +S'2018-08-21' +p109 +sS'variantvalidator_version' +p110 +S'v0.2' +p111 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant294.txt b/VariantValidator/testing/testOutputsMasterITS/variant294.txt new file mode 100644 index 00000000..662b8068 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant294.txt @@ -0,0 +1,286 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_033071.3:c.5950G>C' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'RefSeqGene record not available' +p19 +asS'refseqgene_context_intronic_sequence' +p20 +g16 +sS'alt_genomic_loci' +p21 +(lp22 +sS'transcript_description' +p23 +VHomo sapiens spectrin repeat containing nuclear envelope protein 1 (SYNE1), transcript variant 2, mRNA +p24 +sS'gene_symbol' +p25 +S'SYNE1' +p26 +sS'hgvs_predicted_protein_consequence' +p27 +(dp28 +S'tlr' +p29 +S'NP_149062.1:p.(Ala1984Pro)' +p30 +sS'slr' +p31 +S'NP_149062.1:p.(A1984P)' +p32 +ssS'submitted_variant' +p33 +S'6-152737643-C-G' +p34 +sS'genome_context_intronic_sequence' +p35 +g16 +sS'hgvs_lrg_variant' +p36 +g16 +sS'hgvs_transcript_variant' +p37 +S'NM_033071.3:c.5950G>C' +p38 +sS'hgvs_refseqgene_variant' +p39 +g16 +sS'primary_assembly_loci' +p40 +(dp41 +S'hg19' +p42 +(dp43 +S'hgvs_genomic_description' +p44 +S'NC_000006.11:g.152737643C>G' +p45 +sS'vcf' +p46 +(dp47 +S'chr' +p48 +S'chr6' +p49 +sS'ref' +p50 +VC +p51 +sS'pos' +p52 +S'152737643' +p53 +sS'alt' +p54 +VG +p55 +sssS'hg38' +p56 +(dp57 +g44 +S'NC_000006.12:g.152416508C>G' +p58 +sg46 +(dp59 +g48 +g49 +sg50 +g51 +sg52 +S'152416508' +p60 +sg54 +g55 +sssS'grch37' +p61 
+(dp62 +g44 +S'NC_000006.11:g.152737643C>G' +p63 +sg46 +(dp64 +g48 +S'6' +p65 +sg50 +g51 +sg52 +S'152737643' +p66 +sg54 +g55 +sssS'grch38' +p67 +(dp68 +g44 +S'NC_000006.12:g.152416508C>G' +p69 +sg46 +(dp70 +g48 +g65 +sg50 +g51 +sg52 +S'152416508' +p71 +sg54 +g55 +ssssS'reference_sequence_records' +p72 +(dp73 +S'protein' +p74 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_149062.1' +p75 +sS'transcript' +p76 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_033071.3' +p77 +sssS'NM_182961.3:c.5929G>C' +p78 +(dp79 +g15 +g16 +sg17 +(lp80 +S'RefSeqGene record not available' +p81 +asg20 +g16 +sg21 +(lp82 +sg23 +VHomo sapiens spectrin repeat containing nuclear envelope protein 1 (SYNE1), transcript variant 1, mRNA +p83 +sg25 +S'SYNE1' +p84 +sg27 +(dp85 +g29 +S'NP_892006.3:p.(Ala1977Pro)' +p86 +sg31 +S'NP_892006.3:p.(A1977P)' +p87 +ssg33 +g34 +sg35 +g16 +sg36 +g16 +sg37 +S'NM_182961.3:c.5929G>C' +p88 +sg39 +g16 +sg40 +(dp89 +S'hg19' +p90 +(dp91 +g44 +S'NC_000006.11:g.152737643C>G' +p92 +sg46 +(dp93 +g48 +g49 +sg50 +g51 +sg52 +S'152737643' +p94 +sg54 +g55 +sssg56 +(dp95 +g44 +S'NC_000006.12:g.152416508C>G' +p96 +sg46 +(dp97 +g48 +g49 +sg50 +g51 +sg52 +S'152416508' +p98 +sg54 +g55 +sssS'grch37' +p99 +(dp100 +g44 +S'NC_000006.11:g.152737643C>G' +p101 +sg46 +(dp102 +g48 +g65 +sg50 +g51 +sg52 +S'152737643' +p103 +sg54 +g55 +sssS'grch38' +p104 +(dp105 +g44 +S'NC_000006.12:g.152416508C>G' +p106 +sg46 +(dp107 +g48 +g65 +sg50 +g51 +sg52 +S'152416508' +p108 +sg54 +g55 +ssssg72 +(dp109 +g74 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_892006.3' +p110 +sg76 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_182961.3' +p111 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant295.txt b/VariantValidator/testing/testOutputsMasterITS/variant295.txt new file mode 100644 index 00000000..11551fc0 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant295.txt @@ -0,0 +1,1944 @@ +(dp0 +S'NM_001322005.1:c.1216A>G' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 4, mRNA +p12 +sS'gene_symbol' +p13 +S'PMS2' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_001308934.1:p.(Lys406Glu)' +p18 +sS'slr' +p19 +S'NP_001308934.1:p.(K406E)' +p20 +ssS'submitted_variant' +p21 +S'7-6026775-T-C' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_001322005.1:c.1216A>G' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000007.13:g.6026775T>C' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr7' +p37 +sS'ref' +p38 +VT +p39 +sS'pos' +p40 +S'6026775' +p41 +sS'alt' +p42 +VC +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000007.14:g.5987144T>C' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +g39 +sg40 +S'5987144' +p48 +sg42 +g43 +sssS'grch37' +p49 +(dp50 +g32 +S'NC_000007.13:g.6026775T>C' +p51 +sg34 +(dp52 +g36 +S'7' +p53 +sg38 +g39 +sg40 +S'6026775' +p54 +sg42 +g43 +sssS'grch38' +p55 +(dp56 +g32 +S'NC_000007.14:g.5987144T>C' +p57 +sg34 +(dp58 +g36 +g53 +sg38 +g39 +sg40 +S'5987144' +p59 +sg42 +g43 +ssssS'reference_sequence_records' +p60 +(dp61 +S'protein' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308934.1' +p63 +sS'transcript' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322005.1' +p65 
+sssS'NM_001322012.1:c.688A>G' +p66 +(dp67 +g3 +g4 +sg5 +(lp68 +S'RefSeqGene record not available' +p69 +asg8 +g4 +sg9 +(lp70 +sg11 +VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 11, mRNA +p71 +sg13 +S'PMS2' +p72 +sg15 +(dp73 +g17 +S'NP_001308941.1:p.(Lys230Glu)' +p74 +sg19 +S'NP_001308941.1:p.(K230E)' +p75 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001322012.1:c.688A>G' +p76 +sg27 +g4 +sg28 +(dp77 +S'hg19' +p78 +(dp79 +g32 +S'NC_000007.13:g.6026775T>C' +p80 +sg34 +(dp81 +g36 +g37 +sg38 +g39 +sg40 +S'6026775' +p82 +sg42 +g43 +sssg44 +(dp83 +g32 +S'NC_000007.14:g.5987144T>C' +p84 +sg34 +(dp85 +g36 +g37 +sg38 +g39 +sg40 +S'5987144' +p86 +sg42 +g43 +sssS'grch37' +p87 +(dp88 +g32 +S'NC_000007.13:g.6026775T>C' +p89 +sg34 +(dp90 +g36 +g53 +sg38 +g39 +sg40 +S'6026775' +p91 +sg42 +g43 +sssS'grch38' +p92 +(dp93 +g32 +S'NC_000007.14:g.5987144T>C' +p94 +sg34 +(dp95 +g36 +g53 +sg38 +g39 +sg40 +S'5987144' +p96 +sg42 +g43 +ssssg60 +(dp97 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308941.1' +p98 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322012.1' +p99 +sssS'NM_001322010.1:c.1060A>G' +p100 +(dp101 +g3 +g4 +sg5 +(lp102 +S'RefSeqGene record not available' +p103 +asg8 +g4 +sg9 +(lp104 +sg11 +VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 9, mRNA +p105 +sg13 +S'PMS2' +p106 +sg15 +(dp107 +g17 +S'NP_001308939.1:p.(Lys354Glu)' +p108 +sg19 +S'NP_001308939.1:p.(K354E)' +p109 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001322010.1:c.1060A>G' +p110 +sg27 +g4 +sg28 +(dp111 +S'hg19' +p112 +(dp113 +g32 +S'NC_000007.13:g.6026775T>C' +p114 +sg34 +(dp115 +g36 +g37 +sg38 +g39 +sg40 +S'6026775' +p116 +sg42 +g43 +sssg44 +(dp117 +g32 +S'NC_000007.14:g.5987144T>C' +p118 +sg34 +(dp119 +g36 +g37 +sg38 +g39 +sg40 +S'5987144' +p120 +sg42 +g43 +sssS'grch37' +p121 +(dp122 +g32 +S'NC_000007.13:g.6026775T>C' +p123 +sg34 +(dp124 +g36 +g53 +sg38 +g39 +sg40 +S'6026775' +p125 +sg42 +g43 +sssS'grch38' +p126 
+(dp127 +g32 +S'NC_000007.14:g.5987144T>C' +p128 +sg34 +(dp129 +g36 +g53 +sg38 +g39 +sg40 +S'5987144' +p130 +sg42 +g43 +ssssg60 +(dp131 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308939.1' +p132 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322010.1' +p133 +sssS'NM_001322015.1:c.1312A>G' +p134 +(dp135 +g3 +g4 +sg5 +(lp136 +S'RefSeqGene record not available' +p137 +asg8 +g4 +sg9 +(lp138 +sg11 +VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 14, mRNA +p139 +sg13 +S'PMS2' +p140 +sg15 +(dp141 +g17 +S'NP_001308944.1:p.(Lys438Glu)' +p142 +sg19 +S'NP_001308944.1:p.(K438E)' +p143 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001322015.1:c.1312A>G' +p144 +sg27 +g4 +sg28 +(dp145 +S'hg19' +p146 +(dp147 +g32 +S'NC_000007.13:g.6026775T>C' +p148 +sg34 +(dp149 +g36 +g37 +sg38 +g39 +sg40 +S'6026775' +p150 +sg42 +g43 +sssg44 +(dp151 +g32 +S'NC_000007.14:g.5987144T>C' +p152 +sg34 +(dp153 +g36 +g37 +sg38 +g39 +sg40 +S'5987144' +p154 +sg42 +g43 +sssS'grch37' +p155 +(dp156 +g32 +S'NC_000007.13:g.6026775T>C' +p157 +sg34 +(dp158 +g36 +g53 +sg38 +g39 +sg40 +S'6026775' +p159 +sg42 +g43 +sssS'grch38' +p160 +(dp161 +g32 +S'NC_000007.14:g.5987144T>C' +p162 +sg34 +(dp163 +g36 +g53 +sg38 +g39 +sg40 +S'5987144' +p164 +sg42 +g43 +ssssg60 +(dp165 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308944.1' +p166 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322015.1' +p167 +sssS'NM_001322003.1:c.1216A>G' +p168 +(dp169 +g3 +g4 +sg5 +(lp170 +S'RefSeqGene record not available' +p171 +asg8 +g4 +sg9 +(lp172 +sg11 +VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 2, mRNA +p173 +sg13 +S'PMS2' +p174 +sg15 +(dp175 +g17 +S'NP_001308932.1:p.(Lys406Glu)' +p176 +sg19 +S'NP_001308932.1:p.(K406E)' +p177 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001322003.1:c.1216A>G' +p178 +sg27 +g4 +sg28 +(dp179 +S'hg19' +p180 +(dp181 +g32 +S'NC_000007.13:g.6026775T>C' +p182 +sg34 +(dp183 +g36 +g37 +sg38 +g39 +sg40 +S'6026775' 
+p184 +sg42 +g43 +sssg44 +(dp185 +g32 +S'NC_000007.14:g.5987144T>C' +p186 +sg34 +(dp187 +g36 +g37 +sg38 +g39 +sg40 +S'5987144' +p188 +sg42 +g43 +sssS'grch37' +p189 +(dp190 +g32 +S'NC_000007.13:g.6026775T>C' +p191 +sg34 +(dp192 +g36 +g53 +sg38 +g39 +sg40 +S'6026775' +p193 +sg42 +g43 +sssS'grch38' +p194 +(dp195 +g32 +S'NC_000007.14:g.5987144T>C' +p196 +sg34 +(dp197 +g36 +g53 +sg38 +g39 +sg40 +S'5987144' +p198 +sg42 +g43 +ssssg60 +(dp199 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308932.1' +p200 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322003.1' +p201 +sssS'NM_001322014.1:c.1621A>G' +p202 +(dp203 +g3 +g4 +sg5 +(lp204 +S'RefSeqGene record not available' +p205 +asg8 +g4 +sg9 +(lp206 +sg11 +VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 13, mRNA +p207 +sg13 +S'PMS2' +p208 +sg15 +(dp209 +g17 +S'NP_001308943.1:p.(Lys541Glu)' +p210 +sg19 +S'NP_001308943.1:p.(K541E)' +p211 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001322014.1:c.1621A>G' +p212 +sg27 +g4 +sg28 +(dp213 +S'hg19' +p214 +(dp215 +g32 +S'NC_000007.13:g.6026775T>C' +p216 +sg34 +(dp217 +g36 +g37 +sg38 +g39 +sg40 +S'6026775' +p218 +sg42 +g43 +sssg44 +(dp219 +g32 +S'NC_000007.14:g.5987144T>C' +p220 +sg34 +(dp221 +g36 +g37 +sg38 +g39 +sg40 +S'5987144' +p222 +sg42 +g43 +sssS'grch37' +p223 +(dp224 +g32 +S'NC_000007.13:g.6026775T>C' +p225 +sg34 +(dp226 +g36 +g53 +sg38 +g39 +sg40 +S'6026775' +p227 +sg42 +g43 +sssS'grch38' +p228 +(dp229 +g32 +S'NC_000007.14:g.5987144T>C' +p230 +sg34 +(dp231 +g36 +g53 +sg38 +g39 +sg40 +S'5987144' +p232 +sg42 +g43 +ssssg60 +(dp233 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308943.1' +p234 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322014.1' +p235 +sssS'NM_001322004.1:c.1216A>G' +p236 +(dp237 +g3 +g4 +sg5 +(lp238 +S'RefSeqGene record not available' +p239 +asg8 +g4 +sg9 +(lp240 +sg11 +VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 3, mRNA +p241 +sg13 +S'PMS2' +p242 +sg15 
+(dp243 +g17 +S'NP_001308933.1:p.(Lys406Glu)' +p244 +sg19 +S'NP_001308933.1:p.(K406E)' +p245 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001322004.1:c.1216A>G' +p246 +sg27 +g4 +sg28 +(dp247 +S'hg19' +p248 +(dp249 +g32 +S'NC_000007.13:g.6026775T>C' +p250 +sg34 +(dp251 +g36 +g37 +sg38 +g39 +sg40 +S'6026775' +p252 +sg42 +g43 +sssg44 +(dp253 +g32 +S'NC_000007.14:g.5987144T>C' +p254 +sg34 +(dp255 +g36 +g37 +sg38 +g39 +sg40 +S'5987144' +p256 +sg42 +g43 +sssS'grch37' +p257 +(dp258 +g32 +S'NC_000007.13:g.6026775T>C' +p259 +sg34 +(dp260 +g36 +g53 +sg38 +g39 +sg40 +S'6026775' +p261 +sg42 +g43 +sssS'grch38' +p262 +(dp263 +g32 +S'NC_000007.14:g.5987144T>C' +p264 +sg34 +(dp265 +g36 +g53 +sg38 +g39 +sg40 +S'5987144' +p266 +sg42 +g43 +ssssg60 +(dp267 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308933.1' +p268 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322004.1' +p269 +sssS'NM_001322008.1:c.1303A>G' +p270 +(dp271 +g3 +g4 +sg5 +(lp272 +S'RefSeqGene record not available' +p273 +asg8 +g4 +sg9 +(lp274 +sg11 +VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 7, mRNA +p275 +sg13 +S'PMS2' +p276 +sg15 +(dp277 +g17 +S'NP_001308937.1:p.(Lys435Glu)' +p278 +sg19 +S'NP_001308937.1:p.(K435E)' +p279 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001322008.1:c.1303A>G' +p280 +sg27 +g4 +sg28 +(dp281 +S'hg19' +p282 +(dp283 +g32 +S'NC_000007.13:g.6026775T>C' +p284 +sg34 +(dp285 +g36 +g37 +sg38 +g39 +sg40 +S'6026775' +p286 +sg42 +g43 +sssg44 +(dp287 +g32 +S'NC_000007.14:g.5987144T>C' +p288 +sg34 +(dp289 +g36 +g37 +sg38 +g39 +sg40 +S'5987144' +p290 +sg42 +g43 +sssS'grch37' +p291 +(dp292 +g32 +S'NC_000007.13:g.6026775T>C' +p293 +sg34 +(dp294 +g36 +g53 +sg38 +g39 +sg40 +S'6026775' +p295 +sg42 +g43 +sssS'grch38' +p296 +(dp297 +g32 +S'NC_000007.14:g.5987144T>C' +p298 +sg34 +(dp299 +g36 +g53 +sg38 +g39 +sg40 +S'5987144' +p300 +sg42 +g43 +ssssg60 +(dp301 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308937.1' +p302 +sg64 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322008.1' +p303 +sssS'NM_001322006.1:c.1465A>G' +p304 +(dp305 +g3 +g4 +sg5 +(lp306 +S'RefSeqGene record not available' +p307 +asg8 +g4 +sg9 +(lp308 +sg11 +VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 5, mRNA +p309 +sg13 +S'PMS2' +p310 +sg15 +(dp311 +g17 +S'NP_001308935.1:p.(Lys489Glu)' +p312 +sg19 +S'NP_001308935.1:p.(K489E)' +p313 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001322006.1:c.1465A>G' +p314 +sg27 +g4 +sg28 +(dp315 +S'hg19' +p316 +(dp317 +g32 +S'NC_000007.13:g.6026775T>C' +p318 +sg34 +(dp319 +g36 +g37 +sg38 +g39 +sg40 +S'6026775' +p320 +sg42 +g43 +sssg44 +(dp321 +g32 +S'NC_000007.14:g.5987144T>C' +p322 +sg34 +(dp323 +g36 +g37 +sg38 +g39 +sg40 +S'5987144' +p324 +sg42 +g43 +sssS'grch37' +p325 +(dp326 +g32 +S'NC_000007.13:g.6026775T>C' +p327 +sg34 +(dp328 +g36 +g53 +sg38 +g39 +sg40 +S'6026775' +p329 +sg42 +g43 +sssS'grch38' +p330 +(dp331 +g32 +S'NC_000007.14:g.5987144T>C' +p332 +sg34 +(dp333 +g36 +g53 +sg38 +g39 +sg40 +S'5987144' +p334 +sg42 +g43 +ssssg60 +(dp335 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308935.1' +p336 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322006.1' +p337 +sssS'NM_001322013.1:c.1048A>G' +p338 +(dp339 +g3 +g4 +sg5 +(lp340 +S'RefSeqGene record not available' +p341 +asg8 +g4 +sg9 +(lp342 +sg11 +VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 12, mRNA +p343 +sg13 +S'PMS2' +p344 +sg15 +(dp345 +g17 +S'NP_001308942.1:p.(Lys350Glu)' +p346 +sg19 +S'NP_001308942.1:p.(K350E)' +p347 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001322013.1:c.1048A>G' +p348 +sg27 +g4 +sg28 +(dp349 +S'hg19' +p350 +(dp351 +g32 +S'NC_000007.13:g.6026775T>C' +p352 +sg34 +(dp353 +g36 +g37 +sg38 +g39 +sg40 +S'6026775' +p354 +sg42 +g43 +sssg44 +(dp355 +g32 +S'NC_000007.14:g.5987144T>C' +p356 +sg34 +(dp357 +g36 +g37 +sg38 +g39 +sg40 +S'5987144' +p358 +sg42 +g43 +sssS'grch37' +p359 +(dp360 +g32 +S'NC_000007.13:g.6026775T>C' 
+p361 +sg34 +(dp362 +g36 +g53 +sg38 +g39 +sg40 +S'6026775' +p363 +sg42 +g43 +sssS'grch38' +p364 +(dp365 +g32 +S'NC_000007.14:g.5987144T>C' +p366 +sg34 +(dp367 +g36 +g53 +sg38 +g39 +sg40 +S'5987144' +p368 +sg42 +g43 +ssssg60 +(dp369 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308942.1' +p370 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322013.1' +p371 +sssS'NM_001322009.1:c.1216A>G' +p372 +(dp373 +g3 +g4 +sg5 +(lp374 +S'RefSeqGene record not available' +p375 +asg8 +g4 +sg9 +(lp376 +sg11 +VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 8, mRNA +p377 +sg13 +S'PMS2' +p378 +sg15 +(dp379 +g17 +S'NP_001308938.1:p.(Lys406Glu)' +p380 +sg19 +S'NP_001308938.1:p.(K406E)' +p381 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001322009.1:c.1216A>G' +p382 +sg27 +g4 +sg28 +(dp383 +S'hg19' +p384 +(dp385 +g32 +S'NC_000007.13:g.6026775T>C' +p386 +sg34 +(dp387 +g36 +g37 +sg38 +g39 +sg40 +S'6026775' +p388 +sg42 +g43 +sssg44 +(dp389 +g32 +S'NC_000007.14:g.5987144T>C' +p390 +sg34 +(dp391 +g36 +g37 +sg38 +g39 +sg40 +S'5987144' +p392 +sg42 +g43 +sssS'grch37' +p393 +(dp394 +g32 +S'NC_000007.13:g.6026775T>C' +p395 +sg34 +(dp396 +g36 +g53 +sg38 +g39 +sg40 +S'6026775' +p397 +sg42 +g43 +sssS'grch38' +p398 +(dp399 +g32 +S'NC_000007.14:g.5987144T>C' +p400 +sg34 +(dp401 +g36 +g53 +sg38 +g39 +sg40 +S'5987144' +p402 +sg42 +g43 +ssssg60 +(dp403 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308938.1' +p404 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322009.1' +p405 +sssS'NR_003085.2:n.1703G=' +p406 +(dp407 +g3 +g4 +sg5 +(lp408 +S'RefSeqGene record not available' +p409 +asg8 +g4 +sg9 +(lp410 +sg11 +VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 2, non-coding RNA +p411 +sg13 +S'PMS2' +p412 +sg15 +(dp413 +g17 +S'Non-coding :n.' 
+p414 +sg19 +g414 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NR_003085.2:n.1703G=' +p415 +sg27 +g4 +sg28 +(dp416 +S'hg19' +p417 +(dp418 +g32 +S'NC_000007.13:g.6026775T>C' +p419 +sg34 +(dp420 +g36 +g37 +sg38 +S'T' +p421 +sg40 +S'6026775' +p422 +sg42 +g43 +sssS'grch37' +p423 +(dp424 +g32 +S'NC_000007.13:g.6026775T>C' +p425 +sg34 +(dp426 +g36 +g53 +sg38 +g421 +sg40 +S'6026775' +p427 +sg42 +g43 +ssssg60 +(dp428 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_003085.2' +p429 +sssS'flag' +p430 +S'gene_variant' +p431 +sS'NM_000535.5:c.1621G=' +p432 +(dp433 +g3 +g4 +sg5 +(lp434 +S'A more recent version of the selected reference sequence NM_000535.5 is available (NM_000535.6)' +p435 +aS'NM_000535.6:c.1621A>G MUST be fully validated prior to use in reports' +p436 +aS'select_variants=NM_000535.6:c.1621A>G' +p437 +aS'RefSeqGene record not available' +p438 +asg8 +g4 +sg9 +(lp439 +sg11 +VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 1, mRNA +p440 +sg13 +S'PMS2' +p441 +sg15 +(dp442 +g17 +S'NP_000526.1:p.(Glu541=)' +p443 +sg19 +S'NP_000526.1:p.(E541=)' +p444 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_000535.5:c.1621G=' +p445 +sg27 +g4 +sg28 +(dp446 +S'hg19' +p447 +(dp448 +g32 +S'NC_000007.13:g.6026775T>C' +p449 +sg34 +(dp450 +g36 +g37 +sg38 +g421 +sg40 +S'6026775' +p451 +sg42 +g43 +sssS'grch37' +p452 +(dp453 +g32 +S'NC_000007.13:g.6026775T>C' +p454 +sg34 +(dp455 +g36 +g53 +sg38 +g421 +sg40 +S'6026775' +p456 +sg42 +g43 +ssssg60 +(dp457 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000526.1' +p458 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000535.5' +p459 +sssS'NM_001322007.1:c.1303A>G' +p460 +(dp461 +g3 +g4 +sg5 +(lp462 +S'RefSeqGene record not available' +p463 +asg8 +g4 +sg9 +(lp464 +sg11 +VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 6, mRNA +p465 +sg13 +S'PMS2' +p466 +sg15 +(dp467 +g17 +S'NP_001308936.1:p.(Lys435Glu)' +p468 +sg19 +S'NP_001308936.1:p.(K435E)' +p469 +ssg21 +g22 +sg23 +g4 
+sg24 +g4 +sg25 +S'NM_001322007.1:c.1303A>G' +p470 +sg27 +g4 +sg28 +(dp471 +S'hg19' +p472 +(dp473 +g32 +S'NC_000007.13:g.6026775T>C' +p474 +sg34 +(dp475 +g36 +g37 +sg38 +g39 +sg40 +S'6026775' +p476 +sg42 +g43 +sssg44 +(dp477 +g32 +S'NC_000007.14:g.5987144T>C' +p478 +sg34 +(dp479 +g36 +g37 +sg38 +g39 +sg40 +S'5987144' +p480 +sg42 +g43 +sssS'grch37' +p481 +(dp482 +g32 +S'NC_000007.13:g.6026775T>C' +p483 +sg34 +(dp484 +g36 +g53 +sg38 +g39 +sg40 +S'6026775' +p485 +sg42 +g43 +sssS'grch38' +p486 +(dp487 +g32 +S'NC_000007.14:g.5987144T>C' +p488 +sg34 +(dp489 +g36 +g53 +sg38 +g39 +sg40 +S'5987144' +p490 +sg42 +g43 +ssssg60 +(dp491 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308936.1' +p492 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322007.1' +p493 +sssS'NR_136154.1:n.1708A>G' +p494 +(dp495 +g3 +g4 +sg5 +(lp496 +S'RefSeqGene record not available' +p497 +asg8 +g4 +sg9 +(lp498 +sg11 +VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 15, non-coding RNA +p499 +sg13 +S'PMS2' +p500 +sg15 +(dp501 +g17 +S'Non-coding :n.' 
+p502 +sg19 +g502 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NR_136154.1:n.1708A>G' +p503 +sg27 +g4 +sg28 +(dp504 +S'hg19' +p505 +(dp506 +g32 +S'NC_000007.13:g.6026775T>C' +p507 +sg34 +(dp508 +g36 +g37 +sg38 +g39 +sg40 +S'6026775' +p509 +sg42 +g43 +sssg44 +(dp510 +g32 +S'NC_000007.14:g.5987144T>C' +p511 +sg34 +(dp512 +g36 +g37 +sg38 +g39 +sg40 +S'5987144' +p513 +sg42 +g43 +sssS'grch37' +p514 +(dp515 +g32 +S'NC_000007.13:g.6026775T>C' +p516 +sg34 +(dp517 +g36 +g53 +sg38 +g39 +sg40 +S'6026775' +p518 +sg42 +g43 +sssS'grch38' +p519 +(dp520 +g32 +S'NC_000007.14:g.5987144T>C' +p521 +sg34 +(dp522 +g36 +g53 +sg38 +g39 +sg40 +S'5987144' +p523 +sg42 +g43 +ssssg60 +(dp524 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_136154.1' +p525 +sssS'NM_001322011.1:c.688A>G' +p526 +(dp527 +g3 +g4 +sg5 +(lp528 +S'RefSeqGene record not available' +p529 +asg8 +g4 +sg9 +(lp530 +sg11 +VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 10, mRNA +p531 +sg13 +S'PMS2' +p532 +sg15 +(dp533 +g17 +S'NP_001308940.1:p.(Lys230Glu)' +p534 +sg19 +S'NP_001308940.1:p.(K230E)' +p535 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001322011.1:c.688A>G' +p536 +sg27 +g4 +sg28 +(dp537 +S'hg19' +p538 +(dp539 +g32 +S'NC_000007.13:g.6026775T>C' +p540 +sg34 +(dp541 +g36 +g37 +sg38 +g39 +sg40 +S'6026775' +p542 +sg42 +g43 +sssg44 +(dp543 +g32 +S'NC_000007.14:g.5987144T>C' +p544 +sg34 +(dp545 +g36 +g37 +sg38 +g39 +sg40 +S'5987144' +p546 +sg42 +g43 +sssS'grch37' +p547 +(dp548 +g32 +S'NC_000007.13:g.6026775T>C' +p549 +sg34 +(dp550 +g36 +g53 +sg38 +g39 +sg40 +S'6026775' +p551 +sg42 +g43 +sssS'grch38' +p552 +(dp553 +g32 +S'NC_000007.14:g.5987144T>C' +p554 +sg34 +(dp555 +g36 +g53 +sg38 +g39 +sg40 +S'5987144' +p556 +sg42 +g43 +ssssg60 +(dp557 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308940.1' +p558 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322011.1' +p559 +sssS'NM_000535.6:c.1621A>G' +p560 +(dp561 +g3 +g4 +sg5 +(lp562 +S'RefSeqGene record not available' +p563 +asg8 +g4 
+sg9 +(lp564 +sg11 +VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 1, mRNA +p565 +sg13 +S'PMS2' +p566 +sg15 +(dp567 +g17 +S'NP_000526.2:p.(Lys541Glu)' +p568 +sg19 +S'NP_000526.2:p.(K541E)' +p569 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_000535.6:c.1621A>G' +p570 +sg27 +g4 +sg28 +(dp571 +S'hg19' +p572 +(dp573 +g32 +S'NC_000007.13:g.6026775T>C' +p574 +sg34 +(dp575 +g36 +g37 +sg38 +g39 +sg40 +S'6026775' +p576 +sg42 +g43 +sssg44 +(dp577 +g32 +S'NC_000007.14:g.5987144T>C' +p578 +sg34 +(dp579 +g36 +g37 +sg38 +g39 +sg40 +S'5987144' +p580 +sg42 +g43 +sssS'grch37' +p581 +(dp582 +g32 +S'NC_000007.13:g.6026775T>C' +p583 +sg34 +(dp584 +g36 +g53 +sg38 +g39 +sg40 +S'6026775' +p585 +sg42 +g43 +sssS'grch38' +p586 +(dp587 +g32 +S'NC_000007.14:g.5987144T>C' +p588 +sg34 +(dp589 +g36 +g53 +sg38 +g39 +sg40 +S'5987144' +p590 +sg42 +g43 +ssssg60 +(dp591 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000526.2' +p592 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000535.6' +p593 +sssS'metadata' +p594 +(dp595 +S'variantvalidator_hgvs_version' +p596 +S'1.1.3' +p597 +sS'uta_schema' +p598 +S'uta_20180821' +p599 +sS'seqrepo_db' +p600 +S'2018-08-21' +p601 +sS'variantvalidator_version' +p602 +S'v0.2' +p603 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant296.txt b/VariantValidator/testing/testOutputsMasterITS/variant296.txt new file mode 100644 index 00000000..479838ba --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant296.txt @@ -0,0 +1,908 @@ +(dp0 +S'NM_001346900.1:c.2077_2091del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000007.13:g.55242465GGAATTAAGAGAAGCA>G automapped to NC_000007.13:g.55242466_55242480del' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 8, mRNA +p13 +sS'gene_symbol' +p14 +S'EGFR' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_001333829.1:p.(Glu693_Ala697del)' +p19 +sS'slr' +p20 +S'NP_001333829.1:p.(E693_A697del)' +p21 +ssS'submitted_variant' +p22 +S'7-55242465-GGAATTAAGAGAAGCA-G' +p23 +sS'genome_context_intronic_sequence' +p24 +g4 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_001346900.1:c.2077_2091del' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000007.13:g.55242466_55242480del' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'chr7' +p38 +sS'ref' +p39 +S'GGAATTAAGAGAAGCA' +p40 +sS'pos' +p41 +S'55242465' +p42 +sS'alt' +p43 +S'G' +p44 +sssS'hg38' +p45 +(dp46 +g33 +S'NC_000007.14:g.55174773_55174787del' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +S'GGAATTAAGAGAAGCA' +p49 +sg41 +S'55174772' +p50 +sg43 +g44 +sssS'grch37' +p51 +(dp52 +g33 +S'NC_000007.13:g.55242466_55242480del' +p53 +sg35 +(dp54 +g37 +S'7' +p55 +sg39 +S'GGAATTAAGAGAAGCA' +p56 +sg41 +S'55242465' +p57 +sg43 +g44 +sssS'grch38' +p58 +(dp59 +g33 +S'NC_000007.14:g.55174773_55174787del' +p60 +sg35 +(dp61 +g37 +g55 +sg39 +S'GGAATTAAGAGAAGCA' +p62 
+sg41 +S'55174772' +p63 +sg43 +g44 +ssssS'reference_sequence_records' +p64 +(dp65 +S'protein' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333829.1' +p67 +sS'transcript' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346900.1' +p69 +sssS'NM_001346898.1:c.2236_2250del' +p70 +(dp71 +g3 +g4 +sg5 +(lp72 +S'NC_000007.13:g.55242465GGAATTAAGAGAAGCA>G automapped to NC_000007.13:g.55242466_55242480del' +p73 +aS'RefSeqGene record not available' +p74 +asg9 +g4 +sg10 +(lp75 +sg12 +VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 6, mRNA +p76 +sg14 +S'EGFR' +p77 +sg16 +(dp78 +g18 +S'NP_001333827.1:p.(Glu746_Ala750del)' +p79 +sg20 +S'NP_001333827.1:p.(E746_A750del)' +p80 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001346898.1:c.2236_2250del' +p81 +sg28 +g4 +sg29 +(dp82 +S'hg19' +p83 +(dp84 +g33 +S'NC_000007.13:g.55242466_55242480del' +p85 +sg35 +(dp86 +g37 +g38 +sg39 +S'GGAATTAAGAGAAGCA' +p87 +sg41 +S'55242465' +p88 +sg43 +g44 +sssg45 +(dp89 +g33 +S'NC_000007.14:g.55174773_55174787del' +p90 +sg35 +(dp91 +g37 +g38 +sg39 +S'GGAATTAAGAGAAGCA' +p92 +sg41 +S'55174772' +p93 +sg43 +g44 +sssS'grch37' +p94 +(dp95 +g33 +S'NC_000007.13:g.55242466_55242480del' +p96 +sg35 +(dp97 +g37 +g55 +sg39 +S'GGAATTAAGAGAAGCA' +p98 +sg41 +S'55242465' +p99 +sg43 +g44 +sssS'grch38' +p100 +(dp101 +g33 +S'NC_000007.14:g.55174773_55174787del' +p102 +sg35 +(dp103 +g37 +g55 +sg39 +S'GGAATTAAGAGAAGCA' +p104 +sg41 +S'55174772' +p105 +sg43 +g44 +ssssg64 +(dp106 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333827.1' +p107 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346898.1' +p108 +sssS'NM_001346941.1:c.1435_1449del' +p109 +(dp110 +g3 +g4 +sg5 +(lp111 +S'NC_000007.13:g.55242465GGAATTAAGAGAAGCA>G automapped to NC_000007.13:g.55242466_55242480del' +p112 +aS'RefSeqGene record not available' +p113 +asg9 +g4 +sg10 +(lp114 +sg12 +VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant EGFRvIII, mRNA +p115 +sg14 +S'EGFR' +p116 +sg16 +(dp117 +g18 
+S'NP_001333870.1:p.(Glu479_Ala483del)' +p118 +sg20 +S'NP_001333870.1:p.(E479_A483del)' +p119 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001346941.1:c.1435_1449del' +p120 +sg28 +g4 +sg29 +(dp121 +S'hg19' +p122 +(dp123 +g33 +S'NC_000007.13:g.55242466_55242480del' +p124 +sg35 +(dp125 +g37 +g38 +sg39 +S'GGAATTAAGAGAAGCA' +p126 +sg41 +S'55242465' +p127 +sg43 +g44 +sssg45 +(dp128 +g33 +S'NC_000007.14:g.55174773_55174787del' +p129 +sg35 +(dp130 +g37 +g38 +sg39 +S'GGAATTAAGAGAAGCA' +p131 +sg41 +S'55174772' +p132 +sg43 +g44 +sssS'grch37' +p133 +(dp134 +g33 +S'NC_000007.13:g.55242466_55242480del' +p135 +sg35 +(dp136 +g37 +g55 +sg39 +S'GGAATTAAGAGAAGCA' +p137 +sg41 +S'55242465' +p138 +sg43 +g44 +sssS'grch38' +p139 +(dp140 +g33 +S'NC_000007.14:g.55174773_55174787del' +p141 +sg35 +(dp142 +g37 +g55 +sg39 +S'GGAATTAAGAGAAGCA' +p143 +sg41 +S'55174772' +p144 +sg43 +g44 +ssssg64 +(dp145 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333870.1' +p146 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346941.1' +p147 +sssS'flag' +p148 +S'gene_variant' +p149 +sS'NM_001346899.1:c.2101_2115del' +p150 +(dp151 +g3 +g4 +sg5 +(lp152 +S'NC_000007.13:g.55242465GGAATTAAGAGAAGCA>G automapped to NC_000007.13:g.55242466_55242480del' +p153 +aS'RefSeqGene record not available' +p154 +asg9 +g4 +sg10 +(lp155 +sg12 +VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 7, mRNA +p156 +sg14 +S'EGFR' +p157 +sg16 +(dp158 +g18 +S'NP_001333828.1:p.(Glu701_Ala705del)' +p159 +sg20 +S'NP_001333828.1:p.(E701_A705del)' +p160 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001346899.1:c.2101_2115del' +p161 +sg28 +g4 +sg29 +(dp162 +S'hg19' +p163 +(dp164 +g33 +S'NC_000007.13:g.55242466_55242480del' +p165 +sg35 +(dp166 +g37 +g38 +sg39 +S'GGAATTAAGAGAAGCA' +p167 +sg41 +S'55242465' +p168 +sg43 +g44 +sssg45 +(dp169 +g33 +S'NC_000007.14:g.55174773_55174787del' +p170 +sg35 +(dp171 +g37 +g38 +sg39 +S'GGAATTAAGAGAAGCA' +p172 +sg41 +S'55174772' +p173 +sg43 +g44 +sssS'grch37' +p174 +(dp175 +g33 
+S'NC_000007.13:g.55242466_55242480del' +p176 +sg35 +(dp177 +g37 +g55 +sg39 +S'GGAATTAAGAGAAGCA' +p178 +sg41 +S'55242465' +p179 +sg43 +g44 +sssS'grch38' +p180 +(dp181 +g33 +S'NC_000007.14:g.55174773_55174787del' +p182 +sg35 +(dp183 +g37 +g55 +sg39 +S'GGAATTAAGAGAAGCA' +p184 +sg41 +S'55174772' +p185 +sg43 +g44 +ssssg64 +(dp186 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333828.1' +p187 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346899.1' +p188 +sssS'NM_001346897.1:c.2101_2115del' +p189 +(dp190 +g3 +g4 +sg5 +(lp191 +S'NC_000007.13:g.55242465GGAATTAAGAGAAGCA>G automapped to NC_000007.13:g.55242466_55242480del' +p192 +aS'RefSeqGene record not available' +p193 +asg9 +g4 +sg10 +(lp194 +sg12 +VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 5, mRNA +p195 +sg14 +S'EGFR' +p196 +sg16 +(dp197 +g18 +S'NP_001333826.1:p.(Glu701_Ala705del)' +p198 +sg20 +S'NP_001333826.1:p.(E701_A705del)' +p199 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001346897.1:c.2101_2115del' +p200 +sg28 +g4 +sg29 +(dp201 +S'hg19' +p202 +(dp203 +g33 +S'NC_000007.13:g.55242466_55242480del' +p204 +sg35 +(dp205 +g37 +g38 +sg39 +S'GGAATTAAGAGAAGCA' +p206 +sg41 +S'55242465' +p207 +sg43 +g44 +sssg45 +(dp208 +g33 +S'NC_000007.14:g.55174773_55174787del' +p209 +sg35 +(dp210 +g37 +g38 +sg39 +S'GGAATTAAGAGAAGCA' +p211 +sg41 +S'55174772' +p212 +sg43 +g44 +sssS'grch37' +p213 +(dp214 +g33 +S'NC_000007.13:g.55242466_55242480del' +p215 +sg35 +(dp216 +g37 +g55 +sg39 +S'GGAATTAAGAGAAGCA' +p217 +sg41 +S'55242465' +p218 +sg43 +g44 +sssS'grch38' +p219 +(dp220 +g33 +S'NC_000007.14:g.55174773_55174787del' +p221 +sg35 +(dp222 +g37 +g55 +sg39 +S'GGAATTAAGAGAAGCA' +p223 +sg41 +S'55174772' +p224 +sg43 +g44 +ssssg64 +(dp225 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333826.1' +p226 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346897.1' +p227 +sssS'NM_005228.3:c.2236_2250del' +p228 +(dp229 +g3 +g4 +sg5 +(lp230 +S'NC_000007.13:g.55242465GGAATTAAGAGAAGCA>G automapped to 
NC_000007.13:g.55242466_55242480del' +p231 +aS'A more recent version of the selected reference sequence NM_005228.3 is available (NM_005228.4)' +p232 +aS'NM_005228.4:c.2236_2250del MUST be fully validated prior to use in reports' +p233 +aS'select_variants=NM_005228.4:c.2236_2250del' +p234 +aS'RefSeqGene record not available' +p235 +asg9 +g4 +sg10 +(lp236 +sg12 +VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 1, mRNA +p237 +sg14 +S'EGFR' +p238 +sg16 +(dp239 +g18 +S'NP_005219.2:p.(Glu746_Ala750del)' +p240 +sg20 +S'NP_005219.2:p.(E746_A750del)' +p241 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_005228.3:c.2236_2250del' +p242 +sg28 +g4 +sg29 +(dp243 +S'hg19' +p244 +(dp245 +g33 +S'NC_000007.13:g.55242466_55242480del' +p246 +sg35 +(dp247 +g37 +g38 +sg39 +S'GGAATTAAGAGAAGCA' +p248 +sg41 +S'55242465' +p249 +sg43 +g44 +sssg45 +(dp250 +g33 +S'NC_000007.14:g.55174773_55174787del' +p251 +sg35 +(dp252 +g37 +g38 +sg39 +S'GGAATTAAGAGAAGCA' +p253 +sg41 +S'55174772' +p254 +sg43 +g44 +sssS'grch37' +p255 +(dp256 +g33 +S'NC_000007.13:g.55242466_55242480del' +p257 +sg35 +(dp258 +g37 +g55 +sg39 +S'GGAATTAAGAGAAGCA' +p259 +sg41 +S'55242465' +p260 +sg43 +g44 +sssS'grch38' +p261 +(dp262 +g33 +S'NC_000007.14:g.55174773_55174787del' +p263 +sg35 +(dp264 +g37 +g55 +sg39 +S'GGAATTAAGAGAAGCA' +p265 +sg41 +S'55174772' +p266 +sg43 +g44 +ssssg64 +(dp267 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005219.2' +p268 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005228.3' +p269 +sssS'NM_005228.4:c.2236_2250del' +p270 +(dp271 +g3 +g4 +sg5 +(lp272 +S'NC_000007.13:g.55242465GGAATTAAGAGAAGCA>G automapped to NC_000007.13:g.55242466_55242480del' +p273 +aS'RefSeqGene record not available' +p274 +asg9 +g4 +sg10 +(lp275 +sg12 +VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 1, mRNA +p276 +sg14 +S'EGFR' +p277 +sg16 +(dp278 +g18 +S'NP_005219.2:p.(Glu746_Ala750del)' +p279 +sg20 +S'NP_005219.2:p.(E746_A750del)' +p280 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 
+S'NM_005228.4:c.2236_2250del' +p281 +sg28 +g4 +sg29 +(dp282 +S'hg19' +p283 +(dp284 +g33 +S'NC_000007.13:g.55242466_55242480del' +p285 +sg35 +(dp286 +g37 +g38 +sg39 +S'GGAATTAAGAGAAGCA' +p287 +sg41 +S'55242465' +p288 +sg43 +g44 +sssg45 +(dp289 +g33 +S'NC_000007.14:g.55174773_55174787del' +p290 +sg35 +(dp291 +g37 +g38 +sg39 +S'GGAATTAAGAGAAGCA' +p292 +sg41 +S'55174772' +p293 +sg43 +g44 +sssS'grch37' +p294 +(dp295 +g33 +S'NC_000007.13:g.55242466_55242480del' +p296 +sg35 +(dp297 +g37 +g55 +sg39 +S'GGAATTAAGAGAAGCA' +p298 +sg41 +S'55242465' +p299 +sg43 +g44 +sssS'grch38' +p300 +(dp301 +g33 +S'NC_000007.14:g.55174773_55174787del' +p302 +sg35 +(dp303 +g37 +g55 +sg39 +S'GGAATTAAGAGAAGCA' +p304 +sg41 +S'55174772' +p305 +sg43 +g44 +ssssg64 +(dp306 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005219.2' +p307 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005228.4' +p308 +sssS'metadata' +p309 +(dp310 +S'variantvalidator_hgvs_version' +p311 +S'1.1.3' +p312 +sS'uta_schema' +p313 +S'uta_20180821' +p314 +sS'seqrepo_db' +p315 +S'2018-08-21' +p316 +sS'variantvalidator_version' +p317 +S'v0.2' +p318 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant297.txt b/VariantValidator/testing/testOutputsMasterITS/variant297.txt new file mode 100644 index 00000000..14000cc1 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant297.txt @@ -0,0 +1,1063 @@ +(dp0 +S'NM_005228.3:c.2284-5_2290dup' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000007.13:g.55248992T>TTCCAGGAAGCCT automapped to NC_000007.13:g.55248981_55248992dupTCCAGGAAGCCT' +p7 +aS'A more recent version of the selected reference sequence NM_005228.3 is available (NM_005228.4)' +p8 +aS'NM_005228.4:c.2284-5_2290dupTCCAGGAAGCCT MUST be fully validated prior to use in reports' +p9 +aS'select_variants=NM_005228.4:c.2284-5_2290dup' +p10 +aS'RefSeqGene record not available' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g4 +sS'alt_genomic_loci' +p13 +(lp14 +sS'transcript_description' +p15 +VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 1, mRNA +p16 +sS'gene_symbol' +p17 +S'EGFR' +p18 +sS'hgvs_predicted_protein_consequence' +p19 +(dp20 +S'tlr' +p21 +S'NP_005219.2:p.?' +p22 +sS'slr' +p23 +S'NP_005219.2:p.?' 
+p24 +ssS'submitted_variant' +p25 +S'7-55248992-T-TTCCAGGAAGCCT' +p26 +sS'genome_context_intronic_sequence' +p27 +S'NC_000007.13(NM_005228.3):c.2284-5_2290dup' +p28 +sS'hgvs_lrg_variant' +p29 +g4 +sS'hgvs_transcript_variant' +p30 +S'NM_005228.3:c.2284-5_2290dup' +p31 +sS'hgvs_refseqgene_variant' +p32 +g4 +sS'primary_assembly_loci' +p33 +(dp34 +S'hg19' +p35 +(dp36 +S'hgvs_genomic_description' +p37 +S'NC_000007.13:g.55248981_55248992dup' +p38 +sS'vcf' +p39 +(dp40 +S'chr' +p41 +S'chr7' +p42 +sS'ref' +p43 +S'TCCAGGAAGCCT' +p44 +sS'pos' +p45 +S'55248981' +p46 +sS'alt' +p47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p48 +sssS'hg38' +p49 +(dp50 +g37 +S'NC_000007.14:g.55181288_55181299dup' +p51 +sg39 +(dp52 +g41 +g42 +sg43 +S'TCCAGGAAGCCT' +p53 +sg45 +S'55181288' +p54 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p55 +sssS'grch37' +p56 +(dp57 +g37 +S'NC_000007.13:g.55248981_55248992dup' +p58 +sg39 +(dp59 +g41 +S'7' +p60 +sg43 +S'TCCAGGAAGCCT' +p61 +sg45 +S'55248981' +p62 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p63 +sssS'grch38' +p64 +(dp65 +g37 +S'NC_000007.14:g.55181288_55181299dup' +p66 +sg39 +(dp67 +g41 +g60 +sg43 +S'TCCAGGAAGCCT' +p68 +sg45 +S'55181288' +p69 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p70 +ssssS'reference_sequence_records' +p71 +(dp72 +S'protein' +p73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005219.2' +p74 +sS'transcript' +p75 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005228.3' +p76 +sssS'NM_001346899.1:c.2149-5_2155dup' +p77 +(dp78 +g3 +g4 +sg5 +(lp79 +S'NC_000007.13:g.55248992T>TTCCAGGAAGCCT automapped to NC_000007.13:g.55248981_55248992dupTCCAGGAAGCCT' +p80 +aS'RefSeqGene record not available' +p81 +asg12 +g4 +sg13 +(lp82 +sg15 +VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 7, mRNA +p83 +sg17 +S'EGFR' +p84 +sg19 +(dp85 +g21 +S'NP_001333828.1:p.?' +p86 +sg23 +S'NP_001333828.1:p.?' 
+p87 +ssg25 +g26 +sg27 +S'NC_000007.13(NM_001346899.1):c.2149-5_2155dup' +p88 +sg29 +g4 +sg30 +S'NM_001346899.1:c.2149-5_2155dup' +p89 +sg32 +g4 +sg33 +(dp90 +S'hg19' +p91 +(dp92 +g37 +S'NC_000007.13:g.55248981_55248992dup' +p93 +sg39 +(dp94 +g41 +g42 +sg43 +S'TCCAGGAAGCCT' +p95 +sg45 +S'55248981' +p96 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p97 +sssg49 +(dp98 +g37 +S'NC_000007.14:g.55181288_55181299dup' +p99 +sg39 +(dp100 +g41 +g42 +sg43 +S'TCCAGGAAGCCT' +p101 +sg45 +S'55181288' +p102 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p103 +sssS'grch37' +p104 +(dp105 +g37 +S'NC_000007.13:g.55248981_55248992dup' +p106 +sg39 +(dp107 +g41 +g60 +sg43 +S'TCCAGGAAGCCT' +p108 +sg45 +S'55248981' +p109 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p110 +sssS'grch38' +p111 +(dp112 +g37 +S'NC_000007.14:g.55181288_55181299dup' +p113 +sg39 +(dp114 +g41 +g60 +sg43 +S'TCCAGGAAGCCT' +p115 +sg45 +S'55181288' +p116 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p117 +ssssg71 +(dp118 +g73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333828.1' +p119 +sg75 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346899.1' +p120 +sssS'NM_005228.4:c.2284-5_2290dup' +p121 +(dp122 +g3 +g4 +sg5 +(lp123 +S'NC_000007.13:g.55248992T>TTCCAGGAAGCCT automapped to NC_000007.13:g.55248981_55248992dupTCCAGGAAGCCT' +p124 +aS'RefSeqGene record not available' +p125 +asg12 +g4 +sg13 +(lp126 +sg15 +VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 1, mRNA +p127 +sg17 +S'EGFR' +p128 +sg19 +(dp129 +g21 +S'NP_005219.2:p.?' +p130 +sg23 +S'NP_005219.2:p.?' 
+p131 +ssg25 +g26 +sg27 +S'NC_000007.13(NM_005228.4):c.2284-5_2290dup' +p132 +sg29 +g4 +sg30 +S'NM_005228.4:c.2284-5_2290dup' +p133 +sg32 +g4 +sg33 +(dp134 +S'hg19' +p135 +(dp136 +g37 +S'NC_000007.13:g.55248981_55248992dup' +p137 +sg39 +(dp138 +g41 +g42 +sg43 +S'TCCAGGAAGCCT' +p139 +sg45 +S'55248981' +p140 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p141 +sssg49 +(dp142 +g37 +S'NC_000007.14:g.55181288_55181299dup' +p143 +sg39 +(dp144 +g41 +g42 +sg43 +S'TCCAGGAAGCCT' +p145 +sg45 +S'55181288' +p146 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p147 +sssS'grch37' +p148 +(dp149 +g37 +S'NC_000007.13:g.55248981_55248992dup' +p150 +sg39 +(dp151 +g41 +g60 +sg43 +S'TCCAGGAAGCCT' +p152 +sg45 +S'55248981' +p153 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p154 +sssS'grch38' +p155 +(dp156 +g37 +S'NC_000007.14:g.55181288_55181299dup' +p157 +sg39 +(dp158 +g41 +g60 +sg43 +S'TCCAGGAAGCCT' +p159 +sg45 +S'55181288' +p160 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p161 +ssssg71 +(dp162 +g73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005219.2' +p163 +sg75 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005228.4' +p164 +sssS'NM_001346898.1:c.2284-5_2290dup' +p165 +(dp166 +g3 +g4 +sg5 +(lp167 +S'NC_000007.13:g.55248992T>TTCCAGGAAGCCT automapped to NC_000007.13:g.55248981_55248992dupTCCAGGAAGCCT' +p168 +aS'RefSeqGene record not available' +p169 +asg12 +g4 +sg13 +(lp170 +sg15 +VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 6, mRNA +p171 +sg17 +S'EGFR' +p172 +sg19 +(dp173 +g21 +S'NP_001333827.1:p.?' +p174 +sg23 +S'NP_001333827.1:p.?' 
+p175 +ssg25 +g26 +sg27 +S'NC_000007.13(NM_001346898.1):c.2284-5_2290dup' +p176 +sg29 +g4 +sg30 +S'NM_001346898.1:c.2284-5_2290dup' +p177 +sg32 +g4 +sg33 +(dp178 +S'hg19' +p179 +(dp180 +g37 +S'NC_000007.13:g.55248981_55248992dup' +p181 +sg39 +(dp182 +g41 +g42 +sg43 +S'TCCAGGAAGCCT' +p183 +sg45 +S'55248981' +p184 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p185 +sssg49 +(dp186 +g37 +S'NC_000007.14:g.55181288_55181299dup' +p187 +sg39 +(dp188 +g41 +g42 +sg43 +S'TCCAGGAAGCCT' +p189 +sg45 +S'55181288' +p190 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p191 +sssS'grch37' +p192 +(dp193 +g37 +S'NC_000007.13:g.55248981_55248992dup' +p194 +sg39 +(dp195 +g41 +g60 +sg43 +S'TCCAGGAAGCCT' +p196 +sg45 +S'55248981' +p197 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p198 +sssS'grch38' +p199 +(dp200 +g37 +S'NC_000007.14:g.55181288_55181299dup' +p201 +sg39 +(dp202 +g41 +g60 +sg43 +S'TCCAGGAAGCCT' +p203 +sg45 +S'55181288' +p204 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p205 +ssssg71 +(dp206 +g73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333827.1' +p207 +sg75 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346898.1' +p208 +sssS'NM_001346941.1:c.1483-5_1489dup' +p209 +(dp210 +g3 +g4 +sg5 +(lp211 +S'NC_000007.13:g.55248992T>TTCCAGGAAGCCT automapped to NC_000007.13:g.55248981_55248992dupTCCAGGAAGCCT' +p212 +aS'RefSeqGene record not available' +p213 +asg12 +g4 +sg13 +(lp214 +sg15 +VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant EGFRvIII, mRNA +p215 +sg17 +S'EGFR' +p216 +sg19 +(dp217 +g21 +S'NP_001333870.1:p.?' +p218 +sg23 +S'NP_001333870.1:p.?' 
+p219 +ssg25 +g26 +sg27 +S'NC_000007.13(NM_001346941.1):c.1483-5_1489dup' +p220 +sg29 +g4 +sg30 +S'NM_001346941.1:c.1483-5_1489dup' +p221 +sg32 +g4 +sg33 +(dp222 +S'hg19' +p223 +(dp224 +g37 +S'NC_000007.13:g.55248981_55248992dup' +p225 +sg39 +(dp226 +g41 +g42 +sg43 +S'TCCAGGAAGCCT' +p227 +sg45 +S'55248981' +p228 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p229 +sssg49 +(dp230 +g37 +S'NC_000007.14:g.55181288_55181299dup' +p231 +sg39 +(dp232 +g41 +g42 +sg43 +S'TCCAGGAAGCCT' +p233 +sg45 +S'55181288' +p234 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p235 +sssS'grch37' +p236 +(dp237 +g37 +S'NC_000007.13:g.55248981_55248992dup' +p238 +sg39 +(dp239 +g41 +g60 +sg43 +S'TCCAGGAAGCCT' +p240 +sg45 +S'55248981' +p241 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p242 +sssS'grch38' +p243 +(dp244 +g37 +S'NC_000007.14:g.55181288_55181299dup' +p245 +sg39 +(dp246 +g41 +g60 +sg43 +S'TCCAGGAAGCCT' +p247 +sg45 +S'55181288' +p248 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p249 +ssssg71 +(dp250 +g73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333870.1' +p251 +sg75 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346941.1' +p252 +sssS'flag' +p253 +S'gene_variant' +p254 +sS'NM_001346900.1:c.2125-5_2131dup' +p255 +(dp256 +g3 +g4 +sg5 +(lp257 +S'NC_000007.13:g.55248992T>TTCCAGGAAGCCT automapped to NC_000007.13:g.55248981_55248992dupTCCAGGAAGCCT' +p258 +aS'RefSeqGene record not available' +p259 +asg12 +g4 +sg13 +(lp260 +sg15 +VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 8, mRNA +p261 +sg17 +S'EGFR' +p262 +sg19 +(dp263 +g21 +S'NP_001333829.1:p.?' +p264 +sg23 +S'NP_001333829.1:p.?' 
+p265 +ssg25 +g26 +sg27 +S'NC_000007.13(NM_001346900.1):c.2125-5_2131dup' +p266 +sg29 +g4 +sg30 +S'NM_001346900.1:c.2125-5_2131dup' +p267 +sg32 +g4 +sg33 +(dp268 +S'hg19' +p269 +(dp270 +g37 +S'NC_000007.13:g.55248981_55248992dup' +p271 +sg39 +(dp272 +g41 +g42 +sg43 +S'TCCAGGAAGCCT' +p273 +sg45 +S'55248981' +p274 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p275 +sssg49 +(dp276 +g37 +S'NC_000007.14:g.55181288_55181299dup' +p277 +sg39 +(dp278 +g41 +g42 +sg43 +S'TCCAGGAAGCCT' +p279 +sg45 +S'55181288' +p280 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p281 +sssS'grch37' +p282 +(dp283 +g37 +S'NC_000007.13:g.55248981_55248992dup' +p284 +sg39 +(dp285 +g41 +g60 +sg43 +S'TCCAGGAAGCCT' +p286 +sg45 +S'55248981' +p287 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p288 +sssS'grch38' +p289 +(dp290 +g37 +S'NC_000007.14:g.55181288_55181299dup' +p291 +sg39 +(dp292 +g41 +g60 +sg43 +S'TCCAGGAAGCCT' +p293 +sg45 +S'55181288' +p294 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p295 +ssssg71 +(dp296 +g73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333829.1' +p297 +sg75 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346900.1' +p298 +sssS'NR_047551.1:n.1272_1283dup' +p299 +(dp300 +g3 +g4 +sg5 +(lp301 +S'NC_000007.13:g.55248992T>TTCCAGGAAGCCT automapped to NC_000007.13:g.55248981_55248992dupTCCAGGAAGCCT' +p302 +aS'RefSeqGene record not available' +p303 +asg12 +g4 +sg13 +(lp304 +sg15 +VHomo sapiens EGFR antisense RNA 1 (EGFR-AS1), long non-coding RNA +p305 +sg17 +S'EGFR-AS1' +p306 +sg19 +(dp307 +g21 +S'Non-coding :n.' 
+p308 +sg23 +g308 +ssg25 +g26 +sg27 +g4 +sg29 +g4 +sg30 +S'NR_047551.1:n.1272_1283dup' +p309 +sg32 +g4 +sg33 +(dp310 +S'hg19' +p311 +(dp312 +g37 +S'NC_000007.13:g.55248981_55248992dup' +p313 +sg39 +(dp314 +g41 +g42 +sg43 +S'TCCAGGAAGCCT' +p315 +sg45 +S'55248981' +p316 +sg47 +VTCCAGGAAGCCTTCCAGGAAGCCT +p317 +sssg49 +(dp318 +g37 +S'NC_000007.14:g.55181288_55181299dup' +p319 +sg39 +(dp320 +g41 +g42 +sg43 +S'TCCAGGAAGCCT' +p321 +sg45 +S'55181288' +p322 +sg47 +VTCCAGGAAGCCTTCCAGGAAGCCT +p323 +sssS'grch37' +p324 +(dp325 +g37 +S'NC_000007.13:g.55248981_55248992dup' +p326 +sg39 +(dp327 +g41 +g60 +sg43 +S'TCCAGGAAGCCT' +p328 +sg45 +S'55248981' +p329 +sg47 +VTCCAGGAAGCCTTCCAGGAAGCCT +p330 +sssS'grch38' +p331 +(dp332 +g37 +S'NC_000007.14:g.55181288_55181299dup' +p333 +sg39 +(dp334 +g41 +g60 +sg43 +S'TCCAGGAAGCCT' +p335 +sg45 +S'55181288' +p336 +sg47 +VTCCAGGAAGCCTTCCAGGAAGCCT +p337 +ssssg71 +(dp338 +g75 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_047551.1' +p339 +sssS'NM_001346897.1:c.2149-5_2155dup' +p340 +(dp341 +g3 +g4 +sg5 +(lp342 +S'NC_000007.13:g.55248992T>TTCCAGGAAGCCT automapped to NC_000007.13:g.55248981_55248992dupTCCAGGAAGCCT' +p343 +aS'RefSeqGene record not available' +p344 +asg12 +g4 +sg13 +(lp345 +sg15 +VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 5, mRNA +p346 +sg17 +S'EGFR' +p347 +sg19 +(dp348 +g21 +S'NP_001333826.1:p.?' +p349 +sg23 +S'NP_001333826.1:p.?' 
+p350 +ssg25 +g26 +sg27 +S'NC_000007.13(NM_001346897.1):c.2149-5_2155dup' +p351 +sg29 +g4 +sg30 +S'NM_001346897.1:c.2149-5_2155dup' +p352 +sg32 +g4 +sg33 +(dp353 +S'hg19' +p354 +(dp355 +g37 +S'NC_000007.13:g.55248981_55248992dup' +p356 +sg39 +(dp357 +g41 +g42 +sg43 +S'TCCAGGAAGCCT' +p358 +sg45 +S'55248981' +p359 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p360 +sssg49 +(dp361 +g37 +S'NC_000007.14:g.55181288_55181299dup' +p362 +sg39 +(dp363 +g41 +g42 +sg43 +S'TCCAGGAAGCCT' +p364 +sg45 +S'55181288' +p365 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p366 +sssS'grch37' +p367 +(dp368 +g37 +S'NC_000007.13:g.55248981_55248992dup' +p369 +sg39 +(dp370 +g41 +g60 +sg43 +S'TCCAGGAAGCCT' +p371 +sg45 +S'55248981' +p372 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p373 +sssS'grch38' +p374 +(dp375 +g37 +S'NC_000007.14:g.55181288_55181299dup' +p376 +sg39 +(dp377 +g41 +g60 +sg43 +S'TCCAGGAAGCCT' +p378 +sg45 +S'55181288' +p379 +sg47 +S'TCCAGGAAGCCTTCCAGGAAGCCT' +p380 +ssssg71 +(dp381 +g73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333826.1' +p382 +sg75 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346897.1' +p383 +sssS'metadata' +p384 +(dp385 +S'variantvalidator_hgvs_version' +p386 +S'1.1.3' +p387 +sS'uta_schema' +p388 +S'uta_20180821' +p389 +sS'seqrepo_db' +p390 +S'2018-08-21' +p391 +sS'variantvalidator_version' +p392 +S'v0.2' +p393 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant298.txt b/VariantValidator/testing/testOutputsMasterITS/variant298.txt new file mode 100644 index 00000000..7951ff48 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant298.txt @@ -0,0 +1,292 @@ +(dp0 +S'NM_001540.4:c.82C>A' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens heat shock protein family B (small) member 1 (HSPB1), mRNA +p12 +sS'gene_symbol' +p13 +S'HSPB1' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_001531.1:p.(Leu28Ile)' +p18 +sS'slr' +p19 +S'NP_001531.1:p.(L28I)' +p20 +ssS'submitted_variant' +p21 +S'7-75932111-C-A' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_001540.4:c.82C>A' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000007.13:g.75932111C>A' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr7' +p37 +sS'ref' +p38 +S'C' +p39 +sS'pos' +p40 +S'75932111' +p41 +sS'alt' +p42 +S'A' +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000007.14:g.76302794C>A' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +g39 +sg40 +S'76302794' +p48 +sg42 +g43 +sssS'grch37' +p49 +(dp50 +g32 +S'NC_000007.13:g.75932111C>A' +p51 +sg34 +(dp52 +g36 +S'7' +p53 +sg38 +g39 +sg40 +S'75932111' +p54 +sg42 +g43 +sssS'grch38' +p55 +(dp56 +g32 +S'NC_000007.14:g.76302794C>A' +p57 +sg34 +(dp58 +g36 +g53 +sg38 +g39 +sg40 +S'76302794' +p59 +sg42 +g43 +ssssS'reference_sequence_records' +p60 +(dp61 +S'protein' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001531.1' +p63 +sS'transcript' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001540.4' +p65 +sssS'flag' +p66 +S'gene_variant' +p67 
+sS'metadata' +p68 +(dp69 +S'variantvalidator_hgvs_version' +p70 +S'1.1.3' +p71 +sS'uta_schema' +p72 +S'uta_20180821' +p73 +sS'seqrepo_db' +p74 +S'2018-08-21' +p75 +sS'variantvalidator_version' +p76 +S'v0.2' +p77 +ssS'NM_001540.3:c.82C>A' +p78 +(dp79 +g3 +g4 +sg5 +(lp80 +S'A more recent version of the selected reference sequence NM_001540.3 is available (NM_001540.4)' +p81 +aS'NM_001540.4:c.82C>A MUST be fully validated prior to use in reports' +p82 +aS'select_variants=NM_001540.4:c.82C>A' +p83 +aS'RefSeqGene record not available' +p84 +asg8 +g4 +sg9 +(lp85 +sg11 +VHomo sapiens heat shock protein family B (small) member 1 (HSPB1), mRNA +p86 +sg13 +S'HSPB1' +p87 +sg15 +(dp88 +g17 +S'NP_001531.1:p.(Leu28Ile)' +p89 +sg19 +S'NP_001531.1:p.(L28I)' +p90 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001540.3:c.82C>A' +p91 +sg27 +g4 +sg28 +(dp92 +S'hg19' +p93 +(dp94 +g32 +S'NC_000007.13:g.75932111C>A' +p95 +sg34 +(dp96 +g36 +g37 +sg38 +g39 +sg40 +S'75932111' +p97 +sg42 +g43 +sssg44 +(dp98 +g32 +S'NC_000007.14:g.76302794C>A' +p99 +sg34 +(dp100 +g36 +g37 +sg38 +g39 +sg40 +S'76302794' +p101 +sg42 +g43 +sssS'grch37' +p102 +(dp103 +g32 +S'NC_000007.13:g.75932111C>A' +p104 +sg34 +(dp105 +g36 +g53 +sg38 +g39 +sg40 +S'75932111' +p106 +sg42 +g43 +sssS'grch38' +p107 +(dp108 +g32 +S'NC_000007.14:g.76302794C>A' +p109 +sg34 +(dp110 +g36 +g53 +sg38 +g39 +sg40 +S'76302794' +p111 +sg42 +g43 +ssssg60 +(dp112 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001531.1' +p113 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001540.3' +p114 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant299.txt b/VariantValidator/testing/testOutputsMasterITS/variant299.txt new file mode 100644 index 00000000..d507da8c --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant299.txt @@ -0,0 +1,304 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_005751.4:c.4004_4006dup' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'NC_000007.13:g.91652178A>AAAC automapped to NC_000007.13:g.91652179_91652181dupAAC' +p19 +aS'RefSeqGene record not available' +p20 +asS'refseqgene_context_intronic_sequence' +p21 +g16 +sS'alt_genomic_loci' +p22 +(lp23 +sS'transcript_description' +p24 +VHomo sapiens A-kinase anchoring protein 9 (AKAP9), transcript variant 2, mRNA +p25 +sS'gene_symbol' +p26 +S'AKAP9' +p27 +sS'hgvs_predicted_protein_consequence' +p28 +(dp29 +S'tlr' +p30 +S'NP_005742.4:p.(Lys1335_Leu1336insGln)' +p31 +sS'slr' +p32 +S'NP_005742.4:p.(K1335_L1336insQ)' +p33 +ssS'submitted_variant' +p34 +S'7-91652178-A-AAAC' +p35 +sS'genome_context_intronic_sequence' +p36 +g16 +sS'hgvs_lrg_variant' +p37 +g16 +sS'hgvs_transcript_variant' +p38 +S'NM_005751.4:c.4004_4006dup' +p39 +sS'hgvs_refseqgene_variant' +p40 +g16 +sS'primary_assembly_loci' +p41 +(dp42 +S'hg19' +p43 +(dp44 +S'hgvs_genomic_description' +p45 +S'NC_000007.13:g.91652179_91652181dup' +p46 +sS'vcf' +p47 +(dp48 +S'chr' +p49 +S'chr7' +p50 +sS'ref' +p51 +S'AAC' +p52 +sS'pos' +p53 +S'91652179' +p54 +sS'alt' +p55 +S'AACAAC' +p56 +sssS'hg38' +p57 +(dp58 +g45 +S'NC_000007.14:g.92022865_92022867dup' +p59 +sg47 +(dp60 +g49 +g50 +sg51 +S'AAC' +p61 +sg53 +S'92022865' +p62 +sg55 +S'AACAAC' +p63 +sssS'grch37' +p64 +(dp65 +g45 +S'NC_000007.13:g.91652179_91652181dup' +p66 +sg47 +(dp67 
+g49 +S'7' +p68 +sg51 +S'AAC' +p69 +sg53 +S'91652179' +p70 +sg55 +S'AACAAC' +p71 +sssS'grch38' +p72 +(dp73 +g45 +S'NC_000007.14:g.92022865_92022867dup' +p74 +sg47 +(dp75 +g49 +g68 +sg51 +S'AAC' +p76 +sg53 +S'92022865' +p77 +sg55 +S'AACAAC' +p78 +ssssS'reference_sequence_records' +p79 +(dp80 +S'protein' +p81 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005742.4' +p82 +sS'transcript' +p83 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005751.4' +p84 +sssS'NM_147185.2:c.4004_4006dup' +p85 +(dp86 +g15 +g16 +sg17 +(lp87 +S'NC_000007.13:g.91652178A>AAAC automapped to NC_000007.13:g.91652179_91652181dupAAC' +p88 +aS'RefSeqGene record not available' +p89 +asg21 +g16 +sg22 +(lp90 +sg24 +VHomo sapiens A-kinase anchoring protein 9 (AKAP9), transcript variant 3, mRNA +p91 +sg26 +S'AKAP9' +p92 +sg28 +(dp93 +g30 +S'NP_671714.1:p.(Lys1335_Leu1336insGln)' +p94 +sg32 +S'NP_671714.1:p.(K1335_L1336insQ)' +p95 +ssg34 +g35 +sg36 +g16 +sg37 +g16 +sg38 +S'NM_147185.2:c.4004_4006dup' +p96 +sg40 +g16 +sg41 +(dp97 +S'hg19' +p98 +(dp99 +g45 +S'NC_000007.13:g.91652179_91652181dup' +p100 +sg47 +(dp101 +g49 +g50 +sg51 +S'AAC' +p102 +sg53 +S'91652179' +p103 +sg55 +S'AACAAC' +p104 +sssg57 +(dp105 +g45 +S'NC_000007.14:g.92022865_92022867dup' +p106 +sg47 +(dp107 +g49 +g50 +sg51 +S'AAC' +p108 +sg53 +S'92022865' +p109 +sg55 +S'AACAAC' +p110 +sssS'grch37' +p111 +(dp112 +g45 +S'NC_000007.13:g.91652179_91652181dup' +p113 +sg47 +(dp114 +g49 +g68 +sg51 +S'AAC' +p115 +sg53 +S'91652179' +p116 +sg55 +S'AACAAC' +p117 +sssS'grch38' +p118 +(dp119 +g45 +S'NC_000007.14:g.92022865_92022867dup' +p120 +sg47 +(dp121 +g49 +g68 +sg51 +S'AAC' +p122 +sg53 +S'92022865' +p123 +sg55 +S'AACAAC' +p124 +ssssg79 +(dp125 +g81 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_671714.1' +p126 +sg83 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_147185.2' +p127 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant3.txt b/VariantValidator/testing/testOutputsMasterITS/variant3.txt new file mode 100644 index 00000000..7efe2ac2 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant3.txt @@ -0,0 +1,171 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_015120.4:c.34C>T' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'RefSeqGene record not available' +p19 +asS'refseqgene_context_intronic_sequence' +p20 +g16 +sS'alt_genomic_loci' +p21 +(lp22 +sS'transcript_description' +p23 +VHomo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA +p24 +sS'gene_symbol' +p25 +S'ALMS1' +p26 +sS'hgvs_predicted_protein_consequence' +p27 +(dp28 +S'tlr' +p29 +S'NP_055935.4:p.(Leu12=)' +p30 +sS'slr' +p31 +S'NP_055935.4:p.(L12=)' +p32 +ssS'submitted_variant' +p33 +S'NM_015120.4:c.34C>T' +p34 +sS'genome_context_intronic_sequence' +p35 +g16 +sS'hgvs_lrg_variant' +p36 +g16 +sS'hgvs_transcript_variant' +p37 +S'NM_015120.4:c.34C>T' +p38 +sS'hgvs_refseqgene_variant' +p39 +g16 +sS'primary_assembly_loci' +p40 +(dp41 +S'hg19' +p42 +(dp43 +S'hgvs_genomic_description' +p44 +S'NC_000002.11:g.73613030C>T' +p45 +sS'vcf' +p46 +(dp47 +S'chr' +p48 +S'chr2' +p49 +sS'ref' +p50 +VC +p51 +sS'pos' +p52 +S'73613030' +p53 +sS'alt' +p54 +VT +p55 +sssS'hg38' +p56 +(dp57 +g44 +S'NC_000002.12:g.73385902C>T' +p58 +sg46 +(dp59 +g48 +g49 +sg50 +g51 +sg52 +S'73385902' +p60 +sg54 +g55 +sssS'grch37' +p61 +(dp62 +g44 +S'NC_000002.11:g.73613030C>T' +p63 +sg46 +(dp64 +g48 +S'2' +p65 +sg50 +g51 +sg52 +S'73613030' +p66 +sg54 +g55 +sssS'grch38' +p67 +(dp68 +g44 +S'NC_000002.12:g.73385902C>T' +p69 +sg46 +(dp70 +g48 +g65 +sg50 +g51 +sg52 +S'73385902' +p71 +sg54 
+g55 +ssssS'reference_sequence_records' +p72 +(dp73 +S'protein' +p74 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4' +p75 +sS'transcript' +p76 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4' +p77 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant30.txt b/VariantValidator/testing/testOutputsMasterITS/variant30.txt new file mode 100644 index 00000000..277e90b3 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant30.txt @@ -0,0 +1,171 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.*1400_*1406=' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p14 +sS'gene_symbol' +p15 +S'COL1A1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_000079.2:p.?' +p20 +sS'slr' +p21 +S'NP_000079.2:p.?' 
+p22 +ssS'submitted_variant' +p23 +S'NC_000017.10:g.48261457_48261463TTATGTT=' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_000088.3:c.*1400_*1406=' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000017.10:g.48261457_48261463=' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr17' +p39 +sS'ref' +p40 +VTTATGTT +p41 +sS'pos' +p42 +S'48261457' +p43 +sS'alt' +p44 +g41 +sssS'hg38' +p45 +(dp46 +g34 +S'NC_000017.11:g.50184096_50184102=' +p47 +sg36 +(dp48 +g38 +g39 +sg40 +VTTATGTT +p49 +sg42 +S'50184096' +p50 +sg44 +g49 +sssS'grch37' +p51 +(dp52 +g34 +S'NC_000017.10:g.48261457_48261463=' +p53 +sg36 +(dp54 +g38 +S'17' +p55 +sg40 +g41 +sg42 +S'48261457' +p56 +sg44 +g41 +sssS'grch38' +p57 +(dp58 +g34 +S'NC_000017.11:g.50184096_50184102=' +p59 +sg36 +(dp60 +g38 +g55 +sg40 +g49 +sg42 +S'50184096' +p61 +sg44 +g49 +ssssS'reference_sequence_records' +p62 +(dp63 +S'protein' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p65 +sS'transcript' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p67 +sssS'metadata' +p68 +(dp69 +S'variantvalidator_hgvs_version' +p70 +S'1.1.3' +p71 +sS'uta_schema' +p72 +S'uta_20180821' +p73 +sS'seqrepo_db' +p74 +S'2018-08-21' +p75 +sS'variantvalidator_version' +p76 +S'v0.2' +p77 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant300.txt b/VariantValidator/testing/testOutputsMasterITS/variant300.txt new file mode 100644 index 00000000..e68159d5 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant300.txt @@ -0,0 +1,294 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NR_149084.1:n.221+1140_221+1142del' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'NC_000007.13:g.117199644ATCT>A automapped to NC_000007.13:g.117199646_117199648delCTT' +p19 +aS'RefSeqGene record not available' +p20 +asS'refseqgene_context_intronic_sequence' +p21 +g16 +sS'alt_genomic_loci' +p22 +(lp23 +sS'transcript_description' +p24 +VHomo sapiens CFTR antisense RNA 1 (CFTR-AS1), long non-coding RNA +p25 +sS'gene_symbol' +p26 +S'CFTR-AS1' +p27 +sS'hgvs_predicted_protein_consequence' +p28 +(dp29 +S'tlr' +p30 +S'Non-coding :n.' 
+p31 +sS'slr' +p32 +g31 +ssS'submitted_variant' +p33 +S'7-117199644-ATCT-A' +p34 +sS'genome_context_intronic_sequence' +p35 +S'NC_000007.13(NR_149084.1):c.221+1140_221+1142del' +p36 +sS'hgvs_lrg_variant' +p37 +g16 +sS'hgvs_transcript_variant' +p38 +S'NR_149084.1:n.221+1140_221+1142del' +p39 +sS'hgvs_refseqgene_variant' +p40 +g16 +sS'primary_assembly_loci' +p41 +(dp42 +S'hg19' +p43 +(dp44 +S'hgvs_genomic_description' +p45 +S'NC_000007.13:g.117199645_117199647del' +p46 +sS'vcf' +p47 +(dp48 +S'chr' +p49 +S'chr7' +p50 +sS'ref' +p51 +S'ATCT' +p52 +sS'pos' +p53 +S'117199644' +p54 +sS'alt' +p55 +S'A' +p56 +sssS'hg38' +p57 +(dp58 +g45 +S'NC_000007.14:g.117559591_117559593del' +p59 +sg47 +(dp60 +g49 +g50 +sg51 +S'ATCT' +p61 +sg53 +S'117559590' +p62 +sg55 +g56 +sssS'grch37' +p63 +(dp64 +g45 +S'NC_000007.13:g.117199645_117199647del' +p65 +sg47 +(dp66 +g49 +S'7' +p67 +sg51 +S'ATCT' +p68 +sg53 +S'117199644' +p69 +sg55 +g56 +sssS'grch38' +p70 +(dp71 +g45 +S'NC_000007.14:g.117559591_117559593del' +p72 +sg47 +(dp73 +g49 +g67 +sg51 +S'ATCT' +p74 +sg53 +S'117559590' +p75 +sg55 +g56 +ssssS'reference_sequence_records' +p76 +(dp77 +S'transcript' +p78 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_149084.1' +p79 +sssS'NM_000492.3:c.1521_1523del' +p80 +(dp81 +g15 +g16 +sg17 +(lp82 +S'NC_000007.13:g.117199644ATCT>A automapped to NC_000007.13:g.117199646_117199648delCTT' +p83 +aS'RefSeqGene record not available' +p84 +asg21 +g16 +sg22 +(lp85 +sg24 +VHomo sapiens cystic fibrosis transmembrane conductance regulator (CFTR), mRNA +p86 +sg26 +S'CFTR' +p87 +sg28 +(dp88 +g30 +S'NP_000483.3:p.(Phe508del)' +p89 +sg32 +S'NP_000483.3:p.(F508del)' +p90 +ssg33 +g34 +sg35 +g16 +sg37 +g16 +sg38 +S'NM_000492.3:c.1521_1523del' +p91 +sg40 +g16 +sg41 +(dp92 +S'hg19' +p93 +(dp94 +g45 +S'NC_000007.13:g.117199646_117199648del' +p95 +sg47 +(dp96 +g49 +g50 +sg51 +S'ATCT' +p97 +sg53 +S'117199644' +p98 +sg55 +g56 +sssg57 +(dp99 +g45 +S'NC_000007.14:g.117559592_117559594del' +p100 +sg47 +(dp101 +g49 +g50 +sg51 +S'ATCT' 
+p102 +sg53 +S'117559590' +p103 +sg55 +g56 +sssS'grch37' +p104 +(dp105 +g45 +S'NC_000007.13:g.117199646_117199648del' +p106 +sg47 +(dp107 +g49 +g67 +sg51 +S'ATCT' +p108 +sg53 +S'117199644' +p109 +sg55 +g56 +sssS'grch38' +p110 +(dp111 +g45 +S'NC_000007.14:g.117559592_117559594del' +p112 +sg47 +(dp113 +g49 +g67 +sg51 +S'ATCT' +p114 +sg53 +S'117559590' +p115 +sg55 +g56 +ssssg76 +(dp116 +S'protein' +p117 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000483.3' +p118 +sg78 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000492.3' +p119 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant301.txt b/VariantValidator/testing/testOutputsMasterITS/variant301.txt new file mode 100644 index 00000000..6cd8e292 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant301.txt @@ -0,0 +1,548 @@ +(dp0 +S'NR_148928.1:n.2896_2897delinsAG' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000007.13:g.140453136AC>CT automapped to NC_000007.13:g.140453136_140453137delACinsCT' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 3, non-coding RNA +p13 +sS'gene_symbol' +p14 +S'BRAF' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'Non-coding :n.' 
+p19 +sS'slr' +p20 +g19 +ssS'submitted_variant' +p21 +S'7-140453136-AC-CT' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NR_148928.1:n.2896_2897delinsAG' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000007.13:g.140453136_140453137delinsCT' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr7' +p37 +sS'ref' +p38 +S'AC' +p39 +sS'pos' +p40 +S'140453136' +p41 +sS'alt' +p42 +VCT +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000007.14:g.140753336_140753337delinsCT' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +S'AC' +p48 +sg40 +S'140753336' +p49 +sg42 +VCT +p50 +sssS'grch37' +p51 +(dp52 +g32 +S'NC_000007.13:g.140453136_140453137delinsCT' +p53 +sg34 +(dp54 +g36 +S'7' +p55 +sg38 +S'AC' +p56 +sg40 +S'140453136' +p57 +sg42 +g43 +sssS'grch38' +p58 +(dp59 +g32 +S'NC_000007.14:g.140753336_140753337delinsCT' +p60 +sg34 +(dp61 +g36 +g55 +sg38 +S'AC' +p62 +sg40 +S'140753336' +p63 +sg42 +g50 +ssssS'reference_sequence_records' +p64 +(dp65 +S'transcript' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_148928.1' +p67 +sssS'NM_004333.4:c.1798_1799delinsAG' +p68 +(dp69 +g3 +g4 +sg5 +(lp70 +S'NC_000007.13:g.140453136AC>CT automapped to NC_000007.13:g.140453136_140453137delACinsCT' +p71 +aS'A more recent version of the selected reference sequence NM_004333.4 is available (NM_004333.5)' +p72 +aS'NM_004333.5:c.1798_1799delGTinsAG MUST be fully validated prior to use in reports' +p73 +aS'select_variants=NM_004333.5:c.1798_1799delinsAG' +p74 +aS'RefSeqGene record not available' +p75 +asg9 +g4 +sg10 +(lp76 +sg12 +VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), mRNA +p77 +sg14 +S'BRAF' +p78 +sg16 +(dp79 +g18 +S'NP_004324.2:p.(Val600Arg)' +p80 +sg20 +S'NP_004324.2:p.(V600R)' +p81 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_004333.4:c.1798_1799delinsAG' +p82 +sg27 +g4 +sg28 +(dp83 +S'hg19' +p84 +(dp85 +g32 
+S'NC_000007.13:g.140453136_140453137delinsCT' +p86 +sg34 +(dp87 +g36 +g37 +sg38 +S'AC' +p88 +sg40 +S'140453136' +p89 +sg42 +VCT +p90 +sssg44 +(dp91 +g32 +S'NC_000007.14:g.140753336_140753337delinsCT' +p92 +sg34 +(dp93 +g36 +g37 +sg38 +S'AC' +p94 +sg40 +S'140753336' +p95 +sg42 +VCT +p96 +sssS'grch37' +p97 +(dp98 +g32 +S'NC_000007.13:g.140453136_140453137delinsCT' +p99 +sg34 +(dp100 +g36 +g55 +sg38 +S'AC' +p101 +sg40 +S'140453136' +p102 +sg42 +g90 +sssS'grch38' +p103 +(dp104 +g32 +S'NC_000007.14:g.140753336_140753337delinsCT' +p105 +sg34 +(dp106 +g36 +g55 +sg38 +S'AC' +p107 +sg40 +S'140753336' +p108 +sg42 +g96 +ssssg64 +(dp109 +S'protein' +p110 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2' +p111 +sg66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.4' +p112 +sssS'NM_004333.5:c.1798_1799delinsAG' +p113 +(dp114 +g3 +g4 +sg5 +(lp115 +S'NC_000007.13:g.140453136AC>CT automapped to NC_000007.13:g.140453136_140453137delACinsCT' +p116 +aS'RefSeqGene record not available' +p117 +asg9 +g4 +sg10 +(lp118 +sg12 +VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 1, mRNA +p119 +sg14 +S'BRAF' +p120 +sg16 +(dp121 +g18 +S'NP_004324.2:p.(Val600Arg)' +p122 +sg20 +S'NP_004324.2:p.(V600R)' +p123 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_004333.5:c.1798_1799delinsAG' +p124 +sg27 +g4 +sg28 +(dp125 +S'hg19' +p126 +(dp127 +g32 +S'NC_000007.13:g.140453136_140453137delinsCT' +p128 +sg34 +(dp129 +g36 +g37 +sg38 +S'AC' +p130 +sg40 +S'140453136' +p131 +sg42 +VCT +p132 +sssg44 +(dp133 +g32 +S'NC_000007.14:g.140753336_140753337delinsCT' +p134 +sg34 +(dp135 +g36 +g37 +sg38 +S'AC' +p136 +sg40 +S'140753336' +p137 +sg42 +VCT +p138 +sssS'grch37' +p139 +(dp140 +g32 +S'NC_000007.13:g.140453136_140453137delinsCT' +p141 +sg34 +(dp142 +g36 +g55 +sg38 +S'AC' +p143 +sg40 +S'140453136' +p144 +sg42 +g132 +sssS'grch38' +p145 +(dp146 +g32 +S'NC_000007.14:g.140753336_140753337delinsCT' +p147 +sg34 +(dp148 +g36 +g55 +sg38 +S'AC' +p149 +sg40 +S'140753336' +p150 +sg42 
+g138 +ssssg64 +(dp151 +g110 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2' +p152 +sg66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.5' +p153 +sssS'flag' +p154 +S'gene_variant' +p155 +sS'NM_001354609.1:c.1798_1799delinsAG' +p156 +(dp157 +g3 +g4 +sg5 +(lp158 +S'NC_000007.13:g.140453136AC>CT automapped to NC_000007.13:g.140453136_140453137delACinsCT' +p159 +aS'RefSeqGene record not available' +p160 +asg9 +g4 +sg10 +(lp161 +sg12 +VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 2, mRNA +p162 +sg14 +S'BRAF' +p163 +sg16 +(dp164 +g18 +S'NP_001341538.1:p.(Val600Arg)' +p165 +sg20 +S'NP_001341538.1:p.(V600R)' +p166 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001354609.1:c.1798_1799delinsAG' +p167 +sg27 +g4 +sg28 +(dp168 +S'hg19' +p169 +(dp170 +g32 +S'NC_000007.13:g.140453136_140453137delinsCT' +p171 +sg34 +(dp172 +g36 +g37 +sg38 +S'AC' +p173 +sg40 +S'140453136' +p174 +sg42 +VCT +p175 +sssg44 +(dp176 +g32 +S'NC_000007.14:g.140753336_140753337delinsCT' +p177 +sg34 +(dp178 +g36 +g37 +sg38 +S'AC' +p179 +sg40 +S'140753336' +p180 +sg42 +VCT +p181 +sssS'grch37' +p182 +(dp183 +g32 +S'NC_000007.13:g.140453136_140453137delinsCT' +p184 +sg34 +(dp185 +g36 +g55 +sg38 +S'AC' +p186 +sg40 +S'140453136' +p187 +sg42 +g175 +sssS'grch38' +p188 +(dp189 +g32 +S'NC_000007.14:g.140753336_140753337delinsCT' +p190 +sg34 +(dp191 +g36 +g55 +sg38 +S'AC' +p192 +sg40 +S'140753336' +p193 +sg42 +g181 +ssssg64 +(dp194 +g110 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341538.1' +p195 +sg66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354609.1' +p196 +sssS'metadata' +p197 +(dp198 +S'variantvalidator_hgvs_version' +p199 +S'1.1.3' +p200 +sS'uta_schema' +p201 +S'uta_20180821' +p202 +sS'seqrepo_db' +p203 +S'2018-08-21' +p204 +sS'variantvalidator_version' +p205 +S'v0.2' +p206 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant302.txt b/VariantValidator/testing/testOutputsMasterITS/variant302.txt new file mode 100644 index 00000000..9977ca2c --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant302.txt @@ -0,0 +1,518 @@ +(dp0 +S'NM_001354609.1:c.1799T>A' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 2, mRNA +p12 +sS'gene_symbol' +p13 +S'BRAF' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_001341538.1:p.(Val600Glu)' +p18 +sS'slr' +p19 +S'NP_001341538.1:p.(V600E)' +p20 +ssS'submitted_variant' +p21 +S'7-140453136-A-T' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_001354609.1:c.1799T>A' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000007.13:g.140453136A>T' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr7' +p37 +sS'ref' +p38 +VA +p39 +sS'pos' +p40 +S'140453136' +p41 +sS'alt' +p42 +VT +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000007.14:g.140753336A>T' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +g39 +sg40 +S'140753336' +p48 +sg42 +g43 +sssS'grch37' +p49 +(dp50 +g32 +S'NC_000007.13:g.140453136A>T' +p51 +sg34 +(dp52 +g36 +S'7' +p53 +sg38 +g39 +sg40 +S'140453136' +p54 +sg42 +g43 +sssS'grch38' +p55 +(dp56 +g32 +S'NC_000007.14:g.140753336A>T' +p57 +sg34 +(dp58 +g36 +g53 +sg38 +g39 +sg40 +S'140753336' +p59 +sg42 +g43 +ssssS'reference_sequence_records' +p60 +(dp61 +S'protein' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341538.1' +p63 +sS'transcript' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354609.1' 
+p65 +sssS'NR_148928.1:n.2897T>A' +p66 +(dp67 +g3 +g4 +sg5 +(lp68 +S'RefSeqGene record not available' +p69 +asg8 +g4 +sg9 +(lp70 +sg11 +VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 3, non-coding RNA +p71 +sg13 +S'BRAF' +p72 +sg15 +(dp73 +g17 +S'Non-coding :n.' +p74 +sg19 +g74 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NR_148928.1:n.2897T>A' +p75 +sg27 +g4 +sg28 +(dp76 +S'hg19' +p77 +(dp78 +g32 +S'NC_000007.13:g.140453136A>T' +p79 +sg34 +(dp80 +g36 +g37 +sg38 +g39 +sg40 +S'140453136' +p81 +sg42 +g43 +sssg44 +(dp82 +g32 +S'NC_000007.14:g.140753336A>T' +p83 +sg34 +(dp84 +g36 +g37 +sg38 +g39 +sg40 +S'140753336' +p85 +sg42 +g43 +sssS'grch37' +p86 +(dp87 +g32 +S'NC_000007.13:g.140453136A>T' +p88 +sg34 +(dp89 +g36 +g53 +sg38 +g39 +sg40 +S'140453136' +p90 +sg42 +g43 +sssS'grch38' +p91 +(dp92 +g32 +S'NC_000007.14:g.140753336A>T' +p93 +sg34 +(dp94 +g36 +g53 +sg38 +g39 +sg40 +S'140753336' +p95 +sg42 +g43 +ssssg60 +(dp96 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_148928.1' +p97 +sssS'NM_004333.5:c.1799T>A' +p98 +(dp99 +g3 +g4 +sg5 +(lp100 +S'RefSeqGene record not available' +p101 +asg8 +g4 +sg9 +(lp102 +sg11 +VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 1, mRNA +p103 +sg13 +S'BRAF' +p104 +sg15 +(dp105 +g17 +S'NP_004324.2:p.(Val600Glu)' +p106 +sg19 +S'NP_004324.2:p.(V600E)' +p107 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_004333.5:c.1799T>A' +p108 +sg27 +g4 +sg28 +(dp109 +S'hg19' +p110 +(dp111 +g32 +S'NC_000007.13:g.140453136A>T' +p112 +sg34 +(dp113 +g36 +g37 +sg38 +g39 +sg40 +S'140453136' +p114 +sg42 +g43 +sssg44 +(dp115 +g32 +S'NC_000007.14:g.140753336A>T' +p116 +sg34 +(dp117 +g36 +g37 +sg38 +g39 +sg40 +S'140753336' +p118 +sg42 +g43 +sssS'grch37' +p119 +(dp120 +g32 +S'NC_000007.13:g.140453136A>T' +p121 +sg34 +(dp122 +g36 +g53 +sg38 +g39 +sg40 +S'140453136' +p123 +sg42 +g43 +sssS'grch38' +p124 +(dp125 +g32 +S'NC_000007.14:g.140753336A>T' +p126 +sg34 +(dp127 +g36 +g53 +sg38 +g39 +sg40 
+S'140753336' +p128 +sg42 +g43 +ssssg60 +(dp129 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2' +p130 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.5' +p131 +sssS'flag' +p132 +S'gene_variant' +p133 +sS'NM_004333.4:c.1799T>A' +p134 +(dp135 +g3 +g4 +sg5 +(lp136 +S'A more recent version of the selected reference sequence NM_004333.4 is available (NM_004333.5)' +p137 +aS'NM_004333.5:c.1799T>A MUST be fully validated prior to use in reports' +p138 +aS'select_variants=NM_004333.5:c.1799T>A' +p139 +aS'RefSeqGene record not available' +p140 +asg8 +g4 +sg9 +(lp141 +sg11 +VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), mRNA +p142 +sg13 +S'BRAF' +p143 +sg15 +(dp144 +g17 +S'NP_004324.2:p.(Val600Glu)' +p145 +sg19 +S'NP_004324.2:p.(V600E)' +p146 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_004333.4:c.1799T>A' +p147 +sg27 +g4 +sg28 +(dp148 +S'hg19' +p149 +(dp150 +g32 +S'NC_000007.13:g.140453136A>T' +p151 +sg34 +(dp152 +g36 +g37 +sg38 +g39 +sg40 +S'140453136' +p153 +sg42 +g43 +sssg44 +(dp154 +g32 +S'NC_000007.14:g.140753336A>T' +p155 +sg34 +(dp156 +g36 +g37 +sg38 +g39 +sg40 +S'140753336' +p157 +sg42 +g43 +sssS'grch37' +p158 +(dp159 +g32 +S'NC_000007.13:g.140453136A>T' +p160 +sg34 +(dp161 +g36 +g53 +sg38 +g39 +sg40 +S'140453136' +p162 +sg42 +g43 +sssS'grch38' +p163 +(dp164 +g32 +S'NC_000007.14:g.140753336A>T' +p165 +sg34 +(dp166 +g36 +g53 +sg38 +g39 +sg40 +S'140753336' +p167 +sg42 +g43 +ssssg60 +(dp168 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2' +p169 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.4' +p170 +sssS'metadata' +p171 +(dp172 +S'variantvalidator_hgvs_version' +p173 +S'1.1.3' +p174 +sS'uta_schema' +p175 +S'uta_20180821' +p176 +sS'seqrepo_db' +p177 +S'2018-08-21' +p178 +sS'variantvalidator_version' +p179 +S'v0.2' +p180 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant303.txt b/VariantValidator/testing/testOutputsMasterITS/variant303.txt new file mode 100644 index 00000000..f7a06ef7 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant303.txt @@ -0,0 +1,518 @@ +(dp0 +S'NR_148928.1:n.2896G>A' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 3, non-coding RNA +p12 +sS'gene_symbol' +p13 +S'BRAF' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'Non-coding :n.' +p18 +sS'slr' +p19 +g18 +ssS'submitted_variant' +p20 +S'7-140453137-C-T' +p21 +sS'genome_context_intronic_sequence' +p22 +g4 +sS'hgvs_lrg_variant' +p23 +g4 +sS'hgvs_transcript_variant' +p24 +S'NR_148928.1:n.2896G>A' +p25 +sS'hgvs_refseqgene_variant' +p26 +g4 +sS'primary_assembly_loci' +p27 +(dp28 +S'hg19' +p29 +(dp30 +S'hgvs_genomic_description' +p31 +S'NC_000007.13:g.140453137C>T' +p32 +sS'vcf' +p33 +(dp34 +S'chr' +p35 +S'chr7' +p36 +sS'ref' +p37 +VC +p38 +sS'pos' +p39 +S'140453137' +p40 +sS'alt' +p41 +VT +p42 +sssS'hg38' +p43 +(dp44 +g31 +S'NC_000007.14:g.140753337C>T' +p45 +sg33 +(dp46 +g35 +g36 +sg37 +g38 +sg39 +S'140753337' +p47 +sg41 +g42 +sssS'grch37' +p48 +(dp49 +g31 +S'NC_000007.13:g.140453137C>T' +p50 +sg33 +(dp51 +g35 +S'7' +p52 +sg37 +g38 +sg39 +S'140453137' +p53 +sg41 +g42 +sssS'grch38' +p54 +(dp55 +g31 +S'NC_000007.14:g.140753337C>T' +p56 +sg33 +(dp57 +g35 +g52 +sg37 +g38 +sg39 +S'140753337' +p58 +sg41 +g42 +ssssS'reference_sequence_records' +p59 +(dp60 +S'transcript' +p61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_148928.1' +p62 +sssS'NM_004333.5:c.1798G>A' +p63 +(dp64 +g3 +g4 +sg5 +(lp65 +S'RefSeqGene record not available' +p66 +asg8 +g4 +sg9 
+(lp67 +sg11 +VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 1, mRNA +p68 +sg13 +S'BRAF' +p69 +sg15 +(dp70 +g17 +S'NP_004324.2:p.(Val600Met)' +p71 +sg19 +S'NP_004324.2:p.(V600M)' +p72 +ssg20 +g21 +sg22 +g4 +sg23 +g4 +sg24 +S'NM_004333.5:c.1798G>A' +p73 +sg26 +g4 +sg27 +(dp74 +S'hg19' +p75 +(dp76 +g31 +S'NC_000007.13:g.140453137C>T' +p77 +sg33 +(dp78 +g35 +g36 +sg37 +g38 +sg39 +S'140453137' +p79 +sg41 +g42 +sssg43 +(dp80 +g31 +S'NC_000007.14:g.140753337C>T' +p81 +sg33 +(dp82 +g35 +g36 +sg37 +g38 +sg39 +S'140753337' +p83 +sg41 +g42 +sssS'grch37' +p84 +(dp85 +g31 +S'NC_000007.13:g.140453137C>T' +p86 +sg33 +(dp87 +g35 +g52 +sg37 +g38 +sg39 +S'140453137' +p88 +sg41 +g42 +sssS'grch38' +p89 +(dp90 +g31 +S'NC_000007.14:g.140753337C>T' +p91 +sg33 +(dp92 +g35 +g52 +sg37 +g38 +sg39 +S'140753337' +p93 +sg41 +g42 +ssssg59 +(dp94 +S'protein' +p95 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2' +p96 +sg61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.5' +p97 +sssS'NM_004333.4:c.1798G>A' +p98 +(dp99 +g3 +g4 +sg5 +(lp100 +S'A more recent version of the selected reference sequence NM_004333.4 is available (NM_004333.5)' +p101 +aS'NM_004333.5:c.1798G>A MUST be fully validated prior to use in reports' +p102 +aS'select_variants=NM_004333.5:c.1798G>A' +p103 +aS'RefSeqGene record not available' +p104 +asg8 +g4 +sg9 +(lp105 +sg11 +VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), mRNA +p106 +sg13 +S'BRAF' +p107 +sg15 +(dp108 +g17 +S'NP_004324.2:p.(Val600Met)' +p109 +sg19 +S'NP_004324.2:p.(V600M)' +p110 +ssg20 +g21 +sg22 +g4 +sg23 +g4 +sg24 +S'NM_004333.4:c.1798G>A' +p111 +sg26 +g4 +sg27 +(dp112 +S'hg19' +p113 +(dp114 +g31 +S'NC_000007.13:g.140453137C>T' +p115 +sg33 +(dp116 +g35 +g36 +sg37 +g38 +sg39 +S'140453137' +p117 +sg41 +g42 +sssg43 +(dp118 +g31 +S'NC_000007.14:g.140753337C>T' +p119 +sg33 +(dp120 +g35 +g36 +sg37 +g38 +sg39 +S'140753337' +p121 +sg41 +g42 +sssS'grch37' +p122 +(dp123 +g31 +S'NC_000007.13:g.140453137C>T' 
+p124 +sg33 +(dp125 +g35 +g52 +sg37 +g38 +sg39 +S'140453137' +p126 +sg41 +g42 +sssS'grch38' +p127 +(dp128 +g31 +S'NC_000007.14:g.140753337C>T' +p129 +sg33 +(dp130 +g35 +g52 +sg37 +g38 +sg39 +S'140753337' +p131 +sg41 +g42 +ssssg59 +(dp132 +g95 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2' +p133 +sg61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.4' +p134 +sssS'NM_001354609.1:c.1798G>A' +p135 +(dp136 +g3 +g4 +sg5 +(lp137 +S'RefSeqGene record not available' +p138 +asg8 +g4 +sg9 +(lp139 +sg11 +VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 2, mRNA +p140 +sg13 +S'BRAF' +p141 +sg15 +(dp142 +g17 +S'NP_001341538.1:p.(Val600Met)' +p143 +sg19 +S'NP_001341538.1:p.(V600M)' +p144 +ssg20 +g21 +sg22 +g4 +sg23 +g4 +sg24 +S'NM_001354609.1:c.1798G>A' +p145 +sg26 +g4 +sg27 +(dp146 +S'hg19' +p147 +(dp148 +g31 +S'NC_000007.13:g.140453137C>T' +p149 +sg33 +(dp150 +g35 +g36 +sg37 +g38 +sg39 +S'140453137' +p151 +sg41 +g42 +sssg43 +(dp152 +g31 +S'NC_000007.14:g.140753337C>T' +p153 +sg33 +(dp154 +g35 +g36 +sg37 +g38 +sg39 +S'140753337' +p155 +sg41 +g42 +sssS'grch37' +p156 +(dp157 +g31 +S'NC_000007.13:g.140453137C>T' +p158 +sg33 +(dp159 +g35 +g52 +sg37 +g38 +sg39 +S'140453137' +p160 +sg41 +g42 +sssS'grch38' +p161 +(dp162 +g31 +S'NC_000007.14:g.140753337C>T' +p163 +sg33 +(dp164 +g35 +g52 +sg37 +g38 +sg39 +S'140753337' +p165 +sg41 +g42 +ssssg59 +(dp166 +g95 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341538.1' +p167 +sg61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354609.1' +p168 +sssS'flag' +p169 +S'gene_variant' +p170 +sS'metadata' +p171 +(dp172 +S'variantvalidator_hgvs_version' +p173 +S'1.1.3' +p174 +sS'uta_schema' +p175 +S'uta_20180821' +p176 +sS'seqrepo_db' +p177 +S'2018-08-21' +p178 +sS'variantvalidator_version' +p179 +S'v0.2' +p180 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant304.txt b/VariantValidator/testing/testOutputsMasterITS/variant304.txt new file mode 100644 index 00000000..150eff4f --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant304.txt @@ -0,0 +1,284 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000083.2:c.180+3A>T' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 1, mRNA +p14 +sS'gene_symbol' +p15 +S'CLCN1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_000074.2:p.?' +p20 +sS'slr' +p21 +S'NP_000074.2:p.?' +p22 +ssS'submitted_variant' +p23 +S'7-143013488-A-T' +p24 +sS'genome_context_intronic_sequence' +p25 +S'NC_000007.13(NM_000083.2):c.180+3A>T' +p26 +sS'hgvs_lrg_variant' +p27 +g6 +sS'hgvs_transcript_variant' +p28 +S'NM_000083.2:c.180+3A>T' +p29 +sS'hgvs_refseqgene_variant' +p30 +g6 +sS'primary_assembly_loci' +p31 +(dp32 +S'hg19' +p33 +(dp34 +S'hgvs_genomic_description' +p35 +S'NC_000007.13:g.143013488A>T' +p36 +sS'vcf' +p37 +(dp38 +S'chr' +p39 +S'chr7' +p40 +sS'ref' +p41 +S'A' +p42 +sS'pos' +p43 +S'143013488' +p44 +sS'alt' +p45 +S'T' +p46 +sssS'hg38' +p47 +(dp48 +g35 +S'NC_000007.14:g.143316395A>T' +p49 +sg37 +(dp50 +g39 +g40 +sg41 +g42 +sg43 +S'143316395' +p51 +sg45 +g46 +sssS'grch37' +p52 +(dp53 +g35 +S'NC_000007.13:g.143013488A>T' +p54 +sg37 +(dp55 +g39 +S'7' +p56 +sg41 +g42 +sg43 +S'143013488' +p57 +sg45 +g46 +sssS'grch38' +p58 +(dp59 +g35 +S'NC_000007.14:g.143316395A>T' +p60 +sg37 +(dp61 +g39 +g56 +sg41 +g42 +sg43 +S'143316395' +p62 +sg45 +g46 +ssssS'reference_sequence_records' +p63 +(dp64 +S'protein' +p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000074.2' +p66 +sS'transcript' +p67 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000083.2' +p68 +sssS'NR_046453.1:n.267+3A>T' +p69 +(dp70 +g5 +g6 +sg7 +(lp71 +S'RefSeqGene record not available' +p72 +asg10 +g6 +sg11 +(lp73 +sg13 +VHomo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 2, non-coding RNA +p74 +sg15 +S'CLCN1' +p75 +sg17 +(dp76 +g19 +S'Non-coding :n.' +p77 +sg21 +g77 +ssg23 +g24 +sg25 +S'NC_000007.13(NR_046453.1):c.267+3A>T' +p78 +sg27 +g6 +sg28 +S'NR_046453.1:n.267+3A>T' +p79 +sg30 +g6 +sg31 +(dp80 +S'hg19' +p81 +(dp82 +g35 +S'NC_000007.13:g.143013488A>T' +p83 +sg37 +(dp84 +g39 +g40 +sg41 +g42 +sg43 +S'143013488' +p85 +sg45 +g46 +sssg47 +(dp86 +g35 +S'NC_000007.14:g.143316395A>T' +p87 +sg37 +(dp88 +g39 +g40 +sg41 +g42 +sg43 +S'143316395' +p89 +sg45 +g46 +sssS'grch37' +p90 +(dp91 +g35 +S'NC_000007.13:g.143013488A>T' +p92 +sg37 +(dp93 +g39 +g56 +sg41 +g42 +sg43 +S'143013488' +p94 +sg45 +g46 +sssS'grch38' +p95 +(dp96 +g35 +S'NC_000007.14:g.143316395A>T' +p97 +sg37 +(dp98 +g39 +g56 +sg41 +g42 +sg43 +S'143316395' +p99 +sg45 +g46 +ssssg63 +(dp100 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_046453.1' +p101 +sssS'metadata' +p102 +(dp103 +S'variantvalidator_hgvs_version' +p104 +S'1.1.3' +p105 +sS'uta_schema' +p106 +S'uta_20180821' +p107 +sS'seqrepo_db' +p108 +S'2018-08-21' +p109 +sS'variantvalidator_version' +p110 +S'v0.2' +p111 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant305.txt b/VariantValidator/testing/testOutputsMasterITS/variant305.txt new file mode 100644 index 00000000..cec1f10a --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant305.txt @@ -0,0 +1,282 @@ +(dp0 +S'NR_046453.1:n.776G>A' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 2, non-coding RNA +p12 +sS'gene_symbol' +p13 +S'CLCN1' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'Non-coding :n.' +p18 +sS'slr' +p19 +g18 +ssS'submitted_variant' +p20 +S'7-143018934-G-A' +p21 +sS'genome_context_intronic_sequence' +p22 +g4 +sS'hgvs_lrg_variant' +p23 +g4 +sS'hgvs_transcript_variant' +p24 +S'NR_046453.1:n.776G>A' +p25 +sS'hgvs_refseqgene_variant' +p26 +g4 +sS'primary_assembly_loci' +p27 +(dp28 +S'hg19' +p29 +(dp30 +S'hgvs_genomic_description' +p31 +S'NC_000007.13:g.143018934G>A' +p32 +sS'vcf' +p33 +(dp34 +S'chr' +p35 +S'chr7' +p36 +sS'ref' +p37 +S'G' +p38 +sS'pos' +p39 +S'143018934' +p40 +sS'alt' +p41 +S'A' +p42 +sssS'hg38' +p43 +(dp44 +g31 +S'NC_000007.14:g.143321841G>A' +p45 +sg33 +(dp46 +g35 +g36 +sg37 +g38 +sg39 +S'143321841' +p47 +sg41 +g42 +sssS'grch37' +p48 +(dp49 +g31 +S'NC_000007.13:g.143018934G>A' +p50 +sg33 +(dp51 +g35 +S'7' +p52 +sg37 +g38 +sg39 +S'143018934' +p53 +sg41 +g42 +sssS'grch38' +p54 +(dp55 +g31 +S'NC_000007.14:g.143321841G>A' +p56 +sg33 +(dp57 +g35 +g52 +sg37 +g38 +sg39 +S'143321841' +p58 +sg41 +g42 +ssssS'reference_sequence_records' +p59 +(dp60 +S'transcript' +p61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_046453.1' +p62 +sssS'flag' +p63 +S'gene_variant' +p64 +sS'NM_000083.2:c.689G>A' +p65 +(dp66 +g3 +g4 +sg5 +(lp67 +S'RefSeqGene record not 
available' +p68 +asg8 +g4 +sg9 +(lp69 +sg11 +VHomo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 1, mRNA +p70 +sg13 +S'CLCN1' +p71 +sg15 +(dp72 +g17 +S'NP_000074.2:p.(Gly230Glu)' +p73 +sg19 +S'NP_000074.2:p.(G230E)' +p74 +ssg20 +g21 +sg22 +g4 +sg23 +g4 +sg24 +S'NM_000083.2:c.689G>A' +p75 +sg26 +g4 +sg27 +(dp76 +S'hg19' +p77 +(dp78 +g31 +S'NC_000007.13:g.143018934G>A' +p79 +sg33 +(dp80 +g35 +g36 +sg37 +g38 +sg39 +S'143018934' +p81 +sg41 +g42 +sssg43 +(dp82 +g31 +S'NC_000007.14:g.143321841G>A' +p83 +sg33 +(dp84 +g35 +g36 +sg37 +g38 +sg39 +S'143321841' +p85 +sg41 +g42 +sssS'grch37' +p86 +(dp87 +g31 +S'NC_000007.13:g.143018934G>A' +p88 +sg33 +(dp89 +g35 +g52 +sg37 +g38 +sg39 +S'143018934' +p90 +sg41 +g42 +sssS'grch38' +p91 +(dp92 +g31 +S'NC_000007.14:g.143321841G>A' +p93 +sg33 +(dp94 +g35 +g52 +sg37 +g38 +sg39 +S'143321841' +p95 +sg41 +g42 +ssssg59 +(dp96 +S'protein' +p97 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000074.2' +p98 +sg61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000083.2' +p99 +sssS'metadata' +p100 +(dp101 +S'variantvalidator_hgvs_version' +p102 +S'1.1.3' +p103 +sS'uta_schema' +p104 +S'uta_20180821' +p105 +sS'seqrepo_db' +p106 +S'2018-08-21' +p107 +sS'variantvalidator_version' +p108 +S'v0.2' +p109 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant306.txt b/VariantValidator/testing/testOutputsMasterITS/variant306.txt new file mode 100644 index 00000000..0e383f0a --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant306.txt @@ -0,0 +1,282 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NR_046453.1:n.2620C>T' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'RefSeqGene record not available' +p19 +asS'refseqgene_context_intronic_sequence' +p20 +g16 +sS'alt_genomic_loci' +p21 +(lp22 +sS'transcript_description' +p23 +VHomo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 2, non-coding RNA +p24 +sS'gene_symbol' +p25 +S'CLCN1' +p26 +sS'hgvs_predicted_protein_consequence' +p27 +(dp28 +S'tlr' +p29 +S'Non-coding :n.' 
+p30 +sS'slr' +p31 +g30 +ssS'submitted_variant' +p32 +S'7-143048771-C-T' +p33 +sS'genome_context_intronic_sequence' +p34 +g16 +sS'hgvs_lrg_variant' +p35 +g16 +sS'hgvs_transcript_variant' +p36 +S'NR_046453.1:n.2620C>T' +p37 +sS'hgvs_refseqgene_variant' +p38 +g16 +sS'primary_assembly_loci' +p39 +(dp40 +S'hg19' +p41 +(dp42 +S'hgvs_genomic_description' +p43 +S'NC_000007.13:g.143048771C>T' +p44 +sS'vcf' +p45 +(dp46 +S'chr' +p47 +S'chr7' +p48 +sS'ref' +p49 +S'C' +p50 +sS'pos' +p51 +S'143048771' +p52 +sS'alt' +p53 +S'T' +p54 +sssS'hg38' +p55 +(dp56 +g43 +S'NC_000007.14:g.143351678C>T' +p57 +sg45 +(dp58 +g47 +g48 +sg49 +g50 +sg51 +S'143351678' +p59 +sg53 +g54 +sssS'grch37' +p60 +(dp61 +g43 +S'NC_000007.13:g.143048771C>T' +p62 +sg45 +(dp63 +g47 +S'7' +p64 +sg49 +g50 +sg51 +S'143048771' +p65 +sg53 +g54 +sssS'grch38' +p66 +(dp67 +g43 +S'NC_000007.14:g.143351678C>T' +p68 +sg45 +(dp69 +g47 +g64 +sg49 +g50 +sg51 +S'143351678' +p70 +sg53 +g54 +ssssS'reference_sequence_records' +p71 +(dp72 +S'transcript' +p73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_046453.1' +p74 +sssS'NM_000083.2:c.2680C>T' +p75 +(dp76 +g15 +g16 +sg17 +(lp77 +S'RefSeqGene record not available' +p78 +asg20 +g16 +sg21 +(lp79 +sg23 +VHomo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 1, mRNA +p80 +sg25 +S'CLCN1' +p81 +sg27 +(dp82 +g29 +S'NP_000074.2:p.(Arg894Ter)' +p83 +sg31 +S'NP_000074.2:p.(R894*)' +p84 +ssg32 +g33 +sg34 +g16 +sg35 +g16 +sg36 +S'NM_000083.2:c.2680C>T' +p85 +sg38 +g16 +sg39 +(dp86 +S'hg19' +p87 +(dp88 +g43 +S'NC_000007.13:g.143048771C>T' +p89 +sg45 +(dp90 +g47 +g48 +sg49 +g50 +sg51 +S'143048771' +p91 +sg53 +g54 +sssg55 +(dp92 +g43 +S'NC_000007.14:g.143351678C>T' +p93 +sg45 +(dp94 +g47 +g48 +sg49 +g50 +sg51 +S'143351678' +p95 +sg53 +g54 +sssS'grch37' +p96 +(dp97 +g43 +S'NC_000007.13:g.143048771C>T' +p98 +sg45 +(dp99 +g47 +g64 +sg49 +g50 +sg51 +S'143048771' +p100 +sg53 +g54 +sssS'grch38' +p101 +(dp102 +g43 +S'NC_000007.14:g.143351678C>T' +p103 +sg45 +(dp104 +g47 +g64 +sg49 
+g50 +sg51 +S'143351678' +p105 +sg53 +g54 +ssssg71 +(dp106 +S'protein' +p107 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000074.2' +p108 +sg73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000083.2' +p109 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant307.txt b/VariantValidator/testing/testOutputsMasterITS/variant307.txt new file mode 100644 index 00000000..219b49d9 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant307.txt @@ -0,0 +1,596 @@ +(dp0 +S'NM_014629.3:c.2399C>T' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +(dp11 +S'grch38' +p12 +(dp13 +S'hgvs_genomic_description' +p14 +S'NT_187576.1:g.107161C>T' +p15 +sS'vcf' +p16 +(dp17 +S'chr' +p18 +S'HSCHR8_8_CTG1' +p19 +sS'ref' +p20 +S'C' +p21 +sS'pos' +p22 +S'107161' +p23 +sS'alt' +p24 +S'T' +p25 +sssa(dp26 +S'hg38' +p27 +(dp28 +g14 +S'NT_187576.1:g.107161C>T' +p29 +sg16 +(dp30 +g18 +S'chr8_KI270821v1_alt' +p31 +sg20 +g21 +sg22 +S'107161' +p32 +sg24 +g25 +sssasS'transcript_description' +p33 +VHomo sapiens Rho guanine nucleotide exchange factor 10 (ARHGEF10), transcript variant 1, mRNA +p34 +sS'gene_symbol' +p35 +S'ARHGEF10' +p36 +sS'hgvs_predicted_protein_consequence' +p37 +(dp38 +S'tlr' +p39 +S'NP_055444.2:p.(Pro800Leu)' +p40 +sS'slr' +p41 +S'NP_055444.2:p.(P800L)' +p42 +ssS'submitted_variant' +p43 +S'8-1871951-C-T' +p44 +sS'genome_context_intronic_sequence' +p45 +g4 +sS'hgvs_lrg_variant' +p46 +g4 +sS'hgvs_transcript_variant' +p47 +S'NM_014629.3:c.2399C>T' +p48 +sS'hgvs_refseqgene_variant' +p49 +g4 +sS'primary_assembly_loci' +p50 +(dp51 +S'hg19' +p52 +(dp53 +g14 +S'NC_000008.10:g.1871951C>T' +p54 +sg16 +(dp55 +g18 +S'chr8' +p56 +sg20 +g21 +sg22 +S'1871951' +p57 +sg24 +g25 +sssg27 +(dp58 +g14 +S'NC_000008.11:g.1923785C>T' +p59 +sg16 +(dp60 +g18 +g56 +sg20 +g21 +sg22 +S'1923785' 
+p61 +sg24 +g25 +sssS'grch37' +p62 +(dp63 +g14 +S'NC_000008.10:g.1871951C>T' +p64 +sg16 +(dp65 +g18 +S'8' +p66 +sg20 +g21 +sg22 +S'1871951' +p67 +sg24 +g25 +sssS'grch38' +p68 +(dp69 +g14 +S'NC_000008.11:g.1923785C>T' +p70 +sg16 +(dp71 +g18 +g66 +sg20 +g21 +sg22 +S'1923785' +p72 +sg24 +g25 +ssssS'reference_sequence_records' +p73 +(dp74 +S'protein' +p75 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055444.2' +p76 +sS'transcript' +p77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014629.3' +p78 +sssS'NM_014629.2:c.2399C>T' +p79 +(dp80 +g3 +g4 +sg5 +(lp81 +S'A more recent version of the selected reference sequence NM_014629.2 is available (NM_014629.3)' +p82 +aS'NM_014629.3:c.2399C>T MUST be fully validated prior to use in reports' +p83 +aS'select_variants=NM_014629.3:c.2399C>T' +p84 +aS'RefSeqGene record not available' +p85 +asg8 +g4 +sg9 +(lp86 +sg33 +VHomo sapiens Rho guanine nucleotide exchange factor (GEF) 10 (ARHGEF10), mRNA +p87 +sg35 +S'ARHGEF10' +p88 +sg37 +(dp89 +g39 +S'NP_055444.2:p.(Pro800Leu)' +p90 +sg41 +S'NP_055444.2:p.(P800L)' +p91 +ssg43 +g44 +sg45 +g4 +sg46 +g4 +sg47 +S'NM_014629.2:c.2399C>T' +p92 +sg49 +g4 +sg50 +(dp93 +S'hg19' +p94 +(dp95 +g14 +S'NC_000008.10:g.1871951C>T' +p96 +sg16 +(dp97 +g18 +g56 +sg20 +g21 +sg22 +S'1871951' +p98 +sg24 +g25 +sssS'grch37' +p99 +(dp100 +g14 +S'NC_000008.10:g.1871951C>T' +p101 +sg16 +(dp102 +g18 +g66 +sg20 +g21 +sg22 +S'1871951' +p103 +sg24 +g25 +ssssg73 +(dp104 +g75 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055444.2' +p105 +sg77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014629.2' +p106 +sssS'NM_001308153.1:c.2471C>T' +p107 +(dp108 +g3 +g4 +sg5 +(lp109 +S'RefSeqGene record not available' +p110 +asg8 +g4 +sg9 +(lp111 +(dp112 +S'grch38' +p113 +(dp114 +g14 +S'NT_187576.1:g.107161C>T' +p115 +sg16 +(dp116 +g18 +g19 +sg20 +g21 +sg22 +S'107161' +p117 +sg24 +g25 +sssa(dp118 +g27 +(dp119 +g14 +S'NT_187576.1:g.107161C>T' +p120 +sg16 +(dp121 +g18 +g31 +sg20 +g21 +sg22 +S'107161' +p122 +sg24 +g25 +sssasg33 +VHomo sapiens Rho 
guanine nucleotide exchange factor 10 (ARHGEF10), transcript variant 3, mRNA +p123 +sg35 +S'ARHGEF10' +p124 +sg37 +(dp125 +g39 +S'NP_001295082.1:p.(Pro824Leu)' +p126 +sg41 +S'NP_001295082.1:p.(P824L)' +p127 +ssg43 +g44 +sg45 +g4 +sg46 +g4 +sg47 +S'NM_001308153.1:c.2471C>T' +p128 +sg49 +g4 +sg50 +(dp129 +S'hg19' +p130 +(dp131 +g14 +S'NC_000008.10:g.1871951C>T' +p132 +sg16 +(dp133 +g18 +g56 +sg20 +g21 +sg22 +S'1871951' +p134 +sg24 +g25 +sssg27 +(dp135 +g14 +S'NC_000008.11:g.1923785C>T' +p136 +sg16 +(dp137 +g18 +g56 +sg20 +g21 +sg22 +S'1923785' +p138 +sg24 +g25 +sssS'grch37' +p139 +(dp140 +g14 +S'NC_000008.10:g.1871951C>T' +p141 +sg16 +(dp142 +g18 +g66 +sg20 +g21 +sg22 +S'1871951' +p143 +sg24 +g25 +sssS'grch38' +p144 +(dp145 +g14 +S'NC_000008.11:g.1923785C>T' +p146 +sg16 +(dp147 +g18 +g66 +sg20 +g21 +sg22 +S'1923785' +p148 +sg24 +g25 +ssssg73 +(dp149 +g75 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001295082.1' +p150 +sg77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001308153.1' +p151 +sssS'flag' +p152 +S'gene_variant' +p153 +sS'NM_001308152.1:c.2285C>T' +p154 +(dp155 +g3 +g4 +sg5 +(lp156 +S'RefSeqGene record not available' +p157 +asg8 +g4 +sg9 +(lp158 +(dp159 +S'grch38' +p160 +(dp161 +g14 +S'NT_187576.1:g.107161C>T' +p162 +sg16 +(dp163 +g18 +g19 +sg20 +g21 +sg22 +S'107161' +p164 +sg24 +g25 +sssa(dp165 +g27 +(dp166 +g14 +S'NT_187576.1:g.107161C>T' +p167 +sg16 +(dp168 +g18 +g31 +sg20 +g21 +sg22 +S'107161' +p169 +sg24 +g25 +sssasg33 +VHomo sapiens Rho guanine nucleotide exchange factor 10 (ARHGEF10), transcript variant 2, mRNA +p170 +sg35 +S'ARHGEF10' +p171 +sg37 +(dp172 +g39 +S'NP_001295081.1:p.(Pro762Leu)' +p173 +sg41 +S'NP_001295081.1:p.(P762L)' +p174 +ssg43 +g44 +sg45 +g4 +sg46 +g4 +sg47 +S'NM_001308152.1:c.2285C>T' +p175 +sg49 +g4 +sg50 +(dp176 +S'hg19' +p177 +(dp178 +g14 +S'NC_000008.10:g.1871951C>T' +p179 +sg16 +(dp180 +g18 +g56 +sg20 +g21 +sg22 +S'1871951' +p181 +sg24 +g25 +sssg27 +(dp182 +g14 +S'NC_000008.11:g.1923785C>T' +p183 +sg16 +(dp184 +g18 +g56 +sg20 +g21 
+sg22 +S'1923785' +p185 +sg24 +g25 +sssS'grch37' +p186 +(dp187 +g14 +S'NC_000008.10:g.1871951C>T' +p188 +sg16 +(dp189 +g18 +g66 +sg20 +g21 +sg22 +S'1871951' +p190 +sg24 +g25 +sssS'grch38' +p191 +(dp192 +g14 +S'NC_000008.11:g.1923785C>T' +p193 +sg16 +(dp194 +g18 +g66 +sg20 +g21 +sg22 +S'1923785' +p195 +sg24 +g25 +ssssg73 +(dp196 +g75 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001295081.1' +p197 +sg77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001308152.1' +p198 +sssS'metadata' +p199 +(dp200 +S'variantvalidator_hgvs_version' +p201 +S'1.1.3' +p202 +sS'uta_schema' +p203 +S'uta_20180821' +p204 +sS'seqrepo_db' +p205 +S'2018-08-21' +p206 +sS'variantvalidator_version' +p207 +S'v0.2' +p208 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant308.txt b/VariantValidator/testing/testOutputsMasterITS/variant308.txt new file mode 100644 index 00000000..2dcae498 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant308.txt @@ -0,0 +1,539 @@ +(dp0 +S'NM_001261407.1:c.5504dup' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000009.11:g.13112056T>TG automapped to NC_000009.11:g.13112059dupG' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens multiple PDZ domain crumbs cell polarity complex component (MPDZ), transcript variant 3, mRNA +p13 +sS'gene_symbol' +p14 +S'MPDZ' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_001248336.1:p.(Thr1836AsnfsTer15)' +p19 +sS'slr' +p20 +S'NP_001248336.1:p.(T1836Nfs*15)' +p21 +ssS'submitted_variant' +p22 +S'9-13112056-T-TG' +p23 +sS'genome_context_intronic_sequence' +p24 +g4 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_001261407.1:c.5504dup' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 
+S'hgvs_genomic_description' +p33 +S'NC_000009.11:g.13112057dup' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'chr9' +p38 +sS'ref' +p39 +S'G' +p40 +sS'pos' +p41 +S'13112057' +p42 +sS'alt' +p43 +VGG +p44 +sssS'hg38' +p45 +(dp46 +g33 +S'NC_000009.12:g.13112058dup' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +g40 +sg41 +S'13112058' +p49 +sg43 +VGG +p50 +sssS'grch37' +p51 +(dp52 +g33 +S'NC_000009.11:g.13112057dup' +p53 +sg35 +(dp54 +g37 +S'9' +p55 +sg39 +g40 +sg41 +S'13112057' +p56 +sg43 +VGG +p57 +sssS'grch38' +p58 +(dp59 +g33 +S'NC_000009.12:g.13112058dup' +p60 +sg35 +(dp61 +g37 +g55 +sg39 +g40 +sg41 +S'13112058' +p62 +sg43 +VGG +p63 +ssssS'reference_sequence_records' +p64 +(dp65 +S'protein' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001248336.1' +p67 +sS'transcript' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001261407.1' +p69 +sssS'NM_001330637.1:c.5690dup' +p70 +(dp71 +g3 +g4 +sg5 +(lp72 +S'NC_000009.11:g.13112056T>TG automapped to NC_000009.11:g.13112059dupG' +p73 +aS'RefSeqGene record not available' +p74 +asg9 +g4 +sg10 +(lp75 +sg12 +VHomo sapiens multiple PDZ domain crumbs cell polarity complex component (MPDZ), transcript variant 4, mRNA +p76 +sg14 +S'MPDZ' +p77 +sg16 +(dp78 +g18 +S'NP_001317566.1:p.(Thr1898AsnfsTer15)' +p79 +sg20 +S'NP_001317566.1:p.(T1898Nfs*15)' +p80 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001330637.1:c.5690dup' +p81 +sg28 +g4 +sg29 +(dp82 +S'hg19' +p83 +(dp84 +g33 +S'NC_000009.11:g.13112057dup' +p85 +sg35 +(dp86 +g37 +g38 +sg39 +g40 +sg41 +S'13112057' +p87 +sg43 +VGG +p88 +sssg45 +(dp89 +g33 +S'NC_000009.12:g.13112058dup' +p90 +sg35 +(dp91 +g37 +g38 +sg39 +g40 +sg41 +S'13112058' +p92 +sg43 +VGG +p93 +sssS'grch37' +p94 +(dp95 +g33 +S'NC_000009.11:g.13112057dup' +p96 +sg35 +(dp97 +g37 +g55 +sg39 +g40 +sg41 +S'13112057' +p98 +sg43 +VGG +p99 +sssS'grch38' +p100 +(dp101 +g33 +S'NC_000009.12:g.13112058dup' +p102 +sg35 +(dp103 +g37 +g55 +sg39 +g40 +sg41 +S'13112058' +p104 +sg43 +VGG +p105 +ssssg64 +(dp106 +g66 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317566.1' +p107 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330637.1' +p108 +sssS'NM_001261406.1:c.5591dup' +p109 +(dp110 +g3 +g4 +sg5 +(lp111 +S'NC_000009.11:g.13112056T>TG automapped to NC_000009.11:g.13112059dupG' +p112 +aS'RefSeqGene record not available' +p113 +asg9 +g4 +sg10 +(lp114 +sg12 +VHomo sapiens multiple PDZ domain crumbs cell polarity complex component (MPDZ), transcript variant 2, mRNA +p115 +sg14 +S'MPDZ' +p116 +sg16 +(dp117 +g18 +S'NP_001248335.1:p.(Thr1865AsnfsTer15)' +p118 +sg20 +S'NP_001248335.1:p.(T1865Nfs*15)' +p119 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_001261406.1:c.5591dup' +p120 +sg28 +g4 +sg29 +(dp121 +S'hg19' +p122 +(dp123 +g33 +S'NC_000009.11:g.13112057dup' +p124 +sg35 +(dp125 +g37 +g38 +sg39 +g40 +sg41 +S'13112057' +p126 +sg43 +VGG +p127 +sssg45 +(dp128 +g33 +S'NC_000009.12:g.13112058dup' +p129 +sg35 +(dp130 +g37 +g38 +sg39 +g40 +sg41 +S'13112058' +p131 +sg43 +VGG +p132 +sssS'grch37' +p133 +(dp134 +g33 +S'NC_000009.11:g.13112057dup' +p135 +sg35 +(dp136 +g37 +g55 +sg39 +g40 +sg41 +S'13112057' +p137 +sg43 +VGG +p138 +sssS'grch38' +p139 +(dp140 +g33 +S'NC_000009.12:g.13112058dup' +p141 +sg35 +(dp142 +g37 +g55 +sg39 +g40 +sg41 +S'13112058' +p143 +sg43 +VGG +p144 +ssssg64 +(dp145 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001248335.1' +p146 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001261406.1' +p147 +sssS'flag' +p148 +S'gene_variant' +p149 +sS'NM_003829.4:c.5603dup' +p150 +(dp151 +g3 +g4 +sg5 +(lp152 +S'NC_000009.11:g.13112056T>TG automapped to NC_000009.11:g.13112059dupG' +p153 +aS'RefSeqGene record not available' +p154 +asg9 +g4 +sg10 +(lp155 +sg12 +VHomo sapiens multiple PDZ domain crumbs cell polarity complex component (MPDZ), transcript variant 1, mRNA +p156 +sg14 +S'MPDZ' +p157 +sg16 +(dp158 +g18 +S'NP_003820.2:p.(Thr1869AsnfsTer15)' +p159 +sg20 +S'NP_003820.2:p.(T1869Nfs*15)' +p160 +ssg22 +g23 +sg24 +g4 +sg25 +g4 +sg26 +S'NM_003829.4:c.5603dup' +p161 +sg28 +g4 
+sg29 +(dp162 +S'hg19' +p163 +(dp164 +g33 +S'NC_000009.11:g.13112057dup' +p165 +sg35 +(dp166 +g37 +g38 +sg39 +g40 +sg41 +S'13112057' +p167 +sg43 +VGG +p168 +sssg45 +(dp169 +g33 +S'NC_000009.12:g.13112058dup' +p170 +sg35 +(dp171 +g37 +g38 +sg39 +g40 +sg41 +S'13112058' +p172 +sg43 +VGG +p173 +sssS'grch37' +p174 +(dp175 +g33 +S'NC_000009.11:g.13112057dup' +p176 +sg35 +(dp177 +g37 +g55 +sg39 +g40 +sg41 +S'13112057' +p178 +sg43 +VGG +p179 +sssS'grch38' +p180 +(dp181 +g33 +S'NC_000009.12:g.13112058dup' +p182 +sg35 +(dp183 +g37 +g55 +sg39 +g40 +sg41 +S'13112058' +p184 +sg43 +VGG +p185 +ssssg64 +(dp186 +g66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003820.2' +p187 +sg68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003829.4' +p188 +sssS'metadata' +p189 +(dp190 +S'variantvalidator_hgvs_version' +p191 +S'1.1.3' +p192 +sS'uta_schema' +p193 +S'uta_20180821' +p194 +sS'seqrepo_db' +p195 +S'2018-08-21' +p196 +sS'variantvalidator_version' +p197 +S'v0.2' +p198 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant309.txt b/VariantValidator/testing/testOutputsMasterITS/variant309.txt new file mode 100644 index 00000000..9798130c --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant309.txt @@ -0,0 +1,603 @@ +(dp0 +S'NM_058197.4:c.*74-1G>T' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens cyclin dependent kinase inhibitor 2A (CDKN2A), transcript variant 3, mRNA +p12 +sS'gene_symbol' +p13 +S'CDKN2A' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_478104.2:p.?' +p18 +sS'slr' +p19 +S'NP_478104.2:p.?' 
+p20 +ssS'submitted_variant' +p21 +S'9-21971208-C-A' +p22 +sS'genome_context_intronic_sequence' +p23 +S'NC_000009.11(NM_058197.4):c.*74-1G>T' +p24 +sS'hgvs_lrg_variant' +p25 +g4 +sS'hgvs_transcript_variant' +p26 +S'NM_058197.4:c.*74-1G>T' +p27 +sS'hgvs_refseqgene_variant' +p28 +g4 +sS'primary_assembly_loci' +p29 +(dp30 +S'hg19' +p31 +(dp32 +S'hgvs_genomic_description' +p33 +S'NC_000009.11:g.21971208C>A' +p34 +sS'vcf' +p35 +(dp36 +S'chr' +p37 +S'chr9' +p38 +sS'ref' +p39 +VC +p40 +sS'pos' +p41 +S'21971208' +p42 +sS'alt' +p43 +VA +p44 +sssS'hg38' +p45 +(dp46 +g33 +S'NC_000009.12:g.21971209C>A' +p47 +sg35 +(dp48 +g37 +g38 +sg39 +g40 +sg41 +S'21971209' +p49 +sg43 +g44 +sssS'grch37' +p50 +(dp51 +g33 +S'NC_000009.11:g.21971208C>A' +p52 +sg35 +(dp53 +g37 +S'9' +p54 +sg39 +g40 +sg41 +S'21971208' +p55 +sg43 +g44 +sssS'grch38' +p56 +(dp57 +g33 +S'NC_000009.12:g.21971209C>A' +p58 +sg35 +(dp59 +g37 +g54 +sg39 +g40 +sg41 +S'21971209' +p60 +sg43 +g44 +ssssS'reference_sequence_records' +p61 +(dp62 +S'protein' +p63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_478104.2' +p64 +sS'transcript' +p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_058197.4' +p66 +sssS'NM_000077.4:c.151-1G>T' +p67 +(dp68 +g3 +g4 +sg5 +(lp69 +S'RefSeqGene record not available' +p70 +asg8 +g4 +sg9 +(lp71 +sg11 +VHomo sapiens cyclin dependent kinase inhibitor 2A (CDKN2A), transcript variant 1, mRNA +p72 +sg13 +S'CDKN2A' +p73 +sg15 +(dp74 +g17 +S'NP_000068.1:p.?' +p75 +sg19 +S'NP_000068.1:p.?' 
+p76 +ssg21 +g22 +sg23 +S'NC_000009.11(NM_000077.4):c.151-1G>T' +p77 +sg25 +g4 +sg26 +S'NM_000077.4:c.151-1G>T' +p78 +sg28 +g4 +sg29 +(dp79 +S'hg19' +p80 +(dp81 +g33 +S'NC_000009.11:g.21971208C>A' +p82 +sg35 +(dp83 +g37 +g38 +sg39 +g40 +sg41 +S'21971208' +p84 +sg43 +g44 +sssg45 +(dp85 +g33 +S'NC_000009.12:g.21971209C>A' +p86 +sg35 +(dp87 +g37 +g38 +sg39 +g40 +sg41 +S'21971209' +p88 +sg43 +g44 +sssS'grch37' +p89 +(dp90 +g33 +S'NC_000009.11:g.21971208C>A' +p91 +sg35 +(dp92 +g37 +g54 +sg39 +g40 +sg41 +S'21971208' +p93 +sg43 +g44 +sssS'grch38' +p94 +(dp95 +g33 +S'NC_000009.12:g.21971209C>A' +p96 +sg35 +(dp97 +g37 +g54 +sg39 +g40 +sg41 +S'21971209' +p98 +sg43 +g44 +ssssg61 +(dp99 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000068.1' +p100 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000077.4' +p101 +sssS'NM_001363763.1:c.-3-1G>T' +p102 +(dp103 +g3 +g4 +sg5 +(lp104 +S'RefSeqGene record not available' +p105 +asg8 +g4 +sg9 +(lp106 +sg11 +VHomo sapiens cyclin dependent kinase inhibitor 2A (CDKN2A), transcript variant 6, mRNA +p107 +sg13 +S'CDKN2A' +p108 +sg15 +(dp109 +g17 +S'NP_001350692.1:p.?' +p110 +sg19 +S'NP_001350692.1:p.?' 
+p111 +ssg21 +g22 +sg23 +S'NC_000009.11(NM_001363763.1):c.-3-1G>T' +p112 +sg25 +g4 +sg26 +S'NM_001363763.1:c.-3-1G>T' +p113 +sg28 +g4 +sg29 +(dp114 +S'hg19' +p115 +(dp116 +g33 +S'NC_000009.11:g.21971208C>A' +p117 +sg35 +(dp118 +g37 +g38 +sg39 +g40 +sg41 +S'21971208' +p119 +sg43 +g44 +sssS'grch37' +p120 +(dp121 +g33 +S'NC_000009.11:g.21971208C>A' +p122 +sg35 +(dp123 +g37 +g54 +sg39 +g40 +sg41 +S'21971208' +p124 +sg43 +g44 +ssssg61 +(dp125 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350692.1' +p126 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363763.1' +p127 +sssS'NM_001195132.1:c.151-1G>T' +p128 +(dp129 +g3 +g4 +sg5 +(lp130 +S'RefSeqGene record not available' +p131 +asg8 +g4 +sg9 +(lp132 +sg11 +VHomo sapiens cyclin dependent kinase inhibitor 2A (CDKN2A), transcript variant 5, mRNA +p133 +sg13 +S'CDKN2A' +p134 +sg15 +(dp135 +g17 +S'NP_001182061.1:p.?' +p136 +sg19 +S'NP_001182061.1:p.?' +p137 +ssg21 +g22 +sg23 +S'NC_000009.11(NM_001195132.1):c.151-1G>T' +p138 +sg25 +g4 +sg26 +S'NM_001195132.1:c.151-1G>T' +p139 +sg28 +g4 +sg29 +(dp140 +S'hg19' +p141 +(dp142 +g33 +S'NC_000009.11:g.21971208C>A' +p143 +sg35 +(dp144 +g37 +g38 +sg39 +g40 +sg41 +S'21971208' +p145 +sg43 +g44 +sssg45 +(dp146 +g33 +S'NC_000009.12:g.21971209C>A' +p147 +sg35 +(dp148 +g37 +g38 +sg39 +g40 +sg41 +S'21971209' +p149 +sg43 +g44 +sssS'grch37' +p150 +(dp151 +g33 +S'NC_000009.11:g.21971208C>A' +p152 +sg35 +(dp153 +g37 +g54 +sg39 +g40 +sg41 +S'21971208' +p154 +sg43 +g44 +sssS'grch38' +p155 +(dp156 +g33 +S'NC_000009.12:g.21971209C>A' +p157 +sg35 +(dp158 +g37 +g54 +sg39 +g40 +sg41 +S'21971209' +p159 +sg43 +g44 +ssssg61 +(dp160 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001182061.1' +p161 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001195132.1' +p162 +sssS'NM_058195.3:c.194-1G>T' +p163 +(dp164 +g3 +g4 +sg5 +(lp165 +S'RefSeqGene record not available' +p166 +asg8 +g4 +sg9 +(lp167 +sg11 +VHomo sapiens cyclin dependent kinase inhibitor 2A (CDKN2A), transcript variant 4, mRNA +p168 +sg13 
+S'CDKN2A' +p169 +sg15 +(dp170 +g17 +S'NP_478102.2:p.?' +p171 +sg19 +S'NP_478102.2:p.?' +p172 +ssg21 +g22 +sg23 +S'NC_000009.11(NM_058195.3):c.194-1G>T' +p173 +sg25 +g4 +sg26 +S'NM_058195.3:c.194-1G>T' +p174 +sg28 +g4 +sg29 +(dp175 +S'hg19' +p176 +(dp177 +g33 +S'NC_000009.11:g.21971208C>A' +p178 +sg35 +(dp179 +g37 +g38 +sg39 +g40 +sg41 +S'21971208' +p180 +sg43 +g44 +sssg45 +(dp181 +g33 +S'NC_000009.12:g.21971209C>A' +p182 +sg35 +(dp183 +g37 +g38 +sg39 +g40 +sg41 +S'21971209' +p184 +sg43 +g44 +sssS'grch37' +p185 +(dp186 +g33 +S'NC_000009.11:g.21971208C>A' +p187 +sg35 +(dp188 +g37 +g54 +sg39 +g40 +sg41 +S'21971208' +p189 +sg43 +g44 +sssS'grch38' +p190 +(dp191 +g33 +S'NC_000009.12:g.21971209C>A' +p192 +sg35 +(dp193 +g37 +g54 +sg39 +g40 +sg41 +S'21971209' +p194 +sg43 +g44 +ssssg61 +(dp195 +g63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_478102.2' +p196 +sg65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_058195.3' +p197 +sssS'flag' +p198 +S'gene_variant' +p199 +sS'metadata' +p200 +(dp201 +S'variantvalidator_hgvs_version' +p202 +S'1.1.3' +p203 +sS'uta_schema' +p204 +S'uta_20180821' +p205 +sS'seqrepo_db' +p206 +S'2018-08-21' +p207 +sS'variantvalidator_version' +p208 +S'v0.2' +p209 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant31.txt b/VariantValidator/testing/testOutputsMasterITS/variant31.txt new file mode 100644 index 00000000..ff4da964 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant31.txt @@ -0,0 +1,171 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.589G>T' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p14 +sS'gene_symbol' +p15 +S'COL1A1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_000079.2:p.(Gly197Cys)' +p20 +sS'slr' +p21 +S'NP_000079.2:p.(G197C)' +p22 +ssS'submitted_variant' +p23 +S'NC_000017.10:g.48275363C>A' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_000088.3:c.589G>T' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000017.10:g.48275363C>A' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr17' +p39 +sS'ref' +p40 +VC +p41 +sS'pos' +p42 +S'48275363' +p43 +sS'alt' +p44 +VA +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000017.11:g.50198002C>A' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +g41 +sg42 +S'50198002' +p50 +sg44 +g45 +sssS'grch37' +p51 +(dp52 +g34 +S'NC_000017.10:g.48275363C>A' +p53 +sg36 +(dp54 +g38 +S'17' +p55 +sg40 +g41 +sg42 +S'48275363' +p56 +sg44 +g45 +sssS'grch38' +p57 +(dp58 +g34 +S'NC_000017.11:g.50198002C>A' +p59 +sg36 +(dp60 +g38 +g55 +sg40 +g41 +sg42 +S'50198002' +p61 +sg44 +g45 +ssssS'reference_sequence_records' +p62 +(dp63 +S'protein' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p65 +sS'transcript' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p67 
+sssS'metadata' +p68 +(dp69 +S'variantvalidator_hgvs_version' +p70 +S'1.1.3' +p71 +sS'uta_schema' +p72 +S'uta_20180821' +p73 +sS'seqrepo_db' +p74 +S'2018-08-21' +p75 +sS'variantvalidator_version' +p76 +S'v0.2' +p77 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant310.txt b/VariantValidator/testing/testOutputsMasterITS/variant310.txt new file mode 100644 index 00000000..ea4a8be6 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant310.txt @@ -0,0 +1,543 @@ +(dp0 +S'NM_001301227.1:c.773-3dup' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000009.11:g.35683240T>TG automapped to NC_000009.11:g.35683248dupG' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens tropomyosin 2 (TPM2), transcript variant Tpm2.4, mRNA +p13 +sS'gene_symbol' +p14 +S'TPM2' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_001288156.1:p.?' +p19 +sS'slr' +p20 +S'NP_001288156.1:p.?' 
+p21 +ssS'submitted_variant' +p22 +S'9-35683240-T-TG' +p23 +sS'genome_context_intronic_sequence' +p24 +S'NC_000009.11(NM_001301227.1):c.773-3dup' +p25 +sS'hgvs_lrg_variant' +p26 +g4 +sS'hgvs_transcript_variant' +p27 +S'NM_001301227.1:c.773-3dup' +p28 +sS'hgvs_refseqgene_variant' +p29 +g4 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000009.11:g.35683241dup' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr9' +p39 +sS'ref' +p40 +S'G' +p41 +sS'pos' +p42 +S'35683241' +p43 +sS'alt' +p44 +S'GG' +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000009.12:g.35683244dup' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +g41 +sg42 +S'35683244' +p50 +sg44 +S'GG' +p51 +sssS'grch37' +p52 +(dp53 +g34 +S'NC_000009.11:g.35683241dup' +p54 +sg36 +(dp55 +g38 +S'9' +p56 +sg40 +g41 +sg42 +S'35683241' +p57 +sg44 +S'GG' +p58 +sssS'grch38' +p59 +(dp60 +g34 +S'NC_000009.12:g.35683244dup' +p61 +sg36 +(dp62 +g38 +g56 +sg40 +g41 +sg42 +S'35683244' +p63 +sg44 +S'GG' +p64 +ssssS'reference_sequence_records' +p65 +(dp66 +S'protein' +p67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001288156.1' +p68 +sS'transcript' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001301227.1' +p70 +sssS'NM_001301226.1:c.772+1002dup' +p71 +(dp72 +g3 +g4 +sg5 +(lp73 +S'NC_000009.11:g.35683240T>TG automapped to NC_000009.11:g.35683248dupG' +p74 +aS'RefSeqGene record not available' +p75 +asg9 +g4 +sg10 +(lp76 +sg12 +VHomo sapiens tropomyosin 2 (TPM2), transcript variant Tpm2.3, mRNA +p77 +sg14 +S'TPM2' +p78 +sg16 +(dp79 +g18 +S'NP_001288155.1:p.?' +p80 +sg20 +S'NP_001288155.1:p.?' 
+p81 +ssg22 +g23 +sg24 +S'NC_000009.11(NM_001301226.1):c.772+1002dup' +p82 +sg26 +g4 +sg27 +S'NM_001301226.1:c.772+1002dup' +p83 +sg29 +g4 +sg30 +(dp84 +S'hg19' +p85 +(dp86 +g34 +S'NC_000009.11:g.35683241dup' +p87 +sg36 +(dp88 +g38 +g39 +sg40 +g41 +sg42 +S'35683241' +p89 +sg44 +S'GG' +p90 +sssg46 +(dp91 +g34 +S'NC_000009.12:g.35683244dup' +p92 +sg36 +(dp93 +g38 +g39 +sg40 +g41 +sg42 +S'35683244' +p94 +sg44 +S'GG' +p95 +sssS'grch37' +p96 +(dp97 +g34 +S'NC_000009.11:g.35683241dup' +p98 +sg36 +(dp99 +g38 +g56 +sg40 +g41 +sg42 +S'35683241' +p100 +sg44 +S'GG' +p101 +sssS'grch38' +p102 +(dp103 +g34 +S'NC_000009.12:g.35683244dup' +p104 +sg36 +(dp105 +g38 +g56 +sg40 +g41 +sg42 +S'35683244' +p106 +sg44 +S'GG' +p107 +ssssg65 +(dp108 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001288155.1' +p109 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001301226.1' +p110 +sssS'NM_213674.1:c.772+1002dup' +p111 +(dp112 +g3 +g4 +sg5 +(lp113 +S'NC_000009.11:g.35683240T>TG automapped to NC_000009.11:g.35683248dupG' +p114 +aS'RefSeqGene record not available' +p115 +asg9 +g4 +sg10 +(lp116 +sg12 +VHomo sapiens tropomyosin 2 (TPM2), transcript variant Tpm2.1, mRNA +p117 +sg14 +S'TPM2' +p118 +sg16 +(dp119 +g18 +S'NP_998839.1:p.?' +p120 +sg20 +S'NP_998839.1:p.?' 
+p121 +ssg22 +g23 +sg24 +S'NC_000009.11(NM_213674.1):c.772+1002dup' +p122 +sg26 +g4 +sg27 +S'NM_213674.1:c.772+1002dup' +p123 +sg29 +g4 +sg30 +(dp124 +S'hg19' +p125 +(dp126 +g34 +S'NC_000009.11:g.35683241dup' +p127 +sg36 +(dp128 +g38 +g39 +sg40 +g41 +sg42 +S'35683241' +p129 +sg44 +S'GG' +p130 +sssg46 +(dp131 +g34 +S'NC_000009.12:g.35683244dup' +p132 +sg36 +(dp133 +g38 +g39 +sg40 +g41 +sg42 +S'35683244' +p134 +sg44 +S'GG' +p135 +sssS'grch37' +p136 +(dp137 +g34 +S'NC_000009.11:g.35683241dup' +p138 +sg36 +(dp139 +g38 +g56 +sg40 +g41 +sg42 +S'35683241' +p140 +sg44 +S'GG' +p141 +sssS'grch38' +p142 +(dp143 +g34 +S'NC_000009.12:g.35683244dup' +p144 +sg36 +(dp145 +g38 +g56 +sg40 +g41 +sg42 +S'35683244' +p146 +sg44 +S'GG' +p147 +ssssg65 +(dp148 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_998839.1' +p149 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_213674.1' +p150 +sssS'NM_003289.3:c.773-3dup' +p151 +(dp152 +g3 +g4 +sg5 +(lp153 +S'NC_000009.11:g.35683240T>TG automapped to NC_000009.11:g.35683248dupG' +p154 +aS'RefSeqGene record not available' +p155 +asg9 +g4 +sg10 +(lp156 +sg12 +VHomo sapiens tropomyosin 2 (TPM2), transcript variant Tpm2.2, mRNA +p157 +sg14 +S'TPM2' +p158 +sg16 +(dp159 +g18 +S'NP_003280.2:p.?' +p160 +sg20 +S'NP_003280.2:p.?' 
+p161 +ssg22 +g23 +sg24 +S'NC_000009.11(NM_003289.3):c.773-3dup' +p162 +sg26 +g4 +sg27 +S'NM_003289.3:c.773-3dup' +p163 +sg29 +g4 +sg30 +(dp164 +S'hg19' +p165 +(dp166 +g34 +S'NC_000009.11:g.35683241dup' +p167 +sg36 +(dp168 +g38 +g39 +sg40 +g41 +sg42 +S'35683241' +p169 +sg44 +S'GG' +p170 +sssg46 +(dp171 +g34 +S'NC_000009.12:g.35683244dup' +p172 +sg36 +(dp173 +g38 +g39 +sg40 +g41 +sg42 +S'35683244' +p174 +sg44 +S'GG' +p175 +sssS'grch37' +p176 +(dp177 +g34 +S'NC_000009.11:g.35683241dup' +p178 +sg36 +(dp179 +g38 +g56 +sg40 +g41 +sg42 +S'35683241' +p180 +sg44 +S'GG' +p181 +sssS'grch38' +p182 +(dp183 +g34 +S'NC_000009.12:g.35683244dup' +p184 +sg36 +(dp185 +g38 +g56 +sg40 +g41 +sg42 +S'35683244' +p186 +sg44 +S'GG' +p187 +ssssg65 +(dp188 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003280.2' +p189 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003289.3' +p190 +sssS'flag' +p191 +S'gene_variant' +p192 +sS'metadata' +p193 +(dp194 +S'variantvalidator_hgvs_version' +p195 +S'1.1.3' +p196 +sS'uta_schema' +p197 +S'uta_20180821' +p198 +sS'seqrepo_db' +p199 +S'2018-08-21' +p200 +sS'variantvalidator_version' +p201 +S'v0.2' +p202 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant311.txt b/VariantValidator/testing/testOutputsMasterITS/variant311.txt new file mode 100644 index 00000000..5866371f --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant311.txt @@ -0,0 +1,516 @@ +(dp0 +S'NM_000368.4:c.733C>T' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA +p12 +sS'gene_symbol' +p13 +S'TSC1' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_000359.1:p.(Arg245Ter)' +p18 +sS'slr' +p19 +S'NP_000359.1:p.(R245*)' +p20 +ssS'submitted_variant' +p21 +S'9-135796754-G-A' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_000368.4:c.733C>T' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000009.11:g.135796754G>A' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr9' +p37 +sS'ref' +p38 +VG +p39 +sS'pos' +p40 +S'135796754' +p41 +sS'alt' +p42 +VA +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000009.12:g.132921367G>A' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +g39 +sg40 +S'132921367' +p48 +sg42 +g43 +sssS'grch37' +p49 +(dp50 +g32 +S'NC_000009.11:g.135796754G>A' +p51 +sg34 +(dp52 +g36 +S'9' +p53 +sg38 +g39 +sg40 +S'135796754' +p54 +sg42 +g43 +sssS'grch38' +p55 +(dp56 +g32 +S'NC_000009.12:g.132921367G>A' +p57 +sg34 +(dp58 +g36 +g53 +sg38 +g39 +sg40 +S'132921367' +p59 +sg42 +g43 +ssssS'reference_sequence_records' +p60 +(dp61 +S'protein' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1' +p63 +sS'transcript' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4' +p65 +sssS'NM_001162426.1:c.733C>T' +p66 +(dp67 
+g3 +g4 +sg5 +(lp68 +S'RefSeqGene record not available' +p69 +asg8 +g4 +sg9 +(lp70 +sg11 +VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 3, mRNA +p71 +sg13 +S'TSC1' +p72 +sg15 +(dp73 +g17 +S'NP_001155898.1:p.(Arg245Ter)' +p74 +sg19 +S'NP_001155898.1:p.(R245*)' +p75 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001162426.1:c.733C>T' +p76 +sg27 +g4 +sg28 +(dp77 +S'hg19' +p78 +(dp79 +g32 +S'NC_000009.11:g.135796754G>A' +p80 +sg34 +(dp81 +g36 +g37 +sg38 +g39 +sg40 +S'135796754' +p82 +sg42 +g43 +sssg44 +(dp83 +g32 +S'NC_000009.12:g.132921367G>A' +p84 +sg34 +(dp85 +g36 +g37 +sg38 +g39 +sg40 +S'132921367' +p86 +sg42 +g43 +sssS'grch37' +p87 +(dp88 +g32 +S'NC_000009.11:g.135796754G>A' +p89 +sg34 +(dp90 +g36 +g53 +sg38 +g39 +sg40 +S'135796754' +p91 +sg42 +g43 +sssS'grch38' +p92 +(dp93 +g32 +S'NC_000009.12:g.132921367G>A' +p94 +sg34 +(dp95 +g36 +g53 +sg38 +g39 +sg40 +S'132921367' +p96 +sg42 +g43 +ssssg60 +(dp97 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155898.1' +p98 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162426.1' +p99 +sssS'flag' +p100 +S'gene_variant' +p101 +sS'NM_001362177.1:c.370C>T' +p102 +(dp103 +g3 +g4 +sg5 +(lp104 +S'RefSeqGene record not available' +p105 +asg8 +g4 +sg9 +(lp106 +sg11 +VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 5, mRNA +p107 +sg13 +S'TSC1' +p108 +sg15 +(dp109 +g17 +S'NP_001349106.1:p.(Arg124Ter)' +p110 +sg19 +S'NP_001349106.1:p.(R124*)' +p111 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001362177.1:c.370C>T' +p112 +sg27 +g4 +sg28 +(dp113 +S'hg19' +p114 +(dp115 +g32 +S'NC_000009.11:g.135796754G>A' +p116 +sg34 +(dp117 +g36 +g37 +sg38 +g39 +sg40 +S'135796754' +p118 +sg42 +g43 +sssg44 +(dp119 +g32 +S'NC_000009.12:g.132921367G>A' +p120 +sg34 +(dp121 +g36 +g37 +sg38 +g39 +sg40 +S'132921367' +p122 +sg42 +g43 +sssS'grch37' +p123 +(dp124 +g32 +S'NC_000009.11:g.135796754G>A' +p125 +sg34 +(dp126 +g36 +g53 +sg38 +g39 +sg40 +S'135796754' +p127 +sg42 +g43 +sssS'grch38' +p128 +(dp129 +g32 
+S'NC_000009.12:g.132921367G>A' +p130 +sg34 +(dp131 +g36 +g53 +sg38 +g39 +sg40 +S'132921367' +p132 +sg42 +g43 +ssssg60 +(dp133 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001349106.1' +p134 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001362177.1' +p135 +sssS'NM_001162427.1:c.580C>T' +p136 +(dp137 +g3 +g4 +sg5 +(lp138 +S'RefSeqGene record not available' +p139 +asg8 +g4 +sg9 +(lp140 +sg11 +VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 4, mRNA +p141 +sg13 +S'TSC1' +p142 +sg15 +(dp143 +g17 +S'NP_001155899.1:p.(Arg194Ter)' +p144 +sg19 +S'NP_001155899.1:p.(R194*)' +p145 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001162427.1:c.580C>T' +p146 +sg27 +g4 +sg28 +(dp147 +S'hg19' +p148 +(dp149 +g32 +S'NC_000009.11:g.135796754G>A' +p150 +sg34 +(dp151 +g36 +g37 +sg38 +g39 +sg40 +S'135796754' +p152 +sg42 +g43 +sssg44 +(dp153 +g32 +S'NC_000009.12:g.132921367G>A' +p154 +sg34 +(dp155 +g36 +g37 +sg38 +g39 +sg40 +S'132921367' +p156 +sg42 +g43 +sssS'grch37' +p157 +(dp158 +g32 +S'NC_000009.11:g.135796754G>A' +p159 +sg34 +(dp160 +g36 +g53 +sg38 +g39 +sg40 +S'135796754' +p161 +sg42 +g43 +sssS'grch38' +p162 +(dp163 +g32 +S'NC_000009.12:g.132921367G>A' +p164 +sg34 +(dp165 +g36 +g53 +sg38 +g39 +sg40 +S'132921367' +p166 +sg42 +g43 +ssssg60 +(dp167 +g62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155899.1' +p168 +sg64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162427.1' +p169 +sssS'metadata' +p170 +(dp171 +S'variantvalidator_hgvs_version' +p172 +S'1.1.3' +p173 +sS'uta_schema' +p174 +S'uta_20180821' +p175 +sS'seqrepo_db' +p176 +S'2018-08-21' +p177 +sS'variantvalidator_version' +p178 +S'v0.2' +p179 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant312.txt b/VariantValidator/testing/testOutputsMasterITS/variant312.txt new file mode 100644 index 00000000..0ccd7dc4 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant312.txt @@ -0,0 +1,216 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_005247.2:c.616del' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'NW_003571046.1:g.10391AC>A automapped to NW_003571046.1:g.10396delC' +p19 +aS'RefSeqGene record not available' +p20 +asS'refseqgene_context_intronic_sequence' +p21 +g16 +sS'alt_genomic_loci' +p22 +(lp23 +(dp24 +S'grch37' +p25 +(dp26 +S'hgvs_genomic_description' +p27 +S'NW_003571046.1:g.10392del' +p28 +sS'vcf' +p29 +(dp30 +S'chr' +p31 +S'HG536_PATCH' +p32 +sS'ref' +p33 +S'AC' +p34 +sS'pos' +p35 +S'10391' +p36 +sS'alt' +p37 +S'A' +p38 +sssa(dp39 +S'hg19' +p40 +(dp41 +g27 +S'NW_003571046.1:g.10392del' +p42 +sg29 +(dp43 +g31 +S'NW_003571046.1' +p44 +sg33 +S'AC' +p45 +sg35 +S'10391' +p46 +sg37 +g38 +sssasS'transcript_description' +p47 +VHomo sapiens fibroblast growth factor 3 (FGF3), mRNA +p48 +sS'gene_symbol' +p49 +S'FGF3' +p50 +sS'hgvs_predicted_protein_consequence' +p51 +(dp52 +S'tlr' +p53 +S'NP_005238.1:p.(Val206SerfsTer117)' +p54 +sS'slr' +p55 +S'NP_005238.1:p.(V206Sfs*117)' +p56 +ssS'submitted_variant' +p57 +S'HG536_PATCH-10391-AC-A' +p58 +sS'genome_context_intronic_sequence' +p59 +g16 +sS'hgvs_lrg_variant' +p60 +g16 +sS'hgvs_transcript_variant' +p61 +S'NM_005247.2:c.616del' +p62 +sS'hgvs_refseqgene_variant' +p63 +g16 +sS'primary_assembly_loci' +p64 +(dp65 +S'grch38' +p66 +(dp67 +g27 +S'NC_000011.10:g.69810409del' +p68 +sg29 +(dp69 +g31 +S'11' +p70 +sg33 +S'AC' +p71 +sg35 +S'69810408' +p72 +sg37 
+g38 +sssS'grch37' +p73 +(dp74 +g27 +S'NC_000011.9:g.69625177del' +p75 +sg29 +(dp76 +g31 +g70 +sg33 +S'AC' +p77 +sg35 +S'69625176' +p78 +sg37 +g38 +sssS'hg38' +p79 +(dp80 +g27 +S'NC_000011.10:g.69810409del' +p81 +sg29 +(dp82 +g31 +S'chr11' +p83 +sg33 +S'AC' +p84 +sg35 +S'69810408' +p85 +sg37 +g38 +sssS'hg19' +p86 +(dp87 +g27 +S'NC_000011.9:g.69625177del' +p88 +sg29 +(dp89 +g31 +g83 +sg33 +S'AC' +p90 +sg35 +S'69625176' +p91 +sg37 +g38 +ssssS'reference_sequence_records' +p92 +(dp93 +S'protein' +p94 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005238.1' +p95 +sS'transcript' +p96 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005247.2' +p97 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant313.txt b/VariantValidator/testing/testOutputsMasterITS/variant313.txt new file mode 100644 index 00000000..7149a06b --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant313.txt @@ -0,0 +1,635 @@ +(dp0 +S'NR_110766.1:n.833+969C>T' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +(dp11 +S'grch37' +p12 +(dp13 +S'hgvs_genomic_description' +p14 +S'NW_004070871.1:g.33547G>A' +p15 +sS'vcf' +p16 +(dp17 +S'chr' +p18 +S'HG865_PATCH' +p19 +sS'ref' +p20 +VG +p21 +sS'pos' +p22 +S'33547' +p23 +sS'alt' +p24 +VA +p25 +sssa(dp26 +S'hg19' +p27 +(dp28 +g14 +S'NW_004070871.1:g.33547G>A' +p29 +sg16 +(dp30 +g18 +S'NW_004070871.1' +p31 +sg20 +g21 +sg22 +S'33547' +p32 +sg24 +g25 +sssasS'transcript_description' +p33 +VHomo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 3, non-coding RNA +p34 +sS'gene_symbol' +p35 +S'SHANK2' +p36 +sS'hgvs_predicted_protein_consequence' +p37 +(dp38 +S'tlr' +p39 +S'Non-coding :n.' 
+p40 +sS'slr' +p41 +g40 +ssS'submitted_variant' +p42 +S'HG865_PATCH-33547-G-A' +p43 +sS'genome_context_intronic_sequence' +p44 +S'NC_000011.9(NR_110766.1):c.833+969C>T' +p45 +sS'hgvs_lrg_variant' +p46 +g4 +sS'hgvs_transcript_variant' +p47 +S'NR_110766.1:n.833+969C>T' +p48 +sS'hgvs_refseqgene_variant' +p49 +g4 +sS'primary_assembly_loci' +p50 +(dp51 +S'grch38' +p52 +(dp53 +g14 +S'NC_000011.10:g.70489334G>A' +p54 +sg16 +(dp55 +g18 +S'11' +p56 +sg20 +g21 +sg22 +S'70489334' +p57 +sg24 +g25 +sssS'grch37' +p58 +(dp59 +g14 +S'NC_000011.9:g.70335439G>A' +p60 +sg16 +(dp61 +g18 +g56 +sg20 +g21 +sg22 +S'70335439' +p62 +sg24 +g25 +sssS'hg38' +p63 +(dp64 +g14 +S'NC_000011.10:g.70489334G>A' +p65 +sg16 +(dp66 +g18 +S'chr11' +p67 +sg20 +g21 +sg22 +S'70489334' +p68 +sg24 +g25 +sssS'hg19' +p69 +(dp70 +g14 +S'NC_000011.9:g.70335439G>A' +p71 +sg16 +(dp72 +g18 +g67 +sg20 +g21 +sg22 +S'70335439' +p73 +sg24 +g25 +ssssS'reference_sequence_records' +p74 +(dp75 +S'transcript' +p76 +S'https://www.ncbi.nlm.nih.gov/nuccore/NR_110766.1' +p77 +sssS'NM_012309.4:c.2566C>T' +p78 +(dp79 +g3 +g4 +sg5 +(lp80 +S'RefSeqGene record not available' +p81 +asg8 +g4 +sg9 +(lp82 +(dp83 +S'grch37' +p84 +(dp85 +g14 +S'NW_004070871.1:g.33547G>A' +p86 +sg16 +(dp87 +g18 +g19 +sg20 +g21 +sg22 +S'33547' +p88 +sg24 +g25 +sssa(dp89 +S'hg19' +p90 +(dp91 +g14 +S'NW_004070871.1:g.33547G>A' +p92 +sg16 +(dp93 +g18 +S'NW_004070871.1' +p94 +sg20 +g21 +sg22 +S'33547' +p95 +sg24 +g25 +sssasg33 +VHomo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 1, mRNA +p96 +sg35 +S'SHANK2' +p97 +sg37 +(dp98 +g39 +S'NP_036441.2:p.(Leu856=)' +p99 +sg41 +S'NP_036441.2:p.(L856=)' +p100 +ssg42 +g43 +sg44 +g4 +sg46 +g4 +sg47 +S'NM_012309.4:c.2566C>T' +p101 +sg49 +g4 +sg50 +(dp102 +S'grch38' +p103 +(dp104 +g14 +S'NC_000011.10:g.70489334G>A' +p105 +sg16 +(dp106 +g18 +g56 +sg20 +g21 +sg22 +S'70489334' +p107 +sg24 +g25 +sssS'grch37' +p108 +(dp109 +g14 +S'NC_000011.9:g.70336423G>A' +p110 +sg16 +(dp111 +g18 +g56 +sg20 +g21 
+sg22 +S'70336423' +p112 +sg24 +g25 +sssg63 +(dp113 +g14 +S'NC_000011.10:g.70489334G>A' +p114 +sg16 +(dp115 +g18 +g67 +sg20 +g21 +sg22 +S'70489334' +p116 +sg24 +g25 +sssS'hg19' +p117 +(dp118 +g14 +S'NC_000011.9:g.70336423G>A' +p119 +sg16 +(dp120 +g18 +g67 +sg20 +g21 +sg22 +S'70336423' +p121 +sg24 +g25 +ssssg74 +(dp122 +S'protein' +p123 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_036441.2' +p124 +sg76 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_012309.4' +p125 +sssS'NM_133266.4:c.802C>T' +p126 +(dp127 +g3 +g4 +sg5 +(lp128 +S'RefSeqGene record not available' +p129 +asg8 +g4 +sg9 +(lp130 +(dp131 +S'grch37' +p132 +(dp133 +g14 +S'NW_004070871.1:g.33547G>A' +p134 +sg16 +(dp135 +g18 +g19 +sg20 +g21 +sg22 +S'33547' +p136 +sg24 +g25 +sssa(dp137 +S'hg19' +p138 +(dp139 +g14 +S'NW_004070871.1:g.33547G>A' +p140 +sg16 +(dp141 +g18 +S'NW_004070871.1' +p142 +sg20 +g21 +sg22 +S'33547' +p143 +sg24 +g25 +sssasg33 +VHomo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 2, mRNA +p144 +sg35 +S'SHANK2' +p145 +sg37 +(dp146 +g39 +S'NP_573573.2:p.(Leu268=)' +p147 +sg41 +S'NP_573573.2:p.(L268=)' +p148 +ssg42 +g43 +sg44 +g4 +sg46 +g4 +sg47 +S'NM_133266.4:c.802C>T' +p149 +sg49 +g4 +sg50 +(dp150 +S'grch38' +p151 +(dp152 +g14 +S'NC_000011.10:g.70489334G>A' +p153 +sg16 +(dp154 +g18 +g56 +sg20 +g21 +sg22 +S'70489334' +p155 +sg24 +g25 +sssS'grch37' +p156 +(dp157 +g14 +S'NC_000011.9:g.70335439G>A' +p158 +sg16 +(dp159 +g18 +g56 +sg20 +g21 +sg22 +S'70335439' +p160 +sg24 +g25 +sssg63 +(dp161 +g14 +S'NC_000011.10:g.70489334G>A' +p162 +sg16 +(dp163 +g18 +g67 +sg20 +g21 +sg22 +S'70489334' +p164 +sg24 +g25 +sssS'hg19' +p165 +(dp166 +g14 +S'NC_000011.9:g.70335439G>A' +p167 +sg16 +(dp168 +g18 +g67 +sg20 +g21 +sg22 +S'70335439' +p169 +sg24 +g25 +ssssg74 +(dp170 +g123 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_573573.2' +p171 +sg76 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_133266.4' +p172 +sssS'flag' +p173 +S'gene_variant' +p174 +sS'NM_133266.3:c.802C>T' +p175 +(dp176 +g3 +g4 +sg5 
+(lp177 +S'A more recent version of the selected reference sequence NM_133266.3 is available (NM_133266.4)' +p178 +aS'NM_133266.4:c.802C>T MUST be fully validated prior to use in reports' +p179 +aS'select_variants=NM_133266.4:c.802C>T' +p180 +aS'RefSeqGene record not available' +p181 +asg8 +g4 +sg9 +(lp182 +(dp183 +S'grch37' +p184 +(dp185 +g14 +S'NW_004070871.1:g.33547G>A' +p186 +sg16 +(dp187 +g18 +g19 +sg20 +g21 +sg22 +S'33547' +p188 +sg24 +g25 +sssa(dp189 +S'hg19' +p190 +(dp191 +g14 +S'NW_004070871.1:g.33547G>A' +p192 +sg16 +(dp193 +g18 +S'NW_004070871.1' +p194 +sg20 +g21 +sg22 +S'33547' +p195 +sg24 +g25 +sssasg33 +VHomo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 2, mRNA +p196 +sg35 +S'SHANK2' +p197 +sg37 +(dp198 +g39 +S'NP_573573.2:p.(Leu268=)' +p199 +sg41 +S'NP_573573.2:p.(L268=)' +p200 +ssg42 +g43 +sg44 +g4 +sg46 +g4 +sg47 +S'NM_133266.3:c.802C>T' +p201 +sg49 +g4 +sg50 +(dp202 +S'hg19' +p203 +(dp204 +g14 +S'NC_000011.9:g.70335439G>A' +p205 +sg16 +(dp206 +g18 +g67 +sg20 +g21 +sg22 +S'70335439' +p207 +sg24 +g25 +sssS'grch37' +p208 +(dp209 +g14 +S'NC_000011.9:g.70335439G>A' +p210 +sg16 +(dp211 +g18 +g56 +sg20 +g21 +sg22 +S'70335439' +p212 +sg24 +g25 +ssssg74 +(dp213 +g123 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_573573.2' +p214 +sg76 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_133266.3' +p215 +sssS'metadata' +p216 +(dp217 +S'variantvalidator_hgvs_version' +p218 +S'1.1.3' +p219 +sS'uta_schema' +p220 +S'uta_20180821' +p221 +sS'seqrepo_db' +p222 +S'2018-08-21' +p223 +sS'variantvalidator_version' +p224 +S'v0.2' +p225 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant314.txt b/VariantValidator/testing/testOutputsMasterITS/variant314.txt new file mode 100644 index 00000000..42975a4a --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant314.txt @@ -0,0 +1,179 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_012309.4:c.960C>A' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +aS'NM_012309.4:c.960C>A cannot be mapped directly to genome build GRCh37' +p10 +aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g6 +sS'alt_genomic_loci' +p13 +(lp14 +(dp15 +S'grch37' +p16 +(dp17 +S'hgvs_genomic_description' +p18 +S'NW_004070871.1:g.569441G>T' +p19 +sS'vcf' +p20 +(dp21 +S'chr' +p22 +S'HG865_PATCH' +p23 +sS'ref' +p24 +VG +p25 +sS'pos' +p26 +S'569441' +p27 +sS'alt' +p28 +VT +p29 +sssa(dp30 +S'hg19' +p31 +(dp32 +g18 +S'NW_004070871.1:g.569441G>T' +p33 +sg20 +(dp34 +g22 +S'NW_004070871.1' +p35 +sg24 +g25 +sg26 +S'569441' +p36 +sg28 +g29 +sssasS'transcript_description' +p37 +VHomo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 1, mRNA +p38 +sS'gene_symbol' +p39 +S'SHANK2' +p40 +sS'hgvs_predicted_protein_consequence' +p41 +(dp42 +S'tlr' +p43 +S'NP_036441.2:p.(Tyr320Ter)' +p44 +sS'slr' +p45 +S'NP_036441.2:p.(Y320*)' +p46 +ssS'submitted_variant' +p47 +S'HG865_PATCH-569441-G-T' +p48 +sS'genome_context_intronic_sequence' +p49 +g6 +sS'hgvs_lrg_variant' +p50 +g6 +sS'hgvs_transcript_variant' +p51 +S'NM_012309.4:c.960C>A' +p52 +sS'hgvs_refseqgene_variant' +p53 +g6 +sS'primary_assembly_loci' +p54 +(dp55 +S'grch38' +p56 +(dp57 +g18 +S'NC_000011.10:g.71075228G>T' +p58 +sg20 +(dp59 +g22 +S'11' +p60 +sg24 +g25 +sg26 +S'71075228' +p61 +sg28 +g29 +sssS'hg38' +p62 +(dp63 +g18 +S'NC_000011.10:g.71075228G>T' +p64 +sg20 +(dp65 +g22 +S'chr11' +p66 
+sg24 +g25 +sg26 +S'71075228' +p67 +sg28 +g29 +ssssS'reference_sequence_records' +p68 +(dp69 +S'protein' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_036441.2' +p71 +sS'transcript' +p72 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_012309.4' +p73 +sssS'metadata' +p74 +(dp75 +S'variantvalidator_hgvs_version' +p76 +S'1.1.3' +p77 +sS'uta_schema' +p78 +S'uta_20180821' +p79 +sS'seqrepo_db' +p80 +S'2018-08-21' +p81 +sS'variantvalidator_version' +p82 +S'v0.2' +p83 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant315.txt b/VariantValidator/testing/testOutputsMasterITS/variant315.txt new file mode 100644 index 00000000..9601b8bd --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant315.txt @@ -0,0 +1,180 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_012309.4:c.913-5058G>A' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +aS'NM_012309.4:c.913-5058G>A cannot be mapped directly to genome build GRCh37' +p10 +aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g6 +sS'alt_genomic_loci' +p13 +(lp14 +(dp15 +S'grch37' +p16 +(dp17 +S'hgvs_genomic_description' +p18 +S'NW_004070871.1:g.574546C>T' +p19 +sS'vcf' +p20 +(dp21 +S'chr' +p22 +S'HG865_PATCH' +p23 +sS'ref' +p24 +VC +p25 +sS'pos' +p26 +S'574546' +p27 +sS'alt' +p28 +VT +p29 +sssa(dp30 +S'hg19' +p31 +(dp32 +g18 +S'NW_004070871.1:g.574546C>T' +p33 +sg20 +(dp34 +g22 +S'NW_004070871.1' +p35 +sg24 +g25 +sg26 +S'574546' +p36 +sg28 +g29 +sssasS'transcript_description' +p37 +VHomo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 1, mRNA +p38 +sS'gene_symbol' +p39 +S'SHANK2' +p40 +sS'hgvs_predicted_protein_consequence' +p41 +(dp42 +S'tlr' +p43 +S'NP_036441.2:p.?' +p44 +sS'slr' +p45 +S'NP_036441.2:p.?' 
+p46 +ssS'submitted_variant' +p47 +S'HG865_PATCH-574546-C-T' +p48 +sS'genome_context_intronic_sequence' +p49 +S'NC_000011.10(NM_012309.4):c.913-5058G>A' +p50 +sS'hgvs_lrg_variant' +p51 +g6 +sS'hgvs_transcript_variant' +p52 +S'NM_012309.4:c.913-5058G>A' +p53 +sS'hgvs_refseqgene_variant' +p54 +g6 +sS'primary_assembly_loci' +p55 +(dp56 +S'grch38' +p57 +(dp58 +g18 +S'NC_000011.10:g.71080333C>T' +p59 +sg20 +(dp60 +g22 +S'11' +p61 +sg24 +g25 +sg26 +S'71080333' +p62 +sg28 +g29 +sssS'hg38' +p63 +(dp64 +g18 +S'NC_000011.10:g.71080333C>T' +p65 +sg20 +(dp66 +g22 +S'chr11' +p67 +sg24 +g25 +sg26 +S'71080333' +p68 +sg28 +g29 +ssssS'reference_sequence_records' +p69 +(dp70 +S'protein' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_036441.2' +p72 +sS'transcript' +p73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_012309.4' +p74 +sssS'metadata' +p75 +(dp76 +S'variantvalidator_hgvs_version' +p77 +S'1.1.3' +p78 +sS'uta_schema' +p79 +S'uta_20180821' +p80 +sS'seqrepo_db' +p81 +S'2018-08-21' +p82 +sS'variantvalidator_version' +p83 +S'v0.2' +p84 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant316.txt b/VariantValidator/testing/testOutputsMasterITS/variant316.txt new file mode 100644 index 00000000..e6ec98b8 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant316.txt @@ -0,0 +1,182 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_020699.2:c.802_803insTT' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'NW_003315905.1:g.133178TAG>T automapped to NW_003315905.1:g.133179_133180delAG' +p19 +aS'A more recent version of the selected reference sequence NM_020699.2 is available (NM_020699.3)' +p20 +aS'NM_020699.3:c.802_803insTT MUST be fully validated prior to use in reports' +p21 +aS'select_variants=NM_020699.3:c.802_803insTT' +p22 +aS'RefSeqGene record not available' +p23 +asS'refseqgene_context_intronic_sequence' +p24 +g16 +sS'alt_genomic_loci' +p25 +(lp26 +sS'transcript_description' +p27 +VHomo sapiens GATA zinc finger domain containing 2B (GATAD2B), mRNA +p28 +sS'gene_symbol' +p29 +S'GATAD2B' +p30 +sS'hgvs_predicted_protein_consequence' +p31 +(dp32 +S'tlr' +p33 +S'NP_065750.1:p.(Pro268LeufsTer26)' +p34 +sS'slr' +p35 +S'NP_065750.1:p.(P268Lfs*26)' +p36 +ssS'submitted_variant' +p37 +S'HSCHR1_1_CTG31-133178-TAG-T' +p38 +sS'genome_context_intronic_sequence' +p39 +g16 +sS'hgvs_lrg_variant' +p40 +g16 +sS'hgvs_transcript_variant' +p41 +S'NM_020699.2:c.802_803insTT' +p42 +sS'hgvs_refseqgene_variant' +p43 +g16 +sS'primary_assembly_loci' +p44 +(dp45 +S'hg19' +p46 +(dp47 +S'hgvs_genomic_description' +p48 +S'NC_000001.10:g.153789945_153789946delinsGAAG' +p49 +sS'vcf' +p50 +(dp51 +S'chr' +p52 +S'chr1' +p53 +sS'ref' +p54 +S'G' +p55 +sS'pos' +p56 +S'153789945' +p57 +sS'alt' +p58 +VGAA +p59 +sssS'hg38' +p60 
+(dp61 +g48 +S'NC_000001.11:g.153817469_153817470insAA' +p62 +sg50 +(dp63 +g52 +g53 +sg54 +g55 +sg56 +S'153817469' +p64 +sg58 +VGAA +p65 +sssS'grch37' +p66 +(dp67 +g48 +S'NC_000001.10:g.153789945_153789946delinsGAAG' +p68 +sg50 +(dp69 +g52 +S'1' +p70 +sg54 +g55 +sg56 +S'153789945' +p71 +sg58 +VGAA +p72 +sssS'grch38' +p73 +(dp74 +g48 +S'NC_000001.11:g.153817469_153817470insAA' +p75 +sg50 +(dp76 +g52 +g70 +sg54 +g55 +sg56 +S'153817469' +p77 +sg58 +VGAA +p78 +ssssS'reference_sequence_records' +p79 +(dp80 +S'protein' +p81 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_065750.1' +p82 +sS'transcript' +p83 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_020699.2' +p84 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant317.txt b/VariantValidator/testing/testOutputsMasterITS/variant317.txt new file mode 100644 index 00000000..6c1b3a14 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant317.txt @@ -0,0 +1,293 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_021983.4:c.490G>C' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'RefSeqGene record not available' +p19 +aS'NM_021983.4:c.490G>C cannot be mapped directly to genome build GRCh37' +p20 +aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' +p21 +asS'refseqgene_context_intronic_sequence' +p22 +g16 +sS'alt_genomic_loci' +p23 +(lp24 +(dp25 +S'grch37' +p26 +(dp27 +S'hgvs_genomic_description' +p28 +S'NT_167246.1:g.3848158T>G' +p29 +sS'vcf' +p30 +(dp31 +S'chr' +p32 +S'HSCHR6_MHC_MANN_CTG1' +p33 +sS'ref' +p34 +S'T' +p35 +sS'pos' +p36 +S'3848158' +p37 +sS'alt' +p38 +VG +p39 +sssa(dp40 +S'hg19' +p41 +(dp42 +g28 +S'NT_167246.1:g.3848158T>G' +p43 +sg30 +(dp44 +g32 
+S'chr6_mann_hap4' +p45 +sg34 +g35 +sg36 +S'3848158' +p46 +sg38 +g39 +sssa(dp47 +S'grch38' +p48 +(dp49 +g28 +S'NT_167246.2:g.3842538T>G' +p50 +sg30 +(dp51 +g32 +g33 +sg34 +g35 +sg36 +S'3842538' +p52 +sg38 +g39 +sssa(dp53 +S'hg38' +p54 +(dp55 +g28 +S'NT_167246.2:g.3842538T>G' +p56 +sg30 +(dp57 +g32 +S'chr6_GL000253v2_alt' +p58 +sg34 +g35 +sg36 +S'3842538' +p59 +sg38 +g39 +sssa(dp60 +S'grch37' +p61 +(dp62 +g28 +S'NT_167247.1:g.3884432C>G' +p63 +sg30 +(dp64 +g32 +S'HSCHR6_MHC_MCF_CTG1' +p65 +sg34 +VC +p66 +sg36 +S'3884432' +p67 +sg38 +g39 +sssa(dp68 +S'hg19' +p69 +(dp70 +g28 +S'NT_167247.1:g.3884432C>G' +p71 +sg30 +(dp72 +g32 +S'chr6_mcf_hap5' +p73 +sg34 +g66 +sg36 +S'3884432' +p74 +sg38 +g39 +sssa(dp75 +S'grch37' +p76 +(dp77 +g28 +S'NT_167249.1:g.3852542C>G' +p78 +sg30 +(dp79 +g32 +S'HSCHR6_MHC_SSTO_CTG1' +p80 +sg34 +g66 +sg36 +S'3852542' +p81 +sg38 +g39 +sssa(dp82 +S'hg19' +p83 +(dp84 +g28 +S'NT_167249.1:g.3852542C>G' +p85 +sg30 +(dp86 +g32 +S'chr6_ssto_hap7' +p87 +sg34 +g66 +sg36 +S'3852542' +p88 +sg38 +g39 +sssa(dp89 +S'grch38' +p90 +(dp91 +g28 +S'NT_167249.2:g.3853244C>G' +p92 +sg30 +(dp93 +g32 +g80 +sg34 +g66 +sg36 +S'3853244' +p94 +sg38 +g39 +sssa(dp95 +g54 +(dp96 +g28 +S'NT_167249.2:g.3853244C>G' +p97 +sg30 +(dp98 +g32 +S'chr6_GL000256v2_alt' +p99 +sg34 +g66 +sg36 +S'3853244' +p100 +sg38 +g39 +sssasS'transcript_description' +p101 +VHomo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA +p102 +sS'gene_symbol' +p103 +S'HLA-DRB4' +p104 +sS'hgvs_predicted_protein_consequence' +p105 +(dp106 +S'tlr' +p107 +S'NP_068818.4:p.(Gly164Arg)' +p108 +sS'slr' +p109 +S'NP_068818.4:p.(G164R)' +p110 +ssS'submitted_variant' +p111 +S'HSCHR6_MHC_MANN_CTG1-3848158-T-G' +p112 +sS'genome_context_intronic_sequence' +p113 +g16 +sS'hgvs_lrg_variant' +p114 +g16 +sS'hgvs_transcript_variant' +p115 +S'NM_021983.4:c.490G>C' +p116 +sS'hgvs_refseqgene_variant' +p117 +g16 +sS'primary_assembly_loci' +p118 +(dp119 +sS'reference_sequence_records' +p120 +(dp121 
+S'protein' +p122 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_068818.4' +p123 +sS'transcript' +p124 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_021983.4' +p125 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant318.txt b/VariantValidator/testing/testOutputsMasterITS/variant318.txt new file mode 100644 index 00000000..49d77588 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant318.txt @@ -0,0 +1,292 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_021983.4:c.346G>T' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +aS'NM_021983.4:c.346G>T cannot be mapped directly to genome build GRCh37' +p10 +aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g6 +sS'alt_genomic_loci' +p13 +(lp14 +(dp15 +S'grch37' +p16 +(dp17 +S'hgvs_genomic_description' +p18 +S'NT_167246.1:g.3851043C>A' +p19 +sS'vcf' +p20 +(dp21 +S'chr' +p22 +S'HSCHR6_MHC_MANN_CTG1' +p23 +sS'ref' +p24 +VC +p25 +sS'pos' +p26 +S'3851043' +p27 +sS'alt' +p28 +VA +p29 +sssa(dp30 +S'hg19' +p31 +(dp32 +g18 +S'NT_167246.1:g.3851043C>A' +p33 +sg20 +(dp34 +g22 +S'chr6_mann_hap4' +p35 +sg24 +g25 +sg26 +S'3851043' +p36 +sg28 +g29 +sssa(dp37 +S'grch38' +p38 +(dp39 +g18 +S'NT_167246.2:g.3845423C>A' +p40 +sg20 +(dp41 +g22 +g23 +sg24 +g25 +sg26 +S'3845423' +p42 +sg28 +g29 +sssa(dp43 +S'hg38' +p44 +(dp45 +g18 +S'NT_167246.2:g.3845423C>A' +p46 +sg20 +(dp47 +g22 +S'chr6_GL000253v2_alt' +p48 +sg24 +g25 +sg26 +S'3845423' +p49 +sg28 +g29 +sssa(dp50 +S'grch37' +p51 +(dp52 +g18 +S'NT_167247.1:g.3887313C>A' +p53 +sg20 +(dp54 +g22 +S'HSCHR6_MHC_MCF_CTG1' +p55 +sg24 +g25 +sg26 +S'3887313' +p56 +sg28 +g29 +sssa(dp57 +S'hg19' +p58 +(dp59 +g18 +S'NT_167247.1:g.3887313C>A' +p60 +sg20 +(dp61 +g22 +S'chr6_mcf_hap5' +p62 +sg24 +g25 +sg26 +S'3887313' +p63 +sg28 +g29 +sssa(dp64 +S'grch37' +p65 +(dp66 +g18 
+S'NT_167249.1:g.3855423C>A' +p67 +sg20 +(dp68 +g22 +S'HSCHR6_MHC_SSTO_CTG1' +p69 +sg24 +g25 +sg26 +S'3855423' +p70 +sg28 +g29 +sssa(dp71 +S'hg19' +p72 +(dp73 +g18 +S'NT_167249.1:g.3855423C>A' +p74 +sg20 +(dp75 +g22 +S'chr6_ssto_hap7' +p76 +sg24 +g25 +sg26 +S'3855423' +p77 +sg28 +g29 +sssa(dp78 +S'grch38' +p79 +(dp80 +g18 +S'NT_167249.2:g.3856125C>A' +p81 +sg20 +(dp82 +g22 +g69 +sg24 +g25 +sg26 +S'3856125' +p83 +sg28 +g29 +sssa(dp84 +g44 +(dp85 +g18 +S'NT_167249.2:g.3856125C>A' +p86 +sg20 +(dp87 +g22 +S'chr6_GL000256v2_alt' +p88 +sg24 +g25 +sg26 +S'3856125' +p89 +sg28 +g29 +sssasS'transcript_description' +p90 +VHomo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA +p91 +sS'gene_symbol' +p92 +S'HLA-DRB4' +p93 +sS'hgvs_predicted_protein_consequence' +p94 +(dp95 +S'tlr' +p96 +S'NP_068818.4:p.(Glu116Ter)' +p97 +sS'slr' +p98 +S'NP_068818.4:p.(E116*)' +p99 +ssS'submitted_variant' +p100 +S'HSCHR6_MHC_MANN_CTG1-3851043-C-A' +p101 +sS'genome_context_intronic_sequence' +p102 +g6 +sS'hgvs_lrg_variant' +p103 +g6 +sS'hgvs_transcript_variant' +p104 +S'NM_021983.4:c.346G>T' +p105 +sS'hgvs_refseqgene_variant' +p106 +g6 +sS'primary_assembly_loci' +p107 +(dp108 +sS'reference_sequence_records' +p109 +(dp110 +S'protein' +p111 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_068818.4' +p112 +sS'transcript' +p113 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_021983.4' +p114 +sssS'metadata' +p115 +(dp116 +S'variantvalidator_hgvs_version' +p117 +S'1.1.3' +p118 +sS'uta_schema' +p119 +S'uta_20180821' +p120 +sS'seqrepo_db' +p121 +S'2018-08-21' +p122 +sS'variantvalidator_version' +p123 +S'v0.2' +p124 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant319.txt b/VariantValidator/testing/testOutputsMasterITS/variant319.txt new file mode 100644 index 00000000..d663d806 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant319.txt @@ -0,0 +1,287 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_001097642.2:c.-16-441C>T' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens gap junction protein beta 1 (GJB1), transcript variant 1, mRNA +p14 +sS'gene_symbol' +p15 +S'GJB1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_001091111.1:p.?' +p20 +sS'slr' +p21 +S'NP_001091111.1:p.?' +p22 +ssS'submitted_variant' +p23 +S'X-70443101-C-T' +p24 +sS'genome_context_intronic_sequence' +p25 +S'NC_000023.10(NM_001097642.2):c.-16-441C>T' +p26 +sS'hgvs_lrg_variant' +p27 +g6 +sS'hgvs_transcript_variant' +p28 +S'NM_001097642.2:c.-16-441C>T' +p29 +sS'hgvs_refseqgene_variant' +p30 +g6 +sS'primary_assembly_loci' +p31 +(dp32 +S'hg19' +p33 +(dp34 +S'hgvs_genomic_description' +p35 +S'NC_000023.10:g.70443101C>T' +p36 +sS'vcf' +p37 +(dp38 +S'chr' +p39 +S'chrX' +p40 +sS'ref' +p41 +S'C' +p42 +sS'pos' +p43 +S'70443101' +p44 +sS'alt' +p45 +S'T' +p46 +sssS'hg38' +p47 +(dp48 +g35 +S'NC_000023.11:g.71223251C>T' +p49 +sg37 +(dp50 +g39 +g40 +sg41 +g42 +sg43 +S'71223251' +p51 +sg45 +g46 +sssS'grch37' +p52 +(dp53 +g35 +S'NC_000023.10:g.70443101C>T' +p54 +sg37 +(dp55 +g39 +S'X' +p56 +sg41 +g42 +sg43 +S'70443101' +p57 +sg45 +g46 +sssS'grch38' +p58 +(dp59 +g35 +S'NC_000023.11:g.71223251C>T' +p60 +sg37 +(dp61 +g39 +g56 +sg41 +g42 +sg43 +S'71223251' +p62 +sg45 +g46 +ssssS'reference_sequence_records' +p63 +(dp64 +S'protein' +p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001091111.1' +p66 +sS'transcript' +p67 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001097642.2' +p68 +sssS'NM_000166.5:c.-101C>T' +p69 +(dp70 +g5 +g6 +sg7 +(lp71 +S'RefSeqGene record not available' +p72 +asg10 +g6 +sg11 +(lp73 +sg13 +VHomo sapiens gap junction protein beta 1 (GJB1), transcript variant 2, mRNA +p74 +sg15 +S'GJB1' +p75 +sg17 +(dp76 +g19 +S'NP_000157.1:p.?' +p77 +sg21 +S'NP_000157.1:p.?' +p78 +ssg23 +g24 +sg25 +g6 +sg27 +g6 +sg28 +S'NM_000166.5:c.-101C>T' +p79 +sg30 +g6 +sg31 +(dp80 +S'hg19' +p81 +(dp82 +g35 +S'NC_000023.10:g.70443101C>T' +p83 +sg37 +(dp84 +g39 +g40 +sg41 +g42 +sg43 +S'70443101' +p85 +sg45 +g46 +sssg47 +(dp86 +g35 +S'NC_000023.11:g.71223251C>T' +p87 +sg37 +(dp88 +g39 +g40 +sg41 +g42 +sg43 +S'71223251' +p89 +sg45 +g46 +sssS'grch37' +p90 +(dp91 +g35 +S'NC_000023.10:g.70443101C>T' +p92 +sg37 +(dp93 +g39 +g56 +sg41 +g42 +sg43 +S'70443101' +p94 +sg45 +g46 +sssS'grch38' +p95 +(dp96 +g35 +S'NC_000023.11:g.71223251C>T' +p97 +sg37 +(dp98 +g39 +g56 +sg41 +g42 +sg43 +S'71223251' +p99 +sg45 +g46 +ssssg63 +(dp100 +g65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000157.1' +p101 +sg67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000166.5' +p102 +sssS'metadata' +p103 +(dp104 +S'variantvalidator_hgvs_version' +p105 +S'1.1.3' +p106 +sS'uta_schema' +p107 +S'uta_20180821' +p108 +sS'seqrepo_db' +p109 +S'2018-08-21' +p110 +sS'variantvalidator_version' +p111 +S'v0.2' +p112 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant32.txt b/VariantValidator/testing/testOutputsMasterITS/variant32.txt new file mode 100644 index 00000000..54d25dc8 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant32.txt @@ -0,0 +1,172 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.589-1G>T' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p14 +sS'gene_symbol' +p15 +S'COL1A1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_000079.2:p.?' +p20 +sS'slr' +p21 +S'NP_000079.2:p.?' +p22 +ssS'submitted_variant' +p23 +S'NM_000088.3:c.589-1G>T' +p24 +sS'genome_context_intronic_sequence' +p25 +S'NC_000017.10(NM_000088.3):c.589-1G>T' +p26 +sS'hgvs_lrg_variant' +p27 +g6 +sS'hgvs_transcript_variant' +p28 +S'NM_000088.3:c.589-1G>T' +p29 +sS'hgvs_refseqgene_variant' +p30 +g6 +sS'primary_assembly_loci' +p31 +(dp32 +S'hg19' +p33 +(dp34 +S'hgvs_genomic_description' +p35 +S'NC_000017.10:g.48275364C>A' +p36 +sS'vcf' +p37 +(dp38 +S'chr' +p39 +S'chr17' +p40 +sS'ref' +p41 +VC +p42 +sS'pos' +p43 +S'48275364' +p44 +sS'alt' +p45 +VA +p46 +sssS'hg38' +p47 +(dp48 +g35 +S'NC_000017.11:g.50198003C>A' +p49 +sg37 +(dp50 +g39 +g40 +sg41 +g42 +sg43 +S'50198003' +p51 +sg45 +g46 +sssS'grch37' +p52 +(dp53 +g35 +S'NC_000017.10:g.48275364C>A' +p54 +sg37 +(dp55 +g39 +S'17' +p56 +sg41 +g42 +sg43 +S'48275364' +p57 +sg45 +g46 +sssS'grch38' +p58 +(dp59 +g35 +S'NC_000017.11:g.50198003C>A' +p60 +sg37 +(dp61 +g39 +g56 +sg41 +g42 +sg43 +S'50198003' +p62 +sg45 +g46 +ssssS'reference_sequence_records' +p63 +(dp64 +S'protein' +p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p66 +sS'transcript' +p67 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p68 +sssS'metadata' +p69 +(dp70 +S'variantvalidator_hgvs_version' +p71 +S'1.1.3' +p72 +sS'uta_schema' +p73 +S'uta_20180821' +p74 +sS'seqrepo_db' +p75 +S'2018-08-21' +p76 +sS'variantvalidator_version' +p77 +S'v0.2' +p78 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant320.txt b/VariantValidator/testing/testOutputsMasterITS/variant320.txt new file mode 100644 index 00000000..2bd34839 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant320.txt @@ -0,0 +1,555 @@ +(dp0 +S'NM_033380.2:c.2130_2135del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'Multiple ALT sequences detected' +p7 +aS'auto-submitting all possible combinations' +p8 +aS'NC_000023.10:g.107845202GACCACC>G automapped to NC_000023.10:g.107845203_107845208del' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g4 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens collagen type IV alpha 5 chain (COL4A5), transcript variant 2, mRNA +p15 +sS'gene_symbol' +p16 +S'COL4A5' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_203699.1:p.(Pro711_Pro712del)' +p21 +sS'slr' +p22 +S'NP_203699.1:p.(P711_P712del)' +p23 +ssS'submitted_variant' +p24 +S'X-107845202-GACCACC-GACC,G' +p25 +sS'genome_context_intronic_sequence' +p26 +g4 +sS'hgvs_lrg_variant' +p27 +g4 +sS'hgvs_transcript_variant' +p28 +S'NM_033380.2:c.2130_2135del' +p29 +sS'hgvs_refseqgene_variant' +p30 +g4 +sS'primary_assembly_loci' +p31 +(dp32 +S'hg19' +p33 +(dp34 +S'hgvs_genomic_description' +p35 +S'NC_000023.10:g.107845203_107845208del' +p36 +sS'vcf' +p37 +(dp38 +S'chr' +p39 +S'chrX' +p40 +sS'ref' +p41 +S'GACCACC' +p42 +sS'pos' +p43 +S'107845202' +p44 +sS'alt' +p45 +S'G' +p46 +sssS'hg38' +p47 +(dp48 +g35 +S'NC_000023.11:g.108601973_108601978del' +p49 +sg37 +(dp50 +g39 +g40 +sg41 +S'GACCACC' +p51 
+sg43 +S'108601972' +p52 +sg45 +g46 +sssS'grch37' +p53 +(dp54 +g35 +S'NC_000023.10:g.107845203_107845208del' +p55 +sg37 +(dp56 +g39 +S'X' +p57 +sg41 +S'GACCACC' +p58 +sg43 +S'107845202' +p59 +sg45 +g46 +sssS'grch38' +p60 +(dp61 +g35 +S'NC_000023.11:g.108601973_108601978del' +p62 +sg37 +(dp63 +g39 +g57 +sg41 +S'GACCACC' +p64 +sg43 +S'108601972' +p65 +sg45 +g46 +ssssS'reference_sequence_records' +p66 +(dp67 +S'protein' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_203699.1' +p69 +sS'transcript' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_033380.2' +p71 +sssS'NM_000495.4:c.2130_2135del' +p72 +(dp73 +g3 +g4 +sg5 +(lp74 +S'Multiple ALT sequences detected' +p75 +aS'auto-submitting all possible combinations' +p76 +aS'NC_000023.10:g.107845202GACCACC>G automapped to NC_000023.10:g.107845203_107845208del' +p77 +aS'RefSeqGene record not available' +p78 +asg11 +g4 +sg12 +(lp79 +sg14 +VHomo sapiens collagen type IV alpha 5 chain (COL4A5), transcript variant 1, mRNA +p80 +sg16 +S'COL4A5' +p81 +sg18 +(dp82 +g20 +S'NP_000486.1:p.(Pro711_Pro712del)' +p83 +sg22 +S'NP_000486.1:p.(P711_P712del)' +p84 +ssg24 +g25 +sg26 +g4 +sg27 +g4 +sg28 +S'NM_000495.4:c.2130_2135del' +p85 +sg30 +g4 +sg31 +(dp86 +S'hg19' +p87 +(dp88 +g35 +S'NC_000023.10:g.107845203_107845208del' +p89 +sg37 +(dp90 +g39 +g40 +sg41 +S'GACCACC' +p91 +sg43 +S'107845202' +p92 +sg45 +g46 +sssg47 +(dp93 +g35 +S'NC_000023.11:g.108601973_108601978del' +p94 +sg37 +(dp95 +g39 +g40 +sg41 +S'GACCACC' +p96 +sg43 +S'108601972' +p97 +sg45 +g46 +sssS'grch37' +p98 +(dp99 +g35 +S'NC_000023.10:g.107845203_107845208del' +p100 +sg37 +(dp101 +g39 +g57 +sg41 +S'GACCACC' +p102 +sg43 +S'107845202' +p103 +sg45 +g46 +sssS'grch38' +p104 +(dp105 +g35 +S'NC_000023.11:g.108601973_108601978del' +p106 +sg37 +(dp107 +g39 +g57 +sg41 +S'GACCACC' +p108 +sg43 +S'108601972' +p109 +sg45 +g46 +ssssg66 +(dp110 +g68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000486.1' +p111 +sg70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000495.4' +p112 +sssS'flag' +p113 
+S'gene_variant' +p114 +sS'NM_000495.4:c.2133_2135del' +p115 +(dp116 +g3 +g4 +sg5 +(lp117 +S'Multiple ALT sequences detected' +p118 +aS'auto-submitting all possible combinations' +p119 +aS'NC_000023.10:g.107845202GACCACC>GACC automapped to NC_000023.10:g.107845206_107845208delACC' +p120 +aS'RefSeqGene record not available' +p121 +asg11 +g4 +sg12 +(lp122 +sg14 +VHomo sapiens collagen type IV alpha 5 chain (COL4A5), transcript variant 1, mRNA +p123 +sg16 +S'COL4A5' +p124 +sg18 +(dp125 +g20 +S'NP_000486.1:p.(Pro712del)' +p126 +sg22 +S'NP_000486.1:p.(P712del)' +p127 +ssg24 +g25 +sg26 +g4 +sg27 +g4 +sg28 +S'NM_000495.4:c.2133_2135del' +p128 +sg30 +g4 +sg31 +(dp129 +S'hg19' +p130 +(dp131 +g35 +S'NC_000023.10:g.107845206_107845208del' +p132 +sg37 +(dp133 +g39 +g40 +sg41 +S'GACC' +p134 +sg43 +S'107845202' +p135 +sg45 +g46 +sssg47 +(dp136 +g35 +S'NC_000023.11:g.108601976_108601978del' +p137 +sg37 +(dp138 +g39 +g40 +sg41 +S'GACC' +p139 +sg43 +S'108601972' +p140 +sg45 +g46 +sssS'grch37' +p141 +(dp142 +g35 +S'NC_000023.10:g.107845206_107845208del' +p143 +sg37 +(dp144 +g39 +g57 +sg41 +S'GACC' +p145 +sg43 +S'107845202' +p146 +sg45 +g46 +sssS'grch38' +p147 +(dp148 +g35 +S'NC_000023.11:g.108601976_108601978del' +p149 +sg37 +(dp150 +g39 +g57 +sg41 +S'GACC' +p151 +sg43 +S'108601972' +p152 +sg45 +g46 +ssssg66 +(dp153 +g68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000486.1' +p154 +sg70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000495.4' +p155 +sssS'NM_033380.2:c.2133_2135del' +p156 +(dp157 +g3 +g4 +sg5 +(lp158 +S'Multiple ALT sequences detected' +p159 +aS'auto-submitting all possible combinations' +p160 +aS'NC_000023.10:g.107845202GACCACC>GACC automapped to NC_000023.10:g.107845206_107845208delACC' +p161 +aS'RefSeqGene record not available' +p162 +asg11 +g4 +sg12 +(lp163 +sg14 +VHomo sapiens collagen type IV alpha 5 chain (COL4A5), transcript variant 2, mRNA +p164 +sg16 +S'COL4A5' +p165 +sg18 +(dp166 +g20 +S'NP_203699.1:p.(Pro712del)' +p167 +sg22 +S'NP_203699.1:p.(P712del)' +p168 
+ssg24 +g25 +sg26 +g4 +sg27 +g4 +sg28 +S'NM_033380.2:c.2133_2135del' +p169 +sg30 +g4 +sg31 +(dp170 +S'hg19' +p171 +(dp172 +g35 +S'NC_000023.10:g.107845206_107845208del' +p173 +sg37 +(dp174 +g39 +g40 +sg41 +S'GACC' +p175 +sg43 +S'107845202' +p176 +sg45 +g46 +sssg47 +(dp177 +g35 +S'NC_000023.11:g.108601976_108601978del' +p178 +sg37 +(dp179 +g39 +g40 +sg41 +S'GACC' +p180 +sg43 +S'108601972' +p181 +sg45 +g46 +sssS'grch37' +p182 +(dp183 +g35 +S'NC_000023.10:g.107845206_107845208del' +p184 +sg37 +(dp185 +g39 +g57 +sg41 +S'GACC' +p186 +sg43 +S'107845202' +p187 +sg45 +g46 +sssS'grch38' +p188 +(dp189 +g35 +S'NC_000023.11:g.108601976_108601978del' +p190 +sg37 +(dp191 +g39 +g57 +sg41 +S'GACC' +p192 +sg43 +S'108601972' +p193 +sg45 +g46 +ssssg66 +(dp194 +g68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_203699.1' +p195 +sg70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_033380.2' +p196 +sssS'metadata' +p197 +(dp198 +S'variantvalidator_hgvs_version' +p199 +S'1.1.3' +p200 +sS'uta_schema' +p201 +S'uta_20180821' +p202 +sS'seqrepo_db' +p203 +S'2018-08-21' +p204 +sS'variantvalidator_version' +p205 +S'v0.2' +p206 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant321.txt b/VariantValidator/testing/testOutputsMasterITS/variant321.txt new file mode 100644 index 00000000..f3204457 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant321.txt @@ -0,0 +1,513 @@ +(dp0 +S'NM_004992.3:c.502C>T' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +(dp11 +S'grch37' +p12 +(dp13 +S'hgvs_genomic_description' +p14 +S'NW_003871103.3:g.1465305G>A' +p15 +sS'vcf' +p16 +(dp17 +S'chr' +p18 +S'HG1497_PATCH' +p19 +sS'ref' +p20 +VG +p21 +sS'pos' +p22 +S'1465305' +p23 +sS'alt' +p24 +VA +p25 +sssa(dp26 +S'hg19' +p27 +(dp28 +g14 +S'NW_003871103.3:g.1465305G>A' +p29 +sg16 +(dp30 +g18 +S'NW_003871103.3' +p31 +sg20 +g21 +sg22 +S'1465305' +p32 +sg24 +g25 +sssasS'transcript_description' +p33 +VHomo sapiens methyl-CpG binding protein 2 (MECP2), transcript variant 1, mRNA +p34 +sS'gene_symbol' +p35 +S'MECP2' +p36 +sS'hgvs_predicted_protein_consequence' +p37 +(dp38 +S'tlr' +p39 +S'NP_004983.1:p.(Arg168Ter)' +p40 +sS'slr' +p41 +S'NP_004983.1:p.(R168*)' +p42 +ssS'submitted_variant' +p43 +S'X-153296777-G-A' +p44 +sS'genome_context_intronic_sequence' +p45 +g4 +sS'hgvs_lrg_variant' +p46 +g4 +sS'hgvs_transcript_variant' +p47 +S'NM_004992.3:c.502C>T' +p48 +sS'hgvs_refseqgene_variant' +p49 +g4 +sS'primary_assembly_loci' +p50 +(dp51 +S'hg19' +p52 +(dp53 +g14 +S'NC_000023.10:g.153296777G>A' +p54 +sg16 +(dp55 +g18 +S'chrX' +p56 +sg20 +g21 +sg22 +S'153296777' +p57 +sg24 +g25 +sssS'hg38' +p58 +(dp59 +g14 +S'NC_000023.11:g.154031326G>A' +p60 +sg16 +(dp61 +g18 +g56 +sg20 +g21 +sg22 +S'154031326' +p62 +sg24 +g25 +sssS'grch37' +p63 +(dp64 +g14 +S'NC_000023.10:g.153296777G>A' +p65 +sg16 +(dp66 +g18 +S'X' +p67 +sg20 +g21 +sg22 +S'153296777' +p68 +sg24 +g25 +sssS'grch38' +p69 +(dp70 +g14 
+S'NC_000023.11:g.154031326G>A' +p71 +sg16 +(dp72 +g18 +g67 +sg20 +g21 +sg22 +S'154031326' +p73 +sg24 +g25 +ssssS'reference_sequence_records' +p74 +(dp75 +S'protein' +p76 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004983.1' +p77 +sS'transcript' +p78 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004992.3' +p79 +sssS'flag' +p80 +S'gene_variant' +p81 +sS'NM_001316337.1:c.223C>T' +p82 +(dp83 +g3 +g4 +sg5 +(lp84 +S'RefSeqGene record not available' +p85 +asg8 +g4 +sg9 +(lp86 +(dp87 +S'grch37' +p88 +(dp89 +g14 +S'NW_003871103.3:g.1465305G>A' +p90 +sg16 +(dp91 +g18 +g19 +sg20 +g21 +sg22 +S'1465305' +p92 +sg24 +g25 +sssa(dp93 +S'hg19' +p94 +(dp95 +g14 +S'NW_003871103.3:g.1465305G>A' +p96 +sg16 +(dp97 +g18 +S'NW_003871103.3' +p98 +sg20 +g21 +sg22 +S'1465305' +p99 +sg24 +g25 +sssasg33 +VHomo sapiens methyl-CpG binding protein 2 (MECP2), transcript variant 3, mRNA +p100 +sg35 +S'MECP2' +p101 +sg37 +(dp102 +g39 +S'NP_001303266.1:p.(Arg75Ter)' +p103 +sg41 +S'NP_001303266.1:p.(R75*)' +p104 +ssg43 +g44 +sg45 +g4 +sg46 +g4 +sg47 +S'NM_001316337.1:c.223C>T' +p105 +sg49 +g4 +sg50 +(dp106 +S'hg19' +p107 +(dp108 +g14 +S'NC_000023.10:g.153296777G>A' +p109 +sg16 +(dp110 +g18 +g56 +sg20 +g21 +sg22 +S'153296777' +p111 +sg24 +g25 +sssg58 +(dp112 +g14 +S'NC_000023.11:g.154031326G>A' +p113 +sg16 +(dp114 +g18 +g56 +sg20 +g21 +sg22 +S'154031326' +p115 +sg24 +g25 +sssS'grch37' +p116 +(dp117 +g14 +S'NC_000023.10:g.153296777G>A' +p118 +sg16 +(dp119 +g18 +g67 +sg20 +g21 +sg22 +S'153296777' +p120 +sg24 +g25 +sssS'grch38' +p121 +(dp122 +g14 +S'NC_000023.11:g.154031326G>A' +p123 +sg16 +(dp124 +g18 +g67 +sg20 +g21 +sg22 +S'154031326' +p125 +sg24 +g25 +ssssg74 +(dp126 +g76 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001303266.1' +p127 +sg78 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001316337.1' +p128 +sssS'metadata' +p129 +(dp130 +S'variantvalidator_hgvs_version' +p131 +S'1.1.3' +p132 +sS'uta_schema' +p133 +S'uta_20180821' +p134 +sS'seqrepo_db' +p135 +S'2018-08-21' +p136 +sS'variantvalidator_version' +p137 
+S'v0.2' +p138 +ssS'NM_001110792.1:c.538C>T' +p139 +(dp140 +g3 +g4 +sg5 +(lp141 +S'RefSeqGene record not available' +p142 +asg8 +g4 +sg9 +(lp143 +(dp144 +S'grch37' +p145 +(dp146 +g14 +S'NW_003871103.3:g.1465305G>A' +p147 +sg16 +(dp148 +g18 +g19 +sg20 +g21 +sg22 +S'1465305' +p149 +sg24 +g25 +sssa(dp150 +S'hg19' +p151 +(dp152 +g14 +S'NW_003871103.3:g.1465305G>A' +p153 +sg16 +(dp154 +g18 +S'NW_003871103.3' +p155 +sg20 +g21 +sg22 +S'1465305' +p156 +sg24 +g25 +sssasg33 +VHomo sapiens methyl-CpG binding protein 2 (MECP2), transcript variant 2, mRNA +p157 +sg35 +S'MECP2' +p158 +sg37 +(dp159 +g39 +S'NP_001104262.1:p.(Arg180Ter)' +p160 +sg41 +S'NP_001104262.1:p.(R180*)' +p161 +ssg43 +g44 +sg45 +g4 +sg46 +g4 +sg47 +S'NM_001110792.1:c.538C>T' +p162 +sg49 +g4 +sg50 +(dp163 +S'hg19' +p164 +(dp165 +g14 +S'NC_000023.10:g.153296777G>A' +p166 +sg16 +(dp167 +g18 +g56 +sg20 +g21 +sg22 +S'153296777' +p168 +sg24 +g25 +sssg58 +(dp169 +g14 +S'NC_000023.11:g.154031326G>A' +p170 +sg16 +(dp171 +g18 +g56 +sg20 +g21 +sg22 +S'154031326' +p172 +sg24 +g25 +sssS'grch37' +p173 +(dp174 +g14 +S'NC_000023.10:g.153296777G>A' +p175 +sg16 +(dp176 +g18 +g67 +sg20 +g21 +sg22 +S'153296777' +p177 +sg24 +g25 +sssS'grch38' +p178 +(dp179 +g14 +S'NC_000023.11:g.154031326G>A' +p180 +sg16 +(dp181 +g18 +g67 +sg20 +g21 +sg22 +S'154031326' +p182 +sg24 +g25 +ssssg74 +(dp183 +g76 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001104262.1' +p184 +sg78 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001110792.1' +p185 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant322.txt b/VariantValidator/testing/testOutputsMasterITS/variant322.txt new file mode 100644 index 00000000..ea61adba --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant322.txt @@ -0,0 +1,176 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_198180.2:c.408_410del' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'Cannot identify an in-frame Termination codon in the variant mRNA sequence' +p19 +aS'RefSeqGene record not available' +p20 +asS'refseqgene_context_intronic_sequence' +p21 +g16 +sS'alt_genomic_loci' +p22 +(lp23 +sS'transcript_description' +p24 +VHomo sapiens pyroglutamylated RFamide peptide (QRFP), mRNA +p25 +sS'gene_symbol' +p26 +S'QRFP' +p27 +sS'hgvs_predicted_protein_consequence' +p28 +(dp29 +S'tlr' +p30 +S'NP_937823.1:p.?' +p31 +sS'slr' +p32 +S'NP_937823.1:p.?' 
+p33 +ssS'submitted_variant' +p34 +S'NM_198180.2:c.408_410delGTG' +p35 +sS'genome_context_intronic_sequence' +p36 +g16 +sS'hgvs_lrg_variant' +p37 +g16 +sS'hgvs_transcript_variant' +p38 +S'NM_198180.2:c.408_410del' +p39 +sS'hgvs_refseqgene_variant' +p40 +g16 +sS'primary_assembly_loci' +p41 +(dp42 +S'hg19' +p43 +(dp44 +S'hgvs_genomic_description' +p45 +S'NC_000009.11:g.133768816_133768818del' +p46 +sS'vcf' +p47 +(dp48 +S'chr' +p49 +S'chr9' +p50 +sS'ref' +p51 +S'TCAC' +p52 +sS'pos' +p53 +S'133768815' +p54 +sS'alt' +p55 +S'T' +p56 +sssS'hg38' +p57 +(dp58 +g45 +S'NC_000009.12:g.130893429_130893431del' +p59 +sg47 +(dp60 +g49 +g50 +sg51 +S'TCAC' +p61 +sg53 +S'130893428' +p62 +sg55 +g56 +sssS'grch37' +p63 +(dp64 +g45 +S'NC_000009.11:g.133768816_133768818del' +p65 +sg47 +(dp66 +g49 +S'9' +p67 +sg51 +S'TCAC' +p68 +sg53 +S'133768815' +p69 +sg55 +g56 +sssS'grch38' +p70 +(dp71 +g45 +S'NC_000009.12:g.130893429_130893431del' +p72 +sg47 +(dp73 +g49 +g67 +sg51 +S'TCAC' +p74 +sg53 +S'130893428' +p75 +sg55 +g56 +ssssS'reference_sequence_records' +p76 +(dp77 +S'protein' +p78 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_937823.1' +p79 +sS'transcript' +p80 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_198180.2' +p81 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant323.txt b/VariantValidator/testing/testOutputsMasterITS/variant323.txt new file mode 100644 index 00000000..b12158b2 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant323.txt @@ -0,0 +1,174 @@ +(dp0 +S'NM_080877.2:c.1733_1735delinsTTT' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens solute carrier family 34 member 3 (SLC34A3), transcript variant 3, mRNA +p12 +sS'gene_symbol' +p13 +S'SLC34A3' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_543153.1:p.(Pro578_Lys579delinsLeuTer)' +p18 +sS'slr' +p19 +S'NP_543153.1:p.(P578_K579delinsL*)' +p20 +ssS'submitted_variant' +p21 +S'NM_080877.2:c.1733_1735delinsTTT' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_080877.2:c.1733_1735delinsTTT' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000009.11:g.140130801_140130803delinsTTT' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr9' +p37 +sS'ref' +p38 +S'CGA' +p39 +sS'pos' +p40 +S'140130801' +p41 +sS'alt' +p42 +S'TTT' +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000009.12:g.137236349_137236351delinsTTT' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +S'CGA' +p48 +sg40 +S'137236349' +p49 +sg42 +g43 +sssS'grch37' +p50 +(dp51 +g32 +S'NC_000009.11:g.140130801_140130803delinsTTT' +p52 +sg34 +(dp53 +g36 +S'9' +p54 +sg38 +S'CGA' +p55 +sg40 +S'140130801' +p56 +sg42 +g43 +sssS'grch38' +p57 +(dp58 +g32 +S'NC_000009.12:g.137236349_137236351delinsTTT' +p59 +sg34 +(dp60 +g36 +g54 +sg38 +S'CGA' +p61 +sg40 +S'137236349' +p62 +sg42 +g43 +ssssS'reference_sequence_records' +p63 +(dp64 +S'protein' 
+p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_543153.1' +p66 +sS'transcript' +p67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_080877.2' +p68 +sssS'flag' +p69 +S'gene_variant' +p70 +sS'metadata' +p71 +(dp72 +S'variantvalidator_hgvs_version' +p73 +S'1.1.3' +p74 +sS'uta_schema' +p75 +S'uta_20180821' +p76 +sS'seqrepo_db' +p77 +S'2018-08-21' +p78 +sS'variantvalidator_version' +p79 +S'v0.2' +p80 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant324.txt b/VariantValidator/testing/testOutputsMasterITS/variant324.txt new file mode 100644 index 00000000..a7f2bd3d --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant324.txt @@ -0,0 +1,174 @@ +(dp0 +S'NM_080877.2:c.1735_1737delinsTGA' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens solute carrier family 34 member 3 (SLC34A3), transcript variant 3, mRNA +p12 +sS'gene_symbol' +p13 +S'SLC34A3' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_543153.1:p.(Lys579Ter)' +p18 +sS'slr' +p19 +S'NP_543153.1:p.(K579*)' +p20 +ssS'submitted_variant' +p21 +S'NM_080877.2:c.1735_1737delinsTGA' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_080877.2:c.1735_1737delinsTGA' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000009.11:g.140130803_140130805delinsTGA' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr9' +p37 +sS'ref' +p38 +S'AAG' +p39 +sS'pos' +p40 +S'140130803' +p41 +sS'alt' +p42 +S'TGA' +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000009.12:g.137236351_137236353delinsTGA' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +S'AAG' +p48 +sg40 +S'137236351' +p49 +sg42 +g43 +sssS'grch37' +p50 
+(dp51 +g32 +S'NC_000009.11:g.140130803_140130805delinsTGA' +p52 +sg34 +(dp53 +g36 +S'9' +p54 +sg38 +S'AAG' +p55 +sg40 +S'140130803' +p56 +sg42 +g43 +sssS'grch38' +p57 +(dp58 +g32 +S'NC_000009.12:g.137236351_137236353delinsTGA' +p59 +sg34 +(dp60 +g36 +g54 +sg38 +S'AAG' +p61 +sg40 +S'137236351' +p62 +sg42 +g43 +ssssS'reference_sequence_records' +p63 +(dp64 +S'protein' +p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_543153.1' +p66 +sS'transcript' +p67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_080877.2' +p68 +sssS'flag' +p69 +S'gene_variant' +p70 +sS'metadata' +p71 +(dp72 +S'variantvalidator_hgvs_version' +p73 +S'1.1.3' +p74 +sS'uta_schema' +p75 +S'uta_20180821' +p76 +sS'seqrepo_db' +p77 +S'2018-08-21' +p78 +sS'variantvalidator_version' +p79 +S'v0.2' +p80 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant325.txt b/VariantValidator/testing/testOutputsMasterITS/variant325.txt new file mode 100644 index 00000000..984fed33 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant325.txt @@ -0,0 +1,174 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_080877.2:c.1735_1737delinsTAATTGTTC' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'RefSeqGene record not available' +p19 +asS'refseqgene_context_intronic_sequence' +p20 +g16 +sS'alt_genomic_loci' +p21 +(lp22 +sS'transcript_description' +p23 +VHomo sapiens solute carrier family 34 member 3 (SLC34A3), transcript variant 3, mRNA +p24 +sS'gene_symbol' +p25 +S'SLC34A3' +p26 +sS'hgvs_predicted_protein_consequence' +p27 +(dp28 +S'tlr' +p29 +S'NP_543153.1:p.(Lys579Ter)' +p30 +sS'slr' +p31 +S'NP_543153.1:p.(K579*)' +p32 +ssS'submitted_variant' +p33 +S'NM_080877.2:c.1735_1737delinsTAATTGTTC' +p34 
+sS'genome_context_intronic_sequence' +p35 +g16 +sS'hgvs_lrg_variant' +p36 +g16 +sS'hgvs_transcript_variant' +p37 +S'NM_080877.2:c.1735_1737delinsTAATTGTTC' +p38 +sS'hgvs_refseqgene_variant' +p39 +g16 +sS'primary_assembly_loci' +p40 +(dp41 +S'hg19' +p42 +(dp43 +S'hgvs_genomic_description' +p44 +S'NC_000009.11:g.140130803_140130805delinsTAATTGTTC' +p45 +sS'vcf' +p46 +(dp47 +S'chr' +p48 +S'chr9' +p49 +sS'ref' +p50 +S'AAG' +p51 +sS'pos' +p52 +S'140130803' +p53 +sS'alt' +p54 +S'TAATTGTTC' +p55 +sssS'hg38' +p56 +(dp57 +g44 +S'NC_000009.12:g.137236351_137236353delinsTAATTGTTC' +p58 +sg46 +(dp59 +g48 +g49 +sg50 +S'AAG' +p60 +sg52 +S'137236351' +p61 +sg54 +g55 +sssS'grch37' +p62 +(dp63 +g44 +S'NC_000009.11:g.140130803_140130805delinsTAATTGTTC' +p64 +sg46 +(dp65 +g48 +S'9' +p66 +sg50 +S'AAG' +p67 +sg52 +S'140130803' +p68 +sg54 +g55 +sssS'grch38' +p69 +(dp70 +g44 +S'NC_000009.12:g.137236351_137236353delinsTAATTGTTC' +p71 +sg46 +(dp72 +g48 +g66 +sg50 +S'AAG' +p73 +sg52 +S'137236351' +p74 +sg54 +g55 +ssssS'reference_sequence_records' +p75 +(dp76 +S'protein' +p77 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_543153.1' +p78 +sS'transcript' +p79 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_080877.2' +p80 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant326.txt b/VariantValidator/testing/testOutputsMasterITS/variant326.txt new file mode 100644 index 00000000..91ec5c8c --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant326.txt @@ -0,0 +1,171 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_080877.2:c.1737delinsATTGTTC' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens solute carrier family 34 member 3 (SLC34A3), transcript variant 3, mRNA +p14 +sS'gene_symbol' +p15 +S'SLC34A3' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_543153.1:p.(Lys579_Ala580insLeuPhe)' +p20 +sS'slr' +p21 +S'NP_543153.1:p.(K579_A580insLF)' +p22 +ssS'submitted_variant' +p23 +S'NM_080877.2:c.1737delinsATTGTTC' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_080877.2:c.1737delinsATTGTTC' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000009.11:g.140130805delinsATTGTTC' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr9' +p39 +sS'ref' +p40 +S'G' +p41 +sS'pos' +p42 +S'140130805' +p43 +sS'alt' +p44 +S'ATTGTTC' +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000009.12:g.137236353delinsATTGTTC' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +g41 +sg42 +S'137236353' +p50 +sg44 +g45 +sssS'grch37' +p51 +(dp52 +g34 +S'NC_000009.11:g.140130805delinsATTGTTC' +p53 +sg36 +(dp54 +g38 +S'9' +p55 +sg40 +g41 +sg42 +S'140130805' +p56 +sg44 +g45 +sssS'grch38' +p57 +(dp58 +g34 +S'NC_000009.12:g.137236353delinsATTGTTC' +p59 +sg36 +(dp60 +g38 +g55 +sg40 +g41 +sg42 +S'137236353' +p61 +sg44 +g45 +ssssS'reference_sequence_records' +p62 +(dp63 +S'protein' +p64 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NP_543153.1' +p65 +sS'transcript' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_080877.2' +p67 +sssS'metadata' +p68 +(dp69 +S'variantvalidator_hgvs_version' +p70 +S'1.1.3' +p71 +sS'uta_schema' +p72 +S'uta_20180821' +p73 +sS'seqrepo_db' +p74 +S'2018-08-21' +p75 +sS'variantvalidator_version' +p76 +S'v0.2' +p77 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant327.txt b/VariantValidator/testing/testOutputsMasterITS/variant327.txt new file mode 100644 index 00000000..f2da1be1 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant327.txt @@ -0,0 +1,175 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.4392_*2delinsAGAG' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p14 +sS'gene_symbol' +p15 +S'COL1A1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_000079.2:p.(Ter1465GluextTer84)' +p20 +sS'slr' +p21 +S'NP_000079.2:p.(*1465Eext*84)' +p22 +ssS'submitted_variant' +p23 +S'NM_000088.3:c.4392_*2delinsAGAG' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_000088.3:c.4392_*2delinsAGAG' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000017.10:g.48262861_48262866delinsCTCT' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr17' +p39 +sS'ref' +p40 +S'GTTTAC' +p41 +sS'pos' +p42 +S'48262861' +p43 +sS'alt' +p44 +VCTCT +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000017.11:g.50185500_50185505delinsCTCT' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +S'GTTTAC' +p50 +sg42 +S'50185500' +p51 +sg44 +VCTCT +p52 +sssS'grch37' +p53 +(dp54 +g34 
+S'NC_000017.10:g.48262861_48262866delinsCTCT' +p55 +sg36 +(dp56 +g38 +S'17' +p57 +sg40 +S'GTTTAC' +p58 +sg42 +S'48262861' +p59 +sg44 +g45 +sssS'grch38' +p60 +(dp61 +g34 +S'NC_000017.11:g.50185500_50185505delinsCTCT' +p62 +sg36 +(dp63 +g38 +g57 +sg40 +S'GTTTAC' +p64 +sg42 +S'50185500' +p65 +sg44 +g52 +ssssS'reference_sequence_records' +p66 +(dp67 +S'protein' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p69 +sS'transcript' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p71 +sssS'metadata' +p72 +(dp73 +S'variantvalidator_hgvs_version' +p74 +S'1.1.3' +p75 +sS'uta_schema' +p76 +S'uta_20180821' +p77 +sS'seqrepo_db' +p78 +S'2018-08-21' +p79 +sS'variantvalidator_version' +p80 +S'v0.2' +p81 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant328.txt b/VariantValidator/testing/testOutputsMasterITS/variant328.txt new file mode 100644 index 00000000..6151bb5b --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant328.txt @@ -0,0 +1,175 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.589_591delinsAGAAGC' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p14 +sS'gene_symbol' +p15 +S'COL1A1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_000079.2:p.(Gly197delinsArgSer)' +p20 +sS'slr' +p21 +S'NP_000079.2:p.(G197delinsRS)' +p22 +ssS'submitted_variant' +p23 +S'NM_000088.3:c.589_591delinsAGAAGC' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_000088.3:c.589_591delinsAGAAGC' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 
+S'NC_000017.10:g.48275361_48275363delinsGCTTCT' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr17' +p39 +sS'ref' +p40 +S'ACC' +p41 +sS'pos' +p42 +S'48275361' +p43 +sS'alt' +p44 +VGCTTCT +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000017.11:g.50198000_50198002delinsGCTTCT' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +S'ACC' +p50 +sg42 +S'50198000' +p51 +sg44 +VGCTTCT +p52 +sssS'grch37' +p53 +(dp54 +g34 +S'NC_000017.10:g.48275361_48275363delinsGCTTCT' +p55 +sg36 +(dp56 +g38 +S'17' +p57 +sg40 +S'ACC' +p58 +sg42 +S'48275361' +p59 +sg44 +g45 +sssS'grch38' +p60 +(dp61 +g34 +S'NC_000017.11:g.50198000_50198002delinsGCTTCT' +p62 +sg36 +(dp63 +g38 +g57 +sg40 +S'ACC' +p64 +sg42 +S'50198000' +p65 +sg44 +g52 +ssssS'reference_sequence_records' +p66 +(dp67 +S'protein' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p69 +sS'transcript' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p71 +sssS'metadata' +p72 +(dp73 +S'variantvalidator_hgvs_version' +p74 +S'1.1.3' +p75 +sS'uta_schema' +p76 +S'uta_20180821' +p77 +sS'seqrepo_db' +p78 +S'2018-08-21' +p79 +sS'variantvalidator_version' +p80 +S'v0.2' +p81 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant329.txt b/VariantValidator/testing/testOutputsMasterITS/variant329.txt new file mode 100644 index 00000000..42b8acfb --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant329.txt @@ -0,0 +1,171 @@ +(dp0 +S'NM_000885.5:c.*2536delinsAGAAAAATCA' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens integrin subunit alpha 4 (ITGA4), transcript variant 1, mRNA +p12 +sS'gene_symbol' +p13 +S'ITGA4' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_000876.3:p.?' +p18 +sS'slr' +p19 +S'NP_000876.3:p.?' 
+p20 +ssS'submitted_variant' +p21 +S'NM_000885.5:c.*2536delinsAGAAAAATCA' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_000885.5:c.*2536delinsAGAAAAATCA' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000002.11:g.182402790delinsAGAAAAATCA' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr2' +p37 +sS'ref' +p38 +S'G' +p39 +sS'pos' +p40 +S'182402790' +p41 +sS'alt' +p42 +S'AGAAAAATCA' +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000002.12:g.181538063delinsAGAAAAATCA' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +g39 +sg40 +S'181538063' +p48 +sg42 +g43 +sssS'grch37' +p49 +(dp50 +g32 +S'NC_000002.11:g.182402790delinsAGAAAAATCA' +p51 +sg34 +(dp52 +g36 +S'2' +p53 +sg38 +g39 +sg40 +S'182402790' +p54 +sg42 +g43 +sssS'grch38' +p55 +(dp56 +g32 +S'NC_000002.12:g.181538063delinsAGAAAAATCA' +p57 +sg34 +(dp58 +g36 +g53 +sg38 +g39 +sg40 +S'181538063' +p59 +sg42 +g43 +ssssS'reference_sequence_records' +p60 +(dp61 +S'protein' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000876.3' +p63 +sS'transcript' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000885.5' +p65 +sssS'flag' +p66 +S'gene_variant' +p67 +sS'metadata' +p68 +(dp69 +S'variantvalidator_hgvs_version' +p70 +S'1.1.3' +p71 +sS'uta_schema' +p72 +S'uta_20180821' +p73 +sS'seqrepo_db' +p74 +S'2018-08-21' +p75 +sS'variantvalidator_version' +p76 +S'v0.2' +p77 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant33.txt b/VariantValidator/testing/testOutputsMasterITS/variant33.txt new file mode 100644 index 00000000..79f857c0 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant33.txt @@ -0,0 +1,177 @@ +(dp0 +S'NM_000088.3:c.591_593inv' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p12 +sS'gene_symbol' +p13 +S'COL1A1' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_000079.2:p.(Pro198Asp)' +p18 +sS'slr' +p19 +S'NP_000079.2:p.(P198D)' +p20 +ssS'submitted_variant' +p21 +S'NM_000088.3:c.591_593inv' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_000088.3:c.591_593inv' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000017.10:g.48275359_48275361inv' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr17' +p37 +sS'ref' +p38 +S'GGA' +p39 +sS'pos' +p40 +S'48275359' +p41 +sS'alt' +p42 +S'TCC' +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000017.11:g.50197998_50198000inv' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +S'GGA' +p48 +sg40 +S'50197998' +p49 +sg42 +S'TCC' +p50 +sssS'grch37' +p51 +(dp52 +g32 +S'NC_000017.10:g.48275359_48275361inv' +p53 +sg34 +(dp54 +g36 +S'17' +p55 +sg38 +S'GGA' +p56 +sg40 +S'48275359' +p57 +sg42 +S'TCC' +p58 +sssS'grch38' +p59 +(dp60 +g32 +S'NC_000017.11:g.50197998_50198000inv' +p61 +sg34 +(dp62 +g36 +g55 +sg38 +S'GGA' +p63 +sg40 +S'50197998' +p64 +sg42 +S'TCC' +p65 +ssssS'reference_sequence_records' +p66 +(dp67 +S'protein' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p69 +sS'transcript' +p70 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p71 +sssS'flag' +p72 +S'gene_variant' +p73 +sS'metadata' +p74 +(dp75 +S'variantvalidator_hgvs_version' +p76 +S'1.1.3' +p77 +sS'uta_schema' +p78 +S'uta_20180821' +p79 +sS'seqrepo_db' +p80 +S'2018-08-21' +p81 +sS'variantvalidator_version' +p82 +S'v0.2' +p83 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant330.txt b/VariantValidator/testing/testOutputsMasterITS/variant330.txt new file mode 100644 index 00000000..6f4cace8 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant330.txt @@ -0,0 +1,175 @@ +(dp0 +S'NM_002693.2:c.-186_-185delinsCC' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens DNA polymerase gamma, catalytic subunit (POLG), transcript variant 1, mRNA +p12 +sS'gene_symbol' +p13 +S'POLG' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_002684.1:p.?' +p18 +sS'slr' +p19 +S'NP_002684.1:p.?' 
+p20 +ssS'submitted_variant' +p21 +S'NM_002693.2:c.-186_-185delinsCC' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_002693.2:c.-186_-185delinsCC' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'grch38' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000015.10:g.89334698_89334699delinsGG' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'15' +p37 +sS'ref' +p38 +S'CT' +p39 +sS'pos' +p40 +S'89334698' +p41 +sS'alt' +p42 +VGG +p43 +sssS'grch37' +p44 +(dp45 +g32 +S'NC_000015.9:g.89877929_89877930delinsGG' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +S'CT' +p48 +sg40 +S'89877929' +p49 +sg42 +VGG +p50 +sssS'hg38' +p51 +(dp52 +g32 +S'NC_000015.10:g.89334698_89334699delinsGG' +p53 +sg34 +(dp54 +g36 +S'chr15' +p55 +sg38 +S'CT' +p56 +sg40 +S'89334698' +p57 +sg42 +g43 +sssS'hg19' +p58 +(dp59 +g32 +S'NC_000015.9:g.89877929_89877930delinsGG' +p60 +sg34 +(dp61 +g36 +g55 +sg38 +S'CT' +p62 +sg40 +S'89877929' +p63 +sg42 +g50 +ssssS'reference_sequence_records' +p64 +(dp65 +S'protein' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_002684.1' +p67 +sS'transcript' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_002693.2' +p69 +sssS'flag' +p70 +S'gene_variant' +p71 +sS'metadata' +p72 +(dp73 +S'variantvalidator_hgvs_version' +p74 +S'1.1.3' +p75 +sS'uta_schema' +p76 +S'uta_20180821' +p77 +sS'seqrepo_db' +p78 +S'2018-08-21' +p79 +sS'variantvalidator_version' +p80 +S'v0.2' +p81 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant331.txt b/VariantValidator/testing/testOutputsMasterITS/variant331.txt new file mode 100644 index 00000000..daa65a33 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant331.txt @@ -0,0 +1,533 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_001287344.1:c.690_690+1insCTACATAG' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'RefSeqGene record not available' +p19 +asS'refseqgene_context_intronic_sequence' +p20 +g16 +sS'alt_genomic_loci' +p21 +(lp22 +(dp23 +S'grch37' +p24 +(dp25 +S'hgvs_genomic_description' +p26 +S'NW_004070883.1:g.43848_43849insCTATGTAG' +p27 +sS'vcf' +p28 +(dp29 +S'chr' +p30 +S'HG1439_PATCH' +p31 +sS'ref' +p32 +S'C' +p33 +sS'pos' +p34 +S'43848' +p35 +sS'alt' +p36 +VCCTATGTAG +p37 +sssa(dp38 +S'hg19' +p39 +(dp40 +g26 +S'NW_004070883.1:g.43848_43849insCTATGTAG' +p41 +sg28 +(dp42 +g30 +S'NW_004070883.1' +p43 +sg32 +g33 +sg34 +S'43848' +p44 +sg36 +VCCTATGTAG +p45 +sssasS'transcript_description' +p46 +VHomo sapiens Bruton tyrosine kinase (BTK), transcript variant 3, mRNA +p47 +sS'gene_symbol' +p48 +S'BTK' +p49 +sS'hgvs_predicted_protein_consequence' +p50 +(dp51 +S'tlr' +p52 +S'NP_001274273.1:p.?' +p53 +sS'slr' +p54 +S'NP_001274273.1:p.?' 
+p55 +ssS'submitted_variant' +p56 +S'NG_009616.1:g.29052_29053insCTACATAG' +p57 +sS'genome_context_intronic_sequence' +p58 +S'NC_000023.10(NM_001287344.1):c.690_690+1insCTACATAG' +p59 +sS'hgvs_lrg_variant' +p60 +g16 +sS'hgvs_transcript_variant' +p61 +S'NM_001287344.1:c.690_690+1insCTACATAG' +p62 +sS'hgvs_refseqgene_variant' +p63 +g16 +sS'primary_assembly_loci' +p64 +(dp65 +S'hg19' +p66 +(dp67 +g26 +S'NC_000023.10:g.100617160_100617161insCTATGTAG' +p68 +sg28 +(dp69 +g30 +S'chrX' +p70 +sg32 +g33 +sg34 +S'100617160' +p71 +sg36 +VCCTATGTAG +p72 +sssS'hg38' +p73 +(dp74 +g26 +S'NC_000023.11:g.101362172_101362173insCTATGTAG' +p75 +sg28 +(dp76 +g30 +g70 +sg32 +g33 +sg34 +S'101362172' +p77 +sg36 +VCCTATGTAG +p78 +sssS'grch37' +p79 +(dp80 +g26 +S'NC_000023.10:g.100617160_100617161insCTATGTAG' +p81 +sg28 +(dp82 +g30 +S'X' +p83 +sg32 +g33 +sg34 +S'100617160' +p84 +sg36 +VCCTATGTAG +p85 +sssS'grch38' +p86 +(dp87 +g26 +S'NC_000023.11:g.101362172_101362173insCTATGTAG' +p88 +sg28 +(dp89 +g30 +g83 +sg32 +g33 +sg34 +S'101362172' +p90 +sg36 +VCCTATGTAG +p91 +ssssS'reference_sequence_records' +p92 +(dp93 +S'protein' +p94 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001274273.1' +p95 +sS'transcript' +p96 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001287344.1' +p97 +sssS'NM_001287345.1:c.588_588+1insCTACATAG' +p98 +(dp99 +g15 +g16 +sg17 +(lp100 +S'RefSeqGene record not available' +p101 +asg20 +g16 +sg21 +(lp102 +(dp103 +S'grch37' +p104 +(dp105 +g26 +S'NW_004070883.1:g.43848_43849insCTATGTAG' +p106 +sg28 +(dp107 +g30 +g31 +sg32 +g33 +sg34 +S'43848' +p108 +sg36 +VCCTATGTAG +p109 +sssa(dp110 +S'hg19' +p111 +(dp112 +g26 +S'NW_004070883.1:g.43848_43849insCTATGTAG' +p113 +sg28 +(dp114 +g30 +S'NW_004070883.1' +p115 +sg32 +g33 +sg34 +S'43848' +p116 +sg36 +VCCTATGTAG +p117 +sssasg46 +VHomo sapiens Bruton tyrosine kinase (BTK), transcript variant 2, mRNA +p118 +sg48 +S'BTK' +p119 +sg50 +(dp120 +g52 +S'NP_001274274.1:p.?' +p121 +sg54 +S'NP_001274274.1:p.?' 
+p122 +ssg56 +g57 +sg58 +S'NC_000023.10(NM_001287345.1):c.588_588+1insCTACATAG' +p123 +sg60 +g16 +sg61 +S'NM_001287345.1:c.588_588+1insCTACATAG' +p124 +sg63 +g16 +sg64 +(dp125 +S'hg19' +p126 +(dp127 +g26 +S'NC_000023.10:g.100617160_100617161insCTATGTAG' +p128 +sg28 +(dp129 +g30 +g70 +sg32 +g33 +sg34 +S'100617160' +p130 +sg36 +VCCTATGTAG +p131 +sssg73 +(dp132 +g26 +S'NC_000023.11:g.101362172_101362173insCTATGTAG' +p133 +sg28 +(dp134 +g30 +g70 +sg32 +g33 +sg34 +S'101362172' +p135 +sg36 +VCCTATGTAG +p136 +sssS'grch37' +p137 +(dp138 +g26 +S'NC_000023.10:g.100617160_100617161insCTATGTAG' +p139 +sg28 +(dp140 +g30 +g83 +sg32 +g33 +sg34 +S'100617160' +p141 +sg36 +VCCTATGTAG +p142 +sssS'grch38' +p143 +(dp144 +g26 +S'NC_000023.11:g.101362172_101362173insCTATGTAG' +p145 +sg28 +(dp146 +g30 +g83 +sg32 +g33 +sg34 +S'101362172' +p147 +sg36 +VCCTATGTAG +p148 +ssssg92 +(dp149 +g94 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001274274.1' +p150 +sg96 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001287345.1' +p151 +sssS'NM_000061.2:c.588_588+1insCTACATAG' +p152 +(dp153 +g15 +g16 +sg17 +(lp154 +S'RefSeqGene record not available' +p155 +asg20 +g16 +sg21 +(lp156 +(dp157 +S'grch37' +p158 +(dp159 +g26 +S'NW_004070883.1:g.43848_43849insCTATGTAG' +p160 +sg28 +(dp161 +g30 +g31 +sg32 +g33 +sg34 +S'43848' +p162 +sg36 +VCCTATGTAG +p163 +sssa(dp164 +S'hg19' +p165 +(dp166 +g26 +S'NW_004070883.1:g.43848_43849insCTATGTAG' +p167 +sg28 +(dp168 +g30 +S'NW_004070883.1' +p169 +sg32 +g33 +sg34 +S'43848' +p170 +sg36 +VCCTATGTAG +p171 +sssasg46 +VHomo sapiens Bruton tyrosine kinase (BTK), transcript variant 1, mRNA +p172 +sg48 +S'BTK' +p173 +sg50 +(dp174 +g52 +S'NP_000052.1:p.?' +p175 +sg54 +S'NP_000052.1:p.?' 
+p176 +ssg56 +g57 +sg58 +S'NC_000023.10(NM_000061.2):c.588_588+1insCTACATAG' +p177 +sg60 +g16 +sg61 +S'NM_000061.2:c.588_588+1insCTACATAG' +p178 +sg63 +g16 +sg64 +(dp179 +S'hg19' +p180 +(dp181 +g26 +S'NC_000023.10:g.100617160_100617161insCTATGTAG' +p182 +sg28 +(dp183 +g30 +g70 +sg32 +g33 +sg34 +S'100617160' +p184 +sg36 +VCCTATGTAG +p185 +sssg73 +(dp186 +g26 +S'NC_000023.11:g.101362172_101362173insCTATGTAG' +p187 +sg28 +(dp188 +g30 +g70 +sg32 +g33 +sg34 +S'101362172' +p189 +sg36 +VCCTATGTAG +p190 +sssS'grch37' +p191 +(dp192 +g26 +S'NC_000023.10:g.100617160_100617161insCTATGTAG' +p193 +sg28 +(dp194 +g30 +g83 +sg32 +g33 +sg34 +S'100617160' +p195 +sg36 +VCCTATGTAG +p196 +sssS'grch38' +p197 +(dp198 +g26 +S'NC_000023.11:g.101362172_101362173insCTATGTAG' +p199 +sg28 +(dp200 +g30 +g83 +sg32 +g33 +sg34 +S'101362172' +p201 +sg36 +VCCTATGTAG +p202 +ssssg92 +(dp203 +g94 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000052.1' +p204 +sg96 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000061.2' +p205 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant332.txt b/VariantValidator/testing/testOutputsMasterITS/variant332.txt new file mode 100644 index 00000000..d21731b7 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant332.txt @@ -0,0 +1,215 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000061.2:c.588_588+1insCTACATAG' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +(dp13 +S'grch37' +p14 +(dp15 +S'hgvs_genomic_description' +p16 +S'NW_004070883.1:g.43848_43849insCTATGTAG' +p17 +sS'vcf' +p18 +(dp19 +S'chr' +p20 +S'HG1439_PATCH' +p21 +sS'ref' +p22 +S'C' +p23 +sS'pos' +p24 +S'43848' +p25 +sS'alt' +p26 +VCCTATGTAG +p27 +sssa(dp28 +S'hg19' +p29 +(dp30 +g16 +S'NW_004070883.1:g.43848_43849insCTATGTAG' +p31 +sg18 +(dp32 +g20 +S'NW_004070883.1' +p33 +sg22 +g23 
+sg24 +S'43848' +p34 +sg26 +VCCTATGTAG +p35 +sssasS'transcript_description' +p36 +VHomo sapiens Bruton tyrosine kinase (BTK), transcript variant 1, mRNA +p37 +sS'gene_symbol' +p38 +S'BTK' +p39 +sS'hgvs_predicted_protein_consequence' +p40 +(dp41 +S'tlr' +p42 +S'NP_000052.1:p.?' +p43 +sS'slr' +p44 +S'NP_000052.1:p.?' +p45 +ssS'submitted_variant' +p46 +S'NM_000061.2:c.588_588+1insCTACATAG' +p47 +sS'genome_context_intronic_sequence' +p48 +S'NC_000023.10(NM_000061.2):c.588_588+1insCTACATAG' +p49 +sS'hgvs_lrg_variant' +p50 +g6 +sS'hgvs_transcript_variant' +p51 +S'NM_000061.2:c.588_588+1insCTACATAG' +p52 +sS'hgvs_refseqgene_variant' +p53 +g6 +sS'primary_assembly_loci' +p54 +(dp55 +S'hg19' +p56 +(dp57 +g16 +S'NC_000023.10:g.100617160_100617161insCTATGTAG' +p58 +sg18 +(dp59 +g20 +S'chrX' +p60 +sg22 +g23 +sg24 +S'100617160' +p61 +sg26 +VCCTATGTAG +p62 +sssS'hg38' +p63 +(dp64 +g16 +S'NC_000023.11:g.101362172_101362173insCTATGTAG' +p65 +sg18 +(dp66 +g20 +g60 +sg22 +g23 +sg24 +S'101362172' +p67 +sg26 +VCCTATGTAG +p68 +sssS'grch37' +p69 +(dp70 +g16 +S'NC_000023.10:g.100617160_100617161insCTATGTAG' +p71 +sg18 +(dp72 +g20 +S'X' +p73 +sg22 +g23 +sg24 +S'100617160' +p74 +sg26 +VCCTATGTAG +p75 +sssS'grch38' +p76 +(dp77 +g16 +S'NC_000023.11:g.101362172_101362173insCTATGTAG' +p78 +sg18 +(dp79 +g20 +g73 +sg22 +g23 +sg24 +S'101362172' +p80 +sg26 +VCCTATGTAG +p81 +ssssS'reference_sequence_records' +p82 +(dp83 +S'protein' +p84 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000052.1' +p85 +sS'transcript' +p86 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000061.2' +p87 +sssS'metadata' +p88 +(dp89 +S'variantvalidator_hgvs_version' +p90 +S'1.1.3' +p91 +sS'uta_schema' +p92 +S'uta_20180821' +p93 +sS'seqrepo_db' +p94 +S'2018-08-21' +p95 +sS'variantvalidator_version' +p96 +S'v0.2' +p97 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant333.txt b/VariantValidator/testing/testOutputsMasterITS/variant333.txt new file mode 100644 index 00000000..fa35e680 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant333.txt @@ -0,0 +1,142 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000061.2:c.588_589insCTACATAG' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'This coding sequence variant description spans at least one intron' +p9 +aS'Use of the corresponding genomic sequence variant descriptions may be invalid. Please refer to https://www35.lamp.le.ac.uk/recommendations/' +p10 +aS'RefSeqGene record not available' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g6 +sS'alt_genomic_loci' +p13 +(lp14 +sS'transcript_description' +p15 +VHomo sapiens Bruton tyrosine kinase (BTK), transcript variant 1, mRNA +p16 +sS'gene_symbol' +p17 +S'BTK' +p18 +sS'hgvs_predicted_protein_consequence' +p19 +(dp20 +S'tlr' +p21 +S'NP_000052.1:p.(Ile197LeufsTer5)' +p22 +sS'slr' +p23 +S'NP_000052.1:p.(I197Lfs*5)' +p24 +ssS'submitted_variant' +p25 +S'NM_000061.2:c.588_589insCTACATAG' +p26 +sS'genome_context_intronic_sequence' +p27 +g6 +sS'hgvs_lrg_variant' +p28 +g6 +sS'hgvs_transcript_variant' +p29 +S'NM_000061.2:c.588_589insCTACATAG' +p30 +sS'hgvs_refseqgene_variant' +p31 +g6 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000023.10:g.100615743_100617161delinsTCTATGTAGC' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chrX' +p41 +sS'ref' +p42 
+S'GTTAGGAGAAAAGGTAGGAGGGTTTGTCAAGATACCAAGCACTCTTCTCTTCTCTCCCAACTCTCTGGCTTACTCAAGACACCCAAATCAGGCATACTAAAATATTACTCAGCAGTCATTCAACAACCATTTTTAAGCACCAGTGCAGGAGTTCTCAGCCTTGCACACATATAAAGACCATGTATGGAACTTTTAAATTCCAATGTACTTTCGGAGGCCAAGGCGGGCGGATCAGTTGAGGCCAGGAGTTCGAGACCAGCCTGGCCAACGTGACGAAACCCCATCTCTACTAAAAATGCAAAAATCAGCTGGGCATGGTAGTGTGTGCATATAGCCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATAACTTGACCCCAGGAGGCGGAGGTTGTAGTGAGCCAAGACCACGCCACTGCACTCCAGCCTGGGTGACAGAGTGAGACTGTCTCAAAAAAAAACCCACAAAAAACAAAAAACCAATTCCAATACCTAGTCAGTTTCCTCACAGACCAATTACATCAAAATCAAACTCTCAGGAATGGGACCCAAACATTACTATTTTTAAAGCTCACTAGACAAAAACCATTTATAGCTAAGGTCAGGAAACCGGCTTGGCACTAAACTTGTACGTGAATCTACTAAGTGGCTCAGAACCTTGGTTTCCTTCTTTGTAAAATGAGTATAATAATACCTGCTCTACTTACTTACAATATGTGAGAAAGGGCTTTCTAGCCCTCAAGAAGGAACCAAAAAAAAAAAAAAAACTTCTGAAGTGTTAGTGATAGGTGGTGTTAGTGCTAAGTGTTGAGTATGTTGGTATTAAGTGTTAAATTCTTCTAACTTTACTGTATGTTTGAAAATACTTTCCAGCTGGGCACGGTGGCTCACGCCTGTAATCCCAGCACTTTAGGAGGCCGAGGCGGGTTCGAGACCAGCCTGAACAACATGGTGACACCCCCATCTCTACTAAAAATACAAAAATTAGCTGGGCGTGGTGGTGCATGCCTGTAATCCCAGCTACTCAGGAGGCTGAGACAGGAGAATCACTTGAATCTGGGAGGCGGAGGTTGCAGTGAGCCGAGATCATGCCATTACACTCGAACCTGGGCGACAGAGCGAGACTCCGTCTCAAAACAAAAACAAAAGCAAAAACAAAACAACAACAAAAACTTTACATTAAAAAAATCAGGTTTTGTTCTAAACAGGTGATTGGATTACATGGTTGCTGAGAGCCTTCTATCTTTCCATCGAGGAGGAAATCCTAATTAGAAGAACAAATCCCCCATCTTAGCAAGAATACCAATTAACACTGCCAAGTCCCAGGGTAATTCTAAGACTCTAGTGTGTTCTTAGGGCTTGACTATAAGTTTCCATTTAAGCAGTGGCAGCACCCAGTTTCCCTGTATAC' +p43 +sS'pos' +p44 +S'100615751' +p45 +sS'alt' +p46 +S'G' +p47 +sssS'grch37' +p48 +(dp49 +g36 +S'NC_000023.10:g.100615743_100617161delinsTCTATGTAGC' +p50 +sg38 +(dp51 +g40 +S'X' +p52 +sg42 
+S'GTTAGGAGAAAAGGTAGGAGGGTTTGTCAAGATACCAAGCACTCTTCTCTTCTCTCCCAACTCTCTGGCTTACTCAAGACACCCAAATCAGGCATACTAAAATATTACTCAGCAGTCATTCAACAACCATTTTTAAGCACCAGTGCAGGAGTTCTCAGCCTTGCACACATATAAAGACCATGTATGGAACTTTTAAATTCCAATGTACTTTCGGAGGCCAAGGCGGGCGGATCAGTTGAGGCCAGGAGTTCGAGACCAGCCTGGCCAACGTGACGAAACCCCATCTCTACTAAAAATGCAAAAATCAGCTGGGCATGGTAGTGTGTGCATATAGCCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATAACTTGACCCCAGGAGGCGGAGGTTGTAGTGAGCCAAGACCACGCCACTGCACTCCAGCCTGGGTGACAGAGTGAGACTGTCTCAAAAAAAAACCCACAAAAAACAAAAAACCAATTCCAATACCTAGTCAGTTTCCTCACAGACCAATTACATCAAAATCAAACTCTCAGGAATGGGACCCAAACATTACTATTTTTAAAGCTCACTAGACAAAAACCATTTATAGCTAAGGTCAGGAAACCGGCTTGGCACTAAACTTGTACGTGAATCTACTAAGTGGCTCAGAACCTTGGTTTCCTTCTTTGTAAAATGAGTATAATAATACCTGCTCTACTTACTTACAATATGTGAGAAAGGGCTTTCTAGCCCTCAAGAAGGAACCAAAAAAAAAAAAAAAACTTCTGAAGTGTTAGTGATAGGTGGTGTTAGTGCTAAGTGTTGAGTATGTTGGTATTAAGTGTTAAATTCTTCTAACTTTACTGTATGTTTGAAAATACTTTCCAGCTGGGCACGGTGGCTCACGCCTGTAATCCCAGCACTTTAGGAGGCCGAGGCGGGTTCGAGACCAGCCTGAACAACATGGTGACACCCCCATCTCTACTAAAAATACAAAAATTAGCTGGGCGTGGTGGTGCATGCCTGTAATCCCAGCTACTCAGGAGGCTGAGACAGGAGAATCACTTGAATCTGGGAGGCGGAGGTTGCAGTGAGCCGAGATCATGCCATTACACTCGAACCTGGGCGACAGAGCGAGACTCCGTCTCAAAACAAAAACAAAAGCAAAAACAAAACAACAACAAAAACTTTACATTAAAAAAATCAGGTTTTGTTCTAAACAGGTGATTGGATTACATGGTTGCTGAGAGCCTTCTATCTTTCCATCGAGGAGGAAATCCTAATTAGAAGAACAAATCCCCCATCTTAGCAAGAATACCAATTAACACTGCCAAGTCCCAGGGTAATTCTAAGACTCTAGTGTGTTCTTAGGGCTTGACTATAAGTTTCCATTTAAGCAGTGGCAGCACCCAGTTTCCCTGTATAC' +p53 +sg44 +S'100615751' +p54 +sg46 +g47 +ssssS'reference_sequence_records' +p55 +(dp56 +S'protein' +p57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000052.1' +p58 +sS'transcript' +p59 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000061.2' +p60 +sssS'metadata' +p61 +(dp62 +S'variantvalidator_hgvs_version' +p63 +S'1.1.3' +p64 +sS'uta_schema' +p65 +S'uta_20180821' +p66 +sS'seqrepo_db' +p67 +S'2018-08-21' +p68 +sS'variantvalidator_version' +p69 +S'v0.2' +p70 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant34.txt b/VariantValidator/testing/testOutputsMasterITS/variant34.txt new file mode 100644 index 00000000..a1dc82a1 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant34.txt @@ -0,0 +1,259 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_000518.5:c.20A>T' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'RefSeqGene record not available' +p19 +asS'refseqgene_context_intronic_sequence' +p20 +g16 +sS'alt_genomic_loci' +p21 +(lp22 +sS'transcript_description' +p23 +VHomo sapiens hemoglobin subunit beta (HBB), mRNA +p24 +sS'gene_symbol' +p25 +S'HBB' +p26 +sS'hgvs_predicted_protein_consequence' +p27 +(dp28 +S'tlr' +p29 +S'NP_000509.1:p.(Glu7Val)' +p30 +sS'slr' +p31 +S'NP_000509.1:p.(E7V)' +p32 +ssS'submitted_variant' +p33 +S'11-5248232-T-A' +p34 +sS'genome_context_intronic_sequence' +p35 +g16 +sS'hgvs_lrg_variant' +p36 +g16 +sS'hgvs_transcript_variant' +p37 +S'NM_000518.5:c.20A>T' +p38 +sS'hgvs_refseqgene_variant' +p39 +g16 +sS'primary_assembly_loci' +p40 +(dp41 +S'hg19' +p42 +(dp43 +S'hgvs_genomic_description' +p44 +S'NC_000011.9:g.5248232T>A' +p45 +sS'vcf' +p46 +(dp47 +S'chr' +p48 +S'chr11' +p49 +sS'ref' +p50 +VT +p51 +sS'pos' +p52 +S'5248232' +p53 +sS'alt' +p54 +VA +p55 +sssS'grch37' +p56 +(dp57 +g44 +S'NC_000011.9:g.5248232T>A' +p58 +sg46 +(dp59 +g48 +S'11' +p60 +sg50 +g51 +sg52 +S'5248232' +p61 +sg54 +g55 +ssssS'reference_sequence_records' +p62 +(dp63 +S'protein' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000509.1' +p65 +sS'transcript' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000518.5' +p67 +sssS'NM_000518.4:c.20A>T' +p68 +(dp69 +g15 +g16 +sg17 +(lp70 +S'A more recent version of the 
selected reference sequence NM_000518.4 is available (NM_000518.5)' +p71 +aS'NM_000518.5:c.20A>T MUST be fully validated prior to use in reports' +p72 +aS'select_variants=NM_000518.5:c.20A>T' +p73 +aS'RefSeqGene record not available' +p74 +asg20 +g16 +sg21 +(lp75 +sg23 +VHomo sapiens hemoglobin subunit beta (HBB), mRNA +p76 +sg25 +S'HBB' +p77 +sg27 +(dp78 +g29 +S'NP_000509.1:p.(Glu7Val)' +p79 +sg31 +S'NP_000509.1:p.(E7V)' +p80 +ssg33 +g34 +sg35 +g16 +sg36 +g16 +sg37 +S'NM_000518.4:c.20A>T' +p81 +sg39 +g16 +sg40 +(dp82 +S'grch38' +p83 +(dp84 +g44 +S'NC_000011.10:g.5227002T>A' +p85 +sg46 +(dp86 +g48 +g60 +sg50 +g51 +sg52 +S'5227002' +p87 +sg54 +g55 +sssS'grch37' +p88 +(dp89 +g44 +S'NC_000011.9:g.5248232T>A' +p90 +sg46 +(dp91 +g48 +g60 +sg50 +g51 +sg52 +S'5248232' +p92 +sg54 +g55 +sssS'hg38' +p93 +(dp94 +g44 +S'NC_000011.10:g.5227002T>A' +p95 +sg46 +(dp96 +g48 +g49 +sg50 +g51 +sg52 +S'5227002' +p97 +sg54 +g55 +sssS'hg19' +p98 +(dp99 +g44 +S'NC_000011.9:g.5248232T>A' +p100 +sg46 +(dp101 +g48 +g49 +sg50 +g51 +sg52 +S'5248232' +p102 +sg54 +g55 +ssssg62 +(dp103 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000509.1' +p104 +sg66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000518.4' +p105 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant35.txt b/VariantValidator/testing/testOutputsMasterITS/variant35.txt new file mode 100644 index 00000000..26ca558f --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant35.txt @@ -0,0 +1,172 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.589-1G>T' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p14 +sS'gene_symbol' +p15 +S'COL1A1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_000079.2:p.?' +p20 +sS'slr' +p21 +S'NP_000079.2:p.?' +p22 +ssS'submitted_variant' +p23 +S'NG_007400.1(NM_000088.3):c.589-1G>T' +p24 +sS'genome_context_intronic_sequence' +p25 +S'NC_000017.10(NM_000088.3):c.589-1G>T' +p26 +sS'hgvs_lrg_variant' +p27 +g6 +sS'hgvs_transcript_variant' +p28 +S'NM_000088.3:c.589-1G>T' +p29 +sS'hgvs_refseqgene_variant' +p30 +g6 +sS'primary_assembly_loci' +p31 +(dp32 +S'hg19' +p33 +(dp34 +S'hgvs_genomic_description' +p35 +S'NC_000017.10:g.48275364C>A' +p36 +sS'vcf' +p37 +(dp38 +S'chr' +p39 +S'chr17' +p40 +sS'ref' +p41 +VC +p42 +sS'pos' +p43 +S'48275364' +p44 +sS'alt' +p45 +VA +p46 +sssS'hg38' +p47 +(dp48 +g35 +S'NC_000017.11:g.50198003C>A' +p49 +sg37 +(dp50 +g39 +g40 +sg41 +g42 +sg43 +S'50198003' +p51 +sg45 +g46 +sssS'grch37' +p52 +(dp53 +g35 +S'NC_000017.10:g.48275364C>A' +p54 +sg37 +(dp55 +g39 +S'17' +p56 +sg41 +g42 +sg43 +S'48275364' +p57 +sg45 +g46 +sssS'grch38' +p58 +(dp59 +g35 +S'NC_000017.11:g.50198003C>A' +p60 +sg37 +(dp61 +g39 +g56 +sg41 +g42 +sg43 +S'50198003' +p62 +sg45 +g46 +ssssS'reference_sequence_records' +p63 +(dp64 +S'protein' +p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p66 +sS'transcript' +p67 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p68 +sssS'metadata' +p69 +(dp70 +S'variantvalidator_hgvs_version' +p71 +S'1.1.3' +p72 +sS'uta_schema' +p73 +S'uta_20180821' +p74 +sS'seqrepo_db' +p75 +S'2018-08-21' +p76 +sS'variantvalidator_version' +p77 +S'v0.2' +p78 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant36.txt b/VariantValidator/testing/testOutputsMasterITS/variant36.txt new file mode 100644 index 00000000..38461988 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant36.txt @@ -0,0 +1,402 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_182763.2:c.688+403C>T' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 2, mRNA +p14 +sS'gene_symbol' +p15 +S'MCL1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_877495.1:p.?' +p20 +sS'slr' +p21 +S'NP_877495.1:p.?' 
+p22 +ssS'submitted_variant' +p23 +S'1:150550916G>A' +p24 +sS'genome_context_intronic_sequence' +p25 +S'NC_000001.10(NM_182763.2):c.688+403C>T' +p26 +sS'hgvs_lrg_variant' +p27 +g6 +sS'hgvs_transcript_variant' +p28 +S'NM_182763.2:c.688+403C>T' +p29 +sS'hgvs_refseqgene_variant' +p30 +g6 +sS'primary_assembly_loci' +p31 +(dp32 +S'hg19' +p33 +(dp34 +S'hgvs_genomic_description' +p35 +S'NC_000001.10:g.150550916G>A' +p36 +sS'vcf' +p37 +(dp38 +S'chr' +p39 +S'chr1' +p40 +sS'ref' +p41 +VG +p42 +sS'pos' +p43 +S'150550916' +p44 +sS'alt' +p45 +VA +p46 +sssS'hg38' +p47 +(dp48 +g35 +S'NC_000001.11:g.150578440G>A' +p49 +sg37 +(dp50 +g39 +g40 +sg41 +g42 +sg43 +S'150578440' +p51 +sg45 +g46 +sssS'grch37' +p52 +(dp53 +g35 +S'NC_000001.10:g.150550916G>A' +p54 +sg37 +(dp55 +g39 +S'1' +p56 +sg41 +g42 +sg43 +S'150550916' +p57 +sg45 +g46 +sssS'grch38' +p58 +(dp59 +g35 +S'NC_000001.11:g.150578440G>A' +p60 +sg37 +(dp61 +g39 +g56 +sg41 +g42 +sg43 +S'150578440' +p62 +sg45 +g46 +ssssS'reference_sequence_records' +p63 +(dp64 +S'protein' +p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_877495.1' +p66 +sS'transcript' +p67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_182763.2' +p68 +sssS'NM_001197320.1:c.281C>T' +p69 +(dp70 +g5 +g6 +sg7 +(lp71 +S'RefSeqGene record not available' +p72 +asg10 +g6 +sg11 +(lp73 +sg13 +VHomo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 3, mRNA +p74 +sg15 +S'MCL1' +p75 +sg17 +(dp76 +g19 +S'NP_001184249.1:p.(Ser94Phe)' +p77 +sg21 +S'NP_001184249.1:p.(S94F)' +p78 +ssg23 +g24 +sg25 +g6 +sg27 +g6 +sg28 +S'NM_001197320.1:c.281C>T' +p79 +sg30 +g6 +sg31 +(dp80 +S'hg19' +p81 +(dp82 +g35 +S'NC_000001.10:g.150550916G>A' +p83 +sg37 +(dp84 +g39 +g40 +sg41 +g42 +sg43 +S'150550916' +p85 +sg45 +g46 +sssg47 +(dp86 +g35 +S'NC_000001.11:g.150578440G>A' +p87 +sg37 +(dp88 +g39 +g40 +sg41 +g42 +sg43 +S'150578440' +p89 +sg45 +g46 +sssS'grch37' +p90 +(dp91 +g35 +S'NC_000001.10:g.150550916G>A' +p92 +sg37 +(dp93 +g39 +g56 +sg41 +g42 +sg43 +S'150550916' +p94 +sg45 +g46 
+sssS'grch38' +p95 +(dp96 +g35 +S'NC_000001.11:g.150578440G>A' +p97 +sg37 +(dp98 +g39 +g56 +sg41 +g42 +sg43 +S'150578440' +p99 +sg45 +g46 +ssssg63 +(dp100 +g65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001184249.1' +p101 +sg67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001197320.1' +p102 +sssS'metadata' +p103 +(dp104 +S'variantvalidator_hgvs_version' +p105 +S'1.1.3' +p106 +sS'uta_schema' +p107 +S'uta_20180821' +p108 +sS'seqrepo_db' +p109 +S'2018-08-21' +p110 +sS'variantvalidator_version' +p111 +S'v0.2' +p112 +ssS'NM_021960.4:c.740C>T' +p113 +(dp114 +g5 +g6 +sg7 +(lp115 +S'RefSeqGene record not available' +p116 +asg10 +g6 +sg11 +(lp117 +sg13 +VHomo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 1, mRNA +p118 +sg15 +S'MCL1' +p119 +sg17 +(dp120 +g19 +S'NP_068779.1:p.(Ser247Phe)' +p121 +sg21 +S'NP_068779.1:p.(S247F)' +p122 +ssg23 +g24 +sg25 +g6 +sg27 +g6 +sg28 +S'NM_021960.4:c.740C>T' +p123 +sg30 +g6 +sg31 +(dp124 +S'hg19' +p125 +(dp126 +g35 +S'NC_000001.10:g.150550916G>A' +p127 +sg37 +(dp128 +g39 +g40 +sg41 +g42 +sg43 +S'150550916' +p129 +sg45 +g46 +sssg47 +(dp130 +g35 +S'NC_000001.11:g.150578440G>A' +p131 +sg37 +(dp132 +g39 +g40 +sg41 +g42 +sg43 +S'150578440' +p133 +sg45 +g46 +sssS'grch37' +p134 +(dp135 +g35 +S'NC_000001.10:g.150550916G>A' +p136 +sg37 +(dp137 +g39 +g56 +sg41 +g42 +sg43 +S'150550916' +p138 +sg45 +g46 +sssS'grch38' +p139 +(dp140 +g35 +S'NC_000001.11:g.150578440G>A' +p141 +sg37 +(dp142 +g39 +g56 +sg41 +g42 +sg43 +S'150578440' +p143 +sg45 +g46 +ssssg63 +(dp144 +g65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_068779.1' +p145 +sg67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_021960.4' +p146 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant37.txt b/VariantValidator/testing/testOutputsMasterITS/variant37.txt new file mode 100644 index 00000000..4337a37d --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant37.txt @@ -0,0 +1,402 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_182763.2:c.688+403C>T' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 2, mRNA +p14 +sS'gene_symbol' +p15 +S'MCL1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_877495.1:p.?' +p20 +sS'slr' +p21 +S'NP_877495.1:p.?' +p22 +ssS'submitted_variant' +p23 +S'1-150550916-G-A' +p24 +sS'genome_context_intronic_sequence' +p25 +S'NC_000001.10(NM_182763.2):c.688+403C>T' +p26 +sS'hgvs_lrg_variant' +p27 +g6 +sS'hgvs_transcript_variant' +p28 +S'NM_182763.2:c.688+403C>T' +p29 +sS'hgvs_refseqgene_variant' +p30 +g6 +sS'primary_assembly_loci' +p31 +(dp32 +S'hg19' +p33 +(dp34 +S'hgvs_genomic_description' +p35 +S'NC_000001.10:g.150550916G>A' +p36 +sS'vcf' +p37 +(dp38 +S'chr' +p39 +S'chr1' +p40 +sS'ref' +p41 +VG +p42 +sS'pos' +p43 +S'150550916' +p44 +sS'alt' +p45 +VA +p46 +sssS'hg38' +p47 +(dp48 +g35 +S'NC_000001.11:g.150578440G>A' +p49 +sg37 +(dp50 +g39 +g40 +sg41 +g42 +sg43 +S'150578440' +p51 +sg45 +g46 +sssS'grch37' +p52 +(dp53 +g35 +S'NC_000001.10:g.150550916G>A' +p54 +sg37 +(dp55 +g39 +S'1' +p56 +sg41 +g42 +sg43 +S'150550916' +p57 +sg45 +g46 +sssS'grch38' +p58 +(dp59 +g35 +S'NC_000001.11:g.150578440G>A' +p60 +sg37 +(dp61 +g39 +g56 +sg41 +g42 +sg43 +S'150578440' +p62 +sg45 +g46 +ssssS'reference_sequence_records' +p63 +(dp64 +S'protein' +p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_877495.1' +p66 +sS'transcript' +p67 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NM_182763.2' +p68 +sssS'NM_001197320.1:c.281C>T' +p69 +(dp70 +g5 +g6 +sg7 +(lp71 +S'RefSeqGene record not available' +p72 +asg10 +g6 +sg11 +(lp73 +sg13 +VHomo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 3, mRNA +p74 +sg15 +S'MCL1' +p75 +sg17 +(dp76 +g19 +S'NP_001184249.1:p.(Ser94Phe)' +p77 +sg21 +S'NP_001184249.1:p.(S94F)' +p78 +ssg23 +g24 +sg25 +g6 +sg27 +g6 +sg28 +S'NM_001197320.1:c.281C>T' +p79 +sg30 +g6 +sg31 +(dp80 +S'hg19' +p81 +(dp82 +g35 +S'NC_000001.10:g.150550916G>A' +p83 +sg37 +(dp84 +g39 +g40 +sg41 +g42 +sg43 +S'150550916' +p85 +sg45 +g46 +sssg47 +(dp86 +g35 +S'NC_000001.11:g.150578440G>A' +p87 +sg37 +(dp88 +g39 +g40 +sg41 +g42 +sg43 +S'150578440' +p89 +sg45 +g46 +sssS'grch37' +p90 +(dp91 +g35 +S'NC_000001.10:g.150550916G>A' +p92 +sg37 +(dp93 +g39 +g56 +sg41 +g42 +sg43 +S'150550916' +p94 +sg45 +g46 +sssS'grch38' +p95 +(dp96 +g35 +S'NC_000001.11:g.150578440G>A' +p97 +sg37 +(dp98 +g39 +g56 +sg41 +g42 +sg43 +S'150578440' +p99 +sg45 +g46 +ssssg63 +(dp100 +g65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001184249.1' +p101 +sg67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001197320.1' +p102 +sssS'metadata' +p103 +(dp104 +S'variantvalidator_hgvs_version' +p105 +S'1.1.3' +p106 +sS'uta_schema' +p107 +S'uta_20180821' +p108 +sS'seqrepo_db' +p109 +S'2018-08-21' +p110 +sS'variantvalidator_version' +p111 +S'v0.2' +p112 +ssS'NM_021960.4:c.740C>T' +p113 +(dp114 +g5 +g6 +sg7 +(lp115 +S'RefSeqGene record not available' +p116 +asg10 +g6 +sg11 +(lp117 +sg13 +VHomo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 1, mRNA +p118 +sg15 +S'MCL1' +p119 +sg17 +(dp120 +g19 +S'NP_068779.1:p.(Ser247Phe)' +p121 +sg21 +S'NP_068779.1:p.(S247F)' +p122 +ssg23 +g24 +sg25 +g6 +sg27 +g6 +sg28 +S'NM_021960.4:c.740C>T' +p123 +sg30 +g6 +sg31 +(dp124 +S'hg19' +p125 +(dp126 +g35 +S'NC_000001.10:g.150550916G>A' +p127 +sg37 +(dp128 +g39 +g40 +sg41 +g42 +sg43 +S'150550916' +p129 +sg45 +g46 +sssg47 +(dp130 
+g35 +S'NC_000001.11:g.150578440G>A' +p131 +sg37 +(dp132 +g39 +g40 +sg41 +g42 +sg43 +S'150578440' +p133 +sg45 +g46 +sssS'grch37' +p134 +(dp135 +g35 +S'NC_000001.10:g.150550916G>A' +p136 +sg37 +(dp137 +g39 +g56 +sg41 +g42 +sg43 +S'150550916' +p138 +sg45 +g46 +sssS'grch38' +p139 +(dp140 +g35 +S'NC_000001.11:g.150578440G>A' +p141 +sg37 +(dp142 +g39 +g56 +sg41 +g42 +sg43 +S'150578440' +p143 +sg45 +g46 +ssssg63 +(dp144 +g65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_068779.1' +p145 +sg67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_021960.4' +p146 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant38.txt b/VariantValidator/testing/testOutputsMasterITS/variant38.txt new file mode 100644 index 00000000..2cbceff8 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant38.txt @@ -0,0 +1,82 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. 
NG_(NM_):c.PositionVariation' +p7 +aS'For additional assistance, submit NG_008123.1:c.2055+18G>A to VariantValidator' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +g4 +sS'gene_symbol' +p13 +g4 +sS'hgvs_predicted_protein_consequence' +p14 +(dp15 +S'tlr' +p16 +g4 +sS'slr' +p17 +g4 +ssS'submitted_variant' +p18 +S'NG_008123.1(LEPRE1_v003):c.2055+18G>A' +p19 +sS'genome_context_intronic_sequence' +p20 +g4 +sS'hgvs_lrg_variant' +p21 +g4 +sS'hgvs_transcript_variant' +p22 +g4 +sS'hgvs_refseqgene_variant' +p23 +g4 +sS'primary_assembly_loci' +p24 +(dp25 +sS'reference_sequence_records' +p26 +g4 +ssS'flag' +p27 +S'warning' +p28 +sS'metadata' +p29 +(dp30 +S'variantvalidator_hgvs_version' +p31 +S'1.1.3' +p32 +sS'uta_schema' +p33 +S'uta_20180821' +p34 +sS'seqrepo_db' +p35 +S'2018-08-21' +p36 +sS'variantvalidator_version' +p37 +S'v0.2' +p38 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant39.txt b/VariantValidator/testing/testOutputsMasterITS/variant39.txt new file mode 100644 index 00000000..c07a1c1d --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant39.txt @@ -0,0 +1,80 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'A transcript reference sequence has not been provided e.g. 
NG_(NM_):c.PositionVariation' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +g4 +sS'gene_symbol' +p12 +g4 +sS'hgvs_predicted_protein_consequence' +p13 +(dp14 +S'tlr' +p15 +g4 +sS'slr' +p16 +g4 +ssS'submitted_variant' +p17 +S'NG_008123.1:c.2055+18G>A' +p18 +sS'genome_context_intronic_sequence' +p19 +g4 +sS'hgvs_lrg_variant' +p20 +g4 +sS'hgvs_transcript_variant' +p21 +g4 +sS'hgvs_refseqgene_variant' +p22 +g4 +sS'primary_assembly_loci' +p23 +(dp24 +sS'reference_sequence_records' +p25 +g4 +ssS'flag' +p26 +S'warning' +p27 +sS'metadata' +p28 +(dp29 +S'variantvalidator_hgvs_version' +p30 +S'1.1.3' +p31 +sS'uta_schema' +p32 +S'uta_20180821' +p33 +sS'seqrepo_db' +p34 +S'2018-08-21' +p35 +sS'variantvalidator_version' +p36 +S'v0.2' +p37 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant4.txt b/VariantValidator/testing/testOutputsMasterITS/variant4.txt new file mode 100644 index 00000000..9ff6b60e --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant4.txt @@ -0,0 +1,171 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_015120.4:c.34C>T' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'RefSeqGene record not available' +p19 +asS'refseqgene_context_intronic_sequence' +p20 +g16 +sS'alt_genomic_loci' +p21 +(lp22 +sS'transcript_description' +p23 +VHomo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA +p24 +sS'gene_symbol' +p25 +S'ALMS1' +p26 +sS'hgvs_predicted_protein_consequence' +p27 +(dp28 +S'tlr' +p29 +S'NP_055935.4:p.(Leu12=)' +p30 +sS'slr' +p31 +S'NP_055935.4:p.(L12=)' +p32 +ssS'submitted_variant' +p33 +S'NC_000002.11:g.73613030C>T' +p34 
+sS'genome_context_intronic_sequence' +p35 +g16 +sS'hgvs_lrg_variant' +p36 +g16 +sS'hgvs_transcript_variant' +p37 +S'NM_015120.4:c.34C>T' +p38 +sS'hgvs_refseqgene_variant' +p39 +g16 +sS'primary_assembly_loci' +p40 +(dp41 +S'hg19' +p42 +(dp43 +S'hgvs_genomic_description' +p44 +S'NC_000002.11:g.73613030C>T' +p45 +sS'vcf' +p46 +(dp47 +S'chr' +p48 +S'chr2' +p49 +sS'ref' +p50 +VC +p51 +sS'pos' +p52 +S'73613030' +p53 +sS'alt' +p54 +VT +p55 +sssS'hg38' +p56 +(dp57 +g44 +S'NC_000002.12:g.73385902C>T' +p58 +sg46 +(dp59 +g48 +g49 +sg50 +g51 +sg52 +S'73385902' +p60 +sg54 +g55 +sssS'grch37' +p61 +(dp62 +g44 +S'NC_000002.11:g.73613030C>T' +p63 +sg46 +(dp64 +g48 +S'2' +p65 +sg50 +g51 +sg52 +S'73613030' +p66 +sg54 +g55 +sssS'grch38' +p67 +(dp68 +g44 +S'NC_000002.12:g.73385902C>T' +p69 +sg46 +(dp70 +g48 +g65 +sg50 +g51 +sg52 +S'73385902' +p71 +sg54 +g55 +ssssS'reference_sequence_records' +p72 +(dp73 +S'protein' +p74 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4' +p75 +sS'transcript' +p76 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4' +p77 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant40.txt b/VariantValidator/testing/testOutputsMasterITS/variant40.txt new file mode 100644 index 00000000..f5bc3f75 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant40.txt @@ -0,0 +1,172 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_022356.3:c.2055+18G>A' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens prolyl 3-hydroxylase 1 (P3H1), transcript variant 1, mRNA +p14 +sS'gene_symbol' +p15 +S'P3H1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_071751.3:p.?' +p20 +sS'slr' +p21 +S'NP_071751.3:p.?' 
+p22 +ssS'submitted_variant' +p23 +S'NG_008123.1(NM_022356.3):c.2055+18G>A' +p24 +sS'genome_context_intronic_sequence' +p25 +S'NC_000001.10(NM_022356.3):c.2055+18G>A' +p26 +sS'hgvs_lrg_variant' +p27 +g6 +sS'hgvs_transcript_variant' +p28 +S'NM_022356.3:c.2055+18G>A' +p29 +sS'hgvs_refseqgene_variant' +p30 +g6 +sS'primary_assembly_loci' +p31 +(dp32 +S'hg19' +p33 +(dp34 +S'hgvs_genomic_description' +p35 +S'NC_000001.10:g.43212925C>T' +p36 +sS'vcf' +p37 +(dp38 +S'chr' +p39 +S'chr1' +p40 +sS'ref' +p41 +VC +p42 +sS'pos' +p43 +S'43212925' +p44 +sS'alt' +p45 +VT +p46 +sssS'hg38' +p47 +(dp48 +g35 +S'NC_000001.11:g.42747254C>T' +p49 +sg37 +(dp50 +g39 +g40 +sg41 +g42 +sg43 +S'42747254' +p51 +sg45 +g46 +sssS'grch37' +p52 +(dp53 +g35 +S'NC_000001.10:g.43212925C>T' +p54 +sg37 +(dp55 +g39 +S'1' +p56 +sg41 +g42 +sg43 +S'43212925' +p57 +sg45 +g46 +sssS'grch38' +p58 +(dp59 +g35 +S'NC_000001.11:g.42747254C>T' +p60 +sg37 +(dp61 +g39 +g56 +sg41 +g42 +sg43 +S'42747254' +p62 +sg45 +g46 +ssssS'reference_sequence_records' +p63 +(dp64 +S'protein' +p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_071751.3' +p66 +sS'transcript' +p67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_022356.3' +p68 +sssS'metadata' +p69 +(dp70 +S'variantvalidator_hgvs_version' +p71 +S'1.1.3' +p72 +sS'uta_schema' +p73 +S'uta_20180821' +p74 +sS'seqrepo_db' +p75 +S'2018-08-21' +p76 +sS'variantvalidator_version' +p77 +S'v0.2' +p78 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant41.txt b/VariantValidator/testing/testOutputsMasterITS/variant41.txt new file mode 100644 index 00000000..9d99ffdb --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant41.txt @@ -0,0 +1,293 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_021983.4:c.490G>C' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'RefSeqGene record not available' +p19 +aS'NM_021983.4:c.490G>C cannot be mapped directly to genome build GRCh37' +p20 +aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' +p21 +asS'refseqgene_context_intronic_sequence' +p22 +g16 +sS'alt_genomic_loci' +p23 +(lp24 +(dp25 +S'grch37' +p26 +(dp27 +S'hgvs_genomic_description' +p28 +S'NT_167246.1:g.3848158T>G' +p29 +sS'vcf' +p30 +(dp31 +S'chr' +p32 +S'HSCHR6_MHC_MANN_CTG1' +p33 +sS'ref' +p34 +S'T' +p35 +sS'pos' +p36 +S'3848158' +p37 +sS'alt' +p38 +VG +p39 +sssa(dp40 +S'hg19' +p41 +(dp42 +g28 +S'NT_167246.1:g.3848158T>G' +p43 +sg30 +(dp44 +g32 +S'chr6_mann_hap4' +p45 +sg34 +g35 +sg36 +S'3848158' +p46 +sg38 +g39 +sssa(dp47 +S'grch38' +p48 +(dp49 +g28 +S'NT_167246.2:g.3842538T>G' +p50 +sg30 +(dp51 +g32 +g33 +sg34 +g35 +sg36 +S'3842538' +p52 +sg38 +g39 +sssa(dp53 +S'hg38' +p54 +(dp55 +g28 +S'NT_167246.2:g.3842538T>G' +p56 +sg30 +(dp57 +g32 +S'chr6_GL000253v2_alt' +p58 +sg34 +g35 +sg36 +S'3842538' +p59 +sg38 +g39 +sssa(dp60 +S'grch37' +p61 +(dp62 +g28 +S'NT_167247.1:g.3884432C>G' +p63 +sg30 +(dp64 +g32 +S'HSCHR6_MHC_MCF_CTG1' +p65 +sg34 +VC +p66 +sg36 +S'3884432' +p67 +sg38 +g39 +sssa(dp68 +S'hg19' +p69 +(dp70 +g28 +S'NT_167247.1:g.3884432C>G' +p71 +sg30 +(dp72 +g32 +S'chr6_mcf_hap5' +p73 +sg34 +g66 +sg36 +S'3884432' +p74 
+sg38 +g39 +sssa(dp75 +S'grch37' +p76 +(dp77 +g28 +S'NT_167249.1:g.3852542C>G' +p78 +sg30 +(dp79 +g32 +S'HSCHR6_MHC_SSTO_CTG1' +p80 +sg34 +g66 +sg36 +S'3852542' +p81 +sg38 +g39 +sssa(dp82 +S'hg19' +p83 +(dp84 +g28 +S'NT_167249.1:g.3852542C>G' +p85 +sg30 +(dp86 +g32 +S'chr6_ssto_hap7' +p87 +sg34 +g66 +sg36 +S'3852542' +p88 +sg38 +g39 +sssa(dp89 +S'grch38' +p90 +(dp91 +g28 +S'NT_167249.2:g.3853244C>G' +p92 +sg30 +(dp93 +g32 +g80 +sg34 +g66 +sg36 +S'3853244' +p94 +sg38 +g39 +sssa(dp95 +g54 +(dp96 +g28 +S'NT_167249.2:g.3853244C>G' +p97 +sg30 +(dp98 +g32 +S'chr6_GL000256v2_alt' +p99 +sg34 +g66 +sg36 +S'3853244' +p100 +sg38 +g39 +sssasS'transcript_description' +p101 +VHomo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA +p102 +sS'gene_symbol' +p103 +S'HLA-DRB4' +p104 +sS'hgvs_predicted_protein_consequence' +p105 +(dp106 +S'tlr' +p107 +S'NP_068818.4:p.(Gly164Arg)' +p108 +sS'slr' +p109 +S'NP_068818.4:p.(G164R)' +p110 +ssS'submitted_variant' +p111 +S'NM_021983.4:c.490G>C' +p112 +sS'genome_context_intronic_sequence' +p113 +g16 +sS'hgvs_lrg_variant' +p114 +g16 +sS'hgvs_transcript_variant' +p115 +S'NM_021983.4:c.490G>C' +p116 +sS'hgvs_refseqgene_variant' +p117 +g16 +sS'primary_assembly_loci' +p118 +(dp119 +sS'reference_sequence_records' +p120 +(dp121 +S'protein' +p122 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_068818.4' +p123 +sS'transcript' +p124 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_021983.4' +p125 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant42.txt b/VariantValidator/testing/testOutputsMasterITS/variant42.txt new file mode 100644 index 00000000..9b6cb56e --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant42.txt @@ -0,0 +1,564 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_032470.3:c.4del' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +(dp13 +S'grch37' +p14 +(dp15 +S'hgvs_genomic_description' +p16 +S'NT_113891.2:g.3483644del' +p17 +sS'vcf' +p18 +(dp19 +S'chr' +p20 +S'HSCHR6_MHC_COX_CTG1' +p21 +sS'ref' +p22 +S'CG' +p23 +sS'pos' +p24 +S'3483643' +p25 +sS'alt' +p26 +S'C' +p27 +sssa(dp28 +S'hg19' +p29 +(dp30 +g16 +S'NT_113891.2:g.3483644del' +p31 +sg18 +(dp32 +g20 +S'chr6_cox_hap2' +p33 +sg22 +S'CG' +p34 +sg24 +S'3483643' +p35 +sg26 +g27 +sssa(dp36 +S'grch38' +p37 +(dp38 +g16 +S'NT_113891.3:g.3483538del' +p39 +sg18 +(dp40 +g20 +g21 +sg22 +S'CG' +p41 +sg24 +S'3483537' +p42 +sg26 +g27 +sssa(dp43 +S'hg38' +p44 +(dp45 +g16 +S'NT_113891.3:g.3483538del' +p46 +sg18 +(dp47 +g20 +S'chr6_GL000251v2_alt' +p48 +sg22 +S'CG' +p49 +sg24 +S'3483537' +p50 +sg26 +g27 +sssa(dp51 +S'grch37' +p52 +(dp53 +g16 +S'NT_167245.1:g.3292210del' +p54 +sg18 +(dp55 +g20 +S'HSCHR6_MHC_DBB_CTG1' +p56 +sg22 +S'CG' +p57 +sg24 +S'3292209' +p58 +sg26 +g27 +sssa(dp59 +S'hg19' +p60 +(dp61 +g16 +S'NT_167245.1:g.3292210del' +p62 +sg18 +(dp63 +g20 +S'chr6_dbb_hap3' +p64 +sg22 +S'CG' +p65 +sg24 +S'3292209' +p66 +sg26 +g27 +sssa(dp67 +S'grch38' +p68 +(dp69 +g16 +S'NT_167245.2:g.3286625del' +p70 +sg18 +(dp71 +g20 +g56 +sg22 +S'CG' +p72 +sg24 +S'3286624' +p73 +sg26 +g27 +sssa(dp74 +g44 +(dp75 +g16 +S'NT_167245.2:g.3286625del' +p76 +sg18 +(dp77 +g20 +S'chr6_GL000252v2_alt' +p78 +sg22 +S'CG' +p79 +sg24 +S'3286624' +p80 +sg26 +g27 +sssa(dp81 +S'grch37' +p82 +(dp83 +g16 
+S'NT_167247.1:g.3392834del' +p84 +sg18 +(dp85 +g20 +S'HSCHR6_MHC_MCF_CTG1' +p86 +sg22 +S'CG' +p87 +sg24 +S'3392833' +p88 +sg26 +g27 +sssa(dp89 +S'hg19' +p90 +(dp91 +g16 +S'NT_167247.1:g.3392834del' +p92 +sg18 +(dp93 +g20 +S'chr6_mcf_hap5' +p94 +sg22 +S'CG' +p95 +sg24 +S'3392833' +p96 +sg26 +g27 +sssa(dp97 +S'grch38' +p98 +(dp99 +g16 +S'NT_167247.2:g.3387249del' +p100 +sg18 +(dp101 +g20 +g86 +sg22 +S'CG' +p102 +sg24 +S'3387248' +p103 +sg26 +g27 +sssa(dp104 +g44 +(dp105 +g16 +S'NT_167247.2:g.3387249del' +p106 +sg18 +(dp107 +g20 +S'chr6_GL000254v2_alt' +p108 +sg22 +S'CG' +p109 +sg24 +S'3387248' +p110 +sg26 +g27 +sssa(dp111 +S'grch37' +p112 +(dp113 +g16 +S'NT_167248.1:g.3274047del' +p114 +sg18 +(dp115 +g20 +S'HSCHR6_MHC_QBL_CTG1' +p116 +sg22 +S'CG' +p117 +sg24 +S'3274046' +p118 +sg26 +g27 +sssa(dp119 +S'hg19' +p120 +(dp121 +g16 +S'NT_167248.1:g.3274047del' +p122 +sg18 +(dp123 +g20 +S'chr6_qbl_hap6' +p124 +sg22 +S'CG' +p125 +sg24 +S'3274046' +p126 +sg26 +g27 +sssa(dp127 +S'grch38' +p128 +(dp129 +g16 +S'NT_167248.2:g.3268451del' +p130 +sg18 +(dp131 +g20 +g116 +sg22 +S'CG' +p132 +sg24 +S'3268450' +p133 +sg26 +g27 +sssa(dp134 +g44 +(dp135 +g16 +S'NT_167248.2:g.3268451del' +p136 +sg18 +(dp137 +g20 +S'chr6_GL000255v2_alt' +p138 +sg22 +S'CG' +p139 +sg24 +S'3268450' +p140 +sg26 +g27 +sssa(dp141 +S'grch37' +p142 +(dp143 +g16 +S'NT_167249.1:g.3345701del' +p144 +sg18 +(dp145 +g20 +S'HSCHR6_MHC_SSTO_CTG1' +p146 +sg22 +S'CG' +p147 +sg24 +S'3345700' +p148 +sg26 +g27 +sssa(dp149 +S'hg19' +p150 +(dp151 +g16 +S'NT_167249.1:g.3345701del' +p152 +sg18 +(dp153 +g20 +S'chr6_ssto_hap7' +p154 +sg22 +S'CG' +p155 +sg24 +S'3345700' +p156 +sg26 +g27 +sssa(dp157 +S'grch38' +p158 +(dp159 +g16 +S'NT_167249.2:g.3346403del' +p160 +sg18 +(dp161 +g20 +g146 +sg22 +S'CG' +p162 +sg24 +S'3346402' +p163 +sg26 +g27 +sssa(dp164 +g44 +(dp165 +g16 +S'NT_167249.2:g.3346403del' +p166 +sg18 +(dp167 +g20 +S'chr6_GL000256v2_alt' +p168 +sg22 +S'CG' +p169 +sg24 +S'3346402' +p170 +sg26 +g27 
+sssasS'transcript_description' +p171 +VHomo sapiens tenascin XB (TNXB), transcript variant XB-S, mRNA +p172 +sS'gene_symbol' +p173 +S'TNXB' +p174 +sS'hgvs_predicted_protein_consequence' +p175 +(dp176 +S'tlr' +p177 +S'NP_115859.2:p.(Arg2AlafsTer91)' +p178 +sS'slr' +p179 +S'NP_115859.2:p.(R2Afs*91)' +p180 +ssS'submitted_variant' +p181 +S'NM_032470.3:c.4del' +p182 +sS'genome_context_intronic_sequence' +p183 +g6 +sS'hgvs_lrg_variant' +p184 +g6 +sS'hgvs_transcript_variant' +p185 +S'NM_032470.3:c.4del' +p186 +sS'hgvs_refseqgene_variant' +p187 +g6 +sS'primary_assembly_loci' +p188 +(dp189 +S'hg19' +p190 +(dp191 +g16 +S'NC_000006.11:g.32012993del' +p192 +sg18 +(dp193 +g20 +S'chr6' +p194 +sg22 +S'CG' +p195 +sg24 +S'32012992' +p196 +sg26 +g27 +sssg44 +(dp197 +g16 +S'NC_000006.12:g.32045216del' +p198 +sg18 +(dp199 +g20 +g194 +sg22 +S'CG' +p200 +sg24 +S'32045215' +p201 +sg26 +g27 +sssS'grch37' +p202 +(dp203 +g16 +S'NC_000006.11:g.32012993del' +p204 +sg18 +(dp205 +g20 +S'6' +p206 +sg22 +S'CG' +p207 +sg24 +S'32012992' +p208 +sg26 +g27 +sssS'grch38' +p209 +(dp210 +g16 +S'NC_000006.12:g.32045216del' +p211 +sg18 +(dp212 +g20 +g206 +sg22 +S'CG' +p213 +sg24 +S'32045215' +p214 +sg26 +g27 +ssssS'reference_sequence_records' +p215 +(dp216 +S'protein' +p217 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_115859.2' +p218 +sS'transcript' +p219 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032470.3' +p220 +sssS'metadata' +p221 +(dp222 +S'variantvalidator_hgvs_version' +p223 +S'1.1.3' +p224 +sS'uta_schema' +p225 +S'uta_20180821' +p226 +sS'seqrepo_db' +p227 +S'2018-08-21' +p228 +sS'variantvalidator_version' +p229 +S'v0.2' +p230 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant43.txt b/VariantValidator/testing/testOutputsMasterITS/variant43.txt new file mode 100644 index 00000000..6e3ef37b --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant43.txt @@ -0,0 +1,179 @@ +(dp0 +S'NM_001194958.2:c.20C>A' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +aS'NM_001194958.2:c.20C>A cannot be mapped directly to genome build GRCh37' +p8 +aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g4 +sS'alt_genomic_loci' +p11 +(lp12 +(dp13 +S'grch37' +p14 +(dp15 +S'hgvs_genomic_description' +p16 +S'NW_003315950.2:g.355171C>A' +p17 +sS'vcf' +p18 +(dp19 +S'chr' +p20 +S'HG987_PATCH' +p21 +sS'ref' +p22 +S'C' +p23 +sS'pos' +p24 +S'355171' +p25 +sS'alt' +p26 +S'A' +p27 +sssa(dp28 +S'hg19' +p29 +(dp30 +g16 +S'NW_003315950.2:g.355171C>A' +p31 +sg18 +(dp32 +g20 +S'NW_003315950.2' +p33 +sg22 +g23 +sg24 +S'355171' +p34 +sg26 +g27 +sssasS'transcript_description' +p35 +VHomo sapiens potassium voltage-gated channel subfamily J member 18 (KCNJ18), mRNA +p36 +sS'gene_symbol' +p37 +S'KCNJ18' +p38 +sS'hgvs_predicted_protein_consequence' +p39 +(dp40 +S'tlr' +p41 +S'NP_001181887.2:p.(Ala7Asp)' +p42 +sS'slr' +p43 +S'NP_001181887.2:p.(A7D)' +p44 +ssS'submitted_variant' +p45 +S'NM_001194958.2:c.20C>A' +p46 +sS'genome_context_intronic_sequence' +p47 +g4 +sS'hgvs_lrg_variant' +p48 +g4 +sS'hgvs_transcript_variant' +p49 +S'NM_001194958.2:c.20C>A' +p50 +sS'hgvs_refseqgene_variant' +p51 +g4 +sS'primary_assembly_loci' +p52 +(dp53 +S'grch38' +p54 +(dp55 +g16 +S'NC_000017.11:g.21702806C>A' +p56 +sg18 +(dp57 +g20 +S'17' +p58 +sg22 +g23 +sg24 +S'21702806' +p59 +sg26 +g27 +sssS'hg38' +p60 +(dp61 +g16 +S'NC_000017.11:g.21702806C>A' +p62 +sg18 +(dp63 +g20 +S'chr17' +p64 +sg22 +g23 +sg24 +S'21702806' +p65 +sg26 
+g27 +ssssS'reference_sequence_records' +p66 +(dp67 +S'protein' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001181887.2' +p69 +sS'transcript' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001194958.2' +p71 +sssS'flag' +p72 +S'gene_variant' +p73 +sS'metadata' +p74 +(dp75 +S'variantvalidator_hgvs_version' +p76 +S'1.1.3' +p77 +sS'uta_schema' +p78 +S'uta_20180821' +p79 +sS'seqrepo_db' +p80 +S'2018-08-21' +p81 +sS'variantvalidator_version' +p82 +S'v0.2' +p83 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant44.txt b/VariantValidator/testing/testOutputsMasterITS/variant44.txt new file mode 100644 index 00000000..f156b082 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant44.txt @@ -0,0 +1,143 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_000022.2:c.534A>G' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'A more recent version of the selected reference sequence NM_000022.2 is available (NM_000022.3)' +p19 +aS'NM_000022.3:c.534A>G MUST be fully validated prior to use in reports' +p20 +aS'select_variants=NM_000022.3:c.534A>G' +p21 +aS'RefSeqGene record not available' +p22 +asS'refseqgene_context_intronic_sequence' +p23 +g16 +sS'alt_genomic_loci' +p24 +(lp25 +sS'transcript_description' +p26 +VHomo sapiens adenosine deaminase (ADA), mRNA +p27 +sS'gene_symbol' +p28 +S'ADA' +p29 +sS'hgvs_predicted_protein_consequence' +p30 +(dp31 +S'tlr' +p32 +S'NP_000013.2:p.(Val178=)' +p33 +sS'slr' +p34 +S'NP_000013.2:p.(V178=)' +p35 +ssS'submitted_variant' +p36 +S'NM_000022.2:c.534A>G' +p37 +sS'genome_context_intronic_sequence' +p38 +g16 +sS'hgvs_lrg_variant' +p39 +g16 +sS'hgvs_transcript_variant' +p40 +S'NM_000022.2:c.534A>G' +p41 +sS'hgvs_refseqgene_variant' 
+p42 +g16 +sS'primary_assembly_loci' +p43 +(dp44 +S'hg19' +p45 +(dp46 +S'hgvs_genomic_description' +p47 +S'NC_000020.10:g.43252915T>C' +p48 +sS'vcf' +p49 +(dp50 +S'chr' +p51 +S'chr20' +p52 +sS'ref' +p53 +VT +p54 +sS'pos' +p55 +S'43252915' +p56 +sS'alt' +p57 +VC +p58 +sssS'grch37' +p59 +(dp60 +g47 +S'NC_000020.10:g.43252915T>C' +p61 +sg49 +(dp62 +g51 +S'20' +p63 +sg53 +g54 +sg55 +S'43252915' +p64 +sg57 +g58 +ssssS'reference_sequence_records' +p65 +(dp66 +S'protein' +p67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000013.2' +p68 +sS'transcript' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000022.2' +p70 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant45.txt b/VariantValidator/testing/testOutputsMasterITS/variant45.txt new file mode 100644 index 00000000..3fcafa99 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant45.txt @@ -0,0 +1,293 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_021983.4:c.490G>C' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'RefSeqGene record not available' +p19 +aS'NM_021983.4:c.490G>C cannot be mapped directly to genome build GRCh37' +p20 +aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' +p21 +asS'refseqgene_context_intronic_sequence' +p22 +g16 +sS'alt_genomic_loci' +p23 +(lp24 +(dp25 +S'grch37' +p26 +(dp27 +S'hgvs_genomic_description' +p28 +S'NT_167246.1:g.3848158T>G' +p29 +sS'vcf' +p30 +(dp31 +S'chr' +p32 +S'HSCHR6_MHC_MANN_CTG1' +p33 +sS'ref' +p34 +S'T' +p35 +sS'pos' +p36 +S'3848158' +p37 +sS'alt' +p38 +VG +p39 +sssa(dp40 +S'hg19' +p41 +(dp42 +g28 +S'NT_167246.1:g.3848158T>G' +p43 +sg30 +(dp44 +g32 +S'chr6_mann_hap4' +p45 +sg34 +g35 +sg36 +S'3848158' +p46 +sg38 +g39 
+sssa(dp47 +S'grch38' +p48 +(dp49 +g28 +S'NT_167246.2:g.3842538T>G' +p50 +sg30 +(dp51 +g32 +g33 +sg34 +g35 +sg36 +S'3842538' +p52 +sg38 +g39 +sssa(dp53 +S'hg38' +p54 +(dp55 +g28 +S'NT_167246.2:g.3842538T>G' +p56 +sg30 +(dp57 +g32 +S'chr6_GL000253v2_alt' +p58 +sg34 +g35 +sg36 +S'3842538' +p59 +sg38 +g39 +sssa(dp60 +S'grch37' +p61 +(dp62 +g28 +S'NT_167247.1:g.3884432C>G' +p63 +sg30 +(dp64 +g32 +S'HSCHR6_MHC_MCF_CTG1' +p65 +sg34 +VC +p66 +sg36 +S'3884432' +p67 +sg38 +g39 +sssa(dp68 +S'hg19' +p69 +(dp70 +g28 +S'NT_167247.1:g.3884432C>G' +p71 +sg30 +(dp72 +g32 +S'chr6_mcf_hap5' +p73 +sg34 +g66 +sg36 +S'3884432' +p74 +sg38 +g39 +sssa(dp75 +S'grch37' +p76 +(dp77 +g28 +S'NT_167249.1:g.3852542C>G' +p78 +sg30 +(dp79 +g32 +S'HSCHR6_MHC_SSTO_CTG1' +p80 +sg34 +g66 +sg36 +S'3852542' +p81 +sg38 +g39 +sssa(dp82 +S'hg19' +p83 +(dp84 +g28 +S'NT_167249.1:g.3852542C>G' +p85 +sg30 +(dp86 +g32 +S'chr6_ssto_hap7' +p87 +sg34 +g66 +sg36 +S'3852542' +p88 +sg38 +g39 +sssa(dp89 +S'grch38' +p90 +(dp91 +g28 +S'NT_167249.2:g.3853244C>G' +p92 +sg30 +(dp93 +g32 +g80 +sg34 +g66 +sg36 +S'3853244' +p94 +sg38 +g39 +sssa(dp95 +g54 +(dp96 +g28 +S'NT_167249.2:g.3853244C>G' +p97 +sg30 +(dp98 +g32 +S'chr6_GL000256v2_alt' +p99 +sg34 +g66 +sg36 +S'3853244' +p100 +sg38 +g39 +sssasS'transcript_description' +p101 +VHomo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA +p102 +sS'gene_symbol' +p103 +S'HLA-DRB4' +p104 +sS'hgvs_predicted_protein_consequence' +p105 +(dp106 +S'tlr' +p107 +S'NP_068818.4:p.(Gly164Arg)' +p108 +sS'slr' +p109 +S'NP_068818.4:p.(G164R)' +p110 +ssS'submitted_variant' +p111 +S'HSCHR6_MHC_SSTO_CTG1-3852542-C-G' +p112 +sS'genome_context_intronic_sequence' +p113 +g16 +sS'hgvs_lrg_variant' +p114 +g16 +sS'hgvs_transcript_variant' +p115 +S'NM_021983.4:c.490G>C' +p116 +sS'hgvs_refseqgene_variant' +p117 +g16 +sS'primary_assembly_loci' +p118 +(dp119 +sS'reference_sequence_records' +p120 +(dp121 +S'protein' +p122 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_068818.4' +p123 
+sS'transcript' +p124 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_021983.4' +p125 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant46.txt b/VariantValidator/testing/testOutputsMasterITS/variant46.txt new file mode 100644 index 00000000..a27a5070 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant46.txt @@ -0,0 +1,175 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000368.4:c.363+1dup' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA +p14 +sS'gene_symbol' +p15 +S'TSC1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_000359.1:p.?' +p20 +sS'slr' +p21 +S'NP_000359.1:p.?' +p22 +ssS'submitted_variant' +p23 +S'NM_000368.4:c.363+1dupG' +p24 +sS'genome_context_intronic_sequence' +p25 +S'NC_000009.11(NM_000368.4):c.363+1dup' +p26 +sS'hgvs_lrg_variant' +p27 +g6 +sS'hgvs_transcript_variant' +p28 +S'NM_000368.4:c.363+1dup' +p29 +sS'hgvs_refseqgene_variant' +p30 +g6 +sS'primary_assembly_loci' +p31 +(dp32 +S'hg19' +p33 +(dp34 +S'hgvs_genomic_description' +p35 +S'NC_000009.11:g.135800973dup' +p36 +sS'vcf' +p37 +(dp38 +S'chr' +p39 +S'chr9' +p40 +sS'ref' +p41 +S'C' +p42 +sS'pos' +p43 +S'135800973' +p44 +sS'alt' +p45 +S'CC' +p46 +sssS'hg38' +p47 +(dp48 +g35 +S'NC_000009.12:g.132925586dup' +p49 +sg37 +(dp50 +g39 +g40 +sg41 +g42 +sg43 +S'132925586' +p51 +sg45 +S'CC' +p52 +sssS'grch37' +p53 +(dp54 +g35 +S'NC_000009.11:g.135800973dup' +p55 +sg37 +(dp56 +g39 +S'9' +p57 +sg41 +g42 +sg43 +S'135800973' +p58 +sg45 +S'CC' +p59 +sssS'grch38' +p60 +(dp61 +g35 +S'NC_000009.12:g.132925586dup' +p62 +sg37 +(dp63 +g39 +g57 +sg41 +g42 +sg43 +S'132925586' +p64 +sg45 +S'CC' +p65 +ssssS'reference_sequence_records' +p66 +(dp67 +S'protein' 
+p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1' +p69 +sS'transcript' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4' +p71 +sssS'metadata' +p72 +(dp73 +S'variantvalidator_hgvs_version' +p74 +S'1.1.3' +p75 +sS'uta_schema' +p76 +S'uta_20180821' +p77 +sS'seqrepo_db' +p78 +S'2018-08-21' +p79 +sS'variantvalidator_version' +p80 +S'v0.2' +p81 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant47.txt b/VariantValidator/testing/testOutputsMasterITS/variant47.txt new file mode 100644 index 00000000..bf747895 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant47.txt @@ -0,0 +1,177 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000368.4:c.363+1dup' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NM_000368.4:c.363dup normalized to NM_000368.4:c.363+1dup' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g6 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA +p15 +sS'gene_symbol' +p16 +S'TSC1' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_000359.1:p.(Met122AspfsTer4)' +p21 +sS'slr' +p22 +S'NP_000359.1:p.(M122Dfs*4)' +p23 +ssS'submitted_variant' +p24 +S'NM_000368.4:c.363dupG' +p25 +sS'genome_context_intronic_sequence' +p26 +S'NC_000009.11(NM_000368.4):c.363+1dup' +p27 +sS'hgvs_lrg_variant' +p28 +g6 +sS'hgvs_transcript_variant' +p29 +S'NM_000368.4:c.363+1dup' +p30 +sS'hgvs_refseqgene_variant' +p31 +g6 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000009.11:g.135800973dup' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr9' +p41 +sS'ref' +p42 +S'C' +p43 +sS'pos' +p44 +S'135800973' +p45 +sS'alt' +p46 +S'CC' +p47 +sssS'hg38' +p48 +(dp49 +g36 +S'NC_000009.12:g.132925586dup' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +g43 +sg44 +S'132925586' +p52 +sg46 
+S'CC' +p53 +sssS'grch37' +p54 +(dp55 +g36 +S'NC_000009.11:g.135800973dup' +p56 +sg38 +(dp57 +g40 +S'9' +p58 +sg42 +g43 +sg44 +S'135800973' +p59 +sg46 +S'CC' +p60 +sssS'grch38' +p61 +(dp62 +g36 +S'NC_000009.12:g.132925586dup' +p63 +sg38 +(dp64 +g40 +g58 +sg42 +g43 +sg44 +S'132925586' +p65 +sg46 +S'CC' +p66 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4' +p72 +sssS'metadata' +p73 +(dp74 +S'variantvalidator_hgvs_version' +p75 +S'1.1.3' +p76 +sS'uta_schema' +p77 +S'uta_20180821' +p78 +sS'seqrepo_db' +p79 +S'2018-08-21' +p80 +sS'variantvalidator_version' +p81 +S'v0.2' +p82 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant48.txt b/VariantValidator/testing/testOutputsMasterITS/variant48.txt new file mode 100644 index 00000000..a4f8aedf --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant48.txt @@ -0,0 +1,177 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000089.3:c.1035_1035+2del' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NM_000089.3:c.1033_1035del normalized to NM_000089.3:c.1035_1035+2del' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g6 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens collagen type I alpha 2 chain (COL1A2), mRNA +p15 +sS'gene_symbol' +p16 +S'COL1A2' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_000080.2:p.(Val345del)' +p21 +sS'slr' +p22 +S'NP_000080.2:p.(V345del)' +p23 +ssS'submitted_variant' +p24 +S'NM_000089.3:c.1033_1035delGTT' +p25 +sS'genome_context_intronic_sequence' +p26 +S'NC_000007.13(NM_000089.3):c.1035_1035+2del' +p27 +sS'hgvs_lrg_variant' +p28 +g6 +sS'hgvs_transcript_variant' +p29 +S'NM_000089.3:c.1035_1035+2del' +p30 +sS'hgvs_refseqgene_variant' +p31 +g6 
+sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000007.13:g.94039133_94039135del' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr7' +p41 +sS'ref' +p42 +S'CTTG' +p43 +sS'pos' +p44 +S'94039128' +p45 +sS'alt' +p46 +S'C' +p47 +sssS'hg38' +p48 +(dp49 +g36 +S'NC_000007.14:g.94409821_94409823del' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +S'CTTG' +p52 +sg44 +S'94409816' +p53 +sg46 +g47 +sssS'grch37' +p54 +(dp55 +g36 +S'NC_000007.13:g.94039133_94039135del' +p56 +sg38 +(dp57 +g40 +S'7' +p58 +sg42 +S'CTTG' +p59 +sg44 +S'94039128' +p60 +sg46 +g47 +sssS'grch38' +p61 +(dp62 +g36 +S'NC_000007.14:g.94409821_94409823del' +p63 +sg38 +(dp64 +g40 +g58 +sg42 +S'CTTG' +p65 +sg44 +S'94409816' +p66 +sg46 +g47 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000080.2' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000089.3' +p72 +sssS'metadata' +p73 +(dp74 +S'variantvalidator_hgvs_version' +p75 +S'1.1.3' +p76 +sS'uta_schema' +p77 +S'uta_20180821' +p78 +sS'seqrepo_db' +p79 +S'2018-08-21' +p80 +sS'variantvalidator_version' +p81 +S'v0.2' +p82 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant49.txt b/VariantValidator/testing/testOutputsMasterITS/variant49.txt new file mode 100644 index 00000000..9f06f84b --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant49.txt @@ -0,0 +1,175 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000089.3:c.1035_1035+2del' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens collagen type I alpha 2 chain (COL1A2), mRNA +p14 +sS'gene_symbol' +p15 +S'COL1A2' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_000080.2:p.(Val345del)' +p20 +sS'slr' +p21 +S'NP_000080.2:p.(V345del)' +p22 +ssS'submitted_variant' +p23 +S'NM_000089.3:c.1035_1035+2delTGT' +p24 +sS'genome_context_intronic_sequence' +p25 +S'NC_000007.13(NM_000089.3):c.1035_1035+2del' +p26 +sS'hgvs_lrg_variant' +p27 +g6 +sS'hgvs_transcript_variant' +p28 +S'NM_000089.3:c.1035_1035+2del' +p29 +sS'hgvs_refseqgene_variant' +p30 +g6 +sS'primary_assembly_loci' +p31 +(dp32 +S'hg19' +p33 +(dp34 +S'hgvs_genomic_description' +p35 +S'NC_000007.13:g.94039133_94039135del' +p36 +sS'vcf' +p37 +(dp38 +S'chr' +p39 +S'chr7' +p40 +sS'ref' +p41 +S'CTTG' +p42 +sS'pos' +p43 +S'94039128' +p44 +sS'alt' +p45 +S'C' +p46 +sssS'hg38' +p47 +(dp48 +g35 +S'NC_000007.14:g.94409821_94409823del' +p49 +sg37 +(dp50 +g39 +g40 +sg41 +S'CTTG' +p51 +sg43 +S'94409816' +p52 +sg45 +g46 +sssS'grch37' +p53 +(dp54 +g35 +S'NC_000007.13:g.94039133_94039135del' +p55 +sg37 +(dp56 +g39 +S'7' +p57 +sg41 +S'CTTG' +p58 +sg43 +S'94039128' +p59 +sg45 +g46 +sssS'grch38' +p60 +(dp61 +g35 +S'NC_000007.14:g.94409821_94409823del' +p62 +sg37 +(dp63 +g39 +g57 +sg41 +S'CTTG' +p64 +sg43 +S'94409816' +p65 +sg45 +g46 +ssssS'reference_sequence_records' +p66 +(dp67 +S'protein' +p68 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000080.2' +p69 +sS'transcript' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000089.3' +p71 +sssS'metadata' +p72 +(dp73 +S'variantvalidator_hgvs_version' +p74 +S'1.1.3' +p75 +sS'uta_schema' +p76 +S'uta_20180821' +p77 +sS'seqrepo_db' +p78 +S'2018-08-21' +p79 +sS'variantvalidator_version' +p80 +S'v0.2' +p81 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant5.txt b/VariantValidator/testing/testOutputsMasterITS/variant5.txt new file mode 100644 index 00000000..52ecc3f7 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant5.txt @@ -0,0 +1,287 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000109.3:c.7+127703T>A' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens dystrophin (DMD), transcript variant Dp427c, mRNA +p14 +sS'gene_symbol' +p15 +S'DMD' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_000100.2:p.?' +p20 +sS'slr' +p21 +S'NP_000100.2:p.?' 
+p22 +ssS'submitted_variant' +p23 +S'NC_000023.10:g.33229673A>T' +p24 +sS'genome_context_intronic_sequence' +p25 +S'NC_000023.10(NM_000109.3):c.7+127703T>A' +p26 +sS'hgvs_lrg_variant' +p27 +g6 +sS'hgvs_transcript_variant' +p28 +S'NM_000109.3:c.7+127703T>A' +p29 +sS'hgvs_refseqgene_variant' +p30 +g6 +sS'primary_assembly_loci' +p31 +(dp32 +S'hg19' +p33 +(dp34 +S'hgvs_genomic_description' +p35 +S'NC_000023.10:g.33229673A>T' +p36 +sS'vcf' +p37 +(dp38 +S'chr' +p39 +S'chrX' +p40 +sS'ref' +p41 +VA +p42 +sS'pos' +p43 +S'33229673' +p44 +sS'alt' +p45 +VT +p46 +sssS'hg38' +p47 +(dp48 +g35 +S'NC_000023.11:g.33211556A>T' +p49 +sg37 +(dp50 +g39 +g40 +sg41 +g42 +sg43 +S'33211556' +p51 +sg45 +g46 +sssS'grch37' +p52 +(dp53 +g35 +S'NC_000023.10:g.33229673A>T' +p54 +sg37 +(dp55 +g39 +S'X' +p56 +sg41 +g42 +sg43 +S'33229673' +p57 +sg45 +g46 +sssS'grch38' +p58 +(dp59 +g35 +S'NC_000023.11:g.33211556A>T' +p60 +sg37 +(dp61 +g39 +g56 +sg41 +g42 +sg43 +S'33211556' +p62 +sg45 +g46 +ssssS'reference_sequence_records' +p63 +(dp64 +S'protein' +p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000100.2' +p66 +sS'transcript' +p67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000109.3' +p68 +sssS'NM_004006.2:c.-244T>A' +p69 +(dp70 +g5 +g6 +sg7 +(lp71 +S'RefSeqGene record not available' +p72 +asg10 +g6 +sg11 +(lp73 +sg13 +VHomo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA +p74 +sg15 +S'DMD' +p75 +sg17 +(dp76 +g19 +S'NP_003997.1:p.?' +p77 +sg21 +S'NP_003997.1:p.?' 
+p78 +ssg23 +g24 +sg25 +g6 +sg27 +g6 +sg28 +S'NM_004006.2:c.-244T>A' +p79 +sg30 +g6 +sg31 +(dp80 +S'hg19' +p81 +(dp82 +g35 +S'NC_000023.10:g.33229673A>T' +p83 +sg37 +(dp84 +g39 +g40 +sg41 +g42 +sg43 +S'33229673' +p85 +sg45 +g46 +sssg47 +(dp86 +g35 +S'NC_000023.11:g.33211556A>T' +p87 +sg37 +(dp88 +g39 +g40 +sg41 +g42 +sg43 +S'33211556' +p89 +sg45 +g46 +sssS'grch37' +p90 +(dp91 +g35 +S'NC_000023.10:g.33229673A>T' +p92 +sg37 +(dp93 +g39 +g56 +sg41 +g42 +sg43 +S'33229673' +p94 +sg45 +g46 +sssS'grch38' +p95 +(dp96 +g35 +S'NC_000023.11:g.33211556A>T' +p97 +sg37 +(dp98 +g39 +g56 +sg41 +g42 +sg43 +S'33211556' +p99 +sg45 +g46 +ssssg63 +(dp100 +g65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1' +p101 +sg67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2' +p102 +sssS'metadata' +p103 +(dp104 +S'variantvalidator_hgvs_version' +p105 +S'1.1.3' +p106 +sS'uta_schema' +p107 +S'uta_20180821' +p108 +sS'seqrepo_db' +p109 +S'2018-08-21' +p110 +sS'variantvalidator_version' +p111 +S'v0.2' +p112 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant50.txt b/VariantValidator/testing/testOutputsMasterITS/variant50.txt new file mode 100644 index 00000000..91540969 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant50.txt @@ -0,0 +1,177 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.2024_2028+1del' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NM_000088.3:c.2023_2028del normalized to NM_000088.3:c.2024_2028+1del' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g6 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p15 +sS'gene_symbol' +p16 +S'COL1A1' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_000079.2:p.(Ala675_Arg676del)' +p21 +sS'slr' +p22 +S'NP_000079.2:p.(A675_R676del)' +p23 +ssS'submitted_variant' 
+p24 +S'NM_000088.3:c.2023_2028delGCAAGA' +p25 +sS'genome_context_intronic_sequence' +p26 +S'NC_000017.10(NM_000088.3):c.2024_2028+1del' +p27 +sS'hgvs_lrg_variant' +p28 +g6 +sS'hgvs_transcript_variant' +p29 +S'NM_000088.3:c.2024_2028+1del' +p30 +sS'hgvs_refseqgene_variant' +p31 +g6 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000017.10:g.48269340_48269345del' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr17' +p41 +sS'ref' +p42 +S'ACTCTTG' +p43 +sS'pos' +p44 +S'48269339' +p45 +sS'alt' +p46 +S'A' +p47 +sssS'hg38' +p48 +(dp49 +g36 +S'NC_000017.11:g.50191979_50191984del' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +S'ACTCTTG' +p52 +sg44 +S'50191978' +p53 +sg46 +g47 +sssS'grch37' +p54 +(dp55 +g36 +S'NC_000017.10:g.48269340_48269345del' +p56 +sg38 +(dp57 +g40 +S'17' +p58 +sg42 +S'ACTCTTG' +p59 +sg44 +S'48269339' +p60 +sg46 +g47 +sssS'grch38' +p61 +(dp62 +g36 +S'NC_000017.11:g.50191979_50191984del' +p63 +sg38 +(dp64 +g40 +g58 +sg42 +S'ACTCTTG' +p65 +sg44 +S'50191978' +p66 +sg46 +g47 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p72 +sssS'metadata' +p73 +(dp74 +S'variantvalidator_hgvs_version' +p75 +S'1.1.3' +p76 +sS'uta_schema' +p77 +S'uta_20180821' +p78 +sS'seqrepo_db' +p79 +S'2018-08-21' +p80 +sS'variantvalidator_version' +p81 +S'v0.2' +p82 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant51.txt b/VariantValidator/testing/testOutputsMasterITS/variant51.txt new file mode 100644 index 00000000..eed5dbf9 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant51.txt @@ -0,0 +1,176 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000089.3:c.938del' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NM_000089.3:c.938-1del automapped to NM_000089.3:c.938del' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g6 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens collagen type I alpha 2 chain (COL1A2), mRNA +p15 +sS'gene_symbol' +p16 +S'COL1A2' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_000080.2:p.(Gly313AlafsTer86)' +p21 +sS'slr' +p22 +S'NP_000080.2:p.(G313Afs*86)' +p23 +ssS'submitted_variant' +p24 +S'NM_000089.3:c.938-1delG' +p25 +sS'genome_context_intronic_sequence' +p26 +g6 +sS'hgvs_lrg_variant' +p27 +g6 +sS'hgvs_transcript_variant' +p28 +S'NM_000089.3:c.938del' +p29 +sS'hgvs_refseqgene_variant' +p30 +g6 +sS'primary_assembly_loci' +p31 +(dp32 +S'hg19' +p33 +(dp34 +S'hgvs_genomic_description' +p35 +S'NC_000007.13:g.94039036del' +p36 +sS'vcf' +p37 +(dp38 +S'chr' +p39 +S'chr7' +p40 +sS'ref' +p41 +S'AG' +p42 +sS'pos' +p43 +S'94039033' +p44 +sS'alt' +p45 +S'A' +p46 +sssS'hg38' +p47 +(dp48 +g35 +S'NC_000007.14:g.94409724del' +p49 +sg37 +(dp50 +g39 +g40 +sg41 +S'AG' +p51 +sg43 +S'94409721' +p52 +sg45 +g46 +sssS'grch37' +p53 +(dp54 +g35 +S'NC_000007.13:g.94039036del' +p55 +sg37 +(dp56 +g39 +S'7' +p57 +sg41 +S'AG' +p58 +sg43 +S'94039033' +p59 +sg45 +g46 +sssS'grch38' +p60 +(dp61 +g35 +S'NC_000007.14:g.94409724del' +p62 +sg37 +(dp63 +g39 +g57 +sg41 +S'AG' +p64 +sg43 +S'94409721' +p65 +sg45 +g46 +ssssS'reference_sequence_records' +p66 +(dp67 +S'protein' +p68 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000080.2' +p69 +sS'transcript' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000089.3' +p71 +sssS'metadata' +p72 +(dp73 +S'variantvalidator_hgvs_version' +p74 +S'1.1.3' +p75 +sS'uta_schema' +p76 +S'uta_20180821' +p77 +sS'seqrepo_db' +p78 +S'2018-08-21' +p79 +sS'variantvalidator_version' +p80 +S'v0.2' +p81 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant52.txt b/VariantValidator/testing/testOutputsMasterITS/variant52.txt new file mode 100644 index 00000000..af719c3c --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant52.txt @@ -0,0 +1,170 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_000088.3:c.589G=' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'RefSeqGene record not available' +p19 +asS'refseqgene_context_intronic_sequence' +p20 +g16 +sS'alt_genomic_loci' +p21 +(lp22 +sS'transcript_description' +p23 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p24 +sS'gene_symbol' +p25 +S'COL1A1' +p26 +sS'hgvs_predicted_protein_consequence' +p27 +(dp28 +S'tlr' +p29 +S'NP_000079.2:p.(Gly197=)' +p30 +sS'slr' +p31 +S'NP_000079.2:p.(G197=)' +p32 +ssS'submitted_variant' +p33 +S'NM_000088.3:c.589G=' +p34 +sS'genome_context_intronic_sequence' +p35 +g16 +sS'hgvs_lrg_variant' +p36 +g16 +sS'hgvs_transcript_variant' +p37 +S'NM_000088.3:c.589G=' +p38 +sS'hgvs_refseqgene_variant' +p39 +g16 +sS'primary_assembly_loci' +p40 +(dp41 +S'hg19' +p42 +(dp43 +S'hgvs_genomic_description' +p44 +S'NC_000017.10:g.48275363C=' +p45 +sS'vcf' +p46 +(dp47 +S'chr' +p48 +S'chr17' +p49 +sS'ref' +p50 +VC +p51 +sS'pos' +p52 +S'48275363' +p53 +sS'alt' +p54 +g51 +sssS'hg38' +p55 +(dp56 +g44 +S'NC_000017.11:g.50198002C=' +p57 
+sg46 +(dp58 +g48 +g49 +sg50 +g51 +sg52 +S'50198002' +p59 +sg54 +g51 +sssS'grch37' +p60 +(dp61 +g44 +S'NC_000017.10:g.48275363C=' +p62 +sg46 +(dp63 +g48 +S'17' +p64 +sg50 +g51 +sg52 +S'48275363' +p65 +sg54 +g51 +sssS'grch38' +p66 +(dp67 +g44 +S'NC_000017.11:g.50198002C=' +p68 +sg46 +(dp69 +g48 +g64 +sg50 +g51 +sg52 +S'50198002' +p70 +sg54 +g51 +ssssS'reference_sequence_records' +p71 +(dp72 +S'protein' +p73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p74 +sS'transcript' +p75 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p76 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant53.txt b/VariantValidator/testing/testOutputsMasterITS/variant53.txt new file mode 100644 index 00000000..82bb9336 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant53.txt @@ -0,0 +1,170 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.642A=' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p14 +sS'gene_symbol' +p15 +S'COL1A1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_000079.2:p.(Ser214=)' +p20 +sS'slr' +p21 +S'NP_000079.2:p.(S214=)' +p22 +ssS'submitted_variant' +p23 +S'NM_000088.3:c.642A=' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_000088.3:c.642A=' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000017.10:g.48275310T=' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr17' +p39 +sS'ref' +p40 +VT +p41 +sS'pos' +p42 +S'48275310' +p43 +sS'alt' +p44 +g41 +sssS'hg38' +p45 +(dp46 +g34 +S'NC_000017.11:g.50197949T=' +p47 +sg36 +(dp48 +g38 +g39 
+sg40 +g41 +sg42 +S'50197949' +p49 +sg44 +g41 +sssS'grch37' +p50 +(dp51 +g34 +S'NC_000017.10:g.48275310T=' +p52 +sg36 +(dp53 +g38 +S'17' +p54 +sg40 +g41 +sg42 +S'48275310' +p55 +sg44 +g41 +sssS'grch38' +p56 +(dp57 +g34 +S'NC_000017.11:g.50197949T=' +p58 +sg36 +(dp59 +g38 +g54 +sg40 +g41 +sg42 +S'50197949' +p60 +sg44 +g41 +ssssS'reference_sequence_records' +p61 +(dp62 +S'protein' +p63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p64 +sS'transcript' +p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p66 +sssS'metadata' +p67 +(dp68 +S'variantvalidator_hgvs_version' +p69 +S'1.1.3' +p70 +sS'uta_schema' +p71 +S'uta_20180821' +p72 +sS'seqrepo_db' +p73 +S'2018-08-21' +p74 +sS'variantvalidator_version' +p75 +S'v0.2' +p76 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant54.txt b/VariantValidator/testing/testOutputsMasterITS/variant54.txt new file mode 100644 index 00000000..06d041ef --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant54.txt @@ -0,0 +1,177 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.642+1_642+2delinsG' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NM_000088.3:c.642+1GG>G automapped to NM_000088.3:c.642+1_642+2delGGinsG' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g6 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p15 +sS'gene_symbol' +p16 +S'COL1A1' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_000079.2:p.?' +p21 +sS'slr' +p22 +S'NP_000079.2:p.?' 
+p23 +ssS'submitted_variant' +p24 +S'NM_000088.3:c.642+1GG>G' +p25 +sS'genome_context_intronic_sequence' +p26 +S'NC_000017.10(NM_000088.3):c.642+1_642+2delinsG' +p27 +sS'hgvs_lrg_variant' +p28 +g6 +sS'hgvs_transcript_variant' +p29 +S'NM_000088.3:c.642+1_642+2delinsG' +p30 +sS'hgvs_refseqgene_variant' +p31 +g6 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000017.10:g.48275308_48275309delinsC' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr17' +p41 +sS'ref' +p42 +S'TA' +p43 +sS'pos' +p44 +S'48275307' +p45 +sS'alt' +p46 +S'T' +p47 +sssS'hg38' +p48 +(dp49 +g36 +S'NC_000017.11:g.50197947_50197948delinsC' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +S'TA' +p52 +sg44 +S'50197946' +p53 +sg46 +g47 +sssS'grch37' +p54 +(dp55 +g36 +S'NC_000017.10:g.48275308_48275309delinsC' +p56 +sg38 +(dp57 +g40 +S'17' +p58 +sg42 +S'TA' +p59 +sg44 +S'48275307' +p60 +sg46 +g47 +sssS'grch38' +p61 +(dp62 +g36 +S'NC_000017.11:g.50197947_50197948delinsC' +p63 +sg38 +(dp64 +g40 +g58 +sg42 +S'TA' +p65 +sg44 +S'50197946' +p66 +sg46 +g47 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p72 +sssS'metadata' +p73 +(dp74 +S'variantvalidator_hgvs_version' +p75 +S'1.1.3' +p76 +sS'uta_schema' +p77 +S'uta_20180821' +p78 +sS'seqrepo_db' +p79 +S'2018-08-21' +p80 +sS'variantvalidator_version' +p81 +S'v0.2' +p82 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant55.txt b/VariantValidator/testing/testOutputsMasterITS/variant55.txt new file mode 100644 index 00000000..f9a61c92 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant55.txt @@ -0,0 +1,177 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.589-2_589-1delinsG' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NM_000088.3:c.589-2GG>G automapped to NM_000088.3:c.589-2_589-1delGGinsG' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g6 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p15 +sS'gene_symbol' +p16 +S'COL1A1' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_000079.2:p.?' +p21 +sS'slr' +p22 +S'NP_000079.2:p.?' +p23 +ssS'submitted_variant' +p24 +S'NM_000088.3:c.589-2GG>G' +p25 +sS'genome_context_intronic_sequence' +p26 +S'NC_000017.10(NM_000088.3):c.589-2_589-1delinsG' +p27 +sS'hgvs_lrg_variant' +p28 +g6 +sS'hgvs_transcript_variant' +p29 +S'NM_000088.3:c.589-2_589-1delinsG' +p30 +sS'hgvs_refseqgene_variant' +p31 +g6 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000017.10:g.48275364_48275365delinsC' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr17' +p41 +sS'ref' +p42 +S'CT' +p43 +sS'pos' +p44 +S'48275364' +p45 +sS'alt' +p46 +S'C' +p47 +sssS'hg38' +p48 +(dp49 +g36 +S'NC_000017.11:g.50198003_50198004delinsC' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +S'CT' +p52 +sg44 +S'50198003' +p53 +sg46 +g47 +sssS'grch37' +p54 +(dp55 +g36 +S'NC_000017.10:g.48275364_48275365delinsC' +p56 +sg38 +(dp57 +g40 +S'17' +p58 +sg42 +S'CT' +p59 +sg44 +S'48275364' +p60 +sg46 +g47 +sssS'grch38' +p61 +(dp62 +g36 +S'NC_000017.11:g.50198003_50198004delinsC' +p63 +sg38 +(dp64 +g40 +g58 +sg42 +S'CT' +p65 +sg44 +S'50198003' +p66 +sg46 
+g47 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p72 +sssS'metadata' +p73 +(dp74 +S'variantvalidator_hgvs_version' +p75 +S'1.1.3' +p76 +sS'uta_schema' +p77 +S'uta_20180821' +p78 +sS'seqrepo_db' +p79 +S'2018-08-21' +p80 +sS'variantvalidator_version' +p81 +S'v0.2' +p82 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant56.txt b/VariantValidator/testing/testOutputsMasterITS/variant56.txt new file mode 100644 index 00000000..e22ae1d7 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant56.txt @@ -0,0 +1,177 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.589-5_589-4insTTTT' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NM_000088.3:c.589-6_589-5insTTTT normalized to NM_000088.3:c.589-5_589-4insTTTT' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g6 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p15 +sS'gene_symbol' +p16 +S'COL1A1' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_000079.2:p.?' +p21 +sS'slr' +p22 +S'NP_000079.2:p.?' 
+p23 +ssS'submitted_variant' +p24 +S'NM_000088.3:c.589-6_589-5insTTTT' +p25 +sS'genome_context_intronic_sequence' +p26 +S'NC_000017.10(NM_000088.3):c.589-5_589-4insTTTT' +p27 +sS'hgvs_lrg_variant' +p28 +g6 +sS'hgvs_transcript_variant' +p29 +S'NM_000088.3:c.589-5_589-4insTTTT' +p30 +sS'hgvs_refseqgene_variant' +p31 +g6 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000017.10:g.48275367_48275368insAAAA' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr17' +p41 +sS'ref' +p42 +S'G' +p43 +sS'pos' +p44 +S'48275367' +p45 +sS'alt' +p46 +VGAAAA +p47 +sssS'hg38' +p48 +(dp49 +g36 +S'NC_000017.11:g.50198006_50198007insAAAA' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +g43 +sg44 +S'50198006' +p52 +sg46 +VGAAAA +p53 +sssS'grch37' +p54 +(dp55 +g36 +S'NC_000017.10:g.48275367_48275368insAAAA' +p56 +sg38 +(dp57 +g40 +S'17' +p58 +sg42 +g43 +sg44 +S'48275367' +p59 +sg46 +VGAAAA +p60 +sssS'grch38' +p61 +(dp62 +g36 +S'NC_000017.11:g.50198006_50198007insAAAA' +p63 +sg38 +(dp64 +g40 +g58 +sg42 +g43 +sg44 +S'50198006' +p65 +sg46 +VGAAAA +p66 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p72 +sssS'metadata' +p73 +(dp74 +S'variantvalidator_hgvs_version' +p75 +S'1.1.3' +p76 +sS'uta_schema' +p77 +S'uta_20180821' +p78 +sS'seqrepo_db' +p79 +S'2018-08-21' +p80 +sS'variantvalidator_version' +p81 +S'v0.2' +p82 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant57.txt b/VariantValidator/testing/testOutputsMasterITS/variant57.txt new file mode 100644 index 00000000..a1f9b37a --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant57.txt @@ -0,0 +1,177 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.642+4_642+5insAAAA' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NM_000088.3:c.642+3_642+4insAAAA normalized to NM_000088.3:c.642+4_642+5insAAAA' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g6 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p15 +sS'gene_symbol' +p16 +S'COL1A1' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_000079.2:p.?' +p21 +sS'slr' +p22 +S'NP_000079.2:p.?' +p23 +ssS'submitted_variant' +p24 +S'NM_000088.3:c.642+3_642+4insAAAA' +p25 +sS'genome_context_intronic_sequence' +p26 +S'NC_000017.10(NM_000088.3):c.642+4_642+5insAAAA' +p27 +sS'hgvs_lrg_variant' +p28 +g6 +sS'hgvs_transcript_variant' +p29 +S'NM_000088.3:c.642+4_642+5insAAAA' +p30 +sS'hgvs_refseqgene_variant' +p31 +g6 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000017.10:g.48275305_48275306insTTTT' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr17' +p41 +sS'ref' +p42 +S'C' +p43 +sS'pos' +p44 +S'48275305' +p45 +sS'alt' +p46 +VCTTTT +p47 +sssS'hg38' +p48 +(dp49 +g36 +S'NC_000017.11:g.50197944_50197945insTTTT' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +g43 +sg44 +S'50197944' +p52 +sg46 +VCTTTT +p53 +sssS'grch37' +p54 +(dp55 +g36 +S'NC_000017.10:g.48275305_48275306insTTTT' +p56 +sg38 +(dp57 +g40 +S'17' +p58 +sg42 +g43 +sg44 +S'48275305' +p59 +sg46 +VCTTTT +p60 +sssS'grch38' +p61 +(dp62 +g36 +S'NC_000017.11:g.50197944_50197945insTTTT' +p63 +sg38 +(dp64 +g40 +g58 +sg42 +g43 +sg44 +S'50197944' 
+p65 +sg46 +VCTTTT +p66 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p72 +sssS'metadata' +p73 +(dp74 +S'variantvalidator_hgvs_version' +p75 +S'1.1.3' +p76 +sS'uta_schema' +p77 +S'uta_20180821' +p78 +sS'seqrepo_db' +p79 +S'2018-08-21' +p80 +sS'variantvalidator_version' +p81 +S'v0.2' +p82 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant58.txt b/VariantValidator/testing/testOutputsMasterITS/variant58.txt new file mode 100644 index 00000000..e75142b4 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant58.txt @@ -0,0 +1,175 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.589-4_589-3insTT' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p14 +sS'gene_symbol' +p15 +S'COL1A1' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_000079.2:p.?' +p20 +sS'slr' +p21 +S'NP_000079.2:p.?' 
+p22 +ssS'submitted_variant' +p23 +S'NM_000088.3:c.589-4_589-3insTT' +p24 +sS'genome_context_intronic_sequence' +p25 +S'NC_000017.10(NM_000088.3):c.589-4_589-3insTT' +p26 +sS'hgvs_lrg_variant' +p27 +g6 +sS'hgvs_transcript_variant' +p28 +S'NM_000088.3:c.589-4_589-3insTT' +p29 +sS'hgvs_refseqgene_variant' +p30 +g6 +sS'primary_assembly_loci' +p31 +(dp32 +S'hg19' +p33 +(dp34 +S'hgvs_genomic_description' +p35 +S'NC_000017.10:g.48275366_48275367insAA' +p36 +sS'vcf' +p37 +(dp38 +S'chr' +p39 +S'chr17' +p40 +sS'ref' +p41 +S'T' +p42 +sS'pos' +p43 +S'48275366' +p44 +sS'alt' +p45 +VTAA +p46 +sssS'hg38' +p47 +(dp48 +g35 +S'NC_000017.11:g.50198005_50198006insAA' +p49 +sg37 +(dp50 +g39 +g40 +sg41 +g42 +sg43 +S'50198005' +p51 +sg45 +VTAA +p52 +sssS'grch37' +p53 +(dp54 +g35 +S'NC_000017.10:g.48275366_48275367insAA' +p55 +sg37 +(dp56 +g39 +S'17' +p57 +sg41 +g42 +sg43 +S'48275366' +p58 +sg45 +VTAA +p59 +sssS'grch38' +p60 +(dp61 +g35 +S'NC_000017.11:g.50198005_50198006insAA' +p62 +sg37 +(dp63 +g39 +g57 +sg41 +g42 +sg43 +S'50198005' +p64 +sg45 +VTAA +p65 +ssssS'reference_sequence_records' +p66 +(dp67 +S'protein' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p69 +sS'transcript' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p71 +sssS'metadata' +p72 +(dp73 +S'variantvalidator_hgvs_version' +p74 +S'1.1.3' +p75 +sS'uta_schema' +p76 +S'uta_20180821' +p77 +sS'seqrepo_db' +p78 +S'2018-08-21' +p79 +sS'variantvalidator_version' +p80 +S'v0.2' +p81 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant59.txt b/VariantValidator/testing/testOutputsMasterITS/variant59.txt new file mode 100644 index 00000000..ea974b83 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant59.txt @@ -0,0 +1,177 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.589-7del' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NM_000088.3:c.589-8del normalized to NM_000088.3:c.589-7del' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g6 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p15 +sS'gene_symbol' +p16 +S'COL1A1' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_000079.2:p.?' +p21 +sS'slr' +p22 +S'NP_000079.2:p.?' +p23 +ssS'submitted_variant' +p24 +S'NM_000088.3:c.589-8del' +p25 +sS'genome_context_intronic_sequence' +p26 +S'NC_000017.10(NM_000088.3):c.589-7del' +p27 +sS'hgvs_lrg_variant' +p28 +g6 +sS'hgvs_transcript_variant' +p29 +S'NM_000088.3:c.589-7del' +p30 +sS'hgvs_refseqgene_variant' +p31 +g6 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000017.10:g.48275370del' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr17' +p41 +sS'ref' +p42 +S'GA' +p43 +sS'pos' +p44 +S'48275369' +p45 +sS'alt' +p46 +S'G' +p47 +sssS'hg38' +p48 +(dp49 +g36 +S'NC_000017.11:g.50198009del' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +S'GA' +p52 +sg44 +S'50198008' +p53 +sg46 +g47 +sssS'grch37' +p54 +(dp55 +g36 +S'NC_000017.10:g.48275370del' +p56 +sg38 +(dp57 +g40 +S'17' +p58 +sg42 +S'GA' +p59 +sg44 +S'48275369' +p60 +sg46 +g47 +sssS'grch38' +p61 +(dp62 +g36 +S'NC_000017.11:g.50198009del' +p63 +sg38 +(dp64 +g40 +g58 +sg42 +S'GA' +p65 +sg44 +S'50198008' +p66 +sg46 +g47 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p72 +sssS'metadata' +p73 +(dp74 +S'variantvalidator_hgvs_version' +p75 +S'1.1.3' +p76 +sS'uta_schema' +p77 +S'uta_20180821' +p78 +sS'seqrepo_db' +p79 +S'2018-08-21' +p80 +sS'variantvalidator_version' +p81 +S'v0.2' +p82 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant6.txt b/VariantValidator/testing/testOutputsMasterITS/variant6.txt new file mode 100644 index 00000000..7319021d --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant6.txt @@ -0,0 +1,143 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_001145026.1:c.715A>G' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +aS'Suspected incomplete alignment between transcript NM_001145026.1 and genomic reference sequence NC_000012.11' +p10 +aS'NM_001145026.1:c.715A>G cannot be mapped directly to genome build GRCh37' +p11 +aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' +p12 +asS'refseqgene_context_intronic_sequence' +p13 +g6 +sS'alt_genomic_loci' +p14 +(lp15 +sS'transcript_description' +p16 +VHomo sapiens protein tyrosine phosphatase, receptor type Q (PTPRQ), mRNA +p17 +sS'gene_symbol' +p18 +S'PTPRQ' +p19 +sS'hgvs_predicted_protein_consequence' +p20 +(dp21 +S'tlr' +p22 +S'NP_001138498.1:p.(Arg239Gly)' +p23 +sS'slr' +p24 +S'NP_001138498.1:p.(R239G)' +p25 +ssS'submitted_variant' +p26 +S'NM_001145026.1:c.715A>G' +p27 +sS'genome_context_intronic_sequence' +p28 +g6 +sS'hgvs_lrg_variant' +p29 +g6 +sS'hgvs_transcript_variant' +p30 +S'NM_001145026.1:c.715A>G' +p31 +sS'hgvs_refseqgene_variant' +p32 +g6 +sS'primary_assembly_loci' +p33 +(dp34 +S'grch38' +p35 +(dp36 +S'hgvs_genomic_description' +p37 +S'NC_000012.12:g.80460707A>G' +p38 +sS'vcf' +p39 +(dp40 +S'chr' +p41 +S'12' +p42 +sS'ref' +p43 +VA +p44 
+sS'pos' +p45 +S'80460707' +p46 +sS'alt' +p47 +VG +p48 +sssS'hg38' +p49 +(dp50 +g37 +S'NC_000012.12:g.80460707A>G' +p51 +sg39 +(dp52 +g41 +S'chr12' +p53 +sg43 +g44 +sg45 +S'80460707' +p54 +sg47 +g48 +ssssS'reference_sequence_records' +p55 +(dp56 +S'protein' +p57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001138498.1' +p58 +sS'transcript' +p59 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001145026.1' +p60 +sssS'metadata' +p61 +(dp62 +S'variantvalidator_hgvs_version' +p63 +S'1.1.3' +p64 +sS'uta_schema' +p65 +S'uta_20180821' +p66 +sS'seqrepo_db' +p67 +S'2018-08-21' +p68 +sS'variantvalidator_version' +p69 +S'v0.2' +p70 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant60.txt b/VariantValidator/testing/testOutputsMasterITS/variant60.txt new file mode 100644 index 00000000..4eb27550 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant60.txt @@ -0,0 +1,174 @@ +(dp0 +S'NM_000527.4:c.-187_-185del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens low density lipoprotein receptor (LDLR), transcript variant 1, mRNA +p12 +sS'gene_symbol' +p13 +S'LDLR' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_000518.1:p.?' +p18 +sS'slr' +p19 +S'NP_000518.1:p.?' 
+p20 +ssS'submitted_variant' +p21 +S'NM_000527.4:c.-187_-185delCTC' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_000527.4:c.-187_-185del' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'grch38' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000019.10:g.11089362_11089364del' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'19' +p37 +sS'ref' +p38 +S'ACTC' +p39 +sS'pos' +p40 +S'11089355' +p41 +sS'alt' +p42 +S'A' +p43 +sssS'grch37' +p44 +(dp45 +g32 +S'NC_000019.9:g.11200038_11200040del' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +S'ACTC' +p48 +sg40 +S'11200031' +p49 +sg42 +g43 +sssS'hg38' +p50 +(dp51 +g32 +S'NC_000019.10:g.11089362_11089364del' +p52 +sg34 +(dp53 +g36 +S'chr19' +p54 +sg38 +S'ACTC' +p55 +sg40 +S'11089355' +p56 +sg42 +g43 +sssS'hg19' +p57 +(dp58 +g32 +S'NC_000019.9:g.11200038_11200040del' +p59 +sg34 +(dp60 +g36 +g54 +sg38 +S'ACTC' +p61 +sg40 +S'11200031' +p62 +sg42 +g43 +ssssS'reference_sequence_records' +p63 +(dp64 +S'protein' +p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000518.1' +p66 +sS'transcript' +p67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000527.4' +p68 +sssS'flag' +p69 +S'gene_variant' +p70 +sS'metadata' +p71 +(dp72 +S'variantvalidator_hgvs_version' +p73 +S'1.1.3' +p74 +sS'uta_schema' +p75 +S'uta_20180821' +p76 +sS'seqrepo_db' +p77 +S'2018-08-21' +p78 +sS'variantvalidator_version' +p79 +S'v0.2' +p80 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant61.txt b/VariantValidator/testing/testOutputsMasterITS/variant61.txt new file mode 100644 index 00000000..413e615a --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant61.txt @@ -0,0 +1,171 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_206933.2:c.6317C>G' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens usherin (USH2A), transcript variant 2, mRNA +p14 +sS'gene_symbol' +p15 +S'USH2A' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_996816.2:p.(Thr2106Arg)' +p20 +sS'slr' +p21 +S'NP_996816.2:p.(T2106R)' +p22 +ssS'submitted_variant' +p23 +S'NM_206933.2:c.6317C>G' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_206933.2:c.6317C>G' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000001.10:g.216219781A>C' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr1' +p39 +sS'ref' +p40 +S'A' +p41 +sS'pos' +p42 +S'216219781' +p43 +sS'alt' +p44 +VC +p45 +sssS'hg38' +p46 +(dp47 +g34 +S'NC_000001.11:g.216046439A>C' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +g41 +sg42 +S'216046439' +p50 +sg44 +g45 +sssS'grch37' +p51 +(dp52 +g34 +S'NC_000001.10:g.216219781A>C' +p53 +sg36 +(dp54 +g38 +S'1' +p55 +sg40 +g41 +sg42 +S'216219781' +p56 +sg44 +g45 +sssS'grch38' +p57 +(dp58 +g34 +S'NC_000001.11:g.216046439A>C' +p59 +sg36 +(dp60 +g38 +g55 +sg40 +g41 +sg42 +S'216046439' +p61 +sg44 +g45 +ssssS'reference_sequence_records' +p62 +(dp63 +S'protein' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_996816.2' +p65 +sS'transcript' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_206933.2' +p67 
+sssS'metadata' +p68 +(dp69 +S'variantvalidator_hgvs_version' +p70 +S'1.1.3' +p71 +sS'uta_schema' +p72 +S'uta_20180821' +p73 +sS'seqrepo_db' +p74 +S'2018-08-21' +p75 +sS'variantvalidator_version' +p76 +S'v0.2' +p77 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant62.txt b/VariantValidator/testing/testOutputsMasterITS/variant62.txt new file mode 100644 index 00000000..a7821cbb --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant62.txt @@ -0,0 +1,171 @@ +(dp0 +S'NM_000059.3:c.7397C=' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens BRCA2, DNA repair associated (BRCA2), mRNA +p12 +sS'gene_symbol' +p13 +S'BRCA2' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_000050.2:p.(Ala2466=)' +p18 +sS'slr' +p19 +S'NP_000050.2:p.(A2466=)' +p20 +ssS'submitted_variant' +p21 +S'NC_000013.10:g.32929387T>C' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_000059.3:c.7397C=' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000013.10:g.32929387T>C' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr13' +p37 +sS'ref' +p38 +S'T' +p39 +sS'pos' +p40 +S'32929387' +p41 +sS'alt' +p42 +S'C' +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000013.11:g.32355250T>C' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +g39 +sg40 +S'32355250' +p48 +sg42 +g43 +sssS'grch37' +p49 +(dp50 +g32 +S'NC_000013.10:g.32929387T>C' +p51 +sg34 +(dp52 +g36 +S'13' +p53 +sg38 +g39 +sg40 +S'32929387' +p54 +sg42 +g43 +sssS'grch38' +p55 +(dp56 +g32 +S'NC_000013.11:g.32355250T>C' +p57 +sg34 +(dp58 +g36 +g53 +sg38 +g39 +sg40 +S'32355250' +p59 +sg42 +g43 
+ssssS'reference_sequence_records' +p60 +(dp61 +S'protein' +p62 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000050.2' +p63 +sS'transcript' +p64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000059.3' +p65 +sssS'flag' +p66 +S'gene_variant' +p67 +sS'metadata' +p68 +(dp69 +S'variantvalidator_hgvs_version' +p70 +S'1.1.3' +p71 +sS'uta_schema' +p72 +S'uta_20180821' +p73 +sS'seqrepo_db' +p74 +S'2018-08-21' +p75 +sS'variantvalidator_version' +p76 +S'v0.2' +p77 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant63.txt b/VariantValidator/testing/testOutputsMasterITS/variant63.txt new file mode 100644 index 00000000..06210e37 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant63.txt @@ -0,0 +1,144 @@ +(dp0 +S'NM_015102.3:c.2818-2T>A' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'A more recent version of the selected reference sequence NM_015102.3 is available (NM_015102.4)' +p7 +aS'NM_015102.4:c.2818-2T>A MUST be fully validated prior to use in reports' +p8 +aS'select_variants=NM_015102.4:c.2818-2T>A' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g4 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens nephronophthisis 4 (NPHP4), mRNA +p15 +sS'gene_symbol' +p16 +S'NPHP4' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_055917.1:p.?' +p21 +sS'slr' +p22 +S'NP_055917.1:p.?' 
+p23 +ssS'submitted_variant' +p24 +S'NM_015102.3:c.2818-2T>A' +p25 +sS'genome_context_intronic_sequence' +p26 +S'NC_000001.10(NM_015102.3):c.2818-2T>A' +p27 +sS'hgvs_lrg_variant' +p28 +g4 +sS'hgvs_transcript_variant' +p29 +S'NM_015102.3:c.2818-2T>A' +p30 +sS'hgvs_refseqgene_variant' +p31 +g4 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000001.10:g.5935162A>T' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr1' +p41 +sS'ref' +p42 +VA +p43 +sS'pos' +p44 +S'5935162' +p45 +sS'alt' +p46 +VT +p47 +sssS'grch37' +p48 +(dp49 +g36 +S'NC_000001.10:g.5935162A>T' +p50 +sg38 +(dp51 +g40 +S'1' +p52 +sg42 +g43 +sg44 +S'5935162' +p53 +sg46 +g47 +ssssS'reference_sequence_records' +p54 +(dp55 +S'protein' +p56 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055917.1' +p57 +sS'transcript' +p58 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_015102.3' +p59 +sssS'flag' +p60 +S'gene_variant' +p61 +sS'metadata' +p62 +(dp63 +S'variantvalidator_hgvs_version' +p64 +S'1.1.3' +p65 +sS'uta_schema' +p66 +S'uta_20180821' +p67 +sS'seqrepo_db' +p68 +S'2018-08-21' +p69 +sS'variantvalidator_version' +p70 +S'v0.2' +p71 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant64.txt b/VariantValidator/testing/testOutputsMasterITS/variant64.txt new file mode 100644 index 00000000..5e367564 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant64.txt @@ -0,0 +1,439 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_001042544.1:c.3233_3235=' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NC_000019.9:g.41123094G>GG automapped to NC_000019.9:g.41123095dupG' +p9 +aS'The displayed variants may be artefacts of aligning NM_001042544.1 with genome build GRCh37' +p10 +aS'NM_001042544.1:c.3233_3235 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' +p11 +aS'Caution should be used when reporting the displayed variant descriptions' +p12 +aS'If you are unsure, please contact admin' +p13 +aS'RefSeqGene record not available' +p14 +asS'refseqgene_context_intronic_sequence' +p15 +g6 +sS'alt_genomic_loci' +p16 +(lp17 +sS'transcript_description' +p18 +VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA +p19 +sS'gene_symbol' +p20 +S'LTBP4' +p21 +sS'hgvs_predicted_protein_consequence' +p22 +(dp23 +S'tlr' +p24 +S'NP_001036009.1:p.(Gln1078=)' +p25 +sS'slr' +p26 +S'NP_001036009.1:p.(Q1078=)' +p27 +ssS'submitted_variant' +p28 +S'19-41123094-G-GG' +p29 +sS'genome_context_intronic_sequence' +p30 +g6 +sS'hgvs_lrg_variant' +p31 +g6 +sS'hgvs_transcript_variant' +p32 +S'NM_001042544.1:c.3233_3235=' +p33 +sS'hgvs_refseqgene_variant' +p34 +g6 +sS'primary_assembly_loci' +p35 +(dp36 +S'grch38' +p37 +(dp38 +S'hgvs_genomic_description' +p39 +S'NC_000019.10:g.40617187_40617189=' +p40 +sS'vcf' +p41 +(dp42 +S'chr' +p43 +S'19' +p44 +sS'ref' +p45 +VAGG +p46 +sS'pos' +p47 +S'40617187' +p48 +sS'alt' +p49 +g46 +sssS'grch37' +p50 +(dp51 +g39 +S'NC_000019.9:g.41123095dup' +p52 +sg41 +(dp53 +g43 +g44 +sg45 +S'G' +p54 +sg47 +S'41123094' +p55 +sg49 +VGG 
+p56 +sssS'hg38' +p57 +(dp58 +g39 +S'NC_000019.10:g.40617187_40617189=' +p59 +sg41 +(dp60 +g43 +S'chr19' +p61 +sg45 +g46 +sg47 +S'40617187' +p62 +sg49 +g46 +sssS'hg19' +p63 +(dp64 +g39 +S'NC_000019.9:g.41123095dup' +p65 +sg41 +(dp66 +g43 +g61 +sg45 +g54 +sg47 +S'41123094' +p67 +sg49 +VGG +p68 +ssssS'reference_sequence_records' +p69 +(dp70 +S'protein' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1' +p72 +sS'transcript' +p73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1' +p74 +sssS'metadata' +p75 +(dp76 +S'variantvalidator_hgvs_version' +p77 +S'1.1.3' +p78 +sS'uta_schema' +p79 +S'uta_20180821' +p80 +sS'seqrepo_db' +p81 +S'2018-08-21' +p82 +sS'variantvalidator_version' +p83 +S'v0.2' +p84 +ssS'NM_001042545.1:c.3032_3034=' +p85 +(dp86 +g5 +g6 +sg7 +(lp87 +S'NC_000019.9:g.41123094G>GG automapped to NC_000019.9:g.41123095dupG' +p88 +aS'The displayed variants may be artefacts of aligning NM_001042545.1 with genome build GRCh37' +p89 +aS'NM_001042545.1:c.3032_3034 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' +p90 +aS'Caution should be used when reporting the displayed variant descriptions' +p91 +aS'If you are unsure, please contact admin' +p92 +aS'RefSeqGene record not available' +p93 +asg15 +g6 +sg16 +(lp94 +sg18 +VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA +p95 +sg20 +S'LTBP4' +p96 +sg22 +(dp97 +g24 +S'NP_001036010.1:p.(Gln1011=)' +p98 +sg26 +S'NP_001036010.1:p.(Q1011=)' +p99 +ssg28 +g29 +sg30 +g6 +sg31 +g6 +sg32 +S'NM_001042545.1:c.3032_3034=' +p100 +sg34 +g6 +sg35 +(dp101 +S'grch38' +p102 +(dp103 +g39 +S'NC_000019.10:g.40617187_40617189=' +p104 +sg41 +(dp105 +g43 +g44 +sg45 +VAGG +p106 +sg47 +S'40617187' +p107 +sg49 +g106 +sssS'grch37' +p108 +(dp109 +g39 +S'NC_000019.9:g.41123095dup' +p110 +sg41 +(dp111 +g43 +g44 +sg45 +g54 +sg47 +S'41123094' +p112 +sg49 +VGG +p113 +sssg57 +(dp114 +g39 +S'NC_000019.10:g.40617187_40617189=' +p115 +sg41 +(dp116 +g43 +g61 
+sg45 +g106 +sg47 +S'40617187' +p117 +sg49 +g106 +sssS'hg19' +p118 +(dp119 +g39 +S'NC_000019.9:g.41123095dup' +p120 +sg41 +(dp121 +g43 +g61 +sg45 +g54 +sg47 +S'41123094' +p122 +sg49 +VGG +p123 +ssssg69 +(dp124 +g71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1' +p125 +sg73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1' +p126 +sssS'NM_003573.2:c.3122_3124=' +p127 +(dp128 +g5 +g6 +sg7 +(lp129 +S'NC_000019.9:g.41123094G>GG automapped to NC_000019.9:g.41123095dupG' +p130 +aS'The displayed variants may be artefacts of aligning NM_003573.2 with genome build GRCh37' +p131 +aS'NM_003573.2:c.3122_3124 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' +p132 +aS'Caution should be used when reporting the displayed variant descriptions' +p133 +aS'If you are unsure, please contact admin' +p134 +aS'RefSeqGene record not available' +p135 +asg15 +g6 +sg16 +(lp136 +sg18 +VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA +p137 +sg20 +S'LTBP4' +p138 +sg22 +(dp139 +g24 +S'NP_003564.2:p.(Gln1041=)' +p140 +sg26 +S'NP_003564.2:p.(Q1041=)' +p141 +ssg28 +g29 +sg30 +g6 +sg31 +g6 +sg32 +S'NM_003573.2:c.3122_3124=' +p142 +sg34 +g6 +sg35 +(dp143 +S'grch38' +p144 +(dp145 +g39 +S'NC_000019.10:g.40617187_40617189=' +p146 +sg41 +(dp147 +g43 +g44 +sg45 +VAGG +p148 +sg47 +S'40617187' +p149 +sg49 +g148 +sssS'grch37' +p150 +(dp151 +g39 +S'NC_000019.9:g.41123095dup' +p152 +sg41 +(dp153 +g43 +g44 +sg45 +g54 +sg47 +S'41123094' +p154 +sg49 +VGG +p155 +sssg57 +(dp156 +g39 +S'NC_000019.10:g.40617187_40617189=' +p157 +sg41 +(dp158 +g43 +g61 +sg45 +g148 +sg47 +S'40617187' +p159 +sg49 +g148 +sssS'hg19' +p160 +(dp161 +g39 +S'NC_000019.9:g.41123095dup' +p162 +sg41 +(dp163 +g43 +g61 +sg45 +g54 +sg47 +S'41123094' +p164 +sg49 +VGG +p165 +ssssg69 +(dp166 +g71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2' +p167 +sg73 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2' +p168 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant65.txt b/VariantValidator/testing/testOutputsMasterITS/variant65.txt new file mode 100644 index 00000000..22ec5a84 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant65.txt @@ -0,0 +1,511 @@ +(dp0 +S'NM_014249.2:c.946_949=' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC' +p7 +aS'The displayed variants may be artefacts of aligning NM_014249.2 with genome build GRCh37' +p8 +aS'NC_000015.9:g.72105928_72105929 contains 1 genomic base(s) that fail to align to transcript NM_014249.2' +p9 +aS'Caution should be used when reporting the displayed variant descriptions' +p10 +aS'If you are unsure, please contact admin' +p11 +aS'A more recent version of the selected reference sequence NM_014249.2 is available (NM_014249.3)' +p12 +aS'NM_014249.3:c.946_949GACC= MUST be fully validated prior to use in reports' +p13 +aS'select_variants=NM_014249.3:c.946_949=' +p14 +aS'RefSeqGene record not available' +p15 +asS'refseqgene_context_intronic_sequence' +p16 +g4 +sS'alt_genomic_loci' +p17 +(lp18 +sS'transcript_description' +p19 +VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA +p20 +sS'gene_symbol' +p21 +S'NR2E3' +p22 +sS'hgvs_predicted_protein_consequence' +p23 +(dp24 +S'tlr' +p25 +S'NP_055064.1:p.(Asp316=)' +p26 +sS'slr' +p27 +S'NP_055064.1:p.(D316=)' +p28 +ssS'submitted_variant' +p29 +S'15-72105928-AC-A' +p30 +sS'genome_context_intronic_sequence' +p31 +g4 +sS'hgvs_lrg_variant' +p32 +g4 +sS'hgvs_transcript_variant' +p33 +S'NM_014249.2:c.946_949=' +p34 +sS'hgvs_refseqgene_variant' +p35 +g4 +sS'primary_assembly_loci' +p36 +(dp37 +S'hg19' +p38 +(dp39 +S'hgvs_genomic_description' +p40 +S'NC_000015.9:g.72105933del' +p41 +sS'vcf' +p42 +(dp43 +S'chr' +p44 +S'chr15' +p45 +sS'ref' +p46 +S'AC' +p47 +sS'pos' +p48 
+S'72105928' +p49 +sS'alt' +p50 +S'A' +p51 +sssS'grch37' +p52 +(dp53 +g40 +S'NC_000015.9:g.72105933del' +p54 +sg42 +(dp55 +g44 +S'15' +p56 +sg46 +S'AC' +p57 +sg48 +S'72105928' +p58 +sg50 +g51 +ssssS'reference_sequence_records' +p59 +(dp60 +S'protein' +p61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1' +p62 +sS'transcript' +p63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2' +p64 +sssS'NM_016346.3:c.946_949=' +p65 +(dp66 +g3 +g4 +sg5 +(lp67 +S'NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC' +p68 +aS'The displayed variants may be artefacts of aligning NM_016346.3 with genome build GRCh37' +p69 +aS'NC_000015.9:g.72105928_72105929 contains 1 genomic base(s) that fail to align to transcript NM_016346.3' +p70 +aS'Caution should be used when reporting the displayed variant descriptions' +p71 +aS'If you are unsure, please contact admin' +p72 +aS'RefSeqGene record not available' +p73 +asg16 +g4 +sg17 +(lp74 +sg19 +VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA +p75 +sg21 +S'NR2E3' +p76 +sg23 +(dp77 +g25 +S'NP_057430.1:p.(Asp316=)' +p78 +sg27 +S'NP_057430.1:p.(D316=)' +p79 +ssg29 +g30 +sg31 +g4 +sg32 +g4 +sg33 +S'NM_016346.3:c.946_949=' +p80 +sg35 +g4 +sg36 +(dp81 +S'grch38' +p82 +(dp83 +g40 +S'NC_000015.10:g.71813587_71813590=' +p84 +sg42 +(dp85 +g44 +g56 +sg46 +VGACC +p86 +sg48 +S'71813587' +p87 +sg50 +g86 +sssS'grch37' +p88 +(dp89 +g40 +S'NC_000015.9:g.72105933del' +p90 +sg42 +(dp91 +g44 +g56 +sg46 +S'AC' +p92 +sg48 +S'72105928' +p93 +sg50 +g51 +sssS'hg38' +p94 +(dp95 +g40 +S'NC_000015.10:g.71813587_71813590=' +p96 +sg42 +(dp97 +g44 +g45 +sg46 +g86 +sg48 +S'71813587' +p98 +sg50 +g86 +sssS'hg19' +p99 +(dp100 +g40 +S'NC_000015.9:g.72105933del' +p101 +sg42 +(dp102 +g44 +g45 +sg46 +S'AC' +p103 +sg48 +S'72105928' +p104 +sg50 +g51 +ssssg59 +(dp105 +g61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1' +p106 +sg63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3' +p107 +sssS'flag' +p108 
+S'gene_variant' +p109 +sS'NM_014249.3:c.946_949=' +p110 +(dp111 +g3 +g4 +sg5 +(lp112 +S'NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC' +p113 +aS'The displayed variants may be artefacts of aligning NM_014249.3 with genome build GRCh37' +p114 +aS'NC_000015.9:g.72105928_72105929 contains 1 genomic base(s) that fail to align to transcript NM_014249.3' +p115 +aS'Caution should be used when reporting the displayed variant descriptions' +p116 +aS'If you are unsure, please contact admin' +p117 +aS'RefSeqGene record not available' +p118 +asg16 +g4 +sg17 +(lp119 +sg19 +VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA +p120 +sg21 +S'NR2E3' +p121 +sg23 +(dp122 +g25 +S'NP_055064.1:p.(Asp316=)' +p123 +sg27 +S'NP_055064.1:p.(D316=)' +p124 +ssg29 +g30 +sg31 +g4 +sg32 +g4 +sg33 +S'NM_014249.3:c.946_949=' +p125 +sg35 +g4 +sg36 +(dp126 +S'grch38' +p127 +(dp128 +g40 +S'NC_000015.10:g.71813587_71813590=' +p129 +sg42 +(dp130 +g44 +g56 +sg46 +VGACC +p131 +sg48 +S'71813587' +p132 +sg50 +g131 +sssS'grch37' +p133 +(dp134 +g40 +S'NC_000015.9:g.72105933del' +p135 +sg42 +(dp136 +g44 +g56 +sg46 +S'AC' +p137 +sg48 +S'72105928' +p138 +sg50 +g51 +sssg94 +(dp139 +g40 +S'NC_000015.10:g.71813587_71813590=' +p140 +sg42 +(dp141 +g44 +g45 +sg46 +g131 +sg48 +S'71813587' +p142 +sg50 +g131 +sssS'hg19' +p143 +(dp144 +g40 +S'NC_000015.9:g.72105933del' +p145 +sg42 +(dp146 +g44 +g45 +sg46 +S'AC' +p147 +sg48 +S'72105928' +p148 +sg50 +g51 +ssssg59 +(dp149 +g61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1' +p150 +sg63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3' +p151 +sssS'NM_016346.2:c.946_949=' +p152 +(dp153 +g3 +g4 +sg5 +(lp154 +S'NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC' +p155 +aS'The displayed variants may be artefacts of aligning NM_016346.2 with genome build GRCh37' +p156 +aS'NC_000015.9:g.72105928_72105929 contains 1 genomic base(s) that fail to align to transcript NM_016346.2' +p157 
+aS'Caution should be used when reporting the displayed variant descriptions' +p158 +aS'If you are unsure, please contact admin' +p159 +aS'A more recent version of the selected reference sequence NM_016346.2 is available (NM_016346.3)' +p160 +aS'NM_016346.3:c.946_949GACC= MUST be fully validated prior to use in reports' +p161 +aS'select_variants=NM_016346.3:c.946_949=' +p162 +aS'RefSeqGene record not available' +p163 +asg16 +g4 +sg17 +(lp164 +sg19 +VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA +p165 +sg21 +S'NR2E3' +p166 +sg23 +(dp167 +g25 +S'NP_057430.1:p.(Asp316=)' +p168 +sg27 +S'NP_057430.1:p.(D316=)' +p169 +ssg29 +g30 +sg31 +g4 +sg32 +g4 +sg33 +S'NM_016346.2:c.946_949=' +p170 +sg35 +g4 +sg36 +(dp171 +S'hg19' +p172 +(dp173 +g40 +S'NC_000015.9:g.72105933del' +p174 +sg42 +(dp175 +g44 +g45 +sg46 +S'AC' +p176 +sg48 +S'72105928' +p177 +sg50 +g51 +sssS'grch37' +p178 +(dp179 +g40 +S'NC_000015.9:g.72105933del' +p180 +sg42 +(dp181 +g44 +g56 +sg46 +S'AC' +p182 +sg48 +S'72105928' +p183 +sg50 +g51 +ssssg59 +(dp184 +g61 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1' +p185 +sg63 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2' +p186 +sssS'metadata' +p187 +(dp188 +S'variantvalidator_hgvs_version' +p189 +S'1.1.3' +p190 +sS'uta_schema' +p191 +S'uta_20180821' +p192 +sS'seqrepo_db' +p193 +S'2018-08-21' +p194 +sS'variantvalidator_version' +p195 +S'v0.2' +p196 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant66.txt b/VariantValidator/testing/testOutputsMasterITS/variant66.txt new file mode 100644 index 00000000..86a77815 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant66.txt @@ -0,0 +1,223 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_032790.3:c.126_128=' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NC_000012.11:g.122064773CCCGCCA>C automapped to NC_000012.11:g.122064785_122064790del' +p9 +aS'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' +p10 +aS'Genome position NC_000012.11:g.122064780 aligns within a Requires Analysis-bp gap in transcript NM_032790.3 between positions c.126_127' +p11 +aS'Caution should be used when reporting the displayed variant descriptions' +p12 +aS'If you are unsure, please contact admin' +p13 +aS'RefSeqGene record not available' +p14 +asS'refseqgene_context_intronic_sequence' +p15 +g6 +sS'alt_genomic_loci' +p16 +(lp17 +(dp18 +S'grch37' +p19 +(dp20 +S'hgvs_genomic_description' +p21 +S'NW_004504303.2:g.302883_302888del' +p22 +sS'vcf' +p23 +(dp24 +S'chr' +p25 +S'HG1595_PATCH' +p26 +sS'ref' +p27 +S'CCCGCCA' +p28 +sS'pos' +p29 +S'302871' +p30 +sS'alt' +p31 +S'C' +p32 +sssa(dp33 +S'hg19' +p34 +(dp35 +g21 +S'NW_004504303.2:g.302883_302888del' +p36 +sg23 +(dp37 +g25 +S'NW_004504303.2' +p38 +sg27 +S'CCCGCCA' +p39 +sg29 +S'302871' +p40 +sg31 +g32 +sssasS'transcript_description' +p41 +VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA +p42 +sS'gene_symbol' +p43 +S'ORAI1' +p44 +sS'hgvs_predicted_protein_consequence' +p45 +(dp46 +S'tlr' +p47 +S'NP_116179.2:p.(Ala42=)' +p48 +sS'slr' +p49 +S'NP_116179.2:p.(A42=)' +p50 +ssS'submitted_variant' +p51 +S'12-122064773-CCCGCCA-C' +p52 +sS'genome_context_intronic_sequence' +p53 +g6 +sS'hgvs_lrg_variant' +p54 +g6 +sS'hgvs_transcript_variant' +p55 +S'NM_032790.3:c.126_128=' +p56 
+sS'hgvs_refseqgene_variant' +p57 +g6 +sS'primary_assembly_loci' +p58 +(dp59 +S'hg19' +p60 +(dp61 +g21 +S'NC_000012.11:g.122064785_122064790del' +p62 +sg23 +(dp63 +g25 +S'chr12' +p64 +sg27 +S'CCCGCCA' +p65 +sg29 +S'122064773' +p66 +sg31 +g32 +sssS'hg38' +p67 +(dp68 +g21 +S'NC_000012.12:g.121626873_121626875=' +p69 +sg23 +(dp70 +g25 +g64 +sg27 +VCCC +p71 +sg29 +S'121626873' +p72 +sg31 +g71 +sssS'grch37' +p73 +(dp74 +g21 +S'NC_000012.11:g.122064785_122064790del' +p75 +sg23 +(dp76 +g25 +S'12' +p77 +sg27 +S'CCCGCCA' +p78 +sg29 +S'122064773' +p79 +sg31 +g32 +sssS'grch38' +p80 +(dp81 +g21 +S'NC_000012.12:g.121626873_121626875=' +p82 +sg23 +(dp83 +g25 +g77 +sg27 +g71 +sg29 +S'121626873' +p84 +sg31 +g71 +ssssS'reference_sequence_records' +p85 +(dp86 +S'protein' +p87 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' +p88 +sS'transcript' +p89 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' +p90 +sssS'metadata' +p91 +(dp92 +S'variantvalidator_hgvs_version' +p93 +S'1.1.3' +p94 +sS'uta_schema' +p95 +S'uta_20180821' +p96 +sS'seqrepo_db' +p97 +S'2018-08-21' +p98 +sS'variantvalidator_version' +p99 +S'v0.2' +p100 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant67.txt b/VariantValidator/testing/testOutputsMasterITS/variant67.txt new file mode 100644 index 00000000..ad633cd9 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant67.txt @@ -0,0 +1,223 @@ +(dp0 +S'NM_032790.3:c.132_137dup' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000012.11:g.122064774CCGCCA>CCGCCA automapped to NC_000012.11:g.122064774_122064779CCGCCA=' +p7 +aS'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' +p8 +aS'NC_000012.11:g.122064773_122064779 contains 6 genomic base(s) that fail to align to transcript NM_032790.3' +p9 +aS'Caution should be used when reporting the displayed variant descriptions' +p10 +aS'If you are unsure, please contact admin' +p11 +aS'RefSeqGene record not available' +p12 +asS'refseqgene_context_intronic_sequence' +p13 +g4 +sS'alt_genomic_loci' +p14 +(lp15 +(dp16 +S'grch37' +p17 +(dp18 +S'hgvs_genomic_description' +p19 +S'NW_004504303.2:g.302868_302887=' +p20 +sS'vcf' +p21 +(dp22 +S'chr' +p23 +S'HG1595_PATCH' +p24 +sS'ref' +p25 +S'GGCCCCGCCACCGCCACCGC' +p26 +sS'pos' +p27 +S'302868' +p28 +sS'alt' +p29 +g26 +sssa(dp30 +S'hg19' +p31 +(dp32 +g19 +S'NW_004504303.2:g.302868_302887=' +p33 +sg21 +(dp34 +g23 +S'NW_004504303.2' +p35 +sg25 +g26 +sg27 +S'302868' +p36 +sg29 +g26 +sssasS'transcript_description' +p37 +VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA +p38 +sS'gene_symbol' +p39 +S'ORAI1' +p40 +sS'hgvs_predicted_protein_consequence' +p41 +(dp42 +S'tlr' +p43 +S'NP_116179.2:p.(Pro46_Pro47dup)' +p44 +sS'slr' +p45 +S'NP_116179.2:p.(P46_P47dup)' +p46 +ssS'submitted_variant' +p47 +S'12-122064774-CCGCCA-CCGCCA' +p48 +sS'genome_context_intronic_sequence' +p49 +g4 +sS'hgvs_lrg_variant' +p50 +g4 +sS'hgvs_transcript_variant' +p51 +S'NM_032790.3:c.132_137dup' +p52 +sS'hgvs_refseqgene_variant' +p53 +g4 
+sS'primary_assembly_loci' +p54 +(dp55 +S'hg19' +p56 +(dp57 +g19 +S'NC_000012.11:g.122064770_122064789=' +p58 +sg21 +(dp59 +g23 +S'chr12' +p60 +sg25 +S'GGCCCCGCCACCGCCACCGC' +p61 +sg27 +S'122064770' +p62 +sg29 +g61 +sssS'hg38' +p63 +(dp64 +g19 +S'NC_000012.12:g.121626879_121626884dup' +p65 +sg21 +(dp66 +g23 +g60 +sg25 +S'CCGCCA' +p67 +sg27 +S'121626874' +p68 +sg29 +VCCGCCACCGCCA +p69 +sssS'grch37' +p70 +(dp71 +g19 +S'NC_000012.11:g.122064770_122064789=' +p72 +sg21 +(dp73 +g23 +S'12' +p74 +sg25 +g61 +sg27 +S'122064770' +p75 +sg29 +g61 +sssS'grch38' +p76 +(dp77 +g19 +S'NC_000012.12:g.121626879_121626884dup' +p78 +sg21 +(dp79 +g23 +g74 +sg25 +S'CCGCCA' +p80 +sg27 +S'121626874' +p81 +sg29 +VCCGCCACCGCCA +p82 +ssssS'reference_sequence_records' +p83 +(dp84 +S'protein' +p85 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' +p86 +sS'transcript' +p87 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' +p88 +sssS'flag' +p89 +S'gene_variant' +p90 +sS'metadata' +p91 +(dp92 +S'variantvalidator_hgvs_version' +p93 +S'1.1.3' +p94 +sS'uta_schema' +p95 +S'uta_20180821' +p96 +sS'seqrepo_db' +p97 +S'2018-08-21' +p98 +sS'variantvalidator_version' +p99 +S'v0.2' +p100 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant68.txt b/VariantValidator/testing/testOutputsMasterITS/variant68.txt new file mode 100644 index 00000000..759d2009 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant68.txt @@ -0,0 +1,218 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_032790.3:c.132_135delinsGCCGT' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NC_000012.11:g.122064773CCCGCCACCGCCACCGC>CCCGCCACCGCCGCCGTC automapped to NC_000012.11:g.122064785_122064788delinsGCCGT' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g6 +sS'alt_genomic_loci' +p12 +(lp13 +(dp14 +S'grch37' +p15 +(dp16 +S'hgvs_genomic_description' +p17 +S'NW_004504303.2:g.302883_302886delinsGCCGT' +p18 +sS'vcf' +p19 +(dp20 +S'chr' +p21 +S'HG1595_PATCH' +p22 +sS'ref' +p23 +S'ACCG' +p24 +sS'pos' +p25 +S'302883' +p26 +sS'alt' +p27 +VGCCGT +p28 +sssa(dp29 +S'hg19' +p30 +(dp31 +g17 +S'NW_004504303.2:g.302883_302886delinsGCCGT' +p32 +sg19 +(dp33 +g21 +S'NW_004504303.2' +p34 +sg23 +S'ACCG' +p35 +sg25 +S'302883' +p36 +sg27 +g28 +sssasS'transcript_description' +p37 +VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA +p38 +sS'gene_symbol' +p39 +S'ORAI1' +p40 +sS'hgvs_predicted_protein_consequence' +p41 +(dp42 +S'tlr' +p43 +S'NP_116179.2:p.(Pro46SerfsTer42)' +p44 +sS'slr' +p45 +S'NP_116179.2:p.(P46Sfs*42)' +p46 +ssS'submitted_variant' +p47 +S'12-122064773-CCCGCCACCGCCACCGC-CCCGCCACCGCCGCCGTC' +p48 +sS'genome_context_intronic_sequence' +p49 +g6 +sS'hgvs_lrg_variant' +p50 +g6 +sS'hgvs_transcript_variant' +p51 +S'NM_032790.3:c.132_135delinsGCCGT' +p52 +sS'hgvs_refseqgene_variant' +p53 +g6 +sS'primary_assembly_loci' +p54 +(dp55 +S'hg19' +p56 +(dp57 +g17 +S'NC_000012.11:g.122064785_122064788delinsGCCGT' +p58 +sg19 +(dp59 +g21 +S'chr12' +p60 +sg23 +S'ACCG' +p61 +sg25 +S'122064785' +p62 +sg27 +VGCCGT +p63 +sssS'hg38' +p64 
+(dp65 +g17 +S'NC_000012.12:g.121626879_121626882delinsGCCGT' +p66 +sg19 +(dp67 +g21 +g60 +sg23 +S'ACCG' +p68 +sg25 +S'121626879' +p69 +sg27 +VGCCGT +p70 +sssS'grch37' +p71 +(dp72 +g17 +S'NC_000012.11:g.122064785_122064788delinsGCCGT' +p73 +sg19 +(dp74 +g21 +S'12' +p75 +sg23 +S'ACCG' +p76 +sg25 +S'122064785' +p77 +sg27 +g63 +sssS'grch38' +p78 +(dp79 +g17 +S'NC_000012.12:g.121626879_121626882delinsGCCGT' +p80 +sg19 +(dp81 +g21 +g75 +sg23 +S'ACCG' +p82 +sg25 +S'121626879' +p83 +sg27 +g70 +ssssS'reference_sequence_records' +p84 +(dp85 +S'protein' +p86 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' +p87 +sS'transcript' +p88 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' +p89 +sssS'metadata' +p90 +(dp91 +S'variantvalidator_hgvs_version' +p92 +S'1.1.3' +p93 +sS'uta_schema' +p94 +S'uta_20180821' +p95 +sS'seqrepo_db' +p96 +S'2018-08-21' +p97 +sS'variantvalidator_version' +p98 +S'v0.2' +p99 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant69.txt b/VariantValidator/testing/testOutputsMasterITS/variant69.txt new file mode 100644 index 00000000..5e82de2a --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant69.txt @@ -0,0 +1,219 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_032790.3:c.129_130insACACCG' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' +p9 +aS'NC_000012.11:g.122064777 is one of 6 genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127' +p10 +aS'Caution should be used when reporting the displayed variant descriptions' +p11 +aS'If you are unsure, please contact admin' +p12 +aS'RefSeqGene record not available' +p13 +asS'refseqgene_context_intronic_sequence' +p14 +g6 +sS'alt_genomic_loci' +p15 +(lp16 +(dp17 +S'grch37' +p18 +(dp19 +S'hgvs_genomic_description' +p20 +S'NW_004504303.2:g.302875C>A' +p21 +sS'vcf' +p22 +(dp23 
+S'chr' +p24 +S'HG1595_PATCH' +p25 +sS'ref' +p26 +S'C' +p27 +sS'pos' +p28 +S'302875' +p29 +sS'alt' +p30 +S'A' +p31 +sssa(dp32 +S'hg19' +p33 +(dp34 +g20 +S'NW_004504303.2:g.302875C>A' +p35 +sg22 +(dp36 +g24 +S'NW_004504303.2' +p37 +sg26 +g27 +sg28 +S'302875' +p38 +sg30 +g31 +sssasS'transcript_description' +p39 +VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA +p40 +sS'gene_symbol' +p41 +S'ORAI1' +p42 +sS'hgvs_predicted_protein_consequence' +p43 +(dp44 +S'tlr' +p45 +S'NP_116179.2:p.(Pro43_Pro44insThrPro)' +p46 +sS'slr' +p47 +S'NP_116179.2:p.(P43_P44insTP)' +p48 +ssS'submitted_variant' +p49 +S'NC_000012.11:g.122064777C>A' +p50 +sS'genome_context_intronic_sequence' +p51 +g6 +sS'hgvs_lrg_variant' +p52 +g6 +sS'hgvs_transcript_variant' +p53 +S'NM_032790.3:c.129_130insACACCG' +p54 +sS'hgvs_refseqgene_variant' +p55 +g6 +sS'primary_assembly_loci' +p56 +(dp57 +S'hg19' +p58 +(dp59 +g20 +S'NC_000012.11:g.122064777C>A' +p60 +sg22 +(dp61 +g24 +S'chr12' +p62 +sg26 +g27 +sg28 +S'122064777' +p63 +sg30 +g31 +sssS'hg38' +p64 +(dp65 +g20 +S'NC_000012.12:g.121626876_121626877insACACCG' +p66 +sg22 +(dp67 +g24 +g62 +sg26 +g27 +sg28 +S'121626873' +p68 +sg30 +VCCCGACA +p69 +sssS'grch37' +p70 +(dp71 +g20 +S'NC_000012.11:g.122064777C>A' +p72 +sg22 +(dp73 +g24 +S'12' +p74 +sg26 +g27 +sg28 +S'122064777' +p75 +sg30 +g31 +sssS'grch38' +p76 +(dp77 +g20 +S'NC_000012.12:g.121626876_121626877insACACCG' +p78 +sg22 +(dp79 +g24 +g74 +sg26 +g27 +sg28 +S'121626873' +p80 +sg30 +VCCCGACA +p81 +ssssS'reference_sequence_records' +p82 +(dp83 +S'protein' +p84 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' +p85 +sS'transcript' +p86 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' +p87 +sssS'metadata' +p88 +(dp89 +S'variantvalidator_hgvs_version' +p90 +S'1.1.3' +p91 +sS'uta_schema' +p92 +S'uta_20180821' +p93 +sS'seqrepo_db' +p94 +S'2018-08-21' +p95 +sS'variantvalidator_version' +p96 +S'v0.2' +p97 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant7.txt b/VariantValidator/testing/testOutputsMasterITS/variant7.txt new file mode 100644 index 00000000..3ddbed10 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant7.txt @@ -0,0 +1,1362 @@ +(dp0 +S'NM_001077183.2:c.138+821del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' +p7 +aS'RefSeqGene record not available' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 4, mRNA +p13 +sS'gene_symbol' +p14 +S'TSC2' +p15 +sS'hgvs_predicted_protein_consequence' +p16 +(dp17 +S'tlr' +p18 +S'NP_001070651.1:p.?' +p19 +sS'slr' +p20 +S'NP_001070651.1:p.?' +p21 +ssS'submitted_variant' +p22 +S'NC_000016.9:g.2099572TC>T' +p23 +sS'genome_context_intronic_sequence' +p24 +S'NC_000016.9(NM_001077183.2):c.138+821del' +p25 +sS'hgvs_lrg_variant' +p26 +g4 +sS'hgvs_transcript_variant' +p27 +S'NM_001077183.2:c.138+821del' +p28 +sS'hgvs_refseqgene_variant' +p29 +g4 +sS'primary_assembly_loci' +p30 +(dp31 +S'grch38' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000016.10:g.2049574del' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'16' +p39 +sS'ref' +p40 +S'TC' +p41 +sS'pos' +p42 +S'2049571' +p43 +sS'alt' +p44 +S'T' +p45 +sssS'grch37' +p46 +(dp47 +g34 +S'NC_000016.9:g.2099575del' +p48 +sg36 +(dp49 +g38 +g39 +sg40 +S'TC' +p50 +sg42 +S'2099572' +p51 +sg44 +g45 +sssS'hg38' +p52 +(dp53 +g34 +S'NC_000016.10:g.2049574del' +p54 +sg36 +(dp55 +g38 +S'chr16' +p56 +sg40 +S'TC' +p57 +sg42 +S'2049571' +p58 +sg44 +g45 +sssS'hg19' +p59 +(dp60 +g34 +S'NC_000016.9:g.2099575del' +p61 +sg36 +(dp62 +g38 +g56 +sg40 +S'TC' +p63 +sg42 +S'2099572' +p64 +sg44 +g45 +ssssS'reference_sequence_records' +p65 +(dp66 +S'protein' +p67 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001070651.1' +p68 +sS'transcript' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077183.2' +p70 +sssS'NM_001318831.1:c.-89+821del' +p71 +(dp72 +g3 +g4 +sg5 +(lp73 +S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' +p74 +aS'RefSeqGene record not available' +p75 +asg9 +g4 +sg10 +(lp76 +sg12 +VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 8, mRNA +p77 +sg14 +S'TSC2' +p78 +sg16 +(dp79 +g18 +S'NP_001305760.1:p.?' +p80 +sg20 +S'NP_001305760.1:p.?' +p81 +ssg22 +g23 +sg24 +S'NC_000016.9(NM_001318831.1):c.-89+821del' +p82 +sg26 +g4 +sg27 +S'NM_001318831.1:c.-89+821del' +p83 +sg29 +g4 +sg30 +(dp84 +S'grch38' +p85 +(dp86 +g34 +S'NC_000016.10:g.2049574del' +p87 +sg36 +(dp88 +g38 +g39 +sg40 +S'TC' +p89 +sg42 +S'2049571' +p90 +sg44 +g45 +sssS'grch37' +p91 +(dp92 +g34 +S'NC_000016.9:g.2099575del' +p93 +sg36 +(dp94 +g38 +g39 +sg40 +S'TC' +p95 +sg42 +S'2099572' +p96 +sg44 +g45 +sssg52 +(dp97 +g34 +S'NC_000016.10:g.2049574del' +p98 +sg36 +(dp99 +g38 +g56 +sg40 +S'TC' +p100 +sg42 +S'2049571' +p101 +sg44 +g45 +sssS'hg19' +p102 +(dp103 +g34 +S'NC_000016.9:g.2099575del' +p104 +sg36 +(dp105 +g38 +g56 +sg40 +S'TC' +p106 +sg42 +S'2099572' +p107 +sg44 +g45 +ssssg65 +(dp108 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305760.1' +p109 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318831.1' +p110 +sssS'NM_021055.2:c.138+821del' +p111 +(dp112 +g3 +g4 +sg5 +(lp113 +S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' +p114 +aS'RefSeqGene record not available' +p115 +asg9 +g4 +sg10 +(lp116 +sg12 +VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 2, mRNA +p117 +sg14 +S'TSC2' +p118 +sg16 +(dp119 +g18 +S'NP_066399.2:p.?' +p120 +sg20 +S'NP_066399.2:p.?' 
+p121 +ssg22 +g23 +sg24 +S'NC_000016.9(NM_021055.2):c.138+821del' +p122 +sg26 +g4 +sg27 +S'NM_021055.2:c.138+821del' +p123 +sg29 +g4 +sg30 +(dp124 +S'hg19' +p125 +(dp126 +g34 +S'NC_000016.9:g.2099575del' +p127 +sg36 +(dp128 +g38 +g56 +sg40 +S'TC' +p129 +sg42 +S'2099572' +p130 +sg44 +g45 +sssS'grch37' +p131 +(dp132 +g34 +S'NC_000016.9:g.2099575del' +p133 +sg36 +(dp134 +g38 +g39 +sg40 +S'TC' +p135 +sg42 +S'2099572' +p136 +sg44 +g45 +ssssg65 +(dp137 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_066399.2' +p138 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_021055.2' +p139 +sssS'NM_001318832.1:c.171+821del' +p140 +(dp141 +g3 +g4 +sg5 +(lp142 +S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' +p143 +aS'RefSeqGene record not available' +p144 +asg9 +g4 +sg10 +(lp145 +sg12 +VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 9, mRNA +p146 +sg14 +S'TSC2' +p147 +sg16 +(dp148 +g18 +S'NP_001305761.1:p.?' +p149 +sg20 +S'NP_001305761.1:p.?' +p150 +ssg22 +g23 +sg24 +S'NC_000016.9(NM_001318832.1):c.171+821del' +p151 +sg26 +g4 +sg27 +S'NM_001318832.1:c.171+821del' +p152 +sg29 +g4 +sg30 +(dp153 +S'grch38' +p154 +(dp155 +g34 +S'NC_000016.10:g.2049574del' +p156 +sg36 +(dp157 +g38 +g39 +sg40 +S'TC' +p158 +sg42 +S'2049571' +p159 +sg44 +g45 +sssS'grch37' +p160 +(dp161 +g34 +S'NC_000016.9:g.2099575del' +p162 +sg36 +(dp163 +g38 +g39 +sg40 +S'TC' +p164 +sg42 +S'2099572' +p165 +sg44 +g45 +sssg52 +(dp166 +g34 +S'NC_000016.10:g.2049574del' +p167 +sg36 +(dp168 +g38 +g56 +sg40 +S'TC' +p169 +sg42 +S'2049571' +p170 +sg44 +g45 +sssS'hg19' +p171 +(dp172 +g34 +S'NC_000016.9:g.2099575del' +p173 +sg36 +(dp174 +g38 +g56 +sg40 +S'TC' +p175 +sg42 +S'2099572' +p176 +sg44 +g45 +ssssg65 +(dp177 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305761.1' +p178 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318832.1' +p179 +sssS'NM_001114382.1:c.138+821del' +p180 +(dp181 +g3 +g4 +sg5 +(lp182 +S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' +p183 
+aS'A more recent version of the selected reference sequence NM_001114382.1 is available (NM_001114382.2)' +p184 +aS'NM_001114382.2:c.138+821delC MUST be fully validated prior to use in reports' +p185 +aS'select_variants=NM_001114382.2:c.138+821del' +p186 +aS'RefSeqGene record not available' +p187 +asg9 +g4 +sg10 +(lp188 +sg12 +VHomo sapiens tuberous sclerosis 2 (TSC2), transcript variant 5, mRNA +p189 +sg14 +S'TSC2' +p190 +sg16 +(dp191 +g18 +S'NP_001107854.1:p.?' +p192 +sg20 +S'NP_001107854.1:p.?' +p193 +ssg22 +g23 +sg24 +S'NC_000016.9(NM_001114382.1):c.138+821del' +p194 +sg26 +g4 +sg27 +S'NM_001114382.1:c.138+821del' +p195 +sg29 +g4 +sg30 +(dp196 +S'hg19' +p197 +(dp198 +g34 +S'NC_000016.9:g.2099575del' +p199 +sg36 +(dp200 +g38 +g56 +sg40 +S'TC' +p201 +sg42 +S'2099572' +p202 +sg44 +g45 +sssS'grch37' +p203 +(dp204 +g34 +S'NC_000016.9:g.2099575del' +p205 +sg36 +(dp206 +g38 +g39 +sg40 +S'TC' +p207 +sg42 +S'2099572' +p208 +sg44 +g45 +ssssg65 +(dp209 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001107854.1' +p210 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001114382.1' +p211 +sssS'NM_000548.4:c.138+821del' +p212 +(dp213 +g3 +g4 +sg5 +(lp214 +S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' +p215 +aS'RefSeqGene record not available' +p216 +asg9 +g4 +sg10 +(lp217 +sg12 +VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 1, mRNA +p218 +sg14 +S'TSC2' +p219 +sg16 +(dp220 +g18 +S'NP_000539.2:p.?' +p221 +sg20 +S'NP_000539.2:p.?' 
+p222 +ssg22 +g23 +sg24 +S'NC_000016.9(NM_000548.4):c.138+821del' +p223 +sg26 +g4 +sg27 +S'NM_000548.4:c.138+821del' +p224 +sg29 +g4 +sg30 +(dp225 +S'grch38' +p226 +(dp227 +g34 +S'NC_000016.10:g.2049574del' +p228 +sg36 +(dp229 +g38 +g39 +sg40 +S'TC' +p230 +sg42 +S'2049571' +p231 +sg44 +g45 +sssS'grch37' +p232 +(dp233 +g34 +S'NC_000016.9:g.2099575del' +p234 +sg36 +(dp235 +g38 +g39 +sg40 +S'TC' +p236 +sg42 +S'2099572' +p237 +sg44 +g45 +sssg52 +(dp238 +g34 +S'NC_000016.10:g.2049574del' +p239 +sg36 +(dp240 +g38 +g56 +sg40 +S'TC' +p241 +sg42 +S'2049571' +p242 +sg44 +g45 +sssS'hg19' +p243 +(dp244 +g34 +S'NC_000016.9:g.2099575del' +p245 +sg36 +(dp246 +g38 +g56 +sg40 +S'TC' +p247 +sg42 +S'2099572' +p248 +sg44 +g45 +ssssg65 +(dp249 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000539.2' +p250 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000548.4' +p251 +sssS'NM_001363528.1:c.138+821del' +p252 +(dp253 +g3 +g4 +sg5 +(lp254 +S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' +p255 +aS'RefSeqGene record not available' +p256 +asg9 +g4 +sg10 +(lp257 +sg12 +VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 10, mRNA +p258 +sg14 +S'TSC2' +p259 +sg16 +(dp260 +g18 +S'NP_001350457.1:p.?' +p261 +sg20 +S'NP_001350457.1:p.?' 
+p262 +ssg22 +g23 +sg24 +S'NC_000016.9(NM_001363528.1):c.138+821del' +p263 +sg26 +g4 +sg27 +S'NM_001363528.1:c.138+821del' +p264 +sg29 +g4 +sg30 +(dp265 +S'hg19' +p266 +(dp267 +g34 +S'NC_000016.9:g.2099575del' +p268 +sg36 +(dp269 +g38 +g56 +sg40 +S'TC' +p270 +sg42 +S'2099572' +p271 +sg44 +g45 +sssS'grch37' +p272 +(dp273 +g34 +S'NC_000016.9:g.2099575del' +p274 +sg36 +(dp275 +g38 +g39 +sg40 +S'TC' +p276 +sg42 +S'2099572' +p277 +sg44 +g45 +ssssg65 +(dp278 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350457.1' +p279 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363528.1' +p280 +sssS'flag' +p281 +S'gene_variant' +p282 +sS'NM_001077183.1:c.138+821del' +p283 +(dp284 +g3 +g4 +sg5 +(lp285 +S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' +p286 +aS'A more recent version of the selected reference sequence NM_001077183.1 is available (NM_001077183.2)' +p287 +aS'NM_001077183.2:c.138+821delC MUST be fully validated prior to use in reports' +p288 +aS'select_variants=NM_001077183.2:c.138+821del' +p289 +aS'RefSeqGene record not available' +p290 +asg9 +g4 +sg10 +(lp291 +sg12 +VHomo sapiens tuberous sclerosis 2 (TSC2), transcript variant 4, mRNA +p292 +sg14 +S'TSC2' +p293 +sg16 +(dp294 +g18 +S'NP_001070651.1:p.?' +p295 +sg20 +S'NP_001070651.1:p.?' 
+p296 +ssg22 +g23 +sg24 +S'NC_000016.9(NM_001077183.1):c.138+821del' +p297 +sg26 +g4 +sg27 +S'NM_001077183.1:c.138+821del' +p298 +sg29 +g4 +sg30 +(dp299 +S'hg19' +p300 +(dp301 +g34 +S'NC_000016.9:g.2099575del' +p302 +sg36 +(dp303 +g38 +g56 +sg40 +S'TC' +p304 +sg42 +S'2099572' +p305 +sg44 +g45 +sssS'grch37' +p306 +(dp307 +g34 +S'NC_000016.9:g.2099575del' +p308 +sg36 +(dp309 +g38 +g39 +sg40 +S'TC' +p310 +sg42 +S'2099572' +p311 +sg44 +g45 +ssssg65 +(dp312 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001070651.1' +p313 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077183.1' +p314 +sssS'NM_001318827.1:c.138+821del' +p315 +(dp316 +g3 +g4 +sg5 +(lp317 +S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' +p318 +aS'RefSeqGene record not available' +p319 +asg9 +g4 +sg10 +(lp320 +sg12 +VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 6, mRNA +p321 +sg14 +S'TSC2' +p322 +sg16 +(dp323 +g18 +S'NP_001305756.1:p.?' +p324 +sg20 +S'NP_001305756.1:p.?' +p325 +ssg22 +g23 +sg24 +S'NC_000016.9(NM_001318827.1):c.138+821del' +p326 +sg26 +g4 +sg27 +S'NM_001318827.1:c.138+821del' +p327 +sg29 +g4 +sg30 +(dp328 +S'grch38' +p329 +(dp330 +g34 +S'NC_000016.10:g.2049574del' +p331 +sg36 +(dp332 +g38 +g39 +sg40 +S'TC' +p333 +sg42 +S'2049571' +p334 +sg44 +g45 +sssS'grch37' +p335 +(dp336 +g34 +S'NC_000016.9:g.2099575del' +p337 +sg36 +(dp338 +g38 +g39 +sg40 +S'TC' +p339 +sg42 +S'2099572' +p340 +sg44 +g45 +sssg52 +(dp341 +g34 +S'NC_000016.10:g.2049574del' +p342 +sg36 +(dp343 +g38 +g56 +sg40 +S'TC' +p344 +sg42 +S'2049571' +p345 +sg44 +g45 +sssS'hg19' +p346 +(dp347 +g34 +S'NC_000016.9:g.2099575del' +p348 +sg36 +(dp349 +g38 +g56 +sg40 +S'TC' +p350 +sg42 +S'2099572' +p351 +sg44 +g45 +ssssg65 +(dp352 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305756.1' +p353 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318827.1' +p354 +sssS'NM_000548.3:c.138+821del' +p355 +(dp356 +g3 +g4 +sg5 +(lp357 +S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' 
+p358 +aS'A more recent version of the selected reference sequence NM_000548.3 is available (NM_000548.4)' +p359 +aS'NM_000548.4:c.138+821delC MUST be fully validated prior to use in reports' +p360 +aS'select_variants=NM_000548.4:c.138+821del' +p361 +aS'RefSeqGene record not available' +p362 +asg9 +g4 +sg10 +(lp363 +sg12 +VHomo sapiens tuberous sclerosis 2 (TSC2), transcript variant 1, mRNA +p364 +sg14 +S'TSC2' +p365 +sg16 +(dp366 +g18 +S'NP_000539.2:p.?' +p367 +sg20 +S'NP_000539.2:p.?' +p368 +ssg22 +g23 +sg24 +S'NC_000016.9(NM_000548.3):c.138+821del' +p369 +sg26 +g4 +sg27 +S'NM_000548.3:c.138+821del' +p370 +sg29 +g4 +sg30 +(dp371 +S'hg19' +p372 +(dp373 +g34 +S'NC_000016.9:g.2099575del' +p374 +sg36 +(dp375 +g38 +g56 +sg40 +S'TC' +p376 +sg42 +S'2099572' +p377 +sg44 +g45 +sssS'grch37' +p378 +(dp379 +g34 +S'NC_000016.9:g.2099575del' +p380 +sg36 +(dp381 +g38 +g39 +sg40 +S'TC' +p382 +sg42 +S'2099572' +p383 +sg44 +g45 +ssssg65 +(dp384 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000539.2' +p385 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000548.3' +p386 +sssS'NM_001114382.2:c.138+821del' +p387 +(dp388 +g3 +g4 +sg5 +(lp389 +S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' +p390 +aS'RefSeqGene record not available' +p391 +asg9 +g4 +sg10 +(lp392 +sg12 +VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 5, mRNA +p393 +sg14 +S'TSC2' +p394 +sg16 +(dp395 +g18 +S'NP_001107854.1:p.?' +p396 +sg20 +S'NP_001107854.1:p.?' 
+p397 +ssg22 +g23 +sg24 +S'NC_000016.9(NM_001114382.2):c.138+821del' +p398 +sg26 +g4 +sg27 +S'NM_001114382.2:c.138+821del' +p399 +sg29 +g4 +sg30 +(dp400 +S'grch38' +p401 +(dp402 +g34 +S'NC_000016.10:g.2049574del' +p403 +sg36 +(dp404 +g38 +g39 +sg40 +S'TC' +p405 +sg42 +S'2049571' +p406 +sg44 +g45 +sssS'grch37' +p407 +(dp408 +g34 +S'NC_000016.9:g.2099575del' +p409 +sg36 +(dp410 +g38 +g39 +sg40 +S'TC' +p411 +sg42 +S'2099572' +p412 +sg44 +g45 +sssg52 +(dp413 +g34 +S'NC_000016.10:g.2049574del' +p414 +sg36 +(dp415 +g38 +g56 +sg40 +S'TC' +p416 +sg42 +S'2049571' +p417 +sg44 +g45 +sssS'hg19' +p418 +(dp419 +g34 +S'NC_000016.9:g.2099575del' +p420 +sg36 +(dp421 +g38 +g56 +sg40 +S'TC' +p422 +sg42 +S'2099572' +p423 +sg44 +g45 +ssssg65 +(dp424 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001107854.1' +p425 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001114382.2' +p426 +sssS'NM_001318829.1:c.-9-826del' +p427 +(dp428 +g3 +g4 +sg5 +(lp429 +S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' +p430 +aS'RefSeqGene record not available' +p431 +asg9 +g4 +sg10 +(lp432 +sg12 +VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 7, mRNA +p433 +sg14 +S'TSC2' +p434 +sg16 +(dp435 +g18 +S'NP_001305758.1:p.?' +p436 +sg20 +S'NP_001305758.1:p.?' 
+p437 +ssg22 +g23 +sg24 +S'NC_000016.9(NM_001318829.1):c.-9-826del' +p438 +sg26 +g4 +sg27 +S'NM_001318829.1:c.-9-826del' +p439 +sg29 +g4 +sg30 +(dp440 +S'grch38' +p441 +(dp442 +g34 +S'NC_000016.10:g.2049574del' +p443 +sg36 +(dp444 +g38 +g39 +sg40 +S'TC' +p445 +sg42 +S'2049571' +p446 +sg44 +g45 +sssS'grch37' +p447 +(dp448 +g34 +S'NC_000016.9:g.2099575del' +p449 +sg36 +(dp450 +g38 +g39 +sg40 +S'TC' +p451 +sg42 +S'2099572' +p452 +sg44 +g45 +sssg52 +(dp453 +g34 +S'NC_000016.10:g.2049574del' +p454 +sg36 +(dp455 +g38 +g56 +sg40 +S'TC' +p456 +sg42 +S'2049571' +p457 +sg44 +g45 +sssS'hg19' +p458 +(dp459 +g34 +S'NC_000016.9:g.2099575del' +p460 +sg36 +(dp461 +g38 +g56 +sg40 +S'TC' +p462 +sg42 +S'2099572' +p463 +sg44 +g45 +ssssg65 +(dp464 +g67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305758.1' +p465 +sg69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318829.1' +p466 +sssS'metadata' +p467 +(dp468 +S'variantvalidator_hgvs_version' +p469 +S'1.1.3' +p470 +sS'uta_schema' +p471 +S'uta_20180821' +p472 +sS'seqrepo_db' +p473 +S'2018-08-21' +p474 +sS'variantvalidator_version' +p475 +S'v0.2' +p476 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant70.txt b/VariantValidator/testing/testOutputsMasterITS/variant70.txt new file mode 100644 index 00000000..25f99e45 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant70.txt @@ -0,0 +1,224 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_032790.3:c.128_129insCCACC' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' +p9 +aS'NC_000012.11:g.122064775 is one of 6 genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127' +p10 +aS'NC_000012.11:g.122064776 is one of Requires Analysis genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127' +p11 +aS'Caution should be used when reporting the displayed variant descriptions' +p12 +aS'If you are unsure, please contact admin' +p13 +aS'RefSeqGene record not available' +p14 +asS'refseqgene_context_intronic_sequence' +p15 +g6 +sS'alt_genomic_loci' +p16 +(lp17 +(dp18 +S'grch37' +p19 +(dp20 +S'hgvs_genomic_description' +p21 +S'NW_004504303.2:g.302874del' +p22 +sS'vcf' +p23 +(dp24 +S'chr' +p25 +S'HG1595_PATCH' +p26 +sS'ref' +p27 +S'CG' +p28 +sS'pos' +p29 +S'302873' +p30 +sS'alt' +p31 +S'C' +p32 +sssa(dp33 +S'hg19' +p34 +(dp35 +g21 +S'NW_004504303.2:g.302874del' +p36 +sg23 +(dp37 +g25 +S'NW_004504303.2' +p38 +sg27 +S'CG' +p39 +sg29 +S'302873' +p40 +sg31 +g32 +sssasS'transcript_description' +p41 +VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA +p42 +sS'gene_symbol' +p43 +S'ORAI1' +p44 +sS'hgvs_predicted_protein_consequence' +p45 +(dp46 +S'tlr' +p47 +S'NP_116179.2:p.(Pro44HisfsTer22)' +p48 +sS'slr' +p49 +S'NP_116179.2:p.(P44Hfs*22)' +p50 +ssS'submitted_variant' +p51 +S'NC_000012.11:g.122064776delG' +p52 +sS'genome_context_intronic_sequence' +p53 +g6 +sS'hgvs_lrg_variant' +p54 +g6 
+sS'hgvs_transcript_variant' +p55 +S'NM_032790.3:c.128_129insCCACC' +p56 +sS'hgvs_refseqgene_variant' +p57 +g6 +sS'primary_assembly_loci' +p58 +(dp59 +S'hg19' +p60 +(dp61 +g21 +S'NC_000012.11:g.122064776del' +p62 +sg23 +(dp63 +g25 +S'chr12' +p64 +sg27 +S'CG' +p65 +sg29 +S'122064775' +p66 +sg31 +g32 +sssS'hg38' +p67 +(dp68 +g21 +S'NC_000012.12:g.121626875_121626876insCCACC' +p69 +sg23 +(dp70 +g25 +g64 +sg27 +g32 +sg29 +S'121626873' +p71 +sg31 +VCCCCCA +p72 +sssS'grch37' +p73 +(dp74 +g21 +S'NC_000012.11:g.122064776del' +p75 +sg23 +(dp76 +g25 +S'12' +p77 +sg27 +S'CG' +p78 +sg29 +S'122064775' +p79 +sg31 +g32 +sssS'grch38' +p80 +(dp81 +g21 +S'NC_000012.12:g.121626875_121626876insCCACC' +p82 +sg23 +(dp83 +g25 +g77 +sg27 +g32 +sg29 +S'121626873' +p84 +sg31 +VCCCCCA +p85 +ssssS'reference_sequence_records' +p86 +(dp87 +S'protein' +p88 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' +p89 +sS'transcript' +p90 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' +p91 +sssS'metadata' +p92 +(dp93 +S'variantvalidator_hgvs_version' +p94 +S'1.1.3' +p95 +sS'uta_schema' +p96 +S'uta_20180821' +p97 +sS'seqrepo_db' +p98 +S'2018-08-21' +p99 +sS'variantvalidator_version' +p100 +S'v0.2' +p101 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant71.txt b/VariantValidator/testing/testOutputsMasterITS/variant71.txt new file mode 100644 index 00000000..ef02ced7 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant71.txt @@ -0,0 +1,225 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_032790.3:c.129_130insGCCACCG' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' +p9 +aS'NC_000012.11:g.122064775 is one of 6 genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127' +p10 +aS'NC_000012.11:g.122064775 is one of Requires Analysis genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127' +p11 +aS'Caution should be used when reporting the displayed variant descriptions' +p12 +aS'If you are unsure, please contact admin' +p13 +aS'RefSeqGene record not available' +p14 +asS'refseqgene_context_intronic_sequence' +p15 +g6 +sS'alt_genomic_loci' +p16 +(lp17 +(dp18 +S'grch37' +p19 +(dp20 +S'hgvs_genomic_description' +p21 +S'NW_004504303.2:g.302874dup' +p22 +sS'vcf' +p23 +(dp24 +S'chr' +p25 +S'HG1595_PATCH' +p26 +sS'ref' +p27 +S'G' +p28 +sS'pos' +p29 +S'302874' +p30 +sS'alt' +p31 +S'GG' +p32 +sssa(dp33 +S'hg19' +p34 +(dp35 +g21 +S'NW_004504303.2:g.302874dup' +p36 +sg23 +(dp37 +g25 +S'NW_004504303.2' +p38 +sg27 +g28 +sg29 +S'302874' +p39 +sg31 +S'GG' +p40 +sssasS'transcript_description' +p41 +VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA +p42 +sS'gene_symbol' +p43 +S'ORAI1' +p44 +sS'hgvs_predicted_protein_consequence' +p45 +(dp46 +S'tlr' +p47 +S'NP_116179.2:p.(Pro44AlafsTer46)' +p48 +sS'slr' +p49 +S'NP_116179.2:p.(P44Afs*46)' +p50 +ssS'submitted_variant' +p51 +S'NC_000012.11:g.122064776dupG' +p52 +sS'genome_context_intronic_sequence' +p53 +g6 +sS'hgvs_lrg_variant' +p54 +g6 
+sS'hgvs_transcript_variant' +p55 +S'NM_032790.3:c.129_130insGCCACCG' +p56 +sS'hgvs_refseqgene_variant' +p57 +g6 +sS'primary_assembly_loci' +p58 +(dp59 +S'hg19' +p60 +(dp61 +g21 +S'NC_000012.11:g.122064776dup' +p62 +sg23 +(dp63 +g25 +S'chr12' +p64 +sg27 +g28 +sg29 +S'122064776' +p65 +sg31 +S'GG' +p66 +sssS'hg38' +p67 +(dp68 +g21 +S'NC_000012.12:g.121626876_121626877insGCCACCG' +p69 +sg23 +(dp70 +g25 +g64 +sg27 +S'C' +p71 +sg29 +S'121626873' +p72 +sg31 +VCCCGGCCA +p73 +sssS'grch37' +p74 +(dp75 +g21 +S'NC_000012.11:g.122064776dup' +p76 +sg23 +(dp77 +g25 +S'12' +p78 +sg27 +g28 +sg29 +S'122064776' +p79 +sg31 +S'GG' +p80 +sssS'grch38' +p81 +(dp82 +g21 +S'NC_000012.12:g.121626876_121626877insGCCACCG' +p83 +sg23 +(dp84 +g25 +g78 +sg27 +g71 +sg29 +S'121626873' +p85 +sg31 +VCCCGGCCA +p86 +ssssS'reference_sequence_records' +p87 +(dp88 +S'protein' +p89 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' +p90 +sS'transcript' +p91 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' +p92 +sssS'metadata' +p93 +(dp94 +S'variantvalidator_hgvs_version' +p95 +S'1.1.3' +p96 +sS'uta_schema' +p97 +S'uta_20180821' +p98 +sS'seqrepo_db' +p99 +S'2018-08-21' +p100 +sS'variantvalidator_version' +p101 +S'v0.2' +p102 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant72.txt b/VariantValidator/testing/testOutputsMasterITS/variant72.txt new file mode 100644 index 00000000..0ab2ff7d --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant72.txt @@ -0,0 +1,223 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_032790.3:c.129_130insTTTCCACCG' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' +p9 +aS'NC_000012.11:g.122064776 is one of 7 genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127' +p10 +aS'Caution should be used when reporting the displayed variant descriptions' +p11 +aS'If you are unsure, please contact admin' +p12 +aS'RefSeqGene record not available' +p13 +asS'refseqgene_context_intronic_sequence' +p14 +g6 +sS'alt_genomic_loci' +p15 +(lp16 +(dp17 +S'grch37' +p18 +(dp19 +S'hgvs_genomic_description' +p20 +S'NW_004504303.2:g.302874_302875insTTT' +p21 +sS'vcf' +p22 +(dp23 +S'chr' +p24 +S'HG1595_PATCH' +p25 +sS'ref' +p26 +S'G' +p27 +sS'pos' +p28 +S'302874' +p29 +sS'alt' +p30 +S'GTTT' +p31 +sssa(dp32 +S'hg19' +p33 +(dp34 +g20 +S'NW_004504303.2:g.302874_302875insTTT' +p35 +sg22 +(dp36 +g24 +S'NW_004504303.2' +p37 +sg26 +g27 +sg28 +S'302874' +p38 +sg30 +S'GTTT' +p39 +sssasS'transcript_description' +p40 +VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA +p41 +sS'gene_symbol' +p42 +S'ORAI1' +p43 +sS'hgvs_predicted_protein_consequence' +p44 +(dp45 +S'tlr' +p46 +S'NP_116179.2:p.(Pro43_Pro44insPheProPro)' +p47 +sS'slr' +p48 +S'NP_116179.2:p.(P43_P44insFPP)' +p49 +ssS'submitted_variant' +p50 +S'NC_000012.11:g.122064776_122064777insTTT' +p51 +sS'genome_context_intronic_sequence' +p52 +g6 +sS'hgvs_lrg_variant' +p53 +g6 +sS'hgvs_transcript_variant' +p54 +S'NM_032790.3:c.129_130insTTTCCACCG' +p55 +sS'hgvs_refseqgene_variant' +p56 +g6 
+sS'primary_assembly_loci' +p57 +(dp58 +S'hg19' +p59 +(dp60 +g20 +S'NC_000012.11:g.122064776_122064777insTTT' +p61 +sg22 +(dp62 +g24 +S'chr12' +p63 +sg26 +g27 +sg28 +S'122064776' +p64 +sg30 +S'GTTT' +p65 +sssS'hg38' +p66 +(dp67 +g20 +S'NC_000012.12:g.121626876_121626877insTTTCCACCG' +p68 +sg22 +(dp69 +g24 +g63 +sg26 +S'C' +p70 +sg28 +S'121626873' +p71 +sg30 +VCCCGTTTCCA +p72 +sssS'grch37' +p73 +(dp74 +g20 +S'NC_000012.11:g.122064776_122064777insTTT' +p75 +sg22 +(dp76 +g24 +S'12' +p77 +sg26 +g27 +sg28 +S'122064776' +p78 +sg30 +S'GTTT' +p79 +sssS'grch38' +p80 +(dp81 +g20 +S'NC_000012.12:g.121626876_121626877insTTTCCACCG' +p82 +sg22 +(dp83 +g24 +g77 +sg26 +g70 +sg28 +S'121626873' +p84 +sg30 +VCCCGTTTCCA +p85 +ssssS'reference_sequence_records' +p86 +(dp87 +S'protein' +p88 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' +p89 +sS'transcript' +p90 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' +p91 +sssS'metadata' +p92 +(dp93 +S'variantvalidator_hgvs_version' +p94 +S'1.1.3' +p95 +sS'uta_schema' +p96 +S'uta_20180821' +p97 +sS'seqrepo_db' +p98 +S'2018-08-21' +p99 +sS'variantvalidator_version' +p100 +S'v0.2' +p101 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant73.txt b/VariantValidator/testing/testOutputsMasterITS/variant73.txt new file mode 100644 index 00000000..e5812afa --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant73.txt @@ -0,0 +1,223 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_032790.3:c.125_126delinsGCCA' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' +p9 +aS'Genome position NC_000012.11:g.122064776 aligns within a Requires Analysis-bp gap in transcript NM_032790.3 between positions c.126_127' +p10 +aS'Caution should be used when reporting the displayed variant descriptions' +p11 +aS'If you are unsure, please contact admin' +p12 +aS'RefSeqGene record not available' +p13 +asS'refseqgene_context_intronic_sequence' +p14 +g6 +sS'alt_genomic_loci' +p15 +(lp16 +(dp17 +S'grch37' +p18 +(dp19 +S'hgvs_genomic_description' +p20 +S'NW_004504303.2:g.302870_302873del' +p21 +sS'vcf' +p22 +(dp23 +S'chr' +p24 +S'HG1595_PATCH' +p25 +sS'ref' +p26 +S'GCCCC' +p27 +sS'pos' +p28 +S'302869' +p29 +sS'alt' +p30 +S'G' +p31 +sssa(dp32 +S'hg19' +p33 +(dp34 +g20 +S'NW_004504303.2:g.302870_302873del' +p35 +sg22 +(dp36 +g24 +S'NW_004504303.2' +p37 +sg26 +S'GCCCC' +p38 +sg28 +S'302869' +p39 +sg30 +g31 +sssasS'transcript_description' +p40 +VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA +p41 +sS'gene_symbol' +p42 +S'ORAI1' +p43 +sS'hgvs_predicted_protein_consequence' +p44 +(dp45 +S'tlr' +p46 +S'NP_116179.2:p.(Ala42GlyfsTer23)' +p47 +sS'slr' +p48 +S'NP_116179.2:p.(A42Gfs*23)' +p49 +ssS'submitted_variant' +p50 +S'NC_000012.11:g.122064772_122064775del' +p51 +sS'genome_context_intronic_sequence' +p52 +g6 +sS'hgvs_lrg_variant' +p53 +g6 +sS'hgvs_transcript_variant' +p54 +S'NM_032790.3:c.125_126delinsGCCA' +p55 +sS'hgvs_refseqgene_variant' +p56 +g6 
+sS'primary_assembly_loci' +p57 +(dp58 +S'hg19' +p59 +(dp60 +g20 +S'NC_000012.11:g.122064772_122064775del' +p61 +sg22 +(dp62 +g24 +S'chr12' +p63 +sg26 +S'GCCCC' +p64 +sg28 +S'122064771' +p65 +sg30 +g31 +sssS'hg38' +p66 +(dp67 +g20 +S'NC_000012.12:g.121626867_121626873delinsGCCA' +p68 +sg22 +(dp69 +g24 +g63 +sg26 +S'CCCCGCC' +p70 +sg28 +S'121626867' +p71 +sg30 +S'GCCA' +p72 +sssS'grch37' +p73 +(dp74 +g20 +S'NC_000012.11:g.122064772_122064775del' +p75 +sg22 +(dp76 +g24 +S'12' +p77 +sg26 +S'GCCCC' +p78 +sg28 +S'122064771' +p79 +sg30 +g31 +sssS'grch38' +p80 +(dp81 +g20 +S'NC_000012.12:g.121626867_121626873delinsGCCA' +p82 +sg22 +(dp83 +g24 +g77 +sg26 +S'CCCCGCC' +p84 +sg28 +S'121626867' +p85 +sg30 +g72 +ssssS'reference_sequence_records' +p86 +(dp87 +S'protein' +p88 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' +p89 +sS'transcript' +p90 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' +p91 +sssS'metadata' +p92 +(dp93 +S'variantvalidator_hgvs_version' +p94 +S'1.1.3' +p95 +sS'uta_schema' +p96 +S'uta_20180821' +p97 +sS'seqrepo_db' +p98 +S'2018-08-21' +p99 +sS'variantvalidator_version' +p100 +S'v0.2' +p101 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant74.txt b/VariantValidator/testing/testOutputsMasterITS/variant74.txt new file mode 100644 index 00000000..f6142300 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant74.txt @@ -0,0 +1,228 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_032790.3:c.128_129insCCCCGCCACC' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' +p9 +aS'Genome position NC_000012.11:g.122064782 aligns within a 6-bp gap in transcript NM_032790.3 between positions c.126_127' +p10 +aS'Genome position NC_000012.11:g.122064776 aligns within a 6-bp gap in transcript NM_032790.3 between positions c.126_127' +p11 +aS'Caution should be used when reporting the displayed variant descriptions' +p12 +aS'If you are unsure, please contact admin' +p13 +aS'RefSeqGene record not available' +p14 +asS'refseqgene_context_intronic_sequence' +p15 +g6 +sS'alt_genomic_loci' +p16 +(lp17 +(dp18 +S'grch37' +p19 +(dp20 +S'hgvs_genomic_description' +p21 +S'NW_004504303.2:g.302870_302873dup' +p22 +sS'vcf' +p23 +(dp24 +S'chr' +p25 +S'HG1595_PATCH' +p26 +sS'ref' +p27 +S'CCCC' +p28 +sS'pos' +p29 +S'302870' +p30 +sS'alt' +p31 +S'CCCCCCCC' +p32 +sssa(dp33 +S'hg19' +p34 +(dp35 +g21 +S'NW_004504303.2:g.302870_302873dup' +p36 +sg23 +(dp37 +g25 +S'NW_004504303.2' +p38 +sg27 +S'CCCC' +p39 +sg29 +S'302870' +p40 +sg31 +S'CCCCCCCC' +p41 +sssasS'transcript_description' +p42 +VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA +p43 +sS'gene_symbol' +p44 +S'ORAI1' +p45 +sS'hgvs_predicted_protein_consequence' +p46 +(dp47 +S'tlr' +p48 +S'NP_116179.2:p.(Pro45AlafsTer46)' +p49 +sS'slr' +p50 +S'NP_116179.2:p.(P45Afs*46)' +p51 +ssS'submitted_variant' +p52 +S'NC_000012.11:g.122064772_122064775dup' +p53 +sS'genome_context_intronic_sequence' +p54 +g6 +sS'hgvs_lrg_variant' +p55 
+g6 +sS'hgvs_transcript_variant' +p56 +S'NM_032790.3:c.128_129insCCCCGCCACC' +p57 +sS'hgvs_refseqgene_variant' +p58 +g6 +sS'primary_assembly_loci' +p59 +(dp60 +S'hg19' +p61 +(dp62 +g21 +S'NC_000012.11:g.122064772_122064775dup' +p63 +sg23 +(dp64 +g25 +S'chr12' +p65 +sg27 +S'CCCC' +p66 +sg29 +S'122064772' +p67 +sg31 +S'CCCCCCCC' +p68 +sssS'hg38' +p69 +(dp70 +g21 +S'NC_000012.12:g.121626875_121626876insCCCCGCCACC' +p71 +sg23 +(dp72 +g25 +g65 +sg27 +S'C' +p73 +sg29 +S'121626873' +p74 +sg31 +VCCCCCCCGCCA +p75 +sssS'grch37' +p76 +(dp77 +g21 +S'NC_000012.11:g.122064772_122064775dup' +p78 +sg23 +(dp79 +g25 +S'12' +p80 +sg27 +S'CCCC' +p81 +sg29 +S'122064772' +p82 +sg31 +S'CCCCCCCC' +p83 +sssS'grch38' +p84 +(dp85 +g21 +S'NC_000012.12:g.121626875_121626876insCCCCGCCACC' +p86 +sg23 +(dp87 +g25 +g80 +sg27 +g73 +sg29 +S'121626873' +p88 +sg31 +VCCCCCCCGCCA +p89 +ssssS'reference_sequence_records' +p90 +(dp91 +S'protein' +p92 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' +p93 +sS'transcript' +p94 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' +p95 +sssS'metadata' +p96 +(dp97 +S'variantvalidator_hgvs_version' +p98 +S'1.1.3' +p99 +sS'uta_schema' +p100 +S'uta_20180821' +p101 +sS'seqrepo_db' +p102 +S'2018-08-21' +p103 +sS'variantvalidator_version' +p104 +S'v0.2' +p105 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant75.txt b/VariantValidator/testing/testOutputsMasterITS/variant75.txt new file mode 100644 index 00000000..d43f63f8 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant75.txt @@ -0,0 +1,222 @@ +(dp0 +S'NM_032790.3:c.126_127insTTTTCCGCCA' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' +p7 +aS'Genome position NC_000012.11:g.122064774 aligns within a Requires Analysis-bp gap in transcript NM_032790.3 between positions c.126_127' +p8 +aS'Caution should be used when reporting the displayed variant descriptions' +p9 +aS'If you are unsure, please contact admin' +p10 +aS'RefSeqGene record not available' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g4 +sS'alt_genomic_loci' +p13 +(lp14 +(dp15 +S'grch37' +p16 +(dp17 +S'hgvs_genomic_description' +p18 +S'NW_004504303.2:g.302871_302872insTTTT' +p19 +sS'vcf' +p20 +(dp21 +S'chr' +p22 +S'HG1595_PATCH' +p23 +sS'ref' +p24 +S'C' +p25 +sS'pos' +p26 +S'302871' +p27 +sS'alt' +p28 +S'CTTTT' +p29 +sssa(dp30 +S'hg19' +p31 +(dp32 +g18 +S'NW_004504303.2:g.302871_302872insTTTT' +p33 +sg20 +(dp34 +g22 +S'NW_004504303.2' +p35 +sg24 +g25 +sg26 +S'302871' +p36 +sg28 +S'CTTTT' +p37 +sssasS'transcript_description' +p38 +VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA +p39 +sS'gene_symbol' +p40 +S'ORAI1' +p41 +sS'hgvs_predicted_protein_consequence' +p42 +(dp43 +S'tlr' +p44 +S'NP_116179.2:p.(Pro43PhefsTer48)' +p45 +sS'slr' +p46 +S'NP_116179.2:p.(P43Ffs*48)' +p47 +ssS'submitted_variant' +p48 +S'NC_000012.11:g.122064773_122064774insTTTT' +p49 +sS'genome_context_intronic_sequence' +p50 +g4 +sS'hgvs_lrg_variant' +p51 +g4 +sS'hgvs_transcript_variant' +p52 +S'NM_032790.3:c.126_127insTTTTCCGCCA' +p53 +sS'hgvs_refseqgene_variant' +p54 +g4 +sS'primary_assembly_loci' +p55 
+(dp56 +S'hg19' +p57 +(dp58 +g18 +S'NC_000012.11:g.122064773_122064774insTTTT' +p59 +sg20 +(dp60 +g22 +S'chr12' +p61 +sg24 +g25 +sg26 +S'122064773' +p62 +sg28 +S'CTTTT' +p63 +sssS'hg38' +p64 +(dp65 +g18 +S'NC_000012.12:g.121626873_121626874insTTTTCCGCCA' +p66 +sg20 +(dp67 +g22 +g61 +sg24 +g25 +sg26 +S'121626873' +p68 +sg28 +VCTTTTCCGCCA +p69 +sssS'grch37' +p70 +(dp71 +g18 +S'NC_000012.11:g.122064773_122064774insTTTT' +p72 +sg20 +(dp73 +g22 +S'12' +p74 +sg24 +g25 +sg26 +S'122064773' +p75 +sg28 +S'CTTTT' +p76 +sssS'grch38' +p77 +(dp78 +g18 +S'NC_000012.12:g.121626873_121626874insTTTTCCGCCA' +p79 +sg20 +(dp80 +g22 +g74 +sg24 +g25 +sg26 +S'121626873' +p81 +sg28 +VCTTTTCCGCCA +p82 +ssssS'reference_sequence_records' +p83 +(dp84 +S'protein' +p85 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' +p86 +sS'transcript' +p87 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' +p88 +sssS'flag' +p89 +S'gene_variant' +p90 +sS'metadata' +p91 +(dp92 +S'variantvalidator_hgvs_version' +p93 +S'1.1.3' +p94 +sS'uta_schema' +p95 +S'uta_20180821' +p96 +sS'seqrepo_db' +p97 +S'2018-08-21' +p98 +sS'variantvalidator_version' +p99 +S'v0.2' +p100 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant76.txt b/VariantValidator/testing/testOutputsMasterITS/variant76.txt new file mode 100644 index 00000000..1892999a --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant76.txt @@ -0,0 +1,222 @@ +(dp0 +S'NM_032790.3:c.126C>A' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' +p7 +aS'Genome position NC_000012.11:g.122064778 aligns within a Requires Analysis-bp gap in transcript NM_032790.3 between positions c.126_127' +p8 +aS'Caution should be used when reporting the displayed variant descriptions' +p9 +aS'If you are unsure, please contact admin' +p10 +aS'RefSeqGene record not available' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g4 +sS'alt_genomic_loci' +p13 +(lp14 +(dp15 +S'grch37' +p16 +(dp17 +S'hgvs_genomic_description' +p18 +S'NW_004504303.2:g.302871_302876del' +p19 +sS'vcf' +p20 +(dp21 +S'chr' +p22 +S'HG1595_PATCH' +p23 +sS'ref' +p24 +S'GCCCCGC' +p25 +sS'pos' +p26 +S'302869' +p27 +sS'alt' +p28 +S'G' +p29 +sssa(dp30 +S'hg19' +p31 +(dp32 +g18 +S'NW_004504303.2:g.302871_302876del' +p33 +sg20 +(dp34 +g22 +S'NW_004504303.2' +p35 +sg24 +S'GCCCCGC' +p36 +sg26 +S'302869' +p37 +sg28 +g29 +sssasS'transcript_description' +p38 +VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA +p39 +sS'gene_symbol' +p40 +S'ORAI1' +p41 +sS'hgvs_predicted_protein_consequence' +p42 +(dp43 +S'tlr' +p44 +S'NP_116179.2:p.(Ala42=)' +p45 +sS'slr' +p46 +S'NP_116179.2:p.(A42=)' +p47 +ssS'submitted_variant' +p48 +S'NC_000012.11:g.122064772_122064777del' +p49 +sS'genome_context_intronic_sequence' +p50 +g4 +sS'hgvs_lrg_variant' +p51 +g4 +sS'hgvs_transcript_variant' +p52 +S'NM_032790.3:c.126C>A' +p53 +sS'hgvs_refseqgene_variant' +p54 +g4 +sS'primary_assembly_loci' +p55 +(dp56 +S'hg19' +p57 +(dp58 +g18 
+S'NC_000012.11:g.122064773_122064778del' +p59 +sg20 +(dp60 +g22 +S'chr12' +p61 +sg24 +S'GCCCCGC' +p62 +sg26 +S'122064771' +p63 +sg28 +g29 +sssS'hg38' +p64 +(dp65 +g18 +S'NC_000012.12:g.121626873C>A' +p66 +sg20 +(dp67 +g22 +g61 +sg24 +VC +p68 +sg26 +S'121626873' +p69 +sg28 +VA +p70 +sssS'grch37' +p71 +(dp72 +g18 +S'NC_000012.11:g.122064773_122064778del' +p73 +sg20 +(dp74 +g22 +S'12' +p75 +sg24 +S'GCCCCGC' +p76 +sg26 +S'122064771' +p77 +sg28 +g29 +sssS'grch38' +p78 +(dp79 +g18 +S'NC_000012.12:g.121626873C>A' +p80 +sg20 +(dp81 +g22 +g75 +sg24 +g68 +sg26 +S'121626873' +p82 +sg28 +g70 +ssssS'reference_sequence_records' +p83 +(dp84 +S'protein' +p85 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' +p86 +sS'transcript' +p87 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' +p88 +sssS'flag' +p89 +S'gene_variant' +p90 +sS'metadata' +p91 +(dp92 +S'variantvalidator_hgvs_version' +p93 +S'1.1.3' +p94 +sS'uta_schema' +p95 +S'uta_20180821' +p96 +sS'seqrepo_db' +p97 +S'2018-08-21' +p98 +sS'variantvalidator_version' +p99 +S'v0.2' +p100 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant77.txt b/VariantValidator/testing/testOutputsMasterITS/variant77.txt new file mode 100644 index 00000000..d42f6ac0 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant77.txt @@ -0,0 +1,228 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_032790.3:c.131_132insCCCGCCACCGCC' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' +p9 +aS'Genome position NC_000012.11:g.122064778 aligns within a 6-bp gap in transcript NM_032790.3 between positions c.126_127' +p10 +aS'Genome position NC_000012.11:g.122064784 aligns within a 6-bp gap in transcript NM_032790.3 between positions c.126_127' +p11 +aS'Caution should be used when reporting the displayed variant descriptions' +p12 +aS'If you are unsure, please contact admin' +p13 +aS'RefSeqGene record not available' +p14 +asS'refseqgene_context_intronic_sequence' +p15 +g6 +sS'alt_genomic_loci' +p16 +(lp17 +(dp18 +S'grch37' +p19 +(dp20 +S'hgvs_genomic_description' +p21 +S'NW_004504303.2:g.302871_302876dup' +p22 +sS'vcf' +p23 +(dp24 +S'chr' +p25 +S'HG1595_PATCH' +p26 +sS'ref' +p27 +S'CCCCGC' +p28 +sS'pos' +p29 +S'302870' +p30 +sS'alt' +p31 +S'CCCCGCCCCCGC' +p32 +sssa(dp33 +S'hg19' +p34 +(dp35 +g21 +S'NW_004504303.2:g.302871_302876dup' +p36 +sg23 +(dp37 +g25 +S'NW_004504303.2' +p38 +sg27 +S'CCCCGC' +p39 +sg29 +S'302870' +p40 +sg31 +S'CCCCGCCCCCGC' +p41 +sssasS'transcript_description' +p42 +VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA +p43 +sS'gene_symbol' +p44 +S'ORAI1' +p45 +sS'hgvs_predicted_protein_consequence' +p46 +(dp47 +S'tlr' +p48 +S'NP_116179.2:p.(Pro44_Pro47dup)' +p49 +sS'slr' +p50 +S'NP_116179.2:p.(P44_P47dup)' +p51 +ssS'submitted_variant' +p52 +S'NC_000012.11:g.122064772_122064777dup' +p53 +sS'genome_context_intronic_sequence' +p54 +g6 
+sS'hgvs_lrg_variant' +p55 +g6 +sS'hgvs_transcript_variant' +p56 +S'NM_032790.3:c.131_132insCCCGCCACCGCC' +p57 +sS'hgvs_refseqgene_variant' +p58 +g6 +sS'primary_assembly_loci' +p59 +(dp60 +S'hg19' +p61 +(dp62 +g21 +S'NC_000012.11:g.122064773_122064778dup' +p63 +sg23 +(dp64 +g25 +S'chr12' +p65 +sg27 +S'CCCCGC' +p66 +sg29 +S'122064772' +p67 +sg31 +S'CCCCGCCCCCGC' +p68 +sssS'hg38' +p69 +(dp70 +g21 +S'NC_000012.12:g.121626878_121626879insCCCGCCACCGCC' +p71 +sg23 +(dp72 +g25 +g65 +sg27 +S'C' +p73 +sg29 +S'121626873' +p74 +sg31 +VCCCGCCCCCGCCA +p75 +sssS'grch37' +p76 +(dp77 +g21 +S'NC_000012.11:g.122064773_122064778dup' +p78 +sg23 +(dp79 +g25 +S'12' +p80 +sg27 +S'CCCCGC' +p81 +sg29 +S'122064772' +p82 +sg31 +S'CCCCGCCCCCGC' +p83 +sssS'grch38' +p84 +(dp85 +g21 +S'NC_000012.12:g.121626878_121626879insCCCGCCACCGCC' +p86 +sg23 +(dp87 +g25 +g80 +sg27 +g73 +sg29 +S'121626873' +p88 +sg31 +VCCCGCCCCCGCCA +p89 +ssssS'reference_sequence_records' +p90 +(dp91 +S'protein' +p92 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' +p93 +sS'transcript' +p94 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' +p95 +sssS'metadata' +p96 +(dp97 +S'variantvalidator_hgvs_version' +p98 +S'1.1.3' +p99 +sS'uta_schema' +p100 +S'uta_20180821' +p101 +sS'seqrepo_db' +p102 +S'2018-08-21' +p103 +sS'variantvalidator_version' +p104 +S'v0.2' +p105 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant78.txt b/VariantValidator/testing/testOutputsMasterITS/variant78.txt new file mode 100644 index 00000000..07aa6f8d --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant78.txt @@ -0,0 +1,226 @@ +(dp0 +S'NM_032790.3:c.135_136insACCGCCACCG' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' +p7 +aS'NC_000012.11:g.122064778 is one of Requires Analysis genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127' +p8 +aS'Caution should be used when reporting the displayed variant descriptions' +p9 +aS'If you are unsure, please contact admin' +p10 +aS'RefSeqGene record not available' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g4 +sS'alt_genomic_loci' +p13 +(lp14 +(dp15 +S'grch37' +p16 +(dp17 +S'hgvs_genomic_description' +p18 +S'NW_004504303.2:g.302877_302880dup' +p19 +sS'vcf' +p20 +(dp21 +S'chr' +p22 +S'HG1595_PATCH' +p23 +sS'ref' +p24 +S'ACCG' +p25 +sS'pos' +p26 +S'302877' +p27 +sS'alt' +p28 +S'ACCGACCG' +p29 +sssa(dp30 +S'hg19' +p31 +(dp32 +g18 +S'NW_004504303.2:g.302877_302880dup' +p33 +sg20 +(dp34 +g22 +S'NW_004504303.2' +p35 +sg24 +S'ACCG' +p36 +sg26 +S'302877' +p37 +sg28 +S'ACCGACCG' +p38 +sssasS'transcript_description' +p39 +VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA +p40 +sS'gene_symbol' +p41 +S'ORAI1' +p42 +sS'hgvs_predicted_protein_consequence' +p43 +(dp44 +S'tlr' +p45 +S'NP_116179.2:p.(Pro46ThrfsTer45)' +p46 +sS'slr' +p47 +S'NP_116179.2:p.(P46Tfs*45)' +p48 +ssS'submitted_variant' +p49 +S'NC_000012.11:g.122064779_122064782dup' +p50 +sS'genome_context_intronic_sequence' +p51 +g4 +sS'hgvs_lrg_variant' +p52 +g4 +sS'hgvs_transcript_variant' +p53 +S'NM_032790.3:c.135_136insACCGCCACCG' +p54 +sS'hgvs_refseqgene_variant' +p55 +g4 
+sS'primary_assembly_loci' +p56 +(dp57 +S'hg19' +p58 +(dp59 +g18 +S'NC_000012.11:g.122064779_122064782dup' +p60 +sg20 +(dp61 +g22 +S'chr12' +p62 +sg24 +S'ACCG' +p63 +sg26 +S'122064779' +p64 +sg28 +S'ACCGACCG' +p65 +sssS'hg38' +p66 +(dp67 +g18 +S'NC_000012.12:g.121626882_121626883insACCGCCACCG' +p68 +sg20 +(dp69 +g22 +g62 +sg24 +S'C' +p70 +sg26 +S'121626873' +p71 +sg28 +VCCCGCCACCGA +p72 +sssS'grch37' +p73 +(dp74 +g18 +S'NC_000012.11:g.122064779_122064782dup' +p75 +sg20 +(dp76 +g22 +S'12' +p77 +sg24 +S'ACCG' +p78 +sg26 +S'122064779' +p79 +sg28 +S'ACCGACCG' +p80 +sssS'grch38' +p81 +(dp82 +g18 +S'NC_000012.12:g.121626882_121626883insACCGCCACCG' +p83 +sg20 +(dp84 +g22 +g77 +sg24 +g70 +sg26 +S'121626873' +p85 +sg28 +VCCCGCCACCGA +p86 +ssssS'reference_sequence_records' +p87 +(dp88 +S'protein' +p89 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' +p90 +sS'transcript' +p91 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' +p92 +sssS'flag' +p93 +S'gene_variant' +p94 +sS'metadata' +p95 +(dp96 +S'variantvalidator_hgvs_version' +p97 +S'1.1.3' +p98 +sS'uta_schema' +p99 +S'uta_20180821' +p100 +sS'seqrepo_db' +p101 +S'2018-08-21' +p102 +sS'variantvalidator_version' +p103 +S'v0.2' +p104 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant79.txt b/VariantValidator/testing/testOutputsMasterITS/variant79.txt new file mode 100644 index 00000000..b484c2d4 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant79.txt @@ -0,0 +1,223 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_032790.3:c.126_127insA' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' +p9 +aS'Genome position NC_000012.11:g.122064776 aligns within a Requires Analysis-bp gap in transcript NM_032790.3 between positions c.126_127' +p10 +aS'Caution should be used when reporting the displayed variant descriptions' +p11 +aS'If you are unsure, please contact admin' +p12 +aS'RefSeqGene record not available' +p13 +asS'refseqgene_context_intronic_sequence' +p14 +g6 +sS'alt_genomic_loci' +p15 +(lp16 +(dp17 +S'grch37' +p18 +(dp19 +S'hgvs_genomic_description' +p20 +S'NW_004504303.2:g.302872_302876del' +p21 +sS'vcf' +p22 +(dp23 +S'chr' +p24 +S'HG1595_PATCH' +p25 +sS'ref' +p26 +S'GGCCCC' +p27 +sS'pos' +p28 +S'302868' +p29 +sS'alt' +p30 +S'G' +p31 +sssa(dp32 +S'hg19' +p33 +(dp34 +g20 +S'NW_004504303.2:g.302872_302876del' +p35 +sg22 +(dp36 +g24 +S'NW_004504303.2' +p37 +sg26 +S'GGCCCC' +p38 +sg28 +S'302868' +p39 +sg30 +g31 +sssasS'transcript_description' +p40 +VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA +p41 +sS'gene_symbol' +p42 +S'ORAI1' +p43 +sS'hgvs_predicted_protein_consequence' +p44 +(dp45 +S'tlr' +p46 +S'NP_116179.2:p.(Pro43ThrfsTer45)' +p47 +sS'slr' +p48 +S'NP_116179.2:p.(P43Tfs*45)' +p49 +ssS'submitted_variant' +p50 +S'NC_000012.11:g.122064772_122064782del' +p51 +sS'genome_context_intronic_sequence' +p52 +g6 +sS'hgvs_lrg_variant' +p53 +g6 +sS'hgvs_transcript_variant' +p54 +S'NM_032790.3:c.126_127insA' +p55 +sS'hgvs_refseqgene_variant' +p56 +g6 +sS'primary_assembly_loci' 
+p57 +(dp58 +S'hg19' +p59 +(dp60 +g20 +S'NC_000012.11:g.122064774_122064778del' +p61 +sg22 +(dp62 +g24 +S'chr12' +p63 +sg26 +S'GGCCCC' +p64 +sg28 +S'122064770' +p65 +sg30 +g31 +sssS'hg38' +p66 +(dp67 +g20 +S'NC_000012.12:g.121626873_121626874insA' +p68 +sg22 +(dp69 +g24 +g63 +sg26 +S'C' +p70 +sg28 +S'121626873' +p71 +sg30 +VCA +p72 +sssS'grch37' +p73 +(dp74 +g20 +S'NC_000012.11:g.122064774_122064778del' +p75 +sg22 +(dp76 +g24 +S'12' +p77 +sg26 +S'GGCCCC' +p78 +sg28 +S'122064770' +p79 +sg30 +g31 +sssS'grch38' +p80 +(dp81 +g20 +S'NC_000012.12:g.121626873_121626874insA' +p82 +sg22 +(dp83 +g24 +g77 +sg26 +g70 +sg28 +S'121626873' +p84 +sg30 +VCA +p85 +ssssS'reference_sequence_records' +p86 +(dp87 +S'protein' +p88 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' +p89 +sS'transcript' +p90 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' +p91 +sssS'metadata' +p92 +(dp93 +S'variantvalidator_hgvs_version' +p94 +S'1.1.3' +p95 +sS'uta_schema' +p96 +S'uta_20180821' +p97 +sS'seqrepo_db' +p98 +S'2018-08-21' +p99 +sS'variantvalidator_version' +p100 +S'v0.2' +p101 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant8.txt b/VariantValidator/testing/testOutputsMasterITS/variant8.txt new file mode 100644 index 00000000..836f91e7 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant8.txt @@ -0,0 +1,177 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_000088.3:c.589_590delinsCT' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NM_000088.3:c.589GG>CT automapped to NM_000088.3:c.589_590delGGinsCT' +p9 +aS'RefSeqGene record not available' +p10 +asS'refseqgene_context_intronic_sequence' +p11 +g6 +sS'alt_genomic_loci' +p12 +(lp13 +sS'transcript_description' +p14 +VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA +p15 +sS'gene_symbol' +p16 +S'COL1A1' +p17 +sS'hgvs_predicted_protein_consequence' +p18 +(dp19 +S'tlr' +p20 +S'NP_000079.2:p.(Gly197Leu)' +p21 +sS'slr' +p22 +S'NP_000079.2:p.(G197L)' +p23 +ssS'submitted_variant' +p24 +S'NM_000088.3:c.589GG>CT' +p25 +sS'genome_context_intronic_sequence' +p26 +g6 +sS'hgvs_lrg_variant' +p27 +g6 +sS'hgvs_transcript_variant' +p28 +S'NM_000088.3:c.589_590delinsCT' +p29 +sS'hgvs_refseqgene_variant' +p30 +g6 +sS'primary_assembly_loci' +p31 +(dp32 +S'hg19' +p33 +(dp34 +S'hgvs_genomic_description' +p35 +S'NC_000017.10:g.48275362_48275363delinsAG' +p36 +sS'vcf' +p37 +(dp38 +S'chr' +p39 +S'chr17' +p40 +sS'ref' +p41 +S'CC' +p42 +sS'pos' +p43 +S'48275362' +p44 +sS'alt' +p45 +VAG +p46 +sssS'hg38' +p47 +(dp48 +g35 +S'NC_000017.11:g.50198001_50198002delinsAG' +p49 +sg37 +(dp50 +g39 +g40 +sg41 +S'CC' +p51 +sg43 +S'50198001' +p52 +sg45 +VAG +p53 +sssS'grch37' +p54 +(dp55 +g35 +S'NC_000017.10:g.48275362_48275363delinsAG' +p56 +sg37 +(dp57 +g39 +S'17' +p58 +sg41 +S'CC' +p59 +sg43 +S'48275362' +p60 +sg45 +g46 +sssS'grch38' +p61 +(dp62 +g35 +S'NC_000017.11:g.50198001_50198002delinsAG' +p63 +sg37 +(dp64 +g39 +g58 +sg41 +S'CC' +p65 +sg43 +S'50198001' +p66 +sg45 +g53 +ssssS'reference_sequence_records' 
+p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' +p72 +sssS'metadata' +p73 +(dp74 +S'variantvalidator_hgvs_version' +p75 +S'1.1.3' +p76 +sS'uta_schema' +p77 +S'uta_20180821' +p78 +sS'seqrepo_db' +p79 +S'2018-08-21' +p80 +sS'variantvalidator_version' +p81 +S'v0.2' +p82 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant80.txt b/VariantValidator/testing/testOutputsMasterITS/variant80.txt new file mode 100644 index 00000000..725ebe82 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant80.txt @@ -0,0 +1,882 @@ +(dp0 +S'NM_021088.3:c.471_473dup' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'The displayed variants may be artefacts of aligning NM_021088.3 with genome build GRCh37' +p7 +aS'NC_000002.11:g.95847040_95847043 contains 3 genomic base(s) that fail to align to transcript NM_021088.3' +p8 +aS'Caution should be used when reporting the displayed variant descriptions' +p9 +aS'If you are unsure, please contact admin' +p10 +aS'RefSeqGene record not available' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g4 +sS'alt_genomic_loci' +p13 +(lp14 +sS'transcript_description' +p15 +VHomo sapiens zinc finger protein 2 (ZNF2), transcript variant 1, mRNA +p16 +sS'gene_symbol' +p17 +S'ZNF2' +p18 +sS'hgvs_predicted_protein_consequence' +p19 +(dp20 +S'tlr' +p21 +S'NP_066574.2:p.(Arg159dup)' +p22 +sS'slr' +p23 +S'NP_066574.2:p.(R159dup)' +p24 +ssS'submitted_variant' +p25 +S'NC_000002.11:g.95847041_95847043GCG=' +p26 +sS'genome_context_intronic_sequence' +p27 +g4 +sS'hgvs_lrg_variant' +p28 +g4 +sS'hgvs_transcript_variant' +p29 +S'NM_021088.3:c.471_473dup' +p30 +sS'hgvs_refseqgene_variant' +p31 +g4 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000002.11:g.95847037_95847050=' +p37 +sS'vcf' +p38 +(dp39 
+S'chr' +p40 +S'chr2' +p41 +sS'ref' +p42 +S'GCTTGCGGCGGCGA' +p43 +sS'pos' +p44 +S'95847037' +p45 +sS'alt' +p46 +g43 +sssS'hg38' +p47 +(dp48 +g36 +S'NC_000002.12:g.95181299_95181301dup' +p49 +sg38 +(dp50 +g40 +g41 +sg42 +S'GCG' +p51 +sg44 +S'95181296' +p52 +sg46 +VGCGGCG +p53 +sssS'grch37' +p54 +(dp55 +g36 +S'NC_000002.11:g.95847037_95847050=' +p56 +sg38 +(dp57 +g40 +S'2' +p58 +sg42 +g43 +sg44 +S'95847037' +p59 +sg46 +g43 +sssS'grch38' +p60 +(dp61 +g36 +S'NC_000002.12:g.95181299_95181301dup' +p62 +sg38 +(dp63 +g40 +g58 +sg42 +S'GCG' +p64 +sg44 +S'95181296' +p65 +sg46 +VGCGGCG +p66 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_066574.2' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_021088.3' +p72 +sssS'NM_001291605.1:c.510_512dup' +p73 +(dp74 +g3 +g4 +sg5 +(lp75 +S'The displayed variants may be artefacts of aligning NM_001291605.1 with genome build GRCh37' +p76 +aS'NC_000002.11:g.95847040_95847043 contains 3 genomic base(s) that fail to align to transcript NM_001291605.1' +p77 +aS'Caution should be used when reporting the displayed variant descriptions' +p78 +aS'If you are unsure, please contact admin' +p79 +aS'RefSeqGene record not available' +p80 +asg12 +g4 +sg13 +(lp81 +sg15 +VHomo sapiens zinc finger protein 2 (ZNF2), transcript variant 5, mRNA +p82 +sg17 +S'ZNF2' +p83 +sg19 +(dp84 +g21 +S'NP_001278534.1:p.(Arg172dup)' +p85 +sg23 +S'NP_001278534.1:p.(R172dup)' +p86 +ssg25 +g26 +sg27 +g4 +sg28 +g4 +sg29 +S'NM_001291605.1:c.510_512dup' +p87 +sg31 +g4 +sg32 +(dp88 +S'hg19' +p89 +(dp90 +g36 +S'NC_000002.11:g.95847037_95847050=' +p91 +sg38 +(dp92 +g40 +g41 +sg42 +g43 +sg44 +S'95847037' +p93 +sg46 +g43 +sssg47 +(dp94 +g36 +S'NC_000002.12:g.95181299_95181301dup' +p95 +sg38 +(dp96 +g40 +g41 +sg42 +S'GCG' +p97 +sg44 +S'95181296' +p98 +sg46 +VGCGGCG +p99 +sssS'grch37' +p100 +(dp101 +g36 +S'NC_000002.11:g.95847037_95847050=' +p102 +sg38 +(dp103 +g40 +g58 +sg42 +g43 +sg44 +S'95847037' +p104 
+sg46 +g43 +sssS'grch38' +p105 +(dp106 +g36 +S'NC_000002.12:g.95181299_95181301dup' +p107 +sg38 +(dp108 +g40 +g58 +sg42 +S'GCG' +p109 +sg44 +S'95181296' +p110 +sg46 +VGCGGCG +p111 +ssssg67 +(dp112 +g69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278534.1' +p113 +sg71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291605.1' +p114 +sssS'NM_001017396.2:c.345_347dup' +p115 +(dp116 +g3 +g4 +sg5 +(lp117 +S'The displayed variants may be artefacts of aligning NM_001017396.2 with genome build GRCh37' +p118 +aS'NC_000002.11:g.95847040_95847043 contains 3 genomic base(s) that fail to align to transcript NM_001017396.2' +p119 +aS'Caution should be used when reporting the displayed variant descriptions' +p120 +aS'If you are unsure, please contact admin' +p121 +aS'RefSeqGene record not available' +p122 +asg12 +g4 +sg13 +(lp123 +sg15 +VHomo sapiens zinc finger protein 2 (ZNF2), transcript variant 2, mRNA +p124 +sg17 +S'ZNF2' +p125 +sg19 +(dp126 +g21 +S'NP_001017396.1:p.(Arg117dup)' +p127 +sg23 +S'NP_001017396.1:p.(R117dup)' +p128 +ssg25 +g26 +sg27 +g4 +sg28 +g4 +sg29 +S'NM_001017396.2:c.345_347dup' +p129 +sg31 +g4 +sg32 +(dp130 +S'hg19' +p131 +(dp132 +g36 +S'NC_000002.11:g.95847037_95847050=' +p133 +sg38 +(dp134 +g40 +g41 +sg42 +g43 +sg44 +S'95847037' +p135 +sg46 +g43 +sssg47 +(dp136 +g36 +S'NC_000002.12:g.95181299_95181301dup' +p137 +sg38 +(dp138 +g40 +g41 +sg42 +S'GCG' +p139 +sg44 +S'95181296' +p140 +sg46 +VGCGGCG +p141 +sssS'grch37' +p142 +(dp143 +g36 +S'NC_000002.11:g.95847037_95847050=' +p144 +sg38 +(dp145 +g40 +g58 +sg42 +g43 +sg44 +S'95847037' +p146 +sg46 +g43 +sssS'grch38' +p147 +(dp148 +g36 +S'NC_000002.12:g.95181299_95181301dup' +p149 +sg38 +(dp150 +g40 +g58 +sg42 +S'GCG' +p151 +sg44 +S'95181296' +p152 +sg46 +VGCGGCG +p153 +ssssg67 +(dp154 +g69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001017396.1' +p155 +sg71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001017396.2' +p156 +sssS'NM_001282398.1:c.357_359dup' +p157 +(dp158 +g3 +g4 +sg5 +(lp159 +S'The displayed variants 
may be artefacts of aligning NM_001282398.1 with genome build GRCh37' +p160 +aS'NC_000002.11:g.95847040_95847043 contains 3 genomic base(s) that fail to align to transcript NM_001282398.1' +p161 +aS'Caution should be used when reporting the displayed variant descriptions' +p162 +aS'If you are unsure, please contact admin' +p163 +aS'RefSeqGene record not available' +p164 +asg12 +g4 +sg13 +(lp165 +sg15 +VHomo sapiens zinc finger protein 2 (ZNF2), transcript variant 3, mRNA +p166 +sg17 +S'ZNF2' +p167 +sg19 +(dp168 +g21 +S'NP_001269327.1:p.(Arg121dup)' +p169 +sg23 +S'NP_001269327.1:p.(R121dup)' +p170 +ssg25 +g26 +sg27 +g4 +sg28 +g4 +sg29 +S'NM_001282398.1:c.357_359dup' +p171 +sg31 +g4 +sg32 +(dp172 +S'hg19' +p173 +(dp174 +g36 +S'NC_000002.11:g.95847037_95847050=' +p175 +sg38 +(dp176 +g40 +g41 +sg42 +g43 +sg44 +S'95847037' +p177 +sg46 +g43 +sssg47 +(dp178 +g36 +S'NC_000002.12:g.95181299_95181301dup' +p179 +sg38 +(dp180 +g40 +g41 +sg42 +S'GCG' +p181 +sg44 +S'95181296' +p182 +sg46 +VGCGGCG +p183 +sssS'grch37' +p184 +(dp185 +g36 +S'NC_000002.11:g.95847037_95847050=' +p186 +sg38 +(dp187 +g40 +g58 +sg42 +g43 +sg44 +S'95847037' +p188 +sg46 +g43 +sssS'grch38' +p189 +(dp190 +g36 +S'NC_000002.12:g.95181299_95181301dup' +p191 +sg38 +(dp192 +g40 +g58 +sg42 +S'GCG' +p193 +sg44 +S'95181296' +p194 +sg46 +VGCGGCG +p195 +ssssg67 +(dp196 +g69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269327.1' +p197 +sg71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282398.1' +p198 +sssS'flag' +p199 +S'gene_variant' +p200 +sS'NM_001291604.1:c.231_233dup' +p201 +(dp202 +g3 +g4 +sg5 +(lp203 +S'The displayed variants may be artefacts of aligning NM_001291604.1 with genome build GRCh37' +p204 +aS'NC_000002.11:g.95847040_95847043 contains 3 genomic base(s) that fail to align to transcript NM_001291604.1' +p205 +aS'Caution should be used when reporting the displayed variant descriptions' +p206 +aS'If you are unsure, please contact admin' +p207 +aS'RefSeqGene record not available' +p208 +asg12 +g4 +sg13 
+(lp209 +sg15 +VHomo sapiens zinc finger protein 2 (ZNF2), transcript variant 4, mRNA +p210 +sg17 +S'ZNF2' +p211 +sg19 +(dp212 +g21 +S'NP_001278533.1:p.(Arg79dup)' +p213 +sg23 +S'NP_001278533.1:p.(R79dup)' +p214 +ssg25 +g26 +sg27 +g4 +sg28 +g4 +sg29 +S'NM_001291604.1:c.231_233dup' +p215 +sg31 +g4 +sg32 +(dp216 +S'hg19' +p217 +(dp218 +g36 +S'NC_000002.11:g.95847037_95847050=' +p219 +sg38 +(dp220 +g40 +g41 +sg42 +g43 +sg44 +S'95847037' +p221 +sg46 +g43 +sssg47 +(dp222 +g36 +S'NC_000002.12:g.95181299_95181301dup' +p223 +sg38 +(dp224 +g40 +g41 +sg42 +S'GCG' +p225 +sg44 +S'95181296' +p226 +sg46 +VGCGGCG +p227 +sssS'grch37' +p228 +(dp229 +g36 +S'NC_000002.11:g.95847037_95847050=' +p230 +sg38 +(dp231 +g40 +g58 +sg42 +g43 +sg44 +S'95847037' +p232 +sg46 +g43 +sssS'grch38' +p233 +(dp234 +g36 +S'NC_000002.12:g.95181299_95181301dup' +p235 +sg38 +(dp236 +g40 +g58 +sg42 +S'GCG' +p237 +sg44 +S'95181296' +p238 +sg46 +VGCGGCG +p239 +ssssg67 +(dp240 +g69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278533.1' +p241 +sg71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291604.1' +p242 +sssS'NM_021088.2:c.471_473dup' +p243 +(dp244 +g3 +g4 +sg5 +(lp245 +S'The displayed variants may be artefacts of aligning NM_021088.2 with genome build GRCh37' +p246 +aS'NC_000002.11:g.95847040_95847043 contains 3 genomic base(s) that fail to align to transcript NM_021088.2' +p247 +aS'Caution should be used when reporting the displayed variant descriptions' +p248 +aS'If you are unsure, please contact admin' +p249 +aS'A more recent version of the selected reference sequence NM_021088.2 is available (NM_021088.3)' +p250 +aS'NM_021088.3:c.471_473dupGCG MUST be fully validated prior to use in reports' +p251 +aS'select_variants=NM_021088.3:c.471_473dup' +p252 +aS'RefSeqGene record not available' +p253 +asg12 +g4 +sg13 +(lp254 +sg15 +VHomo sapiens zinc finger protein 2 (ZNF2), transcript variant 1, mRNA +p255 +sg17 +S'ZNF2' +p256 +sg19 +(dp257 +g21 +S'NP_066574.2:p.(Arg159dup)' +p258 +sg23 
+S'NP_066574.2:p.(R159dup)' +p259 +ssg25 +g26 +sg27 +g4 +sg28 +g4 +sg29 +S'NM_021088.2:c.471_473dup' +p260 +sg31 +g4 +sg32 +(dp261 +S'hg19' +p262 +(dp263 +g36 +S'NC_000002.11:g.95847037_95847050=' +p264 +sg38 +(dp265 +g40 +g41 +sg42 +g43 +sg44 +S'95847037' +p266 +sg46 +g43 +sssS'grch37' +p267 +(dp268 +g36 +S'NC_000002.11:g.95847037_95847050=' +p269 +sg38 +(dp270 +g40 +g58 +sg42 +g43 +sg44 +S'95847037' +p271 +sg46 +g43 +ssssg67 +(dp272 +g69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_066574.2' +p273 +sg71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_021088.2' +p274 +sssS'NM_001017396.1:c.345_347dup' +p275 +(dp276 +g3 +g4 +sg5 +(lp277 +S'The displayed variants may be artefacts of aligning NM_001017396.1 with genome build GRCh37' +p278 +aS'NC_000002.11:g.95847040_95847043 contains 3 genomic base(s) that fail to align to transcript NM_001017396.1' +p279 +aS'Caution should be used when reporting the displayed variant descriptions' +p280 +aS'If you are unsure, please contact admin' +p281 +aS'A more recent version of the selected reference sequence NM_001017396.1 is available (NM_001017396.2)' +p282 +aS'NM_001017396.2:c.345_347dupGCG MUST be fully validated prior to use in reports' +p283 +aS'select_variants=NM_001017396.2:c.345_347dup' +p284 +aS'RefSeqGene record not available' +p285 +asg12 +g4 +sg13 +(lp286 +sg15 +VHomo sapiens zinc finger protein 2 (ZNF2), transcript variant 2, mRNA +p287 +sg17 +S'ZNF2' +p288 +sg19 +(dp289 +g21 +S'NP_001017396.1:p.(Arg117dup)' +p290 +sg23 +S'NP_001017396.1:p.(R117dup)' +p291 +ssg25 +g26 +sg27 +g4 +sg28 +g4 +sg29 +S'NM_001017396.1:c.345_347dup' +p292 +sg31 +g4 +sg32 +(dp293 +S'hg19' +p294 +(dp295 +g36 +S'NC_000002.11:g.95847037_95847050=' +p296 +sg38 +(dp297 +g40 +g41 +sg42 +g43 +sg44 +S'95847037' +p298 +sg46 +g43 +sssS'grch37' +p299 +(dp300 +g36 +S'NC_000002.11:g.95847037_95847050=' +p301 +sg38 +(dp302 +g40 +g58 +sg42 +g43 +sg44 +S'95847037' +p303 +sg46 +g43 +ssssg67 +(dp304 +g69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001017396.1' 
+p305 +sg71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001017396.1' +p306 +sssS'metadata' +p307 +(dp308 +S'variantvalidator_hgvs_version' +p309 +S'1.1.3' +p310 +sS'uta_schema' +p311 +S'uta_20180821' +p312 +sS'seqrepo_db' +p313 +S'2018-08-21' +p314 +sS'variantvalidator_version' +p315 +S'v0.2' +p316 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant81.txt b/VariantValidator/testing/testOutputsMasterITS/variant81.txt new file mode 100644 index 00000000..cceaacd1 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant81.txt @@ -0,0 +1,628 @@ +(dp0 +S'NM_001083585.1:c.*344_*368dup' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'The displayed variants may be artefacts of aligning NM_001083585.1 with genome build GRCh37' +p7 +aS'NC_000017.10:g.5286859_5286913 contains 25 genomic base(s) that fail to align to transcript NM_001083585.1' +p8 +aS'Caution should be used when reporting the displayed variant descriptions' +p9 +aS'If you are unsure, please contact admin' +p10 +aS'A more recent version of the selected reference sequence NM_001083585.1 is available (NM_001083585.2)' +p11 +aS'NM_001083585.2:c.*344_*368dupTAGTGTTTGGAATTTTCTGTTCATA MUST be fully validated prior to use in reports' +p12 +aS'select_variants=NM_001083585.2:c.*344_*368dup' +p13 +aS'RefSeqGene record not available' +p14 +asS'refseqgene_context_intronic_sequence' +p15 +g4 +sS'alt_genomic_loci' +p16 +(lp17 +sS'transcript_description' +p18 +VHomo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 2, mRNA +p19 +sS'gene_symbol' +p20 +S'RABEP1' +p21 +sS'hgvs_predicted_protein_consequence' +p22 +(dp23 +S'tlr' +p24 +S'NP_001077054.1:p.?' +p25 +sS'slr' +p26 +S'NP_001077054.1:p.?' 
+p27 +ssS'submitted_variant' +p28 +S'NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=' +p29 +sS'genome_context_intronic_sequence' +p30 +g4 +sS'hgvs_lrg_variant' +p31 +g4 +sS'hgvs_transcript_variant' +p32 +S'NM_001083585.1:c.*344_*368dup' +p33 +sS'hgvs_refseqgene_variant' +p34 +g4 +sS'primary_assembly_loci' +p35 +(dp36 +S'hg19' +p37 +(dp38 +S'hgvs_genomic_description' +p39 +S'NC_000017.10:g.5286857_5286915=' +p40 +sS'vcf' +p41 +(dp42 +S'chr' +p43 +S'chr17' +p44 +sS'ref' +p45 +S'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA' +p46 +sS'pos' +p47 +S'5286857' +p48 +sS'alt' +p49 +g46 +sssS'grch37' +p50 +(dp51 +g39 +S'NC_000017.10:g.5286857_5286915=' +p52 +sg41 +(dp53 +g43 +S'17' +p54 +sg45 +g46 +sg47 +S'5286857' +p55 +sg49 +g46 +ssssS'reference_sequence_records' +p56 +(dp57 +S'protein' +p58 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001077054.1' +p59 +sS'transcript' +p60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001083585.1' +p61 +sssS'NM_004703.5:c.*344_*368dup' +p62 +(dp63 +g3 +g4 +sg5 +(lp64 +S'The displayed variants may be artefacts of aligning NM_004703.5 with genome build GRCh37' +p65 +aS'NC_000017.10:g.5286859_5286913 contains 25 genomic base(s) that fail to align to transcript NM_004703.5' +p66 +aS'Caution should be used when reporting the displayed variant descriptions' +p67 +aS'If you are unsure, please contact admin' +p68 +aS'RefSeqGene record not available' +p69 +asg15 +g4 +sg16 +(lp70 +sg18 +VHomo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 1, mRNA +p71 +sg20 +S'RABEP1' +p72 +sg22 +(dp73 +g24 +S'NP_004694.2:p.?' +p74 +sg26 +S'NP_004694.2:p.?' 
+p75 +ssg28 +g29 +sg30 +g4 +sg31 +g4 +sg32 +S'NM_004703.5:c.*344_*368dup' +p76 +sg34 +g4 +sg35 +(dp77 +S'hg19' +p78 +(dp79 +g39 +S'NC_000017.10:g.5286857_5286915=' +p80 +sg41 +(dp81 +g43 +g44 +sg45 +g46 +sg47 +S'5286857' +p82 +sg49 +g46 +sssS'hg38' +p83 +(dp84 +g39 +S'NC_000017.11:g.5383567_5383591dup' +p85 +sg41 +(dp86 +g43 +g44 +sg45 +S'TAGTGTTTGGAATTTTCTGTTCATA' +p87 +sg47 +S'5383567' +p88 +sg49 +VTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATA +p89 +sssS'grch37' +p90 +(dp91 +g39 +S'NC_000017.10:g.5286857_5286915=' +p92 +sg41 +(dp93 +g43 +g54 +sg45 +g46 +sg47 +S'5286857' +p94 +sg49 +g46 +sssS'grch38' +p95 +(dp96 +g39 +S'NC_000017.11:g.5383567_5383591dup' +p97 +sg41 +(dp98 +g43 +g54 +sg45 +S'TAGTGTTTGGAATTTTCTGTTCATA' +p99 +sg47 +S'5383567' +p100 +sg49 +VTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATA +p101 +ssssg56 +(dp102 +g58 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004694.2' +p103 +sg60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004703.5' +p104 +sssS'NM_004703.4:c.*344_*368dup' +p105 +(dp106 +g3 +g4 +sg5 +(lp107 +S'The displayed variants may be artefacts of aligning NM_004703.4 with genome build GRCh37' +p108 +aS'NC_000017.10:g.5286859_5286913 contains 25 genomic base(s) that fail to align to transcript NM_004703.4' +p109 +aS'Caution should be used when reporting the displayed variant descriptions' +p110 +aS'If you are unsure, please contact admin' +p111 +aS'A more recent version of the selected reference sequence NM_004703.4 is available (NM_004703.5)' +p112 +aS'NM_004703.5:c.*344_*368dupTAGTGTTTGGAATTTTCTGTTCATA MUST be fully validated prior to use in reports' +p113 +aS'select_variants=NM_004703.5:c.*344_*368dup' +p114 +aS'RefSeqGene record not available' +p115 +asg15 +g4 +sg16 +(lp116 +sg18 +VHomo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 1, mRNA +p117 +sg20 +S'RABEP1' +p118 +sg22 +(dp119 +g24 +S'NP_004694.2:p.?' +p120 +sg26 +S'NP_004694.2:p.?' 
+p121 +ssg28 +g29 +sg30 +g4 +sg31 +g4 +sg32 +S'NM_004703.4:c.*344_*368dup' +p122 +sg34 +g4 +sg35 +(dp123 +S'hg19' +p124 +(dp125 +g39 +S'NC_000017.10:g.5286857_5286915=' +p126 +sg41 +(dp127 +g43 +g44 +sg45 +g46 +sg47 +S'5286857' +p128 +sg49 +g46 +sssS'grch37' +p129 +(dp130 +g39 +S'NC_000017.10:g.5286857_5286915=' +p131 +sg41 +(dp132 +g43 +g54 +sg45 +g46 +sg47 +S'5286857' +p133 +sg49 +g46 +ssssg56 +(dp134 +g58 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004694.2' +p135 +sg60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004703.4' +p136 +sssS'flag' +p137 +S'gene_variant' +p138 +sS'NM_001291581.1:c.*344_*368dup' +p139 +(dp140 +g3 +g4 +sg5 +(lp141 +S'The displayed variants may be artefacts of aligning NM_001291581.1 with genome build GRCh37' +p142 +aS'NC_000017.10:g.5286859_5286913 contains 25 genomic base(s) that fail to align to transcript NM_001291581.1' +p143 +aS'Caution should be used when reporting the displayed variant descriptions' +p144 +aS'If you are unsure, please contact admin' +p145 +aS'RefSeqGene record not available' +p146 +asg15 +g4 +sg16 +(lp147 +sg18 +VHomo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 3, mRNA +p148 +sg20 +S'RABEP1' +p149 +sg22 +(dp150 +g24 +S'NP_001278510.1:p.?' +p151 +sg26 +S'NP_001278510.1:p.?' 
+p152 +ssg28 +g29 +sg30 +g4 +sg31 +g4 +sg32 +S'NM_001291581.1:c.*344_*368dup' +p153 +sg34 +g4 +sg35 +(dp154 +S'hg19' +p155 +(dp156 +g39 +S'NC_000017.10:g.5286857_5286915=' +p157 +sg41 +(dp158 +g43 +g44 +sg45 +g46 +sg47 +S'5286857' +p159 +sg49 +g46 +sssg83 +(dp160 +g39 +S'NC_000017.11:g.5383567_5383591dup' +p161 +sg41 +(dp162 +g43 +g44 +sg45 +S'TAGTGTTTGGAATTTTCTGTTCATA' +p163 +sg47 +S'5383567' +p164 +sg49 +VTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATA +p165 +sssS'grch37' +p166 +(dp167 +g39 +S'NC_000017.10:g.5286857_5286915=' +p168 +sg41 +(dp169 +g43 +g54 +sg45 +g46 +sg47 +S'5286857' +p170 +sg49 +g46 +sssS'grch38' +p171 +(dp172 +g39 +S'NC_000017.11:g.5383567_5383591dup' +p173 +sg41 +(dp174 +g43 +g54 +sg45 +S'TAGTGTTTGGAATTTTCTGTTCATA' +p175 +sg47 +S'5383567' +p176 +sg49 +VTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATA +p177 +ssssg56 +(dp178 +g58 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278510.1' +p179 +sg60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291581.1' +p180 +sssS'NM_001083585.2:c.*344_*368dup' +p181 +(dp182 +g3 +g4 +sg5 +(lp183 +S'The displayed variants may be artefacts of aligning NM_001083585.2 with genome build GRCh37' +p184 +aS'NC_000017.10:g.5286859_5286913 contains 25 genomic base(s) that fail to align to transcript NM_001083585.2' +p185 +aS'Caution should be used when reporting the displayed variant descriptions' +p186 +aS'If you are unsure, please contact admin' +p187 +aS'RefSeqGene record not available' +p188 +asg15 +g4 +sg16 +(lp189 +sg18 +VHomo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 2, mRNA +p190 +sg20 +S'RABEP1' +p191 +sg22 +(dp192 +g24 +S'NP_001077054.1:p.?' +p193 +sg26 +S'NP_001077054.1:p.?' 
+p194 +ssg28 +g29 +sg30 +g4 +sg31 +g4 +sg32 +S'NM_001083585.2:c.*344_*368dup' +p195 +sg34 +g4 +sg35 +(dp196 +S'hg19' +p197 +(dp198 +g39 +S'NC_000017.10:g.5286857_5286915=' +p199 +sg41 +(dp200 +g43 +g44 +sg45 +g46 +sg47 +S'5286857' +p201 +sg49 +g46 +sssg83 +(dp202 +g39 +S'NC_000017.11:g.5383567_5383591dup' +p203 +sg41 +(dp204 +g43 +g44 +sg45 +S'TAGTGTTTGGAATTTTCTGTTCATA' +p205 +sg47 +S'5383567' +p206 +sg49 +VTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATA +p207 +sssS'grch37' +p208 +(dp209 +g39 +S'NC_000017.10:g.5286857_5286915=' +p210 +sg41 +(dp211 +g43 +g54 +sg45 +g46 +sg47 +S'5286857' +p212 +sg49 +g46 +sssS'grch38' +p213 +(dp214 +g39 +S'NC_000017.11:g.5383567_5383591dup' +p215 +sg41 +(dp216 +g43 +g54 +sg45 +S'TAGTGTTTGGAATTTTCTGTTCATA' +p217 +sg47 +S'5383567' +p218 +sg49 +VTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATA +p219 +ssssg56 +(dp220 +g58 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001077054.1' +p221 +sg60 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001083585.2' +p222 +sssS'metadata' +p223 +(dp224 +S'variantvalidator_hgvs_version' +p225 +S'1.1.3' +p226 +sS'uta_schema' +p227 +S'uta_20180821' +p228 +sS'seqrepo_db' +p229 +S'2018-08-21' +p230 +sS'variantvalidator_version' +p231 +S'v0.2' +p232 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant82.txt b/VariantValidator/testing/testOutputsMasterITS/variant82.txt new file mode 100644 index 00000000..dfcc0f30 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant82.txt @@ -0,0 +1,277 @@ +(dp0 +S'NM_001080423.3:c.1020del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'The displayed variants may be artefacts of aligning NM_001080423.3 with genome build GRCh37' +p7 +aS'NM_001080423.3:c.1019_1022 contains 1 transcript base(s) that fail to align to chromosome NC_000003.11' +p8 +aS'Caution should be used when reporting the displayed variant descriptions' +p9 +aS'If you are unsure, please contact admin' +p10 +aS'RefSeqGene record not available' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g4 +sS'alt_genomic_loci' +p13 +(lp14 +sS'transcript_description' +p15 +VHomo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA +p16 +sS'gene_symbol' +p17 +S'GRIP2' +p18 +sS'hgvs_predicted_protein_consequence' +p19 +(dp20 +S'tlr' +p21 +S'NP_001073892.3:p.(Ser341GlnfsTer4)' +p22 +sS'slr' +p23 +S'NP_001073892.3:p.(S341Qfs*4)' +p24 +ssS'submitted_variant' +p25 +S'NC_000003.11:g.14561629_14561630GC=' +p26 +sS'genome_context_intronic_sequence' +p27 +g4 +sS'hgvs_lrg_variant' +p28 +g4 +sS'hgvs_transcript_variant' +p29 +S'NM_001080423.3:c.1020del' +p30 +sS'hgvs_refseqgene_variant' +p31 +g4 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000003.11:g.14561624_14561630=' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr3' +p41 +sS'ref' +p42 +S'CTGAGGC' +p43 +sS'pos' +p44 +S'14561624' +p45 +sS'alt' +p46 +g43 +sssS'hg38' +p47 +(dp48 +g36 +S'NC_000003.12:g.14520122del' +p49 +sg38 +(dp50 +g40 +g41 +sg42 +S'AG' +p51 +sg44 +S'14520119' +p52 +sg46 +S'A' +p53 +sssS'grch37' +p54 +(dp55 +g36 +S'NC_000003.11:g.14561624_14561630=' +p56 +sg38 +(dp57 +g40 +S'3' +p58 +sg42 +g43 
+sg44 +S'14561624' +p59 +sg46 +g43 +sssS'grch38' +p60 +(dp61 +g36 +S'NC_000003.12:g.14520122del' +p62 +sg38 +(dp63 +g40 +g58 +sg42 +S'AG' +p64 +sg44 +S'14520119' +p65 +sg46 +g53 +ssssS'reference_sequence_records' +p66 +(dp67 +S'protein' +p68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.3' +p69 +sS'transcript' +p70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.3' +p71 +sssS'flag' +p72 +S'gene_variant' +p73 +sS'NM_001080423.2:c.1311del' +p74 +(dp75 +g3 +g4 +sg5 +(lp76 +S'The displayed variants may be artefacts of aligning NM_001080423.2 with genome build GRCh37' +p77 +aS'NM_001080423.2:c.1310_1313 contains 1 transcript base(s) that fail to align to chromosome NC_000003.11' +p78 +aS'Caution should be used when reporting the displayed variant descriptions' +p79 +aS'If you are unsure, please contact admin' +p80 +aS'A more recent version of the selected reference sequence NM_001080423.2 is available (NM_001080423.3)' +p81 +aS'NM_001080423.3:c.1311delG MUST be fully validated prior to use in reports' +p82 +aS'select_variants=NM_001080423.3:c.1311del' +p83 +aS'RefSeqGene record not available' +p84 +asg12 +g4 +sg13 +(lp85 +sg15 +VHomo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA +p86 +sg17 +S'GRIP2' +p87 +sg19 +(dp88 +g21 +S'NP_001073892.2:p.(Ser438GlnfsTer4)' +p89 +sg23 +S'NP_001073892.2:p.(S438Qfs*4)' +p90 +ssg25 +g26 +sg27 +g4 +sg28 +g4 +sg29 +S'NM_001080423.2:c.1311del' +p91 +sg31 +g4 +sg32 +(dp92 +S'hg19' +p93 +(dp94 +g36 +S'NC_000003.11:g.14561624_14561630=' +p95 +sg38 +(dp96 +g40 +g41 +sg42 +g43 +sg44 +S'14561624' +p97 +sg46 +g43 +sssS'grch37' +p98 +(dp99 +g36 +S'NC_000003.11:g.14561624_14561630=' +p100 +sg38 +(dp101 +g40 +g58 +sg42 +g43 +sg44 +S'14561624' +p102 +sg46 +g43 +ssssg66 +(dp103 +g68 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.2' +p104 +sg70 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.2' +p105 +sssS'metadata' +p106 +(dp107 +S'variantvalidator_hgvs_version' +p108 +S'1.1.3' +p109 +sS'uta_schema' +p110 
+S'uta_20180821' +p111 +sS'seqrepo_db' +p112 +S'2018-08-21' +p113 +sS'variantvalidator_version' +p114 +S'v0.2' +p115 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant83.txt b/VariantValidator/testing/testOutputsMasterITS/variant83.txt new file mode 100644 index 00000000..6e4d3190 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant83.txt @@ -0,0 +1,263 @@ +(dp0 +S'NM_001080423.3:c.1016_1020=' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA +p12 +sS'gene_symbol' +p13 +S'GRIP2' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_001073892.3:p.(Arg339=)' +p18 +sS'slr' +p19 +S'NP_001073892.3:p.(R339=)' +p20 +ssS'submitted_variant' +p21 +S'NC_000003.11:g.14561629_14561630insG' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_001080423.3:c.1016_1020=' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000003.11:g.14561629dup' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr3' +p37 +sS'ref' +p38 +S'G' +p39 +sS'pos' +p40 +S'14561628' +p41 +sS'alt' +p42 +VGG +p43 +sssS'hg38' +p44 +(dp45 +g32 +S'NC_000003.12:g.14520120_14520124=' +p46 +sg34 +(dp47 +g36 +g37 +sg38 +VGGGCC +p48 +sg40 +S'14520120' +p49 +sg42 +g48 +sssS'grch37' +p50 +(dp51 +g32 +S'NC_000003.11:g.14561629dup' +p52 +sg34 +(dp53 +g36 +S'3' +p54 +sg38 +g39 +sg40 +S'14561628' +p55 +sg42 +VGG +p56 +sssS'grch38' +p57 +(dp58 +g32 +S'NC_000003.12:g.14520120_14520124=' +p59 +sg34 +(dp60 +g36 +g54 +sg38 +g48 +sg40 +S'14520120' +p61 +sg42 +g48 +ssssS'reference_sequence_records' +p62 +(dp63 +S'protein' +p64 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.3' +p65 +sS'transcript' +p66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.3' +p67 +sssS'flag' +p68 +S'gene_variant' +p69 +sS'NM_001080423.2:c.1307_1311=' +p70 +(dp71 +g3 +g4 +sg5 +(lp72 +S'A more recent version of the selected reference sequence NM_001080423.2 is available (NM_001080423.3)' +p73 +aS'NM_001080423.3:c.1307_1311delinsGGCCC MUST be fully validated prior to use in reports' +p74 +aS'select_variants=NM_001080423.3:c.1307_1311delinsGGCCC' +p75 +aS'RefSeqGene record not available' +p76 +asg8 +g4 +sg9 +(lp77 +sg11 +VHomo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA +p78 +sg13 +S'GRIP2' +p79 +sg15 +(dp80 +g17 +S'NP_001073892.2:p.(Arg436=)' +p81 +sg19 +S'NP_001073892.2:p.(R436=)' +p82 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_001080423.2:c.1307_1311=' +p83 +sg27 +g4 +sg28 +(dp84 +S'hg19' +p85 +(dp86 +g32 +S'NC_000003.11:g.14561629dup' +p87 +sg34 +(dp88 +g36 +g37 +sg38 +g39 +sg40 +S'14561628' +p89 +sg42 +VGG +p90 +sssS'grch37' +p91 +(dp92 +g32 +S'NC_000003.11:g.14561629dup' +p93 +sg34 +(dp94 +g36 +g54 +sg38 +g39 +sg40 +S'14561628' +p95 +sg42 +VGG +p96 +ssssg62 +(dp97 +g64 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.2' +p98 +sg66 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.2' +p99 +sssS'metadata' +p100 +(dp101 +S'variantvalidator_hgvs_version' +p102 +S'1.1.3' +p103 +sS'uta_schema' +p104 +S'uta_20180821' +p105 +sS'seqrepo_db' +p106 +S'2018-08-21' +p107 +sS'variantvalidator_version' +p108 +S'v0.2' +p109 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant84.txt b/VariantValidator/testing/testOutputsMasterITS/variant84.txt new file mode 100644 index 00000000..341be722 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant84.txt @@ -0,0 +1,272 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_018717.5:c.1515_1526del' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'RefSeqGene record not available' +p19 +asS'refseqgene_context_intronic_sequence' +p20 +g16 +sS'alt_genomic_loci' +p21 +(lp22 +sS'transcript_description' +p23 +VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA +p24 +sS'gene_symbol' +p25 +S'MAML3' +p26 +sS'hgvs_predicted_protein_consequence' +p27 +(dp28 +S'tlr' +p29 +S'NP_061187.3:p.(Gln507_Gln510del)' +p30 +sS'slr' +p31 +S'NP_061187.3:p.(Q507_Q510del)' +p32 +ssS'submitted_variant' +p33 +S'NC_000004.11:g.140811111_140811122del' +p34 +sS'genome_context_intronic_sequence' +p35 +g16 +sS'hgvs_lrg_variant' +p36 +g16 +sS'hgvs_transcript_variant' +p37 +S'NM_018717.5:c.1515_1526del' +p38 +sS'hgvs_refseqgene_variant' +p39 +g16 +sS'primary_assembly_loci' +p40 +(dp41 +S'hg19' +p42 +(dp43 +S'hgvs_genomic_description' +p44 +S'NC_000004.11:g.140811111_140811122del' +p45 +sS'vcf' +p46 +(dp47 +S'chr' +p48 +S'chr4' +p49 +sS'ref' +p50 +S'TTGCTGCTGCTGC' +p51 +sS'pos' +p52 +S'140811063' +p53 +sS'alt' +p54 +S'T' +p55 +sssS'grch37' +p56 +(dp57 +g44 +S'NC_000004.11:g.140811111_140811122del' +p58 +sg46 +(dp59 +g48 +S'4' +p60 +sg50 +S'TTGCTGCTGCTGC' +p61 +sg52 +S'140811063' +p62 +sg54 +g55 +ssssS'reference_sequence_records' +p63 +(dp64 +S'protein' +p65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3' +p66 +sS'transcript' +p67 
+S'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5' +p68 +sssS'NM_018717.4:c.1465_1469=' +p69 +(dp70 +g15 +g16 +sg17 +(lp71 +S'The displayed variants may be artefacts of aligning NM_018717.4 with genome build GRCh37' +p72 +aS'NC_000004.11:g.140811063_140811075 contains 12 genomic base(s) that fail to align to transcript NM_018717.4' +p73 +aS'Caution should be used when reporting the displayed variant descriptions' +p74 +aS'If you are unsure, please contact admin' +p75 +aS'A more recent version of the selected reference sequence NM_018717.4 is available (NM_018717.5)' +p76 +aS'NM_018717.5:c.1465_1469CAACA= MUST be fully validated prior to use in reports' +p77 +aS'select_variants=NM_018717.5:c.1465_1469=' +p78 +aS'RefSeqGene record not available' +p79 +asg20 +g16 +sg21 +(lp80 +sg23 +VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA +p81 +sg25 +S'MAML3' +p82 +sg27 +(dp83 +g29 +S'NP_061187.2:p.(Gln489=)' +p84 +sg31 +S'NP_061187.2:p.(Q489=)' +p85 +ssg33 +g34 +sg35 +g16 +sg36 +g16 +sg37 +S'NM_018717.4:c.1465_1469=' +p86 +sg39 +g16 +sg40 +(dp87 +S'hg19' +p88 +(dp89 +g44 +S'NC_000004.11:g.140811111_140811122del' +p90 +sg46 +(dp91 +g48 +g49 +sg50 +S'TTGCTGCTGCTGC' +p92 +sg52 +S'140811063' +p93 +sg54 +g55 +sssS'hg38' +p94 +(dp95 +g44 +S'NC_000004.12:g.139889957_139889968del' +p96 +sg46 +(dp97 +g48 +g49 +sg50 +S'TTGCTGCTGCTGC' +p98 +sg52 +S'139889909' +p99 +sg54 +g55 +sssS'grch37' +p100 +(dp101 +g44 +S'NC_000004.11:g.140811111_140811122del' +p102 +sg46 +(dp103 +g48 +g60 +sg50 +S'TTGCTGCTGCTGC' +p104 +sg52 +S'140811063' +p105 +sg54 +g55 +sssS'grch38' +p106 +(dp107 +g44 +S'NC_000004.12:g.139889957_139889968del' +p108 +sg46 +(dp109 +g48 +g60 +sg50 +S'TTGCTGCTGCTGC' +p110 +sg52 +S'139889909' +p111 +sg54 +g55 +ssssg63 +(dp112 +g65 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.2' +p113 +sg67 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.4' +p114 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant85.txt b/VariantValidator/testing/testOutputsMasterITS/variant85.txt new file mode 100644 index 00000000..f37d4e0f --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant85.txt @@ -0,0 +1,268 @@ +(dp0 +S'NM_018717.5:c.1468_1479=' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA +p12 +sS'gene_symbol' +p13 +S'MAML3' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_061187.3:p.(Gln490=)' +p18 +sS'slr' +p19 +S'NP_061187.3:p.(Q490=)' +p20 +ssS'submitted_variant' +p21 +S'NC_000004.11:g.140811111_140811122CTGCTGCTGCTG=' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_018717.5:c.1468_1479=' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000004.11:g.140811111_140811122=' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr4' +p37 +sS'ref' +p38 +VCTGCTGCTGCTG +p39 +sS'pos' +p40 +S'140811111' +p41 +sS'alt' +p42 +g39 +sssS'grch37' +p43 +(dp44 +g32 +S'NC_000004.11:g.140811111_140811122=' +p45 +sg34 +(dp46 +g36 +S'4' +p47 +sg38 +g39 +sg40 +S'140811111' +p48 +sg42 +g39 +ssssS'reference_sequence_records' +p49 +(dp50 +S'protein' +p51 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3' +p52 +sS'transcript' +p53 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5' +p54 +sssS'flag' +p55 +S'gene_variant' +p56 +sS'NM_018717.4:c.1503_1514dup' +p57 +(dp58 +g3 +g4 +sg5 +(lp59 +S'The displayed variants may be artefacts of aligning NM_018717.4 with genome build GRCh37' +p60 +aS'NC_000004.11:g.140811063_140811075 contains 12 
genomic base(s) that fail to align to transcript NM_018717.4' +p61 +aS'Caution should be used when reporting the displayed variant descriptions' +p62 +aS'If you are unsure, please contact admin' +p63 +aS'A more recent version of the selected reference sequence NM_018717.4 is available (NM_018717.5)' +p64 +aS'NM_018717.5:c.1503_1514dupGCAGCAGCAGCA MUST be fully validated prior to use in reports' +p65 +aS'select_variants=NM_018717.5:c.1503_1514dup' +p66 +aS'RefSeqGene record not available' +p67 +asg8 +g4 +sg9 +(lp68 +sg11 +VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA +p69 +sg13 +S'MAML3' +p70 +sg15 +(dp71 +g17 +S'NP_061187.2:p.(Gln503_Gln506dup)' +p72 +sg19 +S'NP_061187.2:p.(Q503_Q506dup)' +p73 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_018717.4:c.1503_1514dup' +p74 +sg27 +g4 +sg28 +(dp75 +S'hg19' +p76 +(dp77 +g32 +S'NC_000004.11:g.140811095_140811128=' +p78 +sg34 +(dp79 +g36 +g37 +sg38 +S'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG' +p80 +sg40 +S'140811095' +p81 +sg42 +g80 +sssS'hg38' +p82 +(dp83 +g32 +S'NC_000004.12:g.139889941_139889974=' +p84 +sg34 +(dp85 +g36 +g37 +sg38 +S'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG' +p86 +sg40 +S'139889941' +p87 +sg42 +g86 +sssS'grch37' +p88 +(dp89 +g32 +S'NC_000004.11:g.140811095_140811128=' +p90 +sg34 +(dp91 +g36 +g47 +sg38 +g80 +sg40 +S'140811095' +p92 +sg42 +g80 +sssS'grch38' +p93 +(dp94 +g32 +S'NC_000004.12:g.139889941_139889974=' +p95 +sg34 +(dp96 +g36 +g47 +sg38 +g86 +sg40 +S'139889941' +p97 +sg42 +g86 +ssssg49 +(dp98 +g51 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.2' +p99 +sg53 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.4' +p100 +sssS'metadata' +p101 +(dp102 +S'variantvalidator_hgvs_version' +p103 +S'1.1.3' +p104 +sS'uta_schema' +p105 +S'uta_20180821' +p106 +sS'seqrepo_db' +p107 +S'2018-08-21' +p108 +sS'variantvalidator_version' +p109 +S'v0.2' +p110 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant86.txt b/VariantValidator/testing/testOutputsMasterITS/variant86.txt new file mode 100644 index 00000000..3ad98372 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant86.txt @@ -0,0 +1,272 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_018717.5:c.1521_1526del' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'RefSeqGene record not available' +p9 +asS'refseqgene_context_intronic_sequence' +p10 +g6 +sS'alt_genomic_loci' +p11 +(lp12 +sS'transcript_description' +p13 +VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA +p14 +sS'gene_symbol' +p15 +S'MAML3' +p16 +sS'hgvs_predicted_protein_consequence' +p17 +(dp18 +S'tlr' +p19 +S'NP_061187.3:p.(Gln509_Gln510del)' +p20 +sS'slr' +p21 +S'NP_061187.3:p.(Q509_Q510del)' +p22 +ssS'submitted_variant' +p23 +S'NC_000004.11:g.140811117_140811122del' +p24 +sS'genome_context_intronic_sequence' +p25 +g6 +sS'hgvs_lrg_variant' +p26 +g6 +sS'hgvs_transcript_variant' +p27 +S'NM_018717.5:c.1521_1526del' +p28 +sS'hgvs_refseqgene_variant' +p29 +g6 +sS'primary_assembly_loci' +p30 +(dp31 +S'hg19' +p32 +(dp33 +S'hgvs_genomic_description' +p34 +S'NC_000004.11:g.140811117_140811122del' +p35 +sS'vcf' +p36 +(dp37 +S'chr' +p38 +S'chr4' +p39 +sS'ref' +p40 +S'TTGCTGC' +p41 +sS'pos' +p42 +S'140811063' +p43 +sS'alt' +p44 +S'T' +p45 +sssS'grch37' +p46 +(dp47 +g34 +S'NC_000004.11:g.140811117_140811122del' +p48 +sg36 +(dp49 +g38 +S'4' +p50 +sg40 +S'TTGCTGC' +p51 +sg42 +S'140811063' +p52 +sg44 +g45 +ssssS'reference_sequence_records' +p53 +(dp54 +S'protein' +p55 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3' +p56 +sS'transcript' +p57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5' +p58 +sssS'NM_018717.4:c.1509_1514dup' +p59 +(dp60 +g5 +g6 +sg7 +(lp61 +S'The displayed variants may be artefacts of aligning NM_018717.4 with genome build GRCh37' +p62 
+aS'NC_000004.11:g.140811063 is one of 12 genomic base(s) that fail to align to transcript NM_018717.4 between positions c.1467_1468' +p63 +aS'Caution should be used when reporting the displayed variant descriptions' +p64 +aS'If you are unsure, please contact admin' +p65 +aS'A more recent version of the selected reference sequence NM_018717.4 is available (NM_018717.5)' +p66 +aS'NM_018717.5:c.1509_1514dupGCAGCA MUST be fully validated prior to use in reports' +p67 +aS'select_variants=NM_018717.5:c.1509_1514dup' +p68 +aS'RefSeqGene record not available' +p69 +asg10 +g6 +sg11 +(lp70 +sg13 +VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA +p71 +sg15 +S'MAML3' +p72 +sg17 +(dp73 +g19 +S'NP_061187.2:p.(Gln505_Gln506dup)' +p74 +sg21 +S'NP_061187.2:p.(Q505_Q506dup)' +p75 +ssg23 +g24 +sg25 +g6 +sg26 +g6 +sg27 +S'NM_018717.4:c.1509_1514dup' +p76 +sg29 +g6 +sg30 +(dp77 +S'hg19' +p78 +(dp79 +g34 +S'NC_000004.11:g.140811117_140811122del' +p80 +sg36 +(dp81 +g38 +g39 +sg40 +S'TTGCTGC' +p82 +sg42 +S'140811063' +p83 +sg44 +g45 +sssS'hg38' +p84 +(dp85 +g34 +S'NC_000004.12:g.139889963_139889968del' +p86 +sg36 +(dp87 +g38 +g39 +sg40 +S'TTGCTGC' +p88 +sg42 +S'139889909' +p89 +sg44 +g45 +sssS'grch37' +p90 +(dp91 +g34 +S'NC_000004.11:g.140811117_140811122del' +p92 +sg36 +(dp93 +g38 +g50 +sg40 +S'TTGCTGC' +p94 +sg42 +S'140811063' +p95 +sg44 +g45 +sssS'grch38' +p96 +(dp97 +g34 +S'NC_000004.12:g.139889963_139889968del' +p98 +sg36 +(dp99 +g38 +g50 +sg40 +S'TTGCTGC' +p100 +sg42 +S'139889909' +p101 +sg44 +g45 +ssssg53 +(dp102 +g55 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.2' +p103 +sg57 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.4' +p104 +sssS'metadata' +p105 +(dp106 +S'variantvalidator_hgvs_version' +p107 +S'1.1.3' +p108 +sS'uta_schema' +p109 +S'uta_20180821' +p110 +sS'seqrepo_db' +p111 +S'2018-08-21' +p112 +sS'variantvalidator_version' +p113 +S'v0.2' +p114 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant87.txt b/VariantValidator/testing/testOutputsMasterITS/variant87.txt new file mode 100644 index 00000000..6ff8ccb4 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant87.txt @@ -0,0 +1,272 @@ +(dp0 +S'NM_018717.5:c.1473_1479del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'RefSeqGene record not available' +p7 +asS'refseqgene_context_intronic_sequence' +p8 +g4 +sS'alt_genomic_loci' +p9 +(lp10 +sS'transcript_description' +p11 +VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA +p12 +sS'gene_symbol' +p13 +S'MAML3' +p14 +sS'hgvs_predicted_protein_consequence' +p15 +(dp16 +S'tlr' +p17 +S'NP_061187.3:p.(Gln491HisfsTer29)' +p18 +sS'slr' +p19 +S'NP_061187.3:p.(Q491Hfs*29)' +p20 +ssS'submitted_variant' +p21 +S'NC_000004.11:g.140811111_140811117del' +p22 +sS'genome_context_intronic_sequence' +p23 +g4 +sS'hgvs_lrg_variant' +p24 +g4 +sS'hgvs_transcript_variant' +p25 +S'NM_018717.5:c.1473_1479del' +p26 +sS'hgvs_refseqgene_variant' +p27 +g4 +sS'primary_assembly_loci' +p28 +(dp29 +S'hg19' +p30 +(dp31 +S'hgvs_genomic_description' +p32 +S'NC_000004.11:g.140811111_140811117del' +p33 +sS'vcf' +p34 +(dp35 +S'chr' +p36 +S'chr4' +p37 +sS'ref' +p38 +S'GCTGCTGC' +p39 +sS'pos' +p40 +S'140811110' +p41 +sS'alt' +p42 +S'G' +p43 +sssS'grch37' +p44 +(dp45 +g32 +S'NC_000004.11:g.140811111_140811117del' +p46 +sg34 +(dp47 +g36 +S'4' +p48 +sg38 +S'GCTGCTGC' +p49 +sg40 +S'140811110' +p50 +sg42 +g43 +ssssS'reference_sequence_records' +p51 +(dp52 +S'protein' +p53 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3' +p54 +sS'transcript' +p55 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5' +p56 +sssS'flag' +p57 +S'gene_variant' +p58 +sS'NM_018717.4:c.1468_1472dup' +p59 +(dp60 +g3 +g4 +sg5 +(lp61 +S'The displayed variants may be artefacts of aligning NM_018717.4 with genome build GRCh37' +p62 
+aS'NC_000004.11:g.140811110 is one of Requires Analysis genomic base(s) that fail to align to transcript NM_018717.4 between positions c.1467_1468' +p63 +aS'Caution should be used when reporting the displayed variant descriptions' +p64 +aS'If you are unsure, please contact admin' +p65 +aS'A more recent version of the selected reference sequence NM_018717.4 is available (NM_018717.5)' +p66 +aS'NM_018717.5:c.1468_1472dupCAGCA MUST be fully validated prior to use in reports' +p67 +aS'select_variants=NM_018717.5:c.1468_1472dup' +p68 +aS'RefSeqGene record not available' +p69 +asg8 +g4 +sg9 +(lp70 +sg11 +VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA +p71 +sg13 +S'MAML3' +p72 +sg15 +(dp73 +g17 +S'NP_061187.2:p.(Gln491HisfsTer29)' +p74 +sg19 +S'NP_061187.2:p.(Q491Hfs*29)' +p75 +ssg21 +g22 +sg23 +g4 +sg24 +g4 +sg25 +S'NM_018717.4:c.1468_1472dup' +p76 +sg27 +g4 +sg28 +(dp77 +S'hg19' +p78 +(dp79 +g32 +S'NC_000004.11:g.140811111_140811117del' +p80 +sg34 +(dp81 +g36 +g37 +sg38 +S'GCTGCTGC' +p82 +sg40 +S'140811110' +p83 +sg42 +g43 +sssS'hg38' +p84 +(dp85 +g32 +S'NC_000004.12:g.139889957_139889963del' +p86 +sg34 +(dp87 +g36 +g37 +sg38 +S'GCTGCTGC' +p88 +sg40 +S'139889956' +p89 +sg42 +g43 +sssS'grch37' +p90 +(dp91 +g32 +S'NC_000004.11:g.140811111_140811117del' +p92 +sg34 +(dp93 +g36 +g48 +sg38 +S'GCTGCTGC' +p94 +sg40 +S'140811110' +p95 +sg42 +g43 +sssS'grch38' +p96 +(dp97 +g32 +S'NC_000004.12:g.139889957_139889963del' +p98 +sg34 +(dp99 +g36 +g48 +sg38 +S'GCTGCTGC' +p100 +sg40 +S'139889956' +p101 +sg42 +g43 +ssssg51 +(dp102 +g53 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.2' +p103 +sg55 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.4' +p104 +sssS'metadata' +p105 +(dp106 +S'variantvalidator_hgvs_version' +p107 +S'1.1.3' +p108 +sS'uta_schema' +p109 +S'uta_20180821' +p110 +sS'seqrepo_db' +p111 +S'2018-08-21' +p112 +sS'variantvalidator_version' +p113 +S'v0.2' +p114 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant88.txt b/VariantValidator/testing/testOutputsMasterITS/variant88.txt new file mode 100644 index 00000000..d21ee799 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant88.txt @@ -0,0 +1,268 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_018717.4:c.1472_1473insTCAGCAGCAGCA' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'The displayed variants may be artefacts of aligning NM_018717.4 with genome build GRCh37' +p19 +aS'NC_000004.11:g.140811117 is one of 12 genomic base(s) that fail to align to transcript NM_018717.4 between positions c.1467_1468' +p20 +aS'Caution should be used when reporting the displayed variant descriptions' +p21 +aS'If you are unsure, please contact admin' +p22 +aS'A more recent version of the selected reference sequence NM_018717.4 is available (NM_018717.5)' +p23 +aS'NM_018717.5:c.1472_1473insTCAGCAGCAGCA MUST be fully validated prior to use in reports' +p24 +aS'select_variants=NM_018717.5:c.1472_1473insTCAGCAGCAGCA' +p25 +aS'RefSeqGene record not available' +p26 +asS'refseqgene_context_intronic_sequence' +p27 +g16 +sS'alt_genomic_loci' +p28 +(lp29 +sS'transcript_description' +p30 +VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA +p31 +sS'gene_symbol' +p32 +S'MAML3' +p33 +sS'hgvs_predicted_protein_consequence' +p34 +(dp35 +S'tlr' +p36 +S'NP_061187.2:p.(Gln490_Gln491insHisGlnGlnGln)' +p37 +sS'slr' +p38 +S'NP_061187.2:p.(Q490_Q491insHQQQ)' +p39 +ssS'submitted_variant' +p40 +S'NC_000004.11:g.140811117C>A' +p41 +sS'genome_context_intronic_sequence' +p42 +g16 +sS'hgvs_lrg_variant' +p43 +g16 +sS'hgvs_transcript_variant' +p44 
+S'NM_018717.4:c.1472_1473insTCAGCAGCAGCA' +p45 +sS'hgvs_refseqgene_variant' +p46 +g16 +sS'primary_assembly_loci' +p47 +(dp48 +S'hg19' +p49 +(dp50 +S'hgvs_genomic_description' +p51 +S'NC_000004.11:g.140811117C>A' +p52 +sS'vcf' +p53 +(dp54 +S'chr' +p55 +S'chr4' +p56 +sS'ref' +p57 +S'C' +p58 +sS'pos' +p59 +S'140811117' +p60 +sS'alt' +p61 +VA +p62 +sssS'hg38' +p63 +(dp64 +g51 +S'NC_000004.12:g.139889963C>A' +p65 +sg53 +(dp66 +g55 +g56 +sg57 +g58 +sg59 +S'139889963' +p67 +sg61 +g62 +sssS'grch37' +p68 +(dp69 +g51 +S'NC_000004.11:g.140811117C>A' +p70 +sg53 +(dp71 +g55 +S'4' +p72 +sg57 +g58 +sg59 +S'140811117' +p73 +sg61 +g62 +sssS'grch38' +p74 +(dp75 +g51 +S'NC_000004.12:g.139889963C>A' +p76 +sg53 +(dp77 +g55 +g72 +sg57 +g58 +sg59 +S'139889963' +p78 +sg61 +g62 +ssssS'reference_sequence_records' +p79 +(dp80 +S'protein' +p81 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.2' +p82 +sS'transcript' +p83 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.4' +p84 +sssS'NM_018717.5:c.1473G>T' +p85 +(dp86 +g15 +g16 +sg17 +(lp87 +S'RefSeqGene record not available' +p88 +asg27 +g16 +sg28 +(lp89 +sg30 +VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA +p90 +sg32 +S'MAML3' +p91 +sg34 +(dp92 +g36 +S'NP_061187.3:p.(Gln491His)' +p93 +sg38 +S'NP_061187.3:p.(Q491H)' +p94 +ssg40 +g41 +sg42 +g16 +sg43 +g16 +sg44 +S'NM_018717.5:c.1473G>T' +p95 +sg46 +g16 +sg47 +(dp96 +S'hg19' +p97 +(dp98 +g51 +S'NC_000004.11:g.140811117C>A' +p99 +sg53 +(dp100 +g55 +g56 +sg57 +VC +p101 +sg59 +S'140811117' +p102 +sg61 +g62 +sssS'grch37' +p103 +(dp104 +g51 +S'NC_000004.11:g.140811117C>A' +p105 +sg53 +(dp106 +g55 +g72 +sg57 +g101 +sg59 +S'140811117' +p107 +sg61 +g62 +ssssg79 +(dp108 +g81 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3' +p109 +sg83 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5' +p110 +sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant89.txt b/VariantValidator/testing/testOutputsMasterITS/variant89.txt new file mode 100644 index 00000000..dda98ce8 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant89.txt @@ -0,0 +1,182 @@ +(dp0 +S'NM_015120.4:c.1573_1579=' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'The displayed variants may be artefacts of aligning NM_015120.4 with genome build GRCh37' +p7 +aS'NM_015120.4:c.1573_1579 contains 3 transcript base(s) that fail to align to chromosome NC_000002.11' +p8 +aS'Caution should be used when reporting the displayed variant descriptions' +p9 +aS'If you are unsure, please contact admin' +p10 +aS'RefSeqGene record not available' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g4 +sS'alt_genomic_loci' +p13 +(lp14 +sS'transcript_description' +p15 +VHomo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA +p16 +sS'gene_symbol' +p17 +S'ALMS1' +p18 +sS'hgvs_predicted_protein_consequence' +p19 +(dp20 +S'tlr' +p21 +S'NP_055935.4:p.(Ser525=)' +p22 +sS'slr' +p23 +S'NP_055935.4:p.(S525=)' +p24 +ssS'submitted_variant' +p25 +S'NC_000002.11:g.73675227_73675228insCTC' +p26 +sS'genome_context_intronic_sequence' +p27 +g4 +sS'hgvs_lrg_variant' +p28 +g4 +sS'hgvs_transcript_variant' +p29 +S'NM_015120.4:c.1573_1579=' +p30 +sS'hgvs_refseqgene_variant' +p31 +g4 +sS'primary_assembly_loci' +p32 +(dp33 +S'hg19' +p34 +(dp35 +S'hgvs_genomic_description' +p36 +S'NC_000002.11:g.73675228_73675230dup' +p37 +sS'vcf' +p38 +(dp39 +S'chr' +p40 +S'chr2' +p41 +sS'ref' +p42 +S'CTC' +p43 +sS'pos' +p44 +S'73675228' +p45 +sS'alt' +p46 +VCTCCTC +p47 +sssS'hg38' +p48 +(dp49 +g36 +S'NC_000002.12:g.73448097_73448103=' +p50 +sg38 +(dp51 +g40 +g41 +sg42 +VTCTCCTC +p52 +sg44 +S'73448097' +p53 +sg46 +g52 +sssS'grch37' +p54 +(dp55 +g36 +S'NC_000002.11:g.73675228_73675230dup' +p56 +sg38 +(dp57 +g40 +S'2' +p58 +sg42 
+S'CTC' +p59 +sg44 +S'73675228' +p60 +sg46 +VCTCCTC +p61 +sssS'grch38' +p62 +(dp63 +g36 +S'NC_000002.12:g.73448097_73448103=' +p64 +sg38 +(dp65 +g40 +g58 +sg42 +g52 +sg44 +S'73448097' +p66 +sg46 +g52 +ssssS'reference_sequence_records' +p67 +(dp68 +S'protein' +p69 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4' +p70 +sS'transcript' +p71 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4' +p72 +sssS'flag' +p73 +S'gene_variant' +p74 +sS'metadata' +p75 +(dp76 +S'variantvalidator_hgvs_version' +p77 +S'1.1.3' +p78 +sS'uta_schema' +p79 +S'uta_20180821' +p80 +sS'seqrepo_db' +p81 +S'2018-08-21' +p82 +sS'variantvalidator_version' +p83 +S'v0.2' +p84 +ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant9.txt b/VariantValidator/testing/testOutputsMasterITS/variant9.txt new file mode 100644 index 00000000..3097623f --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant9.txt @@ -0,0 +1,82 @@ +(dp0 +S'validation_warning_1' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'base start position must be <= end position' +p7 +aS'Did you mean NM_000094.3:c.6751-3_6751-2del?' +p8 +asS'refseqgene_context_intronic_sequence' +p9 +g4 +sS'alt_genomic_loci' +p10 +(lp11 +sS'transcript_description' +p12 +g4 +sS'gene_symbol' +p13 +g4 +sS'hgvs_predicted_protein_consequence' +p14 +(dp15 +S'tlr' +p16 +g4 +sS'slr' +p17 +g4 +ssS'submitted_variant' +p18 +S'NM_000094.3:c.6751-2_6751-3del' +p19 +sS'genome_context_intronic_sequence' +p20 +g4 +sS'hgvs_lrg_variant' +p21 +g4 +sS'hgvs_transcript_variant' +p22 +g4 +sS'hgvs_refseqgene_variant' +p23 +g4 +sS'primary_assembly_loci' +p24 +(dp25 +sS'reference_sequence_records' +p26 +g4 +ssS'flag' +p27 +S'warning' +p28 +sS'metadata' +p29 +(dp30 +S'variantvalidator_hgvs_version' +p31 +S'1.1.3' +p32 +sS'uta_schema' +p33 +S'uta_20180821' +p34 +sS'seqrepo_db' +p35 +S'2018-08-21' +p36 +sS'variantvalidator_version' +p37 +S'v0.2' +p38 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant90.txt b/VariantValidator/testing/testOutputsMasterITS/variant90.txt new file mode 100644 index 00000000..8601fc9d --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant90.txt @@ -0,0 +1,261 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_020469.2:c.260_262=' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NC_000009.11:g.136132908T>TC automapped to NC_000009.11:g.136132908_136132909insC' +p9 +aS'The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37' +p10 +aS'NM_020469.2:c.260_262 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11' +p11 +aS'Caution should be used when reporting the displayed variant descriptions' +p12 +aS'If you are unsure, please contact admin' +p13 +aS'RefSeqGene record not available' +p14 +asS'refseqgene_context_intronic_sequence' +p15 +g6 +sS'alt_genomic_loci' +p16 +(lp17 +(dp18 +S'grch37' +p19 +(dp20 +S'hgvs_genomic_description' +p21 +S'NW_003315925.1:g.83614_83616=' +p22 +sS'vcf' +p23 +(dp24 +S'chr' +p25 +S'HG79_PATCH' +p26 +sS'ref' +p27 +VTCA +p28 +sS'pos' +p29 +S'83614' +p30 +sS'alt' +p31 +g28 +sssa(dp32 +S'hg19' +p33 +(dp34 +g21 +S'NW_003315925.1:g.83614_83616=' +p35 +sg23 +(dp36 +g25 +S'NW_003315925.1' +p37 +sg27 +g28 +sg29 +S'83614' +p38 +sg31 +g28 +sssa(dp39 +S'grch38' +p40 +(dp41 +g21 +S'NW_009646201.1:g.83614_83616=' +p42 +sg23 +(dp43 +g25 +S'HG2030_PATCH' +p44 +sg27 +VTCA +p45 +sg29 +S'83614' +p46 +sg31 +g45 +sssa(dp47 +S'hg38' +p48 +(dp49 +g21 +S'NW_009646201.1:g.83614_83616=' +p50 +sg23 +(dp51 +g25 +S'NW_009646201.1' +p52 +sg27 +g45 +sg29 +S'83614' +p53 +sg31 +g45 +sssasS'transcript_description' +p54 +VHomo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA +p55 +sS'gene_symbol' +p56 +S'ABO' +p57 +sS'hgvs_predicted_protein_consequence' +p58 +(dp59 +S'tlr' +p60 
+S'NP_065202.2:p.(Val87=)' +p61 +sS'slr' +p62 +S'NP_065202.2:p.(V87=)' +p63 +ssS'submitted_variant' +p64 +S'9-136132908-T-TC' +p65 +sS'genome_context_intronic_sequence' +p66 +g6 +sS'hgvs_lrg_variant' +p67 +g6 +sS'hgvs_transcript_variant' +p68 +S'NM_020469.2:c.260_262=' +p69 +sS'hgvs_refseqgene_variant' +p70 +g6 +sS'primary_assembly_loci' +p71 +(dp72 +S'hg19' +p73 +(dp74 +g21 +S'NC_000009.11:g.136132908_136132909insC' +p75 +sg23 +(dp76 +g25 +S'chr9' +p77 +sg27 +S'T' +p78 +sg29 +S'136132908' +p79 +sg31 +VTC +p80 +sssg48 +(dp81 +g21 +S'NC_000009.12:g.133257521_133257522insC' +p82 +sg23 +(dp83 +g25 +g77 +sg27 +g78 +sg29 +S'133257521' +p84 +sg31 +VTC +p85 +sssS'grch37' +p86 +(dp87 +g21 +S'NC_000009.11:g.136132908_136132909insC' +p88 +sg23 +(dp89 +g25 +S'9' +p90 +sg27 +g78 +sg29 +S'136132908' +p91 +sg31 +VTC +p92 +sssS'grch38' +p93 +(dp94 +g21 +S'NC_000009.12:g.133257521_133257522insC' +p95 +sg23 +(dp96 +g25 +g90 +sg27 +g78 +sg29 +S'133257521' +p97 +sg31 +VTC +p98 +ssssS'reference_sequence_records' +p99 +(dp100 +S'protein' +p101 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2' +p102 +sS'transcript' +p103 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2' +p104 +sssS'metadata' +p105 +(dp106 +S'variantvalidator_hgvs_version' +p107 +S'1.1.3' +p108 +sS'uta_schema' +p109 +S'uta_20180821' +p110 +sS'seqrepo_db' +p111 +S'2018-08-21' +p112 +sS'variantvalidator_version' +p113 +S'v0.2' +p114 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant91.txt b/VariantValidator/testing/testOutputsMasterITS/variant91.txt new file mode 100644 index 00000000..05e0fda3 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant91.txt @@ -0,0 +1,265 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_020469.2:c.259del' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NC_000009.11:g.136132908TAC>TCA automapped to NC_000009.11:g.136132909_136132910delACinsCA' +p9 +aS'The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37' +p10 +aS'NM_020469.2:c.258_261 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11' +p11 +aS'Caution should be used when reporting the displayed variant descriptions' +p12 +aS'If you are unsure, please contact admin' +p13 +aS'RefSeqGene record not available' +p14 +asS'refseqgene_context_intronic_sequence' +p15 +g6 +sS'alt_genomic_loci' +p16 +(lp17 +(dp18 +S'grch37' +p19 +(dp20 +S'hgvs_genomic_description' +p21 +S'NW_003315925.1:g.83618del' +p22 +sS'vcf' +p23 +(dp24 +S'chr' +p25 +S'HG79_PATCH' +p26 +sS'ref' +p27 +S'AC' +p28 +sS'pos' +p29 +S'83616' +p30 +sS'alt' +p31 +S'A' +p32 +sssa(dp33 +S'hg19' +p34 +(dp35 +g21 +S'NW_003315925.1:g.83618del' +p36 +sg23 +(dp37 +g25 +S'NW_003315925.1' +p38 +sg27 +S'AC' +p39 +sg29 +S'83616' +p40 +sg31 +g32 +sssa(dp41 +S'grch38' +p42 +(dp43 +g21 +S'NW_009646201.1:g.83618del' +p44 +sg23 +(dp45 +g25 +S'HG2030_PATCH' +p46 +sg27 +S'AC' +p47 +sg29 +S'83616' +p48 +sg31 +g32 +sssa(dp49 +S'hg38' +p50 +(dp51 +g21 +S'NW_009646201.1:g.83618del' +p52 +sg23 +(dp53 +g25 +S'NW_009646201.1' +p54 +sg27 +S'AC' +p55 +sg29 +S'83616' +p56 +sg31 +g32 +sssasS'transcript_description' +p57 +VHomo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA +p58 +sS'gene_symbol' +p59 +S'ABO' +p60 +sS'hgvs_predicted_protein_consequence' +p61 +(dp62 
+S'tlr' +p63 +S'NP_065202.2:p.(Val87Ter)' +p64 +sS'slr' +p65 +S'NP_065202.2:p.(V87*)' +p66 +ssS'submitted_variant' +p67 +S'9-136132908-TAC-TCA' +p68 +sS'genome_context_intronic_sequence' +p69 +g6 +sS'hgvs_lrg_variant' +p70 +g6 +sS'hgvs_transcript_variant' +p71 +S'NM_020469.2:c.259del' +p72 +sS'hgvs_refseqgene_variant' +p73 +g6 +sS'primary_assembly_loci' +p74 +(dp75 +S'hg19' +p76 +(dp77 +g21 +S'NC_000009.11:g.136132909_136132910delinsCA' +p78 +sg23 +(dp79 +g25 +S'chr9' +p80 +sg27 +S'AC' +p81 +sg29 +S'136132909' +p82 +sg31 +VCA +p83 +sssg50 +(dp84 +g21 +S'NC_000009.12:g.133257522_133257523delinsCA' +p85 +sg23 +(dp86 +g25 +g80 +sg27 +S'AC' +p87 +sg29 +S'133257522' +p88 +sg31 +VCA +p89 +sssS'grch37' +p90 +(dp91 +g21 +S'NC_000009.11:g.136132909_136132910delinsCA' +p92 +sg23 +(dp93 +g25 +S'9' +p94 +sg27 +S'AC' +p95 +sg29 +S'136132909' +p96 +sg31 +g83 +sssS'grch38' +p97 +(dp98 +g21 +S'NC_000009.12:g.133257522_133257523delinsCA' +p99 +sg23 +(dp100 +g25 +g94 +sg27 +S'AC' +p101 +sg29 +S'133257522' +p102 +sg31 +g89 +ssssS'reference_sequence_records' +p103 +(dp104 +S'protein' +p105 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2' +p106 +sS'transcript' +p107 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2' +p108 +sssS'metadata' +p109 +(dp110 +S'variantvalidator_hgvs_version' +p111 +S'1.1.3' +p112 +sS'uta_schema' +p113 +S'uta_20180821' +p114 +sS'seqrepo_db' +p115 +S'2018-08-21' +p116 +sS'variantvalidator_version' +p117 +S'v0.2' +p118 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant92.txt b/VariantValidator/testing/testOutputsMasterITS/variant92.txt new file mode 100644 index 00000000..d999d667 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant92.txt @@ -0,0 +1,261 @@ +(dp0 +S'NM_020469.2:c.261del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'NC_000009.11:g.136132908TA>TA automapped to NC_000009.11:g.136132908_136132909TA=' +p7 +aS'The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37' +p8 +aS'NM_020469.2:c.261 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11' +p9 +aS'Caution should be used when reporting the displayed variant descriptions' +p10 +aS'If you are unsure, please contact admin' +p11 +aS'RefSeqGene record not available' +p12 +asS'refseqgene_context_intronic_sequence' +p13 +g4 +sS'alt_genomic_loci' +p14 +(lp15 +(dp16 +S'grch37' +p17 +(dp18 +S'hgvs_genomic_description' +p19 +S'NW_003315925.1:g.83615del' +p20 +sS'vcf' +p21 +(dp22 +S'chr' +p23 +S'HG79_PATCH' +p24 +sS'ref' +p25 +S'TC' +p26 +sS'pos' +p27 +S'83614' +p28 +sS'alt' +p29 +S'T' +p30 +sssa(dp31 +S'hg19' +p32 +(dp33 +g19 +S'NW_003315925.1:g.83615del' +p34 +sg21 +(dp35 +g23 +S'NW_003315925.1' +p36 +sg25 +S'TC' +p37 +sg27 +S'83614' +p38 +sg29 +g30 +sssa(dp39 +S'grch38' +p40 +(dp41 +g19 +S'NW_009646201.1:g.83615del' +p42 +sg21 +(dp43 +g23 +S'HG2030_PATCH' +p44 +sg25 +S'TC' +p45 +sg27 +S'83614' +p46 +sg29 +g30 +sssa(dp47 +S'hg38' +p48 +(dp49 +g19 +S'NW_009646201.1:g.83615del' +p50 +sg21 +(dp51 +g23 +S'NW_009646201.1' +p52 +sg25 +S'TC' +p53 +sg27 +S'83614' +p54 +sg29 +g30 +sssasS'transcript_description' +p55 +VHomo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA +p56 +sS'gene_symbol' +p57 +S'ABO' +p58 +sS'hgvs_predicted_protein_consequence' +p59 +(dp60 +S'tlr' +p61 +S'NP_065202.2:p.(Thr88ProfsTer31)' +p62 
+sS'slr' +p63 +S'NP_065202.2:p.(T88Pfs*31)' +p64 +ssS'submitted_variant' +p65 +S'9-136132908-TA-TA' +p66 +sS'genome_context_intronic_sequence' +p67 +g4 +sS'hgvs_lrg_variant' +p68 +g4 +sS'hgvs_transcript_variant' +p69 +S'NM_020469.2:c.261del' +p70 +sS'hgvs_refseqgene_variant' +p71 +g4 +sS'primary_assembly_loci' +p72 +(dp73 +S'hg19' +p74 +(dp75 +g19 +S'NC_000009.11:g.136132908_136132909=' +p76 +sg21 +(dp77 +g23 +S'chr9' +p78 +sg25 +S'TA' +p79 +sg27 +S'136132908' +p80 +sg29 +g79 +sssg48 +(dp81 +g19 +S'NC_000009.12:g.133257521_133257522=' +p82 +sg21 +(dp83 +g23 +g78 +sg25 +S'TA' +p84 +sg27 +S'133257521' +p85 +sg29 +g84 +sssS'grch37' +p86 +(dp87 +g19 +S'NC_000009.11:g.136132908_136132909=' +p88 +sg21 +(dp89 +g23 +S'9' +p90 +sg25 +g79 +sg27 +S'136132908' +p91 +sg29 +g79 +sssS'grch38' +p92 +(dp93 +g19 +S'NC_000009.12:g.133257521_133257522=' +p94 +sg21 +(dp95 +g23 +g90 +sg25 +g84 +sg27 +S'133257521' +p96 +sg29 +g84 +ssssS'reference_sequence_records' +p97 +(dp98 +S'protein' +p99 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2' +p100 +sS'transcript' +p101 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2' +p102 +sssS'flag' +p103 +S'gene_variant' +p104 +sS'metadata' +p105 +(dp106 +S'variantvalidator_hgvs_version' +p107 +S'1.1.3' +p108 +sS'uta_schema' +p109 +S'uta_20180821' +p110 +sS'seqrepo_db' +p111 +S'2018-08-21' +p112 +sS'variantvalidator_version' +p113 +S'v0.2' +p114 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant93.txt b/VariantValidator/testing/testOutputsMasterITS/variant93.txt new file mode 100644 index 00000000..af56eddc --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant93.txt @@ -0,0 +1,265 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_020469.2:c.259del' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'NM_020469.2:c.258delG automapped to NM_020469.2:c.259delG' +p9 +aS'The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37' +p10 +aS'NM_020469.2:c.258_261 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11' +p11 +aS'Caution should be used when reporting the displayed variant descriptions' +p12 +aS'If you are unsure, please contact admin' +p13 +aS'RefSeqGene record not available' +p14 +asS'refseqgene_context_intronic_sequence' +p15 +g6 +sS'alt_genomic_loci' +p16 +(lp17 +(dp18 +S'grch37' +p19 +(dp20 +S'hgvs_genomic_description' +p21 +S'NW_003315925.1:g.83618del' +p22 +sS'vcf' +p23 +(dp24 +S'chr' +p25 +S'HG79_PATCH' +p26 +sS'ref' +p27 +S'AC' +p28 +sS'pos' +p29 +S'83616' +p30 +sS'alt' +p31 +S'A' +p32 +sssa(dp33 +S'hg19' +p34 +(dp35 +g21 +S'NW_003315925.1:g.83618del' +p36 +sg23 +(dp37 +g25 +S'NW_003315925.1' +p38 +sg27 +S'AC' +p39 +sg29 +S'83616' +p40 +sg31 +g32 +sssa(dp41 +S'grch38' +p42 +(dp43 +g21 +S'NW_009646201.1:g.83618del' +p44 +sg23 +(dp45 +g25 +S'HG2030_PATCH' +p46 +sg27 +S'AC' +p47 +sg29 +S'83616' +p48 +sg31 +g32 +sssa(dp49 +S'hg38' +p50 +(dp51 +g21 +S'NW_009646201.1:g.83618del' +p52 +sg23 +(dp53 +g25 +S'NW_009646201.1' +p54 +sg27 +S'AC' +p55 +sg29 +S'83616' +p56 +sg31 +g32 +sssasS'transcript_description' +p57 +VHomo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA +p58 +sS'gene_symbol' +p59 +S'ABO' +p60 +sS'hgvs_predicted_protein_consequence' +p61 +(dp62 +S'tlr' +p63 
+S'NP_065202.2:p.(Val87Ter)' +p64 +sS'slr' +p65 +S'NP_065202.2:p.(V87*)' +p66 +ssS'submitted_variant' +p67 +S'NM_020469.2:c.258delG' +p68 +sS'genome_context_intronic_sequence' +p69 +g6 +sS'hgvs_lrg_variant' +p70 +g6 +sS'hgvs_transcript_variant' +p71 +S'NM_020469.2:c.259del' +p72 +sS'hgvs_refseqgene_variant' +p73 +g6 +sS'primary_assembly_loci' +p74 +(dp75 +S'hg19' +p76 +(dp77 +g21 +S'NC_000009.11:g.136132909_136132910delinsCA' +p78 +sg23 +(dp79 +g25 +S'chr9' +p80 +sg27 +S'AC' +p81 +sg29 +S'136132909' +p82 +sg31 +VCA +p83 +sssg50 +(dp84 +g21 +S'NC_000009.12:g.133257522_133257523delinsCA' +p85 +sg23 +(dp86 +g25 +g80 +sg27 +S'AC' +p87 +sg29 +S'133257522' +p88 +sg31 +VCA +p89 +sssS'grch37' +p90 +(dp91 +g21 +S'NC_000009.11:g.136132909_136132910delinsCA' +p92 +sg23 +(dp93 +g25 +S'9' +p94 +sg27 +S'AC' +p95 +sg29 +S'136132909' +p96 +sg31 +g83 +sssS'grch38' +p97 +(dp98 +g21 +S'NC_000009.12:g.133257522_133257523delinsCA' +p99 +sg23 +(dp100 +g25 +g94 +sg27 +S'AC' +p101 +sg29 +S'133257522' +p102 +sg31 +g89 +ssssS'reference_sequence_records' +p103 +(dp104 +S'protein' +p105 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2' +p106 +sS'transcript' +p107 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2' +p108 +sssS'metadata' +p109 +(dp110 +S'variantvalidator_hgvs_version' +p111 +S'1.1.3' +p112 +sS'uta_schema' +p113 +S'uta_20180821' +p114 +sS'seqrepo_db' +p115 +S'2018-08-21' +p116 +sS'variantvalidator_version' +p117 +S'v0.2' +p118 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant94.txt b/VariantValidator/testing/testOutputsMasterITS/variant94.txt new file mode 100644 index 00000000..d0b9f8b2 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant94.txt @@ -0,0 +1,259 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_020469.2:c.260_262=' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37' +p9 +aS'NM_020469.2:c.260_262 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11' +p10 +aS'Caution should be used when reporting the displayed variant descriptions' +p11 +aS'If you are unsure, please contact admin' +p12 +aS'RefSeqGene record not available' +p13 +asS'refseqgene_context_intronic_sequence' +p14 +g6 +sS'alt_genomic_loci' +p15 +(lp16 +(dp17 +S'grch37' +p18 +(dp19 +S'hgvs_genomic_description' +p20 +S'NW_003315925.1:g.83614_83616=' +p21 +sS'vcf' +p22 +(dp23 +S'chr' +p24 +S'HG79_PATCH' +p25 +sS'ref' +p26 +VTCA +p27 +sS'pos' +p28 +S'83614' +p29 +sS'alt' +p30 +g27 +sssa(dp31 +S'hg19' +p32 +(dp33 +g20 +S'NW_003315925.1:g.83614_83616=' +p34 +sg22 +(dp35 +g24 +S'NW_003315925.1' +p36 +sg26 +g27 +sg28 +S'83614' +p37 +sg30 +g27 +sssa(dp38 +S'grch38' +p39 +(dp40 +g20 +S'NW_009646201.1:g.83614_83616=' +p41 +sg22 +(dp42 +g24 +S'HG2030_PATCH' +p43 +sg26 +VTCA +p44 +sg28 +S'83614' +p45 +sg30 +g44 +sssa(dp46 +S'hg38' +p47 +(dp48 +g20 +S'NW_009646201.1:g.83614_83616=' +p49 +sg22 +(dp50 +g24 +S'NW_009646201.1' +p51 +sg26 +g44 +sg28 +S'83614' +p52 +sg30 +g44 +sssasS'transcript_description' +p53 +VHomo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA +p54 +sS'gene_symbol' +p55 +S'ABO' +p56 +sS'hgvs_predicted_protein_consequence' +p57 +(dp58 +S'tlr' +p59 +S'NP_065202.2:p.(Val87=)' +p60 +sS'slr' +p61 +S'NP_065202.2:p.(V87=)' +p62 
+ssS'submitted_variant' +p63 +S'NM_020469.2:c.260_262TGA=' +p64 +sS'genome_context_intronic_sequence' +p65 +g6 +sS'hgvs_lrg_variant' +p66 +g6 +sS'hgvs_transcript_variant' +p67 +S'NM_020469.2:c.260_262=' +p68 +sS'hgvs_refseqgene_variant' +p69 +g6 +sS'primary_assembly_loci' +p70 +(dp71 +S'hg19' +p72 +(dp73 +g20 +S'NC_000009.11:g.136132908_136132909insC' +p74 +sg22 +(dp75 +g24 +S'chr9' +p76 +sg26 +S'T' +p77 +sg28 +S'136132908' +p78 +sg30 +VTC +p79 +sssg47 +(dp80 +g20 +S'NC_000009.12:g.133257521_133257522insC' +p81 +sg22 +(dp82 +g24 +g76 +sg26 +g77 +sg28 +S'133257521' +p83 +sg30 +VTC +p84 +sssS'grch37' +p85 +(dp86 +g20 +S'NC_000009.11:g.136132908_136132909insC' +p87 +sg22 +(dp88 +g24 +S'9' +p89 +sg26 +g77 +sg28 +S'136132908' +p90 +sg30 +VTC +p91 +sssS'grch38' +p92 +(dp93 +g20 +S'NC_000009.12:g.133257521_133257522insC' +p94 +sg22 +(dp95 +g24 +g89 +sg26 +g77 +sg28 +S'133257521' +p96 +sg30 +VTC +p97 +ssssS'reference_sequence_records' +p98 +(dp99 +S'protein' +p100 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2' +p101 +sS'transcript' +p102 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2' +p103 +sssS'metadata' +p104 +(dp105 +S'variantvalidator_hgvs_version' +p106 +S'1.1.3' +p107 +sS'uta_schema' +p108 +S'uta_20180821' +p109 +sS'seqrepo_db' +p110 +S'2018-08-21' +p111 +sS'variantvalidator_version' +p112 +S'v0.2' +p113 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant95.txt b/VariantValidator/testing/testOutputsMasterITS/variant95.txt new file mode 100644 index 00000000..f2a0e24e --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant95.txt @@ -0,0 +1,259 @@ +(dp0 +S'NM_020469.2:c.261del' +p1 +(dp2 +S'hgvs_lrg_transcript_variant' +p3 +S'' +p4 +sS'validation_warnings' +p5 +(lp6 +S'The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37' +p7 +aS'NM_020469.2:c.261 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11' +p8 +aS'Caution should be used when reporting the displayed variant descriptions' +p9 +aS'If you are unsure, please contact admin' +p10 +aS'RefSeqGene record not available' +p11 +asS'refseqgene_context_intronic_sequence' +p12 +g4 +sS'alt_genomic_loci' +p13 +(lp14 +(dp15 +S'grch37' +p16 +(dp17 +S'hgvs_genomic_description' +p18 +S'NW_003315925.1:g.83615del' +p19 +sS'vcf' +p20 +(dp21 +S'chr' +p22 +S'HG79_PATCH' +p23 +sS'ref' +p24 +S'TC' +p25 +sS'pos' +p26 +S'83614' +p27 +sS'alt' +p28 +S'T' +p29 +sssa(dp30 +S'hg19' +p31 +(dp32 +g18 +S'NW_003315925.1:g.83615del' +p33 +sg20 +(dp34 +g22 +S'NW_003315925.1' +p35 +sg24 +S'TC' +p36 +sg26 +S'83614' +p37 +sg28 +g29 +sssa(dp38 +S'grch38' +p39 +(dp40 +g18 +S'NW_009646201.1:g.83615del' +p41 +sg20 +(dp42 +g22 +S'HG2030_PATCH' +p43 +sg24 +S'TC' +p44 +sg26 +S'83614' +p45 +sg28 +g29 +sssa(dp46 +S'hg38' +p47 +(dp48 +g18 +S'NW_009646201.1:g.83615del' +p49 +sg20 +(dp50 +g22 +S'NW_009646201.1' +p51 +sg24 +S'TC' +p52 +sg26 +S'83614' +p53 +sg28 +g29 +sssasS'transcript_description' +p54 +VHomo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA +p55 +sS'gene_symbol' +p56 +S'ABO' +p57 +sS'hgvs_predicted_protein_consequence' +p58 +(dp59 +S'tlr' +p60 +S'NP_065202.2:p.(Thr88ProfsTer31)' +p61 +sS'slr' +p62 +S'NP_065202.2:p.(T88Pfs*31)' +p63 +ssS'submitted_variant' +p64 
+S'NM_020469.2:c.261delG' +p65 +sS'genome_context_intronic_sequence' +p66 +g4 +sS'hgvs_lrg_variant' +p67 +g4 +sS'hgvs_transcript_variant' +p68 +S'NM_020469.2:c.261del' +p69 +sS'hgvs_refseqgene_variant' +p70 +g4 +sS'primary_assembly_loci' +p71 +(dp72 +S'hg19' +p73 +(dp74 +g18 +S'NC_000009.11:g.136132908_136132909=' +p75 +sg20 +(dp76 +g22 +S'chr9' +p77 +sg24 +S'TA' +p78 +sg26 +S'136132908' +p79 +sg28 +g78 +sssg47 +(dp80 +g18 +S'NC_000009.12:g.133257521_133257522=' +p81 +sg20 +(dp82 +g22 +g77 +sg24 +S'TA' +p83 +sg26 +S'133257521' +p84 +sg28 +g83 +sssS'grch37' +p85 +(dp86 +g18 +S'NC_000009.11:g.136132908_136132909=' +p87 +sg20 +(dp88 +g22 +S'9' +p89 +sg24 +g78 +sg26 +S'136132908' +p90 +sg28 +g78 +sssS'grch38' +p91 +(dp92 +g18 +S'NC_000009.12:g.133257521_133257522=' +p93 +sg20 +(dp94 +g22 +g89 +sg24 +g83 +sg26 +S'133257521' +p95 +sg28 +g83 +ssssS'reference_sequence_records' +p96 +(dp97 +S'protein' +p98 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2' +p99 +sS'transcript' +p100 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2' +p101 +sssS'flag' +p102 +S'gene_variant' +p103 +sS'metadata' +p104 +(dp105 +S'variantvalidator_hgvs_version' +p106 +S'1.1.3' +p107 +sS'uta_schema' +p108 +S'uta_20180821' +p109 +sS'seqrepo_db' +p110 +S'2018-08-21' +p111 +sS'variantvalidator_version' +p112 +S'v0.2' +p113 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant96.txt b/VariantValidator/testing/testOutputsMasterITS/variant96.txt new file mode 100644 index 00000000..89f2fb9a --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant96.txt @@ -0,0 +1,262 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_020469.2:c.261dup' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37' +p9 +aS'NM_020469.2:c.261 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11' +p10 +aS'Caution should be used when reporting the displayed variant descriptions' +p11 +aS'If you are unsure, please contact admin' +p12 +aS'RefSeqGene record not available' +p13 +asS'refseqgene_context_intronic_sequence' +p14 +g6 +sS'alt_genomic_loci' +p15 +(lp16 +(dp17 +S'grch37' +p18 +(dp19 +S'hgvs_genomic_description' +p20 +S'NW_003315925.1:g.83615dup' +p21 +sS'vcf' +p22 +(dp23 +S'chr' +p24 +S'HG79_PATCH' +p25 +sS'ref' +p26 +S'C' +p27 +sS'pos' +p28 +S'83615' +p29 +sS'alt' +p30 +VCC +p31 +sssa(dp32 +S'hg19' +p33 +(dp34 +g20 +S'NW_003315925.1:g.83615dup' +p35 +sg22 +(dp36 +g24 +S'NW_003315925.1' +p37 +sg26 +g27 +sg28 +S'83615' +p38 +sg30 +VCC +p39 +sssa(dp40 +S'grch38' +p41 +(dp42 +g20 +S'NW_009646201.1:g.83615dup' +p43 +sg22 +(dp44 +g24 +S'HG2030_PATCH' +p45 +sg26 +g27 +sg28 +S'83615' +p46 +sg30 +VCC +p47 +sssa(dp48 +S'hg38' +p49 +(dp50 +g20 +S'NW_009646201.1:g.83615dup' +p51 +sg22 +(dp52 +g24 +S'NW_009646201.1' +p53 +sg26 +g27 +sg28 +S'83615' +p54 +sg30 +VCC +p55 +sssasS'transcript_description' +p56 +VHomo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA +p57 +sS'gene_symbol' +p58 +S'ABO' +p59 +sS'hgvs_predicted_protein_consequence' +p60 +(dp61 +S'tlr' +p62 +S'NP_065202.2:p.(Thr88AspfsTer107)' +p63 +sS'slr' +p64 +S'NP_065202.2:p.(T88Dfs*107)' +p65 
+ssS'submitted_variant' +p66 +S'NM_020469.2:c.261dupG' +p67 +sS'genome_context_intronic_sequence' +p68 +g6 +sS'hgvs_lrg_variant' +p69 +g6 +sS'hgvs_transcript_variant' +p70 +S'NM_020469.2:c.261dup' +p71 +sS'hgvs_refseqgene_variant' +p72 +g6 +sS'primary_assembly_loci' +p73 +(dp74 +S'hg19' +p75 +(dp76 +g20 +S'NC_000009.11:g.136132908_136132909insCC' +p77 +sg22 +(dp78 +g24 +S'chr9' +p79 +sg26 +S'T' +p80 +sg28 +S'136132908' +p81 +sg30 +VTCC +p82 +sssg49 +(dp83 +g20 +S'NC_000009.12:g.133257521_133257522insCC' +p84 +sg22 +(dp85 +g24 +g79 +sg26 +g80 +sg28 +S'133257521' +p86 +sg30 +VTCC +p87 +sssS'grch37' +p88 +(dp89 +g20 +S'NC_000009.11:g.136132908_136132909insCC' +p90 +sg22 +(dp91 +g24 +S'9' +p92 +sg26 +g80 +sg28 +S'136132908' +p93 +sg30 +VTCC +p94 +sssS'grch38' +p95 +(dp96 +g20 +S'NC_000009.12:g.133257521_133257522insCC' +p97 +sg22 +(dp98 +g24 +g92 +sg26 +g80 +sg28 +S'133257521' +p99 +sg30 +VTCC +p100 +ssssS'reference_sequence_records' +p101 +(dp102 +S'protein' +p103 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2' +p104 +sS'transcript' +p105 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2' +p106 +sssS'metadata' +p107 +(dp108 +S'variantvalidator_hgvs_version' +p109 +S'1.1.3' +p110 +sS'uta_schema' +p111 +S'uta_20180821' +p112 +sS'seqrepo_db' +p113 +S'2018-08-21' +p114 +sS'variantvalidator_version' +p115 +S'v0.2' +p116 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant97.txt b/VariantValidator/testing/testOutputsMasterITS/variant97.txt new file mode 100644 index 00000000..9b6e02f9 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant97.txt @@ -0,0 +1,261 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'NM_020469.2:c.261_262insTT' +p3 +(dp4 +S'hgvs_lrg_transcript_variant' +p5 +S'' +p6 +sS'validation_warnings' +p7 +(lp8 +S'The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37' +p9 +aS'NM_020469.2:c.261_262 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11' +p10 +aS'Caution should be used when reporting the displayed variant descriptions' +p11 +aS'If you are unsure, please contact admin' +p12 +aS'RefSeqGene record not available' +p13 +asS'refseqgene_context_intronic_sequence' +p14 +g6 +sS'alt_genomic_loci' +p15 +(lp16 +(dp17 +S'grch37' +p18 +(dp19 +S'hgvs_genomic_description' +p20 +S'NW_003315925.1:g.83614_83615insAA' +p21 +sS'vcf' +p22 +(dp23 +S'chr' +p24 +S'HG79_PATCH' +p25 +sS'ref' +p26 +S'T' +p27 +sS'pos' +p28 +S'83614' +p29 +sS'alt' +p30 +VTAA +p31 +sssa(dp32 +S'hg19' +p33 +(dp34 +g20 +S'NW_003315925.1:g.83614_83615insAA' +p35 +sg22 +(dp36 +g24 +S'NW_003315925.1' +p37 +sg26 +g27 +sg28 +S'83614' +p38 +sg30 +VTAA +p39 +sssa(dp40 +S'grch38' +p41 +(dp42 +g20 +S'NW_009646201.1:g.83614_83615insAA' +p43 +sg22 +(dp44 +g24 +S'HG2030_PATCH' +p45 +sg26 +g27 +sg28 +S'83614' +p46 +sg30 +VTAA +p47 +sssa(dp48 +S'hg38' +p49 +(dp50 +g20 +S'NW_009646201.1:g.83614_83615insAA' +p51 +sg22 +(dp52 +g24 +S'NW_009646201.1' +p53 +sg26 +g27 +sg28 +S'83614' +p54 +sg30 +VTAA +p55 +sssasS'transcript_description' +p56 +VHomo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA +p57 +sS'gene_symbol' +p58 +S'ABO' +p59 +sS'hgvs_predicted_protein_consequence' +p60 +(dp61 +S'tlr' +p62 +S'NP_065202.2:p.(Thr88LeufsTer32)' +p63 +sS'slr' +p64 
+S'NP_065202.2:p.(T88Lfs*32)' +p65 +ssS'submitted_variant' +p66 +S'NM_020469.2:c.261_262insTT' +p67 +sS'genome_context_intronic_sequence' +p68 +g6 +sS'hgvs_lrg_variant' +p69 +g6 +sS'hgvs_transcript_variant' +p70 +S'NM_020469.2:c.261_262insTT' +p71 +sS'hgvs_refseqgene_variant' +p72 +g6 +sS'primary_assembly_loci' +p73 +(dp74 +S'hg19' +p75 +(dp76 +g20 +S'NC_000009.11:g.136132909_136132910insACA' +p77 +sg22 +(dp78 +g24 +S'chr9' +p79 +sg26 +g27 +sg28 +S'136132908' +p80 +sg30 +VTAAC +p81 +sssg49 +(dp82 +g20 +S'NC_000009.12:g.133257522_133257523insACA' +p83 +sg22 +(dp84 +g24 +g79 +sg26 +g27 +sg28 +S'133257521' +p85 +sg30 +VTAAC +p86 +sssS'grch37' +p87 +(dp88 +g20 +S'NC_000009.11:g.136132909_136132910insACA' +p89 +sg22 +(dp90 +g24 +S'9' +p91 +sg26 +g27 +sg28 +S'136132908' +p92 +sg30 +VTAAC +p93 +sssS'grch38' +p94 +(dp95 +g20 +S'NC_000009.12:g.133257522_133257523insACA' +p96 +sg22 +(dp97 +g24 +g91 +sg26 +g27 +sg28 +S'133257521' +p98 +sg30 +VTAAC +p99 +ssssS'reference_sequence_records' +p100 +(dp101 +S'protein' +p102 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2' +p103 +sS'transcript' +p104 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2' +p105 +sssS'metadata' +p106 +(dp107 +S'variantvalidator_hgvs_version' +p108 +S'1.1.3' +p109 +sS'uta_schema' +p110 +S'uta_20180821' +p111 +sS'seqrepo_db' +p112 +S'2018-08-21' +p113 +sS'variantvalidator_version' +p114 +S'v0.2' +p115 +ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant98.txt b/VariantValidator/testing/testOutputsMasterITS/variant98.txt new file mode 100644 index 00000000..37a32b2b --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant98.txt @@ -0,0 +1,309 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_007121.5:c.515A>T' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' +p19 +aS'NM_007121.5:c.515_517 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' +p20 +aS'Caution should be used when reporting the displayed variant descriptions' +p21 +aS'If you are unsure, please contact admin' +p22 +aS'RefSeqGene record not available' +p23 +asS'refseqgene_context_intronic_sequence' +p24 +g16 +sS'alt_genomic_loci' +p25 +(lp26 +sS'transcript_description' +p27 +VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA +p28 +sS'gene_symbol' +p29 +S'NR1H2' +p30 +sS'hgvs_predicted_protein_consequence' +p31 +(dp32 +S'tlr' +p33 +S'NP_009052.3:p.(Lys172Ile)' +p34 +sS'slr' +p35 +S'NP_009052.3:p.(K172I)' +p36 +ssS'submitted_variant' +p37 +S'NC_000019.10:g.50378563_50378564insTAC' +p38 +sS'genome_context_intronic_sequence' +p39 +g16 +sS'hgvs_lrg_variant' +p40 +g16 +sS'hgvs_transcript_variant' +p41 +S'NM_007121.5:c.515A>T' +p42 +sS'hgvs_refseqgene_variant' +p43 +g16 +sS'primary_assembly_loci' +p44 +(dp45 +S'grch38' +p46 +(dp47 +S'hgvs_genomic_description' +p48 +S'NC_000019.10:g.50378563_50378564insTAC' +p49 +sS'vcf' +p50 +(dp51 +S'chr' +p52 +S'19' +p53 +sS'ref' +p54 +S'A' +p55 +sS'pos' +p56 +S'50378563' +p57 +sS'alt' +p58 +VATAC +p59 
+sssS'grch37' +p60 +(dp61 +g48 +S'NC_000019.9:g.50881820_50881821insTAC' +p62 +sg50 +(dp63 +g52 +g53 +sg54 +g55 +sg56 +S'50881820' +p64 +sg58 +VATAC +p65 +sssS'hg38' +p66 +(dp67 +g48 +S'NC_000019.10:g.50378563_50378564insTAC' +p68 +sg50 +(dp69 +g52 +S'chr19' +p70 +sg54 +g55 +sg56 +S'50378563' +p71 +sg58 +VATAC +p72 +sssS'hg19' +p73 +(dp74 +g48 +S'NC_000019.9:g.50881820_50881821insTAC' +p75 +sg50 +(dp76 +g52 +g70 +sg54 +g55 +sg56 +S'50881820' +p77 +sg58 +VATAC +p78 +ssssS'reference_sequence_records' +p79 +(dp80 +S'protein' +p81 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3' +p82 +sS'transcript' +p83 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5' +p84 +sssS'NM_001256647.1:c.224A>T' +p85 +(dp86 +g15 +g16 +sg17 +(lp87 +S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' +p88 +aS'NM_001256647.1:c.224_226 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' +p89 +aS'Caution should be used when reporting the displayed variant descriptions' +p90 +aS'If you are unsure, please contact admin' +p91 +aS'RefSeqGene record not available' +p92 +asg24 +g16 +sg25 +(lp93 +sg27 +VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA +p94 +sg29 +S'NR1H2' +p95 +sg31 +(dp96 +g33 +S'NP_001243576.1:p.(Lys75Ile)' +p97 +sg35 +S'NP_001243576.1:p.(K75I)' +p98 +ssg37 +g38 +sg39 +g16 +sg40 +g16 +sg41 +S'NM_001256647.1:c.224A>T' +p99 +sg43 +g16 +sg44 +(dp100 +S'grch38' +p101 +(dp102 +g48 +S'NC_000019.10:g.50378563_50378564insTAC' +p103 +sg50 +(dp104 +g52 +g53 +sg54 +g55 +sg56 +S'50378563' +p105 +sg58 +VATAC +p106 +sssS'grch37' +p107 +(dp108 +g48 +S'NC_000019.9:g.50881820_50881821insTAC' +p109 +sg50 +(dp110 +g52 +g53 +sg54 +g55 +sg56 +S'50881820' +p111 +sg58 +VATAC +p112 +sssg66 +(dp113 +g48 +S'NC_000019.10:g.50378563_50378564insTAC' +p114 +sg50 +(dp115 +g52 +g70 +sg54 +g55 +sg56 +S'50378563' +p116 +sg58 +VATAC +p117 +sssS'hg19' +p118 +(dp119 +g48 
+S'NC_000019.9:g.50881820_50881821insTAC' +p120 +sg50 +(dp121 +g52 +g70 +sg54 +g55 +sg56 +S'50881820' +p122 +sg58 +VATAC +p123 +ssssg79 +(dp124 +g81 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1' +p125 +sg83 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1' +p126 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant99.txt b/VariantValidator/testing/testOutputsMasterITS/variant99.txt new file mode 100644 index 00000000..2d9a43c8 --- /dev/null +++ b/VariantValidator/testing/testOutputsMasterITS/variant99.txt @@ -0,0 +1,309 @@ +(dp0 +S'flag' +p1 +S'gene_variant' +p2 +sS'metadata' +p3 +(dp4 +S'variantvalidator_hgvs_version' +p5 +S'1.1.3' +p6 +sS'uta_schema' +p7 +S'uta_20180821' +p8 +sS'seqrepo_db' +p9 +S'2018-08-21' +p10 +sS'variantvalidator_version' +p11 +S'v0.2' +p12 +ssS'NM_007121.5:c.515_516del' +p13 +(dp14 +S'hgvs_lrg_transcript_variant' +p15 +S'' +p16 +sS'validation_warnings' +p17 +(lp18 +S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' +p19 +aS'NM_007121.5:c.514_515 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' +p20 +aS'Caution should be used when reporting the displayed variant descriptions' +p21 +aS'If you are unsure, please contact admin' +p22 +aS'RefSeqGene record not available' +p23 +asS'refseqgene_context_intronic_sequence' +p24 +g16 +sS'alt_genomic_loci' +p25 +(lp26 +sS'transcript_description' +p27 +VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA +p28 +sS'gene_symbol' +p29 +S'NR1H2' +p30 +sS'hgvs_predicted_protein_consequence' +p31 +(dp32 +S'tlr' +p33 +S'NP_009052.3:p.(Lys172ThrfsTer34)' +p34 +sS'slr' +p35 +S'NP_009052.3:p.(K172Tfs*34)' +p36 +ssS'submitted_variant' +p37 +S'NC_000019.10:g.50378563_50378564insC' +p38 +sS'genome_context_intronic_sequence' +p39 +g16 +sS'hgvs_lrg_variant' +p40 +g16 +sS'hgvs_transcript_variant' +p41 +S'NM_007121.5:c.515_516del' +p42 
+sS'hgvs_refseqgene_variant' +p43 +g16 +sS'primary_assembly_loci' +p44 +(dp45 +S'grch38' +p46 +(dp47 +S'hgvs_genomic_description' +p48 +S'NC_000019.10:g.50378563_50378564insC' +p49 +sS'vcf' +p50 +(dp51 +S'chr' +p52 +S'19' +p53 +sS'ref' +p54 +S'A' +p55 +sS'pos' +p56 +S'50378563' +p57 +sS'alt' +p58 +S'AC' +p59 +sssS'grch37' +p60 +(dp61 +g48 +S'NC_000019.9:g.50881820_50881821insC' +p62 +sg50 +(dp63 +g52 +g53 +sg54 +g55 +sg56 +S'50881820' +p64 +sg58 +S'AC' +p65 +sssS'hg38' +p66 +(dp67 +g48 +S'NC_000019.10:g.50378563_50378564insC' +p68 +sg50 +(dp69 +g52 +S'chr19' +p70 +sg54 +g55 +sg56 +S'50378563' +p71 +sg58 +S'AC' +p72 +sssS'hg19' +p73 +(dp74 +g48 +S'NC_000019.9:g.50881820_50881821insC' +p75 +sg50 +(dp76 +g52 +g70 +sg54 +g55 +sg56 +S'50881820' +p77 +sg58 +S'AC' +p78 +ssssS'reference_sequence_records' +p79 +(dp80 +S'protein' +p81 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3' +p82 +sS'transcript' +p83 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5' +p84 +sssS'NM_001256647.1:c.224_225del' +p85 +(dp86 +g15 +g16 +sg17 +(lp87 +S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' +p88 +aS'NM_001256647.1:c.223_224 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' +p89 +aS'Caution should be used when reporting the displayed variant descriptions' +p90 +aS'If you are unsure, please contact admin' +p91 +aS'RefSeqGene record not available' +p92 +asg24 +g16 +sg25 +(lp93 +sg27 +VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA +p94 +sg29 +S'NR1H2' +p95 +sg31 +(dp96 +g33 +S'NP_001243576.1:p.(Lys75ThrfsTer34)' +p97 +sg35 +S'NP_001243576.1:p.(K75Tfs*34)' +p98 +ssg37 +g38 +sg39 +g16 +sg40 +g16 +sg41 +S'NM_001256647.1:c.224_225del' +p99 +sg43 +g16 +sg44 +(dp100 +S'grch38' +p101 +(dp102 +g48 +S'NC_000019.10:g.50378563_50378564insC' +p103 +sg50 +(dp104 +g52 +g53 +sg54 +g55 +sg56 +S'50378563' +p105 +sg58 +S'AC' +p106 +sssS'grch37' +p107 +(dp108 +g48 
+S'NC_000019.9:g.50881820_50881821insC' +p109 +sg50 +(dp110 +g52 +g53 +sg54 +g55 +sg56 +S'50881820' +p111 +sg58 +S'AC' +p112 +sssg66 +(dp113 +g48 +S'NC_000019.10:g.50378563_50378564insC' +p114 +sg50 +(dp115 +g52 +g70 +sg54 +g55 +sg56 +S'50378563' +p116 +sg58 +S'AC' +p117 +sssS'hg19' +p118 +(dp119 +g48 +S'NC_000019.9:g.50881820_50881821insC' +p120 +sg50 +(dp121 +g52 +g70 +sg54 +g55 +sg56 +S'50881820' +p122 +sg58 +S'AC' +p123 +ssssg79 +(dp124 +g81 +S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1' +p125 +sg83 +S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1' +p126 +sss. \ No newline at end of file diff --git a/VariantValidator/testing/vvTestCompare.py b/VariantValidator/testing/vvTestCompare.py index 65992bef..1c17e2c7 100644 --- a/VariantValidator/testing/vvTestCompare.py +++ b/VariantValidator/testing/vvTestCompare.py @@ -3,7 +3,7 @@ import vvTestFunctions as fn masterDirectory="testOutputsMasterITS" -testDirectories=["testOutputsBuran"] +testDirectories=["testOutputs"] for d in testDirectories: print("Comparing "+masterDirectory+" and "+d) From 7343d84b7bdae7d14985ee40d8caac3385d94291 Mon Sep 17 00:00:00 2001 From: buran Date: Wed, 23 Jan 2019 12:16:13 +0000 Subject: [PATCH 029/223] Modified manual files --- INSTALLATION.md | 6 +- MANUAL.md | 71 +++++++++------------ VERSION.txt | 2 +- VariantValidator/simpleTestScript.py | 14 ++++ VariantValidator/testing/vvTestCompare.py | 2 +- VariantValidator/testing/vvTestFunctions.py | 25 -------- 6 files changed, 50 insertions(+), 70 deletions(-) create mode 100644 VariantValidator/simpleTestScript.py diff --git a/INSTALLATION.md b/INSTALLATION.md index 059a8de6..db6b3b46 100644 --- a/INSTALLATION.md +++ b/INSTALLATION.md @@ -121,4 +121,8 @@ After it finishes downloading, check it installed correctly: ## Configuration -See the file MANUAL.md for configuration instructions. 
+See the file MANUAL.md for configuration instructions - but before you attempt to configure anything, run + > python simpleTestScript.py +to check that validator's dependencies are installed correctly, and allow it to create a blank configuration file on your system. + + diff --git a/MANUAL.md b/MANUAL.md index 0327120e..dcedfc35 100644 --- a/MANUAL.md +++ b/MANUAL.md @@ -1,57 +1,51 @@ # Variant Validator Operation Manual ## Configuration +Variant Validator will create a configuration file for each user if it does not detect one, located in the folder + > ~/.config/VariantValidator/config.ini +This file, freshly created, will be missing the path to the SeqRepo directory which you should fill in after installation accordingly. If the configuration file hasn't been filled in correctly, the validator will exit immediately with an error. -Presently Variant Validator uses a combination of environment variables to configure itself. The configuration file is in /VariantValidator/configuration/config.ini and should be edited with the current user's details. Specifically, the section: +It's possible to use a remote seqrepo directory, at a cost of greatly reduced performance. + +The mysql database is configured in this section: > [mysql] > host = 127.0.0.1 > database = validator > user = vvadmin > password = var1ant -needs to be changed if the variant validator database login details are different. +Information here also needs to be changed if the variant validator database login details are different. + +The [uta] section also contains path information to the UTA archive, if it's installed. The section > [logging] -contains a single variable: - > string = error file console trace -which can be changed to alter the level of verbosity of the validator output. Alternatively you can set the environment variable VALIDATOR_DEBUG to a string of the same format. -The string should contain any of the following words: -* file - Writes the logging output to the "vvLog.txt" file.
Without the word "file" in the environment variable, the logs will be posted instead to the console. -* debug - Logs all events, including debugging. -* trace - Used for diagnosis during development. -* info - Information events on the decisions the validator is making are logged. -* warning - Warnings indicate malformed variants. This is the default logging level. -* error - Variants that produce errors are nonsensical to the point where they cannot be validated. -* critical - Fatal errors that crash the validator are logged at this level. -During a test, this is set to maximum verbosity. +contains several headings which can be changed to alter the level of verbosity of the validator output. + +* file - If "True", writes the logging output to the "vvLog.txt" file in the current working directory. While useful for diagnostics, logging in this way has permissions issues and will fill up the hard disk of an automated installation quickly. +* level - Can be one of several values. All errors below the selected level of severity will not be logged. By default, and to help with setting things up, the info level statements will be logged, but you should change this to make the validator less talkative in normal use. +** debug - Logs all events, including debugging. +** info - Information events on the decisions the validator is making are logged. +** warning - Warnings indicate malformed variants. This is the default logging level. +** error - Variants that produce errors are nonsensical to the point where they cannot be validated. +** critical - Fatal errors that crash the validator are logged at this level. +* trace - Used for diagnosis during development. Can be set to 'True' if you need to profile the validator code. The validator itself will set environment variables to allow for the correct operation of HGVS software. ## Operation -Python scripts importing variant validator will have to set up a last few configuration variables before they can proceed. 
These variables must be set in such a way that they don't go out of scope - otherwise the validator won't work. - -This example script will validate the variant NM_000088.3:c.589G>T and then print the output as a json file. You might need to change it to point to the correct seqrepo directory. +Validating variants, provided the software is installed correctly, is as simple as: -> import json -> import os -> seqrepo_current_version = '2018-08-21' -> HGVS_SEQREPO_DIR = '~/seqrepo/' + seqrepo_current_version -> os.environ['HGVS_SEQREPO_DIR'] = HGVS_SEQREPO_DIR -> uta_current_version = 'uta_20180821' -> UTA_DB_URL = 'postgresql://uta_admin:uta_admin@127.0.0.1/uta/' + uta_current_version -> os.environ['UTA_DB_URL'] = UTA_DB_URL -> from VariantValidator import variantValidator -> variantValidator.my_config() - -From this point onward, -> variant = 'NM_000088.3:c.589G>T' +> from VariantValidator import Validator +> +> validator = Validator() +> variant = 'NC_000012.11:g.122064776delG' > select_transcripts = 'all' > selected_assembly = 'GRCh37' -> validation = variantValidator.validator(variant, selected_assembly, select_transcripts) -> print json.dumps(validation, sort_keys=True, indent=4, separators=(',', ': ')) +> +> out=Validator().validate(variant, selected_assembly, select_transcripts) -Much of the script is currently reladed to setting up environment variables. In future versions, this information will be stored in a local configuration file. +The 'out' object is a simple dictionary containing the genetic information of the validated variant. The simpleTestScript.py will validate this variant and then print the output nicely as a json. 
The accepted formats for variants include: > NM_000088.3:c.589G>T @@ -70,17 +64,10 @@ Possible assemblies are: You can select all transcripts by passing 'all', or use multiple transcripts with: > select_transcripts = 'NM_022356.3| NM_001146289.1| NM_001243246.1' -View supported transcripts for a gene example: HGNC gene symbol https://www.genenames.org/ -> variantValidator.validator.gene2transcripts ('HTT') -RefSeq Transcript -> variantValidator.validator.gene2transcripts (' NM_002111.8') -Get reference sequence for HGVS variant description -> variantValidator.validator.hgvs2ref('NM_000088.3:c.589_594del') - ## Unit testing Variant Validator is written to be pytest-compatible. Run > pytest -in the variant validator root folder, the same as that in which this file resides. The test will take several minutes to complete, but runs through over three hundred common and malformed variants. - +in the variant validator testing folder, the same as that in which this file resides. The test will take several minutes to complete, but runs through over three hundred common and malformed variants. 
+ diff --git a/VERSION.txt b/VERSION.txt index 08a623bd..abfc95d6 100644 --- a/VERSION.txt +++ b/VERSION.txt @@ -1 +1 @@ -0.1.0_dev_pre_a \ No newline at end of file +0.9a diff --git a/VariantValidator/simpleTestScript.py b/VariantValidator/simpleTestScript.py new file mode 100644 index 00000000..a351b611 --- /dev/null +++ b/VariantValidator/simpleTestScript.py @@ -0,0 +1,14 @@ +import json +import os + +from VariantValidator import Validator + +#variant = 'NM_000088.3:c.589G>T' +variant = 'NC_000012.11:g.122064776delG' +select_transcripts = 'all' +selected_assembly = 'GRCh37' + +validator=Validator() +out=Validator().validate(variant, selected_assembly, select_transcripts) + +print json.dumps(out, sort_keys=True, indent=4, separators=(',', ': ')) diff --git a/VariantValidator/testing/vvTestCompare.py b/VariantValidator/testing/vvTestCompare.py index 1c17e2c7..8d013405 100644 --- a/VariantValidator/testing/vvTestCompare.py +++ b/VariantValidator/testing/vvTestCompare.py @@ -2,7 +2,7 @@ import vvTestFunctions as fn -masterDirectory="testOutputsMasterITS" +masterDirectory="testOutputsMasterITS " testDirectories=["testOutputs"] for d in testDirectories: diff --git a/VariantValidator/testing/vvTestFunctions.py b/VariantValidator/testing/vvTestFunctions.py index 25f4ea27..415dfe49 100644 --- a/VariantValidator/testing/vvTestFunctions.py +++ b/VariantValidator/testing/vvTestFunctions.py @@ -18,31 +18,6 @@ hl=logging.getLogger("hgvs.dataproviders.uta") hl.addHandler(logConsoleHandler) - -''' -try: - print("Configuring for personal linux") - seqrepo_current_version='2018-08-21' - HGVS_SEQREPO_DIR='/home/buran/documents/workspace/ITS/seqrepo/'+seqrepo_current_version - os.environ['HGVS_SEQREPO_DIR']=HGVS_SEQREPO_DIR - uta_current_version='uta_20180821' - UTA_DB_URL='postgresql://uta_admin:uta_admin@127.0.0.1/uta/' + uta_current_version - os.environ['UTA_DB_URL']=UTA_DB_URL - from VariantValidator import variantValidator as vv - vv.my_config() -except 
sqlite3.OperationalError: - print("Configuring for VM") - seqrepo_current_version = '2018-08-21' - HGVS_SEQREPO_DIR = '/Users/pjf9/variant_validator_data/seqrepo/' + seqrepo_current_version - os.environ['HGVS_SEQREPO_DIR'] = HGVS_SEQREPO_DIR - uta_current_version = 'uta_20180821' - UTA_DB_URL = 'postgresql://uta_admin:uta_admin@127.0.0.1/uta/' + uta_current_version - os.environ['UTA_DB_URL'] = UTA_DB_URL - os.environ['PYLIFTOVER_DIR'] = '/Users/pjf9/variant_validator_data/pyLiftover/' - from VariantValidator import variantValidator as vv - -''' - def generateTestFolder(path, inputVariants, validator): #Saves the results of running inputVariants to a folder given in saveDirectory. if not os.path.isdir(path): From 2b912aa17eef6279f9611e4b60f66b975ff3195d Mon Sep 17 00:00:00 2001 From: buran Date: Thu, 24 Jan 2019 13:24:47 +0000 Subject: [PATCH 030/223] Converted database from subobjects to mixins, to try and remove circular references in an attempt to fix connection bug with pytest --- VariantValidator/modules/vvDBGet.py | 14 ++-- VariantValidator/modules/vvDBInit.py | 33 ++++++++++ VariantValidator/modules/vvDBInsert.py | 46 +++++++------ VariantValidator/modules/vvDatabase.py | 66 ++++++++----------- VariantValidator/modules/vvFunctions.py | 17 ++--- VariantValidator/modules/vvHGVS.py | 2 +- VariantValidator/modules/vvMixinConverters.py | 10 +-- VariantValidator/modules/vvMixinCore.py | 46 ++++++------- VariantValidator/modules/vvMixinInit.py | 1 + 9 files changed, 126 insertions(+), 109 deletions(-) create mode 100644 VariantValidator/modules/vvDBInit.py diff --git a/VariantValidator/modules/vvDBGet.py b/VariantValidator/modules/vvDBGet.py index b891610e..76b8c515 100644 --- a/VariantValidator/modules/vvDBGet.py +++ b/VariantValidator/modules/vvDBGet.py @@ -1,25 +1,23 @@ from vvFunctions import handleCursor from vvLogging import logger +import vvDBInit -class vvDBGet: +class Mixin(vvDBInit.Mixin): ''' Most of the functions in DBGet generate queries for retrieving 
data from the databases. ''' - def __init__(self,db): - # These are inherited by reference from the vvDatabase object. - self.db=db @handleCursor def execute(self,query): - self.db.cursor.execute(query) - row = self.db.cursor.fetchone() + self.cursor.execute(query) + row = self.cursor.fetchone() if row is None: logger.debug("No data returned from query "+str(query)) row = ['none', 'No data'] return row @handleCursor def executeAll(self,query): - self.db.cursor.execute(query) - rows = self.db.cursor.fetchall() + self.cursor.execute(query) + rows = self.cursor.fetchall() if rows==[]: logger.debug("No data returned from query "+str(query)) rows = ['none', 'No data'] diff --git a/VariantValidator/modules/vvDBInit.py b/VariantValidator/modules/vvDBInit.py new file mode 100644 index 00000000..3d2816df --- /dev/null +++ b/VariantValidator/modules/vvDBInit.py @@ -0,0 +1,33 @@ +import mysql.connector +from mysql.connector.pooling import MySQLConnectionPool +import os + +class Mixin(): + ''' + A mixin containing the database initialisation routines. + ''' + def __init__(self,val,dbConfig): + self.conn = None + # self.cursor will be none UNLESS you're wrapping a function in @handleCursor, which automatically opens and + # closes connections for you. 
+ self.cursor=None + self.dbConfig=dbConfig + # Construct database URL + #'mysqlx://vvadmin:var1ant@127.0.0.1/validator' + self.path="mysqlx://"+dbConfig["user"]+":"+dbConfig["password"]+"@"+dbConfig["host"]+"/"+dbConfig["database"] + os.environ["VALIDATOR_DB_URL"]=self.path + self.val=val + self.pool=mysql.connector.pooling.MySQLConnectionPool(pool_size=10, **self.dbConfig) + self.conn=self.pool.get_connection() + def __del__(self): + if self.conn: + self.conn.close() + self.conn=None + if self.pool: + self.pool.close() + self.pool=None + if self.cursor: + self.cursor.close() + self.cursor=None + if self.val: + self.val=None diff --git a/VariantValidator/modules/vvDBInsert.py b/VariantValidator/modules/vvDBInsert.py index c006bc00..59cac186 100644 --- a/VariantValidator/modules/vvDBInsert.py +++ b/VariantValidator/modules/vvDBInsert.py @@ -1,12 +1,10 @@ from vvFunctions import handleCursor +import vvDBGet -class vvDBInsert: +class Mixin(vvDBGet.Mixin): ''' This object is a function container for inserting objects into the database. ''' - def __init__(self,db): - # These are inherited by reference from the vvDatabase object. - self.db=db # Add new entry def add_entry(self,entry, data, table): return self.insert(entry, data, table) @@ -25,65 +23,65 @@ def insert(self,entry, data, table): hgnc_symbol = data[4] uta_symbol = data[5] query = "INSERT INTO transcript_info(refSeqID, description, transcriptVariant, currentVersion, hgncSymbol, utaSymbol, updated) VALUES(%s,%s, %s, %s, %s, %s, NOW())" - self.db.cursor.execute(query, (accession, description, variant, version, hgnc_symbol, uta_symbol)) + self.cursor.execute(query, (accession, description, variant, version, hgnc_symbol, uta_symbol)) # Query report - if self.db.cursor.lastrowid: + if self.cursor.lastrowid: success = 'true' else: success = 'Unknown error' - # Commit and close connection - self.db.conn.commit() + # Commit and close connection (?close?) 
+ self.conn.commit() return success @handleCursor def insert_refSeqGene_data(self,rsg_data): query = "INSERT INTO refSeqGene_loci(refSeqGeneID, refSeqChromosomeID, genomeBuild, startPos, endPos, orientation, totalLength, chrPos, rsgPos, entrezID, hgncSymbol, updated) VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())" - self.db.cursor.execute(query, (rsg_data[0], rsg_data[1], rsg_data[2], rsg_data[3], rsg_data[4], rsg_data[5], rsg_data[6], rsg_data[7], rsg_data[8], rsg_data[9], rsg_data[10])) + self.cursor.execute(query, (rsg_data[0], rsg_data[1], rsg_data[2], rsg_data[3], rsg_data[4], rsg_data[5], rsg_data[6], rsg_data[7], rsg_data[8], rsg_data[9], rsg_data[10])) # Query report - if self.db.cursor.lastrowid: + if self.cursor.lastrowid: success = 'true' else: success = 'Unknown error' # Commit and close connection - self.db.conn.commit() + self.conn.commit() return success @handleCursor def insert_RefSeqGeneID_from_lrgID(self,lrg_rs_lookup): query = "INSERT INTO LRG_RSG_lookup(lrgID, hgncSymbol, RefSeqGeneID, status) VALUES(%s,%s,%s,%s)" - self.db.cursor.execute(query, (lrg_rs_lookup[0], lrg_rs_lookup[1], lrg_rs_lookup[2], lrg_rs_lookup[3])) + self.cursor.execute(query, (lrg_rs_lookup[0], lrg_rs_lookup[1], lrg_rs_lookup[2], lrg_rs_lookup[3])) # Query report - if self.db.cursor.lastrowid: + if self.cursor.lastrowid: success = 'true' else: success = 'Unknown error' # Commit and close connection - self.db.conn.commit() + self.conn.commit() return success @handleCursor def insert_LRG_transcript_data(self,lrgtx_to_rstID): query = "INSERT INTO LRG_transcripts(LRGtranscriptID, RefSeqTranscriptID) VALUES(%s,%s)" - self.db.cursor.execute(query, (lrgtx_to_rstID[0], lrgtx_to_rstID[1])) + self.cursor.execute(query, (lrgtx_to_rstID[0], lrgtx_to_rstID[1])) # Query report - if self.db.cursor.lastrowid: + if self.cursor.lastrowid: success = 'true' else: success = 'Unknown error' # Commit and close connection - self.db.conn.commit() + self.conn.commit() return success 
@handleCursor def insert_LRG_protein_data(self,lrg_p, rs_p): query = "INSERT INTO LRG_proteins(LRGproteinID, RefSeqProteinID) VALUES(%s,%s)" - self.db.cursor.execute(query, (lrg_p, rs_p)) + self.cursor.execute(query, (lrg_p, rs_p)) # Query report - if self.db.cursor.lastrowid: + if self.cursor.lastrowid: success = 'true' else: success = 'Unknown error' # Commit and close connection - self.db.conn.commit() + self.conn.commit() return success # from dbupdate @handleCursor @@ -97,17 +95,17 @@ def update(self,entry, data, table): hgnc_symbol = data[4] uta_symbol = data[5] query = "UPDATE transcript_info SET description=%s, transcriptVariant=%s, currentVersion=%s, hgncSymbol=%s, utaSymbol=%s, updated=NOW() WHERE refSeqID = %s" - self.db.cursor.execute(query, (description, variant, version, hgnc_symbol, uta_symbol, accession)) + self.cursor.execute(query, (description, variant, version, hgnc_symbol, uta_symbol, accession)) success = 'true' - self.db.conn.commit() + self.conn.commit() return success # 'true'??? check this. 
@handleCursor def update_refSeqGene_data(self,rsg_data): query = "UPDATE refSeqGene_loci SET hgncSymbol=%s, updated=NOW() WHERE refSeqGeneID=%s" - self.db.cursor.execute(query, (rsg_data[10], rsg_data[0])) + self.cursor.execute(query, (rsg_data[10], rsg_data[0])) success = 'true' - self.db.conn.commit() + self.conn.commit() return success # Update entries def update_entry(self,entry, data, table): diff --git a/VariantValidator/modules/vvDatabase.py b/VariantValidator/modules/vvDatabase.py index b711b81f..f3b77f2e 100644 --- a/VariantValidator/modules/vvDatabase.py +++ b/VariantValidator/modules/vvDatabase.py @@ -1,42 +1,28 @@ -import mysql.connector -from mysql.connector.pooling import MySQLConnectionPool from vvLogging import logger import vvFunctions as fn from vvFunctions import handleCursor -from vvDBInsert import vvDBInsert -from vvDBGet import vvDBGet +#from vvDBInsert import vvDBInsert +#from vvDBGet import vvDBGet +import vvDBInsert import urllib2 import copy import re import os -class vvDatabase: +class vvDatabase(vvDBInsert.Mixin): ''' This class contains and handles the mysql connections for the variant validator database. + + It now uses mixins, and the order of inheritance is + vvDBInit.Mixin + v + vvDBGet.Mixin + v + vvDBInsert.Mixin + v + vvDatabase ''' - def __init__(self,val,dbConfig): - self.conn = None - # self.cursor will be none UNLESS you're wrapping a function in @handleCursor, which automatically opens and - # closes connections for you. 
- self.cursor=None - self.dbConfig=dbConfig - # Construct database URL - #'mysqlx://vvadmin:var1ant@127.0.0.1/validator' - self.path="mysqlx://"+dbConfig["user"]+":"+dbConfig["password"]+"@"+dbConfig["host"]+"/"+dbConfig["database"] - os.environ["VALIDATOR_DB_URL"]=self.path - self.val=val - self.insert = vvDBInsert(self) # contains dbinsert, dbupdate - self.get = vvDBGet(self) # contains dbfetchone, dbfetchall - self.db=self #needed to make handlecursor behave - self.pool=mysql.connector.pooling.MySQLConnectionPool(pool_size=10, **self.dbConfig) - def __del__(self): - if self.conn: - self.conn.close() - if self.pool: - self.pool.close() - if self.cursor: - self.cursor.close() # from dbquery @handleCursor def query_with_fetchone(self,entry, table): @@ -156,34 +142,34 @@ def update_transcript_info_record(self,accession, hdp): returned_data = self.in_entries(version, table) # If the entry is not in the database add it if 'none' in returned_data: - self.insert.add_entry(version, query_info, table) + self.add_entry(version, query_info, table) # If the data in the entry has changed, update it else: - self.insert.update_entry(version, query_info, table) + self.update_entry(version, query_info, table) return def update_refSeqGene_loci(self,rsg_data): # First query the database - entry_exists = self.get.get_refSeqGene_data_by_refSeqGeneID(rsg_data[0], rsg_data[2]) + entry_exists = self.get_refSeqGene_data_by_refSeqGeneID(rsg_data[0], rsg_data[2]) if entry_exists[0] == 'none': - self.insert.insert_refSeqGene_data(rsg_data) + self.insert_refSeqGene_data(rsg_data) else: - self.insert.update_refSeqGene_data(rsg_data) + self.update_refSeqGene_data(rsg_data) def update_lrg_rs_lookup(self,lrg_rs_lookup): # First query the database - rsgID = self.get.get_RefSeqGeneID_from_lrgID(lrg_rs_lookup[0]) + rsgID = self.get_RefSeqGeneID_from_lrgID(lrg_rs_lookup[0]) if rsgID == 'none': - self.insert.insert_RefSeqGeneID_from_lrgID(lrg_rs_lookup) + 
self.insert_RefSeqGeneID_from_lrgID(lrg_rs_lookup) def update_lrgt_rst(self,lrgtx_to_rstID): # First query the database - rstID = self.get.get_RefSeqTranscriptID_from_lrgTranscriptID(lrgtx_to_rstID[0]) + rstID = self.get_RefSeqTranscriptID_from_lrgTranscriptID(lrgtx_to_rstID[0]) if rstID == 'none': - self.insert.insert_LRG_transcript_data(lrgtx_to_rstID) + self.insert_LRG_transcript_data(lrgtx_to_rstID) def update_lrg_p_rs_p_lookup(self,lrg_p, rs_p): # First query the database - rspID = self.get.get_RefSeqProteinID_from_lrgProteinID(lrg_p) + rspID = self.get_RefSeqProteinID_from_lrgProteinID(lrg_p) if rspID == 'none': - self.insert.insert_LRG_protein_data(lrg_p, rs_p) + self.insert_LRG_protein_data(lrg_p, rs_p) # From variantValidator.py def update_vv_data(self): # Update refSeqGene Primary assembly alignment data @@ -420,7 +406,7 @@ def update_rsg(self): # Set up code to write to database for line in to_mysql: - current_symbol = self.get.get_gene_symbol_from_refSeqGeneID(line[0]) + current_symbol = self.get_gene_symbol_from_refSeqGeneID(line[0]) if line[10] == current_symbol: pass else: @@ -546,7 +532,7 @@ def ref_type_assign(self,accession): ref_type = ':p.' elif re.match('LRG_', accession): if re.search('t', accession): - refseqtranscript_reference = self.get.get_RefSeqTranscriptID_from_lrgTranscriptID(accession) + refseqtranscript_reference = self.get_RefSeqTranscriptID_from_lrgTranscriptID(accession) if re.match('NM_', refseqtranscript_reference): ref_type = ':c.' 
else: diff --git a/VariantValidator/modules/vvFunctions.py b/VariantValidator/modules/vvFunctions.py index ce314778..acd0978e 100644 --- a/VariantValidator/modules/vvFunctions.py +++ b/VariantValidator/modules/vvFunctions.py @@ -11,6 +11,7 @@ import re import copy import mysql +import time #from urllib.parse import urlparse #Python 3 @@ -20,15 +21,15 @@ def handleCursor(func): ''' @functools.wraps(func) def wrapper(self,*args,**kwargs): -# if self.db.pool==None: -# self.db.pool=mysql.connector.pooling.MySQLConnectionPool(pool_size=10, **self.db.dbConfig) - self.db.conn=self.db.pool.get_connection() - self.db.cursor = self.db.conn.cursor(buffered=True) +# if self.pool==None: +# self.pool=mysql.connector.pooling.MySQLConnectionPool(pool_size=10, **self.dbConfig) +# self.conn=self.pool.get_connection() + self.cursor = self.conn.cursor(buffered=True) out=func(self,*args,**kwargs) - if self.db.cursor: - self.db.cursor.close() - if self.db.conn: - self.db.conn.close() + if self.cursor: + self.cursor.close() +# if self.conn: +# self.conn.close() #self.cursor=None return out return wrapper diff --git a/VariantValidator/modules/vvHGVS.py b/VariantValidator/modules/vvHGVS.py index 5e364439..2a279d5e 100644 --- a/VariantValidator/modules/vvHGVS.py +++ b/VariantValidator/modules/vvHGVS.py @@ -72,7 +72,7 @@ def pvcf_to_hgvs(input, selected_assembly, normalization_direction, reverse_norm # Assign reference sequence type ref_type = ':g.' 
if re.match('LRG_', accession): - accession = validator.db.get.get_RefSeqGeneID_from_lrgID(accession) + accession = validator.db.get_RefSeqGeneID_from_lrgID(accession) # Reformat the variant input = str(accession) + ref_type + str(positionAndEdit) diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index 0552380d..09e9f29e 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -200,7 +200,7 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): # Gap gene black list try: - gene_symbol = self.db.get.get_gene_symbol_from_transcriptID(hgvs_c.ac) + gene_symbol = self.db.get_gene_symbol_from_transcriptID(hgvs_c.ac) except Exception: utilise_gap_code = False else: @@ -1092,7 +1092,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): # Gap gene black list try: - gene_symbol = self.db.get.get_gene_symbol_from_transcriptID(hgvs_c.ac) + gene_symbol = self.db.get_gene_symbol_from_transcriptID(hgvs_c.ac) except Exception: utilise_gap_code = False else: @@ -1630,7 +1630,7 @@ def hgvs_r_to_c(self, hgvs_object): """ # check for LRG_t with r. 
if re.match('LRG', hgvs_object.ac): - transcript_ac = self.db.get.get_RefSeqTranscriptID_from_lrgTranscriptID(hgvs_object.ac) + transcript_ac = self.db.get_RefSeqTranscriptID_from_lrgTranscriptID(hgvs_object.ac) if transcript_ac == 'none': raise HGVSDataNotAvailableError('Unable to identify a relevant transcript for ' + hgvs_object.ac) else: @@ -2364,7 +2364,7 @@ def chr_to_rsg(self, hgvs_genomic, hn, vrOld): rsg_data_set = [] # Recover table from MySql - all_info = self.db.get.get_g_to_g_info() + all_info = self.db.get_g_to_g_info() for line in all_info: # Logic to identify the correct RefSeqGene rsg_data = {} @@ -2512,7 +2512,7 @@ def rsg_to_chr(self, hgvs_refseqgene, primary_assembly, hn, vr): chr_data_set = [] # Recover table from MySql - all_info = self.db.get.get_g_to_g_info() + all_info = self.db.get_g_to_g_info() for line in all_info: # Logic to identify the correct RefSeqGene chr_data = {} diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 24c9ebc4..f662cbcd 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -86,7 +86,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr for id in select_transcripts_list: id = id.strip() if re.match('LRG', id): - id = self.db.get.get_RefSeqTranscriptID_from_lrgTranscriptID(id) + id = self.db.get_RefSeqTranscriptID_from_lrgTranscriptID(id) if id == 'none': continue select_transcripts_dict_plus_version[id] = '' @@ -396,9 +396,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr ref_type = self.db.ref_type_assign(accession) if re.match('LRG_', accession): if ref_type == ':g.': - accession = self.db.get.get_RefSeqGeneID_from_lrgID(accession) + accession = self.db.get_RefSeqGeneID_from_lrgID(accession) else: - accession = self.db.get.get_RefSeqTranscriptID_from_lrgTranscriptID(accession) + accession = self.db.get_RefSeqTranscriptID_from_lrgTranscriptID(accession) 
else: accession = accession input = str(accession) + ref_type + str(positionAndEdit) @@ -443,7 +443,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr pre_input = copy.deepcopy(input) input_list = input.split(':') query_a_symbol = input_list[0] - is_it_a_gene = self.db.get.get_hgnc_symbol(query_a_symbol) + is_it_a_gene = self.db.get_hgnc_symbol(query_a_symbol) if is_it_a_gene == 'none': pos_ref_alt = str(input_list[1]) positionAndEdit = input_list[1] @@ -485,9 +485,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr pre_input = copy.deepcopy(input) query_a_symbol = pre_input.split(':')[0] tx_edit = pre_input.split(':')[1] - is_it_a_gene = self.db.get.get_hgnc_symbol(query_a_symbol) + is_it_a_gene = self.db.get_hgnc_symbol(query_a_symbol) if is_it_a_gene != 'none': - uta_symbol = self.db.get.get_uta_symbol(is_it_a_gene) + uta_symbol = self.db.get_uta_symbol(is_it_a_gene) available_transcripts = self.hdp.get_tx_for_gene(uta_symbol) select_from_these_transcripts = {} for tx in available_transcripts: @@ -539,9 +539,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if re.match('^NG_', input): refSeqGeneID = input.split(':')[0] tx_edit = input.split(':')[1] - gene_symbol = self.db.get.get_gene_symbol_from_refSeqGeneID(refSeqGeneID) + gene_symbol = self.db.get_gene_symbol_from_refSeqGeneID(refSeqGeneID) if gene_symbol != 'none': - uta_symbol = self.db.get.get_uta_symbol(gene_symbol) + uta_symbol = self.db.get_uta_symbol(gene_symbol) available_transcripts = self.hdp.get_tx_for_gene(uta_symbol) select_from_these_transcripts = {} for tx in available_transcripts: @@ -815,7 +815,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr input) or re.match( '^LRG_\d+:n.', input): lrg_reference, variation = input.split(':') - refseqgene_reference = self.db.get.get_RefSeqGeneID_from_lrgID(lrg_reference) + refseqgene_reference = 
self.db.get_RefSeqGeneID_from_lrgID(lrg_reference) if refseqgene_reference != 'none': input = refseqgene_reference + ':' + variation if caution == '': @@ -827,7 +827,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr elif re.match('^LRG_\d+t\d+:c.', input) or re.match('^LRG_\d+t\d+:n.', input) or re.match( '^LRG_\d+t\d+:p.', input) or re.match('^LRG_\d+t\d+:g.', input): lrg_reference, variation = input.split(':') - refseqtranscript_reference = self.db.get.get_RefSeqTranscriptID_from_lrgTranscriptID( + refseqtranscript_reference = self.db.get_RefSeqTranscriptID_from_lrgTranscriptID( lrg_reference) if refseqtranscript_reference != 'none': input = refseqtranscript_reference + ':' + variation @@ -1086,7 +1086,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr str(input_parses)) or re.match( '^LRG_\d+:c.', str(input_parses)) or re.match('^LRG_\d+:n.', str(input_parses)): lrg_reference, variation = str(input_parses).split(':') - refseqgene_reference = self.db.get.get_RefSeqGeneID_from_lrgID(lrg_reference) + refseqgene_reference = self.db.get_RefSeqGeneID_from_lrgID(lrg_reference) if refseqgene_reference != 'none': input_parses.ac = refseqgene_reference variant = str(input_parses) @@ -1102,7 +1102,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr str(input_parses)) or re.match( '^LRG_\d+t\d+:p.', str(input_parses)) or re.match('^LRG_\d+t\d+:g.', str(input_parses)): lrg_reference, variation = str(input_parses).split(':') - refseqtranscript_reference = self.db.get.get_RefSeqTranscriptID_from_lrgTranscriptID( + refseqtranscript_reference = self.db.get_RefSeqTranscriptID_from_lrgTranscriptID( lrg_reference) if refseqtranscript_reference != 'none': input_parses.ac = refseqtranscript_reference @@ -1433,7 +1433,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr except hgvs.exceptions.HGVSDataNotAvailableError as e: tx_ac = input_parses.ac 
try: - gene_symbol = self.db.get.get_gene_symbol_from_transcriptID(tx_ac) + gene_symbol = self.db.get_gene_symbol_from_transcriptID(tx_ac) except: gene_symbol = 'none' if gene_symbol == 'none': @@ -1668,7 +1668,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr except hgvs.exceptions.HGVSDataNotAvailableError as e: tx_ac = input_parses.ac try: - gene_symbol = self.db.get.get_gene_symbol_from_transcriptID(tx_ac) + gene_symbol = self.db.get_gene_symbol_from_transcriptID(tx_ac) except: gene_symbol = 'none' if gene_symbol == 'none': @@ -3179,7 +3179,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr logger.warning(str(error)) continue try: - gene_symbol = self.db.get.get_gene_symbol_from_transcriptID(tx_ac) + gene_symbol = self.db.get_gene_symbol_from_transcriptID(tx_ac) except: gene_symbol = 'none' if gene_symbol == 'none': @@ -3191,7 +3191,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue except TypeError as e: try: - gene_symbol = self.db.get.get_gene_symbol_from_transcriptID(tx_ac) + gene_symbol = self.db.get_gene_symbol_from_transcriptID(tx_ac) except: gene_symbol = 'none' if gene_symbol == 'none': @@ -3931,7 +3931,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Gap gene black list try: - gene_symbol = self.db.get.get_gene_symbol_from_transcriptID(hgvs_coding.ac) + gene_symbol = self.db.get_gene_symbol_from_transcriptID(hgvs_coding.ac) except Exception: fn.exceptPass() else: @@ -6434,7 +6434,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_refseqgene_variant = 'false' else: hgvs_refseqgene_variant = self.hp.parse_hgvs_variant(refseqgene_variant) - rsg_ac = self.db.get.get_lrgID_from_RefSeqGeneID(str(hgvs_refseqgene_variant.ac)) + rsg_ac = self.db.get_lrgID_from_RefSeqGeneID(str(hgvs_refseqgene_variant.ac)) if rsg_ac[0] == 'none': lrg_variant = '' else: @@ -6462,7 +6462,7 
@@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Handle LRG lrg_status = 'public' - lrg_transcript = self.db.get.get_lrgTranscriptID_from_RefSeqTranscriptID(transcript_accession) + lrg_transcript = self.db.get_lrgTranscriptID_from_RefSeqTranscriptID(transcript_accession) if lrg_transcript == 'none': lrg_transcript_variant = '' else: @@ -6525,14 +6525,14 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr predicted_protein_variant = valid['protein'] if re.match('NP_', predicted_protein_variant): rs_p, pred_prot_posedit = predicted_protein_variant.split(':') - lrg_p = self.db.get.get_lrgProteinID_from_RefSeqProteinID(rs_p) + lrg_p = self.db.get_lrgProteinID_from_RefSeqProteinID(rs_p) if re.match('LRG', lrg_p): predicted_protein_variant = rs_p + '(' + lrg_p + '):' + pred_prot_posedit # Gene if transcript_accession != '': try: - gene_symbol = self.db.get.get_gene_symbol_from_transcriptID(transcript_accession) + gene_symbol = self.db.get_gene_symbol_from_transcriptID(transcript_accession) except: gene_symbol = 'Unable to verify gene symbol for ' + str(transcript_accession) else: @@ -6557,7 +6557,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_coding = self.hp.parse_hgvs_variant(str(tx_variant)) # Gap gene black list try: - gene_symbol = self.db.get.get_gene_symbol_from_transcriptID(hgvs_coding.ac) + gene_symbol = self.db.get_gene_symbol_from_transcriptID(hgvs_coding.ac) except Exception: fn.exceptPass() else: @@ -8099,7 +8099,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr dict_out['reference_sequence_records'] = '' # Add links to reference_sequence_records - ref_records = self.db.get.get_urls(dict_out) + ref_records = self.db.get_urls(dict_out) if ref_records != {}: dict_out['reference_sequence_records'] = ref_records diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index 
3839c941..08e00ab6 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -104,6 +104,7 @@ def __init__(self): 'database':config["mysql"]["database"], 'raise_on_warnings': True } + #Create database access objects self.db=vvDatabase(self,self.dbConfig) # Set up versions __version__ = config["variantValidator"]['version'] From 5cbbae5447d8b8f21de7e779281bdb1af1689d62 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 15 Feb 2019 10:38:48 +0000 Subject: [PATCH 031/223] Added tests and made changes corresponding to alterations in master branch --- VariantValidator/modules/vvHGVS.py | 6 +- VariantValidator/modules/vvMixinConverters.py | 9 +- VariantValidator/modules/vvMixinCore.py | 21 +- VariantValidator/testing/test_vv.py | 5 +- test/test_inputs.py | 18956 ++++++++++++++++ 5 files changed, 18986 insertions(+), 11 deletions(-) create mode 100644 test/test_inputs.py diff --git a/VariantValidator/modules/vvHGVS.py b/VariantValidator/modules/vvHGVS.py index 2a279d5e..a978e39f 100644 --- a/VariantValidator/modules/vvHGVS.py +++ b/VariantValidator/modules/vvHGVS.py @@ -495,9 +495,9 @@ def report_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): dup_seq = reverse_normalized_hgvs_genomic.posedit.edit.ref vcf_ref_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) # Assemble - pos = str(start + 1) - ref = vcf_ref_seq[1:] - alt = vcf_ref_seq[1:] + dup_seq + pos = str(start) + ref = vcf_ref_seq[0] + alt = vcf_ref_seq else: chr = '' ref = '' diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index 09e9f29e..7c91eaea 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -2050,7 +2050,7 @@ def merge_hgvs_3pr(self, hgvs_variant_list,hn): # Generate the alt sequence alt_sequence = '' for hgvs_v in full_list: - ref_alt = vvHGVS.hgvs_ref_alt(hgvs_v) + ref_alt = 
vvHGVS.hgvs_ref_alt(hgvs_v, self.sf) alt_sequence = alt_sequence + ref_alt['alt'] # Fetch the reference sequence and copy it for the basis of the alt sequence @@ -2159,7 +2159,7 @@ def merge_hgvs_5pr(self, hgvs_variant_list): # Generate the alt sequence alt_sequence = '' for hgvs_v in full_list: - ref_alt = vvHGVS.hgvs_ref_alt(hgvs_v) + ref_alt = vvHGVS.hgvs_ref_alt(hgvs_v, self.sf) alt_sequence = alt_sequence + ref_alt['alt'] # Fetch the reference sequence and copy it for the basis of the alt sequence @@ -2277,6 +2277,8 @@ def hgvs_alleles(self, variant_description,hn): merge = [] allele = str(self.merge_hgvs_3pr(each_allele,hn)) merge.append(allele) + for variant in each_allele: + merged_alleles.append([variant]) merged_alleles.append(merge) my_alleles = merged_alleles @@ -2320,13 +2322,14 @@ def hgvs_alleles(self, variant_description,hn): merged_alleles = [] for each_allele in my_alleles: - print each_allele if re.search('\?', str(each_allele)): # NM_004006.2:c.[2376G>C];[?] continue merge = [] allele = str(self.merge_hgvs_3pr(each_allele,hn)) merge.append(allele) + for variant in each_allele: + merged_alleles.append([variant]) merged_alleles.append(merge) my_alleles = merged_alleles diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index f662cbcd..3974a7a8 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -936,7 +936,10 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr except hgvs.exceptions.HGVSError as e: error = str(e) if error == 'false': - input_parses.ac = input_parses.ac.upper() + if 'LRG' in input_parses.ac: + input_parses.ac.replace('T', 't') + else: + input_parses.ac = input_parses.ac.upper() if hasattr(input_parses.posedit.edit, 'alt'): if input_parses.posedit.edit.alt is not None: input_parses.posedit.edit.alt = input_parses.posedit.edit.alt.upper() @@ -6131,8 +6134,8 @@ def validate(self, batch_variant, 
selected_assembly, select_transcripts, transcr recovered_rsg.sort() recovered_rsg.reverse() - if 'NG_' in recovered_rsg: - refseqgene_ac = recovered_rsg + if len(recovered_rsg) > 0 and 'NG_' in recovered_rsg[0]: + refseqgene_ac = recovered_rsg[0] else: refseqgene_ac = '' @@ -8123,12 +8126,22 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if set_output_type_flag == 'gene': validation_output['flag'] = 'gene_variant' validation_error_counter = 0 + validation_obsolete_counter = 0 for valid_v in batch_out: if valid_v['validation_warnings'] == ['Validation error']: validation_error_counter = validation_error_counter + 1 identification_key = 'Validation_Error_%s' % (str(validation_error_counter)) else: - identification_key = '%s' % (str(valid_v['hgvs_transcript_variant'])) + obs_obs = False + for ob_rec in valid_v['validation_warnings']: + if 'obsolete' in ob_rec: + validation_obsolete_counter = validation_obsolete_counter + 1 + obs_obs = True + break + if obs_obs is True: + identification_key = 'obsolete_record_%s' % (str(validation_obsolete_counter)) + else: + identification_key = '%s' % (str(valid_v['hgvs_transcript_variant'])) # if identification_key not in validation_output.keys(): validation_output[identification_key] = valid_v diff --git a/VariantValidator/testing/test_vv.py b/VariantValidator/testing/test_vv.py index b63b2b32..4032f77b 100644 --- a/VariantValidator/testing/test_vv.py +++ b/VariantValidator/testing/test_vv.py @@ -4,7 +4,7 @@ import vvTestFunctions as fn from VariantValidator import Validator -inputVariants=fn.loadVariantFile("inputVariants.txt") +inputVariants=fn.loadVariantFile(os.path.join(os.path.dirname(__file__), "inputVariants.txt")) ''' print("Configuring for personal linux") @@ -30,10 +30,12 @@ def constructValidation(request): del val return out +@pytest.mark.skip(reason="old test") def test_validation_output(constructValidation): v=constructValidation assert v!=None +@pytest.mark.skip(reason="old 
test") def test_validation_errors(constructValidation): v=constructValidation logs=v["metadata"]["logs"].split("\n") @@ -43,6 +45,7 @@ def test_validation_errors(constructValidation): e+=1 assert e==0 +@pytest.mark.skip(reason="old test") def test_validation_criticals(constructValidation): v=constructValidation logs=v["metadata"]["logs"].split("\n") diff --git a/test/test_inputs.py b/test/test_inputs.py new file mode 100644 index 00000000..64b187de --- /dev/null +++ b/test/test_inputs.py @@ -0,0 +1,18956 @@ +from VariantValidator import Validator + +class TestVariants(object): + + @classmethod + def setup_class(cls): + cls.vv = Validator() + + def test_variant1(self): + variant = 'NM_015120.4:c.35T>C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_015120.4:c.35T>C' in results.keys() + assert results['NM_015120.4:c.35T>C']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.35T>C' + assert results['NM_015120.4:c.35T>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_015120.4:c.35T>C']['alt_genomic_loci'] == [] + assert results['NM_015120.4:c.35T>C']['transcript_description'] == 'Homo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA' + assert results['NM_015120.4:c.35T>C']['gene_symbol'] == 'ALMS1' + assert results['NM_015120.4:c.35T>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Leu12Pro)', 'slr': 'NP_055935.4:p.(L12P)'} + assert results['NM_015120.4:c.35T>C']['submitted_variant'] == 'NM_015120.4:c.35T>C' + assert results['NM_015120.4:c.35T>C']['genome_context_intronic_sequence'] == '' + assert results['NM_015120.4:c.35T>C']['hgvs_lrg_variant'] == 'LRG_741:g.5146T>C' + assert results['NM_015120.4:c.35T>C']['hgvs_transcript_variant'] == 'NM_015120.4:c.35T>C' + assert results['NM_015120.4:c.35T>C']['hgvs_refseqgene_variant'] == 'NG_011690.1:g.5146T>C' + assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000002.11:g.73613031delinsCGGA', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '73613031', 'alt': 'CGGA'}} + assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385903delinsCGGA', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '73385903', 'alt': 'CGGA'}} + assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613031delinsCGGA', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '73613031', 'alt': 'CGGA'}} + assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385903delinsCGGA', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '73385903', 'alt': 'CGGA'}} + assert results['NM_015120.4:c.35T>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} + + assert results['flag'] == 'gene_variant' + + def test_variant2(self): + variant = 'NM_015120.4:c.39G>C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_015120.4:c.39G>C' in results.keys() + assert results['NM_015120.4:c.39G>C']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.39G>C' + assert results['NM_015120.4:c.39G>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_015120.4:c.39G>C']['alt_genomic_loci'] == [] + assert results['NM_015120.4:c.39G>C']['transcript_description'] == 'Homo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA' + assert results['NM_015120.4:c.39G>C']['gene_symbol'] == 'ALMS1' + assert results['NM_015120.4:c.39G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Glu13Asp)', 'slr': 'NP_055935.4:p.(E13D)'} + assert 
results['NM_015120.4:c.39G>C']['submitted_variant'] == 'NM_015120.4:c.39G>C' + assert results['NM_015120.4:c.39G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_015120.4:c.39G>C']['hgvs_lrg_variant'] == 'LRG_741:g.5150G>C' + assert results['NM_015120.4:c.39G>C']['hgvs_transcript_variant'] == 'NM_015120.4:c.39G>C' + assert results['NM_015120.4:c.39G>C']['hgvs_refseqgene_variant'] == 'NG_011690.1:g.5150G>C' + assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613034_73613035insCGA', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '73613032', 'alt': 'GGAC'}} + assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385906_73385907insCGA', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '73385904', 'alt': 'GGAC'}} + assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613034_73613035insCGA', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '73613032', 'alt': 'GGAC'}} + assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385906_73385907insCGA', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '73385904', 'alt': 'GGAC'}} + assert results['NM_015120.4:c.39G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} + + + def test_variant3(self): + variant = 'NM_015120.4:c.34C>T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_015120.4:c.34C>T' in results.keys() + assert results['NM_015120.4:c.34C>T']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.34C>T' + assert 
results['NM_015120.4:c.34C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_015120.4:c.34C>T']['alt_genomic_loci'] == [] + assert results['NM_015120.4:c.34C>T']['transcript_description'] == 'Homo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA' + assert results['NM_015120.4:c.34C>T']['gene_symbol'] == 'ALMS1' + assert results['NM_015120.4:c.34C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Leu12=)', 'slr': 'NP_055935.4:p.(L12=)'} + assert results['NM_015120.4:c.34C>T']['submitted_variant'] == 'NM_015120.4:c.34C>T' + assert results['NM_015120.4:c.34C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_015120.4:c.34C>T']['hgvs_lrg_variant'] == 'LRG_741:g.5145C>T' + assert results['NM_015120.4:c.34C>T']['hgvs_transcript_variant'] == 'NM_015120.4:c.34C>T' + assert results['NM_015120.4:c.34C>T']['hgvs_refseqgene_variant'] == 'NG_011690.1:g.5145C>T' + assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613030C>T', 'vcf': {'chr': 'chr2', 'ref': u'C', 'pos': '73613030', 'alt': u'T'}} + assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385902C>T', 'vcf': {'chr': 'chr2', 'ref': u'C', 'pos': '73385902', 'alt': u'T'}} + assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613030C>T', 'vcf': {'chr': '2', 'ref': u'C', 'pos': '73613030', 'alt': u'T'}} + assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385902C>T', 'vcf': {'chr': '2', 'ref': u'C', 'pos': '73385902', 'alt': u'T'}} + assert results['NM_015120.4:c.34C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} + + + def test_variant4(self): + variant = 'NC_000002.11:g.73613030C>T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_015120.4:c.34C>T' in results.keys() + assert results['NM_015120.4:c.34C>T']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.34C>T' + assert results['NM_015120.4:c.34C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_015120.4:c.34C>T']['alt_genomic_loci'] == [] + assert results['NM_015120.4:c.34C>T']['transcript_description'] == 'Homo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA' + assert results['NM_015120.4:c.34C>T']['gene_symbol'] == 'ALMS1' + assert results['NM_015120.4:c.34C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Leu12=)', 'slr': 'NP_055935.4:p.(L12=)'} + assert results['NM_015120.4:c.34C>T']['submitted_variant'] == 'NC_000002.11:g.73613030C>T' + assert results['NM_015120.4:c.34C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_015120.4:c.34C>T']['hgvs_lrg_variant'] == 'LRG_741:g.5145C>T' + assert results['NM_015120.4:c.34C>T']['hgvs_transcript_variant'] == 'NM_015120.4:c.34C>T' + assert results['NM_015120.4:c.34C>T']['hgvs_refseqgene_variant'] == 'NG_011690.1:g.5145C>T' + assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613030C>T', 'vcf': {'chr': 'chr2', 'ref': u'C', 'pos': '73613030', 'alt': u'T'}} + assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385902C>T', 'vcf': {'chr': 'chr2', 'ref': u'C', 'pos': '73385902', 'alt': u'T'}} + assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613030C>T', 'vcf': {'chr': '2', 'ref': u'C', 'pos': 
'73613030', 'alt': u'T'}} + assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385902C>T', 'vcf': {'chr': '2', 'ref': u'C', 'pos': '73385902', 'alt': u'T'}} + assert results['NM_015120.4:c.34C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} + + + def test_variant5(self): + variant = 'NC_000023.10:g.33229673A>T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000109.3:c.7+127703T>A' in results.keys() + assert results['NM_000109.3:c.7+127703T>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000109.3:c.7+127703T>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000109.3:c.7+127703T>A']['alt_genomic_loci'] == [] + assert results['NM_000109.3:c.7+127703T>A']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427c, mRNA' + assert results['NM_000109.3:c.7+127703T>A']['gene_symbol'] == 'DMD' + assert results['NM_000109.3:c.7+127703T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000100.2:p.?', 'slr': 'NP_000100.2:p.?'} + assert results['NM_000109.3:c.7+127703T>A']['submitted_variant'] == 'NC_000023.10:g.33229673A>T' + assert results['NM_000109.3:c.7+127703T>A']['genome_context_intronic_sequence'] == 'NC_000023.10(NM_000109.3):c.7+127703T>A' + assert results['NM_000109.3:c.7+127703T>A']['hgvs_lrg_variant'] == '' + assert results['NM_000109.3:c.7+127703T>A']['hgvs_transcript_variant'] == 'NM_000109.3:c.7+127703T>A' + assert results['NM_000109.3:c.7+127703T>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_000109.3:c.7+127703T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000023.10:g.33229673A>T', 'vcf': {'chr': 'chrX', 'ref': u'A', 'pos': '33229673', 'alt': u'T'}} + assert results['NM_000109.3:c.7+127703T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.33211556A>T', 'vcf': {'chr': 'chrX', 'ref': u'A', 'pos': '33211556', 'alt': u'T'}} + assert results['NM_000109.3:c.7+127703T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.33229673A>T', 'vcf': {'chr': 'X', 'ref': u'A', 'pos': '33229673', 'alt': u'T'}} + assert results['NM_000109.3:c.7+127703T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.33211556A>T', 'vcf': {'chr': 'X', 'ref': u'A', 'pos': '33211556', 'alt': u'T'}} + assert results['NM_000109.3:c.7+127703T>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000100.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000109.3'} + + assert 'NM_004006.2:c.-244T>A' in results.keys() + assert results['NM_004006.2:c.-244T>A']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.-244T>A' + assert results['NM_004006.2:c.-244T>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.-244T>A']['alt_genomic_loci'] == [] + assert results['NM_004006.2:c.-244T>A']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + assert results['NM_004006.2:c.-244T>A']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.-244T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.?', 'slr': 'NP_003997.1:p.?'} + assert results['NM_004006.2:c.-244T>A']['submitted_variant'] == 'NC_000023.10:g.33229673A>T' + assert results['NM_004006.2:c.-244T>A']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.-244T>A']['hgvs_lrg_variant'] == 'LRG_199:g.133054T>A' + assert results['NM_004006.2:c.-244T>A']['hgvs_transcript_variant'] == 'NM_004006.2:c.-244T>A' + assert 
results['NM_004006.2:c.-244T>A']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.133054T>A' + assert results['NM_004006.2:c.-244T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.33229673A>T', 'vcf': {'chr': 'chrX', 'ref': u'A', 'pos': '33229673', 'alt': u'T'}} + assert results['NM_004006.2:c.-244T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.33211556A>T', 'vcf': {'chr': 'chrX', 'ref': u'A', 'pos': '33211556', 'alt': u'T'}} + assert results['NM_004006.2:c.-244T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.33229673A>T', 'vcf': {'chr': 'X', 'ref': u'A', 'pos': '33229673', 'alt': u'T'}} + assert results['NM_004006.2:c.-244T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.33211556A>T', 'vcf': {'chr': 'X', 'ref': u'A', 'pos': '33211556', 'alt': u'T'}} + assert results['NM_004006.2:c.-244T>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + + + def test_variant6(self): + variant = 'NM_001145026.1:c.715A>G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_001145026.1:c.715A>G' in results.keys() + assert results['NM_001145026.1:c.715A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001145026.1:c.715A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001145026.1:c.715A>G']['alt_genomic_loci'] == [] + assert results['NM_001145026.1:c.715A>G']['transcript_description'] == 'Homo sapiens protein tyrosine phosphatase, receptor type Q (PTPRQ), mRNA' + assert results['NM_001145026.1:c.715A>G']['gene_symbol'] == 'PTPRQ' + assert 
results['NM_001145026.1:c.715A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001138498.1:p.(Arg239Gly)', 'slr': 'NP_001138498.1:p.(R239G)'} + assert results['NM_001145026.1:c.715A>G']['submitted_variant'] == 'NM_001145026.1:c.715A>G' + assert results['NM_001145026.1:c.715A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001145026.1:c.715A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001145026.1:c.715A>G']['hgvs_transcript_variant'] == 'NM_001145026.1:c.715A>G' + assert results['NM_001145026.1:c.715A>G']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['NM_001145026.1:c.715A>G']['primary_assembly_loci'].keys() + assert results['NM_001145026.1:c.715A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.80460707A>G', 'vcf': {'chr': 'chr12', 'ref': u'A', 'pos': '80460707', 'alt': u'G'}} + assert 'grch37' not in results['NM_001145026.1:c.715A>G']['primary_assembly_loci'].keys() + assert results['NM_001145026.1:c.715A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.80460707A>G', 'vcf': {'chr': '12', 'ref': u'A', 'pos': '80460707', 'alt': u'G'}} + assert results['NM_001145026.1:c.715A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001138498.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001145026.1'} + + + def test_variant7(self): + variant = 'NC_000016.9:g.2099572TC>T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_000548.4:c.138+821del' in results.keys() + assert results['NM_000548.4:c.138+821del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000548.4:c.138+821del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000548.4:c.138+821del']['alt_genomic_loci'] == [] + assert results['NM_000548.4:c.138+821del']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 1, mRNA' + assert 
results['NM_000548.4:c.138+821del']['gene_symbol'] == 'TSC2' + assert results['NM_000548.4:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000539.2(LRG_487p1):p.?', 'slr': 'NP_000539.2:p.?'} + assert results['NM_000548.4:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' + assert results['NM_000548.4:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_000548.4):c.138+821del' + assert results['NM_000548.4:c.138+821del']['hgvs_lrg_variant'] == '' + assert results['NM_000548.4:c.138+821del']['hgvs_transcript_variant'] == 'NM_000548.4:c.138+821del' + assert results['NM_000548.4:c.138+821del']['hgvs_refseqgene_variant'] == '' + assert results['NM_000548.4:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} + assert results['NM_000548.4:c.138+821del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} + assert results['NM_000548.4:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} + assert results['NM_000548.4:c.138+821del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} + assert results['NM_000548.4:c.138+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000539.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000548.4'} + + assert 'NM_001077183.2:c.138+821del' in results.keys() + assert results['NM_001077183.2:c.138+821del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001077183.2:c.138+821del']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_001077183.2:c.138+821del']['alt_genomic_loci'] == [] + assert results['NM_001077183.2:c.138+821del']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 4, mRNA' + assert results['NM_001077183.2:c.138+821del']['gene_symbol'] == 'TSC2' + assert results['NM_001077183.2:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001070651.1:p.?', 'slr': 'NP_001070651.1:p.?'} + assert results['NM_001077183.2:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' + assert results['NM_001077183.2:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001077183.2):c.138+821del' + assert results['NM_001077183.2:c.138+821del']['hgvs_lrg_variant'] == '' + assert results['NM_001077183.2:c.138+821del']['hgvs_transcript_variant'] == 'NM_001077183.2:c.138+821del' + assert results['NM_001077183.2:c.138+821del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001077183.2:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} + assert results['NM_001077183.2:c.138+821del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} + assert results['NM_001077183.2:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} + assert results['NM_001077183.2:c.138+821del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} + assert results['NM_001077183.2:c.138+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001070651.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077183.2'} + + assert 
'NM_001318831.1:c.-89+821del' in results.keys() + assert results['NM_001318831.1:c.-89+821del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001318831.1:c.-89+821del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001318831.1:c.-89+821del']['alt_genomic_loci'] == [] + assert results['NM_001318831.1:c.-89+821del']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 8, mRNA' + assert results['NM_001318831.1:c.-89+821del']['gene_symbol'] == 'TSC2' + assert results['NM_001318831.1:c.-89+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305760.1:p.?', 'slr': 'NP_001305760.1:p.?'} + assert results['NM_001318831.1:c.-89+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' + assert results['NM_001318831.1:c.-89+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001318831.1):c.-89+821del' + assert results['NM_001318831.1:c.-89+821del']['hgvs_lrg_variant'] == '' + assert results['NM_001318831.1:c.-89+821del']['hgvs_transcript_variant'] == 'NM_001318831.1:c.-89+821del' + assert results['NM_001318831.1:c.-89+821del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001318831.1:c.-89+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} + assert results['NM_001318831.1:c.-89+821del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} + assert results['NM_001318831.1:c.-89+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} + assert results['NM_001318831.1:c.-89+821del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} 
+ assert results['NM_001318831.1:c.-89+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305760.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318831.1'} + + assert 'NM_000548.3:c.138+821del' in results.keys() + assert results['NM_000548.3:c.138+821del']['hgvs_lrg_transcript_variant'] == 'LRG_487t1:c.138+821del' + assert results['NM_000548.3:c.138+821del']['refseqgene_context_intronic_sequence'] == 'NG_005895.1(NM_000548.3):c.138+821del' + assert results['NM_000548.3:c.138+821del']['alt_genomic_loci'] == [] + assert results['NM_000548.3:c.138+821del']['transcript_description'] == 'Homo sapiens tuberous sclerosis 2 (TSC2), transcript variant 1, mRNA' + assert results['NM_000548.3:c.138+821del']['gene_symbol'] == 'TSC2' + assert results['NM_000548.3:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000539.2(LRG_487p1):p.?', 'slr': 'NP_000539.2:p.?'} + assert results['NM_000548.3:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' + assert results['NM_000548.3:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_000548.3):c.138+821del' + assert results['NM_000548.3:c.138+821del']['hgvs_lrg_variant'] == 'LRG_487:g.5269del' + assert results['NM_000548.3:c.138+821del']['hgvs_transcript_variant'] == 'NM_000548.3:c.138+821del' + assert results['NM_000548.3:c.138+821del']['hgvs_refseqgene_variant'] == 'NG_005895.1:g.5269del' + assert results['NM_000548.3:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} + assert 'hg38' not in results['NM_000548.3:c.138+821del']['primary_assembly_loci'].keys() + assert results['NM_000548.3:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} + assert 'grch38' not in 
results['NM_000548.3:c.138+821del']['primary_assembly_loci'].keys() + assert results['NM_000548.3:c.138+821del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_005895.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000539.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000548.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_487.xml'} + + assert 'NM_001114382.1:c.138+821del' in results.keys() + assert results['NM_001114382.1:c.138+821del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001114382.1:c.138+821del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001114382.1:c.138+821del']['alt_genomic_loci'] == [] + assert results['NM_001114382.1:c.138+821del']['transcript_description'] == 'Homo sapiens tuberous sclerosis 2 (TSC2), transcript variant 5, mRNA' + assert results['NM_001114382.1:c.138+821del']['gene_symbol'] == 'TSC2' + assert results['NM_001114382.1:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001107854.1:p.?', 'slr': 'NP_001107854.1:p.?'} + assert results['NM_001114382.1:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' + assert results['NM_001114382.1:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001114382.1):c.138+821del' + assert results['NM_001114382.1:c.138+821del']['hgvs_lrg_variant'] == '' + assert results['NM_001114382.1:c.138+821del']['hgvs_transcript_variant'] == 'NM_001114382.1:c.138+821del' + assert results['NM_001114382.1:c.138+821del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001114382.1:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} + assert 'hg38' not in results['NM_001114382.1:c.138+821del']['primary_assembly_loci'].keys() + assert results['NM_001114382.1:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} + assert 'grch38' not in results['NM_001114382.1:c.138+821del']['primary_assembly_loci'].keys() + assert results['NM_001114382.1:c.138+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001107854.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001114382.1'} + + assert 'NM_001318832.1:c.171+821del' in results.keys() + assert results['NM_001318832.1:c.171+821del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001318832.1:c.171+821del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001318832.1:c.171+821del']['alt_genomic_loci'] == [] + assert results['NM_001318832.1:c.171+821del']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 9, mRNA' + assert results['NM_001318832.1:c.171+821del']['gene_symbol'] == 'TSC2' + assert results['NM_001318832.1:c.171+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305761.1:p.?', 'slr': 'NP_001305761.1:p.?'} + assert results['NM_001318832.1:c.171+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' + assert results['NM_001318832.1:c.171+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001318832.1):c.171+821del' + assert results['NM_001318832.1:c.171+821del']['hgvs_lrg_variant'] == '' + assert results['NM_001318832.1:c.171+821del']['hgvs_transcript_variant'] == 'NM_001318832.1:c.171+821del' + assert results['NM_001318832.1:c.171+821del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001318832.1:c.171+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} + assert results['NM_001318832.1:c.171+821del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} 
+ assert results['NM_001318832.1:c.171+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} + assert results['NM_001318832.1:c.171+821del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} + assert results['NM_001318832.1:c.171+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305761.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318832.1'} + + assert 'NM_001363528.1:c.138+821del' in results.keys() + assert results['NM_001363528.1:c.138+821del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363528.1:c.138+821del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001363528.1:c.138+821del']['alt_genomic_loci'] == [] + assert results['NM_001363528.1:c.138+821del']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 10, mRNA' + assert results['NM_001363528.1:c.138+821del']['gene_symbol'] == 'TSC2' + assert results['NM_001363528.1:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350457.1:p.?', 'slr': 'NP_001350457.1:p.?'} + assert results['NM_001363528.1:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' + assert results['NM_001363528.1:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001363528.1):c.138+821del' + assert results['NM_001363528.1:c.138+821del']['hgvs_lrg_variant'] == '' + assert results['NM_001363528.1:c.138+821del']['hgvs_transcript_variant'] == 'NM_001363528.1:c.138+821del' + assert results['NM_001363528.1:c.138+821del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001363528.1:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': 
'2099572', 'alt': 'T'}} + assert 'hg38' not in results['NM_001363528.1:c.138+821del']['primary_assembly_loci'].keys() + assert results['NM_001363528.1:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} + assert 'grch38' not in results['NM_001363528.1:c.138+821del']['primary_assembly_loci'].keys() + assert results['NM_001363528.1:c.138+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350457.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363528.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_021055.2:c.138+821del' in results.keys() + assert results['NM_021055.2:c.138+821del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_021055.2:c.138+821del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_021055.2:c.138+821del']['alt_genomic_loci'] == [] + assert results['NM_021055.2:c.138+821del']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 2, mRNA' + assert results['NM_021055.2:c.138+821del']['gene_symbol'] == 'TSC2' + assert results['NM_021055.2:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_066399.2:p.?', 'slr': 'NP_066399.2:p.?'} + assert results['NM_021055.2:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' + assert results['NM_021055.2:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_021055.2):c.138+821del' + assert results['NM_021055.2:c.138+821del']['hgvs_lrg_variant'] == '' + assert results['NM_021055.2:c.138+821del']['hgvs_transcript_variant'] == 'NM_021055.2:c.138+821del' + assert results['NM_021055.2:c.138+821del']['hgvs_refseqgene_variant'] == '' + assert results['NM_021055.2:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': 
'2099572', 'alt': 'T'}} + assert 'hg38' not in results['NM_021055.2:c.138+821del']['primary_assembly_loci'].keys() + assert results['NM_021055.2:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} + assert 'grch38' not in results['NM_021055.2:c.138+821del']['primary_assembly_loci'].keys() + assert results['NM_021055.2:c.138+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_066399.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021055.2'} + + assert 'NM_001077183.1:c.138+821del' in results.keys() + assert results['NM_001077183.1:c.138+821del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001077183.1:c.138+821del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001077183.1:c.138+821del']['alt_genomic_loci'] == [] + assert results['NM_001077183.1:c.138+821del']['transcript_description'] == 'Homo sapiens tuberous sclerosis 2 (TSC2), transcript variant 4, mRNA' + assert results['NM_001077183.1:c.138+821del']['gene_symbol'] == 'TSC2' + assert results['NM_001077183.1:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001070651.1:p.?', 'slr': 'NP_001070651.1:p.?'} + assert results['NM_001077183.1:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' + assert results['NM_001077183.1:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001077183.1):c.138+821del' + assert results['NM_001077183.1:c.138+821del']['hgvs_lrg_variant'] == '' + assert results['NM_001077183.1:c.138+821del']['hgvs_transcript_variant'] == 'NM_001077183.1:c.138+821del' + assert results['NM_001077183.1:c.138+821del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001077183.1:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2099572', 'alt': 
'T'}} + assert 'hg38' not in results['NM_001077183.1:c.138+821del']['primary_assembly_loci'].keys() + assert results['NM_001077183.1:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} + assert 'grch38' not in results['NM_001077183.1:c.138+821del']['primary_assembly_loci'].keys() + assert results['NM_001077183.1:c.138+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001070651.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077183.1'} + + assert 'NM_001318827.1:c.138+821del' in results.keys() + assert results['NM_001318827.1:c.138+821del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001318827.1:c.138+821del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001318827.1:c.138+821del']['alt_genomic_loci'] == [] + assert results['NM_001318827.1:c.138+821del']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 6, mRNA' + assert results['NM_001318827.1:c.138+821del']['gene_symbol'] == 'TSC2' + assert results['NM_001318827.1:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305756.1:p.?', 'slr': 'NP_001305756.1:p.?'} + assert results['NM_001318827.1:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' + assert results['NM_001318827.1:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001318827.1):c.138+821del' + assert results['NM_001318827.1:c.138+821del']['hgvs_lrg_variant'] == '' + assert results['NM_001318827.1:c.138+821del']['hgvs_transcript_variant'] == 'NM_001318827.1:c.138+821del' + assert results['NM_001318827.1:c.138+821del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001318827.1:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2099572', 'alt': 
'T'}} + assert results['NM_001318827.1:c.138+821del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} + assert results['NM_001318827.1:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} + assert results['NM_001318827.1:c.138+821del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} + assert results['NM_001318827.1:c.138+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305756.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318827.1'} + + assert 'NM_001114382.2:c.138+821del' in results.keys() + assert results['NM_001114382.2:c.138+821del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001114382.2:c.138+821del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001114382.2:c.138+821del']['alt_genomic_loci'] == [] + assert results['NM_001114382.2:c.138+821del']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 5, mRNA' + assert results['NM_001114382.2:c.138+821del']['gene_symbol'] == 'TSC2' + assert results['NM_001114382.2:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001107854.1:p.?', 'slr': 'NP_001107854.1:p.?'} + assert results['NM_001114382.2:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' + assert results['NM_001114382.2:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001114382.2):c.138+821del' + assert results['NM_001114382.2:c.138+821del']['hgvs_lrg_variant'] == '' + assert results['NM_001114382.2:c.138+821del']['hgvs_transcript_variant'] == 'NM_001114382.2:c.138+821del' + assert 
results['NM_001114382.2:c.138+821del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001114382.2:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} + assert results['NM_001114382.2:c.138+821del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} + assert results['NM_001114382.2:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} + assert results['NM_001114382.2:c.138+821del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} + assert results['NM_001114382.2:c.138+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001107854.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001114382.2'} + + assert 'NM_001318829.1:c.-9-826del' in results.keys() + assert results['NM_001318829.1:c.-9-826del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001318829.1:c.-9-826del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001318829.1:c.-9-826del']['alt_genomic_loci'] == [] + assert results['NM_001318829.1:c.-9-826del']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 7, mRNA' + assert results['NM_001318829.1:c.-9-826del']['gene_symbol'] == 'TSC2' + assert results['NM_001318829.1:c.-9-826del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305758.1:p.?', 'slr': 'NP_001305758.1:p.?'} + assert results['NM_001318829.1:c.-9-826del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' + assert results['NM_001318829.1:c.-9-826del']['genome_context_intronic_sequence'] == 
'NC_000016.9(NM_001318829.1):c.-9-826del' + assert results['NM_001318829.1:c.-9-826del']['hgvs_lrg_variant'] == '' + assert results['NM_001318829.1:c.-9-826del']['hgvs_transcript_variant'] == 'NM_001318829.1:c.-9-826del' + assert results['NM_001318829.1:c.-9-826del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001318829.1:c.-9-826del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} + assert results['NM_001318829.1:c.-9-826del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} + assert results['NM_001318829.1:c.-9-826del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} + assert results['NM_001318829.1:c.-9-826del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} + assert results['NM_001318829.1:c.-9-826del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305758.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318829.1'} + + + def test_variant8(self): + variant = 'NM_000088.3:c.589GG>CT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.589_590delinsCT' in results.keys() + assert results['NM_000088.3:c.589_590delinsCT']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589_590delinsCT' + assert results['NM_000088.3:c.589_590delinsCT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.589_590delinsCT']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.589_590delinsCT']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain 
(COL1A1), mRNA' + assert results['NM_000088.3:c.589_590delinsCT']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589_590delinsCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Leu)', 'slr': 'NP_000079.2:p.(G197L)'} + assert results['NM_000088.3:c.589_590delinsCT']['submitted_variant'] == 'NM_000088.3:c.589GG>CT' + assert results['NM_000088.3:c.589_590delinsCT']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.589_590delinsCT']['hgvs_lrg_variant'] == 'LRG_1:g.8638_8639delinsCT' + assert results['NM_000088.3:c.589_590delinsCT']['hgvs_transcript_variant'] == 'NM_000088.3:c.589_590delinsCT' + assert results['NM_000088.3:c.589_590delinsCT']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638_8639delinsCT' + assert results['NM_000088.3:c.589_590delinsCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275362_48275363delinsAG', 'vcf': {'chr': 'chr17', 'ref': 'CC', 'pos': '48275362', 'alt': u'AG'}} + assert results['NM_000088.3:c.589_590delinsCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198001_50198002delinsAG', 'vcf': {'chr': 'chr17', 'ref': 'CC', 'pos': '50198001', 'alt': u'AG'}} + assert results['NM_000088.3:c.589_590delinsCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275362_48275363delinsAG', 'vcf': {'chr': '17', 'ref': 'CC', 'pos': '48275362', 'alt': u'AG'}} + assert results['NM_000088.3:c.589_590delinsCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198001_50198002delinsAG', 'vcf': {'chr': '17', 'ref': 'CC', 'pos': '50198001', 'alt': u'AG'}} + assert results['NM_000088.3:c.589_590delinsCT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 
'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant9(self): + variant = 'NM_000094.3:c.6751-2_6751-3del' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'NM_000094.3:c.6751-2_6751-3del' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant10(self): + variant = 'COL5A1:c.5071A>T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] 
== [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'COL5A1:c.5071A>T' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant11(self): + variant = 'NG_007400.1:c.5071A>T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'NG_007400.1:c.5071A>T' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + 
assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant12(self): + variant = 'chr16:15832508_15832509delinsAC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_002474.2:c.3034_3035inv' in results.keys() + assert results['NM_002474.2:c.3034_3035inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_002474.2:c.3034_3035inv']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_002474.2:c.3034_3035inv']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}] + assert results['NM_002474.2:c.3034_3035inv']['transcript_description'] == 'Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1A, mRNA' + assert results['NM_002474.2:c.3034_3035inv']['gene_symbol'] == 'MYH11' + assert results['NM_002474.2:c.3034_3035inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002465.1:p.(Thr1012Val)', 'slr': 'NP_002465.1:p.(T1012V)'} + assert results['NM_002474.2:c.3034_3035inv']['submitted_variant'] == 'chr16:15832508_15832509delinsAC' + assert results['NM_002474.2:c.3034_3035inv']['genome_context_intronic_sequence'] == '' + assert 
results['NM_002474.2:c.3034_3035inv']['hgvs_lrg_variant'] == '' + assert results['NM_002474.2:c.3034_3035inv']['hgvs_transcript_variant'] == 'NM_002474.2:c.3034_3035inv' + assert results['NM_002474.2:c.3034_3035inv']['hgvs_refseqgene_variant'] == '' + assert results['NM_002474.2:c.3034_3035inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} + assert results['NM_002474.2:c.3034_3035inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} + assert results['NM_002474.2:c.3034_3035inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} + assert results['NM_002474.2:c.3034_3035inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} + assert results['NM_002474.2:c.3034_3035inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002465.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002474.2'} + + assert 'NM_022844.2:c.3034_3035inv' in results.keys() + assert results['NM_022844.2:c.3034_3035inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_022844.2:c.3034_3035inv']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_022844.2:c.3034_3035inv']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}] + assert 
results['NM_022844.2:c.3034_3035inv']['transcript_description'] == 'Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2A, mRNA' + assert results['NM_022844.2:c.3034_3035inv']['gene_symbol'] == 'MYH11' + assert results['NM_022844.2:c.3034_3035inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_074035.1:p.(Thr1012Val)', 'slr': 'NP_074035.1:p.(T1012V)'} + assert results['NM_022844.2:c.3034_3035inv']['submitted_variant'] == 'chr16:15832508_15832509delinsAC' + assert results['NM_022844.2:c.3034_3035inv']['genome_context_intronic_sequence'] == '' + assert results['NM_022844.2:c.3034_3035inv']['hgvs_lrg_variant'] == '' + assert results['NM_022844.2:c.3034_3035inv']['hgvs_transcript_variant'] == 'NM_022844.2:c.3034_3035inv' + assert results['NM_022844.2:c.3034_3035inv']['hgvs_refseqgene_variant'] == '' + assert results['NM_022844.2:c.3034_3035inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} + assert results['NM_022844.2:c.3034_3035inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} + assert results['NM_022844.2:c.3034_3035inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} + assert results['NM_022844.2:c.3034_3035inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} + assert results['NM_022844.2:c.3034_3035inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_074035.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_022844.2'} + + assert 'NM_001040114.1:c.3055_3056inv' in results.keys() + assert 
results['NM_001040114.1:c.3055_3056inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001040114.1:c.3055_3056inv']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001040114.1:c.3055_3056inv']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}] + assert results['NM_001040114.1:c.3055_3056inv']['transcript_description'] == 'Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1B, mRNA' + assert results['NM_001040114.1:c.3055_3056inv']['gene_symbol'] == 'MYH11' + assert results['NM_001040114.1:c.3055_3056inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035203.1:p.(Thr1019Val)', 'slr': 'NP_001035203.1:p.(T1019V)'} + assert results['NM_001040114.1:c.3055_3056inv']['submitted_variant'] == 'chr16:15832508_15832509delinsAC' + assert results['NM_001040114.1:c.3055_3056inv']['genome_context_intronic_sequence'] == '' + assert results['NM_001040114.1:c.3055_3056inv']['hgvs_lrg_variant'] == '' + assert results['NM_001040114.1:c.3055_3056inv']['hgvs_transcript_variant'] == 'NM_001040114.1:c.3055_3056inv' + assert results['NM_001040114.1:c.3055_3056inv']['hgvs_refseqgene_variant'] == '' + assert results['NM_001040114.1:c.3055_3056inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} + assert results['NM_001040114.1:c.3055_3056inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} + assert results['NM_001040114.1:c.3055_3056inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} + assert results['NM_001040114.1:c.3055_3056inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} + assert results['NM_001040114.1:c.3055_3056inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035203.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040114.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001040113.1:c.3055_3056inv' in results.keys() + assert results['NM_001040113.1:c.3055_3056inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001040113.1:c.3055_3056inv']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001040113.1:c.3055_3056inv']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}] + assert results['NM_001040113.1:c.3055_3056inv']['transcript_description'] == 'Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2B, mRNA' + assert results['NM_001040113.1:c.3055_3056inv']['gene_symbol'] == 'MYH11' + assert results['NM_001040113.1:c.3055_3056inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035202.1:p.(Thr1019Val)', 'slr': 'NP_001035202.1:p.(T1019V)'} + assert results['NM_001040113.1:c.3055_3056inv']['submitted_variant'] == 'chr16:15832508_15832509delinsAC' + assert results['NM_001040113.1:c.3055_3056inv']['genome_context_intronic_sequence'] == '' + assert results['NM_001040113.1:c.3055_3056inv']['hgvs_lrg_variant'] == '' + assert results['NM_001040113.1:c.3055_3056inv']['hgvs_transcript_variant'] == 
'NM_001040113.1:c.3055_3056inv' + assert results['NM_001040113.1:c.3055_3056inv']['hgvs_refseqgene_variant'] == 'NG_009299.1:g.123379_123380inv' + assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} + assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} + assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} + assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} + assert results['NM_001040113.1:c.3055_3056inv']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009299.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035202.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040113.1'} + + + def test_variant13(self): + variant = 'NM_000088.3:c.589-1GG>G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.589-1_589delinsG' in results.keys() + assert results['NM_000088.3:c.589-1_589delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.590del' + assert results['NM_000088.3:c.589-1_589delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.590del' + assert results['NM_000088.3:c.589-1_589delinsG']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.589-1_589delinsG']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 
chain (COL1A1), mRNA' + assert results['NM_000088.3:c.589-1_589delinsG']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589-1_589delinsG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.589-1_589delinsG']['submitted_variant'] == 'NM_000088.3:c.589-1GG>G' + assert results['NM_000088.3:c.589-1_589delinsG']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-1_589delinsG' + assert results['NM_000088.3:c.589-1_589delinsG']['hgvs_lrg_variant'] == 'LRG_1:g.8639del' + assert results['NM_000088.3:c.589-1_589delinsG']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-1_589delinsG' + assert results['NM_000088.3:c.589-1_589delinsG']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8639del' + assert results['NM_000088.3:c.589-1_589delinsG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363_48275364delinsC', 'vcf': {'chr': 'chr17', 'ref': 'AC', 'pos': '48275361', 'alt': 'A'}} + assert results['NM_000088.3:c.589-1_589delinsG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002_50198003delinsC', 'vcf': {'chr': 'chr17', 'ref': 'AC', 'pos': '50198000', 'alt': 'A'}} + assert results['NM_000088.3:c.589-1_589delinsG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363_48275364delinsC', 'vcf': {'chr': '17', 'ref': 'AC', 'pos': '48275361', 'alt': 'A'}} + assert results['NM_000088.3:c.589-1_589delinsG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002_50198003delinsC', 'vcf': {'chr': '17', 'ref': 'AC', 'pos': '50198000', 'alt': 'A'}} + assert results['NM_000088.3:c.589-1_589delinsG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 
'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant14(self): + variant = 'NM_000088.3:c.642+1GT>G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.642+1_642+2delinsG' in results.keys() + assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642+2del' + assert results['NM_000088.3:c.642+1_642+2delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.642+2del' + assert results['NM_000088.3:c.642+1_642+2delinsG']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.642+1_642+2delinsG']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.642+1_642+2delinsG']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.642+1_642+2delinsG']['submitted_variant'] == 'NM_000088.3:c.642+1GT>G' + assert results['NM_000088.3:c.642+1_642+2delinsG']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.642+1_642+2delinsG' + assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_lrg_variant'] == 'LRG_1:g.8693del' + assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_transcript_variant'] == 'NM_000088.3:c.642+1_642+2delinsG' + assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8693del' + assert results['NM_000088.3:c.642+1_642+2delinsG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308_48275309delinsC', 'vcf': {'chr': 'chr17', 'ref': 'TA', 'pos': '48275307', 'alt': 'T'}} + assert results['NM_000088.3:c.642+1_642+2delinsG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947_50197948delinsC', 'vcf': {'chr': 'chr17', 'ref': 'TA', 'pos': 
'50197946', 'alt': 'T'}} + assert results['NM_000088.3:c.642+1_642+2delinsG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308_48275309delinsC', 'vcf': {'chr': '17', 'ref': 'TA', 'pos': '48275307', 'alt': 'T'}} + assert results['NM_000088.3:c.642+1_642+2delinsG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947_50197948delinsC', 'vcf': {'chr': '17', 'ref': 'TA', 'pos': '50197946', 'alt': 'T'}} + assert results['NM_000088.3:c.642+1_642+2delinsG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant15(self): + variant = 'NM_000088.3:c.589-2AG>G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.589-2_589-1delinsG' in results.keys() + assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-2del' + assert results['NM_000088.3:c.589-2_589-1delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-2del' + assert results['NM_000088.3:c.589-2_589-1delinsG']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.589-2_589-1delinsG']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.589-2_589-1delinsG']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.589-2_589-1delinsG']['submitted_variant'] == 'NM_000088.3:c.589-2AG>G' + assert results['NM_000088.3:c.589-2_589-1delinsG']['genome_context_intronic_sequence'] == 
'NC_000017.10(NM_000088.3):c.589-2_589-1delinsG' + assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_lrg_variant'] == 'LRG_1:g.8636del' + assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-2_589-1delinsG' + assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8636del' + assert results['NM_000088.3:c.589-2_589-1delinsG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364_48275365delinsC', 'vcf': {'chr': 'chr17', 'ref': 'CT', 'pos': '48275364', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2_589-1delinsG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003_50198004delinsC', 'vcf': {'chr': 'chr17', 'ref': 'CT', 'pos': '50198003', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2_589-1delinsG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364_48275365delinsC', 'vcf': {'chr': '17', 'ref': 'CT', 'pos': '48275364', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2_589-1delinsG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003_50198004delinsC', 'vcf': {'chr': '17', 'ref': 'CT', 'pos': '50198003', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2_589-1delinsG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant16(self): + variant = 'NC_000017.10:g.48279242G>T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'intergenic' + assert 'Intergenic_Variant_1' in results.keys() + assert results['Intergenic_Variant_1']['hgvs_lrg_transcript_variant'] == '' + assert 
results['Intergenic_Variant_1']['refseqgene_context_intronic_sequence'] == '' + assert results['Intergenic_Variant_1']['alt_genomic_loci'] == [] + assert results['Intergenic_Variant_1']['transcript_description'] == '' + assert results['Intergenic_Variant_1']['gene_symbol'] == '' + assert results['Intergenic_Variant_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['Intergenic_Variant_1']['submitted_variant'] == 'NC_000017.10:g.48279242G>T' + assert results['Intergenic_Variant_1']['genome_context_intronic_sequence'] == '' + assert results['Intergenic_Variant_1']['hgvs_lrg_variant'] == 'LRG_1:g.4759C>A' + assert results['Intergenic_Variant_1']['hgvs_transcript_variant'] == '' + assert results['Intergenic_Variant_1']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.4759C>A' + assert results['Intergenic_Variant_1']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': u'NC_000017.10:g.48279242G>T', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '48279242', 'alt': 'T'}} + assert results['Intergenic_Variant_1']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': u'NC_000017.11:g.50201881G>T', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '50201881', 'alt': 'T'}} + assert results['Intergenic_Variant_1']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': u'NC_000017.10:g.48279242G>T', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '48279242', 'alt': 'T'}} + assert results['Intergenic_Variant_1']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': u'NC_000017.11:g.50201881G>T', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '50201881', 'alt': 'T'}} + assert results['Intergenic_Variant_1']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant17(self): + variant = 'NM_000500.7:c.-107-19C>T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 
'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'NM_000500.7:c.-107-19C>T' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant18(self): + variant = 'NM_000518.4:c.-130C>T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert 
results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'NM_000518.4:c.-130C>T' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant19(self): + variant = 'NM_000518.4:c.-50-80C>T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'NM_000518.4:c.-50-80C>T' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] 
== '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant20(self): + variant = 'NM_000518.4:c.316_*342delinsCTACTT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'NM_000518.4:c.316_*342delinsCTACTT' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert 
results['flag'] == 'warning' + + def test_variant21(self): + variant = 'NM_000518.4:c.316_*100del' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000518.4:c.316_*100del' in results.keys() + assert results['NM_000518.4:c.316_*100del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000518.4:c.316_*100del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000518.4:c.316_*100del']['alt_genomic_loci'] == [] + assert results['NM_000518.4:c.316_*100del']['transcript_description'] == 'Homo sapiens hemoglobin subunit beta (HBB), mRNA' + assert results['NM_000518.4:c.316_*100del']['gene_symbol'] == 'HBB' + assert results['NM_000518.4:c.316_*100del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000509.1(LRG_1232p1):p.(Leu106SerfsTer3)', 'slr': 'NP_000509.1:p.(L106Sfs*3)'} + assert results['NM_000518.4:c.316_*100del']['submitted_variant'] == 'NM_000518.4:c.316_*100del' + assert results['NM_000518.4:c.316_*100del']['genome_context_intronic_sequence'] == '' + assert results['NM_000518.4:c.316_*100del']['hgvs_lrg_variant'] == '' + assert results['NM_000518.4:c.316_*100del']['hgvs_transcript_variant'] == 'NM_000518.4:c.316_*100del' + assert results['NM_000518.4:c.316_*100del']['hgvs_refseqgene_variant'] == 'NG_000007.3:g.71890_72118del' + assert results['NM_000518.4:c.316_*100del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.5246728_5246956del', 'vcf': {'chr': 'chr11', 'ref': 'AATCCAGATGCTCAAGGCCCTTCATAATATCCCCCAGTTTAGTAGTTGGACTTAGGGAACAAAGGAACCTTTAATAGAAATTGGACAGCAAGAAAGCGAGCTTAGTGATACTTGTGGGCCAGGGCATTAGCCACACCAGCCACCACTTTCTGATAGGCAGCCTGCACTGGTGGGGTGAATTCTTTGCCAAAGTGATGGGCCAGCACACAGACCAGCACGTTGCCCAGGAG', 'pos': '5246727', 'alt': 'A'}} + assert results['NM_000518.4:c.316_*100del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.5225498_5225726del', 'vcf': {'chr': 'chr11', 'ref': 
'AATCCAGATGCTCAAGGCCCTTCATAATATCCCCCAGTTTAGTAGTTGGACTTAGGGAACAAAGGAACCTTTAATAGAAATTGGACAGCAAGAAAGCGAGCTTAGTGATACTTGTGGGCCAGGGCATTAGCCACACCAGCCACCACTTTCTGATAGGCAGCCTGCACTGGTGGGGTGAATTCTTTGCCAAAGTGATGGGCCAGCACACAGACCAGCACGTTGCCCAGGAG', 'pos': '5225497', 'alt': 'A'}} + assert results['NM_000518.4:c.316_*100del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.5246728_5246956del', 'vcf': {'chr': '11', 'ref': 'AATCCAGATGCTCAAGGCCCTTCATAATATCCCCCAGTTTAGTAGTTGGACTTAGGGAACAAAGGAACCTTTAATAGAAATTGGACAGCAAGAAAGCGAGCTTAGTGATACTTGTGGGCCAGGGCATTAGCCACACCAGCCACCACTTTCTGATAGGCAGCCTGCACTGGTGGGGTGAATTCTTTGCCAAAGTGATGGGCCAGCACACAGACCAGCACGTTGCCCAGGAG', 'pos': '5246727', 'alt': 'A'}} + assert results['NM_000518.4:c.316_*100del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.5225498_5225726del', 'vcf': {'chr': '11', 'ref': 'AATCCAGATGCTCAAGGCCCTTCATAATATCCCCCAGTTTAGTAGTTGGACTTAGGGAACAAAGGAACCTTTAATAGAAATTGGACAGCAAGAAAGCGAGCTTAGTGATACTTGTGGGCCAGGGCATTAGCCACACCAGCCACCACTTTCTGATAGGCAGCCTGCACTGGTGGGGTGAATTCTTTGCCAAAGTGATGGGCCAGCACACAGACCAGCACGTTGCCCAGGAG', 'pos': '5225497', 'alt': 'A'}} + assert results['NM_000518.4:c.316_*100del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_000007.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000509.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000518.4'} + + + def test_variant22(self): + variant = 'NM_000518.4:c.*2000C>T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert 
results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'NM_000518.4:c.*2000C>T' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant23(self): + variant = 'NM_000518.4:c.*132+1868C>T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'NM_000518.4:c.*132+1868C>T' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert 
results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant24(self): + variant = 'NM_000518.4:c.-130_*2000=' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'NM_000518.4:c.-130_*2000=' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 
results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant25(self): + variant = 'NM_000518.4:c.-50-80_*132+1868=' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'NM_000518.4:c.-50-80_*132+1868=' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant26(self): + variant = 'NR_138595.1:n.-810C>T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' 
+ assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'NR_138595.1:n.-810C>T' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant27(self): + variant = 'NR_138595.1:n.1-810C>T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'NR_138595.1:n.1-810C>T' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + 
assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant28(self): + variant = 'NR_138595.1:n.1071+1A=' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'NR_138595.1:n.1071+1A=' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + 
assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + + assert results['flag'] == 'warning' + + def test_variant29(self): + variant = 'NR_138595.1:n.-810_1071+1=' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'NR_138595.1:n.-810_1071+1=' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant30(self): + variant = 'NC_000017.10:g.48261457_48261463TTATGTT=' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.*1400_*1406=' in results.keys() + assert results['NM_000088.3:c.*1400_*1406=']['hgvs_lrg_transcript_variant'] == 
'LRG_1t1:c.*1400_*1406=' + assert results['NM_000088.3:c.*1400_*1406=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.*1400_*1406=']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.*1400_*1406=']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.*1400_*1406=']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.*1400_*1406=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.*1400_*1406=']['submitted_variant'] == 'NC_000017.10:g.48261457_48261463TTATGTT=' + assert results['NM_000088.3:c.*1400_*1406=']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.*1400_*1406=']['hgvs_lrg_variant'] == 'LRG_1:g.22538_22544=' + assert results['NM_000088.3:c.*1400_*1406=']['hgvs_transcript_variant'] == 'NM_000088.3:c.*1400_*1406=' + assert results['NM_000088.3:c.*1400_*1406=']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.22538_22544=' + assert results['NM_000088.3:c.*1400_*1406=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48261457_48261463=', 'vcf': {'chr': 'chr17', 'ref': u'TTATGTT', 'pos': '48261457', 'alt': u'TTATGTT'}} + assert results['NM_000088.3:c.*1400_*1406=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50184096_50184102=', 'vcf': {'chr': 'chr17', 'ref': u'TTATGTT', 'pos': '50184096', 'alt': u'TTATGTT'}} + assert results['NM_000088.3:c.*1400_*1406=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48261457_48261463=', 'vcf': {'chr': '17', 'ref': u'TTATGTT', 'pos': '48261457', 'alt': u'TTATGTT'}} + assert results['NM_000088.3:c.*1400_*1406=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50184096_50184102=', 'vcf': {'chr': '17', 'ref': u'TTATGTT', 'pos': '50184096', 'alt': u'TTATGTT'}} + assert 
results['NM_000088.3:c.*1400_*1406=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant31(self): + variant = 'NC_000017.10:g.48275363C>A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.589G>T' in results.keys() + assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' + assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.589G>T']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.589G>T']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.589G>T']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)'} + assert results['NM_000088.3:c.589G>T']['submitted_variant'] == 'NC_000017.10:g.48275363C>A' + assert results['NM_000088.3:c.589G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.589G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8638G>T' + assert results['NM_000088.3:c.589G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589G>T' + assert results['NM_000088.3:c.589G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638G>T' + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '48275363', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 
'pos': '50198002', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '48275363', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '50198002', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant32(self): + variant = 'NM_000088.3:c.589-1G>T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.589-1G>T' in results.keys() + assert results['NM_000088.3:c.589-1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-1G>T' + assert results['NM_000088.3:c.589-1G>T']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-1G>T' + assert results['NM_000088.3:c.589-1G>T']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.589-1G>T']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.589-1G>T']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.589-1G>T']['submitted_variant'] == 'NM_000088.3:c.589-1G>T' + assert results['NM_000088.3:c.589-1G>T']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-1G>T' + assert results['NM_000088.3:c.589-1G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8637G>T' + assert 
results['NM_000088.3:c.589-1G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-1G>T' + assert results['NM_000088.3:c.589-1G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8637G>T' + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '48275364', 'alt': u'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '50198003', 'alt': u'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '48275364', 'alt': u'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '50198003', 'alt': u'A'}} + assert results['NM_000088.3:c.589-1G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant33(self): + variant = 'NM_000088.3:c.591_593inv' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_000088.3:c.591_593inv' in results.keys() + assert results['NM_000088.3:c.591_593inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.591_593inv' + assert results['NM_000088.3:c.591_593inv']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.591_593inv']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.591_593inv']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert 
results['NM_000088.3:c.591_593inv']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.591_593inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Pro198Asp)', 'slr': 'NP_000079.2:p.(P198D)'} + assert results['NM_000088.3:c.591_593inv']['submitted_variant'] == 'NM_000088.3:c.591_593inv' + assert results['NM_000088.3:c.591_593inv']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.591_593inv']['hgvs_lrg_variant'] == 'LRG_1:g.8640_8642inv' + assert results['NM_000088.3:c.591_593inv']['hgvs_transcript_variant'] == 'NM_000088.3:c.591_593inv' + assert results['NM_000088.3:c.591_593inv']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8640_8642inv' + assert results['NM_000088.3:c.591_593inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275359_48275361inv', 'vcf': {'chr': 'chr17', 'ref': 'GGA', 'pos': '48275359', 'alt': 'TCC'}} + assert results['NM_000088.3:c.591_593inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197998_50198000inv', 'vcf': {'chr': 'chr17', 'ref': 'GGA', 'pos': '50197998', 'alt': 'TCC'}} + assert results['NM_000088.3:c.591_593inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275359_48275361inv', 'vcf': {'chr': '17', 'ref': 'GGA', 'pos': '48275359', 'alt': 'TCC'}} + assert results['NM_000088.3:c.591_593inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197998_50198000inv', 'vcf': {'chr': '17', 'ref': 'GGA', 'pos': '50197998', 'alt': 'TCC'}} + assert results['NM_000088.3:c.591_593inv']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + assert results['flag'] == 'gene_variant' + + def test_variant34(self): 
+ variant = '11-5248232-T-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000518.5:c.20A>T' in results.keys() + assert results['NM_000518.5:c.20A>T']['hgvs_lrg_transcript_variant'] == 'LRG_1232t1:c.20A>T' + assert results['NM_000518.5:c.20A>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000518.5:c.20A>T']['alt_genomic_loci'] == [] + assert results['NM_000518.5:c.20A>T']['transcript_description'] == 'Homo sapiens hemoglobin subunit beta (HBB), mRNA' + assert results['NM_000518.5:c.20A>T']['gene_symbol'] == 'HBB' + assert results['NM_000518.5:c.20A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000509.1(LRG_1232p1):p.(Glu7Val)', 'slr': 'NP_000509.1:p.(E7V)'} + assert results['NM_000518.5:c.20A>T']['submitted_variant'] == '11-5248232-T-A' + assert results['NM_000518.5:c.20A>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000518.5:c.20A>T']['hgvs_lrg_variant'] == '' + assert results['NM_000518.5:c.20A>T']['hgvs_transcript_variant'] == 'NM_000518.5:c.20A>T' + assert results['NM_000518.5:c.20A>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_000518.5:c.20A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.5248232T>A', 'vcf': {'chr': 'chr11', 'ref': u'T', 'pos': '5248232', 'alt': u'A'}} + assert 'hg38' not in results['NM_000518.5:c.20A>T']['primary_assembly_loci'].keys() + assert results['NM_000518.5:c.20A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.5248232T>A', 'vcf': {'chr': '11', 'ref': u'T', 'pos': '5248232', 'alt': u'A'}} + assert 'grch38' not in results['NM_000518.5:c.20A>T']['primary_assembly_loci'].keys() + assert results['NM_000518.5:c.20A>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000509.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000518.5'} + + assert 'NM_000518.4:c.20A>T' in 
results.keys() + assert results['NM_000518.4:c.20A>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000518.4:c.20A>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000518.4:c.20A>T']['alt_genomic_loci'] == [] + assert results['NM_000518.4:c.20A>T']['transcript_description'] == 'Homo sapiens hemoglobin subunit beta (HBB), mRNA' + assert results['NM_000518.4:c.20A>T']['gene_symbol'] == 'HBB' + assert results['NM_000518.4:c.20A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000509.1(LRG_1232p1):p.(Glu7Val)', 'slr': 'NP_000509.1:p.(E7V)'} + assert results['NM_000518.4:c.20A>T']['submitted_variant'] == '11-5248232-T-A' + assert results['NM_000518.4:c.20A>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000518.4:c.20A>T']['hgvs_lrg_variant'] == '' + assert results['NM_000518.4:c.20A>T']['hgvs_transcript_variant'] == 'NM_000518.4:c.20A>T' + assert results['NM_000518.4:c.20A>T']['hgvs_refseqgene_variant'] == 'NG_000007.3:g.70614A>T' + assert results['NM_000518.4:c.20A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.5248232T>A', 'vcf': {'chr': 'chr11', 'ref': u'T', 'pos': '5248232', 'alt': u'A'}} + assert results['NM_000518.4:c.20A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.5227002T>A', 'vcf': {'chr': 'chr11', 'ref': u'T', 'pos': '5227002', 'alt': u'A'}} + assert results['NM_000518.4:c.20A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.5248232T>A', 'vcf': {'chr': '11', 'ref': u'T', 'pos': '5248232', 'alt': u'A'}} + assert results['NM_000518.4:c.20A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.5227002T>A', 'vcf': {'chr': '11', 'ref': u'T', 'pos': '5227002', 'alt': u'A'}} + assert results['NM_000518.4:c.20A>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_000007.3', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_000509.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000518.4'} + + + def test_variant35(self): + variant = 'NG_007400.1(NM_000088.3):c.589-1G>T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.589-1G>T' in results.keys() + assert results['NM_000088.3:c.589-1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-1G>T' + assert results['NM_000088.3:c.589-1G>T']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-1G>T' + assert results['NM_000088.3:c.589-1G>T']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.589-1G>T']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.589-1G>T']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.589-1G>T']['submitted_variant'] == 'NG_007400.1(NM_000088.3):c.589-1G>T' + assert results['NM_000088.3:c.589-1G>T']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-1G>T' + assert results['NM_000088.3:c.589-1G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8637G>T' + assert results['NM_000088.3:c.589-1G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-1G>T' + assert results['NM_000088.3:c.589-1G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8637G>T' + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '48275364', 'alt': u'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '50198003', 'alt': u'A'}} + assert 
results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '48275364', 'alt': u'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '50198003', 'alt': u'A'}} + assert results['NM_000088.3:c.589-1G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant36(self): + variant = '1:150550916G>A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_182763.2:c.688+403C>T' in results.keys() + assert results['NM_182763.2:c.688+403C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_182763.2:c.688+403C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_182763.2:c.688+403C>T']['alt_genomic_loci'] == [] + assert results['NM_182763.2:c.688+403C>T']['transcript_description'] == 'Homo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 2, mRNA' + assert results['NM_182763.2:c.688+403C>T']['gene_symbol'] == 'MCL1' + assert results['NM_182763.2:c.688+403C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_877495.1:p.?', 'slr': 'NP_877495.1:p.?'} + assert results['NM_182763.2:c.688+403C>T']['submitted_variant'] == '1:150550916G>A' + assert results['NM_182763.2:c.688+403C>T']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_182763.2):c.688+403C>T' + assert results['NM_182763.2:c.688+403C>T']['hgvs_lrg_variant'] == '' + assert results['NM_182763.2:c.688+403C>T']['hgvs_transcript_variant'] == 'NM_182763.2:c.688+403C>T' + assert 
results['NM_182763.2:c.688+403C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '150550916', 'alt': u'A'}} + assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '150578440', 'alt': u'A'}} + assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '150550916', 'alt': u'A'}} + assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '150578440', 'alt': u'A'}} + assert results['NM_182763.2:c.688+403C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_877495.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_182763.2'} + + assert 'NM_001197320.1:c.281C>T' in results.keys() + assert results['NM_001197320.1:c.281C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001197320.1:c.281C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001197320.1:c.281C>T']['alt_genomic_loci'] == [] + assert results['NM_001197320.1:c.281C>T']['transcript_description'] == 'Homo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 3, mRNA' + assert results['NM_001197320.1:c.281C>T']['gene_symbol'] == 'MCL1' + assert results['NM_001197320.1:c.281C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001184249.1:p.(Ser94Phe)', 'slr': 'NP_001184249.1:p.(S94F)'} + assert results['NM_001197320.1:c.281C>T']['submitted_variant'] == '1:150550916G>A' + assert results['NM_001197320.1:c.281C>T']['genome_context_intronic_sequence'] == '' + assert 
results['NM_001197320.1:c.281C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001197320.1:c.281C>T']['hgvs_transcript_variant'] == 'NM_001197320.1:c.281C>T' + assert results['NM_001197320.1:c.281C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '150550916', 'alt': u'A'}} + assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '150578440', 'alt': u'A'}} + assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '150550916', 'alt': u'A'}} + assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '150578440', 'alt': u'A'}} + assert results['NM_001197320.1:c.281C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001184249.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001197320.1'} + + assert 'NM_021960.4:c.740C>T' in results.keys() + assert results['NM_021960.4:c.740C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_021960.4:c.740C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_021960.4:c.740C>T']['alt_genomic_loci'] == [] + assert results['NM_021960.4:c.740C>T']['transcript_description'] == 'Homo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 1, mRNA' + assert results['NM_021960.4:c.740C>T']['gene_symbol'] == 'MCL1' + assert results['NM_021960.4:c.740C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068779.1:p.(Ser247Phe)', 'slr': 'NP_068779.1:p.(S247F)'} + assert 
results['NM_021960.4:c.740C>T']['submitted_variant'] == '1:150550916G>A' + assert results['NM_021960.4:c.740C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_021960.4:c.740C>T']['hgvs_lrg_variant'] == '' + assert results['NM_021960.4:c.740C>T']['hgvs_transcript_variant'] == 'NM_021960.4:c.740C>T' + assert results['NM_021960.4:c.740C>T']['hgvs_refseqgene_variant'] == 'NG_029146.1:g.6299C>T' + assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '150550916', 'alt': u'A'}} + assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '150578440', 'alt': u'A'}} + assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '150550916', 'alt': u'A'}} + assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '150578440', 'alt': u'A'}} + assert results['NM_021960.4:c.740C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029146.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_068779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021960.4'} + + + def test_variant37(self): + variant = '1-150550916-G-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_182763.2:c.688+403C>T' in results.keys() + assert results['NM_182763.2:c.688+403C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_182763.2:c.688+403C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_182763.2:c.688+403C>T']['alt_genomic_loci'] == [] + assert 
results['NM_182763.2:c.688+403C>T']['transcript_description'] == 'Homo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 2, mRNA' + assert results['NM_182763.2:c.688+403C>T']['gene_symbol'] == 'MCL1' + assert results['NM_182763.2:c.688+403C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_877495.1:p.?', 'slr': 'NP_877495.1:p.?'} + assert results['NM_182763.2:c.688+403C>T']['submitted_variant'] == '1-150550916-G-A' + assert results['NM_182763.2:c.688+403C>T']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_182763.2):c.688+403C>T' + assert results['NM_182763.2:c.688+403C>T']['hgvs_lrg_variant'] == '' + assert results['NM_182763.2:c.688+403C>T']['hgvs_transcript_variant'] == 'NM_182763.2:c.688+403C>T' + assert results['NM_182763.2:c.688+403C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '150550916', 'alt': u'A'}} + assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '150578440', 'alt': u'A'}} + assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '150550916', 'alt': u'A'}} + assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '150578440', 'alt': u'A'}} + assert results['NM_182763.2:c.688+403C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_877495.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_182763.2'} + + assert 'NM_001197320.1:c.281C>T' in results.keys() + assert 
results['NM_001197320.1:c.281C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001197320.1:c.281C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001197320.1:c.281C>T']['alt_genomic_loci'] == [] + assert results['NM_001197320.1:c.281C>T']['transcript_description'] == 'Homo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 3, mRNA' + assert results['NM_001197320.1:c.281C>T']['gene_symbol'] == 'MCL1' + assert results['NM_001197320.1:c.281C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001184249.1:p.(Ser94Phe)', 'slr': 'NP_001184249.1:p.(S94F)'} + assert results['NM_001197320.1:c.281C>T']['submitted_variant'] == '1-150550916-G-A' + assert results['NM_001197320.1:c.281C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001197320.1:c.281C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001197320.1:c.281C>T']['hgvs_transcript_variant'] == 'NM_001197320.1:c.281C>T' + assert results['NM_001197320.1:c.281C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '150550916', 'alt': u'A'}} + assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '150578440', 'alt': u'A'}} + assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '150550916', 'alt': u'A'}} + assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '150578440', 'alt': u'A'}} + assert results['NM_001197320.1:c.281C>T']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001184249.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001197320.1'} + + assert 'NM_021960.4:c.740C>T' in results.keys() + assert results['NM_021960.4:c.740C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_021960.4:c.740C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_021960.4:c.740C>T']['alt_genomic_loci'] == [] + assert results['NM_021960.4:c.740C>T']['transcript_description'] == 'Homo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 1, mRNA' + assert results['NM_021960.4:c.740C>T']['gene_symbol'] == 'MCL1' + assert results['NM_021960.4:c.740C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068779.1:p.(Ser247Phe)', 'slr': 'NP_068779.1:p.(S247F)'} + assert results['NM_021960.4:c.740C>T']['submitted_variant'] == '1-150550916-G-A' + assert results['NM_021960.4:c.740C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_021960.4:c.740C>T']['hgvs_lrg_variant'] == '' + assert results['NM_021960.4:c.740C>T']['hgvs_transcript_variant'] == 'NM_021960.4:c.740C>T' + assert results['NM_021960.4:c.740C>T']['hgvs_refseqgene_variant'] == 'NG_029146.1:g.6299C>T' + assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '150550916', 'alt': u'A'}} + assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '150578440', 'alt': u'A'}} + assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '150550916', 'alt': u'A'}} + assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'ref': u'G', 
'pos': '150578440', 'alt': u'A'}} + assert results['NM_021960.4:c.740C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029146.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_068779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021960.4'} + + + def test_variant38(self): + variant = 'NG_008123.1(LEPRE1_v003):c.2055+18G>A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'NG_008123.1(LEPRE1_v003):c.2055+18G>A' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant39(self): + variant = 'NG_008123.1:c.2055+18G>A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + 
assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'NG_008123.1:c.2055+18G>A' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant40(self): + variant = 'NG_008123.1(NM_022356.3):c.2055+18G>A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_022356.3:c.2055+18G>A' in results.keys() + assert results['NM_022356.3:c.2055+18G>A']['hgvs_lrg_transcript_variant'] == 'LRG_5t1:c.2055+18G>A' + assert results['NM_022356.3:c.2055+18G>A']['refseqgene_context_intronic_sequence'] == 'NG_008123.1(NM_022356.3):c.2055+18G>A' + assert results['NM_022356.3:c.2055+18G>A']['alt_genomic_loci'] == [] + assert 
results['NM_022356.3:c.2055+18G>A']['transcript_description'] == 'Homo sapiens prolyl 3-hydroxylase 1 (P3H1), transcript variant 1, mRNA' + assert results['NM_022356.3:c.2055+18G>A']['gene_symbol'] == 'P3H1' + assert results['NM_022356.3:c.2055+18G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_071751.3(LRG_5p1):p.?', 'slr': 'NP_071751.3:p.?'} + assert results['NM_022356.3:c.2055+18G>A']['submitted_variant'] == 'NG_008123.1(NM_022356.3):c.2055+18G>A' + assert results['NM_022356.3:c.2055+18G>A']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_022356.3):c.2055+18G>A' + assert results['NM_022356.3:c.2055+18G>A']['hgvs_lrg_variant'] == 'LRG_5:g.24831G>A' + assert results['NM_022356.3:c.2055+18G>A']['hgvs_transcript_variant'] == 'NM_022356.3:c.2055+18G>A' + assert results['NM_022356.3:c.2055+18G>A']['hgvs_refseqgene_variant'] == 'NG_008123.1:g.24831G>A' + assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': 'chr1', 'ref': u'C', 'pos': '43212925', 'alt': u'T'}} + assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': 'chr1', 'ref': u'C', 'pos': '42747254', 'alt': u'T'}} + assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': '1', 'ref': u'C', 'pos': '43212925', 'alt': u'T'}} + assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': '1', 'ref': u'C', 'pos': '42747254', 'alt': u'T'}} + assert results['NM_022356.3:c.2055+18G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008123.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_071751.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_022356.3', 'lrg': 
'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_5.xml'} + + + def test_variant41(self): + variant = 'NM_021983.4:c.490G>C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_021983.4:c.490G>C' in results.keys() + assert results['NM_021983.4:c.490G>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_021983.4:c.490G>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_021983.4:c.490G>C']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3848158', 'alt': u'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'chr6_mann_hap4', 'ref': 'T', 'pos': '3848158', 'alt': u'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3842538', 'alt': u'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'ref': 'T', 'pos': '3842538', 'alt': u'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': u'C', 'pos': '3884432', 'alt': u'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': u'C', 'pos': '3884432', 'alt': u'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': u'C', 'pos': '3852542', 'alt': u'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': u'C', 'pos': '3852542', 'alt': u'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': u'C', 'pos': '3853244', 'alt': u'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': u'C', 'pos': 
'3853244', 'alt': u'G'}}}] + assert results['NM_021983.4:c.490G>C']['transcript_description'] == 'Homo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA' + assert results['NM_021983.4:c.490G>C']['gene_symbol'] == 'HLA-DRB4' + assert results['NM_021983.4:c.490G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068818.4:p.(Gly164Arg)', 'slr': 'NP_068818.4:p.(G164R)'} + assert results['NM_021983.4:c.490G>C']['submitted_variant'] == 'NM_021983.4:c.490G>C' + assert results['NM_021983.4:c.490G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_021983.4:c.490G>C']['hgvs_lrg_variant'] == '' + assert results['NM_021983.4:c.490G>C']['hgvs_transcript_variant'] == 'NM_021983.4:c.490G>C' + assert results['NM_021983.4:c.490G>C']['hgvs_refseqgene_variant'] == 'NG_002433.1:g.5724C>G' + assert 'hg19' not in results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys() + assert 'hg38' not in results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys() + assert 'grch37' not in results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys() + assert 'grch38' not in results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys() + assert results['NM_021983.4:c.490G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_002433.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_068818.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021983.4'} + + + def test_variant42(self): + variant = 'NM_032470.3:c.4del' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_032470.3:c.4del' in results.keys() + assert results['NM_032470.3:c.4del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_032470.3:c.4del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_032470.3:c.4del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 
'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'chr6_GL000251v2_alt', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'chr6_GL000252v2_alt', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'chr6_GL000254v2_alt', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167248.1:g.3274047del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'ref': 'CG', 'pos': '3274046', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167248.1:g.3274047del', 'vcf': {'chr': 'chr6_qbl_hap6', 'ref': 'CG', 'pos': '3274046', 'alt': 
'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167248.2:g.3268451del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'ref': 'CG', 'pos': '3268450', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167248.2:g.3268451del', 'vcf': {'chr': 'chr6_GL000255v2_alt', 'ref': 'CG', 'pos': '3268450', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3345701del', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'CG', 'pos': '3345700', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3345701del', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': 'CG', 'pos': '3345700', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3346403del', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'CG', 'pos': '3346402', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3346403del', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': 'CG', 'pos': '3346402', 'alt': 'C'}}}] + assert results['NM_032470.3:c.4del']['transcript_description'] == 'Homo sapiens tenascin XB (TNXB), transcript variant XB-S, mRNA' + assert results['NM_032470.3:c.4del']['gene_symbol'] == 'TNXB' + assert results['NM_032470.3:c.4del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_115859.2:p.(Arg2AlafsTer91)', 'slr': 'NP_115859.2:p.(R2Afs*91)'} + assert results['NM_032470.3:c.4del']['submitted_variant'] == 'NM_032470.3:c.4del' + assert results['NM_032470.3:c.4del']['genome_context_intronic_sequence'] == '' + assert results['NM_032470.3:c.4del']['hgvs_lrg_variant'] == '' + assert results['NM_032470.3:c.4del']['hgvs_transcript_variant'] == 'NM_032470.3:c.4del' + assert results['NM_032470.3:c.4del']['hgvs_refseqgene_variant'] == '' + assert results['NM_032470.3:c.4del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': 'chr6', 'ref': 'CG', 'pos': '32012992', 'alt': 'C'}} + assert results['NM_032470.3:c.4del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.32045216del', 'vcf': 
{'chr': 'chr6', 'ref': 'CG', 'pos': '32045215', 'alt': 'C'}} + assert results['NM_032470.3:c.4del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': '6', 'ref': 'CG', 'pos': '32012992', 'alt': 'C'}} + assert results['NM_032470.3:c.4del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.32045216del', 'vcf': {'chr': '6', 'ref': 'CG', 'pos': '32045215', 'alt': 'C'}} + assert results['NM_032470.3:c.4del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_115859.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032470.3'} + + + def test_variant43(self): + variant = 'NM_001194958.2:c.20C>A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001194958.2:c.20C>A' in results.keys() + assert results['NM_001194958.2:c.20C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001194958.2:c.20C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001194958.2:c.20C>A']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315950.2:g.355171C>A', 'vcf': {'chr': 'HG987_PATCH', 'ref': 'C', 'pos': '355171', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315950.2:g.355171C>A', 'vcf': {'chr': 'NW_003315950.2', 'ref': 'C', 'pos': '355171', 'alt': 'A'}}}] + assert results['NM_001194958.2:c.20C>A']['transcript_description'] == 'Homo sapiens potassium voltage-gated channel subfamily J member 18 (KCNJ18), mRNA' + assert results['NM_001194958.2:c.20C>A']['gene_symbol'] == 'KCNJ18' + assert results['NM_001194958.2:c.20C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001181887.2:p.(Ala7Asp)', 'slr': 'NP_001181887.2:p.(A7D)'} + assert results['NM_001194958.2:c.20C>A']['submitted_variant'] == 'NM_001194958.2:c.20C>A' + assert results['NM_001194958.2:c.20C>A']['genome_context_intronic_sequence'] == '' + assert 
results['NM_001194958.2:c.20C>A']['hgvs_lrg_variant'] == '' + assert results['NM_001194958.2:c.20C>A']['hgvs_transcript_variant'] == 'NM_001194958.2:c.20C>A' + assert results['NM_001194958.2:c.20C>A']['hgvs_refseqgene_variant'] == 'NG_033093.1:g.15284C>A' + assert 'hg19' not in results['NM_001194958.2:c.20C>A']['primary_assembly_loci'].keys() + assert results['NM_001194958.2:c.20C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.21702806C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '21702806', 'alt': 'A'}} + assert 'grch37' not in results['NM_001194958.2:c.20C>A']['primary_assembly_loci'].keys() + assert results['NM_001194958.2:c.20C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.21702806C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '21702806', 'alt': 'A'}} + assert results['NM_001194958.2:c.20C>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_033093.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001181887.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001194958.2'} + + assert results['flag'] == 'gene_variant' + + def test_variant44(self): + variant = 'NM_000022.2:c.534A>G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000022.2:c.534A>G' in results.keys() + assert results['NM_000022.2:c.534A>G']['hgvs_lrg_transcript_variant'] == 'LRG_16t1:c.534A>G' + assert results['NM_000022.2:c.534A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000022.2:c.534A>G']['alt_genomic_loci'] == [] + assert results['NM_000022.2:c.534A>G']['transcript_description'] == 'Homo sapiens adenosine deaminase (ADA), mRNA' + assert results['NM_000022.2:c.534A>G']['gene_symbol'] == 'ADA' + assert results['NM_000022.2:c.534A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000013.2(LRG_16p1):p.(Val178=)', 'slr': 'NP_000013.2:p.(V178=)'} + assert 
results['NM_000022.2:c.534A>G']['submitted_variant'] == 'NM_000022.2:c.534A>G' + assert results['NM_000022.2:c.534A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_000022.2:c.534A>G']['hgvs_lrg_variant'] == 'LRG_16:g.32462A>G' + assert results['NM_000022.2:c.534A>G']['hgvs_transcript_variant'] == 'NM_000022.2:c.534A>G' + assert results['NM_000022.2:c.534A>G']['hgvs_refseqgene_variant'] == 'NG_007385.1:g.32462A>G' + assert results['NM_000022.2:c.534A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'ref': u'T', 'pos': '43252915', 'alt': u'C'}} + assert 'hg38' not in results['NM_000022.2:c.534A>G']['primary_assembly_loci'].keys() + assert results['NM_000022.2:c.534A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'ref': u'T', 'pos': '43252915', 'alt': u'C'}} + assert 'grch38' not in results['NM_000022.2:c.534A>G']['primary_assembly_loci'].keys() + assert results['NM_000022.2:c.534A>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007385.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000013.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000022.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_16.xml'} + + + def test_variant45(self): + variant = 'HSCHR6_MHC_SSTO_CTG1-3852542-C-G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_021983.4:c.490G>C' in results.keys() + assert results['NM_021983.4:c.490G>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_021983.4:c.490G>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_021983.4:c.490G>C']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3848158', 'alt': u'G'}}}, {'hg19': 
{'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'chr6_mann_hap4', 'ref': 'T', 'pos': '3848158', 'alt': u'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3842538', 'alt': u'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'ref': 'T', 'pos': '3842538', 'alt': u'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': u'C', 'pos': '3884432', 'alt': u'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': u'C', 'pos': '3884432', 'alt': u'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': u'C', 'pos': '3852542', 'alt': u'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': u'C', 'pos': '3852542', 'alt': u'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': u'C', 'pos': '3853244', 'alt': u'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': u'C', 'pos': '3853244', 'alt': u'G'}}}] + assert results['NM_021983.4:c.490G>C']['transcript_description'] == 'Homo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA' + assert results['NM_021983.4:c.490G>C']['gene_symbol'] == 'HLA-DRB4' + assert results['NM_021983.4:c.490G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068818.4:p.(Gly164Arg)', 'slr': 'NP_068818.4:p.(G164R)'} + assert results['NM_021983.4:c.490G>C']['submitted_variant'] == 'HSCHR6_MHC_SSTO_CTG1-3852542-C-G' + assert results['NM_021983.4:c.490G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_021983.4:c.490G>C']['hgvs_lrg_variant'] == '' + assert 
results['NM_021983.4:c.490G>C']['hgvs_transcript_variant'] == 'NM_021983.4:c.490G>C' + assert results['NM_021983.4:c.490G>C']['hgvs_refseqgene_variant'] == 'NG_002433.1:g.5724C>G' + assert 'hg19' not in results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys() + assert 'hg38' not in results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys() + assert 'grch37' not in results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys() + assert 'grch38' not in results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys() + assert results['NM_021983.4:c.490G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_002433.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_068818.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021983.4'} + + + def test_variant46(self): + variant = 'NM_000368.4:c.363+1dupG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000368.4:c.363+1dup' in results.keys() + assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_transcript_variant'] == 'LRG_486t1:c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['refseqgene_context_intronic_sequence'] == 'NG_012386.1(NM_000368.4):c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['alt_genomic_loci'] == [] + assert results['NM_000368.4:c.363+1dup']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA' + assert results['NM_000368.4:c.363+1dup']['gene_symbol'] == 'TSC1' + assert results['NM_000368.4:c.363+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000359.1(LRG_486p1):p.?', 'slr': 'NP_000359.1:p.?'} + assert results['NM_000368.4:c.363+1dup']['submitted_variant'] == 'NM_000368.4:c.363+1dupG' + assert results['NM_000368.4:c.363+1dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_000368.4):c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_variant'] == 'LRG_486:g.24048dup' + 
assert results['NM_000368.4:c.363+1dup']['hgvs_transcript_variant'] == 'NM_000368.4:c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['hgvs_refseqgene_variant'] == 'NG_012386.1:g.24048dup' + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012386.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_486.xml'} + + + def test_variant47(self): + variant = 'NM_000368.4:c.363dupG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000368.4:c.363+1dup' in results.keys() + assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_transcript_variant'] == 'LRG_486t1:c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['refseqgene_context_intronic_sequence'] == 'NG_012386.1(NM_000368.4):c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['alt_genomic_loci'] == [] + assert results['NM_000368.4:c.363+1dup']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 
(TSC1), transcript variant 1, mRNA' + assert results['NM_000368.4:c.363+1dup']['gene_symbol'] == 'TSC1' + assert results['NM_000368.4:c.363+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000359.1(LRG_486p1):p.(Met122AspfsTer4)', 'slr': 'NP_000359.1:p.(M122Dfs*4)'} + assert results['NM_000368.4:c.363+1dup']['submitted_variant'] == 'NM_000368.4:c.363dupG' + assert results['NM_000368.4:c.363+1dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_000368.4):c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_variant'] == 'LRG_486:g.24048dup' + assert results['NM_000368.4:c.363+1dup']['hgvs_transcript_variant'] == 'NM_000368.4:c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['hgvs_refseqgene_variant'] == 'NG_012386.1:g.24048dup' + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012386.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_486.xml'} + + + def test_variant48(self): + variant = 
'NM_000089.3:c.1033_1035delGTT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000089.3:c.1035_1035+2del' in results.keys() + assert results['NM_000089.3:c.1035_1035+2del']['hgvs_lrg_transcript_variant'] == 'LRG_2t1:c.1035_1035+2del' + assert results['NM_000089.3:c.1035_1035+2del']['refseqgene_context_intronic_sequence'] == 'NG_007405.1(NM_000089.3):c.1035_1035+2del' + assert results['NM_000089.3:c.1035_1035+2del']['alt_genomic_loci'] == [] + assert results['NM_000089.3:c.1035_1035+2del']['transcript_description'] == 'Homo sapiens collagen type I alpha 2 chain (COL1A2), mRNA' + assert results['NM_000089.3:c.1035_1035+2del']['gene_symbol'] == 'COL1A2' + assert results['NM_000089.3:c.1035_1035+2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000080.2(LRG_2p1):p.(Val345del)', 'slr': 'NP_000080.2:p.(V345del)'} + assert results['NM_000089.3:c.1035_1035+2del']['submitted_variant'] == 'NM_000089.3:c.1033_1035delGTT' + assert results['NM_000089.3:c.1035_1035+2del']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_000089.3):c.1035_1035+2del' + assert results['NM_000089.3:c.1035_1035+2del']['hgvs_lrg_variant'] == 'LRG_2:g.20261_20263del' + assert results['NM_000089.3:c.1035_1035+2del']['hgvs_transcript_variant'] == 'NM_000089.3:c.1035_1035+2del' + assert results['NM_000089.3:c.1035_1035+2del']['hgvs_refseqgene_variant'] == 'NG_007405.1:g.20261_20263del' + assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.94039133_94039135del', 'vcf': {'chr': 'chr7', 'ref': 'CTTG', 'pos': '94039128', 'alt': 'C'}} + assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.94409821_94409823del', 'vcf': {'chr': 'chr7', 'ref': 'CTTG', 'pos': '94409816', 'alt': 'C'}} + assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000007.13:g.94039133_94039135del', 'vcf': {'chr': '7', 'ref': 'CTTG', 'pos': '94039128', 'alt': 'C'}} + assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.94409821_94409823del', 'vcf': {'chr': '7', 'ref': 'CTTG', 'pos': '94409816', 'alt': 'C'}} + assert results['NM_000089.3:c.1035_1035+2del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007405.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000080.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000089.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_2.xml'} + + + def test_variant49(self): + variant = 'NM_000089.3:c.1035_1035+2delTGT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000089.3:c.1035_1035+2del' in results.keys() + assert results['NM_000089.3:c.1035_1035+2del']['hgvs_lrg_transcript_variant'] == 'LRG_2t1:c.1035_1035+2del' + assert results['NM_000089.3:c.1035_1035+2del']['refseqgene_context_intronic_sequence'] == 'NG_007405.1(NM_000089.3):c.1035_1035+2del' + assert results['NM_000089.3:c.1035_1035+2del']['alt_genomic_loci'] == [] + assert results['NM_000089.3:c.1035_1035+2del']['transcript_description'] == 'Homo sapiens collagen type I alpha 2 chain (COL1A2), mRNA' + assert results['NM_000089.3:c.1035_1035+2del']['gene_symbol'] == 'COL1A2' + assert results['NM_000089.3:c.1035_1035+2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000080.2(LRG_2p1):p.(Val345del)', 'slr': 'NP_000080.2:p.(V345del)'} + assert results['NM_000089.3:c.1035_1035+2del']['submitted_variant'] == 'NM_000089.3:c.1035_1035+2delTGT' + assert results['NM_000089.3:c.1035_1035+2del']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_000089.3):c.1035_1035+2del' + assert results['NM_000089.3:c.1035_1035+2del']['hgvs_lrg_variant'] == 'LRG_2:g.20261_20263del' + assert 
results['NM_000089.3:c.1035_1035+2del']['hgvs_transcript_variant'] == 'NM_000089.3:c.1035_1035+2del' + assert results['NM_000089.3:c.1035_1035+2del']['hgvs_refseqgene_variant'] == 'NG_007405.1:g.20261_20263del' + assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.94039133_94039135del', 'vcf': {'chr': 'chr7', 'ref': 'CTTG', 'pos': '94039128', 'alt': 'C'}} + assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.94409821_94409823del', 'vcf': {'chr': 'chr7', 'ref': 'CTTG', 'pos': '94409816', 'alt': 'C'}} + assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.94039133_94039135del', 'vcf': {'chr': '7', 'ref': 'CTTG', 'pos': '94039128', 'alt': 'C'}} + assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.94409821_94409823del', 'vcf': {'chr': '7', 'ref': 'CTTG', 'pos': '94409816', 'alt': 'C'}} + assert results['NM_000089.3:c.1035_1035+2del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007405.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000080.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000089.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_2.xml'} + + + def test_variant50(self): + variant = 'NM_000088.3:c.2023_2028delGCAAGA' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.2024_2028+1del' in results.keys() + assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.2024_2028+1del' + assert results['NM_000088.3:c.2024_2028+1del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.2024_2028+1del' + assert 
results['NM_000088.3:c.2024_2028+1del']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.2024_2028+1del']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.2024_2028+1del']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.2024_2028+1del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Ala675_Arg676del)', 'slr': 'NP_000079.2:p.(A675_R676del)'} + assert results['NM_000088.3:c.2024_2028+1del']['submitted_variant'] == 'NM_000088.3:c.2023_2028delGCAAGA' + assert results['NM_000088.3:c.2024_2028+1del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.2024_2028+1del' + assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_variant'] == 'LRG_1:g.14656_14661del' + assert results['NM_000088.3:c.2024_2028+1del']['hgvs_transcript_variant'] == 'NM_000088.3:c.2024_2028+1del' + assert results['NM_000088.3:c.2024_2028+1del']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.14656_14661del' + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269340_48269345del', 'vcf': {'chr': 'chr17', 'ref': 'ACTCTTG', 'pos': '48269339', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191979_50191984del', 'vcf': {'chr': 'chr17', 'ref': 'ACTCTTG', 'pos': '50191978', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269340_48269345del', 'vcf': {'chr': '17', 'ref': 'ACTCTTG', 'pos': '48269339', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191979_50191984del', 'vcf': {'chr': '17', 'ref': 'ACTCTTG', 'pos': '50191978', 'alt': 'A'}} + assert 
results['NM_000088.3:c.2024_2028+1del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant51(self): + variant = 'NM_000089.3:c.938-1delG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000089.3:c.938del' in results.keys() + assert results['NM_000089.3:c.938del']['hgvs_lrg_transcript_variant'] == 'LRG_2t1:c.938del' + assert results['NM_000089.3:c.938del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000089.3:c.938del']['alt_genomic_loci'] == [] + assert results['NM_000089.3:c.938del']['transcript_description'] == 'Homo sapiens collagen type I alpha 2 chain (COL1A2), mRNA' + assert results['NM_000089.3:c.938del']['gene_symbol'] == 'COL1A2' + assert results['NM_000089.3:c.938del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000080.2(LRG_2p1):p.(Gly313AlafsTer86)', 'slr': 'NP_000080.2:p.(G313Afs*86)'} + assert results['NM_000089.3:c.938del']['submitted_variant'] == 'NM_000089.3:c.938-1delG' + assert results['NM_000089.3:c.938del']['genome_context_intronic_sequence'] == '' + assert results['NM_000089.3:c.938del']['hgvs_lrg_variant'] == 'LRG_2:g.20164del' + assert results['NM_000089.3:c.938del']['hgvs_transcript_variant'] == 'NM_000089.3:c.938del' + assert results['NM_000089.3:c.938del']['hgvs_refseqgene_variant'] == 'NG_007405.1:g.20164del' + assert results['NM_000089.3:c.938del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.94039036del', 'vcf': {'chr': 'chr7', 'ref': 'AG', 'pos': '94039033', 'alt': 'A'}} + assert results['NM_000089.3:c.938del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.94409724del', 'vcf': {'chr': 'chr7', 
'ref': 'AG', 'pos': '94409721', 'alt': 'A'}} + assert results['NM_000089.3:c.938del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.94039036del', 'vcf': {'chr': '7', 'ref': 'AG', 'pos': '94039033', 'alt': 'A'}} + assert results['NM_000089.3:c.938del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.94409724del', 'vcf': {'chr': '7', 'ref': 'AG', 'pos': '94409721', 'alt': 'A'}} + assert results['NM_000089.3:c.938del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007405.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000080.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000089.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_2.xml'} + + + def test_variant52(self): + variant = 'NM_000088.3:c.589G=' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.589G=' in results.keys() + assert results['NM_000088.3:c.589G=']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G=' + assert results['NM_000088.3:c.589G=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.589G=']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.589G=']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.589G=']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589G=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197=)', 'slr': 'NP_000079.2:p.(G197=)'} + assert results['NM_000088.3:c.589G=']['submitted_variant'] == 'NM_000088.3:c.589G=' + assert results['NM_000088.3:c.589G=']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.589G=']['hgvs_lrg_variant'] == 'LRG_1:g.8638G=' + assert results['NM_000088.3:c.589G=']['hgvs_transcript_variant'] == 'NM_000088.3:c.589G=' + assert 
results['NM_000088.3:c.589G=']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638G=' + assert results['NM_000088.3:c.589G=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C=', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '48275363', 'alt': u'C'}} + assert results['NM_000088.3:c.589G=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C=', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '50198002', 'alt': u'C'}} + assert results['NM_000088.3:c.589G=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C=', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '48275363', 'alt': u'C'}} + assert results['NM_000088.3:c.589G=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C=', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '50198002', 'alt': u'C'}} + assert results['NM_000088.3:c.589G=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant53(self): + variant = 'NM_000088.3:c.642A=' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.642A=' in results.keys() + assert results['NM_000088.3:c.642A=']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642A=' + assert results['NM_000088.3:c.642A=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.642A=']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.642A=']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.642A=']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.642A=']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_000079.2(LRG_1p1):p.(Ser214=)', 'slr': 'NP_000079.2:p.(S214=)'} + assert results['NM_000088.3:c.642A=']['submitted_variant'] == 'NM_000088.3:c.642A=' + assert results['NM_000088.3:c.642A=']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.642A=']['hgvs_lrg_variant'] == 'LRG_1:g.8691A=' + assert results['NM_000088.3:c.642A=']['hgvs_transcript_variant'] == 'NM_000088.3:c.642A=' + assert results['NM_000088.3:c.642A=']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8691A=' + assert results['NM_000088.3:c.642A=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275310T=', 'vcf': {'chr': 'chr17', 'ref': u'T', 'pos': '48275310', 'alt': u'T'}} + assert results['NM_000088.3:c.642A=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197949T=', 'vcf': {'chr': 'chr17', 'ref': u'T', 'pos': '50197949', 'alt': u'T'}} + assert results['NM_000088.3:c.642A=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275310T=', 'vcf': {'chr': '17', 'ref': u'T', 'pos': '48275310', 'alt': u'T'}} + assert results['NM_000088.3:c.642A=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197949T=', 'vcf': {'chr': '17', 'ref': u'T', 'pos': '50197949', 'alt': u'T'}} + assert results['NM_000088.3:c.642A=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant54(self): + variant = 'NM_000088.3:c.642+1GG>G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.642+1_642+2delinsG' in results.keys() + assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_lrg_transcript_variant'] == 
'LRG_1t1:c.642+2del' + assert results['NM_000088.3:c.642+1_642+2delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.642+2del' + assert results['NM_000088.3:c.642+1_642+2delinsG']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.642+1_642+2delinsG']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.642+1_642+2delinsG']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.642+1_642+2delinsG']['submitted_variant'] == 'NM_000088.3:c.642+1GG>G' + assert results['NM_000088.3:c.642+1_642+2delinsG']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.642+1_642+2delinsG' + assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_lrg_variant'] == 'LRG_1:g.8693del' + assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_transcript_variant'] == 'NM_000088.3:c.642+1_642+2delinsG' + assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8693del' + assert results['NM_000088.3:c.642+1_642+2delinsG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308_48275309delinsC', 'vcf': {'chr': 'chr17', 'ref': 'TA', 'pos': '48275307', 'alt': 'T'}} + assert results['NM_000088.3:c.642+1_642+2delinsG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947_50197948delinsC', 'vcf': {'chr': 'chr17', 'ref': 'TA', 'pos': '50197946', 'alt': 'T'}} + assert results['NM_000088.3:c.642+1_642+2delinsG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308_48275309delinsC', 'vcf': {'chr': '17', 'ref': 'TA', 'pos': '48275307', 'alt': 'T'}} + assert results['NM_000088.3:c.642+1_642+2delinsG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000017.11:g.50197947_50197948delinsC', 'vcf': {'chr': '17', 'ref': 'TA', 'pos': '50197946', 'alt': 'T'}} + assert results['NM_000088.3:c.642+1_642+2delinsG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant55(self): + variant = 'NM_000088.3:c.589-2GG>G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.589-2_589-1delinsG' in results.keys() + assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-2del' + assert results['NM_000088.3:c.589-2_589-1delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-2del' + assert results['NM_000088.3:c.589-2_589-1delinsG']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.589-2_589-1delinsG']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.589-2_589-1delinsG']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.589-2_589-1delinsG']['submitted_variant'] == 'NM_000088.3:c.589-2GG>G' + assert results['NM_000088.3:c.589-2_589-1delinsG']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-2_589-1delinsG' + assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_lrg_variant'] == 'LRG_1:g.8636del' + assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-2_589-1delinsG' + assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8636del' + assert 
results['NM_000088.3:c.589-2_589-1delinsG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364_48275365delinsC', 'vcf': {'chr': 'chr17', 'ref': 'CT', 'pos': '48275364', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2_589-1delinsG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003_50198004delinsC', 'vcf': {'chr': 'chr17', 'ref': 'CT', 'pos': '50198003', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2_589-1delinsG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364_48275365delinsC', 'vcf': {'chr': '17', 'ref': 'CT', 'pos': '48275364', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2_589-1delinsG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003_50198004delinsC', 'vcf': {'chr': '17', 'ref': 'CT', 'pos': '50198003', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2_589-1delinsG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant56(self): + variant = 'NM_000088.3:c.589-6_589-5insTTTT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.589-5_589-4insTTTT' in results.keys() + assert results['NM_000088.3:c.589-5_589-4insTTTT']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-5_589-4insTTTT' + assert results['NM_000088.3:c.589-5_589-4insTTTT']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-5_589-4insTTTT' + assert results['NM_000088.3:c.589-5_589-4insTTTT']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.589-5_589-4insTTTT']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + 
assert results['NM_000088.3:c.589-5_589-4insTTTT']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589-5_589-4insTTTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.589-5_589-4insTTTT']['submitted_variant'] == 'NM_000088.3:c.589-6_589-5insTTTT' + assert results['NM_000088.3:c.589-5_589-4insTTTT']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-5_589-4insTTTT' + assert results['NM_000088.3:c.589-5_589-4insTTTT']['hgvs_lrg_variant'] == 'LRG_1:g.8633_8634insTTTT' + assert results['NM_000088.3:c.589-5_589-4insTTTT']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-5_589-4insTTTT' + assert results['NM_000088.3:c.589-5_589-4insTTTT']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8633_8634insTTTT' + assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275367_48275368insAAAA', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '48275367', 'alt': u'GAAAA'}} + assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198006_50198007insAAAA', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '50198006', 'alt': u'GAAAA'}} + assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275367_48275368insAAAA', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '48275367', 'alt': u'GAAAA'}} + assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198006_50198007insAAAA', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '50198006', 'alt': u'GAAAA'}} + assert results['NM_000088.3:c.589-5_589-4insTTTT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant57(self): + variant = 'NM_000088.3:c.642+3_642+4insAAAA' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.642+4_642+5insAAAA' in results.keys() + assert results['NM_000088.3:c.642+4_642+5insAAAA']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642+4_642+5insAAAA' + assert results['NM_000088.3:c.642+4_642+5insAAAA']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.642+4_642+5insAAAA' + assert results['NM_000088.3:c.642+4_642+5insAAAA']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.642+4_642+5insAAAA']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.642+4_642+5insAAAA']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.642+4_642+5insAAAA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.642+4_642+5insAAAA']['submitted_variant'] == 'NM_000088.3:c.642+3_642+4insAAAA' + assert results['NM_000088.3:c.642+4_642+5insAAAA']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.642+4_642+5insAAAA' + assert results['NM_000088.3:c.642+4_642+5insAAAA']['hgvs_lrg_variant'] == 'LRG_1:g.8695_8696insAAAA' + assert results['NM_000088.3:c.642+4_642+5insAAAA']['hgvs_transcript_variant'] == 'NM_000088.3:c.642+4_642+5insAAAA' + assert results['NM_000088.3:c.642+4_642+5insAAAA']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8695_8696insAAAA' + assert results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275305_48275306insTTTT', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '48275305', 'alt': u'CTTTT'}} + assert results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['hg38'] 
== {'hgvs_genomic_description': 'NC_000017.11:g.50197944_50197945insTTTT', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '50197944', 'alt': u'CTTTT'}} + assert results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275305_48275306insTTTT', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '48275305', 'alt': u'CTTTT'}} + assert results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197944_50197945insTTTT', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '50197944', 'alt': u'CTTTT'}} + assert results['NM_000088.3:c.642+4_642+5insAAAA']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant58(self): + variant = 'NM_000088.3:c.589-4_589-3insTT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.589-4_589-3insTT' in results.keys() + assert results['NM_000088.3:c.589-4_589-3insTT']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-4_589-3insTT' + assert results['NM_000088.3:c.589-4_589-3insTT']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-4_589-3insTT' + assert results['NM_000088.3:c.589-4_589-3insTT']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.589-4_589-3insTT']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.589-4_589-3insTT']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589-4_589-3insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.589-4_589-3insTT']['submitted_variant'] == 
'NM_000088.3:c.589-4_589-3insTT' + assert results['NM_000088.3:c.589-4_589-3insTT']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-4_589-3insTT' + assert results['NM_000088.3:c.589-4_589-3insTT']['hgvs_lrg_variant'] == 'LRG_1:g.8634_8635insTT' + assert results['NM_000088.3:c.589-4_589-3insTT']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-4_589-3insTT' + assert results['NM_000088.3:c.589-4_589-3insTT']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8634_8635insTT' + assert results['NM_000088.3:c.589-4_589-3insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275366_48275367insAA', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '48275366', 'alt': u'TAA'}} + assert results['NM_000088.3:c.589-4_589-3insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005_50198006insAA', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '50198005', 'alt': u'TAA'}} + assert results['NM_000088.3:c.589-4_589-3insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275366_48275367insAA', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '48275366', 'alt': u'TAA'}} + assert results['NM_000088.3:c.589-4_589-3insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005_50198006insAA', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '50198005', 'alt': u'TAA'}} + assert results['NM_000088.3:c.589-4_589-3insTT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant59(self): + variant = 'NM_000088.3:c.589-8del' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.589-7del' in results.keys() + assert 
results['NM_000088.3:c.589-7del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-7del' + assert results['NM_000088.3:c.589-7del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-7del' + assert results['NM_000088.3:c.589-7del']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.589-7del']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.589-7del']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589-7del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.589-7del']['submitted_variant'] == 'NM_000088.3:c.589-8del' + assert results['NM_000088.3:c.589-7del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-7del' + assert results['NM_000088.3:c.589-7del']['hgvs_lrg_variant'] == 'LRG_1:g.8631del' + assert results['NM_000088.3:c.589-7del']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-7del' + assert results['NM_000088.3:c.589-7del']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8631del' + assert results['NM_000088.3:c.589-7del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275370del', 'vcf': {'chr': 'chr17', 'ref': 'GA', 'pos': '48275369', 'alt': 'G'}} + assert results['NM_000088.3:c.589-7del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198009del', 'vcf': {'chr': 'chr17', 'ref': 'GA', 'pos': '50198008', 'alt': 'G'}} + assert results['NM_000088.3:c.589-7del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275370del', 'vcf': {'chr': '17', 'ref': 'GA', 'pos': '48275369', 'alt': 'G'}} + assert results['NM_000088.3:c.589-7del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198009del', 'vcf': {'chr': '17', 'ref': 'GA', 'pos': '50198008', 'alt': 'G'}} + assert 
results['NM_000088.3:c.589-7del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant60(self): + variant = 'NM_000527.4:c.-187_-185delCTC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_000527.4:c.-187_-185del' in results.keys() + assert results['NM_000527.4:c.-187_-185del']['hgvs_lrg_transcript_variant'] == 'LRG_274t1:c.-187_-185del' + assert results['NM_000527.4:c.-187_-185del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000527.4:c.-187_-185del']['alt_genomic_loci'] == [] + assert results['NM_000527.4:c.-187_-185del']['transcript_description'] == 'Homo sapiens low density lipoprotein receptor (LDLR), transcript variant 1, mRNA' + assert results['NM_000527.4:c.-187_-185del']['gene_symbol'] == 'LDLR' + assert results['NM_000527.4:c.-187_-185del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000518.1(LRG_274p1):p.?', 'slr': 'NP_000518.1:p.?'} + assert results['NM_000527.4:c.-187_-185del']['submitted_variant'] == 'NM_000527.4:c.-187_-185delCTC' + assert results['NM_000527.4:c.-187_-185del']['genome_context_intronic_sequence'] == '' + assert results['NM_000527.4:c.-187_-185del']['hgvs_lrg_variant'] == 'LRG_274:g.4982_4984del' + assert results['NM_000527.4:c.-187_-185del']['hgvs_transcript_variant'] == 'NM_000527.4:c.-187_-185del' + assert results['NM_000527.4:c.-187_-185del']['hgvs_refseqgene_variant'] == 'NG_009060.1:g.4982_4984del' + assert results['NM_000527.4:c.-187_-185del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.11200038_11200040del', 'vcf': {'chr': 'chr19', 'ref': 'ACTC', 'pos': '11200031', 'alt': 'A'}} + assert results['NM_000527.4:c.-187_-185del']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000019.10:g.11089362_11089364del', 'vcf': {'chr': 'chr19', 'ref': 'ACTC', 'pos': '11089355', 'alt': 'A'}} + assert results['NM_000527.4:c.-187_-185del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.11200038_11200040del', 'vcf': {'chr': '19', 'ref': 'ACTC', 'pos': '11200031', 'alt': 'A'}} + assert results['NM_000527.4:c.-187_-185del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.11089362_11089364del', 'vcf': {'chr': '19', 'ref': 'ACTC', 'pos': '11089355', 'alt': 'A'}} + assert results['NM_000527.4:c.-187_-185del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009060.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000518.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000527.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_274.xml'} + + assert results['flag'] == 'gene_variant' + + def test_variant61(self): + variant = 'NM_206933.2:c.6317C>G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_206933.2:c.6317C>G' in results.keys() + assert results['NM_206933.2:c.6317C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_206933.2:c.6317C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_206933.2:c.6317C>G']['alt_genomic_loci'] == [] + assert results['NM_206933.2:c.6317C>G']['transcript_description'] == 'Homo sapiens usherin (USH2A), transcript variant 2, mRNA' + assert results['NM_206933.2:c.6317C>G']['gene_symbol'] == 'USH2A' + assert results['NM_206933.2:c.6317C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_996816.2:p.(Thr2106Arg)', 'slr': 'NP_996816.2:p.(T2106R)'} + assert results['NM_206933.2:c.6317C>G']['submitted_variant'] == 'NM_206933.2:c.6317C>G' + assert results['NM_206933.2:c.6317C>G']['genome_context_intronic_sequence'] == '' + assert 
results['NM_206933.2:c.6317C>G']['hgvs_lrg_variant'] == '' + assert results['NM_206933.2:c.6317C>G']['hgvs_transcript_variant'] == 'NM_206933.2:c.6317C>G' + assert results['NM_206933.2:c.6317C>G']['hgvs_refseqgene_variant'] == 'NG_009497.1:g.381958C>G' + assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.216219781A>C', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '216219781', 'alt': u'C'}} + assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.216046439A>C', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '216046439', 'alt': u'C'}} + assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.216219781A>C', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '216219781', 'alt': u'C'}} + assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.216046439A>C', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '216046439', 'alt': u'C'}} + assert results['NM_206933.2:c.6317C>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009497.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_996816.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_206933.2'} + + + def test_variant62(self): + variant = 'NC_000013.10:g.32929387T>C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_000059.3:c.7397C=' in results.keys() + assert results['NM_000059.3:c.7397C=']['hgvs_lrg_transcript_variant'] == 'LRG_293t1:c.7397C=' + assert results['NM_000059.3:c.7397C=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000059.3:c.7397C=']['alt_genomic_loci'] == [] + assert results['NM_000059.3:c.7397C=']['transcript_description'] == 'Homo sapiens BRCA2, DNA repair associated (BRCA2), mRNA' + assert results['NM_000059.3:c.7397C=']['gene_symbol'] == 'BRCA2' + assert 
results['NM_000059.3:c.7397C=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000050.2(LRG_293p1):p.(Ala2466=)', 'slr': 'NP_000050.2:p.(A2466=)'} + assert results['NM_000059.3:c.7397C=']['submitted_variant'] == 'NC_000013.10:g.32929387T>C' + assert results['NM_000059.3:c.7397C=']['genome_context_intronic_sequence'] == '' + assert results['NM_000059.3:c.7397C=']['hgvs_lrg_variant'] == 'LRG_293:g.44771C=' + assert results['NM_000059.3:c.7397C=']['hgvs_transcript_variant'] == 'NM_000059.3:c.7397C=' + assert results['NM_000059.3:c.7397C=']['hgvs_refseqgene_variant'] == 'NG_012772.3:g.44771C=' + assert results['NM_000059.3:c.7397C=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000013.10:g.32929387T>C', 'vcf': {'chr': 'chr13', 'ref': 'T', 'pos': '32929387', 'alt': 'C'}} + assert results['NM_000059.3:c.7397C=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000013.11:g.32355250T>C', 'vcf': {'chr': 'chr13', 'ref': 'T', 'pos': '32355250', 'alt': 'C'}} + assert results['NM_000059.3:c.7397C=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000013.10:g.32929387T>C', 'vcf': {'chr': '13', 'ref': 'T', 'pos': '32929387', 'alt': 'C'}} + assert results['NM_000059.3:c.7397C=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000013.11:g.32355250T>C', 'vcf': {'chr': '13', 'ref': 'T', 'pos': '32355250', 'alt': 'C'}} + assert results['NM_000059.3:c.7397C=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012772.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000050.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000059.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_293.xml'} + + assert results['flag'] == 'gene_variant' + + def test_variant63(self): + variant = 'NM_015102.3:c.2818-2T>A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_015102.3:c.2818-2T>A' in results.keys() + assert 
results['NM_015102.3:c.2818-2T>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_015102.3:c.2818-2T>A']['refseqgene_context_intronic_sequence'] == 'NG_011724.2(NM_015102.3):c.2818-2A=' + assert results['NM_015102.3:c.2818-2T>A']['alt_genomic_loci'] == [] + assert results['NM_015102.3:c.2818-2T>A']['transcript_description'] == 'Homo sapiens nephronophthisis 4 (NPHP4), mRNA' + assert results['NM_015102.3:c.2818-2T>A']['gene_symbol'] == 'NPHP4' + assert results['NM_015102.3:c.2818-2T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055917.1:p.?', 'slr': 'NP_055917.1:p.?'} + assert results['NM_015102.3:c.2818-2T>A']['submitted_variant'] == 'NM_015102.3:c.2818-2T>A' + assert results['NM_015102.3:c.2818-2T>A']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_015102.3):c.2818-2T>A' + assert results['NM_015102.3:c.2818-2T>A']['hgvs_lrg_variant'] == '' + assert results['NM_015102.3:c.2818-2T>A']['hgvs_transcript_variant'] == 'NM_015102.3:c.2818-2T>A' + assert results['NM_015102.3:c.2818-2T>A']['hgvs_refseqgene_variant'] == 'NG_011724.2:g.122370A=' + assert results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'ref': u'A', 'pos': '5935162', 'alt': u'T'}} + assert 'hg38' not in results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci'].keys() + assert results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'ref': u'A', 'pos': '5935162', 'alt': u'T'}} + assert 'grch38' not in results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci'].keys() + assert results['NM_015102.3:c.2818-2T>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011724.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055917.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015102.3'} + + assert results['flag'] == 'gene_variant' + + 
def test_variant64(self): + variant = '19-41123094-G-GG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_001042544.1:c.3233_3235=' in results.keys() + assert results['NM_001042544.1:c.3233_3235=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001042544.1:c.3233_3235=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001042544.1:c.3233_3235=']['alt_genomic_loci'] == [] + assert results['NM_001042544.1:c.3233_3235=']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA' + assert results['NM_001042544.1:c.3233_3235=']['gene_symbol'] == 'LTBP4' + assert results['NM_001042544.1:c.3233_3235=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036009.1:p.(Gln1078=)', 'slr': 'NP_001036009.1:p.(Q1078=)'} + assert results['NM_001042544.1:c.3233_3235=']['submitted_variant'] == '19-41123094-G-GG' + assert results['NM_001042544.1:c.3233_3235=']['genome_context_intronic_sequence'] == '' + assert results['NM_001042544.1:c.3233_3235=']['hgvs_lrg_variant'] == '' + assert results['NM_001042544.1:c.3233_3235=']['hgvs_transcript_variant'] == 'NM_001042544.1:c.3233_3235=' + assert results['NM_001042544.1:c.3233_3235=']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29022_29024=' + assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} + assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'ref': u'AGG', 'pos': '40617187', 'alt': u'AGG'}} + assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': 
'41123093', 'alt': 'AG'}} + assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'ref': u'AGG', 'pos': '40617187', 'alt': u'AGG'}} + assert results['NM_001042544.1:c.3233_3235=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1'} + + assert 'NM_001042545.1:c.3032_3034=' in results.keys() + assert results['NM_001042545.1:c.3032_3034=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001042545.1:c.3032_3034=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001042545.1:c.3032_3034=']['alt_genomic_loci'] == [] + assert results['NM_001042545.1:c.3032_3034=']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA' + assert results['NM_001042545.1:c.3032_3034=']['gene_symbol'] == 'LTBP4' + assert results['NM_001042545.1:c.3032_3034=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036010.1:p.(Gln1011=)', 'slr': 'NP_001036010.1:p.(Q1011=)'} + assert results['NM_001042545.1:c.3032_3034=']['submitted_variant'] == '19-41123094-G-GG' + assert results['NM_001042545.1:c.3032_3034=']['genome_context_intronic_sequence'] == '' + assert results['NM_001042545.1:c.3032_3034=']['hgvs_lrg_variant'] == '' + assert results['NM_001042545.1:c.3032_3034=']['hgvs_transcript_variant'] == 'NM_001042545.1:c.3032_3034=' + assert results['NM_001042545.1:c.3032_3034=']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29022_29024=' + assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} + assert 
results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'ref': u'AGG', 'pos': '40617187', 'alt': u'AGG'}} + assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} + assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'ref': u'AGG', 'pos': '40617187', 'alt': u'AGG'}} + assert results['NM_001042545.1:c.3032_3034=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1'} + + assert 'NM_003573.2:c.3122_3124=' in results.keys() + assert results['NM_003573.2:c.3122_3124=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003573.2:c.3122_3124=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003573.2:c.3122_3124=']['alt_genomic_loci'] == [] + assert results['NM_003573.2:c.3122_3124=']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA' + assert results['NM_003573.2:c.3122_3124=']['gene_symbol'] == 'LTBP4' + assert results['NM_003573.2:c.3122_3124=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003564.2:p.(Gln1041=)', 'slr': 'NP_003564.2:p.(Q1041=)'} + assert results['NM_003573.2:c.3122_3124=']['submitted_variant'] == '19-41123094-G-GG' + assert results['NM_003573.2:c.3122_3124=']['genome_context_intronic_sequence'] == '' + assert results['NM_003573.2:c.3122_3124=']['hgvs_lrg_variant'] == '' + assert results['NM_003573.2:c.3122_3124=']['hgvs_transcript_variant'] == 'NM_003573.2:c.3122_3124=' + assert 
results['NM_003573.2:c.3122_3124=']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29022_29024=' + assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} + assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'ref': u'AGG', 'pos': '40617187', 'alt': u'AGG'}} + assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} + assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'ref': u'AGG', 'pos': '40617187', 'alt': u'AGG'}} + assert results['NM_003573.2:c.3122_3124=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2'} + + + def test_variant65(self): + variant = '15-72105928-AC-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_014249.2:c.946_949=' in results.keys() + assert results['NM_014249.2:c.946_949=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014249.2:c.946_949=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014249.2:c.946_949=']['alt_genomic_loci'] == [] + assert results['NM_014249.2:c.946_949=']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA' + assert results['NM_014249.2:c.946_949=']['gene_symbol'] == 'NR2E3' + assert results['NM_014249.2:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_055064.1:p.(Asp316=)', 'slr': 'NP_055064.1:p.(D316=)'} + assert results['NM_014249.2:c.946_949=']['submitted_variant'] == '15-72105928-AC-A' + assert results['NM_014249.2:c.946_949=']['genome_context_intronic_sequence'] == '' + assert results['NM_014249.2:c.946_949=']['hgvs_lrg_variant'] == '' + assert results['NM_014249.2:c.946_949=']['hgvs_transcript_variant'] == 'NM_014249.2:c.946_949=' + assert results['NM_014249.2:c.946_949=']['hgvs_refseqgene_variant'] == '' + assert results['NM_014249.2:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} + assert 'hg38' not in results['NM_014249.2:c.946_949=']['primary_assembly_loci'].keys() + assert results['NM_014249.2:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} + assert 'grch38' not in results['NM_014249.2:c.946_949=']['primary_assembly_loci'].keys() + assert results['NM_014249.2:c.946_949=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2'} + + assert 'NM_016346.3:c.946_949=' in results.keys() + assert results['NM_016346.3:c.946_949=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_016346.3:c.946_949=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_016346.3:c.946_949=']['alt_genomic_loci'] == [] + assert results['NM_016346.3:c.946_949=']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA' + assert results['NM_016346.3:c.946_949=']['gene_symbol'] == 'NR2E3' + assert results['NM_016346.3:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Asp316=)', 'slr': 'NP_057430.1:p.(D316=)'} + assert 
results['NM_016346.3:c.946_949=']['submitted_variant'] == '15-72105928-AC-A' + assert results['NM_016346.3:c.946_949=']['genome_context_intronic_sequence'] == '' + assert results['NM_016346.3:c.946_949=']['hgvs_lrg_variant'] == '' + assert results['NM_016346.3:c.946_949=']['hgvs_transcript_variant'] == 'NM_016346.3:c.946_949=' + assert results['NM_016346.3:c.946_949=']['hgvs_refseqgene_variant'] == '' + assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} + assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813587_71813590=', 'vcf': {'chr': 'chr15', 'ref': u'GACC', 'pos': '71813587', 'alt': u'GACC'}} + assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} + assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813587_71813590=', 'vcf': {'chr': '15', 'ref': u'GACC', 'pos': '71813587', 'alt': u'GACC'}} + assert results['NM_016346.3:c.946_949=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3'} + + assert results['flag'] == 'gene_variant' + assert 'NM_014249.3:c.946_949=' in results.keys() + assert results['NM_014249.3:c.946_949=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014249.3:c.946_949=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014249.3:c.946_949=']['alt_genomic_loci'] == [] + assert results['NM_014249.3:c.946_949=']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA' + assert 
results['NM_014249.3:c.946_949=']['gene_symbol'] == 'NR2E3' + assert results['NM_014249.3:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Asp316=)', 'slr': 'NP_055064.1:p.(D316=)'} + assert results['NM_014249.3:c.946_949=']['submitted_variant'] == '15-72105928-AC-A' + assert results['NM_014249.3:c.946_949=']['genome_context_intronic_sequence'] == '' + assert results['NM_014249.3:c.946_949=']['hgvs_lrg_variant'] == '' + assert results['NM_014249.3:c.946_949=']['hgvs_transcript_variant'] == 'NM_014249.3:c.946_949=' + assert results['NM_014249.3:c.946_949=']['hgvs_refseqgene_variant'] == 'NG_009113.1:g.8034_8037=' + assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} + assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813586_71813591=', 'vcf': {'chr': 'chr15', 'ref': u'GGACCC', 'pos': '71813586', 'alt': u'GGACCC'}} + assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} + assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813586_71813591=', 'vcf': {'chr': '15', 'ref': u'GGACCC', 'pos': '71813586', 'alt': u'GGACCC'}} + assert results['NM_014249.3:c.946_949=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3'} + + assert 'NM_016346.2:c.946_949=' in results.keys() + assert results['NM_016346.2:c.946_949=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_016346.2:c.946_949=']['refseqgene_context_intronic_sequence'] 
== '' + assert results['NM_016346.2:c.946_949=']['alt_genomic_loci'] == [] + assert results['NM_016346.2:c.946_949=']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA' + assert results['NM_016346.2:c.946_949=']['gene_symbol'] == 'NR2E3' + assert results['NM_016346.2:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Asp316=)', 'slr': 'NP_057430.1:p.(D316=)'} + assert results['NM_016346.2:c.946_949=']['submitted_variant'] == '15-72105928-AC-A' + assert results['NM_016346.2:c.946_949=']['genome_context_intronic_sequence'] == '' + assert results['NM_016346.2:c.946_949=']['hgvs_lrg_variant'] == '' + assert results['NM_016346.2:c.946_949=']['hgvs_transcript_variant'] == 'NM_016346.2:c.946_949=' + assert results['NM_016346.2:c.946_949=']['hgvs_refseqgene_variant'] == '' + assert results['NM_016346.2:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} + assert 'hg38' not in results['NM_016346.2:c.946_949=']['primary_assembly_loci'].keys() + assert results['NM_016346.2:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} + assert 'grch38' not in results['NM_016346.2:c.946_949=']['primary_assembly_loci'].keys() + assert results['NM_016346.2:c.946_949=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2'} + + + def test_variant66(self): + variant = '12-122064773-CCCGCCA-C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_032790.3:c.126_128=' in results.keys() + assert results['NM_032790.3:c.126_128=']['hgvs_lrg_transcript_variant'] == 
'LRG_93t1:c.126_128=' + assert results['NM_032790.3:c.126_128=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.126_128=']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302883_302888del', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'CCCGCCA', 'pos': '302871', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302883_302888del', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'CCCGCCA', 'pos': '302871', 'alt': 'C'}}}] + assert results['NM_032790.3:c.126_128=']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' + assert results['NM_032790.3:c.126_128=']['gene_symbol'] == 'ORAI1' + assert results['NM_032790.3:c.126_128=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Ala42=)', 'slr': 'NP_116179.2:p.(A42=)'} + assert results['NM_032790.3:c.126_128=']['submitted_variant'] == '12-122064773-CCCGCCA-C' + assert results['NM_032790.3:c.126_128=']['genome_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.126_128=']['hgvs_lrg_variant'] == 'LRG_93:g.5299_5301=' + assert results['NM_032790.3:c.126_128=']['hgvs_transcript_variant'] == 'NM_032790.3:c.126_128=' + assert results['NM_032790.3:c.126_128=']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5299_5301=' + assert results['NM_032790.3:c.126_128=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064785_122064790del', 'vcf': {'chr': 'chr12', 'ref': 'CCCGCCA', 'pos': '122064773', 'alt': 'C'}} + assert results['NM_032790.3:c.126_128=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626875=', 'vcf': {'chr': 'chr12', 'ref': u'CCC', 'pos': '121626873', 'alt': u'CCC'}} + assert results['NM_032790.3:c.126_128=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064785_122064790del', 'vcf': {'chr': '12', 'ref': 'CCCGCCA', 'pos': '122064773', 'alt': 
'C'}} + assert results['NM_032790.3:c.126_128=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626875=', 'vcf': {'chr': '12', 'ref': u'CCC', 'pos': '121626873', 'alt': u'CCC'}} + assert results['NM_032790.3:c.126_128=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} + + + def test_variant67(self): + variant = '12-122064774-CCGCCA-CCGCCA' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_032790.3:c.132_137dup' in results.keys() + assert results['NM_032790.3:c.132_137dup']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.132_137dup' + assert results['NM_032790.3:c.132_137dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.132_137dup']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302868_302887=', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'GGCCCCGCCACCGCCACCGC', 'pos': '302868', 'alt': 'GGCCCCGCCACCGCCACCGC'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302868_302887=', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'GGCCCCGCCACCGCCACCGC', 'pos': '302868', 'alt': 'GGCCCCGCCACCGCCACCGC'}}}] + assert results['NM_032790.3:c.132_137dup']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' + assert results['NM_032790.3:c.132_137dup']['gene_symbol'] == 'ORAI1' + assert results['NM_032790.3:c.132_137dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro46_Pro47dup)', 'slr': 'NP_116179.2:p.(P46_P47dup)'} + assert results['NM_032790.3:c.132_137dup']['submitted_variant'] == '12-122064774-CCGCCA-CCGCCA' + assert results['NM_032790.3:c.132_137dup']['genome_context_intronic_sequence'] == '' + assert 
results['NM_032790.3:c.132_137dup']['hgvs_lrg_variant'] == 'LRG_93:g.5305_5310dup' + assert results['NM_032790.3:c.132_137dup']['hgvs_transcript_variant'] == 'NM_032790.3:c.132_137dup' + assert results['NM_032790.3:c.132_137dup']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5305_5310dup' + assert results['NM_032790.3:c.132_137dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064770_122064789=', 'vcf': {'chr': 'chr12', 'ref': 'GGCCCCGCCACCGCCACCGC', 'pos': '122064770', 'alt': 'GGCCCCGCCACCGCCACCGC'}} + assert results['NM_032790.3:c.132_137dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626879_121626884dup', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCGCCA'}} + assert results['NM_032790.3:c.132_137dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064770_122064789=', 'vcf': {'chr': '12', 'ref': 'GGCCCCGCCACCGCCACCGC', 'pos': '122064770', 'alt': 'GGCCCCGCCACCGCCACCGC'}} + assert results['NM_032790.3:c.132_137dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626879_121626884dup', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCGCCA'}} + assert results['NM_032790.3:c.132_137dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} + + assert results['flag'] == 'gene_variant' + + def test_variant68(self): + variant = '12-122064773-CCCGCCACCGCCACCGC-CCCGCCACCGCCGCCGTC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_032790.3:c.132_135delinsGCCGT' in results.keys() + assert results['NM_032790.3:c.132_135delinsGCCGT']['hgvs_lrg_transcript_variant'] == 
'LRG_93t1:c.132_135delinsGCCGT' + assert results['NM_032790.3:c.132_135delinsGCCGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.132_135delinsGCCGT']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302883_302886delinsGCCGT', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'ACCG', 'pos': '302883', 'alt': u'GCCGT'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302883_302886delinsGCCGT', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'ACCG', 'pos': '302883', 'alt': u'GCCGT'}}}] + assert results['NM_032790.3:c.132_135delinsGCCGT']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' + assert results['NM_032790.3:c.132_135delinsGCCGT']['gene_symbol'] == 'ORAI1' + assert results['NM_032790.3:c.132_135delinsGCCGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro46SerfsTer42)', 'slr': 'NP_116179.2:p.(P46Sfs*42)'} + assert results['NM_032790.3:c.132_135delinsGCCGT']['submitted_variant'] == '12-122064773-CCCGCCACCGCCACCGC-CCCGCCACCGCCGCCGTC' + assert results['NM_032790.3:c.132_135delinsGCCGT']['genome_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.132_135delinsGCCGT']['hgvs_lrg_variant'] == 'LRG_93:g.5305_5308delinsGCCGT' + assert results['NM_032790.3:c.132_135delinsGCCGT']['hgvs_transcript_variant'] == 'NM_032790.3:c.132_135delinsGCCGT' + assert results['NM_032790.3:c.132_135delinsGCCGT']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5305_5308delinsGCCGT' + assert results['NM_032790.3:c.132_135delinsGCCGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064785_122064788delinsGCCGT', 'vcf': {'chr': 'chr12', 'ref': 'ACCG', 'pos': '122064785', 'alt': u'GCCGT'}} + assert results['NM_032790.3:c.132_135delinsGCCGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626879_121626882delinsGCCGT', 'vcf': {'chr': 'chr12', 'ref': 'ACCG', 'pos': 
'121626879', 'alt': u'GCCGT'}} + assert results['NM_032790.3:c.132_135delinsGCCGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064785_122064788delinsGCCGT', 'vcf': {'chr': '12', 'ref': 'ACCG', 'pos': '122064785', 'alt': u'GCCGT'}} + assert results['NM_032790.3:c.132_135delinsGCCGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626879_121626882delinsGCCGT', 'vcf': {'chr': '12', 'ref': 'ACCG', 'pos': '121626879', 'alt': u'GCCGT'}} + assert results['NM_032790.3:c.132_135delinsGCCGT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} + + + def test_variant69(self): + variant = 'NC_000012.11:g.122064777C>A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_032790.3:c.129_130insACACCG' in results.keys() + assert results['NM_032790.3:c.129_130insACACCG']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.129_130insACACCG' + assert results['NM_032790.3:c.129_130insACACCG']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.129_130insACACCG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302875C>A', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'C', 'pos': '302875', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302875C>A', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'C', 'pos': '302875', 'alt': 'A'}}}] + assert results['NM_032790.3:c.129_130insACACCG']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' + assert results['NM_032790.3:c.129_130insACACCG']['gene_symbol'] == 'ORAI1' + assert 
results['NM_032790.3:c.129_130insACACCG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro43_Pro44insThrPro)', 'slr': 'NP_116179.2:p.(P43_P44insTP)'} + assert results['NM_032790.3:c.129_130insACACCG']['submitted_variant'] == 'NC_000012.11:g.122064777C>A' + assert results['NM_032790.3:c.129_130insACACCG']['genome_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.129_130insACACCG']['hgvs_lrg_variant'] == 'LRG_93:g.5302_5303insACACCG' + assert results['NM_032790.3:c.129_130insACACCG']['hgvs_transcript_variant'] == 'NM_032790.3:c.129_130insACACCG' + assert results['NM_032790.3:c.129_130insACACCG']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5302_5303insACACCG' + assert results['NM_032790.3:c.129_130insACACCG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064777C>A', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '122064777', 'alt': 'A'}} + assert results['NM_032790.3:c.129_130insACACCG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insACACCG', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCGACA'}} + assert results['NM_032790.3:c.129_130insACACCG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064777C>A', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '122064777', 'alt': 'A'}} + assert results['NM_032790.3:c.129_130insACACCG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insACACCG', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCGACA'}} + assert results['NM_032790.3:c.129_130insACACCG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} + + + def test_variant70(self): + 
variant = 'NC_000012.11:g.122064776delG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_032790.3:c.128_129insCCACC' in results.keys() + assert results['NM_032790.3:c.128_129insCCACC']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.128_129insCCACC' + assert results['NM_032790.3:c.128_129insCCACC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.128_129insCCACC']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302874del', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'CG', 'pos': '302873', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302874del', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'CG', 'pos': '302873', 'alt': 'C'}}}] + assert results['NM_032790.3:c.128_129insCCACC']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' + assert results['NM_032790.3:c.128_129insCCACC']['gene_symbol'] == 'ORAI1' + assert results['NM_032790.3:c.128_129insCCACC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro44HisfsTer22)', 'slr': 'NP_116179.2:p.(P44Hfs*22)'} + assert results['NM_032790.3:c.128_129insCCACC']['submitted_variant'] == 'NC_000012.11:g.122064776delG' + assert results['NM_032790.3:c.128_129insCCACC']['genome_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.128_129insCCACC']['hgvs_lrg_variant'] == 'LRG_93:g.5301_5302insCCACC' + assert results['NM_032790.3:c.128_129insCCACC']['hgvs_transcript_variant'] == 'NM_032790.3:c.128_129insCCACC' + assert results['NM_032790.3:c.128_129insCCACC']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5301_5302insCCACC' + assert results['NM_032790.3:c.128_129insCCACC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064776del', 'vcf': {'chr': 'chr12', 'ref': 'CG', 'pos': '122064775', 'alt': 'C'}} + assert 
results['NM_032790.3:c.128_129insCCACC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626875_121626876insCCACC', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCCCA'}} + assert results['NM_032790.3:c.128_129insCCACC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064776del', 'vcf': {'chr': '12', 'ref': 'CG', 'pos': '122064775', 'alt': 'C'}} + assert results['NM_032790.3:c.128_129insCCACC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626875_121626876insCCACC', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCCCA'}} + assert results['NM_032790.3:c.128_129insCCACC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} + + + def test_variant71(self): + variant = 'NC_000012.11:g.122064776dupG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_032790.3:c.129_130insGCCACCG' in results.keys() + assert results['NM_032790.3:c.129_130insGCCACCG']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.129_130insGCCACCG' + assert results['NM_032790.3:c.129_130insGCCACCG']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.129_130insGCCACCG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302874dup', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'C', 'pos': '302873', 'alt': 'CG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302874dup', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'C', 'pos': '302873', 'alt': 'CG'}}}] + assert results['NM_032790.3:c.129_130insGCCACCG']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 
(ORAI1), mRNA' + assert results['NM_032790.3:c.129_130insGCCACCG']['gene_symbol'] == 'ORAI1' + assert results['NM_032790.3:c.129_130insGCCACCG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro44AlafsTer46)', 'slr': 'NP_116179.2:p.(P44Afs*46)'} + assert results['NM_032790.3:c.129_130insGCCACCG']['submitted_variant'] == 'NC_000012.11:g.122064776dupG' + assert results['NM_032790.3:c.129_130insGCCACCG']['genome_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.129_130insGCCACCG']['hgvs_lrg_variant'] == 'LRG_93:g.5302_5303insGCCACCG' + assert results['NM_032790.3:c.129_130insGCCACCG']['hgvs_transcript_variant'] == 'NM_032790.3:c.129_130insGCCACCG' + assert results['NM_032790.3:c.129_130insGCCACCG']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5302_5303insGCCACCG' + assert results['NM_032790.3:c.129_130insGCCACCG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064776dup', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '122064775', 'alt': 'CG'}} + assert results['NM_032790.3:c.129_130insGCCACCG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insGCCACCG', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCGGCCA'}} + assert results['NM_032790.3:c.129_130insGCCACCG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064776dup', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '122064775', 'alt': 'CG'}} + assert results['NM_032790.3:c.129_130insGCCACCG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insGCCACCG', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCGGCCA'}} + assert results['NM_032790.3:c.129_130insGCCACCG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} + + + def test_variant72(self): + variant = 'NC_000012.11:g.122064776_122064777insTTT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_032790.3:c.129_130insTTTCCACCG' in results.keys() + assert results['NM_032790.3:c.129_130insTTTCCACCG']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.129_130insTTTCCACCG' + assert results['NM_032790.3:c.129_130insTTTCCACCG']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.129_130insTTTCCACCG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302874_302875insTTT', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'G', 'pos': '302874', 'alt': 'GTTT'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302874_302875insTTT', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'G', 'pos': '302874', 'alt': 'GTTT'}}}] + assert results['NM_032790.3:c.129_130insTTTCCACCG']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' + assert results['NM_032790.3:c.129_130insTTTCCACCG']['gene_symbol'] == 'ORAI1' + assert results['NM_032790.3:c.129_130insTTTCCACCG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro43_Pro44insPheProPro)', 'slr': 'NP_116179.2:p.(P43_P44insFPP)'} + assert results['NM_032790.3:c.129_130insTTTCCACCG']['submitted_variant'] == 'NC_000012.11:g.122064776_122064777insTTT' + assert results['NM_032790.3:c.129_130insTTTCCACCG']['genome_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.129_130insTTTCCACCG']['hgvs_lrg_variant'] == 'LRG_93:g.5302_5303insTTTCCACCG' + assert results['NM_032790.3:c.129_130insTTTCCACCG']['hgvs_transcript_variant'] == 'NM_032790.3:c.129_130insTTTCCACCG' + assert results['NM_032790.3:c.129_130insTTTCCACCG']['hgvs_refseqgene_variant'] == 
'NG_007500.1:g.5302_5303insTTTCCACCG' + assert results['NM_032790.3:c.129_130insTTTCCACCG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064776_122064777insTTT', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '122064776', 'alt': 'GTTT'}} + assert results['NM_032790.3:c.129_130insTTTCCACCG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insTTTCCACCG', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCGTTTCCA'}} + assert results['NM_032790.3:c.129_130insTTTCCACCG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064776_122064777insTTT', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '122064776', 'alt': 'GTTT'}} + assert results['NM_032790.3:c.129_130insTTTCCACCG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insTTTCCACCG', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCGTTTCCA'}} + assert results['NM_032790.3:c.129_130insTTTCCACCG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} + + + def test_variant73(self): + variant = 'NC_000012.11:g.122064772_122064775del' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_032790.3:c.125_126delinsGCCA' in results.keys() + assert results['NM_032790.3:c.125_126delinsGCCA']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.125_126delinsGCCA' + assert results['NM_032790.3:c.125_126delinsGCCA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.125_126delinsGCCA']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302870_302873del', 'vcf': {'chr': 
'HG1595_PATCH', 'ref': 'GCCCC', 'pos': '302869', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302870_302873del', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'GCCCC', 'pos': '302869', 'alt': 'G'}}}] + assert results['NM_032790.3:c.125_126delinsGCCA']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' + assert results['NM_032790.3:c.125_126delinsGCCA']['gene_symbol'] == 'ORAI1' + assert results['NM_032790.3:c.125_126delinsGCCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Ala42GlyfsTer23)', 'slr': 'NP_116179.2:p.(A42Gfs*23)'} + assert results['NM_032790.3:c.125_126delinsGCCA']['submitted_variant'] == 'NC_000012.11:g.122064772_122064775del' + assert results['NM_032790.3:c.125_126delinsGCCA']['genome_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.125_126delinsGCCA']['hgvs_lrg_variant'] == 'LRG_93:g.5298_5299delinsGCCA' + assert results['NM_032790.3:c.125_126delinsGCCA']['hgvs_transcript_variant'] == 'NM_032790.3:c.125_126delinsGCCA' + assert results['NM_032790.3:c.125_126delinsGCCA']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5298_5299delinsGCCA' + assert results['NM_032790.3:c.125_126delinsGCCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064772_122064775del', 'vcf': {'chr': 'chr12', 'ref': 'GCCCC', 'pos': '122064771', 'alt': 'G'}} + assert results['NM_032790.3:c.125_126delinsGCCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626867_121626873delinsGCCA', 'vcf': {'chr': 'chr12', 'ref': 'CCCCGCC', 'pos': '121626867', 'alt': 'GCCA'}} + assert results['NM_032790.3:c.125_126delinsGCCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064772_122064775del', 'vcf': {'chr': '12', 'ref': 'GCCCC', 'pos': '122064771', 'alt': 'G'}} + assert results['NM_032790.3:c.125_126delinsGCCA']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000012.12:g.121626867_121626873delinsGCCA', 'vcf': {'chr': '12', 'ref': 'CCCCGCC', 'pos': '121626867', 'alt': 'GCCA'}} + assert results['NM_032790.3:c.125_126delinsGCCA']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} + + + def test_variant74(self): + variant = 'NC_000012.11:g.122064772_122064775dup' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_032790.3:c.128_129insCCCCGCCACC' in results.keys() + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.128_129insCCCCGCCACC' + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302870_302873dup', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'G', 'pos': '302869', 'alt': 'GCCCC'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302870_302873dup', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'G', 'pos': '302869', 'alt': 'GCCCC'}}}] + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['gene_symbol'] == 'ORAI1' + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro45AlafsTer46)', 'slr': 'NP_116179.2:p.(P45Afs*46)'} + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['submitted_variant'] == 'NC_000012.11:g.122064772_122064775dup' + assert 
results['NM_032790.3:c.128_129insCCCCGCCACC']['genome_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['hgvs_lrg_variant'] == 'LRG_93:g.5301_5302insCCCCGCCACC' + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['hgvs_transcript_variant'] == 'NM_032790.3:c.128_129insCCCCGCCACC' + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5301_5302insCCCCGCCACC' + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064772_122064775dup', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '122064771', 'alt': 'GCCCC'}} + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626875_121626876insCCCCGCCACC', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCCCCCGCCA'}} + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064772_122064775dup', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '122064771', 'alt': 'GCCCC'}} + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626875_121626876insCCCCGCCACC', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCCCCCGCCA'}} + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} + + + def test_variant75(self): + variant = 'NC_000012.11:g.122064773_122064774insTTTT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_032790.3:c.126_127insTTTTCCGCCA' in results.keys() + assert 
results['NM_032790.3:c.126_127insTTTTCCGCCA']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.126_127insTTTTCCGCCA' + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302872insTTTT', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'C', 'pos': '302871', 'alt': 'CTTTT'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302872insTTTT', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'C', 'pos': '302871', 'alt': 'CTTTT'}}}] + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['gene_symbol'] == 'ORAI1' + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro43PhefsTer48)', 'slr': 'NP_116179.2:p.(P43Ffs*48)'} + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['submitted_variant'] == 'NC_000012.11:g.122064773_122064774insTTTT' + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['genome_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['hgvs_lrg_variant'] == 'LRG_93:g.5299_5300insTTTTCCGCCA' + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['hgvs_transcript_variant'] == 'NM_032790.3:c.126_127insTTTTCCGCCA' + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5299_5300insTTTTCCGCCA' + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064774insTTTT', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '122064773', 'alt': 'CTTTT'}} + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000012.12:g.121626873_121626874insTTTTCCGCCA', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': u'CTTTTCCGCCA'}} + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064774insTTTT', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '122064773', 'alt': 'CTTTT'}} + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626874insTTTTCCGCCA', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': u'CTTTTCCGCCA'}} + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} + + assert results['flag'] == 'gene_variant' + + def test_variant76(self): + variant = 'NC_000012.11:g.122064772_122064777del' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_032790.3:c.126C>A' in results.keys() + assert results['NM_032790.3:c.126C>A']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.126C>A' + assert results['NM_032790.3:c.126C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.126C>A']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876del', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'GCCCCGC', 'pos': '302869', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876del', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'GCCCCGC', 'pos': '302869', 'alt': 'G'}}}] + assert results['NM_032790.3:c.126C>A']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' + assert results['NM_032790.3:c.126C>A']['gene_symbol'] == 'ORAI1' + 
assert results['NM_032790.3:c.126C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Ala42=)', 'slr': 'NP_116179.2:p.(A42=)'} + assert results['NM_032790.3:c.126C>A']['submitted_variant'] == 'NC_000012.11:g.122064772_122064777del' + assert results['NM_032790.3:c.126C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.126C>A']['hgvs_lrg_variant'] == 'LRG_93:g.5299C>A' + assert results['NM_032790.3:c.126C>A']['hgvs_transcript_variant'] == 'NM_032790.3:c.126C>A' + assert results['NM_032790.3:c.126C>A']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5299C>A' + assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064778del', 'vcf': {'chr': 'chr12', 'ref': 'GCCCCGC', 'pos': '122064771', 'alt': 'G'}} + assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873C>A', 'vcf': {'chr': 'chr12', 'ref': u'C', 'pos': '121626873', 'alt': u'A'}} + assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064778del', 'vcf': {'chr': '12', 'ref': 'GCCCCGC', 'pos': '122064771', 'alt': 'G'}} + assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873C>A', 'vcf': {'chr': '12', 'ref': u'C', 'pos': '121626873', 'alt': u'A'}} + assert results['NM_032790.3:c.126C>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} + + assert results['flag'] == 'gene_variant' + + def test_variant77(self): + variant = 'NC_000012.11:g.122064772_122064777dup' + results = self.vv.validate(variant, 'GRCh37', 'all') + print 
results + + assert results['flag'] == 'gene_variant' + assert 'NM_032790.3:c.131_132insCCCGCCACCGCC' in results.keys() + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.131_132insCCCGCCACCGCC' + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876dup', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'G', 'pos': '302869', 'alt': 'GCCCCGC'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876dup', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'G', 'pos': '302869', 'alt': 'GCCCCGC'}}}] + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['gene_symbol'] == 'ORAI1' + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro44_Pro47dup)', 'slr': 'NP_116179.2:p.(P44_P47dup)'} + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['submitted_variant'] == 'NC_000012.11:g.122064772_122064777dup' + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['genome_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['hgvs_lrg_variant'] == 'LRG_93:g.5304_5305insCCCGCCACCGCC' + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['hgvs_transcript_variant'] == 'NM_032790.3:c.131_132insCCCGCCACCGCC' + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5304_5305insCCCGCCACCGCC' + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064778dup', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '122064771', 'alt': 
'GCCCCGC'}} + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626878_121626879insCCCGCCACCGCC', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCGCCCCCGCCA'}} + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064778dup', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '122064771', 'alt': 'GCCCCGC'}} + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626878_121626879insCCCGCCACCGCC', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCGCCCCCGCCA'}} + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} + + + def test_variant78(self): + variant = 'NC_000012.11:g.122064779_122064782dup' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_032790.3:c.135_136insACCGCCACCG' in results.keys() + assert results['NM_032790.3:c.135_136insACCGCCACCG']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.135_136insACCGCCACCG' + assert results['NM_032790.3:c.135_136insACCGCCACCG']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.135_136insACCGCCACCG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302877_302880dup', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'C', 'pos': '302876', 'alt': 'CACCG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302877_302880dup', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'C', 'pos': '302876', 'alt': 'CACCG'}}}] + assert 
results['NM_032790.3:c.135_136insACCGCCACCG']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' + assert results['NM_032790.3:c.135_136insACCGCCACCG']['gene_symbol'] == 'ORAI1' + assert results['NM_032790.3:c.135_136insACCGCCACCG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro46ThrfsTer45)', 'slr': 'NP_116179.2:p.(P46Tfs*45)'} + assert results['NM_032790.3:c.135_136insACCGCCACCG']['submitted_variant'] == 'NC_000012.11:g.122064779_122064782dup' + assert results['NM_032790.3:c.135_136insACCGCCACCG']['genome_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.135_136insACCGCCACCG']['hgvs_lrg_variant'] == 'LRG_93:g.5308_5309insACCGCCACCG' + assert results['NM_032790.3:c.135_136insACCGCCACCG']['hgvs_transcript_variant'] == 'NM_032790.3:c.135_136insACCGCCACCG' + assert results['NM_032790.3:c.135_136insACCGCCACCG']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5308_5309insACCGCCACCG' + assert results['NM_032790.3:c.135_136insACCGCCACCG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064779_122064782dup', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '122064778', 'alt': 'CACCG'}} + assert results['NM_032790.3:c.135_136insACCGCCACCG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626882_121626883insACCGCCACCG', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCGCCACCGA'}} + assert results['NM_032790.3:c.135_136insACCGCCACCG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064779_122064782dup', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '122064778', 'alt': 'CACCG'}} + assert results['NM_032790.3:c.135_136insACCGCCACCG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626882_121626883insACCGCCACCG', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCGCCACCGA'}} + assert 
results['NM_032790.3:c.135_136insACCGCCACCG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} + + assert results['flag'] == 'gene_variant' + + def test_variant79(self): + variant = 'NC_000012.11:g.122064772_122064782del' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_032790.3:c.126_127insA' in results.keys() + assert results['NM_032790.3:c.126_127insA']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.126_127insA' + assert results['NM_032790.3:c.126_127insA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.126_127insA']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302872_302876del', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'GGCCCC', 'pos': '302868', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302872_302876del', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'GGCCCC', 'pos': '302868', 'alt': 'G'}}}] + assert results['NM_032790.3:c.126_127insA']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' + assert results['NM_032790.3:c.126_127insA']['gene_symbol'] == 'ORAI1' + assert results['NM_032790.3:c.126_127insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro43ThrfsTer45)', 'slr': 'NP_116179.2:p.(P43Tfs*45)'} + assert results['NM_032790.3:c.126_127insA']['submitted_variant'] == 'NC_000012.11:g.122064772_122064782del' + assert results['NM_032790.3:c.126_127insA']['genome_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.126_127insA']['hgvs_lrg_variant'] == 'LRG_93:g.5299_5300insA' + assert results['NM_032790.3:c.126_127insA']['hgvs_transcript_variant'] == 
'NM_032790.3:c.126_127insA' + assert results['NM_032790.3:c.126_127insA']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5299_5300insA' + assert results['NM_032790.3:c.126_127insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064774_122064778del', 'vcf': {'chr': 'chr12', 'ref': 'GGCCCC', 'pos': '122064770', 'alt': 'G'}} + assert results['NM_032790.3:c.126_127insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626874insA', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': u'CA'}} + assert results['NM_032790.3:c.126_127insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064774_122064778del', 'vcf': {'chr': '12', 'ref': 'GGCCCC', 'pos': '122064770', 'alt': 'G'}} + assert results['NM_032790.3:c.126_127insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626874insA', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': u'CA'}} + assert results['NM_032790.3:c.126_127insA']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} + + + def test_variant80(self): + variant = 'NC_000002.11:g.95847041_95847043GCG=' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_021088.3:c.471_473dup' in results.keys() + assert results['NM_021088.3:c.471_473dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_021088.3:c.471_473dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_021088.3:c.471_473dup']['alt_genomic_loci'] == [] + assert results['NM_021088.3:c.471_473dup']['transcript_description'] == 'Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 1, mRNA' + assert 
results['NM_021088.3:c.471_473dup']['gene_symbol'] == 'ZNF2' + assert results['NM_021088.3:c.471_473dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_066574.2:p.(Arg159dup)', 'slr': 'NP_066574.2:p.(R159dup)'} + assert results['NM_021088.3:c.471_473dup']['submitted_variant'] == 'NC_000002.11:g.95847041_95847043GCG=' + assert results['NM_021088.3:c.471_473dup']['genome_context_intronic_sequence'] == '' + assert results['NM_021088.3:c.471_473dup']['hgvs_lrg_variant'] == '' + assert results['NM_021088.3:c.471_473dup']['hgvs_transcript_variant'] == 'NM_021088.3:c.471_473dup' + assert results['NM_021088.3:c.471_473dup']['hgvs_refseqgene_variant'] == 'NG_033798.1:g.20883_20885dup' + assert results['NM_021088.3:c.471_473dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': 'chr2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} + assert results['NM_021088.3:c.471_473dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '95181295', 'alt': 'TGCG'}} + assert results['NM_021088.3:c.471_473dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': '2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} + assert results['NM_021088.3:c.471_473dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '95181295', 'alt': 'TGCG'}} + assert results['NM_021088.3:c.471_473dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_033798.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_066574.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021088.3'} + + assert 'NM_001291605.1:c.510_512dup' in results.keys() + assert 
results['NM_001291605.1:c.510_512dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001291605.1:c.510_512dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001291605.1:c.510_512dup']['alt_genomic_loci'] == [] + assert results['NM_001291605.1:c.510_512dup']['transcript_description'] == 'Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 5, mRNA' + assert results['NM_001291605.1:c.510_512dup']['gene_symbol'] == 'ZNF2' + assert results['NM_001291605.1:c.510_512dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278534.1:p.(Arg172dup)', 'slr': 'NP_001278534.1:p.(R172dup)'} + assert results['NM_001291605.1:c.510_512dup']['submitted_variant'] == 'NC_000002.11:g.95847041_95847043GCG=' + assert results['NM_001291605.1:c.510_512dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001291605.1:c.510_512dup']['hgvs_lrg_variant'] == '' + assert results['NM_001291605.1:c.510_512dup']['hgvs_transcript_variant'] == 'NM_001291605.1:c.510_512dup' + assert results['NM_001291605.1:c.510_512dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001291605.1:c.510_512dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': 'chr2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} + assert results['NM_001291605.1:c.510_512dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '95181295', 'alt': 'TGCG'}} + assert results['NM_001291605.1:c.510_512dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': '2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} + assert results['NM_001291605.1:c.510_512dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': '2', 'ref': 'T', 'pos': 
'95181295', 'alt': 'TGCG'}} + assert results['NM_001291605.1:c.510_512dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278534.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291605.1'} + + assert 'NM_001017396.2:c.345_347dup' in results.keys() + assert results['NM_001017396.2:c.345_347dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001017396.2:c.345_347dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001017396.2:c.345_347dup']['alt_genomic_loci'] == [] + assert results['NM_001017396.2:c.345_347dup']['transcript_description'] == 'Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 2, mRNA' + assert results['NM_001017396.2:c.345_347dup']['gene_symbol'] == 'ZNF2' + assert results['NM_001017396.2:c.345_347dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001017396.1:p.(Arg117dup)', 'slr': 'NP_001017396.1:p.(R117dup)'} + assert results['NM_001017396.2:c.345_347dup']['submitted_variant'] == 'NC_000002.11:g.95847041_95847043GCG=' + assert results['NM_001017396.2:c.345_347dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001017396.2:c.345_347dup']['hgvs_lrg_variant'] == '' + assert results['NM_001017396.2:c.345_347dup']['hgvs_transcript_variant'] == 'NM_001017396.2:c.345_347dup' + assert results['NM_001017396.2:c.345_347dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001017396.2:c.345_347dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': 'chr2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} + assert results['NM_001017396.2:c.345_347dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '95181295', 'alt': 'TGCG'}} + assert results['NM_001017396.2:c.345_347dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': '2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} + assert results['NM_001017396.2:c.345_347dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '95181295', 'alt': 'TGCG'}} + assert results['NM_001017396.2:c.345_347dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001017396.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001017396.2'} + + assert 'NM_001282398.1:c.357_359dup' in results.keys() + assert results['NM_001282398.1:c.357_359dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001282398.1:c.357_359dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001282398.1:c.357_359dup']['alt_genomic_loci'] == [] + assert results['NM_001282398.1:c.357_359dup']['transcript_description'] == 'Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 3, mRNA' + assert results['NM_001282398.1:c.357_359dup']['gene_symbol'] == 'ZNF2' + assert results['NM_001282398.1:c.357_359dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269327.1:p.(Arg121dup)', 'slr': 'NP_001269327.1:p.(R121dup)'} + assert results['NM_001282398.1:c.357_359dup']['submitted_variant'] == 'NC_000002.11:g.95847041_95847043GCG=' + assert results['NM_001282398.1:c.357_359dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001282398.1:c.357_359dup']['hgvs_lrg_variant'] == '' + assert results['NM_001282398.1:c.357_359dup']['hgvs_transcript_variant'] == 'NM_001282398.1:c.357_359dup' + assert results['NM_001282398.1:c.357_359dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001282398.1:c.357_359dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': 'chr2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} + assert 
results['NM_001282398.1:c.357_359dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '95181295', 'alt': 'TGCG'}} + assert results['NM_001282398.1:c.357_359dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': '2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} + assert results['NM_001282398.1:c.357_359dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '95181295', 'alt': 'TGCG'}} + assert results['NM_001282398.1:c.357_359dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269327.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282398.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001291604.1:c.231_233dup' in results.keys() + assert results['NM_001291604.1:c.231_233dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001291604.1:c.231_233dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001291604.1:c.231_233dup']['alt_genomic_loci'] == [] + assert results['NM_001291604.1:c.231_233dup']['transcript_description'] == 'Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 4, mRNA' + assert results['NM_001291604.1:c.231_233dup']['gene_symbol'] == 'ZNF2' + assert results['NM_001291604.1:c.231_233dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278533.1:p.(Arg79dup)', 'slr': 'NP_001278533.1:p.(R79dup)'} + assert results['NM_001291604.1:c.231_233dup']['submitted_variant'] == 'NC_000002.11:g.95847041_95847043GCG=' + assert results['NM_001291604.1:c.231_233dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001291604.1:c.231_233dup']['hgvs_lrg_variant'] == '' + assert results['NM_001291604.1:c.231_233dup']['hgvs_transcript_variant'] == 
'NM_001291604.1:c.231_233dup' + assert results['NM_001291604.1:c.231_233dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001291604.1:c.231_233dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': 'chr2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} + assert results['NM_001291604.1:c.231_233dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '95181295', 'alt': 'TGCG'}} + assert results['NM_001291604.1:c.231_233dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': '2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} + assert results['NM_001291604.1:c.231_233dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '95181295', 'alt': 'TGCG'}} + assert results['NM_001291604.1:c.231_233dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278533.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291604.1'} + + assert 'NM_021088.2:c.471_473dup' in results.keys() + assert results['NM_021088.2:c.471_473dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_021088.2:c.471_473dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_021088.2:c.471_473dup']['alt_genomic_loci'] == [] + assert results['NM_021088.2:c.471_473dup']['transcript_description'] == 'Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 1, mRNA' + assert results['NM_021088.2:c.471_473dup']['gene_symbol'] == 'ZNF2' + assert results['NM_021088.2:c.471_473dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_066574.2:p.(Arg159dup)', 'slr': 'NP_066574.2:p.(R159dup)'} + assert results['NM_021088.2:c.471_473dup']['submitted_variant'] == 
'NC_000002.11:g.95847041_95847043GCG=' + assert results['NM_021088.2:c.471_473dup']['genome_context_intronic_sequence'] == '' + assert results['NM_021088.2:c.471_473dup']['hgvs_lrg_variant'] == '' + assert results['NM_021088.2:c.471_473dup']['hgvs_transcript_variant'] == 'NM_021088.2:c.471_473dup' + assert results['NM_021088.2:c.471_473dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_021088.2:c.471_473dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': 'chr2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} + assert 'hg38' not in results['NM_021088.2:c.471_473dup']['primary_assembly_loci'].keys() + assert results['NM_021088.2:c.471_473dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': '2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} + assert 'grch38' not in results['NM_021088.2:c.471_473dup']['primary_assembly_loci'].keys() + assert results['NM_021088.2:c.471_473dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_066574.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021088.2'} + + assert 'NM_001017396.1:c.345_347dup' in results.keys() + assert results['NM_001017396.1:c.345_347dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001017396.1:c.345_347dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001017396.1:c.345_347dup']['alt_genomic_loci'] == [] + assert results['NM_001017396.1:c.345_347dup']['transcript_description'] == 'Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 2, mRNA' + assert results['NM_001017396.1:c.345_347dup']['gene_symbol'] == 'ZNF2' + assert results['NM_001017396.1:c.345_347dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001017396.1:p.(Arg117dup)', 'slr': 'NP_001017396.1:p.(R117dup)'} + assert 
results['NM_001017396.1:c.345_347dup']['submitted_variant'] == 'NC_000002.11:g.95847041_95847043GCG=' + assert results['NM_001017396.1:c.345_347dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001017396.1:c.345_347dup']['hgvs_lrg_variant'] == '' + assert results['NM_001017396.1:c.345_347dup']['hgvs_transcript_variant'] == 'NM_001017396.1:c.345_347dup' + assert results['NM_001017396.1:c.345_347dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001017396.1:c.345_347dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': 'chr2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} + assert 'hg38' not in results['NM_001017396.1:c.345_347dup']['primary_assembly_loci'].keys() + assert results['NM_001017396.1:c.345_347dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': '2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} + assert 'grch38' not in results['NM_001017396.1:c.345_347dup']['primary_assembly_loci'].keys() + assert results['NM_001017396.1:c.345_347dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001017396.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001017396.1'} + + + def test_variant81(self): + variant = 'NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001083585.1:c.*344_*368dup' in results.keys() + assert results['NM_001083585.1:c.*344_*368dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001083585.1:c.*344_*368dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001083585.1:c.*344_*368dup']['alt_genomic_loci'] == [] + assert results['NM_001083585.1:c.*344_*368dup']['transcript_description'] == 'Homo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), 
transcript variant 2, mRNA' + assert results['NM_001083585.1:c.*344_*368dup']['gene_symbol'] == 'RABEP1' + assert results['NM_001083585.1:c.*344_*368dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001077054.1:p.?', 'slr': 'NP_001077054.1:p.?'} + assert results['NM_001083585.1:c.*344_*368dup']['submitted_variant'] == 'NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=' + assert results['NM_001083585.1:c.*344_*368dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001083585.1:c.*344_*368dup']['hgvs_lrg_variant'] == '' + assert results['NM_001083585.1:c.*344_*368dup']['hgvs_transcript_variant'] == 'NM_001083585.1:c.*344_*368dup' + assert results['NM_001083585.1:c.*344_*368dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001083585.1:c.*344_*368dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': 'chr17', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'pos': '5286857', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} + assert 'hg38' not in results['NM_001083585.1:c.*344_*368dup']['primary_assembly_loci'].keys() + assert results['NM_001083585.1:c.*344_*368dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': '17', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'pos': '5286857', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} + assert 'grch38' not in results['NM_001083585.1:c.*344_*368dup']['primary_assembly_loci'].keys() + assert results['NM_001083585.1:c.*344_*368dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001077054.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001083585.1'} + + assert 'NM_004703.5:c.*344_*368dup' in results.keys() + assert results['NM_004703.5:c.*344_*368dup']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_004703.5:c.*344_*368dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004703.5:c.*344_*368dup']['alt_genomic_loci'] == [] + assert results['NM_004703.5:c.*344_*368dup']['transcript_description'] == 'Homo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 1, mRNA' + assert results['NM_004703.5:c.*344_*368dup']['gene_symbol'] == 'RABEP1' + assert results['NM_004703.5:c.*344_*368dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004694.2:p.?', 'slr': 'NP_004694.2:p.?'} + assert results['NM_004703.5:c.*344_*368dup']['submitted_variant'] == 'NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=' + assert results['NM_004703.5:c.*344_*368dup']['genome_context_intronic_sequence'] == '' + assert results['NM_004703.5:c.*344_*368dup']['hgvs_lrg_variant'] == '' + assert results['NM_004703.5:c.*344_*368dup']['hgvs_transcript_variant'] == 'NM_004703.5:c.*344_*368dup' + assert results['NM_004703.5:c.*344_*368dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_004703.5:c.*344_*368dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': 'chr17', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'pos': '5286857', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} + assert results['NM_004703.5:c.*344_*368dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.5383567_5383591dup', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '5383566', 'alt': 'GTAGTGTTTGGAATTTTCTGTTCATA'}} + assert results['NM_004703.5:c.*344_*368dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': '17', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'pos': '5286857', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} + assert 
results['NM_004703.5:c.*344_*368dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.5383567_5383591dup', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '5383566', 'alt': 'GTAGTGTTTGGAATTTTCTGTTCATA'}} + assert results['NM_004703.5:c.*344_*368dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004694.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004703.5'} + + assert 'NM_004703.4:c.*344_*368dup' in results.keys() + assert results['NM_004703.4:c.*344_*368dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_004703.4:c.*344_*368dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004703.4:c.*344_*368dup']['alt_genomic_loci'] == [] + assert results['NM_004703.4:c.*344_*368dup']['transcript_description'] == 'Homo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 1, mRNA' + assert results['NM_004703.4:c.*344_*368dup']['gene_symbol'] == 'RABEP1' + assert results['NM_004703.4:c.*344_*368dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004694.2:p.?', 'slr': 'NP_004694.2:p.?'} + assert results['NM_004703.4:c.*344_*368dup']['submitted_variant'] == 'NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=' + assert results['NM_004703.4:c.*344_*368dup']['genome_context_intronic_sequence'] == '' + assert results['NM_004703.4:c.*344_*368dup']['hgvs_lrg_variant'] == '' + assert results['NM_004703.4:c.*344_*368dup']['hgvs_transcript_variant'] == 'NM_004703.4:c.*344_*368dup' + assert results['NM_004703.4:c.*344_*368dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_004703.4:c.*344_*368dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': 'chr17', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'pos': '5286857', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} + assert 'hg38' not in 
results['NM_004703.4:c.*344_*368dup']['primary_assembly_loci'].keys() + assert results['NM_004703.4:c.*344_*368dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': '17', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'pos': '5286857', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} + assert 'grch38' not in results['NM_004703.4:c.*344_*368dup']['primary_assembly_loci'].keys() + assert results['NM_004703.4:c.*344_*368dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004694.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004703.4'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001291581.1:c.*344_*368dup' in results.keys() + assert results['NM_001291581.1:c.*344_*368dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001291581.1:c.*344_*368dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001291581.1:c.*344_*368dup']['alt_genomic_loci'] == [] + assert results['NM_001291581.1:c.*344_*368dup']['transcript_description'] == 'Homo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 3, mRNA' + assert results['NM_001291581.1:c.*344_*368dup']['gene_symbol'] == 'RABEP1' + assert results['NM_001291581.1:c.*344_*368dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278510.1:p.?', 'slr': 'NP_001278510.1:p.?'} + assert results['NM_001291581.1:c.*344_*368dup']['submitted_variant'] == 'NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=' + assert results['NM_001291581.1:c.*344_*368dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001291581.1:c.*344_*368dup']['hgvs_lrg_variant'] == '' + assert results['NM_001291581.1:c.*344_*368dup']['hgvs_transcript_variant'] == 'NM_001291581.1:c.*344_*368dup' + assert results['NM_001291581.1:c.*344_*368dup']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_001291581.1:c.*344_*368dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': 'chr17', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'pos': '5286857', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} + assert results['NM_001291581.1:c.*344_*368dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.5383567_5383591dup', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '5383566', 'alt': 'GTAGTGTTTGGAATTTTCTGTTCATA'}} + assert results['NM_001291581.1:c.*344_*368dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': '17', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'pos': '5286857', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} + assert results['NM_001291581.1:c.*344_*368dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.5383567_5383591dup', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '5383566', 'alt': 'GTAGTGTTTGGAATTTTCTGTTCATA'}} + assert results['NM_001291581.1:c.*344_*368dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278510.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291581.1'} + + assert 'NM_001083585.2:c.*344_*368dup' in results.keys() + assert results['NM_001083585.2:c.*344_*368dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001083585.2:c.*344_*368dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001083585.2:c.*344_*368dup']['alt_genomic_loci'] == [] + assert results['NM_001083585.2:c.*344_*368dup']['transcript_description'] == 'Homo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 2, mRNA' + assert results['NM_001083585.2:c.*344_*368dup']['gene_symbol'] == 'RABEP1' + assert 
results['NM_001083585.2:c.*344_*368dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001077054.1:p.?', 'slr': 'NP_001077054.1:p.?'} + assert results['NM_001083585.2:c.*344_*368dup']['submitted_variant'] == 'NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=' + assert results['NM_001083585.2:c.*344_*368dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001083585.2:c.*344_*368dup']['hgvs_lrg_variant'] == '' + assert results['NM_001083585.2:c.*344_*368dup']['hgvs_transcript_variant'] == 'NM_001083585.2:c.*344_*368dup' + assert results['NM_001083585.2:c.*344_*368dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001083585.2:c.*344_*368dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': 'chr17', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'pos': '5286857', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} + assert results['NM_001083585.2:c.*344_*368dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.5383567_5383591dup', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '5383566', 'alt': 'GTAGTGTTTGGAATTTTCTGTTCATA'}} + assert results['NM_001083585.2:c.*344_*368dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': '17', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'pos': '5286857', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} + assert results['NM_001083585.2:c.*344_*368dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.5383567_5383591dup', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '5383566', 'alt': 'GTAGTGTTTGGAATTTTCTGTTCATA'}} + assert results['NM_001083585.2:c.*344_*368dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001077054.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001083585.2'} + + + def test_variant82(self): + variant = 'NC_000003.11:g.14561629_14561630GC=' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001080423.3:c.1020del' in results.keys() + assert results['NM_001080423.3:c.1020del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001080423.3:c.1020del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001080423.3:c.1020del']['alt_genomic_loci'] == [] + assert results['NM_001080423.3:c.1020del']['transcript_description'] == 'Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA' + assert results['NM_001080423.3:c.1020del']['gene_symbol'] == 'GRIP2' + assert results['NM_001080423.3:c.1020del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.3:p.(Ser341GlnfsTer4)', 'slr': 'NP_001073892.3:p.(S341Qfs*4)'} + assert results['NM_001080423.3:c.1020del']['submitted_variant'] == 'NC_000003.11:g.14561629_14561630GC=' + assert results['NM_001080423.3:c.1020del']['genome_context_intronic_sequence'] == '' + assert results['NM_001080423.3:c.1020del']['hgvs_lrg_variant'] == '' + assert results['NM_001080423.3:c.1020del']['hgvs_transcript_variant'] == 'NM_001080423.3:c.1020del' + assert results['NM_001080423.3:c.1020del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001080423.3:c.1020del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': 'chr3', 'ref': 'CTGAGGC', 'pos': '14561624', 'alt': 'CTGAGGC'}} + assert results['NM_001080423.3:c.1020del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520122del', 'vcf': {'chr': 'chr3', 'ref': 'AG', 'pos': '14520119', 'alt': 'A'}} + assert results['NM_001080423.3:c.1020del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': '3', 'ref': 'CTGAGGC', 'pos': '14561624', 'alt': 'CTGAGGC'}} + 
assert results['NM_001080423.3:c.1020del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520122del', 'vcf': {'chr': '3', 'ref': 'AG', 'pos': '14520119', 'alt': 'A'}} + assert results['NM_001080423.3:c.1020del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.3'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001080423.2:c.1311del' in results.keys() + assert results['NM_001080423.2:c.1311del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001080423.2:c.1311del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001080423.2:c.1311del']['alt_genomic_loci'] == [] + assert results['NM_001080423.2:c.1311del']['transcript_description'] == 'Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA' + assert results['NM_001080423.2:c.1311del']['gene_symbol'] == 'GRIP2' + assert results['NM_001080423.2:c.1311del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.2:p.(Ser438GlnfsTer4)', 'slr': 'NP_001073892.2:p.(S438Qfs*4)'} + assert results['NM_001080423.2:c.1311del']['submitted_variant'] == 'NC_000003.11:g.14561629_14561630GC=' + assert results['NM_001080423.2:c.1311del']['genome_context_intronic_sequence'] == '' + assert results['NM_001080423.2:c.1311del']['hgvs_lrg_variant'] == '' + assert results['NM_001080423.2:c.1311del']['hgvs_transcript_variant'] == 'NM_001080423.2:c.1311del' + assert results['NM_001080423.2:c.1311del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001080423.2:c.1311del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': 'chr3', 'ref': 'CTGAGGC', 'pos': '14561624', 'alt': 'CTGAGGC'}} + assert 'hg38' not in results['NM_001080423.2:c.1311del']['primary_assembly_loci'].keys() + assert results['NM_001080423.2:c.1311del']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': '3', 'ref': 'CTGAGGC', 'pos': '14561624', 'alt': 'CTGAGGC'}} + assert 'grch38' not in results['NM_001080423.2:c.1311del']['primary_assembly_loci'].keys() + assert results['NM_001080423.2:c.1311del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.2'} + + + def test_variant83(self): + variant = 'NC_000003.11:g.14561629_14561630insG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001080423.3:c.1016_1020=' in results.keys() + assert results['NM_001080423.3:c.1016_1020=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001080423.3:c.1016_1020=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001080423.3:c.1016_1020=']['alt_genomic_loci'] == [] + assert results['NM_001080423.3:c.1016_1020=']['transcript_description'] == 'Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA' + assert results['NM_001080423.3:c.1016_1020=']['gene_symbol'] == 'GRIP2' + assert results['NM_001080423.3:c.1016_1020=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.3:p.(Arg339=)', 'slr': 'NP_001073892.3:p.(R339=)'} + assert results['NM_001080423.3:c.1016_1020=']['submitted_variant'] == 'NC_000003.11:g.14561629_14561630insG' + assert results['NM_001080423.3:c.1016_1020=']['genome_context_intronic_sequence'] == '' + assert results['NM_001080423.3:c.1016_1020=']['hgvs_lrg_variant'] == '' + assert results['NM_001080423.3:c.1016_1020=']['hgvs_transcript_variant'] == 'NM_001080423.3:c.1016_1020=' + assert results['NM_001080423.3:c.1016_1020=']['hgvs_refseqgene_variant'] == '' + assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': 'chr3', 'ref': 'A', 'pos': '14561627', 'alt': 'AG'}} + assert 
results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520120_14520124=', 'vcf': {'chr': 'chr3', 'ref': u'GGGCC', 'pos': '14520120', 'alt': u'GGGCC'}} + assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': '3', 'ref': 'A', 'pos': '14561627', 'alt': 'AG'}} + assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520120_14520124=', 'vcf': {'chr': '3', 'ref': u'GGGCC', 'pos': '14520120', 'alt': u'GGGCC'}} + assert results['NM_001080423.3:c.1016_1020=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.3'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001080423.2:c.1307_1311=' in results.keys() + assert results['NM_001080423.2:c.1307_1311=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001080423.2:c.1307_1311=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001080423.2:c.1307_1311=']['alt_genomic_loci'] == [] + assert results['NM_001080423.2:c.1307_1311=']['transcript_description'] == 'Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA' + assert results['NM_001080423.2:c.1307_1311=']['gene_symbol'] == 'GRIP2' + assert results['NM_001080423.2:c.1307_1311=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.2:p.(Arg436=)', 'slr': 'NP_001073892.2:p.(R436=)'} + assert results['NM_001080423.2:c.1307_1311=']['submitted_variant'] == 'NC_000003.11:g.14561629_14561630insG' + assert results['NM_001080423.2:c.1307_1311=']['genome_context_intronic_sequence'] == '' + assert results['NM_001080423.2:c.1307_1311=']['hgvs_lrg_variant'] == '' + assert results['NM_001080423.2:c.1307_1311=']['hgvs_transcript_variant'] == 'NM_001080423.2:c.1307_1311=' + 
assert results['NM_001080423.2:c.1307_1311=']['hgvs_refseqgene_variant'] == '' + assert results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': 'chr3', 'ref': 'A', 'pos': '14561627', 'alt': 'AG'}} + assert 'hg38' not in results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci'].keys() + assert results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': '3', 'ref': 'A', 'pos': '14561627', 'alt': 'AG'}} + assert 'grch38' not in results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci'].keys() + assert results['NM_001080423.2:c.1307_1311=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.2'} + + + def test_variant84(self): + variant = 'NC_000004.11:g.140811111_140811122del' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_018717.5:c.1515_1526del' in results.keys() + assert results['NM_018717.5:c.1515_1526del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_018717.5:c.1515_1526del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_018717.5:c.1515_1526del']['alt_genomic_loci'] == [] + assert results['NM_018717.5:c.1515_1526del']['transcript_description'] == 'Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA' + assert results['NM_018717.5:c.1515_1526del']['gene_symbol'] == 'MAML3' + assert results['NM_018717.5:c.1515_1526del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.3:p.(Gln507_Gln510del)', 'slr': 'NP_061187.3:p.(Q507_Q510del)'} + assert results['NM_018717.5:c.1515_1526del']['submitted_variant'] == 'NC_000004.11:g.140811111_140811122del' + assert 
results['NM_018717.5:c.1515_1526del']['genome_context_intronic_sequence'] == '' + assert results['NM_018717.5:c.1515_1526del']['hgvs_lrg_variant'] == '' + assert results['NM_018717.5:c.1515_1526del']['hgvs_transcript_variant'] == 'NM_018717.5:c.1515_1526del' + assert results['NM_018717.5:c.1515_1526del']['hgvs_refseqgene_variant'] == '' + assert results['NM_018717.5:c.1515_1526del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811122del', 'vcf': {'chr': 'chr4', 'ref': 'TTGCTGCTGCTGC', 'pos': '140811063', 'alt': 'T'}} + assert 'hg38' not in results['NM_018717.5:c.1515_1526del']['primary_assembly_loci'].keys() + assert results['NM_018717.5:c.1515_1526del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811122del', 'vcf': {'chr': '4', 'ref': 'TTGCTGCTGCTGC', 'pos': '140811063', 'alt': 'T'}} + assert 'grch38' not in results['NM_018717.5:c.1515_1526del']['primary_assembly_loci'].keys() + assert results['NM_018717.5:c.1515_1526del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5'} + + assert 'NM_018717.4:c.1465_1469=' in results.keys() + assert results['NM_018717.4:c.1465_1469=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_018717.4:c.1465_1469=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_018717.4:c.1465_1469=']['alt_genomic_loci'] == [] + assert results['NM_018717.4:c.1465_1469=']['transcript_description'] == 'Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA' + assert results['NM_018717.4:c.1465_1469=']['gene_symbol'] == 'MAML3' + assert results['NM_018717.4:c.1465_1469=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.2:p.(Gln489=)', 'slr': 'NP_061187.2:p.(Q489=)'} + assert results['NM_018717.4:c.1465_1469=']['submitted_variant'] == 'NC_000004.11:g.140811111_140811122del' + assert 
results['NM_018717.4:c.1465_1469=']['genome_context_intronic_sequence'] == '' + assert results['NM_018717.4:c.1465_1469=']['hgvs_lrg_variant'] == '' + assert results['NM_018717.4:c.1465_1469=']['hgvs_transcript_variant'] == 'NM_018717.4:c.1465_1469=' + assert results['NM_018717.4:c.1465_1469=']['hgvs_refseqgene_variant'] == '' + assert results['NM_018717.4:c.1465_1469=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811122del', 'vcf': {'chr': 'chr4', 'ref': 'TTGCTGCTGCTGC', 'pos': '140811063', 'alt': 'T'}} + assert results['NM_018717.4:c.1465_1469=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889957_139889968del', 'vcf': {'chr': 'chr4', 'ref': 'TTGCTGCTGCTGC', 'pos': '139889909', 'alt': 'T'}} + assert results['NM_018717.4:c.1465_1469=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811122del', 'vcf': {'chr': '4', 'ref': 'TTGCTGCTGCTGC', 'pos': '140811063', 'alt': 'T'}} + assert results['NM_018717.4:c.1465_1469=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889957_139889968del', 'vcf': {'chr': '4', 'ref': 'TTGCTGCTGCTGC', 'pos': '139889909', 'alt': 'T'}} + assert results['NM_018717.4:c.1465_1469=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.4'} + + + def test_variant85(self): + variant = 'NC_000004.11:g.140811111_140811122CTGCTGCTGCTG=' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_018717.5:c.1468_1479=' in results.keys() + assert results['NM_018717.5:c.1468_1479=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_018717.5:c.1468_1479=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_018717.5:c.1468_1479=']['alt_genomic_loci'] == [] + assert results['NM_018717.5:c.1468_1479=']['transcript_description'] == 
'Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA' + assert results['NM_018717.5:c.1468_1479=']['gene_symbol'] == 'MAML3' + assert results['NM_018717.5:c.1468_1479=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.3:p.(Gln490=)', 'slr': 'NP_061187.3:p.(Q490=)'} + assert results['NM_018717.5:c.1468_1479=']['submitted_variant'] == 'NC_000004.11:g.140811111_140811122CTGCTGCTGCTG=' + assert results['NM_018717.5:c.1468_1479=']['genome_context_intronic_sequence'] == '' + assert results['NM_018717.5:c.1468_1479=']['hgvs_lrg_variant'] == '' + assert results['NM_018717.5:c.1468_1479=']['hgvs_transcript_variant'] == 'NM_018717.5:c.1468_1479=' + assert results['NM_018717.5:c.1468_1479=']['hgvs_refseqgene_variant'] == '' + assert results['NM_018717.5:c.1468_1479=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811122=', 'vcf': {'chr': 'chr4', 'ref': u'CTGCTGCTGCTG', 'pos': '140811111', 'alt': u'CTGCTGCTGCTG'}} + assert 'hg38' not in results['NM_018717.5:c.1468_1479=']['primary_assembly_loci'].keys() + assert results['NM_018717.5:c.1468_1479=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811122=', 'vcf': {'chr': '4', 'ref': u'CTGCTGCTGCTG', 'pos': '140811111', 'alt': u'CTGCTGCTGCTG'}} + assert 'grch38' not in results['NM_018717.5:c.1468_1479=']['primary_assembly_loci'].keys() + assert results['NM_018717.5:c.1468_1479=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5'} + + assert results['flag'] == 'gene_variant' + assert 'NM_018717.4:c.1503_1514dup' in results.keys() + assert results['NM_018717.4:c.1503_1514dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_018717.4:c.1503_1514dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_018717.4:c.1503_1514dup']['alt_genomic_loci'] == [] + assert 
results['NM_018717.4:c.1503_1514dup']['transcript_description'] == 'Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA' + assert results['NM_018717.4:c.1503_1514dup']['gene_symbol'] == 'MAML3' + assert results['NM_018717.4:c.1503_1514dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.2:p.(Gln503_Gln506dup)', 'slr': 'NP_061187.2:p.(Q503_Q506dup)'} + assert results['NM_018717.4:c.1503_1514dup']['submitted_variant'] == 'NC_000004.11:g.140811111_140811122CTGCTGCTGCTG=' + assert results['NM_018717.4:c.1503_1514dup']['genome_context_intronic_sequence'] == '' + assert results['NM_018717.4:c.1503_1514dup']['hgvs_lrg_variant'] == '' + assert results['NM_018717.4:c.1503_1514dup']['hgvs_transcript_variant'] == 'NM_018717.4:c.1503_1514dup' + assert results['NM_018717.4:c.1503_1514dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_018717.4:c.1503_1514dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811095_140811128=', 'vcf': {'chr': 'chr4', 'ref': 'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG', 'pos': '140811095', 'alt': 'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG'}} + assert results['NM_018717.4:c.1503_1514dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889941_139889974=', 'vcf': {'chr': 'chr4', 'ref': 'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG', 'pos': '139889941', 'alt': 'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG'}} + assert results['NM_018717.4:c.1503_1514dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811095_140811128=', 'vcf': {'chr': '4', 'ref': 'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG', 'pos': '140811095', 'alt': 'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG'}} + assert results['NM_018717.4:c.1503_1514dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889941_139889974=', 'vcf': {'chr': '4', 'ref': 'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG', 'pos': '139889941', 'alt': 'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG'}} + 
assert results['NM_018717.4:c.1503_1514dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.4'} + + + def test_variant86(self): + variant = 'NC_000004.11:g.140811117_140811122del' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_018717.5:c.1521_1526del' in results.keys() + assert results['NM_018717.5:c.1521_1526del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_018717.5:c.1521_1526del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_018717.5:c.1521_1526del']['alt_genomic_loci'] == [] + assert results['NM_018717.5:c.1521_1526del']['transcript_description'] == 'Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA' + assert results['NM_018717.5:c.1521_1526del']['gene_symbol'] == 'MAML3' + assert results['NM_018717.5:c.1521_1526del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.3:p.(Gln509_Gln510del)', 'slr': 'NP_061187.3:p.(Q509_Q510del)'} + assert results['NM_018717.5:c.1521_1526del']['submitted_variant'] == 'NC_000004.11:g.140811117_140811122del' + assert results['NM_018717.5:c.1521_1526del']['genome_context_intronic_sequence'] == '' + assert results['NM_018717.5:c.1521_1526del']['hgvs_lrg_variant'] == '' + assert results['NM_018717.5:c.1521_1526del']['hgvs_transcript_variant'] == 'NM_018717.5:c.1521_1526del' + assert results['NM_018717.5:c.1521_1526del']['hgvs_refseqgene_variant'] == '' + assert results['NM_018717.5:c.1521_1526del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117_140811122del', 'vcf': {'chr': 'chr4', 'ref': 'TTGCTGC', 'pos': '140811063', 'alt': 'T'}} + assert 'hg38' not in results['NM_018717.5:c.1521_1526del']['primary_assembly_loci'].keys() + assert results['NM_018717.5:c.1521_1526del']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000004.11:g.140811117_140811122del', 'vcf': {'chr': '4', 'ref': 'TTGCTGC', 'pos': '140811063', 'alt': 'T'}} + assert 'grch38' not in results['NM_018717.5:c.1521_1526del']['primary_assembly_loci'].keys() + assert results['NM_018717.5:c.1521_1526del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5'} + + assert 'NM_018717.4:c.1509_1514dup' in results.keys() + assert results['NM_018717.4:c.1509_1514dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_018717.4:c.1509_1514dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_018717.4:c.1509_1514dup']['alt_genomic_loci'] == [] + assert results['NM_018717.4:c.1509_1514dup']['transcript_description'] == 'Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA' + assert results['NM_018717.4:c.1509_1514dup']['gene_symbol'] == 'MAML3' + assert results['NM_018717.4:c.1509_1514dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.2:p.(Gln505_Gln506dup)', 'slr': 'NP_061187.2:p.(Q505_Q506dup)'} + assert results['NM_018717.4:c.1509_1514dup']['submitted_variant'] == 'NC_000004.11:g.140811117_140811122del' + assert results['NM_018717.4:c.1509_1514dup']['genome_context_intronic_sequence'] == '' + assert results['NM_018717.4:c.1509_1514dup']['hgvs_lrg_variant'] == '' + assert results['NM_018717.4:c.1509_1514dup']['hgvs_transcript_variant'] == 'NM_018717.4:c.1509_1514dup' + assert results['NM_018717.4:c.1509_1514dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_018717.4:c.1509_1514dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117_140811122del', 'vcf': {'chr': 'chr4', 'ref': 'TTGCTGC', 'pos': '140811063', 'alt': 'T'}} + assert results['NM_018717.4:c.1509_1514dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889963_139889968del', 'vcf': 
{'chr': 'chr4', 'ref': 'TTGCTGC', 'pos': '139889909', 'alt': 'T'}} + assert results['NM_018717.4:c.1509_1514dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117_140811122del', 'vcf': {'chr': '4', 'ref': 'TTGCTGC', 'pos': '140811063', 'alt': 'T'}} + assert results['NM_018717.4:c.1509_1514dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889963_139889968del', 'vcf': {'chr': '4', 'ref': 'TTGCTGC', 'pos': '139889909', 'alt': 'T'}} + assert results['NM_018717.4:c.1509_1514dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.4'} + + + def test_variant87(self): + variant = 'NC_000004.11:g.140811111_140811117del' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_018717.5:c.1473_1479del' in results.keys() + assert results['NM_018717.5:c.1473_1479del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_018717.5:c.1473_1479del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_018717.5:c.1473_1479del']['alt_genomic_loci'] == [] + assert results['NM_018717.5:c.1473_1479del']['transcript_description'] == 'Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA' + assert results['NM_018717.5:c.1473_1479del']['gene_symbol'] == 'MAML3' + assert results['NM_018717.5:c.1473_1479del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.3:p.(Gln491HisfsTer29)', 'slr': 'NP_061187.3:p.(Q491Hfs*29)'} + assert results['NM_018717.5:c.1473_1479del']['submitted_variant'] == 'NC_000004.11:g.140811111_140811117del' + assert results['NM_018717.5:c.1473_1479del']['genome_context_intronic_sequence'] == '' + assert results['NM_018717.5:c.1473_1479del']['hgvs_lrg_variant'] == '' + assert results['NM_018717.5:c.1473_1479del']['hgvs_transcript_variant'] == 'NM_018717.5:c.1473_1479del' + assert 
results['NM_018717.5:c.1473_1479del']['hgvs_refseqgene_variant'] == '' + assert results['NM_018717.5:c.1473_1479del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811117del', 'vcf': {'chr': 'chr4', 'ref': 'GCTGCTGC', 'pos': '140811110', 'alt': 'G'}} + assert 'hg38' not in results['NM_018717.5:c.1473_1479del']['primary_assembly_loci'].keys() + assert results['NM_018717.5:c.1473_1479del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811117del', 'vcf': {'chr': '4', 'ref': 'GCTGCTGC', 'pos': '140811110', 'alt': 'G'}} + assert 'grch38' not in results['NM_018717.5:c.1473_1479del']['primary_assembly_loci'].keys() + assert results['NM_018717.5:c.1473_1479del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5'} + + assert results['flag'] == 'gene_variant' + assert 'NM_018717.4:c.1468_1472dup' in results.keys() + assert results['NM_018717.4:c.1468_1472dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_018717.4:c.1468_1472dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_018717.4:c.1468_1472dup']['alt_genomic_loci'] == [] + assert results['NM_018717.4:c.1468_1472dup']['transcript_description'] == 'Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA' + assert results['NM_018717.4:c.1468_1472dup']['gene_symbol'] == 'MAML3' + assert results['NM_018717.4:c.1468_1472dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.2:p.(Gln491HisfsTer29)', 'slr': 'NP_061187.2:p.(Q491Hfs*29)'} + assert results['NM_018717.4:c.1468_1472dup']['submitted_variant'] == 'NC_000004.11:g.140811111_140811117del' + assert results['NM_018717.4:c.1468_1472dup']['genome_context_intronic_sequence'] == '' + assert results['NM_018717.4:c.1468_1472dup']['hgvs_lrg_variant'] == '' + assert 
results['NM_018717.4:c.1468_1472dup']['hgvs_transcript_variant'] == 'NM_018717.4:c.1468_1472dup' + assert results['NM_018717.4:c.1468_1472dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_018717.4:c.1468_1472dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811117del', 'vcf': {'chr': 'chr4', 'ref': 'GCTGCTGC', 'pos': '140811110', 'alt': 'G'}} + assert results['NM_018717.4:c.1468_1472dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889957_139889963del', 'vcf': {'chr': 'chr4', 'ref': 'GCTGCTGC', 'pos': '139889956', 'alt': 'G'}} + assert results['NM_018717.4:c.1468_1472dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811117del', 'vcf': {'chr': '4', 'ref': 'GCTGCTGC', 'pos': '140811110', 'alt': 'G'}} + assert results['NM_018717.4:c.1468_1472dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889957_139889963del', 'vcf': {'chr': '4', 'ref': 'GCTGCTGC', 'pos': '139889956', 'alt': 'G'}} + assert results['NM_018717.4:c.1468_1472dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.4'} + + + def test_variant88(self): + variant = 'NC_000004.11:g.140811117C>A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_018717.4:c.1472_1473insTCAGCAGCAGCA' in results.keys() + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['alt_genomic_loci'] == [] + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['transcript_description'] == 'Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), 
mRNA' + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['gene_symbol'] == 'MAML3' + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.2:p.(Gln490_Gln491insHisGlnGlnGln)', 'slr': 'NP_061187.2:p.(Q490_Q491insHQQQ)'} + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['submitted_variant'] == 'NC_000004.11:g.140811117C>A' + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['genome_context_intronic_sequence'] == '' + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['hgvs_lrg_variant'] == '' + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['hgvs_transcript_variant'] == 'NM_018717.4:c.1472_1473insTCAGCAGCAGCA' + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['hgvs_refseqgene_variant'] == '' + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117C>A', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '140811117', 'alt': u'A'}} + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889963C>A', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '139889963', 'alt': u'A'}} + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117C>A', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '140811117', 'alt': u'A'}} + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889963C>A', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '139889963', 'alt': u'A'}} + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.4'} + + assert 'NM_018717.5:c.1473G>T' in results.keys() + assert 
results['NM_018717.5:c.1473G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_018717.5:c.1473G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_018717.5:c.1473G>T']['alt_genomic_loci'] == [] + assert results['NM_018717.5:c.1473G>T']['transcript_description'] == 'Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA' + assert results['NM_018717.5:c.1473G>T']['gene_symbol'] == 'MAML3' + assert results['NM_018717.5:c.1473G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.3:p.(Gln491His)', 'slr': 'NP_061187.3:p.(Q491H)'} + assert results['NM_018717.5:c.1473G>T']['submitted_variant'] == 'NC_000004.11:g.140811117C>A' + assert results['NM_018717.5:c.1473G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_018717.5:c.1473G>T']['hgvs_lrg_variant'] == '' + assert results['NM_018717.5:c.1473G>T']['hgvs_transcript_variant'] == 'NM_018717.5:c.1473G>T' + assert results['NM_018717.5:c.1473G>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_018717.5:c.1473G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117C>A', 'vcf': {'chr': 'chr4', 'ref': u'C', 'pos': '140811117', 'alt': u'A'}} + assert 'hg38' not in results['NM_018717.5:c.1473G>T']['primary_assembly_loci'].keys() + assert results['NM_018717.5:c.1473G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117C>A', 'vcf': {'chr': '4', 'ref': u'C', 'pos': '140811117', 'alt': u'A'}} + assert 'grch38' not in results['NM_018717.5:c.1473G>T']['primary_assembly_loci'].keys() + assert results['NM_018717.5:c.1473G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5'} + + + def test_variant89(self): + variant = 'NC_000002.11:g.73675227_73675228insCTC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 
'NM_015120.4:c.1573_1579=' in results.keys() + assert results['NM_015120.4:c.1573_1579=']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.1573_1579=' + assert results['NM_015120.4:c.1573_1579=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_015120.4:c.1573_1579=']['alt_genomic_loci'] == [] + assert results['NM_015120.4:c.1573_1579=']['transcript_description'] == 'Homo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA' + assert results['NM_015120.4:c.1573_1579=']['gene_symbol'] == 'ALMS1' + assert results['NM_015120.4:c.1573_1579=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Ser525=)', 'slr': 'NP_055935.4:p.(S525=)'} + assert results['NM_015120.4:c.1573_1579=']['submitted_variant'] == 'NC_000002.11:g.73675227_73675228insCTC' + assert results['NM_015120.4:c.1573_1579=']['genome_context_intronic_sequence'] == '' + assert results['NM_015120.4:c.1573_1579=']['hgvs_lrg_variant'] == 'LRG_741:g.67345_67351=' + assert results['NM_015120.4:c.1573_1579=']['hgvs_transcript_variant'] == 'NM_015120.4:c.1573_1579=' + assert results['NM_015120.4:c.1573_1579=']['hgvs_refseqgene_variant'] == 'NG_011690.1:g.67345_67351=' + assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675228_73675230dup', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '73675227', 'alt': 'TCTC'}} + assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448097_73448103=', 'vcf': {'chr': 'chr2', 'ref': u'TCTCCTC', 'pos': '73448097', 'alt': u'TCTCCTC'}} + assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675228_73675230dup', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '73675227', 'alt': 'TCTC'}} + assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.73448097_73448103=', 'vcf': {'chr': '2', 'ref': u'TCTCCTC', 'pos': '73448097', 'alt': u'TCTCCTC'}} + assert results['NM_015120.4:c.1573_1579=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} + + assert results['flag'] == 'gene_variant' + + def test_variant90(self): + variant = '9-136132908-T-TC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_020469.2:c.260_262=' in results.keys() + assert results['NM_020469.2:c.260_262=']['hgvs_lrg_transcript_variant'] == 'LRG_792t1:c.260_262=' + assert results['NM_020469.2:c.260_262=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_020469.2:c.260_262=']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'HG79_PATCH', 'ref': u'TCA', 'pos': '83614', 'alt': u'TCA'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'NW_003315925.1', 'ref': u'TCA', 'pos': '83614', 'alt': u'TCA'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'HG2030_PATCH', 'ref': u'TCA', 'pos': '83614', 'alt': u'TCA'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'NW_009646201.1', 'ref': u'TCA', 'pos': '83614', 'alt': u'TCA'}}}] + assert results['NM_020469.2:c.260_262=']['transcript_description'] == 'Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA' + assert results['NM_020469.2:c.260_262=']['gene_symbol'] == 'ABO' + assert results['NM_020469.2:c.260_262=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Val87=)', 'slr': 
'NP_065202.2:p.(V87=)'} + assert results['NM_020469.2:c.260_262=']['submitted_variant'] == '9-136132908-T-TC' + assert results['NM_020469.2:c.260_262=']['genome_context_intronic_sequence'] == '' + assert results['NM_020469.2:c.260_262=']['hgvs_lrg_variant'] == 'LRG_792:g.20145_20147=' + assert results['NM_020469.2:c.260_262=']['hgvs_transcript_variant'] == 'NM_020469.2:c.260_262=' + assert results['NM_020469.2:c.260_262=']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20145_20147=' + assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '136132908', 'alt': u'TC'}} + assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '133257521', 'alt': u'TC'}} + assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '136132908', 'alt': u'TC'}} + assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '133257521', 'alt': u'TC'}} + assert results['NM_020469.2:c.260_262=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_792.xml'} + + + def test_variant91(self): + variant = '9-136132908-TAC-TCA' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_020469.2:c.259del' in results.keys() + assert 
results['NM_020469.2:c.259del']['hgvs_lrg_transcript_variant'] == 'LRG_792t1:c.259del' + assert results['NM_020469.2:c.259del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_020469.2:c.259del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83618del', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83618del', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83618del', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83618del', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}] + assert results['NM_020469.2:c.259del']['transcript_description'] == 'Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA' + assert results['NM_020469.2:c.259del']['gene_symbol'] == 'ABO' + assert results['NM_020469.2:c.259del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Val87Ter)', 'slr': 'NP_065202.2:p.(V87*)'} + assert results['NM_020469.2:c.259del']['submitted_variant'] == '9-136132908-TAC-TCA' + assert results['NM_020469.2:c.259del']['genome_context_intronic_sequence'] == '' + assert results['NM_020469.2:c.259del']['hgvs_lrg_variant'] == 'LRG_792:g.20144del' + assert results['NM_020469.2:c.259del']['hgvs_transcript_variant'] == 'NM_020469.2:c.259del' + assert results['NM_020469.2:c.259del']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20144del' + assert results['NM_020469.2:c.259del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910delinsCA', 'vcf': {'chr': 'chr9', 'ref': 'AC', 'pos': '136132909', 'alt': u'CA'}} + assert results['NM_020469.2:c.259del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000009.12:g.133257522_133257523delinsCA', 'vcf': {'chr': 'chr9', 'ref': 'AC', 'pos': '133257522', 'alt': u'CA'}} + assert results['NM_020469.2:c.259del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910delinsCA', 'vcf': {'chr': '9', 'ref': 'AC', 'pos': '136132909', 'alt': u'CA'}} + assert results['NM_020469.2:c.259del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523delinsCA', 'vcf': {'chr': '9', 'ref': 'AC', 'pos': '133257522', 'alt': u'CA'}} + assert results['NM_020469.2:c.259del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_792.xml'} + + + def test_variant92(self): + variant = '9-136132908-TA-TA' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_020469.2:c.261del' in results.keys() + assert results['NM_020469.2:c.261del']['hgvs_lrg_transcript_variant'] == 'LRG_792t1:c.261del' + assert results['NM_020469.2:c.261del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_020469.2:c.261del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83615del', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83615del', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615del', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615del', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}] + assert results['NM_020469.2:c.261del']['transcript_description'] == 'Homo sapiens ABO, 
alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA' + assert results['NM_020469.2:c.261del']['gene_symbol'] == 'ABO' + assert results['NM_020469.2:c.261del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Thr88ProfsTer31)', 'slr': 'NP_065202.2:p.(T88Pfs*31)'} + assert results['NM_020469.2:c.261del']['submitted_variant'] == '9-136132908-TA-TA' + assert results['NM_020469.2:c.261del']['genome_context_intronic_sequence'] == '' + assert results['NM_020469.2:c.261del']['hgvs_lrg_variant'] == 'LRG_792:g.20146del' + assert results['NM_020469.2:c.261del']['hgvs_transcript_variant'] == 'NM_020469.2:c.261del' + assert results['NM_020469.2:c.261del']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20146del' + assert results['NM_020469.2:c.261del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909=', 'vcf': {'chr': 'chr9', 'ref': 'TA', 'pos': '136132908', 'alt': 'TA'}} + assert results['NM_020469.2:c.261del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522=', 'vcf': {'chr': 'chr9', 'ref': 'TA', 'pos': '133257521', 'alt': 'TA'}} + assert results['NM_020469.2:c.261del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909=', 'vcf': {'chr': '9', 'ref': 'TA', 'pos': '136132908', 'alt': 'TA'}} + assert results['NM_020469.2:c.261del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522=', 'vcf': {'chr': '9', 'ref': 'TA', 'pos': '133257521', 'alt': 'TA'}} + assert results['NM_020469.2:c.261del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_792.xml'} + + assert results['flag'] 
== 'gene_variant' + + def test_variant93(self): + variant = 'NM_020469.2:c.258delG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_020469.2:c.259del' in results.keys() + assert results['NM_020469.2:c.259del']['hgvs_lrg_transcript_variant'] == 'LRG_792t1:c.259del' + assert results['NM_020469.2:c.259del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_020469.2:c.259del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83618del', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83618del', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83618del', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83618del', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}] + assert results['NM_020469.2:c.259del']['transcript_description'] == 'Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA' + assert results['NM_020469.2:c.259del']['gene_symbol'] == 'ABO' + assert results['NM_020469.2:c.259del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Val87Ter)', 'slr': 'NP_065202.2:p.(V87*)'} + assert results['NM_020469.2:c.259del']['submitted_variant'] == 'NM_020469.2:c.258delG' + assert results['NM_020469.2:c.259del']['genome_context_intronic_sequence'] == '' + assert results['NM_020469.2:c.259del']['hgvs_lrg_variant'] == 'LRG_792:g.20144del' + assert results['NM_020469.2:c.259del']['hgvs_transcript_variant'] == 'NM_020469.2:c.259del' + assert results['NM_020469.2:c.259del']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20144del' + assert results['NM_020469.2:c.259del']['primary_assembly_loci']['hg19'] 
== {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910delinsCA', 'vcf': {'chr': 'chr9', 'ref': 'AC', 'pos': '136132909', 'alt': u'CA'}} + assert results['NM_020469.2:c.259del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523delinsCA', 'vcf': {'chr': 'chr9', 'ref': 'AC', 'pos': '133257522', 'alt': u'CA'}} + assert results['NM_020469.2:c.259del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910delinsCA', 'vcf': {'chr': '9', 'ref': 'AC', 'pos': '136132909', 'alt': u'CA'}} + assert results['NM_020469.2:c.259del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523delinsCA', 'vcf': {'chr': '9', 'ref': 'AC', 'pos': '133257522', 'alt': u'CA'}} + assert results['NM_020469.2:c.259del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_792.xml'} + + + def test_variant94(self): + variant = 'NM_020469.2:c.260_262TGA=' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_020469.2:c.260_262=' in results.keys() + assert results['NM_020469.2:c.260_262=']['hgvs_lrg_transcript_variant'] == 'LRG_792t1:c.260_262=' + assert results['NM_020469.2:c.260_262=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_020469.2:c.260_262=']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'HG79_PATCH', 'ref': u'TCA', 'pos': '83614', 'alt': u'TCA'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'NW_003315925.1', 'ref': u'TCA', 'pos': '83614', 'alt': u'TCA'}}}, {'grch38': {'hgvs_genomic_description': 
'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'HG2030_PATCH', 'ref': u'TCA', 'pos': '83614', 'alt': u'TCA'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'NW_009646201.1', 'ref': u'TCA', 'pos': '83614', 'alt': u'TCA'}}}] + assert results['NM_020469.2:c.260_262=']['transcript_description'] == 'Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA' + assert results['NM_020469.2:c.260_262=']['gene_symbol'] == 'ABO' + assert results['NM_020469.2:c.260_262=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Val87=)', 'slr': 'NP_065202.2:p.(V87=)'} + assert results['NM_020469.2:c.260_262=']['submitted_variant'] == 'NM_020469.2:c.260_262TGA=' + assert results['NM_020469.2:c.260_262=']['genome_context_intronic_sequence'] == '' + assert results['NM_020469.2:c.260_262=']['hgvs_lrg_variant'] == 'LRG_792:g.20145_20147=' + assert results['NM_020469.2:c.260_262=']['hgvs_transcript_variant'] == 'NM_020469.2:c.260_262=' + assert results['NM_020469.2:c.260_262=']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20145_20147=' + assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '136132908', 'alt': u'TC'}} + assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '133257521', 'alt': u'TC'}} + assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '136132908', 'alt': u'TC'}} + assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': 
'133257521', 'alt': u'TC'}} + assert results['NM_020469.2:c.260_262=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_792.xml'} + + + def test_variant95(self): + variant = 'NM_020469.2:c.261delG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_020469.2:c.261del' in results.keys() + assert results['NM_020469.2:c.261del']['hgvs_lrg_transcript_variant'] == 'LRG_792t1:c.261del' + assert results['NM_020469.2:c.261del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_020469.2:c.261del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83615del', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83615del', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615del', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615del', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}] + assert results['NM_020469.2:c.261del']['transcript_description'] == 'Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA' + assert results['NM_020469.2:c.261del']['gene_symbol'] == 'ABO' + assert results['NM_020469.2:c.261del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Thr88ProfsTer31)', 'slr': 'NP_065202.2:p.(T88Pfs*31)'} + assert results['NM_020469.2:c.261del']['submitted_variant'] == 'NM_020469.2:c.261delG' + assert results['NM_020469.2:c.261del']['genome_context_intronic_sequence'] == '' + assert 
results['NM_020469.2:c.261del']['hgvs_lrg_variant'] == 'LRG_792:g.20146del' + assert results['NM_020469.2:c.261del']['hgvs_transcript_variant'] == 'NM_020469.2:c.261del' + assert results['NM_020469.2:c.261del']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20146del' + assert results['NM_020469.2:c.261del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909=', 'vcf': {'chr': 'chr9', 'ref': 'TA', 'pos': '136132908', 'alt': 'TA'}} + assert results['NM_020469.2:c.261del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522=', 'vcf': {'chr': 'chr9', 'ref': 'TA', 'pos': '133257521', 'alt': 'TA'}} + assert results['NM_020469.2:c.261del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909=', 'vcf': {'chr': '9', 'ref': 'TA', 'pos': '136132908', 'alt': 'TA'}} + assert results['NM_020469.2:c.261del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522=', 'vcf': {'chr': '9', 'ref': 'TA', 'pos': '133257521', 'alt': 'TA'}} + assert results['NM_020469.2:c.261del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_792.xml'} + + assert results['flag'] == 'gene_variant' + + def test_variant96(self): + variant = 'NM_020469.2:c.261dupG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_020469.2:c.261dup' in results.keys() + assert results['NM_020469.2:c.261dup']['hgvs_lrg_transcript_variant'] == 'LRG_792t1:c.261dup' + assert results['NM_020469.2:c.261dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_020469.2:c.261dup']['alt_genomic_loci'] == 
[{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83615dup', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'T', 'pos': '83614', 'alt': 'TC'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83615dup', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'T', 'pos': '83614', 'alt': 'TC'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615dup', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'T', 'pos': '83614', 'alt': 'TC'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615dup', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'T', 'pos': '83614', 'alt': 'TC'}}}] + assert results['NM_020469.2:c.261dup']['transcript_description'] == 'Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA' + assert results['NM_020469.2:c.261dup']['gene_symbol'] == 'ABO' + assert results['NM_020469.2:c.261dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Thr88AspfsTer107)', 'slr': 'NP_065202.2:p.(T88Dfs*107)'} + assert results['NM_020469.2:c.261dup']['submitted_variant'] == 'NM_020469.2:c.261dupG' + assert results['NM_020469.2:c.261dup']['genome_context_intronic_sequence'] == '' + assert results['NM_020469.2:c.261dup']['hgvs_lrg_variant'] == 'LRG_792:g.20146dup' + assert results['NM_020469.2:c.261dup']['hgvs_transcript_variant'] == 'NM_020469.2:c.261dup' + assert results['NM_020469.2:c.261dup']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20146dup' + assert results['NM_020469.2:c.261dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insCC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '136132908', 'alt': u'TCC'}} + assert results['NM_020469.2:c.261dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insCC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '133257521', 'alt': u'TCC'}} + assert results['NM_020469.2:c.261dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000009.11:g.136132908_136132909insCC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '136132908', 'alt': u'TCC'}} + assert results['NM_020469.2:c.261dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insCC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '133257521', 'alt': u'TCC'}} + assert results['NM_020469.2:c.261dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_792.xml'} + + + def test_variant97(self): + variant = 'NM_020469.2:c.261_262insTT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_020469.2:c.261_262insTT' in results.keys() + assert results['NM_020469.2:c.261_262insTT']['hgvs_lrg_transcript_variant'] == 'LRG_792t1:c.261_262insTT' + assert results['NM_020469.2:c.261_262insTT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_020469.2:c.261_262insTT']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83615insAA', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'T', 'pos': '83614', 'alt': u'TAA'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83615insAA', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'T', 'pos': '83614', 'alt': u'TAA'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83615insAA', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'T', 'pos': '83614', 'alt': u'TAA'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83615insAA', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'T', 'pos': '83614', 'alt': u'TAA'}}}] + assert results['NM_020469.2:c.261_262insTT']['transcript_description'] == 'Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA' + assert 
results['NM_020469.2:c.261_262insTT']['gene_symbol'] == 'ABO' + assert results['NM_020469.2:c.261_262insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Thr88LeufsTer32)', 'slr': 'NP_065202.2:p.(T88Lfs*32)'} + assert results['NM_020469.2:c.261_262insTT']['submitted_variant'] == 'NM_020469.2:c.261_262insTT' + assert results['NM_020469.2:c.261_262insTT']['genome_context_intronic_sequence'] == '' + assert results['NM_020469.2:c.261_262insTT']['hgvs_lrg_variant'] == 'LRG_792:g.20146_20147insTT' + assert results['NM_020469.2:c.261_262insTT']['hgvs_transcript_variant'] == 'NM_020469.2:c.261_262insTT' + assert results['NM_020469.2:c.261_262insTT']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20146_20147insTT' + assert results['NM_020469.2:c.261_262insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910insACA', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '136132908', 'alt': u'TAAC'}} + assert results['NM_020469.2:c.261_262insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523insACA', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '133257521', 'alt': u'TAAC'}} + assert results['NM_020469.2:c.261_262insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910insACA', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '136132908', 'alt': u'TAAC'}} + assert results['NM_020469.2:c.261_262insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523insACA', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '133257521', 'alt': u'TAAC'}} + assert results['NM_020469.2:c.261_262insTT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'lrg': 
'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_792.xml'} + + + def test_variant98(self): + variant = 'NC_000019.10:g.50378563_50378564insTAC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_007121.5:c.515A>T' in results.keys() + assert results['NM_007121.5:c.515A>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007121.5:c.515A>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.515A>T']['alt_genomic_loci'] == [] + assert results['NM_007121.5:c.515A>T']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA' + assert results['NM_007121.5:c.515A>T']['gene_symbol'] == 'NR1H2' + assert results['NM_007121.5:c.515A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172Ile)', 'slr': 'NP_009052.3:p.(K172I)'} + assert results['NM_007121.5:c.515A>T']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564insTAC' + assert results['NM_007121.5:c.515A>T']['genome_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.515A>T']['hgvs_lrg_variant'] == '' + assert results['NM_007121.5:c.515A>T']['hgvs_transcript_variant'] == 'NM_007121.5:c.515A>T' + assert results['NM_007121.5:c.515A>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_007121.5:c.515A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': u'ATAC'}} + assert results['NM_007121.5:c.515A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': u'ATAC'}} + assert results['NM_007121.5:c.515A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 
'alt': u'ATAC'}} + assert results['NM_007121.5:c.515A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': u'ATAC'}} + assert results['NM_007121.5:c.515A>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} + + assert 'NM_001256647.1:c.224A>T' in results.keys() + assert results['NM_001256647.1:c.224A>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001256647.1:c.224A>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.224A>T']['alt_genomic_loci'] == [] + assert results['NM_001256647.1:c.224A>T']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA' + assert results['NM_001256647.1:c.224A>T']['gene_symbol'] == 'NR1H2' + assert results['NM_001256647.1:c.224A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75Ile)', 'slr': 'NP_001243576.1:p.(K75I)'} + assert results['NM_001256647.1:c.224A>T']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564insTAC' + assert results['NM_001256647.1:c.224A>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.224A>T']['hgvs_lrg_variant'] == '' + assert results['NM_001256647.1:c.224A>T']['hgvs_transcript_variant'] == 'NM_001256647.1:c.224A>T' + assert results['NM_001256647.1:c.224A>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001256647.1:c.224A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': u'ATAC'}} + assert results['NM_001256647.1:c.224A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': 
'50378563', 'alt': u'ATAC'}} + assert results['NM_001256647.1:c.224A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': u'ATAC'}} + assert results['NM_001256647.1:c.224A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': u'ATAC'}} + assert results['NM_001256647.1:c.224A>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1'} + + + def test_variant99(self): + variant = 'NC_000019.10:g.50378563_50378564insC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_007121.5:c.515_516del' in results.keys() + assert results['NM_007121.5:c.515_516del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007121.5:c.515_516del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.515_516del']['alt_genomic_loci'] == [] + assert results['NM_007121.5:c.515_516del']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA' + assert results['NM_007121.5:c.515_516del']['gene_symbol'] == 'NR1H2' + assert results['NM_007121.5:c.515_516del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172ThrfsTer34)', 'slr': 'NP_009052.3:p.(K172Tfs*34)'} + assert results['NM_007121.5:c.515_516del']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564insC' + assert results['NM_007121.5:c.515_516del']['genome_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.515_516del']['hgvs_lrg_variant'] == '' + assert results['NM_007121.5:c.515_516del']['hgvs_transcript_variant'] == 'NM_007121.5:c.515_516del' + assert 
results['NM_007121.5:c.515_516del']['hgvs_refseqgene_variant'] == '' + assert results['NM_007121.5:c.515_516del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'AC'}} + assert results['NM_007121.5:c.515_516del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'AC'}} + assert results['NM_007121.5:c.515_516del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'AC'}} + assert results['NM_007121.5:c.515_516del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'AC'}} + assert results['NM_007121.5:c.515_516del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} + + assert 'NM_001256647.1:c.224_225del' in results.keys() + assert results['NM_001256647.1:c.224_225del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001256647.1:c.224_225del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.224_225del']['alt_genomic_loci'] == [] + assert results['NM_001256647.1:c.224_225del']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA' + assert results['NM_001256647.1:c.224_225del']['gene_symbol'] == 'NR1H2' + assert results['NM_001256647.1:c.224_225del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75ThrfsTer34)', 'slr': 'NP_001243576.1:p.(K75Tfs*34)'} + assert results['NM_001256647.1:c.224_225del']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564insC' + 
assert results['NM_001256647.1:c.224_225del']['genome_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.224_225del']['hgvs_lrg_variant'] == '' + assert results['NM_001256647.1:c.224_225del']['hgvs_transcript_variant'] == 'NM_001256647.1:c.224_225del' + assert results['NM_001256647.1:c.224_225del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001256647.1:c.224_225del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'AC'}} + assert results['NM_001256647.1:c.224_225del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'AC'}} + assert results['NM_001256647.1:c.224_225del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'AC'}} + assert results['NM_001256647.1:c.224_225del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'AC'}} + assert results['NM_001256647.1:c.224_225del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1'} + + + def test_variant100(self): + variant = 'NC_000019.10:g.50378564_50378565insTACA' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_007121.5:c.515_516insT' in results.keys() + assert results['NM_007121.5:c.515_516insT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007121.5:c.515_516insT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.515_516insT']['alt_genomic_loci'] == [] + assert 
results['NM_007121.5:c.515_516insT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA' + assert results['NM_007121.5:c.515_516insT']['gene_symbol'] == 'NR1H2' + assert results['NM_007121.5:c.515_516insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172AsnfsTer35)', 'slr': 'NP_009052.3:p.(K172Nfs*35)'} + assert results['NM_007121.5:c.515_516insT']['submitted_variant'] == 'NC_000019.10:g.50378564_50378565insTACA' + assert results['NM_007121.5:c.515_516insT']['genome_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.515_516insT']['hgvs_lrg_variant'] == '' + assert results['NM_007121.5:c.515_516insT']['hgvs_transcript_variant'] == 'NM_007121.5:c.515_516insT' + assert results['NM_007121.5:c.515_516insT']['hgvs_refseqgene_variant'] == '' + assert results['NM_007121.5:c.515_516insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881821_50881822insTACA', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': u'AATAC'}} + assert results['NM_007121.5:c.515_516insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378564_50378565insTACA', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': u'AATAC'}} + assert results['NM_007121.5:c.515_516insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881821_50881822insTACA', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': u'AATAC'}} + assert results['NM_007121.5:c.515_516insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378564_50378565insTACA', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': u'AATAC'}} + assert results['NM_007121.5:c.515_516insT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} + + assert 
'NM_001256647.1:c.224_225insT' in results.keys() + assert results['NM_001256647.1:c.224_225insT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001256647.1:c.224_225insT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.224_225insT']['alt_genomic_loci'] == [] + assert results['NM_001256647.1:c.224_225insT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA' + assert results['NM_001256647.1:c.224_225insT']['gene_symbol'] == 'NR1H2' + assert results['NM_001256647.1:c.224_225insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75AsnfsTer35)', 'slr': 'NP_001243576.1:p.(K75Nfs*35)'} + assert results['NM_001256647.1:c.224_225insT']['submitted_variant'] == 'NC_000019.10:g.50378564_50378565insTACA' + assert results['NM_001256647.1:c.224_225insT']['genome_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.224_225insT']['hgvs_lrg_variant'] == '' + assert results['NM_001256647.1:c.224_225insT']['hgvs_transcript_variant'] == 'NM_001256647.1:c.224_225insT' + assert results['NM_001256647.1:c.224_225insT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001256647.1:c.224_225insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881821_50881822insTACA', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': u'AATAC'}} + assert results['NM_001256647.1:c.224_225insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378564_50378565insTACA', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': u'AATAC'}} + assert results['NM_001256647.1:c.224_225insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881821_50881822insTACA', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': u'AATAC'}} + assert results['NM_001256647.1:c.224_225insT']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000019.10:g.50378564_50378565insTACA', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': u'AATAC'}} + assert results['NM_001256647.1:c.224_225insT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1'} + + + def test_variant101(self): + variant = 'NC_000019.10:g.50378565_50378567dup' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_007121.5:c.514_520=' in results.keys() + assert results['NM_007121.5:c.514_520=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007121.5:c.514_520=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.514_520=']['alt_genomic_loci'] == [] + assert results['NM_007121.5:c.514_520=']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA' + assert results['NM_007121.5:c.514_520=']['gene_symbol'] == 'NR1H2' + assert results['NM_007121.5:c.514_520=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172=)', 'slr': 'NP_009052.3:p.(K172=)'} + assert results['NM_007121.5:c.514_520=']['submitted_variant'] == 'NC_000019.10:g.50378565_50378567dup' + assert results['NM_007121.5:c.514_520=']['genome_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.514_520=']['hgvs_lrg_variant'] == '' + assert results['NM_007121.5:c.514_520=']['hgvs_transcript_variant'] == 'NM_007121.5:c.514_520=' + assert results['NM_007121.5:c.514_520=']['hgvs_refseqgene_variant'] == '' + assert results['NM_007121.5:c.514_520=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881822_50881824dup', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'AAAC'}} + assert results['NM_007121.5:c.514_520=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378565_50378567dup', 'vcf': 
{'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'AAAC'}} + assert results['NM_007121.5:c.514_520=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881822_50881824dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'AAAC'}} + assert results['NM_007121.5:c.514_520=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378565_50378567dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'AAAC'}} + assert results['NM_007121.5:c.514_520=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} + + assert 'NM_001256647.1:c.223_229=' in results.keys() + assert results['NM_001256647.1:c.223_229=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001256647.1:c.223_229=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.223_229=']['alt_genomic_loci'] == [] + assert results['NM_001256647.1:c.223_229=']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA' + assert results['NM_001256647.1:c.223_229=']['gene_symbol'] == 'NR1H2' + assert results['NM_001256647.1:c.223_229=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75=)', 'slr': 'NP_001243576.1:p.(K75=)'} + assert results['NM_001256647.1:c.223_229=']['submitted_variant'] == 'NC_000019.10:g.50378565_50378567dup' + assert results['NM_001256647.1:c.223_229=']['genome_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.223_229=']['hgvs_lrg_variant'] == '' + assert results['NM_001256647.1:c.223_229=']['hgvs_transcript_variant'] == 'NM_001256647.1:c.223_229=' + assert results['NM_001256647.1:c.223_229=']['hgvs_refseqgene_variant'] == '' + assert results['NM_001256647.1:c.223_229=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000019.9:g.50881822_50881824dup', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'AAAC'}} + assert results['NM_001256647.1:c.223_229=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378565_50378567dup', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'AAAC'}} + assert results['NM_001256647.1:c.223_229=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881822_50881824dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'AAAC'}} + assert results['NM_001256647.1:c.223_229=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378565_50378567dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'AAAC'}} + assert results['NM_001256647.1:c.223_229=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1'} + + assert results['flag'] == 'gene_variant' + + def test_variant102(self): + variant = 'NC_000019.10:g.50378563_50378564=' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_007121.5:c.519_521del' in results.keys() + assert results['NM_007121.5:c.519_521del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007121.5:c.519_521del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.519_521del']['alt_genomic_loci'] == [] + assert results['NM_007121.5:c.519_521del']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA' + assert results['NM_007121.5:c.519_521del']['gene_symbol'] == 'NR1H2' + assert results['NM_007121.5:c.519_521del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Gln176del)', 'slr': 'NP_009052.3:p.(Q176del)'} + assert results['NM_007121.5:c.519_521del']['submitted_variant'] == 
'NC_000019.10:g.50378563_50378564=' + assert results['NM_007121.5:c.519_521del']['genome_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.519_521del']['hgvs_lrg_variant'] == '' + assert results['NM_007121.5:c.519_521del']['hgvs_transcript_variant'] == 'NM_007121.5:c.519_521del' + assert results['NM_007121.5:c.519_521del']['hgvs_refseqgene_variant'] == '' + assert results['NM_007121.5:c.519_521del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821=', 'vcf': {'chr': 'chr19', 'ref': 'AA', 'pos': '50881820', 'alt': 'AA'}} + assert results['NM_007121.5:c.519_521del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564=', 'vcf': {'chr': 'chr19', 'ref': 'AA', 'pos': '50378563', 'alt': 'AA'}} + assert results['NM_007121.5:c.519_521del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821=', 'vcf': {'chr': '19', 'ref': 'AA', 'pos': '50881820', 'alt': 'AA'}} + assert results['NM_007121.5:c.519_521del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564=', 'vcf': {'chr': '19', 'ref': 'AA', 'pos': '50378563', 'alt': 'AA'}} + assert results['NM_007121.5:c.519_521del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} + + assert 'NM_001256647.1:c.228_230del' in results.keys() + assert results['NM_001256647.1:c.228_230del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001256647.1:c.228_230del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.228_230del']['alt_genomic_loci'] == [] + assert results['NM_001256647.1:c.228_230del']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA' + assert results['NM_001256647.1:c.228_230del']['gene_symbol'] == 
'NR1H2' + assert results['NM_001256647.1:c.228_230del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Gln79del)', 'slr': 'NP_001243576.1:p.(Q79del)'} + assert results['NM_001256647.1:c.228_230del']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564=' + assert results['NM_001256647.1:c.228_230del']['genome_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.228_230del']['hgvs_lrg_variant'] == '' + assert results['NM_001256647.1:c.228_230del']['hgvs_transcript_variant'] == 'NM_001256647.1:c.228_230del' + assert results['NM_001256647.1:c.228_230del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001256647.1:c.228_230del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821=', 'vcf': {'chr': 'chr19', 'ref': 'AA', 'pos': '50881820', 'alt': 'AA'}} + assert results['NM_001256647.1:c.228_230del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564=', 'vcf': {'chr': 'chr19', 'ref': 'AA', 'pos': '50378563', 'alt': 'AA'}} + assert results['NM_001256647.1:c.228_230del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821=', 'vcf': {'chr': '19', 'ref': 'AA', 'pos': '50881820', 'alt': 'AA'}} + assert results['NM_001256647.1:c.228_230del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564=', 'vcf': {'chr': '19', 'ref': 'AA', 'pos': '50378563', 'alt': 'AA'}} + assert results['NM_001256647.1:c.228_230del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1'} + + + def test_variant103(self): + variant = 'NC_000019.10:g.50378563_50378564insTCGG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001256647.1:c.224_226delinsTCGG' in results.keys() + assert 
results['NM_001256647.1:c.224_226delinsTCGG']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001256647.1:c.224_226delinsTCGG']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.224_226delinsTCGG']['alt_genomic_loci'] == [] + assert results['NM_001256647.1:c.224_226delinsTCGG']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA' + assert results['NM_001256647.1:c.224_226delinsTCGG']['gene_symbol'] == 'NR1H2' + assert results['NM_001256647.1:c.224_226delinsTCGG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75IlefsTer35)', 'slr': 'NP_001243576.1:p.(K75Ifs*35)'} + assert results['NM_001256647.1:c.224_226delinsTCGG']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564insTCGG' + assert results['NM_001256647.1:c.224_226delinsTCGG']['genome_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.224_226delinsTCGG']['hgvs_lrg_variant'] == '' + assert results['NM_001256647.1:c.224_226delinsTCGG']['hgvs_transcript_variant'] == 'NM_001256647.1:c.224_226delinsTCGG' + assert results['NM_001256647.1:c.224_226delinsTCGG']['hgvs_refseqgene_variant'] == '' + assert results['NM_001256647.1:c.224_226delinsTCGG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTCGG', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': u'ATCGG'}} + assert results['NM_001256647.1:c.224_226delinsTCGG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTCGG', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': u'ATCGG'}} + assert results['NM_001256647.1:c.224_226delinsTCGG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTCGG', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': u'ATCGG'}} + assert 
results['NM_001256647.1:c.224_226delinsTCGG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTCGG', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': u'ATCGG'}} + assert results['NM_001256647.1:c.224_226delinsTCGG']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_007121.5:c.515_517delinsTCGG' in results.keys() + assert results['NM_007121.5:c.515_517delinsTCGG']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007121.5:c.515_517delinsTCGG']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.515_517delinsTCGG']['alt_genomic_loci'] == [] + assert results['NM_007121.5:c.515_517delinsTCGG']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA' + assert results['NM_007121.5:c.515_517delinsTCGG']['gene_symbol'] == 'NR1H2' + assert results['NM_007121.5:c.515_517delinsTCGG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172IlefsTer35)', 'slr': 'NP_009052.3:p.(K172Ifs*35)'} + assert results['NM_007121.5:c.515_517delinsTCGG']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564insTCGG' + assert results['NM_007121.5:c.515_517delinsTCGG']['genome_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.515_517delinsTCGG']['hgvs_lrg_variant'] == '' + assert results['NM_007121.5:c.515_517delinsTCGG']['hgvs_transcript_variant'] == 'NM_007121.5:c.515_517delinsTCGG' + assert results['NM_007121.5:c.515_517delinsTCGG']['hgvs_refseqgene_variant'] == '' + assert results['NM_007121.5:c.515_517delinsTCGG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTCGG', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': u'ATCGG'}} + assert 
results['NM_007121.5:c.515_517delinsTCGG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTCGG', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': u'ATCGG'}} + assert results['NM_007121.5:c.515_517delinsTCGG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTCGG', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': u'ATCGG'}} + assert results['NM_007121.5:c.515_517delinsTCGG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTCGG', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': u'ATCGG'}} + assert results['NM_007121.5:c.515_517delinsTCGG']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} + + + def test_variant104(self): + variant = 'NC_000019.10:g.50378563delinsTTAC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_007121.5:c.514_515inv' in results.keys() + assert results['NM_007121.5:c.514_515inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007121.5:c.514_515inv']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.514_515inv']['alt_genomic_loci'] == [] + assert results['NM_007121.5:c.514_515inv']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA' + assert results['NM_007121.5:c.514_515inv']['gene_symbol'] == 'NR1H2' + assert results['NM_007121.5:c.514_515inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172Leu)', 'slr': 'NP_009052.3:p.(K172L)'} + assert results['NM_007121.5:c.514_515inv']['submitted_variant'] == 'NC_000019.10:g.50378563delinsTTAC' + assert results['NM_007121.5:c.514_515inv']['genome_context_intronic_sequence'] == '' + assert 
results['NM_007121.5:c.514_515inv']['hgvs_lrg_variant'] == '' + assert results['NM_007121.5:c.514_515inv']['hgvs_transcript_variant'] == 'NM_007121.5:c.514_515inv' + assert results['NM_007121.5:c.514_515inv']['hgvs_refseqgene_variant'] == '' + assert results['NM_007121.5:c.514_515inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820delinsTTAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'TTAC'}} + assert results['NM_007121.5:c.514_515inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563delinsTTAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'TTAC'}} + assert results['NM_007121.5:c.514_515inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820delinsTTAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'TTAC'}} + assert results['NM_007121.5:c.514_515inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563delinsTTAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'TTAC'}} + assert results['NM_007121.5:c.514_515inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} + + assert 'NM_001256647.1:c.223_224inv' in results.keys() + assert results['NM_001256647.1:c.223_224inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001256647.1:c.223_224inv']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.223_224inv']['alt_genomic_loci'] == [] + assert results['NM_001256647.1:c.223_224inv']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA' + assert results['NM_001256647.1:c.223_224inv']['gene_symbol'] == 'NR1H2' + assert results['NM_001256647.1:c.223_224inv']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_001243576.1:p.(Lys75Leu)', 'slr': 'NP_001243576.1:p.(K75L)'} + assert results['NM_001256647.1:c.223_224inv']['submitted_variant'] == 'NC_000019.10:g.50378563delinsTTAC' + assert results['NM_001256647.1:c.223_224inv']['genome_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.223_224inv']['hgvs_lrg_variant'] == '' + assert results['NM_001256647.1:c.223_224inv']['hgvs_transcript_variant'] == 'NM_001256647.1:c.223_224inv' + assert results['NM_001256647.1:c.223_224inv']['hgvs_refseqgene_variant'] == '' + assert results['NM_001256647.1:c.223_224inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820delinsTTAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'TTAC'}} + assert results['NM_001256647.1:c.223_224inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563delinsTTAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'TTAC'}} + assert results['NM_001256647.1:c.223_224inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820delinsTTAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'TTAC'}} + assert results['NM_001256647.1:c.223_224inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563delinsTTAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'TTAC'}} + assert results['NM_001256647.1:c.223_224inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1'} + + + def test_variant105(self): + variant = 'NC_000019.10:g.50378563_50378564insTAAC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_007121.5:c.514_515insT' in results.keys() + assert results['NM_007121.5:c.514_515insT']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_007121.5:c.514_515insT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.514_515insT']['alt_genomic_loci'] == [] + assert results['NM_007121.5:c.514_515insT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA' + assert results['NM_007121.5:c.514_515insT']['gene_symbol'] == 'NR1H2' + assert results['NM_007121.5:c.514_515insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172IlefsTer35)', 'slr': 'NP_009052.3:p.(K172Ifs*35)'} + assert results['NM_007121.5:c.514_515insT']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564insTAAC' + assert results['NM_007121.5:c.514_515insT']['genome_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.514_515insT']['hgvs_lrg_variant'] == '' + assert results['NM_007121.5:c.514_515insT']['hgvs_transcript_variant'] == 'NM_007121.5:c.514_515insT' + assert results['NM_007121.5:c.514_515insT']['hgvs_refseqgene_variant'] == '' + assert results['NM_007121.5:c.514_515insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'ATAAC'}} + assert results['NM_007121.5:c.514_515insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'ATAAC'}} + assert results['NM_007121.5:c.514_515insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'ATAAC'}} + assert results['NM_007121.5:c.514_515insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'ATAAC'}} + assert results['NM_007121.5:c.514_515insT']['reference_sequence_records'] 
== {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} + + assert 'NM_001256647.1:c.223_224insT' in results.keys() + assert results['NM_001256647.1:c.223_224insT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001256647.1:c.223_224insT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.223_224insT']['alt_genomic_loci'] == [] + assert results['NM_001256647.1:c.223_224insT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA' + assert results['NM_001256647.1:c.223_224insT']['gene_symbol'] == 'NR1H2' + assert results['NM_001256647.1:c.223_224insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75IlefsTer35)', 'slr': 'NP_001243576.1:p.(K75Ifs*35)'} + assert results['NM_001256647.1:c.223_224insT']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564insTAAC' + assert results['NM_001256647.1:c.223_224insT']['genome_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.223_224insT']['hgvs_lrg_variant'] == '' + assert results['NM_001256647.1:c.223_224insT']['hgvs_transcript_variant'] == 'NM_001256647.1:c.223_224insT' + assert results['NM_001256647.1:c.223_224insT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001256647.1:c.223_224insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'ATAAC'}} + assert results['NM_001256647.1:c.223_224insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'ATAAC'}} + assert results['NM_001256647.1:c.223_224insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAAC', 'vcf': {'chr': '19', 'ref': 'A', 
'pos': '50881820', 'alt': 'ATAAC'}} + assert results['NM_001256647.1:c.223_224insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'ATAAC'}} + assert results['NM_001256647.1:c.223_224insT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1'} + + assert results['flag'] == 'gene_variant' + + def test_variant106(self): + variant = 'NC_000019.10:g.50378562_50378565del' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_001256647.1:c.222_228del' in results.keys() + assert results['NM_001256647.1:c.222_228del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001256647.1:c.222_228del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.222_228del']['alt_genomic_loci'] == [] + assert results['NM_001256647.1:c.222_228del']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA' + assert results['NM_001256647.1:c.222_228del']['gene_symbol'] == 'NR1H2' + assert results['NM_001256647.1:c.222_228del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75SerfsTer47)', 'slr': 'NP_001243576.1:p.(K75Sfs*47)'} + assert results['NM_001256647.1:c.222_228del']['submitted_variant'] == 'NC_000019.10:g.50378562_50378565del' + assert results['NM_001256647.1:c.222_228del']['genome_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.222_228del']['hgvs_lrg_variant'] == '' + assert results['NM_001256647.1:c.222_228del']['hgvs_transcript_variant'] == 'NM_001256647.1:c.222_228del' + assert results['NM_001256647.1:c.222_228del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001256647.1:c.222_228del']['primary_assembly_loci']['hg19'] 
== {'hgvs_genomic_description': 'NC_000019.9:g.50881819_50881822del', 'vcf': {'chr': 'chr19', 'ref': 'GGAAA', 'pos': '50881818', 'alt': 'G'}} + assert results['NM_001256647.1:c.222_228del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378562_50378565del', 'vcf': {'chr': 'chr19', 'ref': 'GGAAA', 'pos': '50378561', 'alt': 'G'}} + assert results['NM_001256647.1:c.222_228del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881819_50881822del', 'vcf': {'chr': '19', 'ref': 'GGAAA', 'pos': '50881818', 'alt': 'G'}} + assert results['NM_001256647.1:c.222_228del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378562_50378565del', 'vcf': {'chr': '19', 'ref': 'GGAAA', 'pos': '50378561', 'alt': 'G'}} + assert results['NM_001256647.1:c.222_228del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1'} + + assert 'NM_007121.5:c.513_519del' in results.keys() + assert results['NM_007121.5:c.513_519del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007121.5:c.513_519del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.513_519del']['alt_genomic_loci'] == [] + assert results['NM_007121.5:c.513_519del']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA' + assert results['NM_007121.5:c.513_519del']['gene_symbol'] == 'NR1H2' + assert results['NM_007121.5:c.513_519del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172SerfsTer47)', 'slr': 'NP_009052.3:p.(K172Sfs*47)'} + assert results['NM_007121.5:c.513_519del']['submitted_variant'] == 'NC_000019.10:g.50378562_50378565del' + assert results['NM_007121.5:c.513_519del']['genome_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.513_519del']['hgvs_lrg_variant'] == 
'' + assert results['NM_007121.5:c.513_519del']['hgvs_transcript_variant'] == 'NM_007121.5:c.513_519del' + assert results['NM_007121.5:c.513_519del']['hgvs_refseqgene_variant'] == '' + assert results['NM_007121.5:c.513_519del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881819_50881822del', 'vcf': {'chr': 'chr19', 'ref': 'GGAAA', 'pos': '50881818', 'alt': 'G'}} + assert results['NM_007121.5:c.513_519del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378562_50378565del', 'vcf': {'chr': 'chr19', 'ref': 'GGAAA', 'pos': '50378561', 'alt': 'G'}} + assert results['NM_007121.5:c.513_519del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881819_50881822del', 'vcf': {'chr': '19', 'ref': 'GGAAA', 'pos': '50881818', 'alt': 'G'}} + assert results['NM_007121.5:c.513_519del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378562_50378565del', 'vcf': {'chr': '19', 'ref': 'GGAAA', 'pos': '50378561', 'alt': 'G'}} + assert results['NM_007121.5:c.513_519del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} + + + def test_variant107(self): + variant = 'NC_000019.10:g.50378562_50378565delinsTC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001256647.1:c.222_228delinsTC' in results.keys() + assert results['NM_001256647.1:c.222_228delinsTC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001256647.1:c.222_228delinsTC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.222_228delinsTC']['alt_genomic_loci'] == [] + assert results['NM_001256647.1:c.222_228delinsTC']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA' + assert 
results['NM_001256647.1:c.222_228delinsTC']['gene_symbol'] == 'NR1H2' + assert results['NM_001256647.1:c.222_228delinsTC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75ProfsTer33)', 'slr': 'NP_001243576.1:p.(K75Pfs*33)'} + assert results['NM_001256647.1:c.222_228delinsTC']['submitted_variant'] == 'NC_000019.10:g.50378562_50378565delinsTC' + assert results['NM_001256647.1:c.222_228delinsTC']['genome_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.222_228delinsTC']['hgvs_lrg_variant'] == '' + assert results['NM_001256647.1:c.222_228delinsTC']['hgvs_transcript_variant'] == 'NM_001256647.1:c.222_228delinsTC' + assert results['NM_001256647.1:c.222_228delinsTC']['hgvs_refseqgene_variant'] == '' + assert results['NM_001256647.1:c.222_228delinsTC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881819_50881822delinsTC', 'vcf': {'chr': 'chr19', 'ref': 'GAAA', 'pos': '50881819', 'alt': 'TC'}} + assert results['NM_001256647.1:c.222_228delinsTC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378562_50378565delinsTC', 'vcf': {'chr': 'chr19', 'ref': 'GAAA', 'pos': '50378562', 'alt': 'TC'}} + assert results['NM_001256647.1:c.222_228delinsTC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881819_50881822delinsTC', 'vcf': {'chr': '19', 'ref': 'GAAA', 'pos': '50881819', 'alt': 'TC'}} + assert results['NM_001256647.1:c.222_228delinsTC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378562_50378565delinsTC', 'vcf': {'chr': '19', 'ref': 'GAAA', 'pos': '50378562', 'alt': 'TC'}} + assert results['NM_001256647.1:c.222_228delinsTC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_007121.5:c.513_519delinsTC' in 
results.keys() + assert results['NM_007121.5:c.513_519delinsTC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007121.5:c.513_519delinsTC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.513_519delinsTC']['alt_genomic_loci'] == [] + assert results['NM_007121.5:c.513_519delinsTC']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA' + assert results['NM_007121.5:c.513_519delinsTC']['gene_symbol'] == 'NR1H2' + assert results['NM_007121.5:c.513_519delinsTC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172ProfsTer33)', 'slr': 'NP_009052.3:p.(K172Pfs*33)'} + assert results['NM_007121.5:c.513_519delinsTC']['submitted_variant'] == 'NC_000019.10:g.50378562_50378565delinsTC' + assert results['NM_007121.5:c.513_519delinsTC']['genome_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.513_519delinsTC']['hgvs_lrg_variant'] == '' + assert results['NM_007121.5:c.513_519delinsTC']['hgvs_transcript_variant'] == 'NM_007121.5:c.513_519delinsTC' + assert results['NM_007121.5:c.513_519delinsTC']['hgvs_refseqgene_variant'] == '' + assert results['NM_007121.5:c.513_519delinsTC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881819_50881822delinsTC', 'vcf': {'chr': 'chr19', 'ref': 'GAAA', 'pos': '50881819', 'alt': 'TC'}} + assert results['NM_007121.5:c.513_519delinsTC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378562_50378565delinsTC', 'vcf': {'chr': 'chr19', 'ref': 'GAAA', 'pos': '50378562', 'alt': 'TC'}} + assert results['NM_007121.5:c.513_519delinsTC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881819_50881822delinsTC', 'vcf': {'chr': '19', 'ref': 'GAAA', 'pos': '50881819', 'alt': 'TC'}} + assert results['NM_007121.5:c.513_519delinsTC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000019.10:g.50378562_50378565delinsTC', 'vcf': {'chr': '19', 'ref': 'GAAA', 'pos': '50378562', 'alt': 'TC'}} + assert results['NM_007121.5:c.513_519delinsTC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} + + + def test_variant108(self): + variant = 'NC_000007.14:g.149779575_149779577delinsT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_198455.2:c.1115_1116insT' in results.keys() + assert results['NM_198455.2:c.1115_1116insT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_198455.2:c.1115_1116insT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_198455.2:c.1115_1116insT']['alt_genomic_loci'] == [] + assert results['NM_198455.2:c.1115_1116insT']['transcript_description'] == 'Homo sapiens SCO-spondin (SSPO), mRNA' + assert results['NM_198455.2:c.1115_1116insT']['gene_symbol'] == 'SSPO' + assert results['NM_198455.2:c.1115_1116insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_940857.2:p.(Leu374ProfsTer16)', 'slr': 'NP_940857.2:p.(L374Pfs*16)'} + assert results['NM_198455.2:c.1115_1116insT']['submitted_variant'] == 'NC_000007.14:g.149779575_149779577delinsT' + assert results['NM_198455.2:c.1115_1116insT']['genome_context_intronic_sequence'] == '' + assert results['NM_198455.2:c.1115_1116insT']['hgvs_lrg_variant'] == '' + assert results['NM_198455.2:c.1115_1116insT']['hgvs_transcript_variant'] == 'NM_198455.2:c.1115_1116insT' + assert results['NM_198455.2:c.1115_1116insT']['hgvs_refseqgene_variant'] == '' + assert results['NM_198455.2:c.1115_1116insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476664_149476666delinsTC', 'vcf': {'chr': 'chr7', 'ref': 'CAG', 'pos': '149476664', 'alt': 'TC'}} + assert results['NM_198455.2:c.1115_1116insT']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000007.14:g.149779575_149779577delinsT', 'vcf': {'chr': 'chr7', 'ref': 'CAG', 'pos': '149779575', 'alt': 'T'}} + assert results['NM_198455.2:c.1115_1116insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476664_149476666delinsTC', 'vcf': {'chr': '7', 'ref': 'CAG', 'pos': '149476664', 'alt': 'TC'}} + assert results['NM_198455.2:c.1115_1116insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779575_149779577delinsT', 'vcf': {'chr': '7', 'ref': 'CAG', 'pos': '149779575', 'alt': 'T'}} + assert results['NM_198455.2:c.1115_1116insT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2'} + + + def test_variant109(self): + variant = 'NC_000007.14:g.149779575_149779577=' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_198455.2:c.1116_1118=' in results.keys() + assert results['NM_198455.2:c.1116_1118=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_198455.2:c.1116_1118=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_198455.2:c.1116_1118=']['alt_genomic_loci'] == [] + assert results['NM_198455.2:c.1116_1118=']['transcript_description'] == 'Homo sapiens SCO-spondin (SSPO), mRNA' + assert results['NM_198455.2:c.1116_1118=']['gene_symbol'] == 'SSPO' + assert results['NM_198455.2:c.1116_1118=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_940857.2:p.(Asp372=)', 'slr': 'NP_940857.2:p.(D372=)'} + assert results['NM_198455.2:c.1116_1118=']['submitted_variant'] == 'NC_000007.14:g.149779575_149779577=' + assert results['NM_198455.2:c.1116_1118=']['genome_context_intronic_sequence'] == '' + assert results['NM_198455.2:c.1116_1118=']['hgvs_lrg_variant'] == '' + assert results['NM_198455.2:c.1116_1118=']['hgvs_transcript_variant'] == 
'NM_198455.2:c.1116_1118=' + assert results['NM_198455.2:c.1116_1118=']['hgvs_refseqgene_variant'] == '' + assert results['NM_198455.2:c.1116_1118=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476665_149476666del', 'vcf': {'chr': 'chr7', 'ref': 'CAG', 'pos': '149476664', 'alt': 'C'}} + assert results['NM_198455.2:c.1116_1118=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779576_149779578del', 'vcf': {'chr': 'chr7', 'ref': 'ACAG', 'pos': '149779574', 'alt': 'A'}} + assert results['NM_198455.2:c.1116_1118=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476665_149476666del', 'vcf': {'chr': '7', 'ref': 'CAG', 'pos': '149476664', 'alt': 'C'}} + assert results['NM_198455.2:c.1116_1118=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779576_149779578del', 'vcf': {'chr': '7', 'ref': 'ACAG', 'pos': '149779574', 'alt': 'A'}} + assert results['NM_198455.2:c.1116_1118=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2'} + + + def test_variant110(self): + variant = 'NC_000007.14:g.149779576_149779578del' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_198455.2:c.1116_1118=' in results.keys() + assert results['NM_198455.2:c.1116_1118=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_198455.2:c.1116_1118=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_198455.2:c.1116_1118=']['alt_genomic_loci'] == [] + assert results['NM_198455.2:c.1116_1118=']['transcript_description'] == 'Homo sapiens SCO-spondin (SSPO), mRNA' + assert results['NM_198455.2:c.1116_1118=']['gene_symbol'] == 'SSPO' + assert results['NM_198455.2:c.1116_1118=']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_940857.2:p.(Asp372=)', 'slr': 'NP_940857.2:p.(D372=)'} + assert results['NM_198455.2:c.1116_1118=']['submitted_variant'] == 'NC_000007.14:g.149779576_149779578del' + assert results['NM_198455.2:c.1116_1118=']['genome_context_intronic_sequence'] == '' + assert results['NM_198455.2:c.1116_1118=']['hgvs_lrg_variant'] == '' + assert results['NM_198455.2:c.1116_1118=']['hgvs_transcript_variant'] == 'NM_198455.2:c.1116_1118=' + assert results['NM_198455.2:c.1116_1118=']['hgvs_refseqgene_variant'] == '' + assert results['NM_198455.2:c.1116_1118=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476665_149476666del', 'vcf': {'chr': 'chr7', 'ref': 'CAG', 'pos': '149476664', 'alt': 'C'}} + assert results['NM_198455.2:c.1116_1118=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779576_149779578del', 'vcf': {'chr': 'chr7', 'ref': 'ACAG', 'pos': '149779574', 'alt': 'A'}} + assert results['NM_198455.2:c.1116_1118=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476665_149476666del', 'vcf': {'chr': '7', 'ref': 'CAG', 'pos': '149476664', 'alt': 'C'}} + assert results['NM_198455.2:c.1116_1118=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779576_149779578del', 'vcf': {'chr': '7', 'ref': 'ACAG', 'pos': '149779574', 'alt': 'A'}} + assert results['NM_198455.2:c.1116_1118=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2'} + + + def test_variant111(self): + variant = 'NC_000007.14:g.149779577del' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_198455.2:c.1115_1116dup' in results.keys() + assert results['NM_198455.2:c.1115_1116dup']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_198455.2:c.1115_1116dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_198455.2:c.1115_1116dup']['alt_genomic_loci'] == [] + assert results['NM_198455.2:c.1115_1116dup']['transcript_description'] == 'Homo sapiens SCO-spondin (SSPO), mRNA' + assert results['NM_198455.2:c.1115_1116dup']['gene_symbol'] == 'SSPO' + assert results['NM_198455.2:c.1115_1116dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_940857.2:p.(Pro373ThrfsTer6)', 'slr': 'NP_940857.2:p.(P373Tfs*6)'} + assert results['NM_198455.2:c.1115_1116dup']['submitted_variant'] == 'NC_000007.14:g.149779577del' + assert results['NM_198455.2:c.1115_1116dup']['genome_context_intronic_sequence'] == '' + assert results['NM_198455.2:c.1115_1116dup']['hgvs_lrg_variant'] == '' + assert results['NM_198455.2:c.1115_1116dup']['hgvs_transcript_variant'] == 'NM_198455.2:c.1115_1116dup' + assert results['NM_198455.2:c.1115_1116dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_198455.2:c.1115_1116dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476666G>C', 'vcf': {'chr': 'chr7', 'ref': 'G', 'pos': '149476666', 'alt': 'C'}} + assert results['NM_198455.2:c.1115_1116dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779577del', 'vcf': {'chr': 'chr7', 'ref': 'AG', 'pos': '149779576', 'alt': 'A'}} + assert results['NM_198455.2:c.1115_1116dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476666G>C', 'vcf': {'chr': '7', 'ref': 'G', 'pos': '149476666', 'alt': 'C'}} + assert results['NM_198455.2:c.1115_1116dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779577del', 'vcf': {'chr': '7', 'ref': 'AG', 'pos': '149779576', 'alt': 'A'}} + assert results['NM_198455.2:c.1115_1116dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2'} + + + def test_variant112(self): + variant = 'NC_000007.14:g.149779573_149779579del' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_198455.2:c.1114_1117del' in results.keys() + assert results['NM_198455.2:c.1114_1117del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_198455.2:c.1114_1117del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_198455.2:c.1114_1117del']['alt_genomic_loci'] == [] + assert results['NM_198455.2:c.1114_1117del']['transcript_description'] == 'Homo sapiens SCO-spondin (SSPO), mRNA' + assert results['NM_198455.2:c.1114_1117del']['gene_symbol'] == 'SSPO' + assert results['NM_198455.2:c.1114_1117del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_940857.2:p.(Asp372ProfsTer5)', 'slr': 'NP_940857.2:p.(D372Pfs*5)'} + assert results['NM_198455.2:c.1114_1117del']['submitted_variant'] == 'NC_000007.14:g.149779573_149779579del' + assert results['NM_198455.2:c.1114_1117del']['genome_context_intronic_sequence'] == '' + assert results['NM_198455.2:c.1114_1117del']['hgvs_lrg_variant'] == '' + assert results['NM_198455.2:c.1114_1117del']['hgvs_transcript_variant'] == 'NM_198455.2:c.1114_1117del' + assert results['NM_198455.2:c.1114_1117del']['hgvs_refseqgene_variant'] == '' + assert results['NM_198455.2:c.1114_1117del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476662_149476667del', 'vcf': {'chr': 'chr7', 'ref': 'TGACAGC', 'pos': '149476661', 'alt': 'T'}} + assert results['NM_198455.2:c.1114_1117del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779573_149779579del', 'vcf': {'chr': 'chr7', 'ref': 'TGACAGCC', 'pos': '149779572', 'alt': 'T'}} + assert results['NM_198455.2:c.1114_1117del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476662_149476667del', 'vcf': {'chr': '7', 'ref': 'TGACAGC', 'pos': '149476661', 
'alt': 'T'}} + assert results['NM_198455.2:c.1114_1117del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779573_149779579del', 'vcf': {'chr': '7', 'ref': 'TGACAGCC', 'pos': '149779572', 'alt': 'T'}} + assert results['NM_198455.2:c.1114_1117del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2'} + + assert results['flag'] == 'gene_variant' + + def test_variant113(self): + variant = 'NC_000007.14:g.149779573_149779579delinsCA' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_198455.2:c.1114_1117delinsCA' in results.keys() + assert results['NM_198455.2:c.1114_1117delinsCA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_198455.2:c.1114_1117delinsCA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_198455.2:c.1114_1117delinsCA']['alt_genomic_loci'] == [] + assert results['NM_198455.2:c.1114_1117delinsCA']['transcript_description'] == 'Homo sapiens SCO-spondin (SSPO), mRNA' + assert results['NM_198455.2:c.1114_1117delinsCA']['gene_symbol'] == 'SSPO' + assert results['NM_198455.2:c.1114_1117delinsCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_940857.2:p.(Asp372HisfsTer17)', 'slr': 'NP_940857.2:p.(D372Hfs*17)'} + assert results['NM_198455.2:c.1114_1117delinsCA']['submitted_variant'] == 'NC_000007.14:g.149779573_149779579delinsCA' + assert results['NM_198455.2:c.1114_1117delinsCA']['genome_context_intronic_sequence'] == '' + assert results['NM_198455.2:c.1114_1117delinsCA']['hgvs_lrg_variant'] == '' + assert results['NM_198455.2:c.1114_1117delinsCA']['hgvs_transcript_variant'] == 'NM_198455.2:c.1114_1117delinsCA' + assert results['NM_198455.2:c.1114_1117delinsCA']['hgvs_refseqgene_variant'] == '' + assert results['NM_198455.2:c.1114_1117delinsCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000007.13:g.149476662_149476667delinsCA', 'vcf': {'chr': 'chr7', 'ref': 'GACAGC', 'pos': '149476662', 'alt': 'CA'}} + assert results['NM_198455.2:c.1114_1117delinsCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779573_149779579delinsCA', 'vcf': {'chr': 'chr7', 'ref': 'GACAGCC', 'pos': '149779573', 'alt': 'CA'}} + assert results['NM_198455.2:c.1114_1117delinsCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476662_149476667delinsCA', 'vcf': {'chr': '7', 'ref': 'GACAGC', 'pos': '149476662', 'alt': 'CA'}} + assert results['NM_198455.2:c.1114_1117delinsCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779573_149779579delinsCA', 'vcf': {'chr': '7', 'ref': 'GACAGCC', 'pos': '149779573', 'alt': 'CA'}} + assert results['NM_198455.2:c.1114_1117delinsCA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2'} + + assert results['flag'] == 'gene_variant' + + def test_variant114(self): + variant = 'NM_000088.3:c.590_591inv' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_000088.3:c.590_591inv' in results.keys() + assert results['NM_000088.3:c.590_591inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.590_591inv' + assert results['NM_000088.3:c.590_591inv']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.590_591inv']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.590_591inv']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.590_591inv']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.590_591inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Asp)', 'slr': 'NP_000079.2:p.(G197D)'} + assert results['NM_000088.3:c.590_591inv']['submitted_variant'] == 
'NM_000088.3:c.590_591inv' + assert results['NM_000088.3:c.590_591inv']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.590_591inv']['hgvs_lrg_variant'] == 'LRG_1:g.8639_8640inv' + assert results['NM_000088.3:c.590_591inv']['hgvs_transcript_variant'] == 'NM_000088.3:c.590_591inv' + assert results['NM_000088.3:c.590_591inv']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8639_8640inv' + assert results['NM_000088.3:c.590_591inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275361_48275362inv', 'vcf': {'chr': 'chr17', 'ref': 'AC', 'pos': '48275361', 'alt': 'GT'}} + assert results['NM_000088.3:c.590_591inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198000_50198001inv', 'vcf': {'chr': 'chr17', 'ref': 'AC', 'pos': '50198000', 'alt': 'GT'}} + assert results['NM_000088.3:c.590_591inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275361_48275362inv', 'vcf': {'chr': '17', 'ref': 'AC', 'pos': '48275361', 'alt': 'GT'}} + assert results['NM_000088.3:c.590_591inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198000_50198001inv', 'vcf': {'chr': '17', 'ref': 'AC', 'pos': '50198000', 'alt': 'GT'}} + assert results['NM_000088.3:c.590_591inv']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + assert results['flag'] == 'gene_variant' + + def test_variant115(self): + variant = 'NM_024989.3:c.1778_1779inv' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_024989.3:c.1778_1779inv' in results.keys() + assert results['NM_024989.3:c.1778_1779inv']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_024989.3:c.1778_1779inv']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_024989.3:c.1778_1779inv']['alt_genomic_loci'] == [] + assert results['NM_024989.3:c.1778_1779inv']['transcript_description'] == 'Homo sapiens post-GPI attachment to proteins 1 (PGAP1), transcript variant 1, mRNA' + assert results['NM_024989.3:c.1778_1779inv']['gene_symbol'] == 'PGAP1' + assert results['NM_024989.3:c.1778_1779inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_079265.2:p.(Phe593Ter)', 'slr': 'NP_079265.2:p.(F593*)'} + assert results['NM_024989.3:c.1778_1779inv']['submitted_variant'] == 'NM_024989.3:c.1778_1779inv' + assert results['NM_024989.3:c.1778_1779inv']['genome_context_intronic_sequence'] == '' + assert results['NM_024989.3:c.1778_1779inv']['hgvs_lrg_variant'] == '' + assert results['NM_024989.3:c.1778_1779inv']['hgvs_transcript_variant'] == 'NM_024989.3:c.1778_1779inv' + assert results['NM_024989.3:c.1778_1779inv']['hgvs_refseqgene_variant'] == '' + assert results['NM_024989.3:c.1778_1779inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.197729793_197729794inv', 'vcf': {'chr': 'chr2', 'ref': 'AA', 'pos': '197729793', 'alt': 'TT'}} + assert results['NM_024989.3:c.1778_1779inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.196865069_196865070inv', 'vcf': {'chr': 'chr2', 'ref': 'AA', 'pos': '196865069', 'alt': 'TT'}} + assert results['NM_024989.3:c.1778_1779inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.197729793_197729794inv', 'vcf': {'chr': '2', 'ref': 'AA', 'pos': '197729793', 'alt': 'TT'}} + assert results['NM_024989.3:c.1778_1779inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.196865069_196865070inv', 'vcf': {'chr': '2', 'ref': 'AA', 'pos': '196865069', 'alt': 'TT'}} + assert results['NM_024989.3:c.1778_1779inv']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_079265.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024989.3'} + + assert results['flag'] == 'gene_variant' + + def test_variant116(self): + variant = 'NM_032815.3:c.555_556inv' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_032815.3:c.555_556inv' in results.keys() + assert results['NM_032815.3:c.555_556inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_032815.3:c.555_556inv']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_032815.3:c.555_556inv']['alt_genomic_loci'] == [] + assert results['NM_032815.3:c.555_556inv']['transcript_description'] == 'Homo sapiens nuclear factor of activated T cells 2 interacting protein (NFATC2IP), mRNA' + assert results['NM_032815.3:c.555_556inv']['gene_symbol'] == 'NFATC2IP' + assert results['NM_032815.3:c.555_556inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116204.3:p.(Glu185_Glu186delinsAspTer)', 'slr': 'NP_116204.3:p.(E185_E186delinsD*)'} + assert results['NM_032815.3:c.555_556inv']['submitted_variant'] == 'NM_032815.3:c.555_556inv' + assert results['NM_032815.3:c.555_556inv']['genome_context_intronic_sequence'] == '' + assert results['NM_032815.3:c.555_556inv']['hgvs_lrg_variant'] == '' + assert results['NM_032815.3:c.555_556inv']['hgvs_transcript_variant'] == 'NM_032815.3:c.555_556inv' + assert results['NM_032815.3:c.555_556inv']['hgvs_refseqgene_variant'] == '' + assert results['NM_032815.3:c.555_556inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.28965980_28965981inv', 'vcf': {'chr': 'chr16', 'ref': 'AG', 'pos': '28965980', 'alt': 'CT'}} + assert results['NM_032815.3:c.555_556inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.28954659_28954660inv', 'vcf': {'chr': 'chr16', 'ref': 'AG', 'pos': '28954659', 'alt': 'CT'}} + assert 
results['NM_032815.3:c.555_556inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.28965980_28965981inv', 'vcf': {'chr': '16', 'ref': 'AG', 'pos': '28965980', 'alt': 'CT'}} + assert results['NM_032815.3:c.555_556inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.28954659_28954660inv', 'vcf': {'chr': '16', 'ref': 'AG', 'pos': '28954659', 'alt': 'CT'}} + assert results['NM_032815.3:c.555_556inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116204.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032815.3'} + + def test_variant117(self): + variant = 'NM_006138.4:c.3_4inv' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_006138.4:c.3_4inv' in results.keys() + assert results['NM_006138.4:c.3_4inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_006138.4:c.3_4inv']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_006138.4:c.3_4inv']['alt_genomic_loci'] == [] + assert results['NM_006138.4:c.3_4inv']['transcript_description'] == 'Homo sapiens membrane spanning 4-domains A3 (MS4A3), transcript variant 1, mRNA' + assert results['NM_006138.4:c.3_4inv']['gene_symbol'] == 'MS4A3' + assert results['NM_006138.4:c.3_4inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006129.4:p.(Met1?)', 'slr': 'NP_006129.4:p.(M1?)'} + assert results['NM_006138.4:c.3_4inv']['submitted_variant'] == 'NM_006138.4:c.3_4inv' + assert results['NM_006138.4:c.3_4inv']['genome_context_intronic_sequence'] == '' + assert results['NM_006138.4:c.3_4inv']['hgvs_lrg_variant'] == '' + assert results['NM_006138.4:c.3_4inv']['hgvs_transcript_variant'] == 'NM_006138.4:c.3_4inv' + assert results['NM_006138.4:c.3_4inv']['hgvs_refseqgene_variant'] == '' + assert results['NM_006138.4:c.3_4inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.59828636_59828637inv', 'vcf': {'chr': 'chr11', 'ref': 
'GG', 'pos': '59828636', 'alt': 'CC'}} + assert results['NM_006138.4:c.3_4inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.60061163_60061164inv', 'vcf': {'chr': 'chr11', 'ref': 'GG', 'pos': '60061163', 'alt': 'CC'}} + assert results['NM_006138.4:c.3_4inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.59828636_59828637inv', 'vcf': {'chr': '11', 'ref': 'GG', 'pos': '59828636', 'alt': 'CC'}} + assert results['NM_006138.4:c.3_4inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.60061163_60061164inv', 'vcf': {'chr': '11', 'ref': 'GG', 'pos': '60061163', 'alt': 'CC'}} + + assert results['flag'] == 'gene_variant' + + def test_variant118(self): + variant = 'NM_000038.5:c.3927_3928delAAinsTT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000038.5:c.3927_3928inv' in results.keys() + assert results['NM_000038.5:c.3927_3928inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000038.5:c.3927_3928inv']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000038.5:c.3927_3928inv']['alt_genomic_loci'] == [] + assert results['NM_000038.5:c.3927_3928inv']['transcript_description'] == 'Homo sapiens APC, WNT signaling pathway regulator (APC), transcript variant 3, mRNA' + assert results['NM_000038.5:c.3927_3928inv']['gene_symbol'] == 'APC' + assert results['NM_000038.5:c.3927_3928inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000029.2(LRG_130p1):p.(Glu1309_Lys1310delinsAspTer)', 'slr': 'NP_000029.2:p.(E1309_K1310delinsD*)'} + assert results['NM_000038.5:c.3927_3928inv']['submitted_variant'] == 'NM_000038.5:c.3927_3928delAAinsTT' + assert results['NM_000038.5:c.3927_3928inv']['genome_context_intronic_sequence'] == '' + assert results['NM_000038.5:c.3927_3928inv']['hgvs_lrg_variant'] == '' + assert 
results['NM_000038.5:c.3927_3928inv']['hgvs_transcript_variant'] == 'NM_000038.5:c.3927_3928inv' + assert results['NM_000038.5:c.3927_3928inv']['hgvs_refseqgene_variant'] == '' + assert results['NM_000038.5:c.3927_3928inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.112175218_112175219inv', 'vcf': {'chr': 'chr5', 'ref': 'AA', 'pos': '112175218', 'alt': 'TT'}} + assert results['NM_000038.5:c.3927_3928inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.112839521_112839522inv', 'vcf': {'chr': 'chr5', 'ref': 'AA', 'pos': '112839521', 'alt': 'TT'}} + assert results['NM_000038.5:c.3927_3928inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.112175218_112175219inv', 'vcf': {'chr': '5', 'ref': 'AA', 'pos': '112175218', 'alt': 'TT'}} + assert results['NM_000038.5:c.3927_3928inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.112839521_112839522inv', 'vcf': {'chr': '5', 'ref': 'AA', 'pos': '112839521', 'alt': 'TT'}} + assert results['NM_000038.5:c.3927_3928inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000029.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000038.5'} + + + def test_variant119(self): + variant = 'NM_001034853.1:c.2847_2848delAGinsCT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_001034853.1:c.2847_2848inv' in results.keys() + assert results['NM_001034853.1:c.2847_2848inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001034853.1:c.2847_2848inv']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001034853.1:c.2847_2848inv']['alt_genomic_loci'] == [] + assert results['NM_001034853.1:c.2847_2848inv']['transcript_description'] == 'Homo sapiens retinitis pigmentosa GTPase regulator (RPGR), transcript variant C, mRNA' + assert 
results['NM_001034853.1:c.2847_2848inv']['gene_symbol'] == 'RPGR' + assert results['NM_001034853.1:c.2847_2848inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001030025.1:p.(Glu949_Glu950delinsAspTer)', 'slr': 'NP_001030025.1:p.(E949_E950delinsD*)'} + assert results['NM_001034853.1:c.2847_2848inv']['submitted_variant'] == 'NM_001034853.1:c.2847_2848delAGinsCT' + assert results['NM_001034853.1:c.2847_2848inv']['genome_context_intronic_sequence'] == '' + assert results['NM_001034853.1:c.2847_2848inv']['hgvs_lrg_variant'] == '' + assert results['NM_001034853.1:c.2847_2848inv']['hgvs_transcript_variant'] == 'NM_001034853.1:c.2847_2848inv' + assert results['NM_001034853.1:c.2847_2848inv']['hgvs_refseqgene_variant'] == '' + assert results['NM_001034853.1:c.2847_2848inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.38145404_38145405inv', 'vcf': {'chr': 'chrX', 'ref': 'CT', 'pos': '38145404', 'alt': 'AG'}} + assert results['NM_001034853.1:c.2847_2848inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.38286151_38286152inv', 'vcf': {'chr': 'chrX', 'ref': 'CT', 'pos': '38286151', 'alt': 'AG'}} + assert results['NM_001034853.1:c.2847_2848inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.38145404_38145405inv', 'vcf': {'chr': 'X', 'ref': 'CT', 'pos': '38145404', 'alt': 'AG'}} + assert results['NM_001034853.1:c.2847_2848inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.38286151_38286152inv', 'vcf': {'chr': 'X', 'ref': 'CT', 'pos': '38286151', 'alt': 'AG'}} + assert results['NM_001034853.1:c.2847_2848inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001030025.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001034853.1'} + + + def test_variant120(self): + variant = 'NM_000088.3:c.4392_*2inv' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 
'NM_000088.3:c.4394_4395inv' in results.keys() + assert results['NM_000088.3:c.4394_4395inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.4394_4395inv' + assert results['NM_000088.3:c.4394_4395inv']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.4394_4395inv']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.4394_4395inv']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.4394_4395inv']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.4394_4395inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Ter1465PheextTer27)', 'slr': 'NP_000079.2:p.(*1465Fext*27)'} + assert results['NM_000088.3:c.4394_4395inv']['submitted_variant'] == 'NM_000088.3:c.4392_*2inv' + assert results['NM_000088.3:c.4394_4395inv']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.4394_4395inv']['hgvs_lrg_variant'] == 'LRG_1:g.21137_21138inv' + assert results['NM_000088.3:c.4394_4395inv']['hgvs_transcript_variant'] == 'NM_000088.3:c.4394_4395inv' + assert results['NM_000088.3:c.4394_4395inv']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.21137_21138inv' + assert results['NM_000088.3:c.4394_4395inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262863_48262864inv', 'vcf': {'chr': 'chr17', 'ref': 'TT', 'pos': '48262863', 'alt': 'AA'}} + assert results['NM_000088.3:c.4394_4395inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185502_50185503inv', 'vcf': {'chr': 'chr17', 'ref': 'TT', 'pos': '50185502', 'alt': 'AA'}} + assert results['NM_000088.3:c.4394_4395inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262863_48262864inv', 'vcf': {'chr': '17', 'ref': 'TT', 'pos': '48262863', 'alt': 'AA'}} + assert results['NM_000088.3:c.4394_4395inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000017.11:g.50185502_50185503inv', 'vcf': {'chr': '17', 'ref': 'TT', 'pos': '50185502', 'alt': 'AA'}} + assert results['NM_000088.3:c.4394_4395inv']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + assert results['flag'] == 'gene_variant' + + def test_variant121(self): + variant = 'NM_000088.3:c.4392_*5inv' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.4392_*5inv' in results.keys() + assert results['NM_000088.3:c.4392_*5inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.4393_*4inv' + assert results['NM_000088.3:c.4392_*5inv']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.4392_*5inv']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.4392_*5inv']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.4392_*5inv']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.4392_*5inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.4392_*5inv']['submitted_variant'] == 'NM_000088.3:c.4392_*5inv' + assert results['NM_000088.3:c.4392_*5inv']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.4392_*5inv']['hgvs_lrg_variant'] == 'LRG_1:g.21136_21142inv' + assert results['NM_000088.3:c.4392_*5inv']['hgvs_transcript_variant'] == 'NM_000088.3:c.4392_*5inv' + assert results['NM_000088.3:c.4392_*5inv']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.21136_21142inv' + assert results['NM_000088.3:c.4392_*5inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262858_48262866inv', 'vcf': {'chr': 
'chr17', 'ref': 'GAGTTTA', 'pos': '48262859', 'alt': 'TAAACTC'}} + assert results['NM_000088.3:c.4392_*5inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185497_50185505inv', 'vcf': {'chr': 'chr17', 'ref': 'GAGTTTA', 'pos': '50185498', 'alt': 'TAAACTC'}} + assert results['NM_000088.3:c.4392_*5inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262858_48262866inv', 'vcf': {'chr': '17', 'ref': 'GAGTTTA', 'pos': '48262859', 'alt': 'TAAACTC'}} + assert results['NM_000088.3:c.4392_*5inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185497_50185505inv', 'vcf': {'chr': '17', 'ref': 'GAGTTTA', 'pos': '50185498', 'alt': 'TAAACTC'}} + assert results['NM_000088.3:c.4392_*5inv']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant122(self): + variant = 'NM_000088.3:c.4390_*7inv' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.4390_*7inv' in results.keys() + assert results['NM_000088.3:c.4390_*7inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.4390_*7inv' + assert results['NM_000088.3:c.4390_*7inv']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.4390_*7inv']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.4390_*7inv']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.4390_*7inv']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.4390_*7inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert 
results['NM_000088.3:c.4390_*7inv']['submitted_variant'] == 'NM_000088.3:c.4390_*7inv' + assert results['NM_000088.3:c.4390_*7inv']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.4390_*7inv']['hgvs_lrg_variant'] == 'LRG_1:g.21133_21145inv' + assert results['NM_000088.3:c.4390_*7inv']['hgvs_transcript_variant'] == 'NM_000088.3:c.4390_*7inv' + assert results['NM_000088.3:c.4390_*7inv']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.21133_21145inv' + assert results['NM_000088.3:c.4390_*7inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262856_48262868inv', 'vcf': {'chr': 'chr17', 'ref': 'AGGGAGTTTACAG', 'pos': '48262856', 'alt': 'CTGTAAACTCCCT'}} + assert results['NM_000088.3:c.4390_*7inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185495_50185507inv', 'vcf': {'chr': 'chr17', 'ref': 'AGGGAGTTTACAG', 'pos': '50185495', 'alt': 'CTGTAAACTCCCT'}} + assert results['NM_000088.3:c.4390_*7inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262856_48262868inv', 'vcf': {'chr': '17', 'ref': 'AGGGAGTTTACAG', 'pos': '48262856', 'alt': 'CTGTAAACTCCCT'}} + assert results['NM_000088.3:c.4390_*7inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185495_50185507inv', 'vcf': {'chr': '17', 'ref': 'AGGGAGTTTACAG', 'pos': '50185495', 'alt': 'CTGTAAACTCCCT'}} + assert results['NM_000088.3:c.4390_*7inv']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant123(self): + variant = 'NM_005732.3:c.2923-5insT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert 
results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'NM_005732.3:c.2923-5insT' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant124(self): + variant = 'NM_198283.1(EYS):c.*743120C>T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + 
assert results['validation_warning_1']['submitted_variant'] == 'NM_198283.1(EYS):c.*743120C>T' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant125(self): + variant = 'NM_133379.4(TTN):c.*265+26591C>T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'NM_133379.4(TTN):c.*265+26591C>T' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in 
results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant126(self): + variant = 'NM_000088.3:c.589-2_589-1AG>G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.589-2_589-1delinsG' in results.keys() + assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-2del' + assert results['NM_000088.3:c.589-2_589-1delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-2del' + assert results['NM_000088.3:c.589-2_589-1delinsG']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.589-2_589-1delinsG']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.589-2_589-1delinsG']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.589-2_589-1delinsG']['submitted_variant'] == 'NM_000088.3:c.589-2_589-1AG>G' + assert results['NM_000088.3:c.589-2_589-1delinsG']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-2_589-1delinsG' + assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_lrg_variant'] == 'LRG_1:g.8636del' + assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-2_589-1delinsG' + assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8636del' + assert 
results['NM_000088.3:c.589-2_589-1delinsG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364_48275365delinsC', 'vcf': {'chr': 'chr17', 'ref': 'CT', 'pos': '48275364', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2_589-1delinsG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003_50198004delinsC', 'vcf': {'chr': 'chr17', 'ref': 'CT', 'pos': '50198003', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2_589-1delinsG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364_48275365delinsC', 'vcf': {'chr': '17', 'ref': 'CT', 'pos': '48275364', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2_589-1delinsG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003_50198004delinsC', 'vcf': {'chr': '17', 'ref': 'CT', 'pos': '50198003', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2_589-1delinsG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant127(self): + variant = 'NM_000088.3:c.642+1_642+2delGTinsG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.642+1_642+2delinsG' in results.keys() + assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642+2del' + assert results['NM_000088.3:c.642+1_642+2delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.642+2del' + assert results['NM_000088.3:c.642+1_642+2delinsG']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.642+1_642+2delinsG']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert 
results['NM_000088.3:c.642+1_642+2delinsG']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.642+1_642+2delinsG']['submitted_variant'] == 'NM_000088.3:c.642+1_642+2delGTinsG' + assert results['NM_000088.3:c.642+1_642+2delinsG']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.642+1_642+2delinsG' + assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_lrg_variant'] == 'LRG_1:g.8693del' + assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_transcript_variant'] == 'NM_000088.3:c.642+1_642+2delinsG' + assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8693del' + assert results['NM_000088.3:c.642+1_642+2delinsG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308_48275309delinsC', 'vcf': {'chr': 'chr17', 'ref': 'TA', 'pos': '48275307', 'alt': 'T'}} + assert results['NM_000088.3:c.642+1_642+2delinsG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947_50197948delinsC', 'vcf': {'chr': 'chr17', 'ref': 'TA', 'pos': '50197946', 'alt': 'T'}} + assert results['NM_000088.3:c.642+1_642+2delinsG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308_48275309delinsC', 'vcf': {'chr': '17', 'ref': 'TA', 'pos': '48275307', 'alt': 'T'}} + assert results['NM_000088.3:c.642+1_642+2delinsG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947_50197948delinsC', 'vcf': {'chr': '17', 'ref': 'TA', 'pos': '50197946', 'alt': 'T'}} + assert results['NM_000088.3:c.642+1_642+2delinsG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant128(self): + variant = 'NM_004415.3:c.1-1insA' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'NM_004415.3:c.1-1insA' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant129(self): + variant = 'NM_004415.3:c.-1_1insA' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_004415.3:c.-1_1insA' in results.keys() + assert results['NM_004415.3:c.-1_1insA']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_004415.3:c.-1_1insA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004415.3:c.-1_1insA']['alt_genomic_loci'] == [] + assert results['NM_004415.3:c.-1_1insA']['transcript_description'] == 'Homo sapiens desmoplakin (DSP), transcript variant 1, mRNA' + assert results['NM_004415.3:c.-1_1insA']['gene_symbol'] == 'DSP' + assert results['NM_004415.3:c.-1_1insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004406.2(LRG_423p1):p.(Met1?)', 'slr': 'NP_004406.2:p.(M1?)'} + assert results['NM_004415.3:c.-1_1insA']['submitted_variant'] == 'NM_004415.3:c.-1_1insA' + assert results['NM_004415.3:c.-1_1insA']['genome_context_intronic_sequence'] == '' + assert results['NM_004415.3:c.-1_1insA']['hgvs_lrg_variant'] == '' + assert results['NM_004415.3:c.-1_1insA']['hgvs_transcript_variant'] == 'NM_004415.3:c.-1_1insA' + assert results['NM_004415.3:c.-1_1insA']['hgvs_refseqgene_variant'] == '' + assert results['NM_004415.3:c.-1_1insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.7542148_7542149insA', 'vcf': {'chr': 'chr6', 'ref': 'C', 'pos': '7542148', 'alt': 'CA'}} + assert results['NM_004415.3:c.-1_1insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.7541915_7541916insA', 'vcf': {'chr': 'chr6', 'ref': 'C', 'pos': '7541915', 'alt': 'CA'}} + assert results['NM_004415.3:c.-1_1insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.7542148_7542149insA', 'vcf': {'chr': '6', 'ref': 'C', 'pos': '7542148', 'alt': 'CA'}} + assert results['NM_004415.3:c.-1_1insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.7541915_7541916insA', 'vcf': {'chr': '6', 'ref': 'C', 'pos': '7541915', 'alt': 'CA'}} + assert results['NM_004415.3:c.-1_1insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004406.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004415.3'} + + + def 
test_variant130(self): + variant = 'NM_000273.2:c.1-5028_253del' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'NM_000273.2:c.1-5028_253del' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant131(self): + variant = 'NM_002929.2:c.1006C>T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_002929.2:c.1006C>T' in results.keys() + assert results['NM_002929.2:c.1006C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_002929.2:c.1006C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_002929.2:c.1006C>T']['alt_genomic_loci'] == [] + assert 
results['NM_002929.2:c.1006C>T']['transcript_description'] == 'Homo sapiens G protein-coupled receptor kinase 1 (GRK1), mRNA' + assert results['NM_002929.2:c.1006C>T']['gene_symbol'] == 'GRK1' + assert results['NM_002929.2:c.1006C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002920.1:p.(Leu336Phe)', 'slr': 'NP_002920.1:p.(L336F)'} + assert results['NM_002929.2:c.1006C>T']['submitted_variant'] == 'NM_002929.2:c.1006C>T' + assert results['NM_002929.2:c.1006C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_002929.2:c.1006C>T']['hgvs_lrg_variant'] == '' + assert results['NM_002929.2:c.1006C>T']['hgvs_transcript_variant'] == 'NM_002929.2:c.1006C>T' + assert results['NM_002929.2:c.1006C>T']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['NM_002929.2:c.1006C>T']['primary_assembly_loci'].keys() + assert results['NM_002929.2:c.1006C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000013.11:g.113723094C>T', 'vcf': {'chr': 'chr13', 'ref': u'C', 'pos': '113723094', 'alt': u'T'}} + assert 'grch37' not in results['NM_002929.2:c.1006C>T']['primary_assembly_loci'].keys() + assert results['NM_002929.2:c.1006C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000013.11:g.113723094C>T', 'vcf': {'chr': '13', 'ref': u'C', 'pos': '113723094', 'alt': u'T'}} + assert results['NM_002929.2:c.1006C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002920.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002929.2'} + + + def test_variant132(self): + variant = 'NR_125367.1:n.167+18165G>A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NR_125367.1:n.167+18165G>A' in results.keys() + assert results['NR_125367.1:n.167+18165G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_125367.1:n.167+18165G>A']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NR_125367.1:n.167+18165G>A']['alt_genomic_loci'] == [] + assert results['NR_125367.1:n.167+18165G>A']['transcript_description'] == 'Homo sapiens myosin heavy chain gene cluster antisense RNA (MYHAS), long non-coding RNA' + assert results['NR_125367.1:n.167+18165G>A']['gene_symbol'] == 'MYHAS' + assert results['NR_125367.1:n.167+18165G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_125367.1:n.167+18165G>A']['submitted_variant'] == 'NR_125367.1:n.167+18165G>A' + assert results['NR_125367.1:n.167+18165G>A']['genome_context_intronic_sequence'] == 'NC_000017.10(NR_125367.1):c.167+18165G>A' + assert results['NR_125367.1:n.167+18165G>A']['hgvs_lrg_variant'] == '' + assert results['NR_125367.1:n.167+18165G>A']['hgvs_transcript_variant'] == 'NR_125367.1:n.167+18165G>A' + assert results['NR_125367.1:n.167+18165G>A']['hgvs_refseqgene_variant'] == '' + assert results['NR_125367.1:n.167+18165G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.10327720G>A', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '10327720', 'alt': 'A'}} + assert results['NR_125367.1:n.167+18165G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.10424403G>A', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '10424403', 'alt': 'A'}} + assert results['NR_125367.1:n.167+18165G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.10327720G>A', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '10327720', 'alt': 'A'}} + assert results['NR_125367.1:n.167+18165G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.10424403G>A', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '10424403', 'alt': 'A'}} + assert results['NR_125367.1:n.167+18165G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_125367.1'} + + + def test_variant133(self): + variant = 'NM_006005.3:c.3071_3073delinsTTA' + 
results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'NM_006005.3:c.3071_3073delinsTTA' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant134(self): + variant = 'NM_000089.3:n.1504_1506del' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert 
results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'NM_000089.3:n.1504_1506del' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant135(self): + variant = 'NC_012920.1:m.1011C>T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == 'Homo sapiens mitochondrion, complete genome' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'NC_012920.1:m.1011C>T' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert 
results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_012920.1:m.1011C>T', 'vcf': {'chr': 'chrM', 'ref': 'C', 'pos': '1011', 'alt': 'T'}} + assert results['validation_warning_1']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_012920.1:m.1011C>T', 'vcf': {'chr': 'chrM', 'ref': 'C', 'pos': '1011', 'alt': 'T'}} + assert results['validation_warning_1']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_012920.1:m.1011C>T', 'vcf': {'chr': 'M', 'ref': 'C', 'pos': '1011', 'alt': 'T'}} + assert results['validation_warning_1']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_012920.1:m.1011C>T', 'vcf': {'chr': 'M', 'ref': 'C', 'pos': '1011', 'alt': 'T'}} + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant136(self): + variant = 'NC_000006.11:g.90403795G=' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_014611.1:c.9879T>C' in results.keys() + assert results['NM_014611.1:c.9879T>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014611.1:c.9879T>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014611.1:c.9879T>C']['alt_genomic_loci'] == [] + assert results['NM_014611.1:c.9879T>C']['transcript_description'] == 'Homo sapiens MDN1, midasin homolog (yeast) (MDN1), mRNA' + assert results['NM_014611.1:c.9879T>C']['gene_symbol'] == 'MDN1' + assert results['NM_014611.1:c.9879T>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055426.1:p.(Val3293=)', 'slr': 'NP_055426.1:p.(V3293=)'} + assert results['NM_014611.1:c.9879T>C']['submitted_variant'] == 'NC_000006.11:g.90403795G=' + assert 
results['NM_014611.1:c.9879T>C']['genome_context_intronic_sequence'] == '' + assert results['NM_014611.1:c.9879T>C']['hgvs_lrg_variant'] == '' + assert results['NM_014611.1:c.9879T>C']['hgvs_transcript_variant'] == 'NM_014611.1:c.9879T>C' + assert results['NM_014611.1:c.9879T>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_014611.1:c.9879T>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': 'chr6', 'ref': 'G', 'pos': '90403795', 'alt': 'G'}} + assert 'hg38' not in results['NM_014611.1:c.9879T>C']['primary_assembly_loci'].keys() + assert results['NM_014611.1:c.9879T>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': '6', 'ref': 'G', 'pos': '90403795', 'alt': 'G'}} + assert 'grch38' not in results['NM_014611.1:c.9879T>C']['primary_assembly_loci'].keys() + assert results['NM_014611.1:c.9879T>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.1'} + + assert 'NM_014611.2:c.9879C=' in results.keys() + assert results['NM_014611.2:c.9879C=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014611.2:c.9879C=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014611.2:c.9879C=']['alt_genomic_loci'] == [] + assert results['NM_014611.2:c.9879C=']['transcript_description'] == 'Homo sapiens midasin AAA ATPase 1 (MDN1), mRNA' + assert results['NM_014611.2:c.9879C=']['gene_symbol'] == 'MDN1' + assert results['NM_014611.2:c.9879C=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055426.1:p.(Val3293=)', 'slr': 'NP_055426.1:p.(V3293=)'} + assert results['NM_014611.2:c.9879C=']['submitted_variant'] == 'NC_000006.11:g.90403795G=' + assert results['NM_014611.2:c.9879C=']['genome_context_intronic_sequence'] == '' + assert results['NM_014611.2:c.9879C=']['hgvs_lrg_variant'] == '' + assert 
results['NM_014611.2:c.9879C=']['hgvs_transcript_variant'] == 'NM_014611.2:c.9879C=' + assert results['NM_014611.2:c.9879C=']['hgvs_refseqgene_variant'] == '' + assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': 'chr6', 'ref': u'G', 'pos': '90403795', 'alt': u'G'}} + assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G=', 'vcf': {'chr': 'chr6', 'ref': u'G', 'pos': '89694076', 'alt': u'G'}} + assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': '6', 'ref': u'G', 'pos': '90403795', 'alt': u'G'}} + assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G=', 'vcf': {'chr': '6', 'ref': u'G', 'pos': '89694076', 'alt': u'G'}} + assert results['NM_014611.2:c.9879C=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.2'} + + + def test_variant137(self): + variant = '1-169519049-T-.' 
+ results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_000130.4:c.1602del' in results.keys() + assert results['NM_000130.4:c.1602del']['hgvs_lrg_transcript_variant'] == 'LRG_553t1:c.1601del' + assert results['NM_000130.4:c.1602del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000130.4:c.1602del']['alt_genomic_loci'] == [] + assert results['NM_000130.4:c.1602del']['transcript_description'] == 'Homo sapiens coagulation factor V (F5), mRNA' + assert results['NM_000130.4:c.1602del']['gene_symbol'] == 'F5' + assert results['NM_000130.4:c.1602del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000121.2(LRG_553p1):p.(Arg534GlnfsTer40)', 'slr': 'NP_000121.2:p.(R534Qfs*40)'} + assert results['NM_000130.4:c.1602del']['submitted_variant'] == '1-169519049-T-.' + assert results['NM_000130.4:c.1602del']['genome_context_intronic_sequence'] == '' + assert results['NM_000130.4:c.1602del']['hgvs_lrg_variant'] == 'LRG_553:g.41721del' + assert results['NM_000130.4:c.1602del']['hgvs_transcript_variant'] == 'NM_000130.4:c.1602del' + assert results['NM_000130.4:c.1602del']['hgvs_refseqgene_variant'] == 'NG_011806.1:g.41721del' + assert results['NM_000130.4:c.1602del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.169519048del', 'vcf': {'chr': 'chr1', 'ref': 'CT', 'pos': '169519047', 'alt': 'C'}} + assert results['NM_000130.4:c.1602del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549810del', 'vcf': {'chr': 'chr1', 'ref': 'CT', 'pos': '169549809', 'alt': 'C'}} + assert results['NM_000130.4:c.1602del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.169519048del', 'vcf': {'chr': '1', 'ref': 'CT', 'pos': '169519047', 'alt': 'C'}} + assert results['NM_000130.4:c.1602del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549810del', 'vcf': {'chr': '1', 'ref': 'CT', 'pos': '169549809', 
'alt': 'C'}} + assert results['NM_000130.4:c.1602del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011806.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000121.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000130.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_553.xml'} + + assert results['flag'] == 'gene_variant' + assert 'NM_000130.4:c.1601G>A' in results.keys() + assert results['NM_000130.4:c.1601G>A']['hgvs_lrg_transcript_variant'] == 'LRG_553t1:c.1601G>A' + assert results['NM_000130.4:c.1601G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000130.4:c.1601G>A']['alt_genomic_loci'] == [] + assert results['NM_000130.4:c.1601G>A']['transcript_description'] == 'Homo sapiens coagulation factor V (F5), mRNA' + assert results['NM_000130.4:c.1601G>A']['gene_symbol'] == 'F5' + assert results['NM_000130.4:c.1601G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000121.2(LRG_553p1):p.(Arg534Gln)', 'slr': 'NP_000121.2:p.(R534Q)'} + assert results['NM_000130.4:c.1601G>A']['submitted_variant'] == '1-169519049-T-.' 
+ assert results['NM_000130.4:c.1601G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_000130.4:c.1601G>A']['hgvs_lrg_variant'] == 'LRG_553:g.41721G>A' + assert results['NM_000130.4:c.1601G>A']['hgvs_transcript_variant'] == 'NM_000130.4:c.1601G>A' + assert results['NM_000130.4:c.1601G>A']['hgvs_refseqgene_variant'] == 'NG_011806.1:g.41721G>A' + assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.169519049T=', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '169519049', 'alt': 'T'}} + assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549811C>T', 'vcf': {'chr': 'chr1', 'ref': u'C', 'pos': '169549811', 'alt': u'T'}} + assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.169519049T=', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '169519049', 'alt': 'T'}} + assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549811C>T', 'vcf': {'chr': '1', 'ref': u'C', 'pos': '169549811', 'alt': u'T'}} + assert results['NM_000130.4:c.1601G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011806.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000121.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000130.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_553.xml'} + + + def test_variant138(self): + variant = 'NC_000005.9:g.35058667_35058668AG=' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001204317.1:c.856-9155_856-9154=' in results.keys() + assert results['NM_001204317.1:c.856-9155_856-9154=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001204317.1:c.856-9155_856-9154=']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_001204317.1:c.856-9155_856-9154=']['alt_genomic_loci'] == [] + assert results['NM_001204317.1:c.856-9155_856-9154=']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 4, mRNA' + assert results['NM_001204317.1:c.856-9155_856-9154=']['gene_symbol'] == 'PRLR' + assert results['NM_001204317.1:c.856-9155_856-9154=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191246.1:p.?', 'slr': 'NP_001191246.1:p.?'} + assert results['NM_001204317.1:c.856-9155_856-9154=']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' + assert results['NM_001204317.1:c.856-9155_856-9154=']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001204317.1):c.856-9155_856-9154=' + assert results['NM_001204317.1:c.856-9155_856-9154=']['hgvs_lrg_variant'] == '' + assert results['NM_001204317.1:c.856-9155_856-9154=']['hgvs_transcript_variant'] == 'NM_001204317.1:c.856-9155_856-9154=' + assert results['NM_001204317.1:c.856-9155_856-9154=']['hgvs_refseqgene_variant'] == '' + assert results['NM_001204317.1:c.856-9155_856-9154=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': 'chr5', 'ref': 'AG', 'pos': '35058667', 'alt': 'AG'}} + assert results['NM_001204317.1:c.856-9155_856-9154=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058562_35058563=', 'vcf': {'chr': 'chr5', 'ref': 'AA', 'pos': '35058562', 'alt': 'AA'}} + assert results['NM_001204317.1:c.856-9155_856-9154=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': '5', 'ref': 'AG', 'pos': '35058667', 'alt': 'AG'}} + assert results['NM_001204317.1:c.856-9155_856-9154=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058562_35058563=', 'vcf': {'chr': '5', 'ref': 'AA', 'pos': '35058562', 'alt': 'AA'}} + assert 
results['NM_001204317.1:c.856-9155_856-9154=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191246.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204317.1'} + + assert 'NM_001204316.1:c.1009+7383_1009+7384=' in results.keys() + assert results['NM_001204316.1:c.1009+7383_1009+7384=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001204316.1:c.1009+7383_1009+7384=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001204316.1:c.1009+7383_1009+7384=']['alt_genomic_loci'] == [] + assert results['NM_001204316.1:c.1009+7383_1009+7384=']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 3, mRNA' + assert results['NM_001204316.1:c.1009+7383_1009+7384=']['gene_symbol'] == 'PRLR' + assert results['NM_001204316.1:c.1009+7383_1009+7384=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191245.1:p.?', 'slr': 'NP_001191245.1:p.?'} + assert results['NM_001204316.1:c.1009+7383_1009+7384=']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' + assert results['NM_001204316.1:c.1009+7383_1009+7384=']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001204316.1):c.1009+7383_1009+7384=' + assert results['NM_001204316.1:c.1009+7383_1009+7384=']['hgvs_lrg_variant'] == '' + assert results['NM_001204316.1:c.1009+7383_1009+7384=']['hgvs_transcript_variant'] == 'NM_001204316.1:c.1009+7383_1009+7384=' + assert results['NM_001204316.1:c.1009+7383_1009+7384=']['hgvs_refseqgene_variant'] == '' + assert results['NM_001204316.1:c.1009+7383_1009+7384=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': 'chr5', 'ref': 'AG', 'pos': '35058667', 'alt': 'AG'}} + assert results['NM_001204316.1:c.1009+7383_1009+7384=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058565_35058566=', 'vcf': {'chr': 'chr5', 'ref': 'AT', 'pos': '35058565', 'alt': 'AT'}} + 
assert results['NM_001204316.1:c.1009+7383_1009+7384=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': '5', 'ref': 'AG', 'pos': '35058667', 'alt': 'AG'}} + assert results['NM_001204316.1:c.1009+7383_1009+7384=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058565_35058566=', 'vcf': {'chr': '5', 'ref': 'AT', 'pos': '35058565', 'alt': 'AT'}} + assert results['NM_001204316.1:c.1009+7383_1009+7384=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191245.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204316.1'} + + assert 'NM_001204314.2:c.*6528del' in results.keys() + assert results['NM_001204314.2:c.*6528del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001204314.2:c.*6528del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001204314.2:c.*6528del']['alt_genomic_loci'] == [] + assert results['NM_001204314.2:c.*6528del']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA' + assert results['NM_001204314.2:c.*6528del']['gene_symbol'] == 'PRLR' + assert results['NM_001204314.2:c.*6528del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191243.1:p.?', 'slr': 'NP_001191243.1:p.?'} + assert results['NM_001204314.2:c.*6528del']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' + assert results['NM_001204314.2:c.*6528del']['genome_context_intronic_sequence'] == '' + assert results['NM_001204314.2:c.*6528del']['hgvs_lrg_variant'] == '' + assert results['NM_001204314.2:c.*6528del']['hgvs_transcript_variant'] == 'NM_001204314.2:c.*6528del' + assert results['NM_001204314.2:c.*6528del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001204314.2:c.*6528del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058662_35058668=', 'vcf': {'chr': 'chr5', 'ref': 'AGACAAG', 'pos': 
'35058662', 'alt': 'AGACAAG'}} + assert results['NM_001204314.2:c.*6528del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563del', 'vcf': {'chr': 'chr5', 'ref': 'CA', 'pos': '35058560', 'alt': 'C'}} + assert results['NM_001204314.2:c.*6528del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058662_35058668=', 'vcf': {'chr': '5', 'ref': 'AGACAAG', 'pos': '35058662', 'alt': 'AGACAAG'}} + assert results['NM_001204314.2:c.*6528del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563del', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058560', 'alt': 'C'}} + assert results['NM_001204314.2:c.*6528del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191243.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204314.2'} + + assert 'NM_001204318.1:c.686-9155_686-9154=' in results.keys() + assert results['NM_001204318.1:c.686-9155_686-9154=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001204318.1:c.686-9155_686-9154=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001204318.1:c.686-9155_686-9154=']['alt_genomic_loci'] == [] + assert results['NM_001204318.1:c.686-9155_686-9154=']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 5, mRNA' + assert results['NM_001204318.1:c.686-9155_686-9154=']['gene_symbol'] == 'PRLR' + assert results['NM_001204318.1:c.686-9155_686-9154=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191247.1:p.?', 'slr': 'NP_001191247.1:p.?'} + assert results['NM_001204318.1:c.686-9155_686-9154=']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' + assert results['NM_001204318.1:c.686-9155_686-9154=']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001204318.1):c.686-9155_686-9154=' + assert results['NM_001204318.1:c.686-9155_686-9154=']['hgvs_lrg_variant'] == '' + assert 
results['NM_001204318.1:c.686-9155_686-9154=']['hgvs_transcript_variant'] == 'NM_001204318.1:c.686-9155_686-9154=' + assert results['NM_001204318.1:c.686-9155_686-9154=']['hgvs_refseqgene_variant'] == '' + assert results['NM_001204318.1:c.686-9155_686-9154=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': 'chr5', 'ref': 'AG', 'pos': '35058667', 'alt': 'AG'}} + assert results['NM_001204318.1:c.686-9155_686-9154=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058562_35058563=', 'vcf': {'chr': 'chr5', 'ref': 'AA', 'pos': '35058562', 'alt': 'AA'}} + assert results['NM_001204318.1:c.686-9155_686-9154=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': '5', 'ref': 'AG', 'pos': '35058667', 'alt': 'AG'}} + assert results['NM_001204318.1:c.686-9155_686-9154=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058562_35058563=', 'vcf': {'chr': '5', 'ref': 'AA', 'pos': '35058562', 'alt': 'AA'}} + assert results['NM_001204318.1:c.686-9155_686-9154=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191247.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204318.1'} + + assert 'NR_037910.1:n.828-9155_828-9154=' in results.keys() + assert results['NR_037910.1:n.828-9155_828-9154=']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_037910.1:n.828-9155_828-9154=']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_037910.1:n.828-9155_828-9154=']['alt_genomic_loci'] == [] + assert results['NR_037910.1:n.828-9155_828-9154=']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 7, non-coding RNA' + assert results['NR_037910.1:n.828-9155_828-9154=']['gene_symbol'] == 'PRLR' + assert results['NR_037910.1:n.828-9155_828-9154=']['hgvs_predicted_protein_consequence'] == 
{'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_037910.1:n.828-9155_828-9154=']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' + assert results['NR_037910.1:n.828-9155_828-9154=']['genome_context_intronic_sequence'] == 'NC_000005.9(NR_037910.1):c.828-9155_828-9154=' + assert results['NR_037910.1:n.828-9155_828-9154=']['hgvs_lrg_variant'] == '' + assert results['NR_037910.1:n.828-9155_828-9154=']['hgvs_transcript_variant'] == 'NR_037910.1:n.828-9155_828-9154=' + assert results['NR_037910.1:n.828-9155_828-9154=']['hgvs_refseqgene_variant'] == '' + assert results['NR_037910.1:n.828-9155_828-9154=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': 'chr5', 'ref': 'AG', 'pos': '35058667', 'alt': 'AG'}} + assert results['NR_037910.1:n.828-9155_828-9154=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058562_35058563=', 'vcf': {'chr': 'chr5', 'ref': 'AA', 'pos': '35058562', 'alt': 'AA'}} + assert results['NR_037910.1:n.828-9155_828-9154=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': '5', 'ref': 'AG', 'pos': '35058667', 'alt': 'AG'}} + assert results['NR_037910.1:n.828-9155_828-9154=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058562_35058563=', 'vcf': {'chr': '5', 'ref': 'AA', 'pos': '35058562', 'alt': 'AA'}} + assert results['NR_037910.1:n.828-9155_828-9154=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_037910.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_000949.5:c.*6523_*6524=' in results.keys() + assert results['NM_000949.5:c.*6523_*6524=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000949.5:c.*6523_*6524=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000949.5:c.*6523_*6524=']['alt_genomic_loci'] == [] + assert 
results['NM_000949.5:c.*6523_*6524=']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA' + assert results['NM_000949.5:c.*6523_*6524=']['gene_symbol'] == 'PRLR' + assert results['NM_000949.5:c.*6523_*6524=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000940.1:p.?', 'slr': 'NP_000940.1:p.?'} + assert results['NM_000949.5:c.*6523_*6524=']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' + assert results['NM_000949.5:c.*6523_*6524=']['genome_context_intronic_sequence'] == '' + assert results['NM_000949.5:c.*6523_*6524=']['hgvs_lrg_variant'] == '' + assert results['NM_000949.5:c.*6523_*6524=']['hgvs_transcript_variant'] == 'NM_000949.5:c.*6523_*6524=' + assert results['NM_000949.5:c.*6523_*6524=']['hgvs_refseqgene_variant'] == 'NG_029042.1:g.177156_177157=' + assert results['NM_000949.5:c.*6523_*6524=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058666_35058669=', 'vcf': {'chr': 'chr5', 'ref': u'AAGA', 'pos': '35058666', 'alt': u'AAGA'}} + assert 'hg38' not in results['NM_000949.5:c.*6523_*6524=']['primary_assembly_loci'].keys() + assert results['NM_000949.5:c.*6523_*6524=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058666_35058669=', 'vcf': {'chr': '5', 'ref': u'AAGA', 'pos': '35058666', 'alt': u'AAGA'}} + assert 'grch38' not in results['NM_000949.5:c.*6523_*6524=']['primary_assembly_loci'].keys() + assert results['NM_000949.5:c.*6523_*6524=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029042.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000940.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000949.5'} + + assert 'NM_001204314.1:c.*6523_*6524=' in results.keys() + assert results['NM_001204314.1:c.*6523_*6524=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001204314.1:c.*6523_*6524=']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_001204314.1:c.*6523_*6524=']['alt_genomic_loci'] == [] + assert results['NM_001204314.1:c.*6523_*6524=']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA' + assert results['NM_001204314.1:c.*6523_*6524=']['gene_symbol'] == 'PRLR' + assert results['NM_001204314.1:c.*6523_*6524=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191243.1:p.?', 'slr': 'NP_001191243.1:p.?'} + assert results['NM_001204314.1:c.*6523_*6524=']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' + assert results['NM_001204314.1:c.*6523_*6524=']['genome_context_intronic_sequence'] == '' + assert results['NM_001204314.1:c.*6523_*6524=']['hgvs_lrg_variant'] == '' + assert results['NM_001204314.1:c.*6523_*6524=']['hgvs_transcript_variant'] == 'NM_001204314.1:c.*6523_*6524=' + assert results['NM_001204314.1:c.*6523_*6524=']['hgvs_refseqgene_variant'] == '' + assert results['NM_001204314.1:c.*6523_*6524=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': 'chr5', 'ref': u'AG', 'pos': '35058667', 'alt': u'AG'}} + assert 'hg38' not in results['NM_001204314.1:c.*6523_*6524=']['primary_assembly_loci'].keys() + assert results['NM_001204314.1:c.*6523_*6524=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': '5', 'ref': u'AG', 'pos': '35058667', 'alt': u'AG'}} + assert 'grch38' not in results['NM_001204314.1:c.*6523_*6524=']['primary_assembly_loci'].keys() + assert results['NM_001204314.1:c.*6523_*6524=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191243.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204314.1'} + + assert 'NM_000949.6:c.*6528del' in results.keys() + assert results['NM_000949.6:c.*6528del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000949.6:c.*6528del']['refseqgene_context_intronic_sequence'] == '' + 
assert results['NM_000949.6:c.*6528del']['alt_genomic_loci'] == [] + assert results['NM_000949.6:c.*6528del']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA' + assert results['NM_000949.6:c.*6528del']['gene_symbol'] == 'PRLR' + assert results['NM_000949.6:c.*6528del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000940.1:p.?', 'slr': 'NP_000940.1:p.?'} + assert results['NM_000949.6:c.*6528del']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' + assert results['NM_000949.6:c.*6528del']['genome_context_intronic_sequence'] == '' + assert results['NM_000949.6:c.*6528del']['hgvs_lrg_variant'] == '' + assert results['NM_000949.6:c.*6528del']['hgvs_transcript_variant'] == 'NM_000949.6:c.*6528del' + assert results['NM_000949.6:c.*6528del']['hgvs_refseqgene_variant'] == '' + assert results['NM_000949.6:c.*6528del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058662_35058668=', 'vcf': {'chr': 'chr5', 'ref': 'AGACAAG', 'pos': '35058662', 'alt': 'AGACAAG'}} + assert results['NM_000949.6:c.*6528del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563del', 'vcf': {'chr': 'chr5', 'ref': 'CA', 'pos': '35058560', 'alt': 'C'}} + assert results['NM_000949.6:c.*6528del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058662_35058668=', 'vcf': {'chr': '5', 'ref': 'AGACAAG', 'pos': '35058662', 'alt': 'AGACAAG'}} + assert results['NM_000949.6:c.*6528del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563del', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058560', 'alt': 'C'}} + assert results['NM_000949.6:c.*6528del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000940.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000949.6'} + + + def test_variant139(self): + variant = 'NM_000251.1:c.1296_1348del' + results = 
self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'NM_000251.1:c.1296_1348del' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant140(self): + variant = 'NM_000088.3:c.2023_2028del' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.2024_2028+1del' in results.keys() + assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.2024_2028+1del' + assert results['NM_000088.3:c.2024_2028+1del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.2024_2028+1del' + assert 
results['NM_000088.3:c.2024_2028+1del']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.2024_2028+1del']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.2024_2028+1del']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.2024_2028+1del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Ala675_Arg676del)', 'slr': 'NP_000079.2:p.(A675_R676del)'} + assert results['NM_000088.3:c.2024_2028+1del']['submitted_variant'] == 'NM_000088.3:c.2023_2028del' + assert results['NM_000088.3:c.2024_2028+1del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.2024_2028+1del' + assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_variant'] == 'LRG_1:g.14656_14661del' + assert results['NM_000088.3:c.2024_2028+1del']['hgvs_transcript_variant'] == 'NM_000088.3:c.2024_2028+1del' + assert results['NM_000088.3:c.2024_2028+1del']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.14656_14661del' + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269340_48269345del', 'vcf': {'chr': 'chr17', 'ref': 'ACTCTTG', 'pos': '48269339', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191979_50191984del', 'vcf': {'chr': 'chr17', 'ref': 'ACTCTTG', 'pos': '50191978', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269340_48269345del', 'vcf': {'chr': '17', 'ref': 'ACTCTTG', 'pos': '48269339', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191979_50191984del', 'vcf': {'chr': '17', 'ref': 'ACTCTTG', 'pos': '50191978', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['reference_sequence_records'] 
== {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant141(self): + variant = 'NM_000088.3:c.2024_2028+1del' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.2024_2028+1del' in results.keys() + assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.2024_2028+1del' + assert results['NM_000088.3:c.2024_2028+1del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.2024_2028+1del' + assert results['NM_000088.3:c.2024_2028+1del']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.2024_2028+1del']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.2024_2028+1del']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.2024_2028+1del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.2024_2028+1del']['submitted_variant'] == 'NM_000088.3:c.2024_2028+1del' + assert results['NM_000088.3:c.2024_2028+1del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.2024_2028+1del' + assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_variant'] == 'LRG_1:g.14656_14661del' + assert results['NM_000088.3:c.2024_2028+1del']['hgvs_transcript_variant'] == 'NM_000088.3:c.2024_2028+1del' + assert results['NM_000088.3:c.2024_2028+1del']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.14656_14661del' + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269340_48269345del', 'vcf': {'chr': 'chr17', 'ref': 'ACTCTTG', 'pos': '48269339', 'alt': 'A'}} + assert 
results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191979_50191984del', 'vcf': {'chr': 'chr17', 'ref': 'ACTCTTG', 'pos': '50191978', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269340_48269345del', 'vcf': {'chr': '17', 'ref': 'ACTCTTG', 'pos': '48269339', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191979_50191984del', 'vcf': {'chr': '17', 'ref': 'ACTCTTG', 'pos': '50191978', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant142(self): + variant = 'ENST00000450616.1:n.31+1G>C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'ENST00000450616.1:n.31+1G>C' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + 
assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant143(self): + variant = 'ENST00000491747:c.5071A>T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'ENST00000491747:c.5071A>T' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 
results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant144(self): + variant = 'NM_000088.3:c.589G>T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.589G>T' in results.keys() + assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' + assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.589G>T']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.589G>T']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.589G>T']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)'} + assert results['NM_000088.3:c.589G>T']['submitted_variant'] == 'NM_000088.3:c.589G>T' + assert results['NM_000088.3:c.589G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.589G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8638G>T' + assert results['NM_000088.3:c.589G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589G>T' + assert results['NM_000088.3:c.589G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638G>T' + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '48275363', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '50198002', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '48275363', 'alt': 
u'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '50198002', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant145(self): + variant = 'NG_007400.1:g.8638G>T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.589G>T' in results.keys() + assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' + assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.589G>T']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.589G>T']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.589G>T']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)'} + assert results['NM_000088.3:c.589G>T']['submitted_variant'] == 'NG_007400.1:g.8638G>T' + assert results['NM_000088.3:c.589G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.589G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8638G>T' + assert results['NM_000088.3:c.589G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589G>T' + assert results['NM_000088.3:c.589G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638G>T' + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 
'pos': '48275363', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '50198002', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '48275363', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '50198002', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant146(self): + variant = 'LRG_1:g.8638G>T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.589G>T' in results.keys() + assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' + assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.589G>T']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.589G>T']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.589G>T']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)'} + assert results['NM_000088.3:c.589G>T']['submitted_variant'] == 'LRG_1:g.8638G>T' + assert results['NM_000088.3:c.589G>T']['genome_context_intronic_sequence'] == '' + assert 
results['NM_000088.3:c.589G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8638G>T' + assert results['NM_000088.3:c.589G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589G>T' + assert results['NM_000088.3:c.589G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638G>T' + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '48275363', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '50198002', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '48275363', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '50198002', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant147(self): + variant = 'LRG_1t1:c.589G>T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.589G>T' in results.keys() + assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' + assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.589G>T']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.589G>T']['transcript_description'] == 'Homo sapiens collagen type I alpha 
1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.589G>T']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)'} + assert results['NM_000088.3:c.589G>T']['submitted_variant'] == 'LRG_1t1:c.589G>T' + assert results['NM_000088.3:c.589G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.589G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8638G>T' + assert results['NM_000088.3:c.589G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589G>T' + assert results['NM_000088.3:c.589G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638G>T' + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '48275363', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '50198002', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '48275363', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '50198002', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant148(self): + variant = 'chr16:g.15832508_15832509delinsAC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + 
assert 'NM_002474.2:c.3034_3035inv' in results.keys() + assert results['NM_002474.2:c.3034_3035inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_002474.2:c.3034_3035inv']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_002474.2:c.3034_3035inv']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}] + assert results['NM_002474.2:c.3034_3035inv']['transcript_description'] == 'Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1A, mRNA' + assert results['NM_002474.2:c.3034_3035inv']['gene_symbol'] == 'MYH11' + assert results['NM_002474.2:c.3034_3035inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002465.1:p.(Thr1012Val)', 'slr': 'NP_002465.1:p.(T1012V)'} + assert results['NM_002474.2:c.3034_3035inv']['submitted_variant'] == 'chr16:g.15832508_15832509delinsAC' + assert results['NM_002474.2:c.3034_3035inv']['genome_context_intronic_sequence'] == '' + assert results['NM_002474.2:c.3034_3035inv']['hgvs_lrg_variant'] == '' + assert results['NM_002474.2:c.3034_3035inv']['hgvs_transcript_variant'] == 'NM_002474.2:c.3034_3035inv' + assert results['NM_002474.2:c.3034_3035inv']['hgvs_refseqgene_variant'] == '' + assert results['NM_002474.2:c.3034_3035inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} + assert results['NM_002474.2:c.3034_3035inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} + assert results['NM_002474.2:c.3034_3035inv']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} + assert results['NM_002474.2:c.3034_3035inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} + assert results['NM_002474.2:c.3034_3035inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002465.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002474.2'} + + assert 'NM_022844.2:c.3034_3035inv' in results.keys() + assert results['NM_022844.2:c.3034_3035inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_022844.2:c.3034_3035inv']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_022844.2:c.3034_3035inv']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}] + assert results['NM_022844.2:c.3034_3035inv']['transcript_description'] == 'Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2A, mRNA' + assert results['NM_022844.2:c.3034_3035inv']['gene_symbol'] == 'MYH11' + assert results['NM_022844.2:c.3034_3035inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_074035.1:p.(Thr1012Val)', 'slr': 'NP_074035.1:p.(T1012V)'} + assert results['NM_022844.2:c.3034_3035inv']['submitted_variant'] == 'chr16:g.15832508_15832509delinsAC' + assert results['NM_022844.2:c.3034_3035inv']['genome_context_intronic_sequence'] == '' + assert results['NM_022844.2:c.3034_3035inv']['hgvs_lrg_variant'] == '' + assert results['NM_022844.2:c.3034_3035inv']['hgvs_transcript_variant'] == 'NM_022844.2:c.3034_3035inv' + assert 
results['NM_022844.2:c.3034_3035inv']['hgvs_refseqgene_variant'] == '' + assert results['NM_022844.2:c.3034_3035inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} + assert results['NM_022844.2:c.3034_3035inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} + assert results['NM_022844.2:c.3034_3035inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} + assert results['NM_022844.2:c.3034_3035inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} + assert results['NM_022844.2:c.3034_3035inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_074035.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_022844.2'} + + assert 'NM_001040114.1:c.3055_3056inv' in results.keys() + assert results['NM_001040114.1:c.3055_3056inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001040114.1:c.3055_3056inv']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001040114.1:c.3055_3056inv']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}] + assert results['NM_001040114.1:c.3055_3056inv']['transcript_description'] == 'Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1B, mRNA' + assert 
results['NM_001040114.1:c.3055_3056inv']['gene_symbol'] == 'MYH11' + assert results['NM_001040114.1:c.3055_3056inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035203.1:p.(Thr1019Val)', 'slr': 'NP_001035203.1:p.(T1019V)'} + assert results['NM_001040114.1:c.3055_3056inv']['submitted_variant'] == 'chr16:g.15832508_15832509delinsAC' + assert results['NM_001040114.1:c.3055_3056inv']['genome_context_intronic_sequence'] == '' + assert results['NM_001040114.1:c.3055_3056inv']['hgvs_lrg_variant'] == '' + assert results['NM_001040114.1:c.3055_3056inv']['hgvs_transcript_variant'] == 'NM_001040114.1:c.3055_3056inv' + assert results['NM_001040114.1:c.3055_3056inv']['hgvs_refseqgene_variant'] == '' + assert results['NM_001040114.1:c.3055_3056inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} + assert results['NM_001040114.1:c.3055_3056inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} + assert results['NM_001040114.1:c.3055_3056inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} + assert results['NM_001040114.1:c.3055_3056inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} + assert results['NM_001040114.1:c.3055_3056inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035203.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040114.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001040113.1:c.3055_3056inv' in results.keys() + assert 
results['NM_001040113.1:c.3055_3056inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001040113.1:c.3055_3056inv']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001040113.1:c.3055_3056inv']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}] + assert results['NM_001040113.1:c.3055_3056inv']['transcript_description'] == 'Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2B, mRNA' + assert results['NM_001040113.1:c.3055_3056inv']['gene_symbol'] == 'MYH11' + assert results['NM_001040113.1:c.3055_3056inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035202.1:p.(Thr1019Val)', 'slr': 'NP_001035202.1:p.(T1019V)'} + assert results['NM_001040113.1:c.3055_3056inv']['submitted_variant'] == 'chr16:g.15832508_15832509delinsAC' + assert results['NM_001040113.1:c.3055_3056inv']['genome_context_intronic_sequence'] == '' + assert results['NM_001040113.1:c.3055_3056inv']['hgvs_lrg_variant'] == '' + assert results['NM_001040113.1:c.3055_3056inv']['hgvs_transcript_variant'] == 'NM_001040113.1:c.3055_3056inv' + assert results['NM_001040113.1:c.3055_3056inv']['hgvs_refseqgene_variant'] == 'NG_009299.1:g.123379_123380inv' + assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} + assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} + assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['grch37'] 
== {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} + assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} + assert results['NM_001040113.1:c.3055_3056inv']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009299.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035202.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040113.1'} + + + def test_variant149(self): + variant = 'NG_012386.1:g.24048dupG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001162427.1:c.210+1615dup' in results.keys() + assert results['NM_001162427.1:c.210+1615dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001162427.1:c.210+1615dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001162427.1:c.210+1615dup']['alt_genomic_loci'] == [] + assert results['NM_001162427.1:c.210+1615dup']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 4, mRNA' + assert results['NM_001162427.1:c.210+1615dup']['gene_symbol'] == 'TSC1' + assert results['NM_001162427.1:c.210+1615dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001155899.1:p.?', 'slr': 'NP_001155899.1:p.?'} + assert results['NM_001162427.1:c.210+1615dup']['submitted_variant'] == 'NG_012386.1:g.24048dupG' + assert results['NM_001162427.1:c.210+1615dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001162427.1):c.210+1615dup' + assert results['NM_001162427.1:c.210+1615dup']['hgvs_lrg_variant'] == '' + assert results['NM_001162427.1:c.210+1615dup']['hgvs_transcript_variant'] == 'NM_001162427.1:c.210+1615dup' + assert results['NM_001162427.1:c.210+1615dup']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_001162427.1:c.210+1615dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155899.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162427.1'} + + assert 'NM_001162426.1:c.363+1dup' in results.keys() + assert results['NM_001162426.1:c.363+1dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001162426.1:c.363+1dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001162426.1:c.363+1dup']['alt_genomic_loci'] == [] + assert results['NM_001162426.1:c.363+1dup']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 3, mRNA' + assert results['NM_001162426.1:c.363+1dup']['gene_symbol'] == 'TSC1' + assert results['NM_001162426.1:c.363+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001155898.1:p.?', 'slr': 'NP_001155898.1:p.?'} + assert results['NM_001162426.1:c.363+1dup']['submitted_variant'] == 'NG_012386.1:g.24048dupG' + assert results['NM_001162426.1:c.363+1dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001162426.1):c.363+1dup' + assert 
results['NM_001162426.1:c.363+1dup']['hgvs_lrg_variant'] == '' + assert results['NM_001162426.1:c.363+1dup']['hgvs_transcript_variant'] == 'NM_001162426.1:c.363+1dup' + assert results['NM_001162426.1:c.363+1dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_001162426.1:c.363+1dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155898.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162426.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001362177.1:c.-1+1dup' in results.keys() + assert results['NM_001362177.1:c.-1+1dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001362177.1:c.-1+1dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001362177.1:c.-1+1dup']['alt_genomic_loci'] == [] + assert results['NM_001362177.1:c.-1+1dup']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 5, mRNA' + assert results['NM_001362177.1:c.-1+1dup']['gene_symbol'] == 'TSC1' + assert results['NM_001362177.1:c.-1+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001349106.1:p.?', 'slr': 
'NP_001349106.1:p.?'} + assert results['NM_001362177.1:c.-1+1dup']['submitted_variant'] == 'NG_012386.1:g.24048dupG' + assert results['NM_001362177.1:c.-1+1dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001362177.1):c.-1+1dup' + assert results['NM_001362177.1:c.-1+1dup']['hgvs_lrg_variant'] == '' + assert results['NM_001362177.1:c.-1+1dup']['hgvs_transcript_variant'] == 'NM_001362177.1:c.-1+1dup' + assert results['NM_001362177.1:c.-1+1dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_001362177.1:c.-1+1dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001349106.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001362177.1'} + + assert 'NM_000368.4:c.363+1dup' in results.keys() + assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_transcript_variant'] == 'LRG_486t1:c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['refseqgene_context_intronic_sequence'] == 'NG_012386.1(NM_000368.4):c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['alt_genomic_loci'] == [] + assert results['NM_000368.4:c.363+1dup']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 
(TSC1), transcript variant 1, mRNA' + assert results['NM_000368.4:c.363+1dup']['gene_symbol'] == 'TSC1' + assert results['NM_000368.4:c.363+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000359.1(LRG_486p1):p.?', 'slr': 'NP_000359.1:p.?'} + assert results['NM_000368.4:c.363+1dup']['submitted_variant'] == 'NG_012386.1:g.24048dupG' + assert results['NM_000368.4:c.363+1dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_000368.4):c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_variant'] == 'LRG_486:g.24048dup' + assert results['NM_000368.4:c.363+1dup']['hgvs_transcript_variant'] == 'NM_000368.4:c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['hgvs_refseqgene_variant'] == 'NG_012386.1:g.24048dup' + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012386.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_486.xml'} + + + def test_variant150(self): + variant = 'NM_033517.1:c.1307_1309delCGA' + 
results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_033517.1:c.1307_1309del' in results.keys() + assert results['NM_033517.1:c.1307_1309del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_033517.1:c.1307_1309del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_033517.1:c.1307_1309del']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NW_015148969.1:g.33721_33723del', 'vcf': {'chr': 'HG1311_PATCH', 'ref': 'CCGA', 'pos': '33720', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NW_015148969.1:g.33721_33723del', 'vcf': {'chr': 'NW_015148969.1', 'ref': 'CCGA', 'pos': '33720', 'alt': 'C'}}}] + assert results['NM_033517.1:c.1307_1309del']['transcript_description'] == 'Homo sapiens SH3 and multiple ankyrin repeat domains 3 (SHANK3), mRNA' + assert results['NM_033517.1:c.1307_1309del']['gene_symbol'] == 'SHANK3' + assert results['NM_033517.1:c.1307_1309del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_277052.1:p.(Pro436_Ser437delinsArg)', 'slr': 'NP_277052.1:p.(P436_S437delinsR)'} + assert results['NM_033517.1:c.1307_1309del']['submitted_variant'] == 'NM_033517.1:c.1307_1309delCGA' + assert results['NM_033517.1:c.1307_1309del']['genome_context_intronic_sequence'] == '' + assert results['NM_033517.1:c.1307_1309del']['hgvs_lrg_variant'] == '' + assert results['NM_033517.1:c.1307_1309del']['hgvs_transcript_variant'] == 'NM_033517.1:c.1307_1309del' + assert results['NM_033517.1:c.1307_1309del']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['NM_033517.1:c.1307_1309del']['primary_assembly_loci'].keys() + assert 'hg38' not in results['NM_033517.1:c.1307_1309del']['primary_assembly_loci'].keys() + assert 'grch37' not in results['NM_033517.1:c.1307_1309del']['primary_assembly_loci'].keys() + assert 'grch38' not in results['NM_033517.1:c.1307_1309del']['primary_assembly_loci'].keys() + assert 
results['NM_033517.1:c.1307_1309del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_277052.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_033517.1'} + + + def test_variant151(self): + variant = 'HG1311_PATCH-33720-CCGA-C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'HG1311_PATCH-33720-CCGA-C' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant152(self): + variant = '2-73675227-TCTC-TCTCCTC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_015120.4:c.1573_1579=' in results.keys() + assert 
results['NM_015120.4:c.1573_1579=']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.1573_1579=' + assert results['NM_015120.4:c.1573_1579=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_015120.4:c.1573_1579=']['alt_genomic_loci'] == [] + assert results['NM_015120.4:c.1573_1579=']['transcript_description'] == 'Homo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA' + assert results['NM_015120.4:c.1573_1579=']['gene_symbol'] == 'ALMS1' + assert results['NM_015120.4:c.1573_1579=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Ser525=)', 'slr': 'NP_055935.4:p.(S525=)'} + assert results['NM_015120.4:c.1573_1579=']['submitted_variant'] == '2-73675227-TCTC-TCTCCTC' + assert results['NM_015120.4:c.1573_1579=']['genome_context_intronic_sequence'] == '' + assert results['NM_015120.4:c.1573_1579=']['hgvs_lrg_variant'] == 'LRG_741:g.67345_67351=' + assert results['NM_015120.4:c.1573_1579=']['hgvs_transcript_variant'] == 'NM_015120.4:c.1573_1579=' + assert results['NM_015120.4:c.1573_1579=']['hgvs_refseqgene_variant'] == 'NG_011690.1:g.67345_67351=' + assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675228_73675230dup', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '73675227', 'alt': 'TCTC'}} + assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448097_73448103=', 'vcf': {'chr': 'chr2', 'ref': u'TCTCCTC', 'pos': '73448097', 'alt': u'TCTCCTC'}} + assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675228_73675230dup', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '73675227', 'alt': 'TCTC'}} + assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448097_73448103=', 'vcf': {'chr': '2', 'ref': u'TCTCCTC', 'pos': '73448097', 
'alt': u'TCTCCTC'}} + assert results['NM_015120.4:c.1573_1579=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} + + assert results['flag'] == 'gene_variant' + + def test_variant153(self): + variant = '2-73675227-TC-TC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_015120.4:c.1577_1579del' in results.keys() + assert results['NM_015120.4:c.1577_1579del']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.1577_1579del' + assert results['NM_015120.4:c.1577_1579del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_015120.4:c.1577_1579del']['alt_genomic_loci'] == [] + assert results['NM_015120.4:c.1577_1579del']['transcript_description'] == 'Homo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA' + assert results['NM_015120.4:c.1577_1579del']['gene_symbol'] == 'ALMS1' + assert results['NM_015120.4:c.1577_1579del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Pro526del)', 'slr': 'NP_055935.4:p.(P526del)'} + assert results['NM_015120.4:c.1577_1579del']['submitted_variant'] == '2-73675227-TC-TC' + assert results['NM_015120.4:c.1577_1579del']['genome_context_intronic_sequence'] == '' + assert results['NM_015120.4:c.1577_1579del']['hgvs_lrg_variant'] == 'LRG_741:g.67349_67351del' + assert results['NM_015120.4:c.1577_1579del']['hgvs_transcript_variant'] == 'NM_015120.4:c.1577_1579del' + assert results['NM_015120.4:c.1577_1579del']['hgvs_refseqgene_variant'] == 'NG_011690.1:g.67349_67351del' + assert results['NM_015120.4:c.1577_1579del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675227_73675228=', 'vcf': {'chr': 'chr2', 'ref': 'TC', 'pos': '73675227', 'alt': 'TC'}} + assert 
results['NM_015120.4:c.1577_1579del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448101_73448103del', 'vcf': {'chr': 'chr2', 'ref': 'TCTC', 'pos': '73448097', 'alt': 'T'}} + assert results['NM_015120.4:c.1577_1579del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675227_73675228=', 'vcf': {'chr': '2', 'ref': 'TC', 'pos': '73675227', 'alt': 'TC'}} + assert results['NM_015120.4:c.1577_1579del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448101_73448103del', 'vcf': {'chr': '2', 'ref': 'TCTC', 'pos': '73448097', 'alt': 'T'}} + assert results['NM_015120.4:c.1577_1579del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} + + assert results['flag'] == 'gene_variant' + + def test_variant154(self): + variant = '3-14561627-AG-AGG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001080423.3:c.1016_1020=' in results.keys() + assert results['NM_001080423.3:c.1016_1020=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001080423.3:c.1016_1020=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001080423.3:c.1016_1020=']['alt_genomic_loci'] == [] + assert results['NM_001080423.3:c.1016_1020=']['transcript_description'] == 'Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA' + assert results['NM_001080423.3:c.1016_1020=']['gene_symbol'] == 'GRIP2' + assert results['NM_001080423.3:c.1016_1020=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.3:p.(Arg339=)', 'slr': 'NP_001073892.3:p.(R339=)'} + assert results['NM_001080423.3:c.1016_1020=']['submitted_variant'] == '3-14561627-AG-AGG' + assert 
results['NM_001080423.3:c.1016_1020=']['genome_context_intronic_sequence'] == '' + assert results['NM_001080423.3:c.1016_1020=']['hgvs_lrg_variant'] == '' + assert results['NM_001080423.3:c.1016_1020=']['hgvs_transcript_variant'] == 'NM_001080423.3:c.1016_1020=' + assert results['NM_001080423.3:c.1016_1020=']['hgvs_refseqgene_variant'] == '' + assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': 'chr3', 'ref': 'A', 'pos': '14561627', 'alt': 'AG'}} + assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520120_14520124=', 'vcf': {'chr': 'chr3', 'ref': u'GGGCC', 'pos': '14520120', 'alt': u'GGGCC'}} + assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': '3', 'ref': 'A', 'pos': '14561627', 'alt': 'AG'}} + assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520120_14520124=', 'vcf': {'chr': '3', 'ref': u'GGGCC', 'pos': '14520120', 'alt': u'GGGCC'}} + assert results['NM_001080423.3:c.1016_1020=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.3'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001080423.2:c.1307_1311=' in results.keys() + assert results['NM_001080423.2:c.1307_1311=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001080423.2:c.1307_1311=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001080423.2:c.1307_1311=']['alt_genomic_loci'] == [] + assert results['NM_001080423.2:c.1307_1311=']['transcript_description'] == 'Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA' + assert results['NM_001080423.2:c.1307_1311=']['gene_symbol'] 
== 'GRIP2' + assert results['NM_001080423.2:c.1307_1311=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.2:p.(Arg436=)', 'slr': 'NP_001073892.2:p.(R436=)'} + assert results['NM_001080423.2:c.1307_1311=']['submitted_variant'] == '3-14561627-AG-AGG' + assert results['NM_001080423.2:c.1307_1311=']['genome_context_intronic_sequence'] == '' + assert results['NM_001080423.2:c.1307_1311=']['hgvs_lrg_variant'] == '' + assert results['NM_001080423.2:c.1307_1311=']['hgvs_transcript_variant'] == 'NM_001080423.2:c.1307_1311=' + assert results['NM_001080423.2:c.1307_1311=']['hgvs_refseqgene_variant'] == '' + assert results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': 'chr3', 'ref': 'A', 'pos': '14561627', 'alt': 'AG'}} + assert 'hg38' not in results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci'].keys() + assert results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': '3', 'ref': 'A', 'pos': '14561627', 'alt': 'AG'}} + assert 'grch38' not in results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci'].keys() + assert results['NM_001080423.2:c.1307_1311=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.2'} + + + def test_variant155(self): + variant = '3-14561630-CC-CC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001080423.3:c.1020del' in results.keys() + assert results['NM_001080423.3:c.1020del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001080423.3:c.1020del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001080423.3:c.1020del']['alt_genomic_loci'] == [] + assert results['NM_001080423.3:c.1020del']['transcript_description'] == 'Homo sapiens glutamate receptor interacting 
protein 2 (GRIP2), mRNA' + assert results['NM_001080423.3:c.1020del']['gene_symbol'] == 'GRIP2' + assert results['NM_001080423.3:c.1020del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.3:p.(Ser341GlnfsTer4)', 'slr': 'NP_001073892.3:p.(S341Qfs*4)'} + assert results['NM_001080423.3:c.1020del']['submitted_variant'] == '3-14561630-CC-CC' + assert results['NM_001080423.3:c.1020del']['genome_context_intronic_sequence'] == '' + assert results['NM_001080423.3:c.1020del']['hgvs_lrg_variant'] == '' + assert results['NM_001080423.3:c.1020del']['hgvs_transcript_variant'] == 'NM_001080423.3:c.1020del' + assert results['NM_001080423.3:c.1020del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001080423.3:c.1020del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': 'chr3', 'ref': 'CTGAGGC', 'pos': '14561624', 'alt': 'CTGAGGC'}} + assert results['NM_001080423.3:c.1020del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520122del', 'vcf': {'chr': 'chr3', 'ref': 'AG', 'pos': '14520119', 'alt': 'A'}} + assert results['NM_001080423.3:c.1020del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': '3', 'ref': 'CTGAGGC', 'pos': '14561624', 'alt': 'CTGAGGC'}} + assert results['NM_001080423.3:c.1020del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520122del', 'vcf': {'chr': '3', 'ref': 'AG', 'pos': '14520119', 'alt': 'A'}} + assert results['NM_001080423.3:c.1020del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.3'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001080423.2:c.1311del' in results.keys() + assert results['NM_001080423.2:c.1311del']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001080423.2:c.1311del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001080423.2:c.1311del']['alt_genomic_loci'] == [] + assert results['NM_001080423.2:c.1311del']['transcript_description'] == 'Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA' + assert results['NM_001080423.2:c.1311del']['gene_symbol'] == 'GRIP2' + assert results['NM_001080423.2:c.1311del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.2:p.(Ser438GlnfsTer4)', 'slr': 'NP_001073892.2:p.(S438Qfs*4)'} + assert results['NM_001080423.2:c.1311del']['submitted_variant'] == '3-14561630-CC-CC' + assert results['NM_001080423.2:c.1311del']['genome_context_intronic_sequence'] == '' + assert results['NM_001080423.2:c.1311del']['hgvs_lrg_variant'] == '' + assert results['NM_001080423.2:c.1311del']['hgvs_transcript_variant'] == 'NM_001080423.2:c.1311del' + assert results['NM_001080423.2:c.1311del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001080423.2:c.1311del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': 'chr3', 'ref': 'CTGAGGC', 'pos': '14561624', 'alt': 'CTGAGGC'}} + assert 'hg38' not in results['NM_001080423.2:c.1311del']['primary_assembly_loci'].keys() + assert results['NM_001080423.2:c.1311del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': '3', 'ref': 'CTGAGGC', 'pos': '14561624', 'alt': 'CTGAGGC'}} + assert 'grch38' not in results['NM_001080423.2:c.1311del']['primary_assembly_loci'].keys() + assert results['NM_001080423.2:c.1311del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.2'} + + + def test_variant156(self): + variant = '6-90403795-G-G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + 
assert 'NM_014611.1:c.9879T>C' in results.keys() + assert results['NM_014611.1:c.9879T>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014611.1:c.9879T>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014611.1:c.9879T>C']['alt_genomic_loci'] == [] + assert results['NM_014611.1:c.9879T>C']['transcript_description'] == 'Homo sapiens MDN1, midasin homolog (yeast) (MDN1), mRNA' + assert results['NM_014611.1:c.9879T>C']['gene_symbol'] == 'MDN1' + assert results['NM_014611.1:c.9879T>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055426.1:p.(Val3293=)', 'slr': 'NP_055426.1:p.(V3293=)'} + assert results['NM_014611.1:c.9879T>C']['submitted_variant'] == '6-90403795-G-G' + assert results['NM_014611.1:c.9879T>C']['genome_context_intronic_sequence'] == '' + assert results['NM_014611.1:c.9879T>C']['hgvs_lrg_variant'] == '' + assert results['NM_014611.1:c.9879T>C']['hgvs_transcript_variant'] == 'NM_014611.1:c.9879T>C' + assert results['NM_014611.1:c.9879T>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_014611.1:c.9879T>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': 'chr6', 'ref': 'G', 'pos': '90403795', 'alt': 'G'}} + assert 'hg38' not in results['NM_014611.1:c.9879T>C']['primary_assembly_loci'].keys() + assert results['NM_014611.1:c.9879T>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': '6', 'ref': 'G', 'pos': '90403795', 'alt': 'G'}} + assert 'grch38' not in results['NM_014611.1:c.9879T>C']['primary_assembly_loci'].keys() + assert results['NM_014611.1:c.9879T>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.1'} + + assert 'NM_014611.2:c.9879C=' in results.keys() + assert results['NM_014611.2:c.9879C=']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_014611.2:c.9879C=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014611.2:c.9879C=']['alt_genomic_loci'] == [] + assert results['NM_014611.2:c.9879C=']['transcript_description'] == 'Homo sapiens midasin AAA ATPase 1 (MDN1), mRNA' + assert results['NM_014611.2:c.9879C=']['gene_symbol'] == 'MDN1' + assert results['NM_014611.2:c.9879C=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055426.1:p.(Val3293=)', 'slr': 'NP_055426.1:p.(V3293=)'} + assert results['NM_014611.2:c.9879C=']['submitted_variant'] == '6-90403795-G-G' + assert results['NM_014611.2:c.9879C=']['genome_context_intronic_sequence'] == '' + assert results['NM_014611.2:c.9879C=']['hgvs_lrg_variant'] == '' + assert results['NM_014611.2:c.9879C=']['hgvs_transcript_variant'] == 'NM_014611.2:c.9879C=' + assert results['NM_014611.2:c.9879C=']['hgvs_refseqgene_variant'] == '' + assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': 'chr6', 'ref': u'G', 'pos': '90403795', 'alt': u'G'}} + assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G=', 'vcf': {'chr': 'chr6', 'ref': u'G', 'pos': '89694076', 'alt': u'G'}} + assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': '6', 'ref': u'G', 'pos': '90403795', 'alt': u'G'}} + assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G=', 'vcf': {'chr': '6', 'ref': u'G', 'pos': '89694076', 'alt': u'G'}} + assert results['NM_014611.2:c.9879C=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.2'} + + + def test_variant157(self): + variant = '6-90403795-G-A' + results = self.vv.validate(variant, 
'GRCh37', 'all') + print results + + assert 'NM_014611.2:c.9879C>T' in results.keys() + assert results['NM_014611.2:c.9879C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014611.2:c.9879C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014611.2:c.9879C>T']['alt_genomic_loci'] == [] + assert results['NM_014611.2:c.9879C>T']['transcript_description'] == 'Homo sapiens midasin AAA ATPase 1 (MDN1), mRNA' + assert results['NM_014611.2:c.9879C>T']['gene_symbol'] == 'MDN1' + assert results['NM_014611.2:c.9879C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055426.1:p.(Val3293=)', 'slr': 'NP_055426.1:p.(V3293=)'} + assert results['NM_014611.2:c.9879C>T']['submitted_variant'] == '6-90403795-G-A' + assert results['NM_014611.2:c.9879C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_014611.2:c.9879C>T']['hgvs_lrg_variant'] == '' + assert results['NM_014611.2:c.9879C>T']['hgvs_transcript_variant'] == 'NM_014611.2:c.9879C>T' + assert results['NM_014611.2:c.9879C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_014611.2:c.9879C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G>A', 'vcf': {'chr': 'chr6', 'ref': u'G', 'pos': '90403795', 'alt': u'A'}} + assert results['NM_014611.2:c.9879C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G>A', 'vcf': {'chr': 'chr6', 'ref': u'G', 'pos': '89694076', 'alt': u'A'}} + assert results['NM_014611.2:c.9879C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G>A', 'vcf': {'chr': '6', 'ref': u'G', 'pos': '90403795', 'alt': u'A'}} + assert results['NM_014611.2:c.9879C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G>A', 'vcf': {'chr': '6', 'ref': u'G', 'pos': '89694076', 'alt': u'A'}} + assert results['NM_014611.2:c.9879C>T']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.2'} + + assert results['flag'] == 'gene_variant' + assert 'NM_014611.1:c.9879T=' in results.keys() + assert results['NM_014611.1:c.9879T=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014611.1:c.9879T=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014611.1:c.9879T=']['alt_genomic_loci'] == [] + assert results['NM_014611.1:c.9879T=']['transcript_description'] == 'Homo sapiens MDN1, midasin homolog (yeast) (MDN1), mRNA' + assert results['NM_014611.1:c.9879T=']['gene_symbol'] == 'MDN1' + assert results['NM_014611.1:c.9879T=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055426.1:p.(Val3293=)', 'slr': 'NP_055426.1:p.(V3293=)'} + assert results['NM_014611.1:c.9879T=']['submitted_variant'] == '6-90403795-G-A' + assert results['NM_014611.1:c.9879T=']['genome_context_intronic_sequence'] == '' + assert results['NM_014611.1:c.9879T=']['hgvs_lrg_variant'] == '' + assert results['NM_014611.1:c.9879T=']['hgvs_transcript_variant'] == 'NM_014611.1:c.9879T=' + assert results['NM_014611.1:c.9879T=']['hgvs_refseqgene_variant'] == '' + assert results['NM_014611.1:c.9879T=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G>A', 'vcf': {'chr': 'chr6', 'ref': 'G', 'pos': '90403795', 'alt': u'A'}} + assert 'hg38' not in results['NM_014611.1:c.9879T=']['primary_assembly_loci'].keys() + assert results['NM_014611.1:c.9879T=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G>A', 'vcf': {'chr': '6', 'ref': 'G', 'pos': '90403795', 'alt': u'A'}} + assert 'grch38' not in results['NM_014611.1:c.9879T=']['primary_assembly_loci'].keys() + assert results['NM_014611.1:c.9879T=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.1'} + + + def 
test_variant158(self): + variant = '6-32012992-CG-C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_032470.3:c.4del' in results.keys() + assert results['NM_032470.3:c.4del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_032470.3:c.4del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_032470.3:c.4del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'chr6_GL000251v2_alt', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'chr6_GL000252v2_alt', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 
'NT_167247.2:g.3387249del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'chr6_GL000254v2_alt', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167248.1:g.3274047del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'ref': 'CG', 'pos': '3274046', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167248.1:g.3274047del', 'vcf': {'chr': 'chr6_qbl_hap6', 'ref': 'CG', 'pos': '3274046', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167248.2:g.3268451del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'ref': 'CG', 'pos': '3268450', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167248.2:g.3268451del', 'vcf': {'chr': 'chr6_GL000255v2_alt', 'ref': 'CG', 'pos': '3268450', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3345701del', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'CG', 'pos': '3345700', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3345701del', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': 'CG', 'pos': '3345700', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3346403del', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'CG', 'pos': '3346402', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3346403del', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': 'CG', 'pos': '3346402', 'alt': 'C'}}}] + assert results['NM_032470.3:c.4del']['transcript_description'] == 'Homo sapiens tenascin XB (TNXB), transcript variant XB-S, mRNA' + assert results['NM_032470.3:c.4del']['gene_symbol'] == 'TNXB' + assert results['NM_032470.3:c.4del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_115859.2:p.(Arg2AlafsTer91)', 'slr': 'NP_115859.2:p.(R2Afs*91)'} + assert results['NM_032470.3:c.4del']['submitted_variant'] == '6-32012992-CG-C' + assert results['NM_032470.3:c.4del']['genome_context_intronic_sequence'] == '' + assert 
results['NM_032470.3:c.4del']['hgvs_lrg_variant'] == '' + assert results['NM_032470.3:c.4del']['hgvs_transcript_variant'] == 'NM_032470.3:c.4del' + assert results['NM_032470.3:c.4del']['hgvs_refseqgene_variant'] == '' + assert results['NM_032470.3:c.4del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': 'chr6', 'ref': 'CG', 'pos': '32012992', 'alt': 'C'}} + assert results['NM_032470.3:c.4del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.32045216del', 'vcf': {'chr': 'chr6', 'ref': 'CG', 'pos': '32045215', 'alt': 'C'}} + assert results['NM_032470.3:c.4del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': '6', 'ref': 'CG', 'pos': '32012992', 'alt': 'C'}} + assert results['NM_032470.3:c.4del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.32045216del', 'vcf': {'chr': '6', 'ref': 'CG', 'pos': '32045215', 'alt': 'C'}} + assert results['NM_032470.3:c.4del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_115859.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032470.3'} + + assert 'NM_001365276.1:c.10717del' in results.keys() + assert results['NM_001365276.1:c.10717del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001365276.1:c.10717del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001365276.1:c.10717del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'hg19': 
{'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}] + assert results['NM_001365276.1:c.10717del']['transcript_description'] == 'Homo sapiens tenascin XB (TNXB), transcript variant 3, mRNA' + assert results['NM_001365276.1:c.10717del']['gene_symbol'] == 'TNXB' + assert results['NM_001365276.1:c.10717del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001352205.1:p.(Arg3573AlafsTer91)', 'slr': 'NP_001352205.1:p.(R3573Afs*91)'} + assert results['NM_001365276.1:c.10717del']['submitted_variant'] == '6-32012992-CG-C' + assert results['NM_001365276.1:c.10717del']['genome_context_intronic_sequence'] == '' + assert results['NM_001365276.1:c.10717del']['hgvs_lrg_variant'] == '' + assert results['NM_001365276.1:c.10717del']['hgvs_transcript_variant'] == 'NM_001365276.1:c.10717del' + assert results['NM_001365276.1:c.10717del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001365276.1:c.10717del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': 'chr6', 'ref': 'CG', 'pos': '32012992', 'alt': 'C'}} + assert 'hg38' not in results['NM_001365276.1:c.10717del']['primary_assembly_loci'].keys() + assert results['NM_001365276.1:c.10717del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': '6', 'ref': 'CG', 'pos': '32012992', 'alt': 'C'}} + assert 'grch38' not in results['NM_001365276.1:c.10717del']['primary_assembly_loci'].keys() + assert results['NM_001365276.1:c.10717del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001352205.1', 
'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001365276.1'} + + assert 'NM_019105.7:c.10711del' in results.keys() + assert results['NM_019105.7:c.10711del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_019105.7:c.10711del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_019105.7:c.10711del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}] + assert results['NM_019105.7:c.10711del']['transcript_description'] == 'Homo sapiens tenascin XB (TNXB), transcript variant XB, mRNA' + assert results['NM_019105.7:c.10711del']['gene_symbol'] == 'TNXB' + assert results['NM_019105.7:c.10711del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061978.6:p.(Arg3571AlafsTer91)', 'slr': 'NP_061978.6:p.(R3571Afs*91)'} + assert results['NM_019105.7:c.10711del']['submitted_variant'] == '6-32012992-CG-C' + assert results['NM_019105.7:c.10711del']['genome_context_intronic_sequence'] == '' + assert results['NM_019105.7:c.10711del']['hgvs_lrg_variant'] == '' + assert results['NM_019105.7:c.10711del']['hgvs_transcript_variant'] == 'NM_019105.7:c.10711del' + assert 
results['NM_019105.7:c.10711del']['hgvs_refseqgene_variant'] == '' + assert results['NM_019105.7:c.10711del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': 'chr6', 'ref': 'CG', 'pos': '32012992', 'alt': 'C'}} + assert 'hg38' not in results['NM_019105.7:c.10711del']['primary_assembly_loci'].keys() + assert results['NM_019105.7:c.10711del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': '6', 'ref': 'CG', 'pos': '32012992', 'alt': 'C'}} + assert 'grch38' not in results['NM_019105.7:c.10711del']['primary_assembly_loci'].keys() + assert results['NM_019105.7:c.10711del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061978.6', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_019105.7'} + + assert 'NM_019105.6:c.10711del' in results.keys() + assert results['NM_019105.6:c.10711del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_019105.6:c.10711del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_019105.6:c.10711del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'chr6_GL000251v2_alt', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 
'chr6_dbb_hap3', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'chr6_GL000252v2_alt', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'chr6_GL000254v2_alt', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167248.1:g.3271861del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'ref': 'AG', 'pos': '3271858', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167248.1:g.3271861del', 'vcf': {'chr': 'chr6_qbl_hap6', 'ref': 'AG', 'pos': '3271858', 'alt': 'A'}}}] + assert results['NM_019105.6:c.10711del']['transcript_description'] == 'Homo sapiens tenascin XB (TNXB), transcript variant XB, mRNA' + assert results['NM_019105.6:c.10711del']['gene_symbol'] == 'TNXB' + assert results['NM_019105.6:c.10711del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061978.6:p.(Arg3571AlafsTer91)', 'slr': 'NP_061978.6:p.(R3571Afs*91)'} + assert results['NM_019105.6:c.10711del']['submitted_variant'] == '6-32012992-CG-C' + assert results['NM_019105.6:c.10711del']['genome_context_intronic_sequence'] == '' + assert results['NM_019105.6:c.10711del']['hgvs_lrg_variant'] == '' + assert results['NM_019105.6:c.10711del']['hgvs_transcript_variant'] == 'NM_019105.6:c.10711del' + assert 
results['NM_019105.6:c.10711del']['hgvs_refseqgene_variant'] == 'NG_008337.2:g.69159del' + assert results['NM_019105.6:c.10711del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': 'chr6', 'ref': 'CG', 'pos': '32012992', 'alt': 'C'}} + assert results['NM_019105.6:c.10711del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.32045216del', 'vcf': {'chr': 'chr6', 'ref': 'CG', 'pos': '32045215', 'alt': 'C'}} + assert results['NM_019105.6:c.10711del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': '6', 'ref': 'CG', 'pos': '32012992', 'alt': 'C'}} + assert results['NM_019105.6:c.10711del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.32045216del', 'vcf': {'chr': '6', 'ref': 'CG', 'pos': '32045215', 'alt': 'C'}} + assert results['NM_019105.6:c.10711del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008337.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061978.6', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_019105.6'} + + + def test_variant159(self): + variant = '17-48275363-C-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.589G>T' in results.keys() + assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' + assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.589G>T']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.589G>T']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.589G>T']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Cys)', 'slr': 
'NP_000079.2:p.(G197C)'} + assert results['NM_000088.3:c.589G>T']['submitted_variant'] == '17-48275363-C-A' + assert results['NM_000088.3:c.589G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.589G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8638G>T' + assert results['NM_000088.3:c.589G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589G>T' + assert results['NM_000088.3:c.589G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638G>T' + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '48275363', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '50198002', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '48275363', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '50198002', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant160(self): + variant = '17-48275364-C-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.589-1G>T' in results.keys() + assert results['NM_000088.3:c.589-1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-1G>T' + assert 
results['NM_000088.3:c.589-1G>T']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-1G>T' + assert results['NM_000088.3:c.589-1G>T']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.589-1G>T']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.589-1G>T']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.589-1G>T']['submitted_variant'] == '17-48275364-C-A' + assert results['NM_000088.3:c.589-1G>T']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-1G>T' + assert results['NM_000088.3:c.589-1G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8637G>T' + assert results['NM_000088.3:c.589-1G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-1G>T' + assert results['NM_000088.3:c.589-1G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8637G>T' + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '48275364', 'alt': u'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '50198003', 'alt': u'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '48275364', 'alt': u'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '50198003', 'alt': u'A'}} + assert results['NM_000088.3:c.589-1G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 
'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant161(self): + variant = '17-48275359-GGA-TCC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_000088.3:c.591_593inv' in results.keys() + assert results['NM_000088.3:c.591_593inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.591_593inv' + assert results['NM_000088.3:c.591_593inv']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.591_593inv']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.591_593inv']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.591_593inv']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.591_593inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Pro198Asp)', 'slr': 'NP_000079.2:p.(P198D)'} + assert results['NM_000088.3:c.591_593inv']['submitted_variant'] == '17-48275359-GGA-TCC' + assert results['NM_000088.3:c.591_593inv']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.591_593inv']['hgvs_lrg_variant'] == 'LRG_1:g.8640_8642inv' + assert results['NM_000088.3:c.591_593inv']['hgvs_transcript_variant'] == 'NM_000088.3:c.591_593inv' + assert results['NM_000088.3:c.591_593inv']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8640_8642inv' + assert results['NM_000088.3:c.591_593inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275359_48275361inv', 'vcf': {'chr': 'chr17', 'ref': 'GGA', 'pos': '48275359', 'alt': 'TCC'}} + assert results['NM_000088.3:c.591_593inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197998_50198000inv', 'vcf': {'chr': 'chr17', 'ref': 'GGA', 'pos': '50197998', 'alt': 'TCC'}} + assert 
results['NM_000088.3:c.591_593inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275359_48275361inv', 'vcf': {'chr': '17', 'ref': 'GGA', 'pos': '48275359', 'alt': 'TCC'}} + assert results['NM_000088.3:c.591_593inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197998_50198000inv', 'vcf': {'chr': '17', 'ref': 'GGA', 'pos': '50197998', 'alt': 'TCC'}} + assert results['NM_000088.3:c.591_593inv']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + assert results['flag'] == 'gene_variant' + + def test_variant162(self): + variant = '7-94039128-CTTG-C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000089.3:c.1035_1035+2del' in results.keys() + assert results['NM_000089.3:c.1035_1035+2del']['hgvs_lrg_transcript_variant'] == 'LRG_2t1:c.1035_1035+2del' + assert results['NM_000089.3:c.1035_1035+2del']['refseqgene_context_intronic_sequence'] == 'NG_007405.1(NM_000089.3):c.1035_1035+2del' + assert results['NM_000089.3:c.1035_1035+2del']['alt_genomic_loci'] == [] + assert results['NM_000089.3:c.1035_1035+2del']['transcript_description'] == 'Homo sapiens collagen type I alpha 2 chain (COL1A2), mRNA' + assert results['NM_000089.3:c.1035_1035+2del']['gene_symbol'] == 'COL1A2' + assert results['NM_000089.3:c.1035_1035+2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000080.2(LRG_2p1):p.(Val345del)', 'slr': 'NP_000080.2:p.(V345del)'} + assert results['NM_000089.3:c.1035_1035+2del']['submitted_variant'] == '7-94039128-CTTG-C' + assert results['NM_000089.3:c.1035_1035+2del']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_000089.3):c.1035_1035+2del' + assert 
results['NM_000089.3:c.1035_1035+2del']['hgvs_lrg_variant'] == 'LRG_2:g.20261_20263del' + assert results['NM_000089.3:c.1035_1035+2del']['hgvs_transcript_variant'] == 'NM_000089.3:c.1035_1035+2del' + assert results['NM_000089.3:c.1035_1035+2del']['hgvs_refseqgene_variant'] == 'NG_007405.1:g.20261_20263del' + assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.94039133_94039135del', 'vcf': {'chr': 'chr7', 'ref': 'CTTG', 'pos': '94039128', 'alt': 'C'}} + assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.94409821_94409823del', 'vcf': {'chr': 'chr7', 'ref': 'CTTG', 'pos': '94409816', 'alt': 'C'}} + assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.94039133_94039135del', 'vcf': {'chr': '7', 'ref': 'CTTG', 'pos': '94039128', 'alt': 'C'}} + assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.94409821_94409823del', 'vcf': {'chr': '7', 'ref': 'CTTG', 'pos': '94409816', 'alt': 'C'}} + assert results['NM_000089.3:c.1035_1035+2del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007405.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000080.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000089.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_2.xml'} + + + def test_variant163(self): + variant = '9-135800972-AC-ACC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001162427.1:c.210+1615dup' in results.keys() + assert results['NM_001162427.1:c.210+1615dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001162427.1:c.210+1615dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001162427.1:c.210+1615dup']['alt_genomic_loci'] == [] + assert 
results['NM_001162427.1:c.210+1615dup']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 4, mRNA' + assert results['NM_001162427.1:c.210+1615dup']['gene_symbol'] == 'TSC1' + assert results['NM_001162427.1:c.210+1615dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001155899.1:p.?', 'slr': 'NP_001155899.1:p.?'} + assert results['NM_001162427.1:c.210+1615dup']['submitted_variant'] == '9-135800972-AC-ACC' + assert results['NM_001162427.1:c.210+1615dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001162427.1):c.210+1615dup' + assert results['NM_001162427.1:c.210+1615dup']['hgvs_lrg_variant'] == '' + assert results['NM_001162427.1:c.210+1615dup']['hgvs_transcript_variant'] == 'NM_001162427.1:c.210+1615dup' + assert results['NM_001162427.1:c.210+1615dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_001162427.1:c.210+1615dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155899.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162427.1'} + + assert 'NM_001162426.1:c.363+1dup' in results.keys() + assert 
results['NM_001162426.1:c.363+1dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001162426.1:c.363+1dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001162426.1:c.363+1dup']['alt_genomic_loci'] == [] + assert results['NM_001162426.1:c.363+1dup']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 3, mRNA' + assert results['NM_001162426.1:c.363+1dup']['gene_symbol'] == 'TSC1' + assert results['NM_001162426.1:c.363+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001155898.1:p.(Met122AspfsTer4)', 'slr': 'NP_001155898.1:p.(M122Dfs*4)'} + assert results['NM_001162426.1:c.363+1dup']['submitted_variant'] == '9-135800972-AC-ACC' + assert results['NM_001162426.1:c.363+1dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001162426.1):c.363+1dup' + assert results['NM_001162426.1:c.363+1dup']['hgvs_lrg_variant'] == '' + assert results['NM_001162426.1:c.363+1dup']['hgvs_transcript_variant'] == 'NM_001162426.1:c.363+1dup' + assert results['NM_001162426.1:c.363+1dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert 
results['NM_001162426.1:c.363+1dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155898.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162426.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001362177.1:c.-1+1dup' in results.keys() + assert results['NM_001362177.1:c.-1+1dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001362177.1:c.-1+1dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001362177.1:c.-1+1dup']['alt_genomic_loci'] == [] + assert results['NM_001362177.1:c.-1+1dup']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 5, mRNA' + assert results['NM_001362177.1:c.-1+1dup']['gene_symbol'] == 'TSC1' + assert results['NM_001362177.1:c.-1+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001349106.1:p.?', 'slr': 'NP_001349106.1:p.?'} + assert results['NM_001362177.1:c.-1+1dup']['submitted_variant'] == '9-135800972-AC-ACC' + assert results['NM_001362177.1:c.-1+1dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001362177.1):c.-1+1dup' + assert results['NM_001362177.1:c.-1+1dup']['hgvs_lrg_variant'] == '' + assert results['NM_001362177.1:c.-1+1dup']['hgvs_transcript_variant'] == 'NM_001362177.1:c.-1+1dup' + assert results['NM_001362177.1:c.-1+1dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 
'AC'}} + assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_001362177.1:c.-1+1dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001349106.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001362177.1'} + + assert 'NM_000368.4:c.363+1dup' in results.keys() + assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_transcript_variant'] == 'LRG_486t1:c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['refseqgene_context_intronic_sequence'] == 'NG_012386.1(NM_000368.4):c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['alt_genomic_loci'] == [] + assert results['NM_000368.4:c.363+1dup']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA' + assert results['NM_000368.4:c.363+1dup']['gene_symbol'] == 'TSC1' + assert results['NM_000368.4:c.363+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000359.1(LRG_486p1):p.(Met122AspfsTer4)', 'slr': 'NP_000359.1:p.(M122Dfs*4)'} + assert results['NM_000368.4:c.363+1dup']['submitted_variant'] == '9-135800972-AC-ACC' + assert results['NM_000368.4:c.363+1dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_000368.4):c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_variant'] == 'LRG_486:g.24048dup' + assert results['NM_000368.4:c.363+1dup']['hgvs_transcript_variant'] == 'NM_000368.4:c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['hgvs_refseqgene_variant'] == 'NG_012386.1:g.24048dup' + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000009.12:g.132925586dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012386.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_486.xml'} + + + def test_variant164(self): + variant = '1-43212925-C-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_001243246.1:c.2073G>A' in results.keys() + assert results['NM_001243246.1:c.2073G>A']['hgvs_lrg_transcript_variant'] == 'LRG_5t3:c.2073G>A' + assert results['NM_001243246.1:c.2073G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001243246.1:c.2073G>A']['alt_genomic_loci'] == [] + assert results['NM_001243246.1:c.2073G>A']['transcript_description'] == 'Homo sapiens prolyl 3-hydroxylase 1 (P3H1), transcript variant 3, mRNA' + assert results['NM_001243246.1:c.2073G>A']['gene_symbol'] == 'P3H1' + assert results['NM_001243246.1:c.2073G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001230175.1:p.(Ala691=)', 'slr': 'NP_001230175.1:p.(A691=)'} + assert results['NM_001243246.1:c.2073G>A']['submitted_variant'] == '1-43212925-C-T' + assert results['NM_001243246.1:c.2073G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001243246.1:c.2073G>A']['hgvs_lrg_variant'] == '' + assert 
results['NM_001243246.1:c.2073G>A']['hgvs_transcript_variant'] == 'NM_001243246.1:c.2073G>A' + assert results['NM_001243246.1:c.2073G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001243246.1:c.2073G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': 'chr1', 'ref': u'C', 'pos': '43212925', 'alt': u'T'}} + assert results['NM_001243246.1:c.2073G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': 'chr1', 'ref': u'C', 'pos': '42747254', 'alt': u'T'}} + assert results['NM_001243246.1:c.2073G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': '1', 'ref': u'C', 'pos': '43212925', 'alt': u'T'}} + assert results['NM_001243246.1:c.2073G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': '1', 'ref': u'C', 'pos': '42747254', 'alt': u'T'}} + assert results['NM_001243246.1:c.2073G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001230175.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001243246.1'} + + assert 'NM_001146289.1:c.2073G>A' in results.keys() + assert results['NM_001146289.1:c.2073G>A']['hgvs_lrg_transcript_variant'] == 'LRG_5t2:c.2073G>A' + assert results['NM_001146289.1:c.2073G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001146289.1:c.2073G>A']['alt_genomic_loci'] == [] + assert results['NM_001146289.1:c.2073G>A']['transcript_description'] == 'Homo sapiens prolyl 3-hydroxylase 1 (P3H1), transcript variant 2, mRNA' + assert results['NM_001146289.1:c.2073G>A']['gene_symbol'] == 'P3H1' + assert results['NM_001146289.1:c.2073G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001139761.1:p.(Ala691=)', 'slr': 'NP_001139761.1:p.(A691=)'} + assert results['NM_001146289.1:c.2073G>A']['submitted_variant'] == '1-43212925-C-T' + 
assert results['NM_001146289.1:c.2073G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001146289.1:c.2073G>A']['hgvs_lrg_variant'] == 'LRG_5:g.24831G>A' + assert results['NM_001146289.1:c.2073G>A']['hgvs_transcript_variant'] == 'NM_001146289.1:c.2073G>A' + assert results['NM_001146289.1:c.2073G>A']['hgvs_refseqgene_variant'] == 'NG_008123.1:g.24831G>A' + assert results['NM_001146289.1:c.2073G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': 'chr1', 'ref': u'C', 'pos': '43212925', 'alt': u'T'}} + assert results['NM_001146289.1:c.2073G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': 'chr1', 'ref': u'C', 'pos': '42747254', 'alt': u'T'}} + assert results['NM_001146289.1:c.2073G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': '1', 'ref': u'C', 'pos': '43212925', 'alt': u'T'}} + assert results['NM_001146289.1:c.2073G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': '1', 'ref': u'C', 'pos': '42747254', 'alt': u'T'}} + assert results['NM_001146289.1:c.2073G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008123.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001139761.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001146289.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_5.xml'} + + assert 'NM_022356.3:c.2055+18G>A' in results.keys() + assert results['NM_022356.3:c.2055+18G>A']['hgvs_lrg_transcript_variant'] == 'LRG_5t1:c.2055+18G>A' + assert results['NM_022356.3:c.2055+18G>A']['refseqgene_context_intronic_sequence'] == 'NG_008123.1(NM_022356.3):c.2055+18G>A' + assert results['NM_022356.3:c.2055+18G>A']['alt_genomic_loci'] == [] + assert results['NM_022356.3:c.2055+18G>A']['transcript_description'] == 'Homo sapiens 
prolyl 3-hydroxylase 1 (P3H1), transcript variant 1, mRNA' + assert results['NM_022356.3:c.2055+18G>A']['gene_symbol'] == 'P3H1' + assert results['NM_022356.3:c.2055+18G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_071751.3(LRG_5p1):p.?', 'slr': 'NP_071751.3:p.?'} + assert results['NM_022356.3:c.2055+18G>A']['submitted_variant'] == '1-43212925-C-T' + assert results['NM_022356.3:c.2055+18G>A']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_022356.3):c.2055+18G>A' + assert results['NM_022356.3:c.2055+18G>A']['hgvs_lrg_variant'] == 'LRG_5:g.24831G>A' + assert results['NM_022356.3:c.2055+18G>A']['hgvs_transcript_variant'] == 'NM_022356.3:c.2055+18G>A' + assert results['NM_022356.3:c.2055+18G>A']['hgvs_refseqgene_variant'] == 'NG_008123.1:g.24831G>A' + assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': 'chr1', 'ref': u'C', 'pos': '43212925', 'alt': u'T'}} + assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': 'chr1', 'ref': u'C', 'pos': '42747254', 'alt': u'T'}} + assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': '1', 'ref': u'C', 'pos': '43212925', 'alt': u'T'}} + assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': '1', 'ref': u'C', 'pos': '42747254', 'alt': u'T'}} + assert results['NM_022356.3:c.2055+18G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008123.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_071751.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_022356.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_5.xml'} + + + def test_variant165(self): + variant = 
'HG987_PATCH-355171-C-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001194958.2:c.20C>A' in results.keys() + assert results['NM_001194958.2:c.20C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001194958.2:c.20C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001194958.2:c.20C>A']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315950.2:g.355171C>A', 'vcf': {'chr': 'HG987_PATCH', 'ref': 'C', 'pos': '355171', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315950.2:g.355171C>A', 'vcf': {'chr': 'NW_003315950.2', 'ref': 'C', 'pos': '355171', 'alt': 'A'}}}] + assert results['NM_001194958.2:c.20C>A']['transcript_description'] == 'Homo sapiens potassium voltage-gated channel subfamily J member 18 (KCNJ18), mRNA' + assert results['NM_001194958.2:c.20C>A']['gene_symbol'] == 'KCNJ18' + assert results['NM_001194958.2:c.20C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001181887.2:p.(Ala7Asp)', 'slr': 'NP_001181887.2:p.(A7D)'} + assert results['NM_001194958.2:c.20C>A']['submitted_variant'] == 'HG987_PATCH-355171-C-A' + assert results['NM_001194958.2:c.20C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001194958.2:c.20C>A']['hgvs_lrg_variant'] == '' + assert results['NM_001194958.2:c.20C>A']['hgvs_transcript_variant'] == 'NM_001194958.2:c.20C>A' + assert results['NM_001194958.2:c.20C>A']['hgvs_refseqgene_variant'] == 'NG_033093.1:g.15284C>A' + assert 'hg19' not in results['NM_001194958.2:c.20C>A']['primary_assembly_loci'].keys() + assert results['NM_001194958.2:c.20C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.21702806C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '21702806', 'alt': 'A'}} + assert 'grch37' not in results['NM_001194958.2:c.20C>A']['primary_assembly_loci'].keys() + assert results['NM_001194958.2:c.20C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000017.11:g.21702806C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '21702806', 'alt': 'A'}} + assert results['NM_001194958.2:c.20C>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_033093.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001181887.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001194958.2'} + + assert results['flag'] == 'gene_variant' + + def test_variant166(self): + variant = '20-43252915-T-C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_000022.3:c.534A>G' in results.keys() + assert results['NM_000022.3:c.534A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000022.3:c.534A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000022.3:c.534A>G']['alt_genomic_loci'] == [] + assert results['NM_000022.3:c.534A>G']['transcript_description'] == 'Homo sapiens adenosine deaminase (ADA), transcript variant 1, mRNA' + assert results['NM_000022.3:c.534A>G']['gene_symbol'] == 'ADA' + assert results['NM_000022.3:c.534A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000013.2(LRG_16p1):p.(Val178=)', 'slr': 'NP_000013.2:p.(V178=)'} + assert results['NM_000022.3:c.534A>G']['submitted_variant'] == '20-43252915-T-C' + assert results['NM_000022.3:c.534A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_000022.3:c.534A>G']['hgvs_lrg_variant'] == '' + assert results['NM_000022.3:c.534A>G']['hgvs_transcript_variant'] == 'NM_000022.3:c.534A>G' + assert results['NM_000022.3:c.534A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_000022.3:c.534A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'ref': u'T', 'pos': '43252915', 'alt': u'C'}} + assert results['NM_000022.3:c.534A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': 'chr20', 'ref': u'T', 'pos': '44624274', 
'alt': u'C'}} + assert results['NM_000022.3:c.534A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'ref': u'T', 'pos': '43252915', 'alt': u'C'}} + assert results['NM_000022.3:c.534A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': '20', 'ref': u'T', 'pos': '44624274', 'alt': u'C'}} + assert results['NM_000022.3:c.534A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000013.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000022.3'} + + assert 'NM_001322051.1:c.534A>G' in results.keys() + assert results['NM_001322051.1:c.534A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322051.1:c.534A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001322051.1:c.534A>G']['alt_genomic_loci'] == [] + assert results['NM_001322051.1:c.534A>G']['transcript_description'] == 'Homo sapiens adenosine deaminase (ADA), transcript variant 3, mRNA' + assert results['NM_001322051.1:c.534A>G']['gene_symbol'] == 'ADA' + assert results['NM_001322051.1:c.534A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308980.1:p.(Val178=)', 'slr': 'NP_001308980.1:p.(V178=)'} + assert results['NM_001322051.1:c.534A>G']['submitted_variant'] == '20-43252915-T-C' + assert results['NM_001322051.1:c.534A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322051.1:c.534A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001322051.1:c.534A>G']['hgvs_transcript_variant'] == 'NM_001322051.1:c.534A>G' + assert results['NM_001322051.1:c.534A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001322051.1:c.534A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'ref': u'T', 'pos': '43252915', 'alt': u'C'}} + assert results['NM_001322051.1:c.534A>G']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': 'chr20', 'ref': u'T', 'pos': '44624274', 'alt': u'C'}} + assert results['NM_001322051.1:c.534A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'ref': u'T', 'pos': '43252915', 'alt': u'C'}} + assert results['NM_001322051.1:c.534A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': '20', 'ref': u'T', 'pos': '44624274', 'alt': u'C'}} + assert results['NM_001322051.1:c.534A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308980.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322051.1'} + + assert 'NM_000022.2:c.534A>G' in results.keys() + assert results['NM_000022.2:c.534A>G']['hgvs_lrg_transcript_variant'] == 'LRG_16t1:c.534A>G' + assert results['NM_000022.2:c.534A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000022.2:c.534A>G']['alt_genomic_loci'] == [] + assert results['NM_000022.2:c.534A>G']['transcript_description'] == 'Homo sapiens adenosine deaminase (ADA), mRNA' + assert results['NM_000022.2:c.534A>G']['gene_symbol'] == 'ADA' + assert results['NM_000022.2:c.534A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000013.2(LRG_16p1):p.(Val178=)', 'slr': 'NP_000013.2:p.(V178=)'} + assert results['NM_000022.2:c.534A>G']['submitted_variant'] == '20-43252915-T-C' + assert results['NM_000022.2:c.534A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_000022.2:c.534A>G']['hgvs_lrg_variant'] == 'LRG_16:g.32462A>G' + assert results['NM_000022.2:c.534A>G']['hgvs_transcript_variant'] == 'NM_000022.2:c.534A>G' + assert results['NM_000022.2:c.534A>G']['hgvs_refseqgene_variant'] == 'NG_007385.1:g.32462A>G' + assert results['NM_000022.2:c.534A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 
'ref': u'T', 'pos': '43252915', 'alt': u'C'}} + assert 'hg38' not in results['NM_000022.2:c.534A>G']['primary_assembly_loci'].keys() + assert results['NM_000022.2:c.534A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'ref': u'T', 'pos': '43252915', 'alt': u'C'}} + assert 'grch38' not in results['NM_000022.2:c.534A>G']['primary_assembly_loci'].keys() + assert results['NM_000022.2:c.534A>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007385.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000013.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000022.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_16.xml'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001322050.1:c.129A>G' in results.keys() + assert results['NM_001322050.1:c.129A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322050.1:c.129A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001322050.1:c.129A>G']['alt_genomic_loci'] == [] + assert results['NM_001322050.1:c.129A>G']['transcript_description'] == 'Homo sapiens adenosine deaminase (ADA), transcript variant 2, mRNA' + assert results['NM_001322050.1:c.129A>G']['gene_symbol'] == 'ADA' + assert results['NM_001322050.1:c.129A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308979.1:p.(Val43=)', 'slr': 'NP_001308979.1:p.(V43=)'} + assert results['NM_001322050.1:c.129A>G']['submitted_variant'] == '20-43252915-T-C' + assert results['NM_001322050.1:c.129A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322050.1:c.129A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001322050.1:c.129A>G']['hgvs_transcript_variant'] == 'NM_001322050.1:c.129A>G' + assert results['NM_001322050.1:c.129A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001322050.1:c.129A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'ref': u'T', 'pos': '43252915', 'alt': u'C'}} + assert results['NM_001322050.1:c.129A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': 'chr20', 'ref': u'T', 'pos': '44624274', 'alt': u'C'}} + assert results['NM_001322050.1:c.129A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'ref': u'T', 'pos': '43252915', 'alt': u'C'}} + assert results['NM_001322050.1:c.129A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': '20', 'ref': u'T', 'pos': '44624274', 'alt': u'C'}} + assert results['NM_001322050.1:c.129A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308979.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322050.1'} + + assert 'NR_136160.1:n.685A>G' in results.keys() + assert results['NR_136160.1:n.685A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_136160.1:n.685A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_136160.1:n.685A>G']['alt_genomic_loci'] == [] + assert results['NR_136160.1:n.685A>G']['transcript_description'] == 'Homo sapiens adenosine deaminase (ADA), transcript variant 4, non-coding RNA' + assert results['NR_136160.1:n.685A>G']['gene_symbol'] == 'ADA' + assert results['NR_136160.1:n.685A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_136160.1:n.685A>G']['submitted_variant'] == '20-43252915-T-C' + assert results['NR_136160.1:n.685A>G']['genome_context_intronic_sequence'] == '' + assert results['NR_136160.1:n.685A>G']['hgvs_lrg_variant'] == '' + assert results['NR_136160.1:n.685A>G']['hgvs_transcript_variant'] == 'NR_136160.1:n.685A>G' + assert results['NR_136160.1:n.685A>G']['hgvs_refseqgene_variant'] == '' + assert 
results['NR_136160.1:n.685A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'ref': u'T', 'pos': '43252915', 'alt': u'C'}} + assert results['NR_136160.1:n.685A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': 'chr20', 'ref': u'T', 'pos': '44624274', 'alt': u'C'}} + assert results['NR_136160.1:n.685A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'ref': u'T', 'pos': '43252915', 'alt': u'C'}} + assert results['NR_136160.1:n.685A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': '20', 'ref': u'T', 'pos': '44624274', 'alt': u'C'}} + assert results['NR_136160.1:n.685A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_136160.1'} + + + def test_variant167(self): + variant = '1-216219781-A-C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_206933.2:c.6317C>G' in results.keys() + assert results['NM_206933.2:c.6317C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_206933.2:c.6317C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_206933.2:c.6317C>G']['alt_genomic_loci'] == [] + assert results['NM_206933.2:c.6317C>G']['transcript_description'] == 'Homo sapiens usherin (USH2A), transcript variant 2, mRNA' + assert results['NM_206933.2:c.6317C>G']['gene_symbol'] == 'USH2A' + assert results['NM_206933.2:c.6317C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_996816.2:p.(Thr2106Arg)', 'slr': 'NP_996816.2:p.(T2106R)'} + assert results['NM_206933.2:c.6317C>G']['submitted_variant'] == '1-216219781-A-C' + assert results['NM_206933.2:c.6317C>G']['genome_context_intronic_sequence'] == '' + assert 
results['NM_206933.2:c.6317C>G']['hgvs_lrg_variant'] == '' + assert results['NM_206933.2:c.6317C>G']['hgvs_transcript_variant'] == 'NM_206933.2:c.6317C>G' + assert results['NM_206933.2:c.6317C>G']['hgvs_refseqgene_variant'] == 'NG_009497.1:g.381958C>G' + assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.216219781A>C', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '216219781', 'alt': u'C'}} + assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.216046439A>C', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '216046439', 'alt': u'C'}} + assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.216219781A>C', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '216219781', 'alt': u'C'}} + assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.216046439A>C', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '216046439', 'alt': u'C'}} + assert results['NM_206933.2:c.6317C>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009497.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_996816.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_206933.2'} + + + def test_variant168(self): + variant = '2-209113113-G-A,C,T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_005896.3:c.394C>G' in results.keys() + assert results['NM_005896.3:c.394C>G']['hgvs_lrg_transcript_variant'] == 'LRG_610t1:c.394C>G' + assert results['NM_005896.3:c.394C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_005896.3:c.394C>G']['alt_genomic_loci'] == [] + assert results['NM_005896.3:c.394C>G']['transcript_description'] == 'Homo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 1, mRNA' + assert 
results['NM_005896.3:c.394C>G']['gene_symbol'] == 'IDH1' + assert results['NM_005896.3:c.394C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005887.2(LRG_610p1):p.(Arg132Gly)', 'slr': 'NP_005887.2:p.(R132G)'} + assert results['NM_005896.3:c.394C>G']['submitted_variant'] == '2-209113113-G-A,C,T' + assert results['NM_005896.3:c.394C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_005896.3:c.394C>G']['hgvs_lrg_variant'] == '' + assert results['NM_005896.3:c.394C>G']['hgvs_transcript_variant'] == 'NM_005896.3:c.394C>G' + assert results['NM_005896.3:c.394C>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_005896.3:c.394C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '209113113', 'alt': u'C'}} + assert results['NM_005896.3:c.394C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '208248389', 'alt': u'C'}} + assert results['NM_005896.3:c.394C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '209113113', 'alt': u'C'}} + assert results['NM_005896.3:c.394C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '208248389', 'alt': u'C'}} + assert results['NM_005896.3:c.394C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.3'} + + assert 'NM_001282387.1:c.394C>G' in results.keys() + assert results['NM_001282387.1:c.394C>G']['hgvs_lrg_transcript_variant'] == 'LRG_610t2:c.394C>G' + assert results['NM_001282387.1:c.394C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001282387.1:c.394C>G']['alt_genomic_loci'] == [] + assert 
results['NM_001282387.1:c.394C>G']['transcript_description'] == 'Homo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 3, mRNA' + assert results['NM_001282387.1:c.394C>G']['gene_symbol'] == 'IDH1' + assert results['NM_001282387.1:c.394C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269316.1:p.(Arg132Gly)', 'slr': 'NP_001269316.1:p.(R132G)'} + assert results['NM_001282387.1:c.394C>G']['submitted_variant'] == '2-209113113-G-A,C,T' + assert results['NM_001282387.1:c.394C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001282387.1:c.394C>G']['hgvs_lrg_variant'] == '' + assert results['NM_001282387.1:c.394C>G']['hgvs_transcript_variant'] == 'NM_001282387.1:c.394C>G' + assert results['NM_001282387.1:c.394C>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001282387.1:c.394C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '209113113', 'alt': u'C'}} + assert results['NM_001282387.1:c.394C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '208248389', 'alt': u'C'}} + assert results['NM_001282387.1:c.394C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '209113113', 'alt': u'C'}} + assert results['NM_001282387.1:c.394C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '208248389', 'alt': u'C'}} + assert results['NM_001282387.1:c.394C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269316.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282387.1'} + + assert 'NM_001282387.1:c.394C>A' in results.keys() + assert results['NM_001282387.1:c.394C>A']['hgvs_lrg_transcript_variant'] == 
'LRG_610t2:c.394C>A' + assert results['NM_001282387.1:c.394C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001282387.1:c.394C>A']['alt_genomic_loci'] == [] + assert results['NM_001282387.1:c.394C>A']['transcript_description'] == 'Homo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 3, mRNA' + assert results['NM_001282387.1:c.394C>A']['gene_symbol'] == 'IDH1' + assert results['NM_001282387.1:c.394C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269316.1:p.(Arg132Ser)', 'slr': 'NP_001269316.1:p.(R132S)'} + assert results['NM_001282387.1:c.394C>A']['submitted_variant'] == '2-209113113-G-A,C,T' + assert results['NM_001282387.1:c.394C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001282387.1:c.394C>A']['hgvs_lrg_variant'] == '' + assert results['NM_001282387.1:c.394C>A']['hgvs_transcript_variant'] == 'NM_001282387.1:c.394C>A' + assert results['NM_001282387.1:c.394C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001282387.1:c.394C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '209113113', 'alt': u'T'}} + assert results['NM_001282387.1:c.394C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '208248389', 'alt': u'T'}} + assert results['NM_001282387.1:c.394C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '209113113', 'alt': u'T'}} + assert results['NM_001282387.1:c.394C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '208248389', 'alt': u'T'}} + assert results['NM_001282387.1:c.394C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269316.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282387.1'} + + assert 'NM_005896.3:c.394C>A' in results.keys() + assert results['NM_005896.3:c.394C>A']['hgvs_lrg_transcript_variant'] == 'LRG_610t1:c.394C>A' + assert results['NM_005896.3:c.394C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_005896.3:c.394C>A']['alt_genomic_loci'] == [] + assert results['NM_005896.3:c.394C>A']['transcript_description'] == 'Homo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 1, mRNA' + assert results['NM_005896.3:c.394C>A']['gene_symbol'] == 'IDH1' + assert results['NM_005896.3:c.394C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005887.2(LRG_610p1):p.(Arg132Ser)', 'slr': 'NP_005887.2:p.(R132S)'} + assert results['NM_005896.3:c.394C>A']['submitted_variant'] == '2-209113113-G-A,C,T' + assert results['NM_005896.3:c.394C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_005896.3:c.394C>A']['hgvs_lrg_variant'] == '' + assert results['NM_005896.3:c.394C>A']['hgvs_transcript_variant'] == 'NM_005896.3:c.394C>A' + assert results['NM_005896.3:c.394C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_005896.3:c.394C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '209113113', 'alt': u'T'}} + assert results['NM_005896.3:c.394C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '208248389', 'alt': u'T'}} + assert results['NM_005896.3:c.394C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '209113113', 'alt': u'T'}} + assert results['NM_005896.3:c.394C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '208248389', 'alt': u'T'}} + assert 
results['NM_005896.3:c.394C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.3'} + + assert 'NM_001282386.1:c.394C>T' in results.keys() + assert results['NM_001282386.1:c.394C>T']['hgvs_lrg_transcript_variant'] == 'LRG_610t3:c.394C>T' + assert results['NM_001282386.1:c.394C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001282386.1:c.394C>T']['alt_genomic_loci'] == [] + assert results['NM_001282386.1:c.394C>T']['transcript_description'] == 'Homo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 2, mRNA' + assert results['NM_001282386.1:c.394C>T']['gene_symbol'] == 'IDH1' + assert results['NM_001282386.1:c.394C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269315.1:p.(Arg132Cys)', 'slr': 'NP_001269315.1:p.(R132C)'} + assert results['NM_001282386.1:c.394C>T']['submitted_variant'] == '2-209113113-G-A,C,T' + assert results['NM_001282386.1:c.394C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001282386.1:c.394C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001282386.1:c.394C>T']['hgvs_transcript_variant'] == 'NM_001282386.1:c.394C>T' + assert results['NM_001282386.1:c.394C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001282386.1:c.394C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '209113113', 'alt': u'A'}} + assert results['NM_001282386.1:c.394C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '208248389', 'alt': u'A'}} + assert results['NM_001282386.1:c.394C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '209113113', 'alt': u'A'}} + assert 
results['NM_001282386.1:c.394C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '208248389', 'alt': u'A'}} + assert results['NM_001282386.1:c.394C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269315.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282386.1'} + + assert 'NM_005896.2:c.394C>A' in results.keys() + assert results['NM_005896.2:c.394C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_005896.2:c.394C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_005896.2:c.394C>A']['alt_genomic_loci'] == [] + assert results['NM_005896.2:c.394C>A']['transcript_description'] == 'Homo sapiens isocitrate dehydrogenase 1 (NADP+), soluble (IDH1), mRNA' + assert results['NM_005896.2:c.394C>A']['gene_symbol'] == 'IDH1' + assert results['NM_005896.2:c.394C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005887.2(LRG_610p1):p.(Arg132Ser)', 'slr': 'NP_005887.2:p.(R132S)'} + assert results['NM_005896.2:c.394C>A']['submitted_variant'] == '2-209113113-G-A,C,T' + assert results['NM_005896.2:c.394C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_005896.2:c.394C>A']['hgvs_lrg_variant'] == 'LRG_610:g.22686C>A' + assert results['NM_005896.2:c.394C>A']['hgvs_transcript_variant'] == 'NM_005896.2:c.394C>A' + assert results['NM_005896.2:c.394C>A']['hgvs_refseqgene_variant'] == 'NG_023319.2:g.22686C>A' + assert results['NM_005896.2:c.394C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '209113113', 'alt': u'T'}} + assert 'hg38' not in results['NM_005896.2:c.394C>A']['primary_assembly_loci'].keys() + assert results['NM_005896.2:c.394C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '209113113', 
'alt': u'T'}} + assert 'grch38' not in results['NM_005896.2:c.394C>A']['primary_assembly_loci'].keys() + assert results['NM_005896.2:c.394C>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_023319.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_610.xml'} + + assert 'NM_005896.2:c.394C>G' in results.keys() + assert results['NM_005896.2:c.394C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_005896.2:c.394C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_005896.2:c.394C>G']['alt_genomic_loci'] == [] + assert results['NM_005896.2:c.394C>G']['transcript_description'] == 'Homo sapiens isocitrate dehydrogenase 1 (NADP+), soluble (IDH1), mRNA' + assert results['NM_005896.2:c.394C>G']['gene_symbol'] == 'IDH1' + assert results['NM_005896.2:c.394C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005887.2(LRG_610p1):p.(Arg132Gly)', 'slr': 'NP_005887.2:p.(R132G)'} + assert results['NM_005896.2:c.394C>G']['submitted_variant'] == '2-209113113-G-A,C,T' + assert results['NM_005896.2:c.394C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_005896.2:c.394C>G']['hgvs_lrg_variant'] == 'LRG_610:g.22686C>G' + assert results['NM_005896.2:c.394C>G']['hgvs_transcript_variant'] == 'NM_005896.2:c.394C>G' + assert results['NM_005896.2:c.394C>G']['hgvs_refseqgene_variant'] == 'NG_023319.2:g.22686C>G' + assert results['NM_005896.2:c.394C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '209113113', 'alt': u'C'}} + assert 'hg38' not in results['NM_005896.2:c.394C>G']['primary_assembly_loci'].keys() + assert results['NM_005896.2:c.394C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': '2', 'ref': 
u'G', 'pos': '209113113', 'alt': u'C'}} + assert 'grch38' not in results['NM_005896.2:c.394C>G']['primary_assembly_loci'].keys() + assert results['NM_005896.2:c.394C>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_023319.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_610.xml'} + + assert results['flag'] == 'gene_variant' + assert 'NM_005896.3:c.394C>T' in results.keys() + assert results['NM_005896.3:c.394C>T']['hgvs_lrg_transcript_variant'] == 'LRG_610t1:c.394C>T' + assert results['NM_005896.3:c.394C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_005896.3:c.394C>T']['alt_genomic_loci'] == [] + assert results['NM_005896.3:c.394C>T']['transcript_description'] == 'Homo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 1, mRNA' + assert results['NM_005896.3:c.394C>T']['gene_symbol'] == 'IDH1' + assert results['NM_005896.3:c.394C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005887.2(LRG_610p1):p.(Arg132Cys)', 'slr': 'NP_005887.2:p.(R132C)'} + assert results['NM_005896.3:c.394C>T']['submitted_variant'] == '2-209113113-G-A,C,T' + assert results['NM_005896.3:c.394C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_005896.3:c.394C>T']['hgvs_lrg_variant'] == '' + assert results['NM_005896.3:c.394C>T']['hgvs_transcript_variant'] == 'NM_005896.3:c.394C>T' + assert results['NM_005896.3:c.394C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_005896.3:c.394C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '209113113', 'alt': u'A'}} + assert results['NM_005896.3:c.394C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': 
'208248389', 'alt': u'A'}} + assert results['NM_005896.3:c.394C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '209113113', 'alt': u'A'}} + assert results['NM_005896.3:c.394C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '208248389', 'alt': u'A'}} + assert results['NM_005896.3:c.394C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.3'} + + assert 'NM_001282387.1:c.394C>T' in results.keys() + assert results['NM_001282387.1:c.394C>T']['hgvs_lrg_transcript_variant'] == 'LRG_610t2:c.394C>T' + assert results['NM_001282387.1:c.394C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001282387.1:c.394C>T']['alt_genomic_loci'] == [] + assert results['NM_001282387.1:c.394C>T']['transcript_description'] == 'Homo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 3, mRNA' + assert results['NM_001282387.1:c.394C>T']['gene_symbol'] == 'IDH1' + assert results['NM_001282387.1:c.394C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269316.1:p.(Arg132Cys)', 'slr': 'NP_001269316.1:p.(R132C)'} + assert results['NM_001282387.1:c.394C>T']['submitted_variant'] == '2-209113113-G-A,C,T' + assert results['NM_001282387.1:c.394C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001282387.1:c.394C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001282387.1:c.394C>T']['hgvs_transcript_variant'] == 'NM_001282387.1:c.394C>T' + assert results['NM_001282387.1:c.394C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001282387.1:c.394C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '209113113', 'alt': u'A'}} + assert 
results['NM_001282387.1:c.394C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '208248389', 'alt': u'A'}} + assert results['NM_001282387.1:c.394C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '209113113', 'alt': u'A'}} + assert results['NM_001282387.1:c.394C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '208248389', 'alt': u'A'}} + assert results['NM_001282387.1:c.394C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269316.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282387.1'} + + assert 'NM_001282386.1:c.394C>G' in results.keys() + assert results['NM_001282386.1:c.394C>G']['hgvs_lrg_transcript_variant'] == 'LRG_610t3:c.394C>G' + assert results['NM_001282386.1:c.394C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001282386.1:c.394C>G']['alt_genomic_loci'] == [] + assert results['NM_001282386.1:c.394C>G']['transcript_description'] == 'Homo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 2, mRNA' + assert results['NM_001282386.1:c.394C>G']['gene_symbol'] == 'IDH1' + assert results['NM_001282386.1:c.394C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269315.1:p.(Arg132Gly)', 'slr': 'NP_001269315.1:p.(R132G)'} + assert results['NM_001282386.1:c.394C>G']['submitted_variant'] == '2-209113113-G-A,C,T' + assert results['NM_001282386.1:c.394C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001282386.1:c.394C>G']['hgvs_lrg_variant'] == '' + assert results['NM_001282386.1:c.394C>G']['hgvs_transcript_variant'] == 'NM_001282386.1:c.394C>G' + assert results['NM_001282386.1:c.394C>G']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_001282386.1:c.394C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '209113113', 'alt': u'C'}} + assert results['NM_001282386.1:c.394C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '208248389', 'alt': u'C'}} + assert results['NM_001282386.1:c.394C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '209113113', 'alt': u'C'}} + assert results['NM_001282386.1:c.394C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '208248389', 'alt': u'C'}} + assert results['NM_001282386.1:c.394C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269315.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282386.1'} + + assert 'NM_005896.2:c.394C>T' in results.keys() + assert results['NM_005896.2:c.394C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_005896.2:c.394C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_005896.2:c.394C>T']['alt_genomic_loci'] == [] + assert results['NM_005896.2:c.394C>T']['transcript_description'] == 'Homo sapiens isocitrate dehydrogenase 1 (NADP+), soluble (IDH1), mRNA' + assert results['NM_005896.2:c.394C>T']['gene_symbol'] == 'IDH1' + assert results['NM_005896.2:c.394C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005887.2(LRG_610p1):p.(Arg132Cys)', 'slr': 'NP_005887.2:p.(R132C)'} + assert results['NM_005896.2:c.394C>T']['submitted_variant'] == '2-209113113-G-A,C,T' + assert results['NM_005896.2:c.394C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_005896.2:c.394C>T']['hgvs_lrg_variant'] == 'LRG_610:g.22686C>T' + assert 
results['NM_005896.2:c.394C>T']['hgvs_transcript_variant'] == 'NM_005896.2:c.394C>T' + assert results['NM_005896.2:c.394C>T']['hgvs_refseqgene_variant'] == 'NG_023319.2:g.22686C>T' + assert results['NM_005896.2:c.394C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '209113113', 'alt': u'A'}} + assert 'hg38' not in results['NM_005896.2:c.394C>T']['primary_assembly_loci'].keys() + assert results['NM_005896.2:c.394C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '209113113', 'alt': u'A'}} + assert 'grch38' not in results['NM_005896.2:c.394C>T']['primary_assembly_loci'].keys() + assert results['NM_005896.2:c.394C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_023319.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_610.xml'} + + assert 'NM_001282386.1:c.394C>A' in results.keys() + assert results['NM_001282386.1:c.394C>A']['hgvs_lrg_transcript_variant'] == 'LRG_610t3:c.394C>A' + assert results['NM_001282386.1:c.394C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001282386.1:c.394C>A']['alt_genomic_loci'] == [] + assert results['NM_001282386.1:c.394C>A']['transcript_description'] == 'Homo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 2, mRNA' + assert results['NM_001282386.1:c.394C>A']['gene_symbol'] == 'IDH1' + assert results['NM_001282386.1:c.394C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269315.1:p.(Arg132Ser)', 'slr': 'NP_001269315.1:p.(R132S)'} + assert results['NM_001282386.1:c.394C>A']['submitted_variant'] == '2-209113113-G-A,C,T' + assert results['NM_001282386.1:c.394C>A']['genome_context_intronic_sequence'] == '' + 
assert results['NM_001282386.1:c.394C>A']['hgvs_lrg_variant'] == '' + assert results['NM_001282386.1:c.394C>A']['hgvs_transcript_variant'] == 'NM_001282386.1:c.394C>A' + assert results['NM_001282386.1:c.394C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001282386.1:c.394C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '209113113', 'alt': u'T'}} + assert results['NM_001282386.1:c.394C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '208248389', 'alt': u'T'}} + assert results['NM_001282386.1:c.394C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '209113113', 'alt': u'T'}} + assert results['NM_001282386.1:c.394C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '208248389', 'alt': u'T'}} + assert results['NM_001282386.1:c.394C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269315.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282386.1'} + + + def test_variant169(self): + variant = 'NC_000005.9:g.35058665_35058666CA=' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001204314.1:c.*6525_*6526=' in results.keys() + assert results['NM_001204314.1:c.*6525_*6526=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001204314.1:c.*6525_*6526=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001204314.1:c.*6525_*6526=']['alt_genomic_loci'] == [] + assert results['NM_001204314.1:c.*6525_*6526=']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA' + assert results['NM_001204314.1:c.*6525_*6526=']['gene_symbol'] == 'PRLR' + 
assert results['NM_001204314.1:c.*6525_*6526=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191243.1:p.?', 'slr': 'NP_001191243.1:p.?'} + assert results['NM_001204314.1:c.*6525_*6526=']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' + assert results['NM_001204314.1:c.*6525_*6526=']['genome_context_intronic_sequence'] == '' + assert results['NM_001204314.1:c.*6525_*6526=']['hgvs_lrg_variant'] == '' + assert results['NM_001204314.1:c.*6525_*6526=']['hgvs_transcript_variant'] == 'NM_001204314.1:c.*6525_*6526=' + assert results['NM_001204314.1:c.*6525_*6526=']['hgvs_refseqgene_variant'] == '' + assert results['NM_001204314.1:c.*6525_*6526=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': 'chr5', 'ref': u'CA', 'pos': '35058665', 'alt': u'CA'}} + assert 'hg38' not in results['NM_001204314.1:c.*6525_*6526=']['primary_assembly_loci'].keys() + assert results['NM_001204314.1:c.*6525_*6526=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': '5', 'ref': u'CA', 'pos': '35058665', 'alt': u'CA'}} + assert 'grch38' not in results['NM_001204314.1:c.*6525_*6526=']['primary_assembly_loci'].keys() + assert results['NM_001204314.1:c.*6525_*6526=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191243.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204314.1'} + + assert 'NM_001204314.2:c.*6528del' in results.keys() + assert results['NM_001204314.2:c.*6528del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001204314.2:c.*6528del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001204314.2:c.*6528del']['alt_genomic_loci'] == [] + assert results['NM_001204314.2:c.*6528del']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA' + assert results['NM_001204314.2:c.*6528del']['gene_symbol'] == 'PRLR' + 
assert results['NM_001204314.2:c.*6528del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191243.1:p.?', 'slr': 'NP_001191243.1:p.?'} + assert results['NM_001204314.2:c.*6528del']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' + assert results['NM_001204314.2:c.*6528del']['genome_context_intronic_sequence'] == '' + assert results['NM_001204314.2:c.*6528del']['hgvs_lrg_variant'] == '' + assert results['NM_001204314.2:c.*6528del']['hgvs_transcript_variant'] == 'NM_001204314.2:c.*6528del' + assert results['NM_001204314.2:c.*6528del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001204314.2:c.*6528del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058662_35058668=', 'vcf': {'chr': 'chr5', 'ref': 'AGACAAG', 'pos': '35058662', 'alt': 'AGACAAG'}} + assert results['NM_001204314.2:c.*6528del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563del', 'vcf': {'chr': 'chr5', 'ref': 'CA', 'pos': '35058560', 'alt': 'C'}} + assert results['NM_001204314.2:c.*6528del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058662_35058668=', 'vcf': {'chr': '5', 'ref': 'AGACAAG', 'pos': '35058662', 'alt': 'AGACAAG'}} + assert results['NM_001204314.2:c.*6528del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563del', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058560', 'alt': 'C'}} + assert results['NM_001204314.2:c.*6528del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191243.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204314.2'} + + assert 'NM_001204317.1:c.856-9153_856-9152=' in results.keys() + assert results['NM_001204317.1:c.856-9153_856-9152=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001204317.1:c.856-9153_856-9152=']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_001204317.1:c.856-9153_856-9152=']['alt_genomic_loci'] == [] + assert results['NM_001204317.1:c.856-9153_856-9152=']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 4, mRNA' + assert results['NM_001204317.1:c.856-9153_856-9152=']['gene_symbol'] == 'PRLR' + assert results['NM_001204317.1:c.856-9153_856-9152=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191246.1:p.?', 'slr': 'NP_001191246.1:p.?'} + assert results['NM_001204317.1:c.856-9153_856-9152=']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' + assert results['NM_001204317.1:c.856-9153_856-9152=']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001204317.1):c.856-9153_856-9152=' + assert results['NM_001204317.1:c.856-9153_856-9152=']['hgvs_lrg_variant'] == '' + assert results['NM_001204317.1:c.856-9153_856-9152=']['hgvs_transcript_variant'] == 'NM_001204317.1:c.856-9153_856-9152=' + assert results['NM_001204317.1:c.856-9153_856-9152=']['hgvs_refseqgene_variant'] == '' + assert results['NM_001204317.1:c.856-9153_856-9152=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': 'chr5', 'ref': 'CA', 'pos': '35058665', 'alt': 'CA'}} + assert results['NM_001204317.1:c.856-9153_856-9152=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058560_35058561=', 'vcf': {'chr': 'chr5', 'ref': 'CA', 'pos': '35058560', 'alt': 'CA'}} + assert results['NM_001204317.1:c.856-9153_856-9152=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058665', 'alt': 'CA'}} + assert results['NM_001204317.1:c.856-9153_856-9152=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058560_35058561=', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058560', 'alt': 'CA'}} + assert 
results['NM_001204317.1:c.856-9153_856-9152=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191246.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204317.1'} + + assert 'NM_001204316.1:c.1009+7385_1009+7386=' in results.keys() + assert results['NM_001204316.1:c.1009+7385_1009+7386=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001204316.1:c.1009+7385_1009+7386=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001204316.1:c.1009+7385_1009+7386=']['alt_genomic_loci'] == [] + assert results['NM_001204316.1:c.1009+7385_1009+7386=']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 3, mRNA' + assert results['NM_001204316.1:c.1009+7385_1009+7386=']['gene_symbol'] == 'PRLR' + assert results['NM_001204316.1:c.1009+7385_1009+7386=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191245.1:p.?', 'slr': 'NP_001191245.1:p.?'} + assert results['NM_001204316.1:c.1009+7385_1009+7386=']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' + assert results['NM_001204316.1:c.1009+7385_1009+7386=']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001204316.1):c.1009+7385_1009+7386=' + assert results['NM_001204316.1:c.1009+7385_1009+7386=']['hgvs_lrg_variant'] == '' + assert results['NM_001204316.1:c.1009+7385_1009+7386=']['hgvs_transcript_variant'] == 'NM_001204316.1:c.1009+7385_1009+7386=' + assert results['NM_001204316.1:c.1009+7385_1009+7386=']['hgvs_refseqgene_variant'] == '' + assert results['NM_001204316.1:c.1009+7385_1009+7386=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': 'chr5', 'ref': 'CA', 'pos': '35058665', 'alt': 'CA'}} + assert results['NM_001204316.1:c.1009+7385_1009+7386=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563_35058564=', 'vcf': {'chr': 'chr5', 'ref': 'AG', 'pos': '35058563', 'alt': 'AG'}} + 
assert results['NM_001204316.1:c.1009+7385_1009+7386=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058665', 'alt': 'CA'}} + assert results['NM_001204316.1:c.1009+7385_1009+7386=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563_35058564=', 'vcf': {'chr': '5', 'ref': 'AG', 'pos': '35058563', 'alt': 'AG'}} + assert results['NM_001204316.1:c.1009+7385_1009+7386=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191245.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204316.1'} + + assert results['flag'] == 'gene_variant' + assert 'NR_037910.1:n.828-9153_828-9152=' in results.keys() + assert results['NR_037910.1:n.828-9153_828-9152=']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_037910.1:n.828-9153_828-9152=']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_037910.1:n.828-9153_828-9152=']['alt_genomic_loci'] == [] + assert results['NR_037910.1:n.828-9153_828-9152=']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 7, non-coding RNA' + assert results['NR_037910.1:n.828-9153_828-9152=']['gene_symbol'] == 'PRLR' + assert results['NR_037910.1:n.828-9153_828-9152=']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_037910.1:n.828-9153_828-9152=']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' + assert results['NR_037910.1:n.828-9153_828-9152=']['genome_context_intronic_sequence'] == 'NC_000005.9(NR_037910.1):c.828-9153_828-9152=' + assert results['NR_037910.1:n.828-9153_828-9152=']['hgvs_lrg_variant'] == '' + assert results['NR_037910.1:n.828-9153_828-9152=']['hgvs_transcript_variant'] == 'NR_037910.1:n.828-9153_828-9152=' + assert results['NR_037910.1:n.828-9153_828-9152=']['hgvs_refseqgene_variant'] == '' + assert 
results['NR_037910.1:n.828-9153_828-9152=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': 'chr5', 'ref': 'CA', 'pos': '35058665', 'alt': 'CA'}} + assert results['NR_037910.1:n.828-9153_828-9152=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058560_35058561=', 'vcf': {'chr': 'chr5', 'ref': 'CA', 'pos': '35058560', 'alt': 'CA'}} + assert results['NR_037910.1:n.828-9153_828-9152=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058665', 'alt': 'CA'}} + assert results['NR_037910.1:n.828-9153_828-9152=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058560_35058561=', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058560', 'alt': 'CA'}} + assert results['NR_037910.1:n.828-9153_828-9152=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_037910.1'} + + assert 'NM_001204318.1:c.686-9153_686-9152=' in results.keys() + assert results['NM_001204318.1:c.686-9153_686-9152=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001204318.1:c.686-9153_686-9152=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001204318.1:c.686-9153_686-9152=']['alt_genomic_loci'] == [] + assert results['NM_001204318.1:c.686-9153_686-9152=']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 5, mRNA' + assert results['NM_001204318.1:c.686-9153_686-9152=']['gene_symbol'] == 'PRLR' + assert results['NM_001204318.1:c.686-9153_686-9152=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191247.1:p.?', 'slr': 'NP_001191247.1:p.?'} + assert results['NM_001204318.1:c.686-9153_686-9152=']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' + assert results['NM_001204318.1:c.686-9153_686-9152=']['genome_context_intronic_sequence'] == 
'NC_000005.9(NM_001204318.1):c.686-9153_686-9152=' + assert results['NM_001204318.1:c.686-9153_686-9152=']['hgvs_lrg_variant'] == '' + assert results['NM_001204318.1:c.686-9153_686-9152=']['hgvs_transcript_variant'] == 'NM_001204318.1:c.686-9153_686-9152=' + assert results['NM_001204318.1:c.686-9153_686-9152=']['hgvs_refseqgene_variant'] == '' + assert results['NM_001204318.1:c.686-9153_686-9152=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': 'chr5', 'ref': 'CA', 'pos': '35058665', 'alt': 'CA'}} + assert results['NM_001204318.1:c.686-9153_686-9152=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058560_35058561=', 'vcf': {'chr': 'chr5', 'ref': 'CA', 'pos': '35058560', 'alt': 'CA'}} + assert results['NM_001204318.1:c.686-9153_686-9152=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058665', 'alt': 'CA'}} + assert results['NM_001204318.1:c.686-9153_686-9152=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058560_35058561=', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058560', 'alt': 'CA'}} + assert results['NM_001204318.1:c.686-9153_686-9152=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191247.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204318.1'} + + assert 'NM_000949.5:c.*6525_*6526=' in results.keys() + assert results['NM_000949.5:c.*6525_*6526=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000949.5:c.*6525_*6526=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000949.5:c.*6525_*6526=']['alt_genomic_loci'] == [] + assert results['NM_000949.5:c.*6525_*6526=']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA' + assert results['NM_000949.5:c.*6525_*6526=']['gene_symbol'] == 'PRLR' 
+ assert results['NM_000949.5:c.*6525_*6526=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000940.1:p.?', 'slr': 'NP_000940.1:p.?'} + assert results['NM_000949.5:c.*6525_*6526=']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' + assert results['NM_000949.5:c.*6525_*6526=']['genome_context_intronic_sequence'] == '' + assert results['NM_000949.5:c.*6525_*6526=']['hgvs_lrg_variant'] == '' + assert results['NM_000949.5:c.*6525_*6526=']['hgvs_transcript_variant'] == 'NM_000949.5:c.*6525_*6526=' + assert results['NM_000949.5:c.*6525_*6526=']['hgvs_refseqgene_variant'] == 'NG_029042.1:g.177158_177159=' + assert results['NM_000949.5:c.*6525_*6526=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058664_35058668=', 'vcf': {'chr': 'chr5', 'ref': u'ACAAG', 'pos': '35058664', 'alt': u'ACAAG'}} + assert 'hg38' not in results['NM_000949.5:c.*6525_*6526=']['primary_assembly_loci'].keys() + assert results['NM_000949.5:c.*6525_*6526=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058664_35058668=', 'vcf': {'chr': '5', 'ref': u'ACAAG', 'pos': '35058664', 'alt': u'ACAAG'}} + assert 'grch38' not in results['NM_000949.5:c.*6525_*6526=']['primary_assembly_loci'].keys() + assert results['NM_000949.5:c.*6525_*6526=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029042.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000940.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000949.5'} + + assert 'NM_000949.6:c.*6528del' in results.keys() + assert results['NM_000949.6:c.*6528del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000949.6:c.*6528del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000949.6:c.*6528del']['alt_genomic_loci'] == [] + assert results['NM_000949.6:c.*6528del']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA' + assert 
results['NM_000949.6:c.*6528del']['gene_symbol'] == 'PRLR' + assert results['NM_000949.6:c.*6528del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000940.1:p.?', 'slr': 'NP_000940.1:p.?'} + assert results['NM_000949.6:c.*6528del']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' + assert results['NM_000949.6:c.*6528del']['genome_context_intronic_sequence'] == '' + assert results['NM_000949.6:c.*6528del']['hgvs_lrg_variant'] == '' + assert results['NM_000949.6:c.*6528del']['hgvs_transcript_variant'] == 'NM_000949.6:c.*6528del' + assert results['NM_000949.6:c.*6528del']['hgvs_refseqgene_variant'] == '' + assert results['NM_000949.6:c.*6528del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058662_35058668=', 'vcf': {'chr': 'chr5', 'ref': 'AGACAAG', 'pos': '35058662', 'alt': 'AGACAAG'}} + assert results['NM_000949.6:c.*6528del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563del', 'vcf': {'chr': 'chr5', 'ref': 'CA', 'pos': '35058560', 'alt': 'C'}} + assert results['NM_000949.6:c.*6528del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058662_35058668=', 'vcf': {'chr': '5', 'ref': 'AGACAAG', 'pos': '35058662', 'alt': 'AGACAAG'}} + assert results['NM_000949.6:c.*6528del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563del', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058560', 'alt': 'C'}} + assert results['NM_000949.6:c.*6528del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000940.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000949.6'} + + + def test_variant170(self): + variant = 'NC_000002.11:g.73675227_73675229delTCTinsTCTCTC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_015120.4:c.1580_1581insCCT' in results.keys() + assert 
results['NM_015120.4:c.1580_1581insCCT']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.1580_1581insCCT' + assert results['NM_015120.4:c.1580_1581insCCT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_015120.4:c.1580_1581insCCT']['alt_genomic_loci'] == [] + assert results['NM_015120.4:c.1580_1581insCCT']['transcript_description'] == 'Homo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA' + assert results['NM_015120.4:c.1580_1581insCCT']['gene_symbol'] == 'ALMS1' + assert results['NM_015120.4:c.1580_1581insCCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Leu527dup)', 'slr': 'NP_055935.4:p.(L527dup)'} + assert results['NM_015120.4:c.1580_1581insCCT']['submitted_variant'] == 'NC_000002.11:g.73675227_73675229delTCTinsTCTCTC' + assert results['NM_015120.4:c.1580_1581insCCT']['genome_context_intronic_sequence'] == '' + assert results['NM_015120.4:c.1580_1581insCCT']['hgvs_lrg_variant'] == 'LRG_741:g.67352_67353insCCT' + assert results['NM_015120.4:c.1580_1581insCCT']['hgvs_transcript_variant'] == 'NM_015120.4:c.1580_1581insCCT' + assert results['NM_015120.4:c.1580_1581insCCT']['hgvs_refseqgene_variant'] == 'NG_011690.1:g.67352_67353insCCT' + assert results['NM_015120.4:c.1580_1581insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675231_73675232insCCT', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '73675229', 'alt': u'TCTC'}} + assert results['NM_015120.4:c.1580_1581insCCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448104_73448105insCCT', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '73448102', 'alt': u'TCTC'}} + assert results['NM_015120.4:c.1580_1581insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675231_73675232insCCT', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '73675229', 'alt': u'TCTC'}} + assert results['NM_015120.4:c.1580_1581insCCT']['primary_assembly_loci']['grch38'] 
== {'hgvs_genomic_description': 'NC_000002.12:g.73448104_73448105insCCT', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '73448102', 'alt': u'TCTC'}} + assert results['NM_015120.4:c.1580_1581insCCT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} + + + def test_variant171(self): + variant = 'NM_000828.4:c.-2dupG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000828.4:c.-2dup' in results.keys() + assert results['NM_000828.4:c.-2dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000828.4:c.-2dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-2dup']['alt_genomic_loci'] == [] + assert results['NM_000828.4:c.-2dup']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA' + assert results['NM_000828.4:c.-2dup']['gene_symbol'] == 'GRIA3' + assert results['NM_000828.4:c.-2dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} + assert results['NM_000828.4:c.-2dup']['submitted_variant'] == 'NM_000828.4:c.-2dupG' + assert results['NM_000828.4:c.-2dup']['genome_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-2dup']['hgvs_lrg_variant'] == '' + assert results['NM_000828.4:c.-2dup']['hgvs_transcript_variant'] == 'NM_000828.4:c.-2dup' + assert results['NM_000828.4:c.-2dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': u'AGG'}} + assert 
results['NM_000828.4:c.-2dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '123184533', 'alt': 'AG'}} + assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': u'AGG'}} + assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '123184533', 'alt': 'AG'}} + assert results['NM_000828.4:c.-2dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} + + + def test_variant172(self): + variant = 'X-122318386-A-AGG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_007325.4:c.-2dup' in results.keys() + assert results['NM_007325.4:c.-2dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007325.4:c.-2dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007325.4:c.-2dup']['alt_genomic_loci'] == [] + assert results['NM_007325.4:c.-2dup']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 1, mRNA' + assert results['NM_007325.4:c.-2dup']['gene_symbol'] == 'GRIA3' + assert results['NM_007325.4:c.-2dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_015564.4:p.?', 'slr': 'NP_015564.4:p.?'} + assert results['NM_007325.4:c.-2dup']['submitted_variant'] == 'X-122318386-A-AGG' + assert results['NM_007325.4:c.-2dup']['genome_context_intronic_sequence'] == '' + assert results['NM_007325.4:c.-2dup']['hgvs_lrg_variant'] == '' + assert results['NM_007325.4:c.-2dup']['hgvs_transcript_variant'] == 'NM_007325.4:c.-2dup' + assert results['NM_007325.4:c.-2dup']['hgvs_refseqgene_variant'] 
== 'NG_009377.1:g.5292dup' + assert results['NM_007325.4:c.-2dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': u'AGG'}} + assert results['NM_007325.4:c.-2dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '123184533', 'alt': 'AG'}} + assert results['NM_007325.4:c.-2dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': u'AGG'}} + assert results['NM_007325.4:c.-2dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '123184533', 'alt': 'AG'}} + assert results['NM_007325.4:c.-2dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009377.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_015564.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007325.4'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001256743.1:c.-2dup' in results.keys() + assert results['NM_001256743.1:c.-2dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001256743.1:c.-2dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001256743.1:c.-2dup']['alt_genomic_loci'] == [] + assert results['NM_001256743.1:c.-2dup']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 3, mRNA' + assert results['NM_001256743.1:c.-2dup']['gene_symbol'] == 'GRIA3' + assert results['NM_001256743.1:c.-2dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243672.1:p.?', 'slr': 'NP_001243672.1:p.?'} + assert results['NM_001256743.1:c.-2dup']['submitted_variant'] == 'X-122318386-A-AGG' + assert 
results['NM_001256743.1:c.-2dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001256743.1:c.-2dup']['hgvs_lrg_variant'] == '' + assert results['NM_001256743.1:c.-2dup']['hgvs_transcript_variant'] == 'NM_001256743.1:c.-2dup' + assert results['NM_001256743.1:c.-2dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001256743.1:c.-2dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': u'AGG'}} + assert results['NM_001256743.1:c.-2dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '123184533', 'alt': 'AG'}} + assert results['NM_001256743.1:c.-2dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': u'AGG'}} + assert results['NM_001256743.1:c.-2dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '123184533', 'alt': 'AG'}} + assert results['NM_001256743.1:c.-2dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243672.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256743.1'} + + assert 'NM_000828.4:c.-2dup' in results.keys() + assert results['NM_000828.4:c.-2dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000828.4:c.-2dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-2dup']['alt_genomic_loci'] == [] + assert results['NM_000828.4:c.-2dup']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA' + assert results['NM_000828.4:c.-2dup']['gene_symbol'] == 'GRIA3' + assert results['NM_000828.4:c.-2dup']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} + assert results['NM_000828.4:c.-2dup']['submitted_variant'] == 'X-122318386-A-AGG' + assert results['NM_000828.4:c.-2dup']['genome_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-2dup']['hgvs_lrg_variant'] == '' + assert results['NM_000828.4:c.-2dup']['hgvs_transcript_variant'] == 'NM_000828.4:c.-2dup' + assert results['NM_000828.4:c.-2dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': u'AGG'}} + assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '123184533', 'alt': 'AG'}} + assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': u'AGG'}} + assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '123184533', 'alt': 'AG'}} + assert results['NM_000828.4:c.-2dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} + + + def test_variant173(self): + variant = 'NM_000828.4:c.-2G>T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000828.4:c.-2G>T' in results.keys() + assert results['NM_000828.4:c.-2G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000828.4:c.-2G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-2G>T']['alt_genomic_loci'] == [] + assert 
results['NM_000828.4:c.-2G>T']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA' + assert results['NM_000828.4:c.-2G>T']['gene_symbol'] == 'GRIA3' + assert results['NM_000828.4:c.-2G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} + assert results['NM_000828.4:c.-2G>T']['submitted_variant'] == 'NM_000828.4:c.-2G>T' + assert results['NM_000828.4:c.-2G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-2G>T']['hgvs_lrg_variant'] == '' + assert results['NM_000828.4:c.-2G>T']['hgvs_transcript_variant'] == 'NM_000828.4:c.-2G>T' + assert results['NM_000828.4:c.-2G>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': u'AT'}} + assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'chrX', 'ref': u'G', 'pos': '123184534', 'alt': u'T'}} + assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': u'AT'}} + assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'X', 'ref': u'G', 'pos': '123184534', 'alt': u'T'}} + assert results['NM_000828.4:c.-2G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} + + + def test_variant174(self): + variant = 'NM_000828.4:c.-2G=' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_000828.4:c.-2G=' in results.keys() + 
assert results['NM_000828.4:c.-2G=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000828.4:c.-2G=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-2G=']['alt_genomic_loci'] == [] + assert results['NM_000828.4:c.-2G=']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA' + assert results['NM_000828.4:c.-2G=']['gene_symbol'] == 'GRIA3' + assert results['NM_000828.4:c.-2G=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} + assert results['NM_000828.4:c.-2G=']['submitted_variant'] == 'NM_000828.4:c.-2G=' + assert results['NM_000828.4:c.-2G=']['genome_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-2G=']['hgvs_lrg_variant'] == '' + assert results['NM_000828.4:c.-2G=']['hgvs_transcript_variant'] == 'NM_000828.4:c.-2G=' + assert results['NM_000828.4:c.-2G=']['hgvs_refseqgene_variant'] == '' + assert results['NM_000828.4:c.-2G=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insG', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': u'AG'}} + assert results['NM_000828.4:c.-2G=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G=', 'vcf': {'chr': 'chrX', 'ref': u'G', 'pos': '123184534', 'alt': u'G'}} + assert results['NM_000828.4:c.-2G=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insG', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': u'AG'}} + assert results['NM_000828.4:c.-2G=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G=', 'vcf': {'chr': 'X', 'ref': u'G', 'pos': '123184534', 'alt': u'G'}} + assert results['NM_000828.4:c.-2G=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} + + assert results['flag'] == 'gene_variant' + + def test_variant175(self): + variant = 'X-122318386-A-AT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000828.4:c.-2G>T' in results.keys() + assert results['NM_000828.4:c.-2G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000828.4:c.-2G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-2G>T']['alt_genomic_loci'] == [] + assert results['NM_000828.4:c.-2G>T']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA' + assert results['NM_000828.4:c.-2G>T']['gene_symbol'] == 'GRIA3' + assert results['NM_000828.4:c.-2G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} + assert results['NM_000828.4:c.-2G>T']['submitted_variant'] == 'X-122318386-A-AT' + assert results['NM_000828.4:c.-2G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-2G>T']['hgvs_lrg_variant'] == '' + assert results['NM_000828.4:c.-2G>T']['hgvs_transcript_variant'] == 'NM_000828.4:c.-2G>T' + assert results['NM_000828.4:c.-2G>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': u'AT'}} + assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'chrX', 'ref': u'G', 'pos': '123184534', 'alt': u'T'}} + assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': u'AT'}} + assert 
results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'X', 'ref': u'G', 'pos': '123184534', 'alt': u'T'}} + assert results['NM_000828.4:c.-2G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} + + assert 'NM_001256743.1:c.-2G>T' in results.keys() + assert results['NM_001256743.1:c.-2G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001256743.1:c.-2G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001256743.1:c.-2G>T']['alt_genomic_loci'] == [] + assert results['NM_001256743.1:c.-2G>T']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 3, mRNA' + assert results['NM_001256743.1:c.-2G>T']['gene_symbol'] == 'GRIA3' + assert results['NM_001256743.1:c.-2G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243672.1:p.?', 'slr': 'NP_001243672.1:p.?'} + assert results['NM_001256743.1:c.-2G>T']['submitted_variant'] == 'X-122318386-A-AT' + assert results['NM_001256743.1:c.-2G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001256743.1:c.-2G>T']['hgvs_lrg_variant'] == '' + assert results['NM_001256743.1:c.-2G>T']['hgvs_transcript_variant'] == 'NM_001256743.1:c.-2G>T' + assert results['NM_001256743.1:c.-2G>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001256743.1:c.-2G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': u'AT'}} + assert results['NM_001256743.1:c.-2G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'chrX', 'ref': u'G', 'pos': '123184534', 'alt': u'T'}} + assert results['NM_001256743.1:c.-2G>T']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': u'AT'}} + assert results['NM_001256743.1:c.-2G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'X', 'ref': u'G', 'pos': '123184534', 'alt': u'T'}} + assert results['NM_001256743.1:c.-2G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243672.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256743.1'} + + assert 'NM_007325.4:c.-2G>T' in results.keys() + assert results['NM_007325.4:c.-2G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007325.4:c.-2G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007325.4:c.-2G>T']['alt_genomic_loci'] == [] + assert results['NM_007325.4:c.-2G>T']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 1, mRNA' + assert results['NM_007325.4:c.-2G>T']['gene_symbol'] == 'GRIA3' + assert results['NM_007325.4:c.-2G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_015564.4:p.?', 'slr': 'NP_015564.4:p.?'} + assert results['NM_007325.4:c.-2G>T']['submitted_variant'] == 'X-122318386-A-AT' + assert results['NM_007325.4:c.-2G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_007325.4:c.-2G>T']['hgvs_lrg_variant'] == '' + assert results['NM_007325.4:c.-2G>T']['hgvs_transcript_variant'] == 'NM_007325.4:c.-2G>T' + assert results['NM_007325.4:c.-2G>T']['hgvs_refseqgene_variant'] == 'NG_009377.1:g.5292G>T' + assert results['NM_007325.4:c.-2G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': u'AT'}} + assert results['NM_007325.4:c.-2G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 
'chrX', 'ref': u'G', 'pos': '123184534', 'alt': u'T'}} + assert results['NM_007325.4:c.-2G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': u'AT'}} + assert results['NM_007325.4:c.-2G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'X', 'ref': u'G', 'pos': '123184534', 'alt': u'T'}} + assert results['NM_007325.4:c.-2G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009377.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_015564.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007325.4'} + + + def test_variant176(self): + variant = 'NM_000828.4:c.-2_-1insT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000828.4:c.-2_-1insT' in results.keys() + assert results['NM_000828.4:c.-2_-1insT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000828.4:c.-2_-1insT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-2_-1insT']['alt_genomic_loci'] == [] + assert results['NM_000828.4:c.-2_-1insT']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA' + assert results['NM_000828.4:c.-2_-1insT']['gene_symbol'] == 'GRIA3' + assert results['NM_000828.4:c.-2_-1insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} + assert results['NM_000828.4:c.-2_-1insT']['submitted_variant'] == 'NM_000828.4:c.-2_-1insT' + assert results['NM_000828.4:c.-2_-1insT']['genome_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-2_-1insT']['hgvs_lrg_variant'] == '' + assert results['NM_000828.4:c.-2_-1insT']['hgvs_transcript_variant'] == 'NM_000828.4:c.-2_-1insT' + assert 
results['NM_000828.4:c.-2_-1insT']['hgvs_refseqgene_variant'] == '' + assert results['NM_000828.4:c.-2_-1insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'AGT'}} + assert results['NM_000828.4:c.-2_-1insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534_123184535insT', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '123184534', 'alt': u'GT'}} + assert results['NM_000828.4:c.-2_-1insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'AGT'}} + assert results['NM_000828.4:c.-2_-1insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534_123184535insT', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '123184534', 'alt': u'GT'}} + assert results['NM_000828.4:c.-2_-1insT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} + + + def test_variant177(self): + variant = 'NM_000828.4:c.-3_-2insT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_000828.4:c.-3_-2insT' in results.keys() + assert results['NM_000828.4:c.-3_-2insT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000828.4:c.-3_-2insT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-3_-2insT']['alt_genomic_loci'] == [] + assert results['NM_000828.4:c.-3_-2insT']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA' + assert results['NM_000828.4:c.-3_-2insT']['gene_symbol'] == 'GRIA3' + assert results['NM_000828.4:c.-3_-2insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} + assert 
results['NM_000828.4:c.-3_-2insT']['submitted_variant'] == 'NM_000828.4:c.-3_-2insT' + assert results['NM_000828.4:c.-3_-2insT']['genome_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-3_-2insT']['hgvs_lrg_variant'] == '' + assert results['NM_000828.4:c.-3_-2insT']['hgvs_transcript_variant'] == 'NM_000828.4:c.-3_-2insT' + assert results['NM_000828.4:c.-3_-2insT']['hgvs_refseqgene_variant'] == '' + assert results['NM_000828.4:c.-3_-2insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insTG', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'ATG'}} + assert results['NM_000828.4:c.-3_-2insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184533_123184534insT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '123184533', 'alt': u'AT'}} + assert results['NM_000828.4:c.-3_-2insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insTG', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'ATG'}} + assert results['NM_000828.4:c.-3_-2insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184533_123184534insT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '123184533', 'alt': u'AT'}} + assert results['NM_000828.4:c.-3_-2insT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} + + assert results['flag'] == 'gene_variant' + + def test_variant178(self): + variant = 'NM_000828.4:c.-2delGinsTT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000828.4:c.-2delinsTT' in results.keys() + assert results['NM_000828.4:c.-2delinsTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000828.4:c.-2delinsTT']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_000828.4:c.-2delinsTT']['alt_genomic_loci'] == [] + assert results['NM_000828.4:c.-2delinsTT']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA' + assert results['NM_000828.4:c.-2delinsTT']['gene_symbol'] == 'GRIA3' + assert results['NM_000828.4:c.-2delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} + assert results['NM_000828.4:c.-2delinsTT']['submitted_variant'] == 'NM_000828.4:c.-2delGinsTT' + assert results['NM_000828.4:c.-2delinsTT']['genome_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-2delinsTT']['hgvs_lrg_variant'] == '' + assert results['NM_000828.4:c.-2delinsTT']['hgvs_transcript_variant'] == 'NM_000828.4:c.-2delinsTT' + assert results['NM_000828.4:c.-2delinsTT']['hgvs_refseqgene_variant'] == '' + assert results['NM_000828.4:c.-2delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insTT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': u'ATT'}} + assert results['NM_000828.4:c.-2delinsTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534delinsTT', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '123184534', 'alt': u'TT'}} + assert results['NM_000828.4:c.-2delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insTT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': u'ATT'}} + assert results['NM_000828.4:c.-2delinsTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534delinsTT', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '123184534', 'alt': u'TT'}} + assert results['NM_000828.4:c.-2delinsTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} + + + def 
test_variant179(self): + variant = 'NM_000828.4:c.-2_-1delGCinsTT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000828.4:c.-2_-1delinsTT' in results.keys() + assert results['NM_000828.4:c.-2_-1delinsTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000828.4:c.-2_-1delinsTT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-2_-1delinsTT']['alt_genomic_loci'] == [] + assert results['NM_000828.4:c.-2_-1delinsTT']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA' + assert results['NM_000828.4:c.-2_-1delinsTT']['gene_symbol'] == 'GRIA3' + assert results['NM_000828.4:c.-2_-1delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} + assert results['NM_000828.4:c.-2_-1delinsTT']['submitted_variant'] == 'NM_000828.4:c.-2_-1delGCinsTT' + assert results['NM_000828.4:c.-2_-1delinsTT']['genome_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-2_-1delinsTT']['hgvs_lrg_variant'] == '' + assert results['NM_000828.4:c.-2_-1delinsTT']['hgvs_transcript_variant'] == 'NM_000828.4:c.-2_-1delinsTT' + assert results['NM_000828.4:c.-2_-1delinsTT']['hgvs_refseqgene_variant'] == '' + assert results['NM_000828.4:c.-2_-1delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318387delinsTT', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '122318387', 'alt': 'TT'}} + assert results['NM_000828.4:c.-2_-1delinsTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534_123184535delinsTT', 'vcf': {'chr': 'chrX', 'ref': 'GC', 'pos': '123184534', 'alt': u'TT'}} + assert results['NM_000828.4:c.-2_-1delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318387delinsTT', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '122318387', 
'alt': 'TT'}} + assert results['NM_000828.4:c.-2_-1delinsTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534_123184535delinsTT', 'vcf': {'chr': 'X', 'ref': 'GC', 'pos': '123184534', 'alt': u'TT'}} + assert results['NM_000828.4:c.-2_-1delinsTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} + + + def test_variant180(self): + variant = 'NM_000828.4:c.-3_-2delAGinsTT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_000828.4:c.-3_-2delinsTT' in results.keys() + assert results['NM_000828.4:c.-3_-2delinsTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000828.4:c.-3_-2delinsTT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-3_-2delinsTT']['alt_genomic_loci'] == [] + assert results['NM_000828.4:c.-3_-2delinsTT']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA' + assert results['NM_000828.4:c.-3_-2delinsTT']['gene_symbol'] == 'GRIA3' + assert results['NM_000828.4:c.-3_-2delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} + assert results['NM_000828.4:c.-3_-2delinsTT']['submitted_variant'] == 'NM_000828.4:c.-3_-2delAGinsTT' + assert results['NM_000828.4:c.-3_-2delinsTT']['genome_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-3_-2delinsTT']['hgvs_lrg_variant'] == '' + assert results['NM_000828.4:c.-3_-2delinsTT']['hgvs_transcript_variant'] == 'NM_000828.4:c.-3_-2delinsTT' + assert results['NM_000828.4:c.-3_-2delinsTT']['hgvs_refseqgene_variant'] == '' + assert results['NM_000828.4:c.-3_-2delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386delinsTT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'TT'}} + assert 
results['NM_000828.4:c.-3_-2delinsTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184533_123184534delinsTT', 'vcf': {'chr': 'chrX', 'ref': 'AG', 'pos': '123184533', 'alt': u'TT'}} + assert results['NM_000828.4:c.-3_-2delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386delinsTT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'TT'}} + assert results['NM_000828.4:c.-3_-2delinsTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184533_123184534delinsTT', 'vcf': {'chr': 'X', 'ref': 'AG', 'pos': '123184533', 'alt': u'TT'}} + assert results['NM_000828.4:c.-3_-2delinsTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} + + assert results['flag'] == 'gene_variant' + + def test_variant181(self): + variant = '15-72105929-C-C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_014249.3:c.951dup' in results.keys() + assert results['NM_014249.3:c.951dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014249.3:c.951dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014249.3:c.951dup']['alt_genomic_loci'] == [] + assert results['NM_014249.3:c.951dup']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA' + assert results['NM_014249.3:c.951dup']['gene_symbol'] == 'NR2E3' + assert results['NM_014249.3:c.951dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Thr318HisfsTer23)', 'slr': 'NP_055064.1:p.(T318Hfs*23)'} + assert results['NM_014249.3:c.951dup']['submitted_variant'] == '15-72105929-C-C' + assert results['NM_014249.3:c.951dup']['genome_context_intronic_sequence'] == '' + assert results['NM_014249.3:c.951dup']['hgvs_lrg_variant'] == '' + assert 
results['NM_014249.3:c.951dup']['hgvs_transcript_variant'] == 'NM_014249.3:c.951dup' + assert results['NM_014249.3:c.951dup']['hgvs_refseqgene_variant'] == 'NG_009113.1:g.8039dup' + assert results['NM_014249.3:c.951dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': 'chr15', 'ref': 'GTGGACCCCCA', 'pos': '72105924', 'alt': 'GTGGACCCCCA'}} + assert results['NM_014249.3:c.951dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813592dup', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': 'AC'}} + assert results['NM_014249.3:c.951dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': '15', 'ref': 'GTGGACCCCCA', 'pos': '72105924', 'alt': 'GTGGACCCCCA'}} + assert results['NM_014249.3:c.951dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813592dup', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': 'AC'}} + assert results['NM_014249.3:c.951dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3'} + + assert 'NM_014249.2:c.951dup' in results.keys() + assert results['NM_014249.2:c.951dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014249.2:c.951dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014249.2:c.951dup']['alt_genomic_loci'] == [] + assert results['NM_014249.2:c.951dup']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA' + assert results['NM_014249.2:c.951dup']['gene_symbol'] == 'NR2E3' + assert results['NM_014249.2:c.951dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Thr318HisfsTer23)', 'slr': 
'NP_055064.1:p.(T318Hfs*23)'} + assert results['NM_014249.2:c.951dup']['submitted_variant'] == '15-72105929-C-C' + assert results['NM_014249.2:c.951dup']['genome_context_intronic_sequence'] == '' + assert results['NM_014249.2:c.951dup']['hgvs_lrg_variant'] == '' + assert results['NM_014249.2:c.951dup']['hgvs_transcript_variant'] == 'NM_014249.2:c.951dup' + assert results['NM_014249.2:c.951dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_014249.2:c.951dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': 'chr15', 'ref': 'GTGGACCCCCA', 'pos': '72105924', 'alt': 'GTGGACCCCCA'}} + assert 'hg38' not in results['NM_014249.2:c.951dup']['primary_assembly_loci'].keys() + assert results['NM_014249.2:c.951dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': '15', 'ref': 'GTGGACCCCCA', 'pos': '72105924', 'alt': 'GTGGACCCCCA'}} + assert 'grch38' not in results['NM_014249.2:c.951dup']['primary_assembly_loci'].keys() + assert results['NM_014249.2:c.951dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2'} + + assert results['flag'] == 'gene_variant' + assert 'NM_016346.3:c.951dup' in results.keys() + assert results['NM_016346.3:c.951dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_016346.3:c.951dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_016346.3:c.951dup']['alt_genomic_loci'] == [] + assert results['NM_016346.3:c.951dup']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA' + assert results['NM_016346.3:c.951dup']['gene_symbol'] == 'NR2E3' + assert results['NM_016346.3:c.951dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Thr318HisfsTer23)', 'slr': 'NP_057430.1:p.(T318Hfs*23)'} + 
assert results['NM_016346.3:c.951dup']['submitted_variant'] == '15-72105929-C-C' + assert results['NM_016346.3:c.951dup']['genome_context_intronic_sequence'] == '' + assert results['NM_016346.3:c.951dup']['hgvs_lrg_variant'] == '' + assert results['NM_016346.3:c.951dup']['hgvs_transcript_variant'] == 'NM_016346.3:c.951dup' + assert results['NM_016346.3:c.951dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_016346.3:c.951dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': 'chr15', 'ref': 'GTGGACCCCCA', 'pos': '72105924', 'alt': 'GTGGACCCCCA'}} + assert results['NM_016346.3:c.951dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813592dup', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': 'AC'}} + assert results['NM_016346.3:c.951dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': '15', 'ref': 'GTGGACCCCCA', 'pos': '72105924', 'alt': 'GTGGACCCCCA'}} + assert results['NM_016346.3:c.951dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813592dup', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': 'AC'}} + assert results['NM_016346.3:c.951dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3'} + + assert 'NM_016346.2:c.951dup' in results.keys() + assert results['NM_016346.2:c.951dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_016346.2:c.951dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_016346.2:c.951dup']['alt_genomic_loci'] == [] + assert results['NM_016346.2:c.951dup']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA' + assert results['NM_016346.2:c.951dup']['gene_symbol'] == 'NR2E3' + assert 
results['NM_016346.2:c.951dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Thr318HisfsTer23)', 'slr': 'NP_057430.1:p.(T318Hfs*23)'} + assert results['NM_016346.2:c.951dup']['submitted_variant'] == '15-72105929-C-C' + assert results['NM_016346.2:c.951dup']['genome_context_intronic_sequence'] == '' + assert results['NM_016346.2:c.951dup']['hgvs_lrg_variant'] == '' + assert results['NM_016346.2:c.951dup']['hgvs_transcript_variant'] == 'NM_016346.2:c.951dup' + assert results['NM_016346.2:c.951dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_016346.2:c.951dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': 'chr15', 'ref': 'GTGGACCCCCA', 'pos': '72105924', 'alt': 'GTGGACCCCCA'}} + assert 'hg38' not in results['NM_016346.2:c.951dup']['primary_assembly_loci'].keys() + assert results['NM_016346.2:c.951dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': '15', 'ref': 'GTGGACCCCCA', 'pos': '72105924', 'alt': 'GTGGACCCCCA'}} + assert 'grch38' not in results['NM_016346.2:c.951dup']['primary_assembly_loci'].keys() + assert results['NM_016346.2:c.951dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2'} + + + def test_variant182(self): + variant = '15-72105928-AC-ATT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_014249.2:c.947_948insTT' in results.keys() + assert results['NM_014249.2:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014249.2:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014249.2:c.947_948insTT']['alt_genomic_loci'] == [] + assert results['NM_014249.2:c.947_948insTT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript 
variant 2, mRNA' + assert results['NM_014249.2:c.947_948insTT']['gene_symbol'] == 'NR2E3' + assert results['NM_014249.2:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Pro317SerfsTer8)', 'slr': 'NP_055064.1:p.(P317Sfs*8)'} + assert results['NM_014249.2:c.947_948insTT']['submitted_variant'] == '15-72105928-AC-ATT' + assert results['NM_014249.2:c.947_948insTT']['genome_context_intronic_sequence'] == '' + assert results['NM_014249.2:c.947_948insTT']['hgvs_lrg_variant'] == '' + assert results['NM_014249.2:c.947_948insTT']['hgvs_transcript_variant'] == 'NM_014249.2:c.947_948insTT' + assert results['NM_014249.2:c.947_948insTT']['hgvs_refseqgene_variant'] == '' + assert results['NM_014249.2:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} + assert 'hg38' not in results['NM_014249.2:c.947_948insTT']['primary_assembly_loci'].keys() + assert results['NM_014249.2:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} + assert 'grch38' not in results['NM_014249.2:c.947_948insTT']['primary_assembly_loci'].keys() + assert results['NM_014249.2:c.947_948insTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2'} + + assert 'NM_016346.3:c.947_948insTT' in results.keys() + assert results['NM_016346.3:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_016346.3:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_016346.3:c.947_948insTT']['alt_genomic_loci'] == [] + assert results['NM_016346.3:c.947_948insTT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, 
mRNA' + assert results['NM_016346.3:c.947_948insTT']['gene_symbol'] == 'NR2E3' + assert results['NM_016346.3:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Pro317SerfsTer8)', 'slr': 'NP_057430.1:p.(P317Sfs*8)'} + assert results['NM_016346.3:c.947_948insTT']['submitted_variant'] == '15-72105928-AC-ATT' + assert results['NM_016346.3:c.947_948insTT']['genome_context_intronic_sequence'] == '' + assert results['NM_016346.3:c.947_948insTT']['hgvs_lrg_variant'] == '' + assert results['NM_016346.3:c.947_948insTT']['hgvs_transcript_variant'] == 'NM_016346.3:c.947_948insTT' + assert results['NM_016346.3:c.947_948insTT']['hgvs_refseqgene_variant'] == '' + assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} + assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': u'ATT'}} + assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} + assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': u'ATT'}} + assert results['NM_016346.3:c.947_948insTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3'} + + assert results['flag'] == 'gene_variant' + assert 'NM_016346.2:c.947_948insTT' in results.keys() + assert results['NM_016346.2:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_016346.2:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_016346.2:c.947_948insTT']['alt_genomic_loci'] == [] + assert results['NM_016346.2:c.947_948insTT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA' + assert results['NM_016346.2:c.947_948insTT']['gene_symbol'] == 'NR2E3' + assert results['NM_016346.2:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Pro317SerfsTer8)', 'slr': 'NP_057430.1:p.(P317Sfs*8)'} + assert results['NM_016346.2:c.947_948insTT']['submitted_variant'] == '15-72105928-AC-ATT' + assert results['NM_016346.2:c.947_948insTT']['genome_context_intronic_sequence'] == '' + assert results['NM_016346.2:c.947_948insTT']['hgvs_lrg_variant'] == '' + assert results['NM_016346.2:c.947_948insTT']['hgvs_transcript_variant'] == 'NM_016346.2:c.947_948insTT' + assert results['NM_016346.2:c.947_948insTT']['hgvs_refseqgene_variant'] == '' + assert results['NM_016346.2:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} + assert 'hg38' not in results['NM_016346.2:c.947_948insTT']['primary_assembly_loci'].keys() + assert results['NM_016346.2:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} + assert 'grch38' not in results['NM_016346.2:c.947_948insTT']['primary_assembly_loci'].keys() + assert results['NM_016346.2:c.947_948insTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2'} + + assert 'NM_014249.3:c.947_948insTT' in results.keys() + assert results['NM_014249.3:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_014249.3:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014249.3:c.947_948insTT']['alt_genomic_loci'] == [] + assert results['NM_014249.3:c.947_948insTT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA' + assert results['NM_014249.3:c.947_948insTT']['gene_symbol'] == 'NR2E3' + assert results['NM_014249.3:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Pro317SerfsTer8)', 'slr': 'NP_055064.1:p.(P317Sfs*8)'} + assert results['NM_014249.3:c.947_948insTT']['submitted_variant'] == '15-72105928-AC-ATT' + assert results['NM_014249.3:c.947_948insTT']['genome_context_intronic_sequence'] == '' + assert results['NM_014249.3:c.947_948insTT']['hgvs_lrg_variant'] == '' + assert results['NM_014249.3:c.947_948insTT']['hgvs_transcript_variant'] == 'NM_014249.3:c.947_948insTT' + assert results['NM_014249.3:c.947_948insTT']['hgvs_refseqgene_variant'] == 'NG_009113.1:g.8035_8036insTT' + assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} + assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': u'ATT'}} + assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} + assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': u'ATT'}} + assert results['NM_014249.3:c.947_948insTT']['reference_sequence_records'] == 
{'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3'} + + + def test_variant183(self): + variant = '15-72105928-ACC-ATT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_014249.2:c.947_948insTT' in results.keys() + assert results['NM_014249.2:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014249.2:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014249.2:c.947_948insTT']['alt_genomic_loci'] == [] + assert results['NM_014249.2:c.947_948insTT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA' + assert results['NM_014249.2:c.947_948insTT']['gene_symbol'] == 'NR2E3' + assert results['NM_014249.2:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Pro317SerfsTer8)', 'slr': 'NP_055064.1:p.(P317Sfs*8)'} + assert results['NM_014249.2:c.947_948insTT']['submitted_variant'] == '15-72105928-ACC-ATT' + assert results['NM_014249.2:c.947_948insTT']['genome_context_intronic_sequence'] == '' + assert results['NM_014249.2:c.947_948insTT']['hgvs_lrg_variant'] == '' + assert results['NM_014249.2:c.947_948insTT']['hgvs_transcript_variant'] == 'NM_014249.2:c.947_948insTT' + assert results['NM_014249.2:c.947_948insTT']['hgvs_refseqgene_variant'] == '' + assert results['NM_014249.2:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} + assert 'hg38' not in results['NM_014249.2:c.947_948insTT']['primary_assembly_loci'].keys() + assert results['NM_014249.2:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'C', 'pos': 
'72105929', 'alt': 'TT'}} + assert 'grch38' not in results['NM_014249.2:c.947_948insTT']['primary_assembly_loci'].keys() + assert results['NM_014249.2:c.947_948insTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2'} + + assert 'NM_016346.3:c.947_948insTT' in results.keys() + assert results['NM_016346.3:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_016346.3:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_016346.3:c.947_948insTT']['alt_genomic_loci'] == [] + assert results['NM_016346.3:c.947_948insTT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA' + assert results['NM_016346.3:c.947_948insTT']['gene_symbol'] == 'NR2E3' + assert results['NM_016346.3:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Pro317SerfsTer8)', 'slr': 'NP_057430.1:p.(P317Sfs*8)'} + assert results['NM_016346.3:c.947_948insTT']['submitted_variant'] == '15-72105928-ACC-ATT' + assert results['NM_016346.3:c.947_948insTT']['genome_context_intronic_sequence'] == '' + assert results['NM_016346.3:c.947_948insTT']['hgvs_lrg_variant'] == '' + assert results['NM_016346.3:c.947_948insTT']['hgvs_transcript_variant'] == 'NM_016346.3:c.947_948insTT' + assert results['NM_016346.3:c.947_948insTT']['hgvs_refseqgene_variant'] == '' + assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} + assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': u'ATT'}} + assert 
results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} + assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': u'ATT'}} + assert results['NM_016346.3:c.947_948insTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3'} + + assert results['flag'] == 'gene_variant' + assert 'NM_016346.2:c.947_948insTT' in results.keys() + assert results['NM_016346.2:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_016346.2:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_016346.2:c.947_948insTT']['alt_genomic_loci'] == [] + assert results['NM_016346.2:c.947_948insTT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA' + assert results['NM_016346.2:c.947_948insTT']['gene_symbol'] == 'NR2E3' + assert results['NM_016346.2:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Pro317SerfsTer8)', 'slr': 'NP_057430.1:p.(P317Sfs*8)'} + assert results['NM_016346.2:c.947_948insTT']['submitted_variant'] == '15-72105928-ACC-ATT' + assert results['NM_016346.2:c.947_948insTT']['genome_context_intronic_sequence'] == '' + assert results['NM_016346.2:c.947_948insTT']['hgvs_lrg_variant'] == '' + assert results['NM_016346.2:c.947_948insTT']['hgvs_transcript_variant'] == 'NM_016346.2:c.947_948insTT' + assert results['NM_016346.2:c.947_948insTT']['hgvs_refseqgene_variant'] == '' + assert results['NM_016346.2:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': 
{'chr': 'chr15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} + assert 'hg38' not in results['NM_016346.2:c.947_948insTT']['primary_assembly_loci'].keys() + assert results['NM_016346.2:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} + assert 'grch38' not in results['NM_016346.2:c.947_948insTT']['primary_assembly_loci'].keys() + assert results['NM_016346.2:c.947_948insTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2'} + + assert 'NM_014249.3:c.947_948insTT' in results.keys() + assert results['NM_014249.3:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014249.3:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014249.3:c.947_948insTT']['alt_genomic_loci'] == [] + assert results['NM_014249.3:c.947_948insTT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA' + assert results['NM_014249.3:c.947_948insTT']['gene_symbol'] == 'NR2E3' + assert results['NM_014249.3:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Pro317SerfsTer8)', 'slr': 'NP_055064.1:p.(P317Sfs*8)'} + assert results['NM_014249.3:c.947_948insTT']['submitted_variant'] == '15-72105928-ACC-ATT' + assert results['NM_014249.3:c.947_948insTT']['genome_context_intronic_sequence'] == '' + assert results['NM_014249.3:c.947_948insTT']['hgvs_lrg_variant'] == '' + assert results['NM_014249.3:c.947_948insTT']['hgvs_transcript_variant'] == 'NM_014249.3:c.947_948insTT' + assert results['NM_014249.3:c.947_948insTT']['hgvs_refseqgene_variant'] == 'NG_009113.1:g.8035_8036insTT' + assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} + assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': u'ATT'}} + assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} + assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': u'ATT'}} + assert results['NM_014249.3:c.947_948insTT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3'} + + + def test_variant184(self): + variant = '15-72105927-GACC-GTT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_014249.3:c.947delinsTT' in results.keys() + assert results['NM_014249.3:c.947delinsTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014249.3:c.947delinsTT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014249.3:c.947delinsTT']['alt_genomic_loci'] == [] + assert results['NM_014249.3:c.947delinsTT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA' + assert results['NM_014249.3:c.947delinsTT']['gene_symbol'] == 'NR2E3' + assert results['NM_014249.3:c.947delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Asp316ValfsTer25)', 'slr': 'NP_055064.1:p.(D316Vfs*25)'} + assert results['NM_014249.3:c.947delinsTT']['submitted_variant'] == '15-72105927-GACC-GTT' + assert 
results['NM_014249.3:c.947delinsTT']['genome_context_intronic_sequence'] == '' + assert results['NM_014249.3:c.947delinsTT']['hgvs_lrg_variant'] == '' + assert results['NM_014249.3:c.947delinsTT']['hgvs_transcript_variant'] == 'NM_014249.3:c.947delinsTT' + assert results['NM_014249.3:c.947delinsTT']['hgvs_refseqgene_variant'] == 'NG_009113.1:g.8035delinsTT' + assert results['NM_014249.3:c.947delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'TT'}} + assert results['NM_014249.3:c.947delinsTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': u'TT'}} + assert results['NM_014249.3:c.947delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'TT'}} + assert results['NM_014249.3:c.947delinsTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588delinsTT', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': u'TT'}} + assert results['NM_014249.3:c.947delinsTT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3'} + + assert 'NM_016346.2:c.947delinsTT' in results.keys() + assert results['NM_016346.2:c.947delinsTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_016346.2:c.947delinsTT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_016346.2:c.947delinsTT']['alt_genomic_loci'] == [] + assert results['NM_016346.2:c.947delinsTT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA' + assert 
results['NM_016346.2:c.947delinsTT']['gene_symbol'] == 'NR2E3' + assert results['NM_016346.2:c.947delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Asp316ValfsTer25)', 'slr': 'NP_057430.1:p.(D316Vfs*25)'} + assert results['NM_016346.2:c.947delinsTT']['submitted_variant'] == '15-72105927-GACC-GTT' + assert results['NM_016346.2:c.947delinsTT']['genome_context_intronic_sequence'] == '' + assert results['NM_016346.2:c.947delinsTT']['hgvs_lrg_variant'] == '' + assert results['NM_016346.2:c.947delinsTT']['hgvs_transcript_variant'] == 'NM_016346.2:c.947delinsTT' + assert results['NM_016346.2:c.947delinsTT']['hgvs_refseqgene_variant'] == '' + assert results['NM_016346.2:c.947delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'TT'}} + assert 'hg38' not in results['NM_016346.2:c.947delinsTT']['primary_assembly_loci'].keys() + assert results['NM_016346.2:c.947delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'TT'}} + assert 'grch38' not in results['NM_016346.2:c.947delinsTT']['primary_assembly_loci'].keys() + assert results['NM_016346.2:c.947delinsTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2'} + + assert 'NM_014249.2:c.947delinsTT' in results.keys() + assert results['NM_014249.2:c.947delinsTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014249.2:c.947delinsTT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014249.2:c.947delinsTT']['alt_genomic_loci'] == [] + assert results['NM_014249.2:c.947delinsTT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA' + assert 
results['NM_014249.2:c.947delinsTT']['gene_symbol'] == 'NR2E3' + assert results['NM_014249.2:c.947delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Asp316ValfsTer25)', 'slr': 'NP_055064.1:p.(D316Vfs*25)'} + assert results['NM_014249.2:c.947delinsTT']['submitted_variant'] == '15-72105927-GACC-GTT' + assert results['NM_014249.2:c.947delinsTT']['genome_context_intronic_sequence'] == '' + assert results['NM_014249.2:c.947delinsTT']['hgvs_lrg_variant'] == '' + assert results['NM_014249.2:c.947delinsTT']['hgvs_transcript_variant'] == 'NM_014249.2:c.947delinsTT' + assert results['NM_014249.2:c.947delinsTT']['hgvs_refseqgene_variant'] == '' + assert results['NM_014249.2:c.947delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'TT'}} + assert 'hg38' not in results['NM_014249.2:c.947delinsTT']['primary_assembly_loci'].keys() + assert results['NM_014249.2:c.947delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'TT'}} + assert 'grch38' not in results['NM_014249.2:c.947delinsTT']['primary_assembly_loci'].keys() + assert results['NM_014249.2:c.947delinsTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2'} + + assert results['flag'] == 'gene_variant' + assert 'NM_016346.3:c.947delinsTT' in results.keys() + assert results['NM_016346.3:c.947delinsTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_016346.3:c.947delinsTT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_016346.3:c.947delinsTT']['alt_genomic_loci'] == [] + assert results['NM_016346.3:c.947delinsTT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), 
transcript variant 1, mRNA' + assert results['NM_016346.3:c.947delinsTT']['gene_symbol'] == 'NR2E3' + assert results['NM_016346.3:c.947delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Asp316ValfsTer25)', 'slr': 'NP_057430.1:p.(D316Vfs*25)'} + assert results['NM_016346.3:c.947delinsTT']['submitted_variant'] == '15-72105927-GACC-GTT' + assert results['NM_016346.3:c.947delinsTT']['genome_context_intronic_sequence'] == '' + assert results['NM_016346.3:c.947delinsTT']['hgvs_lrg_variant'] == '' + assert results['NM_016346.3:c.947delinsTT']['hgvs_transcript_variant'] == 'NM_016346.3:c.947delinsTT' + assert results['NM_016346.3:c.947delinsTT']['hgvs_refseqgene_variant'] == '' + assert results['NM_016346.3:c.947delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'TT'}} + assert results['NM_016346.3:c.947delinsTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': u'TT'}} + assert results['NM_016346.3:c.947delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'TT'}} + assert results['NM_016346.3:c.947delinsTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588delinsTT', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': u'TT'}} + assert results['NM_016346.3:c.947delinsTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3'} + + + def test_variant185(self): + variant = '19-41123093-A-AG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 
'NM_001042544.1:c.3233_3235=' in results.keys() + assert results['NM_001042544.1:c.3233_3235=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001042544.1:c.3233_3235=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001042544.1:c.3233_3235=']['alt_genomic_loci'] == [] + assert results['NM_001042544.1:c.3233_3235=']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA' + assert results['NM_001042544.1:c.3233_3235=']['gene_symbol'] == 'LTBP4' + assert results['NM_001042544.1:c.3233_3235=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036009.1:p.(Gln1078=)', 'slr': 'NP_001036009.1:p.(Q1078=)'} + assert results['NM_001042544.1:c.3233_3235=']['submitted_variant'] == '19-41123093-A-AG' + assert results['NM_001042544.1:c.3233_3235=']['genome_context_intronic_sequence'] == '' + assert results['NM_001042544.1:c.3233_3235=']['hgvs_lrg_variant'] == '' + assert results['NM_001042544.1:c.3233_3235=']['hgvs_transcript_variant'] == 'NM_001042544.1:c.3233_3235=' + assert results['NM_001042544.1:c.3233_3235=']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29022_29024=' + assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} + assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'ref': u'AGG', 'pos': '40617187', 'alt': u'AGG'}} + assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} + assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': 
{'chr': '19', 'ref': u'AGG', 'pos': '40617187', 'alt': u'AGG'}} + assert results['NM_001042544.1:c.3233_3235=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1'} + + assert 'NM_001042545.1:c.3032_3034=' in results.keys() + assert results['NM_001042545.1:c.3032_3034=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001042545.1:c.3032_3034=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001042545.1:c.3032_3034=']['alt_genomic_loci'] == [] + assert results['NM_001042545.1:c.3032_3034=']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA' + assert results['NM_001042545.1:c.3032_3034=']['gene_symbol'] == 'LTBP4' + assert results['NM_001042545.1:c.3032_3034=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036010.1:p.(Gln1011=)', 'slr': 'NP_001036010.1:p.(Q1011=)'} + assert results['NM_001042545.1:c.3032_3034=']['submitted_variant'] == '19-41123093-A-AG' + assert results['NM_001042545.1:c.3032_3034=']['genome_context_intronic_sequence'] == '' + assert results['NM_001042545.1:c.3032_3034=']['hgvs_lrg_variant'] == '' + assert results['NM_001042545.1:c.3032_3034=']['hgvs_transcript_variant'] == 'NM_001042545.1:c.3032_3034=' + assert results['NM_001042545.1:c.3032_3034=']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29022_29024=' + assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} + assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'ref': u'AGG', 'pos': '40617187', 'alt': u'AGG'}} + assert 
results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} + assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'ref': u'AGG', 'pos': '40617187', 'alt': u'AGG'}} + assert results['NM_001042545.1:c.3032_3034=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1'} + + assert 'NM_003573.2:c.3122_3124=' in results.keys() + assert results['NM_003573.2:c.3122_3124=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003573.2:c.3122_3124=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003573.2:c.3122_3124=']['alt_genomic_loci'] == [] + assert results['NM_003573.2:c.3122_3124=']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA' + assert results['NM_003573.2:c.3122_3124=']['gene_symbol'] == 'LTBP4' + assert results['NM_003573.2:c.3122_3124=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003564.2:p.(Gln1041=)', 'slr': 'NP_003564.2:p.(Q1041=)'} + assert results['NM_003573.2:c.3122_3124=']['submitted_variant'] == '19-41123093-A-AG' + assert results['NM_003573.2:c.3122_3124=']['genome_context_intronic_sequence'] == '' + assert results['NM_003573.2:c.3122_3124=']['hgvs_lrg_variant'] == '' + assert results['NM_003573.2:c.3122_3124=']['hgvs_transcript_variant'] == 'NM_003573.2:c.3122_3124=' + assert results['NM_003573.2:c.3122_3124=']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29022_29024=' + assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000019.9:g.41123095dup', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} + assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'ref': u'AGG', 'pos': '40617187', 'alt': u'AGG'}} + assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} + assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'ref': u'AGG', 'pos': '40617187', 'alt': u'AGG'}} + assert results['NM_003573.2:c.3122_3124=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2'} + + + def test_variant186(self): + variant = '19-41123093-A-AT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_003573.2:c.3123G>T' in results.keys() + assert results['NM_003573.2:c.3123G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003573.2:c.3123G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003573.2:c.3123G>T']['alt_genomic_loci'] == [] + assert results['NM_003573.2:c.3123G>T']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA' + assert results['NM_003573.2:c.3123G>T']['gene_symbol'] == 'LTBP4' + assert results['NM_003573.2:c.3123G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003564.2:p.(Gln1041His)', 'slr': 'NP_003564.2:p.(Q1041H)'} + assert results['NM_003573.2:c.3123G>T']['submitted_variant'] == '19-41123093-A-AT' + assert 
results['NM_003573.2:c.3123G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_003573.2:c.3123G>T']['hgvs_lrg_variant'] == '' + assert results['NM_003573.2:c.3123G>T']['hgvs_transcript_variant'] == 'NM_003573.2:c.3123G>T' + assert results['NM_003573.2:c.3123G>T']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29023G>T' + assert results['NM_003573.2:c.3123G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': u'AT'}} + assert results['NM_003573.2:c.3123G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': 'chr19', 'ref': u'G', 'pos': '40617188', 'alt': u'T'}} + assert results['NM_003573.2:c.3123G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': u'AT'}} + assert results['NM_003573.2:c.3123G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': '19', 'ref': u'G', 'pos': '40617188', 'alt': u'T'}} + assert results['NM_003573.2:c.3123G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001042545.1:c.3033G>T' in results.keys() + assert results['NM_001042545.1:c.3033G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001042545.1:c.3033G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001042545.1:c.3033G>T']['alt_genomic_loci'] == [] + assert results['NM_001042545.1:c.3033G>T']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA' + assert 
results['NM_001042545.1:c.3033G>T']['gene_symbol'] == 'LTBP4' + assert results['NM_001042545.1:c.3033G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036010.1:p.(Gln1011His)', 'slr': 'NP_001036010.1:p.(Q1011H)'} + assert results['NM_001042545.1:c.3033G>T']['submitted_variant'] == '19-41123093-A-AT' + assert results['NM_001042545.1:c.3033G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001042545.1:c.3033G>T']['hgvs_lrg_variant'] == '' + assert results['NM_001042545.1:c.3033G>T']['hgvs_transcript_variant'] == 'NM_001042545.1:c.3033G>T' + assert results['NM_001042545.1:c.3033G>T']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29023G>T' + assert results['NM_001042545.1:c.3033G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': u'AT'}} + assert results['NM_001042545.1:c.3033G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': 'chr19', 'ref': u'G', 'pos': '40617188', 'alt': u'T'}} + assert results['NM_001042545.1:c.3033G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': u'AT'}} + assert results['NM_001042545.1:c.3033G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': '19', 'ref': u'G', 'pos': '40617188', 'alt': u'T'}} + assert results['NM_001042545.1:c.3033G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1'} + + assert 'NM_001042544.1:c.3234G>T' in results.keys() + assert results['NM_001042544.1:c.3234G>T']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001042544.1:c.3234G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001042544.1:c.3234G>T']['alt_genomic_loci'] == [] + assert results['NM_001042544.1:c.3234G>T']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA' + assert results['NM_001042544.1:c.3234G>T']['gene_symbol'] == 'LTBP4' + assert results['NM_001042544.1:c.3234G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036009.1:p.(Gln1078His)', 'slr': 'NP_001036009.1:p.(Q1078H)'} + assert results['NM_001042544.1:c.3234G>T']['submitted_variant'] == '19-41123093-A-AT' + assert results['NM_001042544.1:c.3234G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001042544.1:c.3234G>T']['hgvs_lrg_variant'] == '' + assert results['NM_001042544.1:c.3234G>T']['hgvs_transcript_variant'] == 'NM_001042544.1:c.3234G>T' + assert results['NM_001042544.1:c.3234G>T']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29023G>T' + assert results['NM_001042544.1:c.3234G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': u'AT'}} + assert results['NM_001042544.1:c.3234G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': 'chr19', 'ref': u'G', 'pos': '40617188', 'alt': u'T'}} + assert results['NM_001042544.1:c.3234G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': u'AT'}} + assert results['NM_001042544.1:c.3234G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': '19', 'ref': u'G', 'pos': '40617188', 'alt': u'T'}} + assert results['NM_001042544.1:c.3234G>T']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1'} + + + def test_variant187(self): + variant = '19-41123093-AG-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001042544.1:c.3235_3236del' in results.keys() + assert results['NM_001042544.1:c.3235_3236del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001042544.1:c.3235_3236del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001042544.1:c.3235_3236del']['alt_genomic_loci'] == [] + assert results['NM_001042544.1:c.3235_3236del']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA' + assert results['NM_001042544.1:c.3235_3236del']['gene_symbol'] == 'LTBP4' + assert results['NM_001042544.1:c.3235_3236del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036009.1:p.(Gly1079LeufsTer17)', 'slr': 'NP_001036009.1:p.(G1079Lfs*17)'} + assert results['NM_001042544.1:c.3235_3236del']['submitted_variant'] == '19-41123093-AG-A' + assert results['NM_001042544.1:c.3235_3236del']['genome_context_intronic_sequence'] == '' + assert results['NM_001042544.1:c.3235_3236del']['hgvs_lrg_variant'] == '' + assert results['NM_001042544.1:c.3235_3236del']['hgvs_transcript_variant'] == 'NM_001042544.1:c.3235_3236del' + assert results['NM_001042544.1:c.3235_3236del']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29024_29025del' + assert results['NM_001042544.1:c.3235_3236del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095del', 'vcf': {'chr': 'chr19', 'ref': 'AG', 'pos': '41123093', 'alt': 'A'}} + assert results['NM_001042544.1:c.3235_3236del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617189_40617190del', 'vcf': {'chr': 'chr19', 'ref': 'AGG', 'pos': '40617187', 'alt': 
'A'}} + assert results['NM_001042544.1:c.3235_3236del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095del', 'vcf': {'chr': '19', 'ref': 'AG', 'pos': '41123093', 'alt': 'A'}} + assert results['NM_001042544.1:c.3235_3236del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617189_40617190del', 'vcf': {'chr': '19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'A'}} + assert results['NM_001042544.1:c.3235_3236del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001042545.1:c.3034_3035del' in results.keys() + assert results['NM_001042545.1:c.3034_3035del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001042545.1:c.3034_3035del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001042545.1:c.3034_3035del']['alt_genomic_loci'] == [] + assert results['NM_001042545.1:c.3034_3035del']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA' + assert results['NM_001042545.1:c.3034_3035del']['gene_symbol'] == 'LTBP4' + assert results['NM_001042545.1:c.3034_3035del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036010.1:p.(Gly1012LeufsTer17)', 'slr': 'NP_001036010.1:p.(G1012Lfs*17)'} + assert results['NM_001042545.1:c.3034_3035del']['submitted_variant'] == '19-41123093-AG-A' + assert results['NM_001042545.1:c.3034_3035del']['genome_context_intronic_sequence'] == '' + assert results['NM_001042545.1:c.3034_3035del']['hgvs_lrg_variant'] == '' + assert results['NM_001042545.1:c.3034_3035del']['hgvs_transcript_variant'] == 'NM_001042545.1:c.3034_3035del' + assert results['NM_001042545.1:c.3034_3035del']['hgvs_refseqgene_variant'] == 
'NG_021201.1:g.29024_29025del' + assert results['NM_001042545.1:c.3034_3035del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095del', 'vcf': {'chr': 'chr19', 'ref': 'AG', 'pos': '41123093', 'alt': 'A'}} + assert results['NM_001042545.1:c.3034_3035del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617189_40617190del', 'vcf': {'chr': 'chr19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'A'}} + assert results['NM_001042545.1:c.3034_3035del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095del', 'vcf': {'chr': '19', 'ref': 'AG', 'pos': '41123093', 'alt': 'A'}} + assert results['NM_001042545.1:c.3034_3035del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617189_40617190del', 'vcf': {'chr': '19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'A'}} + assert results['NM_001042545.1:c.3034_3035del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1'} + + assert 'NM_003573.2:c.3124_3125del' in results.keys() + assert results['NM_003573.2:c.3124_3125del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003573.2:c.3124_3125del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003573.2:c.3124_3125del']['alt_genomic_loci'] == [] + assert results['NM_003573.2:c.3124_3125del']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA' + assert results['NM_003573.2:c.3124_3125del']['gene_symbol'] == 'LTBP4' + assert results['NM_003573.2:c.3124_3125del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003564.2:p.(Gly1042LeufsTer17)', 'slr': 'NP_003564.2:p.(G1042Lfs*17)'} + assert results['NM_003573.2:c.3124_3125del']['submitted_variant'] == 
'19-41123093-AG-A' + assert results['NM_003573.2:c.3124_3125del']['genome_context_intronic_sequence'] == '' + assert results['NM_003573.2:c.3124_3125del']['hgvs_lrg_variant'] == '' + assert results['NM_003573.2:c.3124_3125del']['hgvs_transcript_variant'] == 'NM_003573.2:c.3124_3125del' + assert results['NM_003573.2:c.3124_3125del']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29024_29025del' + assert results['NM_003573.2:c.3124_3125del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095del', 'vcf': {'chr': 'chr19', 'ref': 'AG', 'pos': '41123093', 'alt': 'A'}} + assert results['NM_003573.2:c.3124_3125del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617189_40617190del', 'vcf': {'chr': 'chr19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'A'}} + assert results['NM_003573.2:c.3124_3125del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095del', 'vcf': {'chr': '19', 'ref': 'AG', 'pos': '41123093', 'alt': 'A'}} + assert results['NM_003573.2:c.3124_3125del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617189_40617190del', 'vcf': {'chr': '19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'A'}} + assert results['NM_003573.2:c.3124_3125del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2'} + + + def test_variant188(self): + variant = '19-41123093-AG-AG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001042545.1:c.3035del' in results.keys() + assert results['NM_001042545.1:c.3035del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001042545.1:c.3035del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001042545.1:c.3035del']['alt_genomic_loci'] == [] + assert 
results['NM_001042545.1:c.3035del']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA' + assert results['NM_001042545.1:c.3035del']['gene_symbol'] == 'LTBP4' + assert results['NM_001042545.1:c.3035del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036010.1:p.(Gly1012ValfsTer14)', 'slr': 'NP_001036010.1:p.(G1012Vfs*14)'} + assert results['NM_001042545.1:c.3035del']['submitted_variant'] == '19-41123093-AG-AG' + assert results['NM_001042545.1:c.3035del']['genome_context_intronic_sequence'] == '' + assert results['NM_001042545.1:c.3035del']['hgvs_lrg_variant'] == '' + assert results['NM_001042545.1:c.3035del']['hgvs_transcript_variant'] == 'NM_001042545.1:c.3035del' + assert results['NM_001042545.1:c.3035del']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29025del' + assert results['NM_001042545.1:c.3035del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094=', 'vcf': {'chr': 'chr19', 'ref': 'AG', 'pos': '41123093', 'alt': 'AG'}} + assert results['NM_001042545.1:c.3035del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617190del', 'vcf': {'chr': 'chr19', 'ref': 'AG', 'pos': '40617187', 'alt': 'A'}} + assert results['NM_001042545.1:c.3035del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094=', 'vcf': {'chr': '19', 'ref': 'AG', 'pos': '41123093', 'alt': 'AG'}} + assert results['NM_001042545.1:c.3035del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617190del', 'vcf': {'chr': '19', 'ref': 'AG', 'pos': '40617187', 'alt': 'A'}} + assert results['NM_001042545.1:c.3035del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1'} + + 
assert results['flag'] == 'gene_variant' + assert 'NM_001042544.1:c.3236del' in results.keys() + assert results['NM_001042544.1:c.3236del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001042544.1:c.3236del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001042544.1:c.3236del']['alt_genomic_loci'] == [] + assert results['NM_001042544.1:c.3236del']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA' + assert results['NM_001042544.1:c.3236del']['gene_symbol'] == 'LTBP4' + assert results['NM_001042544.1:c.3236del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036009.1:p.(Gly1079ValfsTer14)', 'slr': 'NP_001036009.1:p.(G1079Vfs*14)'} + assert results['NM_001042544.1:c.3236del']['submitted_variant'] == '19-41123093-AG-AG' + assert results['NM_001042544.1:c.3236del']['genome_context_intronic_sequence'] == '' + assert results['NM_001042544.1:c.3236del']['hgvs_lrg_variant'] == '' + assert results['NM_001042544.1:c.3236del']['hgvs_transcript_variant'] == 'NM_001042544.1:c.3236del' + assert results['NM_001042544.1:c.3236del']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29025del' + assert results['NM_001042544.1:c.3236del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094=', 'vcf': {'chr': 'chr19', 'ref': 'AG', 'pos': '41123093', 'alt': 'AG'}} + assert results['NM_001042544.1:c.3236del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617190del', 'vcf': {'chr': 'chr19', 'ref': 'AG', 'pos': '40617187', 'alt': 'A'}} + assert results['NM_001042544.1:c.3236del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094=', 'vcf': {'chr': '19', 'ref': 'AG', 'pos': '41123093', 'alt': 'AG'}} + assert results['NM_001042544.1:c.3236del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617190del', 
'vcf': {'chr': '19', 'ref': 'AG', 'pos': '40617187', 'alt': 'A'}} + assert results['NM_001042544.1:c.3236del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1'} + + assert 'NM_003573.2:c.3125del' in results.keys() + assert results['NM_003573.2:c.3125del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003573.2:c.3125del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003573.2:c.3125del']['alt_genomic_loci'] == [] + assert results['NM_003573.2:c.3125del']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA' + assert results['NM_003573.2:c.3125del']['gene_symbol'] == 'LTBP4' + assert results['NM_003573.2:c.3125del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003564.2:p.(Gly1042ValfsTer14)', 'slr': 'NP_003564.2:p.(G1042Vfs*14)'} + assert results['NM_003573.2:c.3125del']['submitted_variant'] == '19-41123093-AG-AG' + assert results['NM_003573.2:c.3125del']['genome_context_intronic_sequence'] == '' + assert results['NM_003573.2:c.3125del']['hgvs_lrg_variant'] == '' + assert results['NM_003573.2:c.3125del']['hgvs_transcript_variant'] == 'NM_003573.2:c.3125del' + assert results['NM_003573.2:c.3125del']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29025del' + assert results['NM_003573.2:c.3125del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094=', 'vcf': {'chr': 'chr19', 'ref': 'AG', 'pos': '41123093', 'alt': 'AG'}} + assert results['NM_003573.2:c.3125del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617190del', 'vcf': {'chr': 'chr19', 'ref': 'AG', 'pos': '40617187', 'alt': 'A'}} + assert results['NM_003573.2:c.3125del']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094=', 'vcf': {'chr': '19', 'ref': 'AG', 'pos': '41123093', 'alt': 'AG'}} + assert results['NM_003573.2:c.3125del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617190del', 'vcf': {'chr': '19', 'ref': 'AG', 'pos': '40617187', 'alt': 'A'}} + assert results['NM_003573.2:c.3125del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2'} + + + def test_variant189(self): + variant = 'NM_012309.4:c.913-5058G>A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_012309.4:c.913-5058G>A' in results.keys() + assert results['NM_012309.4:c.913-5058G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_012309.4:c.913-5058G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_012309.4:c.913-5058G>A']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'HG865_PATCH', 'ref': u'C', 'pos': '574546', 'alt': u'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'NW_004070871.1', 'ref': u'C', 'pos': '574546', 'alt': u'T'}}}] + assert results['NM_012309.4:c.913-5058G>A']['transcript_description'] == 'Homo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 1, mRNA' + assert results['NM_012309.4:c.913-5058G>A']['gene_symbol'] == 'SHANK2' + assert results['NM_012309.4:c.913-5058G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_036441.2:p.?', 'slr': 'NP_036441.2:p.?'} + assert results['NM_012309.4:c.913-5058G>A']['submitted_variant'] == 'NM_012309.4:c.913-5058G>A' + assert results['NM_012309.4:c.913-5058G>A']['genome_context_intronic_sequence'] == 'NC_000011.10(NM_012309.4):c.913-5058G>A' + 
assert results['NM_012309.4:c.913-5058G>A']['hgvs_lrg_variant'] == '' + assert results['NM_012309.4:c.913-5058G>A']['hgvs_transcript_variant'] == 'NM_012309.4:c.913-5058G>A' + assert results['NM_012309.4:c.913-5058G>A']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci'].keys() + assert results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71080333C>T', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '71080333', 'alt': u'T'}} + assert 'grch37' not in results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci'].keys() + assert results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71080333C>T', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '71080333', 'alt': u'T'}} + assert results['NM_012309.4:c.913-5058G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_036441.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_012309.4'} + + + def test_variant190(self): + variant = 'LRG_199t1:c.2376[G>C];[G>C]' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_004006.2:c.2376G>C' in results.keys() + assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.2376G>C' + assert results['NM_004006.2:c.2376G>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.2376G>C']['alt_genomic_loci'] == [] + assert results['NM_004006.2:c.2376G>C']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + assert results['NM_004006.2:c.2376G>C']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.2376G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Val792=)', 'slr': 'NP_003997.1:p.(V792=)'} + assert results['NM_004006.2:c.2376G>C']['submitted_variant'] == 
'LRG_199t1:c.2376[G>C];[G>C]' + assert results['NM_004006.2:c.2376G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_variant'] == 'LRG_199:g.842851G>C' + assert results['NM_004006.2:c.2376G>C']['hgvs_transcript_variant'] == 'NM_004006.2:c.2376G>C' + assert results['NM_004006.2:c.2376G>C']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.842851G>C' + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'chrX', 'ref': u'C', 'pos': '32519876', 'alt': u'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'chrX', 'ref': u'C', 'pos': '32501759', 'alt': u'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'X', 'ref': u'C', 'pos': '32519876', 'alt': u'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'X', 'ref': u'C', 'pos': '32501759', 'alt': u'G'}} + assert results['NM_004006.2:c.2376G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + + + def test_variant191(self): + variant = 'LRG_199t1:c.[2376G>C];[3103del]' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_004006.2:c.3103del' in results.keys() + assert results['NM_004006.2:c.3103del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.3103del' + assert results['NM_004006.2:c.3103del']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_004006.2:c.3103del']['alt_genomic_loci'] == [] + assert results['NM_004006.2:c.3103del']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + assert results['NM_004006.2:c.3103del']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.3103del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Gln1035SerfsTer9)', 'slr': 'NP_003997.1:p.(Q1035Sfs*9)'} + assert results['NM_004006.2:c.3103del']['submitted_variant'] == 'LRG_199t1:c.[2376G>C];[3103del]' + assert results['NM_004006.2:c.3103del']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.3103del']['hgvs_lrg_variant'] == 'LRG_199:g.876053del' + assert results['NM_004006.2:c.3103del']['hgvs_transcript_variant'] == 'NM_004006.2:c.3103del' + assert results['NM_004006.2:c.3103del']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.876053del' + assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32486674del', 'vcf': {'chr': 'chrX', 'ref': 'TG', 'pos': '32486673', 'alt': 'T'}} + assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32468557del', 'vcf': {'chr': 'chrX', 'ref': 'TG', 'pos': '32468556', 'alt': 'T'}} + assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32486674del', 'vcf': {'chr': 'X', 'ref': 'TG', 'pos': '32486673', 'alt': 'T'}} + assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32468557del', 'vcf': {'chr': 'X', 'ref': 'TG', 'pos': '32468556', 'alt': 'T'}} + assert results['NM_004006.2:c.3103del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 
'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + + assert results['flag'] == 'gene_variant' + assert 'NM_004006.2:c.2376G>C' in results.keys() + assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.2376G>C' + assert results['NM_004006.2:c.2376G>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.2376G>C']['alt_genomic_loci'] == [] + assert results['NM_004006.2:c.2376G>C']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + assert results['NM_004006.2:c.2376G>C']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.2376G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Val792=)', 'slr': 'NP_003997.1:p.(V792=)'} + assert results['NM_004006.2:c.2376G>C']['submitted_variant'] == 'LRG_199t1:c.[2376G>C];[3103del]' + assert results['NM_004006.2:c.2376G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_variant'] == 'LRG_199:g.842851G>C' + assert results['NM_004006.2:c.2376G>C']['hgvs_transcript_variant'] == 'NM_004006.2:c.2376G>C' + assert results['NM_004006.2:c.2376G>C']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.842851G>C' + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'chrX', 'ref': u'C', 'pos': '32519876', 'alt': u'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'chrX', 'ref': u'C', 'pos': '32501759', 'alt': u'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'X', 'ref': u'C', 'pos': '32519876', 'alt': u'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': 
{'chr': 'X', 'ref': u'C', 'pos': '32501759', 'alt': u'G'}} + assert results['NM_004006.2:c.2376G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + + + def test_variant192(self): + variant = 'LRG_199t1:c.[4358_4359del;4361_4372del]' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_004006.2:c.4358_4372delinsG' in results.keys() + assert results['NM_004006.2:c.4358_4372delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.4358_4372delinsG' + assert results['NM_004006.2:c.4358_4372delinsG']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.4358_4372delinsG']['alt_genomic_loci'] == [] + assert results['NM_004006.2:c.4358_4372delinsG']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + assert results['NM_004006.2:c.4358_4372delinsG']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.4358_4372delinsG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Asp1453GlyfsTer11)', 'slr': 'NP_003997.1:p.(D1453Gfs*11)'} + assert results['NM_004006.2:c.4358_4372delinsG']['submitted_variant'] == 'LRG_199t1:c.[4358_4359del;4361_4372del]' + assert results['NM_004006.2:c.4358_4372delinsG']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.4358_4372delinsG']['hgvs_lrg_variant'] == 'LRG_199:g.954949_954963delinsG' + assert results['NM_004006.2:c.4358_4372delinsG']['hgvs_transcript_variant'] == 'NM_004006.2:c.4358_4372delinsG' + assert results['NM_004006.2:c.4358_4372delinsG']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.954949_954963delinsG' + assert results['NM_004006.2:c.4358_4372delinsG']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000023.10:g.32407764_32407778delinsC', 'vcf': {'chr': 'chrX', 'ref': 'ACTTCATGGAGACAT', 'pos': '32407764', 'alt': u'C'}} + assert results['NM_004006.2:c.4358_4372delinsG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32389647_32389661delinsC', 'vcf': {'chr': 'chrX', 'ref': 'ACTTCATGGAGACAT', 'pos': '32389647', 'alt': u'C'}} + assert results['NM_004006.2:c.4358_4372delinsG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32407764_32407778delinsC', 'vcf': {'chr': 'X', 'ref': 'ACTTCATGGAGACAT', 'pos': '32407764', 'alt': u'C'}} + assert results['NM_004006.2:c.4358_4372delinsG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32389647_32389661delinsC', 'vcf': {'chr': 'X', 'ref': 'ACTTCATGGAGACAT', 'pos': '32389647', 'alt': u'C'}} + assert results['NM_004006.2:c.4358_4372delinsG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + + assert 'NM_004006.2:c.4358_4359del' in results.keys() + assert results['NM_004006.2:c.4358_4359del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.4358_4359del' + assert results['NM_004006.2:c.4358_4359del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.4358_4359del']['alt_genomic_loci'] == [] + assert results['NM_004006.2:c.4358_4359del']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + assert results['NM_004006.2:c.4358_4359del']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.4358_4359del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Asp1453GlyfsTer15)', 'slr': 'NP_003997.1:p.(D1453Gfs*15)'} + assert 
results['NM_004006.2:c.4358_4359del']['submitted_variant'] == 'LRG_199t1:c.[4358_4359del;4361_4372del]' + assert results['NM_004006.2:c.4358_4359del']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.4358_4359del']['hgvs_lrg_variant'] == 'LRG_199:g.954949_954950del' + assert results['NM_004006.2:c.4358_4359del']['hgvs_transcript_variant'] == 'NM_004006.2:c.4358_4359del' + assert results['NM_004006.2:c.4358_4359del']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.954949_954950del' + assert results['NM_004006.2:c.4358_4359del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32407777_32407778del', 'vcf': {'chr': 'chrX', 'ref': 'CAT', 'pos': '32407776', 'alt': 'C'}} + assert results['NM_004006.2:c.4358_4359del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32389660_32389661del', 'vcf': {'chr': 'chrX', 'ref': 'CAT', 'pos': '32389659', 'alt': 'C'}} + assert results['NM_004006.2:c.4358_4359del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32407777_32407778del', 'vcf': {'chr': 'X', 'ref': 'CAT', 'pos': '32407776', 'alt': 'C'}} + assert results['NM_004006.2:c.4358_4359del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32389660_32389661del', 'vcf': {'chr': 'X', 'ref': 'CAT', 'pos': '32389659', 'alt': 'C'}} + assert results['NM_004006.2:c.4358_4359del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + + assert 'NM_004006.2:c.4362_4373del' in results.keys() + assert results['NM_004006.2:c.4362_4373del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.4362_4373del' + assert results['NM_004006.2:c.4362_4373del']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_004006.2:c.4362_4373del']['alt_genomic_loci'] == [] + assert results['NM_004006.2:c.4362_4373del']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + assert results['NM_004006.2:c.4362_4373del']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.4362_4373del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Ser1455_Phe1458del)', 'slr': 'NP_003997.1:p.(S1455_F1458del)'} + assert results['NM_004006.2:c.4362_4373del']['submitted_variant'] == 'LRG_199t1:c.[4358_4359del;4361_4372del]' + assert results['NM_004006.2:c.4362_4373del']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.4362_4373del']['hgvs_lrg_variant'] == 'LRG_199:g.954953_954964del' + assert results['NM_004006.2:c.4362_4373del']['hgvs_transcript_variant'] == 'NM_004006.2:c.4362_4373del' + assert results['NM_004006.2:c.4362_4373del']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.954953_954964del' + assert results['NM_004006.2:c.4362_4373del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32407763_32407774del', 'vcf': {'chr': 'chrX', 'ref': 'AAACTTCATGGAG', 'pos': '32407762', 'alt': 'A'}} + assert results['NM_004006.2:c.4362_4373del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32389646_32389657del', 'vcf': {'chr': 'chrX', 'ref': 'AAACTTCATGGAG', 'pos': '32389645', 'alt': 'A'}} + assert results['NM_004006.2:c.4362_4373del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32407763_32407774del', 'vcf': {'chr': 'X', 'ref': 'AAACTTCATGGAG', 'pos': '32407762', 'alt': 'A'}} + assert results['NM_004006.2:c.4362_4373del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32389646_32389657del', 'vcf': {'chr': 'X', 'ref': 'AAACTTCATGGAG', 'pos': '32389645', 'alt': 'A'}} + assert results['NM_004006.2:c.4362_4373del']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + + + def test_variant193(self): + variant = 'LRG_199t1:c.2376G>C(;)3103del' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_004006.2:c.3103del' in results.keys() + assert results['NM_004006.2:c.3103del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.3103del' + assert results['NM_004006.2:c.3103del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.3103del']['alt_genomic_loci'] == [] + assert results['NM_004006.2:c.3103del']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + assert results['NM_004006.2:c.3103del']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.3103del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Gln1035SerfsTer9)', 'slr': 'NP_003997.1:p.(Q1035Sfs*9)'} + assert results['NM_004006.2:c.3103del']['submitted_variant'] == 'LRG_199t1:c.2376G>C(;)3103del' + assert results['NM_004006.2:c.3103del']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.3103del']['hgvs_lrg_variant'] == 'LRG_199:g.876053del' + assert results['NM_004006.2:c.3103del']['hgvs_transcript_variant'] == 'NM_004006.2:c.3103del' + assert results['NM_004006.2:c.3103del']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.876053del' + assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32486674del', 'vcf': {'chr': 'chrX', 'ref': 'TG', 'pos': '32486673', 'alt': 'T'}} + assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32468557del', 'vcf': {'chr': 'chrX', 'ref': 'TG', 'pos': '32468556', 'alt': 'T'}} + assert 
results['NM_004006.2:c.3103del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32486674del', 'vcf': {'chr': 'X', 'ref': 'TG', 'pos': '32486673', 'alt': 'T'}} + assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32468557del', 'vcf': {'chr': 'X', 'ref': 'TG', 'pos': '32468556', 'alt': 'T'}} + assert results['NM_004006.2:c.3103del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + + assert results['flag'] == 'gene_variant' + assert 'NM_004006.2:c.2376G>C' in results.keys() + assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.2376G>C' + assert results['NM_004006.2:c.2376G>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.2376G>C']['alt_genomic_loci'] == [] + assert results['NM_004006.2:c.2376G>C']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + assert results['NM_004006.2:c.2376G>C']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.2376G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Val792=)', 'slr': 'NP_003997.1:p.(V792=)'} + assert results['NM_004006.2:c.2376G>C']['submitted_variant'] == 'LRG_199t1:c.2376G>C(;)3103del' + assert results['NM_004006.2:c.2376G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_variant'] == 'LRG_199:g.842851G>C' + assert results['NM_004006.2:c.2376G>C']['hgvs_transcript_variant'] == 'NM_004006.2:c.2376G>C' + assert results['NM_004006.2:c.2376G>C']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.842851G>C' + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'chrX', 'ref': u'C', 'pos': '32519876', 'alt': u'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'chrX', 'ref': u'C', 'pos': '32501759', 'alt': u'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'X', 'ref': u'C', 'pos': '32519876', 'alt': u'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'X', 'ref': u'C', 'pos': '32501759', 'alt': u'G'}} + assert results['NM_004006.2:c.2376G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + + + def test_variant194(self): + variant = 'LRG_199t1:c.2376[G>C];[(G>C)]' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_004006.2:c.2376G>C' in results.keys() + assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.2376G>C' + assert results['NM_004006.2:c.2376G>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.2376G>C']['alt_genomic_loci'] == [] + assert results['NM_004006.2:c.2376G>C']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + assert results['NM_004006.2:c.2376G>C']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.2376G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Val792=)', 'slr': 'NP_003997.1:p.(V792=)'} + assert 
results['NM_004006.2:c.2376G>C']['submitted_variant'] == 'LRG_199t1:c.2376[G>C];[(G>C)]' + assert results['NM_004006.2:c.2376G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_variant'] == 'LRG_199:g.842851G>C' + assert results['NM_004006.2:c.2376G>C']['hgvs_transcript_variant'] == 'NM_004006.2:c.2376G>C' + assert results['NM_004006.2:c.2376G>C']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.842851G>C' + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'chrX', 'ref': u'C', 'pos': '32519876', 'alt': u'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'chrX', 'ref': u'C', 'pos': '32501759', 'alt': u'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'X', 'ref': u'C', 'pos': '32519876', 'alt': u'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'X', 'ref': u'C', 'pos': '32501759', 'alt': u'G'}} + assert results['NM_004006.2:c.2376G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + + + def test_variant195(self): + variant = 'LRG_199t1:c.[2376G>C];[?]' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_004006.2:c.2376G>C' in results.keys() + assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.2376G>C' + assert 
results['NM_004006.2:c.2376G>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.2376G>C']['alt_genomic_loci'] == [] + assert results['NM_004006.2:c.2376G>C']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + assert results['NM_004006.2:c.2376G>C']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.2376G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Val792=)', 'slr': 'NP_003997.1:p.(V792=)'} + assert results['NM_004006.2:c.2376G>C']['submitted_variant'] == 'LRG_199t1:c.[2376G>C];[?]' + assert results['NM_004006.2:c.2376G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_variant'] == 'LRG_199:g.842851G>C' + assert results['NM_004006.2:c.2376G>C']['hgvs_transcript_variant'] == 'NM_004006.2:c.2376G>C' + assert results['NM_004006.2:c.2376G>C']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.842851G>C' + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'chrX', 'ref': u'C', 'pos': '32519876', 'alt': u'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'chrX', 'ref': u'C', 'pos': '32501759', 'alt': u'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'X', 'ref': u'C', 'pos': '32519876', 'alt': u'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'X', 'ref': u'C', 'pos': '32501759', 'alt': u'G'}} + assert results['NM_004006.2:c.2376G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 
'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + + + def test_variant196(self): + variant = 'LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_004006.2:c.476T=' in results.keys() + assert results['NM_004006.2:c.476T=']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.476T=' + assert results['NM_004006.2:c.476T=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.476T=']['alt_genomic_loci'] == [] + assert results['NM_004006.2:c.476T=']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + assert results['NM_004006.2:c.476T=']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.476T=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Phe159=)', 'slr': 'NP_003997.1:p.(F159=)'} + assert results['NM_004006.2:c.476T=']['submitted_variant'] == 'LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C' + assert results['NM_004006.2:c.476T=']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.476T=']['hgvs_lrg_variant'] == 'LRG_199:g.528088T=' + assert results['NM_004006.2:c.476T=']['hgvs_transcript_variant'] == 'NM_004006.2:c.476T=' + assert results['NM_004006.2:c.476T=']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.528088T=' + assert results['NM_004006.2:c.476T=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639A=', 'vcf': {'chr': 'chrX', 'ref': u'A', 'pos': '32834639', 'alt': u'A'}} + assert results['NM_004006.2:c.476T=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522A=', 'vcf': {'chr': 'chrX', 'ref': u'A', 'pos': '32816522', 'alt': u'A'}} + assert results['NM_004006.2:c.476T=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639A=', 'vcf': {'chr': 'X', 'ref': u'A', 'pos': 
'32834639', 'alt': u'A'}} + assert results['NM_004006.2:c.476T=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522A=', 'vcf': {'chr': 'X', 'ref': u'A', 'pos': '32816522', 'alt': u'A'}} + assert results['NM_004006.2:c.476T=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + + assert 'NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT' in results.keys() + assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.296_358-3delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGC' + assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['alt_genomic_loci'] == [] + assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, 
mRNA' + assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Ile99Ser)', 'slr': 'NP_003997.1:p.(I99S)'} + assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['submitted_variant'] == 'LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C' + assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['hgvs_lrg_variant'] == 'LRG_199:g.521254_527967delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGC' + assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['hgvs_transcript_variant'] == 'NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT' + 
assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.521254_527967delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGC' + assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639_32841473delinsAAGTTGATTACATTAACCTGTGGATAATTACGAGTTGATTGTCGGACCCAGCTCAGGAGAATCTTTTCACTGTTGGTTTGTTGCAATCCAGCCATGATATTTTTCATTACATTTTTGACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC', 'vcf': {'chr': 'chrX', 'ref': 'ACATGTGGAAATAAATTTTCATAAGAAAATGCATTCCTTGAGCAAGAACCATGCAAACTTAAATATGAATGTCCTTGATCTTCAGTGATAAATAGAAATTTTAGGGCCAATTAGTAATGAGACATAATAGATTCTACCAGAAGTTAAGTCTATTCTCAAAGGCTAGGAGTCTATTCTGATTCATTGGTATCTATGCCATACCATTTACTGAATGTGTTCACTATTACTCCTGGATTCTGCCATATTAGACCCAACATTTACTGTTTATAGAATTGCTCACCTTCTAGCTTTCAATGAACAAGCTAGAGAAGACAGTGTATAAAACATTAGACTTATTGTTAACCAAGGACTTAAGATATAAAAGCGCTTTTTGGATAGGAGGAATACTATAGATAATTGTTTTTTAAAGTATATGTTCAAATTTTGCCTTTGCTATTAATTGTGGTAATTGGACAAGTTGCTTAACCTCTTCTAGGCCTCATTTCTGTGTATCTTACATGCAAGGTTAGTGTAAGATCAAAGTACAAAATATGGTCAGATAAAACCCCTGATTTCATACCATCATTTAACGCAAGCTATATTTATTGGCTGAACTGAAAAGAAGACAGAAATGTTTGCCTACCTAAAATAAAGAAAAAAAACAACAAAGGAAGGAAATATTTATCCCTAAGAGGTGTACATATGTAGCCTTGGTTCTATTTTCATTACTTGGTTTCCATGTATTTGATTGTCTCATAGGCTTCGTGCATGTGCAGATGTTATTCAACAGATTATCAAGTTTAATTCTTCTGAGTCATCTGATGCAGAAATCCATTCTAAGCATTTTTTCTAAGTTTCATGAATGCAGTGACGCTTCATAGAAAAAACTGTCTACATCTTTTATTAAAGAATTATTATTTATTAAAGACTGCTTTTTAAATATAAGCCCCTTATGAGATGCTGGAAGAAGACATCTATGTAAAAGTGATCAGTCTGTACCACTAAGCACTTTCCTTCTGGGAAGGTTAAGGTTCATATACTATCAACATTTGATCAGATGATGAATATTTATTGTGCATTCTAAATATGACAATTTTATTTTTTCATCCATATAGAAGTT
AACAAATGCCTTTGGGGTAAATGCATATTTTAACATCTCCAATGTTAGTCATCTTTTTATTAAAAAGTAAATGCAAATTTTAGGATATACTACTAAATCTCATTTTAACATACTTTTAAAAGATTTGAGATGATGCTTTGTAAACACAGTAACGATTCGGAGATATGAAATCTTTGCATGTATCTGTCACAATCCCTTGATAACTAAGCCTATTATTTAGGAGTGGATTATCCAAGTTTTCCTAATTCCTTCTTGACCTACTTATTTAGTCAGTTACTCAGTGAATACAGGTCAAATAATTACGTAATTTGTCTCATATTCTAAAATTATAAATTGCTCAACCTTTATGTACAGTGACTGCAGGAGGTGTATTTTGTGTCCTGTGCTAAATTACGGTTGGAATCAGCAGATAGTGGCTTAATGGTGGTAATGGATTTGCAATAATCAAGCAATCCTCCAATGATGAATGTGATTCATTTGTGAAATGCTTGGCCAGTCTGCCCTCTATGTGCATAACGTTAAAGGACAACAAAGTATTCAATCCAAAACTCCTTTATAGCATCATTATGTCACTAATTCAGCTTATCAGCCTCAGCCTCGCTGTTATTGCTTCTACTTAGGTGCAAATGTTGCTCAGACAAAAAAAAATAATAAAGGGCAATGTATGATAATCATGTCTCCTTTTCCTACATAAAAGGCAGTTATCAAAAAATCATTGAGCTGGTCCCCAAAGGTGAGCCTGTAAACAGAAAACTGTCCCGTCCTCAGGTCTCACTGTCTCTAAAGGAGGCTCAATTTAAACAGATGTATTTGTGTGTGTGCGCGCATACAACTATAAATCAATTTTTAAAAGGAAAAAAGTTGAAATGTCTTTGAGCAGTGATTTTTAAAAATGACTAGGAGTTAACTGGGCCAAGAAGGCATGATGGACATGGAGGTGCACCACCAAGATTGACCTTCAAGGAAGGGCTTGTTGCTCCAGCTGTGGTCAGGCAGCTTCTAGTTGTTAGTTCTCTCAGGTACCACCTGAGTTGCAGAGTCCTGCCAGCCAATGTCACACCCTCTCTAGGGCAACCCACTACTATTGACTGATGAAGGGGCAGAGTATAAACACTCAGCCATTTTAACCCCATTTGAGACAACTCTGAGGGGTCATCCTAGCTGCAAGTTGTCTATGGGGTTTGAATAAGCGTGTCATAGGGTCTACGTCACAGCTCTAGTTCTTCCTCTTTCCACTCCTGTCACCTCTGCCCTTCTACAGGTGTTTTTTTTTTTTTTTTTTTTTCCAGGGCATGGCTTAATAAACATGCTGCCTGCCAAACTCAACTCCGAAGTCTACTCCCCAGAACCCAGACTGTGATTAAAAGAAGTTGCAGCAGAGGAATAGAAATGAAGAGGGTAGGAATAATCTTATATGGTAAGGATAGTTTCTGTGAAGCTCTGCAGAAGAGCAGAGCAAGAACTGTCTTGCACAGAAGTGACTTGGTAGGCAGGAATTTACAGGGATGGCTTTCAGCAGTGCGAACGTCATCATTCTCCTGTAATTGCAAATTGTATCAATAATATGAACATTCTAAAGTGAATTAGCGTTGCCATTAAGTACATCCCTTGAATTGTCCAGTAAAAGTATGGACCTGACTTCTGATAGCTCACCACTGTTAGCAAGATTTGTTATTACAACCAGCTGGTTGCCAGAGCCAAGTTTAGCACTCCGAAAAAAAAAGAATTGCACTTAAGAAGGAAGAAAAAGATGCCAAACACATGTTATATGCTTTGCAATTTTGTTCAAACCTGCCTATGTTACAAAATATGCGTTACTGTTTTAGGCTAGCAATTTTATTTGAGATGGACAGTAGAAAAACAGTAATAGGAAAACAGAGTTTGCATTCCTGAGTTTTCACAAGTTCAATGAGAAGTCCTCCATGGCACTATTAATAACATAGTATGTTGCATTAGAAAAAGGGGCCAACATGGTTCTATCTGTAAATGCCGCTCGTTAT
CTGATAGATCTAGAATGTACATTAAGTTGACCAAAATGTCCTTTAATCTTTGCTGAAATTCAAGGGCTTCTACCATTTAATAGGAAGAAAATATGTAATGTTGGTGTAAAAAAAAAAAAAAAAAAGAAAAACACATTTTTAGATTCACTGGATTATGATGAACACTGAGCAAGATAATTTTTCAAAGTGTGCTTAATAGTTTTAGAACAGGGCAGTGTGATATAAGAGAGATGAGATAAGTAAGCAGAGGCTGGGAAGAGTCTACAGTCTTGCAGTCTCAAGGATATGGTGATTCAAGGGCATAAATGAAAAACAACAGGCTATAAGAGAGGTCTCAGACAAAGAAAGATTTAAAAAGCATTCCTTCTGATATTCCTCATTGCAAAAGCGCCTCCACATGCTAATAATGAAAAATGGGCTGGGTGCGGTGGCTCACGCCTGTAATCCAAACACTTTGGGAGGCCGAGACGGGCGGATCACAAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCATCTCTACTGAACAAAATACAAAAAATTAGCCGGGCGTGGTGGCGGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAAGAGAATGGCATGAACCCAGGAGGCGGAGCTTGCAGTGAGCAGAGATCGCGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCTGTCTCAAAAATAAAAAATAAAAAAATAATAATAAATGAACAACTTGCAAGCAAGTGAAGAAATTATCAGACATAATAGTCTTGAAAGCTTATCTTGGACTAGTTCAGATGGAGTTGGAAAGGGCTTCAGGATATTTTTAAAAATGGTAAGATCATATAAGAAGTAGTATAAACAATAAATACAGTTCAGAGTGCCTTTTGGAGCTCTGTGCATGCTTGAGTTCTTCTGCTGGCTTTGTCAAAACTTACATCTGTCTTACGCAAAGCGGGATCTGCAAACTGGATTCACTCTAGGAATTATTTGTTACTATGCCATAAGATAAAGTCAACAATTCAGAGTGCATCAAGAAACTTATGTAGCGATTTTACAGAGTACCATTTATGTCTAATGTATTTAATAATAAAGGAGGAAGCATGGGTGTTGGGCATCTTATTTTTGTAACGCTTTGGTTTTATTCTATCTTACATACAATGGATTAGGGGAAAAGTGTTCCTTCCTCAAGATATTTTGAGAAGTACTGAGCAACATATGAAAAGCAGTTTGGGGAGAGATGCGGTATGTTGCTTGCTGGTTCTATTTGACACCATCACTATATGGAACGGGCTGAAAATCGGCCAACTTGGGCTCACTTAAGGCTCCTATGAGCTATTCTTTGTTGCCAGCACATATTAATTCCCGCTCTTTCTCTTCCCCTCTCCCCGCTTACTGTTGTGAAGTAGCATTAAGCCTGTTCAGAGAATTTGGAATAAAAATATATGGGGGCCAATTAGGAGAGCAACATGGCTGCTGAATTTAATAGGTACTCTTTGTGTCTACTCACTATTTGACTCTGAGCAAGAGCAATCTACAATTCAAGTTAAAATTCAAGCCGGGCGCAGTGGCTCATGCCTGCAATCCCAGCACTTTGGGAGGCCGAGGCAGGAGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAAAAATTAGCCGGGCGTGGTGGGGGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAATCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCAAGACTCTGTCTCAAAATAAAAATAAAAAAAAATAAAAATAAAAAATAAAAAAATTCAAGTTTACCGAATGAAGCCATGATATCTGCTACAATAGGGCTGGATCTGAACTATTACCCTCATTGCAAGAAGCCAGACACAAAAGAACATGTATTATATTGATATAAAA
TATCCAAAAAGGCAAATACATATGGACAGAAATAGATTGACAATTGCCTAGGGCTGGGGTGGGAGTGAAAAATGACTGTAAATGGAGAAGAAACTTCTTTTTAAGGTGGTGGAAAAGTGCCAAAATTAGATCGCTATGATGGCTATATAACTCTGTAAATATCCTGCAAATTTTAATGAACGTGGTGGGGGGGAGTTATTCATCCCTCAAAATAGTTTGAGAGGCTACCAAGAACCAGAACTAAACCAGATTGATGCAAATTGAATTTTACACTTAGAATGAGTACATTGATGGTCTAACAATTACATTTCAAAAACGCTGATTTATAAAAAAGTAATCCCGACCAGTAAAAACAGCAGTCATTTCTCAGCCTCCAAATTTTTTAAATGAGTGGAAAACTTTAAGCAATCAATACAAGTCATATGATACACCTCAAATAAAAGTGGTATACTACACTGAGTGCCTTGCAGATATTCTCCCACTTAATCGTGATAAGAAATACATACACATTTATTTTTATTTTTACTCTTTTGATAGTGAAGGGACCATGGCCCAGAACAGGGGTGGCAATATACCAAATGCCACTAAGATGGTAACCGTGGAAGTTTAAATTCAAGTTCATGTTGGTATATATATATACACATACATATTTATACATATGTGTATAACGAGTTACTATTACGTATATATATACAATGAGTGGAAAATATTAGTGGATATAAGTTCATGTATGATTATGTATACATATTATGATTATACTATTATGTATATGTATACAATGAGTGGATAATATTAGTGGATATAAGTTCATGTTGGTCTTATATATACATATATAATGTGTGGATATGAGTGTGTATATATATACATATATAATGTGTGGATATGAGTGTGTGTATATACACACATGCATATTGTATATATGTGTGTATATATAGACACATTATATATATACGCATATATACTACACACACACACACACGGGTGTGTGTCTGTATCTTTTCCACAAATCCTTCAACCCATTTTGCAGAGGTCAAATAGACAGTCGGAAGACCCTATGCTCAGGTGACTTAAAAATAATTTCCAAATCACATTATGGAGTTTGTATGTATTACACACATTTATTGATAGAGATACCCATATTCTACTAATCTTTTATTGGCAATAATTTATGTTAAGAATACCCAAGACTGAGAAAGCCTCATTCCTTTGGTAGTGATTAAAATAAAACATACTAAATTAACTTATAGACAAGTTATAGAACATACATTTGTGAAAAAAATTACTCACCTATGATTGGGACTTTGTATTTTTACCTTATACTTACTCAATGAAATAAAATTTTGAAAAATATTCCTGTAAATGTACCAGAACCTATTTTATACCGTGATGATCCTTAACATTTCAGACGACATGGTAGTGTCAATTTAAAAAGCAGCACTATGGAGCAGGGTTTGTTATTGTTAGAAATACACATTTGTTTCACACGTCAAGGGTAAAAATTAAAAAACAAGATTAATGTTACCCAAAAGGAAACCATTCATCAGGATTCTTACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGA', 'pos': '32834760', 'alt': u'GCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC'}} + assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000023.11:g.32816522_32823356delinsAAGTTGATTACATTAACCTGTGGATAATTACGAGTTGATTGTCGGACCCAGCTCAGGAGAATCTTTTCACTGTTGGTTTGTTGCAATCCAGCCATGATATTTTTCATTACATTTTTGACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC', 'vcf': {'chr': 'chrX', 'ref': 'ACATGTGGAAATAAATTTTCATAAGAAAATGCATTCCTTGAGCAAGAACCATGCAAACTTAAATATGAATGTCCTTGATCTTCAGTGATAAATAGAAATTTTAGGGCCAATTAGTAATGAGACATAATAGATTCTACCAGAAGTTAAGTCTATTCTCAAAGGCTAGGAGTCTATTCTGATTCATTGGTATCTATGCCATACCATTTACTGAATGTGTTCACTATTACTCCTGGATTCTGCCATATTAGACCCAACATTTACTGTTTATAGAATTGCTCACCTTCTAGCTTTCAATGAACAAGCTAGAGAAGACAGTGTATAAAACATTAGACTTATTGTTAACCAAGGACTTAAGATATAAAAGCGCTTTTTGGATAGGAGGAATACTATAGATAATTGTTTTTTAAAGTATATGTTCAAATTTTGCCTTTGCTATTAATTGTGGTAATTGGACAAGTTGCTTAACCTCTTCTAGGCCTCATTTCTGTGTATCTTACATGCAAGGTTAGTGTAAGATCAAAGTACAAAATATGGTCAGATAAAACCCCTGATTTCATACCATCATTTAACGCAAGCTATATTTATTGGCTGAACTGAAAAGAAGACAGAAATGTTTGCCTACCTAAAATAAAGAAAAAAAACAACAAAGGAAGGAAATATTTATCCCTAAGAGGTGTACATATGTAGCCTTGGTTCTATTTTCATTACTTGGTTTCCATGTATTTGATTGTCTCATAGGCTTCGTGCATGTGCAGATGTTATTCAACAGATTATCAAGTTTAATTCTTCTGAGTCATCTGATGCAGAAATCCATTCTAAGCATTTTTTCTAAGTTTCATGAATGCAGTGACGCTTCATAGAAAAAACTGTCTACATCTTTTATTAAAGAATTATTATTTATTAAAGACTGCTTTTTAAATATAAGCCCCTTATGAGATGCTGGAAGAAGACATCTATGTAAAAGTGATCAGTCTGTACCACTAAGCACTTTCCTTCTGGGAAGGTTAAGGTTCATATACTATCAACATTTGATCAGATGATGAATATTTATTGTGCATTCTAAATATGACAATTTTATTTTTTCATCCATATAGAAGTTAACAAATGCCTTTGGGGTAAATGCATATTTTAACATCTCCAATGTTAGTCATCTTTTTATTAAAAAGTAAATGCAAATTTTAGGATATACTACTAAATCTCATTTTAACATACTTTTAAAAGATTTGAGATGATGCTTTGTAAACACAGTAACGATTCGGAGATATGAAATCTTTGCATGTATCTGTCACAATCCCTTGATAACTAAGCCTATTATTTAGGAGTGGATTATCCAAGTTTTCCTAATTCCTTCTTGACCTACTTATTTAGTCAGTTACTCAGTGAATACAGGTCAAATAATTACGTAATTTGTCTCATATTCTAAAATTATAAATTGCTCAACCTTTATGTACAGTGACTGCAGGAGGTGTATTTTGTGTCCTGTGCTAAATTACGGTTGGAATCAGCAGATAGTGGCTTAATGGTGGTAATGGATTTGCAATAATCAAGCAATCCTCCAATGATGAATGTGATTCATTTGTGAAATGCTTGGCCAGTCTGCCCTCTATGTGCATAACGTTAAAGGACAACAAAGTATTCAATCCAAAACTCCTTTATAGCATCATTATGTCACTAATTCAGCTTATCAGCCTCAGCCTCGCTGTTATTGCTTCTACTT
AGGTGCAAATGTTGCTCAGACAAAAAAAAATAATAAAGGGCAATGTATGATAATCATGTCTCCTTTTCCTACATAAAAGGCAGTTATCAAAAAATCATTGAGCTGGTCCCCAAAGGTGAGCCTGTAAACAGAAAACTGTCCCGTCCTCAGGTCTCACTGTCTCTAAAGGAGGCTCAATTTAAACAGATGTATTTGTGTGTGTGCGCGCATACAACTATAAATCAATTTTTAAAAGGAAAAAAGTTGAAATGTCTTTGAGCAGTGATTTTTAAAAATGACTAGGAGTTAACTGGGCCAAGAAGGCATGATGGACATGGAGGTGCACCACCAAGATTGACCTTCAAGGAAGGGCTTGTTGCTCCAGCTGTGGTCAGGCAGCTTCTAGTTGTTAGTTCTCTCAGGTACCACCTGAGTTGCAGAGTCCTGCCAGCCAATGTCACACCCTCTCTAGGGCAACCCACTACTATTGACTGATGAAGGGGCAGAGTATAAACACTCAGCCATTTTAACCCCATTTGAGACAACTCTGAGGGGTCATCCTAGCTGCAAGTTGTCTATGGGGTTTGAATAAGCGTGTCATAGGGTCTACGTCACAGCTCTAGTTCTTCCTCTTTCCACTCCTGTCACCTCTGCCCTTCTACAGGTGTTTTTTTTTTTTTTTTTTTTTCCAGGGCATGGCTTAATAAACATGCTGCCTGCCAAACTCAACTCCGAAGTCTACTCCCCAGAACCCAGACTGTGATTAAAAGAAGTTGCAGCAGAGGAATAGAAATGAAGAGGGTAGGAATAATCTTATATGGTAAGGATAGTTTCTGTGAAGCTCTGCAGAAGAGCAGAGCAAGAACTGTCTTGCACAGAAGTGACTTGGTAGGCAGGAATTTACAGGGATGGCTTTCAGCAGTGCGAACGTCATCATTCTCCTGTAATTGCAAATTGTATCAATAATATGAACATTCTAAAGTGAATTAGCGTTGCCATTAAGTACATCCCTTGAATTGTCCAGTAAAAGTATGGACCTGACTTCTGATAGCTCACCACTGTTAGCAAGATTTGTTATTACAACCAGCTGGTTGCCAGAGCCAAGTTTAGCACTCCGAAAAAAAAAGAATTGCACTTAAGAAGGAAGAAAAAGATGCCAAACACATGTTATATGCTTTGCAATTTTGTTCAAACCTGCCTATGTTACAAAATATGCGTTACTGTTTTAGGCTAGCAATTTTATTTGAGATGGACAGTAGAAAAACAGTAATAGGAAAACAGAGTTTGCATTCCTGAGTTTTCACAAGTTCAATGAGAAGTCCTCCATGGCACTATTAATAACATAGTATGTTGCATTAGAAAAAGGGGCCAACATGGTTCTATCTGTAAATGCCGCTCGTTATCTGATAGATCTAGAATGTACATTAAGTTGACCAAAATGTCCTTTAATCTTTGCTGAAATTCAAGGGCTTCTACCATTTAATAGGAAGAAAATATGTAATGTTGGTGTAAAAAAAAAAAAAAAAAAGAAAAACACATTTTTAGATTCACTGGATTATGATGAACACTGAGCAAGATAATTTTTCAAAGTGTGCTTAATAGTTTTAGAACAGGGCAGTGTGATATAAGAGAGATGAGATAAGTAAGCAGAGGCTGGGAAGAGTCTACAGTCTTGCAGTCTCAAGGATATGGTGATTCAAGGGCATAAATGAAAAACAACAGGCTATAAGAGAGGTCTCAGACAAAGAAAGATTTAAAAAGCATTCCTTCTGATATTCCTCATTGCAAAAGCGCCTCCACATGCTAATAATGAAAAATGGGCTGGGTGCGGTGGCTCACGCCTGTAATCCAAACACTTTGGGAGGCCGAGACGGGCGGATCACAAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCATCTCTACTGAACAAAATACAAAAAATTAGCCGGGCGTGGTGGCGGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAAGAGAATGGCA
TGAACCCAGGAGGCGGAGCTTGCAGTGAGCAGAGATCGCGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCTGTCTCAAAAATAAAAAATAAAAAAATAATAATAAATGAACAACTTGCAAGCAAGTGAAGAAATTATCAGACATAATAGTCTTGAAAGCTTATCTTGGACTAGTTCAGATGGAGTTGGAAAGGGCTTCAGGATATTTTTAAAAATGGTAAGATCATATAAGAAGTAGTATAAACAATAAATACAGTTCAGAGTGCCTTTTGGAGCTCTGTGCATGCTTGAGTTCTTCTGCTGGCTTTGTCAAAACTTACATCTGTCTTACGCAAAGCGGGATCTGCAAACTGGATTCACTCTAGGAATTATTTGTTACTATGCCATAAGATAAAGTCAACAATTCAGAGTGCATCAAGAAACTTATGTAGCGATTTTACAGAGTACCATTTATGTCTAATGTATTTAATAATAAAGGAGGAAGCATGGGTGTTGGGCATCTTATTTTTGTAACGCTTTGGTTTTATTCTATCTTACATACAATGGATTAGGGGAAAAGTGTTCCTTCCTCAAGATATTTTGAGAAGTACTGAGCAACATATGAAAAGCAGTTTGGGGAGAGATGCGGTATGTTGCTTGCTGGTTCTATTTGACACCATCACTATATGGAACGGGCTGAAAATCGGCCAACTTGGGCTCACTTAAGGCTCCTATGAGCTATTCTTTGTTGCCAGCACATATTAATTCCCGCTCTTTCTCTTCCCCTCTCCCCGCTTACTGTTGTGAAGTAGCATTAAGCCTGTTCAGAGAATTTGGAATAAAAATATATGGGGGCCAATTAGGAGAGCAACATGGCTGCTGAATTTAATAGGTACTCTTTGTGTCTACTCACTATTTGACTCTGAGCAAGAGCAATCTACAATTCAAGTTAAAATTCAAGCCGGGCGCAGTGGCTCATGCCTGCAATCCCAGCACTTTGGGAGGCCGAGGCAGGAGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAAAAATTAGCCGGGCGTGGTGGGGGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAATCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCAAGACTCTGTCTCAAAATAAAAATAAAAAAAAATAAAAATAAAAAATAAAAAAATTCAAGTTTACCGAATGAAGCCATGATATCTGCTACAATAGGGCTGGATCTGAACTATTACCCTCATTGCAAGAAGCCAGACACAAAAGAACATGTATTATATTGATATAAAATATCCAAAAAGGCAAATACATATGGACAGAAATAGATTGACAATTGCCTAGGGCTGGGGTGGGAGTGAAAAATGACTGTAAATGGAGAAGAAACTTCTTTTTAAGGTGGTGGAAAAGTGCCAAAATTAGATCGCTATGATGGCTATATAACTCTGTAAATATCCTGCAAATTTTAATGAACGTGGTGGGGGGGAGTTATTCATCCCTCAAAATAGTTTGAGAGGCTACCAAGAACCAGAACTAAACCAGATTGATGCAAATTGAATTTTACACTTAGAATGAGTACATTGATGGTCTAACAATTACATTTCAAAAACGCTGATTTATAAAAAAGTAATCCCGACCAGTAAAAACAGCAGTCATTTCTCAGCCTCCAAATTTTTTAAATGAGTGGAAAACTTTAAGCAATCAATACAAGTCATATGATACACCTCAAATAAAAGTGGTATACTACACTGAGTGCCTTGCAGATATTCTCCCACTTAATCGTGATAAGAAATACATACACATTTATTTTTATTTTTACTCTTTTGATAGTGAAGGGACCATGGCCCAGAACAGGGGTGGCAATATACCAAATGCCACTAAGATGGTAACCGTGGAAGTTTAAATTCAAGT
TCATGTTGGTATATATATATACACATACATATTTATACATATGTGTATAACGAGTTACTATTACGTATATATATACAATGAGTGGAAAATATTAGTGGATATAAGTTCATGTATGATTATGTATACATATTATGATTATACTATTATGTATATGTATACAATGAGTGGATAATATTAGTGGATATAAGTTCATGTTGGTCTTATATATACATATATAATGTGTGGATATGAGTGTGTATATATATACATATATAATGTGTGGATATGAGTGTGTGTATATACACACATGCATATTGTATATATGTGTGTATATATAGACACATTATATATATACGCATATATACTACACACACACACACACGGGTGTGTGTCTGTATCTTTTCCACAAATCCTTCAACCCATTTTGCAGAGGTCAAATAGACAGTCGGAAGACCCTATGCTCAGGTGACTTAAAAATAATTTCCAAATCACATTATGGAGTTTGTATGTATTACACACATTTATTGATAGAGATACCCATATTCTACTAATCTTTTATTGGCAATAATTTATGTTAAGAATACCCAAGACTGAGAAAGCCTCATTCCTTTGGTAGTGATTAAAATAAAACATACTAAATTAACTTATAGACAAGTTATAGAACATACATTTGTGAAAAAAATTACTCACCTATGATTGGGACTTTGTATTTTTACCTTATACTTACTCAATGAAATAAAATTTTGAAAAATATTCCTGTAAATGTACCAGAACCTATTTTATACCGTGATGATCCTTAACATTTCAGACGACATGGTAGTGTCAATTTAAAAAGCAGCACTATGGAGCAGGGTTTGTTATTGTTAGAAATACACATTTGTTTCACACGTCAAGGGTAAAAATTAAAAAACAAGATTAATGTTACCCAAAAGGAAACCATTCATCAGGATTCTTACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGA', 'pos': '32816643', 'alt': u'GCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC'}} + assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639_32841473delinsAAGTTGATTACATTAACCTGTGGATAATTACGAGTTGATTGTCGGACCCAGCTCAGGAGAATCTTTTCACTGTTGGTTTGTTGCAATCCAGCCATGATATTTTTCATTACATTTTTGACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC', 'vcf': {'chr': 'X', 'ref': 
'ACATGTGGAAATAAATTTTCATAAGAAAATGCATTCCTTGAGCAAGAACCATGCAAACTTAAATATGAATGTCCTTGATCTTCAGTGATAAATAGAAATTTTAGGGCCAATTAGTAATGAGACATAATAGATTCTACCAGAAGTTAAGTCTATTCTCAAAGGCTAGGAGTCTATTCTGATTCATTGGTATCTATGCCATACCATTTACTGAATGTGTTCACTATTACTCCTGGATTCTGCCATATTAGACCCAACATTTACTGTTTATAGAATTGCTCACCTTCTAGCTTTCAATGAACAAGCTAGAGAAGACAGTGTATAAAACATTAGACTTATTGTTAACCAAGGACTTAAGATATAAAAGCGCTTTTTGGATAGGAGGAATACTATAGATAATTGTTTTTTAAAGTATATGTTCAAATTTTGCCTTTGCTATTAATTGTGGTAATTGGACAAGTTGCTTAACCTCTTCTAGGCCTCATTTCTGTGTATCTTACATGCAAGGTTAGTGTAAGATCAAAGTACAAAATATGGTCAGATAAAACCCCTGATTTCATACCATCATTTAACGCAAGCTATATTTATTGGCTGAACTGAAAAGAAGACAGAAATGTTTGCCTACCTAAAATAAAGAAAAAAAACAACAAAGGAAGGAAATATTTATCCCTAAGAGGTGTACATATGTAGCCTTGGTTCTATTTTCATTACTTGGTTTCCATGTATTTGATTGTCTCATAGGCTTCGTGCATGTGCAGATGTTATTCAACAGATTATCAAGTTTAATTCTTCTGAGTCATCTGATGCAGAAATCCATTCTAAGCATTTTTTCTAAGTTTCATGAATGCAGTGACGCTTCATAGAAAAAACTGTCTACATCTTTTATTAAAGAATTATTATTTATTAAAGACTGCTTTTTAAATATAAGCCCCTTATGAGATGCTGGAAGAAGACATCTATGTAAAAGTGATCAGTCTGTACCACTAAGCACTTTCCTTCTGGGAAGGTTAAGGTTCATATACTATCAACATTTGATCAGATGATGAATATTTATTGTGCATTCTAAATATGACAATTTTATTTTTTCATCCATATAGAAGTTAACAAATGCCTTTGGGGTAAATGCATATTTTAACATCTCCAATGTTAGTCATCTTTTTATTAAAAAGTAAATGCAAATTTTAGGATATACTACTAAATCTCATTTTAACATACTTTTAAAAGATTTGAGATGATGCTTTGTAAACACAGTAACGATTCGGAGATATGAAATCTTTGCATGTATCTGTCACAATCCCTTGATAACTAAGCCTATTATTTAGGAGTGGATTATCCAAGTTTTCCTAATTCCTTCTTGACCTACTTATTTAGTCAGTTACTCAGTGAATACAGGTCAAATAATTACGTAATTTGTCTCATATTCTAAAATTATAAATTGCTCAACCTTTATGTACAGTGACTGCAGGAGGTGTATTTTGTGTCCTGTGCTAAATTACGGTTGGAATCAGCAGATAGTGGCTTAATGGTGGTAATGGATTTGCAATAATCAAGCAATCCTCCAATGATGAATGTGATTCATTTGTGAAATGCTTGGCCAGTCTGCCCTCTATGTGCATAACGTTAAAGGACAACAAAGTATTCAATCCAAAACTCCTTTATAGCATCATTATGTCACTAATTCAGCTTATCAGCCTCAGCCTCGCTGTTATTGCTTCTACTTAGGTGCAAATGTTGCTCAGACAAAAAAAAATAATAAAGGGCAATGTATGATAATCATGTCTCCTTTTCCTACATAAAAGGCAGTTATCAAAAAATCATTGAGCTGGTCCCCAAAGGTGAGCCTGTAAACAGAAAACTGTCCCGTCCTCAGGTCTCACTGTCTCTAAAGGAGGCTCAATTTAAACAGATGTATTTGTGTGTGTGCGCGCATACAACTATAAATCAATTTTTAAAAGGAAAAAAGTTGAAATGTCTTTGAGCAGTGATTTTTAAAAATGACTAG
GAGTTAACTGGGCCAAGAAGGCATGATGGACATGGAGGTGCACCACCAAGATTGACCTTCAAGGAAGGGCTTGTTGCTCCAGCTGTGGTCAGGCAGCTTCTAGTTGTTAGTTCTCTCAGGTACCACCTGAGTTGCAGAGTCCTGCCAGCCAATGTCACACCCTCTCTAGGGCAACCCACTACTATTGACTGATGAAGGGGCAGAGTATAAACACTCAGCCATTTTAACCCCATTTGAGACAACTCTGAGGGGTCATCCTAGCTGCAAGTTGTCTATGGGGTTTGAATAAGCGTGTCATAGGGTCTACGTCACAGCTCTAGTTCTTCCTCTTTCCACTCCTGTCACCTCTGCCCTTCTACAGGTGTTTTTTTTTTTTTTTTTTTTTCCAGGGCATGGCTTAATAAACATGCTGCCTGCCAAACTCAACTCCGAAGTCTACTCCCCAGAACCCAGACTGTGATTAAAAGAAGTTGCAGCAGAGGAATAGAAATGAAGAGGGTAGGAATAATCTTATATGGTAAGGATAGTTTCTGTGAAGCTCTGCAGAAGAGCAGAGCAAGAACTGTCTTGCACAGAAGTGACTTGGTAGGCAGGAATTTACAGGGATGGCTTTCAGCAGTGCGAACGTCATCATTCTCCTGTAATTGCAAATTGTATCAATAATATGAACATTCTAAAGTGAATTAGCGTTGCCATTAAGTACATCCCTTGAATTGTCCAGTAAAAGTATGGACCTGACTTCTGATAGCTCACCACTGTTAGCAAGATTTGTTATTACAACCAGCTGGTTGCCAGAGCCAAGTTTAGCACTCCGAAAAAAAAAGAATTGCACTTAAGAAGGAAGAAAAAGATGCCAAACACATGTTATATGCTTTGCAATTTTGTTCAAACCTGCCTATGTTACAAAATATGCGTTACTGTTTTAGGCTAGCAATTTTATTTGAGATGGACAGTAGAAAAACAGTAATAGGAAAACAGAGTTTGCATTCCTGAGTTTTCACAAGTTCAATGAGAAGTCCTCCATGGCACTATTAATAACATAGTATGTTGCATTAGAAAAAGGGGCCAACATGGTTCTATCTGTAAATGCCGCTCGTTATCTGATAGATCTAGAATGTACATTAAGTTGACCAAAATGTCCTTTAATCTTTGCTGAAATTCAAGGGCTTCTACCATTTAATAGGAAGAAAATATGTAATGTTGGTGTAAAAAAAAAAAAAAAAAAGAAAAACACATTTTTAGATTCACTGGATTATGATGAACACTGAGCAAGATAATTTTTCAAAGTGTGCTTAATAGTTTTAGAACAGGGCAGTGTGATATAAGAGAGATGAGATAAGTAAGCAGAGGCTGGGAAGAGTCTACAGTCTTGCAGTCTCAAGGATATGGTGATTCAAGGGCATAAATGAAAAACAACAGGCTATAAGAGAGGTCTCAGACAAAGAAAGATTTAAAAAGCATTCCTTCTGATATTCCTCATTGCAAAAGCGCCTCCACATGCTAATAATGAAAAATGGGCTGGGTGCGGTGGCTCACGCCTGTAATCCAAACACTTTGGGAGGCCGAGACGGGCGGATCACAAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCATCTCTACTGAACAAAATACAAAAAATTAGCCGGGCGTGGTGGCGGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAAGAGAATGGCATGAACCCAGGAGGCGGAGCTTGCAGTGAGCAGAGATCGCGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCTGTCTCAAAAATAAAAAATAAAAAAATAATAATAAATGAACAACTTGCAAGCAAGTGAAGAAATTATCAGACATAATAGTCTTGAAAGCTTATCTTGGACTAGTTCAGATGGAGTTGGAAAGGGCTTCAGGATATTTTTAAAAATGGTAAGATCATATAAGAAGTAGTATAAACAATAAATACAGTTCAGAGTGCCTTTTGGAGCT
CTGTGCATGCTTGAGTTCTTCTGCTGGCTTTGTCAAAACTTACATCTGTCTTACGCAAAGCGGGATCTGCAAACTGGATTCACTCTAGGAATTATTTGTTACTATGCCATAAGATAAAGTCAACAATTCAGAGTGCATCAAGAAACTTATGTAGCGATTTTACAGAGTACCATTTATGTCTAATGTATTTAATAATAAAGGAGGAAGCATGGGTGTTGGGCATCTTATTTTTGTAACGCTTTGGTTTTATTCTATCTTACATACAATGGATTAGGGGAAAAGTGTTCCTTCCTCAAGATATTTTGAGAAGTACTGAGCAACATATGAAAAGCAGTTTGGGGAGAGATGCGGTATGTTGCTTGCTGGTTCTATTTGACACCATCACTATATGGAACGGGCTGAAAATCGGCCAACTTGGGCTCACTTAAGGCTCCTATGAGCTATTCTTTGTTGCCAGCACATATTAATTCCCGCTCTTTCTCTTCCCCTCTCCCCGCTTACTGTTGTGAAGTAGCATTAAGCCTGTTCAGAGAATTTGGAATAAAAATATATGGGGGCCAATTAGGAGAGCAACATGGCTGCTGAATTTAATAGGTACTCTTTGTGTCTACTCACTATTTGACTCTGAGCAAGAGCAATCTACAATTCAAGTTAAAATTCAAGCCGGGCGCAGTGGCTCATGCCTGCAATCCCAGCACTTTGGGAGGCCGAGGCAGGAGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAAAAATTAGCCGGGCGTGGTGGGGGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAATCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCAAGACTCTGTCTCAAAATAAAAATAAAAAAAAATAAAAATAAAAAATAAAAAAATTCAAGTTTACCGAATGAAGCCATGATATCTGCTACAATAGGGCTGGATCTGAACTATTACCCTCATTGCAAGAAGCCAGACACAAAAGAACATGTATTATATTGATATAAAATATCCAAAAAGGCAAATACATATGGACAGAAATAGATTGACAATTGCCTAGGGCTGGGGTGGGAGTGAAAAATGACTGTAAATGGAGAAGAAACTTCTTTTTAAGGTGGTGGAAAAGTGCCAAAATTAGATCGCTATGATGGCTATATAACTCTGTAAATATCCTGCAAATTTTAATGAACGTGGTGGGGGGGAGTTATTCATCCCTCAAAATAGTTTGAGAGGCTACCAAGAACCAGAACTAAACCAGATTGATGCAAATTGAATTTTACACTTAGAATGAGTACATTGATGGTCTAACAATTACATTTCAAAAACGCTGATTTATAAAAAAGTAATCCCGACCAGTAAAAACAGCAGTCATTTCTCAGCCTCCAAATTTTTTAAATGAGTGGAAAACTTTAAGCAATCAATACAAGTCATATGATACACCTCAAATAAAAGTGGTATACTACACTGAGTGCCTTGCAGATATTCTCCCACTTAATCGTGATAAGAAATACATACACATTTATTTTTATTTTTACTCTTTTGATAGTGAAGGGACCATGGCCCAGAACAGGGGTGGCAATATACCAAATGCCACTAAGATGGTAACCGTGGAAGTTTAAATTCAAGTTCATGTTGGTATATATATATACACATACATATTTATACATATGTGTATAACGAGTTACTATTACGTATATATATACAATGAGTGGAAAATATTAGTGGATATAAGTTCATGTATGATTATGTATACATATTATGATTATACTATTATGTATATGTATACAATGAGTGGATAATATTAGTGGATATAAGTTCATGTTGGTCTTATATATACATATATAATGTGTGGATATGAGTGTGTATATATATACATATATAATGTGTGGATATGAGTGTGTGTATATAC
ACACATGCATATTGTATATATGTGTGTATATATAGACACATTATATATATACGCATATATACTACACACACACACACACGGGTGTGTGTCTGTATCTTTTCCACAAATCCTTCAACCCATTTTGCAGAGGTCAAATAGACAGTCGGAAGACCCTATGCTCAGGTGACTTAAAAATAATTTCCAAATCACATTATGGAGTTTGTATGTATTACACACATTTATTGATAGAGATACCCATATTCTACTAATCTTTTATTGGCAATAATTTATGTTAAGAATACCCAAGACTGAGAAAGCCTCATTCCTTTGGTAGTGATTAAAATAAAACATACTAAATTAACTTATAGACAAGTTATAGAACATACATTTGTGAAAAAAATTACTCACCTATGATTGGGACTTTGTATTTTTACCTTATACTTACTCAATGAAATAAAATTTTGAAAAATATTCCTGTAAATGTACCAGAACCTATTTTATACCGTGATGATCCTTAACATTTCAGACGACATGGTAGTGTCAATTTAAAAAGCAGCACTATGGAGCAGGGTTTGTTATTGTTAGAAATACACATTTGTTTCACACGTCAAGGGTAAAAATTAAAAAACAAGATTAATGTTACCCAAAAGGAAACCATTCATCAGGATTCTTACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGA', 'pos': '32834760', 'alt': u'GCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC'}} + assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522_32823356delinsAAGTTGATTACATTAACCTGTGGATAATTACGAGTTGATTGTCGGACCCAGCTCAGGAGAATCTTTTCACTGTTGGTTTGTTGCAATCCAGCCATGATATTTTTCATTACATTTTTGACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC', 'vcf': {'chr': 'X', 'ref': 
'ACATGTGGAAATAAATTTTCATAAGAAAATGCATTCCTTGAGCAAGAACCATGCAAACTTAAATATGAATGTCCTTGATCTTCAGTGATAAATAGAAATTTTAGGGCCAATTAGTAATGAGACATAATAGATTCTACCAGAAGTTAAGTCTATTCTCAAAGGCTAGGAGTCTATTCTGATTCATTGGTATCTATGCCATACCATTTACTGAATGTGTTCACTATTACTCCTGGATTCTGCCATATTAGACCCAACATTTACTGTTTATAGAATTGCTCACCTTCTAGCTTTCAATGAACAAGCTAGAGAAGACAGTGTATAAAACATTAGACTTATTGTTAACCAAGGACTTAAGATATAAAAGCGCTTTTTGGATAGGAGGAATACTATAGATAATTGTTTTTTAAAGTATATGTTCAAATTTTGCCTTTGCTATTAATTGTGGTAATTGGACAAGTTGCTTAACCTCTTCTAGGCCTCATTTCTGTGTATCTTACATGCAAGGTTAGTGTAAGATCAAAGTACAAAATATGGTCAGATAAAACCCCTGATTTCATACCATCATTTAACGCAAGCTATATTTATTGGCTGAACTGAAAAGAAGACAGAAATGTTTGCCTACCTAAAATAAAGAAAAAAAACAACAAAGGAAGGAAATATTTATCCCTAAGAGGTGTACATATGTAGCCTTGGTTCTATTTTCATTACTTGGTTTCCATGTATTTGATTGTCTCATAGGCTTCGTGCATGTGCAGATGTTATTCAACAGATTATCAAGTTTAATTCTTCTGAGTCATCTGATGCAGAAATCCATTCTAAGCATTTTTTCTAAGTTTCATGAATGCAGTGACGCTTCATAGAAAAAACTGTCTACATCTTTTATTAAAGAATTATTATTTATTAAAGACTGCTTTTTAAATATAAGCCCCTTATGAGATGCTGGAAGAAGACATCTATGTAAAAGTGATCAGTCTGTACCACTAAGCACTTTCCTTCTGGGAAGGTTAAGGTTCATATACTATCAACATTTGATCAGATGATGAATATTTATTGTGCATTCTAAATATGACAATTTTATTTTTTCATCCATATAGAAGTTAACAAATGCCTTTGGGGTAAATGCATATTTTAACATCTCCAATGTTAGTCATCTTTTTATTAAAAAGTAAATGCAAATTTTAGGATATACTACTAAATCTCATTTTAACATACTTTTAAAAGATTTGAGATGATGCTTTGTAAACACAGTAACGATTCGGAGATATGAAATCTTTGCATGTATCTGTCACAATCCCTTGATAACTAAGCCTATTATTTAGGAGTGGATTATCCAAGTTTTCCTAATTCCTTCTTGACCTACTTATTTAGTCAGTTACTCAGTGAATACAGGTCAAATAATTACGTAATTTGTCTCATATTCTAAAATTATAAATTGCTCAACCTTTATGTACAGTGACTGCAGGAGGTGTATTTTGTGTCCTGTGCTAAATTACGGTTGGAATCAGCAGATAGTGGCTTAATGGTGGTAATGGATTTGCAATAATCAAGCAATCCTCCAATGATGAATGTGATTCATTTGTGAAATGCTTGGCCAGTCTGCCCTCTATGTGCATAACGTTAAAGGACAACAAAGTATTCAATCCAAAACTCCTTTATAGCATCATTATGTCACTAATTCAGCTTATCAGCCTCAGCCTCGCTGTTATTGCTTCTACTTAGGTGCAAATGTTGCTCAGACAAAAAAAAATAATAAAGGGCAATGTATGATAATCATGTCTCCTTTTCCTACATAAAAGGCAGTTATCAAAAAATCATTGAGCTGGTCCCCAAAGGTGAGCCTGTAAACAGAAAACTGTCCCGTCCTCAGGTCTCACTGTCTCTAAAGGAGGCTCAATTTAAACAGATGTATTTGTGTGTGTGCGCGCATACAACTATAAATCAATTTTTAAAAGGAAAAAAGTTGAAATGTCTTTGAGCAGTGATTTTTAAAAATGACTAG
GAGTTAACTGGGCCAAGAAGGCATGATGGACATGGAGGTGCACCACCAAGATTGACCTTCAAGGAAGGGCTTGTTGCTCCAGCTGTGGTCAGGCAGCTTCTAGTTGTTAGTTCTCTCAGGTACCACCTGAGTTGCAGAGTCCTGCCAGCCAATGTCACACCCTCTCTAGGGCAACCCACTACTATTGACTGATGAAGGGGCAGAGTATAAACACTCAGCCATTTTAACCCCATTTGAGACAACTCTGAGGGGTCATCCTAGCTGCAAGTTGTCTATGGGGTTTGAATAAGCGTGTCATAGGGTCTACGTCACAGCTCTAGTTCTTCCTCTTTCCACTCCTGTCACCTCTGCCCTTCTACAGGTGTTTTTTTTTTTTTTTTTTTTTCCAGGGCATGGCTTAATAAACATGCTGCCTGCCAAACTCAACTCCGAAGTCTACTCCCCAGAACCCAGACTGTGATTAAAAGAAGTTGCAGCAGAGGAATAGAAATGAAGAGGGTAGGAATAATCTTATATGGTAAGGATAGTTTCTGTGAAGCTCTGCAGAAGAGCAGAGCAAGAACTGTCTTGCACAGAAGTGACTTGGTAGGCAGGAATTTACAGGGATGGCTTTCAGCAGTGCGAACGTCATCATTCTCCTGTAATTGCAAATTGTATCAATAATATGAACATTCTAAAGTGAATTAGCGTTGCCATTAAGTACATCCCTTGAATTGTCCAGTAAAAGTATGGACCTGACTTCTGATAGCTCACCACTGTTAGCAAGATTTGTTATTACAACCAGCTGGTTGCCAGAGCCAAGTTTAGCACTCCGAAAAAAAAAGAATTGCACTTAAGAAGGAAGAAAAAGATGCCAAACACATGTTATATGCTTTGCAATTTTGTTCAAACCTGCCTATGTTACAAAATATGCGTTACTGTTTTAGGCTAGCAATTTTATTTGAGATGGACAGTAGAAAAACAGTAATAGGAAAACAGAGTTTGCATTCCTGAGTTTTCACAAGTTCAATGAGAAGTCCTCCATGGCACTATTAATAACATAGTATGTTGCATTAGAAAAAGGGGCCAACATGGTTCTATCTGTAAATGCCGCTCGTTATCTGATAGATCTAGAATGTACATTAAGTTGACCAAAATGTCCTTTAATCTTTGCTGAAATTCAAGGGCTTCTACCATTTAATAGGAAGAAAATATGTAATGTTGGTGTAAAAAAAAAAAAAAAAAAGAAAAACACATTTTTAGATTCACTGGATTATGATGAACACTGAGCAAGATAATTTTTCAAAGTGTGCTTAATAGTTTTAGAACAGGGCAGTGTGATATAAGAGAGATGAGATAAGTAAGCAGAGGCTGGGAAGAGTCTACAGTCTTGCAGTCTCAAGGATATGGTGATTCAAGGGCATAAATGAAAAACAACAGGCTATAAGAGAGGTCTCAGACAAAGAAAGATTTAAAAAGCATTCCTTCTGATATTCCTCATTGCAAAAGCGCCTCCACATGCTAATAATGAAAAATGGGCTGGGTGCGGTGGCTCACGCCTGTAATCCAAACACTTTGGGAGGCCGAGACGGGCGGATCACAAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCATCTCTACTGAACAAAATACAAAAAATTAGCCGGGCGTGGTGGCGGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAAGAGAATGGCATGAACCCAGGAGGCGGAGCTTGCAGTGAGCAGAGATCGCGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCTGTCTCAAAAATAAAAAATAAAAAAATAATAATAAATGAACAACTTGCAAGCAAGTGAAGAAATTATCAGACATAATAGTCTTGAAAGCTTATCTTGGACTAGTTCAGATGGAGTTGGAAAGGGCTTCAGGATATTTTTAAAAATGGTAAGATCATATAAGAAGTAGTATAAACAATAAATACAGTTCAGAGTGCCTTTTGGAGCT
CTGTGCATGCTTGAGTTCTTCTGCTGGCTTTGTCAAAACTTACATCTGTCTTACGCAAAGCGGGATCTGCAAACTGGATTCACTCTAGGAATTATTTGTTACTATGCCATAAGATAAAGTCAACAATTCAGAGTGCATCAAGAAACTTATGTAGCGATTTTACAGAGTACCATTTATGTCTAATGTATTTAATAATAAAGGAGGAAGCATGGGTGTTGGGCATCTTATTTTTGTAACGCTTTGGTTTTATTCTATCTTACATACAATGGATTAGGGGAAAAGTGTTCCTTCCTCAAGATATTTTGAGAAGTACTGAGCAACATATGAAAAGCAGTTTGGGGAGAGATGCGGTATGTTGCTTGCTGGTTCTATTTGACACCATCACTATATGGAACGGGCTGAAAATCGGCCAACTTGGGCTCACTTAAGGCTCCTATGAGCTATTCTTTGTTGCCAGCACATATTAATTCCCGCTCTTTCTCTTCCCCTCTCCCCGCTTACTGTTGTGAAGTAGCATTAAGCCTGTTCAGAGAATTTGGAATAAAAATATATGGGGGCCAATTAGGAGAGCAACATGGCTGCTGAATTTAATAGGTACTCTTTGTGTCTACTCACTATTTGACTCTGAGCAAGAGCAATCTACAATTCAAGTTAAAATTCAAGCCGGGCGCAGTGGCTCATGCCTGCAATCCCAGCACTTTGGGAGGCCGAGGCAGGAGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAAAAATTAGCCGGGCGTGGTGGGGGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAATCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCAAGACTCTGTCTCAAAATAAAAATAAAAAAAAATAAAAATAAAAAATAAAAAAATTCAAGTTTACCGAATGAAGCCATGATATCTGCTACAATAGGGCTGGATCTGAACTATTACCCTCATTGCAAGAAGCCAGACACAAAAGAACATGTATTATATTGATATAAAATATCCAAAAAGGCAAATACATATGGACAGAAATAGATTGACAATTGCCTAGGGCTGGGGTGGGAGTGAAAAATGACTGTAAATGGAGAAGAAACTTCTTTTTAAGGTGGTGGAAAAGTGCCAAAATTAGATCGCTATGATGGCTATATAACTCTGTAAATATCCTGCAAATTTTAATGAACGTGGTGGGGGGGAGTTATTCATCCCTCAAAATAGTTTGAGAGGCTACCAAGAACCAGAACTAAACCAGATTGATGCAAATTGAATTTTACACTTAGAATGAGTACATTGATGGTCTAACAATTACATTTCAAAAACGCTGATTTATAAAAAAGTAATCCCGACCAGTAAAAACAGCAGTCATTTCTCAGCCTCCAAATTTTTTAAATGAGTGGAAAACTTTAAGCAATCAATACAAGTCATATGATACACCTCAAATAAAAGTGGTATACTACACTGAGTGCCTTGCAGATATTCTCCCACTTAATCGTGATAAGAAATACATACACATTTATTTTTATTTTTACTCTTTTGATAGTGAAGGGACCATGGCCCAGAACAGGGGTGGCAATATACCAAATGCCACTAAGATGGTAACCGTGGAAGTTTAAATTCAAGTTCATGTTGGTATATATATATACACATACATATTTATACATATGTGTATAACGAGTTACTATTACGTATATATATACAATGAGTGGAAAATATTAGTGGATATAAGTTCATGTATGATTATGTATACATATTATGATTATACTATTATGTATATGTATACAATGAGTGGATAATATTAGTGGATATAAGTTCATGTTGGTCTTATATATACATATATAATGTGTGGATATGAGTGTGTATATATATACATATATAATGTGTGGATATGAGTGTGTGTATATAC
ACACATGCATATTGTATATATGTGTGTATATATAGACACATTATATATATACGCATATATACTACACACACACACACACGGGTGTGTGTCTGTATCTTTTCCACAAATCCTTCAACCCATTTTGCAGAGGTCAAATAGACAGTCGGAAGACCCTATGCTCAGGTGACTTAAAAATAATTTCCAAATCACATTATGGAGTTTGTATGTATTACACACATTTATTGATAGAGATACCCATATTCTACTAATCTTTTATTGGCAATAATTTATGTTAAGAATACCCAAGACTGAGAAAGCCTCATTCCTTTGGTAGTGATTAAAATAAAACATACTAAATTAACTTATAGACAAGTTATAGAACATACATTTGTGAAAAAAATTACTCACCTATGATTGGGACTTTGTATTTTTACCTTATACTTACTCAATGAAATAAAATTTTGAAAAATATTCCTGTAAATGTACCAGAACCTATTTTATACCGTGATGATCCTTAACATTTCAGACGACATGGTAGTGTCAATTTAAAAAGCAGCACTATGGAGCAGGGTTTGTTATTGTTAGAAATACACATTTGTTTCACACGTCAAGGGTAAAAATTAAAAAACAAGATTAATGTTACCCAAAAGGAAACCATTCATCAGGATTCTTACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGA', 'pos': '32816643', 'alt': u'GCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC'}} + assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + + assert results['flag'] == 'gene_variant' + assert 'NM_004006.2:c.296T>G' in results.keys() + assert results['NM_004006.2:c.296T>G']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.296T>G' + assert results['NM_004006.2:c.296T>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.296T>G']['alt_genomic_loci'] == [] + assert results['NM_004006.2:c.296T>G']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + assert results['NM_004006.2:c.296T>G']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.296T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_003997.1(LRG_199p1):p.(Ile99Ser)', 'slr': 'NP_003997.1:p.(I99S)'} + assert results['NM_004006.2:c.296T>G']['submitted_variant'] == 'LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C' + assert results['NM_004006.2:c.296T>G']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.296T>G']['hgvs_lrg_variant'] == 'LRG_199:g.521254T>G' + assert results['NM_004006.2:c.296T>G']['hgvs_transcript_variant'] == 'NM_004006.2:c.296T>G' + assert results['NM_004006.2:c.296T>G']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.521254T>G' + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32841473A>C', 'vcf': {'chr': 'chrX', 'ref': u'A', 'pos': '32841473', 'alt': u'C'}} + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32823356A>C', 'vcf': {'chr': 'chrX', 'ref': u'A', 'pos': '32823356', 'alt': u'C'}} + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32841473A>C', 'vcf': {'chr': 'X', 'ref': u'A', 'pos': '32841473', 'alt': u'C'}} + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32823356A>C', 'vcf': {'chr': 'X', 'ref': u'A', 'pos': '32823356', 'alt': u'C'}} + assert results['NM_004006.2:c.296T>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + + assert 'NM_004006.2:c.1083A>C' in results.keys() + assert results['NM_004006.2:c.1083A>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.1083A>C' + assert results['NM_004006.2:c.1083A>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.1083A>C']['alt_genomic_loci'] == [] 
+ assert results['NM_004006.2:c.1083A>C']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + assert results['NM_004006.2:c.1083A>C']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.1083A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Thr361=)', 'slr': 'NP_003997.1:p.(T361=)'} + assert results['NM_004006.2:c.1083A>C']['submitted_variant'] == 'LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C' + assert results['NM_004006.2:c.1083A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.1083A>C']['hgvs_lrg_variant'] == 'LRG_199:g.699580A>C' + assert results['NM_004006.2:c.1083A>C']['hgvs_transcript_variant'] == 'NM_004006.2:c.1083A>C' + assert results['NM_004006.2:c.1083A>C']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.699580A>C' + assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32663147T>G', 'vcf': {'chr': 'chrX', 'ref': u'T', 'pos': '32663147', 'alt': u'G'}} + assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32645030T>G', 'vcf': {'chr': 'chrX', 'ref': u'T', 'pos': '32645030', 'alt': u'G'}} + assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32663147T>G', 'vcf': {'chr': 'X', 'ref': u'T', 'pos': '32663147', 'alt': u'G'}} + assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32645030T>G', 'vcf': {'chr': 'X', 'ref': u'T', 'pos': '32645030', 'alt': u'G'}} + assert results['NM_004006.2:c.1083A>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + 
+ + def test_variant197(self): + variant = 'LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_004006.2:c.1408del' in results.keys() + assert results['NM_004006.2:c.1408del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.1408del' + assert results['NM_004006.2:c.1408del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.1408del']['alt_genomic_loci'] == [] + assert results['NM_004006.2:c.1408del']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + assert results['NM_004006.2:c.1408del']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.1408del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Arg470GlufsTer17)', 'slr': 'NP_003997.1:p.(R470Efs*17)'} + assert results['NM_004006.2:c.1408del']['submitted_variant'] == 'LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del' + assert results['NM_004006.2:c.1408del']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.1408del']['hgvs_lrg_variant'] == 'LRG_199:g.730233del' + assert results['NM_004006.2:c.1408del']['hgvs_transcript_variant'] == 'NM_004006.2:c.1408del' + assert results['NM_004006.2:c.1408del']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.730233del' + assert results['NM_004006.2:c.1408del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32632494del', 'vcf': {'chr': 'chrX', 'ref': 'CT', 'pos': '32632493', 'alt': 'C'}} + assert results['NM_004006.2:c.1408del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32614377del', 'vcf': {'chr': 'chrX', 'ref': 'CT', 'pos': '32614376', 'alt': 'C'}} + assert results['NM_004006.2:c.1408del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32632494del', 'vcf': {'chr': 'X', 'ref': 'CT', 'pos': '32632493', 'alt': 'C'}} + assert 
results['NM_004006.2:c.1408del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32614377del', 'vcf': {'chr': 'X', 'ref': 'CT', 'pos': '32614376', 'alt': 'C'}} + assert results['NM_004006.2:c.1408del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + + assert results['flag'] == 'gene_variant' + assert 'NM_004006.2:c.296T>G' in results.keys() + assert results['NM_004006.2:c.296T>G']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.296T>G' + assert results['NM_004006.2:c.296T>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.296T>G']['alt_genomic_loci'] == [] + assert results['NM_004006.2:c.296T>G']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + assert results['NM_004006.2:c.296T>G']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.296T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Ile99Ser)', 'slr': 'NP_003997.1:p.(I99S)'} + assert results['NM_004006.2:c.296T>G']['submitted_variant'] == 'LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del' + assert results['NM_004006.2:c.296T>G']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.296T>G']['hgvs_lrg_variant'] == 'LRG_199:g.521254T>G' + assert results['NM_004006.2:c.296T>G']['hgvs_transcript_variant'] == 'NM_004006.2:c.296T>G' + assert results['NM_004006.2:c.296T>G']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.521254T>G' + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32841473A>C', 'vcf': {'chr': 'chrX', 'ref': u'A', 'pos': '32841473', 'alt': u'C'}} + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000023.11:g.32823356A>C', 'vcf': {'chr': 'chrX', 'ref': u'A', 'pos': '32823356', 'alt': u'C'}} + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32841473A>C', 'vcf': {'chr': 'X', 'ref': u'A', 'pos': '32841473', 'alt': u'C'}} + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32823356A>C', 'vcf': {'chr': 'X', 'ref': u'A', 'pos': '32823356', 'alt': u'C'}} + assert results['NM_004006.2:c.296T>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + + assert 'NM_004006.2:c.476T>C' in results.keys() + assert results['NM_004006.2:c.476T>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.476T>C' + assert results['NM_004006.2:c.476T>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.476T>C']['alt_genomic_loci'] == [] + assert results['NM_004006.2:c.476T>C']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + assert results['NM_004006.2:c.476T>C']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.476T>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Phe159Ser)', 'slr': 'NP_003997.1:p.(F159S)'} + assert results['NM_004006.2:c.476T>C']['submitted_variant'] == 'LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del' + assert results['NM_004006.2:c.476T>C']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.476T>C']['hgvs_lrg_variant'] == 'LRG_199:g.528088T>C' + assert results['NM_004006.2:c.476T>C']['hgvs_transcript_variant'] == 'NM_004006.2:c.476T>C' + assert results['NM_004006.2:c.476T>C']['hgvs_refseqgene_variant'] == 
'NG_012232.1:g.528088T>C' + assert results['NM_004006.2:c.476T>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639A>G', 'vcf': {'chr': 'chrX', 'ref': u'A', 'pos': '32834639', 'alt': u'G'}} + assert results['NM_004006.2:c.476T>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522A>G', 'vcf': {'chr': 'chrX', 'ref': u'A', 'pos': '32816522', 'alt': u'G'}} + assert results['NM_004006.2:c.476T>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639A>G', 'vcf': {'chr': 'X', 'ref': u'A', 'pos': '32834639', 'alt': u'G'}} + assert results['NM_004006.2:c.476T>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522A>G', 'vcf': {'chr': 'X', 'ref': u'A', 'pos': '32816522', 'alt': u'G'}} + assert results['NM_004006.2:c.476T>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + + assert 'NM_004006.2:c.1083A>C' in results.keys() + assert results['NM_004006.2:c.1083A>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.1083A>C' + assert results['NM_004006.2:c.1083A>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.1083A>C']['alt_genomic_loci'] == [] + assert results['NM_004006.2:c.1083A>C']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + assert results['NM_004006.2:c.1083A>C']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.1083A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Thr361=)', 'slr': 'NP_003997.1:p.(T361=)'} + assert results['NM_004006.2:c.1083A>C']['submitted_variant'] == 'LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del' + assert 
results['NM_004006.2:c.1083A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.1083A>C']['hgvs_lrg_variant'] == 'LRG_199:g.699580A>C' + assert results['NM_004006.2:c.1083A>C']['hgvs_transcript_variant'] == 'NM_004006.2:c.1083A>C' + assert results['NM_004006.2:c.1083A>C']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.699580A>C' + assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32663147T>G', 'vcf': {'chr': 'chrX', 'ref': u'T', 'pos': '32663147', 'alt': u'G'}} + assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32645030T>G', 'vcf': {'chr': 'chrX', 'ref': u'T', 'pos': '32645030', 'alt': u'G'}} + assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32663147T>G', 'vcf': {'chr': 'X', 'ref': u'T', 'pos': '32663147', 'alt': u'G'}} + assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32645030T>G', 'vcf': {'chr': 'X', 'ref': u'T', 'pos': '32645030', 'alt': u'G'}} + assert results['NM_004006.2:c.1083A>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + + + def test_variant198(self): + variant = 'LRG_199t1:c.[976-20T>A;976-17_976-1dup]' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'validation_warning_1' in results.keys() + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' + assert results['validation_warning_1']['alt_genomic_loci'] == [] + assert 
results['validation_warning_1']['transcript_description'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['submitted_variant'] == 'LRG_199t1:c.[976-20T>A;976-17_976-1dup]' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert results['validation_warning_1']['reference_sequence_records'] == '' + + assert results['flag'] == 'warning' + + def test_variant199(self): + variant = '1-5935162-A-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_015102.3:c.2818-2T>A' in results.keys() + assert results['NM_015102.3:c.2818-2T>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_015102.3:c.2818-2T>A']['refseqgene_context_intronic_sequence'] == 'NG_011724.2(NM_015102.3):c.2818-2A=' + assert results['NM_015102.3:c.2818-2T>A']['alt_genomic_loci'] == [] + assert results['NM_015102.3:c.2818-2T>A']['transcript_description'] == 'Homo sapiens nephronophthisis 4 (NPHP4), mRNA' + assert results['NM_015102.3:c.2818-2T>A']['gene_symbol'] == 'NPHP4' + assert results['NM_015102.3:c.2818-2T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055917.1:p.?', 'slr': 'NP_055917.1:p.?'} + assert results['NM_015102.3:c.2818-2T>A']['submitted_variant'] == '1-5935162-A-T' + assert 
results['NM_015102.3:c.2818-2T>A']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_015102.3):c.2818-2T>A' + assert results['NM_015102.3:c.2818-2T>A']['hgvs_lrg_variant'] == '' + assert results['NM_015102.3:c.2818-2T>A']['hgvs_transcript_variant'] == 'NM_015102.3:c.2818-2T>A' + assert results['NM_015102.3:c.2818-2T>A']['hgvs_refseqgene_variant'] == 'NG_011724.2:g.122370A=' + assert results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'ref': u'A', 'pos': '5935162', 'alt': u'T'}} + assert 'hg38' not in results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci'].keys() + assert results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'ref': u'A', 'pos': '5935162', 'alt': u'T'}} + assert 'grch38' not in results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci'].keys() + assert results['NM_015102.3:c.2818-2T>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011724.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055917.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015102.3'} + + assert 'NM_001291593.1:c.1279-2T>A' in results.keys() + assert results['NM_001291593.1:c.1279-2T>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001291593.1:c.1279-2T>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001291593.1:c.1279-2T>A']['alt_genomic_loci'] == [] + assert results['NM_001291593.1:c.1279-2T>A']['transcript_description'] == 'Homo sapiens nephrocystin 4 (NPHP4), transcript variant 2, mRNA' + assert results['NM_001291593.1:c.1279-2T>A']['gene_symbol'] == 'NPHP4' + assert results['NM_001291593.1:c.1279-2T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278522.1:p.?', 'slr': 'NP_001278522.1:p.?'} + assert results['NM_001291593.1:c.1279-2T>A']['submitted_variant'] == 
'1-5935162-A-T' + assert results['NM_001291593.1:c.1279-2T>A']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_001291593.1):c.1279-2T>A' + assert results['NM_001291593.1:c.1279-2T>A']['hgvs_lrg_variant'] == '' + assert results['NM_001291593.1:c.1279-2T>A']['hgvs_transcript_variant'] == 'NM_001291593.1:c.1279-2T>A' + assert results['NM_001291593.1:c.1279-2T>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001291593.1:c.1279-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'ref': u'A', 'pos': '5935162', 'alt': u'T'}} + assert results['NM_001291593.1:c.1279-2T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '5875102', 'alt': 'T'}} + assert results['NM_001291593.1:c.1279-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'ref': u'A', 'pos': '5935162', 'alt': u'T'}} + assert results['NM_001291593.1:c.1279-2T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '5875102', 'alt': 'T'}} + assert results['NM_001291593.1:c.1279-2T>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278522.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291593.1'} + + assert 'NM_015102.4:c.2818-2T>A' in results.keys() + assert results['NM_015102.4:c.2818-2T>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_015102.4:c.2818-2T>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_015102.4:c.2818-2T>A']['alt_genomic_loci'] == [] + assert results['NM_015102.4:c.2818-2T>A']['transcript_description'] == 'Homo sapiens nephrocystin 4 (NPHP4), transcript variant 1, mRNA' + assert results['NM_015102.4:c.2818-2T>A']['gene_symbol'] == 'NPHP4' + assert 
results['NM_015102.4:c.2818-2T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055917.1:p.?', 'slr': 'NP_055917.1:p.?'} + assert results['NM_015102.4:c.2818-2T>A']['submitted_variant'] == '1-5935162-A-T' + assert results['NM_015102.4:c.2818-2T>A']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_015102.4):c.2818-2T>A' + assert results['NM_015102.4:c.2818-2T>A']['hgvs_lrg_variant'] == '' + assert results['NM_015102.4:c.2818-2T>A']['hgvs_transcript_variant'] == 'NM_015102.4:c.2818-2T>A' + assert results['NM_015102.4:c.2818-2T>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_015102.4:c.2818-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'ref': u'A', 'pos': '5935162', 'alt': u'T'}} + assert results['NM_015102.4:c.2818-2T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '5875102', 'alt': 'T'}} + assert results['NM_015102.4:c.2818-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'ref': u'A', 'pos': '5935162', 'alt': u'T'}} + assert results['NM_015102.4:c.2818-2T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '5875102', 'alt': 'T'}} + assert results['NM_015102.4:c.2818-2T>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055917.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015102.4'} + + assert 'NM_001291594.1:c.1282-2T>A' in results.keys() + assert results['NM_001291594.1:c.1282-2T>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001291594.1:c.1282-2T>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001291594.1:c.1282-2T>A']['alt_genomic_loci'] == [] + assert results['NM_001291594.1:c.1282-2T>A']['transcript_description'] == 'Homo 
sapiens nephrocystin 4 (NPHP4), transcript variant 3, mRNA' + assert results['NM_001291594.1:c.1282-2T>A']['gene_symbol'] == 'NPHP4' + assert results['NM_001291594.1:c.1282-2T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278523.1:p.?', 'slr': 'NP_001278523.1:p.?'} + assert results['NM_001291594.1:c.1282-2T>A']['submitted_variant'] == '1-5935162-A-T' + assert results['NM_001291594.1:c.1282-2T>A']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_001291594.1):c.1282-2T>A' + assert results['NM_001291594.1:c.1282-2T>A']['hgvs_lrg_variant'] == '' + assert results['NM_001291594.1:c.1282-2T>A']['hgvs_transcript_variant'] == 'NM_001291594.1:c.1282-2T>A' + assert results['NM_001291594.1:c.1282-2T>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001291594.1:c.1282-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'ref': u'A', 'pos': '5935162', 'alt': u'T'}} + assert results['NM_001291594.1:c.1282-2T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '5875102', 'alt': 'T'}} + assert results['NM_001291594.1:c.1282-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'ref': u'A', 'pos': '5935162', 'alt': u'T'}} + assert results['NM_001291594.1:c.1282-2T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '5875102', 'alt': 'T'}} + assert results['NM_001291594.1:c.1282-2T>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278523.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291594.1'} + + assert results['flag'] == 'gene_variant' + assert 'NR_111987.1:n.3633-2T>A' in results.keys() + assert results['NR_111987.1:n.3633-2T>A']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NR_111987.1:n.3633-2T>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_111987.1:n.3633-2T>A']['alt_genomic_loci'] == [] + assert results['NR_111987.1:n.3633-2T>A']['transcript_description'] == 'Homo sapiens nephrocystin 4 (NPHP4), transcript variant 4, non-coding RNA' + assert results['NR_111987.1:n.3633-2T>A']['gene_symbol'] == 'NPHP4' + assert results['NR_111987.1:n.3633-2T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_111987.1:n.3633-2T>A']['submitted_variant'] == '1-5935162-A-T' + assert results['NR_111987.1:n.3633-2T>A']['genome_context_intronic_sequence'] == 'NC_000001.10(NR_111987.1):c.3633-2T>A' + assert results['NR_111987.1:n.3633-2T>A']['hgvs_lrg_variant'] == '' + assert results['NR_111987.1:n.3633-2T>A']['hgvs_transcript_variant'] == 'NR_111987.1:n.3633-2T>A' + assert results['NR_111987.1:n.3633-2T>A']['hgvs_refseqgene_variant'] == '' + assert results['NR_111987.1:n.3633-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'ref': u'A', 'pos': '5935162', 'alt': u'T'}} + assert results['NR_111987.1:n.3633-2T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '5875102', 'alt': 'T'}} + assert results['NR_111987.1:n.3633-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'ref': u'A', 'pos': '5935162', 'alt': u'T'}} + assert results['NR_111987.1:n.3633-2T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '5875102', 'alt': 'T'}} + assert results['NR_111987.1:n.3633-2T>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_111987.1'} + + + def test_variant200(self): + variant = '1-12065948-C-T' + results = 
self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_001127660.1:c.1676C>T' in results.keys() + assert results['NM_001127660.1:c.1676C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001127660.1:c.1676C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001127660.1:c.1676C>T']['alt_genomic_loci'] == [] + assert results['NM_001127660.1:c.1676C>T']['transcript_description'] == 'Homo sapiens mitofusin 2 (MFN2), transcript variant 2, mRNA' + assert results['NM_001127660.1:c.1676C>T']['gene_symbol'] == 'MFN2' + assert results['NM_001127660.1:c.1676C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001121132.1:p.(Pro559Leu)', 'slr': 'NP_001121132.1:p.(P559L)'} + assert results['NM_001127660.1:c.1676C>T']['submitted_variant'] == '1-12065948-C-T' + assert results['NM_001127660.1:c.1676C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001127660.1:c.1676C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001127660.1:c.1676C>T']['hgvs_transcript_variant'] == 'NM_001127660.1:c.1676C>T' + assert results['NM_001127660.1:c.1676C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001127660.1:c.1676C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.12065948C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '12065948', 'alt': 'T'}} + assert results['NM_001127660.1:c.1676C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.12005891C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '12005891', 'alt': 'T'}} + assert results['NM_001127660.1:c.1676C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.12065948C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '12065948', 'alt': 'T'}} + assert results['NM_001127660.1:c.1676C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.12005891C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': 
'12005891', 'alt': 'T'}} + assert results['NM_001127660.1:c.1676C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001121132.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001127660.1'} + + assert 'NM_014874.3:c.1676C>T' in results.keys() + assert results['NM_014874.3:c.1676C>T']['hgvs_lrg_transcript_variant'] == 'LRG_255t1:c.1676C>T' + assert results['NM_014874.3:c.1676C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014874.3:c.1676C>T']['alt_genomic_loci'] == [] + assert results['NM_014874.3:c.1676C>T']['transcript_description'] == 'Homo sapiens mitofusin 2 (MFN2), transcript variant 1, mRNA' + assert results['NM_014874.3:c.1676C>T']['gene_symbol'] == 'MFN2' + assert results['NM_014874.3:c.1676C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055689.1(LRG_255p1):p.(Pro559Leu)', 'slr': 'NP_055689.1:p.(P559L)'} + assert results['NM_014874.3:c.1676C>T']['submitted_variant'] == '1-12065948-C-T' + assert results['NM_014874.3:c.1676C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_014874.3:c.1676C>T']['hgvs_lrg_variant'] == 'LRG_255:g.30711C>T' + assert results['NM_014874.3:c.1676C>T']['hgvs_transcript_variant'] == 'NM_014874.3:c.1676C>T' + assert results['NM_014874.3:c.1676C>T']['hgvs_refseqgene_variant'] == 'NG_007945.1:g.30711C>T' + assert results['NM_014874.3:c.1676C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.12065948C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '12065948', 'alt': 'T'}} + assert results['NM_014874.3:c.1676C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.12005891C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '12005891', 'alt': 'T'}} + assert results['NM_014874.3:c.1676C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.12065948C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '12065948', 'alt': 'T'}} + assert 
results['NM_014874.3:c.1676C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.12005891C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '12005891', 'alt': 'T'}} + assert results['NM_014874.3:c.1676C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007945.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055689.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014874.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_255.xml'} + + + def test_variant201(self): + variant = '1-46655125-CTCAC-C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001290129.1:c.1829+5_1829+8del' in results.keys() + assert results['NM_001290129.1:c.1829+5_1829+8del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001290129.1:c.1829+5_1829+8del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001290129.1:c.1829+5_1829+8del']['alt_genomic_loci'] == [] + assert results['NM_001290129.1:c.1829+5_1829+8del']['transcript_description'] == 'Homo sapiens protein O-linked mannose N-acetylglucosaminyltransferase 1 (beta 1,2-) (POMGNT1), transcript variant 3, mRNA' + assert results['NM_001290129.1:c.1829+5_1829+8del']['gene_symbol'] == 'POMGNT1' + assert results['NM_001290129.1:c.1829+5_1829+8del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001277058.1:p.?', 'slr': 'NP_001277058.1:p.?'} + assert results['NM_001290129.1:c.1829+5_1829+8del']['submitted_variant'] == '1-46655125-CTCAC-C' + assert results['NM_001290129.1:c.1829+5_1829+8del']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_001290129.1):c.1829+5_1829+8del' + assert results['NM_001290129.1:c.1829+5_1829+8del']['hgvs_lrg_variant'] == '' + assert results['NM_001290129.1:c.1829+5_1829+8del']['hgvs_transcript_variant'] == 'NM_001290129.1:c.1829+5_1829+8del' + assert results['NM_001290129.1:c.1829+5_1829+8del']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_001290129.1:c.1829+5_1829+8del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655122_46655125del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} + assert results['NM_001290129.1:c.1829+5_1829+8del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189450_46189453del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} + assert results['NM_001290129.1:c.1829+5_1829+8del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655122_46655125del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} + assert results['NM_001290129.1:c.1829+5_1829+8del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189450_46189453del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} + assert results['NM_001290129.1:c.1829+5_1829+8del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001277058.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001290129.1'} + + assert 'NM_001290130.1:c.1466+5_1466+8del' in results.keys() + assert results['NM_001290130.1:c.1466+5_1466+8del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001290130.1:c.1466+5_1466+8del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001290130.1:c.1466+5_1466+8del']['alt_genomic_loci'] == [] + assert results['NM_001290130.1:c.1466+5_1466+8del']['transcript_description'] == 'Homo sapiens protein O-linked mannose N-acetylglucosaminyltransferase 1 (beta 1,2-) (POMGNT1), transcript variant 4, mRNA' + assert results['NM_001290130.1:c.1466+5_1466+8del']['gene_symbol'] == 'POMGNT1' + assert results['NM_001290130.1:c.1466+5_1466+8del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001277059.1:p.?', 'slr': 'NP_001277059.1:p.?'} + assert results['NM_001290130.1:c.1466+5_1466+8del']['submitted_variant'] == 
'1-46655125-CTCAC-C' + assert results['NM_001290130.1:c.1466+5_1466+8del']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_001290130.1):c.1466+5_1466+8del' + assert results['NM_001290130.1:c.1466+5_1466+8del']['hgvs_lrg_variant'] == '' + assert results['NM_001290130.1:c.1466+5_1466+8del']['hgvs_transcript_variant'] == 'NM_001290130.1:c.1466+5_1466+8del' + assert results['NM_001290130.1:c.1466+5_1466+8del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001290130.1:c.1466+5_1466+8del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655122_46655125del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} + assert results['NM_001290130.1:c.1466+5_1466+8del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189450_46189453del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} + assert results['NM_001290130.1:c.1466+5_1466+8del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655122_46655125del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} + assert results['NM_001290130.1:c.1466+5_1466+8del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189450_46189453del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} + assert results['NM_001290130.1:c.1466+5_1466+8del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001277059.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001290130.1'} + + assert 'NM_017739.3:c.1895+5_1895+8del' in results.keys() + assert results['NM_017739.3:c.1895+5_1895+8del']['hgvs_lrg_transcript_variant'] == 'LRG_701t2:c.1895+5_1895+8del' + assert results['NM_017739.3:c.1895+5_1895+8del']['refseqgene_context_intronic_sequence'] == 'NG_009205.2(NM_017739.3):c.1895+5_1895+8del' + assert results['NM_017739.3:c.1895+5_1895+8del']['alt_genomic_loci'] == [] + assert 
results['NM_017739.3:c.1895+5_1895+8del']['transcript_description'] == 'Homo sapiens protein O-linked mannose N-acetylglucosaminyltransferase 1 (beta 1,2-) (POMGNT1), transcript variant 1, mRNA' + assert results['NM_017739.3:c.1895+5_1895+8del']['gene_symbol'] == 'POMGNT1' + assert results['NM_017739.3:c.1895+5_1895+8del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_060209.3(LRG_701p2):p.?', 'slr': 'NP_060209.3:p.?'} + assert results['NM_017739.3:c.1895+5_1895+8del']['submitted_variant'] == '1-46655125-CTCAC-C' + assert results['NM_017739.3:c.1895+5_1895+8del']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_017739.3):c.1895+5_1895+8del' + assert results['NM_017739.3:c.1895+5_1895+8del']['hgvs_lrg_variant'] == 'LRG_701:g.35853_35856del' + assert results['NM_017739.3:c.1895+5_1895+8del']['hgvs_transcript_variant'] == 'NM_017739.3:c.1895+5_1895+8del' + assert results['NM_017739.3:c.1895+5_1895+8del']['hgvs_refseqgene_variant'] == 'NG_009205.2:g.35853_35856del' + assert results['NM_017739.3:c.1895+5_1895+8del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655122_46655125del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} + assert results['NM_017739.3:c.1895+5_1895+8del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189450_46189453del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} + assert results['NM_017739.3:c.1895+5_1895+8del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655122_46655125del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} + assert results['NM_017739.3:c.1895+5_1895+8del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189450_46189453del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} + assert results['NM_017739.3:c.1895+5_1895+8del']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_009205.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_060209.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_017739.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_701.xml'} + + assert 'NM_001243766.1:c.1869+31_1869+34del' in results.keys() + assert results['NM_001243766.1:c.1869+31_1869+34del']['hgvs_lrg_transcript_variant'] == 'LRG_701t1:c.1869+31_1869+34del' + assert results['NM_001243766.1:c.1869+31_1869+34del']['refseqgene_context_intronic_sequence'] == 'NG_009205.2(NM_001243766.1):c.1869+31_1869+34del' + assert results['NM_001243766.1:c.1869+31_1869+34del']['alt_genomic_loci'] == [] + assert results['NM_001243766.1:c.1869+31_1869+34del']['transcript_description'] == 'Homo sapiens protein O-linked mannose N-acetylglucosaminyltransferase 1 (beta 1,2-) (POMGNT1), transcript variant 2, mRNA' + assert results['NM_001243766.1:c.1869+31_1869+34del']['gene_symbol'] == 'POMGNT1' + assert results['NM_001243766.1:c.1869+31_1869+34del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001230695.1:p.?', 'slr': 'NP_001230695.1:p.?'} + assert results['NM_001243766.1:c.1869+31_1869+34del']['submitted_variant'] == '1-46655125-CTCAC-C' + assert results['NM_001243766.1:c.1869+31_1869+34del']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_001243766.1):c.1869+31_1869+34del' + assert results['NM_001243766.1:c.1869+31_1869+34del']['hgvs_lrg_variant'] == 'LRG_701:g.35853_35856del' + assert results['NM_001243766.1:c.1869+31_1869+34del']['hgvs_transcript_variant'] == 'NM_001243766.1:c.1869+31_1869+34del' + assert results['NM_001243766.1:c.1869+31_1869+34del']['hgvs_refseqgene_variant'] == 'NG_009205.2:g.35853_35856del' + assert results['NM_001243766.1:c.1869+31_1869+34del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655122_46655125del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} + assert 
results['NM_001243766.1:c.1869+31_1869+34del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189450_46189453del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} + assert results['NM_001243766.1:c.1869+31_1869+34del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655122_46655125del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} + assert results['NM_001243766.1:c.1869+31_1869+34del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189450_46189453del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} + assert results['NM_001243766.1:c.1869+31_1869+34del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009205.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001230695.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001243766.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_701.xml'} + + assert results['flag'] == 'gene_variant' + + def test_variant202(self): + variant = '1-68912523-TGAGCCAGAG-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_000329.2:c.106_114del' in results.keys() + assert results['NM_000329.2:c.106_114del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000329.2:c.106_114del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000329.2:c.106_114del']['alt_genomic_loci'] == [] + assert results['NM_000329.2:c.106_114del']['transcript_description'] == 'Homo sapiens RPE65, retinoid isomerohydrolase (RPE65), mRNA' + assert results['NM_000329.2:c.106_114del']['gene_symbol'] == 'RPE65' + assert results['NM_000329.2:c.106_114del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000320.1:p.(Leu36_Leu38del)', 'slr': 'NP_000320.1:p.(L36_L38del)'} + assert results['NM_000329.2:c.106_114del']['submitted_variant'] == '1-68912523-TGAGCCAGAG-T' + 
assert results['NM_000329.2:c.106_114del']['genome_context_intronic_sequence'] == '' + assert results['NM_000329.2:c.106_114del']['hgvs_lrg_variant'] == '' + assert results['NM_000329.2:c.106_114del']['hgvs_transcript_variant'] == 'NM_000329.2:c.106_114del' + assert results['NM_000329.2:c.106_114del']['hgvs_refseqgene_variant'] == 'NG_008472.1:g.8111_8119del' + assert results['NM_000329.2:c.106_114del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.68912524_68912532del', 'vcf': {'chr': 'chr1', 'ref': 'TGAGCCAGAG', 'pos': '68912523', 'alt': 'T'}} + assert results['NM_000329.2:c.106_114del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.68446841_68446849del', 'vcf': {'chr': 'chr1', 'ref': 'TGAGCCAGAG', 'pos': '68446840', 'alt': 'T'}} + assert results['NM_000329.2:c.106_114del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.68912524_68912532del', 'vcf': {'chr': '1', 'ref': 'TGAGCCAGAG', 'pos': '68912523', 'alt': 'T'}} + assert results['NM_000329.2:c.106_114del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.68446841_68446849del', 'vcf': {'chr': '1', 'ref': 'TGAGCCAGAG', 'pos': '68446840', 'alt': 'T'}} + assert results['NM_000329.2:c.106_114del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008472.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000320.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000329.2'} + + assert results['flag'] == 'gene_variant' + + def test_variant203(self): + variant = '1-68912526-GCCAGAG-G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_000329.2:c.109_114del' in results.keys() + assert results['NM_000329.2:c.109_114del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000329.2:c.109_114del']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_000329.2:c.109_114del']['alt_genomic_loci'] == [] + assert results['NM_000329.2:c.109_114del']['transcript_description'] == 'Homo sapiens RPE65, retinoid isomerohydrolase (RPE65), mRNA' + assert results['NM_000329.2:c.109_114del']['gene_symbol'] == 'RPE65' + assert results['NM_000329.2:c.109_114del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000320.1:p.(Trp37_Leu38del)', 'slr': 'NP_000320.1:p.(W37_L38del)'} + assert results['NM_000329.2:c.109_114del']['submitted_variant'] == '1-68912526-GCCAGAG-G' + assert results['NM_000329.2:c.109_114del']['genome_context_intronic_sequence'] == '' + assert results['NM_000329.2:c.109_114del']['hgvs_lrg_variant'] == '' + assert results['NM_000329.2:c.109_114del']['hgvs_transcript_variant'] == 'NM_000329.2:c.109_114del' + assert results['NM_000329.2:c.109_114del']['hgvs_refseqgene_variant'] == 'NG_008472.1:g.8114_8119del' + assert results['NM_000329.2:c.109_114del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.68912524_68912529del', 'vcf': {'chr': 'chr1', 'ref': 'TGAGCCA', 'pos': '68912523', 'alt': 'T'}} + assert results['NM_000329.2:c.109_114del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.68446841_68446846del', 'vcf': {'chr': 'chr1', 'ref': 'TGAGCCA', 'pos': '68446840', 'alt': 'T'}} + assert results['NM_000329.2:c.109_114del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.68912524_68912529del', 'vcf': {'chr': '1', 'ref': 'TGAGCCA', 'pos': '68912523', 'alt': 'T'}} + assert results['NM_000329.2:c.109_114del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.68446841_68446846del', 'vcf': {'chr': '1', 'ref': 'TGAGCCA', 'pos': '68446840', 'alt': 'T'}} + assert results['NM_000329.2:c.109_114del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008472.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000320.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_000329.2'} + + assert results['flag'] == 'gene_variant' + + def test_variant204(self): + variant = '1-109817590-G-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_001408.2:c.*919G>T' in results.keys() + assert results['NM_001408.2:c.*919G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001408.2:c.*919G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001408.2:c.*919G>T']['alt_genomic_loci'] == [] + assert results['NM_001408.2:c.*919G>T']['transcript_description'] == 'Homo sapiens cadherin EGF LAG seven-pass G-type receptor 2 (CELSR2), mRNA' + assert results['NM_001408.2:c.*919G>T']['gene_symbol'] == 'CELSR2' + assert results['NM_001408.2:c.*919G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001399.1:p.?', 'slr': 'NP_001399.1:p.?'} + assert results['NM_001408.2:c.*919G>T']['submitted_variant'] == '1-109817590-G-T' + assert results['NM_001408.2:c.*919G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001408.2:c.*919G>T']['hgvs_lrg_variant'] == '' + assert results['NM_001408.2:c.*919G>T']['hgvs_transcript_variant'] == 'NM_001408.2:c.*919G>T' + assert results['NM_001408.2:c.*919G>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001408.2:c.*919G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.109817590G>T', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '109817590', 'alt': 'T'}} + assert results['NM_001408.2:c.*919G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.109274968G>T', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '109274968', 'alt': 'T'}} + assert results['NM_001408.2:c.*919G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.109817590G>T', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '109817590', 'alt': 'T'}} + assert 
results['NM_001408.2:c.*919G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.109274968G>T', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '109274968', 'alt': 'T'}} + assert results['NM_001408.2:c.*919G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001408.2'} + + + def test_variant205(self): + variant = '1-145597475-GAAGT-G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_006468.6:c.1070+35_1070+38del' in results.keys() + assert results['NM_006468.6:c.1070+35_1070+38del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_006468.6:c.1070+35_1070+38del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_006468.6:c.1070+35_1070+38del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'HG1287_PATCH', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'NW_003871055.3', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}] + assert results['NM_006468.6:c.1070+35_1070+38del']['transcript_description'] == 'Homo sapiens polymerase (RNA) III (DNA directed) polypeptide C (62kD) (POLR3C), mRNA' + assert results['NM_006468.6:c.1070+35_1070+38del']['gene_symbol'] == 'POLR3C' + assert results['NM_006468.6:c.1070+35_1070+38del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006459.3:p.?', 'slr': 'NP_006459.3:p.?'} + assert results['NM_006468.6:c.1070+35_1070+38del']['submitted_variant'] == '1-145597475-GAAGT-G' + assert results['NM_006468.6:c.1070+35_1070+38del']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_006468.6):c.1070+35_1070+38del' + assert results['NM_006468.6:c.1070+35_1070+38del']['hgvs_lrg_variant'] == '' + assert 
results['NM_006468.6:c.1070+35_1070+38del']['hgvs_transcript_variant'] == 'NM_006468.6:c.1070+35_1070+38del' + assert results['NM_006468.6:c.1070+35_1070+38del']['hgvs_refseqgene_variant'] == '' + assert results['NM_006468.6:c.1070+35_1070+38del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.145597477_145597480del', 'vcf': {'chr': 'chr1', 'ref': 'GAAGT', 'pos': '145597475', 'alt': 'G'}} + assert 'hg38' not in results['NM_006468.6:c.1070+35_1070+38del']['primary_assembly_loci'].keys() + assert results['NM_006468.6:c.1070+35_1070+38del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.145597477_145597480del', 'vcf': {'chr': '1', 'ref': 'GAAGT', 'pos': '145597475', 'alt': 'G'}} + assert 'grch38' not in results['NM_006468.6:c.1070+35_1070+38del']['primary_assembly_loci'].keys() + assert results['NM_006468.6:c.1070+35_1070+38del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_006459.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006468.6'} + + assert 'NM_001303456.1:c.1109+35_1109+38del' in results.keys() + assert results['NM_001303456.1:c.1109+35_1109+38del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001303456.1:c.1109+35_1109+38del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001303456.1:c.1109+35_1109+38del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'HG1287_PATCH', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'NW_003871055.3', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}] + assert results['NM_001303456.1:c.1109+35_1109+38del']['transcript_description'] == 'Homo sapiens RNA polymerase III subunit C (POLR3C), transcript variant 2, mRNA' + assert results['NM_001303456.1:c.1109+35_1109+38del']['gene_symbol'] == 'POLR3C' + assert 
results['NM_001303456.1:c.1109+35_1109+38del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001290385.1:p.?', 'slr': 'NP_001290385.1:p.?'} + assert results['NM_001303456.1:c.1109+35_1109+38del']['submitted_variant'] == '1-145597475-GAAGT-G' + assert results['NM_001303456.1:c.1109+35_1109+38del']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_001303456.1):c.1109+35_1109+38del' + assert results['NM_001303456.1:c.1109+35_1109+38del']['hgvs_lrg_variant'] == '' + assert results['NM_001303456.1:c.1109+35_1109+38del']['hgvs_transcript_variant'] == 'NM_001303456.1:c.1109+35_1109+38del' + assert results['NM_001303456.1:c.1109+35_1109+38del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001303456.1:c.1109+35_1109+38del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.145597477_145597480del', 'vcf': {'chr': 'chr1', 'ref': 'GAAGT', 'pos': '145597475', 'alt': 'G'}} + assert results['NM_001303456.1:c.1109+35_1109+38del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.145837631_145837634del', 'vcf': {'chr': 'chr1', 'ref': 'ATACT', 'pos': '145837629', 'alt': 'A'}} + assert results['NM_001303456.1:c.1109+35_1109+38del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.145597477_145597480del', 'vcf': {'chr': '1', 'ref': 'GAAGT', 'pos': '145597475', 'alt': 'G'}} + assert results['NM_001303456.1:c.1109+35_1109+38del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.145837631_145837634del', 'vcf': {'chr': '1', 'ref': 'ATACT', 'pos': '145837629', 'alt': 'A'}} + assert results['NM_001303456.1:c.1109+35_1109+38del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001290385.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001303456.1'} + + assert 'NM_006468.7:c.1070+35_1070+38del' in results.keys() + assert results['NM_006468.7:c.1070+35_1070+38del']['hgvs_lrg_transcript_variant'] 
== '' + assert results['NM_006468.7:c.1070+35_1070+38del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_006468.7:c.1070+35_1070+38del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'HG1287_PATCH', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'NW_003871055.3', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}] + assert results['NM_006468.7:c.1070+35_1070+38del']['transcript_description'] == 'Homo sapiens RNA polymerase III subunit C (POLR3C), transcript variant 1, mRNA' + assert results['NM_006468.7:c.1070+35_1070+38del']['gene_symbol'] == 'POLR3C' + assert results['NM_006468.7:c.1070+35_1070+38del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006459.3:p.?', 'slr': 'NP_006459.3:p.?'} + assert results['NM_006468.7:c.1070+35_1070+38del']['submitted_variant'] == '1-145597475-GAAGT-G' + assert results['NM_006468.7:c.1070+35_1070+38del']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_006468.7):c.1070+35_1070+38del' + assert results['NM_006468.7:c.1070+35_1070+38del']['hgvs_lrg_variant'] == '' + assert results['NM_006468.7:c.1070+35_1070+38del']['hgvs_transcript_variant'] == 'NM_006468.7:c.1070+35_1070+38del' + assert results['NM_006468.7:c.1070+35_1070+38del']['hgvs_refseqgene_variant'] == '' + assert results['NM_006468.7:c.1070+35_1070+38del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.145597477_145597480del', 'vcf': {'chr': 'chr1', 'ref': 'GAAGT', 'pos': '145597475', 'alt': 'G'}} + assert results['NM_006468.7:c.1070+35_1070+38del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.145837631_145837634del', 'vcf': {'chr': 'chr1', 'ref': 'ATACT', 'pos': '145837629', 'alt': 'A'}} + assert results['NM_006468.7:c.1070+35_1070+38del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000001.10:g.145597477_145597480del', 'vcf': {'chr': '1', 'ref': 'GAAGT', 'pos': '145597475', 'alt': 'G'}} + assert results['NM_006468.7:c.1070+35_1070+38del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.145837631_145837634del', 'vcf': {'chr': '1', 'ref': 'ATACT', 'pos': '145837629', 'alt': 'A'}} + assert results['NM_006468.7:c.1070+35_1070+38del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_006459.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006468.7'} + + + def test_variant206(self): + variant = '1-153791300-CTG-C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_020699.2:c.562_563del' in results.keys() + assert results['NM_020699.2:c.562_563del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_020699.2:c.562_563del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_020699.2:c.562_563del']['alt_genomic_loci'] == [] + assert results['NM_020699.2:c.562_563del']['transcript_description'] == 'Homo sapiens GATA zinc finger domain containing 2B (GATAD2B), mRNA' + assert results['NM_020699.2:c.562_563del']['gene_symbol'] == 'GATAD2B' + assert results['NM_020699.2:c.562_563del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065750.1:p.(Gln188GlufsTer36)', 'slr': 'NP_065750.1:p.(Q188Efs*36)'} + assert results['NM_020699.2:c.562_563del']['submitted_variant'] == '1-153791300-CTG-C' + assert results['NM_020699.2:c.562_563del']['genome_context_intronic_sequence'] == '' + assert results['NM_020699.2:c.562_563del']['hgvs_lrg_variant'] == '' + assert results['NM_020699.2:c.562_563del']['hgvs_transcript_variant'] == 'NM_020699.2:c.562_563del' + assert results['NM_020699.2:c.562_563del']['hgvs_refseqgene_variant'] == '' + assert results['NM_020699.2:c.562_563del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.153791301_153791302del', 
'vcf': {'chr': 'chr1', 'ref': 'CTG', 'pos': '153791300', 'alt': 'C'}} + assert results['NM_020699.2:c.562_563del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153818825_153818826del', 'vcf': {'chr': 'chr1', 'ref': 'CTG', 'pos': '153818824', 'alt': 'C'}} + assert results['NM_020699.2:c.562_563del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.153791301_153791302del', 'vcf': {'chr': '1', 'ref': 'CTG', 'pos': '153791300', 'alt': 'C'}} + assert results['NM_020699.2:c.562_563del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153818825_153818826del', 'vcf': {'chr': '1', 'ref': 'CTG', 'pos': '153818824', 'alt': 'C'}} + assert results['NM_020699.2:c.562_563del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065750.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020699.2'} + + assert 'NM_020699.3:c.562_563del' in results.keys() + assert results['NM_020699.3:c.562_563del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_020699.3:c.562_563del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_020699.3:c.562_563del']['alt_genomic_loci'] == [] + assert results['NM_020699.3:c.562_563del']['transcript_description'] == 'Homo sapiens GATA zinc finger domain containing 2B (GATAD2B), mRNA' + assert results['NM_020699.3:c.562_563del']['gene_symbol'] == 'GATAD2B' + assert results['NM_020699.3:c.562_563del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065750.1:p.(Gln188GlufsTer36)', 'slr': 'NP_065750.1:p.(Q188Efs*36)'} + assert results['NM_020699.3:c.562_563del']['submitted_variant'] == '1-153791300-CTG-C' + assert results['NM_020699.3:c.562_563del']['genome_context_intronic_sequence'] == '' + assert results['NM_020699.3:c.562_563del']['hgvs_lrg_variant'] == '' + assert results['NM_020699.3:c.562_563del']['hgvs_transcript_variant'] == 'NM_020699.3:c.562_563del' + assert 
results['NM_020699.3:c.562_563del']['hgvs_refseqgene_variant'] == '' + assert results['NM_020699.3:c.562_563del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.153791301_153791302del', 'vcf': {'chr': 'chr1', 'ref': 'CTG', 'pos': '153791300', 'alt': 'C'}} + assert results['NM_020699.3:c.562_563del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153818825_153818826del', 'vcf': {'chr': 'chr1', 'ref': 'CTG', 'pos': '153818824', 'alt': 'C'}} + assert results['NM_020699.3:c.562_563del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.153791301_153791302del', 'vcf': {'chr': '1', 'ref': 'CTG', 'pos': '153791300', 'alt': 'C'}} + assert results['NM_020699.3:c.562_563del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153818825_153818826del', 'vcf': {'chr': '1', 'ref': 'CTG', 'pos': '153818824', 'alt': 'C'}} + assert results['NM_020699.3:c.562_563del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065750.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020699.3'} + + + def test_variant207(self): + variant = '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_005572.3:c.711_734delinsCCCC' in results.keys() + assert results['NM_005572.3:c.711_734delinsCCCC']['hgvs_lrg_transcript_variant'] == 'LRG_254t1:c.711_734delinsCCCC' + assert results['NM_005572.3:c.711_734delinsCCCC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_005572.3:c.711_734delinsCCCC']['alt_genomic_loci'] == [] + assert results['NM_005572.3:c.711_734delinsCCCC']['transcript_description'] == 'Homo sapiens lamin A/C (LMNA), transcript variant 2, mRNA' + assert results['NM_005572.3:c.711_734delinsCCCC']['gene_symbol'] == 'LMNA' + assert results['NM_005572.3:c.711_734delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_005563.1(LRG_254p1):p.(Glu238ProfsTer9)', 'slr': 'NP_005563.1:p.(E238Pfs*9)'} + assert results['NM_005572.3:c.711_734delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' + assert results['NM_005572.3:c.711_734delinsCCCC']['genome_context_intronic_sequence'] == '' + assert results['NM_005572.3:c.711_734delinsCCCC']['hgvs_lrg_variant'] == 'LRG_254:g.57304_57327delinsCCCC' + assert results['NM_005572.3:c.711_734delinsCCCC']['hgvs_transcript_variant'] == 'NM_005572.3:c.711_734delinsCCCC' + assert results['NM_005572.3:c.711_734delinsCCCC']['hgvs_refseqgene_variant'] == 'NG_008692.2:g.57304_57327delinsCCCC' + assert results['NM_005572.3:c.711_734delinsCCCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} + assert results['NM_005572.3:c.711_734delinsCCCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} + assert results['NM_005572.3:c.711_734delinsCCCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} + assert results['NM_005572.3:c.711_734delinsCCCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} + assert results['NM_005572.3:c.711_734delinsCCCC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008692.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005563.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005572.3', 'lrg': 
'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_254.xml'} + + assert 'NM_001257374.1:c.375_398delinsCCCC' in results.keys() + assert results['NM_001257374.1:c.375_398delinsCCCC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001257374.1:c.375_398delinsCCCC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001257374.1:c.375_398delinsCCCC']['alt_genomic_loci'] == [] + assert results['NM_001257374.1:c.375_398delinsCCCC']['transcript_description'] == 'Homo sapiens lamin A/C (LMNA), transcript variant 4, mRNA' + assert results['NM_001257374.1:c.375_398delinsCCCC']['gene_symbol'] == 'LMNA' + assert results['NM_001257374.1:c.375_398delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001244303.1:p.(Glu126ProfsTer9)', 'slr': 'NP_001244303.1:p.(E126Pfs*9)'} + assert results['NM_001257374.1:c.375_398delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' + assert results['NM_001257374.1:c.375_398delinsCCCC']['genome_context_intronic_sequence'] == '' + assert results['NM_001257374.1:c.375_398delinsCCCC']['hgvs_lrg_variant'] == '' + assert results['NM_001257374.1:c.375_398delinsCCCC']['hgvs_transcript_variant'] == 'NM_001257374.1:c.375_398delinsCCCC' + assert results['NM_001257374.1:c.375_398delinsCCCC']['hgvs_refseqgene_variant'] == '' + assert results['NM_001257374.1:c.375_398delinsCCCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} + assert 'hg38' not in results['NM_001257374.1:c.375_398delinsCCCC']['primary_assembly_loci'].keys() + assert results['NM_001257374.1:c.375_398delinsCCCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} + assert 'grch38' not in 
results['NM_001257374.1:c.375_398delinsCCCC']['primary_assembly_loci'].keys() + assert results['NM_001257374.1:c.375_398delinsCCCC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244303.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257374.1'} + + assert 'NM_001257374.2:c.375_398delinsCCCC' in results.keys() + assert results['NM_001257374.2:c.375_398delinsCCCC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001257374.2:c.375_398delinsCCCC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001257374.2:c.375_398delinsCCCC']['alt_genomic_loci'] == [] + assert results['NM_001257374.2:c.375_398delinsCCCC']['transcript_description'] == 'Homo sapiens lamin A/C (LMNA), transcript variant 4, mRNA' + assert results['NM_001257374.2:c.375_398delinsCCCC']['gene_symbol'] == 'LMNA' + assert results['NM_001257374.2:c.375_398delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001244303.1:p.(Glu126ProfsTer9)', 'slr': 'NP_001244303.1:p.(E126Pfs*9)'} + assert results['NM_001257374.2:c.375_398delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' + assert results['NM_001257374.2:c.375_398delinsCCCC']['genome_context_intronic_sequence'] == '' + assert results['NM_001257374.2:c.375_398delinsCCCC']['hgvs_lrg_variant'] == '' + assert results['NM_001257374.2:c.375_398delinsCCCC']['hgvs_transcript_variant'] == 'NM_001257374.2:c.375_398delinsCCCC' + assert results['NM_001257374.2:c.375_398delinsCCCC']['hgvs_refseqgene_variant'] == '' + assert results['NM_001257374.2:c.375_398delinsCCCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} + assert results['NM_001257374.2:c.375_398delinsCCCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': 
{'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} + assert results['NM_001257374.2:c.375_398delinsCCCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} + assert results['NM_001257374.2:c.375_398delinsCCCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} + assert results['NM_001257374.2:c.375_398delinsCCCC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244303.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257374.2'} + + assert 'NM_001282624.1:c.468_491delinsCCCC' in results.keys() + assert results['NM_001282624.1:c.468_491delinsCCCC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001282624.1:c.468_491delinsCCCC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001282624.1:c.468_491delinsCCCC']['alt_genomic_loci'] == [] + assert results['NM_001282624.1:c.468_491delinsCCCC']['transcript_description'] == 'Homo sapiens lamin A/C (LMNA), transcript variant 5, mRNA' + assert results['NM_001282624.1:c.468_491delinsCCCC']['gene_symbol'] == 'LMNA' + assert results['NM_001282624.1:c.468_491delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269553.1:p.(Glu157ProfsTer9)', 'slr': 'NP_001269553.1:p.(E157Pfs*9)'} + assert results['NM_001282624.1:c.468_491delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' + assert results['NM_001282624.1:c.468_491delinsCCCC']['genome_context_intronic_sequence'] == '' + assert results['NM_001282624.1:c.468_491delinsCCCC']['hgvs_lrg_variant'] == '' + assert results['NM_001282624.1:c.468_491delinsCCCC']['hgvs_transcript_variant'] == 
'NM_001282624.1:c.468_491delinsCCCC' + assert results['NM_001282624.1:c.468_491delinsCCCC']['hgvs_refseqgene_variant'] == '' + assert results['NM_001282624.1:c.468_491delinsCCCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} + assert results['NM_001282624.1:c.468_491delinsCCCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} + assert results['NM_001282624.1:c.468_491delinsCCCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} + assert results['NM_001282624.1:c.468_491delinsCCCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} + assert results['NM_001282624.1:c.468_491delinsCCCC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269553.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282624.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_170708.3:c.711_734delinsCCCC' in results.keys() + assert results['NM_170708.3:c.711_734delinsCCCC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_170708.3:c.711_734delinsCCCC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_170708.3:c.711_734delinsCCCC']['alt_genomic_loci'] == [] + assert results['NM_170708.3:c.711_734delinsCCCC']['transcript_description'] == 'Homo sapiens lamin A/C (LMNA), transcript variant 3, mRNA' + assert results['NM_170708.3:c.711_734delinsCCCC']['gene_symbol'] == 'LMNA' + assert 
results['NM_170708.3:c.711_734delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_733822.1(LRG_254p3):p.(Glu238ProfsTer9)', 'slr': 'NP_733822.1:p.(E238Pfs*9)'} + assert results['NM_170708.3:c.711_734delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' + assert results['NM_170708.3:c.711_734delinsCCCC']['genome_context_intronic_sequence'] == '' + assert results['NM_170708.3:c.711_734delinsCCCC']['hgvs_lrg_variant'] == '' + assert results['NM_170708.3:c.711_734delinsCCCC']['hgvs_transcript_variant'] == 'NM_170708.3:c.711_734delinsCCCC' + assert results['NM_170708.3:c.711_734delinsCCCC']['hgvs_refseqgene_variant'] == '' + assert results['NM_170708.3:c.711_734delinsCCCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} + assert results['NM_170708.3:c.711_734delinsCCCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} + assert results['NM_170708.3:c.711_734delinsCCCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} + assert results['NM_170708.3:c.711_734delinsCCCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} + assert results['NM_170708.3:c.711_734delinsCCCC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_733822.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_170708.3'} + + assert 'NM_170707.3:c.711_734delinsCCCC' in results.keys() + assert 
results['NM_170707.3:c.711_734delinsCCCC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_170707.3:c.711_734delinsCCCC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_170707.3:c.711_734delinsCCCC']['alt_genomic_loci'] == [] + assert results['NM_170707.3:c.711_734delinsCCCC']['transcript_description'] == 'Homo sapiens lamin A/C (LMNA), transcript variant 1, mRNA' + assert results['NM_170707.3:c.711_734delinsCCCC']['gene_symbol'] == 'LMNA' + assert results['NM_170707.3:c.711_734delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_733821.1(LRG_254p2):p.(Glu238ProfsTer9)', 'slr': 'NP_733821.1:p.(E238Pfs*9)'} + assert results['NM_170707.3:c.711_734delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' + assert results['NM_170707.3:c.711_734delinsCCCC']['genome_context_intronic_sequence'] == '' + assert results['NM_170707.3:c.711_734delinsCCCC']['hgvs_lrg_variant'] == '' + assert results['NM_170707.3:c.711_734delinsCCCC']['hgvs_transcript_variant'] == 'NM_170707.3:c.711_734delinsCCCC' + assert results['NM_170707.3:c.711_734delinsCCCC']['hgvs_refseqgene_variant'] == '' + assert results['NM_170707.3:c.711_734delinsCCCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} + assert results['NM_170707.3:c.711_734delinsCCCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} + assert results['NM_170707.3:c.711_734delinsCCCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} + assert 
results['NM_170707.3:c.711_734delinsCCCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} + assert results['NM_170707.3:c.711_734delinsCCCC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_733821.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_170707.3'} + + assert 'NM_001282626.1:c.711_734delinsCCCC' in results.keys() + assert results['NM_001282626.1:c.711_734delinsCCCC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001282626.1:c.711_734delinsCCCC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001282626.1:c.711_734delinsCCCC']['alt_genomic_loci'] == [] + assert results['NM_001282626.1:c.711_734delinsCCCC']['transcript_description'] == 'Homo sapiens lamin A/C (LMNA), transcript variant 7, mRNA' + assert results['NM_001282626.1:c.711_734delinsCCCC']['gene_symbol'] == 'LMNA' + assert results['NM_001282626.1:c.711_734delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269555.1:p.(Glu238ProfsTer9)', 'slr': 'NP_001269555.1:p.(E238Pfs*9)'} + assert results['NM_001282626.1:c.711_734delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' + assert results['NM_001282626.1:c.711_734delinsCCCC']['genome_context_intronic_sequence'] == '' + assert results['NM_001282626.1:c.711_734delinsCCCC']['hgvs_lrg_variant'] == '' + assert results['NM_001282626.1:c.711_734delinsCCCC']['hgvs_transcript_variant'] == 'NM_001282626.1:c.711_734delinsCCCC' + assert results['NM_001282626.1:c.711_734delinsCCCC']['hgvs_refseqgene_variant'] == '' + assert results['NM_001282626.1:c.711_734delinsCCCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} + assert 
results['NM_001282626.1:c.711_734delinsCCCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} + assert results['NM_001282626.1:c.711_734delinsCCCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} + assert results['NM_001282626.1:c.711_734delinsCCCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} + assert results['NM_001282626.1:c.711_734delinsCCCC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269555.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282626.1'} + + assert 'NM_001282625.1:c.711_734delinsCCCC' in results.keys() + assert results['NM_001282625.1:c.711_734delinsCCCC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001282625.1:c.711_734delinsCCCC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001282625.1:c.711_734delinsCCCC']['alt_genomic_loci'] == [] + assert results['NM_001282625.1:c.711_734delinsCCCC']['transcript_description'] == 'Homo sapiens lamin A/C (LMNA), transcript variant 6, mRNA' + assert results['NM_001282625.1:c.711_734delinsCCCC']['gene_symbol'] == 'LMNA' + assert results['NM_001282625.1:c.711_734delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269554.1:p.(Glu238ProfsTer9)', 'slr': 'NP_001269554.1:p.(E238Pfs*9)'} + assert results['NM_001282625.1:c.711_734delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' + assert results['NM_001282625.1:c.711_734delinsCCCC']['genome_context_intronic_sequence'] == '' + assert 
results['NM_001282625.1:c.711_734delinsCCCC']['hgvs_lrg_variant'] == '' + assert results['NM_001282625.1:c.711_734delinsCCCC']['hgvs_transcript_variant'] == 'NM_001282625.1:c.711_734delinsCCCC' + assert results['NM_001282625.1:c.711_734delinsCCCC']['hgvs_refseqgene_variant'] == '' + assert results['NM_001282625.1:c.711_734delinsCCCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} + assert results['NM_001282625.1:c.711_734delinsCCCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} + assert results['NM_001282625.1:c.711_734delinsCCCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} + assert results['NM_001282625.1:c.711_734delinsCCCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} + assert results['NM_001282625.1:c.711_734delinsCCCC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269554.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282625.1'} + + + def test_variant208(self): + variant = '1-156108541-G-GG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_170707.3:c.1961dup' in results.keys() + assert results['NM_170707.3:c.1961dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_170707.3:c.1961dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_170707.3:c.1961dup']['alt_genomic_loci'] == [] + assert 
results['NM_170707.3:c.1961dup']['transcript_description'] == 'Homo sapiens lamin A/C (LMNA), transcript variant 1, mRNA' + assert results['NM_170707.3:c.1961dup']['gene_symbol'] == 'LMNA' + assert results['NM_170707.3:c.1961dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_733821.1(LRG_254p2):p.(Thr655AsnfsTer49)', 'slr': 'NP_733821.1:p.(T655Nfs*49)'} + assert results['NM_170707.3:c.1961dup']['submitted_variant'] == '1-156108541-G-GG' + assert results['NM_170707.3:c.1961dup']['genome_context_intronic_sequence'] == '' + assert results['NM_170707.3:c.1961dup']['hgvs_lrg_variant'] == '' + assert results['NM_170707.3:c.1961dup']['hgvs_transcript_variant'] == 'NM_170707.3:c.1961dup' + assert results['NM_170707.3:c.1961dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_170707.3:c.1961dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '156108540', 'alt': 'CG'}} + assert results['NM_170707.3:c.1961dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '156138749', 'alt': 'CG'}} + assert results['NM_170707.3:c.1961dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '156108540', 'alt': 'CG'}} + assert results['NM_170707.3:c.1961dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '156138749', 'alt': 'CG'}} + assert results['NM_170707.3:c.1961dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_733821.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_170707.3'} + + assert 'NM_001282626.1:c.1818+143dup' in results.keys() + assert results['NM_001282626.1:c.1818+143dup']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001282626.1:c.1818+143dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001282626.1:c.1818+143dup']['alt_genomic_loci'] == [] + assert results['NM_001282626.1:c.1818+143dup']['transcript_description'] == 'Homo sapiens lamin A/C (LMNA), transcript variant 7, mRNA' + assert results['NM_001282626.1:c.1818+143dup']['gene_symbol'] == 'LMNA' + assert results['NM_001282626.1:c.1818+143dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269555.1:p.?', 'slr': 'NP_001269555.1:p.?'} + assert results['NM_001282626.1:c.1818+143dup']['submitted_variant'] == '1-156108541-G-GG' + assert results['NM_001282626.1:c.1818+143dup']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_001282626.1):c.1818+143dup' + assert results['NM_001282626.1:c.1818+143dup']['hgvs_lrg_variant'] == '' + assert results['NM_001282626.1:c.1818+143dup']['hgvs_transcript_variant'] == 'NM_001282626.1:c.1818+143dup' + assert results['NM_001282626.1:c.1818+143dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001282626.1:c.1818+143dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '156108540', 'alt': 'CG'}} + assert results['NM_001282626.1:c.1818+143dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '156138749', 'alt': 'CG'}} + assert results['NM_001282626.1:c.1818+143dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '156108540', 'alt': 'CG'}} + assert results['NM_001282626.1:c.1818+143dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '156138749', 'alt': 'CG'}} + assert results['NM_001282626.1:c.1818+143dup']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269555.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282626.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001257374.2:c.1625dup' in results.keys() + assert results['NM_001257374.2:c.1625dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001257374.2:c.1625dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001257374.2:c.1625dup']['alt_genomic_loci'] == [] + assert results['NM_001257374.2:c.1625dup']['transcript_description'] == 'Homo sapiens lamin A/C (LMNA), transcript variant 4, mRNA' + assert results['NM_001257374.2:c.1625dup']['gene_symbol'] == 'LMNA' + assert results['NM_001257374.2:c.1625dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001244303.1:p.(Thr543AsnfsTer90)', 'slr': 'NP_001244303.1:p.(T543Nfs*90)'} + assert results['NM_001257374.2:c.1625dup']['submitted_variant'] == '1-156108541-G-GG' + assert results['NM_001257374.2:c.1625dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001257374.2:c.1625dup']['hgvs_lrg_variant'] == '' + assert results['NM_001257374.2:c.1625dup']['hgvs_transcript_variant'] == 'NM_001257374.2:c.1625dup' + assert results['NM_001257374.2:c.1625dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001257374.2:c.1625dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '156108540', 'alt': 'CG'}} + assert results['NM_001257374.2:c.1625dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '156138749', 'alt': 'CG'}} + assert results['NM_001257374.2:c.1625dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '156108540', 'alt': 'CG'}} + assert results['NM_001257374.2:c.1625dup']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '156138749', 'alt': 'CG'}} + assert results['NM_001257374.2:c.1625dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244303.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257374.2'} + + assert 'NM_170708.3:c.1871dup' in results.keys() + assert results['NM_170708.3:c.1871dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_170708.3:c.1871dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_170708.3:c.1871dup']['alt_genomic_loci'] == [] + assert results['NM_170708.3:c.1871dup']['transcript_description'] == 'Homo sapiens lamin A/C (LMNA), transcript variant 3, mRNA' + assert results['NM_170708.3:c.1871dup']['gene_symbol'] == 'LMNA' + assert results['NM_170708.3:c.1871dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_733822.1(LRG_254p3):p.(Thr625AsnfsTer49)', 'slr': 'NP_733822.1:p.(T625Nfs*49)'} + assert results['NM_170708.3:c.1871dup']['submitted_variant'] == '1-156108541-G-GG' + assert results['NM_170708.3:c.1871dup']['genome_context_intronic_sequence'] == '' + assert results['NM_170708.3:c.1871dup']['hgvs_lrg_variant'] == '' + assert results['NM_170708.3:c.1871dup']['hgvs_transcript_variant'] == 'NM_170708.3:c.1871dup' + assert results['NM_170708.3:c.1871dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_170708.3:c.1871dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '156108540', 'alt': 'CG'}} + assert results['NM_170708.3:c.1871dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '156138749', 'alt': 'CG'}} + assert results['NM_170708.3:c.1871dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': '1', 'ref': 'C', 
'pos': '156108540', 'alt': 'CG'}} + assert results['NM_170708.3:c.1871dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '156138749', 'alt': 'CG'}} + assert results['NM_170708.3:c.1871dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_733822.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_170708.3'} + + assert 'NM_001257374.1:c.1625dup' in results.keys() + assert results['NM_001257374.1:c.1625dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001257374.1:c.1625dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001257374.1:c.1625dup']['alt_genomic_loci'] == [] + assert results['NM_001257374.1:c.1625dup']['transcript_description'] == 'Homo sapiens lamin A/C (LMNA), transcript variant 4, mRNA' + assert results['NM_001257374.1:c.1625dup']['gene_symbol'] == 'LMNA' + assert results['NM_001257374.1:c.1625dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001244303.1:p.(Thr543AsnfsTer90)', 'slr': 'NP_001244303.1:p.(T543Nfs*90)'} + assert results['NM_001257374.1:c.1625dup']['submitted_variant'] == '1-156108541-G-GG' + assert results['NM_001257374.1:c.1625dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001257374.1:c.1625dup']['hgvs_lrg_variant'] == '' + assert results['NM_001257374.1:c.1625dup']['hgvs_transcript_variant'] == 'NM_001257374.1:c.1625dup' + assert results['NM_001257374.1:c.1625dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001257374.1:c.1625dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '156108540', 'alt': 'CG'}} + assert 'hg38' not in results['NM_001257374.1:c.1625dup']['primary_assembly_loci'].keys() + assert results['NM_001257374.1:c.1625dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': 
{'chr': '1', 'ref': 'C', 'pos': '156108540', 'alt': 'CG'}} + assert 'grch38' not in results['NM_001257374.1:c.1625dup']['primary_assembly_loci'].keys() + assert results['NM_001257374.1:c.1625dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244303.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257374.1'} + + + def test_variant209(self): + variant = '1-161279695-T-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_001315491.1:c.1A>T' in results.keys() + assert results['NM_001315491.1:c.1A>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001315491.1:c.1A>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001315491.1:c.1A>T']['alt_genomic_loci'] == [] + assert results['NM_001315491.1:c.1A>T']['transcript_description'] == 'Homo sapiens myelin protein zero (MPZ), transcript variant 1, mRNA' + assert results['NM_001315491.1:c.1A>T']['gene_symbol'] == 'MPZ' + assert results['NM_001315491.1:c.1A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001302420.1:p.(Met1?)', 'slr': 'NP_001302420.1:p.(M1?)'} + assert results['NM_001315491.1:c.1A>T']['submitted_variant'] == '1-161279695-T-A' + assert results['NM_001315491.1:c.1A>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001315491.1:c.1A>T']['hgvs_lrg_variant'] == '' + assert results['NM_001315491.1:c.1A>T']['hgvs_transcript_variant'] == 'NM_001315491.1:c.1A>T' + assert results['NM_001315491.1:c.1A>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001315491.1:c.1A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': 'chr1', 'ref': u'T', 'pos': '161279695', 'alt': u'A'}} + assert results['NM_001315491.1:c.1A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.161309905T>A', 'vcf': {'chr': 'chr1', 'ref': u'T', 'pos': '161309905', 
'alt': u'A'}} + assert results['NM_001315491.1:c.1A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': '1', 'ref': u'T', 'pos': '161279695', 'alt': u'A'}} + assert results['NM_001315491.1:c.1A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.161309905T>A', 'vcf': {'chr': '1', 'ref': u'T', 'pos': '161309905', 'alt': u'A'}} + assert results['NM_001315491.1:c.1A>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001302420.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001315491.1'} + + assert 'NM_000530.7:c.1A>T' in results.keys() + assert results['NM_000530.7:c.1A>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000530.7:c.1A>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000530.7:c.1A>T']['alt_genomic_loci'] == [] + assert results['NM_000530.7:c.1A>T']['transcript_description'] == 'Homo sapiens myelin protein zero (MPZ), transcript variant 1, mRNA' + assert results['NM_000530.7:c.1A>T']['gene_symbol'] == 'MPZ' + assert results['NM_000530.7:c.1A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000521.2(LRG_256p1):p.(Met1?)', 'slr': 'NP_000521.2:p.(M1?)'} + assert results['NM_000530.7:c.1A>T']['submitted_variant'] == '1-161279695-T-A' + assert results['NM_000530.7:c.1A>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000530.7:c.1A>T']['hgvs_lrg_variant'] == '' + assert results['NM_000530.7:c.1A>T']['hgvs_transcript_variant'] == 'NM_000530.7:c.1A>T' + assert results['NM_000530.7:c.1A>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_000530.7:c.1A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': 'chr1', 'ref': u'T', 'pos': '161279695', 'alt': u'A'}} + assert results['NM_000530.7:c.1A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.161309905T>A', 
'vcf': {'chr': 'chr1', 'ref': u'T', 'pos': '161309905', 'alt': u'A'}} + assert results['NM_000530.7:c.1A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': '1', 'ref': u'T', 'pos': '161279695', 'alt': u'A'}} + assert results['NM_000530.7:c.1A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.161309905T>A', 'vcf': {'chr': '1', 'ref': u'T', 'pos': '161309905', 'alt': u'A'}} + assert results['NM_000530.7:c.1A>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000521.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000530.7'} + + assert 'NM_000530.6:c.1A>T' in results.keys() + assert results['NM_000530.6:c.1A>T']['hgvs_lrg_transcript_variant'] == 'LRG_256t1:c.1A>T' + assert results['NM_000530.6:c.1A>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000530.6:c.1A>T']['alt_genomic_loci'] == [] + assert results['NM_000530.6:c.1A>T']['transcript_description'] == 'Homo sapiens myelin protein zero (MPZ), mRNA' + assert results['NM_000530.6:c.1A>T']['gene_symbol'] == 'MPZ' + assert results['NM_000530.6:c.1A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000521.2(LRG_256p1):p.(Met1?)', 'slr': 'NP_000521.2:p.(M1?)'} + assert results['NM_000530.6:c.1A>T']['submitted_variant'] == '1-161279695-T-A' + assert results['NM_000530.6:c.1A>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000530.6:c.1A>T']['hgvs_lrg_variant'] == 'LRG_256:g.5068A>T' + assert results['NM_000530.6:c.1A>T']['hgvs_transcript_variant'] == 'NM_000530.6:c.1A>T' + assert results['NM_000530.6:c.1A>T']['hgvs_refseqgene_variant'] == 'NG_008055.1:g.5068A>T' + assert results['NM_000530.6:c.1A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': 'chr1', 'ref': u'T', 'pos': '161279695', 'alt': u'A'}} + assert 'hg38' not in 
results['NM_000530.6:c.1A>T']['primary_assembly_loci'].keys() + assert results['NM_000530.6:c.1A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': '1', 'ref': u'T', 'pos': '161279695', 'alt': u'A'}} + assert 'grch38' not in results['NM_000530.6:c.1A>T']['primary_assembly_loci'].keys() + assert results['NM_000530.6:c.1A>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008055.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000521.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000530.6', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_256.xml'} + + + def test_variant210(self): + variant = '1-169519049-T-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000130.4:c.1601G>A' in results.keys() + assert results['NM_000130.4:c.1601G>A']['hgvs_lrg_transcript_variant'] == 'LRG_553t1:c.1601G>A' + assert results['NM_000130.4:c.1601G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000130.4:c.1601G>A']['alt_genomic_loci'] == [] + assert results['NM_000130.4:c.1601G>A']['transcript_description'] == 'Homo sapiens coagulation factor V (F5), mRNA' + assert results['NM_000130.4:c.1601G>A']['gene_symbol'] == 'F5' + assert results['NM_000130.4:c.1601G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000121.2(LRG_553p1):p.(Arg534Gln)', 'slr': 'NP_000121.2:p.(R534Q)'} + assert results['NM_000130.4:c.1601G>A']['submitted_variant'] == '1-169519049-T-T' + assert results['NM_000130.4:c.1601G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_000130.4:c.1601G>A']['hgvs_lrg_variant'] == 'LRG_553:g.41721G>A' + assert results['NM_000130.4:c.1601G>A']['hgvs_transcript_variant'] == 'NM_000130.4:c.1601G>A' + assert results['NM_000130.4:c.1601G>A']['hgvs_refseqgene_variant'] == 'NG_011806.1:g.41721G>A' + assert 
results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.169519049T=', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '169519049', 'alt': 'T'}} + assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549811C>T', 'vcf': {'chr': 'chr1', 'ref': u'C', 'pos': '169549811', 'alt': u'T'}} + assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.169519049T=', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '169519049', 'alt': 'T'}} + assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549811C>T', 'vcf': {'chr': '1', 'ref': u'C', 'pos': '169549811', 'alt': u'T'}} + assert results['NM_000130.4:c.1601G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011806.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000121.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000130.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_553.xml'} + + + def test_variant211(self): + variant = '1-226125468-G-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_003240.4:c.774C>T' in results.keys() + assert results['NM_003240.4:c.774C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003240.4:c.774C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003240.4:c.774C>T']['alt_genomic_loci'] == [] + assert results['NM_003240.4:c.774C>T']['transcript_description'] == 'Homo sapiens left-right determination factor 2 (LEFTY2), transcript variant 1, mRNA' + assert results['NM_003240.4:c.774C>T']['gene_symbol'] == 'LEFTY2' + assert results['NM_003240.4:c.774C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003231.2:p.(Thr258=)', 'slr': 'NP_003231.2:p.(T258=)'} + assert 
results['NM_003240.4:c.774C>T']['submitted_variant'] == '1-226125468-G-A' + assert results['NM_003240.4:c.774C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_003240.4:c.774C>T']['hgvs_lrg_variant'] == '' + assert results['NM_003240.4:c.774C>T']['hgvs_transcript_variant'] == 'NM_003240.4:c.774C>T' + assert results['NM_003240.4:c.774C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_003240.4:c.774C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '226125468', 'alt': u'A'}} + assert results['NM_003240.4:c.774C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '225937768', 'alt': u'A'}} + assert results['NM_003240.4:c.774C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '226125468', 'alt': u'A'}} + assert results['NM_003240.4:c.774C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '225937768', 'alt': u'A'}} + assert results['NM_003240.4:c.774C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003231.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003240.4'} + + assert 'NM_003240.3:c.774C>T' in results.keys() + assert results['NM_003240.3:c.774C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003240.3:c.774C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003240.3:c.774C>T']['alt_genomic_loci'] == [] + assert results['NM_003240.3:c.774C>T']['transcript_description'] == 'Homo sapiens left-right determination factor 2 (LEFTY2), transcript variant 1, mRNA' + assert results['NM_003240.3:c.774C>T']['gene_symbol'] == 'LEFTY2' + assert 
results['NM_003240.3:c.774C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003231.2:p.(Thr258=)', 'slr': 'NP_003231.2:p.(T258=)'} + assert results['NM_003240.3:c.774C>T']['submitted_variant'] == '1-226125468-G-A' + assert results['NM_003240.3:c.774C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_003240.3:c.774C>T']['hgvs_lrg_variant'] == '' + assert results['NM_003240.3:c.774C>T']['hgvs_transcript_variant'] == 'NM_003240.3:c.774C>T' + assert results['NM_003240.3:c.774C>T']['hgvs_refseqgene_variant'] == 'NG_008118.1:g.8453C>T' + assert results['NM_003240.3:c.774C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '226125468', 'alt': u'A'}} + assert results['NM_003240.3:c.774C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '225937768', 'alt': u'A'}} + assert results['NM_003240.3:c.774C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '226125468', 'alt': u'A'}} + assert results['NM_003240.3:c.774C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '225937768', 'alt': u'A'}} + assert results['NM_003240.3:c.774C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008118.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003231.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003240.3'} + + assert 'NM_001172425.1:c.672C>T' in results.keys() + assert results['NM_001172425.1:c.672C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001172425.1:c.672C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001172425.1:c.672C>T']['alt_genomic_loci'] == [] + assert 
results['NM_001172425.1:c.672C>T']['transcript_description'] == 'Homo sapiens left-right determination factor 2 (LEFTY2), transcript variant 2, mRNA' + assert results['NM_001172425.1:c.672C>T']['gene_symbol'] == 'LEFTY2' + assert results['NM_001172425.1:c.672C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001165896.1:p.(Thr224=)', 'slr': 'NP_001165896.1:p.(T224=)'} + assert results['NM_001172425.1:c.672C>T']['submitted_variant'] == '1-226125468-G-A' + assert results['NM_001172425.1:c.672C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001172425.1:c.672C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001172425.1:c.672C>T']['hgvs_transcript_variant'] == 'NM_001172425.1:c.672C>T' + assert results['NM_001172425.1:c.672C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001172425.1:c.672C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '226125468', 'alt': u'A'}} + assert results['NM_001172425.1:c.672C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '225937768', 'alt': u'A'}} + assert results['NM_001172425.1:c.672C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '226125468', 'alt': u'A'}} + assert results['NM_001172425.1:c.672C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '225937768', 'alt': u'A'}} + assert results['NM_001172425.1:c.672C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001165896.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001172425.1'} + + assert 'NM_001172425.2:c.672C>T' in results.keys() + assert results['NM_001172425.2:c.672C>T']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001172425.2:c.672C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001172425.2:c.672C>T']['alt_genomic_loci'] == [] + assert results['NM_001172425.2:c.672C>T']['transcript_description'] == 'Homo sapiens left-right determination factor 2 (LEFTY2), transcript variant 2, mRNA' + assert results['NM_001172425.2:c.672C>T']['gene_symbol'] == 'LEFTY2' + assert results['NM_001172425.2:c.672C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001165896.1:p.(Thr224=)', 'slr': 'NP_001165896.1:p.(T224=)'} + assert results['NM_001172425.2:c.672C>T']['submitted_variant'] == '1-226125468-G-A' + assert results['NM_001172425.2:c.672C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001172425.2:c.672C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001172425.2:c.672C>T']['hgvs_transcript_variant'] == 'NM_001172425.2:c.672C>T' + assert results['NM_001172425.2:c.672C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001172425.2:c.672C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '226125468', 'alt': u'A'}} + assert results['NM_001172425.2:c.672C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '225937768', 'alt': u'A'}} + assert results['NM_001172425.2:c.672C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '226125468', 'alt': u'A'}} + assert results['NM_001172425.2:c.672C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '225937768', 'alt': u'A'}} + assert results['NM_001172425.2:c.672C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001165896.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001172425.2'} + + assert results['flag'] == 'gene_variant' + + def test_variant212(self): + variant = '10-89623035-CGCA-C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_001126049.1:c.-794_-792del' in results.keys() + assert results['NM_001126049.1:c.-794_-792del']['hgvs_lrg_transcript_variant'] == 'LRG_1087t1:c.-794_-792del' + assert results['NM_001126049.1:c.-794_-792del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001126049.1:c.-794_-792del']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NW_013171807.1:g.79106_79108del', 'vcf': {'chr': 'HG2334_PATCH', 'ref': 'CGCA', 'pos': '79102', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NW_013171807.1:g.79106_79108del', 'vcf': {'chr': 'NW_013171807.1', 'ref': 'CGCA', 'pos': '79102', 'alt': 'C'}}}] + assert results['NM_001126049.1:c.-794_-792del']['transcript_description'] == 'Homo sapiens killin, p53 regulated DNA replication inhibitor (KLLN), mRNA' + assert results['NM_001126049.1:c.-794_-792del']['gene_symbol'] == 'KLLN' + assert results['NM_001126049.1:c.-794_-792del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119521.1:p.?', 'slr': 'NP_001119521.1:p.?'} + assert results['NM_001126049.1:c.-794_-792del']['submitted_variant'] == '10-89623035-CGCA-C' + assert results['NM_001126049.1:c.-794_-792del']['genome_context_intronic_sequence'] == '' + assert results['NM_001126049.1:c.-794_-792del']['hgvs_lrg_variant'] == 'LRG_1087:g.5157_5159del' + assert results['NM_001126049.1:c.-794_-792del']['hgvs_transcript_variant'] == 'NM_001126049.1:c.-794_-792del' + assert results['NM_001126049.1:c.-794_-792del']['hgvs_refseqgene_variant'] == 'NG_033079.1:g.5157_5159del' + assert results['NM_001126049.1:c.-794_-792del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000010.10:g.89623039_89623041del', 'vcf': {'chr': 'chr10', 'ref': 
'CGCA', 'pos': '89623035', 'alt': 'C'}} + assert results['NM_001126049.1:c.-794_-792del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000010.11:g.87863282_87863284del', 'vcf': {'chr': 'chr10', 'ref': 'CGCA', 'pos': '87863278', 'alt': 'C'}} + assert results['NM_001126049.1:c.-794_-792del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000010.10:g.89623039_89623041del', 'vcf': {'chr': '10', 'ref': 'CGCA', 'pos': '89623035', 'alt': 'C'}} + assert results['NM_001126049.1:c.-794_-792del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000010.11:g.87863282_87863284del', 'vcf': {'chr': '10', 'ref': 'CGCA', 'pos': '87863278', 'alt': 'C'}} + assert results['NM_001126049.1:c.-794_-792del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_033079.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119521.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126049.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_1087.xml'} + + + def test_variant213(self): + variant = '11-62457852-C-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NR_037946.1:n.3896G>T' in results.keys() + assert results['NR_037946.1:n.3896G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_037946.1:n.3896G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_037946.1:n.3896G>T']['alt_genomic_loci'] == [] + assert results['NR_037946.1:n.3896G>T']['transcript_description'] == 'Homo sapiens HNRNPUL2-BSCL2 readthrough (NMD candidate) (HNRNPUL2-BSCL2), long non-coding RNA' + assert results['NR_037946.1:n.3896G>T']['gene_symbol'] == 'HNRNPUL2-BSCL2' + assert results['NR_037946.1:n.3896G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_037946.1:n.3896G>T']['submitted_variant'] == '11-62457852-C-A' + assert 
results['NR_037946.1:n.3896G>T']['genome_context_intronic_sequence'] == '' + assert results['NR_037946.1:n.3896G>T']['hgvs_lrg_variant'] == '' + assert results['NR_037946.1:n.3896G>T']['hgvs_transcript_variant'] == 'NR_037946.1:n.3896G>T' + assert results['NR_037946.1:n.3896G>T']['hgvs_refseqgene_variant'] == '' + assert results['NR_037946.1:n.3896G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '62457852', 'alt': u'A'}} + assert results['NR_037946.1:n.3896G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '62690380', 'alt': u'A'}} + assert results['NR_037946.1:n.3896G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '62457852', 'alt': u'A'}} + assert results['NR_037946.1:n.3896G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '62690380', 'alt': u'A'}} + assert results['NR_037946.1:n.3896G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_037946.1'} + + assert 'NM_032667.6:c.1184G>T' in results.keys() + assert results['NM_032667.6:c.1184G>T']['hgvs_lrg_transcript_variant'] == 'LRG_235t2:c.1184G>T' + assert results['NM_032667.6:c.1184G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_032667.6:c.1184G>T']['alt_genomic_loci'] == [] + assert results['NM_032667.6:c.1184G>T']['transcript_description'] == 'Homo sapiens BSCL2, seipin lipid droplet biogenesis associated (BSCL2), transcript variant 2, mRNA' + assert results['NM_032667.6:c.1184G>T']['gene_symbol'] == 'BSCL2' + assert results['NM_032667.6:c.1184G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116056.3(LRG_235p2):p.(Cys395Phe)', 'slr': 'NP_116056.3:p.(C395F)'} + assert 
results['NM_032667.6:c.1184G>T']['submitted_variant'] == '11-62457852-C-A' + assert results['NM_032667.6:c.1184G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_032667.6:c.1184G>T']['hgvs_lrg_variant'] == '' + assert results['NM_032667.6:c.1184G>T']['hgvs_transcript_variant'] == 'NM_032667.6:c.1184G>T' + assert results['NM_032667.6:c.1184G>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_032667.6:c.1184G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '62457852', 'alt': u'A'}} + assert results['NM_032667.6:c.1184G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '62690380', 'alt': u'A'}} + assert results['NM_032667.6:c.1184G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '62457852', 'alt': u'A'}} + assert results['NM_032667.6:c.1184G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '62690380', 'alt': u'A'}} + assert results['NM_032667.6:c.1184G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116056.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032667.6'} + + assert 'NR_037949.1:n.1984G>T' in results.keys() + assert results['NR_037949.1:n.1984G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_037949.1:n.1984G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_037949.1:n.1984G>T']['alt_genomic_loci'] == [] + assert results['NR_037949.1:n.1984G>T']['transcript_description'] == 'Homo sapiens BSCL2, seipin lipid droplet biogenesis associated (BSCL2), transcript variant 5, non-coding RNA' + assert results['NR_037949.1:n.1984G>T']['gene_symbol'] == 'BSCL2' + assert 
results['NR_037949.1:n.1984G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_037949.1:n.1984G>T']['submitted_variant'] == '11-62457852-C-A' + assert results['NR_037949.1:n.1984G>T']['genome_context_intronic_sequence'] == '' + assert results['NR_037949.1:n.1984G>T']['hgvs_lrg_variant'] == '' + assert results['NR_037949.1:n.1984G>T']['hgvs_transcript_variant'] == 'NR_037949.1:n.1984G>T' + assert results['NR_037949.1:n.1984G>T']['hgvs_refseqgene_variant'] == '' + assert results['NR_037949.1:n.1984G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '62457852', 'alt': u'A'}} + assert results['NR_037949.1:n.1984G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '62690380', 'alt': u'A'}} + assert results['NR_037949.1:n.1984G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '62457852', 'alt': u'A'}} + assert results['NR_037949.1:n.1984G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '62690380', 'alt': u'A'}} + assert results['NR_037949.1:n.1984G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_037949.1'} + + assert 'NR_037948.1:n.1978G>T' in results.keys() + assert results['NR_037948.1:n.1978G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_037948.1:n.1978G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_037948.1:n.1978G>T']['alt_genomic_loci'] == [] + assert results['NR_037948.1:n.1978G>T']['transcript_description'] == 'Homo sapiens BSCL2, seipin lipid droplet biogenesis associated (BSCL2), transcript variant 4, non-coding RNA' + assert 
results['NR_037948.1:n.1978G>T']['gene_symbol'] == 'BSCL2' + assert results['NR_037948.1:n.1978G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_037948.1:n.1978G>T']['submitted_variant'] == '11-62457852-C-A' + assert results['NR_037948.1:n.1978G>T']['genome_context_intronic_sequence'] == '' + assert results['NR_037948.1:n.1978G>T']['hgvs_lrg_variant'] == '' + assert results['NR_037948.1:n.1978G>T']['hgvs_transcript_variant'] == 'NR_037948.1:n.1978G>T' + assert results['NR_037948.1:n.1978G>T']['hgvs_refseqgene_variant'] == '' + assert results['NR_037948.1:n.1978G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '62457852', 'alt': u'A'}} + assert results['NR_037948.1:n.1978G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '62690380', 'alt': u'A'}} + assert results['NR_037948.1:n.1978G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '62457852', 'alt': u'A'}} + assert results['NR_037948.1:n.1978G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '62690380', 'alt': u'A'}} + assert results['NR_037948.1:n.1978G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_037948.1'} + + assert 'NM_001122955.3:c.1376G>T' in results.keys() + assert results['NM_001122955.3:c.1376G>T']['hgvs_lrg_transcript_variant'] == 'LRG_235t1:c.1376G>T' + assert results['NM_001122955.3:c.1376G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001122955.3:c.1376G>T']['alt_genomic_loci'] == [] + assert results['NM_001122955.3:c.1376G>T']['transcript_description'] == 'Homo sapiens BSCL2, seipin lipid 
droplet biogenesis associated (BSCL2), transcript variant 1, mRNA' + assert results['NM_001122955.3:c.1376G>T']['gene_symbol'] == 'BSCL2' + assert results['NM_001122955.3:c.1376G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001116427.1:p.(Cys459Phe)', 'slr': 'NP_001116427.1:p.(C459F)'} + assert results['NM_001122955.3:c.1376G>T']['submitted_variant'] == '11-62457852-C-A' + assert results['NM_001122955.3:c.1376G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001122955.3:c.1376G>T']['hgvs_lrg_variant'] == 'LRG_235:g.24195G>T' + assert results['NM_001122955.3:c.1376G>T']['hgvs_transcript_variant'] == 'NM_001122955.3:c.1376G>T' + assert results['NM_001122955.3:c.1376G>T']['hgvs_refseqgene_variant'] == 'NG_008461.1:g.24195G>T' + assert results['NM_001122955.3:c.1376G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '62457852', 'alt': u'A'}} + assert results['NM_001122955.3:c.1376G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '62690380', 'alt': u'A'}} + assert results['NM_001122955.3:c.1376G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '62457852', 'alt': u'A'}} + assert results['NM_001122955.3:c.1376G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '62690380', 'alt': u'A'}} + assert results['NM_001122955.3:c.1376G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008461.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001116427.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001122955.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_235.xml'} + + assert results['flag'] == 'gene_variant' + assert 
'NM_001130702.2:c.*178G>T' in results.keys() + assert results['NM_001130702.2:c.*178G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001130702.2:c.*178G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001130702.2:c.*178G>T']['alt_genomic_loci'] == [] + assert results['NM_001130702.2:c.*178G>T']['transcript_description'] == 'Homo sapiens BSCL2, seipin lipid droplet biogenesis associated (BSCL2), transcript variant 3, mRNA' + assert results['NM_001130702.2:c.*178G>T']['gene_symbol'] == 'BSCL2' + assert results['NM_001130702.2:c.*178G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124174.2:p.?', 'slr': 'NP_001124174.2:p.?'} + assert results['NM_001130702.2:c.*178G>T']['submitted_variant'] == '11-62457852-C-A' + assert results['NM_001130702.2:c.*178G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001130702.2:c.*178G>T']['hgvs_lrg_variant'] == '' + assert results['NM_001130702.2:c.*178G>T']['hgvs_transcript_variant'] == 'NM_001130702.2:c.*178G>T' + assert results['NM_001130702.2:c.*178G>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001130702.2:c.*178G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '62457852', 'alt': u'A'}} + assert results['NM_001130702.2:c.*178G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '62690380', 'alt': u'A'}} + assert results['NM_001130702.2:c.*178G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '62457852', 'alt': u'A'}} + assert results['NM_001130702.2:c.*178G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '62690380', 'alt': u'A'}} + assert 
results['NM_001130702.2:c.*178G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124174.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130702.2'} + + + def test_variant214(self): + variant = '11-108178710-A-AT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001351834.1:c.5761_5762insT' in results.keys() + assert results['NM_001351834.1:c.5761_5762insT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001351834.1:c.5761_5762insT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001351834.1:c.5761_5762insT']['alt_genomic_loci'] == [] + assert results['NM_001351834.1:c.5761_5762insT']['transcript_description'] == 'Homo sapiens ATM serine/threonine kinase (ATM), transcript variant 1, mRNA' + assert results['NM_001351834.1:c.5761_5762insT']['gene_symbol'] == 'ATM' + assert results['NM_001351834.1:c.5761_5762insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001338763.1:p.(Arg1921MetfsTer9)', 'slr': 'NP_001338763.1:p.(R1921Mfs*9)'} + assert results['NM_001351834.1:c.5761_5762insT']['submitted_variant'] == '11-108178710-A-AT' + assert results['NM_001351834.1:c.5761_5762insT']['genome_context_intronic_sequence'] == '' + assert results['NM_001351834.1:c.5761_5762insT']['hgvs_lrg_variant'] == '' + assert results['NM_001351834.1:c.5761_5762insT']['hgvs_transcript_variant'] == 'NM_001351834.1:c.5761_5762insT' + assert results['NM_001351834.1:c.5761_5762insT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001351834.1:c.5761_5762insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.108178710_108178711insT', 'vcf': {'chr': 'chr11', 'ref': 'A', 'pos': '108178710', 'alt': 'AT'}} + assert results['NM_001351834.1:c.5761_5762insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.108307983_108307984insT', 'vcf': {'chr': 'chr11', 'ref': 'A', 'pos': '108307983', 'alt': 'AT'}} + 
assert results['NM_001351834.1:c.5761_5762insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.108178710_108178711insT', 'vcf': {'chr': '11', 'ref': 'A', 'pos': '108178710', 'alt': 'AT'}} + assert results['NM_001351834.1:c.5761_5762insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.108307983_108307984insT', 'vcf': {'chr': '11', 'ref': 'A', 'pos': '108307983', 'alt': 'AT'}} + assert results['NM_001351834.1:c.5761_5762insT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001338763.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001351834.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_000051.3:c.5761_5762insT' in results.keys() + assert results['NM_000051.3:c.5761_5762insT']['hgvs_lrg_transcript_variant'] == 'LRG_135t1:c.5761_5762insT' + assert results['NM_000051.3:c.5761_5762insT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000051.3:c.5761_5762insT']['alt_genomic_loci'] == [] + assert results['NM_000051.3:c.5761_5762insT']['transcript_description'] == 'Homo sapiens ATM serine/threonine kinase (ATM), transcript variant 2, mRNA' + assert results['NM_000051.3:c.5761_5762insT']['gene_symbol'] == 'ATM' + assert results['NM_000051.3:c.5761_5762insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000042.3(LRG_135p1):p.(Arg1921MetfsTer9)', 'slr': 'NP_000042.3:p.(R1921Mfs*9)'} + assert results['NM_000051.3:c.5761_5762insT']['submitted_variant'] == '11-108178710-A-AT' + assert results['NM_000051.3:c.5761_5762insT']['genome_context_intronic_sequence'] == '' + assert results['NM_000051.3:c.5761_5762insT']['hgvs_lrg_variant'] == 'LRG_135:g.90152_90153insT' + assert results['NM_000051.3:c.5761_5762insT']['hgvs_transcript_variant'] == 'NM_000051.3:c.5761_5762insT' + assert results['NM_000051.3:c.5761_5762insT']['hgvs_refseqgene_variant'] == 'NG_009830.1:g.90152_90153insT' + assert 
results['NM_000051.3:c.5761_5762insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.108178710_108178711insT', 'vcf': {'chr': 'chr11', 'ref': 'A', 'pos': '108178710', 'alt': 'AT'}} + assert results['NM_000051.3:c.5761_5762insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.108307983_108307984insT', 'vcf': {'chr': 'chr11', 'ref': 'A', 'pos': '108307983', 'alt': 'AT'}} + assert results['NM_000051.3:c.5761_5762insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.108178710_108178711insT', 'vcf': {'chr': '11', 'ref': 'A', 'pos': '108178710', 'alt': 'AT'}} + assert results['NM_000051.3:c.5761_5762insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.108307983_108307984insT', 'vcf': {'chr': '11', 'ref': 'A', 'pos': '108307983', 'alt': 'AT'}} + assert results['NM_000051.3:c.5761_5762insT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009830.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000042.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000051.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_135.xml'} + + + def test_variant215(self): + variant = '11-111735981-G-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001352419.1:c.-108-7C>T' in results.keys() + assert results['NM_001352419.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352419.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001352419.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert 
results['NM_001352419.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 15, mRNA' + assert results['NM_001352419.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352419.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339348.1:p.?', 'slr': 'NP_001339348.1:p.?'} + assert results['NM_001352419.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001352419.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352419.1):c.-108-7C>T' + assert results['NM_001352419.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001352419.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352419.1:c.-108-7C>T' + assert results['NM_001352419.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001352419.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352419.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352419.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352419.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352419.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339348.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352419.1'} + + assert 'NM_001352412.1:c.-108-7C>T' in results.keys() + assert 
results['NM_001352412.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352412.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001352412.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352412.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 8, mRNA' + assert results['NM_001352412.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352412.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339341.1:p.?', 'slr': 'NP_001339341.1:p.?'} + assert results['NM_001352412.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001352412.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352412.1):c.-108-7C>T' + assert results['NM_001352412.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001352412.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352412.1:c.-108-7C>T' + assert results['NM_001352412.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001352412.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352412.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352412.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 
u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352412.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352412.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339341.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352412.1'} + + assert 'NM_001077692.1:c.-108-7C>T' in results.keys() + assert results['NM_001077692.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001077692.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001077692.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001077692.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 4, mRNA' + assert results['NM_001077692.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001077692.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001071160.1:p.?', 'slr': 'NP_001071160.1:p.?'} + assert results['NM_001077692.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001077692.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001077692.1):c.-108-7C>T' + assert results['NM_001077692.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001077692.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001077692.1:c.-108-7C>T' + assert results['NM_001077692.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_001077692.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001077692.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001077692.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001077692.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001077692.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001071160.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077692.1'} + + assert 'NM_001352418.1:c.406-7C>T' in results.keys() + assert results['NM_001352418.1:c.406-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352418.1:c.406-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001352418.1:c.406-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352418.1:c.406-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 14, mRNA' + assert results['NM_001352418.1:c.406-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352418.1:c.406-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_001339347.1:p.?', 'slr': 'NP_001339347.1:p.?'} + assert results['NM_001352418.1:c.406-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001352418.1:c.406-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352418.1):c.406-7C>T' + assert results['NM_001352418.1:c.406-7C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001352418.1:c.406-7C>T']['hgvs_transcript_variant'] == 'NM_001352418.1:c.406-7C>T' + assert results['NM_001352418.1:c.406-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001352418.1:c.406-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352418.1:c.406-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352418.1:c.406-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352418.1:c.406-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352418.1:c.406-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339347.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352418.1'} + + assert 'NM_001352423.1:c.-108-7C>T' in results.keys() + assert results['NM_001352423.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352423.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001352423.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 
'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352423.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 19, mRNA' + assert results['NM_001352423.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352423.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339352.1:p.?', 'slr': 'NP_001339352.1:p.?'} + assert results['NM_001352423.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001352423.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352423.1):c.-108-7C>T' + assert results['NM_001352423.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001352423.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352423.1:c.-108-7C>T' + assert results['NM_001352423.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001352423.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352423.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352423.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352423.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352423.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339352.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352423.1'} + + assert 'NM_001352415.1:c.-108-7C>T' in results.keys() + assert results['NM_001352415.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352415.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001352415.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352415.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 11, mRNA' + assert results['NM_001352415.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352415.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339344.1:p.?', 'slr': 'NP_001339344.1:p.?'} + assert results['NM_001352415.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001352415.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352415.1):c.-108-7C>T' + assert results['NM_001352415.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001352415.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352415.1:c.-108-7C>T' + assert results['NM_001352415.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001352415.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352415.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': 
'111865258', 'alt': u'A'}} + assert results['NM_001352415.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352415.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352415.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339344.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352415.1'} + + assert 'NM_001352421.1:c.-108-7C>T' in results.keys() + assert results['NM_001352421.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352421.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001352421.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352421.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 17, mRNA' + assert results['NM_001352421.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352421.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339350.1:p.?', 'slr': 'NP_001339350.1:p.?'} + assert results['NM_001352421.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001352421.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352421.1):c.-108-7C>T' + assert results['NM_001352421.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + assert 
results['NM_001352421.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352421.1:c.-108-7C>T' + assert results['NM_001352421.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001352421.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352421.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352421.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352421.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352421.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339350.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352421.1'} + + assert 'NM_001352411.1:c.-108-7C>T' in results.keys() + assert results['NM_001352411.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352411.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001352411.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352411.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript 
variant 7, mRNA' + assert results['NM_001352411.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352411.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339340.1:p.?', 'slr': 'NP_001339340.1:p.?'} + assert results['NM_001352411.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001352411.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352411.1):c.-108-7C>T' + assert results['NM_001352411.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001352411.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352411.1:c.-108-7C>T' + assert results['NM_001352411.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001352411.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352411.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352411.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352411.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352411.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339340.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352411.1'} + + assert 'NR_147984.1:n.782-7C>T' in results.keys() + assert results['NR_147984.1:n.782-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_147984.1:n.782-7C>T']['refseqgene_context_intronic_sequence'] 
== '' + assert results['NR_147984.1:n.782-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NR_147984.1:n.782-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 20, non-coding RNA' + assert results['NR_147984.1:n.782-7C>T']['gene_symbol'] == 'ALG9' + assert results['NR_147984.1:n.782-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_147984.1:n.782-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NR_147984.1:n.782-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NR_147984.1):c.782-7C>T' + assert results['NR_147984.1:n.782-7C>T']['hgvs_lrg_variant'] == '' + assert results['NR_147984.1:n.782-7C>T']['hgvs_transcript_variant'] == 'NR_147984.1:n.782-7C>T' + assert results['NR_147984.1:n.782-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NR_147984.1:n.782-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NR_147984.1:n.782-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NR_147984.1:n.782-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NR_147984.1:n.782-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 
'pos': '111865258', 'alt': u'A'}} + assert results['NR_147984.1:n.782-7C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_147984.1'} + + assert 'NM_001077691.1:c.-108-7C>T' in results.keys() + assert results['NM_001077691.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001077691.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001077691.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001077691.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 3, mRNA' + assert results['NM_001077691.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001077691.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001071159.1:p.?', 'slr': 'NP_001071159.1:p.?'} + assert results['NM_001077691.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001077691.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001077691.1):c.-108-7C>T' + assert results['NM_001077691.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001077691.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001077691.1:c.-108-7C>T' + assert results['NM_001077691.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001077691.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001077691.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 
'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001077691.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001077691.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001077691.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001071159.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077691.1'} + + assert 'NM_001352410.1:c.-108-7C>T' in results.keys() + assert results['NM_001352410.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352410.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001352410.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352410.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 6, mRNA' + assert results['NM_001352410.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352410.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339339.1:p.?', 'slr': 'NP_001339339.1:p.?'} + assert results['NM_001352410.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001352410.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352410.1):c.-108-7C>T' + assert results['NM_001352410.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + assert 
results['NM_001352410.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352410.1:c.-108-7C>T' + assert results['NM_001352410.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001352410.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352410.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352410.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352410.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352410.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339339.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352410.1'} + + assert 'NM_001077690.1:c.406-7C>T' in results.keys() + assert results['NM_001077690.1:c.406-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001077690.1:c.406-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001077690.1:c.406-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001077690.1:c.406-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 
2, mRNA' + assert results['NM_001077690.1:c.406-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001077690.1:c.406-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001071158.1:p.?', 'slr': 'NP_001071158.1:p.?'} + assert results['NM_001077690.1:c.406-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001077690.1:c.406-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001077690.1):c.406-7C>T' + assert results['NM_001077690.1:c.406-7C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001077690.1:c.406-7C>T']['hgvs_transcript_variant'] == 'NM_001077690.1:c.406-7C>T' + assert results['NM_001077690.1:c.406-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001077690.1:c.406-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001077690.1:c.406-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001077690.1:c.406-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001077690.1:c.406-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001077690.1:c.406-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001071158.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077690.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001352422.1:c.-326-7C>T' in results.keys() + assert results['NM_001352422.1:c.-326-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001352422.1:c.-326-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001352422.1:c.-326-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352422.1:c.-326-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 18, mRNA' + assert results['NM_001352422.1:c.-326-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352422.1:c.-326-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339351.1:p.?', 'slr': 'NP_001339351.1:p.?'} + assert results['NM_001352422.1:c.-326-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001352422.1:c.-326-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352422.1):c.-326-7C>T' + assert results['NM_001352422.1:c.-326-7C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001352422.1:c.-326-7C>T']['hgvs_transcript_variant'] == 'NM_001352422.1:c.-326-7C>T' + assert results['NM_001352422.1:c.-326-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001352422.1:c.-326-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352422.1:c.-326-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352422.1:c.-326-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert 
results['NM_001352422.1:c.-326-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352422.1:c.-326-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339351.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352422.1'} + + assert 'NM_001352416.1:c.-108-7C>T' in results.keys() + assert results['NM_001352416.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352416.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001352416.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352416.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 12, mRNA' + assert results['NM_001352416.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352416.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339345.1:p.?', 'slr': 'NP_001339345.1:p.?'} + assert results['NM_001352416.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001352416.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352416.1):c.-108-7C>T' + assert results['NM_001352416.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001352416.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352416.1:c.-108-7C>T' + assert results['NM_001352416.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001352416.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352416.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352416.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352416.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352416.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339345.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352416.1'} + + assert 'NM_001352420.1:c.-108-7C>T' in results.keys() + assert results['NM_001352420.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352420.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001352420.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352420.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 16, mRNA' + assert results['NM_001352420.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352420.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339349.1:p.?', 'slr': 'NP_001339349.1:p.?'} + assert 
results['NM_001352420.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001352420.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352420.1):c.-108-7C>T' + assert results['NM_001352420.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001352420.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352420.1:c.-108-7C>T' + assert results['NM_001352420.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001352420.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352420.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352420.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352420.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352420.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339349.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352420.1'} + + assert 'NM_024740.2:c.406-7C>T' in results.keys() + assert results['NM_024740.2:c.406-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_024740.2:c.406-7C>T']['refseqgene_context_intronic_sequence'] == 'NG_009210.1(NM_024740.2):c.406-7C>T' + assert results['NM_024740.2:c.406-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 
'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_024740.2:c.406-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 1, mRNA' + assert results['NM_024740.2:c.406-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_024740.2:c.406-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_079016.2:p.?', 'slr': 'NP_079016.2:p.?'} + assert results['NM_024740.2:c.406-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_024740.2:c.406-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_024740.2):c.406-7C>T' + assert results['NM_024740.2:c.406-7C>T']['hgvs_lrg_variant'] == '' + assert results['NM_024740.2:c.406-7C>T']['hgvs_transcript_variant'] == 'NM_024740.2:c.406-7C>T' + assert results['NM_024740.2:c.406-7C>T']['hgvs_refseqgene_variant'] == 'NG_009210.1:g.11324C>T' + assert results['NM_024740.2:c.406-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_024740.2:c.406-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_024740.2:c.406-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_024740.2:c.406-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_024740.2:c.406-7C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009210.1', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_079016.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024740.2'} + + assert 'NM_001352414.1:c.-108-7C>T' in results.keys() + assert results['NM_001352414.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352414.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001352414.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352414.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 10, mRNA' + assert results['NM_001352414.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352414.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339343.1:p.?', 'slr': 'NP_001339343.1:p.?'} + assert results['NM_001352414.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001352414.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352414.1):c.-108-7C>T' + assert results['NM_001352414.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001352414.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352414.1:c.-108-7C>T' + assert results['NM_001352414.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001352414.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352414.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 
'alt': u'A'}} + assert results['NM_001352414.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352414.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352414.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339343.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352414.1'} + + assert 'NM_001352417.1:c.406-7C>T' in results.keys() + assert results['NM_001352417.1:c.406-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352417.1:c.406-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001352417.1:c.406-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352417.1:c.406-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 13, mRNA' + assert results['NM_001352417.1:c.406-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352417.1:c.406-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339346.1:p.?', 'slr': 'NP_001339346.1:p.?'} + assert results['NM_001352417.1:c.406-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001352417.1:c.406-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352417.1):c.406-7C>T' + assert results['NM_001352417.1:c.406-7C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001352417.1:c.406-7C>T']['hgvs_transcript_variant'] == 
'NM_001352417.1:c.406-7C>T' + assert results['NM_001352417.1:c.406-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001352417.1:c.406-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352417.1:c.406-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352417.1:c.406-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352417.1:c.406-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352417.1:c.406-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339346.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352417.1'} + + assert 'NM_001352409.1:c.-108-7C>T' in results.keys() + assert results['NM_001352409.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352409.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001352409.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352409.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 5, mRNA' + assert results['NM_001352409.1:c.-108-7C>T']['gene_symbol'] 
== 'ALG9' + assert results['NM_001352409.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339338.1:p.?', 'slr': 'NP_001339338.1:p.?'} + assert results['NM_001352409.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001352409.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352409.1):c.-108-7C>T' + assert results['NM_001352409.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001352409.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352409.1:c.-108-7C>T' + assert results['NM_001352409.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001352409.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352409.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352409.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352409.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352409.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339338.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352409.1'} + + assert 'NM_001352413.1:c.-108-7C>T' in results.keys() + assert results['NM_001352413.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352413.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_001352413.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352413.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 9, mRNA' + assert results['NM_001352413.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352413.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339342.1:p.?', 'slr': 'NP_001339342.1:p.?'} + assert results['NM_001352413.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001352413.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352413.1):c.-108-7C>T' + assert results['NM_001352413.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001352413.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352413.1:c.-108-7C>T' + assert results['NM_001352413.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001352413.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352413.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352413.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} + assert results['NM_001352413.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352413.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339342.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352413.1'} + + + def test_variant216(self): + variant = '12-11023080-C-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NR_037918.2:n.1184+11736G>T' in results.keys() + assert results['NR_037918.2:n.1184+11736G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_037918.2:n.1184+11736G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_037918.2:n.1184+11736G>T']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187658.1:g.69187C>A', 'vcf': {'chr': 'HSCHR12_3_CTG2', 'ref': u'C', 'pos': '69187', 'alt': u'A'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187658.1:g.69187C>A', 'vcf': {'chr': 'chr12_KI270904v1_alt', 'ref': u'C', 'pos': '69187', 'alt': u'A'}}}, {'grch37': {'hgvs_genomic_description': 'NW_003571047.1:g.69187C>A', 'vcf': {'chr': 'HG1133_PATCH', 'ref': u'C', 'pos': '69187', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003571047.1:g.69187C>A', 'vcf': {'chr': 'NW_003571047.1', 'ref': u'C', 'pos': '69187', 'alt': u'A'}}}, {'grch37': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'HSCHR12_2_CTG2', 'ref': u'C', 'pos': '69187', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'NW_003571050.1', 'ref': u'C', 'pos': '69187', 'alt': u'A'}}}, {'grch38': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'HSCHR12_2_CTG2', 'ref': u'C', 'pos': '69187', 'alt': u'A'}}}, {'hg38': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'chr12_GL877876v1_alt', 'ref': u'C', 'pos': '69187', 'alt': u'A'}}}] + assert 
results['NR_037918.2:n.1184+11736G>T']['transcript_description'] == 'Homo sapiens PRH1-PRR4 readthrough (PRH1-PRR4), long non-coding RNA' + assert results['NR_037918.2:n.1184+11736G>T']['gene_symbol'] == 'PRH1-PRR4' + assert results['NR_037918.2:n.1184+11736G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_037918.2:n.1184+11736G>T']['submitted_variant'] == '12-11023080-C-A' + assert results['NR_037918.2:n.1184+11736G>T']['genome_context_intronic_sequence'] == 'NC_000012.11(NR_037918.2):c.1184+11736G>T' + assert results['NR_037918.2:n.1184+11736G>T']['hgvs_lrg_variant'] == '' + assert results['NR_037918.2:n.1184+11736G>T']['hgvs_transcript_variant'] == 'NR_037918.2:n.1184+11736G>T' + assert results['NR_037918.2:n.1184+11736G>T']['hgvs_refseqgene_variant'] == '' + assert results['NR_037918.2:n.1184+11736G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.11023080C>A', 'vcf': {'chr': 'chr12', 'ref': u'C', 'pos': '11023080', 'alt': u'A'}} + assert results['NR_037918.2:n.1184+11736G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.10870481C>A', 'vcf': {'chr': 'chr12', 'ref': u'C', 'pos': '10870481', 'alt': u'A'}} + assert results['NR_037918.2:n.1184+11736G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.11023080C>A', 'vcf': {'chr': '12', 'ref': u'C', 'pos': '11023080', 'alt': u'A'}} + assert results['NR_037918.2:n.1184+11736G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.10870481C>A', 'vcf': {'chr': '12', 'ref': u'C', 'pos': '10870481', 'alt': u'A'}} + assert results['NR_037918.2:n.1184+11736G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_037918.2'} + + + def test_variant217(self): + variant = '12-22018712-TC-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 
'NM_020297.3:c.2199-1302del' in results.keys() + assert results['NM_020297.3:c.2199-1302del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_020297.3:c.2199-1302del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_020297.3:c.2199-1302del']['alt_genomic_loci'] == [] + assert results['NM_020297.3:c.2199-1302del']['transcript_description'] == 'Homo sapiens ATP binding cassette subfamily C member 9 (ABCC9), transcript variant SUR2B, mRNA' + assert results['NM_020297.3:c.2199-1302del']['gene_symbol'] == 'ABCC9' + assert results['NM_020297.3:c.2199-1302del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_064693.2(LRG_377p1):p.?', 'slr': 'NP_064693.2:p.?'} + assert results['NM_020297.3:c.2199-1302del']['submitted_variant'] == '12-22018712-TC-T' + assert results['NM_020297.3:c.2199-1302del']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_020297.3):c.2199-1302del' + assert results['NM_020297.3:c.2199-1302del']['hgvs_lrg_variant'] == '' + assert results['NM_020297.3:c.2199-1302del']['hgvs_transcript_variant'] == 'NM_020297.3:c.2199-1302del' + assert results['NM_020297.3:c.2199-1302del']['hgvs_refseqgene_variant'] == '' + assert results['NM_020297.3:c.2199-1302del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.22018713del', 'vcf': {'chr': 'chr12', 'ref': 'TC', 'pos': '22018712', 'alt': 'T'}} + assert results['NM_020297.3:c.2199-1302del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.21865779del', 'vcf': {'chr': 'chr12', 'ref': 'TC', 'pos': '21865778', 'alt': 'T'}} + assert results['NM_020297.3:c.2199-1302del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.22018713del', 'vcf': {'chr': '12', 'ref': 'TC', 'pos': '22018712', 'alt': 'T'}} + assert results['NM_020297.3:c.2199-1302del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.21865779del', 'vcf': {'chr': '12', 'ref': 'TC', 'pos': 
'21865778', 'alt': 'T'}} + assert results['NM_020297.3:c.2199-1302del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_064693.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020297.3'} + + assert 'NM_005691.3:c.2199-1302del' in results.keys() + assert results['NM_005691.3:c.2199-1302del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_005691.3:c.2199-1302del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_005691.3:c.2199-1302del']['alt_genomic_loci'] == [] + assert results['NM_005691.3:c.2199-1302del']['transcript_description'] == 'Homo sapiens ATP binding cassette subfamily C member 9 (ABCC9), transcript variant SUR2A, mRNA' + assert results['NM_005691.3:c.2199-1302del']['gene_symbol'] == 'ABCC9' + assert results['NM_005691.3:c.2199-1302del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005682.2(LRG_377p2):p.?', 'slr': 'NP_005682.2:p.?'} + assert results['NM_005691.3:c.2199-1302del']['submitted_variant'] == '12-22018712-TC-T' + assert results['NM_005691.3:c.2199-1302del']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_005691.3):c.2199-1302del' + assert results['NM_005691.3:c.2199-1302del']['hgvs_lrg_variant'] == '' + assert results['NM_005691.3:c.2199-1302del']['hgvs_transcript_variant'] == 'NM_005691.3:c.2199-1302del' + assert results['NM_005691.3:c.2199-1302del']['hgvs_refseqgene_variant'] == '' + assert results['NM_005691.3:c.2199-1302del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.22018713del', 'vcf': {'chr': 'chr12', 'ref': 'TC', 'pos': '22018712', 'alt': 'T'}} + assert results['NM_005691.3:c.2199-1302del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.21865779del', 'vcf': {'chr': 'chr12', 'ref': 'TC', 'pos': '21865778', 'alt': 'T'}} + assert results['NM_005691.3:c.2199-1302del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.22018713del', 'vcf': {'chr': 
'12', 'ref': 'TC', 'pos': '22018712', 'alt': 'T'}} + assert results['NM_005691.3:c.2199-1302del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.21865779del', 'vcf': {'chr': '12', 'ref': 'TC', 'pos': '21865778', 'alt': 'T'}} + assert results['NM_005691.3:c.2199-1302del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005682.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005691.3'} + + assert 'NM_020297.2:c.2199-1302del' in results.keys() + assert results['NM_020297.2:c.2199-1302del']['hgvs_lrg_transcript_variant'] == 'LRG_377t1:c.2199-1302del' + assert results['NM_020297.2:c.2199-1302del']['refseqgene_context_intronic_sequence'] == 'NG_012819.1(NM_020297.2):c.2199-1302del' + assert results['NM_020297.2:c.2199-1302del']['alt_genomic_loci'] == [] + assert results['NM_020297.2:c.2199-1302del']['transcript_description'] == 'Homo sapiens ATP-binding cassette, sub-family C (CFTR/MRP), member 9 (ABCC9), transcript variant SUR2B, mRNA' + assert results['NM_020297.2:c.2199-1302del']['gene_symbol'] == 'ABCC9' + assert results['NM_020297.2:c.2199-1302del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_064693.2(LRG_377p1):p.?', 'slr': 'NP_064693.2:p.?'} + assert results['NM_020297.2:c.2199-1302del']['submitted_variant'] == '12-22018712-TC-T' + assert results['NM_020297.2:c.2199-1302del']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_020297.2):c.2199-1302del' + assert results['NM_020297.2:c.2199-1302del']['hgvs_lrg_variant'] == 'LRG_377:g.75916del' + assert results['NM_020297.2:c.2199-1302del']['hgvs_transcript_variant'] == 'NM_020297.2:c.2199-1302del' + assert results['NM_020297.2:c.2199-1302del']['hgvs_refseqgene_variant'] == 'NG_012819.1:g.75916del' + assert results['NM_020297.2:c.2199-1302del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.22018713del', 'vcf': {'chr': 'chr12', 'ref': 'TC', 'pos': '22018712', 'alt': 'T'}} + assert 'hg38' 
not in results['NM_020297.2:c.2199-1302del']['primary_assembly_loci'].keys() + assert results['NM_020297.2:c.2199-1302del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.22018713del', 'vcf': {'chr': '12', 'ref': 'TC', 'pos': '22018712', 'alt': 'T'}} + assert 'grch38' not in results['NM_020297.2:c.2199-1302del']['primary_assembly_loci'].keys() + assert results['NM_020297.2:c.2199-1302del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012819.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_064693.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020297.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_377.xml'} + + assert results['flag'] == 'gene_variant' + assert 'NM_005691.2:c.2199-1302del' in results.keys() + assert results['NM_005691.2:c.2199-1302del']['hgvs_lrg_transcript_variant'] == 'LRG_377t2:c.2199-1302del' + assert results['NM_005691.2:c.2199-1302del']['refseqgene_context_intronic_sequence'] == 'NG_012819.1(NM_005691.2):c.2199-1302del' + assert results['NM_005691.2:c.2199-1302del']['alt_genomic_loci'] == [] + assert results['NM_005691.2:c.2199-1302del']['transcript_description'] == 'Homo sapiens ATP-binding cassette, sub-family C (CFTR/MRP), member 9 (ABCC9), transcript variant SUR2A, mRNA' + assert results['NM_005691.2:c.2199-1302del']['gene_symbol'] == 'ABCC9' + assert results['NM_005691.2:c.2199-1302del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005682.2(LRG_377p2):p.?', 'slr': 'NP_005682.2:p.?'} + assert results['NM_005691.2:c.2199-1302del']['submitted_variant'] == '12-22018712-TC-T' + assert results['NM_005691.2:c.2199-1302del']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_005691.2):c.2199-1302del' + assert results['NM_005691.2:c.2199-1302del']['hgvs_lrg_variant'] == 'LRG_377:g.75916del' + assert results['NM_005691.2:c.2199-1302del']['hgvs_transcript_variant'] == 'NM_005691.2:c.2199-1302del' + assert 
results['NM_005691.2:c.2199-1302del']['hgvs_refseqgene_variant'] == 'NG_012819.1:g.75916del' + assert results['NM_005691.2:c.2199-1302del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.22018713del', 'vcf': {'chr': 'chr12', 'ref': 'TC', 'pos': '22018712', 'alt': 'T'}} + assert 'hg38' not in results['NM_005691.2:c.2199-1302del']['primary_assembly_loci'].keys() + assert results['NM_005691.2:c.2199-1302del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.22018713del', 'vcf': {'chr': '12', 'ref': 'TC', 'pos': '22018712', 'alt': 'T'}} + assert 'grch38' not in results['NM_005691.2:c.2199-1302del']['primary_assembly_loci'].keys() + assert results['NM_005691.2:c.2199-1302del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012819.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005682.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005691.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_377.xml'} + + + def test_variant218(self): + variant = '12-52912946-T-C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000424.3:c.556-2A>G' in results.keys() + assert results['NM_000424.3:c.556-2A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000424.3:c.556-2A>G']['refseqgene_context_intronic_sequence'] == 'NG_008297.1(NM_000424.3):c.556-2A>G' + assert results['NM_000424.3:c.556-2A>G']['alt_genomic_loci'] == [] + assert results['NM_000424.3:c.556-2A>G']['transcript_description'] == 'Homo sapiens keratin 5 (KRT5), mRNA' + assert results['NM_000424.3:c.556-2A>G']['gene_symbol'] == 'KRT5' + assert results['NM_000424.3:c.556-2A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000415.2:p.?', 'slr': 'NP_000415.2:p.?'} + assert results['NM_000424.3:c.556-2A>G']['submitted_variant'] == '12-52912946-T-C' + assert 
results['NM_000424.3:c.556-2A>G']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_000424.3):c.556-2A>G' + assert results['NM_000424.3:c.556-2A>G']['hgvs_lrg_variant'] == '' + assert results['NM_000424.3:c.556-2A>G']['hgvs_transcript_variant'] == 'NM_000424.3:c.556-2A>G' + assert results['NM_000424.3:c.556-2A>G']['hgvs_refseqgene_variant'] == 'NG_008297.1:g.6298A>G' + assert results['NM_000424.3:c.556-2A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.52912946T>C', 'vcf': {'chr': 'chr12', 'ref': u'T', 'pos': '52912946', 'alt': u'C'}} + assert results['NM_000424.3:c.556-2A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.52519162T>C', 'vcf': {'chr': 'chr12', 'ref': u'T', 'pos': '52519162', 'alt': u'C'}} + assert results['NM_000424.3:c.556-2A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.52912946T>C', 'vcf': {'chr': '12', 'ref': u'T', 'pos': '52912946', 'alt': u'C'}} + assert results['NM_000424.3:c.556-2A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.52519162T>C', 'vcf': {'chr': '12', 'ref': u'T', 'pos': '52519162', 'alt': u'C'}} + assert results['NM_000424.3:c.556-2A>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008297.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000415.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000424.3'} + + + def test_variant219(self): + variant = '12-103234292-TC-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001354304.1:c.1200del' in results.keys() + assert results['NM_001354304.1:c.1200del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001354304.1:c.1200del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001354304.1:c.1200del']['alt_genomic_loci'] == [] + assert results['NM_001354304.1:c.1200del']['transcript_description'] == 'Homo 
sapiens phenylalanine hydroxylase (PAH), transcript variant 2, mRNA' + assert results['NM_001354304.1:c.1200del']['gene_symbol'] == 'PAH' + assert results['NM_001354304.1:c.1200del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001341233.1:p.(Asn401ThrfsTer51)', 'slr': 'NP_001341233.1:p.(N401Tfs*51)'} + assert results['NM_001354304.1:c.1200del']['submitted_variant'] == '12-103234292-TC-T' + assert results['NM_001354304.1:c.1200del']['genome_context_intronic_sequence'] == '' + assert results['NM_001354304.1:c.1200del']['hgvs_lrg_variant'] == '' + assert results['NM_001354304.1:c.1200del']['hgvs_transcript_variant'] == 'NM_001354304.1:c.1200del' + assert results['NM_001354304.1:c.1200del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001354304.1:c.1200del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.103234294del', 'vcf': {'chr': 'chr12', 'ref': 'TC', 'pos': '103234292', 'alt': 'T'}} + assert results['NM_001354304.1:c.1200del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102840516del', 'vcf': {'chr': 'chr12', 'ref': 'TC', 'pos': '102840514', 'alt': 'T'}} + assert results['NM_001354304.1:c.1200del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.103234294del', 'vcf': {'chr': '12', 'ref': 'TC', 'pos': '103234292', 'alt': 'T'}} + assert results['NM_001354304.1:c.1200del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102840516del', 'vcf': {'chr': '12', 'ref': 'TC', 'pos': '102840514', 'alt': 'T'}} + assert results['NM_001354304.1:c.1200del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341233.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354304.1'} + + assert 'NM_000277.2:c.1200del' in results.keys() + assert results['NM_000277.2:c.1200del']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_000277.2:c.1200del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000277.2:c.1200del']['alt_genomic_loci'] == [] + assert results['NM_000277.2:c.1200del']['transcript_description'] == 'Homo sapiens phenylalanine hydroxylase (PAH), transcript variant 1, mRNA' + assert results['NM_000277.2:c.1200del']['gene_symbol'] == 'PAH' + assert results['NM_000277.2:c.1200del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000268.1:p.(Asn401ThrfsTer51)', 'slr': 'NP_000268.1:p.(N401Tfs*51)'} + assert results['NM_000277.2:c.1200del']['submitted_variant'] == '12-103234292-TC-T' + assert results['NM_000277.2:c.1200del']['genome_context_intronic_sequence'] == '' + assert results['NM_000277.2:c.1200del']['hgvs_lrg_variant'] == '' + assert results['NM_000277.2:c.1200del']['hgvs_transcript_variant'] == 'NM_000277.2:c.1200del' + assert results['NM_000277.2:c.1200del']['hgvs_refseqgene_variant'] == '' + assert results['NM_000277.2:c.1200del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.103234294del', 'vcf': {'chr': 'chr12', 'ref': 'TC', 'pos': '103234292', 'alt': 'T'}} + assert results['NM_000277.2:c.1200del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102840516del', 'vcf': {'chr': 'chr12', 'ref': 'TC', 'pos': '102840514', 'alt': 'T'}} + assert results['NM_000277.2:c.1200del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.103234294del', 'vcf': {'chr': '12', 'ref': 'TC', 'pos': '103234292', 'alt': 'T'}} + assert results['NM_000277.2:c.1200del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102840516del', 'vcf': {'chr': '12', 'ref': 'TC', 'pos': '102840514', 'alt': 'T'}} + assert results['NM_000277.2:c.1200del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000268.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000277.2'} + + assert results['flag'] == 
'gene_variant' + assert 'NM_000277.1:c.1200del' in results.keys() + assert results['NM_000277.1:c.1200del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000277.1:c.1200del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000277.1:c.1200del']['alt_genomic_loci'] == [] + assert results['NM_000277.1:c.1200del']['transcript_description'] == 'Homo sapiens phenylalanine hydroxylase (PAH), mRNA' + assert results['NM_000277.1:c.1200del']['gene_symbol'] == 'PAH' + assert results['NM_000277.1:c.1200del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000268.1:p.(Asn401ThrfsTer51)', 'slr': 'NP_000268.1:p.(N401Tfs*51)'} + assert results['NM_000277.1:c.1200del']['submitted_variant'] == '12-103234292-TC-T' + assert results['NM_000277.1:c.1200del']['genome_context_intronic_sequence'] == '' + assert results['NM_000277.1:c.1200del']['hgvs_lrg_variant'] == '' + assert results['NM_000277.1:c.1200del']['hgvs_transcript_variant'] == 'NM_000277.1:c.1200del' + assert results['NM_000277.1:c.1200del']['hgvs_refseqgene_variant'] == 'NG_008690.1:g.82088del' + assert results['NM_000277.1:c.1200del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.103234294del', 'vcf': {'chr': 'chr12', 'ref': 'TC', 'pos': '103234292', 'alt': 'T'}} + assert results['NM_000277.1:c.1200del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102840516del', 'vcf': {'chr': 'chr12', 'ref': 'TC', 'pos': '102840514', 'alt': 'T'}} + assert results['NM_000277.1:c.1200del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.103234294del', 'vcf': {'chr': '12', 'ref': 'TC', 'pos': '103234292', 'alt': 'T'}} + assert results['NM_000277.1:c.1200del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102840516del', 'vcf': {'chr': '12', 'ref': 'TC', 'pos': '102840514', 'alt': 'T'}} + assert results['NM_000277.1:c.1200del']['reference_sequence_records'] == 
{'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000268.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000277.1'} + + + def test_variant220(self): + variant = '12-103311124-T-C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001354304.1:c.-95-121A>G' in results.keys() + assert results['NM_001354304.1:c.-95-121A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001354304.1:c.-95-121A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001354304.1:c.-95-121A>G']['alt_genomic_loci'] == [] + assert results['NM_001354304.1:c.-95-121A>G']['transcript_description'] == 'Homo sapiens phenylalanine hydroxylase (PAH), transcript variant 2, mRNA' + assert results['NM_001354304.1:c.-95-121A>G']['gene_symbol'] == 'PAH' + assert results['NM_001354304.1:c.-95-121A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001341233.1:p.?', 'slr': 'NP_001341233.1:p.?'} + assert results['NM_001354304.1:c.-95-121A>G']['submitted_variant'] == '12-103311124-T-C' + assert results['NM_001354304.1:c.-95-121A>G']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_001354304.1):c.-95-121A>G' + assert results['NM_001354304.1:c.-95-121A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001354304.1:c.-95-121A>G']['hgvs_transcript_variant'] == 'NM_001354304.1:c.-95-121A>G' + assert results['NM_001354304.1:c.-95-121A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001354304.1:c.-95-121A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': 'chr12', 'ref': u'T', 'pos': '103311124', 'alt': u'C'}} + assert results['NM_001354304.1:c.-95-121A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': 'chr12', 'ref': u'T', 'pos': '102917346', 'alt': u'C'}} + assert 
results['NM_001354304.1:c.-95-121A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': '12', 'ref': u'T', 'pos': '103311124', 'alt': u'C'}} + assert results['NM_001354304.1:c.-95-121A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': '12', 'ref': u'T', 'pos': '102917346', 'alt': u'C'}} + assert results['NM_001354304.1:c.-95-121A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341233.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354304.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_000277.2:c.-216A>G' in results.keys() + assert results['NM_000277.2:c.-216A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000277.2:c.-216A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000277.2:c.-216A>G']['alt_genomic_loci'] == [] + assert results['NM_000277.2:c.-216A>G']['transcript_description'] == 'Homo sapiens phenylalanine hydroxylase (PAH), transcript variant 1, mRNA' + assert results['NM_000277.2:c.-216A>G']['gene_symbol'] == 'PAH' + assert results['NM_000277.2:c.-216A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000268.1:p.?', 'slr': 'NP_000268.1:p.?'} + assert results['NM_000277.2:c.-216A>G']['submitted_variant'] == '12-103311124-T-C' + assert results['NM_000277.2:c.-216A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_000277.2:c.-216A>G']['hgvs_lrg_variant'] == '' + assert results['NM_000277.2:c.-216A>G']['hgvs_transcript_variant'] == 'NM_000277.2:c.-216A>G' + assert results['NM_000277.2:c.-216A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_000277.2:c.-216A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': 'chr12', 'ref': u'T', 'pos': '103311124', 'alt': u'C'}} + assert 
results['NM_000277.2:c.-216A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': 'chr12', 'ref': u'T', 'pos': '102917346', 'alt': u'C'}} + assert results['NM_000277.2:c.-216A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': '12', 'ref': u'T', 'pos': '103311124', 'alt': u'C'}} + assert results['NM_000277.2:c.-216A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': '12', 'ref': u'T', 'pos': '102917346', 'alt': u'C'}} + assert results['NM_000277.2:c.-216A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000268.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000277.2'} + + assert 'NM_000277.1:c.-215A>G' in results.keys() + assert results['NM_000277.1:c.-215A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000277.1:c.-215A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000277.1:c.-215A>G']['alt_genomic_loci'] == [] + assert results['NM_000277.1:c.-215A>G']['transcript_description'] == 'Homo sapiens phenylalanine hydroxylase (PAH), mRNA' + assert results['NM_000277.1:c.-215A>G']['gene_symbol'] == 'PAH' + assert results['NM_000277.1:c.-215A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000268.1:p.?', 'slr': 'NP_000268.1:p.?'} + assert results['NM_000277.1:c.-215A>G']['submitted_variant'] == '12-103311124-T-C' + assert results['NM_000277.1:c.-215A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_000277.1:c.-215A>G']['hgvs_lrg_variant'] == '' + assert results['NM_000277.1:c.-215A>G']['hgvs_transcript_variant'] == 'NM_000277.1:c.-215A>G' + assert results['NM_000277.1:c.-215A>G']['hgvs_refseqgene_variant'] == 'NG_008690.1:g.5258A>G' + assert results['NM_000277.1:c.-215A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000012.11:g.103311124T>C', 'vcf': {'chr': 'chr12', 'ref': u'T', 'pos': '103311124', 'alt': u'C'}} + assert results['NM_000277.1:c.-215A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': 'chr12', 'ref': u'T', 'pos': '102917346', 'alt': u'C'}} + assert results['NM_000277.1:c.-215A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': '12', 'ref': u'T', 'pos': '103311124', 'alt': u'C'}} + assert results['NM_000277.1:c.-215A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': '12', 'ref': u'T', 'pos': '102917346', 'alt': u'C'}} + assert results['NM_000277.1:c.-215A>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000268.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000277.1'} + + + def test_variant221(self): + variant = '12-111064166-G-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001319681.1:c.-366-1G>A' in results.keys() + assert results['NM_001319681.1:c.-366-1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001319681.1:c.-366-1G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001319681.1:c.-366-1G>A']['alt_genomic_loci'] == [] + assert results['NM_001319681.1:c.-366-1G>A']['transcript_description'] == 'Homo sapiens tectonic family member 1 (TCTN1), transcript variant 7, mRNA' + assert results['NM_001319681.1:c.-366-1G>A']['gene_symbol'] == 'TCTN1' + assert results['NM_001319681.1:c.-366-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001306610.1:p.?', 'slr': 'NP_001306610.1:p.?'} + assert results['NM_001319681.1:c.-366-1G>A']['submitted_variant'] == '12-111064166-G-A' + assert results['NM_001319681.1:c.-366-1G>A']['genome_context_intronic_sequence'] == 
'NC_000012.11(NM_001319681.1):c.-366-1G>A' + assert results['NM_001319681.1:c.-366-1G>A']['hgvs_lrg_variant'] == '' + assert results['NM_001319681.1:c.-366-1G>A']['hgvs_transcript_variant'] == 'NM_001319681.1:c.-366-1G>A' + assert results['NM_001319681.1:c.-366-1G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001319681.1:c.-366-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} + assert results['NM_001319681.1:c.-366-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} + assert results['NM_001319681.1:c.-366-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} + assert results['NM_001319681.1:c.-366-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} + assert results['NM_001319681.1:c.-366-1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001306610.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001319681.1'} + + assert 'NM_001319680.1:c.342-1G>A' in results.keys() + assert results['NM_001319680.1:c.342-1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001319680.1:c.342-1G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001319680.1:c.342-1G>A']['alt_genomic_loci'] == [] + assert results['NM_001319680.1:c.342-1G>A']['transcript_description'] == 'Homo sapiens tectonic family member 1 (TCTN1), transcript variant 6, mRNA' + assert results['NM_001319680.1:c.342-1G>A']['gene_symbol'] == 'TCTN1' + assert results['NM_001319680.1:c.342-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_001306609.1:p.?', 'slr': 'NP_001306609.1:p.?'} + assert results['NM_001319680.1:c.342-1G>A']['submitted_variant'] == '12-111064166-G-A' + assert results['NM_001319680.1:c.342-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_001319680.1):c.342-1G>A' + assert results['NM_001319680.1:c.342-1G>A']['hgvs_lrg_variant'] == '' + assert results['NM_001319680.1:c.342-1G>A']['hgvs_transcript_variant'] == 'NM_001319680.1:c.342-1G>A' + assert results['NM_001319680.1:c.342-1G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001319680.1:c.342-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} + assert results['NM_001319680.1:c.342-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} + assert results['NM_001319680.1:c.342-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} + assert results['NM_001319680.1:c.342-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} + assert results['NM_001319680.1:c.342-1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001306609.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001319680.1'} + + assert 'NM_001082538.2:c.342-1G>A' in results.keys() + assert results['NM_001082538.2:c.342-1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001082538.2:c.342-1G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001082538.2:c.342-1G>A']['alt_genomic_loci'] == [] + assert results['NM_001082538.2:c.342-1G>A']['transcript_description'] == 'Homo sapiens tectonic family member 1 
(TCTN1), transcript variant 1, mRNA' + assert results['NM_001082538.2:c.342-1G>A']['gene_symbol'] == 'TCTN1' + assert results['NM_001082538.2:c.342-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001076007.1:p.?', 'slr': 'NP_001076007.1:p.?'} + assert results['NM_001082538.2:c.342-1G>A']['submitted_variant'] == '12-111064166-G-A' + assert results['NM_001082538.2:c.342-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_001082538.2):c.342-1G>A' + assert results['NM_001082538.2:c.342-1G>A']['hgvs_lrg_variant'] == '' + assert results['NM_001082538.2:c.342-1G>A']['hgvs_transcript_variant'] == 'NM_001082538.2:c.342-1G>A' + assert results['NM_001082538.2:c.342-1G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001082538.2:c.342-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} + assert results['NM_001082538.2:c.342-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} + assert results['NM_001082538.2:c.342-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} + assert results['NM_001082538.2:c.342-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} + assert results['NM_001082538.2:c.342-1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001076007.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001082538.2'} + + assert 'NM_001173976.1:c.162-1G>A' in results.keys() + assert results['NM_001173976.1:c.162-1G>A']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001173976.1:c.162-1G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001173976.1:c.162-1G>A']['alt_genomic_loci'] == [] + assert results['NM_001173976.1:c.162-1G>A']['transcript_description'] == 'Homo sapiens tectonic family member 1 (TCTN1), transcript variant 5, mRNA' + assert results['NM_001173976.1:c.162-1G>A']['gene_symbol'] == 'TCTN1' + assert results['NM_001173976.1:c.162-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167447.1:p.?', 'slr': 'NP_001167447.1:p.?'} + assert results['NM_001173976.1:c.162-1G>A']['submitted_variant'] == '12-111064166-G-A' + assert results['NM_001173976.1:c.162-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_001173976.1):c.162-1G>A' + assert results['NM_001173976.1:c.162-1G>A']['hgvs_lrg_variant'] == '' + assert results['NM_001173976.1:c.162-1G>A']['hgvs_transcript_variant'] == 'NM_001173976.1:c.162-1G>A' + assert results['NM_001173976.1:c.162-1G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001173976.1:c.162-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} + assert results['NM_001173976.1:c.162-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} + assert results['NM_001173976.1:c.162-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} + assert results['NM_001173976.1:c.162-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} + assert results['NM_001173976.1:c.162-1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167447.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001173976.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001082537.2:c.342-1G>A' in results.keys() + assert results['NM_001082537.2:c.342-1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001082537.2:c.342-1G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001082537.2:c.342-1G>A']['alt_genomic_loci'] == [] + assert results['NM_001082537.2:c.342-1G>A']['transcript_description'] == 'Homo sapiens tectonic family member 1 (TCTN1), transcript variant 2, mRNA' + assert results['NM_001082537.2:c.342-1G>A']['gene_symbol'] == 'TCTN1' + assert results['NM_001082537.2:c.342-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001076006.1:p.?', 'slr': 'NP_001076006.1:p.?'} + assert results['NM_001082537.2:c.342-1G>A']['submitted_variant'] == '12-111064166-G-A' + assert results['NM_001082537.2:c.342-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_001082537.2):c.342-1G>A' + assert results['NM_001082537.2:c.342-1G>A']['hgvs_lrg_variant'] == '' + assert results['NM_001082537.2:c.342-1G>A']['hgvs_transcript_variant'] == 'NM_001082537.2:c.342-1G>A' + assert results['NM_001082537.2:c.342-1G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001082537.2:c.342-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} + assert results['NM_001082537.2:c.342-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} + assert results['NM_001082537.2:c.342-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} + assert results['NM_001082537.2:c.342-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} + assert results['NM_001082537.2:c.342-1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001076006.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001082537.2'} + + assert 'NR_135088.1:n.559-1G>A' in results.keys() + assert results['NR_135088.1:n.559-1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_135088.1:n.559-1G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_135088.1:n.559-1G>A']['alt_genomic_loci'] == [] + assert results['NR_135088.1:n.559-1G>A']['transcript_description'] == 'Homo sapiens tectonic family member 1 (TCTN1), transcript variant 9, non-coding RNA' + assert results['NR_135088.1:n.559-1G>A']['gene_symbol'] == 'TCTN1' + assert results['NR_135088.1:n.559-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_135088.1:n.559-1G>A']['submitted_variant'] == '12-111064166-G-A' + assert results['NR_135088.1:n.559-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NR_135088.1):c.559-1G>A' + assert results['NR_135088.1:n.559-1G>A']['hgvs_lrg_variant'] == '' + assert results['NR_135088.1:n.559-1G>A']['hgvs_transcript_variant'] == 'NR_135088.1:n.559-1G>A' + assert results['NR_135088.1:n.559-1G>A']['hgvs_refseqgene_variant'] == '' + assert results['NR_135088.1:n.559-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} + assert results['NR_135088.1:n.559-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} + assert results['NR_135088.1:n.559-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 
'ref': 'G', 'pos': '111064166', 'alt': 'A'}} + assert results['NR_135088.1:n.559-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} + assert results['NR_135088.1:n.559-1G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_135088.1'} + + assert 'NM_024549.5:c.342-1G>A' in results.keys() + assert results['NM_024549.5:c.342-1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_024549.5:c.342-1G>A']['refseqgene_context_intronic_sequence'] == 'NG_030381.1(NM_024549.5):c.342-1G>A' + assert results['NM_024549.5:c.342-1G>A']['alt_genomic_loci'] == [] + assert results['NM_024549.5:c.342-1G>A']['transcript_description'] == 'Homo sapiens tectonic family member 1 (TCTN1), transcript variant 3, mRNA' + assert results['NM_024549.5:c.342-1G>A']['gene_symbol'] == 'TCTN1' + assert results['NM_024549.5:c.342-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_078825.2:p.?', 'slr': 'NP_078825.2:p.?'} + assert results['NM_024549.5:c.342-1G>A']['submitted_variant'] == '12-111064166-G-A' + assert results['NM_024549.5:c.342-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_024549.5):c.342-1G>A' + assert results['NM_024549.5:c.342-1G>A']['hgvs_lrg_variant'] == '' + assert results['NM_024549.5:c.342-1G>A']['hgvs_transcript_variant'] == 'NM_024549.5:c.342-1G>A' + assert results['NM_024549.5:c.342-1G>A']['hgvs_refseqgene_variant'] == 'NG_030381.1:g.17335G>A' + assert results['NM_024549.5:c.342-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} + assert results['NM_024549.5:c.342-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} + assert 
results['NM_024549.5:c.342-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} + assert results['NM_024549.5:c.342-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} + assert results['NM_024549.5:c.342-1G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_030381.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_078825.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024549.5'} + + assert 'NM_001173975.2:c.174-1G>A' in results.keys() + assert results['NM_001173975.2:c.174-1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001173975.2:c.174-1G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001173975.2:c.174-1G>A']['alt_genomic_loci'] == [] + assert results['NM_001173975.2:c.174-1G>A']['transcript_description'] == 'Homo sapiens tectonic family member 1 (TCTN1), transcript variant 4, mRNA' + assert results['NM_001173975.2:c.174-1G>A']['gene_symbol'] == 'TCTN1' + assert results['NM_001173975.2:c.174-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167446.1:p.?', 'slr': 'NP_001167446.1:p.?'} + assert results['NM_001173975.2:c.174-1G>A']['submitted_variant'] == '12-111064166-G-A' + assert results['NM_001173975.2:c.174-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_001173975.2):c.174-1G>A' + assert results['NM_001173975.2:c.174-1G>A']['hgvs_lrg_variant'] == '' + assert results['NM_001173975.2:c.174-1G>A']['hgvs_transcript_variant'] == 'NM_001173975.2:c.174-1G>A' + assert results['NM_001173975.2:c.174-1G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001173975.2:c.174-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 
'pos': '111064166', 'alt': 'A'}} + assert results['NM_001173975.2:c.174-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} + assert results['NM_001173975.2:c.174-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} + assert results['NM_001173975.2:c.174-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} + assert results['NM_001173975.2:c.174-1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167446.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001173975.2'} + + assert 'NM_001173975.1:c.174-1G>A' in results.keys() + assert results['NM_001173975.1:c.174-1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001173975.1:c.174-1G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001173975.1:c.174-1G>A']['alt_genomic_loci'] == [] + assert results['NM_001173975.1:c.174-1G>A']['transcript_description'] == 'Homo sapiens tectonic family member 1 (TCTN1), transcript variant 4, mRNA' + assert results['NM_001173975.1:c.174-1G>A']['gene_symbol'] == 'TCTN1' + assert results['NM_001173975.1:c.174-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167446.1:p.?', 'slr': 'NP_001167446.1:p.?'} + assert results['NM_001173975.1:c.174-1G>A']['submitted_variant'] == '12-111064166-G-A' + assert results['NM_001173975.1:c.174-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_001173975.1):c.174-1G>A' + assert results['NM_001173975.1:c.174-1G>A']['hgvs_lrg_variant'] == '' + assert results['NM_001173975.1:c.174-1G>A']['hgvs_transcript_variant'] == 'NM_001173975.1:c.174-1G>A' + assert 
results['NM_001173975.1:c.174-1G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001173975.1:c.174-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} + assert 'hg38' not in results['NM_001173975.1:c.174-1G>A']['primary_assembly_loci'].keys() + assert results['NM_001173975.1:c.174-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} + assert 'grch38' not in results['NM_001173975.1:c.174-1G>A']['primary_assembly_loci'].keys() + assert results['NM_001173975.1:c.174-1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167446.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001173975.1'} + + assert 'NM_001319682.1:c.174-1G>A' in results.keys() + assert results['NM_001319682.1:c.174-1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001319682.1:c.174-1G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001319682.1:c.174-1G>A']['alt_genomic_loci'] == [] + assert results['NM_001319682.1:c.174-1G>A']['transcript_description'] == 'Homo sapiens tectonic family member 1 (TCTN1), transcript variant 8, mRNA' + assert results['NM_001319682.1:c.174-1G>A']['gene_symbol'] == 'TCTN1' + assert results['NM_001319682.1:c.174-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001306611.1:p.?', 'slr': 'NP_001306611.1:p.?'} + assert results['NM_001319682.1:c.174-1G>A']['submitted_variant'] == '12-111064166-G-A' + assert results['NM_001319682.1:c.174-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_001319682.1):c.174-1G>A' + assert results['NM_001319682.1:c.174-1G>A']['hgvs_lrg_variant'] == '' + assert results['NM_001319682.1:c.174-1G>A']['hgvs_transcript_variant'] == 'NM_001319682.1:c.174-1G>A' + assert 
results['NM_001319682.1:c.174-1G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001319682.1:c.174-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} + assert results['NM_001319682.1:c.174-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} + assert results['NM_001319682.1:c.174-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} + assert results['NM_001319682.1:c.174-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} + assert results['NM_001319682.1:c.174-1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001306611.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001319682.1'} + + + def test_variant222(self): + variant = '12-123738430-CA-C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001194995.1:c.210del' in results.keys() + assert results['NM_001194995.1:c.210del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001194995.1:c.210del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001194995.1:c.210del']['alt_genomic_loci'] == [] + assert results['NM_001194995.1:c.210del']['transcript_description'] == 'Homo sapiens chromosome 12 open reading frame 65 (C12orf65), transcript variant 3, mRNA' + assert results['NM_001194995.1:c.210del']['gene_symbol'] == 'C12orf65' + assert results['NM_001194995.1:c.210del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001181924.1:p.(Gly72AlafsTer13)', 'slr': 'NP_001181924.1:p.(G72Afs*13)'} + assert 
results['NM_001194995.1:c.210del']['submitted_variant'] == '12-123738430-CA-C' + assert results['NM_001194995.1:c.210del']['genome_context_intronic_sequence'] == '' + assert results['NM_001194995.1:c.210del']['hgvs_lrg_variant'] == '' + assert results['NM_001194995.1:c.210del']['hgvs_transcript_variant'] == 'NM_001194995.1:c.210del' + assert results['NM_001194995.1:c.210del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001194995.1:c.210del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.123738431del', 'vcf': {'chr': 'chr12', 'ref': 'CA', 'pos': '123738430', 'alt': 'C'}} + assert results['NM_001194995.1:c.210del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.123253884del', 'vcf': {'chr': 'chr12', 'ref': 'CA', 'pos': '123253883', 'alt': 'C'}} + assert results['NM_001194995.1:c.210del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.123738431del', 'vcf': {'chr': '12', 'ref': 'CA', 'pos': '123738430', 'alt': 'C'}} + assert results['NM_001194995.1:c.210del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.123253884del', 'vcf': {'chr': '12', 'ref': 'CA', 'pos': '123253883', 'alt': 'C'}} + assert results['NM_001194995.1:c.210del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001181924.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001194995.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_152269.4:c.210del' in results.keys() + assert results['NM_152269.4:c.210del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_152269.4:c.210del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_152269.4:c.210del']['alt_genomic_loci'] == [] + assert results['NM_152269.4:c.210del']['transcript_description'] == 'Homo sapiens chromosome 12 open reading frame 65 (C12orf65), transcript variant 1, mRNA' + assert 
results['NM_152269.4:c.210del']['gene_symbol'] == 'C12orf65' + assert results['NM_152269.4:c.210del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_689482.1:p.(Gly72AlafsTer13)', 'slr': 'NP_689482.1:p.(G72Afs*13)'} + assert results['NM_152269.4:c.210del']['submitted_variant'] == '12-123738430-CA-C' + assert results['NM_152269.4:c.210del']['genome_context_intronic_sequence'] == '' + assert results['NM_152269.4:c.210del']['hgvs_lrg_variant'] == '' + assert results['NM_152269.4:c.210del']['hgvs_transcript_variant'] == 'NM_152269.4:c.210del' + assert results['NM_152269.4:c.210del']['hgvs_refseqgene_variant'] == 'NG_027517.1:g.25588del' + assert results['NM_152269.4:c.210del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.123738431del', 'vcf': {'chr': 'chr12', 'ref': 'CA', 'pos': '123738430', 'alt': 'C'}} + assert results['NM_152269.4:c.210del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.123253884del', 'vcf': {'chr': 'chr12', 'ref': 'CA', 'pos': '123253883', 'alt': 'C'}} + assert results['NM_152269.4:c.210del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.123738431del', 'vcf': {'chr': '12', 'ref': 'CA', 'pos': '123738430', 'alt': 'C'}} + assert results['NM_152269.4:c.210del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.123253884del', 'vcf': {'chr': '12', 'ref': 'CA', 'pos': '123253883', 'alt': 'C'}} + assert results['NM_152269.4:c.210del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_027517.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_689482.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_152269.4'} + + assert 'NM_001143905.2:c.210del' in results.keys() + assert results['NM_001143905.2:c.210del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001143905.2:c.210del']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_001143905.2:c.210del']['alt_genomic_loci'] == [] + assert results['NM_001143905.2:c.210del']['transcript_description'] == 'Homo sapiens chromosome 12 open reading frame 65 (C12orf65), transcript variant 2, mRNA' + assert results['NM_001143905.2:c.210del']['gene_symbol'] == 'C12orf65' + assert results['NM_001143905.2:c.210del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001137377.1:p.(Gly72AlafsTer13)', 'slr': 'NP_001137377.1:p.(G72Afs*13)'} + assert results['NM_001143905.2:c.210del']['submitted_variant'] == '12-123738430-CA-C' + assert results['NM_001143905.2:c.210del']['genome_context_intronic_sequence'] == '' + assert results['NM_001143905.2:c.210del']['hgvs_lrg_variant'] == '' + assert results['NM_001143905.2:c.210del']['hgvs_transcript_variant'] == 'NM_001143905.2:c.210del' + assert results['NM_001143905.2:c.210del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001143905.2:c.210del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.123738431del', 'vcf': {'chr': 'chr12', 'ref': 'CA', 'pos': '123738430', 'alt': 'C'}} + assert results['NM_001143905.2:c.210del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.123253884del', 'vcf': {'chr': 'chr12', 'ref': 'CA', 'pos': '123253883', 'alt': 'C'}} + assert results['NM_001143905.2:c.210del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.123738431del', 'vcf': {'chr': '12', 'ref': 'CA', 'pos': '123738430', 'alt': 'C'}} + assert results['NM_001143905.2:c.210del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.123253884del', 'vcf': {'chr': '12', 'ref': 'CA', 'pos': '123253883', 'alt': 'C'}} + assert results['NM_001143905.2:c.210del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001137377.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001143905.2'} + + + def test_variant223(self): + variant = 
'13-31789169-CT-C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_194318.3:c.71-5del' in results.keys() + assert results['NM_194318.3:c.71-5del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_194318.3:c.71-5del']['refseqgene_context_intronic_sequence'] == 'NG_011732.1(NM_194318.3):c.71-5del' + assert results['NM_194318.3:c.71-5del']['alt_genomic_loci'] == [] + assert results['NM_194318.3:c.71-5del']['transcript_description'] == 'Homo sapiens beta 3-glucosyltransferase (B3GLCT), mRNA' + assert results['NM_194318.3:c.71-5del']['gene_symbol'] == 'B3GLCT' + assert results['NM_194318.3:c.71-5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_919299.3:p.?', 'slr': 'NP_919299.3:p.?'} + assert results['NM_194318.3:c.71-5del']['submitted_variant'] == '13-31789169-CT-C' + assert results['NM_194318.3:c.71-5del']['genome_context_intronic_sequence'] == 'NC_000013.10(NM_194318.3):c.71-5del' + assert results['NM_194318.3:c.71-5del']['hgvs_lrg_variant'] == '' + assert results['NM_194318.3:c.71-5del']['hgvs_transcript_variant'] == 'NM_194318.3:c.71-5del' + assert results['NM_194318.3:c.71-5del']['hgvs_refseqgene_variant'] == 'NG_011732.1:g.20072del' + assert results['NM_194318.3:c.71-5del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000013.10:g.31789183del', 'vcf': {'chr': 'chr13', 'ref': 'CT', 'pos': '31789169', 'alt': 'C'}} + assert results['NM_194318.3:c.71-5del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000013.11:g.31215046del', 'vcf': {'chr': 'chr13', 'ref': 'CT', 'pos': '31215032', 'alt': 'C'}} + assert results['NM_194318.3:c.71-5del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000013.10:g.31789183del', 'vcf': {'chr': '13', 'ref': 'CT', 'pos': '31789169', 'alt': 'C'}} + assert results['NM_194318.3:c.71-5del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000013.11:g.31215046del', 'vcf': {'chr': '13', 'ref': 'CT', 'pos': '31215032', 'alt': 'C'}} + assert results['NM_194318.3:c.71-5del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011732.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_919299.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_194318.3'} + + + def test_variant224(self): + variant = '14-62187287-G-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NR_144368.1:n.214-3552C>T' in results.keys() + assert results['NR_144368.1:n.214-3552C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_144368.1:n.214-3552C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_144368.1:n.214-3552C>T']['alt_genomic_loci'] == [] + assert results['NR_144368.1:n.214-3552C>T']['transcript_description'] == 'Homo sapiens uncharacterized LOC105370526 (LOC105370526), long non-coding RNA' + assert results['NR_144368.1:n.214-3552C>T']['gene_symbol'] == 'LOC105370526' + assert results['NR_144368.1:n.214-3552C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_144368.1:n.214-3552C>T']['submitted_variant'] == '14-62187287-G-A' + assert results['NR_144368.1:n.214-3552C>T']['genome_context_intronic_sequence'] == 'NC_000014.8(NR_144368.1):c.214-3552C>T' + assert results['NR_144368.1:n.214-3552C>T']['hgvs_lrg_variant'] == '' + assert results['NR_144368.1:n.214-3552C>T']['hgvs_transcript_variant'] == 'NR_144368.1:n.214-3552C>T' + assert results['NR_144368.1:n.214-3552C>T']['hgvs_refseqgene_variant'] == '' + assert results['NR_144368.1:n.214-3552C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.62187287G>A', 'vcf': {'chr': 'chr14', 'ref': u'G', 'pos': '62187287', 'alt': u'A'}} + assert results['NR_144368.1:n.214-3552C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 
'vcf': {'chr': 'chr14', 'ref': u'G', 'pos': '61720569', 'alt': u'A'}} + assert results['NR_144368.1:n.214-3552C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62187287G>A', 'vcf': {'chr': '14', 'ref': u'G', 'pos': '62187287', 'alt': u'A'}} + assert results['NR_144368.1:n.214-3552C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': '14', 'ref': u'G', 'pos': '61720569', 'alt': u'A'}} + assert results['NR_144368.1:n.214-3552C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_144368.1'} + + assert 'NM_181054.2:c.223G>A' in results.keys() + assert results['NM_181054.2:c.223G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_181054.2:c.223G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_181054.2:c.223G>A']['alt_genomic_loci'] == [] + assert results['NM_181054.2:c.223G>A']['transcript_description'] == 'Homo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 2, mRNA' + assert results['NM_181054.2:c.223G>A']['gene_symbol'] == 'HIF1A' + assert results['NM_181054.2:c.223G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_851397.1:p.(Ala75Thr)', 'slr': 'NP_851397.1:p.(A75T)'} + assert results['NM_181054.2:c.223G>A']['submitted_variant'] == '14-62187287-G-A' + assert results['NM_181054.2:c.223G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_181054.2:c.223G>A']['hgvs_lrg_variant'] == '' + assert results['NM_181054.2:c.223G>A']['hgvs_transcript_variant'] == 'NM_181054.2:c.223G>A' + assert results['NM_181054.2:c.223G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_181054.2:c.223G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.62187287G>A', 'vcf': {'chr': 'chr14', 'ref': 'G', 'pos': '62187287', 'alt': 'A'}} + assert results['NM_181054.2:c.223G>A']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': 'chr14', 'ref': 'G', 'pos': '61720569', 'alt': 'A'}} + assert results['NM_181054.2:c.223G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62187287G>A', 'vcf': {'chr': '14', 'ref': 'G', 'pos': '62187287', 'alt': 'A'}} + assert results['NM_181054.2:c.223G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': '14', 'ref': 'G', 'pos': '61720569', 'alt': 'A'}} + assert results['NM_181054.2:c.223G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_851397.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181054.2'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001243084.1:c.295G>A' in results.keys() + assert results['NM_001243084.1:c.295G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001243084.1:c.295G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001243084.1:c.295G>A']['alt_genomic_loci'] == [] + assert results['NM_001243084.1:c.295G>A']['transcript_description'] == 'Homo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 3, mRNA' + assert results['NM_001243084.1:c.295G>A']['gene_symbol'] == 'HIF1A' + assert results['NM_001243084.1:c.295G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001230013.1:p.(Ala99Thr)', 'slr': 'NP_001230013.1:p.(A99T)'} + assert results['NM_001243084.1:c.295G>A']['submitted_variant'] == '14-62187287-G-A' + assert results['NM_001243084.1:c.295G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001243084.1:c.295G>A']['hgvs_lrg_variant'] == '' + assert results['NM_001243084.1:c.295G>A']['hgvs_transcript_variant'] == 'NM_001243084.1:c.295G>A' + assert results['NM_001243084.1:c.295G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001243084.1:c.295G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000014.8:g.62187287G>A', 'vcf': {'chr': 'chr14', 'ref': 'G', 'pos': '62187287', 'alt': 'A'}} + assert results['NM_001243084.1:c.295G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': 'chr14', 'ref': 'G', 'pos': '61720569', 'alt': 'A'}} + assert results['NM_001243084.1:c.295G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62187287G>A', 'vcf': {'chr': '14', 'ref': 'G', 'pos': '62187287', 'alt': 'A'}} + assert results['NM_001243084.1:c.295G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': '14', 'ref': 'G', 'pos': '61720569', 'alt': 'A'}} + assert results['NM_001243084.1:c.295G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001230013.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001243084.1'} + + assert 'NM_001530.3:c.223G>A' in results.keys() + assert results['NM_001530.3:c.223G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001530.3:c.223G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001530.3:c.223G>A']['alt_genomic_loci'] == [] + assert results['NM_001530.3:c.223G>A']['transcript_description'] == 'Homo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 1, mRNA' + assert results['NM_001530.3:c.223G>A']['gene_symbol'] == 'HIF1A' + assert results['NM_001530.3:c.223G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001521.1:p.(Ala75Thr)', 'slr': 'NP_001521.1:p.(A75T)'} + assert results['NM_001530.3:c.223G>A']['submitted_variant'] == '14-62187287-G-A' + assert results['NM_001530.3:c.223G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001530.3:c.223G>A']['hgvs_lrg_variant'] == '' + assert results['NM_001530.3:c.223G>A']['hgvs_transcript_variant'] == 'NM_001530.3:c.223G>A' + assert results['NM_001530.3:c.223G>A']['hgvs_refseqgene_variant'] == 
'NG_029606.1:g.30169G>A' + assert results['NM_001530.3:c.223G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.62187287G>A', 'vcf': {'chr': 'chr14', 'ref': 'G', 'pos': '62187287', 'alt': 'A'}} + assert results['NM_001530.3:c.223G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': 'chr14', 'ref': 'G', 'pos': '61720569', 'alt': 'A'}} + assert results['NM_001530.3:c.223G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62187287G>A', 'vcf': {'chr': '14', 'ref': 'G', 'pos': '62187287', 'alt': 'A'}} + assert results['NM_001530.3:c.223G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': '14', 'ref': 'G', 'pos': '61720569', 'alt': 'A'}} + assert results['NM_001530.3:c.223G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029606.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001521.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001530.3'} + + + def test_variant225(self): + variant = '14-62188231-TT-GA' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NR_144368.1:n.214-4497_214-4496delinsTC' in results.keys() + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['alt_genomic_loci'] == [] + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['transcript_description'] == 'Homo sapiens uncharacterized LOC105370526 (LOC105370526), long non-coding RNA' + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['gene_symbol'] == 'LOC105370526' + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 
'Non-coding :n.'} + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['submitted_variant'] == '14-62188231-TT-GA' + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['genome_context_intronic_sequence'] == 'NC_000014.8(NR_144368.1):c.214-4497_214-4496delinsTC' + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['hgvs_lrg_variant'] == '' + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['hgvs_transcript_variant'] == 'NR_144368.1:n.214-4497_214-4496delinsTC' + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['hgvs_refseqgene_variant'] == '' + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': 'chr14', 'ref': 'TT', 'pos': '62188231', 'alt': u'GA'}} + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': 'chr14', 'ref': 'TT', 'pos': '61721513', 'alt': u'GA'}} + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': '14', 'ref': 'TT', 'pos': '62188231', 'alt': u'GA'}} + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': '14', 'ref': 'TT', 'pos': '61721513', 'alt': u'GA'}} + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_144368.1'} + + assert 'NM_001530.3:c.231_232delinsGA' in results.keys() + assert results['NM_001530.3:c.231_232delinsGA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001530.3:c.231_232delinsGA']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_001530.3:c.231_232delinsGA']['alt_genomic_loci'] == [] + assert results['NM_001530.3:c.231_232delinsGA']['transcript_description'] == 'Homo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 1, mRNA' + assert results['NM_001530.3:c.231_232delinsGA']['gene_symbol'] == 'HIF1A' + assert results['NM_001530.3:c.231_232delinsGA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001521.1:p.(Asp77_Leu78delinsGluMet)', 'slr': 'NP_001521.1:p.(D77_L78delinsEM)'} + assert results['NM_001530.3:c.231_232delinsGA']['submitted_variant'] == '14-62188231-TT-GA' + assert results['NM_001530.3:c.231_232delinsGA']['genome_context_intronic_sequence'] == '' + assert results['NM_001530.3:c.231_232delinsGA']['hgvs_lrg_variant'] == '' + assert results['NM_001530.3:c.231_232delinsGA']['hgvs_transcript_variant'] == 'NM_001530.3:c.231_232delinsGA' + assert results['NM_001530.3:c.231_232delinsGA']['hgvs_refseqgene_variant'] == 'NG_029606.1:g.31113_31114delinsGA' + assert results['NM_001530.3:c.231_232delinsGA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': 'chr14', 'ref': 'TT', 'pos': '62188231', 'alt': 'GA'}} + assert results['NM_001530.3:c.231_232delinsGA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': 'chr14', 'ref': 'TT', 'pos': '61721513', 'alt': 'GA'}} + assert results['NM_001530.3:c.231_232delinsGA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': '14', 'ref': 'TT', 'pos': '62188231', 'alt': 'GA'}} + assert results['NM_001530.3:c.231_232delinsGA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': '14', 'ref': 'TT', 'pos': '61721513', 'alt': 'GA'}} + assert results['NM_001530.3:c.231_232delinsGA']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_029606.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001521.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001530.3'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001243084.1:c.303_304delinsGA' in results.keys() + assert results['NM_001243084.1:c.303_304delinsGA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001243084.1:c.303_304delinsGA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001243084.1:c.303_304delinsGA']['alt_genomic_loci'] == [] + assert results['NM_001243084.1:c.303_304delinsGA']['transcript_description'] == 'Homo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 3, mRNA' + assert results['NM_001243084.1:c.303_304delinsGA']['gene_symbol'] == 'HIF1A' + assert results['NM_001243084.1:c.303_304delinsGA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001230013.1:p.(Asp101_Leu102delinsGluMet)', 'slr': 'NP_001230013.1:p.(D101_L102delinsEM)'} + assert results['NM_001243084.1:c.303_304delinsGA']['submitted_variant'] == '14-62188231-TT-GA' + assert results['NM_001243084.1:c.303_304delinsGA']['genome_context_intronic_sequence'] == '' + assert results['NM_001243084.1:c.303_304delinsGA']['hgvs_lrg_variant'] == '' + assert results['NM_001243084.1:c.303_304delinsGA']['hgvs_transcript_variant'] == 'NM_001243084.1:c.303_304delinsGA' + assert results['NM_001243084.1:c.303_304delinsGA']['hgvs_refseqgene_variant'] == '' + assert results['NM_001243084.1:c.303_304delinsGA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': 'chr14', 'ref': 'TT', 'pos': '62188231', 'alt': 'GA'}} + assert results['NM_001243084.1:c.303_304delinsGA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': 'chr14', 'ref': 'TT', 'pos': '61721513', 'alt': 'GA'}} + assert 
results['NM_001243084.1:c.303_304delinsGA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': '14', 'ref': 'TT', 'pos': '62188231', 'alt': 'GA'}} + assert results['NM_001243084.1:c.303_304delinsGA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': '14', 'ref': 'TT', 'pos': '61721513', 'alt': 'GA'}} + assert results['NM_001243084.1:c.303_304delinsGA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001230013.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001243084.1'} + + assert 'NM_181054.2:c.231_232delinsGA' in results.keys() + assert results['NM_181054.2:c.231_232delinsGA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_181054.2:c.231_232delinsGA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_181054.2:c.231_232delinsGA']['alt_genomic_loci'] == [] + assert results['NM_181054.2:c.231_232delinsGA']['transcript_description'] == 'Homo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 2, mRNA' + assert results['NM_181054.2:c.231_232delinsGA']['gene_symbol'] == 'HIF1A' + assert results['NM_181054.2:c.231_232delinsGA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_851397.1:p.(Asp77_Leu78delinsGluMet)', 'slr': 'NP_851397.1:p.(D77_L78delinsEM)'} + assert results['NM_181054.2:c.231_232delinsGA']['submitted_variant'] == '14-62188231-TT-GA' + assert results['NM_181054.2:c.231_232delinsGA']['genome_context_intronic_sequence'] == '' + assert results['NM_181054.2:c.231_232delinsGA']['hgvs_lrg_variant'] == '' + assert results['NM_181054.2:c.231_232delinsGA']['hgvs_transcript_variant'] == 'NM_181054.2:c.231_232delinsGA' + assert results['NM_181054.2:c.231_232delinsGA']['hgvs_refseqgene_variant'] == '' + assert results['NM_181054.2:c.231_232delinsGA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': 'chr14', 'ref': 'TT', 'pos': '62188231', 'alt': 'GA'}} + assert results['NM_181054.2:c.231_232delinsGA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': 'chr14', 'ref': 'TT', 'pos': '61721513', 'alt': 'GA'}} + assert results['NM_181054.2:c.231_232delinsGA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': '14', 'ref': 'TT', 'pos': '62188231', 'alt': 'GA'}} + assert results['NM_181054.2:c.231_232delinsGA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': '14', 'ref': 'TT', 'pos': '61721513', 'alt': 'GA'}} + assert results['NM_181054.2:c.231_232delinsGA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_851397.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181054.2'} + + + def test_variant226(self): + variant = '14-63174827-C-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_139318.3:c.2366G>T' in results.keys() + assert results['NM_139318.3:c.2366G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_139318.3:c.2366G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_139318.3:c.2366G>T']['alt_genomic_loci'] == [] + assert results['NM_139318.3:c.2366G>T']['transcript_description'] == 'Homo sapiens potassium voltage-gated channel, subfamily H (eag-related), member 5 (KCNH5), transcript variant 1, mRNA' + assert results['NM_139318.3:c.2366G>T']['gene_symbol'] == 'KCNH5' + assert results['NM_139318.3:c.2366G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_647479.2:p.(Gly789Val)', 'slr': 'NP_647479.2:p.(G789V)'} + assert results['NM_139318.3:c.2366G>T']['submitted_variant'] == '14-63174827-C-A' + assert 
results['NM_139318.3:c.2366G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_139318.3:c.2366G>T']['hgvs_lrg_variant'] == '' + assert results['NM_139318.3:c.2366G>T']['hgvs_transcript_variant'] == 'NM_139318.3:c.2366G>T' + assert results['NM_139318.3:c.2366G>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_139318.3:c.2366G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': 'chr14', 'ref': u'C', 'pos': '63174827', 'alt': u'A'}} + assert 'hg38' not in results['NM_139318.3:c.2366G>T']['primary_assembly_loci'].keys() + assert results['NM_139318.3:c.2366G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': '14', 'ref': u'C', 'pos': '63174827', 'alt': u'A'}} + assert 'grch38' not in results['NM_139318.3:c.2366G>T']['primary_assembly_loci'].keys() + assert results['NM_139318.3:c.2366G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_647479.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_139318.3'} + + assert 'NM_172375.1:c.*333G>T' in results.keys() + assert results['NM_172375.1:c.*333G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_172375.1:c.*333G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_172375.1:c.*333G>T']['alt_genomic_loci'] == [] + assert results['NM_172375.1:c.*333G>T']['transcript_description'] == 'Homo sapiens potassium voltage-gated channel, subfamily H (eag-related), member 5 (KCNH5), transcript variant 3, mRNA' + assert results['NM_172375.1:c.*333G>T']['gene_symbol'] == 'KCNH5' + assert results['NM_172375.1:c.*333G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_758963.1:p.?', 'slr': 'NP_758963.1:p.?'} + assert results['NM_172375.1:c.*333G>T']['submitted_variant'] == '14-63174827-C-A' + assert results['NM_172375.1:c.*333G>T']['genome_context_intronic_sequence'] == '' + assert 
results['NM_172375.1:c.*333G>T']['hgvs_lrg_variant'] == '' + assert results['NM_172375.1:c.*333G>T']['hgvs_transcript_variant'] == 'NM_172375.1:c.*333G>T' + assert results['NM_172375.1:c.*333G>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_172375.1:c.*333G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': 'chr14', 'ref': u'C', 'pos': '63174827', 'alt': u'A'}} + assert 'hg38' not in results['NM_172375.1:c.*333G>T']['primary_assembly_loci'].keys() + assert results['NM_172375.1:c.*333G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': '14', 'ref': u'C', 'pos': '63174827', 'alt': u'A'}} + assert 'grch38' not in results['NM_172375.1:c.*333G>T']['primary_assembly_loci'].keys() + assert results['NM_172375.1:c.*333G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_758963.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_172375.1'} + + assert 'NM_172375.2:c.*333G>T' in results.keys() + assert results['NM_172375.2:c.*333G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_172375.2:c.*333G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_172375.2:c.*333G>T']['alt_genomic_loci'] == [] + assert results['NM_172375.2:c.*333G>T']['transcript_description'] == 'Homo sapiens potassium voltage-gated channel subfamily H member 5 (KCNH5), transcript variant 3, mRNA' + assert results['NM_172375.2:c.*333G>T']['gene_symbol'] == 'KCNH5' + assert results['NM_172375.2:c.*333G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_758963.1:p.?', 'slr': 'NP_758963.1:p.?'} + assert results['NM_172375.2:c.*333G>T']['submitted_variant'] == '14-63174827-C-A' + assert results['NM_172375.2:c.*333G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_172375.2:c.*333G>T']['hgvs_lrg_variant'] == '' + assert results['NM_172375.2:c.*333G>T']['hgvs_transcript_variant'] == 
'NM_172375.2:c.*333G>T' + assert results['NM_172375.2:c.*333G>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_172375.2:c.*333G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': 'chr14', 'ref': u'C', 'pos': '63174827', 'alt': u'A'}} + assert results['NM_172375.2:c.*333G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.62708109C>A', 'vcf': {'chr': 'chr14', 'ref': u'C', 'pos': '62708109', 'alt': u'A'}} + assert results['NM_172375.2:c.*333G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': '14', 'ref': u'C', 'pos': '63174827', 'alt': u'A'}} + assert results['NM_172375.2:c.*333G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.62708109C>A', 'vcf': {'chr': '14', 'ref': u'C', 'pos': '62708109', 'alt': u'A'}} + assert results['NM_172375.2:c.*333G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_758963.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_172375.2'} + + assert results['flag'] == 'gene_variant' + assert 'NM_139318.4:c.2366G>T' in results.keys() + assert results['NM_139318.4:c.2366G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_139318.4:c.2366G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_139318.4:c.2366G>T']['alt_genomic_loci'] == [] + assert results['NM_139318.4:c.2366G>T']['transcript_description'] == 'Homo sapiens potassium voltage-gated channel subfamily H member 5 (KCNH5), transcript variant 1, mRNA' + assert results['NM_139318.4:c.2366G>T']['gene_symbol'] == 'KCNH5' + assert results['NM_139318.4:c.2366G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_647479.2:p.(Gly789Val)', 'slr': 'NP_647479.2:p.(G789V)'} + assert results['NM_139318.4:c.2366G>T']['submitted_variant'] == '14-63174827-C-A' + assert 
results['NM_139318.4:c.2366G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_139318.4:c.2366G>T']['hgvs_lrg_variant'] == '' + assert results['NM_139318.4:c.2366G>T']['hgvs_transcript_variant'] == 'NM_139318.4:c.2366G>T' + assert results['NM_139318.4:c.2366G>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_139318.4:c.2366G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': 'chr14', 'ref': u'C', 'pos': '63174827', 'alt': u'A'}} + assert results['NM_139318.4:c.2366G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.62708109C>A', 'vcf': {'chr': 'chr14', 'ref': u'C', 'pos': '62708109', 'alt': u'A'}} + assert results['NM_139318.4:c.2366G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': '14', 'ref': u'C', 'pos': '63174827', 'alt': u'A'}} + assert results['NM_139318.4:c.2366G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.62708109C>A', 'vcf': {'chr': '14', 'ref': u'C', 'pos': '62708109', 'alt': u'A'}} + assert results['NM_139318.4:c.2366G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_647479.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_139318.4'} + + + def test_variant227(self): + variant = '15-42680000-CA-C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_000070.2:c.550del' in results.keys() + assert results['NM_000070.2:c.550del']['hgvs_lrg_transcript_variant'] == 'LRG_849t1:c.550del' + assert results['NM_000070.2:c.550del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000070.2:c.550del']['alt_genomic_loci'] == [] + assert results['NM_000070.2:c.550del']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 1, mRNA' + assert results['NM_000070.2:c.550del']['gene_symbol'] == 'CAPN3' + assert 
results['NM_000070.2:c.550del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000061.1(LRG_849p1):p.(Thr184ArgfsTer36)', 'slr': 'NP_000061.1:p.(T184Rfs*36)'} + assert results['NM_000070.2:c.550del']['submitted_variant'] == '15-42680000-CA-C' + assert results['NM_000070.2:c.550del']['genome_context_intronic_sequence'] == '' + assert results['NM_000070.2:c.550del']['hgvs_lrg_variant'] == 'LRG_849:g.44702del' + assert results['NM_000070.2:c.550del']['hgvs_transcript_variant'] == 'NM_000070.2:c.550del' + assert results['NM_000070.2:c.550del']['hgvs_refseqgene_variant'] == 'NG_008660.1:g.44702del' + assert results['NM_000070.2:c.550del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002del', 'vcf': {'chr': 'chr15', 'ref': 'CA', 'pos': '42680000', 'alt': 'C'}} + assert results['NM_000070.2:c.550del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804del', 'vcf': {'chr': 'chr15', 'ref': 'CA', 'pos': '42387802', 'alt': 'C'}} + assert results['NM_000070.2:c.550del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002del', 'vcf': {'chr': '15', 'ref': 'CA', 'pos': '42680000', 'alt': 'C'}} + assert results['NM_000070.2:c.550del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804del', 'vcf': {'chr': '15', 'ref': 'CA', 'pos': '42387802', 'alt': 'C'}} + assert results['NM_000070.2:c.550del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008660.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000061.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000070.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_849.xml'} + + assert results['flag'] == 'gene_variant' + assert 'NM_024344.1:c.550del' in results.keys() + assert results['NM_024344.1:c.550del']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_024344.1:c.550del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_024344.1:c.550del']['alt_genomic_loci'] == [] + assert results['NM_024344.1:c.550del']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 2, mRNA' + assert results['NM_024344.1:c.550del']['gene_symbol'] == 'CAPN3' + assert results['NM_024344.1:c.550del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_077320.1:p.(Thr184ArgfsTer36)', 'slr': 'NP_077320.1:p.(T184Rfs*36)'} + assert results['NM_024344.1:c.550del']['submitted_variant'] == '15-42680000-CA-C' + assert results['NM_024344.1:c.550del']['genome_context_intronic_sequence'] == '' + assert results['NM_024344.1:c.550del']['hgvs_lrg_variant'] == '' + assert results['NM_024344.1:c.550del']['hgvs_transcript_variant'] == 'NM_024344.1:c.550del' + assert results['NM_024344.1:c.550del']['hgvs_refseqgene_variant'] == '' + assert results['NM_024344.1:c.550del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002del', 'vcf': {'chr': 'chr15', 'ref': 'CA', 'pos': '42680000', 'alt': 'C'}} + assert results['NM_024344.1:c.550del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804del', 'vcf': {'chr': 'chr15', 'ref': 'CA', 'pos': '42387802', 'alt': 'C'}} + assert results['NM_024344.1:c.550del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002del', 'vcf': {'chr': '15', 'ref': 'CA', 'pos': '42680000', 'alt': 'C'}} + assert results['NM_024344.1:c.550del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804del', 'vcf': {'chr': '15', 'ref': 'CA', 'pos': '42387802', 'alt': 'C'}} + assert results['NM_024344.1:c.550del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_077320.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024344.1'} + + assert 'NM_173087.1:c.550del' in results.keys() + assert 
results['NM_173087.1:c.550del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_173087.1:c.550del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_173087.1:c.550del']['alt_genomic_loci'] == [] + assert results['NM_173087.1:c.550del']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 3, mRNA' + assert results['NM_173087.1:c.550del']['gene_symbol'] == 'CAPN3' + assert results['NM_173087.1:c.550del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775110.1:p.(Thr184ArgfsTer36)', 'slr': 'NP_775110.1:p.(T184Rfs*36)'} + assert results['NM_173087.1:c.550del']['submitted_variant'] == '15-42680000-CA-C' + assert results['NM_173087.1:c.550del']['genome_context_intronic_sequence'] == '' + assert results['NM_173087.1:c.550del']['hgvs_lrg_variant'] == '' + assert results['NM_173087.1:c.550del']['hgvs_transcript_variant'] == 'NM_173087.1:c.550del' + assert results['NM_173087.1:c.550del']['hgvs_refseqgene_variant'] == '' + assert results['NM_173087.1:c.550del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002del', 'vcf': {'chr': 'chr15', 'ref': 'CA', 'pos': '42680000', 'alt': 'C'}} + assert results['NM_173087.1:c.550del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804del', 'vcf': {'chr': 'chr15', 'ref': 'CA', 'pos': '42387802', 'alt': 'C'}} + assert results['NM_173087.1:c.550del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002del', 'vcf': {'chr': '15', 'ref': 'CA', 'pos': '42680000', 'alt': 'C'}} + assert results['NM_173087.1:c.550del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804del', 'vcf': {'chr': '15', 'ref': 'CA', 'pos': '42387802', 'alt': 'C'}} + assert results['NM_173087.1:c.550del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775110.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_173087.1'} + + + def test_variant228(self): + variant = '15-42680000-CA-CAA' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_024344.1:c.550dup' in results.keys() + assert results['NM_024344.1:c.550dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_024344.1:c.550dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_024344.1:c.550dup']['alt_genomic_loci'] == [] + assert results['NM_024344.1:c.550dup']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 2, mRNA' + assert results['NM_024344.1:c.550dup']['gene_symbol'] == 'CAPN3' + assert results['NM_024344.1:c.550dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_077320.1:p.(Thr184AsnfsTer16)', 'slr': 'NP_077320.1:p.(T184Nfs*16)'} + assert results['NM_024344.1:c.550dup']['submitted_variant'] == '15-42680000-CA-CAA' + assert results['NM_024344.1:c.550dup']['genome_context_intronic_sequence'] == '' + assert results['NM_024344.1:c.550dup']['hgvs_lrg_variant'] == '' + assert results['NM_024344.1:c.550dup']['hgvs_transcript_variant'] == 'NM_024344.1:c.550dup' + assert results['NM_024344.1:c.550dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_024344.1:c.550dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002dup', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '42680000', 'alt': 'CA'}} + assert results['NM_024344.1:c.550dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804dup', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '42387802', 'alt': 'CA'}} + assert results['NM_024344.1:c.550dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002dup', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '42680000', 'alt': 'CA'}} + assert results['NM_024344.1:c.550dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804dup', 'vcf': 
{'chr': '15', 'ref': 'C', 'pos': '42387802', 'alt': 'CA'}} + assert results['NM_024344.1:c.550dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_077320.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024344.1'} + + assert 'NM_173087.1:c.550dup' in results.keys() + assert results['NM_173087.1:c.550dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_173087.1:c.550dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_173087.1:c.550dup']['alt_genomic_loci'] == [] + assert results['NM_173087.1:c.550dup']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 3, mRNA' + assert results['NM_173087.1:c.550dup']['gene_symbol'] == 'CAPN3' + assert results['NM_173087.1:c.550dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775110.1:p.(Thr184AsnfsTer16)', 'slr': 'NP_775110.1:p.(T184Nfs*16)'} + assert results['NM_173087.1:c.550dup']['submitted_variant'] == '15-42680000-CA-CAA' + assert results['NM_173087.1:c.550dup']['genome_context_intronic_sequence'] == '' + assert results['NM_173087.1:c.550dup']['hgvs_lrg_variant'] == '' + assert results['NM_173087.1:c.550dup']['hgvs_transcript_variant'] == 'NM_173087.1:c.550dup' + assert results['NM_173087.1:c.550dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_173087.1:c.550dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002dup', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '42680000', 'alt': 'CA'}} + assert results['NM_173087.1:c.550dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804dup', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '42387802', 'alt': 'CA'}} + assert results['NM_173087.1:c.550dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002dup', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '42680000', 'alt': 'CA'}} + assert results['NM_173087.1:c.550dup']['primary_assembly_loci']['grch38'] 
== {'hgvs_genomic_description': 'NC_000015.10:g.42387804dup', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '42387802', 'alt': 'CA'}} + assert results['NM_173087.1:c.550dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173087.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_000070.2:c.550dup' in results.keys() + assert results['NM_000070.2:c.550dup']['hgvs_lrg_transcript_variant'] == 'LRG_849t1:c.550dup' + assert results['NM_000070.2:c.550dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000070.2:c.550dup']['alt_genomic_loci'] == [] + assert results['NM_000070.2:c.550dup']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 1, mRNA' + assert results['NM_000070.2:c.550dup']['gene_symbol'] == 'CAPN3' + assert results['NM_000070.2:c.550dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000061.1(LRG_849p1):p.(Thr184AsnfsTer16)', 'slr': 'NP_000061.1:p.(T184Nfs*16)'} + assert results['NM_000070.2:c.550dup']['submitted_variant'] == '15-42680000-CA-CAA' + assert results['NM_000070.2:c.550dup']['genome_context_intronic_sequence'] == '' + assert results['NM_000070.2:c.550dup']['hgvs_lrg_variant'] == 'LRG_849:g.44702dup' + assert results['NM_000070.2:c.550dup']['hgvs_transcript_variant'] == 'NM_000070.2:c.550dup' + assert results['NM_000070.2:c.550dup']['hgvs_refseqgene_variant'] == 'NG_008660.1:g.44702dup' + assert results['NM_000070.2:c.550dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002dup', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '42680000', 'alt': 'CA'}} + assert results['NM_000070.2:c.550dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804dup', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '42387802', 'alt': 'CA'}} + assert results['NM_000070.2:c.550dup']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000015.9:g.42680002dup', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '42680000', 'alt': 'CA'}} + assert results['NM_000070.2:c.550dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804dup', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '42387802', 'alt': 'CA'}} + assert results['NM_000070.2:c.550dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008660.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000061.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000070.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_849.xml'} + + + def test_variant229(self): + variant = '15-42703179-T-TTCA' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_173088.1:c.825_826insTCA' in results.keys() + assert results['NM_173088.1:c.825_826insTCA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_173088.1:c.825_826insTCA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_173088.1:c.825_826insTCA']['alt_genomic_loci'] == [] + assert results['NM_173088.1:c.825_826insTCA']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 4, mRNA' + assert results['NM_173088.1:c.825_826insTCA']['gene_symbol'] == 'CAPN3' + assert results['NM_173088.1:c.825_826insTCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775111.1:p.(Val275_Arg276insSer)', 'slr': 'NP_775111.1:p.(V275_R276insS)'} + assert results['NM_173088.1:c.825_826insTCA']['submitted_variant'] == '15-42703179-T-TTCA' + assert results['NM_173088.1:c.825_826insTCA']['genome_context_intronic_sequence'] == '' + assert results['NM_173088.1:c.825_826insTCA']['hgvs_lrg_variant'] == '' + assert results['NM_173088.1:c.825_826insTCA']['hgvs_transcript_variant'] == 'NM_173088.1:c.825_826insTCA' + assert results['NM_173088.1:c.825_826insTCA']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_173088.1:c.825_826insTCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': 'chr15', 'ref': 'T', 'pos': '42703179', 'alt': 'TTCA'}} + assert results['NM_173088.1:c.825_826insTCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': 'chr15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} + assert results['NM_173088.1:c.825_826insTCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42703179', 'alt': 'TTCA'}} + assert results['NM_173088.1:c.825_826insTCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} + assert results['NM_173088.1:c.825_826insTCA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775111.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173088.1'} + + assert 'NM_173090.1:c.366_367insTCA' in results.keys() + assert results['NM_173090.1:c.366_367insTCA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_173090.1:c.366_367insTCA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_173090.1:c.366_367insTCA']['alt_genomic_loci'] == [] + assert results['NM_173090.1:c.366_367insTCA']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 6, mRNA' + assert results['NM_173090.1:c.366_367insTCA']['gene_symbol'] == 'CAPN3' + assert results['NM_173090.1:c.366_367insTCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775113.1:p.(Val122_Arg123insSer)', 'slr': 'NP_775113.1:p.(V122_R123insS)'} + assert results['NM_173090.1:c.366_367insTCA']['submitted_variant'] == '15-42703179-T-TTCA' + assert results['NM_173090.1:c.366_367insTCA']['genome_context_intronic_sequence'] == '' + assert 
results['NM_173090.1:c.366_367insTCA']['hgvs_lrg_variant'] == '' + assert results['NM_173090.1:c.366_367insTCA']['hgvs_transcript_variant'] == 'NM_173090.1:c.366_367insTCA' + assert results['NM_173090.1:c.366_367insTCA']['hgvs_refseqgene_variant'] == '' + assert results['NM_173090.1:c.366_367insTCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': 'chr15', 'ref': 'T', 'pos': '42703179', 'alt': 'TTCA'}} + assert results['NM_173090.1:c.366_367insTCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': 'chr15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} + assert results['NM_173090.1:c.366_367insTCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42703179', 'alt': 'TTCA'}} + assert results['NM_173090.1:c.366_367insTCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} + assert results['NM_173090.1:c.366_367insTCA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775113.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173090.1'} + + assert 'NM_173089.1:c.366_367insTCA' in results.keys() + assert results['NM_173089.1:c.366_367insTCA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_173089.1:c.366_367insTCA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_173089.1:c.366_367insTCA']['alt_genomic_loci'] == [] + assert results['NM_173089.1:c.366_367insTCA']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 5, mRNA' + assert results['NM_173089.1:c.366_367insTCA']['gene_symbol'] == 'CAPN3' + assert results['NM_173089.1:c.366_367insTCA']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_775112.1:p.(Val122_Arg123insSer)', 'slr': 'NP_775112.1:p.(V122_R123insS)'} + assert results['NM_173089.1:c.366_367insTCA']['submitted_variant'] == '15-42703179-T-TTCA' + assert results['NM_173089.1:c.366_367insTCA']['genome_context_intronic_sequence'] == '' + assert results['NM_173089.1:c.366_367insTCA']['hgvs_lrg_variant'] == '' + assert results['NM_173089.1:c.366_367insTCA']['hgvs_transcript_variant'] == 'NM_173089.1:c.366_367insTCA' + assert results['NM_173089.1:c.366_367insTCA']['hgvs_refseqgene_variant'] == '' + assert results['NM_173089.1:c.366_367insTCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': 'chr15', 'ref': 'T', 'pos': '42703179', 'alt': 'TTCA'}} + assert results['NM_173089.1:c.366_367insTCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': 'chr15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} + assert results['NM_173089.1:c.366_367insTCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42703179', 'alt': 'TTCA'}} + assert results['NM_173089.1:c.366_367insTCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} + assert results['NM_173089.1:c.366_367insTCA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775112.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173089.1'} + + assert 'NM_173087.1:c.2085_2086insTCA' in results.keys() + assert results['NM_173087.1:c.2085_2086insTCA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_173087.1:c.2085_2086insTCA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_173087.1:c.2085_2086insTCA']['alt_genomic_loci'] == [] + assert 
results['NM_173087.1:c.2085_2086insTCA']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 3, mRNA' + assert results['NM_173087.1:c.2085_2086insTCA']['gene_symbol'] == 'CAPN3' + assert results['NM_173087.1:c.2085_2086insTCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775110.1:p.(Val695_Arg696insSer)', 'slr': 'NP_775110.1:p.(V695_R696insS)'} + assert results['NM_173087.1:c.2085_2086insTCA']['submitted_variant'] == '15-42703179-T-TTCA' + assert results['NM_173087.1:c.2085_2086insTCA']['genome_context_intronic_sequence'] == '' + assert results['NM_173087.1:c.2085_2086insTCA']['hgvs_lrg_variant'] == '' + assert results['NM_173087.1:c.2085_2086insTCA']['hgvs_transcript_variant'] == 'NM_173087.1:c.2085_2086insTCA' + assert results['NM_173087.1:c.2085_2086insTCA']['hgvs_refseqgene_variant'] == '' + assert results['NM_173087.1:c.2085_2086insTCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': 'chr15', 'ref': 'T', 'pos': '42703179', 'alt': 'TTCA'}} + assert results['NM_173087.1:c.2085_2086insTCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': 'chr15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} + assert results['NM_173087.1:c.2085_2086insTCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42703179', 'alt': 'TTCA'}} + assert results['NM_173087.1:c.2085_2086insTCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} + assert results['NM_173087.1:c.2085_2086insTCA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173087.1'} + + assert results['flag'] == 
'gene_variant' + assert 'NM_000070.2:c.2361_2362insTCA' in results.keys() + assert results['NM_000070.2:c.2361_2362insTCA']['hgvs_lrg_transcript_variant'] == 'LRG_849t1:c.2361_2362insTCA' + assert results['NM_000070.2:c.2361_2362insTCA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000070.2:c.2361_2362insTCA']['alt_genomic_loci'] == [] + assert results['NM_000070.2:c.2361_2362insTCA']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 1, mRNA' + assert results['NM_000070.2:c.2361_2362insTCA']['gene_symbol'] == 'CAPN3' + assert results['NM_000070.2:c.2361_2362insTCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000061.1(LRG_849p1):p.(Val787_Arg788insSer)', 'slr': 'NP_000061.1:p.(V787_R788insS)'} + assert results['NM_000070.2:c.2361_2362insTCA']['submitted_variant'] == '15-42703179-T-TTCA' + assert results['NM_000070.2:c.2361_2362insTCA']['genome_context_intronic_sequence'] == '' + assert results['NM_000070.2:c.2361_2362insTCA']['hgvs_lrg_variant'] == 'LRG_849:g.67879_67880insTCA' + assert results['NM_000070.2:c.2361_2362insTCA']['hgvs_transcript_variant'] == 'NM_000070.2:c.2361_2362insTCA' + assert results['NM_000070.2:c.2361_2362insTCA']['hgvs_refseqgene_variant'] == 'NG_008660.1:g.67879_67880insTCA' + assert results['NM_000070.2:c.2361_2362insTCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': 'chr15', 'ref': 'T', 'pos': '42703179', 'alt': 'TTCA'}} + assert results['NM_000070.2:c.2361_2362insTCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': 'chr15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} + assert results['NM_000070.2:c.2361_2362insTCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42703179', 'alt': 'TTCA'}} + assert 
results['NM_000070.2:c.2361_2362insTCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} + assert results['NM_000070.2:c.2361_2362insTCA']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008660.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000061.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000070.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_849.xml'} + + assert 'NM_024344.1:c.2343_2344insTCA' in results.keys() + assert results['NM_024344.1:c.2343_2344insTCA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_024344.1:c.2343_2344insTCA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_024344.1:c.2343_2344insTCA']['alt_genomic_loci'] == [] + assert results['NM_024344.1:c.2343_2344insTCA']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 2, mRNA' + assert results['NM_024344.1:c.2343_2344insTCA']['gene_symbol'] == 'CAPN3' + assert results['NM_024344.1:c.2343_2344insTCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_077320.1:p.(Val781_Arg782insSer)', 'slr': 'NP_077320.1:p.(V781_R782insS)'} + assert results['NM_024344.1:c.2343_2344insTCA']['submitted_variant'] == '15-42703179-T-TTCA' + assert results['NM_024344.1:c.2343_2344insTCA']['genome_context_intronic_sequence'] == '' + assert results['NM_024344.1:c.2343_2344insTCA']['hgvs_lrg_variant'] == '' + assert results['NM_024344.1:c.2343_2344insTCA']['hgvs_transcript_variant'] == 'NM_024344.1:c.2343_2344insTCA' + assert results['NM_024344.1:c.2343_2344insTCA']['hgvs_refseqgene_variant'] == '' + assert results['NM_024344.1:c.2343_2344insTCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': 'chr15', 'ref': 'T', 'pos': '42703179', 'alt': 'TTCA'}} + assert 
results['NM_024344.1:c.2343_2344insTCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': 'chr15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} + assert results['NM_024344.1:c.2343_2344insTCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42703179', 'alt': 'TTCA'}} + assert results['NM_024344.1:c.2343_2344insTCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} + assert results['NM_024344.1:c.2343_2344insTCA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_077320.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024344.1'} + + + def test_variant230(self): + variant = '15-42703179-TAG-TTCATCT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_024344.1:c.2344_2345delinsTCATCT' in results.keys() + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['alt_genomic_loci'] == [] + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 2, mRNA' + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['gene_symbol'] == 'CAPN3' + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_077320.1:p.(Arg782SerfsTer14)', 'slr': 'NP_077320.1:p.(R782Sfs*14)'} + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['submitted_variant'] == '15-42703179-TAG-TTCATCT' + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['genome_context_intronic_sequence'] == '' + assert 
results['NM_024344.1:c.2344_2345delinsTCATCT']['hgvs_lrg_variant'] == '' + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['hgvs_transcript_variant'] == 'NM_024344.1:c.2344_2345delinsTCATCT' + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['hgvs_refseqgene_variant'] == '' + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': 'chr15', 'ref': 'AG', 'pos': '42703180', 'alt': 'TCATCT'}} + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': 'chr15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42703180', 'alt': 'TCATCT'}} + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_077320.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024344.1'} + + assert 'NM_173090.1:c.367_368delinsTCATCT' in results.keys() + assert results['NM_173090.1:c.367_368delinsTCATCT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_173090.1:c.367_368delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_173090.1:c.367_368delinsTCATCT']['alt_genomic_loci'] == [] + assert results['NM_173090.1:c.367_368delinsTCATCT']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 6, mRNA' + assert 
results['NM_173090.1:c.367_368delinsTCATCT']['gene_symbol'] == 'CAPN3' + assert results['NM_173090.1:c.367_368delinsTCATCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775113.1:p.(Arg123SerfsTer14)', 'slr': 'NP_775113.1:p.(R123Sfs*14)'} + assert results['NM_173090.1:c.367_368delinsTCATCT']['submitted_variant'] == '15-42703179-TAG-TTCATCT' + assert results['NM_173090.1:c.367_368delinsTCATCT']['genome_context_intronic_sequence'] == '' + assert results['NM_173090.1:c.367_368delinsTCATCT']['hgvs_lrg_variant'] == '' + assert results['NM_173090.1:c.367_368delinsTCATCT']['hgvs_transcript_variant'] == 'NM_173090.1:c.367_368delinsTCATCT' + assert results['NM_173090.1:c.367_368delinsTCATCT']['hgvs_refseqgene_variant'] == '' + assert results['NM_173090.1:c.367_368delinsTCATCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': 'chr15', 'ref': 'AG', 'pos': '42703180', 'alt': 'TCATCT'}} + assert results['NM_173090.1:c.367_368delinsTCATCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': 'chr15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} + assert results['NM_173090.1:c.367_368delinsTCATCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42703180', 'alt': 'TCATCT'}} + assert results['NM_173090.1:c.367_368delinsTCATCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} + assert results['NM_173090.1:c.367_368delinsTCATCT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775113.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173090.1'} + + assert results['flag'] == 'gene_variant' + assert 
'NM_000070.2:c.2362_2363delinsTCATCT' in results.keys() + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['hgvs_lrg_transcript_variant'] == 'LRG_849t1:c.2362_2363delinsTCATCT' + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['alt_genomic_loci'] == [] + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 1, mRNA' + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['gene_symbol'] == 'CAPN3' + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000061.1(LRG_849p1):p.(Arg788SerfsTer14)', 'slr': 'NP_000061.1:p.(R788Sfs*14)'} + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['submitted_variant'] == '15-42703179-TAG-TTCATCT' + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['genome_context_intronic_sequence'] == '' + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['hgvs_lrg_variant'] == 'LRG_849:g.67880_67881delinsTCATCT' + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['hgvs_transcript_variant'] == 'NM_000070.2:c.2362_2363delinsTCATCT' + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['hgvs_refseqgene_variant'] == 'NG_008660.1:g.67880_67881delinsTCATCT' + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': 'chr15', 'ref': 'AG', 'pos': '42703180', 'alt': 'TCATCT'}} + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': 'chr15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42703180', 'alt': 'TCATCT'}} + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008660.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000061.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000070.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_849.xml'} + + assert 'NM_173088.1:c.826_827delinsTCATCT' in results.keys() + assert results['NM_173088.1:c.826_827delinsTCATCT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_173088.1:c.826_827delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_173088.1:c.826_827delinsTCATCT']['alt_genomic_loci'] == [] + assert results['NM_173088.1:c.826_827delinsTCATCT']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 4, mRNA' + assert results['NM_173088.1:c.826_827delinsTCATCT']['gene_symbol'] == 'CAPN3' + assert results['NM_173088.1:c.826_827delinsTCATCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775111.1:p.(Arg276SerfsTer14)', 'slr': 'NP_775111.1:p.(R276Sfs*14)'} + assert results['NM_173088.1:c.826_827delinsTCATCT']['submitted_variant'] == '15-42703179-TAG-TTCATCT' + assert results['NM_173088.1:c.826_827delinsTCATCT']['genome_context_intronic_sequence'] == '' + assert results['NM_173088.1:c.826_827delinsTCATCT']['hgvs_lrg_variant'] == '' + assert results['NM_173088.1:c.826_827delinsTCATCT']['hgvs_transcript_variant'] == 'NM_173088.1:c.826_827delinsTCATCT' + assert results['NM_173088.1:c.826_827delinsTCATCT']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_173088.1:c.826_827delinsTCATCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': 'chr15', 'ref': 'AG', 'pos': '42703180', 'alt': 'TCATCT'}} + assert results['NM_173088.1:c.826_827delinsTCATCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': 'chr15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} + assert results['NM_173088.1:c.826_827delinsTCATCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42703180', 'alt': 'TCATCT'}} + assert results['NM_173088.1:c.826_827delinsTCATCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} + assert results['NM_173088.1:c.826_827delinsTCATCT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775111.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173088.1'} + + assert 'NM_173089.1:c.367_368delinsTCATCT' in results.keys() + assert results['NM_173089.1:c.367_368delinsTCATCT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_173089.1:c.367_368delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_173089.1:c.367_368delinsTCATCT']['alt_genomic_loci'] == [] + assert results['NM_173089.1:c.367_368delinsTCATCT']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 5, mRNA' + assert results['NM_173089.1:c.367_368delinsTCATCT']['gene_symbol'] == 'CAPN3' + assert results['NM_173089.1:c.367_368delinsTCATCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775112.1:p.(Arg123SerfsTer14)', 'slr': 'NP_775112.1:p.(R123Sfs*14)'} + assert results['NM_173089.1:c.367_368delinsTCATCT']['submitted_variant'] == 
'15-42703179-TAG-TTCATCT' + assert results['NM_173089.1:c.367_368delinsTCATCT']['genome_context_intronic_sequence'] == '' + assert results['NM_173089.1:c.367_368delinsTCATCT']['hgvs_lrg_variant'] == '' + assert results['NM_173089.1:c.367_368delinsTCATCT']['hgvs_transcript_variant'] == 'NM_173089.1:c.367_368delinsTCATCT' + assert results['NM_173089.1:c.367_368delinsTCATCT']['hgvs_refseqgene_variant'] == '' + assert results['NM_173089.1:c.367_368delinsTCATCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': 'chr15', 'ref': 'AG', 'pos': '42703180', 'alt': 'TCATCT'}} + assert results['NM_173089.1:c.367_368delinsTCATCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': 'chr15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} + assert results['NM_173089.1:c.367_368delinsTCATCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42703180', 'alt': 'TCATCT'}} + assert results['NM_173089.1:c.367_368delinsTCATCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} + assert results['NM_173089.1:c.367_368delinsTCATCT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775112.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173089.1'} + + assert 'NM_173087.1:c.2086_2087delinsTCATCT' in results.keys() + assert results['NM_173087.1:c.2086_2087delinsTCATCT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_173087.1:c.2086_2087delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_173087.1:c.2086_2087delinsTCATCT']['alt_genomic_loci'] == [] + assert 
results['NM_173087.1:c.2086_2087delinsTCATCT']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 3, mRNA' + assert results['NM_173087.1:c.2086_2087delinsTCATCT']['gene_symbol'] == 'CAPN3' + assert results['NM_173087.1:c.2086_2087delinsTCATCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775110.1:p.(Arg696SerfsTer14)', 'slr': 'NP_775110.1:p.(R696Sfs*14)'} + assert results['NM_173087.1:c.2086_2087delinsTCATCT']['submitted_variant'] == '15-42703179-TAG-TTCATCT' + assert results['NM_173087.1:c.2086_2087delinsTCATCT']['genome_context_intronic_sequence'] == '' + assert results['NM_173087.1:c.2086_2087delinsTCATCT']['hgvs_lrg_variant'] == '' + assert results['NM_173087.1:c.2086_2087delinsTCATCT']['hgvs_transcript_variant'] == 'NM_173087.1:c.2086_2087delinsTCATCT' + assert results['NM_173087.1:c.2086_2087delinsTCATCT']['hgvs_refseqgene_variant'] == '' + assert results['NM_173087.1:c.2086_2087delinsTCATCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': 'chr15', 'ref': 'AG', 'pos': '42703180', 'alt': 'TCATCT'}} + assert results['NM_173087.1:c.2086_2087delinsTCATCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': 'chr15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} + assert results['NM_173087.1:c.2086_2087delinsTCATCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42703180', 'alt': 'TCATCT'}} + assert results['NM_173087.1:c.2086_2087delinsTCATCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} + assert results['NM_173087.1:c.2086_2087delinsTCATCT']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_775110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173087.1'} + + + def test_variant231(self): + variant = '15-48782203-C-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_000138.4:c.2927G>A' in results.keys() + assert results['NM_000138.4:c.2927G>A']['hgvs_lrg_transcript_variant'] == 'LRG_778t1:c.2927G>A' + assert results['NM_000138.4:c.2927G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000138.4:c.2927G>A']['alt_genomic_loci'] == [] + assert results['NM_000138.4:c.2927G>A']['transcript_description'] == 'Homo sapiens fibrillin 1 (FBN1), mRNA' + assert results['NM_000138.4:c.2927G>A']['gene_symbol'] == 'FBN1' + assert results['NM_000138.4:c.2927G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000129.3(LRG_778p1):p.(Arg976His)', 'slr': 'NP_000129.3:p.(R976H)'} + assert results['NM_000138.4:c.2927G>A']['submitted_variant'] == '15-48782203-C-T' + assert results['NM_000138.4:c.2927G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_000138.4:c.2927G>A']['hgvs_lrg_variant'] == 'LRG_778:g.160783G>A' + assert results['NM_000138.4:c.2927G>A']['hgvs_transcript_variant'] == 'NM_000138.4:c.2927G>A' + assert results['NM_000138.4:c.2927G>A']['hgvs_refseqgene_variant'] == 'NG_008805.2:g.160783G>A' + assert results['NM_000138.4:c.2927G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.48782203C>T', 'vcf': {'chr': 'chr15', 'ref': u'C', 'pos': '48782203', 'alt': u'T'}} + assert results['NM_000138.4:c.2927G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.48490006C>T', 'vcf': {'chr': 'chr15', 'ref': u'C', 'pos': '48490006', 'alt': u'T'}} + assert results['NM_000138.4:c.2927G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.48782203C>T', 'vcf': {'chr': '15', 'ref': u'C', 'pos': '48782203', 'alt': u'T'}} + assert 
results['NM_000138.4:c.2927G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.48490006C>T', 'vcf': {'chr': '15', 'ref': u'C', 'pos': '48490006', 'alt': u'T'}} + assert results['NM_000138.4:c.2927G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008805.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000129.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000138.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_778.xml'} + + assert results['flag'] == 'gene_variant' + + def test_variant232(self): + variant = '15-72105929-CC-C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_014249.2:c.946_949=' in results.keys() + assert results['NM_014249.2:c.946_949=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014249.2:c.946_949=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014249.2:c.946_949=']['alt_genomic_loci'] == [] + assert results['NM_014249.2:c.946_949=']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA' + assert results['NM_014249.2:c.946_949=']['gene_symbol'] == 'NR2E3' + assert results['NM_014249.2:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Asp316=)', 'slr': 'NP_055064.1:p.(D316=)'} + assert results['NM_014249.2:c.946_949=']['submitted_variant'] == '15-72105929-CC-C' + assert results['NM_014249.2:c.946_949=']['genome_context_intronic_sequence'] == '' + assert results['NM_014249.2:c.946_949=']['hgvs_lrg_variant'] == '' + assert results['NM_014249.2:c.946_949=']['hgvs_transcript_variant'] == 'NM_014249.2:c.946_949=' + assert results['NM_014249.2:c.946_949=']['hgvs_refseqgene_variant'] == '' + assert results['NM_014249.2:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 
'alt': 'A'}} + assert 'hg38' not in results['NM_014249.2:c.946_949=']['primary_assembly_loci'].keys() + assert results['NM_014249.2:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} + assert 'grch38' not in results['NM_014249.2:c.946_949=']['primary_assembly_loci'].keys() + assert results['NM_014249.2:c.946_949=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2'} + + assert 'NM_016346.3:c.946_949=' in results.keys() + assert results['NM_016346.3:c.946_949=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_016346.3:c.946_949=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_016346.3:c.946_949=']['alt_genomic_loci'] == [] + assert results['NM_016346.3:c.946_949=']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA' + assert results['NM_016346.3:c.946_949=']['gene_symbol'] == 'NR2E3' + assert results['NM_016346.3:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Asp316=)', 'slr': 'NP_057430.1:p.(D316=)'} + assert results['NM_016346.3:c.946_949=']['submitted_variant'] == '15-72105929-CC-C' + assert results['NM_016346.3:c.946_949=']['genome_context_intronic_sequence'] == '' + assert results['NM_016346.3:c.946_949=']['hgvs_lrg_variant'] == '' + assert results['NM_016346.3:c.946_949=']['hgvs_transcript_variant'] == 'NM_016346.3:c.946_949=' + assert results['NM_016346.3:c.946_949=']['hgvs_refseqgene_variant'] == '' + assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} + assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000015.10:g.71813587_71813590=', 'vcf': {'chr': 'chr15', 'ref': u'GACC', 'pos': '71813587', 'alt': u'GACC'}} + assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} + assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813587_71813590=', 'vcf': {'chr': '15', 'ref': u'GACC', 'pos': '71813587', 'alt': u'GACC'}} + assert results['NM_016346.3:c.946_949=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3'} + + assert results['flag'] == 'gene_variant' + assert 'NM_014249.3:c.946_949=' in results.keys() + assert results['NM_014249.3:c.946_949=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014249.3:c.946_949=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014249.3:c.946_949=']['alt_genomic_loci'] == [] + assert results['NM_014249.3:c.946_949=']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA' + assert results['NM_014249.3:c.946_949=']['gene_symbol'] == 'NR2E3' + assert results['NM_014249.3:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Asp316=)', 'slr': 'NP_055064.1:p.(D316=)'} + assert results['NM_014249.3:c.946_949=']['submitted_variant'] == '15-72105929-CC-C' + assert results['NM_014249.3:c.946_949=']['genome_context_intronic_sequence'] == '' + assert results['NM_014249.3:c.946_949=']['hgvs_lrg_variant'] == '' + assert results['NM_014249.3:c.946_949=']['hgvs_transcript_variant'] == 'NM_014249.3:c.946_949=' + assert results['NM_014249.3:c.946_949=']['hgvs_refseqgene_variant'] == 'NG_009113.1:g.8034_8037=' + assert 
results['NM_014249.3:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} + assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813586_71813591=', 'vcf': {'chr': 'chr15', 'ref': u'GGACCC', 'pos': '71813586', 'alt': u'GGACCC'}} + assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} + assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813586_71813591=', 'vcf': {'chr': '15', 'ref': u'GGACCC', 'pos': '71813586', 'alt': u'GGACCC'}} + assert results['NM_014249.3:c.946_949=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3'} + + assert 'NM_016346.2:c.946_949=' in results.keys() + assert results['NM_016346.2:c.946_949=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_016346.2:c.946_949=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_016346.2:c.946_949=']['alt_genomic_loci'] == [] + assert results['NM_016346.2:c.946_949=']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA' + assert results['NM_016346.2:c.946_949=']['gene_symbol'] == 'NR2E3' + assert results['NM_016346.2:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Asp316=)', 'slr': 'NP_057430.1:p.(D316=)'} + assert results['NM_016346.2:c.946_949=']['submitted_variant'] == '15-72105929-CC-C' + assert results['NM_016346.2:c.946_949=']['genome_context_intronic_sequence'] == '' + assert 
results['NM_016346.2:c.946_949=']['hgvs_lrg_variant'] == '' + assert results['NM_016346.2:c.946_949=']['hgvs_transcript_variant'] == 'NM_016346.2:c.946_949=' + assert results['NM_016346.2:c.946_949=']['hgvs_refseqgene_variant'] == '' + assert results['NM_016346.2:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} + assert 'hg38' not in results['NM_016346.2:c.946_949=']['primary_assembly_loci'].keys() + assert results['NM_016346.2:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} + assert 'grch38' not in results['NM_016346.2:c.946_949=']['primary_assembly_loci'].keys() + assert results['NM_016346.2:c.946_949=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2'} + + + def test_variant233(self): + variant = '15-89873415-G-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_002693.2:c.752C>T' in results.keys() + assert results['NM_002693.2:c.752C>T']['hgvs_lrg_transcript_variant'] == 'LRG_765t1:c.752C>T' + assert results['NM_002693.2:c.752C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_002693.2:c.752C>T']['alt_genomic_loci'] == [] + assert results['NM_002693.2:c.752C>T']['transcript_description'] == 'Homo sapiens DNA polymerase gamma, catalytic subunit (POLG), transcript variant 1, mRNA' + assert results['NM_002693.2:c.752C>T']['gene_symbol'] == 'POLG' + assert results['NM_002693.2:c.752C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002684.1(LRG_765p1):p.(Thr251Ile)', 'slr': 'NP_002684.1:p.(T251I)'} + assert results['NM_002693.2:c.752C>T']['submitted_variant'] == '15-89873415-G-A' + assert 
results['NM_002693.2:c.752C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_002693.2:c.752C>T']['hgvs_lrg_variant'] == '' + assert results['NM_002693.2:c.752C>T']['hgvs_transcript_variant'] == 'NM_002693.2:c.752C>T' + assert results['NM_002693.2:c.752C>T']['hgvs_refseqgene_variant'] == 'NG_008218.1:g.9612C>T' + assert results['NM_002693.2:c.752C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.89873415G>A', 'vcf': {'chr': 'chr15', 'ref': u'G', 'pos': '89873415', 'alt': u'A'}} + assert results['NM_002693.2:c.752C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89330184G>A', 'vcf': {'chr': 'chr15', 'ref': u'G', 'pos': '89330184', 'alt': u'A'}} + assert results['NM_002693.2:c.752C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.89873415G>A', 'vcf': {'chr': '15', 'ref': u'G', 'pos': '89873415', 'alt': u'A'}} + assert results['NM_002693.2:c.752C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89330184G>A', 'vcf': {'chr': '15', 'ref': u'G', 'pos': '89330184', 'alt': u'A'}} + assert results['NM_002693.2:c.752C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008218.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002684.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002693.2'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001126131.1:c.752C>T' in results.keys() + assert results['NM_001126131.1:c.752C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001126131.1:c.752C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001126131.1:c.752C>T']['alt_genomic_loci'] == [] + assert results['NM_001126131.1:c.752C>T']['transcript_description'] == 'Homo sapiens DNA polymerase gamma, catalytic subunit (POLG), transcript variant 2, mRNA' + assert results['NM_001126131.1:c.752C>T']['gene_symbol'] == 'POLG' + assert 
results['NM_001126131.1:c.752C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119603.1:p.(Thr251Ile)', 'slr': 'NP_001119603.1:p.(T251I)'} + assert results['NM_001126131.1:c.752C>T']['submitted_variant'] == '15-89873415-G-A' + assert results['NM_001126131.1:c.752C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001126131.1:c.752C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001126131.1:c.752C>T']['hgvs_transcript_variant'] == 'NM_001126131.1:c.752C>T' + assert results['NM_001126131.1:c.752C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001126131.1:c.752C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.89873415G>A', 'vcf': {'chr': 'chr15', 'ref': u'G', 'pos': '89873415', 'alt': u'A'}} + assert results['NM_001126131.1:c.752C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89330184G>A', 'vcf': {'chr': 'chr15', 'ref': u'G', 'pos': '89330184', 'alt': u'A'}} + assert results['NM_001126131.1:c.752C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.89873415G>A', 'vcf': {'chr': '15', 'ref': u'G', 'pos': '89873415', 'alt': u'A'}} + assert results['NM_001126131.1:c.752C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89330184G>A', 'vcf': {'chr': '15', 'ref': u'G', 'pos': '89330184', 'alt': u'A'}} + assert results['NM_001126131.1:c.752C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119603.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126131.1'} + + + def test_variant234(self): + variant = '16-2103394-C-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_000548.3:c.277C>T' in results.keys() + assert results['NM_000548.3:c.277C>T']['hgvs_lrg_transcript_variant'] == 'LRG_487t1:c.277C>T' + assert results['NM_000548.3:c.277C>T']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_000548.3:c.277C>T']['alt_genomic_loci'] == [] + assert results['NM_000548.3:c.277C>T']['transcript_description'] == 'Homo sapiens tuberous sclerosis 2 (TSC2), transcript variant 1, mRNA' + assert results['NM_000548.3:c.277C>T']['gene_symbol'] == 'TSC2' + assert results['NM_000548.3:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000539.2(LRG_487p1):p.(Arg93Trp)', 'slr': 'NP_000539.2:p.(R93W)'} + assert results['NM_000548.3:c.277C>T']['submitted_variant'] == '16-2103394-C-T' + assert results['NM_000548.3:c.277C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000548.3:c.277C>T']['hgvs_lrg_variant'] == 'LRG_487:g.9088C>T' + assert results['NM_000548.3:c.277C>T']['hgvs_transcript_variant'] == 'NM_000548.3:c.277C>T' + assert results['NM_000548.3:c.277C>T']['hgvs_refseqgene_variant'] == 'NG_005895.1:g.9088C>T' + assert results['NM_000548.3:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert 'hg38' not in results['NM_000548.3:c.277C>T']['primary_assembly_loci'].keys() + assert results['NM_000548.3:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert 'grch38' not in results['NM_000548.3:c.277C>T']['primary_assembly_loci'].keys() + assert results['NM_000548.3:c.277C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_005895.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000539.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000548.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_487.xml'} + + assert 'NM_001318832.1:c.310C>T' in results.keys() + assert results['NM_001318832.1:c.310C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001318832.1:c.310C>T']['refseqgene_context_intronic_sequence'] == '' 
+ assert results['NM_001318832.1:c.310C>T']['alt_genomic_loci'] == [] + assert results['NM_001318832.1:c.310C>T']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 9, mRNA' + assert results['NM_001318832.1:c.310C>T']['gene_symbol'] == 'TSC2' + assert results['NM_001318832.1:c.310C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305761.1:p.(Arg104Trp)', 'slr': 'NP_001305761.1:p.(R104W)'} + assert results['NM_001318832.1:c.310C>T']['submitted_variant'] == '16-2103394-C-T' + assert results['NM_001318832.1:c.310C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001318832.1:c.310C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001318832.1:c.310C>T']['hgvs_transcript_variant'] == 'NM_001318832.1:c.310C>T' + assert results['NM_001318832.1:c.310C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001318832.1:c.310C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert results['NM_001318832.1:c.310C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} + assert results['NM_001318832.1:c.310C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert results['NM_001318832.1:c.310C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} + assert results['NM_001318832.1:c.310C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305761.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318832.1'} + + assert 'NM_001318829.1:c.130C>T' in results.keys() + assert 
results['NM_001318829.1:c.130C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001318829.1:c.130C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001318829.1:c.130C>T']['alt_genomic_loci'] == [] + assert results['NM_001318829.1:c.130C>T']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 7, mRNA' + assert results['NM_001318829.1:c.130C>T']['gene_symbol'] == 'TSC2' + assert results['NM_001318829.1:c.130C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305758.1:p.(Arg44Trp)', 'slr': 'NP_001305758.1:p.(R44W)'} + assert results['NM_001318829.1:c.130C>T']['submitted_variant'] == '16-2103394-C-T' + assert results['NM_001318829.1:c.130C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001318829.1:c.130C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001318829.1:c.130C>T']['hgvs_transcript_variant'] == 'NM_001318829.1:c.130C>T' + assert results['NM_001318829.1:c.130C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001318829.1:c.130C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert results['NM_001318829.1:c.130C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} + assert results['NM_001318829.1:c.130C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert results['NM_001318829.1:c.130C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} + assert results['NM_001318829.1:c.130C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305758.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318829.1'} + + assert 'NM_001077183.2:c.277C>T' in results.keys() + assert results['NM_001077183.2:c.277C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001077183.2:c.277C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001077183.2:c.277C>T']['alt_genomic_loci'] == [] + assert results['NM_001077183.2:c.277C>T']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 4, mRNA' + assert results['NM_001077183.2:c.277C>T']['gene_symbol'] == 'TSC2' + assert results['NM_001077183.2:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001070651.1:p.(Arg93Trp)', 'slr': 'NP_001070651.1:p.(R93W)'} + assert results['NM_001077183.2:c.277C>T']['submitted_variant'] == '16-2103394-C-T' + assert results['NM_001077183.2:c.277C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001077183.2:c.277C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001077183.2:c.277C>T']['hgvs_transcript_variant'] == 'NM_001077183.2:c.277C>T' + assert results['NM_001077183.2:c.277C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001077183.2:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert results['NM_001077183.2:c.277C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} + assert results['NM_001077183.2:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert results['NM_001077183.2:c.277C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} + assert 
results['NM_001077183.2:c.277C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001070651.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077183.2'} + + assert 'NM_001114382.1:c.277C>T' in results.keys() + assert results['NM_001114382.1:c.277C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001114382.1:c.277C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001114382.1:c.277C>T']['alt_genomic_loci'] == [] + assert results['NM_001114382.1:c.277C>T']['transcript_description'] == 'Homo sapiens tuberous sclerosis 2 (TSC2), transcript variant 5, mRNA' + assert results['NM_001114382.1:c.277C>T']['gene_symbol'] == 'TSC2' + assert results['NM_001114382.1:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001107854.1:p.(Arg93Trp)', 'slr': 'NP_001107854.1:p.(R93W)'} + assert results['NM_001114382.1:c.277C>T']['submitted_variant'] == '16-2103394-C-T' + assert results['NM_001114382.1:c.277C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001114382.1:c.277C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001114382.1:c.277C>T']['hgvs_transcript_variant'] == 'NM_001114382.1:c.277C>T' + assert results['NM_001114382.1:c.277C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001114382.1:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert 'hg38' not in results['NM_001114382.1:c.277C>T']['primary_assembly_loci'].keys() + assert results['NM_001114382.1:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert 'grch38' not in results['NM_001114382.1:c.277C>T']['primary_assembly_loci'].keys() + assert results['NM_001114382.1:c.277C>T']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001107854.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001114382.1'} + + assert 'NM_001077183.1:c.277C>T' in results.keys() + assert results['NM_001077183.1:c.277C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001077183.1:c.277C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001077183.1:c.277C>T']['alt_genomic_loci'] == [] + assert results['NM_001077183.1:c.277C>T']['transcript_description'] == 'Homo sapiens tuberous sclerosis 2 (TSC2), transcript variant 4, mRNA' + assert results['NM_001077183.1:c.277C>T']['gene_symbol'] == 'TSC2' + assert results['NM_001077183.1:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001070651.1:p.(Arg93Trp)', 'slr': 'NP_001070651.1:p.(R93W)'} + assert results['NM_001077183.1:c.277C>T']['submitted_variant'] == '16-2103394-C-T' + assert results['NM_001077183.1:c.277C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001077183.1:c.277C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001077183.1:c.277C>T']['hgvs_transcript_variant'] == 'NM_001077183.1:c.277C>T' + assert results['NM_001077183.1:c.277C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001077183.1:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert 'hg38' not in results['NM_001077183.1:c.277C>T']['primary_assembly_loci'].keys() + assert results['NM_001077183.1:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert 'grch38' not in results['NM_001077183.1:c.277C>T']['primary_assembly_loci'].keys() + assert results['NM_001077183.1:c.277C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001070651.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077183.1'} + + assert 'NM_001318827.1:c.226-903C>T' in results.keys() + assert results['NM_001318827.1:c.226-903C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001318827.1:c.226-903C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001318827.1:c.226-903C>T']['alt_genomic_loci'] == [] + assert results['NM_001318827.1:c.226-903C>T']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 6, mRNA' + assert results['NM_001318827.1:c.226-903C>T']['gene_symbol'] == 'TSC2' + assert results['NM_001318827.1:c.226-903C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305756.1:p.?', 'slr': 'NP_001305756.1:p.?'} + assert results['NM_001318827.1:c.226-903C>T']['submitted_variant'] == '16-2103394-C-T' + assert results['NM_001318827.1:c.226-903C>T']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001318827.1):c.226-903C>T' + assert results['NM_001318827.1:c.226-903C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001318827.1:c.226-903C>T']['hgvs_transcript_variant'] == 'NM_001318827.1:c.226-903C>T' + assert results['NM_001318827.1:c.226-903C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001318827.1:c.226-903C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert results['NM_001318827.1:c.226-903C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} + assert results['NM_001318827.1:c.226-903C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert results['NM_001318827.1:c.226-903C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': 
{'chr': '16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} + assert results['NM_001318827.1:c.226-903C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305756.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318827.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001114382.2:c.277C>T' in results.keys() + assert results['NM_001114382.2:c.277C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001114382.2:c.277C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001114382.2:c.277C>T']['alt_genomic_loci'] == [] + assert results['NM_001114382.2:c.277C>T']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 5, mRNA' + assert results['NM_001114382.2:c.277C>T']['gene_symbol'] == 'TSC2' + assert results['NM_001114382.2:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001107854.1:p.(Arg93Trp)', 'slr': 'NP_001107854.1:p.(R93W)'} + assert results['NM_001114382.2:c.277C>T']['submitted_variant'] == '16-2103394-C-T' + assert results['NM_001114382.2:c.277C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001114382.2:c.277C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001114382.2:c.277C>T']['hgvs_transcript_variant'] == 'NM_001114382.2:c.277C>T' + assert results['NM_001114382.2:c.277C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001114382.2:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert results['NM_001114382.2:c.277C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} + assert results['NM_001114382.2:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': 
'2103394', 'alt': 'T'}} + assert results['NM_001114382.2:c.277C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} + assert results['NM_001114382.2:c.277C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001107854.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001114382.2'} + + assert 'NM_001363528.1:c.277C>T' in results.keys() + assert results['NM_001363528.1:c.277C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363528.1:c.277C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001363528.1:c.277C>T']['alt_genomic_loci'] == [] + assert results['NM_001363528.1:c.277C>T']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 10, mRNA' + assert results['NM_001363528.1:c.277C>T']['gene_symbol'] == 'TSC2' + assert results['NM_001363528.1:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350457.1:p.(Arg93Trp)', 'slr': 'NP_001350457.1:p.(R93W)'} + assert results['NM_001363528.1:c.277C>T']['submitted_variant'] == '16-2103394-C-T' + assert results['NM_001363528.1:c.277C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001363528.1:c.277C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001363528.1:c.277C>T']['hgvs_transcript_variant'] == 'NM_001363528.1:c.277C>T' + assert results['NM_001363528.1:c.277C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001363528.1:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert 'hg38' not in results['NM_001363528.1:c.277C>T']['primary_assembly_loci'].keys() + assert results['NM_001363528.1:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 
'pos': '2103394', 'alt': 'T'}} + assert 'grch38' not in results['NM_001363528.1:c.277C>T']['primary_assembly_loci'].keys() + assert results['NM_001363528.1:c.277C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350457.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363528.1'} + + assert 'NM_021055.2:c.277C>T' in results.keys() + assert results['NM_021055.2:c.277C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_021055.2:c.277C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_021055.2:c.277C>T']['alt_genomic_loci'] == [] + assert results['NM_021055.2:c.277C>T']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 2, mRNA' + assert results['NM_021055.2:c.277C>T']['gene_symbol'] == 'TSC2' + assert results['NM_021055.2:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_066399.2:p.(Arg93Trp)', 'slr': 'NP_066399.2:p.(R93W)'} + assert results['NM_021055.2:c.277C>T']['submitted_variant'] == '16-2103394-C-T' + assert results['NM_021055.2:c.277C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_021055.2:c.277C>T']['hgvs_lrg_variant'] == '' + assert results['NM_021055.2:c.277C>T']['hgvs_transcript_variant'] == 'NM_021055.2:c.277C>T' + assert results['NM_021055.2:c.277C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_021055.2:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert 'hg38' not in results['NM_021055.2:c.277C>T']['primary_assembly_loci'].keys() + assert results['NM_021055.2:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert 'grch38' not in results['NM_021055.2:c.277C>T']['primary_assembly_loci'].keys() + assert 
results['NM_021055.2:c.277C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_066399.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021055.2'} + + assert 'NM_000548.4:c.277C>T' in results.keys() + assert results['NM_000548.4:c.277C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000548.4:c.277C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000548.4:c.277C>T']['alt_genomic_loci'] == [] + assert results['NM_000548.4:c.277C>T']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 1, mRNA' + assert results['NM_000548.4:c.277C>T']['gene_symbol'] == 'TSC2' + assert results['NM_000548.4:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000539.2(LRG_487p1):p.(Arg93Trp)', 'slr': 'NP_000539.2:p.(R93W)'} + assert results['NM_000548.4:c.277C>T']['submitted_variant'] == '16-2103394-C-T' + assert results['NM_000548.4:c.277C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000548.4:c.277C>T']['hgvs_lrg_variant'] == '' + assert results['NM_000548.4:c.277C>T']['hgvs_transcript_variant'] == 'NM_000548.4:c.277C>T' + assert results['NM_000548.4:c.277C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_000548.4:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert results['NM_000548.4:c.277C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} + assert results['NM_000548.4:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert results['NM_000548.4:c.277C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': 
{'chr': '16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} + assert results['NM_000548.4:c.277C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000539.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000548.4'} + + assert 'NM_001318831.1:c.-1-2803C>T' in results.keys() + assert results['NM_001318831.1:c.-1-2803C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001318831.1:c.-1-2803C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001318831.1:c.-1-2803C>T']['alt_genomic_loci'] == [] + assert results['NM_001318831.1:c.-1-2803C>T']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 8, mRNA' + assert results['NM_001318831.1:c.-1-2803C>T']['gene_symbol'] == 'TSC2' + assert results['NM_001318831.1:c.-1-2803C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305760.1:p.?', 'slr': 'NP_001305760.1:p.?'} + assert results['NM_001318831.1:c.-1-2803C>T']['submitted_variant'] == '16-2103394-C-T' + assert results['NM_001318831.1:c.-1-2803C>T']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001318831.1):c.-1-2803C>T' + assert results['NM_001318831.1:c.-1-2803C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001318831.1:c.-1-2803C>T']['hgvs_transcript_variant'] == 'NM_001318831.1:c.-1-2803C>T' + assert results['NM_001318831.1:c.-1-2803C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001318831.1:c.-1-2803C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert results['NM_001318831.1:c.-1-2803C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} + assert results['NM_001318831.1:c.-1-2803C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 
'16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert results['NM_001318831.1:c.-1-2803C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} + assert results['NM_001318831.1:c.-1-2803C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305760.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318831.1'} + + + def test_variant235(self): + variant = '16-3779300-C-G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_001079846.1:c.5634G>C' in results.keys() + assert results['NM_001079846.1:c.5634G>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001079846.1:c.5634G>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001079846.1:c.5634G>C']['alt_genomic_loci'] == [] + assert results['NM_001079846.1:c.5634G>C']['transcript_description'] == 'Homo sapiens CREB binding protein (CREBBP), transcript variant 2, mRNA' + assert results['NM_001079846.1:c.5634G>C']['gene_symbol'] == 'CREBBP' + assert results['NM_001079846.1:c.5634G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073315.1:p.(Met1878Ile)', 'slr': 'NP_001073315.1:p.(M1878I)'} + assert results['NM_001079846.1:c.5634G>C']['submitted_variant'] == '16-3779300-C-G' + assert results['NM_001079846.1:c.5634G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_001079846.1:c.5634G>C']['hgvs_lrg_variant'] == '' + assert results['NM_001079846.1:c.5634G>C']['hgvs_transcript_variant'] == 'NM_001079846.1:c.5634G>C' + assert results['NM_001079846.1:c.5634G>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_001079846.1:c.5634G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.3779300C>G', 'vcf': {'chr': 'chr16', 'ref': u'C', 'pos': '3779300', 'alt': u'G'}} + assert 
results['NM_001079846.1:c.5634G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.3729299C>G', 'vcf': {'chr': 'chr16', 'ref': u'C', 'pos': '3729299', 'alt': u'G'}} + assert results['NM_001079846.1:c.5634G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.3779300C>G', 'vcf': {'chr': '16', 'ref': u'C', 'pos': '3779300', 'alt': u'G'}} + assert results['NM_001079846.1:c.5634G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.3729299C>G', 'vcf': {'chr': '16', 'ref': u'C', 'pos': '3729299', 'alt': u'G'}} + assert results['NM_001079846.1:c.5634G>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073315.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001079846.1'} + + assert 'NM_004380.2:c.5748G>C' in results.keys() + assert results['NM_004380.2:c.5748G>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_004380.2:c.5748G>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004380.2:c.5748G>C']['alt_genomic_loci'] == [] + assert results['NM_004380.2:c.5748G>C']['transcript_description'] == 'Homo sapiens CREB binding protein (CREBBP), transcript variant 1, mRNA' + assert results['NM_004380.2:c.5748G>C']['gene_symbol'] == 'CREBBP' + assert results['NM_004380.2:c.5748G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004371.2:p.(Met1916Ile)', 'slr': 'NP_004371.2:p.(M1916I)'} + assert results['NM_004380.2:c.5748G>C']['submitted_variant'] == '16-3779300-C-G' + assert results['NM_004380.2:c.5748G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_004380.2:c.5748G>C']['hgvs_lrg_variant'] == '' + assert results['NM_004380.2:c.5748G>C']['hgvs_transcript_variant'] == 'NM_004380.2:c.5748G>C' + assert results['NM_004380.2:c.5748G>C']['hgvs_refseqgene_variant'] == 'NG_009873.1:g.155822G>C' + assert results['NM_004380.2:c.5748G>C']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000016.9:g.3779300C>G', 'vcf': {'chr': 'chr16', 'ref': u'C', 'pos': '3779300', 'alt': u'G'}} + assert results['NM_004380.2:c.5748G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.3729299C>G', 'vcf': {'chr': 'chr16', 'ref': u'C', 'pos': '3729299', 'alt': u'G'}} + assert results['NM_004380.2:c.5748G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.3779300C>G', 'vcf': {'chr': '16', 'ref': u'C', 'pos': '3779300', 'alt': u'G'}} + assert results['NM_004380.2:c.5748G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.3729299C>G', 'vcf': {'chr': '16', 'ref': u'C', 'pos': '3729299', 'alt': u'G'}} + assert results['NM_004380.2:c.5748G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009873.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004371.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004380.2'} + + + def test_variant236(self): + variant = '16-5128843-C-G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001330504.1:c.493C>G' in results.keys() + assert results['NM_001330504.1:c.493C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330504.1:c.493C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330504.1:c.493C>G']['alt_genomic_loci'] == [] + assert results['NM_001330504.1:c.493C>G']['transcript_description'] == 'Homo sapiens ALG1, chitobiosyldiphosphodolichol beta-mannosyltransferase (ALG1), transcript variant 2, mRNA' + assert results['NM_001330504.1:c.493C>G']['gene_symbol'] == 'ALG1' + assert results['NM_001330504.1:c.493C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317433.1:p.(Arg165Gly)', 'slr': 'NP_001317433.1:p.(R165G)'} + assert results['NM_001330504.1:c.493C>G']['submitted_variant'] == '16-5128843-C-G' + assert 
results['NM_001330504.1:c.493C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330504.1:c.493C>G']['hgvs_lrg_variant'] == '' + assert results['NM_001330504.1:c.493C>G']['hgvs_transcript_variant'] == 'NM_001330504.1:c.493C>G' + assert results['NM_001330504.1:c.493C>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330504.1:c.493C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.5128843C>G', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '5128843', 'alt': 'G'}} + assert results['NM_001330504.1:c.493C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.5078842C>G', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '5078842', 'alt': 'G'}} + assert results['NM_001330504.1:c.493C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.5128843C>G', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '5128843', 'alt': 'G'}} + assert results['NM_001330504.1:c.493C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.5078842C>G', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '5078842', 'alt': 'G'}} + assert results['NM_001330504.1:c.493C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317433.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330504.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_019109.4:c.826C>G' in results.keys() + assert results['NM_019109.4:c.826C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_019109.4:c.826C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_019109.4:c.826C>G']['alt_genomic_loci'] == [] + assert results['NM_019109.4:c.826C>G']['transcript_description'] == 'Homo sapiens ALG1, chitobiosyldiphosphodolichol beta-mannosyltransferase (ALG1), transcript variant 1, mRNA' + assert results['NM_019109.4:c.826C>G']['gene_symbol'] == 'ALG1' + assert 
results['NM_019109.4:c.826C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061982.3:p.(Arg276Gly)', 'slr': 'NP_061982.3:p.(R276G)'} + assert results['NM_019109.4:c.826C>G']['submitted_variant'] == '16-5128843-C-G' + assert results['NM_019109.4:c.826C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_019109.4:c.826C>G']['hgvs_lrg_variant'] == '' + assert results['NM_019109.4:c.826C>G']['hgvs_transcript_variant'] == 'NM_019109.4:c.826C>G' + assert results['NM_019109.4:c.826C>G']['hgvs_refseqgene_variant'] == 'NG_009202.1:g.12034C>G' + assert results['NM_019109.4:c.826C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.5128843C>G', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '5128843', 'alt': 'G'}} + assert results['NM_019109.4:c.826C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.5078842C>G', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '5078842', 'alt': 'G'}} + assert results['NM_019109.4:c.826C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.5128843C>G', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '5128843', 'alt': 'G'}} + assert results['NM_019109.4:c.826C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.5078842C>G', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '5078842', 'alt': 'G'}} + assert results['NM_019109.4:c.826C>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009202.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061982.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_019109.4'} + + + def test_variant237(self): + variant = '16-74808559-C-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_024306.4:c.95G>A' in results.keys() + assert results['NM_024306.4:c.95G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_024306.4:c.95G>A']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_024306.4:c.95G>A']['alt_genomic_loci'] == [] + assert results['NM_024306.4:c.95G>A']['transcript_description'] == 'Homo sapiens fatty acid 2-hydroxylase (FA2H), mRNA' + assert results['NM_024306.4:c.95G>A']['gene_symbol'] == 'FA2H' + assert results['NM_024306.4:c.95G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_077282.3:p.(Arg32His)', 'slr': 'NP_077282.3:p.(R32H)'} + assert results['NM_024306.4:c.95G>A']['submitted_variant'] == '16-74808559-C-T' + assert results['NM_024306.4:c.95G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_024306.4:c.95G>A']['hgvs_lrg_variant'] == '' + assert results['NM_024306.4:c.95G>A']['hgvs_transcript_variant'] == 'NM_024306.4:c.95G>A' + assert results['NM_024306.4:c.95G>A']['hgvs_refseqgene_variant'] == 'NG_017070.1:g.5171G>A' + assert results['NM_024306.4:c.95G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.74808559C>T', 'vcf': {'chr': 'chr16', 'ref': u'C', 'pos': '74808559', 'alt': u'T'}} + assert results['NM_024306.4:c.95G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.74774661C>T', 'vcf': {'chr': 'chr16', 'ref': u'C', 'pos': '74774661', 'alt': u'T'}} + assert results['NM_024306.4:c.95G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.74808559C>T', 'vcf': {'chr': '16', 'ref': u'C', 'pos': '74808559', 'alt': u'T'}} + assert results['NM_024306.4:c.95G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.74774661C>T', 'vcf': {'chr': '16', 'ref': u'C', 'pos': '74774661', 'alt': u'T'}} + assert results['NM_024306.4:c.95G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017070.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_077282.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024306.4'} + + assert results['flag'] == 'gene_variant' + + def test_variant238(self): + variant = '16-89574804-C-A' 
+ results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_003119.3:c.-22C>A' in results.keys() + assert results['NM_003119.3:c.-22C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.-22C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.-22C>A']['alt_genomic_loci'] == [] + assert results['NM_003119.3:c.-22C>A']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.3:c.-22C>A']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.-22C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} + assert results['NM_003119.3:c.-22C>A']['submitted_variant'] == '16-89574804-C-A' + assert results['NM_003119.3:c.-22C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.-22C>A']['hgvs_lrg_variant'] == '' + assert results['NM_003119.3:c.-22C>A']['hgvs_transcript_variant'] == 'NM_003119.3:c.-22C>A' + assert results['NM_003119.3:c.-22C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_003119.3:c.-22C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574804C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89574804', 'alt': 'A'}} + assert results['NM_003119.3:c.-22C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508396C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89508396', 'alt': 'A'}} + assert results['NM_003119.3:c.-22C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574804C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89574804', 'alt': 'A'}} + assert results['NM_003119.3:c.-22C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508396C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89508396', 'alt': 'A'}} + assert 
results['NM_003119.3:c.-22C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} + + assert results['flag'] == 'gene_variant' + assert 'NM_199367.2:c.-22C>A' in results.keys() + assert results['NM_199367.2:c.-22C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.2:c.-22C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.-22C>A']['alt_genomic_loci'] == [] + assert results['NM_199367.2:c.-22C>A']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA' + assert results['NM_199367.2:c.-22C>A']['gene_symbol'] == 'SPG7' + assert results['NM_199367.2:c.-22C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} + assert results['NM_199367.2:c.-22C>A']['submitted_variant'] == '16-89574804-C-A' + assert results['NM_199367.2:c.-22C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.-22C>A']['hgvs_lrg_variant'] == '' + assert results['NM_199367.2:c.-22C>A']['hgvs_transcript_variant'] == 'NM_199367.2:c.-22C>A' + assert results['NM_199367.2:c.-22C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.2:c.-22C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574804C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89574804', 'alt': 'A'}} + assert results['NM_199367.2:c.-22C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508396C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89508396', 'alt': 'A'}} + assert results['NM_199367.2:c.-22C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574804C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89574804', 'alt': 'A'}} + assert results['NM_199367.2:c.-22C>A']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000016.10:g.89508396C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89508396', 'alt': 'A'}} + assert results['NM_199367.2:c.-22C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} + + assert 'NM_001363850.1:c.-22C>A' in results.keys() + assert results['NM_001363850.1:c.-22C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.-22C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.-22C>A']['alt_genomic_loci'] == [] + assert results['NM_001363850.1:c.-22C>A']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + assert results['NM_001363850.1:c.-22C>A']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.-22C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.?', 'slr': 'NP_001350779.1:p.?'} + assert results['NM_001363850.1:c.-22C>A']['submitted_variant'] == '16-89574804-C-A' + assert results['NM_001363850.1:c.-22C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.-22C>A']['hgvs_lrg_variant'] == '' + assert results['NM_001363850.1:c.-22C>A']['hgvs_transcript_variant'] == 'NM_001363850.1:c.-22C>A' + assert results['NM_001363850.1:c.-22C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001363850.1:c.-22C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574804C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89574804', 'alt': 'A'}} + assert 'hg38' not in results['NM_001363850.1:c.-22C>A']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.-22C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574804C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89574804', 'alt': 'A'}} + assert 'grch38' not in 
results['NM_001363850.1:c.-22C>A']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.-22C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} + + + def test_variant239(self): + variant = '16-89574826-A-C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_003119.2:c.1A>C' in results.keys() + assert results['NM_003119.2:c.1A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.1A>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.1A>C']['alt_genomic_loci'] == [] + assert results['NM_003119.2:c.1A>C']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.2:c.1A>C']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.1A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Met1?)', 'slr': 'NP_003110.1:p.(M1?)'} + assert results['NM_003119.2:c.1A>C']['submitted_variant'] == '16-89574826-A-C' + assert results['NM_003119.2:c.1A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.1A>C']['hgvs_lrg_variant'] == '' + assert results['NM_003119.2:c.1A>C']['hgvs_transcript_variant'] == 'NM_003119.2:c.1A>C' + assert results['NM_003119.2:c.1A>C']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.5022A>C' + assert results['NM_003119.2:c.1A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89574826', 'alt': 'C'}} + assert 'hg38' not in results['NM_003119.2:c.1A>C']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.1A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89574826', 'alt': 'C'}} + assert 'grch38' not in 
results['NM_003119.2:c.1A>C']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.1A>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} + + assert 'NM_199367.1:c.1A>C' in results.keys() + assert results['NM_199367.1:c.1A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.1:c.1A>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.1A>C']['alt_genomic_loci'] == [] + assert results['NM_199367.1:c.1A>C']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA' + assert results['NM_199367.1:c.1A>C']['gene_symbol'] == 'SPG7' + assert results['NM_199367.1:c.1A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Met1?)', 'slr': 'NP_955399.1:p.(M1?)'} + assert results['NM_199367.1:c.1A>C']['submitted_variant'] == '16-89574826-A-C' + assert results['NM_199367.1:c.1A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.1A>C']['hgvs_lrg_variant'] == '' + assert results['NM_199367.1:c.1A>C']['hgvs_transcript_variant'] == 'NM_199367.1:c.1A>C' + assert results['NM_199367.1:c.1A>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.1:c.1A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89574826', 'alt': 'C'}} + assert 'hg38' not in results['NM_199367.1:c.1A>C']['primary_assembly_loci'].keys() + assert results['NM_199367.1:c.1A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89574826', 'alt': 'C'}} + assert 'grch38' not in results['NM_199367.1:c.1A>C']['primary_assembly_loci'].keys() + assert 
results['NM_199367.1:c.1A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} + + assert 'NM_001363850.1:c.1A>C' in results.keys() + assert results['NM_001363850.1:c.1A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.1A>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.1A>C']['alt_genomic_loci'] == [] + assert results['NM_001363850.1:c.1A>C']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + assert results['NM_001363850.1:c.1A>C']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.1A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Met1?)', 'slr': 'NP_001350779.1:p.(M1?)'} + assert results['NM_001363850.1:c.1A>C']['submitted_variant'] == '16-89574826-A-C' + assert results['NM_001363850.1:c.1A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.1A>C']['hgvs_lrg_variant'] == '' + assert results['NM_001363850.1:c.1A>C']['hgvs_transcript_variant'] == 'NM_001363850.1:c.1A>C' + assert results['NM_001363850.1:c.1A>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_001363850.1:c.1A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89574826', 'alt': 'C'}} + assert 'hg38' not in results['NM_001363850.1:c.1A>C']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.1A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89574826', 'alt': 'C'}} + assert 'grch38' not in results['NM_001363850.1:c.1A>C']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.1A>C']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} + + assert 'NM_199367.2:c.1A>C' in results.keys() + assert results['NM_199367.2:c.1A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.2:c.1A>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.1A>C']['alt_genomic_loci'] == [] + assert results['NM_199367.2:c.1A>C']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA' + assert results['NM_199367.2:c.1A>C']['gene_symbol'] == 'SPG7' + assert results['NM_199367.2:c.1A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Met1?)', 'slr': 'NP_955399.1:p.(M1?)'} + assert results['NM_199367.2:c.1A>C']['submitted_variant'] == '16-89574826-A-C' + assert results['NM_199367.2:c.1A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.1A>C']['hgvs_lrg_variant'] == '' + assert results['NM_199367.2:c.1A>C']['hgvs_transcript_variant'] == 'NM_199367.2:c.1A>C' + assert results['NM_199367.2:c.1A>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.2:c.1A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89574826', 'alt': 'C'}} + assert results['NM_199367.2:c.1A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508418A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89508418', 'alt': 'C'}} + assert results['NM_199367.2:c.1A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89574826', 'alt': 'C'}} + assert results['NM_199367.2:c.1A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508418A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89508418', 'alt': 'C'}} + assert 
results['NM_199367.2:c.1A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} + + assert results['flag'] == 'gene_variant' + assert 'NM_003119.3:c.1A>C' in results.keys() + assert results['NM_003119.3:c.1A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.1A>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.1A>C']['alt_genomic_loci'] == [] + assert results['NM_003119.3:c.1A>C']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.3:c.1A>C']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.1A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Met1?)', 'slr': 'NP_003110.1:p.(M1?)'} + assert results['NM_003119.3:c.1A>C']['submitted_variant'] == '16-89574826-A-C' + assert results['NM_003119.3:c.1A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.1A>C']['hgvs_lrg_variant'] == '' + assert results['NM_003119.3:c.1A>C']['hgvs_transcript_variant'] == 'NM_003119.3:c.1A>C' + assert results['NM_003119.3:c.1A>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_003119.3:c.1A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89574826', 'alt': 'C'}} + assert results['NM_003119.3:c.1A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508418A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89508418', 'alt': 'C'}} + assert results['NM_003119.3:c.1A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89574826', 'alt': 'C'}} + assert results['NM_003119.3:c.1A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000016.10:g.89508418A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89508418', 'alt': 'C'}} + assert results['NM_003119.3:c.1A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} + + + def test_variant240(self): + variant = '16-89574914-G-GT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001363850.1:c.90dup' in results.keys() + assert results['NM_001363850.1:c.90dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.90dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.90dup']['alt_genomic_loci'] == [] + assert results['NM_001363850.1:c.90dup']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + assert results['NM_001363850.1:c.90dup']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.90dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Pro31SerfsTer43)', 'slr': 'NP_001350779.1:p.(P31Sfs*43)'} + assert results['NM_001363850.1:c.90dup']['submitted_variant'] == '16-89574914-G-GT' + assert results['NM_001363850.1:c.90dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.90dup']['hgvs_lrg_variant'] == '' + assert results['NM_001363850.1:c.90dup']['hgvs_transcript_variant'] == 'NM_001363850.1:c.90dup' + assert results['NM_001363850.1:c.90dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001363850.1:c.90dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89574914', 'alt': 'GT'}} + assert 'hg38' not in results['NM_001363850.1:c.90dup']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.90dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': '16', 
'ref': 'G', 'pos': '89574914', 'alt': 'GT'}} + assert 'grch38' not in results['NM_001363850.1:c.90dup']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.90dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} + + assert 'NM_199367.1:c.90dup' in results.keys() + assert results['NM_199367.1:c.90dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.1:c.90dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.90dup']['alt_genomic_loci'] == [] + assert results['NM_199367.1:c.90dup']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA' + assert results['NM_199367.1:c.90dup']['gene_symbol'] == 'SPG7' + assert results['NM_199367.1:c.90dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Pro31SerfsTer43)', 'slr': 'NP_955399.1:p.(P31Sfs*43)'} + assert results['NM_199367.1:c.90dup']['submitted_variant'] == '16-89574914-G-GT' + assert results['NM_199367.1:c.90dup']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.90dup']['hgvs_lrg_variant'] == '' + assert results['NM_199367.1:c.90dup']['hgvs_transcript_variant'] == 'NM_199367.1:c.90dup' + assert results['NM_199367.1:c.90dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.1:c.90dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89574914', 'alt': 'GT'}} + assert 'hg38' not in results['NM_199367.1:c.90dup']['primary_assembly_loci'].keys() + assert results['NM_199367.1:c.90dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89574914', 'alt': 'GT'}} + assert 'grch38' not in 
results['NM_199367.1:c.90dup']['primary_assembly_loci'].keys() + assert results['NM_199367.1:c.90dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} + + assert 'NM_003119.2:c.90dup' in results.keys() + assert results['NM_003119.2:c.90dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.90dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.90dup']['alt_genomic_loci'] == [] + assert results['NM_003119.2:c.90dup']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.2:c.90dup']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.90dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Pro31SerfsTer43)', 'slr': 'NP_003110.1:p.(P31Sfs*43)'} + assert results['NM_003119.2:c.90dup']['submitted_variant'] == '16-89574914-G-GT' + assert results['NM_003119.2:c.90dup']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.90dup']['hgvs_lrg_variant'] == '' + assert results['NM_003119.2:c.90dup']['hgvs_transcript_variant'] == 'NM_003119.2:c.90dup' + assert results['NM_003119.2:c.90dup']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.5111dup' + assert results['NM_003119.2:c.90dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89574914', 'alt': 'GT'}} + assert 'hg38' not in results['NM_003119.2:c.90dup']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.90dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89574914', 'alt': 'GT'}} + assert 'grch38' not in results['NM_003119.2:c.90dup']['primary_assembly_loci'].keys() + assert 
results['NM_003119.2:c.90dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} + + assert 'NM_199367.2:c.90dup' in results.keys() + assert results['NM_199367.2:c.90dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.2:c.90dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.90dup']['alt_genomic_loci'] == [] + assert results['NM_199367.2:c.90dup']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA' + assert results['NM_199367.2:c.90dup']['gene_symbol'] == 'SPG7' + assert results['NM_199367.2:c.90dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Pro31SerfsTer43)', 'slr': 'NP_955399.1:p.(P31Sfs*43)'} + assert results['NM_199367.2:c.90dup']['submitted_variant'] == '16-89574914-G-GT' + assert results['NM_199367.2:c.90dup']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.90dup']['hgvs_lrg_variant'] == '' + assert results['NM_199367.2:c.90dup']['hgvs_transcript_variant'] == 'NM_199367.2:c.90dup' + assert results['NM_199367.2:c.90dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.2:c.90dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89574914', 'alt': 'GT'}} + assert results['NM_199367.2:c.90dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508507dup', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89508506', 'alt': 'GT'}} + assert results['NM_199367.2:c.90dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89574914', 'alt': 'GT'}} + assert 
results['NM_199367.2:c.90dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508507dup', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89508506', 'alt': 'GT'}} + assert results['NM_199367.2:c.90dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} + + assert results['flag'] == 'gene_variant' + assert 'NM_003119.3:c.90dup' in results.keys() + assert results['NM_003119.3:c.90dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.90dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.90dup']['alt_genomic_loci'] == [] + assert results['NM_003119.3:c.90dup']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.3:c.90dup']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.90dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Pro31SerfsTer43)', 'slr': 'NP_003110.1:p.(P31Sfs*43)'} + assert results['NM_003119.3:c.90dup']['submitted_variant'] == '16-89574914-G-GT' + assert results['NM_003119.3:c.90dup']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.90dup']['hgvs_lrg_variant'] == '' + assert results['NM_003119.3:c.90dup']['hgvs_transcript_variant'] == 'NM_003119.3:c.90dup' + assert results['NM_003119.3:c.90dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_003119.3:c.90dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89574914', 'alt': 'GT'}} + assert results['NM_003119.3:c.90dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508507dup', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89508506', 'alt': 'GT'}} + assert results['NM_003119.3:c.90dup']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89574914', 'alt': 'GT'}} + assert results['NM_003119.3:c.90dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508507dup', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89508506', 'alt': 'GT'}} + assert results['NM_003119.3:c.90dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} + + + def test_variant241(self): + variant = '16-89574916-C-CGTC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_199367.2:c.89_91dup' in results.keys() + assert results['NM_199367.2:c.89_91dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.2:c.89_91dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.89_91dup']['alt_genomic_loci'] == [] + assert results['NM_199367.2:c.89_91dup']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA' + assert results['NM_199367.2:c.89_91dup']['gene_symbol'] == 'SPG7' + assert results['NM_199367.2:c.89_91dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Ser30_Pro31insArg)', 'slr': 'NP_955399.1:p.(S30_P31insR)'} + assert results['NM_199367.2:c.89_91dup']['submitted_variant'] == '16-89574916-C-CGTC' + assert results['NM_199367.2:c.89_91dup']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.89_91dup']['hgvs_lrg_variant'] == '' + assert results['NM_199367.2:c.89_91dup']['hgvs_transcript_variant'] == 'NM_199367.2:c.89_91dup' + assert results['NM_199367.2:c.89_91dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.2:c.89_91dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89574913', 'alt': 'AGTC'}} + 
assert results['NM_199367.2:c.89_91dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508506_89508508dup', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89508505', 'alt': 'AGTC'}} + assert results['NM_199367.2:c.89_91dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89574913', 'alt': 'AGTC'}} + assert results['NM_199367.2:c.89_91dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508506_89508508dup', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89508505', 'alt': 'AGTC'}} + assert results['NM_199367.2:c.89_91dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} + + assert 'NM_003119.3:c.89_91dup' in results.keys() + assert results['NM_003119.3:c.89_91dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.89_91dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.89_91dup']['alt_genomic_loci'] == [] + assert results['NM_003119.3:c.89_91dup']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.3:c.89_91dup']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.89_91dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Ser30_Pro31insArg)', 'slr': 'NP_003110.1:p.(S30_P31insR)'} + assert results['NM_003119.3:c.89_91dup']['submitted_variant'] == '16-89574916-C-CGTC' + assert results['NM_003119.3:c.89_91dup']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.89_91dup']['hgvs_lrg_variant'] == '' + assert results['NM_003119.3:c.89_91dup']['hgvs_transcript_variant'] == 'NM_003119.3:c.89_91dup' + assert results['NM_003119.3:c.89_91dup']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_003119.3:c.89_91dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89574913', 'alt': 'AGTC'}} + assert results['NM_003119.3:c.89_91dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508506_89508508dup', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89508505', 'alt': 'AGTC'}} + assert results['NM_003119.3:c.89_91dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89574913', 'alt': 'AGTC'}} + assert results['NM_003119.3:c.89_91dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508506_89508508dup', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89508505', 'alt': 'AGTC'}} + assert results['NM_003119.3:c.89_91dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} + + assert 'NM_001363850.1:c.89_91dup' in results.keys() + assert results['NM_001363850.1:c.89_91dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.89_91dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.89_91dup']['alt_genomic_loci'] == [] + assert results['NM_001363850.1:c.89_91dup']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + assert results['NM_001363850.1:c.89_91dup']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.89_91dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Ser30_Pro31insArg)', 'slr': 'NP_001350779.1:p.(S30_P31insR)'} + assert results['NM_001363850.1:c.89_91dup']['submitted_variant'] == '16-89574916-C-CGTC' + assert results['NM_001363850.1:c.89_91dup']['genome_context_intronic_sequence'] == '' + assert 
results['NM_001363850.1:c.89_91dup']['hgvs_lrg_variant'] == '' + assert results['NM_001363850.1:c.89_91dup']['hgvs_transcript_variant'] == 'NM_001363850.1:c.89_91dup' + assert results['NM_001363850.1:c.89_91dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001363850.1:c.89_91dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89574913', 'alt': 'AGTC'}} + assert 'hg38' not in results['NM_001363850.1:c.89_91dup']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.89_91dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89574913', 'alt': 'AGTC'}} + assert 'grch38' not in results['NM_001363850.1:c.89_91dup']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.89_91dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_199367.1:c.89_91dup' in results.keys() + assert results['NM_199367.1:c.89_91dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.1:c.89_91dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.89_91dup']['alt_genomic_loci'] == [] + assert results['NM_199367.1:c.89_91dup']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA' + assert results['NM_199367.1:c.89_91dup']['gene_symbol'] == 'SPG7' + assert results['NM_199367.1:c.89_91dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Ser30_Pro31insArg)', 'slr': 'NP_955399.1:p.(S30_P31insR)'} + assert results['NM_199367.1:c.89_91dup']['submitted_variant'] == '16-89574916-C-CGTC' + assert 
results['NM_199367.1:c.89_91dup']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.89_91dup']['hgvs_lrg_variant'] == '' + assert results['NM_199367.1:c.89_91dup']['hgvs_transcript_variant'] == 'NM_199367.1:c.89_91dup' + assert results['NM_199367.1:c.89_91dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.1:c.89_91dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89574913', 'alt': 'AGTC'}} + assert 'hg38' not in results['NM_199367.1:c.89_91dup']['primary_assembly_loci'].keys() + assert results['NM_199367.1:c.89_91dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89574913', 'alt': 'AGTC'}} + assert 'grch38' not in results['NM_199367.1:c.89_91dup']['primary_assembly_loci'].keys() + assert results['NM_199367.1:c.89_91dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} + + assert 'NM_003119.2:c.89_91dup' in results.keys() + assert results['NM_003119.2:c.89_91dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.89_91dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.89_91dup']['alt_genomic_loci'] == [] + assert results['NM_003119.2:c.89_91dup']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.2:c.89_91dup']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.89_91dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Ser30_Pro31insArg)', 'slr': 'NP_003110.1:p.(S30_P31insR)'} + assert results['NM_003119.2:c.89_91dup']['submitted_variant'] == '16-89574916-C-CGTC' + assert 
results['NM_003119.2:c.89_91dup']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.89_91dup']['hgvs_lrg_variant'] == '' + assert results['NM_003119.2:c.89_91dup']['hgvs_transcript_variant'] == 'NM_003119.2:c.89_91dup' + assert results['NM_003119.2:c.89_91dup']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.5110_5112dup' + assert results['NM_003119.2:c.89_91dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89574913', 'alt': 'AGTC'}} + assert 'hg38' not in results['NM_003119.2:c.89_91dup']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.89_91dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89574913', 'alt': 'AGTC'}} + assert 'grch38' not in results['NM_003119.2:c.89_91dup']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.89_91dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} + + + def test_variant242(self): + variant = '16-89575009-G-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_199367.2:c.183+1G>A' in results.keys() + assert results['NM_199367.2:c.183+1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.2:c.183+1G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.183+1G>A']['alt_genomic_loci'] == [] + assert results['NM_199367.2:c.183+1G>A']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA' + assert results['NM_199367.2:c.183+1G>A']['gene_symbol'] == 'SPG7' + assert results['NM_199367.2:c.183+1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} + assert results['NM_199367.2:c.183+1G>A']['submitted_variant'] == '16-89575009-G-A' + assert results['NM_199367.2:c.183+1G>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_199367.2):c.183+1G>A' + assert results['NM_199367.2:c.183+1G>A']['hgvs_lrg_variant'] == '' + assert results['NM_199367.2:c.183+1G>A']['hgvs_transcript_variant'] == 'NM_199367.2:c.183+1G>A' + assert results['NM_199367.2:c.183+1G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.2:c.183+1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89575009', 'alt': 'A'}} + assert results['NM_199367.2:c.183+1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508601G>A', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89508601', 'alt': 'A'}} + assert results['NM_199367.2:c.183+1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89575009', 'alt': 'A'}} + assert results['NM_199367.2:c.183+1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508601G>A', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89508601', 'alt': 'A'}} + assert results['NM_199367.2:c.183+1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} + + assert 'NM_003119.2:c.183+1G>A' in results.keys() + assert results['NM_003119.2:c.183+1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.183+1G>A']['refseqgene_context_intronic_sequence'] == 'NG_008082.1(NM_003119.2):c.183+1G>A' + assert results['NM_003119.2:c.183+1G>A']['alt_genomic_loci'] == [] + assert results['NM_003119.2:c.183+1G>A']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) 
(SPG7), transcript variant 1, mRNA' + assert results['NM_003119.2:c.183+1G>A']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.183+1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} + assert results['NM_003119.2:c.183+1G>A']['submitted_variant'] == '16-89575009-G-A' + assert results['NM_003119.2:c.183+1G>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.2):c.183+1G>A' + assert results['NM_003119.2:c.183+1G>A']['hgvs_lrg_variant'] == '' + assert results['NM_003119.2:c.183+1G>A']['hgvs_transcript_variant'] == 'NM_003119.2:c.183+1G>A' + assert results['NM_003119.2:c.183+1G>A']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.5205G>A' + assert results['NM_003119.2:c.183+1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89575009', 'alt': 'A'}} + assert 'hg38' not in results['NM_003119.2:c.183+1G>A']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.183+1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89575009', 'alt': 'A'}} + assert 'grch38' not in results['NM_003119.2:c.183+1G>A']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.183+1G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} + + assert results['flag'] == 'gene_variant' + assert 'NM_199367.1:c.183+1G>A' in results.keys() + assert results['NM_199367.1:c.183+1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.1:c.183+1G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.183+1G>A']['alt_genomic_loci'] == [] + assert results['NM_199367.1:c.183+1G>A']['transcript_description'] == 'Homo sapiens spastic 
paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA' + assert results['NM_199367.1:c.183+1G>A']['gene_symbol'] == 'SPG7' + assert results['NM_199367.1:c.183+1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} + assert results['NM_199367.1:c.183+1G>A']['submitted_variant'] == '16-89575009-G-A' + assert results['NM_199367.1:c.183+1G>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_199367.1):c.183+1G>A' + assert results['NM_199367.1:c.183+1G>A']['hgvs_lrg_variant'] == '' + assert results['NM_199367.1:c.183+1G>A']['hgvs_transcript_variant'] == 'NM_199367.1:c.183+1G>A' + assert results['NM_199367.1:c.183+1G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.1:c.183+1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89575009', 'alt': 'A'}} + assert 'hg38' not in results['NM_199367.1:c.183+1G>A']['primary_assembly_loci'].keys() + assert results['NM_199367.1:c.183+1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89575009', 'alt': 'A'}} + assert 'grch38' not in results['NM_199367.1:c.183+1G>A']['primary_assembly_loci'].keys() + assert results['NM_199367.1:c.183+1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} + + assert 'NM_001363850.1:c.183+1G>A' in results.keys() + assert results['NM_001363850.1:c.183+1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.183+1G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.183+1G>A']['alt_genomic_loci'] == [] + assert results['NM_001363850.1:c.183+1G>A']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript 
variant 3, mRNA' + assert results['NM_001363850.1:c.183+1G>A']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.183+1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.?', 'slr': 'NP_001350779.1:p.?'} + assert results['NM_001363850.1:c.183+1G>A']['submitted_variant'] == '16-89575009-G-A' + assert results['NM_001363850.1:c.183+1G>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001363850.1):c.183+1G>A' + assert results['NM_001363850.1:c.183+1G>A']['hgvs_lrg_variant'] == '' + assert results['NM_001363850.1:c.183+1G>A']['hgvs_transcript_variant'] == 'NM_001363850.1:c.183+1G>A' + assert results['NM_001363850.1:c.183+1G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001363850.1:c.183+1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89575009', 'alt': 'A'}} + assert 'hg38' not in results['NM_001363850.1:c.183+1G>A']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.183+1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89575009', 'alt': 'A'}} + assert 'grch38' not in results['NM_001363850.1:c.183+1G>A']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.183+1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} + + assert 'NM_003119.3:c.183+1G>A' in results.keys() + assert results['NM_003119.3:c.183+1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.183+1G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.183+1G>A']['alt_genomic_loci'] == [] + assert results['NM_003119.3:c.183+1G>A']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + assert 
results['NM_003119.3:c.183+1G>A']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.183+1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} + assert results['NM_003119.3:c.183+1G>A']['submitted_variant'] == '16-89575009-G-A' + assert results['NM_003119.3:c.183+1G>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.3):c.183+1G>A' + assert results['NM_003119.3:c.183+1G>A']['hgvs_lrg_variant'] == '' + assert results['NM_003119.3:c.183+1G>A']['hgvs_transcript_variant'] == 'NM_003119.3:c.183+1G>A' + assert results['NM_003119.3:c.183+1G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_003119.3:c.183+1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89575009', 'alt': 'A'}} + assert results['NM_003119.3:c.183+1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508601G>A', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89508601', 'alt': 'A'}} + assert results['NM_003119.3:c.183+1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89575009', 'alt': 'A'}} + assert results['NM_003119.3:c.183+1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508601G>A', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89508601', 'alt': 'A'}} + assert results['NM_003119.3:c.183+1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} + + + def test_variant243(self): + variant = '16-89575040-C-A,CA' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_199367.1:c.183+32_183+33insA' in results.keys() + assert results['NM_199367.1:c.183+32_183+33insA']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_199367.1:c.183+32_183+33insA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.183+32_183+33insA']['alt_genomic_loci'] == [] + assert results['NM_199367.1:c.183+32_183+33insA']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA' + assert results['NM_199367.1:c.183+32_183+33insA']['gene_symbol'] == 'SPG7' + assert results['NM_199367.1:c.183+32_183+33insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} + assert results['NM_199367.1:c.183+32_183+33insA']['submitted_variant'] == '16-89575040-C-A,CA' + assert results['NM_199367.1:c.183+32_183+33insA']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_199367.1):c.183+32_183+33insA' + assert results['NM_199367.1:c.183+32_183+33insA']['hgvs_lrg_variant'] == '' + assert results['NM_199367.1:c.183+32_183+33insA']['hgvs_transcript_variant'] == 'NM_199367.1:c.183+32_183+33insA' + assert results['NM_199367.1:c.183+32_183+33insA']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.1:c.183+32_183+33insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89575040', 'alt': 'CA'}} + assert 'hg38' not in results['NM_199367.1:c.183+32_183+33insA']['primary_assembly_loci'].keys() + assert results['NM_199367.1:c.183+32_183+33insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89575040', 'alt': 'CA'}} + assert 'grch38' not in results['NM_199367.1:c.183+32_183+33insA']['primary_assembly_loci'].keys() + assert results['NM_199367.1:c.183+32_183+33insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} + + assert 
'NM_001363850.1:c.183+32C>A' in results.keys() + assert results['NM_001363850.1:c.183+32C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.183+32C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.183+32C>A']['alt_genomic_loci'] == [] + assert results['NM_001363850.1:c.183+32C>A']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + assert results['NM_001363850.1:c.183+32C>A']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.183+32C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.?', 'slr': 'NP_001350779.1:p.?'} + assert results['NM_001363850.1:c.183+32C>A']['submitted_variant'] == '16-89575040-C-A,CA' + assert results['NM_001363850.1:c.183+32C>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001363850.1):c.183+32C>A' + assert results['NM_001363850.1:c.183+32C>A']['hgvs_lrg_variant'] == '' + assert results['NM_001363850.1:c.183+32C>A']['hgvs_transcript_variant'] == 'NM_001363850.1:c.183+32C>A' + assert results['NM_001363850.1:c.183+32C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001363850.1:c.183+32C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89575040', 'alt': 'A'}} + assert 'hg38' not in results['NM_001363850.1:c.183+32C>A']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.183+32C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89575040', 'alt': 'A'}} + assert 'grch38' not in results['NM_001363850.1:c.183+32C>A']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.183+32C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} + + assert 
'NM_001363850.1:c.183+32_183+33insA' in results.keys() + assert results['NM_001363850.1:c.183+32_183+33insA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.183+32_183+33insA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.183+32_183+33insA']['alt_genomic_loci'] == [] + assert results['NM_001363850.1:c.183+32_183+33insA']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + assert results['NM_001363850.1:c.183+32_183+33insA']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.183+32_183+33insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.?', 'slr': 'NP_001350779.1:p.?'} + assert results['NM_001363850.1:c.183+32_183+33insA']['submitted_variant'] == '16-89575040-C-A,CA' + assert results['NM_001363850.1:c.183+32_183+33insA']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001363850.1):c.183+32_183+33insA' + assert results['NM_001363850.1:c.183+32_183+33insA']['hgvs_lrg_variant'] == '' + assert results['NM_001363850.1:c.183+32_183+33insA']['hgvs_transcript_variant'] == 'NM_001363850.1:c.183+32_183+33insA' + assert results['NM_001363850.1:c.183+32_183+33insA']['hgvs_refseqgene_variant'] == '' + assert results['NM_001363850.1:c.183+32_183+33insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89575040', 'alt': 'CA'}} + assert 'hg38' not in results['NM_001363850.1:c.183+32_183+33insA']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.183+32_183+33insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89575040', 'alt': 'CA'}} + assert 'grch38' not in results['NM_001363850.1:c.183+32_183+33insA']['primary_assembly_loci'].keys() + assert 
results['NM_001363850.1:c.183+32_183+33insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} + + assert 'NM_199367.2:c.183+32C>A' in results.keys() + assert results['NM_199367.2:c.183+32C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.2:c.183+32C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.183+32C>A']['alt_genomic_loci'] == [] + assert results['NM_199367.2:c.183+32C>A']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA' + assert results['NM_199367.2:c.183+32C>A']['gene_symbol'] == 'SPG7' + assert results['NM_199367.2:c.183+32C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} + assert results['NM_199367.2:c.183+32C>A']['submitted_variant'] == '16-89575040-C-A,CA' + assert results['NM_199367.2:c.183+32C>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_199367.2):c.183+32C>A' + assert results['NM_199367.2:c.183+32C>A']['hgvs_lrg_variant'] == '' + assert results['NM_199367.2:c.183+32C>A']['hgvs_transcript_variant'] == 'NM_199367.2:c.183+32C>A' + assert results['NM_199367.2:c.183+32C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.2:c.183+32C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89575040', 'alt': 'A'}} + assert results['NM_199367.2:c.183+32C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508632C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89508632', 'alt': 'A'}} + assert results['NM_199367.2:c.183+32C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89575040', 'alt': 'A'}} + assert 
results['NM_199367.2:c.183+32C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508632C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89508632', 'alt': 'A'}} + assert results['NM_199367.2:c.183+32C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} + + assert 'NM_003119.3:c.183+32_183+33insA' in results.keys() + assert results['NM_003119.3:c.183+32_183+33insA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.183+32_183+33insA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.183+32_183+33insA']['alt_genomic_loci'] == [] + assert results['NM_003119.3:c.183+32_183+33insA']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.3:c.183+32_183+33insA']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.183+32_183+33insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} + assert results['NM_003119.3:c.183+32_183+33insA']['submitted_variant'] == '16-89575040-C-A,CA' + assert results['NM_003119.3:c.183+32_183+33insA']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.3):c.183+32_183+33insA' + assert results['NM_003119.3:c.183+32_183+33insA']['hgvs_lrg_variant'] == '' + assert results['NM_003119.3:c.183+32_183+33insA']['hgvs_transcript_variant'] == 'NM_003119.3:c.183+32_183+33insA' + assert results['NM_003119.3:c.183+32_183+33insA']['hgvs_refseqgene_variant'] == '' + assert results['NM_003119.3:c.183+32_183+33insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89575040', 'alt': 'CA'}} + assert results['NM_003119.3:c.183+32_183+33insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000016.10:g.89508632_89508633insA', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89508632', 'alt': 'CA'}} + assert results['NM_003119.3:c.183+32_183+33insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89575040', 'alt': 'CA'}} + assert results['NM_003119.3:c.183+32_183+33insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508632_89508633insA', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89508632', 'alt': 'CA'}} + assert results['NM_003119.3:c.183+32_183+33insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} + + assert results['flag'] == 'gene_variant' + assert 'NM_003119.2:c.183+32_183+33insA' in results.keys() + assert results['NM_003119.2:c.183+32_183+33insA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.183+32_183+33insA']['refseqgene_context_intronic_sequence'] == 'NG_008082.1(NM_003119.2):c.183+32_183+33insA' + assert results['NM_003119.2:c.183+32_183+33insA']['alt_genomic_loci'] == [] + assert results['NM_003119.2:c.183+32_183+33insA']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.2:c.183+32_183+33insA']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.183+32_183+33insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} + assert results['NM_003119.2:c.183+32_183+33insA']['submitted_variant'] == '16-89575040-C-A,CA' + assert results['NM_003119.2:c.183+32_183+33insA']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.2):c.183+32_183+33insA' + assert results['NM_003119.2:c.183+32_183+33insA']['hgvs_lrg_variant'] == '' + assert results['NM_003119.2:c.183+32_183+33insA']['hgvs_transcript_variant'] 
== 'NM_003119.2:c.183+32_183+33insA' + assert results['NM_003119.2:c.183+32_183+33insA']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.5236_5237insA' + assert results['NM_003119.2:c.183+32_183+33insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89575040', 'alt': 'CA'}} + assert 'hg38' not in results['NM_003119.2:c.183+32_183+33insA']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.183+32_183+33insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89575040', 'alt': 'CA'}} + assert 'grch38' not in results['NM_003119.2:c.183+32_183+33insA']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.183+32_183+33insA']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} + + assert 'NM_199367.1:c.183+32C>A' in results.keys() + assert results['NM_199367.1:c.183+32C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.1:c.183+32C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.183+32C>A']['alt_genomic_loci'] == [] + assert results['NM_199367.1:c.183+32C>A']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA' + assert results['NM_199367.1:c.183+32C>A']['gene_symbol'] == 'SPG7' + assert results['NM_199367.1:c.183+32C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} + assert results['NM_199367.1:c.183+32C>A']['submitted_variant'] == '16-89575040-C-A,CA' + assert results['NM_199367.1:c.183+32C>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_199367.1):c.183+32C>A' + assert 
results['NM_199367.1:c.183+32C>A']['hgvs_lrg_variant'] == '' + assert results['NM_199367.1:c.183+32C>A']['hgvs_transcript_variant'] == 'NM_199367.1:c.183+32C>A' + assert results['NM_199367.1:c.183+32C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.1:c.183+32C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89575040', 'alt': 'A'}} + assert 'hg38' not in results['NM_199367.1:c.183+32C>A']['primary_assembly_loci'].keys() + assert results['NM_199367.1:c.183+32C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89575040', 'alt': 'A'}} + assert 'grch38' not in results['NM_199367.1:c.183+32C>A']['primary_assembly_loci'].keys() + assert results['NM_199367.1:c.183+32C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} + + assert 'NM_003119.3:c.183+32C>A' in results.keys() + assert results['NM_003119.3:c.183+32C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.183+32C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.183+32C>A']['alt_genomic_loci'] == [] + assert results['NM_003119.3:c.183+32C>A']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.3:c.183+32C>A']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.183+32C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} + assert results['NM_003119.3:c.183+32C>A']['submitted_variant'] == '16-89575040-C-A,CA' + assert results['NM_003119.3:c.183+32C>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.3):c.183+32C>A' + assert results['NM_003119.3:c.183+32C>A']['hgvs_lrg_variant'] == '' + assert 
results['NM_003119.3:c.183+32C>A']['hgvs_transcript_variant'] == 'NM_003119.3:c.183+32C>A' + assert results['NM_003119.3:c.183+32C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_003119.3:c.183+32C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89575040', 'alt': 'A'}} + assert results['NM_003119.3:c.183+32C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508632C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89508632', 'alt': 'A'}} + assert results['NM_003119.3:c.183+32C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89575040', 'alt': 'A'}} + assert results['NM_003119.3:c.183+32C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508632C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89508632', 'alt': 'A'}} + assert results['NM_003119.3:c.183+32C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} + + assert 'NM_199367.2:c.183+32_183+33insA' in results.keys() + assert results['NM_199367.2:c.183+32_183+33insA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.2:c.183+32_183+33insA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.183+32_183+33insA']['alt_genomic_loci'] == [] + assert results['NM_199367.2:c.183+32_183+33insA']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA' + assert results['NM_199367.2:c.183+32_183+33insA']['gene_symbol'] == 'SPG7' + assert results['NM_199367.2:c.183+32_183+33insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} + assert results['NM_199367.2:c.183+32_183+33insA']['submitted_variant'] 
== '16-89575040-C-A,CA' + assert results['NM_199367.2:c.183+32_183+33insA']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_199367.2):c.183+32_183+33insA' + assert results['NM_199367.2:c.183+32_183+33insA']['hgvs_lrg_variant'] == '' + assert results['NM_199367.2:c.183+32_183+33insA']['hgvs_transcript_variant'] == 'NM_199367.2:c.183+32_183+33insA' + assert results['NM_199367.2:c.183+32_183+33insA']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.2:c.183+32_183+33insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89575040', 'alt': 'CA'}} + assert results['NM_199367.2:c.183+32_183+33insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508632_89508633insA', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89508632', 'alt': 'CA'}} + assert results['NM_199367.2:c.183+32_183+33insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89575040', 'alt': 'CA'}} + assert results['NM_199367.2:c.183+32_183+33insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508632_89508633insA', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89508632', 'alt': 'CA'}} + assert results['NM_199367.2:c.183+32_183+33insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} + + assert 'NM_003119.2:c.183+32C>A' in results.keys() + assert results['NM_003119.2:c.183+32C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.183+32C>A']['refseqgene_context_intronic_sequence'] == 'NG_008082.1(NM_003119.2):c.183+32C>A' + assert results['NM_003119.2:c.183+32C>A']['alt_genomic_loci'] == [] + assert results['NM_003119.2:c.183+32C>A']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 
(pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.2:c.183+32C>A']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.183+32C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} + assert results['NM_003119.2:c.183+32C>A']['submitted_variant'] == '16-89575040-C-A,CA' + assert results['NM_003119.2:c.183+32C>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.2):c.183+32C>A' + assert results['NM_003119.2:c.183+32C>A']['hgvs_lrg_variant'] == '' + assert results['NM_003119.2:c.183+32C>A']['hgvs_transcript_variant'] == 'NM_003119.2:c.183+32C>A' + assert results['NM_003119.2:c.183+32C>A']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.5236C>A' + assert results['NM_003119.2:c.183+32C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89575040', 'alt': 'A'}} + assert 'hg38' not in results['NM_003119.2:c.183+32C>A']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.183+32C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89575040', 'alt': 'A'}} + assert 'grch38' not in results['NM_003119.2:c.183+32C>A']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.183+32C>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} + + + def test_variant244(self): + variant = '16-89576896-A-C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_199367.2:c.184-2A>C' in results.keys() + assert results['NM_199367.2:c.184-2A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.2:c.184-2A>C']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_199367.2:c.184-2A>C']['alt_genomic_loci'] == [] + assert results['NM_199367.2:c.184-2A>C']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA' + assert results['NM_199367.2:c.184-2A>C']['gene_symbol'] == 'SPG7' + assert results['NM_199367.2:c.184-2A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} + assert results['NM_199367.2:c.184-2A>C']['submitted_variant'] == '16-89576896-A-C' + assert results['NM_199367.2:c.184-2A>C']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_199367.2):c.184-2A>C' + assert results['NM_199367.2:c.184-2A>C']['hgvs_lrg_variant'] == '' + assert results['NM_199367.2:c.184-2A>C']['hgvs_transcript_variant'] == 'NM_199367.2:c.184-2A>C' + assert results['NM_199367.2:c.184-2A>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.2:c.184-2A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89576896', 'alt': 'C'}} + assert results['NM_199367.2:c.184-2A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510488A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89510488', 'alt': 'C'}} + assert results['NM_199367.2:c.184-2A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89576896', 'alt': 'C'}} + assert results['NM_199367.2:c.184-2A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510488A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89510488', 'alt': 'C'}} + assert results['NM_199367.2:c.184-2A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} + + assert 'NM_003119.2:c.184-2A>C' in results.keys() + assert 
results['NM_003119.2:c.184-2A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.184-2A>C']['refseqgene_context_intronic_sequence'] == 'NG_008082.1(NM_003119.2):c.184-2A>C' + assert results['NM_003119.2:c.184-2A>C']['alt_genomic_loci'] == [] + assert results['NM_003119.2:c.184-2A>C']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.2:c.184-2A>C']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.184-2A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} + assert results['NM_003119.2:c.184-2A>C']['submitted_variant'] == '16-89576896-A-C' + assert results['NM_003119.2:c.184-2A>C']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.2):c.184-2A>C' + assert results['NM_003119.2:c.184-2A>C']['hgvs_lrg_variant'] == '' + assert results['NM_003119.2:c.184-2A>C']['hgvs_transcript_variant'] == 'NM_003119.2:c.184-2A>C' + assert results['NM_003119.2:c.184-2A>C']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.7092A>C' + assert results['NM_003119.2:c.184-2A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89576896', 'alt': 'C'}} + assert 'hg38' not in results['NM_003119.2:c.184-2A>C']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.184-2A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89576896', 'alt': 'C'}} + assert 'grch38' not in results['NM_003119.2:c.184-2A>C']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.184-2A>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} + + assert 
'NM_003119.3:c.184-2A>C' in results.keys() + assert results['NM_003119.3:c.184-2A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.184-2A>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.184-2A>C']['alt_genomic_loci'] == [] + assert results['NM_003119.3:c.184-2A>C']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.3:c.184-2A>C']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.184-2A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} + assert results['NM_003119.3:c.184-2A>C']['submitted_variant'] == '16-89576896-A-C' + assert results['NM_003119.3:c.184-2A>C']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.3):c.184-2A>C' + assert results['NM_003119.3:c.184-2A>C']['hgvs_lrg_variant'] == '' + assert results['NM_003119.3:c.184-2A>C']['hgvs_transcript_variant'] == 'NM_003119.3:c.184-2A>C' + assert results['NM_003119.3:c.184-2A>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_003119.3:c.184-2A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89576896', 'alt': 'C'}} + assert results['NM_003119.3:c.184-2A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510488A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89510488', 'alt': 'C'}} + assert results['NM_003119.3:c.184-2A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89576896', 'alt': 'C'}} + assert results['NM_003119.3:c.184-2A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510488A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89510488', 'alt': 'C'}} + assert results['NM_003119.3:c.184-2A>C']['reference_sequence_records'] == 
{'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} + + assert 'NM_001363850.1:c.184-2A>C' in results.keys() + assert results['NM_001363850.1:c.184-2A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.184-2A>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.184-2A>C']['alt_genomic_loci'] == [] + assert results['NM_001363850.1:c.184-2A>C']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + assert results['NM_001363850.1:c.184-2A>C']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.184-2A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.?', 'slr': 'NP_001350779.1:p.?'} + assert results['NM_001363850.1:c.184-2A>C']['submitted_variant'] == '16-89576896-A-C' + assert results['NM_001363850.1:c.184-2A>C']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001363850.1):c.184-2A>C' + assert results['NM_001363850.1:c.184-2A>C']['hgvs_lrg_variant'] == '' + assert results['NM_001363850.1:c.184-2A>C']['hgvs_transcript_variant'] == 'NM_001363850.1:c.184-2A>C' + assert results['NM_001363850.1:c.184-2A>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_001363850.1:c.184-2A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89576896', 'alt': 'C'}} + assert 'hg38' not in results['NM_001363850.1:c.184-2A>C']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.184-2A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89576896', 'alt': 'C'}} + assert 'grch38' not in results['NM_001363850.1:c.184-2A>C']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.184-2A>C']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_199367.1:c.184-2A>C' in results.keys() + assert results['NM_199367.1:c.184-2A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.1:c.184-2A>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.184-2A>C']['alt_genomic_loci'] == [] + assert results['NM_199367.1:c.184-2A>C']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA' + assert results['NM_199367.1:c.184-2A>C']['gene_symbol'] == 'SPG7' + assert results['NM_199367.1:c.184-2A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} + assert results['NM_199367.1:c.184-2A>C']['submitted_variant'] == '16-89576896-A-C' + assert results['NM_199367.1:c.184-2A>C']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_199367.1):c.184-2A>C' + assert results['NM_199367.1:c.184-2A>C']['hgvs_lrg_variant'] == '' + assert results['NM_199367.1:c.184-2A>C']['hgvs_transcript_variant'] == 'NM_199367.1:c.184-2A>C' + assert results['NM_199367.1:c.184-2A>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.1:c.184-2A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89576896', 'alt': 'C'}} + assert 'hg38' not in results['NM_199367.1:c.184-2A>C']['primary_assembly_loci'].keys() + assert results['NM_199367.1:c.184-2A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89576896', 'alt': 'C'}} + assert 'grch38' not in results['NM_199367.1:c.184-2A>C']['primary_assembly_loci'].keys() + assert results['NM_199367.1:c.184-2A>C']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} + + + def test_variant245(self): + variant = '16-89576930-T-TA,TT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_003119.3:c.216dup' in results.keys() + assert results['NM_003119.3:c.216dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.216dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.216dup']['alt_genomic_loci'] == [] + assert results['NM_003119.3:c.216dup']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.3:c.216dup']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.216dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Glu73Ter)', 'slr': 'NP_003110.1:p.(E73*)'} + assert results['NM_003119.3:c.216dup']['submitted_variant'] == '16-89576930-T-TA,TT' + assert results['NM_003119.3:c.216dup']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.216dup']['hgvs_lrg_variant'] == '' + assert results['NM_003119.3:c.216dup']['hgvs_transcript_variant'] == 'NM_003119.3:c.216dup' + assert results['NM_003119.3:c.216dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_003119.3:c.216dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89576927', 'alt': 'CT'}} + assert results['NM_003119.3:c.216dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522dup', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89510519', 'alt': 'CT'}} + assert results['NM_003119.3:c.216dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89576927', 'alt': 'CT'}} + assert 
results['NM_003119.3:c.216dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522dup', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89510519', 'alt': 'CT'}} + assert results['NM_003119.3:c.216dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} + + assert 'NM_003119.2:c.216_217insA' in results.keys() + assert results['NM_003119.2:c.216_217insA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.216_217insA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.216_217insA']['alt_genomic_loci'] == [] + assert results['NM_003119.2:c.216_217insA']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.2:c.216_217insA']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.216_217insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Glu73ArgfsTer30)', 'slr': 'NP_003110.1:p.(E73Rfs*30)'} + assert results['NM_003119.2:c.216_217insA']['submitted_variant'] == '16-89576930-T-TA,TT' + assert results['NM_003119.2:c.216_217insA']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.216_217insA']['hgvs_lrg_variant'] == '' + assert results['NM_003119.2:c.216_217insA']['hgvs_transcript_variant'] == 'NM_003119.2:c.216_217insA' + assert results['NM_003119.2:c.216_217insA']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.7126_7127insA' + assert results['NM_003119.2:c.216_217insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89576930', 'alt': 'TA'}} + assert 'hg38' not in results['NM_003119.2:c.216_217insA']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.216_217insA']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89576930', 'alt': 'TA'}} + assert 'grch38' not in results['NM_003119.2:c.216_217insA']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.216_217insA']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} + + assert 'NM_199367.2:c.216dup' in results.keys() + assert results['NM_199367.2:c.216dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.2:c.216dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.216dup']['alt_genomic_loci'] == [] + assert results['NM_199367.2:c.216dup']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA' + assert results['NM_199367.2:c.216dup']['gene_symbol'] == 'SPG7' + assert results['NM_199367.2:c.216dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Glu73Ter)', 'slr': 'NP_955399.1:p.(E73*)'} + assert results['NM_199367.2:c.216dup']['submitted_variant'] == '16-89576930-T-TA,TT' + assert results['NM_199367.2:c.216dup']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.216dup']['hgvs_lrg_variant'] == '' + assert results['NM_199367.2:c.216dup']['hgvs_transcript_variant'] == 'NM_199367.2:c.216dup' + assert results['NM_199367.2:c.216dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.2:c.216dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89576927', 'alt': 'CT'}} + assert results['NM_199367.2:c.216dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522dup', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89510519', 'alt': 'CT'}} + assert 
results['NM_199367.2:c.216dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89576927', 'alt': 'CT'}} + assert results['NM_199367.2:c.216dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522dup', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89510519', 'alt': 'CT'}} + assert results['NM_199367.2:c.216dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} + + assert 'NM_199367.2:c.216_217insA' in results.keys() + assert results['NM_199367.2:c.216_217insA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.2:c.216_217insA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.216_217insA']['alt_genomic_loci'] == [] + assert results['NM_199367.2:c.216_217insA']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA' + assert results['NM_199367.2:c.216_217insA']['gene_symbol'] == 'SPG7' + assert results['NM_199367.2:c.216_217insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Glu73ArgfsTer30)', 'slr': 'NP_955399.1:p.(E73Rfs*30)'} + assert results['NM_199367.2:c.216_217insA']['submitted_variant'] == '16-89576930-T-TA,TT' + assert results['NM_199367.2:c.216_217insA']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.216_217insA']['hgvs_lrg_variant'] == '' + assert results['NM_199367.2:c.216_217insA']['hgvs_transcript_variant'] == 'NM_199367.2:c.216_217insA' + assert results['NM_199367.2:c.216_217insA']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.2:c.216_217insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89576930', 'alt': 'TA'}} + assert 
results['NM_199367.2:c.216_217insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523insA', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89510522', 'alt': 'TA'}} + assert results['NM_199367.2:c.216_217insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89576930', 'alt': 'TA'}} + assert results['NM_199367.2:c.216_217insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523insA', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89510522', 'alt': 'TA'}} + assert results['NM_199367.2:c.216_217insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} + + assert 'NM_001363850.1:c.216dup' in results.keys() + assert results['NM_001363850.1:c.216dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.216dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.216dup']['alt_genomic_loci'] == [] + assert results['NM_001363850.1:c.216dup']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + assert results['NM_001363850.1:c.216dup']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.216dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Glu73Ter)', 'slr': 'NP_001350779.1:p.(E73*)'} + assert results['NM_001363850.1:c.216dup']['submitted_variant'] == '16-89576930-T-TA,TT' + assert results['NM_001363850.1:c.216dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.216dup']['hgvs_lrg_variant'] == '' + assert results['NM_001363850.1:c.216dup']['hgvs_transcript_variant'] == 'NM_001363850.1:c.216dup' + assert results['NM_001363850.1:c.216dup']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_001363850.1:c.216dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89576927', 'alt': 'CT'}} + assert 'hg38' not in results['NM_001363850.1:c.216dup']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.216dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89576927', 'alt': 'CT'}} + assert 'grch38' not in results['NM_001363850.1:c.216dup']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.216dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001363850.1:c.216_217insA' in results.keys() + assert results['NM_001363850.1:c.216_217insA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.216_217insA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.216_217insA']['alt_genomic_loci'] == [] + assert results['NM_001363850.1:c.216_217insA']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + assert results['NM_001363850.1:c.216_217insA']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.216_217insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Glu73ArgfsTer30)', 'slr': 'NP_001350779.1:p.(E73Rfs*30)'} + assert results['NM_001363850.1:c.216_217insA']['submitted_variant'] == '16-89576930-T-TA,TT' + assert results['NM_001363850.1:c.216_217insA']['genome_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.216_217insA']['hgvs_lrg_variant'] == '' + assert results['NM_001363850.1:c.216_217insA']['hgvs_transcript_variant'] == 'NM_001363850.1:c.216_217insA' + assert 
results['NM_001363850.1:c.216_217insA']['hgvs_refseqgene_variant'] == '' + assert results['NM_001363850.1:c.216_217insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89576930', 'alt': 'TA'}} + assert 'hg38' not in results['NM_001363850.1:c.216_217insA']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.216_217insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89576930', 'alt': 'TA'}} + assert 'grch38' not in results['NM_001363850.1:c.216_217insA']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.216_217insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} + + assert 'NM_199367.1:c.216_217insA' in results.keys() + assert results['NM_199367.1:c.216_217insA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.1:c.216_217insA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.216_217insA']['alt_genomic_loci'] == [] + assert results['NM_199367.1:c.216_217insA']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA' + assert results['NM_199367.1:c.216_217insA']['gene_symbol'] == 'SPG7' + assert results['NM_199367.1:c.216_217insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Glu73ArgfsTer30)', 'slr': 'NP_955399.1:p.(E73Rfs*30)'} + assert results['NM_199367.1:c.216_217insA']['submitted_variant'] == '16-89576930-T-TA,TT' + assert results['NM_199367.1:c.216_217insA']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.216_217insA']['hgvs_lrg_variant'] == '' + assert results['NM_199367.1:c.216_217insA']['hgvs_transcript_variant'] == 
'NM_199367.1:c.216_217insA' + assert results['NM_199367.1:c.216_217insA']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.1:c.216_217insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89576930', 'alt': 'TA'}} + assert 'hg38' not in results['NM_199367.1:c.216_217insA']['primary_assembly_loci'].keys() + assert results['NM_199367.1:c.216_217insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89576930', 'alt': 'TA'}} + assert 'grch38' not in results['NM_199367.1:c.216_217insA']['primary_assembly_loci'].keys() + assert results['NM_199367.1:c.216_217insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} + + assert 'NM_199367.1:c.216dup' in results.keys() + assert results['NM_199367.1:c.216dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.1:c.216dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.216dup']['alt_genomic_loci'] == [] + assert results['NM_199367.1:c.216dup']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA' + assert results['NM_199367.1:c.216dup']['gene_symbol'] == 'SPG7' + assert results['NM_199367.1:c.216dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Glu73Ter)', 'slr': 'NP_955399.1:p.(E73*)'} + assert results['NM_199367.1:c.216dup']['submitted_variant'] == '16-89576930-T-TA,TT' + assert results['NM_199367.1:c.216dup']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.216dup']['hgvs_lrg_variant'] == '' + assert results['NM_199367.1:c.216dup']['hgvs_transcript_variant'] == 'NM_199367.1:c.216dup' + assert 
results['NM_199367.1:c.216dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.1:c.216dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89576927', 'alt': 'CT'}} + assert 'hg38' not in results['NM_199367.1:c.216dup']['primary_assembly_loci'].keys() + assert results['NM_199367.1:c.216dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89576927', 'alt': 'CT'}} + assert 'grch38' not in results['NM_199367.1:c.216dup']['primary_assembly_loci'].keys() + assert results['NM_199367.1:c.216dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} + + assert 'NM_003119.3:c.216_217insA' in results.keys() + assert results['NM_003119.3:c.216_217insA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.216_217insA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.216_217insA']['alt_genomic_loci'] == [] + assert results['NM_003119.3:c.216_217insA']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.3:c.216_217insA']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.216_217insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Glu73ArgfsTer30)', 'slr': 'NP_003110.1:p.(E73Rfs*30)'} + assert results['NM_003119.3:c.216_217insA']['submitted_variant'] == '16-89576930-T-TA,TT' + assert results['NM_003119.3:c.216_217insA']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.216_217insA']['hgvs_lrg_variant'] == '' + assert results['NM_003119.3:c.216_217insA']['hgvs_transcript_variant'] == 'NM_003119.3:c.216_217insA' + assert results['NM_003119.3:c.216_217insA']['hgvs_refseqgene_variant'] == '' 
+ assert results['NM_003119.3:c.216_217insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89576930', 'alt': 'TA'}} + assert results['NM_003119.3:c.216_217insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523insA', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89510522', 'alt': 'TA'}} + assert results['NM_003119.3:c.216_217insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89576930', 'alt': 'TA'}} + assert results['NM_003119.3:c.216_217insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523insA', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89510522', 'alt': 'TA'}} + assert results['NM_003119.3:c.216_217insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} + + assert 'NM_003119.2:c.216dup' in results.keys() + assert results['NM_003119.2:c.216dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.216dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.216dup']['alt_genomic_loci'] == [] + assert results['NM_003119.2:c.216dup']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.2:c.216dup']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.216dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Glu73Ter)', 'slr': 'NP_003110.1:p.(E73*)'} + assert results['NM_003119.2:c.216dup']['submitted_variant'] == '16-89576930-T-TA,TT' + assert results['NM_003119.2:c.216dup']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.216dup']['hgvs_lrg_variant'] 
== '' + assert results['NM_003119.2:c.216dup']['hgvs_transcript_variant'] == 'NM_003119.2:c.216dup' + assert results['NM_003119.2:c.216dup']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.7126dup' + assert results['NM_003119.2:c.216dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89576927', 'alt': 'CT'}} + assert 'hg38' not in results['NM_003119.2:c.216dup']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.216dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89576927', 'alt': 'CT'}} + assert 'grch38' not in results['NM_003119.2:c.216dup']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.216dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} + + + def test_variant246(self): + variant = '16-89576931-G-GTG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_199367.1:c.216_217dup' in results.keys() + assert results['NM_199367.1:c.216_217dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.1:c.216_217dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.216_217dup']['alt_genomic_loci'] == [] + assert results['NM_199367.1:c.216_217dup']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA' + assert results['NM_199367.1:c.216_217dup']['gene_symbol'] == 'SPG7' + assert results['NM_199367.1:c.216_217dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Glu73ValfsTer9)', 'slr': 'NP_955399.1:p.(E73Vfs*9)'} + assert results['NM_199367.1:c.216_217dup']['submitted_variant'] == '16-89576931-G-GTG' + 
assert results['NM_199367.1:c.216_217dup']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.216_217dup']['hgvs_lrg_variant'] == '' + assert results['NM_199367.1:c.216_217dup']['hgvs_transcript_variant'] == 'NM_199367.1:c.216_217dup' + assert results['NM_199367.1:c.216_217dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.1:c.216_217dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89576929', 'alt': 'TTG'}} + assert 'hg38' not in results['NM_199367.1:c.216_217dup']['primary_assembly_loci'].keys() + assert results['NM_199367.1:c.216_217dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89576929', 'alt': 'TTG'}} + assert 'grch38' not in results['NM_199367.1:c.216_217dup']['primary_assembly_loci'].keys() + assert results['NM_199367.1:c.216_217dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} + + assert 'NM_003119.3:c.216_217dup' in results.keys() + assert results['NM_003119.3:c.216_217dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.216_217dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.216_217dup']['alt_genomic_loci'] == [] + assert results['NM_003119.3:c.216_217dup']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.3:c.216_217dup']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.216_217dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Glu73ValfsTer9)', 'slr': 'NP_003110.1:p.(E73Vfs*9)'} + assert results['NM_003119.3:c.216_217dup']['submitted_variant'] == '16-89576931-G-GTG' + assert 
results['NM_003119.3:c.216_217dup']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.216_217dup']['hgvs_lrg_variant'] == '' + assert results['NM_003119.3:c.216_217dup']['hgvs_transcript_variant'] == 'NM_003119.3:c.216_217dup' + assert results['NM_003119.3:c.216_217dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_003119.3:c.216_217dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89576929', 'alt': 'TTG'}} + assert results['NM_003119.3:c.216_217dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523dup', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89510521', 'alt': 'TTG'}} + assert results['NM_003119.3:c.216_217dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89576929', 'alt': 'TTG'}} + assert results['NM_003119.3:c.216_217dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523dup', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89510521', 'alt': 'TTG'}} + assert results['NM_003119.3:c.216_217dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} + + assert 'NM_199367.2:c.216_217dup' in results.keys() + assert results['NM_199367.2:c.216_217dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.2:c.216_217dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.216_217dup']['alt_genomic_loci'] == [] + assert results['NM_199367.2:c.216_217dup']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA' + assert results['NM_199367.2:c.216_217dup']['gene_symbol'] == 'SPG7' + assert 
results['NM_199367.2:c.216_217dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Glu73ValfsTer9)', 'slr': 'NP_955399.1:p.(E73Vfs*9)'} + assert results['NM_199367.2:c.216_217dup']['submitted_variant'] == '16-89576931-G-GTG' + assert results['NM_199367.2:c.216_217dup']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.216_217dup']['hgvs_lrg_variant'] == '' + assert results['NM_199367.2:c.216_217dup']['hgvs_transcript_variant'] == 'NM_199367.2:c.216_217dup' + assert results['NM_199367.2:c.216_217dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.2:c.216_217dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89576929', 'alt': 'TTG'}} + assert results['NM_199367.2:c.216_217dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523dup', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89510521', 'alt': 'TTG'}} + assert results['NM_199367.2:c.216_217dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89576929', 'alt': 'TTG'}} + assert results['NM_199367.2:c.216_217dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523dup', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89510521', 'alt': 'TTG'}} + assert results['NM_199367.2:c.216_217dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} + + assert 'NM_003119.2:c.216_217dup' in results.keys() + assert results['NM_003119.2:c.216_217dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.216_217dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.216_217dup']['alt_genomic_loci'] == [] + assert 
results['NM_003119.2:c.216_217dup']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.2:c.216_217dup']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.216_217dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Glu73ValfsTer9)', 'slr': 'NP_003110.1:p.(E73Vfs*9)'} + assert results['NM_003119.2:c.216_217dup']['submitted_variant'] == '16-89576931-G-GTG' + assert results['NM_003119.2:c.216_217dup']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.216_217dup']['hgvs_lrg_variant'] == '' + assert results['NM_003119.2:c.216_217dup']['hgvs_transcript_variant'] == 'NM_003119.2:c.216_217dup' + assert results['NM_003119.2:c.216_217dup']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.7126_7127dup' + assert results['NM_003119.2:c.216_217dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89576929', 'alt': 'TTG'}} + assert 'hg38' not in results['NM_003119.2:c.216_217dup']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.216_217dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89576929', 'alt': 'TTG'}} + assert 'grch38' not in results['NM_003119.2:c.216_217dup']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.216_217dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001363850.1:c.216_217dup' in results.keys() + assert results['NM_001363850.1:c.216_217dup']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001363850.1:c.216_217dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.216_217dup']['alt_genomic_loci'] == [] + assert results['NM_001363850.1:c.216_217dup']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + assert results['NM_001363850.1:c.216_217dup']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.216_217dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Glu73ValfsTer9)', 'slr': 'NP_001350779.1:p.(E73Vfs*9)'} + assert results['NM_001363850.1:c.216_217dup']['submitted_variant'] == '16-89576931-G-GTG' + assert results['NM_001363850.1:c.216_217dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.216_217dup']['hgvs_lrg_variant'] == '' + assert results['NM_001363850.1:c.216_217dup']['hgvs_transcript_variant'] == 'NM_001363850.1:c.216_217dup' + assert results['NM_001363850.1:c.216_217dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001363850.1:c.216_217dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89576929', 'alt': 'TTG'}} + assert 'hg38' not in results['NM_001363850.1:c.216_217dup']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.216_217dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89576929', 'alt': 'TTG'}} + assert 'grch38' not in results['NM_001363850.1:c.216_217dup']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.216_217dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} + + + def test_variant247(self): + variant = '16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C' + results = self.vv.validate(variant, 
'GRCh37', 'all') + print results + + assert 'NM_199367.1:c.1046_1071del' in results.keys() + assert results['NM_199367.1:c.1046_1071del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.1:c.1046_1071del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.1046_1071del']['alt_genomic_loci'] == [] + assert results['NM_199367.1:c.1046_1071del']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA' + assert results['NM_199367.1:c.1046_1071del']['gene_symbol'] == 'SPG7' + assert results['NM_199367.1:c.1046_1071del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Gly349AlafsTer38)', 'slr': 'NP_955399.1:p.(G349Afs*38)'} + assert results['NM_199367.1:c.1046_1071del']['submitted_variant'] == '16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C' + assert results['NM_199367.1:c.1046_1071del']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.1046_1071del']['hgvs_lrg_variant'] == '' + assert results['NM_199367.1:c.1046_1071del']['hgvs_transcript_variant'] == 'NM_199367.1:c.1046_1071del' + assert results['NM_199367.1:c.1046_1071del']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.1:c.1046_1071del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': 'chr16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89598368', 'alt': 'C'}} + assert 'hg38' not in results['NM_199367.1:c.1046_1071del']['primary_assembly_loci'].keys() + assert results['NM_199367.1:c.1046_1071del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': '16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89598368', 'alt': 'C'}} + assert 'grch38' not in results['NM_199367.1:c.1046_1071del']['primary_assembly_loci'].keys() + assert results['NM_199367.1:c.1046_1071del']['reference_sequence_records'] == 
{'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} + + assert 'NM_001363850.1:c.1046_1071del' in results.keys() + assert results['NM_001363850.1:c.1046_1071del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.1046_1071del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.1046_1071del']['alt_genomic_loci'] == [] + assert results['NM_001363850.1:c.1046_1071del']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + assert results['NM_001363850.1:c.1046_1071del']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.1046_1071del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Gly349AlafsTer38)', 'slr': 'NP_001350779.1:p.(G349Afs*38)'} + assert results['NM_001363850.1:c.1046_1071del']['submitted_variant'] == '16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C' + assert results['NM_001363850.1:c.1046_1071del']['genome_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.1046_1071del']['hgvs_lrg_variant'] == '' + assert results['NM_001363850.1:c.1046_1071del']['hgvs_transcript_variant'] == 'NM_001363850.1:c.1046_1071del' + assert results['NM_001363850.1:c.1046_1071del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001363850.1:c.1046_1071del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': 'chr16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89598368', 'alt': 'C'}} + assert 'hg38' not in results['NM_001363850.1:c.1046_1071del']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.1046_1071del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': '16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89598368', 'alt': 'C'}} + assert 'grch38' not in 
results['NM_001363850.1:c.1046_1071del']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.1046_1071del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} + + assert 'NM_199367.2:c.1046_1071del' in results.keys() + assert results['NM_199367.2:c.1046_1071del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.2:c.1046_1071del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.1046_1071del']['alt_genomic_loci'] == [] + assert results['NM_199367.2:c.1046_1071del']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA' + assert results['NM_199367.2:c.1046_1071del']['gene_symbol'] == 'SPG7' + assert results['NM_199367.2:c.1046_1071del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Gly349AlafsTer38)', 'slr': 'NP_955399.1:p.(G349Afs*38)'} + assert results['NM_199367.2:c.1046_1071del']['submitted_variant'] == '16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C' + assert results['NM_199367.2:c.1046_1071del']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.1046_1071del']['hgvs_lrg_variant'] == '' + assert results['NM_199367.2:c.1046_1071del']['hgvs_transcript_variant'] == 'NM_199367.2:c.1046_1071del' + assert results['NM_199367.2:c.1046_1071del']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.2:c.1046_1071del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': 'chr16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89598368', 'alt': 'C'}} + assert results['NM_199367.2:c.1046_1071del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89531962_89531987del', 'vcf': {'chr': 'chr16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89531960', 'alt': 'C'}} + assert 
results['NM_199367.2:c.1046_1071del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': '16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89598368', 'alt': 'C'}} + assert results['NM_199367.2:c.1046_1071del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89531962_89531987del', 'vcf': {'chr': '16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89531960', 'alt': 'C'}} + assert results['NM_199367.2:c.1046_1071del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} + + assert results['flag'] == 'gene_variant' + assert 'NM_003119.2:c.1046_1071del' in results.keys() + assert results['NM_003119.2:c.1046_1071del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.1046_1071del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.1046_1071del']['alt_genomic_loci'] == [] + assert results['NM_003119.2:c.1046_1071del']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.2:c.1046_1071del']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.1046_1071del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Gly349AlafsTer38)', 'slr': 'NP_003110.1:p.(G349Afs*38)'} + assert results['NM_003119.2:c.1046_1071del']['submitted_variant'] == '16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C' + assert results['NM_003119.2:c.1046_1071del']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.1046_1071del']['hgvs_lrg_variant'] == '' + assert results['NM_003119.2:c.1046_1071del']['hgvs_transcript_variant'] == 'NM_003119.2:c.1046_1071del' + assert results['NM_003119.2:c.1046_1071del']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.28566_28591del' + assert 
results['NM_003119.2:c.1046_1071del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': 'chr16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89598368', 'alt': 'C'}} + assert 'hg38' not in results['NM_003119.2:c.1046_1071del']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.1046_1071del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': '16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89598368', 'alt': 'C'}} + assert 'grch38' not in results['NM_003119.2:c.1046_1071del']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.1046_1071del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} + + assert 'NM_003119.3:c.1046_1071del' in results.keys() + assert results['NM_003119.3:c.1046_1071del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.1046_1071del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.1046_1071del']['alt_genomic_loci'] == [] + assert results['NM_003119.3:c.1046_1071del']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.3:c.1046_1071del']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.1046_1071del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Gly349AlafsTer38)', 'slr': 'NP_003110.1:p.(G349Afs*38)'} + assert results['NM_003119.3:c.1046_1071del']['submitted_variant'] == '16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C' + assert results['NM_003119.3:c.1046_1071del']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.1046_1071del']['hgvs_lrg_variant'] == '' + assert 
results['NM_003119.3:c.1046_1071del']['hgvs_transcript_variant'] == 'NM_003119.3:c.1046_1071del' + assert results['NM_003119.3:c.1046_1071del']['hgvs_refseqgene_variant'] == '' + assert results['NM_003119.3:c.1046_1071del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': 'chr16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89598368', 'alt': 'C'}} + assert results['NM_003119.3:c.1046_1071del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89531962_89531987del', 'vcf': {'chr': 'chr16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89531960', 'alt': 'C'}} + assert results['NM_003119.3:c.1046_1071del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': '16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89598368', 'alt': 'C'}} + assert results['NM_003119.3:c.1046_1071del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89531962_89531987del', 'vcf': {'chr': '16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89531960', 'alt': 'C'}} + assert results['NM_003119.3:c.1046_1071del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} + + + def test_variant248(self): + variant = '16-89613064-AGGAGAGGCG-AT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001363850.1:c.1450-1_1457delinsT' in results.keys() + assert results['NM_001363850.1:c.1450-1_1457delinsT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.1450-1_1457delinsT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.1450-1_1457delinsT']['alt_genomic_loci'] == [] + assert results['NM_001363850.1:c.1450-1_1457delinsT']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit 
(SPG7), transcript variant 3, mRNA' + assert results['NM_001363850.1:c.1450-1_1457delinsT']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.1450-1_1457delinsT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.?', 'slr': 'NP_001350779.1:p.?'} + assert results['NM_001363850.1:c.1450-1_1457delinsT']['submitted_variant'] == '16-89613064-AGGAGAGGCG-AT' + assert results['NM_001363850.1:c.1450-1_1457delinsT']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001363850.1):c.1450-1_1457delinsT' + assert results['NM_001363850.1:c.1450-1_1457delinsT']['hgvs_lrg_variant'] == '' + assert results['NM_001363850.1:c.1450-1_1457delinsT']['hgvs_transcript_variant'] == 'NM_001363850.1:c.1450-1_1457delinsT' + assert results['NM_001363850.1:c.1450-1_1457delinsT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001363850.1:c.1450-1_1457delinsT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613065_89613073delinsT', 'vcf': {'chr': 'chr16', 'ref': 'GGAGAGGCG', 'pos': '89613065', 'alt': 'T'}} + assert 'hg38' not in results['NM_001363850.1:c.1450-1_1457delinsT']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.1450-1_1457delinsT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613065_89613073delinsT', 'vcf': {'chr': '16', 'ref': 'GGAGAGGCG', 'pos': '89613065', 'alt': 'T'}} + assert 'grch38' not in results['NM_001363850.1:c.1450-1_1457delinsT']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.1450-1_1457delinsT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_003119.2:c.1450-1_1457delinsT' in results.keys() + assert results['NM_003119.2:c.1450-1_1457delinsT']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_003119.2:c.1450-1_1457delinsT']['refseqgene_context_intronic_sequence'] == 'NG_008082.1(NM_003119.2):c.1450-1_1457delinsT' + assert results['NM_003119.2:c.1450-1_1457delinsT']['alt_genomic_loci'] == [] + assert results['NM_003119.2:c.1450-1_1457delinsT']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.2:c.1450-1_1457delinsT']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.1450-1_1457delinsT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} + assert results['NM_003119.2:c.1450-1_1457delinsT']['submitted_variant'] == '16-89613064-AGGAGAGGCG-AT' + assert results['NM_003119.2:c.1450-1_1457delinsT']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.2):c.1450-1_1457delinsT' + assert results['NM_003119.2:c.1450-1_1457delinsT']['hgvs_lrg_variant'] == '' + assert results['NM_003119.2:c.1450-1_1457delinsT']['hgvs_transcript_variant'] == 'NM_003119.2:c.1450-1_1457delinsT' + assert results['NM_003119.2:c.1450-1_1457delinsT']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.43261_43269delinsT' + assert results['NM_003119.2:c.1450-1_1457delinsT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613065_89613073delinsT', 'vcf': {'chr': 'chr16', 'ref': 'GGAGAGGCG', 'pos': '89613065', 'alt': 'T'}} + assert 'hg38' not in results['NM_003119.2:c.1450-1_1457delinsT']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.1450-1_1457delinsT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613065_89613073delinsT', 'vcf': {'chr': '16', 'ref': 'GGAGAGGCG', 'pos': '89613065', 'alt': 'T'}} + assert 'grch38' not in results['NM_003119.2:c.1450-1_1457delinsT']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.1450-1_1457delinsT']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} + + assert 'NM_003119.3:c.1450-1_1457delinsT' in results.keys() + assert results['NM_003119.3:c.1450-1_1457delinsT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.1450-1_1457delinsT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.1450-1_1457delinsT']['alt_genomic_loci'] == [] + assert results['NM_003119.3:c.1450-1_1457delinsT']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.3:c.1450-1_1457delinsT']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.1450-1_1457delinsT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} + assert results['NM_003119.3:c.1450-1_1457delinsT']['submitted_variant'] == '16-89613064-AGGAGAGGCG-AT' + assert results['NM_003119.3:c.1450-1_1457delinsT']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.3):c.1450-1_1457delinsT' + assert results['NM_003119.3:c.1450-1_1457delinsT']['hgvs_lrg_variant'] == '' + assert results['NM_003119.3:c.1450-1_1457delinsT']['hgvs_transcript_variant'] == 'NM_003119.3:c.1450-1_1457delinsT' + assert results['NM_003119.3:c.1450-1_1457delinsT']['hgvs_refseqgene_variant'] == '' + assert results['NM_003119.3:c.1450-1_1457delinsT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613065_89613073delinsT', 'vcf': {'chr': 'chr16', 'ref': 'GGAGAGGCG', 'pos': '89613065', 'alt': 'T'}} + assert results['NM_003119.3:c.1450-1_1457delinsT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89546657_89546665delinsT', 'vcf': {'chr': 'chr16', 'ref': 'GGAGAGGCG', 'pos': '89546657', 'alt': 'T'}} + assert 
results['NM_003119.3:c.1450-1_1457delinsT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613065_89613073delinsT', 'vcf': {'chr': '16', 'ref': 'GGAGAGGCG', 'pos': '89613065', 'alt': 'T'}} + assert results['NM_003119.3:c.1450-1_1457delinsT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89546657_89546665delinsT', 'vcf': {'chr': '16', 'ref': 'GGAGAGGCG', 'pos': '89546657', 'alt': 'T'}} + assert results['NM_003119.3:c.1450-1_1457delinsT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} + + + def test_variant249(self): + variant = '16-89613069-AGGCGGGAGA-AT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_003119.2:c.1454_1462delinsT' in results.keys() + assert results['NM_003119.2:c.1454_1462delinsT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.1454_1462delinsT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.1454_1462delinsT']['alt_genomic_loci'] == [] + assert results['NM_003119.2:c.1454_1462delinsT']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.2:c.1454_1462delinsT']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.1454_1462delinsT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Arg485IlefsTer3)', 'slr': 'NP_003110.1:p.(R485Ifs*3)'} + assert results['NM_003119.2:c.1454_1462delinsT']['submitted_variant'] == '16-89613069-AGGCGGGAGA-AT' + assert results['NM_003119.2:c.1454_1462delinsT']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.1454_1462delinsT']['hgvs_lrg_variant'] == '' + assert results['NM_003119.2:c.1454_1462delinsT']['hgvs_transcript_variant'] == 'NM_003119.2:c.1454_1462delinsT' + assert 
results['NM_003119.2:c.1454_1462delinsT']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.43266_43274delinsT' + assert results['NM_003119.2:c.1454_1462delinsT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613070_89613078delinsT', 'vcf': {'chr': 'chr16', 'ref': 'GGCGGGAGA', 'pos': '89613070', 'alt': 'T'}} + assert 'hg38' not in results['NM_003119.2:c.1454_1462delinsT']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.1454_1462delinsT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613070_89613078delinsT', 'vcf': {'chr': '16', 'ref': 'GGCGGGAGA', 'pos': '89613070', 'alt': 'T'}} + assert 'grch38' not in results['NM_003119.2:c.1454_1462delinsT']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.1454_1462delinsT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001363850.1:c.1454_1462delinsT' in results.keys() + assert results['NM_001363850.1:c.1454_1462delinsT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.1454_1462delinsT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.1454_1462delinsT']['alt_genomic_loci'] == [] + assert results['NM_001363850.1:c.1454_1462delinsT']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + assert results['NM_001363850.1:c.1454_1462delinsT']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.1454_1462delinsT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Arg485IlefsTer3)', 'slr': 'NP_001350779.1:p.(R485Ifs*3)'} + assert results['NM_001363850.1:c.1454_1462delinsT']['submitted_variant'] == '16-89613069-AGGCGGGAGA-AT' + assert 
results['NM_001363850.1:c.1454_1462delinsT']['genome_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.1454_1462delinsT']['hgvs_lrg_variant'] == '' + assert results['NM_001363850.1:c.1454_1462delinsT']['hgvs_transcript_variant'] == 'NM_001363850.1:c.1454_1462delinsT' + assert results['NM_001363850.1:c.1454_1462delinsT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001363850.1:c.1454_1462delinsT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613070_89613078delinsT', 'vcf': {'chr': 'chr16', 'ref': 'GGCGGGAGA', 'pos': '89613070', 'alt': 'T'}} + assert 'hg38' not in results['NM_001363850.1:c.1454_1462delinsT']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.1454_1462delinsT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613070_89613078delinsT', 'vcf': {'chr': '16', 'ref': 'GGCGGGAGA', 'pos': '89613070', 'alt': 'T'}} + assert 'grch38' not in results['NM_001363850.1:c.1454_1462delinsT']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.1454_1462delinsT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} + + assert 'NM_003119.3:c.1454_1462delinsT' in results.keys() + assert results['NM_003119.3:c.1454_1462delinsT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.1454_1462delinsT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.1454_1462delinsT']['alt_genomic_loci'] == [] + assert results['NM_003119.3:c.1454_1462delinsT']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.3:c.1454_1462delinsT']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.1454_1462delinsT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Arg485IlefsTer3)', 'slr': 
'NP_003110.1:p.(R485Ifs*3)'} + assert results['NM_003119.3:c.1454_1462delinsT']['submitted_variant'] == '16-89613069-AGGCGGGAGA-AT' + assert results['NM_003119.3:c.1454_1462delinsT']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.1454_1462delinsT']['hgvs_lrg_variant'] == '' + assert results['NM_003119.3:c.1454_1462delinsT']['hgvs_transcript_variant'] == 'NM_003119.3:c.1454_1462delinsT' + assert results['NM_003119.3:c.1454_1462delinsT']['hgvs_refseqgene_variant'] == '' + assert results['NM_003119.3:c.1454_1462delinsT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613070_89613078delinsT', 'vcf': {'chr': 'chr16', 'ref': 'GGCGGGAGA', 'pos': '89613070', 'alt': 'T'}} + assert results['NM_003119.3:c.1454_1462delinsT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89546662_89546670delinsT', 'vcf': {'chr': 'chr16', 'ref': 'GGCGGGAGA', 'pos': '89546662', 'alt': 'T'}} + assert results['NM_003119.3:c.1454_1462delinsT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613070_89613078delinsT', 'vcf': {'chr': '16', 'ref': 'GGCGGGAGA', 'pos': '89613070', 'alt': 'T'}} + assert results['NM_003119.3:c.1454_1462delinsT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89546662_89546670delinsT', 'vcf': {'chr': '16', 'ref': 'GGCGGGAGA', 'pos': '89546662', 'alt': 'T'}} + assert results['NM_003119.3:c.1454_1462delinsT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} + + + def test_variant250(self): + variant = '16-89613145-C-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001363850.1:c.1529C>T' in results.keys() + assert results['NM_001363850.1:c.1529C>T']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001363850.1:c.1529C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.1529C>T']['alt_genomic_loci'] == [] + assert results['NM_001363850.1:c.1529C>T']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + assert results['NM_001363850.1:c.1529C>T']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.1529C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Ala510Val)', 'slr': 'NP_001350779.1:p.(A510V)'} + assert results['NM_001363850.1:c.1529C>T']['submitted_variant'] == '16-89613145-C-T' + assert results['NM_001363850.1:c.1529C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.1529C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001363850.1:c.1529C>T']['hgvs_transcript_variant'] == 'NM_001363850.1:c.1529C>T' + assert results['NM_001363850.1:c.1529C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001363850.1:c.1529C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613145C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89613145', 'alt': 'T'}} + assert 'hg38' not in results['NM_001363850.1:c.1529C>T']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.1529C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613145C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89613145', 'alt': 'T'}} + assert 'grch38' not in results['NM_001363850.1:c.1529C>T']['primary_assembly_loci'].keys() + assert results['NM_001363850.1:c.1529C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} + + assert 'NM_003119.3:c.1529C>T' in results.keys() + assert results['NM_003119.3:c.1529C>T']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_003119.3:c.1529C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.1529C>T']['alt_genomic_loci'] == [] + assert results['NM_003119.3:c.1529C>T']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.3:c.1529C>T']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.1529C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Ala510Val)', 'slr': 'NP_003110.1:p.(A510V)'} + assert results['NM_003119.3:c.1529C>T']['submitted_variant'] == '16-89613145-C-T' + assert results['NM_003119.3:c.1529C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.1529C>T']['hgvs_lrg_variant'] == '' + assert results['NM_003119.3:c.1529C>T']['hgvs_transcript_variant'] == 'NM_003119.3:c.1529C>T' + assert results['NM_003119.3:c.1529C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_003119.3:c.1529C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613145C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89613145', 'alt': 'T'}} + assert results['NM_003119.3:c.1529C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89546737C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89546737', 'alt': 'T'}} + assert results['NM_003119.3:c.1529C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613145C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89613145', 'alt': 'T'}} + assert results['NM_003119.3:c.1529C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89546737C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89546737', 'alt': 'T'}} + assert results['NM_003119.3:c.1529C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} + + assert results['flag'] == 
'gene_variant' + assert 'NM_003119.2:c.1529C>T' in results.keys() + assert results['NM_003119.2:c.1529C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.1529C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.1529C>T']['alt_genomic_loci'] == [] + assert results['NM_003119.2:c.1529C>T']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + assert results['NM_003119.2:c.1529C>T']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.1529C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Ala510Val)', 'slr': 'NP_003110.1:p.(A510V)'} + assert results['NM_003119.2:c.1529C>T']['submitted_variant'] == '16-89613145-C-T' + assert results['NM_003119.2:c.1529C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.1529C>T']['hgvs_lrg_variant'] == '' + assert results['NM_003119.2:c.1529C>T']['hgvs_transcript_variant'] == 'NM_003119.2:c.1529C>T' + assert results['NM_003119.2:c.1529C>T']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.43341C>T' + assert results['NM_003119.2:c.1529C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613145C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89613145', 'alt': 'T'}} + assert 'hg38' not in results['NM_003119.2:c.1529C>T']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.1529C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613145C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89613145', 'alt': 'T'}} + assert 'grch38' not in results['NM_003119.2:c.1529C>T']['primary_assembly_loci'].keys() + assert results['NM_003119.2:c.1529C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} + + + def 
test_variant251(self): + variant = '17-7578194-GCAC-G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001276695.1:c.535_537del' in results.keys() + assert results['NM_001276695.1:c.535_537del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001276695.1:c.535_537del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001276695.1:c.535_537del']['alt_genomic_loci'] == [] + assert results['NM_001276695.1:c.535_537del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 4, mRNA' + assert results['NM_001276695.1:c.535_537del']['gene_symbol'] == 'TP53' + assert results['NM_001276695.1:c.535_537del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263624.1:p.(Val179del)', 'slr': 'NP_001263624.1:p.(V179del)'} + assert results['NM_001276695.1:c.535_537del']['submitted_variant'] == '17-7578194-GCAC-G' + assert results['NM_001276695.1:c.535_537del']['genome_context_intronic_sequence'] == '' + assert results['NM_001276695.1:c.535_537del']['hgvs_lrg_variant'] == '' + assert results['NM_001276695.1:c.535_537del']['hgvs_transcript_variant'] == 'NM_001276695.1:c.535_537del' + assert results['NM_001276695.1:c.535_537del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001276695.1:c.535_537del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276695.1:c.535_537del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276695.1:c.535_537del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert 
results['NM_001276695.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276695.1:c.535_537del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263624.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276695.1'} + + assert 'NM_001126113.2:c.652_654del' in results.keys() + assert results['NM_001126113.2:c.652_654del']['hgvs_lrg_transcript_variant'] == 'LRG_321t4:c.652_654del' + assert results['NM_001126113.2:c.652_654del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001126113.2:c.652_654del']['alt_genomic_loci'] == [] + assert results['NM_001126113.2:c.652_654del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 4, mRNA' + assert results['NM_001126113.2:c.652_654del']['gene_symbol'] == 'TP53' + assert results['NM_001126113.2:c.652_654del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119585.1:p.(Val218del)', 'slr': 'NP_001119585.1:p.(V218del)'} + assert results['NM_001126113.2:c.652_654del']['submitted_variant'] == '17-7578194-GCAC-G' + assert results['NM_001126113.2:c.652_654del']['genome_context_intronic_sequence'] == '' + assert results['NM_001126113.2:c.652_654del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' + assert results['NM_001126113.2:c.652_654del']['hgvs_transcript_variant'] == 'NM_001126113.2:c.652_654del' + assert results['NM_001126113.2:c.652_654del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' + assert results['NM_001126113.2:c.652_654del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126113.2:c.652_654del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001126113.2:c.652_654del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126113.2:c.652_654del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001126113.2:c.652_654del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119585.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126113.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + + assert 'NM_001126118.1:c.535_537del' in results.keys() + assert results['NM_001126118.1:c.535_537del']['hgvs_lrg_transcript_variant'] == 'LRG_321t8:c.535_537del' + assert results['NM_001126118.1:c.535_537del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001126118.1:c.535_537del']['alt_genomic_loci'] == [] + assert results['NM_001126118.1:c.535_537del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 8, mRNA' + assert results['NM_001126118.1:c.535_537del']['gene_symbol'] == 'TP53' + assert results['NM_001126118.1:c.535_537del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119590.1:p.(Val179del)', 'slr': 'NP_001119590.1:p.(V179del)'} + assert results['NM_001126118.1:c.535_537del']['submitted_variant'] == '17-7578194-GCAC-G' + assert results['NM_001126118.1:c.535_537del']['genome_context_intronic_sequence'] == '' + assert results['NM_001126118.1:c.535_537del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' + assert results['NM_001126118.1:c.535_537del']['hgvs_transcript_variant'] == 'NM_001126118.1:c.535_537del' + 
assert results['NM_001126118.1:c.535_537del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' + assert results['NM_001126118.1:c.535_537del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126118.1:c.535_537del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001126118.1:c.535_537del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126118.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001126118.1:c.535_537del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119590.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126118.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + + assert 'NM_001126116.1:c.256_258del' in results.keys() + assert results['NM_001126116.1:c.256_258del']['hgvs_lrg_transcript_variant'] == 'LRG_321t6:c.256_258del' + assert results['NM_001126116.1:c.256_258del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001126116.1:c.256_258del']['alt_genomic_loci'] == [] + assert results['NM_001126116.1:c.256_258del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 6, mRNA' + assert results['NM_001126116.1:c.256_258del']['gene_symbol'] == 'TP53' + assert results['NM_001126116.1:c.256_258del']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_001119588.1:p.(Val86del)', 'slr': 'NP_001119588.1:p.(V86del)'} + assert results['NM_001126116.1:c.256_258del']['submitted_variant'] == '17-7578194-GCAC-G' + assert results['NM_001126116.1:c.256_258del']['genome_context_intronic_sequence'] == '' + assert results['NM_001126116.1:c.256_258del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' + assert results['NM_001126116.1:c.256_258del']['hgvs_transcript_variant'] == 'NM_001126116.1:c.256_258del' + assert results['NM_001126116.1:c.256_258del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' + assert results['NM_001126116.1:c.256_258del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126116.1:c.256_258del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001126116.1:c.256_258del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126116.1:c.256_258del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001126116.1:c.256_258del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119588.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126116.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + + assert 'NM_001126117.1:c.256_258del' in results.keys() + assert results['NM_001126117.1:c.256_258del']['hgvs_lrg_transcript_variant'] == 'LRG_321t7:c.256_258del' + assert 
results['NM_001126117.1:c.256_258del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001126117.1:c.256_258del']['alt_genomic_loci'] == [] + assert results['NM_001126117.1:c.256_258del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 7, mRNA' + assert results['NM_001126117.1:c.256_258del']['gene_symbol'] == 'TP53' + assert results['NM_001126117.1:c.256_258del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119589.1:p.(Val86del)', 'slr': 'NP_001119589.1:p.(V86del)'} + assert results['NM_001126117.1:c.256_258del']['submitted_variant'] == '17-7578194-GCAC-G' + assert results['NM_001126117.1:c.256_258del']['genome_context_intronic_sequence'] == '' + assert results['NM_001126117.1:c.256_258del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' + assert results['NM_001126117.1:c.256_258del']['hgvs_transcript_variant'] == 'NM_001126117.1:c.256_258del' + assert results['NM_001126117.1:c.256_258del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' + assert results['NM_001126117.1:c.256_258del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126117.1:c.256_258del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001126117.1:c.256_258del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126117.1:c.256_258del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001126117.1:c.256_258del']['reference_sequence_records'] == 
{'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119589.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126117.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + + assert 'NM_001276761.1:c.535_537del' in results.keys() + assert results['NM_001276761.1:c.535_537del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001276761.1:c.535_537del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001276761.1:c.535_537del']['alt_genomic_loci'] == [] + assert results['NM_001276761.1:c.535_537del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 2, mRNA' + assert results['NM_001276761.1:c.535_537del']['gene_symbol'] == 'TP53' + assert results['NM_001276761.1:c.535_537del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263690.1:p.(Val179del)', 'slr': 'NP_001263690.1:p.(V179del)'} + assert results['NM_001276761.1:c.535_537del']['submitted_variant'] == '17-7578194-GCAC-G' + assert results['NM_001276761.1:c.535_537del']['genome_context_intronic_sequence'] == '' + assert results['NM_001276761.1:c.535_537del']['hgvs_lrg_variant'] == '' + assert results['NM_001276761.1:c.535_537del']['hgvs_transcript_variant'] == 'NM_001276761.1:c.535_537del' + assert results['NM_001276761.1:c.535_537del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001276761.1:c.535_537del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276761.1:c.535_537del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276761.1:c.535_537del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 
'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276761.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276761.1:c.535_537del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263690.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276761.1'} + + assert 'NM_001126112.2:c.652_654del' in results.keys() + assert results['NM_001126112.2:c.652_654del']['hgvs_lrg_transcript_variant'] == 'LRG_321t2:c.652_654del' + assert results['NM_001126112.2:c.652_654del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001126112.2:c.652_654del']['alt_genomic_loci'] == [] + assert results['NM_001126112.2:c.652_654del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 2, mRNA' + assert results['NM_001126112.2:c.652_654del']['gene_symbol'] == 'TP53' + assert results['NM_001126112.2:c.652_654del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119584.1:p.(Val218del)', 'slr': 'NP_001119584.1:p.(V218del)'} + assert results['NM_001126112.2:c.652_654del']['submitted_variant'] == '17-7578194-GCAC-G' + assert results['NM_001126112.2:c.652_654del']['genome_context_intronic_sequence'] == '' + assert results['NM_001126112.2:c.652_654del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' + assert results['NM_001126112.2:c.652_654del']['hgvs_transcript_variant'] == 'NM_001126112.2:c.652_654del' + assert results['NM_001126112.2:c.652_654del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' + assert results['NM_001126112.2:c.652_654del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert 
results['NM_001126112.2:c.652_654del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001126112.2:c.652_654del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126112.2:c.652_654del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001126112.2:c.652_654del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119584.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126112.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001276697.1:c.175_177del' in results.keys() + assert results['NM_001276697.1:c.175_177del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001276697.1:c.175_177del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001276697.1:c.175_177del']['alt_genomic_loci'] == [] + assert results['NM_001276697.1:c.175_177del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 5, mRNA' + assert results['NM_001276697.1:c.175_177del']['gene_symbol'] == 'TP53' + assert results['NM_001276697.1:c.175_177del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263626.1:p.(Val59del)', 'slr': 'NP_001263626.1:p.(V59del)'} + assert results['NM_001276697.1:c.175_177del']['submitted_variant'] == '17-7578194-GCAC-G' + assert results['NM_001276697.1:c.175_177del']['genome_context_intronic_sequence'] == '' + assert results['NM_001276697.1:c.175_177del']['hgvs_lrg_variant'] == '' + assert 
results['NM_001276697.1:c.175_177del']['hgvs_transcript_variant'] == 'NM_001276697.1:c.175_177del' + assert results['NM_001276697.1:c.175_177del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001276697.1:c.175_177del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276697.1:c.175_177del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276697.1:c.175_177del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276697.1:c.175_177del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276697.1:c.175_177del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263626.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276697.1'} + + assert 'NM_001276696.1:c.535_537del' in results.keys() + assert results['NM_001276696.1:c.535_537del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001276696.1:c.535_537del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001276696.1:c.535_537del']['alt_genomic_loci'] == [] + assert results['NM_001276696.1:c.535_537del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 3, mRNA' + assert results['NM_001276696.1:c.535_537del']['gene_symbol'] == 'TP53' + assert results['NM_001276696.1:c.535_537del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263625.1:p.(Val179del)', 'slr': 'NP_001263625.1:p.(V179del)'} + assert 
results['NM_001276696.1:c.535_537del']['submitted_variant'] == '17-7578194-GCAC-G' + assert results['NM_001276696.1:c.535_537del']['genome_context_intronic_sequence'] == '' + assert results['NM_001276696.1:c.535_537del']['hgvs_lrg_variant'] == '' + assert results['NM_001276696.1:c.535_537del']['hgvs_transcript_variant'] == 'NM_001276696.1:c.535_537del' + assert results['NM_001276696.1:c.535_537del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001276696.1:c.535_537del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276696.1:c.535_537del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276696.1:c.535_537del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276696.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276696.1:c.535_537del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263625.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276696.1'} + + assert 'NM_001276698.1:c.175_177del' in results.keys() + assert results['NM_001276698.1:c.175_177del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001276698.1:c.175_177del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001276698.1:c.175_177del']['alt_genomic_loci'] == [] + assert results['NM_001276698.1:c.175_177del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 6, mRNA' + assert 
results['NM_001276698.1:c.175_177del']['gene_symbol'] == 'TP53' + assert results['NM_001276698.1:c.175_177del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263627.1:p.(Val59del)', 'slr': 'NP_001263627.1:p.(V59del)'} + assert results['NM_001276698.1:c.175_177del']['submitted_variant'] == '17-7578194-GCAC-G' + assert results['NM_001276698.1:c.175_177del']['genome_context_intronic_sequence'] == '' + assert results['NM_001276698.1:c.175_177del']['hgvs_lrg_variant'] == '' + assert results['NM_001276698.1:c.175_177del']['hgvs_transcript_variant'] == 'NM_001276698.1:c.175_177del' + assert results['NM_001276698.1:c.175_177del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001276698.1:c.175_177del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276698.1:c.175_177del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276698.1:c.175_177del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276698.1:c.175_177del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276698.1:c.175_177del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263627.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276698.1'} + + assert 'NM_001126115.1:c.256_258del' in results.keys() + assert results['NM_001126115.1:c.256_258del']['hgvs_lrg_transcript_variant'] == 'LRG_321t5:c.256_258del' + assert 
results['NM_001126115.1:c.256_258del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001126115.1:c.256_258del']['alt_genomic_loci'] == [] + assert results['NM_001126115.1:c.256_258del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 5, mRNA' + assert results['NM_001126115.1:c.256_258del']['gene_symbol'] == 'TP53' + assert results['NM_001126115.1:c.256_258del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119587.1:p.(Val86del)', 'slr': 'NP_001119587.1:p.(V86del)'} + assert results['NM_001126115.1:c.256_258del']['submitted_variant'] == '17-7578194-GCAC-G' + assert results['NM_001126115.1:c.256_258del']['genome_context_intronic_sequence'] == '' + assert results['NM_001126115.1:c.256_258del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' + assert results['NM_001126115.1:c.256_258del']['hgvs_transcript_variant'] == 'NM_001126115.1:c.256_258del' + assert results['NM_001126115.1:c.256_258del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' + assert results['NM_001126115.1:c.256_258del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126115.1:c.256_258del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001126115.1:c.256_258del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126115.1:c.256_258del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001126115.1:c.256_258del']['reference_sequence_records'] == 
{'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119587.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126115.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + + assert 'NM_001126114.2:c.652_654del' in results.keys() + assert results['NM_001126114.2:c.652_654del']['hgvs_lrg_transcript_variant'] == 'LRG_321t3:c.652_654del' + assert results['NM_001126114.2:c.652_654del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001126114.2:c.652_654del']['alt_genomic_loci'] == [] + assert results['NM_001126114.2:c.652_654del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 3, mRNA' + assert results['NM_001126114.2:c.652_654del']['gene_symbol'] == 'TP53' + assert results['NM_001126114.2:c.652_654del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119586.1:p.(Val218del)', 'slr': 'NP_001119586.1:p.(V218del)'} + assert results['NM_001126114.2:c.652_654del']['submitted_variant'] == '17-7578194-GCAC-G' + assert results['NM_001126114.2:c.652_654del']['genome_context_intronic_sequence'] == '' + assert results['NM_001126114.2:c.652_654del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' + assert results['NM_001126114.2:c.652_654del']['hgvs_transcript_variant'] == 'NM_001126114.2:c.652_654del' + assert results['NM_001126114.2:c.652_654del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' + assert results['NM_001126114.2:c.652_654del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126114.2:c.652_654del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert 
results['NM_001126114.2:c.652_654del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126114.2:c.652_654del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001126114.2:c.652_654del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119586.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126114.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + + assert 'NM_001276699.1:c.175_177del' in results.keys() + assert results['NM_001276699.1:c.175_177del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001276699.1:c.175_177del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001276699.1:c.175_177del']['alt_genomic_loci'] == [] + assert results['NM_001276699.1:c.175_177del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 7, mRNA' + assert results['NM_001276699.1:c.175_177del']['gene_symbol'] == 'TP53' + assert results['NM_001276699.1:c.175_177del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263628.1:p.(Val59del)', 'slr': 'NP_001263628.1:p.(V59del)'} + assert results['NM_001276699.1:c.175_177del']['submitted_variant'] == '17-7578194-GCAC-G' + assert results['NM_001276699.1:c.175_177del']['genome_context_intronic_sequence'] == '' + assert results['NM_001276699.1:c.175_177del']['hgvs_lrg_variant'] == '' + assert results['NM_001276699.1:c.175_177del']['hgvs_transcript_variant'] == 'NM_001276699.1:c.175_177del' + assert results['NM_001276699.1:c.175_177del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001276699.1:c.175_177del']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276699.1:c.175_177del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276699.1:c.175_177del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276699.1:c.175_177del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276699.1:c.175_177del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263628.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276699.1'} + + assert 'NM_001276760.1:c.535_537del' in results.keys() + assert results['NM_001276760.1:c.535_537del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001276760.1:c.535_537del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001276760.1:c.535_537del']['alt_genomic_loci'] == [] + assert results['NM_001276760.1:c.535_537del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 1, mRNA' + assert results['NM_001276760.1:c.535_537del']['gene_symbol'] == 'TP53' + assert results['NM_001276760.1:c.535_537del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263689.1:p.(Val179del)', 'slr': 'NP_001263689.1:p.(V179del)'} + assert results['NM_001276760.1:c.535_537del']['submitted_variant'] == '17-7578194-GCAC-G' + assert results['NM_001276760.1:c.535_537del']['genome_context_intronic_sequence'] == '' + assert results['NM_001276760.1:c.535_537del']['hgvs_lrg_variant'] == '' + assert 
results['NM_001276760.1:c.535_537del']['hgvs_transcript_variant'] == 'NM_001276760.1:c.535_537del' + assert results['NM_001276760.1:c.535_537del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001276760.1:c.535_537del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276760.1:c.535_537del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276760.1:c.535_537del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276760.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276760.1:c.535_537del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263689.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276760.1'} + + assert 'NM_000546.5:c.652_654del' in results.keys() + assert results['NM_000546.5:c.652_654del']['hgvs_lrg_transcript_variant'] == 'LRG_321t1:c.652_654del' + assert results['NM_000546.5:c.652_654del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000546.5:c.652_654del']['alt_genomic_loci'] == [] + assert results['NM_000546.5:c.652_654del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 1, mRNA' + assert results['NM_000546.5:c.652_654del']['gene_symbol'] == 'TP53' + assert results['NM_000546.5:c.652_654del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000537.3(LRG_321p1):p.(Val218del)', 'slr': 'NP_000537.3:p.(V218del)'} + assert 
results['NM_000546.5:c.652_654del']['submitted_variant'] == '17-7578194-GCAC-G' + assert results['NM_000546.5:c.652_654del']['genome_context_intronic_sequence'] == '' + assert results['NM_000546.5:c.652_654del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' + assert results['NM_000546.5:c.652_654del']['hgvs_transcript_variant'] == 'NM_000546.5:c.652_654del' + assert results['NM_000546.5:c.652_654del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' + assert results['NM_000546.5:c.652_654del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_000546.5:c.652_654del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_000546.5:c.652_654del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_000546.5:c.652_654del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_000546.5:c.652_654del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000537.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000546.5', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + + + def test_variant252(self): + variant = '17-7578523-T-TG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001276760.1:c.289dup' in results.keys() + assert results['NM_001276760.1:c.289dup']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001276760.1:c.289dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001276760.1:c.289dup']['alt_genomic_loci'] == [] + assert results['NM_001276760.1:c.289dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 1, mRNA' + assert results['NM_001276760.1:c.289dup']['gene_symbol'] == 'TP53' + assert results['NM_001276760.1:c.289dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263689.1:p.(Gln97ProfsTer13)', 'slr': 'NP_001263689.1:p.(Q97Pfs*13)'} + assert results['NM_001276760.1:c.289dup']['submitted_variant'] == '17-7578523-T-TG' + assert results['NM_001276760.1:c.289dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001276760.1:c.289dup']['hgvs_lrg_variant'] == '' + assert results['NM_001276760.1:c.289dup']['hgvs_transcript_variant'] == 'NM_001276760.1:c.289dup' + assert results['NM_001276760.1:c.289dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001276760.1:c.289dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276760.1:c.289dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276760.1:c.289dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276760.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276760.1:c.289dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263689.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276760.1'} + + assert 
'NM_001126118.1:c.289dup' in results.keys() + assert results['NM_001126118.1:c.289dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t8:c.289dup' + assert results['NM_001126118.1:c.289dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001126118.1:c.289dup']['alt_genomic_loci'] == [] + assert results['NM_001126118.1:c.289dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 8, mRNA' + assert results['NM_001126118.1:c.289dup']['gene_symbol'] == 'TP53' + assert results['NM_001126118.1:c.289dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119590.1:p.(Gln97ProfsTer13)', 'slr': 'NP_001119590.1:p.(Q97Pfs*13)'} + assert results['NM_001126118.1:c.289dup']['submitted_variant'] == '17-7578523-T-TG' + assert results['NM_001126118.1:c.289dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001126118.1:c.289dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' + assert results['NM_001126118.1:c.289dup']['hgvs_transcript_variant'] == 'NM_001126118.1:c.289dup' + assert results['NM_001126118.1:c.289dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' + assert results['NM_001126118.1:c.289dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001126118.1:c.289dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126118.1:c.289dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001126118.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert 
results['NM_001126118.1:c.289dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119590.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126118.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + + assert 'NM_001276695.1:c.289dup' in results.keys() + assert results['NM_001276695.1:c.289dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001276695.1:c.289dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001276695.1:c.289dup']['alt_genomic_loci'] == [] + assert results['NM_001276695.1:c.289dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 4, mRNA' + assert results['NM_001276695.1:c.289dup']['gene_symbol'] == 'TP53' + assert results['NM_001276695.1:c.289dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263624.1:p.(Gln97ProfsTer13)', 'slr': 'NP_001263624.1:p.(Q97Pfs*13)'} + assert results['NM_001276695.1:c.289dup']['submitted_variant'] == '17-7578523-T-TG' + assert results['NM_001276695.1:c.289dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001276695.1:c.289dup']['hgvs_lrg_variant'] == '' + assert results['NM_001276695.1:c.289dup']['hgvs_transcript_variant'] == 'NM_001276695.1:c.289dup' + assert results['NM_001276695.1:c.289dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001276695.1:c.289dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276695.1:c.289dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276695.1:c.289dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 
'17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276695.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276695.1:c.289dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263624.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276695.1'} + + assert 'NM_001276699.1:c.-72dup' in results.keys() + assert results['NM_001276699.1:c.-72dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001276699.1:c.-72dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001276699.1:c.-72dup']['alt_genomic_loci'] == [] + assert results['NM_001276699.1:c.-72dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 7, mRNA' + assert results['NM_001276699.1:c.-72dup']['gene_symbol'] == 'TP53' + assert results['NM_001276699.1:c.-72dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263628.1:p.?', 'slr': 'NP_001263628.1:p.?'} + assert results['NM_001276699.1:c.-72dup']['submitted_variant'] == '17-7578523-T-TG' + assert results['NM_001276699.1:c.-72dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001276699.1:c.-72dup']['hgvs_lrg_variant'] == '' + assert results['NM_001276699.1:c.-72dup']['hgvs_transcript_variant'] == 'NM_001276699.1:c.-72dup' + assert results['NM_001276699.1:c.-72dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001276699.1:c.-72dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276699.1:c.-72dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert 
results['NM_001276699.1:c.-72dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276699.1:c.-72dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276699.1:c.-72dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263628.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276699.1'} + + assert 'NM_001126115.1:c.10dup' in results.keys() + assert results['NM_001126115.1:c.10dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t5:c.10dup' + assert results['NM_001126115.1:c.10dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001126115.1:c.10dup']['alt_genomic_loci'] == [] + assert results['NM_001126115.1:c.10dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 5, mRNA' + assert results['NM_001126115.1:c.10dup']['gene_symbol'] == 'TP53' + assert results['NM_001126115.1:c.10dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119587.1:p.(Gln4ProfsTer13)', 'slr': 'NP_001119587.1:p.(Q4Pfs*13)'} + assert results['NM_001126115.1:c.10dup']['submitted_variant'] == '17-7578523-T-TG' + assert results['NM_001126115.1:c.10dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001126115.1:c.10dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' + assert results['NM_001126115.1:c.10dup']['hgvs_transcript_variant'] == 'NM_001126115.1:c.10dup' + assert results['NM_001126115.1:c.10dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' + assert results['NM_001126115.1:c.10dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert 
results['NM_001126115.1:c.10dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126115.1:c.10dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001126115.1:c.10dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126115.1:c.10dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119587.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126115.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + + assert 'NM_001276697.1:c.-72dup' in results.keys() + assert results['NM_001276697.1:c.-72dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001276697.1:c.-72dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001276697.1:c.-72dup']['alt_genomic_loci'] == [] + assert results['NM_001276697.1:c.-72dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 5, mRNA' + assert results['NM_001276697.1:c.-72dup']['gene_symbol'] == 'TP53' + assert results['NM_001276697.1:c.-72dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263626.1:p.?', 'slr': 'NP_001263626.1:p.?'} + assert results['NM_001276697.1:c.-72dup']['submitted_variant'] == '17-7578523-T-TG' + assert results['NM_001276697.1:c.-72dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001276697.1:c.-72dup']['hgvs_lrg_variant'] == '' + assert results['NM_001276697.1:c.-72dup']['hgvs_transcript_variant'] == 'NM_001276697.1:c.-72dup' + assert 
results['NM_001276697.1:c.-72dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001276697.1:c.-72dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276697.1:c.-72dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276697.1:c.-72dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276697.1:c.-72dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276697.1:c.-72dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263626.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276697.1'} + + assert 'NM_001126117.1:c.10dup' in results.keys() + assert results['NM_001126117.1:c.10dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t7:c.10dup' + assert results['NM_001126117.1:c.10dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001126117.1:c.10dup']['alt_genomic_loci'] == [] + assert results['NM_001126117.1:c.10dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 7, mRNA' + assert results['NM_001126117.1:c.10dup']['gene_symbol'] == 'TP53' + assert results['NM_001126117.1:c.10dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119589.1:p.(Gln4ProfsTer13)', 'slr': 'NP_001119589.1:p.(Q4Pfs*13)'} + assert results['NM_001126117.1:c.10dup']['submitted_variant'] == '17-7578523-T-TG' + assert results['NM_001126117.1:c.10dup']['genome_context_intronic_sequence'] == '' + assert 
results['NM_001126117.1:c.10dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' + assert results['NM_001126117.1:c.10dup']['hgvs_transcript_variant'] == 'NM_001126117.1:c.10dup' + assert results['NM_001126117.1:c.10dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' + assert results['NM_001126117.1:c.10dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001126117.1:c.10dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126117.1:c.10dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001126117.1:c.10dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126117.1:c.10dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119589.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126117.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + + assert 'NM_000546.5:c.406dup' in results.keys() + assert results['NM_000546.5:c.406dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t1:c.406dup' + assert results['NM_000546.5:c.406dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000546.5:c.406dup']['alt_genomic_loci'] == [] + assert results['NM_000546.5:c.406dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 1, mRNA' + assert results['NM_000546.5:c.406dup']['gene_symbol'] == 'TP53' + assert 
results['NM_000546.5:c.406dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000537.3(LRG_321p1):p.(Gln136ProfsTer13)', 'slr': 'NP_000537.3:p.(Q136Pfs*13)'} + assert results['NM_000546.5:c.406dup']['submitted_variant'] == '17-7578523-T-TG' + assert results['NM_000546.5:c.406dup']['genome_context_intronic_sequence'] == '' + assert results['NM_000546.5:c.406dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' + assert results['NM_000546.5:c.406dup']['hgvs_transcript_variant'] == 'NM_000546.5:c.406dup' + assert results['NM_000546.5:c.406dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' + assert results['NM_000546.5:c.406dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_000546.5:c.406dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_000546.5:c.406dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_000546.5:c.406dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_000546.5:c.406dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000537.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000546.5', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001276696.1:c.289dup' in results.keys() + assert results['NM_001276696.1:c.289dup']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001276696.1:c.289dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001276696.1:c.289dup']['alt_genomic_loci'] == [] + assert results['NM_001276696.1:c.289dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 3, mRNA' + assert results['NM_001276696.1:c.289dup']['gene_symbol'] == 'TP53' + assert results['NM_001276696.1:c.289dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263625.1:p.(Gln97ProfsTer13)', 'slr': 'NP_001263625.1:p.(Q97Pfs*13)'} + assert results['NM_001276696.1:c.289dup']['submitted_variant'] == '17-7578523-T-TG' + assert results['NM_001276696.1:c.289dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001276696.1:c.289dup']['hgvs_lrg_variant'] == '' + assert results['NM_001276696.1:c.289dup']['hgvs_transcript_variant'] == 'NM_001276696.1:c.289dup' + assert results['NM_001276696.1:c.289dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001276696.1:c.289dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276696.1:c.289dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276696.1:c.289dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276696.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276696.1:c.289dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263625.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276696.1'} + + assert 
'NM_001276698.1:c.-72dup' in results.keys() + assert results['NM_001276698.1:c.-72dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001276698.1:c.-72dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001276698.1:c.-72dup']['alt_genomic_loci'] == [] + assert results['NM_001276698.1:c.-72dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 6, mRNA' + assert results['NM_001276698.1:c.-72dup']['gene_symbol'] == 'TP53' + assert results['NM_001276698.1:c.-72dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263627.1:p.?', 'slr': 'NP_001263627.1:p.?'} + assert results['NM_001276698.1:c.-72dup']['submitted_variant'] == '17-7578523-T-TG' + assert results['NM_001276698.1:c.-72dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001276698.1:c.-72dup']['hgvs_lrg_variant'] == '' + assert results['NM_001276698.1:c.-72dup']['hgvs_transcript_variant'] == 'NM_001276698.1:c.-72dup' + assert results['NM_001276698.1:c.-72dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001276698.1:c.-72dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276698.1:c.-72dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276698.1:c.-72dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276698.1:c.-72dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276698.1:c.-72dup']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263627.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276698.1'} + + assert 'NM_001276761.1:c.289dup' in results.keys() + assert results['NM_001276761.1:c.289dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001276761.1:c.289dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001276761.1:c.289dup']['alt_genomic_loci'] == [] + assert results['NM_001276761.1:c.289dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 2, mRNA' + assert results['NM_001276761.1:c.289dup']['gene_symbol'] == 'TP53' + assert results['NM_001276761.1:c.289dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263690.1:p.(Gln97ProfsTer13)', 'slr': 'NP_001263690.1:p.(Q97Pfs*13)'} + assert results['NM_001276761.1:c.289dup']['submitted_variant'] == '17-7578523-T-TG' + assert results['NM_001276761.1:c.289dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001276761.1:c.289dup']['hgvs_lrg_variant'] == '' + assert results['NM_001276761.1:c.289dup']['hgvs_transcript_variant'] == 'NM_001276761.1:c.289dup' + assert results['NM_001276761.1:c.289dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001276761.1:c.289dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276761.1:c.289dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276761.1:c.289dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276761.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 
'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276761.1:c.289dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263690.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276761.1'} + + assert 'NM_001126113.2:c.406dup' in results.keys() + assert results['NM_001126113.2:c.406dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t4:c.406dup' + assert results['NM_001126113.2:c.406dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001126113.2:c.406dup']['alt_genomic_loci'] == [] + assert results['NM_001126113.2:c.406dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 4, mRNA' + assert results['NM_001126113.2:c.406dup']['gene_symbol'] == 'TP53' + assert results['NM_001126113.2:c.406dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119585.1:p.(Gln136ProfsTer13)', 'slr': 'NP_001119585.1:p.(Q136Pfs*13)'} + assert results['NM_001126113.2:c.406dup']['submitted_variant'] == '17-7578523-T-TG' + assert results['NM_001126113.2:c.406dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001126113.2:c.406dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' + assert results['NM_001126113.2:c.406dup']['hgvs_transcript_variant'] == 'NM_001126113.2:c.406dup' + assert results['NM_001126113.2:c.406dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' + assert results['NM_001126113.2:c.406dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001126113.2:c.406dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126113.2:c.406dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 
'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001126113.2:c.406dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126113.2:c.406dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119585.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126113.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + + assert 'NM_001126116.1:c.10dup' in results.keys() + assert results['NM_001126116.1:c.10dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t6:c.10dup' + assert results['NM_001126116.1:c.10dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001126116.1:c.10dup']['alt_genomic_loci'] == [] + assert results['NM_001126116.1:c.10dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 6, mRNA' + assert results['NM_001126116.1:c.10dup']['gene_symbol'] == 'TP53' + assert results['NM_001126116.1:c.10dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119588.1:p.(Gln4ProfsTer13)', 'slr': 'NP_001119588.1:p.(Q4Pfs*13)'} + assert results['NM_001126116.1:c.10dup']['submitted_variant'] == '17-7578523-T-TG' + assert results['NM_001126116.1:c.10dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001126116.1:c.10dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' + assert results['NM_001126116.1:c.10dup']['hgvs_transcript_variant'] == 'NM_001126116.1:c.10dup' + assert results['NM_001126116.1:c.10dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' + assert results['NM_001126116.1:c.10dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001126116.1:c.10dup']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126116.1:c.10dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001126116.1:c.10dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126116.1:c.10dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119588.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126116.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + + assert 'NM_001126112.2:c.406dup' in results.keys() + assert results['NM_001126112.2:c.406dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t2:c.406dup' + assert results['NM_001126112.2:c.406dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001126112.2:c.406dup']['alt_genomic_loci'] == [] + assert results['NM_001126112.2:c.406dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 2, mRNA' + assert results['NM_001126112.2:c.406dup']['gene_symbol'] == 'TP53' + assert results['NM_001126112.2:c.406dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119584.1:p.(Gln136ProfsTer13)', 'slr': 'NP_001119584.1:p.(Q136Pfs*13)'} + assert results['NM_001126112.2:c.406dup']['submitted_variant'] == '17-7578523-T-TG' + assert results['NM_001126112.2:c.406dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001126112.2:c.406dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' + assert results['NM_001126112.2:c.406dup']['hgvs_transcript_variant'] == 'NM_001126112.2:c.406dup' + assert 
results['NM_001126112.2:c.406dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' + assert results['NM_001126112.2:c.406dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001126112.2:c.406dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126112.2:c.406dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001126112.2:c.406dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126112.2:c.406dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119584.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126112.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + + assert 'NM_001126114.2:c.406dup' in results.keys() + assert results['NM_001126114.2:c.406dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t3:c.406dup' + assert results['NM_001126114.2:c.406dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001126114.2:c.406dup']['alt_genomic_loci'] == [] + assert results['NM_001126114.2:c.406dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 3, mRNA' + assert results['NM_001126114.2:c.406dup']['gene_symbol'] == 'TP53' + assert results['NM_001126114.2:c.406dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119586.1:p.(Gln136ProfsTer13)', 'slr': 'NP_001119586.1:p.(Q136Pfs*13)'} + assert 
results['NM_001126114.2:c.406dup']['submitted_variant'] == '17-7578523-T-TG' + assert results['NM_001126114.2:c.406dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001126114.2:c.406dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' + assert results['NM_001126114.2:c.406dup']['hgvs_transcript_variant'] == 'NM_001126114.2:c.406dup' + assert results['NM_001126114.2:c.406dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' + assert results['NM_001126114.2:c.406dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001126114.2:c.406dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126114.2:c.406dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001126114.2:c.406dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126114.2:c.406dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119586.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126114.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + + + def test_variant253(self): + variant = '17-17119692-A-C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_144997.6:c.1300+2T>G' in results.keys() + assert results['NM_144997.6:c.1300+2T>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_144997.6:c.1300+2T>G']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_144997.6:c.1300+2T>G']['alt_genomic_loci'] == [] + assert results['NM_144997.6:c.1300+2T>G']['transcript_description'] == 'Homo sapiens folliculin (FLCN), transcript variant 1, mRNA' + assert results['NM_144997.6:c.1300+2T>G']['gene_symbol'] == 'FLCN' + assert results['NM_144997.6:c.1300+2T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_659434.2(LRG_325p1):p.?', 'slr': 'NP_659434.2:p.?'} + assert results['NM_144997.6:c.1300+2T>G']['submitted_variant'] == '17-17119692-A-C' + assert results['NM_144997.6:c.1300+2T>G']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_144997.6):c.1300+2T>G' + assert results['NM_144997.6:c.1300+2T>G']['hgvs_lrg_variant'] == '' + assert results['NM_144997.6:c.1300+2T>G']['hgvs_transcript_variant'] == 'NM_144997.6:c.1300+2T>G' + assert results['NM_144997.6:c.1300+2T>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_144997.6:c.1300+2T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': 'chr17', 'ref': u'A', 'pos': '17119692', 'alt': u'C'}} + assert results['NM_144997.6:c.1300+2T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': 'chr17', 'ref': u'A', 'pos': '17216378', 'alt': u'C'}} + assert results['NM_144997.6:c.1300+2T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': '17', 'ref': u'A', 'pos': '17119692', 'alt': u'C'}} + assert results['NM_144997.6:c.1300+2T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': '17', 'ref': u'A', 'pos': '17216378', 'alt': u'C'}} + assert results['NM_144997.6:c.1300+2T>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_659434.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_144997.6'} + + assert 'NM_001353230.1:c.1300+2T>G' in results.keys() + assert 
results['NM_001353230.1:c.1300+2T>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353230.1:c.1300+2T>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353230.1:c.1300+2T>G']['alt_genomic_loci'] == [] + assert results['NM_001353230.1:c.1300+2T>G']['transcript_description'] == 'Homo sapiens folliculin (FLCN), transcript variant 4, mRNA' + assert results['NM_001353230.1:c.1300+2T>G']['gene_symbol'] == 'FLCN' + assert results['NM_001353230.1:c.1300+2T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340159.1:p.?', 'slr': 'NP_001340159.1:p.?'} + assert results['NM_001353230.1:c.1300+2T>G']['submitted_variant'] == '17-17119692-A-C' + assert results['NM_001353230.1:c.1300+2T>G']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_001353230.1):c.1300+2T>G' + assert results['NM_001353230.1:c.1300+2T>G']['hgvs_lrg_variant'] == '' + assert results['NM_001353230.1:c.1300+2T>G']['hgvs_transcript_variant'] == 'NM_001353230.1:c.1300+2T>G' + assert results['NM_001353230.1:c.1300+2T>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353230.1:c.1300+2T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': 'chr17', 'ref': u'A', 'pos': '17119692', 'alt': u'C'}} + assert results['NM_001353230.1:c.1300+2T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': 'chr17', 'ref': u'A', 'pos': '17216378', 'alt': u'C'}} + assert results['NM_001353230.1:c.1300+2T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': '17', 'ref': u'A', 'pos': '17119692', 'alt': u'C'}} + assert results['NM_001353230.1:c.1300+2T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': '17', 'ref': u'A', 'pos': '17216378', 'alt': u'C'}} + assert results['NM_001353230.1:c.1300+2T>G']['reference_sequence_records'] == 
{'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340159.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353230.1'} + + assert 'NM_001353229.1:c.1354+2T>G' in results.keys() + assert results['NM_001353229.1:c.1354+2T>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353229.1:c.1354+2T>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353229.1:c.1354+2T>G']['alt_genomic_loci'] == [] + assert results['NM_001353229.1:c.1354+2T>G']['transcript_description'] == 'Homo sapiens folliculin (FLCN), transcript variant 3, mRNA' + assert results['NM_001353229.1:c.1354+2T>G']['gene_symbol'] == 'FLCN' + assert results['NM_001353229.1:c.1354+2T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340158.1:p.?', 'slr': 'NP_001340158.1:p.?'} + assert results['NM_001353229.1:c.1354+2T>G']['submitted_variant'] == '17-17119692-A-C' + assert results['NM_001353229.1:c.1354+2T>G']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_001353229.1):c.1354+2T>G' + assert results['NM_001353229.1:c.1354+2T>G']['hgvs_lrg_variant'] == '' + assert results['NM_001353229.1:c.1354+2T>G']['hgvs_transcript_variant'] == 'NM_001353229.1:c.1354+2T>G' + assert results['NM_001353229.1:c.1354+2T>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353229.1:c.1354+2T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': 'chr17', 'ref': u'A', 'pos': '17119692', 'alt': u'C'}} + assert results['NM_001353229.1:c.1354+2T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': 'chr17', 'ref': u'A', 'pos': '17216378', 'alt': u'C'}} + assert results['NM_001353229.1:c.1354+2T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': '17', 'ref': u'A', 'pos': '17119692', 'alt': u'C'}} + assert results['NM_001353229.1:c.1354+2T>G']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': '17', 'ref': u'A', 'pos': '17216378', 'alt': u'C'}} + assert results['NM_001353229.1:c.1354+2T>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340158.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353229.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_144997.5:c.1300+2T>G' in results.keys() + assert results['NM_144997.5:c.1300+2T>G']['hgvs_lrg_transcript_variant'] == 'LRG_325t1:c.1300+2T>G' + assert results['NM_144997.5:c.1300+2T>G']['refseqgene_context_intronic_sequence'] == 'NG_008001.2(NM_144997.5):c.1300+2T>G' + assert results['NM_144997.5:c.1300+2T>G']['alt_genomic_loci'] == [] + assert results['NM_144997.5:c.1300+2T>G']['transcript_description'] == 'Homo sapiens folliculin (FLCN), transcript variant 1, mRNA' + assert results['NM_144997.5:c.1300+2T>G']['gene_symbol'] == 'FLCN' + assert results['NM_144997.5:c.1300+2T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_659434.2(LRG_325p1):p.?', 'slr': 'NP_659434.2:p.?'} + assert results['NM_144997.5:c.1300+2T>G']['submitted_variant'] == '17-17119692-A-C' + assert results['NM_144997.5:c.1300+2T>G']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_144997.5):c.1300+2T>G' + assert results['NM_144997.5:c.1300+2T>G']['hgvs_lrg_variant'] == 'LRG_325:g.25811T>G' + assert results['NM_144997.5:c.1300+2T>G']['hgvs_transcript_variant'] == 'NM_144997.5:c.1300+2T>G' + assert results['NM_144997.5:c.1300+2T>G']['hgvs_refseqgene_variant'] == 'NG_008001.2:g.25811T>G' + assert results['NM_144997.5:c.1300+2T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': 'chr17', 'ref': u'A', 'pos': '17119692', 'alt': u'C'}} + assert results['NM_144997.5:c.1300+2T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': 'chr17', 'ref': u'A', 'pos': '17216378', 'alt': 
u'C'}} + assert results['NM_144997.5:c.1300+2T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': '17', 'ref': u'A', 'pos': '17119692', 'alt': u'C'}} + assert results['NM_144997.5:c.1300+2T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': '17', 'ref': u'A', 'pos': '17216378', 'alt': u'C'}} + assert results['NM_144997.5:c.1300+2T>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008001.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_659434.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_144997.5', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_325.xml'} + + assert 'NM_001353231.1:c.1300+2T>G' in results.keys() + assert results['NM_001353231.1:c.1300+2T>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353231.1:c.1300+2T>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353231.1:c.1300+2T>G']['alt_genomic_loci'] == [] + assert results['NM_001353231.1:c.1300+2T>G']['transcript_description'] == 'Homo sapiens folliculin (FLCN), transcript variant 5, mRNA' + assert results['NM_001353231.1:c.1300+2T>G']['gene_symbol'] == 'FLCN' + assert results['NM_001353231.1:c.1300+2T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340160.1:p.?', 'slr': 'NP_001340160.1:p.?'} + assert results['NM_001353231.1:c.1300+2T>G']['submitted_variant'] == '17-17119692-A-C' + assert results['NM_001353231.1:c.1300+2T>G']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_001353231.1):c.1300+2T>G' + assert results['NM_001353231.1:c.1300+2T>G']['hgvs_lrg_variant'] == '' + assert results['NM_001353231.1:c.1300+2T>G']['hgvs_transcript_variant'] == 'NM_001353231.1:c.1300+2T>G' + assert results['NM_001353231.1:c.1300+2T>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353231.1:c.1300+2T>G']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': 'chr17', 'ref': u'A', 'pos': '17119692', 'alt': u'C'}} + assert results['NM_001353231.1:c.1300+2T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': 'chr17', 'ref': u'A', 'pos': '17216378', 'alt': u'C'}} + assert results['NM_001353231.1:c.1300+2T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': '17', 'ref': u'A', 'pos': '17119692', 'alt': u'C'}} + assert results['NM_001353231.1:c.1300+2T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': '17', 'ref': u'A', 'pos': '17216378', 'alt': u'C'}} + assert results['NM_001353231.1:c.1300+2T>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340160.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353231.1'} + + + def test_variant254(self): + variant = '17-41197588-GGACA-G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_007294.3:c.*103_*106del' in results.keys() + assert results['NM_007294.3:c.*103_*106del']['hgvs_lrg_transcript_variant'] == 'LRG_292t1:c.*103_*106del' + assert results['NM_007294.3:c.*103_*106del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007294.3:c.*103_*106del']['alt_genomic_loci'] == [] + assert results['NM_007294.3:c.*103_*106del']['transcript_description'] == 'Homo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 1, mRNA' + assert results['NM_007294.3:c.*103_*106del']['gene_symbol'] == 'BRCA1' + assert results['NM_007294.3:c.*103_*106del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009225.1(LRG_292p1):p.?', 'slr': 'NP_009225.1:p.?'} + assert results['NM_007294.3:c.*103_*106del']['submitted_variant'] == '17-41197588-GGACA-G' + assert 
results['NM_007294.3:c.*103_*106del']['genome_context_intronic_sequence'] == '' + assert results['NM_007294.3:c.*103_*106del']['hgvs_lrg_variant'] == 'LRG_292:g.172409_172412del' + assert results['NM_007294.3:c.*103_*106del']['hgvs_transcript_variant'] == 'NM_007294.3:c.*103_*106del' + assert results['NM_007294.3:c.*103_*106del']['hgvs_refseqgene_variant'] == 'NG_005905.2:g.172409_172412del' + assert results['NM_007294.3:c.*103_*106del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197589_41197592del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} + assert results['NM_007294.3:c.*103_*106del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} + assert results['NM_007294.3:c.*103_*106del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197589_41197592del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} + assert results['NM_007294.3:c.*103_*106del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} + assert results['NM_007294.3:c.*103_*106del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_005905.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009225.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007294.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_292.xml'} + + assert 'NM_007297.3:c.*103_*106del' in results.keys() + assert results['NM_007297.3:c.*103_*106del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007297.3:c.*103_*106del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007297.3:c.*103_*106del']['alt_genomic_loci'] == [] + assert 
results['NM_007297.3:c.*103_*106del']['transcript_description'] == 'Homo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 3, mRNA' + assert results['NM_007297.3:c.*103_*106del']['gene_symbol'] == 'BRCA1' + assert results['NM_007297.3:c.*103_*106del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009228.2:p.?', 'slr': 'NP_009228.2:p.?'} + assert results['NM_007297.3:c.*103_*106del']['submitted_variant'] == '17-41197588-GGACA-G' + assert results['NM_007297.3:c.*103_*106del']['genome_context_intronic_sequence'] == '' + assert results['NM_007297.3:c.*103_*106del']['hgvs_lrg_variant'] == '' + assert results['NM_007297.3:c.*103_*106del']['hgvs_transcript_variant'] == 'NM_007297.3:c.*103_*106del' + assert results['NM_007297.3:c.*103_*106del']['hgvs_refseqgene_variant'] == '' + assert results['NM_007297.3:c.*103_*106del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197589_41197592del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} + assert results['NM_007297.3:c.*103_*106del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} + assert results['NM_007297.3:c.*103_*106del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197589_41197592del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} + assert results['NM_007297.3:c.*103_*106del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} + assert results['NM_007297.3:c.*103_*106del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009228.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007297.3'} + + assert 'NR_027676.1:n.5831_5834del' in results.keys() + assert 
results['NR_027676.1:n.5831_5834del']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_027676.1:n.5831_5834del']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_027676.1:n.5831_5834del']['alt_genomic_loci'] == [] + assert results['NR_027676.1:n.5831_5834del']['transcript_description'] == 'Homo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 6, non-coding RNA' + assert results['NR_027676.1:n.5831_5834del']['gene_symbol'] == 'BRCA1' + assert results['NR_027676.1:n.5831_5834del']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_027676.1:n.5831_5834del']['submitted_variant'] == '17-41197588-GGACA-G' + assert results['NR_027676.1:n.5831_5834del']['genome_context_intronic_sequence'] == '' + assert results['NR_027676.1:n.5831_5834del']['hgvs_lrg_variant'] == '' + assert results['NR_027676.1:n.5831_5834del']['hgvs_transcript_variant'] == 'NR_027676.1:n.5831_5834del' + assert results['NR_027676.1:n.5831_5834del']['hgvs_refseqgene_variant'] == '' + assert results['NR_027676.1:n.5831_5834del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197589_41197592del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} + assert results['NR_027676.1:n.5831_5834del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} + assert results['NR_027676.1:n.5831_5834del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197589_41197592del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} + assert results['NR_027676.1:n.5831_5834del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} + assert 
results['NR_027676.1:n.5831_5834del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_027676.1'} + + assert 'NM_007300.3:c.*103_*106del' in results.keys() + assert results['NM_007300.3:c.*103_*106del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007300.3:c.*103_*106del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007300.3:c.*103_*106del']['alt_genomic_loci'] == [] + assert results['NM_007300.3:c.*103_*106del']['transcript_description'] == 'Homo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 2, mRNA' + assert results['NM_007300.3:c.*103_*106del']['gene_symbol'] == 'BRCA1' + assert results['NM_007300.3:c.*103_*106del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009231.2:p.?', 'slr': 'NP_009231.2:p.?'} + assert results['NM_007300.3:c.*103_*106del']['submitted_variant'] == '17-41197588-GGACA-G' + assert results['NM_007300.3:c.*103_*106del']['genome_context_intronic_sequence'] == '' + assert results['NM_007300.3:c.*103_*106del']['hgvs_lrg_variant'] == '' + assert results['NM_007300.3:c.*103_*106del']['hgvs_transcript_variant'] == 'NM_007300.3:c.*103_*106del' + assert results['NM_007300.3:c.*103_*106del']['hgvs_refseqgene_variant'] == '' + assert results['NM_007300.3:c.*103_*106del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197589_41197592del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} + assert results['NM_007300.3:c.*103_*106del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} + assert results['NM_007300.3:c.*103_*106del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197589_41197592del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} + assert 
results['NM_007300.3:c.*103_*106del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} + assert results['NM_007300.3:c.*103_*106del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009231.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007300.3'} + + assert results['flag'] == 'gene_variant' + assert 'NM_007299.3:c.*209_*212del' in results.keys() + assert results['NM_007299.3:c.*209_*212del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007299.3:c.*209_*212del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007299.3:c.*209_*212del']['alt_genomic_loci'] == [] + assert results['NM_007299.3:c.*209_*212del']['transcript_description'] == 'Homo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 5, mRNA' + assert results['NM_007299.3:c.*209_*212del']['gene_symbol'] == 'BRCA1' + assert results['NM_007299.3:c.*209_*212del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009230.2:p.?', 'slr': 'NP_009230.2:p.?'} + assert results['NM_007299.3:c.*209_*212del']['submitted_variant'] == '17-41197588-GGACA-G' + assert results['NM_007299.3:c.*209_*212del']['genome_context_intronic_sequence'] == '' + assert results['NM_007299.3:c.*209_*212del']['hgvs_lrg_variant'] == '' + assert results['NM_007299.3:c.*209_*212del']['hgvs_transcript_variant'] == 'NM_007299.3:c.*209_*212del' + assert results['NM_007299.3:c.*209_*212del']['hgvs_refseqgene_variant'] == '' + assert results['NM_007299.3:c.*209_*212del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197589_41197592del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} + assert results['NM_007299.3:c.*209_*212del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 
'pos': '43045571', 'alt': 'G'}} + assert results['NM_007299.3:c.*209_*212del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197589_41197592del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} + assert results['NM_007299.3:c.*209_*212del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} + assert results['NM_007299.3:c.*209_*212del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009230.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007299.3'} + + assert 'NM_007298.3:c.*103_*106del' in results.keys() + assert results['NM_007298.3:c.*103_*106del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007298.3:c.*103_*106del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007298.3:c.*103_*106del']['alt_genomic_loci'] == [] + assert results['NM_007298.3:c.*103_*106del']['transcript_description'] == 'Homo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 4, mRNA' + assert results['NM_007298.3:c.*103_*106del']['gene_symbol'] == 'BRCA1' + assert results['NM_007298.3:c.*103_*106del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009229.2:p.?', 'slr': 'NP_009229.2:p.?'} + assert results['NM_007298.3:c.*103_*106del']['submitted_variant'] == '17-41197588-GGACA-G' + assert results['NM_007298.3:c.*103_*106del']['genome_context_intronic_sequence'] == '' + assert results['NM_007298.3:c.*103_*106del']['hgvs_lrg_variant'] == '' + assert results['NM_007298.3:c.*103_*106del']['hgvs_transcript_variant'] == 'NM_007298.3:c.*103_*106del' + assert results['NM_007298.3:c.*103_*106del']['hgvs_refseqgene_variant'] == '' + assert results['NM_007298.3:c.*103_*106del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197589_41197592del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 
'pos': '41197588', 'alt': 'G'}} + assert results['NM_007298.3:c.*103_*106del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} + assert results['NM_007298.3:c.*103_*106del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197589_41197592del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} + assert results['NM_007298.3:c.*103_*106del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} + assert results['NM_007298.3:c.*103_*106del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009229.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007298.3'} + + + def test_variant255(self): + variant = '17-41256884-C-G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_007299.3:c.301+1G>C' in results.keys() + assert results['NM_007299.3:c.301+1G>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007299.3:c.301+1G>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007299.3:c.301+1G>C']['alt_genomic_loci'] == [] + assert results['NM_007299.3:c.301+1G>C']['transcript_description'] == 'Homo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 5, mRNA' + assert results['NM_007299.3:c.301+1G>C']['gene_symbol'] == 'BRCA1' + assert results['NM_007299.3:c.301+1G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009230.2:p.?', 'slr': 'NP_009230.2:p.?'} + assert results['NM_007299.3:c.301+1G>C']['submitted_variant'] == '17-41256884-C-G' + assert results['NM_007299.3:c.301+1G>C']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_007299.3):c.301+1G>C' + assert results['NM_007299.3:c.301+1G>C']['hgvs_lrg_variant'] == '' + assert 
results['NM_007299.3:c.301+1G>C']['hgvs_transcript_variant'] == 'NM_007299.3:c.301+1G>C' + assert results['NM_007299.3:c.301+1G>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_007299.3:c.301+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '41256884', 'alt': u'G'}} + assert results['NM_007299.3:c.301+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '43104867', 'alt': u'G'}} + assert results['NM_007299.3:c.301+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '41256884', 'alt': u'G'}} + assert results['NM_007299.3:c.301+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '43104867', 'alt': u'G'}} + assert results['NM_007299.3:c.301+1G>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009230.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007299.3'} + + assert 'NR_027676.1:n.440+1G>C' in results.keys() + assert results['NR_027676.1:n.440+1G>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_027676.1:n.440+1G>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_027676.1:n.440+1G>C']['alt_genomic_loci'] == [] + assert results['NR_027676.1:n.440+1G>C']['transcript_description'] == 'Homo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 6, non-coding RNA' + assert results['NR_027676.1:n.440+1G>C']['gene_symbol'] == 'BRCA1' + assert results['NR_027676.1:n.440+1G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_027676.1:n.440+1G>C']['submitted_variant'] == '17-41256884-C-G' + assert 
results['NR_027676.1:n.440+1G>C']['genome_context_intronic_sequence'] == 'NC_000017.10(NR_027676.1):c.440+1G>C' + assert results['NR_027676.1:n.440+1G>C']['hgvs_lrg_variant'] == '' + assert results['NR_027676.1:n.440+1G>C']['hgvs_transcript_variant'] == 'NR_027676.1:n.440+1G>C' + assert results['NR_027676.1:n.440+1G>C']['hgvs_refseqgene_variant'] == '' + assert results['NR_027676.1:n.440+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '41256884', 'alt': u'G'}} + assert results['NR_027676.1:n.440+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '43104867', 'alt': u'G'}} + assert results['NR_027676.1:n.440+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '41256884', 'alt': u'G'}} + assert results['NR_027676.1:n.440+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '43104867', 'alt': u'G'}} + assert results['NR_027676.1:n.440+1G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_027676.1'} + + assert 'NM_007300.3:c.301+1G>C' in results.keys() + assert results['NM_007300.3:c.301+1G>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007300.3:c.301+1G>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007300.3:c.301+1G>C']['alt_genomic_loci'] == [] + assert results['NM_007300.3:c.301+1G>C']['transcript_description'] == 'Homo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 2, mRNA' + assert results['NM_007300.3:c.301+1G>C']['gene_symbol'] == 'BRCA1' + assert results['NM_007300.3:c.301+1G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009231.2:p.?', 'slr': 'NP_009231.2:p.?'} + assert 
results['NM_007300.3:c.301+1G>C']['submitted_variant'] == '17-41256884-C-G' + assert results['NM_007300.3:c.301+1G>C']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_007300.3):c.301+1G>C' + assert results['NM_007300.3:c.301+1G>C']['hgvs_lrg_variant'] == '' + assert results['NM_007300.3:c.301+1G>C']['hgvs_transcript_variant'] == 'NM_007300.3:c.301+1G>C' + assert results['NM_007300.3:c.301+1G>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_007300.3:c.301+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '41256884', 'alt': u'G'}} + assert results['NM_007300.3:c.301+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '43104867', 'alt': u'G'}} + assert results['NM_007300.3:c.301+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '41256884', 'alt': u'G'}} + assert results['NM_007300.3:c.301+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '43104867', 'alt': u'G'}} + assert results['NM_007300.3:c.301+1G>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009231.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007300.3'} + + assert 'NM_007298.3:c.301+1G>C' in results.keys() + assert results['NM_007298.3:c.301+1G>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007298.3:c.301+1G>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007298.3:c.301+1G>C']['alt_genomic_loci'] == [] + assert results['NM_007298.3:c.301+1G>C']['transcript_description'] == 'Homo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 4, mRNA' + assert results['NM_007298.3:c.301+1G>C']['gene_symbol'] == 'BRCA1' + assert 
results['NM_007298.3:c.301+1G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009229.2:p.?', 'slr': 'NP_009229.2:p.?'} + assert results['NM_007298.3:c.301+1G>C']['submitted_variant'] == '17-41256884-C-G' + assert results['NM_007298.3:c.301+1G>C']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_007298.3):c.301+1G>C' + assert results['NM_007298.3:c.301+1G>C']['hgvs_lrg_variant'] == '' + assert results['NM_007298.3:c.301+1G>C']['hgvs_transcript_variant'] == 'NM_007298.3:c.301+1G>C' + assert results['NM_007298.3:c.301+1G>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_007298.3:c.301+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '41256884', 'alt': u'G'}} + assert results['NM_007298.3:c.301+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '43104867', 'alt': u'G'}} + assert results['NM_007298.3:c.301+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '41256884', 'alt': u'G'}} + assert results['NM_007298.3:c.301+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '43104867', 'alt': u'G'}} + assert results['NM_007298.3:c.301+1G>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009229.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007298.3'} + + assert 'NM_007297.3:c.160+1G>C' in results.keys() + assert results['NM_007297.3:c.160+1G>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007297.3:c.160+1G>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007297.3:c.160+1G>C']['alt_genomic_loci'] == [] + assert results['NM_007297.3:c.160+1G>C']['transcript_description'] == 'Homo sapiens BRCA1, 
DNA repair associated (BRCA1), transcript variant 3, mRNA' + assert results['NM_007297.3:c.160+1G>C']['gene_symbol'] == 'BRCA1' + assert results['NM_007297.3:c.160+1G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009228.2:p.?', 'slr': 'NP_009228.2:p.?'} + assert results['NM_007297.3:c.160+1G>C']['submitted_variant'] == '17-41256884-C-G' + assert results['NM_007297.3:c.160+1G>C']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_007297.3):c.160+1G>C' + assert results['NM_007297.3:c.160+1G>C']['hgvs_lrg_variant'] == '' + assert results['NM_007297.3:c.160+1G>C']['hgvs_transcript_variant'] == 'NM_007297.3:c.160+1G>C' + assert results['NM_007297.3:c.160+1G>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_007297.3:c.160+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '41256884', 'alt': u'G'}} + assert results['NM_007297.3:c.160+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '43104867', 'alt': u'G'}} + assert results['NM_007297.3:c.160+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '41256884', 'alt': u'G'}} + assert results['NM_007297.3:c.160+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '43104867', 'alt': u'G'}} + assert results['NM_007297.3:c.160+1G>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009228.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007297.3'} + + assert results['flag'] == 'gene_variant' + assert 'NM_007294.3:c.301+1G>C' in results.keys() + assert results['NM_007294.3:c.301+1G>C']['hgvs_lrg_transcript_variant'] == 'LRG_292t1:c.301+1G>C' + assert 
results['NM_007294.3:c.301+1G>C']['refseqgene_context_intronic_sequence'] == 'NG_005905.2(NM_007294.3):c.301+1G>C' + assert results['NM_007294.3:c.301+1G>C']['alt_genomic_loci'] == [] + assert results['NM_007294.3:c.301+1G>C']['transcript_description'] == 'Homo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 1, mRNA' + assert results['NM_007294.3:c.301+1G>C']['gene_symbol'] == 'BRCA1' + assert results['NM_007294.3:c.301+1G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009225.1(LRG_292p1):p.?', 'slr': 'NP_009225.1:p.?'} + assert results['NM_007294.3:c.301+1G>C']['submitted_variant'] == '17-41256884-C-G' + assert results['NM_007294.3:c.301+1G>C']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_007294.3):c.301+1G>C' + assert results['NM_007294.3:c.301+1G>C']['hgvs_lrg_variant'] == 'LRG_292:g.113117G>C' + assert results['NM_007294.3:c.301+1G>C']['hgvs_transcript_variant'] == 'NM_007294.3:c.301+1G>C' + assert results['NM_007294.3:c.301+1G>C']['hgvs_refseqgene_variant'] == 'NG_005905.2:g.113117G>C' + assert results['NM_007294.3:c.301+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '41256884', 'alt': u'G'}} + assert results['NM_007294.3:c.301+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '43104867', 'alt': u'G'}} + assert results['NM_007294.3:c.301+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '41256884', 'alt': u'G'}} + assert results['NM_007294.3:c.301+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '43104867', 'alt': u'G'}} + assert results['NM_007294.3:c.301+1G>C']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_005905.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009225.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007294.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_292.xml'} + + + def test_variant256(self): + variant = '17-42991428-C-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001363846.1:c.490G>T' in results.keys() + assert results['NM_001363846.1:c.490G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363846.1:c.490G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001363846.1:c.490G>T']['alt_genomic_loci'] == [] + assert results['NM_001363846.1:c.490G>T']['transcript_description'] == 'Homo sapiens glial fibrillary acidic protein (GFAP), transcript variant 4, mRNA' + assert results['NM_001363846.1:c.490G>T']['gene_symbol'] == 'GFAP' + assert results['NM_001363846.1:c.490G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350775.1:p.(Glu164Ter)', 'slr': 'NP_001350775.1:p.(E164*)'} + assert results['NM_001363846.1:c.490G>T']['submitted_variant'] == '17-42991428-C-A' + assert results['NM_001363846.1:c.490G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001363846.1:c.490G>T']['hgvs_lrg_variant'] == '' + assert results['NM_001363846.1:c.490G>T']['hgvs_transcript_variant'] == 'NM_001363846.1:c.490G>T' + assert results['NM_001363846.1:c.490G>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001363846.1:c.490G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '42991428', 'alt': u'A'}} + assert 'hg38' not in results['NM_001363846.1:c.490G>T']['primary_assembly_loci'].keys() + assert results['NM_001363846.1:c.490G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '42991428', 'alt': u'A'}} + assert 
'grch38' not in results['NM_001363846.1:c.490G>T']['primary_assembly_loci'].keys() + assert results['NM_001363846.1:c.490G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350775.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363846.1'} + + assert 'NM_001131019.2:c.490G>T' in results.keys() + assert results['NM_001131019.2:c.490G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001131019.2:c.490G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001131019.2:c.490G>T']['alt_genomic_loci'] == [] + assert results['NM_001131019.2:c.490G>T']['transcript_description'] == 'Homo sapiens glial fibrillary acidic protein (GFAP), transcript variant 2, mRNA' + assert results['NM_001131019.2:c.490G>T']['gene_symbol'] == 'GFAP' + assert results['NM_001131019.2:c.490G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124491.1:p.(Glu164Ter)', 'slr': 'NP_001124491.1:p.(E164*)'} + assert results['NM_001131019.2:c.490G>T']['submitted_variant'] == '17-42991428-C-A' + assert results['NM_001131019.2:c.490G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001131019.2:c.490G>T']['hgvs_lrg_variant'] == '' + assert results['NM_001131019.2:c.490G>T']['hgvs_transcript_variant'] == 'NM_001131019.2:c.490G>T' + assert results['NM_001131019.2:c.490G>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001131019.2:c.490G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '42991428', 'alt': u'A'}} + assert results['NM_001131019.2:c.490G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '44914060', 'alt': u'A'}} + assert results['NM_001131019.2:c.490G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': 
'42991428', 'alt': u'A'}} + assert results['NM_001131019.2:c.490G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '44914060', 'alt': u'A'}} + assert results['NM_001131019.2:c.490G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124491.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001131019.2'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001242376.1:c.490G>T' in results.keys() + assert results['NM_001242376.1:c.490G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001242376.1:c.490G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001242376.1:c.490G>T']['alt_genomic_loci'] == [] + assert results['NM_001242376.1:c.490G>T']['transcript_description'] == 'Homo sapiens glial fibrillary acidic protein (GFAP), transcript variant 3, mRNA' + assert results['NM_001242376.1:c.490G>T']['gene_symbol'] == 'GFAP' + assert results['NM_001242376.1:c.490G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001229305.1:p.(Glu164Ter)', 'slr': 'NP_001229305.1:p.(E164*)'} + assert results['NM_001242376.1:c.490G>T']['submitted_variant'] == '17-42991428-C-A' + assert results['NM_001242376.1:c.490G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001242376.1:c.490G>T']['hgvs_lrg_variant'] == '' + assert results['NM_001242376.1:c.490G>T']['hgvs_transcript_variant'] == 'NM_001242376.1:c.490G>T' + assert results['NM_001242376.1:c.490G>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001242376.1:c.490G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '42991428', 'alt': u'A'}} + assert results['NM_001242376.1:c.490G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '44914060', 'alt': 
u'A'}} + assert results['NM_001242376.1:c.490G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '42991428', 'alt': u'A'}} + assert results['NM_001242376.1:c.490G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '44914060', 'alt': u'A'}} + assert results['NM_001242376.1:c.490G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001229305.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001242376.1'} + + assert 'NM_002055.4:c.490G>T' in results.keys() + assert results['NM_002055.4:c.490G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_002055.4:c.490G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_002055.4:c.490G>T']['alt_genomic_loci'] == [] + assert results['NM_002055.4:c.490G>T']['transcript_description'] == 'Homo sapiens glial fibrillary acidic protein (GFAP), transcript variant 1, mRNA' + assert results['NM_002055.4:c.490G>T']['gene_symbol'] == 'GFAP' + assert results['NM_002055.4:c.490G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002046.1:p.(Glu164Ter)', 'slr': 'NP_002046.1:p.(E164*)'} + assert results['NM_002055.4:c.490G>T']['submitted_variant'] == '17-42991428-C-A' + assert results['NM_002055.4:c.490G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_002055.4:c.490G>T']['hgvs_lrg_variant'] == '' + assert results['NM_002055.4:c.490G>T']['hgvs_transcript_variant'] == 'NM_002055.4:c.490G>T' + assert results['NM_002055.4:c.490G>T']['hgvs_refseqgene_variant'] == 'NG_008401.1:g.6487G>T' + assert results['NM_002055.4:c.490G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '42991428', 'alt': u'A'}} + assert results['NM_002055.4:c.490G>T']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '44914060', 'alt': u'A'}} + assert results['NM_002055.4:c.490G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '42991428', 'alt': u'A'}} + assert results['NM_002055.4:c.490G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '44914060', 'alt': u'A'}} + assert results['NM_002055.4:c.490G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008401.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002046.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002055.4'} + + + def test_variant257(self): + variant = '17-48252809-A-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NR_135553.1:n.1022A>T' in results.keys() + assert results['NR_135553.1:n.1022A>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_135553.1:n.1022A>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_135553.1:n.1022A>T']['alt_genomic_loci'] == [] + assert results['NR_135553.1:n.1022A>T']['transcript_description'] == 'Homo sapiens sarcoglycan alpha (SGCA), transcript variant 3, non-coding RNA' + assert results['NR_135553.1:n.1022A>T']['gene_symbol'] == 'SGCA' + assert results['NR_135553.1:n.1022A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_135553.1:n.1022A>T']['submitted_variant'] == '17-48252809-A-T' + assert results['NR_135553.1:n.1022A>T']['genome_context_intronic_sequence'] == '' + assert results['NR_135553.1:n.1022A>T']['hgvs_lrg_variant'] == '' + assert results['NR_135553.1:n.1022A>T']['hgvs_transcript_variant'] == 'NR_135553.1:n.1022A>T' + assert results['NR_135553.1:n.1022A>T']['hgvs_refseqgene_variant'] == '' + assert 
results['NR_135553.1:n.1022A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '48252809', 'alt': 'T'}} + assert results['NR_135553.1:n.1022A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50175448A>T', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '50175448', 'alt': 'T'}} + assert results['NR_135553.1:n.1022A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '48252809', 'alt': 'T'}} + assert results['NR_135553.1:n.1022A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50175448A>T', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '50175448', 'alt': 'T'}} + assert results['NR_135553.1:n.1022A>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_135553.1'} + + assert 'NM_001135697.1:c.*11A>T' in results.keys() + assert results['NM_001135697.1:c.*11A>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001135697.1:c.*11A>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001135697.1:c.*11A>T']['alt_genomic_loci'] == [] + assert results['NM_001135697.1:c.*11A>T']['transcript_description'] == 'Homo sapiens sarcoglycan alpha (SGCA), transcript variant 2, mRNA' + assert results['NM_001135697.1:c.*11A>T']['gene_symbol'] == 'SGCA' + assert results['NM_001135697.1:c.*11A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129169.1:p.?', 'slr': 'NP_001129169.1:p.?'} + assert results['NM_001135697.1:c.*11A>T']['submitted_variant'] == '17-48252809-A-T' + assert results['NM_001135697.1:c.*11A>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001135697.1:c.*11A>T']['hgvs_lrg_variant'] == '' + assert results['NM_001135697.1:c.*11A>T']['hgvs_transcript_variant'] == 'NM_001135697.1:c.*11A>T' + assert 
results['NM_001135697.1:c.*11A>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001135697.1:c.*11A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '48252809', 'alt': 'T'}} + assert 'hg38' not in results['NM_001135697.1:c.*11A>T']['primary_assembly_loci'].keys() + assert results['NM_001135697.1:c.*11A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '48252809', 'alt': 'T'}} + assert 'grch38' not in results['NM_001135697.1:c.*11A>T']['primary_assembly_loci'].keys() + assert results['NM_001135697.1:c.*11A>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129169.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135697.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_000023.2:c.*11A>T' in results.keys() + assert results['NM_000023.2:c.*11A>T']['hgvs_lrg_transcript_variant'] == 'LRG_203t1:c.*11A>T' + assert results['NM_000023.2:c.*11A>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000023.2:c.*11A>T']['alt_genomic_loci'] == [] + assert results['NM_000023.2:c.*11A>T']['transcript_description'] == 'Homo sapiens sarcoglycan alpha (SGCA), transcript variant 1, mRNA' + assert results['NM_000023.2:c.*11A>T']['gene_symbol'] == 'SGCA' + assert results['NM_000023.2:c.*11A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000014.1(LRG_203p1):p.?', 'slr': 'NP_000014.1:p.?'} + assert results['NM_000023.2:c.*11A>T']['submitted_variant'] == '17-48252809-A-T' + assert results['NM_000023.2:c.*11A>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000023.2:c.*11A>T']['hgvs_lrg_variant'] == 'LRG_203:g.14444A>T' + assert results['NM_000023.2:c.*11A>T']['hgvs_transcript_variant'] == 'NM_000023.2:c.*11A>T' + assert results['NM_000023.2:c.*11A>T']['hgvs_refseqgene_variant'] == 
'NG_008889.1:g.14444A>T' + assert results['NM_000023.2:c.*11A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '48252809', 'alt': 'T'}} + assert 'hg38' not in results['NM_000023.2:c.*11A>T']['primary_assembly_loci'].keys() + assert results['NM_000023.2:c.*11A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '48252809', 'alt': 'T'}} + assert 'grch38' not in results['NM_000023.2:c.*11A>T']['primary_assembly_loci'].keys() + assert results['NM_000023.2:c.*11A>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008889.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000014.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000023.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_203.xml'} + + assert 'NM_001135697.2:c.*11A>T' in results.keys() + assert results['NM_001135697.2:c.*11A>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001135697.2:c.*11A>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001135697.2:c.*11A>T']['alt_genomic_loci'] == [] + assert results['NM_001135697.2:c.*11A>T']['transcript_description'] == 'Homo sapiens sarcoglycan alpha (SGCA), transcript variant 2, mRNA' + assert results['NM_001135697.2:c.*11A>T']['gene_symbol'] == 'SGCA' + assert results['NM_001135697.2:c.*11A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129169.1:p.?', 'slr': 'NP_001129169.1:p.?'} + assert results['NM_001135697.2:c.*11A>T']['submitted_variant'] == '17-48252809-A-T' + assert results['NM_001135697.2:c.*11A>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001135697.2:c.*11A>T']['hgvs_lrg_variant'] == '' + assert results['NM_001135697.2:c.*11A>T']['hgvs_transcript_variant'] == 'NM_001135697.2:c.*11A>T' + assert 
results['NM_001135697.2:c.*11A>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001135697.2:c.*11A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '48252809', 'alt': 'T'}} + assert results['NM_001135697.2:c.*11A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50175448A>T', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '50175448', 'alt': 'T'}} + assert results['NM_001135697.2:c.*11A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '48252809', 'alt': 'T'}} + assert results['NM_001135697.2:c.*11A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50175448A>T', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '50175448', 'alt': 'T'}} + assert results['NM_001135697.2:c.*11A>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129169.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135697.2'} + + assert 'NM_000023.3:c.*11A>T' in results.keys() + assert results['NM_000023.3:c.*11A>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000023.3:c.*11A>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000023.3:c.*11A>T']['alt_genomic_loci'] == [] + assert results['NM_000023.3:c.*11A>T']['transcript_description'] == 'Homo sapiens sarcoglycan alpha (SGCA), transcript variant 1, mRNA' + assert results['NM_000023.3:c.*11A>T']['gene_symbol'] == 'SGCA' + assert results['NM_000023.3:c.*11A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000014.1(LRG_203p1):p.?', 'slr': 'NP_000014.1:p.?'} + assert results['NM_000023.3:c.*11A>T']['submitted_variant'] == '17-48252809-A-T' + assert results['NM_000023.3:c.*11A>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000023.3:c.*11A>T']['hgvs_lrg_variant'] == '' + assert 
results['NM_000023.3:c.*11A>T']['hgvs_transcript_variant'] == 'NM_000023.3:c.*11A>T' + assert results['NM_000023.3:c.*11A>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_000023.3:c.*11A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '48252809', 'alt': 'T'}} + assert results['NM_000023.3:c.*11A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50175448A>T', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '50175448', 'alt': 'T'}} + assert results['NM_000023.3:c.*11A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '48252809', 'alt': 'T'}} + assert results['NM_000023.3:c.*11A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50175448A>T', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '50175448', 'alt': 'T'}} + assert results['NM_000023.3:c.*11A>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000014.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000023.3'} + + + def test_variant258(self): + variant = '17-62022709-G-GTC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000334.4:c.3720+9_3720+10dup' in results.keys() + assert results['NM_000334.4:c.3720+9_3720+10dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000334.4:c.3720+9_3720+10dup']['refseqgene_context_intronic_sequence'] == 'NG_011699.1(NM_000334.4):c.3720+9_3720+10dup' + assert results['NM_000334.4:c.3720+9_3720+10dup']['alt_genomic_loci'] == [] + assert results['NM_000334.4:c.3720+9_3720+10dup']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 4 (SCN4A), mRNA' + assert results['NM_000334.4:c.3720+9_3720+10dup']['gene_symbol'] == 'SCN4A' + assert 
results['NM_000334.4:c.3720+9_3720+10dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000325.4:p.?', 'slr': 'NP_000325.4:p.?'} + assert results['NM_000334.4:c.3720+9_3720+10dup']['submitted_variant'] == '17-62022709-G-GTC' + assert results['NM_000334.4:c.3720+9_3720+10dup']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000334.4):c.3720+9_3720+10dup' + assert results['NM_000334.4:c.3720+9_3720+10dup']['hgvs_lrg_variant'] == '' + assert results['NM_000334.4:c.3720+9_3720+10dup']['hgvs_transcript_variant'] == 'NM_000334.4:c.3720+9_3720+10dup' + assert results['NM_000334.4:c.3720+9_3720+10dup']['hgvs_refseqgene_variant'] == 'NG_011699.1:g.32568_32569dup' + assert results['NM_000334.4:c.3720+9_3720+10dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.62022710_62022711dup', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '62022709', 'alt': 'GTC'}} + assert results['NM_000334.4:c.3720+9_3720+10dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945350_63945351dup', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '63945349', 'alt': 'GTC'}} + assert results['NM_000334.4:c.3720+9_3720+10dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.62022710_62022711dup', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '62022709', 'alt': 'GTC'}} + assert results['NM_000334.4:c.3720+9_3720+10dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945350_63945351dup', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '63945349', 'alt': 'GTC'}} + assert results['NM_000334.4:c.3720+9_3720+10dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011699.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4'} + + + def test_variant259(self): + variant = '17-62022711-C-CT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + 
+ assert results['flag'] == 'gene_variant' + assert 'NM_000334.4:c.3720+8_3720+9insA' in results.keys() + assert results['NM_000334.4:c.3720+8_3720+9insA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000334.4:c.3720+8_3720+9insA']['refseqgene_context_intronic_sequence'] == 'NG_011699.1(NM_000334.4):c.3720+8_3720+9insA' + assert results['NM_000334.4:c.3720+8_3720+9insA']['alt_genomic_loci'] == [] + assert results['NM_000334.4:c.3720+8_3720+9insA']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 4 (SCN4A), mRNA' + assert results['NM_000334.4:c.3720+8_3720+9insA']['gene_symbol'] == 'SCN4A' + assert results['NM_000334.4:c.3720+8_3720+9insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000325.4:p.?', 'slr': 'NP_000325.4:p.?'} + assert results['NM_000334.4:c.3720+8_3720+9insA']['submitted_variant'] == '17-62022711-C-CT' + assert results['NM_000334.4:c.3720+8_3720+9insA']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000334.4):c.3720+8_3720+9insA' + assert results['NM_000334.4:c.3720+8_3720+9insA']['hgvs_lrg_variant'] == '' + assert results['NM_000334.4:c.3720+8_3720+9insA']['hgvs_transcript_variant'] == 'NM_000334.4:c.3720+8_3720+9insA' + assert results['NM_000334.4:c.3720+8_3720+9insA']['hgvs_refseqgene_variant'] == 'NG_011699.1:g.32567_32568insA' + assert results['NM_000334.4:c.3720+8_3720+9insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.62022711_62022712insT', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '62022711', 'alt': u'CT'}} + assert results['NM_000334.4:c.3720+8_3720+9insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945351_63945352insT', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '63945351', 'alt': u'CT'}} + assert results['NM_000334.4:c.3720+8_3720+9insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.62022711_62022712insT', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '62022711', 
'alt': u'CT'}} + assert results['NM_000334.4:c.3720+8_3720+9insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945351_63945352insT', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '63945351', 'alt': u'CT'}} + assert results['NM_000334.4:c.3720+8_3720+9insA']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011699.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4'} + + + def test_variant260(self): + variant = '17-62023005-G-GGC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000334.4:c.3442-8_3442-7insGC' in results.keys() + assert results['NM_000334.4:c.3442-8_3442-7insGC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000334.4:c.3442-8_3442-7insGC']['refseqgene_context_intronic_sequence'] == 'NG_011699.1(NM_000334.4):c.3442-8_3442-7insGC' + assert results['NM_000334.4:c.3442-8_3442-7insGC']['alt_genomic_loci'] == [] + assert results['NM_000334.4:c.3442-8_3442-7insGC']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 4 (SCN4A), mRNA' + assert results['NM_000334.4:c.3442-8_3442-7insGC']['gene_symbol'] == 'SCN4A' + assert results['NM_000334.4:c.3442-8_3442-7insGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000325.4:p.?', 'slr': 'NP_000325.4:p.?'} + assert results['NM_000334.4:c.3442-8_3442-7insGC']['submitted_variant'] == '17-62023005-G-GGC' + assert results['NM_000334.4:c.3442-8_3442-7insGC']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000334.4):c.3442-8_3442-7insGC' + assert results['NM_000334.4:c.3442-8_3442-7insGC']['hgvs_lrg_variant'] == '' + assert results['NM_000334.4:c.3442-8_3442-7insGC']['hgvs_transcript_variant'] == 'NM_000334.4:c.3442-8_3442-7insGC' + assert results['NM_000334.4:c.3442-8_3442-7insGC']['hgvs_refseqgene_variant'] == 
'NG_011699.1:g.32273_32274insGC' + assert results['NM_000334.4:c.3442-8_3442-7insGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.62023005_62023006insGC', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '62023005', 'alt': u'GGC'}} + assert results['NM_000334.4:c.3442-8_3442-7insGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945645_63945646insGC', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '63945645', 'alt': u'GGC'}} + assert results['NM_000334.4:c.3442-8_3442-7insGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.62023005_62023006insGC', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '62023005', 'alt': u'GGC'}} + assert results['NM_000334.4:c.3442-8_3442-7insGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945645_63945646insGC', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '63945645', 'alt': u'GGC'}} + assert results['NM_000334.4:c.3442-8_3442-7insGC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011699.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4'} + + + def test_variant261(self): + variant = '17-62023006-C-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000334.4:c.3442-8G>T' in results.keys() + assert results['NM_000334.4:c.3442-8G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000334.4:c.3442-8G>T']['refseqgene_context_intronic_sequence'] == 'NG_011699.1(NM_000334.4):c.3442-8G>T' + assert results['NM_000334.4:c.3442-8G>T']['alt_genomic_loci'] == [] + assert results['NM_000334.4:c.3442-8G>T']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 4 (SCN4A), mRNA' + assert results['NM_000334.4:c.3442-8G>T']['gene_symbol'] == 'SCN4A' + assert 
results['NM_000334.4:c.3442-8G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000325.4:p.?', 'slr': 'NP_000325.4:p.?'} + assert results['NM_000334.4:c.3442-8G>T']['submitted_variant'] == '17-62023006-C-A' + assert results['NM_000334.4:c.3442-8G>T']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000334.4):c.3442-8G>T' + assert results['NM_000334.4:c.3442-8G>T']['hgvs_lrg_variant'] == '' + assert results['NM_000334.4:c.3442-8G>T']['hgvs_transcript_variant'] == 'NM_000334.4:c.3442-8G>T' + assert results['NM_000334.4:c.3442-8G>T']['hgvs_refseqgene_variant'] == 'NG_011699.1:g.32273G>T' + assert results['NM_000334.4:c.3442-8G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.62023006C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '62023006', 'alt': u'A'}} + assert results['NM_000334.4:c.3442-8G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945646C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '63945646', 'alt': u'A'}} + assert results['NM_000334.4:c.3442-8G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.62023006C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '62023006', 'alt': u'A'}} + assert results['NM_000334.4:c.3442-8G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945646C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '63945646', 'alt': u'A'}} + assert results['NM_000334.4:c.3442-8G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011699.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4'} + + + def test_variant262(self): + variant = '17-62034787-G-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000334.4:c.2111C>T' in results.keys() + assert 
results['NM_000334.4:c.2111C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000334.4:c.2111C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000334.4:c.2111C>T']['alt_genomic_loci'] == [] + assert results['NM_000334.4:c.2111C>T']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 4 (SCN4A), mRNA' + assert results['NM_000334.4:c.2111C>T']['gene_symbol'] == 'SCN4A' + assert results['NM_000334.4:c.2111C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000325.4:p.(Thr704Met)', 'slr': 'NP_000325.4:p.(T704M)'} + assert results['NM_000334.4:c.2111C>T']['submitted_variant'] == '17-62034787-G-A' + assert results['NM_000334.4:c.2111C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000334.4:c.2111C>T']['hgvs_lrg_variant'] == '' + assert results['NM_000334.4:c.2111C>T']['hgvs_transcript_variant'] == 'NM_000334.4:c.2111C>T' + assert results['NM_000334.4:c.2111C>T']['hgvs_refseqgene_variant'] == 'NG_011699.1:g.20492C>T' + assert results['NM_000334.4:c.2111C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.62034787G>A', 'vcf': {'chr': 'chr17', 'ref': u'G', 'pos': '62034787', 'alt': u'A'}} + assert results['NM_000334.4:c.2111C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63957427G>A', 'vcf': {'chr': 'chr17', 'ref': u'G', 'pos': '63957427', 'alt': u'A'}} + assert results['NM_000334.4:c.2111C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.62034787G>A', 'vcf': {'chr': '17', 'ref': u'G', 'pos': '62034787', 'alt': u'A'}} + assert results['NM_000334.4:c.2111C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63957427G>A', 'vcf': {'chr': '17', 'ref': u'G', 'pos': '63957427', 'alt': u'A'}} + assert results['NM_000334.4:c.2111C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011699.1', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4'} + + + def test_variant263(self): + variant = '18-24128261-GTCCTCC-G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001351443.1:c.-16+941_-16+946del' in results.keys() + assert results['NM_001351443.1:c.-16+941_-16+946del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001351443.1:c.-16+941_-16+946del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001351443.1:c.-16+941_-16+946del']['alt_genomic_loci'] == [] + assert results['NM_001351443.1:c.-16+941_-16+946del']['transcript_description'] == 'Homo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 6, mRNA' + assert results['NM_001351443.1:c.-16+941_-16+946del']['gene_symbol'] == 'KCTD1' + assert results['NM_001351443.1:c.-16+941_-16+946del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001338372.1:p.?', 'slr': 'NP_001338372.1:p.?'} + assert results['NM_001351443.1:c.-16+941_-16+946del']['submitted_variant'] == '18-24128261-GTCCTCC-G' + assert results['NM_001351443.1:c.-16+941_-16+946del']['genome_context_intronic_sequence'] == 'NC_000018.9(NM_001351443.1):c.-16+941_-16+946del' + assert results['NM_001351443.1:c.-16+941_-16+946del']['hgvs_lrg_variant'] == '' + assert results['NM_001351443.1:c.-16+941_-16+946del']['hgvs_transcript_variant'] == 'NM_001351443.1:c.-16+941_-16+946del' + assert results['NM_001351443.1:c.-16+941_-16+946del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001351443.1:c.-16+941_-16+946del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_001351443.1:c.-16+941_-16+946del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': 'chr18', 'ref': 
'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_001351443.1:c.-16+941_-16+946del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_001351443.1:c.-16+941_-16+946del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_001351443.1:c.-16+941_-16+946del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001338372.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001351443.1'} + + assert 'NM_001258222.1:c.10-47053_10-47048del' in results.keys() + assert results['NM_001258222.1:c.10-47053_10-47048del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001258222.1:c.10-47053_10-47048del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001258222.1:c.10-47053_10-47048del']['alt_genomic_loci'] == [] + assert results['NM_001258222.1:c.10-47053_10-47048del']['transcript_description'] == 'Homo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 5, mRNA' + assert results['NM_001258222.1:c.10-47053_10-47048del']['gene_symbol'] == 'KCTD1' + assert results['NM_001258222.1:c.10-47053_10-47048del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001245151.1:p.?', 'slr': 'NP_001245151.1:p.?'} + assert results['NM_001258222.1:c.10-47053_10-47048del']['submitted_variant'] == '18-24128261-GTCCTCC-G' + assert results['NM_001258222.1:c.10-47053_10-47048del']['genome_context_intronic_sequence'] == 'NC_000018.9(NM_001258222.1):c.10-47053_10-47048del' + assert results['NM_001258222.1:c.10-47053_10-47048del']['hgvs_lrg_variant'] == '' + assert results['NM_001258222.1:c.10-47053_10-47048del']['hgvs_transcript_variant'] == 'NM_001258222.1:c.10-47053_10-47048del' + assert 
results['NM_001258222.1:c.10-47053_10-47048del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001258222.1:c.10-47053_10-47048del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_001258222.1:c.10-47053_10-47048del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_001258222.1:c.10-47053_10-47048del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_001258222.1:c.10-47053_10-47048del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_001258222.1:c.10-47053_10-47048del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001245151.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001258222.1'} + + assert 'NM_001258221.1:c.-16+1426_-16+1431del' in results.keys() + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['alt_genomic_loci'] == [] + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['transcript_description'] == 'Homo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 4, mRNA' + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['gene_symbol'] == 'KCTD1' + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_001245150.1:p.?', 'slr': 'NP_001245150.1:p.?'} + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['submitted_variant'] == '18-24128261-GTCCTCC-G' + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['genome_context_intronic_sequence'] == 'NC_000018.9(NM_001258221.1):c.-16+1426_-16+1431del' + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['hgvs_lrg_variant'] == '' + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['hgvs_transcript_variant'] == 'NM_001258221.1:c.-16+1426_-16+1431del' + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001245150.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001258221.1'} + + assert 'NM_001258222.2:c.10-47053_10-47048del' in results.keys() + assert results['NM_001258222.2:c.10-47053_10-47048del']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001258222.2:c.10-47053_10-47048del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001258222.2:c.10-47053_10-47048del']['alt_genomic_loci'] == [] + assert results['NM_001258222.2:c.10-47053_10-47048del']['transcript_description'] == 'Homo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 5, mRNA' + assert results['NM_001258222.2:c.10-47053_10-47048del']['gene_symbol'] == 'KCTD1' + assert results['NM_001258222.2:c.10-47053_10-47048del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001245151.1:p.?', 'slr': 'NP_001245151.1:p.?'} + assert results['NM_001258222.2:c.10-47053_10-47048del']['submitted_variant'] == '18-24128261-GTCCTCC-G' + assert results['NM_001258222.2:c.10-47053_10-47048del']['genome_context_intronic_sequence'] == 'NC_000018.9(NM_001258222.2):c.10-47053_10-47048del' + assert results['NM_001258222.2:c.10-47053_10-47048del']['hgvs_lrg_variant'] == '' + assert results['NM_001258222.2:c.10-47053_10-47048del']['hgvs_transcript_variant'] == 'NM_001258222.2:c.10-47053_10-47048del' + assert results['NM_001258222.2:c.10-47053_10-47048del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001258222.2:c.10-47053_10-47048del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_001258222.2:c.10-47053_10-47048del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_001258222.2:c.10-47053_10-47048del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_001258222.2:c.10-47053_10-47048del']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_001258222.2:c.10-47053_10-47048del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001245151.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001258222.2'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001136205.2:c.-16+588_-16+593del' in results.keys() + assert results['NM_001136205.2:c.-16+588_-16+593del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001136205.2:c.-16+588_-16+593del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001136205.2:c.-16+588_-16+593del']['alt_genomic_loci'] == [] + assert results['NM_001136205.2:c.-16+588_-16+593del']['transcript_description'] == 'Homo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 1, mRNA' + assert results['NM_001136205.2:c.-16+588_-16+593del']['gene_symbol'] == 'KCTD1' + assert results['NM_001136205.2:c.-16+588_-16+593del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129677.1:p.?', 'slr': 'NP_001129677.1:p.?'} + assert results['NM_001136205.2:c.-16+588_-16+593del']['submitted_variant'] == '18-24128261-GTCCTCC-G' + assert results['NM_001136205.2:c.-16+588_-16+593del']['genome_context_intronic_sequence'] == 'NC_000018.9(NM_001136205.2):c.-16+588_-16+593del' + assert results['NM_001136205.2:c.-16+588_-16+593del']['hgvs_lrg_variant'] == '' + assert results['NM_001136205.2:c.-16+588_-16+593del']['hgvs_transcript_variant'] == 'NM_001136205.2:c.-16+588_-16+593del' + assert results['NM_001136205.2:c.-16+588_-16+593del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001136205.2:c.-16+588_-16+593del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert 
results['NM_001136205.2:c.-16+588_-16+593del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_001136205.2:c.-16+588_-16+593del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_001136205.2:c.-16+588_-16+593del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_001136205.2:c.-16+588_-16+593del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129677.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001136205.2'} + + assert 'NM_198991.3:c.-15-47053_-15-47048del' in results.keys() + assert results['NM_198991.3:c.-15-47053_-15-47048del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_198991.3:c.-15-47053_-15-47048del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_198991.3:c.-15-47053_-15-47048del']['alt_genomic_loci'] == [] + assert results['NM_198991.3:c.-15-47053_-15-47048del']['transcript_description'] == 'Homo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 2, mRNA' + assert results['NM_198991.3:c.-15-47053_-15-47048del']['gene_symbol'] == 'KCTD1' + assert results['NM_198991.3:c.-15-47053_-15-47048del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_945342.1:p.?', 'slr': 'NP_945342.1:p.?'} + assert results['NM_198991.3:c.-15-47053_-15-47048del']['submitted_variant'] == '18-24128261-GTCCTCC-G' + assert results['NM_198991.3:c.-15-47053_-15-47048del']['genome_context_intronic_sequence'] == 'NC_000018.9(NM_198991.3):c.-15-47053_-15-47048del' + assert 
results['NM_198991.3:c.-15-47053_-15-47048del']['hgvs_lrg_variant'] == '' + assert results['NM_198991.3:c.-15-47053_-15-47048del']['hgvs_transcript_variant'] == 'NM_198991.3:c.-15-47053_-15-47048del' + assert results['NM_198991.3:c.-15-47053_-15-47048del']['hgvs_refseqgene_variant'] == '' + assert results['NM_198991.3:c.-15-47053_-15-47048del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_198991.3:c.-15-47053_-15-47048del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_198991.3:c.-15-47053_-15-47048del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_198991.3:c.-15-47053_-15-47048del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_198991.3:c.-15-47053_-15-47048del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_945342.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198991.3'} + + assert 'NM_001142730.2:c.234_239del' in results.keys() + assert results['NM_001142730.2:c.234_239del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001142730.2:c.234_239del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001142730.2:c.234_239del']['alt_genomic_loci'] == [] + assert results['NM_001142730.2:c.234_239del']['transcript_description'] == 'Homo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 3, mRNA' + assert 
results['NM_001142730.2:c.234_239del']['gene_symbol'] == 'KCTD1' + assert results['NM_001142730.2:c.234_239del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001136202.1:p.(Glu78_Glu79del)', 'slr': 'NP_001136202.1:p.(E78_E79del)'} + assert results['NM_001142730.2:c.234_239del']['submitted_variant'] == '18-24128261-GTCCTCC-G' + assert results['NM_001142730.2:c.234_239del']['genome_context_intronic_sequence'] == '' + assert results['NM_001142730.2:c.234_239del']['hgvs_lrg_variant'] == '' + assert results['NM_001142730.2:c.234_239del']['hgvs_transcript_variant'] == 'NM_001142730.2:c.234_239del' + assert results['NM_001142730.2:c.234_239del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001142730.2:c.234_239del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_001142730.2:c.234_239del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_001142730.2:c.234_239del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_001142730.2:c.234_239del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_001142730.2:c.234_239del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001136202.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001142730.2'} + + + def test_variant264(self): + variant = '19-15291774-G-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 
'NM_000435.2:c.2992C>T' in results.keys() + assert results['NM_000435.2:c.2992C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000435.2:c.2992C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000435.2:c.2992C>T']['alt_genomic_loci'] == [] + assert results['NM_000435.2:c.2992C>T']['transcript_description'] == 'Homo sapiens notch 3 (NOTCH3), mRNA' + assert results['NM_000435.2:c.2992C>T']['gene_symbol'] == 'NOTCH3' + assert results['NM_000435.2:c.2992C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000426.2:p.(Gln998Ter)', 'slr': 'NP_000426.2:p.(Q998*)'} + assert results['NM_000435.2:c.2992C>T']['submitted_variant'] == '19-15291774-G-A' + assert results['NM_000435.2:c.2992C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000435.2:c.2992C>T']['hgvs_lrg_variant'] == '' + assert results['NM_000435.2:c.2992C>T']['hgvs_transcript_variant'] == 'NM_000435.2:c.2992C>T' + assert results['NM_000435.2:c.2992C>T']['hgvs_refseqgene_variant'] == 'NG_009819.1:g.25019C>T' + assert results['NM_000435.2:c.2992C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.15291774G>A', 'vcf': {'chr': 'chr19', 'ref': u'G', 'pos': '15291774', 'alt': u'A'}} + assert results['NM_000435.2:c.2992C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.15180963G>A', 'vcf': {'chr': 'chr19', 'ref': u'G', 'pos': '15180963', 'alt': u'A'}} + assert results['NM_000435.2:c.2992C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.15291774G>A', 'vcf': {'chr': '19', 'ref': u'G', 'pos': '15291774', 'alt': u'A'}} + assert results['NM_000435.2:c.2992C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.15180963G>A', 'vcf': {'chr': '19', 'ref': u'G', 'pos': '15180963', 'alt': u'A'}} + assert results['NM_000435.2:c.2992C>T']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_009819.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000426.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000435.2'} + + + def test_variant265(self): + variant = '19-15311794-A-G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'intergenic' + assert 'Intergenic_Variant_1' in results.keys() + assert results['Intergenic_Variant_1']['hgvs_lrg_transcript_variant'] == '' + assert results['Intergenic_Variant_1']['refseqgene_context_intronic_sequence'] == '' + assert results['Intergenic_Variant_1']['alt_genomic_loci'] == [] + assert results['Intergenic_Variant_1']['transcript_description'] == '' + assert results['Intergenic_Variant_1']['gene_symbol'] == '' + assert results['Intergenic_Variant_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['Intergenic_Variant_1']['submitted_variant'] == '19-15311794-A-G' + assert results['Intergenic_Variant_1']['genome_context_intronic_sequence'] == '' + assert results['Intergenic_Variant_1']['hgvs_lrg_variant'] == '' + assert results['Intergenic_Variant_1']['hgvs_transcript_variant'] == '' + assert results['Intergenic_Variant_1']['hgvs_refseqgene_variant'] == 'NG_009819.1:g.4999T>C' + assert results['Intergenic_Variant_1']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': u'NC_000019.9:g.15311794A>G', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '15311794', 'alt': 'G'}} + assert results['Intergenic_Variant_1']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': u'NC_000019.10:g.15200983A>G', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '15200983', 'alt': 'G'}} + assert results['Intergenic_Variant_1']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': u'NC_000019.9:g.15311794A>G', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '15311794', 'alt': 'G'}} + assert results['Intergenic_Variant_1']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': u'NC_000019.10:g.15200983A>G', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '15200983', 'alt': 'G'}} + assert results['Intergenic_Variant_1']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009819.1'} + + + def test_variant266(self): + variant = '19-39076592-G-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000540.2:c.14818G>A' in results.keys() + assert results['NM_000540.2:c.14818G>A']['hgvs_lrg_transcript_variant'] == 'LRG_766t1:c.14818G>A' + assert results['NM_000540.2:c.14818G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000540.2:c.14818G>A']['alt_genomic_loci'] == [] + assert results['NM_000540.2:c.14818G>A']['transcript_description'] == 'Homo sapiens ryanodine receptor 1 (RYR1), transcript variant 1, mRNA' + assert results['NM_000540.2:c.14818G>A']['gene_symbol'] == 'RYR1' + assert results['NM_000540.2:c.14818G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000531.2(LRG_766p1):p.(Ala4940Thr)', 'slr': 'NP_000531.2:p.(A4940T)'} + assert results['NM_000540.2:c.14818G>A']['submitted_variant'] == '19-39076592-G-A' + assert results['NM_000540.2:c.14818G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_000540.2:c.14818G>A']['hgvs_lrg_variant'] == 'LRG_766:g.157253G>A' + assert results['NM_000540.2:c.14818G>A']['hgvs_transcript_variant'] == 'NM_000540.2:c.14818G>A' + assert results['NM_000540.2:c.14818G>A']['hgvs_refseqgene_variant'] == 'NG_008866.1:g.157253G>A' + assert results['NM_000540.2:c.14818G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.39076592G>A', 'vcf': {'chr': 'chr19', 'ref': 'G', 'pos': '39076592', 'alt': 'A'}} + assert results['NM_000540.2:c.14818G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.38585952G>A', 'vcf': {'chr': 'chr19', 'ref': 'G', 'pos': '38585952', 'alt': 'A'}} + 
assert results['NM_000540.2:c.14818G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.39076592G>A', 'vcf': {'chr': '19', 'ref': 'G', 'pos': '39076592', 'alt': 'A'}} + assert results['NM_000540.2:c.14818G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.38585952G>A', 'vcf': {'chr': '19', 'ref': 'G', 'pos': '38585952', 'alt': 'A'}} + assert results['NM_000540.2:c.14818G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008866.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000531.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000540.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_766.xml'} + + assert 'NM_001042723.1:c.14803G>A' in results.keys() + assert results['NM_001042723.1:c.14803G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001042723.1:c.14803G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001042723.1:c.14803G>A']['alt_genomic_loci'] == [] + assert results['NM_001042723.1:c.14803G>A']['transcript_description'] == 'Homo sapiens ryanodine receptor 1 (RYR1), transcript variant 2, mRNA' + assert results['NM_001042723.1:c.14803G>A']['gene_symbol'] == 'RYR1' + assert results['NM_001042723.1:c.14803G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036188.1:p.(Ala4935Thr)', 'slr': 'NP_001036188.1:p.(A4935T)'} + assert results['NM_001042723.1:c.14803G>A']['submitted_variant'] == '19-39076592-G-A' + assert results['NM_001042723.1:c.14803G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001042723.1:c.14803G>A']['hgvs_lrg_variant'] == '' + assert results['NM_001042723.1:c.14803G>A']['hgvs_transcript_variant'] == 'NM_001042723.1:c.14803G>A' + assert results['NM_001042723.1:c.14803G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001042723.1:c.14803G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.39076592G>A', 
'vcf': {'chr': 'chr19', 'ref': 'G', 'pos': '39076592', 'alt': 'A'}} + assert results['NM_001042723.1:c.14803G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.38585952G>A', 'vcf': {'chr': 'chr19', 'ref': 'G', 'pos': '38585952', 'alt': 'A'}} + assert results['NM_001042723.1:c.14803G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.39076592G>A', 'vcf': {'chr': '19', 'ref': 'G', 'pos': '39076592', 'alt': 'A'}} + assert results['NM_001042723.1:c.14803G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.38585952G>A', 'vcf': {'chr': '19', 'ref': 'G', 'pos': '38585952', 'alt': 'A'}} + assert results['NM_001042723.1:c.14803G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036188.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042723.1'} + + + def test_variant267(self): + variant = '2-50149352-T-C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001330086.1:c.4245A>G' in results.keys() + assert results['NM_001330086.1:c.4245A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330086.1:c.4245A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330086.1:c.4245A>G']['alt_genomic_loci'] == [] + assert results['NM_001330086.1:c.4245A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha9, mRNA' + assert results['NM_001330086.1:c.4245A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330086.1:c.4245A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317015.1:p.(Pro1415=)', 'slr': 'NP_001317015.1:p.(P1415=)'} + assert results['NM_001330086.1:c.4245A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001330086.1:c.4245A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330086.1:c.4245A>G']['hgvs_lrg_variant'] == '' + assert 
results['NM_001330086.1:c.4245A>G']['hgvs_transcript_variant'] == 'NM_001330086.1:c.4245A>G' + assert results['NM_001330086.1:c.4245A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330086.1:c.4245A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330086.1:c.4245A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330086.1:c.4245A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330086.1:c.4245A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330086.1:c.4245A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317015.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330086.1'} + + assert 'NM_001330083.1:c.4089A>G' in results.keys() + assert results['NM_001330083.1:c.4089A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330083.1:c.4089A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330083.1:c.4089A>G']['alt_genomic_loci'] == [] + assert results['NM_001330083.1:c.4089A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha6, mRNA' + assert results['NM_001330083.1:c.4089A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330083.1:c.4089A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317012.1:p.(Pro1363=)', 'slr': 'NP_001317012.1:p.(P1363=)'} + assert results['NM_001330083.1:c.4089A>G']['submitted_variant'] == '2-50149352-T-C' + assert 
results['NM_001330083.1:c.4089A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330083.1:c.4089A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001330083.1:c.4089A>G']['hgvs_transcript_variant'] == 'NM_001330083.1:c.4089A>G' + assert results['NM_001330083.1:c.4089A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330083.1:c.4089A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330083.1:c.4089A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330083.1:c.4089A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330083.1:c.4089A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330083.1:c.4089A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317012.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330083.1'} + + assert 'NM_001330095.1:c.4113A>G' in results.keys() + assert results['NM_001330095.1:c.4113A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330095.1:c.4113A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330095.1:c.4113A>G']['alt_genomic_loci'] == [] + assert results['NM_001330095.1:c.4113A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha14, mRNA' + assert results['NM_001330095.1:c.4113A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330095.1:c.4113A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_001317024.1:p.(Pro1371=)', 'slr': 'NP_001317024.1:p.(P1371=)'} + assert results['NM_001330095.1:c.4113A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001330095.1:c.4113A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330095.1:c.4113A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001330095.1:c.4113A>G']['hgvs_transcript_variant'] == 'NM_001330095.1:c.4113A>G' + assert results['NM_001330095.1:c.4113A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330095.1:c.4113A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330095.1:c.4113A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330095.1:c.4113A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330095.1:c.4113A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330095.1:c.4113A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317024.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330095.1'} + + assert 'NM_138735.2:c.1059A>G' in results.keys() + assert results['NM_138735.2:c.1059A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_138735.2:c.1059A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_138735.2:c.1059A>G']['alt_genomic_loci'] == [] + assert results['NM_138735.2:c.1059A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant beta, mRNA' + assert 
results['NM_138735.2:c.1059A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_138735.2:c.1059A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_620072.1:p.(Pro353=)', 'slr': 'NP_620072.1:p.(P353=)'} + assert results['NM_138735.2:c.1059A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_138735.2:c.1059A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_138735.2:c.1059A>G']['hgvs_lrg_variant'] == '' + assert results['NM_138735.2:c.1059A>G']['hgvs_transcript_variant'] == 'NM_138735.2:c.1059A>G' + assert results['NM_138735.2:c.1059A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_138735.2:c.1059A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_138735.2:c.1059A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_138735.2:c.1059A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_138735.2:c.1059A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_138735.2:c.1059A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_620072.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_138735.2'} + + assert 'NM_001330078.1:c.4254A>G' in results.keys() + assert results['NM_001330078.1:c.4254A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330078.1:c.4254A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330078.1:c.4254A>G']['alt_genomic_loci'] == [] + assert 
results['NM_001330078.1:c.4254A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha4, mRNA' + assert results['NM_001330078.1:c.4254A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330078.1:c.4254A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317007.1:p.(Pro1418=)', 'slr': 'NP_001317007.1:p.(P1418=)'} + assert results['NM_001330078.1:c.4254A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001330078.1:c.4254A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330078.1:c.4254A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001330078.1:c.4254A>G']['hgvs_transcript_variant'] == 'NM_001330078.1:c.4254A>G' + assert results['NM_001330078.1:c.4254A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330078.1:c.4254A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330078.1:c.4254A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330078.1:c.4254A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330078.1:c.4254A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330078.1:c.4254A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317007.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330078.1'} + + assert 'NM_001330094.1:c.4233A>G' in results.keys() + assert results['NM_001330094.1:c.4233A>G']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001330094.1:c.4233A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330094.1:c.4233A>G']['alt_genomic_loci'] == [] + assert results['NM_001330094.1:c.4233A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha13, mRNA' + assert results['NM_001330094.1:c.4233A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330094.1:c.4233A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317023.1:p.(Pro1411=)', 'slr': 'NP_001317023.1:p.(P1411=)'} + assert results['NM_001330094.1:c.4233A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001330094.1:c.4233A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330094.1:c.4233A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001330094.1:c.4233A>G']['hgvs_transcript_variant'] == 'NM_001330094.1:c.4233A>G' + assert results['NM_001330094.1:c.4233A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330094.1:c.4233A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330094.1:c.4233A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330094.1:c.4233A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330094.1:c.4233A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330094.1:c.4233A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317023.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330094.1'} 
+ + assert 'NM_001320157.3:c.150A>G' in results.keys() + assert results['NM_001320157.3:c.150A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001320157.3:c.150A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001320157.3:c.150A>G']['alt_genomic_loci'] == [] + assert results['NM_001320157.3:c.150A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant gamma2, mRNA' + assert results['NM_001320157.3:c.150A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001320157.3:c.150A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001307086.1:p.(Pro50=)', 'slr': 'NP_001307086.1:p.(P50=)'} + assert results['NM_001320157.3:c.150A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001320157.3:c.150A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001320157.3:c.150A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001320157.3:c.150A>G']['hgvs_transcript_variant'] == 'NM_001320157.3:c.150A>G' + assert results['NM_001320157.3:c.150A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001320157.3:c.150A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001320157.3:c.150A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001320157.3:c.150A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001320157.3:c.150A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001320157.3:c.150A>G']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001307086.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001320157.3'} + + assert 'NM_001330088.1:c.4074A>G' in results.keys() + assert results['NM_001330088.1:c.4074A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330088.1:c.4074A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330088.1:c.4074A>G']['alt_genomic_loci'] == [] + assert results['NM_001330088.1:c.4074A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha11, mRNA' + assert results['NM_001330088.1:c.4074A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330088.1:c.4074A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317017.1:p.(Pro1358=)', 'slr': 'NP_001317017.1:p.(P1358=)'} + assert results['NM_001330088.1:c.4074A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001330088.1:c.4074A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330088.1:c.4074A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001330088.1:c.4074A>G']['hgvs_transcript_variant'] == 'NM_001330088.1:c.4074A>G' + assert results['NM_001330088.1:c.4074A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330088.1:c.4074A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330088.1:c.4074A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330088.1:c.4074A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330088.1:c.4074A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': 
{'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330088.1:c.4074A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317017.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330088.1'} + + assert 'NM_001330092.1:c.1149A>G' in results.keys() + assert results['NM_001330092.1:c.1149A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330092.1:c.1149A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330092.1:c.1149A>G']['alt_genomic_loci'] == [] + assert results['NM_001330092.1:c.1149A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant beta3, mRNA' + assert results['NM_001330092.1:c.1149A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330092.1:c.1149A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317021.1:p.(Pro383=)', 'slr': 'NP_001317021.1:p.(P383=)'} + assert results['NM_001330092.1:c.1149A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001330092.1:c.1149A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330092.1:c.1149A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001330092.1:c.1149A>G']['hgvs_transcript_variant'] == 'NM_001330092.1:c.1149A>G' + assert results['NM_001330092.1:c.1149A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330092.1:c.1149A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330092.1:c.1149A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330092.1:c.1149A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + 
assert results['NM_001330092.1:c.1149A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330092.1:c.1149A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317021.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330092.1'} + + assert 'NM_138735.4:c.1059A>G' in results.keys() + assert results['NM_138735.4:c.1059A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_138735.4:c.1059A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_138735.4:c.1059A>G']['alt_genomic_loci'] == [] + assert results['NM_138735.4:c.1059A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant beta1, mRNA' + assert results['NM_138735.4:c.1059A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_138735.4:c.1059A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_620072.1:p.(Pro353=)', 'slr': 'NP_620072.1:p.(P353=)'} + assert results['NM_138735.4:c.1059A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_138735.4:c.1059A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_138735.4:c.1059A>G']['hgvs_lrg_variant'] == '' + assert results['NM_138735.4:c.1059A>G']['hgvs_transcript_variant'] == 'NM_138735.4:c.1059A>G' + assert results['NM_138735.4:c.1059A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_138735.4:c.1059A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_138735.4:c.1059A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_138735.4:c.1059A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_138735.4:c.1059A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_138735.4:c.1059A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_620072.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_138735.4'} + + assert 'NM_001330096.1:c.4044A>G' in results.keys() + assert results['NM_001330096.1:c.4044A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330096.1:c.4044A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330096.1:c.4044A>G']['alt_genomic_loci'] == [] + assert results['NM_001330096.1:c.4044A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha15, mRNA' + assert results['NM_001330096.1:c.4044A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330096.1:c.4044A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317025.1:p.(Pro1348=)', 'slr': 'NP_001317025.1:p.(P1348=)'} + assert results['NM_001330096.1:c.4044A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001330096.1:c.4044A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330096.1:c.4044A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001330096.1:c.4044A>G']['hgvs_transcript_variant'] == 'NM_001330096.1:c.4044A>G' + assert results['NM_001330096.1:c.4044A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330096.1:c.4044A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330096.1:c.4044A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': 
'49922214', 'alt': u'C'}} + assert results['NM_001330096.1:c.4044A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330096.1:c.4044A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330096.1:c.4044A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317025.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330096.1'} + + assert 'NM_001135659.2:c.4374A>G' in results.keys() + assert results['NM_001135659.2:c.4374A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001135659.2:c.4374A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001135659.2:c.4374A>G']['alt_genomic_loci'] == [] + assert results['NM_001135659.2:c.4374A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha2, mRNA' + assert results['NM_001135659.2:c.4374A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001135659.2:c.4374A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129131.1:p.(Pro1458=)', 'slr': 'NP_001129131.1:p.(P1458=)'} + assert results['NM_001135659.2:c.4374A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001135659.2:c.4374A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001135659.2:c.4374A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001135659.2:c.4374A>G']['hgvs_transcript_variant'] == 'NM_001135659.2:c.4374A>G' + assert results['NM_001135659.2:c.4374A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001135659.2:c.4374A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert 
results['NM_001135659.2:c.4374A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001135659.2:c.4374A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001135659.2:c.4374A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001135659.2:c.4374A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129131.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135659.2'} + + assert 'NM_001330085.1:c.4227A>G' in results.keys() + assert results['NM_001330085.1:c.4227A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330085.1:c.4227A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330085.1:c.4227A>G']['alt_genomic_loci'] == [] + assert results['NM_001330085.1:c.4227A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha8, mRNA' + assert results['NM_001330085.1:c.4227A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330085.1:c.4227A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317014.1:p.(Pro1409=)', 'slr': 'NP_001317014.1:p.(P1409=)'} + assert results['NM_001330085.1:c.4227A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001330085.1:c.4227A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330085.1:c.4227A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001330085.1:c.4227A>G']['hgvs_transcript_variant'] == 'NM_001330085.1:c.4227A>G' + assert results['NM_001330085.1:c.4227A>G']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_001330085.1:c.4227A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330085.1:c.4227A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330085.1:c.4227A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330085.1:c.4227A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330085.1:c.4227A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317014.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330085.1'} + + assert 'NM_001320156.1:c.159A>G' in results.keys() + assert results['NM_001320156.1:c.159A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001320156.1:c.159A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001320156.1:c.159A>G']['alt_genomic_loci'] == [] + assert results['NM_001320156.1:c.159A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant gamma1, mRNA' + assert results['NM_001320156.1:c.159A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001320156.1:c.159A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001307085.1:p.(Pro53=)', 'slr': 'NP_001307085.1:p.(P53=)'} + assert results['NM_001320156.1:c.159A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001320156.1:c.159A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001320156.1:c.159A>G']['hgvs_lrg_variant'] == '' + assert 
results['NM_001320156.1:c.159A>G']['hgvs_transcript_variant'] == 'NM_001320156.1:c.159A>G' + assert results['NM_001320156.1:c.159A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001320156.1:c.159A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001320156.1:c.159A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001320156.1:c.159A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001320156.1:c.159A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001320156.1:c.159A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001307085.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001320156.1'} + + assert 'NM_001330077.1:c.4230A>G' in results.keys() + assert results['NM_001330077.1:c.4230A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330077.1:c.4230A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330077.1:c.4230A>G']['alt_genomic_loci'] == [] + assert results['NM_001330077.1:c.4230A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha3, mRNA' + assert results['NM_001330077.1:c.4230A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330077.1:c.4230A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317006.1:p.(Pro1410=)', 'slr': 'NP_001317006.1:p.(P1410=)'} + assert results['NM_001330077.1:c.4230A>G']['submitted_variant'] == '2-50149352-T-C' + assert 
results['NM_001330077.1:c.4230A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330077.1:c.4230A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001330077.1:c.4230A>G']['hgvs_transcript_variant'] == 'NM_001330077.1:c.4230A>G' + assert results['NM_001330077.1:c.4230A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330077.1:c.4230A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330077.1:c.4230A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330077.1:c.4230A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330077.1:c.4230A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330077.1:c.4230A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317006.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330077.1'} + + assert 'NM_001330093.1:c.4251A>G' in results.keys() + assert results['NM_001330093.1:c.4251A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330093.1:c.4251A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330093.1:c.4251A>G']['alt_genomic_loci'] == [] + assert results['NM_001330093.1:c.4251A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha12, mRNA' + assert results['NM_001330093.1:c.4251A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330093.1:c.4251A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_001317022.1:p.(Pro1417=)', 'slr': 'NP_001317022.1:p.(P1417=)'} + assert results['NM_001330093.1:c.4251A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001330093.1:c.4251A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330093.1:c.4251A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001330093.1:c.4251A>G']['hgvs_transcript_variant'] == 'NM_001330093.1:c.4251A>G' + assert results['NM_001330093.1:c.4251A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330093.1:c.4251A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330093.1:c.4251A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330093.1:c.4251A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330093.1:c.4251A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330093.1:c.4251A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317022.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330093.1'} + + assert 'NM_001135659.1:c.4374A>G' in results.keys() + assert results['NM_001135659.1:c.4374A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001135659.1:c.4374A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001135659.1:c.4374A>G']['alt_genomic_loci'] == [] + assert results['NM_001135659.1:c.4374A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha2, mRNA' + assert 
results['NM_001135659.1:c.4374A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001135659.1:c.4374A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129131.1:p.(Pro1458=)', 'slr': 'NP_001129131.1:p.(P1458=)'} + assert results['NM_001135659.1:c.4374A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001135659.1:c.4374A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001135659.1:c.4374A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001135659.1:c.4374A>G']['hgvs_transcript_variant'] == 'NM_001135659.1:c.4374A>G' + assert results['NM_001135659.1:c.4374A>G']['hgvs_refseqgene_variant'] == 'NG_011878.1:g.1115323A>G' + assert results['NM_001135659.1:c.4374A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001135659.1:c.4374A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001135659.1:c.4374A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001135659.1:c.4374A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001135659.1:c.4374A>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011878.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129131.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135659.1'} + + assert 'NM_001320157.1:c.150A>G' in results.keys() + assert results['NM_001320157.1:c.150A>G']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001320157.1:c.150A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001320157.1:c.150A>G']['alt_genomic_loci'] == [] + assert results['NM_001320157.1:c.150A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant gamma2, mRNA' + assert results['NM_001320157.1:c.150A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001320157.1:c.150A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001307086.1:p.(Pro50=)', 'slr': 'NP_001307086.1:p.(P50=)'} + assert results['NM_001320157.1:c.150A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001320157.1:c.150A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001320157.1:c.150A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001320157.1:c.150A>G']['hgvs_transcript_variant'] == 'NM_001320157.1:c.150A>G' + assert results['NM_001320157.1:c.150A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001320157.1:c.150A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001320157.1:c.150A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001320157.1:c.150A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001320157.1:c.150A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001320157.1:c.150A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001307086.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001320157.1'} + + assert 
'NM_001330084.1:c.4188A>G' in results.keys() + assert results['NM_001330084.1:c.4188A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330084.1:c.4188A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330084.1:c.4188A>G']['alt_genomic_loci'] == [] + assert results['NM_001330084.1:c.4188A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha7, mRNA' + assert results['NM_001330084.1:c.4188A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330084.1:c.4188A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317013.1:p.(Pro1396=)', 'slr': 'NP_001317013.1:p.(P1396=)'} + assert results['NM_001330084.1:c.4188A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001330084.1:c.4188A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330084.1:c.4188A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001330084.1:c.4188A>G']['hgvs_transcript_variant'] == 'NM_001330084.1:c.4188A>G' + assert results['NM_001330084.1:c.4188A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330084.1:c.4188A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330084.1:c.4188A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330084.1:c.4188A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330084.1:c.4188A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330084.1:c.4188A>G']['reference_sequence_records'] == 
{'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317013.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330084.1'} + + assert 'NM_004801.4:c.4164A>G' in results.keys() + assert results['NM_004801.4:c.4164A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_004801.4:c.4164A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004801.4:c.4164A>G']['alt_genomic_loci'] == [] + assert results['NM_004801.4:c.4164A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha1, mRNA' + assert results['NM_004801.4:c.4164A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_004801.4:c.4164A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004792.1:p.(Pro1388=)', 'slr': 'NP_004792.1:p.(P1388=)'} + assert results['NM_004801.4:c.4164A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_004801.4:c.4164A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_004801.4:c.4164A>G']['hgvs_lrg_variant'] == '' + assert results['NM_004801.4:c.4164A>G']['hgvs_transcript_variant'] == 'NM_004801.4:c.4164A>G' + assert results['NM_004801.4:c.4164A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_004801.4:c.4164A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_004801.4:c.4164A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_004801.4:c.4164A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_004801.4:c.4164A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 
'alt': u'C'}} + assert results['NM_004801.4:c.4164A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004792.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004801.4'} + + assert 'NM_001330082.1:c.4221A>G' in results.keys() + assert results['NM_001330082.1:c.4221A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330082.1:c.4221A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330082.1:c.4221A>G']['alt_genomic_loci'] == [] + assert results['NM_001330082.1:c.4221A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha5, mRNA' + assert results['NM_001330082.1:c.4221A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330082.1:c.4221A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317011.1:p.(Pro1407=)', 'slr': 'NP_001317011.1:p.(P1407=)'} + assert results['NM_001330082.1:c.4221A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001330082.1:c.4221A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330082.1:c.4221A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001330082.1:c.4221A>G']['hgvs_transcript_variant'] == 'NM_001330082.1:c.4221A>G' + assert results['NM_001330082.1:c.4221A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330082.1:c.4221A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330082.1:c.4221A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330082.1:c.4221A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert 
results['NM_001330082.1:c.4221A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330082.1:c.4221A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317011.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330082.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001330091.1:c.1140A>G' in results.keys() + assert results['NM_001330091.1:c.1140A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330091.1:c.1140A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330091.1:c.1140A>G']['alt_genomic_loci'] == [] + assert results['NM_001330091.1:c.1140A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant beta2, mRNA' + assert results['NM_001330091.1:c.1140A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330091.1:c.1140A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317020.1:p.(Pro380=)', 'slr': 'NP_001317020.1:p.(P380=)'} + assert results['NM_001330091.1:c.1140A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001330091.1:c.1140A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330091.1:c.1140A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001330091.1:c.1140A>G']['hgvs_transcript_variant'] == 'NM_001330091.1:c.1140A>G' + assert results['NM_001330091.1:c.1140A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330091.1:c.1140A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330091.1:c.1140A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert 
results['NM_001330091.1:c.1140A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330091.1:c.1140A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330091.1:c.1140A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317020.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330091.1'} + + assert 'NM_001320156.3:c.159A>G' in results.keys() + assert results['NM_001320156.3:c.159A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001320156.3:c.159A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001320156.3:c.159A>G']['alt_genomic_loci'] == [] + assert results['NM_001320156.3:c.159A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant gamma1, mRNA' + assert results['NM_001320156.3:c.159A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001320156.3:c.159A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001307085.1:p.(Pro53=)', 'slr': 'NP_001307085.1:p.(P53=)'} + assert results['NM_001320156.3:c.159A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001320156.3:c.159A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001320156.3:c.159A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001320156.3:c.159A>G']['hgvs_transcript_variant'] == 'NM_001320156.3:c.159A>G' + assert results['NM_001320156.3:c.159A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001320156.3:c.159A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001320156.3:c.159A>G']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001320156.3:c.159A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001320156.3:c.159A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001320156.3:c.159A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001307085.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001320156.3'} + + assert 'NM_001330087.1:c.4053A>G' in results.keys() + assert results['NM_001330087.1:c.4053A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330087.1:c.4053A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330087.1:c.4053A>G']['alt_genomic_loci'] == [] + assert results['NM_001330087.1:c.4053A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha10, mRNA' + assert results['NM_001330087.1:c.4053A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330087.1:c.4053A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317016.1:p.(Pro1351=)', 'slr': 'NP_001317016.1:p.(P1351=)'} + assert results['NM_001330087.1:c.4053A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001330087.1:c.4053A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330087.1:c.4053A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001330087.1:c.4053A>G']['hgvs_transcript_variant'] == 'NM_001330087.1:c.4053A>G' + assert results['NM_001330087.1:c.4053A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330087.1:c.4053A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': 
{'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330087.1:c.4053A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330087.1:c.4053A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330087.1:c.4053A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330087.1:c.4053A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317016.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330087.1'} + + assert 'NM_001330097.1:c.1050A>G' in results.keys() + assert results['NM_001330097.1:c.1050A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330097.1:c.1050A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330097.1:c.1050A>G']['alt_genomic_loci'] == [] + assert results['NM_001330097.1:c.1050A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant beta4, mRNA' + assert results['NM_001330097.1:c.1050A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330097.1:c.1050A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317026.1:p.(Pro350=)', 'slr': 'NP_001317026.1:p.(P350=)'} + assert results['NM_001330097.1:c.1050A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001330097.1:c.1050A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330097.1:c.1050A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001330097.1:c.1050A>G']['hgvs_transcript_variant'] == 'NM_001330097.1:c.1050A>G' + assert results['NM_001330097.1:c.1050A>G']['hgvs_refseqgene_variant'] == '' + 
assert results['NM_001330097.1:c.1050A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330097.1:c.1050A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330097.1:c.1050A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_001330097.1:c.1050A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330097.1:c.1050A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317026.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330097.1'} + + assert 'NM_004801.5:c.4164A>G' in results.keys() + assert results['NM_004801.5:c.4164A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_004801.5:c.4164A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004801.5:c.4164A>G']['alt_genomic_loci'] == [] + assert results['NM_004801.5:c.4164A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha1, mRNA' + assert results['NM_004801.5:c.4164A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_004801.5:c.4164A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004792.1:p.(Pro1388=)', 'slr': 'NP_004792.1:p.(P1388=)'} + assert results['NM_004801.5:c.4164A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_004801.5:c.4164A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_004801.5:c.4164A>G']['hgvs_lrg_variant'] == '' + assert results['NM_004801.5:c.4164A>G']['hgvs_transcript_variant'] == 
'NM_004801.5:c.4164A>G' + assert results['NM_004801.5:c.4164A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_004801.5:c.4164A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_004801.5:c.4164A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_004801.5:c.4164A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} + assert results['NM_004801.5:c.4164A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_004801.5:c.4164A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004792.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004801.5'} + + + def test_variant268(self): + variant = '2-50847195-G-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001330096.1:c.1201C>T' in results.keys() + assert results['NM_001330096.1:c.1201C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330096.1:c.1201C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330096.1:c.1201C>T']['alt_genomic_loci'] == [] + assert results['NM_001330096.1:c.1201C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha15, mRNA' + assert results['NM_001330096.1:c.1201C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001330096.1:c.1201C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317025.1:p.(Pro401Ser)', 'slr': 'NP_001317025.1:p.(P401S)'} + assert results['NM_001330096.1:c.1201C>T']['submitted_variant'] 
== '2-50847195-G-A' + assert results['NM_001330096.1:c.1201C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001330096.1:c.1201C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001330096.1:c.1201C>T']['hgvs_transcript_variant'] == 'NM_001330096.1:c.1201C>T' + assert results['NM_001330096.1:c.1201C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330096.1:c.1201C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001330096.1:c.1201C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330096.1:c.1201C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001330096.1:c.1201C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330096.1:c.1201C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317025.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330096.1'} + + assert 'NM_001330084.1:c.1246C>T' in results.keys() + assert results['NM_001330084.1:c.1246C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330084.1:c.1246C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330084.1:c.1246C>T']['alt_genomic_loci'] == [] + assert results['NM_001330084.1:c.1246C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha7, mRNA' + assert results['NM_001330084.1:c.1246C>T']['gene_symbol'] == 'NRXN1' + assert 
results['NM_001330084.1:c.1246C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317013.1:p.(Pro416Ser)', 'slr': 'NP_001317013.1:p.(P416S)'} + assert results['NM_001330084.1:c.1246C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_001330084.1:c.1246C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001330084.1:c.1246C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001330084.1:c.1246C>T']['hgvs_transcript_variant'] == 'NM_001330084.1:c.1246C>T' + assert results['NM_001330084.1:c.1246C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330084.1:c.1246C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001330084.1:c.1246C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330084.1:c.1246C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001330084.1:c.1246C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330084.1:c.1246C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317013.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330084.1'} + + assert 'NM_001330077.1:c.1261C>T' in results.keys() + assert results['NM_001330077.1:c.1261C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330077.1:c.1261C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330077.1:c.1261C>T']['alt_genomic_loci'] == [] + assert results['NM_001330077.1:c.1261C>T']['transcript_description'] == 
'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha3, mRNA' + assert results['NM_001330077.1:c.1261C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001330077.1:c.1261C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317006.1:p.(Pro421Ser)', 'slr': 'NP_001317006.1:p.(P421S)'} + assert results['NM_001330077.1:c.1261C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_001330077.1:c.1261C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001330077.1:c.1261C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001330077.1:c.1261C>T']['hgvs_transcript_variant'] == 'NM_001330077.1:c.1261C>T' + assert results['NM_001330077.1:c.1261C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330077.1:c.1261C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001330077.1:c.1261C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330077.1:c.1261C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001330077.1:c.1261C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330077.1:c.1261C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317006.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330077.1'} + + assert 'NM_001330086.1:c.1285C>T' in results.keys() + assert results['NM_001330086.1:c.1285C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330086.1:c.1285C>T']['refseqgene_context_intronic_sequence'] == '' 
+ assert results['NM_001330086.1:c.1285C>T']['alt_genomic_loci'] == [] + assert results['NM_001330086.1:c.1285C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha9, mRNA' + assert results['NM_001330086.1:c.1285C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001330086.1:c.1285C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317015.1:p.(Pro429Ser)', 'slr': 'NP_001317015.1:p.(P429S)'} + assert results['NM_001330086.1:c.1285C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_001330086.1:c.1285C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001330086.1:c.1285C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001330086.1:c.1285C>T']['hgvs_transcript_variant'] == 'NM_001330086.1:c.1285C>T' + assert results['NM_001330086.1:c.1285C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330086.1:c.1285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001330086.1:c.1285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330086.1:c.1285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001330086.1:c.1285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330086.1:c.1285C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317015.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330086.1'} + + assert 'NM_001330088.1:c.1231C>T' in results.keys() + assert 
results['NM_001330088.1:c.1231C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330088.1:c.1231C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330088.1:c.1231C>T']['alt_genomic_loci'] == [] + assert results['NM_001330088.1:c.1231C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha11, mRNA' + assert results['NM_001330088.1:c.1231C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001330088.1:c.1231C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317017.1:p.(Pro411Ser)', 'slr': 'NP_001317017.1:p.(P411S)'} + assert results['NM_001330088.1:c.1231C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_001330088.1:c.1231C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001330088.1:c.1231C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001330088.1:c.1231C>T']['hgvs_transcript_variant'] == 'NM_001330088.1:c.1231C>T' + assert results['NM_001330088.1:c.1231C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330088.1:c.1231C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001330088.1:c.1231C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330088.1:c.1231C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001330088.1:c.1231C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330088.1:c.1231C>T']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317017.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330088.1'} + + assert 'NM_001330093.1:c.1282C>T' in results.keys() + assert results['NM_001330093.1:c.1282C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330093.1:c.1282C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330093.1:c.1282C>T']['alt_genomic_loci'] == [] + assert results['NM_001330093.1:c.1282C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha12, mRNA' + assert results['NM_001330093.1:c.1282C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001330093.1:c.1282C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317022.1:p.(Pro428Ser)', 'slr': 'NP_001317022.1:p.(P428S)'} + assert results['NM_001330093.1:c.1282C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_001330093.1:c.1282C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001330093.1:c.1282C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001330093.1:c.1282C>T']['hgvs_transcript_variant'] == 'NM_001330093.1:c.1282C>T' + assert results['NM_001330093.1:c.1282C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330093.1:c.1282C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001330093.1:c.1282C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330093.1:c.1282C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001330093.1:c.1282C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': 
{'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330093.1:c.1282C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317022.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330093.1'} + + assert 'NM_001330087.1:c.1201C>T' in results.keys() + assert results['NM_001330087.1:c.1201C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330087.1:c.1201C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330087.1:c.1201C>T']['alt_genomic_loci'] == [] + assert results['NM_001330087.1:c.1201C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha10, mRNA' + assert results['NM_001330087.1:c.1201C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001330087.1:c.1201C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317016.1:p.(Pro401Ser)', 'slr': 'NP_001317016.1:p.(P401S)'} + assert results['NM_001330087.1:c.1201C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_001330087.1:c.1201C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001330087.1:c.1201C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001330087.1:c.1201C>T']['hgvs_transcript_variant'] == 'NM_001330087.1:c.1201C>T' + assert results['NM_001330087.1:c.1201C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330087.1:c.1201C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001330087.1:c.1201C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330087.1:c.1201C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + 
assert results['NM_001330087.1:c.1201C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330087.1:c.1201C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317016.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330087.1'} + + assert 'NM_001330082.1:c.1261C>T' in results.keys() + assert results['NM_001330082.1:c.1261C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330082.1:c.1261C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330082.1:c.1261C>T']['alt_genomic_loci'] == [] + assert results['NM_001330082.1:c.1261C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha5, mRNA' + assert results['NM_001330082.1:c.1261C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001330082.1:c.1261C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317011.1:p.(Pro421Ser)', 'slr': 'NP_001317011.1:p.(P421S)'} + assert results['NM_001330082.1:c.1261C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_001330082.1:c.1261C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001330082.1:c.1261C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001330082.1:c.1261C>T']['hgvs_transcript_variant'] == 'NM_001330082.1:c.1261C>T' + assert results['NM_001330082.1:c.1261C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330082.1:c.1261C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001330082.1:c.1261C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert 
results['NM_001330082.1:c.1261C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001330082.1:c.1261C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330082.1:c.1261C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317011.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330082.1'} + + assert 'NM_001330078.1:c.1285C>T' in results.keys() + assert results['NM_001330078.1:c.1285C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330078.1:c.1285C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330078.1:c.1285C>T']['alt_genomic_loci'] == [] + assert results['NM_001330078.1:c.1285C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha4, mRNA' + assert results['NM_001330078.1:c.1285C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001330078.1:c.1285C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317007.1:p.(Pro429Ser)', 'slr': 'NP_001317007.1:p.(P429S)'} + assert results['NM_001330078.1:c.1285C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_001330078.1:c.1285C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001330078.1:c.1285C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001330078.1:c.1285C>T']['hgvs_transcript_variant'] == 'NM_001330078.1:c.1285C>T' + assert results['NM_001330078.1:c.1285C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330078.1:c.1285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert 
results['NM_001330078.1:c.1285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330078.1:c.1285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001330078.1:c.1285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330078.1:c.1285C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317007.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330078.1'} + + assert 'NM_001330094.1:c.1273C>T' in results.keys() + assert results['NM_001330094.1:c.1273C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330094.1:c.1273C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330094.1:c.1273C>T']['alt_genomic_loci'] == [] + assert results['NM_001330094.1:c.1273C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha13, mRNA' + assert results['NM_001330094.1:c.1273C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001330094.1:c.1273C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317023.1:p.(Pro425Ser)', 'slr': 'NP_001317023.1:p.(P425S)'} + assert results['NM_001330094.1:c.1273C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_001330094.1:c.1273C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001330094.1:c.1273C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001330094.1:c.1273C>T']['hgvs_transcript_variant'] == 'NM_001330094.1:c.1273C>T' + assert results['NM_001330094.1:c.1273C>T']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_001330094.1:c.1273C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001330094.1:c.1273C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330094.1:c.1273C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001330094.1:c.1273C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330094.1:c.1273C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317023.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330094.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001135659.2:c.1405C>T' in results.keys() + assert results['NM_001135659.2:c.1405C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001135659.2:c.1405C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001135659.2:c.1405C>T']['alt_genomic_loci'] == [] + assert results['NM_001135659.2:c.1405C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha2, mRNA' + assert results['NM_001135659.2:c.1405C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001135659.2:c.1405C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129131.1:p.(Pro469Ser)', 'slr': 'NP_001129131.1:p.(P469S)'} + assert results['NM_001135659.2:c.1405C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_001135659.2:c.1405C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001135659.2:c.1405C>T']['hgvs_lrg_variant'] == '' 
+ assert results['NM_001135659.2:c.1405C>T']['hgvs_transcript_variant'] == 'NM_001135659.2:c.1405C>T' + assert results['NM_001135659.2:c.1405C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001135659.2:c.1405C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001135659.2:c.1405C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001135659.2:c.1405C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001135659.2:c.1405C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001135659.2:c.1405C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129131.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135659.2'} + + assert 'NM_001330083.1:c.1246C>T' in results.keys() + assert results['NM_001330083.1:c.1246C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330083.1:c.1246C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330083.1:c.1246C>T']['alt_genomic_loci'] == [] + assert results['NM_001330083.1:c.1246C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha6, mRNA' + assert results['NM_001330083.1:c.1246C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001330083.1:c.1246C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317012.1:p.(Pro416Ser)', 'slr': 'NP_001317012.1:p.(P416S)'} + assert results['NM_001330083.1:c.1246C>T']['submitted_variant'] == '2-50847195-G-A' + assert 
results['NM_001330083.1:c.1246C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001330083.1:c.1246C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001330083.1:c.1246C>T']['hgvs_transcript_variant'] == 'NM_001330083.1:c.1246C>T' + assert results['NM_001330083.1:c.1246C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330083.1:c.1246C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001330083.1:c.1246C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330083.1:c.1246C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001330083.1:c.1246C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330083.1:c.1246C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317012.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330083.1'} + + assert 'NM_004801.5:c.1285C>T' in results.keys() + assert results['NM_004801.5:c.1285C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_004801.5:c.1285C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004801.5:c.1285C>T']['alt_genomic_loci'] == [] + assert results['NM_004801.5:c.1285C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha1, mRNA' + assert results['NM_004801.5:c.1285C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_004801.5:c.1285C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004792.1:p.(Pro429Ser)', 'slr': 
'NP_004792.1:p.(P429S)'} + assert results['NM_004801.5:c.1285C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_004801.5:c.1285C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_004801.5:c.1285C>T']['hgvs_lrg_variant'] == '' + assert results['NM_004801.5:c.1285C>T']['hgvs_transcript_variant'] == 'NM_004801.5:c.1285C>T' + assert results['NM_004801.5:c.1285C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_004801.5:c.1285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_004801.5:c.1285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_004801.5:c.1285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_004801.5:c.1285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_004801.5:c.1285C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004792.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004801.5'} + + assert 'NM_001330085.1:c.1285C>T' in results.keys() + assert results['NM_001330085.1:c.1285C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330085.1:c.1285C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330085.1:c.1285C>T']['alt_genomic_loci'] == [] + assert results['NM_001330085.1:c.1285C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha8, mRNA' + assert results['NM_001330085.1:c.1285C>T']['gene_symbol'] == 'NRXN1' + assert 
results['NM_001330085.1:c.1285C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317014.1:p.(Pro429Ser)', 'slr': 'NP_001317014.1:p.(P429S)'} + assert results['NM_001330085.1:c.1285C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_001330085.1:c.1285C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001330085.1:c.1285C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001330085.1:c.1285C>T']['hgvs_transcript_variant'] == 'NM_001330085.1:c.1285C>T' + assert results['NM_001330085.1:c.1285C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330085.1:c.1285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001330085.1:c.1285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330085.1:c.1285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001330085.1:c.1285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330085.1:c.1285C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317014.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330085.1'} + + assert 'NM_001330095.1:c.1261C>T' in results.keys() + assert results['NM_001330095.1:c.1261C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330095.1:c.1261C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330095.1:c.1261C>T']['alt_genomic_loci'] == [] + assert results['NM_001330095.1:c.1261C>T']['transcript_description'] == 
'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha14, mRNA' + assert results['NM_001330095.1:c.1261C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001330095.1:c.1261C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317024.1:p.(Pro421Ser)', 'slr': 'NP_001317024.1:p.(P421S)'} + assert results['NM_001330095.1:c.1261C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_001330095.1:c.1261C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001330095.1:c.1261C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001330095.1:c.1261C>T']['hgvs_transcript_variant'] == 'NM_001330095.1:c.1261C>T' + assert results['NM_001330095.1:c.1261C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330095.1:c.1261C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001330095.1:c.1261C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330095.1:c.1261C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001330095.1:c.1261C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330095.1:c.1261C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317024.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330095.1'} + + assert 'NM_004801.4:c.1285C>T' in results.keys() + assert results['NM_004801.4:c.1285C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_004801.4:c.1285C>T']['refseqgene_context_intronic_sequence'] == '' + 
assert results['NM_004801.4:c.1285C>T']['alt_genomic_loci'] == [] + assert results['NM_004801.4:c.1285C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha1, mRNA' + assert results['NM_004801.4:c.1285C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_004801.4:c.1285C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004792.1:p.(Pro429Ser)', 'slr': 'NP_004792.1:p.(P429S)'} + assert results['NM_004801.4:c.1285C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_004801.4:c.1285C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_004801.4:c.1285C>T']['hgvs_lrg_variant'] == '' + assert results['NM_004801.4:c.1285C>T']['hgvs_transcript_variant'] == 'NM_004801.4:c.1285C>T' + assert results['NM_004801.4:c.1285C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_004801.4:c.1285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_004801.4:c.1285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_004801.4:c.1285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_004801.4:c.1285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_004801.4:c.1285C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004792.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004801.4'} + + assert 'NM_001135659.1:c.1405C>T' in results.keys() + assert results['NM_001135659.1:c.1405C>T']['hgvs_lrg_transcript_variant'] == '' + 
assert results['NM_001135659.1:c.1405C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001135659.1:c.1405C>T']['alt_genomic_loci'] == [] + assert results['NM_001135659.1:c.1405C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha2, mRNA' + assert results['NM_001135659.1:c.1405C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001135659.1:c.1405C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129131.1:p.(Pro469Ser)', 'slr': 'NP_001129131.1:p.(P469S)'} + assert results['NM_001135659.1:c.1405C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_001135659.1:c.1405C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001135659.1:c.1405C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001135659.1:c.1405C>T']['hgvs_transcript_variant'] == 'NM_001135659.1:c.1405C>T' + assert results['NM_001135659.1:c.1405C>T']['hgvs_refseqgene_variant'] == 'NG_011878.1:g.417480C>T' + assert results['NM_001135659.1:c.1405C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001135659.1:c.1405C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001135659.1:c.1405C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} + assert results['NM_001135659.1:c.1405C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001135659.1:c.1405C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011878.1', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129131.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135659.1'} + + + def test_variant269(self): + variant = '2-71825797-C-G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001130976.1:c.3582C>G' in results.keys() + assert results['NM_001130976.1:c.3582C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001130976.1:c.3582C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001130976.1:c.3582C>G']['alt_genomic_loci'] == [] + assert results['NM_001130976.1:c.3582C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 9, mRNA' + assert results['NM_001130976.1:c.3582C>G']['gene_symbol'] == 'DYSF' + assert results['NM_001130976.1:c.3582C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124448.1:p.(Ile1194Met)', 'slr': 'NP_001124448.1:p.(I1194M)'} + assert results['NM_001130976.1:c.3582C>G']['submitted_variant'] == '2-71825797-C-G' + assert results['NM_001130976.1:c.3582C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001130976.1:c.3582C>G']['hgvs_lrg_variant'] == '' + assert results['NM_001130976.1:c.3582C>G']['hgvs_transcript_variant'] == 'NM_001130976.1:c.3582C>G' + assert results['NM_001130976.1:c.3582C>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001130976.1:c.3582C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert results['NM_001130976.1:c.3582C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert results['NM_001130976.1:c.3582C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert 
results['NM_001130976.1:c.3582C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert results['NM_001130976.1:c.3582C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124448.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130976.1'} + + assert 'NM_001130981.1:c.3675C>G' in results.keys() + assert results['NM_001130981.1:c.3675C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001130981.1:c.3675C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001130981.1:c.3675C>G']['alt_genomic_loci'] == [] + assert results['NM_001130981.1:c.3675C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 14, mRNA' + assert results['NM_001130981.1:c.3675C>G']['gene_symbol'] == 'DYSF' + assert results['NM_001130981.1:c.3675C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124453.1:p.(Ile1225Met)', 'slr': 'NP_001124453.1:p.(I1225M)'} + assert results['NM_001130981.1:c.3675C>G']['submitted_variant'] == '2-71825797-C-G' + assert results['NM_001130981.1:c.3675C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001130981.1:c.3675C>G']['hgvs_lrg_variant'] == '' + assert results['NM_001130981.1:c.3675C>G']['hgvs_transcript_variant'] == 'NM_001130981.1:c.3675C>G' + assert results['NM_001130981.1:c.3675C>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001130981.1:c.3675C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert results['NM_001130981.1:c.3675C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert results['NM_001130981.1:c.3675C>G']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert results['NM_001130981.1:c.3675C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert results['NM_001130981.1:c.3675C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124453.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130981.1'} + + assert 'NM_001130979.1:c.3717C>G' in results.keys() + assert results['NM_001130979.1:c.3717C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001130979.1:c.3717C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001130979.1:c.3717C>G']['alt_genomic_loci'] == [] + assert results['NM_001130979.1:c.3717C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 12, mRNA' + assert results['NM_001130979.1:c.3717C>G']['gene_symbol'] == 'DYSF' + assert results['NM_001130979.1:c.3717C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124451.1:p.(Ile1239Met)', 'slr': 'NP_001124451.1:p.(I1239M)'} + assert results['NM_001130979.1:c.3717C>G']['submitted_variant'] == '2-71825797-C-G' + assert results['NM_001130979.1:c.3717C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001130979.1:c.3717C>G']['hgvs_lrg_variant'] == '' + assert results['NM_001130979.1:c.3717C>G']['hgvs_transcript_variant'] == 'NM_001130979.1:c.3717C>G' + assert results['NM_001130979.1:c.3717C>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001130979.1:c.3717C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert results['NM_001130979.1:c.3717C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 
'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert results['NM_001130979.1:c.3717C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert results['NM_001130979.1:c.3717C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert results['NM_001130979.1:c.3717C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124451.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130979.1'} + + assert 'NM_001130985.1:c.3678C>G' in results.keys() + assert results['NM_001130985.1:c.3678C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001130985.1:c.3678C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001130985.1:c.3678C>G']['alt_genomic_loci'] == [] + assert results['NM_001130985.1:c.3678C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 4, mRNA' + assert results['NM_001130985.1:c.3678C>G']['gene_symbol'] == 'DYSF' + assert results['NM_001130985.1:c.3678C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124457.1:p.(Ile1226Met)', 'slr': 'NP_001124457.1:p.(I1226M)'} + assert results['NM_001130985.1:c.3678C>G']['submitted_variant'] == '2-71825797-C-G' + assert results['NM_001130985.1:c.3678C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001130985.1:c.3678C>G']['hgvs_lrg_variant'] == '' + assert results['NM_001130985.1:c.3678C>G']['hgvs_transcript_variant'] == 'NM_001130985.1:c.3678C>G' + assert results['NM_001130985.1:c.3678C>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001130985.1:c.3678C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert 
results['NM_001130985.1:c.3678C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert results['NM_001130985.1:c.3678C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert results['NM_001130985.1:c.3678C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert results['NM_001130985.1:c.3678C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124457.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130985.1'} + + assert 'NM_001130987.1:c.3678C>G' in results.keys() + assert results['NM_001130987.1:c.3678C>G']['hgvs_lrg_transcript_variant'] == 'LRG_845t2:c.3678C>G' + assert results['NM_001130987.1:c.3678C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001130987.1:c.3678C>G']['alt_genomic_loci'] == [] + assert results['NM_001130987.1:c.3678C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 1, mRNA' + assert results['NM_001130987.1:c.3678C>G']['gene_symbol'] == 'DYSF' + assert results['NM_001130987.1:c.3678C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124459.1:p.(Ile1226Met)', 'slr': 'NP_001124459.1:p.(I1226M)'} + assert results['NM_001130987.1:c.3678C>G']['submitted_variant'] == '2-71825797-C-G' + assert results['NM_001130987.1:c.3678C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001130987.1:c.3678C>G']['hgvs_lrg_variant'] == '' + assert results['NM_001130987.1:c.3678C>G']['hgvs_transcript_variant'] == 'NM_001130987.1:c.3678C>G' + assert results['NM_001130987.1:c.3678C>G']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_001130987.1:c.3678C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert results['NM_001130987.1:c.3678C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert results['NM_001130987.1:c.3678C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert results['NM_001130987.1:c.3678C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert results['NM_001130987.1:c.3678C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124459.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130987.1'} + + assert 'NM_001130983.1:c.3627C>G' in results.keys() + assert results['NM_001130983.1:c.3627C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001130983.1:c.3627C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001130983.1:c.3627C>G']['alt_genomic_loci'] == [] + assert results['NM_001130983.1:c.3627C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 6, mRNA' + assert results['NM_001130983.1:c.3627C>G']['gene_symbol'] == 'DYSF' + assert results['NM_001130983.1:c.3627C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124455.1:p.(Ile1209Met)', 'slr': 'NP_001124455.1:p.(I1209M)'} + assert results['NM_001130983.1:c.3627C>G']['submitted_variant'] == '2-71825797-C-G' + assert results['NM_001130983.1:c.3627C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001130983.1:c.3627C>G']['hgvs_lrg_variant'] == '' + assert 
results['NM_001130983.1:c.3627C>G']['hgvs_transcript_variant'] == 'NM_001130983.1:c.3627C>G' + assert results['NM_001130983.1:c.3627C>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001130983.1:c.3627C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert results['NM_001130983.1:c.3627C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert results['NM_001130983.1:c.3627C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert results['NM_001130983.1:c.3627C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert results['NM_001130983.1:c.3627C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124455.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130983.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001130980.1:c.3675C>G' in results.keys() + assert results['NM_001130980.1:c.3675C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001130980.1:c.3675C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001130980.1:c.3675C>G']['alt_genomic_loci'] == [] + assert results['NM_001130980.1:c.3675C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 13, mRNA' + assert results['NM_001130980.1:c.3675C>G']['gene_symbol'] == 'DYSF' + assert results['NM_001130980.1:c.3675C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124452.1:p.(Ile1225Met)', 'slr': 'NP_001124452.1:p.(I1225M)'} + assert results['NM_001130980.1:c.3675C>G']['submitted_variant'] == 
'2-71825797-C-G' + assert results['NM_001130980.1:c.3675C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001130980.1:c.3675C>G']['hgvs_lrg_variant'] == '' + assert results['NM_001130980.1:c.3675C>G']['hgvs_transcript_variant'] == 'NM_001130980.1:c.3675C>G' + assert results['NM_001130980.1:c.3675C>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001130980.1:c.3675C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert results['NM_001130980.1:c.3675C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert results['NM_001130980.1:c.3675C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert results['NM_001130980.1:c.3675C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert results['NM_001130980.1:c.3675C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124452.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130980.1'} + + assert 'NM_003494.3:c.3624C>G' in results.keys() + assert results['NM_003494.3:c.3624C>G']['hgvs_lrg_transcript_variant'] == 'LRG_845t1:c.3624C>G' + assert results['NM_003494.3:c.3624C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003494.3:c.3624C>G']['alt_genomic_loci'] == [] + assert results['NM_003494.3:c.3624C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 8, mRNA' + assert results['NM_003494.3:c.3624C>G']['gene_symbol'] == 'DYSF' + assert results['NM_003494.3:c.3624C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_003485.1(LRG_845p1):p.(Ile1208Met)', 'slr': 'NP_003485.1:p.(I1208M)'} + assert results['NM_003494.3:c.3624C>G']['submitted_variant'] == '2-71825797-C-G' + assert results['NM_003494.3:c.3624C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_003494.3:c.3624C>G']['hgvs_lrg_variant'] == '' + assert results['NM_003494.3:c.3624C>G']['hgvs_transcript_variant'] == 'NM_003494.3:c.3624C>G' + assert results['NM_003494.3:c.3624C>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_003494.3:c.3624C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert results['NM_003494.3:c.3624C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert results['NM_003494.3:c.3624C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert results['NM_003494.3:c.3624C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert results['NM_003494.3:c.3624C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003485.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003494.3'} + + assert 'NM_001130984.1:c.3585C>G' in results.keys() + assert results['NM_001130984.1:c.3585C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001130984.1:c.3585C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001130984.1:c.3585C>G']['alt_genomic_loci'] == [] + assert results['NM_001130984.1:c.3585C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 5, mRNA' + assert results['NM_001130984.1:c.3585C>G']['gene_symbol'] == 
'DYSF' + assert results['NM_001130984.1:c.3585C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124456.1:p.(Ile1195Met)', 'slr': 'NP_001124456.1:p.(I1195M)'} + assert results['NM_001130984.1:c.3585C>G']['submitted_variant'] == '2-71825797-C-G' + assert results['NM_001130984.1:c.3585C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001130984.1:c.3585C>G']['hgvs_lrg_variant'] == '' + assert results['NM_001130984.1:c.3585C>G']['hgvs_transcript_variant'] == 'NM_001130984.1:c.3585C>G' + assert results['NM_001130984.1:c.3585C>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001130984.1:c.3585C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert results['NM_001130984.1:c.3585C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert results['NM_001130984.1:c.3585C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert results['NM_001130984.1:c.3585C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert results['NM_001130984.1:c.3585C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124456.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130984.1'} + + assert 'NM_001130977.1:c.3582C>G' in results.keys() + assert results['NM_001130977.1:c.3582C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001130977.1:c.3582C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001130977.1:c.3582C>G']['alt_genomic_loci'] == [] + assert 
results['NM_001130977.1:c.3582C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 10, mRNA' + assert results['NM_001130977.1:c.3582C>G']['gene_symbol'] == 'DYSF' + assert results['NM_001130977.1:c.3582C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124449.1:p.(Ile1194Met)', 'slr': 'NP_001124449.1:p.(I1194M)'} + assert results['NM_001130977.1:c.3582C>G']['submitted_variant'] == '2-71825797-C-G' + assert results['NM_001130977.1:c.3582C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001130977.1:c.3582C>G']['hgvs_lrg_variant'] == '' + assert results['NM_001130977.1:c.3582C>G']['hgvs_transcript_variant'] == 'NM_001130977.1:c.3582C>G' + assert results['NM_001130977.1:c.3582C>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001130977.1:c.3582C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert results['NM_001130977.1:c.3582C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert results['NM_001130977.1:c.3582C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert results['NM_001130977.1:c.3582C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert results['NM_001130977.1:c.3582C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124449.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130977.1'} + + assert 'NM_001130455.1:c.3627C>G' in results.keys() + assert results['NM_001130455.1:c.3627C>G']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001130455.1:c.3627C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001130455.1:c.3627C>G']['alt_genomic_loci'] == [] + assert results['NM_001130455.1:c.3627C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 2, mRNA' + assert results['NM_001130455.1:c.3627C>G']['gene_symbol'] == 'DYSF' + assert results['NM_001130455.1:c.3627C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001123927.1:p.(Ile1209Met)', 'slr': 'NP_001123927.1:p.(I1209M)'} + assert results['NM_001130455.1:c.3627C>G']['submitted_variant'] == '2-71825797-C-G' + assert results['NM_001130455.1:c.3627C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001130455.1:c.3627C>G']['hgvs_lrg_variant'] == '' + assert results['NM_001130455.1:c.3627C>G']['hgvs_transcript_variant'] == 'NM_001130455.1:c.3627C>G' + assert results['NM_001130455.1:c.3627C>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001130455.1:c.3627C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert results['NM_001130455.1:c.3627C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert results['NM_001130455.1:c.3627C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert results['NM_001130455.1:c.3627C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert results['NM_001130455.1:c.3627C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001123927.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130455.1'} + + assert 
'NM_001130978.1:c.3624C>G' in results.keys() + assert results['NM_001130978.1:c.3624C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001130978.1:c.3624C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001130978.1:c.3624C>G']['alt_genomic_loci'] == [] + assert results['NM_001130978.1:c.3624C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 11, mRNA' + assert results['NM_001130978.1:c.3624C>G']['gene_symbol'] == 'DYSF' + assert results['NM_001130978.1:c.3624C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124450.1:p.(Ile1208Met)', 'slr': 'NP_001124450.1:p.(I1208M)'} + assert results['NM_001130978.1:c.3624C>G']['submitted_variant'] == '2-71825797-C-G' + assert results['NM_001130978.1:c.3624C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001130978.1:c.3624C>G']['hgvs_lrg_variant'] == 'LRG_845:g.150045C>G' + assert results['NM_001130978.1:c.3624C>G']['hgvs_transcript_variant'] == 'NM_001130978.1:c.3624C>G' + assert results['NM_001130978.1:c.3624C>G']['hgvs_refseqgene_variant'] == 'NG_008694.1:g.150045C>G' + assert results['NM_001130978.1:c.3624C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert results['NM_001130978.1:c.3624C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert results['NM_001130978.1:c.3624C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert results['NM_001130978.1:c.3624C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert 
results['NM_001130978.1:c.3624C>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008694.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124450.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130978.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_845.xml'} + + assert 'NM_001130982.1:c.3720C>G' in results.keys() + assert results['NM_001130982.1:c.3720C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001130982.1:c.3720C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001130982.1:c.3720C>G']['alt_genomic_loci'] == [] + assert results['NM_001130982.1:c.3720C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 7, mRNA' + assert results['NM_001130982.1:c.3720C>G']['gene_symbol'] == 'DYSF' + assert results['NM_001130982.1:c.3720C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124454.1:p.(Ile1240Met)', 'slr': 'NP_001124454.1:p.(I1240M)'} + assert results['NM_001130982.1:c.3720C>G']['submitted_variant'] == '2-71825797-C-G' + assert results['NM_001130982.1:c.3720C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001130982.1:c.3720C>G']['hgvs_lrg_variant'] == '' + assert results['NM_001130982.1:c.3720C>G']['hgvs_transcript_variant'] == 'NM_001130982.1:c.3720C>G' + assert results['NM_001130982.1:c.3720C>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001130982.1:c.3720C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert results['NM_001130982.1:c.3720C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert results['NM_001130982.1:c.3720C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': 
{'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert results['NM_001130982.1:c.3720C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert results['NM_001130982.1:c.3720C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124454.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130982.1'} + + assert 'NM_001130986.1:c.3585C>G' in results.keys() + assert results['NM_001130986.1:c.3585C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001130986.1:c.3585C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001130986.1:c.3585C>G']['alt_genomic_loci'] == [] + assert results['NM_001130986.1:c.3585C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 3, mRNA' + assert results['NM_001130986.1:c.3585C>G']['gene_symbol'] == 'DYSF' + assert results['NM_001130986.1:c.3585C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124458.1:p.(Ile1195Met)', 'slr': 'NP_001124458.1:p.(I1195M)'} + assert results['NM_001130986.1:c.3585C>G']['submitted_variant'] == '2-71825797-C-G' + assert results['NM_001130986.1:c.3585C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001130986.1:c.3585C>G']['hgvs_lrg_variant'] == '' + assert results['NM_001130986.1:c.3585C>G']['hgvs_transcript_variant'] == 'NM_001130986.1:c.3585C>G' + assert results['NM_001130986.1:c.3585C>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001130986.1:c.3585C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert results['NM_001130986.1:c.3585C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert 
results['NM_001130986.1:c.3585C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} + assert results['NM_001130986.1:c.3585C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} + assert results['NM_001130986.1:c.3585C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124458.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130986.1'} + + + def test_variant270(self): + variant = '2-166179712-G-C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_021007.2:c.1718G>C' in results.keys() + assert results['NM_021007.2:c.1718G>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_021007.2:c.1718G>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_021007.2:c.1718G>C']['alt_genomic_loci'] == [] + assert results['NM_021007.2:c.1718G>C']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 1, mRNA' + assert results['NM_021007.2:c.1718G>C']['gene_symbol'] == 'SCN2A' + assert results['NM_021007.2:c.1718G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_066287.2:p.(Ser573Thr)', 'slr': 'NP_066287.2:p.(S573T)'} + assert results['NM_021007.2:c.1718G>C']['submitted_variant'] == '2-166179712-G-C' + assert results['NM_021007.2:c.1718G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_021007.2:c.1718G>C']['hgvs_lrg_variant'] == '' + assert results['NM_021007.2:c.1718G>C']['hgvs_transcript_variant'] == 'NM_021007.2:c.1718G>C' + assert results['NM_021007.2:c.1718G>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_021007.2:c.1718G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166179712G>C', 'vcf': {'chr': 'chr2', 
'ref': 'G', 'pos': '166179712', 'alt': 'C'}} + assert results['NM_021007.2:c.1718G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165323202G>C', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '165323202', 'alt': 'C'}} + assert results['NM_021007.2:c.1718G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166179712G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '166179712', 'alt': 'C'}} + assert results['NM_021007.2:c.1718G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165323202G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '165323202', 'alt': 'C'}} + assert results['NM_021007.2:c.1718G>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_066287.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021007.2'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001040143.1:c.1718G>C' in results.keys() + assert results['NM_001040143.1:c.1718G>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001040143.1:c.1718G>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001040143.1:c.1718G>C']['alt_genomic_loci'] == [] + assert results['NM_001040143.1:c.1718G>C']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 3, mRNA' + assert results['NM_001040143.1:c.1718G>C']['gene_symbol'] == 'SCN2A' + assert results['NM_001040143.1:c.1718G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035233.1:p.(Ser573Thr)', 'slr': 'NP_001035233.1:p.(S573T)'} + assert results['NM_001040143.1:c.1718G>C']['submitted_variant'] == '2-166179712-G-C' + assert results['NM_001040143.1:c.1718G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_001040143.1:c.1718G>C']['hgvs_lrg_variant'] == '' + assert results['NM_001040143.1:c.1718G>C']['hgvs_transcript_variant'] == 'NM_001040143.1:c.1718G>C' + assert 
results['NM_001040143.1:c.1718G>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_001040143.1:c.1718G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166179712G>C', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '166179712', 'alt': 'C'}} + assert results['NM_001040143.1:c.1718G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165323202G>C', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '165323202', 'alt': 'C'}} + assert results['NM_001040143.1:c.1718G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166179712G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '166179712', 'alt': 'C'}} + assert results['NM_001040143.1:c.1718G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165323202G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '165323202', 'alt': 'C'}} + assert results['NM_001040143.1:c.1718G>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035233.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040143.1'} + + assert 'NM_001040142.1:c.1718G>C' in results.keys() + assert results['NM_001040142.1:c.1718G>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001040142.1:c.1718G>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001040142.1:c.1718G>C']['alt_genomic_loci'] == [] + assert results['NM_001040142.1:c.1718G>C']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 2, mRNA' + assert results['NM_001040142.1:c.1718G>C']['gene_symbol'] == 'SCN2A' + assert results['NM_001040142.1:c.1718G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035232.1:p.(Ser573Thr)', 'slr': 'NP_001035232.1:p.(S573T)'} + assert results['NM_001040142.1:c.1718G>C']['submitted_variant'] == '2-166179712-G-C' + assert results['NM_001040142.1:c.1718G>C']['genome_context_intronic_sequence'] == '' + 
assert results['NM_001040142.1:c.1718G>C']['hgvs_lrg_variant'] == '' + assert results['NM_001040142.1:c.1718G>C']['hgvs_transcript_variant'] == 'NM_001040142.1:c.1718G>C' + assert results['NM_001040142.1:c.1718G>C']['hgvs_refseqgene_variant'] == 'NG_008143.1:g.88801G>C' + assert results['NM_001040142.1:c.1718G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166179712G>C', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '166179712', 'alt': 'C'}} + assert results['NM_001040142.1:c.1718G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165323202G>C', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '165323202', 'alt': 'C'}} + assert results['NM_001040142.1:c.1718G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166179712G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '166179712', 'alt': 'C'}} + assert results['NM_001040142.1:c.1718G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165323202G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '165323202', 'alt': 'C'}} + assert results['NM_001040142.1:c.1718G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008143.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035232.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040142.1'} + + + def test_variant271(self): + variant = '2-166183371-A-G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_021007.2:c.2026A>G' in results.keys() + assert results['NM_021007.2:c.2026A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_021007.2:c.2026A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_021007.2:c.2026A>G']['alt_genomic_loci'] == [] + assert results['NM_021007.2:c.2026A>G']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 1, mRNA' + assert 
results['NM_021007.2:c.2026A>G']['gene_symbol'] == 'SCN2A' + assert results['NM_021007.2:c.2026A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_066287.2:p.(Thr676Ala)', 'slr': 'NP_066287.2:p.(T676A)'} + assert results['NM_021007.2:c.2026A>G']['submitted_variant'] == '2-166183371-A-G' + assert results['NM_021007.2:c.2026A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_021007.2:c.2026A>G']['hgvs_lrg_variant'] == '' + assert results['NM_021007.2:c.2026A>G']['hgvs_transcript_variant'] == 'NM_021007.2:c.2026A>G' + assert results['NM_021007.2:c.2026A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_021007.2:c.2026A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166183371A>G', 'vcf': {'chr': 'chr2', 'ref': 'A', 'pos': '166183371', 'alt': 'G'}} + assert results['NM_021007.2:c.2026A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165326861A>G', 'vcf': {'chr': 'chr2', 'ref': 'A', 'pos': '165326861', 'alt': 'G'}} + assert results['NM_021007.2:c.2026A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166183371A>G', 'vcf': {'chr': '2', 'ref': 'A', 'pos': '166183371', 'alt': 'G'}} + assert results['NM_021007.2:c.2026A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165326861A>G', 'vcf': {'chr': '2', 'ref': 'A', 'pos': '165326861', 'alt': 'G'}} + assert results['NM_021007.2:c.2026A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_066287.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021007.2'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001040143.1:c.2026A>G' in results.keys() + assert results['NM_001040143.1:c.2026A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001040143.1:c.2026A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001040143.1:c.2026A>G']['alt_genomic_loci'] == [] + assert 
results['NM_001040143.1:c.2026A>G']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 3, mRNA' + assert results['NM_001040143.1:c.2026A>G']['gene_symbol'] == 'SCN2A' + assert results['NM_001040143.1:c.2026A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035233.1:p.(Thr676Ala)', 'slr': 'NP_001035233.1:p.(T676A)'} + assert results['NM_001040143.1:c.2026A>G']['submitted_variant'] == '2-166183371-A-G' + assert results['NM_001040143.1:c.2026A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001040143.1:c.2026A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001040143.1:c.2026A>G']['hgvs_transcript_variant'] == 'NM_001040143.1:c.2026A>G' + assert results['NM_001040143.1:c.2026A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001040143.1:c.2026A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166183371A>G', 'vcf': {'chr': 'chr2', 'ref': 'A', 'pos': '166183371', 'alt': 'G'}} + assert results['NM_001040143.1:c.2026A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165326861A>G', 'vcf': {'chr': 'chr2', 'ref': 'A', 'pos': '165326861', 'alt': 'G'}} + assert results['NM_001040143.1:c.2026A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166183371A>G', 'vcf': {'chr': '2', 'ref': 'A', 'pos': '166183371', 'alt': 'G'}} + assert results['NM_001040143.1:c.2026A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165326861A>G', 'vcf': {'chr': '2', 'ref': 'A', 'pos': '165326861', 'alt': 'G'}} + assert results['NM_001040143.1:c.2026A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035233.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040143.1'} + + assert 'NM_001040142.1:c.2026A>G' in results.keys() + assert results['NM_001040142.1:c.2026A>G']['hgvs_lrg_transcript_variant'] 
== '' + assert results['NM_001040142.1:c.2026A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001040142.1:c.2026A>G']['alt_genomic_loci'] == [] + assert results['NM_001040142.1:c.2026A>G']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 2, mRNA' + assert results['NM_001040142.1:c.2026A>G']['gene_symbol'] == 'SCN2A' + assert results['NM_001040142.1:c.2026A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035232.1:p.(Thr676Ala)', 'slr': 'NP_001035232.1:p.(T676A)'} + assert results['NM_001040142.1:c.2026A>G']['submitted_variant'] == '2-166183371-A-G' + assert results['NM_001040142.1:c.2026A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001040142.1:c.2026A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001040142.1:c.2026A>G']['hgvs_transcript_variant'] == 'NM_001040142.1:c.2026A>G' + assert results['NM_001040142.1:c.2026A>G']['hgvs_refseqgene_variant'] == 'NG_008143.1:g.92460A>G' + assert results['NM_001040142.1:c.2026A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166183371A>G', 'vcf': {'chr': 'chr2', 'ref': 'A', 'pos': '166183371', 'alt': 'G'}} + assert results['NM_001040142.1:c.2026A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165326861A>G', 'vcf': {'chr': 'chr2', 'ref': 'A', 'pos': '165326861', 'alt': 'G'}} + assert results['NM_001040142.1:c.2026A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166183371A>G', 'vcf': {'chr': '2', 'ref': 'A', 'pos': '166183371', 'alt': 'G'}} + assert results['NM_001040142.1:c.2026A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165326861A>G', 'vcf': {'chr': '2', 'ref': 'A', 'pos': '165326861', 'alt': 'G'}} + assert results['NM_001040142.1:c.2026A>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008143.1', 
'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035232.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040142.1'} + + + def test_variant272(self): + variant = '2-166929889-GTCCAGGTCCT-GAC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001353951.1:c.233_242delinsGT' in results.keys() + assert results['NM_001353951.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353951.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353951.1:c.233_242delinsGT']['alt_genomic_loci'] == [] + assert results['NM_001353951.1:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 8, mRNA' + assert results['NM_001353951.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001353951.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340880.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340880.1:p.(E78Gfs*7)'} + assert results['NM_001353951.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_001353951.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001353951.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + assert results['NM_001353951.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353951.1:c.233_242delinsGT' + assert results['NM_001353951.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353951.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001353951.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': 
'166073380', 'alt': u'AC'}} + assert results['NM_001353951.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001353951.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353951.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340880.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353951.1'} + + assert 'NM_001353958.1:c.233_242delinsGT' in results.keys() + assert results['NM_001353958.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353958.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353958.1:c.233_242delinsGT']['alt_genomic_loci'] == [] + assert results['NM_001353958.1:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 13, mRNA' + assert results['NM_001353958.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001353958.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340887.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340887.1:p.(E78Gfs*7)'} + assert results['NM_001353958.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_001353958.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001353958.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + assert results['NM_001353958.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353958.1:c.233_242delinsGT' + assert results['NM_001353958.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_001353958.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001353958.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353958.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001353958.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353958.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340887.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353958.1'} + + assert 'NM_001202435.1:c.233_242delinsGT' in results.keys() + assert results['NM_001202435.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001202435.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001202435.1:c.233_242delinsGT']['alt_genomic_loci'] == [] + assert results['NM_001202435.1:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 4, mRNA' + assert results['NM_001202435.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001202435.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001189364.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001189364.1:p.(E78Gfs*7)'} + assert 
results['NM_001202435.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_001202435.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001202435.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + assert results['NM_001202435.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001202435.1:c.233_242delinsGT' + assert results['NM_001202435.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001202435.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001202435.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001202435.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001202435.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001202435.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001189364.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001202435.1'} + + assert 'NR_148667.1:n.638_647delinsGT' in results.keys() + assert results['NR_148667.1:n.638_647delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_148667.1:n.638_647delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_148667.1:n.638_647delinsGT']['alt_genomic_loci'] == [] + assert 
results['NR_148667.1:n.638_647delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 16, non-coding RNA' + assert results['NR_148667.1:n.638_647delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NR_148667.1:n.638_647delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_148667.1:n.638_647delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NR_148667.1:n.638_647delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NR_148667.1:n.638_647delinsGT']['hgvs_lrg_variant'] == '' + assert results['NR_148667.1:n.638_647delinsGT']['hgvs_transcript_variant'] == 'NR_148667.1:n.638_647delinsGT' + assert results['NR_148667.1:n.638_647delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NR_148667.1:n.638_647delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NR_148667.1:n.638_647delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NR_148667.1:n.638_647delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NR_148667.1:n.638_647delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NR_148667.1:n.638_647delinsGT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_148667.1'} + + assert 
'NM_001165964.1:c.233_242delinsGT' in results.keys() + assert results['NM_001165964.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001165964.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001165964.1:c.233_242delinsGT']['alt_genomic_loci'] == [] + assert results['NM_001165964.1:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 3, mRNA' + assert results['NM_001165964.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001165964.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159436.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159436.1:p.(E78Gfs*7)'} + assert results['NM_001165964.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_001165964.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001165964.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + assert results['NM_001165964.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001165964.1:c.233_242delinsGT' + assert results['NM_001165964.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001165964.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001165964.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001165964.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert 
results['NM_001165964.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001165964.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159436.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165964.1'} + + assert 'NM_001202435.2:c.233_242delinsGT' in results.keys() + assert results['NM_001202435.2:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001202435.2:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001202435.2:c.233_242delinsGT']['alt_genomic_loci'] == [] + assert results['NM_001202435.2:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 4, mRNA' + assert results['NM_001202435.2:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001202435.2:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001189364.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001189364.1:p.(E78Gfs*7)'} + assert results['NM_001202435.2:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_001202435.2:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001202435.2:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + assert results['NM_001202435.2:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001202435.2:c.233_242delinsGT' + assert results['NM_001202435.2:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001202435.2:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert 
results['NM_001202435.2:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001202435.2:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001202435.2:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001202435.2:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001189364.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001202435.2'} + + assert 'NM_006920.5:c.233_242delinsGT' in results.keys() + assert results['NM_006920.5:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_006920.5:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_006920.5:c.233_242delinsGT']['alt_genomic_loci'] == [] + assert results['NM_006920.5:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 2, mRNA' + assert results['NM_006920.5:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_006920.5:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_008851.3(LRG_8p1):p.(Glu78GlyfsTer7)', 'slr': 'NP_008851.3:p.(E78Gfs*7)'} + assert results['NM_006920.5:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_006920.5:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_006920.5:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + assert 
results['NM_006920.5:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_006920.5:c.233_242delinsGT' + assert results['NM_006920.5:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NM_006920.5:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_006920.5:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_006920.5:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_006920.5:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_006920.5:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_008851.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006920.5'} + + assert 'NM_001165963.1:c.233_242delinsGT' in results.keys() + assert results['NM_001165963.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001165963.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001165963.1:c.233_242delinsGT']['alt_genomic_loci'] == [] + assert results['NM_001165963.1:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 1, mRNA' + assert results['NM_001165963.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert 
results['NM_001165963.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159435.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159435.1:p.(E78Gfs*7)'} + assert results['NM_001165963.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_001165963.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001165963.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + assert results['NM_001165963.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001165963.1:c.233_242delinsGT' + assert results['NM_001165963.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001165963.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001165963.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001165963.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001165963.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001165963.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159435.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165963.1'} + + assert 'NM_001353955.1:c.233_242delinsGT' in results.keys() + assert results['NM_001353955.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' 
+ assert results['NM_001353955.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353955.1:c.233_242delinsGT']['alt_genomic_loci'] == [] + assert results['NM_001353955.1:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 11, mRNA' + assert results['NM_001353955.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001353955.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340884.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340884.1:p.(E78Gfs*7)'} + assert results['NM_001353955.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_001353955.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001353955.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + assert results['NM_001353955.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353955.1:c.233_242delinsGT' + assert results['NM_001353955.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353955.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001353955.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353955.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001353955.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': 
{'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353955.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340884.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353955.1'} + + assert 'NM_001353961.1:c.-2193_-2184delinsGT' in results.keys() + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['alt_genomic_loci'] == [] + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 15, mRNA' + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340890.1:p.?', 'slr': 'NP_001340890.1:p.?'} + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['hgvs_lrg_variant'] == '' + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['hgvs_transcript_variant'] == 'NM_001353961.1:c.-2193_-2184delinsGT' + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': 
{'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340890.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353961.1'} + + assert 'NM_001165963.2:c.233_242delinsGT' in results.keys() + assert results['NM_001165963.2:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001165963.2:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001165963.2:c.233_242delinsGT']['alt_genomic_loci'] == [] + assert results['NM_001165963.2:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 1, mRNA' + assert results['NM_001165963.2:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001165963.2:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159435.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159435.1:p.(E78Gfs*7)'} + assert results['NM_001165963.2:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_001165963.2:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001165963.2:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + assert results['NM_001165963.2:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001165963.2:c.233_242delinsGT' + assert 
results['NM_001165963.2:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001165963.2:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001165963.2:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001165963.2:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001165963.2:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001165963.2:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159435.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165963.2'} + + assert 'NM_001353950.1:c.233_242delinsGT' in results.keys() + assert results['NM_001353950.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353950.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353950.1:c.233_242delinsGT']['alt_genomic_loci'] == [] + assert results['NM_001353950.1:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 7, mRNA' + assert results['NM_001353950.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001353950.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340879.1:p.(Glu78GlyfsTer7)', 
'slr': 'NP_001340879.1:p.(E78Gfs*7)'} + assert results['NM_001353950.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_001353950.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001353950.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + assert results['NM_001353950.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353950.1:c.233_242delinsGT' + assert results['NM_001353950.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353950.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001353950.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353950.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001353950.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353950.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340879.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353950.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001353948.1:c.233_242delinsGT' in results.keys() + assert results['NM_001353948.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001353948.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353948.1:c.233_242delinsGT']['alt_genomic_loci'] == [] + assert results['NM_001353948.1:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 5, mRNA' + assert results['NM_001353948.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001353948.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340877.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340877.1:p.(E78Gfs*7)'} + assert results['NM_001353948.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_001353948.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001353948.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + assert results['NM_001353948.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353948.1:c.233_242delinsGT' + assert results['NM_001353948.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353948.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001353948.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353948.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001353948.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 
'2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353948.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340877.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353948.1'} + + assert 'NM_001353949.1:c.233_242delinsGT' in results.keys() + assert results['NM_001353949.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353949.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353949.1:c.233_242delinsGT']['alt_genomic_loci'] == [] + assert results['NM_001353949.1:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 6, mRNA' + assert results['NM_001353949.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001353949.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340878.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340878.1:p.(E78Gfs*7)'} + assert results['NM_001353949.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_001353949.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001353949.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + assert results['NM_001353949.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353949.1:c.233_242delinsGT' + assert results['NM_001353949.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353949.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001353949.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': 
'166073380', 'alt': u'AC'}} + assert results['NM_001353949.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001353949.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353949.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340878.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353949.1'} + + assert 'NM_001353957.1:c.233_242delinsGT' in results.keys() + assert results['NM_001353957.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353957.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353957.1:c.233_242delinsGT']['alt_genomic_loci'] == [] + assert results['NM_001353957.1:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 12, mRNA' + assert results['NM_001353957.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001353957.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340886.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340886.1:p.(E78Gfs*7)'} + assert results['NM_001353957.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_001353957.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001353957.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + assert results['NM_001353957.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353957.1:c.233_242delinsGT' + assert results['NM_001353957.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_001353957.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001353957.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353957.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001353957.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353957.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340886.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353957.1'} + + assert 'NM_001353952.1:c.233_242delinsGT' in results.keys() + assert results['NM_001353952.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353952.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353952.1:c.233_242delinsGT']['alt_genomic_loci'] == [] + assert results['NM_001353952.1:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 9, mRNA' + assert results['NM_001353952.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001353952.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340881.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340881.1:p.(E78Gfs*7)'} + assert 
results['NM_001353952.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_001353952.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001353952.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + assert results['NM_001353952.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353952.1:c.233_242delinsGT' + assert results['NM_001353952.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353952.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001353952.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353952.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001353952.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353952.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340881.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353952.1'} + + assert 'NM_001353954.1:c.233_242delinsGT' in results.keys() + assert results['NM_001353954.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353954.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353954.1:c.233_242delinsGT']['alt_genomic_loci'] == [] 
+ assert results['NM_001353954.1:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 10, mRNA' + assert results['NM_001353954.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001353954.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340883.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340883.1:p.(E78Gfs*7)'} + assert results['NM_001353954.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_001353954.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001353954.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + assert results['NM_001353954.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353954.1:c.233_242delinsGT' + assert results['NM_001353954.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353954.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001353954.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353954.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001353954.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353954.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340883.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353954.1'} + + assert 'NM_006920.4:c.233_242delinsGT' in results.keys() + assert results['NM_006920.4:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == 'LRG_8t1:c.233_242delinsGT' + assert results['NM_006920.4:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_006920.4:c.233_242delinsGT']['alt_genomic_loci'] == [] + assert results['NM_006920.4:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 2, mRNA' + assert results['NM_006920.4:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_006920.4:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_008851.3(LRG_8p1):p.(Glu78GlyfsTer7)', 'slr': 'NP_008851.3:p.(E78Gfs*7)'} + assert results['NM_006920.4:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_006920.4:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_006920.4:c.233_242delinsGT']['hgvs_lrg_variant'] == 'LRG_8:g.5251_5260delinsGT' + assert results['NM_006920.4:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_006920.4:c.233_242delinsGT' + assert results['NM_006920.4:c.233_242delinsGT']['hgvs_refseqgene_variant'] == 'NG_011906.1:g.5251_5260delinsGT' + assert results['NM_006920.4:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_006920.4:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_006920.4:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_006920.4:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_006920.4:c.233_242delinsGT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011906.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_008851.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006920.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_8.xml'} + + assert 'NM_001353960.1:c.233_242delinsGT' in results.keys() + assert results['NM_001353960.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353960.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353960.1:c.233_242delinsGT']['alt_genomic_loci'] == [] + assert results['NM_001353960.1:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 14, mRNA' + assert results['NM_001353960.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001353960.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340889.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340889.1:p.(E78Gfs*7)'} + assert results['NM_001353960.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_001353960.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001353960.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + assert results['NM_001353960.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353960.1:c.233_242delinsGT' + assert results['NM_001353960.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_001353960.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001353960.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353960.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001353960.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353960.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340889.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353960.1'} + + assert 'NM_001165964.2:c.233_242delinsGT' in results.keys() + assert results['NM_001165964.2:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001165964.2:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001165964.2:c.233_242delinsGT']['alt_genomic_loci'] == [] + assert results['NM_001165964.2:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 3, mRNA' + assert results['NM_001165964.2:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001165964.2:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159436.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159436.1:p.(E78Gfs*7)'} + assert 
results['NM_001165964.2:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_001165964.2:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001165964.2:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + assert results['NM_001165964.2:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001165964.2:c.233_242delinsGT' + assert results['NM_001165964.2:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001165964.2:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001165964.2:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001165964.2:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} + assert results['NM_001165964.2:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001165964.2:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159436.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165964.2'} + + + def test_variant273(self): + variant = '2-166929891-CCAGGTCCT-C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NR_148667.1:n.638_645del' in results.keys() + assert results['NR_148667.1:n.638_645del']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NR_148667.1:n.638_645del']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_148667.1:n.638_645del']['alt_genomic_loci'] == [] + assert results['NR_148667.1:n.638_645del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 16, non-coding RNA' + assert results['NR_148667.1:n.638_645del']['gene_symbol'] == 'SCN1A' + assert results['NR_148667.1:n.638_645del']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_148667.1:n.638_645del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NR_148667.1:n.638_645del']['genome_context_intronic_sequence'] == '' + assert results['NR_148667.1:n.638_645del']['hgvs_lrg_variant'] == '' + assert results['NR_148667.1:n.638_645del']['hgvs_transcript_variant'] == 'NR_148667.1:n.638_645del' + assert results['NR_148667.1:n.638_645del']['hgvs_refseqgene_variant'] == '' + assert results['NR_148667.1:n.638_645del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NR_148667.1:n.638_645del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NR_148667.1:n.638_645del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NR_148667.1:n.638_645del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NR_148667.1:n.638_645del']['reference_sequence_records'] == {'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NR_148667.1'} + + assert 'NM_001165964.2:c.233_240del' in results.keys() + assert results['NM_001165964.2:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001165964.2:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001165964.2:c.233_240del']['alt_genomic_loci'] == [] + assert results['NM_001165964.2:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 3, mRNA' + assert results['NM_001165964.2:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001165964.2:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159436.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159436.1:p.(E78Gfs*7)'} + assert results['NM_001165964.2:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_001165964.2:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001165964.2:c.233_240del']['hgvs_lrg_variant'] == '' + assert results['NM_001165964.2:c.233_240del']['hgvs_transcript_variant'] == 'NM_001165964.2:c.233_240del' + assert results['NM_001165964.2:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001165964.2:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001165964.2:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001165964.2:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert 
results['NM_001165964.2:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001165964.2:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159436.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165964.2'} + + assert 'NM_001353951.1:c.233_240del' in results.keys() + assert results['NM_001353951.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353951.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353951.1:c.233_240del']['alt_genomic_loci'] == [] + assert results['NM_001353951.1:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 8, mRNA' + assert results['NM_001353951.1:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001353951.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340880.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340880.1:p.(E78Gfs*7)'} + assert results['NM_001353951.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_001353951.1:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001353951.1:c.233_240del']['hgvs_lrg_variant'] == '' + assert results['NM_001353951.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353951.1:c.233_240del' + assert results['NM_001353951.1:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353951.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353951.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353951.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353951.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353951.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340880.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353951.1'} + + assert 'NM_001353954.1:c.233_240del' in results.keys() + assert results['NM_001353954.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353954.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353954.1:c.233_240del']['alt_genomic_loci'] == [] + assert results['NM_001353954.1:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 10, mRNA' + assert results['NM_001353954.1:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001353954.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340883.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340883.1:p.(E78Gfs*7)'} + assert results['NM_001353954.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_001353954.1:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001353954.1:c.233_240del']['hgvs_lrg_variant'] == '' + assert results['NM_001353954.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353954.1:c.233_240del' + assert results['NM_001353954.1:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_001353954.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353954.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353954.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353954.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353954.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340883.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353954.1'} + + assert 'NM_001353961.1:c.-2193_-2186del' in results.keys() + assert results['NM_001353961.1:c.-2193_-2186del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353961.1:c.-2193_-2186del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353961.1:c.-2193_-2186del']['alt_genomic_loci'] == [] + assert results['NM_001353961.1:c.-2193_-2186del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 15, mRNA' + assert results['NM_001353961.1:c.-2193_-2186del']['gene_symbol'] == 'SCN1A' + assert results['NM_001353961.1:c.-2193_-2186del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340890.1:p.?', 'slr': 'NP_001340890.1:p.?'} + assert results['NM_001353961.1:c.-2193_-2186del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert 
results['NM_001353961.1:c.-2193_-2186del']['genome_context_intronic_sequence'] == '' + assert results['NM_001353961.1:c.-2193_-2186del']['hgvs_lrg_variant'] == '' + assert results['NM_001353961.1:c.-2193_-2186del']['hgvs_transcript_variant'] == 'NM_001353961.1:c.-2193_-2186del' + assert results['NM_001353961.1:c.-2193_-2186del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353961.1:c.-2193_-2186del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353961.1:c.-2193_-2186del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353961.1:c.-2193_-2186del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353961.1:c.-2193_-2186del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353961.1:c.-2193_-2186del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340890.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353961.1'} + + assert 'NM_001353948.1:c.233_240del' in results.keys() + assert results['NM_001353948.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353948.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353948.1:c.233_240del']['alt_genomic_loci'] == [] + assert results['NM_001353948.1:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 5, 
mRNA' + assert results['NM_001353948.1:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001353948.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340877.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340877.1:p.(E78Gfs*7)'} + assert results['NM_001353948.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_001353948.1:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001353948.1:c.233_240del']['hgvs_lrg_variant'] == '' + assert results['NM_001353948.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353948.1:c.233_240del' + assert results['NM_001353948.1:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353948.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353948.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353948.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353948.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353948.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340877.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353948.1'} + + assert 'NM_001353960.1:c.233_240del' in results.keys() + assert results['NM_001353960.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001353960.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353960.1:c.233_240del']['alt_genomic_loci'] == [] + assert results['NM_001353960.1:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 14, mRNA' + assert results['NM_001353960.1:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001353960.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340889.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340889.1:p.(E78Gfs*7)'} + assert results['NM_001353960.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_001353960.1:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001353960.1:c.233_240del']['hgvs_lrg_variant'] == '' + assert results['NM_001353960.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353960.1:c.233_240del' + assert results['NM_001353960.1:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353960.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353960.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353960.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353960.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert 
results['NM_001353960.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340889.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353960.1'} + + assert 'NM_001202435.1:c.233_240del' in results.keys() + assert results['NM_001202435.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001202435.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001202435.1:c.233_240del']['alt_genomic_loci'] == [] + assert results['NM_001202435.1:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 4, mRNA' + assert results['NM_001202435.1:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001202435.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001189364.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001189364.1:p.(E78Gfs*7)'} + assert results['NM_001202435.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_001202435.1:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001202435.1:c.233_240del']['hgvs_lrg_variant'] == '' + assert results['NM_001202435.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001202435.1:c.233_240del' + assert results['NM_001202435.1:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001202435.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001202435.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001202435.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001202435.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001202435.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001189364.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001202435.1'} + + assert 'NM_001202435.2:c.233_240del' in results.keys() + assert results['NM_001202435.2:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001202435.2:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001202435.2:c.233_240del']['alt_genomic_loci'] == [] + assert results['NM_001202435.2:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 4, mRNA' + assert results['NM_001202435.2:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001202435.2:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001189364.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001189364.1:p.(E78Gfs*7)'} + assert results['NM_001202435.2:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_001202435.2:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001202435.2:c.233_240del']['hgvs_lrg_variant'] == '' + assert results['NM_001202435.2:c.233_240del']['hgvs_transcript_variant'] == 'NM_001202435.2:c.233_240del' + assert results['NM_001202435.2:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001202435.2:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert 
results['NM_001202435.2:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001202435.2:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001202435.2:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001202435.2:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001189364.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001202435.2'} + + assert 'NM_006920.5:c.233_240del' in results.keys() + assert results['NM_006920.5:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_006920.5:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_006920.5:c.233_240del']['alt_genomic_loci'] == [] + assert results['NM_006920.5:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 2, mRNA' + assert results['NM_006920.5:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_006920.5:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_008851.3(LRG_8p1):p.(Glu78GlyfsTer7)', 'slr': 'NP_008851.3:p.(E78Gfs*7)'} + assert results['NM_006920.5:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_006920.5:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_006920.5:c.233_240del']['hgvs_lrg_variant'] == '' + assert results['NM_006920.5:c.233_240del']['hgvs_transcript_variant'] == 'NM_006920.5:c.233_240del' + assert 
results['NM_006920.5:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_006920.5:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_006920.5:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_006920.5:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_006920.5:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_006920.5:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_008851.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006920.5'} + + assert 'NM_001353955.1:c.233_240del' in results.keys() + assert results['NM_001353955.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353955.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353955.1:c.233_240del']['alt_genomic_loci'] == [] + assert results['NM_001353955.1:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 11, mRNA' + assert results['NM_001353955.1:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001353955.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340884.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340884.1:p.(E78Gfs*7)'} + assert results['NM_001353955.1:c.233_240del']['submitted_variant'] == 
'2-166929891-CCAGGTCCT-C' + assert results['NM_001353955.1:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001353955.1:c.233_240del']['hgvs_lrg_variant'] == '' + assert results['NM_001353955.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353955.1:c.233_240del' + assert results['NM_001353955.1:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353955.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353955.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353955.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353955.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353955.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340884.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353955.1'} + + assert 'NM_001353952.1:c.233_240del' in results.keys() + assert results['NM_001353952.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353952.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353952.1:c.233_240del']['alt_genomic_loci'] == [] + assert results['NM_001353952.1:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 9, mRNA' 
+ assert results['NM_001353952.1:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001353952.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340881.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340881.1:p.(E78Gfs*7)'} + assert results['NM_001353952.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_001353952.1:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001353952.1:c.233_240del']['hgvs_lrg_variant'] == '' + assert results['NM_001353952.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353952.1:c.233_240del' + assert results['NM_001353952.1:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353952.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353952.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353952.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353952.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353952.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340881.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353952.1'} + + assert 'NM_001353957.1:c.233_240del' in results.keys() + assert results['NM_001353957.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001353957.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353957.1:c.233_240del']['alt_genomic_loci'] == [] + assert results['NM_001353957.1:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 12, mRNA' + assert results['NM_001353957.1:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001353957.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340886.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340886.1:p.(E78Gfs*7)'} + assert results['NM_001353957.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_001353957.1:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001353957.1:c.233_240del']['hgvs_lrg_variant'] == '' + assert results['NM_001353957.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353957.1:c.233_240del' + assert results['NM_001353957.1:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353957.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353957.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353957.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353957.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert 
results['NM_001353957.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340886.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353957.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_006920.4:c.233_240del' in results.keys() + assert results['NM_006920.4:c.233_240del']['hgvs_lrg_transcript_variant'] == 'LRG_8t1:c.233_240del' + assert results['NM_006920.4:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_006920.4:c.233_240del']['alt_genomic_loci'] == [] + assert results['NM_006920.4:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 2, mRNA' + assert results['NM_006920.4:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_006920.4:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_008851.3(LRG_8p1):p.(Glu78GlyfsTer7)', 'slr': 'NP_008851.3:p.(E78Gfs*7)'} + assert results['NM_006920.4:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_006920.4:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_006920.4:c.233_240del']['hgvs_lrg_variant'] == 'LRG_8:g.5251_5258del' + assert results['NM_006920.4:c.233_240del']['hgvs_transcript_variant'] == 'NM_006920.4:c.233_240del' + assert results['NM_006920.4:c.233_240del']['hgvs_refseqgene_variant'] == 'NG_011906.1:g.5251_5258del' + assert results['NM_006920.4:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_006920.4:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_006920.4:c.233_240del']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_006920.4:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_006920.4:c.233_240del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011906.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_008851.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006920.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_8.xml'} + + assert 'NM_001353950.1:c.233_240del' in results.keys() + assert results['NM_001353950.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353950.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353950.1:c.233_240del']['alt_genomic_loci'] == [] + assert results['NM_001353950.1:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 7, mRNA' + assert results['NM_001353950.1:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001353950.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340879.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340879.1:p.(E78Gfs*7)'} + assert results['NM_001353950.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_001353950.1:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001353950.1:c.233_240del']['hgvs_lrg_variant'] == '' + assert results['NM_001353950.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353950.1:c.233_240del' + assert results['NM_001353950.1:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353950.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353950.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353950.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353950.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353950.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340879.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353950.1'} + + assert 'NM_001165963.2:c.233_240del' in results.keys() + assert results['NM_001165963.2:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001165963.2:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001165963.2:c.233_240del']['alt_genomic_loci'] == [] + assert results['NM_001165963.2:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 1, mRNA' + assert results['NM_001165963.2:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001165963.2:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159435.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159435.1:p.(E78Gfs*7)'} + assert results['NM_001165963.2:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_001165963.2:c.233_240del']['genome_context_intronic_sequence'] == '' + assert 
results['NM_001165963.2:c.233_240del']['hgvs_lrg_variant'] == '' + assert results['NM_001165963.2:c.233_240del']['hgvs_transcript_variant'] == 'NM_001165963.2:c.233_240del' + assert results['NM_001165963.2:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001165963.2:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001165963.2:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001165963.2:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001165963.2:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001165963.2:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159435.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165963.2'} + + assert 'NM_001165963.1:c.233_240del' in results.keys() + assert results['NM_001165963.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001165963.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001165963.1:c.233_240del']['alt_genomic_loci'] == [] + assert results['NM_001165963.1:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 1, mRNA' + assert results['NM_001165963.1:c.233_240del']['gene_symbol'] == 'SCN1A' + assert 
results['NM_001165963.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159435.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159435.1:p.(E78Gfs*7)'} + assert results['NM_001165963.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_001165963.1:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001165963.1:c.233_240del']['hgvs_lrg_variant'] == '' + assert results['NM_001165963.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001165963.1:c.233_240del' + assert results['NM_001165963.1:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001165963.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001165963.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001165963.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001165963.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001165963.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159435.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165963.1'} + + assert 'NM_001165964.1:c.233_240del' in results.keys() + assert results['NM_001165964.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001165964.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_001165964.1:c.233_240del']['alt_genomic_loci'] == [] + assert results['NM_001165964.1:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 3, mRNA' + assert results['NM_001165964.1:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001165964.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159436.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159436.1:p.(E78Gfs*7)'} + assert results['NM_001165964.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_001165964.1:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001165964.1:c.233_240del']['hgvs_lrg_variant'] == '' + assert results['NM_001165964.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001165964.1:c.233_240del' + assert results['NM_001165964.1:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001165964.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001165964.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001165964.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001165964.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001165964.1:c.233_240del']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159436.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165964.1'} + + assert 'NM_001353958.1:c.233_240del' in results.keys() + assert results['NM_001353958.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353958.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353958.1:c.233_240del']['alt_genomic_loci'] == [] + assert results['NM_001353958.1:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 13, mRNA' + assert results['NM_001353958.1:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001353958.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340887.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340887.1:p.(E78Gfs*7)'} + assert results['NM_001353958.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_001353958.1:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001353958.1:c.233_240del']['hgvs_lrg_variant'] == '' + assert results['NM_001353958.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353958.1:c.233_240del' + assert results['NM_001353958.1:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353958.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353958.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353958.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + 
assert results['NM_001353958.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353958.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340887.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353958.1'} + + assert 'NM_001353949.1:c.233_240del' in results.keys() + assert results['NM_001353949.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353949.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353949.1:c.233_240del']['alt_genomic_loci'] == [] + assert results['NM_001353949.1:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 6, mRNA' + assert results['NM_001353949.1:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001353949.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340878.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340878.1:p.(E78Gfs*7)'} + assert results['NM_001353949.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_001353949.1:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001353949.1:c.233_240del']['hgvs_lrg_variant'] == '' + assert results['NM_001353949.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353949.1:c.233_240del' + assert results['NM_001353949.1:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353949.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353949.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353949.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353949.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353949.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340878.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353949.1'} + + + def test_variant274(self): + variant = '2-179393504-G-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001256850.1:c.102051C>A' in results.keys() + assert results['NM_001256850.1:c.102051C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001256850.1:c.102051C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001256850.1:c.102051C>A']['alt_genomic_loci'] == [] + assert results['NM_001256850.1:c.102051C>A']['transcript_description'] == 'Homo sapiens titin (TTN), transcript variant N2BA, mRNA' + assert results['NM_001256850.1:c.102051C>A']['gene_symbol'] == 'TTN' + assert results['NM_001256850.1:c.102051C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243779.1:p.(Ser34017Arg)', 'slr': 'NP_001243779.1:p.(S34017R)'} + assert results['NM_001256850.1:c.102051C>A']['submitted_variant'] == '2-179393504-G-T' + assert results['NM_001256850.1:c.102051C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001256850.1:c.102051C>A']['hgvs_lrg_variant'] == '' + assert results['NM_001256850.1:c.102051C>A']['hgvs_transcript_variant'] == 'NM_001256850.1:c.102051C>A' + assert 
results['NM_001256850.1:c.102051C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001256850.1:c.102051C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} + assert results['NM_001256850.1:c.102051C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '178528777', 'alt': u'T'}} + assert results['NM_001256850.1:c.102051C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} + assert results['NM_001256850.1:c.102051C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '178528777', 'alt': u'T'}} + assert results['NM_001256850.1:c.102051C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256850.1'} + + assert 'NR_038271.1:n.446+5141G>T' in results.keys() + assert results['NR_038271.1:n.446+5141G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_038271.1:n.446+5141G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_038271.1:n.446+5141G>T']['alt_genomic_loci'] == [] + assert results['NR_038271.1:n.446+5141G>T']['transcript_description'] == 'Homo sapiens TTN antisense RNA 1 (TTN-AS1), transcript variant 2, long non-coding RNA' + assert results['NR_038271.1:n.446+5141G>T']['gene_symbol'] == 'TTN-AS1' + assert results['NR_038271.1:n.446+5141G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_038271.1:n.446+5141G>T']['submitted_variant'] == '2-179393504-G-T' + assert results['NR_038271.1:n.446+5141G>T']['genome_context_intronic_sequence'] == 
'NC_000002.11(NR_038271.1):c.446+5141G>T' + assert results['NR_038271.1:n.446+5141G>T']['hgvs_lrg_variant'] == '' + assert results['NR_038271.1:n.446+5141G>T']['hgvs_transcript_variant'] == 'NR_038271.1:n.446+5141G>T' + assert results['NR_038271.1:n.446+5141G>T']['hgvs_refseqgene_variant'] == '' + assert results['NR_038271.1:n.446+5141G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} + assert results['NR_038271.1:n.446+5141G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} + assert results['NR_038271.1:n.446+5141G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} + assert results['NR_038271.1:n.446+5141G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} + assert results['NR_038271.1:n.446+5141G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_038271.1'} + + assert 'NM_133432.3:c.80154C>A' in results.keys() + assert results['NM_133432.3:c.80154C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_133432.3:c.80154C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_133432.3:c.80154C>A']['alt_genomic_loci'] == [] + assert results['NM_133432.3:c.80154C>A']['transcript_description'] == 'Homo sapiens titin (TTN), transcript variant novex-1, mRNA' + assert results['NM_133432.3:c.80154C>A']['gene_symbol'] == 'TTN' + assert results['NM_133432.3:c.80154C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_597676.3:p.(Ser26718Arg)', 'slr': 'NP_597676.3:p.(S26718R)'} + assert results['NM_133432.3:c.80154C>A']['submitted_variant'] == 
'2-179393504-G-T' + assert results['NM_133432.3:c.80154C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_133432.3:c.80154C>A']['hgvs_lrg_variant'] == '' + assert results['NM_133432.3:c.80154C>A']['hgvs_transcript_variant'] == 'NM_133432.3:c.80154C>A' + assert results['NM_133432.3:c.80154C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_133432.3:c.80154C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} + assert results['NM_133432.3:c.80154C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '178528777', 'alt': u'T'}} + assert results['NM_133432.3:c.80154C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} + assert results['NM_133432.3:c.80154C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '178528777', 'alt': u'T'}} + assert results['NM_133432.3:c.80154C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_597676.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_133432.3'} + + assert 'NM_001267550.1:c.106974C>A' in results.keys() + assert results['NM_001267550.1:c.106974C>A']['hgvs_lrg_transcript_variant'] == 'LRG_391t1:c.106974C>A' + assert results['NM_001267550.1:c.106974C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001267550.1:c.106974C>A']['alt_genomic_loci'] == [] + assert results['NM_001267550.1:c.106974C>A']['transcript_description'] == 'Homo sapiens titin (TTN), transcript variant IC, mRNA' + assert results['NM_001267550.1:c.106974C>A']['gene_symbol'] == 'TTN' + assert 
results['NM_001267550.1:c.106974C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001254479.1:p.(Ser35658Arg)', 'slr': 'NP_001254479.1:p.(S35658R)'} + assert results['NM_001267550.1:c.106974C>A']['submitted_variant'] == '2-179393504-G-T' + assert results['NM_001267550.1:c.106974C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001267550.1:c.106974C>A']['hgvs_lrg_variant'] == 'LRG_391:g.307026C>A' + assert results['NM_001267550.1:c.106974C>A']['hgvs_transcript_variant'] == 'NM_001267550.1:c.106974C>A' + assert results['NM_001267550.1:c.106974C>A']['hgvs_refseqgene_variant'] == 'NG_011618.3:g.307026C>A' + assert results['NM_001267550.1:c.106974C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} + assert 'hg38' not in results['NM_001267550.1:c.106974C>A']['primary_assembly_loci'].keys() + assert results['NM_001267550.1:c.106974C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} + assert 'grch38' not in results['NM_001267550.1:c.106974C>A']['primary_assembly_loci'].keys() + assert results['NM_001267550.1:c.106974C>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011618.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001254479.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001267550.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_391.xml'} + + assert 'NR_038272.1:n.219+5141G>T' in results.keys() + assert results['NR_038272.1:n.219+5141G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_038272.1:n.219+5141G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_038272.1:n.219+5141G>T']['alt_genomic_loci'] == [] + assert results['NR_038272.1:n.219+5141G>T']['transcript_description'] == 'Homo sapiens TTN 
antisense RNA 1 (TTN-AS1), transcript variant 1, long non-coding RNA' + assert results['NR_038272.1:n.219+5141G>T']['gene_symbol'] == 'TTN-AS1' + assert results['NR_038272.1:n.219+5141G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_038272.1:n.219+5141G>T']['submitted_variant'] == '2-179393504-G-T' + assert results['NR_038272.1:n.219+5141G>T']['genome_context_intronic_sequence'] == 'NC_000002.11(NR_038272.1):c.219+5141G>T' + assert results['NR_038272.1:n.219+5141G>T']['hgvs_lrg_variant'] == '' + assert results['NR_038272.1:n.219+5141G>T']['hgvs_transcript_variant'] == 'NR_038272.1:n.219+5141G>T' + assert results['NR_038272.1:n.219+5141G>T']['hgvs_refseqgene_variant'] == '' + assert results['NR_038272.1:n.219+5141G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} + assert results['NR_038272.1:n.219+5141G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} + assert results['NR_038272.1:n.219+5141G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} + assert results['NR_038272.1:n.219+5141G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} + assert results['NR_038272.1:n.219+5141G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_038272.1'} + + assert 'NM_133437.4:c.80355C>A' in results.keys() + assert results['NM_133437.4:c.80355C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_133437.4:c.80355C>A']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_133437.4:c.80355C>A']['alt_genomic_loci'] == [] + assert results['NM_133437.4:c.80355C>A']['transcript_description'] == 'Homo sapiens titin (TTN), transcript variant novex-2, mRNA' + assert results['NM_133437.4:c.80355C>A']['gene_symbol'] == 'TTN' + assert results['NM_133437.4:c.80355C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_597681.4:p.(Ser26785Arg)', 'slr': 'NP_597681.4:p.(S26785R)'} + assert results['NM_133437.4:c.80355C>A']['submitted_variant'] == '2-179393504-G-T' + assert results['NM_133437.4:c.80355C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_133437.4:c.80355C>A']['hgvs_lrg_variant'] == '' + assert results['NM_133437.4:c.80355C>A']['hgvs_transcript_variant'] == 'NM_133437.4:c.80355C>A' + assert results['NM_133437.4:c.80355C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_133437.4:c.80355C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} + assert results['NM_133437.4:c.80355C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '178528777', 'alt': u'T'}} + assert results['NM_133437.4:c.80355C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} + assert results['NM_133437.4:c.80355C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '178528777', 'alt': u'T'}} + assert results['NM_133437.4:c.80355C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_597681.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_133437.4'} + + assert results['flag'] == 'gene_variant' + assert 'NM_133378.4:c.99270C>A' in results.keys() + assert 
results['NM_133378.4:c.99270C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_133378.4:c.99270C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_133378.4:c.99270C>A']['alt_genomic_loci'] == [] + assert results['NM_133378.4:c.99270C>A']['transcript_description'] == 'Homo sapiens titin (TTN), transcript variant N2-A, mRNA' + assert results['NM_133378.4:c.99270C>A']['gene_symbol'] == 'TTN' + assert results['NM_133378.4:c.99270C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_596869.4:p.(Ser33090Arg)', 'slr': 'NP_596869.4:p.(S33090R)'} + assert results['NM_133378.4:c.99270C>A']['submitted_variant'] == '2-179393504-G-T' + assert results['NM_133378.4:c.99270C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_133378.4:c.99270C>A']['hgvs_lrg_variant'] == '' + assert results['NM_133378.4:c.99270C>A']['hgvs_transcript_variant'] == 'NM_133378.4:c.99270C>A' + assert results['NM_133378.4:c.99270C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_133378.4:c.99270C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} + assert results['NM_133378.4:c.99270C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '178528777', 'alt': u'T'}} + assert results['NM_133378.4:c.99270C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} + assert results['NM_133378.4:c.99270C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '178528777', 'alt': u'T'}} + assert results['NM_133378.4:c.99270C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_596869.4', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_133378.4'} + + assert 'NM_001267550.2:c.106974C>A' in results.keys() + assert results['NM_001267550.2:c.106974C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001267550.2:c.106974C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001267550.2:c.106974C>A']['alt_genomic_loci'] == [] + assert results['NM_001267550.2:c.106974C>A']['transcript_description'] == 'Homo sapiens titin (TTN), transcript variant IC, mRNA' + assert results['NM_001267550.2:c.106974C>A']['gene_symbol'] == 'TTN' + assert results['NM_001267550.2:c.106974C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001254479.2:p.(Ser35658Arg)', 'slr': 'NP_001254479.2:p.(S35658R)'} + assert results['NM_001267550.2:c.106974C>A']['submitted_variant'] == '2-179393504-G-T' + assert results['NM_001267550.2:c.106974C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001267550.2:c.106974C>A']['hgvs_lrg_variant'] == '' + assert results['NM_001267550.2:c.106974C>A']['hgvs_transcript_variant'] == 'NM_001267550.2:c.106974C>A' + assert results['NM_001267550.2:c.106974C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001267550.2:c.106974C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} + assert results['NM_001267550.2:c.106974C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '178528777', 'alt': u'T'}} + assert results['NM_001267550.2:c.106974C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} + assert results['NM_001267550.2:c.106974C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': 
'178528777', 'alt': u'T'}} + assert results['NM_001267550.2:c.106974C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001254479.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001267550.2'} + + assert 'NM_133437.3:c.80355C>A' in results.keys() + assert results['NM_133437.3:c.80355C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_133437.3:c.80355C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_133437.3:c.80355C>A']['alt_genomic_loci'] == [] + assert results['NM_133437.3:c.80355C>A']['transcript_description'] == 'Homo sapiens titin (TTN), transcript variant novex-2, mRNA' + assert results['NM_133437.3:c.80355C>A']['gene_symbol'] == 'TTN' + assert results['NM_133437.3:c.80355C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_597681.3:p.(Ser26785Arg)', 'slr': 'NP_597681.3:p.(S26785R)'} + assert results['NM_133437.3:c.80355C>A']['submitted_variant'] == '2-179393504-G-T' + assert results['NM_133437.3:c.80355C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_133437.3:c.80355C>A']['hgvs_lrg_variant'] == '' + assert results['NM_133437.3:c.80355C>A']['hgvs_transcript_variant'] == 'NM_133437.3:c.80355C>A' + assert results['NM_133437.3:c.80355C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_133437.3:c.80355C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} + assert 'hg38' not in results['NM_133437.3:c.80355C>A']['primary_assembly_loci'].keys() + assert results['NM_133437.3:c.80355C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} + assert 'grch38' not in results['NM_133437.3:c.80355C>A']['primary_assembly_loci'].keys() + assert results['NM_133437.3:c.80355C>A']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_597681.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_133437.3'} + + assert 'NM_003319.4:c.79779C>A' in results.keys() + assert results['NM_003319.4:c.79779C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003319.4:c.79779C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003319.4:c.79779C>A']['alt_genomic_loci'] == [] + assert results['NM_003319.4:c.79779C>A']['transcript_description'] == 'Homo sapiens titin (TTN), transcript variant N2-B, mRNA' + assert results['NM_003319.4:c.79779C>A']['gene_symbol'] == 'TTN' + assert results['NM_003319.4:c.79779C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003310.4:p.(Ser26593Arg)', 'slr': 'NP_003310.4:p.(S26593R)'} + assert results['NM_003319.4:c.79779C>A']['submitted_variant'] == '2-179393504-G-T' + assert results['NM_003319.4:c.79779C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_003319.4:c.79779C>A']['hgvs_lrg_variant'] == '' + assert results['NM_003319.4:c.79779C>A']['hgvs_transcript_variant'] == 'NM_003319.4:c.79779C>A' + assert results['NM_003319.4:c.79779C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_003319.4:c.79779C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} + assert results['NM_003319.4:c.79779C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '178528777', 'alt': u'T'}} + assert results['NM_003319.4:c.79779C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} + assert results['NM_003319.4:c.79779C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '178528777', 
'alt': u'T'}} + assert results['NM_003319.4:c.79779C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003310.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003319.4'} + + + def test_variant275(self): + variant = '2-185803444-TGCAGCTGCTGCAGCTGCAGCTGCA-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_194250.1:c.3324_3347del' in results.keys() + assert results['NM_194250.1:c.3324_3347del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_194250.1:c.3324_3347del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_194250.1:c.3324_3347del']['alt_genomic_loci'] == [] + assert results['NM_194250.1:c.3324_3347del']['transcript_description'] == 'Homo sapiens zinc finger protein 804A (ZNF804A), mRNA' + assert results['NM_194250.1:c.3324_3347del']['gene_symbol'] == 'ZNF804A' + assert results['NM_194250.1:c.3324_3347del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_919226.1:p.(Ala1112_Ala1119del)', 'slr': 'NP_919226.1:p.(A1112_A1119del)'} + assert results['NM_194250.1:c.3324_3347del']['submitted_variant'] == '2-185803444-TGCAGCTGCTGCAGCTGCAGCTGCA-T' + assert results['NM_194250.1:c.3324_3347del']['genome_context_intronic_sequence'] == '' + assert results['NM_194250.1:c.3324_3347del']['hgvs_lrg_variant'] == '' + assert results['NM_194250.1:c.3324_3347del']['hgvs_transcript_variant'] == 'NM_194250.1:c.3324_3347del' + assert results['NM_194250.1:c.3324_3347del']['hgvs_refseqgene_variant'] == '' + assert results['NM_194250.1:c.3324_3347del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.185803447_185803470del', 'vcf': {'chr': 'chr2', 'ref': 'TGCAGCTGCTGCAGCTGCAGCTGCA', 'pos': '185803444', 'alt': 'T'}} + assert results['NM_194250.1:c.3324_3347del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.184938720_184938743del', 'vcf': {'chr': 'chr2', 'ref': 
'TGCAGCTGCTGCAGCTGCAGCTGCA', 'pos': '184938717', 'alt': 'T'}} + assert results['NM_194250.1:c.3324_3347del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.185803447_185803470del', 'vcf': {'chr': '2', 'ref': 'TGCAGCTGCTGCAGCTGCAGCTGCA', 'pos': '185803444', 'alt': 'T'}} + assert results['NM_194250.1:c.3324_3347del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.184938720_184938743del', 'vcf': {'chr': '2', 'ref': 'TGCAGCTGCTGCAGCTGCAGCTGCA', 'pos': '184938717', 'alt': 'T'}} + assert results['NM_194250.1:c.3324_3347del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_919226.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_194250.1'} + + + def test_variant276(self): + variant = '2-201950249-G-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_002491.2:c.208G>T' in results.keys() + assert results['NM_002491.2:c.208G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_002491.2:c.208G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_002491.2:c.208G>T']['alt_genomic_loci'] == [] + assert results['NM_002491.2:c.208G>T']['transcript_description'] == 'Homo sapiens NADH:ubiquinone oxidoreductase subunit B3 (NDUFB3), transcript variant 1, mRNA' + assert results['NM_002491.2:c.208G>T']['gene_symbol'] == 'NDUFB3' + assert results['NM_002491.2:c.208G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002482.1:p.(Gly70Ter)', 'slr': 'NP_002482.1:p.(G70*)'} + assert results['NM_002491.2:c.208G>T']['submitted_variant'] == '2-201950249-G-T' + assert results['NM_002491.2:c.208G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_002491.2:c.208G>T']['hgvs_lrg_variant'] == '' + assert results['NM_002491.2:c.208G>T']['hgvs_transcript_variant'] == 'NM_002491.2:c.208G>T' + assert results['NM_002491.2:c.208G>T']['hgvs_refseqgene_variant'] == 
'NG_032156.1:g.18788G>T' + assert results['NM_002491.2:c.208G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.201950249G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '201950249', 'alt': 'T'}} + assert results['NM_002491.2:c.208G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.201085526G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '201085526', 'alt': 'T'}} + assert results['NM_002491.2:c.208G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.201950249G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '201950249', 'alt': 'T'}} + assert results['NM_002491.2:c.208G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.201085526G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '201085526', 'alt': 'T'}} + assert results['NM_002491.2:c.208G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_032156.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002482.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002491.2'} + + assert 'NM_001257102.1:c.208G>T' in results.keys() + assert results['NM_001257102.1:c.208G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001257102.1:c.208G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001257102.1:c.208G>T']['alt_genomic_loci'] == [] + assert results['NM_001257102.1:c.208G>T']['transcript_description'] == 'Homo sapiens NADH:ubiquinone oxidoreductase subunit B3 (NDUFB3), transcript variant 2, mRNA' + assert results['NM_001257102.1:c.208G>T']['gene_symbol'] == 'NDUFB3' + assert results['NM_001257102.1:c.208G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001244031.1:p.(Gly70Ter)', 'slr': 'NP_001244031.1:p.(G70*)'} + assert results['NM_001257102.1:c.208G>T']['submitted_variant'] == '2-201950249-G-T' + assert results['NM_001257102.1:c.208G>T']['genome_context_intronic_sequence'] == '' + assert 
results['NM_001257102.1:c.208G>T']['hgvs_lrg_variant'] == '' + assert results['NM_001257102.1:c.208G>T']['hgvs_transcript_variant'] == 'NM_001257102.1:c.208G>T' + assert results['NM_001257102.1:c.208G>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001257102.1:c.208G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.201950249G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '201950249', 'alt': 'T'}} + assert results['NM_001257102.1:c.208G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.201085526G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '201085526', 'alt': 'T'}} + assert results['NM_001257102.1:c.208G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.201950249G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '201950249', 'alt': 'T'}} + assert results['NM_001257102.1:c.208G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.201085526G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '201085526', 'alt': 'T'}} + assert results['NM_001257102.1:c.208G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244031.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257102.1'} + + + def test_variant277(self): + variant = '2-238268730-C-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_004369.3:c.6282+1G>T' in results.keys() + assert results['NM_004369.3:c.6282+1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_473t1:c.6282+1G>T' + assert results['NM_004369.3:c.6282+1G>T']['refseqgene_context_intronic_sequence'] == 'NG_008676.1(NM_004369.3):c.6282+1G>T' + assert results['NM_004369.3:c.6282+1G>T']['alt_genomic_loci'] == [] + assert results['NM_004369.3:c.6282+1G>T']['transcript_description'] == 'Homo sapiens collagen type VI alpha 3 chain (COL6A3), transcript variant 1, mRNA' + assert results['NM_004369.3:c.6282+1G>T']['gene_symbol'] == 'COL6A3' + 
assert results['NM_004369.3:c.6282+1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004360.2(LRG_473p1):p.?', 'slr': 'NP_004360.2:p.?'} + assert results['NM_004369.3:c.6282+1G>T']['submitted_variant'] == '2-238268730-C-A' + assert results['NM_004369.3:c.6282+1G>T']['genome_context_intronic_sequence'] == 'NC_000002.11(NM_004369.3):c.6282+1G>T' + assert results['NM_004369.3:c.6282+1G>T']['hgvs_lrg_variant'] == 'LRG_473:g.59121G>T' + assert results['NM_004369.3:c.6282+1G>T']['hgvs_transcript_variant'] == 'NM_004369.3:c.6282+1G>T' + assert results['NM_004369.3:c.6282+1G>T']['hgvs_refseqgene_variant'] == 'NG_008676.1:g.59121G>T' + assert results['NM_004369.3:c.6282+1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': 'chr2', 'ref': u'C', 'pos': '238268730', 'alt': u'A'}} + assert results['NM_004369.3:c.6282+1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': 'chr2', 'ref': u'C', 'pos': '237360087', 'alt': u'A'}} + assert results['NM_004369.3:c.6282+1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': '2', 'ref': u'C', 'pos': '238268730', 'alt': u'A'}} + assert results['NM_004369.3:c.6282+1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': '2', 'ref': u'C', 'pos': '237360087', 'alt': u'A'}} + assert results['NM_004369.3:c.6282+1G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008676.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004360.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004369.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_473.xml'} + + assert results['flag'] == 'gene_variant' + assert 'NM_057166.4:c.4461+1G>T' in results.keys() + assert results['NM_057166.4:c.4461+1G>T']['hgvs_lrg_transcript_variant'] == '' + 
assert results['NM_057166.4:c.4461+1G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_057166.4:c.4461+1G>T']['alt_genomic_loci'] == [] + assert results['NM_057166.4:c.4461+1G>T']['transcript_description'] == 'Homo sapiens collagen type VI alpha 3 chain (COL6A3), transcript variant 4, mRNA' + assert results['NM_057166.4:c.4461+1G>T']['gene_symbol'] == 'COL6A3' + assert results['NM_057166.4:c.4461+1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_476507.3:p.?', 'slr': 'NP_476507.3:p.?'} + assert results['NM_057166.4:c.4461+1G>T']['submitted_variant'] == '2-238268730-C-A' + assert results['NM_057166.4:c.4461+1G>T']['genome_context_intronic_sequence'] == 'NC_000002.11(NM_057166.4):c.4461+1G>T' + assert results['NM_057166.4:c.4461+1G>T']['hgvs_lrg_variant'] == '' + assert results['NM_057166.4:c.4461+1G>T']['hgvs_transcript_variant'] == 'NM_057166.4:c.4461+1G>T' + assert results['NM_057166.4:c.4461+1G>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_057166.4:c.4461+1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': 'chr2', 'ref': u'C', 'pos': '238268730', 'alt': u'A'}} + assert results['NM_057166.4:c.4461+1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': 'chr2', 'ref': u'C', 'pos': '237360087', 'alt': u'A'}} + assert results['NM_057166.4:c.4461+1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': '2', 'ref': u'C', 'pos': '238268730', 'alt': u'A'}} + assert results['NM_057166.4:c.4461+1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': '2', 'ref': u'C', 'pos': '237360087', 'alt': u'A'}} + assert results['NM_057166.4:c.4461+1G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_476507.3', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_057166.4'} + + assert 'NM_057167.3:c.5664+1G>T' in results.keys() + assert results['NM_057167.3:c.5664+1G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_057167.3:c.5664+1G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_057167.3:c.5664+1G>T']['alt_genomic_loci'] == [] + assert results['NM_057167.3:c.5664+1G>T']['transcript_description'] == 'Homo sapiens collagen type VI alpha 3 chain (COL6A3), transcript variant 5, mRNA' + assert results['NM_057167.3:c.5664+1G>T']['gene_symbol'] == 'COL6A3' + assert results['NM_057167.3:c.5664+1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_476508.2:p.?', 'slr': 'NP_476508.2:p.?'} + assert results['NM_057167.3:c.5664+1G>T']['submitted_variant'] == '2-238268730-C-A' + assert results['NM_057167.3:c.5664+1G>T']['genome_context_intronic_sequence'] == 'NC_000002.11(NM_057167.3):c.5664+1G>T' + assert results['NM_057167.3:c.5664+1G>T']['hgvs_lrg_variant'] == '' + assert results['NM_057167.3:c.5664+1G>T']['hgvs_transcript_variant'] == 'NM_057167.3:c.5664+1G>T' + assert results['NM_057167.3:c.5664+1G>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_057167.3:c.5664+1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': 'chr2', 'ref': u'C', 'pos': '238268730', 'alt': u'A'}} + assert results['NM_057167.3:c.5664+1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': 'chr2', 'ref': u'C', 'pos': '237360087', 'alt': u'A'}} + assert results['NM_057167.3:c.5664+1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': '2', 'ref': u'C', 'pos': '238268730', 'alt': u'A'}} + assert results['NM_057167.3:c.5664+1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': '2', 'ref': u'C', 'pos': '237360087', 'alt': 
u'A'}} + assert results['NM_057167.3:c.5664+1G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_476508.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_057167.3'} + + + def test_variant278(self): + variant = '21-43897396-C-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_080860.2:c.727+5G>A' in results.keys() + assert results['NM_080860.2:c.727+5G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_080860.2:c.727+5G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_080860.2:c.727+5G>A']['alt_genomic_loci'] == [] + assert results['NM_080860.2:c.727+5G>A']['transcript_description'] == 'Homo sapiens radial spoke head 1 homolog (Chlamydomonas) (RSPH1), mRNA' + assert results['NM_080860.2:c.727+5G>A']['gene_symbol'] == 'RSPH1' + assert results['NM_080860.2:c.727+5G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_543136.1:p.?', 'slr': 'NP_543136.1:p.?'} + assert results['NM_080860.2:c.727+5G>A']['submitted_variant'] == '21-43897396-C-T' + assert results['NM_080860.2:c.727+5G>A']['genome_context_intronic_sequence'] == 'NC_000021.8(NM_080860.2):c.727+5G>A' + assert results['NM_080860.2:c.727+5G>A']['hgvs_lrg_variant'] == '' + assert results['NM_080860.2:c.727+5G>A']['hgvs_transcript_variant'] == 'NM_080860.2:c.727+5G>A' + assert results['NM_080860.2:c.727+5G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_080860.2:c.727+5G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': 'chr21', 'ref': u'C', 'pos': '43897396', 'alt': u'T'}} + assert 'hg38' not in results['NM_080860.2:c.727+5G>A']['primary_assembly_loci'].keys() + assert results['NM_080860.2:c.727+5G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': '21', 'ref': u'C', 'pos': '43897396', 'alt': u'T'}} + assert 'grch38' not in 
results['NM_080860.2:c.727+5G>A']['primary_assembly_loci'].keys() + assert results['NM_080860.2:c.727+5G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_543136.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_080860.2'} + + assert results['flag'] == 'gene_variant' + assert 'NM_080860.3:c.727+5G>A' in results.keys() + assert results['NM_080860.3:c.727+5G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_080860.3:c.727+5G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_080860.3:c.727+5G>A']['alt_genomic_loci'] == [] + assert results['NM_080860.3:c.727+5G>A']['transcript_description'] == 'Homo sapiens radial spoke head component 1 (RSPH1), transcript variant 1, mRNA' + assert results['NM_080860.3:c.727+5G>A']['gene_symbol'] == 'RSPH1' + assert results['NM_080860.3:c.727+5G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_543136.1:p.?', 'slr': 'NP_543136.1:p.?'} + assert results['NM_080860.3:c.727+5G>A']['submitted_variant'] == '21-43897396-C-T' + assert results['NM_080860.3:c.727+5G>A']['genome_context_intronic_sequence'] == 'NC_000021.8(NM_080860.3):c.727+5G>A' + assert results['NM_080860.3:c.727+5G>A']['hgvs_lrg_variant'] == '' + assert results['NM_080860.3:c.727+5G>A']['hgvs_transcript_variant'] == 'NM_080860.3:c.727+5G>A' + assert results['NM_080860.3:c.727+5G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_080860.3:c.727+5G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': 'chr21', 'ref': u'C', 'pos': '43897396', 'alt': u'T'}} + assert results['NM_080860.3:c.727+5G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000021.9:g.42477286C>T', 'vcf': {'chr': 'chr21', 'ref': u'C', 'pos': '42477286', 'alt': u'T'}} + assert results['NM_080860.3:c.727+5G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': '21', 'ref': 
u'C', 'pos': '43897396', 'alt': u'T'}} + assert results['NM_080860.3:c.727+5G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000021.9:g.42477286C>T', 'vcf': {'chr': '21', 'ref': u'C', 'pos': '42477286', 'alt': u'T'}} + assert results['NM_080860.3:c.727+5G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_543136.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_080860.3'} + + assert 'NM_001286506.1:c.613+5G>A' in results.keys() + assert results['NM_001286506.1:c.613+5G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001286506.1:c.613+5G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001286506.1:c.613+5G>A']['alt_genomic_loci'] == [] + assert results['NM_001286506.1:c.613+5G>A']['transcript_description'] == 'Homo sapiens radial spoke head component 1 (RSPH1), transcript variant 2, mRNA' + assert results['NM_001286506.1:c.613+5G>A']['gene_symbol'] == 'RSPH1' + assert results['NM_001286506.1:c.613+5G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001273435.1:p.?', 'slr': 'NP_001273435.1:p.?'} + assert results['NM_001286506.1:c.613+5G>A']['submitted_variant'] == '21-43897396-C-T' + assert results['NM_001286506.1:c.613+5G>A']['genome_context_intronic_sequence'] == 'NC_000021.8(NM_001286506.1):c.613+5G>A' + assert results['NM_001286506.1:c.613+5G>A']['hgvs_lrg_variant'] == '' + assert results['NM_001286506.1:c.613+5G>A']['hgvs_transcript_variant'] == 'NM_001286506.1:c.613+5G>A' + assert results['NM_001286506.1:c.613+5G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001286506.1:c.613+5G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': 'chr21', 'ref': u'C', 'pos': '43897396', 'alt': u'T'}} + assert results['NM_001286506.1:c.613+5G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000021.9:g.42477286C>T', 'vcf': {'chr': 'chr21', 'ref': u'C', 'pos': 
'42477286', 'alt': u'T'}} + assert results['NM_001286506.1:c.613+5G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': '21', 'ref': u'C', 'pos': '43897396', 'alt': u'T'}} + assert results['NM_001286506.1:c.613+5G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000021.9:g.42477286C>T', 'vcf': {'chr': '21', 'ref': u'C', 'pos': '42477286', 'alt': u'T'}} + assert results['NM_001286506.1:c.613+5G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001273435.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001286506.1'} + + + def test_variant279(self): + variant = '22-30064360-G-GCGACGC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_000268.3:c.924_925insCGACGC' in results.keys() + assert results['NM_000268.3:c.924_925insCGACGC']['hgvs_lrg_transcript_variant'] == 'LRG_511t1:c.924_925insCGACGC' + assert results['NM_000268.3:c.924_925insCGACGC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000268.3:c.924_925insCGACGC']['alt_genomic_loci'] == [] + assert results['NM_000268.3:c.924_925insCGACGC']['transcript_description'] == 'Homo sapiens neurofibromin 2 (NF2), transcript variant 1, mRNA' + assert results['NM_000268.3:c.924_925insCGACGC']['gene_symbol'] == 'NF2' + assert results['NM_000268.3:c.924_925insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000259.1(LRG_511p1):p.(Arg310_Arg311dup)', 'slr': 'NP_000259.1:p.(R310_R311dup)'} + assert results['NM_000268.3:c.924_925insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' + assert results['NM_000268.3:c.924_925insCGACGC']['genome_context_intronic_sequence'] == '' + assert results['NM_000268.3:c.924_925insCGACGC']['hgvs_lrg_variant'] == 'LRG_511:g.69816_69817insCGACGC' + assert results['NM_000268.3:c.924_925insCGACGC']['hgvs_transcript_variant'] == 'NM_000268.3:c.924_925insCGACGC' + assert 
results['NM_000268.3:c.924_925insCGACGC']['hgvs_refseqgene_variant'] == 'NG_009057.1:g.69816_69817insCGACGC' + assert results['NM_000268.3:c.924_925insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} + assert results['NM_000268.3:c.924_925insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} + assert results['NM_000268.3:c.924_925insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} + assert results['NM_000268.3:c.924_925insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} + assert results['NM_000268.3:c.924_925insCGACGC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009057.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000259.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000268.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_511.xml'} + + assert 'NM_181828.2:c.798_799insCGACGC' in results.keys() + assert results['NM_181828.2:c.798_799insCGACGC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_181828.2:c.798_799insCGACGC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_181828.2:c.798_799insCGACGC']['alt_genomic_loci'] == [] + assert results['NM_181828.2:c.798_799insCGACGC']['transcript_description'] == 'Homo sapiens neurofibromin 2 (NF2), transcript variant 5, mRNA' + assert results['NM_181828.2:c.798_799insCGACGC']['gene_symbol'] == 'NF2' + assert 
results['NM_181828.2:c.798_799insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861966.1:p.(Arg268_Arg269dup)', 'slr': 'NP_861966.1:p.(R268_R269dup)'} + assert results['NM_181828.2:c.798_799insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' + assert results['NM_181828.2:c.798_799insCGACGC']['genome_context_intronic_sequence'] == '' + assert results['NM_181828.2:c.798_799insCGACGC']['hgvs_lrg_variant'] == '' + assert results['NM_181828.2:c.798_799insCGACGC']['hgvs_transcript_variant'] == 'NM_181828.2:c.798_799insCGACGC' + assert results['NM_181828.2:c.798_799insCGACGC']['hgvs_refseqgene_variant'] == '' + assert results['NM_181828.2:c.798_799insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} + assert results['NM_181828.2:c.798_799insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} + assert results['NM_181828.2:c.798_799insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} + assert results['NM_181828.2:c.798_799insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} + assert results['NM_181828.2:c.798_799insCGACGC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861966.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181828.2'} + + assert 'NM_181830.2:c.675_676insCGACGC' in results.keys() + assert results['NM_181830.2:c.675_676insCGACGC']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_181830.2:c.675_676insCGACGC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_181830.2:c.675_676insCGACGC']['alt_genomic_loci'] == [] + assert results['NM_181830.2:c.675_676insCGACGC']['transcript_description'] == 'Homo sapiens neurofibromin 2 (NF2), transcript variant 7, mRNA' + assert results['NM_181830.2:c.675_676insCGACGC']['gene_symbol'] == 'NF2' + assert results['NM_181830.2:c.675_676insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861968.1:p.(Arg227_Arg228dup)', 'slr': 'NP_861968.1:p.(R227_R228dup)'} + assert results['NM_181830.2:c.675_676insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' + assert results['NM_181830.2:c.675_676insCGACGC']['genome_context_intronic_sequence'] == '' + assert results['NM_181830.2:c.675_676insCGACGC']['hgvs_lrg_variant'] == '' + assert results['NM_181830.2:c.675_676insCGACGC']['hgvs_transcript_variant'] == 'NM_181830.2:c.675_676insCGACGC' + assert results['NM_181830.2:c.675_676insCGACGC']['hgvs_refseqgene_variant'] == '' + assert results['NM_181830.2:c.675_676insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} + assert results['NM_181830.2:c.675_676insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} + assert results['NM_181830.2:c.675_676insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} + assert results['NM_181830.2:c.675_676insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} + assert 
results['NM_181830.2:c.675_676insCGACGC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861968.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181830.2'} + + assert 'NM_181825.2:c.924_925insCGACGC' in results.keys() + assert results['NM_181825.2:c.924_925insCGACGC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_181825.2:c.924_925insCGACGC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_181825.2:c.924_925insCGACGC']['alt_genomic_loci'] == [] + assert results['NM_181825.2:c.924_925insCGACGC']['transcript_description'] == 'Homo sapiens neurofibromin 2 (NF2), transcript variant 12, mRNA' + assert results['NM_181825.2:c.924_925insCGACGC']['gene_symbol'] == 'NF2' + assert results['NM_181825.2:c.924_925insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861546.1:p.(Arg310_Arg311dup)', 'slr': 'NP_861546.1:p.(R310_R311dup)'} + assert results['NM_181825.2:c.924_925insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' + assert results['NM_181825.2:c.924_925insCGACGC']['genome_context_intronic_sequence'] == '' + assert results['NM_181825.2:c.924_925insCGACGC']['hgvs_lrg_variant'] == '' + assert results['NM_181825.2:c.924_925insCGACGC']['hgvs_transcript_variant'] == 'NM_181825.2:c.924_925insCGACGC' + assert results['NM_181825.2:c.924_925insCGACGC']['hgvs_refseqgene_variant'] == '' + assert results['NM_181825.2:c.924_925insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} + assert results['NM_181825.2:c.924_925insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} + assert results['NM_181825.2:c.924_925insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} + assert results['NM_181825.2:c.924_925insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} + assert results['NM_181825.2:c.924_925insCGACGC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861546.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181825.2'} + + assert 'NM_181832.2:c.924_925insCGACGC' in results.keys() + assert results['NM_181832.2:c.924_925insCGACGC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_181832.2:c.924_925insCGACGC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_181832.2:c.924_925insCGACGC']['alt_genomic_loci'] == [] + assert results['NM_181832.2:c.924_925insCGACGC']['transcript_description'] == 'Homo sapiens neurofibromin 2 (NF2), transcript variant 8, mRNA' + assert results['NM_181832.2:c.924_925insCGACGC']['gene_symbol'] == 'NF2' + assert results['NM_181832.2:c.924_925insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861970.1:p.(Arg310_Arg311dup)', 'slr': 'NP_861970.1:p.(R310_R311dup)'} + assert results['NM_181832.2:c.924_925insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' + assert results['NM_181832.2:c.924_925insCGACGC']['genome_context_intronic_sequence'] == '' + assert results['NM_181832.2:c.924_925insCGACGC']['hgvs_lrg_variant'] == '' + assert results['NM_181832.2:c.924_925insCGACGC']['hgvs_transcript_variant'] == 'NM_181832.2:c.924_925insCGACGC' + assert results['NM_181832.2:c.924_925insCGACGC']['hgvs_refseqgene_variant'] == '' + assert results['NM_181832.2:c.924_925insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} + assert 
results['NM_181832.2:c.924_925insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} + assert results['NM_181832.2:c.924_925insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} + assert results['NM_181832.2:c.924_925insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} + assert results['NM_181832.2:c.924_925insCGACGC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861970.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181832.2'} + + assert 'NM_181833.2:c.447+26086_447+26087insCGACGC' in results.keys() + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['alt_genomic_loci'] == [] + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['transcript_description'] == 'Homo sapiens neurofibromin 2 (NF2), transcript variant 9, mRNA' + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['gene_symbol'] == 'NF2' + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861971.1:p.?', 'slr': 'NP_861971.1:p.?'} + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['genome_context_intronic_sequence'] == 'NC_000022.10(NM_181833.2):c.447+26086_447+26087insCGACGC' + assert 
results['NM_181833.2:c.447+26086_447+26087insCGACGC']['hgvs_lrg_variant'] == '' + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['hgvs_transcript_variant'] == 'NM_181833.2:c.447+26086_447+26087insCGACGC' + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['hgvs_refseqgene_variant'] == '' + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861971.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181833.2'} + + assert 'NM_016418.5:c.924_925insCGACGC' in results.keys() + assert results['NM_016418.5:c.924_925insCGACGC']['hgvs_lrg_transcript_variant'] == 'LRG_511t2:c.924_925insCGACGC' + assert results['NM_016418.5:c.924_925insCGACGC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_016418.5:c.924_925insCGACGC']['alt_genomic_loci'] == [] + assert results['NM_016418.5:c.924_925insCGACGC']['transcript_description'] == 'Homo sapiens neurofibromin 2 (NF2), 
transcript variant 2, mRNA' + assert results['NM_016418.5:c.924_925insCGACGC']['gene_symbol'] == 'NF2' + assert results['NM_016418.5:c.924_925insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057502.2(LRG_511p2):p.(Arg310_Arg311dup)', 'slr': 'NP_057502.2:p.(R310_R311dup)'} + assert results['NM_016418.5:c.924_925insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' + assert results['NM_016418.5:c.924_925insCGACGC']['genome_context_intronic_sequence'] == '' + assert results['NM_016418.5:c.924_925insCGACGC']['hgvs_lrg_variant'] == 'LRG_511:g.69816_69817insCGACGC' + assert results['NM_016418.5:c.924_925insCGACGC']['hgvs_transcript_variant'] == 'NM_016418.5:c.924_925insCGACGC' + assert results['NM_016418.5:c.924_925insCGACGC']['hgvs_refseqgene_variant'] == 'NG_009057.1:g.69816_69817insCGACGC' + assert results['NM_016418.5:c.924_925insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} + assert results['NM_016418.5:c.924_925insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} + assert results['NM_016418.5:c.924_925insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} + assert results['NM_016418.5:c.924_925insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} + assert results['NM_016418.5:c.924_925insCGACGC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009057.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057502.2', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_016418.5', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_511.xml'} + + assert 'NM_181829.2:c.801_802insCGACGC' in results.keys() + assert results['NM_181829.2:c.801_802insCGACGC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_181829.2:c.801_802insCGACGC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_181829.2:c.801_802insCGACGC']['alt_genomic_loci'] == [] + assert results['NM_181829.2:c.801_802insCGACGC']['transcript_description'] == 'Homo sapiens neurofibromin 2 (NF2), transcript variant 6, mRNA' + assert results['NM_181829.2:c.801_802insCGACGC']['gene_symbol'] == 'NF2' + assert results['NM_181829.2:c.801_802insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861967.1:p.(Arg269_Arg270dup)', 'slr': 'NP_861967.1:p.(R269_R270dup)'} + assert results['NM_181829.2:c.801_802insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' + assert results['NM_181829.2:c.801_802insCGACGC']['genome_context_intronic_sequence'] == '' + assert results['NM_181829.2:c.801_802insCGACGC']['hgvs_lrg_variant'] == '' + assert results['NM_181829.2:c.801_802insCGACGC']['hgvs_transcript_variant'] == 'NM_181829.2:c.801_802insCGACGC' + assert results['NM_181829.2:c.801_802insCGACGC']['hgvs_refseqgene_variant'] == '' + assert results['NM_181829.2:c.801_802insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} + assert results['NM_181829.2:c.801_802insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} + assert results['NM_181829.2:c.801_802insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '30064360', 'alt': 
'GCGACGC'}} + assert results['NM_181829.2:c.801_802insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} + assert results['NM_181829.2:c.801_802insCGACGC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861967.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181829.2'} + + assert results['flag'] == 'gene_variant' + assert 'NR_156186.1:n.1483_1484insCGACGC' in results.keys() + assert results['NR_156186.1:n.1483_1484insCGACGC']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_156186.1:n.1483_1484insCGACGC']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_156186.1:n.1483_1484insCGACGC']['alt_genomic_loci'] == [] + assert results['NR_156186.1:n.1483_1484insCGACGC']['transcript_description'] == 'Homo sapiens neurofibromin 2 (NF2), transcript variant 14, non-coding RNA' + assert results['NR_156186.1:n.1483_1484insCGACGC']['gene_symbol'] == 'NF2' + assert results['NR_156186.1:n.1483_1484insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_156186.1:n.1483_1484insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' + assert results['NR_156186.1:n.1483_1484insCGACGC']['genome_context_intronic_sequence'] == '' + assert results['NR_156186.1:n.1483_1484insCGACGC']['hgvs_lrg_variant'] == '' + assert results['NR_156186.1:n.1483_1484insCGACGC']['hgvs_transcript_variant'] == 'NR_156186.1:n.1483_1484insCGACGC' + assert results['NR_156186.1:n.1483_1484insCGACGC']['hgvs_refseqgene_variant'] == '' + assert results['NR_156186.1:n.1483_1484insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} + assert 'hg38' not in 
results['NR_156186.1:n.1483_1484insCGACGC']['primary_assembly_loci'].keys() + assert results['NR_156186.1:n.1483_1484insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} + assert 'grch38' not in results['NR_156186.1:n.1483_1484insCGACGC']['primary_assembly_loci'].keys() + assert results['NR_156186.1:n.1483_1484insCGACGC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_156186.1'} + + assert 'NM_181831.2:c.675_676insCGACGC' in results.keys() + assert results['NM_181831.2:c.675_676insCGACGC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_181831.2:c.675_676insCGACGC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_181831.2:c.675_676insCGACGC']['alt_genomic_loci'] == [] + assert results['NM_181831.2:c.675_676insCGACGC']['transcript_description'] == 'Homo sapiens neurofibromin 2 (NF2), transcript variant 13, mRNA' + assert results['NM_181831.2:c.675_676insCGACGC']['gene_symbol'] == 'NF2' + assert results['NM_181831.2:c.675_676insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861969.1:p.(Arg227_Arg228dup)', 'slr': 'NP_861969.1:p.(R227_R228dup)'} + assert results['NM_181831.2:c.675_676insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' + assert results['NM_181831.2:c.675_676insCGACGC']['genome_context_intronic_sequence'] == '' + assert results['NM_181831.2:c.675_676insCGACGC']['hgvs_lrg_variant'] == '' + assert results['NM_181831.2:c.675_676insCGACGC']['hgvs_transcript_variant'] == 'NM_181831.2:c.675_676insCGACGC' + assert results['NM_181831.2:c.675_676insCGACGC']['hgvs_refseqgene_variant'] == '' + assert results['NM_181831.2:c.675_676insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} + assert 
results['NM_181831.2:c.675_676insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} + assert results['NM_181831.2:c.675_676insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} + assert results['NM_181831.2:c.675_676insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} + assert results['NM_181831.2:c.675_676insCGACGC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861969.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181831.2'} + + + def test_variant280(self): + variant = '3-10188187-TGTCCCGATAG-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_198156.2:c.341-3280_341-3271del' in results.keys() + assert results['NM_198156.2:c.341-3280_341-3271del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_198156.2:c.341-3280_341-3271del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_198156.2:c.341-3280_341-3271del']['alt_genomic_loci'] == [] + assert results['NM_198156.2:c.341-3280_341-3271del']['transcript_description'] == 'Homo sapiens von Hippel-Lindau tumor suppressor (VHL), transcript variant 2, mRNA' + assert results['NM_198156.2:c.341-3280_341-3271del']['gene_symbol'] == 'VHL' + assert results['NM_198156.2:c.341-3280_341-3271del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_937799.1:p.?', 'slr': 'NP_937799.1:p.?'} + assert results['NM_198156.2:c.341-3280_341-3271del']['submitted_variant'] == '3-10188187-TGTCCCGATAG-T' + assert results['NM_198156.2:c.341-3280_341-3271del']['genome_context_intronic_sequence'] == 
'NC_000003.11(NM_198156.2):c.341-3280_341-3271del' + assert results['NM_198156.2:c.341-3280_341-3271del']['hgvs_lrg_variant'] == '' + assert results['NM_198156.2:c.341-3280_341-3271del']['hgvs_transcript_variant'] == 'NM_198156.2:c.341-3280_341-3271del' + assert results['NM_198156.2:c.341-3280_341-3271del']['hgvs_refseqgene_variant'] == '' + assert results['NM_198156.2:c.341-3280_341-3271del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.10188191_10188200del', 'vcf': {'chr': 'chr3', 'ref': 'TGTCCCGATAG', 'pos': '10188187', 'alt': 'T'}} + assert results['NM_198156.2:c.341-3280_341-3271del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.10146507_10146516del', 'vcf': {'chr': 'chr3', 'ref': 'TGTCCCGATAG', 'pos': '10146503', 'alt': 'T'}} + assert results['NM_198156.2:c.341-3280_341-3271del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.10188191_10188200del', 'vcf': {'chr': '3', 'ref': 'TGTCCCGATAG', 'pos': '10188187', 'alt': 'T'}} + assert results['NM_198156.2:c.341-3280_341-3271del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.10146507_10146516del', 'vcf': {'chr': '3', 'ref': 'TGTCCCGATAG', 'pos': '10146503', 'alt': 'T'}} + assert results['NM_198156.2:c.341-3280_341-3271del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_937799.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198156.2'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001354723.1:c.*18-3280_*18-3271del' in results.keys() + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['alt_genomic_loci'] == [] + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['transcript_description'] == 'Homo sapiens 
von Hippel-Lindau tumor suppressor (VHL), transcript variant 3, mRNA' + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['gene_symbol'] == 'VHL' + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001341652.1:p.?', 'slr': 'NP_001341652.1:p.?'} + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['submitted_variant'] == '3-10188187-TGTCCCGATAG-T' + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_001354723.1):c.*18-3280_*18-3271del' + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['hgvs_lrg_variant'] == '' + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['hgvs_transcript_variant'] == 'NM_001354723.1:c.*18-3280_*18-3271del' + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.10188191_10188200del', 'vcf': {'chr': 'chr3', 'ref': 'TGTCCCGATAG', 'pos': '10188187', 'alt': 'T'}} + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.10146507_10146516del', 'vcf': {'chr': 'chr3', 'ref': 'TGTCCCGATAG', 'pos': '10146503', 'alt': 'T'}} + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.10188191_10188200del', 'vcf': {'chr': '3', 'ref': 'TGTCCCGATAG', 'pos': '10188187', 'alt': 'T'}} + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.10146507_10146516del', 'vcf': {'chr': '3', 'ref': 'TGTCCCGATAG', 'pos': '10146503', 'alt': 'T'}} + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341652.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354723.1'} + + assert 'NM_000551.3:c.341-7_343del' in results.keys() + assert results['NM_000551.3:c.341-7_343del']['hgvs_lrg_transcript_variant'] == 'LRG_322t1:c.341-7_343del' + assert results['NM_000551.3:c.341-7_343del']['refseqgene_context_intronic_sequence'] == 'NG_008212.3(NM_000551.3):c.341-7_343del' + assert results['NM_000551.3:c.341-7_343del']['alt_genomic_loci'] == [] + assert results['NM_000551.3:c.341-7_343del']['transcript_description'] == 'Homo sapiens von Hippel-Lindau tumor suppressor (VHL), transcript variant 1, mRNA' + assert results['NM_000551.3:c.341-7_343del']['gene_symbol'] == 'VHL' + assert results['NM_000551.3:c.341-7_343del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000542.1(LRG_322p1):p.?', 'slr': 'NP_000542.1:p.?'} + assert results['NM_000551.3:c.341-7_343del']['submitted_variant'] == '3-10188187-TGTCCCGATAG-T' + assert results['NM_000551.3:c.341-7_343del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_000551.3):c.341-7_343del' + assert results['NM_000551.3:c.341-7_343del']['hgvs_lrg_variant'] == 'LRG_322:g.9873_9882del' + assert results['NM_000551.3:c.341-7_343del']['hgvs_transcript_variant'] == 'NM_000551.3:c.341-7_343del' + assert results['NM_000551.3:c.341-7_343del']['hgvs_refseqgene_variant'] == 'NG_008212.3:g.9873_9882del' + assert results['NM_000551.3:c.341-7_343del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.10188191_10188200del', 'vcf': {'chr': 'chr3', 'ref': 'TGTCCCGATAG', 'pos': '10188187', 'alt': 'T'}} + assert results['NM_000551.3:c.341-7_343del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.10146507_10146516del', 'vcf': {'chr': 'chr3', 'ref': 'TGTCCCGATAG', 'pos': '10146503', 'alt': 'T'}} + assert results['NM_000551.3:c.341-7_343del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.10188191_10188200del', 'vcf': {'chr': '3', 'ref': 'TGTCCCGATAG', 
'pos': '10188187', 'alt': 'T'}} + assert results['NM_000551.3:c.341-7_343del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.10146507_10146516del', 'vcf': {'chr': '3', 'ref': 'TGTCCCGATAG', 'pos': '10146503', 'alt': 'T'}} + assert results['NM_000551.3:c.341-7_343del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008212.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000542.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000551.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_322.xml'} + + + def test_variant281(self): + variant = '3-50402127-T-G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001005505.2:c.3408A>C' in results.keys() + assert results['NM_001005505.2:c.3408A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001005505.2:c.3408A>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001005505.2:c.3408A>C']['alt_genomic_loci'] == [] + assert results['NM_001005505.2:c.3408A>C']['transcript_description'] == 'Homo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 1, mRNA' + assert results['NM_001005505.2:c.3408A>C']['gene_symbol'] == 'CACNA2D2' + assert results['NM_001005505.2:c.3408A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001005505.1:p.(Gln1136His)', 'slr': 'NP_001005505.1:p.(Q1136H)'} + assert results['NM_001005505.2:c.3408A>C']['submitted_variant'] == '3-50402127-T-G' + assert results['NM_001005505.2:c.3408A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_001005505.2:c.3408A>C']['hgvs_lrg_variant'] == '' + assert results['NM_001005505.2:c.3408A>C']['hgvs_transcript_variant'] == 'NM_001005505.2:c.3408A>C' + assert results['NM_001005505.2:c.3408A>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_001005505.2:c.3408A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} + assert results['NM_001005505.2:c.3408A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': 'chr3', 'ref': u'T', 'pos': '50364696', 'alt': u'G'}} + assert results['NM_001005505.2:c.3408A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} + assert results['NM_001005505.2:c.3408A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': '3', 'ref': u'T', 'pos': '50364696', 'alt': u'G'}} + assert results['NM_001005505.2:c.3408A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001005505.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001005505.2'} + + assert 'NM_006030.2:c.3402A>C' in results.keys() + assert results['NM_006030.2:c.3402A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_006030.2:c.3402A>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_006030.2:c.3402A>C']['alt_genomic_loci'] == [] + assert results['NM_006030.2:c.3402A>C']['transcript_description'] == 'Homo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 2, mRNA' + assert results['NM_006030.2:c.3402A>C']['gene_symbol'] == 'CACNA2D2' + assert results['NM_006030.2:c.3402A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006021.2:p.(Gln1134His)', 'slr': 'NP_006021.2:p.(Q1134H)'} + assert results['NM_006030.2:c.3402A>C']['submitted_variant'] == '3-50402127-T-G' + assert results['NM_006030.2:c.3402A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_006030.2:c.3402A>C']['hgvs_lrg_variant'] == '' + assert results['NM_006030.2:c.3402A>C']['hgvs_transcript_variant'] == 'NM_006030.2:c.3402A>C' + assert 
results['NM_006030.2:c.3402A>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_006030.2:c.3402A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} + assert 'hg38' not in results['NM_006030.2:c.3402A>C']['primary_assembly_loci'].keys() + assert results['NM_006030.2:c.3402A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} + assert 'grch38' not in results['NM_006030.2:c.3402A>C']['primary_assembly_loci'].keys() + assert results['NM_006030.2:c.3402A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_006021.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006030.2'} + + assert 'NM_001174051.1:c.3423A>C' in results.keys() + assert results['NM_001174051.1:c.3423A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001174051.1:c.3423A>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001174051.1:c.3423A>C']['alt_genomic_loci'] == [] + assert results['NM_001174051.1:c.3423A>C']['transcript_description'] == 'Homo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 3, mRNA' + assert results['NM_001174051.1:c.3423A>C']['gene_symbol'] == 'CACNA2D2' + assert results['NM_001174051.1:c.3423A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167522.1:p.(Gln1141His)', 'slr': 'NP_001167522.1:p.(Q1141H)'} + assert results['NM_001174051.1:c.3423A>C']['submitted_variant'] == '3-50402127-T-G' + assert results['NM_001174051.1:c.3423A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_001174051.1:c.3423A>C']['hgvs_lrg_variant'] == '' + assert results['NM_001174051.1:c.3423A>C']['hgvs_transcript_variant'] == 'NM_001174051.1:c.3423A>C' + assert 
results['NM_001174051.1:c.3423A>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_001174051.1:c.3423A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} + assert 'hg38' not in results['NM_001174051.1:c.3423A>C']['primary_assembly_loci'].keys() + assert results['NM_001174051.1:c.3423A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} + assert 'grch38' not in results['NM_001174051.1:c.3423A>C']['primary_assembly_loci'].keys() + assert results['NM_001174051.1:c.3423A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167522.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001174051.1'} + + assert 'NM_001174051.2:c.3423A>C' in results.keys() + assert results['NM_001174051.2:c.3423A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001174051.2:c.3423A>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001174051.2:c.3423A>C']['alt_genomic_loci'] == [] + assert results['NM_001174051.2:c.3423A>C']['transcript_description'] == 'Homo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 3, mRNA' + assert results['NM_001174051.2:c.3423A>C']['gene_symbol'] == 'CACNA2D2' + assert results['NM_001174051.2:c.3423A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167522.1:p.(Gln1141His)', 'slr': 'NP_001167522.1:p.(Q1141H)'} + assert results['NM_001174051.2:c.3423A>C']['submitted_variant'] == '3-50402127-T-G' + assert results['NM_001174051.2:c.3423A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_001174051.2:c.3423A>C']['hgvs_lrg_variant'] == '' + assert results['NM_001174051.2:c.3423A>C']['hgvs_transcript_variant'] == 'NM_001174051.2:c.3423A>C' + assert 
results['NM_001174051.2:c.3423A>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_001174051.2:c.3423A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} + assert results['NM_001174051.2:c.3423A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': 'chr3', 'ref': u'T', 'pos': '50364696', 'alt': u'G'}} + assert results['NM_001174051.2:c.3423A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} + assert results['NM_001174051.2:c.3423A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': '3', 'ref': u'T', 'pos': '50364696', 'alt': u'G'}} + assert results['NM_001174051.2:c.3423A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167522.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001174051.2'} + + assert 'NM_006030.3:c.3402A>C' in results.keys() + assert results['NM_006030.3:c.3402A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_006030.3:c.3402A>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_006030.3:c.3402A>C']['alt_genomic_loci'] == [] + assert results['NM_006030.3:c.3402A>C']['transcript_description'] == 'Homo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 2, mRNA' + assert results['NM_006030.3:c.3402A>C']['gene_symbol'] == 'CACNA2D2' + assert results['NM_006030.3:c.3402A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006021.2:p.(Gln1134His)', 'slr': 'NP_006021.2:p.(Q1134H)'} + assert results['NM_006030.3:c.3402A>C']['submitted_variant'] == '3-50402127-T-G' + assert results['NM_006030.3:c.3402A>C']['genome_context_intronic_sequence'] == '' + assert 
results['NM_006030.3:c.3402A>C']['hgvs_lrg_variant'] == '' + assert results['NM_006030.3:c.3402A>C']['hgvs_transcript_variant'] == 'NM_006030.3:c.3402A>C' + assert results['NM_006030.3:c.3402A>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_006030.3:c.3402A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} + assert results['NM_006030.3:c.3402A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': 'chr3', 'ref': u'T', 'pos': '50364696', 'alt': u'G'}} + assert results['NM_006030.3:c.3402A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} + assert results['NM_006030.3:c.3402A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': '3', 'ref': u'T', 'pos': '50364696', 'alt': u'G'}} + assert results['NM_006030.3:c.3402A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_006021.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006030.3'} + + assert 'NM_001291101.1:c.3201A>C' in results.keys() + assert results['NM_001291101.1:c.3201A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001291101.1:c.3201A>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001291101.1:c.3201A>C']['alt_genomic_loci'] == [] + assert results['NM_001291101.1:c.3201A>C']['transcript_description'] == 'Homo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 4, mRNA' + assert results['NM_001291101.1:c.3201A>C']['gene_symbol'] == 'CACNA2D2' + assert results['NM_001291101.1:c.3201A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278030.1:p.(Gln1067His)', 'slr': 'NP_001278030.1:p.(Q1067H)'} + assert 
results['NM_001291101.1:c.3201A>C']['submitted_variant'] == '3-50402127-T-G' + assert results['NM_001291101.1:c.3201A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_001291101.1:c.3201A>C']['hgvs_lrg_variant'] == '' + assert results['NM_001291101.1:c.3201A>C']['hgvs_transcript_variant'] == 'NM_001291101.1:c.3201A>C' + assert results['NM_001291101.1:c.3201A>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_001291101.1:c.3201A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} + assert results['NM_001291101.1:c.3201A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': 'chr3', 'ref': u'T', 'pos': '50364696', 'alt': u'G'}} + assert results['NM_001291101.1:c.3201A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} + assert results['NM_001291101.1:c.3201A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': '3', 'ref': u'T', 'pos': '50364696', 'alt': u'G'}} + assert results['NM_001291101.1:c.3201A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278030.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291101.1'} + + assert results['flag'] == 'gene_variant' + assert 'NR_111912.1:n.443-1601T>G' in results.keys() + assert results['NR_111912.1:n.443-1601T>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_111912.1:n.443-1601T>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_111912.1:n.443-1601T>G']['alt_genomic_loci'] == [] + assert results['NR_111912.1:n.443-1601T>G']['transcript_description'] == 'Homo sapiens cytochrome b561 family member D2 (CYB561D2), transcript variant 3, non-coding RNA' + assert 
results['NR_111912.1:n.443-1601T>G']['gene_symbol'] == 'CYB561D2' + assert results['NR_111912.1:n.443-1601T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_111912.1:n.443-1601T>G']['submitted_variant'] == '3-50402127-T-G' + assert results['NR_111912.1:n.443-1601T>G']['genome_context_intronic_sequence'] == 'NC_000003.11(NR_111912.1):c.443-1601T>G' + assert results['NR_111912.1:n.443-1601T>G']['hgvs_lrg_variant'] == '' + assert results['NR_111912.1:n.443-1601T>G']['hgvs_transcript_variant'] == 'NR_111912.1:n.443-1601T>G' + assert results['NR_111912.1:n.443-1601T>G']['hgvs_refseqgene_variant'] == '' + assert results['NR_111912.1:n.443-1601T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} + assert results['NR_111912.1:n.443-1601T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': 'chr3', 'ref': 'T', 'pos': '50364696', 'alt': 'G'}} + assert results['NR_111912.1:n.443-1601T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} + assert results['NR_111912.1:n.443-1601T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50364696', 'alt': 'G'}} + assert results['NR_111912.1:n.443-1601T>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_111912.1'} + + assert 'NM_001005505.1:c.3408A>C' in results.keys() + assert results['NM_001005505.1:c.3408A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001005505.1:c.3408A>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001005505.1:c.3408A>C']['alt_genomic_loci'] == [] + assert 
results['NM_001005505.1:c.3408A>C']['transcript_description'] == 'Homo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 1, mRNA' + assert results['NM_001005505.1:c.3408A>C']['gene_symbol'] == 'CACNA2D2' + assert results['NM_001005505.1:c.3408A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001005505.1:p.(Gln1136His)', 'slr': 'NP_001005505.1:p.(Q1136H)'} + assert results['NM_001005505.1:c.3408A>C']['submitted_variant'] == '3-50402127-T-G' + assert results['NM_001005505.1:c.3408A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_001005505.1:c.3408A>C']['hgvs_lrg_variant'] == '' + assert results['NM_001005505.1:c.3408A>C']['hgvs_transcript_variant'] == 'NM_001005505.1:c.3408A>C' + assert results['NM_001005505.1:c.3408A>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_001005505.1:c.3408A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} + assert 'hg38' not in results['NM_001005505.1:c.3408A>C']['primary_assembly_loci'].keys() + assert results['NM_001005505.1:c.3408A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} + assert 'grch38' not in results['NM_001005505.1:c.3408A>C']['primary_assembly_loci'].keys() + assert results['NM_001005505.1:c.3408A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001005505.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001005505.1'} + + + def test_variant282(self): + variant = '3-50402890-G-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NR_111913.1:n.126G>A' in results.keys() + assert results['NR_111913.1:n.126G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_111913.1:n.126G>A']['refseqgene_context_intronic_sequence'] 
== '' + assert results['NR_111913.1:n.126G>A']['alt_genomic_loci'] == [] + assert results['NR_111913.1:n.126G>A']['transcript_description'] == 'Homo sapiens cytochrome b561 family member D2 (CYB561D2), transcript variant 4, non-coding RNA' + assert results['NR_111913.1:n.126G>A']['gene_symbol'] == 'CYB561D2' + assert results['NR_111913.1:n.126G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_111913.1:n.126G>A']['submitted_variant'] == '3-50402890-G-A' + assert results['NR_111913.1:n.126G>A']['genome_context_intronic_sequence'] == '' + assert results['NR_111913.1:n.126G>A']['hgvs_lrg_variant'] == '' + assert results['NR_111913.1:n.126G>A']['hgvs_transcript_variant'] == 'NR_111913.1:n.126G>A' + assert results['NR_111913.1:n.126G>A']['hgvs_refseqgene_variant'] == '' + assert results['NR_111913.1:n.126G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} + assert results['NR_111913.1:n.126G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} + assert results['NR_111913.1:n.126G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} + assert results['NR_111913.1:n.126G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} + assert results['NR_111913.1:n.126G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_111913.1'} + + assert 'NR_111912.1:n.443-838G>A' in results.keys() + assert results['NR_111912.1:n.443-838G>A']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NR_111912.1:n.443-838G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_111912.1:n.443-838G>A']['alt_genomic_loci'] == [] + assert results['NR_111912.1:n.443-838G>A']['transcript_description'] == 'Homo sapiens cytochrome b561 family member D2 (CYB561D2), transcript variant 3, non-coding RNA' + assert results['NR_111912.1:n.443-838G>A']['gene_symbol'] == 'CYB561D2' + assert results['NR_111912.1:n.443-838G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_111912.1:n.443-838G>A']['submitted_variant'] == '3-50402890-G-A' + assert results['NR_111912.1:n.443-838G>A']['genome_context_intronic_sequence'] == 'NC_000003.11(NR_111912.1):c.443-838G>A' + assert results['NR_111912.1:n.443-838G>A']['hgvs_lrg_variant'] == '' + assert results['NR_111912.1:n.443-838G>A']['hgvs_transcript_variant'] == 'NR_111912.1:n.443-838G>A' + assert results['NR_111912.1:n.443-838G>A']['hgvs_refseqgene_variant'] == '' + assert results['NR_111912.1:n.443-838G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} + assert results['NR_111912.1:n.443-838G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} + assert results['NR_111912.1:n.443-838G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} + assert results['NR_111912.1:n.443-838G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} + assert results['NR_111912.1:n.443-838G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_111912.1'} + + assert 
'NM_001291101.1:c.2788C>T' in results.keys() + assert results['NM_001291101.1:c.2788C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001291101.1:c.2788C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001291101.1:c.2788C>T']['alt_genomic_loci'] == [] + assert results['NM_001291101.1:c.2788C>T']['transcript_description'] == 'Homo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 4, mRNA' + assert results['NM_001291101.1:c.2788C>T']['gene_symbol'] == 'CACNA2D2' + assert results['NM_001291101.1:c.2788C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278030.1:p.(Pro930Ser)', 'slr': 'NP_001278030.1:p.(P930S)'} + assert results['NM_001291101.1:c.2788C>T']['submitted_variant'] == '3-50402890-G-A' + assert results['NM_001291101.1:c.2788C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001291101.1:c.2788C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001291101.1:c.2788C>T']['hgvs_transcript_variant'] == 'NM_001291101.1:c.2788C>T' + assert results['NM_001291101.1:c.2788C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001291101.1:c.2788C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': u'G', 'pos': '50402890', 'alt': u'A'}} + assert results['NM_001291101.1:c.2788C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'ref': u'G', 'pos': '50365459', 'alt': u'A'}} + assert results['NM_001291101.1:c.2788C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': u'G', 'pos': '50402890', 'alt': u'A'}} + assert results['NM_001291101.1:c.2788C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'ref': u'G', 'pos': '50365459', 'alt': u'A'}} + assert 
results['NM_001291101.1:c.2788C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278030.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291101.1'} + + assert 'NM_006030.2:c.2995C>T' in results.keys() + assert results['NM_006030.2:c.2995C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_006030.2:c.2995C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_006030.2:c.2995C>T']['alt_genomic_loci'] == [] + assert results['NM_006030.2:c.2995C>T']['transcript_description'] == 'Homo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 2, mRNA' + assert results['NM_006030.2:c.2995C>T']['gene_symbol'] == 'CACNA2D2' + assert results['NM_006030.2:c.2995C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006021.2:p.(Pro999Ser)', 'slr': 'NP_006021.2:p.(P999S)'} + assert results['NM_006030.2:c.2995C>T']['submitted_variant'] == '3-50402890-G-A' + assert results['NM_006030.2:c.2995C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_006030.2:c.2995C>T']['hgvs_lrg_variant'] == '' + assert results['NM_006030.2:c.2995C>T']['hgvs_transcript_variant'] == 'NM_006030.2:c.2995C>T' + assert results['NM_006030.2:c.2995C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_006030.2:c.2995C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': u'G', 'pos': '50402890', 'alt': u'A'}} + assert 'hg38' not in results['NM_006030.2:c.2995C>T']['primary_assembly_loci'].keys() + assert results['NM_006030.2:c.2995C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': u'G', 'pos': '50402890', 'alt': u'A'}} + assert 'grch38' not in results['NM_006030.2:c.2995C>T']['primary_assembly_loci'].keys() + assert results['NM_006030.2:c.2995C>T']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_006021.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006030.2'} + + assert 'NR_111914.1:n.126G>A' in results.keys() + assert results['NR_111914.1:n.126G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_111914.1:n.126G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_111914.1:n.126G>A']['alt_genomic_loci'] == [] + assert results['NR_111914.1:n.126G>A']['transcript_description'] == 'Homo sapiens cytochrome b561 family member D2 (CYB561D2), transcript variant 5, non-coding RNA' + assert results['NR_111914.1:n.126G>A']['gene_symbol'] == 'CYB561D2' + assert results['NR_111914.1:n.126G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_111914.1:n.126G>A']['submitted_variant'] == '3-50402890-G-A' + assert results['NR_111914.1:n.126G>A']['genome_context_intronic_sequence'] == '' + assert results['NR_111914.1:n.126G>A']['hgvs_lrg_variant'] == '' + assert results['NR_111914.1:n.126G>A']['hgvs_transcript_variant'] == 'NR_111914.1:n.126G>A' + assert results['NR_111914.1:n.126G>A']['hgvs_refseqgene_variant'] == '' + assert results['NR_111914.1:n.126G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} + assert results['NR_111914.1:n.126G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} + assert results['NR_111914.1:n.126G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} + assert results['NR_111914.1:n.126G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} + assert 
results['NR_111914.1:n.126G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_111914.1'} + + assert 'NM_001005505.2:c.2995C>T' in results.keys() + assert results['NM_001005505.2:c.2995C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001005505.2:c.2995C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001005505.2:c.2995C>T']['alt_genomic_loci'] == [] + assert results['NM_001005505.2:c.2995C>T']['transcript_description'] == 'Homo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 1, mRNA' + assert results['NM_001005505.2:c.2995C>T']['gene_symbol'] == 'CACNA2D2' + assert results['NM_001005505.2:c.2995C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001005505.1:p.(Pro999Ser)', 'slr': 'NP_001005505.1:p.(P999S)'} + assert results['NM_001005505.2:c.2995C>T']['submitted_variant'] == '3-50402890-G-A' + assert results['NM_001005505.2:c.2995C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001005505.2:c.2995C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001005505.2:c.2995C>T']['hgvs_transcript_variant'] == 'NM_001005505.2:c.2995C>T' + assert results['NM_001005505.2:c.2995C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001005505.2:c.2995C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': u'G', 'pos': '50402890', 'alt': u'A'}} + assert results['NM_001005505.2:c.2995C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'ref': u'G', 'pos': '50365459', 'alt': u'A'}} + assert results['NM_001005505.2:c.2995C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': u'G', 'pos': '50402890', 'alt': u'A'}} + assert results['NM_001005505.2:c.2995C>T']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'ref': u'G', 'pos': '50365459', 'alt': u'A'}} + assert results['NM_001005505.2:c.2995C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001005505.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001005505.2'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001174051.1:c.3016C>T' in results.keys() + assert results['NM_001174051.1:c.3016C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001174051.1:c.3016C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001174051.1:c.3016C>T']['alt_genomic_loci'] == [] + assert results['NM_001174051.1:c.3016C>T']['transcript_description'] == 'Homo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 3, mRNA' + assert results['NM_001174051.1:c.3016C>T']['gene_symbol'] == 'CACNA2D2' + assert results['NM_001174051.1:c.3016C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167522.1:p.(Pro1006Ser)', 'slr': 'NP_001167522.1:p.(P1006S)'} + assert results['NM_001174051.1:c.3016C>T']['submitted_variant'] == '3-50402890-G-A' + assert results['NM_001174051.1:c.3016C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001174051.1:c.3016C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001174051.1:c.3016C>T']['hgvs_transcript_variant'] == 'NM_001174051.1:c.3016C>T' + assert results['NM_001174051.1:c.3016C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001174051.1:c.3016C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': u'G', 'pos': '50402890', 'alt': u'A'}} + assert 'hg38' not in results['NM_001174051.1:c.3016C>T']['primary_assembly_loci'].keys() + assert results['NM_001174051.1:c.3016C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': 
u'G', 'pos': '50402890', 'alt': u'A'}} + assert 'grch38' not in results['NM_001174051.1:c.3016C>T']['primary_assembly_loci'].keys() + assert results['NM_001174051.1:c.3016C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167522.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001174051.1'} + + assert 'NM_001174051.2:c.3016C>T' in results.keys() + assert results['NM_001174051.2:c.3016C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001174051.2:c.3016C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001174051.2:c.3016C>T']['alt_genomic_loci'] == [] + assert results['NM_001174051.2:c.3016C>T']['transcript_description'] == 'Homo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 3, mRNA' + assert results['NM_001174051.2:c.3016C>T']['gene_symbol'] == 'CACNA2D2' + assert results['NM_001174051.2:c.3016C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167522.1:p.(Pro1006Ser)', 'slr': 'NP_001167522.1:p.(P1006S)'} + assert results['NM_001174051.2:c.3016C>T']['submitted_variant'] == '3-50402890-G-A' + assert results['NM_001174051.2:c.3016C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001174051.2:c.3016C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001174051.2:c.3016C>T']['hgvs_transcript_variant'] == 'NM_001174051.2:c.3016C>T' + assert results['NM_001174051.2:c.3016C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001174051.2:c.3016C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': u'G', 'pos': '50402890', 'alt': u'A'}} + assert results['NM_001174051.2:c.3016C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'ref': u'G', 'pos': '50365459', 'alt': u'A'}} + assert results['NM_001174051.2:c.3016C>T']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': u'G', 'pos': '50402890', 'alt': u'A'}} + assert results['NM_001174051.2:c.3016C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'ref': u'G', 'pos': '50365459', 'alt': u'A'}} + assert results['NM_001174051.2:c.3016C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167522.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001174051.2'} + + assert 'NM_006030.3:c.2995C>T' in results.keys() + assert results['NM_006030.3:c.2995C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_006030.3:c.2995C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_006030.3:c.2995C>T']['alt_genomic_loci'] == [] + assert results['NM_006030.3:c.2995C>T']['transcript_description'] == 'Homo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 2, mRNA' + assert results['NM_006030.3:c.2995C>T']['gene_symbol'] == 'CACNA2D2' + assert results['NM_006030.3:c.2995C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006021.2:p.(Pro999Ser)', 'slr': 'NP_006021.2:p.(P999S)'} + assert results['NM_006030.3:c.2995C>T']['submitted_variant'] == '3-50402890-G-A' + assert results['NM_006030.3:c.2995C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_006030.3:c.2995C>T']['hgvs_lrg_variant'] == '' + assert results['NM_006030.3:c.2995C>T']['hgvs_transcript_variant'] == 'NM_006030.3:c.2995C>T' + assert results['NM_006030.3:c.2995C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_006030.3:c.2995C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': u'G', 'pos': '50402890', 'alt': u'A'}} + assert results['NM_006030.3:c.2995C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 
'vcf': {'chr': 'chr3', 'ref': u'G', 'pos': '50365459', 'alt': u'A'}} + assert results['NM_006030.3:c.2995C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': u'G', 'pos': '50402890', 'alt': u'A'}} + assert results['NM_006030.3:c.2995C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'ref': u'G', 'pos': '50365459', 'alt': u'A'}} + assert results['NM_006030.3:c.2995C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_006021.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006030.3'} + + assert 'NM_001005505.1:c.2995C>T' in results.keys() + assert results['NM_001005505.1:c.2995C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001005505.1:c.2995C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001005505.1:c.2995C>T']['alt_genomic_loci'] == [] + assert results['NM_001005505.1:c.2995C>T']['transcript_description'] == 'Homo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 1, mRNA' + assert results['NM_001005505.1:c.2995C>T']['gene_symbol'] == 'CACNA2D2' + assert results['NM_001005505.1:c.2995C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001005505.1:p.(Pro999Ser)', 'slr': 'NP_001005505.1:p.(P999S)'} + assert results['NM_001005505.1:c.2995C>T']['submitted_variant'] == '3-50402890-G-A' + assert results['NM_001005505.1:c.2995C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001005505.1:c.2995C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001005505.1:c.2995C>T']['hgvs_transcript_variant'] == 'NM_001005505.1:c.2995C>T' + assert results['NM_001005505.1:c.2995C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001005505.1:c.2995C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 
'ref': u'G', 'pos': '50402890', 'alt': u'A'}} + assert 'hg38' not in results['NM_001005505.1:c.2995C>T']['primary_assembly_loci'].keys() + assert results['NM_001005505.1:c.2995C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': u'G', 'pos': '50402890', 'alt': u'A'}} + assert 'grch38' not in results['NM_001005505.1:c.2995C>T']['primary_assembly_loci'].keys() + assert results['NM_001005505.1:c.2995C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001005505.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001005505.1'} + + + def test_variant283(self): + variant = '3-57851007-AG-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_007159.4:c.1135+565del' in results.keys() + assert results['NM_007159.4:c.1135+565del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007159.4:c.1135+565del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007159.4:c.1135+565del']['alt_genomic_loci'] == [] + assert results['NM_007159.4:c.1135+565del']['transcript_description'] == 'Homo sapiens sarcolemma associated protein (SLMAP), transcript variant 2, mRNA' + assert results['NM_007159.4:c.1135+565del']['gene_symbol'] == 'SLMAP' + assert results['NM_007159.4:c.1135+565del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009090.2:p.?', 'slr': 'NP_009090.2:p.?'} + assert results['NM_007159.4:c.1135+565del']['submitted_variant'] == '3-57851007-AG-A' + assert results['NM_007159.4:c.1135+565del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_007159.4):c.1135+565del' + assert results['NM_007159.4:c.1135+565del']['hgvs_lrg_variant'] == '' + assert results['NM_007159.4:c.1135+565del']['hgvs_transcript_variant'] == 'NM_007159.4:c.1135+565del' + assert results['NM_007159.4:c.1135+565del']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_007159.4:c.1135+565del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': 'chr3', 'ref': 'AG', 'pos': '57851007', 'alt': 'A'}} + assert results['NM_007159.4:c.1135+565del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.57865281del', 'vcf': {'chr': 'chr3', 'ref': 'AG', 'pos': '57865280', 'alt': 'A'}} + assert results['NM_007159.4:c.1135+565del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': '3', 'ref': 'AG', 'pos': '57851007', 'alt': 'A'}} + assert results['NM_007159.4:c.1135+565del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.57865281del', 'vcf': {'chr': '3', 'ref': 'AG', 'pos': '57865280', 'alt': 'A'}} + assert results['NM_007159.4:c.1135+565del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009090.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007159.4'} + + assert 'NM_001304420.2:c.1186+424del' in results.keys() + assert results['NM_001304420.2:c.1186+424del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001304420.2:c.1186+424del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001304420.2:c.1186+424del']['alt_genomic_loci'] == [] + assert results['NM_001304420.2:c.1186+424del']['transcript_description'] == 'Homo sapiens sarcolemma associated protein (SLMAP), transcript variant 1, mRNA' + assert results['NM_001304420.2:c.1186+424del']['gene_symbol'] == 'SLMAP' + assert results['NM_001304420.2:c.1186+424del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001291349.1:p.?', 'slr': 'NP_001291349.1:p.?'} + assert results['NM_001304420.2:c.1186+424del']['submitted_variant'] == '3-57851007-AG-A' + assert results['NM_001304420.2:c.1186+424del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_001304420.2):c.1186+424del' + assert 
results['NM_001304420.2:c.1186+424del']['hgvs_lrg_variant'] == '' + assert results['NM_001304420.2:c.1186+424del']['hgvs_transcript_variant'] == 'NM_001304420.2:c.1186+424del' + assert results['NM_001304420.2:c.1186+424del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001304420.2:c.1186+424del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': 'chr3', 'ref': 'AG', 'pos': '57851007', 'alt': 'A'}} + assert results['NM_001304420.2:c.1186+424del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.57865281del', 'vcf': {'chr': 'chr3', 'ref': 'AG', 'pos': '57865280', 'alt': 'A'}} + assert results['NM_001304420.2:c.1186+424del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': '3', 'ref': 'AG', 'pos': '57851007', 'alt': 'A'}} + assert results['NM_001304420.2:c.1186+424del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.57865281del', 'vcf': {'chr': '3', 'ref': 'AG', 'pos': '57865280', 'alt': 'A'}} + assert results['NM_001304420.2:c.1186+424del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001291349.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001304420.2'} + + assert 'NM_001304421.2:c.1135+565del' in results.keys() + assert results['NM_001304421.2:c.1135+565del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001304421.2:c.1135+565del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001304421.2:c.1135+565del']['alt_genomic_loci'] == [] + assert results['NM_001304421.2:c.1135+565del']['transcript_description'] == 'Homo sapiens sarcolemma associated protein (SLMAP), transcript variant 3, mRNA' + assert results['NM_001304421.2:c.1135+565del']['gene_symbol'] == 'SLMAP' + assert results['NM_001304421.2:c.1135+565del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001291350.1:p.?', 'slr': 
'NP_001291350.1:p.?'} + assert results['NM_001304421.2:c.1135+565del']['submitted_variant'] == '3-57851007-AG-A' + assert results['NM_001304421.2:c.1135+565del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_001304421.2):c.1135+565del' + assert results['NM_001304421.2:c.1135+565del']['hgvs_lrg_variant'] == '' + assert results['NM_001304421.2:c.1135+565del']['hgvs_transcript_variant'] == 'NM_001304421.2:c.1135+565del' + assert results['NM_001304421.2:c.1135+565del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001304421.2:c.1135+565del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': 'chr3', 'ref': 'AG', 'pos': '57851007', 'alt': 'A'}} + assert results['NM_001304421.2:c.1135+565del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.57865281del', 'vcf': {'chr': 'chr3', 'ref': 'AG', 'pos': '57865280', 'alt': 'A'}} + assert results['NM_001304421.2:c.1135+565del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': '3', 'ref': 'AG', 'pos': '57851007', 'alt': 'A'}} + assert results['NM_001304421.2:c.1135+565del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.57865281del', 'vcf': {'chr': '3', 'ref': 'AG', 'pos': '57865280', 'alt': 'A'}} + assert results['NM_001304421.2:c.1135+565del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001291350.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001304421.2'} + + assert results['flag'] == 'gene_variant' + assert 'NM_007159.2:c.1135+565del' in results.keys() + assert results['NM_007159.2:c.1135+565del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007159.2:c.1135+565del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007159.2:c.1135+565del']['alt_genomic_loci'] == [] + assert results['NM_007159.2:c.1135+565del']['transcript_description'] == 
'Homo sapiens sarcolemma associated protein (SLMAP), mRNA' + assert results['NM_007159.2:c.1135+565del']['gene_symbol'] == 'SLMAP' + assert results['NM_007159.2:c.1135+565del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009090.2:p.?', 'slr': 'NP_009090.2:p.?'} + assert results['NM_007159.2:c.1135+565del']['submitted_variant'] == '3-57851007-AG-A' + assert results['NM_007159.2:c.1135+565del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_007159.2):c.1135+565del' + assert results['NM_007159.2:c.1135+565del']['hgvs_lrg_variant'] == '' + assert results['NM_007159.2:c.1135+565del']['hgvs_transcript_variant'] == 'NM_007159.2:c.1135+565del' + assert results['NM_007159.2:c.1135+565del']['hgvs_refseqgene_variant'] == '' + assert results['NM_007159.2:c.1135+565del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': 'chr3', 'ref': 'AG', 'pos': '57851007', 'alt': 'A'}} + assert 'hg38' not in results['NM_007159.2:c.1135+565del']['primary_assembly_loci'].keys() + assert results['NM_007159.2:c.1135+565del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': '3', 'ref': 'AG', 'pos': '57851007', 'alt': 'A'}} + assert 'grch38' not in results['NM_007159.2:c.1135+565del']['primary_assembly_loci'].keys() + assert results['NM_007159.2:c.1135+565del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009090.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007159.2'} + + assert 'obsolete_record_3' in results.keys() + assert results['obsolete_record_3']['hgvs_lrg_transcript_variant'] == '' + assert results['obsolete_record_3']['refseqgene_context_intronic_sequence'] == '' + assert results['obsolete_record_3']['alt_genomic_loci'] == [] + assert results['obsolete_record_3']['transcript_description'] == '' + assert results['obsolete_record_3']['gene_symbol'] == '' + assert 
results['obsolete_record_3']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['obsolete_record_3']['submitted_variant'] == '3-57851007-AG-A' + assert results['obsolete_record_3']['genome_context_intronic_sequence'] == '' + assert results['obsolete_record_3']['hgvs_lrg_variant'] == '' + assert results['obsolete_record_3']['hgvs_transcript_variant'] == '' + assert results['obsolete_record_3']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['obsolete_record_3']['primary_assembly_loci'].keys() + assert 'hg38' not in results['obsolete_record_3']['primary_assembly_loci'].keys() + assert 'grch37' not in results['obsolete_record_3']['primary_assembly_loci'].keys() + assert 'grch38' not in results['obsolete_record_3']['primary_assembly_loci'].keys() + assert results['obsolete_record_3']['reference_sequence_records'] == '' + + assert 'obsolete_record_2' in results.keys() + assert results['obsolete_record_2']['hgvs_lrg_transcript_variant'] == '' + assert results['obsolete_record_2']['refseqgene_context_intronic_sequence'] == '' + assert results['obsolete_record_2']['alt_genomic_loci'] == [] + assert results['obsolete_record_2']['transcript_description'] == '' + assert results['obsolete_record_2']['gene_symbol'] == '' + assert results['obsolete_record_2']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['obsolete_record_2']['submitted_variant'] == '3-57851007-AG-A' + assert results['obsolete_record_2']['genome_context_intronic_sequence'] == '' + assert results['obsolete_record_2']['hgvs_lrg_variant'] == '' + assert results['obsolete_record_2']['hgvs_transcript_variant'] == '' + assert results['obsolete_record_2']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['obsolete_record_2']['primary_assembly_loci'].keys() + assert 'hg38' not in results['obsolete_record_2']['primary_assembly_loci'].keys() + assert 'grch37' not in results['obsolete_record_2']['primary_assembly_loci'].keys() + 
assert 'grch38' not in results['obsolete_record_2']['primary_assembly_loci'].keys() + assert results['obsolete_record_2']['reference_sequence_records'] == '' + + assert 'obsolete_record_1' in results.keys() + assert results['obsolete_record_1']['hgvs_lrg_transcript_variant'] == '' + assert results['obsolete_record_1']['refseqgene_context_intronic_sequence'] == '' + assert results['obsolete_record_1']['alt_genomic_loci'] == [] + assert results['obsolete_record_1']['transcript_description'] == '' + assert results['obsolete_record_1']['gene_symbol'] == '' + assert results['obsolete_record_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['obsolete_record_1']['submitted_variant'] == '3-57851007-AG-A' + assert results['obsolete_record_1']['genome_context_intronic_sequence'] == '' + assert results['obsolete_record_1']['hgvs_lrg_variant'] == '' + assert results['obsolete_record_1']['hgvs_transcript_variant'] == '' + assert results['obsolete_record_1']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['obsolete_record_1']['primary_assembly_loci'].keys() + assert 'hg38' not in results['obsolete_record_1']['primary_assembly_loci'].keys() + assert 'grch37' not in results['obsolete_record_1']['primary_assembly_loci'].keys() + assert 'grch38' not in results['obsolete_record_1']['primary_assembly_loci'].keys() + assert results['obsolete_record_1']['reference_sequence_records'] == '' + + + def test_variant284(self): + variant = '3-122003832-G-C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001178065.1:c.3061C=' in results.keys() + assert results['NM_001178065.1:c.3061C=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001178065.1:c.3061C=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001178065.1:c.3061C=']['alt_genomic_loci'] == [] + assert results['NM_001178065.1:c.3061C=']['transcript_description'] == 'Homo sapiens calcium sensing receptor (CASR), 
transcript variant 1, mRNA' + assert results['NM_001178065.1:c.3061C=']['gene_symbol'] == 'CASR' + assert results['NM_001178065.1:c.3061C=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001171536.1:p.(Gln1021=)', 'slr': 'NP_001171536.1:p.(Q1021=)'} + assert results['NM_001178065.1:c.3061C=']['submitted_variant'] == '3-122003832-G-C' + assert results['NM_001178065.1:c.3061C=']['genome_context_intronic_sequence'] == '' + assert results['NM_001178065.1:c.3061C=']['hgvs_lrg_variant'] == '' + assert results['NM_001178065.1:c.3061C=']['hgvs_transcript_variant'] == 'NM_001178065.1:c.3061C=' + assert results['NM_001178065.1:c.3061C=']['hgvs_refseqgene_variant'] == '' + assert results['NM_001178065.1:c.3061C=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.122003832G>C', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '122003832', 'alt': 'C'}} + assert results['NM_001178065.1:c.3061C=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.122284985G>C', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '122284985', 'alt': 'C'}} + assert results['NM_001178065.1:c.3061C=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.122003832G>C', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '122003832', 'alt': 'C'}} + assert results['NM_001178065.1:c.3061C=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.122284985G>C', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '122284985', 'alt': 'C'}} + assert results['NM_001178065.1:c.3061C=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001171536.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001178065.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_000388.3:c.3031C=' in results.keys() + assert results['NM_000388.3:c.3031C=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000388.3:c.3031C=']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_000388.3:c.3031C=']['alt_genomic_loci'] == [] + assert results['NM_000388.3:c.3031C=']['transcript_description'] == 'Homo sapiens calcium sensing receptor (CASR), transcript variant 2, mRNA' + assert results['NM_000388.3:c.3031C=']['gene_symbol'] == 'CASR' + assert results['NM_000388.3:c.3031C=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000379.2:p.(Gln1011=)', 'slr': 'NP_000379.2:p.(Q1011=)'} + assert results['NM_000388.3:c.3031C=']['submitted_variant'] == '3-122003832-G-C' + assert results['NM_000388.3:c.3031C=']['genome_context_intronic_sequence'] == '' + assert results['NM_000388.3:c.3031C=']['hgvs_lrg_variant'] == '' + assert results['NM_000388.3:c.3031C=']['hgvs_transcript_variant'] == 'NM_000388.3:c.3031C=' + assert results['NM_000388.3:c.3031C=']['hgvs_refseqgene_variant'] == 'NG_009058.1:g.106303C=' + assert results['NM_000388.3:c.3031C=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.122003832G>C', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '122003832', 'alt': 'C'}} + assert results['NM_000388.3:c.3031C=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.122284985G>C', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '122284985', 'alt': 'C'}} + assert results['NM_000388.3:c.3031C=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.122003832G>C', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '122003832', 'alt': 'C'}} + assert results['NM_000388.3:c.3031C=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.122284985G>C', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '122284985', 'alt': 'C'}} + assert results['NM_000388.3:c.3031C=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009058.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000379.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000388.3'} + + + def test_variant285(self): + variant = '4-153332910-C-CAGG' + 
results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001349798.1:c.45_46insCCT' in results.keys() + assert results['NM_001349798.1:c.45_46insCCT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001349798.1:c.45_46insCCT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001349798.1:c.45_46insCCT']['alt_genomic_loci'] == [] + assert results['NM_001349798.1:c.45_46insCCT']['transcript_description'] == 'Homo sapiens F-box and WD repeat domain containing 7 (FBXW7), transcript variant 5, mRNA' + assert results['NM_001349798.1:c.45_46insCCT']['gene_symbol'] == 'FBXW7' + assert results['NM_001349798.1:c.45_46insCCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_361014.1:p.(Thr15_Gly16insPro)', 'slr': 'NP_361014.1:p.(T15_G16insP)'} + assert results['NM_001349798.1:c.45_46insCCT']['submitted_variant'] == '4-153332910-C-CAGG' + assert results['NM_001349798.1:c.45_46insCCT']['genome_context_intronic_sequence'] == '' + assert results['NM_001349798.1:c.45_46insCCT']['hgvs_lrg_variant'] == '' + assert results['NM_001349798.1:c.45_46insCCT']['hgvs_transcript_variant'] == 'NM_001349798.1:c.45_46insCCT' + assert results['NM_001349798.1:c.45_46insCCT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '153332910', 'alt': u'CAGG'}} + assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411758_152411759insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '152411758', 'alt': u'CAGG'}} + assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '153332910', 'alt': u'CAGG'}} + assert 
results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411758_152411759insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '152411758', 'alt': u'CAGG'}} + assert results['NM_001349798.1:c.45_46insCCT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_361014.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001349798.1'} + + assert 'NM_033632.3:c.45_46insCCT' in results.keys() + assert results['NM_033632.3:c.45_46insCCT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_033632.3:c.45_46insCCT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_033632.3:c.45_46insCCT']['alt_genomic_loci'] == [] + assert results['NM_033632.3:c.45_46insCCT']['transcript_description'] == 'Homo sapiens F-box and WD repeat domain containing 7 (FBXW7), transcript variant 1, mRNA' + assert results['NM_033632.3:c.45_46insCCT']['gene_symbol'] == 'FBXW7' + assert results['NM_033632.3:c.45_46insCCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_361014.1:p.(Thr15_Gly16insPro)', 'slr': 'NP_361014.1:p.(T15_G16insP)'} + assert results['NM_033632.3:c.45_46insCCT']['submitted_variant'] == '4-153332910-C-CAGG' + assert results['NM_033632.3:c.45_46insCCT']['genome_context_intronic_sequence'] == '' + assert results['NM_033632.3:c.45_46insCCT']['hgvs_lrg_variant'] == '' + assert results['NM_033632.3:c.45_46insCCT']['hgvs_transcript_variant'] == 'NM_033632.3:c.45_46insCCT' + assert results['NM_033632.3:c.45_46insCCT']['hgvs_refseqgene_variant'] == 'NG_029466.1:g.128262_128263insCCT' + assert results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '153332910', 'alt': u'CAGG'}} + assert results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411758_152411759insAGG', 
'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '152411758', 'alt': u'CAGG'}} + assert results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '153332910', 'alt': u'CAGG'}} + assert results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411758_152411759insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '152411758', 'alt': u'CAGG'}} + assert results['NM_033632.3:c.45_46insCCT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029466.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_361014.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_033632.3'} + + assert 'NM_001257069.1:c.45_46insCCT' in results.keys() + assert results['NM_001257069.1:c.45_46insCCT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001257069.1:c.45_46insCCT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001257069.1:c.45_46insCCT']['alt_genomic_loci'] == [] + assert results['NM_001257069.1:c.45_46insCCT']['transcript_description'] == 'Homo sapiens F-box and WD repeat domain containing 7 (FBXW7), transcript variant 4, mRNA' + assert results['NM_001257069.1:c.45_46insCCT']['gene_symbol'] == 'FBXW7' + assert results['NM_001257069.1:c.45_46insCCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243998.1:p.(Thr15_Gly16insPro)', 'slr': 'NP_001243998.1:p.(T15_G16insP)'} + assert results['NM_001257069.1:c.45_46insCCT']['submitted_variant'] == '4-153332910-C-CAGG' + assert results['NM_001257069.1:c.45_46insCCT']['genome_context_intronic_sequence'] == '' + assert results['NM_001257069.1:c.45_46insCCT']['hgvs_lrg_variant'] == '' + assert results['NM_001257069.1:c.45_46insCCT']['hgvs_transcript_variant'] == 'NM_001257069.1:c.45_46insCCT' + assert results['NM_001257069.1:c.45_46insCCT']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '153332910', 'alt': u'CAGG'}} + assert results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411758_152411759insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '152411758', 'alt': u'CAGG'}} + assert results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '153332910', 'alt': u'CAGG'}} + assert results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411758_152411759insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '152411758', 'alt': u'CAGG'}} + assert results['NM_001257069.1:c.45_46insCCT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243998.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257069.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001349798.2:c.45_46insCCT' in results.keys() + assert results['NM_001349798.2:c.45_46insCCT']['hgvs_lrg_transcript_variant'] == 'LRG_1141t1:c.45_46insCCT' + assert results['NM_001349798.2:c.45_46insCCT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001349798.2:c.45_46insCCT']['alt_genomic_loci'] == [] + assert results['NM_001349798.2:c.45_46insCCT']['transcript_description'] == 'Homo sapiens F-box and WD repeat domain containing 7 (FBXW7), transcript variant 5, mRNA' + assert results['NM_001349798.2:c.45_46insCCT']['gene_symbol'] == 'FBXW7' + assert results['NM_001349798.2:c.45_46insCCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001336727.1:p.(Thr15_Gly16insPro)', 'slr': 'NP_001336727.1:p.(T15_G16insP)'} + assert 
results['NM_001349798.2:c.45_46insCCT']['submitted_variant'] == '4-153332910-C-CAGG' + assert results['NM_001349798.2:c.45_46insCCT']['genome_context_intronic_sequence'] == '' + assert results['NM_001349798.2:c.45_46insCCT']['hgvs_lrg_variant'] == '' + assert results['NM_001349798.2:c.45_46insCCT']['hgvs_transcript_variant'] == 'NM_001349798.2:c.45_46insCCT' + assert results['NM_001349798.2:c.45_46insCCT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001349798.2:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '153332910', 'alt': u'CAGG'}} + assert 'hg38' not in results['NM_001349798.2:c.45_46insCCT']['primary_assembly_loci'].keys() + assert results['NM_001349798.2:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '153332910', 'alt': u'CAGG'}} + assert 'grch38' not in results['NM_001349798.2:c.45_46insCCT']['primary_assembly_loci'].keys() + assert results['NM_001349798.2:c.45_46insCCT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001336727.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001349798.2'} + + + def test_variant286(self): + variant = '5-1295183-G-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'intergenic' + assert 'Intergenic_Variant_1' in results.keys() + assert results['Intergenic_Variant_1']['hgvs_lrg_transcript_variant'] == '' + assert results['Intergenic_Variant_1']['refseqgene_context_intronic_sequence'] == '' + assert results['Intergenic_Variant_1']['alt_genomic_loci'] == [] + assert results['Intergenic_Variant_1']['transcript_description'] == '' + assert results['Intergenic_Variant_1']['gene_symbol'] == '' + assert results['Intergenic_Variant_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 
'slr': ''} + assert results['Intergenic_Variant_1']['submitted_variant'] == '5-1295183-G-A' + assert results['Intergenic_Variant_1']['genome_context_intronic_sequence'] == '' + assert results['Intergenic_Variant_1']['hgvs_lrg_variant'] == 'LRG_343:g.4980C>T' + assert results['Intergenic_Variant_1']['hgvs_transcript_variant'] == '' + assert results['Intergenic_Variant_1']['hgvs_refseqgene_variant'] == 'NG_009265.1:g.4980C>T' + assert results['Intergenic_Variant_1']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': u'NC_000005.9:g.1295183G>A', 'vcf': {'chr': 'chr5', 'ref': 'G', 'pos': '1295183', 'alt': 'A'}} + assert results['Intergenic_Variant_1']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': u'NC_000005.10:g.1295068G>A', 'vcf': {'chr': 'chr5', 'ref': 'G', 'pos': '1295068', 'alt': 'A'}} + assert results['Intergenic_Variant_1']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': u'NC_000005.9:g.1295183G>A', 'vcf': {'chr': '5', 'ref': 'G', 'pos': '1295183', 'alt': 'A'}} + assert results['Intergenic_Variant_1']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': u'NC_000005.10:g.1295068G>A', 'vcf': {'chr': '5', 'ref': 'G', 'pos': '1295068', 'alt': 'A'}} + assert results['Intergenic_Variant_1']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009265.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_343.xml'} + + + def test_variant287(self): + variant = '5-77396835-TTTC-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_003664.4:c.2409_2411del' in results.keys() + assert results['NM_003664.4:c.2409_2411del']['hgvs_lrg_transcript_variant'] == 'LRG_170t1:c.2409_2411del' + assert results['NM_003664.4:c.2409_2411del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003664.4:c.2409_2411del']['alt_genomic_loci'] == [] + assert results['NM_003664.4:c.2409_2411del']['transcript_description'] == 'Homo sapiens 
adaptor related protein complex 3 subunit beta 1 (AP3B1), transcript variant 1, mRNA' + assert results['NM_003664.4:c.2409_2411del']['gene_symbol'] == 'AP3B1' + assert results['NM_003664.4:c.2409_2411del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003655.3(LRG_170p1):p.(Lys804del)', 'slr': 'NP_003655.3:p.(K804del)'} + assert results['NM_003664.4:c.2409_2411del']['submitted_variant'] == '5-77396835-TTTC-T' + assert results['NM_003664.4:c.2409_2411del']['genome_context_intronic_sequence'] == '' + assert results['NM_003664.4:c.2409_2411del']['hgvs_lrg_variant'] == 'LRG_170:g.198691_198693del' + assert results['NM_003664.4:c.2409_2411del']['hgvs_transcript_variant'] == 'NM_003664.4:c.2409_2411del' + assert results['NM_003664.4:c.2409_2411del']['hgvs_refseqgene_variant'] == 'NG_007268.1:g.198691_198693del' + assert results['NM_003664.4:c.2409_2411del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396836_77396838del', 'vcf': {'chr': 'chr5', 'ref': 'TTTC', 'pos': '77396835', 'alt': 'T'}} + assert results['NM_003664.4:c.2409_2411del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.78101012_78101014del', 'vcf': {'chr': 'chr5', 'ref': 'TTTC', 'pos': '78101011', 'alt': 'T'}} + assert results['NM_003664.4:c.2409_2411del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396836_77396838del', 'vcf': {'chr': '5', 'ref': 'TTTC', 'pos': '77396835', 'alt': 'T'}} + assert results['NM_003664.4:c.2409_2411del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.78101012_78101014del', 'vcf': {'chr': '5', 'ref': 'TTTC', 'pos': '78101011', 'alt': 'T'}} + assert results['NM_003664.4:c.2409_2411del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007268.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003655.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003664.4', 'lrg': 
'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_170.xml'} + + assert results['flag'] == 'gene_variant' + assert 'NM_003664.3:c.2409_2411del' in results.keys() + assert results['NM_003664.3:c.2409_2411del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003664.3:c.2409_2411del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003664.3:c.2409_2411del']['alt_genomic_loci'] == [] + assert results['NM_003664.3:c.2409_2411del']['transcript_description'] == 'Homo sapiens adaptor-related protein complex 3, beta 1 subunit (AP3B1), mRNA' + assert results['NM_003664.3:c.2409_2411del']['gene_symbol'] == 'AP3B1' + assert results['NM_003664.3:c.2409_2411del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003655.3(LRG_170p1):p.(Lys804del)', 'slr': 'NP_003655.3:p.(K804del)'} + assert results['NM_003664.3:c.2409_2411del']['submitted_variant'] == '5-77396835-TTTC-T' + assert results['NM_003664.3:c.2409_2411del']['genome_context_intronic_sequence'] == '' + assert results['NM_003664.3:c.2409_2411del']['hgvs_lrg_variant'] == '' + assert results['NM_003664.3:c.2409_2411del']['hgvs_transcript_variant'] == 'NM_003664.3:c.2409_2411del' + assert results['NM_003664.3:c.2409_2411del']['hgvs_refseqgene_variant'] == '' + assert results['NM_003664.3:c.2409_2411del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396836_77396838del', 'vcf': {'chr': 'chr5', 'ref': 'TTTC', 'pos': '77396835', 'alt': 'T'}} + assert 'hg38' not in results['NM_003664.3:c.2409_2411del']['primary_assembly_loci'].keys() + assert results['NM_003664.3:c.2409_2411del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396836_77396838del', 'vcf': {'chr': '5', 'ref': 'TTTC', 'pos': '77396835', 'alt': 'T'}} + assert 'grch38' not in results['NM_003664.3:c.2409_2411del']['primary_assembly_loci'].keys() + assert results['NM_003664.3:c.2409_2411del']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_003655.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003664.3'} + + assert 'NM_001271769.1:c.2262_2264del' in results.keys() + assert results['NM_001271769.1:c.2262_2264del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001271769.1:c.2262_2264del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001271769.1:c.2262_2264del']['alt_genomic_loci'] == [] + assert results['NM_001271769.1:c.2262_2264del']['transcript_description'] == 'Homo sapiens adaptor related protein complex 3 subunit beta 1 (AP3B1), transcript variant 2, mRNA' + assert results['NM_001271769.1:c.2262_2264del']['gene_symbol'] == 'AP3B1' + assert results['NM_001271769.1:c.2262_2264del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001258698.1:p.(Lys755del)', 'slr': 'NP_001258698.1:p.(K755del)'} + assert results['NM_001271769.1:c.2262_2264del']['submitted_variant'] == '5-77396835-TTTC-T' + assert results['NM_001271769.1:c.2262_2264del']['genome_context_intronic_sequence'] == '' + assert results['NM_001271769.1:c.2262_2264del']['hgvs_lrg_variant'] == '' + assert results['NM_001271769.1:c.2262_2264del']['hgvs_transcript_variant'] == 'NM_001271769.1:c.2262_2264del' + assert results['NM_001271769.1:c.2262_2264del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001271769.1:c.2262_2264del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396836_77396838del', 'vcf': {'chr': 'chr5', 'ref': 'TTTC', 'pos': '77396835', 'alt': 'T'}} + assert results['NM_001271769.1:c.2262_2264del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.78101012_78101014del', 'vcf': {'chr': 'chr5', 'ref': 'TTTC', 'pos': '78101011', 'alt': 'T'}} + assert results['NM_001271769.1:c.2262_2264del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396836_77396838del', 'vcf': {'chr': '5', 'ref': 'TTTC', 'pos': '77396835', 'alt': 'T'}} + assert 
results['NM_001271769.1:c.2262_2264del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.78101012_78101014del', 'vcf': {'chr': '5', 'ref': 'TTTC', 'pos': '78101011', 'alt': 'T'}} + assert results['NM_001271769.1:c.2262_2264del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001258698.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001271769.1'} + + + def test_variant288(self): + variant = '5-118811422-GGTGA-G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_000414.3:c.302+3_302+6del' in results.keys() + assert results['NM_000414.3:c.302+3_302+6del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000414.3:c.302+3_302+6del']['refseqgene_context_intronic_sequence'] == 'NG_008182.1(NM_000414.3):c.302+3_302+6del' + assert results['NM_000414.3:c.302+3_302+6del']['alt_genomic_loci'] == [] + assert results['NM_000414.3:c.302+3_302+6del']['transcript_description'] == 'Homo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 2, mRNA' + assert results['NM_000414.3:c.302+3_302+6del']['gene_symbol'] == 'HSD17B4' + assert results['NM_000414.3:c.302+3_302+6del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000405.1:p.?', 'slr': 'NP_000405.1:p.?'} + assert results['NM_000414.3:c.302+3_302+6del']['submitted_variant'] == '5-118811422-GGTGA-G' + assert results['NM_000414.3:c.302+3_302+6del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_000414.3):c.302+3_302+6del' + assert results['NM_000414.3:c.302+3_302+6del']['hgvs_lrg_variant'] == '' + assert results['NM_000414.3:c.302+3_302+6del']['hgvs_transcript_variant'] == 'NM_000414.3:c.302+3_302+6del' + assert results['NM_000414.3:c.302+3_302+6del']['hgvs_refseqgene_variant'] == 'NG_008182.1:g.28278_28281del' + assert results['NM_000414.3:c.302+3_302+6del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 
'vcf': {'chr': 'chr5', 'ref': 'GGTGA', 'pos': '118811422', 'alt': 'G'}} + assert results['NM_000414.3:c.302+3_302+6del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': 'chr5', 'ref': 'GGTGA', 'pos': '119475727', 'alt': 'G'}} + assert results['NM_000414.3:c.302+3_302+6del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': '5', 'ref': 'GGTGA', 'pos': '118811422', 'alt': 'G'}} + assert results['NM_000414.3:c.302+3_302+6del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': '5', 'ref': 'GGTGA', 'pos': '119475727', 'alt': 'G'}} + assert results['NM_000414.3:c.302+3_302+6del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008182.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000405.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000414.3'} + + assert 'NM_001292028.1:c.-110+3_-110+6del' in results.keys() + assert results['NM_001292028.1:c.-110+3_-110+6del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001292028.1:c.-110+3_-110+6del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001292028.1:c.-110+3_-110+6del']['alt_genomic_loci'] == [] + assert results['NM_001292028.1:c.-110+3_-110+6del']['transcript_description'] == 'Homo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 5, mRNA' + assert results['NM_001292028.1:c.-110+3_-110+6del']['gene_symbol'] == 'HSD17B4' + assert results['NM_001292028.1:c.-110+3_-110+6del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278957.1:p.?', 'slr': 'NP_001278957.1:p.?'} + assert results['NM_001292028.1:c.-110+3_-110+6del']['submitted_variant'] == '5-118811422-GGTGA-G' + assert results['NM_001292028.1:c.-110+3_-110+6del']['genome_context_intronic_sequence'] == 
'NC_000005.9(NM_001292028.1):c.-110+3_-110+6del' + assert results['NM_001292028.1:c.-110+3_-110+6del']['hgvs_lrg_variant'] == '' + assert results['NM_001292028.1:c.-110+3_-110+6del']['hgvs_transcript_variant'] == 'NM_001292028.1:c.-110+3_-110+6del' + assert results['NM_001292028.1:c.-110+3_-110+6del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001292028.1:c.-110+3_-110+6del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': 'chr5', 'ref': 'GGTGA', 'pos': '118811422', 'alt': 'G'}} + assert results['NM_001292028.1:c.-110+3_-110+6del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': 'chr5', 'ref': 'GGTGA', 'pos': '119475727', 'alt': 'G'}} + assert results['NM_001292028.1:c.-110+3_-110+6del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': '5', 'ref': 'GGTGA', 'pos': '118811422', 'alt': 'G'}} + assert results['NM_001292028.1:c.-110+3_-110+6del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': '5', 'ref': 'GGTGA', 'pos': '119475727', 'alt': 'G'}} + assert results['NM_001292028.1:c.-110+3_-110+6del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278957.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001292028.1'} + + assert 'NM_001199291.2:c.377+3_377+6del' in results.keys() + assert results['NM_001199291.2:c.377+3_377+6del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001199291.2:c.377+3_377+6del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001199291.2:c.377+3_377+6del']['alt_genomic_loci'] == [] + assert results['NM_001199291.2:c.377+3_377+6del']['transcript_description'] == 'Homo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 1, mRNA' + assert 
results['NM_001199291.2:c.377+3_377+6del']['gene_symbol'] == 'HSD17B4' + assert results['NM_001199291.2:c.377+3_377+6del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001186220.1:p.?', 'slr': 'NP_001186220.1:p.?'} + assert results['NM_001199291.2:c.377+3_377+6del']['submitted_variant'] == '5-118811422-GGTGA-G' + assert results['NM_001199291.2:c.377+3_377+6del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001199291.2):c.377+3_377+6del' + assert results['NM_001199291.2:c.377+3_377+6del']['hgvs_lrg_variant'] == '' + assert results['NM_001199291.2:c.377+3_377+6del']['hgvs_transcript_variant'] == 'NM_001199291.2:c.377+3_377+6del' + assert results['NM_001199291.2:c.377+3_377+6del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001199291.2:c.377+3_377+6del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': 'chr5', 'ref': 'GGTGA', 'pos': '118811422', 'alt': 'G'}} + assert results['NM_001199291.2:c.377+3_377+6del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': 'chr5', 'ref': 'GGTGA', 'pos': '119475727', 'alt': 'G'}} + assert results['NM_001199291.2:c.377+3_377+6del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': '5', 'ref': 'GGTGA', 'pos': '118811422', 'alt': 'G'}} + assert results['NM_001199291.2:c.377+3_377+6del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': '5', 'ref': 'GGTGA', 'pos': '119475727', 'alt': 'G'}} + assert results['NM_001199291.2:c.377+3_377+6del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186220.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199291.2'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001292027.1:c.230+3_230+6del' in results.keys() + assert 
results['NM_001292027.1:c.230+3_230+6del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001292027.1:c.230+3_230+6del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001292027.1:c.230+3_230+6del']['alt_genomic_loci'] == [] + assert results['NM_001292027.1:c.230+3_230+6del']['transcript_description'] == 'Homo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 4, mRNA' + assert results['NM_001292027.1:c.230+3_230+6del']['gene_symbol'] == 'HSD17B4' + assert results['NM_001292027.1:c.230+3_230+6del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278956.1:p.?', 'slr': 'NP_001278956.1:p.?'} + assert results['NM_001292027.1:c.230+3_230+6del']['submitted_variant'] == '5-118811422-GGTGA-G' + assert results['NM_001292027.1:c.230+3_230+6del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001292027.1):c.230+3_230+6del' + assert results['NM_001292027.1:c.230+3_230+6del']['hgvs_lrg_variant'] == '' + assert results['NM_001292027.1:c.230+3_230+6del']['hgvs_transcript_variant'] == 'NM_001292027.1:c.230+3_230+6del' + assert results['NM_001292027.1:c.230+3_230+6del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001292027.1:c.230+3_230+6del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': 'chr5', 'ref': 'GGTGA', 'pos': '118811422', 'alt': 'G'}} + assert results['NM_001292027.1:c.230+3_230+6del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': 'chr5', 'ref': 'GGTGA', 'pos': '119475727', 'alt': 'G'}} + assert results['NM_001292027.1:c.230+3_230+6del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': '5', 'ref': 'GGTGA', 'pos': '118811422', 'alt': 'G'}} + assert results['NM_001292027.1:c.230+3_230+6del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': '5', 'ref': 'GGTGA', 'pos': '119475727', 'alt': 'G'}} + assert results['NM_001292027.1:c.230+3_230+6del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278956.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001292027.1'} + + assert 'NM_001199291.1:c.377+3_377+6del' in results.keys() + assert results['NM_001199291.1:c.377+3_377+6del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001199291.1:c.377+3_377+6del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001199291.1:c.377+3_377+6del']['alt_genomic_loci'] == [] + assert results['NM_001199291.1:c.377+3_377+6del']['transcript_description'] == 'Homo sapiens hydroxysteroid (17-beta) dehydrogenase 4 (HSD17B4), transcript variant 1, mRNA' + assert results['NM_001199291.1:c.377+3_377+6del']['gene_symbol'] == 'HSD17B4' + assert results['NM_001199291.1:c.377+3_377+6del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001186220.1:p.?', 'slr': 'NP_001186220.1:p.?'} + assert results['NM_001199291.1:c.377+3_377+6del']['submitted_variant'] == '5-118811422-GGTGA-G' + assert results['NM_001199291.1:c.377+3_377+6del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001199291.1):c.377+3_377+6del' + assert results['NM_001199291.1:c.377+3_377+6del']['hgvs_lrg_variant'] == '' + assert results['NM_001199291.1:c.377+3_377+6del']['hgvs_transcript_variant'] == 'NM_001199291.1:c.377+3_377+6del' + assert results['NM_001199291.1:c.377+3_377+6del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001199291.1:c.377+3_377+6del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': 'chr5', 'ref': 'GGTGA', 'pos': '118811422', 'alt': 'G'}} + assert 'hg38' not in results['NM_001199291.1:c.377+3_377+6del']['primary_assembly_loci'].keys() + assert results['NM_001199291.1:c.377+3_377+6del']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': '5', 'ref': 'GGTGA', 'pos': '118811422', 'alt': 'G'}} + assert 'grch38' not in results['NM_001199291.1:c.377+3_377+6del']['primary_assembly_loci'].keys() + assert results['NM_001199291.1:c.377+3_377+6del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186220.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199291.1'} + + assert 'NM_001199292.1:c.248+3_248+6del' in results.keys() + assert results['NM_001199292.1:c.248+3_248+6del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001199292.1:c.248+3_248+6del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001199292.1:c.248+3_248+6del']['alt_genomic_loci'] == [] + assert results['NM_001199292.1:c.248+3_248+6del']['transcript_description'] == 'Homo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 3, mRNA' + assert results['NM_001199292.1:c.248+3_248+6del']['gene_symbol'] == 'HSD17B4' + assert results['NM_001199292.1:c.248+3_248+6del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001186221.1:p.?', 'slr': 'NP_001186221.1:p.?'} + assert results['NM_001199292.1:c.248+3_248+6del']['submitted_variant'] == '5-118811422-GGTGA-G' + assert results['NM_001199292.1:c.248+3_248+6del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001199292.1):c.248+3_248+6del' + assert results['NM_001199292.1:c.248+3_248+6del']['hgvs_lrg_variant'] == '' + assert results['NM_001199292.1:c.248+3_248+6del']['hgvs_transcript_variant'] == 'NM_001199292.1:c.248+3_248+6del' + assert results['NM_001199292.1:c.248+3_248+6del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001199292.1:c.248+3_248+6del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': 'chr5', 'ref': 'GGTGA', 'pos': '118811422', 'alt': 'G'}} + assert 
results['NM_001199292.1:c.248+3_248+6del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': 'chr5', 'ref': 'GGTGA', 'pos': '119475727', 'alt': 'G'}} + assert results['NM_001199292.1:c.248+3_248+6del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': '5', 'ref': 'GGTGA', 'pos': '118811422', 'alt': 'G'}} + assert results['NM_001199292.1:c.248+3_248+6del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': '5', 'ref': 'GGTGA', 'pos': '119475727', 'alt': 'G'}} + assert results['NM_001199292.1:c.248+3_248+6del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186221.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199292.1'} + + + def test_variant289(self): + variant = '5-118811422-GGTGAG-G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001292028.1:c.-110+1_-110+5del' in results.keys() + assert results['NM_001292028.1:c.-110+1_-110+5del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001292028.1:c.-110+1_-110+5del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001292028.1:c.-110+1_-110+5del']['alt_genomic_loci'] == [] + assert results['NM_001292028.1:c.-110+1_-110+5del']['transcript_description'] == 'Homo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 5, mRNA' + assert results['NM_001292028.1:c.-110+1_-110+5del']['gene_symbol'] == 'HSD17B4' + assert results['NM_001292028.1:c.-110+1_-110+5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278957.1:p.?', 'slr': 'NP_001278957.1:p.?'} + assert results['NM_001292028.1:c.-110+1_-110+5del']['submitted_variant'] == '5-118811422-GGTGAG-G' + assert results['NM_001292028.1:c.-110+1_-110+5del']['genome_context_intronic_sequence'] == 
'NC_000005.9(NM_001292028.1):c.-110+1_-110+5del' + assert results['NM_001292028.1:c.-110+1_-110+5del']['hgvs_lrg_variant'] == '' + assert results['NM_001292028.1:c.-110+1_-110+5del']['hgvs_transcript_variant'] == 'NM_001292028.1:c.-110+1_-110+5del' + assert results['NM_001292028.1:c.-110+1_-110+5del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001292028.1:c.-110+1_-110+5del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': 'chr5', 'ref': 'GGGTGA', 'pos': '118811421', 'alt': 'G'}} + assert results['NM_001292028.1:c.-110+1_-110+5del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': 'chr5', 'ref': 'GGGTGA', 'pos': '119475726', 'alt': 'G'}} + assert results['NM_001292028.1:c.-110+1_-110+5del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': '5', 'ref': 'GGGTGA', 'pos': '118811421', 'alt': 'G'}} + assert results['NM_001292028.1:c.-110+1_-110+5del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': '5', 'ref': 'GGGTGA', 'pos': '119475726', 'alt': 'G'}} + assert results['NM_001292028.1:c.-110+1_-110+5del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278957.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001292028.1'} + + assert 'NM_000414.3:c.302+1_302+5del' in results.keys() + assert results['NM_000414.3:c.302+1_302+5del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000414.3:c.302+1_302+5del']['refseqgene_context_intronic_sequence'] == 'NG_008182.1(NM_000414.3):c.302+1_302+5del' + assert results['NM_000414.3:c.302+1_302+5del']['alt_genomic_loci'] == [] + assert results['NM_000414.3:c.302+1_302+5del']['transcript_description'] == 'Homo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript 
variant 2, mRNA' + assert results['NM_000414.3:c.302+1_302+5del']['gene_symbol'] == 'HSD17B4' + assert results['NM_000414.3:c.302+1_302+5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000405.1:p.?', 'slr': 'NP_000405.1:p.?'} + assert results['NM_000414.3:c.302+1_302+5del']['submitted_variant'] == '5-118811422-GGTGAG-G' + assert results['NM_000414.3:c.302+1_302+5del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_000414.3):c.302+1_302+5del' + assert results['NM_000414.3:c.302+1_302+5del']['hgvs_lrg_variant'] == '' + assert results['NM_000414.3:c.302+1_302+5del']['hgvs_transcript_variant'] == 'NM_000414.3:c.302+1_302+5del' + assert results['NM_000414.3:c.302+1_302+5del']['hgvs_refseqgene_variant'] == 'NG_008182.1:g.28276_28280del' + assert results['NM_000414.3:c.302+1_302+5del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': 'chr5', 'ref': 'GGGTGA', 'pos': '118811421', 'alt': 'G'}} + assert results['NM_000414.3:c.302+1_302+5del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': 'chr5', 'ref': 'GGGTGA', 'pos': '119475726', 'alt': 'G'}} + assert results['NM_000414.3:c.302+1_302+5del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': '5', 'ref': 'GGGTGA', 'pos': '118811421', 'alt': 'G'}} + assert results['NM_000414.3:c.302+1_302+5del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': '5', 'ref': 'GGGTGA', 'pos': '119475726', 'alt': 'G'}} + assert results['NM_000414.3:c.302+1_302+5del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008182.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000405.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000414.3'} + + assert 'NM_001199291.2:c.377+1_377+5del' in 
results.keys() + assert results['NM_001199291.2:c.377+1_377+5del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001199291.2:c.377+1_377+5del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001199291.2:c.377+1_377+5del']['alt_genomic_loci'] == [] + assert results['NM_001199291.2:c.377+1_377+5del']['transcript_description'] == 'Homo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 1, mRNA' + assert results['NM_001199291.2:c.377+1_377+5del']['gene_symbol'] == 'HSD17B4' + assert results['NM_001199291.2:c.377+1_377+5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001186220.1:p.?', 'slr': 'NP_001186220.1:p.?'} + assert results['NM_001199291.2:c.377+1_377+5del']['submitted_variant'] == '5-118811422-GGTGAG-G' + assert results['NM_001199291.2:c.377+1_377+5del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001199291.2):c.377+1_377+5del' + assert results['NM_001199291.2:c.377+1_377+5del']['hgvs_lrg_variant'] == '' + assert results['NM_001199291.2:c.377+1_377+5del']['hgvs_transcript_variant'] == 'NM_001199291.2:c.377+1_377+5del' + assert results['NM_001199291.2:c.377+1_377+5del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001199291.2:c.377+1_377+5del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': 'chr5', 'ref': 'GGGTGA', 'pos': '118811421', 'alt': 'G'}} + assert results['NM_001199291.2:c.377+1_377+5del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': 'chr5', 'ref': 'GGGTGA', 'pos': '119475726', 'alt': 'G'}} + assert results['NM_001199291.2:c.377+1_377+5del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': '5', 'ref': 'GGGTGA', 'pos': '118811421', 'alt': 'G'}} + assert results['NM_001199291.2:c.377+1_377+5del']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': '5', 'ref': 'GGGTGA', 'pos': '119475726', 'alt': 'G'}} + assert results['NM_001199291.2:c.377+1_377+5del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186220.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199291.2'} + + assert 'NM_001199292.1:c.248+1_248+5del' in results.keys() + assert results['NM_001199292.1:c.248+1_248+5del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001199292.1:c.248+1_248+5del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001199292.1:c.248+1_248+5del']['alt_genomic_loci'] == [] + assert results['NM_001199292.1:c.248+1_248+5del']['transcript_description'] == 'Homo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 3, mRNA' + assert results['NM_001199292.1:c.248+1_248+5del']['gene_symbol'] == 'HSD17B4' + assert results['NM_001199292.1:c.248+1_248+5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001186221.1:p.?', 'slr': 'NP_001186221.1:p.?'} + assert results['NM_001199292.1:c.248+1_248+5del']['submitted_variant'] == '5-118811422-GGTGAG-G' + assert results['NM_001199292.1:c.248+1_248+5del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001199292.1):c.248+1_248+5del' + assert results['NM_001199292.1:c.248+1_248+5del']['hgvs_lrg_variant'] == '' + assert results['NM_001199292.1:c.248+1_248+5del']['hgvs_transcript_variant'] == 'NM_001199292.1:c.248+1_248+5del' + assert results['NM_001199292.1:c.248+1_248+5del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001199292.1:c.248+1_248+5del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': 'chr5', 'ref': 'GGGTGA', 'pos': '118811421', 'alt': 'G'}} + assert results['NM_001199292.1:c.248+1_248+5del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 
'vcf': {'chr': 'chr5', 'ref': 'GGGTGA', 'pos': '119475726', 'alt': 'G'}} + assert results['NM_001199292.1:c.248+1_248+5del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': '5', 'ref': 'GGGTGA', 'pos': '118811421', 'alt': 'G'}} + assert results['NM_001199292.1:c.248+1_248+5del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': '5', 'ref': 'GGGTGA', 'pos': '119475726', 'alt': 'G'}} + assert results['NM_001199292.1:c.248+1_248+5del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186221.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199292.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001199291.1:c.377+1_377+5del' in results.keys() + assert results['NM_001199291.1:c.377+1_377+5del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001199291.1:c.377+1_377+5del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001199291.1:c.377+1_377+5del']['alt_genomic_loci'] == [] + assert results['NM_001199291.1:c.377+1_377+5del']['transcript_description'] == 'Homo sapiens hydroxysteroid (17-beta) dehydrogenase 4 (HSD17B4), transcript variant 1, mRNA' + assert results['NM_001199291.1:c.377+1_377+5del']['gene_symbol'] == 'HSD17B4' + assert results['NM_001199291.1:c.377+1_377+5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001186220.1:p.?', 'slr': 'NP_001186220.1:p.?'} + assert results['NM_001199291.1:c.377+1_377+5del']['submitted_variant'] == '5-118811422-GGTGAG-G' + assert results['NM_001199291.1:c.377+1_377+5del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001199291.1):c.377+1_377+5del' + assert results['NM_001199291.1:c.377+1_377+5del']['hgvs_lrg_variant'] == '' + assert results['NM_001199291.1:c.377+1_377+5del']['hgvs_transcript_variant'] == 'NM_001199291.1:c.377+1_377+5del' + assert 
results['NM_001199291.1:c.377+1_377+5del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001199291.1:c.377+1_377+5del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': 'chr5', 'ref': 'GGGTGA', 'pos': '118811421', 'alt': 'G'}} + assert 'hg38' not in results['NM_001199291.1:c.377+1_377+5del']['primary_assembly_loci'].keys() + assert results['NM_001199291.1:c.377+1_377+5del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': '5', 'ref': 'GGGTGA', 'pos': '118811421', 'alt': 'G'}} + assert 'grch38' not in results['NM_001199291.1:c.377+1_377+5del']['primary_assembly_loci'].keys() + assert results['NM_001199291.1:c.377+1_377+5del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186220.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199291.1'} + + assert 'NM_001292027.1:c.230+1_230+5del' in results.keys() + assert results['NM_001292027.1:c.230+1_230+5del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001292027.1:c.230+1_230+5del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001292027.1:c.230+1_230+5del']['alt_genomic_loci'] == [] + assert results['NM_001292027.1:c.230+1_230+5del']['transcript_description'] == 'Homo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 4, mRNA' + assert results['NM_001292027.1:c.230+1_230+5del']['gene_symbol'] == 'HSD17B4' + assert results['NM_001292027.1:c.230+1_230+5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278956.1:p.?', 'slr': 'NP_001278956.1:p.?'} + assert results['NM_001292027.1:c.230+1_230+5del']['submitted_variant'] == '5-118811422-GGTGAG-G' + assert results['NM_001292027.1:c.230+1_230+5del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001292027.1):c.230+1_230+5del' + assert results['NM_001292027.1:c.230+1_230+5del']['hgvs_lrg_variant'] == 
'' + assert results['NM_001292027.1:c.230+1_230+5del']['hgvs_transcript_variant'] == 'NM_001292027.1:c.230+1_230+5del' + assert results['NM_001292027.1:c.230+1_230+5del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001292027.1:c.230+1_230+5del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': 'chr5', 'ref': 'GGGTGA', 'pos': '118811421', 'alt': 'G'}} + assert results['NM_001292027.1:c.230+1_230+5del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': 'chr5', 'ref': 'GGGTGA', 'pos': '119475726', 'alt': 'G'}} + assert results['NM_001292027.1:c.230+1_230+5del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': '5', 'ref': 'GGGTGA', 'pos': '118811421', 'alt': 'G'}} + assert results['NM_001292027.1:c.230+1_230+5del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': '5', 'ref': 'GGGTGA', 'pos': '119475726', 'alt': 'G'}} + assert results['NM_001292027.1:c.230+1_230+5del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278956.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001292027.1'} + + + def test_variant290(self): + variant = '5-131705587-CG-C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NR_110997.1:n.21del' in results.keys() + assert results['NR_110997.1:n.21del']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_110997.1:n.21del']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_110997.1:n.21del']['alt_genomic_loci'] == [] + assert results['NR_110997.1:n.21del']['transcript_description'] == 'Homo sapiens MIR3936 host gene (MIR3936HG), long non-coding RNA' + assert results['NR_110997.1:n.21del']['gene_symbol'] == 
'MIR3936HG' + assert results['NR_110997.1:n.21del']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_110997.1:n.21del']['submitted_variant'] == '5-131705587-CG-C' + assert results['NR_110997.1:n.21del']['genome_context_intronic_sequence'] == '' + assert results['NR_110997.1:n.21del']['hgvs_lrg_variant'] == '' + assert results['NR_110997.1:n.21del']['hgvs_transcript_variant'] == 'NR_110997.1:n.21del' + assert results['NR_110997.1:n.21del']['hgvs_refseqgene_variant'] == '' + assert results['NR_110997.1:n.21del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.131705590del', 'vcf': {'chr': 'chr5', 'ref': 'CG', 'pos': '131705587', 'alt': 'C'}} + assert results['NR_110997.1:n.21del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.132369898del', 'vcf': {'chr': 'chr5', 'ref': 'CG', 'pos': '132369895', 'alt': 'C'}} + assert results['NR_110997.1:n.21del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.131705590del', 'vcf': {'chr': '5', 'ref': 'CG', 'pos': '131705587', 'alt': 'C'}} + assert results['NR_110997.1:n.21del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.132369898del', 'vcf': {'chr': '5', 'ref': 'CG', 'pos': '132369895', 'alt': 'C'}} + assert results['NR_110997.1:n.21del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_110997.1'} + + assert 'NM_003060.3:c.-75del' in results.keys() + assert results['NM_003060.3:c.-75del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003060.3:c.-75del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003060.3:c.-75del']['alt_genomic_loci'] == [] + assert results['NM_003060.3:c.-75del']['transcript_description'] == 'Homo sapiens solute carrier family 22 member 5 (SLC22A5), transcript variant 2, mRNA' + assert results['NM_003060.3:c.-75del']['gene_symbol'] == 
'SLC22A5' + assert results['NM_003060.3:c.-75del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003051.1:p.?', 'slr': 'NP_003051.1:p.?'} + assert results['NM_003060.3:c.-75del']['submitted_variant'] == '5-131705587-CG-C' + assert results['NM_003060.3:c.-75del']['genome_context_intronic_sequence'] == '' + assert results['NM_003060.3:c.-75del']['hgvs_lrg_variant'] == '' + assert results['NM_003060.3:c.-75del']['hgvs_transcript_variant'] == 'NM_003060.3:c.-75del' + assert results['NM_003060.3:c.-75del']['hgvs_refseqgene_variant'] == '' + assert results['NM_003060.3:c.-75del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.131705590del', 'vcf': {'chr': 'chr5', 'ref': 'CG', 'pos': '131705587', 'alt': 'C'}} + assert results['NM_003060.3:c.-75del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.132369898del', 'vcf': {'chr': 'chr5', 'ref': 'CG', 'pos': '132369895', 'alt': 'C'}} + assert results['NM_003060.3:c.-75del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.131705590del', 'vcf': {'chr': '5', 'ref': 'CG', 'pos': '131705587', 'alt': 'C'}} + assert results['NM_003060.3:c.-75del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.132369898del', 'vcf': {'chr': '5', 'ref': 'CG', 'pos': '132369895', 'alt': 'C'}} + assert results['NM_003060.3:c.-75del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003051.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003060.3'} + + assert 'NM_001308122.1:c.-75del' in results.keys() + assert results['NM_001308122.1:c.-75del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001308122.1:c.-75del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001308122.1:c.-75del']['alt_genomic_loci'] == [] + assert results['NM_001308122.1:c.-75del']['transcript_description'] == 'Homo sapiens solute carrier family 22 member 5 (SLC22A5), 
transcript variant 1, mRNA' + assert results['NM_001308122.1:c.-75del']['gene_symbol'] == 'SLC22A5' + assert results['NM_001308122.1:c.-75del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001295051.1:p.?', 'slr': 'NP_001295051.1:p.?'} + assert results['NM_001308122.1:c.-75del']['submitted_variant'] == '5-131705587-CG-C' + assert results['NM_001308122.1:c.-75del']['genome_context_intronic_sequence'] == '' + assert results['NM_001308122.1:c.-75del']['hgvs_lrg_variant'] == '' + assert results['NM_001308122.1:c.-75del']['hgvs_transcript_variant'] == 'NM_001308122.1:c.-75del' + assert results['NM_001308122.1:c.-75del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001308122.1:c.-75del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.131705590del', 'vcf': {'chr': 'chr5', 'ref': 'CG', 'pos': '131705587', 'alt': 'C'}} + assert results['NM_001308122.1:c.-75del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.132369898del', 'vcf': {'chr': 'chr5', 'ref': 'CG', 'pos': '132369895', 'alt': 'C'}} + assert results['NM_001308122.1:c.-75del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.131705590del', 'vcf': {'chr': '5', 'ref': 'CG', 'pos': '131705587', 'alt': 'C'}} + assert results['NM_001308122.1:c.-75del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.132369898del', 'vcf': {'chr': '5', 'ref': 'CG', 'pos': '132369895', 'alt': 'C'}} + assert results['NM_001308122.1:c.-75del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001295051.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001308122.1'} + + + def test_variant291(self): + variant = '5-148406482-T-C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_024577.3:c.2813A>G' in results.keys() + assert 
results['NM_024577.3:c.2813A>G']['hgvs_lrg_transcript_variant'] == 'LRG_269t1:c.2813A>G' + assert results['NM_024577.3:c.2813A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_024577.3:c.2813A>G']['alt_genomic_loci'] == [] + assert results['NM_024577.3:c.2813A>G']['transcript_description'] == 'Homo sapiens SH3 domain and tetratricopeptide repeats 2 (SH3TC2), mRNA' + assert results['NM_024577.3:c.2813A>G']['gene_symbol'] == 'SH3TC2' + assert results['NM_024577.3:c.2813A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_078853.2(LRG_269p1):p.(His938Arg)', 'slr': 'NP_078853.2:p.(H938R)'} + assert results['NM_024577.3:c.2813A>G']['submitted_variant'] == '5-148406482-T-C' + assert results['NM_024577.3:c.2813A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_024577.3:c.2813A>G']['hgvs_lrg_variant'] == 'LRG_269:g.41256A>G' + assert results['NM_024577.3:c.2813A>G']['hgvs_transcript_variant'] == 'NM_024577.3:c.2813A>G' + assert results['NM_024577.3:c.2813A>G']['hgvs_refseqgene_variant'] == 'NG_007947.2:g.41256A>G' + assert results['NM_024577.3:c.2813A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.148406482T>C', 'vcf': {'chr': 'chr5', 'ref': u'T', 'pos': '148406482', 'alt': u'C'}} + assert results['NM_024577.3:c.2813A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.149026919T>C', 'vcf': {'chr': 'chr5', 'ref': u'T', 'pos': '149026919', 'alt': u'C'}} + assert results['NM_024577.3:c.2813A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.148406482T>C', 'vcf': {'chr': '5', 'ref': u'T', 'pos': '148406482', 'alt': u'C'}} + assert results['NM_024577.3:c.2813A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.149026919T>C', 'vcf': {'chr': '5', 'ref': u'T', 'pos': '149026919', 'alt': u'C'}} + assert results['NM_024577.3:c.2813A>G']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_007947.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_078853.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024577.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_269.xml'} + + + def test_variant292(self): + variant = '6-110036337-T-TCAG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_014845.5:c.123_124insCAG' in results.keys() + assert results['NM_014845.5:c.123_124insCAG']['hgvs_lrg_transcript_variant'] == 'LRG_241t1:c.123_124insCAG' + assert results['NM_014845.5:c.123_124insCAG']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014845.5:c.123_124insCAG']['alt_genomic_loci'] == [] + assert results['NM_014845.5:c.123_124insCAG']['transcript_description'] == 'Homo sapiens FIG4 phosphoinositide 5-phosphatase (FIG4), mRNA' + assert results['NM_014845.5:c.123_124insCAG']['gene_symbol'] == 'FIG4' + assert results['NM_014845.5:c.123_124insCAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055660.1(LRG_241p1):p.(Ile41_Asp42insGln)', 'slr': 'NP_055660.1:p.(I41_D42insQ)'} + assert results['NM_014845.5:c.123_124insCAG']['submitted_variant'] == '6-110036337-T-TCAG' + assert results['NM_014845.5:c.123_124insCAG']['genome_context_intronic_sequence'] == '' + assert results['NM_014845.5:c.123_124insCAG']['hgvs_lrg_variant'] == 'LRG_241:g.28914_28915insCAG' + assert results['NM_014845.5:c.123_124insCAG']['hgvs_transcript_variant'] == 'NM_014845.5:c.123_124insCAG' + assert results['NM_014845.5:c.123_124insCAG']['hgvs_refseqgene_variant'] == 'NG_007977.1:g.28914_28915insCAG' + assert results['NM_014845.5:c.123_124insCAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.110036337_110036338insCAG', 'vcf': {'chr': 'chr6', 'ref': 'T', 'pos': '110036337', 'alt': 'TCAG'}} + assert results['NM_014845.5:c.123_124insCAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000006.12:g.109715134_109715135insCAG', 'vcf': {'chr': 'chr6', 'ref': 'T', 'pos': '109715134', 'alt': 'TCAG'}} + assert results['NM_014845.5:c.123_124insCAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.110036337_110036338insCAG', 'vcf': {'chr': '6', 'ref': 'T', 'pos': '110036337', 'alt': 'TCAG'}} + assert results['NM_014845.5:c.123_124insCAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.109715134_109715135insCAG', 'vcf': {'chr': '6', 'ref': 'T', 'pos': '109715134', 'alt': 'TCAG'}} + assert results['NM_014845.5:c.123_124insCAG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007977.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055660.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014845.5', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_241.xml'} + + assert results['flag'] == 'gene_variant' + + def test_variant293(self): + variant = '6-110036337-TGAT-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_014845.5:c.124_126del' in results.keys() + assert results['NM_014845.5:c.124_126del']['hgvs_lrg_transcript_variant'] == 'LRG_241t1:c.124_126del' + assert results['NM_014845.5:c.124_126del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014845.5:c.124_126del']['alt_genomic_loci'] == [] + assert results['NM_014845.5:c.124_126del']['transcript_description'] == 'Homo sapiens FIG4 phosphoinositide 5-phosphatase (FIG4), mRNA' + assert results['NM_014845.5:c.124_126del']['gene_symbol'] == 'FIG4' + assert results['NM_014845.5:c.124_126del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055660.1(LRG_241p1):p.(Asp42del)', 'slr': 'NP_055660.1:p.(D42del)'} + assert results['NM_014845.5:c.124_126del']['submitted_variant'] == '6-110036337-TGAT-T' + assert results['NM_014845.5:c.124_126del']['genome_context_intronic_sequence'] == '' + assert 
results['NM_014845.5:c.124_126del']['hgvs_lrg_variant'] == 'LRG_241:g.28915_28917del' + assert results['NM_014845.5:c.124_126del']['hgvs_transcript_variant'] == 'NM_014845.5:c.124_126del' + assert results['NM_014845.5:c.124_126del']['hgvs_refseqgene_variant'] == 'NG_007977.1:g.28915_28917del' + assert results['NM_014845.5:c.124_126del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.110036338_110036340del', 'vcf': {'chr': 'chr6', 'ref': 'TTGA', 'pos': '110036336', 'alt': 'T'}} + assert results['NM_014845.5:c.124_126del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.109715135_109715137del', 'vcf': {'chr': 'chr6', 'ref': 'TTGA', 'pos': '109715133', 'alt': 'T'}} + assert results['NM_014845.5:c.124_126del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.110036338_110036340del', 'vcf': {'chr': '6', 'ref': 'TTGA', 'pos': '110036336', 'alt': 'T'}} + assert results['NM_014845.5:c.124_126del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.109715135_109715137del', 'vcf': {'chr': '6', 'ref': 'TTGA', 'pos': '109715133', 'alt': 'T'}} + assert results['NM_014845.5:c.124_126del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007977.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055660.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014845.5', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_241.xml'} + + assert results['flag'] == 'gene_variant' + + def test_variant294(self): + variant = '6-152651802-C-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_182961.3:c.14018G>T' in results.keys() + assert results['NM_182961.3:c.14018G>T']['hgvs_lrg_transcript_variant'] == 'LRG_427t1:c.14018G>T' + assert results['NM_182961.3:c.14018G>T']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_182961.3:c.14018G>T']['alt_genomic_loci'] == [] + assert results['NM_182961.3:c.14018G>T']['transcript_description'] == 'Homo sapiens spectrin repeat containing nuclear envelope protein 1 (SYNE1), transcript variant 1, mRNA' + assert results['NM_182961.3:c.14018G>T']['gene_symbol'] == 'SYNE1' + assert results['NM_182961.3:c.14018G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_892006.3(LRG_427p1):p.(Arg4673Leu)', 'slr': 'NP_892006.3:p.(R4673L)'} + assert results['NM_182961.3:c.14018G>T']['submitted_variant'] == '6-152651802-C-A' + assert results['NM_182961.3:c.14018G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_182961.3:c.14018G>T']['hgvs_lrg_variant'] == 'LRG_427:g.311733G>T' + assert results['NM_182961.3:c.14018G>T']['hgvs_transcript_variant'] == 'NM_182961.3:c.14018G>T' + assert results['NM_182961.3:c.14018G>T']['hgvs_refseqgene_variant'] == 'NG_012855.1:g.311733G>T' + assert results['NM_182961.3:c.14018G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.152651802C>A', 'vcf': {'chr': 'chr6', 'ref': u'C', 'pos': '152651802', 'alt': u'A'}} + assert results['NM_182961.3:c.14018G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152330667C>A', 'vcf': {'chr': 'chr6', 'ref': u'C', 'pos': '152330667', 'alt': u'A'}} + assert results['NM_182961.3:c.14018G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.152651802C>A', 'vcf': {'chr': '6', 'ref': u'C', 'pos': '152651802', 'alt': u'A'}} + assert results['NM_182961.3:c.14018G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152330667C>A', 'vcf': {'chr': '6', 'ref': u'C', 'pos': '152330667', 'alt': u'A'}} + assert results['NM_182961.3:c.14018G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012855.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_892006.3', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_182961.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_427.xml'} + + assert 'NM_033071.3:c.13805G>T' in results.keys() + assert results['NM_033071.3:c.13805G>T']['hgvs_lrg_transcript_variant'] == 'LRG_427t2:c.13805G>T' + assert results['NM_033071.3:c.13805G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_033071.3:c.13805G>T']['alt_genomic_loci'] == [] + assert results['NM_033071.3:c.13805G>T']['transcript_description'] == 'Homo sapiens spectrin repeat containing nuclear envelope protein 1 (SYNE1), transcript variant 2, mRNA' + assert results['NM_033071.3:c.13805G>T']['gene_symbol'] == 'SYNE1' + assert results['NM_033071.3:c.13805G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_149062.1(LRG_427p2):p.(Arg4602Leu)', 'slr': 'NP_149062.1:p.(R4602L)'} + assert results['NM_033071.3:c.13805G>T']['submitted_variant'] == '6-152651802-C-A' + assert results['NM_033071.3:c.13805G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_033071.3:c.13805G>T']['hgvs_lrg_variant'] == 'LRG_427:g.311733G>T' + assert results['NM_033071.3:c.13805G>T']['hgvs_transcript_variant'] == 'NM_033071.3:c.13805G>T' + assert results['NM_033071.3:c.13805G>T']['hgvs_refseqgene_variant'] == 'NG_012855.1:g.311733G>T' + assert results['NM_033071.3:c.13805G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.152651802C>A', 'vcf': {'chr': 'chr6', 'ref': u'C', 'pos': '152651802', 'alt': u'A'}} + assert results['NM_033071.3:c.13805G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152330667C>A', 'vcf': {'chr': 'chr6', 'ref': u'C', 'pos': '152330667', 'alt': u'A'}} + assert results['NM_033071.3:c.13805G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.152651802C>A', 'vcf': {'chr': '6', 'ref': u'C', 'pos': '152651802', 'alt': u'A'}} + assert 
results['NM_033071.3:c.13805G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152330667C>A', 'vcf': {'chr': '6', 'ref': u'C', 'pos': '152330667', 'alt': u'A'}} + assert results['NM_033071.3:c.13805G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012855.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_149062.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_033071.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_427.xml'} + + + def test_variant295(self): + variant = '6-152737643-C-G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_033071.3:c.5950G>C' in results.keys() + assert results['NM_033071.3:c.5950G>C']['hgvs_lrg_transcript_variant'] == 'LRG_427t2:c.5950G>C' + assert results['NM_033071.3:c.5950G>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_033071.3:c.5950G>C']['alt_genomic_loci'] == [] + assert results['NM_033071.3:c.5950G>C']['transcript_description'] == 'Homo sapiens spectrin repeat containing nuclear envelope protein 1 (SYNE1), transcript variant 2, mRNA' + assert results['NM_033071.3:c.5950G>C']['gene_symbol'] == 'SYNE1' + assert results['NM_033071.3:c.5950G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_149062.1(LRG_427p2):p.(Ala1984Pro)', 'slr': 'NP_149062.1:p.(A1984P)'} + assert results['NM_033071.3:c.5950G>C']['submitted_variant'] == '6-152737643-C-G' + assert results['NM_033071.3:c.5950G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_033071.3:c.5950G>C']['hgvs_lrg_variant'] == 'LRG_427:g.225892G>C' + assert results['NM_033071.3:c.5950G>C']['hgvs_transcript_variant'] == 'NM_033071.3:c.5950G>C' + assert results['NM_033071.3:c.5950G>C']['hgvs_refseqgene_variant'] == 'NG_012855.1:g.225892G>C' + assert results['NM_033071.3:c.5950G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000006.11:g.152737643C>G', 'vcf': {'chr': 'chr6', 'ref': u'C', 'pos': '152737643', 'alt': u'G'}} + assert results['NM_033071.3:c.5950G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152416508C>G', 'vcf': {'chr': 'chr6', 'ref': u'C', 'pos': '152416508', 'alt': u'G'}} + assert results['NM_033071.3:c.5950G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.152737643C>G', 'vcf': {'chr': '6', 'ref': u'C', 'pos': '152737643', 'alt': u'G'}} + assert results['NM_033071.3:c.5950G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152416508C>G', 'vcf': {'chr': '6', 'ref': u'C', 'pos': '152416508', 'alt': u'G'}} + assert results['NM_033071.3:c.5950G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012855.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_149062.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_033071.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_427.xml'} + + assert 'NM_182961.3:c.5929G>C' in results.keys() + assert results['NM_182961.3:c.5929G>C']['hgvs_lrg_transcript_variant'] == 'LRG_427t1:c.5929G>C' + assert results['NM_182961.3:c.5929G>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_182961.3:c.5929G>C']['alt_genomic_loci'] == [] + assert results['NM_182961.3:c.5929G>C']['transcript_description'] == 'Homo sapiens spectrin repeat containing nuclear envelope protein 1 (SYNE1), transcript variant 1, mRNA' + assert results['NM_182961.3:c.5929G>C']['gene_symbol'] == 'SYNE1' + assert results['NM_182961.3:c.5929G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_892006.3(LRG_427p1):p.(Ala1977Pro)', 'slr': 'NP_892006.3:p.(A1977P)'} + assert results['NM_182961.3:c.5929G>C']['submitted_variant'] == '6-152737643-C-G' + assert results['NM_182961.3:c.5929G>C']['genome_context_intronic_sequence'] == '' + assert 
results['NM_182961.3:c.5929G>C']['hgvs_lrg_variant'] == 'LRG_427:g.225892G>C' + assert results['NM_182961.3:c.5929G>C']['hgvs_transcript_variant'] == 'NM_182961.3:c.5929G>C' + assert results['NM_182961.3:c.5929G>C']['hgvs_refseqgene_variant'] == 'NG_012855.1:g.225892G>C' + assert results['NM_182961.3:c.5929G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.152737643C>G', 'vcf': {'chr': 'chr6', 'ref': u'C', 'pos': '152737643', 'alt': u'G'}} + assert results['NM_182961.3:c.5929G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152416508C>G', 'vcf': {'chr': 'chr6', 'ref': u'C', 'pos': '152416508', 'alt': u'G'}} + assert results['NM_182961.3:c.5929G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.152737643C>G', 'vcf': {'chr': '6', 'ref': u'C', 'pos': '152737643', 'alt': u'G'}} + assert results['NM_182961.3:c.5929G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152416508C>G', 'vcf': {'chr': '6', 'ref': u'C', 'pos': '152416508', 'alt': u'G'}} + assert results['NM_182961.3:c.5929G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012855.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_892006.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_182961.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_427.xml'} + + + def test_variant296(self): + variant = '7-6026775-T-C' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001322012.1:c.688A>G' in results.keys() + assert results['NM_001322012.1:c.688A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322012.1:c.688A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001322012.1:c.688A>G']['alt_genomic_loci'] == [] + assert results['NM_001322012.1:c.688A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system 
component (PMS2), transcript variant 11, mRNA' + assert results['NM_001322012.1:c.688A>G']['gene_symbol'] == 'PMS2' + assert results['NM_001322012.1:c.688A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308941.1:p.(Lys230Glu)', 'slr': 'NP_001308941.1:p.(K230E)'} + assert results['NM_001322012.1:c.688A>G']['submitted_variant'] == '7-6026775-T-C' + assert results['NM_001322012.1:c.688A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322012.1:c.688A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001322012.1:c.688A>G']['hgvs_transcript_variant'] == 'NM_001322012.1:c.688A>G' + assert results['NM_001322012.1:c.688A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001322012.1:c.688A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_001322012.1:c.688A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322012.1:c.688A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_001322012.1:c.688A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322012.1:c.688A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308941.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322012.1'} + + assert 'NM_001322010.1:c.1060A>G' in results.keys() + assert results['NM_001322010.1:c.1060A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322010.1:c.1060A>G']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_001322010.1:c.1060A>G']['alt_genomic_loci'] == [] + assert results['NM_001322010.1:c.1060A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 9, mRNA' + assert results['NM_001322010.1:c.1060A>G']['gene_symbol'] == 'PMS2' + assert results['NM_001322010.1:c.1060A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308939.1:p.(Lys354Glu)', 'slr': 'NP_001308939.1:p.(K354E)'} + assert results['NM_001322010.1:c.1060A>G']['submitted_variant'] == '7-6026775-T-C' + assert results['NM_001322010.1:c.1060A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322010.1:c.1060A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001322010.1:c.1060A>G']['hgvs_transcript_variant'] == 'NM_001322010.1:c.1060A>G' + assert results['NM_001322010.1:c.1060A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001322010.1:c.1060A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_001322010.1:c.1060A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322010.1:c.1060A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_001322010.1:c.1060A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322010.1:c.1060A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308939.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322010.1'} + + assert 'NM_001322015.1:c.1312A>G' in results.keys() + assert 
results['NM_001322015.1:c.1312A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322015.1:c.1312A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001322015.1:c.1312A>G']['alt_genomic_loci'] == [] + assert results['NM_001322015.1:c.1312A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 14, mRNA' + assert results['NM_001322015.1:c.1312A>G']['gene_symbol'] == 'PMS2' + assert results['NM_001322015.1:c.1312A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308944.1:p.(Lys438Glu)', 'slr': 'NP_001308944.1:p.(K438E)'} + assert results['NM_001322015.1:c.1312A>G']['submitted_variant'] == '7-6026775-T-C' + assert results['NM_001322015.1:c.1312A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322015.1:c.1312A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001322015.1:c.1312A>G']['hgvs_transcript_variant'] == 'NM_001322015.1:c.1312A>G' + assert results['NM_001322015.1:c.1312A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001322015.1:c.1312A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_001322015.1:c.1312A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322015.1:c.1312A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_001322015.1:c.1312A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322015.1:c.1312A>G']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308944.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322015.1'} + + assert 'NM_001322003.1:c.1216A>G' in results.keys() + assert results['NM_001322003.1:c.1216A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322003.1:c.1216A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001322003.1:c.1216A>G']['alt_genomic_loci'] == [] + assert results['NM_001322003.1:c.1216A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 2, mRNA' + assert results['NM_001322003.1:c.1216A>G']['gene_symbol'] == 'PMS2' + assert results['NM_001322003.1:c.1216A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308932.1:p.(Lys406Glu)', 'slr': 'NP_001308932.1:p.(K406E)'} + assert results['NM_001322003.1:c.1216A>G']['submitted_variant'] == '7-6026775-T-C' + assert results['NM_001322003.1:c.1216A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322003.1:c.1216A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001322003.1:c.1216A>G']['hgvs_transcript_variant'] == 'NM_001322003.1:c.1216A>G' + assert results['NM_001322003.1:c.1216A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001322003.1:c.1216A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_001322003.1:c.1216A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322003.1:c.1216A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_001322003.1:c.1216A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322003.1:c.1216A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308932.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322003.1'} + + assert 'NM_001322014.1:c.1621A>G' in results.keys() + assert results['NM_001322014.1:c.1621A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322014.1:c.1621A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001322014.1:c.1621A>G']['alt_genomic_loci'] == [] + assert results['NM_001322014.1:c.1621A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 13, mRNA' + assert results['NM_001322014.1:c.1621A>G']['gene_symbol'] == 'PMS2' + assert results['NM_001322014.1:c.1621A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308943.1:p.(Lys541Glu)', 'slr': 'NP_001308943.1:p.(K541E)'} + assert results['NM_001322014.1:c.1621A>G']['submitted_variant'] == '7-6026775-T-C' + assert results['NM_001322014.1:c.1621A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322014.1:c.1621A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001322014.1:c.1621A>G']['hgvs_transcript_variant'] == 'NM_001322014.1:c.1621A>G' + assert results['NM_001322014.1:c.1621A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001322014.1:c.1621A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_001322014.1:c.1621A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322014.1:c.1621A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': 
{'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_001322014.1:c.1621A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322014.1:c.1621A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308943.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322014.1'} + + assert 'NM_001322004.1:c.1216A>G' in results.keys() + assert results['NM_001322004.1:c.1216A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322004.1:c.1216A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001322004.1:c.1216A>G']['alt_genomic_loci'] == [] + assert results['NM_001322004.1:c.1216A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 3, mRNA' + assert results['NM_001322004.1:c.1216A>G']['gene_symbol'] == 'PMS2' + assert results['NM_001322004.1:c.1216A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308933.1:p.(Lys406Glu)', 'slr': 'NP_001308933.1:p.(K406E)'} + assert results['NM_001322004.1:c.1216A>G']['submitted_variant'] == '7-6026775-T-C' + assert results['NM_001322004.1:c.1216A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322004.1:c.1216A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001322004.1:c.1216A>G']['hgvs_transcript_variant'] == 'NM_001322004.1:c.1216A>G' + assert results['NM_001322004.1:c.1216A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001322004.1:c.1216A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_001322004.1:c.1216A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': 
'5987144', 'alt': u'C'}} + assert results['NM_001322004.1:c.1216A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_001322004.1:c.1216A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322004.1:c.1216A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308933.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322004.1'} + + assert 'NM_001322008.1:c.1303A>G' in results.keys() + assert results['NM_001322008.1:c.1303A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322008.1:c.1303A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001322008.1:c.1303A>G']['alt_genomic_loci'] == [] + assert results['NM_001322008.1:c.1303A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 7, mRNA' + assert results['NM_001322008.1:c.1303A>G']['gene_symbol'] == 'PMS2' + assert results['NM_001322008.1:c.1303A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308937.1:p.(Lys435Glu)', 'slr': 'NP_001308937.1:p.(K435E)'} + assert results['NM_001322008.1:c.1303A>G']['submitted_variant'] == '7-6026775-T-C' + assert results['NM_001322008.1:c.1303A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322008.1:c.1303A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001322008.1:c.1303A>G']['hgvs_transcript_variant'] == 'NM_001322008.1:c.1303A>G' + assert results['NM_001322008.1:c.1303A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001322008.1:c.1303A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert 
results['NM_001322008.1:c.1303A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322008.1:c.1303A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_001322008.1:c.1303A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322008.1:c.1303A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308937.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322008.1'} + + assert 'NM_001322006.1:c.1465A>G' in results.keys() + assert results['NM_001322006.1:c.1465A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322006.1:c.1465A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001322006.1:c.1465A>G']['alt_genomic_loci'] == [] + assert results['NM_001322006.1:c.1465A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 5, mRNA' + assert results['NM_001322006.1:c.1465A>G']['gene_symbol'] == 'PMS2' + assert results['NM_001322006.1:c.1465A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308935.1:p.(Lys489Glu)', 'slr': 'NP_001308935.1:p.(K489E)'} + assert results['NM_001322006.1:c.1465A>G']['submitted_variant'] == '7-6026775-T-C' + assert results['NM_001322006.1:c.1465A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322006.1:c.1465A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001322006.1:c.1465A>G']['hgvs_transcript_variant'] == 'NM_001322006.1:c.1465A>G' + assert results['NM_001322006.1:c.1465A>G']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_001322006.1:c.1465A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_001322006.1:c.1465A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322006.1:c.1465A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_001322006.1:c.1465A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322006.1:c.1465A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308935.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322006.1'} + + assert 'NM_001322013.1:c.1048A>G' in results.keys() + assert results['NM_001322013.1:c.1048A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322013.1:c.1048A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001322013.1:c.1048A>G']['alt_genomic_loci'] == [] + assert results['NM_001322013.1:c.1048A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 12, mRNA' + assert results['NM_001322013.1:c.1048A>G']['gene_symbol'] == 'PMS2' + assert results['NM_001322013.1:c.1048A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308942.1:p.(Lys350Glu)', 'slr': 'NP_001308942.1:p.(K350E)'} + assert results['NM_001322013.1:c.1048A>G']['submitted_variant'] == '7-6026775-T-C' + assert results['NM_001322013.1:c.1048A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322013.1:c.1048A>G']['hgvs_lrg_variant'] == '' + assert 
results['NM_001322013.1:c.1048A>G']['hgvs_transcript_variant'] == 'NM_001322013.1:c.1048A>G' + assert results['NM_001322013.1:c.1048A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001322013.1:c.1048A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_001322013.1:c.1048A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322013.1:c.1048A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_001322013.1:c.1048A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322013.1:c.1048A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308942.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322013.1'} + + assert 'NM_001322009.1:c.1216A>G' in results.keys() + assert results['NM_001322009.1:c.1216A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322009.1:c.1216A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001322009.1:c.1216A>G']['alt_genomic_loci'] == [] + assert results['NM_001322009.1:c.1216A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 8, mRNA' + assert results['NM_001322009.1:c.1216A>G']['gene_symbol'] == 'PMS2' + assert results['NM_001322009.1:c.1216A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308938.1:p.(Lys406Glu)', 'slr': 'NP_001308938.1:p.(K406E)'} + assert results['NM_001322009.1:c.1216A>G']['submitted_variant'] == '7-6026775-T-C' 
+ assert results['NM_001322009.1:c.1216A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322009.1:c.1216A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001322009.1:c.1216A>G']['hgvs_transcript_variant'] == 'NM_001322009.1:c.1216A>G' + assert results['NM_001322009.1:c.1216A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001322009.1:c.1216A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_001322009.1:c.1216A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322009.1:c.1216A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_001322009.1:c.1216A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322009.1:c.1216A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308938.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322009.1'} + + assert 'NR_003085.2:n.1703G=' in results.keys() + assert results['NR_003085.2:n.1703G=']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_003085.2:n.1703G=']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_003085.2:n.1703G=']['alt_genomic_loci'] == [] + assert results['NR_003085.2:n.1703G=']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 2, non-coding RNA' + assert results['NR_003085.2:n.1703G=']['gene_symbol'] == 'PMS2' + assert results['NR_003085.2:n.1703G=']['hgvs_predicted_protein_consequence'] == {'tlr': 
'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_003085.2:n.1703G=']['submitted_variant'] == '7-6026775-T-C' + assert results['NR_003085.2:n.1703G=']['genome_context_intronic_sequence'] == '' + assert results['NR_003085.2:n.1703G=']['hgvs_lrg_variant'] == '' + assert results['NR_003085.2:n.1703G=']['hgvs_transcript_variant'] == 'NR_003085.2:n.1703G=' + assert results['NR_003085.2:n.1703G=']['hgvs_refseqgene_variant'] == '' + assert results['NR_003085.2:n.1703G=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': u'C'}} + assert 'hg38' not in results['NR_003085.2:n.1703G=']['primary_assembly_loci'].keys() + assert results['NR_003085.2:n.1703G=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': u'C'}} + assert 'grch38' not in results['NR_003085.2:n.1703G=']['primary_assembly_loci'].keys() + assert results['NR_003085.2:n.1703G=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_003085.2'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001322005.1:c.1216A>G' in results.keys() + assert results['NM_001322005.1:c.1216A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322005.1:c.1216A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001322005.1:c.1216A>G']['alt_genomic_loci'] == [] + assert results['NM_001322005.1:c.1216A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 4, mRNA' + assert results['NM_001322005.1:c.1216A>G']['gene_symbol'] == 'PMS2' + assert results['NM_001322005.1:c.1216A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308934.1:p.(Lys406Glu)', 'slr': 'NP_001308934.1:p.(K406E)'} + assert results['NM_001322005.1:c.1216A>G']['submitted_variant'] == '7-6026775-T-C' + 
assert results['NM_001322005.1:c.1216A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322005.1:c.1216A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001322005.1:c.1216A>G']['hgvs_transcript_variant'] == 'NM_001322005.1:c.1216A>G' + assert results['NM_001322005.1:c.1216A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001322005.1:c.1216A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_001322005.1:c.1216A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322005.1:c.1216A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_001322005.1:c.1216A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322005.1:c.1216A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308934.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322005.1'} + + assert 'NM_001322007.1:c.1303A>G' in results.keys() + assert results['NM_001322007.1:c.1303A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322007.1:c.1303A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001322007.1:c.1303A>G']['alt_genomic_loci'] == [] + assert results['NM_001322007.1:c.1303A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 6, mRNA' + assert results['NM_001322007.1:c.1303A>G']['gene_symbol'] == 'PMS2' + assert 
results['NM_001322007.1:c.1303A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308936.1:p.(Lys435Glu)', 'slr': 'NP_001308936.1:p.(K435E)'} + assert results['NM_001322007.1:c.1303A>G']['submitted_variant'] == '7-6026775-T-C' + assert results['NM_001322007.1:c.1303A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322007.1:c.1303A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001322007.1:c.1303A>G']['hgvs_transcript_variant'] == 'NM_001322007.1:c.1303A>G' + assert results['NM_001322007.1:c.1303A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001322007.1:c.1303A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_001322007.1:c.1303A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322007.1:c.1303A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_001322007.1:c.1303A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322007.1:c.1303A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308936.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322007.1'} + + assert 'NM_000535.5:c.1621G=' in results.keys() + assert results['NM_000535.5:c.1621G=']['hgvs_lrg_transcript_variant'] == 'LRG_161t1:c.1621G=' + assert results['NM_000535.5:c.1621G=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000535.5:c.1621G=']['alt_genomic_loci'] == [] + assert results['NM_000535.5:c.1621G=']['transcript_description'] == 'Homo 
sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 1, mRNA' + assert results['NM_000535.5:c.1621G=']['gene_symbol'] == 'PMS2' + assert results['NM_000535.5:c.1621G=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000526.1(LRG_161p1):p.(Glu541=)', 'slr': 'NP_000526.1:p.(E541=)'} + assert results['NM_000535.5:c.1621G=']['submitted_variant'] == '7-6026775-T-C' + assert results['NM_000535.5:c.1621G=']['genome_context_intronic_sequence'] == '' + assert results['NM_000535.5:c.1621G=']['hgvs_lrg_variant'] == 'LRG_161:g.26963G=' + assert results['NM_000535.5:c.1621G=']['hgvs_transcript_variant'] == 'NM_000535.5:c.1621G=' + assert results['NM_000535.5:c.1621G=']['hgvs_refseqgene_variant'] == 'NG_008466.1:g.26963G=' + assert results['NM_000535.5:c.1621G=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': u'C'}} + assert 'hg38' not in results['NM_000535.5:c.1621G=']['primary_assembly_loci'].keys() + assert results['NM_000535.5:c.1621G=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': u'C'}} + assert 'grch38' not in results['NM_000535.5:c.1621G=']['primary_assembly_loci'].keys() + assert results['NM_000535.5:c.1621G=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008466.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000526.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000535.5', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_161.xml'} + + assert 'NR_136154.1:n.1708A>G' in results.keys() + assert results['NR_136154.1:n.1708A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_136154.1:n.1708A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_136154.1:n.1708A>G']['alt_genomic_loci'] == [] + assert 
results['NR_136154.1:n.1708A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 15, non-coding RNA' + assert results['NR_136154.1:n.1708A>G']['gene_symbol'] == 'PMS2' + assert results['NR_136154.1:n.1708A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_136154.1:n.1708A>G']['submitted_variant'] == '7-6026775-T-C' + assert results['NR_136154.1:n.1708A>G']['genome_context_intronic_sequence'] == '' + assert results['NR_136154.1:n.1708A>G']['hgvs_lrg_variant'] == '' + assert results['NR_136154.1:n.1708A>G']['hgvs_transcript_variant'] == 'NR_136154.1:n.1708A>G' + assert results['NR_136154.1:n.1708A>G']['hgvs_refseqgene_variant'] == '' + assert results['NR_136154.1:n.1708A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NR_136154.1:n.1708A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NR_136154.1:n.1708A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NR_136154.1:n.1708A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NR_136154.1:n.1708A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_136154.1'} + + assert 'NM_001322011.1:c.688A>G' in results.keys() + assert results['NM_001322011.1:c.688A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322011.1:c.688A>G']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_001322011.1:c.688A>G']['alt_genomic_loci'] == [] + assert results['NM_001322011.1:c.688A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 10, mRNA' + assert results['NM_001322011.1:c.688A>G']['gene_symbol'] == 'PMS2' + assert results['NM_001322011.1:c.688A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308940.1:p.(Lys230Glu)', 'slr': 'NP_001308940.1:p.(K230E)'} + assert results['NM_001322011.1:c.688A>G']['submitted_variant'] == '7-6026775-T-C' + assert results['NM_001322011.1:c.688A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322011.1:c.688A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001322011.1:c.688A>G']['hgvs_transcript_variant'] == 'NM_001322011.1:c.688A>G' + assert results['NM_001322011.1:c.688A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001322011.1:c.688A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_001322011.1:c.688A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322011.1:c.688A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_001322011.1:c.688A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322011.1:c.688A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308940.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322011.1'} + + assert 'NM_000535.6:c.1621A>G' in results.keys() + assert 
results['NM_000535.6:c.1621A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000535.6:c.1621A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000535.6:c.1621A>G']['alt_genomic_loci'] == [] + assert results['NM_000535.6:c.1621A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 1, mRNA' + assert results['NM_000535.6:c.1621A>G']['gene_symbol'] == 'PMS2' + assert results['NM_000535.6:c.1621A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000526.2:p.(Lys541Glu)', 'slr': 'NP_000526.2:p.(K541E)'} + assert results['NM_000535.6:c.1621A>G']['submitted_variant'] == '7-6026775-T-C' + assert results['NM_000535.6:c.1621A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_000535.6:c.1621A>G']['hgvs_lrg_variant'] == '' + assert results['NM_000535.6:c.1621A>G']['hgvs_transcript_variant'] == 'NM_000535.6:c.1621A>G' + assert results['NM_000535.6:c.1621A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_000535.6:c.1621A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_000535.6:c.1621A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_000535.6:c.1621A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} + assert results['NM_000535.6:c.1621A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_000535.6:c.1621A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000526.2', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_000535.6'} + + + def test_variant297(self): + variant = '7-55242465-GGAATTAAGAGAAGCA-G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001346900.1:c.2077_2091del' in results.keys() + assert results['NM_001346900.1:c.2077_2091del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001346900.1:c.2077_2091del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001346900.1:c.2077_2091del']['alt_genomic_loci'] == [] + assert results['NM_001346900.1:c.2077_2091del']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant 8, mRNA' + assert results['NM_001346900.1:c.2077_2091del']['gene_symbol'] == 'EGFR' + assert results['NM_001346900.1:c.2077_2091del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333829.1:p.(Glu693_Ala697del)', 'slr': 'NP_001333829.1:p.(E693_A697del)'} + assert results['NM_001346900.1:c.2077_2091del']['submitted_variant'] == '7-55242465-GGAATTAAGAGAAGCA-G' + assert results['NM_001346900.1:c.2077_2091del']['genome_context_intronic_sequence'] == '' + assert results['NM_001346900.1:c.2077_2091del']['hgvs_lrg_variant'] == '' + assert results['NM_001346900.1:c.2077_2091del']['hgvs_transcript_variant'] == 'NM_001346900.1:c.2077_2091del' + assert results['NM_001346900.1:c.2077_2091del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001346900.1:c.2077_2091del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} + assert results['NM_001346900.1:c.2077_2091del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} + assert results['NM_001346900.1:c.2077_2091del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} + assert results['NM_001346900.1:c.2077_2091del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} + assert results['NM_001346900.1:c.2077_2091del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333829.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346900.1'} + + assert 'NM_001346898.1:c.2236_2250del' in results.keys() + assert results['NM_001346898.1:c.2236_2250del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001346898.1:c.2236_2250del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001346898.1:c.2236_2250del']['alt_genomic_loci'] == [] + assert results['NM_001346898.1:c.2236_2250del']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant 6, mRNA' + assert results['NM_001346898.1:c.2236_2250del']['gene_symbol'] == 'EGFR' + assert results['NM_001346898.1:c.2236_2250del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333827.1:p.(Glu746_Ala750del)', 'slr': 'NP_001333827.1:p.(E746_A750del)'} + assert results['NM_001346898.1:c.2236_2250del']['submitted_variant'] == '7-55242465-GGAATTAAGAGAAGCA-G' + assert results['NM_001346898.1:c.2236_2250del']['genome_context_intronic_sequence'] == '' + assert results['NM_001346898.1:c.2236_2250del']['hgvs_lrg_variant'] == '' + assert results['NM_001346898.1:c.2236_2250del']['hgvs_transcript_variant'] == 'NM_001346898.1:c.2236_2250del' + assert results['NM_001346898.1:c.2236_2250del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001346898.1:c.2236_2250del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': 
'55242465', 'alt': 'G'}} + assert results['NM_001346898.1:c.2236_2250del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} + assert results['NM_001346898.1:c.2236_2250del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} + assert results['NM_001346898.1:c.2236_2250del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} + assert results['NM_001346898.1:c.2236_2250del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333827.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346898.1'} + + assert 'NM_001346941.1:c.1435_1449del' in results.keys() + assert results['NM_001346941.1:c.1435_1449del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001346941.1:c.1435_1449del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001346941.1:c.1435_1449del']['alt_genomic_loci'] == [] + assert results['NM_001346941.1:c.1435_1449del']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant EGFRvIII, mRNA' + assert results['NM_001346941.1:c.1435_1449del']['gene_symbol'] == 'EGFR' + assert results['NM_001346941.1:c.1435_1449del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333870.1:p.(Glu479_Ala483del)', 'slr': 'NP_001333870.1:p.(E479_A483del)'} + assert results['NM_001346941.1:c.1435_1449del']['submitted_variant'] == '7-55242465-GGAATTAAGAGAAGCA-G' + assert results['NM_001346941.1:c.1435_1449del']['genome_context_intronic_sequence'] == '' + assert results['NM_001346941.1:c.1435_1449del']['hgvs_lrg_variant'] == '' + assert 
results['NM_001346941.1:c.1435_1449del']['hgvs_transcript_variant'] == 'NM_001346941.1:c.1435_1449del' + assert results['NM_001346941.1:c.1435_1449del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001346941.1:c.1435_1449del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} + assert results['NM_001346941.1:c.1435_1449del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} + assert results['NM_001346941.1:c.1435_1449del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} + assert results['NM_001346941.1:c.1435_1449del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} + assert results['NM_001346941.1:c.1435_1449del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333870.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346941.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001346899.1:c.2101_2115del' in results.keys() + assert results['NM_001346899.1:c.2101_2115del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001346899.1:c.2101_2115del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001346899.1:c.2101_2115del']['alt_genomic_loci'] == [] + assert results['NM_001346899.1:c.2101_2115del']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant 7, mRNA' + assert results['NM_001346899.1:c.2101_2115del']['gene_symbol'] == 'EGFR' + assert 
results['NM_001346899.1:c.2101_2115del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333828.1:p.(Glu701_Ala705del)', 'slr': 'NP_001333828.1:p.(E701_A705del)'} + assert results['NM_001346899.1:c.2101_2115del']['submitted_variant'] == '7-55242465-GGAATTAAGAGAAGCA-G' + assert results['NM_001346899.1:c.2101_2115del']['genome_context_intronic_sequence'] == '' + assert results['NM_001346899.1:c.2101_2115del']['hgvs_lrg_variant'] == '' + assert results['NM_001346899.1:c.2101_2115del']['hgvs_transcript_variant'] == 'NM_001346899.1:c.2101_2115del' + assert results['NM_001346899.1:c.2101_2115del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001346899.1:c.2101_2115del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} + assert results['NM_001346899.1:c.2101_2115del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} + assert results['NM_001346899.1:c.2101_2115del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} + assert results['NM_001346899.1:c.2101_2115del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} + assert results['NM_001346899.1:c.2101_2115del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333828.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346899.1'} + + assert 'NM_001346897.1:c.2101_2115del' in results.keys() + assert results['NM_001346897.1:c.2101_2115del']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001346897.1:c.2101_2115del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001346897.1:c.2101_2115del']['alt_genomic_loci'] == [] + assert results['NM_001346897.1:c.2101_2115del']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant 5, mRNA' + assert results['NM_001346897.1:c.2101_2115del']['gene_symbol'] == 'EGFR' + assert results['NM_001346897.1:c.2101_2115del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333826.1:p.(Glu701_Ala705del)', 'slr': 'NP_001333826.1:p.(E701_A705del)'} + assert results['NM_001346897.1:c.2101_2115del']['submitted_variant'] == '7-55242465-GGAATTAAGAGAAGCA-G' + assert results['NM_001346897.1:c.2101_2115del']['genome_context_intronic_sequence'] == '' + assert results['NM_001346897.1:c.2101_2115del']['hgvs_lrg_variant'] == '' + assert results['NM_001346897.1:c.2101_2115del']['hgvs_transcript_variant'] == 'NM_001346897.1:c.2101_2115del' + assert results['NM_001346897.1:c.2101_2115del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001346897.1:c.2101_2115del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} + assert results['NM_001346897.1:c.2101_2115del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} + assert results['NM_001346897.1:c.2101_2115del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} + assert results['NM_001346897.1:c.2101_2115del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} + assert 
results['NM_001346897.1:c.2101_2115del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333826.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346897.1'} + + assert 'NM_005228.3:c.2236_2250del' in results.keys() + assert results['NM_005228.3:c.2236_2250del']['hgvs_lrg_transcript_variant'] == 'LRG_304t1:c.2236_2250del' + assert results['NM_005228.3:c.2236_2250del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_005228.3:c.2236_2250del']['alt_genomic_loci'] == [] + assert results['NM_005228.3:c.2236_2250del']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant 1, mRNA' + assert results['NM_005228.3:c.2236_2250del']['gene_symbol'] == 'EGFR' + assert results['NM_005228.3:c.2236_2250del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005219.2(LRG_304p1):p.(Glu746_Ala750del)', 'slr': 'NP_005219.2:p.(E746_A750del)'} + assert results['NM_005228.3:c.2236_2250del']['submitted_variant'] == '7-55242465-GGAATTAAGAGAAGCA-G' + assert results['NM_005228.3:c.2236_2250del']['genome_context_intronic_sequence'] == '' + assert results['NM_005228.3:c.2236_2250del']['hgvs_lrg_variant'] == 'LRG_304:g.160742_160756del' + assert results['NM_005228.3:c.2236_2250del']['hgvs_transcript_variant'] == 'NM_005228.3:c.2236_2250del' + assert results['NM_005228.3:c.2236_2250del']['hgvs_refseqgene_variant'] == 'NG_007726.3:g.160742_160756del' + assert results['NM_005228.3:c.2236_2250del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} + assert results['NM_005228.3:c.2236_2250del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} + assert 
results['NM_005228.3:c.2236_2250del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} + assert results['NM_005228.3:c.2236_2250del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} + assert results['NM_005228.3:c.2236_2250del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007726.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005219.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005228.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_304.xml'} + + assert 'NM_005228.4:c.2236_2250del' in results.keys() + assert results['NM_005228.4:c.2236_2250del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_005228.4:c.2236_2250del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_005228.4:c.2236_2250del']['alt_genomic_loci'] == [] + assert results['NM_005228.4:c.2236_2250del']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant 1, mRNA' + assert results['NM_005228.4:c.2236_2250del']['gene_symbol'] == 'EGFR' + assert results['NM_005228.4:c.2236_2250del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005219.2(LRG_304p1):p.(Glu746_Ala750del)', 'slr': 'NP_005219.2:p.(E746_A750del)'} + assert results['NM_005228.4:c.2236_2250del']['submitted_variant'] == '7-55242465-GGAATTAAGAGAAGCA-G' + assert results['NM_005228.4:c.2236_2250del']['genome_context_intronic_sequence'] == '' + assert results['NM_005228.4:c.2236_2250del']['hgvs_lrg_variant'] == '' + assert results['NM_005228.4:c.2236_2250del']['hgvs_transcript_variant'] == 'NM_005228.4:c.2236_2250del' + assert results['NM_005228.4:c.2236_2250del']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_005228.4:c.2236_2250del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} + assert results['NM_005228.4:c.2236_2250del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} + assert results['NM_005228.4:c.2236_2250del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} + assert results['NM_005228.4:c.2236_2250del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} + assert results['NM_005228.4:c.2236_2250del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005219.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005228.4'} + + + def test_variant298(self): + variant = '7-55248992-T-TTCCAGGAAGCCT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_005228.3:c.2284-5_2290dup' in results.keys() + assert results['NM_005228.3:c.2284-5_2290dup']['hgvs_lrg_transcript_variant'] == 'LRG_304t1:c.2284-5_2290dup' + assert results['NM_005228.3:c.2284-5_2290dup']['refseqgene_context_intronic_sequence'] == 'NG_007726.3(NM_005228.3):c.2284-5_2290dup' + assert results['NM_005228.3:c.2284-5_2290dup']['alt_genomic_loci'] == [] + assert results['NM_005228.3:c.2284-5_2290dup']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant 1, mRNA' + assert results['NM_005228.3:c.2284-5_2290dup']['gene_symbol'] == 'EGFR' + assert results['NM_005228.3:c.2284-5_2290dup']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_005219.2(LRG_304p1):p.?', 'slr': 'NP_005219.2:p.?'} + assert results['NM_005228.3:c.2284-5_2290dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' + assert results['NM_005228.3:c.2284-5_2290dup']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_005228.3):c.2284-5_2290dup' + assert results['NM_005228.3:c.2284-5_2290dup']['hgvs_lrg_variant'] == 'LRG_304:g.167257_167268dup' + assert results['NM_005228.3:c.2284-5_2290dup']['hgvs_transcript_variant'] == 'NM_005228.3:c.2284-5_2290dup' + assert results['NM_005228.3:c.2284-5_2290dup']['hgvs_refseqgene_variant'] == 'NG_007726.3:g.167257_167268dup' + assert results['NM_005228.3:c.2284-5_2290dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_005228.3:c.2284-5_2290dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_005228.3:c.2284-5_2290dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_005228.3:c.2284-5_2290dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_005228.3:c.2284-5_2290dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007726.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005219.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005228.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_304.xml'} + + assert 'NM_001346899.1:c.2149-5_2155dup' in results.keys() + assert 
results['NM_001346899.1:c.2149-5_2155dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001346899.1:c.2149-5_2155dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001346899.1:c.2149-5_2155dup']['alt_genomic_loci'] == [] + assert results['NM_001346899.1:c.2149-5_2155dup']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant 7, mRNA' + assert results['NM_001346899.1:c.2149-5_2155dup']['gene_symbol'] == 'EGFR' + assert results['NM_001346899.1:c.2149-5_2155dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333828.1:p.?', 'slr': 'NP_001333828.1:p.?'} + assert results['NM_001346899.1:c.2149-5_2155dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' + assert results['NM_001346899.1:c.2149-5_2155dup']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_001346899.1):c.2149-5_2155dup' + assert results['NM_001346899.1:c.2149-5_2155dup']['hgvs_lrg_variant'] == '' + assert results['NM_001346899.1:c.2149-5_2155dup']['hgvs_transcript_variant'] == 'NM_001346899.1:c.2149-5_2155dup' + assert results['NM_001346899.1:c.2149-5_2155dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001346899.1:c.2149-5_2155dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346899.1:c.2149-5_2155dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346899.1:c.2149-5_2155dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346899.1:c.2149-5_2155dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346899.1:c.2149-5_2155dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333828.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346899.1'} + + assert 'NM_005228.4:c.2284-5_2290dup' in results.keys() + assert results['NM_005228.4:c.2284-5_2290dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_005228.4:c.2284-5_2290dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_005228.4:c.2284-5_2290dup']['alt_genomic_loci'] == [] + assert results['NM_005228.4:c.2284-5_2290dup']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant 1, mRNA' + assert results['NM_005228.4:c.2284-5_2290dup']['gene_symbol'] == 'EGFR' + assert results['NM_005228.4:c.2284-5_2290dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005219.2(LRG_304p1):p.?', 'slr': 'NP_005219.2:p.?'} + assert results['NM_005228.4:c.2284-5_2290dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' + assert results['NM_005228.4:c.2284-5_2290dup']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_005228.4):c.2284-5_2290dup' + assert results['NM_005228.4:c.2284-5_2290dup']['hgvs_lrg_variant'] == '' + assert results['NM_005228.4:c.2284-5_2290dup']['hgvs_transcript_variant'] == 'NM_005228.4:c.2284-5_2290dup' + assert results['NM_005228.4:c.2284-5_2290dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_005228.4:c.2284-5_2290dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_005228.4:c.2284-5_2290dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55181287', 'alt': 
'CTCCAGGAAGCCT'}} + assert results['NM_005228.4:c.2284-5_2290dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_005228.4:c.2284-5_2290dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_005228.4:c.2284-5_2290dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005219.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005228.4'} + + assert 'NM_001346898.1:c.2284-5_2290dup' in results.keys() + assert results['NM_001346898.1:c.2284-5_2290dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001346898.1:c.2284-5_2290dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001346898.1:c.2284-5_2290dup']['alt_genomic_loci'] == [] + assert results['NM_001346898.1:c.2284-5_2290dup']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant 6, mRNA' + assert results['NM_001346898.1:c.2284-5_2290dup']['gene_symbol'] == 'EGFR' + assert results['NM_001346898.1:c.2284-5_2290dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333827.1:p.?', 'slr': 'NP_001333827.1:p.?'} + assert results['NM_001346898.1:c.2284-5_2290dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' + assert results['NM_001346898.1:c.2284-5_2290dup']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_001346898.1):c.2284-5_2290dup' + assert results['NM_001346898.1:c.2284-5_2290dup']['hgvs_lrg_variant'] == '' + assert results['NM_001346898.1:c.2284-5_2290dup']['hgvs_transcript_variant'] == 'NM_001346898.1:c.2284-5_2290dup' + assert results['NM_001346898.1:c.2284-5_2290dup']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_001346898.1:c.2284-5_2290dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346898.1:c.2284-5_2290dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346898.1:c.2284-5_2290dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346898.1:c.2284-5_2290dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346898.1:c.2284-5_2290dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333827.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346898.1'} + + assert 'NM_001346941.1:c.1483-5_1489dup' in results.keys() + assert results['NM_001346941.1:c.1483-5_1489dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001346941.1:c.1483-5_1489dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001346941.1:c.1483-5_1489dup']['alt_genomic_loci'] == [] + assert results['NM_001346941.1:c.1483-5_1489dup']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant EGFRvIII, mRNA' + assert results['NM_001346941.1:c.1483-5_1489dup']['gene_symbol'] == 'EGFR' + assert results['NM_001346941.1:c.1483-5_1489dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333870.1:p.?', 'slr': 'NP_001333870.1:p.?'} + assert results['NM_001346941.1:c.1483-5_1489dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' + assert 
results['NM_001346941.1:c.1483-5_1489dup']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_001346941.1):c.1483-5_1489dup' + assert results['NM_001346941.1:c.1483-5_1489dup']['hgvs_lrg_variant'] == '' + assert results['NM_001346941.1:c.1483-5_1489dup']['hgvs_transcript_variant'] == 'NM_001346941.1:c.1483-5_1489dup' + assert results['NM_001346941.1:c.1483-5_1489dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001346941.1:c.1483-5_1489dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346941.1:c.1483-5_1489dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346941.1:c.1483-5_1489dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346941.1:c.1483-5_1489dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346941.1:c.1483-5_1489dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333870.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346941.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001346900.1:c.2125-5_2131dup' in results.keys() + assert results['NM_001346900.1:c.2125-5_2131dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001346900.1:c.2125-5_2131dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001346900.1:c.2125-5_2131dup']['alt_genomic_loci'] == [] + assert 
results['NM_001346900.1:c.2125-5_2131dup']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant 8, mRNA' + assert results['NM_001346900.1:c.2125-5_2131dup']['gene_symbol'] == 'EGFR' + assert results['NM_001346900.1:c.2125-5_2131dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333829.1:p.?', 'slr': 'NP_001333829.1:p.?'} + assert results['NM_001346900.1:c.2125-5_2131dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' + assert results['NM_001346900.1:c.2125-5_2131dup']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_001346900.1):c.2125-5_2131dup' + assert results['NM_001346900.1:c.2125-5_2131dup']['hgvs_lrg_variant'] == '' + assert results['NM_001346900.1:c.2125-5_2131dup']['hgvs_transcript_variant'] == 'NM_001346900.1:c.2125-5_2131dup' + assert results['NM_001346900.1:c.2125-5_2131dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001346900.1:c.2125-5_2131dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346900.1:c.2125-5_2131dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346900.1:c.2125-5_2131dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346900.1:c.2125-5_2131dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346900.1:c.2125-5_2131dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333829.1', 
'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346900.1'} + + assert 'NR_047551.1:n.1272_1283dup' in results.keys() + assert results['NR_047551.1:n.1272_1283dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_047551.1:n.1272_1283dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_047551.1:n.1272_1283dup']['alt_genomic_loci'] == [] + assert results['NR_047551.1:n.1272_1283dup']['transcript_description'] == 'Homo sapiens EGFR antisense RNA 1 (EGFR-AS1), long non-coding RNA' + assert results['NR_047551.1:n.1272_1283dup']['gene_symbol'] == 'EGFR-AS1' + assert results['NR_047551.1:n.1272_1283dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_047551.1:n.1272_1283dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' + assert results['NR_047551.1:n.1272_1283dup']['genome_context_intronic_sequence'] == '' + assert results['NR_047551.1:n.1272_1283dup']['hgvs_lrg_variant'] == '' + assert results['NR_047551.1:n.1272_1283dup']['hgvs_transcript_variant'] == 'NR_047551.1:n.1272_1283dup' + assert results['NR_047551.1:n.1272_1283dup']['hgvs_refseqgene_variant'] == '' + assert results['NR_047551.1:n.1272_1283dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NR_047551.1:n.1272_1283dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NR_047551.1:n.1272_1283dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NR_047551.1:n.1272_1283dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NR_047551.1:n.1272_1283dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_047551.1'} + + assert 'NM_001346897.1:c.2149-5_2155dup' in results.keys() + assert results['NM_001346897.1:c.2149-5_2155dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001346897.1:c.2149-5_2155dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001346897.1:c.2149-5_2155dup']['alt_genomic_loci'] == [] + assert results['NM_001346897.1:c.2149-5_2155dup']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant 5, mRNA' + assert results['NM_001346897.1:c.2149-5_2155dup']['gene_symbol'] == 'EGFR' + assert results['NM_001346897.1:c.2149-5_2155dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333826.1:p.?', 'slr': 'NP_001333826.1:p.?'} + assert results['NM_001346897.1:c.2149-5_2155dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' + assert results['NM_001346897.1:c.2149-5_2155dup']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_001346897.1):c.2149-5_2155dup' + assert results['NM_001346897.1:c.2149-5_2155dup']['hgvs_lrg_variant'] == '' + assert results['NM_001346897.1:c.2149-5_2155dup']['hgvs_transcript_variant'] == 'NM_001346897.1:c.2149-5_2155dup' + assert results['NM_001346897.1:c.2149-5_2155dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001346897.1:c.2149-5_2155dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346897.1:c.2149-5_2155dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} + assert 
results['NM_001346897.1:c.2149-5_2155dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346897.1:c.2149-5_2155dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346897.1:c.2149-5_2155dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333826.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346897.1'} + + + def test_variant299(self): + variant = '7-75932111-C-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001540.4:c.82C>A' in results.keys() + assert results['NM_001540.4:c.82C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001540.4:c.82C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001540.4:c.82C>A']['alt_genomic_loci'] == [] + assert results['NM_001540.4:c.82C>A']['transcript_description'] == 'Homo sapiens heat shock protein family B (small) member 1 (HSPB1), mRNA' + assert results['NM_001540.4:c.82C>A']['gene_symbol'] == 'HSPB1' + assert results['NM_001540.4:c.82C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001531.1(LRG_248p1):p.(Leu28Ile)', 'slr': 'NP_001531.1:p.(L28I)'} + assert results['NM_001540.4:c.82C>A']['submitted_variant'] == '7-75932111-C-A' + assert results['NM_001540.4:c.82C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001540.4:c.82C>A']['hgvs_lrg_variant'] == '' + assert results['NM_001540.4:c.82C>A']['hgvs_transcript_variant'] == 'NM_001540.4:c.82C>A' + assert results['NM_001540.4:c.82C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001540.4:c.82C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.75932111C>A', 'vcf': 
{'chr': 'chr7', 'ref': 'C', 'pos': '75932111', 'alt': 'A'}} + assert results['NM_001540.4:c.82C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.76302794C>A', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '76302794', 'alt': 'A'}} + assert results['NM_001540.4:c.82C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.75932111C>A', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '75932111', 'alt': 'A'}} + assert results['NM_001540.4:c.82C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.76302794C>A', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '76302794', 'alt': 'A'}} + assert results['NM_001540.4:c.82C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001531.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001540.4'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001540.3:c.82C>A' in results.keys() + assert results['NM_001540.3:c.82C>A']['hgvs_lrg_transcript_variant'] == 'LRG_248t1:c.82C>A' + assert results['NM_001540.3:c.82C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001540.3:c.82C>A']['alt_genomic_loci'] == [] + assert results['NM_001540.3:c.82C>A']['transcript_description'] == 'Homo sapiens heat shock protein family B (small) member 1 (HSPB1), mRNA' + assert results['NM_001540.3:c.82C>A']['gene_symbol'] == 'HSPB1' + assert results['NM_001540.3:c.82C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001531.1(LRG_248p1):p.(Leu28Ile)', 'slr': 'NP_001531.1:p.(L28I)'} + assert results['NM_001540.3:c.82C>A']['submitted_variant'] == '7-75932111-C-A' + assert results['NM_001540.3:c.82C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001540.3:c.82C>A']['hgvs_lrg_variant'] == 'LRG_248:g.5237C>A' + assert results['NM_001540.3:c.82C>A']['hgvs_transcript_variant'] == 'NM_001540.3:c.82C>A' + assert results['NM_001540.3:c.82C>A']['hgvs_refseqgene_variant'] == 
'NG_008995.1:g.5237C>A' + assert results['NM_001540.3:c.82C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.75932111C>A', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '75932111', 'alt': 'A'}} + assert results['NM_001540.3:c.82C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.76302794C>A', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '76302794', 'alt': 'A'}} + assert results['NM_001540.3:c.82C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.75932111C>A', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '75932111', 'alt': 'A'}} + assert results['NM_001540.3:c.82C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.76302794C>A', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '76302794', 'alt': 'A'}} + assert results['NM_001540.3:c.82C>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008995.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001531.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001540.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_248.xml'} + + + def test_variant300(self): + variant = '7-91652178-A-AAAC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_005751.4:c.4004_4006dup' in results.keys() + assert results['NM_005751.4:c.4004_4006dup']['hgvs_lrg_transcript_variant'] == 'LRG_331t1:c.4004_4006dup' + assert results['NM_005751.4:c.4004_4006dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_005751.4:c.4004_4006dup']['alt_genomic_loci'] == [] + assert results['NM_005751.4:c.4004_4006dup']['transcript_description'] == 'Homo sapiens A-kinase anchoring protein 9 (AKAP9), transcript variant 2, mRNA' + assert results['NM_005751.4:c.4004_4006dup']['gene_symbol'] == 'AKAP9' + assert results['NM_005751.4:c.4004_4006dup']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_005742.4(LRG_331p1):p.(Lys1335_Leu1336insGln)', 'slr': 'NP_005742.4:p.(K1335_L1336insQ)'} + assert results['NM_005751.4:c.4004_4006dup']['submitted_variant'] == '7-91652178-A-AAAC' + assert results['NM_005751.4:c.4004_4006dup']['genome_context_intronic_sequence'] == '' + assert results['NM_005751.4:c.4004_4006dup']['hgvs_lrg_variant'] == 'LRG_331:g.86991_86993dup' + assert results['NM_005751.4:c.4004_4006dup']['hgvs_transcript_variant'] == 'NM_005751.4:c.4004_4006dup' + assert results['NM_005751.4:c.4004_4006dup']['hgvs_refseqgene_variant'] == 'NG_011623.1:g.86991_86993dup' + assert results['NM_005751.4:c.4004_4006dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.91652179_91652181dup', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '91652178', 'alt': 'AAAC'}} + assert results['NM_005751.4:c.4004_4006dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.92022865_92022867dup', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '92022864', 'alt': 'AAAC'}} + assert results['NM_005751.4:c.4004_4006dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.91652179_91652181dup', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '91652178', 'alt': 'AAAC'}} + assert results['NM_005751.4:c.4004_4006dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.92022865_92022867dup', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '92022864', 'alt': 'AAAC'}} + assert results['NM_005751.4:c.4004_4006dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011623.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005742.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005751.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_331.xml'} + + assert 'NM_147185.2:c.4004_4006dup' in results.keys() + assert results['NM_147185.2:c.4004_4006dup']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_147185.2:c.4004_4006dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_147185.2:c.4004_4006dup']['alt_genomic_loci'] == [] + assert results['NM_147185.2:c.4004_4006dup']['transcript_description'] == 'Homo sapiens A-kinase anchoring protein 9 (AKAP9), transcript variant 3, mRNA' + assert results['NM_147185.2:c.4004_4006dup']['gene_symbol'] == 'AKAP9' + assert results['NM_147185.2:c.4004_4006dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_671714.1:p.(Lys1335_Leu1336insGln)', 'slr': 'NP_671714.1:p.(K1335_L1336insQ)'} + assert results['NM_147185.2:c.4004_4006dup']['submitted_variant'] == '7-91652178-A-AAAC' + assert results['NM_147185.2:c.4004_4006dup']['genome_context_intronic_sequence'] == '' + assert results['NM_147185.2:c.4004_4006dup']['hgvs_lrg_variant'] == '' + assert results['NM_147185.2:c.4004_4006dup']['hgvs_transcript_variant'] == 'NM_147185.2:c.4004_4006dup' + assert results['NM_147185.2:c.4004_4006dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_147185.2:c.4004_4006dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.91652179_91652181dup', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '91652178', 'alt': 'AAAC'}} + assert results['NM_147185.2:c.4004_4006dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.92022865_92022867dup', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '92022864', 'alt': 'AAAC'}} + assert results['NM_147185.2:c.4004_4006dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.91652179_91652181dup', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '91652178', 'alt': 'AAAC'}} + assert results['NM_147185.2:c.4004_4006dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.92022865_92022867dup', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '92022864', 'alt': 'AAAC'}} + assert results['NM_147185.2:c.4004_4006dup']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_671714.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_147185.2'} + + + def test_variant301(self): + variant = '7-117199644-ATCT-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NR_149084.1:n.221+1140_221+1142del' in results.keys() + assert results['NR_149084.1:n.221+1140_221+1142del']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_149084.1:n.221+1140_221+1142del']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_149084.1:n.221+1140_221+1142del']['alt_genomic_loci'] == [] + assert results['NR_149084.1:n.221+1140_221+1142del']['transcript_description'] == 'Homo sapiens CFTR antisense RNA 1 (CFTR-AS1), long non-coding RNA' + assert results['NR_149084.1:n.221+1140_221+1142del']['gene_symbol'] == 'CFTR-AS1' + assert results['NR_149084.1:n.221+1140_221+1142del']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_149084.1:n.221+1140_221+1142del']['submitted_variant'] == '7-117199644-ATCT-A' + assert results['NR_149084.1:n.221+1140_221+1142del']['genome_context_intronic_sequence'] == 'NC_000007.13(NR_149084.1):c.221+1140_221+1142del' + assert results['NR_149084.1:n.221+1140_221+1142del']['hgvs_lrg_variant'] == '' + assert results['NR_149084.1:n.221+1140_221+1142del']['hgvs_transcript_variant'] == 'NR_149084.1:n.221+1140_221+1142del' + assert results['NR_149084.1:n.221+1140_221+1142del']['hgvs_refseqgene_variant'] == '' + assert results['NR_149084.1:n.221+1140_221+1142del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.117199645_117199647del', 'vcf': {'chr': 'chr7', 'ref': 'ATCT', 'pos': '117199644', 'alt': 'A'}} + assert results['NR_149084.1:n.221+1140_221+1142del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.117559591_117559593del', 'vcf': {'chr': 'chr7', 'ref': 'ATCT', 'pos': 
'117559590', 'alt': 'A'}} + assert results['NR_149084.1:n.221+1140_221+1142del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.117199645_117199647del', 'vcf': {'chr': '7', 'ref': 'ATCT', 'pos': '117199644', 'alt': 'A'}} + assert results['NR_149084.1:n.221+1140_221+1142del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.117559591_117559593del', 'vcf': {'chr': '7', 'ref': 'ATCT', 'pos': '117559590', 'alt': 'A'}} + assert results['NR_149084.1:n.221+1140_221+1142del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_149084.1'} + + assert 'NM_000492.3:c.1521_1523del' in results.keys() + assert results['NM_000492.3:c.1521_1523del']['hgvs_lrg_transcript_variant'] == 'LRG_663t1:c.1521_1523del' + assert results['NM_000492.3:c.1521_1523del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000492.3:c.1521_1523del']['alt_genomic_loci'] == [] + assert results['NM_000492.3:c.1521_1523del']['transcript_description'] == 'Homo sapiens cystic fibrosis transmembrane conductance regulator (CFTR), mRNA' + assert results['NM_000492.3:c.1521_1523del']['gene_symbol'] == 'CFTR' + assert results['NM_000492.3:c.1521_1523del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000483.3(LRG_663p1):p.(Phe508del)', 'slr': 'NP_000483.3:p.(F508del)'} + assert results['NM_000492.3:c.1521_1523del']['submitted_variant'] == '7-117199644-ATCT-A' + assert results['NM_000492.3:c.1521_1523del']['genome_context_intronic_sequence'] == '' + assert results['NM_000492.3:c.1521_1523del']['hgvs_lrg_variant'] == '' + assert results['NM_000492.3:c.1521_1523del']['hgvs_transcript_variant'] == 'NM_000492.3:c.1521_1523del' + assert results['NM_000492.3:c.1521_1523del']['hgvs_refseqgene_variant'] == 'NG_016465.3:g.98809_98811del' + assert results['NM_000492.3:c.1521_1523del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.117199646_117199648del', 
'vcf': {'chr': 'chr7', 'ref': 'ATCT', 'pos': '117199644', 'alt': 'A'}} + assert results['NM_000492.3:c.1521_1523del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.117559592_117559594del', 'vcf': {'chr': 'chr7', 'ref': 'ATCT', 'pos': '117559590', 'alt': 'A'}} + assert results['NM_000492.3:c.1521_1523del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.117199646_117199648del', 'vcf': {'chr': '7', 'ref': 'ATCT', 'pos': '117199644', 'alt': 'A'}} + assert results['NM_000492.3:c.1521_1523del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.117559592_117559594del', 'vcf': {'chr': '7', 'ref': 'ATCT', 'pos': '117559590', 'alt': 'A'}} + assert results['NM_000492.3:c.1521_1523del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_016465.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000483.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000492.3'} + + + def test_variant302(self): + variant = '7-140453136-AC-CT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NR_148928.1:n.2896_2897delinsAG' in results.keys() + assert results['NR_148928.1:n.2896_2897delinsAG']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_148928.1:n.2896_2897delinsAG']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_148928.1:n.2896_2897delinsAG']['alt_genomic_loci'] == [] + assert results['NR_148928.1:n.2896_2897delinsAG']['transcript_description'] == 'Homo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 3, non-coding RNA' + assert results['NR_148928.1:n.2896_2897delinsAG']['gene_symbol'] == 'BRAF' + assert results['NR_148928.1:n.2896_2897delinsAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_148928.1:n.2896_2897delinsAG']['submitted_variant'] == '7-140453136-AC-CT' + assert 
results['NR_148928.1:n.2896_2897delinsAG']['genome_context_intronic_sequence'] == '' + assert results['NR_148928.1:n.2896_2897delinsAG']['hgvs_lrg_variant'] == '' + assert results['NR_148928.1:n.2896_2897delinsAG']['hgvs_transcript_variant'] == 'NR_148928.1:n.2896_2897delinsAG' + assert results['NR_148928.1:n.2896_2897delinsAG']['hgvs_refseqgene_variant'] == '' + assert results['NR_148928.1:n.2896_2897delinsAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140453136', 'alt': u'CT'}} + assert results['NR_148928.1:n.2896_2897delinsAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140753336', 'alt': u'CT'}} + assert results['NR_148928.1:n.2896_2897delinsAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140453136', 'alt': u'CT'}} + assert results['NR_148928.1:n.2896_2897delinsAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140753336', 'alt': u'CT'}} + assert results['NR_148928.1:n.2896_2897delinsAG']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_148928.1'} + + assert 'NM_004333.4:c.1798_1799delinsAG' in results.keys() + assert results['NM_004333.4:c.1798_1799delinsAG']['hgvs_lrg_transcript_variant'] == 'LRG_299t1:c.1798_1799delinsAG' + assert results['NM_004333.4:c.1798_1799delinsAG']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004333.4:c.1798_1799delinsAG']['alt_genomic_loci'] == [] + assert results['NM_004333.4:c.1798_1799delinsAG']['transcript_description'] == 'Homo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), mRNA' + assert 
results['NM_004333.4:c.1798_1799delinsAG']['gene_symbol'] == 'BRAF' + assert results['NM_004333.4:c.1798_1799delinsAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004324.2(LRG_299p1):p.(Val600Arg)', 'slr': 'NP_004324.2:p.(V600R)'} + assert results['NM_004333.4:c.1798_1799delinsAG']['submitted_variant'] == '7-140453136-AC-CT' + assert results['NM_004333.4:c.1798_1799delinsAG']['genome_context_intronic_sequence'] == '' + assert results['NM_004333.4:c.1798_1799delinsAG']['hgvs_lrg_variant'] == '' + assert results['NM_004333.4:c.1798_1799delinsAG']['hgvs_transcript_variant'] == 'NM_004333.4:c.1798_1799delinsAG' + assert results['NM_004333.4:c.1798_1799delinsAG']['hgvs_refseqgene_variant'] == 'NG_007873.2:g.176428_176429delinsAG' + assert results['NM_004333.4:c.1798_1799delinsAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140453136', 'alt': u'CT'}} + assert results['NM_004333.4:c.1798_1799delinsAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140753336', 'alt': u'CT'}} + assert results['NM_004333.4:c.1798_1799delinsAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140453136', 'alt': u'CT'}} + assert results['NM_004333.4:c.1798_1799delinsAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140753336', 'alt': u'CT'}} + assert results['NM_004333.4:c.1798_1799delinsAG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007873.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.4'} + + assert 
'NM_004333.5:c.1798_1799delinsAG' in results.keys() + assert results['NM_004333.5:c.1798_1799delinsAG']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_004333.5:c.1798_1799delinsAG']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004333.5:c.1798_1799delinsAG']['alt_genomic_loci'] == [] + assert results['NM_004333.5:c.1798_1799delinsAG']['transcript_description'] == 'Homo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 1, mRNA' + assert results['NM_004333.5:c.1798_1799delinsAG']['gene_symbol'] == 'BRAF' + assert results['NM_004333.5:c.1798_1799delinsAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004324.2(LRG_299p1):p.(Val600Arg)', 'slr': 'NP_004324.2:p.(V600R)'} + assert results['NM_004333.5:c.1798_1799delinsAG']['submitted_variant'] == '7-140453136-AC-CT' + assert results['NM_004333.5:c.1798_1799delinsAG']['genome_context_intronic_sequence'] == '' + assert results['NM_004333.5:c.1798_1799delinsAG']['hgvs_lrg_variant'] == '' + assert results['NM_004333.5:c.1798_1799delinsAG']['hgvs_transcript_variant'] == 'NM_004333.5:c.1798_1799delinsAG' + assert results['NM_004333.5:c.1798_1799delinsAG']['hgvs_refseqgene_variant'] == '' + assert results['NM_004333.5:c.1798_1799delinsAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140453136', 'alt': u'CT'}} + assert results['NM_004333.5:c.1798_1799delinsAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140753336', 'alt': u'CT'}} + assert results['NM_004333.5:c.1798_1799delinsAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140453136', 'alt': u'CT'}} + assert 
results['NM_004333.5:c.1798_1799delinsAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140753336', 'alt': u'CT'}} + assert results['NM_004333.5:c.1798_1799delinsAG']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.5'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001354609.1:c.1798_1799delinsAG' in results.keys() + assert results['NM_001354609.1:c.1798_1799delinsAG']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001354609.1:c.1798_1799delinsAG']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001354609.1:c.1798_1799delinsAG']['alt_genomic_loci'] == [] + assert results['NM_001354609.1:c.1798_1799delinsAG']['transcript_description'] == 'Homo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 2, mRNA' + assert results['NM_001354609.1:c.1798_1799delinsAG']['gene_symbol'] == 'BRAF' + assert results['NM_001354609.1:c.1798_1799delinsAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001341538.1:p.(Val600Arg)', 'slr': 'NP_001341538.1:p.(V600R)'} + assert results['NM_001354609.1:c.1798_1799delinsAG']['submitted_variant'] == '7-140453136-AC-CT' + assert results['NM_001354609.1:c.1798_1799delinsAG']['genome_context_intronic_sequence'] == '' + assert results['NM_001354609.1:c.1798_1799delinsAG']['hgvs_lrg_variant'] == '' + assert results['NM_001354609.1:c.1798_1799delinsAG']['hgvs_transcript_variant'] == 'NM_001354609.1:c.1798_1799delinsAG' + assert results['NM_001354609.1:c.1798_1799delinsAG']['hgvs_refseqgene_variant'] == '' + assert results['NM_001354609.1:c.1798_1799delinsAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140453136', 'alt': u'CT'}} + assert 
results['NM_001354609.1:c.1798_1799delinsAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140753336', 'alt': u'CT'}} + assert results['NM_001354609.1:c.1798_1799delinsAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140453136', 'alt': u'CT'}} + assert results['NM_001354609.1:c.1798_1799delinsAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140753336', 'alt': u'CT'}} + assert results['NM_001354609.1:c.1798_1799delinsAG']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341538.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354609.1'} + + + def test_variant303(self): + variant = '7-140453136-A-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001354609.1:c.1799T>A' in results.keys() + assert results['NM_001354609.1:c.1799T>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001354609.1:c.1799T>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001354609.1:c.1799T>A']['alt_genomic_loci'] == [] + assert results['NM_001354609.1:c.1799T>A']['transcript_description'] == 'Homo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 2, mRNA' + assert results['NM_001354609.1:c.1799T>A']['gene_symbol'] == 'BRAF' + assert results['NM_001354609.1:c.1799T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001341538.1:p.(Val600Glu)', 'slr': 'NP_001341538.1:p.(V600E)'} + assert results['NM_001354609.1:c.1799T>A']['submitted_variant'] == '7-140453136-A-T' + assert results['NM_001354609.1:c.1799T>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001354609.1:c.1799T>A']['hgvs_lrg_variant'] 
== '' + assert results['NM_001354609.1:c.1799T>A']['hgvs_transcript_variant'] == 'NM_001354609.1:c.1799T>A' + assert results['NM_001354609.1:c.1799T>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001354609.1:c.1799T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': 'chr7', 'ref': u'A', 'pos': '140453136', 'alt': u'T'}} + assert results['NM_001354609.1:c.1799T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': 'chr7', 'ref': u'A', 'pos': '140753336', 'alt': u'T'}} + assert results['NM_001354609.1:c.1799T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': '7', 'ref': u'A', 'pos': '140453136', 'alt': u'T'}} + assert results['NM_001354609.1:c.1799T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': '7', 'ref': u'A', 'pos': '140753336', 'alt': u'T'}} + assert results['NM_001354609.1:c.1799T>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341538.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354609.1'} + + assert 'NR_148928.1:n.2897T>A' in results.keys() + assert results['NR_148928.1:n.2897T>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_148928.1:n.2897T>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_148928.1:n.2897T>A']['alt_genomic_loci'] == [] + assert results['NR_148928.1:n.2897T>A']['transcript_description'] == 'Homo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 3, non-coding RNA' + assert results['NR_148928.1:n.2897T>A']['gene_symbol'] == 'BRAF' + assert results['NR_148928.1:n.2897T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_148928.1:n.2897T>A']['submitted_variant'] == '7-140453136-A-T' + 
assert results['NR_148928.1:n.2897T>A']['genome_context_intronic_sequence'] == '' + assert results['NR_148928.1:n.2897T>A']['hgvs_lrg_variant'] == '' + assert results['NR_148928.1:n.2897T>A']['hgvs_transcript_variant'] == 'NR_148928.1:n.2897T>A' + assert results['NR_148928.1:n.2897T>A']['hgvs_refseqgene_variant'] == '' + assert results['NR_148928.1:n.2897T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': 'chr7', 'ref': u'A', 'pos': '140453136', 'alt': u'T'}} + assert results['NR_148928.1:n.2897T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': 'chr7', 'ref': u'A', 'pos': '140753336', 'alt': u'T'}} + assert results['NR_148928.1:n.2897T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': '7', 'ref': u'A', 'pos': '140453136', 'alt': u'T'}} + assert results['NR_148928.1:n.2897T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': '7', 'ref': u'A', 'pos': '140753336', 'alt': u'T'}} + assert results['NR_148928.1:n.2897T>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_148928.1'} + + assert 'NM_004333.5:c.1799T>A' in results.keys() + assert results['NM_004333.5:c.1799T>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_004333.5:c.1799T>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004333.5:c.1799T>A']['alt_genomic_loci'] == [] + assert results['NM_004333.5:c.1799T>A']['transcript_description'] == 'Homo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 1, mRNA' + assert results['NM_004333.5:c.1799T>A']['gene_symbol'] == 'BRAF' + assert results['NM_004333.5:c.1799T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004324.2(LRG_299p1):p.(Val600Glu)', 'slr': 'NP_004324.2:p.(V600E)'} + assert 
results['NM_004333.5:c.1799T>A']['submitted_variant'] == '7-140453136-A-T' + assert results['NM_004333.5:c.1799T>A']['genome_context_intronic_sequence'] == '' + assert results['NM_004333.5:c.1799T>A']['hgvs_lrg_variant'] == '' + assert results['NM_004333.5:c.1799T>A']['hgvs_transcript_variant'] == 'NM_004333.5:c.1799T>A' + assert results['NM_004333.5:c.1799T>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_004333.5:c.1799T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': 'chr7', 'ref': u'A', 'pos': '140453136', 'alt': u'T'}} + assert results['NM_004333.5:c.1799T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': 'chr7', 'ref': u'A', 'pos': '140753336', 'alt': u'T'}} + assert results['NM_004333.5:c.1799T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': '7', 'ref': u'A', 'pos': '140453136', 'alt': u'T'}} + assert results['NM_004333.5:c.1799T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': '7', 'ref': u'A', 'pos': '140753336', 'alt': u'T'}} + assert results['NM_004333.5:c.1799T>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.5'} + + assert results['flag'] == 'gene_variant' + assert 'NM_004333.4:c.1799T>A' in results.keys() + assert results['NM_004333.4:c.1799T>A']['hgvs_lrg_transcript_variant'] == 'LRG_299t1:c.1799T>A' + assert results['NM_004333.4:c.1799T>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004333.4:c.1799T>A']['alt_genomic_loci'] == [] + assert results['NM_004333.4:c.1799T>A']['transcript_description'] == 'Homo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), mRNA' + assert results['NM_004333.4:c.1799T>A']['gene_symbol'] == 'BRAF' + 
assert results['NM_004333.4:c.1799T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004324.2(LRG_299p1):p.(Val600Glu)', 'slr': 'NP_004324.2:p.(V600E)'} + assert results['NM_004333.4:c.1799T>A']['submitted_variant'] == '7-140453136-A-T' + assert results['NM_004333.4:c.1799T>A']['genome_context_intronic_sequence'] == '' + assert results['NM_004333.4:c.1799T>A']['hgvs_lrg_variant'] == '' + assert results['NM_004333.4:c.1799T>A']['hgvs_transcript_variant'] == 'NM_004333.4:c.1799T>A' + assert results['NM_004333.4:c.1799T>A']['hgvs_refseqgene_variant'] == 'NG_007873.2:g.176429T>A' + assert results['NM_004333.4:c.1799T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': 'chr7', 'ref': u'A', 'pos': '140453136', 'alt': u'T'}} + assert results['NM_004333.4:c.1799T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': 'chr7', 'ref': u'A', 'pos': '140753336', 'alt': u'T'}} + assert results['NM_004333.4:c.1799T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': '7', 'ref': u'A', 'pos': '140453136', 'alt': u'T'}} + assert results['NM_004333.4:c.1799T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': '7', 'ref': u'A', 'pos': '140753336', 'alt': u'T'}} + assert results['NM_004333.4:c.1799T>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007873.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.4'} + + + def test_variant304(self): + variant = '7-140453137-C-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NR_148928.1:n.2896G>A' in results.keys() + assert results['NR_148928.1:n.2896G>A']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NR_148928.1:n.2896G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_148928.1:n.2896G>A']['alt_genomic_loci'] == [] + assert results['NR_148928.1:n.2896G>A']['transcript_description'] == 'Homo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 3, non-coding RNA' + assert results['NR_148928.1:n.2896G>A']['gene_symbol'] == 'BRAF' + assert results['NR_148928.1:n.2896G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_148928.1:n.2896G>A']['submitted_variant'] == '7-140453137-C-T' + assert results['NR_148928.1:n.2896G>A']['genome_context_intronic_sequence'] == '' + assert results['NR_148928.1:n.2896G>A']['hgvs_lrg_variant'] == '' + assert results['NR_148928.1:n.2896G>A']['hgvs_transcript_variant'] == 'NR_148928.1:n.2896G>A' + assert results['NR_148928.1:n.2896G>A']['hgvs_refseqgene_variant'] == '' + assert results['NR_148928.1:n.2896G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': 'chr7', 'ref': u'C', 'pos': '140453137', 'alt': u'T'}} + assert results['NR_148928.1:n.2896G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': 'chr7', 'ref': u'C', 'pos': '140753337', 'alt': u'T'}} + assert results['NR_148928.1:n.2896G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': '7', 'ref': u'C', 'pos': '140453137', 'alt': u'T'}} + assert results['NR_148928.1:n.2896G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': '7', 'ref': u'C', 'pos': '140753337', 'alt': u'T'}} + assert results['NR_148928.1:n.2896G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_148928.1'} + + assert 'NM_004333.5:c.1798G>A' in results.keys() + assert 
results['NM_004333.5:c.1798G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_004333.5:c.1798G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004333.5:c.1798G>A']['alt_genomic_loci'] == [] + assert results['NM_004333.5:c.1798G>A']['transcript_description'] == 'Homo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 1, mRNA' + assert results['NM_004333.5:c.1798G>A']['gene_symbol'] == 'BRAF' + assert results['NM_004333.5:c.1798G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004324.2(LRG_299p1):p.(Val600Met)', 'slr': 'NP_004324.2:p.(V600M)'} + assert results['NM_004333.5:c.1798G>A']['submitted_variant'] == '7-140453137-C-T' + assert results['NM_004333.5:c.1798G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_004333.5:c.1798G>A']['hgvs_lrg_variant'] == '' + assert results['NM_004333.5:c.1798G>A']['hgvs_transcript_variant'] == 'NM_004333.5:c.1798G>A' + assert results['NM_004333.5:c.1798G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_004333.5:c.1798G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': 'chr7', 'ref': u'C', 'pos': '140453137', 'alt': u'T'}} + assert results['NM_004333.5:c.1798G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': 'chr7', 'ref': u'C', 'pos': '140753337', 'alt': u'T'}} + assert results['NM_004333.5:c.1798G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': '7', 'ref': u'C', 'pos': '140453137', 'alt': u'T'}} + assert results['NM_004333.5:c.1798G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': '7', 'ref': u'C', 'pos': '140753337', 'alt': u'T'}} + assert results['NM_004333.5:c.1798G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2', 
'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.5'} + + assert 'NM_004333.4:c.1798G>A' in results.keys() + assert results['NM_004333.4:c.1798G>A']['hgvs_lrg_transcript_variant'] == 'LRG_299t1:c.1798G>A' + assert results['NM_004333.4:c.1798G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004333.4:c.1798G>A']['alt_genomic_loci'] == [] + assert results['NM_004333.4:c.1798G>A']['transcript_description'] == 'Homo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), mRNA' + assert results['NM_004333.4:c.1798G>A']['gene_symbol'] == 'BRAF' + assert results['NM_004333.4:c.1798G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004324.2(LRG_299p1):p.(Val600Met)', 'slr': 'NP_004324.2:p.(V600M)'} + assert results['NM_004333.4:c.1798G>A']['submitted_variant'] == '7-140453137-C-T' + assert results['NM_004333.4:c.1798G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_004333.4:c.1798G>A']['hgvs_lrg_variant'] == '' + assert results['NM_004333.4:c.1798G>A']['hgvs_transcript_variant'] == 'NM_004333.4:c.1798G>A' + assert results['NM_004333.4:c.1798G>A']['hgvs_refseqgene_variant'] == 'NG_007873.2:g.176428G>A' + assert results['NM_004333.4:c.1798G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': 'chr7', 'ref': u'C', 'pos': '140453137', 'alt': u'T'}} + assert results['NM_004333.4:c.1798G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': 'chr7', 'ref': u'C', 'pos': '140753337', 'alt': u'T'}} + assert results['NM_004333.4:c.1798G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': '7', 'ref': u'C', 'pos': '140453137', 'alt': u'T'}} + assert results['NM_004333.4:c.1798G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': '7', 'ref': u'C', 'pos': '140753337', 
'alt': u'T'}} + assert results['NM_004333.4:c.1798G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007873.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.4'} + + assert 'NM_001354609.1:c.1798G>A' in results.keys() + assert results['NM_001354609.1:c.1798G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001354609.1:c.1798G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001354609.1:c.1798G>A']['alt_genomic_loci'] == [] + assert results['NM_001354609.1:c.1798G>A']['transcript_description'] == 'Homo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 2, mRNA' + assert results['NM_001354609.1:c.1798G>A']['gene_symbol'] == 'BRAF' + assert results['NM_001354609.1:c.1798G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001341538.1:p.(Val600Met)', 'slr': 'NP_001341538.1:p.(V600M)'} + assert results['NM_001354609.1:c.1798G>A']['submitted_variant'] == '7-140453137-C-T' + assert results['NM_001354609.1:c.1798G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001354609.1:c.1798G>A']['hgvs_lrg_variant'] == '' + assert results['NM_001354609.1:c.1798G>A']['hgvs_transcript_variant'] == 'NM_001354609.1:c.1798G>A' + assert results['NM_001354609.1:c.1798G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001354609.1:c.1798G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': 'chr7', 'ref': u'C', 'pos': '140453137', 'alt': u'T'}} + assert results['NM_001354609.1:c.1798G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': 'chr7', 'ref': u'C', 'pos': '140753337', 'alt': u'T'}} + assert results['NM_001354609.1:c.1798G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': '7', 
'ref': u'C', 'pos': '140453137', 'alt': u'T'}} + assert results['NM_001354609.1:c.1798G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': '7', 'ref': u'C', 'pos': '140753337', 'alt': u'T'}} + assert results['NM_001354609.1:c.1798G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341538.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354609.1'} + + assert results['flag'] == 'gene_variant' + + def test_variant305(self): + variant = '7-143013488-A-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000083.2:c.180+3A>T' in results.keys() + assert results['NM_000083.2:c.180+3A>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000083.2:c.180+3A>T']['refseqgene_context_intronic_sequence'] == 'NG_009815.1(NM_000083.2):c.180+3A>T' + assert results['NM_000083.2:c.180+3A>T']['alt_genomic_loci'] == [] + assert results['NM_000083.2:c.180+3A>T']['transcript_description'] == 'Homo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 1, mRNA' + assert results['NM_000083.2:c.180+3A>T']['gene_symbol'] == 'CLCN1' + assert results['NM_000083.2:c.180+3A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000074.2:p.?', 'slr': 'NP_000074.2:p.?'} + assert results['NM_000083.2:c.180+3A>T']['submitted_variant'] == '7-143013488-A-T' + assert results['NM_000083.2:c.180+3A>T']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_000083.2):c.180+3A>T' + assert results['NM_000083.2:c.180+3A>T']['hgvs_lrg_variant'] == '' + assert results['NM_000083.2:c.180+3A>T']['hgvs_transcript_variant'] == 'NM_000083.2:c.180+3A>T' + assert results['NM_000083.2:c.180+3A>T']['hgvs_refseqgene_variant'] == 'NG_009815.1:g.5270A>T' + assert results['NM_000083.2:c.180+3A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.143013488A>T', 'vcf': 
{'chr': 'chr7', 'ref': 'A', 'pos': '143013488', 'alt': 'T'}} + assert results['NM_000083.2:c.180+3A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143316395A>T', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '143316395', 'alt': 'T'}} + assert results['NM_000083.2:c.180+3A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.143013488A>T', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '143013488', 'alt': 'T'}} + assert results['NM_000083.2:c.180+3A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143316395A>T', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '143316395', 'alt': 'T'}} + assert results['NM_000083.2:c.180+3A>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009815.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000074.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000083.2'} + + assert 'NR_046453.1:n.267+3A>T' in results.keys() + assert results['NR_046453.1:n.267+3A>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_046453.1:n.267+3A>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_046453.1:n.267+3A>T']['alt_genomic_loci'] == [] + assert results['NR_046453.1:n.267+3A>T']['transcript_description'] == 'Homo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 2, non-coding RNA' + assert results['NR_046453.1:n.267+3A>T']['gene_symbol'] == 'CLCN1' + assert results['NR_046453.1:n.267+3A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_046453.1:n.267+3A>T']['submitted_variant'] == '7-143013488-A-T' + assert results['NR_046453.1:n.267+3A>T']['genome_context_intronic_sequence'] == 'NC_000007.13(NR_046453.1):c.267+3A>T' + assert results['NR_046453.1:n.267+3A>T']['hgvs_lrg_variant'] == '' + assert results['NR_046453.1:n.267+3A>T']['hgvs_transcript_variant'] == 'NR_046453.1:n.267+3A>T' + assert 
results['NR_046453.1:n.267+3A>T']['hgvs_refseqgene_variant'] == '' + assert results['NR_046453.1:n.267+3A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.143013488A>T', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '143013488', 'alt': 'T'}} + assert results['NR_046453.1:n.267+3A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143316395A>T', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '143316395', 'alt': 'T'}} + assert results['NR_046453.1:n.267+3A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.143013488A>T', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '143013488', 'alt': 'T'}} + assert results['NR_046453.1:n.267+3A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143316395A>T', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '143316395', 'alt': 'T'}} + assert results['NR_046453.1:n.267+3A>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_046453.1'} + + + def test_variant306(self): + variant = '7-143018934-G-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NR_046453.1:n.776G>A' in results.keys() + assert results['NR_046453.1:n.776G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_046453.1:n.776G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_046453.1:n.776G>A']['alt_genomic_loci'] == [] + assert results['NR_046453.1:n.776G>A']['transcript_description'] == 'Homo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 2, non-coding RNA' + assert results['NR_046453.1:n.776G>A']['gene_symbol'] == 'CLCN1' + assert results['NR_046453.1:n.776G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_046453.1:n.776G>A']['submitted_variant'] == '7-143018934-G-A' + assert results['NR_046453.1:n.776G>A']['genome_context_intronic_sequence'] == '' + assert 
results['NR_046453.1:n.776G>A']['hgvs_lrg_variant'] == '' + assert results['NR_046453.1:n.776G>A']['hgvs_transcript_variant'] == 'NR_046453.1:n.776G>A' + assert results['NR_046453.1:n.776G>A']['hgvs_refseqgene_variant'] == '' + assert results['NR_046453.1:n.776G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.143018934G>A', 'vcf': {'chr': 'chr7', 'ref': 'G', 'pos': '143018934', 'alt': 'A'}} + assert results['NR_046453.1:n.776G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143321841G>A', 'vcf': {'chr': 'chr7', 'ref': 'G', 'pos': '143321841', 'alt': 'A'}} + assert results['NR_046453.1:n.776G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.143018934G>A', 'vcf': {'chr': '7', 'ref': 'G', 'pos': '143018934', 'alt': 'A'}} + assert results['NR_046453.1:n.776G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143321841G>A', 'vcf': {'chr': '7', 'ref': 'G', 'pos': '143321841', 'alt': 'A'}} + assert results['NR_046453.1:n.776G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_046453.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_000083.2:c.689G>A' in results.keys() + assert results['NM_000083.2:c.689G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000083.2:c.689G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000083.2:c.689G>A']['alt_genomic_loci'] == [] + assert results['NM_000083.2:c.689G>A']['transcript_description'] == 'Homo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 1, mRNA' + assert results['NM_000083.2:c.689G>A']['gene_symbol'] == 'CLCN1' + assert results['NM_000083.2:c.689G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000074.2:p.(Gly230Glu)', 'slr': 'NP_000074.2:p.(G230E)'} + assert results['NM_000083.2:c.689G>A']['submitted_variant'] == '7-143018934-G-A' + assert 
results['NM_000083.2:c.689G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_000083.2:c.689G>A']['hgvs_lrg_variant'] == '' + assert results['NM_000083.2:c.689G>A']['hgvs_transcript_variant'] == 'NM_000083.2:c.689G>A' + assert results['NM_000083.2:c.689G>A']['hgvs_refseqgene_variant'] == 'NG_009815.1:g.10716G>A' + assert results['NM_000083.2:c.689G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.143018934G>A', 'vcf': {'chr': 'chr7', 'ref': 'G', 'pos': '143018934', 'alt': 'A'}} + assert results['NM_000083.2:c.689G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143321841G>A', 'vcf': {'chr': 'chr7', 'ref': 'G', 'pos': '143321841', 'alt': 'A'}} + assert results['NM_000083.2:c.689G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.143018934G>A', 'vcf': {'chr': '7', 'ref': 'G', 'pos': '143018934', 'alt': 'A'}} + assert results['NM_000083.2:c.689G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143321841G>A', 'vcf': {'chr': '7', 'ref': 'G', 'pos': '143321841', 'alt': 'A'}} + assert results['NM_000083.2:c.689G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009815.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000074.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000083.2'} + + + def test_variant307(self): + variant = '7-143048771-C-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NR_046453.1:n.2620C>T' in results.keys() + assert results['NR_046453.1:n.2620C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_046453.1:n.2620C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_046453.1:n.2620C>T']['alt_genomic_loci'] == [] + assert results['NR_046453.1:n.2620C>T']['transcript_description'] == 'Homo sapiens chloride voltage-gated channel 
1 (CLCN1), transcript variant 2, non-coding RNA' + assert results['NR_046453.1:n.2620C>T']['gene_symbol'] == 'CLCN1' + assert results['NR_046453.1:n.2620C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_046453.1:n.2620C>T']['submitted_variant'] == '7-143048771-C-T' + assert results['NR_046453.1:n.2620C>T']['genome_context_intronic_sequence'] == '' + assert results['NR_046453.1:n.2620C>T']['hgvs_lrg_variant'] == '' + assert results['NR_046453.1:n.2620C>T']['hgvs_transcript_variant'] == 'NR_046453.1:n.2620C>T' + assert results['NR_046453.1:n.2620C>T']['hgvs_refseqgene_variant'] == '' + assert results['NR_046453.1:n.2620C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.143048771C>T', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '143048771', 'alt': 'T'}} + assert results['NR_046453.1:n.2620C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143351678C>T', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '143351678', 'alt': 'T'}} + assert results['NR_046453.1:n.2620C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.143048771C>T', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '143048771', 'alt': 'T'}} + assert results['NR_046453.1:n.2620C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143351678C>T', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '143351678', 'alt': 'T'}} + assert results['NR_046453.1:n.2620C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_046453.1'} + + assert 'NM_000083.2:c.2680C>T' in results.keys() + assert results['NM_000083.2:c.2680C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000083.2:c.2680C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000083.2:c.2680C>T']['alt_genomic_loci'] == [] + assert results['NM_000083.2:c.2680C>T']['transcript_description'] == 'Homo sapiens 
chloride voltage-gated channel 1 (CLCN1), transcript variant 1, mRNA' + assert results['NM_000083.2:c.2680C>T']['gene_symbol'] == 'CLCN1' + assert results['NM_000083.2:c.2680C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000074.2:p.(Arg894Ter)', 'slr': 'NP_000074.2:p.(R894*)'} + assert results['NM_000083.2:c.2680C>T']['submitted_variant'] == '7-143048771-C-T' + assert results['NM_000083.2:c.2680C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000083.2:c.2680C>T']['hgvs_lrg_variant'] == '' + assert results['NM_000083.2:c.2680C>T']['hgvs_transcript_variant'] == 'NM_000083.2:c.2680C>T' + assert results['NM_000083.2:c.2680C>T']['hgvs_refseqgene_variant'] == 'NG_009815.1:g.40553C>T' + assert results['NM_000083.2:c.2680C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.143048771C>T', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '143048771', 'alt': 'T'}} + assert results['NM_000083.2:c.2680C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143351678C>T', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '143351678', 'alt': 'T'}} + assert results['NM_000083.2:c.2680C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.143048771C>T', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '143048771', 'alt': 'T'}} + assert results['NM_000083.2:c.2680C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143351678C>T', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '143351678', 'alt': 'T'}} + assert results['NM_000083.2:c.2680C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009815.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000074.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000083.2'} + + + def test_variant308(self): + variant = '8-1871951-C-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_014629.3:c.2399C>T' in results.keys() + 
assert results['NM_014629.3:c.2399C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014629.3:c.2399C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014629.3:c.2399C>T']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'HSCHR8_8_CTG1', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'chr8_KI270821v1_alt', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}] + assert results['NM_014629.3:c.2399C>T']['transcript_description'] == 'Homo sapiens Rho guanine nucleotide exchange factor 10 (ARHGEF10), transcript variant 1, mRNA' + assert results['NM_014629.3:c.2399C>T']['gene_symbol'] == 'ARHGEF10' + assert results['NM_014629.3:c.2399C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055444.2(LRG_234p1):p.(Pro800Leu)', 'slr': 'NP_055444.2:p.(P800L)'} + assert results['NM_014629.3:c.2399C>T']['submitted_variant'] == '8-1871951-C-T' + assert results['NM_014629.3:c.2399C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_014629.3:c.2399C>T']['hgvs_lrg_variant'] == '' + assert results['NM_014629.3:c.2399C>T']['hgvs_transcript_variant'] == 'NM_014629.3:c.2399C>T' + assert results['NM_014629.3:c.2399C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_014629.3:c.2399C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000008.10:g.1871951C>T', 'vcf': {'chr': 'chr8', 'ref': 'C', 'pos': '1871951', 'alt': 'T'}} + assert results['NM_014629.3:c.2399C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000008.11:g.1923785C>T', 'vcf': {'chr': 'chr8', 'ref': 'C', 'pos': '1923785', 'alt': 'T'}} + assert results['NM_014629.3:c.2399C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000008.10:g.1871951C>T', 'vcf': {'chr': '8', 'ref': 'C', 'pos': '1871951', 'alt': 'T'}} + assert 
results['NM_014629.3:c.2399C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000008.11:g.1923785C>T', 'vcf': {'chr': '8', 'ref': 'C', 'pos': '1923785', 'alt': 'T'}} + assert results['NM_014629.3:c.2399C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055444.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014629.3'} + + assert 'NM_014629.2:c.2399C>T' in results.keys() + assert results['NM_014629.2:c.2399C>T']['hgvs_lrg_transcript_variant'] == 'LRG_234t1:c.2399C>T' + assert results['NM_014629.2:c.2399C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014629.2:c.2399C>T']['alt_genomic_loci'] == [] + assert results['NM_014629.2:c.2399C>T']['transcript_description'] == 'Homo sapiens Rho guanine nucleotide exchange factor (GEF) 10 (ARHGEF10), mRNA' + assert results['NM_014629.2:c.2399C>T']['gene_symbol'] == 'ARHGEF10' + assert results['NM_014629.2:c.2399C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055444.2(LRG_234p1):p.(Pro800Leu)', 'slr': 'NP_055444.2:p.(P800L)'} + assert results['NM_014629.2:c.2399C>T']['submitted_variant'] == '8-1871951-C-T' + assert results['NM_014629.2:c.2399C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_014629.2:c.2399C>T']['hgvs_lrg_variant'] == 'LRG_234:g.104803C>T' + assert results['NM_014629.2:c.2399C>T']['hgvs_transcript_variant'] == 'NM_014629.2:c.2399C>T' + assert results['NM_014629.2:c.2399C>T']['hgvs_refseqgene_variant'] == 'NG_008480.1:g.104803C>T' + assert results['NM_014629.2:c.2399C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000008.10:g.1871951C>T', 'vcf': {'chr': 'chr8', 'ref': 'C', 'pos': '1871951', 'alt': 'T'}} + assert 'hg38' not in results['NM_014629.2:c.2399C>T']['primary_assembly_loci'].keys() + assert results['NM_014629.2:c.2399C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000008.10:g.1871951C>T', 'vcf': {'chr': '8', 'ref': 'C', 
'pos': '1871951', 'alt': 'T'}} + assert 'grch38' not in results['NM_014629.2:c.2399C>T']['primary_assembly_loci'].keys() + assert results['NM_014629.2:c.2399C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008480.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055444.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014629.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_234.xml'} + + assert 'NM_001308153.1:c.2471C>T' in results.keys() + assert results['NM_001308153.1:c.2471C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001308153.1:c.2471C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001308153.1:c.2471C>T']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'HSCHR8_8_CTG1', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'chr8_KI270821v1_alt', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}] + assert results['NM_001308153.1:c.2471C>T']['transcript_description'] == 'Homo sapiens Rho guanine nucleotide exchange factor 10 (ARHGEF10), transcript variant 3, mRNA' + assert results['NM_001308153.1:c.2471C>T']['gene_symbol'] == 'ARHGEF10' + assert results['NM_001308153.1:c.2471C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001295082.1:p.(Pro824Leu)', 'slr': 'NP_001295082.1:p.(P824L)'} + assert results['NM_001308153.1:c.2471C>T']['submitted_variant'] == '8-1871951-C-T' + assert results['NM_001308153.1:c.2471C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001308153.1:c.2471C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001308153.1:c.2471C>T']['hgvs_transcript_variant'] == 'NM_001308153.1:c.2471C>T' + assert results['NM_001308153.1:c.2471C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001308153.1:c.2471C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000008.10:g.1871951C>T', 'vcf': {'chr': 'chr8', 'ref': 'C', 'pos': '1871951', 'alt': 'T'}} + assert results['NM_001308153.1:c.2471C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000008.11:g.1923785C>T', 'vcf': {'chr': 'chr8', 'ref': 'C', 'pos': '1923785', 'alt': 'T'}} + assert results['NM_001308153.1:c.2471C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000008.10:g.1871951C>T', 'vcf': {'chr': '8', 'ref': 'C', 'pos': '1871951', 'alt': 'T'}} + assert results['NM_001308153.1:c.2471C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000008.11:g.1923785C>T', 'vcf': {'chr': '8', 'ref': 'C', 'pos': '1923785', 'alt': 'T'}} + assert results['NM_001308153.1:c.2471C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001295082.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001308153.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001308152.1:c.2285C>T' in results.keys() + assert results['NM_001308152.1:c.2285C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001308152.1:c.2285C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001308152.1:c.2285C>T']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'HSCHR8_8_CTG1', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'chr8_KI270821v1_alt', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}] + assert results['NM_001308152.1:c.2285C>T']['transcript_description'] == 'Homo sapiens Rho guanine nucleotide exchange factor 10 (ARHGEF10), transcript variant 2, mRNA' + assert results['NM_001308152.1:c.2285C>T']['gene_symbol'] == 'ARHGEF10' + assert results['NM_001308152.1:c.2285C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001295081.1:p.(Pro762Leu)', 'slr': 'NP_001295081.1:p.(P762L)'} + assert 
results['NM_001308152.1:c.2285C>T']['submitted_variant'] == '8-1871951-C-T' + assert results['NM_001308152.1:c.2285C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001308152.1:c.2285C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001308152.1:c.2285C>T']['hgvs_transcript_variant'] == 'NM_001308152.1:c.2285C>T' + assert results['NM_001308152.1:c.2285C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001308152.1:c.2285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000008.10:g.1871951C>T', 'vcf': {'chr': 'chr8', 'ref': 'C', 'pos': '1871951', 'alt': 'T'}} + assert results['NM_001308152.1:c.2285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000008.11:g.1923785C>T', 'vcf': {'chr': 'chr8', 'ref': 'C', 'pos': '1923785', 'alt': 'T'}} + assert results['NM_001308152.1:c.2285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000008.10:g.1871951C>T', 'vcf': {'chr': '8', 'ref': 'C', 'pos': '1871951', 'alt': 'T'}} + assert results['NM_001308152.1:c.2285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000008.11:g.1923785C>T', 'vcf': {'chr': '8', 'ref': 'C', 'pos': '1923785', 'alt': 'T'}} + assert results['NM_001308152.1:c.2285C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001295081.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001308152.1'} + + + def test_variant309(self): + variant = '9-13112056-T-TG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001261407.1:c.5504dup' in results.keys() + assert results['NM_001261407.1:c.5504dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001261407.1:c.5504dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001261407.1:c.5504dup']['alt_genomic_loci'] == [] + assert results['NM_001261407.1:c.5504dup']['transcript_description'] == 'Homo sapiens multiple PDZ domain crumbs cell 
polarity complex component (MPDZ), transcript variant 3, mRNA' + assert results['NM_001261407.1:c.5504dup']['gene_symbol'] == 'MPDZ' + assert results['NM_001261407.1:c.5504dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001248336.1:p.(Thr1836AsnfsTer15)', 'slr': 'NP_001248336.1:p.(T1836Nfs*15)'} + assert results['NM_001261407.1:c.5504dup']['submitted_variant'] == '9-13112056-T-TG' + assert results['NM_001261407.1:c.5504dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001261407.1:c.5504dup']['hgvs_lrg_variant'] == '' + assert results['NM_001261407.1:c.5504dup']['hgvs_transcript_variant'] == 'NM_001261407.1:c.5504dup' + assert results['NM_001261407.1:c.5504dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001261407.1:c.5504dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112057dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} + assert results['NM_001261407.1:c.5504dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112058dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} + assert results['NM_001261407.1:c.5504dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112057dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} + assert results['NM_001261407.1:c.5504dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112058dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} + assert results['NM_001261407.1:c.5504dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001248336.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001261407.1'} + + assert 'NM_001330637.1:c.5690dup' in results.keys() + assert results['NM_001330637.1:c.5690dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330637.1:c.5690dup']['refseqgene_context_intronic_sequence'] 
== '' + assert results['NM_001330637.1:c.5690dup']['alt_genomic_loci'] == [] + assert results['NM_001330637.1:c.5690dup']['transcript_description'] == 'Homo sapiens multiple PDZ domain crumbs cell polarity complex component (MPDZ), transcript variant 4, mRNA' + assert results['NM_001330637.1:c.5690dup']['gene_symbol'] == 'MPDZ' + assert results['NM_001330637.1:c.5690dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317566.1:p.(Thr1898AsnfsTer15)', 'slr': 'NP_001317566.1:p.(T1898Nfs*15)'} + assert results['NM_001330637.1:c.5690dup']['submitted_variant'] == '9-13112056-T-TG' + assert results['NM_001330637.1:c.5690dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001330637.1:c.5690dup']['hgvs_lrg_variant'] == '' + assert results['NM_001330637.1:c.5690dup']['hgvs_transcript_variant'] == 'NM_001330637.1:c.5690dup' + assert results['NM_001330637.1:c.5690dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330637.1:c.5690dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112057dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} + assert results['NM_001330637.1:c.5690dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112058dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} + assert results['NM_001330637.1:c.5690dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112057dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} + assert results['NM_001330637.1:c.5690dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112058dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} + assert results['NM_001330637.1:c.5690dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317566.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330637.1'} + + assert 
'NM_001261406.1:c.5591dup' in results.keys() + assert results['NM_001261406.1:c.5591dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001261406.1:c.5591dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001261406.1:c.5591dup']['alt_genomic_loci'] == [] + assert results['NM_001261406.1:c.5591dup']['transcript_description'] == 'Homo sapiens multiple PDZ domain crumbs cell polarity complex component (MPDZ), transcript variant 2, mRNA' + assert results['NM_001261406.1:c.5591dup']['gene_symbol'] == 'MPDZ' + assert results['NM_001261406.1:c.5591dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001248335.1:p.(Thr1865AsnfsTer15)', 'slr': 'NP_001248335.1:p.(T1865Nfs*15)'} + assert results['NM_001261406.1:c.5591dup']['submitted_variant'] == '9-13112056-T-TG' + assert results['NM_001261406.1:c.5591dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001261406.1:c.5591dup']['hgvs_lrg_variant'] == '' + assert results['NM_001261406.1:c.5591dup']['hgvs_transcript_variant'] == 'NM_001261406.1:c.5591dup' + assert results['NM_001261406.1:c.5591dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001261406.1:c.5591dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112057dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} + assert results['NM_001261406.1:c.5591dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112058dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} + assert results['NM_001261406.1:c.5591dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112057dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} + assert results['NM_001261406.1:c.5591dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112058dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} + assert 
results['NM_001261406.1:c.5591dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001248335.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001261406.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_003829.4:c.5603dup' in results.keys() + assert results['NM_003829.4:c.5603dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003829.4:c.5603dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003829.4:c.5603dup']['alt_genomic_loci'] == [] + assert results['NM_003829.4:c.5603dup']['transcript_description'] == 'Homo sapiens multiple PDZ domain crumbs cell polarity complex component (MPDZ), transcript variant 1, mRNA' + assert results['NM_003829.4:c.5603dup']['gene_symbol'] == 'MPDZ' + assert results['NM_003829.4:c.5603dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003820.2:p.(Thr1869AsnfsTer15)', 'slr': 'NP_003820.2:p.(T1869Nfs*15)'} + assert results['NM_003829.4:c.5603dup']['submitted_variant'] == '9-13112056-T-TG' + assert results['NM_003829.4:c.5603dup']['genome_context_intronic_sequence'] == '' + assert results['NM_003829.4:c.5603dup']['hgvs_lrg_variant'] == '' + assert results['NM_003829.4:c.5603dup']['hgvs_transcript_variant'] == 'NM_003829.4:c.5603dup' + assert results['NM_003829.4:c.5603dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_003829.4:c.5603dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112057dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} + assert results['NM_003829.4:c.5603dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112058dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} + assert results['NM_003829.4:c.5603dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112057dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} + assert 
results['NM_003829.4:c.5603dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112058dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} + assert results['NM_003829.4:c.5603dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003820.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003829.4'} + + + def test_variant310(self): + variant = '9-21971208-C-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_058197.4:c.*74-1G>T' in results.keys() + assert results['NM_058197.4:c.*74-1G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_058197.4:c.*74-1G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_058197.4:c.*74-1G>T']['alt_genomic_loci'] == [] + assert results['NM_058197.4:c.*74-1G>T']['transcript_description'] == 'Homo sapiens cyclin dependent kinase inhibitor 2A (CDKN2A), transcript variant 3, mRNA' + assert results['NM_058197.4:c.*74-1G>T']['gene_symbol'] == 'CDKN2A' + assert results['NM_058197.4:c.*74-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_478104.2:p.?', 'slr': 'NP_478104.2:p.?'} + assert results['NM_058197.4:c.*74-1G>T']['submitted_variant'] == '9-21971208-C-A' + assert results['NM_058197.4:c.*74-1G>T']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_058197.4):c.*74-1G>T' + assert results['NM_058197.4:c.*74-1G>T']['hgvs_lrg_variant'] == '' + assert results['NM_058197.4:c.*74-1G>T']['hgvs_transcript_variant'] == 'NM_058197.4:c.*74-1G>T' + assert results['NM_058197.4:c.*74-1G>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_058197.4:c.*74-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': 'chr9', 'ref': u'C', 'pos': '21971208', 'alt': u'A'}} + assert results['NM_058197.4:c.*74-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': 
{'chr': 'chr9', 'ref': u'C', 'pos': '21971209', 'alt': u'A'}} + assert results['NM_058197.4:c.*74-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': '9', 'ref': u'C', 'pos': '21971208', 'alt': u'A'}} + assert results['NM_058197.4:c.*74-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': '9', 'ref': u'C', 'pos': '21971209', 'alt': u'A'}} + assert results['NM_058197.4:c.*74-1G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_478104.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_058197.4'} + + assert 'NM_000077.4:c.151-1G>T' in results.keys() + assert results['NM_000077.4:c.151-1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_11t1:c.151-1G>T' + assert results['NM_000077.4:c.151-1G>T']['refseqgene_context_intronic_sequence'] == 'NG_007485.1(NM_000077.4):c.151-1G>T' + assert results['NM_000077.4:c.151-1G>T']['alt_genomic_loci'] == [] + assert results['NM_000077.4:c.151-1G>T']['transcript_description'] == 'Homo sapiens cyclin dependent kinase inhibitor 2A (CDKN2A), transcript variant 1, mRNA' + assert results['NM_000077.4:c.151-1G>T']['gene_symbol'] == 'CDKN2A' + assert results['NM_000077.4:c.151-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000068.1(LRG_11p1):p.?', 'slr': 'NP_000068.1:p.?'} + assert results['NM_000077.4:c.151-1G>T']['submitted_variant'] == '9-21971208-C-A' + assert results['NM_000077.4:c.151-1G>T']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_000077.4):c.151-1G>T' + assert results['NM_000077.4:c.151-1G>T']['hgvs_lrg_variant'] == 'LRG_11:g.28283G>T' + assert results['NM_000077.4:c.151-1G>T']['hgvs_transcript_variant'] == 'NM_000077.4:c.151-1G>T' + assert results['NM_000077.4:c.151-1G>T']['hgvs_refseqgene_variant'] == 'NG_007485.1:g.28283G>T' + assert results['NM_000077.4:c.151-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000009.11:g.21971208C>A', 'vcf': {'chr': 'chr9', 'ref': u'C', 'pos': '21971208', 'alt': u'A'}} + assert results['NM_000077.4:c.151-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': 'chr9', 'ref': u'C', 'pos': '21971209', 'alt': u'A'}} + assert results['NM_000077.4:c.151-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': '9', 'ref': u'C', 'pos': '21971208', 'alt': u'A'}} + assert results['NM_000077.4:c.151-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': '9', 'ref': u'C', 'pos': '21971209', 'alt': u'A'}} + assert results['NM_000077.4:c.151-1G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007485.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000068.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000077.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_11.xml'} + + assert 'NM_001363763.1:c.-3-1G>T' in results.keys() + assert results['NM_001363763.1:c.-3-1G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363763.1:c.-3-1G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001363763.1:c.-3-1G>T']['alt_genomic_loci'] == [] + assert results['NM_001363763.1:c.-3-1G>T']['transcript_description'] == 'Homo sapiens cyclin dependent kinase inhibitor 2A (CDKN2A), transcript variant 6, mRNA' + assert results['NM_001363763.1:c.-3-1G>T']['gene_symbol'] == 'CDKN2A' + assert results['NM_001363763.1:c.-3-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350692.1:p.?', 'slr': 'NP_001350692.1:p.?'} + assert results['NM_001363763.1:c.-3-1G>T']['submitted_variant'] == '9-21971208-C-A' + assert results['NM_001363763.1:c.-3-1G>T']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001363763.1):c.-3-1G>T' + assert 
results['NM_001363763.1:c.-3-1G>T']['hgvs_lrg_variant'] == '' + assert results['NM_001363763.1:c.-3-1G>T']['hgvs_transcript_variant'] == 'NM_001363763.1:c.-3-1G>T' + assert results['NM_001363763.1:c.-3-1G>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001363763.1:c.-3-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': 'chr9', 'ref': u'C', 'pos': '21971208', 'alt': u'A'}} + assert 'hg38' not in results['NM_001363763.1:c.-3-1G>T']['primary_assembly_loci'].keys() + assert results['NM_001363763.1:c.-3-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': '9', 'ref': u'C', 'pos': '21971208', 'alt': u'A'}} + assert 'grch38' not in results['NM_001363763.1:c.-3-1G>T']['primary_assembly_loci'].keys() + assert results['NM_001363763.1:c.-3-1G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350692.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363763.1'} + + assert 'NM_001195132.1:c.151-1G>T' in results.keys() + assert results['NM_001195132.1:c.151-1G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001195132.1:c.151-1G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001195132.1:c.151-1G>T']['alt_genomic_loci'] == [] + assert results['NM_001195132.1:c.151-1G>T']['transcript_description'] == 'Homo sapiens cyclin dependent kinase inhibitor 2A (CDKN2A), transcript variant 5, mRNA' + assert results['NM_001195132.1:c.151-1G>T']['gene_symbol'] == 'CDKN2A' + assert results['NM_001195132.1:c.151-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001182061.1:p.?', 'slr': 'NP_001182061.1:p.?'} + assert results['NM_001195132.1:c.151-1G>T']['submitted_variant'] == '9-21971208-C-A' + assert results['NM_001195132.1:c.151-1G>T']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001195132.1):c.151-1G>T' + assert 
results['NM_001195132.1:c.151-1G>T']['hgvs_lrg_variant'] == '' + assert results['NM_001195132.1:c.151-1G>T']['hgvs_transcript_variant'] == 'NM_001195132.1:c.151-1G>T' + assert results['NM_001195132.1:c.151-1G>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001195132.1:c.151-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': 'chr9', 'ref': u'C', 'pos': '21971208', 'alt': u'A'}} + assert results['NM_001195132.1:c.151-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': 'chr9', 'ref': u'C', 'pos': '21971209', 'alt': u'A'}} + assert results['NM_001195132.1:c.151-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': '9', 'ref': u'C', 'pos': '21971208', 'alt': u'A'}} + assert results['NM_001195132.1:c.151-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': '9', 'ref': u'C', 'pos': '21971209', 'alt': u'A'}} + assert results['NM_001195132.1:c.151-1G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001182061.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001195132.1'} + + assert 'NM_058195.3:c.194-1G>T' in results.keys() + assert results['NM_058195.3:c.194-1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_11t2:c.194-1G>T' + assert results['NM_058195.3:c.194-1G>T']['refseqgene_context_intronic_sequence'] == 'NG_007485.1(NM_058195.3):c.194-1G>T' + assert results['NM_058195.3:c.194-1G>T']['alt_genomic_loci'] == [] + assert results['NM_058195.3:c.194-1G>T']['transcript_description'] == 'Homo sapiens cyclin dependent kinase inhibitor 2A (CDKN2A), transcript variant 4, mRNA' + assert results['NM_058195.3:c.194-1G>T']['gene_symbol'] == 'CDKN2A' + assert results['NM_058195.3:c.194-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_478102.2(LRG_11p2):p.?', 
'slr': 'NP_478102.2:p.?'} + assert results['NM_058195.3:c.194-1G>T']['submitted_variant'] == '9-21971208-C-A' + assert results['NM_058195.3:c.194-1G>T']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_058195.3):c.194-1G>T' + assert results['NM_058195.3:c.194-1G>T']['hgvs_lrg_variant'] == 'LRG_11:g.28283G>T' + assert results['NM_058195.3:c.194-1G>T']['hgvs_transcript_variant'] == 'NM_058195.3:c.194-1G>T' + assert results['NM_058195.3:c.194-1G>T']['hgvs_refseqgene_variant'] == 'NG_007485.1:g.28283G>T' + assert results['NM_058195.3:c.194-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': 'chr9', 'ref': u'C', 'pos': '21971208', 'alt': u'A'}} + assert results['NM_058195.3:c.194-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': 'chr9', 'ref': u'C', 'pos': '21971209', 'alt': u'A'}} + assert results['NM_058195.3:c.194-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': '9', 'ref': u'C', 'pos': '21971208', 'alt': u'A'}} + assert results['NM_058195.3:c.194-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': '9', 'ref': u'C', 'pos': '21971209', 'alt': u'A'}} + assert results['NM_058195.3:c.194-1G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007485.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_478102.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_058195.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_11.xml'} + + assert results['flag'] == 'gene_variant' + + def test_variant311(self): + variant = '9-35683240-T-TG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_001301227.1:c.773-3dup' in results.keys() + assert results['NM_001301227.1:c.773-3dup']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001301227.1:c.773-3dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001301227.1:c.773-3dup']['alt_genomic_loci'] == [] + assert results['NM_001301227.1:c.773-3dup']['transcript_description'] == 'Homo sapiens tropomyosin 2 (TPM2), transcript variant Tpm2.4, mRNA' + assert results['NM_001301227.1:c.773-3dup']['gene_symbol'] == 'TPM2' + assert results['NM_001301227.1:c.773-3dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001288156.1:p.?', 'slr': 'NP_001288156.1:p.?'} + assert results['NM_001301227.1:c.773-3dup']['submitted_variant'] == '9-35683240-T-TG' + assert results['NM_001301227.1:c.773-3dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001301227.1):c.773-3dup' + assert results['NM_001301227.1:c.773-3dup']['hgvs_lrg_variant'] == '' + assert results['NM_001301227.1:c.773-3dup']['hgvs_transcript_variant'] == 'NM_001301227.1:c.773-3dup' + assert results['NM_001301227.1:c.773-3dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001301227.1:c.773-3dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683241dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} + assert results['NM_001301227.1:c.773-3dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683244dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} + assert results['NM_001301227.1:c.773-3dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683241dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} + assert results['NM_001301227.1:c.773-3dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683244dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} + assert results['NM_001301227.1:c.773-3dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001288156.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001301227.1'} + + assert 'NM_001301226.1:c.772+1002dup' in results.keys() + assert results['NM_001301226.1:c.772+1002dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001301226.1:c.772+1002dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001301226.1:c.772+1002dup']['alt_genomic_loci'] == [] + assert results['NM_001301226.1:c.772+1002dup']['transcript_description'] == 'Homo sapiens tropomyosin 2 (TPM2), transcript variant Tpm2.3, mRNA' + assert results['NM_001301226.1:c.772+1002dup']['gene_symbol'] == 'TPM2' + assert results['NM_001301226.1:c.772+1002dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001288155.1:p.?', 'slr': 'NP_001288155.1:p.?'} + assert results['NM_001301226.1:c.772+1002dup']['submitted_variant'] == '9-35683240-T-TG' + assert results['NM_001301226.1:c.772+1002dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001301226.1):c.772+1002dup' + assert results['NM_001301226.1:c.772+1002dup']['hgvs_lrg_variant'] == '' + assert results['NM_001301226.1:c.772+1002dup']['hgvs_transcript_variant'] == 'NM_001301226.1:c.772+1002dup' + assert results['NM_001301226.1:c.772+1002dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001301226.1:c.772+1002dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683241dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} + assert results['NM_001301226.1:c.772+1002dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683244dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} + assert results['NM_001301226.1:c.772+1002dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683241dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} + assert results['NM_001301226.1:c.772+1002dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000009.12:g.35683244dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} + assert results['NM_001301226.1:c.772+1002dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001288155.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001301226.1'} + + assert 'NM_213674.1:c.772+1002dup' in results.keys() + assert results['NM_213674.1:c.772+1002dup']['hgvs_lrg_transcript_variant'] == 'LRG_680t1:c.772+1002dup' + assert results['NM_213674.1:c.772+1002dup']['refseqgene_context_intronic_sequence'] == 'NG_011620.1(NM_213674.1):c.772+1002dup' + assert results['NM_213674.1:c.772+1002dup']['alt_genomic_loci'] == [] + assert results['NM_213674.1:c.772+1002dup']['transcript_description'] == 'Homo sapiens tropomyosin 2 (TPM2), transcript variant Tpm2.1, mRNA' + assert results['NM_213674.1:c.772+1002dup']['gene_symbol'] == 'TPM2' + assert results['NM_213674.1:c.772+1002dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_998839.1(LRG_680p1):p.?', 'slr': 'NP_998839.1:p.?'} + assert results['NM_213674.1:c.772+1002dup']['submitted_variant'] == '9-35683240-T-TG' + assert results['NM_213674.1:c.772+1002dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_213674.1):c.772+1002dup' + assert results['NM_213674.1:c.772+1002dup']['hgvs_lrg_variant'] == 'LRG_680:g.11814dup' + assert results['NM_213674.1:c.772+1002dup']['hgvs_transcript_variant'] == 'NM_213674.1:c.772+1002dup' + assert results['NM_213674.1:c.772+1002dup']['hgvs_refseqgene_variant'] == 'NG_011620.1:g.11814dup' + assert results['NM_213674.1:c.772+1002dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683241dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} + assert results['NM_213674.1:c.772+1002dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683244dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} + assert 
results['NM_213674.1:c.772+1002dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683241dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} + assert results['NM_213674.1:c.772+1002dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683244dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} + assert results['NM_213674.1:c.772+1002dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011620.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_998839.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_213674.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_680.xml'} + + assert 'NM_003289.3:c.773-3dup' in results.keys() + assert results['NM_003289.3:c.773-3dup']['hgvs_lrg_transcript_variant'] == 'LRG_680t2:c.773-3dup' + assert results['NM_003289.3:c.773-3dup']['refseqgene_context_intronic_sequence'] == 'NG_011620.1(NM_003289.3):c.773-3dup' + assert results['NM_003289.3:c.773-3dup']['alt_genomic_loci'] == [] + assert results['NM_003289.3:c.773-3dup']['transcript_description'] == 'Homo sapiens tropomyosin 2 (TPM2), transcript variant Tpm2.2, mRNA' + assert results['NM_003289.3:c.773-3dup']['gene_symbol'] == 'TPM2' + assert results['NM_003289.3:c.773-3dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003280.2(LRG_680p2):p.?', 'slr': 'NP_003280.2:p.?'} + assert results['NM_003289.3:c.773-3dup']['submitted_variant'] == '9-35683240-T-TG' + assert results['NM_003289.3:c.773-3dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_003289.3):c.773-3dup' + assert results['NM_003289.3:c.773-3dup']['hgvs_lrg_variant'] == 'LRG_680:g.11814dup' + assert results['NM_003289.3:c.773-3dup']['hgvs_transcript_variant'] == 'NM_003289.3:c.773-3dup' + assert results['NM_003289.3:c.773-3dup']['hgvs_refseqgene_variant'] == 'NG_011620.1:g.11814dup' + assert 
results['NM_003289.3:c.773-3dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683241dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} + assert results['NM_003289.3:c.773-3dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683244dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} + assert results['NM_003289.3:c.773-3dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683241dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} + assert results['NM_003289.3:c.773-3dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683244dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} + assert results['NM_003289.3:c.773-3dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011620.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003280.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003289.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_680.xml'} + + assert results['flag'] == 'gene_variant' + + def test_variant312(self): + variant = '9-135796754-G-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_000368.4:c.733C>T' in results.keys() + assert results['NM_000368.4:c.733C>T']['hgvs_lrg_transcript_variant'] == 'LRG_486t1:c.733C>T' + assert results['NM_000368.4:c.733C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000368.4:c.733C>T']['alt_genomic_loci'] == [] + assert results['NM_000368.4:c.733C>T']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA' + assert results['NM_000368.4:c.733C>T']['gene_symbol'] == 'TSC1' + assert results['NM_000368.4:c.733C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000359.1(LRG_486p1):p.(Arg245Ter)', 'slr': 'NP_000359.1:p.(R245*)'} + 
assert results['NM_000368.4:c.733C>T']['submitted_variant'] == '9-135796754-G-A' + assert results['NM_000368.4:c.733C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000368.4:c.733C>T']['hgvs_lrg_variant'] == 'LRG_486:g.28267C>T' + assert results['NM_000368.4:c.733C>T']['hgvs_transcript_variant'] == 'NM_000368.4:c.733C>T' + assert results['NM_000368.4:c.733C>T']['hgvs_refseqgene_variant'] == 'NG_012386.1:g.28267C>T' + assert results['NM_000368.4:c.733C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': 'chr9', 'ref': u'G', 'pos': '135796754', 'alt': u'A'}} + assert results['NM_000368.4:c.733C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': 'chr9', 'ref': u'G', 'pos': '132921367', 'alt': u'A'}} + assert results['NM_000368.4:c.733C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': '9', 'ref': u'G', 'pos': '135796754', 'alt': u'A'}} + assert results['NM_000368.4:c.733C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': '9', 'ref': u'G', 'pos': '132921367', 'alt': u'A'}} + assert results['NM_000368.4:c.733C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012386.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_486.xml'} + + assert 'NM_001162426.1:c.733C>T' in results.keys() + assert results['NM_001162426.1:c.733C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001162426.1:c.733C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001162426.1:c.733C>T']['alt_genomic_loci'] == [] + assert results['NM_001162426.1:c.733C>T']['transcript_description'] == 'Homo sapiens TSC complex 
subunit 1 (TSC1), transcript variant 3, mRNA' + assert results['NM_001162426.1:c.733C>T']['gene_symbol'] == 'TSC1' + assert results['NM_001162426.1:c.733C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001155898.1:p.(Arg245Ter)', 'slr': 'NP_001155898.1:p.(R245*)'} + assert results['NM_001162426.1:c.733C>T']['submitted_variant'] == '9-135796754-G-A' + assert results['NM_001162426.1:c.733C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001162426.1:c.733C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001162426.1:c.733C>T']['hgvs_transcript_variant'] == 'NM_001162426.1:c.733C>T' + assert results['NM_001162426.1:c.733C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001162426.1:c.733C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': 'chr9', 'ref': u'G', 'pos': '135796754', 'alt': u'A'}} + assert results['NM_001162426.1:c.733C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': 'chr9', 'ref': u'G', 'pos': '132921367', 'alt': u'A'}} + assert results['NM_001162426.1:c.733C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': '9', 'ref': u'G', 'pos': '135796754', 'alt': u'A'}} + assert results['NM_001162426.1:c.733C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': '9', 'ref': u'G', 'pos': '132921367', 'alt': u'A'}} + assert results['NM_001162426.1:c.733C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155898.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162426.1'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001362177.1:c.370C>T' in results.keys() + assert results['NM_001362177.1:c.370C>T']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001362177.1:c.370C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001362177.1:c.370C>T']['alt_genomic_loci'] == [] + assert results['NM_001362177.1:c.370C>T']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 5, mRNA' + assert results['NM_001362177.1:c.370C>T']['gene_symbol'] == 'TSC1' + assert results['NM_001362177.1:c.370C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001349106.1:p.(Arg124Ter)', 'slr': 'NP_001349106.1:p.(R124*)'} + assert results['NM_001362177.1:c.370C>T']['submitted_variant'] == '9-135796754-G-A' + assert results['NM_001362177.1:c.370C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001362177.1:c.370C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001362177.1:c.370C>T']['hgvs_transcript_variant'] == 'NM_001362177.1:c.370C>T' + assert results['NM_001362177.1:c.370C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001362177.1:c.370C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': 'chr9', 'ref': u'G', 'pos': '135796754', 'alt': u'A'}} + assert results['NM_001362177.1:c.370C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': 'chr9', 'ref': u'G', 'pos': '132921367', 'alt': u'A'}} + assert results['NM_001362177.1:c.370C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': '9', 'ref': u'G', 'pos': '135796754', 'alt': u'A'}} + assert results['NM_001362177.1:c.370C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': '9', 'ref': u'G', 'pos': '132921367', 'alt': u'A'}} + assert results['NM_001362177.1:c.370C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001349106.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001362177.1'} + + 
assert 'NM_001162427.1:c.580C>T' in results.keys() + assert results['NM_001162427.1:c.580C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001162427.1:c.580C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001162427.1:c.580C>T']['alt_genomic_loci'] == [] + assert results['NM_001162427.1:c.580C>T']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 4, mRNA' + assert results['NM_001162427.1:c.580C>T']['gene_symbol'] == 'TSC1' + assert results['NM_001162427.1:c.580C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001155899.1:p.(Arg194Ter)', 'slr': 'NP_001155899.1:p.(R194*)'} + assert results['NM_001162427.1:c.580C>T']['submitted_variant'] == '9-135796754-G-A' + assert results['NM_001162427.1:c.580C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001162427.1:c.580C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001162427.1:c.580C>T']['hgvs_transcript_variant'] == 'NM_001162427.1:c.580C>T' + assert results['NM_001162427.1:c.580C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001162427.1:c.580C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': 'chr9', 'ref': u'G', 'pos': '135796754', 'alt': u'A'}} + assert results['NM_001162427.1:c.580C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': 'chr9', 'ref': u'G', 'pos': '132921367', 'alt': u'A'}} + assert results['NM_001162427.1:c.580C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': '9', 'ref': u'G', 'pos': '135796754', 'alt': u'A'}} + assert results['NM_001162427.1:c.580C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': '9', 'ref': u'G', 'pos': '132921367', 'alt': u'A'}} + assert results['NM_001162427.1:c.580C>T']['reference_sequence_records'] == 
{'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155899.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162427.1'} + + + def test_variant313(self): + variant = 'HG536_PATCH-10391-AC-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_005247.2:c.616del' in results.keys() + assert results['NM_005247.2:c.616del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_005247.2:c.616del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_005247.2:c.616del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003571046.1:g.10392del', 'vcf': {'chr': 'HG536_PATCH', 'ref': 'AC', 'pos': '10391', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003571046.1:g.10392del', 'vcf': {'chr': 'NW_003571046.1', 'ref': 'AC', 'pos': '10391', 'alt': 'A'}}}] + assert results['NM_005247.2:c.616del']['transcript_description'] == 'Homo sapiens fibroblast growth factor 3 (FGF3), mRNA' + assert results['NM_005247.2:c.616del']['gene_symbol'] == 'FGF3' + assert results['NM_005247.2:c.616del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005238.1:p.(Val206SerfsTer117)', 'slr': 'NP_005238.1:p.(V206Sfs*117)'} + assert results['NM_005247.2:c.616del']['submitted_variant'] == 'HG536_PATCH-10391-AC-A' + assert results['NM_005247.2:c.616del']['genome_context_intronic_sequence'] == '' + assert results['NM_005247.2:c.616del']['hgvs_lrg_variant'] == '' + assert results['NM_005247.2:c.616del']['hgvs_transcript_variant'] == 'NM_005247.2:c.616del' + assert results['NM_005247.2:c.616del']['hgvs_refseqgene_variant'] == 'NG_009016.1:g.14016del' + assert results['NM_005247.2:c.616del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.69625177del', 'vcf': {'chr': 'chr11', 'ref': 'AC', 'pos': '69625176', 'alt': 'A'}} + assert results['NM_005247.2:c.616del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000011.10:g.69810409del', 'vcf': {'chr': 'chr11', 'ref': 'AC', 'pos': '69810408', 'alt': 'A'}} + assert results['NM_005247.2:c.616del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.69625177del', 'vcf': {'chr': '11', 'ref': 'AC', 'pos': '69625176', 'alt': 'A'}} + assert results['NM_005247.2:c.616del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.69810409del', 'vcf': {'chr': '11', 'ref': 'AC', 'pos': '69810408', 'alt': 'A'}} + assert results['NM_005247.2:c.616del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009016.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005238.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005247.2'} + + + def test_variant314(self): + variant = 'HG865_PATCH-33547-G-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NR_110766.1:n.833+969C>T' in results.keys() + assert results['NR_110766.1:n.833+969C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_110766.1:n.833+969C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_110766.1:n.833+969C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'ref': u'G', 'pos': '33547', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'NW_004070871.1', 'ref': u'G', 'pos': '33547', 'alt': u'A'}}}] + assert results['NR_110766.1:n.833+969C>T']['transcript_description'] == 'Homo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 3, non-coding RNA' + assert results['NR_110766.1:n.833+969C>T']['gene_symbol'] == 'SHANK2' + assert results['NR_110766.1:n.833+969C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_110766.1:n.833+969C>T']['submitted_variant'] == 'HG865_PATCH-33547-G-A' + assert 
results['NR_110766.1:n.833+969C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NR_110766.1):c.833+969C>T' + assert results['NR_110766.1:n.833+969C>T']['hgvs_lrg_variant'] == '' + assert results['NR_110766.1:n.833+969C>T']['hgvs_transcript_variant'] == 'NR_110766.1:n.833+969C>T' + assert results['NR_110766.1:n.833+969C>T']['hgvs_refseqgene_variant'] == '' + assert results['NR_110766.1:n.833+969C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '70335439', 'alt': u'A'}} + assert results['NR_110766.1:n.833+969C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '70489334', 'alt': u'A'}} + assert results['NR_110766.1:n.833+969C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '70335439', 'alt': u'A'}} + assert results['NR_110766.1:n.833+969C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '70489334', 'alt': u'A'}} + assert results['NR_110766.1:n.833+969C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_110766.1'} + + assert 'NM_012309.4:c.2566C>T' in results.keys() + assert results['NM_012309.4:c.2566C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_012309.4:c.2566C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_012309.4:c.2566C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'ref': u'G', 'pos': '33547', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'NW_004070871.1', 'ref': u'G', 'pos': '33547', 'alt': u'A'}}}] + assert results['NM_012309.4:c.2566C>T']['transcript_description'] == 'Homo 
sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 1, mRNA' + assert results['NM_012309.4:c.2566C>T']['gene_symbol'] == 'SHANK2' + assert results['NM_012309.4:c.2566C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_036441.2:p.(Leu856=)', 'slr': 'NP_036441.2:p.(L856=)'} + assert results['NM_012309.4:c.2566C>T']['submitted_variant'] == 'HG865_PATCH-33547-G-A' + assert results['NM_012309.4:c.2566C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_012309.4:c.2566C>T']['hgvs_lrg_variant'] == '' + assert results['NM_012309.4:c.2566C>T']['hgvs_transcript_variant'] == 'NM_012309.4:c.2566C>T' + assert results['NM_012309.4:c.2566C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_012309.4:c.2566C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.70336423G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '70336423', 'alt': u'A'}} + assert results['NM_012309.4:c.2566C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '70489334', 'alt': u'A'}} + assert results['NM_012309.4:c.2566C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.70336423G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '70336423', 'alt': u'A'}} + assert results['NM_012309.4:c.2566C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '70489334', 'alt': u'A'}} + assert results['NM_012309.4:c.2566C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_036441.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_012309.4'} + + assert 'NM_133266.4:c.802C>T' in results.keys() + assert results['NM_133266.4:c.802C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_133266.4:c.802C>T']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_133266.4:c.802C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'ref': u'G', 'pos': '33547', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'NW_004070871.1', 'ref': u'G', 'pos': '33547', 'alt': u'A'}}}] + assert results['NM_133266.4:c.802C>T']['transcript_description'] == 'Homo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 2, mRNA' + assert results['NM_133266.4:c.802C>T']['gene_symbol'] == 'SHANK2' + assert results['NM_133266.4:c.802C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_573573.2:p.(Leu268=)', 'slr': 'NP_573573.2:p.(L268=)'} + assert results['NM_133266.4:c.802C>T']['submitted_variant'] == 'HG865_PATCH-33547-G-A' + assert results['NM_133266.4:c.802C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_133266.4:c.802C>T']['hgvs_lrg_variant'] == '' + assert results['NM_133266.4:c.802C>T']['hgvs_transcript_variant'] == 'NM_133266.4:c.802C>T' + assert results['NM_133266.4:c.802C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_133266.4:c.802C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '70335439', 'alt': u'A'}} + assert results['NM_133266.4:c.802C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '70489334', 'alt': u'A'}} + assert results['NM_133266.4:c.802C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '70335439', 'alt': u'A'}} + assert results['NM_133266.4:c.802C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '70489334', 'alt': u'A'}} + assert 
results['NM_133266.4:c.802C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_573573.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_133266.4'} + + assert results['flag'] == 'gene_variant' + assert 'NM_133266.3:c.802C>T' in results.keys() + assert results['NM_133266.3:c.802C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_133266.3:c.802C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_133266.3:c.802C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'ref': u'G', 'pos': '33547', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'NW_004070871.1', 'ref': u'G', 'pos': '33547', 'alt': u'A'}}}] + assert results['NM_133266.3:c.802C>T']['transcript_description'] == 'Homo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 2, mRNA' + assert results['NM_133266.3:c.802C>T']['gene_symbol'] == 'SHANK2' + assert results['NM_133266.3:c.802C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_573573.2:p.(Leu268=)', 'slr': 'NP_573573.2:p.(L268=)'} + assert results['NM_133266.3:c.802C>T']['submitted_variant'] == 'HG865_PATCH-33547-G-A' + assert results['NM_133266.3:c.802C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_133266.3:c.802C>T']['hgvs_lrg_variant'] == '' + assert results['NM_133266.3:c.802C>T']['hgvs_transcript_variant'] == 'NM_133266.3:c.802C>T' + assert results['NM_133266.3:c.802C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_133266.3:c.802C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '70335439', 'alt': u'A'}} + assert 'hg38' not in results['NM_133266.3:c.802C>T']['primary_assembly_loci'].keys() + assert results['NM_133266.3:c.802C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000011.9:g.70335439G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '70335439', 'alt': u'A'}} + assert 'grch38' not in results['NM_133266.3:c.802C>T']['primary_assembly_loci'].keys() + assert results['NM_133266.3:c.802C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_573573.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_133266.3'} + + + def test_variant315(self): + variant = 'HG865_PATCH-569441-G-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_012309.4:c.960C>A' in results.keys() + assert results['NM_012309.4:c.960C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_012309.4:c.960C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_012309.4:c.960C>A']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.569441G>T', 'vcf': {'chr': 'HG865_PATCH', 'ref': u'G', 'pos': '569441', 'alt': u'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.569441G>T', 'vcf': {'chr': 'NW_004070871.1', 'ref': u'G', 'pos': '569441', 'alt': u'T'}}}] + assert results['NM_012309.4:c.960C>A']['transcript_description'] == 'Homo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 1, mRNA' + assert results['NM_012309.4:c.960C>A']['gene_symbol'] == 'SHANK2' + assert results['NM_012309.4:c.960C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_036441.2:p.(Tyr320Ter)', 'slr': 'NP_036441.2:p.(Y320*)'} + assert results['NM_012309.4:c.960C>A']['submitted_variant'] == 'HG865_PATCH-569441-G-T' + assert results['NM_012309.4:c.960C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_012309.4:c.960C>A']['hgvs_lrg_variant'] == '' + assert results['NM_012309.4:c.960C>A']['hgvs_transcript_variant'] == 'NM_012309.4:c.960C>A' + assert results['NM_012309.4:c.960C>A']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in 
results['NM_012309.4:c.960C>A']['primary_assembly_loci'].keys() + assert results['NM_012309.4:c.960C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71075228G>T', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '71075228', 'alt': u'T'}} + assert 'grch37' not in results['NM_012309.4:c.960C>A']['primary_assembly_loci'].keys() + assert results['NM_012309.4:c.960C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71075228G>T', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '71075228', 'alt': u'T'}} + assert results['NM_012309.4:c.960C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_036441.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_012309.4'} + + + def test_variant316(self): + variant = 'HG865_PATCH-574546-C-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_012309.4:c.913-5058G>A' in results.keys() + assert results['NM_012309.4:c.913-5058G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_012309.4:c.913-5058G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_012309.4:c.913-5058G>A']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'HG865_PATCH', 'ref': u'C', 'pos': '574546', 'alt': u'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'NW_004070871.1', 'ref': u'C', 'pos': '574546', 'alt': u'T'}}}] + assert results['NM_012309.4:c.913-5058G>A']['transcript_description'] == 'Homo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 1, mRNA' + assert results['NM_012309.4:c.913-5058G>A']['gene_symbol'] == 'SHANK2' + assert results['NM_012309.4:c.913-5058G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_036441.2:p.?', 'slr': 'NP_036441.2:p.?'} + assert results['NM_012309.4:c.913-5058G>A']['submitted_variant'] == 
'HG865_PATCH-574546-C-T' + assert results['NM_012309.4:c.913-5058G>A']['genome_context_intronic_sequence'] == 'NC_000011.10(NM_012309.4):c.913-5058G>A' + assert results['NM_012309.4:c.913-5058G>A']['hgvs_lrg_variant'] == '' + assert results['NM_012309.4:c.913-5058G>A']['hgvs_transcript_variant'] == 'NM_012309.4:c.913-5058G>A' + assert results['NM_012309.4:c.913-5058G>A']['hgvs_refseqgene_variant'] == '' + assert 'hg19' not in results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci'].keys() + assert results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71080333C>T', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '71080333', 'alt': u'T'}} + assert 'grch37' not in results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci'].keys() + assert results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71080333C>T', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '71080333', 'alt': u'T'}} + assert results['NM_012309.4:c.913-5058G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_036441.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_012309.4'} + + + def test_variant317(self): + variant = 'HSCHR1_1_CTG31-133178-TAG-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_020699.2:c.802_803insTT' in results.keys() + assert results['NM_020699.2:c.802_803insTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_020699.2:c.802_803insTT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_020699.2:c.802_803insTT']['alt_genomic_loci'] == [] + assert results['NM_020699.2:c.802_803insTT']['transcript_description'] == 'Homo sapiens GATA zinc finger domain containing 2B (GATAD2B), mRNA' + assert results['NM_020699.2:c.802_803insTT']['gene_symbol'] == 'GATAD2B' + assert 
results['NM_020699.2:c.802_803insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065750.1:p.(Pro268LeufsTer26)', 'slr': 'NP_065750.1:p.(P268Lfs*26)'} + assert results['NM_020699.2:c.802_803insTT']['submitted_variant'] == 'HSCHR1_1_CTG31-133178-TAG-T' + assert results['NM_020699.2:c.802_803insTT']['genome_context_intronic_sequence'] == '' + assert results['NM_020699.2:c.802_803insTT']['hgvs_lrg_variant'] == '' + assert results['NM_020699.2:c.802_803insTT']['hgvs_transcript_variant'] == 'NM_020699.2:c.802_803insTT' + assert results['NM_020699.2:c.802_803insTT']['hgvs_refseqgene_variant'] == '' + assert results['NM_020699.2:c.802_803insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.153789945_153789946delinsGAAG', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '153789945', 'alt': u'GAA'}} + assert results['NM_020699.2:c.802_803insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153817469_153817470insAA', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '153817469', 'alt': u'GAA'}} + assert results['NM_020699.2:c.802_803insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.153789945_153789946delinsGAAG', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '153789945', 'alt': u'GAA'}} + assert results['NM_020699.2:c.802_803insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153817469_153817470insAA', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '153817469', 'alt': u'GAA'}} + assert results['NM_020699.2:c.802_803insTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065750.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020699.2'} + + + def test_variant318(self): + variant = 'HSCHR6_MHC_MANN_CTG1-3848158-T-G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_021983.4:c.490G>C' in results.keys() + assert 
results['NM_021983.4:c.490G>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_021983.4:c.490G>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_021983.4:c.490G>C']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3848158', 'alt': u'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'chr6_mann_hap4', 'ref': 'T', 'pos': '3848158', 'alt': u'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3842538', 'alt': u'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'ref': 'T', 'pos': '3842538', 'alt': u'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': u'C', 'pos': '3884432', 'alt': u'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': u'C', 'pos': '3884432', 'alt': u'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': u'C', 'pos': '3852542', 'alt': u'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': u'C', 'pos': '3852542', 'alt': u'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': u'C', 'pos': '3853244', 'alt': u'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': u'C', 'pos': '3853244', 'alt': u'G'}}}] + assert results['NM_021983.4:c.490G>C']['transcript_description'] == 'Homo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA' + assert results['NM_021983.4:c.490G>C']['gene_symbol'] == 'HLA-DRB4' + assert 
results['NM_021983.4:c.490G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068818.4:p.(Gly164Arg)', 'slr': 'NP_068818.4:p.(G164R)'} + assert results['NM_021983.4:c.490G>C']['submitted_variant'] == 'HSCHR6_MHC_MANN_CTG1-3848158-T-G' + assert results['NM_021983.4:c.490G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_021983.4:c.490G>C']['hgvs_lrg_variant'] == '' + assert results['NM_021983.4:c.490G>C']['hgvs_transcript_variant'] == 'NM_021983.4:c.490G>C' + assert results['NM_021983.4:c.490G>C']['hgvs_refseqgene_variant'] == 'NG_002433.1:g.5724C>G' + assert 'hg19' not in results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys() + assert 'hg38' not in results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys() + assert 'grch37' not in results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys() + assert 'grch38' not in results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys() + assert results['NM_021983.4:c.490G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_002433.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_068818.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021983.4'} + + + def test_variant319(self): + variant = 'HSCHR6_MHC_MANN_CTG1-3851043-C-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_021983.4:c.346G>T' in results.keys() + assert results['NM_021983.4:c.346G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_021983.4:c.346G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_021983.4:c.346G>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3851043C>A', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': u'C', 'pos': '3851043', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167246.1:g.3851043C>A', 'vcf': {'chr': 'chr6_mann_hap4', 'ref': u'C', 'pos': '3851043', 'alt': u'A'}}}, {'grch38': 
{'hgvs_genomic_description': 'NT_167246.2:g.3845423C>A', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': u'C', 'pos': '3845423', 'alt': u'A'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3845423C>A', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'ref': u'C', 'pos': '3845423', 'alt': u'A'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3887313C>A', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': u'C', 'pos': '3887313', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3887313C>A', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': u'C', 'pos': '3887313', 'alt': u'A'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3855423C>A', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': u'C', 'pos': '3855423', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3855423C>A', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': u'C', 'pos': '3855423', 'alt': u'A'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3856125C>A', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': u'C', 'pos': '3856125', 'alt': u'A'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3856125C>A', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': u'C', 'pos': '3856125', 'alt': u'A'}}}] + assert results['NM_021983.4:c.346G>T']['transcript_description'] == 'Homo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA' + assert results['NM_021983.4:c.346G>T']['gene_symbol'] == 'HLA-DRB4' + assert results['NM_021983.4:c.346G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068818.4:p.(Glu116Ter)', 'slr': 'NP_068818.4:p.(E116*)'} + assert results['NM_021983.4:c.346G>T']['submitted_variant'] == 'HSCHR6_MHC_MANN_CTG1-3851043-C-A' + assert results['NM_021983.4:c.346G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_021983.4:c.346G>T']['hgvs_lrg_variant'] == '' + assert results['NM_021983.4:c.346G>T']['hgvs_transcript_variant'] == 'NM_021983.4:c.346G>T' + assert results['NM_021983.4:c.346G>T']['hgvs_refseqgene_variant'] == 
'NG_002433.1:g.8605C>A' + assert 'hg19' not in results['NM_021983.4:c.346G>T']['primary_assembly_loci'].keys() + assert 'hg38' not in results['NM_021983.4:c.346G>T']['primary_assembly_loci'].keys() + assert 'grch37' not in results['NM_021983.4:c.346G>T']['primary_assembly_loci'].keys() + assert 'grch38' not in results['NM_021983.4:c.346G>T']['primary_assembly_loci'].keys() + assert results['NM_021983.4:c.346G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_002433.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_068818.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021983.4'} + + + def test_variant320(self): + variant = 'X-70443101-C-T' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_001097642.2:c.-16-441C>T' in results.keys() + assert results['NM_001097642.2:c.-16-441C>T']['hgvs_lrg_transcript_variant'] == 'LRG_245t1:c.-16-441C>T' + assert results['NM_001097642.2:c.-16-441C>T']['refseqgene_context_intronic_sequence'] == 'NG_008357.1(NM_001097642.2):c.-16-441C>T' + assert results['NM_001097642.2:c.-16-441C>T']['alt_genomic_loci'] == [] + assert results['NM_001097642.2:c.-16-441C>T']['transcript_description'] == 'Homo sapiens gap junction protein beta 1 (GJB1), transcript variant 1, mRNA' + assert results['NM_001097642.2:c.-16-441C>T']['gene_symbol'] == 'GJB1' + assert results['NM_001097642.2:c.-16-441C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001091111.1:p.?', 'slr': 'NP_001091111.1:p.?'} + assert results['NM_001097642.2:c.-16-441C>T']['submitted_variant'] == 'X-70443101-C-T' + assert results['NM_001097642.2:c.-16-441C>T']['genome_context_intronic_sequence'] == 'NC_000023.10(NM_001097642.2):c.-16-441C>T' + assert results['NM_001097642.2:c.-16-441C>T']['hgvs_lrg_variant'] == 'LRG_245:g.13040C>T' + assert results['NM_001097642.2:c.-16-441C>T']['hgvs_transcript_variant'] == 'NM_001097642.2:c.-16-441C>T' + 
assert results['NM_001097642.2:c.-16-441C>T']['hgvs_refseqgene_variant'] == 'NG_008357.1:g.13040C>T' + assert results['NM_001097642.2:c.-16-441C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.70443101C>T', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '70443101', 'alt': 'T'}} + assert results['NM_001097642.2:c.-16-441C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.71223251C>T', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '71223251', 'alt': 'T'}} + assert results['NM_001097642.2:c.-16-441C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.70443101C>T', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '70443101', 'alt': 'T'}} + assert results['NM_001097642.2:c.-16-441C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.71223251C>T', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '71223251', 'alt': 'T'}} + assert results['NM_001097642.2:c.-16-441C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008357.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001091111.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001097642.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_245.xml'} + + assert 'NM_000166.5:c.-101C>T' in results.keys() + assert results['NM_000166.5:c.-101C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000166.5:c.-101C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000166.5:c.-101C>T']['alt_genomic_loci'] == [] + assert results['NM_000166.5:c.-101C>T']['transcript_description'] == 'Homo sapiens gap junction protein beta 1 (GJB1), transcript variant 2, mRNA' + assert results['NM_000166.5:c.-101C>T']['gene_symbol'] == 'GJB1' + assert results['NM_000166.5:c.-101C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000157.1:p.?', 'slr': 'NP_000157.1:p.?'} + assert results['NM_000166.5:c.-101C>T']['submitted_variant'] == 
'X-70443101-C-T' + assert results['NM_000166.5:c.-101C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000166.5:c.-101C>T']['hgvs_lrg_variant'] == '' + assert results['NM_000166.5:c.-101C>T']['hgvs_transcript_variant'] == 'NM_000166.5:c.-101C>T' + assert results['NM_000166.5:c.-101C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_000166.5:c.-101C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.70443101C>T', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '70443101', 'alt': 'T'}} + assert results['NM_000166.5:c.-101C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.71223251C>T', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '71223251', 'alt': 'T'}} + assert results['NM_000166.5:c.-101C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.70443101C>T', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '70443101', 'alt': 'T'}} + assert results['NM_000166.5:c.-101C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.71223251C>T', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '71223251', 'alt': 'T'}} + assert results['NM_000166.5:c.-101C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000157.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000166.5'} + + + def test_variant321(self): + variant = 'X-107845202-GACCACC-GACC,G' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_033380.2:c.2130_2135del' in results.keys() + assert results['NM_033380.2:c.2130_2135del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_033380.2:c.2130_2135del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_033380.2:c.2130_2135del']['alt_genomic_loci'] == [] + assert results['NM_033380.2:c.2130_2135del']['transcript_description'] == 'Homo sapiens collagen type IV alpha 5 chain (COL4A5), transcript variant 2, mRNA' + assert 
results['NM_033380.2:c.2130_2135del']['gene_symbol'] == 'COL4A5' + assert results['NM_033380.2:c.2130_2135del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_203699.1(LRG_232p2):p.(Pro711_Pro712del)', 'slr': 'NP_203699.1:p.(P711_P712del)'} + assert results['NM_033380.2:c.2130_2135del']['submitted_variant'] == 'X-107845202-GACCACC-GACC,G' + assert results['NM_033380.2:c.2130_2135del']['genome_context_intronic_sequence'] == '' + assert results['NM_033380.2:c.2130_2135del']['hgvs_lrg_variant'] == '' + assert results['NM_033380.2:c.2130_2135del']['hgvs_transcript_variant'] == 'NM_033380.2:c.2130_2135del' + assert results['NM_033380.2:c.2130_2135del']['hgvs_refseqgene_variant'] == '' + assert results['NM_033380.2:c.2130_2135del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.107845203_107845208del', 'vcf': {'chr': 'chrX', 'ref': 'GACCACC', 'pos': '107845202', 'alt': 'G'}} + assert results['NM_033380.2:c.2130_2135del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.108601973_108601978del', 'vcf': {'chr': 'chrX', 'ref': 'GACCACC', 'pos': '108601972', 'alt': 'G'}} + assert results['NM_033380.2:c.2130_2135del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.107845203_107845208del', 'vcf': {'chr': 'X', 'ref': 'GACCACC', 'pos': '107845202', 'alt': 'G'}} + assert results['NM_033380.2:c.2130_2135del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.108601973_108601978del', 'vcf': {'chr': 'X', 'ref': 'GACCACC', 'pos': '108601972', 'alt': 'G'}} + assert results['NM_033380.2:c.2130_2135del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_203699.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_033380.2'} + + assert 'NM_000495.4:c.2130_2135del' in results.keys() + assert results['NM_000495.4:c.2130_2135del']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_000495.4:c.2130_2135del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000495.4:c.2130_2135del']['alt_genomic_loci'] == [] + assert results['NM_000495.4:c.2130_2135del']['transcript_description'] == 'Homo sapiens collagen type IV alpha 5 chain (COL4A5), transcript variant 1, mRNA' + assert results['NM_000495.4:c.2130_2135del']['gene_symbol'] == 'COL4A5' + assert results['NM_000495.4:c.2130_2135del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000486.1(LRG_232p1):p.(Pro711_Pro712del)', 'slr': 'NP_000486.1:p.(P711_P712del)'} + assert results['NM_000495.4:c.2130_2135del']['submitted_variant'] == 'X-107845202-GACCACC-GACC,G' + assert results['NM_000495.4:c.2130_2135del']['genome_context_intronic_sequence'] == '' + assert results['NM_000495.4:c.2130_2135del']['hgvs_lrg_variant'] == '' + assert results['NM_000495.4:c.2130_2135del']['hgvs_transcript_variant'] == 'NM_000495.4:c.2130_2135del' + assert results['NM_000495.4:c.2130_2135del']['hgvs_refseqgene_variant'] == '' + assert results['NM_000495.4:c.2130_2135del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.107845203_107845208del', 'vcf': {'chr': 'chrX', 'ref': 'GACCACC', 'pos': '107845202', 'alt': 'G'}} + assert results['NM_000495.4:c.2130_2135del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.108601973_108601978del', 'vcf': {'chr': 'chrX', 'ref': 'GACCACC', 'pos': '108601972', 'alt': 'G'}} + assert results['NM_000495.4:c.2130_2135del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.107845203_107845208del', 'vcf': {'chr': 'X', 'ref': 'GACCACC', 'pos': '107845202', 'alt': 'G'}} + assert results['NM_000495.4:c.2130_2135del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.108601973_108601978del', 'vcf': {'chr': 'X', 'ref': 'GACCACC', 'pos': '108601972', 'alt': 'G'}} + assert 
results['NM_000495.4:c.2130_2135del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000486.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000495.4'} + + assert results['flag'] == 'gene_variant' + assert 'NM_000495.4:c.2133_2135del' in results.keys() + assert results['NM_000495.4:c.2133_2135del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000495.4:c.2133_2135del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000495.4:c.2133_2135del']['alt_genomic_loci'] == [] + assert results['NM_000495.4:c.2133_2135del']['transcript_description'] == 'Homo sapiens collagen type IV alpha 5 chain (COL4A5), transcript variant 1, mRNA' + assert results['NM_000495.4:c.2133_2135del']['gene_symbol'] == 'COL4A5' + assert results['NM_000495.4:c.2133_2135del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000486.1(LRG_232p1):p.(Pro712del)', 'slr': 'NP_000486.1:p.(P712del)'} + assert results['NM_000495.4:c.2133_2135del']['submitted_variant'] == 'X-107845202-GACCACC-GACC,G' + assert results['NM_000495.4:c.2133_2135del']['genome_context_intronic_sequence'] == '' + assert results['NM_000495.4:c.2133_2135del']['hgvs_lrg_variant'] == '' + assert results['NM_000495.4:c.2133_2135del']['hgvs_transcript_variant'] == 'NM_000495.4:c.2133_2135del' + assert results['NM_000495.4:c.2133_2135del']['hgvs_refseqgene_variant'] == '' + assert results['NM_000495.4:c.2133_2135del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.107845206_107845208del', 'vcf': {'chr': 'chrX', 'ref': 'GACC', 'pos': '107845202', 'alt': 'G'}} + assert results['NM_000495.4:c.2133_2135del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.108601976_108601978del', 'vcf': {'chr': 'chrX', 'ref': 'GACC', 'pos': '108601972', 'alt': 'G'}} + assert results['NM_000495.4:c.2133_2135del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000023.10:g.107845206_107845208del', 'vcf': {'chr': 'X', 'ref': 'GACC', 'pos': '107845202', 'alt': 'G'}} + assert results['NM_000495.4:c.2133_2135del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.108601976_108601978del', 'vcf': {'chr': 'X', 'ref': 'GACC', 'pos': '108601972', 'alt': 'G'}} + assert results['NM_000495.4:c.2133_2135del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000486.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000495.4'} + + assert 'NM_033380.2:c.2133_2135del' in results.keys() + assert results['NM_033380.2:c.2133_2135del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_033380.2:c.2133_2135del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_033380.2:c.2133_2135del']['alt_genomic_loci'] == [] + assert results['NM_033380.2:c.2133_2135del']['transcript_description'] == 'Homo sapiens collagen type IV alpha 5 chain (COL4A5), transcript variant 2, mRNA' + assert results['NM_033380.2:c.2133_2135del']['gene_symbol'] == 'COL4A5' + assert results['NM_033380.2:c.2133_2135del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_203699.1(LRG_232p2):p.(Pro712del)', 'slr': 'NP_203699.1:p.(P712del)'} + assert results['NM_033380.2:c.2133_2135del']['submitted_variant'] == 'X-107845202-GACCACC-GACC,G' + assert results['NM_033380.2:c.2133_2135del']['genome_context_intronic_sequence'] == '' + assert results['NM_033380.2:c.2133_2135del']['hgvs_lrg_variant'] == '' + assert results['NM_033380.2:c.2133_2135del']['hgvs_transcript_variant'] == 'NM_033380.2:c.2133_2135del' + assert results['NM_033380.2:c.2133_2135del']['hgvs_refseqgene_variant'] == '' + assert results['NM_033380.2:c.2133_2135del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.107845206_107845208del', 'vcf': {'chr': 'chrX', 'ref': 'GACC', 'pos': '107845202', 'alt': 'G'}} + assert 
results['NM_033380.2:c.2133_2135del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.108601976_108601978del', 'vcf': {'chr': 'chrX', 'ref': 'GACC', 'pos': '108601972', 'alt': 'G'}} + assert results['NM_033380.2:c.2133_2135del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.107845206_107845208del', 'vcf': {'chr': 'X', 'ref': 'GACC', 'pos': '107845202', 'alt': 'G'}} + assert results['NM_033380.2:c.2133_2135del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.108601976_108601978del', 'vcf': {'chr': 'X', 'ref': 'GACC', 'pos': '108601972', 'alt': 'G'}} + assert results['NM_033380.2:c.2133_2135del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_203699.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_033380.2'} + + + def test_variant322(self): + variant = 'X-153296777-G-A' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_004992.3:c.502C>T' in results.keys() + assert results['NM_004992.3:c.502C>T']['hgvs_lrg_transcript_variant'] == 'LRG_764t2:c.502C>T' + assert results['NM_004992.3:c.502C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004992.3:c.502C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'HG1497_PATCH', 'ref': u'G', 'pos': '1465305', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'NW_003871103.3', 'ref': u'G', 'pos': '1465305', 'alt': u'A'}}}] + assert results['NM_004992.3:c.502C>T']['transcript_description'] == 'Homo sapiens methyl-CpG binding protein 2 (MECP2), transcript variant 1, mRNA' + assert results['NM_004992.3:c.502C>T']['gene_symbol'] == 'MECP2' + assert results['NM_004992.3:c.502C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004983.1(LRG_764p2):p.(Arg168Ter)', 'slr': 'NP_004983.1:p.(R168*)'} + assert 
results['NM_004992.3:c.502C>T']['submitted_variant'] == 'X-153296777-G-A' + assert results['NM_004992.3:c.502C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_004992.3:c.502C>T']['hgvs_lrg_variant'] == 'LRG_764:g.110802C>T' + assert results['NM_004992.3:c.502C>T']['hgvs_transcript_variant'] == 'NM_004992.3:c.502C>T' + assert results['NM_004992.3:c.502C>T']['hgvs_refseqgene_variant'] == 'NG_007107.2:g.110802C>T' + assert results['NM_004992.3:c.502C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'chrX', 'ref': u'G', 'pos': '153296777', 'alt': u'A'}} + assert results['NM_004992.3:c.502C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'chrX', 'ref': u'G', 'pos': '154031326', 'alt': u'A'}} + assert results['NM_004992.3:c.502C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'X', 'ref': u'G', 'pos': '153296777', 'alt': u'A'}} + assert results['NM_004992.3:c.502C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'X', 'ref': u'G', 'pos': '154031326', 'alt': u'A'}} + assert results['NM_004992.3:c.502C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007107.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004983.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004992.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_764.xml'} + + assert results['flag'] == 'gene_variant' + assert 'NM_001316337.1:c.223C>T' in results.keys() + assert results['NM_001316337.1:c.223C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001316337.1:c.223C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001316337.1:c.223C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 
'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'HG1497_PATCH', 'ref': u'G', 'pos': '1465305', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'NW_003871103.3', 'ref': u'G', 'pos': '1465305', 'alt': u'A'}}}] + assert results['NM_001316337.1:c.223C>T']['transcript_description'] == 'Homo sapiens methyl-CpG binding protein 2 (MECP2), transcript variant 3, mRNA' + assert results['NM_001316337.1:c.223C>T']['gene_symbol'] == 'MECP2' + assert results['NM_001316337.1:c.223C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001303266.1:p.(Arg75Ter)', 'slr': 'NP_001303266.1:p.(R75*)'} + assert results['NM_001316337.1:c.223C>T']['submitted_variant'] == 'X-153296777-G-A' + assert results['NM_001316337.1:c.223C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001316337.1:c.223C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001316337.1:c.223C>T']['hgvs_transcript_variant'] == 'NM_001316337.1:c.223C>T' + assert results['NM_001316337.1:c.223C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001316337.1:c.223C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'chrX', 'ref': u'G', 'pos': '153296777', 'alt': u'A'}} + assert results['NM_001316337.1:c.223C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'chrX', 'ref': u'G', 'pos': '154031326', 'alt': u'A'}} + assert results['NM_001316337.1:c.223C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'X', 'ref': u'G', 'pos': '153296777', 'alt': u'A'}} + assert results['NM_001316337.1:c.223C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'X', 'ref': u'G', 'pos': '154031326', 'alt': u'A'}} + assert results['NM_001316337.1:c.223C>T']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001303266.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001316337.1'} + + assert 'NM_001110792.1:c.538C>T' in results.keys() + assert results['NM_001110792.1:c.538C>T']['hgvs_lrg_transcript_variant'] == 'LRG_764t1:c.538C>T' + assert results['NM_001110792.1:c.538C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001110792.1:c.538C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'HG1497_PATCH', 'ref': u'G', 'pos': '1465305', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'NW_003871103.3', 'ref': u'G', 'pos': '1465305', 'alt': u'A'}}}] + assert results['NM_001110792.1:c.538C>T']['transcript_description'] == 'Homo sapiens methyl-CpG binding protein 2 (MECP2), transcript variant 2, mRNA' + assert results['NM_001110792.1:c.538C>T']['gene_symbol'] == 'MECP2' + assert results['NM_001110792.1:c.538C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001104262.1:p.(Arg180Ter)', 'slr': 'NP_001104262.1:p.(R180*)'} + assert results['NM_001110792.1:c.538C>T']['submitted_variant'] == 'X-153296777-G-A' + assert results['NM_001110792.1:c.538C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001110792.1:c.538C>T']['hgvs_lrg_variant'] == 'LRG_764:g.110802C>T' + assert results['NM_001110792.1:c.538C>T']['hgvs_transcript_variant'] == 'NM_001110792.1:c.538C>T' + assert results['NM_001110792.1:c.538C>T']['hgvs_refseqgene_variant'] == 'NG_007107.2:g.110802C>T' + assert results['NM_001110792.1:c.538C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'chrX', 'ref': u'G', 'pos': '153296777', 'alt': u'A'}} + assert results['NM_001110792.1:c.538C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'chrX', 'ref': u'G', 'pos': '154031326', 'alt': u'A'}} + 
assert results['NM_001110792.1:c.538C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'X', 'ref': u'G', 'pos': '153296777', 'alt': u'A'}} + assert results['NM_001110792.1:c.538C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'X', 'ref': u'G', 'pos': '154031326', 'alt': u'A'}} + assert results['NM_001110792.1:c.538C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007107.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001104262.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001110792.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_764.xml'} + + + def test_variant323(self): + variant = 'NM_198180.2:c.408_410delGTG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_198180.2:c.408_410del' in results.keys() + assert results['NM_198180.2:c.408_410del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_198180.2:c.408_410del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_198180.2:c.408_410del']['alt_genomic_loci'] == [] + assert results['NM_198180.2:c.408_410del']['transcript_description'] == 'Homo sapiens pyroglutamylated RFamide peptide (QRFP), mRNA' + assert results['NM_198180.2:c.408_410del']['gene_symbol'] == 'QRFP' + assert results['NM_198180.2:c.408_410del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_937823.1:p.?', 'slr': 'NP_937823.1:p.?'} + assert results['NM_198180.2:c.408_410del']['submitted_variant'] == 'NM_198180.2:c.408_410delGTG' + assert results['NM_198180.2:c.408_410del']['genome_context_intronic_sequence'] == '' + assert results['NM_198180.2:c.408_410del']['hgvs_lrg_variant'] == '' + assert results['NM_198180.2:c.408_410del']['hgvs_transcript_variant'] == 'NM_198180.2:c.408_410del' + assert 
results['NM_198180.2:c.408_410del']['hgvs_refseqgene_variant'] == '' + assert results['NM_198180.2:c.408_410del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.133768816_133768818del', 'vcf': {'chr': 'chr9', 'ref': 'TCAC', 'pos': '133768815', 'alt': 'T'}} + assert results['NM_198180.2:c.408_410del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.130893429_130893431del', 'vcf': {'chr': 'chr9', 'ref': 'TCAC', 'pos': '130893428', 'alt': 'T'}} + assert results['NM_198180.2:c.408_410del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.133768816_133768818del', 'vcf': {'chr': '9', 'ref': 'TCAC', 'pos': '133768815', 'alt': 'T'}} + assert results['NM_198180.2:c.408_410del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.130893429_130893431del', 'vcf': {'chr': '9', 'ref': 'TCAC', 'pos': '130893428', 'alt': 'T'}} + assert results['NM_198180.2:c.408_410del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_937823.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198180.2'} + + + def test_variant324(self): + variant = 'NM_080877.2:c.1733_1735delinsTTT' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_080877.2:c.1733_1735delinsTTT' in results.keys() + assert results['NM_080877.2:c.1733_1735delinsTTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_080877.2:c.1733_1735delinsTTT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_080877.2:c.1733_1735delinsTTT']['alt_genomic_loci'] == [] + assert results['NM_080877.2:c.1733_1735delinsTTT']['transcript_description'] == 'Homo sapiens solute carrier family 34 member 3 (SLC34A3), transcript variant 3, mRNA' + assert results['NM_080877.2:c.1733_1735delinsTTT']['gene_symbol'] == 'SLC34A3' + assert results['NM_080877.2:c.1733_1735delinsTTT']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_543153.1:p.(Pro578_Lys579delinsLeuTer)', 'slr': 'NP_543153.1:p.(P578_K579delinsL*)'} + assert results['NM_080877.2:c.1733_1735delinsTTT']['submitted_variant'] == 'NM_080877.2:c.1733_1735delinsTTT' + assert results['NM_080877.2:c.1733_1735delinsTTT']['genome_context_intronic_sequence'] == '' + assert results['NM_080877.2:c.1733_1735delinsTTT']['hgvs_lrg_variant'] == '' + assert results['NM_080877.2:c.1733_1735delinsTTT']['hgvs_transcript_variant'] == 'NM_080877.2:c.1733_1735delinsTTT' + assert results['NM_080877.2:c.1733_1735delinsTTT']['hgvs_refseqgene_variant'] == '' + assert results['NM_080877.2:c.1733_1735delinsTTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.140130801_140130803delinsTTT', 'vcf': {'chr': 'chr9', 'ref': 'CGA', 'pos': '140130801', 'alt': 'TTT'}} + assert results['NM_080877.2:c.1733_1735delinsTTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.137236349_137236351delinsTTT', 'vcf': {'chr': 'chr9', 'ref': 'CGA', 'pos': '137236349', 'alt': 'TTT'}} + assert results['NM_080877.2:c.1733_1735delinsTTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.140130801_140130803delinsTTT', 'vcf': {'chr': '9', 'ref': 'CGA', 'pos': '140130801', 'alt': 'TTT'}} + assert results['NM_080877.2:c.1733_1735delinsTTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.137236349_137236351delinsTTT', 'vcf': {'chr': '9', 'ref': 'CGA', 'pos': '137236349', 'alt': 'TTT'}} + assert results['NM_080877.2:c.1733_1735delinsTTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_543153.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_080877.2'} + + assert results['flag'] == 'gene_variant' + + def test_variant325(self): + variant = 'NM_080877.2:c.1735_1737delinsTGA' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_080877.2:c.1735_1737delinsTGA' in results.keys() 
+ assert results['NM_080877.2:c.1735_1737delinsTGA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_080877.2:c.1735_1737delinsTGA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_080877.2:c.1735_1737delinsTGA']['alt_genomic_loci'] == [] + assert results['NM_080877.2:c.1735_1737delinsTGA']['transcript_description'] == 'Homo sapiens solute carrier family 34 member 3 (SLC34A3), transcript variant 3, mRNA' + assert results['NM_080877.2:c.1735_1737delinsTGA']['gene_symbol'] == 'SLC34A3' + assert results['NM_080877.2:c.1735_1737delinsTGA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_543153.1:p.(Lys579Ter)', 'slr': 'NP_543153.1:p.(K579*)'} + assert results['NM_080877.2:c.1735_1737delinsTGA']['submitted_variant'] == 'NM_080877.2:c.1735_1737delinsTGA' + assert results['NM_080877.2:c.1735_1737delinsTGA']['genome_context_intronic_sequence'] == '' + assert results['NM_080877.2:c.1735_1737delinsTGA']['hgvs_lrg_variant'] == '' + assert results['NM_080877.2:c.1735_1737delinsTGA']['hgvs_transcript_variant'] == 'NM_080877.2:c.1735_1737delinsTGA' + assert results['NM_080877.2:c.1735_1737delinsTGA']['hgvs_refseqgene_variant'] == '' + assert results['NM_080877.2:c.1735_1737delinsTGA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.140130803_140130805delinsTGA', 'vcf': {'chr': 'chr9', 'ref': 'AAG', 'pos': '140130803', 'alt': 'TGA'}} + assert results['NM_080877.2:c.1735_1737delinsTGA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.137236351_137236353delinsTGA', 'vcf': {'chr': 'chr9', 'ref': 'AAG', 'pos': '137236351', 'alt': 'TGA'}} + assert results['NM_080877.2:c.1735_1737delinsTGA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.140130803_140130805delinsTGA', 'vcf': {'chr': '9', 'ref': 'AAG', 'pos': '140130803', 'alt': 'TGA'}} + assert results['NM_080877.2:c.1735_1737delinsTGA']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000009.12:g.137236351_137236353delinsTGA', 'vcf': {'chr': '9', 'ref': 'AAG', 'pos': '137236351', 'alt': 'TGA'}} + assert results['NM_080877.2:c.1735_1737delinsTGA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_543153.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_080877.2'} + + assert results['flag'] == 'gene_variant' + + def test_variant326(self): + variant = 'NM_080877.2:c.1735_1737delinsTAATTGTTC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_080877.2:c.1735_1737delinsTAATTGTTC' in results.keys() + assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['alt_genomic_loci'] == [] + assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['transcript_description'] == 'Homo sapiens solute carrier family 34 member 3 (SLC34A3), transcript variant 3, mRNA' + assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['gene_symbol'] == 'SLC34A3' + assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_543153.1:p.(Lys579Ter)', 'slr': 'NP_543153.1:p.(K579*)'} + assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['submitted_variant'] == 'NM_080877.2:c.1735_1737delinsTAATTGTTC' + assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['genome_context_intronic_sequence'] == '' + assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['hgvs_lrg_variant'] == '' + assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['hgvs_transcript_variant'] == 'NM_080877.2:c.1735_1737delinsTAATTGTTC' + assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.140130803_140130805delinsTAATTGTTC', 'vcf': {'chr': 'chr9', 'ref': 'AAG', 'pos': '140130803', 'alt': 'TAATTGTTC'}} + assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.137236351_137236353delinsTAATTGTTC', 'vcf': {'chr': 'chr9', 'ref': 'AAG', 'pos': '137236351', 'alt': 'TAATTGTTC'}} + assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.140130803_140130805delinsTAATTGTTC', 'vcf': {'chr': '9', 'ref': 'AAG', 'pos': '140130803', 'alt': 'TAATTGTTC'}} + assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.137236351_137236353delinsTAATTGTTC', 'vcf': {'chr': '9', 'ref': 'AAG', 'pos': '137236351', 'alt': 'TAATTGTTC'}} + assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_543153.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_080877.2'} + + + def test_variant327(self): + variant = 'NM_080877.2:c.1737delinsATTGTTC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_080877.2:c.1737delinsATTGTTC' in results.keys() + assert results['NM_080877.2:c.1737delinsATTGTTC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_080877.2:c.1737delinsATTGTTC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_080877.2:c.1737delinsATTGTTC']['alt_genomic_loci'] == [] + assert results['NM_080877.2:c.1737delinsATTGTTC']['transcript_description'] == 'Homo sapiens solute carrier family 34 member 3 (SLC34A3), transcript variant 3, mRNA' + assert results['NM_080877.2:c.1737delinsATTGTTC']['gene_symbol'] == 'SLC34A3' 
+ assert results['NM_080877.2:c.1737delinsATTGTTC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_543153.1:p.(Lys579_Ala580insLeuPhe)', 'slr': 'NP_543153.1:p.(K579_A580insLF)'} + assert results['NM_080877.2:c.1737delinsATTGTTC']['submitted_variant'] == 'NM_080877.2:c.1737delinsATTGTTC' + assert results['NM_080877.2:c.1737delinsATTGTTC']['genome_context_intronic_sequence'] == '' + assert results['NM_080877.2:c.1737delinsATTGTTC']['hgvs_lrg_variant'] == '' + assert results['NM_080877.2:c.1737delinsATTGTTC']['hgvs_transcript_variant'] == 'NM_080877.2:c.1737delinsATTGTTC' + assert results['NM_080877.2:c.1737delinsATTGTTC']['hgvs_refseqgene_variant'] == '' + assert results['NM_080877.2:c.1737delinsATTGTTC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.140130805delinsATTGTTC', 'vcf': {'chr': 'chr9', 'ref': 'G', 'pos': '140130805', 'alt': 'ATTGTTC'}} + assert results['NM_080877.2:c.1737delinsATTGTTC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.137236353delinsATTGTTC', 'vcf': {'chr': 'chr9', 'ref': 'G', 'pos': '137236353', 'alt': 'ATTGTTC'}} + assert results['NM_080877.2:c.1737delinsATTGTTC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.140130805delinsATTGTTC', 'vcf': {'chr': '9', 'ref': 'G', 'pos': '140130805', 'alt': 'ATTGTTC'}} + assert results['NM_080877.2:c.1737delinsATTGTTC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.137236353delinsATTGTTC', 'vcf': {'chr': '9', 'ref': 'G', 'pos': '137236353', 'alt': 'ATTGTTC'}} + assert results['NM_080877.2:c.1737delinsATTGTTC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_543153.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_080877.2'} + + + def test_variant328(self): + variant = 'NM_000088.3:c.4392_*2delinsAGAG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 
'gene_variant' + assert 'NM_000088.3:c.4392_*2delinsAGAG' in results.keys() + assert results['NM_000088.3:c.4392_*2delinsAGAG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.4392_*2delinsAGAG' + assert results['NM_000088.3:c.4392_*2delinsAGAG']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.4392_*2delinsAGAG']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.4392_*2delinsAGAG']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.4392_*2delinsAGAG']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.4392_*2delinsAGAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Ter1465GluextTer84)', 'slr': 'NP_000079.2:p.(*1465Eext*84)'} + assert results['NM_000088.3:c.4392_*2delinsAGAG']['submitted_variant'] == 'NM_000088.3:c.4392_*2delinsAGAG' + assert results['NM_000088.3:c.4392_*2delinsAGAG']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.4392_*2delinsAGAG']['hgvs_lrg_variant'] == 'LRG_1:g.21135_21140delinsAGAG' + assert results['NM_000088.3:c.4392_*2delinsAGAG']['hgvs_transcript_variant'] == 'NM_000088.3:c.4392_*2delinsAGAG' + assert results['NM_000088.3:c.4392_*2delinsAGAG']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.21135_21140delinsAGAG' + assert results['NM_000088.3:c.4392_*2delinsAGAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262861_48262866delinsCTCT', 'vcf': {'chr': 'chr17', 'ref': 'GTTTAC', 'pos': '48262861', 'alt': u'CTCT'}} + assert results['NM_000088.3:c.4392_*2delinsAGAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185500_50185505delinsCTCT', 'vcf': {'chr': 'chr17', 'ref': 'GTTTAC', 'pos': '50185500', 'alt': u'CTCT'}} + assert results['NM_000088.3:c.4392_*2delinsAGAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262861_48262866delinsCTCT', 'vcf': {'chr': '17', 'ref': 
'GTTTAC', 'pos': '48262861', 'alt': u'CTCT'}} + assert results['NM_000088.3:c.4392_*2delinsAGAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185500_50185505delinsCTCT', 'vcf': {'chr': '17', 'ref': 'GTTTAC', 'pos': '50185500', 'alt': u'CTCT'}} + assert results['NM_000088.3:c.4392_*2delinsAGAG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant329(self): + variant = 'NM_000088.3:c.589_591delinsAGAAGC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000088.3:c.589_591delinsAGAAGC' in results.keys() + assert results['NM_000088.3:c.589_591delinsAGAAGC']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589_591delinsAGAAGC' + assert results['NM_000088.3:c.589_591delinsAGAAGC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.589_591delinsAGAAGC']['alt_genomic_loci'] == [] + assert results['NM_000088.3:c.589_591delinsAGAAGC']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + assert results['NM_000088.3:c.589_591delinsAGAAGC']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589_591delinsAGAAGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197delinsArgSer)', 'slr': 'NP_000079.2:p.(G197delinsRS)'} + assert results['NM_000088.3:c.589_591delinsAGAAGC']['submitted_variant'] == 'NM_000088.3:c.589_591delinsAGAAGC' + assert results['NM_000088.3:c.589_591delinsAGAAGC']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.589_591delinsAGAAGC']['hgvs_lrg_variant'] == 'LRG_1:g.8638_8640delinsAGAAGC' + assert results['NM_000088.3:c.589_591delinsAGAAGC']['hgvs_transcript_variant'] 
== 'NM_000088.3:c.589_591delinsAGAAGC' + assert results['NM_000088.3:c.589_591delinsAGAAGC']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638_8640delinsAGAAGC' + assert results['NM_000088.3:c.589_591delinsAGAAGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275361_48275363delinsGCTTCT', 'vcf': {'chr': 'chr17', 'ref': 'ACC', 'pos': '48275361', 'alt': u'GCTTCT'}} + assert results['NM_000088.3:c.589_591delinsAGAAGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198000_50198002delinsGCTTCT', 'vcf': {'chr': 'chr17', 'ref': 'ACC', 'pos': '50198000', 'alt': u'GCTTCT'}} + assert results['NM_000088.3:c.589_591delinsAGAAGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275361_48275363delinsGCTTCT', 'vcf': {'chr': '17', 'ref': 'ACC', 'pos': '48275361', 'alt': u'GCTTCT'}} + assert results['NM_000088.3:c.589_591delinsAGAAGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198000_50198002delinsGCTTCT', 'vcf': {'chr': '17', 'ref': 'ACC', 'pos': '50198000', 'alt': u'GCTTCT'}} + assert results['NM_000088.3:c.589_591delinsAGAAGC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + + def test_variant330(self): + variant = 'NM_000885.5:c.*2536delinsAGAAAAATCA' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_000885.5:c.*2536delinsAGAAAAATCA' in results.keys() + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['alt_genomic_loci'] == [] + assert 
results['NM_000885.5:c.*2536delinsAGAAAAATCA']['transcript_description'] == 'Homo sapiens integrin subunit alpha 4 (ITGA4), transcript variant 1, mRNA' + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['gene_symbol'] == 'ITGA4' + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000876.3:p.?', 'slr': 'NP_000876.3:p.?'} + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['submitted_variant'] == 'NM_000885.5:c.*2536delinsAGAAAAATCA' + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['genome_context_intronic_sequence'] == '' + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['hgvs_lrg_variant'] == '' + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['hgvs_transcript_variant'] == 'NM_000885.5:c.*2536delinsAGAAAAATCA' + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['hgvs_refseqgene_variant'] == '' + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.182402790delinsAGAAAAATCA', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '182402790', 'alt': 'AGAAAAATCA'}} + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.181538063delinsAGAAAAATCA', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '181538063', 'alt': 'AGAAAAATCA'}} + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.182402790delinsAGAAAAATCA', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '182402790', 'alt': 'AGAAAAATCA'}} + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.181538063delinsAGAAAAATCA', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '181538063', 'alt': 'AGAAAAATCA'}} + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_000876.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000885.5'} + + assert results['flag'] == 'gene_variant' + + def test_variant331(self): + variant = 'NM_002693.2:c.-186_-185delinsCC' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert 'NM_002693.2:c.-186_-185delinsCC' in results.keys() + assert results['NM_002693.2:c.-186_-185delinsCC']['hgvs_lrg_transcript_variant'] == 'LRG_765t1:c.-186_-185delinsCC' + assert results['NM_002693.2:c.-186_-185delinsCC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_002693.2:c.-186_-185delinsCC']['alt_genomic_loci'] == [] + assert results['NM_002693.2:c.-186_-185delinsCC']['transcript_description'] == 'Homo sapiens DNA polymerase gamma, catalytic subunit (POLG), transcript variant 1, mRNA' + assert results['NM_002693.2:c.-186_-185delinsCC']['gene_symbol'] == 'POLG' + assert results['NM_002693.2:c.-186_-185delinsCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002684.1(LRG_765p1):p.?', 'slr': 'NP_002684.1:p.?'} + assert results['NM_002693.2:c.-186_-185delinsCC']['submitted_variant'] == 'NM_002693.2:c.-186_-185delinsCC' + assert results['NM_002693.2:c.-186_-185delinsCC']['genome_context_intronic_sequence'] == '' + assert results['NM_002693.2:c.-186_-185delinsCC']['hgvs_lrg_variant'] == '' + assert results['NM_002693.2:c.-186_-185delinsCC']['hgvs_transcript_variant'] == 'NM_002693.2:c.-186_-185delinsCC' + assert results['NM_002693.2:c.-186_-185delinsCC']['hgvs_refseqgene_variant'] == 'NG_008218.1:g.5097_5098delinsCC' + assert results['NM_002693.2:c.-186_-185delinsCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.89877929_89877930delinsGG', 'vcf': {'chr': 'chr15', 'ref': 'CT', 'pos': '89877929', 'alt': u'GG'}} + assert results['NM_002693.2:c.-186_-185delinsCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89334698_89334699delinsGG', 'vcf': {'chr': 
'chr15', 'ref': 'CT', 'pos': '89334698', 'alt': u'GG'}} + assert results['NM_002693.2:c.-186_-185delinsCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.89877929_89877930delinsGG', 'vcf': {'chr': '15', 'ref': 'CT', 'pos': '89877929', 'alt': u'GG'}} + assert results['NM_002693.2:c.-186_-185delinsCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89334698_89334699delinsGG', 'vcf': {'chr': '15', 'ref': 'CT', 'pos': '89334698', 'alt': u'GG'}} + assert results['NM_002693.2:c.-186_-185delinsCC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008218.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002684.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002693.2'} + + assert results['flag'] == 'gene_variant' + + def test_variant332(self): + variant = 'NG_009616.1:g.29052_29053insCTACATAG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_001287344.1:c.690_690+1insCTACATAG' in results.keys() + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': u'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': u'CCTATGTAG'}}}] + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['transcript_description'] == 'Homo sapiens Bruton tyrosine kinase (BTK), transcript variant 3, mRNA' + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['gene_symbol'] == 'BTK' + assert 
results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001274273.1:p.?', 'slr': 'NP_001274273.1:p.?'} + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['submitted_variant'] == 'NG_009616.1:g.29052_29053insCTACATAG' + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['genome_context_intronic_sequence'] == 'NC_000023.10(NM_001287344.1):c.690_690+1insCTACATAG' + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_lrg_variant'] == '' + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_transcript_variant'] == 'NM_001287344.1:c.690_690+1insCTACATAG' + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_refseqgene_variant'] == '' + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '100617160', 'alt': u'CCTATGTAG'}} + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '101362172', 'alt': u'CCTATGTAG'}} + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '100617160', 'alt': u'CCTATGTAG'}} + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '101362172', 'alt': u'CCTATGTAG'}} + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001274273.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001287344.1'} + + assert 'NM_001287345.1:c.588_588+1insCTACATAG' in 
results.keys() + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': u'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': u'CCTATGTAG'}}}] + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['transcript_description'] == 'Homo sapiens Bruton tyrosine kinase (BTK), transcript variant 2, mRNA' + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['gene_symbol'] == 'BTK' + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001274274.1:p.?', 'slr': 'NP_001274274.1:p.?'} + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['submitted_variant'] == 'NG_009616.1:g.29052_29053insCTACATAG' + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['genome_context_intronic_sequence'] == 'NC_000023.10(NM_001287345.1):c.588_588+1insCTACATAG' + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['hgvs_lrg_variant'] == '' + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['hgvs_transcript_variant'] == 'NM_001287345.1:c.588_588+1insCTACATAG' + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['hgvs_refseqgene_variant'] == '' + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '100617160', 'alt': u'CCTATGTAG'}} + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '101362172', 'alt': u'CCTATGTAG'}} + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '100617160', 'alt': u'CCTATGTAG'}} + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '101362172', 'alt': u'CCTATGTAG'}} + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001274274.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001287345.1'} + + assert 'NM_000061.2:c.588_588+1insCTACATAG' in results.keys() + assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_lrg_transcript_variant'] == 'LRG_128t1:c.588_588+1insCTACATAG' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['refseqgene_context_intronic_sequence'] == 'NG_009616.1(NM_000061.2):c.588_588+1insCTACATAG' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': u'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': u'CCTATGTAG'}}}] + assert results['NM_000061.2:c.588_588+1insCTACATAG']['transcript_description'] == 'Homo sapiens Bruton tyrosine kinase (BTK), transcript variant 1, mRNA' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['gene_symbol'] == 'BTK' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000052.1(LRG_128p1):p.?', 'slr': 'NP_000052.1:p.?'} + 
assert results['NM_000061.2:c.588_588+1insCTACATAG']['submitted_variant'] == 'NG_009616.1:g.29052_29053insCTACATAG' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['genome_context_intronic_sequence'] == 'NC_000023.10(NM_000061.2):c.588_588+1insCTACATAG' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_lrg_variant'] == 'LRG_128:g.29052_29053insCTACATAG' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_transcript_variant'] == 'NM_000061.2:c.588_588+1insCTACATAG' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_refseqgene_variant'] == 'NG_009616.1:g.29052_29053insCTACATAG' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '100617160', 'alt': u'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '101362172', 'alt': u'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '100617160', 'alt': u'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '101362172', 'alt': u'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009616.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000052.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000061.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_128.xml'} + + + def test_variant333(self): + variant = 
'NM_000061.2:c.588_588+1insCTACATAG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000061.2:c.588_588+1insCTACATAG' in results.keys() + assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_lrg_transcript_variant'] == 'LRG_128t1:c.588_588+1insCTACATAG' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['refseqgene_context_intronic_sequence'] == 'NG_009616.1(NM_000061.2):c.588_588+1insCTACATAG' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': u'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': u'CCTATGTAG'}}}] + assert results['NM_000061.2:c.588_588+1insCTACATAG']['transcript_description'] == 'Homo sapiens Bruton tyrosine kinase (BTK), transcript variant 1, mRNA' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['gene_symbol'] == 'BTK' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000052.1(LRG_128p1):p.?', 'slr': 'NP_000052.1:p.?'} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['submitted_variant'] == 'NM_000061.2:c.588_588+1insCTACATAG' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['genome_context_intronic_sequence'] == 'NC_000023.10(NM_000061.2):c.588_588+1insCTACATAG' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_lrg_variant'] == 'LRG_128:g.29052_29053insCTACATAG' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_transcript_variant'] == 'NM_000061.2:c.588_588+1insCTACATAG' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_refseqgene_variant'] == 'NG_009616.1:g.29052_29053insCTACATAG' + assert 
results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '100617160', 'alt': u'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '101362172', 'alt': u'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '100617160', 'alt': u'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '101362172', 'alt': u'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009616.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000052.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000061.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_128.xml'} + + + def test_variant334(self): + variant = 'NM_000061.2:c.588_589insCTACATAG' + results = self.vv.validate(variant, 'GRCh37', 'all') + print results + + assert results['flag'] == 'gene_variant' + assert 'NM_000061.2:c.588_589insCTACATAG' in results.keys() + assert results['NM_000061.2:c.588_589insCTACATAG']['hgvs_lrg_transcript_variant'] == 'LRG_128t1:c.588_589insCTACATAG' + assert results['NM_000061.2:c.588_589insCTACATAG']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000061.2:c.588_589insCTACATAG']['alt_genomic_loci'] == [] + assert results['NM_000061.2:c.588_589insCTACATAG']['transcript_description'] == 'Homo sapiens Bruton tyrosine kinase 
(BTK), transcript variant 1, mRNA' + assert results['NM_000061.2:c.588_589insCTACATAG']['gene_symbol'] == 'BTK' + assert results['NM_000061.2:c.588_589insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000052.1(LRG_128p1):p.(Ile197LeufsTer5)', 'slr': 'NP_000052.1:p.(I197Lfs*5)'} + assert results['NM_000061.2:c.588_589insCTACATAG']['submitted_variant'] == 'NM_000061.2:c.588_589insCTACATAG' + assert results['NM_000061.2:c.588_589insCTACATAG']['genome_context_intronic_sequence'] == '' + assert results['NM_000061.2:c.588_589insCTACATAG']['hgvs_lrg_variant'] == '' + assert results['NM_000061.2:c.588_589insCTACATAG']['hgvs_transcript_variant'] == 'NM_000061.2:c.588_589insCTACATAG' + assert results['NM_000061.2:c.588_589insCTACATAG']['hgvs_refseqgene_variant'] == '' + assert results['NM_000061.2:c.588_589insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100615743_100617161delinsTCTATGTAGC', 'vcf': {'chr': 'chrX', 'ref': 'GTTAGGAGAAAAGGTAGGAGGGTTTGTCAAGATACCAAGCACTCTTCTCTTCTCTCCCAACTCTCTGGCTTACTCAAGACACCCAAATCAGGCATACTAAAATATTACTCAGCAGTCATTCAACAACCATTTTTAAGCACCAGTGCAGGAGTTCTCAGCCTTGCACACATATAAAGACCATGTATGGAACTTTTAAATTCCAATGTACTTTCGGAGGCCAAGGCGGGCGGATCAGTTGAGGCCAGGAGTTCGAGACCAGCCTGGCCAACGTGACGAAACCCCATCTCTACTAAAAATGCAAAAATCAGCTGGGCATGGTAGTGTGTGCATATAGCCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATAACTTGACCCCAGGAGGCGGAGGTTGTAGTGAGCCAAGACCACGCCACTGCACTCCAGCCTGGGTGACAGAGTGAGACTGTCTCAAAAAAAAACCCACAAAAAACAAAAAACCAATTCCAATACCTAGTCAGTTTCCTCACAGACCAATTACATCAAAATCAAACTCTCAGGAATGGGACCCAAACATTACTATTTTTAAAGCTCACTAGACAAAAACCATTTATAGCTAAGGTCAGGAAACCGGCTTGGCACTAAACTTGTACGTGAATCTACTAAGTGGCTCAGAACCTTGGTTTCCTTCTTTGTAAAATGAGTATAATAATACCTGCTCTACTTACTTACAATATGTGAGAAAGGGCTTTCTAGCCCTCAAGAAGGAACCAAAAAAAAAAAAAAAACTTCTGAAGTGTTAGTGATAGGTGGTGTTAGTGCTAAGTGTTGAGTATGTTGGTATTAAGTGTTAAATTCTTCTAACTTTACTGTATGTTTGAAAATACTTTCCAGCTGGGCACGGTGGCTCACGCCTGTAATCCCAGCACTTTAGGAGGCCGAGGCGGGTTCGAGACCAGCCTGAACAACATGGTGACACCCCCATCTCTACTAAAAATACAAAAATTAGCTGGGCGTGGTGGTGCATGCCTGT
AATCCCAGCTACTCAGGAGGCTGAGACAGGAGAATCACTTGAATCTGGGAGGCGGAGGTTGCAGTGAGCCGAGATCATGCCATTACACTCGAACCTGGGCGACAGAGCGAGACTCCGTCTCAAAACAAAAACAAAAGCAAAAACAAAACAACAACAAAAACTTTACATTAAAAAAATCAGGTTTTGTTCTAAACAGGTGATTGGATTACATGGTTGCTGAGAGCCTTCTATCTTTCCATCGAGGAGGAAATCCTAATTAGAAGAACAAATCCCCCATCTTAGCAAGAATACCAATTAACACTGCCAAGTCCCAGGGTAATTCTAAGACTCTAGTGTGTTCTTAGGGCTTGACTATAAGTTTCCATTTAAGCAGTGGCAGCACCCAGTTTCCCTGTATAC', 'pos': '100615751', 'alt': 'G'}} + assert 'hg38' not in results['NM_000061.2:c.588_589insCTACATAG']['primary_assembly_loci'].keys() + assert results['NM_000061.2:c.588_589insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100615743_100617161delinsTCTATGTAGC', 'vcf': {'chr': 'X', 'ref': 'GTTAGGAGAAAAGGTAGGAGGGTTTGTCAAGATACCAAGCACTCTTCTCTTCTCTCCCAACTCTCTGGCTTACTCAAGACACCCAAATCAGGCATACTAAAATATTACTCAGCAGTCATTCAACAACCATTTTTAAGCACCAGTGCAGGAGTTCTCAGCCTTGCACACATATAAAGACCATGTATGGAACTTTTAAATTCCAATGTACTTTCGGAGGCCAAGGCGGGCGGATCAGTTGAGGCCAGGAGTTCGAGACCAGCCTGGCCAACGTGACGAAACCCCATCTCTACTAAAAATGCAAAAATCAGCTGGGCATGGTAGTGTGTGCATATAGCCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATAACTTGACCCCAGGAGGCGGAGGTTGTAGTGAGCCAAGACCACGCCACTGCACTCCAGCCTGGGTGACAGAGTGAGACTGTCTCAAAAAAAAACCCACAAAAAACAAAAAACCAATTCCAATACCTAGTCAGTTTCCTCACAGACCAATTACATCAAAATCAAACTCTCAGGAATGGGACCCAAACATTACTATTTTTAAAGCTCACTAGACAAAAACCATTTATAGCTAAGGTCAGGAAACCGGCTTGGCACTAAACTTGTACGTGAATCTACTAAGTGGCTCAGAACCTTGGTTTCCTTCTTTGTAAAATGAGTATAATAATACCTGCTCTACTTACTTACAATATGTGAGAAAGGGCTTTCTAGCCCTCAAGAAGGAACCAAAAAAAAAAAAAAAACTTCTGAAGTGTTAGTGATAGGTGGTGTTAGTGCTAAGTGTTGAGTATGTTGGTATTAAGTGTTAAATTCTTCTAACTTTACTGTATGTTTGAAAATACTTTCCAGCTGGGCACGGTGGCTCACGCCTGTAATCCCAGCACTTTAGGAGGCCGAGGCGGGTTCGAGACCAGCCTGAACAACATGGTGACACCCCCATCTCTACTAAAAATACAAAAATTAGCTGGGCGTGGTGGTGCATGCCTGTAATCCCAGCTACTCAGGAGGCTGAGACAGGAGAATCACTTGAATCTGGGAGGCGGAGGTTGCAGTGAGCCGAGATCATGCCATTACACTCGAACCTGGGCGACAGAGCGAGACTCCGTCTCAAAACAAAAACAAAAGCAAAAACAAAACAACAACAAAAACTTTACATTAAAAAAATCAGGTTTTGTTCTAAACAGGTGATTGGATTACATGGTTGCTGAGAGCCTTCTATCTTTCCATCGAGGAGGAAATCC
TAATTAGAAGAACAAATCCCCCATCTTAGCAAGAATACCAATTAACACTGCCAAGTCCCAGGGTAATTCTAAGACTCTAGTGTGTTCTTAGGGCTTGACTATAAGTTTCCATTTAAGCAGTGGCAGCACCCAGTTTCCCTGTATAC', 'pos': '100615751', 'alt': 'G'}} + assert 'grch38' not in results['NM_000061.2:c.588_589insCTACATAG']['primary_assembly_loci'].keys() + assert results['NM_000061.2:c.588_589insCTACATAG']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000052.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000061.2'} + From 7116447bc1d395a6f90930157a83a98f39fec038 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 21 Feb 2019 10:45:51 +0000 Subject: [PATCH 032/223] Used 2to3 to convert to py3 --- .gitignore | 3 + VariantValidator/__init__.py | 2 +- VariantValidator/modules/vvDBGet.py | 6 +- VariantValidator/modules/vvDBInit.py | 4 +- VariantValidator/modules/vvDBInsert.py | 4 +- VariantValidator/modules/vvDatabase.py | 40 +- VariantValidator/modules/vvFunctions.py | 4 +- VariantValidator/modules/vvHGVS.py | 2 +- VariantValidator/modules/vvLiftover.py | 12 +- VariantValidator/modules/vvLogging.py | 2 +- VariantValidator/modules/vvMixinConverters.py | 54 +- VariantValidator/modules/vvMixinCore.py | 469 +- VariantValidator/modules/vvMixinInit.py | 6 +- VariantValidator/simpleTestScript.py | 4 +- VariantValidator/testing/vvTestCompare.py | 2 +- VariantValidator/testing/vvTestFunctions.py | 30 +- VariantValidator/variantValidator.py | 2 +- test/test_inputs.py | 6108 ++++++++--------- 18 files changed, 3378 insertions(+), 3376 deletions(-) diff --git a/.gitignore b/.gitignore index 04deedb3..9c26d983 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,6 @@ build dist variantValidator.egg-info VariantValidator/testing/outputs* + +# backedup files after 2to3 conversion +*.bak diff --git a/VariantValidator/__init__.py b/VariantValidator/__init__.py index a6016d48..c2536527 100644 --- a/VariantValidator/__init__.py +++ b/VariantValidator/__init__.py @@ -1,3 +1,3 @@ -from variantValidator import * 
+from .variantValidator import * __all__=["Validator","Validation"] diff --git a/VariantValidator/modules/vvDBGet.py b/VariantValidator/modules/vvDBGet.py index 76b8c515..38513b29 100644 --- a/VariantValidator/modules/vvDBGet.py +++ b/VariantValidator/modules/vvDBGet.py @@ -1,6 +1,6 @@ -from vvFunctions import handleCursor -from vvLogging import logger -import vvDBInit +from .vvFunctions import handleCursor +from .vvLogging import logger +from . import vvDBInit class Mixin(vvDBInit.Mixin): ''' diff --git a/VariantValidator/modules/vvDBInit.py b/VariantValidator/modules/vvDBInit.py index 3d2816df..e6731db1 100644 --- a/VariantValidator/modules/vvDBInit.py +++ b/VariantValidator/modules/vvDBInit.py @@ -19,6 +19,7 @@ def __init__(self,val,dbConfig): self.val=val self.pool=mysql.connector.pooling.MySQLConnectionPool(pool_size=10, **self.dbConfig) self.conn=self.pool.get_connection() + def __del__(self): if self.conn: self.conn.close() @@ -26,8 +27,5 @@ def __del__(self): if self.pool: self.pool.close() self.pool=None - if self.cursor: - self.cursor.close() - self.cursor=None if self.val: self.val=None diff --git a/VariantValidator/modules/vvDBInsert.py b/VariantValidator/modules/vvDBInsert.py index 59cac186..1f5e796f 100644 --- a/VariantValidator/modules/vvDBInsert.py +++ b/VariantValidator/modules/vvDBInsert.py @@ -1,5 +1,5 @@ -from vvFunctions import handleCursor -import vvDBGet +from .vvFunctions import handleCursor +from . import vvDBGet class Mixin(vvDBGet.Mixin): ''' diff --git a/VariantValidator/modules/vvDatabase.py b/VariantValidator/modules/vvDatabase.py index f3b77f2e..031d4727 100644 --- a/VariantValidator/modules/vvDatabase.py +++ b/VariantValidator/modules/vvDatabase.py @@ -1,10 +1,10 @@ -from vvLogging import logger -import vvFunctions as fn -from vvFunctions import handleCursor +from .vvLogging import logger +from . 
import vvFunctions as fn +from .vvFunctions import handleCursor #from vvDBInsert import vvDBInsert #from vvDBGet import vvDBGet -import vvDBInsert -import urllib2 +from . import vvDBInsert +import urllib.request, urllib.error, urllib.parse import copy import re @@ -185,8 +185,8 @@ def update_rsg(self): # Download data from RefSeqGene # Download data - rsg = urllib2.Request('http://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/gene_RefSeqGene') - response = urllib2.urlopen(rsg) + rsg = urllib.request.Request('http://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/gene_RefSeqGene') + response = urllib.request.urlopen(rsg) rsg_file = response.read() rsg_data_line = rsg_file.split('\n') rsg_data = [] @@ -194,9 +194,9 @@ def update_rsg(self): rsg_data.append(data) # Download data - grch37 = urllib2.Request( + grch37 = urllib.request.Request( 'http://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/GCF_000001405.25_refseqgene_alignments.gff3') - response = urllib2.urlopen(grch37) + response = urllib.request.urlopen(grch37) grch37_file = response.read() grch37_data_line = grch37_file.split('\n') grch37_align_data = [] @@ -204,9 +204,9 @@ def update_rsg(self): grch37_align_data.append(data) # Download data - grch38 = urllib2.Request( + grch38 = urllib.request.Request( 'http://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/GCF_000001405.28_refseqgene_alignments.gff3') - response = urllib2.urlopen(grch38) + response = urllib.request.urlopen(grch38) grch38_file = response.read() grch38_data_line = grch38_file.split('\n') grch38_align_data = [] @@ -380,7 +380,7 @@ def update_rsg(self): to_mysql = [] for line in db: - if line[0] in obsolete.keys(): + if line[0] in list(obsolete.keys()): continue # Only gap-less RefSeqGenes will have passed. 
The rest will be alternatively curated write = [] @@ -427,36 +427,36 @@ def update_rsg(self): #from compile_lrg_data, this function was originally just called "update" def update_lrg(self): logger.info('Updating LRG lookup tables') - lr2rs_download = urllib2.Request('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_transcripts_xrefs.txt') + lr2rs_download = urllib.request.Request('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_transcripts_xrefs.txt') # Open and read - lr2rs_data = urllib2.urlopen(lr2rs_download) + lr2rs_data = urllib.request.urlopen(lr2rs_download) lr2rs = lr2rs_data.read() # List the data lr2rs = lr2rs.strip() lr2rs = lr2rs.split('\n') # Download - lrg_status_download = urllib2.Request('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_GRCh38.txt') + lrg_status_download = urllib.request.Request('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_GRCh38.txt') # Open and read - lrg_status_data = urllib2.urlopen(lrg_status_download) + lrg_status_data = urllib.request.urlopen(lrg_status_download) lrg_status = lrg_status_data.read() # List the data lrg_status = lrg_status.strip() lrg_status = lrg_status.split('\n') # Download - rs2lr_download = urllib2.Request('http://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/RefSeqGene/LRG_RefSeqGene') + rs2lr_download = urllib.request.Request('http://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/RefSeqGene/LRG_RefSeqGene') # Open and read - rs2lr_data = urllib2.urlopen(rs2lr_download) + rs2lr_data = urllib.request.urlopen(rs2lr_download) rs2lr = rs2lr_data.read() # List the data rs2lr = rs2lr.strip() rs2lr = rs2lr.split('\n') # Download LRG transcript (_t) to LRG Protein (__p) data file - lr_t2p_downloaded = urllib2.Request('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_proteins_RefSeq.txt') + lr_t2p_downloaded = urllib.request.Request('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_proteins_RefSeq.txt') # Open and read - lr_t2p_data = urllib2.urlopen(lr_t2p_downloaded) + lr_t2p_data = 
urllib.request.urlopen(lr_t2p_downloaded) lr_t2p = lr_t2p_data.read() # List the data lr_t2p = lr_t2p.strip() diff --git a/VariantValidator/modules/vvFunctions.py b/VariantValidator/modules/vvFunctions.py index acd0978e..54a93f08 100644 --- a/VariantValidator/modules/vvFunctions.py +++ b/VariantValidator/modules/vvFunctions.py @@ -3,11 +3,11 @@ from Bio.Alphabet import IUPAC import httplib2 as http import json -from urlparse import urlparse #Python 2 +from urllib.parse import urlparse #Python 2 import functools import traceback import sys -from vvLogging import logger +from .vvLogging import logger import re import copy import mysql diff --git a/VariantValidator/modules/vvHGVS.py b/VariantValidator/modules/vvHGVS.py index a978e39f..8ef731fd 100644 --- a/VariantValidator/modules/vvHGVS.py +++ b/VariantValidator/modules/vvHGVS.py @@ -6,7 +6,7 @@ # Import modules import re import copy -import vvChromosomes +from . import vvChromosomes # Import Biopython modules from Bio.Seq import Seq diff --git a/VariantValidator/modules/vvLiftover.py b/VariantValidator/modules/vvLiftover.py index c21b1016..bbdc15e2 100644 --- a/VariantValidator/modules/vvLiftover.py +++ b/VariantValidator/modules/vvLiftover.py @@ -10,9 +10,9 @@ import hgvs.sequencevariant import re import os -import vvChromosomes -import vvHGVS -from vvLogging import logger +from . import vvChromosomes +from . 
import vvHGVS +from .vvLogging import logger from pyliftover import LiftOver from Bio.Seq import Seq @@ -120,7 +120,7 @@ def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, reverse_no for tx_dat_2 in rts_list_2: rts_dict[tx_dat_2] = True if rts_dict != {}: - tx_list = rts_dict.keys() + tx_list = list(rts_dict.keys()) # Try to liftover if tx_list is not False: @@ -157,12 +157,12 @@ def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, reverse_no filtered_1 = {} if selected: for chroms in selected: - if chroms[1] in filtered_1.keys(): + if chroms[1] in list(filtered_1.keys()): pass else: filtered_1[chroms[1]] = chroms[0] added_data = False - for key, val in filtered_1.iteritems(): + for key, val in list(filtered_1.items()): try: # Note, due to 0 base positions in UTA (I think) occasionally tx will # be identified that cannot be mapped to. diff --git a/VariantValidator/modules/vvLogging.py b/VariantValidator/modules/vvLogging.py index 6901b6a3..1728e809 100644 --- a/VariantValidator/modules/vvLogging.py +++ b/VariantValidator/modules/vvLogging.py @@ -2,7 +2,7 @@ import logging import datetime import os -from StringIO import StringIO +from io import StringIO VALIDATOR_DEBUG=os.environ.get('VALIDATOR_DEBUG') diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index 7c91eaea..b89e4276 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -2,7 +2,7 @@ import os import sys import copy -from vvLogging import logger +from .vvLogging import logger import hgvs import hgvs.exceptions from hgvs.dataproviders import uta @@ -12,14 +12,14 @@ import hgvs.parser import hgvs.variantmapper import hgvs.sequencevariant -import vvMixinInit -import vvChromosomes -import vvHGVS -from urlparse import urlparse +from . import vvMixinInit +from . import vvChromosomes +from . 
import vvHGVS +from urllib.parse import urlparse import httplib2 as http import json from Bio import Entrez,SeqIO -import vvFunctions as fn +from . import vvFunctions as fn #Error setup @@ -357,7 +357,7 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): except Exception as e: attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[ 1] + '~' - print e + print(e) continue # If not mapped, raise error @@ -379,7 +379,7 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): else: attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[ 1] + '~' - print e + print(e) continue try: hn.normalize(hgvs_genomic) @@ -396,7 +396,7 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): except Exception as e: attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[ 1] + '~' - print e + print(e) continue try: hn.normalize(hgvs_genomic) @@ -415,7 +415,7 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + \ option[ 1] + '~' - print e + print(e) continue try: hn.normalize(hgvs_genomic) @@ -433,7 +433,7 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): except Exception as e: attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + \ option[1] + '~' - print e + print(e) continue try: hn.normalize(hgvs_genomic) @@ -452,7 +452,7 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): attempted_mapping_error = attempted_mapping_error + str( e) + "/" + hgvs_c.ac + "/" + \ option[1] + '~' - print e + print(e) continue # Only a RefSeqGene available @@ -469,7 +469,7 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): except Exception as e: attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + \ option[1] + '~' - print e + print(e) 
continue # If not mapped, raise error @@ -656,7 +656,7 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): alternate_sequence_bases = [] for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base + 1, 1): - if int in alt_base_dict.keys(): + if int in list(alt_base_dict.keys()): alternate_sequence_bases.append(alt_base_dict[int]) else: alternate_sequence_bases.append(ref_base_dict[int]) @@ -804,7 +804,7 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): # Generate the alt sequence alternate_sequence_bases = [] for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base + 1, 1): - if int in alt_base_dict.keys(): + if int in list(alt_base_dict.keys()): alternate_sequence_bases.append(alt_base_dict[int]) else: alternate_sequence_bases.append(ref_base_dict[int]) @@ -903,7 +903,7 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn except Exception as e: attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[ 1] + '~' - print e + print(e) continue # If not mapped, raise error @@ -922,7 +922,7 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn except Exception as e: attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[ 1] + '~' - print e + print(e) continue # If not mapped, raise error @@ -945,7 +945,7 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + \ option[ 1] + '~' - print e + print(e) continue try: hn.normalize(hgvs_genomic) @@ -964,7 +964,7 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + \ option[ 1] + '~' - print e + print(e) continue try: hn.normalize(hgvs_genomic) @@ -984,7 +984,7 @@ 
def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn e) + "/" + hgvs_c.ac + "/" + \ option[ 1] + '~' - print e + print(e) continue try: hn.normalize(hgvs_genomic) @@ -1003,7 +1003,7 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn attempted_mapping_error = attempted_mapping_error + str( e) + "/" + hgvs_c.ac + "/" + \ option[1] + '~' - print e + print(e) continue try: hn.normalize(hgvs_genomic) @@ -1022,7 +1022,7 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn attempted_mapping_error = attempted_mapping_error + str( e) + "/" + hgvs_c.ac + "/" + \ option[1] + '~' - print e + print(e) continue # Only a RefSeqGene available @@ -1040,7 +1040,7 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn attempted_mapping_error = attempted_mapping_error + str( e) + "/" + hgvs_c.ac + "/" + \ option[1] + '~' - print e + print(e) continue try: hgvs_genomic @@ -1400,7 +1400,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): alternate_sequence_bases = [] for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base + 1, 1): - if int in alt_base_dict.keys(): + if int in list(alt_base_dict.keys()): alternate_sequence_bases.append(alt_base_dict[int]) else: alternate_sequence_bases.append(ref_base_dict[int]) @@ -1548,7 +1548,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): # Generate the alt sequence alternate_sequence_bases = [] for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base + 1, 1): - if int in alt_base_dict.keys(): + if int in list(alt_base_dict.keys()): alternate_sequence_bases.append(alt_base_dict[int]) else: alternate_sequence_bases.append(ref_base_dict[int]) @@ -1773,7 +1773,7 @@ def relevant_transcripts(self, hgvs_genomic, evm, alt_aln_method,reverse_normali rts_list_2 = evm.relevant_transcripts(hgvs_genomic) for tx_dat_2 in rts_list_2: 
rts_dict[tx_dat_2] = True - rts = rts_dict.keys() + rts = list(rts_dict.keys()) # Project genomic variants to new transcripts # and populate a code_var list @@ -1976,7 +1976,7 @@ def merge_hgvs_3pr(self, hgvs_variant_list,hn): try: hgvs_v = self.hp.parse_hgvs_variant(hgvs_v) except Exception as e: - print e + print(e) pass # Validate diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 3974a7a8..9b0fe6fa 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -35,9 +35,9 @@ #import external #import output_formatter #import variantanalyser -from vvLogging import logger +from .vvLogging import logger import hgvs -import vvHGVS +from . import vvHGVS #from variantanalyser import functions as va_func #from variantanalyser import dbControls as va_dbCrl #from variantanalyser import hgvs2vcf as vvHGVS @@ -45,13 +45,13 @@ #from variantanalyser import g_to_g as va_g2g #from variantanalyser import supported_chromosome_builds as va_scb #from variantanalyser.liftover import liftover as lift_over -from vvLiftover import liftover as lift_over #??? +from .vvLiftover import liftover as lift_over #??? -import vvFunctions as fn -import vvDatabase -import vvChromosomes -import vvMixinConverters -from vvFunctions import VariantValidatorError +from . import vvFunctions as fn +from . import vvDatabase +from . import vvChromosomes +from . 
import vvMixinConverters +from .vvFunctions import VariantValidatorError class Mixin(vvMixinConverters.Mixin): @@ -179,7 +179,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Test for rich text unicode characters try: - unicode_test = u"{}".format(input) + unicode_test = "{}".format(input) except UnicodeDecodeError as e: # Format the trapped character into unicode for styled printing my_unicode = e[1] @@ -203,23 +203,23 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr found_at = found_unicode.encode('raw_unicode_escape') break # Extract character from the error - unicode = re.findall("u'\\\\\w+'", found_error) - character = unicode[0] + chars = re.findall(r"u'\\\\\w+'", found_error) + character = chars[0] search_term = character.replace("u'", '') search_term = search_term.replace("'", '') found_at_decoded = found_at.decode('raw_unicode_escape') found_at = found_at_decoded.encode('raw_unicode_escape') string_char = str(character) # Create a human readable U+ representation - human_code = re.sub("u'\\\\\w", 'U+', string_char) + human_code = re.sub(r"u'\\\\\w", 'U+', string_char) human_code = human_code.replace("'", "") - format_human = u"{}".format(human_code) + format_human = "{}".format(human_code) format_human = format_human.upper() - found_at = re.sub(search_term, u'<' + format_human + u'>', found_at) + found_at = re.sub(search_term, '<' + format_human + '>', found_at) slasher = re.compile("\\\\") found_at = re.sub(slasher, '', found_at) validation['id'] = found_at - error = u'Submitted variant description contains an invalid character which is represented by Unicode character ' + format_human + u' at position ' + found_at + u': Please remove this character and re-submit: A useful search function for Unicode characters can be found at https://unicode-search.net/' + error = 'Submitted variant description contains an invalid character which is represented by Unicode character ' + format_human 
+ ' at position ' + found_at + ': Please remove this character and re-submit: A useful search function for Unicode characters can be found at https://unicode-search.net/' validation['warnings'] = validation['warnings'] + ': ' + error logger.warning(error) continue @@ -276,20 +276,20 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr The output format is a common mistake caused by inaccurate conversion of VCF variants into HGVS - hence the need for conversion step 2 """ - if re.search('[-:]\d+[-:][GATC]+[-:][GATC]+', input): + if re.search(r'[-:]\d+[-:][GATC]+[-:][GATC]+', input): input = input.replace(':', '-') # Extract primary_assembly if provided - if re.match('GRCh3\d+-', input) or re.match('hg\d+-', input): + if re.match(r'GRCh3\d+-', input) or re.match(r'hg\d+-', input): in_list = input.split('-') selected_assembly = in_list[0] input = '-'.join(in_list[1:]) pre_input = copy.deepcopy(input) vcf_elements = pre_input.split('-') input = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[3]) - elif re.search('[-:]\d+[-:][GATC]+[-:]', input): + elif re.search(r'[-:]\d+[-:][GATC]+[-:]', input): input = input.replace(':', '-') # Extract primary_assembly if provided - if re.match('GRCh3\d+-', input) or re.match('hg\d+-', input): + if re.match(r'GRCh3\d+-', input) or re.match(r'hg\d+-', input): in_list = input.split('-') selected_assembly = in_list[0] input = '-'.join(in_list[1:]) @@ -313,14 +313,14 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr batch_list.append(queryA) batch_list.append(queryB) continue - elif re.search('[-:]\d+[-:][-:][GATC]+', input) or re.search('[-:]\d+[-:][.][-:][GATC]+', input): + elif re.search(r'[-:]\d+[-:][-:][GATC]+', input) or re.search(r'[-:]\d+[-:][.][-:][GATC]+', input): input = input.replace(':', '-') if re.search('-.-', input): input = input.replace('-.-', '-ins-') if re.search('--', input): input = input.replace('--', '-ins-') # Extract 
primary_assembly if provided - if re.match('GRCh3\d+-', input) or re.match('hg\d+-', input): + if re.match(r'GRCh3\d+-', input) or re.match(r'hg\d+-', input): in_list = input.split('-') selected_assembly = in_list[0] input = '-'.join(in_list[1:]) @@ -341,8 +341,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr The LRG ID data ia stored in the VariantValidator MySQL database. The reference sequence type is also assigned. """ - if re.search('\w+\:', input) and not re.search('\w+\:[gcnmrp]\.', input): - if re.search('\w+\:[gcnmrp]', input) and not re.search('\w+\:[gcnmrp]\.', input): + if re.search(r'\w+\:', input) and not re.search(r'\w+\:[gcnmrp]\.', input): + if re.search(r'\w+\:[gcnmrp]', input) and not re.search(r'\w+\:[gcnmrp]\.', input): # Missing dot pass else: @@ -355,7 +355,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr input_list = input.split(':') pos_ref_alt = str(input_list[1]) positionAndEdit = input_list[1] - if not re.match('N[CGTWMRP]_', input) and not re.match('LRG_', input): + if not re.match(r'N[CGTWMRP]_', input) and not re.match(r'LRG_', input): chr_num = str(input_list[0]) chr_num = chr_num.upper() chr_num = chr_num.strip() @@ -373,7 +373,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr accession = input_list[0] if re.search('>', pre_input): if re.search('del', pre_input): - pos = re.match('\d+', pos_ref_alt) + pos = re.match(r'\d+', pos_ref_alt) position = pos.group(0) old_ref, old_alt = pos_ref_alt.split('>') old_ref = old_ref.replace(position, '') @@ -383,7 +383,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr alt = required_base positionAndEdit = str(position) + ref + '>' + alt elif re.search('ins', pre_input): - pos = re.match('\d+', pos_ref_alt) + pos = re.match(r'\d+', pos_ref_alt) position = pos.group(0) old_ref, old_alt = pos_ref_alt.split('>') # old_ref = old_ref.replace(position, '') @@ 
-407,7 +407,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr fn.exceptPass(validation) # Descriptions lacking the colon : - if re.search('[gcnmrp]\.', input) and not re.search(':[gcnmrp]\.', input): + if re.search(r'[gcnmrp]\.', input) and not re.search(r':[gcnmrp]\.', input): error = 'Unable to identify a colon (:) in the variant description %s. A colon is required in HGVS variant descriptions to separate the reference accession from the reference type i.e. :. e.g. :c.' % ( input) validation['warnings'] = validation['warnings'] + ': ' + error @@ -421,7 +421,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr formats like Chr16:g.2099572TC>T which are provided by Alamut and other software """ - if re.search('\w+:[gcnmrp]\.', input) and not re.match('N[CGTWMRP]_', input): + if re.search(r'\w+:[gcnmrp]\.', input) and not re.match(r'N[CGTWMRP]_', input): # Take out lowercase Accession characters lower_cased_list = input.split(':') if re.search('LRG', lower_cased_list[0], re.IGNORECASE): @@ -480,7 +480,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr of knowing which the users intended reference sequence was, and the exon boundaries etc of the alternative transcript variants may not be equivalent """ - if re.search('\w+\:[cn]\.', input): + if re.search(r'\w+\:[cn]\.', input): try: pre_input = copy.deepcopy(input) query_a_symbol = pre_input.split(':')[0] @@ -492,16 +492,16 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr select_from_these_transcripts = {} for tx in available_transcripts: if re.match('NM_', tx[3]) or re.match('NR_', tx[3]): - if tx[3] not in select_from_these_transcripts.keys(): + if tx[3] not in list(select_from_these_transcripts.keys()): select_from_these_transcripts[tx[3]] = '' else: continue else: continue - select_from_these_transcripts = '|'.join(select_from_these_transcripts.keys()) + 
select_from_these_transcripts = '|'.join(list(select_from_these_transcripts.keys())) if select_transcripts != 'all': validation['write'] = 'false' - for transcript in select_transcripts_dict_plus_version.keys(): + for transcript in list(select_transcripts_dict_plus_version.keys()): validation[ 'warnings'] = 'HGVS variant nomenclature does not allow the use of a gene symbol (' + \ query_a_symbol + ') in place of a valid reference sequence' @@ -534,9 +534,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr Chromosomal reference sequence identifiers used in the context of c. variant descriptions """ - if re.search('\w+\:[cn]', input): + if re.search(r'\w+\:[cn]', input): try: - if re.match('^NG_', input): + if re.match(r'^NG_', input): refSeqGeneID = input.split(':')[0] tx_edit = input.split(':')[1] gene_symbol = self.db.get_gene_symbol_from_refSeqGeneID(refSeqGeneID) @@ -546,16 +546,16 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr select_from_these_transcripts = {} for tx in available_transcripts: if re.match('NM_', tx[3]) or re.match('NR_', tx[3]): - if tx[3] not in select_from_these_transcripts.keys(): + if tx[3] not in list(select_from_these_transcripts.keys()): select_from_these_transcripts[tx[3]] = '' else: continue else: continue - select_from_these_transcripts = '|'.join(select_from_these_transcripts.keys()) + select_from_these_transcripts = '|'.join(list(select_from_these_transcripts.keys())) if select_transcripts != 'all': validation['write'] = 'false' - for transcript in select_transcripts_dict_plus_version.keys(): + for transcript in list(select_transcripts_dict_plus_version.keys()): validation[ 'warnings'] = 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. 
NG_(NM_):c.PositionVariation' refreshed_description = refSeqGeneID + '(' + transcript + ')' + ':' + tx_edit @@ -604,15 +604,15 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr automatically submits them for validation """ not_sub = copy.deepcopy(input) - not_sub_find = re.compile("([GATCgatc]+)>([GATCgatc]+)") + not_sub_find = re.compile(r"([GATCgatc]+)>([GATCgatc]+)") if not_sub_find.search(not_sub): try: # If the length of either side of the substitution delimer (>) is >1 matches = not_sub_find.search(not_sub) if len(matches.group(1)) > 1 or len(matches.group(2)) > 1 or re.search( - "([GATCgatc]+)>([GATCgatc]+),([GATCgatc]+)", input): + r"([GATCgatc]+)>([GATCgatc]+),([GATCgatc]+)", input): # Search for and remove range - interval_range = re.compile("([0-9]+)_([0-9]+)") + interval_range = re.compile(r"([0-9]+)_([0-9]+)") if interval_range.search(not_sub): m = not_sub_find.search(not_sub) start = m.group(1) @@ -633,7 +633,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr insert = split_greater[1] remainder = split_greater[0] # Split remainder using matches - r = re.compile("([0-9]+)([GATCgatc]+)") + r = re.compile(r"([0-9]+)([GATCgatc]+)") try: m = r.search(remainder) start = m.group(1) @@ -643,7 +643,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_re_try = self.hp.parse_hgvs_variant(re_try) hgvs_re_try.posedit.edit.ref = delete start_pos = str(hgvs_re_try.posedit.pos.start) - if re.search('\-', start_pos): + if re.search(r'\-', start_pos): base, offset = start_pos.split('-') new_offset = 0 - int(offset) + (len(delete)) end_pos = int(base) @@ -651,7 +651,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_re_try.posedit.pos.end.offset = int(new_offset) - 1 not_delins = ref_ac + ':' + ref_type + '.' 
+ start_pos + '_' + str( hgvs_re_try.posedit.pos.end) + 'del' + delete + 'ins' + insert - elif re.search('\+', start_pos): + elif re.search(r'\+', start_pos): base, offset = start_pos.split('+') end_pos = int(base) + (len(delete) - int(offset) - 1) new_offset = 0 + int(offset) + (len(delete) - 1) @@ -671,7 +671,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_not_delins = self.hp.parse_hgvs_variant(not_delins) except hgvs.exceptions.HGVSError as e: # Sort out multiple ALTS from VCF inputs - if re.search("([GATCgatc]+)>([GATCgatc]+),([GATCgatc]+)", not_delins): + if re.search(r"([GATCgatc]+)>([GATCgatc]+),([GATCgatc]+)", not_delins): header, alts = not_delins.split('>') # Split up the alts into a list alt_list = alts.split(',') @@ -733,8 +733,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continues validation If not, an error message is generated and the loop continues """ - edit_pass = re.compile('_\d+$') - edit_fail = re.compile('\d+$') + edit_pass = re.compile(r'_\d+$') + edit_fail = re.compile(r'\d+$') if edit_fail.search(input): if edit_pass.search(input): pass @@ -751,7 +751,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr except hgvs.exceptions.HGVSError as e: error = str(e) error = 'The syntax of the input variant description is invalid ' - if re.search('ins\d+', failed): + if re.search(r'ins\d+', failed): issue_link = 'http://varnomen.hgvs.org/recommendations/DNA/variant/insertion/' error = error + ' please refer to ' + issue_link validation['warnings'] = validation['warnings'] + error @@ -780,8 +780,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs can now parse the string into an hgvs variant object and manipulate it """ caution = '' - compounder = re.compile('\(NM_') - compounder_b = re.compile('\(ENST') + compounder = re.compile(r'\(NM_') + compounder_b = re.compile(r'\(ENST') if 
compounder.search(input): # Find pattern e.g. +0000 and assign to a variable transy = re.search(r"(NM_.+)", input) @@ -800,20 +800,20 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr containing intronic variant descriptions. In such instances, allele descriptions should be re-submitted by the user at the gene or genome level """ - if (re.search(':[gcnr].\[', input) and re.search('\;', input)) or ( - re.search(':[gcrn].\d+\[', input) and re.search('\;', input)) or (re.search('\(\;\)', input)): + if (re.search(r':[gcnr].\[', input) and re.search(r'\;', input)) or ( + re.search(r':[gcrn].\d+\[', input) and re.search(r'\;', input)) or (re.search(r'\(\;\)', input)): # handle LRG inputs - if re.match('^LRG', input): - if re.match('^LRG\d+', input): + if re.match(r'^LRG', input): + if re.match(r'^LRG\d+', input): string, remainder = input.split(':') reference = string.replace('LRG', 'LRG_') input = reference + ':' + remainder caution = string + ' updated to ' + reference - if not re.match('^LRG_\d+', input): + if not re.match(r'^LRG_\d+', input): pass - elif re.match('^LRG_\d+:g.', input) or re.match('^LRG_\d+:p.', input) or re.match('^LRG_\d+:c.', + elif re.match(r'^LRG_\d+:g.', input) or re.match(r'^LRG_\d+:p.', input) or re.match(r'^LRG_\d+:c.', input) or re.match( - '^LRG_\d+:n.', input): + r'^LRG_\d+:n.', input): lrg_reference, variation = input.split(':') refseqgene_reference = self.db.get_RefSeqGeneID_from_lrgID(lrg_reference) if refseqgene_reference != 'none': @@ -824,8 +824,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation validation['warnings'] = validation['warnings'] + ': ' + str(caution) logger.warning(str(caution)) - elif re.match('^LRG_\d+t\d+:c.', input) or re.match('^LRG_\d+t\d+:n.', input) or re.match( - '^LRG_\d+t\d+:p.', input) or re.match('^LRG_\d+t\d+:g.', 
input): + elif re.match(r'^LRG_\d+t\d+:c.', input) or re.match(r'^LRG_\d+t\d+:n.', input) or re.match( + r'^LRG_\d+t\d+:p.', input) or re.match(r'^LRG_\d+t\d+:g.', input): lrg_reference, variation = input.split(':') refseqtranscript_reference = self.db.get_RefSeqTranscriptID_from_lrgTranscriptID( lrg_reference) @@ -894,7 +894,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Check the initial validity of the input if formatted == 'invalid': - if re.search('\w+\:[gcnmrp]', input) and not re.search('\w+\:[gcnmrp]\.', input): + if re.search(r'\w+\:[gcnmrp]', input) and not re.search(r'\w+\:[gcnmrp]\.', input): error = 'Variant description ' + input + ' lacks the . character between and in the expected pattern :.' else: error = 'Variant description ' + input + ' is not in an accepted format' @@ -906,7 +906,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr variant = formatted['variant'] input = formatted['variant'] stash_input = formatted['variant'] - type = formatted['type'] + format_type = formatted['type'] logger.trace("Variant input formatted, proceeding to validate.", validation) # Conversions """ @@ -922,7 +922,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Primary check that hgvs will accept the variant error = 'false' # Change RNA bases to upper case but nothing else - if type == ":r.": + if format_type == ":r.": variant = variant.upper() variant = variant.replace(':R.', ':r.') # lowercase the supported variant types @@ -1028,7 +1028,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr Also identifies some variants which span into the downstream sequence i.e. 
out of bounds """ - astr = re.compile('\*') + astr = re.compile(r'\*') if astr.search(str(input_parses.posedit)): input_parses_copy = copy.deepcopy(input_parses) input_parses_copy.type = "c" @@ -1055,10 +1055,10 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr pass # Catch missing version number in refseq - ref_type = re.compile("^N\w\w\d") - is_version = re.compile("\d\.\d") - en_type = re.compile('^ENS') - lrg_type = re.compile('LRG') + ref_type = re.compile(r"^N\w\w\d") + is_version = re.compile(r"\d\.\d") + en_type = re.compile(r'^ENS') + lrg_type = re.compile(r'LRG') if (ref_type.search(str(input_parses)) and is_version.search(str(input_parses))) or ( en_type.search(str(input_parses))): pass @@ -1077,17 +1077,17 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr equivalent RefSeq identifiers. The lookup data is stored in the VariantValidator MySQL database """ - if re.match('^LRG', str(input_parses)): - if re.match('^LRG\d+', str(input_parses.ac)): + if re.match(r'^LRG', str(input_parses)): + if re.match(r'^LRG\d+', str(input_parses.ac)): string = str(input_parses.ac) reference = string.replace('LRG', 'LRG_') input_parses.ac = reference caution = string + ' updated to ' + reference - if not re.match('^LRG_\d+', str(input_parses)): + if not re.match(r'^LRG_\d+', str(input_parses)): pass - elif re.match('^LRG_\d+:g.', str(input_parses)) or re.match('^LRG_\d+:p.', + elif re.match(r'^LRG_\d+:g.', str(input_parses)) or re.match(r'^LRG_\d+:p.', str(input_parses)) or re.match( - '^LRG_\d+:c.', str(input_parses)) or re.match('^LRG_\d+:n.', str(input_parses)): + r'^LRG_\d+:c.', str(input_parses)) or re.match(r'^LRG_\d+:n.', str(input_parses)): lrg_reference, variation = str(input_parses).split(':') refseqgene_reference = self.db.get_RefSeqGeneID_from_lrgID(lrg_reference) if refseqgene_reference != 'none': @@ -1101,9 +1101,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, 
transcr caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation validation['warnings'] = validation['warnings'] + ': ' + str(caution) logger.warning(str(caution)) - elif re.match('^LRG_\d+t\d+:c.', str(input_parses)) or re.match('^LRG_\d+t\d+:n.', + elif re.match(r'^LRG_\d+t\d+:c.', str(input_parses)) or re.match(r'^LRG_\d+t\d+:n.', str(input_parses)) or re.match( - '^LRG_\d+t\d+:p.', str(input_parses)) or re.match('^LRG_\d+t\d+:g.', str(input_parses)): + r'^LRG_\d+t\d+:p.', str(input_parses)) or re.match(r'^LRG_\d+t\d+:g.', str(input_parses)): lrg_reference, variation = str(input_parses).split(':') refseqtranscript_reference = self.db.get_RefSeqTranscriptID_from_lrgTranscriptID( lrg_reference) @@ -1126,21 +1126,21 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr Evolving list of common mistakes, see sections below """ # NM_ .g - if (re.search('^NM_', variant) or re.search('^NR_', variant)) and re.search(':g.', variant): + if (re.search(r'^NM_', variant) or re.search(r'^NR_', variant)) and re.search(r':g.', variant): suggestion = input.replace(':g.', ':c.') error = 'Transcript reference sequence input as genomic (g.) reference sequence. Did you mean ' + suggestion + '?' validation['warnings'] = validation['warnings'] + ': ' + error logger.warning(error) continue # NR_ c. - if re.search('^NR_', input) and re.search(':c.', input): + if re.search(r'^NR_', input) and re.search(r':c.', input): suggestion = input.replace(':c.', ':n.') error = 'Non-coding transcript reference sequence input as coding (c.) reference sequence. Did you mean ' + suggestion + '?' validation['warnings'] = validation['warnings'] + ': ' + error logger.warning(error) continue # NM_ n. 
- if re.search('^NM_', input) and re.search(':n.', input): + if re.search(r'^NM_', input) and re.search(r':n.', input): suggestion = input.replace(':n.', ':c.') error = 'Coding transcript reference sequence input as non-coding transcript (n.) reference sequence. Did you mean ' + suggestion + '?' validation['warnings'] = validation['warnings'] + ': ' + error @@ -1148,8 +1148,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue # NM_ NC_ NG_ NR_ p. - if (re.search('^NM_', variant) or re.search('^NR_', variant) or re.search('^NC_', variant) or re.search( - '^NG_', variant)) and re.search(':p.', variant): + if (re.search(r'^NM_', variant) or re.search(r'^NR_', variant) or re.search(r'^NC_', variant) or re.search( + r'^NG_', variant)) and re.search(r':p.', variant): issue_link = 'http://varnomen.hgvs.org/recommendations/protein/' error = 'Using a nucleotide reference sequence (NM_ NR_ NG_ NC_) to specify protein-level (p.) variation is not HGVS compliant. Please select an appropriate protein reference sequence (NP_)' validation['warnings'] = validation['warnings'] + ': ' + error @@ -1157,7 +1157,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue # NG_ c or NC_c.. - if (re.search('^NG_', variant) or re.search('^NC_', variant)) and re.search(':c.', variant): + if (re.search(r'^NG_', variant) or re.search(r'^NC_', variant)) and re.search(r':c.', variant): suggestion = ': For additional assistance, submit ' + str(variant) + ' to VariantValidator' error = 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. 
NG_(NM_):c.PositionVariation' + suggestion validation['warnings'] = validation['warnings'] + ': ' + error @@ -1207,14 +1207,14 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr fn.exceptPass() elif input_parses.type == 'c': - if re.search('\*', str(input_parses)) or re.search('c.\-', str(input_parses)): + if re.search(r'\*', str(input_parses)) or re.search(r'c.\-', str(input_parses)): # Catch variation in UTRs # These should be in the sequence so can be directly validated. Need to pass to n. try: self.vr.validate(input_parses) except hgvs.exceptions.HGVSError as e: error = str(e) - if re.search('datums is ill-defined', error): + if re.search(r'datums is ill-defined', error): called_ref = input_parses.posedit.edit.ref try: to_n = evm.c_to_n(input_parses) @@ -1238,25 +1238,25 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hn.normalize(input_parses) except hgvs.exceptions.HGVSError as e: fn.exceptPass() - if re.search('bounds', str(e)): + if re.search('bounds', str(error)): try: identity_info = self.hdp.get_tx_identity_info(input_parses.ac) ref_start = identity_info[3] ref_end = identity_info[4] - if re.match('-', str( + if re.match(r'-', str( input_parses.posedit.pos.start)) and input_parses.posedit.pos.start.offset == 0: # upstream positions boundary = int('-' + str(ref_start)) remainder = int(str(input_parses.posedit.pos.start)) - boundary input_parses.posedit.pos.start.base = boundary input_parses.posedit.pos.start.offset = remainder - if re.match('-', str( + if re.match(r'-', str( input_parses.posedit.pos.end)) and input_parses.posedit.pos.end.offset == 0: boundary = int('-' + str(ref_start)) remainder = int(str(input_parses.posedit.pos.end)) - boundary input_parses.posedit.pos.end.base = boundary input_parses.posedit.pos.end.offset = remainder - if re.match('\*', str( + if re.match(r'\*', str( input_parses.posedit.pos.start)) and input_parses.posedit.pos.start.offset == 0: # downstream 
positions tot_end_pos = str(input_parses.posedit.pos.start).replace('*', '') @@ -1265,7 +1265,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr input_parses.posedit.pos.start.base = boundary offset = int(tot_end_pos) - int(boundary) input_parses.posedit.pos.start.offset = offset - if re.match('\*', str( + if re.match(r'\*', str( input_parses.posedit.pos.end)) and input_parses.posedit.pos.end.offset == 0: tot_end_pos = str(input_parses.posedit.pos.end).replace('*', '') ts_seq = self.sf.fetch_seq(input_parses.ac) @@ -1303,7 +1303,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr logger.warning(str(e)) continue - if re.search('n.1-', str(input_parses)): + if re.search(r'n.1-', str(input_parses)): input_parses = evm.n_to_c(input_parses) error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' genomic_position = self.myevm_t_to_g(input_parses, no_norm_evm, primary_assembly, @@ -1319,7 +1319,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr input_parses = evm.n_to_c(input_parses) # Intronic positions in UTRs - if re.search('\d\-\d', str(input_parses)) or re.search('\d\+\d', str(input_parses)): + if re.search(r'\d\-\d', str(input_parses)) or re.search(r'\d\+\d', str(input_parses)): # Can we go c-g-c try: to_genome = self.myevm_t_to_g(input_parses, no_norm_evm, primary_assembly, @@ -1332,18 +1332,18 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr identity_info = self.hdp.get_tx_identity_info(input_parses.ac) ref_start = identity_info[3] ref_end = identity_info[4] - if re.match('-', str(input_parses.posedit.pos.start)): + if re.match(r'-', str(input_parses.posedit.pos.start)): # upstream positions boundary = int('-' + str(ref_start)) remainder = int(str(input_parses.posedit.pos.start)) - boundary input_parses.posedit.pos.start.base = 
boundary input_parses.posedit.pos.start.offset = remainder - if re.match('-', str(input_parses.posedit.pos.end)): + if re.match(r'-', str(input_parses.posedit.pos.end)): boundary = int('-' + str(ref_start)) remainder = int(str(input_parses.posedit.pos.end)) - boundary input_parses.posedit.pos.end.base = boundary input_parses.posedit.pos.end.offset = remainder - if re.match('\*', str(input_parses.posedit.pos.start)): + if re.match(r'\*', str(input_parses.posedit.pos.start)): # downstream positions tot_end_pos = str(input_parses.posedit.pos.start).replace('*', '') ts_seq = self.sf.fetch_seq(input_parses.ac) @@ -1353,7 +1353,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tot_end_pos = int(te1) + int(te2) offset = int(tot_end_pos) - int(boundary) input_parses.posedit.pos.start.offset = offset - if re.match('\*', str(input_parses.posedit.pos.end)): + if re.match(r'\*', str(input_parses.posedit.pos.end)): tot_end_pos = str(input_parses.posedit.pos.end).replace('*', '') ts_seq = self.sf.fetch_seq(input_parses.ac) boundary = len(ts_seq) - ref_end @@ -1394,7 +1394,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr logger.warning(str(error)) continue - elif re.search('\d\-', str(input_parses)) or re.search('\d\+', str(input_parses)): + elif re.search(r'\d\-', str(input_parses)) or re.search(r'\d\+', str(input_parses)): # Quick look at syntax validation try: self.vr.validate(input_parses) @@ -1548,7 +1548,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr elif input_parses.type == 'n': - if re.search('\+', str(input_parses)) or re.search('\-', str(input_parses)): + if re.search(r'\+', str(input_parses)) or re.search(r'\-', str(input_parses)): # Catch variation in UTRs # These should be in the sequence so can be directly validated. Need to pass to n. 
try: @@ -1601,7 +1601,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr logger.warning(str(error)) continue - if re.search('n.1-', str(input_parses)): + if re.search(r'n.1-', str(input_parses)): error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' genomic_position = self.myevm_t_to_g(input_parses, no_norm_evm, primary_assembly, hn) @@ -1612,7 +1612,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: pass - if re.search('\d\-', str(input_parses)) or re.search('\d\+', str(input_parses)): + if re.search(r'\d\-', str(input_parses)) or re.search(r'\d\+', str(input_parses)): # Quick look at syntax validation try: self.vr.validate(input_parses) @@ -1777,7 +1777,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr """ Reformat m. into the new HGVS standard which is now m again! """ - if type == ':m.' or re.match('NC_012920.1', str(input_parses.ac)) or re.match('NC_001807.4', + if format_type == ':m.' or re.match('NC_012920.1', str(input_parses.ac)) or re.match('NC_001807.4', str(input_parses.ac)): hgvs_mito = copy.deepcopy(input_parses) if (re.match('NC_012920.1', str(hgvs_mito.ac)) and hgvs_mito.type == 'g') or ( @@ -1811,7 +1811,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr pass # handle :p. - if type == ':p.': + if format_type == ':p.': error = 'false' # Try to validate the variant try: @@ -1859,10 +1859,10 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr convert r, into c. """ trapped_input = input - if type == ':r.': + if format_type == ':r.': hgvs_input = self.hp.parse_hgvs_variant(input) # Traps the hgvs variant of r. for further use # Change to coding variant - type = ':c.' + format_type = ':c.' # Change input to reflect! 
try: hgvs_c = self.va_func.hgvs_r_to_c(hgvs_input) @@ -1880,7 +1880,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr """ Identifies the transcript reference sequence name and HGNC gene symbol """ - if (type != ':g.'): + if (format_type != ':g.'): error = 'false' hgvs_vt = self.hp.parse_hgvs_variant(variant) try: @@ -2036,7 +2036,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr all relevant transcripts """ - if (type == ':g.'): + if (format_type == ':g.'): g_query = self.hp.parse_hgvs_variant(variant) # Genomic coordinates can be validated immediately @@ -2108,7 +2108,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if len(rel_var) == 0: # Check for NG_ - rsg = re.compile('^NG_') + rsg = re.compile(r'^NG_') if rsg.search(variant): # parse hgvs_refseqgene = self.hp.parse_hgvs_variant(variant) @@ -2264,7 +2264,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if select_transcripts != 'all': tx_ac = saved_hgvs_coding.ac # If it's in the selected tx dict, keep it - if tx_ac.split('.')[0] in select_transcripts_dict.keys(): + if tx_ac.split('.')[0] in list(select_transcripts_dict.keys()): pass # If not get rid of it! else: @@ -2345,9 +2345,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr intronic_variant = 'true' if intronic_variant != 'hard_fail': - if re.search('\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search('\d+\-', str( - hgvs_seek_var.posedit.pos)) or re.search('\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search('\*\d+\-', + if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str(hgvs_seek_var.posedit.pos)): # Double check to see whether the variant is actually intronic? 
for exon in ori: @@ -2361,9 +2361,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: intronic_variant = 'true' - if re.search('\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search('\d+\-', str( - hgvs_seek_var.posedit.pos)) or re.search('\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search('\*\d+\-', str(hgvs_seek_var.posedit.pos)): + if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str(hgvs_seek_var.posedit.pos)): # Double check to see whether the variant is actually intronic? for exon in ori: genomic_start = int(exon['alt_start_i']) @@ -2462,11 +2462,11 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Create normalized version of tx_hgvs_not_delins rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) # Check for +ve base and adjust - if (re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)) or re.search('\-', + if (re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', str( rn_tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) or re.search( - '\-', str(rn_tx_hgvs_not_delins.posedit.pos.end))): + re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) or re.search( + r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end))): # Remove offsetting to span the gap rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 @@ -2476,7 +2476,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr rn_tx_hgvs_not_delins.posedit.edit.alt = '' except: fn.exceptPass() - elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): # move tx end base to next available non-offset base 
rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 @@ -2490,7 +2490,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr primary_assembly, hn) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) - elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): # move tx start base to previous available non-offset base rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 rn_tx_hgvs_not_delins.posedit.edit.ref = '' @@ -2508,7 +2508,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # pass # Check for -ve base and adjust - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search('\-', + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\-', str( rn_tx_hgvs_not_delins.posedit.pos.start)): # Remove offsetting to span the gap @@ -2520,7 +2520,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr rn_tx_hgvs_not_delins.posedit.edit.alt = '' except: fn.exceptPass() - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): # move tx end base back to next available non-offset base rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 # Delete the ref @@ -2539,7 +2539,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr primary_assembly, hn) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): # move tx start base to previous available non-offset base rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 rn_tx_hgvs_not_delins.posedit.pos.start.base = 
rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 @@ -2639,10 +2639,10 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + primary_assembly # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search('\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search('\-', + if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', str( tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search('\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search('\-', + re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', str( tx_hgvs_not_delins.posedit.pos.end))): gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) @@ -2660,13 +2660,13 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_gap_fill_variant_delins_from_dup) # Identify which half of the NOT-intron the start position of the variant is in - if re.search('\-', str(tx_gap_fill_variant.posedit.pos.start)): + if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') tx_gap_fill_variant.posedit.edit.alt = '' tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search('\+', str(tx_gap_fill_variant.posedit.pos.start)): + elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') @@ -2751,7 +2751,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr 
alternate_sequence_bases = [] for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): - if integer in alt_base_dict.keys(): + if integer in list(alt_base_dict.keys()): alternate_sequence_bases.append(alt_base_dict[integer]) else: alternate_sequence_bases.append(ref_base_dict[integer]) @@ -2775,7 +2775,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr for_location_c = copy.deepcopy(hgvs_refreshed_variant) if re.match('NM_', str(for_location_c)): for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - if re.match('\-', str(for_location_c.posedit.pos.start.offset)): + if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): gps = for_location_c.posedit.pos.start.base - 1 gpe = for_location_c.posedit.pos.start.base else: @@ -2811,8 +2811,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr except hgvs.exceptions.HGVSInvalidVariantError: fn.exceptPass() - if re.search('\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - '\+', str(tx_hgvs_not_delins.posedit.pos.end)): + if re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( disparity_deletion_in[ @@ -2854,8 +2854,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' # Warn update auto_info = auto_info + '%s' % (gap_position) - elif re.search('\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - '\+', str(tx_hgvs_not_delins.posedit.pos.start)): + elif re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): auto_info = auto_info + 'Genome position ' + str( stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( @@ -2898,9 +2898,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' # Warn update auto_info = auto_info + '%s' % (gap_position) - elif re.search('\-', + elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - '\-', str(tx_hgvs_not_delins.posedit.pos.end)): + r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( disparity_deletion_in[ @@ -2942,8 +2942,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' # Warn update auto_info = auto_info + '%s' % (gap_position) - elif re.search('\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - '\-', str(tx_hgvs_not_delins.posedit.pos.start)): + elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): auto_info = auto_info + 'Genome position ' + str( stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( @@ -3137,7 +3137,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue # TYPE = :c. - if type == ':c.' 
or type == ':n.': + if format_type == ':c.' or format_type == ':n.': # Flag for validation valid = 'false' @@ -3148,7 +3148,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Do we keep it? if select_transcripts != 'all': - if tx_ac in select_transcripts_dict_plus_version.keys(): + if tx_ac in list(select_transcripts_dict_plus_version.keys()): pass # If not get rid of it! else: @@ -3214,12 +3214,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # INTRONIC OFFSETS - Required for Exon table # Variable to collect offset to exon boundary ex_offset = 0 - plus = re.compile("\d\+\d") # finds digit + digit - minus = re.compile("\d\-\d") # finds digit - digit + plus = re.compile(r"\d\+\d") # finds digit + digit + minus = re.compile(r"\d\-\d") # finds digit - digit - geno = re.compile(':g.') + geno = re.compile(r':g.') if plus.search(input) or minus.search(input): - es = re.compile('error') + es = re.compile(r'error') if es.search(str(to_g)): if alt_aln_method != 'genebuild': error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g @@ -3252,7 +3252,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr elif geno.search(input): if plus.search(variant) or minus.search(variant): to_g = self.genomic(variant, no_norm_evm, primary_assembly,hn) - es = re.compile('error') + es = re.compile(r'error') if es.search(str(to_g)): if alt_aln_method != 'genebuild': error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g @@ -3329,7 +3329,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr cck = 'false' if (plus.search(input)): # Regular expression catches the start of the interval only based on .00+00 pattern - inv_start = re.compile("\.\d+\+\d") + inv_start = re.compile(r"\.\d+\+\d") if (inv_start.search(input)): # Find 
pattern e.g. +0000 and assign to a variable off_value = re.search(r"(\+\d+)", input) @@ -3339,7 +3339,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr cck = 'true' if (minus.search(input)): # Regular expression catches the start of the interval only based on .00-00 pattern - inv_start = re.compile("\.\d+\-\d") + inv_start = re.compile(r"\.\d+\-\d") if (inv_start.search(input)): # Find pattern e.g. -0000 and assign to a variable off_value = re.search(r"(\-\d+)", input) @@ -3709,8 +3709,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr trapped_input) # Traps the hgvs variant of r. for further use inp = str(self.hgvs_r_to_c(hgvs_rna_input)) # Regex - plus = re.compile("\d\+\d") # finds digit + digit - minus = re.compile("\d\-\d") # finds digit - digit + plus = re.compile(r"\d\+\d") # finds digit + digit + minus = re.compile(r"\d\-\d") # finds digit - digit if plus.search(input) or minus.search(input): to_g = self.genomic(inp, no_norm_evm, primary_assembly,hn) es = re.compile('error') @@ -3873,11 +3873,11 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = 'false' genomic_validation = str( self.genomic(input, no_norm_evm, primary_assembly,hn) ) - del_end = re.compile('\ddel$') - delins = re.compile('delins') - inv = re.compile('inv') + del_end = re.compile(r'\ddel$') + delins = re.compile(r'delins') + inv = re.compile(r'inv') if fn.valstr(pre_valid) != fn.valstr(post_valid): - if type != ':g.': + if format_type != ':g.': if caution == '': caution = fn.valstr(pre_valid) + ' automapped to ' + fn.valstr(post_valid) else: @@ -3905,7 +3905,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # v0.1a1 edit if fn.valstr(pre_valid) != fn.valstr(post_valid): - if type == ':g.': + if format_type == ':g.': if caution == '': caution = fn.valstr(pre_valid) + ' automapped to ' + fn.valstr(post_valid) else: @@ -4459,9 +4459,9 @@ def 
validate(self, batch_variant, selected_assembly, select_transcripts, transcr intronic_variant = 'true' if intronic_variant != 'hard_fail': - if re.search('\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search('\d+\-', str( - hgvs_seek_var.posedit.pos)) or re.search('\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search('\*\d+\-', + if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str(hgvs_seek_var.posedit.pos)): # Double check to see whether the variant is actually intronic? for exon in ori: @@ -4475,9 +4475,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: intronic_variant = 'true' - if re.search('\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search('\d+\-', str( - hgvs_seek_var.posedit.pos)) or re.search('\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search('\*\d+\-', str(hgvs_seek_var.posedit.pos)): + if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str(hgvs_seek_var.posedit.pos)): # Double check to see whether the variant is actually intronic? 
for exon in ori: genomic_start = int(exon['alt_start_i']) @@ -4575,8 +4575,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) # Check for +1 base and adjust - if re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - '\+', + if re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( + r'\+', str( rn_tx_hgvs_not_delins.posedit.pos.start)): # Remove offsetting to span the gap @@ -4589,7 +4589,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr except: pass - elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): # move tx end base to next available non-offset base rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 @@ -4606,7 +4606,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr str(saved_hgvs_coding.ac)) # tx_hgvs_not_delins = rn_tx_hgvs_not_delins - elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): # move tx start base to previous available non-offset base rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 rn_tx_hgvs_not_delins.posedit.edit.ref = '' @@ -4624,8 +4624,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # pass # Check for -ve base and adjust - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - '\-', + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( + r'\-', str( rn_tx_hgvs_not_delins.posedit.pos.start)): # Remove offsetting to span the gap @@ -4637,7 +4637,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr rn_tx_hgvs_not_delins.posedit.edit.alt = '' except: pass - 
elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 # Delete the ref rn_tx_hgvs_not_delins.posedit.edit.ref = '' @@ -4655,7 +4655,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr primary_assembly, hn) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): # move tx start base to previous available non-offset base rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 @@ -4806,12 +4806,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr suppress_c_normalization = 'true' # amend_RefSeqGene = 'true' # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search('\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( - '\-', + if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( + r'\-', str( tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search('\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( - '\-', + re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( + r'\-', str( tx_hgvs_not_delins.posedit.pos.end))): @@ -4831,13 +4831,13 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_gap_fill_variant_delins_from_dup) # Identify which half of the NOT-intron the start position of the variant is in - if re.search('\-', str(tx_gap_fill_variant.posedit.pos.start)): + if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') tx_gap_fill_variant.posedit.pos.end.offset 
= int('0') # int('-1') tx_gap_fill_variant.posedit.edit.alt = '' tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search('\+', str(tx_gap_fill_variant.posedit.pos.start)): + elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') @@ -4924,7 +4924,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr alternate_sequence_bases = [] for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): - if integer in alt_base_dict.keys(): + if integer in list(alt_base_dict.keys()): alternate_sequence_bases.append(alt_base_dict[integer]) else: alternate_sequence_bases.append(ref_base_dict[integer]) @@ -4948,7 +4948,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr for_location_c = copy.deepcopy(hgvs_refreshed_variant) if re.match('NM_', str(for_location_c)): for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - if re.match('\-', str(for_location_c.posedit.pos.start.offset)): + if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): gps = for_location_c.posedit.pos.start.base - 1 gpe = for_location_c.posedit.pos.start.base else: @@ -4985,9 +4985,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr except hgvs.exceptions.HGVSInvalidVariantError: fn.exceptPass() - if re.search('\+', + if re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - '\+', str(tx_hgvs_not_delins.posedit.pos.end)): + r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( disparity_deletion_in[ @@ -5030,9 +5030,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr gpe) + '\n' # Warn update auto_info = auto_info + '%s' % (gap_position) - elif re.search('\+', + elif re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - '\+', str(tx_hgvs_not_delins.posedit.pos.start)): + r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): auto_info = auto_info + 'Genome position ' + str( stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( @@ -5077,9 +5077,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr gpe) + '\n' # Warn update auto_info = auto_info + '%s' % (gap_position) - elif re.search('\-', + elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - '\-', str(tx_hgvs_not_delins.posedit.pos.end)): + r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( disparity_deletion_in[ @@ -5122,9 +5122,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr gpe) + '\n' # Warn update auto_info = auto_info + '%s' % (gap_position) - elif re.search('\-', + elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - '\-', str(tx_hgvs_not_delins.posedit.pos.start)): + r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): auto_info = auto_info + 'Genome position ' + str( stored_hgvs_not_delins.ac) + ':g.' 
+ str( stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( @@ -5267,7 +5267,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr info_out = [] info_out.append( 'The displayed variants may be artefacts of aligning ' + hgvs_coding.ac + ' with genome build ' + primary_assembly) - for ky in info_keys.keys(): + for ky in list(info_keys.keys()): info_out.append(ky) auto_info = '\n'.join(info_out) auto_info = auto_info + '\nCaution should be used when reporting the displayed variant descriptions: If you are unsure, please contact admin' @@ -5495,7 +5495,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Create normalized version of tx_hgvs_not_delins rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) # Check for +ve base and adjust - if re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search('\+', + if re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\+', str( rn_tx_hgvs_not_delins.posedit.pos.start)): # Remove offsetting to span the gap @@ -5508,7 +5508,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr except: fn.exceptPass() - elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): # move tx end base to next available non-offset base rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 @@ -5522,7 +5522,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr primary_assembly, hn) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) - elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): rn_tx_hgvs_not_delins.posedit.edit.ref = '' # move tx start base to previous available non-offset 
base rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 @@ -5540,7 +5540,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # pass # Check for -ve base and adjust - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search('\-', + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\-', str( rn_tx_hgvs_not_delins.posedit.pos.start)): # Remove offsetting to span the gap @@ -5552,7 +5552,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr rn_tx_hgvs_not_delins.posedit.edit.alt = '' except: fn.exceptPass() - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): # move tx end base back to next available non-offset base # rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base - 1 rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 @@ -5572,7 +5572,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr primary_assembly, hn) rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): # move tx start base to previous available non-offset base rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 @@ -5702,10 +5702,10 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + primary_assembly # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search('\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search('\-', + if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', 
str( tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search('\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search('\-', + re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', str( tx_hgvs_not_delins.posedit.pos.end))): gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) @@ -5724,13 +5724,13 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_gap_fill_variant_delins_from_dup) # Identify which half of the NOT-intron the start position of the variant is in - if re.search('\-', str(tx_gap_fill_variant.posedit.pos.start)): + if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') tx_gap_fill_variant.posedit.edit.alt = '' tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search('\+', str(tx_gap_fill_variant.posedit.pos.start)): + elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') @@ -5815,7 +5815,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr alternate_sequence_bases = [] for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): - if integer in alt_base_dict.keys(): + if integer in list(alt_base_dict.keys()): alternate_sequence_bases.append(alt_base_dict[integer]) else: alternate_sequence_bases.append(ref_base_dict[integer]) @@ -5839,7 +5839,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr for_location_c = copy.deepcopy(hgvs_refreshed_variant) if re.match('NM_', 
str(for_location_c)): for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - if re.match('\-', str(for_location_c.posedit.pos.start.offset)): + if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): gps = for_location_c.posedit.pos.start.base - 1 gpe = for_location_c.posedit.pos.start.base else: @@ -5876,8 +5876,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr except hgvs.exceptions.HGVSInvalidVariantError: fn.exceptPass() - if re.search('\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - '\+', str(tx_hgvs_not_delins.posedit.pos.end)): + if re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( disparity_deletion_in[ @@ -5919,8 +5919,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' # Warn update auto_info = auto_info + '%s' % (gap_position) - elif re.search('\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - '\+', str(tx_hgvs_not_delins.posedit.pos.start)): + elif re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): auto_info = auto_info + 'Genome position ' + str( stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( @@ -5963,9 +5963,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' # Warn update auto_info = auto_info + '%s' % (gap_position) - elif re.search('\-', + elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - '\-', str(tx_hgvs_not_delins.posedit.pos.end)): + r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( disparity_deletion_in[ @@ -6007,8 +6007,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' # Warn update auto_info = auto_info + '%s' % (gap_position) - elif re.search('\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - '\-', str(tx_hgvs_not_delins.posedit.pos.start)): + elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): auto_info = auto_info + 'Genome position ' + str( stored_hgvs_not_delins.ac) + ':g.' 
+ str( stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( @@ -6342,7 +6342,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr except hgvs.exceptions.HGVSError as e: error = str(e) if re.search('does not agree with reference sequence', str(error)): - match = re.findall('\(([GATC]+)\)', error) + match = re.findall(r'\(([GATC]+)\)', error) new_ref = match[1] hgvs_updated.posedit.edit.ref = new_ref self.vr.validate(hgvs_updated) @@ -6393,7 +6393,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr by_order = sorted(batch_list, key=itemgetter('order')) for valid in by_order: - if 'write' in valid.keys(): + if 'write' in list(valid.keys()): if valid['write'] == 'true': # Blank VCF # chr = '' @@ -7079,11 +7079,11 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr intronic_variant = 'true' if intronic_variant != 'hard_fail': - if re.search('\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search('\d+\-', + if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( hgvs_seek_var.posedit.pos)) or re.search( - '\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search('\*\d+\-', str( + r'\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str( hgvs_seek_var.posedit.pos)): # Double check to see whether the variant is actually intronic? 
for exon in ori: @@ -7191,9 +7191,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Create normalized version of tx_hgvs_not_delins rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) # Check for +1 base and adjust - if re.search('\+', + if re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - '\+', + r'\+', str( rn_tx_hgvs_not_delins.posedit.pos.start)): # Remove offsetting to span the gap @@ -7206,7 +7206,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr except: fn.exceptPass() - elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): # move tx end base to next available non-offset base rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 @@ -7221,7 +7221,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str( saved_hgvs_coding.ac)) - elif re.search('\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): # move tx start base to previous available non-offset base rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 rn_tx_hgvs_not_delins.posedit.edit.ref = '' @@ -7240,10 +7240,10 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # pass # Check for -ve base and adjust - elif re.search('\-', + elif re.search(r'\-', str( rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - '\-', + r'\-', str( rn_tx_hgvs_not_delins.posedit.pos.start)): # Remove offsetting to span the gap @@ -7255,7 +7255,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr rn_tx_hgvs_not_delins.posedit.edit.alt = '' except: fn.exceptPass() - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + 
elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): # move tx end base back to next available non-offset base rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 # Delete the ref @@ -7275,7 +7275,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, str( saved_hgvs_coding.ac)) - elif re.search('\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): # move tx start base to previous available non-offset base rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 @@ -7427,12 +7427,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if disparity_deletion_in[0] == 'transcript': # amend_RefSeqGene = 'true' # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search('\+', + if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( - '\-', str(tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search('\+', + r'\-', str(tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( - '\-', str(tx_hgvs_not_delins.posedit.pos.end))): + r'\-', str(tx_hgvs_not_delins.posedit.pos.end))): gapped_transcripts = gapped_transcripts + ' ' + str( tx_hgvs_not_delins.ac) @@ -7450,7 +7450,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr tx_gap_fill_variant_delins_from_dup) # Identify which half of the NOT-intron the start position of the variant is in - if re.search('\-', str(tx_gap_fill_variant.posedit.pos.start)): + if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 tx_gap_fill_variant.posedit.pos.start.offset = int( '0') # int('+1') @@ -7458,7 +7458,7 @@ def validate(self, 
batch_variant, selected_assembly, select_transcripts, transcr '0') # int('-1') tx_gap_fill_variant.posedit.edit.alt = '' tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search('\+', str(tx_gap_fill_variant.posedit.pos.start)): + elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): tx_gap_fill_variant.posedit.pos.start.offset = int( '0') # int('+1') tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 @@ -7550,7 +7550,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr genomic_gap_fill_variant.posedit.pos.start.base, genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): - if integer in alt_base_dict.keys(): + if integer in list(alt_base_dict.keys()): alternate_sequence_bases.append(alt_base_dict[integer]) else: alternate_sequence_bases.append(ref_base_dict[integer]) @@ -7575,7 +7575,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr for_location_c = copy.deepcopy(hgvs_refreshed_variant) if re.match('NM_', str(for_location_c)): for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - if re.match('\-', str(for_location_c.posedit.pos.start.offset)): + if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): gps = for_location_c.posedit.pos.start.base - 1 gpe = for_location_c.posedit.pos.start.base else: @@ -7613,9 +7613,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr except hgvs.exceptions.HGVSInvalidVariantError: fn.exceptPass() - if re.search('\+', str( + if re.search(r'\+', str( tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - '\+', + r'\+', str( tx_hgvs_not_delins.posedit.pos.end)): auto_info = auto_info + str( @@ -7661,8 +7661,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr gpe) + '\n' # Warn update auto_info = auto_info + '%s' % (gap_position) - elif re.search('\+', str( - tx_hgvs_not_delins.posedit.pos.end)) and not re.search('\+', + elif 
re.search(r'\+', str( + tx_hgvs_not_delins.posedit.pos.end)) and not re.search(r'\+', str( tx_hgvs_not_delins.posedit.pos.start)): auto_info = auto_info + 'Genome position ' + str( @@ -7709,9 +7709,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr gpe) + '\n' # Warn update auto_info = auto_info + '%s' % (gap_position) - elif re.search('\-', str( + elif re.search(r'\-', str( tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - '\-', + r'\-', str( tx_hgvs_not_delins.posedit.pos.end)): auto_info = auto_info + str( @@ -7757,8 +7757,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr gpe) + '\n' # Warn update auto_info = auto_info + '%s' % (gap_position) - elif re.search('\-', str( - tx_hgvs_not_delins.posedit.pos.end)) and not re.search('\-', + elif re.search(r'\-', str( + tx_hgvs_not_delins.posedit.pos.end)) and not re.search(r'\-', str( tx_hgvs_not_delins.posedit.pos.start)): auto_info = auto_info + 'Genome position ' + str( @@ -7943,7 +7943,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue if multi_g != []: - multi_g.sort() + print((multi_g, type(multi_g))) + multi_gen_vars = multi_g # '|'.join(multi_g) else: multi_gen_vars = [] @@ -8045,7 +8046,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Warn not directly mapped to specified genome build if genomic_accession != '': caution = '' - if primary_assembly.lower() not in primary_genomic_dicts.keys(): + if primary_assembly.lower() not in list(primary_genomic_dicts.keys()): warnings = warnings + ': ' + str( hgvs_coding) + ' cannot be mapped directly to genome build ' + primary_assembly + ': See alternative genomic loci or alternative genome builds for aligned genomic positions' @@ -8076,7 +8077,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if not 'Non-coding :n.' 
in predicted_protein_variant: try: format_p = predicted_protein_variant - format_p = re.sub('\(LRG_.+?\)', '', format_p) + format_p = re.sub(r'\(LRG_.+?\)', '', format_p) re_parse_protein = self.hp.parse_hgvs_variant(format_p) re_parse_protein_singleAA = fn.single_letter_protein(re_parse_protein) predicted_protein_variant_dict["slr"] = str(re_parse_protein_singleAA) @@ -8180,7 +8181,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Note: pyliftover uses the UCSC liftOver tool. # https://pypi.org/project/pyliftover/ genomic_position_info = valid_v['primary_assembly_loci'] - for g_p_key in genomic_position_info.keys(): + for g_p_key in list(genomic_position_info.keys()): # Identify the current build and hgvs_genomic descripsion if re.match('hg', g_p_key): @@ -8208,9 +8209,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Sort the respomse into primary assembly and ALT primary_assembly_loci = {} alt_genomic_loci = [] - for build_key, accession_dict in lifted_response.iteritems(): + for build_key, accession_dict in list(lifted_response.items()): try: - accession_key = accession_dict.keys()[0] + accession_key = list(accession_dict.keys())[0] if re.match('NC_', accession_dict[accession_key]['hgvs_genomic_description']): primary_assembly_loci[build_key.lower()] = accession_dict[accession_key] else: diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index 08e00ab6..d2a4b49f 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -18,9 +18,9 @@ import re import copy #import io -from vvDatabase import vvDatabase -from vvLogging import logger -import vvFunctions as fn +from .vvDatabase import vvDatabase +from .vvLogging import logger +from . 
import vvFunctions as fn diff --git a/VariantValidator/simpleTestScript.py b/VariantValidator/simpleTestScript.py index a351b611..1ff732fc 100644 --- a/VariantValidator/simpleTestScript.py +++ b/VariantValidator/simpleTestScript.py @@ -1,7 +1,7 @@ import json import os -from VariantValidator import Validator +from .VariantValidator import Validator #variant = 'NM_000088.3:c.589G>T' variant = 'NC_000012.11:g.122064776delG' @@ -11,4 +11,4 @@ validator=Validator() out=Validator().validate(variant, selected_assembly, select_transcripts) -print json.dumps(out, sort_keys=True, indent=4, separators=(',', ': ')) +print((json.dumps(out, sort_keys=True, indent=4, separators=(',', ': ')))) diff --git a/VariantValidator/testing/vvTestCompare.py b/VariantValidator/testing/vvTestCompare.py index 8d013405..1887342a 100644 --- a/VariantValidator/testing/vvTestCompare.py +++ b/VariantValidator/testing/vvTestCompare.py @@ -6,6 +6,6 @@ testDirectories=["testOutputs"] for d in testDirectories: - print("Comparing "+masterDirectory+" and "+d) + print(("Comparing "+masterDirectory+" and "+d)) fn.compareBatches(masterDirectory,d) diff --git a/VariantValidator/testing/vvTestFunctions.py b/VariantValidator/testing/vvTestFunctions.py index 415dfe49..86f7e02d 100644 --- a/VariantValidator/testing/vvTestFunctions.py +++ b/VariantValidator/testing/vvTestFunctions.py @@ -48,7 +48,7 @@ def saveValidationsAsJSON(path,validations): jOut=json.dumps(validations) with open(path,"w") as f: f.write(jOut) - print("JSON saved to "+path) + print(("JSON saved to "+path)) def loadVariantFile(path): out=[] @@ -99,14 +99,14 @@ def validateBatch(variantArray,val): selectTranscripts='all' selectedAssembly='GRCh37' for i,v in enumerate(variantArray): - print("VALIDATING Variant"+str(i)+" "+str(i+1)+"/"+str(len(variantArray))+" "+str(v)) + print(("VALIDATING Variant"+str(i)+" "+str(i+1)+"/"+str(len(variantArray))+" "+str(v))) try: out.append(val.validate(v,selectedAssembly,selectTranscripts)) except 
KeyboardInterrupt: print("Exiting...") sys.exit() except Exception as e: - print("FATAL error processing variant: "+str(e)) + print(("FATAL error processing variant: "+str(e))) out.append({"ERROR":str(e)}) raise #debug - uncomment this line to ensure the test leaves a traceback and fails the first time there's a critical error. return out @@ -114,9 +114,9 @@ def validateBatch(variantArray,val): def retrieveVariant(validation): #Returns the variant string (if possible) from a validation. out=None - for v in validation.values(): + for v in list(validation.values()): try: - if type(v)==type({}) and "submitted_variant" in v.keys(): + if type(v)==type({}) and "submitted_variant" in list(v.keys()): out=v["submitted_variant"] return out except (KeyError, TypeError, AttributeError): @@ -126,32 +126,32 @@ def retrieveVariant(validation): def compareValidations(v1,v2,id): #print(v1,v2) #Remove metadata - v1Keys=v1.keys() + v1Keys=list(v1.keys()) if "metadata" in v1Keys: v1Keys.remove("metadata") else: - print("Variant "+str(id)+": metadata not found in first variant") - v2Keys=v2.keys() + print(("Variant "+str(id)+": metadata not found in first variant")) + v2Keys=list(v2.keys()) if "metadata" in v2Keys: v2Keys.remove("metadata") else: - print("Variant "+str(id)+": metadata not found in second variant") + print(("Variant "+str(id)+": metadata not found in second variant")) for vk in v1Keys: if not (vk in v2Keys): # print("tag "+vk+" : "+str(v1[vk])+" not found in second variant") - print("Variant "+str(id)+": Tag "+vk+" not found in second variant") + print(("Variant "+str(id)+": Tag "+vk+" not found in second variant")) return False for vk in v2Keys: if not (vk in v1Keys): # print("tag "+vk+" : "+str(v2[vk])+" not found in first variant") - print("Variant "+str(id)+": Tag "+vk+" not found in first variant") + print(("Variant "+str(id)+": Tag "+vk+" not found in first variant")) return False for vk in v1Keys: if not (v1[vk]==v2[vk]): if type(v1[vk])==type(dict()) or 
type(v2[vk])==type(dict()): - print("Variant " + str(id) + ": Different tag values for key " + str(vk)) + print(("Variant " + str(id) + ": Different tag values for key " + str(vk))) else: - print("Variant "+str(id)+": Different tag values - "+str(vk)+" : "+str(v1[vk])+" vs. "+str(vk)+" : "+str(v2[vk])) + print(("Variant "+str(id)+": Different tag values - "+str(vk)+" : "+str(v1[vk])+" vs. "+str(vk)+" : "+str(v2[vk]))) return False return True @@ -169,10 +169,10 @@ def compareBatches(v1path,v2path): passScore+=1 if passScore==len(v1batch): #Test passed. - print("Validation sets are identical, "+str(passScore)+" passed") + print(("Validation sets are identical, "+str(passScore)+" passed")) return True else: - print("Validation sets are NOT identical, passed " + str(passScore) + "/" + str(len(v1batch))) + print(("Validation sets are NOT identical, passed " + str(passScore) + "/" + str(len(v1batch)))) #for i,v in enumerate(v1batch): #if not outFlags[i]: #print("Mismatch in validation "+str(i)) diff --git a/VariantValidator/variantValidator.py b/VariantValidator/variantValidator.py index 9e69723a..cba84e94 100644 --- a/VariantValidator/variantValidator.py +++ b/VariantValidator/variantValidator.py @@ -1,4 +1,4 @@ -import modules.vvMixinCore as vvMixinCore +from .modules import vvMixinCore as vvMixinCore class Validation(): ''' diff --git a/test/test_inputs.py b/test/test_inputs.py index 64b187de..2fdf5cbe 100644 --- a/test/test_inputs.py +++ b/test/test_inputs.py @@ -9,9 +9,9 @@ def setup_class(cls): def test_variant1(self): variant = 'NM_015120.4:c.35T>C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_015120.4:c.35T>C' in results.keys() + assert 'NM_015120.4:c.35T>C' in list(results.keys()) assert results['NM_015120.4:c.35T>C']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.35T>C' assert results['NM_015120.4:c.35T>C']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_015120.4:c.35T>C']['alt_genomic_loci'] == [] @@ -34,10 +34,10 @@ def test_variant1(self): def test_variant2(self): variant = 'NM_015120.4:c.39G>C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_015120.4:c.39G>C' in results.keys() + assert 'NM_015120.4:c.39G>C' in list(results.keys()) assert results['NM_015120.4:c.39G>C']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.39G>C' assert results['NM_015120.4:c.39G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_015120.4:c.39G>C']['alt_genomic_loci'] == [] @@ -59,10 +59,10 @@ def test_variant2(self): def test_variant3(self): variant = 'NM_015120.4:c.34C>T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_015120.4:c.34C>T' in results.keys() + assert 'NM_015120.4:c.34C>T' in list(results.keys()) assert results['NM_015120.4:c.34C>T']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.34C>T' assert results['NM_015120.4:c.34C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_015120.4:c.34C>T']['alt_genomic_loci'] == [] @@ -74,20 +74,20 @@ def test_variant3(self): assert results['NM_015120.4:c.34C>T']['hgvs_lrg_variant'] == 'LRG_741:g.5145C>T' assert results['NM_015120.4:c.34C>T']['hgvs_transcript_variant'] == 'NM_015120.4:c.34C>T' assert results['NM_015120.4:c.34C>T']['hgvs_refseqgene_variant'] == 'NG_011690.1:g.5145C>T' - assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613030C>T', 'vcf': {'chr': 'chr2', 'ref': u'C', 'pos': '73613030', 'alt': u'T'}} - assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385902C>T', 'vcf': {'chr': 'chr2', 'ref': u'C', 'pos': '73385902', 'alt': u'T'}} - assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000002.11:g.73613030C>T', 'vcf': {'chr': '2', 'ref': u'C', 'pos': '73613030', 'alt': u'T'}} - assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385902C>T', 'vcf': {'chr': '2', 'ref': u'C', 'pos': '73385902', 'alt': u'T'}} + assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613030C>T', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '73613030', 'alt': 'T'}} + assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385902C>T', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '73385902', 'alt': 'T'}} + assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613030C>T', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '73613030', 'alt': 'T'}} + assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385902C>T', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '73385902', 'alt': 'T'}} assert results['NM_015120.4:c.34C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} def test_variant4(self): variant = 'NC_000002.11:g.73613030C>T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_015120.4:c.34C>T' in results.keys() + assert 'NM_015120.4:c.34C>T' in list(results.keys()) assert results['NM_015120.4:c.34C>T']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.34C>T' assert results['NM_015120.4:c.34C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_015120.4:c.34C>T']['alt_genomic_loci'] == [] 
@@ -99,20 +99,20 @@ def test_variant4(self): assert results['NM_015120.4:c.34C>T']['hgvs_lrg_variant'] == 'LRG_741:g.5145C>T' assert results['NM_015120.4:c.34C>T']['hgvs_transcript_variant'] == 'NM_015120.4:c.34C>T' assert results['NM_015120.4:c.34C>T']['hgvs_refseqgene_variant'] == 'NG_011690.1:g.5145C>T' - assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613030C>T', 'vcf': {'chr': 'chr2', 'ref': u'C', 'pos': '73613030', 'alt': u'T'}} - assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385902C>T', 'vcf': {'chr': 'chr2', 'ref': u'C', 'pos': '73385902', 'alt': u'T'}} - assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613030C>T', 'vcf': {'chr': '2', 'ref': u'C', 'pos': '73613030', 'alt': u'T'}} - assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385902C>T', 'vcf': {'chr': '2', 'ref': u'C', 'pos': '73385902', 'alt': u'T'}} + assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613030C>T', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '73613030', 'alt': 'T'}} + assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385902C>T', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '73385902', 'alt': 'T'}} + assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613030C>T', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '73613030', 'alt': 'T'}} + assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385902C>T', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '73385902', 'alt': 'T'}} assert results['NM_015120.4:c.34C>T']['reference_sequence_records'] == 
{'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} def test_variant5(self): variant = 'NC_000023.10:g.33229673A>T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000109.3:c.7+127703T>A' in results.keys() + assert 'NM_000109.3:c.7+127703T>A' in list(results.keys()) assert results['NM_000109.3:c.7+127703T>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000109.3:c.7+127703T>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000109.3:c.7+127703T>A']['alt_genomic_loci'] == [] @@ -124,13 +124,13 @@ def test_variant5(self): assert results['NM_000109.3:c.7+127703T>A']['hgvs_lrg_variant'] == '' assert results['NM_000109.3:c.7+127703T>A']['hgvs_transcript_variant'] == 'NM_000109.3:c.7+127703T>A' assert results['NM_000109.3:c.7+127703T>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_000109.3:c.7+127703T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.33229673A>T', 'vcf': {'chr': 'chrX', 'ref': u'A', 'pos': '33229673', 'alt': u'T'}} - assert results['NM_000109.3:c.7+127703T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.33211556A>T', 'vcf': {'chr': 'chrX', 'ref': u'A', 'pos': '33211556', 'alt': u'T'}} - assert results['NM_000109.3:c.7+127703T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.33229673A>T', 'vcf': {'chr': 'X', 'ref': u'A', 'pos': '33229673', 'alt': u'T'}} - assert results['NM_000109.3:c.7+127703T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.33211556A>T', 'vcf': {'chr': 'X', 'ref': u'A', 'pos': '33211556', 'alt': u'T'}} + assert 
results['NM_000109.3:c.7+127703T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.33229673A>T', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '33229673', 'alt': 'T'}} + assert results['NM_000109.3:c.7+127703T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.33211556A>T', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '33211556', 'alt': 'T'}} + assert results['NM_000109.3:c.7+127703T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.33229673A>T', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '33229673', 'alt': 'T'}} + assert results['NM_000109.3:c.7+127703T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.33211556A>T', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '33211556', 'alt': 'T'}} assert results['NM_000109.3:c.7+127703T>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000100.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000109.3'} - assert 'NM_004006.2:c.-244T>A' in results.keys() + assert 'NM_004006.2:c.-244T>A' in list(results.keys()) assert results['NM_004006.2:c.-244T>A']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.-244T>A' assert results['NM_004006.2:c.-244T>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.-244T>A']['alt_genomic_loci'] == [] @@ -142,20 +142,20 @@ def test_variant5(self): assert results['NM_004006.2:c.-244T>A']['hgvs_lrg_variant'] == 'LRG_199:g.133054T>A' assert results['NM_004006.2:c.-244T>A']['hgvs_transcript_variant'] == 'NM_004006.2:c.-244T>A' assert results['NM_004006.2:c.-244T>A']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.133054T>A' - assert results['NM_004006.2:c.-244T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.33229673A>T', 'vcf': {'chr': 'chrX', 'ref': u'A', 'pos': '33229673', 'alt': u'T'}} - assert results['NM_004006.2:c.-244T>A']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000023.11:g.33211556A>T', 'vcf': {'chr': 'chrX', 'ref': u'A', 'pos': '33211556', 'alt': u'T'}} - assert results['NM_004006.2:c.-244T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.33229673A>T', 'vcf': {'chr': 'X', 'ref': u'A', 'pos': '33229673', 'alt': u'T'}} - assert results['NM_004006.2:c.-244T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.33211556A>T', 'vcf': {'chr': 'X', 'ref': u'A', 'pos': '33211556', 'alt': u'T'}} + assert results['NM_004006.2:c.-244T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.33229673A>T', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '33229673', 'alt': 'T'}} + assert results['NM_004006.2:c.-244T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.33211556A>T', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '33211556', 'alt': 'T'}} + assert results['NM_004006.2:c.-244T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.33229673A>T', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '33229673', 'alt': 'T'}} + assert results['NM_004006.2:c.-244T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.33211556A>T', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '33211556', 'alt': 'T'}} assert results['NM_004006.2:c.-244T>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} def test_variant6(self): variant = 'NM_001145026.1:c.715A>G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_001145026.1:c.715A>G' in results.keys() + assert 'NM_001145026.1:c.715A>G' in list(results.keys()) assert 
results['NM_001145026.1:c.715A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001145026.1:c.715A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001145026.1:c.715A>G']['alt_genomic_loci'] == [] @@ -167,19 +167,19 @@ def test_variant6(self): assert results['NM_001145026.1:c.715A>G']['hgvs_lrg_variant'] == '' assert results['NM_001145026.1:c.715A>G']['hgvs_transcript_variant'] == 'NM_001145026.1:c.715A>G' assert results['NM_001145026.1:c.715A>G']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['NM_001145026.1:c.715A>G']['primary_assembly_loci'].keys() - assert results['NM_001145026.1:c.715A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.80460707A>G', 'vcf': {'chr': 'chr12', 'ref': u'A', 'pos': '80460707', 'alt': u'G'}} - assert 'grch37' not in results['NM_001145026.1:c.715A>G']['primary_assembly_loci'].keys() - assert results['NM_001145026.1:c.715A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.80460707A>G', 'vcf': {'chr': '12', 'ref': u'A', 'pos': '80460707', 'alt': u'G'}} + assert 'hg19' not in list(results['NM_001145026.1:c.715A>G']['primary_assembly_loci'].keys()) + assert results['NM_001145026.1:c.715A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.80460707A>G', 'vcf': {'chr': 'chr12', 'ref': 'A', 'pos': '80460707', 'alt': 'G'}} + assert 'grch37' not in list(results['NM_001145026.1:c.715A>G']['primary_assembly_loci'].keys()) + assert results['NM_001145026.1:c.715A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.80460707A>G', 'vcf': {'chr': '12', 'ref': 'A', 'pos': '80460707', 'alt': 'G'}} assert results['NM_001145026.1:c.715A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001138498.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001145026.1'} def test_variant7(self): variant = 'NC_000016.9:g.2099572TC>T' 
results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_000548.4:c.138+821del' in results.keys() + assert 'NM_000548.4:c.138+821del' in list(results.keys()) assert results['NM_000548.4:c.138+821del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000548.4:c.138+821del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000548.4:c.138+821del']['alt_genomic_loci'] == [] @@ -197,7 +197,7 @@ def test_variant7(self): assert results['NM_000548.4:c.138+821del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} assert results['NM_000548.4:c.138+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000539.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000548.4'} - assert 'NM_001077183.2:c.138+821del' in results.keys() + assert 'NM_001077183.2:c.138+821del' in list(results.keys()) assert results['NM_001077183.2:c.138+821del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001077183.2:c.138+821del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001077183.2:c.138+821del']['alt_genomic_loci'] == [] @@ -215,7 +215,7 @@ def test_variant7(self): assert results['NM_001077183.2:c.138+821del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} assert results['NM_001077183.2:c.138+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001070651.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077183.2'} - assert 'NM_001318831.1:c.-89+821del' in results.keys() + assert 'NM_001318831.1:c.-89+821del' in list(results.keys()) assert results['NM_001318831.1:c.-89+821del']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001318831.1:c.-89+821del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001318831.1:c.-89+821del']['alt_genomic_loci'] == [] @@ -233,7 +233,7 @@ def test_variant7(self): assert results['NM_001318831.1:c.-89+821del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} assert results['NM_001318831.1:c.-89+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305760.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318831.1'} - assert 'NM_000548.3:c.138+821del' in results.keys() + assert 'NM_000548.3:c.138+821del' in list(results.keys()) assert results['NM_000548.3:c.138+821del']['hgvs_lrg_transcript_variant'] == 'LRG_487t1:c.138+821del' assert results['NM_000548.3:c.138+821del']['refseqgene_context_intronic_sequence'] == 'NG_005895.1(NM_000548.3):c.138+821del' assert results['NM_000548.3:c.138+821del']['alt_genomic_loci'] == [] @@ -246,12 +246,12 @@ def test_variant7(self): assert results['NM_000548.3:c.138+821del']['hgvs_transcript_variant'] == 'NM_000548.3:c.138+821del' assert results['NM_000548.3:c.138+821del']['hgvs_refseqgene_variant'] == 'NG_005895.1:g.5269del' assert results['NM_000548.3:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert 'hg38' not in results['NM_000548.3:c.138+821del']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_000548.3:c.138+821del']['primary_assembly_loci'].keys()) assert results['NM_000548.3:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert 'grch38' not in results['NM_000548.3:c.138+821del']['primary_assembly_loci'].keys() + assert 'grch38' not in 
list(results['NM_000548.3:c.138+821del']['primary_assembly_loci'].keys()) assert results['NM_000548.3:c.138+821del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_005895.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000539.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000548.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_487.xml'} - assert 'NM_001114382.1:c.138+821del' in results.keys() + assert 'NM_001114382.1:c.138+821del' in list(results.keys()) assert results['NM_001114382.1:c.138+821del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001114382.1:c.138+821del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001114382.1:c.138+821del']['alt_genomic_loci'] == [] @@ -264,12 +264,12 @@ def test_variant7(self): assert results['NM_001114382.1:c.138+821del']['hgvs_transcript_variant'] == 'NM_001114382.1:c.138+821del' assert results['NM_001114382.1:c.138+821del']['hgvs_refseqgene_variant'] == '' assert results['NM_001114382.1:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert 'hg38' not in results['NM_001114382.1:c.138+821del']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001114382.1:c.138+821del']['primary_assembly_loci'].keys()) assert results['NM_001114382.1:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert 'grch38' not in results['NM_001114382.1:c.138+821del']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001114382.1:c.138+821del']['primary_assembly_loci'].keys()) assert results['NM_001114382.1:c.138+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001107854.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001114382.1'} - assert 'NM_001318832.1:c.171+821del' in results.keys() + assert 'NM_001318832.1:c.171+821del' in list(results.keys()) assert results['NM_001318832.1:c.171+821del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001318832.1:c.171+821del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001318832.1:c.171+821del']['alt_genomic_loci'] == [] @@ -287,7 +287,7 @@ def test_variant7(self): assert results['NM_001318832.1:c.171+821del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} assert results['NM_001318832.1:c.171+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305761.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318832.1'} - assert 'NM_001363528.1:c.138+821del' in results.keys() + assert 'NM_001363528.1:c.138+821del' in list(results.keys()) assert results['NM_001363528.1:c.138+821del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363528.1:c.138+821del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363528.1:c.138+821del']['alt_genomic_loci'] == [] @@ -300,13 +300,13 @@ def test_variant7(self): assert results['NM_001363528.1:c.138+821del']['hgvs_transcript_variant'] == 'NM_001363528.1:c.138+821del' assert results['NM_001363528.1:c.138+821del']['hgvs_refseqgene_variant'] == '' assert results['NM_001363528.1:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert 'hg38' not in results['NM_001363528.1:c.138+821del']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001363528.1:c.138+821del']['primary_assembly_loci'].keys()) assert results['NM_001363528.1:c.138+821del']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert 'grch38' not in results['NM_001363528.1:c.138+821del']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001363528.1:c.138+821del']['primary_assembly_loci'].keys()) assert results['NM_001363528.1:c.138+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350457.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363528.1'} assert results['flag'] == 'gene_variant' - assert 'NM_021055.2:c.138+821del' in results.keys() + assert 'NM_021055.2:c.138+821del' in list(results.keys()) assert results['NM_021055.2:c.138+821del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_021055.2:c.138+821del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_021055.2:c.138+821del']['alt_genomic_loci'] == [] @@ -319,12 +319,12 @@ def test_variant7(self): assert results['NM_021055.2:c.138+821del']['hgvs_transcript_variant'] == 'NM_021055.2:c.138+821del' assert results['NM_021055.2:c.138+821del']['hgvs_refseqgene_variant'] == '' assert results['NM_021055.2:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert 'hg38' not in results['NM_021055.2:c.138+821del']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_021055.2:c.138+821del']['primary_assembly_loci'].keys()) assert results['NM_021055.2:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert 'grch38' not in results['NM_021055.2:c.138+821del']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_021055.2:c.138+821del']['primary_assembly_loci'].keys()) assert 
results['NM_021055.2:c.138+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_066399.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021055.2'} - assert 'NM_001077183.1:c.138+821del' in results.keys() + assert 'NM_001077183.1:c.138+821del' in list(results.keys()) assert results['NM_001077183.1:c.138+821del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001077183.1:c.138+821del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001077183.1:c.138+821del']['alt_genomic_loci'] == [] @@ -337,12 +337,12 @@ def test_variant7(self): assert results['NM_001077183.1:c.138+821del']['hgvs_transcript_variant'] == 'NM_001077183.1:c.138+821del' assert results['NM_001077183.1:c.138+821del']['hgvs_refseqgene_variant'] == '' assert results['NM_001077183.1:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert 'hg38' not in results['NM_001077183.1:c.138+821del']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001077183.1:c.138+821del']['primary_assembly_loci'].keys()) assert results['NM_001077183.1:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert 'grch38' not in results['NM_001077183.1:c.138+821del']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001077183.1:c.138+821del']['primary_assembly_loci'].keys()) assert results['NM_001077183.1:c.138+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001070651.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077183.1'} - assert 'NM_001318827.1:c.138+821del' in results.keys() + assert 'NM_001318827.1:c.138+821del' in list(results.keys()) assert 
results['NM_001318827.1:c.138+821del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001318827.1:c.138+821del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001318827.1:c.138+821del']['alt_genomic_loci'] == [] @@ -360,7 +360,7 @@ def test_variant7(self): assert results['NM_001318827.1:c.138+821del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} assert results['NM_001318827.1:c.138+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305756.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318827.1'} - assert 'NM_001114382.2:c.138+821del' in results.keys() + assert 'NM_001114382.2:c.138+821del' in list(results.keys()) assert results['NM_001114382.2:c.138+821del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001114382.2:c.138+821del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001114382.2:c.138+821del']['alt_genomic_loci'] == [] @@ -378,7 +378,7 @@ def test_variant7(self): assert results['NM_001114382.2:c.138+821del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} assert results['NM_001114382.2:c.138+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001107854.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001114382.2'} - assert 'NM_001318829.1:c.-9-826del' in results.keys() + assert 'NM_001318829.1:c.-9-826del' in list(results.keys()) assert results['NM_001318829.1:c.-9-826del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001318829.1:c.-9-826del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001318829.1:c.-9-826del']['alt_genomic_loci'] == [] @@ -400,10 +400,10 @@ def test_variant7(self): def test_variant8(self): variant = 
'NM_000088.3:c.589GG>CT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.589_590delinsCT' in results.keys() + assert 'NM_000088.3:c.589_590delinsCT' in list(results.keys()) assert results['NM_000088.3:c.589_590delinsCT']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589_590delinsCT' assert results['NM_000088.3:c.589_590delinsCT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.589_590delinsCT']['alt_genomic_loci'] == [] @@ -415,19 +415,19 @@ def test_variant8(self): assert results['NM_000088.3:c.589_590delinsCT']['hgvs_lrg_variant'] == 'LRG_1:g.8638_8639delinsCT' assert results['NM_000088.3:c.589_590delinsCT']['hgvs_transcript_variant'] == 'NM_000088.3:c.589_590delinsCT' assert results['NM_000088.3:c.589_590delinsCT']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638_8639delinsCT' - assert results['NM_000088.3:c.589_590delinsCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275362_48275363delinsAG', 'vcf': {'chr': 'chr17', 'ref': 'CC', 'pos': '48275362', 'alt': u'AG'}} - assert results['NM_000088.3:c.589_590delinsCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198001_50198002delinsAG', 'vcf': {'chr': 'chr17', 'ref': 'CC', 'pos': '50198001', 'alt': u'AG'}} - assert results['NM_000088.3:c.589_590delinsCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275362_48275363delinsAG', 'vcf': {'chr': '17', 'ref': 'CC', 'pos': '48275362', 'alt': u'AG'}} - assert results['NM_000088.3:c.589_590delinsCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198001_50198002delinsAG', 'vcf': {'chr': '17', 'ref': 'CC', 'pos': '50198001', 'alt': u'AG'}} + assert results['NM_000088.3:c.589_590delinsCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275362_48275363delinsAG', 
'vcf': {'chr': 'chr17', 'ref': 'CC', 'pos': '48275362', 'alt': 'AG'}} + assert results['NM_000088.3:c.589_590delinsCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198001_50198002delinsAG', 'vcf': {'chr': 'chr17', 'ref': 'CC', 'pos': '50198001', 'alt': 'AG'}} + assert results['NM_000088.3:c.589_590delinsCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275362_48275363delinsAG', 'vcf': {'chr': '17', 'ref': 'CC', 'pos': '48275362', 'alt': 'AG'}} + assert results['NM_000088.3:c.589_590delinsCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198001_50198002delinsAG', 'vcf': {'chr': '17', 'ref': 'CC', 'pos': '50198001', 'alt': 'AG'}} assert results['NM_000088.3:c.589_590delinsCT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant9(self): variant = 'NM_000094.3:c.6751-2_6751-3del' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -439,10 +439,10 @@ def test_variant9(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in 
results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -450,9 +450,9 @@ def test_variant9(self): def test_variant10(self): variant = 'COL5A1:c.5071A>T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -464,10 +464,10 @@ def test_variant10(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in 
list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -475,9 +475,9 @@ def test_variant10(self): def test_variant11(self): variant = 'NG_007400.1:c.5071A>T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -489,10 +489,10 @@ def test_variant11(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ 
-500,9 +500,9 @@ def test_variant11(self): def test_variant12(self): variant = 'chr16:15832508_15832509delinsAC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_002474.2:c.3034_3035inv' in results.keys() + assert 'NM_002474.2:c.3034_3035inv' in list(results.keys()) assert results['NM_002474.2:c.3034_3035inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_002474.2:c.3034_3035inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_002474.2:c.3034_3035inv']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}] @@ -520,7 +520,7 @@ def test_variant12(self): assert results['NM_002474.2:c.3034_3035inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} assert results['NM_002474.2:c.3034_3035inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002465.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002474.2'} - assert 'NM_022844.2:c.3034_3035inv' in results.keys() + assert 'NM_022844.2:c.3034_3035inv' in list(results.keys()) assert results['NM_022844.2:c.3034_3035inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_022844.2:c.3034_3035inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_022844.2:c.3034_3035inv']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': 
'1396662', 'alt': 'AC'}}}] @@ -538,7 +538,7 @@ def test_variant12(self): assert results['NM_022844.2:c.3034_3035inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} assert results['NM_022844.2:c.3034_3035inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_074035.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_022844.2'} - assert 'NM_001040114.1:c.3055_3056inv' in results.keys() + assert 'NM_001040114.1:c.3055_3056inv' in list(results.keys()) assert results['NM_001040114.1:c.3055_3056inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001040114.1:c.3055_3056inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001040114.1:c.3055_3056inv']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}] @@ -557,7 +557,7 @@ def test_variant12(self): assert results['NM_001040114.1:c.3055_3056inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035203.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040114.1'} assert results['flag'] == 'gene_variant' - assert 'NM_001040113.1:c.3055_3056inv' in results.keys() + assert 'NM_001040113.1:c.3055_3056inv' in list(results.keys()) assert results['NM_001040113.1:c.3055_3056inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001040113.1:c.3055_3056inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001040113.1:c.3055_3056inv']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': 
'1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}] @@ -579,10 +579,10 @@ def test_variant12(self): def test_variant13(self): variant = 'NM_000088.3:c.589-1GG>G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.589-1_589delinsG' in results.keys() + assert 'NM_000088.3:c.589-1_589delinsG' in list(results.keys()) assert results['NM_000088.3:c.589-1_589delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.590del' assert results['NM_000088.3:c.589-1_589delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.590del' assert results['NM_000088.3:c.589-1_589delinsG']['alt_genomic_loci'] == [] @@ -604,10 +604,10 @@ def test_variant13(self): def test_variant14(self): variant = 'NM_000088.3:c.642+1GT>G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.642+1_642+2delinsG' in results.keys() + assert 'NM_000088.3:c.642+1_642+2delinsG' in list(results.keys()) assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642+2del' assert results['NM_000088.3:c.642+1_642+2delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.642+2del' assert results['NM_000088.3:c.642+1_642+2delinsG']['alt_genomic_loci'] == [] @@ -629,10 +629,10 @@ def test_variant14(self): def test_variant15(self): variant = 'NM_000088.3:c.589-2AG>G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.589-2_589-1delinsG' in results.keys() + assert 'NM_000088.3:c.589-2_589-1delinsG' in list(results.keys()) assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_lrg_transcript_variant'] == 
'LRG_1t1:c.589-2del' assert results['NM_000088.3:c.589-2_589-1delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-2del' assert results['NM_000088.3:c.589-2_589-1delinsG']['alt_genomic_loci'] == [] @@ -654,10 +654,10 @@ def test_variant15(self): def test_variant16(self): variant = 'NC_000017.10:g.48279242G>T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'intergenic' - assert 'Intergenic_Variant_1' in results.keys() + assert 'Intergenic_Variant_1' in list(results.keys()) assert results['Intergenic_Variant_1']['hgvs_lrg_transcript_variant'] == '' assert results['Intergenic_Variant_1']['refseqgene_context_intronic_sequence'] == '' assert results['Intergenic_Variant_1']['alt_genomic_loci'] == [] @@ -669,19 +669,19 @@ def test_variant16(self): assert results['Intergenic_Variant_1']['hgvs_lrg_variant'] == 'LRG_1:g.4759C>A' assert results['Intergenic_Variant_1']['hgvs_transcript_variant'] == '' assert results['Intergenic_Variant_1']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.4759C>A' - assert results['Intergenic_Variant_1']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': u'NC_000017.10:g.48279242G>T', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '48279242', 'alt': 'T'}} - assert results['Intergenic_Variant_1']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': u'NC_000017.11:g.50201881G>T', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '50201881', 'alt': 'T'}} - assert results['Intergenic_Variant_1']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': u'NC_000017.10:g.48279242G>T', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '48279242', 'alt': 'T'}} - assert results['Intergenic_Variant_1']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': u'NC_000017.11:g.50201881G>T', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '50201881', 'alt': 'T'}} + assert results['Intergenic_Variant_1']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000017.10:g.48279242G>T', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '48279242', 'alt': 'T'}} + assert results['Intergenic_Variant_1']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50201881G>T', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '50201881', 'alt': 'T'}} + assert results['Intergenic_Variant_1']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48279242G>T', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '48279242', 'alt': 'T'}} + assert results['Intergenic_Variant_1']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50201881G>T', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '50201881', 'alt': 'T'}} assert results['Intergenic_Variant_1']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant17(self): variant = 'NM_000500.7:c.-107-19C>T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -693,10 +693,10 @@ def test_variant17(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in 
results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -704,9 +704,9 @@ def test_variant17(self): def test_variant18(self): variant = 'NM_000518.4:c.-130C>T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -718,10 +718,10 @@ def test_variant18(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in 
list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -729,9 +729,9 @@ def test_variant18(self): def test_variant19(self): variant = 'NM_000518.4:c.-50-80C>T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -743,10 +743,10 @@ def test_variant19(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -754,9 +754,9 @@ def test_variant19(self): def test_variant20(self): variant = 'NM_000518.4:c.316_*342delinsCTACTT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + 
print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -768,10 +768,10 @@ def test_variant20(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -779,10 +779,10 @@ def test_variant20(self): def test_variant21(self): variant = 'NM_000518.4:c.316_*100del' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000518.4:c.316_*100del' in results.keys() + assert 'NM_000518.4:c.316_*100del' in list(results.keys()) assert results['NM_000518.4:c.316_*100del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000518.4:c.316_*100del']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_000518.4:c.316_*100del']['alt_genomic_loci'] == [] @@ -804,9 +804,9 @@ def test_variant21(self): def test_variant22(self): variant = 'NM_000518.4:c.*2000C>T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -818,10 +818,10 @@ def test_variant22(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -829,9 +829,9 @@ def test_variant22(self): def test_variant23(self): variant = 'NM_000518.4:c.*132+1868C>T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert 
results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -843,10 +843,10 @@ def test_variant23(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -854,9 +854,9 @@ def test_variant23(self): def test_variant24(self): variant = 'NM_000518.4:c.-130_*2000=' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -868,10 +868,10 @@ def test_variant24(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert 
results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -879,9 +879,9 @@ def test_variant24(self): def test_variant25(self): variant = 'NM_000518.4:c.-50-80_*132+1868=' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -893,10 +893,10 @@ def test_variant25(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in 
results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -904,9 +904,9 @@ def test_variant25(self): def test_variant26(self): variant = 'NR_138595.1:n.-810C>T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -918,10 +918,10 @@ def test_variant26(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in 
list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -929,9 +929,9 @@ def test_variant26(self): def test_variant27(self): variant = 'NR_138595.1:n.1-810C>T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -943,10 +943,10 @@ def test_variant27(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -954,9 +954,9 @@ def test_variant27(self): def test_variant28(self): variant = 
'NR_138595.1:n.1071+1A=' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -968,19 +968,19 @@ def test_variant28(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['flag'] == 'warning' def test_variant29(self): variant = 'NR_138595.1:n.-810_1071+1=' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -992,10 +992,10 @@ def test_variant29(self): 
assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -1003,10 +1003,10 @@ def test_variant29(self): def test_variant30(self): variant = 'NC_000017.10:g.48261457_48261463TTATGTT=' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.*1400_*1406=' in results.keys() + assert 'NM_000088.3:c.*1400_*1406=' in list(results.keys()) assert results['NM_000088.3:c.*1400_*1406=']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.*1400_*1406=' assert results['NM_000088.3:c.*1400_*1406=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.*1400_*1406=']['alt_genomic_loci'] == [] @@ -1018,20 +1018,20 @@ def test_variant30(self): assert results['NM_000088.3:c.*1400_*1406=']['hgvs_lrg_variant'] == 'LRG_1:g.22538_22544=' assert results['NM_000088.3:c.*1400_*1406=']['hgvs_transcript_variant'] == 'NM_000088.3:c.*1400_*1406=' assert 
results['NM_000088.3:c.*1400_*1406=']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.22538_22544=' - assert results['NM_000088.3:c.*1400_*1406=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48261457_48261463=', 'vcf': {'chr': 'chr17', 'ref': u'TTATGTT', 'pos': '48261457', 'alt': u'TTATGTT'}} - assert results['NM_000088.3:c.*1400_*1406=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50184096_50184102=', 'vcf': {'chr': 'chr17', 'ref': u'TTATGTT', 'pos': '50184096', 'alt': u'TTATGTT'}} - assert results['NM_000088.3:c.*1400_*1406=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48261457_48261463=', 'vcf': {'chr': '17', 'ref': u'TTATGTT', 'pos': '48261457', 'alt': u'TTATGTT'}} - assert results['NM_000088.3:c.*1400_*1406=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50184096_50184102=', 'vcf': {'chr': '17', 'ref': u'TTATGTT', 'pos': '50184096', 'alt': u'TTATGTT'}} + assert results['NM_000088.3:c.*1400_*1406=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48261457_48261463=', 'vcf': {'chr': 'chr17', 'ref': 'TTATGTT', 'pos': '48261457', 'alt': 'TTATGTT'}} + assert results['NM_000088.3:c.*1400_*1406=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50184096_50184102=', 'vcf': {'chr': 'chr17', 'ref': 'TTATGTT', 'pos': '50184096', 'alt': 'TTATGTT'}} + assert results['NM_000088.3:c.*1400_*1406=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48261457_48261463=', 'vcf': {'chr': '17', 'ref': 'TTATGTT', 'pos': '48261457', 'alt': 'TTATGTT'}} + assert results['NM_000088.3:c.*1400_*1406=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50184096_50184102=', 'vcf': {'chr': '17', 'ref': 'TTATGTT', 'pos': '50184096', 'alt': 'TTATGTT'}} assert 
results['NM_000088.3:c.*1400_*1406=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant31(self): variant = 'NC_000017.10:g.48275363C>A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.589G>T' in results.keys() + assert 'NM_000088.3:c.589G>T' in list(results.keys()) assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.589G>T']['alt_genomic_loci'] == [] @@ -1043,20 +1043,20 @@ def test_variant31(self): assert results['NM_000088.3:c.589G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8638G>T' assert results['NM_000088.3:c.589G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589G>T' assert results['NM_000088.3:c.589G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638G>T' - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '48275363', 'alt': u'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '50198002', 'alt': u'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '48275363', 'alt': u'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '50198002', 'alt': 
u'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '48275363', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '50198002', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '48275363', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '50198002', 'alt': 'A'}} assert results['NM_000088.3:c.589G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant32(self): variant = 'NM_000088.3:c.589-1G>T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.589-1G>T' in results.keys() + assert 'NM_000088.3:c.589-1G>T' in list(results.keys()) assert results['NM_000088.3:c.589-1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-1G>T' assert results['NM_000088.3:c.589-1G>T']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-1G>T' assert results['NM_000088.3:c.589-1G>T']['alt_genomic_loci'] == [] @@ -1068,19 +1068,19 @@ def test_variant32(self): assert results['NM_000088.3:c.589-1G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8637G>T' assert results['NM_000088.3:c.589-1G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-1G>T' assert 
results['NM_000088.3:c.589-1G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8637G>T' - assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '48275364', 'alt': u'A'}} - assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '50198003', 'alt': u'A'}} - assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '48275364', 'alt': u'A'}} - assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '50198003', 'alt': u'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '48275364', 'alt': 'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '50198003', 'alt': 'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '48275364', 'alt': 'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '50198003', 'alt': 'A'}} assert results['NM_000088.3:c.589-1G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant33(self): variant = 'NM_000088.3:c.591_593inv' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_000088.3:c.591_593inv' in results.keys() + assert 'NM_000088.3:c.591_593inv' in list(results.keys()) assert results['NM_000088.3:c.591_593inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.591_593inv' assert results['NM_000088.3:c.591_593inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.591_593inv']['alt_genomic_loci'] == [] @@ -1103,10 +1103,10 @@ def test_variant33(self): def test_variant34(self): variant = '11-5248232-T-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000518.5:c.20A>T' in results.keys() + assert 'NM_000518.5:c.20A>T' in list(results.keys()) assert results['NM_000518.5:c.20A>T']['hgvs_lrg_transcript_variant'] == 'LRG_1232t1:c.20A>T' assert results['NM_000518.5:c.20A>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000518.5:c.20A>T']['alt_genomic_loci'] == [] @@ -1118,13 +1118,13 @@ def test_variant34(self): assert results['NM_000518.5:c.20A>T']['hgvs_lrg_variant'] == '' assert results['NM_000518.5:c.20A>T']['hgvs_transcript_variant'] == 'NM_000518.5:c.20A>T' assert results['NM_000518.5:c.20A>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_000518.5:c.20A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.5248232T>A', 'vcf': {'chr': 'chr11', 'ref': u'T', 'pos': '5248232', 'alt': u'A'}} - assert 'hg38' not in results['NM_000518.5:c.20A>T']['primary_assembly_loci'].keys() - assert results['NM_000518.5:c.20A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.5248232T>A', 'vcf': {'chr': '11', 'ref': u'T', 'pos': '5248232', 'alt': u'A'}} - assert 'grch38' not in 
results['NM_000518.5:c.20A>T']['primary_assembly_loci'].keys() + assert results['NM_000518.5:c.20A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.5248232T>A', 'vcf': {'chr': 'chr11', 'ref': 'T', 'pos': '5248232', 'alt': 'A'}} + assert 'hg38' not in list(results['NM_000518.5:c.20A>T']['primary_assembly_loci'].keys()) + assert results['NM_000518.5:c.20A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.5248232T>A', 'vcf': {'chr': '11', 'ref': 'T', 'pos': '5248232', 'alt': 'A'}} + assert 'grch38' not in list(results['NM_000518.5:c.20A>T']['primary_assembly_loci'].keys()) assert results['NM_000518.5:c.20A>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000509.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000518.5'} - assert 'NM_000518.4:c.20A>T' in results.keys() + assert 'NM_000518.4:c.20A>T' in list(results.keys()) assert results['NM_000518.4:c.20A>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000518.4:c.20A>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000518.4:c.20A>T']['alt_genomic_loci'] == [] @@ -1136,20 +1136,20 @@ def test_variant34(self): assert results['NM_000518.4:c.20A>T']['hgvs_lrg_variant'] == '' assert results['NM_000518.4:c.20A>T']['hgvs_transcript_variant'] == 'NM_000518.4:c.20A>T' assert results['NM_000518.4:c.20A>T']['hgvs_refseqgene_variant'] == 'NG_000007.3:g.70614A>T' - assert results['NM_000518.4:c.20A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.5248232T>A', 'vcf': {'chr': 'chr11', 'ref': u'T', 'pos': '5248232', 'alt': u'A'}} - assert results['NM_000518.4:c.20A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.5227002T>A', 'vcf': {'chr': 'chr11', 'ref': u'T', 'pos': '5227002', 'alt': u'A'}} - assert results['NM_000518.4:c.20A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000011.9:g.5248232T>A', 'vcf': {'chr': '11', 'ref': u'T', 'pos': '5248232', 'alt': u'A'}} - assert results['NM_000518.4:c.20A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.5227002T>A', 'vcf': {'chr': '11', 'ref': u'T', 'pos': '5227002', 'alt': u'A'}} + assert results['NM_000518.4:c.20A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.5248232T>A', 'vcf': {'chr': 'chr11', 'ref': 'T', 'pos': '5248232', 'alt': 'A'}} + assert results['NM_000518.4:c.20A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.5227002T>A', 'vcf': {'chr': 'chr11', 'ref': 'T', 'pos': '5227002', 'alt': 'A'}} + assert results['NM_000518.4:c.20A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.5248232T>A', 'vcf': {'chr': '11', 'ref': 'T', 'pos': '5248232', 'alt': 'A'}} + assert results['NM_000518.4:c.20A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.5227002T>A', 'vcf': {'chr': '11', 'ref': 'T', 'pos': '5227002', 'alt': 'A'}} assert results['NM_000518.4:c.20A>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_000007.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000509.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000518.4'} def test_variant35(self): variant = 'NG_007400.1(NM_000088.3):c.589-1G>T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.589-1G>T' in results.keys() + assert 'NM_000088.3:c.589-1G>T' in list(results.keys()) assert results['NM_000088.3:c.589-1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-1G>T' assert results['NM_000088.3:c.589-1G>T']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-1G>T' assert results['NM_000088.3:c.589-1G>T']['alt_genomic_loci'] == [] @@ -1161,20 +1161,20 @@ def 
test_variant35(self): assert results['NM_000088.3:c.589-1G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8637G>T' assert results['NM_000088.3:c.589-1G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-1G>T' assert results['NM_000088.3:c.589-1G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8637G>T' - assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '48275364', 'alt': u'A'}} - assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '50198003', 'alt': u'A'}} - assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '48275364', 'alt': u'A'}} - assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '50198003', 'alt': u'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '48275364', 'alt': 'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '50198003', 'alt': 'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '48275364', 'alt': 'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '50198003', 'alt': 'A'}} assert 
results['NM_000088.3:c.589-1G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant36(self): variant = '1:150550916G>A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_182763.2:c.688+403C>T' in results.keys() + assert 'NM_182763.2:c.688+403C>T' in list(results.keys()) assert results['NM_182763.2:c.688+403C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_182763.2:c.688+403C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_182763.2:c.688+403C>T']['alt_genomic_loci'] == [] @@ -1186,13 +1186,13 @@ def test_variant36(self): assert results['NM_182763.2:c.688+403C>T']['hgvs_lrg_variant'] == '' assert results['NM_182763.2:c.688+403C>T']['hgvs_transcript_variant'] == 'NM_182763.2:c.688+403C>T' assert results['NM_182763.2:c.688+403C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '150550916', 'alt': u'A'}} - assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '150578440', 'alt': u'A'}} - assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '150550916', 'alt': u'A'}} - assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '150578440', 'alt': u'A'}} + 
assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '150550916', 'alt': 'A'}} + assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '150578440', 'alt': 'A'}} + assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '150550916', 'alt': 'A'}} + assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '150578440', 'alt': 'A'}} assert results['NM_182763.2:c.688+403C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_877495.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_182763.2'} - assert 'NM_001197320.1:c.281C>T' in results.keys() + assert 'NM_001197320.1:c.281C>T' in list(results.keys()) assert results['NM_001197320.1:c.281C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001197320.1:c.281C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001197320.1:c.281C>T']['alt_genomic_loci'] == [] @@ -1204,13 +1204,13 @@ def test_variant36(self): assert results['NM_001197320.1:c.281C>T']['hgvs_lrg_variant'] == '' assert results['NM_001197320.1:c.281C>T']['hgvs_transcript_variant'] == 'NM_001197320.1:c.281C>T' assert results['NM_001197320.1:c.281C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '150550916', 'alt': u'A'}} - assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '150578440', 'alt': u'A'}} - assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '150550916', 'alt': u'A'}} - assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '150578440', 'alt': u'A'}} + assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '150550916', 'alt': 'A'}} + assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '150578440', 'alt': 'A'}} + assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '150550916', 'alt': 'A'}} + assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '150578440', 'alt': 'A'}} assert results['NM_001197320.1:c.281C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001184249.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001197320.1'} - assert 'NM_021960.4:c.740C>T' in results.keys() + assert 'NM_021960.4:c.740C>T' in list(results.keys()) assert results['NM_021960.4:c.740C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_021960.4:c.740C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_021960.4:c.740C>T']['alt_genomic_loci'] == [] @@ -1222,20 +1222,20 @@ def test_variant36(self): assert 
results['NM_021960.4:c.740C>T']['hgvs_lrg_variant'] == '' assert results['NM_021960.4:c.740C>T']['hgvs_transcript_variant'] == 'NM_021960.4:c.740C>T' assert results['NM_021960.4:c.740C>T']['hgvs_refseqgene_variant'] == 'NG_029146.1:g.6299C>T' - assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '150550916', 'alt': u'A'}} - assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '150578440', 'alt': u'A'}} - assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '150550916', 'alt': u'A'}} - assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '150578440', 'alt': u'A'}} + assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '150550916', 'alt': 'A'}} + assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '150578440', 'alt': 'A'}} + assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '150550916', 'alt': 'A'}} + assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '150578440', 'alt': 'A'}} assert results['NM_021960.4:c.740C>T']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_029146.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_068779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021960.4'} def test_variant37(self): variant = '1-150550916-G-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_182763.2:c.688+403C>T' in results.keys() + assert 'NM_182763.2:c.688+403C>T' in list(results.keys()) assert results['NM_182763.2:c.688+403C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_182763.2:c.688+403C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_182763.2:c.688+403C>T']['alt_genomic_loci'] == [] @@ -1247,13 +1247,13 @@ def test_variant37(self): assert results['NM_182763.2:c.688+403C>T']['hgvs_lrg_variant'] == '' assert results['NM_182763.2:c.688+403C>T']['hgvs_transcript_variant'] == 'NM_182763.2:c.688+403C>T' assert results['NM_182763.2:c.688+403C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '150550916', 'alt': u'A'}} - assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '150578440', 'alt': u'A'}} - assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '150550916', 'alt': u'A'}} - assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '150578440', 'alt': u'A'}} + assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': 
{'chr': 'chr1', 'ref': 'G', 'pos': '150550916', 'alt': 'A'}} + assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '150578440', 'alt': 'A'}} + assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '150550916', 'alt': 'A'}} + assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '150578440', 'alt': 'A'}} assert results['NM_182763.2:c.688+403C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_877495.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_182763.2'} - assert 'NM_001197320.1:c.281C>T' in results.keys() + assert 'NM_001197320.1:c.281C>T' in list(results.keys()) assert results['NM_001197320.1:c.281C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001197320.1:c.281C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001197320.1:c.281C>T']['alt_genomic_loci'] == [] @@ -1265,13 +1265,13 @@ def test_variant37(self): assert results['NM_001197320.1:c.281C>T']['hgvs_lrg_variant'] == '' assert results['NM_001197320.1:c.281C>T']['hgvs_transcript_variant'] == 'NM_001197320.1:c.281C>T' assert results['NM_001197320.1:c.281C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '150550916', 'alt': u'A'}} - assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '150578440', 'alt': u'A'}} - assert 
results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '150550916', 'alt': u'A'}} - assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '150578440', 'alt': u'A'}} + assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '150550916', 'alt': 'A'}} + assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '150578440', 'alt': 'A'}} + assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '150550916', 'alt': 'A'}} + assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '150578440', 'alt': 'A'}} assert results['NM_001197320.1:c.281C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001184249.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001197320.1'} - assert 'NM_021960.4:c.740C>T' in results.keys() + assert 'NM_021960.4:c.740C>T' in list(results.keys()) assert results['NM_021960.4:c.740C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_021960.4:c.740C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_021960.4:c.740C>T']['alt_genomic_loci'] == [] @@ -1283,19 +1283,19 @@ def test_variant37(self): assert results['NM_021960.4:c.740C>T']['hgvs_lrg_variant'] == '' assert results['NM_021960.4:c.740C>T']['hgvs_transcript_variant'] == 'NM_021960.4:c.740C>T' assert 
results['NM_021960.4:c.740C>T']['hgvs_refseqgene_variant'] == 'NG_029146.1:g.6299C>T' - assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '150550916', 'alt': u'A'}} - assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '150578440', 'alt': u'A'}} - assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '150550916', 'alt': u'A'}} - assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '150578440', 'alt': u'A'}} + assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '150550916', 'alt': 'A'}} + assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '150578440', 'alt': 'A'}} + assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '150550916', 'alt': 'A'}} + assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '150578440', 'alt': 'A'}} assert results['NM_021960.4:c.740C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029146.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_068779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021960.4'} 
def test_variant38(self): variant = 'NG_008123.1(LEPRE1_v003):c.2055+18G>A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -1307,10 +1307,10 @@ def test_variant38(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -1318,9 +1318,9 @@ def test_variant38(self): def test_variant39(self): variant = 'NG_008123.1:c.2055+18G>A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert 
results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -1332,10 +1332,10 @@ def test_variant39(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -1343,10 +1343,10 @@ def test_variant39(self): def test_variant40(self): variant = 'NG_008123.1(NM_022356.3):c.2055+18G>A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_022356.3:c.2055+18G>A' in results.keys() + assert 'NM_022356.3:c.2055+18G>A' in list(results.keys()) assert results['NM_022356.3:c.2055+18G>A']['hgvs_lrg_transcript_variant'] == 'LRG_5t1:c.2055+18G>A' assert results['NM_022356.3:c.2055+18G>A']['refseqgene_context_intronic_sequence'] == 'NG_008123.1(NM_022356.3):c.2055+18G>A' assert results['NM_022356.3:c.2055+18G>A']['alt_genomic_loci'] == [] @@ -1358,23 +1358,23 @@ def test_variant40(self): assert 
results['NM_022356.3:c.2055+18G>A']['hgvs_lrg_variant'] == 'LRG_5:g.24831G>A' assert results['NM_022356.3:c.2055+18G>A']['hgvs_transcript_variant'] == 'NM_022356.3:c.2055+18G>A' assert results['NM_022356.3:c.2055+18G>A']['hgvs_refseqgene_variant'] == 'NG_008123.1:g.24831G>A' - assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': 'chr1', 'ref': u'C', 'pos': '43212925', 'alt': u'T'}} - assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': 'chr1', 'ref': u'C', 'pos': '42747254', 'alt': u'T'}} - assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': '1', 'ref': u'C', 'pos': '43212925', 'alt': u'T'}} - assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': '1', 'ref': u'C', 'pos': '42747254', 'alt': u'T'}} + assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '43212925', 'alt': 'T'}} + assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '42747254', 'alt': 'T'}} + assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '43212925', 'alt': 'T'}} + assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '42747254', 'alt': 'T'}} assert 
results['NM_022356.3:c.2055+18G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008123.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_071751.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_022356.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_5.xml'} def test_variant41(self): variant = 'NM_021983.4:c.490G>C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_021983.4:c.490G>C' in results.keys() + assert 'NM_021983.4:c.490G>C' in list(results.keys()) assert results['NM_021983.4:c.490G>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_021983.4:c.490G>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_021983.4:c.490G>C']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3848158', 'alt': u'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'chr6_mann_hap4', 'ref': 'T', 'pos': '3848158', 'alt': u'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3842538', 'alt': u'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'ref': 'T', 'pos': '3842538', 'alt': u'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': u'C', 'pos': '3884432', 'alt': u'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': u'C', 'pos': '3884432', 'alt': u'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': u'C', 'pos': '3852542', 'alt': u'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'chr6_ssto_hap7', 
'ref': u'C', 'pos': '3852542', 'alt': u'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': u'C', 'pos': '3853244', 'alt': u'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': u'C', 'pos': '3853244', 'alt': u'G'}}}] + assert results['NM_021983.4:c.490G>C']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3848158', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'chr6_mann_hap4', 'ref': 'T', 'pos': '3848158', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3842538', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'ref': 'T', 'pos': '3842538', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'C', 'pos': '3884432', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'C', 'pos': '3884432', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3852542', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': 'C', 'pos': '3852542', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3853244', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': 'C', 'pos': '3853244', 'alt': 'G'}}}] assert results['NM_021983.4:c.490G>C']['transcript_description'] == 'Homo sapiens major 
histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA' assert results['NM_021983.4:c.490G>C']['gene_symbol'] == 'HLA-DRB4' assert results['NM_021983.4:c.490G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068818.4:p.(Gly164Arg)', 'slr': 'NP_068818.4:p.(G164R)'} @@ -1383,20 +1383,20 @@ def test_variant41(self): assert results['NM_021983.4:c.490G>C']['hgvs_lrg_variant'] == '' assert results['NM_021983.4:c.490G>C']['hgvs_transcript_variant'] == 'NM_021983.4:c.490G>C' assert results['NM_021983.4:c.490G>C']['hgvs_refseqgene_variant'] == 'NG_002433.1:g.5724C>G' - assert 'hg19' not in results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys() - assert 'hg38' not in results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys() - assert 'grch37' not in results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys() - assert 'grch38' not in results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys()) assert results['NM_021983.4:c.490G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_002433.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_068818.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021983.4'} def test_variant42(self): variant = 'NM_032470.3:c.4del' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_032470.3:c.4del' in results.keys() + assert 'NM_032470.3:c.4del' in list(results.keys()) assert results['NM_032470.3:c.4del']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_032470.3:c.4del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032470.3:c.4del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'chr6_GL000251v2_alt', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'chr6_GL000252v2_alt', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'chr6_GL000254v2_alt', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167248.1:g.3274047del', 
'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'ref': 'CG', 'pos': '3274046', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167248.1:g.3274047del', 'vcf': {'chr': 'chr6_qbl_hap6', 'ref': 'CG', 'pos': '3274046', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167248.2:g.3268451del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'ref': 'CG', 'pos': '3268450', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167248.2:g.3268451del', 'vcf': {'chr': 'chr6_GL000255v2_alt', 'ref': 'CG', 'pos': '3268450', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3345701del', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'CG', 'pos': '3345700', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3345701del', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': 'CG', 'pos': '3345700', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3346403del', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'CG', 'pos': '3346402', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3346403del', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': 'CG', 'pos': '3346402', 'alt': 'C'}}}] @@ -1418,9 +1418,9 @@ def test_variant42(self): def test_variant43(self): variant = 'NM_001194958.2:c.20C>A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001194958.2:c.20C>A' in results.keys() + assert 'NM_001194958.2:c.20C>A' in list(results.keys()) assert results['NM_001194958.2:c.20C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001194958.2:c.20C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001194958.2:c.20C>A']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315950.2:g.355171C>A', 'vcf': {'chr': 'HG987_PATCH', 'ref': 'C', 'pos': '355171', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315950.2:g.355171C>A', 'vcf': {'chr': 'NW_003315950.2', 'ref': 'C', 'pos': '355171', 'alt': 'A'}}}] @@ -1432,9 +1432,9 @@ def 
test_variant43(self): assert results['NM_001194958.2:c.20C>A']['hgvs_lrg_variant'] == '' assert results['NM_001194958.2:c.20C>A']['hgvs_transcript_variant'] == 'NM_001194958.2:c.20C>A' assert results['NM_001194958.2:c.20C>A']['hgvs_refseqgene_variant'] == 'NG_033093.1:g.15284C>A' - assert 'hg19' not in results['NM_001194958.2:c.20C>A']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['NM_001194958.2:c.20C>A']['primary_assembly_loci'].keys()) assert results['NM_001194958.2:c.20C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.21702806C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '21702806', 'alt': 'A'}} - assert 'grch37' not in results['NM_001194958.2:c.20C>A']['primary_assembly_loci'].keys() + assert 'grch37' not in list(results['NM_001194958.2:c.20C>A']['primary_assembly_loci'].keys()) assert results['NM_001194958.2:c.20C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.21702806C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '21702806', 'alt': 'A'}} assert results['NM_001194958.2:c.20C>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_033093.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001181887.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001194958.2'} @@ -1443,10 +1443,10 @@ def test_variant43(self): def test_variant44(self): variant = 'NM_000022.2:c.534A>G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000022.2:c.534A>G' in results.keys() + assert 'NM_000022.2:c.534A>G' in list(results.keys()) assert results['NM_000022.2:c.534A>G']['hgvs_lrg_transcript_variant'] == 'LRG_16t1:c.534A>G' assert results['NM_000022.2:c.534A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000022.2:c.534A>G']['alt_genomic_loci'] == [] @@ -1458,23 +1458,23 @@ def test_variant44(self): assert 
results['NM_000022.2:c.534A>G']['hgvs_lrg_variant'] == 'LRG_16:g.32462A>G' assert results['NM_000022.2:c.534A>G']['hgvs_transcript_variant'] == 'NM_000022.2:c.534A>G' assert results['NM_000022.2:c.534A>G']['hgvs_refseqgene_variant'] == 'NG_007385.1:g.32462A>G' - assert results['NM_000022.2:c.534A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'ref': u'T', 'pos': '43252915', 'alt': u'C'}} - assert 'hg38' not in results['NM_000022.2:c.534A>G']['primary_assembly_loci'].keys() - assert results['NM_000022.2:c.534A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'ref': u'T', 'pos': '43252915', 'alt': u'C'}} - assert 'grch38' not in results['NM_000022.2:c.534A>G']['primary_assembly_loci'].keys() + assert results['NM_000022.2:c.534A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'ref': 'T', 'pos': '43252915', 'alt': 'C'}} + assert 'hg38' not in list(results['NM_000022.2:c.534A>G']['primary_assembly_loci'].keys()) + assert results['NM_000022.2:c.534A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'ref': 'T', 'pos': '43252915', 'alt': 'C'}} + assert 'grch38' not in list(results['NM_000022.2:c.534A>G']['primary_assembly_loci'].keys()) assert results['NM_000022.2:c.534A>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007385.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000013.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000022.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_16.xml'} def test_variant45(self): variant = 'HSCHR6_MHC_SSTO_CTG1-3852542-C-G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 
'NM_021983.4:c.490G>C' in results.keys() + assert 'NM_021983.4:c.490G>C' in list(results.keys()) assert results['NM_021983.4:c.490G>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_021983.4:c.490G>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_021983.4:c.490G>C']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3848158', 'alt': u'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'chr6_mann_hap4', 'ref': 'T', 'pos': '3848158', 'alt': u'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3842538', 'alt': u'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'ref': 'T', 'pos': '3842538', 'alt': u'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': u'C', 'pos': '3884432', 'alt': u'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': u'C', 'pos': '3884432', 'alt': u'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': u'C', 'pos': '3852542', 'alt': u'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': u'C', 'pos': '3852542', 'alt': u'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': u'C', 'pos': '3853244', 'alt': u'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': u'C', 'pos': '3853244', 'alt': u'G'}}}] + assert results['NM_021983.4:c.490G>C']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 
'ref': 'T', 'pos': '3848158', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'chr6_mann_hap4', 'ref': 'T', 'pos': '3848158', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3842538', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'ref': 'T', 'pos': '3842538', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'C', 'pos': '3884432', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'C', 'pos': '3884432', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3852542', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': 'C', 'pos': '3852542', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3853244', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': 'C', 'pos': '3853244', 'alt': 'G'}}}] assert results['NM_021983.4:c.490G>C']['transcript_description'] == 'Homo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA' assert results['NM_021983.4:c.490G>C']['gene_symbol'] == 'HLA-DRB4' assert results['NM_021983.4:c.490G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068818.4:p.(Gly164Arg)', 'slr': 'NP_068818.4:p.(G164R)'} @@ -1483,20 +1483,20 @@ def test_variant45(self): assert results['NM_021983.4:c.490G>C']['hgvs_lrg_variant'] == '' assert results['NM_021983.4:c.490G>C']['hgvs_transcript_variant'] == 'NM_021983.4:c.490G>C' assert 
results['NM_021983.4:c.490G>C']['hgvs_refseqgene_variant'] == 'NG_002433.1:g.5724C>G' - assert 'hg19' not in results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys() - assert 'hg38' not in results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys() - assert 'grch37' not in results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys() - assert 'grch38' not in results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys()) assert results['NM_021983.4:c.490G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_002433.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_068818.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021983.4'} def test_variant46(self): variant = 'NM_000368.4:c.363+1dupG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000368.4:c.363+1dup' in results.keys() + assert 'NM_000368.4:c.363+1dup' in list(results.keys()) assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_transcript_variant'] == 'LRG_486t1:c.363+1dup' assert results['NM_000368.4:c.363+1dup']['refseqgene_context_intronic_sequence'] == 'NG_012386.1(NM_000368.4):c.363+1dup' assert results['NM_000368.4:c.363+1dup']['alt_genomic_loci'] == [] @@ -1518,10 +1518,10 @@ def test_variant46(self): def test_variant47(self): variant = 'NM_000368.4:c.363dupG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000368.4:c.363+1dup' in results.keys() + assert 'NM_000368.4:c.363+1dup' in 
list(results.keys()) assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_transcript_variant'] == 'LRG_486t1:c.363+1dup' assert results['NM_000368.4:c.363+1dup']['refseqgene_context_intronic_sequence'] == 'NG_012386.1(NM_000368.4):c.363+1dup' assert results['NM_000368.4:c.363+1dup']['alt_genomic_loci'] == [] @@ -1543,10 +1543,10 @@ def test_variant47(self): def test_variant48(self): variant = 'NM_000089.3:c.1033_1035delGTT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000089.3:c.1035_1035+2del' in results.keys() + assert 'NM_000089.3:c.1035_1035+2del' in list(results.keys()) assert results['NM_000089.3:c.1035_1035+2del']['hgvs_lrg_transcript_variant'] == 'LRG_2t1:c.1035_1035+2del' assert results['NM_000089.3:c.1035_1035+2del']['refseqgene_context_intronic_sequence'] == 'NG_007405.1(NM_000089.3):c.1035_1035+2del' assert results['NM_000089.3:c.1035_1035+2del']['alt_genomic_loci'] == [] @@ -1568,10 +1568,10 @@ def test_variant48(self): def test_variant49(self): variant = 'NM_000089.3:c.1035_1035+2delTGT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000089.3:c.1035_1035+2del' in results.keys() + assert 'NM_000089.3:c.1035_1035+2del' in list(results.keys()) assert results['NM_000089.3:c.1035_1035+2del']['hgvs_lrg_transcript_variant'] == 'LRG_2t1:c.1035_1035+2del' assert results['NM_000089.3:c.1035_1035+2del']['refseqgene_context_intronic_sequence'] == 'NG_007405.1(NM_000089.3):c.1035_1035+2del' assert results['NM_000089.3:c.1035_1035+2del']['alt_genomic_loci'] == [] @@ -1593,10 +1593,10 @@ def test_variant49(self): def test_variant50(self): variant = 'NM_000088.3:c.2023_2028delGCAAGA' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.2024_2028+1del' in results.keys() + assert 
'NM_000088.3:c.2024_2028+1del' in list(results.keys()) assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.2024_2028+1del' assert results['NM_000088.3:c.2024_2028+1del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.2024_2028+1del' assert results['NM_000088.3:c.2024_2028+1del']['alt_genomic_loci'] == [] @@ -1618,10 +1618,10 @@ def test_variant50(self): def test_variant51(self): variant = 'NM_000089.3:c.938-1delG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000089.3:c.938del' in results.keys() + assert 'NM_000089.3:c.938del' in list(results.keys()) assert results['NM_000089.3:c.938del']['hgvs_lrg_transcript_variant'] == 'LRG_2t1:c.938del' assert results['NM_000089.3:c.938del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000089.3:c.938del']['alt_genomic_loci'] == [] @@ -1643,10 +1643,10 @@ def test_variant51(self): def test_variant52(self): variant = 'NM_000088.3:c.589G=' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.589G=' in results.keys() + assert 'NM_000088.3:c.589G=' in list(results.keys()) assert results['NM_000088.3:c.589G=']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G=' assert results['NM_000088.3:c.589G=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.589G=']['alt_genomic_loci'] == [] @@ -1658,20 +1658,20 @@ def test_variant52(self): assert results['NM_000088.3:c.589G=']['hgvs_lrg_variant'] == 'LRG_1:g.8638G=' assert results['NM_000088.3:c.589G=']['hgvs_transcript_variant'] == 'NM_000088.3:c.589G=' assert results['NM_000088.3:c.589G=']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638G=' - assert results['NM_000088.3:c.589G=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C=', 'vcf': {'chr': 
'chr17', 'ref': u'C', 'pos': '48275363', 'alt': u'C'}} - assert results['NM_000088.3:c.589G=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C=', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '50198002', 'alt': u'C'}} - assert results['NM_000088.3:c.589G=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C=', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '48275363', 'alt': u'C'}} - assert results['NM_000088.3:c.589G=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C=', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '50198002', 'alt': u'C'}} + assert results['NM_000088.3:c.589G=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C=', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '48275363', 'alt': 'C'}} + assert results['NM_000088.3:c.589G=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C=', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '50198002', 'alt': 'C'}} + assert results['NM_000088.3:c.589G=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C=', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '48275363', 'alt': 'C'}} + assert results['NM_000088.3:c.589G=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C=', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '50198002', 'alt': 'C'}} assert results['NM_000088.3:c.589G=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant53(self): variant = 'NM_000088.3:c.642A=' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 
'NM_000088.3:c.642A=' in results.keys() + assert 'NM_000088.3:c.642A=' in list(results.keys()) assert results['NM_000088.3:c.642A=']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642A=' assert results['NM_000088.3:c.642A=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.642A=']['alt_genomic_loci'] == [] @@ -1683,20 +1683,20 @@ def test_variant53(self): assert results['NM_000088.3:c.642A=']['hgvs_lrg_variant'] == 'LRG_1:g.8691A=' assert results['NM_000088.3:c.642A=']['hgvs_transcript_variant'] == 'NM_000088.3:c.642A=' assert results['NM_000088.3:c.642A=']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8691A=' - assert results['NM_000088.3:c.642A=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275310T=', 'vcf': {'chr': 'chr17', 'ref': u'T', 'pos': '48275310', 'alt': u'T'}} - assert results['NM_000088.3:c.642A=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197949T=', 'vcf': {'chr': 'chr17', 'ref': u'T', 'pos': '50197949', 'alt': u'T'}} - assert results['NM_000088.3:c.642A=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275310T=', 'vcf': {'chr': '17', 'ref': u'T', 'pos': '48275310', 'alt': u'T'}} - assert results['NM_000088.3:c.642A=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197949T=', 'vcf': {'chr': '17', 'ref': u'T', 'pos': '50197949', 'alt': u'T'}} + assert results['NM_000088.3:c.642A=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275310T=', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '48275310', 'alt': 'T'}} + assert results['NM_000088.3:c.642A=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197949T=', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '50197949', 'alt': 'T'}} + assert results['NM_000088.3:c.642A=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275310T=', 
'vcf': {'chr': '17', 'ref': 'T', 'pos': '48275310', 'alt': 'T'}} + assert results['NM_000088.3:c.642A=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197949T=', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '50197949', 'alt': 'T'}} assert results['NM_000088.3:c.642A=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant54(self): variant = 'NM_000088.3:c.642+1GG>G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.642+1_642+2delinsG' in results.keys() + assert 'NM_000088.3:c.642+1_642+2delinsG' in list(results.keys()) assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642+2del' assert results['NM_000088.3:c.642+1_642+2delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.642+2del' assert results['NM_000088.3:c.642+1_642+2delinsG']['alt_genomic_loci'] == [] @@ -1718,10 +1718,10 @@ def test_variant54(self): def test_variant55(self): variant = 'NM_000088.3:c.589-2GG>G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.589-2_589-1delinsG' in results.keys() + assert 'NM_000088.3:c.589-2_589-1delinsG' in list(results.keys()) assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-2del' assert results['NM_000088.3:c.589-2_589-1delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-2del' assert results['NM_000088.3:c.589-2_589-1delinsG']['alt_genomic_loci'] == [] @@ -1743,10 +1743,10 @@ def test_variant55(self): def test_variant56(self): variant = 
'NM_000088.3:c.589-6_589-5insTTTT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.589-5_589-4insTTTT' in results.keys() + assert 'NM_000088.3:c.589-5_589-4insTTTT' in list(results.keys()) assert results['NM_000088.3:c.589-5_589-4insTTTT']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-5_589-4insTTTT' assert results['NM_000088.3:c.589-5_589-4insTTTT']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-5_589-4insTTTT' assert results['NM_000088.3:c.589-5_589-4insTTTT']['alt_genomic_loci'] == [] @@ -1758,20 +1758,20 @@ def test_variant56(self): assert results['NM_000088.3:c.589-5_589-4insTTTT']['hgvs_lrg_variant'] == 'LRG_1:g.8633_8634insTTTT' assert results['NM_000088.3:c.589-5_589-4insTTTT']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-5_589-4insTTTT' assert results['NM_000088.3:c.589-5_589-4insTTTT']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8633_8634insTTTT' - assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275367_48275368insAAAA', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '48275367', 'alt': u'GAAAA'}} - assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198006_50198007insAAAA', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '50198006', 'alt': u'GAAAA'}} - assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275367_48275368insAAAA', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '48275367', 'alt': u'GAAAA'}} - assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198006_50198007insAAAA', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '50198006', 'alt': u'GAAAA'}} + assert 
results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275367_48275368insAAAA', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '48275367', 'alt': 'GAAAA'}} + assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198006_50198007insAAAA', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '50198006', 'alt': 'GAAAA'}} + assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275367_48275368insAAAA', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '48275367', 'alt': 'GAAAA'}} + assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198006_50198007insAAAA', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '50198006', 'alt': 'GAAAA'}} assert results['NM_000088.3:c.589-5_589-4insTTTT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant57(self): variant = 'NM_000088.3:c.642+3_642+4insAAAA' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.642+4_642+5insAAAA' in results.keys() + assert 'NM_000088.3:c.642+4_642+5insAAAA' in list(results.keys()) assert results['NM_000088.3:c.642+4_642+5insAAAA']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642+4_642+5insAAAA' assert results['NM_000088.3:c.642+4_642+5insAAAA']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.642+4_642+5insAAAA' assert results['NM_000088.3:c.642+4_642+5insAAAA']['alt_genomic_loci'] == [] @@ -1783,20 +1783,20 @@ def test_variant57(self): assert 
results['NM_000088.3:c.642+4_642+5insAAAA']['hgvs_lrg_variant'] == 'LRG_1:g.8695_8696insAAAA' assert results['NM_000088.3:c.642+4_642+5insAAAA']['hgvs_transcript_variant'] == 'NM_000088.3:c.642+4_642+5insAAAA' assert results['NM_000088.3:c.642+4_642+5insAAAA']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8695_8696insAAAA' - assert results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275305_48275306insTTTT', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '48275305', 'alt': u'CTTTT'}} - assert results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197944_50197945insTTTT', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '50197944', 'alt': u'CTTTT'}} - assert results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275305_48275306insTTTT', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '48275305', 'alt': u'CTTTT'}} - assert results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197944_50197945insTTTT', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '50197944', 'alt': u'CTTTT'}} + assert results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275305_48275306insTTTT', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '48275305', 'alt': 'CTTTT'}} + assert results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197944_50197945insTTTT', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '50197944', 'alt': 'CTTTT'}} + assert results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275305_48275306insTTTT', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '48275305', 'alt': 'CTTTT'}} + assert 
results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197944_50197945insTTTT', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '50197944', 'alt': 'CTTTT'}} assert results['NM_000088.3:c.642+4_642+5insAAAA']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant58(self): variant = 'NM_000088.3:c.589-4_589-3insTT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.589-4_589-3insTT' in results.keys() + assert 'NM_000088.3:c.589-4_589-3insTT' in list(results.keys()) assert results['NM_000088.3:c.589-4_589-3insTT']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-4_589-3insTT' assert results['NM_000088.3:c.589-4_589-3insTT']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-4_589-3insTT' assert results['NM_000088.3:c.589-4_589-3insTT']['alt_genomic_loci'] == [] @@ -1808,20 +1808,20 @@ def test_variant58(self): assert results['NM_000088.3:c.589-4_589-3insTT']['hgvs_lrg_variant'] == 'LRG_1:g.8634_8635insTT' assert results['NM_000088.3:c.589-4_589-3insTT']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-4_589-3insTT' assert results['NM_000088.3:c.589-4_589-3insTT']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8634_8635insTT' - assert results['NM_000088.3:c.589-4_589-3insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275366_48275367insAA', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '48275366', 'alt': u'TAA'}} - assert results['NM_000088.3:c.589-4_589-3insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005_50198006insAA', 'vcf': {'chr': 'chr17', 'ref': 
'T', 'pos': '50198005', 'alt': u'TAA'}} - assert results['NM_000088.3:c.589-4_589-3insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275366_48275367insAA', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '48275366', 'alt': u'TAA'}} - assert results['NM_000088.3:c.589-4_589-3insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005_50198006insAA', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '50198005', 'alt': u'TAA'}} + assert results['NM_000088.3:c.589-4_589-3insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275366_48275367insAA', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '48275366', 'alt': 'TAA'}} + assert results['NM_000088.3:c.589-4_589-3insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005_50198006insAA', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '50198005', 'alt': 'TAA'}} + assert results['NM_000088.3:c.589-4_589-3insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275366_48275367insAA', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '48275366', 'alt': 'TAA'}} + assert results['NM_000088.3:c.589-4_589-3insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005_50198006insAA', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '50198005', 'alt': 'TAA'}} assert results['NM_000088.3:c.589-4_589-3insTT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant59(self): variant = 'NM_000088.3:c.589-8del' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.589-7del' in results.keys() + assert 
'NM_000088.3:c.589-7del' in list(results.keys()) assert results['NM_000088.3:c.589-7del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-7del' assert results['NM_000088.3:c.589-7del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-7del' assert results['NM_000088.3:c.589-7del']['alt_genomic_loci'] == [] @@ -1843,9 +1843,9 @@ def test_variant59(self): def test_variant60(self): variant = 'NM_000527.4:c.-187_-185delCTC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_000527.4:c.-187_-185del' in results.keys() + assert 'NM_000527.4:c.-187_-185del' in list(results.keys()) assert results['NM_000527.4:c.-187_-185del']['hgvs_lrg_transcript_variant'] == 'LRG_274t1:c.-187_-185del' assert results['NM_000527.4:c.-187_-185del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000527.4:c.-187_-185del']['alt_genomic_loci'] == [] @@ -1868,10 +1868,10 @@ def test_variant60(self): def test_variant61(self): variant = 'NM_206933.2:c.6317C>G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_206933.2:c.6317C>G' in results.keys() + assert 'NM_206933.2:c.6317C>G' in list(results.keys()) assert results['NM_206933.2:c.6317C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_206933.2:c.6317C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_206933.2:c.6317C>G']['alt_genomic_loci'] == [] @@ -1883,19 +1883,19 @@ def test_variant61(self): assert results['NM_206933.2:c.6317C>G']['hgvs_lrg_variant'] == '' assert results['NM_206933.2:c.6317C>G']['hgvs_transcript_variant'] == 'NM_206933.2:c.6317C>G' assert results['NM_206933.2:c.6317C>G']['hgvs_refseqgene_variant'] == 'NG_009497.1:g.381958C>G' - assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.216219781A>C', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '216219781', 
'alt': u'C'}} - assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.216046439A>C', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '216046439', 'alt': u'C'}} - assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.216219781A>C', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '216219781', 'alt': u'C'}} - assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.216046439A>C', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '216046439', 'alt': u'C'}} + assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.216219781A>C', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '216219781', 'alt': 'C'}} + assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.216046439A>C', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '216046439', 'alt': 'C'}} + assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.216219781A>C', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '216219781', 'alt': 'C'}} + assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.216046439A>C', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '216046439', 'alt': 'C'}} assert results['NM_206933.2:c.6317C>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009497.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_996816.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_206933.2'} def test_variant62(self): variant = 'NC_000013.10:g.32929387T>C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_000059.3:c.7397C=' in results.keys() + assert 'NM_000059.3:c.7397C=' in list(results.keys()) assert 
results['NM_000059.3:c.7397C=']['hgvs_lrg_transcript_variant'] == 'LRG_293t1:c.7397C=' assert results['NM_000059.3:c.7397C=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000059.3:c.7397C=']['alt_genomic_loci'] == [] @@ -1918,9 +1918,9 @@ def test_variant62(self): def test_variant63(self): variant = 'NM_015102.3:c.2818-2T>A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_015102.3:c.2818-2T>A' in results.keys() + assert 'NM_015102.3:c.2818-2T>A' in list(results.keys()) assert results['NM_015102.3:c.2818-2T>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_015102.3:c.2818-2T>A']['refseqgene_context_intronic_sequence'] == 'NG_011724.2(NM_015102.3):c.2818-2A=' assert results['NM_015102.3:c.2818-2T>A']['alt_genomic_loci'] == [] @@ -1932,10 +1932,10 @@ def test_variant63(self): assert results['NM_015102.3:c.2818-2T>A']['hgvs_lrg_variant'] == '' assert results['NM_015102.3:c.2818-2T>A']['hgvs_transcript_variant'] == 'NM_015102.3:c.2818-2T>A' assert results['NM_015102.3:c.2818-2T>A']['hgvs_refseqgene_variant'] == 'NG_011724.2:g.122370A=' - assert results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'ref': u'A', 'pos': '5935162', 'alt': u'T'}} - assert 'hg38' not in results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci'].keys() - assert results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'ref': u'A', 'pos': '5935162', 'alt': u'T'}} - assert 'grch38' not in results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci'].keys() + assert results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '5935162', 'alt': 'T'}} + assert 'hg38' not in 
list(results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci'].keys()) + assert results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '5935162', 'alt': 'T'}} + assert 'grch38' not in list(results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci'].keys()) assert results['NM_015102.3:c.2818-2T>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011724.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055917.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015102.3'} assert results['flag'] == 'gene_variant' @@ -1943,10 +1943,10 @@ def test_variant63(self): def test_variant64(self): variant = '19-41123094-G-GG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_001042544.1:c.3233_3235=' in results.keys() + assert 'NM_001042544.1:c.3233_3235=' in list(results.keys()) assert results['NM_001042544.1:c.3233_3235=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001042544.1:c.3233_3235=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001042544.1:c.3233_3235=']['alt_genomic_loci'] == [] @@ -1959,12 +1959,12 @@ def test_variant64(self): assert results['NM_001042544.1:c.3233_3235=']['hgvs_transcript_variant'] == 'NM_001042544.1:c.3233_3235=' assert results['NM_001042544.1:c.3233_3235=']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29022_29024=' assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'ref': u'AGG', 'pos': '40617187', 'alt': u'AGG'}} + assert 
results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'AGG'}} assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'ref': u'AGG', 'pos': '40617187', 'alt': u'AGG'}} + assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'AGG'}} assert results['NM_001042544.1:c.3233_3235=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1'} - assert 'NM_001042545.1:c.3032_3034=' in results.keys() + assert 'NM_001042545.1:c.3032_3034=' in list(results.keys()) assert results['NM_001042545.1:c.3032_3034=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001042545.1:c.3032_3034=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001042545.1:c.3032_3034=']['alt_genomic_loci'] == [] @@ -1977,12 +1977,12 @@ def test_variant64(self): assert results['NM_001042545.1:c.3032_3034=']['hgvs_transcript_variant'] == 'NM_001042545.1:c.3032_3034=' assert results['NM_001042545.1:c.3032_3034=']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29022_29024=' assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} - assert 
results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'ref': u'AGG', 'pos': '40617187', 'alt': u'AGG'}} + assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'AGG'}} assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'ref': u'AGG', 'pos': '40617187', 'alt': u'AGG'}} + assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'AGG'}} assert results['NM_001042545.1:c.3032_3034=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1'} - assert 'NM_003573.2:c.3122_3124=' in results.keys() + assert 'NM_003573.2:c.3122_3124=' in list(results.keys()) assert results['NM_003573.2:c.3122_3124=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003573.2:c.3122_3124=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003573.2:c.3122_3124=']['alt_genomic_loci'] == [] @@ -1995,18 +1995,18 @@ def test_variant64(self): assert results['NM_003573.2:c.3122_3124=']['hgvs_transcript_variant'] == 'NM_003573.2:c.3122_3124=' assert results['NM_003573.2:c.3122_3124=']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29022_29024=' assert 
results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'ref': u'AGG', 'pos': '40617187', 'alt': u'AGG'}} + assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'AGG'}} assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'ref': u'AGG', 'pos': '40617187', 'alt': u'AGG'}} + assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'AGG'}} assert results['NM_003573.2:c.3122_3124=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2'} def test_variant65(self): variant = '15-72105928-AC-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_014249.2:c.946_949=' in results.keys() + assert 'NM_014249.2:c.946_949=' in list(results.keys()) assert results['NM_014249.2:c.946_949=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014249.2:c.946_949=']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_014249.2:c.946_949=']['alt_genomic_loci'] == [] @@ -2019,12 +2019,12 @@ def test_variant65(self): assert results['NM_014249.2:c.946_949=']['hgvs_transcript_variant'] == 'NM_014249.2:c.946_949=' assert results['NM_014249.2:c.946_949=']['hgvs_refseqgene_variant'] == '' assert results['NM_014249.2:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} - assert 'hg38' not in results['NM_014249.2:c.946_949=']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_014249.2:c.946_949=']['primary_assembly_loci'].keys()) assert results['NM_014249.2:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} - assert 'grch38' not in results['NM_014249.2:c.946_949=']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_014249.2:c.946_949=']['primary_assembly_loci'].keys()) assert results['NM_014249.2:c.946_949=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2'} - assert 'NM_016346.3:c.946_949=' in results.keys() + assert 'NM_016346.3:c.946_949=' in list(results.keys()) assert results['NM_016346.3:c.946_949=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_016346.3:c.946_949=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_016346.3:c.946_949=']['alt_genomic_loci'] == [] @@ -2037,13 +2037,13 @@ def test_variant65(self): assert results['NM_016346.3:c.946_949=']['hgvs_transcript_variant'] == 'NM_016346.3:c.946_949=' assert results['NM_016346.3:c.946_949=']['hgvs_refseqgene_variant'] == '' assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'ref': 'AC', 
'pos': '72105928', 'alt': 'A'}} - assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813587_71813590=', 'vcf': {'chr': 'chr15', 'ref': u'GACC', 'pos': '71813587', 'alt': u'GACC'}} + assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813587_71813590=', 'vcf': {'chr': 'chr15', 'ref': 'GACC', 'pos': '71813587', 'alt': 'GACC'}} assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} - assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813587_71813590=', 'vcf': {'chr': '15', 'ref': u'GACC', 'pos': '71813587', 'alt': u'GACC'}} + assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813587_71813590=', 'vcf': {'chr': '15', 'ref': 'GACC', 'pos': '71813587', 'alt': 'GACC'}} assert results['NM_016346.3:c.946_949=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3'} assert results['flag'] == 'gene_variant' - assert 'NM_014249.3:c.946_949=' in results.keys() + assert 'NM_014249.3:c.946_949=' in list(results.keys()) assert results['NM_014249.3:c.946_949=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014249.3:c.946_949=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014249.3:c.946_949=']['alt_genomic_loci'] == [] @@ -2056,12 +2056,12 @@ def test_variant65(self): assert results['NM_014249.3:c.946_949=']['hgvs_transcript_variant'] == 'NM_014249.3:c.946_949=' assert results['NM_014249.3:c.946_949=']['hgvs_refseqgene_variant'] == 'NG_009113.1:g.8034_8037=' assert 
results['NM_014249.3:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} - assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813586_71813591=', 'vcf': {'chr': 'chr15', 'ref': u'GGACCC', 'pos': '71813586', 'alt': u'GGACCC'}} + assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813586_71813591=', 'vcf': {'chr': 'chr15', 'ref': 'GGACCC', 'pos': '71813586', 'alt': 'GGACCC'}} assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} - assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813586_71813591=', 'vcf': {'chr': '15', 'ref': u'GGACCC', 'pos': '71813586', 'alt': u'GGACCC'}} + assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813586_71813591=', 'vcf': {'chr': '15', 'ref': 'GGACCC', 'pos': '71813586', 'alt': 'GGACCC'}} assert results['NM_014249.3:c.946_949=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3'} - assert 'NM_016346.2:c.946_949=' in results.keys() + assert 'NM_016346.2:c.946_949=' in list(results.keys()) assert results['NM_016346.2:c.946_949=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_016346.2:c.946_949=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_016346.2:c.946_949=']['alt_genomic_loci'] == [] @@ -2074,19 +2074,19 @@ def test_variant65(self): assert 
results['NM_016346.2:c.946_949=']['hgvs_transcript_variant'] == 'NM_016346.2:c.946_949=' assert results['NM_016346.2:c.946_949=']['hgvs_refseqgene_variant'] == '' assert results['NM_016346.2:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} - assert 'hg38' not in results['NM_016346.2:c.946_949=']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_016346.2:c.946_949=']['primary_assembly_loci'].keys()) assert results['NM_016346.2:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} - assert 'grch38' not in results['NM_016346.2:c.946_949=']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_016346.2:c.946_949=']['primary_assembly_loci'].keys()) assert results['NM_016346.2:c.946_949=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2'} def test_variant66(self): variant = '12-122064773-CCCGCCA-C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_032790.3:c.126_128=' in results.keys() + assert 'NM_032790.3:c.126_128=' in list(results.keys()) assert results['NM_032790.3:c.126_128=']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.126_128=' assert results['NM_032790.3:c.126_128=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032790.3:c.126_128=']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302883_302888del', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'CCCGCCA', 'pos': '302871', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302883_302888del', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'CCCGCCA', 'pos': '302871', 
'alt': 'C'}}}] @@ -2099,18 +2099,18 @@ def test_variant66(self): assert results['NM_032790.3:c.126_128=']['hgvs_transcript_variant'] == 'NM_032790.3:c.126_128=' assert results['NM_032790.3:c.126_128=']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5299_5301=' assert results['NM_032790.3:c.126_128=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064785_122064790del', 'vcf': {'chr': 'chr12', 'ref': 'CCCGCCA', 'pos': '122064773', 'alt': 'C'}} - assert results['NM_032790.3:c.126_128=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626875=', 'vcf': {'chr': 'chr12', 'ref': u'CCC', 'pos': '121626873', 'alt': u'CCC'}} + assert results['NM_032790.3:c.126_128=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626875=', 'vcf': {'chr': 'chr12', 'ref': 'CCC', 'pos': '121626873', 'alt': 'CCC'}} assert results['NM_032790.3:c.126_128=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064785_122064790del', 'vcf': {'chr': '12', 'ref': 'CCCGCCA', 'pos': '122064773', 'alt': 'C'}} - assert results['NM_032790.3:c.126_128=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626875=', 'vcf': {'chr': '12', 'ref': u'CCC', 'pos': '121626873', 'alt': u'CCC'}} + assert results['NM_032790.3:c.126_128=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626875=', 'vcf': {'chr': '12', 'ref': 'CCC', 'pos': '121626873', 'alt': 'CCC'}} assert results['NM_032790.3:c.126_128=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} def test_variant67(self): variant = '12-122064774-CCGCCA-CCGCCA' results = 
self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_032790.3:c.132_137dup' in results.keys() + assert 'NM_032790.3:c.132_137dup' in list(results.keys()) assert results['NM_032790.3:c.132_137dup']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.132_137dup' assert results['NM_032790.3:c.132_137dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032790.3:c.132_137dup']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302868_302887=', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'GGCCCCGCCACCGCCACCGC', 'pos': '302868', 'alt': 'GGCCCCGCCACCGCCACCGC'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302868_302887=', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'GGCCCCGCCACCGCCACCGC', 'pos': '302868', 'alt': 'GGCCCCGCCACCGCCACCGC'}}}] @@ -2133,13 +2133,13 @@ def test_variant67(self): def test_variant68(self): variant = '12-122064773-CCCGCCACCGCCACCGC-CCCGCCACCGCCGCCGTC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_032790.3:c.132_135delinsGCCGT' in results.keys() + assert 'NM_032790.3:c.132_135delinsGCCGT' in list(results.keys()) assert results['NM_032790.3:c.132_135delinsGCCGT']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.132_135delinsGCCGT' assert results['NM_032790.3:c.132_135delinsGCCGT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.132_135delinsGCCGT']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302883_302886delinsGCCGT', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'ACCG', 'pos': '302883', 'alt': u'GCCGT'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302883_302886delinsGCCGT', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'ACCG', 'pos': '302883', 'alt': u'GCCGT'}}}] + assert results['NM_032790.3:c.132_135delinsGCCGT']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 
'NW_004504303.2:g.302883_302886delinsGCCGT', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'ACCG', 'pos': '302883', 'alt': 'GCCGT'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302883_302886delinsGCCGT', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'ACCG', 'pos': '302883', 'alt': 'GCCGT'}}}] assert results['NM_032790.3:c.132_135delinsGCCGT']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' assert results['NM_032790.3:c.132_135delinsGCCGT']['gene_symbol'] == 'ORAI1' assert results['NM_032790.3:c.132_135delinsGCCGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro46SerfsTer42)', 'slr': 'NP_116179.2:p.(P46Sfs*42)'} @@ -2148,20 +2148,20 @@ def test_variant68(self): assert results['NM_032790.3:c.132_135delinsGCCGT']['hgvs_lrg_variant'] == 'LRG_93:g.5305_5308delinsGCCGT' assert results['NM_032790.3:c.132_135delinsGCCGT']['hgvs_transcript_variant'] == 'NM_032790.3:c.132_135delinsGCCGT' assert results['NM_032790.3:c.132_135delinsGCCGT']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5305_5308delinsGCCGT' - assert results['NM_032790.3:c.132_135delinsGCCGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064785_122064788delinsGCCGT', 'vcf': {'chr': 'chr12', 'ref': 'ACCG', 'pos': '122064785', 'alt': u'GCCGT'}} - assert results['NM_032790.3:c.132_135delinsGCCGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626879_121626882delinsGCCGT', 'vcf': {'chr': 'chr12', 'ref': 'ACCG', 'pos': '121626879', 'alt': u'GCCGT'}} - assert results['NM_032790.3:c.132_135delinsGCCGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064785_122064788delinsGCCGT', 'vcf': {'chr': '12', 'ref': 'ACCG', 'pos': '122064785', 'alt': u'GCCGT'}} - assert results['NM_032790.3:c.132_135delinsGCCGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000012.12:g.121626879_121626882delinsGCCGT', 'vcf': {'chr': '12', 'ref': 'ACCG', 'pos': '121626879', 'alt': u'GCCGT'}} + assert results['NM_032790.3:c.132_135delinsGCCGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064785_122064788delinsGCCGT', 'vcf': {'chr': 'chr12', 'ref': 'ACCG', 'pos': '122064785', 'alt': 'GCCGT'}} + assert results['NM_032790.3:c.132_135delinsGCCGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626879_121626882delinsGCCGT', 'vcf': {'chr': 'chr12', 'ref': 'ACCG', 'pos': '121626879', 'alt': 'GCCGT'}} + assert results['NM_032790.3:c.132_135delinsGCCGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064785_122064788delinsGCCGT', 'vcf': {'chr': '12', 'ref': 'ACCG', 'pos': '122064785', 'alt': 'GCCGT'}} + assert results['NM_032790.3:c.132_135delinsGCCGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626879_121626882delinsGCCGT', 'vcf': {'chr': '12', 'ref': 'ACCG', 'pos': '121626879', 'alt': 'GCCGT'}} assert results['NM_032790.3:c.132_135delinsGCCGT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} def test_variant69(self): variant = 'NC_000012.11:g.122064777C>A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_032790.3:c.129_130insACACCG' in results.keys() + assert 'NM_032790.3:c.129_130insACACCG' in list(results.keys()) assert results['NM_032790.3:c.129_130insACACCG']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.129_130insACACCG' assert results['NM_032790.3:c.129_130insACACCG']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_032790.3:c.129_130insACACCG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302875C>A', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'C', 'pos': '302875', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302875C>A', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'C', 'pos': '302875', 'alt': 'A'}}}] @@ -2174,19 +2174,19 @@ def test_variant69(self): assert results['NM_032790.3:c.129_130insACACCG']['hgvs_transcript_variant'] == 'NM_032790.3:c.129_130insACACCG' assert results['NM_032790.3:c.129_130insACACCG']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5302_5303insACACCG' assert results['NM_032790.3:c.129_130insACACCG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064777C>A', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '122064777', 'alt': 'A'}} - assert results['NM_032790.3:c.129_130insACACCG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insACACCG', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCGACA'}} + assert results['NM_032790.3:c.129_130insACACCG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insACACCG', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCGACA'}} assert results['NM_032790.3:c.129_130insACACCG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064777C>A', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '122064777', 'alt': 'A'}} - assert results['NM_032790.3:c.129_130insACACCG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insACACCG', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCGACA'}} + assert results['NM_032790.3:c.129_130insACACCG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insACACCG', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': 
'CCCGACA'}} assert results['NM_032790.3:c.129_130insACACCG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} def test_variant70(self): variant = 'NC_000012.11:g.122064776delG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_032790.3:c.128_129insCCACC' in results.keys() + assert 'NM_032790.3:c.128_129insCCACC' in list(results.keys()) assert results['NM_032790.3:c.128_129insCCACC']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.128_129insCCACC' assert results['NM_032790.3:c.128_129insCCACC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032790.3:c.128_129insCCACC']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302874del', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'CG', 'pos': '302873', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302874del', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'CG', 'pos': '302873', 'alt': 'C'}}}] @@ -2199,19 +2199,19 @@ def test_variant70(self): assert results['NM_032790.3:c.128_129insCCACC']['hgvs_transcript_variant'] == 'NM_032790.3:c.128_129insCCACC' assert results['NM_032790.3:c.128_129insCCACC']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5301_5302insCCACC' assert results['NM_032790.3:c.128_129insCCACC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064776del', 'vcf': {'chr': 'chr12', 'ref': 'CG', 'pos': '122064775', 'alt': 'C'}} - assert results['NM_032790.3:c.128_129insCCACC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626875_121626876insCCACC', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCCCA'}} + assert 
results['NM_032790.3:c.128_129insCCACC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626875_121626876insCCACC', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCCCA'}} assert results['NM_032790.3:c.128_129insCCACC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064776del', 'vcf': {'chr': '12', 'ref': 'CG', 'pos': '122064775', 'alt': 'C'}} - assert results['NM_032790.3:c.128_129insCCACC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626875_121626876insCCACC', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCCCA'}} + assert results['NM_032790.3:c.128_129insCCACC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626875_121626876insCCACC', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCCCA'}} assert results['NM_032790.3:c.128_129insCCACC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} def test_variant71(self): variant = 'NC_000012.11:g.122064776dupG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_032790.3:c.129_130insGCCACCG' in results.keys() + assert 'NM_032790.3:c.129_130insGCCACCG' in list(results.keys()) assert results['NM_032790.3:c.129_130insGCCACCG']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.129_130insGCCACCG' assert results['NM_032790.3:c.129_130insGCCACCG']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032790.3:c.129_130insGCCACCG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302874dup', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'C', 'pos': '302873', 'alt': 'CG'}}}, 
{'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302874dup', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'C', 'pos': '302873', 'alt': 'CG'}}}] @@ -2224,19 +2224,19 @@ def test_variant71(self): assert results['NM_032790.3:c.129_130insGCCACCG']['hgvs_transcript_variant'] == 'NM_032790.3:c.129_130insGCCACCG' assert results['NM_032790.3:c.129_130insGCCACCG']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5302_5303insGCCACCG' assert results['NM_032790.3:c.129_130insGCCACCG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064776dup', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '122064775', 'alt': 'CG'}} - assert results['NM_032790.3:c.129_130insGCCACCG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insGCCACCG', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCGGCCA'}} + assert results['NM_032790.3:c.129_130insGCCACCG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insGCCACCG', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCGGCCA'}} assert results['NM_032790.3:c.129_130insGCCACCG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064776dup', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '122064775', 'alt': 'CG'}} - assert results['NM_032790.3:c.129_130insGCCACCG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insGCCACCG', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCGGCCA'}} + assert results['NM_032790.3:c.129_130insGCCACCG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insGCCACCG', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCGGCCA'}} assert results['NM_032790.3:c.129_130insGCCACCG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} def test_variant72(self): variant = 'NC_000012.11:g.122064776_122064777insTTT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_032790.3:c.129_130insTTTCCACCG' in results.keys() + assert 'NM_032790.3:c.129_130insTTTCCACCG' in list(results.keys()) assert results['NM_032790.3:c.129_130insTTTCCACCG']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.129_130insTTTCCACCG' assert results['NM_032790.3:c.129_130insTTTCCACCG']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032790.3:c.129_130insTTTCCACCG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302874_302875insTTT', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'G', 'pos': '302874', 'alt': 'GTTT'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302874_302875insTTT', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'G', 'pos': '302874', 'alt': 'GTTT'}}}] @@ -2249,19 +2249,19 @@ def test_variant72(self): assert results['NM_032790.3:c.129_130insTTTCCACCG']['hgvs_transcript_variant'] == 'NM_032790.3:c.129_130insTTTCCACCG' assert results['NM_032790.3:c.129_130insTTTCCACCG']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5302_5303insTTTCCACCG' assert results['NM_032790.3:c.129_130insTTTCCACCG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064776_122064777insTTT', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '122064776', 'alt': 'GTTT'}} - assert results['NM_032790.3:c.129_130insTTTCCACCG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insTTTCCACCG', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCGTTTCCA'}} + assert results['NM_032790.3:c.129_130insTTTCCACCG']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insTTTCCACCG', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCGTTTCCA'}} assert results['NM_032790.3:c.129_130insTTTCCACCG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064776_122064777insTTT', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '122064776', 'alt': 'GTTT'}} - assert results['NM_032790.3:c.129_130insTTTCCACCG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insTTTCCACCG', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCGTTTCCA'}} + assert results['NM_032790.3:c.129_130insTTTCCACCG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insTTTCCACCG', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCGTTTCCA'}} assert results['NM_032790.3:c.129_130insTTTCCACCG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} def test_variant73(self): variant = 'NC_000012.11:g.122064772_122064775del' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_032790.3:c.125_126delinsGCCA' in results.keys() + assert 'NM_032790.3:c.125_126delinsGCCA' in list(results.keys()) assert results['NM_032790.3:c.125_126delinsGCCA']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.125_126delinsGCCA' assert results['NM_032790.3:c.125_126delinsGCCA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032790.3:c.125_126delinsGCCA']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302870_302873del', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'GCCCC', 'pos': '302869', 'alt': 'G'}}}, 
{'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302870_302873del', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'GCCCC', 'pos': '302869', 'alt': 'G'}}}] @@ -2283,10 +2283,10 @@ def test_variant73(self): def test_variant74(self): variant = 'NC_000012.11:g.122064772_122064775dup' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_032790.3:c.128_129insCCCCGCCACC' in results.keys() + assert 'NM_032790.3:c.128_129insCCCCGCCACC' in list(results.keys()) assert results['NM_032790.3:c.128_129insCCCCGCCACC']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.128_129insCCCCGCCACC' assert results['NM_032790.3:c.128_129insCCCCGCCACC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032790.3:c.128_129insCCCCGCCACC']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302870_302873dup', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'G', 'pos': '302869', 'alt': 'GCCCC'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302870_302873dup', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'G', 'pos': '302869', 'alt': 'GCCCC'}}}] @@ -2299,18 +2299,18 @@ def test_variant74(self): assert results['NM_032790.3:c.128_129insCCCCGCCACC']['hgvs_transcript_variant'] == 'NM_032790.3:c.128_129insCCCCGCCACC' assert results['NM_032790.3:c.128_129insCCCCGCCACC']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5301_5302insCCCCGCCACC' assert results['NM_032790.3:c.128_129insCCCCGCCACC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064772_122064775dup', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '122064771', 'alt': 'GCCCC'}} - assert results['NM_032790.3:c.128_129insCCCCGCCACC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626875_121626876insCCCCGCCACC', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCCCCCGCCA'}} + assert 
results['NM_032790.3:c.128_129insCCCCGCCACC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626875_121626876insCCCCGCCACC', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCCCCCGCCA'}} assert results['NM_032790.3:c.128_129insCCCCGCCACC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064772_122064775dup', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '122064771', 'alt': 'GCCCC'}} - assert results['NM_032790.3:c.128_129insCCCCGCCACC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626875_121626876insCCCCGCCACC', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCCCCCGCCA'}} + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626875_121626876insCCCCGCCACC', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCCCCCGCCA'}} assert results['NM_032790.3:c.128_129insCCCCGCCACC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} def test_variant75(self): variant = 'NC_000012.11:g.122064773_122064774insTTTT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_032790.3:c.126_127insTTTTCCGCCA' in results.keys() + assert 'NM_032790.3:c.126_127insTTTTCCGCCA' in list(results.keys()) assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.126_127insTTTTCCGCCA' assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302872insTTTT', 'vcf': 
{'chr': 'HG1595_PATCH', 'ref': 'C', 'pos': '302871', 'alt': 'CTTTT'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302872insTTTT', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'C', 'pos': '302871', 'alt': 'CTTTT'}}}] @@ -2323,9 +2323,9 @@ def test_variant75(self): assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['hgvs_transcript_variant'] == 'NM_032790.3:c.126_127insTTTTCCGCCA' assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5299_5300insTTTTCCGCCA' assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064774insTTTT', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '122064773', 'alt': 'CTTTT'}} - assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626874insTTTTCCGCCA', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': u'CTTTTCCGCCA'}} + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626874insTTTTCCGCCA', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': 'CTTTTCCGCCA'}} assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064774insTTTT', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '122064773', 'alt': 'CTTTT'}} - assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626874insTTTTCCGCCA', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': u'CTTTTCCGCCA'}} + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626874insTTTTCCGCCA', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': 'CTTTTCCGCCA'}} assert 
results['NM_032790.3:c.126_127insTTTTCCGCCA']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} assert results['flag'] == 'gene_variant' @@ -2333,9 +2333,9 @@ def test_variant75(self): def test_variant76(self): variant = 'NC_000012.11:g.122064772_122064777del' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_032790.3:c.126C>A' in results.keys() + assert 'NM_032790.3:c.126C>A' in list(results.keys()) assert results['NM_032790.3:c.126C>A']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.126C>A' assert results['NM_032790.3:c.126C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032790.3:c.126C>A']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876del', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'GCCCCGC', 'pos': '302869', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876del', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'GCCCCGC', 'pos': '302869', 'alt': 'G'}}}] @@ -2348,9 +2348,9 @@ def test_variant76(self): assert results['NM_032790.3:c.126C>A']['hgvs_transcript_variant'] == 'NM_032790.3:c.126C>A' assert results['NM_032790.3:c.126C>A']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5299C>A' assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064778del', 'vcf': {'chr': 'chr12', 'ref': 'GCCCCGC', 'pos': '122064771', 'alt': 'G'}} - assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873C>A', 'vcf': {'chr': 'chr12', 'ref': u'C', 'pos': '121626873', 'alt': u'A'}} + assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000012.12:g.121626873C>A', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': 'A'}} assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064778del', 'vcf': {'chr': '12', 'ref': 'GCCCCGC', 'pos': '122064771', 'alt': 'G'}} - assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873C>A', 'vcf': {'chr': '12', 'ref': u'C', 'pos': '121626873', 'alt': u'A'}} + assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873C>A', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': 'A'}} assert results['NM_032790.3:c.126C>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} assert results['flag'] == 'gene_variant' @@ -2358,10 +2358,10 @@ def test_variant76(self): def test_variant77(self): variant = 'NC_000012.11:g.122064772_122064777dup' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_032790.3:c.131_132insCCCGCCACCGCC' in results.keys() + assert 'NM_032790.3:c.131_132insCCCGCCACCGCC' in list(results.keys()) assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.131_132insCCCGCCACCGCC' assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876dup', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'G', 'pos': '302869', 'alt': 'GCCCCGC'}}}, {'hg19': 
{'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876dup', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'G', 'pos': '302869', 'alt': 'GCCCCGC'}}}] @@ -2374,18 +2374,18 @@ def test_variant77(self): assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['hgvs_transcript_variant'] == 'NM_032790.3:c.131_132insCCCGCCACCGCC' assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5304_5305insCCCGCCACCGCC' assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064778dup', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '122064771', 'alt': 'GCCCCGC'}} - assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626878_121626879insCCCGCCACCGCC', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCGCCCCCGCCA'}} + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626878_121626879insCCCGCCACCGCC', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCGCCCCCGCCA'}} assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064778dup', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '122064771', 'alt': 'GCCCCGC'}} - assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626878_121626879insCCCGCCACCGCC', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCGCCCCCGCCA'}} + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626878_121626879insCCCGCCACCGCC', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCGCCCCCGCCA'}} assert 
results['NM_032790.3:c.131_132insCCCGCCACCGCC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} def test_variant78(self): variant = 'NC_000012.11:g.122064779_122064782dup' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_032790.3:c.135_136insACCGCCACCG' in results.keys() + assert 'NM_032790.3:c.135_136insACCGCCACCG' in list(results.keys()) assert results['NM_032790.3:c.135_136insACCGCCACCG']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.135_136insACCGCCACCG' assert results['NM_032790.3:c.135_136insACCGCCACCG']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032790.3:c.135_136insACCGCCACCG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302877_302880dup', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'C', 'pos': '302876', 'alt': 'CACCG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302877_302880dup', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'C', 'pos': '302876', 'alt': 'CACCG'}}}] @@ -2398,9 +2398,9 @@ def test_variant78(self): assert results['NM_032790.3:c.135_136insACCGCCACCG']['hgvs_transcript_variant'] == 'NM_032790.3:c.135_136insACCGCCACCG' assert results['NM_032790.3:c.135_136insACCGCCACCG']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5308_5309insACCGCCACCG' assert results['NM_032790.3:c.135_136insACCGCCACCG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064779_122064782dup', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '122064778', 'alt': 'CACCG'}} - assert results['NM_032790.3:c.135_136insACCGCCACCG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626882_121626883insACCGCCACCG', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': 
u'CCCGCCACCGA'}} + assert results['NM_032790.3:c.135_136insACCGCCACCG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626882_121626883insACCGCCACCG', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCGCCACCGA'}} assert results['NM_032790.3:c.135_136insACCGCCACCG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064779_122064782dup', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '122064778', 'alt': 'CACCG'}} - assert results['NM_032790.3:c.135_136insACCGCCACCG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626882_121626883insACCGCCACCG', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': u'CCCGCCACCGA'}} + assert results['NM_032790.3:c.135_136insACCGCCACCG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626882_121626883insACCGCCACCG', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCGCCACCGA'}} assert results['NM_032790.3:c.135_136insACCGCCACCG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} assert results['flag'] == 'gene_variant' @@ -2408,10 +2408,10 @@ def test_variant78(self): def test_variant79(self): variant = 'NC_000012.11:g.122064772_122064782del' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_032790.3:c.126_127insA' in results.keys() + assert 'NM_032790.3:c.126_127insA' in list(results.keys()) assert results['NM_032790.3:c.126_127insA']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.126_127insA' assert results['NM_032790.3:c.126_127insA']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_032790.3:c.126_127insA']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302872_302876del', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'GGCCCC', 'pos': '302868', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302872_302876del', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'GGCCCC', 'pos': '302868', 'alt': 'G'}}}] @@ -2424,18 +2424,18 @@ def test_variant79(self): assert results['NM_032790.3:c.126_127insA']['hgvs_transcript_variant'] == 'NM_032790.3:c.126_127insA' assert results['NM_032790.3:c.126_127insA']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5299_5300insA' assert results['NM_032790.3:c.126_127insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064774_122064778del', 'vcf': {'chr': 'chr12', 'ref': 'GGCCCC', 'pos': '122064770', 'alt': 'G'}} - assert results['NM_032790.3:c.126_127insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626874insA', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': u'CA'}} + assert results['NM_032790.3:c.126_127insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626874insA', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': 'CA'}} assert results['NM_032790.3:c.126_127insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064774_122064778del', 'vcf': {'chr': '12', 'ref': 'GGCCCC', 'pos': '122064770', 'alt': 'G'}} - assert results['NM_032790.3:c.126_127insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626874insA', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': u'CA'}} + assert results['NM_032790.3:c.126_127insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626874insA', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': 'CA'}} assert 
results['NM_032790.3:c.126_127insA']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} def test_variant80(self): variant = 'NC_000002.11:g.95847041_95847043GCG=' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_021088.3:c.471_473dup' in results.keys() + assert 'NM_021088.3:c.471_473dup' in list(results.keys()) assert results['NM_021088.3:c.471_473dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_021088.3:c.471_473dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_021088.3:c.471_473dup']['alt_genomic_loci'] == [] @@ -2453,7 +2453,7 @@ def test_variant80(self): assert results['NM_021088.3:c.471_473dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '95181295', 'alt': 'TGCG'}} assert results['NM_021088.3:c.471_473dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_033798.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_066574.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021088.3'} - assert 'NM_001291605.1:c.510_512dup' in results.keys() + assert 'NM_001291605.1:c.510_512dup' in list(results.keys()) assert results['NM_001291605.1:c.510_512dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001291605.1:c.510_512dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001291605.1:c.510_512dup']['alt_genomic_loci'] == [] @@ -2471,7 +2471,7 @@ def test_variant80(self): assert results['NM_001291605.1:c.510_512dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '95181295', 
'alt': 'TGCG'}} assert results['NM_001291605.1:c.510_512dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278534.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291605.1'} - assert 'NM_001017396.2:c.345_347dup' in results.keys() + assert 'NM_001017396.2:c.345_347dup' in list(results.keys()) assert results['NM_001017396.2:c.345_347dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001017396.2:c.345_347dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001017396.2:c.345_347dup']['alt_genomic_loci'] == [] @@ -2489,7 +2489,7 @@ def test_variant80(self): assert results['NM_001017396.2:c.345_347dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '95181295', 'alt': 'TGCG'}} assert results['NM_001017396.2:c.345_347dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001017396.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001017396.2'} - assert 'NM_001282398.1:c.357_359dup' in results.keys() + assert 'NM_001282398.1:c.357_359dup' in list(results.keys()) assert results['NM_001282398.1:c.357_359dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001282398.1:c.357_359dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001282398.1:c.357_359dup']['alt_genomic_loci'] == [] @@ -2508,7 +2508,7 @@ def test_variant80(self): assert results['NM_001282398.1:c.357_359dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269327.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282398.1'} assert results['flag'] == 'gene_variant' - assert 'NM_001291604.1:c.231_233dup' in results.keys() + assert 'NM_001291604.1:c.231_233dup' in list(results.keys()) assert results['NM_001291604.1:c.231_233dup']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001291604.1:c.231_233dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001291604.1:c.231_233dup']['alt_genomic_loci'] == [] @@ -2526,7 +2526,7 @@ def test_variant80(self): assert results['NM_001291604.1:c.231_233dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '95181295', 'alt': 'TGCG'}} assert results['NM_001291604.1:c.231_233dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278533.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291604.1'} - assert 'NM_021088.2:c.471_473dup' in results.keys() + assert 'NM_021088.2:c.471_473dup' in list(results.keys()) assert results['NM_021088.2:c.471_473dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_021088.2:c.471_473dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_021088.2:c.471_473dup']['alt_genomic_loci'] == [] @@ -2539,12 +2539,12 @@ def test_variant80(self): assert results['NM_021088.2:c.471_473dup']['hgvs_transcript_variant'] == 'NM_021088.2:c.471_473dup' assert results['NM_021088.2:c.471_473dup']['hgvs_refseqgene_variant'] == '' assert results['NM_021088.2:c.471_473dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': 'chr2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} - assert 'hg38' not in results['NM_021088.2:c.471_473dup']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_021088.2:c.471_473dup']['primary_assembly_loci'].keys()) assert results['NM_021088.2:c.471_473dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': '2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} - assert 'grch38' not in results['NM_021088.2:c.471_473dup']['primary_assembly_loci'].keys() + assert 'grch38' not in 
list(results['NM_021088.2:c.471_473dup']['primary_assembly_loci'].keys()) assert results['NM_021088.2:c.471_473dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_066574.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021088.2'} - assert 'NM_001017396.1:c.345_347dup' in results.keys() + assert 'NM_001017396.1:c.345_347dup' in list(results.keys()) assert results['NM_001017396.1:c.345_347dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001017396.1:c.345_347dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001017396.1:c.345_347dup']['alt_genomic_loci'] == [] @@ -2557,18 +2557,18 @@ def test_variant80(self): assert results['NM_001017396.1:c.345_347dup']['hgvs_transcript_variant'] == 'NM_001017396.1:c.345_347dup' assert results['NM_001017396.1:c.345_347dup']['hgvs_refseqgene_variant'] == '' assert results['NM_001017396.1:c.345_347dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': 'chr2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} - assert 'hg38' not in results['NM_001017396.1:c.345_347dup']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001017396.1:c.345_347dup']['primary_assembly_loci'].keys()) assert results['NM_001017396.1:c.345_347dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': '2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} - assert 'grch38' not in results['NM_001017396.1:c.345_347dup']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001017396.1:c.345_347dup']['primary_assembly_loci'].keys()) assert results['NM_001017396.1:c.345_347dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001017396.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001017396.1'} def test_variant81(self): 
variant = 'NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001083585.1:c.*344_*368dup' in results.keys() + assert 'NM_001083585.1:c.*344_*368dup' in list(results.keys()) assert results['NM_001083585.1:c.*344_*368dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001083585.1:c.*344_*368dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001083585.1:c.*344_*368dup']['alt_genomic_loci'] == [] @@ -2581,12 +2581,12 @@ def test_variant81(self): assert results['NM_001083585.1:c.*344_*368dup']['hgvs_transcript_variant'] == 'NM_001083585.1:c.*344_*368dup' assert results['NM_001083585.1:c.*344_*368dup']['hgvs_refseqgene_variant'] == '' assert results['NM_001083585.1:c.*344_*368dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': 'chr17', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'pos': '5286857', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} - assert 'hg38' not in results['NM_001083585.1:c.*344_*368dup']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001083585.1:c.*344_*368dup']['primary_assembly_loci'].keys()) assert results['NM_001083585.1:c.*344_*368dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': '17', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'pos': '5286857', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} - assert 'grch38' not in results['NM_001083585.1:c.*344_*368dup']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001083585.1:c.*344_*368dup']['primary_assembly_loci'].keys()) assert results['NM_001083585.1:c.*344_*368dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001077054.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001083585.1'} - assert 'NM_004703.5:c.*344_*368dup' in results.keys() + assert 'NM_004703.5:c.*344_*368dup' in list(results.keys()) assert results['NM_004703.5:c.*344_*368dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_004703.5:c.*344_*368dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004703.5:c.*344_*368dup']['alt_genomic_loci'] == [] @@ -2604,7 +2604,7 @@ def test_variant81(self): assert results['NM_004703.5:c.*344_*368dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.5383567_5383591dup', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '5383566', 'alt': 'GTAGTGTTTGGAATTTTCTGTTCATA'}} assert results['NM_004703.5:c.*344_*368dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004694.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004703.5'} - assert 'NM_004703.4:c.*344_*368dup' in results.keys() + assert 'NM_004703.4:c.*344_*368dup' in list(results.keys()) assert results['NM_004703.4:c.*344_*368dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_004703.4:c.*344_*368dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004703.4:c.*344_*368dup']['alt_genomic_loci'] == [] @@ -2617,13 +2617,13 @@ def test_variant81(self): assert results['NM_004703.4:c.*344_*368dup']['hgvs_transcript_variant'] == 'NM_004703.4:c.*344_*368dup' assert results['NM_004703.4:c.*344_*368dup']['hgvs_refseqgene_variant'] == '' assert results['NM_004703.4:c.*344_*368dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': 'chr17', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'pos': '5286857', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} - assert 'hg38' not in results['NM_004703.4:c.*344_*368dup']['primary_assembly_loci'].keys() + assert 'hg38' not in 
list(results['NM_004703.4:c.*344_*368dup']['primary_assembly_loci'].keys()) assert results['NM_004703.4:c.*344_*368dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': '17', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'pos': '5286857', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} - assert 'grch38' not in results['NM_004703.4:c.*344_*368dup']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_004703.4:c.*344_*368dup']['primary_assembly_loci'].keys()) assert results['NM_004703.4:c.*344_*368dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004694.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004703.4'} assert results['flag'] == 'gene_variant' - assert 'NM_001291581.1:c.*344_*368dup' in results.keys() + assert 'NM_001291581.1:c.*344_*368dup' in list(results.keys()) assert results['NM_001291581.1:c.*344_*368dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001291581.1:c.*344_*368dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001291581.1:c.*344_*368dup']['alt_genomic_loci'] == [] @@ -2641,7 +2641,7 @@ def test_variant81(self): assert results['NM_001291581.1:c.*344_*368dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.5383567_5383591dup', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '5383566', 'alt': 'GTAGTGTTTGGAATTTTCTGTTCATA'}} assert results['NM_001291581.1:c.*344_*368dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278510.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291581.1'} - assert 'NM_001083585.2:c.*344_*368dup' in results.keys() + assert 'NM_001083585.2:c.*344_*368dup' in list(results.keys()) assert results['NM_001083585.2:c.*344_*368dup']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001083585.2:c.*344_*368dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001083585.2:c.*344_*368dup']['alt_genomic_loci'] == [] @@ -2663,9 +2663,9 @@ def test_variant81(self): def test_variant82(self): variant = 'NC_000003.11:g.14561629_14561630GC=' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001080423.3:c.1020del' in results.keys() + assert 'NM_001080423.3:c.1020del' in list(results.keys()) assert results['NM_001080423.3:c.1020del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001080423.3:c.1020del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001080423.3:c.1020del']['alt_genomic_loci'] == [] @@ -2684,7 +2684,7 @@ def test_variant82(self): assert results['NM_001080423.3:c.1020del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.3'} assert results['flag'] == 'gene_variant' - assert 'NM_001080423.2:c.1311del' in results.keys() + assert 'NM_001080423.2:c.1311del' in list(results.keys()) assert results['NM_001080423.2:c.1311del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001080423.2:c.1311del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001080423.2:c.1311del']['alt_genomic_loci'] == [] @@ -2697,18 +2697,18 @@ def test_variant82(self): assert results['NM_001080423.2:c.1311del']['hgvs_transcript_variant'] == 'NM_001080423.2:c.1311del' assert results['NM_001080423.2:c.1311del']['hgvs_refseqgene_variant'] == '' assert results['NM_001080423.2:c.1311del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': 'chr3', 'ref': 'CTGAGGC', 'pos': '14561624', 'alt': 'CTGAGGC'}} - assert 'hg38' not in results['NM_001080423.2:c.1311del']['primary_assembly_loci'].keys() + assert 'hg38' not in 
list(results['NM_001080423.2:c.1311del']['primary_assembly_loci'].keys()) assert results['NM_001080423.2:c.1311del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': '3', 'ref': 'CTGAGGC', 'pos': '14561624', 'alt': 'CTGAGGC'}} - assert 'grch38' not in results['NM_001080423.2:c.1311del']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001080423.2:c.1311del']['primary_assembly_loci'].keys()) assert results['NM_001080423.2:c.1311del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.2'} def test_variant83(self): variant = 'NC_000003.11:g.14561629_14561630insG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001080423.3:c.1016_1020=' in results.keys() + assert 'NM_001080423.3:c.1016_1020=' in list(results.keys()) assert results['NM_001080423.3:c.1016_1020=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001080423.3:c.1016_1020=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001080423.3:c.1016_1020=']['alt_genomic_loci'] == [] @@ -2721,13 +2721,13 @@ def test_variant83(self): assert results['NM_001080423.3:c.1016_1020=']['hgvs_transcript_variant'] == 'NM_001080423.3:c.1016_1020=' assert results['NM_001080423.3:c.1016_1020=']['hgvs_refseqgene_variant'] == '' assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': 'chr3', 'ref': 'A', 'pos': '14561627', 'alt': 'AG'}} - assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520120_14520124=', 'vcf': {'chr': 'chr3', 'ref': u'GGGCC', 'pos': '14520120', 'alt': u'GGGCC'}} + assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000003.12:g.14520120_14520124=', 'vcf': {'chr': 'chr3', 'ref': 'GGGCC', 'pos': '14520120', 'alt': 'GGGCC'}} assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': '3', 'ref': 'A', 'pos': '14561627', 'alt': 'AG'}} - assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520120_14520124=', 'vcf': {'chr': '3', 'ref': u'GGGCC', 'pos': '14520120', 'alt': u'GGGCC'}} + assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520120_14520124=', 'vcf': {'chr': '3', 'ref': 'GGGCC', 'pos': '14520120', 'alt': 'GGGCC'}} assert results['NM_001080423.3:c.1016_1020=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.3'} assert results['flag'] == 'gene_variant' - assert 'NM_001080423.2:c.1307_1311=' in results.keys() + assert 'NM_001080423.2:c.1307_1311=' in list(results.keys()) assert results['NM_001080423.2:c.1307_1311=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001080423.2:c.1307_1311=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001080423.2:c.1307_1311=']['alt_genomic_loci'] == [] @@ -2740,19 +2740,19 @@ def test_variant83(self): assert results['NM_001080423.2:c.1307_1311=']['hgvs_transcript_variant'] == 'NM_001080423.2:c.1307_1311=' assert results['NM_001080423.2:c.1307_1311=']['hgvs_refseqgene_variant'] == '' assert results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': 'chr3', 'ref': 'A', 'pos': '14561627', 'alt': 'AG'}} - assert 'hg38' not in results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci'].keys() + assert 'hg38' not in 
list(results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci'].keys()) assert results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': '3', 'ref': 'A', 'pos': '14561627', 'alt': 'AG'}} - assert 'grch38' not in results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci'].keys()) assert results['NM_001080423.2:c.1307_1311=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.2'} def test_variant84(self): variant = 'NC_000004.11:g.140811111_140811122del' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_018717.5:c.1515_1526del' in results.keys() + assert 'NM_018717.5:c.1515_1526del' in list(results.keys()) assert results['NM_018717.5:c.1515_1526del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_018717.5:c.1515_1526del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_018717.5:c.1515_1526del']['alt_genomic_loci'] == [] @@ -2765,12 +2765,12 @@ def test_variant84(self): assert results['NM_018717.5:c.1515_1526del']['hgvs_transcript_variant'] == 'NM_018717.5:c.1515_1526del' assert results['NM_018717.5:c.1515_1526del']['hgvs_refseqgene_variant'] == '' assert results['NM_018717.5:c.1515_1526del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811122del', 'vcf': {'chr': 'chr4', 'ref': 'TTGCTGCTGCTGC', 'pos': '140811063', 'alt': 'T'}} - assert 'hg38' not in results['NM_018717.5:c.1515_1526del']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_018717.5:c.1515_1526del']['primary_assembly_loci'].keys()) assert results['NM_018717.5:c.1515_1526del']['primary_assembly_loci']['grch37'] 
== {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811122del', 'vcf': {'chr': '4', 'ref': 'TTGCTGCTGCTGC', 'pos': '140811063', 'alt': 'T'}} - assert 'grch38' not in results['NM_018717.5:c.1515_1526del']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_018717.5:c.1515_1526del']['primary_assembly_loci'].keys()) assert results['NM_018717.5:c.1515_1526del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5'} - assert 'NM_018717.4:c.1465_1469=' in results.keys() + assert 'NM_018717.4:c.1465_1469=' in list(results.keys()) assert results['NM_018717.4:c.1465_1469=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_018717.4:c.1465_1469=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_018717.4:c.1465_1469=']['alt_genomic_loci'] == [] @@ -2792,9 +2792,9 @@ def test_variant84(self): def test_variant85(self): variant = 'NC_000004.11:g.140811111_140811122CTGCTGCTGCTG=' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_018717.5:c.1468_1479=' in results.keys() + assert 'NM_018717.5:c.1468_1479=' in list(results.keys()) assert results['NM_018717.5:c.1468_1479=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_018717.5:c.1468_1479=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_018717.5:c.1468_1479=']['alt_genomic_loci'] == [] @@ -2806,14 +2806,14 @@ def test_variant85(self): assert results['NM_018717.5:c.1468_1479=']['hgvs_lrg_variant'] == '' assert results['NM_018717.5:c.1468_1479=']['hgvs_transcript_variant'] == 'NM_018717.5:c.1468_1479=' assert results['NM_018717.5:c.1468_1479=']['hgvs_refseqgene_variant'] == '' - assert results['NM_018717.5:c.1468_1479=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811122=', 'vcf': {'chr': 'chr4', 'ref': u'CTGCTGCTGCTG', 'pos': '140811111', 'alt': 
u'CTGCTGCTGCTG'}} - assert 'hg38' not in results['NM_018717.5:c.1468_1479=']['primary_assembly_loci'].keys() - assert results['NM_018717.5:c.1468_1479=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811122=', 'vcf': {'chr': '4', 'ref': u'CTGCTGCTGCTG', 'pos': '140811111', 'alt': u'CTGCTGCTGCTG'}} - assert 'grch38' not in results['NM_018717.5:c.1468_1479=']['primary_assembly_loci'].keys() + assert results['NM_018717.5:c.1468_1479=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811122=', 'vcf': {'chr': 'chr4', 'ref': 'CTGCTGCTGCTG', 'pos': '140811111', 'alt': 'CTGCTGCTGCTG'}} + assert 'hg38' not in list(results['NM_018717.5:c.1468_1479=']['primary_assembly_loci'].keys()) + assert results['NM_018717.5:c.1468_1479=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811122=', 'vcf': {'chr': '4', 'ref': 'CTGCTGCTGCTG', 'pos': '140811111', 'alt': 'CTGCTGCTGCTG'}} + assert 'grch38' not in list(results['NM_018717.5:c.1468_1479=']['primary_assembly_loci'].keys()) assert results['NM_018717.5:c.1468_1479=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5'} assert results['flag'] == 'gene_variant' - assert 'NM_018717.4:c.1503_1514dup' in results.keys() + assert 'NM_018717.4:c.1503_1514dup' in list(results.keys()) assert results['NM_018717.4:c.1503_1514dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_018717.4:c.1503_1514dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_018717.4:c.1503_1514dup']['alt_genomic_loci'] == [] @@ -2835,10 +2835,10 @@ def test_variant85(self): def test_variant86(self): variant = 'NC_000004.11:g.140811117_140811122del' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 
'NM_018717.5:c.1521_1526del' in results.keys() + assert 'NM_018717.5:c.1521_1526del' in list(results.keys()) assert results['NM_018717.5:c.1521_1526del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_018717.5:c.1521_1526del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_018717.5:c.1521_1526del']['alt_genomic_loci'] == [] @@ -2851,12 +2851,12 @@ def test_variant86(self): assert results['NM_018717.5:c.1521_1526del']['hgvs_transcript_variant'] == 'NM_018717.5:c.1521_1526del' assert results['NM_018717.5:c.1521_1526del']['hgvs_refseqgene_variant'] == '' assert results['NM_018717.5:c.1521_1526del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117_140811122del', 'vcf': {'chr': 'chr4', 'ref': 'TTGCTGC', 'pos': '140811063', 'alt': 'T'}} - assert 'hg38' not in results['NM_018717.5:c.1521_1526del']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_018717.5:c.1521_1526del']['primary_assembly_loci'].keys()) assert results['NM_018717.5:c.1521_1526del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117_140811122del', 'vcf': {'chr': '4', 'ref': 'TTGCTGC', 'pos': '140811063', 'alt': 'T'}} - assert 'grch38' not in results['NM_018717.5:c.1521_1526del']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_018717.5:c.1521_1526del']['primary_assembly_loci'].keys()) assert results['NM_018717.5:c.1521_1526del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5'} - assert 'NM_018717.4:c.1509_1514dup' in results.keys() + assert 'NM_018717.4:c.1509_1514dup' in list(results.keys()) assert results['NM_018717.4:c.1509_1514dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_018717.4:c.1509_1514dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_018717.4:c.1509_1514dup']['alt_genomic_loci'] == [] @@ 
-2878,9 +2878,9 @@ def test_variant86(self): def test_variant87(self): variant = 'NC_000004.11:g.140811111_140811117del' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_018717.5:c.1473_1479del' in results.keys() + assert 'NM_018717.5:c.1473_1479del' in list(results.keys()) assert results['NM_018717.5:c.1473_1479del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_018717.5:c.1473_1479del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_018717.5:c.1473_1479del']['alt_genomic_loci'] == [] @@ -2893,13 +2893,13 @@ def test_variant87(self): assert results['NM_018717.5:c.1473_1479del']['hgvs_transcript_variant'] == 'NM_018717.5:c.1473_1479del' assert results['NM_018717.5:c.1473_1479del']['hgvs_refseqgene_variant'] == '' assert results['NM_018717.5:c.1473_1479del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811117del', 'vcf': {'chr': 'chr4', 'ref': 'GCTGCTGC', 'pos': '140811110', 'alt': 'G'}} - assert 'hg38' not in results['NM_018717.5:c.1473_1479del']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_018717.5:c.1473_1479del']['primary_assembly_loci'].keys()) assert results['NM_018717.5:c.1473_1479del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811117del', 'vcf': {'chr': '4', 'ref': 'GCTGCTGC', 'pos': '140811110', 'alt': 'G'}} - assert 'grch38' not in results['NM_018717.5:c.1473_1479del']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_018717.5:c.1473_1479del']['primary_assembly_loci'].keys()) assert results['NM_018717.5:c.1473_1479del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5'} assert results['flag'] == 'gene_variant' - assert 'NM_018717.4:c.1468_1472dup' in results.keys() + assert 'NM_018717.4:c.1468_1472dup' in 
list(results.keys()) assert results['NM_018717.4:c.1468_1472dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_018717.4:c.1468_1472dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_018717.4:c.1468_1472dup']['alt_genomic_loci'] == [] @@ -2921,10 +2921,10 @@ def test_variant87(self): def test_variant88(self): variant = 'NC_000004.11:g.140811117C>A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_018717.4:c.1472_1473insTCAGCAGCAGCA' in results.keys() + assert 'NM_018717.4:c.1472_1473insTCAGCAGCAGCA' in list(results.keys()) assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['alt_genomic_loci'] == [] @@ -2936,13 +2936,13 @@ def test_variant88(self): assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['hgvs_lrg_variant'] == '' assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['hgvs_transcript_variant'] == 'NM_018717.4:c.1472_1473insTCAGCAGCAGCA' assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['hgvs_refseqgene_variant'] == '' - assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117C>A', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '140811117', 'alt': u'A'}} - assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889963C>A', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '139889963', 'alt': u'A'}} - assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117C>A', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '140811117', 'alt': u'A'}} - assert 
results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889963C>A', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '139889963', 'alt': u'A'}} + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117C>A', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '140811117', 'alt': 'A'}} + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889963C>A', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '139889963', 'alt': 'A'}} + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117C>A', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '140811117', 'alt': 'A'}} + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889963C>A', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '139889963', 'alt': 'A'}} assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.4'} - assert 'NM_018717.5:c.1473G>T' in results.keys() + assert 'NM_018717.5:c.1473G>T' in list(results.keys()) assert results['NM_018717.5:c.1473G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_018717.5:c.1473G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_018717.5:c.1473G>T']['alt_genomic_loci'] == [] @@ -2954,19 +2954,19 @@ def test_variant88(self): assert results['NM_018717.5:c.1473G>T']['hgvs_lrg_variant'] == '' assert results['NM_018717.5:c.1473G>T']['hgvs_transcript_variant'] == 'NM_018717.5:c.1473G>T' assert results['NM_018717.5:c.1473G>T']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_018717.5:c.1473G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117C>A', 'vcf': {'chr': 'chr4', 'ref': u'C', 'pos': '140811117', 'alt': u'A'}} - assert 'hg38' not in results['NM_018717.5:c.1473G>T']['primary_assembly_loci'].keys() - assert results['NM_018717.5:c.1473G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117C>A', 'vcf': {'chr': '4', 'ref': u'C', 'pos': '140811117', 'alt': u'A'}} - assert 'grch38' not in results['NM_018717.5:c.1473G>T']['primary_assembly_loci'].keys() + assert results['NM_018717.5:c.1473G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117C>A', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '140811117', 'alt': 'A'}} + assert 'hg38' not in list(results['NM_018717.5:c.1473G>T']['primary_assembly_loci'].keys()) + assert results['NM_018717.5:c.1473G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117C>A', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '140811117', 'alt': 'A'}} + assert 'grch38' not in list(results['NM_018717.5:c.1473G>T']['primary_assembly_loci'].keys()) assert results['NM_018717.5:c.1473G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5'} def test_variant89(self): variant = 'NC_000002.11:g.73675227_73675228insCTC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_015120.4:c.1573_1579=' in results.keys() + assert 'NM_015120.4:c.1573_1579=' in list(results.keys()) assert results['NM_015120.4:c.1573_1579=']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.1573_1579=' assert results['NM_015120.4:c.1573_1579=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_015120.4:c.1573_1579=']['alt_genomic_loci'] == [] @@ -2979,9 +2979,9 @@ def test_variant89(self): assert 
results['NM_015120.4:c.1573_1579=']['hgvs_transcript_variant'] == 'NM_015120.4:c.1573_1579=' assert results['NM_015120.4:c.1573_1579=']['hgvs_refseqgene_variant'] == 'NG_011690.1:g.67345_67351=' assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675228_73675230dup', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '73675227', 'alt': 'TCTC'}} - assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448097_73448103=', 'vcf': {'chr': 'chr2', 'ref': u'TCTCCTC', 'pos': '73448097', 'alt': u'TCTCCTC'}} + assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448097_73448103=', 'vcf': {'chr': 'chr2', 'ref': 'TCTCCTC', 'pos': '73448097', 'alt': 'TCTCCTC'}} assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675228_73675230dup', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '73675227', 'alt': 'TCTC'}} - assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448097_73448103=', 'vcf': {'chr': '2', 'ref': u'TCTCCTC', 'pos': '73448097', 'alt': u'TCTCCTC'}} + assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448097_73448103=', 'vcf': {'chr': '2', 'ref': 'TCTCCTC', 'pos': '73448097', 'alt': 'TCTCCTC'}} assert results['NM_015120.4:c.1573_1579=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} assert results['flag'] == 'gene_variant' @@ -2989,13 +2989,13 @@ def test_variant89(self): def test_variant90(self): variant = 
'9-136132908-T-TC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_020469.2:c.260_262=' in results.keys() + assert 'NM_020469.2:c.260_262=' in list(results.keys()) assert results['NM_020469.2:c.260_262=']['hgvs_lrg_transcript_variant'] == 'LRG_792t1:c.260_262=' assert results['NM_020469.2:c.260_262=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.260_262=']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'HG79_PATCH', 'ref': u'TCA', 'pos': '83614', 'alt': u'TCA'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'NW_003315925.1', 'ref': u'TCA', 'pos': '83614', 'alt': u'TCA'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'HG2030_PATCH', 'ref': u'TCA', 'pos': '83614', 'alt': u'TCA'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'NW_009646201.1', 'ref': u'TCA', 'pos': '83614', 'alt': u'TCA'}}}] + assert results['NM_020469.2:c.260_262=']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}] assert results['NM_020469.2:c.260_262=']['transcript_description'] == 'Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA' assert 
results['NM_020469.2:c.260_262=']['gene_symbol'] == 'ABO' assert results['NM_020469.2:c.260_262=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Val87=)', 'slr': 'NP_065202.2:p.(V87=)'} @@ -3004,20 +3004,20 @@ def test_variant90(self): assert results['NM_020469.2:c.260_262=']['hgvs_lrg_variant'] == 'LRG_792:g.20145_20147=' assert results['NM_020469.2:c.260_262=']['hgvs_transcript_variant'] == 'NM_020469.2:c.260_262=' assert results['NM_020469.2:c.260_262=']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20145_20147=' - assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '136132908', 'alt': u'TC'}} - assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '133257521', 'alt': u'TC'}} - assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '136132908', 'alt': u'TC'}} - assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '133257521', 'alt': u'TC'}} + assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '136132908', 'alt': 'TC'}} + assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '133257521', 'alt': 'TC'}} + assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000009.11:g.136132908_136132909insC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '136132908', 'alt': 'TC'}} + assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '133257521', 'alt': 'TC'}} assert results['NM_020469.2:c.260_262=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_792.xml'} def test_variant91(self): variant = '9-136132908-TAC-TCA' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_020469.2:c.259del' in results.keys() + assert 'NM_020469.2:c.259del' in list(results.keys()) assert results['NM_020469.2:c.259del']['hgvs_lrg_transcript_variant'] == 'LRG_792t1:c.259del' assert results['NM_020469.2:c.259del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_020469.2:c.259del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83618del', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83618del', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83618del', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83618del', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}] @@ -3029,19 +3029,19 @@ def test_variant91(self): assert results['NM_020469.2:c.259del']['hgvs_lrg_variant'] == 'LRG_792:g.20144del' assert results['NM_020469.2:c.259del']['hgvs_transcript_variant'] == 'NM_020469.2:c.259del' 
assert results['NM_020469.2:c.259del']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20144del' - assert results['NM_020469.2:c.259del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910delinsCA', 'vcf': {'chr': 'chr9', 'ref': 'AC', 'pos': '136132909', 'alt': u'CA'}} - assert results['NM_020469.2:c.259del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523delinsCA', 'vcf': {'chr': 'chr9', 'ref': 'AC', 'pos': '133257522', 'alt': u'CA'}} - assert results['NM_020469.2:c.259del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910delinsCA', 'vcf': {'chr': '9', 'ref': 'AC', 'pos': '136132909', 'alt': u'CA'}} - assert results['NM_020469.2:c.259del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523delinsCA', 'vcf': {'chr': '9', 'ref': 'AC', 'pos': '133257522', 'alt': u'CA'}} + assert results['NM_020469.2:c.259del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910delinsCA', 'vcf': {'chr': 'chr9', 'ref': 'AC', 'pos': '136132909', 'alt': 'CA'}} + assert results['NM_020469.2:c.259del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523delinsCA', 'vcf': {'chr': 'chr9', 'ref': 'AC', 'pos': '133257522', 'alt': 'CA'}} + assert results['NM_020469.2:c.259del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910delinsCA', 'vcf': {'chr': '9', 'ref': 'AC', 'pos': '136132909', 'alt': 'CA'}} + assert results['NM_020469.2:c.259del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523delinsCA', 'vcf': {'chr': '9', 'ref': 'AC', 'pos': '133257522', 'alt': 'CA'}} assert results['NM_020469.2:c.259del']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_792.xml'} def test_variant92(self): variant = '9-136132908-TA-TA' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_020469.2:c.261del' in results.keys() + assert 'NM_020469.2:c.261del' in list(results.keys()) assert results['NM_020469.2:c.261del']['hgvs_lrg_transcript_variant'] == 'LRG_792t1:c.261del' assert results['NM_020469.2:c.261del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_020469.2:c.261del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83615del', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83615del', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615del', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615del', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}] @@ -3064,10 +3064,10 @@ def test_variant92(self): def test_variant93(self): variant = 'NM_020469.2:c.258delG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_020469.2:c.259del' in results.keys() + assert 'NM_020469.2:c.259del' in list(results.keys()) assert results['NM_020469.2:c.259del']['hgvs_lrg_transcript_variant'] == 'LRG_792t1:c.259del' assert results['NM_020469.2:c.259del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_020469.2:c.259del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83618del', 'vcf': {'chr': 'HG79_PATCH', 
'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83618del', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83618del', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83618del', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}] @@ -3079,23 +3079,23 @@ def test_variant93(self): assert results['NM_020469.2:c.259del']['hgvs_lrg_variant'] == 'LRG_792:g.20144del' assert results['NM_020469.2:c.259del']['hgvs_transcript_variant'] == 'NM_020469.2:c.259del' assert results['NM_020469.2:c.259del']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20144del' - assert results['NM_020469.2:c.259del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910delinsCA', 'vcf': {'chr': 'chr9', 'ref': 'AC', 'pos': '136132909', 'alt': u'CA'}} - assert results['NM_020469.2:c.259del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523delinsCA', 'vcf': {'chr': 'chr9', 'ref': 'AC', 'pos': '133257522', 'alt': u'CA'}} - assert results['NM_020469.2:c.259del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910delinsCA', 'vcf': {'chr': '9', 'ref': 'AC', 'pos': '136132909', 'alt': u'CA'}} - assert results['NM_020469.2:c.259del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523delinsCA', 'vcf': {'chr': '9', 'ref': 'AC', 'pos': '133257522', 'alt': u'CA'}} + assert results['NM_020469.2:c.259del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910delinsCA', 'vcf': {'chr': 'chr9', 'ref': 'AC', 'pos': '136132909', 'alt': 'CA'}} + assert results['NM_020469.2:c.259del']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523delinsCA', 'vcf': {'chr': 'chr9', 'ref': 'AC', 'pos': '133257522', 'alt': 'CA'}} + assert results['NM_020469.2:c.259del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910delinsCA', 'vcf': {'chr': '9', 'ref': 'AC', 'pos': '136132909', 'alt': 'CA'}} + assert results['NM_020469.2:c.259del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523delinsCA', 'vcf': {'chr': '9', 'ref': 'AC', 'pos': '133257522', 'alt': 'CA'}} assert results['NM_020469.2:c.259del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_792.xml'} def test_variant94(self): variant = 'NM_020469.2:c.260_262TGA=' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_020469.2:c.260_262=' in results.keys() + assert 'NM_020469.2:c.260_262=' in list(results.keys()) assert results['NM_020469.2:c.260_262=']['hgvs_lrg_transcript_variant'] == 'LRG_792t1:c.260_262=' assert results['NM_020469.2:c.260_262=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.260_262=']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'HG79_PATCH', 'ref': u'TCA', 'pos': '83614', 'alt': u'TCA'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'NW_003315925.1', 'ref': u'TCA', 'pos': '83614', 'alt': u'TCA'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'HG2030_PATCH', 'ref': u'TCA', 'pos': '83614', 'alt': u'TCA'}}}, {'hg38': {'hgvs_genomic_description': 
'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'NW_009646201.1', 'ref': u'TCA', 'pos': '83614', 'alt': u'TCA'}}}] + assert results['NM_020469.2:c.260_262=']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}] assert results['NM_020469.2:c.260_262=']['transcript_description'] == 'Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA' assert results['NM_020469.2:c.260_262=']['gene_symbol'] == 'ABO' assert results['NM_020469.2:c.260_262=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Val87=)', 'slr': 'NP_065202.2:p.(V87=)'} @@ -3104,19 +3104,19 @@ def test_variant94(self): assert results['NM_020469.2:c.260_262=']['hgvs_lrg_variant'] == 'LRG_792:g.20145_20147=' assert results['NM_020469.2:c.260_262=']['hgvs_transcript_variant'] == 'NM_020469.2:c.260_262=' assert results['NM_020469.2:c.260_262=']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20145_20147=' - assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '136132908', 'alt': u'TC'}} - assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '133257521', 'alt': u'TC'}} - assert 
results['NM_020469.2:c.260_262=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '136132908', 'alt': u'TC'}} - assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '133257521', 'alt': u'TC'}} + assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '136132908', 'alt': 'TC'}} + assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '133257521', 'alt': 'TC'}} + assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '136132908', 'alt': 'TC'}} + assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '133257521', 'alt': 'TC'}} assert results['NM_020469.2:c.260_262=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_792.xml'} def test_variant95(self): variant = 'NM_020469.2:c.261delG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_020469.2:c.261del' in results.keys() + assert 'NM_020469.2:c.261del' in list(results.keys()) assert results['NM_020469.2:c.261del']['hgvs_lrg_transcript_variant'] == 'LRG_792t1:c.261del' assert 
results['NM_020469.2:c.261del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_020469.2:c.261del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83615del', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83615del', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615del', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615del', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}] @@ -3139,10 +3139,10 @@ def test_variant95(self): def test_variant96(self): variant = 'NM_020469.2:c.261dupG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_020469.2:c.261dup' in results.keys() + assert 'NM_020469.2:c.261dup' in list(results.keys()) assert results['NM_020469.2:c.261dup']['hgvs_lrg_transcript_variant'] == 'LRG_792t1:c.261dup' assert results['NM_020469.2:c.261dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_020469.2:c.261dup']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83615dup', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'T', 'pos': '83614', 'alt': 'TC'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83615dup', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'T', 'pos': '83614', 'alt': 'TC'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615dup', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'T', 'pos': '83614', 'alt': 'TC'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615dup', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'T', 'pos': '83614', 'alt': 'TC'}}}] @@ -3154,23 +3154,23 @@ def test_variant96(self): assert results['NM_020469.2:c.261dup']['hgvs_lrg_variant'] == 
'LRG_792:g.20146dup' assert results['NM_020469.2:c.261dup']['hgvs_transcript_variant'] == 'NM_020469.2:c.261dup' assert results['NM_020469.2:c.261dup']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20146dup' - assert results['NM_020469.2:c.261dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insCC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '136132908', 'alt': u'TCC'}} - assert results['NM_020469.2:c.261dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insCC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '133257521', 'alt': u'TCC'}} - assert results['NM_020469.2:c.261dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insCC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '136132908', 'alt': u'TCC'}} - assert results['NM_020469.2:c.261dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insCC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '133257521', 'alt': u'TCC'}} + assert results['NM_020469.2:c.261dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insCC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '136132908', 'alt': 'TCC'}} + assert results['NM_020469.2:c.261dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insCC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '133257521', 'alt': 'TCC'}} + assert results['NM_020469.2:c.261dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insCC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '136132908', 'alt': 'TCC'}} + assert results['NM_020469.2:c.261dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insCC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '133257521', 'alt': 'TCC'}} assert 
results['NM_020469.2:c.261dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_792.xml'} def test_variant97(self): variant = 'NM_020469.2:c.261_262insTT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_020469.2:c.261_262insTT' in results.keys() + assert 'NM_020469.2:c.261_262insTT' in list(results.keys()) assert results['NM_020469.2:c.261_262insTT']['hgvs_lrg_transcript_variant'] == 'LRG_792t1:c.261_262insTT' assert results['NM_020469.2:c.261_262insTT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.261_262insTT']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83615insAA', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'T', 'pos': '83614', 'alt': u'TAA'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83615insAA', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'T', 'pos': '83614', 'alt': u'TAA'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83615insAA', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'T', 'pos': '83614', 'alt': u'TAA'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83615insAA', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'T', 'pos': '83614', 'alt': u'TAA'}}}] + assert results['NM_020469.2:c.261_262insTT']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83615insAA', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'T', 'pos': '83614', 'alt': 'TAA'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83615insAA', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'T', 'pos': '83614', 'alt': 'TAA'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83615insAA', 'vcf': {'chr': 
'HG2030_PATCH', 'ref': 'T', 'pos': '83614', 'alt': 'TAA'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83615insAA', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'T', 'pos': '83614', 'alt': 'TAA'}}}] assert results['NM_020469.2:c.261_262insTT']['transcript_description'] == 'Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA' assert results['NM_020469.2:c.261_262insTT']['gene_symbol'] == 'ABO' assert results['NM_020469.2:c.261_262insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Thr88LeufsTer32)', 'slr': 'NP_065202.2:p.(T88Lfs*32)'} @@ -3179,20 +3179,20 @@ def test_variant97(self): assert results['NM_020469.2:c.261_262insTT']['hgvs_lrg_variant'] == 'LRG_792:g.20146_20147insTT' assert results['NM_020469.2:c.261_262insTT']['hgvs_transcript_variant'] == 'NM_020469.2:c.261_262insTT' assert results['NM_020469.2:c.261_262insTT']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20146_20147insTT' - assert results['NM_020469.2:c.261_262insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910insACA', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '136132908', 'alt': u'TAAC'}} - assert results['NM_020469.2:c.261_262insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523insACA', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '133257521', 'alt': u'TAAC'}} - assert results['NM_020469.2:c.261_262insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910insACA', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '136132908', 'alt': u'TAAC'}} - assert results['NM_020469.2:c.261_262insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523insACA', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '133257521', 'alt': u'TAAC'}} + assert results['NM_020469.2:c.261_262insTT']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910insACA', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '136132908', 'alt': 'TAAC'}} + assert results['NM_020469.2:c.261_262insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523insACA', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '133257521', 'alt': 'TAAC'}} + assert results['NM_020469.2:c.261_262insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910insACA', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '136132908', 'alt': 'TAAC'}} + assert results['NM_020469.2:c.261_262insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523insACA', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '133257521', 'alt': 'TAAC'}} assert results['NM_020469.2:c.261_262insTT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_792.xml'} def test_variant98(self): variant = 'NC_000019.10:g.50378563_50378564insTAC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_007121.5:c.515A>T' in results.keys() + assert 'NM_007121.5:c.515A>T' in list(results.keys()) assert results['NM_007121.5:c.515A>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007121.5:c.515A>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007121.5:c.515A>T']['alt_genomic_loci'] == [] @@ -3204,13 +3204,13 @@ def test_variant98(self): assert results['NM_007121.5:c.515A>T']['hgvs_lrg_variant'] == '' assert results['NM_007121.5:c.515A>T']['hgvs_transcript_variant'] == 'NM_007121.5:c.515A>T' assert results['NM_007121.5:c.515A>T']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_007121.5:c.515A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': u'ATAC'}} - assert results['NM_007121.5:c.515A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': u'ATAC'}} - assert results['NM_007121.5:c.515A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': u'ATAC'}} - assert results['NM_007121.5:c.515A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': u'ATAC'}} + assert results['NM_007121.5:c.515A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'ATAC'}} + assert results['NM_007121.5:c.515A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'ATAC'}} + assert results['NM_007121.5:c.515A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'ATAC'}} + assert results['NM_007121.5:c.515A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'ATAC'}} assert results['NM_007121.5:c.515A>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} - assert 'NM_001256647.1:c.224A>T' in results.keys() + 
assert 'NM_001256647.1:c.224A>T' in list(results.keys()) assert results['NM_001256647.1:c.224A>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001256647.1:c.224A>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001256647.1:c.224A>T']['alt_genomic_loci'] == [] @@ -3222,20 +3222,20 @@ def test_variant98(self): assert results['NM_001256647.1:c.224A>T']['hgvs_lrg_variant'] == '' assert results['NM_001256647.1:c.224A>T']['hgvs_transcript_variant'] == 'NM_001256647.1:c.224A>T' assert results['NM_001256647.1:c.224A>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001256647.1:c.224A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': u'ATAC'}} - assert results['NM_001256647.1:c.224A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': u'ATAC'}} - assert results['NM_001256647.1:c.224A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': u'ATAC'}} - assert results['NM_001256647.1:c.224A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': u'ATAC'}} + assert results['NM_001256647.1:c.224A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'ATAC'}} + assert results['NM_001256647.1:c.224A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'ATAC'}} + assert results['NM_001256647.1:c.224A>T']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'ATAC'}} + assert results['NM_001256647.1:c.224A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'ATAC'}} assert results['NM_001256647.1:c.224A>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1'} def test_variant99(self): variant = 'NC_000019.10:g.50378563_50378564insC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_007121.5:c.515_516del' in results.keys() + assert 'NM_007121.5:c.515_516del' in list(results.keys()) assert results['NM_007121.5:c.515_516del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007121.5:c.515_516del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007121.5:c.515_516del']['alt_genomic_loci'] == [] @@ -3253,7 +3253,7 @@ def test_variant99(self): assert results['NM_007121.5:c.515_516del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'AC'}} assert results['NM_007121.5:c.515_516del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} - assert 'NM_001256647.1:c.224_225del' in results.keys() + assert 'NM_001256647.1:c.224_225del' in list(results.keys()) assert results['NM_001256647.1:c.224_225del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001256647.1:c.224_225del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001256647.1:c.224_225del']['alt_genomic_loci'] == [] @@ -3275,10 +3275,10 @@ def 
test_variant99(self): def test_variant100(self): variant = 'NC_000019.10:g.50378564_50378565insTACA' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_007121.5:c.515_516insT' in results.keys() + assert 'NM_007121.5:c.515_516insT' in list(results.keys()) assert results['NM_007121.5:c.515_516insT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007121.5:c.515_516insT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007121.5:c.515_516insT']['alt_genomic_loci'] == [] @@ -3290,13 +3290,13 @@ def test_variant100(self): assert results['NM_007121.5:c.515_516insT']['hgvs_lrg_variant'] == '' assert results['NM_007121.5:c.515_516insT']['hgvs_transcript_variant'] == 'NM_007121.5:c.515_516insT' assert results['NM_007121.5:c.515_516insT']['hgvs_refseqgene_variant'] == '' - assert results['NM_007121.5:c.515_516insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881821_50881822insTACA', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': u'AATAC'}} - assert results['NM_007121.5:c.515_516insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378564_50378565insTACA', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': u'AATAC'}} - assert results['NM_007121.5:c.515_516insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881821_50881822insTACA', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': u'AATAC'}} - assert results['NM_007121.5:c.515_516insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378564_50378565insTACA', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': u'AATAC'}} + assert results['NM_007121.5:c.515_516insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881821_50881822insTACA', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 
'alt': 'AATAC'}} + assert results['NM_007121.5:c.515_516insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378564_50378565insTACA', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'AATAC'}} + assert results['NM_007121.5:c.515_516insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881821_50881822insTACA', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'AATAC'}} + assert results['NM_007121.5:c.515_516insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378564_50378565insTACA', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'AATAC'}} assert results['NM_007121.5:c.515_516insT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} - assert 'NM_001256647.1:c.224_225insT' in results.keys() + assert 'NM_001256647.1:c.224_225insT' in list(results.keys()) assert results['NM_001256647.1:c.224_225insT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001256647.1:c.224_225insT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001256647.1:c.224_225insT']['alt_genomic_loci'] == [] @@ -3308,19 +3308,19 @@ def test_variant100(self): assert results['NM_001256647.1:c.224_225insT']['hgvs_lrg_variant'] == '' assert results['NM_001256647.1:c.224_225insT']['hgvs_transcript_variant'] == 'NM_001256647.1:c.224_225insT' assert results['NM_001256647.1:c.224_225insT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001256647.1:c.224_225insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881821_50881822insTACA', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': u'AATAC'}} - assert results['NM_001256647.1:c.224_225insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378564_50378565insTACA', 'vcf': {'chr': 'chr19', 'ref': 
'A', 'pos': '50378563', 'alt': u'AATAC'}} - assert results['NM_001256647.1:c.224_225insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881821_50881822insTACA', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': u'AATAC'}} - assert results['NM_001256647.1:c.224_225insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378564_50378565insTACA', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': u'AATAC'}} + assert results['NM_001256647.1:c.224_225insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881821_50881822insTACA', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'AATAC'}} + assert results['NM_001256647.1:c.224_225insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378564_50378565insTACA', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'AATAC'}} + assert results['NM_001256647.1:c.224_225insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881821_50881822insTACA', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'AATAC'}} + assert results['NM_001256647.1:c.224_225insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378564_50378565insTACA', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'AATAC'}} assert results['NM_001256647.1:c.224_225insT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1'} def test_variant101(self): variant = 'NC_000019.10:g.50378565_50378567dup' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_007121.5:c.514_520=' in results.keys() + assert 'NM_007121.5:c.514_520=' in list(results.keys()) assert results['NM_007121.5:c.514_520=']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_007121.5:c.514_520=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007121.5:c.514_520=']['alt_genomic_loci'] == [] @@ -3338,7 +3338,7 @@ def test_variant101(self): assert results['NM_007121.5:c.514_520=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378565_50378567dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'AAAC'}} assert results['NM_007121.5:c.514_520=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} - assert 'NM_001256647.1:c.223_229=' in results.keys() + assert 'NM_001256647.1:c.223_229=' in list(results.keys()) assert results['NM_001256647.1:c.223_229=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001256647.1:c.223_229=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001256647.1:c.223_229=']['alt_genomic_loci'] == [] @@ -3361,10 +3361,10 @@ def test_variant101(self): def test_variant102(self): variant = 'NC_000019.10:g.50378563_50378564=' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_007121.5:c.519_521del' in results.keys() + assert 'NM_007121.5:c.519_521del' in list(results.keys()) assert results['NM_007121.5:c.519_521del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007121.5:c.519_521del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007121.5:c.519_521del']['alt_genomic_loci'] == [] @@ -3382,7 +3382,7 @@ def test_variant102(self): assert results['NM_007121.5:c.519_521del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564=', 'vcf': {'chr': '19', 'ref': 'AA', 'pos': '50378563', 'alt': 'AA'}} assert results['NM_007121.5:c.519_521del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} - assert 'NM_001256647.1:c.228_230del' in results.keys() + assert 'NM_001256647.1:c.228_230del' in list(results.keys()) assert results['NM_001256647.1:c.228_230del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001256647.1:c.228_230del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001256647.1:c.228_230del']['alt_genomic_loci'] == [] @@ -3404,9 +3404,9 @@ def test_variant102(self): def test_variant103(self): variant = 'NC_000019.10:g.50378563_50378564insTCGG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001256647.1:c.224_226delinsTCGG' in results.keys() + assert 'NM_001256647.1:c.224_226delinsTCGG' in list(results.keys()) assert results['NM_001256647.1:c.224_226delinsTCGG']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001256647.1:c.224_226delinsTCGG']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001256647.1:c.224_226delinsTCGG']['alt_genomic_loci'] == [] @@ -3418,14 +3418,14 @@ def test_variant103(self): assert results['NM_001256647.1:c.224_226delinsTCGG']['hgvs_lrg_variant'] == '' assert results['NM_001256647.1:c.224_226delinsTCGG']['hgvs_transcript_variant'] == 'NM_001256647.1:c.224_226delinsTCGG' assert results['NM_001256647.1:c.224_226delinsTCGG']['hgvs_refseqgene_variant'] == '' - assert results['NM_001256647.1:c.224_226delinsTCGG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTCGG', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': u'ATCGG'}} - assert results['NM_001256647.1:c.224_226delinsTCGG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTCGG', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': u'ATCGG'}} - assert results['NM_001256647.1:c.224_226delinsTCGG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000019.9:g.50881820_50881821insTCGG', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': u'ATCGG'}} - assert results['NM_001256647.1:c.224_226delinsTCGG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTCGG', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': u'ATCGG'}} + assert results['NM_001256647.1:c.224_226delinsTCGG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTCGG', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'ATCGG'}} + assert results['NM_001256647.1:c.224_226delinsTCGG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTCGG', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'ATCGG'}} + assert results['NM_001256647.1:c.224_226delinsTCGG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTCGG', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'ATCGG'}} + assert results['NM_001256647.1:c.224_226delinsTCGG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTCGG', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'ATCGG'}} assert results['NM_001256647.1:c.224_226delinsTCGG']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1'} assert results['flag'] == 'gene_variant' - assert 'NM_007121.5:c.515_517delinsTCGG' in results.keys() + assert 'NM_007121.5:c.515_517delinsTCGG' in list(results.keys()) assert results['NM_007121.5:c.515_517delinsTCGG']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007121.5:c.515_517delinsTCGG']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007121.5:c.515_517delinsTCGG']['alt_genomic_loci'] == [] @@ -3437,20 +3437,20 @@ def test_variant103(self): assert 
results['NM_007121.5:c.515_517delinsTCGG']['hgvs_lrg_variant'] == '' assert results['NM_007121.5:c.515_517delinsTCGG']['hgvs_transcript_variant'] == 'NM_007121.5:c.515_517delinsTCGG' assert results['NM_007121.5:c.515_517delinsTCGG']['hgvs_refseqgene_variant'] == '' - assert results['NM_007121.5:c.515_517delinsTCGG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTCGG', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': u'ATCGG'}} - assert results['NM_007121.5:c.515_517delinsTCGG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTCGG', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': u'ATCGG'}} - assert results['NM_007121.5:c.515_517delinsTCGG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTCGG', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': u'ATCGG'}} - assert results['NM_007121.5:c.515_517delinsTCGG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTCGG', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': u'ATCGG'}} + assert results['NM_007121.5:c.515_517delinsTCGG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTCGG', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'ATCGG'}} + assert results['NM_007121.5:c.515_517delinsTCGG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTCGG', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'ATCGG'}} + assert results['NM_007121.5:c.515_517delinsTCGG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTCGG', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'ATCGG'}} + assert results['NM_007121.5:c.515_517delinsTCGG']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTCGG', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'ATCGG'}} assert results['NM_007121.5:c.515_517delinsTCGG']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} def test_variant104(self): variant = 'NC_000019.10:g.50378563delinsTTAC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_007121.5:c.514_515inv' in results.keys() + assert 'NM_007121.5:c.514_515inv' in list(results.keys()) assert results['NM_007121.5:c.514_515inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007121.5:c.514_515inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007121.5:c.514_515inv']['alt_genomic_loci'] == [] @@ -3468,7 +3468,7 @@ def test_variant104(self): assert results['NM_007121.5:c.514_515inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563delinsTTAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'TTAC'}} assert results['NM_007121.5:c.514_515inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} - assert 'NM_001256647.1:c.223_224inv' in results.keys() + assert 'NM_001256647.1:c.223_224inv' in list(results.keys()) assert results['NM_001256647.1:c.223_224inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001256647.1:c.223_224inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001256647.1:c.223_224inv']['alt_genomic_loci'] == [] @@ -3490,9 +3490,9 @@ def test_variant104(self): def test_variant105(self): variant = 'NC_000019.10:g.50378563_50378564insTAAC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_007121.5:c.514_515insT' in 
results.keys() + assert 'NM_007121.5:c.514_515insT' in list(results.keys()) assert results['NM_007121.5:c.514_515insT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007121.5:c.514_515insT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007121.5:c.514_515insT']['alt_genomic_loci'] == [] @@ -3510,7 +3510,7 @@ def test_variant105(self): assert results['NM_007121.5:c.514_515insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'ATAAC'}} assert results['NM_007121.5:c.514_515insT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} - assert 'NM_001256647.1:c.223_224insT' in results.keys() + assert 'NM_001256647.1:c.223_224insT' in list(results.keys()) assert results['NM_001256647.1:c.223_224insT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001256647.1:c.223_224insT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001256647.1:c.223_224insT']['alt_genomic_loci'] == [] @@ -3533,10 +3533,10 @@ def test_variant105(self): def test_variant106(self): variant = 'NC_000019.10:g.50378562_50378565del' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_001256647.1:c.222_228del' in results.keys() + assert 'NM_001256647.1:c.222_228del' in list(results.keys()) assert results['NM_001256647.1:c.222_228del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001256647.1:c.222_228del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001256647.1:c.222_228del']['alt_genomic_loci'] == [] @@ -3554,7 +3554,7 @@ def test_variant106(self): assert results['NM_001256647.1:c.222_228del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378562_50378565del', 'vcf': 
{'chr': '19', 'ref': 'GGAAA', 'pos': '50378561', 'alt': 'G'}} assert results['NM_001256647.1:c.222_228del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1'} - assert 'NM_007121.5:c.513_519del' in results.keys() + assert 'NM_007121.5:c.513_519del' in list(results.keys()) assert results['NM_007121.5:c.513_519del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007121.5:c.513_519del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007121.5:c.513_519del']['alt_genomic_loci'] == [] @@ -3576,9 +3576,9 @@ def test_variant106(self): def test_variant107(self): variant = 'NC_000019.10:g.50378562_50378565delinsTC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001256647.1:c.222_228delinsTC' in results.keys() + assert 'NM_001256647.1:c.222_228delinsTC' in list(results.keys()) assert results['NM_001256647.1:c.222_228delinsTC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001256647.1:c.222_228delinsTC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001256647.1:c.222_228delinsTC']['alt_genomic_loci'] == [] @@ -3597,7 +3597,7 @@ def test_variant107(self): assert results['NM_001256647.1:c.222_228delinsTC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1'} assert results['flag'] == 'gene_variant' - assert 'NM_007121.5:c.513_519delinsTC' in results.keys() + assert 'NM_007121.5:c.513_519delinsTC' in list(results.keys()) assert results['NM_007121.5:c.513_519delinsTC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007121.5:c.513_519delinsTC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007121.5:c.513_519delinsTC']['alt_genomic_loci'] == [] @@ -3619,10 +3619,10 @@ def test_variant107(self): def 
test_variant108(self): variant = 'NC_000007.14:g.149779575_149779577delinsT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_198455.2:c.1115_1116insT' in results.keys() + assert 'NM_198455.2:c.1115_1116insT' in list(results.keys()) assert results['NM_198455.2:c.1115_1116insT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_198455.2:c.1115_1116insT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_198455.2:c.1115_1116insT']['alt_genomic_loci'] == [] @@ -3644,10 +3644,10 @@ def test_variant108(self): def test_variant109(self): variant = 'NC_000007.14:g.149779575_149779577=' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_198455.2:c.1116_1118=' in results.keys() + assert 'NM_198455.2:c.1116_1118=' in list(results.keys()) assert results['NM_198455.2:c.1116_1118=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_198455.2:c.1116_1118=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_198455.2:c.1116_1118=']['alt_genomic_loci'] == [] @@ -3669,10 +3669,10 @@ def test_variant109(self): def test_variant110(self): variant = 'NC_000007.14:g.149779576_149779578del' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_198455.2:c.1116_1118=' in results.keys() + assert 'NM_198455.2:c.1116_1118=' in list(results.keys()) assert results['NM_198455.2:c.1116_1118=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_198455.2:c.1116_1118=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_198455.2:c.1116_1118=']['alt_genomic_loci'] == [] @@ -3694,10 +3694,10 @@ def test_variant110(self): def test_variant111(self): variant = 'NC_000007.14:g.149779577del' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert 
results['flag'] == 'gene_variant' - assert 'NM_198455.2:c.1115_1116dup' in results.keys() + assert 'NM_198455.2:c.1115_1116dup' in list(results.keys()) assert results['NM_198455.2:c.1115_1116dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_198455.2:c.1115_1116dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_198455.2:c.1115_1116dup']['alt_genomic_loci'] == [] @@ -3719,9 +3719,9 @@ def test_variant111(self): def test_variant112(self): variant = 'NC_000007.14:g.149779573_149779579del' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_198455.2:c.1114_1117del' in results.keys() + assert 'NM_198455.2:c.1114_1117del' in list(results.keys()) assert results['NM_198455.2:c.1114_1117del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_198455.2:c.1114_1117del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_198455.2:c.1114_1117del']['alt_genomic_loci'] == [] @@ -3744,9 +3744,9 @@ def test_variant112(self): def test_variant113(self): variant = 'NC_000007.14:g.149779573_149779579delinsCA' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_198455.2:c.1114_1117delinsCA' in results.keys() + assert 'NM_198455.2:c.1114_1117delinsCA' in list(results.keys()) assert results['NM_198455.2:c.1114_1117delinsCA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_198455.2:c.1114_1117delinsCA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_198455.2:c.1114_1117delinsCA']['alt_genomic_loci'] == [] @@ -3769,9 +3769,9 @@ def test_variant113(self): def test_variant114(self): variant = 'NM_000088.3:c.590_591inv' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_000088.3:c.590_591inv' in results.keys() + assert 'NM_000088.3:c.590_591inv' in list(results.keys()) assert results['NM_000088.3:c.590_591inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.590_591inv' assert 
results['NM_000088.3:c.590_591inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.590_591inv']['alt_genomic_loci'] == [] @@ -3794,9 +3794,9 @@ def test_variant114(self): def test_variant115(self): variant = 'NM_024989.3:c.1778_1779inv' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_024989.3:c.1778_1779inv' in results.keys() + assert 'NM_024989.3:c.1778_1779inv' in list(results.keys()) assert results['NM_024989.3:c.1778_1779inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_024989.3:c.1778_1779inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_024989.3:c.1778_1779inv']['alt_genomic_loci'] == [] @@ -3819,10 +3819,10 @@ def test_variant115(self): def test_variant116(self): variant = 'NM_032815.3:c.555_556inv' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_032815.3:c.555_556inv' in results.keys() + assert 'NM_032815.3:c.555_556inv' in list(results.keys()) assert results['NM_032815.3:c.555_556inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_032815.3:c.555_556inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032815.3:c.555_556inv']['alt_genomic_loci'] == [] @@ -3843,9 +3843,9 @@ def test_variant116(self): def test_variant117(self): variant = 'NM_006138.4:c.3_4inv' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_006138.4:c.3_4inv' in results.keys() + assert 'NM_006138.4:c.3_4inv' in list(results.keys()) assert results['NM_006138.4:c.3_4inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_006138.4:c.3_4inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_006138.4:c.3_4inv']['alt_genomic_loci'] == [] @@ -3867,10 +3867,10 @@ def test_variant117(self): def test_variant118(self): variant = 'NM_000038.5:c.3927_3928delAAinsTT' results = self.vv.validate(variant, 
'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000038.5:c.3927_3928inv' in results.keys() + assert 'NM_000038.5:c.3927_3928inv' in list(results.keys()) assert results['NM_000038.5:c.3927_3928inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000038.5:c.3927_3928inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000038.5:c.3927_3928inv']['alt_genomic_loci'] == [] @@ -3892,10 +3892,10 @@ def test_variant118(self): def test_variant119(self): variant = 'NM_001034853.1:c.2847_2848delAGinsCT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_001034853.1:c.2847_2848inv' in results.keys() + assert 'NM_001034853.1:c.2847_2848inv' in list(results.keys()) assert results['NM_001034853.1:c.2847_2848inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001034853.1:c.2847_2848inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001034853.1:c.2847_2848inv']['alt_genomic_loci'] == [] @@ -3917,9 +3917,9 @@ def test_variant119(self): def test_variant120(self): variant = 'NM_000088.3:c.4392_*2inv' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_000088.3:c.4394_4395inv' in results.keys() + assert 'NM_000088.3:c.4394_4395inv' in list(results.keys()) assert results['NM_000088.3:c.4394_4395inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.4394_4395inv' assert results['NM_000088.3:c.4394_4395inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.4394_4395inv']['alt_genomic_loci'] == [] @@ -3942,10 +3942,10 @@ def test_variant120(self): def test_variant121(self): variant = 'NM_000088.3:c.4392_*5inv' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.4392_*5inv' in results.keys() + assert 'NM_000088.3:c.4392_*5inv' 
in list(results.keys()) assert results['NM_000088.3:c.4392_*5inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.4393_*4inv' assert results['NM_000088.3:c.4392_*5inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.4392_*5inv']['alt_genomic_loci'] == [] @@ -3967,10 +3967,10 @@ def test_variant121(self): def test_variant122(self): variant = 'NM_000088.3:c.4390_*7inv' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.4390_*7inv' in results.keys() + assert 'NM_000088.3:c.4390_*7inv' in list(results.keys()) assert results['NM_000088.3:c.4390_*7inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.4390_*7inv' assert results['NM_000088.3:c.4390_*7inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.4390_*7inv']['alt_genomic_loci'] == [] @@ -3992,9 +3992,9 @@ def test_variant122(self): def test_variant123(self): variant = 'NM_005732.3:c.2923-5insT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -4006,10 +4006,10 @@ def test_variant123(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in 
results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -4017,9 +4017,9 @@ def test_variant123(self): def test_variant124(self): variant = 'NM_198283.1(EYS):c.*743120C>T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -4031,10 +4031,10 @@ def test_variant124(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' 
not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -4042,9 +4042,9 @@ def test_variant124(self): def test_variant125(self): variant = 'NM_133379.4(TTN):c.*265+26591C>T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -4056,10 +4056,10 @@ def test_variant125(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -4067,10 +4067,10 @@ def test_variant125(self): def test_variant126(self): variant = 'NM_000088.3:c.589-2_589-1AG>G' results = self.vv.validate(variant, 'GRCh37', 'all') 
- print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.589-2_589-1delinsG' in results.keys() + assert 'NM_000088.3:c.589-2_589-1delinsG' in list(results.keys()) assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-2del' assert results['NM_000088.3:c.589-2_589-1delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-2del' assert results['NM_000088.3:c.589-2_589-1delinsG']['alt_genomic_loci'] == [] @@ -4092,10 +4092,10 @@ def test_variant126(self): def test_variant127(self): variant = 'NM_000088.3:c.642+1_642+2delGTinsG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.642+1_642+2delinsG' in results.keys() + assert 'NM_000088.3:c.642+1_642+2delinsG' in list(results.keys()) assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642+2del' assert results['NM_000088.3:c.642+1_642+2delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.642+2del' assert results['NM_000088.3:c.642+1_642+2delinsG']['alt_genomic_loci'] == [] @@ -4117,9 +4117,9 @@ def test_variant127(self): def test_variant128(self): variant = 'NM_004415.3:c.1-1insA' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -4131,10 +4131,10 @@ def test_variant128(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] 
== '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -4142,10 +4142,10 @@ def test_variant128(self): def test_variant129(self): variant = 'NM_004415.3:c.-1_1insA' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_004415.3:c.-1_1insA' in results.keys() + assert 'NM_004415.3:c.-1_1insA' in list(results.keys()) assert results['NM_004415.3:c.-1_1insA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_004415.3:c.-1_1insA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004415.3:c.-1_1insA']['alt_genomic_loci'] == [] @@ -4167,9 +4167,9 @@ def test_variant129(self): def test_variant130(self): variant = 'NM_000273.2:c.1-5028_253del' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -4181,10 +4181,10 @@ def 
test_variant130(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -4192,10 +4192,10 @@ def test_variant130(self): def test_variant131(self): variant = 'NM_002929.2:c.1006C>T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_002929.2:c.1006C>T' in results.keys() + assert 'NM_002929.2:c.1006C>T' in list(results.keys()) assert results['NM_002929.2:c.1006C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_002929.2:c.1006C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_002929.2:c.1006C>T']['alt_genomic_loci'] == [] @@ -4207,20 +4207,20 @@ def test_variant131(self): assert results['NM_002929.2:c.1006C>T']['hgvs_lrg_variant'] == '' assert results['NM_002929.2:c.1006C>T']['hgvs_transcript_variant'] == 'NM_002929.2:c.1006C>T' assert results['NM_002929.2:c.1006C>T']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in 
results['NM_002929.2:c.1006C>T']['primary_assembly_loci'].keys() - assert results['NM_002929.2:c.1006C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000013.11:g.113723094C>T', 'vcf': {'chr': 'chr13', 'ref': u'C', 'pos': '113723094', 'alt': u'T'}} - assert 'grch37' not in results['NM_002929.2:c.1006C>T']['primary_assembly_loci'].keys() - assert results['NM_002929.2:c.1006C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000013.11:g.113723094C>T', 'vcf': {'chr': '13', 'ref': u'C', 'pos': '113723094', 'alt': u'T'}} + assert 'hg19' not in list(results['NM_002929.2:c.1006C>T']['primary_assembly_loci'].keys()) + assert results['NM_002929.2:c.1006C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000013.11:g.113723094C>T', 'vcf': {'chr': 'chr13', 'ref': 'C', 'pos': '113723094', 'alt': 'T'}} + assert 'grch37' not in list(results['NM_002929.2:c.1006C>T']['primary_assembly_loci'].keys()) + assert results['NM_002929.2:c.1006C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000013.11:g.113723094C>T', 'vcf': {'chr': '13', 'ref': 'C', 'pos': '113723094', 'alt': 'T'}} assert results['NM_002929.2:c.1006C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002920.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002929.2'} def test_variant132(self): variant = 'NR_125367.1:n.167+18165G>A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NR_125367.1:n.167+18165G>A' in results.keys() + assert 'NR_125367.1:n.167+18165G>A' in list(results.keys()) assert results['NR_125367.1:n.167+18165G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NR_125367.1:n.167+18165G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NR_125367.1:n.167+18165G>A']['alt_genomic_loci'] == [] @@ -4242,9 +4242,9 @@ def test_variant132(self): def 
test_variant133(self): variant = 'NM_006005.3:c.3071_3073delinsTTA' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -4256,10 +4256,10 @@ def test_variant133(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -4267,9 +4267,9 @@ def test_variant133(self): def test_variant134(self): variant = 'NM_000089.3:n.1504_1506del' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert 
results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -4281,10 +4281,10 @@ def test_variant134(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -4292,9 +4292,9 @@ def test_variant134(self): def test_variant135(self): variant = 'NC_012920.1:m.1011C>T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -4317,10 +4317,10 @@ def test_variant135(self): def test_variant136(self): variant = 'NC_000006.11:g.90403795G=' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 
'gene_variant' - assert 'NM_014611.1:c.9879T>C' in results.keys() + assert 'NM_014611.1:c.9879T>C' in list(results.keys()) assert results['NM_014611.1:c.9879T>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014611.1:c.9879T>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014611.1:c.9879T>C']['alt_genomic_loci'] == [] @@ -4333,12 +4333,12 @@ def test_variant136(self): assert results['NM_014611.1:c.9879T>C']['hgvs_transcript_variant'] == 'NM_014611.1:c.9879T>C' assert results['NM_014611.1:c.9879T>C']['hgvs_refseqgene_variant'] == '' assert results['NM_014611.1:c.9879T>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': 'chr6', 'ref': 'G', 'pos': '90403795', 'alt': 'G'}} - assert 'hg38' not in results['NM_014611.1:c.9879T>C']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_014611.1:c.9879T>C']['primary_assembly_loci'].keys()) assert results['NM_014611.1:c.9879T>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': '6', 'ref': 'G', 'pos': '90403795', 'alt': 'G'}} - assert 'grch38' not in results['NM_014611.1:c.9879T>C']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_014611.1:c.9879T>C']['primary_assembly_loci'].keys()) assert results['NM_014611.1:c.9879T>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.1'} - assert 'NM_014611.2:c.9879C=' in results.keys() + assert 'NM_014611.2:c.9879C=' in list(results.keys()) assert results['NM_014611.2:c.9879C=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014611.2:c.9879C=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014611.2:c.9879C=']['alt_genomic_loci'] == [] @@ -4350,19 +4350,19 @@ def test_variant136(self): assert results['NM_014611.2:c.9879C=']['hgvs_lrg_variant'] == '' assert 
results['NM_014611.2:c.9879C=']['hgvs_transcript_variant'] == 'NM_014611.2:c.9879C=' assert results['NM_014611.2:c.9879C=']['hgvs_refseqgene_variant'] == '' - assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': 'chr6', 'ref': u'G', 'pos': '90403795', 'alt': u'G'}} - assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G=', 'vcf': {'chr': 'chr6', 'ref': u'G', 'pos': '89694076', 'alt': u'G'}} - assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': '6', 'ref': u'G', 'pos': '90403795', 'alt': u'G'}} - assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G=', 'vcf': {'chr': '6', 'ref': u'G', 'pos': '89694076', 'alt': u'G'}} + assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': 'chr6', 'ref': 'G', 'pos': '90403795', 'alt': 'G'}} + assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G=', 'vcf': {'chr': 'chr6', 'ref': 'G', 'pos': '89694076', 'alt': 'G'}} + assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': '6', 'ref': 'G', 'pos': '90403795', 'alt': 'G'}} + assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G=', 'vcf': {'chr': '6', 'ref': 'G', 'pos': '89694076', 'alt': 'G'}} assert results['NM_014611.2:c.9879C=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.2'} def 
test_variant137(self): variant = '1-169519049-T-.' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_000130.4:c.1602del' in results.keys() + assert 'NM_000130.4:c.1602del' in list(results.keys()) assert results['NM_000130.4:c.1602del']['hgvs_lrg_transcript_variant'] == 'LRG_553t1:c.1601del' assert results['NM_000130.4:c.1602del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000130.4:c.1602del']['alt_genomic_loci'] == [] @@ -4381,7 +4381,7 @@ def test_variant137(self): assert results['NM_000130.4:c.1602del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011806.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000121.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000130.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_553.xml'} assert results['flag'] == 'gene_variant' - assert 'NM_000130.4:c.1601G>A' in results.keys() + assert 'NM_000130.4:c.1601G>A' in list(results.keys()) assert results['NM_000130.4:c.1601G>A']['hgvs_lrg_transcript_variant'] == 'LRG_553t1:c.1601G>A' assert results['NM_000130.4:c.1601G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000130.4:c.1601G>A']['alt_genomic_loci'] == [] @@ -4394,18 +4394,18 @@ def test_variant137(self): assert results['NM_000130.4:c.1601G>A']['hgvs_transcript_variant'] == 'NM_000130.4:c.1601G>A' assert results['NM_000130.4:c.1601G>A']['hgvs_refseqgene_variant'] == 'NG_011806.1:g.41721G>A' assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.169519049T=', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '169519049', 'alt': 'T'}} - assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549811C>T', 'vcf': {'chr': 'chr1', 'ref': u'C', 'pos': '169549811', 'alt': u'T'}} + assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000001.11:g.169549811C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '169549811', 'alt': 'T'}} assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.169519049T=', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '169519049', 'alt': 'T'}} - assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549811C>T', 'vcf': {'chr': '1', 'ref': u'C', 'pos': '169549811', 'alt': u'T'}} + assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549811C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '169549811', 'alt': 'T'}} assert results['NM_000130.4:c.1601G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011806.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000121.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000130.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_553.xml'} def test_variant138(self): variant = 'NC_000005.9:g.35058667_35058668AG=' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001204317.1:c.856-9155_856-9154=' in results.keys() + assert 'NM_001204317.1:c.856-9155_856-9154=' in list(results.keys()) assert results['NM_001204317.1:c.856-9155_856-9154=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001204317.1:c.856-9155_856-9154=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001204317.1:c.856-9155_856-9154=']['alt_genomic_loci'] == [] @@ -4423,7 +4423,7 @@ def test_variant138(self): assert results['NM_001204317.1:c.856-9155_856-9154=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058562_35058563=', 'vcf': {'chr': '5', 'ref': 'AA', 'pos': '35058562', 'alt': 'AA'}} assert results['NM_001204317.1:c.856-9155_856-9154=']['reference_sequence_records'] 
== {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191246.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204317.1'} - assert 'NM_001204316.1:c.1009+7383_1009+7384=' in results.keys() + assert 'NM_001204316.1:c.1009+7383_1009+7384=' in list(results.keys()) assert results['NM_001204316.1:c.1009+7383_1009+7384=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001204316.1:c.1009+7383_1009+7384=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001204316.1:c.1009+7383_1009+7384=']['alt_genomic_loci'] == [] @@ -4441,7 +4441,7 @@ def test_variant138(self): assert results['NM_001204316.1:c.1009+7383_1009+7384=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058565_35058566=', 'vcf': {'chr': '5', 'ref': 'AT', 'pos': '35058565', 'alt': 'AT'}} assert results['NM_001204316.1:c.1009+7383_1009+7384=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191245.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204316.1'} - assert 'NM_001204314.2:c.*6528del' in results.keys() + assert 'NM_001204314.2:c.*6528del' in list(results.keys()) assert results['NM_001204314.2:c.*6528del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001204314.2:c.*6528del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001204314.2:c.*6528del']['alt_genomic_loci'] == [] @@ -4459,7 +4459,7 @@ def test_variant138(self): assert results['NM_001204314.2:c.*6528del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563del', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058560', 'alt': 'C'}} assert results['NM_001204314.2:c.*6528del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191243.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204314.2'} - assert 'NM_001204318.1:c.686-9155_686-9154=' in results.keys() + assert 'NM_001204318.1:c.686-9155_686-9154=' in 
list(results.keys()) assert results['NM_001204318.1:c.686-9155_686-9154=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001204318.1:c.686-9155_686-9154=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001204318.1:c.686-9155_686-9154=']['alt_genomic_loci'] == [] @@ -4477,7 +4477,7 @@ def test_variant138(self): assert results['NM_001204318.1:c.686-9155_686-9154=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058562_35058563=', 'vcf': {'chr': '5', 'ref': 'AA', 'pos': '35058562', 'alt': 'AA'}} assert results['NM_001204318.1:c.686-9155_686-9154=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191247.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204318.1'} - assert 'NR_037910.1:n.828-9155_828-9154=' in results.keys() + assert 'NR_037910.1:n.828-9155_828-9154=' in list(results.keys()) assert results['NR_037910.1:n.828-9155_828-9154=']['hgvs_lrg_transcript_variant'] == '' assert results['NR_037910.1:n.828-9155_828-9154=']['refseqgene_context_intronic_sequence'] == '' assert results['NR_037910.1:n.828-9155_828-9154=']['alt_genomic_loci'] == [] @@ -4496,7 +4496,7 @@ def test_variant138(self): assert results['NR_037910.1:n.828-9155_828-9154=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_037910.1'} assert results['flag'] == 'gene_variant' - assert 'NM_000949.5:c.*6523_*6524=' in results.keys() + assert 'NM_000949.5:c.*6523_*6524=' in list(results.keys()) assert results['NM_000949.5:c.*6523_*6524=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000949.5:c.*6523_*6524=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000949.5:c.*6523_*6524=']['alt_genomic_loci'] == [] @@ -4508,13 +4508,13 @@ def test_variant138(self): assert results['NM_000949.5:c.*6523_*6524=']['hgvs_lrg_variant'] == '' assert results['NM_000949.5:c.*6523_*6524=']['hgvs_transcript_variant'] == 
'NM_000949.5:c.*6523_*6524=' assert results['NM_000949.5:c.*6523_*6524=']['hgvs_refseqgene_variant'] == 'NG_029042.1:g.177156_177157=' - assert results['NM_000949.5:c.*6523_*6524=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058666_35058669=', 'vcf': {'chr': 'chr5', 'ref': u'AAGA', 'pos': '35058666', 'alt': u'AAGA'}} - assert 'hg38' not in results['NM_000949.5:c.*6523_*6524=']['primary_assembly_loci'].keys() - assert results['NM_000949.5:c.*6523_*6524=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058666_35058669=', 'vcf': {'chr': '5', 'ref': u'AAGA', 'pos': '35058666', 'alt': u'AAGA'}} - assert 'grch38' not in results['NM_000949.5:c.*6523_*6524=']['primary_assembly_loci'].keys() + assert results['NM_000949.5:c.*6523_*6524=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058666_35058669=', 'vcf': {'chr': 'chr5', 'ref': 'AAGA', 'pos': '35058666', 'alt': 'AAGA'}} + assert 'hg38' not in list(results['NM_000949.5:c.*6523_*6524=']['primary_assembly_loci'].keys()) + assert results['NM_000949.5:c.*6523_*6524=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058666_35058669=', 'vcf': {'chr': '5', 'ref': 'AAGA', 'pos': '35058666', 'alt': 'AAGA'}} + assert 'grch38' not in list(results['NM_000949.5:c.*6523_*6524=']['primary_assembly_loci'].keys()) assert results['NM_000949.5:c.*6523_*6524=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029042.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000940.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000949.5'} - assert 'NM_001204314.1:c.*6523_*6524=' in results.keys() + assert 'NM_001204314.1:c.*6523_*6524=' in list(results.keys()) assert results['NM_001204314.1:c.*6523_*6524=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001204314.1:c.*6523_*6524=']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_001204314.1:c.*6523_*6524=']['alt_genomic_loci'] == [] @@ -4526,13 +4526,13 @@ def test_variant138(self): assert results['NM_001204314.1:c.*6523_*6524=']['hgvs_lrg_variant'] == '' assert results['NM_001204314.1:c.*6523_*6524=']['hgvs_transcript_variant'] == 'NM_001204314.1:c.*6523_*6524=' assert results['NM_001204314.1:c.*6523_*6524=']['hgvs_refseqgene_variant'] == '' - assert results['NM_001204314.1:c.*6523_*6524=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': 'chr5', 'ref': u'AG', 'pos': '35058667', 'alt': u'AG'}} - assert 'hg38' not in results['NM_001204314.1:c.*6523_*6524=']['primary_assembly_loci'].keys() - assert results['NM_001204314.1:c.*6523_*6524=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': '5', 'ref': u'AG', 'pos': '35058667', 'alt': u'AG'}} - assert 'grch38' not in results['NM_001204314.1:c.*6523_*6524=']['primary_assembly_loci'].keys() + assert results['NM_001204314.1:c.*6523_*6524=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': 'chr5', 'ref': 'AG', 'pos': '35058667', 'alt': 'AG'}} + assert 'hg38' not in list(results['NM_001204314.1:c.*6523_*6524=']['primary_assembly_loci'].keys()) + assert results['NM_001204314.1:c.*6523_*6524=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': '5', 'ref': 'AG', 'pos': '35058667', 'alt': 'AG'}} + assert 'grch38' not in list(results['NM_001204314.1:c.*6523_*6524=']['primary_assembly_loci'].keys()) assert results['NM_001204314.1:c.*6523_*6524=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191243.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204314.1'} - assert 'NM_000949.6:c.*6528del' in results.keys() + assert 'NM_000949.6:c.*6528del' in list(results.keys()) assert 
results['NM_000949.6:c.*6528del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000949.6:c.*6528del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000949.6:c.*6528del']['alt_genomic_loci'] == [] @@ -4554,9 +4554,9 @@ def test_variant138(self): def test_variant139(self): variant = 'NM_000251.1:c.1296_1348del' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -4568,10 +4568,10 @@ def test_variant139(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -4579,10 +4579,10 @@ def test_variant139(self): def test_variant140(self): variant = 'NM_000088.3:c.2023_2028del' results = 
self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.2024_2028+1del' in results.keys() + assert 'NM_000088.3:c.2024_2028+1del' in list(results.keys()) assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.2024_2028+1del' assert results['NM_000088.3:c.2024_2028+1del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.2024_2028+1del' assert results['NM_000088.3:c.2024_2028+1del']['alt_genomic_loci'] == [] @@ -4604,10 +4604,10 @@ def test_variant140(self): def test_variant141(self): variant = 'NM_000088.3:c.2024_2028+1del' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.2024_2028+1del' in results.keys() + assert 'NM_000088.3:c.2024_2028+1del' in list(results.keys()) assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.2024_2028+1del' assert results['NM_000088.3:c.2024_2028+1del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.2024_2028+1del' assert results['NM_000088.3:c.2024_2028+1del']['alt_genomic_loci'] == [] @@ -4629,9 +4629,9 @@ def test_variant141(self): def test_variant142(self): variant = 'ENST00000450616.1:n.31+1G>C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -4643,10 +4643,10 @@ def test_variant142(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert 
results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -4654,9 +4654,9 @@ def test_variant142(self): def test_variant143(self): variant = 'ENST00000491747:c.5071A>T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -4668,10 +4668,10 @@ def test_variant143(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in 
results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -4679,10 +4679,10 @@ def test_variant143(self): def test_variant144(self): variant = 'NM_000088.3:c.589G>T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.589G>T' in results.keys() + assert 'NM_000088.3:c.589G>T' in list(results.keys()) assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.589G>T']['alt_genomic_loci'] == [] @@ -4694,20 +4694,20 @@ def test_variant144(self): assert results['NM_000088.3:c.589G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8638G>T' assert results['NM_000088.3:c.589G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589G>T' assert results['NM_000088.3:c.589G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638G>T' - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '48275363', 'alt': u'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '50198002', 'alt': u'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '48275363', 'alt': u'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '50198002', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '48275363', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '50198002', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '48275363', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '50198002', 'alt': 'A'}} assert results['NM_000088.3:c.589G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant145(self): variant = 'NG_007400.1:g.8638G>T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.589G>T' in results.keys() + assert 'NM_000088.3:c.589G>T' in list(results.keys()) assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.589G>T']['alt_genomic_loci'] == [] @@ -4719,20 
+4719,20 @@ def test_variant145(self): assert results['NM_000088.3:c.589G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8638G>T' assert results['NM_000088.3:c.589G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589G>T' assert results['NM_000088.3:c.589G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638G>T' - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '48275363', 'alt': u'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '50198002', 'alt': u'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '48275363', 'alt': u'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '50198002', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '48275363', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '50198002', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '48275363', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '50198002', 'alt': 'A'}} assert 
results['NM_000088.3:c.589G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant146(self): variant = 'LRG_1:g.8638G>T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.589G>T' in results.keys() + assert 'NM_000088.3:c.589G>T' in list(results.keys()) assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.589G>T']['alt_genomic_loci'] == [] @@ -4744,20 +4744,20 @@ def test_variant146(self): assert results['NM_000088.3:c.589G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8638G>T' assert results['NM_000088.3:c.589G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589G>T' assert results['NM_000088.3:c.589G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638G>T' - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '48275363', 'alt': u'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '50198002', 'alt': u'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '48275363', 'alt': u'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '50198002', 'alt': u'A'}} + assert 
results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '48275363', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '50198002', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '48275363', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '50198002', 'alt': 'A'}} assert results['NM_000088.3:c.589G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant147(self): variant = 'LRG_1t1:c.589G>T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.589G>T' in results.keys() + assert 'NM_000088.3:c.589G>T' in list(results.keys()) assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.589G>T']['alt_genomic_loci'] == [] @@ -4769,19 +4769,19 @@ def test_variant147(self): assert results['NM_000088.3:c.589G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8638G>T' assert results['NM_000088.3:c.589G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589G>T' assert results['NM_000088.3:c.589G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638G>T' - assert 
results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '48275363', 'alt': u'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '50198002', 'alt': u'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '48275363', 'alt': u'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '50198002', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '48275363', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '50198002', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '48275363', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '50198002', 'alt': 'A'}} assert results['NM_000088.3:c.589G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant148(self): variant = 
'chr16:g.15832508_15832509delinsAC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_002474.2:c.3034_3035inv' in results.keys() + assert 'NM_002474.2:c.3034_3035inv' in list(results.keys()) assert results['NM_002474.2:c.3034_3035inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_002474.2:c.3034_3035inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_002474.2:c.3034_3035inv']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}] @@ -4799,7 +4799,7 @@ def test_variant148(self): assert results['NM_002474.2:c.3034_3035inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} assert results['NM_002474.2:c.3034_3035inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002465.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002474.2'} - assert 'NM_022844.2:c.3034_3035inv' in results.keys() + assert 'NM_022844.2:c.3034_3035inv' in list(results.keys()) assert results['NM_022844.2:c.3034_3035inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_022844.2:c.3034_3035inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_022844.2:c.3034_3035inv']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}] @@ -4817,7 +4817,7 @@ def test_variant148(self): 
assert results['NM_022844.2:c.3034_3035inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} assert results['NM_022844.2:c.3034_3035inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_074035.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_022844.2'} - assert 'NM_001040114.1:c.3055_3056inv' in results.keys() + assert 'NM_001040114.1:c.3055_3056inv' in list(results.keys()) assert results['NM_001040114.1:c.3055_3056inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001040114.1:c.3055_3056inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001040114.1:c.3055_3056inv']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}] @@ -4836,7 +4836,7 @@ def test_variant148(self): assert results['NM_001040114.1:c.3055_3056inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035203.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040114.1'} assert results['flag'] == 'gene_variant' - assert 'NM_001040113.1:c.3055_3056inv' in results.keys() + assert 'NM_001040113.1:c.3055_3056inv' in list(results.keys()) assert results['NM_001040113.1:c.3055_3056inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001040113.1:c.3055_3056inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001040113.1:c.3055_3056inv']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 
'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}] @@ -4858,9 +4858,9 @@ def test_variant148(self): def test_variant149(self): variant = 'NG_012386.1:g.24048dupG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001162427.1:c.210+1615dup' in results.keys() + assert 'NM_001162427.1:c.210+1615dup' in list(results.keys()) assert results['NM_001162427.1:c.210+1615dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001162427.1:c.210+1615dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001162427.1:c.210+1615dup']['alt_genomic_loci'] == [] @@ -4878,7 +4878,7 @@ def test_variant149(self): assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} assert results['NM_001162427.1:c.210+1615dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155899.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162427.1'} - assert 'NM_001162426.1:c.363+1dup' in results.keys() + assert 'NM_001162426.1:c.363+1dup' in list(results.keys()) assert results['NM_001162426.1:c.363+1dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001162426.1:c.363+1dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001162426.1:c.363+1dup']['alt_genomic_loci'] == [] @@ -4897,7 +4897,7 @@ def test_variant149(self): assert results['NM_001162426.1:c.363+1dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155898.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162426.1'} assert results['flag'] == 'gene_variant' - assert 'NM_001362177.1:c.-1+1dup' in results.keys() + assert 'NM_001362177.1:c.-1+1dup' in list(results.keys()) assert 
results['NM_001362177.1:c.-1+1dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001362177.1:c.-1+1dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001362177.1:c.-1+1dup']['alt_genomic_loci'] == [] @@ -4915,7 +4915,7 @@ def test_variant149(self): assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} assert results['NM_001362177.1:c.-1+1dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001349106.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001362177.1'} - assert 'NM_000368.4:c.363+1dup' in results.keys() + assert 'NM_000368.4:c.363+1dup' in list(results.keys()) assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_transcript_variant'] == 'LRG_486t1:c.363+1dup' assert results['NM_000368.4:c.363+1dup']['refseqgene_context_intronic_sequence'] == 'NG_012386.1(NM_000368.4):c.363+1dup' assert results['NM_000368.4:c.363+1dup']['alt_genomic_loci'] == [] @@ -4937,10 +4937,10 @@ def test_variant149(self): def test_variant150(self): variant = 'NM_033517.1:c.1307_1309delCGA' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_033517.1:c.1307_1309del' in results.keys() + assert 'NM_033517.1:c.1307_1309del' in list(results.keys()) assert results['NM_033517.1:c.1307_1309del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_033517.1:c.1307_1309del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_033517.1:c.1307_1309del']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NW_015148969.1:g.33721_33723del', 'vcf': {'chr': 'HG1311_PATCH', 'ref': 'CCGA', 'pos': '33720', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NW_015148969.1:g.33721_33723del', 'vcf': {'chr': 'NW_015148969.1', 'ref': 'CCGA', 'pos': '33720', 'alt': 
'C'}}}] @@ -4952,19 +4952,19 @@ def test_variant150(self): assert results['NM_033517.1:c.1307_1309del']['hgvs_lrg_variant'] == '' assert results['NM_033517.1:c.1307_1309del']['hgvs_transcript_variant'] == 'NM_033517.1:c.1307_1309del' assert results['NM_033517.1:c.1307_1309del']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['NM_033517.1:c.1307_1309del']['primary_assembly_loci'].keys() - assert 'hg38' not in results['NM_033517.1:c.1307_1309del']['primary_assembly_loci'].keys() - assert 'grch37' not in results['NM_033517.1:c.1307_1309del']['primary_assembly_loci'].keys() - assert 'grch38' not in results['NM_033517.1:c.1307_1309del']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['NM_033517.1:c.1307_1309del']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['NM_033517.1:c.1307_1309del']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['NM_033517.1:c.1307_1309del']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['NM_033517.1:c.1307_1309del']['primary_assembly_loci'].keys()) assert results['NM_033517.1:c.1307_1309del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_277052.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_033517.1'} def test_variant151(self): variant = 'HG1311_PATCH-33720-CCGA-C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -4976,10 +4976,10 @@ def test_variant151(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert 
results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -4987,9 +4987,9 @@ def test_variant151(self): def test_variant152(self): variant = '2-73675227-TCTC-TCTCCTC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_015120.4:c.1573_1579=' in results.keys() + assert 'NM_015120.4:c.1573_1579=' in list(results.keys()) assert results['NM_015120.4:c.1573_1579=']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.1573_1579=' assert results['NM_015120.4:c.1573_1579=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_015120.4:c.1573_1579=']['alt_genomic_loci'] == [] @@ -5002,9 +5002,9 @@ def test_variant152(self): assert results['NM_015120.4:c.1573_1579=']['hgvs_transcript_variant'] == 'NM_015120.4:c.1573_1579=' assert results['NM_015120.4:c.1573_1579=']['hgvs_refseqgene_variant'] == 'NG_011690.1:g.67345_67351=' assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675228_73675230dup', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '73675227', 'alt': 'TCTC'}} - assert 
results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448097_73448103=', 'vcf': {'chr': 'chr2', 'ref': u'TCTCCTC', 'pos': '73448097', 'alt': u'TCTCCTC'}} + assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448097_73448103=', 'vcf': {'chr': 'chr2', 'ref': 'TCTCCTC', 'pos': '73448097', 'alt': 'TCTCCTC'}} assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675228_73675230dup', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '73675227', 'alt': 'TCTC'}} - assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448097_73448103=', 'vcf': {'chr': '2', 'ref': u'TCTCCTC', 'pos': '73448097', 'alt': u'TCTCCTC'}} + assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448097_73448103=', 'vcf': {'chr': '2', 'ref': 'TCTCCTC', 'pos': '73448097', 'alt': 'TCTCCTC'}} assert results['NM_015120.4:c.1573_1579=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} assert results['flag'] == 'gene_variant' @@ -5012,9 +5012,9 @@ def test_variant152(self): def test_variant153(self): variant = '2-73675227-TC-TC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_015120.4:c.1577_1579del' in results.keys() + assert 'NM_015120.4:c.1577_1579del' in list(results.keys()) assert results['NM_015120.4:c.1577_1579del']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.1577_1579del' assert results['NM_015120.4:c.1577_1579del']['refseqgene_context_intronic_sequence'] == '' 
assert results['NM_015120.4:c.1577_1579del']['alt_genomic_loci'] == [] @@ -5037,9 +5037,9 @@ def test_variant153(self): def test_variant154(self): variant = '3-14561627-AG-AGG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001080423.3:c.1016_1020=' in results.keys() + assert 'NM_001080423.3:c.1016_1020=' in list(results.keys()) assert results['NM_001080423.3:c.1016_1020=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001080423.3:c.1016_1020=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001080423.3:c.1016_1020=']['alt_genomic_loci'] == [] @@ -5052,13 +5052,13 @@ def test_variant154(self): assert results['NM_001080423.3:c.1016_1020=']['hgvs_transcript_variant'] == 'NM_001080423.3:c.1016_1020=' assert results['NM_001080423.3:c.1016_1020=']['hgvs_refseqgene_variant'] == '' assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': 'chr3', 'ref': 'A', 'pos': '14561627', 'alt': 'AG'}} - assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520120_14520124=', 'vcf': {'chr': 'chr3', 'ref': u'GGGCC', 'pos': '14520120', 'alt': u'GGGCC'}} + assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520120_14520124=', 'vcf': {'chr': 'chr3', 'ref': 'GGGCC', 'pos': '14520120', 'alt': 'GGGCC'}} assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': '3', 'ref': 'A', 'pos': '14561627', 'alt': 'AG'}} - assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520120_14520124=', 'vcf': {'chr': '3', 'ref': u'GGGCC', 'pos': '14520120', 'alt': u'GGGCC'}} + assert 
results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520120_14520124=', 'vcf': {'chr': '3', 'ref': 'GGGCC', 'pos': '14520120', 'alt': 'GGGCC'}} assert results['NM_001080423.3:c.1016_1020=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.3'} assert results['flag'] == 'gene_variant' - assert 'NM_001080423.2:c.1307_1311=' in results.keys() + assert 'NM_001080423.2:c.1307_1311=' in list(results.keys()) assert results['NM_001080423.2:c.1307_1311=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001080423.2:c.1307_1311=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001080423.2:c.1307_1311=']['alt_genomic_loci'] == [] @@ -5071,18 +5071,18 @@ def test_variant154(self): assert results['NM_001080423.2:c.1307_1311=']['hgvs_transcript_variant'] == 'NM_001080423.2:c.1307_1311=' assert results['NM_001080423.2:c.1307_1311=']['hgvs_refseqgene_variant'] == '' assert results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': 'chr3', 'ref': 'A', 'pos': '14561627', 'alt': 'AG'}} - assert 'hg38' not in results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci'].keys()) assert results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': '3', 'ref': 'A', 'pos': '14561627', 'alt': 'AG'}} - assert 'grch38' not in results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci'].keys()) assert results['NM_001080423.2:c.1307_1311=']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.2'} def test_variant155(self): variant = '3-14561630-CC-CC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001080423.3:c.1020del' in results.keys() + assert 'NM_001080423.3:c.1020del' in list(results.keys()) assert results['NM_001080423.3:c.1020del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001080423.3:c.1020del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001080423.3:c.1020del']['alt_genomic_loci'] == [] @@ -5101,7 +5101,7 @@ def test_variant155(self): assert results['NM_001080423.3:c.1020del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.3'} assert results['flag'] == 'gene_variant' - assert 'NM_001080423.2:c.1311del' in results.keys() + assert 'NM_001080423.2:c.1311del' in list(results.keys()) assert results['NM_001080423.2:c.1311del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001080423.2:c.1311del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001080423.2:c.1311del']['alt_genomic_loci'] == [] @@ -5114,19 +5114,19 @@ def test_variant155(self): assert results['NM_001080423.2:c.1311del']['hgvs_transcript_variant'] == 'NM_001080423.2:c.1311del' assert results['NM_001080423.2:c.1311del']['hgvs_refseqgene_variant'] == '' assert results['NM_001080423.2:c.1311del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': 'chr3', 'ref': 'CTGAGGC', 'pos': '14561624', 'alt': 'CTGAGGC'}} - assert 'hg38' not in results['NM_001080423.2:c.1311del']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001080423.2:c.1311del']['primary_assembly_loci'].keys()) assert results['NM_001080423.2:c.1311del']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': '3', 'ref': 'CTGAGGC', 'pos': '14561624', 'alt': 'CTGAGGC'}} - assert 'grch38' not in results['NM_001080423.2:c.1311del']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001080423.2:c.1311del']['primary_assembly_loci'].keys()) assert results['NM_001080423.2:c.1311del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.2'} def test_variant156(self): variant = '6-90403795-G-G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_014611.1:c.9879T>C' in results.keys() + assert 'NM_014611.1:c.9879T>C' in list(results.keys()) assert results['NM_014611.1:c.9879T>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014611.1:c.9879T>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014611.1:c.9879T>C']['alt_genomic_loci'] == [] @@ -5139,12 +5139,12 @@ def test_variant156(self): assert results['NM_014611.1:c.9879T>C']['hgvs_transcript_variant'] == 'NM_014611.1:c.9879T>C' assert results['NM_014611.1:c.9879T>C']['hgvs_refseqgene_variant'] == '' assert results['NM_014611.1:c.9879T>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': 'chr6', 'ref': 'G', 'pos': '90403795', 'alt': 'G'}} - assert 'hg38' not in results['NM_014611.1:c.9879T>C']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_014611.1:c.9879T>C']['primary_assembly_loci'].keys()) assert results['NM_014611.1:c.9879T>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': '6', 'ref': 'G', 'pos': '90403795', 'alt': 'G'}} - assert 'grch38' not in results['NM_014611.1:c.9879T>C']['primary_assembly_loci'].keys() + assert 'grch38' not in 
list(results['NM_014611.1:c.9879T>C']['primary_assembly_loci'].keys()) assert results['NM_014611.1:c.9879T>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.1'} - assert 'NM_014611.2:c.9879C=' in results.keys() + assert 'NM_014611.2:c.9879C=' in list(results.keys()) assert results['NM_014611.2:c.9879C=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014611.2:c.9879C=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014611.2:c.9879C=']['alt_genomic_loci'] == [] @@ -5156,19 +5156,19 @@ def test_variant156(self): assert results['NM_014611.2:c.9879C=']['hgvs_lrg_variant'] == '' assert results['NM_014611.2:c.9879C=']['hgvs_transcript_variant'] == 'NM_014611.2:c.9879C=' assert results['NM_014611.2:c.9879C=']['hgvs_refseqgene_variant'] == '' - assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': 'chr6', 'ref': u'G', 'pos': '90403795', 'alt': u'G'}} - assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G=', 'vcf': {'chr': 'chr6', 'ref': u'G', 'pos': '89694076', 'alt': u'G'}} - assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': '6', 'ref': u'G', 'pos': '90403795', 'alt': u'G'}} - assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G=', 'vcf': {'chr': '6', 'ref': u'G', 'pos': '89694076', 'alt': u'G'}} + assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': 'chr6', 'ref': 'G', 'pos': '90403795', 'alt': 'G'}} + assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000006.12:g.89694076G=', 'vcf': {'chr': 'chr6', 'ref': 'G', 'pos': '89694076', 'alt': 'G'}} + assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': '6', 'ref': 'G', 'pos': '90403795', 'alt': 'G'}} + assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G=', 'vcf': {'chr': '6', 'ref': 'G', 'pos': '89694076', 'alt': 'G'}} assert results['NM_014611.2:c.9879C=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.2'} def test_variant157(self): variant = '6-90403795-G-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_014611.2:c.9879C>T' in results.keys() + assert 'NM_014611.2:c.9879C>T' in list(results.keys()) assert results['NM_014611.2:c.9879C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014611.2:c.9879C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014611.2:c.9879C>T']['alt_genomic_loci'] == [] @@ -5180,14 +5180,14 @@ def test_variant157(self): assert results['NM_014611.2:c.9879C>T']['hgvs_lrg_variant'] == '' assert results['NM_014611.2:c.9879C>T']['hgvs_transcript_variant'] == 'NM_014611.2:c.9879C>T' assert results['NM_014611.2:c.9879C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_014611.2:c.9879C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G>A', 'vcf': {'chr': 'chr6', 'ref': u'G', 'pos': '90403795', 'alt': u'A'}} - assert results['NM_014611.2:c.9879C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G>A', 'vcf': {'chr': 'chr6', 'ref': u'G', 'pos': '89694076', 'alt': u'A'}} - assert results['NM_014611.2:c.9879C>T']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000006.11:g.90403795G>A', 'vcf': {'chr': '6', 'ref': u'G', 'pos': '90403795', 'alt': u'A'}} - assert results['NM_014611.2:c.9879C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G>A', 'vcf': {'chr': '6', 'ref': u'G', 'pos': '89694076', 'alt': u'A'}} + assert results['NM_014611.2:c.9879C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G>A', 'vcf': {'chr': 'chr6', 'ref': 'G', 'pos': '90403795', 'alt': 'A'}} + assert results['NM_014611.2:c.9879C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G>A', 'vcf': {'chr': 'chr6', 'ref': 'G', 'pos': '89694076', 'alt': 'A'}} + assert results['NM_014611.2:c.9879C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G>A', 'vcf': {'chr': '6', 'ref': 'G', 'pos': '90403795', 'alt': 'A'}} + assert results['NM_014611.2:c.9879C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G>A', 'vcf': {'chr': '6', 'ref': 'G', 'pos': '89694076', 'alt': 'A'}} assert results['NM_014611.2:c.9879C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.2'} assert results['flag'] == 'gene_variant' - assert 'NM_014611.1:c.9879T=' in results.keys() + assert 'NM_014611.1:c.9879T=' in list(results.keys()) assert results['NM_014611.1:c.9879T=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014611.1:c.9879T=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014611.1:c.9879T=']['alt_genomic_loci'] == [] @@ -5199,20 +5199,20 @@ def test_variant157(self): assert results['NM_014611.1:c.9879T=']['hgvs_lrg_variant'] == '' assert results['NM_014611.1:c.9879T=']['hgvs_transcript_variant'] == 'NM_014611.1:c.9879T=' assert results['NM_014611.1:c.9879T=']['hgvs_refseqgene_variant'] == '' 
- assert results['NM_014611.1:c.9879T=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G>A', 'vcf': {'chr': 'chr6', 'ref': 'G', 'pos': '90403795', 'alt': u'A'}} - assert 'hg38' not in results['NM_014611.1:c.9879T=']['primary_assembly_loci'].keys() - assert results['NM_014611.1:c.9879T=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G>A', 'vcf': {'chr': '6', 'ref': 'G', 'pos': '90403795', 'alt': u'A'}} - assert 'grch38' not in results['NM_014611.1:c.9879T=']['primary_assembly_loci'].keys() + assert results['NM_014611.1:c.9879T=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G>A', 'vcf': {'chr': 'chr6', 'ref': 'G', 'pos': '90403795', 'alt': 'A'}} + assert 'hg38' not in list(results['NM_014611.1:c.9879T=']['primary_assembly_loci'].keys()) + assert results['NM_014611.1:c.9879T=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G>A', 'vcf': {'chr': '6', 'ref': 'G', 'pos': '90403795', 'alt': 'A'}} + assert 'grch38' not in list(results['NM_014611.1:c.9879T=']['primary_assembly_loci'].keys()) assert results['NM_014611.1:c.9879T=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.1'} def test_variant158(self): variant = '6-32012992-CG-C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_032470.3:c.4del' in results.keys() + assert 'NM_032470.3:c.4del' in list(results.keys()) assert results['NM_032470.3:c.4del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_032470.3:c.4del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032470.3:c.4del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 
'pos': '3483643', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'chr6_GL000251v2_alt', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'chr6_GL000252v2_alt', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'chr6_GL000254v2_alt', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167248.1:g.3274047del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'ref': 'CG', 'pos': '3274046', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167248.1:g.3274047del', 'vcf': {'chr': 'chr6_qbl_hap6', 'ref': 'CG', 'pos': '3274046', 'alt': 'C'}}}, {'grch38': 
{'hgvs_genomic_description': 'NT_167248.2:g.3268451del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'ref': 'CG', 'pos': '3268450', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167248.2:g.3268451del', 'vcf': {'chr': 'chr6_GL000255v2_alt', 'ref': 'CG', 'pos': '3268450', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3345701del', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'CG', 'pos': '3345700', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3345701del', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': 'CG', 'pos': '3345700', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3346403del', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'CG', 'pos': '3346402', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3346403del', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': 'CG', 'pos': '3346402', 'alt': 'C'}}}] @@ -5230,7 +5230,7 @@ def test_variant158(self): assert results['NM_032470.3:c.4del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.32045216del', 'vcf': {'chr': '6', 'ref': 'CG', 'pos': '32045215', 'alt': 'C'}} assert results['NM_032470.3:c.4del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_115859.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032470.3'} - assert 'NM_001365276.1:c.10717del' in results.keys() + assert 'NM_001365276.1:c.10717del' in list(results.keys()) assert results['NM_001365276.1:c.10717del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001365276.1:c.10717del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001365276.1:c.10717del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, 
{'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}] @@ -5243,12 +5243,12 @@ def test_variant158(self): assert results['NM_001365276.1:c.10717del']['hgvs_transcript_variant'] == 'NM_001365276.1:c.10717del' assert results['NM_001365276.1:c.10717del']['hgvs_refseqgene_variant'] == '' assert results['NM_001365276.1:c.10717del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': 'chr6', 'ref': 'CG', 'pos': '32012992', 'alt': 'C'}} - assert 'hg38' not in results['NM_001365276.1:c.10717del']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001365276.1:c.10717del']['primary_assembly_loci'].keys()) assert results['NM_001365276.1:c.10717del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': '6', 'ref': 'CG', 'pos': '32012992', 'alt': 'C'}} - assert 'grch38' not in results['NM_001365276.1:c.10717del']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001365276.1:c.10717del']['primary_assembly_loci'].keys()) assert results['NM_001365276.1:c.10717del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001352205.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001365276.1'} - assert 'NM_019105.7:c.10711del' in results.keys() + assert 'NM_019105.7:c.10711del' in list(results.keys()) assert 
results['NM_019105.7:c.10711del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_019105.7:c.10711del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_019105.7:c.10711del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}] @@ -5261,12 +5261,12 @@ def test_variant158(self): assert results['NM_019105.7:c.10711del']['hgvs_transcript_variant'] == 'NM_019105.7:c.10711del' assert results['NM_019105.7:c.10711del']['hgvs_refseqgene_variant'] == '' assert results['NM_019105.7:c.10711del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': 'chr6', 'ref': 'CG', 'pos': '32012992', 'alt': 'C'}} - assert 'hg38' not in results['NM_019105.7:c.10711del']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_019105.7:c.10711del']['primary_assembly_loci'].keys()) assert results['NM_019105.7:c.10711del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': '6', 'ref': 'CG', 'pos': '32012992', 'alt': 'C'}} - assert 'grch38' not in 
results['NM_019105.7:c.10711del']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_019105.7:c.10711del']['primary_assembly_loci'].keys()) assert results['NM_019105.7:c.10711del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061978.6', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_019105.7'} - assert 'NM_019105.6:c.10711del' in results.keys() + assert 'NM_019105.6:c.10711del' in list(results.keys()) assert results['NM_019105.6:c.10711del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_019105.6:c.10711del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_019105.6:c.10711del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'chr6_GL000251v2_alt', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'chr6_GL000252v2_alt', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 
'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'chr6_GL000254v2_alt', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167248.1:g.3271861del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'ref': 'AG', 'pos': '3271858', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167248.1:g.3271861del', 'vcf': {'chr': 'chr6_qbl_hap6', 'ref': 'AG', 'pos': '3271858', 'alt': 'A'}}}] @@ -5288,10 +5288,10 @@ def test_variant158(self): def test_variant159(self): variant = '17-48275363-C-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.589G>T' in results.keys() + assert 'NM_000088.3:c.589G>T' in list(results.keys()) assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.589G>T']['alt_genomic_loci'] == [] @@ -5303,20 +5303,20 @@ def test_variant159(self): assert results['NM_000088.3:c.589G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8638G>T' assert results['NM_000088.3:c.589G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589G>T' assert results['NM_000088.3:c.589G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638G>T' - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '48275363', 'alt': u'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '50198002', 'alt': u'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '48275363', 'alt': u'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '50198002', 'alt': u'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '48275363', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '50198002', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '48275363', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '50198002', 'alt': 'A'}} assert results['NM_000088.3:c.589G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant160(self): variant = '17-48275364-C-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.589-1G>T' in results.keys() + assert 'NM_000088.3:c.589-1G>T' in list(results.keys()) assert 
results['NM_000088.3:c.589-1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-1G>T' assert results['NM_000088.3:c.589-1G>T']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-1G>T' assert results['NM_000088.3:c.589-1G>T']['alt_genomic_loci'] == [] @@ -5328,19 +5328,19 @@ def test_variant160(self): assert results['NM_000088.3:c.589-1G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8637G>T' assert results['NM_000088.3:c.589-1G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-1G>T' assert results['NM_000088.3:c.589-1G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8637G>T' - assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '48275364', 'alt': u'A'}} - assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '50198003', 'alt': u'A'}} - assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '48275364', 'alt': u'A'}} - assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '50198003', 'alt': u'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '48275364', 'alt': 'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '50198003', 'alt': 'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': 
'17', 'ref': 'C', 'pos': '48275364', 'alt': 'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '50198003', 'alt': 'A'}} assert results['NM_000088.3:c.589-1G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant161(self): variant = '17-48275359-GGA-TCC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_000088.3:c.591_593inv' in results.keys() + assert 'NM_000088.3:c.591_593inv' in list(results.keys()) assert results['NM_000088.3:c.591_593inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.591_593inv' assert results['NM_000088.3:c.591_593inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.591_593inv']['alt_genomic_loci'] == [] @@ -5363,10 +5363,10 @@ def test_variant161(self): def test_variant162(self): variant = '7-94039128-CTTG-C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000089.3:c.1035_1035+2del' in results.keys() + assert 'NM_000089.3:c.1035_1035+2del' in list(results.keys()) assert results['NM_000089.3:c.1035_1035+2del']['hgvs_lrg_transcript_variant'] == 'LRG_2t1:c.1035_1035+2del' assert results['NM_000089.3:c.1035_1035+2del']['refseqgene_context_intronic_sequence'] == 'NG_007405.1(NM_000089.3):c.1035_1035+2del' assert results['NM_000089.3:c.1035_1035+2del']['alt_genomic_loci'] == [] @@ -5388,9 +5388,9 @@ def test_variant162(self): def test_variant163(self): variant = '9-135800972-AC-ACC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 
'NM_001162427.1:c.210+1615dup' in results.keys() + assert 'NM_001162427.1:c.210+1615dup' in list(results.keys()) assert results['NM_001162427.1:c.210+1615dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001162427.1:c.210+1615dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001162427.1:c.210+1615dup']['alt_genomic_loci'] == [] @@ -5408,7 +5408,7 @@ def test_variant163(self): assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} assert results['NM_001162427.1:c.210+1615dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155899.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162427.1'} - assert 'NM_001162426.1:c.363+1dup' in results.keys() + assert 'NM_001162426.1:c.363+1dup' in list(results.keys()) assert results['NM_001162426.1:c.363+1dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001162426.1:c.363+1dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001162426.1:c.363+1dup']['alt_genomic_loci'] == [] @@ -5427,7 +5427,7 @@ def test_variant163(self): assert results['NM_001162426.1:c.363+1dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155898.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162426.1'} assert results['flag'] == 'gene_variant' - assert 'NM_001362177.1:c.-1+1dup' in results.keys() + assert 'NM_001362177.1:c.-1+1dup' in list(results.keys()) assert results['NM_001362177.1:c.-1+1dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001362177.1:c.-1+1dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001362177.1:c.-1+1dup']['alt_genomic_loci'] == [] @@ -5445,7 +5445,7 @@ def test_variant163(self): assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} assert results['NM_001362177.1:c.-1+1dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001349106.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001362177.1'} - assert 'NM_000368.4:c.363+1dup' in results.keys() + assert 'NM_000368.4:c.363+1dup' in list(results.keys()) assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_transcript_variant'] == 'LRG_486t1:c.363+1dup' assert results['NM_000368.4:c.363+1dup']['refseqgene_context_intronic_sequence'] == 'NG_012386.1(NM_000368.4):c.363+1dup' assert results['NM_000368.4:c.363+1dup']['alt_genomic_loci'] == [] @@ -5467,10 +5467,10 @@ def test_variant163(self): def test_variant164(self): variant = '1-43212925-C-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_001243246.1:c.2073G>A' in results.keys() + assert 'NM_001243246.1:c.2073G>A' in list(results.keys()) assert results['NM_001243246.1:c.2073G>A']['hgvs_lrg_transcript_variant'] == 'LRG_5t3:c.2073G>A' assert results['NM_001243246.1:c.2073G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001243246.1:c.2073G>A']['alt_genomic_loci'] == [] @@ -5482,13 +5482,13 @@ def test_variant164(self): assert results['NM_001243246.1:c.2073G>A']['hgvs_lrg_variant'] == '' assert results['NM_001243246.1:c.2073G>A']['hgvs_transcript_variant'] == 'NM_001243246.1:c.2073G>A' assert results['NM_001243246.1:c.2073G>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001243246.1:c.2073G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': 'chr1', 'ref': u'C', 'pos': '43212925', 'alt': u'T'}} - assert results['NM_001243246.1:c.2073G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': 
'chr1', 'ref': u'C', 'pos': '42747254', 'alt': u'T'}} - assert results['NM_001243246.1:c.2073G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': '1', 'ref': u'C', 'pos': '43212925', 'alt': u'T'}} - assert results['NM_001243246.1:c.2073G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': '1', 'ref': u'C', 'pos': '42747254', 'alt': u'T'}} + assert results['NM_001243246.1:c.2073G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '43212925', 'alt': 'T'}} + assert results['NM_001243246.1:c.2073G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '42747254', 'alt': 'T'}} + assert results['NM_001243246.1:c.2073G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '43212925', 'alt': 'T'}} + assert results['NM_001243246.1:c.2073G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '42747254', 'alt': 'T'}} assert results['NM_001243246.1:c.2073G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001230175.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001243246.1'} - assert 'NM_001146289.1:c.2073G>A' in results.keys() + assert 'NM_001146289.1:c.2073G>A' in list(results.keys()) assert results['NM_001146289.1:c.2073G>A']['hgvs_lrg_transcript_variant'] == 'LRG_5t2:c.2073G>A' assert results['NM_001146289.1:c.2073G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001146289.1:c.2073G>A']['alt_genomic_loci'] == [] @@ -5500,13 +5500,13 @@ def test_variant164(self): assert results['NM_001146289.1:c.2073G>A']['hgvs_lrg_variant'] == 
'LRG_5:g.24831G>A' assert results['NM_001146289.1:c.2073G>A']['hgvs_transcript_variant'] == 'NM_001146289.1:c.2073G>A' assert results['NM_001146289.1:c.2073G>A']['hgvs_refseqgene_variant'] == 'NG_008123.1:g.24831G>A' - assert results['NM_001146289.1:c.2073G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': 'chr1', 'ref': u'C', 'pos': '43212925', 'alt': u'T'}} - assert results['NM_001146289.1:c.2073G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': 'chr1', 'ref': u'C', 'pos': '42747254', 'alt': u'T'}} - assert results['NM_001146289.1:c.2073G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': '1', 'ref': u'C', 'pos': '43212925', 'alt': u'T'}} - assert results['NM_001146289.1:c.2073G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': '1', 'ref': u'C', 'pos': '42747254', 'alt': u'T'}} + assert results['NM_001146289.1:c.2073G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '43212925', 'alt': 'T'}} + assert results['NM_001146289.1:c.2073G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '42747254', 'alt': 'T'}} + assert results['NM_001146289.1:c.2073G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '43212925', 'alt': 'T'}} + assert results['NM_001146289.1:c.2073G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '42747254', 'alt': 'T'}} assert results['NM_001146289.1:c.2073G>A']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_008123.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001139761.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001146289.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_5.xml'} - assert 'NM_022356.3:c.2055+18G>A' in results.keys() + assert 'NM_022356.3:c.2055+18G>A' in list(results.keys()) assert results['NM_022356.3:c.2055+18G>A']['hgvs_lrg_transcript_variant'] == 'LRG_5t1:c.2055+18G>A' assert results['NM_022356.3:c.2055+18G>A']['refseqgene_context_intronic_sequence'] == 'NG_008123.1(NM_022356.3):c.2055+18G>A' assert results['NM_022356.3:c.2055+18G>A']['alt_genomic_loci'] == [] @@ -5518,19 +5518,19 @@ def test_variant164(self): assert results['NM_022356.3:c.2055+18G>A']['hgvs_lrg_variant'] == 'LRG_5:g.24831G>A' assert results['NM_022356.3:c.2055+18G>A']['hgvs_transcript_variant'] == 'NM_022356.3:c.2055+18G>A' assert results['NM_022356.3:c.2055+18G>A']['hgvs_refseqgene_variant'] == 'NG_008123.1:g.24831G>A' - assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': 'chr1', 'ref': u'C', 'pos': '43212925', 'alt': u'T'}} - assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': 'chr1', 'ref': u'C', 'pos': '42747254', 'alt': u'T'}} - assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': '1', 'ref': u'C', 'pos': '43212925', 'alt': u'T'}} - assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': '1', 'ref': u'C', 'pos': '42747254', 'alt': u'T'}} + assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 
'pos': '43212925', 'alt': 'T'}} + assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '42747254', 'alt': 'T'}} + assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '43212925', 'alt': 'T'}} + assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '42747254', 'alt': 'T'}} assert results['NM_022356.3:c.2055+18G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008123.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_071751.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_022356.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_5.xml'} def test_variant165(self): variant = 'HG987_PATCH-355171-C-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001194958.2:c.20C>A' in results.keys() + assert 'NM_001194958.2:c.20C>A' in list(results.keys()) assert results['NM_001194958.2:c.20C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001194958.2:c.20C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001194958.2:c.20C>A']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315950.2:g.355171C>A', 'vcf': {'chr': 'HG987_PATCH', 'ref': 'C', 'pos': '355171', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315950.2:g.355171C>A', 'vcf': {'chr': 'NW_003315950.2', 'ref': 'C', 'pos': '355171', 'alt': 'A'}}}] @@ -5542,9 +5542,9 @@ def test_variant165(self): assert results['NM_001194958.2:c.20C>A']['hgvs_lrg_variant'] == '' assert results['NM_001194958.2:c.20C>A']['hgvs_transcript_variant'] == 'NM_001194958.2:c.20C>A' assert 
results['NM_001194958.2:c.20C>A']['hgvs_refseqgene_variant'] == 'NG_033093.1:g.15284C>A' - assert 'hg19' not in results['NM_001194958.2:c.20C>A']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['NM_001194958.2:c.20C>A']['primary_assembly_loci'].keys()) assert results['NM_001194958.2:c.20C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.21702806C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '21702806', 'alt': 'A'}} - assert 'grch37' not in results['NM_001194958.2:c.20C>A']['primary_assembly_loci'].keys() + assert 'grch37' not in list(results['NM_001194958.2:c.20C>A']['primary_assembly_loci'].keys()) assert results['NM_001194958.2:c.20C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.21702806C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '21702806', 'alt': 'A'}} assert results['NM_001194958.2:c.20C>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_033093.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001181887.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001194958.2'} @@ -5553,9 +5553,9 @@ def test_variant165(self): def test_variant166(self): variant = '20-43252915-T-C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_000022.3:c.534A>G' in results.keys() + assert 'NM_000022.3:c.534A>G' in list(results.keys()) assert results['NM_000022.3:c.534A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000022.3:c.534A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000022.3:c.534A>G']['alt_genomic_loci'] == [] @@ -5567,13 +5567,13 @@ def test_variant166(self): assert results['NM_000022.3:c.534A>G']['hgvs_lrg_variant'] == '' assert results['NM_000022.3:c.534A>G']['hgvs_transcript_variant'] == 'NM_000022.3:c.534A>G' assert results['NM_000022.3:c.534A>G']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_000022.3:c.534A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'ref': u'T', 'pos': '43252915', 'alt': u'C'}} - assert results['NM_000022.3:c.534A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': 'chr20', 'ref': u'T', 'pos': '44624274', 'alt': u'C'}} - assert results['NM_000022.3:c.534A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'ref': u'T', 'pos': '43252915', 'alt': u'C'}} - assert results['NM_000022.3:c.534A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': '20', 'ref': u'T', 'pos': '44624274', 'alt': u'C'}} + assert results['NM_000022.3:c.534A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'ref': 'T', 'pos': '43252915', 'alt': 'C'}} + assert results['NM_000022.3:c.534A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': 'chr20', 'ref': 'T', 'pos': '44624274', 'alt': 'C'}} + assert results['NM_000022.3:c.534A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'ref': 'T', 'pos': '43252915', 'alt': 'C'}} + assert results['NM_000022.3:c.534A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': '20', 'ref': 'T', 'pos': '44624274', 'alt': 'C'}} assert results['NM_000022.3:c.534A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000013.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000022.3'} - assert 'NM_001322051.1:c.534A>G' in results.keys() + assert 'NM_001322051.1:c.534A>G' in list(results.keys()) assert 
results['NM_001322051.1:c.534A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322051.1:c.534A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001322051.1:c.534A>G']['alt_genomic_loci'] == [] @@ -5585,13 +5585,13 @@ def test_variant166(self): assert results['NM_001322051.1:c.534A>G']['hgvs_lrg_variant'] == '' assert results['NM_001322051.1:c.534A>G']['hgvs_transcript_variant'] == 'NM_001322051.1:c.534A>G' assert results['NM_001322051.1:c.534A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322051.1:c.534A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'ref': u'T', 'pos': '43252915', 'alt': u'C'}} - assert results['NM_001322051.1:c.534A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': 'chr20', 'ref': u'T', 'pos': '44624274', 'alt': u'C'}} - assert results['NM_001322051.1:c.534A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'ref': u'T', 'pos': '43252915', 'alt': u'C'}} - assert results['NM_001322051.1:c.534A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': '20', 'ref': u'T', 'pos': '44624274', 'alt': u'C'}} + assert results['NM_001322051.1:c.534A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'ref': 'T', 'pos': '43252915', 'alt': 'C'}} + assert results['NM_001322051.1:c.534A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': 'chr20', 'ref': 'T', 'pos': '44624274', 'alt': 'C'}} + assert results['NM_001322051.1:c.534A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'ref': 'T', 'pos': '43252915', 'alt': 'C'}} + assert 
results['NM_001322051.1:c.534A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': '20', 'ref': 'T', 'pos': '44624274', 'alt': 'C'}} assert results['NM_001322051.1:c.534A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308980.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322051.1'} - assert 'NM_000022.2:c.534A>G' in results.keys() + assert 'NM_000022.2:c.534A>G' in list(results.keys()) assert results['NM_000022.2:c.534A>G']['hgvs_lrg_transcript_variant'] == 'LRG_16t1:c.534A>G' assert results['NM_000022.2:c.534A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000022.2:c.534A>G']['alt_genomic_loci'] == [] @@ -5603,14 +5603,14 @@ def test_variant166(self): assert results['NM_000022.2:c.534A>G']['hgvs_lrg_variant'] == 'LRG_16:g.32462A>G' assert results['NM_000022.2:c.534A>G']['hgvs_transcript_variant'] == 'NM_000022.2:c.534A>G' assert results['NM_000022.2:c.534A>G']['hgvs_refseqgene_variant'] == 'NG_007385.1:g.32462A>G' - assert results['NM_000022.2:c.534A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'ref': u'T', 'pos': '43252915', 'alt': u'C'}} - assert 'hg38' not in results['NM_000022.2:c.534A>G']['primary_assembly_loci'].keys() - assert results['NM_000022.2:c.534A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'ref': u'T', 'pos': '43252915', 'alt': u'C'}} - assert 'grch38' not in results['NM_000022.2:c.534A>G']['primary_assembly_loci'].keys() + assert results['NM_000022.2:c.534A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'ref': 'T', 'pos': '43252915', 'alt': 'C'}} + assert 'hg38' not in list(results['NM_000022.2:c.534A>G']['primary_assembly_loci'].keys()) + assert 
results['NM_000022.2:c.534A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'ref': 'T', 'pos': '43252915', 'alt': 'C'}} + assert 'grch38' not in list(results['NM_000022.2:c.534A>G']['primary_assembly_loci'].keys()) assert results['NM_000022.2:c.534A>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007385.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000013.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000022.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_16.xml'} assert results['flag'] == 'gene_variant' - assert 'NM_001322050.1:c.129A>G' in results.keys() + assert 'NM_001322050.1:c.129A>G' in list(results.keys()) assert results['NM_001322050.1:c.129A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322050.1:c.129A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001322050.1:c.129A>G']['alt_genomic_loci'] == [] @@ -5622,13 +5622,13 @@ def test_variant166(self): assert results['NM_001322050.1:c.129A>G']['hgvs_lrg_variant'] == '' assert results['NM_001322050.1:c.129A>G']['hgvs_transcript_variant'] == 'NM_001322050.1:c.129A>G' assert results['NM_001322050.1:c.129A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322050.1:c.129A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'ref': u'T', 'pos': '43252915', 'alt': u'C'}} - assert results['NM_001322050.1:c.129A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': 'chr20', 'ref': u'T', 'pos': '44624274', 'alt': u'C'}} - assert results['NM_001322050.1:c.129A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'ref': u'T', 'pos': '43252915', 'alt': u'C'}} - assert 
results['NM_001322050.1:c.129A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': '20', 'ref': u'T', 'pos': '44624274', 'alt': u'C'}} + assert results['NM_001322050.1:c.129A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'ref': 'T', 'pos': '43252915', 'alt': 'C'}} + assert results['NM_001322050.1:c.129A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': 'chr20', 'ref': 'T', 'pos': '44624274', 'alt': 'C'}} + assert results['NM_001322050.1:c.129A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'ref': 'T', 'pos': '43252915', 'alt': 'C'}} + assert results['NM_001322050.1:c.129A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': '20', 'ref': 'T', 'pos': '44624274', 'alt': 'C'}} assert results['NM_001322050.1:c.129A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308979.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322050.1'} - assert 'NR_136160.1:n.685A>G' in results.keys() + assert 'NR_136160.1:n.685A>G' in list(results.keys()) assert results['NR_136160.1:n.685A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NR_136160.1:n.685A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NR_136160.1:n.685A>G']['alt_genomic_loci'] == [] @@ -5640,20 +5640,20 @@ def test_variant166(self): assert results['NR_136160.1:n.685A>G']['hgvs_lrg_variant'] == '' assert results['NR_136160.1:n.685A>G']['hgvs_transcript_variant'] == 'NR_136160.1:n.685A>G' assert results['NR_136160.1:n.685A>G']['hgvs_refseqgene_variant'] == '' - assert results['NR_136160.1:n.685A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 
'chr20', 'ref': u'T', 'pos': '43252915', 'alt': u'C'}} - assert results['NR_136160.1:n.685A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': 'chr20', 'ref': u'T', 'pos': '44624274', 'alt': u'C'}} - assert results['NR_136160.1:n.685A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'ref': u'T', 'pos': '43252915', 'alt': u'C'}} - assert results['NR_136160.1:n.685A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': '20', 'ref': u'T', 'pos': '44624274', 'alt': u'C'}} + assert results['NR_136160.1:n.685A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'ref': 'T', 'pos': '43252915', 'alt': 'C'}} + assert results['NR_136160.1:n.685A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': 'chr20', 'ref': 'T', 'pos': '44624274', 'alt': 'C'}} + assert results['NR_136160.1:n.685A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'ref': 'T', 'pos': '43252915', 'alt': 'C'}} + assert results['NR_136160.1:n.685A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': '20', 'ref': 'T', 'pos': '44624274', 'alt': 'C'}} assert results['NR_136160.1:n.685A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_136160.1'} def test_variant167(self): variant = '1-216219781-A-C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_206933.2:c.6317C>G' in results.keys() + assert 'NM_206933.2:c.6317C>G' in list(results.keys()) assert results['NM_206933.2:c.6317C>G']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_206933.2:c.6317C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_206933.2:c.6317C>G']['alt_genomic_loci'] == [] @@ -5665,19 +5665,19 @@ def test_variant167(self): assert results['NM_206933.2:c.6317C>G']['hgvs_lrg_variant'] == '' assert results['NM_206933.2:c.6317C>G']['hgvs_transcript_variant'] == 'NM_206933.2:c.6317C>G' assert results['NM_206933.2:c.6317C>G']['hgvs_refseqgene_variant'] == 'NG_009497.1:g.381958C>G' - assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.216219781A>C', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '216219781', 'alt': u'C'}} - assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.216046439A>C', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '216046439', 'alt': u'C'}} - assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.216219781A>C', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '216219781', 'alt': u'C'}} - assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.216046439A>C', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '216046439', 'alt': u'C'}} + assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.216219781A>C', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '216219781', 'alt': 'C'}} + assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.216046439A>C', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '216046439', 'alt': 'C'}} + assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.216219781A>C', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '216219781', 'alt': 'C'}} + assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000001.11:g.216046439A>C', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '216046439', 'alt': 'C'}} assert results['NM_206933.2:c.6317C>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009497.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_996816.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_206933.2'} def test_variant168(self): variant = '2-209113113-G-A,C,T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_005896.3:c.394C>G' in results.keys() + assert 'NM_005896.3:c.394C>G' in list(results.keys()) assert results['NM_005896.3:c.394C>G']['hgvs_lrg_transcript_variant'] == 'LRG_610t1:c.394C>G' assert results['NM_005896.3:c.394C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_005896.3:c.394C>G']['alt_genomic_loci'] == [] @@ -5689,13 +5689,13 @@ def test_variant168(self): assert results['NM_005896.3:c.394C>G']['hgvs_lrg_variant'] == '' assert results['NM_005896.3:c.394C>G']['hgvs_transcript_variant'] == 'NM_005896.3:c.394C>G' assert results['NM_005896.3:c.394C>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_005896.3:c.394C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '209113113', 'alt': u'C'}} - assert results['NM_005896.3:c.394C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '208248389', 'alt': u'C'}} - assert results['NM_005896.3:c.394C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '209113113', 'alt': u'C'}} - assert results['NM_005896.3:c.394C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '208248389', 'alt': u'C'}} + 
assert results['NM_005896.3:c.394C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '209113113', 'alt': 'C'}} + assert results['NM_005896.3:c.394C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '208248389', 'alt': 'C'}} + assert results['NM_005896.3:c.394C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '209113113', 'alt': 'C'}} + assert results['NM_005896.3:c.394C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '208248389', 'alt': 'C'}} assert results['NM_005896.3:c.394C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.3'} - assert 'NM_001282387.1:c.394C>G' in results.keys() + assert 'NM_001282387.1:c.394C>G' in list(results.keys()) assert results['NM_001282387.1:c.394C>G']['hgvs_lrg_transcript_variant'] == 'LRG_610t2:c.394C>G' assert results['NM_001282387.1:c.394C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001282387.1:c.394C>G']['alt_genomic_loci'] == [] @@ -5707,13 +5707,13 @@ def test_variant168(self): assert results['NM_001282387.1:c.394C>G']['hgvs_lrg_variant'] == '' assert results['NM_001282387.1:c.394C>G']['hgvs_transcript_variant'] == 'NM_001282387.1:c.394C>G' assert results['NM_001282387.1:c.394C>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001282387.1:c.394C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '209113113', 'alt': u'C'}} - assert results['NM_001282387.1:c.394C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.208248389G>C', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '208248389', 'alt': u'C'}} - assert results['NM_001282387.1:c.394C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '209113113', 'alt': u'C'}} - assert results['NM_001282387.1:c.394C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '208248389', 'alt': u'C'}} + assert results['NM_001282387.1:c.394C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '209113113', 'alt': 'C'}} + assert results['NM_001282387.1:c.394C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '208248389', 'alt': 'C'}} + assert results['NM_001282387.1:c.394C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '209113113', 'alt': 'C'}} + assert results['NM_001282387.1:c.394C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '208248389', 'alt': 'C'}} assert results['NM_001282387.1:c.394C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269316.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282387.1'} - assert 'NM_001282387.1:c.394C>A' in results.keys() + assert 'NM_001282387.1:c.394C>A' in list(results.keys()) assert results['NM_001282387.1:c.394C>A']['hgvs_lrg_transcript_variant'] == 'LRG_610t2:c.394C>A' assert results['NM_001282387.1:c.394C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001282387.1:c.394C>A']['alt_genomic_loci'] == [] @@ -5725,13 +5725,13 @@ def test_variant168(self): assert 
results['NM_001282387.1:c.394C>A']['hgvs_lrg_variant'] == '' assert results['NM_001282387.1:c.394C>A']['hgvs_transcript_variant'] == 'NM_001282387.1:c.394C>A' assert results['NM_001282387.1:c.394C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001282387.1:c.394C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '209113113', 'alt': u'T'}} - assert results['NM_001282387.1:c.394C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '208248389', 'alt': u'T'}} - assert results['NM_001282387.1:c.394C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '209113113', 'alt': u'T'}} - assert results['NM_001282387.1:c.394C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '208248389', 'alt': u'T'}} + assert results['NM_001282387.1:c.394C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '209113113', 'alt': 'T'}} + assert results['NM_001282387.1:c.394C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '208248389', 'alt': 'T'}} + assert results['NM_001282387.1:c.394C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '209113113', 'alt': 'T'}} + assert results['NM_001282387.1:c.394C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '208248389', 'alt': 'T'}} assert results['NM_001282387.1:c.394C>A']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269316.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282387.1'} - assert 'NM_005896.3:c.394C>A' in results.keys() + assert 'NM_005896.3:c.394C>A' in list(results.keys()) assert results['NM_005896.3:c.394C>A']['hgvs_lrg_transcript_variant'] == 'LRG_610t1:c.394C>A' assert results['NM_005896.3:c.394C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_005896.3:c.394C>A']['alt_genomic_loci'] == [] @@ -5743,13 +5743,13 @@ def test_variant168(self): assert results['NM_005896.3:c.394C>A']['hgvs_lrg_variant'] == '' assert results['NM_005896.3:c.394C>A']['hgvs_transcript_variant'] == 'NM_005896.3:c.394C>A' assert results['NM_005896.3:c.394C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_005896.3:c.394C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '209113113', 'alt': u'T'}} - assert results['NM_005896.3:c.394C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '208248389', 'alt': u'T'}} - assert results['NM_005896.3:c.394C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '209113113', 'alt': u'T'}} - assert results['NM_005896.3:c.394C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '208248389', 'alt': u'T'}} + assert results['NM_005896.3:c.394C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '209113113', 'alt': 'T'}} + assert results['NM_005896.3:c.394C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '208248389', 'alt': 'T'}} + assert 
results['NM_005896.3:c.394C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '209113113', 'alt': 'T'}} + assert results['NM_005896.3:c.394C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '208248389', 'alt': 'T'}} assert results['NM_005896.3:c.394C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.3'} - assert 'NM_001282386.1:c.394C>T' in results.keys() + assert 'NM_001282386.1:c.394C>T' in list(results.keys()) assert results['NM_001282386.1:c.394C>T']['hgvs_lrg_transcript_variant'] == 'LRG_610t3:c.394C>T' assert results['NM_001282386.1:c.394C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001282386.1:c.394C>T']['alt_genomic_loci'] == [] @@ -5761,13 +5761,13 @@ def test_variant168(self): assert results['NM_001282386.1:c.394C>T']['hgvs_lrg_variant'] == '' assert results['NM_001282386.1:c.394C>T']['hgvs_transcript_variant'] == 'NM_001282386.1:c.394C>T' assert results['NM_001282386.1:c.394C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001282386.1:c.394C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '209113113', 'alt': u'A'}} - assert results['NM_001282386.1:c.394C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '208248389', 'alt': u'A'}} - assert results['NM_001282386.1:c.394C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '209113113', 'alt': u'A'}} - assert results['NM_001282386.1:c.394C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.208248389G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '208248389', 'alt': u'A'}} + assert results['NM_001282386.1:c.394C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '209113113', 'alt': 'A'}} + assert results['NM_001282386.1:c.394C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '208248389', 'alt': 'A'}} + assert results['NM_001282386.1:c.394C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '209113113', 'alt': 'A'}} + assert results['NM_001282386.1:c.394C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '208248389', 'alt': 'A'}} assert results['NM_001282386.1:c.394C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269315.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282386.1'} - assert 'NM_005896.2:c.394C>A' in results.keys() + assert 'NM_005896.2:c.394C>A' in list(results.keys()) assert results['NM_005896.2:c.394C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_005896.2:c.394C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_005896.2:c.394C>A']['alt_genomic_loci'] == [] @@ -5779,13 +5779,13 @@ def test_variant168(self): assert results['NM_005896.2:c.394C>A']['hgvs_lrg_variant'] == 'LRG_610:g.22686C>A' assert results['NM_005896.2:c.394C>A']['hgvs_transcript_variant'] == 'NM_005896.2:c.394C>A' assert results['NM_005896.2:c.394C>A']['hgvs_refseqgene_variant'] == 'NG_023319.2:g.22686C>A' - assert results['NM_005896.2:c.394C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '209113113', 'alt': u'T'}} - 
assert 'hg38' not in results['NM_005896.2:c.394C>A']['primary_assembly_loci'].keys() - assert results['NM_005896.2:c.394C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '209113113', 'alt': u'T'}} - assert 'grch38' not in results['NM_005896.2:c.394C>A']['primary_assembly_loci'].keys() + assert results['NM_005896.2:c.394C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '209113113', 'alt': 'T'}} + assert 'hg38' not in list(results['NM_005896.2:c.394C>A']['primary_assembly_loci'].keys()) + assert results['NM_005896.2:c.394C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '209113113', 'alt': 'T'}} + assert 'grch38' not in list(results['NM_005896.2:c.394C>A']['primary_assembly_loci'].keys()) assert results['NM_005896.2:c.394C>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_023319.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_610.xml'} - assert 'NM_005896.2:c.394C>G' in results.keys() + assert 'NM_005896.2:c.394C>G' in list(results.keys()) assert results['NM_005896.2:c.394C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_005896.2:c.394C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_005896.2:c.394C>G']['alt_genomic_loci'] == [] @@ -5797,14 +5797,14 @@ def test_variant168(self): assert results['NM_005896.2:c.394C>G']['hgvs_lrg_variant'] == 'LRG_610:g.22686C>G' assert results['NM_005896.2:c.394C>G']['hgvs_transcript_variant'] == 'NM_005896.2:c.394C>G' assert results['NM_005896.2:c.394C>G']['hgvs_refseqgene_variant'] == 'NG_023319.2:g.22686C>G' - assert 
results['NM_005896.2:c.394C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '209113113', 'alt': u'C'}} - assert 'hg38' not in results['NM_005896.2:c.394C>G']['primary_assembly_loci'].keys() - assert results['NM_005896.2:c.394C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '209113113', 'alt': u'C'}} - assert 'grch38' not in results['NM_005896.2:c.394C>G']['primary_assembly_loci'].keys() + assert results['NM_005896.2:c.394C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '209113113', 'alt': 'C'}} + assert 'hg38' not in list(results['NM_005896.2:c.394C>G']['primary_assembly_loci'].keys()) + assert results['NM_005896.2:c.394C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '209113113', 'alt': 'C'}} + assert 'grch38' not in list(results['NM_005896.2:c.394C>G']['primary_assembly_loci'].keys()) assert results['NM_005896.2:c.394C>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_023319.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_610.xml'} assert results['flag'] == 'gene_variant' - assert 'NM_005896.3:c.394C>T' in results.keys() + assert 'NM_005896.3:c.394C>T' in list(results.keys()) assert results['NM_005896.3:c.394C>T']['hgvs_lrg_transcript_variant'] == 'LRG_610t1:c.394C>T' assert results['NM_005896.3:c.394C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_005896.3:c.394C>T']['alt_genomic_loci'] == [] @@ -5816,13 +5816,13 @@ def test_variant168(self): assert 
results['NM_005896.3:c.394C>T']['hgvs_lrg_variant'] == '' assert results['NM_005896.3:c.394C>T']['hgvs_transcript_variant'] == 'NM_005896.3:c.394C>T' assert results['NM_005896.3:c.394C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_005896.3:c.394C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '209113113', 'alt': u'A'}} - assert results['NM_005896.3:c.394C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '208248389', 'alt': u'A'}} - assert results['NM_005896.3:c.394C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '209113113', 'alt': u'A'}} - assert results['NM_005896.3:c.394C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '208248389', 'alt': u'A'}} + assert results['NM_005896.3:c.394C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '209113113', 'alt': 'A'}} + assert results['NM_005896.3:c.394C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '208248389', 'alt': 'A'}} + assert results['NM_005896.3:c.394C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '209113113', 'alt': 'A'}} + assert results['NM_005896.3:c.394C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '208248389', 'alt': 'A'}} assert results['NM_005896.3:c.394C>T']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.3'} - assert 'NM_001282387.1:c.394C>T' in results.keys() + assert 'NM_001282387.1:c.394C>T' in list(results.keys()) assert results['NM_001282387.1:c.394C>T']['hgvs_lrg_transcript_variant'] == 'LRG_610t2:c.394C>T' assert results['NM_001282387.1:c.394C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001282387.1:c.394C>T']['alt_genomic_loci'] == [] @@ -5834,13 +5834,13 @@ def test_variant168(self): assert results['NM_001282387.1:c.394C>T']['hgvs_lrg_variant'] == '' assert results['NM_001282387.1:c.394C>T']['hgvs_transcript_variant'] == 'NM_001282387.1:c.394C>T' assert results['NM_001282387.1:c.394C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001282387.1:c.394C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '209113113', 'alt': u'A'}} - assert results['NM_001282387.1:c.394C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '208248389', 'alt': u'A'}} - assert results['NM_001282387.1:c.394C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '209113113', 'alt': u'A'}} - assert results['NM_001282387.1:c.394C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '208248389', 'alt': u'A'}} + assert results['NM_001282387.1:c.394C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '209113113', 'alt': 'A'}} + assert results['NM_001282387.1:c.394C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 
'pos': '208248389', 'alt': 'A'}} + assert results['NM_001282387.1:c.394C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '209113113', 'alt': 'A'}} + assert results['NM_001282387.1:c.394C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '208248389', 'alt': 'A'}} assert results['NM_001282387.1:c.394C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269316.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282387.1'} - assert 'NM_001282386.1:c.394C>G' in results.keys() + assert 'NM_001282386.1:c.394C>G' in list(results.keys()) assert results['NM_001282386.1:c.394C>G']['hgvs_lrg_transcript_variant'] == 'LRG_610t3:c.394C>G' assert results['NM_001282386.1:c.394C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001282386.1:c.394C>G']['alt_genomic_loci'] == [] @@ -5852,13 +5852,13 @@ def test_variant168(self): assert results['NM_001282386.1:c.394C>G']['hgvs_lrg_variant'] == '' assert results['NM_001282386.1:c.394C>G']['hgvs_transcript_variant'] == 'NM_001282386.1:c.394C>G' assert results['NM_001282386.1:c.394C>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001282386.1:c.394C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '209113113', 'alt': u'C'}} - assert results['NM_001282386.1:c.394C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '208248389', 'alt': u'C'}} - assert results['NM_001282386.1:c.394C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '209113113', 'alt': u'C'}} - assert 
results['NM_001282386.1:c.394C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '208248389', 'alt': u'C'}} + assert results['NM_001282386.1:c.394C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '209113113', 'alt': 'C'}} + assert results['NM_001282386.1:c.394C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '208248389', 'alt': 'C'}} + assert results['NM_001282386.1:c.394C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '209113113', 'alt': 'C'}} + assert results['NM_001282386.1:c.394C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '208248389', 'alt': 'C'}} assert results['NM_001282386.1:c.394C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269315.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282386.1'} - assert 'NM_005896.2:c.394C>T' in results.keys() + assert 'NM_005896.2:c.394C>T' in list(results.keys()) assert results['NM_005896.2:c.394C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_005896.2:c.394C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_005896.2:c.394C>T']['alt_genomic_loci'] == [] @@ -5870,13 +5870,13 @@ def test_variant168(self): assert results['NM_005896.2:c.394C>T']['hgvs_lrg_variant'] == 'LRG_610:g.22686C>T' assert results['NM_005896.2:c.394C>T']['hgvs_transcript_variant'] == 'NM_005896.2:c.394C>T' assert results['NM_005896.2:c.394C>T']['hgvs_refseqgene_variant'] == 'NG_023319.2:g.22686C>T' - assert results['NM_005896.2:c.394C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000002.11:g.209113113G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '209113113', 'alt': u'A'}} - assert 'hg38' not in results['NM_005896.2:c.394C>T']['primary_assembly_loci'].keys() - assert results['NM_005896.2:c.394C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '209113113', 'alt': u'A'}} - assert 'grch38' not in results['NM_005896.2:c.394C>T']['primary_assembly_loci'].keys() + assert results['NM_005896.2:c.394C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '209113113', 'alt': 'A'}} + assert 'hg38' not in list(results['NM_005896.2:c.394C>T']['primary_assembly_loci'].keys()) + assert results['NM_005896.2:c.394C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '209113113', 'alt': 'A'}} + assert 'grch38' not in list(results['NM_005896.2:c.394C>T']['primary_assembly_loci'].keys()) assert results['NM_005896.2:c.394C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_023319.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_610.xml'} - assert 'NM_001282386.1:c.394C>A' in results.keys() + assert 'NM_001282386.1:c.394C>A' in list(results.keys()) assert results['NM_001282386.1:c.394C>A']['hgvs_lrg_transcript_variant'] == 'LRG_610t3:c.394C>A' assert results['NM_001282386.1:c.394C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001282386.1:c.394C>A']['alt_genomic_loci'] == [] @@ -5888,19 +5888,19 @@ def test_variant168(self): assert results['NM_001282386.1:c.394C>A']['hgvs_lrg_variant'] == '' assert results['NM_001282386.1:c.394C>A']['hgvs_transcript_variant'] == 'NM_001282386.1:c.394C>A' assert 
results['NM_001282386.1:c.394C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001282386.1:c.394C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '209113113', 'alt': u'T'}} - assert results['NM_001282386.1:c.394C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '208248389', 'alt': u'T'}} - assert results['NM_001282386.1:c.394C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '209113113', 'alt': u'T'}} - assert results['NM_001282386.1:c.394C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '208248389', 'alt': u'T'}} + assert results['NM_001282386.1:c.394C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '209113113', 'alt': 'T'}} + assert results['NM_001282386.1:c.394C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '208248389', 'alt': 'T'}} + assert results['NM_001282386.1:c.394C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '209113113', 'alt': 'T'}} + assert results['NM_001282386.1:c.394C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '208248389', 'alt': 'T'}} assert results['NM_001282386.1:c.394C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269315.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282386.1'} def test_variant169(self): variant = 
'NC_000005.9:g.35058665_35058666CA=' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001204314.1:c.*6525_*6526=' in results.keys() + assert 'NM_001204314.1:c.*6525_*6526=' in list(results.keys()) assert results['NM_001204314.1:c.*6525_*6526=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001204314.1:c.*6525_*6526=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001204314.1:c.*6525_*6526=']['alt_genomic_loci'] == [] @@ -5912,13 +5912,13 @@ def test_variant169(self): assert results['NM_001204314.1:c.*6525_*6526=']['hgvs_lrg_variant'] == '' assert results['NM_001204314.1:c.*6525_*6526=']['hgvs_transcript_variant'] == 'NM_001204314.1:c.*6525_*6526=' assert results['NM_001204314.1:c.*6525_*6526=']['hgvs_refseqgene_variant'] == '' - assert results['NM_001204314.1:c.*6525_*6526=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': 'chr5', 'ref': u'CA', 'pos': '35058665', 'alt': u'CA'}} - assert 'hg38' not in results['NM_001204314.1:c.*6525_*6526=']['primary_assembly_loci'].keys() - assert results['NM_001204314.1:c.*6525_*6526=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': '5', 'ref': u'CA', 'pos': '35058665', 'alt': u'CA'}} - assert 'grch38' not in results['NM_001204314.1:c.*6525_*6526=']['primary_assembly_loci'].keys() + assert results['NM_001204314.1:c.*6525_*6526=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': 'chr5', 'ref': 'CA', 'pos': '35058665', 'alt': 'CA'}} + assert 'hg38' not in list(results['NM_001204314.1:c.*6525_*6526=']['primary_assembly_loci'].keys()) + assert results['NM_001204314.1:c.*6525_*6526=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058665', 'alt': 'CA'}} + 
assert 'grch38' not in list(results['NM_001204314.1:c.*6525_*6526=']['primary_assembly_loci'].keys()) assert results['NM_001204314.1:c.*6525_*6526=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191243.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204314.1'} - assert 'NM_001204314.2:c.*6528del' in results.keys() + assert 'NM_001204314.2:c.*6528del' in list(results.keys()) assert results['NM_001204314.2:c.*6528del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001204314.2:c.*6528del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001204314.2:c.*6528del']['alt_genomic_loci'] == [] @@ -5936,7 +5936,7 @@ def test_variant169(self): assert results['NM_001204314.2:c.*6528del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563del', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058560', 'alt': 'C'}} assert results['NM_001204314.2:c.*6528del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191243.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204314.2'} - assert 'NM_001204317.1:c.856-9153_856-9152=' in results.keys() + assert 'NM_001204317.1:c.856-9153_856-9152=' in list(results.keys()) assert results['NM_001204317.1:c.856-9153_856-9152=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001204317.1:c.856-9153_856-9152=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001204317.1:c.856-9153_856-9152=']['alt_genomic_loci'] == [] @@ -5954,7 +5954,7 @@ def test_variant169(self): assert results['NM_001204317.1:c.856-9153_856-9152=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058560_35058561=', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058560', 'alt': 'CA'}} assert results['NM_001204317.1:c.856-9153_856-9152=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191246.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204317.1'} - assert 'NM_001204316.1:c.1009+7385_1009+7386=' in results.keys() + assert 'NM_001204316.1:c.1009+7385_1009+7386=' in list(results.keys()) assert results['NM_001204316.1:c.1009+7385_1009+7386=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001204316.1:c.1009+7385_1009+7386=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001204316.1:c.1009+7385_1009+7386=']['alt_genomic_loci'] == [] @@ -5973,7 +5973,7 @@ def test_variant169(self): assert results['NM_001204316.1:c.1009+7385_1009+7386=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191245.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204316.1'} assert results['flag'] == 'gene_variant' - assert 'NR_037910.1:n.828-9153_828-9152=' in results.keys() + assert 'NR_037910.1:n.828-9153_828-9152=' in list(results.keys()) assert results['NR_037910.1:n.828-9153_828-9152=']['hgvs_lrg_transcript_variant'] == '' assert results['NR_037910.1:n.828-9153_828-9152=']['refseqgene_context_intronic_sequence'] == '' assert results['NR_037910.1:n.828-9153_828-9152=']['alt_genomic_loci'] == [] @@ -5991,7 +5991,7 @@ def test_variant169(self): assert results['NR_037910.1:n.828-9153_828-9152=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058560_35058561=', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058560', 'alt': 'CA'}} assert results['NR_037910.1:n.828-9153_828-9152=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_037910.1'} - assert 'NM_001204318.1:c.686-9153_686-9152=' in results.keys() + assert 'NM_001204318.1:c.686-9153_686-9152=' in list(results.keys()) assert results['NM_001204318.1:c.686-9153_686-9152=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001204318.1:c.686-9153_686-9152=']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_001204318.1:c.686-9153_686-9152=']['alt_genomic_loci'] == [] @@ -6009,7 +6009,7 @@ def test_variant169(self): assert results['NM_001204318.1:c.686-9153_686-9152=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058560_35058561=', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058560', 'alt': 'CA'}} assert results['NM_001204318.1:c.686-9153_686-9152=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191247.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204318.1'} - assert 'NM_000949.5:c.*6525_*6526=' in results.keys() + assert 'NM_000949.5:c.*6525_*6526=' in list(results.keys()) assert results['NM_000949.5:c.*6525_*6526=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000949.5:c.*6525_*6526=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000949.5:c.*6525_*6526=']['alt_genomic_loci'] == [] @@ -6021,13 +6021,13 @@ def test_variant169(self): assert results['NM_000949.5:c.*6525_*6526=']['hgvs_lrg_variant'] == '' assert results['NM_000949.5:c.*6525_*6526=']['hgvs_transcript_variant'] == 'NM_000949.5:c.*6525_*6526=' assert results['NM_000949.5:c.*6525_*6526=']['hgvs_refseqgene_variant'] == 'NG_029042.1:g.177158_177159=' - assert results['NM_000949.5:c.*6525_*6526=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058664_35058668=', 'vcf': {'chr': 'chr5', 'ref': u'ACAAG', 'pos': '35058664', 'alt': u'ACAAG'}} - assert 'hg38' not in results['NM_000949.5:c.*6525_*6526=']['primary_assembly_loci'].keys() - assert results['NM_000949.5:c.*6525_*6526=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058664_35058668=', 'vcf': {'chr': '5', 'ref': u'ACAAG', 'pos': '35058664', 'alt': u'ACAAG'}} - assert 'grch38' not in results['NM_000949.5:c.*6525_*6526=']['primary_assembly_loci'].keys() + assert results['NM_000949.5:c.*6525_*6526=']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000005.9:g.35058664_35058668=', 'vcf': {'chr': 'chr5', 'ref': 'ACAAG', 'pos': '35058664', 'alt': 'ACAAG'}} + assert 'hg38' not in list(results['NM_000949.5:c.*6525_*6526=']['primary_assembly_loci'].keys()) + assert results['NM_000949.5:c.*6525_*6526=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058664_35058668=', 'vcf': {'chr': '5', 'ref': 'ACAAG', 'pos': '35058664', 'alt': 'ACAAG'}} + assert 'grch38' not in list(results['NM_000949.5:c.*6525_*6526=']['primary_assembly_loci'].keys()) assert results['NM_000949.5:c.*6525_*6526=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029042.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000940.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000949.5'} - assert 'NM_000949.6:c.*6528del' in results.keys() + assert 'NM_000949.6:c.*6528del' in list(results.keys()) assert results['NM_000949.6:c.*6528del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000949.6:c.*6528del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000949.6:c.*6528del']['alt_genomic_loci'] == [] @@ -6049,10 +6049,10 @@ def test_variant169(self): def test_variant170(self): variant = 'NC_000002.11:g.73675227_73675229delTCTinsTCTCTC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_015120.4:c.1580_1581insCCT' in results.keys() + assert 'NM_015120.4:c.1580_1581insCCT' in list(results.keys()) assert results['NM_015120.4:c.1580_1581insCCT']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.1580_1581insCCT' assert results['NM_015120.4:c.1580_1581insCCT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_015120.4:c.1580_1581insCCT']['alt_genomic_loci'] == [] @@ -6064,20 +6064,20 @@ def test_variant170(self): assert results['NM_015120.4:c.1580_1581insCCT']['hgvs_lrg_variant'] == 'LRG_741:g.67352_67353insCCT' 
assert results['NM_015120.4:c.1580_1581insCCT']['hgvs_transcript_variant'] == 'NM_015120.4:c.1580_1581insCCT' assert results['NM_015120.4:c.1580_1581insCCT']['hgvs_refseqgene_variant'] == 'NG_011690.1:g.67352_67353insCCT' - assert results['NM_015120.4:c.1580_1581insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675231_73675232insCCT', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '73675229', 'alt': u'TCTC'}} - assert results['NM_015120.4:c.1580_1581insCCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448104_73448105insCCT', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '73448102', 'alt': u'TCTC'}} - assert results['NM_015120.4:c.1580_1581insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675231_73675232insCCT', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '73675229', 'alt': u'TCTC'}} - assert results['NM_015120.4:c.1580_1581insCCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448104_73448105insCCT', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '73448102', 'alt': u'TCTC'}} + assert results['NM_015120.4:c.1580_1581insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675231_73675232insCCT', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '73675229', 'alt': 'TCTC'}} + assert results['NM_015120.4:c.1580_1581insCCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448104_73448105insCCT', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '73448102', 'alt': 'TCTC'}} + assert results['NM_015120.4:c.1580_1581insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675231_73675232insCCT', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '73675229', 'alt': 'TCTC'}} + assert results['NM_015120.4:c.1580_1581insCCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448104_73448105insCCT', 'vcf': {'chr': '2', 'ref': 
'T', 'pos': '73448102', 'alt': 'TCTC'}} assert results['NM_015120.4:c.1580_1581insCCT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} def test_variant171(self): variant = 'NM_000828.4:c.-2dupG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000828.4:c.-2dup' in results.keys() + assert 'NM_000828.4:c.-2dup' in list(results.keys()) assert results['NM_000828.4:c.-2dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000828.4:c.-2dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000828.4:c.-2dup']['alt_genomic_loci'] == [] @@ -6089,9 +6089,9 @@ def test_variant171(self): assert results['NM_000828.4:c.-2dup']['hgvs_lrg_variant'] == '' assert results['NM_000828.4:c.-2dup']['hgvs_transcript_variant'] == 'NM_000828.4:c.-2dup' assert results['NM_000828.4:c.-2dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': u'AGG'}} + assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'AGG'}} assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '123184533', 'alt': 'AG'}} - assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': 
'122318386', 'alt': u'AGG'}} + assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'AGG'}} assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '123184533', 'alt': 'AG'}} assert results['NM_000828.4:c.-2dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} @@ -6099,9 +6099,9 @@ def test_variant171(self): def test_variant172(self): variant = 'X-122318386-A-AGG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_007325.4:c.-2dup' in results.keys() + assert 'NM_007325.4:c.-2dup' in list(results.keys()) assert results['NM_007325.4:c.-2dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007325.4:c.-2dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007325.4:c.-2dup']['alt_genomic_loci'] == [] @@ -6113,14 +6113,14 @@ def test_variant172(self): assert results['NM_007325.4:c.-2dup']['hgvs_lrg_variant'] == '' assert results['NM_007325.4:c.-2dup']['hgvs_transcript_variant'] == 'NM_007325.4:c.-2dup' assert results['NM_007325.4:c.-2dup']['hgvs_refseqgene_variant'] == 'NG_009377.1:g.5292dup' - assert results['NM_007325.4:c.-2dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': u'AGG'}} + assert results['NM_007325.4:c.-2dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'AGG'}} assert results['NM_007325.4:c.-2dup']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '123184533', 'alt': 'AG'}} - assert results['NM_007325.4:c.-2dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': u'AGG'}} + assert results['NM_007325.4:c.-2dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'AGG'}} assert results['NM_007325.4:c.-2dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '123184533', 'alt': 'AG'}} assert results['NM_007325.4:c.-2dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009377.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_015564.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007325.4'} assert results['flag'] == 'gene_variant' - assert 'NM_001256743.1:c.-2dup' in results.keys() + assert 'NM_001256743.1:c.-2dup' in list(results.keys()) assert results['NM_001256743.1:c.-2dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001256743.1:c.-2dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001256743.1:c.-2dup']['alt_genomic_loci'] == [] @@ -6132,13 +6132,13 @@ def test_variant172(self): assert results['NM_001256743.1:c.-2dup']['hgvs_lrg_variant'] == '' assert results['NM_001256743.1:c.-2dup']['hgvs_transcript_variant'] == 'NM_001256743.1:c.-2dup' assert results['NM_001256743.1:c.-2dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001256743.1:c.-2dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': u'AGG'}} + assert 
results['NM_001256743.1:c.-2dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'AGG'}} assert results['NM_001256743.1:c.-2dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '123184533', 'alt': 'AG'}} - assert results['NM_001256743.1:c.-2dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': u'AGG'}} + assert results['NM_001256743.1:c.-2dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'AGG'}} assert results['NM_001256743.1:c.-2dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '123184533', 'alt': 'AG'}} assert results['NM_001256743.1:c.-2dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243672.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256743.1'} - assert 'NM_000828.4:c.-2dup' in results.keys() + assert 'NM_000828.4:c.-2dup' in list(results.keys()) assert results['NM_000828.4:c.-2dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000828.4:c.-2dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000828.4:c.-2dup']['alt_genomic_loci'] == [] @@ -6150,9 +6150,9 @@ def test_variant172(self): assert results['NM_000828.4:c.-2dup']['hgvs_lrg_variant'] == '' assert results['NM_000828.4:c.-2dup']['hgvs_transcript_variant'] == 'NM_000828.4:c.-2dup' assert results['NM_000828.4:c.-2dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': u'AGG'}} + assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'AGG'}} assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '123184533', 'alt': 'AG'}} - assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': u'AGG'}} + assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'AGG'}} assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '123184533', 'alt': 'AG'}} assert results['NM_000828.4:c.-2dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} @@ -6160,10 +6160,10 @@ def test_variant172(self): def test_variant173(self): variant = 'NM_000828.4:c.-2G>T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000828.4:c.-2G>T' in results.keys() + assert 'NM_000828.4:c.-2G>T' in list(results.keys()) assert results['NM_000828.4:c.-2G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000828.4:c.-2G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000828.4:c.-2G>T']['alt_genomic_loci'] == [] @@ -6175,19 +6175,19 @@ def test_variant173(self): assert 
results['NM_000828.4:c.-2G>T']['hgvs_lrg_variant'] == '' assert results['NM_000828.4:c.-2G>T']['hgvs_transcript_variant'] == 'NM_000828.4:c.-2G>T' assert results['NM_000828.4:c.-2G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': u'AT'}} - assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'chrX', 'ref': u'G', 'pos': '123184534', 'alt': u'T'}} - assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': u'AT'}} - assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'X', 'ref': u'G', 'pos': '123184534', 'alt': u'T'}} + assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'AT'}} + assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '123184534', 'alt': 'T'}} + assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'AT'}} + assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '123184534', 'alt': 'T'}} assert results['NM_000828.4:c.-2G>T']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} def test_variant174(self): variant = 'NM_000828.4:c.-2G=' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_000828.4:c.-2G=' in results.keys() + assert 'NM_000828.4:c.-2G=' in list(results.keys()) assert results['NM_000828.4:c.-2G=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000828.4:c.-2G=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000828.4:c.-2G=']['alt_genomic_loci'] == [] @@ -6199,10 +6199,10 @@ def test_variant174(self): assert results['NM_000828.4:c.-2G=']['hgvs_lrg_variant'] == '' assert results['NM_000828.4:c.-2G=']['hgvs_transcript_variant'] == 'NM_000828.4:c.-2G=' assert results['NM_000828.4:c.-2G=']['hgvs_refseqgene_variant'] == '' - assert results['NM_000828.4:c.-2G=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insG', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': u'AG'}} - assert results['NM_000828.4:c.-2G=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G=', 'vcf': {'chr': 'chrX', 'ref': u'G', 'pos': '123184534', 'alt': u'G'}} - assert results['NM_000828.4:c.-2G=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insG', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': u'AG'}} - assert results['NM_000828.4:c.-2G=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G=', 'vcf': {'chr': 'X', 'ref': u'G', 'pos': '123184534', 'alt': u'G'}} + assert results['NM_000828.4:c.-2G=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insG', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'AG'}} + assert results['NM_000828.4:c.-2G=']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000023.11:g.123184534G=', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '123184534', 'alt': 'G'}} + assert results['NM_000828.4:c.-2G=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insG', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'AG'}} + assert results['NM_000828.4:c.-2G=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G=', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '123184534', 'alt': 'G'}} assert results['NM_000828.4:c.-2G=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} assert results['flag'] == 'gene_variant' @@ -6210,10 +6210,10 @@ def test_variant174(self): def test_variant175(self): variant = 'X-122318386-A-AT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000828.4:c.-2G>T' in results.keys() + assert 'NM_000828.4:c.-2G>T' in list(results.keys()) assert results['NM_000828.4:c.-2G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000828.4:c.-2G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000828.4:c.-2G>T']['alt_genomic_loci'] == [] @@ -6225,13 +6225,13 @@ def test_variant175(self): assert results['NM_000828.4:c.-2G>T']['hgvs_lrg_variant'] == '' assert results['NM_000828.4:c.-2G>T']['hgvs_transcript_variant'] == 'NM_000828.4:c.-2G>T' assert results['NM_000828.4:c.-2G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': u'AT'}} - assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'chrX', 
'ref': u'G', 'pos': '123184534', 'alt': u'T'}} - assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': u'AT'}} - assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'X', 'ref': u'G', 'pos': '123184534', 'alt': u'T'}} + assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'AT'}} + assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '123184534', 'alt': 'T'}} + assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'AT'}} + assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '123184534', 'alt': 'T'}} assert results['NM_000828.4:c.-2G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} - assert 'NM_001256743.1:c.-2G>T' in results.keys() + assert 'NM_001256743.1:c.-2G>T' in list(results.keys()) assert results['NM_001256743.1:c.-2G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001256743.1:c.-2G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001256743.1:c.-2G>T']['alt_genomic_loci'] == [] @@ -6243,13 +6243,13 @@ def test_variant175(self): assert results['NM_001256743.1:c.-2G>T']['hgvs_lrg_variant'] == '' assert 
results['NM_001256743.1:c.-2G>T']['hgvs_transcript_variant'] == 'NM_001256743.1:c.-2G>T' assert results['NM_001256743.1:c.-2G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001256743.1:c.-2G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': u'AT'}} - assert results['NM_001256743.1:c.-2G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'chrX', 'ref': u'G', 'pos': '123184534', 'alt': u'T'}} - assert results['NM_001256743.1:c.-2G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': u'AT'}} - assert results['NM_001256743.1:c.-2G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'X', 'ref': u'G', 'pos': '123184534', 'alt': u'T'}} + assert results['NM_001256743.1:c.-2G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'AT'}} + assert results['NM_001256743.1:c.-2G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '123184534', 'alt': 'T'}} + assert results['NM_001256743.1:c.-2G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'AT'}} + assert results['NM_001256743.1:c.-2G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '123184534', 'alt': 'T'}} assert results['NM_001256743.1:c.-2G>T']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243672.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256743.1'} - assert 'NM_007325.4:c.-2G>T' in results.keys() + assert 'NM_007325.4:c.-2G>T' in list(results.keys()) assert results['NM_007325.4:c.-2G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007325.4:c.-2G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007325.4:c.-2G>T']['alt_genomic_loci'] == [] @@ -6261,20 +6261,20 @@ def test_variant175(self): assert results['NM_007325.4:c.-2G>T']['hgvs_lrg_variant'] == '' assert results['NM_007325.4:c.-2G>T']['hgvs_transcript_variant'] == 'NM_007325.4:c.-2G>T' assert results['NM_007325.4:c.-2G>T']['hgvs_refseqgene_variant'] == 'NG_009377.1:g.5292G>T' - assert results['NM_007325.4:c.-2G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': u'AT'}} - assert results['NM_007325.4:c.-2G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'chrX', 'ref': u'G', 'pos': '123184534', 'alt': u'T'}} - assert results['NM_007325.4:c.-2G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': u'AT'}} - assert results['NM_007325.4:c.-2G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'X', 'ref': u'G', 'pos': '123184534', 'alt': u'T'}} + assert results['NM_007325.4:c.-2G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'AT'}} + assert results['NM_007325.4:c.-2G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '123184534', 
'alt': 'T'}} + assert results['NM_007325.4:c.-2G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'AT'}} + assert results['NM_007325.4:c.-2G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '123184534', 'alt': 'T'}} assert results['NM_007325.4:c.-2G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009377.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_015564.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007325.4'} def test_variant176(self): variant = 'NM_000828.4:c.-2_-1insT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000828.4:c.-2_-1insT' in results.keys() + assert 'NM_000828.4:c.-2_-1insT' in list(results.keys()) assert results['NM_000828.4:c.-2_-1insT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000828.4:c.-2_-1insT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000828.4:c.-2_-1insT']['alt_genomic_loci'] == [] @@ -6287,18 +6287,18 @@ def test_variant176(self): assert results['NM_000828.4:c.-2_-1insT']['hgvs_transcript_variant'] == 'NM_000828.4:c.-2_-1insT' assert results['NM_000828.4:c.-2_-1insT']['hgvs_refseqgene_variant'] == '' assert results['NM_000828.4:c.-2_-1insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'AGT'}} - assert results['NM_000828.4:c.-2_-1insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534_123184535insT', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '123184534', 'alt': u'GT'}} + assert results['NM_000828.4:c.-2_-1insT']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000023.11:g.123184534_123184535insT', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '123184534', 'alt': 'GT'}} assert results['NM_000828.4:c.-2_-1insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'AGT'}} - assert results['NM_000828.4:c.-2_-1insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534_123184535insT', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '123184534', 'alt': u'GT'}} + assert results['NM_000828.4:c.-2_-1insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534_123184535insT', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '123184534', 'alt': 'GT'}} assert results['NM_000828.4:c.-2_-1insT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} def test_variant177(self): variant = 'NM_000828.4:c.-3_-2insT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_000828.4:c.-3_-2insT' in results.keys() + assert 'NM_000828.4:c.-3_-2insT' in list(results.keys()) assert results['NM_000828.4:c.-3_-2insT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000828.4:c.-3_-2insT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000828.4:c.-3_-2insT']['alt_genomic_loci'] == [] @@ -6311,9 +6311,9 @@ def test_variant177(self): assert results['NM_000828.4:c.-3_-2insT']['hgvs_transcript_variant'] == 'NM_000828.4:c.-3_-2insT' assert results['NM_000828.4:c.-3_-2insT']['hgvs_refseqgene_variant'] == '' assert results['NM_000828.4:c.-3_-2insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insTG', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'ATG'}} - assert 
results['NM_000828.4:c.-3_-2insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184533_123184534insT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '123184533', 'alt': u'AT'}} + assert results['NM_000828.4:c.-3_-2insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184533_123184534insT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '123184533', 'alt': 'AT'}} assert results['NM_000828.4:c.-3_-2insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insTG', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'ATG'}} - assert results['NM_000828.4:c.-3_-2insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184533_123184534insT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '123184533', 'alt': u'AT'}} + assert results['NM_000828.4:c.-3_-2insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184533_123184534insT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '123184533', 'alt': 'AT'}} assert results['NM_000828.4:c.-3_-2insT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} assert results['flag'] == 'gene_variant' @@ -6321,10 +6321,10 @@ def test_variant177(self): def test_variant178(self): variant = 'NM_000828.4:c.-2delGinsTT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000828.4:c.-2delinsTT' in results.keys() + assert 'NM_000828.4:c.-2delinsTT' in list(results.keys()) assert results['NM_000828.4:c.-2delinsTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000828.4:c.-2delinsTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000828.4:c.-2delinsTT']['alt_genomic_loci'] == [] @@ -6336,20 +6336,20 @@ def test_variant178(self): assert 
results['NM_000828.4:c.-2delinsTT']['hgvs_lrg_variant'] == '' assert results['NM_000828.4:c.-2delinsTT']['hgvs_transcript_variant'] == 'NM_000828.4:c.-2delinsTT' assert results['NM_000828.4:c.-2delinsTT']['hgvs_refseqgene_variant'] == '' - assert results['NM_000828.4:c.-2delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insTT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': u'ATT'}} - assert results['NM_000828.4:c.-2delinsTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534delinsTT', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '123184534', 'alt': u'TT'}} - assert results['NM_000828.4:c.-2delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insTT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': u'ATT'}} - assert results['NM_000828.4:c.-2delinsTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534delinsTT', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '123184534', 'alt': u'TT'}} + assert results['NM_000828.4:c.-2delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insTT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'ATT'}} + assert results['NM_000828.4:c.-2delinsTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534delinsTT', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '123184534', 'alt': 'TT'}} + assert results['NM_000828.4:c.-2delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insTT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'ATT'}} + assert results['NM_000828.4:c.-2delinsTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534delinsTT', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '123184534', 'alt': 'TT'}} assert 
results['NM_000828.4:c.-2delinsTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} def test_variant179(self): variant = 'NM_000828.4:c.-2_-1delGCinsTT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000828.4:c.-2_-1delinsTT' in results.keys() + assert 'NM_000828.4:c.-2_-1delinsTT' in list(results.keys()) assert results['NM_000828.4:c.-2_-1delinsTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000828.4:c.-2_-1delinsTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000828.4:c.-2_-1delinsTT']['alt_genomic_loci'] == [] @@ -6362,18 +6362,18 @@ def test_variant179(self): assert results['NM_000828.4:c.-2_-1delinsTT']['hgvs_transcript_variant'] == 'NM_000828.4:c.-2_-1delinsTT' assert results['NM_000828.4:c.-2_-1delinsTT']['hgvs_refseqgene_variant'] == '' assert results['NM_000828.4:c.-2_-1delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318387delinsTT', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '122318387', 'alt': 'TT'}} - assert results['NM_000828.4:c.-2_-1delinsTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534_123184535delinsTT', 'vcf': {'chr': 'chrX', 'ref': 'GC', 'pos': '123184534', 'alt': u'TT'}} + assert results['NM_000828.4:c.-2_-1delinsTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534_123184535delinsTT', 'vcf': {'chr': 'chrX', 'ref': 'GC', 'pos': '123184534', 'alt': 'TT'}} assert results['NM_000828.4:c.-2_-1delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318387delinsTT', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '122318387', 'alt': 'TT'}} - assert results['NM_000828.4:c.-2_-1delinsTT']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000023.11:g.123184534_123184535delinsTT', 'vcf': {'chr': 'X', 'ref': 'GC', 'pos': '123184534', 'alt': u'TT'}} + assert results['NM_000828.4:c.-2_-1delinsTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534_123184535delinsTT', 'vcf': {'chr': 'X', 'ref': 'GC', 'pos': '123184534', 'alt': 'TT'}} assert results['NM_000828.4:c.-2_-1delinsTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} def test_variant180(self): variant = 'NM_000828.4:c.-3_-2delAGinsTT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_000828.4:c.-3_-2delinsTT' in results.keys() + assert 'NM_000828.4:c.-3_-2delinsTT' in list(results.keys()) assert results['NM_000828.4:c.-3_-2delinsTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000828.4:c.-3_-2delinsTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000828.4:c.-3_-2delinsTT']['alt_genomic_loci'] == [] @@ -6386,9 +6386,9 @@ def test_variant180(self): assert results['NM_000828.4:c.-3_-2delinsTT']['hgvs_transcript_variant'] == 'NM_000828.4:c.-3_-2delinsTT' assert results['NM_000828.4:c.-3_-2delinsTT']['hgvs_refseqgene_variant'] == '' assert results['NM_000828.4:c.-3_-2delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386delinsTT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'TT'}} - assert results['NM_000828.4:c.-3_-2delinsTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184533_123184534delinsTT', 'vcf': {'chr': 'chrX', 'ref': 'AG', 'pos': '123184533', 'alt': u'TT'}} + assert results['NM_000828.4:c.-3_-2delinsTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184533_123184534delinsTT', 'vcf': {'chr': 'chrX', 'ref': 'AG', 'pos': '123184533', 
'alt': 'TT'}} assert results['NM_000828.4:c.-3_-2delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386delinsTT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'TT'}} - assert results['NM_000828.4:c.-3_-2delinsTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184533_123184534delinsTT', 'vcf': {'chr': 'X', 'ref': 'AG', 'pos': '123184533', 'alt': u'TT'}} + assert results['NM_000828.4:c.-3_-2delinsTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184533_123184534delinsTT', 'vcf': {'chr': 'X', 'ref': 'AG', 'pos': '123184533', 'alt': 'TT'}} assert results['NM_000828.4:c.-3_-2delinsTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} assert results['flag'] == 'gene_variant' @@ -6396,9 +6396,9 @@ def test_variant180(self): def test_variant181(self): variant = '15-72105929-C-C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_014249.3:c.951dup' in results.keys() + assert 'NM_014249.3:c.951dup' in list(results.keys()) assert results['NM_014249.3:c.951dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014249.3:c.951dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014249.3:c.951dup']['alt_genomic_loci'] == [] @@ -6416,7 +6416,7 @@ def test_variant181(self): assert results['NM_014249.3:c.951dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813592dup', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': 'AC'}} assert results['NM_014249.3:c.951dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3'} - assert 
'NM_014249.2:c.951dup' in results.keys() + assert 'NM_014249.2:c.951dup' in list(results.keys()) assert results['NM_014249.2:c.951dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014249.2:c.951dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014249.2:c.951dup']['alt_genomic_loci'] == [] @@ -6429,13 +6429,13 @@ def test_variant181(self): assert results['NM_014249.2:c.951dup']['hgvs_transcript_variant'] == 'NM_014249.2:c.951dup' assert results['NM_014249.2:c.951dup']['hgvs_refseqgene_variant'] == '' assert results['NM_014249.2:c.951dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': 'chr15', 'ref': 'GTGGACCCCCA', 'pos': '72105924', 'alt': 'GTGGACCCCCA'}} - assert 'hg38' not in results['NM_014249.2:c.951dup']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_014249.2:c.951dup']['primary_assembly_loci'].keys()) assert results['NM_014249.2:c.951dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': '15', 'ref': 'GTGGACCCCCA', 'pos': '72105924', 'alt': 'GTGGACCCCCA'}} - assert 'grch38' not in results['NM_014249.2:c.951dup']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_014249.2:c.951dup']['primary_assembly_loci'].keys()) assert results['NM_014249.2:c.951dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2'} assert results['flag'] == 'gene_variant' - assert 'NM_016346.3:c.951dup' in results.keys() + assert 'NM_016346.3:c.951dup' in list(results.keys()) assert results['NM_016346.3:c.951dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_016346.3:c.951dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_016346.3:c.951dup']['alt_genomic_loci'] == [] @@ -6453,7 +6453,7 @@ def test_variant181(self): assert 
results['NM_016346.3:c.951dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813592dup', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': 'AC'}} assert results['NM_016346.3:c.951dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3'} - assert 'NM_016346.2:c.951dup' in results.keys() + assert 'NM_016346.2:c.951dup' in list(results.keys()) assert results['NM_016346.2:c.951dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_016346.2:c.951dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_016346.2:c.951dup']['alt_genomic_loci'] == [] @@ -6466,18 +6466,18 @@ def test_variant181(self): assert results['NM_016346.2:c.951dup']['hgvs_transcript_variant'] == 'NM_016346.2:c.951dup' assert results['NM_016346.2:c.951dup']['hgvs_refseqgene_variant'] == '' assert results['NM_016346.2:c.951dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': 'chr15', 'ref': 'GTGGACCCCCA', 'pos': '72105924', 'alt': 'GTGGACCCCCA'}} - assert 'hg38' not in results['NM_016346.2:c.951dup']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_016346.2:c.951dup']['primary_assembly_loci'].keys()) assert results['NM_016346.2:c.951dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': '15', 'ref': 'GTGGACCCCCA', 'pos': '72105924', 'alt': 'GTGGACCCCCA'}} - assert 'grch38' not in results['NM_016346.2:c.951dup']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_016346.2:c.951dup']['primary_assembly_loci'].keys()) assert results['NM_016346.2:c.951dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2'} def test_variant182(self): variant 
= '15-72105928-AC-ATT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_014249.2:c.947_948insTT' in results.keys() + assert 'NM_014249.2:c.947_948insTT' in list(results.keys()) assert results['NM_014249.2:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014249.2:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014249.2:c.947_948insTT']['alt_genomic_loci'] == [] @@ -6490,12 +6490,12 @@ def test_variant182(self): assert results['NM_014249.2:c.947_948insTT']['hgvs_transcript_variant'] == 'NM_014249.2:c.947_948insTT' assert results['NM_014249.2:c.947_948insTT']['hgvs_refseqgene_variant'] == '' assert results['NM_014249.2:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} - assert 'hg38' not in results['NM_014249.2:c.947_948insTT']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_014249.2:c.947_948insTT']['primary_assembly_loci'].keys()) assert results['NM_014249.2:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} - assert 'grch38' not in results['NM_014249.2:c.947_948insTT']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_014249.2:c.947_948insTT']['primary_assembly_loci'].keys()) assert results['NM_014249.2:c.947_948insTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2'} - assert 'NM_016346.3:c.947_948insTT' in results.keys() + assert 'NM_016346.3:c.947_948insTT' in list(results.keys()) assert results['NM_016346.3:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_016346.3:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_016346.3:c.947_948insTT']['alt_genomic_loci'] == [] @@ -6508,13 +6508,13 @@ def test_variant182(self): assert results['NM_016346.3:c.947_948insTT']['hgvs_transcript_variant'] == 'NM_016346.3:c.947_948insTT' assert results['NM_016346.3:c.947_948insTT']['hgvs_refseqgene_variant'] == '' assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} - assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': u'ATT'}} + assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': 'ATT'}} assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} - assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': u'ATT'}} + assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': 'ATT'}} assert results['NM_016346.3:c.947_948insTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3'} assert results['flag'] == 'gene_variant' - assert 'NM_016346.2:c.947_948insTT' in 
results.keys() + assert 'NM_016346.2:c.947_948insTT' in list(results.keys()) assert results['NM_016346.2:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_016346.2:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_016346.2:c.947_948insTT']['alt_genomic_loci'] == [] @@ -6527,12 +6527,12 @@ def test_variant182(self): assert results['NM_016346.2:c.947_948insTT']['hgvs_transcript_variant'] == 'NM_016346.2:c.947_948insTT' assert results['NM_016346.2:c.947_948insTT']['hgvs_refseqgene_variant'] == '' assert results['NM_016346.2:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} - assert 'hg38' not in results['NM_016346.2:c.947_948insTT']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_016346.2:c.947_948insTT']['primary_assembly_loci'].keys()) assert results['NM_016346.2:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} - assert 'grch38' not in results['NM_016346.2:c.947_948insTT']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_016346.2:c.947_948insTT']['primary_assembly_loci'].keys()) assert results['NM_016346.2:c.947_948insTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2'} - assert 'NM_014249.3:c.947_948insTT' in results.keys() + assert 'NM_014249.3:c.947_948insTT' in list(results.keys()) assert results['NM_014249.3:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014249.3:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014249.3:c.947_948insTT']['alt_genomic_loci'] == [] @@ -6545,18 +6545,18 @@ def test_variant182(self): assert 
results['NM_014249.3:c.947_948insTT']['hgvs_transcript_variant'] == 'NM_014249.3:c.947_948insTT' assert results['NM_014249.3:c.947_948insTT']['hgvs_refseqgene_variant'] == 'NG_009113.1:g.8035_8036insTT' assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} - assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': u'ATT'}} + assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': 'ATT'}} assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} - assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': u'ATT'}} + assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': 'ATT'}} assert results['NM_014249.3:c.947_948insTT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3'} def test_variant183(self): variant = '15-72105928-ACC-ATT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_014249.2:c.947_948insTT' in results.keys() + 
assert 'NM_014249.2:c.947_948insTT' in list(results.keys()) assert results['NM_014249.2:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014249.2:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014249.2:c.947_948insTT']['alt_genomic_loci'] == [] @@ -6569,12 +6569,12 @@ def test_variant183(self): assert results['NM_014249.2:c.947_948insTT']['hgvs_transcript_variant'] == 'NM_014249.2:c.947_948insTT' assert results['NM_014249.2:c.947_948insTT']['hgvs_refseqgene_variant'] == '' assert results['NM_014249.2:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} - assert 'hg38' not in results['NM_014249.2:c.947_948insTT']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_014249.2:c.947_948insTT']['primary_assembly_loci'].keys()) assert results['NM_014249.2:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} - assert 'grch38' not in results['NM_014249.2:c.947_948insTT']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_014249.2:c.947_948insTT']['primary_assembly_loci'].keys()) assert results['NM_014249.2:c.947_948insTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2'} - assert 'NM_016346.3:c.947_948insTT' in results.keys() + assert 'NM_016346.3:c.947_948insTT' in list(results.keys()) assert results['NM_016346.3:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_016346.3:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_016346.3:c.947_948insTT']['alt_genomic_loci'] == [] @@ -6587,13 +6587,13 @@ def test_variant183(self): assert 
results['NM_016346.3:c.947_948insTT']['hgvs_transcript_variant'] == 'NM_016346.3:c.947_948insTT' assert results['NM_016346.3:c.947_948insTT']['hgvs_refseqgene_variant'] == '' assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} - assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': u'ATT'}} + assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': 'ATT'}} assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} - assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': u'ATT'}} + assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': 'ATT'}} assert results['NM_016346.3:c.947_948insTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3'} assert results['flag'] == 'gene_variant' - assert 'NM_016346.2:c.947_948insTT' in results.keys() + assert 'NM_016346.2:c.947_948insTT' in list(results.keys()) assert results['NM_016346.2:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_016346.2:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_016346.2:c.947_948insTT']['alt_genomic_loci'] == [] @@ -6606,12 +6606,12 @@ def test_variant183(self): assert results['NM_016346.2:c.947_948insTT']['hgvs_transcript_variant'] == 'NM_016346.2:c.947_948insTT' assert results['NM_016346.2:c.947_948insTT']['hgvs_refseqgene_variant'] == '' assert results['NM_016346.2:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} - assert 'hg38' not in results['NM_016346.2:c.947_948insTT']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_016346.2:c.947_948insTT']['primary_assembly_loci'].keys()) assert results['NM_016346.2:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} - assert 'grch38' not in results['NM_016346.2:c.947_948insTT']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_016346.2:c.947_948insTT']['primary_assembly_loci'].keys()) assert results['NM_016346.2:c.947_948insTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2'} - assert 'NM_014249.3:c.947_948insTT' in results.keys() + assert 'NM_014249.3:c.947_948insTT' in list(results.keys()) assert results['NM_014249.3:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014249.3:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014249.3:c.947_948insTT']['alt_genomic_loci'] == [] @@ -6624,18 +6624,18 @@ def test_variant183(self): assert results['NM_014249.3:c.947_948insTT']['hgvs_transcript_variant'] == 'NM_014249.3:c.947_948insTT' assert results['NM_014249.3:c.947_948insTT']['hgvs_refseqgene_variant'] 
== 'NG_009113.1:g.8035_8036insTT' assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} - assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': u'ATT'}} + assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': 'ATT'}} assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} - assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': u'ATT'}} + assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': 'ATT'}} assert results['NM_014249.3:c.947_948insTT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3'} def test_variant184(self): variant = '15-72105927-GACC-GTT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_014249.3:c.947delinsTT' in results.keys() + assert 'NM_014249.3:c.947delinsTT' in list(results.keys()) assert results['NM_014249.3:c.947delinsTT']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_014249.3:c.947delinsTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014249.3:c.947delinsTT']['alt_genomic_loci'] == [] @@ -6648,12 +6648,12 @@ def test_variant184(self): assert results['NM_014249.3:c.947delinsTT']['hgvs_transcript_variant'] == 'NM_014249.3:c.947delinsTT' assert results['NM_014249.3:c.947delinsTT']['hgvs_refseqgene_variant'] == 'NG_009113.1:g.8035delinsTT' assert results['NM_014249.3:c.947delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'TT'}} - assert results['NM_014249.3:c.947delinsTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': u'TT'}} + assert results['NM_014249.3:c.947delinsTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': 'TT'}} assert results['NM_014249.3:c.947delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'TT'}} - assert results['NM_014249.3:c.947delinsTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588delinsTT', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': u'TT'}} + assert results['NM_014249.3:c.947delinsTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588delinsTT', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': 'TT'}} assert results['NM_014249.3:c.947delinsTT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3'} - assert 
'NM_016346.2:c.947delinsTT' in results.keys() + assert 'NM_016346.2:c.947delinsTT' in list(results.keys()) assert results['NM_016346.2:c.947delinsTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_016346.2:c.947delinsTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_016346.2:c.947delinsTT']['alt_genomic_loci'] == [] @@ -6666,12 +6666,12 @@ def test_variant184(self): assert results['NM_016346.2:c.947delinsTT']['hgvs_transcript_variant'] == 'NM_016346.2:c.947delinsTT' assert results['NM_016346.2:c.947delinsTT']['hgvs_refseqgene_variant'] == '' assert results['NM_016346.2:c.947delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'TT'}} - assert 'hg38' not in results['NM_016346.2:c.947delinsTT']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_016346.2:c.947delinsTT']['primary_assembly_loci'].keys()) assert results['NM_016346.2:c.947delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'TT'}} - assert 'grch38' not in results['NM_016346.2:c.947delinsTT']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_016346.2:c.947delinsTT']['primary_assembly_loci'].keys()) assert results['NM_016346.2:c.947delinsTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2'} - assert 'NM_014249.2:c.947delinsTT' in results.keys() + assert 'NM_014249.2:c.947delinsTT' in list(results.keys()) assert results['NM_014249.2:c.947delinsTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014249.2:c.947delinsTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014249.2:c.947delinsTT']['alt_genomic_loci'] == [] @@ -6684,13 +6684,13 @@ def 
test_variant184(self): assert results['NM_014249.2:c.947delinsTT']['hgvs_transcript_variant'] == 'NM_014249.2:c.947delinsTT' assert results['NM_014249.2:c.947delinsTT']['hgvs_refseqgene_variant'] == '' assert results['NM_014249.2:c.947delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'TT'}} - assert 'hg38' not in results['NM_014249.2:c.947delinsTT']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_014249.2:c.947delinsTT']['primary_assembly_loci'].keys()) assert results['NM_014249.2:c.947delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'TT'}} - assert 'grch38' not in results['NM_014249.2:c.947delinsTT']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_014249.2:c.947delinsTT']['primary_assembly_loci'].keys()) assert results['NM_014249.2:c.947delinsTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2'} assert results['flag'] == 'gene_variant' - assert 'NM_016346.3:c.947delinsTT' in results.keys() + assert 'NM_016346.3:c.947delinsTT' in list(results.keys()) assert results['NM_016346.3:c.947delinsTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_016346.3:c.947delinsTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_016346.3:c.947delinsTT']['alt_genomic_loci'] == [] @@ -6703,19 +6703,19 @@ def test_variant184(self): assert results['NM_016346.3:c.947delinsTT']['hgvs_transcript_variant'] == 'NM_016346.3:c.947delinsTT' assert results['NM_016346.3:c.947delinsTT']['hgvs_refseqgene_variant'] == '' assert results['NM_016346.3:c.947delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'TT'}} - assert results['NM_016346.3:c.947delinsTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': u'TT'}} + assert results['NM_016346.3:c.947delinsTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': 'TT'}} assert results['NM_016346.3:c.947delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'TT'}} - assert results['NM_016346.3:c.947delinsTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588delinsTT', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': u'TT'}} + assert results['NM_016346.3:c.947delinsTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588delinsTT', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': 'TT'}} assert results['NM_016346.3:c.947delinsTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3'} def test_variant185(self): variant = '19-41123093-A-AG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_001042544.1:c.3233_3235=' in results.keys() + assert 'NM_001042544.1:c.3233_3235=' in list(results.keys()) assert results['NM_001042544.1:c.3233_3235=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001042544.1:c.3233_3235=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001042544.1:c.3233_3235=']['alt_genomic_loci'] == [] @@ -6728,12 +6728,12 @@ def 
test_variant185(self): assert results['NM_001042544.1:c.3233_3235=']['hgvs_transcript_variant'] == 'NM_001042544.1:c.3233_3235=' assert results['NM_001042544.1:c.3233_3235=']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29022_29024=' assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'ref': u'AGG', 'pos': '40617187', 'alt': u'AGG'}} + assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'AGG'}} assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'ref': u'AGG', 'pos': '40617187', 'alt': u'AGG'}} + assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'AGG'}} assert results['NM_001042544.1:c.3233_3235=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1'} - assert 'NM_001042545.1:c.3032_3034=' in results.keys() + assert 'NM_001042545.1:c.3032_3034=' in list(results.keys()) assert 
results['NM_001042545.1:c.3032_3034=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001042545.1:c.3032_3034=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001042545.1:c.3032_3034=']['alt_genomic_loci'] == [] @@ -6746,12 +6746,12 @@ def test_variant185(self): assert results['NM_001042545.1:c.3032_3034=']['hgvs_transcript_variant'] == 'NM_001042545.1:c.3032_3034=' assert results['NM_001042545.1:c.3032_3034=']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29022_29024=' assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'ref': u'AGG', 'pos': '40617187', 'alt': u'AGG'}} + assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'AGG'}} assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'ref': u'AGG', 'pos': '40617187', 'alt': u'AGG'}} + assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'AGG'}} assert results['NM_001042545.1:c.3032_3034=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1'} - assert 'NM_003573.2:c.3122_3124=' in results.keys() + assert 'NM_003573.2:c.3122_3124=' in list(results.keys()) assert results['NM_003573.2:c.3122_3124=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003573.2:c.3122_3124=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003573.2:c.3122_3124=']['alt_genomic_loci'] == [] @@ -6764,18 +6764,18 @@ def test_variant185(self): assert results['NM_003573.2:c.3122_3124=']['hgvs_transcript_variant'] == 'NM_003573.2:c.3122_3124=' assert results['NM_003573.2:c.3122_3124=']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29022_29024=' assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'ref': u'AGG', 'pos': '40617187', 'alt': u'AGG'}} + assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'AGG'}} assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'ref': u'AGG', 'pos': '40617187', 'alt': u'AGG'}} + assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'ref': 'AGG', 'pos': 
'40617187', 'alt': 'AGG'}} assert results['NM_003573.2:c.3122_3124=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2'} def test_variant186(self): variant = '19-41123093-A-AT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_003573.2:c.3123G>T' in results.keys() + assert 'NM_003573.2:c.3123G>T' in list(results.keys()) assert results['NM_003573.2:c.3123G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003573.2:c.3123G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003573.2:c.3123G>T']['alt_genomic_loci'] == [] @@ -6787,14 +6787,14 @@ def test_variant186(self): assert results['NM_003573.2:c.3123G>T']['hgvs_lrg_variant'] == '' assert results['NM_003573.2:c.3123G>T']['hgvs_transcript_variant'] == 'NM_003573.2:c.3123G>T' assert results['NM_003573.2:c.3123G>T']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29023G>T' - assert results['NM_003573.2:c.3123G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': u'AT'}} - assert results['NM_003573.2:c.3123G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': 'chr19', 'ref': u'G', 'pos': '40617188', 'alt': u'T'}} - assert results['NM_003573.2:c.3123G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': u'AT'}} - assert results['NM_003573.2:c.3123G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': '19', 'ref': u'G', 'pos': '40617188', 'alt': u'T'}} + assert 
results['NM_003573.2:c.3123G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': 'AT'}} + assert results['NM_003573.2:c.3123G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': 'chr19', 'ref': 'G', 'pos': '40617188', 'alt': 'T'}} + assert results['NM_003573.2:c.3123G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': 'AT'}} + assert results['NM_003573.2:c.3123G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': '19', 'ref': 'G', 'pos': '40617188', 'alt': 'T'}} assert results['NM_003573.2:c.3123G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2'} assert results['flag'] == 'gene_variant' - assert 'NM_001042545.1:c.3033G>T' in results.keys() + assert 'NM_001042545.1:c.3033G>T' in list(results.keys()) assert results['NM_001042545.1:c.3033G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001042545.1:c.3033G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001042545.1:c.3033G>T']['alt_genomic_loci'] == [] @@ -6806,13 +6806,13 @@ def test_variant186(self): assert results['NM_001042545.1:c.3033G>T']['hgvs_lrg_variant'] == '' assert results['NM_001042545.1:c.3033G>T']['hgvs_transcript_variant'] == 'NM_001042545.1:c.3033G>T' assert results['NM_001042545.1:c.3033G>T']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29023G>T' - assert results['NM_001042545.1:c.3033G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': 'chr19', 'ref': 'A', 
'pos': '41123093', 'alt': u'AT'}} - assert results['NM_001042545.1:c.3033G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': 'chr19', 'ref': u'G', 'pos': '40617188', 'alt': u'T'}} - assert results['NM_001042545.1:c.3033G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': u'AT'}} - assert results['NM_001042545.1:c.3033G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': '19', 'ref': u'G', 'pos': '40617188', 'alt': u'T'}} + assert results['NM_001042545.1:c.3033G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': 'AT'}} + assert results['NM_001042545.1:c.3033G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': 'chr19', 'ref': 'G', 'pos': '40617188', 'alt': 'T'}} + assert results['NM_001042545.1:c.3033G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': 'AT'}} + assert results['NM_001042545.1:c.3033G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': '19', 'ref': 'G', 'pos': '40617188', 'alt': 'T'}} assert results['NM_001042545.1:c.3033G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1'} - assert 'NM_001042544.1:c.3234G>T' in results.keys() + assert 'NM_001042544.1:c.3234G>T' in list(results.keys()) assert results['NM_001042544.1:c.3234G>T']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001042544.1:c.3234G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001042544.1:c.3234G>T']['alt_genomic_loci'] == [] @@ -6824,19 +6824,19 @@ def test_variant186(self): assert results['NM_001042544.1:c.3234G>T']['hgvs_lrg_variant'] == '' assert results['NM_001042544.1:c.3234G>T']['hgvs_transcript_variant'] == 'NM_001042544.1:c.3234G>T' assert results['NM_001042544.1:c.3234G>T']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29023G>T' - assert results['NM_001042544.1:c.3234G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': u'AT'}} - assert results['NM_001042544.1:c.3234G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': 'chr19', 'ref': u'G', 'pos': '40617188', 'alt': u'T'}} - assert results['NM_001042544.1:c.3234G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': u'AT'}} - assert results['NM_001042544.1:c.3234G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': '19', 'ref': u'G', 'pos': '40617188', 'alt': u'T'}} + assert results['NM_001042544.1:c.3234G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': 'AT'}} + assert results['NM_001042544.1:c.3234G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': 'chr19', 'ref': 'G', 'pos': '40617188', 'alt': 'T'}} + assert results['NM_001042544.1:c.3234G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': 'AT'}} + assert 
results['NM_001042544.1:c.3234G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': '19', 'ref': 'G', 'pos': '40617188', 'alt': 'T'}} assert results['NM_001042544.1:c.3234G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1'} def test_variant187(self): variant = '19-41123093-AG-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001042544.1:c.3235_3236del' in results.keys() + assert 'NM_001042544.1:c.3235_3236del' in list(results.keys()) assert results['NM_001042544.1:c.3235_3236del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001042544.1:c.3235_3236del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001042544.1:c.3235_3236del']['alt_genomic_loci'] == [] @@ -6855,7 +6855,7 @@ def test_variant187(self): assert results['NM_001042544.1:c.3235_3236del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1'} assert results['flag'] == 'gene_variant' - assert 'NM_001042545.1:c.3034_3035del' in results.keys() + assert 'NM_001042545.1:c.3034_3035del' in list(results.keys()) assert results['NM_001042545.1:c.3034_3035del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001042545.1:c.3034_3035del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001042545.1:c.3034_3035del']['alt_genomic_loci'] == [] @@ -6873,7 +6873,7 @@ def test_variant187(self): assert results['NM_001042545.1:c.3034_3035del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617189_40617190del', 'vcf': {'chr': '19', 'ref': 'AGG', 'pos': 
'40617187', 'alt': 'A'}} assert results['NM_001042545.1:c.3034_3035del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1'} - assert 'NM_003573.2:c.3124_3125del' in results.keys() + assert 'NM_003573.2:c.3124_3125del' in list(results.keys()) assert results['NM_003573.2:c.3124_3125del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003573.2:c.3124_3125del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003573.2:c.3124_3125del']['alt_genomic_loci'] == [] @@ -6895,9 +6895,9 @@ def test_variant187(self): def test_variant188(self): variant = '19-41123093-AG-AG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001042545.1:c.3035del' in results.keys() + assert 'NM_001042545.1:c.3035del' in list(results.keys()) assert results['NM_001042545.1:c.3035del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001042545.1:c.3035del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001042545.1:c.3035del']['alt_genomic_loci'] == [] @@ -6916,7 +6916,7 @@ def test_variant188(self): assert results['NM_001042545.1:c.3035del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1'} assert results['flag'] == 'gene_variant' - assert 'NM_001042544.1:c.3236del' in results.keys() + assert 'NM_001042544.1:c.3236del' in list(results.keys()) assert results['NM_001042544.1:c.3236del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001042544.1:c.3236del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001042544.1:c.3236del']['alt_genomic_loci'] == [] @@ -6934,7 +6934,7 @@ def test_variant188(self): assert 
results['NM_001042544.1:c.3236del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617190del', 'vcf': {'chr': '19', 'ref': 'AG', 'pos': '40617187', 'alt': 'A'}} assert results['NM_001042544.1:c.3236del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1'} - assert 'NM_003573.2:c.3125del' in results.keys() + assert 'NM_003573.2:c.3125del' in list(results.keys()) assert results['NM_003573.2:c.3125del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003573.2:c.3125del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003573.2:c.3125del']['alt_genomic_loci'] == [] @@ -6956,13 +6956,13 @@ def test_variant188(self): def test_variant189(self): variant = 'NM_012309.4:c.913-5058G>A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_012309.4:c.913-5058G>A' in results.keys() + assert 'NM_012309.4:c.913-5058G>A' in list(results.keys()) assert results['NM_012309.4:c.913-5058G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_012309.4:c.913-5058G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_012309.4:c.913-5058G>A']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'HG865_PATCH', 'ref': u'C', 'pos': '574546', 'alt': u'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'NW_004070871.1', 'ref': u'C', 'pos': '574546', 'alt': u'T'}}}] + assert results['NM_012309.4:c.913-5058G>A']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'C', 'pos': '574546', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': 
{'chr': 'NW_004070871.1', 'ref': 'C', 'pos': '574546', 'alt': 'T'}}}] assert results['NM_012309.4:c.913-5058G>A']['transcript_description'] == 'Homo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 1, mRNA' assert results['NM_012309.4:c.913-5058G>A']['gene_symbol'] == 'SHANK2' assert results['NM_012309.4:c.913-5058G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_036441.2:p.?', 'slr': 'NP_036441.2:p.?'} @@ -6971,20 +6971,20 @@ def test_variant189(self): assert results['NM_012309.4:c.913-5058G>A']['hgvs_lrg_variant'] == '' assert results['NM_012309.4:c.913-5058G>A']['hgvs_transcript_variant'] == 'NM_012309.4:c.913-5058G>A' assert results['NM_012309.4:c.913-5058G>A']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci'].keys() - assert results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71080333C>T', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '71080333', 'alt': u'T'}} - assert 'grch37' not in results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci'].keys() - assert results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71080333C>T', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '71080333', 'alt': u'T'}} + assert 'hg19' not in list(results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci'].keys()) + assert results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71080333C>T', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '71080333', 'alt': 'T'}} + assert 'grch37' not in list(results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci'].keys()) + assert results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71080333C>T', 'vcf': {'chr': '11', 'ref': 'C', 'pos': '71080333', 'alt': 'T'}} assert 
results['NM_012309.4:c.913-5058G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_036441.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_012309.4'} def test_variant190(self): variant = 'LRG_199t1:c.2376[G>C];[G>C]' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_004006.2:c.2376G>C' in results.keys() + assert 'NM_004006.2:c.2376G>C' in list(results.keys()) assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.2376G>C' assert results['NM_004006.2:c.2376G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.2376G>C']['alt_genomic_loci'] == [] @@ -6996,19 +6996,19 @@ def test_variant190(self): assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_variant'] == 'LRG_199:g.842851G>C' assert results['NM_004006.2:c.2376G>C']['hgvs_transcript_variant'] == 'NM_004006.2:c.2376G>C' assert results['NM_004006.2:c.2376G>C']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.842851G>C' - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'chrX', 'ref': u'C', 'pos': '32519876', 'alt': u'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'chrX', 'ref': u'C', 'pos': '32501759', 'alt': u'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'X', 'ref': u'C', 'pos': '32519876', 'alt': u'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'X', 'ref': u'C', 'pos': '32501759', 'alt': u'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '32519876', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '32501759', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '32519876', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '32501759', 'alt': 'G'}} assert results['NM_004006.2:c.2376G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} def test_variant191(self): variant = 'LRG_199t1:c.[2376G>C];[3103del]' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_004006.2:c.3103del' in results.keys() + assert 'NM_004006.2:c.3103del' in list(results.keys()) assert results['NM_004006.2:c.3103del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.3103del' assert results['NM_004006.2:c.3103del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.3103del']['alt_genomic_loci'] == [] @@ -7027,7 +7027,7 @@ def test_variant191(self): assert results['NM_004006.2:c.3103del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} assert results['flag'] == 
'gene_variant' - assert 'NM_004006.2:c.2376G>C' in results.keys() + assert 'NM_004006.2:c.2376G>C' in list(results.keys()) assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.2376G>C' assert results['NM_004006.2:c.2376G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.2376G>C']['alt_genomic_loci'] == [] @@ -7039,20 +7039,20 @@ def test_variant191(self): assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_variant'] == 'LRG_199:g.842851G>C' assert results['NM_004006.2:c.2376G>C']['hgvs_transcript_variant'] == 'NM_004006.2:c.2376G>C' assert results['NM_004006.2:c.2376G>C']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.842851G>C' - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'chrX', 'ref': u'C', 'pos': '32519876', 'alt': u'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'chrX', 'ref': u'C', 'pos': '32501759', 'alt': u'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'X', 'ref': u'C', 'pos': '32519876', 'alt': u'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'X', 'ref': u'C', 'pos': '32501759', 'alt': u'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '32519876', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '32501759', 'alt': 'G'}} + assert 
results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '32519876', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '32501759', 'alt': 'G'}} assert results['NM_004006.2:c.2376G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} def test_variant192(self): variant = 'LRG_199t1:c.[4358_4359del;4361_4372del]' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_004006.2:c.4358_4372delinsG' in results.keys() + assert 'NM_004006.2:c.4358_4372delinsG' in list(results.keys()) assert results['NM_004006.2:c.4358_4372delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.4358_4372delinsG' assert results['NM_004006.2:c.4358_4372delinsG']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.4358_4372delinsG']['alt_genomic_loci'] == [] @@ -7064,13 +7064,13 @@ def test_variant192(self): assert results['NM_004006.2:c.4358_4372delinsG']['hgvs_lrg_variant'] == 'LRG_199:g.954949_954963delinsG' assert results['NM_004006.2:c.4358_4372delinsG']['hgvs_transcript_variant'] == 'NM_004006.2:c.4358_4372delinsG' assert results['NM_004006.2:c.4358_4372delinsG']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.954949_954963delinsG' - assert results['NM_004006.2:c.4358_4372delinsG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32407764_32407778delinsC', 'vcf': {'chr': 'chrX', 'ref': 'ACTTCATGGAGACAT', 'pos': '32407764', 'alt': u'C'}} - assert 
results['NM_004006.2:c.4358_4372delinsG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32389647_32389661delinsC', 'vcf': {'chr': 'chrX', 'ref': 'ACTTCATGGAGACAT', 'pos': '32389647', 'alt': u'C'}} - assert results['NM_004006.2:c.4358_4372delinsG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32407764_32407778delinsC', 'vcf': {'chr': 'X', 'ref': 'ACTTCATGGAGACAT', 'pos': '32407764', 'alt': u'C'}} - assert results['NM_004006.2:c.4358_4372delinsG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32389647_32389661delinsC', 'vcf': {'chr': 'X', 'ref': 'ACTTCATGGAGACAT', 'pos': '32389647', 'alt': u'C'}} + assert results['NM_004006.2:c.4358_4372delinsG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32407764_32407778delinsC', 'vcf': {'chr': 'chrX', 'ref': 'ACTTCATGGAGACAT', 'pos': '32407764', 'alt': 'C'}} + assert results['NM_004006.2:c.4358_4372delinsG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32389647_32389661delinsC', 'vcf': {'chr': 'chrX', 'ref': 'ACTTCATGGAGACAT', 'pos': '32389647', 'alt': 'C'}} + assert results['NM_004006.2:c.4358_4372delinsG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32407764_32407778delinsC', 'vcf': {'chr': 'X', 'ref': 'ACTTCATGGAGACAT', 'pos': '32407764', 'alt': 'C'}} + assert results['NM_004006.2:c.4358_4372delinsG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32389647_32389661delinsC', 'vcf': {'chr': 'X', 'ref': 'ACTTCATGGAGACAT', 'pos': '32389647', 'alt': 'C'}} assert results['NM_004006.2:c.4358_4372delinsG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 
'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} - assert 'NM_004006.2:c.4358_4359del' in results.keys() + assert 'NM_004006.2:c.4358_4359del' in list(results.keys()) assert results['NM_004006.2:c.4358_4359del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.4358_4359del' assert results['NM_004006.2:c.4358_4359del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.4358_4359del']['alt_genomic_loci'] == [] @@ -7088,7 +7088,7 @@ def test_variant192(self): assert results['NM_004006.2:c.4358_4359del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32389660_32389661del', 'vcf': {'chr': 'X', 'ref': 'CAT', 'pos': '32389659', 'alt': 'C'}} assert results['NM_004006.2:c.4358_4359del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} - assert 'NM_004006.2:c.4362_4373del' in results.keys() + assert 'NM_004006.2:c.4362_4373del' in list(results.keys()) assert results['NM_004006.2:c.4362_4373del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.4362_4373del' assert results['NM_004006.2:c.4362_4373del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.4362_4373del']['alt_genomic_loci'] == [] @@ -7110,9 +7110,9 @@ def test_variant192(self): def test_variant193(self): variant = 'LRG_199t1:c.2376G>C(;)3103del' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_004006.2:c.3103del' in results.keys() + assert 'NM_004006.2:c.3103del' in list(results.keys()) assert results['NM_004006.2:c.3103del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.3103del' assert results['NM_004006.2:c.3103del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.3103del']['alt_genomic_loci'] == [] @@ 
-7131,7 +7131,7 @@ def test_variant193(self): assert results['NM_004006.2:c.3103del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} assert results['flag'] == 'gene_variant' - assert 'NM_004006.2:c.2376G>C' in results.keys() + assert 'NM_004006.2:c.2376G>C' in list(results.keys()) assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.2376G>C' assert results['NM_004006.2:c.2376G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.2376G>C']['alt_genomic_loci'] == [] @@ -7143,20 +7143,20 @@ def test_variant193(self): assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_variant'] == 'LRG_199:g.842851G>C' assert results['NM_004006.2:c.2376G>C']['hgvs_transcript_variant'] == 'NM_004006.2:c.2376G>C' assert results['NM_004006.2:c.2376G>C']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.842851G>C' - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'chrX', 'ref': u'C', 'pos': '32519876', 'alt': u'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'chrX', 'ref': u'C', 'pos': '32501759', 'alt': u'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'X', 'ref': u'C', 'pos': '32519876', 'alt': u'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'X', 'ref': u'C', 'pos': '32501759', 'alt': u'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg19'] 
== {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '32519876', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '32501759', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '32519876', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '32501759', 'alt': 'G'}} assert results['NM_004006.2:c.2376G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} def test_variant194(self): variant = 'LRG_199t1:c.2376[G>C];[(G>C)]' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_004006.2:c.2376G>C' in results.keys() + assert 'NM_004006.2:c.2376G>C' in list(results.keys()) assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.2376G>C' assert results['NM_004006.2:c.2376G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.2376G>C']['alt_genomic_loci'] == [] @@ -7168,20 +7168,20 @@ def test_variant194(self): assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_variant'] == 'LRG_199:g.842851G>C' assert results['NM_004006.2:c.2376G>C']['hgvs_transcript_variant'] == 'NM_004006.2:c.2376G>C' assert results['NM_004006.2:c.2376G>C']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.842851G>C' - assert 
results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'chrX', 'ref': u'C', 'pos': '32519876', 'alt': u'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'chrX', 'ref': u'C', 'pos': '32501759', 'alt': u'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'X', 'ref': u'C', 'pos': '32519876', 'alt': u'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'X', 'ref': u'C', 'pos': '32501759', 'alt': u'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '32519876', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '32501759', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '32519876', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '32501759', 'alt': 'G'}} assert results['NM_004006.2:c.2376G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} def test_variant195(self): variant = 
'LRG_199t1:c.[2376G>C];[?]' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_004006.2:c.2376G>C' in results.keys() + assert 'NM_004006.2:c.2376G>C' in list(results.keys()) assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.2376G>C' assert results['NM_004006.2:c.2376G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.2376G>C']['alt_genomic_loci'] == [] @@ -7193,19 +7193,19 @@ def test_variant195(self): assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_variant'] == 'LRG_199:g.842851G>C' assert results['NM_004006.2:c.2376G>C']['hgvs_transcript_variant'] == 'NM_004006.2:c.2376G>C' assert results['NM_004006.2:c.2376G>C']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.842851G>C' - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'chrX', 'ref': u'C', 'pos': '32519876', 'alt': u'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'chrX', 'ref': u'C', 'pos': '32501759', 'alt': u'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'X', 'ref': u'C', 'pos': '32519876', 'alt': u'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'X', 'ref': u'C', 'pos': '32501759', 'alt': u'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '32519876', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 
'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '32501759', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '32519876', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '32501759', 'alt': 'G'}} assert results['NM_004006.2:c.2376G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} def test_variant196(self): variant = 'LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_004006.2:c.476T=' in results.keys() + assert 'NM_004006.2:c.476T=' in list(results.keys()) assert results['NM_004006.2:c.476T=']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.476T=' assert results['NM_004006.2:c.476T=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.476T=']['alt_genomic_loci'] == [] @@ -7217,13 +7217,13 @@ def test_variant196(self): assert results['NM_004006.2:c.476T=']['hgvs_lrg_variant'] == 'LRG_199:g.528088T=' assert results['NM_004006.2:c.476T=']['hgvs_transcript_variant'] == 'NM_004006.2:c.476T=' assert results['NM_004006.2:c.476T=']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.528088T=' - assert results['NM_004006.2:c.476T=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639A=', 'vcf': {'chr': 'chrX', 'ref': u'A', 'pos': '32834639', 'alt': u'A'}} - assert results['NM_004006.2:c.476T=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522A=', 'vcf': {'chr': 'chrX', 
'ref': u'A', 'pos': '32816522', 'alt': u'A'}} - assert results['NM_004006.2:c.476T=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639A=', 'vcf': {'chr': 'X', 'ref': u'A', 'pos': '32834639', 'alt': u'A'}} - assert results['NM_004006.2:c.476T=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522A=', 'vcf': {'chr': 'X', 'ref': u'A', 'pos': '32816522', 'alt': u'A'}} + assert results['NM_004006.2:c.476T=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639A=', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '32834639', 'alt': 'A'}} + assert results['NM_004006.2:c.476T=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522A=', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '32816522', 'alt': 'A'}} + assert results['NM_004006.2:c.476T=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639A=', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '32834639', 'alt': 'A'}} + assert results['NM_004006.2:c.476T=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522A=', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '32816522', 'alt': 'A'}} assert results['NM_004006.2:c.476T=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} - assert 'NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT' in results.keys() + assert 
'NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT' in list(results.keys()) assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.296_358-3delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGC' assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['alt_genomic_loci'] == [] @@ -7235,14 +7235,14 @@ def test_variant196(self): assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['hgvs_lrg_variant'] == 'LRG_199:g.521254_527967delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGC' assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['hgvs_transcript_variant'] == 'NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT' assert 
results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.521254_527967delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGC' - assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639_32841473delinsAAGTTGATTACATTAACCTGTGGATAATTACGAGTTGATTGTCGGACCCAGCTCAGGAGAATCTTTTCACTGTTGGTTTGTTGCAATCCAGCCATGATATTTTTCATTACATTTTTGACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC', 'vcf': {'chr': 'chrX', 'ref': 'ACATGTGGAAATAAATTTTCATAAGAAAATGCATTCCTTGAGCAAGAACCATGCAAACTTAAATATGAATGTCCTTGATCTTCAGTGATAAATAGAAATTTTAGGGCCAATTAGTAATGAGACATAATAGATTCTACCAGAAGTTAAGTCTATTCTCAAAGGCTAGGAGTCTATTCTGATTCATTGGTATCTATGCCATACCATTTACTGAATGTGTTCACTATTACTCCTGGATTCTGCCATATTAGACCCAACATTTACTGTTTATAGAATTGCTCACCTTCTAGCTTTCAATGAACAAGCTAGAGAAGACAGTGTATAAAACATTAGACTTATTGTTAACCAAGGACTTAAGATATAAAAGCGCTTTTTGGATAGGAGGAATACTATAGATAATTGTTTTTTAAAGTATATGTTCAAATTTTGCCTTTGCTATTAATTGTGGTAATTGGACAAGTTGCTTAACCTCTTCTAGGCCTCATTTCTGTGTATCTTACATGCAAGGTTAGTGTAAGATCAAAGTACAAAATATGGTCAGATAAAACCCCTGATTTCATACCATCATTTAACGCAAGCTATATTTATTGGCTGAACTGAAAAGAAGACAGAAATGTTTGCCTACCTAAAATAAAGAAAAAAAACAACAAAGGAAGGAAATATTTATCCCTAAGAGGTGTACATATGTAGCCTTGGTTCTATTTTCATTACTTGGTTTCCATGTATTTGATTGTCTCATAGGCTTCGTGCATGTGCAGATGTTATTCAACAGATTATCAAGTTTAATTCTTCTGAGTCATCTGATGCAGAAATCCATTCTAAGCATTTTTTCTAAGTTTCATGAATGCAGTGACGCTTCATAGAAAAAACTGTCTACATCTTTTATTAAAGAATTATTATTTATTAAAGACTGCTTTTTAAATATAAGCCCCTTATGAGATGCTGGAAGAAGACATCTATGTAAAAGTGATCAGTCTGTACCACTAAGCACTTTCCTTCTGGGAAGGTTAAGGTTCATATACTATCAACATTTGATCAGATGATGAATATTTATTGTGCATTCTAAATATGACAATTTTATTTTTTCATCCATATAGAAGTTAACAAAT
GCCTTTGGGGTAAATGCATATTTTAACATCTCCAATGTTAGTCATCTTTTTATTAAAAAGTAAATGCAAATTTTAGGATATACTACTAAATCTCATTTTAACATACTTTTAAAAGATTTGAGATGATGCTTTGTAAACACAGTAACGATTCGGAGATATGAAATCTTTGCATGTATCTGTCACAATCCCTTGATAACTAAGCCTATTATTTAGGAGTGGATTATCCAAGTTTTCCTAATTCCTTCTTGACCTACTTATTTAGTCAGTTACTCAGTGAATACAGGTCAAATAATTACGTAATTTGTCTCATATTCTAAAATTATAAATTGCTCAACCTTTATGTACAGTGACTGCAGGAGGTGTATTTTGTGTCCTGTGCTAAATTACGGTTGGAATCAGCAGATAGTGGCTTAATGGTGGTAATGGATTTGCAATAATCAAGCAATCCTCCAATGATGAATGTGATTCATTTGTGAAATGCTTGGCCAGTCTGCCCTCTATGTGCATAACGTTAAAGGACAACAAAGTATTCAATCCAAAACTCCTTTATAGCATCATTATGTCACTAATTCAGCTTATCAGCCTCAGCCTCGCTGTTATTGCTTCTACTTAGGTGCAAATGTTGCTCAGACAAAAAAAAATAATAAAGGGCAATGTATGATAATCATGTCTCCTTTTCCTACATAAAAGGCAGTTATCAAAAAATCATTGAGCTGGTCCCCAAAGGTGAGCCTGTAAACAGAAAACTGTCCCGTCCTCAGGTCTCACTGTCTCTAAAGGAGGCTCAATTTAAACAGATGTATTTGTGTGTGTGCGCGCATACAACTATAAATCAATTTTTAAAAGGAAAAAAGTTGAAATGTCTTTGAGCAGTGATTTTTAAAAATGACTAGGAGTTAACTGGGCCAAGAAGGCATGATGGACATGGAGGTGCACCACCAAGATTGACCTTCAAGGAAGGGCTTGTTGCTCCAGCTGTGGTCAGGCAGCTTCTAGTTGTTAGTTCTCTCAGGTACCACCTGAGTTGCAGAGTCCTGCCAGCCAATGTCACACCCTCTCTAGGGCAACCCACTACTATTGACTGATGAAGGGGCAGAGTATAAACACTCAGCCATTTTAACCCCATTTGAGACAACTCTGAGGGGTCATCCTAGCTGCAAGTTGTCTATGGGGTTTGAATAAGCGTGTCATAGGGTCTACGTCACAGCTCTAGTTCTTCCTCTTTCCACTCCTGTCACCTCTGCCCTTCTACAGGTGTTTTTTTTTTTTTTTTTTTTTCCAGGGCATGGCTTAATAAACATGCTGCCTGCCAAACTCAACTCCGAAGTCTACTCCCCAGAACCCAGACTGTGATTAAAAGAAGTTGCAGCAGAGGAATAGAAATGAAGAGGGTAGGAATAATCTTATATGGTAAGGATAGTTTCTGTGAAGCTCTGCAGAAGAGCAGAGCAAGAACTGTCTTGCACAGAAGTGACTTGGTAGGCAGGAATTTACAGGGATGGCTTTCAGCAGTGCGAACGTCATCATTCTCCTGTAATTGCAAATTGTATCAATAATATGAACATTCTAAAGTGAATTAGCGTTGCCATTAAGTACATCCCTTGAATTGTCCAGTAAAAGTATGGACCTGACTTCTGATAGCTCACCACTGTTAGCAAGATTTGTTATTACAACCAGCTGGTTGCCAGAGCCAAGTTTAGCACTCCGAAAAAAAAAGAATTGCACTTAAGAAGGAAGAAAAAGATGCCAAACACATGTTATATGCTTTGCAATTTTGTTCAAACCTGCCTATGTTACAAAATATGCGTTACTGTTTTAGGCTAGCAATTTTATTTGAGATGGACAGTAGAAAAACAGTAATAGGAAAACAGAGTTTGCATTCCTGAGTTTTCACAAGTTCAATGAGAAGTCCTCCATGGCACTATTAATAACATAGTATGTTGCATTAGAAAAAGGGGCCAACATGGTTCTATCTGTAAATGCCGCTCGTTATCTGATAG
ATCTAGAATGTACATTAAGTTGACCAAAATGTCCTTTAATCTTTGCTGAAATTCAAGGGCTTCTACCATTTAATAGGAAGAAAATATGTAATGTTGGTGTAAAAAAAAAAAAAAAAAAGAAAAACACATTTTTAGATTCACTGGATTATGATGAACACTGAGCAAGATAATTTTTCAAAGTGTGCTTAATAGTTTTAGAACAGGGCAGTGTGATATAAGAGAGATGAGATAAGTAAGCAGAGGCTGGGAAGAGTCTACAGTCTTGCAGTCTCAAGGATATGGTGATTCAAGGGCATAAATGAAAAACAACAGGCTATAAGAGAGGTCTCAGACAAAGAAAGATTTAAAAAGCATTCCTTCTGATATTCCTCATTGCAAAAGCGCCTCCACATGCTAATAATGAAAAATGGGCTGGGTGCGGTGGCTCACGCCTGTAATCCAAACACTTTGGGAGGCCGAGACGGGCGGATCACAAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCATCTCTACTGAACAAAATACAAAAAATTAGCCGGGCGTGGTGGCGGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAAGAGAATGGCATGAACCCAGGAGGCGGAGCTTGCAGTGAGCAGAGATCGCGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCTGTCTCAAAAATAAAAAATAAAAAAATAATAATAAATGAACAACTTGCAAGCAAGTGAAGAAATTATCAGACATAATAGTCTTGAAAGCTTATCTTGGACTAGTTCAGATGGAGTTGGAAAGGGCTTCAGGATATTTTTAAAAATGGTAAGATCATATAAGAAGTAGTATAAACAATAAATACAGTTCAGAGTGCCTTTTGGAGCTCTGTGCATGCTTGAGTTCTTCTGCTGGCTTTGTCAAAACTTACATCTGTCTTACGCAAAGCGGGATCTGCAAACTGGATTCACTCTAGGAATTATTTGTTACTATGCCATAAGATAAAGTCAACAATTCAGAGTGCATCAAGAAACTTATGTAGCGATTTTACAGAGTACCATTTATGTCTAATGTATTTAATAATAAAGGAGGAAGCATGGGTGTTGGGCATCTTATTTTTGTAACGCTTTGGTTTTATTCTATCTTACATACAATGGATTAGGGGAAAAGTGTTCCTTCCTCAAGATATTTTGAGAAGTACTGAGCAACATATGAAAAGCAGTTTGGGGAGAGATGCGGTATGTTGCTTGCTGGTTCTATTTGACACCATCACTATATGGAACGGGCTGAAAATCGGCCAACTTGGGCTCACTTAAGGCTCCTATGAGCTATTCTTTGTTGCCAGCACATATTAATTCCCGCTCTTTCTCTTCCCCTCTCCCCGCTTACTGTTGTGAAGTAGCATTAAGCCTGTTCAGAGAATTTGGAATAAAAATATATGGGGGCCAATTAGGAGAGCAACATGGCTGCTGAATTTAATAGGTACTCTTTGTGTCTACTCACTATTTGACTCTGAGCAAGAGCAATCTACAATTCAAGTTAAAATTCAAGCCGGGCGCAGTGGCTCATGCCTGCAATCCCAGCACTTTGGGAGGCCGAGGCAGGAGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAAAAATTAGCCGGGCGTGGTGGGGGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAATCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCAAGACTCTGTCTCAAAATAAAAATAAAAAAAAATAAAAATAAAAAATAAAAAAATTCAAGTTTACCGAATGAAGCCATGATATCTGCTACAATAGGGCTGGATCTGAACTATTACCCTCATTGCAAGAAGCCAGACACAAAAGAACATGTATTATATTGATATAAAATATCCAA
AAAGGCAAATACATATGGACAGAAATAGATTGACAATTGCCTAGGGCTGGGGTGGGAGTGAAAAATGACTGTAAATGGAGAAGAAACTTCTTTTTAAGGTGGTGGAAAAGTGCCAAAATTAGATCGCTATGATGGCTATATAACTCTGTAAATATCCTGCAAATTTTAATGAACGTGGTGGGGGGGAGTTATTCATCCCTCAAAATAGTTTGAGAGGCTACCAAGAACCAGAACTAAACCAGATTGATGCAAATTGAATTTTACACTTAGAATGAGTACATTGATGGTCTAACAATTACATTTCAAAAACGCTGATTTATAAAAAAGTAATCCCGACCAGTAAAAACAGCAGTCATTTCTCAGCCTCCAAATTTTTTAAATGAGTGGAAAACTTTAAGCAATCAATACAAGTCATATGATACACCTCAAATAAAAGTGGTATACTACACTGAGTGCCTTGCAGATATTCTCCCACTTAATCGTGATAAGAAATACATACACATTTATTTTTATTTTTACTCTTTTGATAGTGAAGGGACCATGGCCCAGAACAGGGGTGGCAATATACCAAATGCCACTAAGATGGTAACCGTGGAAGTTTAAATTCAAGTTCATGTTGGTATATATATATACACATACATATTTATACATATGTGTATAACGAGTTACTATTACGTATATATATACAATGAGTGGAAAATATTAGTGGATATAAGTTCATGTATGATTATGTATACATATTATGATTATACTATTATGTATATGTATACAATGAGTGGATAATATTAGTGGATATAAGTTCATGTTGGTCTTATATATACATATATAATGTGTGGATATGAGTGTGTATATATATACATATATAATGTGTGGATATGAGTGTGTGTATATACACACATGCATATTGTATATATGTGTGTATATATAGACACATTATATATATACGCATATATACTACACACACACACACACGGGTGTGTGTCTGTATCTTTTCCACAAATCCTTCAACCCATTTTGCAGAGGTCAAATAGACAGTCGGAAGACCCTATGCTCAGGTGACTTAAAAATAATTTCCAAATCACATTATGGAGTTTGTATGTATTACACACATTTATTGATAGAGATACCCATATTCTACTAATCTTTTATTGGCAATAATTTATGTTAAGAATACCCAAGACTGAGAAAGCCTCATTCCTTTGGTAGTGATTAAAATAAAACATACTAAATTAACTTATAGACAAGTTATAGAACATACATTTGTGAAAAAAATTACTCACCTATGATTGGGACTTTGTATTTTTACCTTATACTTACTCAATGAAATAAAATTTTGAAAAATATTCCTGTAAATGTACCAGAACCTATTTTATACCGTGATGATCCTTAACATTTCAGACGACATGGTAGTGTCAATTTAAAAAGCAGCACTATGGAGCAGGGTTTGTTATTGTTAGAAATACACATTTGTTTCACACGTCAAGGGTAAAAATTAAAAAACAAGATTAATGTTACCCAAAAGGAAACCATTCATCAGGATTCTTACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGA', 'pos': '32834760', 'alt': u'GCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC'}} - assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000023.11:g.32816522_32823356delinsAAGTTGATTACATTAACCTGTGGATAATTACGAGTTGATTGTCGGACCCAGCTCAGGAGAATCTTTTCACTGTTGGTTTGTTGCAATCCAGCCATGATATTTTTCATTACATTTTTGACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC', 'vcf': {'chr': 'chrX', 'ref': 'ACATGTGGAAATAAATTTTCATAAGAAAATGCATTCCTTGAGCAAGAACCATGCAAACTTAAATATGAATGTCCTTGATCTTCAGTGATAAATAGAAATTTTAGGGCCAATTAGTAATGAGACATAATAGATTCTACCAGAAGTTAAGTCTATTCTCAAAGGCTAGGAGTCTATTCTGATTCATTGGTATCTATGCCATACCATTTACTGAATGTGTTCACTATTACTCCTGGATTCTGCCATATTAGACCCAACATTTACTGTTTATAGAATTGCTCACCTTCTAGCTTTCAATGAACAAGCTAGAGAAGACAGTGTATAAAACATTAGACTTATTGTTAACCAAGGACTTAAGATATAAAAGCGCTTTTTGGATAGGAGGAATACTATAGATAATTGTTTTTTAAAGTATATGTTCAAATTTTGCCTTTGCTATTAATTGTGGTAATTGGACAAGTTGCTTAACCTCTTCTAGGCCTCATTTCTGTGTATCTTACATGCAAGGTTAGTGTAAGATCAAAGTACAAAATATGGTCAGATAAAACCCCTGATTTCATACCATCATTTAACGCAAGCTATATTTATTGGCTGAACTGAAAAGAAGACAGAAATGTTTGCCTACCTAAAATAAAGAAAAAAAACAACAAAGGAAGGAAATATTTATCCCTAAGAGGTGTACATATGTAGCCTTGGTTCTATTTTCATTACTTGGTTTCCATGTATTTGATTGTCTCATAGGCTTCGTGCATGTGCAGATGTTATTCAACAGATTATCAAGTTTAATTCTTCTGAGTCATCTGATGCAGAAATCCATTCTAAGCATTTTTTCTAAGTTTCATGAATGCAGTGACGCTTCATAGAAAAAACTGTCTACATCTTTTATTAAAGAATTATTATTTATTAAAGACTGCTTTTTAAATATAAGCCCCTTATGAGATGCTGGAAGAAGACATCTATGTAAAAGTGATCAGTCTGTACCACTAAGCACTTTCCTTCTGGGAAGGTTAAGGTTCATATACTATCAACATTTGATCAGATGATGAATATTTATTGTGCATTCTAAATATGACAATTTTATTTTTTCATCCATATAGAAGTTAACAAATGCCTTTGGGGTAAATGCATATTTTAACATCTCCAATGTTAGTCATCTTTTTATTAAAAAGTAAATGCAAATTTTAGGATATACTACTAAATCTCATTTTAACATACTTTTAAAAGATTTGAGATGATGCTTTGTAAACACAGTAACGATTCGGAGATATGAAATCTTTGCATGTATCTGTCACAATCCCTTGATAACTAAGCCTATTATTTAGGAGTGGATTATCCAAGTTTTCCTAATTCCTTCTTGACCTACTTATTTAGTCAGTTACTCAGTGAATACAGGTCAAATAATTACGTAATTTGTCTCATATTCTAAAATTATAAATTGCTCAACCTTTATGTACAGTGACTGCAGGAGGTGTATTTTGTGTCCTGTGCTAAATTACGGTTGGAATCAGCAGATAGTGGCTTAATGGTGGTAATGGATTTGCAATAATCAAGCAATCCTCCAATGATGAATGTGATTCATTTGTGAAATGCTTGGCCAGTCTGCCCTCTATGTGCATAACGTTAAAGGACAACAAAGTATTCAATCCAAAACTCCTTTATAGCATCATTATGTCACTAATTCAGCTTATCAGCCTCAGCCTCGCTGTTATTGCTTCTACTTAGGTGCAAATGTTGCTCAGACAAAAAAAA
ATAATAAAGGGCAATGTATGATAATCATGTCTCCTTTTCCTACATAAAAGGCAGTTATCAAAAAATCATTGAGCTGGTCCCCAAAGGTGAGCCTGTAAACAGAAAACTGTCCCGTCCTCAGGTCTCACTGTCTCTAAAGGAGGCTCAATTTAAACAGATGTATTTGTGTGTGTGCGCGCATACAACTATAAATCAATTTTTAAAAGGAAAAAAGTTGAAATGTCTTTGAGCAGTGATTTTTAAAAATGACTAGGAGTTAACTGGGCCAAGAAGGCATGATGGACATGGAGGTGCACCACCAAGATTGACCTTCAAGGAAGGGCTTGTTGCTCCAGCTGTGGTCAGGCAGCTTCTAGTTGTTAGTTCTCTCAGGTACCACCTGAGTTGCAGAGTCCTGCCAGCCAATGTCACACCCTCTCTAGGGCAACCCACTACTATTGACTGATGAAGGGGCAGAGTATAAACACTCAGCCATTTTAACCCCATTTGAGACAACTCTGAGGGGTCATCCTAGCTGCAAGTTGTCTATGGGGTTTGAATAAGCGTGTCATAGGGTCTACGTCACAGCTCTAGTTCTTCCTCTTTCCACTCCTGTCACCTCTGCCCTTCTACAGGTGTTTTTTTTTTTTTTTTTTTTTCCAGGGCATGGCTTAATAAACATGCTGCCTGCCAAACTCAACTCCGAAGTCTACTCCCCAGAACCCAGACTGTGATTAAAAGAAGTTGCAGCAGAGGAATAGAAATGAAGAGGGTAGGAATAATCTTATATGGTAAGGATAGTTTCTGTGAAGCTCTGCAGAAGAGCAGAGCAAGAACTGTCTTGCACAGAAGTGACTTGGTAGGCAGGAATTTACAGGGATGGCTTTCAGCAGTGCGAACGTCATCATTCTCCTGTAATTGCAAATTGTATCAATAATATGAACATTCTAAAGTGAATTAGCGTTGCCATTAAGTACATCCCTTGAATTGTCCAGTAAAAGTATGGACCTGACTTCTGATAGCTCACCACTGTTAGCAAGATTTGTTATTACAACCAGCTGGTTGCCAGAGCCAAGTTTAGCACTCCGAAAAAAAAAGAATTGCACTTAAGAAGGAAGAAAAAGATGCCAAACACATGTTATATGCTTTGCAATTTTGTTCAAACCTGCCTATGTTACAAAATATGCGTTACTGTTTTAGGCTAGCAATTTTATTTGAGATGGACAGTAGAAAAACAGTAATAGGAAAACAGAGTTTGCATTCCTGAGTTTTCACAAGTTCAATGAGAAGTCCTCCATGGCACTATTAATAACATAGTATGTTGCATTAGAAAAAGGGGCCAACATGGTTCTATCTGTAAATGCCGCTCGTTATCTGATAGATCTAGAATGTACATTAAGTTGACCAAAATGTCCTTTAATCTTTGCTGAAATTCAAGGGCTTCTACCATTTAATAGGAAGAAAATATGTAATGTTGGTGTAAAAAAAAAAAAAAAAAAGAAAAACACATTTTTAGATTCACTGGATTATGATGAACACTGAGCAAGATAATTTTTCAAAGTGTGCTTAATAGTTTTAGAACAGGGCAGTGTGATATAAGAGAGATGAGATAAGTAAGCAGAGGCTGGGAAGAGTCTACAGTCTTGCAGTCTCAAGGATATGGTGATTCAAGGGCATAAATGAAAAACAACAGGCTATAAGAGAGGTCTCAGACAAAGAAAGATTTAAAAAGCATTCCTTCTGATATTCCTCATTGCAAAAGCGCCTCCACATGCTAATAATGAAAAATGGGCTGGGTGCGGTGGCTCACGCCTGTAATCCAAACACTTTGGGAGGCCGAGACGGGCGGATCACAAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCATCTCTACTGAACAAAATACAAAAAATTAGCCGGGCGTGGTGGCGGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAAGAGAATGGCATGAACCCAGGAGGCGGAGCTTGCAGTGAG
CAGAGATCGCGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCTGTCTCAAAAATAAAAAATAAAAAAATAATAATAAATGAACAACTTGCAAGCAAGTGAAGAAATTATCAGACATAATAGTCTTGAAAGCTTATCTTGGACTAGTTCAGATGGAGTTGGAAAGGGCTTCAGGATATTTTTAAAAATGGTAAGATCATATAAGAAGTAGTATAAACAATAAATACAGTTCAGAGTGCCTTTTGGAGCTCTGTGCATGCTTGAGTTCTTCTGCTGGCTTTGTCAAAACTTACATCTGTCTTACGCAAAGCGGGATCTGCAAACTGGATTCACTCTAGGAATTATTTGTTACTATGCCATAAGATAAAGTCAACAATTCAGAGTGCATCAAGAAACTTATGTAGCGATTTTACAGAGTACCATTTATGTCTAATGTATTTAATAATAAAGGAGGAAGCATGGGTGTTGGGCATCTTATTTTTGTAACGCTTTGGTTTTATTCTATCTTACATACAATGGATTAGGGGAAAAGTGTTCCTTCCTCAAGATATTTTGAGAAGTACTGAGCAACATATGAAAAGCAGTTTGGGGAGAGATGCGGTATGTTGCTTGCTGGTTCTATTTGACACCATCACTATATGGAACGGGCTGAAAATCGGCCAACTTGGGCTCACTTAAGGCTCCTATGAGCTATTCTTTGTTGCCAGCACATATTAATTCCCGCTCTTTCTCTTCCCCTCTCCCCGCTTACTGTTGTGAAGTAGCATTAAGCCTGTTCAGAGAATTTGGAATAAAAATATATGGGGGCCAATTAGGAGAGCAACATGGCTGCTGAATTTAATAGGTACTCTTTGTGTCTACTCACTATTTGACTCTGAGCAAGAGCAATCTACAATTCAAGTTAAAATTCAAGCCGGGCGCAGTGGCTCATGCCTGCAATCCCAGCACTTTGGGAGGCCGAGGCAGGAGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAAAAATTAGCCGGGCGTGGTGGGGGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAATCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCAAGACTCTGTCTCAAAATAAAAATAAAAAAAAATAAAAATAAAAAATAAAAAAATTCAAGTTTACCGAATGAAGCCATGATATCTGCTACAATAGGGCTGGATCTGAACTATTACCCTCATTGCAAGAAGCCAGACACAAAAGAACATGTATTATATTGATATAAAATATCCAAAAAGGCAAATACATATGGACAGAAATAGATTGACAATTGCCTAGGGCTGGGGTGGGAGTGAAAAATGACTGTAAATGGAGAAGAAACTTCTTTTTAAGGTGGTGGAAAAGTGCCAAAATTAGATCGCTATGATGGCTATATAACTCTGTAAATATCCTGCAAATTTTAATGAACGTGGTGGGGGGGAGTTATTCATCCCTCAAAATAGTTTGAGAGGCTACCAAGAACCAGAACTAAACCAGATTGATGCAAATTGAATTTTACACTTAGAATGAGTACATTGATGGTCTAACAATTACATTTCAAAAACGCTGATTTATAAAAAAGTAATCCCGACCAGTAAAAACAGCAGTCATTTCTCAGCCTCCAAATTTTTTAAATGAGTGGAAAACTTTAAGCAATCAATACAAGTCATATGATACACCTCAAATAAAAGTGGTATACTACACTGAGTGCCTTGCAGATATTCTCCCACTTAATCGTGATAAGAAATACATACACATTTATTTTTATTTTTACTCTTTTGATAGTGAAGGGACCATGGCCCAGAACAGGGGTGGCAATATACCAAATGCCACTAAGATGGTAACCGTGGAAGTTTAAATTCAAGTTCATGTTGGTATATATATATACACATACA
TATTTATACATATGTGTATAACGAGTTACTATTACGTATATATATACAATGAGTGGAAAATATTAGTGGATATAAGTTCATGTATGATTATGTATACATATTATGATTATACTATTATGTATATGTATACAATGAGTGGATAATATTAGTGGATATAAGTTCATGTTGGTCTTATATATACATATATAATGTGTGGATATGAGTGTGTATATATATACATATATAATGTGTGGATATGAGTGTGTGTATATACACACATGCATATTGTATATATGTGTGTATATATAGACACATTATATATATACGCATATATACTACACACACACACACACGGGTGTGTGTCTGTATCTTTTCCACAAATCCTTCAACCCATTTTGCAGAGGTCAAATAGACAGTCGGAAGACCCTATGCTCAGGTGACTTAAAAATAATTTCCAAATCACATTATGGAGTTTGTATGTATTACACACATTTATTGATAGAGATACCCATATTCTACTAATCTTTTATTGGCAATAATTTATGTTAAGAATACCCAAGACTGAGAAAGCCTCATTCCTTTGGTAGTGATTAAAATAAAACATACTAAATTAACTTATAGACAAGTTATAGAACATACATTTGTGAAAAAAATTACTCACCTATGATTGGGACTTTGTATTTTTACCTTATACTTACTCAATGAAATAAAATTTTGAAAAATATTCCTGTAAATGTACCAGAACCTATTTTATACCGTGATGATCCTTAACATTTCAGACGACATGGTAGTGTCAATTTAAAAAGCAGCACTATGGAGCAGGGTTTGTTATTGTTAGAAATACACATTTGTTTCACACGTCAAGGGTAAAAATTAAAAAACAAGATTAATGTTACCCAAAAGGAAACCATTCATCAGGATTCTTACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGA', 'pos': '32816643', 'alt': u'GCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC'}} - assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639_32841473delinsAAGTTGATTACATTAACCTGTGGATAATTACGAGTTGATTGTCGGACCCAGCTCAGGAGAATCTTTTCACTGTTGGTTTGTTGCAATCCAGCCATGATATTTTTCATTACATTTTTGACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC', 'vcf': {'chr': 'X', 'ref': 
'ACATGTGGAAATAAATTTTCATAAGAAAATGCATTCCTTGAGCAAGAACCATGCAAACTTAAATATGAATGTCCTTGATCTTCAGTGATAAATAGAAATTTTAGGGCCAATTAGTAATGAGACATAATAGATTCTACCAGAAGTTAAGTCTATTCTCAAAGGCTAGGAGTCTATTCTGATTCATTGGTATCTATGCCATACCATTTACTGAATGTGTTCACTATTACTCCTGGATTCTGCCATATTAGACCCAACATTTACTGTTTATAGAATTGCTCACCTTCTAGCTTTCAATGAACAAGCTAGAGAAGACAGTGTATAAAACATTAGACTTATTGTTAACCAAGGACTTAAGATATAAAAGCGCTTTTTGGATAGGAGGAATACTATAGATAATTGTTTTTTAAAGTATATGTTCAAATTTTGCCTTTGCTATTAATTGTGGTAATTGGACAAGTTGCTTAACCTCTTCTAGGCCTCATTTCTGTGTATCTTACATGCAAGGTTAGTGTAAGATCAAAGTACAAAATATGGTCAGATAAAACCCCTGATTTCATACCATCATTTAACGCAAGCTATATTTATTGGCTGAACTGAAAAGAAGACAGAAATGTTTGCCTACCTAAAATAAAGAAAAAAAACAACAAAGGAAGGAAATATTTATCCCTAAGAGGTGTACATATGTAGCCTTGGTTCTATTTTCATTACTTGGTTTCCATGTATTTGATTGTCTCATAGGCTTCGTGCATGTGCAGATGTTATTCAACAGATTATCAAGTTTAATTCTTCTGAGTCATCTGATGCAGAAATCCATTCTAAGCATTTTTTCTAAGTTTCATGAATGCAGTGACGCTTCATAGAAAAAACTGTCTACATCTTTTATTAAAGAATTATTATTTATTAAAGACTGCTTTTTAAATATAAGCCCCTTATGAGATGCTGGAAGAAGACATCTATGTAAAAGTGATCAGTCTGTACCACTAAGCACTTTCCTTCTGGGAAGGTTAAGGTTCATATACTATCAACATTTGATCAGATGATGAATATTTATTGTGCATTCTAAATATGACAATTTTATTTTTTCATCCATATAGAAGTTAACAAATGCCTTTGGGGTAAATGCATATTTTAACATCTCCAATGTTAGTCATCTTTTTATTAAAAAGTAAATGCAAATTTTAGGATATACTACTAAATCTCATTTTAACATACTTTTAAAAGATTTGAGATGATGCTTTGTAAACACAGTAACGATTCGGAGATATGAAATCTTTGCATGTATCTGTCACAATCCCTTGATAACTAAGCCTATTATTTAGGAGTGGATTATCCAAGTTTTCCTAATTCCTTCTTGACCTACTTATTTAGTCAGTTACTCAGTGAATACAGGTCAAATAATTACGTAATTTGTCTCATATTCTAAAATTATAAATTGCTCAACCTTTATGTACAGTGACTGCAGGAGGTGTATTTTGTGTCCTGTGCTAAATTACGGTTGGAATCAGCAGATAGTGGCTTAATGGTGGTAATGGATTTGCAATAATCAAGCAATCCTCCAATGATGAATGTGATTCATTTGTGAAATGCTTGGCCAGTCTGCCCTCTATGTGCATAACGTTAAAGGACAACAAAGTATTCAATCCAAAACTCCTTTATAGCATCATTATGTCACTAATTCAGCTTATCAGCCTCAGCCTCGCTGTTATTGCTTCTACTTAGGTGCAAATGTTGCTCAGACAAAAAAAAATAATAAAGGGCAATGTATGATAATCATGTCTCCTTTTCCTACATAAAAGGCAGTTATCAAAAAATCATTGAGCTGGTCCCCAAAGGTGAGCCTGTAAACAGAAAACTGTCCCGTCCTCAGGTCTCACTGTCTCTAAAGGAGGCTCAATTTAAACAGATGTATTTGTGTGTGTGCGCGCATACAACTATAAATCAATTTTTAAAAGGAAAAAAGTTGAAATGTCTTTGAGCAGTGATTTTTAAAAATGACTAG
GAGTTAACTGGGCCAAGAAGGCATGATGGACATGGAGGTGCACCACCAAGATTGACCTTCAAGGAAGGGCTTGTTGCTCCAGCTGTGGTCAGGCAGCTTCTAGTTGTTAGTTCTCTCAGGTACCACCTGAGTTGCAGAGTCCTGCCAGCCAATGTCACACCCTCTCTAGGGCAACCCACTACTATTGACTGATGAAGGGGCAGAGTATAAACACTCAGCCATTTTAACCCCATTTGAGACAACTCTGAGGGGTCATCCTAGCTGCAAGTTGTCTATGGGGTTTGAATAAGCGTGTCATAGGGTCTACGTCACAGCTCTAGTTCTTCCTCTTTCCACTCCTGTCACCTCTGCCCTTCTACAGGTGTTTTTTTTTTTTTTTTTTTTTCCAGGGCATGGCTTAATAAACATGCTGCCTGCCAAACTCAACTCCGAAGTCTACTCCCCAGAACCCAGACTGTGATTAAAAGAAGTTGCAGCAGAGGAATAGAAATGAAGAGGGTAGGAATAATCTTATATGGTAAGGATAGTTTCTGTGAAGCTCTGCAGAAGAGCAGAGCAAGAACTGTCTTGCACAGAAGTGACTTGGTAGGCAGGAATTTACAGGGATGGCTTTCAGCAGTGCGAACGTCATCATTCTCCTGTAATTGCAAATTGTATCAATAATATGAACATTCTAAAGTGAATTAGCGTTGCCATTAAGTACATCCCTTGAATTGTCCAGTAAAAGTATGGACCTGACTTCTGATAGCTCACCACTGTTAGCAAGATTTGTTATTACAACCAGCTGGTTGCCAGAGCCAAGTTTAGCACTCCGAAAAAAAAAGAATTGCACTTAAGAAGGAAGAAAAAGATGCCAAACACATGTTATATGCTTTGCAATTTTGTTCAAACCTGCCTATGTTACAAAATATGCGTTACTGTTTTAGGCTAGCAATTTTATTTGAGATGGACAGTAGAAAAACAGTAATAGGAAAACAGAGTTTGCATTCCTGAGTTTTCACAAGTTCAATGAGAAGTCCTCCATGGCACTATTAATAACATAGTATGTTGCATTAGAAAAAGGGGCCAACATGGTTCTATCTGTAAATGCCGCTCGTTATCTGATAGATCTAGAATGTACATTAAGTTGACCAAAATGTCCTTTAATCTTTGCTGAAATTCAAGGGCTTCTACCATTTAATAGGAAGAAAATATGTAATGTTGGTGTAAAAAAAAAAAAAAAAAAGAAAAACACATTTTTAGATTCACTGGATTATGATGAACACTGAGCAAGATAATTTTTCAAAGTGTGCTTAATAGTTTTAGAACAGGGCAGTGTGATATAAGAGAGATGAGATAAGTAAGCAGAGGCTGGGAAGAGTCTACAGTCTTGCAGTCTCAAGGATATGGTGATTCAAGGGCATAAATGAAAAACAACAGGCTATAAGAGAGGTCTCAGACAAAGAAAGATTTAAAAAGCATTCCTTCTGATATTCCTCATTGCAAAAGCGCCTCCACATGCTAATAATGAAAAATGGGCTGGGTGCGGTGGCTCACGCCTGTAATCCAAACACTTTGGGAGGCCGAGACGGGCGGATCACAAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCATCTCTACTGAACAAAATACAAAAAATTAGCCGGGCGTGGTGGCGGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAAGAGAATGGCATGAACCCAGGAGGCGGAGCTTGCAGTGAGCAGAGATCGCGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCTGTCTCAAAAATAAAAAATAAAAAAATAATAATAAATGAACAACTTGCAAGCAAGTGAAGAAATTATCAGACATAATAGTCTTGAAAGCTTATCTTGGACTAGTTCAGATGGAGTTGGAAAGGGCTTCAGGATATTTTTAAAAATGGTAAGATCATATAAGAAGTAGTATAAACAATAAATACAGTTCAGAGTGCCTTTTGGAGCT
CTGTGCATGCTTGAGTTCTTCTGCTGGCTTTGTCAAAACTTACATCTGTCTTACGCAAAGCGGGATCTGCAAACTGGATTCACTCTAGGAATTATTTGTTACTATGCCATAAGATAAAGTCAACAATTCAGAGTGCATCAAGAAACTTATGTAGCGATTTTACAGAGTACCATTTATGTCTAATGTATTTAATAATAAAGGAGGAAGCATGGGTGTTGGGCATCTTATTTTTGTAACGCTTTGGTTTTATTCTATCTTACATACAATGGATTAGGGGAAAAGTGTTCCTTCCTCAAGATATTTTGAGAAGTACTGAGCAACATATGAAAAGCAGTTTGGGGAGAGATGCGGTATGTTGCTTGCTGGTTCTATTTGACACCATCACTATATGGAACGGGCTGAAAATCGGCCAACTTGGGCTCACTTAAGGCTCCTATGAGCTATTCTTTGTTGCCAGCACATATTAATTCCCGCTCTTTCTCTTCCCCTCTCCCCGCTTACTGTTGTGAAGTAGCATTAAGCCTGTTCAGAGAATTTGGAATAAAAATATATGGGGGCCAATTAGGAGAGCAACATGGCTGCTGAATTTAATAGGTACTCTTTGTGTCTACTCACTATTTGACTCTGAGCAAGAGCAATCTACAATTCAAGTTAAAATTCAAGCCGGGCGCAGTGGCTCATGCCTGCAATCCCAGCACTTTGGGAGGCCGAGGCAGGAGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAAAAATTAGCCGGGCGTGGTGGGGGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAATCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCAAGACTCTGTCTCAAAATAAAAATAAAAAAAAATAAAAATAAAAAATAAAAAAATTCAAGTTTACCGAATGAAGCCATGATATCTGCTACAATAGGGCTGGATCTGAACTATTACCCTCATTGCAAGAAGCCAGACACAAAAGAACATGTATTATATTGATATAAAATATCCAAAAAGGCAAATACATATGGACAGAAATAGATTGACAATTGCCTAGGGCTGGGGTGGGAGTGAAAAATGACTGTAAATGGAGAAGAAACTTCTTTTTAAGGTGGTGGAAAAGTGCCAAAATTAGATCGCTATGATGGCTATATAACTCTGTAAATATCCTGCAAATTTTAATGAACGTGGTGGGGGGGAGTTATTCATCCCTCAAAATAGTTTGAGAGGCTACCAAGAACCAGAACTAAACCAGATTGATGCAAATTGAATTTTACACTTAGAATGAGTACATTGATGGTCTAACAATTACATTTCAAAAACGCTGATTTATAAAAAAGTAATCCCGACCAGTAAAAACAGCAGTCATTTCTCAGCCTCCAAATTTTTTAAATGAGTGGAAAACTTTAAGCAATCAATACAAGTCATATGATACACCTCAAATAAAAGTGGTATACTACACTGAGTGCCTTGCAGATATTCTCCCACTTAATCGTGATAAGAAATACATACACATTTATTTTTATTTTTACTCTTTTGATAGTGAAGGGACCATGGCCCAGAACAGGGGTGGCAATATACCAAATGCCACTAAGATGGTAACCGTGGAAGTTTAAATTCAAGTTCATGTTGGTATATATATATACACATACATATTTATACATATGTGTATAACGAGTTACTATTACGTATATATATACAATGAGTGGAAAATATTAGTGGATATAAGTTCATGTATGATTATGTATACATATTATGATTATACTATTATGTATATGTATACAATGAGTGGATAATATTAGTGGATATAAGTTCATGTTGGTCTTATATATACATATATAATGTGTGGATATGAGTGTGTATATATATACATATATAATGTGTGGATATGAGTGTGTGTATATAC
ACACATGCATATTGTATATATGTGTGTATATATAGACACATTATATATATACGCATATATACTACACACACACACACACGGGTGTGTGTCTGTATCTTTTCCACAAATCCTTCAACCCATTTTGCAGAGGTCAAATAGACAGTCGGAAGACCCTATGCTCAGGTGACTTAAAAATAATTTCCAAATCACATTATGGAGTTTGTATGTATTACACACATTTATTGATAGAGATACCCATATTCTACTAATCTTTTATTGGCAATAATTTATGTTAAGAATACCCAAGACTGAGAAAGCCTCATTCCTTTGGTAGTGATTAAAATAAAACATACTAAATTAACTTATAGACAAGTTATAGAACATACATTTGTGAAAAAAATTACTCACCTATGATTGGGACTTTGTATTTTTACCTTATACTTACTCAATGAAATAAAATTTTGAAAAATATTCCTGTAAATGTACCAGAACCTATTTTATACCGTGATGATCCTTAACATTTCAGACGACATGGTAGTGTCAATTTAAAAAGCAGCACTATGGAGCAGGGTTTGTTATTGTTAGAAATACACATTTGTTTCACACGTCAAGGGTAAAAATTAAAAAACAAGATTAATGTTACCCAAAAGGAAACCATTCATCAGGATTCTTACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGA', 'pos': '32834760', 'alt': u'GCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC'}} - assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522_32823356delinsAAGTTGATTACATTAACCTGTGGATAATTACGAGTTGATTGTCGGACCCAGCTCAGGAGAATCTTTTCACTGTTGGTTTGTTGCAATCCAGCCATGATATTTTTCATTACATTTTTGACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC', 'vcf': {'chr': 'X', 'ref': 
'ACATGTGGAAATAAATTTTCATAAGAAAATGCATTCCTTGAGCAAGAACCATGCAAACTTAAATATGAATGTCCTTGATCTTCAGTGATAAATAGAAATTTTAGGGCCAATTAGTAATGAGACATAATAGATTCTACCAGAAGTTAAGTCTATTCTCAAAGGCTAGGAGTCTATTCTGATTCATTGGTATCTATGCCATACCATTTACTGAATGTGTTCACTATTACTCCTGGATTCTGCCATATTAGACCCAACATTTACTGTTTATAGAATTGCTCACCTTCTAGCTTTCAATGAACAAGCTAGAGAAGACAGTGTATAAAACATTAGACTTATTGTTAACCAAGGACTTAAGATATAAAAGCGCTTTTTGGATAGGAGGAATACTATAGATAATTGTTTTTTAAAGTATATGTTCAAATTTTGCCTTTGCTATTAATTGTGGTAATTGGACAAGTTGCTTAACCTCTTCTAGGCCTCATTTCTGTGTATCTTACATGCAAGGTTAGTGTAAGATCAAAGTACAAAATATGGTCAGATAAAACCCCTGATTTCATACCATCATTTAACGCAAGCTATATTTATTGGCTGAACTGAAAAGAAGACAGAAATGTTTGCCTACCTAAAATAAAGAAAAAAAACAACAAAGGAAGGAAATATTTATCCCTAAGAGGTGTACATATGTAGCCTTGGTTCTATTTTCATTACTTGGTTTCCATGTATTTGATTGTCTCATAGGCTTCGTGCATGTGCAGATGTTATTCAACAGATTATCAAGTTTAATTCTTCTGAGTCATCTGATGCAGAAATCCATTCTAAGCATTTTTTCTAAGTTTCATGAATGCAGTGACGCTTCATAGAAAAAACTGTCTACATCTTTTATTAAAGAATTATTATTTATTAAAGACTGCTTTTTAAATATAAGCCCCTTATGAGATGCTGGAAGAAGACATCTATGTAAAAGTGATCAGTCTGTACCACTAAGCACTTTCCTTCTGGGAAGGTTAAGGTTCATATACTATCAACATTTGATCAGATGATGAATATTTATTGTGCATTCTAAATATGACAATTTTATTTTTTCATCCATATAGAAGTTAACAAATGCCTTTGGGGTAAATGCATATTTTAACATCTCCAATGTTAGTCATCTTTTTATTAAAAAGTAAATGCAAATTTTAGGATATACTACTAAATCTCATTTTAACATACTTTTAAAAGATTTGAGATGATGCTTTGTAAACACAGTAACGATTCGGAGATATGAAATCTTTGCATGTATCTGTCACAATCCCTTGATAACTAAGCCTATTATTTAGGAGTGGATTATCCAAGTTTTCCTAATTCCTTCTTGACCTACTTATTTAGTCAGTTACTCAGTGAATACAGGTCAAATAATTACGTAATTTGTCTCATATTCTAAAATTATAAATTGCTCAACCTTTATGTACAGTGACTGCAGGAGGTGTATTTTGTGTCCTGTGCTAAATTACGGTTGGAATCAGCAGATAGTGGCTTAATGGTGGTAATGGATTTGCAATAATCAAGCAATCCTCCAATGATGAATGTGATTCATTTGTGAAATGCTTGGCCAGTCTGCCCTCTATGTGCATAACGTTAAAGGACAACAAAGTATTCAATCCAAAACTCCTTTATAGCATCATTATGTCACTAATTCAGCTTATCAGCCTCAGCCTCGCTGTTATTGCTTCTACTTAGGTGCAAATGTTGCTCAGACAAAAAAAAATAATAAAGGGCAATGTATGATAATCATGTCTCCTTTTCCTACATAAAAGGCAGTTATCAAAAAATCATTGAGCTGGTCCCCAAAGGTGAGCCTGTAAACAGAAAACTGTCCCGTCCTCAGGTCTCACTGTCTCTAAAGGAGGCTCAATTTAAACAGATGTATTTGTGTGTGTGCGCGCATACAACTATAAATCAATTTTTAAAAGGAAAAAAGTTGAAATGTCTTTGAGCAGTGATTTTTAAAAATGACTAG
GAGTTAACTGGGCCAAGAAGGCATGATGGACATGGAGGTGCACCACCAAGATTGACCTTCAAGGAAGGGCTTGTTGCTCCAGCTGTGGTCAGGCAGCTTCTAGTTGTTAGTTCTCTCAGGTACCACCTGAGTTGCAGAGTCCTGCCAGCCAATGTCACACCCTCTCTAGGGCAACCCACTACTATTGACTGATGAAGGGGCAGAGTATAAACACTCAGCCATTTTAACCCCATTTGAGACAACTCTGAGGGGTCATCCTAGCTGCAAGTTGTCTATGGGGTTTGAATAAGCGTGTCATAGGGTCTACGTCACAGCTCTAGTTCTTCCTCTTTCCACTCCTGTCACCTCTGCCCTTCTACAGGTGTTTTTTTTTTTTTTTTTTTTTCCAGGGCATGGCTTAATAAACATGCTGCCTGCCAAACTCAACTCCGAAGTCTACTCCCCAGAACCCAGACTGTGATTAAAAGAAGTTGCAGCAGAGGAATAGAAATGAAGAGGGTAGGAATAATCTTATATGGTAAGGATAGTTTCTGTGAAGCTCTGCAGAAGAGCAGAGCAAGAACTGTCTTGCACAGAAGTGACTTGGTAGGCAGGAATTTACAGGGATGGCTTTCAGCAGTGCGAACGTCATCATTCTCCTGTAATTGCAAATTGTATCAATAATATGAACATTCTAAAGTGAATTAGCGTTGCCATTAAGTACATCCCTTGAATTGTCCAGTAAAAGTATGGACCTGACTTCTGATAGCTCACCACTGTTAGCAAGATTTGTTATTACAACCAGCTGGTTGCCAGAGCCAAGTTTAGCACTCCGAAAAAAAAAGAATTGCACTTAAGAAGGAAGAAAAAGATGCCAAACACATGTTATATGCTTTGCAATTTTGTTCAAACCTGCCTATGTTACAAAATATGCGTTACTGTTTTAGGCTAGCAATTTTATTTGAGATGGACAGTAGAAAAACAGTAATAGGAAAACAGAGTTTGCATTCCTGAGTTTTCACAAGTTCAATGAGAAGTCCTCCATGGCACTATTAATAACATAGTATGTTGCATTAGAAAAAGGGGCCAACATGGTTCTATCTGTAAATGCCGCTCGTTATCTGATAGATCTAGAATGTACATTAAGTTGACCAAAATGTCCTTTAATCTTTGCTGAAATTCAAGGGCTTCTACCATTTAATAGGAAGAAAATATGTAATGTTGGTGTAAAAAAAAAAAAAAAAAAGAAAAACACATTTTTAGATTCACTGGATTATGATGAACACTGAGCAAGATAATTTTTCAAAGTGTGCTTAATAGTTTTAGAACAGGGCAGTGTGATATAAGAGAGATGAGATAAGTAAGCAGAGGCTGGGAAGAGTCTACAGTCTTGCAGTCTCAAGGATATGGTGATTCAAGGGCATAAATGAAAAACAACAGGCTATAAGAGAGGTCTCAGACAAAGAAAGATTTAAAAAGCATTCCTTCTGATATTCCTCATTGCAAAAGCGCCTCCACATGCTAATAATGAAAAATGGGCTGGGTGCGGTGGCTCACGCCTGTAATCCAAACACTTTGGGAGGCCGAGACGGGCGGATCACAAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCATCTCTACTGAACAAAATACAAAAAATTAGCCGGGCGTGGTGGCGGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAAGAGAATGGCATGAACCCAGGAGGCGGAGCTTGCAGTGAGCAGAGATCGCGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCTGTCTCAAAAATAAAAAATAAAAAAATAATAATAAATGAACAACTTGCAAGCAAGTGAAGAAATTATCAGACATAATAGTCTTGAAAGCTTATCTTGGACTAGTTCAGATGGAGTTGGAAAGGGCTTCAGGATATTTTTAAAAATGGTAAGATCATATAAGAAGTAGTATAAACAATAAATACAGTTCAGAGTGCCTTTTGGAGCT
CTGTGCATGCTTGAGTTCTTCTGCTGGCTTTGTCAAAACTTACATCTGTCTTACGCAAAGCGGGATCTGCAAACTGGATTCACTCTAGGAATTATTTGTTACTATGCCATAAGATAAAGTCAACAATTCAGAGTGCATCAAGAAACTTATGTAGCGATTTTACAGAGTACCATTTATGTCTAATGTATTTAATAATAAAGGAGGAAGCATGGGTGTTGGGCATCTTATTTTTGTAACGCTTTGGTTTTATTCTATCTTACATACAATGGATTAGGGGAAAAGTGTTCCTTCCTCAAGATATTTTGAGAAGTACTGAGCAACATATGAAAAGCAGTTTGGGGAGAGATGCGGTATGTTGCTTGCTGGTTCTATTTGACACCATCACTATATGGAACGGGCTGAAAATCGGCCAACTTGGGCTCACTTAAGGCTCCTATGAGCTATTCTTTGTTGCCAGCACATATTAATTCCCGCTCTTTCTCTTCCCCTCTCCCCGCTTACTGTTGTGAAGTAGCATTAAGCCTGTTCAGAGAATTTGGAATAAAAATATATGGGGGCCAATTAGGAGAGCAACATGGCTGCTGAATTTAATAGGTACTCTTTGTGTCTACTCACTATTTGACTCTGAGCAAGAGCAATCTACAATTCAAGTTAAAATTCAAGCCGGGCGCAGTGGCTCATGCCTGCAATCCCAGCACTTTGGGAGGCCGAGGCAGGAGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAAAAATTAGCCGGGCGTGGTGGGGGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAATCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCAAGACTCTGTCTCAAAATAAAAATAAAAAAAAATAAAAATAAAAAATAAAAAAATTCAAGTTTACCGAATGAAGCCATGATATCTGCTACAATAGGGCTGGATCTGAACTATTACCCTCATTGCAAGAAGCCAGACACAAAAGAACATGTATTATATTGATATAAAATATCCAAAAAGGCAAATACATATGGACAGAAATAGATTGACAATTGCCTAGGGCTGGGGTGGGAGTGAAAAATGACTGTAAATGGAGAAGAAACTTCTTTTTAAGGTGGTGGAAAAGTGCCAAAATTAGATCGCTATGATGGCTATATAACTCTGTAAATATCCTGCAAATTTTAATGAACGTGGTGGGGGGGAGTTATTCATCCCTCAAAATAGTTTGAGAGGCTACCAAGAACCAGAACTAAACCAGATTGATGCAAATTGAATTTTACACTTAGAATGAGTACATTGATGGTCTAACAATTACATTTCAAAAACGCTGATTTATAAAAAAGTAATCCCGACCAGTAAAAACAGCAGTCATTTCTCAGCCTCCAAATTTTTTAAATGAGTGGAAAACTTTAAGCAATCAATACAAGTCATATGATACACCTCAAATAAAAGTGGTATACTACACTGAGTGCCTTGCAGATATTCTCCCACTTAATCGTGATAAGAAATACATACACATTTATTTTTATTTTTACTCTTTTGATAGTGAAGGGACCATGGCCCAGAACAGGGGTGGCAATATACCAAATGCCACTAAGATGGTAACCGTGGAAGTTTAAATTCAAGTTCATGTTGGTATATATATATACACATACATATTTATACATATGTGTATAACGAGTTACTATTACGTATATATATACAATGAGTGGAAAATATTAGTGGATATAAGTTCATGTATGATTATGTATACATATTATGATTATACTATTATGTATATGTATACAATGAGTGGATAATATTAGTGGATATAAGTTCATGTTGGTCTTATATATACATATATAATGTGTGGATATGAGTGTGTATATATATACATATATAATGTGTGGATATGAGTGTGTGTATATAC
ACACATGCATATTGTATATATGTGTGTATATATAGACACATTATATATATACGCATATATACTACACACACACACACACGGGTGTGTGTCTGTATCTTTTCCACAAATCCTTCAACCCATTTTGCAGAGGTCAAATAGACAGTCGGAAGACCCTATGCTCAGGTGACTTAAAAATAATTTCCAAATCACATTATGGAGTTTGTATGTATTACACACATTTATTGATAGAGATACCCATATTCTACTAATCTTTTATTGGCAATAATTTATGTTAAGAATACCCAAGACTGAGAAAGCCTCATTCCTTTGGTAGTGATTAAAATAAAACATACTAAATTAACTTATAGACAAGTTATAGAACATACATTTGTGAAAAAAATTACTCACCTATGATTGGGACTTTGTATTTTTACCTTATACTTACTCAATGAAATAAAATTTTGAAAAATATTCCTGTAAATGTACCAGAACCTATTTTATACCGTGATGATCCTTAACATTTCAGACGACATGGTAGTGTCAATTTAAAAAGCAGCACTATGGAGCAGGGTTTGTTATTGTTAGAAATACACATTTGTTTCACACGTCAAGGGTAAAAATTAAAAAACAAGATTAATGTTACCCAAAAGGAAACCATTCATCAGGATTCTTACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGA', 'pos': '32816643', 'alt': u'GCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC'}} + assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639_32841473delinsAAGTTGATTACATTAACCTGTGGATAATTACGAGTTGATTGTCGGACCCAGCTCAGGAGAATCTTTTCACTGTTGGTTTGTTGCAATCCAGCCATGATATTTTTCATTACATTTTTGACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC', 'vcf': {'chr': 'chrX', 'ref': 
'ACATGTGGAAATAAATTTTCATAAGAAAATGCATTCCTTGAGCAAGAACCATGCAAACTTAAATATGAATGTCCTTGATCTTCAGTGATAAATAGAAATTTTAGGGCCAATTAGTAATGAGACATAATAGATTCTACCAGAAGTTAAGTCTATTCTCAAAGGCTAGGAGTCTATTCTGATTCATTGGTATCTATGCCATACCATTTACTGAATGTGTTCACTATTACTCCTGGATTCTGCCATATTAGACCCAACATTTACTGTTTATAGAATTGCTCACCTTCTAGCTTTCAATGAACAAGCTAGAGAAGACAGTGTATAAAACATTAGACTTATTGTTAACCAAGGACTTAAGATATAAAAGCGCTTTTTGGATAGGAGGAATACTATAGATAATTGTTTTTTAAAGTATATGTTCAAATTTTGCCTTTGCTATTAATTGTGGTAATTGGACAAGTTGCTTAACCTCTTCTAGGCCTCATTTCTGTGTATCTTACATGCAAGGTTAGTGTAAGATCAAAGTACAAAATATGGTCAGATAAAACCCCTGATTTCATACCATCATTTAACGCAAGCTATATTTATTGGCTGAACTGAAAAGAAGACAGAAATGTTTGCCTACCTAAAATAAAGAAAAAAAACAACAAAGGAAGGAAATATTTATCCCTAAGAGGTGTACATATGTAGCCTTGGTTCTATTTTCATTACTTGGTTTCCATGTATTTGATTGTCTCATAGGCTTCGTGCATGTGCAGATGTTATTCAACAGATTATCAAGTTTAATTCTTCTGAGTCATCTGATGCAGAAATCCATTCTAAGCATTTTTTCTAAGTTTCATGAATGCAGTGACGCTTCATAGAAAAAACTGTCTACATCTTTTATTAAAGAATTATTATTTATTAAAGACTGCTTTTTAAATATAAGCCCCTTATGAGATGCTGGAAGAAGACATCTATGTAAAAGTGATCAGTCTGTACCACTAAGCACTTTCCTTCTGGGAAGGTTAAGGTTCATATACTATCAACATTTGATCAGATGATGAATATTTATTGTGCATTCTAAATATGACAATTTTATTTTTTCATCCATATAGAAGTTAACAAATGCCTTTGGGGTAAATGCATATTTTAACATCTCCAATGTTAGTCATCTTTTTATTAAAAAGTAAATGCAAATTTTAGGATATACTACTAAATCTCATTTTAACATACTTTTAAAAGATTTGAGATGATGCTTTGTAAACACAGTAACGATTCGGAGATATGAAATCTTTGCATGTATCTGTCACAATCCCTTGATAACTAAGCCTATTATTTAGGAGTGGATTATCCAAGTTTTCCTAATTCCTTCTTGACCTACTTATTTAGTCAGTTACTCAGTGAATACAGGTCAAATAATTACGTAATTTGTCTCATATTCTAAAATTATAAATTGCTCAACCTTTATGTACAGTGACTGCAGGAGGTGTATTTTGTGTCCTGTGCTAAATTACGGTTGGAATCAGCAGATAGTGGCTTAATGGTGGTAATGGATTTGCAATAATCAAGCAATCCTCCAATGATGAATGTGATTCATTTGTGAAATGCTTGGCCAGTCTGCCCTCTATGTGCATAACGTTAAAGGACAACAAAGTATTCAATCCAAAACTCCTTTATAGCATCATTATGTCACTAATTCAGCTTATCAGCCTCAGCCTCGCTGTTATTGCTTCTACTTAGGTGCAAATGTTGCTCAGACAAAAAAAAATAATAAAGGGCAATGTATGATAATCATGTCTCCTTTTCCTACATAAAAGGCAGTTATCAAAAAATCATTGAGCTGGTCCCCAAAGGTGAGCCTGTAAACAGAAAACTGTCCCGTCCTCAGGTCTCACTGTCTCTAAAGGAGGCTCAATTTAAACAGATGTATTTGTGTGTGTGCGCGCATACAACTATAAATCAATTTTTAAAAGGAAAAAAGTTGAAATGTCTTTGAGCAGTGATTTTTAAAAATGACTAG
GAGTTAACTGGGCCAAGAAGGCATGATGGACATGGAGGTGCACCACCAAGATTGACCTTCAAGGAAGGGCTTGTTGCTCCAGCTGTGGTCAGGCAGCTTCTAGTTGTTAGTTCTCTCAGGTACCACCTGAGTTGCAGAGTCCTGCCAGCCAATGTCACACCCTCTCTAGGGCAACCCACTACTATTGACTGATGAAGGGGCAGAGTATAAACACTCAGCCATTTTAACCCCATTTGAGACAACTCTGAGGGGTCATCCTAGCTGCAAGTTGTCTATGGGGTTTGAATAAGCGTGTCATAGGGTCTACGTCACAGCTCTAGTTCTTCCTCTTTCCACTCCTGTCACCTCTGCCCTTCTACAGGTGTTTTTTTTTTTTTTTTTTTTTCCAGGGCATGGCTTAATAAACATGCTGCCTGCCAAACTCAACTCCGAAGTCTACTCCCCAGAACCCAGACTGTGATTAAAAGAAGTTGCAGCAGAGGAATAGAAATGAAGAGGGTAGGAATAATCTTATATGGTAAGGATAGTTTCTGTGAAGCTCTGCAGAAGAGCAGAGCAAGAACTGTCTTGCACAGAAGTGACTTGGTAGGCAGGAATTTACAGGGATGGCTTTCAGCAGTGCGAACGTCATCATTCTCCTGTAATTGCAAATTGTATCAATAATATGAACATTCTAAAGTGAATTAGCGTTGCCATTAAGTACATCCCTTGAATTGTCCAGTAAAAGTATGGACCTGACTTCTGATAGCTCACCACTGTTAGCAAGATTTGTTATTACAACCAGCTGGTTGCCAGAGCCAAGTTTAGCACTCCGAAAAAAAAAGAATTGCACTTAAGAAGGAAGAAAAAGATGCCAAACACATGTTATATGCTTTGCAATTTTGTTCAAACCTGCCTATGTTACAAAATATGCGTTACTGTTTTAGGCTAGCAATTTTATTTGAGATGGACAGTAGAAAAACAGTAATAGGAAAACAGAGTTTGCATTCCTGAGTTTTCACAAGTTCAATGAGAAGTCCTCCATGGCACTATTAATAACATAGTATGTTGCATTAGAAAAAGGGGCCAACATGGTTCTATCTGTAAATGCCGCTCGTTATCTGATAGATCTAGAATGTACATTAAGTTGACCAAAATGTCCTTTAATCTTTGCTGAAATTCAAGGGCTTCTACCATTTAATAGGAAGAAAATATGTAATGTTGGTGTAAAAAAAAAAAAAAAAAAGAAAAACACATTTTTAGATTCACTGGATTATGATGAACACTGAGCAAGATAATTTTTCAAAGTGTGCTTAATAGTTTTAGAACAGGGCAGTGTGATATAAGAGAGATGAGATAAGTAAGCAGAGGCTGGGAAGAGTCTACAGTCTTGCAGTCTCAAGGATATGGTGATTCAAGGGCATAAATGAAAAACAACAGGCTATAAGAGAGGTCTCAGACAAAGAAAGATTTAAAAAGCATTCCTTCTGATATTCCTCATTGCAAAAGCGCCTCCACATGCTAATAATGAAAAATGGGCTGGGTGCGGTGGCTCACGCCTGTAATCCAAACACTTTGGGAGGCCGAGACGGGCGGATCACAAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCATCTCTACTGAACAAAATACAAAAAATTAGCCGGGCGTGGTGGCGGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAAGAGAATGGCATGAACCCAGGAGGCGGAGCTTGCAGTGAGCAGAGATCGCGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCTGTCTCAAAAATAAAAAATAAAAAAATAATAATAAATGAACAACTTGCAAGCAAGTGAAGAAATTATCAGACATAATAGTCTTGAAAGCTTATCTTGGACTAGTTCAGATGGAGTTGGAAAGGGCTTCAGGATATTTTTAAAAATGGTAAGATCATATAAGAAGTAGTATAAACAATAAATACAGTTCAGAGTGCCTTTTGGAGCT
CTGTGCATGCTTGAGTTCTTCTGCTGGCTTTGTCAAAACTTACATCTGTCTTACGCAAAGCGGGATCTGCAAACTGGATTCACTCTAGGAATTATTTGTTACTATGCCATAAGATAAAGTCAACAATTCAGAGTGCATCAAGAAACTTATGTAGCGATTTTACAGAGTACCATTTATGTCTAATGTATTTAATAATAAAGGAGGAAGCATGGGTGTTGGGCATCTTATTTTTGTAACGCTTTGGTTTTATTCTATCTTACATACAATGGATTAGGGGAAAAGTGTTCCTTCCTCAAGATATTTTGAGAAGTACTGAGCAACATATGAAAAGCAGTTTGGGGAGAGATGCGGTATGTTGCTTGCTGGTTCTATTTGACACCATCACTATATGGAACGGGCTGAAAATCGGCCAACTTGGGCTCACTTAAGGCTCCTATGAGCTATTCTTTGTTGCCAGCACATATTAATTCCCGCTCTTTCTCTTCCCCTCTCCCCGCTTACTGTTGTGAAGTAGCATTAAGCCTGTTCAGAGAATTTGGAATAAAAATATATGGGGGCCAATTAGGAGAGCAACATGGCTGCTGAATTTAATAGGTACTCTTTGTGTCTACTCACTATTTGACTCTGAGCAAGAGCAATCTACAATTCAAGTTAAAATTCAAGCCGGGCGCAGTGGCTCATGCCTGCAATCCCAGCACTTTGGGAGGCCGAGGCAGGAGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAAAAATTAGCCGGGCGTGGTGGGGGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAATCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCAAGACTCTGTCTCAAAATAAAAATAAAAAAAAATAAAAATAAAAAATAAAAAAATTCAAGTTTACCGAATGAAGCCATGATATCTGCTACAATAGGGCTGGATCTGAACTATTACCCTCATTGCAAGAAGCCAGACACAAAAGAACATGTATTATATTGATATAAAATATCCAAAAAGGCAAATACATATGGACAGAAATAGATTGACAATTGCCTAGGGCTGGGGTGGGAGTGAAAAATGACTGTAAATGGAGAAGAAACTTCTTTTTAAGGTGGTGGAAAAGTGCCAAAATTAGATCGCTATGATGGCTATATAACTCTGTAAATATCCTGCAAATTTTAATGAACGTGGTGGGGGGGAGTTATTCATCCCTCAAAATAGTTTGAGAGGCTACCAAGAACCAGAACTAAACCAGATTGATGCAAATTGAATTTTACACTTAGAATGAGTACATTGATGGTCTAACAATTACATTTCAAAAACGCTGATTTATAAAAAAGTAATCCCGACCAGTAAAAACAGCAGTCATTTCTCAGCCTCCAAATTTTTTAAATGAGTGGAAAACTTTAAGCAATCAATACAAGTCATATGATACACCTCAAATAAAAGTGGTATACTACACTGAGTGCCTTGCAGATATTCTCCCACTTAATCGTGATAAGAAATACATACACATTTATTTTTATTTTTACTCTTTTGATAGTGAAGGGACCATGGCCCAGAACAGGGGTGGCAATATACCAAATGCCACTAAGATGGTAACCGTGGAAGTTTAAATTCAAGTTCATGTTGGTATATATATATACACATACATATTTATACATATGTGTATAACGAGTTACTATTACGTATATATATACAATGAGTGGAAAATATTAGTGGATATAAGTTCATGTATGATTATGTATACATATTATGATTATACTATTATGTATATGTATACAATGAGTGGATAATATTAGTGGATATAAGTTCATGTTGGTCTTATATATACATATATAATGTGTGGATATGAGTGTGTATATATATACATATATAATGTGTGGATATGAGTGTGTGTATATAC
ACACATGCATATTGTATATATGTGTGTATATATAGACACATTATATATATACGCATATATACTACACACACACACACACGGGTGTGTGTCTGTATCTTTTCCACAAATCCTTCAACCCATTTTGCAGAGGTCAAATAGACAGTCGGAAGACCCTATGCTCAGGTGACTTAAAAATAATTTCCAAATCACATTATGGAGTTTGTATGTATTACACACATTTATTGATAGAGATACCCATATTCTACTAATCTTTTATTGGCAATAATTTATGTTAAGAATACCCAAGACTGAGAAAGCCTCATTCCTTTGGTAGTGATTAAAATAAAACATACTAAATTAACTTATAGACAAGTTATAGAACATACATTTGTGAAAAAAATTACTCACCTATGATTGGGACTTTGTATTTTTACCTTATACTTACTCAATGAAATAAAATTTTGAAAAATATTCCTGTAAATGTACCAGAACCTATTTTATACCGTGATGATCCTTAACATTTCAGACGACATGGTAGTGTCAATTTAAAAAGCAGCACTATGGAGCAGGGTTTGTTATTGTTAGAAATACACATTTGTTTCACACGTCAAGGGTAAAAATTAAAAAACAAGATTAATGTTACCCAAAAGGAAACCATTCATCAGGATTCTTACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGA', 'pos': '32834760', 'alt': 'GCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC'}} + assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522_32823356delinsAAGTTGATTACATTAACCTGTGGATAATTACGAGTTGATTGTCGGACCCAGCTCAGGAGAATCTTTTCACTGTTGGTTTGTTGCAATCCAGCCATGATATTTTTCATTACATTTTTGACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC', 'vcf': {'chr': 'chrX', 'ref': 
'ACATGTGGAAATAAATTTTCATAAGAAAATGCATTCCTTGAGCAAGAACCATGCAAACTTAAATATGAATGTCCTTGATCTTCAGTGATAAATAGAAATTTTAGGGCCAATTAGTAATGAGACATAATAGATTCTACCAGAAGTTAAGTCTATTCTCAAAGGCTAGGAGTCTATTCTGATTCATTGGTATCTATGCCATACCATTTACTGAATGTGTTCACTATTACTCCTGGATTCTGCCATATTAGACCCAACATTTACTGTTTATAGAATTGCTCACCTTCTAGCTTTCAATGAACAAGCTAGAGAAGACAGTGTATAAAACATTAGACTTATTGTTAACCAAGGACTTAAGATATAAAAGCGCTTTTTGGATAGGAGGAATACTATAGATAATTGTTTTTTAAAGTATATGTTCAAATTTTGCCTTTGCTATTAATTGTGGTAATTGGACAAGTTGCTTAACCTCTTCTAGGCCTCATTTCTGTGTATCTTACATGCAAGGTTAGTGTAAGATCAAAGTACAAAATATGGTCAGATAAAACCCCTGATTTCATACCATCATTTAACGCAAGCTATATTTATTGGCTGAACTGAAAAGAAGACAGAAATGTTTGCCTACCTAAAATAAAGAAAAAAAACAACAAAGGAAGGAAATATTTATCCCTAAGAGGTGTACATATGTAGCCTTGGTTCTATTTTCATTACTTGGTTTCCATGTATTTGATTGTCTCATAGGCTTCGTGCATGTGCAGATGTTATTCAACAGATTATCAAGTTTAATTCTTCTGAGTCATCTGATGCAGAAATCCATTCTAAGCATTTTTTCTAAGTTTCATGAATGCAGTGACGCTTCATAGAAAAAACTGTCTACATCTTTTATTAAAGAATTATTATTTATTAAAGACTGCTTTTTAAATATAAGCCCCTTATGAGATGCTGGAAGAAGACATCTATGTAAAAGTGATCAGTCTGTACCACTAAGCACTTTCCTTCTGGGAAGGTTAAGGTTCATATACTATCAACATTTGATCAGATGATGAATATTTATTGTGCATTCTAAATATGACAATTTTATTTTTTCATCCATATAGAAGTTAACAAATGCCTTTGGGGTAAATGCATATTTTAACATCTCCAATGTTAGTCATCTTTTTATTAAAAAGTAAATGCAAATTTTAGGATATACTACTAAATCTCATTTTAACATACTTTTAAAAGATTTGAGATGATGCTTTGTAAACACAGTAACGATTCGGAGATATGAAATCTTTGCATGTATCTGTCACAATCCCTTGATAACTAAGCCTATTATTTAGGAGTGGATTATCCAAGTTTTCCTAATTCCTTCTTGACCTACTTATTTAGTCAGTTACTCAGTGAATACAGGTCAAATAATTACGTAATTTGTCTCATATTCTAAAATTATAAATTGCTCAACCTTTATGTACAGTGACTGCAGGAGGTGTATTTTGTGTCCTGTGCTAAATTACGGTTGGAATCAGCAGATAGTGGCTTAATGGTGGTAATGGATTTGCAATAATCAAGCAATCCTCCAATGATGAATGTGATTCATTTGTGAAATGCTTGGCCAGTCTGCCCTCTATGTGCATAACGTTAAAGGACAACAAAGTATTCAATCCAAAACTCCTTTATAGCATCATTATGTCACTAATTCAGCTTATCAGCCTCAGCCTCGCTGTTATTGCTTCTACTTAGGTGCAAATGTTGCTCAGACAAAAAAAAATAATAAAGGGCAATGTATGATAATCATGTCTCCTTTTCCTACATAAAAGGCAGTTATCAAAAAATCATTGAGCTGGTCCCCAAAGGTGAGCCTGTAAACAGAAAACTGTCCCGTCCTCAGGTCTCACTGTCTCTAAAGGAGGCTCAATTTAAACAGATGTATTTGTGTGTGTGCGCGCATACAACTATAAATCAATTTTTAAAAGGAAAAAAGTTGAAATGTCTTTGAGCAGTGATTTTTAAAAATGACTAG
GAGTTAACTGGGCCAAGAAGGCATGATGGACATGGAGGTGCACCACCAAGATTGACCTTCAAGGAAGGGCTTGTTGCTCCAGCTGTGGTCAGGCAGCTTCTAGTTGTTAGTTCTCTCAGGTACCACCTGAGTTGCAGAGTCCTGCCAGCCAATGTCACACCCTCTCTAGGGCAACCCACTACTATTGACTGATGAAGGGGCAGAGTATAAACACTCAGCCATTTTAACCCCATTTGAGACAACTCTGAGGGGTCATCCTAGCTGCAAGTTGTCTATGGGGTTTGAATAAGCGTGTCATAGGGTCTACGTCACAGCTCTAGTTCTTCCTCTTTCCACTCCTGTCACCTCTGCCCTTCTACAGGTGTTTTTTTTTTTTTTTTTTTTTCCAGGGCATGGCTTAATAAACATGCTGCCTGCCAAACTCAACTCCGAAGTCTACTCCCCAGAACCCAGACTGTGATTAAAAGAAGTTGCAGCAGAGGAATAGAAATGAAGAGGGTAGGAATAATCTTATATGGTAAGGATAGTTTCTGTGAAGCTCTGCAGAAGAGCAGAGCAAGAACTGTCTTGCACAGAAGTGACTTGGTAGGCAGGAATTTACAGGGATGGCTTTCAGCAGTGCGAACGTCATCATTCTCCTGTAATTGCAAATTGTATCAATAATATGAACATTCTAAAGTGAATTAGCGTTGCCATTAAGTACATCCCTTGAATTGTCCAGTAAAAGTATGGACCTGACTTCTGATAGCTCACCACTGTTAGCAAGATTTGTTATTACAACCAGCTGGTTGCCAGAGCCAAGTTTAGCACTCCGAAAAAAAAAGAATTGCACTTAAGAAGGAAGAAAAAGATGCCAAACACATGTTATATGCTTTGCAATTTTGTTCAAACCTGCCTATGTTACAAAATATGCGTTACTGTTTTAGGCTAGCAATTTTATTTGAGATGGACAGTAGAAAAACAGTAATAGGAAAACAGAGTTTGCATTCCTGAGTTTTCACAAGTTCAATGAGAAGTCCTCCATGGCACTATTAATAACATAGTATGTTGCATTAGAAAAAGGGGCCAACATGGTTCTATCTGTAAATGCCGCTCGTTATCTGATAGATCTAGAATGTACATTAAGTTGACCAAAATGTCCTTTAATCTTTGCTGAAATTCAAGGGCTTCTACCATTTAATAGGAAGAAAATATGTAATGTTGGTGTAAAAAAAAAAAAAAAAAAGAAAAACACATTTTTAGATTCACTGGATTATGATGAACACTGAGCAAGATAATTTTTCAAAGTGTGCTTAATAGTTTTAGAACAGGGCAGTGTGATATAAGAGAGATGAGATAAGTAAGCAGAGGCTGGGAAGAGTCTACAGTCTTGCAGTCTCAAGGATATGGTGATTCAAGGGCATAAATGAAAAACAACAGGCTATAAGAGAGGTCTCAGACAAAGAAAGATTTAAAAAGCATTCCTTCTGATATTCCTCATTGCAAAAGCGCCTCCACATGCTAATAATGAAAAATGGGCTGGGTGCGGTGGCTCACGCCTGTAATCCAAACACTTTGGGAGGCCGAGACGGGCGGATCACAAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCATCTCTACTGAACAAAATACAAAAAATTAGCCGGGCGTGGTGGCGGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAAGAGAATGGCATGAACCCAGGAGGCGGAGCTTGCAGTGAGCAGAGATCGCGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCTGTCTCAAAAATAAAAAATAAAAAAATAATAATAAATGAACAACTTGCAAGCAAGTGAAGAAATTATCAGACATAATAGTCTTGAAAGCTTATCTTGGACTAGTTCAGATGGAGTTGGAAAGGGCTTCAGGATATTTTTAAAAATGGTAAGATCATATAAGAAGTAGTATAAACAATAAATACAGTTCAGAGTGCCTTTTGGAGCT
CTGTGCATGCTTGAGTTCTTCTGCTGGCTTTGTCAAAACTTACATCTGTCTTACGCAAAGCGGGATCTGCAAACTGGATTCACTCTAGGAATTATTTGTTACTATGCCATAAGATAAAGTCAACAATTCAGAGTGCATCAAGAAACTTATGTAGCGATTTTACAGAGTACCATTTATGTCTAATGTATTTAATAATAAAGGAGGAAGCATGGGTGTTGGGCATCTTATTTTTGTAACGCTTTGGTTTTATTCTATCTTACATACAATGGATTAGGGGAAAAGTGTTCCTTCCTCAAGATATTTTGAGAAGTACTGAGCAACATATGAAAAGCAGTTTGGGGAGAGATGCGGTATGTTGCTTGCTGGTTCTATTTGACACCATCACTATATGGAACGGGCTGAAAATCGGCCAACTTGGGCTCACTTAAGGCTCCTATGAGCTATTCTTTGTTGCCAGCACATATTAATTCCCGCTCTTTCTCTTCCCCTCTCCCCGCTTACTGTTGTGAAGTAGCATTAAGCCTGTTCAGAGAATTTGGAATAAAAATATATGGGGGCCAATTAGGAGAGCAACATGGCTGCTGAATTTAATAGGTACTCTTTGTGTCTACTCACTATTTGACTCTGAGCAAGAGCAATCTACAATTCAAGTTAAAATTCAAGCCGGGCGCAGTGGCTCATGCCTGCAATCCCAGCACTTTGGGAGGCCGAGGCAGGAGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAAAAATTAGCCGGGCGTGGTGGGGGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAATCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCAAGACTCTGTCTCAAAATAAAAATAAAAAAAAATAAAAATAAAAAATAAAAAAATTCAAGTTTACCGAATGAAGCCATGATATCTGCTACAATAGGGCTGGATCTGAACTATTACCCTCATTGCAAGAAGCCAGACACAAAAGAACATGTATTATATTGATATAAAATATCCAAAAAGGCAAATACATATGGACAGAAATAGATTGACAATTGCCTAGGGCTGGGGTGGGAGTGAAAAATGACTGTAAATGGAGAAGAAACTTCTTTTTAAGGTGGTGGAAAAGTGCCAAAATTAGATCGCTATGATGGCTATATAACTCTGTAAATATCCTGCAAATTTTAATGAACGTGGTGGGGGGGAGTTATTCATCCCTCAAAATAGTTTGAGAGGCTACCAAGAACCAGAACTAAACCAGATTGATGCAAATTGAATTTTACACTTAGAATGAGTACATTGATGGTCTAACAATTACATTTCAAAAACGCTGATTTATAAAAAAGTAATCCCGACCAGTAAAAACAGCAGTCATTTCTCAGCCTCCAAATTTTTTAAATGAGTGGAAAACTTTAAGCAATCAATACAAGTCATATGATACACCTCAAATAAAAGTGGTATACTACACTGAGTGCCTTGCAGATATTCTCCCACTTAATCGTGATAAGAAATACATACACATTTATTTTTATTTTTACTCTTTTGATAGTGAAGGGACCATGGCCCAGAACAGGGGTGGCAATATACCAAATGCCACTAAGATGGTAACCGTGGAAGTTTAAATTCAAGTTCATGTTGGTATATATATATACACATACATATTTATACATATGTGTATAACGAGTTACTATTACGTATATATATACAATGAGTGGAAAATATTAGTGGATATAAGTTCATGTATGATTATGTATACATATTATGATTATACTATTATGTATATGTATACAATGAGTGGATAATATTAGTGGATATAAGTTCATGTTGGTCTTATATATACATATATAATGTGTGGATATGAGTGTGTATATATATACATATATAATGTGTGGATATGAGTGTGTGTATATAC
ACACATGCATATTGTATATATGTGTGTATATATAGACACATTATATATATACGCATATATACTACACACACACACACACGGGTGTGTGTCTGTATCTTTTCCACAAATCCTTCAACCCATTTTGCAGAGGTCAAATAGACAGTCGGAAGACCCTATGCTCAGGTGACTTAAAAATAATTTCCAAATCACATTATGGAGTTTGTATGTATTACACACATTTATTGATAGAGATACCCATATTCTACTAATCTTTTATTGGCAATAATTTATGTTAAGAATACCCAAGACTGAGAAAGCCTCATTCCTTTGGTAGTGATTAAAATAAAACATACTAAATTAACTTATAGACAAGTTATAGAACATACATTTGTGAAAAAAATTACTCACCTATGATTGGGACTTTGTATTTTTACCTTATACTTACTCAATGAAATAAAATTTTGAAAAATATTCCTGTAAATGTACCAGAACCTATTTTATACCGTGATGATCCTTAACATTTCAGACGACATGGTAGTGTCAATTTAAAAAGCAGCACTATGGAGCAGGGTTTGTTATTGTTAGAAATACACATTTGTTTCACACGTCAAGGGTAAAAATTAAAAAACAAGATTAATGTTACCCAAAAGGAAACCATTCATCAGGATTCTTACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGA', 'pos': '32816643', 'alt': 'GCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC'}} + assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639_32841473delinsAAGTTGATTACATTAACCTGTGGATAATTACGAGTTGATTGTCGGACCCAGCTCAGGAGAATCTTTTCACTGTTGGTTTGTTGCAATCCAGCCATGATATTTTTCATTACATTTTTGACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC', 'vcf': {'chr': 'X', 'ref': 
'ACATGTGGAAATAAATTTTCATAAGAAAATGCATTCCTTGAGCAAGAACCATGCAAACTTAAATATGAATGTCCTTGATCTTCAGTGATAAATAGAAATTTTAGGGCCAATTAGTAATGAGACATAATAGATTCTACCAGAAGTTAAGTCTATTCTCAAAGGCTAGGAGTCTATTCTGATTCATTGGTATCTATGCCATACCATTTACTGAATGTGTTCACTATTACTCCTGGATTCTGCCATATTAGACCCAACATTTACTGTTTATAGAATTGCTCACCTTCTAGCTTTCAATGAACAAGCTAGAGAAGACAGTGTATAAAACATTAGACTTATTGTTAACCAAGGACTTAAGATATAAAAGCGCTTTTTGGATAGGAGGAATACTATAGATAATTGTTTTTTAAAGTATATGTTCAAATTTTGCCTTTGCTATTAATTGTGGTAATTGGACAAGTTGCTTAACCTCTTCTAGGCCTCATTTCTGTGTATCTTACATGCAAGGTTAGTGTAAGATCAAAGTACAAAATATGGTCAGATAAAACCCCTGATTTCATACCATCATTTAACGCAAGCTATATTTATTGGCTGAACTGAAAAGAAGACAGAAATGTTTGCCTACCTAAAATAAAGAAAAAAAACAACAAAGGAAGGAAATATTTATCCCTAAGAGGTGTACATATGTAGCCTTGGTTCTATTTTCATTACTTGGTTTCCATGTATTTGATTGTCTCATAGGCTTCGTGCATGTGCAGATGTTATTCAACAGATTATCAAGTTTAATTCTTCTGAGTCATCTGATGCAGAAATCCATTCTAAGCATTTTTTCTAAGTTTCATGAATGCAGTGACGCTTCATAGAAAAAACTGTCTACATCTTTTATTAAAGAATTATTATTTATTAAAGACTGCTTTTTAAATATAAGCCCCTTATGAGATGCTGGAAGAAGACATCTATGTAAAAGTGATCAGTCTGTACCACTAAGCACTTTCCTTCTGGGAAGGTTAAGGTTCATATACTATCAACATTTGATCAGATGATGAATATTTATTGTGCATTCTAAATATGACAATTTTATTTTTTCATCCATATAGAAGTTAACAAATGCCTTTGGGGTAAATGCATATTTTAACATCTCCAATGTTAGTCATCTTTTTATTAAAAAGTAAATGCAAATTTTAGGATATACTACTAAATCTCATTTTAACATACTTTTAAAAGATTTGAGATGATGCTTTGTAAACACAGTAACGATTCGGAGATATGAAATCTTTGCATGTATCTGTCACAATCCCTTGATAACTAAGCCTATTATTTAGGAGTGGATTATCCAAGTTTTCCTAATTCCTTCTTGACCTACTTATTTAGTCAGTTACTCAGTGAATACAGGTCAAATAATTACGTAATTTGTCTCATATTCTAAAATTATAAATTGCTCAACCTTTATGTACAGTGACTGCAGGAGGTGTATTTTGTGTCCTGTGCTAAATTACGGTTGGAATCAGCAGATAGTGGCTTAATGGTGGTAATGGATTTGCAATAATCAAGCAATCCTCCAATGATGAATGTGATTCATTTGTGAAATGCTTGGCCAGTCTGCCCTCTATGTGCATAACGTTAAAGGACAACAAAGTATTCAATCCAAAACTCCTTTATAGCATCATTATGTCACTAATTCAGCTTATCAGCCTCAGCCTCGCTGTTATTGCTTCTACTTAGGTGCAAATGTTGCTCAGACAAAAAAAAATAATAAAGGGCAATGTATGATAATCATGTCTCCTTTTCCTACATAAAAGGCAGTTATCAAAAAATCATTGAGCTGGTCCCCAAAGGTGAGCCTGTAAACAGAAAACTGTCCCGTCCTCAGGTCTCACTGTCTCTAAAGGAGGCTCAATTTAAACAGATGTATTTGTGTGTGTGCGCGCATACAACTATAAATCAATTTTTAAAAGGAAAAAAGTTGAAATGTCTTTGAGCAGTGATTTTTAAAAATGACTAG
GAGTTAACTGGGCCAAGAAGGCATGATGGACATGGAGGTGCACCACCAAGATTGACCTTCAAGGAAGGGCTTGTTGCTCCAGCTGTGGTCAGGCAGCTTCTAGTTGTTAGTTCTCTCAGGTACCACCTGAGTTGCAGAGTCCTGCCAGCCAATGTCACACCCTCTCTAGGGCAACCCACTACTATTGACTGATGAAGGGGCAGAGTATAAACACTCAGCCATTTTAACCCCATTTGAGACAACTCTGAGGGGTCATCCTAGCTGCAAGTTGTCTATGGGGTTTGAATAAGCGTGTCATAGGGTCTACGTCACAGCTCTAGTTCTTCCTCTTTCCACTCCTGTCACCTCTGCCCTTCTACAGGTGTTTTTTTTTTTTTTTTTTTTTCCAGGGCATGGCTTAATAAACATGCTGCCTGCCAAACTCAACTCCGAAGTCTACTCCCCAGAACCCAGACTGTGATTAAAAGAAGTTGCAGCAGAGGAATAGAAATGAAGAGGGTAGGAATAATCTTATATGGTAAGGATAGTTTCTGTGAAGCTCTGCAGAAGAGCAGAGCAAGAACTGTCTTGCACAGAAGTGACTTGGTAGGCAGGAATTTACAGGGATGGCTTTCAGCAGTGCGAACGTCATCATTCTCCTGTAATTGCAAATTGTATCAATAATATGAACATTCTAAAGTGAATTAGCGTTGCCATTAAGTACATCCCTTGAATTGTCCAGTAAAAGTATGGACCTGACTTCTGATAGCTCACCACTGTTAGCAAGATTTGTTATTACAACCAGCTGGTTGCCAGAGCCAAGTTTAGCACTCCGAAAAAAAAAGAATTGCACTTAAGAAGGAAGAAAAAGATGCCAAACACATGTTATATGCTTTGCAATTTTGTTCAAACCTGCCTATGTTACAAAATATGCGTTACTGTTTTAGGCTAGCAATTTTATTTGAGATGGACAGTAGAAAAACAGTAATAGGAAAACAGAGTTTGCATTCCTGAGTTTTCACAAGTTCAATGAGAAGTCCTCCATGGCACTATTAATAACATAGTATGTTGCATTAGAAAAAGGGGCCAACATGGTTCTATCTGTAAATGCCGCTCGTTATCTGATAGATCTAGAATGTACATTAAGTTGACCAAAATGTCCTTTAATCTTTGCTGAAATTCAAGGGCTTCTACCATTTAATAGGAAGAAAATATGTAATGTTGGTGTAAAAAAAAAAAAAAAAAAGAAAAACACATTTTTAGATTCACTGGATTATGATGAACACTGAGCAAGATAATTTTTCAAAGTGTGCTTAATAGTTTTAGAACAGGGCAGTGTGATATAAGAGAGATGAGATAAGTAAGCAGAGGCTGGGAAGAGTCTACAGTCTTGCAGTCTCAAGGATATGGTGATTCAAGGGCATAAATGAAAAACAACAGGCTATAAGAGAGGTCTCAGACAAAGAAAGATTTAAAAAGCATTCCTTCTGATATTCCTCATTGCAAAAGCGCCTCCACATGCTAATAATGAAAAATGGGCTGGGTGCGGTGGCTCACGCCTGTAATCCAAACACTTTGGGAGGCCGAGACGGGCGGATCACAAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCATCTCTACTGAACAAAATACAAAAAATTAGCCGGGCGTGGTGGCGGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAAGAGAATGGCATGAACCCAGGAGGCGGAGCTTGCAGTGAGCAGAGATCGCGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCTGTCTCAAAAATAAAAAATAAAAAAATAATAATAAATGAACAACTTGCAAGCAAGTGAAGAAATTATCAGACATAATAGTCTTGAAAGCTTATCTTGGACTAGTTCAGATGGAGTTGGAAAGGGCTTCAGGATATTTTTAAAAATGGTAAGATCATATAAGAAGTAGTATAAACAATAAATACAGTTCAGAGTGCCTTTTGGAGCT
CTGTGCATGCTTGAGTTCTTCTGCTGGCTTTGTCAAAACTTACATCTGTCTTACGCAAAGCGGGATCTGCAAACTGGATTCACTCTAGGAATTATTTGTTACTATGCCATAAGATAAAGTCAACAATTCAGAGTGCATCAAGAAACTTATGTAGCGATTTTACAGAGTACCATTTATGTCTAATGTATTTAATAATAAAGGAGGAAGCATGGGTGTTGGGCATCTTATTTTTGTAACGCTTTGGTTTTATTCTATCTTACATACAATGGATTAGGGGAAAAGTGTTCCTTCCTCAAGATATTTTGAGAAGTACTGAGCAACATATGAAAAGCAGTTTGGGGAGAGATGCGGTATGTTGCTTGCTGGTTCTATTTGACACCATCACTATATGGAACGGGCTGAAAATCGGCCAACTTGGGCTCACTTAAGGCTCCTATGAGCTATTCTTTGTTGCCAGCACATATTAATTCCCGCTCTTTCTCTTCCCCTCTCCCCGCTTACTGTTGTGAAGTAGCATTAAGCCTGTTCAGAGAATTTGGAATAAAAATATATGGGGGCCAATTAGGAGAGCAACATGGCTGCTGAATTTAATAGGTACTCTTTGTGTCTACTCACTATTTGACTCTGAGCAAGAGCAATCTACAATTCAAGTTAAAATTCAAGCCGGGCGCAGTGGCTCATGCCTGCAATCCCAGCACTTTGGGAGGCCGAGGCAGGAGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAAAAATTAGCCGGGCGTGGTGGGGGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAATCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCAAGACTCTGTCTCAAAATAAAAATAAAAAAAAATAAAAATAAAAAATAAAAAAATTCAAGTTTACCGAATGAAGCCATGATATCTGCTACAATAGGGCTGGATCTGAACTATTACCCTCATTGCAAGAAGCCAGACACAAAAGAACATGTATTATATTGATATAAAATATCCAAAAAGGCAAATACATATGGACAGAAATAGATTGACAATTGCCTAGGGCTGGGGTGGGAGTGAAAAATGACTGTAAATGGAGAAGAAACTTCTTTTTAAGGTGGTGGAAAAGTGCCAAAATTAGATCGCTATGATGGCTATATAACTCTGTAAATATCCTGCAAATTTTAATGAACGTGGTGGGGGGGAGTTATTCATCCCTCAAAATAGTTTGAGAGGCTACCAAGAACCAGAACTAAACCAGATTGATGCAAATTGAATTTTACACTTAGAATGAGTACATTGATGGTCTAACAATTACATTTCAAAAACGCTGATTTATAAAAAAGTAATCCCGACCAGTAAAAACAGCAGTCATTTCTCAGCCTCCAAATTTTTTAAATGAGTGGAAAACTTTAAGCAATCAATACAAGTCATATGATACACCTCAAATAAAAGTGGTATACTACACTGAGTGCCTTGCAGATATTCTCCCACTTAATCGTGATAAGAAATACATACACATTTATTTTTATTTTTACTCTTTTGATAGTGAAGGGACCATGGCCCAGAACAGGGGTGGCAATATACCAAATGCCACTAAGATGGTAACCGTGGAAGTTTAAATTCAAGTTCATGTTGGTATATATATATACACATACATATTTATACATATGTGTATAACGAGTTACTATTACGTATATATATACAATGAGTGGAAAATATTAGTGGATATAAGTTCATGTATGATTATGTATACATATTATGATTATACTATTATGTATATGTATACAATGAGTGGATAATATTAGTGGATATAAGTTCATGTTGGTCTTATATATACATATATAATGTGTGGATATGAGTGTGTATATATATACATATATAATGTGTGGATATGAGTGTGTGTATATAC
ACACATGCATATTGTATATATGTGTGTATATATAGACACATTATATATATACGCATATATACTACACACACACACACACGGGTGTGTGTCTGTATCTTTTCCACAAATCCTTCAACCCATTTTGCAGAGGTCAAATAGACAGTCGGAAGACCCTATGCTCAGGTGACTTAAAAATAATTTCCAAATCACATTATGGAGTTTGTATGTATTACACACATTTATTGATAGAGATACCCATATTCTACTAATCTTTTATTGGCAATAATTTATGTTAAGAATACCCAAGACTGAGAAAGCCTCATTCCTTTGGTAGTGATTAAAATAAAACATACTAAATTAACTTATAGACAAGTTATAGAACATACATTTGTGAAAAAAATTACTCACCTATGATTGGGACTTTGTATTTTTACCTTATACTTACTCAATGAAATAAAATTTTGAAAAATATTCCTGTAAATGTACCAGAACCTATTTTATACCGTGATGATCCTTAACATTTCAGACGACATGGTAGTGTCAATTTAAAAAGCAGCACTATGGAGCAGGGTTTGTTATTGTTAGAAATACACATTTGTTTCACACGTCAAGGGTAAAAATTAAAAAACAAGATTAATGTTACCCAAAAGGAAACCATTCATCAGGATTCTTACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGA', 'pos': '32834760', 'alt': 'GCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC'}} + assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522_32823356delinsAAGTTGATTACATTAACCTGTGGATAATTACGAGTTGATTGTCGGACCCAGCTCAGGAGAATCTTTTCACTGTTGGTTTGTTGCAATCCAGCCATGATATTTTTCATTACATTTTTGACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC', 'vcf': {'chr': 'X', 'ref': 
'ACATGTGGAAATAAATTTTCATAAGAAAATGCATTCCTTGAGCAAGAACCATGCAAACTTAAATATGAATGTCCTTGATCTTCAGTGATAAATAGAAATTTTAGGGCCAATTAGTAATGAGACATAATAGATTCTACCAGAAGTTAAGTCTATTCTCAAAGGCTAGGAGTCTATTCTGATTCATTGGTATCTATGCCATACCATTTACTGAATGTGTTCACTATTACTCCTGGATTCTGCCATATTAGACCCAACATTTACTGTTTATAGAATTGCTCACCTTCTAGCTTTCAATGAACAAGCTAGAGAAGACAGTGTATAAAACATTAGACTTATTGTTAACCAAGGACTTAAGATATAAAAGCGCTTTTTGGATAGGAGGAATACTATAGATAATTGTTTTTTAAAGTATATGTTCAAATTTTGCCTTTGCTATTAATTGTGGTAATTGGACAAGTTGCTTAACCTCTTCTAGGCCTCATTTCTGTGTATCTTACATGCAAGGTTAGTGTAAGATCAAAGTACAAAATATGGTCAGATAAAACCCCTGATTTCATACCATCATTTAACGCAAGCTATATTTATTGGCTGAACTGAAAAGAAGACAGAAATGTTTGCCTACCTAAAATAAAGAAAAAAAACAACAAAGGAAGGAAATATTTATCCCTAAGAGGTGTACATATGTAGCCTTGGTTCTATTTTCATTACTTGGTTTCCATGTATTTGATTGTCTCATAGGCTTCGTGCATGTGCAGATGTTATTCAACAGATTATCAAGTTTAATTCTTCTGAGTCATCTGATGCAGAAATCCATTCTAAGCATTTTTTCTAAGTTTCATGAATGCAGTGACGCTTCATAGAAAAAACTGTCTACATCTTTTATTAAAGAATTATTATTTATTAAAGACTGCTTTTTAAATATAAGCCCCTTATGAGATGCTGGAAGAAGACATCTATGTAAAAGTGATCAGTCTGTACCACTAAGCACTTTCCTTCTGGGAAGGTTAAGGTTCATATACTATCAACATTTGATCAGATGATGAATATTTATTGTGCATTCTAAATATGACAATTTTATTTTTTCATCCATATAGAAGTTAACAAATGCCTTTGGGGTAAATGCATATTTTAACATCTCCAATGTTAGTCATCTTTTTATTAAAAAGTAAATGCAAATTTTAGGATATACTACTAAATCTCATTTTAACATACTTTTAAAAGATTTGAGATGATGCTTTGTAAACACAGTAACGATTCGGAGATATGAAATCTTTGCATGTATCTGTCACAATCCCTTGATAACTAAGCCTATTATTTAGGAGTGGATTATCCAAGTTTTCCTAATTCCTTCTTGACCTACTTATTTAGTCAGTTACTCAGTGAATACAGGTCAAATAATTACGTAATTTGTCTCATATTCTAAAATTATAAATTGCTCAACCTTTATGTACAGTGACTGCAGGAGGTGTATTTTGTGTCCTGTGCTAAATTACGGTTGGAATCAGCAGATAGTGGCTTAATGGTGGTAATGGATTTGCAATAATCAAGCAATCCTCCAATGATGAATGTGATTCATTTGTGAAATGCTTGGCCAGTCTGCCCTCTATGTGCATAACGTTAAAGGACAACAAAGTATTCAATCCAAAACTCCTTTATAGCATCATTATGTCACTAATTCAGCTTATCAGCCTCAGCCTCGCTGTTATTGCTTCTACTTAGGTGCAAATGTTGCTCAGACAAAAAAAAATAATAAAGGGCAATGTATGATAATCATGTCTCCTTTTCCTACATAAAAGGCAGTTATCAAAAAATCATTGAGCTGGTCCCCAAAGGTGAGCCTGTAAACAGAAAACTGTCCCGTCCTCAGGTCTCACTGTCTCTAAAGGAGGCTCAATTTAAACAGATGTATTTGTGTGTGTGCGCGCATACAACTATAAATCAATTTTTAAAAGGAAAAAAGTTGAAATGTCTTTGAGCAGTGATTTTTAAAAATGACTAG
GAGTTAACTGGGCCAAGAAGGCATGATGGACATGGAGGTGCACCACCAAGATTGACCTTCAAGGAAGGGCTTGTTGCTCCAGCTGTGGTCAGGCAGCTTCTAGTTGTTAGTTCTCTCAGGTACCACCTGAGTTGCAGAGTCCTGCCAGCCAATGTCACACCCTCTCTAGGGCAACCCACTACTATTGACTGATGAAGGGGCAGAGTATAAACACTCAGCCATTTTAACCCCATTTGAGACAACTCTGAGGGGTCATCCTAGCTGCAAGTTGTCTATGGGGTTTGAATAAGCGTGTCATAGGGTCTACGTCACAGCTCTAGTTCTTCCTCTTTCCACTCCTGTCACCTCTGCCCTTCTACAGGTGTTTTTTTTTTTTTTTTTTTTTCCAGGGCATGGCTTAATAAACATGCTGCCTGCCAAACTCAACTCCGAAGTCTACTCCCCAGAACCCAGACTGTGATTAAAAGAAGTTGCAGCAGAGGAATAGAAATGAAGAGGGTAGGAATAATCTTATATGGTAAGGATAGTTTCTGTGAAGCTCTGCAGAAGAGCAGAGCAAGAACTGTCTTGCACAGAAGTGACTTGGTAGGCAGGAATTTACAGGGATGGCTTTCAGCAGTGCGAACGTCATCATTCTCCTGTAATTGCAAATTGTATCAATAATATGAACATTCTAAAGTGAATTAGCGTTGCCATTAAGTACATCCCTTGAATTGTCCAGTAAAAGTATGGACCTGACTTCTGATAGCTCACCACTGTTAGCAAGATTTGTTATTACAACCAGCTGGTTGCCAGAGCCAAGTTTAGCACTCCGAAAAAAAAAGAATTGCACTTAAGAAGGAAGAAAAAGATGCCAAACACATGTTATATGCTTTGCAATTTTGTTCAAACCTGCCTATGTTACAAAATATGCGTTACTGTTTTAGGCTAGCAATTTTATTTGAGATGGACAGTAGAAAAACAGTAATAGGAAAACAGAGTTTGCATTCCTGAGTTTTCACAAGTTCAATGAGAAGTCCTCCATGGCACTATTAATAACATAGTATGTTGCATTAGAAAAAGGGGCCAACATGGTTCTATCTGTAAATGCCGCTCGTTATCTGATAGATCTAGAATGTACATTAAGTTGACCAAAATGTCCTTTAATCTTTGCTGAAATTCAAGGGCTTCTACCATTTAATAGGAAGAAAATATGTAATGTTGGTGTAAAAAAAAAAAAAAAAAAGAAAAACACATTTTTAGATTCACTGGATTATGATGAACACTGAGCAAGATAATTTTTCAAAGTGTGCTTAATAGTTTTAGAACAGGGCAGTGTGATATAAGAGAGATGAGATAAGTAAGCAGAGGCTGGGAAGAGTCTACAGTCTTGCAGTCTCAAGGATATGGTGATTCAAGGGCATAAATGAAAAACAACAGGCTATAAGAGAGGTCTCAGACAAAGAAAGATTTAAAAAGCATTCCTTCTGATATTCCTCATTGCAAAAGCGCCTCCACATGCTAATAATGAAAAATGGGCTGGGTGCGGTGGCTCACGCCTGTAATCCAAACACTTTGGGAGGCCGAGACGGGCGGATCACAAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCATCTCTACTGAACAAAATACAAAAAATTAGCCGGGCGTGGTGGCGGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAAGAGAATGGCATGAACCCAGGAGGCGGAGCTTGCAGTGAGCAGAGATCGCGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCTGTCTCAAAAATAAAAAATAAAAAAATAATAATAAATGAACAACTTGCAAGCAAGTGAAGAAATTATCAGACATAATAGTCTTGAAAGCTTATCTTGGACTAGTTCAGATGGAGTTGGAAAGGGCTTCAGGATATTTTTAAAAATGGTAAGATCATATAAGAAGTAGTATAAACAATAAATACAGTTCAGAGTGCCTTTTGGAGCT
CTGTGCATGCTTGAGTTCTTCTGCTGGCTTTGTCAAAACTTACATCTGTCTTACGCAAAGCGGGATCTGCAAACTGGATTCACTCTAGGAATTATTTGTTACTATGCCATAAGATAAAGTCAACAATTCAGAGTGCATCAAGAAACTTATGTAGCGATTTTACAGAGTACCATTTATGTCTAATGTATTTAATAATAAAGGAGGAAGCATGGGTGTTGGGCATCTTATTTTTGTAACGCTTTGGTTTTATTCTATCTTACATACAATGGATTAGGGGAAAAGTGTTCCTTCCTCAAGATATTTTGAGAAGTACTGAGCAACATATGAAAAGCAGTTTGGGGAGAGATGCGGTATGTTGCTTGCTGGTTCTATTTGACACCATCACTATATGGAACGGGCTGAAAATCGGCCAACTTGGGCTCACTTAAGGCTCCTATGAGCTATTCTTTGTTGCCAGCACATATTAATTCCCGCTCTTTCTCTTCCCCTCTCCCCGCTTACTGTTGTGAAGTAGCATTAAGCCTGTTCAGAGAATTTGGAATAAAAATATATGGGGGCCAATTAGGAGAGCAACATGGCTGCTGAATTTAATAGGTACTCTTTGTGTCTACTCACTATTTGACTCTGAGCAAGAGCAATCTACAATTCAAGTTAAAATTCAAGCCGGGCGCAGTGGCTCATGCCTGCAATCCCAGCACTTTGGGAGGCCGAGGCAGGAGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAAAAATTAGCCGGGCGTGGTGGGGGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAATCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCAAGACTCTGTCTCAAAATAAAAATAAAAAAAAATAAAAATAAAAAATAAAAAAATTCAAGTTTACCGAATGAAGCCATGATATCTGCTACAATAGGGCTGGATCTGAACTATTACCCTCATTGCAAGAAGCCAGACACAAAAGAACATGTATTATATTGATATAAAATATCCAAAAAGGCAAATACATATGGACAGAAATAGATTGACAATTGCCTAGGGCTGGGGTGGGAGTGAAAAATGACTGTAAATGGAGAAGAAACTTCTTTTTAAGGTGGTGGAAAAGTGCCAAAATTAGATCGCTATGATGGCTATATAACTCTGTAAATATCCTGCAAATTTTAATGAACGTGGTGGGGGGGAGTTATTCATCCCTCAAAATAGTTTGAGAGGCTACCAAGAACCAGAACTAAACCAGATTGATGCAAATTGAATTTTACACTTAGAATGAGTACATTGATGGTCTAACAATTACATTTCAAAAACGCTGATTTATAAAAAAGTAATCCCGACCAGTAAAAACAGCAGTCATTTCTCAGCCTCCAAATTTTTTAAATGAGTGGAAAACTTTAAGCAATCAATACAAGTCATATGATACACCTCAAATAAAAGTGGTATACTACACTGAGTGCCTTGCAGATATTCTCCCACTTAATCGTGATAAGAAATACATACACATTTATTTTTATTTTTACTCTTTTGATAGTGAAGGGACCATGGCCCAGAACAGGGGTGGCAATATACCAAATGCCACTAAGATGGTAACCGTGGAAGTTTAAATTCAAGTTCATGTTGGTATATATATATACACATACATATTTATACATATGTGTATAACGAGTTACTATTACGTATATATATACAATGAGTGGAAAATATTAGTGGATATAAGTTCATGTATGATTATGTATACATATTATGATTATACTATTATGTATATGTATACAATGAGTGGATAATATTAGTGGATATAAGTTCATGTTGGTCTTATATATACATATATAATGTGTGGATATGAGTGTGTATATATATACATATATAATGTGTGGATATGAGTGTGTGTATATAC
ACACATGCATATTGTATATATGTGTGTATATATAGACACATTATATATATACGCATATATACTACACACACACACACACGGGTGTGTGTCTGTATCTTTTCCACAAATCCTTCAACCCATTTTGCAGAGGTCAAATAGACAGTCGGAAGACCCTATGCTCAGGTGACTTAAAAATAATTTCCAAATCACATTATGGAGTTTGTATGTATTACACACATTTATTGATAGAGATACCCATATTCTACTAATCTTTTATTGGCAATAATTTATGTTAAGAATACCCAAGACTGAGAAAGCCTCATTCCTTTGGTAGTGATTAAAATAAAACATACTAAATTAACTTATAGACAAGTTATAGAACATACATTTGTGAAAAAAATTACTCACCTATGATTGGGACTTTGTATTTTTACCTTATACTTACTCAATGAAATAAAATTTTGAAAAATATTCCTGTAAATGTACCAGAACCTATTTTATACCGTGATGATCCTTAACATTTCAGACGACATGGTAGTGTCAATTTAAAAAGCAGCACTATGGAGCAGGGTTTGTTATTGTTAGAAATACACATTTGTTTCACACGTCAAGGGTAAAAATTAAAAAACAAGATTAATGTTACCCAAAAGGAAACCATTCATCAGGATTCTTACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGA', 'pos': '32816643', 'alt': 'GCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC'}} assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} assert results['flag'] == 'gene_variant' - assert 'NM_004006.2:c.296T>G' in results.keys() + assert 'NM_004006.2:c.296T>G' in list(results.keys()) assert results['NM_004006.2:c.296T>G']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.296T>G' assert results['NM_004006.2:c.296T>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.296T>G']['alt_genomic_loci'] == [] @@ -7254,13 +7254,13 @@ def test_variant196(self): assert results['NM_004006.2:c.296T>G']['hgvs_lrg_variant'] == 'LRG_199:g.521254T>G' assert results['NM_004006.2:c.296T>G']['hgvs_transcript_variant'] == 'NM_004006.2:c.296T>G' assert 
results['NM_004006.2:c.296T>G']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.521254T>G' - assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32841473A>C', 'vcf': {'chr': 'chrX', 'ref': u'A', 'pos': '32841473', 'alt': u'C'}} - assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32823356A>C', 'vcf': {'chr': 'chrX', 'ref': u'A', 'pos': '32823356', 'alt': u'C'}} - assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32841473A>C', 'vcf': {'chr': 'X', 'ref': u'A', 'pos': '32841473', 'alt': u'C'}} - assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32823356A>C', 'vcf': {'chr': 'X', 'ref': u'A', 'pos': '32823356', 'alt': u'C'}} + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32841473A>C', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '32841473', 'alt': 'C'}} + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32823356A>C', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '32823356', 'alt': 'C'}} + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32841473A>C', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '32841473', 'alt': 'C'}} + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32823356A>C', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '32823356', 'alt': 'C'}} assert results['NM_004006.2:c.296T>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 
'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} - assert 'NM_004006.2:c.1083A>C' in results.keys() + assert 'NM_004006.2:c.1083A>C' in list(results.keys()) assert results['NM_004006.2:c.1083A>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.1083A>C' assert results['NM_004006.2:c.1083A>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.1083A>C']['alt_genomic_loci'] == [] @@ -7272,19 +7272,19 @@ def test_variant196(self): assert results['NM_004006.2:c.1083A>C']['hgvs_lrg_variant'] == 'LRG_199:g.699580A>C' assert results['NM_004006.2:c.1083A>C']['hgvs_transcript_variant'] == 'NM_004006.2:c.1083A>C' assert results['NM_004006.2:c.1083A>C']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.699580A>C' - assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32663147T>G', 'vcf': {'chr': 'chrX', 'ref': u'T', 'pos': '32663147', 'alt': u'G'}} - assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32645030T>G', 'vcf': {'chr': 'chrX', 'ref': u'T', 'pos': '32645030', 'alt': u'G'}} - assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32663147T>G', 'vcf': {'chr': 'X', 'ref': u'T', 'pos': '32663147', 'alt': u'G'}} - assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32645030T>G', 'vcf': {'chr': 'X', 'ref': u'T', 'pos': '32645030', 'alt': u'G'}} + assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32663147T>G', 'vcf': {'chr': 'chrX', 'ref': 'T', 'pos': '32663147', 'alt': 'G'}} + assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32645030T>G', 'vcf': {'chr': 'chrX', 'ref': 'T', 'pos': '32645030', 'alt': 'G'}} + assert 
results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32663147T>G', 'vcf': {'chr': 'X', 'ref': 'T', 'pos': '32663147', 'alt': 'G'}} + assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32645030T>G', 'vcf': {'chr': 'X', 'ref': 'T', 'pos': '32645030', 'alt': 'G'}} assert results['NM_004006.2:c.1083A>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} def test_variant197(self): variant = 'LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_004006.2:c.1408del' in results.keys() + assert 'NM_004006.2:c.1408del' in list(results.keys()) assert results['NM_004006.2:c.1408del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.1408del' assert results['NM_004006.2:c.1408del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.1408del']['alt_genomic_loci'] == [] @@ -7303,7 +7303,7 @@ def test_variant197(self): assert results['NM_004006.2:c.1408del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} assert results['flag'] == 'gene_variant' - assert 'NM_004006.2:c.296T>G' in results.keys() + assert 'NM_004006.2:c.296T>G' in list(results.keys()) assert results['NM_004006.2:c.296T>G']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.296T>G' assert results['NM_004006.2:c.296T>G']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_004006.2:c.296T>G']['alt_genomic_loci'] == [] @@ -7315,13 +7315,13 @@ def test_variant197(self): assert results['NM_004006.2:c.296T>G']['hgvs_lrg_variant'] == 'LRG_199:g.521254T>G' assert results['NM_004006.2:c.296T>G']['hgvs_transcript_variant'] == 'NM_004006.2:c.296T>G' assert results['NM_004006.2:c.296T>G']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.521254T>G' - assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32841473A>C', 'vcf': {'chr': 'chrX', 'ref': u'A', 'pos': '32841473', 'alt': u'C'}} - assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32823356A>C', 'vcf': {'chr': 'chrX', 'ref': u'A', 'pos': '32823356', 'alt': u'C'}} - assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32841473A>C', 'vcf': {'chr': 'X', 'ref': u'A', 'pos': '32841473', 'alt': u'C'}} - assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32823356A>C', 'vcf': {'chr': 'X', 'ref': u'A', 'pos': '32823356', 'alt': u'C'}} + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32841473A>C', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '32841473', 'alt': 'C'}} + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32823356A>C', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '32823356', 'alt': 'C'}} + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32841473A>C', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '32841473', 'alt': 'C'}} + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32823356A>C', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '32823356', 
'alt': 'C'}} assert results['NM_004006.2:c.296T>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} - assert 'NM_004006.2:c.476T>C' in results.keys() + assert 'NM_004006.2:c.476T>C' in list(results.keys()) assert results['NM_004006.2:c.476T>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.476T>C' assert results['NM_004006.2:c.476T>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.476T>C']['alt_genomic_loci'] == [] @@ -7333,13 +7333,13 @@ def test_variant197(self): assert results['NM_004006.2:c.476T>C']['hgvs_lrg_variant'] == 'LRG_199:g.528088T>C' assert results['NM_004006.2:c.476T>C']['hgvs_transcript_variant'] == 'NM_004006.2:c.476T>C' assert results['NM_004006.2:c.476T>C']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.528088T>C' - assert results['NM_004006.2:c.476T>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639A>G', 'vcf': {'chr': 'chrX', 'ref': u'A', 'pos': '32834639', 'alt': u'G'}} - assert results['NM_004006.2:c.476T>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522A>G', 'vcf': {'chr': 'chrX', 'ref': u'A', 'pos': '32816522', 'alt': u'G'}} - assert results['NM_004006.2:c.476T>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639A>G', 'vcf': {'chr': 'X', 'ref': u'A', 'pos': '32834639', 'alt': u'G'}} - assert results['NM_004006.2:c.476T>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522A>G', 'vcf': {'chr': 'X', 'ref': u'A', 'pos': '32816522', 'alt': u'G'}} + assert results['NM_004006.2:c.476T>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639A>G', 'vcf': {'chr': 'chrX', 'ref': 
'A', 'pos': '32834639', 'alt': 'G'}} + assert results['NM_004006.2:c.476T>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522A>G', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '32816522', 'alt': 'G'}} + assert results['NM_004006.2:c.476T>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639A>G', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '32834639', 'alt': 'G'}} + assert results['NM_004006.2:c.476T>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522A>G', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '32816522', 'alt': 'G'}} assert results['NM_004006.2:c.476T>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} - assert 'NM_004006.2:c.1083A>C' in results.keys() + assert 'NM_004006.2:c.1083A>C' in list(results.keys()) assert results['NM_004006.2:c.1083A>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.1083A>C' assert results['NM_004006.2:c.1083A>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.1083A>C']['alt_genomic_loci'] == [] @@ -7351,19 +7351,19 @@ def test_variant197(self): assert results['NM_004006.2:c.1083A>C']['hgvs_lrg_variant'] == 'LRG_199:g.699580A>C' assert results['NM_004006.2:c.1083A>C']['hgvs_transcript_variant'] == 'NM_004006.2:c.1083A>C' assert results['NM_004006.2:c.1083A>C']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.699580A>C' - assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32663147T>G', 'vcf': {'chr': 'chrX', 'ref': u'T', 'pos': '32663147', 'alt': u'G'}} - assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32645030T>G', 
'vcf': {'chr': 'chrX', 'ref': u'T', 'pos': '32645030', 'alt': u'G'}} - assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32663147T>G', 'vcf': {'chr': 'X', 'ref': u'T', 'pos': '32663147', 'alt': u'G'}} - assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32645030T>G', 'vcf': {'chr': 'X', 'ref': u'T', 'pos': '32645030', 'alt': u'G'}} + assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32663147T>G', 'vcf': {'chr': 'chrX', 'ref': 'T', 'pos': '32663147', 'alt': 'G'}} + assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32645030T>G', 'vcf': {'chr': 'chrX', 'ref': 'T', 'pos': '32645030', 'alt': 'G'}} + assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32663147T>G', 'vcf': {'chr': 'X', 'ref': 'T', 'pos': '32663147', 'alt': 'G'}} + assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32645030T>G', 'vcf': {'chr': 'X', 'ref': 'T', 'pos': '32645030', 'alt': 'G'}} assert results['NM_004006.2:c.1083A>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} def test_variant198(self): variant = 'LRG_199t1:c.[976-20T>A;976-17_976-1dup]' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'validation_warning_1' in results.keys() + assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert 
results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['alt_genomic_loci'] == [] @@ -7375,10 +7375,10 @@ def test_variant198(self): assert results['validation_warning_1']['hgvs_lrg_variant'] == '' assert results['validation_warning_1']['hgvs_transcript_variant'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['validation_warning_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['validation_warning_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -7386,9 +7386,9 @@ def test_variant198(self): def test_variant199(self): variant = '1-5935162-A-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_015102.3:c.2818-2T>A' in results.keys() + assert 'NM_015102.3:c.2818-2T>A' in list(results.keys()) assert results['NM_015102.3:c.2818-2T>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_015102.3:c.2818-2T>A']['refseqgene_context_intronic_sequence'] == 'NG_011724.2(NM_015102.3):c.2818-2A=' assert results['NM_015102.3:c.2818-2T>A']['alt_genomic_loci'] == [] @@ -7400,13 +7400,13 @@ def test_variant199(self): assert results['NM_015102.3:c.2818-2T>A']['hgvs_lrg_variant'] == '' assert results['NM_015102.3:c.2818-2T>A']['hgvs_transcript_variant'] 
== 'NM_015102.3:c.2818-2T>A' assert results['NM_015102.3:c.2818-2T>A']['hgvs_refseqgene_variant'] == 'NG_011724.2:g.122370A=' - assert results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'ref': u'A', 'pos': '5935162', 'alt': u'T'}} - assert 'hg38' not in results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci'].keys() - assert results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'ref': u'A', 'pos': '5935162', 'alt': u'T'}} - assert 'grch38' not in results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci'].keys() + assert results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '5935162', 'alt': 'T'}} + assert 'hg38' not in list(results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci'].keys()) + assert results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '5935162', 'alt': 'T'}} + assert 'grch38' not in list(results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci'].keys()) assert results['NM_015102.3:c.2818-2T>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011724.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055917.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015102.3'} - assert 'NM_001291593.1:c.1279-2T>A' in results.keys() + assert 'NM_001291593.1:c.1279-2T>A' in list(results.keys()) assert results['NM_001291593.1:c.1279-2T>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001291593.1:c.1279-2T>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001291593.1:c.1279-2T>A']['alt_genomic_loci'] == [] @@ -7418,13 +7418,13 @@ def 
test_variant199(self): assert results['NM_001291593.1:c.1279-2T>A']['hgvs_lrg_variant'] == '' assert results['NM_001291593.1:c.1279-2T>A']['hgvs_transcript_variant'] == 'NM_001291593.1:c.1279-2T>A' assert results['NM_001291593.1:c.1279-2T>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001291593.1:c.1279-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'ref': u'A', 'pos': '5935162', 'alt': u'T'}} + assert results['NM_001291593.1:c.1279-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '5935162', 'alt': 'T'}} assert results['NM_001291593.1:c.1279-2T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '5875102', 'alt': 'T'}} - assert results['NM_001291593.1:c.1279-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'ref': u'A', 'pos': '5935162', 'alt': u'T'}} + assert results['NM_001291593.1:c.1279-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '5935162', 'alt': 'T'}} assert results['NM_001291593.1:c.1279-2T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '5875102', 'alt': 'T'}} assert results['NM_001291593.1:c.1279-2T>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278522.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291593.1'} - assert 'NM_015102.4:c.2818-2T>A' in results.keys() + assert 'NM_015102.4:c.2818-2T>A' in list(results.keys()) assert results['NM_015102.4:c.2818-2T>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_015102.4:c.2818-2T>A']['refseqgene_context_intronic_sequence'] 
== '' assert results['NM_015102.4:c.2818-2T>A']['alt_genomic_loci'] == [] @@ -7436,13 +7436,13 @@ def test_variant199(self): assert results['NM_015102.4:c.2818-2T>A']['hgvs_lrg_variant'] == '' assert results['NM_015102.4:c.2818-2T>A']['hgvs_transcript_variant'] == 'NM_015102.4:c.2818-2T>A' assert results['NM_015102.4:c.2818-2T>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_015102.4:c.2818-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'ref': u'A', 'pos': '5935162', 'alt': u'T'}} + assert results['NM_015102.4:c.2818-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '5935162', 'alt': 'T'}} assert results['NM_015102.4:c.2818-2T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '5875102', 'alt': 'T'}} - assert results['NM_015102.4:c.2818-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'ref': u'A', 'pos': '5935162', 'alt': u'T'}} + assert results['NM_015102.4:c.2818-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '5935162', 'alt': 'T'}} assert results['NM_015102.4:c.2818-2T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '5875102', 'alt': 'T'}} assert results['NM_015102.4:c.2818-2T>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055917.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015102.4'} - assert 'NM_001291594.1:c.1282-2T>A' in results.keys() + assert 'NM_001291594.1:c.1282-2T>A' in list(results.keys()) assert results['NM_001291594.1:c.1282-2T>A']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001291594.1:c.1282-2T>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001291594.1:c.1282-2T>A']['alt_genomic_loci'] == [] @@ -7454,14 +7454,14 @@ def test_variant199(self): assert results['NM_001291594.1:c.1282-2T>A']['hgvs_lrg_variant'] == '' assert results['NM_001291594.1:c.1282-2T>A']['hgvs_transcript_variant'] == 'NM_001291594.1:c.1282-2T>A' assert results['NM_001291594.1:c.1282-2T>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001291594.1:c.1282-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'ref': u'A', 'pos': '5935162', 'alt': u'T'}} + assert results['NM_001291594.1:c.1282-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '5935162', 'alt': 'T'}} assert results['NM_001291594.1:c.1282-2T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '5875102', 'alt': 'T'}} - assert results['NM_001291594.1:c.1282-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'ref': u'A', 'pos': '5935162', 'alt': u'T'}} + assert results['NM_001291594.1:c.1282-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '5935162', 'alt': 'T'}} assert results['NM_001291594.1:c.1282-2T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '5875102', 'alt': 'T'}} assert results['NM_001291594.1:c.1282-2T>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278523.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291594.1'} assert results['flag'] == 'gene_variant' - assert 'NR_111987.1:n.3633-2T>A' in 
results.keys() + assert 'NR_111987.1:n.3633-2T>A' in list(results.keys()) assert results['NR_111987.1:n.3633-2T>A']['hgvs_lrg_transcript_variant'] == '' assert results['NR_111987.1:n.3633-2T>A']['refseqgene_context_intronic_sequence'] == '' assert results['NR_111987.1:n.3633-2T>A']['alt_genomic_loci'] == [] @@ -7473,9 +7473,9 @@ def test_variant199(self): assert results['NR_111987.1:n.3633-2T>A']['hgvs_lrg_variant'] == '' assert results['NR_111987.1:n.3633-2T>A']['hgvs_transcript_variant'] == 'NR_111987.1:n.3633-2T>A' assert results['NR_111987.1:n.3633-2T>A']['hgvs_refseqgene_variant'] == '' - assert results['NR_111987.1:n.3633-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'ref': u'A', 'pos': '5935162', 'alt': u'T'}} + assert results['NR_111987.1:n.3633-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '5935162', 'alt': 'T'}} assert results['NR_111987.1:n.3633-2T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '5875102', 'alt': 'T'}} - assert results['NR_111987.1:n.3633-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'ref': u'A', 'pos': '5935162', 'alt': u'T'}} + assert results['NR_111987.1:n.3633-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '5935162', 'alt': 'T'}} assert results['NR_111987.1:n.3633-2T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '5875102', 'alt': 'T'}} assert results['NR_111987.1:n.3633-2T>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_111987.1'} @@ -7483,10 +7483,10 @@ def 
test_variant199(self): def test_variant200(self): variant = '1-12065948-C-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_001127660.1:c.1676C>T' in results.keys() + assert 'NM_001127660.1:c.1676C>T' in list(results.keys()) assert results['NM_001127660.1:c.1676C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001127660.1:c.1676C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001127660.1:c.1676C>T']['alt_genomic_loci'] == [] @@ -7504,7 +7504,7 @@ def test_variant200(self): assert results['NM_001127660.1:c.1676C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.12005891C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '12005891', 'alt': 'T'}} assert results['NM_001127660.1:c.1676C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001121132.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001127660.1'} - assert 'NM_014874.3:c.1676C>T' in results.keys() + assert 'NM_014874.3:c.1676C>T' in list(results.keys()) assert results['NM_014874.3:c.1676C>T']['hgvs_lrg_transcript_variant'] == 'LRG_255t1:c.1676C>T' assert results['NM_014874.3:c.1676C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014874.3:c.1676C>T']['alt_genomic_loci'] == [] @@ -7526,9 +7526,9 @@ def test_variant200(self): def test_variant201(self): variant = '1-46655125-CTCAC-C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001290129.1:c.1829+5_1829+8del' in results.keys() + assert 'NM_001290129.1:c.1829+5_1829+8del' in list(results.keys()) assert results['NM_001290129.1:c.1829+5_1829+8del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001290129.1:c.1829+5_1829+8del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001290129.1:c.1829+5_1829+8del']['alt_genomic_loci'] == [] @@ -7546,7 +7546,7 @@ def 
test_variant201(self): assert results['NM_001290129.1:c.1829+5_1829+8del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189450_46189453del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} assert results['NM_001290129.1:c.1829+5_1829+8del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001277058.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001290129.1'} - assert 'NM_001290130.1:c.1466+5_1466+8del' in results.keys() + assert 'NM_001290130.1:c.1466+5_1466+8del' in list(results.keys()) assert results['NM_001290130.1:c.1466+5_1466+8del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001290130.1:c.1466+5_1466+8del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001290130.1:c.1466+5_1466+8del']['alt_genomic_loci'] == [] @@ -7564,7 +7564,7 @@ def test_variant201(self): assert results['NM_001290130.1:c.1466+5_1466+8del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189450_46189453del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} assert results['NM_001290130.1:c.1466+5_1466+8del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001277059.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001290130.1'} - assert 'NM_017739.3:c.1895+5_1895+8del' in results.keys() + assert 'NM_017739.3:c.1895+5_1895+8del' in list(results.keys()) assert results['NM_017739.3:c.1895+5_1895+8del']['hgvs_lrg_transcript_variant'] == 'LRG_701t2:c.1895+5_1895+8del' assert results['NM_017739.3:c.1895+5_1895+8del']['refseqgene_context_intronic_sequence'] == 'NG_009205.2(NM_017739.3):c.1895+5_1895+8del' assert results['NM_017739.3:c.1895+5_1895+8del']['alt_genomic_loci'] == [] @@ -7582,7 +7582,7 @@ def test_variant201(self): assert results['NM_017739.3:c.1895+5_1895+8del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000001.11:g.46189450_46189453del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} assert results['NM_017739.3:c.1895+5_1895+8del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009205.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_060209.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_017739.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_701.xml'} - assert 'NM_001243766.1:c.1869+31_1869+34del' in results.keys() + assert 'NM_001243766.1:c.1869+31_1869+34del' in list(results.keys()) assert results['NM_001243766.1:c.1869+31_1869+34del']['hgvs_lrg_transcript_variant'] == 'LRG_701t1:c.1869+31_1869+34del' assert results['NM_001243766.1:c.1869+31_1869+34del']['refseqgene_context_intronic_sequence'] == 'NG_009205.2(NM_001243766.1):c.1869+31_1869+34del' assert results['NM_001243766.1:c.1869+31_1869+34del']['alt_genomic_loci'] == [] @@ -7605,9 +7605,9 @@ def test_variant201(self): def test_variant202(self): variant = '1-68912523-TGAGCCAGAG-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_000329.2:c.106_114del' in results.keys() + assert 'NM_000329.2:c.106_114del' in list(results.keys()) assert results['NM_000329.2:c.106_114del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000329.2:c.106_114del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000329.2:c.106_114del']['alt_genomic_loci'] == [] @@ -7630,9 +7630,9 @@ def test_variant202(self): def test_variant203(self): variant = '1-68912526-GCCAGAG-G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_000329.2:c.109_114del' in results.keys() + assert 'NM_000329.2:c.109_114del' in list(results.keys()) assert results['NM_000329.2:c.109_114del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000329.2:c.109_114del']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_000329.2:c.109_114del']['alt_genomic_loci'] == [] @@ -7655,10 +7655,10 @@ def test_variant203(self): def test_variant204(self): variant = '1-109817590-G-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_001408.2:c.*919G>T' in results.keys() + assert 'NM_001408.2:c.*919G>T' in list(results.keys()) assert results['NM_001408.2:c.*919G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001408.2:c.*919G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001408.2:c.*919G>T']['alt_genomic_loci'] == [] @@ -7680,10 +7680,10 @@ def test_variant204(self): def test_variant205(self): variant = '1-145597475-GAAGT-G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_006468.6:c.1070+35_1070+38del' in results.keys() + assert 'NM_006468.6:c.1070+35_1070+38del' in list(results.keys()) assert results['NM_006468.6:c.1070+35_1070+38del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_006468.6:c.1070+35_1070+38del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_006468.6:c.1070+35_1070+38del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'HG1287_PATCH', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'NW_003871055.3', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}] @@ -7696,12 +7696,12 @@ def test_variant205(self): assert results['NM_006468.6:c.1070+35_1070+38del']['hgvs_transcript_variant'] == 'NM_006468.6:c.1070+35_1070+38del' assert results['NM_006468.6:c.1070+35_1070+38del']['hgvs_refseqgene_variant'] == '' assert results['NM_006468.6:c.1070+35_1070+38del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.145597477_145597480del', 'vcf': {'chr': 
'chr1', 'ref': 'GAAGT', 'pos': '145597475', 'alt': 'G'}} - assert 'hg38' not in results['NM_006468.6:c.1070+35_1070+38del']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_006468.6:c.1070+35_1070+38del']['primary_assembly_loci'].keys()) assert results['NM_006468.6:c.1070+35_1070+38del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.145597477_145597480del', 'vcf': {'chr': '1', 'ref': 'GAAGT', 'pos': '145597475', 'alt': 'G'}} - assert 'grch38' not in results['NM_006468.6:c.1070+35_1070+38del']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_006468.6:c.1070+35_1070+38del']['primary_assembly_loci'].keys()) assert results['NM_006468.6:c.1070+35_1070+38del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_006459.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006468.6'} - assert 'NM_001303456.1:c.1109+35_1109+38del' in results.keys() + assert 'NM_001303456.1:c.1109+35_1109+38del' in list(results.keys()) assert results['NM_001303456.1:c.1109+35_1109+38del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001303456.1:c.1109+35_1109+38del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001303456.1:c.1109+35_1109+38del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'HG1287_PATCH', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'NW_003871055.3', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}] @@ -7719,7 +7719,7 @@ def test_variant205(self): assert results['NM_001303456.1:c.1109+35_1109+38del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.145837631_145837634del', 'vcf': {'chr': '1', 'ref': 'ATACT', 'pos': '145837629', 'alt': 'A'}} assert results['NM_001303456.1:c.1109+35_1109+38del']['reference_sequence_records'] 
== {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001290385.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001303456.1'} - assert 'NM_006468.7:c.1070+35_1070+38del' in results.keys() + assert 'NM_006468.7:c.1070+35_1070+38del' in list(results.keys()) assert results['NM_006468.7:c.1070+35_1070+38del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_006468.7:c.1070+35_1070+38del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_006468.7:c.1070+35_1070+38del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'HG1287_PATCH', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'NW_003871055.3', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}] @@ -7741,10 +7741,10 @@ def test_variant205(self): def test_variant206(self): variant = '1-153791300-CTG-C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_020699.2:c.562_563del' in results.keys() + assert 'NM_020699.2:c.562_563del' in list(results.keys()) assert results['NM_020699.2:c.562_563del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_020699.2:c.562_563del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_020699.2:c.562_563del']['alt_genomic_loci'] == [] @@ -7762,7 +7762,7 @@ def test_variant206(self): assert results['NM_020699.2:c.562_563del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153818825_153818826del', 'vcf': {'chr': '1', 'ref': 'CTG', 'pos': '153818824', 'alt': 'C'}} assert results['NM_020699.2:c.562_563del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065750.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020699.2'} - assert 'NM_020699.3:c.562_563del' in results.keys() + assert 'NM_020699.3:c.562_563del' in 
list(results.keys()) assert results['NM_020699.3:c.562_563del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_020699.3:c.562_563del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_020699.3:c.562_563del']['alt_genomic_loci'] == [] @@ -7784,9 +7784,9 @@ def test_variant206(self): def test_variant207(self): variant = '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_005572.3:c.711_734delinsCCCC' in results.keys() + assert 'NM_005572.3:c.711_734delinsCCCC' in list(results.keys()) assert results['NM_005572.3:c.711_734delinsCCCC']['hgvs_lrg_transcript_variant'] == 'LRG_254t1:c.711_734delinsCCCC' assert results['NM_005572.3:c.711_734delinsCCCC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_005572.3:c.711_734delinsCCCC']['alt_genomic_loci'] == [] @@ -7804,7 +7804,7 @@ def test_variant207(self): assert results['NM_005572.3:c.711_734delinsCCCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} assert results['NM_005572.3:c.711_734delinsCCCC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008692.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005563.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005572.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_254.xml'} - assert 'NM_001257374.1:c.375_398delinsCCCC' in results.keys() + assert 'NM_001257374.1:c.375_398delinsCCCC' in list(results.keys()) assert results['NM_001257374.1:c.375_398delinsCCCC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001257374.1:c.375_398delinsCCCC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001257374.1:c.375_398delinsCCCC']['alt_genomic_loci'] == [] @@ -7817,12 +7817,12 @@ def test_variant207(self): assert 
results['NM_001257374.1:c.375_398delinsCCCC']['hgvs_transcript_variant'] == 'NM_001257374.1:c.375_398delinsCCCC' assert results['NM_001257374.1:c.375_398delinsCCCC']['hgvs_refseqgene_variant'] == '' assert results['NM_001257374.1:c.375_398delinsCCCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} - assert 'hg38' not in results['NM_001257374.1:c.375_398delinsCCCC']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001257374.1:c.375_398delinsCCCC']['primary_assembly_loci'].keys()) assert results['NM_001257374.1:c.375_398delinsCCCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} - assert 'grch38' not in results['NM_001257374.1:c.375_398delinsCCCC']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001257374.1:c.375_398delinsCCCC']['primary_assembly_loci'].keys()) assert results['NM_001257374.1:c.375_398delinsCCCC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244303.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257374.1'} - assert 'NM_001257374.2:c.375_398delinsCCCC' in results.keys() + assert 'NM_001257374.2:c.375_398delinsCCCC' in list(results.keys()) assert results['NM_001257374.2:c.375_398delinsCCCC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001257374.2:c.375_398delinsCCCC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001257374.2:c.375_398delinsCCCC']['alt_genomic_loci'] == [] @@ -7840,7 +7840,7 @@ def test_variant207(self): assert results['NM_001257374.2:c.375_398delinsCCCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': '1', 
'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} assert results['NM_001257374.2:c.375_398delinsCCCC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244303.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257374.2'} - assert 'NM_001282624.1:c.468_491delinsCCCC' in results.keys() + assert 'NM_001282624.1:c.468_491delinsCCCC' in list(results.keys()) assert results['NM_001282624.1:c.468_491delinsCCCC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001282624.1:c.468_491delinsCCCC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001282624.1:c.468_491delinsCCCC']['alt_genomic_loci'] == [] @@ -7859,7 +7859,7 @@ def test_variant207(self): assert results['NM_001282624.1:c.468_491delinsCCCC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269553.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282624.1'} assert results['flag'] == 'gene_variant' - assert 'NM_170708.3:c.711_734delinsCCCC' in results.keys() + assert 'NM_170708.3:c.711_734delinsCCCC' in list(results.keys()) assert results['NM_170708.3:c.711_734delinsCCCC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_170708.3:c.711_734delinsCCCC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_170708.3:c.711_734delinsCCCC']['alt_genomic_loci'] == [] @@ -7877,7 +7877,7 @@ def test_variant207(self): assert results['NM_170708.3:c.711_734delinsCCCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} assert results['NM_170708.3:c.711_734delinsCCCC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_733822.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_170708.3'} - assert 'NM_170707.3:c.711_734delinsCCCC' in results.keys() + assert 
'NM_170707.3:c.711_734delinsCCCC' in list(results.keys()) assert results['NM_170707.3:c.711_734delinsCCCC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_170707.3:c.711_734delinsCCCC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_170707.3:c.711_734delinsCCCC']['alt_genomic_loci'] == [] @@ -7895,7 +7895,7 @@ def test_variant207(self): assert results['NM_170707.3:c.711_734delinsCCCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} assert results['NM_170707.3:c.711_734delinsCCCC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_733821.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_170707.3'} - assert 'NM_001282626.1:c.711_734delinsCCCC' in results.keys() + assert 'NM_001282626.1:c.711_734delinsCCCC' in list(results.keys()) assert results['NM_001282626.1:c.711_734delinsCCCC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001282626.1:c.711_734delinsCCCC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001282626.1:c.711_734delinsCCCC']['alt_genomic_loci'] == [] @@ -7913,7 +7913,7 @@ def test_variant207(self): assert results['NM_001282626.1:c.711_734delinsCCCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} assert results['NM_001282626.1:c.711_734delinsCCCC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269555.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282626.1'} - assert 'NM_001282625.1:c.711_734delinsCCCC' in results.keys() + assert 'NM_001282625.1:c.711_734delinsCCCC' in list(results.keys()) assert results['NM_001282625.1:c.711_734delinsCCCC']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001282625.1:c.711_734delinsCCCC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001282625.1:c.711_734delinsCCCC']['alt_genomic_loci'] == [] @@ -7935,9 +7935,9 @@ def test_variant207(self): def test_variant208(self): variant = '1-156108541-G-GG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_170707.3:c.1961dup' in results.keys() + assert 'NM_170707.3:c.1961dup' in list(results.keys()) assert results['NM_170707.3:c.1961dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_170707.3:c.1961dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_170707.3:c.1961dup']['alt_genomic_loci'] == [] @@ -7955,7 +7955,7 @@ def test_variant208(self): assert results['NM_170707.3:c.1961dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '156138749', 'alt': 'CG'}} assert results['NM_170707.3:c.1961dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_733821.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_170707.3'} - assert 'NM_001282626.1:c.1818+143dup' in results.keys() + assert 'NM_001282626.1:c.1818+143dup' in list(results.keys()) assert results['NM_001282626.1:c.1818+143dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001282626.1:c.1818+143dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001282626.1:c.1818+143dup']['alt_genomic_loci'] == [] @@ -7974,7 +7974,7 @@ def test_variant208(self): assert results['NM_001282626.1:c.1818+143dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269555.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282626.1'} assert results['flag'] == 'gene_variant' - assert 'NM_001257374.2:c.1625dup' in results.keys() + assert 'NM_001257374.2:c.1625dup' in list(results.keys()) assert 
results['NM_001257374.2:c.1625dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001257374.2:c.1625dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001257374.2:c.1625dup']['alt_genomic_loci'] == [] @@ -7992,7 +7992,7 @@ def test_variant208(self): assert results['NM_001257374.2:c.1625dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '156138749', 'alt': 'CG'}} assert results['NM_001257374.2:c.1625dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244303.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257374.2'} - assert 'NM_170708.3:c.1871dup' in results.keys() + assert 'NM_170708.3:c.1871dup' in list(results.keys()) assert results['NM_170708.3:c.1871dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_170708.3:c.1871dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_170708.3:c.1871dup']['alt_genomic_loci'] == [] @@ -8010,7 +8010,7 @@ def test_variant208(self): assert results['NM_170708.3:c.1871dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '156138749', 'alt': 'CG'}} assert results['NM_170708.3:c.1871dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_733822.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_170708.3'} - assert 'NM_001257374.1:c.1625dup' in results.keys() + assert 'NM_001257374.1:c.1625dup' in list(results.keys()) assert results['NM_001257374.1:c.1625dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001257374.1:c.1625dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001257374.1:c.1625dup']['alt_genomic_loci'] == [] @@ -8023,19 +8023,19 @@ def test_variant208(self): assert results['NM_001257374.1:c.1625dup']['hgvs_transcript_variant'] == 'NM_001257374.1:c.1625dup' 
assert results['NM_001257374.1:c.1625dup']['hgvs_refseqgene_variant'] == '' assert results['NM_001257374.1:c.1625dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '156108540', 'alt': 'CG'}} - assert 'hg38' not in results['NM_001257374.1:c.1625dup']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001257374.1:c.1625dup']['primary_assembly_loci'].keys()) assert results['NM_001257374.1:c.1625dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '156108540', 'alt': 'CG'}} - assert 'grch38' not in results['NM_001257374.1:c.1625dup']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001257374.1:c.1625dup']['primary_assembly_loci'].keys()) assert results['NM_001257374.1:c.1625dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244303.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257374.1'} def test_variant209(self): variant = '1-161279695-T-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_001315491.1:c.1A>T' in results.keys() + assert 'NM_001315491.1:c.1A>T' in list(results.keys()) assert results['NM_001315491.1:c.1A>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001315491.1:c.1A>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001315491.1:c.1A>T']['alt_genomic_loci'] == [] @@ -8047,13 +8047,13 @@ def test_variant209(self): assert results['NM_001315491.1:c.1A>T']['hgvs_lrg_variant'] == '' assert results['NM_001315491.1:c.1A>T']['hgvs_transcript_variant'] == 'NM_001315491.1:c.1A>T' assert results['NM_001315491.1:c.1A>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001315491.1:c.1A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000001.10:g.161279695T>A', 'vcf': {'chr': 'chr1', 'ref': u'T', 'pos': '161279695', 'alt': u'A'}} - assert results['NM_001315491.1:c.1A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.161309905T>A', 'vcf': {'chr': 'chr1', 'ref': u'T', 'pos': '161309905', 'alt': u'A'}} - assert results['NM_001315491.1:c.1A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': '1', 'ref': u'T', 'pos': '161279695', 'alt': u'A'}} - assert results['NM_001315491.1:c.1A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.161309905T>A', 'vcf': {'chr': '1', 'ref': u'T', 'pos': '161309905', 'alt': u'A'}} + assert results['NM_001315491.1:c.1A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '161279695', 'alt': 'A'}} + assert results['NM_001315491.1:c.1A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.161309905T>A', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '161309905', 'alt': 'A'}} + assert results['NM_001315491.1:c.1A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '161279695', 'alt': 'A'}} + assert results['NM_001315491.1:c.1A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.161309905T>A', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '161309905', 'alt': 'A'}} assert results['NM_001315491.1:c.1A>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001302420.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001315491.1'} - assert 'NM_000530.7:c.1A>T' in results.keys() + assert 'NM_000530.7:c.1A>T' in list(results.keys()) assert results['NM_000530.7:c.1A>T']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_000530.7:c.1A>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000530.7:c.1A>T']['alt_genomic_loci'] == [] @@ -8065,13 +8065,13 @@ def test_variant209(self): assert results['NM_000530.7:c.1A>T']['hgvs_lrg_variant'] == '' assert results['NM_000530.7:c.1A>T']['hgvs_transcript_variant'] == 'NM_000530.7:c.1A>T' assert results['NM_000530.7:c.1A>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_000530.7:c.1A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': 'chr1', 'ref': u'T', 'pos': '161279695', 'alt': u'A'}} - assert results['NM_000530.7:c.1A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.161309905T>A', 'vcf': {'chr': 'chr1', 'ref': u'T', 'pos': '161309905', 'alt': u'A'}} - assert results['NM_000530.7:c.1A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': '1', 'ref': u'T', 'pos': '161279695', 'alt': u'A'}} - assert results['NM_000530.7:c.1A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.161309905T>A', 'vcf': {'chr': '1', 'ref': u'T', 'pos': '161309905', 'alt': u'A'}} + assert results['NM_000530.7:c.1A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '161279695', 'alt': 'A'}} + assert results['NM_000530.7:c.1A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.161309905T>A', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '161309905', 'alt': 'A'}} + assert results['NM_000530.7:c.1A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '161279695', 'alt': 'A'}} + assert results['NM_000530.7:c.1A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.161309905T>A', 'vcf': {'chr': '1', 
'ref': 'T', 'pos': '161309905', 'alt': 'A'}} assert results['NM_000530.7:c.1A>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000521.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000530.7'} - assert 'NM_000530.6:c.1A>T' in results.keys() + assert 'NM_000530.6:c.1A>T' in list(results.keys()) assert results['NM_000530.6:c.1A>T']['hgvs_lrg_transcript_variant'] == 'LRG_256t1:c.1A>T' assert results['NM_000530.6:c.1A>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000530.6:c.1A>T']['alt_genomic_loci'] == [] @@ -8083,20 +8083,20 @@ def test_variant209(self): assert results['NM_000530.6:c.1A>T']['hgvs_lrg_variant'] == 'LRG_256:g.5068A>T' assert results['NM_000530.6:c.1A>T']['hgvs_transcript_variant'] == 'NM_000530.6:c.1A>T' assert results['NM_000530.6:c.1A>T']['hgvs_refseqgene_variant'] == 'NG_008055.1:g.5068A>T' - assert results['NM_000530.6:c.1A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': 'chr1', 'ref': u'T', 'pos': '161279695', 'alt': u'A'}} - assert 'hg38' not in results['NM_000530.6:c.1A>T']['primary_assembly_loci'].keys() - assert results['NM_000530.6:c.1A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': '1', 'ref': u'T', 'pos': '161279695', 'alt': u'A'}} - assert 'grch38' not in results['NM_000530.6:c.1A>T']['primary_assembly_loci'].keys() + assert results['NM_000530.6:c.1A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '161279695', 'alt': 'A'}} + assert 'hg38' not in list(results['NM_000530.6:c.1A>T']['primary_assembly_loci'].keys()) + assert results['NM_000530.6:c.1A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '161279695', 'alt': 'A'}} + assert 'grch38' not in 
list(results['NM_000530.6:c.1A>T']['primary_assembly_loci'].keys()) assert results['NM_000530.6:c.1A>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008055.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000521.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000530.6', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_256.xml'} def test_variant210(self): variant = '1-169519049-T-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000130.4:c.1601G>A' in results.keys() + assert 'NM_000130.4:c.1601G>A' in list(results.keys()) assert results['NM_000130.4:c.1601G>A']['hgvs_lrg_transcript_variant'] == 'LRG_553t1:c.1601G>A' assert results['NM_000130.4:c.1601G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000130.4:c.1601G>A']['alt_genomic_loci'] == [] @@ -8109,18 +8109,18 @@ def test_variant210(self): assert results['NM_000130.4:c.1601G>A']['hgvs_transcript_variant'] == 'NM_000130.4:c.1601G>A' assert results['NM_000130.4:c.1601G>A']['hgvs_refseqgene_variant'] == 'NG_011806.1:g.41721G>A' assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.169519049T=', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '169519049', 'alt': 'T'}} - assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549811C>T', 'vcf': {'chr': 'chr1', 'ref': u'C', 'pos': '169549811', 'alt': u'T'}} + assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549811C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '169549811', 'alt': 'T'}} assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.169519049T=', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '169519049', 'alt': 'T'}} - 
assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549811C>T', 'vcf': {'chr': '1', 'ref': u'C', 'pos': '169549811', 'alt': u'T'}} + assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549811C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '169549811', 'alt': 'T'}} assert results['NM_000130.4:c.1601G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011806.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000121.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000130.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_553.xml'} def test_variant211(self): variant = '1-226125468-G-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_003240.4:c.774C>T' in results.keys() + assert 'NM_003240.4:c.774C>T' in list(results.keys()) assert results['NM_003240.4:c.774C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003240.4:c.774C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003240.4:c.774C>T']['alt_genomic_loci'] == [] @@ -8132,13 +8132,13 @@ def test_variant211(self): assert results['NM_003240.4:c.774C>T']['hgvs_lrg_variant'] == '' assert results['NM_003240.4:c.774C>T']['hgvs_transcript_variant'] == 'NM_003240.4:c.774C>T' assert results['NM_003240.4:c.774C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_003240.4:c.774C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '226125468', 'alt': u'A'}} - assert results['NM_003240.4:c.774C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '225937768', 'alt': u'A'}} - assert results['NM_003240.4:c.774C>T']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '226125468', 'alt': u'A'}} - assert results['NM_003240.4:c.774C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '225937768', 'alt': u'A'}} + assert results['NM_003240.4:c.774C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '226125468', 'alt': 'A'}} + assert results['NM_003240.4:c.774C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '225937768', 'alt': 'A'}} + assert results['NM_003240.4:c.774C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '226125468', 'alt': 'A'}} + assert results['NM_003240.4:c.774C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '225937768', 'alt': 'A'}} assert results['NM_003240.4:c.774C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003231.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003240.4'} - assert 'NM_003240.3:c.774C>T' in results.keys() + assert 'NM_003240.3:c.774C>T' in list(results.keys()) assert results['NM_003240.3:c.774C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003240.3:c.774C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003240.3:c.774C>T']['alt_genomic_loci'] == [] @@ -8150,13 +8150,13 @@ def test_variant211(self): assert results['NM_003240.3:c.774C>T']['hgvs_lrg_variant'] == '' assert results['NM_003240.3:c.774C>T']['hgvs_transcript_variant'] == 'NM_003240.3:c.774C>T' assert results['NM_003240.3:c.774C>T']['hgvs_refseqgene_variant'] == 'NG_008118.1:g.8453C>T' - assert 
results['NM_003240.3:c.774C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '226125468', 'alt': u'A'}} - assert results['NM_003240.3:c.774C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '225937768', 'alt': u'A'}} - assert results['NM_003240.3:c.774C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '226125468', 'alt': u'A'}} - assert results['NM_003240.3:c.774C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '225937768', 'alt': u'A'}} + assert results['NM_003240.3:c.774C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '226125468', 'alt': 'A'}} + assert results['NM_003240.3:c.774C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '225937768', 'alt': 'A'}} + assert results['NM_003240.3:c.774C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '226125468', 'alt': 'A'}} + assert results['NM_003240.3:c.774C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '225937768', 'alt': 'A'}} assert results['NM_003240.3:c.774C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008118.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003231.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003240.3'} - assert 'NM_001172425.1:c.672C>T' in results.keys() + assert 'NM_001172425.1:c.672C>T' in 
list(results.keys()) assert results['NM_001172425.1:c.672C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001172425.1:c.672C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001172425.1:c.672C>T']['alt_genomic_loci'] == [] @@ -8168,13 +8168,13 @@ def test_variant211(self): assert results['NM_001172425.1:c.672C>T']['hgvs_lrg_variant'] == '' assert results['NM_001172425.1:c.672C>T']['hgvs_transcript_variant'] == 'NM_001172425.1:c.672C>T' assert results['NM_001172425.1:c.672C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001172425.1:c.672C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '226125468', 'alt': u'A'}} - assert results['NM_001172425.1:c.672C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '225937768', 'alt': u'A'}} - assert results['NM_001172425.1:c.672C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '226125468', 'alt': u'A'}} - assert results['NM_001172425.1:c.672C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '225937768', 'alt': u'A'}} + assert results['NM_001172425.1:c.672C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '226125468', 'alt': 'A'}} + assert results['NM_001172425.1:c.672C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '225937768', 'alt': 'A'}} + assert results['NM_001172425.1:c.672C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '226125468', 
'alt': 'A'}} + assert results['NM_001172425.1:c.672C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '225937768', 'alt': 'A'}} assert results['NM_001172425.1:c.672C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001165896.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001172425.1'} - assert 'NM_001172425.2:c.672C>T' in results.keys() + assert 'NM_001172425.2:c.672C>T' in list(results.keys()) assert results['NM_001172425.2:c.672C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001172425.2:c.672C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001172425.2:c.672C>T']['alt_genomic_loci'] == [] @@ -8186,10 +8186,10 @@ def test_variant211(self): assert results['NM_001172425.2:c.672C>T']['hgvs_lrg_variant'] == '' assert results['NM_001172425.2:c.672C>T']['hgvs_transcript_variant'] == 'NM_001172425.2:c.672C>T' assert results['NM_001172425.2:c.672C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001172425.2:c.672C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '226125468', 'alt': u'A'}} - assert results['NM_001172425.2:c.672C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': 'chr1', 'ref': u'G', 'pos': '225937768', 'alt': u'A'}} - assert results['NM_001172425.2:c.672C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '226125468', 'alt': u'A'}} - assert results['NM_001172425.2:c.672C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': '1', 'ref': u'G', 'pos': '225937768', 'alt': u'A'}} + assert results['NM_001172425.2:c.672C>T']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '226125468', 'alt': 'A'}} + assert results['NM_001172425.2:c.672C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '225937768', 'alt': 'A'}} + assert results['NM_001172425.2:c.672C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '226125468', 'alt': 'A'}} + assert results['NM_001172425.2:c.672C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '225937768', 'alt': 'A'}} assert results['NM_001172425.2:c.672C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001165896.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001172425.2'} assert results['flag'] == 'gene_variant' @@ -8197,10 +8197,10 @@ def test_variant211(self): def test_variant212(self): variant = '10-89623035-CGCA-C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_001126049.1:c.-794_-792del' in results.keys() + assert 'NM_001126049.1:c.-794_-792del' in list(results.keys()) assert results['NM_001126049.1:c.-794_-792del']['hgvs_lrg_transcript_variant'] == 'LRG_1087t1:c.-794_-792del' assert results['NM_001126049.1:c.-794_-792del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001126049.1:c.-794_-792del']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NW_013171807.1:g.79106_79108del', 'vcf': {'chr': 'HG2334_PATCH', 'ref': 'CGCA', 'pos': '79102', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NW_013171807.1:g.79106_79108del', 'vcf': {'chr': 'NW_013171807.1', 'ref': 'CGCA', 'pos': '79102', 'alt': 'C'}}}] @@ -8222,9 +8222,9 @@ def test_variant212(self): def 
test_variant213(self): variant = '11-62457852-C-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NR_037946.1:n.3896G>T' in results.keys() + assert 'NR_037946.1:n.3896G>T' in list(results.keys()) assert results['NR_037946.1:n.3896G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NR_037946.1:n.3896G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NR_037946.1:n.3896G>T']['alt_genomic_loci'] == [] @@ -8236,13 +8236,13 @@ def test_variant213(self): assert results['NR_037946.1:n.3896G>T']['hgvs_lrg_variant'] == '' assert results['NR_037946.1:n.3896G>T']['hgvs_transcript_variant'] == 'NR_037946.1:n.3896G>T' assert results['NR_037946.1:n.3896G>T']['hgvs_refseqgene_variant'] == '' - assert results['NR_037946.1:n.3896G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '62457852', 'alt': u'A'}} - assert results['NR_037946.1:n.3896G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '62690380', 'alt': u'A'}} - assert results['NR_037946.1:n.3896G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '62457852', 'alt': u'A'}} - assert results['NR_037946.1:n.3896G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '62690380', 'alt': u'A'}} + assert results['NR_037946.1:n.3896G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '62457852', 'alt': 'A'}} + assert results['NR_037946.1:n.3896G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '62690380', 'alt': 'A'}} + assert 
results['NR_037946.1:n.3896G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'ref': 'C', 'pos': '62457852', 'alt': 'A'}} + assert results['NR_037946.1:n.3896G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'ref': 'C', 'pos': '62690380', 'alt': 'A'}} assert results['NR_037946.1:n.3896G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_037946.1'} - assert 'NM_032667.6:c.1184G>T' in results.keys() + assert 'NM_032667.6:c.1184G>T' in list(results.keys()) assert results['NM_032667.6:c.1184G>T']['hgvs_lrg_transcript_variant'] == 'LRG_235t2:c.1184G>T' assert results['NM_032667.6:c.1184G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032667.6:c.1184G>T']['alt_genomic_loci'] == [] @@ -8254,13 +8254,13 @@ def test_variant213(self): assert results['NM_032667.6:c.1184G>T']['hgvs_lrg_variant'] == '' assert results['NM_032667.6:c.1184G>T']['hgvs_transcript_variant'] == 'NM_032667.6:c.1184G>T' assert results['NM_032667.6:c.1184G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_032667.6:c.1184G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '62457852', 'alt': u'A'}} - assert results['NM_032667.6:c.1184G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '62690380', 'alt': u'A'}} - assert results['NM_032667.6:c.1184G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '62457852', 'alt': u'A'}} - assert results['NM_032667.6:c.1184G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '62690380', 'alt': 
u'A'}} + assert results['NM_032667.6:c.1184G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '62457852', 'alt': 'A'}} + assert results['NM_032667.6:c.1184G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '62690380', 'alt': 'A'}} + assert results['NM_032667.6:c.1184G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'ref': 'C', 'pos': '62457852', 'alt': 'A'}} + assert results['NM_032667.6:c.1184G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'ref': 'C', 'pos': '62690380', 'alt': 'A'}} assert results['NM_032667.6:c.1184G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116056.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032667.6'} - assert 'NR_037949.1:n.1984G>T' in results.keys() + assert 'NR_037949.1:n.1984G>T' in list(results.keys()) assert results['NR_037949.1:n.1984G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NR_037949.1:n.1984G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NR_037949.1:n.1984G>T']['alt_genomic_loci'] == [] @@ -8272,13 +8272,13 @@ def test_variant213(self): assert results['NR_037949.1:n.1984G>T']['hgvs_lrg_variant'] == '' assert results['NR_037949.1:n.1984G>T']['hgvs_transcript_variant'] == 'NR_037949.1:n.1984G>T' assert results['NR_037949.1:n.1984G>T']['hgvs_refseqgene_variant'] == '' - assert results['NR_037949.1:n.1984G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '62457852', 'alt': u'A'}} - assert results['NR_037949.1:n.1984G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': 
{'chr': 'chr11', 'ref': u'C', 'pos': '62690380', 'alt': u'A'}} - assert results['NR_037949.1:n.1984G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '62457852', 'alt': u'A'}} - assert results['NR_037949.1:n.1984G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '62690380', 'alt': u'A'}} + assert results['NR_037949.1:n.1984G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '62457852', 'alt': 'A'}} + assert results['NR_037949.1:n.1984G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '62690380', 'alt': 'A'}} + assert results['NR_037949.1:n.1984G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'ref': 'C', 'pos': '62457852', 'alt': 'A'}} + assert results['NR_037949.1:n.1984G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'ref': 'C', 'pos': '62690380', 'alt': 'A'}} assert results['NR_037949.1:n.1984G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_037949.1'} - assert 'NR_037948.1:n.1978G>T' in results.keys() + assert 'NR_037948.1:n.1978G>T' in list(results.keys()) assert results['NR_037948.1:n.1978G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NR_037948.1:n.1978G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NR_037948.1:n.1978G>T']['alt_genomic_loci'] == [] @@ -8290,13 +8290,13 @@ def test_variant213(self): assert results['NR_037948.1:n.1978G>T']['hgvs_lrg_variant'] == '' assert results['NR_037948.1:n.1978G>T']['hgvs_transcript_variant'] == 'NR_037948.1:n.1978G>T' assert 
results['NR_037948.1:n.1978G>T']['hgvs_refseqgene_variant'] == '' - assert results['NR_037948.1:n.1978G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '62457852', 'alt': u'A'}} - assert results['NR_037948.1:n.1978G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '62690380', 'alt': u'A'}} - assert results['NR_037948.1:n.1978G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '62457852', 'alt': u'A'}} - assert results['NR_037948.1:n.1978G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '62690380', 'alt': u'A'}} + assert results['NR_037948.1:n.1978G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '62457852', 'alt': 'A'}} + assert results['NR_037948.1:n.1978G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '62690380', 'alt': 'A'}} + assert results['NR_037948.1:n.1978G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'ref': 'C', 'pos': '62457852', 'alt': 'A'}} + assert results['NR_037948.1:n.1978G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'ref': 'C', 'pos': '62690380', 'alt': 'A'}} assert results['NR_037948.1:n.1978G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_037948.1'} - assert 'NM_001122955.3:c.1376G>T' in results.keys() + assert 'NM_001122955.3:c.1376G>T' in list(results.keys()) assert 
results['NM_001122955.3:c.1376G>T']['hgvs_lrg_transcript_variant'] == 'LRG_235t1:c.1376G>T' assert results['NM_001122955.3:c.1376G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001122955.3:c.1376G>T']['alt_genomic_loci'] == [] @@ -8308,14 +8308,14 @@ def test_variant213(self): assert results['NM_001122955.3:c.1376G>T']['hgvs_lrg_variant'] == 'LRG_235:g.24195G>T' assert results['NM_001122955.3:c.1376G>T']['hgvs_transcript_variant'] == 'NM_001122955.3:c.1376G>T' assert results['NM_001122955.3:c.1376G>T']['hgvs_refseqgene_variant'] == 'NG_008461.1:g.24195G>T' - assert results['NM_001122955.3:c.1376G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '62457852', 'alt': u'A'}} - assert results['NM_001122955.3:c.1376G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '62690380', 'alt': u'A'}} - assert results['NM_001122955.3:c.1376G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '62457852', 'alt': u'A'}} - assert results['NM_001122955.3:c.1376G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '62690380', 'alt': u'A'}} + assert results['NM_001122955.3:c.1376G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '62457852', 'alt': 'A'}} + assert results['NM_001122955.3:c.1376G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '62690380', 'alt': 'A'}} + assert results['NM_001122955.3:c.1376G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 
'ref': 'C', 'pos': '62457852', 'alt': 'A'}} + assert results['NM_001122955.3:c.1376G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'ref': 'C', 'pos': '62690380', 'alt': 'A'}} assert results['NM_001122955.3:c.1376G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008461.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001116427.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001122955.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_235.xml'} assert results['flag'] == 'gene_variant' - assert 'NM_001130702.2:c.*178G>T' in results.keys() + assert 'NM_001130702.2:c.*178G>T' in list(results.keys()) assert results['NM_001130702.2:c.*178G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001130702.2:c.*178G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001130702.2:c.*178G>T']['alt_genomic_loci'] == [] @@ -8327,19 +8327,19 @@ def test_variant213(self): assert results['NM_001130702.2:c.*178G>T']['hgvs_lrg_variant'] == '' assert results['NM_001130702.2:c.*178G>T']['hgvs_transcript_variant'] == 'NM_001130702.2:c.*178G>T' assert results['NM_001130702.2:c.*178G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001130702.2:c.*178G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '62457852', 'alt': u'A'}} - assert results['NM_001130702.2:c.*178G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '62690380', 'alt': u'A'}} - assert results['NM_001130702.2:c.*178G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '62457852', 'alt': u'A'}} - assert results['NM_001130702.2:c.*178G>T']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '62690380', 'alt': u'A'}} + assert results['NM_001130702.2:c.*178G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '62457852', 'alt': 'A'}} + assert results['NM_001130702.2:c.*178G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '62690380', 'alt': 'A'}} + assert results['NM_001130702.2:c.*178G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'ref': 'C', 'pos': '62457852', 'alt': 'A'}} + assert results['NM_001130702.2:c.*178G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'ref': 'C', 'pos': '62690380', 'alt': 'A'}} assert results['NM_001130702.2:c.*178G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124174.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130702.2'} def test_variant214(self): variant = '11-108178710-A-AT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001351834.1:c.5761_5762insT' in results.keys() + assert 'NM_001351834.1:c.5761_5762insT' in list(results.keys()) assert results['NM_001351834.1:c.5761_5762insT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001351834.1:c.5761_5762insT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001351834.1:c.5761_5762insT']['alt_genomic_loci'] == [] @@ -8358,7 +8358,7 @@ def test_variant214(self): assert results['NM_001351834.1:c.5761_5762insT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001338763.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001351834.1'} assert results['flag'] == 'gene_variant' 
- assert 'NM_000051.3:c.5761_5762insT' in results.keys() + assert 'NM_000051.3:c.5761_5762insT' in list(results.keys()) assert results['NM_000051.3:c.5761_5762insT']['hgvs_lrg_transcript_variant'] == 'LRG_135t1:c.5761_5762insT' assert results['NM_000051.3:c.5761_5762insT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000051.3:c.5761_5762insT']['alt_genomic_loci'] == [] @@ -8380,12 +8380,12 @@ def test_variant214(self): def test_variant215(self): variant = '11-111735981-G-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001352419.1:c.-108-7C>T' in results.keys() + assert 'NM_001352419.1:c.-108-7C>T' in list(results.keys()) assert results['NM_001352419.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352419.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352419.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352419.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] assert results['NM_001352419.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 15, mRNA' assert results['NM_001352419.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352419.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339348.1:p.?', 'slr': 'NP_001339348.1:p.?'} @@ -8394,16 
+8394,16 @@ def test_variant215(self): assert results['NM_001352419.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' assert results['NM_001352419.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352419.1:c.-108-7C>T' assert results['NM_001352419.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352419.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352419.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} - assert results['NM_001352419.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352419.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352419.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352419.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} + assert results['NM_001352419.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352419.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} assert 
results['NM_001352419.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339348.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352419.1'} - assert 'NM_001352412.1:c.-108-7C>T' in results.keys() + assert 'NM_001352412.1:c.-108-7C>T' in list(results.keys()) assert results['NM_001352412.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352412.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352412.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352412.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] assert results['NM_001352412.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 8, mRNA' assert results['NM_001352412.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352412.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339341.1:p.?', 'slr': 'NP_001339341.1:p.?'} @@ -8412,16 +8412,16 @@ def test_variant215(self): assert results['NM_001352412.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' assert results['NM_001352412.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352412.1:c.-108-7C>T' assert results['NM_001352412.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352412.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352412.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} - assert results['NM_001352412.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352412.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352412.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352412.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} + assert results['NM_001352412.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352412.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} assert results['NM_001352412.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339341.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352412.1'} - assert 'NM_001077692.1:c.-108-7C>T' in results.keys() + assert 'NM_001077692.1:c.-108-7C>T' in list(results.keys()) assert 
results['NM_001077692.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001077692.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001077692.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001077692.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] assert results['NM_001077692.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 4, mRNA' assert results['NM_001077692.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001077692.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001071160.1:p.?', 'slr': 'NP_001071160.1:p.?'} @@ -8430,16 +8430,16 @@ def test_variant215(self): assert results['NM_001077692.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' assert results['NM_001077692.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001077692.1:c.-108-7C>T' assert results['NM_001077692.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001077692.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001077692.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 
'alt': u'A'}} - assert results['NM_001077692.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001077692.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001077692.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001077692.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} + assert results['NM_001077692.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001077692.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} assert results['NM_001077692.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001071160.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077692.1'} - assert 'NM_001352418.1:c.406-7C>T' in results.keys() + assert 'NM_001352418.1:c.406-7C>T' in list(results.keys()) assert results['NM_001352418.1:c.406-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352418.1:c.406-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352418.1:c.406-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': 
u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352418.1:c.406-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] assert results['NM_001352418.1:c.406-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 14, mRNA' assert results['NM_001352418.1:c.406-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352418.1:c.406-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339347.1:p.?', 'slr': 'NP_001339347.1:p.?'} @@ -8448,16 +8448,16 @@ def test_variant215(self): assert results['NM_001352418.1:c.406-7C>T']['hgvs_lrg_variant'] == '' assert results['NM_001352418.1:c.406-7C>T']['hgvs_transcript_variant'] == 'NM_001352418.1:c.406-7C>T' assert results['NM_001352418.1:c.406-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352418.1:c.406-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352418.1:c.406-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} - assert results['NM_001352418.1:c.406-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352418.1:c.406-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 
'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352418.1:c.406-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352418.1:c.406-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} + assert results['NM_001352418.1:c.406-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352418.1:c.406-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} assert results['NM_001352418.1:c.406-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339347.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352418.1'} - assert 'NM_001352423.1:c.-108-7C>T' in results.keys() + assert 'NM_001352423.1:c.-108-7C>T' in list(results.keys()) assert results['NM_001352423.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352423.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352423.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352423.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': 
{'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] assert results['NM_001352423.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 19, mRNA' assert results['NM_001352423.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352423.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339352.1:p.?', 'slr': 'NP_001339352.1:p.?'} @@ -8466,16 +8466,16 @@ def test_variant215(self): assert results['NM_001352423.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' assert results['NM_001352423.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352423.1:c.-108-7C>T' assert results['NM_001352423.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352423.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352423.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} - assert results['NM_001352423.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352423.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352423.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352423.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} + assert results['NM_001352423.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352423.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} assert results['NM_001352423.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339352.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352423.1'} - assert 'NM_001352415.1:c.-108-7C>T' in results.keys() + assert 'NM_001352415.1:c.-108-7C>T' in list(results.keys()) assert results['NM_001352415.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352415.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352415.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352415.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] assert results['NM_001352415.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 11, mRNA' assert results['NM_001352415.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert 
results['NM_001352415.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339344.1:p.?', 'slr': 'NP_001339344.1:p.?'} @@ -8484,16 +8484,16 @@ def test_variant215(self): assert results['NM_001352415.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' assert results['NM_001352415.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352415.1:c.-108-7C>T' assert results['NM_001352415.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352415.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352415.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} - assert results['NM_001352415.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352415.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352415.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352415.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} + assert results['NM_001352415.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert 
results['NM_001352415.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} assert results['NM_001352415.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339344.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352415.1'} - assert 'NM_001352421.1:c.-108-7C>T' in results.keys() + assert 'NM_001352421.1:c.-108-7C>T' in list(results.keys()) assert results['NM_001352421.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352421.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352421.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352421.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] assert results['NM_001352421.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 17, mRNA' assert results['NM_001352421.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352421.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339350.1:p.?', 'slr': 'NP_001339350.1:p.?'} @@ -8502,16 +8502,16 @@ def test_variant215(self): assert results['NM_001352421.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' assert 
results['NM_001352421.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352421.1:c.-108-7C>T' assert results['NM_001352421.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352421.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352421.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} - assert results['NM_001352421.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352421.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352421.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352421.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} + assert results['NM_001352421.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352421.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} assert results['NM_001352421.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339350.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352421.1'} - assert 'NM_001352411.1:c.-108-7C>T' in results.keys() + assert 'NM_001352411.1:c.-108-7C>T' in list(results.keys()) assert results['NM_001352411.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352411.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352411.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352411.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] assert results['NM_001352411.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 7, mRNA' assert results['NM_001352411.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352411.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339340.1:p.?', 'slr': 'NP_001339340.1:p.?'} @@ -8520,16 +8520,16 @@ def test_variant215(self): assert results['NM_001352411.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' assert results['NM_001352411.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352411.1:c.-108-7C>T' assert results['NM_001352411.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352411.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 
'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352411.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} - assert results['NM_001352411.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352411.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352411.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352411.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} + assert results['NM_001352411.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352411.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} assert results['NM_001352411.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339340.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352411.1'} - assert 'NR_147984.1:n.782-7C>T' in results.keys() + assert 'NR_147984.1:n.782-7C>T' in list(results.keys()) assert results['NR_147984.1:n.782-7C>T']['hgvs_lrg_transcript_variant'] == '' assert 
results['NR_147984.1:n.782-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_147984.1:n.782-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NR_147984.1:n.782-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] assert results['NR_147984.1:n.782-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 20, non-coding RNA' assert results['NR_147984.1:n.782-7C>T']['gene_symbol'] == 'ALG9' assert results['NR_147984.1:n.782-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} @@ -8538,16 +8538,16 @@ def test_variant215(self): assert results['NR_147984.1:n.782-7C>T']['hgvs_lrg_variant'] == '' assert results['NR_147984.1:n.782-7C>T']['hgvs_transcript_variant'] == 'NR_147984.1:n.782-7C>T' assert results['NR_147984.1:n.782-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NR_147984.1:n.782-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NR_147984.1:n.782-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} - assert results['NR_147984.1:n.782-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NR_147984.1:n.782-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NR_147984.1:n.782-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NR_147984.1:n.782-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} + assert results['NR_147984.1:n.782-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NR_147984.1:n.782-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} assert results['NR_147984.1:n.782-7C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_147984.1'} - assert 'NM_001077691.1:c.-108-7C>T' in results.keys() + assert 'NM_001077691.1:c.-108-7C>T' in list(results.keys()) assert results['NM_001077691.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001077691.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001077691.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert 
results['NM_001077691.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] assert results['NM_001077691.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 3, mRNA' assert results['NM_001077691.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001077691.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001071159.1:p.?', 'slr': 'NP_001071159.1:p.?'} @@ -8556,16 +8556,16 @@ def test_variant215(self): assert results['NM_001077691.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' assert results['NM_001077691.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001077691.1:c.-108-7C>T' assert results['NM_001077691.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001077691.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001077691.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} - assert results['NM_001077691.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001077691.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001077691.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001077691.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} + assert results['NM_001077691.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001077691.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} assert results['NM_001077691.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001071159.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077691.1'} - assert 'NM_001352410.1:c.-108-7C>T' in results.keys() + assert 'NM_001352410.1:c.-108-7C>T' in list(results.keys()) assert results['NM_001352410.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352410.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352410.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352410.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] assert 
results['NM_001352410.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 6, mRNA' assert results['NM_001352410.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352410.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339339.1:p.?', 'slr': 'NP_001339339.1:p.?'} @@ -8574,16 +8574,16 @@ def test_variant215(self): assert results['NM_001352410.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' assert results['NM_001352410.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352410.1:c.-108-7C>T' assert results['NM_001352410.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352410.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352410.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} - assert results['NM_001352410.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352410.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352410.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352410.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} + assert 
results['NM_001352410.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352410.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} assert results['NM_001352410.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339339.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352410.1'} - assert 'NM_001077690.1:c.406-7C>T' in results.keys() + assert 'NM_001077690.1:c.406-7C>T' in list(results.keys()) assert results['NM_001077690.1:c.406-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001077690.1:c.406-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001077690.1:c.406-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001077690.1:c.406-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] assert results['NM_001077690.1:c.406-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 2, mRNA' assert results['NM_001077690.1:c.406-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001077690.1:c.406-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001071158.1:p.?', 'slr': 
'NP_001071158.1:p.?'} @@ -8592,17 +8592,17 @@ def test_variant215(self): assert results['NM_001077690.1:c.406-7C>T']['hgvs_lrg_variant'] == '' assert results['NM_001077690.1:c.406-7C>T']['hgvs_transcript_variant'] == 'NM_001077690.1:c.406-7C>T' assert results['NM_001077690.1:c.406-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001077690.1:c.406-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001077690.1:c.406-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} - assert results['NM_001077690.1:c.406-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001077690.1:c.406-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001077690.1:c.406-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001077690.1:c.406-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} + assert results['NM_001077690.1:c.406-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001077690.1:c.406-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 
'alt': 'A'}} assert results['NM_001077690.1:c.406-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001071158.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077690.1'} assert results['flag'] == 'gene_variant' - assert 'NM_001352422.1:c.-326-7C>T' in results.keys() + assert 'NM_001352422.1:c.-326-7C>T' in list(results.keys()) assert results['NM_001352422.1:c.-326-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352422.1:c.-326-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352422.1:c.-326-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352422.1:c.-326-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] assert results['NM_001352422.1:c.-326-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 18, mRNA' assert results['NM_001352422.1:c.-326-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352422.1:c.-326-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339351.1:p.?', 'slr': 'NP_001339351.1:p.?'} @@ -8611,16 +8611,16 @@ def test_variant215(self): assert results['NM_001352422.1:c.-326-7C>T']['hgvs_lrg_variant'] == '' assert results['NM_001352422.1:c.-326-7C>T']['hgvs_transcript_variant'] == 'NM_001352422.1:c.-326-7C>T' assert results['NM_001352422.1:c.-326-7C>T']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_001352422.1:c.-326-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352422.1:c.-326-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} - assert results['NM_001352422.1:c.-326-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352422.1:c.-326-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352422.1:c.-326-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352422.1:c.-326-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} + assert results['NM_001352422.1:c.-326-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352422.1:c.-326-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} assert results['NM_001352422.1:c.-326-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339351.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352422.1'} - assert 'NM_001352416.1:c.-108-7C>T' in results.keys() + assert 'NM_001352416.1:c.-108-7C>T' in 
list(results.keys()) assert results['NM_001352416.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352416.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352416.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352416.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] assert results['NM_001352416.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 12, mRNA' assert results['NM_001352416.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352416.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339345.1:p.?', 'slr': 'NP_001339345.1:p.?'} @@ -8629,16 +8629,16 @@ def test_variant215(self): assert results['NM_001352416.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' assert results['NM_001352416.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352416.1:c.-108-7C>T' assert results['NM_001352416.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352416.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352416.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 
u'G', 'pos': '111865258', 'alt': u'A'}} - assert results['NM_001352416.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352416.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352416.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352416.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} + assert results['NM_001352416.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352416.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} assert results['NM_001352416.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339345.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352416.1'} - assert 'NM_001352420.1:c.-108-7C>T' in results.keys() + assert 'NM_001352420.1:c.-108-7C>T' in list(results.keys()) assert results['NM_001352420.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352420.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352420.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 
u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352420.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] assert results['NM_001352420.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 16, mRNA' assert results['NM_001352420.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352420.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339349.1:p.?', 'slr': 'NP_001339349.1:p.?'} @@ -8647,16 +8647,16 @@ def test_variant215(self): assert results['NM_001352420.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' assert results['NM_001352420.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352420.1:c.-108-7C>T' assert results['NM_001352420.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352420.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352420.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} - assert results['NM_001352420.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352420.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352420.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352420.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} + assert results['NM_001352420.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352420.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} assert results['NM_001352420.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339349.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352420.1'} - assert 'NM_024740.2:c.406-7C>T' in results.keys() + assert 'NM_024740.2:c.406-7C>T' in list(results.keys()) assert results['NM_024740.2:c.406-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_024740.2:c.406-7C>T']['refseqgene_context_intronic_sequence'] == 'NG_009210.1(NM_024740.2):c.406-7C>T' - assert results['NM_024740.2:c.406-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_024740.2:c.406-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 
'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] assert results['NM_024740.2:c.406-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 1, mRNA' assert results['NM_024740.2:c.406-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_024740.2:c.406-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_079016.2:p.?', 'slr': 'NP_079016.2:p.?'} @@ -8665,16 +8665,16 @@ def test_variant215(self): assert results['NM_024740.2:c.406-7C>T']['hgvs_lrg_variant'] == '' assert results['NM_024740.2:c.406-7C>T']['hgvs_transcript_variant'] == 'NM_024740.2:c.406-7C>T' assert results['NM_024740.2:c.406-7C>T']['hgvs_refseqgene_variant'] == 'NG_009210.1:g.11324C>T' - assert results['NM_024740.2:c.406-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_024740.2:c.406-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} - assert results['NM_024740.2:c.406-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_024740.2:c.406-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_024740.2:c.406-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_024740.2:c.406-7C>T']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} + assert results['NM_024740.2:c.406-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_024740.2:c.406-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} assert results['NM_024740.2:c.406-7C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009210.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_079016.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024740.2'} - assert 'NM_001352414.1:c.-108-7C>T' in results.keys() + assert 'NM_001352414.1:c.-108-7C>T' in list(results.keys()) assert results['NM_001352414.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352414.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352414.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352414.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] assert results['NM_001352414.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 10, mRNA' 
assert results['NM_001352414.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352414.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339343.1:p.?', 'slr': 'NP_001339343.1:p.?'} @@ -8683,16 +8683,16 @@ def test_variant215(self): assert results['NM_001352414.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' assert results['NM_001352414.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352414.1:c.-108-7C>T' assert results['NM_001352414.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352414.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352414.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} - assert results['NM_001352414.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352414.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352414.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352414.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} + assert results['NM_001352414.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 
'A'}} + assert results['NM_001352414.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} assert results['NM_001352414.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339343.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352414.1'} - assert 'NM_001352417.1:c.406-7C>T' in results.keys() + assert 'NM_001352417.1:c.406-7C>T' in list(results.keys()) assert results['NM_001352417.1:c.406-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352417.1:c.406-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352417.1:c.406-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352417.1:c.406-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] assert results['NM_001352417.1:c.406-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 13, mRNA' assert results['NM_001352417.1:c.406-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352417.1:c.406-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339346.1:p.?', 'slr': 'NP_001339346.1:p.?'} @@ -8701,16 +8701,16 @@ def test_variant215(self): assert results['NM_001352417.1:c.406-7C>T']['hgvs_lrg_variant'] == '' assert 
results['NM_001352417.1:c.406-7C>T']['hgvs_transcript_variant'] == 'NM_001352417.1:c.406-7C>T' assert results['NM_001352417.1:c.406-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352417.1:c.406-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352417.1:c.406-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} - assert results['NM_001352417.1:c.406-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352417.1:c.406-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352417.1:c.406-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352417.1:c.406-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} + assert results['NM_001352417.1:c.406-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352417.1:c.406-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} assert results['NM_001352417.1:c.406-7C>T']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339346.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352417.1'} - assert 'NM_001352409.1:c.-108-7C>T' in results.keys() + assert 'NM_001352409.1:c.-108-7C>T' in list(results.keys()) assert results['NM_001352409.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352409.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352409.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352409.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] assert results['NM_001352409.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 5, mRNA' assert results['NM_001352409.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352409.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339338.1:p.?', 'slr': 'NP_001339338.1:p.?'} @@ -8719,16 +8719,16 @@ def test_variant215(self): assert results['NM_001352409.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' assert results['NM_001352409.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352409.1:c.-108-7C>T' assert results['NM_001352409.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352409.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 
'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352409.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} - assert results['NM_001352409.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352409.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352409.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352409.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} + assert results['NM_001352409.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352409.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} assert results['NM_001352409.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339338.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352409.1'} - assert 'NM_001352413.1:c.-108-7C>T' in results.keys() + assert 'NM_001352413.1:c.-108-7C>T' in list(results.keys()) assert results['NM_001352413.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001352413.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352413.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': u'G', 'pos': '117249', 'alt': u'A'}}}] + assert results['NM_001352413.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] assert results['NM_001352413.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 9, mRNA' assert results['NM_001352413.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352413.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339342.1:p.?', 'slr': 'NP_001339342.1:p.?'} @@ -8737,23 +8737,23 @@ def test_variant215(self): assert results['NM_001352413.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' assert results['NM_001352413.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352413.1:c.-108-7C>T' assert results['NM_001352413.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352413.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352413.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} - assert 
results['NM_001352413.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111735981', 'alt': u'A'}} - assert results['NM_001352413.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '111865258', 'alt': u'A'}} + assert results['NM_001352413.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352413.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} + assert results['NM_001352413.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} + assert results['NM_001352413.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} assert results['NM_001352413.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339342.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352413.1'} def test_variant216(self): variant = '12-11023080-C-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NR_037918.2:n.1184+11736G>T' in results.keys() + assert 'NR_037918.2:n.1184+11736G>T' in list(results.keys()) assert results['NR_037918.2:n.1184+11736G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NR_037918.2:n.1184+11736G>T']['refseqgene_context_intronic_sequence'] == '' - assert 
results['NR_037918.2:n.1184+11736G>T']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187658.1:g.69187C>A', 'vcf': {'chr': 'HSCHR12_3_CTG2', 'ref': u'C', 'pos': '69187', 'alt': u'A'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187658.1:g.69187C>A', 'vcf': {'chr': 'chr12_KI270904v1_alt', 'ref': u'C', 'pos': '69187', 'alt': u'A'}}}, {'grch37': {'hgvs_genomic_description': 'NW_003571047.1:g.69187C>A', 'vcf': {'chr': 'HG1133_PATCH', 'ref': u'C', 'pos': '69187', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003571047.1:g.69187C>A', 'vcf': {'chr': 'NW_003571047.1', 'ref': u'C', 'pos': '69187', 'alt': u'A'}}}, {'grch37': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'HSCHR12_2_CTG2', 'ref': u'C', 'pos': '69187', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'NW_003571050.1', 'ref': u'C', 'pos': '69187', 'alt': u'A'}}}, {'grch38': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'HSCHR12_2_CTG2', 'ref': u'C', 'pos': '69187', 'alt': u'A'}}}, {'hg38': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'chr12_GL877876v1_alt', 'ref': u'C', 'pos': '69187', 'alt': u'A'}}}] + assert results['NR_037918.2:n.1184+11736G>T']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187658.1:g.69187C>A', 'vcf': {'chr': 'HSCHR12_3_CTG2', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187658.1:g.69187C>A', 'vcf': {'chr': 'chr12_KI270904v1_alt', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'grch37': {'hgvs_genomic_description': 'NW_003571047.1:g.69187C>A', 'vcf': {'chr': 'HG1133_PATCH', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003571047.1:g.69187C>A', 'vcf': {'chr': 'NW_003571047.1', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'grch37': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'HSCHR12_2_CTG2', 'ref': 'C', 
'pos': '69187', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'NW_003571050.1', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'HSCHR12_2_CTG2', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'chr12_GL877876v1_alt', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}] assert results['NR_037918.2:n.1184+11736G>T']['transcript_description'] == 'Homo sapiens PRH1-PRR4 readthrough (PRH1-PRR4), long non-coding RNA' assert results['NR_037918.2:n.1184+11736G>T']['gene_symbol'] == 'PRH1-PRR4' assert results['NR_037918.2:n.1184+11736G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} @@ -8762,19 +8762,19 @@ def test_variant216(self): assert results['NR_037918.2:n.1184+11736G>T']['hgvs_lrg_variant'] == '' assert results['NR_037918.2:n.1184+11736G>T']['hgvs_transcript_variant'] == 'NR_037918.2:n.1184+11736G>T' assert results['NR_037918.2:n.1184+11736G>T']['hgvs_refseqgene_variant'] == '' - assert results['NR_037918.2:n.1184+11736G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.11023080C>A', 'vcf': {'chr': 'chr12', 'ref': u'C', 'pos': '11023080', 'alt': u'A'}} - assert results['NR_037918.2:n.1184+11736G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.10870481C>A', 'vcf': {'chr': 'chr12', 'ref': u'C', 'pos': '10870481', 'alt': u'A'}} - assert results['NR_037918.2:n.1184+11736G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.11023080C>A', 'vcf': {'chr': '12', 'ref': u'C', 'pos': '11023080', 'alt': u'A'}} - assert results['NR_037918.2:n.1184+11736G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.10870481C>A', 'vcf': {'chr': '12', 'ref': u'C', 'pos': '10870481', 'alt': u'A'}} + assert 
results['NR_037918.2:n.1184+11736G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.11023080C>A', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '11023080', 'alt': 'A'}} + assert results['NR_037918.2:n.1184+11736G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.10870481C>A', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '10870481', 'alt': 'A'}} + assert results['NR_037918.2:n.1184+11736G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.11023080C>A', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '11023080', 'alt': 'A'}} + assert results['NR_037918.2:n.1184+11736G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.10870481C>A', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '10870481', 'alt': 'A'}} assert results['NR_037918.2:n.1184+11736G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_037918.2'} def test_variant217(self): variant = '12-22018712-TC-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_020297.3:c.2199-1302del' in results.keys() + assert 'NM_020297.3:c.2199-1302del' in list(results.keys()) assert results['NM_020297.3:c.2199-1302del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_020297.3:c.2199-1302del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_020297.3:c.2199-1302del']['alt_genomic_loci'] == [] @@ -8792,7 +8792,7 @@ def test_variant217(self): assert results['NM_020297.3:c.2199-1302del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.21865779del', 'vcf': {'chr': '12', 'ref': 'TC', 'pos': '21865778', 'alt': 'T'}} assert results['NM_020297.3:c.2199-1302del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_064693.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020297.3'} - assert 'NM_005691.3:c.2199-1302del' in 
results.keys() + assert 'NM_005691.3:c.2199-1302del' in list(results.keys()) assert results['NM_005691.3:c.2199-1302del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_005691.3:c.2199-1302del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_005691.3:c.2199-1302del']['alt_genomic_loci'] == [] @@ -8810,7 +8810,7 @@ def test_variant217(self): assert results['NM_005691.3:c.2199-1302del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.21865779del', 'vcf': {'chr': '12', 'ref': 'TC', 'pos': '21865778', 'alt': 'T'}} assert results['NM_005691.3:c.2199-1302del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005682.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005691.3'} - assert 'NM_020297.2:c.2199-1302del' in results.keys() + assert 'NM_020297.2:c.2199-1302del' in list(results.keys()) assert results['NM_020297.2:c.2199-1302del']['hgvs_lrg_transcript_variant'] == 'LRG_377t1:c.2199-1302del' assert results['NM_020297.2:c.2199-1302del']['refseqgene_context_intronic_sequence'] == 'NG_012819.1(NM_020297.2):c.2199-1302del' assert results['NM_020297.2:c.2199-1302del']['alt_genomic_loci'] == [] @@ -8823,13 +8823,13 @@ def test_variant217(self): assert results['NM_020297.2:c.2199-1302del']['hgvs_transcript_variant'] == 'NM_020297.2:c.2199-1302del' assert results['NM_020297.2:c.2199-1302del']['hgvs_refseqgene_variant'] == 'NG_012819.1:g.75916del' assert results['NM_020297.2:c.2199-1302del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.22018713del', 'vcf': {'chr': 'chr12', 'ref': 'TC', 'pos': '22018712', 'alt': 'T'}} - assert 'hg38' not in results['NM_020297.2:c.2199-1302del']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_020297.2:c.2199-1302del']['primary_assembly_loci'].keys()) assert results['NM_020297.2:c.2199-1302del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000012.11:g.22018713del', 'vcf': {'chr': '12', 'ref': 'TC', 'pos': '22018712', 'alt': 'T'}} - assert 'grch38' not in results['NM_020297.2:c.2199-1302del']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_020297.2:c.2199-1302del']['primary_assembly_loci'].keys()) assert results['NM_020297.2:c.2199-1302del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012819.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_064693.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020297.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_377.xml'} assert results['flag'] == 'gene_variant' - assert 'NM_005691.2:c.2199-1302del' in results.keys() + assert 'NM_005691.2:c.2199-1302del' in list(results.keys()) assert results['NM_005691.2:c.2199-1302del']['hgvs_lrg_transcript_variant'] == 'LRG_377t2:c.2199-1302del' assert results['NM_005691.2:c.2199-1302del']['refseqgene_context_intronic_sequence'] == 'NG_012819.1(NM_005691.2):c.2199-1302del' assert results['NM_005691.2:c.2199-1302del']['alt_genomic_loci'] == [] @@ -8842,19 +8842,19 @@ def test_variant217(self): assert results['NM_005691.2:c.2199-1302del']['hgvs_transcript_variant'] == 'NM_005691.2:c.2199-1302del' assert results['NM_005691.2:c.2199-1302del']['hgvs_refseqgene_variant'] == 'NG_012819.1:g.75916del' assert results['NM_005691.2:c.2199-1302del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.22018713del', 'vcf': {'chr': 'chr12', 'ref': 'TC', 'pos': '22018712', 'alt': 'T'}} - assert 'hg38' not in results['NM_005691.2:c.2199-1302del']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_005691.2:c.2199-1302del']['primary_assembly_loci'].keys()) assert results['NM_005691.2:c.2199-1302del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.22018713del', 'vcf': {'chr': '12', 'ref': 'TC', 'pos': '22018712', 'alt': 'T'}} - assert 'grch38' not in 
results['NM_005691.2:c.2199-1302del']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_005691.2:c.2199-1302del']['primary_assembly_loci'].keys()) assert results['NM_005691.2:c.2199-1302del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012819.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005682.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005691.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_377.xml'} def test_variant218(self): variant = '12-52912946-T-C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000424.3:c.556-2A>G' in results.keys() + assert 'NM_000424.3:c.556-2A>G' in list(results.keys()) assert results['NM_000424.3:c.556-2A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000424.3:c.556-2A>G']['refseqgene_context_intronic_sequence'] == 'NG_008297.1(NM_000424.3):c.556-2A>G' assert results['NM_000424.3:c.556-2A>G']['alt_genomic_loci'] == [] @@ -8866,19 +8866,19 @@ def test_variant218(self): assert results['NM_000424.3:c.556-2A>G']['hgvs_lrg_variant'] == '' assert results['NM_000424.3:c.556-2A>G']['hgvs_transcript_variant'] == 'NM_000424.3:c.556-2A>G' assert results['NM_000424.3:c.556-2A>G']['hgvs_refseqgene_variant'] == 'NG_008297.1:g.6298A>G' - assert results['NM_000424.3:c.556-2A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.52912946T>C', 'vcf': {'chr': 'chr12', 'ref': u'T', 'pos': '52912946', 'alt': u'C'}} - assert results['NM_000424.3:c.556-2A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.52519162T>C', 'vcf': {'chr': 'chr12', 'ref': u'T', 'pos': '52519162', 'alt': u'C'}} - assert results['NM_000424.3:c.556-2A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.52912946T>C', 'vcf': {'chr': '12', 'ref': u'T', 'pos': '52912946', 'alt': u'C'}} 
- assert results['NM_000424.3:c.556-2A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.52519162T>C', 'vcf': {'chr': '12', 'ref': u'T', 'pos': '52519162', 'alt': u'C'}} + assert results['NM_000424.3:c.556-2A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.52912946T>C', 'vcf': {'chr': 'chr12', 'ref': 'T', 'pos': '52912946', 'alt': 'C'}} + assert results['NM_000424.3:c.556-2A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.52519162T>C', 'vcf': {'chr': 'chr12', 'ref': 'T', 'pos': '52519162', 'alt': 'C'}} + assert results['NM_000424.3:c.556-2A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.52912946T>C', 'vcf': {'chr': '12', 'ref': 'T', 'pos': '52912946', 'alt': 'C'}} + assert results['NM_000424.3:c.556-2A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.52519162T>C', 'vcf': {'chr': '12', 'ref': 'T', 'pos': '52519162', 'alt': 'C'}} assert results['NM_000424.3:c.556-2A>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008297.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000415.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000424.3'} def test_variant219(self): variant = '12-103234292-TC-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001354304.1:c.1200del' in results.keys() + assert 'NM_001354304.1:c.1200del' in list(results.keys()) assert results['NM_001354304.1:c.1200del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001354304.1:c.1200del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001354304.1:c.1200del']['alt_genomic_loci'] == [] @@ -8896,7 +8896,7 @@ def test_variant219(self): assert results['NM_001354304.1:c.1200del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102840516del', 'vcf': {'chr': 
'12', 'ref': 'TC', 'pos': '102840514', 'alt': 'T'}} assert results['NM_001354304.1:c.1200del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341233.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354304.1'} - assert 'NM_000277.2:c.1200del' in results.keys() + assert 'NM_000277.2:c.1200del' in list(results.keys()) assert results['NM_000277.2:c.1200del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000277.2:c.1200del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000277.2:c.1200del']['alt_genomic_loci'] == [] @@ -8915,7 +8915,7 @@ def test_variant219(self): assert results['NM_000277.2:c.1200del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000268.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000277.2'} assert results['flag'] == 'gene_variant' - assert 'NM_000277.1:c.1200del' in results.keys() + assert 'NM_000277.1:c.1200del' in list(results.keys()) assert results['NM_000277.1:c.1200del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000277.1:c.1200del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000277.1:c.1200del']['alt_genomic_loci'] == [] @@ -8937,9 +8937,9 @@ def test_variant219(self): def test_variant220(self): variant = '12-103311124-T-C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001354304.1:c.-95-121A>G' in results.keys() + assert 'NM_001354304.1:c.-95-121A>G' in list(results.keys()) assert results['NM_001354304.1:c.-95-121A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001354304.1:c.-95-121A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001354304.1:c.-95-121A>G']['alt_genomic_loci'] == [] @@ -8951,14 +8951,14 @@ def test_variant220(self): assert results['NM_001354304.1:c.-95-121A>G']['hgvs_lrg_variant'] == '' assert results['NM_001354304.1:c.-95-121A>G']['hgvs_transcript_variant'] == 
'NM_001354304.1:c.-95-121A>G' assert results['NM_001354304.1:c.-95-121A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001354304.1:c.-95-121A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': 'chr12', 'ref': u'T', 'pos': '103311124', 'alt': u'C'}} - assert results['NM_001354304.1:c.-95-121A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': 'chr12', 'ref': u'T', 'pos': '102917346', 'alt': u'C'}} - assert results['NM_001354304.1:c.-95-121A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': '12', 'ref': u'T', 'pos': '103311124', 'alt': u'C'}} - assert results['NM_001354304.1:c.-95-121A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': '12', 'ref': u'T', 'pos': '102917346', 'alt': u'C'}} + assert results['NM_001354304.1:c.-95-121A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': 'chr12', 'ref': 'T', 'pos': '103311124', 'alt': 'C'}} + assert results['NM_001354304.1:c.-95-121A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': 'chr12', 'ref': 'T', 'pos': '102917346', 'alt': 'C'}} + assert results['NM_001354304.1:c.-95-121A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': '12', 'ref': 'T', 'pos': '103311124', 'alt': 'C'}} + assert results['NM_001354304.1:c.-95-121A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': '12', 'ref': 'T', 'pos': '102917346', 'alt': 'C'}} assert results['NM_001354304.1:c.-95-121A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341233.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354304.1'} assert results['flag'] == 'gene_variant' - assert 'NM_000277.2:c.-216A>G' in results.keys() + assert 'NM_000277.2:c.-216A>G' in list(results.keys()) assert results['NM_000277.2:c.-216A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000277.2:c.-216A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000277.2:c.-216A>G']['alt_genomic_loci'] == [] @@ -8970,13 +8970,13 @@ def test_variant220(self): assert results['NM_000277.2:c.-216A>G']['hgvs_lrg_variant'] == '' assert results['NM_000277.2:c.-216A>G']['hgvs_transcript_variant'] == 'NM_000277.2:c.-216A>G' assert results['NM_000277.2:c.-216A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_000277.2:c.-216A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': 'chr12', 'ref': u'T', 'pos': '103311124', 'alt': u'C'}} - assert results['NM_000277.2:c.-216A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': 'chr12', 'ref': u'T', 'pos': '102917346', 'alt': u'C'}} - assert results['NM_000277.2:c.-216A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': '12', 'ref': u'T', 'pos': '103311124', 'alt': u'C'}} - assert results['NM_000277.2:c.-216A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': '12', 'ref': u'T', 'pos': '102917346', 'alt': u'C'}} + assert results['NM_000277.2:c.-216A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': 'chr12', 'ref': 'T', 'pos': '103311124', 'alt': 'C'}} + assert results['NM_000277.2:c.-216A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': 'chr12', 'ref': 'T', 'pos': '102917346', 'alt': 'C'}} + assert 
results['NM_000277.2:c.-216A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': '12', 'ref': 'T', 'pos': '103311124', 'alt': 'C'}} + assert results['NM_000277.2:c.-216A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': '12', 'ref': 'T', 'pos': '102917346', 'alt': 'C'}} assert results['NM_000277.2:c.-216A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000268.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000277.2'} - assert 'NM_000277.1:c.-215A>G' in results.keys() + assert 'NM_000277.1:c.-215A>G' in list(results.keys()) assert results['NM_000277.1:c.-215A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000277.1:c.-215A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000277.1:c.-215A>G']['alt_genomic_loci'] == [] @@ -8988,19 +8988,19 @@ def test_variant220(self): assert results['NM_000277.1:c.-215A>G']['hgvs_lrg_variant'] == '' assert results['NM_000277.1:c.-215A>G']['hgvs_transcript_variant'] == 'NM_000277.1:c.-215A>G' assert results['NM_000277.1:c.-215A>G']['hgvs_refseqgene_variant'] == 'NG_008690.1:g.5258A>G' - assert results['NM_000277.1:c.-215A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': 'chr12', 'ref': u'T', 'pos': '103311124', 'alt': u'C'}} - assert results['NM_000277.1:c.-215A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': 'chr12', 'ref': u'T', 'pos': '102917346', 'alt': u'C'}} - assert results['NM_000277.1:c.-215A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': '12', 'ref': u'T', 'pos': '103311124', 'alt': u'C'}} - assert results['NM_000277.1:c.-215A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000012.12:g.102917346T>C', 'vcf': {'chr': '12', 'ref': u'T', 'pos': '102917346', 'alt': u'C'}} + assert results['NM_000277.1:c.-215A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': 'chr12', 'ref': 'T', 'pos': '103311124', 'alt': 'C'}} + assert results['NM_000277.1:c.-215A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': 'chr12', 'ref': 'T', 'pos': '102917346', 'alt': 'C'}} + assert results['NM_000277.1:c.-215A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': '12', 'ref': 'T', 'pos': '103311124', 'alt': 'C'}} + assert results['NM_000277.1:c.-215A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': '12', 'ref': 'T', 'pos': '102917346', 'alt': 'C'}} assert results['NM_000277.1:c.-215A>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000268.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000277.1'} def test_variant221(self): variant = '12-111064166-G-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001319681.1:c.-366-1G>A' in results.keys() + assert 'NM_001319681.1:c.-366-1G>A' in list(results.keys()) assert results['NM_001319681.1:c.-366-1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001319681.1:c.-366-1G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001319681.1:c.-366-1G>A']['alt_genomic_loci'] == [] @@ -9018,7 +9018,7 @@ def test_variant221(self): assert results['NM_001319681.1:c.-366-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} assert 
results['NM_001319681.1:c.-366-1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001306610.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001319681.1'} - assert 'NM_001319680.1:c.342-1G>A' in results.keys() + assert 'NM_001319680.1:c.342-1G>A' in list(results.keys()) assert results['NM_001319680.1:c.342-1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001319680.1:c.342-1G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001319680.1:c.342-1G>A']['alt_genomic_loci'] == [] @@ -9036,7 +9036,7 @@ def test_variant221(self): assert results['NM_001319680.1:c.342-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} assert results['NM_001319680.1:c.342-1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001306609.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001319680.1'} - assert 'NM_001082538.2:c.342-1G>A' in results.keys() + assert 'NM_001082538.2:c.342-1G>A' in list(results.keys()) assert results['NM_001082538.2:c.342-1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001082538.2:c.342-1G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001082538.2:c.342-1G>A']['alt_genomic_loci'] == [] @@ -9054,7 +9054,7 @@ def test_variant221(self): assert results['NM_001082538.2:c.342-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} assert results['NM_001082538.2:c.342-1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001076007.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001082538.2'} - assert 'NM_001173976.1:c.162-1G>A' in results.keys() + assert 'NM_001173976.1:c.162-1G>A' in list(results.keys()) assert 
results['NM_001173976.1:c.162-1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001173976.1:c.162-1G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001173976.1:c.162-1G>A']['alt_genomic_loci'] == [] @@ -9073,7 +9073,7 @@ def test_variant221(self): assert results['NM_001173976.1:c.162-1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167447.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001173976.1'} assert results['flag'] == 'gene_variant' - assert 'NM_001082537.2:c.342-1G>A' in results.keys() + assert 'NM_001082537.2:c.342-1G>A' in list(results.keys()) assert results['NM_001082537.2:c.342-1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001082537.2:c.342-1G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001082537.2:c.342-1G>A']['alt_genomic_loci'] == [] @@ -9091,7 +9091,7 @@ def test_variant221(self): assert results['NM_001082537.2:c.342-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} assert results['NM_001082537.2:c.342-1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001076006.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001082537.2'} - assert 'NR_135088.1:n.559-1G>A' in results.keys() + assert 'NR_135088.1:n.559-1G>A' in list(results.keys()) assert results['NR_135088.1:n.559-1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NR_135088.1:n.559-1G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NR_135088.1:n.559-1G>A']['alt_genomic_loci'] == [] @@ -9109,7 +9109,7 @@ def test_variant221(self): assert results['NR_135088.1:n.559-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} assert 
results['NR_135088.1:n.559-1G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_135088.1'} - assert 'NM_024549.5:c.342-1G>A' in results.keys() + assert 'NM_024549.5:c.342-1G>A' in list(results.keys()) assert results['NM_024549.5:c.342-1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_024549.5:c.342-1G>A']['refseqgene_context_intronic_sequence'] == 'NG_030381.1(NM_024549.5):c.342-1G>A' assert results['NM_024549.5:c.342-1G>A']['alt_genomic_loci'] == [] @@ -9127,7 +9127,7 @@ def test_variant221(self): assert results['NM_024549.5:c.342-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} assert results['NM_024549.5:c.342-1G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_030381.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_078825.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024549.5'} - assert 'NM_001173975.2:c.174-1G>A' in results.keys() + assert 'NM_001173975.2:c.174-1G>A' in list(results.keys()) assert results['NM_001173975.2:c.174-1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001173975.2:c.174-1G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001173975.2:c.174-1G>A']['alt_genomic_loci'] == [] @@ -9145,7 +9145,7 @@ def test_variant221(self): assert results['NM_001173975.2:c.174-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} assert results['NM_001173975.2:c.174-1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167446.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001173975.2'} - assert 'NM_001173975.1:c.174-1G>A' in results.keys() + assert 'NM_001173975.1:c.174-1G>A' in list(results.keys()) assert 
results['NM_001173975.1:c.174-1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001173975.1:c.174-1G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001173975.1:c.174-1G>A']['alt_genomic_loci'] == [] @@ -9158,12 +9158,12 @@ def test_variant221(self): assert results['NM_001173975.1:c.174-1G>A']['hgvs_transcript_variant'] == 'NM_001173975.1:c.174-1G>A' assert results['NM_001173975.1:c.174-1G>A']['hgvs_refseqgene_variant'] == '' assert results['NM_001173975.1:c.174-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} - assert 'hg38' not in results['NM_001173975.1:c.174-1G>A']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001173975.1:c.174-1G>A']['primary_assembly_loci'].keys()) assert results['NM_001173975.1:c.174-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} - assert 'grch38' not in results['NM_001173975.1:c.174-1G>A']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001173975.1:c.174-1G>A']['primary_assembly_loci'].keys()) assert results['NM_001173975.1:c.174-1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167446.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001173975.1'} - assert 'NM_001319682.1:c.174-1G>A' in results.keys() + assert 'NM_001319682.1:c.174-1G>A' in list(results.keys()) assert results['NM_001319682.1:c.174-1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001319682.1:c.174-1G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001319682.1:c.174-1G>A']['alt_genomic_loci'] == [] @@ -9185,9 +9185,9 @@ def test_variant221(self): def test_variant222(self): variant = '12-123738430-CA-C' results = self.vv.validate(variant, 'GRCh37', 'all') - 
print results + print(results) - assert 'NM_001194995.1:c.210del' in results.keys() + assert 'NM_001194995.1:c.210del' in list(results.keys()) assert results['NM_001194995.1:c.210del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001194995.1:c.210del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001194995.1:c.210del']['alt_genomic_loci'] == [] @@ -9206,7 +9206,7 @@ def test_variant222(self): assert results['NM_001194995.1:c.210del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001181924.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001194995.1'} assert results['flag'] == 'gene_variant' - assert 'NM_152269.4:c.210del' in results.keys() + assert 'NM_152269.4:c.210del' in list(results.keys()) assert results['NM_152269.4:c.210del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_152269.4:c.210del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_152269.4:c.210del']['alt_genomic_loci'] == [] @@ -9224,7 +9224,7 @@ def test_variant222(self): assert results['NM_152269.4:c.210del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.123253884del', 'vcf': {'chr': '12', 'ref': 'CA', 'pos': '123253883', 'alt': 'C'}} assert results['NM_152269.4:c.210del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_027517.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_689482.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_152269.4'} - assert 'NM_001143905.2:c.210del' in results.keys() + assert 'NM_001143905.2:c.210del' in list(results.keys()) assert results['NM_001143905.2:c.210del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001143905.2:c.210del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001143905.2:c.210del']['alt_genomic_loci'] == [] @@ -9246,10 +9246,10 @@ def test_variant222(self): def test_variant223(self): variant = '13-31789169-CT-C' results = 
self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_194318.3:c.71-5del' in results.keys() + assert 'NM_194318.3:c.71-5del' in list(results.keys()) assert results['NM_194318.3:c.71-5del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_194318.3:c.71-5del']['refseqgene_context_intronic_sequence'] == 'NG_011732.1(NM_194318.3):c.71-5del' assert results['NM_194318.3:c.71-5del']['alt_genomic_loci'] == [] @@ -9271,9 +9271,9 @@ def test_variant223(self): def test_variant224(self): variant = '14-62187287-G-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NR_144368.1:n.214-3552C>T' in results.keys() + assert 'NR_144368.1:n.214-3552C>T' in list(results.keys()) assert results['NR_144368.1:n.214-3552C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NR_144368.1:n.214-3552C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NR_144368.1:n.214-3552C>T']['alt_genomic_loci'] == [] @@ -9285,13 +9285,13 @@ def test_variant224(self): assert results['NR_144368.1:n.214-3552C>T']['hgvs_lrg_variant'] == '' assert results['NR_144368.1:n.214-3552C>T']['hgvs_transcript_variant'] == 'NR_144368.1:n.214-3552C>T' assert results['NR_144368.1:n.214-3552C>T']['hgvs_refseqgene_variant'] == '' - assert results['NR_144368.1:n.214-3552C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.62187287G>A', 'vcf': {'chr': 'chr14', 'ref': u'G', 'pos': '62187287', 'alt': u'A'}} - assert results['NR_144368.1:n.214-3552C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': 'chr14', 'ref': u'G', 'pos': '61720569', 'alt': u'A'}} - assert results['NR_144368.1:n.214-3552C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62187287G>A', 'vcf': {'chr': '14', 'ref': u'G', 'pos': '62187287', 'alt': u'A'}} - assert 
results['NR_144368.1:n.214-3552C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': '14', 'ref': u'G', 'pos': '61720569', 'alt': u'A'}} + assert results['NR_144368.1:n.214-3552C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.62187287G>A', 'vcf': {'chr': 'chr14', 'ref': 'G', 'pos': '62187287', 'alt': 'A'}} + assert results['NR_144368.1:n.214-3552C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': 'chr14', 'ref': 'G', 'pos': '61720569', 'alt': 'A'}} + assert results['NR_144368.1:n.214-3552C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62187287G>A', 'vcf': {'chr': '14', 'ref': 'G', 'pos': '62187287', 'alt': 'A'}} + assert results['NR_144368.1:n.214-3552C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': '14', 'ref': 'G', 'pos': '61720569', 'alt': 'A'}} assert results['NR_144368.1:n.214-3552C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_144368.1'} - assert 'NM_181054.2:c.223G>A' in results.keys() + assert 'NM_181054.2:c.223G>A' in list(results.keys()) assert results['NM_181054.2:c.223G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_181054.2:c.223G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_181054.2:c.223G>A']['alt_genomic_loci'] == [] @@ -9310,7 +9310,7 @@ def test_variant224(self): assert results['NM_181054.2:c.223G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_851397.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181054.2'} assert results['flag'] == 'gene_variant' - assert 'NM_001243084.1:c.295G>A' in results.keys() + assert 'NM_001243084.1:c.295G>A' in list(results.keys()) assert results['NM_001243084.1:c.295G>A']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001243084.1:c.295G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001243084.1:c.295G>A']['alt_genomic_loci'] == [] @@ -9328,7 +9328,7 @@ def test_variant224(self): assert results['NM_001243084.1:c.295G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': '14', 'ref': 'G', 'pos': '61720569', 'alt': 'A'}} assert results['NM_001243084.1:c.295G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001230013.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001243084.1'} - assert 'NM_001530.3:c.223G>A' in results.keys() + assert 'NM_001530.3:c.223G>A' in list(results.keys()) assert results['NM_001530.3:c.223G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001530.3:c.223G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001530.3:c.223G>A']['alt_genomic_loci'] == [] @@ -9350,9 +9350,9 @@ def test_variant224(self): def test_variant225(self): variant = '14-62188231-TT-GA' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NR_144368.1:n.214-4497_214-4496delinsTC' in results.keys() + assert 'NR_144368.1:n.214-4497_214-4496delinsTC' in list(results.keys()) assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['hgvs_lrg_transcript_variant'] == '' assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['refseqgene_context_intronic_sequence'] == '' assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['alt_genomic_loci'] == [] @@ -9364,13 +9364,13 @@ def test_variant225(self): assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['hgvs_lrg_variant'] == '' assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['hgvs_transcript_variant'] == 'NR_144368.1:n.214-4497_214-4496delinsTC' assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['hgvs_refseqgene_variant'] == '' - assert 
results['NR_144368.1:n.214-4497_214-4496delinsTC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': 'chr14', 'ref': 'TT', 'pos': '62188231', 'alt': u'GA'}} - assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': 'chr14', 'ref': 'TT', 'pos': '61721513', 'alt': u'GA'}} - assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': '14', 'ref': 'TT', 'pos': '62188231', 'alt': u'GA'}} - assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': '14', 'ref': 'TT', 'pos': '61721513', 'alt': u'GA'}} + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': 'chr14', 'ref': 'TT', 'pos': '62188231', 'alt': 'GA'}} + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': 'chr14', 'ref': 'TT', 'pos': '61721513', 'alt': 'GA'}} + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': '14', 'ref': 'TT', 'pos': '62188231', 'alt': 'GA'}} + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': '14', 'ref': 'TT', 'pos': '61721513', 'alt': 'GA'}} assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['reference_sequence_records'] == 
{'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_144368.1'} - assert 'NM_001530.3:c.231_232delinsGA' in results.keys() + assert 'NM_001530.3:c.231_232delinsGA' in list(results.keys()) assert results['NM_001530.3:c.231_232delinsGA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001530.3:c.231_232delinsGA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001530.3:c.231_232delinsGA']['alt_genomic_loci'] == [] @@ -9389,7 +9389,7 @@ def test_variant225(self): assert results['NM_001530.3:c.231_232delinsGA']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029606.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001521.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001530.3'} assert results['flag'] == 'gene_variant' - assert 'NM_001243084.1:c.303_304delinsGA' in results.keys() + assert 'NM_001243084.1:c.303_304delinsGA' in list(results.keys()) assert results['NM_001243084.1:c.303_304delinsGA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001243084.1:c.303_304delinsGA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001243084.1:c.303_304delinsGA']['alt_genomic_loci'] == [] @@ -9407,7 +9407,7 @@ def test_variant225(self): assert results['NM_001243084.1:c.303_304delinsGA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': '14', 'ref': 'TT', 'pos': '61721513', 'alt': 'GA'}} assert results['NM_001243084.1:c.303_304delinsGA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001230013.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001243084.1'} - assert 'NM_181054.2:c.231_232delinsGA' in results.keys() + assert 'NM_181054.2:c.231_232delinsGA' in list(results.keys()) assert results['NM_181054.2:c.231_232delinsGA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_181054.2:c.231_232delinsGA']['refseqgene_context_intronic_sequence'] == 
'' assert results['NM_181054.2:c.231_232delinsGA']['alt_genomic_loci'] == [] @@ -9429,9 +9429,9 @@ def test_variant225(self): def test_variant226(self): variant = '14-63174827-C-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_139318.3:c.2366G>T' in results.keys() + assert 'NM_139318.3:c.2366G>T' in list(results.keys()) assert results['NM_139318.3:c.2366G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_139318.3:c.2366G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_139318.3:c.2366G>T']['alt_genomic_loci'] == [] @@ -9443,13 +9443,13 @@ def test_variant226(self): assert results['NM_139318.3:c.2366G>T']['hgvs_lrg_variant'] == '' assert results['NM_139318.3:c.2366G>T']['hgvs_transcript_variant'] == 'NM_139318.3:c.2366G>T' assert results['NM_139318.3:c.2366G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_139318.3:c.2366G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': 'chr14', 'ref': u'C', 'pos': '63174827', 'alt': u'A'}} - assert 'hg38' not in results['NM_139318.3:c.2366G>T']['primary_assembly_loci'].keys() - assert results['NM_139318.3:c.2366G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': '14', 'ref': u'C', 'pos': '63174827', 'alt': u'A'}} - assert 'grch38' not in results['NM_139318.3:c.2366G>T']['primary_assembly_loci'].keys() + assert results['NM_139318.3:c.2366G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': 'chr14', 'ref': 'C', 'pos': '63174827', 'alt': 'A'}} + assert 'hg38' not in list(results['NM_139318.3:c.2366G>T']['primary_assembly_loci'].keys()) + assert results['NM_139318.3:c.2366G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': '14', 'ref': 'C', 'pos': '63174827', 'alt': 'A'}} + assert 'grch38' not 
in list(results['NM_139318.3:c.2366G>T']['primary_assembly_loci'].keys()) assert results['NM_139318.3:c.2366G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_647479.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_139318.3'} - assert 'NM_172375.1:c.*333G>T' in results.keys() + assert 'NM_172375.1:c.*333G>T' in list(results.keys()) assert results['NM_172375.1:c.*333G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_172375.1:c.*333G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_172375.1:c.*333G>T']['alt_genomic_loci'] == [] @@ -9461,13 +9461,13 @@ def test_variant226(self): assert results['NM_172375.1:c.*333G>T']['hgvs_lrg_variant'] == '' assert results['NM_172375.1:c.*333G>T']['hgvs_transcript_variant'] == 'NM_172375.1:c.*333G>T' assert results['NM_172375.1:c.*333G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_172375.1:c.*333G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': 'chr14', 'ref': u'C', 'pos': '63174827', 'alt': u'A'}} - assert 'hg38' not in results['NM_172375.1:c.*333G>T']['primary_assembly_loci'].keys() - assert results['NM_172375.1:c.*333G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': '14', 'ref': u'C', 'pos': '63174827', 'alt': u'A'}} - assert 'grch38' not in results['NM_172375.1:c.*333G>T']['primary_assembly_loci'].keys() + assert results['NM_172375.1:c.*333G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': 'chr14', 'ref': 'C', 'pos': '63174827', 'alt': 'A'}} + assert 'hg38' not in list(results['NM_172375.1:c.*333G>T']['primary_assembly_loci'].keys()) + assert results['NM_172375.1:c.*333G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': '14', 'ref': 'C', 'pos': '63174827', 'alt': 'A'}} + assert 
'grch38' not in list(results['NM_172375.1:c.*333G>T']['primary_assembly_loci'].keys()) assert results['NM_172375.1:c.*333G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_758963.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_172375.1'} - assert 'NM_172375.2:c.*333G>T' in results.keys() + assert 'NM_172375.2:c.*333G>T' in list(results.keys()) assert results['NM_172375.2:c.*333G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_172375.2:c.*333G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_172375.2:c.*333G>T']['alt_genomic_loci'] == [] @@ -9479,14 +9479,14 @@ def test_variant226(self): assert results['NM_172375.2:c.*333G>T']['hgvs_lrg_variant'] == '' assert results['NM_172375.2:c.*333G>T']['hgvs_transcript_variant'] == 'NM_172375.2:c.*333G>T' assert results['NM_172375.2:c.*333G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_172375.2:c.*333G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': 'chr14', 'ref': u'C', 'pos': '63174827', 'alt': u'A'}} - assert results['NM_172375.2:c.*333G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.62708109C>A', 'vcf': {'chr': 'chr14', 'ref': u'C', 'pos': '62708109', 'alt': u'A'}} - assert results['NM_172375.2:c.*333G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': '14', 'ref': u'C', 'pos': '63174827', 'alt': u'A'}} - assert results['NM_172375.2:c.*333G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.62708109C>A', 'vcf': {'chr': '14', 'ref': u'C', 'pos': '62708109', 'alt': u'A'}} + assert results['NM_172375.2:c.*333G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': 'chr14', 'ref': 'C', 'pos': '63174827', 'alt': 'A'}} + assert 
results['NM_172375.2:c.*333G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.62708109C>A', 'vcf': {'chr': 'chr14', 'ref': 'C', 'pos': '62708109', 'alt': 'A'}} + assert results['NM_172375.2:c.*333G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': '14', 'ref': 'C', 'pos': '63174827', 'alt': 'A'}} + assert results['NM_172375.2:c.*333G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.62708109C>A', 'vcf': {'chr': '14', 'ref': 'C', 'pos': '62708109', 'alt': 'A'}} assert results['NM_172375.2:c.*333G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_758963.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_172375.2'} assert results['flag'] == 'gene_variant' - assert 'NM_139318.4:c.2366G>T' in results.keys() + assert 'NM_139318.4:c.2366G>T' in list(results.keys()) assert results['NM_139318.4:c.2366G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_139318.4:c.2366G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_139318.4:c.2366G>T']['alt_genomic_loci'] == [] @@ -9498,19 +9498,19 @@ def test_variant226(self): assert results['NM_139318.4:c.2366G>T']['hgvs_lrg_variant'] == '' assert results['NM_139318.4:c.2366G>T']['hgvs_transcript_variant'] == 'NM_139318.4:c.2366G>T' assert results['NM_139318.4:c.2366G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_139318.4:c.2366G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': 'chr14', 'ref': u'C', 'pos': '63174827', 'alt': u'A'}} - assert results['NM_139318.4:c.2366G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.62708109C>A', 'vcf': {'chr': 'chr14', 'ref': u'C', 'pos': '62708109', 'alt': u'A'}} - assert results['NM_139318.4:c.2366G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000014.8:g.63174827C>A', 'vcf': {'chr': '14', 'ref': u'C', 'pos': '63174827', 'alt': u'A'}} - assert results['NM_139318.4:c.2366G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.62708109C>A', 'vcf': {'chr': '14', 'ref': u'C', 'pos': '62708109', 'alt': u'A'}} + assert results['NM_139318.4:c.2366G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': 'chr14', 'ref': 'C', 'pos': '63174827', 'alt': 'A'}} + assert results['NM_139318.4:c.2366G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.62708109C>A', 'vcf': {'chr': 'chr14', 'ref': 'C', 'pos': '62708109', 'alt': 'A'}} + assert results['NM_139318.4:c.2366G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': '14', 'ref': 'C', 'pos': '63174827', 'alt': 'A'}} + assert results['NM_139318.4:c.2366G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.62708109C>A', 'vcf': {'chr': '14', 'ref': 'C', 'pos': '62708109', 'alt': 'A'}} assert results['NM_139318.4:c.2366G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_647479.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_139318.4'} def test_variant227(self): variant = '15-42680000-CA-C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_000070.2:c.550del' in results.keys() + assert 'NM_000070.2:c.550del' in list(results.keys()) assert results['NM_000070.2:c.550del']['hgvs_lrg_transcript_variant'] == 'LRG_849t1:c.550del' assert results['NM_000070.2:c.550del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000070.2:c.550del']['alt_genomic_loci'] == [] @@ -9529,7 +9529,7 @@ def test_variant227(self): assert results['NM_000070.2:c.550del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008660.1', 
'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000061.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000070.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_849.xml'} assert results['flag'] == 'gene_variant' - assert 'NM_024344.1:c.550del' in results.keys() + assert 'NM_024344.1:c.550del' in list(results.keys()) assert results['NM_024344.1:c.550del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_024344.1:c.550del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_024344.1:c.550del']['alt_genomic_loci'] == [] @@ -9547,7 +9547,7 @@ def test_variant227(self): assert results['NM_024344.1:c.550del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804del', 'vcf': {'chr': '15', 'ref': 'CA', 'pos': '42387802', 'alt': 'C'}} assert results['NM_024344.1:c.550del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_077320.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024344.1'} - assert 'NM_173087.1:c.550del' in results.keys() + assert 'NM_173087.1:c.550del' in list(results.keys()) assert results['NM_173087.1:c.550del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_173087.1:c.550del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_173087.1:c.550del']['alt_genomic_loci'] == [] @@ -9569,9 +9569,9 @@ def test_variant227(self): def test_variant228(self): variant = '15-42680000-CA-CAA' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_024344.1:c.550dup' in results.keys() + assert 'NM_024344.1:c.550dup' in list(results.keys()) assert results['NM_024344.1:c.550dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_024344.1:c.550dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_024344.1:c.550dup']['alt_genomic_loci'] == [] @@ -9589,7 +9589,7 @@ def test_variant228(self): assert 
results['NM_024344.1:c.550dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804dup', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '42387802', 'alt': 'CA'}} assert results['NM_024344.1:c.550dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_077320.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024344.1'} - assert 'NM_173087.1:c.550dup' in results.keys() + assert 'NM_173087.1:c.550dup' in list(results.keys()) assert results['NM_173087.1:c.550dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_173087.1:c.550dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_173087.1:c.550dup']['alt_genomic_loci'] == [] @@ -9608,7 +9608,7 @@ def test_variant228(self): assert results['NM_173087.1:c.550dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173087.1'} assert results['flag'] == 'gene_variant' - assert 'NM_000070.2:c.550dup' in results.keys() + assert 'NM_000070.2:c.550dup' in list(results.keys()) assert results['NM_000070.2:c.550dup']['hgvs_lrg_transcript_variant'] == 'LRG_849t1:c.550dup' assert results['NM_000070.2:c.550dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000070.2:c.550dup']['alt_genomic_loci'] == [] @@ -9630,9 +9630,9 @@ def test_variant228(self): def test_variant229(self): variant = '15-42703179-T-TTCA' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_173088.1:c.825_826insTCA' in results.keys() + assert 'NM_173088.1:c.825_826insTCA' in list(results.keys()) assert results['NM_173088.1:c.825_826insTCA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_173088.1:c.825_826insTCA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_173088.1:c.825_826insTCA']['alt_genomic_loci'] == [] @@ -9650,7 +9650,7 @@ def test_variant229(self): assert 
results['NM_173088.1:c.825_826insTCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} assert results['NM_173088.1:c.825_826insTCA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775111.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173088.1'} - assert 'NM_173090.1:c.366_367insTCA' in results.keys() + assert 'NM_173090.1:c.366_367insTCA' in list(results.keys()) assert results['NM_173090.1:c.366_367insTCA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_173090.1:c.366_367insTCA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_173090.1:c.366_367insTCA']['alt_genomic_loci'] == [] @@ -9668,7 +9668,7 @@ def test_variant229(self): assert results['NM_173090.1:c.366_367insTCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} assert results['NM_173090.1:c.366_367insTCA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775113.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173090.1'} - assert 'NM_173089.1:c.366_367insTCA' in results.keys() + assert 'NM_173089.1:c.366_367insTCA' in list(results.keys()) assert results['NM_173089.1:c.366_367insTCA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_173089.1:c.366_367insTCA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_173089.1:c.366_367insTCA']['alt_genomic_loci'] == [] @@ -9686,7 +9686,7 @@ def test_variant229(self): assert results['NM_173089.1:c.366_367insTCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} assert results['NM_173089.1:c.366_367insTCA']['reference_sequence_records'] == 
{'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775112.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173089.1'} - assert 'NM_173087.1:c.2085_2086insTCA' in results.keys() + assert 'NM_173087.1:c.2085_2086insTCA' in list(results.keys()) assert results['NM_173087.1:c.2085_2086insTCA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_173087.1:c.2085_2086insTCA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_173087.1:c.2085_2086insTCA']['alt_genomic_loci'] == [] @@ -9705,7 +9705,7 @@ def test_variant229(self): assert results['NM_173087.1:c.2085_2086insTCA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173087.1'} assert results['flag'] == 'gene_variant' - assert 'NM_000070.2:c.2361_2362insTCA' in results.keys() + assert 'NM_000070.2:c.2361_2362insTCA' in list(results.keys()) assert results['NM_000070.2:c.2361_2362insTCA']['hgvs_lrg_transcript_variant'] == 'LRG_849t1:c.2361_2362insTCA' assert results['NM_000070.2:c.2361_2362insTCA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000070.2:c.2361_2362insTCA']['alt_genomic_loci'] == [] @@ -9723,7 +9723,7 @@ def test_variant229(self): assert results['NM_000070.2:c.2361_2362insTCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} assert results['NM_000070.2:c.2361_2362insTCA']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008660.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000061.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000070.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_849.xml'} - assert 'NM_024344.1:c.2343_2344insTCA' in results.keys() + assert 'NM_024344.1:c.2343_2344insTCA' in list(results.keys()) assert 
results['NM_024344.1:c.2343_2344insTCA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_024344.1:c.2343_2344insTCA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_024344.1:c.2343_2344insTCA']['alt_genomic_loci'] == [] @@ -9745,9 +9745,9 @@ def test_variant229(self): def test_variant230(self): variant = '15-42703179-TAG-TTCATCT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_024344.1:c.2344_2345delinsTCATCT' in results.keys() + assert 'NM_024344.1:c.2344_2345delinsTCATCT' in list(results.keys()) assert results['NM_024344.1:c.2344_2345delinsTCATCT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_024344.1:c.2344_2345delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_024344.1:c.2344_2345delinsTCATCT']['alt_genomic_loci'] == [] @@ -9765,7 +9765,7 @@ def test_variant230(self): assert results['NM_024344.1:c.2344_2345delinsTCATCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} assert results['NM_024344.1:c.2344_2345delinsTCATCT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_077320.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024344.1'} - assert 'NM_173090.1:c.367_368delinsTCATCT' in results.keys() + assert 'NM_173090.1:c.367_368delinsTCATCT' in list(results.keys()) assert results['NM_173090.1:c.367_368delinsTCATCT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_173090.1:c.367_368delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_173090.1:c.367_368delinsTCATCT']['alt_genomic_loci'] == [] @@ -9784,7 +9784,7 @@ def test_variant230(self): assert results['NM_173090.1:c.367_368delinsTCATCT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775113.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_173090.1'} assert results['flag'] == 'gene_variant' - assert 'NM_000070.2:c.2362_2363delinsTCATCT' in results.keys() + assert 'NM_000070.2:c.2362_2363delinsTCATCT' in list(results.keys()) assert results['NM_000070.2:c.2362_2363delinsTCATCT']['hgvs_lrg_transcript_variant'] == 'LRG_849t1:c.2362_2363delinsTCATCT' assert results['NM_000070.2:c.2362_2363delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000070.2:c.2362_2363delinsTCATCT']['alt_genomic_loci'] == [] @@ -9802,7 +9802,7 @@ def test_variant230(self): assert results['NM_000070.2:c.2362_2363delinsTCATCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} assert results['NM_000070.2:c.2362_2363delinsTCATCT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008660.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000061.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000070.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_849.xml'} - assert 'NM_173088.1:c.826_827delinsTCATCT' in results.keys() + assert 'NM_173088.1:c.826_827delinsTCATCT' in list(results.keys()) assert results['NM_173088.1:c.826_827delinsTCATCT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_173088.1:c.826_827delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_173088.1:c.826_827delinsTCATCT']['alt_genomic_loci'] == [] @@ -9820,7 +9820,7 @@ def test_variant230(self): assert results['NM_173088.1:c.826_827delinsTCATCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} assert results['NM_173088.1:c.826_827delinsTCATCT']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_775111.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173088.1'} - assert 'NM_173089.1:c.367_368delinsTCATCT' in results.keys() + assert 'NM_173089.1:c.367_368delinsTCATCT' in list(results.keys()) assert results['NM_173089.1:c.367_368delinsTCATCT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_173089.1:c.367_368delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_173089.1:c.367_368delinsTCATCT']['alt_genomic_loci'] == [] @@ -9838,7 +9838,7 @@ def test_variant230(self): assert results['NM_173089.1:c.367_368delinsTCATCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} assert results['NM_173089.1:c.367_368delinsTCATCT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775112.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173089.1'} - assert 'NM_173087.1:c.2086_2087delinsTCATCT' in results.keys() + assert 'NM_173087.1:c.2086_2087delinsTCATCT' in list(results.keys()) assert results['NM_173087.1:c.2086_2087delinsTCATCT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_173087.1:c.2086_2087delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_173087.1:c.2086_2087delinsTCATCT']['alt_genomic_loci'] == [] @@ -9860,9 +9860,9 @@ def test_variant230(self): def test_variant231(self): variant = '15-48782203-C-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_000138.4:c.2927G>A' in results.keys() + assert 'NM_000138.4:c.2927G>A' in list(results.keys()) assert results['NM_000138.4:c.2927G>A']['hgvs_lrg_transcript_variant'] == 'LRG_778t1:c.2927G>A' assert results['NM_000138.4:c.2927G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000138.4:c.2927G>A']['alt_genomic_loci'] == [] @@ -9874,10 +9874,10 @@ def 
test_variant231(self): assert results['NM_000138.4:c.2927G>A']['hgvs_lrg_variant'] == 'LRG_778:g.160783G>A' assert results['NM_000138.4:c.2927G>A']['hgvs_transcript_variant'] == 'NM_000138.4:c.2927G>A' assert results['NM_000138.4:c.2927G>A']['hgvs_refseqgene_variant'] == 'NG_008805.2:g.160783G>A' - assert results['NM_000138.4:c.2927G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.48782203C>T', 'vcf': {'chr': 'chr15', 'ref': u'C', 'pos': '48782203', 'alt': u'T'}} - assert results['NM_000138.4:c.2927G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.48490006C>T', 'vcf': {'chr': 'chr15', 'ref': u'C', 'pos': '48490006', 'alt': u'T'}} - assert results['NM_000138.4:c.2927G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.48782203C>T', 'vcf': {'chr': '15', 'ref': u'C', 'pos': '48782203', 'alt': u'T'}} - assert results['NM_000138.4:c.2927G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.48490006C>T', 'vcf': {'chr': '15', 'ref': u'C', 'pos': '48490006', 'alt': u'T'}} + assert results['NM_000138.4:c.2927G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.48782203C>T', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '48782203', 'alt': 'T'}} + assert results['NM_000138.4:c.2927G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.48490006C>T', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '48490006', 'alt': 'T'}} + assert results['NM_000138.4:c.2927G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.48782203C>T', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '48782203', 'alt': 'T'}} + assert results['NM_000138.4:c.2927G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.48490006C>T', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '48490006', 'alt': 'T'}} assert 
results['NM_000138.4:c.2927G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008805.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000129.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000138.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_778.xml'} assert results['flag'] == 'gene_variant' @@ -9885,9 +9885,9 @@ def test_variant231(self): def test_variant232(self): variant = '15-72105929-CC-C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_014249.2:c.946_949=' in results.keys() + assert 'NM_014249.2:c.946_949=' in list(results.keys()) assert results['NM_014249.2:c.946_949=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014249.2:c.946_949=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014249.2:c.946_949=']['alt_genomic_loci'] == [] @@ -9900,12 +9900,12 @@ def test_variant232(self): assert results['NM_014249.2:c.946_949=']['hgvs_transcript_variant'] == 'NM_014249.2:c.946_949=' assert results['NM_014249.2:c.946_949=']['hgvs_refseqgene_variant'] == '' assert results['NM_014249.2:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} - assert 'hg38' not in results['NM_014249.2:c.946_949=']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_014249.2:c.946_949=']['primary_assembly_loci'].keys()) assert results['NM_014249.2:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} - assert 'grch38' not in results['NM_014249.2:c.946_949=']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_014249.2:c.946_949=']['primary_assembly_loci'].keys()) assert results['NM_014249.2:c.946_949=']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2'} - assert 'NM_016346.3:c.946_949=' in results.keys() + assert 'NM_016346.3:c.946_949=' in list(results.keys()) assert results['NM_016346.3:c.946_949=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_016346.3:c.946_949=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_016346.3:c.946_949=']['alt_genomic_loci'] == [] @@ -9918,13 +9918,13 @@ def test_variant232(self): assert results['NM_016346.3:c.946_949=']['hgvs_transcript_variant'] == 'NM_016346.3:c.946_949=' assert results['NM_016346.3:c.946_949=']['hgvs_refseqgene_variant'] == '' assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} - assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813587_71813590=', 'vcf': {'chr': 'chr15', 'ref': u'GACC', 'pos': '71813587', 'alt': u'GACC'}} + assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813587_71813590=', 'vcf': {'chr': 'chr15', 'ref': 'GACC', 'pos': '71813587', 'alt': 'GACC'}} assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} - assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813587_71813590=', 'vcf': {'chr': '15', 'ref': u'GACC', 'pos': '71813587', 'alt': u'GACC'}} + assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813587_71813590=', 'vcf': {'chr': '15', 'ref': 'GACC', 'pos': '71813587', 'alt': 'GACC'}} assert 
results['NM_016346.3:c.946_949=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3'} assert results['flag'] == 'gene_variant' - assert 'NM_014249.3:c.946_949=' in results.keys() + assert 'NM_014249.3:c.946_949=' in list(results.keys()) assert results['NM_014249.3:c.946_949=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014249.3:c.946_949=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014249.3:c.946_949=']['alt_genomic_loci'] == [] @@ -9937,12 +9937,12 @@ def test_variant232(self): assert results['NM_014249.3:c.946_949=']['hgvs_transcript_variant'] == 'NM_014249.3:c.946_949=' assert results['NM_014249.3:c.946_949=']['hgvs_refseqgene_variant'] == 'NG_009113.1:g.8034_8037=' assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} - assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813586_71813591=', 'vcf': {'chr': 'chr15', 'ref': u'GGACCC', 'pos': '71813586', 'alt': u'GGACCC'}} + assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813586_71813591=', 'vcf': {'chr': 'chr15', 'ref': 'GGACCC', 'pos': '71813586', 'alt': 'GGACCC'}} assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} - assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813586_71813591=', 'vcf': {'chr': '15', 'ref': u'GGACCC', 'pos': '71813586', 'alt': u'GGACCC'}} + assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000015.10:g.71813586_71813591=', 'vcf': {'chr': '15', 'ref': 'GGACCC', 'pos': '71813586', 'alt': 'GGACCC'}} assert results['NM_014249.3:c.946_949=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3'} - assert 'NM_016346.2:c.946_949=' in results.keys() + assert 'NM_016346.2:c.946_949=' in list(results.keys()) assert results['NM_016346.2:c.946_949=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_016346.2:c.946_949=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_016346.2:c.946_949=']['alt_genomic_loci'] == [] @@ -9955,18 +9955,18 @@ def test_variant232(self): assert results['NM_016346.2:c.946_949=']['hgvs_transcript_variant'] == 'NM_016346.2:c.946_949=' assert results['NM_016346.2:c.946_949=']['hgvs_refseqgene_variant'] == '' assert results['NM_016346.2:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} - assert 'hg38' not in results['NM_016346.2:c.946_949=']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_016346.2:c.946_949=']['primary_assembly_loci'].keys()) assert results['NM_016346.2:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} - assert 'grch38' not in results['NM_016346.2:c.946_949=']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_016346.2:c.946_949=']['primary_assembly_loci'].keys()) assert results['NM_016346.2:c.946_949=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2'} def test_variant233(self): variant = 
'15-89873415-G-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_002693.2:c.752C>T' in results.keys() + assert 'NM_002693.2:c.752C>T' in list(results.keys()) assert results['NM_002693.2:c.752C>T']['hgvs_lrg_transcript_variant'] == 'LRG_765t1:c.752C>T' assert results['NM_002693.2:c.752C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_002693.2:c.752C>T']['alt_genomic_loci'] == [] @@ -9978,14 +9978,14 @@ def test_variant233(self): assert results['NM_002693.2:c.752C>T']['hgvs_lrg_variant'] == '' assert results['NM_002693.2:c.752C>T']['hgvs_transcript_variant'] == 'NM_002693.2:c.752C>T' assert results['NM_002693.2:c.752C>T']['hgvs_refseqgene_variant'] == 'NG_008218.1:g.9612C>T' - assert results['NM_002693.2:c.752C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.89873415G>A', 'vcf': {'chr': 'chr15', 'ref': u'G', 'pos': '89873415', 'alt': u'A'}} - assert results['NM_002693.2:c.752C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89330184G>A', 'vcf': {'chr': 'chr15', 'ref': u'G', 'pos': '89330184', 'alt': u'A'}} - assert results['NM_002693.2:c.752C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.89873415G>A', 'vcf': {'chr': '15', 'ref': u'G', 'pos': '89873415', 'alt': u'A'}} - assert results['NM_002693.2:c.752C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89330184G>A', 'vcf': {'chr': '15', 'ref': u'G', 'pos': '89330184', 'alt': u'A'}} + assert results['NM_002693.2:c.752C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.89873415G>A', 'vcf': {'chr': 'chr15', 'ref': 'G', 'pos': '89873415', 'alt': 'A'}} + assert results['NM_002693.2:c.752C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89330184G>A', 'vcf': {'chr': 'chr15', 'ref': 'G', 'pos': '89330184', 'alt': 'A'}} + assert 
results['NM_002693.2:c.752C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.89873415G>A', 'vcf': {'chr': '15', 'ref': 'G', 'pos': '89873415', 'alt': 'A'}} + assert results['NM_002693.2:c.752C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89330184G>A', 'vcf': {'chr': '15', 'ref': 'G', 'pos': '89330184', 'alt': 'A'}} assert results['NM_002693.2:c.752C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008218.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002684.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002693.2'} assert results['flag'] == 'gene_variant' - assert 'NM_001126131.1:c.752C>T' in results.keys() + assert 'NM_001126131.1:c.752C>T' in list(results.keys()) assert results['NM_001126131.1:c.752C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001126131.1:c.752C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001126131.1:c.752C>T']['alt_genomic_loci'] == [] @@ -9997,19 +9997,19 @@ def test_variant233(self): assert results['NM_001126131.1:c.752C>T']['hgvs_lrg_variant'] == '' assert results['NM_001126131.1:c.752C>T']['hgvs_transcript_variant'] == 'NM_001126131.1:c.752C>T' assert results['NM_001126131.1:c.752C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001126131.1:c.752C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.89873415G>A', 'vcf': {'chr': 'chr15', 'ref': u'G', 'pos': '89873415', 'alt': u'A'}} - assert results['NM_001126131.1:c.752C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89330184G>A', 'vcf': {'chr': 'chr15', 'ref': u'G', 'pos': '89330184', 'alt': u'A'}} - assert results['NM_001126131.1:c.752C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.89873415G>A', 'vcf': {'chr': '15', 'ref': u'G', 'pos': '89873415', 'alt': u'A'}} - assert 
results['NM_001126131.1:c.752C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89330184G>A', 'vcf': {'chr': '15', 'ref': u'G', 'pos': '89330184', 'alt': u'A'}} + assert results['NM_001126131.1:c.752C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.89873415G>A', 'vcf': {'chr': 'chr15', 'ref': 'G', 'pos': '89873415', 'alt': 'A'}} + assert results['NM_001126131.1:c.752C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89330184G>A', 'vcf': {'chr': 'chr15', 'ref': 'G', 'pos': '89330184', 'alt': 'A'}} + assert results['NM_001126131.1:c.752C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.89873415G>A', 'vcf': {'chr': '15', 'ref': 'G', 'pos': '89873415', 'alt': 'A'}} + assert results['NM_001126131.1:c.752C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89330184G>A', 'vcf': {'chr': '15', 'ref': 'G', 'pos': '89330184', 'alt': 'A'}} assert results['NM_001126131.1:c.752C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119603.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126131.1'} def test_variant234(self): variant = '16-2103394-C-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_000548.3:c.277C>T' in results.keys() + assert 'NM_000548.3:c.277C>T' in list(results.keys()) assert results['NM_000548.3:c.277C>T']['hgvs_lrg_transcript_variant'] == 'LRG_487t1:c.277C>T' assert results['NM_000548.3:c.277C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000548.3:c.277C>T']['alt_genomic_loci'] == [] @@ -10022,12 +10022,12 @@ def test_variant234(self): assert results['NM_000548.3:c.277C>T']['hgvs_transcript_variant'] == 'NM_000548.3:c.277C>T' assert results['NM_000548.3:c.277C>T']['hgvs_refseqgene_variant'] == 'NG_005895.1:g.9088C>T' assert 
results['NM_000548.3:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert 'hg38' not in results['NM_000548.3:c.277C>T']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_000548.3:c.277C>T']['primary_assembly_loci'].keys()) assert results['NM_000548.3:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert 'grch38' not in results['NM_000548.3:c.277C>T']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_000548.3:c.277C>T']['primary_assembly_loci'].keys()) assert results['NM_000548.3:c.277C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_005895.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000539.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000548.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_487.xml'} - assert 'NM_001318832.1:c.310C>T' in results.keys() + assert 'NM_001318832.1:c.310C>T' in list(results.keys()) assert results['NM_001318832.1:c.310C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001318832.1:c.310C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001318832.1:c.310C>T']['alt_genomic_loci'] == [] @@ -10045,7 +10045,7 @@ def test_variant234(self): assert results['NM_001318832.1:c.310C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} assert results['NM_001318832.1:c.310C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305761.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318832.1'} - assert 'NM_001318829.1:c.130C>T' in results.keys() + assert 'NM_001318829.1:c.130C>T' in 
list(results.keys()) assert results['NM_001318829.1:c.130C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001318829.1:c.130C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001318829.1:c.130C>T']['alt_genomic_loci'] == [] @@ -10063,7 +10063,7 @@ def test_variant234(self): assert results['NM_001318829.1:c.130C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} assert results['NM_001318829.1:c.130C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305758.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318829.1'} - assert 'NM_001077183.2:c.277C>T' in results.keys() + assert 'NM_001077183.2:c.277C>T' in list(results.keys()) assert results['NM_001077183.2:c.277C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001077183.2:c.277C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001077183.2:c.277C>T']['alt_genomic_loci'] == [] @@ -10081,7 +10081,7 @@ def test_variant234(self): assert results['NM_001077183.2:c.277C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} assert results['NM_001077183.2:c.277C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001070651.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077183.2'} - assert 'NM_001114382.1:c.277C>T' in results.keys() + assert 'NM_001114382.1:c.277C>T' in list(results.keys()) assert results['NM_001114382.1:c.277C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001114382.1:c.277C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001114382.1:c.277C>T']['alt_genomic_loci'] == [] @@ -10094,12 +10094,12 @@ def test_variant234(self): assert results['NM_001114382.1:c.277C>T']['hgvs_transcript_variant'] 
== 'NM_001114382.1:c.277C>T' assert results['NM_001114382.1:c.277C>T']['hgvs_refseqgene_variant'] == '' assert results['NM_001114382.1:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert 'hg38' not in results['NM_001114382.1:c.277C>T']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001114382.1:c.277C>T']['primary_assembly_loci'].keys()) assert results['NM_001114382.1:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert 'grch38' not in results['NM_001114382.1:c.277C>T']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001114382.1:c.277C>T']['primary_assembly_loci'].keys()) assert results['NM_001114382.1:c.277C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001107854.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001114382.1'} - assert 'NM_001077183.1:c.277C>T' in results.keys() + assert 'NM_001077183.1:c.277C>T' in list(results.keys()) assert results['NM_001077183.1:c.277C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001077183.1:c.277C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001077183.1:c.277C>T']['alt_genomic_loci'] == [] @@ -10112,12 +10112,12 @@ def test_variant234(self): assert results['NM_001077183.1:c.277C>T']['hgvs_transcript_variant'] == 'NM_001077183.1:c.277C>T' assert results['NM_001077183.1:c.277C>T']['hgvs_refseqgene_variant'] == '' assert results['NM_001077183.1:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert 'hg38' not in results['NM_001077183.1:c.277C>T']['primary_assembly_loci'].keys() + assert 'hg38' not in 
list(results['NM_001077183.1:c.277C>T']['primary_assembly_loci'].keys()) assert results['NM_001077183.1:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert 'grch38' not in results['NM_001077183.1:c.277C>T']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001077183.1:c.277C>T']['primary_assembly_loci'].keys()) assert results['NM_001077183.1:c.277C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001070651.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077183.1'} - assert 'NM_001318827.1:c.226-903C>T' in results.keys() + assert 'NM_001318827.1:c.226-903C>T' in list(results.keys()) assert results['NM_001318827.1:c.226-903C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001318827.1:c.226-903C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001318827.1:c.226-903C>T']['alt_genomic_loci'] == [] @@ -10136,7 +10136,7 @@ def test_variant234(self): assert results['NM_001318827.1:c.226-903C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305756.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318827.1'} assert results['flag'] == 'gene_variant' - assert 'NM_001114382.2:c.277C>T' in results.keys() + assert 'NM_001114382.2:c.277C>T' in list(results.keys()) assert results['NM_001114382.2:c.277C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001114382.2:c.277C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001114382.2:c.277C>T']['alt_genomic_loci'] == [] @@ -10154,7 +10154,7 @@ def test_variant234(self): assert results['NM_001114382.2:c.277C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} assert 
results['NM_001114382.2:c.277C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001107854.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001114382.2'} - assert 'NM_001363528.1:c.277C>T' in results.keys() + assert 'NM_001363528.1:c.277C>T' in list(results.keys()) assert results['NM_001363528.1:c.277C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363528.1:c.277C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363528.1:c.277C>T']['alt_genomic_loci'] == [] @@ -10167,12 +10167,12 @@ def test_variant234(self): assert results['NM_001363528.1:c.277C>T']['hgvs_transcript_variant'] == 'NM_001363528.1:c.277C>T' assert results['NM_001363528.1:c.277C>T']['hgvs_refseqgene_variant'] == '' assert results['NM_001363528.1:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert 'hg38' not in results['NM_001363528.1:c.277C>T']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001363528.1:c.277C>T']['primary_assembly_loci'].keys()) assert results['NM_001363528.1:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert 'grch38' not in results['NM_001363528.1:c.277C>T']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001363528.1:c.277C>T']['primary_assembly_loci'].keys()) assert results['NM_001363528.1:c.277C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350457.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363528.1'} - assert 'NM_021055.2:c.277C>T' in results.keys() + assert 'NM_021055.2:c.277C>T' in list(results.keys()) assert results['NM_021055.2:c.277C>T']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_021055.2:c.277C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_021055.2:c.277C>T']['alt_genomic_loci'] == [] @@ -10185,12 +10185,12 @@ def test_variant234(self): assert results['NM_021055.2:c.277C>T']['hgvs_transcript_variant'] == 'NM_021055.2:c.277C>T' assert results['NM_021055.2:c.277C>T']['hgvs_refseqgene_variant'] == '' assert results['NM_021055.2:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert 'hg38' not in results['NM_021055.2:c.277C>T']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_021055.2:c.277C>T']['primary_assembly_loci'].keys()) assert results['NM_021055.2:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert 'grch38' not in results['NM_021055.2:c.277C>T']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_021055.2:c.277C>T']['primary_assembly_loci'].keys()) assert results['NM_021055.2:c.277C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_066399.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021055.2'} - assert 'NM_000548.4:c.277C>T' in results.keys() + assert 'NM_000548.4:c.277C>T' in list(results.keys()) assert results['NM_000548.4:c.277C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000548.4:c.277C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000548.4:c.277C>T']['alt_genomic_loci'] == [] @@ -10208,7 +10208,7 @@ def test_variant234(self): assert results['NM_000548.4:c.277C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} assert results['NM_000548.4:c.277C>T']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_000539.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000548.4'} - assert 'NM_001318831.1:c.-1-2803C>T' in results.keys() + assert 'NM_001318831.1:c.-1-2803C>T' in list(results.keys()) assert results['NM_001318831.1:c.-1-2803C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001318831.1:c.-1-2803C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001318831.1:c.-1-2803C>T']['alt_genomic_loci'] == [] @@ -10230,10 +10230,10 @@ def test_variant234(self): def test_variant235(self): variant = '16-3779300-C-G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_001079846.1:c.5634G>C' in results.keys() + assert 'NM_001079846.1:c.5634G>C' in list(results.keys()) assert results['NM_001079846.1:c.5634G>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001079846.1:c.5634G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001079846.1:c.5634G>C']['alt_genomic_loci'] == [] @@ -10245,13 +10245,13 @@ def test_variant235(self): assert results['NM_001079846.1:c.5634G>C']['hgvs_lrg_variant'] == '' assert results['NM_001079846.1:c.5634G>C']['hgvs_transcript_variant'] == 'NM_001079846.1:c.5634G>C' assert results['NM_001079846.1:c.5634G>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_001079846.1:c.5634G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.3779300C>G', 'vcf': {'chr': 'chr16', 'ref': u'C', 'pos': '3779300', 'alt': u'G'}} - assert results['NM_001079846.1:c.5634G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.3729299C>G', 'vcf': {'chr': 'chr16', 'ref': u'C', 'pos': '3729299', 'alt': u'G'}} - assert results['NM_001079846.1:c.5634G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.3779300C>G', 'vcf': {'chr': '16', 'ref': u'C', 'pos': '3779300', 'alt': u'G'}} - assert 
results['NM_001079846.1:c.5634G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.3729299C>G', 'vcf': {'chr': '16', 'ref': u'C', 'pos': '3729299', 'alt': u'G'}} + assert results['NM_001079846.1:c.5634G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.3779300C>G', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '3779300', 'alt': 'G'}} + assert results['NM_001079846.1:c.5634G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.3729299C>G', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '3729299', 'alt': 'G'}} + assert results['NM_001079846.1:c.5634G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.3779300C>G', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '3779300', 'alt': 'G'}} + assert results['NM_001079846.1:c.5634G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.3729299C>G', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '3729299', 'alt': 'G'}} assert results['NM_001079846.1:c.5634G>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073315.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001079846.1'} - assert 'NM_004380.2:c.5748G>C' in results.keys() + assert 'NM_004380.2:c.5748G>C' in list(results.keys()) assert results['NM_004380.2:c.5748G>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_004380.2:c.5748G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004380.2:c.5748G>C']['alt_genomic_loci'] == [] @@ -10263,19 +10263,19 @@ def test_variant235(self): assert results['NM_004380.2:c.5748G>C']['hgvs_lrg_variant'] == '' assert results['NM_004380.2:c.5748G>C']['hgvs_transcript_variant'] == 'NM_004380.2:c.5748G>C' assert results['NM_004380.2:c.5748G>C']['hgvs_refseqgene_variant'] == 'NG_009873.1:g.155822G>C' - assert results['NM_004380.2:c.5748G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000016.9:g.3779300C>G', 'vcf': {'chr': 'chr16', 'ref': u'C', 'pos': '3779300', 'alt': u'G'}} - assert results['NM_004380.2:c.5748G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.3729299C>G', 'vcf': {'chr': 'chr16', 'ref': u'C', 'pos': '3729299', 'alt': u'G'}} - assert results['NM_004380.2:c.5748G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.3779300C>G', 'vcf': {'chr': '16', 'ref': u'C', 'pos': '3779300', 'alt': u'G'}} - assert results['NM_004380.2:c.5748G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.3729299C>G', 'vcf': {'chr': '16', 'ref': u'C', 'pos': '3729299', 'alt': u'G'}} + assert results['NM_004380.2:c.5748G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.3779300C>G', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '3779300', 'alt': 'G'}} + assert results['NM_004380.2:c.5748G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.3729299C>G', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '3729299', 'alt': 'G'}} + assert results['NM_004380.2:c.5748G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.3779300C>G', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '3779300', 'alt': 'G'}} + assert results['NM_004380.2:c.5748G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.3729299C>G', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '3729299', 'alt': 'G'}} assert results['NM_004380.2:c.5748G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009873.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004371.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004380.2'} def test_variant236(self): variant = '16-5128843-C-G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001330504.1:c.493C>G' in results.keys() + assert 
'NM_001330504.1:c.493C>G' in list(results.keys()) assert results['NM_001330504.1:c.493C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330504.1:c.493C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330504.1:c.493C>G']['alt_genomic_loci'] == [] @@ -10294,7 +10294,7 @@ def test_variant236(self): assert results['NM_001330504.1:c.493C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317433.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330504.1'} assert results['flag'] == 'gene_variant' - assert 'NM_019109.4:c.826C>G' in results.keys() + assert 'NM_019109.4:c.826C>G' in list(results.keys()) assert results['NM_019109.4:c.826C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_019109.4:c.826C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_019109.4:c.826C>G']['alt_genomic_loci'] == [] @@ -10316,9 +10316,9 @@ def test_variant236(self): def test_variant237(self): variant = '16-74808559-C-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_024306.4:c.95G>A' in results.keys() + assert 'NM_024306.4:c.95G>A' in list(results.keys()) assert results['NM_024306.4:c.95G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_024306.4:c.95G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_024306.4:c.95G>A']['alt_genomic_loci'] == [] @@ -10330,10 +10330,10 @@ def test_variant237(self): assert results['NM_024306.4:c.95G>A']['hgvs_lrg_variant'] == '' assert results['NM_024306.4:c.95G>A']['hgvs_transcript_variant'] == 'NM_024306.4:c.95G>A' assert results['NM_024306.4:c.95G>A']['hgvs_refseqgene_variant'] == 'NG_017070.1:g.5171G>A' - assert results['NM_024306.4:c.95G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.74808559C>T', 'vcf': {'chr': 'chr16', 'ref': u'C', 'pos': '74808559', 'alt': u'T'}} - assert 
results['NM_024306.4:c.95G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.74774661C>T', 'vcf': {'chr': 'chr16', 'ref': u'C', 'pos': '74774661', 'alt': u'T'}} - assert results['NM_024306.4:c.95G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.74808559C>T', 'vcf': {'chr': '16', 'ref': u'C', 'pos': '74808559', 'alt': u'T'}} - assert results['NM_024306.4:c.95G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.74774661C>T', 'vcf': {'chr': '16', 'ref': u'C', 'pos': '74774661', 'alt': u'T'}} + assert results['NM_024306.4:c.95G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.74808559C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '74808559', 'alt': 'T'}} + assert results['NM_024306.4:c.95G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.74774661C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '74774661', 'alt': 'T'}} + assert results['NM_024306.4:c.95G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.74808559C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '74808559', 'alt': 'T'}} + assert results['NM_024306.4:c.95G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.74774661C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '74774661', 'alt': 'T'}} assert results['NM_024306.4:c.95G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017070.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_077282.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024306.4'} assert results['flag'] == 'gene_variant' @@ -10341,9 +10341,9 @@ def test_variant237(self): def test_variant238(self): variant = '16-89574804-C-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_003119.3:c.-22C>A' in results.keys() + assert 'NM_003119.3:c.-22C>A' in 
list(results.keys()) assert results['NM_003119.3:c.-22C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.-22C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.3:c.-22C>A']['alt_genomic_loci'] == [] @@ -10362,7 +10362,7 @@ def test_variant238(self): assert results['NM_003119.3:c.-22C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} assert results['flag'] == 'gene_variant' - assert 'NM_199367.2:c.-22C>A' in results.keys() + assert 'NM_199367.2:c.-22C>A' in list(results.keys()) assert results['NM_199367.2:c.-22C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.2:c.-22C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.2:c.-22C>A']['alt_genomic_loci'] == [] @@ -10380,7 +10380,7 @@ def test_variant238(self): assert results['NM_199367.2:c.-22C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508396C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89508396', 'alt': 'A'}} assert results['NM_199367.2:c.-22C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} - assert 'NM_001363850.1:c.-22C>A' in results.keys() + assert 'NM_001363850.1:c.-22C>A' in list(results.keys()) assert results['NM_001363850.1:c.-22C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.-22C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363850.1:c.-22C>A']['alt_genomic_loci'] == [] @@ -10393,18 +10393,18 @@ def test_variant238(self): assert results['NM_001363850.1:c.-22C>A']['hgvs_transcript_variant'] == 'NM_001363850.1:c.-22C>A' assert results['NM_001363850.1:c.-22C>A']['hgvs_refseqgene_variant'] == '' assert results['NM_001363850.1:c.-22C>A']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000016.9:g.89574804C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89574804', 'alt': 'A'}} - assert 'hg38' not in results['NM_001363850.1:c.-22C>A']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001363850.1:c.-22C>A']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.-22C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574804C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89574804', 'alt': 'A'}} - assert 'grch38' not in results['NM_001363850.1:c.-22C>A']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001363850.1:c.-22C>A']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.-22C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} def test_variant239(self): variant = '16-89574826-A-C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_003119.2:c.1A>C' in results.keys() + assert 'NM_003119.2:c.1A>C' in list(results.keys()) assert results['NM_003119.2:c.1A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.1A>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.2:c.1A>C']['alt_genomic_loci'] == [] @@ -10417,12 +10417,12 @@ def test_variant239(self): assert results['NM_003119.2:c.1A>C']['hgvs_transcript_variant'] == 'NM_003119.2:c.1A>C' assert results['NM_003119.2:c.1A>C']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.5022A>C' assert results['NM_003119.2:c.1A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89574826', 'alt': 'C'}} - assert 'hg38' not in results['NM_003119.2:c.1A>C']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_003119.2:c.1A>C']['primary_assembly_loci'].keys()) assert 
results['NM_003119.2:c.1A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89574826', 'alt': 'C'}} - assert 'grch38' not in results['NM_003119.2:c.1A>C']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_003119.2:c.1A>C']['primary_assembly_loci'].keys()) assert results['NM_003119.2:c.1A>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} - assert 'NM_199367.1:c.1A>C' in results.keys() + assert 'NM_199367.1:c.1A>C' in list(results.keys()) assert results['NM_199367.1:c.1A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.1:c.1A>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.1:c.1A>C']['alt_genomic_loci'] == [] @@ -10435,12 +10435,12 @@ def test_variant239(self): assert results['NM_199367.1:c.1A>C']['hgvs_transcript_variant'] == 'NM_199367.1:c.1A>C' assert results['NM_199367.1:c.1A>C']['hgvs_refseqgene_variant'] == '' assert results['NM_199367.1:c.1A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89574826', 'alt': 'C'}} - assert 'hg38' not in results['NM_199367.1:c.1A>C']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_199367.1:c.1A>C']['primary_assembly_loci'].keys()) assert results['NM_199367.1:c.1A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89574826', 'alt': 'C'}} - assert 'grch38' not in results['NM_199367.1:c.1A>C']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_199367.1:c.1A>C']['primary_assembly_loci'].keys()) assert results['NM_199367.1:c.1A>C']['reference_sequence_records'] == 
{'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} - assert 'NM_001363850.1:c.1A>C' in results.keys() + assert 'NM_001363850.1:c.1A>C' in list(results.keys()) assert results['NM_001363850.1:c.1A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.1A>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363850.1:c.1A>C']['alt_genomic_loci'] == [] @@ -10453,12 +10453,12 @@ def test_variant239(self): assert results['NM_001363850.1:c.1A>C']['hgvs_transcript_variant'] == 'NM_001363850.1:c.1A>C' assert results['NM_001363850.1:c.1A>C']['hgvs_refseqgene_variant'] == '' assert results['NM_001363850.1:c.1A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89574826', 'alt': 'C'}} - assert 'hg38' not in results['NM_001363850.1:c.1A>C']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001363850.1:c.1A>C']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.1A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89574826', 'alt': 'C'}} - assert 'grch38' not in results['NM_001363850.1:c.1A>C']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001363850.1:c.1A>C']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.1A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} - assert 'NM_199367.2:c.1A>C' in results.keys() + assert 'NM_199367.2:c.1A>C' in list(results.keys()) assert results['NM_199367.2:c.1A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.2:c.1A>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.2:c.1A>C']['alt_genomic_loci'] == [] @@ 
-10477,7 +10477,7 @@ def test_variant239(self): assert results['NM_199367.2:c.1A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} assert results['flag'] == 'gene_variant' - assert 'NM_003119.3:c.1A>C' in results.keys() + assert 'NM_003119.3:c.1A>C' in list(results.keys()) assert results['NM_003119.3:c.1A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.1A>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.3:c.1A>C']['alt_genomic_loci'] == [] @@ -10499,9 +10499,9 @@ def test_variant239(self): def test_variant240(self): variant = '16-89574914-G-GT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001363850.1:c.90dup' in results.keys() + assert 'NM_001363850.1:c.90dup' in list(results.keys()) assert results['NM_001363850.1:c.90dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.90dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363850.1:c.90dup']['alt_genomic_loci'] == [] @@ -10514,12 +10514,12 @@ def test_variant240(self): assert results['NM_001363850.1:c.90dup']['hgvs_transcript_variant'] == 'NM_001363850.1:c.90dup' assert results['NM_001363850.1:c.90dup']['hgvs_refseqgene_variant'] == '' assert results['NM_001363850.1:c.90dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89574914', 'alt': 'GT'}} - assert 'hg38' not in results['NM_001363850.1:c.90dup']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001363850.1:c.90dup']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.90dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89574914', 'alt': 'GT'}} - assert 'grch38' not in 
results['NM_001363850.1:c.90dup']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001363850.1:c.90dup']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.90dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} - assert 'NM_199367.1:c.90dup' in results.keys() + assert 'NM_199367.1:c.90dup' in list(results.keys()) assert results['NM_199367.1:c.90dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.1:c.90dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.1:c.90dup']['alt_genomic_loci'] == [] @@ -10532,12 +10532,12 @@ def test_variant240(self): assert results['NM_199367.1:c.90dup']['hgvs_transcript_variant'] == 'NM_199367.1:c.90dup' assert results['NM_199367.1:c.90dup']['hgvs_refseqgene_variant'] == '' assert results['NM_199367.1:c.90dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89574914', 'alt': 'GT'}} - assert 'hg38' not in results['NM_199367.1:c.90dup']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_199367.1:c.90dup']['primary_assembly_loci'].keys()) assert results['NM_199367.1:c.90dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89574914', 'alt': 'GT'}} - assert 'grch38' not in results['NM_199367.1:c.90dup']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_199367.1:c.90dup']['primary_assembly_loci'].keys()) assert results['NM_199367.1:c.90dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} - assert 'NM_003119.2:c.90dup' in results.keys() + assert 'NM_003119.2:c.90dup' in list(results.keys()) assert 
results['NM_003119.2:c.90dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.90dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.2:c.90dup']['alt_genomic_loci'] == [] @@ -10550,12 +10550,12 @@ def test_variant240(self): assert results['NM_003119.2:c.90dup']['hgvs_transcript_variant'] == 'NM_003119.2:c.90dup' assert results['NM_003119.2:c.90dup']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.5111dup' assert results['NM_003119.2:c.90dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89574914', 'alt': 'GT'}} - assert 'hg38' not in results['NM_003119.2:c.90dup']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_003119.2:c.90dup']['primary_assembly_loci'].keys()) assert results['NM_003119.2:c.90dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89574914', 'alt': 'GT'}} - assert 'grch38' not in results['NM_003119.2:c.90dup']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_003119.2:c.90dup']['primary_assembly_loci'].keys()) assert results['NM_003119.2:c.90dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} - assert 'NM_199367.2:c.90dup' in results.keys() + assert 'NM_199367.2:c.90dup' in list(results.keys()) assert results['NM_199367.2:c.90dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.2:c.90dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.2:c.90dup']['alt_genomic_loci'] == [] @@ -10574,7 +10574,7 @@ def test_variant240(self): assert results['NM_199367.2:c.90dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 
'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} assert results['flag'] == 'gene_variant' - assert 'NM_003119.3:c.90dup' in results.keys() + assert 'NM_003119.3:c.90dup' in list(results.keys()) assert results['NM_003119.3:c.90dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.90dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.3:c.90dup']['alt_genomic_loci'] == [] @@ -10596,9 +10596,9 @@ def test_variant240(self): def test_variant241(self): variant = '16-89574916-C-CGTC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_199367.2:c.89_91dup' in results.keys() + assert 'NM_199367.2:c.89_91dup' in list(results.keys()) assert results['NM_199367.2:c.89_91dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.2:c.89_91dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.2:c.89_91dup']['alt_genomic_loci'] == [] @@ -10616,7 +10616,7 @@ def test_variant241(self): assert results['NM_199367.2:c.89_91dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508506_89508508dup', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89508505', 'alt': 'AGTC'}} assert results['NM_199367.2:c.89_91dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} - assert 'NM_003119.3:c.89_91dup' in results.keys() + assert 'NM_003119.3:c.89_91dup' in list(results.keys()) assert results['NM_003119.3:c.89_91dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.89_91dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.3:c.89_91dup']['alt_genomic_loci'] == [] @@ -10634,7 +10634,7 @@ def test_variant241(self): assert results['NM_003119.3:c.89_91dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508506_89508508dup', 'vcf': {'chr': 
'16', 'ref': 'A', 'pos': '89508505', 'alt': 'AGTC'}} assert results['NM_003119.3:c.89_91dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} - assert 'NM_001363850.1:c.89_91dup' in results.keys() + assert 'NM_001363850.1:c.89_91dup' in list(results.keys()) assert results['NM_001363850.1:c.89_91dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.89_91dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363850.1:c.89_91dup']['alt_genomic_loci'] == [] @@ -10647,13 +10647,13 @@ def test_variant241(self): assert results['NM_001363850.1:c.89_91dup']['hgvs_transcript_variant'] == 'NM_001363850.1:c.89_91dup' assert results['NM_001363850.1:c.89_91dup']['hgvs_refseqgene_variant'] == '' assert results['NM_001363850.1:c.89_91dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89574913', 'alt': 'AGTC'}} - assert 'hg38' not in results['NM_001363850.1:c.89_91dup']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001363850.1:c.89_91dup']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.89_91dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89574913', 'alt': 'AGTC'}} - assert 'grch38' not in results['NM_001363850.1:c.89_91dup']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001363850.1:c.89_91dup']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.89_91dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} assert results['flag'] == 'gene_variant' - assert 'NM_199367.1:c.89_91dup' in results.keys() + assert 
'NM_199367.1:c.89_91dup' in list(results.keys()) assert results['NM_199367.1:c.89_91dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.1:c.89_91dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.1:c.89_91dup']['alt_genomic_loci'] == [] @@ -10666,12 +10666,12 @@ def test_variant241(self): assert results['NM_199367.1:c.89_91dup']['hgvs_transcript_variant'] == 'NM_199367.1:c.89_91dup' assert results['NM_199367.1:c.89_91dup']['hgvs_refseqgene_variant'] == '' assert results['NM_199367.1:c.89_91dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89574913', 'alt': 'AGTC'}} - assert 'hg38' not in results['NM_199367.1:c.89_91dup']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_199367.1:c.89_91dup']['primary_assembly_loci'].keys()) assert results['NM_199367.1:c.89_91dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89574913', 'alt': 'AGTC'}} - assert 'grch38' not in results['NM_199367.1:c.89_91dup']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_199367.1:c.89_91dup']['primary_assembly_loci'].keys()) assert results['NM_199367.1:c.89_91dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} - assert 'NM_003119.2:c.89_91dup' in results.keys() + assert 'NM_003119.2:c.89_91dup' in list(results.keys()) assert results['NM_003119.2:c.89_91dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.89_91dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.2:c.89_91dup']['alt_genomic_loci'] == [] @@ -10684,18 +10684,18 @@ def test_variant241(self): assert results['NM_003119.2:c.89_91dup']['hgvs_transcript_variant'] == 'NM_003119.2:c.89_91dup' 
assert results['NM_003119.2:c.89_91dup']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.5110_5112dup' assert results['NM_003119.2:c.89_91dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89574913', 'alt': 'AGTC'}} - assert 'hg38' not in results['NM_003119.2:c.89_91dup']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_003119.2:c.89_91dup']['primary_assembly_loci'].keys()) assert results['NM_003119.2:c.89_91dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89574913', 'alt': 'AGTC'}} - assert 'grch38' not in results['NM_003119.2:c.89_91dup']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_003119.2:c.89_91dup']['primary_assembly_loci'].keys()) assert results['NM_003119.2:c.89_91dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} def test_variant242(self): variant = '16-89575009-G-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_199367.2:c.183+1G>A' in results.keys() + assert 'NM_199367.2:c.183+1G>A' in list(results.keys()) assert results['NM_199367.2:c.183+1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.2:c.183+1G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.2:c.183+1G>A']['alt_genomic_loci'] == [] @@ -10713,7 +10713,7 @@ def test_variant242(self): assert results['NM_199367.2:c.183+1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508601G>A', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89508601', 'alt': 'A'}} assert results['NM_199367.2:c.183+1G>A']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} - assert 'NM_003119.2:c.183+1G>A' in results.keys() + assert 'NM_003119.2:c.183+1G>A' in list(results.keys()) assert results['NM_003119.2:c.183+1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.183+1G>A']['refseqgene_context_intronic_sequence'] == 'NG_008082.1(NM_003119.2):c.183+1G>A' assert results['NM_003119.2:c.183+1G>A']['alt_genomic_loci'] == [] @@ -10726,13 +10726,13 @@ def test_variant242(self): assert results['NM_003119.2:c.183+1G>A']['hgvs_transcript_variant'] == 'NM_003119.2:c.183+1G>A' assert results['NM_003119.2:c.183+1G>A']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.5205G>A' assert results['NM_003119.2:c.183+1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89575009', 'alt': 'A'}} - assert 'hg38' not in results['NM_003119.2:c.183+1G>A']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_003119.2:c.183+1G>A']['primary_assembly_loci'].keys()) assert results['NM_003119.2:c.183+1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89575009', 'alt': 'A'}} - assert 'grch38' not in results['NM_003119.2:c.183+1G>A']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_003119.2:c.183+1G>A']['primary_assembly_loci'].keys()) assert results['NM_003119.2:c.183+1G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} assert results['flag'] == 'gene_variant' - assert 'NM_199367.1:c.183+1G>A' in results.keys() + assert 'NM_199367.1:c.183+1G>A' in list(results.keys()) assert 
results['NM_199367.1:c.183+1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.1:c.183+1G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.1:c.183+1G>A']['alt_genomic_loci'] == [] @@ -10745,12 +10745,12 @@ def test_variant242(self): assert results['NM_199367.1:c.183+1G>A']['hgvs_transcript_variant'] == 'NM_199367.1:c.183+1G>A' assert results['NM_199367.1:c.183+1G>A']['hgvs_refseqgene_variant'] == '' assert results['NM_199367.1:c.183+1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89575009', 'alt': 'A'}} - assert 'hg38' not in results['NM_199367.1:c.183+1G>A']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_199367.1:c.183+1G>A']['primary_assembly_loci'].keys()) assert results['NM_199367.1:c.183+1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89575009', 'alt': 'A'}} - assert 'grch38' not in results['NM_199367.1:c.183+1G>A']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_199367.1:c.183+1G>A']['primary_assembly_loci'].keys()) assert results['NM_199367.1:c.183+1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} - assert 'NM_001363850.1:c.183+1G>A' in results.keys() + assert 'NM_001363850.1:c.183+1G>A' in list(results.keys()) assert results['NM_001363850.1:c.183+1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.183+1G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363850.1:c.183+1G>A']['alt_genomic_loci'] == [] @@ -10763,12 +10763,12 @@ def test_variant242(self): assert results['NM_001363850.1:c.183+1G>A']['hgvs_transcript_variant'] == 'NM_001363850.1:c.183+1G>A' assert 
results['NM_001363850.1:c.183+1G>A']['hgvs_refseqgene_variant'] == '' assert results['NM_001363850.1:c.183+1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89575009', 'alt': 'A'}} - assert 'hg38' not in results['NM_001363850.1:c.183+1G>A']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001363850.1:c.183+1G>A']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.183+1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89575009', 'alt': 'A'}} - assert 'grch38' not in results['NM_001363850.1:c.183+1G>A']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001363850.1:c.183+1G>A']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.183+1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} - assert 'NM_003119.3:c.183+1G>A' in results.keys() + assert 'NM_003119.3:c.183+1G>A' in list(results.keys()) assert results['NM_003119.3:c.183+1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.183+1G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.3:c.183+1G>A']['alt_genomic_loci'] == [] @@ -10790,9 +10790,9 @@ def test_variant242(self): def test_variant243(self): variant = '16-89575040-C-A,CA' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_199367.1:c.183+32_183+33insA' in results.keys() + assert 'NM_199367.1:c.183+32_183+33insA' in list(results.keys()) assert results['NM_199367.1:c.183+32_183+33insA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.1:c.183+32_183+33insA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.1:c.183+32_183+33insA']['alt_genomic_loci'] == 
[] @@ -10805,12 +10805,12 @@ def test_variant243(self): assert results['NM_199367.1:c.183+32_183+33insA']['hgvs_transcript_variant'] == 'NM_199367.1:c.183+32_183+33insA' assert results['NM_199367.1:c.183+32_183+33insA']['hgvs_refseqgene_variant'] == '' assert results['NM_199367.1:c.183+32_183+33insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89575040', 'alt': 'CA'}} - assert 'hg38' not in results['NM_199367.1:c.183+32_183+33insA']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_199367.1:c.183+32_183+33insA']['primary_assembly_loci'].keys()) assert results['NM_199367.1:c.183+32_183+33insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89575040', 'alt': 'CA'}} - assert 'grch38' not in results['NM_199367.1:c.183+32_183+33insA']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_199367.1:c.183+32_183+33insA']['primary_assembly_loci'].keys()) assert results['NM_199367.1:c.183+32_183+33insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} - assert 'NM_001363850.1:c.183+32C>A' in results.keys() + assert 'NM_001363850.1:c.183+32C>A' in list(results.keys()) assert results['NM_001363850.1:c.183+32C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.183+32C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363850.1:c.183+32C>A']['alt_genomic_loci'] == [] @@ -10823,12 +10823,12 @@ def test_variant243(self): assert results['NM_001363850.1:c.183+32C>A']['hgvs_transcript_variant'] == 'NM_001363850.1:c.183+32C>A' assert results['NM_001363850.1:c.183+32C>A']['hgvs_refseqgene_variant'] == '' assert results['NM_001363850.1:c.183+32C>A']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89575040', 'alt': 'A'}} - assert 'hg38' not in results['NM_001363850.1:c.183+32C>A']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001363850.1:c.183+32C>A']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.183+32C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89575040', 'alt': 'A'}} - assert 'grch38' not in results['NM_001363850.1:c.183+32C>A']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001363850.1:c.183+32C>A']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.183+32C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} - assert 'NM_001363850.1:c.183+32_183+33insA' in results.keys() + assert 'NM_001363850.1:c.183+32_183+33insA' in list(results.keys()) assert results['NM_001363850.1:c.183+32_183+33insA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.183+32_183+33insA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363850.1:c.183+32_183+33insA']['alt_genomic_loci'] == [] @@ -10841,12 +10841,12 @@ def test_variant243(self): assert results['NM_001363850.1:c.183+32_183+33insA']['hgvs_transcript_variant'] == 'NM_001363850.1:c.183+32_183+33insA' assert results['NM_001363850.1:c.183+32_183+33insA']['hgvs_refseqgene_variant'] == '' assert results['NM_001363850.1:c.183+32_183+33insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89575040', 'alt': 'CA'}} - assert 'hg38' not in results['NM_001363850.1:c.183+32_183+33insA']['primary_assembly_loci'].keys() + assert 'hg38' not in 
list(results['NM_001363850.1:c.183+32_183+33insA']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.183+32_183+33insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89575040', 'alt': 'CA'}} - assert 'grch38' not in results['NM_001363850.1:c.183+32_183+33insA']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001363850.1:c.183+32_183+33insA']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.183+32_183+33insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} - assert 'NM_199367.2:c.183+32C>A' in results.keys() + assert 'NM_199367.2:c.183+32C>A' in list(results.keys()) assert results['NM_199367.2:c.183+32C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.2:c.183+32C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.2:c.183+32C>A']['alt_genomic_loci'] == [] @@ -10864,7 +10864,7 @@ def test_variant243(self): assert results['NM_199367.2:c.183+32C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508632C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89508632', 'alt': 'A'}} assert results['NM_199367.2:c.183+32C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} - assert 'NM_003119.3:c.183+32_183+33insA' in results.keys() + assert 'NM_003119.3:c.183+32_183+33insA' in list(results.keys()) assert results['NM_003119.3:c.183+32_183+33insA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.183+32_183+33insA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.3:c.183+32_183+33insA']['alt_genomic_loci'] == [] @@ -10883,7 +10883,7 @@ def test_variant243(self): assert 
results['NM_003119.3:c.183+32_183+33insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} assert results['flag'] == 'gene_variant' - assert 'NM_003119.2:c.183+32_183+33insA' in results.keys() + assert 'NM_003119.2:c.183+32_183+33insA' in list(results.keys()) assert results['NM_003119.2:c.183+32_183+33insA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.183+32_183+33insA']['refseqgene_context_intronic_sequence'] == 'NG_008082.1(NM_003119.2):c.183+32_183+33insA' assert results['NM_003119.2:c.183+32_183+33insA']['alt_genomic_loci'] == [] @@ -10896,12 +10896,12 @@ def test_variant243(self): assert results['NM_003119.2:c.183+32_183+33insA']['hgvs_transcript_variant'] == 'NM_003119.2:c.183+32_183+33insA' assert results['NM_003119.2:c.183+32_183+33insA']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.5236_5237insA' assert results['NM_003119.2:c.183+32_183+33insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89575040', 'alt': 'CA'}} - assert 'hg38' not in results['NM_003119.2:c.183+32_183+33insA']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_003119.2:c.183+32_183+33insA']['primary_assembly_loci'].keys()) assert results['NM_003119.2:c.183+32_183+33insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89575040', 'alt': 'CA'}} - assert 'grch38' not in results['NM_003119.2:c.183+32_183+33insA']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_003119.2:c.183+32_183+33insA']['primary_assembly_loci'].keys()) assert results['NM_003119.2:c.183+32_183+33insA']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} - assert 'NM_199367.1:c.183+32C>A' in results.keys() + assert 'NM_199367.1:c.183+32C>A' in list(results.keys()) assert results['NM_199367.1:c.183+32C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.1:c.183+32C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.1:c.183+32C>A']['alt_genomic_loci'] == [] @@ -10914,12 +10914,12 @@ def test_variant243(self): assert results['NM_199367.1:c.183+32C>A']['hgvs_transcript_variant'] == 'NM_199367.1:c.183+32C>A' assert results['NM_199367.1:c.183+32C>A']['hgvs_refseqgene_variant'] == '' assert results['NM_199367.1:c.183+32C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89575040', 'alt': 'A'}} - assert 'hg38' not in results['NM_199367.1:c.183+32C>A']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_199367.1:c.183+32C>A']['primary_assembly_loci'].keys()) assert results['NM_199367.1:c.183+32C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89575040', 'alt': 'A'}} - assert 'grch38' not in results['NM_199367.1:c.183+32C>A']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_199367.1:c.183+32C>A']['primary_assembly_loci'].keys()) assert results['NM_199367.1:c.183+32C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} - assert 'NM_003119.3:c.183+32C>A' in results.keys() + assert 'NM_003119.3:c.183+32C>A' in list(results.keys()) assert results['NM_003119.3:c.183+32C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.183+32C>A']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_003119.3:c.183+32C>A']['alt_genomic_loci'] == [] @@ -10937,7 +10937,7 @@ def test_variant243(self): assert results['NM_003119.3:c.183+32C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508632C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89508632', 'alt': 'A'}} assert results['NM_003119.3:c.183+32C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} - assert 'NM_199367.2:c.183+32_183+33insA' in results.keys() + assert 'NM_199367.2:c.183+32_183+33insA' in list(results.keys()) assert results['NM_199367.2:c.183+32_183+33insA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.2:c.183+32_183+33insA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.2:c.183+32_183+33insA']['alt_genomic_loci'] == [] @@ -10955,7 +10955,7 @@ def test_variant243(self): assert results['NM_199367.2:c.183+32_183+33insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508632_89508633insA', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89508632', 'alt': 'CA'}} assert results['NM_199367.2:c.183+32_183+33insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} - assert 'NM_003119.2:c.183+32C>A' in results.keys() + assert 'NM_003119.2:c.183+32C>A' in list(results.keys()) assert results['NM_003119.2:c.183+32C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.183+32C>A']['refseqgene_context_intronic_sequence'] == 'NG_008082.1(NM_003119.2):c.183+32C>A' assert results['NM_003119.2:c.183+32C>A']['alt_genomic_loci'] == [] @@ -10968,18 +10968,18 @@ def test_variant243(self): assert results['NM_003119.2:c.183+32C>A']['hgvs_transcript_variant'] == 'NM_003119.2:c.183+32C>A' assert results['NM_003119.2:c.183+32C>A']['hgvs_refseqgene_variant'] 
== 'NG_008082.1:g.5236C>A' assert results['NM_003119.2:c.183+32C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89575040', 'alt': 'A'}} - assert 'hg38' not in results['NM_003119.2:c.183+32C>A']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_003119.2:c.183+32C>A']['primary_assembly_loci'].keys()) assert results['NM_003119.2:c.183+32C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89575040', 'alt': 'A'}} - assert 'grch38' not in results['NM_003119.2:c.183+32C>A']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_003119.2:c.183+32C>A']['primary_assembly_loci'].keys()) assert results['NM_003119.2:c.183+32C>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} def test_variant244(self): variant = '16-89576896-A-C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_199367.2:c.184-2A>C' in results.keys() + assert 'NM_199367.2:c.184-2A>C' in list(results.keys()) assert results['NM_199367.2:c.184-2A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.2:c.184-2A>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.2:c.184-2A>C']['alt_genomic_loci'] == [] @@ -10997,7 +10997,7 @@ def test_variant244(self): assert results['NM_199367.2:c.184-2A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510488A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89510488', 'alt': 'C'}} assert results['NM_199367.2:c.184-2A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} - assert 'NM_003119.2:c.184-2A>C' in results.keys() + assert 'NM_003119.2:c.184-2A>C' in list(results.keys()) assert results['NM_003119.2:c.184-2A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.184-2A>C']['refseqgene_context_intronic_sequence'] == 'NG_008082.1(NM_003119.2):c.184-2A>C' assert results['NM_003119.2:c.184-2A>C']['alt_genomic_loci'] == [] @@ -11010,12 +11010,12 @@ def test_variant244(self): assert results['NM_003119.2:c.184-2A>C']['hgvs_transcript_variant'] == 'NM_003119.2:c.184-2A>C' assert results['NM_003119.2:c.184-2A>C']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.7092A>C' assert results['NM_003119.2:c.184-2A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89576896', 'alt': 'C'}} - assert 'hg38' not in results['NM_003119.2:c.184-2A>C']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_003119.2:c.184-2A>C']['primary_assembly_loci'].keys()) assert results['NM_003119.2:c.184-2A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89576896', 'alt': 'C'}} - assert 'grch38' not in results['NM_003119.2:c.184-2A>C']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_003119.2:c.184-2A>C']['primary_assembly_loci'].keys()) assert results['NM_003119.2:c.184-2A>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} - assert 'NM_003119.3:c.184-2A>C' in results.keys() + assert 'NM_003119.3:c.184-2A>C' in list(results.keys()) assert results['NM_003119.3:c.184-2A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.184-2A>C']['refseqgene_context_intronic_sequence'] == '' 
assert results['NM_003119.3:c.184-2A>C']['alt_genomic_loci'] == [] @@ -11033,7 +11033,7 @@ def test_variant244(self): assert results['NM_003119.3:c.184-2A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510488A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89510488', 'alt': 'C'}} assert results['NM_003119.3:c.184-2A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} - assert 'NM_001363850.1:c.184-2A>C' in results.keys() + assert 'NM_001363850.1:c.184-2A>C' in list(results.keys()) assert results['NM_001363850.1:c.184-2A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.184-2A>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363850.1:c.184-2A>C']['alt_genomic_loci'] == [] @@ -11046,13 +11046,13 @@ def test_variant244(self): assert results['NM_001363850.1:c.184-2A>C']['hgvs_transcript_variant'] == 'NM_001363850.1:c.184-2A>C' assert results['NM_001363850.1:c.184-2A>C']['hgvs_refseqgene_variant'] == '' assert results['NM_001363850.1:c.184-2A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89576896', 'alt': 'C'}} - assert 'hg38' not in results['NM_001363850.1:c.184-2A>C']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001363850.1:c.184-2A>C']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.184-2A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89576896', 'alt': 'C'}} - assert 'grch38' not in results['NM_001363850.1:c.184-2A>C']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001363850.1:c.184-2A>C']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.184-2A>C']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} assert results['flag'] == 'gene_variant' - assert 'NM_199367.1:c.184-2A>C' in results.keys() + assert 'NM_199367.1:c.184-2A>C' in list(results.keys()) assert results['NM_199367.1:c.184-2A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.1:c.184-2A>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.1:c.184-2A>C']['alt_genomic_loci'] == [] @@ -11065,18 +11065,18 @@ def test_variant244(self): assert results['NM_199367.1:c.184-2A>C']['hgvs_transcript_variant'] == 'NM_199367.1:c.184-2A>C' assert results['NM_199367.1:c.184-2A>C']['hgvs_refseqgene_variant'] == '' assert results['NM_199367.1:c.184-2A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89576896', 'alt': 'C'}} - assert 'hg38' not in results['NM_199367.1:c.184-2A>C']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_199367.1:c.184-2A>C']['primary_assembly_loci'].keys()) assert results['NM_199367.1:c.184-2A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89576896', 'alt': 'C'}} - assert 'grch38' not in results['NM_199367.1:c.184-2A>C']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_199367.1:c.184-2A>C']['primary_assembly_loci'].keys()) assert results['NM_199367.1:c.184-2A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} def test_variant245(self): variant = '16-89576930-T-TA,TT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_003119.3:c.216dup' in results.keys() + assert 'NM_003119.3:c.216dup' in list(results.keys()) assert 
results['NM_003119.3:c.216dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.216dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.3:c.216dup']['alt_genomic_loci'] == [] @@ -11094,7 +11094,7 @@ def test_variant245(self): assert results['NM_003119.3:c.216dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522dup', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89510519', 'alt': 'CT'}} assert results['NM_003119.3:c.216dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} - assert 'NM_003119.2:c.216_217insA' in results.keys() + assert 'NM_003119.2:c.216_217insA' in list(results.keys()) assert results['NM_003119.2:c.216_217insA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.216_217insA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.2:c.216_217insA']['alt_genomic_loci'] == [] @@ -11107,12 +11107,12 @@ def test_variant245(self): assert results['NM_003119.2:c.216_217insA']['hgvs_transcript_variant'] == 'NM_003119.2:c.216_217insA' assert results['NM_003119.2:c.216_217insA']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.7126_7127insA' assert results['NM_003119.2:c.216_217insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89576930', 'alt': 'TA'}} - assert 'hg38' not in results['NM_003119.2:c.216_217insA']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_003119.2:c.216_217insA']['primary_assembly_loci'].keys()) assert results['NM_003119.2:c.216_217insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89576930', 'alt': 'TA'}} - assert 'grch38' not in 
results['NM_003119.2:c.216_217insA']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_003119.2:c.216_217insA']['primary_assembly_loci'].keys()) assert results['NM_003119.2:c.216_217insA']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} - assert 'NM_199367.2:c.216dup' in results.keys() + assert 'NM_199367.2:c.216dup' in list(results.keys()) assert results['NM_199367.2:c.216dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.2:c.216dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.2:c.216dup']['alt_genomic_loci'] == [] @@ -11130,7 +11130,7 @@ def test_variant245(self): assert results['NM_199367.2:c.216dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522dup', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89510519', 'alt': 'CT'}} assert results['NM_199367.2:c.216dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} - assert 'NM_199367.2:c.216_217insA' in results.keys() + assert 'NM_199367.2:c.216_217insA' in list(results.keys()) assert results['NM_199367.2:c.216_217insA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.2:c.216_217insA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.2:c.216_217insA']['alt_genomic_loci'] == [] @@ -11148,7 +11148,7 @@ def test_variant245(self): assert results['NM_199367.2:c.216_217insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523insA', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89510522', 'alt': 'TA'}} assert results['NM_199367.2:c.216_217insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 
'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} - assert 'NM_001363850.1:c.216dup' in results.keys() + assert 'NM_001363850.1:c.216dup' in list(results.keys()) assert results['NM_001363850.1:c.216dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.216dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363850.1:c.216dup']['alt_genomic_loci'] == [] @@ -11161,13 +11161,13 @@ def test_variant245(self): assert results['NM_001363850.1:c.216dup']['hgvs_transcript_variant'] == 'NM_001363850.1:c.216dup' assert results['NM_001363850.1:c.216dup']['hgvs_refseqgene_variant'] == '' assert results['NM_001363850.1:c.216dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89576927', 'alt': 'CT'}} - assert 'hg38' not in results['NM_001363850.1:c.216dup']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001363850.1:c.216dup']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.216dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89576927', 'alt': 'CT'}} - assert 'grch38' not in results['NM_001363850.1:c.216dup']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001363850.1:c.216dup']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.216dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} assert results['flag'] == 'gene_variant' - assert 'NM_001363850.1:c.216_217insA' in results.keys() + assert 'NM_001363850.1:c.216_217insA' in list(results.keys()) assert results['NM_001363850.1:c.216_217insA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.216_217insA']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_001363850.1:c.216_217insA']['alt_genomic_loci'] == [] @@ -11180,12 +11180,12 @@ def test_variant245(self): assert results['NM_001363850.1:c.216_217insA']['hgvs_transcript_variant'] == 'NM_001363850.1:c.216_217insA' assert results['NM_001363850.1:c.216_217insA']['hgvs_refseqgene_variant'] == '' assert results['NM_001363850.1:c.216_217insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89576930', 'alt': 'TA'}} - assert 'hg38' not in results['NM_001363850.1:c.216_217insA']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001363850.1:c.216_217insA']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.216_217insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89576930', 'alt': 'TA'}} - assert 'grch38' not in results['NM_001363850.1:c.216_217insA']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001363850.1:c.216_217insA']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.216_217insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} - assert 'NM_199367.1:c.216_217insA' in results.keys() + assert 'NM_199367.1:c.216_217insA' in list(results.keys()) assert results['NM_199367.1:c.216_217insA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.1:c.216_217insA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.1:c.216_217insA']['alt_genomic_loci'] == [] @@ -11198,12 +11198,12 @@ def test_variant245(self): assert results['NM_199367.1:c.216_217insA']['hgvs_transcript_variant'] == 'NM_199367.1:c.216_217insA' assert results['NM_199367.1:c.216_217insA']['hgvs_refseqgene_variant'] == '' assert 
results['NM_199367.1:c.216_217insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89576930', 'alt': 'TA'}} - assert 'hg38' not in results['NM_199367.1:c.216_217insA']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_199367.1:c.216_217insA']['primary_assembly_loci'].keys()) assert results['NM_199367.1:c.216_217insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89576930', 'alt': 'TA'}} - assert 'grch38' not in results['NM_199367.1:c.216_217insA']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_199367.1:c.216_217insA']['primary_assembly_loci'].keys()) assert results['NM_199367.1:c.216_217insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} - assert 'NM_199367.1:c.216dup' in results.keys() + assert 'NM_199367.1:c.216dup' in list(results.keys()) assert results['NM_199367.1:c.216dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.1:c.216dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.1:c.216dup']['alt_genomic_loci'] == [] @@ -11216,12 +11216,12 @@ def test_variant245(self): assert results['NM_199367.1:c.216dup']['hgvs_transcript_variant'] == 'NM_199367.1:c.216dup' assert results['NM_199367.1:c.216dup']['hgvs_refseqgene_variant'] == '' assert results['NM_199367.1:c.216dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89576927', 'alt': 'CT'}} - assert 'hg38' not in results['NM_199367.1:c.216dup']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_199367.1:c.216dup']['primary_assembly_loci'].keys()) assert 
results['NM_199367.1:c.216dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89576927', 'alt': 'CT'}} - assert 'grch38' not in results['NM_199367.1:c.216dup']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_199367.1:c.216dup']['primary_assembly_loci'].keys()) assert results['NM_199367.1:c.216dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} - assert 'NM_003119.3:c.216_217insA' in results.keys() + assert 'NM_003119.3:c.216_217insA' in list(results.keys()) assert results['NM_003119.3:c.216_217insA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.216_217insA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.3:c.216_217insA']['alt_genomic_loci'] == [] @@ -11239,7 +11239,7 @@ def test_variant245(self): assert results['NM_003119.3:c.216_217insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523insA', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89510522', 'alt': 'TA'}} assert results['NM_003119.3:c.216_217insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} - assert 'NM_003119.2:c.216dup' in results.keys() + assert 'NM_003119.2:c.216dup' in list(results.keys()) assert results['NM_003119.2:c.216dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.216dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.2:c.216dup']['alt_genomic_loci'] == [] @@ -11252,18 +11252,18 @@ def test_variant245(self): assert results['NM_003119.2:c.216dup']['hgvs_transcript_variant'] == 'NM_003119.2:c.216dup' assert results['NM_003119.2:c.216dup']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.7126dup' assert 
results['NM_003119.2:c.216dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89576927', 'alt': 'CT'}} - assert 'hg38' not in results['NM_003119.2:c.216dup']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_003119.2:c.216dup']['primary_assembly_loci'].keys()) assert results['NM_003119.2:c.216dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89576927', 'alt': 'CT'}} - assert 'grch38' not in results['NM_003119.2:c.216dup']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_003119.2:c.216dup']['primary_assembly_loci'].keys()) assert results['NM_003119.2:c.216dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} def test_variant246(self): variant = '16-89576931-G-GTG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_199367.1:c.216_217dup' in results.keys() + assert 'NM_199367.1:c.216_217dup' in list(results.keys()) assert results['NM_199367.1:c.216_217dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.1:c.216_217dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.1:c.216_217dup']['alt_genomic_loci'] == [] @@ -11276,12 +11276,12 @@ def test_variant246(self): assert results['NM_199367.1:c.216_217dup']['hgvs_transcript_variant'] == 'NM_199367.1:c.216_217dup' assert results['NM_199367.1:c.216_217dup']['hgvs_refseqgene_variant'] == '' assert results['NM_199367.1:c.216_217dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89576929', 'alt': 'TTG'}} - assert 'hg38' not in 
results['NM_199367.1:c.216_217dup']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_199367.1:c.216_217dup']['primary_assembly_loci'].keys()) assert results['NM_199367.1:c.216_217dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89576929', 'alt': 'TTG'}} - assert 'grch38' not in results['NM_199367.1:c.216_217dup']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_199367.1:c.216_217dup']['primary_assembly_loci'].keys()) assert results['NM_199367.1:c.216_217dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} - assert 'NM_003119.3:c.216_217dup' in results.keys() + assert 'NM_003119.3:c.216_217dup' in list(results.keys()) assert results['NM_003119.3:c.216_217dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.216_217dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.3:c.216_217dup']['alt_genomic_loci'] == [] @@ -11299,7 +11299,7 @@ def test_variant246(self): assert results['NM_003119.3:c.216_217dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523dup', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89510521', 'alt': 'TTG'}} assert results['NM_003119.3:c.216_217dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} - assert 'NM_199367.2:c.216_217dup' in results.keys() + assert 'NM_199367.2:c.216_217dup' in list(results.keys()) assert results['NM_199367.2:c.216_217dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.2:c.216_217dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.2:c.216_217dup']['alt_genomic_loci'] == [] @@ -11317,7 +11317,7 @@ def 
test_variant246(self): assert results['NM_199367.2:c.216_217dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523dup', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89510521', 'alt': 'TTG'}} assert results['NM_199367.2:c.216_217dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} - assert 'NM_003119.2:c.216_217dup' in results.keys() + assert 'NM_003119.2:c.216_217dup' in list(results.keys()) assert results['NM_003119.2:c.216_217dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.216_217dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.2:c.216_217dup']['alt_genomic_loci'] == [] @@ -11330,13 +11330,13 @@ def test_variant246(self): assert results['NM_003119.2:c.216_217dup']['hgvs_transcript_variant'] == 'NM_003119.2:c.216_217dup' assert results['NM_003119.2:c.216_217dup']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.7126_7127dup' assert results['NM_003119.2:c.216_217dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89576929', 'alt': 'TTG'}} - assert 'hg38' not in results['NM_003119.2:c.216_217dup']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_003119.2:c.216_217dup']['primary_assembly_loci'].keys()) assert results['NM_003119.2:c.216_217dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89576929', 'alt': 'TTG'}} - assert 'grch38' not in results['NM_003119.2:c.216_217dup']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_003119.2:c.216_217dup']['primary_assembly_loci'].keys()) assert results['NM_003119.2:c.216_217dup']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} assert results['flag'] == 'gene_variant' - assert 'NM_001363850.1:c.216_217dup' in results.keys() + assert 'NM_001363850.1:c.216_217dup' in list(results.keys()) assert results['NM_001363850.1:c.216_217dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.216_217dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363850.1:c.216_217dup']['alt_genomic_loci'] == [] @@ -11349,18 +11349,18 @@ def test_variant246(self): assert results['NM_001363850.1:c.216_217dup']['hgvs_transcript_variant'] == 'NM_001363850.1:c.216_217dup' assert results['NM_001363850.1:c.216_217dup']['hgvs_refseqgene_variant'] == '' assert results['NM_001363850.1:c.216_217dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89576929', 'alt': 'TTG'}} - assert 'hg38' not in results['NM_001363850.1:c.216_217dup']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001363850.1:c.216_217dup']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.216_217dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89576929', 'alt': 'TTG'}} - assert 'grch38' not in results['NM_001363850.1:c.216_217dup']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001363850.1:c.216_217dup']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.216_217dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} def test_variant247(self): variant = '16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C' results = self.vv.validate(variant, 'GRCh37', 
'all') - print results + print(results) - assert 'NM_199367.1:c.1046_1071del' in results.keys() + assert 'NM_199367.1:c.1046_1071del' in list(results.keys()) assert results['NM_199367.1:c.1046_1071del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.1:c.1046_1071del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.1:c.1046_1071del']['alt_genomic_loci'] == [] @@ -11373,12 +11373,12 @@ def test_variant247(self): assert results['NM_199367.1:c.1046_1071del']['hgvs_transcript_variant'] == 'NM_199367.1:c.1046_1071del' assert results['NM_199367.1:c.1046_1071del']['hgvs_refseqgene_variant'] == '' assert results['NM_199367.1:c.1046_1071del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': 'chr16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89598368', 'alt': 'C'}} - assert 'hg38' not in results['NM_199367.1:c.1046_1071del']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_199367.1:c.1046_1071del']['primary_assembly_loci'].keys()) assert results['NM_199367.1:c.1046_1071del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': '16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89598368', 'alt': 'C'}} - assert 'grch38' not in results['NM_199367.1:c.1046_1071del']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_199367.1:c.1046_1071del']['primary_assembly_loci'].keys()) assert results['NM_199367.1:c.1046_1071del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} - assert 'NM_001363850.1:c.1046_1071del' in results.keys() + assert 'NM_001363850.1:c.1046_1071del' in list(results.keys()) assert results['NM_001363850.1:c.1046_1071del']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001363850.1:c.1046_1071del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363850.1:c.1046_1071del']['alt_genomic_loci'] == [] @@ -11391,12 +11391,12 @@ def test_variant247(self): assert results['NM_001363850.1:c.1046_1071del']['hgvs_transcript_variant'] == 'NM_001363850.1:c.1046_1071del' assert results['NM_001363850.1:c.1046_1071del']['hgvs_refseqgene_variant'] == '' assert results['NM_001363850.1:c.1046_1071del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': 'chr16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89598368', 'alt': 'C'}} - assert 'hg38' not in results['NM_001363850.1:c.1046_1071del']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001363850.1:c.1046_1071del']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.1046_1071del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': '16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89598368', 'alt': 'C'}} - assert 'grch38' not in results['NM_001363850.1:c.1046_1071del']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001363850.1:c.1046_1071del']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.1046_1071del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} - assert 'NM_199367.2:c.1046_1071del' in results.keys() + assert 'NM_199367.2:c.1046_1071del' in list(results.keys()) assert results['NM_199367.2:c.1046_1071del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.2:c.1046_1071del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.2:c.1046_1071del']['alt_genomic_loci'] == [] @@ -11415,7 +11415,7 @@ def test_variant247(self): assert results['NM_199367.2:c.1046_1071del']['reference_sequence_records'] 
== {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} assert results['flag'] == 'gene_variant' - assert 'NM_003119.2:c.1046_1071del' in results.keys() + assert 'NM_003119.2:c.1046_1071del' in list(results.keys()) assert results['NM_003119.2:c.1046_1071del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.1046_1071del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.2:c.1046_1071del']['alt_genomic_loci'] == [] @@ -11428,12 +11428,12 @@ def test_variant247(self): assert results['NM_003119.2:c.1046_1071del']['hgvs_transcript_variant'] == 'NM_003119.2:c.1046_1071del' assert results['NM_003119.2:c.1046_1071del']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.28566_28591del' assert results['NM_003119.2:c.1046_1071del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': 'chr16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89598368', 'alt': 'C'}} - assert 'hg38' not in results['NM_003119.2:c.1046_1071del']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_003119.2:c.1046_1071del']['primary_assembly_loci'].keys()) assert results['NM_003119.2:c.1046_1071del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': '16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89598368', 'alt': 'C'}} - assert 'grch38' not in results['NM_003119.2:c.1046_1071del']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_003119.2:c.1046_1071del']['primary_assembly_loci'].keys()) assert results['NM_003119.2:c.1046_1071del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} - assert 'NM_003119.3:c.1046_1071del' in results.keys() 
+ assert 'NM_003119.3:c.1046_1071del' in list(results.keys()) assert results['NM_003119.3:c.1046_1071del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.1046_1071del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.3:c.1046_1071del']['alt_genomic_loci'] == [] @@ -11455,9 +11455,9 @@ def test_variant247(self): def test_variant248(self): variant = '16-89613064-AGGAGAGGCG-AT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001363850.1:c.1450-1_1457delinsT' in results.keys() + assert 'NM_001363850.1:c.1450-1_1457delinsT' in list(results.keys()) assert results['NM_001363850.1:c.1450-1_1457delinsT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.1450-1_1457delinsT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363850.1:c.1450-1_1457delinsT']['alt_genomic_loci'] == [] @@ -11470,13 +11470,13 @@ def test_variant248(self): assert results['NM_001363850.1:c.1450-1_1457delinsT']['hgvs_transcript_variant'] == 'NM_001363850.1:c.1450-1_1457delinsT' assert results['NM_001363850.1:c.1450-1_1457delinsT']['hgvs_refseqgene_variant'] == '' assert results['NM_001363850.1:c.1450-1_1457delinsT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613065_89613073delinsT', 'vcf': {'chr': 'chr16', 'ref': 'GGAGAGGCG', 'pos': '89613065', 'alt': 'T'}} - assert 'hg38' not in results['NM_001363850.1:c.1450-1_1457delinsT']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001363850.1:c.1450-1_1457delinsT']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.1450-1_1457delinsT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613065_89613073delinsT', 'vcf': {'chr': '16', 'ref': 'GGAGAGGCG', 'pos': '89613065', 'alt': 'T'}} - assert 'grch38' not in results['NM_001363850.1:c.1450-1_1457delinsT']['primary_assembly_loci'].keys() + assert 'grch38' not in 
list(results['NM_001363850.1:c.1450-1_1457delinsT']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.1450-1_1457delinsT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} assert results['flag'] == 'gene_variant' - assert 'NM_003119.2:c.1450-1_1457delinsT' in results.keys() + assert 'NM_003119.2:c.1450-1_1457delinsT' in list(results.keys()) assert results['NM_003119.2:c.1450-1_1457delinsT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.1450-1_1457delinsT']['refseqgene_context_intronic_sequence'] == 'NG_008082.1(NM_003119.2):c.1450-1_1457delinsT' assert results['NM_003119.2:c.1450-1_1457delinsT']['alt_genomic_loci'] == [] @@ -11489,12 +11489,12 @@ def test_variant248(self): assert results['NM_003119.2:c.1450-1_1457delinsT']['hgvs_transcript_variant'] == 'NM_003119.2:c.1450-1_1457delinsT' assert results['NM_003119.2:c.1450-1_1457delinsT']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.43261_43269delinsT' assert results['NM_003119.2:c.1450-1_1457delinsT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613065_89613073delinsT', 'vcf': {'chr': 'chr16', 'ref': 'GGAGAGGCG', 'pos': '89613065', 'alt': 'T'}} - assert 'hg38' not in results['NM_003119.2:c.1450-1_1457delinsT']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_003119.2:c.1450-1_1457delinsT']['primary_assembly_loci'].keys()) assert results['NM_003119.2:c.1450-1_1457delinsT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613065_89613073delinsT', 'vcf': {'chr': '16', 'ref': 'GGAGAGGCG', 'pos': '89613065', 'alt': 'T'}} - assert 'grch38' not in results['NM_003119.2:c.1450-1_1457delinsT']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_003119.2:c.1450-1_1457delinsT']['primary_assembly_loci'].keys()) assert 
results['NM_003119.2:c.1450-1_1457delinsT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} - assert 'NM_003119.3:c.1450-1_1457delinsT' in results.keys() + assert 'NM_003119.3:c.1450-1_1457delinsT' in list(results.keys()) assert results['NM_003119.3:c.1450-1_1457delinsT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.1450-1_1457delinsT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.3:c.1450-1_1457delinsT']['alt_genomic_loci'] == [] @@ -11516,9 +11516,9 @@ def test_variant248(self): def test_variant249(self): variant = '16-89613069-AGGCGGGAGA-AT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_003119.2:c.1454_1462delinsT' in results.keys() + assert 'NM_003119.2:c.1454_1462delinsT' in list(results.keys()) assert results['NM_003119.2:c.1454_1462delinsT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.1454_1462delinsT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.2:c.1454_1462delinsT']['alt_genomic_loci'] == [] @@ -11531,13 +11531,13 @@ def test_variant249(self): assert results['NM_003119.2:c.1454_1462delinsT']['hgvs_transcript_variant'] == 'NM_003119.2:c.1454_1462delinsT' assert results['NM_003119.2:c.1454_1462delinsT']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.43266_43274delinsT' assert results['NM_003119.2:c.1454_1462delinsT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613070_89613078delinsT', 'vcf': {'chr': 'chr16', 'ref': 'GGCGGGAGA', 'pos': '89613070', 'alt': 'T'}} - assert 'hg38' not in results['NM_003119.2:c.1454_1462delinsT']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_003119.2:c.1454_1462delinsT']['primary_assembly_loci'].keys()) assert 
results['NM_003119.2:c.1454_1462delinsT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613070_89613078delinsT', 'vcf': {'chr': '16', 'ref': 'GGCGGGAGA', 'pos': '89613070', 'alt': 'T'}} - assert 'grch38' not in results['NM_003119.2:c.1454_1462delinsT']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_003119.2:c.1454_1462delinsT']['primary_assembly_loci'].keys()) assert results['NM_003119.2:c.1454_1462delinsT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} assert results['flag'] == 'gene_variant' - assert 'NM_001363850.1:c.1454_1462delinsT' in results.keys() + assert 'NM_001363850.1:c.1454_1462delinsT' in list(results.keys()) assert results['NM_001363850.1:c.1454_1462delinsT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.1454_1462delinsT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363850.1:c.1454_1462delinsT']['alt_genomic_loci'] == [] @@ -11550,12 +11550,12 @@ def test_variant249(self): assert results['NM_001363850.1:c.1454_1462delinsT']['hgvs_transcript_variant'] == 'NM_001363850.1:c.1454_1462delinsT' assert results['NM_001363850.1:c.1454_1462delinsT']['hgvs_refseqgene_variant'] == '' assert results['NM_001363850.1:c.1454_1462delinsT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613070_89613078delinsT', 'vcf': {'chr': 'chr16', 'ref': 'GGCGGGAGA', 'pos': '89613070', 'alt': 'T'}} - assert 'hg38' not in results['NM_001363850.1:c.1454_1462delinsT']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001363850.1:c.1454_1462delinsT']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.1454_1462delinsT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000016.9:g.89613070_89613078delinsT', 'vcf': {'chr': '16', 'ref': 'GGCGGGAGA', 'pos': '89613070', 'alt': 'T'}} - assert 'grch38' not in results['NM_001363850.1:c.1454_1462delinsT']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001363850.1:c.1454_1462delinsT']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.1454_1462delinsT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} - assert 'NM_003119.3:c.1454_1462delinsT' in results.keys() + assert 'NM_003119.3:c.1454_1462delinsT' in list(results.keys()) assert results['NM_003119.3:c.1454_1462delinsT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.1454_1462delinsT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.3:c.1454_1462delinsT']['alt_genomic_loci'] == [] @@ -11577,9 +11577,9 @@ def test_variant249(self): def test_variant250(self): variant = '16-89613145-C-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001363850.1:c.1529C>T' in results.keys() + assert 'NM_001363850.1:c.1529C>T' in list(results.keys()) assert results['NM_001363850.1:c.1529C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.1529C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363850.1:c.1529C>T']['alt_genomic_loci'] == [] @@ -11592,12 +11592,12 @@ def test_variant250(self): assert results['NM_001363850.1:c.1529C>T']['hgvs_transcript_variant'] == 'NM_001363850.1:c.1529C>T' assert results['NM_001363850.1:c.1529C>T']['hgvs_refseqgene_variant'] == '' assert results['NM_001363850.1:c.1529C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613145C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89613145', 'alt': 'T'}} - assert 'hg38' not in results['NM_001363850.1:c.1529C>T']['primary_assembly_loci'].keys() + 
assert 'hg38' not in list(results['NM_001363850.1:c.1529C>T']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.1529C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613145C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89613145', 'alt': 'T'}} - assert 'grch38' not in results['NM_001363850.1:c.1529C>T']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001363850.1:c.1529C>T']['primary_assembly_loci'].keys()) assert results['NM_001363850.1:c.1529C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} - assert 'NM_003119.3:c.1529C>T' in results.keys() + assert 'NM_003119.3:c.1529C>T' in list(results.keys()) assert results['NM_003119.3:c.1529C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.1529C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.3:c.1529C>T']['alt_genomic_loci'] == [] @@ -11616,7 +11616,7 @@ def test_variant250(self): assert results['NM_003119.3:c.1529C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} assert results['flag'] == 'gene_variant' - assert 'NM_003119.2:c.1529C>T' in results.keys() + assert 'NM_003119.2:c.1529C>T' in list(results.keys()) assert results['NM_003119.2:c.1529C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.1529C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.2:c.1529C>T']['alt_genomic_loci'] == [] @@ -11629,18 +11629,18 @@ def test_variant250(self): assert results['NM_003119.2:c.1529C>T']['hgvs_transcript_variant'] == 'NM_003119.2:c.1529C>T' assert results['NM_003119.2:c.1529C>T']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.43341C>T' assert results['NM_003119.2:c.1529C>T']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000016.9:g.89613145C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89613145', 'alt': 'T'}} - assert 'hg38' not in results['NM_003119.2:c.1529C>T']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_003119.2:c.1529C>T']['primary_assembly_loci'].keys()) assert results['NM_003119.2:c.1529C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613145C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89613145', 'alt': 'T'}} - assert 'grch38' not in results['NM_003119.2:c.1529C>T']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_003119.2:c.1529C>T']['primary_assembly_loci'].keys()) assert results['NM_003119.2:c.1529C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} def test_variant251(self): variant = '17-7578194-GCAC-G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001276695.1:c.535_537del' in results.keys() + assert 'NM_001276695.1:c.535_537del' in list(results.keys()) assert results['NM_001276695.1:c.535_537del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001276695.1:c.535_537del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001276695.1:c.535_537del']['alt_genomic_loci'] == [] @@ -11658,7 +11658,7 @@ def test_variant251(self): assert results['NM_001276695.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} assert results['NM_001276695.1:c.535_537del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263624.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276695.1'} - assert 'NM_001126113.2:c.652_654del' in 
results.keys() + assert 'NM_001126113.2:c.652_654del' in list(results.keys()) assert results['NM_001126113.2:c.652_654del']['hgvs_lrg_transcript_variant'] == 'LRG_321t4:c.652_654del' assert results['NM_001126113.2:c.652_654del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001126113.2:c.652_654del']['alt_genomic_loci'] == [] @@ -11676,7 +11676,7 @@ def test_variant251(self): assert results['NM_001126113.2:c.652_654del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} assert results['NM_001126113.2:c.652_654del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119585.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126113.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} - assert 'NM_001126118.1:c.535_537del' in results.keys() + assert 'NM_001126118.1:c.535_537del' in list(results.keys()) assert results['NM_001126118.1:c.535_537del']['hgvs_lrg_transcript_variant'] == 'LRG_321t8:c.535_537del' assert results['NM_001126118.1:c.535_537del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001126118.1:c.535_537del']['alt_genomic_loci'] == [] @@ -11694,7 +11694,7 @@ def test_variant251(self): assert results['NM_001126118.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} assert results['NM_001126118.1:c.535_537del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119590.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126118.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} - assert 'NM_001126116.1:c.256_258del' 
in results.keys() + assert 'NM_001126116.1:c.256_258del' in list(results.keys()) assert results['NM_001126116.1:c.256_258del']['hgvs_lrg_transcript_variant'] == 'LRG_321t6:c.256_258del' assert results['NM_001126116.1:c.256_258del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001126116.1:c.256_258del']['alt_genomic_loci'] == [] @@ -11712,7 +11712,7 @@ def test_variant251(self): assert results['NM_001126116.1:c.256_258del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} assert results['NM_001126116.1:c.256_258del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119588.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126116.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} - assert 'NM_001126117.1:c.256_258del' in results.keys() + assert 'NM_001126117.1:c.256_258del' in list(results.keys()) assert results['NM_001126117.1:c.256_258del']['hgvs_lrg_transcript_variant'] == 'LRG_321t7:c.256_258del' assert results['NM_001126117.1:c.256_258del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001126117.1:c.256_258del']['alt_genomic_loci'] == [] @@ -11730,7 +11730,7 @@ def test_variant251(self): assert results['NM_001126117.1:c.256_258del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} assert results['NM_001126117.1:c.256_258del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119589.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126117.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} - assert 
'NM_001276761.1:c.535_537del' in results.keys() + assert 'NM_001276761.1:c.535_537del' in list(results.keys()) assert results['NM_001276761.1:c.535_537del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001276761.1:c.535_537del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001276761.1:c.535_537del']['alt_genomic_loci'] == [] @@ -11748,7 +11748,7 @@ def test_variant251(self): assert results['NM_001276761.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} assert results['NM_001276761.1:c.535_537del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263690.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276761.1'} - assert 'NM_001126112.2:c.652_654del' in results.keys() + assert 'NM_001126112.2:c.652_654del' in list(results.keys()) assert results['NM_001126112.2:c.652_654del']['hgvs_lrg_transcript_variant'] == 'LRG_321t2:c.652_654del' assert results['NM_001126112.2:c.652_654del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001126112.2:c.652_654del']['alt_genomic_loci'] == [] @@ -11767,7 +11767,7 @@ def test_variant251(self): assert results['NM_001126112.2:c.652_654del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119584.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126112.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} assert results['flag'] == 'gene_variant' - assert 'NM_001276697.1:c.175_177del' in results.keys() + assert 'NM_001276697.1:c.175_177del' in list(results.keys()) assert results['NM_001276697.1:c.175_177del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001276697.1:c.175_177del']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_001276697.1:c.175_177del']['alt_genomic_loci'] == [] @@ -11785,7 +11785,7 @@ def test_variant251(self): assert results['NM_001276697.1:c.175_177del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} assert results['NM_001276697.1:c.175_177del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263626.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276697.1'} - assert 'NM_001276696.1:c.535_537del' in results.keys() + assert 'NM_001276696.1:c.535_537del' in list(results.keys()) assert results['NM_001276696.1:c.535_537del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001276696.1:c.535_537del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001276696.1:c.535_537del']['alt_genomic_loci'] == [] @@ -11803,7 +11803,7 @@ def test_variant251(self): assert results['NM_001276696.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} assert results['NM_001276696.1:c.535_537del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263625.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276696.1'} - assert 'NM_001276698.1:c.175_177del' in results.keys() + assert 'NM_001276698.1:c.175_177del' in list(results.keys()) assert results['NM_001276698.1:c.175_177del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001276698.1:c.175_177del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001276698.1:c.175_177del']['alt_genomic_loci'] == [] @@ -11821,7 +11821,7 @@ def test_variant251(self): assert results['NM_001276698.1:c.175_177del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 
'GCAC', 'pos': '7674876', 'alt': 'G'}} assert results['NM_001276698.1:c.175_177del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263627.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276698.1'} - assert 'NM_001126115.1:c.256_258del' in results.keys() + assert 'NM_001126115.1:c.256_258del' in list(results.keys()) assert results['NM_001126115.1:c.256_258del']['hgvs_lrg_transcript_variant'] == 'LRG_321t5:c.256_258del' assert results['NM_001126115.1:c.256_258del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001126115.1:c.256_258del']['alt_genomic_loci'] == [] @@ -11839,7 +11839,7 @@ def test_variant251(self): assert results['NM_001126115.1:c.256_258del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} assert results['NM_001126115.1:c.256_258del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119587.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126115.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} - assert 'NM_001126114.2:c.652_654del' in results.keys() + assert 'NM_001126114.2:c.652_654del' in list(results.keys()) assert results['NM_001126114.2:c.652_654del']['hgvs_lrg_transcript_variant'] == 'LRG_321t3:c.652_654del' assert results['NM_001126114.2:c.652_654del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001126114.2:c.652_654del']['alt_genomic_loci'] == [] @@ -11857,7 +11857,7 @@ def test_variant251(self): assert results['NM_001126114.2:c.652_654del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} assert results['NM_001126114.2:c.652_654del']['reference_sequence_records'] == 
{'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119586.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126114.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} - assert 'NM_001276699.1:c.175_177del' in results.keys() + assert 'NM_001276699.1:c.175_177del' in list(results.keys()) assert results['NM_001276699.1:c.175_177del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001276699.1:c.175_177del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001276699.1:c.175_177del']['alt_genomic_loci'] == [] @@ -11875,7 +11875,7 @@ def test_variant251(self): assert results['NM_001276699.1:c.175_177del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} assert results['NM_001276699.1:c.175_177del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263628.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276699.1'} - assert 'NM_001276760.1:c.535_537del' in results.keys() + assert 'NM_001276760.1:c.535_537del' in list(results.keys()) assert results['NM_001276760.1:c.535_537del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001276760.1:c.535_537del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001276760.1:c.535_537del']['alt_genomic_loci'] == [] @@ -11893,7 +11893,7 @@ def test_variant251(self): assert results['NM_001276760.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} assert results['NM_001276760.1:c.535_537del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263689.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276760.1'} - assert 
'NM_000546.5:c.652_654del' in results.keys() + assert 'NM_000546.5:c.652_654del' in list(results.keys()) assert results['NM_000546.5:c.652_654del']['hgvs_lrg_transcript_variant'] == 'LRG_321t1:c.652_654del' assert results['NM_000546.5:c.652_654del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000546.5:c.652_654del']['alt_genomic_loci'] == [] @@ -11915,9 +11915,9 @@ def test_variant251(self): def test_variant252(self): variant = '17-7578523-T-TG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001276760.1:c.289dup' in results.keys() + assert 'NM_001276760.1:c.289dup' in list(results.keys()) assert results['NM_001276760.1:c.289dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001276760.1:c.289dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001276760.1:c.289dup']['alt_genomic_loci'] == [] @@ -11935,7 +11935,7 @@ def test_variant252(self): assert results['NM_001276760.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert results['NM_001276760.1:c.289dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263689.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276760.1'} - assert 'NM_001126118.1:c.289dup' in results.keys() + assert 'NM_001126118.1:c.289dup' in list(results.keys()) assert results['NM_001126118.1:c.289dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t8:c.289dup' assert results['NM_001126118.1:c.289dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001126118.1:c.289dup']['alt_genomic_loci'] == [] @@ -11953,7 +11953,7 @@ def test_variant252(self): assert results['NM_001126118.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert 
results['NM_001126118.1:c.289dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119590.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126118.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} - assert 'NM_001276695.1:c.289dup' in results.keys() + assert 'NM_001276695.1:c.289dup' in list(results.keys()) assert results['NM_001276695.1:c.289dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001276695.1:c.289dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001276695.1:c.289dup']['alt_genomic_loci'] == [] @@ -11971,7 +11971,7 @@ def test_variant252(self): assert results['NM_001276695.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert results['NM_001276695.1:c.289dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263624.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276695.1'} - assert 'NM_001276699.1:c.-72dup' in results.keys() + assert 'NM_001276699.1:c.-72dup' in list(results.keys()) assert results['NM_001276699.1:c.-72dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001276699.1:c.-72dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001276699.1:c.-72dup']['alt_genomic_loci'] == [] @@ -11989,7 +11989,7 @@ def test_variant252(self): assert results['NM_001276699.1:c.-72dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert results['NM_001276699.1:c.-72dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263628.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276699.1'} - assert 'NM_001126115.1:c.10dup' in 
results.keys() + assert 'NM_001126115.1:c.10dup' in list(results.keys()) assert results['NM_001126115.1:c.10dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t5:c.10dup' assert results['NM_001126115.1:c.10dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001126115.1:c.10dup']['alt_genomic_loci'] == [] @@ -12007,7 +12007,7 @@ def test_variant252(self): assert results['NM_001126115.1:c.10dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert results['NM_001126115.1:c.10dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119587.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126115.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} - assert 'NM_001276697.1:c.-72dup' in results.keys() + assert 'NM_001276697.1:c.-72dup' in list(results.keys()) assert results['NM_001276697.1:c.-72dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001276697.1:c.-72dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001276697.1:c.-72dup']['alt_genomic_loci'] == [] @@ -12025,7 +12025,7 @@ def test_variant252(self): assert results['NM_001276697.1:c.-72dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert results['NM_001276697.1:c.-72dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263626.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276697.1'} - assert 'NM_001126117.1:c.10dup' in results.keys() + assert 'NM_001126117.1:c.10dup' in list(results.keys()) assert results['NM_001126117.1:c.10dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t7:c.10dup' assert 
results['NM_001126117.1:c.10dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001126117.1:c.10dup']['alt_genomic_loci'] == [] @@ -12043,7 +12043,7 @@ def test_variant252(self): assert results['NM_001126117.1:c.10dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert results['NM_001126117.1:c.10dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119589.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126117.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} - assert 'NM_000546.5:c.406dup' in results.keys() + assert 'NM_000546.5:c.406dup' in list(results.keys()) assert results['NM_000546.5:c.406dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t1:c.406dup' assert results['NM_000546.5:c.406dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000546.5:c.406dup']['alt_genomic_loci'] == [] @@ -12062,7 +12062,7 @@ def test_variant252(self): assert results['NM_000546.5:c.406dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000537.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000546.5', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} assert results['flag'] == 'gene_variant' - assert 'NM_001276696.1:c.289dup' in results.keys() + assert 'NM_001276696.1:c.289dup' in list(results.keys()) assert results['NM_001276696.1:c.289dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001276696.1:c.289dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001276696.1:c.289dup']['alt_genomic_loci'] == [] @@ -12080,7 +12080,7 @@ def test_variant252(self): assert results['NM_001276696.1:c.289dup']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert results['NM_001276696.1:c.289dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263625.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276696.1'} - assert 'NM_001276698.1:c.-72dup' in results.keys() + assert 'NM_001276698.1:c.-72dup' in list(results.keys()) assert results['NM_001276698.1:c.-72dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001276698.1:c.-72dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001276698.1:c.-72dup']['alt_genomic_loci'] == [] @@ -12098,7 +12098,7 @@ def test_variant252(self): assert results['NM_001276698.1:c.-72dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert results['NM_001276698.1:c.-72dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263627.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276698.1'} - assert 'NM_001276761.1:c.289dup' in results.keys() + assert 'NM_001276761.1:c.289dup' in list(results.keys()) assert results['NM_001276761.1:c.289dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001276761.1:c.289dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001276761.1:c.289dup']['alt_genomic_loci'] == [] @@ -12116,7 +12116,7 @@ def test_variant252(self): assert results['NM_001276761.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert results['NM_001276761.1:c.289dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263690.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276761.1'} - assert 'NM_001126113.2:c.406dup' in 
results.keys() + assert 'NM_001126113.2:c.406dup' in list(results.keys()) assert results['NM_001126113.2:c.406dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t4:c.406dup' assert results['NM_001126113.2:c.406dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001126113.2:c.406dup']['alt_genomic_loci'] == [] @@ -12134,7 +12134,7 @@ def test_variant252(self): assert results['NM_001126113.2:c.406dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert results['NM_001126113.2:c.406dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119585.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126113.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} - assert 'NM_001126116.1:c.10dup' in results.keys() + assert 'NM_001126116.1:c.10dup' in list(results.keys()) assert results['NM_001126116.1:c.10dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t6:c.10dup' assert results['NM_001126116.1:c.10dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001126116.1:c.10dup']['alt_genomic_loci'] == [] @@ -12152,7 +12152,7 @@ def test_variant252(self): assert results['NM_001126116.1:c.10dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert results['NM_001126116.1:c.10dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119588.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126116.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} - assert 'NM_001126112.2:c.406dup' in results.keys() + assert 'NM_001126112.2:c.406dup' in list(results.keys()) assert 
results['NM_001126112.2:c.406dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t2:c.406dup' assert results['NM_001126112.2:c.406dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001126112.2:c.406dup']['alt_genomic_loci'] == [] @@ -12170,7 +12170,7 @@ def test_variant252(self): assert results['NM_001126112.2:c.406dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert results['NM_001126112.2:c.406dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119584.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126112.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} - assert 'NM_001126114.2:c.406dup' in results.keys() + assert 'NM_001126114.2:c.406dup' in list(results.keys()) assert results['NM_001126114.2:c.406dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t3:c.406dup' assert results['NM_001126114.2:c.406dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001126114.2:c.406dup']['alt_genomic_loci'] == [] @@ -12192,9 +12192,9 @@ def test_variant252(self): def test_variant253(self): variant = '17-17119692-A-C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_144997.6:c.1300+2T>G' in results.keys() + assert 'NM_144997.6:c.1300+2T>G' in list(results.keys()) assert results['NM_144997.6:c.1300+2T>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_144997.6:c.1300+2T>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_144997.6:c.1300+2T>G']['alt_genomic_loci'] == [] @@ -12206,13 +12206,13 @@ def test_variant253(self): assert results['NM_144997.6:c.1300+2T>G']['hgvs_lrg_variant'] == '' assert results['NM_144997.6:c.1300+2T>G']['hgvs_transcript_variant'] == 'NM_144997.6:c.1300+2T>G' assert 
results['NM_144997.6:c.1300+2T>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_144997.6:c.1300+2T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': 'chr17', 'ref': u'A', 'pos': '17119692', 'alt': u'C'}} - assert results['NM_144997.6:c.1300+2T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': 'chr17', 'ref': u'A', 'pos': '17216378', 'alt': u'C'}} - assert results['NM_144997.6:c.1300+2T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': '17', 'ref': u'A', 'pos': '17119692', 'alt': u'C'}} - assert results['NM_144997.6:c.1300+2T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': '17', 'ref': u'A', 'pos': '17216378', 'alt': u'C'}} + assert results['NM_144997.6:c.1300+2T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '17119692', 'alt': 'C'}} + assert results['NM_144997.6:c.1300+2T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '17216378', 'alt': 'C'}} + assert results['NM_144997.6:c.1300+2T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '17119692', 'alt': 'C'}} + assert results['NM_144997.6:c.1300+2T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '17216378', 'alt': 'C'}} assert results['NM_144997.6:c.1300+2T>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_659434.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_144997.6'} - assert 'NM_001353230.1:c.1300+2T>G' in results.keys() + assert 
'NM_001353230.1:c.1300+2T>G' in list(results.keys()) assert results['NM_001353230.1:c.1300+2T>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353230.1:c.1300+2T>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353230.1:c.1300+2T>G']['alt_genomic_loci'] == [] @@ -12224,13 +12224,13 @@ def test_variant253(self): assert results['NM_001353230.1:c.1300+2T>G']['hgvs_lrg_variant'] == '' assert results['NM_001353230.1:c.1300+2T>G']['hgvs_transcript_variant'] == 'NM_001353230.1:c.1300+2T>G' assert results['NM_001353230.1:c.1300+2T>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353230.1:c.1300+2T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': 'chr17', 'ref': u'A', 'pos': '17119692', 'alt': u'C'}} - assert results['NM_001353230.1:c.1300+2T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': 'chr17', 'ref': u'A', 'pos': '17216378', 'alt': u'C'}} - assert results['NM_001353230.1:c.1300+2T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': '17', 'ref': u'A', 'pos': '17119692', 'alt': u'C'}} - assert results['NM_001353230.1:c.1300+2T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': '17', 'ref': u'A', 'pos': '17216378', 'alt': u'C'}} + assert results['NM_001353230.1:c.1300+2T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '17119692', 'alt': 'C'}} + assert results['NM_001353230.1:c.1300+2T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '17216378', 'alt': 'C'}} + assert results['NM_001353230.1:c.1300+2T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000017.10:g.17119692A>C', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '17119692', 'alt': 'C'}} + assert results['NM_001353230.1:c.1300+2T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '17216378', 'alt': 'C'}} assert results['NM_001353230.1:c.1300+2T>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340159.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353230.1'} - assert 'NM_001353229.1:c.1354+2T>G' in results.keys() + assert 'NM_001353229.1:c.1354+2T>G' in list(results.keys()) assert results['NM_001353229.1:c.1354+2T>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353229.1:c.1354+2T>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353229.1:c.1354+2T>G']['alt_genomic_loci'] == [] @@ -12242,14 +12242,14 @@ def test_variant253(self): assert results['NM_001353229.1:c.1354+2T>G']['hgvs_lrg_variant'] == '' assert results['NM_001353229.1:c.1354+2T>G']['hgvs_transcript_variant'] == 'NM_001353229.1:c.1354+2T>G' assert results['NM_001353229.1:c.1354+2T>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353229.1:c.1354+2T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': 'chr17', 'ref': u'A', 'pos': '17119692', 'alt': u'C'}} - assert results['NM_001353229.1:c.1354+2T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': 'chr17', 'ref': u'A', 'pos': '17216378', 'alt': u'C'}} - assert results['NM_001353229.1:c.1354+2T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': '17', 'ref': u'A', 'pos': '17119692', 'alt': u'C'}} - assert results['NM_001353229.1:c.1354+2T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': '17', 'ref': u'A', 
'pos': '17216378', 'alt': u'C'}} + assert results['NM_001353229.1:c.1354+2T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '17119692', 'alt': 'C'}} + assert results['NM_001353229.1:c.1354+2T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '17216378', 'alt': 'C'}} + assert results['NM_001353229.1:c.1354+2T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '17119692', 'alt': 'C'}} + assert results['NM_001353229.1:c.1354+2T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '17216378', 'alt': 'C'}} assert results['NM_001353229.1:c.1354+2T>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340158.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353229.1'} assert results['flag'] == 'gene_variant' - assert 'NM_144997.5:c.1300+2T>G' in results.keys() + assert 'NM_144997.5:c.1300+2T>G' in list(results.keys()) assert results['NM_144997.5:c.1300+2T>G']['hgvs_lrg_transcript_variant'] == 'LRG_325t1:c.1300+2T>G' assert results['NM_144997.5:c.1300+2T>G']['refseqgene_context_intronic_sequence'] == 'NG_008001.2(NM_144997.5):c.1300+2T>G' assert results['NM_144997.5:c.1300+2T>G']['alt_genomic_loci'] == [] @@ -12261,13 +12261,13 @@ def test_variant253(self): assert results['NM_144997.5:c.1300+2T>G']['hgvs_lrg_variant'] == 'LRG_325:g.25811T>G' assert results['NM_144997.5:c.1300+2T>G']['hgvs_transcript_variant'] == 'NM_144997.5:c.1300+2T>G' assert results['NM_144997.5:c.1300+2T>G']['hgvs_refseqgene_variant'] == 'NG_008001.2:g.25811T>G' - assert results['NM_144997.5:c.1300+2T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000017.10:g.17119692A>C', 'vcf': {'chr': 'chr17', 'ref': u'A', 'pos': '17119692', 'alt': u'C'}} - assert results['NM_144997.5:c.1300+2T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': 'chr17', 'ref': u'A', 'pos': '17216378', 'alt': u'C'}} - assert results['NM_144997.5:c.1300+2T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': '17', 'ref': u'A', 'pos': '17119692', 'alt': u'C'}} - assert results['NM_144997.5:c.1300+2T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': '17', 'ref': u'A', 'pos': '17216378', 'alt': u'C'}} + assert results['NM_144997.5:c.1300+2T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '17119692', 'alt': 'C'}} + assert results['NM_144997.5:c.1300+2T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '17216378', 'alt': 'C'}} + assert results['NM_144997.5:c.1300+2T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '17119692', 'alt': 'C'}} + assert results['NM_144997.5:c.1300+2T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '17216378', 'alt': 'C'}} assert results['NM_144997.5:c.1300+2T>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008001.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_659434.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_144997.5', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_325.xml'} - assert 'NM_001353231.1:c.1300+2T>G' in results.keys() + assert 'NM_001353231.1:c.1300+2T>G' in 
list(results.keys()) assert results['NM_001353231.1:c.1300+2T>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353231.1:c.1300+2T>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353231.1:c.1300+2T>G']['alt_genomic_loci'] == [] @@ -12279,19 +12279,19 @@ def test_variant253(self): assert results['NM_001353231.1:c.1300+2T>G']['hgvs_lrg_variant'] == '' assert results['NM_001353231.1:c.1300+2T>G']['hgvs_transcript_variant'] == 'NM_001353231.1:c.1300+2T>G' assert results['NM_001353231.1:c.1300+2T>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353231.1:c.1300+2T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': 'chr17', 'ref': u'A', 'pos': '17119692', 'alt': u'C'}} - assert results['NM_001353231.1:c.1300+2T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': 'chr17', 'ref': u'A', 'pos': '17216378', 'alt': u'C'}} - assert results['NM_001353231.1:c.1300+2T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': '17', 'ref': u'A', 'pos': '17119692', 'alt': u'C'}} - assert results['NM_001353231.1:c.1300+2T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': '17', 'ref': u'A', 'pos': '17216378', 'alt': u'C'}} + assert results['NM_001353231.1:c.1300+2T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '17119692', 'alt': 'C'}} + assert results['NM_001353231.1:c.1300+2T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '17216378', 'alt': 'C'}} + assert results['NM_001353231.1:c.1300+2T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': 
'17', 'ref': 'A', 'pos': '17119692', 'alt': 'C'}} + assert results['NM_001353231.1:c.1300+2T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '17216378', 'alt': 'C'}} assert results['NM_001353231.1:c.1300+2T>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340160.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353231.1'} def test_variant254(self): variant = '17-41197588-GGACA-G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_007294.3:c.*103_*106del' in results.keys() + assert 'NM_007294.3:c.*103_*106del' in list(results.keys()) assert results['NM_007294.3:c.*103_*106del']['hgvs_lrg_transcript_variant'] == 'LRG_292t1:c.*103_*106del' assert results['NM_007294.3:c.*103_*106del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007294.3:c.*103_*106del']['alt_genomic_loci'] == [] @@ -12309,7 +12309,7 @@ def test_variant254(self): assert results['NM_007294.3:c.*103_*106del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} assert results['NM_007294.3:c.*103_*106del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_005905.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009225.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007294.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_292.xml'} - assert 'NM_007297.3:c.*103_*106del' in results.keys() + assert 'NM_007297.3:c.*103_*106del' in list(results.keys()) assert results['NM_007297.3:c.*103_*106del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007297.3:c.*103_*106del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007297.3:c.*103_*106del']['alt_genomic_loci'] == [] @@ -12327,7 +12327,7 @@ 
def test_variant254(self): assert results['NM_007297.3:c.*103_*106del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} assert results['NM_007297.3:c.*103_*106del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009228.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007297.3'} - assert 'NR_027676.1:n.5831_5834del' in results.keys() + assert 'NR_027676.1:n.5831_5834del' in list(results.keys()) assert results['NR_027676.1:n.5831_5834del']['hgvs_lrg_transcript_variant'] == '' assert results['NR_027676.1:n.5831_5834del']['refseqgene_context_intronic_sequence'] == '' assert results['NR_027676.1:n.5831_5834del']['alt_genomic_loci'] == [] @@ -12345,7 +12345,7 @@ def test_variant254(self): assert results['NR_027676.1:n.5831_5834del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} assert results['NR_027676.1:n.5831_5834del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_027676.1'} - assert 'NM_007300.3:c.*103_*106del' in results.keys() + assert 'NM_007300.3:c.*103_*106del' in list(results.keys()) assert results['NM_007300.3:c.*103_*106del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007300.3:c.*103_*106del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007300.3:c.*103_*106del']['alt_genomic_loci'] == [] @@ -12364,7 +12364,7 @@ def test_variant254(self): assert results['NM_007300.3:c.*103_*106del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009231.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007300.3'} assert results['flag'] == 'gene_variant' - assert 'NM_007299.3:c.*209_*212del' in results.keys() + assert 'NM_007299.3:c.*209_*212del' in 
list(results.keys()) assert results['NM_007299.3:c.*209_*212del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007299.3:c.*209_*212del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007299.3:c.*209_*212del']['alt_genomic_loci'] == [] @@ -12382,7 +12382,7 @@ def test_variant254(self): assert results['NM_007299.3:c.*209_*212del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} assert results['NM_007299.3:c.*209_*212del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009230.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007299.3'} - assert 'NM_007298.3:c.*103_*106del' in results.keys() + assert 'NM_007298.3:c.*103_*106del' in list(results.keys()) assert results['NM_007298.3:c.*103_*106del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007298.3:c.*103_*106del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007298.3:c.*103_*106del']['alt_genomic_loci'] == [] @@ -12404,9 +12404,9 @@ def test_variant254(self): def test_variant255(self): variant = '17-41256884-C-G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_007299.3:c.301+1G>C' in results.keys() + assert 'NM_007299.3:c.301+1G>C' in list(results.keys()) assert results['NM_007299.3:c.301+1G>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007299.3:c.301+1G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007299.3:c.301+1G>C']['alt_genomic_loci'] == [] @@ -12418,13 +12418,13 @@ def test_variant255(self): assert results['NM_007299.3:c.301+1G>C']['hgvs_lrg_variant'] == '' assert results['NM_007299.3:c.301+1G>C']['hgvs_transcript_variant'] == 'NM_007299.3:c.301+1G>C' assert results['NM_007299.3:c.301+1G>C']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_007299.3:c.301+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '41256884', 'alt': u'G'}} - assert results['NM_007299.3:c.301+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '43104867', 'alt': u'G'}} - assert results['NM_007299.3:c.301+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '41256884', 'alt': u'G'}} - assert results['NM_007299.3:c.301+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '43104867', 'alt': u'G'}} + assert results['NM_007299.3:c.301+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '41256884', 'alt': 'G'}} + assert results['NM_007299.3:c.301+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '43104867', 'alt': 'G'}} + assert results['NM_007299.3:c.301+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '41256884', 'alt': 'G'}} + assert results['NM_007299.3:c.301+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '43104867', 'alt': 'G'}} assert results['NM_007299.3:c.301+1G>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009230.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007299.3'} - assert 'NR_027676.1:n.440+1G>C' in results.keys() + assert 'NR_027676.1:n.440+1G>C' in list(results.keys()) assert 
results['NR_027676.1:n.440+1G>C']['hgvs_lrg_transcript_variant'] == '' assert results['NR_027676.1:n.440+1G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NR_027676.1:n.440+1G>C']['alt_genomic_loci'] == [] @@ -12436,13 +12436,13 @@ def test_variant255(self): assert results['NR_027676.1:n.440+1G>C']['hgvs_lrg_variant'] == '' assert results['NR_027676.1:n.440+1G>C']['hgvs_transcript_variant'] == 'NR_027676.1:n.440+1G>C' assert results['NR_027676.1:n.440+1G>C']['hgvs_refseqgene_variant'] == '' - assert results['NR_027676.1:n.440+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '41256884', 'alt': u'G'}} - assert results['NR_027676.1:n.440+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '43104867', 'alt': u'G'}} - assert results['NR_027676.1:n.440+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '41256884', 'alt': u'G'}} - assert results['NR_027676.1:n.440+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '43104867', 'alt': u'G'}} + assert results['NR_027676.1:n.440+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '41256884', 'alt': 'G'}} + assert results['NR_027676.1:n.440+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '43104867', 'alt': 'G'}} + assert results['NR_027676.1:n.440+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '41256884', 'alt': 'G'}} + assert 
results['NR_027676.1:n.440+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '43104867', 'alt': 'G'}} assert results['NR_027676.1:n.440+1G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_027676.1'} - assert 'NM_007300.3:c.301+1G>C' in results.keys() + assert 'NM_007300.3:c.301+1G>C' in list(results.keys()) assert results['NM_007300.3:c.301+1G>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007300.3:c.301+1G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007300.3:c.301+1G>C']['alt_genomic_loci'] == [] @@ -12454,13 +12454,13 @@ def test_variant255(self): assert results['NM_007300.3:c.301+1G>C']['hgvs_lrg_variant'] == '' assert results['NM_007300.3:c.301+1G>C']['hgvs_transcript_variant'] == 'NM_007300.3:c.301+1G>C' assert results['NM_007300.3:c.301+1G>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_007300.3:c.301+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '41256884', 'alt': u'G'}} - assert results['NM_007300.3:c.301+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '43104867', 'alt': u'G'}} - assert results['NM_007300.3:c.301+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '41256884', 'alt': u'G'}} - assert results['NM_007300.3:c.301+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '43104867', 'alt': u'G'}} + assert results['NM_007300.3:c.301+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '41256884', 'alt': 
'G'}} + assert results['NM_007300.3:c.301+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '43104867', 'alt': 'G'}} + assert results['NM_007300.3:c.301+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '41256884', 'alt': 'G'}} + assert results['NM_007300.3:c.301+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '43104867', 'alt': 'G'}} assert results['NM_007300.3:c.301+1G>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009231.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007300.3'} - assert 'NM_007298.3:c.301+1G>C' in results.keys() + assert 'NM_007298.3:c.301+1G>C' in list(results.keys()) assert results['NM_007298.3:c.301+1G>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007298.3:c.301+1G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007298.3:c.301+1G>C']['alt_genomic_loci'] == [] @@ -12472,13 +12472,13 @@ def test_variant255(self): assert results['NM_007298.3:c.301+1G>C']['hgvs_lrg_variant'] == '' assert results['NM_007298.3:c.301+1G>C']['hgvs_transcript_variant'] == 'NM_007298.3:c.301+1G>C' assert results['NM_007298.3:c.301+1G>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_007298.3:c.301+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '41256884', 'alt': u'G'}} - assert results['NM_007298.3:c.301+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '43104867', 'alt': u'G'}} - assert results['NM_007298.3:c.301+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '41256884', 'alt': u'G'}} - assert results['NM_007298.3:c.301+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '43104867', 'alt': u'G'}} + assert results['NM_007298.3:c.301+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '41256884', 'alt': 'G'}} + assert results['NM_007298.3:c.301+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '43104867', 'alt': 'G'}} + assert results['NM_007298.3:c.301+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '41256884', 'alt': 'G'}} + assert results['NM_007298.3:c.301+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '43104867', 'alt': 'G'}} assert results['NM_007298.3:c.301+1G>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009229.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007298.3'} - assert 'NM_007297.3:c.160+1G>C' in results.keys() + assert 'NM_007297.3:c.160+1G>C' in list(results.keys()) assert results['NM_007297.3:c.160+1G>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007297.3:c.160+1G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007297.3:c.160+1G>C']['alt_genomic_loci'] == [] @@ -12490,14 +12490,14 @@ def test_variant255(self): assert results['NM_007297.3:c.160+1G>C']['hgvs_lrg_variant'] == '' assert results['NM_007297.3:c.160+1G>C']['hgvs_transcript_variant'] == 'NM_007297.3:c.160+1G>C' assert results['NM_007297.3:c.160+1G>C']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_007297.3:c.160+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '41256884', 'alt': u'G'}} - assert results['NM_007297.3:c.160+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '43104867', 'alt': u'G'}} - assert results['NM_007297.3:c.160+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '41256884', 'alt': u'G'}} - assert results['NM_007297.3:c.160+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '43104867', 'alt': u'G'}} + assert results['NM_007297.3:c.160+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '41256884', 'alt': 'G'}} + assert results['NM_007297.3:c.160+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '43104867', 'alt': 'G'}} + assert results['NM_007297.3:c.160+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '41256884', 'alt': 'G'}} + assert results['NM_007297.3:c.160+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '43104867', 'alt': 'G'}} assert results['NM_007297.3:c.160+1G>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009228.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007297.3'} assert results['flag'] == 'gene_variant' - assert 'NM_007294.3:c.301+1G>C' in results.keys() + assert 'NM_007294.3:c.301+1G>C' in list(results.keys()) 
assert results['NM_007294.3:c.301+1G>C']['hgvs_lrg_transcript_variant'] == 'LRG_292t1:c.301+1G>C' assert results['NM_007294.3:c.301+1G>C']['refseqgene_context_intronic_sequence'] == 'NG_005905.2(NM_007294.3):c.301+1G>C' assert results['NM_007294.3:c.301+1G>C']['alt_genomic_loci'] == [] @@ -12509,19 +12509,19 @@ def test_variant255(self): assert results['NM_007294.3:c.301+1G>C']['hgvs_lrg_variant'] == 'LRG_292:g.113117G>C' assert results['NM_007294.3:c.301+1G>C']['hgvs_transcript_variant'] == 'NM_007294.3:c.301+1G>C' assert results['NM_007294.3:c.301+1G>C']['hgvs_refseqgene_variant'] == 'NG_005905.2:g.113117G>C' - assert results['NM_007294.3:c.301+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '41256884', 'alt': u'G'}} - assert results['NM_007294.3:c.301+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '43104867', 'alt': u'G'}} - assert results['NM_007294.3:c.301+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '41256884', 'alt': u'G'}} - assert results['NM_007294.3:c.301+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '43104867', 'alt': u'G'}} + assert results['NM_007294.3:c.301+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '41256884', 'alt': 'G'}} + assert results['NM_007294.3:c.301+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '43104867', 'alt': 'G'}} + assert results['NM_007294.3:c.301+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '41256884', 'alt': 'G'}} + assert results['NM_007294.3:c.301+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '43104867', 'alt': 'G'}} assert results['NM_007294.3:c.301+1G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_005905.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009225.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007294.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_292.xml'} def test_variant256(self): variant = '17-42991428-C-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001363846.1:c.490G>T' in results.keys() + assert 'NM_001363846.1:c.490G>T' in list(results.keys()) assert results['NM_001363846.1:c.490G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363846.1:c.490G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363846.1:c.490G>T']['alt_genomic_loci'] == [] @@ -12533,13 +12533,13 @@ def test_variant256(self): assert results['NM_001363846.1:c.490G>T']['hgvs_lrg_variant'] == '' assert results['NM_001363846.1:c.490G>T']['hgvs_transcript_variant'] == 'NM_001363846.1:c.490G>T' assert results['NM_001363846.1:c.490G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001363846.1:c.490G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '42991428', 'alt': u'A'}} - assert 'hg38' not in results['NM_001363846.1:c.490G>T']['primary_assembly_loci'].keys() - assert results['NM_001363846.1:c.490G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '42991428', 'alt': u'A'}} - assert 'grch38' not in 
results['NM_001363846.1:c.490G>T']['primary_assembly_loci'].keys() + assert results['NM_001363846.1:c.490G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '42991428', 'alt': 'A'}} + assert 'hg38' not in list(results['NM_001363846.1:c.490G>T']['primary_assembly_loci'].keys()) + assert results['NM_001363846.1:c.490G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '42991428', 'alt': 'A'}} + assert 'grch38' not in list(results['NM_001363846.1:c.490G>T']['primary_assembly_loci'].keys()) assert results['NM_001363846.1:c.490G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350775.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363846.1'} - assert 'NM_001131019.2:c.490G>T' in results.keys() + assert 'NM_001131019.2:c.490G>T' in list(results.keys()) assert results['NM_001131019.2:c.490G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001131019.2:c.490G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001131019.2:c.490G>T']['alt_genomic_loci'] == [] @@ -12551,14 +12551,14 @@ def test_variant256(self): assert results['NM_001131019.2:c.490G>T']['hgvs_lrg_variant'] == '' assert results['NM_001131019.2:c.490G>T']['hgvs_transcript_variant'] == 'NM_001131019.2:c.490G>T' assert results['NM_001131019.2:c.490G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001131019.2:c.490G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '42991428', 'alt': u'A'}} - assert results['NM_001131019.2:c.490G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '44914060', 'alt': u'A'}} - assert 
results['NM_001131019.2:c.490G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '42991428', 'alt': u'A'}} - assert results['NM_001131019.2:c.490G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '44914060', 'alt': u'A'}} + assert results['NM_001131019.2:c.490G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '42991428', 'alt': 'A'}} + assert results['NM_001131019.2:c.490G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '44914060', 'alt': 'A'}} + assert results['NM_001131019.2:c.490G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '42991428', 'alt': 'A'}} + assert results['NM_001131019.2:c.490G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '44914060', 'alt': 'A'}} assert results['NM_001131019.2:c.490G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124491.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001131019.2'} assert results['flag'] == 'gene_variant' - assert 'NM_001242376.1:c.490G>T' in results.keys() + assert 'NM_001242376.1:c.490G>T' in list(results.keys()) assert results['NM_001242376.1:c.490G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001242376.1:c.490G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001242376.1:c.490G>T']['alt_genomic_loci'] == [] @@ -12570,13 +12570,13 @@ def test_variant256(self): assert results['NM_001242376.1:c.490G>T']['hgvs_lrg_variant'] == '' assert 
results['NM_001242376.1:c.490G>T']['hgvs_transcript_variant'] == 'NM_001242376.1:c.490G>T' assert results['NM_001242376.1:c.490G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001242376.1:c.490G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '42991428', 'alt': u'A'}} - assert results['NM_001242376.1:c.490G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '44914060', 'alt': u'A'}} - assert results['NM_001242376.1:c.490G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '42991428', 'alt': u'A'}} - assert results['NM_001242376.1:c.490G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '44914060', 'alt': u'A'}} + assert results['NM_001242376.1:c.490G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '42991428', 'alt': 'A'}} + assert results['NM_001242376.1:c.490G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '44914060', 'alt': 'A'}} + assert results['NM_001242376.1:c.490G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '42991428', 'alt': 'A'}} + assert results['NM_001242376.1:c.490G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '44914060', 'alt': 'A'}} assert results['NM_001242376.1:c.490G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001229305.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001242376.1'} - assert 'NM_002055.4:c.490G>T' in results.keys() + assert 'NM_002055.4:c.490G>T' in list(results.keys()) assert results['NM_002055.4:c.490G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_002055.4:c.490G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_002055.4:c.490G>T']['alt_genomic_loci'] == [] @@ -12588,19 +12588,19 @@ def test_variant256(self): assert results['NM_002055.4:c.490G>T']['hgvs_lrg_variant'] == '' assert results['NM_002055.4:c.490G>T']['hgvs_transcript_variant'] == 'NM_002055.4:c.490G>T' assert results['NM_002055.4:c.490G>T']['hgvs_refseqgene_variant'] == 'NG_008401.1:g.6487G>T' - assert results['NM_002055.4:c.490G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '42991428', 'alt': u'A'}} - assert results['NM_002055.4:c.490G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '44914060', 'alt': u'A'}} - assert results['NM_002055.4:c.490G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '42991428', 'alt': u'A'}} - assert results['NM_002055.4:c.490G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '44914060', 'alt': u'A'}} + assert results['NM_002055.4:c.490G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '42991428', 'alt': 'A'}} + assert results['NM_002055.4:c.490G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '44914060', 'alt': 'A'}} + assert results['NM_002055.4:c.490G>T']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '42991428', 'alt': 'A'}} + assert results['NM_002055.4:c.490G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '44914060', 'alt': 'A'}} assert results['NM_002055.4:c.490G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008401.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002046.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002055.4'} def test_variant257(self): variant = '17-48252809-A-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NR_135553.1:n.1022A>T' in results.keys() + assert 'NR_135553.1:n.1022A>T' in list(results.keys()) assert results['NR_135553.1:n.1022A>T']['hgvs_lrg_transcript_variant'] == '' assert results['NR_135553.1:n.1022A>T']['refseqgene_context_intronic_sequence'] == '' assert results['NR_135553.1:n.1022A>T']['alt_genomic_loci'] == [] @@ -12618,7 +12618,7 @@ def test_variant257(self): assert results['NR_135553.1:n.1022A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50175448A>T', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '50175448', 'alt': 'T'}} assert results['NR_135553.1:n.1022A>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_135553.1'} - assert 'NM_001135697.1:c.*11A>T' in results.keys() + assert 'NM_001135697.1:c.*11A>T' in list(results.keys()) assert results['NM_001135697.1:c.*11A>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001135697.1:c.*11A>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001135697.1:c.*11A>T']['alt_genomic_loci'] == [] @@ -12631,13 +12631,13 @@ def test_variant257(self): assert results['NM_001135697.1:c.*11A>T']['hgvs_transcript_variant'] == 'NM_001135697.1:c.*11A>T' assert 
results['NM_001135697.1:c.*11A>T']['hgvs_refseqgene_variant'] == '' assert results['NM_001135697.1:c.*11A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '48252809', 'alt': 'T'}} - assert 'hg38' not in results['NM_001135697.1:c.*11A>T']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001135697.1:c.*11A>T']['primary_assembly_loci'].keys()) assert results['NM_001135697.1:c.*11A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '48252809', 'alt': 'T'}} - assert 'grch38' not in results['NM_001135697.1:c.*11A>T']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001135697.1:c.*11A>T']['primary_assembly_loci'].keys()) assert results['NM_001135697.1:c.*11A>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129169.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135697.1'} assert results['flag'] == 'gene_variant' - assert 'NM_000023.2:c.*11A>T' in results.keys() + assert 'NM_000023.2:c.*11A>T' in list(results.keys()) assert results['NM_000023.2:c.*11A>T']['hgvs_lrg_transcript_variant'] == 'LRG_203t1:c.*11A>T' assert results['NM_000023.2:c.*11A>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000023.2:c.*11A>T']['alt_genomic_loci'] == [] @@ -12650,12 +12650,12 @@ def test_variant257(self): assert results['NM_000023.2:c.*11A>T']['hgvs_transcript_variant'] == 'NM_000023.2:c.*11A>T' assert results['NM_000023.2:c.*11A>T']['hgvs_refseqgene_variant'] == 'NG_008889.1:g.14444A>T' assert results['NM_000023.2:c.*11A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '48252809', 'alt': 'T'}} - assert 'hg38' not in results['NM_000023.2:c.*11A>T']['primary_assembly_loci'].keys() + assert 'hg38' 
not in list(results['NM_000023.2:c.*11A>T']['primary_assembly_loci'].keys()) assert results['NM_000023.2:c.*11A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '48252809', 'alt': 'T'}} - assert 'grch38' not in results['NM_000023.2:c.*11A>T']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_000023.2:c.*11A>T']['primary_assembly_loci'].keys()) assert results['NM_000023.2:c.*11A>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008889.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000014.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000023.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_203.xml'} - assert 'NM_001135697.2:c.*11A>T' in results.keys() + assert 'NM_001135697.2:c.*11A>T' in list(results.keys()) assert results['NM_001135697.2:c.*11A>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001135697.2:c.*11A>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001135697.2:c.*11A>T']['alt_genomic_loci'] == [] @@ -12673,7 +12673,7 @@ def test_variant257(self): assert results['NM_001135697.2:c.*11A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50175448A>T', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '50175448', 'alt': 'T'}} assert results['NM_001135697.2:c.*11A>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129169.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135697.2'} - assert 'NM_000023.3:c.*11A>T' in results.keys() + assert 'NM_000023.3:c.*11A>T' in list(results.keys()) assert results['NM_000023.3:c.*11A>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000023.3:c.*11A>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000023.3:c.*11A>T']['alt_genomic_loci'] == [] @@ -12695,10 +12695,10 @@ def test_variant257(self): def 
test_variant258(self): variant = '17-62022709-G-GTC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000334.4:c.3720+9_3720+10dup' in results.keys() + assert 'NM_000334.4:c.3720+9_3720+10dup' in list(results.keys()) assert results['NM_000334.4:c.3720+9_3720+10dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000334.4:c.3720+9_3720+10dup']['refseqgene_context_intronic_sequence'] == 'NG_011699.1(NM_000334.4):c.3720+9_3720+10dup' assert results['NM_000334.4:c.3720+9_3720+10dup']['alt_genomic_loci'] == [] @@ -12720,10 +12720,10 @@ def test_variant258(self): def test_variant259(self): variant = '17-62022711-C-CT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000334.4:c.3720+8_3720+9insA' in results.keys() + assert 'NM_000334.4:c.3720+8_3720+9insA' in list(results.keys()) assert results['NM_000334.4:c.3720+8_3720+9insA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000334.4:c.3720+8_3720+9insA']['refseqgene_context_intronic_sequence'] == 'NG_011699.1(NM_000334.4):c.3720+8_3720+9insA' assert results['NM_000334.4:c.3720+8_3720+9insA']['alt_genomic_loci'] == [] @@ -12735,20 +12735,20 @@ def test_variant259(self): assert results['NM_000334.4:c.3720+8_3720+9insA']['hgvs_lrg_variant'] == '' assert results['NM_000334.4:c.3720+8_3720+9insA']['hgvs_transcript_variant'] == 'NM_000334.4:c.3720+8_3720+9insA' assert results['NM_000334.4:c.3720+8_3720+9insA']['hgvs_refseqgene_variant'] == 'NG_011699.1:g.32567_32568insA' - assert results['NM_000334.4:c.3720+8_3720+9insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.62022711_62022712insT', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '62022711', 'alt': u'CT'}} - assert results['NM_000334.4:c.3720+8_3720+9insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000017.11:g.63945351_63945352insT', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '63945351', 'alt': u'CT'}} - assert results['NM_000334.4:c.3720+8_3720+9insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.62022711_62022712insT', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '62022711', 'alt': u'CT'}} - assert results['NM_000334.4:c.3720+8_3720+9insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945351_63945352insT', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '63945351', 'alt': u'CT'}} + assert results['NM_000334.4:c.3720+8_3720+9insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.62022711_62022712insT', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '62022711', 'alt': 'CT'}} + assert results['NM_000334.4:c.3720+8_3720+9insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945351_63945352insT', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '63945351', 'alt': 'CT'}} + assert results['NM_000334.4:c.3720+8_3720+9insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.62022711_62022712insT', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '62022711', 'alt': 'CT'}} + assert results['NM_000334.4:c.3720+8_3720+9insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945351_63945352insT', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '63945351', 'alt': 'CT'}} assert results['NM_000334.4:c.3720+8_3720+9insA']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011699.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4'} def test_variant260(self): variant = '17-62023005-G-GGC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000334.4:c.3442-8_3442-7insGC' in results.keys() + assert 
'NM_000334.4:c.3442-8_3442-7insGC' in list(results.keys()) assert results['NM_000334.4:c.3442-8_3442-7insGC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000334.4:c.3442-8_3442-7insGC']['refseqgene_context_intronic_sequence'] == 'NG_011699.1(NM_000334.4):c.3442-8_3442-7insGC' assert results['NM_000334.4:c.3442-8_3442-7insGC']['alt_genomic_loci'] == [] @@ -12760,20 +12760,20 @@ def test_variant260(self): assert results['NM_000334.4:c.3442-8_3442-7insGC']['hgvs_lrg_variant'] == '' assert results['NM_000334.4:c.3442-8_3442-7insGC']['hgvs_transcript_variant'] == 'NM_000334.4:c.3442-8_3442-7insGC' assert results['NM_000334.4:c.3442-8_3442-7insGC']['hgvs_refseqgene_variant'] == 'NG_011699.1:g.32273_32274insGC' - assert results['NM_000334.4:c.3442-8_3442-7insGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.62023005_62023006insGC', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '62023005', 'alt': u'GGC'}} - assert results['NM_000334.4:c.3442-8_3442-7insGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945645_63945646insGC', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '63945645', 'alt': u'GGC'}} - assert results['NM_000334.4:c.3442-8_3442-7insGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.62023005_62023006insGC', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '62023005', 'alt': u'GGC'}} - assert results['NM_000334.4:c.3442-8_3442-7insGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945645_63945646insGC', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '63945645', 'alt': u'GGC'}} + assert results['NM_000334.4:c.3442-8_3442-7insGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.62023005_62023006insGC', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '62023005', 'alt': 'GGC'}} + assert results['NM_000334.4:c.3442-8_3442-7insGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000017.11:g.63945645_63945646insGC', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '63945645', 'alt': 'GGC'}} + assert results['NM_000334.4:c.3442-8_3442-7insGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.62023005_62023006insGC', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '62023005', 'alt': 'GGC'}} + assert results['NM_000334.4:c.3442-8_3442-7insGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945645_63945646insGC', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '63945645', 'alt': 'GGC'}} assert results['NM_000334.4:c.3442-8_3442-7insGC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011699.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4'} def test_variant261(self): variant = '17-62023006-C-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000334.4:c.3442-8G>T' in results.keys() + assert 'NM_000334.4:c.3442-8G>T' in list(results.keys()) assert results['NM_000334.4:c.3442-8G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000334.4:c.3442-8G>T']['refseqgene_context_intronic_sequence'] == 'NG_011699.1(NM_000334.4):c.3442-8G>T' assert results['NM_000334.4:c.3442-8G>T']['alt_genomic_loci'] == [] @@ -12785,20 +12785,20 @@ def test_variant261(self): assert results['NM_000334.4:c.3442-8G>T']['hgvs_lrg_variant'] == '' assert results['NM_000334.4:c.3442-8G>T']['hgvs_transcript_variant'] == 'NM_000334.4:c.3442-8G>T' assert results['NM_000334.4:c.3442-8G>T']['hgvs_refseqgene_variant'] == 'NG_011699.1:g.32273G>T' - assert results['NM_000334.4:c.3442-8G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.62023006C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '62023006', 'alt': u'A'}} - assert 
results['NM_000334.4:c.3442-8G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945646C>A', 'vcf': {'chr': 'chr17', 'ref': u'C', 'pos': '63945646', 'alt': u'A'}} - assert results['NM_000334.4:c.3442-8G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.62023006C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '62023006', 'alt': u'A'}} - assert results['NM_000334.4:c.3442-8G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945646C>A', 'vcf': {'chr': '17', 'ref': u'C', 'pos': '63945646', 'alt': u'A'}} + assert results['NM_000334.4:c.3442-8G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.62023006C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '62023006', 'alt': 'A'}} + assert results['NM_000334.4:c.3442-8G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945646C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '63945646', 'alt': 'A'}} + assert results['NM_000334.4:c.3442-8G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.62023006C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '62023006', 'alt': 'A'}} + assert results['NM_000334.4:c.3442-8G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945646C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '63945646', 'alt': 'A'}} assert results['NM_000334.4:c.3442-8G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011699.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4'} def test_variant262(self): variant = '17-62034787-G-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000334.4:c.2111C>T' in results.keys() + assert 'NM_000334.4:c.2111C>T' in 
list(results.keys()) assert results['NM_000334.4:c.2111C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000334.4:c.2111C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000334.4:c.2111C>T']['alt_genomic_loci'] == [] @@ -12810,19 +12810,19 @@ def test_variant262(self): assert results['NM_000334.4:c.2111C>T']['hgvs_lrg_variant'] == '' assert results['NM_000334.4:c.2111C>T']['hgvs_transcript_variant'] == 'NM_000334.4:c.2111C>T' assert results['NM_000334.4:c.2111C>T']['hgvs_refseqgene_variant'] == 'NG_011699.1:g.20492C>T' - assert results['NM_000334.4:c.2111C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.62034787G>A', 'vcf': {'chr': 'chr17', 'ref': u'G', 'pos': '62034787', 'alt': u'A'}} - assert results['NM_000334.4:c.2111C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63957427G>A', 'vcf': {'chr': 'chr17', 'ref': u'G', 'pos': '63957427', 'alt': u'A'}} - assert results['NM_000334.4:c.2111C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.62034787G>A', 'vcf': {'chr': '17', 'ref': u'G', 'pos': '62034787', 'alt': u'A'}} - assert results['NM_000334.4:c.2111C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63957427G>A', 'vcf': {'chr': '17', 'ref': u'G', 'pos': '63957427', 'alt': u'A'}} + assert results['NM_000334.4:c.2111C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.62034787G>A', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '62034787', 'alt': 'A'}} + assert results['NM_000334.4:c.2111C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63957427G>A', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '63957427', 'alt': 'A'}} + assert results['NM_000334.4:c.2111C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.62034787G>A', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '62034787', 'alt': 'A'}} + 
assert results['NM_000334.4:c.2111C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63957427G>A', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '63957427', 'alt': 'A'}} assert results['NM_000334.4:c.2111C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011699.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4'} def test_variant263(self): variant = '18-24128261-GTCCTCC-G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001351443.1:c.-16+941_-16+946del' in results.keys() + assert 'NM_001351443.1:c.-16+941_-16+946del' in list(results.keys()) assert results['NM_001351443.1:c.-16+941_-16+946del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001351443.1:c.-16+941_-16+946del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001351443.1:c.-16+941_-16+946del']['alt_genomic_loci'] == [] @@ -12840,7 +12840,7 @@ def test_variant263(self): assert results['NM_001351443.1:c.-16+941_-16+946del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} assert results['NM_001351443.1:c.-16+941_-16+946del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001338372.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001351443.1'} - assert 'NM_001258222.1:c.10-47053_10-47048del' in results.keys() + assert 'NM_001258222.1:c.10-47053_10-47048del' in list(results.keys()) assert results['NM_001258222.1:c.10-47053_10-47048del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001258222.1:c.10-47053_10-47048del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001258222.1:c.10-47053_10-47048del']['alt_genomic_loci'] == [] @@ -12858,7 +12858,7 @@ def test_variant263(self): 
assert results['NM_001258222.1:c.10-47053_10-47048del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} assert results['NM_001258222.1:c.10-47053_10-47048del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001245151.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001258222.1'} - assert 'NM_001258221.1:c.-16+1426_-16+1431del' in results.keys() + assert 'NM_001258221.1:c.-16+1426_-16+1431del' in list(results.keys()) assert results['NM_001258221.1:c.-16+1426_-16+1431del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001258221.1:c.-16+1426_-16+1431del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001258221.1:c.-16+1426_-16+1431del']['alt_genomic_loci'] == [] @@ -12876,7 +12876,7 @@ def test_variant263(self): assert results['NM_001258221.1:c.-16+1426_-16+1431del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} assert results['NM_001258221.1:c.-16+1426_-16+1431del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001245150.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001258221.1'} - assert 'NM_001258222.2:c.10-47053_10-47048del' in results.keys() + assert 'NM_001258222.2:c.10-47053_10-47048del' in list(results.keys()) assert results['NM_001258222.2:c.10-47053_10-47048del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001258222.2:c.10-47053_10-47048del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001258222.2:c.10-47053_10-47048del']['alt_genomic_loci'] == [] @@ -12895,7 +12895,7 @@ def test_variant263(self): assert results['NM_001258222.2:c.10-47053_10-47048del']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001245151.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001258222.2'} assert results['flag'] == 'gene_variant' - assert 'NM_001136205.2:c.-16+588_-16+593del' in results.keys() + assert 'NM_001136205.2:c.-16+588_-16+593del' in list(results.keys()) assert results['NM_001136205.2:c.-16+588_-16+593del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001136205.2:c.-16+588_-16+593del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001136205.2:c.-16+588_-16+593del']['alt_genomic_loci'] == [] @@ -12913,7 +12913,7 @@ def test_variant263(self): assert results['NM_001136205.2:c.-16+588_-16+593del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} assert results['NM_001136205.2:c.-16+588_-16+593del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129677.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001136205.2'} - assert 'NM_198991.3:c.-15-47053_-15-47048del' in results.keys() + assert 'NM_198991.3:c.-15-47053_-15-47048del' in list(results.keys()) assert results['NM_198991.3:c.-15-47053_-15-47048del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_198991.3:c.-15-47053_-15-47048del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_198991.3:c.-15-47053_-15-47048del']['alt_genomic_loci'] == [] @@ -12931,7 +12931,7 @@ def test_variant263(self): assert results['NM_198991.3:c.-15-47053_-15-47048del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} assert results['NM_198991.3:c.-15-47053_-15-47048del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_945342.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198991.3'} - assert 
'NM_001142730.2:c.234_239del' in results.keys() + assert 'NM_001142730.2:c.234_239del' in list(results.keys()) assert results['NM_001142730.2:c.234_239del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001142730.2:c.234_239del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001142730.2:c.234_239del']['alt_genomic_loci'] == [] @@ -12953,10 +12953,10 @@ def test_variant263(self): def test_variant264(self): variant = '19-15291774-G-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000435.2:c.2992C>T' in results.keys() + assert 'NM_000435.2:c.2992C>T' in list(results.keys()) assert results['NM_000435.2:c.2992C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000435.2:c.2992C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000435.2:c.2992C>T']['alt_genomic_loci'] == [] @@ -12968,20 +12968,20 @@ def test_variant264(self): assert results['NM_000435.2:c.2992C>T']['hgvs_lrg_variant'] == '' assert results['NM_000435.2:c.2992C>T']['hgvs_transcript_variant'] == 'NM_000435.2:c.2992C>T' assert results['NM_000435.2:c.2992C>T']['hgvs_refseqgene_variant'] == 'NG_009819.1:g.25019C>T' - assert results['NM_000435.2:c.2992C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.15291774G>A', 'vcf': {'chr': 'chr19', 'ref': u'G', 'pos': '15291774', 'alt': u'A'}} - assert results['NM_000435.2:c.2992C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.15180963G>A', 'vcf': {'chr': 'chr19', 'ref': u'G', 'pos': '15180963', 'alt': u'A'}} - assert results['NM_000435.2:c.2992C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.15291774G>A', 'vcf': {'chr': '19', 'ref': u'G', 'pos': '15291774', 'alt': u'A'}} - assert results['NM_000435.2:c.2992C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.15180963G>A', 
'vcf': {'chr': '19', 'ref': u'G', 'pos': '15180963', 'alt': u'A'}} + assert results['NM_000435.2:c.2992C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.15291774G>A', 'vcf': {'chr': 'chr19', 'ref': 'G', 'pos': '15291774', 'alt': 'A'}} + assert results['NM_000435.2:c.2992C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.15180963G>A', 'vcf': {'chr': 'chr19', 'ref': 'G', 'pos': '15180963', 'alt': 'A'}} + assert results['NM_000435.2:c.2992C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.15291774G>A', 'vcf': {'chr': '19', 'ref': 'G', 'pos': '15291774', 'alt': 'A'}} + assert results['NM_000435.2:c.2992C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.15180963G>A', 'vcf': {'chr': '19', 'ref': 'G', 'pos': '15180963', 'alt': 'A'}} assert results['NM_000435.2:c.2992C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009819.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000426.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000435.2'} def test_variant265(self): variant = '19-15311794-A-G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'intergenic' - assert 'Intergenic_Variant_1' in results.keys() + assert 'Intergenic_Variant_1' in list(results.keys()) assert results['Intergenic_Variant_1']['hgvs_lrg_transcript_variant'] == '' assert results['Intergenic_Variant_1']['refseqgene_context_intronic_sequence'] == '' assert results['Intergenic_Variant_1']['alt_genomic_loci'] == [] @@ -12993,20 +12993,20 @@ def test_variant265(self): assert results['Intergenic_Variant_1']['hgvs_lrg_variant'] == '' assert results['Intergenic_Variant_1']['hgvs_transcript_variant'] == '' assert results['Intergenic_Variant_1']['hgvs_refseqgene_variant'] == 'NG_009819.1:g.4999T>C' - assert 
results['Intergenic_Variant_1']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': u'NC_000019.9:g.15311794A>G', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '15311794', 'alt': 'G'}} - assert results['Intergenic_Variant_1']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': u'NC_000019.10:g.15200983A>G', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '15200983', 'alt': 'G'}} - assert results['Intergenic_Variant_1']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': u'NC_000019.9:g.15311794A>G', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '15311794', 'alt': 'G'}} - assert results['Intergenic_Variant_1']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': u'NC_000019.10:g.15200983A>G', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '15200983', 'alt': 'G'}} + assert results['Intergenic_Variant_1']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.15311794A>G', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '15311794', 'alt': 'G'}} + assert results['Intergenic_Variant_1']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.15200983A>G', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '15200983', 'alt': 'G'}} + assert results['Intergenic_Variant_1']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.15311794A>G', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '15311794', 'alt': 'G'}} + assert results['Intergenic_Variant_1']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.15200983A>G', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '15200983', 'alt': 'G'}} assert results['Intergenic_Variant_1']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009819.1'} def test_variant266(self): variant = '19-39076592-G-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000540.2:c.14818G>A' in results.keys() + 
assert 'NM_000540.2:c.14818G>A' in list(results.keys()) assert results['NM_000540.2:c.14818G>A']['hgvs_lrg_transcript_variant'] == 'LRG_766t1:c.14818G>A' assert results['NM_000540.2:c.14818G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000540.2:c.14818G>A']['alt_genomic_loci'] == [] @@ -13024,7 +13024,7 @@ def test_variant266(self): assert results['NM_000540.2:c.14818G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.38585952G>A', 'vcf': {'chr': '19', 'ref': 'G', 'pos': '38585952', 'alt': 'A'}} assert results['NM_000540.2:c.14818G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008866.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000531.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000540.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_766.xml'} - assert 'NM_001042723.1:c.14803G>A' in results.keys() + assert 'NM_001042723.1:c.14803G>A' in list(results.keys()) assert results['NM_001042723.1:c.14803G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001042723.1:c.14803G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001042723.1:c.14803G>A']['alt_genomic_loci'] == [] @@ -13046,9 +13046,9 @@ def test_variant266(self): def test_variant267(self): variant = '2-50149352-T-C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001330086.1:c.4245A>G' in results.keys() + assert 'NM_001330086.1:c.4245A>G' in list(results.keys()) assert results['NM_001330086.1:c.4245A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330086.1:c.4245A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330086.1:c.4245A>G']['alt_genomic_loci'] == [] @@ -13060,13 +13060,13 @@ def test_variant267(self): assert results['NM_001330086.1:c.4245A>G']['hgvs_lrg_variant'] == '' assert results['NM_001330086.1:c.4245A>G']['hgvs_transcript_variant'] == 
'NM_001330086.1:c.4245A>G' assert results['NM_001330086.1:c.4245A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330086.1:c.4245A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330086.1:c.4245A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} - assert results['NM_001330086.1:c.4245A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330086.1:c.4245A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330086.1:c.4245A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330086.1:c.4245A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} + assert results['NM_001330086.1:c.4245A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330086.1:c.4245A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} assert results['NM_001330086.1:c.4245A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317015.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330086.1'} - assert 
'NM_001330083.1:c.4089A>G' in results.keys() + assert 'NM_001330083.1:c.4089A>G' in list(results.keys()) assert results['NM_001330083.1:c.4089A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330083.1:c.4089A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330083.1:c.4089A>G']['alt_genomic_loci'] == [] @@ -13078,13 +13078,13 @@ def test_variant267(self): assert results['NM_001330083.1:c.4089A>G']['hgvs_lrg_variant'] == '' assert results['NM_001330083.1:c.4089A>G']['hgvs_transcript_variant'] == 'NM_001330083.1:c.4089A>G' assert results['NM_001330083.1:c.4089A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330083.1:c.4089A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330083.1:c.4089A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} - assert results['NM_001330083.1:c.4089A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330083.1:c.4089A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330083.1:c.4089A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330083.1:c.4089A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} + assert results['NM_001330083.1:c.4089A>G']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330083.1:c.4089A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} assert results['NM_001330083.1:c.4089A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317012.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330083.1'} - assert 'NM_001330095.1:c.4113A>G' in results.keys() + assert 'NM_001330095.1:c.4113A>G' in list(results.keys()) assert results['NM_001330095.1:c.4113A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330095.1:c.4113A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330095.1:c.4113A>G']['alt_genomic_loci'] == [] @@ -13096,13 +13096,13 @@ def test_variant267(self): assert results['NM_001330095.1:c.4113A>G']['hgvs_lrg_variant'] == '' assert results['NM_001330095.1:c.4113A>G']['hgvs_transcript_variant'] == 'NM_001330095.1:c.4113A>G' assert results['NM_001330095.1:c.4113A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330095.1:c.4113A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330095.1:c.4113A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} - assert results['NM_001330095.1:c.4113A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330095.1:c.4113A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': 
'49922214', 'alt': u'C'}} + assert results['NM_001330095.1:c.4113A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330095.1:c.4113A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} + assert results['NM_001330095.1:c.4113A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330095.1:c.4113A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} assert results['NM_001330095.1:c.4113A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317024.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330095.1'} - assert 'NM_138735.2:c.1059A>G' in results.keys() + assert 'NM_138735.2:c.1059A>G' in list(results.keys()) assert results['NM_138735.2:c.1059A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_138735.2:c.1059A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_138735.2:c.1059A>G']['alt_genomic_loci'] == [] @@ -13114,13 +13114,13 @@ def test_variant267(self): assert results['NM_138735.2:c.1059A>G']['hgvs_lrg_variant'] == '' assert results['NM_138735.2:c.1059A>G']['hgvs_transcript_variant'] == 'NM_138735.2:c.1059A>G' assert results['NM_138735.2:c.1059A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_138735.2:c.1059A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_138735.2:c.1059A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} - assert results['NM_138735.2:c.1059A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_138735.2:c.1059A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_138735.2:c.1059A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_138735.2:c.1059A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} + assert results['NM_138735.2:c.1059A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_138735.2:c.1059A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} assert results['NM_138735.2:c.1059A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_620072.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_138735.2'} - assert 'NM_001330078.1:c.4254A>G' in results.keys() + assert 'NM_001330078.1:c.4254A>G' in list(results.keys()) assert results['NM_001330078.1:c.4254A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330078.1:c.4254A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330078.1:c.4254A>G']['alt_genomic_loci'] == [] @@ -13132,13 +13132,13 @@ def test_variant267(self): assert results['NM_001330078.1:c.4254A>G']['hgvs_lrg_variant'] == '' 
assert results['NM_001330078.1:c.4254A>G']['hgvs_transcript_variant'] == 'NM_001330078.1:c.4254A>G' assert results['NM_001330078.1:c.4254A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330078.1:c.4254A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330078.1:c.4254A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} - assert results['NM_001330078.1:c.4254A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330078.1:c.4254A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330078.1:c.4254A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330078.1:c.4254A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} + assert results['NM_001330078.1:c.4254A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330078.1:c.4254A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} assert results['NM_001330078.1:c.4254A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317007.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330078.1'} - assert 'NM_001330094.1:c.4233A>G' in results.keys() + assert 'NM_001330094.1:c.4233A>G' in list(results.keys()) assert results['NM_001330094.1:c.4233A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330094.1:c.4233A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330094.1:c.4233A>G']['alt_genomic_loci'] == [] @@ -13150,13 +13150,13 @@ def test_variant267(self): assert results['NM_001330094.1:c.4233A>G']['hgvs_lrg_variant'] == '' assert results['NM_001330094.1:c.4233A>G']['hgvs_transcript_variant'] == 'NM_001330094.1:c.4233A>G' assert results['NM_001330094.1:c.4233A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330094.1:c.4233A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330094.1:c.4233A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} - assert results['NM_001330094.1:c.4233A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330094.1:c.4233A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330094.1:c.4233A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330094.1:c.4233A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} + assert 
results['NM_001330094.1:c.4233A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330094.1:c.4233A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} assert results['NM_001330094.1:c.4233A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317023.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330094.1'} - assert 'NM_001320157.3:c.150A>G' in results.keys() + assert 'NM_001320157.3:c.150A>G' in list(results.keys()) assert results['NM_001320157.3:c.150A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001320157.3:c.150A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001320157.3:c.150A>G']['alt_genomic_loci'] == [] @@ -13168,13 +13168,13 @@ def test_variant267(self): assert results['NM_001320157.3:c.150A>G']['hgvs_lrg_variant'] == '' assert results['NM_001320157.3:c.150A>G']['hgvs_transcript_variant'] == 'NM_001320157.3:c.150A>G' assert results['NM_001320157.3:c.150A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001320157.3:c.150A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001320157.3:c.150A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} - assert results['NM_001320157.3:c.150A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001320157.3:c.150A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001320157.3:c.150A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001320157.3:c.150A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} + assert results['NM_001320157.3:c.150A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001320157.3:c.150A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} assert results['NM_001320157.3:c.150A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001307086.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001320157.3'} - assert 'NM_001330088.1:c.4074A>G' in results.keys() + assert 'NM_001330088.1:c.4074A>G' in list(results.keys()) assert results['NM_001330088.1:c.4074A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330088.1:c.4074A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330088.1:c.4074A>G']['alt_genomic_loci'] == [] @@ -13186,13 +13186,13 @@ def test_variant267(self): assert results['NM_001330088.1:c.4074A>G']['hgvs_lrg_variant'] == '' assert results['NM_001330088.1:c.4074A>G']['hgvs_transcript_variant'] == 'NM_001330088.1:c.4074A>G' assert results['NM_001330088.1:c.4074A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330088.1:c.4074A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert 
results['NM_001330088.1:c.4074A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} - assert results['NM_001330088.1:c.4074A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330088.1:c.4074A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330088.1:c.4074A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330088.1:c.4074A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} + assert results['NM_001330088.1:c.4074A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330088.1:c.4074A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} assert results['NM_001330088.1:c.4074A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317017.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330088.1'} - assert 'NM_001330092.1:c.1149A>G' in results.keys() + assert 'NM_001330092.1:c.1149A>G' in list(results.keys()) assert results['NM_001330092.1:c.1149A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330092.1:c.1149A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330092.1:c.1149A>G']['alt_genomic_loci'] 
== [] @@ -13204,13 +13204,13 @@ def test_variant267(self): assert results['NM_001330092.1:c.1149A>G']['hgvs_lrg_variant'] == '' assert results['NM_001330092.1:c.1149A>G']['hgvs_transcript_variant'] == 'NM_001330092.1:c.1149A>G' assert results['NM_001330092.1:c.1149A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330092.1:c.1149A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330092.1:c.1149A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} - assert results['NM_001330092.1:c.1149A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330092.1:c.1149A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330092.1:c.1149A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330092.1:c.1149A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} + assert results['NM_001330092.1:c.1149A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330092.1:c.1149A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} assert 
results['NM_001330092.1:c.1149A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317021.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330092.1'} - assert 'NM_138735.4:c.1059A>G' in results.keys() + assert 'NM_138735.4:c.1059A>G' in list(results.keys()) assert results['NM_138735.4:c.1059A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_138735.4:c.1059A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_138735.4:c.1059A>G']['alt_genomic_loci'] == [] @@ -13222,13 +13222,13 @@ def test_variant267(self): assert results['NM_138735.4:c.1059A>G']['hgvs_lrg_variant'] == '' assert results['NM_138735.4:c.1059A>G']['hgvs_transcript_variant'] == 'NM_138735.4:c.1059A>G' assert results['NM_138735.4:c.1059A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_138735.4:c.1059A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_138735.4:c.1059A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} - assert results['NM_138735.4:c.1059A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_138735.4:c.1059A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_138735.4:c.1059A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_138735.4:c.1059A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': 
{'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} + assert results['NM_138735.4:c.1059A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_138735.4:c.1059A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} assert results['NM_138735.4:c.1059A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_620072.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_138735.4'} - assert 'NM_001330096.1:c.4044A>G' in results.keys() + assert 'NM_001330096.1:c.4044A>G' in list(results.keys()) assert results['NM_001330096.1:c.4044A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330096.1:c.4044A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330096.1:c.4044A>G']['alt_genomic_loci'] == [] @@ -13240,13 +13240,13 @@ def test_variant267(self): assert results['NM_001330096.1:c.4044A>G']['hgvs_lrg_variant'] == '' assert results['NM_001330096.1:c.4044A>G']['hgvs_transcript_variant'] == 'NM_001330096.1:c.4044A>G' assert results['NM_001330096.1:c.4044A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330096.1:c.4044A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330096.1:c.4044A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} - assert results['NM_001330096.1:c.4044A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert 
results['NM_001330096.1:c.4044A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330096.1:c.4044A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330096.1:c.4044A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} + assert results['NM_001330096.1:c.4044A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330096.1:c.4044A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} assert results['NM_001330096.1:c.4044A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317025.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330096.1'} - assert 'NM_001135659.2:c.4374A>G' in results.keys() + assert 'NM_001135659.2:c.4374A>G' in list(results.keys()) assert results['NM_001135659.2:c.4374A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001135659.2:c.4374A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001135659.2:c.4374A>G']['alt_genomic_loci'] == [] @@ -13258,13 +13258,13 @@ def test_variant267(self): assert results['NM_001135659.2:c.4374A>G']['hgvs_lrg_variant'] == '' assert results['NM_001135659.2:c.4374A>G']['hgvs_transcript_variant'] == 'NM_001135659.2:c.4374A>G' assert results['NM_001135659.2:c.4374A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001135659.2:c.4374A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001135659.2:c.4374A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} - assert results['NM_001135659.2:c.4374A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001135659.2:c.4374A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001135659.2:c.4374A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001135659.2:c.4374A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} + assert results['NM_001135659.2:c.4374A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001135659.2:c.4374A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} assert results['NM_001135659.2:c.4374A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129131.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135659.2'} - assert 'NM_001330085.1:c.4227A>G' in results.keys() + assert 'NM_001330085.1:c.4227A>G' in list(results.keys()) assert results['NM_001330085.1:c.4227A>G']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001330085.1:c.4227A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330085.1:c.4227A>G']['alt_genomic_loci'] == [] @@ -13276,13 +13276,13 @@ def test_variant267(self): assert results['NM_001330085.1:c.4227A>G']['hgvs_lrg_variant'] == '' assert results['NM_001330085.1:c.4227A>G']['hgvs_transcript_variant'] == 'NM_001330085.1:c.4227A>G' assert results['NM_001330085.1:c.4227A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330085.1:c.4227A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330085.1:c.4227A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} - assert results['NM_001330085.1:c.4227A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330085.1:c.4227A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330085.1:c.4227A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330085.1:c.4227A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} + assert results['NM_001330085.1:c.4227A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330085.1:c.4227A>G']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} assert results['NM_001330085.1:c.4227A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317014.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330085.1'} - assert 'NM_001320156.1:c.159A>G' in results.keys() + assert 'NM_001320156.1:c.159A>G' in list(results.keys()) assert results['NM_001320156.1:c.159A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001320156.1:c.159A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001320156.1:c.159A>G']['alt_genomic_loci'] == [] @@ -13294,13 +13294,13 @@ def test_variant267(self): assert results['NM_001320156.1:c.159A>G']['hgvs_lrg_variant'] == '' assert results['NM_001320156.1:c.159A>G']['hgvs_transcript_variant'] == 'NM_001320156.1:c.159A>G' assert results['NM_001320156.1:c.159A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001320156.1:c.159A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001320156.1:c.159A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} - assert results['NM_001320156.1:c.159A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001320156.1:c.159A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001320156.1:c.159A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 
'alt': 'C'}} + assert results['NM_001320156.1:c.159A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} + assert results['NM_001320156.1:c.159A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001320156.1:c.159A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} assert results['NM_001320156.1:c.159A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001307085.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001320156.1'} - assert 'NM_001330077.1:c.4230A>G' in results.keys() + assert 'NM_001330077.1:c.4230A>G' in list(results.keys()) assert results['NM_001330077.1:c.4230A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330077.1:c.4230A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330077.1:c.4230A>G']['alt_genomic_loci'] == [] @@ -13312,13 +13312,13 @@ def test_variant267(self): assert results['NM_001330077.1:c.4230A>G']['hgvs_lrg_variant'] == '' assert results['NM_001330077.1:c.4230A>G']['hgvs_transcript_variant'] == 'NM_001330077.1:c.4230A>G' assert results['NM_001330077.1:c.4230A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330077.1:c.4230A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330077.1:c.4230A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} - assert results['NM_001330077.1:c.4230A>G']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330077.1:c.4230A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330077.1:c.4230A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330077.1:c.4230A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} + assert results['NM_001330077.1:c.4230A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330077.1:c.4230A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} assert results['NM_001330077.1:c.4230A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317006.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330077.1'} - assert 'NM_001330093.1:c.4251A>G' in results.keys() + assert 'NM_001330093.1:c.4251A>G' in list(results.keys()) assert results['NM_001330093.1:c.4251A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330093.1:c.4251A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330093.1:c.4251A>G']['alt_genomic_loci'] == [] @@ -13330,13 +13330,13 @@ def test_variant267(self): assert results['NM_001330093.1:c.4251A>G']['hgvs_lrg_variant'] == '' assert results['NM_001330093.1:c.4251A>G']['hgvs_transcript_variant'] == 'NM_001330093.1:c.4251A>G' assert 
results['NM_001330093.1:c.4251A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330093.1:c.4251A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330093.1:c.4251A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} - assert results['NM_001330093.1:c.4251A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330093.1:c.4251A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330093.1:c.4251A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330093.1:c.4251A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} + assert results['NM_001330093.1:c.4251A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330093.1:c.4251A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} assert results['NM_001330093.1:c.4251A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317022.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330093.1'} - assert 'NM_001135659.1:c.4374A>G' in results.keys() + 
assert 'NM_001135659.1:c.4374A>G' in list(results.keys()) assert results['NM_001135659.1:c.4374A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001135659.1:c.4374A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001135659.1:c.4374A>G']['alt_genomic_loci'] == [] @@ -13348,13 +13348,13 @@ def test_variant267(self): assert results['NM_001135659.1:c.4374A>G']['hgvs_lrg_variant'] == '' assert results['NM_001135659.1:c.4374A>G']['hgvs_transcript_variant'] == 'NM_001135659.1:c.4374A>G' assert results['NM_001135659.1:c.4374A>G']['hgvs_refseqgene_variant'] == 'NG_011878.1:g.1115323A>G' - assert results['NM_001135659.1:c.4374A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001135659.1:c.4374A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} - assert results['NM_001135659.1:c.4374A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001135659.1:c.4374A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001135659.1:c.4374A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001135659.1:c.4374A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} + assert results['NM_001135659.1:c.4374A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001135659.1:c.4374A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} assert results['NM_001135659.1:c.4374A>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011878.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129131.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135659.1'} - assert 'NM_001320157.1:c.150A>G' in results.keys() + assert 'NM_001320157.1:c.150A>G' in list(results.keys()) assert results['NM_001320157.1:c.150A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001320157.1:c.150A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001320157.1:c.150A>G']['alt_genomic_loci'] == [] @@ -13366,13 +13366,13 @@ def test_variant267(self): assert results['NM_001320157.1:c.150A>G']['hgvs_lrg_variant'] == '' assert results['NM_001320157.1:c.150A>G']['hgvs_transcript_variant'] == 'NM_001320157.1:c.150A>G' assert results['NM_001320157.1:c.150A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001320157.1:c.150A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001320157.1:c.150A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} - assert results['NM_001320157.1:c.150A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001320157.1:c.150A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 
'2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001320157.1:c.150A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001320157.1:c.150A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} + assert results['NM_001320157.1:c.150A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001320157.1:c.150A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} assert results['NM_001320157.1:c.150A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001307086.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001320157.1'} - assert 'NM_001330084.1:c.4188A>G' in results.keys() + assert 'NM_001330084.1:c.4188A>G' in list(results.keys()) assert results['NM_001330084.1:c.4188A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330084.1:c.4188A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330084.1:c.4188A>G']['alt_genomic_loci'] == [] @@ -13384,13 +13384,13 @@ def test_variant267(self): assert results['NM_001330084.1:c.4188A>G']['hgvs_lrg_variant'] == '' assert results['NM_001330084.1:c.4188A>G']['hgvs_transcript_variant'] == 'NM_001330084.1:c.4188A>G' assert results['NM_001330084.1:c.4188A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330084.1:c.4188A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert 
results['NM_001330084.1:c.4188A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} - assert results['NM_001330084.1:c.4188A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330084.1:c.4188A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330084.1:c.4188A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330084.1:c.4188A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} + assert results['NM_001330084.1:c.4188A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330084.1:c.4188A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} assert results['NM_001330084.1:c.4188A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317013.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330084.1'} - assert 'NM_004801.4:c.4164A>G' in results.keys() + assert 'NM_004801.4:c.4164A>G' in list(results.keys()) assert results['NM_004801.4:c.4164A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_004801.4:c.4164A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004801.4:c.4164A>G']['alt_genomic_loci'] == [] @@ 
-13402,13 +13402,13 @@ def test_variant267(self): assert results['NM_004801.4:c.4164A>G']['hgvs_lrg_variant'] == '' assert results['NM_004801.4:c.4164A>G']['hgvs_transcript_variant'] == 'NM_004801.4:c.4164A>G' assert results['NM_004801.4:c.4164A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_004801.4:c.4164A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_004801.4:c.4164A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} - assert results['NM_004801.4:c.4164A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_004801.4:c.4164A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_004801.4:c.4164A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_004801.4:c.4164A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} + assert results['NM_004801.4:c.4164A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_004801.4:c.4164A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} assert results['NM_004801.4:c.4164A>G']['reference_sequence_records'] == 
{'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004792.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004801.4'} - assert 'NM_001330082.1:c.4221A>G' in results.keys() + assert 'NM_001330082.1:c.4221A>G' in list(results.keys()) assert results['NM_001330082.1:c.4221A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330082.1:c.4221A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330082.1:c.4221A>G']['alt_genomic_loci'] == [] @@ -13420,14 +13420,14 @@ def test_variant267(self): assert results['NM_001330082.1:c.4221A>G']['hgvs_lrg_variant'] == '' assert results['NM_001330082.1:c.4221A>G']['hgvs_transcript_variant'] == 'NM_001330082.1:c.4221A>G' assert results['NM_001330082.1:c.4221A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330082.1:c.4221A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330082.1:c.4221A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} - assert results['NM_001330082.1:c.4221A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330082.1:c.4221A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330082.1:c.4221A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330082.1:c.4221A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 
'pos': '49922214', 'alt': 'C'}} + assert results['NM_001330082.1:c.4221A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330082.1:c.4221A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} assert results['NM_001330082.1:c.4221A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317011.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330082.1'} assert results['flag'] == 'gene_variant' - assert 'NM_001330091.1:c.1140A>G' in results.keys() + assert 'NM_001330091.1:c.1140A>G' in list(results.keys()) assert results['NM_001330091.1:c.1140A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330091.1:c.1140A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330091.1:c.1140A>G']['alt_genomic_loci'] == [] @@ -13439,13 +13439,13 @@ def test_variant267(self): assert results['NM_001330091.1:c.1140A>G']['hgvs_lrg_variant'] == '' assert results['NM_001330091.1:c.1140A>G']['hgvs_transcript_variant'] == 'NM_001330091.1:c.1140A>G' assert results['NM_001330091.1:c.1140A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330091.1:c.1140A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330091.1:c.1140A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} - assert results['NM_001330091.1:c.1140A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert 
results['NM_001330091.1:c.1140A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330091.1:c.1140A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330091.1:c.1140A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} + assert results['NM_001330091.1:c.1140A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330091.1:c.1140A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} assert results['NM_001330091.1:c.1140A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317020.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330091.1'} - assert 'NM_001320156.3:c.159A>G' in results.keys() + assert 'NM_001320156.3:c.159A>G' in list(results.keys()) assert results['NM_001320156.3:c.159A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001320156.3:c.159A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001320156.3:c.159A>G']['alt_genomic_loci'] == [] @@ -13457,13 +13457,13 @@ def test_variant267(self): assert results['NM_001320156.3:c.159A>G']['hgvs_lrg_variant'] == '' assert results['NM_001320156.3:c.159A>G']['hgvs_transcript_variant'] == 'NM_001320156.3:c.159A>G' assert results['NM_001320156.3:c.159A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001320156.3:c.159A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001320156.3:c.159A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} - assert results['NM_001320156.3:c.159A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001320156.3:c.159A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001320156.3:c.159A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001320156.3:c.159A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} + assert results['NM_001320156.3:c.159A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001320156.3:c.159A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} assert results['NM_001320156.3:c.159A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001307085.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001320156.3'} - assert 'NM_001330087.1:c.4053A>G' in results.keys() + assert 'NM_001330087.1:c.4053A>G' in list(results.keys()) assert results['NM_001330087.1:c.4053A>G']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001330087.1:c.4053A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330087.1:c.4053A>G']['alt_genomic_loci'] == [] @@ -13475,13 +13475,13 @@ def test_variant267(self): assert results['NM_001330087.1:c.4053A>G']['hgvs_lrg_variant'] == '' assert results['NM_001330087.1:c.4053A>G']['hgvs_transcript_variant'] == 'NM_001330087.1:c.4053A>G' assert results['NM_001330087.1:c.4053A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330087.1:c.4053A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330087.1:c.4053A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} - assert results['NM_001330087.1:c.4053A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330087.1:c.4053A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330087.1:c.4053A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330087.1:c.4053A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} + assert results['NM_001330087.1:c.4053A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330087.1:c.4053A>G']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} assert results['NM_001330087.1:c.4053A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317016.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330087.1'} - assert 'NM_001330097.1:c.1050A>G' in results.keys() + assert 'NM_001330097.1:c.1050A>G' in list(results.keys()) assert results['NM_001330097.1:c.1050A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330097.1:c.1050A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330097.1:c.1050A>G']['alt_genomic_loci'] == [] @@ -13493,13 +13493,13 @@ def test_variant267(self): assert results['NM_001330097.1:c.1050A>G']['hgvs_lrg_variant'] == '' assert results['NM_001330097.1:c.1050A>G']['hgvs_transcript_variant'] == 'NM_001330097.1:c.1050A>G' assert results['NM_001330097.1:c.1050A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330097.1:c.1050A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330097.1:c.1050A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} - assert results['NM_001330097.1:c.1050A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_001330097.1:c.1050A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_001330097.1:c.1050A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 
'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330097.1:c.1050A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} + assert results['NM_001330097.1:c.1050A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_001330097.1:c.1050A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} assert results['NM_001330097.1:c.1050A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317026.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330097.1'} - assert 'NM_004801.5:c.4164A>G' in results.keys() + assert 'NM_004801.5:c.4164A>G' in list(results.keys()) assert results['NM_004801.5:c.4164A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_004801.5:c.4164A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004801.5:c.4164A>G']['alt_genomic_loci'] == [] @@ -13511,19 +13511,19 @@ def test_variant267(self): assert results['NM_004801.5:c.4164A>G']['hgvs_lrg_variant'] == '' assert results['NM_004801.5:c.4164A>G']['hgvs_transcript_variant'] == 'NM_004801.5:c.4164A>G' assert results['NM_004801.5:c.4164A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_004801.5:c.4164A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_004801.5:c.4164A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} - assert results['NM_004801.5:c.4164A>G']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '50149352', 'alt': u'C'}} - assert results['NM_004801.5:c.4164A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': u'T', 'pos': '49922214', 'alt': u'C'}} + assert results['NM_004801.5:c.4164A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_004801.5:c.4164A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} + assert results['NM_004801.5:c.4164A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} + assert results['NM_004801.5:c.4164A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} assert results['NM_004801.5:c.4164A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004792.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004801.5'} def test_variant268(self): variant = '2-50847195-G-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001330096.1:c.1201C>T' in results.keys() + assert 'NM_001330096.1:c.1201C>T' in list(results.keys()) assert results['NM_001330096.1:c.1201C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330096.1:c.1201C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330096.1:c.1201C>T']['alt_genomic_loci'] == [] @@ -13535,13 +13535,13 @@ def test_variant268(self): assert results['NM_001330096.1:c.1201C>T']['hgvs_lrg_variant'] == '' assert 
results['NM_001330096.1:c.1201C>T']['hgvs_transcript_variant'] == 'NM_001330096.1:c.1201C>T' assert results['NM_001330096.1:c.1201C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330096.1:c.1201C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001330096.1:c.1201C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} - assert results['NM_001330096.1:c.1201C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001330096.1:c.1201C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330096.1:c.1201C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001330096.1:c.1201C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} + assert results['NM_001330096.1:c.1201C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001330096.1:c.1201C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} assert results['NM_001330096.1:c.1201C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317025.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330096.1'} - assert 'NM_001330084.1:c.1246C>T' in results.keys() + assert 'NM_001330084.1:c.1246C>T' in list(results.keys()) assert results['NM_001330084.1:c.1246C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330084.1:c.1246C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330084.1:c.1246C>T']['alt_genomic_loci'] == [] @@ -13553,13 +13553,13 @@ def test_variant268(self): assert results['NM_001330084.1:c.1246C>T']['hgvs_lrg_variant'] == '' assert results['NM_001330084.1:c.1246C>T']['hgvs_transcript_variant'] == 'NM_001330084.1:c.1246C>T' assert results['NM_001330084.1:c.1246C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330084.1:c.1246C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001330084.1:c.1246C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} - assert results['NM_001330084.1:c.1246C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001330084.1:c.1246C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330084.1:c.1246C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001330084.1:c.1246C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} + assert 
results['NM_001330084.1:c.1246C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001330084.1:c.1246C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} assert results['NM_001330084.1:c.1246C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317013.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330084.1'} - assert 'NM_001330077.1:c.1261C>T' in results.keys() + assert 'NM_001330077.1:c.1261C>T' in list(results.keys()) assert results['NM_001330077.1:c.1261C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330077.1:c.1261C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330077.1:c.1261C>T']['alt_genomic_loci'] == [] @@ -13571,13 +13571,13 @@ def test_variant268(self): assert results['NM_001330077.1:c.1261C>T']['hgvs_lrg_variant'] == '' assert results['NM_001330077.1:c.1261C>T']['hgvs_transcript_variant'] == 'NM_001330077.1:c.1261C>T' assert results['NM_001330077.1:c.1261C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330077.1:c.1261C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001330077.1:c.1261C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} - assert results['NM_001330077.1:c.1261C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001330077.1:c.1261C>T']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330077.1:c.1261C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001330077.1:c.1261C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} + assert results['NM_001330077.1:c.1261C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001330077.1:c.1261C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} assert results['NM_001330077.1:c.1261C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317006.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330077.1'} - assert 'NM_001330086.1:c.1285C>T' in results.keys() + assert 'NM_001330086.1:c.1285C>T' in list(results.keys()) assert results['NM_001330086.1:c.1285C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330086.1:c.1285C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330086.1:c.1285C>T']['alt_genomic_loci'] == [] @@ -13589,13 +13589,13 @@ def test_variant268(self): assert results['NM_001330086.1:c.1285C>T']['hgvs_lrg_variant'] == '' assert results['NM_001330086.1:c.1285C>T']['hgvs_transcript_variant'] == 'NM_001330086.1:c.1285C>T' assert results['NM_001330086.1:c.1285C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330086.1:c.1285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': 
'50847195', 'alt': u'A'}} - assert results['NM_001330086.1:c.1285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} - assert results['NM_001330086.1:c.1285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001330086.1:c.1285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330086.1:c.1285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001330086.1:c.1285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} + assert results['NM_001330086.1:c.1285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001330086.1:c.1285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} assert results['NM_001330086.1:c.1285C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317015.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330086.1'} - assert 'NM_001330088.1:c.1231C>T' in results.keys() + assert 'NM_001330088.1:c.1231C>T' in list(results.keys()) assert results['NM_001330088.1:c.1231C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330088.1:c.1231C>T']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_001330088.1:c.1231C>T']['alt_genomic_loci'] == [] @@ -13607,13 +13607,13 @@ def test_variant268(self): assert results['NM_001330088.1:c.1231C>T']['hgvs_lrg_variant'] == '' assert results['NM_001330088.1:c.1231C>T']['hgvs_transcript_variant'] == 'NM_001330088.1:c.1231C>T' assert results['NM_001330088.1:c.1231C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330088.1:c.1231C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001330088.1:c.1231C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} - assert results['NM_001330088.1:c.1231C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001330088.1:c.1231C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330088.1:c.1231C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001330088.1:c.1231C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} + assert results['NM_001330088.1:c.1231C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001330088.1:c.1231C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': 
'50620057', 'alt': 'A'}} assert results['NM_001330088.1:c.1231C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317017.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330088.1'} - assert 'NM_001330093.1:c.1282C>T' in results.keys() + assert 'NM_001330093.1:c.1282C>T' in list(results.keys()) assert results['NM_001330093.1:c.1282C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330093.1:c.1282C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330093.1:c.1282C>T']['alt_genomic_loci'] == [] @@ -13625,13 +13625,13 @@ def test_variant268(self): assert results['NM_001330093.1:c.1282C>T']['hgvs_lrg_variant'] == '' assert results['NM_001330093.1:c.1282C>T']['hgvs_transcript_variant'] == 'NM_001330093.1:c.1282C>T' assert results['NM_001330093.1:c.1282C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330093.1:c.1282C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001330093.1:c.1282C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} - assert results['NM_001330093.1:c.1282C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001330093.1:c.1282C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330093.1:c.1282C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert 
results['NM_001330093.1:c.1282C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} + assert results['NM_001330093.1:c.1282C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001330093.1:c.1282C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} assert results['NM_001330093.1:c.1282C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317022.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330093.1'} - assert 'NM_001330087.1:c.1201C>T' in results.keys() + assert 'NM_001330087.1:c.1201C>T' in list(results.keys()) assert results['NM_001330087.1:c.1201C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330087.1:c.1201C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330087.1:c.1201C>T']['alt_genomic_loci'] == [] @@ -13643,13 +13643,13 @@ def test_variant268(self): assert results['NM_001330087.1:c.1201C>T']['hgvs_lrg_variant'] == '' assert results['NM_001330087.1:c.1201C>T']['hgvs_transcript_variant'] == 'NM_001330087.1:c.1201C>T' assert results['NM_001330087.1:c.1201C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330087.1:c.1201C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001330087.1:c.1201C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} - assert results['NM_001330087.1:c.1201C>T']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001330087.1:c.1201C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330087.1:c.1201C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001330087.1:c.1201C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} + assert results['NM_001330087.1:c.1201C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001330087.1:c.1201C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} assert results['NM_001330087.1:c.1201C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317016.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330087.1'} - assert 'NM_001330082.1:c.1261C>T' in results.keys() + assert 'NM_001330082.1:c.1261C>T' in list(results.keys()) assert results['NM_001330082.1:c.1261C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330082.1:c.1261C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330082.1:c.1261C>T']['alt_genomic_loci'] == [] @@ -13661,13 +13661,13 @@ def test_variant268(self): assert results['NM_001330082.1:c.1261C>T']['hgvs_lrg_variant'] == '' assert results['NM_001330082.1:c.1261C>T']['hgvs_transcript_variant'] == 'NM_001330082.1:c.1261C>T' assert 
results['NM_001330082.1:c.1261C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330082.1:c.1261C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001330082.1:c.1261C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} - assert results['NM_001330082.1:c.1261C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001330082.1:c.1261C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330082.1:c.1261C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001330082.1:c.1261C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} + assert results['NM_001330082.1:c.1261C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001330082.1:c.1261C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} assert results['NM_001330082.1:c.1261C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317011.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330082.1'} - assert 'NM_001330078.1:c.1285C>T' in results.keys() + 
assert 'NM_001330078.1:c.1285C>T' in list(results.keys()) assert results['NM_001330078.1:c.1285C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330078.1:c.1285C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330078.1:c.1285C>T']['alt_genomic_loci'] == [] @@ -13679,13 +13679,13 @@ def test_variant268(self): assert results['NM_001330078.1:c.1285C>T']['hgvs_lrg_variant'] == '' assert results['NM_001330078.1:c.1285C>T']['hgvs_transcript_variant'] == 'NM_001330078.1:c.1285C>T' assert results['NM_001330078.1:c.1285C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330078.1:c.1285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001330078.1:c.1285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} - assert results['NM_001330078.1:c.1285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001330078.1:c.1285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330078.1:c.1285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001330078.1:c.1285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} + assert results['NM_001330078.1:c.1285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 
'2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001330078.1:c.1285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} assert results['NM_001330078.1:c.1285C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317007.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330078.1'} - assert 'NM_001330094.1:c.1273C>T' in results.keys() + assert 'NM_001330094.1:c.1273C>T' in list(results.keys()) assert results['NM_001330094.1:c.1273C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330094.1:c.1273C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330094.1:c.1273C>T']['alt_genomic_loci'] == [] @@ -13697,14 +13697,14 @@ def test_variant268(self): assert results['NM_001330094.1:c.1273C>T']['hgvs_lrg_variant'] == '' assert results['NM_001330094.1:c.1273C>T']['hgvs_transcript_variant'] == 'NM_001330094.1:c.1273C>T' assert results['NM_001330094.1:c.1273C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330094.1:c.1273C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001330094.1:c.1273C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} - assert results['NM_001330094.1:c.1273C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001330094.1:c.1273C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert 
results['NM_001330094.1:c.1273C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001330094.1:c.1273C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} + assert results['NM_001330094.1:c.1273C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001330094.1:c.1273C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} assert results['NM_001330094.1:c.1273C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317023.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330094.1'} assert results['flag'] == 'gene_variant' - assert 'NM_001135659.2:c.1405C>T' in results.keys() + assert 'NM_001135659.2:c.1405C>T' in list(results.keys()) assert results['NM_001135659.2:c.1405C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001135659.2:c.1405C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001135659.2:c.1405C>T']['alt_genomic_loci'] == [] @@ -13716,13 +13716,13 @@ def test_variant268(self): assert results['NM_001135659.2:c.1405C>T']['hgvs_lrg_variant'] == '' assert results['NM_001135659.2:c.1405C>T']['hgvs_transcript_variant'] == 'NM_001135659.2:c.1405C>T' assert results['NM_001135659.2:c.1405C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001135659.2:c.1405C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert 
results['NM_001135659.2:c.1405C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} - assert results['NM_001135659.2:c.1405C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001135659.2:c.1405C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001135659.2:c.1405C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001135659.2:c.1405C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} + assert results['NM_001135659.2:c.1405C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001135659.2:c.1405C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} assert results['NM_001135659.2:c.1405C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129131.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135659.2'} - assert 'NM_001330083.1:c.1246C>T' in results.keys() + assert 'NM_001330083.1:c.1246C>T' in list(results.keys()) assert results['NM_001330083.1:c.1246C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330083.1:c.1246C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330083.1:c.1246C>T']['alt_genomic_loci'] 
== [] @@ -13734,13 +13734,13 @@ def test_variant268(self): assert results['NM_001330083.1:c.1246C>T']['hgvs_lrg_variant'] == '' assert results['NM_001330083.1:c.1246C>T']['hgvs_transcript_variant'] == 'NM_001330083.1:c.1246C>T' assert results['NM_001330083.1:c.1246C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330083.1:c.1246C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001330083.1:c.1246C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} - assert results['NM_001330083.1:c.1246C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001330083.1:c.1246C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330083.1:c.1246C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001330083.1:c.1246C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} + assert results['NM_001330083.1:c.1246C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001330083.1:c.1246C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} assert 
results['NM_001330083.1:c.1246C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317012.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330083.1'} - assert 'NM_004801.5:c.1285C>T' in results.keys() + assert 'NM_004801.5:c.1285C>T' in list(results.keys()) assert results['NM_004801.5:c.1285C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_004801.5:c.1285C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004801.5:c.1285C>T']['alt_genomic_loci'] == [] @@ -13752,13 +13752,13 @@ def test_variant268(self): assert results['NM_004801.5:c.1285C>T']['hgvs_lrg_variant'] == '' assert results['NM_004801.5:c.1285C>T']['hgvs_transcript_variant'] == 'NM_004801.5:c.1285C>T' assert results['NM_004801.5:c.1285C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_004801.5:c.1285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_004801.5:c.1285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} - assert results['NM_004801.5:c.1285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_004801.5:c.1285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_004801.5:c.1285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_004801.5:c.1285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': 
{'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} + assert results['NM_004801.5:c.1285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_004801.5:c.1285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} assert results['NM_004801.5:c.1285C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004792.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004801.5'} - assert 'NM_001330085.1:c.1285C>T' in results.keys() + assert 'NM_001330085.1:c.1285C>T' in list(results.keys()) assert results['NM_001330085.1:c.1285C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330085.1:c.1285C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330085.1:c.1285C>T']['alt_genomic_loci'] == [] @@ -13770,13 +13770,13 @@ def test_variant268(self): assert results['NM_001330085.1:c.1285C>T']['hgvs_lrg_variant'] == '' assert results['NM_001330085.1:c.1285C>T']['hgvs_transcript_variant'] == 'NM_001330085.1:c.1285C>T' assert results['NM_001330085.1:c.1285C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330085.1:c.1285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001330085.1:c.1285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} - assert results['NM_001330085.1:c.1285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert 
results['NM_001330085.1:c.1285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330085.1:c.1285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001330085.1:c.1285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} + assert results['NM_001330085.1:c.1285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001330085.1:c.1285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} assert results['NM_001330085.1:c.1285C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317014.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330085.1'} - assert 'NM_001330095.1:c.1261C>T' in results.keys() + assert 'NM_001330095.1:c.1261C>T' in list(results.keys()) assert results['NM_001330095.1:c.1261C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330095.1:c.1261C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330095.1:c.1261C>T']['alt_genomic_loci'] == [] @@ -13788,13 +13788,13 @@ def test_variant268(self): assert results['NM_001330095.1:c.1261C>T']['hgvs_lrg_variant'] == '' assert results['NM_001330095.1:c.1261C>T']['hgvs_transcript_variant'] == 'NM_001330095.1:c.1261C>T' assert results['NM_001330095.1:c.1261C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330095.1:c.1261C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001330095.1:c.1261C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} - assert results['NM_001330095.1:c.1261C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001330095.1:c.1261C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001330095.1:c.1261C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001330095.1:c.1261C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} + assert results['NM_001330095.1:c.1261C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001330095.1:c.1261C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} assert results['NM_001330095.1:c.1261C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317024.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330095.1'} - assert 'NM_004801.4:c.1285C>T' in results.keys() + assert 'NM_004801.4:c.1285C>T' in list(results.keys()) assert results['NM_004801.4:c.1285C>T']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_004801.4:c.1285C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004801.4:c.1285C>T']['alt_genomic_loci'] == [] @@ -13806,13 +13806,13 @@ def test_variant268(self): assert results['NM_004801.4:c.1285C>T']['hgvs_lrg_variant'] == '' assert results['NM_004801.4:c.1285C>T']['hgvs_transcript_variant'] == 'NM_004801.4:c.1285C>T' assert results['NM_004801.4:c.1285C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_004801.4:c.1285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_004801.4:c.1285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} - assert results['NM_004801.4:c.1285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_004801.4:c.1285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_004801.4:c.1285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_004801.4:c.1285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} + assert results['NM_004801.4:c.1285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_004801.4:c.1285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} assert results['NM_004801.4:c.1285C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004792.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004801.4'} - assert 'NM_001135659.1:c.1405C>T' in results.keys() + assert 'NM_001135659.1:c.1405C>T' in list(results.keys()) assert results['NM_001135659.1:c.1405C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001135659.1:c.1405C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001135659.1:c.1405C>T']['alt_genomic_loci'] == [] @@ -13824,19 +13824,19 @@ def test_variant268(self): assert results['NM_001135659.1:c.1405C>T']['hgvs_lrg_variant'] == '' assert results['NM_001135659.1:c.1405C>T']['hgvs_transcript_variant'] == 'NM_001135659.1:c.1405C>T' assert results['NM_001135659.1:c.1405C>T']['hgvs_refseqgene_variant'] == 'NG_011878.1:g.417480C>T' - assert results['NM_001135659.1:c.1405C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001135659.1:c.1405C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} - assert results['NM_001135659.1:c.1405C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50847195', 'alt': u'A'}} - assert results['NM_001135659.1:c.1405C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '50620057', 'alt': u'A'}} + assert results['NM_001135659.1:c.1405C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 
'alt': 'A'}} + assert results['NM_001135659.1:c.1405C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} + assert results['NM_001135659.1:c.1405C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} + assert results['NM_001135659.1:c.1405C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} assert results['NM_001135659.1:c.1405C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011878.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129131.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135659.1'} def test_variant269(self): variant = '2-71825797-C-G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001130976.1:c.3582C>G' in results.keys() + assert 'NM_001130976.1:c.3582C>G' in list(results.keys()) assert results['NM_001130976.1:c.3582C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001130976.1:c.3582C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001130976.1:c.3582C>G']['alt_genomic_loci'] == [] @@ -13854,7 +13854,7 @@ def test_variant269(self): assert results['NM_001130976.1:c.3582C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} assert results['NM_001130976.1:c.3582C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124448.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130976.1'} - assert 'NM_001130981.1:c.3675C>G' in results.keys() + assert 'NM_001130981.1:c.3675C>G' in list(results.keys()) assert 
results['NM_001130981.1:c.3675C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001130981.1:c.3675C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001130981.1:c.3675C>G']['alt_genomic_loci'] == [] @@ -13872,7 +13872,7 @@ def test_variant269(self): assert results['NM_001130981.1:c.3675C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} assert results['NM_001130981.1:c.3675C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124453.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130981.1'} - assert 'NM_001130979.1:c.3717C>G' in results.keys() + assert 'NM_001130979.1:c.3717C>G' in list(results.keys()) assert results['NM_001130979.1:c.3717C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001130979.1:c.3717C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001130979.1:c.3717C>G']['alt_genomic_loci'] == [] @@ -13890,7 +13890,7 @@ def test_variant269(self): assert results['NM_001130979.1:c.3717C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} assert results['NM_001130979.1:c.3717C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124451.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130979.1'} - assert 'NM_001130985.1:c.3678C>G' in results.keys() + assert 'NM_001130985.1:c.3678C>G' in list(results.keys()) assert results['NM_001130985.1:c.3678C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001130985.1:c.3678C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001130985.1:c.3678C>G']['alt_genomic_loci'] == [] @@ -13908,7 +13908,7 @@ def test_variant269(self): assert results['NM_001130985.1:c.3678C>G']['primary_assembly_loci']['grch38'] 
== {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} assert results['NM_001130985.1:c.3678C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124457.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130985.1'} - assert 'NM_001130987.1:c.3678C>G' in results.keys() + assert 'NM_001130987.1:c.3678C>G' in list(results.keys()) assert results['NM_001130987.1:c.3678C>G']['hgvs_lrg_transcript_variant'] == 'LRG_845t2:c.3678C>G' assert results['NM_001130987.1:c.3678C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001130987.1:c.3678C>G']['alt_genomic_loci'] == [] @@ -13926,7 +13926,7 @@ def test_variant269(self): assert results['NM_001130987.1:c.3678C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} assert results['NM_001130987.1:c.3678C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124459.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130987.1'} - assert 'NM_001130983.1:c.3627C>G' in results.keys() + assert 'NM_001130983.1:c.3627C>G' in list(results.keys()) assert results['NM_001130983.1:c.3627C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001130983.1:c.3627C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001130983.1:c.3627C>G']['alt_genomic_loci'] == [] @@ -13945,7 +13945,7 @@ def test_variant269(self): assert results['NM_001130983.1:c.3627C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124455.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130983.1'} assert results['flag'] == 'gene_variant' - assert 'NM_001130980.1:c.3675C>G' in results.keys() + assert 'NM_001130980.1:c.3675C>G' in list(results.keys()) assert 
results['NM_001130980.1:c.3675C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001130980.1:c.3675C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001130980.1:c.3675C>G']['alt_genomic_loci'] == [] @@ -13963,7 +13963,7 @@ def test_variant269(self): assert results['NM_001130980.1:c.3675C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} assert results['NM_001130980.1:c.3675C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124452.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130980.1'} - assert 'NM_003494.3:c.3624C>G' in results.keys() + assert 'NM_003494.3:c.3624C>G' in list(results.keys()) assert results['NM_003494.3:c.3624C>G']['hgvs_lrg_transcript_variant'] == 'LRG_845t1:c.3624C>G' assert results['NM_003494.3:c.3624C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003494.3:c.3624C>G']['alt_genomic_loci'] == [] @@ -13981,7 +13981,7 @@ def test_variant269(self): assert results['NM_003494.3:c.3624C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} assert results['NM_003494.3:c.3624C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003485.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003494.3'} - assert 'NM_001130984.1:c.3585C>G' in results.keys() + assert 'NM_001130984.1:c.3585C>G' in list(results.keys()) assert results['NM_001130984.1:c.3585C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001130984.1:c.3585C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001130984.1:c.3585C>G']['alt_genomic_loci'] == [] @@ -13999,7 +13999,7 @@ def test_variant269(self): assert results['NM_001130984.1:c.3585C>G']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} assert results['NM_001130984.1:c.3585C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124456.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130984.1'} - assert 'NM_001130977.1:c.3582C>G' in results.keys() + assert 'NM_001130977.1:c.3582C>G' in list(results.keys()) assert results['NM_001130977.1:c.3582C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001130977.1:c.3582C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001130977.1:c.3582C>G']['alt_genomic_loci'] == [] @@ -14017,7 +14017,7 @@ def test_variant269(self): assert results['NM_001130977.1:c.3582C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} assert results['NM_001130977.1:c.3582C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124449.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130977.1'} - assert 'NM_001130455.1:c.3627C>G' in results.keys() + assert 'NM_001130455.1:c.3627C>G' in list(results.keys()) assert results['NM_001130455.1:c.3627C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001130455.1:c.3627C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001130455.1:c.3627C>G']['alt_genomic_loci'] == [] @@ -14035,7 +14035,7 @@ def test_variant269(self): assert results['NM_001130455.1:c.3627C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} assert results['NM_001130455.1:c.3627C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001123927.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130455.1'} - assert 
'NM_001130978.1:c.3624C>G' in results.keys() + assert 'NM_001130978.1:c.3624C>G' in list(results.keys()) assert results['NM_001130978.1:c.3624C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001130978.1:c.3624C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001130978.1:c.3624C>G']['alt_genomic_loci'] == [] @@ -14053,7 +14053,7 @@ def test_variant269(self): assert results['NM_001130978.1:c.3624C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} assert results['NM_001130978.1:c.3624C>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008694.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124450.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130978.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_845.xml'} - assert 'NM_001130982.1:c.3720C>G' in results.keys() + assert 'NM_001130982.1:c.3720C>G' in list(results.keys()) assert results['NM_001130982.1:c.3720C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001130982.1:c.3720C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001130982.1:c.3720C>G']['alt_genomic_loci'] == [] @@ -14071,7 +14071,7 @@ def test_variant269(self): assert results['NM_001130982.1:c.3720C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} assert results['NM_001130982.1:c.3720C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124454.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130982.1'} - assert 'NM_001130986.1:c.3585C>G' in results.keys() + assert 'NM_001130986.1:c.3585C>G' in list(results.keys()) assert results['NM_001130986.1:c.3585C>G']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001130986.1:c.3585C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001130986.1:c.3585C>G']['alt_genomic_loci'] == [] @@ -14093,9 +14093,9 @@ def test_variant269(self): def test_variant270(self): variant = '2-166179712-G-C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_021007.2:c.1718G>C' in results.keys() + assert 'NM_021007.2:c.1718G>C' in list(results.keys()) assert results['NM_021007.2:c.1718G>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_021007.2:c.1718G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_021007.2:c.1718G>C']['alt_genomic_loci'] == [] @@ -14114,7 +14114,7 @@ def test_variant270(self): assert results['NM_021007.2:c.1718G>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_066287.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021007.2'} assert results['flag'] == 'gene_variant' - assert 'NM_001040143.1:c.1718G>C' in results.keys() + assert 'NM_001040143.1:c.1718G>C' in list(results.keys()) assert results['NM_001040143.1:c.1718G>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001040143.1:c.1718G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001040143.1:c.1718G>C']['alt_genomic_loci'] == [] @@ -14132,7 +14132,7 @@ def test_variant270(self): assert results['NM_001040143.1:c.1718G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165323202G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '165323202', 'alt': 'C'}} assert results['NM_001040143.1:c.1718G>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035233.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040143.1'} - assert 'NM_001040142.1:c.1718G>C' in results.keys() + assert 'NM_001040142.1:c.1718G>C' in list(results.keys()) assert results['NM_001040142.1:c.1718G>C']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001040142.1:c.1718G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001040142.1:c.1718G>C']['alt_genomic_loci'] == [] @@ -14154,9 +14154,9 @@ def test_variant270(self): def test_variant271(self): variant = '2-166183371-A-G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_021007.2:c.2026A>G' in results.keys() + assert 'NM_021007.2:c.2026A>G' in list(results.keys()) assert results['NM_021007.2:c.2026A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_021007.2:c.2026A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_021007.2:c.2026A>G']['alt_genomic_loci'] == [] @@ -14175,7 +14175,7 @@ def test_variant271(self): assert results['NM_021007.2:c.2026A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_066287.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021007.2'} assert results['flag'] == 'gene_variant' - assert 'NM_001040143.1:c.2026A>G' in results.keys() + assert 'NM_001040143.1:c.2026A>G' in list(results.keys()) assert results['NM_001040143.1:c.2026A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001040143.1:c.2026A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001040143.1:c.2026A>G']['alt_genomic_loci'] == [] @@ -14193,7 +14193,7 @@ def test_variant271(self): assert results['NM_001040143.1:c.2026A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165326861A>G', 'vcf': {'chr': '2', 'ref': 'A', 'pos': '165326861', 'alt': 'G'}} assert results['NM_001040143.1:c.2026A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035233.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040143.1'} - assert 'NM_001040142.1:c.2026A>G' in results.keys() + assert 'NM_001040142.1:c.2026A>G' in list(results.keys()) assert results['NM_001040142.1:c.2026A>G']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001040142.1:c.2026A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001040142.1:c.2026A>G']['alt_genomic_loci'] == [] @@ -14215,9 +14215,9 @@ def test_variant271(self): def test_variant272(self): variant = '2-166929889-GTCCAGGTCCT-GAC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001353951.1:c.233_242delinsGT' in results.keys() + assert 'NM_001353951.1:c.233_242delinsGT' in list(results.keys()) assert results['NM_001353951.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353951.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353951.1:c.233_242delinsGT']['alt_genomic_loci'] == [] @@ -14229,13 +14229,13 @@ def test_variant272(self): assert results['NM_001353951.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' assert results['NM_001353951.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353951.1:c.233_242delinsGT' assert results['NM_001353951.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353951.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001353951.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} - assert results['NM_001353951.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001353951.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 
'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353951.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001353951.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} + assert results['NM_001353951.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001353951.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} assert results['NM_001353951.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340880.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353951.1'} - assert 'NM_001353958.1:c.233_242delinsGT' in results.keys() + assert 'NM_001353958.1:c.233_242delinsGT' in list(results.keys()) assert results['NM_001353958.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353958.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353958.1:c.233_242delinsGT']['alt_genomic_loci'] == [] @@ -14247,13 +14247,13 @@ def test_variant272(self): assert results['NM_001353958.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' assert results['NM_001353958.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353958.1:c.233_242delinsGT' assert results['NM_001353958.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_001353958.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001353958.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} - assert results['NM_001353958.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001353958.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353958.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001353958.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} + assert results['NM_001353958.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001353958.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} assert 
results['NM_001353958.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340887.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353958.1'} - assert 'NM_001202435.1:c.233_242delinsGT' in results.keys() + assert 'NM_001202435.1:c.233_242delinsGT' in list(results.keys()) assert results['NM_001202435.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001202435.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001202435.1:c.233_242delinsGT']['alt_genomic_loci'] == [] @@ -14265,13 +14265,13 @@ def test_variant272(self): assert results['NM_001202435.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' assert results['NM_001202435.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001202435.1:c.233_242delinsGT' assert results['NM_001202435.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001202435.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001202435.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} - assert results['NM_001202435.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001202435.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001202435.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001202435.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} + assert results['NM_001202435.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001202435.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} assert results['NM_001202435.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001189364.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001202435.1'} - assert 'NR_148667.1:n.638_647delinsGT' in results.keys() + assert 'NR_148667.1:n.638_647delinsGT' in list(results.keys()) assert results['NR_148667.1:n.638_647delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NR_148667.1:n.638_647delinsGT']['refseqgene_context_intronic_sequence'] == '' assert results['NR_148667.1:n.638_647delinsGT']['alt_genomic_loci'] == [] @@ -14283,13 +14283,13 @@ def test_variant272(self): assert results['NR_148667.1:n.638_647delinsGT']['hgvs_lrg_variant'] == '' assert results['NR_148667.1:n.638_647delinsGT']['hgvs_transcript_variant'] == 'NR_148667.1:n.638_647delinsGT' assert results['NR_148667.1:n.638_647delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NR_148667.1:n.638_647delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 
'pos': '166929890', 'alt': u'AC'}} - assert results['NR_148667.1:n.638_647delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} - assert results['NR_148667.1:n.638_647delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NR_148667.1:n.638_647delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NR_148667.1:n.638_647delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NR_148667.1:n.638_647delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} + assert results['NR_148667.1:n.638_647delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NR_148667.1:n.638_647delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} assert results['NR_148667.1:n.638_647delinsGT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_148667.1'} - assert 'NM_001165964.1:c.233_242delinsGT' in results.keys() + assert 'NM_001165964.1:c.233_242delinsGT' in list(results.keys()) assert 
results['NM_001165964.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001165964.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001165964.1:c.233_242delinsGT']['alt_genomic_loci'] == [] @@ -14301,13 +14301,13 @@ def test_variant272(self): assert results['NM_001165964.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' assert results['NM_001165964.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001165964.1:c.233_242delinsGT' assert results['NM_001165964.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001165964.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001165964.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} - assert results['NM_001165964.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001165964.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001165964.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001165964.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 
'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} + assert results['NM_001165964.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001165964.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} assert results['NM_001165964.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159436.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165964.1'} - assert 'NM_001202435.2:c.233_242delinsGT' in results.keys() + assert 'NM_001202435.2:c.233_242delinsGT' in list(results.keys()) assert results['NM_001202435.2:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001202435.2:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001202435.2:c.233_242delinsGT']['alt_genomic_loci'] == [] @@ -14319,13 +14319,13 @@ def test_variant272(self): assert results['NM_001202435.2:c.233_242delinsGT']['hgvs_lrg_variant'] == '' assert results['NM_001202435.2:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001202435.2:c.233_242delinsGT' assert results['NM_001202435.2:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001202435.2:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001202435.2:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} - assert 
results['NM_001202435.2:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001202435.2:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001202435.2:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001202435.2:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} + assert results['NM_001202435.2:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001202435.2:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} assert results['NM_001202435.2:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001189364.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001202435.2'} - assert 'NM_006920.5:c.233_242delinsGT' in results.keys() + assert 'NM_006920.5:c.233_242delinsGT' in list(results.keys()) assert results['NM_006920.5:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_006920.5:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_006920.5:c.233_242delinsGT']['alt_genomic_loci'] == [] @@ -14337,13 +14337,13 @@ def test_variant272(self): assert results['NM_006920.5:c.233_242delinsGT']['hgvs_lrg_variant'] == '' assert results['NM_006920.5:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_006920.5:c.233_242delinsGT' assert results['NM_006920.5:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_006920.5:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_006920.5:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} - assert results['NM_006920.5:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_006920.5:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_006920.5:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_006920.5:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} + assert results['NM_006920.5:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 
'2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_006920.5:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} assert results['NM_006920.5:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_008851.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006920.5'} - assert 'NM_001165963.1:c.233_242delinsGT' in results.keys() + assert 'NM_001165963.1:c.233_242delinsGT' in list(results.keys()) assert results['NM_001165963.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001165963.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001165963.1:c.233_242delinsGT']['alt_genomic_loci'] == [] @@ -14355,13 +14355,13 @@ def test_variant272(self): assert results['NM_001165963.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' assert results['NM_001165963.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001165963.1:c.233_242delinsGT' assert results['NM_001165963.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001165963.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001165963.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} - assert results['NM_001165963.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert 
results['NM_001165963.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001165963.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001165963.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} + assert results['NM_001165963.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001165963.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} assert results['NM_001165963.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159435.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165963.1'} - assert 'NM_001353955.1:c.233_242delinsGT' in results.keys() + assert 'NM_001353955.1:c.233_242delinsGT' in list(results.keys()) assert results['NM_001353955.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353955.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353955.1:c.233_242delinsGT']['alt_genomic_loci'] == [] @@ -14373,13 +14373,13 @@ def test_variant272(self): assert results['NM_001353955.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' assert 
results['NM_001353955.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353955.1:c.233_242delinsGT' assert results['NM_001353955.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353955.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001353955.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} - assert results['NM_001353955.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001353955.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353955.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001353955.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} + assert results['NM_001353955.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001353955.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} assert results['NM_001353955.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340884.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353955.1'} - assert 'NM_001353961.1:c.-2193_-2184delinsGT' in results.keys() + assert 'NM_001353961.1:c.-2193_-2184delinsGT' in list(results.keys()) assert results['NM_001353961.1:c.-2193_-2184delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353961.1:c.-2193_-2184delinsGT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353961.1:c.-2193_-2184delinsGT']['alt_genomic_loci'] == [] @@ -14391,13 +14391,13 @@ def test_variant272(self): assert results['NM_001353961.1:c.-2193_-2184delinsGT']['hgvs_lrg_variant'] == '' assert results['NM_001353961.1:c.-2193_-2184delinsGT']['hgvs_transcript_variant'] == 'NM_001353961.1:c.-2193_-2184delinsGT' assert results['NM_001353961.1:c.-2193_-2184delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353961.1:c.-2193_-2184delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001353961.1:c.-2193_-2184delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} - assert results['NM_001353961.1:c.-2193_-2184delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001353961.1:c.-2193_-2184delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} assert results['NM_001353961.1:c.-2193_-2184delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340890.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353961.1'} - assert 'NM_001165963.2:c.233_242delinsGT' in results.keys() + assert 'NM_001165963.2:c.233_242delinsGT' in list(results.keys()) assert results['NM_001165963.2:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001165963.2:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001165963.2:c.233_242delinsGT']['alt_genomic_loci'] == [] @@ -14409,13 +14409,13 @@ def test_variant272(self): assert results['NM_001165963.2:c.233_242delinsGT']['hgvs_lrg_variant'] == '' assert results['NM_001165963.2:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001165963.2:c.233_242delinsGT' assert 
results['NM_001165963.2:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001165963.2:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001165963.2:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} - assert results['NM_001165963.2:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001165963.2:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001165963.2:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001165963.2:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} + assert results['NM_001165963.2:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001165963.2:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 
'alt': 'AC'}} assert results['NM_001165963.2:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159435.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165963.2'} - assert 'NM_001353950.1:c.233_242delinsGT' in results.keys() + assert 'NM_001353950.1:c.233_242delinsGT' in list(results.keys()) assert results['NM_001353950.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353950.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353950.1:c.233_242delinsGT']['alt_genomic_loci'] == [] @@ -14427,14 +14427,14 @@ def test_variant272(self): assert results['NM_001353950.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' assert results['NM_001353950.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353950.1:c.233_242delinsGT' assert results['NM_001353950.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353950.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001353950.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} - assert results['NM_001353950.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001353950.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert 
results['NM_001353950.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001353950.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} + assert results['NM_001353950.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001353950.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} assert results['NM_001353950.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340879.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353950.1'} assert results['flag'] == 'gene_variant' - assert 'NM_001353948.1:c.233_242delinsGT' in results.keys() + assert 'NM_001353948.1:c.233_242delinsGT' in list(results.keys()) assert results['NM_001353948.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353948.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353948.1:c.233_242delinsGT']['alt_genomic_loci'] == [] @@ -14446,13 +14446,13 @@ def test_variant272(self): assert results['NM_001353948.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' assert results['NM_001353948.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353948.1:c.233_242delinsGT' assert results['NM_001353948.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_001353948.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001353948.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} - assert results['NM_001353948.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001353948.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353948.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001353948.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} + assert results['NM_001353948.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001353948.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} assert 
results['NM_001353948.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340877.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353948.1'} - assert 'NM_001353949.1:c.233_242delinsGT' in results.keys() + assert 'NM_001353949.1:c.233_242delinsGT' in list(results.keys()) assert results['NM_001353949.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353949.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353949.1:c.233_242delinsGT']['alt_genomic_loci'] == [] @@ -14464,13 +14464,13 @@ def test_variant272(self): assert results['NM_001353949.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' assert results['NM_001353949.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353949.1:c.233_242delinsGT' assert results['NM_001353949.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353949.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001353949.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} - assert results['NM_001353949.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001353949.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353949.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001353949.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} + assert results['NM_001353949.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001353949.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} assert results['NM_001353949.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340878.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353949.1'} - assert 'NM_001353957.1:c.233_242delinsGT' in results.keys() + assert 'NM_001353957.1:c.233_242delinsGT' in list(results.keys()) assert results['NM_001353957.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353957.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353957.1:c.233_242delinsGT']['alt_genomic_loci'] == [] @@ -14482,13 +14482,13 @@ def test_variant272(self): assert results['NM_001353957.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' assert results['NM_001353957.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353957.1:c.233_242delinsGT' assert results['NM_001353957.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353957.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 
'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001353957.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} - assert results['NM_001353957.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001353957.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353957.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001353957.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} + assert results['NM_001353957.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001353957.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} assert results['NM_001353957.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340886.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353957.1'} - assert 
'NM_001353952.1:c.233_242delinsGT' in results.keys() + assert 'NM_001353952.1:c.233_242delinsGT' in list(results.keys()) assert results['NM_001353952.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353952.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353952.1:c.233_242delinsGT']['alt_genomic_loci'] == [] @@ -14500,13 +14500,13 @@ def test_variant272(self): assert results['NM_001353952.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' assert results['NM_001353952.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353952.1:c.233_242delinsGT' assert results['NM_001353952.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353952.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001353952.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} - assert results['NM_001353952.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001353952.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353952.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert 
results['NM_001353952.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} + assert results['NM_001353952.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001353952.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} assert results['NM_001353952.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340881.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353952.1'} - assert 'NM_001353954.1:c.233_242delinsGT' in results.keys() + assert 'NM_001353954.1:c.233_242delinsGT' in list(results.keys()) assert results['NM_001353954.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353954.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353954.1:c.233_242delinsGT']['alt_genomic_loci'] == [] @@ -14518,13 +14518,13 @@ def test_variant272(self): assert results['NM_001353954.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' assert results['NM_001353954.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353954.1:c.233_242delinsGT' assert results['NM_001353954.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353954.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001353954.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} - assert results['NM_001353954.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001353954.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353954.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001353954.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} + assert results['NM_001353954.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001353954.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} assert results['NM_001353954.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340883.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353954.1'} - assert 'NM_006920.4:c.233_242delinsGT' in results.keys() + assert 'NM_006920.4:c.233_242delinsGT' in list(results.keys()) assert 
results['NM_006920.4:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == 'LRG_8t1:c.233_242delinsGT' assert results['NM_006920.4:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_006920.4:c.233_242delinsGT']['alt_genomic_loci'] == [] @@ -14536,13 +14536,13 @@ def test_variant272(self): assert results['NM_006920.4:c.233_242delinsGT']['hgvs_lrg_variant'] == 'LRG_8:g.5251_5260delinsGT' assert results['NM_006920.4:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_006920.4:c.233_242delinsGT' assert results['NM_006920.4:c.233_242delinsGT']['hgvs_refseqgene_variant'] == 'NG_011906.1:g.5251_5260delinsGT' - assert results['NM_006920.4:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_006920.4:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} - assert results['NM_006920.4:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_006920.4:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_006920.4:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_006920.4:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} + assert results['NM_006920.4:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_006920.4:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} assert results['NM_006920.4:c.233_242delinsGT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011906.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_008851.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006920.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_8.xml'} - assert 'NM_001353960.1:c.233_242delinsGT' in results.keys() + assert 'NM_001353960.1:c.233_242delinsGT' in list(results.keys()) assert results['NM_001353960.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353960.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353960.1:c.233_242delinsGT']['alt_genomic_loci'] == [] @@ -14554,13 +14554,13 @@ def test_variant272(self): assert results['NM_001353960.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' assert results['NM_001353960.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353960.1:c.233_242delinsGT' assert results['NM_001353960.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353960.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001353960.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} - assert results['NM_001353960.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001353960.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001353960.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001353960.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} + assert results['NM_001353960.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001353960.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} assert results['NM_001353960.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340889.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353960.1'} - assert 'NM_001165964.2:c.233_242delinsGT' in results.keys() + assert 'NM_001165964.2:c.233_242delinsGT' in list(results.keys()) assert 
results['NM_001165964.2:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001165964.2:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001165964.2:c.233_242delinsGT']['alt_genomic_loci'] == [] @@ -14572,19 +14572,19 @@ def test_variant272(self): assert results['NM_001165964.2:c.233_242delinsGT']['hgvs_lrg_variant'] == '' assert results['NM_001165964.2:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001165964.2:c.233_242delinsGT' assert results['NM_001165964.2:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001165964.2:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001165964.2:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} - assert results['NM_001165964.2:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': u'AC'}} - assert results['NM_001165964.2:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': u'AC'}} + assert results['NM_001165964.2:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001165964.2:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 
'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} + assert results['NM_001165964.2:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} + assert results['NM_001165964.2:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} assert results['NM_001165964.2:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159436.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165964.2'} def test_variant273(self): variant = '2-166929891-CCAGGTCCT-C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NR_148667.1:n.638_645del' in results.keys() + assert 'NR_148667.1:n.638_645del' in list(results.keys()) assert results['NR_148667.1:n.638_645del']['hgvs_lrg_transcript_variant'] == '' assert results['NR_148667.1:n.638_645del']['refseqgene_context_intronic_sequence'] == '' assert results['NR_148667.1:n.638_645del']['alt_genomic_loci'] == [] @@ -14602,7 +14602,7 @@ def test_variant273(self): assert results['NR_148667.1:n.638_645del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NR_148667.1:n.638_645del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_148667.1'} - assert 'NM_001165964.2:c.233_240del' in results.keys() + assert 'NM_001165964.2:c.233_240del' in list(results.keys()) assert results['NM_001165964.2:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001165964.2:c.233_240del']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_001165964.2:c.233_240del']['alt_genomic_loci'] == [] @@ -14620,7 +14620,7 @@ def test_variant273(self): assert results['NM_001165964.2:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001165964.2:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159436.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165964.2'} - assert 'NM_001353951.1:c.233_240del' in results.keys() + assert 'NM_001353951.1:c.233_240del' in list(results.keys()) assert results['NM_001353951.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353951.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353951.1:c.233_240del']['alt_genomic_loci'] == [] @@ -14638,7 +14638,7 @@ def test_variant273(self): assert results['NM_001353951.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001353951.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340880.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353951.1'} - assert 'NM_001353954.1:c.233_240del' in results.keys() + assert 'NM_001353954.1:c.233_240del' in list(results.keys()) assert results['NM_001353954.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353954.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353954.1:c.233_240del']['alt_genomic_loci'] == [] @@ -14656,7 +14656,7 @@ def test_variant273(self): assert results['NM_001353954.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': 
{'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001353954.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340883.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353954.1'} - assert 'NM_001353961.1:c.-2193_-2186del' in results.keys() + assert 'NM_001353961.1:c.-2193_-2186del' in list(results.keys()) assert results['NM_001353961.1:c.-2193_-2186del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353961.1:c.-2193_-2186del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353961.1:c.-2193_-2186del']['alt_genomic_loci'] == [] @@ -14674,7 +14674,7 @@ def test_variant273(self): assert results['NM_001353961.1:c.-2193_-2186del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001353961.1:c.-2193_-2186del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340890.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353961.1'} - assert 'NM_001353948.1:c.233_240del' in results.keys() + assert 'NM_001353948.1:c.233_240del' in list(results.keys()) assert results['NM_001353948.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353948.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353948.1:c.233_240del']['alt_genomic_loci'] == [] @@ -14692,7 +14692,7 @@ def test_variant273(self): assert results['NM_001353948.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001353948.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340877.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353948.1'} - assert 'NM_001353960.1:c.233_240del' in results.keys() + assert 'NM_001353960.1:c.233_240del' in list(results.keys()) assert results['NM_001353960.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353960.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353960.1:c.233_240del']['alt_genomic_loci'] == [] @@ -14710,7 +14710,7 @@ def test_variant273(self): assert results['NM_001353960.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001353960.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340889.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353960.1'} - assert 'NM_001202435.1:c.233_240del' in results.keys() + assert 'NM_001202435.1:c.233_240del' in list(results.keys()) assert results['NM_001202435.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001202435.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001202435.1:c.233_240del']['alt_genomic_loci'] == [] @@ -14728,7 +14728,7 @@ def test_variant273(self): assert results['NM_001202435.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001202435.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001189364.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001202435.1'} - assert 'NM_001202435.2:c.233_240del' in results.keys() + assert 'NM_001202435.2:c.233_240del' in list(results.keys()) assert results['NM_001202435.2:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001202435.2:c.233_240del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001202435.2:c.233_240del']['alt_genomic_loci'] == [] @@ -14746,7 +14746,7 @@ def test_variant273(self): assert results['NM_001202435.2:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001202435.2:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001189364.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001202435.2'} - assert 'NM_006920.5:c.233_240del' in results.keys() + assert 'NM_006920.5:c.233_240del' in list(results.keys()) assert results['NM_006920.5:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_006920.5:c.233_240del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_006920.5:c.233_240del']['alt_genomic_loci'] == [] @@ -14764,7 +14764,7 @@ def test_variant273(self): assert results['NM_006920.5:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_006920.5:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_008851.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006920.5'} - assert 'NM_001353955.1:c.233_240del' in results.keys() + assert 'NM_001353955.1:c.233_240del' in list(results.keys()) assert results['NM_001353955.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353955.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353955.1:c.233_240del']['alt_genomic_loci'] == [] @@ -14782,7 +14782,7 @@ def test_variant273(self): assert results['NM_001353955.1:c.233_240del']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001353955.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340884.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353955.1'} - assert 'NM_001353952.1:c.233_240del' in results.keys() + assert 'NM_001353952.1:c.233_240del' in list(results.keys()) assert results['NM_001353952.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353952.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353952.1:c.233_240del']['alt_genomic_loci'] == [] @@ -14800,7 +14800,7 @@ def test_variant273(self): assert results['NM_001353952.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001353952.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340881.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353952.1'} - assert 'NM_001353957.1:c.233_240del' in results.keys() + assert 'NM_001353957.1:c.233_240del' in list(results.keys()) assert results['NM_001353957.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353957.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353957.1:c.233_240del']['alt_genomic_loci'] == [] @@ -14819,7 +14819,7 @@ def test_variant273(self): assert results['NM_001353957.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340886.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353957.1'} assert results['flag'] == 'gene_variant' - assert 'NM_006920.4:c.233_240del' in results.keys() + assert 'NM_006920.4:c.233_240del' in 
list(results.keys()) assert results['NM_006920.4:c.233_240del']['hgvs_lrg_transcript_variant'] == 'LRG_8t1:c.233_240del' assert results['NM_006920.4:c.233_240del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_006920.4:c.233_240del']['alt_genomic_loci'] == [] @@ -14837,7 +14837,7 @@ def test_variant273(self): assert results['NM_006920.4:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_006920.4:c.233_240del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011906.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_008851.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006920.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_8.xml'} - assert 'NM_001353950.1:c.233_240del' in results.keys() + assert 'NM_001353950.1:c.233_240del' in list(results.keys()) assert results['NM_001353950.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353950.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353950.1:c.233_240del']['alt_genomic_loci'] == [] @@ -14855,7 +14855,7 @@ def test_variant273(self): assert results['NM_001353950.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001353950.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340879.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353950.1'} - assert 'NM_001165963.2:c.233_240del' in results.keys() + assert 'NM_001165963.2:c.233_240del' in list(results.keys()) assert results['NM_001165963.2:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001165963.2:c.233_240del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001165963.2:c.233_240del']['alt_genomic_loci'] == [] @@ -14873,7 +14873,7 @@ def test_variant273(self): assert results['NM_001165963.2:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001165963.2:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159435.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165963.2'} - assert 'NM_001165963.1:c.233_240del' in results.keys() + assert 'NM_001165963.1:c.233_240del' in list(results.keys()) assert results['NM_001165963.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001165963.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001165963.1:c.233_240del']['alt_genomic_loci'] == [] @@ -14891,7 +14891,7 @@ def test_variant273(self): assert results['NM_001165963.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001165963.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159435.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165963.1'} - assert 'NM_001165964.1:c.233_240del' in results.keys() + assert 'NM_001165964.1:c.233_240del' in list(results.keys()) assert results['NM_001165964.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001165964.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001165964.1:c.233_240del']['alt_genomic_loci'] == [] @@ -14909,7 +14909,7 @@ def test_variant273(self): assert 
results['NM_001165964.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001165964.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159436.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165964.1'} - assert 'NM_001353958.1:c.233_240del' in results.keys() + assert 'NM_001353958.1:c.233_240del' in list(results.keys()) assert results['NM_001353958.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353958.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353958.1:c.233_240del']['alt_genomic_loci'] == [] @@ -14927,7 +14927,7 @@ def test_variant273(self): assert results['NM_001353958.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001353958.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340887.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353958.1'} - assert 'NM_001353949.1:c.233_240del' in results.keys() + assert 'NM_001353949.1:c.233_240del' in list(results.keys()) assert results['NM_001353949.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353949.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353949.1:c.233_240del']['alt_genomic_loci'] == [] @@ -14949,9 +14949,9 @@ def test_variant273(self): def test_variant274(self): variant = '2-179393504-G-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001256850.1:c.102051C>A' in results.keys() + assert 'NM_001256850.1:c.102051C>A' in list(results.keys()) assert 
results['NM_001256850.1:c.102051C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001256850.1:c.102051C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001256850.1:c.102051C>A']['alt_genomic_loci'] == [] @@ -14963,13 +14963,13 @@ def test_variant274(self): assert results['NM_001256850.1:c.102051C>A']['hgvs_lrg_variant'] == '' assert results['NM_001256850.1:c.102051C>A']['hgvs_transcript_variant'] == 'NM_001256850.1:c.102051C>A' assert results['NM_001256850.1:c.102051C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001256850.1:c.102051C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} - assert results['NM_001256850.1:c.102051C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '178528777', 'alt': u'T'}} - assert results['NM_001256850.1:c.102051C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} - assert results['NM_001256850.1:c.102051C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '178528777', 'alt': u'T'}} + assert results['NM_001256850.1:c.102051C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} + assert results['NM_001256850.1:c.102051C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} + assert results['NM_001256850.1:c.102051C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': 
'179393504', 'alt': 'T'}} + assert results['NM_001256850.1:c.102051C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} assert results['NM_001256850.1:c.102051C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256850.1'} - assert 'NR_038271.1:n.446+5141G>T' in results.keys() + assert 'NR_038271.1:n.446+5141G>T' in list(results.keys()) assert results['NR_038271.1:n.446+5141G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NR_038271.1:n.446+5141G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NR_038271.1:n.446+5141G>T']['alt_genomic_loci'] == [] @@ -14987,7 +14987,7 @@ def test_variant274(self): assert results['NR_038271.1:n.446+5141G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} assert results['NR_038271.1:n.446+5141G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_038271.1'} - assert 'NM_133432.3:c.80154C>A' in results.keys() + assert 'NM_133432.3:c.80154C>A' in list(results.keys()) assert results['NM_133432.3:c.80154C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_133432.3:c.80154C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_133432.3:c.80154C>A']['alt_genomic_loci'] == [] @@ -14999,13 +14999,13 @@ def test_variant274(self): assert results['NM_133432.3:c.80154C>A']['hgvs_lrg_variant'] == '' assert results['NM_133432.3:c.80154C>A']['hgvs_transcript_variant'] == 'NM_133432.3:c.80154C>A' assert results['NM_133432.3:c.80154C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_133432.3:c.80154C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 
'chr2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} - assert results['NM_133432.3:c.80154C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '178528777', 'alt': u'T'}} - assert results['NM_133432.3:c.80154C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} - assert results['NM_133432.3:c.80154C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '178528777', 'alt': u'T'}} + assert results['NM_133432.3:c.80154C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} + assert results['NM_133432.3:c.80154C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} + assert results['NM_133432.3:c.80154C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} + assert results['NM_133432.3:c.80154C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} assert results['NM_133432.3:c.80154C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_597676.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_133432.3'} - assert 'NM_001267550.1:c.106974C>A' in results.keys() + assert 'NM_001267550.1:c.106974C>A' in list(results.keys()) assert results['NM_001267550.1:c.106974C>A']['hgvs_lrg_transcript_variant'] == 'LRG_391t1:c.106974C>A' assert 
results['NM_001267550.1:c.106974C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001267550.1:c.106974C>A']['alt_genomic_loci'] == [] @@ -15017,13 +15017,13 @@ def test_variant274(self): assert results['NM_001267550.1:c.106974C>A']['hgvs_lrg_variant'] == 'LRG_391:g.307026C>A' assert results['NM_001267550.1:c.106974C>A']['hgvs_transcript_variant'] == 'NM_001267550.1:c.106974C>A' assert results['NM_001267550.1:c.106974C>A']['hgvs_refseqgene_variant'] == 'NG_011618.3:g.307026C>A' - assert results['NM_001267550.1:c.106974C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} - assert 'hg38' not in results['NM_001267550.1:c.106974C>A']['primary_assembly_loci'].keys() - assert results['NM_001267550.1:c.106974C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} - assert 'grch38' not in results['NM_001267550.1:c.106974C>A']['primary_assembly_loci'].keys() + assert results['NM_001267550.1:c.106974C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} + assert 'hg38' not in list(results['NM_001267550.1:c.106974C>A']['primary_assembly_loci'].keys()) + assert results['NM_001267550.1:c.106974C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} + assert 'grch38' not in list(results['NM_001267550.1:c.106974C>A']['primary_assembly_loci'].keys()) assert results['NM_001267550.1:c.106974C>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011618.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001254479.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001267550.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_391.xml'} - assert 'NR_038272.1:n.219+5141G>T' in results.keys() + assert 'NR_038272.1:n.219+5141G>T' in list(results.keys()) assert results['NR_038272.1:n.219+5141G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NR_038272.1:n.219+5141G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NR_038272.1:n.219+5141G>T']['alt_genomic_loci'] == [] @@ -15041,7 +15041,7 @@ def test_variant274(self): assert results['NR_038272.1:n.219+5141G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} assert results['NR_038272.1:n.219+5141G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_038272.1'} - assert 'NM_133437.4:c.80355C>A' in results.keys() + assert 'NM_133437.4:c.80355C>A' in list(results.keys()) assert results['NM_133437.4:c.80355C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_133437.4:c.80355C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_133437.4:c.80355C>A']['alt_genomic_loci'] == [] @@ -15053,14 +15053,14 @@ def test_variant274(self): assert results['NM_133437.4:c.80355C>A']['hgvs_lrg_variant'] == '' assert results['NM_133437.4:c.80355C>A']['hgvs_transcript_variant'] == 'NM_133437.4:c.80355C>A' assert results['NM_133437.4:c.80355C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_133437.4:c.80355C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} - assert results['NM_133437.4:c.80355C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '178528777', 'alt': u'T'}} - assert 
results['NM_133437.4:c.80355C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} - assert results['NM_133437.4:c.80355C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '178528777', 'alt': u'T'}} + assert results['NM_133437.4:c.80355C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} + assert results['NM_133437.4:c.80355C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} + assert results['NM_133437.4:c.80355C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} + assert results['NM_133437.4:c.80355C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} assert results['NM_133437.4:c.80355C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_597681.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_133437.4'} assert results['flag'] == 'gene_variant' - assert 'NM_133378.4:c.99270C>A' in results.keys() + assert 'NM_133378.4:c.99270C>A' in list(results.keys()) assert results['NM_133378.4:c.99270C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_133378.4:c.99270C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_133378.4:c.99270C>A']['alt_genomic_loci'] == [] @@ -15072,13 +15072,13 @@ def test_variant274(self): assert results['NM_133378.4:c.99270C>A']['hgvs_lrg_variant'] == '' assert 
results['NM_133378.4:c.99270C>A']['hgvs_transcript_variant'] == 'NM_133378.4:c.99270C>A' assert results['NM_133378.4:c.99270C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_133378.4:c.99270C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} - assert results['NM_133378.4:c.99270C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '178528777', 'alt': u'T'}} - assert results['NM_133378.4:c.99270C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} - assert results['NM_133378.4:c.99270C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '178528777', 'alt': u'T'}} + assert results['NM_133378.4:c.99270C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} + assert results['NM_133378.4:c.99270C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} + assert results['NM_133378.4:c.99270C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} + assert results['NM_133378.4:c.99270C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} assert results['NM_133378.4:c.99270C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_596869.4', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_133378.4'} - assert 'NM_001267550.2:c.106974C>A' in results.keys() + assert 'NM_001267550.2:c.106974C>A' in list(results.keys()) assert results['NM_001267550.2:c.106974C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001267550.2:c.106974C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001267550.2:c.106974C>A']['alt_genomic_loci'] == [] @@ -15090,13 +15090,13 @@ def test_variant274(self): assert results['NM_001267550.2:c.106974C>A']['hgvs_lrg_variant'] == '' assert results['NM_001267550.2:c.106974C>A']['hgvs_transcript_variant'] == 'NM_001267550.2:c.106974C>A' assert results['NM_001267550.2:c.106974C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001267550.2:c.106974C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} - assert results['NM_001267550.2:c.106974C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '178528777', 'alt': u'T'}} - assert results['NM_001267550.2:c.106974C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} - assert results['NM_001267550.2:c.106974C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '178528777', 'alt': u'T'}} + assert results['NM_001267550.2:c.106974C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} + assert results['NM_001267550.2:c.106974C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} + 
assert results['NM_001267550.2:c.106974C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} + assert results['NM_001267550.2:c.106974C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} assert results['NM_001267550.2:c.106974C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001254479.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001267550.2'} - assert 'NM_133437.3:c.80355C>A' in results.keys() + assert 'NM_133437.3:c.80355C>A' in list(results.keys()) assert results['NM_133437.3:c.80355C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_133437.3:c.80355C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_133437.3:c.80355C>A']['alt_genomic_loci'] == [] @@ -15108,13 +15108,13 @@ def test_variant274(self): assert results['NM_133437.3:c.80355C>A']['hgvs_lrg_variant'] == '' assert results['NM_133437.3:c.80355C>A']['hgvs_transcript_variant'] == 'NM_133437.3:c.80355C>A' assert results['NM_133437.3:c.80355C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_133437.3:c.80355C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} - assert 'hg38' not in results['NM_133437.3:c.80355C>A']['primary_assembly_loci'].keys() - assert results['NM_133437.3:c.80355C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} - assert 'grch38' not in results['NM_133437.3:c.80355C>A']['primary_assembly_loci'].keys() + assert results['NM_133437.3:c.80355C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 
'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} + assert 'hg38' not in list(results['NM_133437.3:c.80355C>A']['primary_assembly_loci'].keys()) + assert results['NM_133437.3:c.80355C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} + assert 'grch38' not in list(results['NM_133437.3:c.80355C>A']['primary_assembly_loci'].keys()) assert results['NM_133437.3:c.80355C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_597681.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_133437.3'} - assert 'NM_003319.4:c.79779C>A' in results.keys() + assert 'NM_003319.4:c.79779C>A' in list(results.keys()) assert results['NM_003319.4:c.79779C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003319.4:c.79779C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003319.4:c.79779C>A']['alt_genomic_loci'] == [] @@ -15126,20 +15126,20 @@ def test_variant274(self): assert results['NM_003319.4:c.79779C>A']['hgvs_lrg_variant'] == '' assert results['NM_003319.4:c.79779C>A']['hgvs_transcript_variant'] == 'NM_003319.4:c.79779C>A' assert results['NM_003319.4:c.79779C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_003319.4:c.79779C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} - assert results['NM_003319.4:c.79779C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': u'G', 'pos': '178528777', 'alt': u'T'}} - assert results['NM_003319.4:c.79779C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '179393504', 'alt': u'T'}} - assert 
results['NM_003319.4:c.79779C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': u'G', 'pos': '178528777', 'alt': u'T'}} + assert results['NM_003319.4:c.79779C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} + assert results['NM_003319.4:c.79779C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} + assert results['NM_003319.4:c.79779C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} + assert results['NM_003319.4:c.79779C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} assert results['NM_003319.4:c.79779C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003310.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003319.4'} def test_variant275(self): variant = '2-185803444-TGCAGCTGCTGCAGCTGCAGCTGCA-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_194250.1:c.3324_3347del' in results.keys() + assert 'NM_194250.1:c.3324_3347del' in list(results.keys()) assert results['NM_194250.1:c.3324_3347del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_194250.1:c.3324_3347del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_194250.1:c.3324_3347del']['alt_genomic_loci'] == [] @@ -15161,10 +15161,10 @@ def test_variant275(self): def test_variant276(self): variant = '2-201950249-G-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert 
results['flag'] == 'gene_variant' - assert 'NM_002491.2:c.208G>T' in results.keys() + assert 'NM_002491.2:c.208G>T' in list(results.keys()) assert results['NM_002491.2:c.208G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_002491.2:c.208G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_002491.2:c.208G>T']['alt_genomic_loci'] == [] @@ -15182,7 +15182,7 @@ def test_variant276(self): assert results['NM_002491.2:c.208G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.201085526G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '201085526', 'alt': 'T'}} assert results['NM_002491.2:c.208G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_032156.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002482.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002491.2'} - assert 'NM_001257102.1:c.208G>T' in results.keys() + assert 'NM_001257102.1:c.208G>T' in list(results.keys()) assert results['NM_001257102.1:c.208G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001257102.1:c.208G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001257102.1:c.208G>T']['alt_genomic_loci'] == [] @@ -15204,9 +15204,9 @@ def test_variant276(self): def test_variant277(self): variant = '2-238268730-C-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_004369.3:c.6282+1G>T' in results.keys() + assert 'NM_004369.3:c.6282+1G>T' in list(results.keys()) assert results['NM_004369.3:c.6282+1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_473t1:c.6282+1G>T' assert results['NM_004369.3:c.6282+1G>T']['refseqgene_context_intronic_sequence'] == 'NG_008676.1(NM_004369.3):c.6282+1G>T' assert results['NM_004369.3:c.6282+1G>T']['alt_genomic_loci'] == [] @@ -15218,14 +15218,14 @@ def test_variant277(self): assert results['NM_004369.3:c.6282+1G>T']['hgvs_lrg_variant'] == 'LRG_473:g.59121G>T' assert 
results['NM_004369.3:c.6282+1G>T']['hgvs_transcript_variant'] == 'NM_004369.3:c.6282+1G>T' assert results['NM_004369.3:c.6282+1G>T']['hgvs_refseqgene_variant'] == 'NG_008676.1:g.59121G>T' - assert results['NM_004369.3:c.6282+1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': 'chr2', 'ref': u'C', 'pos': '238268730', 'alt': u'A'}} - assert results['NM_004369.3:c.6282+1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': 'chr2', 'ref': u'C', 'pos': '237360087', 'alt': u'A'}} - assert results['NM_004369.3:c.6282+1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': '2', 'ref': u'C', 'pos': '238268730', 'alt': u'A'}} - assert results['NM_004369.3:c.6282+1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': '2', 'ref': u'C', 'pos': '237360087', 'alt': u'A'}} + assert results['NM_004369.3:c.6282+1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '238268730', 'alt': 'A'}} + assert results['NM_004369.3:c.6282+1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '237360087', 'alt': 'A'}} + assert results['NM_004369.3:c.6282+1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '238268730', 'alt': 'A'}} + assert results['NM_004369.3:c.6282+1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '237360087', 'alt': 'A'}} assert results['NM_004369.3:c.6282+1G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008676.1', 
'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004360.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004369.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_473.xml'} assert results['flag'] == 'gene_variant' - assert 'NM_057166.4:c.4461+1G>T' in results.keys() + assert 'NM_057166.4:c.4461+1G>T' in list(results.keys()) assert results['NM_057166.4:c.4461+1G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_057166.4:c.4461+1G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_057166.4:c.4461+1G>T']['alt_genomic_loci'] == [] @@ -15237,13 +15237,13 @@ def test_variant277(self): assert results['NM_057166.4:c.4461+1G>T']['hgvs_lrg_variant'] == '' assert results['NM_057166.4:c.4461+1G>T']['hgvs_transcript_variant'] == 'NM_057166.4:c.4461+1G>T' assert results['NM_057166.4:c.4461+1G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_057166.4:c.4461+1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': 'chr2', 'ref': u'C', 'pos': '238268730', 'alt': u'A'}} - assert results['NM_057166.4:c.4461+1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': 'chr2', 'ref': u'C', 'pos': '237360087', 'alt': u'A'}} - assert results['NM_057166.4:c.4461+1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': '2', 'ref': u'C', 'pos': '238268730', 'alt': u'A'}} - assert results['NM_057166.4:c.4461+1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': '2', 'ref': u'C', 'pos': '237360087', 'alt': u'A'}} + assert results['NM_057166.4:c.4461+1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '238268730', 'alt': 'A'}} + assert results['NM_057166.4:c.4461+1G>T']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '237360087', 'alt': 'A'}} + assert results['NM_057166.4:c.4461+1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '238268730', 'alt': 'A'}} + assert results['NM_057166.4:c.4461+1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '237360087', 'alt': 'A'}} assert results['NM_057166.4:c.4461+1G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_476507.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_057166.4'} - assert 'NM_057167.3:c.5664+1G>T' in results.keys() + assert 'NM_057167.3:c.5664+1G>T' in list(results.keys()) assert results['NM_057167.3:c.5664+1G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_057167.3:c.5664+1G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_057167.3:c.5664+1G>T']['alt_genomic_loci'] == [] @@ -15255,19 +15255,19 @@ def test_variant277(self): assert results['NM_057167.3:c.5664+1G>T']['hgvs_lrg_variant'] == '' assert results['NM_057167.3:c.5664+1G>T']['hgvs_transcript_variant'] == 'NM_057167.3:c.5664+1G>T' assert results['NM_057167.3:c.5664+1G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_057167.3:c.5664+1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': 'chr2', 'ref': u'C', 'pos': '238268730', 'alt': u'A'}} - assert results['NM_057167.3:c.5664+1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': 'chr2', 'ref': u'C', 'pos': '237360087', 'alt': u'A'}} - assert results['NM_057167.3:c.5664+1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': '2', 'ref': u'C', 'pos': 
'238268730', 'alt': u'A'}} - assert results['NM_057167.3:c.5664+1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': '2', 'ref': u'C', 'pos': '237360087', 'alt': u'A'}} + assert results['NM_057167.3:c.5664+1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '238268730', 'alt': 'A'}} + assert results['NM_057167.3:c.5664+1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '237360087', 'alt': 'A'}} + assert results['NM_057167.3:c.5664+1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '238268730', 'alt': 'A'}} + assert results['NM_057167.3:c.5664+1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '237360087', 'alt': 'A'}} assert results['NM_057167.3:c.5664+1G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_476508.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_057167.3'} def test_variant278(self): variant = '21-43897396-C-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_080860.2:c.727+5G>A' in results.keys() + assert 'NM_080860.2:c.727+5G>A' in list(results.keys()) assert results['NM_080860.2:c.727+5G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_080860.2:c.727+5G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_080860.2:c.727+5G>A']['alt_genomic_loci'] == [] @@ -15279,14 +15279,14 @@ def test_variant278(self): assert results['NM_080860.2:c.727+5G>A']['hgvs_lrg_variant'] == '' assert results['NM_080860.2:c.727+5G>A']['hgvs_transcript_variant'] == 'NM_080860.2:c.727+5G>A' assert 
results['NM_080860.2:c.727+5G>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_080860.2:c.727+5G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': 'chr21', 'ref': u'C', 'pos': '43897396', 'alt': u'T'}} - assert 'hg38' not in results['NM_080860.2:c.727+5G>A']['primary_assembly_loci'].keys() - assert results['NM_080860.2:c.727+5G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': '21', 'ref': u'C', 'pos': '43897396', 'alt': u'T'}} - assert 'grch38' not in results['NM_080860.2:c.727+5G>A']['primary_assembly_loci'].keys() + assert results['NM_080860.2:c.727+5G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': 'chr21', 'ref': 'C', 'pos': '43897396', 'alt': 'T'}} + assert 'hg38' not in list(results['NM_080860.2:c.727+5G>A']['primary_assembly_loci'].keys()) + assert results['NM_080860.2:c.727+5G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': '21', 'ref': 'C', 'pos': '43897396', 'alt': 'T'}} + assert 'grch38' not in list(results['NM_080860.2:c.727+5G>A']['primary_assembly_loci'].keys()) assert results['NM_080860.2:c.727+5G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_543136.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_080860.2'} assert results['flag'] == 'gene_variant' - assert 'NM_080860.3:c.727+5G>A' in results.keys() + assert 'NM_080860.3:c.727+5G>A' in list(results.keys()) assert results['NM_080860.3:c.727+5G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_080860.3:c.727+5G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_080860.3:c.727+5G>A']['alt_genomic_loci'] == [] @@ -15298,13 +15298,13 @@ def test_variant278(self): assert results['NM_080860.3:c.727+5G>A']['hgvs_lrg_variant'] == '' assert 
results['NM_080860.3:c.727+5G>A']['hgvs_transcript_variant'] == 'NM_080860.3:c.727+5G>A' assert results['NM_080860.3:c.727+5G>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_080860.3:c.727+5G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': 'chr21', 'ref': u'C', 'pos': '43897396', 'alt': u'T'}} - assert results['NM_080860.3:c.727+5G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000021.9:g.42477286C>T', 'vcf': {'chr': 'chr21', 'ref': u'C', 'pos': '42477286', 'alt': u'T'}} - assert results['NM_080860.3:c.727+5G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': '21', 'ref': u'C', 'pos': '43897396', 'alt': u'T'}} - assert results['NM_080860.3:c.727+5G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000021.9:g.42477286C>T', 'vcf': {'chr': '21', 'ref': u'C', 'pos': '42477286', 'alt': u'T'}} + assert results['NM_080860.3:c.727+5G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': 'chr21', 'ref': 'C', 'pos': '43897396', 'alt': 'T'}} + assert results['NM_080860.3:c.727+5G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000021.9:g.42477286C>T', 'vcf': {'chr': 'chr21', 'ref': 'C', 'pos': '42477286', 'alt': 'T'}} + assert results['NM_080860.3:c.727+5G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': '21', 'ref': 'C', 'pos': '43897396', 'alt': 'T'}} + assert results['NM_080860.3:c.727+5G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000021.9:g.42477286C>T', 'vcf': {'chr': '21', 'ref': 'C', 'pos': '42477286', 'alt': 'T'}} assert results['NM_080860.3:c.727+5G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_543136.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_080860.3'} - assert 'NM_001286506.1:c.613+5G>A' in results.keys() + assert 'NM_001286506.1:c.613+5G>A' in list(results.keys()) assert results['NM_001286506.1:c.613+5G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001286506.1:c.613+5G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001286506.1:c.613+5G>A']['alt_genomic_loci'] == [] @@ -15316,19 +15316,19 @@ def test_variant278(self): assert results['NM_001286506.1:c.613+5G>A']['hgvs_lrg_variant'] == '' assert results['NM_001286506.1:c.613+5G>A']['hgvs_transcript_variant'] == 'NM_001286506.1:c.613+5G>A' assert results['NM_001286506.1:c.613+5G>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001286506.1:c.613+5G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': 'chr21', 'ref': u'C', 'pos': '43897396', 'alt': u'T'}} - assert results['NM_001286506.1:c.613+5G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000021.9:g.42477286C>T', 'vcf': {'chr': 'chr21', 'ref': u'C', 'pos': '42477286', 'alt': u'T'}} - assert results['NM_001286506.1:c.613+5G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': '21', 'ref': u'C', 'pos': '43897396', 'alt': u'T'}} - assert results['NM_001286506.1:c.613+5G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000021.9:g.42477286C>T', 'vcf': {'chr': '21', 'ref': u'C', 'pos': '42477286', 'alt': u'T'}} + assert results['NM_001286506.1:c.613+5G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': 'chr21', 'ref': 'C', 'pos': '43897396', 'alt': 'T'}} + assert results['NM_001286506.1:c.613+5G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000021.9:g.42477286C>T', 'vcf': {'chr': 'chr21', 'ref': 'C', 'pos': '42477286', 'alt': 'T'}} + assert 
results['NM_001286506.1:c.613+5G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': '21', 'ref': 'C', 'pos': '43897396', 'alt': 'T'}} + assert results['NM_001286506.1:c.613+5G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000021.9:g.42477286C>T', 'vcf': {'chr': '21', 'ref': 'C', 'pos': '42477286', 'alt': 'T'}} assert results['NM_001286506.1:c.613+5G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001273435.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001286506.1'} def test_variant279(self): variant = '22-30064360-G-GCGACGC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_000268.3:c.924_925insCGACGC' in results.keys() + assert 'NM_000268.3:c.924_925insCGACGC' in list(results.keys()) assert results['NM_000268.3:c.924_925insCGACGC']['hgvs_lrg_transcript_variant'] == 'LRG_511t1:c.924_925insCGACGC' assert results['NM_000268.3:c.924_925insCGACGC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000268.3:c.924_925insCGACGC']['alt_genomic_loci'] == [] @@ -15346,7 +15346,7 @@ def test_variant279(self): assert results['NM_000268.3:c.924_925insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} assert results['NM_000268.3:c.924_925insCGACGC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009057.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000259.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000268.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_511.xml'} - assert 'NM_181828.2:c.798_799insCGACGC' in results.keys() + assert 'NM_181828.2:c.798_799insCGACGC' in list(results.keys()) assert 
results['NM_181828.2:c.798_799insCGACGC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_181828.2:c.798_799insCGACGC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_181828.2:c.798_799insCGACGC']['alt_genomic_loci'] == [] @@ -15364,7 +15364,7 @@ def test_variant279(self): assert results['NM_181828.2:c.798_799insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} assert results['NM_181828.2:c.798_799insCGACGC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861966.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181828.2'} - assert 'NM_181830.2:c.675_676insCGACGC' in results.keys() + assert 'NM_181830.2:c.675_676insCGACGC' in list(results.keys()) assert results['NM_181830.2:c.675_676insCGACGC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_181830.2:c.675_676insCGACGC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_181830.2:c.675_676insCGACGC']['alt_genomic_loci'] == [] @@ -15382,7 +15382,7 @@ def test_variant279(self): assert results['NM_181830.2:c.675_676insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} assert results['NM_181830.2:c.675_676insCGACGC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861968.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181830.2'} - assert 'NM_181825.2:c.924_925insCGACGC' in results.keys() + assert 'NM_181825.2:c.924_925insCGACGC' in list(results.keys()) assert results['NM_181825.2:c.924_925insCGACGC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_181825.2:c.924_925insCGACGC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_181825.2:c.924_925insCGACGC']['alt_genomic_loci'] == 
[] @@ -15400,7 +15400,7 @@ def test_variant279(self): assert results['NM_181825.2:c.924_925insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} assert results['NM_181825.2:c.924_925insCGACGC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861546.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181825.2'} - assert 'NM_181832.2:c.924_925insCGACGC' in results.keys() + assert 'NM_181832.2:c.924_925insCGACGC' in list(results.keys()) assert results['NM_181832.2:c.924_925insCGACGC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_181832.2:c.924_925insCGACGC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_181832.2:c.924_925insCGACGC']['alt_genomic_loci'] == [] @@ -15418,7 +15418,7 @@ def test_variant279(self): assert results['NM_181832.2:c.924_925insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} assert results['NM_181832.2:c.924_925insCGACGC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861970.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181832.2'} - assert 'NM_181833.2:c.447+26086_447+26087insCGACGC' in results.keys() + assert 'NM_181833.2:c.447+26086_447+26087insCGACGC' in list(results.keys()) assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['alt_genomic_loci'] == [] @@ -15436,7 +15436,7 @@ def test_variant279(self): assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861971.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181833.2'} - assert 'NM_016418.5:c.924_925insCGACGC' in results.keys() + assert 'NM_016418.5:c.924_925insCGACGC' in list(results.keys()) assert results['NM_016418.5:c.924_925insCGACGC']['hgvs_lrg_transcript_variant'] == 'LRG_511t2:c.924_925insCGACGC' assert results['NM_016418.5:c.924_925insCGACGC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_016418.5:c.924_925insCGACGC']['alt_genomic_loci'] == [] @@ -15454,7 +15454,7 @@ def test_variant279(self): assert results['NM_016418.5:c.924_925insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} assert results['NM_016418.5:c.924_925insCGACGC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009057.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057502.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016418.5', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_511.xml'} - assert 'NM_181829.2:c.801_802insCGACGC' in results.keys() + assert 'NM_181829.2:c.801_802insCGACGC' in list(results.keys()) assert results['NM_181829.2:c.801_802insCGACGC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_181829.2:c.801_802insCGACGC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_181829.2:c.801_802insCGACGC']['alt_genomic_loci'] == [] @@ -15473,7 +15473,7 @@ def test_variant279(self): assert results['NM_181829.2:c.801_802insCGACGC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861967.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_181829.2'} assert results['flag'] == 'gene_variant' - assert 'NR_156186.1:n.1483_1484insCGACGC' in results.keys() + assert 'NR_156186.1:n.1483_1484insCGACGC' in list(results.keys()) assert results['NR_156186.1:n.1483_1484insCGACGC']['hgvs_lrg_transcript_variant'] == '' assert results['NR_156186.1:n.1483_1484insCGACGC']['refseqgene_context_intronic_sequence'] == '' assert results['NR_156186.1:n.1483_1484insCGACGC']['alt_genomic_loci'] == [] @@ -15486,12 +15486,12 @@ def test_variant279(self): assert results['NR_156186.1:n.1483_1484insCGACGC']['hgvs_transcript_variant'] == 'NR_156186.1:n.1483_1484insCGACGC' assert results['NR_156186.1:n.1483_1484insCGACGC']['hgvs_refseqgene_variant'] == '' assert results['NR_156186.1:n.1483_1484insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} - assert 'hg38' not in results['NR_156186.1:n.1483_1484insCGACGC']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NR_156186.1:n.1483_1484insCGACGC']['primary_assembly_loci'].keys()) assert results['NR_156186.1:n.1483_1484insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} - assert 'grch38' not in results['NR_156186.1:n.1483_1484insCGACGC']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NR_156186.1:n.1483_1484insCGACGC']['primary_assembly_loci'].keys()) assert results['NR_156186.1:n.1483_1484insCGACGC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_156186.1'} - assert 'NM_181831.2:c.675_676insCGACGC' in results.keys() + assert 'NM_181831.2:c.675_676insCGACGC' in list(results.keys()) assert results['NM_181831.2:c.675_676insCGACGC']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_181831.2:c.675_676insCGACGC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_181831.2:c.675_676insCGACGC']['alt_genomic_loci'] == [] @@ -15513,9 +15513,9 @@ def test_variant279(self): def test_variant280(self): variant = '3-10188187-TGTCCCGATAG-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_198156.2:c.341-3280_341-3271del' in results.keys() + assert 'NM_198156.2:c.341-3280_341-3271del' in list(results.keys()) assert results['NM_198156.2:c.341-3280_341-3271del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_198156.2:c.341-3280_341-3271del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_198156.2:c.341-3280_341-3271del']['alt_genomic_loci'] == [] @@ -15534,7 +15534,7 @@ def test_variant280(self): assert results['NM_198156.2:c.341-3280_341-3271del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_937799.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198156.2'} assert results['flag'] == 'gene_variant' - assert 'NM_001354723.1:c.*18-3280_*18-3271del' in results.keys() + assert 'NM_001354723.1:c.*18-3280_*18-3271del' in list(results.keys()) assert results['NM_001354723.1:c.*18-3280_*18-3271del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001354723.1:c.*18-3280_*18-3271del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001354723.1:c.*18-3280_*18-3271del']['alt_genomic_loci'] == [] @@ -15552,7 +15552,7 @@ def test_variant280(self): assert results['NM_001354723.1:c.*18-3280_*18-3271del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.10146507_10146516del', 'vcf': {'chr': '3', 'ref': 'TGTCCCGATAG', 'pos': '10146503', 'alt': 'T'}} assert results['NM_001354723.1:c.*18-3280_*18-3271del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341652.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354723.1'} - assert 'NM_000551.3:c.341-7_343del' in results.keys() + assert 'NM_000551.3:c.341-7_343del' in list(results.keys()) assert results['NM_000551.3:c.341-7_343del']['hgvs_lrg_transcript_variant'] == 'LRG_322t1:c.341-7_343del' assert results['NM_000551.3:c.341-7_343del']['refseqgene_context_intronic_sequence'] == 'NG_008212.3(NM_000551.3):c.341-7_343del' assert results['NM_000551.3:c.341-7_343del']['alt_genomic_loci'] == [] @@ -15574,9 +15574,9 @@ def test_variant280(self): def test_variant281(self): variant = '3-50402127-T-G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001005505.2:c.3408A>C' in results.keys() + assert 'NM_001005505.2:c.3408A>C' in list(results.keys()) assert results['NM_001005505.2:c.3408A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001005505.2:c.3408A>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001005505.2:c.3408A>C']['alt_genomic_loci'] == [] @@ -15588,13 +15588,13 @@ def test_variant281(self): assert results['NM_001005505.2:c.3408A>C']['hgvs_lrg_variant'] == '' assert results['NM_001005505.2:c.3408A>C']['hgvs_transcript_variant'] == 'NM_001005505.2:c.3408A>C' assert results['NM_001005505.2:c.3408A>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_001005505.2:c.3408A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} - assert results['NM_001005505.2:c.3408A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': 'chr3', 'ref': u'T', 'pos': '50364696', 'alt': u'G'}} - assert results['NM_001005505.2:c.3408A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} - assert 
results['NM_001005505.2:c.3408A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': '3', 'ref': u'T', 'pos': '50364696', 'alt': u'G'}} + assert results['NM_001005505.2:c.3408A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} + assert results['NM_001005505.2:c.3408A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': 'chr3', 'ref': 'T', 'pos': '50364696', 'alt': 'G'}} + assert results['NM_001005505.2:c.3408A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} + assert results['NM_001005505.2:c.3408A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50364696', 'alt': 'G'}} assert results['NM_001005505.2:c.3408A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001005505.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001005505.2'} - assert 'NM_006030.2:c.3402A>C' in results.keys() + assert 'NM_006030.2:c.3402A>C' in list(results.keys()) assert results['NM_006030.2:c.3402A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_006030.2:c.3402A>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_006030.2:c.3402A>C']['alt_genomic_loci'] == [] @@ -15606,13 +15606,13 @@ def test_variant281(self): assert results['NM_006030.2:c.3402A>C']['hgvs_lrg_variant'] == '' assert results['NM_006030.2:c.3402A>C']['hgvs_transcript_variant'] == 'NM_006030.2:c.3402A>C' assert results['NM_006030.2:c.3402A>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_006030.2:c.3402A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 
'vcf': {'chr': 'chr3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} - assert 'hg38' not in results['NM_006030.2:c.3402A>C']['primary_assembly_loci'].keys() - assert results['NM_006030.2:c.3402A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} - assert 'grch38' not in results['NM_006030.2:c.3402A>C']['primary_assembly_loci'].keys() + assert results['NM_006030.2:c.3402A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} + assert 'hg38' not in list(results['NM_006030.2:c.3402A>C']['primary_assembly_loci'].keys()) + assert results['NM_006030.2:c.3402A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} + assert 'grch38' not in list(results['NM_006030.2:c.3402A>C']['primary_assembly_loci'].keys()) assert results['NM_006030.2:c.3402A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_006021.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006030.2'} - assert 'NM_001174051.1:c.3423A>C' in results.keys() + assert 'NM_001174051.1:c.3423A>C' in list(results.keys()) assert results['NM_001174051.1:c.3423A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001174051.1:c.3423A>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001174051.1:c.3423A>C']['alt_genomic_loci'] == [] @@ -15624,13 +15624,13 @@ def test_variant281(self): assert results['NM_001174051.1:c.3423A>C']['hgvs_lrg_variant'] == '' assert results['NM_001174051.1:c.3423A>C']['hgvs_transcript_variant'] == 'NM_001174051.1:c.3423A>C' assert results['NM_001174051.1:c.3423A>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_001174051.1:c.3423A>C']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} - assert 'hg38' not in results['NM_001174051.1:c.3423A>C']['primary_assembly_loci'].keys() - assert results['NM_001174051.1:c.3423A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} - assert 'grch38' not in results['NM_001174051.1:c.3423A>C']['primary_assembly_loci'].keys() + assert results['NM_001174051.1:c.3423A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} + assert 'hg38' not in list(results['NM_001174051.1:c.3423A>C']['primary_assembly_loci'].keys()) + assert results['NM_001174051.1:c.3423A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} + assert 'grch38' not in list(results['NM_001174051.1:c.3423A>C']['primary_assembly_loci'].keys()) assert results['NM_001174051.1:c.3423A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167522.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001174051.1'} - assert 'NM_001174051.2:c.3423A>C' in results.keys() + assert 'NM_001174051.2:c.3423A>C' in list(results.keys()) assert results['NM_001174051.2:c.3423A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001174051.2:c.3423A>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001174051.2:c.3423A>C']['alt_genomic_loci'] == [] @@ -15642,13 +15642,13 @@ def test_variant281(self): assert results['NM_001174051.2:c.3423A>C']['hgvs_lrg_variant'] == '' assert results['NM_001174051.2:c.3423A>C']['hgvs_transcript_variant'] == 'NM_001174051.2:c.3423A>C' assert results['NM_001174051.2:c.3423A>C']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_001174051.2:c.3423A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} - assert results['NM_001174051.2:c.3423A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': 'chr3', 'ref': u'T', 'pos': '50364696', 'alt': u'G'}} - assert results['NM_001174051.2:c.3423A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} - assert results['NM_001174051.2:c.3423A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': '3', 'ref': u'T', 'pos': '50364696', 'alt': u'G'}} + assert results['NM_001174051.2:c.3423A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} + assert results['NM_001174051.2:c.3423A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': 'chr3', 'ref': 'T', 'pos': '50364696', 'alt': 'G'}} + assert results['NM_001174051.2:c.3423A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} + assert results['NM_001174051.2:c.3423A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50364696', 'alt': 'G'}} assert results['NM_001174051.2:c.3423A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167522.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001174051.2'} - assert 'NM_006030.3:c.3402A>C' in results.keys() + assert 'NM_006030.3:c.3402A>C' in list(results.keys()) assert 
results['NM_006030.3:c.3402A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_006030.3:c.3402A>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_006030.3:c.3402A>C']['alt_genomic_loci'] == [] @@ -15660,13 +15660,13 @@ def test_variant281(self): assert results['NM_006030.3:c.3402A>C']['hgvs_lrg_variant'] == '' assert results['NM_006030.3:c.3402A>C']['hgvs_transcript_variant'] == 'NM_006030.3:c.3402A>C' assert results['NM_006030.3:c.3402A>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_006030.3:c.3402A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} - assert results['NM_006030.3:c.3402A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': 'chr3', 'ref': u'T', 'pos': '50364696', 'alt': u'G'}} - assert results['NM_006030.3:c.3402A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} - assert results['NM_006030.3:c.3402A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': '3', 'ref': u'T', 'pos': '50364696', 'alt': u'G'}} + assert results['NM_006030.3:c.3402A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} + assert results['NM_006030.3:c.3402A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': 'chr3', 'ref': 'T', 'pos': '50364696', 'alt': 'G'}} + assert results['NM_006030.3:c.3402A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} + assert 
results['NM_006030.3:c.3402A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50364696', 'alt': 'G'}} assert results['NM_006030.3:c.3402A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_006021.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006030.3'} - assert 'NM_001291101.1:c.3201A>C' in results.keys() + assert 'NM_001291101.1:c.3201A>C' in list(results.keys()) assert results['NM_001291101.1:c.3201A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001291101.1:c.3201A>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001291101.1:c.3201A>C']['alt_genomic_loci'] == [] @@ -15678,14 +15678,14 @@ def test_variant281(self): assert results['NM_001291101.1:c.3201A>C']['hgvs_lrg_variant'] == '' assert results['NM_001291101.1:c.3201A>C']['hgvs_transcript_variant'] == 'NM_001291101.1:c.3201A>C' assert results['NM_001291101.1:c.3201A>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_001291101.1:c.3201A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} - assert results['NM_001291101.1:c.3201A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': 'chr3', 'ref': u'T', 'pos': '50364696', 'alt': u'G'}} - assert results['NM_001291101.1:c.3201A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} - assert results['NM_001291101.1:c.3201A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': '3', 'ref': u'T', 'pos': '50364696', 'alt': u'G'}} + assert results['NM_001291101.1:c.3201A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} + assert results['NM_001291101.1:c.3201A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': 'chr3', 'ref': 'T', 'pos': '50364696', 'alt': 'G'}} + assert results['NM_001291101.1:c.3201A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} + assert results['NM_001291101.1:c.3201A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50364696', 'alt': 'G'}} assert results['NM_001291101.1:c.3201A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278030.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291101.1'} assert results['flag'] == 'gene_variant' - assert 'NR_111912.1:n.443-1601T>G' in results.keys() + assert 'NR_111912.1:n.443-1601T>G' in list(results.keys()) assert results['NR_111912.1:n.443-1601T>G']['hgvs_lrg_transcript_variant'] == '' assert results['NR_111912.1:n.443-1601T>G']['refseqgene_context_intronic_sequence'] == '' assert results['NR_111912.1:n.443-1601T>G']['alt_genomic_loci'] == [] @@ -15703,7 +15703,7 @@ def test_variant281(self): assert results['NR_111912.1:n.443-1601T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50364696', 'alt': 'G'}} assert results['NR_111912.1:n.443-1601T>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_111912.1'} - assert 'NM_001005505.1:c.3408A>C' in results.keys() + assert 'NM_001005505.1:c.3408A>C' in list(results.keys()) assert results['NM_001005505.1:c.3408A>C']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001005505.1:c.3408A>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001005505.1:c.3408A>C']['alt_genomic_loci'] == [] @@ -15715,19 +15715,19 @@ def test_variant281(self): assert results['NM_001005505.1:c.3408A>C']['hgvs_lrg_variant'] == '' assert results['NM_001005505.1:c.3408A>C']['hgvs_transcript_variant'] == 'NM_001005505.1:c.3408A>C' assert results['NM_001005505.1:c.3408A>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_001005505.1:c.3408A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} - assert 'hg38' not in results['NM_001005505.1:c.3408A>C']['primary_assembly_loci'].keys() - assert results['NM_001005505.1:c.3408A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': u'T', 'pos': '50402127', 'alt': u'G'}} - assert 'grch38' not in results['NM_001005505.1:c.3408A>C']['primary_assembly_loci'].keys() + assert results['NM_001005505.1:c.3408A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} + assert 'hg38' not in list(results['NM_001005505.1:c.3408A>C']['primary_assembly_loci'].keys()) + assert results['NM_001005505.1:c.3408A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} + assert 'grch38' not in list(results['NM_001005505.1:c.3408A>C']['primary_assembly_loci'].keys()) assert results['NM_001005505.1:c.3408A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001005505.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001005505.1'} def test_variant282(self): variant = '3-50402890-G-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + 
print(results) - assert 'NR_111913.1:n.126G>A' in results.keys() + assert 'NR_111913.1:n.126G>A' in list(results.keys()) assert results['NR_111913.1:n.126G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NR_111913.1:n.126G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NR_111913.1:n.126G>A']['alt_genomic_loci'] == [] @@ -15745,7 +15745,7 @@ def test_variant282(self): assert results['NR_111913.1:n.126G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} assert results['NR_111913.1:n.126G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_111913.1'} - assert 'NR_111912.1:n.443-838G>A' in results.keys() + assert 'NR_111912.1:n.443-838G>A' in list(results.keys()) assert results['NR_111912.1:n.443-838G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NR_111912.1:n.443-838G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NR_111912.1:n.443-838G>A']['alt_genomic_loci'] == [] @@ -15763,7 +15763,7 @@ def test_variant282(self): assert results['NR_111912.1:n.443-838G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} assert results['NR_111912.1:n.443-838G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_111912.1'} - assert 'NM_001291101.1:c.2788C>T' in results.keys() + assert 'NM_001291101.1:c.2788C>T' in list(results.keys()) assert results['NM_001291101.1:c.2788C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001291101.1:c.2788C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001291101.1:c.2788C>T']['alt_genomic_loci'] == [] @@ -15775,13 +15775,13 @@ def test_variant282(self): assert results['NM_001291101.1:c.2788C>T']['hgvs_lrg_variant'] == '' assert 
results['NM_001291101.1:c.2788C>T']['hgvs_transcript_variant'] == 'NM_001291101.1:c.2788C>T' assert results['NM_001291101.1:c.2788C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001291101.1:c.2788C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': u'G', 'pos': '50402890', 'alt': u'A'}} - assert results['NM_001291101.1:c.2788C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'ref': u'G', 'pos': '50365459', 'alt': u'A'}} - assert results['NM_001291101.1:c.2788C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': u'G', 'pos': '50402890', 'alt': u'A'}} - assert results['NM_001291101.1:c.2788C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'ref': u'G', 'pos': '50365459', 'alt': u'A'}} + assert results['NM_001291101.1:c.2788C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} + assert results['NM_001291101.1:c.2788C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} + assert results['NM_001291101.1:c.2788C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} + assert results['NM_001291101.1:c.2788C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} assert results['NM_001291101.1:c.2788C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278030.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291101.1'} - assert 'NM_006030.2:c.2995C>T' in results.keys() + assert 'NM_006030.2:c.2995C>T' in list(results.keys()) assert results['NM_006030.2:c.2995C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_006030.2:c.2995C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_006030.2:c.2995C>T']['alt_genomic_loci'] == [] @@ -15793,13 +15793,13 @@ def test_variant282(self): assert results['NM_006030.2:c.2995C>T']['hgvs_lrg_variant'] == '' assert results['NM_006030.2:c.2995C>T']['hgvs_transcript_variant'] == 'NM_006030.2:c.2995C>T' assert results['NM_006030.2:c.2995C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_006030.2:c.2995C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': u'G', 'pos': '50402890', 'alt': u'A'}} - assert 'hg38' not in results['NM_006030.2:c.2995C>T']['primary_assembly_loci'].keys() - assert results['NM_006030.2:c.2995C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': u'G', 'pos': '50402890', 'alt': u'A'}} - assert 'grch38' not in results['NM_006030.2:c.2995C>T']['primary_assembly_loci'].keys() + assert results['NM_006030.2:c.2995C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} + assert 'hg38' not in list(results['NM_006030.2:c.2995C>T']['primary_assembly_loci'].keys()) + assert results['NM_006030.2:c.2995C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} + assert 'grch38' not in list(results['NM_006030.2:c.2995C>T']['primary_assembly_loci'].keys()) assert results['NM_006030.2:c.2995C>T']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_006021.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006030.2'} - assert 'NR_111914.1:n.126G>A' in results.keys() + assert 'NR_111914.1:n.126G>A' in list(results.keys()) assert results['NR_111914.1:n.126G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NR_111914.1:n.126G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NR_111914.1:n.126G>A']['alt_genomic_loci'] == [] @@ -15817,7 +15817,7 @@ def test_variant282(self): assert results['NR_111914.1:n.126G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} assert results['NR_111914.1:n.126G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_111914.1'} - assert 'NM_001005505.2:c.2995C>T' in results.keys() + assert 'NM_001005505.2:c.2995C>T' in list(results.keys()) assert results['NM_001005505.2:c.2995C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001005505.2:c.2995C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001005505.2:c.2995C>T']['alt_genomic_loci'] == [] @@ -15829,14 +15829,14 @@ def test_variant282(self): assert results['NM_001005505.2:c.2995C>T']['hgvs_lrg_variant'] == '' assert results['NM_001005505.2:c.2995C>T']['hgvs_transcript_variant'] == 'NM_001005505.2:c.2995C>T' assert results['NM_001005505.2:c.2995C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001005505.2:c.2995C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': u'G', 'pos': '50402890', 'alt': u'A'}} - assert results['NM_001005505.2:c.2995C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'ref': u'G', 'pos': '50365459', 'alt': u'A'}} - assert results['NM_001005505.2:c.2995C>T']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': u'G', 'pos': '50402890', 'alt': u'A'}} - assert results['NM_001005505.2:c.2995C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'ref': u'G', 'pos': '50365459', 'alt': u'A'}} + assert results['NM_001005505.2:c.2995C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} + assert results['NM_001005505.2:c.2995C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} + assert results['NM_001005505.2:c.2995C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} + assert results['NM_001005505.2:c.2995C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} assert results['NM_001005505.2:c.2995C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001005505.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001005505.2'} assert results['flag'] == 'gene_variant' - assert 'NM_001174051.1:c.3016C>T' in results.keys() + assert 'NM_001174051.1:c.3016C>T' in list(results.keys()) assert results['NM_001174051.1:c.3016C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001174051.1:c.3016C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001174051.1:c.3016C>T']['alt_genomic_loci'] == [] @@ -15848,13 +15848,13 @@ def test_variant282(self): assert results['NM_001174051.1:c.3016C>T']['hgvs_lrg_variant'] == '' assert results['NM_001174051.1:c.3016C>T']['hgvs_transcript_variant'] == 'NM_001174051.1:c.3016C>T' assert 
results['NM_001174051.1:c.3016C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001174051.1:c.3016C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': u'G', 'pos': '50402890', 'alt': u'A'}} - assert 'hg38' not in results['NM_001174051.1:c.3016C>T']['primary_assembly_loci'].keys() - assert results['NM_001174051.1:c.3016C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': u'G', 'pos': '50402890', 'alt': u'A'}} - assert 'grch38' not in results['NM_001174051.1:c.3016C>T']['primary_assembly_loci'].keys() + assert results['NM_001174051.1:c.3016C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} + assert 'hg38' not in list(results['NM_001174051.1:c.3016C>T']['primary_assembly_loci'].keys()) + assert results['NM_001174051.1:c.3016C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} + assert 'grch38' not in list(results['NM_001174051.1:c.3016C>T']['primary_assembly_loci'].keys()) assert results['NM_001174051.1:c.3016C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167522.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001174051.1'} - assert 'NM_001174051.2:c.3016C>T' in results.keys() + assert 'NM_001174051.2:c.3016C>T' in list(results.keys()) assert results['NM_001174051.2:c.3016C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001174051.2:c.3016C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001174051.2:c.3016C>T']['alt_genomic_loci'] == [] @@ -15866,13 +15866,13 @@ def test_variant282(self): assert results['NM_001174051.2:c.3016C>T']['hgvs_lrg_variant'] == '' assert 
results['NM_001174051.2:c.3016C>T']['hgvs_transcript_variant'] == 'NM_001174051.2:c.3016C>T' assert results['NM_001174051.2:c.3016C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001174051.2:c.3016C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': u'G', 'pos': '50402890', 'alt': u'A'}} - assert results['NM_001174051.2:c.3016C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'ref': u'G', 'pos': '50365459', 'alt': u'A'}} - assert results['NM_001174051.2:c.3016C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': u'G', 'pos': '50402890', 'alt': u'A'}} - assert results['NM_001174051.2:c.3016C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'ref': u'G', 'pos': '50365459', 'alt': u'A'}} + assert results['NM_001174051.2:c.3016C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} + assert results['NM_001174051.2:c.3016C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} + assert results['NM_001174051.2:c.3016C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} + assert results['NM_001174051.2:c.3016C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} assert results['NM_001174051.2:c.3016C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167522.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001174051.2'} - assert 'NM_006030.3:c.2995C>T' in results.keys() + assert 'NM_006030.3:c.2995C>T' in list(results.keys()) assert results['NM_006030.3:c.2995C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_006030.3:c.2995C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_006030.3:c.2995C>T']['alt_genomic_loci'] == [] @@ -15884,13 +15884,13 @@ def test_variant282(self): assert results['NM_006030.3:c.2995C>T']['hgvs_lrg_variant'] == '' assert results['NM_006030.3:c.2995C>T']['hgvs_transcript_variant'] == 'NM_006030.3:c.2995C>T' assert results['NM_006030.3:c.2995C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_006030.3:c.2995C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': u'G', 'pos': '50402890', 'alt': u'A'}} - assert results['NM_006030.3:c.2995C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'ref': u'G', 'pos': '50365459', 'alt': u'A'}} - assert results['NM_006030.3:c.2995C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': u'G', 'pos': '50402890', 'alt': u'A'}} - assert results['NM_006030.3:c.2995C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'ref': u'G', 'pos': '50365459', 'alt': u'A'}} + assert results['NM_006030.3:c.2995C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} + assert results['NM_006030.3:c.2995C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} + assert results['NM_006030.3:c.2995C>T']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} + assert results['NM_006030.3:c.2995C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} assert results['NM_006030.3:c.2995C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_006021.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006030.3'} - assert 'NM_001005505.1:c.2995C>T' in results.keys() + assert 'NM_001005505.1:c.2995C>T' in list(results.keys()) assert results['NM_001005505.1:c.2995C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001005505.1:c.2995C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001005505.1:c.2995C>T']['alt_genomic_loci'] == [] @@ -15902,19 +15902,19 @@ def test_variant282(self): assert results['NM_001005505.1:c.2995C>T']['hgvs_lrg_variant'] == '' assert results['NM_001005505.1:c.2995C>T']['hgvs_transcript_variant'] == 'NM_001005505.1:c.2995C>T' assert results['NM_001005505.1:c.2995C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001005505.1:c.2995C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': u'G', 'pos': '50402890', 'alt': u'A'}} - assert 'hg38' not in results['NM_001005505.1:c.2995C>T']['primary_assembly_loci'].keys() - assert results['NM_001005505.1:c.2995C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': u'G', 'pos': '50402890', 'alt': u'A'}} - assert 'grch38' not in results['NM_001005505.1:c.2995C>T']['primary_assembly_loci'].keys() + assert results['NM_001005505.1:c.2995C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} + assert 
'hg38' not in list(results['NM_001005505.1:c.2995C>T']['primary_assembly_loci'].keys()) + assert results['NM_001005505.1:c.2995C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} + assert 'grch38' not in list(results['NM_001005505.1:c.2995C>T']['primary_assembly_loci'].keys()) assert results['NM_001005505.1:c.2995C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001005505.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001005505.1'} def test_variant283(self): variant = '3-57851007-AG-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_007159.4:c.1135+565del' in results.keys() + assert 'NM_007159.4:c.1135+565del' in list(results.keys()) assert results['NM_007159.4:c.1135+565del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007159.4:c.1135+565del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007159.4:c.1135+565del']['alt_genomic_loci'] == [] @@ -15932,7 +15932,7 @@ def test_variant283(self): assert results['NM_007159.4:c.1135+565del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.57865281del', 'vcf': {'chr': '3', 'ref': 'AG', 'pos': '57865280', 'alt': 'A'}} assert results['NM_007159.4:c.1135+565del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009090.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007159.4'} - assert 'NM_001304420.2:c.1186+424del' in results.keys() + assert 'NM_001304420.2:c.1186+424del' in list(results.keys()) assert results['NM_001304420.2:c.1186+424del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001304420.2:c.1186+424del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001304420.2:c.1186+424del']['alt_genomic_loci'] == [] @@ -15950,7 +15950,7 @@ def test_variant283(self): assert 
results['NM_001304420.2:c.1186+424del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.57865281del', 'vcf': {'chr': '3', 'ref': 'AG', 'pos': '57865280', 'alt': 'A'}} assert results['NM_001304420.2:c.1186+424del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001291349.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001304420.2'} - assert 'NM_001304421.2:c.1135+565del' in results.keys() + assert 'NM_001304421.2:c.1135+565del' in list(results.keys()) assert results['NM_001304421.2:c.1135+565del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001304421.2:c.1135+565del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001304421.2:c.1135+565del']['alt_genomic_loci'] == [] @@ -15969,7 +15969,7 @@ def test_variant283(self): assert results['NM_001304421.2:c.1135+565del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001291350.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001304421.2'} assert results['flag'] == 'gene_variant' - assert 'NM_007159.2:c.1135+565del' in results.keys() + assert 'NM_007159.2:c.1135+565del' in list(results.keys()) assert results['NM_007159.2:c.1135+565del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007159.2:c.1135+565del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007159.2:c.1135+565del']['alt_genomic_loci'] == [] @@ -15982,12 +15982,12 @@ def test_variant283(self): assert results['NM_007159.2:c.1135+565del']['hgvs_transcript_variant'] == 'NM_007159.2:c.1135+565del' assert results['NM_007159.2:c.1135+565del']['hgvs_refseqgene_variant'] == '' assert results['NM_007159.2:c.1135+565del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': 'chr3', 'ref': 'AG', 'pos': '57851007', 'alt': 'A'}} - assert 'hg38' not in results['NM_007159.2:c.1135+565del']['primary_assembly_loci'].keys() + assert 
'hg38' not in list(results['NM_007159.2:c.1135+565del']['primary_assembly_loci'].keys()) assert results['NM_007159.2:c.1135+565del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': '3', 'ref': 'AG', 'pos': '57851007', 'alt': 'A'}} - assert 'grch38' not in results['NM_007159.2:c.1135+565del']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_007159.2:c.1135+565del']['primary_assembly_loci'].keys()) assert results['NM_007159.2:c.1135+565del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009090.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007159.2'} - assert 'obsolete_record_3' in results.keys() + assert 'obsolete_record_3' in list(results.keys()) assert results['obsolete_record_3']['hgvs_lrg_transcript_variant'] == '' assert results['obsolete_record_3']['refseqgene_context_intronic_sequence'] == '' assert results['obsolete_record_3']['alt_genomic_loci'] == [] @@ -15999,13 +15999,13 @@ def test_variant283(self): assert results['obsolete_record_3']['hgvs_lrg_variant'] == '' assert results['obsolete_record_3']['hgvs_transcript_variant'] == '' assert results['obsolete_record_3']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['obsolete_record_3']['primary_assembly_loci'].keys() - assert 'hg38' not in results['obsolete_record_3']['primary_assembly_loci'].keys() - assert 'grch37' not in results['obsolete_record_3']['primary_assembly_loci'].keys() - assert 'grch38' not in results['obsolete_record_3']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['obsolete_record_3']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['obsolete_record_3']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['obsolete_record_3']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['obsolete_record_3']['primary_assembly_loci'].keys()) assert 
results['obsolete_record_3']['reference_sequence_records'] == '' - assert 'obsolete_record_2' in results.keys() + assert 'obsolete_record_2' in list(results.keys()) assert results['obsolete_record_2']['hgvs_lrg_transcript_variant'] == '' assert results['obsolete_record_2']['refseqgene_context_intronic_sequence'] == '' assert results['obsolete_record_2']['alt_genomic_loci'] == [] @@ -16017,13 +16017,13 @@ def test_variant283(self): assert results['obsolete_record_2']['hgvs_lrg_variant'] == '' assert results['obsolete_record_2']['hgvs_transcript_variant'] == '' assert results['obsolete_record_2']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['obsolete_record_2']['primary_assembly_loci'].keys() - assert 'hg38' not in results['obsolete_record_2']['primary_assembly_loci'].keys() - assert 'grch37' not in results['obsolete_record_2']['primary_assembly_loci'].keys() - assert 'grch38' not in results['obsolete_record_2']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['obsolete_record_2']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['obsolete_record_2']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['obsolete_record_2']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['obsolete_record_2']['primary_assembly_loci'].keys()) assert results['obsolete_record_2']['reference_sequence_records'] == '' - assert 'obsolete_record_1' in results.keys() + assert 'obsolete_record_1' in list(results.keys()) assert results['obsolete_record_1']['hgvs_lrg_transcript_variant'] == '' assert results['obsolete_record_1']['refseqgene_context_intronic_sequence'] == '' assert results['obsolete_record_1']['alt_genomic_loci'] == [] @@ -16035,19 +16035,19 @@ def test_variant283(self): assert results['obsolete_record_1']['hgvs_lrg_variant'] == '' assert results['obsolete_record_1']['hgvs_transcript_variant'] == '' assert results['obsolete_record_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' 
not in results['obsolete_record_1']['primary_assembly_loci'].keys() - assert 'hg38' not in results['obsolete_record_1']['primary_assembly_loci'].keys() - assert 'grch37' not in results['obsolete_record_1']['primary_assembly_loci'].keys() - assert 'grch38' not in results['obsolete_record_1']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['obsolete_record_1']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['obsolete_record_1']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['obsolete_record_1']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['obsolete_record_1']['primary_assembly_loci'].keys()) assert results['obsolete_record_1']['reference_sequence_records'] == '' def test_variant284(self): variant = '3-122003832-G-C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001178065.1:c.3061C=' in results.keys() + assert 'NM_001178065.1:c.3061C=' in list(results.keys()) assert results['NM_001178065.1:c.3061C=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001178065.1:c.3061C=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001178065.1:c.3061C=']['alt_genomic_loci'] == [] @@ -16066,7 +16066,7 @@ def test_variant284(self): assert results['NM_001178065.1:c.3061C=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001171536.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001178065.1'} assert results['flag'] == 'gene_variant' - assert 'NM_000388.3:c.3031C=' in results.keys() + assert 'NM_000388.3:c.3031C=' in list(results.keys()) assert results['NM_000388.3:c.3031C=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000388.3:c.3031C=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000388.3:c.3031C=']['alt_genomic_loci'] == [] @@ -16088,9 +16088,9 @@ def test_variant284(self): def test_variant285(self): variant = '4-153332910-C-CAGG' results 
= self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001349798.1:c.45_46insCCT' in results.keys() + assert 'NM_001349798.1:c.45_46insCCT' in list(results.keys()) assert results['NM_001349798.1:c.45_46insCCT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001349798.1:c.45_46insCCT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001349798.1:c.45_46insCCT']['alt_genomic_loci'] == [] @@ -16102,13 +16102,13 @@ def test_variant285(self): assert results['NM_001349798.1:c.45_46insCCT']['hgvs_lrg_variant'] == '' assert results['NM_001349798.1:c.45_46insCCT']['hgvs_transcript_variant'] == 'NM_001349798.1:c.45_46insCCT' assert results['NM_001349798.1:c.45_46insCCT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '153332910', 'alt': u'CAGG'}} - assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411758_152411759insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '152411758', 'alt': u'CAGG'}} - assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '153332910', 'alt': u'CAGG'}} - assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411758_152411759insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '152411758', 'alt': u'CAGG'}} + assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} + assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000004.12:g.152411758_152411759insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '152411758', 'alt': 'CAGG'}} + assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} + assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411758_152411759insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '152411758', 'alt': 'CAGG'}} assert results['NM_001349798.1:c.45_46insCCT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_361014.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001349798.1'} - assert 'NM_033632.3:c.45_46insCCT' in results.keys() + assert 'NM_033632.3:c.45_46insCCT' in list(results.keys()) assert results['NM_033632.3:c.45_46insCCT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_033632.3:c.45_46insCCT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_033632.3:c.45_46insCCT']['alt_genomic_loci'] == [] @@ -16120,13 +16120,13 @@ def test_variant285(self): assert results['NM_033632.3:c.45_46insCCT']['hgvs_lrg_variant'] == '' assert results['NM_033632.3:c.45_46insCCT']['hgvs_transcript_variant'] == 'NM_033632.3:c.45_46insCCT' assert results['NM_033632.3:c.45_46insCCT']['hgvs_refseqgene_variant'] == 'NG_029466.1:g.128262_128263insCCT' - assert results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '153332910', 'alt': u'CAGG'}} - assert results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411758_152411759insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '152411758', 'alt': u'CAGG'}} - assert 
results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '153332910', 'alt': u'CAGG'}} - assert results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411758_152411759insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '152411758', 'alt': u'CAGG'}} + assert results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} + assert results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411758_152411759insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '152411758', 'alt': 'CAGG'}} + assert results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} + assert results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411758_152411759insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '152411758', 'alt': 'CAGG'}} assert results['NM_033632.3:c.45_46insCCT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029466.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_361014.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_033632.3'} - assert 'NM_001257069.1:c.45_46insCCT' in results.keys() + assert 'NM_001257069.1:c.45_46insCCT' in list(results.keys()) assert results['NM_001257069.1:c.45_46insCCT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001257069.1:c.45_46insCCT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001257069.1:c.45_46insCCT']['alt_genomic_loci'] == [] @@ 
-16138,14 +16138,14 @@ def test_variant285(self): assert results['NM_001257069.1:c.45_46insCCT']['hgvs_lrg_variant'] == '' assert results['NM_001257069.1:c.45_46insCCT']['hgvs_transcript_variant'] == 'NM_001257069.1:c.45_46insCCT' assert results['NM_001257069.1:c.45_46insCCT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '153332910', 'alt': u'CAGG'}} - assert results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411758_152411759insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '152411758', 'alt': u'CAGG'}} - assert results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '153332910', 'alt': u'CAGG'}} - assert results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411758_152411759insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '152411758', 'alt': u'CAGG'}} + assert results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} + assert results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411758_152411759insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '152411758', 'alt': 'CAGG'}} + assert results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} + assert results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['grch38'] 
== {'hgvs_genomic_description': 'NC_000004.12:g.152411758_152411759insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '152411758', 'alt': 'CAGG'}} assert results['NM_001257069.1:c.45_46insCCT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243998.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257069.1'} assert results['flag'] == 'gene_variant' - assert 'NM_001349798.2:c.45_46insCCT' in results.keys() + assert 'NM_001349798.2:c.45_46insCCT' in list(results.keys()) assert results['NM_001349798.2:c.45_46insCCT']['hgvs_lrg_transcript_variant'] == 'LRG_1141t1:c.45_46insCCT' assert results['NM_001349798.2:c.45_46insCCT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001349798.2:c.45_46insCCT']['alt_genomic_loci'] == [] @@ -16157,20 +16157,20 @@ def test_variant285(self): assert results['NM_001349798.2:c.45_46insCCT']['hgvs_lrg_variant'] == '' assert results['NM_001349798.2:c.45_46insCCT']['hgvs_transcript_variant'] == 'NM_001349798.2:c.45_46insCCT' assert results['NM_001349798.2:c.45_46insCCT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001349798.2:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '153332910', 'alt': u'CAGG'}} - assert 'hg38' not in results['NM_001349798.2:c.45_46insCCT']['primary_assembly_loci'].keys() - assert results['NM_001349798.2:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '153332910', 'alt': u'CAGG'}} - assert 'grch38' not in results['NM_001349798.2:c.45_46insCCT']['primary_assembly_loci'].keys() + assert results['NM_001349798.2:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} + assert 
'hg38' not in list(results['NM_001349798.2:c.45_46insCCT']['primary_assembly_loci'].keys()) + assert results['NM_001349798.2:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} + assert 'grch38' not in list(results['NM_001349798.2:c.45_46insCCT']['primary_assembly_loci'].keys()) assert results['NM_001349798.2:c.45_46insCCT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001336727.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001349798.2'} def test_variant286(self): variant = '5-1295183-G-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'intergenic' - assert 'Intergenic_Variant_1' in results.keys() + assert 'Intergenic_Variant_1' in list(results.keys()) assert results['Intergenic_Variant_1']['hgvs_lrg_transcript_variant'] == '' assert results['Intergenic_Variant_1']['refseqgene_context_intronic_sequence'] == '' assert results['Intergenic_Variant_1']['alt_genomic_loci'] == [] @@ -16182,19 +16182,19 @@ def test_variant286(self): assert results['Intergenic_Variant_1']['hgvs_lrg_variant'] == 'LRG_343:g.4980C>T' assert results['Intergenic_Variant_1']['hgvs_transcript_variant'] == '' assert results['Intergenic_Variant_1']['hgvs_refseqgene_variant'] == 'NG_009265.1:g.4980C>T' - assert results['Intergenic_Variant_1']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': u'NC_000005.9:g.1295183G>A', 'vcf': {'chr': 'chr5', 'ref': 'G', 'pos': '1295183', 'alt': 'A'}} - assert results['Intergenic_Variant_1']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': u'NC_000005.10:g.1295068G>A', 'vcf': {'chr': 'chr5', 'ref': 'G', 'pos': '1295068', 'alt': 'A'}} - assert results['Intergenic_Variant_1']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': u'NC_000005.9:g.1295183G>A', 'vcf': 
{'chr': '5', 'ref': 'G', 'pos': '1295183', 'alt': 'A'}} - assert results['Intergenic_Variant_1']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': u'NC_000005.10:g.1295068G>A', 'vcf': {'chr': '5', 'ref': 'G', 'pos': '1295068', 'alt': 'A'}} + assert results['Intergenic_Variant_1']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.1295183G>A', 'vcf': {'chr': 'chr5', 'ref': 'G', 'pos': '1295183', 'alt': 'A'}} + assert results['Intergenic_Variant_1']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.1295068G>A', 'vcf': {'chr': 'chr5', 'ref': 'G', 'pos': '1295068', 'alt': 'A'}} + assert results['Intergenic_Variant_1']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.1295183G>A', 'vcf': {'chr': '5', 'ref': 'G', 'pos': '1295183', 'alt': 'A'}} + assert results['Intergenic_Variant_1']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.1295068G>A', 'vcf': {'chr': '5', 'ref': 'G', 'pos': '1295068', 'alt': 'A'}} assert results['Intergenic_Variant_1']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009265.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_343.xml'} def test_variant287(self): variant = '5-77396835-TTTC-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_003664.4:c.2409_2411del' in results.keys() + assert 'NM_003664.4:c.2409_2411del' in list(results.keys()) assert results['NM_003664.4:c.2409_2411del']['hgvs_lrg_transcript_variant'] == 'LRG_170t1:c.2409_2411del' assert results['NM_003664.4:c.2409_2411del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003664.4:c.2409_2411del']['alt_genomic_loci'] == [] @@ -16213,7 +16213,7 @@ def test_variant287(self): assert results['NM_003664.4:c.2409_2411del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007268.1', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_003655.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003664.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_170.xml'} assert results['flag'] == 'gene_variant' - assert 'NM_003664.3:c.2409_2411del' in results.keys() + assert 'NM_003664.3:c.2409_2411del' in list(results.keys()) assert results['NM_003664.3:c.2409_2411del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003664.3:c.2409_2411del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003664.3:c.2409_2411del']['alt_genomic_loci'] == [] @@ -16226,12 +16226,12 @@ def test_variant287(self): assert results['NM_003664.3:c.2409_2411del']['hgvs_transcript_variant'] == 'NM_003664.3:c.2409_2411del' assert results['NM_003664.3:c.2409_2411del']['hgvs_refseqgene_variant'] == '' assert results['NM_003664.3:c.2409_2411del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396836_77396838del', 'vcf': {'chr': 'chr5', 'ref': 'TTTC', 'pos': '77396835', 'alt': 'T'}} - assert 'hg38' not in results['NM_003664.3:c.2409_2411del']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_003664.3:c.2409_2411del']['primary_assembly_loci'].keys()) assert results['NM_003664.3:c.2409_2411del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396836_77396838del', 'vcf': {'chr': '5', 'ref': 'TTTC', 'pos': '77396835', 'alt': 'T'}} - assert 'grch38' not in results['NM_003664.3:c.2409_2411del']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_003664.3:c.2409_2411del']['primary_assembly_loci'].keys()) assert results['NM_003664.3:c.2409_2411del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003655.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003664.3'} - assert 'NM_001271769.1:c.2262_2264del' in results.keys() + assert 'NM_001271769.1:c.2262_2264del' in list(results.keys()) assert 
results['NM_001271769.1:c.2262_2264del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001271769.1:c.2262_2264del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001271769.1:c.2262_2264del']['alt_genomic_loci'] == [] @@ -16253,9 +16253,9 @@ def test_variant287(self): def test_variant288(self): variant = '5-118811422-GGTGA-G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_000414.3:c.302+3_302+6del' in results.keys() + assert 'NM_000414.3:c.302+3_302+6del' in list(results.keys()) assert results['NM_000414.3:c.302+3_302+6del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000414.3:c.302+3_302+6del']['refseqgene_context_intronic_sequence'] == 'NG_008182.1(NM_000414.3):c.302+3_302+6del' assert results['NM_000414.3:c.302+3_302+6del']['alt_genomic_loci'] == [] @@ -16273,7 +16273,7 @@ def test_variant288(self): assert results['NM_000414.3:c.302+3_302+6del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': '5', 'ref': 'GGTGA', 'pos': '119475727', 'alt': 'G'}} assert results['NM_000414.3:c.302+3_302+6del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008182.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000405.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000414.3'} - assert 'NM_001292028.1:c.-110+3_-110+6del' in results.keys() + assert 'NM_001292028.1:c.-110+3_-110+6del' in list(results.keys()) assert results['NM_001292028.1:c.-110+3_-110+6del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001292028.1:c.-110+3_-110+6del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001292028.1:c.-110+3_-110+6del']['alt_genomic_loci'] == [] @@ -16291,7 +16291,7 @@ def test_variant288(self): assert results['NM_001292028.1:c.-110+3_-110+6del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': '5', 'ref': 'GGTGA', 'pos': '119475727', 'alt': 'G'}} assert results['NM_001292028.1:c.-110+3_-110+6del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278957.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001292028.1'} - assert 'NM_001199291.2:c.377+3_377+6del' in results.keys() + assert 'NM_001199291.2:c.377+3_377+6del' in list(results.keys()) assert results['NM_001199291.2:c.377+3_377+6del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001199291.2:c.377+3_377+6del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001199291.2:c.377+3_377+6del']['alt_genomic_loci'] == [] @@ -16310,7 +16310,7 @@ def test_variant288(self): assert results['NM_001199291.2:c.377+3_377+6del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186220.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199291.2'} assert results['flag'] == 'gene_variant' - assert 'NM_001292027.1:c.230+3_230+6del' in results.keys() + assert 'NM_001292027.1:c.230+3_230+6del' in list(results.keys()) assert results['NM_001292027.1:c.230+3_230+6del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001292027.1:c.230+3_230+6del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001292027.1:c.230+3_230+6del']['alt_genomic_loci'] == [] @@ -16328,7 +16328,7 @@ def test_variant288(self): assert results['NM_001292027.1:c.230+3_230+6del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': '5', 'ref': 'GGTGA', 'pos': '119475727', 'alt': 'G'}} assert results['NM_001292027.1:c.230+3_230+6del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278956.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001292027.1'} - assert 'NM_001199291.1:c.377+3_377+6del' in results.keys() + assert 
'NM_001199291.1:c.377+3_377+6del' in list(results.keys()) assert results['NM_001199291.1:c.377+3_377+6del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001199291.1:c.377+3_377+6del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001199291.1:c.377+3_377+6del']['alt_genomic_loci'] == [] @@ -16341,12 +16341,12 @@ def test_variant288(self): assert results['NM_001199291.1:c.377+3_377+6del']['hgvs_transcript_variant'] == 'NM_001199291.1:c.377+3_377+6del' assert results['NM_001199291.1:c.377+3_377+6del']['hgvs_refseqgene_variant'] == '' assert results['NM_001199291.1:c.377+3_377+6del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': 'chr5', 'ref': 'GGTGA', 'pos': '118811422', 'alt': 'G'}} - assert 'hg38' not in results['NM_001199291.1:c.377+3_377+6del']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001199291.1:c.377+3_377+6del']['primary_assembly_loci'].keys()) assert results['NM_001199291.1:c.377+3_377+6del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': '5', 'ref': 'GGTGA', 'pos': '118811422', 'alt': 'G'}} - assert 'grch38' not in results['NM_001199291.1:c.377+3_377+6del']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001199291.1:c.377+3_377+6del']['primary_assembly_loci'].keys()) assert results['NM_001199291.1:c.377+3_377+6del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186220.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199291.1'} - assert 'NM_001199292.1:c.248+3_248+6del' in results.keys() + assert 'NM_001199292.1:c.248+3_248+6del' in list(results.keys()) assert results['NM_001199292.1:c.248+3_248+6del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001199292.1:c.248+3_248+6del']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_001199292.1:c.248+3_248+6del']['alt_genomic_loci'] == [] @@ -16368,9 +16368,9 @@ def test_variant288(self): def test_variant289(self): variant = '5-118811422-GGTGAG-G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001292028.1:c.-110+1_-110+5del' in results.keys() + assert 'NM_001292028.1:c.-110+1_-110+5del' in list(results.keys()) assert results['NM_001292028.1:c.-110+1_-110+5del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001292028.1:c.-110+1_-110+5del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001292028.1:c.-110+1_-110+5del']['alt_genomic_loci'] == [] @@ -16388,7 +16388,7 @@ def test_variant289(self): assert results['NM_001292028.1:c.-110+1_-110+5del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': '5', 'ref': 'GGGTGA', 'pos': '119475726', 'alt': 'G'}} assert results['NM_001292028.1:c.-110+1_-110+5del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278957.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001292028.1'} - assert 'NM_000414.3:c.302+1_302+5del' in results.keys() + assert 'NM_000414.3:c.302+1_302+5del' in list(results.keys()) assert results['NM_000414.3:c.302+1_302+5del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000414.3:c.302+1_302+5del']['refseqgene_context_intronic_sequence'] == 'NG_008182.1(NM_000414.3):c.302+1_302+5del' assert results['NM_000414.3:c.302+1_302+5del']['alt_genomic_loci'] == [] @@ -16406,7 +16406,7 @@ def test_variant289(self): assert results['NM_000414.3:c.302+1_302+5del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': '5', 'ref': 'GGGTGA', 'pos': '119475726', 'alt': 'G'}} assert results['NM_000414.3:c.302+1_302+5del']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_008182.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000405.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000414.3'} - assert 'NM_001199291.2:c.377+1_377+5del' in results.keys() + assert 'NM_001199291.2:c.377+1_377+5del' in list(results.keys()) assert results['NM_001199291.2:c.377+1_377+5del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001199291.2:c.377+1_377+5del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001199291.2:c.377+1_377+5del']['alt_genomic_loci'] == [] @@ -16424,7 +16424,7 @@ def test_variant289(self): assert results['NM_001199291.2:c.377+1_377+5del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': '5', 'ref': 'GGGTGA', 'pos': '119475726', 'alt': 'G'}} assert results['NM_001199291.2:c.377+1_377+5del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186220.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199291.2'} - assert 'NM_001199292.1:c.248+1_248+5del' in results.keys() + assert 'NM_001199292.1:c.248+1_248+5del' in list(results.keys()) assert results['NM_001199292.1:c.248+1_248+5del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001199292.1:c.248+1_248+5del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001199292.1:c.248+1_248+5del']['alt_genomic_loci'] == [] @@ -16443,7 +16443,7 @@ def test_variant289(self): assert results['NM_001199292.1:c.248+1_248+5del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186221.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199292.1'} assert results['flag'] == 'gene_variant' - assert 'NM_001199291.1:c.377+1_377+5del' in results.keys() + assert 'NM_001199291.1:c.377+1_377+5del' in list(results.keys()) assert results['NM_001199291.1:c.377+1_377+5del']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001199291.1:c.377+1_377+5del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001199291.1:c.377+1_377+5del']['alt_genomic_loci'] == [] @@ -16456,12 +16456,12 @@ def test_variant289(self): assert results['NM_001199291.1:c.377+1_377+5del']['hgvs_transcript_variant'] == 'NM_001199291.1:c.377+1_377+5del' assert results['NM_001199291.1:c.377+1_377+5del']['hgvs_refseqgene_variant'] == '' assert results['NM_001199291.1:c.377+1_377+5del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': 'chr5', 'ref': 'GGGTGA', 'pos': '118811421', 'alt': 'G'}} - assert 'hg38' not in results['NM_001199291.1:c.377+1_377+5del']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_001199291.1:c.377+1_377+5del']['primary_assembly_loci'].keys()) assert results['NM_001199291.1:c.377+1_377+5del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': '5', 'ref': 'GGGTGA', 'pos': '118811421', 'alt': 'G'}} - assert 'grch38' not in results['NM_001199291.1:c.377+1_377+5del']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_001199291.1:c.377+1_377+5del']['primary_assembly_loci'].keys()) assert results['NM_001199291.1:c.377+1_377+5del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186220.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199291.1'} - assert 'NM_001292027.1:c.230+1_230+5del' in results.keys() + assert 'NM_001292027.1:c.230+1_230+5del' in list(results.keys()) assert results['NM_001292027.1:c.230+1_230+5del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001292027.1:c.230+1_230+5del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001292027.1:c.230+1_230+5del']['alt_genomic_loci'] == [] @@ -16483,10 +16483,10 @@ def test_variant289(self): def test_variant290(self): variant = '5-131705587-CG-C' 
results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NR_110997.1:n.21del' in results.keys() + assert 'NR_110997.1:n.21del' in list(results.keys()) assert results['NR_110997.1:n.21del']['hgvs_lrg_transcript_variant'] == '' assert results['NR_110997.1:n.21del']['refseqgene_context_intronic_sequence'] == '' assert results['NR_110997.1:n.21del']['alt_genomic_loci'] == [] @@ -16504,7 +16504,7 @@ def test_variant290(self): assert results['NR_110997.1:n.21del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.132369898del', 'vcf': {'chr': '5', 'ref': 'CG', 'pos': '132369895', 'alt': 'C'}} assert results['NR_110997.1:n.21del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_110997.1'} - assert 'NM_003060.3:c.-75del' in results.keys() + assert 'NM_003060.3:c.-75del' in list(results.keys()) assert results['NM_003060.3:c.-75del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003060.3:c.-75del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003060.3:c.-75del']['alt_genomic_loci'] == [] @@ -16522,7 +16522,7 @@ def test_variant290(self): assert results['NM_003060.3:c.-75del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.132369898del', 'vcf': {'chr': '5', 'ref': 'CG', 'pos': '132369895', 'alt': 'C'}} assert results['NM_003060.3:c.-75del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003051.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003060.3'} - assert 'NM_001308122.1:c.-75del' in results.keys() + assert 'NM_001308122.1:c.-75del' in list(results.keys()) assert results['NM_001308122.1:c.-75del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001308122.1:c.-75del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001308122.1:c.-75del']['alt_genomic_loci'] == [] @@ -16544,10 
+16544,10 @@ def test_variant290(self): def test_variant291(self): variant = '5-148406482-T-C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_024577.3:c.2813A>G' in results.keys() + assert 'NM_024577.3:c.2813A>G' in list(results.keys()) assert results['NM_024577.3:c.2813A>G']['hgvs_lrg_transcript_variant'] == 'LRG_269t1:c.2813A>G' assert results['NM_024577.3:c.2813A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_024577.3:c.2813A>G']['alt_genomic_loci'] == [] @@ -16559,19 +16559,19 @@ def test_variant291(self): assert results['NM_024577.3:c.2813A>G']['hgvs_lrg_variant'] == 'LRG_269:g.41256A>G' assert results['NM_024577.3:c.2813A>G']['hgvs_transcript_variant'] == 'NM_024577.3:c.2813A>G' assert results['NM_024577.3:c.2813A>G']['hgvs_refseqgene_variant'] == 'NG_007947.2:g.41256A>G' - assert results['NM_024577.3:c.2813A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.148406482T>C', 'vcf': {'chr': 'chr5', 'ref': u'T', 'pos': '148406482', 'alt': u'C'}} - assert results['NM_024577.3:c.2813A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.149026919T>C', 'vcf': {'chr': 'chr5', 'ref': u'T', 'pos': '149026919', 'alt': u'C'}} - assert results['NM_024577.3:c.2813A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.148406482T>C', 'vcf': {'chr': '5', 'ref': u'T', 'pos': '148406482', 'alt': u'C'}} - assert results['NM_024577.3:c.2813A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.149026919T>C', 'vcf': {'chr': '5', 'ref': u'T', 'pos': '149026919', 'alt': u'C'}} + assert results['NM_024577.3:c.2813A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.148406482T>C', 'vcf': {'chr': 'chr5', 'ref': 'T', 'pos': '148406482', 'alt': 'C'}} + assert 
results['NM_024577.3:c.2813A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.149026919T>C', 'vcf': {'chr': 'chr5', 'ref': 'T', 'pos': '149026919', 'alt': 'C'}} + assert results['NM_024577.3:c.2813A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.148406482T>C', 'vcf': {'chr': '5', 'ref': 'T', 'pos': '148406482', 'alt': 'C'}} + assert results['NM_024577.3:c.2813A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.149026919T>C', 'vcf': {'chr': '5', 'ref': 'T', 'pos': '149026919', 'alt': 'C'}} assert results['NM_024577.3:c.2813A>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007947.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_078853.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024577.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_269.xml'} def test_variant292(self): variant = '6-110036337-T-TCAG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_014845.5:c.123_124insCAG' in results.keys() + assert 'NM_014845.5:c.123_124insCAG' in list(results.keys()) assert results['NM_014845.5:c.123_124insCAG']['hgvs_lrg_transcript_variant'] == 'LRG_241t1:c.123_124insCAG' assert results['NM_014845.5:c.123_124insCAG']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014845.5:c.123_124insCAG']['alt_genomic_loci'] == [] @@ -16594,9 +16594,9 @@ def test_variant292(self): def test_variant293(self): variant = '6-110036337-TGAT-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_014845.5:c.124_126del' in results.keys() + assert 'NM_014845.5:c.124_126del' in list(results.keys()) assert results['NM_014845.5:c.124_126del']['hgvs_lrg_transcript_variant'] == 'LRG_241t1:c.124_126del' assert results['NM_014845.5:c.124_126del']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_014845.5:c.124_126del']['alt_genomic_loci'] == [] @@ -16619,10 +16619,10 @@ def test_variant293(self): def test_variant294(self): variant = '6-152651802-C-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_182961.3:c.14018G>T' in results.keys() + assert 'NM_182961.3:c.14018G>T' in list(results.keys()) assert results['NM_182961.3:c.14018G>T']['hgvs_lrg_transcript_variant'] == 'LRG_427t1:c.14018G>T' assert results['NM_182961.3:c.14018G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_182961.3:c.14018G>T']['alt_genomic_loci'] == [] @@ -16634,13 +16634,13 @@ def test_variant294(self): assert results['NM_182961.3:c.14018G>T']['hgvs_lrg_variant'] == 'LRG_427:g.311733G>T' assert results['NM_182961.3:c.14018G>T']['hgvs_transcript_variant'] == 'NM_182961.3:c.14018G>T' assert results['NM_182961.3:c.14018G>T']['hgvs_refseqgene_variant'] == 'NG_012855.1:g.311733G>T' - assert results['NM_182961.3:c.14018G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.152651802C>A', 'vcf': {'chr': 'chr6', 'ref': u'C', 'pos': '152651802', 'alt': u'A'}} - assert results['NM_182961.3:c.14018G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152330667C>A', 'vcf': {'chr': 'chr6', 'ref': u'C', 'pos': '152330667', 'alt': u'A'}} - assert results['NM_182961.3:c.14018G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.152651802C>A', 'vcf': {'chr': '6', 'ref': u'C', 'pos': '152651802', 'alt': u'A'}} - assert results['NM_182961.3:c.14018G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152330667C>A', 'vcf': {'chr': '6', 'ref': u'C', 'pos': '152330667', 'alt': u'A'}} + assert results['NM_182961.3:c.14018G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.152651802C>A', 'vcf': {'chr': 'chr6', 'ref': 'C', 
'pos': '152651802', 'alt': 'A'}} + assert results['NM_182961.3:c.14018G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152330667C>A', 'vcf': {'chr': 'chr6', 'ref': 'C', 'pos': '152330667', 'alt': 'A'}} + assert results['NM_182961.3:c.14018G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.152651802C>A', 'vcf': {'chr': '6', 'ref': 'C', 'pos': '152651802', 'alt': 'A'}} + assert results['NM_182961.3:c.14018G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152330667C>A', 'vcf': {'chr': '6', 'ref': 'C', 'pos': '152330667', 'alt': 'A'}} assert results['NM_182961.3:c.14018G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012855.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_892006.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_182961.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_427.xml'} - assert 'NM_033071.3:c.13805G>T' in results.keys() + assert 'NM_033071.3:c.13805G>T' in list(results.keys()) assert results['NM_033071.3:c.13805G>T']['hgvs_lrg_transcript_variant'] == 'LRG_427t2:c.13805G>T' assert results['NM_033071.3:c.13805G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_033071.3:c.13805G>T']['alt_genomic_loci'] == [] @@ -16652,20 +16652,20 @@ def test_variant294(self): assert results['NM_033071.3:c.13805G>T']['hgvs_lrg_variant'] == 'LRG_427:g.311733G>T' assert results['NM_033071.3:c.13805G>T']['hgvs_transcript_variant'] == 'NM_033071.3:c.13805G>T' assert results['NM_033071.3:c.13805G>T']['hgvs_refseqgene_variant'] == 'NG_012855.1:g.311733G>T' - assert results['NM_033071.3:c.13805G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.152651802C>A', 'vcf': {'chr': 'chr6', 'ref': u'C', 'pos': '152651802', 'alt': u'A'}} - assert results['NM_033071.3:c.13805G>T']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000006.12:g.152330667C>A', 'vcf': {'chr': 'chr6', 'ref': u'C', 'pos': '152330667', 'alt': u'A'}} - assert results['NM_033071.3:c.13805G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.152651802C>A', 'vcf': {'chr': '6', 'ref': u'C', 'pos': '152651802', 'alt': u'A'}} - assert results['NM_033071.3:c.13805G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152330667C>A', 'vcf': {'chr': '6', 'ref': u'C', 'pos': '152330667', 'alt': u'A'}} + assert results['NM_033071.3:c.13805G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.152651802C>A', 'vcf': {'chr': 'chr6', 'ref': 'C', 'pos': '152651802', 'alt': 'A'}} + assert results['NM_033071.3:c.13805G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152330667C>A', 'vcf': {'chr': 'chr6', 'ref': 'C', 'pos': '152330667', 'alt': 'A'}} + assert results['NM_033071.3:c.13805G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.152651802C>A', 'vcf': {'chr': '6', 'ref': 'C', 'pos': '152651802', 'alt': 'A'}} + assert results['NM_033071.3:c.13805G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152330667C>A', 'vcf': {'chr': '6', 'ref': 'C', 'pos': '152330667', 'alt': 'A'}} assert results['NM_033071.3:c.13805G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012855.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_149062.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_033071.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_427.xml'} def test_variant295(self): variant = '6-152737643-C-G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_033071.3:c.5950G>C' in results.keys() + assert 'NM_033071.3:c.5950G>C' in 
list(results.keys()) assert results['NM_033071.3:c.5950G>C']['hgvs_lrg_transcript_variant'] == 'LRG_427t2:c.5950G>C' assert results['NM_033071.3:c.5950G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_033071.3:c.5950G>C']['alt_genomic_loci'] == [] @@ -16677,13 +16677,13 @@ def test_variant295(self): assert results['NM_033071.3:c.5950G>C']['hgvs_lrg_variant'] == 'LRG_427:g.225892G>C' assert results['NM_033071.3:c.5950G>C']['hgvs_transcript_variant'] == 'NM_033071.3:c.5950G>C' assert results['NM_033071.3:c.5950G>C']['hgvs_refseqgene_variant'] == 'NG_012855.1:g.225892G>C' - assert results['NM_033071.3:c.5950G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.152737643C>G', 'vcf': {'chr': 'chr6', 'ref': u'C', 'pos': '152737643', 'alt': u'G'}} - assert results['NM_033071.3:c.5950G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152416508C>G', 'vcf': {'chr': 'chr6', 'ref': u'C', 'pos': '152416508', 'alt': u'G'}} - assert results['NM_033071.3:c.5950G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.152737643C>G', 'vcf': {'chr': '6', 'ref': u'C', 'pos': '152737643', 'alt': u'G'}} - assert results['NM_033071.3:c.5950G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152416508C>G', 'vcf': {'chr': '6', 'ref': u'C', 'pos': '152416508', 'alt': u'G'}} + assert results['NM_033071.3:c.5950G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.152737643C>G', 'vcf': {'chr': 'chr6', 'ref': 'C', 'pos': '152737643', 'alt': 'G'}} + assert results['NM_033071.3:c.5950G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152416508C>G', 'vcf': {'chr': 'chr6', 'ref': 'C', 'pos': '152416508', 'alt': 'G'}} + assert results['NM_033071.3:c.5950G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.152737643C>G', 'vcf': {'chr': '6', 
'ref': 'C', 'pos': '152737643', 'alt': 'G'}} + assert results['NM_033071.3:c.5950G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152416508C>G', 'vcf': {'chr': '6', 'ref': 'C', 'pos': '152416508', 'alt': 'G'}} assert results['NM_033071.3:c.5950G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012855.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_149062.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_033071.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_427.xml'} - assert 'NM_182961.3:c.5929G>C' in results.keys() + assert 'NM_182961.3:c.5929G>C' in list(results.keys()) assert results['NM_182961.3:c.5929G>C']['hgvs_lrg_transcript_variant'] == 'LRG_427t1:c.5929G>C' assert results['NM_182961.3:c.5929G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_182961.3:c.5929G>C']['alt_genomic_loci'] == [] @@ -16695,19 +16695,19 @@ def test_variant295(self): assert results['NM_182961.3:c.5929G>C']['hgvs_lrg_variant'] == 'LRG_427:g.225892G>C' assert results['NM_182961.3:c.5929G>C']['hgvs_transcript_variant'] == 'NM_182961.3:c.5929G>C' assert results['NM_182961.3:c.5929G>C']['hgvs_refseqgene_variant'] == 'NG_012855.1:g.225892G>C' - assert results['NM_182961.3:c.5929G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.152737643C>G', 'vcf': {'chr': 'chr6', 'ref': u'C', 'pos': '152737643', 'alt': u'G'}} - assert results['NM_182961.3:c.5929G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152416508C>G', 'vcf': {'chr': 'chr6', 'ref': u'C', 'pos': '152416508', 'alt': u'G'}} - assert results['NM_182961.3:c.5929G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.152737643C>G', 'vcf': {'chr': '6', 'ref': u'C', 'pos': '152737643', 'alt': u'G'}} - assert results['NM_182961.3:c.5929G>C']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000006.12:g.152416508C>G', 'vcf': {'chr': '6', 'ref': u'C', 'pos': '152416508', 'alt': u'G'}} + assert results['NM_182961.3:c.5929G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.152737643C>G', 'vcf': {'chr': 'chr6', 'ref': 'C', 'pos': '152737643', 'alt': 'G'}} + assert results['NM_182961.3:c.5929G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152416508C>G', 'vcf': {'chr': 'chr6', 'ref': 'C', 'pos': '152416508', 'alt': 'G'}} + assert results['NM_182961.3:c.5929G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.152737643C>G', 'vcf': {'chr': '6', 'ref': 'C', 'pos': '152737643', 'alt': 'G'}} + assert results['NM_182961.3:c.5929G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152416508C>G', 'vcf': {'chr': '6', 'ref': 'C', 'pos': '152416508', 'alt': 'G'}} assert results['NM_182961.3:c.5929G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012855.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_892006.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_182961.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_427.xml'} def test_variant296(self): variant = '7-6026775-T-C' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001322012.1:c.688A>G' in results.keys() + assert 'NM_001322012.1:c.688A>G' in list(results.keys()) assert results['NM_001322012.1:c.688A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322012.1:c.688A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001322012.1:c.688A>G']['alt_genomic_loci'] == [] @@ -16719,13 +16719,13 @@ def test_variant296(self): assert results['NM_001322012.1:c.688A>G']['hgvs_lrg_variant'] == '' assert results['NM_001322012.1:c.688A>G']['hgvs_transcript_variant'] == 'NM_001322012.1:c.688A>G' 
assert results['NM_001322012.1:c.688A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322012.1:c.688A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NM_001322012.1:c.688A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} - assert results['NM_001322012.1:c.688A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NM_001322012.1:c.688A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322012.1:c.688A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_001322012.1:c.688A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} + assert results['NM_001322012.1:c.688A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_001322012.1:c.688A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} assert results['NM_001322012.1:c.688A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308941.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322012.1'} - assert 'NM_001322010.1:c.1060A>G' in results.keys() + assert 
'NM_001322010.1:c.1060A>G' in list(results.keys()) assert results['NM_001322010.1:c.1060A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322010.1:c.1060A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001322010.1:c.1060A>G']['alt_genomic_loci'] == [] @@ -16737,13 +16737,13 @@ def test_variant296(self): assert results['NM_001322010.1:c.1060A>G']['hgvs_lrg_variant'] == '' assert results['NM_001322010.1:c.1060A>G']['hgvs_transcript_variant'] == 'NM_001322010.1:c.1060A>G' assert results['NM_001322010.1:c.1060A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322010.1:c.1060A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NM_001322010.1:c.1060A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} - assert results['NM_001322010.1:c.1060A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NM_001322010.1:c.1060A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322010.1:c.1060A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_001322010.1:c.1060A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} + assert results['NM_001322010.1:c.1060A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 
'pos': '6026775', 'alt': 'C'}} + assert results['NM_001322010.1:c.1060A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} assert results['NM_001322010.1:c.1060A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308939.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322010.1'} - assert 'NM_001322015.1:c.1312A>G' in results.keys() + assert 'NM_001322015.1:c.1312A>G' in list(results.keys()) assert results['NM_001322015.1:c.1312A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322015.1:c.1312A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001322015.1:c.1312A>G']['alt_genomic_loci'] == [] @@ -16755,13 +16755,13 @@ def test_variant296(self): assert results['NM_001322015.1:c.1312A>G']['hgvs_lrg_variant'] == '' assert results['NM_001322015.1:c.1312A>G']['hgvs_transcript_variant'] == 'NM_001322015.1:c.1312A>G' assert results['NM_001322015.1:c.1312A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322015.1:c.1312A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NM_001322015.1:c.1312A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} - assert results['NM_001322015.1:c.1312A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NM_001322015.1:c.1312A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert 
results['NM_001322015.1:c.1312A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_001322015.1:c.1312A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} + assert results['NM_001322015.1:c.1312A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_001322015.1:c.1312A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} assert results['NM_001322015.1:c.1312A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308944.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322015.1'} - assert 'NM_001322003.1:c.1216A>G' in results.keys() + assert 'NM_001322003.1:c.1216A>G' in list(results.keys()) assert results['NM_001322003.1:c.1216A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322003.1:c.1216A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001322003.1:c.1216A>G']['alt_genomic_loci'] == [] @@ -16773,13 +16773,13 @@ def test_variant296(self): assert results['NM_001322003.1:c.1216A>G']['hgvs_lrg_variant'] == '' assert results['NM_001322003.1:c.1216A>G']['hgvs_transcript_variant'] == 'NM_001322003.1:c.1216A>G' assert results['NM_001322003.1:c.1216A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322003.1:c.1216A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NM_001322003.1:c.1216A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} - assert results['NM_001322003.1:c.1216A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NM_001322003.1:c.1216A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322003.1:c.1216A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_001322003.1:c.1216A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} + assert results['NM_001322003.1:c.1216A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_001322003.1:c.1216A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} assert results['NM_001322003.1:c.1216A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308932.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322003.1'} - assert 'NM_001322014.1:c.1621A>G' in results.keys() + assert 'NM_001322014.1:c.1621A>G' in list(results.keys()) assert results['NM_001322014.1:c.1621A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322014.1:c.1621A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001322014.1:c.1621A>G']['alt_genomic_loci'] == [] @@ -16791,13 +16791,13 @@ def test_variant296(self): assert 
results['NM_001322014.1:c.1621A>G']['hgvs_lrg_variant'] == '' assert results['NM_001322014.1:c.1621A>G']['hgvs_transcript_variant'] == 'NM_001322014.1:c.1621A>G' assert results['NM_001322014.1:c.1621A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322014.1:c.1621A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NM_001322014.1:c.1621A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} - assert results['NM_001322014.1:c.1621A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NM_001322014.1:c.1621A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322014.1:c.1621A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_001322014.1:c.1621A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} + assert results['NM_001322014.1:c.1621A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_001322014.1:c.1621A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} assert results['NM_001322014.1:c.1621A>G']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308943.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322014.1'} - assert 'NM_001322004.1:c.1216A>G' in results.keys() + assert 'NM_001322004.1:c.1216A>G' in list(results.keys()) assert results['NM_001322004.1:c.1216A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322004.1:c.1216A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001322004.1:c.1216A>G']['alt_genomic_loci'] == [] @@ -16809,13 +16809,13 @@ def test_variant296(self): assert results['NM_001322004.1:c.1216A>G']['hgvs_lrg_variant'] == '' assert results['NM_001322004.1:c.1216A>G']['hgvs_transcript_variant'] == 'NM_001322004.1:c.1216A>G' assert results['NM_001322004.1:c.1216A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322004.1:c.1216A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NM_001322004.1:c.1216A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} - assert results['NM_001322004.1:c.1216A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NM_001322004.1:c.1216A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322004.1:c.1216A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_001322004.1:c.1216A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 
'alt': 'C'}} + assert results['NM_001322004.1:c.1216A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_001322004.1:c.1216A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} assert results['NM_001322004.1:c.1216A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308933.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322004.1'} - assert 'NM_001322008.1:c.1303A>G' in results.keys() + assert 'NM_001322008.1:c.1303A>G' in list(results.keys()) assert results['NM_001322008.1:c.1303A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322008.1:c.1303A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001322008.1:c.1303A>G']['alt_genomic_loci'] == [] @@ -16827,13 +16827,13 @@ def test_variant296(self): assert results['NM_001322008.1:c.1303A>G']['hgvs_lrg_variant'] == '' assert results['NM_001322008.1:c.1303A>G']['hgvs_transcript_variant'] == 'NM_001322008.1:c.1303A>G' assert results['NM_001322008.1:c.1303A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322008.1:c.1303A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NM_001322008.1:c.1303A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} - assert results['NM_001322008.1:c.1303A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NM_001322008.1:c.1303A>G']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322008.1:c.1303A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_001322008.1:c.1303A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} + assert results['NM_001322008.1:c.1303A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_001322008.1:c.1303A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} assert results['NM_001322008.1:c.1303A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308937.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322008.1'} - assert 'NM_001322006.1:c.1465A>G' in results.keys() + assert 'NM_001322006.1:c.1465A>G' in list(results.keys()) assert results['NM_001322006.1:c.1465A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322006.1:c.1465A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001322006.1:c.1465A>G']['alt_genomic_loci'] == [] @@ -16845,13 +16845,13 @@ def test_variant296(self): assert results['NM_001322006.1:c.1465A>G']['hgvs_lrg_variant'] == '' assert results['NM_001322006.1:c.1465A>G']['hgvs_transcript_variant'] == 'NM_001322006.1:c.1465A>G' assert results['NM_001322006.1:c.1465A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322006.1:c.1465A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 
'alt': u'C'}} - assert results['NM_001322006.1:c.1465A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} - assert results['NM_001322006.1:c.1465A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NM_001322006.1:c.1465A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322006.1:c.1465A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_001322006.1:c.1465A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} + assert results['NM_001322006.1:c.1465A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_001322006.1:c.1465A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} assert results['NM_001322006.1:c.1465A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308935.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322006.1'} - assert 'NM_001322013.1:c.1048A>G' in results.keys() + assert 'NM_001322013.1:c.1048A>G' in list(results.keys()) assert results['NM_001322013.1:c.1048A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322013.1:c.1048A>G']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_001322013.1:c.1048A>G']['alt_genomic_loci'] == [] @@ -16863,13 +16863,13 @@ def test_variant296(self): assert results['NM_001322013.1:c.1048A>G']['hgvs_lrg_variant'] == '' assert results['NM_001322013.1:c.1048A>G']['hgvs_transcript_variant'] == 'NM_001322013.1:c.1048A>G' assert results['NM_001322013.1:c.1048A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322013.1:c.1048A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NM_001322013.1:c.1048A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} - assert results['NM_001322013.1:c.1048A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NM_001322013.1:c.1048A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322013.1:c.1048A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_001322013.1:c.1048A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} + assert results['NM_001322013.1:c.1048A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_001322013.1:c.1048A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 
'C'}} assert results['NM_001322013.1:c.1048A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308942.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322013.1'} - assert 'NM_001322009.1:c.1216A>G' in results.keys() + assert 'NM_001322009.1:c.1216A>G' in list(results.keys()) assert results['NM_001322009.1:c.1216A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322009.1:c.1216A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001322009.1:c.1216A>G']['alt_genomic_loci'] == [] @@ -16881,13 +16881,13 @@ def test_variant296(self): assert results['NM_001322009.1:c.1216A>G']['hgvs_lrg_variant'] == '' assert results['NM_001322009.1:c.1216A>G']['hgvs_transcript_variant'] == 'NM_001322009.1:c.1216A>G' assert results['NM_001322009.1:c.1216A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322009.1:c.1216A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NM_001322009.1:c.1216A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} - assert results['NM_001322009.1:c.1216A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NM_001322009.1:c.1216A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322009.1:c.1216A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_001322009.1:c.1216A>G']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} + assert results['NM_001322009.1:c.1216A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_001322009.1:c.1216A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} assert results['NM_001322009.1:c.1216A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308938.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322009.1'} - assert 'NR_003085.2:n.1703G=' in results.keys() + assert 'NR_003085.2:n.1703G=' in list(results.keys()) assert results['NR_003085.2:n.1703G=']['hgvs_lrg_transcript_variant'] == '' assert results['NR_003085.2:n.1703G=']['refseqgene_context_intronic_sequence'] == '' assert results['NR_003085.2:n.1703G=']['alt_genomic_loci'] == [] @@ -16899,14 +16899,14 @@ def test_variant296(self): assert results['NR_003085.2:n.1703G=']['hgvs_lrg_variant'] == '' assert results['NR_003085.2:n.1703G=']['hgvs_transcript_variant'] == 'NR_003085.2:n.1703G=' assert results['NR_003085.2:n.1703G=']['hgvs_refseqgene_variant'] == '' - assert results['NR_003085.2:n.1703G=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': u'C'}} - assert 'hg38' not in results['NR_003085.2:n.1703G=']['primary_assembly_loci'].keys() - assert results['NR_003085.2:n.1703G=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': u'C'}} - assert 'grch38' not in results['NR_003085.2:n.1703G=']['primary_assembly_loci'].keys() + assert 
results['NR_003085.2:n.1703G=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert 'hg38' not in list(results['NR_003085.2:n.1703G=']['primary_assembly_loci'].keys()) + assert results['NR_003085.2:n.1703G=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert 'grch38' not in list(results['NR_003085.2:n.1703G=']['primary_assembly_loci'].keys()) assert results['NR_003085.2:n.1703G=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_003085.2'} assert results['flag'] == 'gene_variant' - assert 'NM_001322005.1:c.1216A>G' in results.keys() + assert 'NM_001322005.1:c.1216A>G' in list(results.keys()) assert results['NM_001322005.1:c.1216A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322005.1:c.1216A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001322005.1:c.1216A>G']['alt_genomic_loci'] == [] @@ -16918,13 +16918,13 @@ def test_variant296(self): assert results['NM_001322005.1:c.1216A>G']['hgvs_lrg_variant'] == '' assert results['NM_001322005.1:c.1216A>G']['hgvs_transcript_variant'] == 'NM_001322005.1:c.1216A>G' assert results['NM_001322005.1:c.1216A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322005.1:c.1216A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NM_001322005.1:c.1216A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} - assert results['NM_001322005.1:c.1216A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 
'pos': '6026775', 'alt': u'C'}} - assert results['NM_001322005.1:c.1216A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322005.1:c.1216A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_001322005.1:c.1216A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} + assert results['NM_001322005.1:c.1216A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_001322005.1:c.1216A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} assert results['NM_001322005.1:c.1216A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308934.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322005.1'} - assert 'NM_001322007.1:c.1303A>G' in results.keys() + assert 'NM_001322007.1:c.1303A>G' in list(results.keys()) assert results['NM_001322007.1:c.1303A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322007.1:c.1303A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001322007.1:c.1303A>G']['alt_genomic_loci'] == [] @@ -16936,13 +16936,13 @@ def test_variant296(self): assert results['NM_001322007.1:c.1303A>G']['hgvs_lrg_variant'] == '' assert results['NM_001322007.1:c.1303A>G']['hgvs_transcript_variant'] == 'NM_001322007.1:c.1303A>G' assert results['NM_001322007.1:c.1303A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322007.1:c.1303A>G']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NM_001322007.1:c.1303A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} - assert results['NM_001322007.1:c.1303A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NM_001322007.1:c.1303A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322007.1:c.1303A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_001322007.1:c.1303A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} + assert results['NM_001322007.1:c.1303A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_001322007.1:c.1303A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} assert results['NM_001322007.1:c.1303A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308936.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322007.1'} - assert 'NM_000535.5:c.1621G=' in results.keys() + assert 'NM_000535.5:c.1621G=' in list(results.keys()) assert results['NM_000535.5:c.1621G=']['hgvs_lrg_transcript_variant'] == 'LRG_161t1:c.1621G=' assert 
results['NM_000535.5:c.1621G=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000535.5:c.1621G=']['alt_genomic_loci'] == [] @@ -16954,13 +16954,13 @@ def test_variant296(self): assert results['NM_000535.5:c.1621G=']['hgvs_lrg_variant'] == 'LRG_161:g.26963G=' assert results['NM_000535.5:c.1621G=']['hgvs_transcript_variant'] == 'NM_000535.5:c.1621G=' assert results['NM_000535.5:c.1621G=']['hgvs_refseqgene_variant'] == 'NG_008466.1:g.26963G=' - assert results['NM_000535.5:c.1621G=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': u'C'}} - assert 'hg38' not in results['NM_000535.5:c.1621G=']['primary_assembly_loci'].keys() - assert results['NM_000535.5:c.1621G=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': u'C'}} - assert 'grch38' not in results['NM_000535.5:c.1621G=']['primary_assembly_loci'].keys() + assert results['NM_000535.5:c.1621G=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert 'hg38' not in list(results['NM_000535.5:c.1621G=']['primary_assembly_loci'].keys()) + assert results['NM_000535.5:c.1621G=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert 'grch38' not in list(results['NM_000535.5:c.1621G=']['primary_assembly_loci'].keys()) assert results['NM_000535.5:c.1621G=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008466.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000526.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000535.5', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_161.xml'} - assert 'NR_136154.1:n.1708A>G' in 
results.keys() + assert 'NR_136154.1:n.1708A>G' in list(results.keys()) assert results['NR_136154.1:n.1708A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NR_136154.1:n.1708A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NR_136154.1:n.1708A>G']['alt_genomic_loci'] == [] @@ -16972,13 +16972,13 @@ def test_variant296(self): assert results['NR_136154.1:n.1708A>G']['hgvs_lrg_variant'] == '' assert results['NR_136154.1:n.1708A>G']['hgvs_transcript_variant'] == 'NR_136154.1:n.1708A>G' assert results['NR_136154.1:n.1708A>G']['hgvs_refseqgene_variant'] == '' - assert results['NR_136154.1:n.1708A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NR_136154.1:n.1708A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} - assert results['NR_136154.1:n.1708A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NR_136154.1:n.1708A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NR_136154.1:n.1708A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NR_136154.1:n.1708A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} + assert results['NR_136154.1:n.1708A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 
'C'}} + assert results['NR_136154.1:n.1708A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} assert results['NR_136154.1:n.1708A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_136154.1'} - assert 'NM_001322011.1:c.688A>G' in results.keys() + assert 'NM_001322011.1:c.688A>G' in list(results.keys()) assert results['NM_001322011.1:c.688A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322011.1:c.688A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001322011.1:c.688A>G']['alt_genomic_loci'] == [] @@ -16990,13 +16990,13 @@ def test_variant296(self): assert results['NM_001322011.1:c.688A>G']['hgvs_lrg_variant'] == '' assert results['NM_001322011.1:c.688A>G']['hgvs_transcript_variant'] == 'NM_001322011.1:c.688A>G' assert results['NM_001322011.1:c.688A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322011.1:c.688A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NM_001322011.1:c.688A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} - assert results['NM_001322011.1:c.688A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NM_001322011.1:c.688A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_001322011.1:c.688A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': 
'6026775', 'alt': 'C'}} + assert results['NM_001322011.1:c.688A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} + assert results['NM_001322011.1:c.688A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_001322011.1:c.688A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} assert results['NM_001322011.1:c.688A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308940.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322011.1'} - assert 'NM_000535.6:c.1621A>G' in results.keys() + assert 'NM_000535.6:c.1621A>G' in list(results.keys()) assert results['NM_000535.6:c.1621A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000535.6:c.1621A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000535.6:c.1621A>G']['alt_genomic_loci'] == [] @@ -17008,19 +17008,19 @@ def test_variant296(self): assert results['NM_000535.6:c.1621A>G']['hgvs_lrg_variant'] == '' assert results['NM_000535.6:c.1621A>G']['hgvs_transcript_variant'] == 'NM_000535.6:c.1621A>G' assert results['NM_000535.6:c.1621A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_000535.6:c.1621A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NM_000535.6:c.1621A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} - assert results['NM_000535.6:c.1621A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '6026775', 'alt': u'C'}} - assert results['NM_000535.6:c.1621A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': u'T', 'pos': '5987144', 'alt': u'C'}} + assert results['NM_000535.6:c.1621A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_000535.6:c.1621A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} + assert results['NM_000535.6:c.1621A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_000535.6:c.1621A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} assert results['NM_000535.6:c.1621A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000526.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000535.6'} def test_variant297(self): variant = '7-55242465-GGAATTAAGAGAAGCA-G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001346900.1:c.2077_2091del' in results.keys() + assert 'NM_001346900.1:c.2077_2091del' in list(results.keys()) assert results['NM_001346900.1:c.2077_2091del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001346900.1:c.2077_2091del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001346900.1:c.2077_2091del']['alt_genomic_loci'] == [] @@ -17038,7 +17038,7 @@ def test_variant297(self): assert results['NM_001346900.1:c.2077_2091del']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} assert results['NM_001346900.1:c.2077_2091del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333829.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346900.1'} - assert 'NM_001346898.1:c.2236_2250del' in results.keys() + assert 'NM_001346898.1:c.2236_2250del' in list(results.keys()) assert results['NM_001346898.1:c.2236_2250del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001346898.1:c.2236_2250del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001346898.1:c.2236_2250del']['alt_genomic_loci'] == [] @@ -17056,7 +17056,7 @@ def test_variant297(self): assert results['NM_001346898.1:c.2236_2250del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} assert results['NM_001346898.1:c.2236_2250del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333827.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346898.1'} - assert 'NM_001346941.1:c.1435_1449del' in results.keys() + assert 'NM_001346941.1:c.1435_1449del' in list(results.keys()) assert results['NM_001346941.1:c.1435_1449del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001346941.1:c.1435_1449del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001346941.1:c.1435_1449del']['alt_genomic_loci'] == [] @@ -17075,7 +17075,7 @@ def test_variant297(self): assert results['NM_001346941.1:c.1435_1449del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333870.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346941.1'} assert results['flag'] == 'gene_variant' - assert 'NM_001346899.1:c.2101_2115del' in results.keys() + assert 
'NM_001346899.1:c.2101_2115del' in list(results.keys()) assert results['NM_001346899.1:c.2101_2115del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001346899.1:c.2101_2115del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001346899.1:c.2101_2115del']['alt_genomic_loci'] == [] @@ -17093,7 +17093,7 @@ def test_variant297(self): assert results['NM_001346899.1:c.2101_2115del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} assert results['NM_001346899.1:c.2101_2115del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333828.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346899.1'} - assert 'NM_001346897.1:c.2101_2115del' in results.keys() + assert 'NM_001346897.1:c.2101_2115del' in list(results.keys()) assert results['NM_001346897.1:c.2101_2115del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001346897.1:c.2101_2115del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001346897.1:c.2101_2115del']['alt_genomic_loci'] == [] @@ -17111,7 +17111,7 @@ def test_variant297(self): assert results['NM_001346897.1:c.2101_2115del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} assert results['NM_001346897.1:c.2101_2115del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333826.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346897.1'} - assert 'NM_005228.3:c.2236_2250del' in results.keys() + assert 'NM_005228.3:c.2236_2250del' in list(results.keys()) assert results['NM_005228.3:c.2236_2250del']['hgvs_lrg_transcript_variant'] == 'LRG_304t1:c.2236_2250del' assert results['NM_005228.3:c.2236_2250del']['refseqgene_context_intronic_sequence'] == 
'' assert results['NM_005228.3:c.2236_2250del']['alt_genomic_loci'] == [] @@ -17129,7 +17129,7 @@ def test_variant297(self): assert results['NM_005228.3:c.2236_2250del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} assert results['NM_005228.3:c.2236_2250del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007726.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005219.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005228.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_304.xml'} - assert 'NM_005228.4:c.2236_2250del' in results.keys() + assert 'NM_005228.4:c.2236_2250del' in list(results.keys()) assert results['NM_005228.4:c.2236_2250del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_005228.4:c.2236_2250del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_005228.4:c.2236_2250del']['alt_genomic_loci'] == [] @@ -17151,9 +17151,9 @@ def test_variant297(self): def test_variant298(self): variant = '7-55248992-T-TTCCAGGAAGCCT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_005228.3:c.2284-5_2290dup' in results.keys() + assert 'NM_005228.3:c.2284-5_2290dup' in list(results.keys()) assert results['NM_005228.3:c.2284-5_2290dup']['hgvs_lrg_transcript_variant'] == 'LRG_304t1:c.2284-5_2290dup' assert results['NM_005228.3:c.2284-5_2290dup']['refseqgene_context_intronic_sequence'] == 'NG_007726.3(NM_005228.3):c.2284-5_2290dup' assert results['NM_005228.3:c.2284-5_2290dup']['alt_genomic_loci'] == [] @@ -17171,7 +17171,7 @@ def test_variant298(self): assert results['NM_005228.3:c.2284-5_2290dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} assert 
results['NM_005228.3:c.2284-5_2290dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007726.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005219.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005228.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_304.xml'} - assert 'NM_001346899.1:c.2149-5_2155dup' in results.keys() + assert 'NM_001346899.1:c.2149-5_2155dup' in list(results.keys()) assert results['NM_001346899.1:c.2149-5_2155dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001346899.1:c.2149-5_2155dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001346899.1:c.2149-5_2155dup']['alt_genomic_loci'] == [] @@ -17189,7 +17189,7 @@ def test_variant298(self): assert results['NM_001346899.1:c.2149-5_2155dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} assert results['NM_001346899.1:c.2149-5_2155dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333828.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346899.1'} - assert 'NM_005228.4:c.2284-5_2290dup' in results.keys() + assert 'NM_005228.4:c.2284-5_2290dup' in list(results.keys()) assert results['NM_005228.4:c.2284-5_2290dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_005228.4:c.2284-5_2290dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_005228.4:c.2284-5_2290dup']['alt_genomic_loci'] == [] @@ -17207,7 +17207,7 @@ def test_variant298(self): assert results['NM_005228.4:c.2284-5_2290dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} assert results['NM_005228.4:c.2284-5_2290dup']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_005219.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005228.4'} - assert 'NM_001346898.1:c.2284-5_2290dup' in results.keys() + assert 'NM_001346898.1:c.2284-5_2290dup' in list(results.keys()) assert results['NM_001346898.1:c.2284-5_2290dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001346898.1:c.2284-5_2290dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001346898.1:c.2284-5_2290dup']['alt_genomic_loci'] == [] @@ -17225,7 +17225,7 @@ def test_variant298(self): assert results['NM_001346898.1:c.2284-5_2290dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} assert results['NM_001346898.1:c.2284-5_2290dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333827.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346898.1'} - assert 'NM_001346941.1:c.1483-5_1489dup' in results.keys() + assert 'NM_001346941.1:c.1483-5_1489dup' in list(results.keys()) assert results['NM_001346941.1:c.1483-5_1489dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001346941.1:c.1483-5_1489dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001346941.1:c.1483-5_1489dup']['alt_genomic_loci'] == [] @@ -17244,7 +17244,7 @@ def test_variant298(self): assert results['NM_001346941.1:c.1483-5_1489dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333870.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346941.1'} assert results['flag'] == 'gene_variant' - assert 'NM_001346900.1:c.2125-5_2131dup' in results.keys() + assert 'NM_001346900.1:c.2125-5_2131dup' in list(results.keys()) assert results['NM_001346900.1:c.2125-5_2131dup']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001346900.1:c.2125-5_2131dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001346900.1:c.2125-5_2131dup']['alt_genomic_loci'] == [] @@ -17262,7 +17262,7 @@ def test_variant298(self): assert results['NM_001346900.1:c.2125-5_2131dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} assert results['NM_001346900.1:c.2125-5_2131dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333829.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346900.1'} - assert 'NR_047551.1:n.1272_1283dup' in results.keys() + assert 'NR_047551.1:n.1272_1283dup' in list(results.keys()) assert results['NR_047551.1:n.1272_1283dup']['hgvs_lrg_transcript_variant'] == '' assert results['NR_047551.1:n.1272_1283dup']['refseqgene_context_intronic_sequence'] == '' assert results['NR_047551.1:n.1272_1283dup']['alt_genomic_loci'] == [] @@ -17280,7 +17280,7 @@ def test_variant298(self): assert results['NR_047551.1:n.1272_1283dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} assert results['NR_047551.1:n.1272_1283dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_047551.1'} - assert 'NM_001346897.1:c.2149-5_2155dup' in results.keys() + assert 'NM_001346897.1:c.2149-5_2155dup' in list(results.keys()) assert results['NM_001346897.1:c.2149-5_2155dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001346897.1:c.2149-5_2155dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001346897.1:c.2149-5_2155dup']['alt_genomic_loci'] == [] @@ -17302,9 +17302,9 @@ def test_variant298(self): def test_variant299(self): variant = '7-75932111-C-A' results = self.vv.validate(variant, 'GRCh37', 'all') - 
print results + print(results) - assert 'NM_001540.4:c.82C>A' in results.keys() + assert 'NM_001540.4:c.82C>A' in list(results.keys()) assert results['NM_001540.4:c.82C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001540.4:c.82C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001540.4:c.82C>A']['alt_genomic_loci'] == [] @@ -17323,7 +17323,7 @@ def test_variant299(self): assert results['NM_001540.4:c.82C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001531.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001540.4'} assert results['flag'] == 'gene_variant' - assert 'NM_001540.3:c.82C>A' in results.keys() + assert 'NM_001540.3:c.82C>A' in list(results.keys()) assert results['NM_001540.3:c.82C>A']['hgvs_lrg_transcript_variant'] == 'LRG_248t1:c.82C>A' assert results['NM_001540.3:c.82C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001540.3:c.82C>A']['alt_genomic_loci'] == [] @@ -17345,10 +17345,10 @@ def test_variant299(self): def test_variant300(self): variant = '7-91652178-A-AAAC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_005751.4:c.4004_4006dup' in results.keys() + assert 'NM_005751.4:c.4004_4006dup' in list(results.keys()) assert results['NM_005751.4:c.4004_4006dup']['hgvs_lrg_transcript_variant'] == 'LRG_331t1:c.4004_4006dup' assert results['NM_005751.4:c.4004_4006dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_005751.4:c.4004_4006dup']['alt_genomic_loci'] == [] @@ -17366,7 +17366,7 @@ def test_variant300(self): assert results['NM_005751.4:c.4004_4006dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.92022865_92022867dup', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '92022864', 'alt': 'AAAC'}} assert results['NM_005751.4:c.4004_4006dup']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_011623.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005742.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005751.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_331.xml'} - assert 'NM_147185.2:c.4004_4006dup' in results.keys() + assert 'NM_147185.2:c.4004_4006dup' in list(results.keys()) assert results['NM_147185.2:c.4004_4006dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_147185.2:c.4004_4006dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_147185.2:c.4004_4006dup']['alt_genomic_loci'] == [] @@ -17388,10 +17388,10 @@ def test_variant300(self): def test_variant301(self): variant = '7-117199644-ATCT-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NR_149084.1:n.221+1140_221+1142del' in results.keys() + assert 'NR_149084.1:n.221+1140_221+1142del' in list(results.keys()) assert results['NR_149084.1:n.221+1140_221+1142del']['hgvs_lrg_transcript_variant'] == '' assert results['NR_149084.1:n.221+1140_221+1142del']['refseqgene_context_intronic_sequence'] == '' assert results['NR_149084.1:n.221+1140_221+1142del']['alt_genomic_loci'] == [] @@ -17409,7 +17409,7 @@ def test_variant301(self): assert results['NR_149084.1:n.221+1140_221+1142del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.117559591_117559593del', 'vcf': {'chr': '7', 'ref': 'ATCT', 'pos': '117559590', 'alt': 'A'}} assert results['NR_149084.1:n.221+1140_221+1142del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_149084.1'} - assert 'NM_000492.3:c.1521_1523del' in results.keys() + assert 'NM_000492.3:c.1521_1523del' in list(results.keys()) assert results['NM_000492.3:c.1521_1523del']['hgvs_lrg_transcript_variant'] == 'LRG_663t1:c.1521_1523del' assert results['NM_000492.3:c.1521_1523del']['refseqgene_context_intronic_sequence'] == '' 
assert results['NM_000492.3:c.1521_1523del']['alt_genomic_loci'] == [] @@ -17431,9 +17431,9 @@ def test_variant301(self): def test_variant302(self): variant = '7-140453136-AC-CT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NR_148928.1:n.2896_2897delinsAG' in results.keys() + assert 'NR_148928.1:n.2896_2897delinsAG' in list(results.keys()) assert results['NR_148928.1:n.2896_2897delinsAG']['hgvs_lrg_transcript_variant'] == '' assert results['NR_148928.1:n.2896_2897delinsAG']['refseqgene_context_intronic_sequence'] == '' assert results['NR_148928.1:n.2896_2897delinsAG']['alt_genomic_loci'] == [] @@ -17445,13 +17445,13 @@ def test_variant302(self): assert results['NR_148928.1:n.2896_2897delinsAG']['hgvs_lrg_variant'] == '' assert results['NR_148928.1:n.2896_2897delinsAG']['hgvs_transcript_variant'] == 'NR_148928.1:n.2896_2897delinsAG' assert results['NR_148928.1:n.2896_2897delinsAG']['hgvs_refseqgene_variant'] == '' - assert results['NR_148928.1:n.2896_2897delinsAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140453136', 'alt': u'CT'}} - assert results['NR_148928.1:n.2896_2897delinsAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140753336', 'alt': u'CT'}} - assert results['NR_148928.1:n.2896_2897delinsAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140453136', 'alt': u'CT'}} - assert results['NR_148928.1:n.2896_2897delinsAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140753336', 'alt': u'CT'}} + assert results['NR_148928.1:n.2896_2897delinsAG']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140453136', 'alt': 'CT'}} + assert results['NR_148928.1:n.2896_2897delinsAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140753336', 'alt': 'CT'}} + assert results['NR_148928.1:n.2896_2897delinsAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140453136', 'alt': 'CT'}} + assert results['NR_148928.1:n.2896_2897delinsAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140753336', 'alt': 'CT'}} assert results['NR_148928.1:n.2896_2897delinsAG']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_148928.1'} - assert 'NM_004333.4:c.1798_1799delinsAG' in results.keys() + assert 'NM_004333.4:c.1798_1799delinsAG' in list(results.keys()) assert results['NM_004333.4:c.1798_1799delinsAG']['hgvs_lrg_transcript_variant'] == 'LRG_299t1:c.1798_1799delinsAG' assert results['NM_004333.4:c.1798_1799delinsAG']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004333.4:c.1798_1799delinsAG']['alt_genomic_loci'] == [] @@ -17463,13 +17463,13 @@ def test_variant302(self): assert results['NM_004333.4:c.1798_1799delinsAG']['hgvs_lrg_variant'] == '' assert results['NM_004333.4:c.1798_1799delinsAG']['hgvs_transcript_variant'] == 'NM_004333.4:c.1798_1799delinsAG' assert results['NM_004333.4:c.1798_1799delinsAG']['hgvs_refseqgene_variant'] == 'NG_007873.2:g.176428_176429delinsAG' - assert results['NM_004333.4:c.1798_1799delinsAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140453136', 'alt': 
u'CT'}} - assert results['NM_004333.4:c.1798_1799delinsAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140753336', 'alt': u'CT'}} - assert results['NM_004333.4:c.1798_1799delinsAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140453136', 'alt': u'CT'}} - assert results['NM_004333.4:c.1798_1799delinsAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140753336', 'alt': u'CT'}} + assert results['NM_004333.4:c.1798_1799delinsAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140453136', 'alt': 'CT'}} + assert results['NM_004333.4:c.1798_1799delinsAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140753336', 'alt': 'CT'}} + assert results['NM_004333.4:c.1798_1799delinsAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140453136', 'alt': 'CT'}} + assert results['NM_004333.4:c.1798_1799delinsAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140753336', 'alt': 'CT'}} assert results['NM_004333.4:c.1798_1799delinsAG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007873.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.4'} - assert 'NM_004333.5:c.1798_1799delinsAG' in results.keys() + assert 
'NM_004333.5:c.1798_1799delinsAG' in list(results.keys()) assert results['NM_004333.5:c.1798_1799delinsAG']['hgvs_lrg_transcript_variant'] == '' assert results['NM_004333.5:c.1798_1799delinsAG']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004333.5:c.1798_1799delinsAG']['alt_genomic_loci'] == [] @@ -17481,14 +17481,14 @@ def test_variant302(self): assert results['NM_004333.5:c.1798_1799delinsAG']['hgvs_lrg_variant'] == '' assert results['NM_004333.5:c.1798_1799delinsAG']['hgvs_transcript_variant'] == 'NM_004333.5:c.1798_1799delinsAG' assert results['NM_004333.5:c.1798_1799delinsAG']['hgvs_refseqgene_variant'] == '' - assert results['NM_004333.5:c.1798_1799delinsAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140453136', 'alt': u'CT'}} - assert results['NM_004333.5:c.1798_1799delinsAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140753336', 'alt': u'CT'}} - assert results['NM_004333.5:c.1798_1799delinsAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140453136', 'alt': u'CT'}} - assert results['NM_004333.5:c.1798_1799delinsAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140753336', 'alt': u'CT'}} + assert results['NM_004333.5:c.1798_1799delinsAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140453136', 'alt': 'CT'}} + assert results['NM_004333.5:c.1798_1799delinsAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': 'chr7', 
'ref': 'AC', 'pos': '140753336', 'alt': 'CT'}} + assert results['NM_004333.5:c.1798_1799delinsAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140453136', 'alt': 'CT'}} + assert results['NM_004333.5:c.1798_1799delinsAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140753336', 'alt': 'CT'}} assert results['NM_004333.5:c.1798_1799delinsAG']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.5'} assert results['flag'] == 'gene_variant' - assert 'NM_001354609.1:c.1798_1799delinsAG' in results.keys() + assert 'NM_001354609.1:c.1798_1799delinsAG' in list(results.keys()) assert results['NM_001354609.1:c.1798_1799delinsAG']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001354609.1:c.1798_1799delinsAG']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001354609.1:c.1798_1799delinsAG']['alt_genomic_loci'] == [] @@ -17500,19 +17500,19 @@ def test_variant302(self): assert results['NM_001354609.1:c.1798_1799delinsAG']['hgvs_lrg_variant'] == '' assert results['NM_001354609.1:c.1798_1799delinsAG']['hgvs_transcript_variant'] == 'NM_001354609.1:c.1798_1799delinsAG' assert results['NM_001354609.1:c.1798_1799delinsAG']['hgvs_refseqgene_variant'] == '' - assert results['NM_001354609.1:c.1798_1799delinsAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140453136', 'alt': u'CT'}} - assert results['NM_001354609.1:c.1798_1799delinsAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140753336', 'alt': u'CT'}} - assert 
results['NM_001354609.1:c.1798_1799delinsAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140453136', 'alt': u'CT'}} - assert results['NM_001354609.1:c.1798_1799delinsAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140753336', 'alt': u'CT'}} + assert results['NM_001354609.1:c.1798_1799delinsAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140453136', 'alt': 'CT'}} + assert results['NM_001354609.1:c.1798_1799delinsAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140753336', 'alt': 'CT'}} + assert results['NM_001354609.1:c.1798_1799delinsAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140453136', 'alt': 'CT'}} + assert results['NM_001354609.1:c.1798_1799delinsAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140753336', 'alt': 'CT'}} assert results['NM_001354609.1:c.1798_1799delinsAG']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341538.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354609.1'} def test_variant303(self): variant = '7-140453136-A-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001354609.1:c.1799T>A' in results.keys() + assert 'NM_001354609.1:c.1799T>A' in list(results.keys()) assert results['NM_001354609.1:c.1799T>A']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001354609.1:c.1799T>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001354609.1:c.1799T>A']['alt_genomic_loci'] == [] @@ -17524,13 +17524,13 @@ def test_variant303(self): assert results['NM_001354609.1:c.1799T>A']['hgvs_lrg_variant'] == '' assert results['NM_001354609.1:c.1799T>A']['hgvs_transcript_variant'] == 'NM_001354609.1:c.1799T>A' assert results['NM_001354609.1:c.1799T>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001354609.1:c.1799T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': 'chr7', 'ref': u'A', 'pos': '140453136', 'alt': u'T'}} - assert results['NM_001354609.1:c.1799T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': 'chr7', 'ref': u'A', 'pos': '140753336', 'alt': u'T'}} - assert results['NM_001354609.1:c.1799T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': '7', 'ref': u'A', 'pos': '140453136', 'alt': u'T'}} - assert results['NM_001354609.1:c.1799T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': '7', 'ref': u'A', 'pos': '140753336', 'alt': u'T'}} + assert results['NM_001354609.1:c.1799T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '140453136', 'alt': 'T'}} + assert results['NM_001354609.1:c.1799T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '140753336', 'alt': 'T'}} + assert results['NM_001354609.1:c.1799T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '140453136', 'alt': 'T'}} + assert results['NM_001354609.1:c.1799T>A']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '140753336', 'alt': 'T'}} assert results['NM_001354609.1:c.1799T>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341538.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354609.1'} - assert 'NR_148928.1:n.2897T>A' in results.keys() + assert 'NR_148928.1:n.2897T>A' in list(results.keys()) assert results['NR_148928.1:n.2897T>A']['hgvs_lrg_transcript_variant'] == '' assert results['NR_148928.1:n.2897T>A']['refseqgene_context_intronic_sequence'] == '' assert results['NR_148928.1:n.2897T>A']['alt_genomic_loci'] == [] @@ -17542,13 +17542,13 @@ def test_variant303(self): assert results['NR_148928.1:n.2897T>A']['hgvs_lrg_variant'] == '' assert results['NR_148928.1:n.2897T>A']['hgvs_transcript_variant'] == 'NR_148928.1:n.2897T>A' assert results['NR_148928.1:n.2897T>A']['hgvs_refseqgene_variant'] == '' - assert results['NR_148928.1:n.2897T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': 'chr7', 'ref': u'A', 'pos': '140453136', 'alt': u'T'}} - assert results['NR_148928.1:n.2897T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': 'chr7', 'ref': u'A', 'pos': '140753336', 'alt': u'T'}} - assert results['NR_148928.1:n.2897T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': '7', 'ref': u'A', 'pos': '140453136', 'alt': u'T'}} - assert results['NR_148928.1:n.2897T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': '7', 'ref': u'A', 'pos': '140753336', 'alt': u'T'}} + assert results['NR_148928.1:n.2897T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '140453136', 'alt': 'T'}} + 
assert results['NR_148928.1:n.2897T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '140753336', 'alt': 'T'}} + assert results['NR_148928.1:n.2897T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '140453136', 'alt': 'T'}} + assert results['NR_148928.1:n.2897T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '140753336', 'alt': 'T'}} assert results['NR_148928.1:n.2897T>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_148928.1'} - assert 'NM_004333.5:c.1799T>A' in results.keys() + assert 'NM_004333.5:c.1799T>A' in list(results.keys()) assert results['NM_004333.5:c.1799T>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_004333.5:c.1799T>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004333.5:c.1799T>A']['alt_genomic_loci'] == [] @@ -17560,14 +17560,14 @@ def test_variant303(self): assert results['NM_004333.5:c.1799T>A']['hgvs_lrg_variant'] == '' assert results['NM_004333.5:c.1799T>A']['hgvs_transcript_variant'] == 'NM_004333.5:c.1799T>A' assert results['NM_004333.5:c.1799T>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_004333.5:c.1799T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': 'chr7', 'ref': u'A', 'pos': '140453136', 'alt': u'T'}} - assert results['NM_004333.5:c.1799T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': 'chr7', 'ref': u'A', 'pos': '140753336', 'alt': u'T'}} - assert results['NM_004333.5:c.1799T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': '7', 'ref': u'A', 'pos': '140453136', 'alt': u'T'}} 
- assert results['NM_004333.5:c.1799T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': '7', 'ref': u'A', 'pos': '140753336', 'alt': u'T'}} + assert results['NM_004333.5:c.1799T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '140453136', 'alt': 'T'}} + assert results['NM_004333.5:c.1799T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '140753336', 'alt': 'T'}} + assert results['NM_004333.5:c.1799T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '140453136', 'alt': 'T'}} + assert results['NM_004333.5:c.1799T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '140753336', 'alt': 'T'}} assert results['NM_004333.5:c.1799T>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.5'} assert results['flag'] == 'gene_variant' - assert 'NM_004333.4:c.1799T>A' in results.keys() + assert 'NM_004333.4:c.1799T>A' in list(results.keys()) assert results['NM_004333.4:c.1799T>A']['hgvs_lrg_transcript_variant'] == 'LRG_299t1:c.1799T>A' assert results['NM_004333.4:c.1799T>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004333.4:c.1799T>A']['alt_genomic_loci'] == [] @@ -17579,19 +17579,19 @@ def test_variant303(self): assert results['NM_004333.4:c.1799T>A']['hgvs_lrg_variant'] == '' assert results['NM_004333.4:c.1799T>A']['hgvs_transcript_variant'] == 'NM_004333.4:c.1799T>A' assert results['NM_004333.4:c.1799T>A']['hgvs_refseqgene_variant'] == 'NG_007873.2:g.176429T>A' - assert 
results['NM_004333.4:c.1799T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': 'chr7', 'ref': u'A', 'pos': '140453136', 'alt': u'T'}} - assert results['NM_004333.4:c.1799T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': 'chr7', 'ref': u'A', 'pos': '140753336', 'alt': u'T'}} - assert results['NM_004333.4:c.1799T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': '7', 'ref': u'A', 'pos': '140453136', 'alt': u'T'}} - assert results['NM_004333.4:c.1799T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': '7', 'ref': u'A', 'pos': '140753336', 'alt': u'T'}} + assert results['NM_004333.4:c.1799T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '140453136', 'alt': 'T'}} + assert results['NM_004333.4:c.1799T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '140753336', 'alt': 'T'}} + assert results['NM_004333.4:c.1799T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '140453136', 'alt': 'T'}} + assert results['NM_004333.4:c.1799T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '140753336', 'alt': 'T'}} assert results['NM_004333.4:c.1799T>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007873.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.4'} def test_variant304(self): variant = '7-140453137-C-T' results = 
self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NR_148928.1:n.2896G>A' in results.keys() + assert 'NR_148928.1:n.2896G>A' in list(results.keys()) assert results['NR_148928.1:n.2896G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NR_148928.1:n.2896G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NR_148928.1:n.2896G>A']['alt_genomic_loci'] == [] @@ -17603,13 +17603,13 @@ def test_variant304(self): assert results['NR_148928.1:n.2896G>A']['hgvs_lrg_variant'] == '' assert results['NR_148928.1:n.2896G>A']['hgvs_transcript_variant'] == 'NR_148928.1:n.2896G>A' assert results['NR_148928.1:n.2896G>A']['hgvs_refseqgene_variant'] == '' - assert results['NR_148928.1:n.2896G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': 'chr7', 'ref': u'C', 'pos': '140453137', 'alt': u'T'}} - assert results['NR_148928.1:n.2896G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': 'chr7', 'ref': u'C', 'pos': '140753337', 'alt': u'T'}} - assert results['NR_148928.1:n.2896G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': '7', 'ref': u'C', 'pos': '140453137', 'alt': u'T'}} - assert results['NR_148928.1:n.2896G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': '7', 'ref': u'C', 'pos': '140753337', 'alt': u'T'}} + assert results['NR_148928.1:n.2896G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '140453137', 'alt': 'T'}} + assert results['NR_148928.1:n.2896G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '140753337', 'alt': 'T'}} + assert 
results['NR_148928.1:n.2896G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '140453137', 'alt': 'T'}} + assert results['NR_148928.1:n.2896G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '140753337', 'alt': 'T'}} assert results['NR_148928.1:n.2896G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_148928.1'} - assert 'NM_004333.5:c.1798G>A' in results.keys() + assert 'NM_004333.5:c.1798G>A' in list(results.keys()) assert results['NM_004333.5:c.1798G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_004333.5:c.1798G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004333.5:c.1798G>A']['alt_genomic_loci'] == [] @@ -17621,13 +17621,13 @@ def test_variant304(self): assert results['NM_004333.5:c.1798G>A']['hgvs_lrg_variant'] == '' assert results['NM_004333.5:c.1798G>A']['hgvs_transcript_variant'] == 'NM_004333.5:c.1798G>A' assert results['NM_004333.5:c.1798G>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_004333.5:c.1798G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': 'chr7', 'ref': u'C', 'pos': '140453137', 'alt': u'T'}} - assert results['NM_004333.5:c.1798G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': 'chr7', 'ref': u'C', 'pos': '140753337', 'alt': u'T'}} - assert results['NM_004333.5:c.1798G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': '7', 'ref': u'C', 'pos': '140453137', 'alt': u'T'}} - assert results['NM_004333.5:c.1798G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': '7', 'ref': u'C', 'pos': '140753337', 'alt': u'T'}} + 
assert results['NM_004333.5:c.1798G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '140453137', 'alt': 'T'}} + assert results['NM_004333.5:c.1798G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '140753337', 'alt': 'T'}} + assert results['NM_004333.5:c.1798G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '140453137', 'alt': 'T'}} + assert results['NM_004333.5:c.1798G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '140753337', 'alt': 'T'}} assert results['NM_004333.5:c.1798G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.5'} - assert 'NM_004333.4:c.1798G>A' in results.keys() + assert 'NM_004333.4:c.1798G>A' in list(results.keys()) assert results['NM_004333.4:c.1798G>A']['hgvs_lrg_transcript_variant'] == 'LRG_299t1:c.1798G>A' assert results['NM_004333.4:c.1798G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004333.4:c.1798G>A']['alt_genomic_loci'] == [] @@ -17639,13 +17639,13 @@ def test_variant304(self): assert results['NM_004333.4:c.1798G>A']['hgvs_lrg_variant'] == '' assert results['NM_004333.4:c.1798G>A']['hgvs_transcript_variant'] == 'NM_004333.4:c.1798G>A' assert results['NM_004333.4:c.1798G>A']['hgvs_refseqgene_variant'] == 'NG_007873.2:g.176428G>A' - assert results['NM_004333.4:c.1798G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': 'chr7', 'ref': u'C', 'pos': '140453137', 'alt': u'T'}} - assert results['NM_004333.4:c.1798G>A']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': 'chr7', 'ref': u'C', 'pos': '140753337', 'alt': u'T'}} - assert results['NM_004333.4:c.1798G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': '7', 'ref': u'C', 'pos': '140453137', 'alt': u'T'}} - assert results['NM_004333.4:c.1798G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': '7', 'ref': u'C', 'pos': '140753337', 'alt': u'T'}} + assert results['NM_004333.4:c.1798G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '140453137', 'alt': 'T'}} + assert results['NM_004333.4:c.1798G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '140753337', 'alt': 'T'}} + assert results['NM_004333.4:c.1798G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '140453137', 'alt': 'T'}} + assert results['NM_004333.4:c.1798G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '140753337', 'alt': 'T'}} assert results['NM_004333.4:c.1798G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007873.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.4'} - assert 'NM_001354609.1:c.1798G>A' in results.keys() + assert 'NM_001354609.1:c.1798G>A' in list(results.keys()) assert results['NM_001354609.1:c.1798G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001354609.1:c.1798G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001354609.1:c.1798G>A']['alt_genomic_loci'] == [] @@ -17657,10 
+17657,10 @@ def test_variant304(self): assert results['NM_001354609.1:c.1798G>A']['hgvs_lrg_variant'] == '' assert results['NM_001354609.1:c.1798G>A']['hgvs_transcript_variant'] == 'NM_001354609.1:c.1798G>A' assert results['NM_001354609.1:c.1798G>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001354609.1:c.1798G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': 'chr7', 'ref': u'C', 'pos': '140453137', 'alt': u'T'}} - assert results['NM_001354609.1:c.1798G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': 'chr7', 'ref': u'C', 'pos': '140753337', 'alt': u'T'}} - assert results['NM_001354609.1:c.1798G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': '7', 'ref': u'C', 'pos': '140453137', 'alt': u'T'}} - assert results['NM_001354609.1:c.1798G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': '7', 'ref': u'C', 'pos': '140753337', 'alt': u'T'}} + assert results['NM_001354609.1:c.1798G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '140453137', 'alt': 'T'}} + assert results['NM_001354609.1:c.1798G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '140753337', 'alt': 'T'}} + assert results['NM_001354609.1:c.1798G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '140453137', 'alt': 'T'}} + assert results['NM_001354609.1:c.1798G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '140753337', 'alt': 'T'}} assert 
results['NM_001354609.1:c.1798G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341538.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354609.1'} assert results['flag'] == 'gene_variant' @@ -17668,10 +17668,10 @@ def test_variant304(self): def test_variant305(self): variant = '7-143013488-A-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000083.2:c.180+3A>T' in results.keys() + assert 'NM_000083.2:c.180+3A>T' in list(results.keys()) assert results['NM_000083.2:c.180+3A>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000083.2:c.180+3A>T']['refseqgene_context_intronic_sequence'] == 'NG_009815.1(NM_000083.2):c.180+3A>T' assert results['NM_000083.2:c.180+3A>T']['alt_genomic_loci'] == [] @@ -17689,7 +17689,7 @@ def test_variant305(self): assert results['NM_000083.2:c.180+3A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143316395A>T', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '143316395', 'alt': 'T'}} assert results['NM_000083.2:c.180+3A>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009815.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000074.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000083.2'} - assert 'NR_046453.1:n.267+3A>T' in results.keys() + assert 'NR_046453.1:n.267+3A>T' in list(results.keys()) assert results['NR_046453.1:n.267+3A>T']['hgvs_lrg_transcript_variant'] == '' assert results['NR_046453.1:n.267+3A>T']['refseqgene_context_intronic_sequence'] == '' assert results['NR_046453.1:n.267+3A>T']['alt_genomic_loci'] == [] @@ -17711,9 +17711,9 @@ def test_variant305(self): def test_variant306(self): variant = '7-143018934-G-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NR_046453.1:n.776G>A' in results.keys() + assert 'NR_046453.1:n.776G>A' 
in list(results.keys()) assert results['NR_046453.1:n.776G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NR_046453.1:n.776G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NR_046453.1:n.776G>A']['alt_genomic_loci'] == [] @@ -17732,7 +17732,7 @@ def test_variant306(self): assert results['NR_046453.1:n.776G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_046453.1'} assert results['flag'] == 'gene_variant' - assert 'NM_000083.2:c.689G>A' in results.keys() + assert 'NM_000083.2:c.689G>A' in list(results.keys()) assert results['NM_000083.2:c.689G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000083.2:c.689G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000083.2:c.689G>A']['alt_genomic_loci'] == [] @@ -17754,10 +17754,10 @@ def test_variant306(self): def test_variant307(self): variant = '7-143048771-C-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NR_046453.1:n.2620C>T' in results.keys() + assert 'NR_046453.1:n.2620C>T' in list(results.keys()) assert results['NR_046453.1:n.2620C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NR_046453.1:n.2620C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NR_046453.1:n.2620C>T']['alt_genomic_loci'] == [] @@ -17775,7 +17775,7 @@ def test_variant307(self): assert results['NR_046453.1:n.2620C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143351678C>T', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '143351678', 'alt': 'T'}} assert results['NR_046453.1:n.2620C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_046453.1'} - assert 'NM_000083.2:c.2680C>T' in results.keys() + assert 'NM_000083.2:c.2680C>T' in list(results.keys()) assert results['NM_000083.2:c.2680C>T']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_000083.2:c.2680C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000083.2:c.2680C>T']['alt_genomic_loci'] == [] @@ -17797,9 +17797,9 @@ def test_variant307(self): def test_variant308(self): variant = '8-1871951-C-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_014629.3:c.2399C>T' in results.keys() + assert 'NM_014629.3:c.2399C>T' in list(results.keys()) assert results['NM_014629.3:c.2399C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014629.3:c.2399C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014629.3:c.2399C>T']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'HSCHR8_8_CTG1', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'chr8_KI270821v1_alt', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}] @@ -17817,7 +17817,7 @@ def test_variant308(self): assert results['NM_014629.3:c.2399C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000008.11:g.1923785C>T', 'vcf': {'chr': '8', 'ref': 'C', 'pos': '1923785', 'alt': 'T'}} assert results['NM_014629.3:c.2399C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055444.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014629.3'} - assert 'NM_014629.2:c.2399C>T' in results.keys() + assert 'NM_014629.2:c.2399C>T' in list(results.keys()) assert results['NM_014629.2:c.2399C>T']['hgvs_lrg_transcript_variant'] == 'LRG_234t1:c.2399C>T' assert results['NM_014629.2:c.2399C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014629.2:c.2399C>T']['alt_genomic_loci'] == [] @@ -17830,12 +17830,12 @@ def test_variant308(self): assert results['NM_014629.2:c.2399C>T']['hgvs_transcript_variant'] == 'NM_014629.2:c.2399C>T' assert results['NM_014629.2:c.2399C>T']['hgvs_refseqgene_variant'] == 
'NG_008480.1:g.104803C>T' assert results['NM_014629.2:c.2399C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000008.10:g.1871951C>T', 'vcf': {'chr': 'chr8', 'ref': 'C', 'pos': '1871951', 'alt': 'T'}} - assert 'hg38' not in results['NM_014629.2:c.2399C>T']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_014629.2:c.2399C>T']['primary_assembly_loci'].keys()) assert results['NM_014629.2:c.2399C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000008.10:g.1871951C>T', 'vcf': {'chr': '8', 'ref': 'C', 'pos': '1871951', 'alt': 'T'}} - assert 'grch38' not in results['NM_014629.2:c.2399C>T']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_014629.2:c.2399C>T']['primary_assembly_loci'].keys()) assert results['NM_014629.2:c.2399C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008480.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055444.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014629.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_234.xml'} - assert 'NM_001308153.1:c.2471C>T' in results.keys() + assert 'NM_001308153.1:c.2471C>T' in list(results.keys()) assert results['NM_001308153.1:c.2471C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001308153.1:c.2471C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001308153.1:c.2471C>T']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'HSCHR8_8_CTG1', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'chr8_KI270821v1_alt', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}] @@ -17854,7 +17854,7 @@ def test_variant308(self): assert results['NM_001308153.1:c.2471C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001295082.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001308153.1'} assert results['flag'] == 'gene_variant' - assert 'NM_001308152.1:c.2285C>T' in results.keys() + assert 'NM_001308152.1:c.2285C>T' in list(results.keys()) assert results['NM_001308152.1:c.2285C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001308152.1:c.2285C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001308152.1:c.2285C>T']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'HSCHR8_8_CTG1', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'chr8_KI270821v1_alt', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}] @@ -17876,9 +17876,9 @@ def test_variant308(self): def test_variant309(self): variant = '9-13112056-T-TG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001261407.1:c.5504dup' in results.keys() + assert 'NM_001261407.1:c.5504dup' in list(results.keys()) assert results['NM_001261407.1:c.5504dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001261407.1:c.5504dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001261407.1:c.5504dup']['alt_genomic_loci'] == [] @@ -17896,7 +17896,7 @@ def test_variant309(self): assert results['NM_001261407.1:c.5504dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112058dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} assert results['NM_001261407.1:c.5504dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001248336.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001261407.1'} - assert 'NM_001330637.1:c.5690dup' in results.keys() + assert 'NM_001330637.1:c.5690dup' in list(results.keys()) assert results['NM_001330637.1:c.5690dup']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001330637.1:c.5690dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330637.1:c.5690dup']['alt_genomic_loci'] == [] @@ -17914,7 +17914,7 @@ def test_variant309(self): assert results['NM_001330637.1:c.5690dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112058dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} assert results['NM_001330637.1:c.5690dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317566.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330637.1'} - assert 'NM_001261406.1:c.5591dup' in results.keys() + assert 'NM_001261406.1:c.5591dup' in list(results.keys()) assert results['NM_001261406.1:c.5591dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001261406.1:c.5591dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001261406.1:c.5591dup']['alt_genomic_loci'] == [] @@ -17933,7 +17933,7 @@ def test_variant309(self): assert results['NM_001261406.1:c.5591dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001248335.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001261406.1'} assert results['flag'] == 'gene_variant' - assert 'NM_003829.4:c.5603dup' in results.keys() + assert 'NM_003829.4:c.5603dup' in list(results.keys()) assert results['NM_003829.4:c.5603dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003829.4:c.5603dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003829.4:c.5603dup']['alt_genomic_loci'] == [] @@ -17955,9 +17955,9 @@ def test_variant309(self): def test_variant310(self): variant = '9-21971208-C-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_058197.4:c.*74-1G>T' in results.keys() + assert 'NM_058197.4:c.*74-1G>T' in list(results.keys()) assert results['NM_058197.4:c.*74-1G>T']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_058197.4:c.*74-1G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_058197.4:c.*74-1G>T']['alt_genomic_loci'] == [] @@ -17969,13 +17969,13 @@ def test_variant310(self): assert results['NM_058197.4:c.*74-1G>T']['hgvs_lrg_variant'] == '' assert results['NM_058197.4:c.*74-1G>T']['hgvs_transcript_variant'] == 'NM_058197.4:c.*74-1G>T' assert results['NM_058197.4:c.*74-1G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_058197.4:c.*74-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': 'chr9', 'ref': u'C', 'pos': '21971208', 'alt': u'A'}} - assert results['NM_058197.4:c.*74-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': 'chr9', 'ref': u'C', 'pos': '21971209', 'alt': u'A'}} - assert results['NM_058197.4:c.*74-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': '9', 'ref': u'C', 'pos': '21971208', 'alt': u'A'}} - assert results['NM_058197.4:c.*74-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': '9', 'ref': u'C', 'pos': '21971209', 'alt': u'A'}} + assert results['NM_058197.4:c.*74-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': 'chr9', 'ref': 'C', 'pos': '21971208', 'alt': 'A'}} + assert results['NM_058197.4:c.*74-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': 'chr9', 'ref': 'C', 'pos': '21971209', 'alt': 'A'}} + assert results['NM_058197.4:c.*74-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': '9', 'ref': 'C', 'pos': '21971208', 'alt': 'A'}} + assert results['NM_058197.4:c.*74-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000009.12:g.21971209C>A', 'vcf': {'chr': '9', 'ref': 'C', 'pos': '21971209', 'alt': 'A'}} assert results['NM_058197.4:c.*74-1G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_478104.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_058197.4'} - assert 'NM_000077.4:c.151-1G>T' in results.keys() + assert 'NM_000077.4:c.151-1G>T' in list(results.keys()) assert results['NM_000077.4:c.151-1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_11t1:c.151-1G>T' assert results['NM_000077.4:c.151-1G>T']['refseqgene_context_intronic_sequence'] == 'NG_007485.1(NM_000077.4):c.151-1G>T' assert results['NM_000077.4:c.151-1G>T']['alt_genomic_loci'] == [] @@ -17987,13 +17987,13 @@ def test_variant310(self): assert results['NM_000077.4:c.151-1G>T']['hgvs_lrg_variant'] == 'LRG_11:g.28283G>T' assert results['NM_000077.4:c.151-1G>T']['hgvs_transcript_variant'] == 'NM_000077.4:c.151-1G>T' assert results['NM_000077.4:c.151-1G>T']['hgvs_refseqgene_variant'] == 'NG_007485.1:g.28283G>T' - assert results['NM_000077.4:c.151-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': 'chr9', 'ref': u'C', 'pos': '21971208', 'alt': u'A'}} - assert results['NM_000077.4:c.151-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': 'chr9', 'ref': u'C', 'pos': '21971209', 'alt': u'A'}} - assert results['NM_000077.4:c.151-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': '9', 'ref': u'C', 'pos': '21971208', 'alt': u'A'}} - assert results['NM_000077.4:c.151-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': '9', 'ref': u'C', 'pos': '21971209', 'alt': u'A'}} + assert results['NM_000077.4:c.151-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': 
{'chr': 'chr9', 'ref': 'C', 'pos': '21971208', 'alt': 'A'}} + assert results['NM_000077.4:c.151-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': 'chr9', 'ref': 'C', 'pos': '21971209', 'alt': 'A'}} + assert results['NM_000077.4:c.151-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': '9', 'ref': 'C', 'pos': '21971208', 'alt': 'A'}} + assert results['NM_000077.4:c.151-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': '9', 'ref': 'C', 'pos': '21971209', 'alt': 'A'}} assert results['NM_000077.4:c.151-1G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007485.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000068.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000077.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_11.xml'} - assert 'NM_001363763.1:c.-3-1G>T' in results.keys() + assert 'NM_001363763.1:c.-3-1G>T' in list(results.keys()) assert results['NM_001363763.1:c.-3-1G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363763.1:c.-3-1G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363763.1:c.-3-1G>T']['alt_genomic_loci'] == [] @@ -18005,13 +18005,13 @@ def test_variant310(self): assert results['NM_001363763.1:c.-3-1G>T']['hgvs_lrg_variant'] == '' assert results['NM_001363763.1:c.-3-1G>T']['hgvs_transcript_variant'] == 'NM_001363763.1:c.-3-1G>T' assert results['NM_001363763.1:c.-3-1G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001363763.1:c.-3-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': 'chr9', 'ref': u'C', 'pos': '21971208', 'alt': u'A'}} - assert 'hg38' not in results['NM_001363763.1:c.-3-1G>T']['primary_assembly_loci'].keys() - assert 
results['NM_001363763.1:c.-3-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': '9', 'ref': u'C', 'pos': '21971208', 'alt': u'A'}} - assert 'grch38' not in results['NM_001363763.1:c.-3-1G>T']['primary_assembly_loci'].keys() + assert results['NM_001363763.1:c.-3-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': 'chr9', 'ref': 'C', 'pos': '21971208', 'alt': 'A'}} + assert 'hg38' not in list(results['NM_001363763.1:c.-3-1G>T']['primary_assembly_loci'].keys()) + assert results['NM_001363763.1:c.-3-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': '9', 'ref': 'C', 'pos': '21971208', 'alt': 'A'}} + assert 'grch38' not in list(results['NM_001363763.1:c.-3-1G>T']['primary_assembly_loci'].keys()) assert results['NM_001363763.1:c.-3-1G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350692.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363763.1'} - assert 'NM_001195132.1:c.151-1G>T' in results.keys() + assert 'NM_001195132.1:c.151-1G>T' in list(results.keys()) assert results['NM_001195132.1:c.151-1G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001195132.1:c.151-1G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001195132.1:c.151-1G>T']['alt_genomic_loci'] == [] @@ -18023,13 +18023,13 @@ def test_variant310(self): assert results['NM_001195132.1:c.151-1G>T']['hgvs_lrg_variant'] == '' assert results['NM_001195132.1:c.151-1G>T']['hgvs_transcript_variant'] == 'NM_001195132.1:c.151-1G>T' assert results['NM_001195132.1:c.151-1G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001195132.1:c.151-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': 'chr9', 'ref': u'C', 'pos': '21971208', 'alt': u'A'}} - assert 
results['NM_001195132.1:c.151-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': 'chr9', 'ref': u'C', 'pos': '21971209', 'alt': u'A'}} - assert results['NM_001195132.1:c.151-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': '9', 'ref': u'C', 'pos': '21971208', 'alt': u'A'}} - assert results['NM_001195132.1:c.151-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': '9', 'ref': u'C', 'pos': '21971209', 'alt': u'A'}} + assert results['NM_001195132.1:c.151-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': 'chr9', 'ref': 'C', 'pos': '21971208', 'alt': 'A'}} + assert results['NM_001195132.1:c.151-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': 'chr9', 'ref': 'C', 'pos': '21971209', 'alt': 'A'}} + assert results['NM_001195132.1:c.151-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': '9', 'ref': 'C', 'pos': '21971208', 'alt': 'A'}} + assert results['NM_001195132.1:c.151-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': '9', 'ref': 'C', 'pos': '21971209', 'alt': 'A'}} assert results['NM_001195132.1:c.151-1G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001182061.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001195132.1'} - assert 'NM_058195.3:c.194-1G>T' in results.keys() + assert 'NM_058195.3:c.194-1G>T' in list(results.keys()) assert results['NM_058195.3:c.194-1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_11t2:c.194-1G>T' assert results['NM_058195.3:c.194-1G>T']['refseqgene_context_intronic_sequence'] == 'NG_007485.1(NM_058195.3):c.194-1G>T' assert 
results['NM_058195.3:c.194-1G>T']['alt_genomic_loci'] == [] @@ -18041,10 +18041,10 @@ def test_variant310(self): assert results['NM_058195.3:c.194-1G>T']['hgvs_lrg_variant'] == 'LRG_11:g.28283G>T' assert results['NM_058195.3:c.194-1G>T']['hgvs_transcript_variant'] == 'NM_058195.3:c.194-1G>T' assert results['NM_058195.3:c.194-1G>T']['hgvs_refseqgene_variant'] == 'NG_007485.1:g.28283G>T' - assert results['NM_058195.3:c.194-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': 'chr9', 'ref': u'C', 'pos': '21971208', 'alt': u'A'}} - assert results['NM_058195.3:c.194-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': 'chr9', 'ref': u'C', 'pos': '21971209', 'alt': u'A'}} - assert results['NM_058195.3:c.194-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': '9', 'ref': u'C', 'pos': '21971208', 'alt': u'A'}} - assert results['NM_058195.3:c.194-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': '9', 'ref': u'C', 'pos': '21971209', 'alt': u'A'}} + assert results['NM_058195.3:c.194-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': 'chr9', 'ref': 'C', 'pos': '21971208', 'alt': 'A'}} + assert results['NM_058195.3:c.194-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': 'chr9', 'ref': 'C', 'pos': '21971209', 'alt': 'A'}} + assert results['NM_058195.3:c.194-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': '9', 'ref': 'C', 'pos': '21971208', 'alt': 'A'}} + assert results['NM_058195.3:c.194-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': '9', 'ref': 
'C', 'pos': '21971209', 'alt': 'A'}} assert results['NM_058195.3:c.194-1G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007485.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_478102.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_058195.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_11.xml'} assert results['flag'] == 'gene_variant' @@ -18052,9 +18052,9 @@ def test_variant310(self): def test_variant311(self): variant = '9-35683240-T-TG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_001301227.1:c.773-3dup' in results.keys() + assert 'NM_001301227.1:c.773-3dup' in list(results.keys()) assert results['NM_001301227.1:c.773-3dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001301227.1:c.773-3dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001301227.1:c.773-3dup']['alt_genomic_loci'] == [] @@ -18072,7 +18072,7 @@ def test_variant311(self): assert results['NM_001301227.1:c.773-3dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683244dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} assert results['NM_001301227.1:c.773-3dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001288156.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001301227.1'} - assert 'NM_001301226.1:c.772+1002dup' in results.keys() + assert 'NM_001301226.1:c.772+1002dup' in list(results.keys()) assert results['NM_001301226.1:c.772+1002dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001301226.1:c.772+1002dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001301226.1:c.772+1002dup']['alt_genomic_loci'] == [] @@ -18090,7 +18090,7 @@ def test_variant311(self): assert results['NM_001301226.1:c.772+1002dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683244dup', 
'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} assert results['NM_001301226.1:c.772+1002dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001288155.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001301226.1'} - assert 'NM_213674.1:c.772+1002dup' in results.keys() + assert 'NM_213674.1:c.772+1002dup' in list(results.keys()) assert results['NM_213674.1:c.772+1002dup']['hgvs_lrg_transcript_variant'] == 'LRG_680t1:c.772+1002dup' assert results['NM_213674.1:c.772+1002dup']['refseqgene_context_intronic_sequence'] == 'NG_011620.1(NM_213674.1):c.772+1002dup' assert results['NM_213674.1:c.772+1002dup']['alt_genomic_loci'] == [] @@ -18108,7 +18108,7 @@ def test_variant311(self): assert results['NM_213674.1:c.772+1002dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683244dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} assert results['NM_213674.1:c.772+1002dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011620.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_998839.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_213674.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_680.xml'} - assert 'NM_003289.3:c.773-3dup' in results.keys() + assert 'NM_003289.3:c.773-3dup' in list(results.keys()) assert results['NM_003289.3:c.773-3dup']['hgvs_lrg_transcript_variant'] == 'LRG_680t2:c.773-3dup' assert results['NM_003289.3:c.773-3dup']['refseqgene_context_intronic_sequence'] == 'NG_011620.1(NM_003289.3):c.773-3dup' assert results['NM_003289.3:c.773-3dup']['alt_genomic_loci'] == [] @@ -18131,9 +18131,9 @@ def test_variant311(self): def test_variant312(self): variant = '9-135796754-G-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_000368.4:c.733C>T' in results.keys() + assert 'NM_000368.4:c.733C>T' in list(results.keys()) assert 
results['NM_000368.4:c.733C>T']['hgvs_lrg_transcript_variant'] == 'LRG_486t1:c.733C>T' assert results['NM_000368.4:c.733C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000368.4:c.733C>T']['alt_genomic_loci'] == [] @@ -18145,13 +18145,13 @@ def test_variant312(self): assert results['NM_000368.4:c.733C>T']['hgvs_lrg_variant'] == 'LRG_486:g.28267C>T' assert results['NM_000368.4:c.733C>T']['hgvs_transcript_variant'] == 'NM_000368.4:c.733C>T' assert results['NM_000368.4:c.733C>T']['hgvs_refseqgene_variant'] == 'NG_012386.1:g.28267C>T' - assert results['NM_000368.4:c.733C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': 'chr9', 'ref': u'G', 'pos': '135796754', 'alt': u'A'}} - assert results['NM_000368.4:c.733C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': 'chr9', 'ref': u'G', 'pos': '132921367', 'alt': u'A'}} - assert results['NM_000368.4:c.733C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': '9', 'ref': u'G', 'pos': '135796754', 'alt': u'A'}} - assert results['NM_000368.4:c.733C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': '9', 'ref': u'G', 'pos': '132921367', 'alt': u'A'}} + assert results['NM_000368.4:c.733C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': 'chr9', 'ref': 'G', 'pos': '135796754', 'alt': 'A'}} + assert results['NM_000368.4:c.733C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': 'chr9', 'ref': 'G', 'pos': '132921367', 'alt': 'A'}} + assert results['NM_000368.4:c.733C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': '9', 'ref': 'G', 'pos': '135796754', 'alt': 'A'}} 
+ assert results['NM_000368.4:c.733C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': '9', 'ref': 'G', 'pos': '132921367', 'alt': 'A'}} assert results['NM_000368.4:c.733C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012386.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_486.xml'} - assert 'NM_001162426.1:c.733C>T' in results.keys() + assert 'NM_001162426.1:c.733C>T' in list(results.keys()) assert results['NM_001162426.1:c.733C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001162426.1:c.733C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001162426.1:c.733C>T']['alt_genomic_loci'] == [] @@ -18163,14 +18163,14 @@ def test_variant312(self): assert results['NM_001162426.1:c.733C>T']['hgvs_lrg_variant'] == '' assert results['NM_001162426.1:c.733C>T']['hgvs_transcript_variant'] == 'NM_001162426.1:c.733C>T' assert results['NM_001162426.1:c.733C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001162426.1:c.733C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': 'chr9', 'ref': u'G', 'pos': '135796754', 'alt': u'A'}} - assert results['NM_001162426.1:c.733C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': 'chr9', 'ref': u'G', 'pos': '132921367', 'alt': u'A'}} - assert results['NM_001162426.1:c.733C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': '9', 'ref': u'G', 'pos': '135796754', 'alt': u'A'}} - assert results['NM_001162426.1:c.733C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': '9', 'ref': u'G', 'pos': '132921367', 
'alt': u'A'}} + assert results['NM_001162426.1:c.733C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': 'chr9', 'ref': 'G', 'pos': '135796754', 'alt': 'A'}} + assert results['NM_001162426.1:c.733C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': 'chr9', 'ref': 'G', 'pos': '132921367', 'alt': 'A'}} + assert results['NM_001162426.1:c.733C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': '9', 'ref': 'G', 'pos': '135796754', 'alt': 'A'}} + assert results['NM_001162426.1:c.733C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': '9', 'ref': 'G', 'pos': '132921367', 'alt': 'A'}} assert results['NM_001162426.1:c.733C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155898.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162426.1'} assert results['flag'] == 'gene_variant' - assert 'NM_001362177.1:c.370C>T' in results.keys() + assert 'NM_001362177.1:c.370C>T' in list(results.keys()) assert results['NM_001362177.1:c.370C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001362177.1:c.370C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001362177.1:c.370C>T']['alt_genomic_loci'] == [] @@ -18182,13 +18182,13 @@ def test_variant312(self): assert results['NM_001362177.1:c.370C>T']['hgvs_lrg_variant'] == '' assert results['NM_001362177.1:c.370C>T']['hgvs_transcript_variant'] == 'NM_001362177.1:c.370C>T' assert results['NM_001362177.1:c.370C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001362177.1:c.370C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': 'chr9', 'ref': u'G', 'pos': '135796754', 'alt': u'A'}} - assert 
results['NM_001362177.1:c.370C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': 'chr9', 'ref': u'G', 'pos': '132921367', 'alt': u'A'}} - assert results['NM_001362177.1:c.370C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': '9', 'ref': u'G', 'pos': '135796754', 'alt': u'A'}} - assert results['NM_001362177.1:c.370C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': '9', 'ref': u'G', 'pos': '132921367', 'alt': u'A'}} + assert results['NM_001362177.1:c.370C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': 'chr9', 'ref': 'G', 'pos': '135796754', 'alt': 'A'}} + assert results['NM_001362177.1:c.370C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': 'chr9', 'ref': 'G', 'pos': '132921367', 'alt': 'A'}} + assert results['NM_001362177.1:c.370C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': '9', 'ref': 'G', 'pos': '135796754', 'alt': 'A'}} + assert results['NM_001362177.1:c.370C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': '9', 'ref': 'G', 'pos': '132921367', 'alt': 'A'}} assert results['NM_001362177.1:c.370C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001349106.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001362177.1'} - assert 'NM_001162427.1:c.580C>T' in results.keys() + assert 'NM_001162427.1:c.580C>T' in list(results.keys()) assert results['NM_001162427.1:c.580C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001162427.1:c.580C>T']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_001162427.1:c.580C>T']['alt_genomic_loci'] == [] @@ -18200,20 +18200,20 @@ def test_variant312(self): assert results['NM_001162427.1:c.580C>T']['hgvs_lrg_variant'] == '' assert results['NM_001162427.1:c.580C>T']['hgvs_transcript_variant'] == 'NM_001162427.1:c.580C>T' assert results['NM_001162427.1:c.580C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001162427.1:c.580C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': 'chr9', 'ref': u'G', 'pos': '135796754', 'alt': u'A'}} - assert results['NM_001162427.1:c.580C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': 'chr9', 'ref': u'G', 'pos': '132921367', 'alt': u'A'}} - assert results['NM_001162427.1:c.580C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': '9', 'ref': u'G', 'pos': '135796754', 'alt': u'A'}} - assert results['NM_001162427.1:c.580C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': '9', 'ref': u'G', 'pos': '132921367', 'alt': u'A'}} + assert results['NM_001162427.1:c.580C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': 'chr9', 'ref': 'G', 'pos': '135796754', 'alt': 'A'}} + assert results['NM_001162427.1:c.580C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': 'chr9', 'ref': 'G', 'pos': '132921367', 'alt': 'A'}} + assert results['NM_001162427.1:c.580C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': '9', 'ref': 'G', 'pos': '135796754', 'alt': 'A'}} + assert results['NM_001162427.1:c.580C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': '9', 'ref': 'G', 'pos': 
'132921367', 'alt': 'A'}} assert results['NM_001162427.1:c.580C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155899.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162427.1'} def test_variant313(self): variant = 'HG536_PATCH-10391-AC-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_005247.2:c.616del' in results.keys() + assert 'NM_005247.2:c.616del' in list(results.keys()) assert results['NM_005247.2:c.616del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_005247.2:c.616del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_005247.2:c.616del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003571046.1:g.10392del', 'vcf': {'chr': 'HG536_PATCH', 'ref': 'AC', 'pos': '10391', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003571046.1:g.10392del', 'vcf': {'chr': 'NW_003571046.1', 'ref': 'AC', 'pos': '10391', 'alt': 'A'}}}] @@ -18235,12 +18235,12 @@ def test_variant313(self): def test_variant314(self): variant = 'HG865_PATCH-33547-G-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NR_110766.1:n.833+969C>T' in results.keys() + assert 'NR_110766.1:n.833+969C>T' in list(results.keys()) assert results['NR_110766.1:n.833+969C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NR_110766.1:n.833+969C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_110766.1:n.833+969C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'ref': u'G', 'pos': '33547', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'NW_004070871.1', 'ref': u'G', 'pos': '33547', 'alt': u'A'}}}] + assert results['NR_110766.1:n.833+969C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 
'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'NW_004070871.1', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}] assert results['NR_110766.1:n.833+969C>T']['transcript_description'] == 'Homo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 3, non-coding RNA' assert results['NR_110766.1:n.833+969C>T']['gene_symbol'] == 'SHANK2' assert results['NR_110766.1:n.833+969C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} @@ -18249,16 +18249,16 @@ def test_variant314(self): assert results['NR_110766.1:n.833+969C>T']['hgvs_lrg_variant'] == '' assert results['NR_110766.1:n.833+969C>T']['hgvs_transcript_variant'] == 'NR_110766.1:n.833+969C>T' assert results['NR_110766.1:n.833+969C>T']['hgvs_refseqgene_variant'] == '' - assert results['NR_110766.1:n.833+969C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '70335439', 'alt': u'A'}} - assert results['NR_110766.1:n.833+969C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '70489334', 'alt': u'A'}} - assert results['NR_110766.1:n.833+969C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '70335439', 'alt': u'A'}} - assert results['NR_110766.1:n.833+969C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '70489334', 'alt': u'A'}} + assert results['NR_110766.1:n.833+969C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '70335439', 'alt': 'A'}} + assert 
results['NR_110766.1:n.833+969C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '70489334', 'alt': 'A'}} + assert results['NR_110766.1:n.833+969C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '70335439', 'alt': 'A'}} + assert results['NR_110766.1:n.833+969C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '70489334', 'alt': 'A'}} assert results['NR_110766.1:n.833+969C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_110766.1'} - assert 'NM_012309.4:c.2566C>T' in results.keys() + assert 'NM_012309.4:c.2566C>T' in list(results.keys()) assert results['NM_012309.4:c.2566C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_012309.4:c.2566C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_012309.4:c.2566C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'ref': u'G', 'pos': '33547', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'NW_004070871.1', 'ref': u'G', 'pos': '33547', 'alt': u'A'}}}] + assert results['NM_012309.4:c.2566C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'NW_004070871.1', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}] assert results['NM_012309.4:c.2566C>T']['transcript_description'] == 'Homo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 1, mRNA' assert results['NM_012309.4:c.2566C>T']['gene_symbol'] == 'SHANK2' assert 
results['NM_012309.4:c.2566C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_036441.2:p.(Leu856=)', 'slr': 'NP_036441.2:p.(L856=)'} @@ -18267,16 +18267,16 @@ def test_variant314(self): assert results['NM_012309.4:c.2566C>T']['hgvs_lrg_variant'] == '' assert results['NM_012309.4:c.2566C>T']['hgvs_transcript_variant'] == 'NM_012309.4:c.2566C>T' assert results['NM_012309.4:c.2566C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_012309.4:c.2566C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.70336423G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '70336423', 'alt': u'A'}} - assert results['NM_012309.4:c.2566C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '70489334', 'alt': u'A'}} - assert results['NM_012309.4:c.2566C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.70336423G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '70336423', 'alt': u'A'}} - assert results['NM_012309.4:c.2566C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '70489334', 'alt': u'A'}} + assert results['NM_012309.4:c.2566C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.70336423G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '70336423', 'alt': 'A'}} + assert results['NM_012309.4:c.2566C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '70489334', 'alt': 'A'}} + assert results['NM_012309.4:c.2566C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.70336423G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '70336423', 'alt': 'A'}} + assert results['NM_012309.4:c.2566C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000011.10:g.70489334G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '70489334', 'alt': 'A'}} assert results['NM_012309.4:c.2566C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_036441.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_012309.4'} - assert 'NM_133266.4:c.802C>T' in results.keys() + assert 'NM_133266.4:c.802C>T' in list(results.keys()) assert results['NM_133266.4:c.802C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_133266.4:c.802C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_133266.4:c.802C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'ref': u'G', 'pos': '33547', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'NW_004070871.1', 'ref': u'G', 'pos': '33547', 'alt': u'A'}}}] + assert results['NM_133266.4:c.802C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'NW_004070871.1', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}] assert results['NM_133266.4:c.802C>T']['transcript_description'] == 'Homo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 2, mRNA' assert results['NM_133266.4:c.802C>T']['gene_symbol'] == 'SHANK2' assert results['NM_133266.4:c.802C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_573573.2:p.(Leu268=)', 'slr': 'NP_573573.2:p.(L268=)'} @@ -18285,17 +18285,17 @@ def test_variant314(self): assert results['NM_133266.4:c.802C>T']['hgvs_lrg_variant'] == '' assert results['NM_133266.4:c.802C>T']['hgvs_transcript_variant'] == 'NM_133266.4:c.802C>T' assert results['NM_133266.4:c.802C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_133266.4:c.802C>T']['primary_assembly_loci']['hg19'] 
== {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '70335439', 'alt': u'A'}} - assert results['NM_133266.4:c.802C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '70489334', 'alt': u'A'}} - assert results['NM_133266.4:c.802C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '70335439', 'alt': u'A'}} - assert results['NM_133266.4:c.802C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '70489334', 'alt': u'A'}} + assert results['NM_133266.4:c.802C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '70335439', 'alt': 'A'}} + assert results['NM_133266.4:c.802C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '70489334', 'alt': 'A'}} + assert results['NM_133266.4:c.802C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '70335439', 'alt': 'A'}} + assert results['NM_133266.4:c.802C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '70489334', 'alt': 'A'}} assert results['NM_133266.4:c.802C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_573573.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_133266.4'} assert results['flag'] == 'gene_variant' - assert 'NM_133266.3:c.802C>T' in results.keys() + assert 'NM_133266.3:c.802C>T' in list(results.keys()) assert results['NM_133266.3:c.802C>T']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_133266.3:c.802C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_133266.3:c.802C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'ref': u'G', 'pos': '33547', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'NW_004070871.1', 'ref': u'G', 'pos': '33547', 'alt': u'A'}}}] + assert results['NM_133266.3:c.802C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'NW_004070871.1', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}] assert results['NM_133266.3:c.802C>T']['transcript_description'] == 'Homo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 2, mRNA' assert results['NM_133266.3:c.802C>T']['gene_symbol'] == 'SHANK2' assert results['NM_133266.3:c.802C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_573573.2:p.(Leu268=)', 'slr': 'NP_573573.2:p.(L268=)'} @@ -18304,23 +18304,23 @@ def test_variant314(self): assert results['NM_133266.3:c.802C>T']['hgvs_lrg_variant'] == '' assert results['NM_133266.3:c.802C>T']['hgvs_transcript_variant'] == 'NM_133266.3:c.802C>T' assert results['NM_133266.3:c.802C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_133266.3:c.802C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '70335439', 'alt': u'A'}} - assert 'hg38' not in results['NM_133266.3:c.802C>T']['primary_assembly_loci'].keys() - assert results['NM_133266.3:c.802C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '70335439', 'alt': u'A'}} - assert 'grch38' not in 
results['NM_133266.3:c.802C>T']['primary_assembly_loci'].keys() + assert results['NM_133266.3:c.802C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '70335439', 'alt': 'A'}} + assert 'hg38' not in list(results['NM_133266.3:c.802C>T']['primary_assembly_loci'].keys()) + assert results['NM_133266.3:c.802C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '70335439', 'alt': 'A'}} + assert 'grch38' not in list(results['NM_133266.3:c.802C>T']['primary_assembly_loci'].keys()) assert results['NM_133266.3:c.802C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_573573.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_133266.3'} def test_variant315(self): variant = 'HG865_PATCH-569441-G-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_012309.4:c.960C>A' in results.keys() + assert 'NM_012309.4:c.960C>A' in list(results.keys()) assert results['NM_012309.4:c.960C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_012309.4:c.960C>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_012309.4:c.960C>A']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.569441G>T', 'vcf': {'chr': 'HG865_PATCH', 'ref': u'G', 'pos': '569441', 'alt': u'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.569441G>T', 'vcf': {'chr': 'NW_004070871.1', 'ref': u'G', 'pos': '569441', 'alt': u'T'}}}] + assert results['NM_012309.4:c.960C>A']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.569441G>T', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'G', 'pos': '569441', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.569441G>T', 'vcf': {'chr': 'NW_004070871.1', 'ref': 'G', 
'pos': '569441', 'alt': 'T'}}}] assert results['NM_012309.4:c.960C>A']['transcript_description'] == 'Homo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 1, mRNA' assert results['NM_012309.4:c.960C>A']['gene_symbol'] == 'SHANK2' assert results['NM_012309.4:c.960C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_036441.2:p.(Tyr320Ter)', 'slr': 'NP_036441.2:p.(Y320*)'} @@ -18329,23 +18329,23 @@ def test_variant315(self): assert results['NM_012309.4:c.960C>A']['hgvs_lrg_variant'] == '' assert results['NM_012309.4:c.960C>A']['hgvs_transcript_variant'] == 'NM_012309.4:c.960C>A' assert results['NM_012309.4:c.960C>A']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in results['NM_012309.4:c.960C>A']['primary_assembly_loci'].keys() - assert results['NM_012309.4:c.960C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71075228G>T', 'vcf': {'chr': 'chr11', 'ref': u'G', 'pos': '71075228', 'alt': u'T'}} - assert 'grch37' not in results['NM_012309.4:c.960C>A']['primary_assembly_loci'].keys() - assert results['NM_012309.4:c.960C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71075228G>T', 'vcf': {'chr': '11', 'ref': u'G', 'pos': '71075228', 'alt': u'T'}} + assert 'hg19' not in list(results['NM_012309.4:c.960C>A']['primary_assembly_loci'].keys()) + assert results['NM_012309.4:c.960C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71075228G>T', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '71075228', 'alt': 'T'}} + assert 'grch37' not in list(results['NM_012309.4:c.960C>A']['primary_assembly_loci'].keys()) + assert results['NM_012309.4:c.960C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71075228G>T', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '71075228', 'alt': 'T'}} assert results['NM_012309.4:c.960C>A']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_036441.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_012309.4'} def test_variant316(self): variant = 'HG865_PATCH-574546-C-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_012309.4:c.913-5058G>A' in results.keys() + assert 'NM_012309.4:c.913-5058G>A' in list(results.keys()) assert results['NM_012309.4:c.913-5058G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_012309.4:c.913-5058G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_012309.4:c.913-5058G>A']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'HG865_PATCH', 'ref': u'C', 'pos': '574546', 'alt': u'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'NW_004070871.1', 'ref': u'C', 'pos': '574546', 'alt': u'T'}}}] + assert results['NM_012309.4:c.913-5058G>A']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'C', 'pos': '574546', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'NW_004070871.1', 'ref': 'C', 'pos': '574546', 'alt': 'T'}}}] assert results['NM_012309.4:c.913-5058G>A']['transcript_description'] == 'Homo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 1, mRNA' assert results['NM_012309.4:c.913-5058G>A']['gene_symbol'] == 'SHANK2' assert results['NM_012309.4:c.913-5058G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_036441.2:p.?', 'slr': 'NP_036441.2:p.?'} @@ -18354,20 +18354,20 @@ def test_variant316(self): assert results['NM_012309.4:c.913-5058G>A']['hgvs_lrg_variant'] == '' assert results['NM_012309.4:c.913-5058G>A']['hgvs_transcript_variant'] == 'NM_012309.4:c.913-5058G>A' assert results['NM_012309.4:c.913-5058G>A']['hgvs_refseqgene_variant'] == '' - 
assert 'hg19' not in results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci'].keys() - assert results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71080333C>T', 'vcf': {'chr': 'chr11', 'ref': u'C', 'pos': '71080333', 'alt': u'T'}} - assert 'grch37' not in results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci'].keys() - assert results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71080333C>T', 'vcf': {'chr': '11', 'ref': u'C', 'pos': '71080333', 'alt': u'T'}} + assert 'hg19' not in list(results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci'].keys()) + assert results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71080333C>T', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '71080333', 'alt': 'T'}} + assert 'grch37' not in list(results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci'].keys()) + assert results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71080333C>T', 'vcf': {'chr': '11', 'ref': 'C', 'pos': '71080333', 'alt': 'T'}} assert results['NM_012309.4:c.913-5058G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_036441.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_012309.4'} def test_variant317(self): variant = 'HSCHR1_1_CTG31-133178-TAG-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_020699.2:c.802_803insTT' in results.keys() + assert 'NM_020699.2:c.802_803insTT' in list(results.keys()) assert results['NM_020699.2:c.802_803insTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_020699.2:c.802_803insTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_020699.2:c.802_803insTT']['alt_genomic_loci'] == [] @@ -18379,23 
+18379,23 @@ def test_variant317(self): assert results['NM_020699.2:c.802_803insTT']['hgvs_lrg_variant'] == '' assert results['NM_020699.2:c.802_803insTT']['hgvs_transcript_variant'] == 'NM_020699.2:c.802_803insTT' assert results['NM_020699.2:c.802_803insTT']['hgvs_refseqgene_variant'] == '' - assert results['NM_020699.2:c.802_803insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.153789945_153789946delinsGAAG', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '153789945', 'alt': u'GAA'}} - assert results['NM_020699.2:c.802_803insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153817469_153817470insAA', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '153817469', 'alt': u'GAA'}} - assert results['NM_020699.2:c.802_803insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.153789945_153789946delinsGAAG', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '153789945', 'alt': u'GAA'}} - assert results['NM_020699.2:c.802_803insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153817469_153817470insAA', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '153817469', 'alt': u'GAA'}} + assert results['NM_020699.2:c.802_803insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.153789945_153789946delinsGAAG', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '153789945', 'alt': 'GAA'}} + assert results['NM_020699.2:c.802_803insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153817469_153817470insAA', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '153817469', 'alt': 'GAA'}} + assert results['NM_020699.2:c.802_803insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.153789945_153789946delinsGAAG', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '153789945', 'alt': 'GAA'}} + assert results['NM_020699.2:c.802_803insTT']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000001.11:g.153817469_153817470insAA', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '153817469', 'alt': 'GAA'}} assert results['NM_020699.2:c.802_803insTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065750.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020699.2'} def test_variant318(self): variant = 'HSCHR6_MHC_MANN_CTG1-3848158-T-G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_021983.4:c.490G>C' in results.keys() + assert 'NM_021983.4:c.490G>C' in list(results.keys()) assert results['NM_021983.4:c.490G>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_021983.4:c.490G>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_021983.4:c.490G>C']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3848158', 'alt': u'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'chr6_mann_hap4', 'ref': 'T', 'pos': '3848158', 'alt': u'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3842538', 'alt': u'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'ref': 'T', 'pos': '3842538', 'alt': u'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': u'C', 'pos': '3884432', 'alt': u'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': u'C', 'pos': '3884432', 'alt': u'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': u'C', 'pos': '3852542', 'alt': u'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 
'vcf': {'chr': 'chr6_ssto_hap7', 'ref': u'C', 'pos': '3852542', 'alt': u'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': u'C', 'pos': '3853244', 'alt': u'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': u'C', 'pos': '3853244', 'alt': u'G'}}}] + assert results['NM_021983.4:c.490G>C']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3848158', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'chr6_mann_hap4', 'ref': 'T', 'pos': '3848158', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3842538', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'ref': 'T', 'pos': '3842538', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'C', 'pos': '3884432', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'C', 'pos': '3884432', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3852542', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': 'C', 'pos': '3852542', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3853244', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': 'C', 'pos': '3853244', 'alt': 'G'}}}] assert 
results['NM_021983.4:c.490G>C']['transcript_description'] == 'Homo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA' assert results['NM_021983.4:c.490G>C']['gene_symbol'] == 'HLA-DRB4' assert results['NM_021983.4:c.490G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068818.4:p.(Gly164Arg)', 'slr': 'NP_068818.4:p.(G164R)'} @@ -18404,23 +18404,23 @@ def test_variant318(self): assert results['NM_021983.4:c.490G>C']['hgvs_lrg_variant'] == '' assert results['NM_021983.4:c.490G>C']['hgvs_transcript_variant'] == 'NM_021983.4:c.490G>C' assert results['NM_021983.4:c.490G>C']['hgvs_refseqgene_variant'] == 'NG_002433.1:g.5724C>G' - assert 'hg19' not in results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys() - assert 'hg38' not in results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys() - assert 'grch37' not in results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys() - assert 'grch38' not in results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys()) assert results['NM_021983.4:c.490G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_002433.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_068818.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021983.4'} def test_variant319(self): variant = 'HSCHR6_MHC_MANN_CTG1-3851043-C-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_021983.4:c.346G>T' in results.keys() + assert 'NM_021983.4:c.346G>T' in list(results.keys()) assert 
results['NM_021983.4:c.346G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_021983.4:c.346G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_021983.4:c.346G>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3851043C>A', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': u'C', 'pos': '3851043', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167246.1:g.3851043C>A', 'vcf': {'chr': 'chr6_mann_hap4', 'ref': u'C', 'pos': '3851043', 'alt': u'A'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167246.2:g.3845423C>A', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': u'C', 'pos': '3845423', 'alt': u'A'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3845423C>A', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'ref': u'C', 'pos': '3845423', 'alt': u'A'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3887313C>A', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': u'C', 'pos': '3887313', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3887313C>A', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': u'C', 'pos': '3887313', 'alt': u'A'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3855423C>A', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': u'C', 'pos': '3855423', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3855423C>A', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': u'C', 'pos': '3855423', 'alt': u'A'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3856125C>A', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': u'C', 'pos': '3856125', 'alt': u'A'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3856125C>A', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': u'C', 'pos': '3856125', 'alt': u'A'}}}] + assert results['NM_021983.4:c.346G>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3851043C>A', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'C', 'pos': '3851043', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 
'NT_167246.1:g.3851043C>A', 'vcf': {'chr': 'chr6_mann_hap4', 'ref': 'C', 'pos': '3851043', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167246.2:g.3845423C>A', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'C', 'pos': '3845423', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3845423C>A', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'ref': 'C', 'pos': '3845423', 'alt': 'A'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3887313C>A', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'C', 'pos': '3887313', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3887313C>A', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'C', 'pos': '3887313', 'alt': 'A'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3855423C>A', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3855423', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3855423C>A', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': 'C', 'pos': '3855423', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3856125C>A', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3856125', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3856125C>A', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': 'C', 'pos': '3856125', 'alt': 'A'}}}] assert results['NM_021983.4:c.346G>T']['transcript_description'] == 'Homo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA' assert results['NM_021983.4:c.346G>T']['gene_symbol'] == 'HLA-DRB4' assert results['NM_021983.4:c.346G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068818.4:p.(Glu116Ter)', 'slr': 'NP_068818.4:p.(E116*)'} @@ -18429,20 +18429,20 @@ def test_variant319(self): assert results['NM_021983.4:c.346G>T']['hgvs_lrg_variant'] == '' assert results['NM_021983.4:c.346G>T']['hgvs_transcript_variant'] == 'NM_021983.4:c.346G>T' assert results['NM_021983.4:c.346G>T']['hgvs_refseqgene_variant'] == 'NG_002433.1:g.8605C>A' - assert 'hg19' not in 
results['NM_021983.4:c.346G>T']['primary_assembly_loci'].keys() - assert 'hg38' not in results['NM_021983.4:c.346G>T']['primary_assembly_loci'].keys() - assert 'grch37' not in results['NM_021983.4:c.346G>T']['primary_assembly_loci'].keys() - assert 'grch38' not in results['NM_021983.4:c.346G>T']['primary_assembly_loci'].keys() + assert 'hg19' not in list(results['NM_021983.4:c.346G>T']['primary_assembly_loci'].keys()) + assert 'hg38' not in list(results['NM_021983.4:c.346G>T']['primary_assembly_loci'].keys()) + assert 'grch37' not in list(results['NM_021983.4:c.346G>T']['primary_assembly_loci'].keys()) + assert 'grch38' not in list(results['NM_021983.4:c.346G>T']['primary_assembly_loci'].keys()) assert results['NM_021983.4:c.346G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_002433.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_068818.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021983.4'} def test_variant320(self): variant = 'X-70443101-C-T' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_001097642.2:c.-16-441C>T' in results.keys() + assert 'NM_001097642.2:c.-16-441C>T' in list(results.keys()) assert results['NM_001097642.2:c.-16-441C>T']['hgvs_lrg_transcript_variant'] == 'LRG_245t1:c.-16-441C>T' assert results['NM_001097642.2:c.-16-441C>T']['refseqgene_context_intronic_sequence'] == 'NG_008357.1(NM_001097642.2):c.-16-441C>T' assert results['NM_001097642.2:c.-16-441C>T']['alt_genomic_loci'] == [] @@ -18460,7 +18460,7 @@ def test_variant320(self): assert results['NM_001097642.2:c.-16-441C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.71223251C>T', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '71223251', 'alt': 'T'}} assert results['NM_001097642.2:c.-16-441C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008357.1', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001091111.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001097642.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_245.xml'} - assert 'NM_000166.5:c.-101C>T' in results.keys() + assert 'NM_000166.5:c.-101C>T' in list(results.keys()) assert results['NM_000166.5:c.-101C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000166.5:c.-101C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000166.5:c.-101C>T']['alt_genomic_loci'] == [] @@ -18482,9 +18482,9 @@ def test_variant320(self): def test_variant321(self): variant = 'X-107845202-GACCACC-GACC,G' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_033380.2:c.2130_2135del' in results.keys() + assert 'NM_033380.2:c.2130_2135del' in list(results.keys()) assert results['NM_033380.2:c.2130_2135del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_033380.2:c.2130_2135del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_033380.2:c.2130_2135del']['alt_genomic_loci'] == [] @@ -18502,7 +18502,7 @@ def test_variant321(self): assert results['NM_033380.2:c.2130_2135del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.108601973_108601978del', 'vcf': {'chr': 'X', 'ref': 'GACCACC', 'pos': '108601972', 'alt': 'G'}} assert results['NM_033380.2:c.2130_2135del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_203699.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_033380.2'} - assert 'NM_000495.4:c.2130_2135del' in results.keys() + assert 'NM_000495.4:c.2130_2135del' in list(results.keys()) assert results['NM_000495.4:c.2130_2135del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000495.4:c.2130_2135del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000495.4:c.2130_2135del']['alt_genomic_loci'] == [] @@ -18521,7 +18521,7 @@ def test_variant321(self): assert 
results['NM_000495.4:c.2130_2135del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000486.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000495.4'} assert results['flag'] == 'gene_variant' - assert 'NM_000495.4:c.2133_2135del' in results.keys() + assert 'NM_000495.4:c.2133_2135del' in list(results.keys()) assert results['NM_000495.4:c.2133_2135del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000495.4:c.2133_2135del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000495.4:c.2133_2135del']['alt_genomic_loci'] == [] @@ -18539,7 +18539,7 @@ def test_variant321(self): assert results['NM_000495.4:c.2133_2135del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.108601976_108601978del', 'vcf': {'chr': 'X', 'ref': 'GACC', 'pos': '108601972', 'alt': 'G'}} assert results['NM_000495.4:c.2133_2135del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000486.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000495.4'} - assert 'NM_033380.2:c.2133_2135del' in results.keys() + assert 'NM_033380.2:c.2133_2135del' in list(results.keys()) assert results['NM_033380.2:c.2133_2135del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_033380.2:c.2133_2135del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_033380.2:c.2133_2135del']['alt_genomic_loci'] == [] @@ -18561,12 +18561,12 @@ def test_variant321(self): def test_variant322(self): variant = 'X-153296777-G-A' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_004992.3:c.502C>T' in results.keys() + assert 'NM_004992.3:c.502C>T' in list(results.keys()) assert results['NM_004992.3:c.502C>T']['hgvs_lrg_transcript_variant'] == 'LRG_764t2:c.502C>T' assert results['NM_004992.3:c.502C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004992.3:c.502C>T']['alt_genomic_loci'] == 
[{'grch37': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'HG1497_PATCH', 'ref': u'G', 'pos': '1465305', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'NW_003871103.3', 'ref': u'G', 'pos': '1465305', 'alt': u'A'}}}] + assert results['NM_004992.3:c.502C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'HG1497_PATCH', 'ref': 'G', 'pos': '1465305', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'NW_003871103.3', 'ref': 'G', 'pos': '1465305', 'alt': 'A'}}}] assert results['NM_004992.3:c.502C>T']['transcript_description'] == 'Homo sapiens methyl-CpG binding protein 2 (MECP2), transcript variant 1, mRNA' assert results['NM_004992.3:c.502C>T']['gene_symbol'] == 'MECP2' assert results['NM_004992.3:c.502C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004983.1(LRG_764p2):p.(Arg168Ter)', 'slr': 'NP_004983.1:p.(R168*)'} @@ -18575,17 +18575,17 @@ def test_variant322(self): assert results['NM_004992.3:c.502C>T']['hgvs_lrg_variant'] == 'LRG_764:g.110802C>T' assert results['NM_004992.3:c.502C>T']['hgvs_transcript_variant'] == 'NM_004992.3:c.502C>T' assert results['NM_004992.3:c.502C>T']['hgvs_refseqgene_variant'] == 'NG_007107.2:g.110802C>T' - assert results['NM_004992.3:c.502C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'chrX', 'ref': u'G', 'pos': '153296777', 'alt': u'A'}} - assert results['NM_004992.3:c.502C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'chrX', 'ref': u'G', 'pos': '154031326', 'alt': u'A'}} - assert results['NM_004992.3:c.502C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'X', 'ref': u'G', 'pos': '153296777', 'alt': u'A'}} - assert 
results['NM_004992.3:c.502C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'X', 'ref': u'G', 'pos': '154031326', 'alt': u'A'}} + assert results['NM_004992.3:c.502C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '153296777', 'alt': 'A'}} + assert results['NM_004992.3:c.502C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '154031326', 'alt': 'A'}} + assert results['NM_004992.3:c.502C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '153296777', 'alt': 'A'}} + assert results['NM_004992.3:c.502C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '154031326', 'alt': 'A'}} assert results['NM_004992.3:c.502C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007107.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004983.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004992.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_764.xml'} assert results['flag'] == 'gene_variant' - assert 'NM_001316337.1:c.223C>T' in results.keys() + assert 'NM_001316337.1:c.223C>T' in list(results.keys()) assert results['NM_001316337.1:c.223C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001316337.1:c.223C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001316337.1:c.223C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'HG1497_PATCH', 'ref': u'G', 'pos': '1465305', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'NW_003871103.3', 'ref': 
u'G', 'pos': '1465305', 'alt': u'A'}}}] + assert results['NM_001316337.1:c.223C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'HG1497_PATCH', 'ref': 'G', 'pos': '1465305', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'NW_003871103.3', 'ref': 'G', 'pos': '1465305', 'alt': 'A'}}}] assert results['NM_001316337.1:c.223C>T']['transcript_description'] == 'Homo sapiens methyl-CpG binding protein 2 (MECP2), transcript variant 3, mRNA' assert results['NM_001316337.1:c.223C>T']['gene_symbol'] == 'MECP2' assert results['NM_001316337.1:c.223C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001303266.1:p.(Arg75Ter)', 'slr': 'NP_001303266.1:p.(R75*)'} @@ -18594,16 +18594,16 @@ def test_variant322(self): assert results['NM_001316337.1:c.223C>T']['hgvs_lrg_variant'] == '' assert results['NM_001316337.1:c.223C>T']['hgvs_transcript_variant'] == 'NM_001316337.1:c.223C>T' assert results['NM_001316337.1:c.223C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001316337.1:c.223C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'chrX', 'ref': u'G', 'pos': '153296777', 'alt': u'A'}} - assert results['NM_001316337.1:c.223C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'chrX', 'ref': u'G', 'pos': '154031326', 'alt': u'A'}} - assert results['NM_001316337.1:c.223C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'X', 'ref': u'G', 'pos': '153296777', 'alt': u'A'}} - assert results['NM_001316337.1:c.223C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'X', 'ref': u'G', 'pos': '154031326', 'alt': u'A'}} + assert results['NM_001316337.1:c.223C>T']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '153296777', 'alt': 'A'}} + assert results['NM_001316337.1:c.223C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '154031326', 'alt': 'A'}} + assert results['NM_001316337.1:c.223C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '153296777', 'alt': 'A'}} + assert results['NM_001316337.1:c.223C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '154031326', 'alt': 'A'}} assert results['NM_001316337.1:c.223C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001303266.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001316337.1'} - assert 'NM_001110792.1:c.538C>T' in results.keys() + assert 'NM_001110792.1:c.538C>T' in list(results.keys()) assert results['NM_001110792.1:c.538C>T']['hgvs_lrg_transcript_variant'] == 'LRG_764t1:c.538C>T' assert results['NM_001110792.1:c.538C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001110792.1:c.538C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'HG1497_PATCH', 'ref': u'G', 'pos': '1465305', 'alt': u'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'NW_003871103.3', 'ref': u'G', 'pos': '1465305', 'alt': u'A'}}}] + assert results['NM_001110792.1:c.538C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'HG1497_PATCH', 'ref': 'G', 'pos': '1465305', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'NW_003871103.3', 'ref': 'G', 'pos': '1465305', 'alt': 'A'}}}] assert 
results['NM_001110792.1:c.538C>T']['transcript_description'] == 'Homo sapiens methyl-CpG binding protein 2 (MECP2), transcript variant 2, mRNA' assert results['NM_001110792.1:c.538C>T']['gene_symbol'] == 'MECP2' assert results['NM_001110792.1:c.538C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001104262.1:p.(Arg180Ter)', 'slr': 'NP_001104262.1:p.(R180*)'} @@ -18612,20 +18612,20 @@ def test_variant322(self): assert results['NM_001110792.1:c.538C>T']['hgvs_lrg_variant'] == 'LRG_764:g.110802C>T' assert results['NM_001110792.1:c.538C>T']['hgvs_transcript_variant'] == 'NM_001110792.1:c.538C>T' assert results['NM_001110792.1:c.538C>T']['hgvs_refseqgene_variant'] == 'NG_007107.2:g.110802C>T' - assert results['NM_001110792.1:c.538C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'chrX', 'ref': u'G', 'pos': '153296777', 'alt': u'A'}} - assert results['NM_001110792.1:c.538C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'chrX', 'ref': u'G', 'pos': '154031326', 'alt': u'A'}} - assert results['NM_001110792.1:c.538C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'X', 'ref': u'G', 'pos': '153296777', 'alt': u'A'}} - assert results['NM_001110792.1:c.538C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'X', 'ref': u'G', 'pos': '154031326', 'alt': u'A'}} + assert results['NM_001110792.1:c.538C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '153296777', 'alt': 'A'}} + assert results['NM_001110792.1:c.538C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '154031326', 'alt': 'A'}} + assert 
results['NM_001110792.1:c.538C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '153296777', 'alt': 'A'}} + assert results['NM_001110792.1:c.538C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '154031326', 'alt': 'A'}} assert results['NM_001110792.1:c.538C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007107.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001104262.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001110792.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_764.xml'} def test_variant323(self): variant = 'NM_198180.2:c.408_410delGTG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_198180.2:c.408_410del' in results.keys() + assert 'NM_198180.2:c.408_410del' in list(results.keys()) assert results['NM_198180.2:c.408_410del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_198180.2:c.408_410del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_198180.2:c.408_410del']['alt_genomic_loci'] == [] @@ -18647,9 +18647,9 @@ def test_variant323(self): def test_variant324(self): variant = 'NM_080877.2:c.1733_1735delinsTTT' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_080877.2:c.1733_1735delinsTTT' in results.keys() + assert 'NM_080877.2:c.1733_1735delinsTTT' in list(results.keys()) assert results['NM_080877.2:c.1733_1735delinsTTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_080877.2:c.1733_1735delinsTTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_080877.2:c.1733_1735delinsTTT']['alt_genomic_loci'] == [] @@ -18672,9 +18672,9 @@ def test_variant324(self): def test_variant325(self): variant = 
'NM_080877.2:c.1735_1737delinsTGA' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_080877.2:c.1735_1737delinsTGA' in results.keys() + assert 'NM_080877.2:c.1735_1737delinsTGA' in list(results.keys()) assert results['NM_080877.2:c.1735_1737delinsTGA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_080877.2:c.1735_1737delinsTGA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_080877.2:c.1735_1737delinsTGA']['alt_genomic_loci'] == [] @@ -18697,10 +18697,10 @@ def test_variant325(self): def test_variant326(self): variant = 'NM_080877.2:c.1735_1737delinsTAATTGTTC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_080877.2:c.1735_1737delinsTAATTGTTC' in results.keys() + assert 'NM_080877.2:c.1735_1737delinsTAATTGTTC' in list(results.keys()) assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['alt_genomic_loci'] == [] @@ -18722,10 +18722,10 @@ def test_variant326(self): def test_variant327(self): variant = 'NM_080877.2:c.1737delinsATTGTTC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_080877.2:c.1737delinsATTGTTC' in results.keys() + assert 'NM_080877.2:c.1737delinsATTGTTC' in list(results.keys()) assert results['NM_080877.2:c.1737delinsATTGTTC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_080877.2:c.1737delinsATTGTTC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_080877.2:c.1737delinsATTGTTC']['alt_genomic_loci'] == [] @@ -18747,10 +18747,10 @@ def test_variant327(self): def test_variant328(self): variant = 'NM_000088.3:c.4392_*2delinsAGAG' results = self.vv.validate(variant, 
'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.4392_*2delinsAGAG' in results.keys() + assert 'NM_000088.3:c.4392_*2delinsAGAG' in list(results.keys()) assert results['NM_000088.3:c.4392_*2delinsAGAG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.4392_*2delinsAGAG' assert results['NM_000088.3:c.4392_*2delinsAGAG']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.4392_*2delinsAGAG']['alt_genomic_loci'] == [] @@ -18762,20 +18762,20 @@ def test_variant328(self): assert results['NM_000088.3:c.4392_*2delinsAGAG']['hgvs_lrg_variant'] == 'LRG_1:g.21135_21140delinsAGAG' assert results['NM_000088.3:c.4392_*2delinsAGAG']['hgvs_transcript_variant'] == 'NM_000088.3:c.4392_*2delinsAGAG' assert results['NM_000088.3:c.4392_*2delinsAGAG']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.21135_21140delinsAGAG' - assert results['NM_000088.3:c.4392_*2delinsAGAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262861_48262866delinsCTCT', 'vcf': {'chr': 'chr17', 'ref': 'GTTTAC', 'pos': '48262861', 'alt': u'CTCT'}} - assert results['NM_000088.3:c.4392_*2delinsAGAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185500_50185505delinsCTCT', 'vcf': {'chr': 'chr17', 'ref': 'GTTTAC', 'pos': '50185500', 'alt': u'CTCT'}} - assert results['NM_000088.3:c.4392_*2delinsAGAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262861_48262866delinsCTCT', 'vcf': {'chr': '17', 'ref': 'GTTTAC', 'pos': '48262861', 'alt': u'CTCT'}} - assert results['NM_000088.3:c.4392_*2delinsAGAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185500_50185505delinsCTCT', 'vcf': {'chr': '17', 'ref': 'GTTTAC', 'pos': '50185500', 'alt': u'CTCT'}} + assert results['NM_000088.3:c.4392_*2delinsAGAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000017.10:g.48262861_48262866delinsCTCT', 'vcf': {'chr': 'chr17', 'ref': 'GTTTAC', 'pos': '48262861', 'alt': 'CTCT'}} + assert results['NM_000088.3:c.4392_*2delinsAGAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185500_50185505delinsCTCT', 'vcf': {'chr': 'chr17', 'ref': 'GTTTAC', 'pos': '50185500', 'alt': 'CTCT'}} + assert results['NM_000088.3:c.4392_*2delinsAGAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262861_48262866delinsCTCT', 'vcf': {'chr': '17', 'ref': 'GTTTAC', 'pos': '48262861', 'alt': 'CTCT'}} + assert results['NM_000088.3:c.4392_*2delinsAGAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185500_50185505delinsCTCT', 'vcf': {'chr': '17', 'ref': 'GTTTAC', 'pos': '50185500', 'alt': 'CTCT'}} assert results['NM_000088.3:c.4392_*2delinsAGAG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant329(self): variant = 'NM_000088.3:c.589_591delinsAGAAGC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.589_591delinsAGAAGC' in results.keys() + assert 'NM_000088.3:c.589_591delinsAGAAGC' in list(results.keys()) assert results['NM_000088.3:c.589_591delinsAGAAGC']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589_591delinsAGAAGC' assert results['NM_000088.3:c.589_591delinsAGAAGC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.589_591delinsAGAAGC']['alt_genomic_loci'] == [] @@ -18787,19 +18787,19 @@ def test_variant329(self): assert results['NM_000088.3:c.589_591delinsAGAAGC']['hgvs_lrg_variant'] == 'LRG_1:g.8638_8640delinsAGAAGC' assert 
results['NM_000088.3:c.589_591delinsAGAAGC']['hgvs_transcript_variant'] == 'NM_000088.3:c.589_591delinsAGAAGC' assert results['NM_000088.3:c.589_591delinsAGAAGC']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638_8640delinsAGAAGC' - assert results['NM_000088.3:c.589_591delinsAGAAGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275361_48275363delinsGCTTCT', 'vcf': {'chr': 'chr17', 'ref': 'ACC', 'pos': '48275361', 'alt': u'GCTTCT'}} - assert results['NM_000088.3:c.589_591delinsAGAAGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198000_50198002delinsGCTTCT', 'vcf': {'chr': 'chr17', 'ref': 'ACC', 'pos': '50198000', 'alt': u'GCTTCT'}} - assert results['NM_000088.3:c.589_591delinsAGAAGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275361_48275363delinsGCTTCT', 'vcf': {'chr': '17', 'ref': 'ACC', 'pos': '48275361', 'alt': u'GCTTCT'}} - assert results['NM_000088.3:c.589_591delinsAGAAGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198000_50198002delinsGCTTCT', 'vcf': {'chr': '17', 'ref': 'ACC', 'pos': '50198000', 'alt': u'GCTTCT'}} + assert results['NM_000088.3:c.589_591delinsAGAAGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275361_48275363delinsGCTTCT', 'vcf': {'chr': 'chr17', 'ref': 'ACC', 'pos': '48275361', 'alt': 'GCTTCT'}} + assert results['NM_000088.3:c.589_591delinsAGAAGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198000_50198002delinsGCTTCT', 'vcf': {'chr': 'chr17', 'ref': 'ACC', 'pos': '50198000', 'alt': 'GCTTCT'}} + assert results['NM_000088.3:c.589_591delinsAGAAGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275361_48275363delinsGCTTCT', 'vcf': {'chr': '17', 'ref': 'ACC', 'pos': '48275361', 'alt': 'GCTTCT'}} + assert 
results['NM_000088.3:c.589_591delinsAGAAGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198000_50198002delinsGCTTCT', 'vcf': {'chr': '17', 'ref': 'ACC', 'pos': '50198000', 'alt': 'GCTTCT'}} assert results['NM_000088.3:c.589_591delinsAGAAGC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant330(self): variant = 'NM_000885.5:c.*2536delinsAGAAAAATCA' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_000885.5:c.*2536delinsAGAAAAATCA' in results.keys() + assert 'NM_000885.5:c.*2536delinsAGAAAAATCA' in list(results.keys()) assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['alt_genomic_loci'] == [] @@ -18822,9 +18822,9 @@ def test_variant330(self): def test_variant331(self): variant = 'NM_002693.2:c.-186_-185delinsCC' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) - assert 'NM_002693.2:c.-186_-185delinsCC' in results.keys() + assert 'NM_002693.2:c.-186_-185delinsCC' in list(results.keys()) assert results['NM_002693.2:c.-186_-185delinsCC']['hgvs_lrg_transcript_variant'] == 'LRG_765t1:c.-186_-185delinsCC' assert results['NM_002693.2:c.-186_-185delinsCC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_002693.2:c.-186_-185delinsCC']['alt_genomic_loci'] == [] @@ -18836,10 +18836,10 @@ def test_variant331(self): assert results['NM_002693.2:c.-186_-185delinsCC']['hgvs_lrg_variant'] == '' assert results['NM_002693.2:c.-186_-185delinsCC']['hgvs_transcript_variant'] == 
'NM_002693.2:c.-186_-185delinsCC' assert results['NM_002693.2:c.-186_-185delinsCC']['hgvs_refseqgene_variant'] == 'NG_008218.1:g.5097_5098delinsCC' - assert results['NM_002693.2:c.-186_-185delinsCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.89877929_89877930delinsGG', 'vcf': {'chr': 'chr15', 'ref': 'CT', 'pos': '89877929', 'alt': u'GG'}} - assert results['NM_002693.2:c.-186_-185delinsCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89334698_89334699delinsGG', 'vcf': {'chr': 'chr15', 'ref': 'CT', 'pos': '89334698', 'alt': u'GG'}} - assert results['NM_002693.2:c.-186_-185delinsCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.89877929_89877930delinsGG', 'vcf': {'chr': '15', 'ref': 'CT', 'pos': '89877929', 'alt': u'GG'}} - assert results['NM_002693.2:c.-186_-185delinsCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89334698_89334699delinsGG', 'vcf': {'chr': '15', 'ref': 'CT', 'pos': '89334698', 'alt': u'GG'}} + assert results['NM_002693.2:c.-186_-185delinsCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.89877929_89877930delinsGG', 'vcf': {'chr': 'chr15', 'ref': 'CT', 'pos': '89877929', 'alt': 'GG'}} + assert results['NM_002693.2:c.-186_-185delinsCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89334698_89334699delinsGG', 'vcf': {'chr': 'chr15', 'ref': 'CT', 'pos': '89334698', 'alt': 'GG'}} + assert results['NM_002693.2:c.-186_-185delinsCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.89877929_89877930delinsGG', 'vcf': {'chr': '15', 'ref': 'CT', 'pos': '89877929', 'alt': 'GG'}} + assert results['NM_002693.2:c.-186_-185delinsCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89334698_89334699delinsGG', 'vcf': {'chr': '15', 'ref': 'CT', 'pos': '89334698', 'alt': 'GG'}} assert 
results['NM_002693.2:c.-186_-185delinsCC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008218.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002684.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002693.2'} assert results['flag'] == 'gene_variant' @@ -18847,13 +18847,13 @@ def test_variant331(self): def test_variant332(self): variant = 'NG_009616.1:g.29052_29053insCTACATAG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_001287344.1:c.690_690+1insCTACATAG' in results.keys() + assert 'NM_001287344.1:c.690_690+1insCTACATAG' in list(results.keys()) assert results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001287344.1:c.690_690+1insCTACATAG']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001287344.1:c.690_690+1insCTACATAG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': u'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': u'CCTATGTAG'}}}] + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}] assert results['NM_001287344.1:c.690_690+1insCTACATAG']['transcript_description'] == 'Homo sapiens Bruton tyrosine kinase (BTK), transcript variant 3, mRNA' assert results['NM_001287344.1:c.690_690+1insCTACATAG']['gene_symbol'] == 'BTK' assert 
results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001274273.1:p.?', 'slr': 'NP_001274273.1:p.?'} @@ -18862,16 +18862,16 @@ def test_variant332(self): assert results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_lrg_variant'] == '' assert results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_transcript_variant'] == 'NM_001287344.1:c.690_690+1insCTACATAG' assert results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_refseqgene_variant'] == '' - assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '100617160', 'alt': u'CCTATGTAG'}} - assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '101362172', 'alt': u'CCTATGTAG'}} - assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '100617160', 'alt': u'CCTATGTAG'}} - assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '101362172', 'alt': u'CCTATGTAG'}} + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} + assert 
results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} assert results['NM_001287344.1:c.690_690+1insCTACATAG']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001274273.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001287344.1'} - assert 'NM_001287345.1:c.588_588+1insCTACATAG' in results.keys() + assert 'NM_001287345.1:c.588_588+1insCTACATAG' in list(results.keys()) assert results['NM_001287345.1:c.588_588+1insCTACATAG']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001287345.1:c.588_588+1insCTACATAG']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001287345.1:c.588_588+1insCTACATAG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': u'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': u'CCTATGTAG'}}}] + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}] assert results['NM_001287345.1:c.588_588+1insCTACATAG']['transcript_description'] == 'Homo sapiens Bruton tyrosine kinase (BTK), 
transcript variant 2, mRNA' assert results['NM_001287345.1:c.588_588+1insCTACATAG']['gene_symbol'] == 'BTK' assert results['NM_001287345.1:c.588_588+1insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001274274.1:p.?', 'slr': 'NP_001274274.1:p.?'} @@ -18880,16 +18880,16 @@ def test_variant332(self): assert results['NM_001287345.1:c.588_588+1insCTACATAG']['hgvs_lrg_variant'] == '' assert results['NM_001287345.1:c.588_588+1insCTACATAG']['hgvs_transcript_variant'] == 'NM_001287345.1:c.588_588+1insCTACATAG' assert results['NM_001287345.1:c.588_588+1insCTACATAG']['hgvs_refseqgene_variant'] == '' - assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '100617160', 'alt': u'CCTATGTAG'}} - assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '101362172', 'alt': u'CCTATGTAG'}} - assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '100617160', 'alt': u'CCTATGTAG'}} - assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '101362172', 'alt': u'CCTATGTAG'}} + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} assert results['NM_001287345.1:c.588_588+1insCTACATAG']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001274274.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001287345.1'} - assert 'NM_000061.2:c.588_588+1insCTACATAG' in results.keys() + assert 'NM_000061.2:c.588_588+1insCTACATAG' in list(results.keys()) assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_lrg_transcript_variant'] == 'LRG_128t1:c.588_588+1insCTACATAG' assert results['NM_000061.2:c.588_588+1insCTACATAG']['refseqgene_context_intronic_sequence'] == 'NG_009616.1(NM_000061.2):c.588_588+1insCTACATAG' - assert results['NM_000061.2:c.588_588+1insCTACATAG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': u'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': u'CCTATGTAG'}}}] + assert results['NM_000061.2:c.588_588+1insCTACATAG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 
'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}] assert results['NM_000061.2:c.588_588+1insCTACATAG']['transcript_description'] == 'Homo sapiens Bruton tyrosine kinase (BTK), transcript variant 1, mRNA' assert results['NM_000061.2:c.588_588+1insCTACATAG']['gene_symbol'] == 'BTK' assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000052.1(LRG_128p1):p.?', 'slr': 'NP_000052.1:p.?'} @@ -18898,23 +18898,23 @@ def test_variant332(self): assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_lrg_variant'] == 'LRG_128:g.29052_29053insCTACATAG' assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_transcript_variant'] == 'NM_000061.2:c.588_588+1insCTACATAG' assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_refseqgene_variant'] == 'NG_009616.1:g.29052_29053insCTACATAG' - assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '100617160', 'alt': u'CCTATGTAG'}} - assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '101362172', 'alt': u'CCTATGTAG'}} - assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '100617160', 'alt': u'CCTATGTAG'}} - assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '101362172', 'alt': u'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} assert results['NM_000061.2:c.588_588+1insCTACATAG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009616.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000052.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000061.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_128.xml'} def test_variant333(self): variant = 'NM_000061.2:c.588_588+1insCTACATAG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000061.2:c.588_588+1insCTACATAG' in results.keys() + assert 'NM_000061.2:c.588_588+1insCTACATAG' in list(results.keys()) assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_lrg_transcript_variant'] == 'LRG_128t1:c.588_588+1insCTACATAG' assert results['NM_000061.2:c.588_588+1insCTACATAG']['refseqgene_context_intronic_sequence'] == 'NG_009616.1(NM_000061.2):c.588_588+1insCTACATAG' - assert results['NM_000061.2:c.588_588+1insCTACATAG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'HG1439_PATCH', 
'ref': 'C', 'pos': '43848', 'alt': u'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': u'CCTATGTAG'}}}] + assert results['NM_000061.2:c.588_588+1insCTACATAG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}] assert results['NM_000061.2:c.588_588+1insCTACATAG']['transcript_description'] == 'Homo sapiens Bruton tyrosine kinase (BTK), transcript variant 1, mRNA' assert results['NM_000061.2:c.588_588+1insCTACATAG']['gene_symbol'] == 'BTK' assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000052.1(LRG_128p1):p.?', 'slr': 'NP_000052.1:p.?'} @@ -18923,20 +18923,20 @@ def test_variant333(self): assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_lrg_variant'] == 'LRG_128:g.29052_29053insCTACATAG' assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_transcript_variant'] == 'NM_000061.2:c.588_588+1insCTACATAG' assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_refseqgene_variant'] == 'NG_009616.1:g.29052_29053insCTACATAG' - assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '100617160', 'alt': u'CCTATGTAG'}} - assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '101362172', 'alt': u'CCTATGTAG'}} - assert 
results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '100617160', 'alt': u'CCTATGTAG'}} - assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '101362172', 'alt': u'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} assert results['NM_000061.2:c.588_588+1insCTACATAG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009616.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000052.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000061.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_128.xml'} def test_variant334(self): variant = 'NM_000061.2:c.588_589insCTACATAG' results = self.vv.validate(variant, 'GRCh37', 'all') - print results + print(results) assert results['flag'] == 
'gene_variant' - assert 'NM_000061.2:c.588_589insCTACATAG' in results.keys() + assert 'NM_000061.2:c.588_589insCTACATAG' in list(results.keys()) assert results['NM_000061.2:c.588_589insCTACATAG']['hgvs_lrg_transcript_variant'] == 'LRG_128t1:c.588_589insCTACATAG' assert results['NM_000061.2:c.588_589insCTACATAG']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000061.2:c.588_589insCTACATAG']['alt_genomic_loci'] == [] @@ -18949,8 +18949,8 @@ def test_variant334(self): assert results['NM_000061.2:c.588_589insCTACATAG']['hgvs_transcript_variant'] == 'NM_000061.2:c.588_589insCTACATAG' assert results['NM_000061.2:c.588_589insCTACATAG']['hgvs_refseqgene_variant'] == '' assert results['NM_000061.2:c.588_589insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100615743_100617161delinsTCTATGTAGC', 'vcf': {'chr': 'chrX', 'ref': 'GTTAGGAGAAAAGGTAGGAGGGTTTGTCAAGATACCAAGCACTCTTCTCTTCTCTCCCAACTCTCTGGCTTACTCAAGACACCCAAATCAGGCATACTAAAATATTACTCAGCAGTCATTCAACAACCATTTTTAAGCACCAGTGCAGGAGTTCTCAGCCTTGCACACATATAAAGACCATGTATGGAACTTTTAAATTCCAATGTACTTTCGGAGGCCAAGGCGGGCGGATCAGTTGAGGCCAGGAGTTCGAGACCAGCCTGGCCAACGTGACGAAACCCCATCTCTACTAAAAATGCAAAAATCAGCTGGGCATGGTAGTGTGTGCATATAGCCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATAACTTGACCCCAGGAGGCGGAGGTTGTAGTGAGCCAAGACCACGCCACTGCACTCCAGCCTGGGTGACAGAGTGAGACTGTCTCAAAAAAAAACCCACAAAAAACAAAAAACCAATTCCAATACCTAGTCAGTTTCCTCACAGACCAATTACATCAAAATCAAACTCTCAGGAATGGGACCCAAACATTACTATTTTTAAAGCTCACTAGACAAAAACCATTTATAGCTAAGGTCAGGAAACCGGCTTGGCACTAAACTTGTACGTGAATCTACTAAGTGGCTCAGAACCTTGGTTTCCTTCTTTGTAAAATGAGTATAATAATACCTGCTCTACTTACTTACAATATGTGAGAAAGGGCTTTCTAGCCCTCAAGAAGGAACCAAAAAAAAAAAAAAAACTTCTGAAGTGTTAGTGATAGGTGGTGTTAGTGCTAAGTGTTGAGTATGTTGGTATTAAGTGTTAAATTCTTCTAACTTTACTGTATGTTTGAAAATACTTTCCAGCTGGGCACGGTGGCTCACGCCTGTAATCCCAGCACTTTAGGAGGCCGAGGCGGGTTCGAGACCAGCCTGAACAACATGGTGACACCCCCATCTCTACTAAAAATACAAAAATTAGCTGGGCGTGGTGGTGCATGCCTGTAATCCCAGCTACTCAGGAGGCTGAGACAGGAGAATCACTTGAATCTGGGAGGCGGAGGTTGCAGTGAGCCGAGATCATGCCATTACACTCGAACCTGGGC
GACAGAGCGAGACTCCGTCTCAAAACAAAAACAAAAGCAAAAACAAAACAACAACAAAAACTTTACATTAAAAAAATCAGGTTTTGTTCTAAACAGGTGATTGGATTACATGGTTGCTGAGAGCCTTCTATCTTTCCATCGAGGAGGAAATCCTAATTAGAAGAACAAATCCCCCATCTTAGCAAGAATACCAATTAACACTGCCAAGTCCCAGGGTAATTCTAAGACTCTAGTGTGTTCTTAGGGCTTGACTATAAGTTTCCATTTAAGCAGTGGCAGCACCCAGTTTCCCTGTATAC', 'pos': '100615751', 'alt': 'G'}} - assert 'hg38' not in results['NM_000061.2:c.588_589insCTACATAG']['primary_assembly_loci'].keys() + assert 'hg38' not in list(results['NM_000061.2:c.588_589insCTACATAG']['primary_assembly_loci'].keys()) assert results['NM_000061.2:c.588_589insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100615743_100617161delinsTCTATGTAGC', 'vcf': {'chr': 'X', 'ref': 'GTTAGGAGAAAAGGTAGGAGGGTTTGTCAAGATACCAAGCACTCTTCTCTTCTCTCCCAACTCTCTGGCTTACTCAAGACACCCAAATCAGGCATACTAAAATATTACTCAGCAGTCATTCAACAACCATTTTTAAGCACCAGTGCAGGAGTTCTCAGCCTTGCACACATATAAAGACCATGTATGGAACTTTTAAATTCCAATGTACTTTCGGAGGCCAAGGCGGGCGGATCAGTTGAGGCCAGGAGTTCGAGACCAGCCTGGCCAACGTGACGAAACCCCATCTCTACTAAAAATGCAAAAATCAGCTGGGCATGGTAGTGTGTGCATATAGCCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATAACTTGACCCCAGGAGGCGGAGGTTGTAGTGAGCCAAGACCACGCCACTGCACTCCAGCCTGGGTGACAGAGTGAGACTGTCTCAAAAAAAAACCCACAAAAAACAAAAAACCAATTCCAATACCTAGTCAGTTTCCTCACAGACCAATTACATCAAAATCAAACTCTCAGGAATGGGACCCAAACATTACTATTTTTAAAGCTCACTAGACAAAAACCATTTATAGCTAAGGTCAGGAAACCGGCTTGGCACTAAACTTGTACGTGAATCTACTAAGTGGCTCAGAACCTTGGTTTCCTTCTTTGTAAAATGAGTATAATAATACCTGCTCTACTTACTTACAATATGTGAGAAAGGGCTTTCTAGCCCTCAAGAAGGAACCAAAAAAAAAAAAAAAACTTCTGAAGTGTTAGTGATAGGTGGTGTTAGTGCTAAGTGTTGAGTATGTTGGTATTAAGTGTTAAATTCTTCTAACTTTACTGTATGTTTGAAAATACTTTCCAGCTGGGCACGGTGGCTCACGCCTGTAATCCCAGCACTTTAGGAGGCCGAGGCGGGTTCGAGACCAGCCTGAACAACATGGTGACACCCCCATCTCTACTAAAAATACAAAAATTAGCTGGGCGTGGTGGTGCATGCCTGTAATCCCAGCTACTCAGGAGGCTGAGACAGGAGAATCACTTGAATCTGGGAGGCGGAGGTTGCAGTGAGCCGAGATCATGCCATTACACTCGAACCTGGGCGACAGAGCGAGACTCCGTCTCAAAACAAAAACAAAAGCAAAAACAAAACAACAACAAAAACTTTACATTAAAAAAATCAGGTTTTGTTCTAAACAGGTGATTGGATTACATGGTTGCTGAGAGCCTTCTATCTTTCCATCGAGGAGGAAA
TCCTAATTAGAAGAACAAATCCCCCATCTTAGCAAGAATACCAATTAACACTGCCAAGTCCCAGGGTAATTCTAAGACTCTAGTGTGTTCTTAGGGCTTGACTATAAGTTTCCATTTAAGCAGTGGCAGCACCCAGTTTCCCTGTATAC', 'pos': '100615751', 'alt': 'G'}} - assert 'grch38' not in results['NM_000061.2:c.588_589insCTACATAG']['primary_assembly_loci'].keys() + assert 'grch38' not in list(results['NM_000061.2:c.588_589insCTACATAG']['primary_assembly_loci'].keys()) assert results['NM_000061.2:c.588_589insCTACATAG']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000052.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000061.2'} From 96550a37b13f5c895891ae08fd67e3fe2d722bd6 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 21 Feb 2019 16:48:48 +0000 Subject: [PATCH 033/223] Fixed bug with py3 division returning float --- VariantValidator/modules/vvMixinInit.py | 2 +- {VariantValidator/testing => test}/inputVariants.txt | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename {VariantValidator/testing => test}/inputVariants.txt (100%) diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index d2a4b49f..183d010c 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -439,7 +439,7 @@ def myc_to_p(self,hgvs_transcript, evm, re_to_p): # Nucleotide variant range aligns to the Termination codon if ins_thr[-3:] == 'Ter': posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + str( - ins_thr[:3]) + 'ext' + str(ins_thr[-3:]) + str((len(ins_thr) / 3) - 1) + ')' + ins_thr[:3]) + 'ext' + str(ins_thr[-3:]) + str(int((len(ins_thr) / 3)) - 1) + ')' # Nucleotide variant range spans the Termination codon else: posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + str( diff --git a/VariantValidator/testing/inputVariants.txt b/test/inputVariants.txt similarity index 100% rename from VariantValidator/testing/inputVariants.txt rename to test/inputVariants.txt From b5dfff8d2911442efb49ea7101ee543ed33fdefa Mon Sep 17 
00:00:00 2001 From: TeriForey Date: Thu, 21 Feb 2019 16:49:36 +0000 Subject: [PATCH 034/223] Updated following changes in v0 branch --- VariantValidator/modules/vvMixinConverters.py | 2 - VariantValidator/modules/vvMixinCore.py | 10 +- .../testing/testOutputsMasterITS/variant0.txt | 171 - .../testing/testOutputsMasterITS/variant1.txt | 172 - .../testOutputsMasterITS/variant10.txt | 80 - .../testOutputsMasterITS/variant100.txt | 309 -- .../testOutputsMasterITS/variant101.txt | 316 -- .../testOutputsMasterITS/variant102.txt | 302 -- .../testOutputsMasterITS/variant103.txt | 309 -- .../testOutputsMasterITS/variant104.txt | 289 -- .../testOutputsMasterITS/variant105.txt | 309 -- .../testOutputsMasterITS/variant106.txt | 309 -- .../testOutputsMasterITS/variant107.txt | 312 -- .../testOutputsMasterITS/variant108.txt | 183 - .../testOutputsMasterITS/variant109.txt | 183 - .../testOutputsMasterITS/variant11.txt | 80 - .../testOutputsMasterITS/variant110.txt | 183 - .../testOutputsMasterITS/variant111.txt | 182 - .../testOutputsMasterITS/variant112.txt | 182 - .../testOutputsMasterITS/variant113.txt | 183 - .../testOutputsMasterITS/variant114.txt | 177 - .../testOutputsMasterITS/variant115.txt | 177 - .../testOutputsMasterITS/variant116.txt | 177 - .../testOutputsMasterITS/variant117.txt | 23 - .../testOutputsMasterITS/variant118.txt | 177 - .../testOutputsMasterITS/variant119.txt | 177 - .../testOutputsMasterITS/variant12.txt | 704 ---- .../testOutputsMasterITS/variant120.txt | 179 - .../testOutputsMasterITS/variant121.txt | 177 - .../testOutputsMasterITS/variant122.txt | 177 - .../testOutputsMasterITS/variant123.txt | 80 - .../testOutputsMasterITS/variant124.txt | 80 - .../testOutputsMasterITS/variant125.txt | 80 - .../testOutputsMasterITS/variant126.txt | 177 - .../testOutputsMasterITS/variant127.txt | 175 - .../testOutputsMasterITS/variant128.txt | 80 - .../testOutputsMasterITS/variant129.txt | 174 - .../testOutputsMasterITS/variant13.txt | 177 - 
.../testOutputsMasterITS/variant130.txt | 80 - .../testOutputsMasterITS/variant131.txt | 141 - .../testOutputsMasterITS/variant132.txt | 167 - .../testOutputsMasterITS/variant133.txt | 82 - .../testOutputsMasterITS/variant134.txt | 80 - .../testOutputsMasterITS/variant135.txt | 157 - .../testOutputsMasterITS/variant136.txt | 259 -- .../testOutputsMasterITS/variant137.txt | 304 -- .../testOutputsMasterITS/variant138.txt | 947 ----- .../testOutputsMasterITS/variant139.txt | 80 - .../testOutputsMasterITS/variant14.txt | 177 - .../testOutputsMasterITS/variant140.txt | 177 - .../testOutputsMasterITS/variant141.txt | 175 - .../testOutputsMasterITS/variant142.txt | 80 - .../testOutputsMasterITS/variant143.txt | 80 - .../testOutputsMasterITS/variant144.txt | 171 - .../testOutputsMasterITS/variant145.txt | 80 - .../testOutputsMasterITS/variant146.txt | 80 - .../testOutputsMasterITS/variant147.txt | 704 ---- .../testOutputsMasterITS/variant148.txt | 535 --- .../testOutputsMasterITS/variant149.txt | 144 - .../testOutputsMasterITS/variant15.txt | 177 - .../testOutputsMasterITS/variant150.txt | 80 - .../testOutputsMasterITS/variant151.txt | 184 - .../testOutputsMasterITS/variant152.txt | 183 - .../testOutputsMasterITS/variant153.txt | 267 -- .../testOutputsMasterITS/variant154.txt | 281 -- .../testOutputsMasterITS/variant155.txt | 259 -- .../testOutputsMasterITS/variant156.txt | 260 -- .../testOutputsMasterITS/variant157.txt | 1357 ------- .../testOutputsMasterITS/variant158.txt | 171 - .../testOutputsMasterITS/variant159.txt | 172 - .../testOutputsMasterITS/variant16.txt | 156 - .../testOutputsMasterITS/variant160.txt | 179 - .../testOutputsMasterITS/variant161.txt | 177 - .../testOutputsMasterITS/variant162.txt | 547 --- .../testOutputsMasterITS/variant163.txt | 402 --- .../testOutputsMasterITS/variant164.txt | 179 - .../testOutputsMasterITS/variant165.txt | 600 ---- .../testOutputsMasterITS/variant166.txt | 171 - .../testOutputsMasterITS/variant167.txt | 1405 -------- 
.../testOutputsMasterITS/variant168.txt | 947 ----- .../testOutputsMasterITS/variant169.txt | 174 - .../testOutputsMasterITS/variant17.txt | 80 - .../testOutputsMasterITS/variant170.txt | 183 - .../testOutputsMasterITS/variant171.txt | 443 --- .../testOutputsMasterITS/variant172.txt | 182 - .../testOutputsMasterITS/variant173.txt | 181 - .../testOutputsMasterITS/variant174.txt | 438 --- .../testOutputsMasterITS/variant175.txt | 183 - .../testOutputsMasterITS/variant176.txt | 182 - .../testOutputsMasterITS/variant177.txt | 182 - .../testOutputsMasterITS/variant178.txt | 182 - .../testOutputsMasterITS/variant179.txt | 174 - .../testOutputsMasterITS/variant18.txt | 82 - .../testOutputsMasterITS/variant180.txt | 506 --- .../testOutputsMasterITS/variant181.txt | 510 --- .../testOutputsMasterITS/variant182.txt | 510 --- .../testOutputsMasterITS/variant183.txt | 515 --- .../testOutputsMasterITS/variant184.txt | 439 --- .../testOutputsMasterITS/variant185.txt | 438 --- .../testOutputsMasterITS/variant186.txt | 442 --- .../testOutputsMasterITS/variant187.txt | 437 --- .../testOutputsMasterITS/variant188.txt | 180 - .../testOutputsMasterITS/variant189.txt | 121 - .../testOutputsMasterITS/variant19.txt | 80 - .../testOutputsMasterITS/variant190.txt | 23 - .../testOutputsMasterITS/variant191.txt | 23 - .../testOutputsMasterITS/variant192.txt | 121 - .../testOutputsMasterITS/variant193.txt | 82 - .../testOutputsMasterITS/variant194.txt | 23 - .../testOutputsMasterITS/variant195.txt | 23 - .../testOutputsMasterITS/variant196.txt | 23 - .../testOutputsMasterITS/variant197.txt | 80 - .../testOutputsMasterITS/variant198.txt | 606 ---- .../testOutputsMasterITS/variant199.txt | 286 -- .../testing/testOutputsMasterITS/variant2.txt | 174 - .../testOutputsMasterITS/variant20.txt | 82 - .../testOutputsMasterITS/variant200.txt | 543 --- .../testOutputsMasterITS/variant201.txt | 176 - .../testOutputsMasterITS/variant202.txt | 176 - .../testOutputsMasterITS/variant203.txt | 171 - 
.../testOutputsMasterITS/variant204.txt | 511 --- .../testOutputsMasterITS/variant205.txt | 303 -- .../testOutputsMasterITS/variant206.txt | 1001 ------ .../testOutputsMasterITS/variant207.txt | 632 ---- .../testOutputsMasterITS/variant208.txt | 374 -- .../testOutputsMasterITS/variant209.txt | 172 - .../testOutputsMasterITS/variant21.txt | 180 - .../testOutputsMasterITS/variant210.txt | 528 --- .../testOutputsMasterITS/variant211.txt | 215 -- .../testOutputsMasterITS/variant212.txt | 734 ---- .../testOutputsMasterITS/variant213.txt | 297 -- .../testOutputsMasterITS/variant214.txt | 3113 ----------------- .../testOutputsMasterITS/variant215.txt | 316 -- .../testOutputsMasterITS/variant216.txt | 485 --- .../testOutputsMasterITS/variant217.txt | 172 - .../testOutputsMasterITS/variant218.txt | 424 --- .../testOutputsMasterITS/variant219.txt | 408 --- .../testOutputsMasterITS/variant22.txt | 82 - .../testOutputsMasterITS/variant220.txt | 1185 ------- .../testOutputsMasterITS/variant221.txt | 418 --- .../testOutputsMasterITS/variant222.txt | 177 - .../testOutputsMasterITS/variant223.txt | 515 --- .../testOutputsMasterITS/variant224.txt | 540 --- .../testOutputsMasterITS/variant225.txt | 462 --- .../testOutputsMasterITS/variant226.txt | 418 --- .../testOutputsMasterITS/variant227.txt | 418 --- .../testOutputsMasterITS/variant228.txt | 781 ----- .../testOutputsMasterITS/variant229.txt | 786 ----- .../testOutputsMasterITS/variant23.txt | 80 - .../testOutputsMasterITS/variant230.txt | 171 - .../testOutputsMasterITS/variant231.txt | 511 --- .../testOutputsMasterITS/variant232.txt | 286 -- .../testOutputsMasterITS/variant233.txt | 1291 ------- .../testOutputsMasterITS/variant234.txt | 286 -- .../testOutputsMasterITS/variant235.txt | 286 -- .../testOutputsMasterITS/variant236.txt | 171 - .../testOutputsMasterITS/variant237.txt | 368 -- .../testOutputsMasterITS/variant238.txt | 544 --- .../testOutputsMasterITS/variant239.txt | 567 --- .../testOutputsMasterITS/variant24.txt | 82 
- .../testOutputsMasterITS/variant240.txt | 580 --- .../testOutputsMasterITS/variant241.txt | 549 --- .../testOutputsMasterITS/variant242.txt | 1106 ------ .../testOutputsMasterITS/variant243.txt | 549 --- .../testOutputsMasterITS/variant244.txt | 1119 ------ .../testOutputsMasterITS/variant245.txt | 580 --- .../testOutputsMasterITS/variant246.txt | 567 --- .../testOutputsMasterITS/variant247.txt | 357 -- .../testOutputsMasterITS/variant248.txt | 354 -- .../testOutputsMasterITS/variant249.txt | 341 -- .../testOutputsMasterITS/variant25.txt | 80 - .../testOutputsMasterITS/variant250.txt | 1870 ---------- .../testOutputsMasterITS/variant251.txt | 1870 ---------- .../testOutputsMasterITS/variant252.txt | 642 ---- .../testOutputsMasterITS/variant253.txt | 777 ---- .../testOutputsMasterITS/variant254.txt | 748 ---- .../testOutputsMasterITS/variant255.txt | 483 --- .../testOutputsMasterITS/variant256.txt | 573 --- .../testOutputsMasterITS/variant257.txt | 180 - .../testOutputsMasterITS/variant258.txt | 177 - .../testOutputsMasterITS/variant259.txt | 177 - .../testOutputsMasterITS/variant26.txt | 80 - .../testOutputsMasterITS/variant260.txt | 172 - .../testOutputsMasterITS/variant261.txt | 171 - .../testOutputsMasterITS/variant262.txt | 914 ----- .../testOutputsMasterITS/variant263.txt | 171 - .../testOutputsMasterITS/variant264.txt | 156 - .../testOutputsMasterITS/variant265.txt | 286 -- .../testOutputsMasterITS/variant266.txt | 3076 ---------------- .../testOutputsMasterITS/variant267.txt | 2023 ----------- .../testOutputsMasterITS/variant268.txt | 1666 --------- .../testOutputsMasterITS/variant269.txt | 401 --- .../testOutputsMasterITS/variant27.txt | 80 - .../testOutputsMasterITS/variant270.txt | 401 --- .../testOutputsMasterITS/variant271.txt | 2534 -------------- .../testOutputsMasterITS/variant272.txt | 2495 ------------- .../testOutputsMasterITS/variant273.txt | 1148 ------ .../testOutputsMasterITS/variant274.txt | 176 - .../testOutputsMasterITS/variant275.txt | 
286 -- .../testOutputsMasterITS/variant276.txt | 404 --- .../testOutputsMasterITS/variant277.txt | 377 -- .../testOutputsMasterITS/variant278.txt | 1227 ------- .../testOutputsMasterITS/variant279.txt | 421 --- .../testOutputsMasterITS/variant28.txt | 23 - .../testOutputsMasterITS/variant280.txt | 894 ----- .../testOutputsMasterITS/variant281.txt | 1116 ------ .../testOutputsMasterITS/variant282.txt | 553 --- .../testOutputsMasterITS/variant283.txt | 286 -- .../testOutputsMasterITS/variant284.txt | 510 --- .../testOutputsMasterITS/variant285.txt | 156 - .../testOutputsMasterITS/variant286.txt | 389 -- .../testOutputsMasterITS/variant287.txt | 758 ---- .../testOutputsMasterITS/variant288.txt | 758 ---- .../testOutputsMasterITS/variant289.txt | 414 --- .../testOutputsMasterITS/variant29.txt | 80 - .../testOutputsMasterITS/variant290.txt | 171 - .../testOutputsMasterITS/variant291.txt | 176 - .../testOutputsMasterITS/variant292.txt | 176 - .../testOutputsMasterITS/variant293.txt | 286 -- .../testOutputsMasterITS/variant294.txt | 286 -- .../testOutputsMasterITS/variant295.txt | 1944 ---------- .../testOutputsMasterITS/variant296.txt | 908 ----- .../testOutputsMasterITS/variant297.txt | 1063 ------ .../testOutputsMasterITS/variant298.txt | 292 -- .../testOutputsMasterITS/variant299.txt | 304 -- .../testing/testOutputsMasterITS/variant3.txt | 171 - .../testOutputsMasterITS/variant30.txt | 171 - .../testOutputsMasterITS/variant300.txt | 294 -- .../testOutputsMasterITS/variant301.txt | 548 --- .../testOutputsMasterITS/variant302.txt | 518 --- .../testOutputsMasterITS/variant303.txt | 518 --- .../testOutputsMasterITS/variant304.txt | 284 -- .../testOutputsMasterITS/variant305.txt | 282 -- .../testOutputsMasterITS/variant306.txt | 282 -- .../testOutputsMasterITS/variant307.txt | 596 ---- .../testOutputsMasterITS/variant308.txt | 539 --- .../testOutputsMasterITS/variant309.txt | 603 ---- .../testOutputsMasterITS/variant31.txt | 171 - .../testOutputsMasterITS/variant310.txt | 
543 --- .../testOutputsMasterITS/variant311.txt | 516 --- .../testOutputsMasterITS/variant312.txt | 216 -- .../testOutputsMasterITS/variant313.txt | 635 ---- .../testOutputsMasterITS/variant314.txt | 179 - .../testOutputsMasterITS/variant315.txt | 180 - .../testOutputsMasterITS/variant316.txt | 182 - .../testOutputsMasterITS/variant317.txt | 293 -- .../testOutputsMasterITS/variant318.txt | 292 -- .../testOutputsMasterITS/variant319.txt | 287 -- .../testOutputsMasterITS/variant32.txt | 172 - .../testOutputsMasterITS/variant320.txt | 555 --- .../testOutputsMasterITS/variant321.txt | 513 --- .../testOutputsMasterITS/variant322.txt | 176 - .../testOutputsMasterITS/variant323.txt | 174 - .../testOutputsMasterITS/variant324.txt | 174 - .../testOutputsMasterITS/variant325.txt | 174 - .../testOutputsMasterITS/variant326.txt | 171 - .../testOutputsMasterITS/variant327.txt | 175 - .../testOutputsMasterITS/variant328.txt | 175 - .../testOutputsMasterITS/variant329.txt | 171 - .../testOutputsMasterITS/variant33.txt | 177 - .../testOutputsMasterITS/variant330.txt | 175 - .../testOutputsMasterITS/variant331.txt | 533 --- .../testOutputsMasterITS/variant332.txt | 215 -- .../testOutputsMasterITS/variant333.txt | 142 - .../testOutputsMasterITS/variant34.txt | 259 -- .../testOutputsMasterITS/variant35.txt | 172 - .../testOutputsMasterITS/variant36.txt | 402 --- .../testOutputsMasterITS/variant37.txt | 402 --- .../testOutputsMasterITS/variant38.txt | 82 - .../testOutputsMasterITS/variant39.txt | 80 - .../testing/testOutputsMasterITS/variant4.txt | 171 - .../testOutputsMasterITS/variant40.txt | 172 - .../testOutputsMasterITS/variant41.txt | 293 -- .../testOutputsMasterITS/variant42.txt | 564 --- .../testOutputsMasterITS/variant43.txt | 179 - .../testOutputsMasterITS/variant44.txt | 143 - .../testOutputsMasterITS/variant45.txt | 293 -- .../testOutputsMasterITS/variant46.txt | 175 - .../testOutputsMasterITS/variant47.txt | 177 - .../testOutputsMasterITS/variant48.txt | 177 - 
.../testOutputsMasterITS/variant49.txt | 175 - .../testing/testOutputsMasterITS/variant5.txt | 287 -- .../testOutputsMasterITS/variant50.txt | 177 - .../testOutputsMasterITS/variant51.txt | 176 - .../testOutputsMasterITS/variant52.txt | 170 - .../testOutputsMasterITS/variant53.txt | 170 - .../testOutputsMasterITS/variant54.txt | 177 - .../testOutputsMasterITS/variant55.txt | 177 - .../testOutputsMasterITS/variant56.txt | 177 - .../testOutputsMasterITS/variant57.txt | 177 - .../testOutputsMasterITS/variant58.txt | 175 - .../testOutputsMasterITS/variant59.txt | 177 - .../testing/testOutputsMasterITS/variant6.txt | 143 - .../testOutputsMasterITS/variant60.txt | 174 - .../testOutputsMasterITS/variant61.txt | 171 - .../testOutputsMasterITS/variant62.txt | 171 - .../testOutputsMasterITS/variant63.txt | 144 - .../testOutputsMasterITS/variant64.txt | 439 --- .../testOutputsMasterITS/variant65.txt | 511 --- .../testOutputsMasterITS/variant66.txt | 223 -- .../testOutputsMasterITS/variant67.txt | 223 -- .../testOutputsMasterITS/variant68.txt | 218 -- .../testOutputsMasterITS/variant69.txt | 219 -- .../testing/testOutputsMasterITS/variant7.txt | 1362 -------- .../testOutputsMasterITS/variant70.txt | 224 -- .../testOutputsMasterITS/variant71.txt | 225 -- .../testOutputsMasterITS/variant72.txt | 223 -- .../testOutputsMasterITS/variant73.txt | 223 -- .../testOutputsMasterITS/variant74.txt | 228 -- .../testOutputsMasterITS/variant75.txt | 222 -- .../testOutputsMasterITS/variant76.txt | 222 -- .../testOutputsMasterITS/variant77.txt | 228 -- .../testOutputsMasterITS/variant78.txt | 226 -- .../testOutputsMasterITS/variant79.txt | 223 -- .../testing/testOutputsMasterITS/variant8.txt | 177 - .../testOutputsMasterITS/variant80.txt | 882 ----- .../testOutputsMasterITS/variant81.txt | 628 ---- .../testOutputsMasterITS/variant82.txt | 277 -- .../testOutputsMasterITS/variant83.txt | 263 -- .../testOutputsMasterITS/variant84.txt | 272 -- .../testOutputsMasterITS/variant85.txt | 268 -- 
.../testOutputsMasterITS/variant86.txt | 272 -- .../testOutputsMasterITS/variant87.txt | 272 -- .../testOutputsMasterITS/variant88.txt | 268 -- .../testOutputsMasterITS/variant89.txt | 182 - .../testing/testOutputsMasterITS/variant9.txt | 82 - .../testOutputsMasterITS/variant90.txt | 261 -- .../testOutputsMasterITS/variant91.txt | 265 -- .../testOutputsMasterITS/variant92.txt | 261 -- .../testOutputsMasterITS/variant93.txt | 265 -- .../testOutputsMasterITS/variant94.txt | 259 -- .../testOutputsMasterITS/variant95.txt | 259 -- .../testOutputsMasterITS/variant96.txt | 262 -- .../testOutputsMasterITS/variant97.txt | 261 -- .../testOutputsMasterITS/variant98.txt | 309 -- .../testOutputsMasterITS/variant99.txt | 309 -- VariantValidator/testing/test_vv.py | 57 - VariantValidator/testing/vvTestCompare.py | 11 - VariantValidator/testing/vvTestFunctions.py | 190 - VariantValidator/testing/vvTestSave.py | 10 - test/test_inputs.py | 2855 +++++---------- 341 files changed, 957 insertions(+), 126811 deletions(-) delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant0.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant1.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant10.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant100.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant101.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant102.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant103.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant104.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant105.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant106.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant107.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant108.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant109.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant11.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant110.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant111.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant112.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant113.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant114.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant115.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant116.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant117.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant118.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant119.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant12.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant120.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant121.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant122.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant123.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant124.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant125.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant126.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant127.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant128.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant129.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant13.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant130.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant131.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant132.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant133.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant134.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant135.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant136.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant137.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant138.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant139.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant14.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant140.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant141.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant142.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant143.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant144.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant145.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant146.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant147.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant148.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant149.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant15.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant150.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant151.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant152.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant153.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant154.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant155.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant156.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant157.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant158.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant159.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant16.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant160.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant161.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant162.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant163.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant164.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant165.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant166.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant167.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant168.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant169.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant17.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant170.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant171.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant172.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant173.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant174.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant175.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant176.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant177.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant178.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant179.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant18.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant180.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant181.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant182.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant183.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant184.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant185.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant186.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant187.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant188.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant189.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant19.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant190.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant191.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant192.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant193.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant194.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant195.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant196.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant197.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant198.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant199.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant2.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant20.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant200.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant201.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant202.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant203.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant204.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant205.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant206.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant207.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant208.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant209.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant21.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant210.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant211.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant212.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant213.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant214.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant215.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant216.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant217.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant218.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant219.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant22.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant220.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant221.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant222.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant223.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant224.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant225.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant226.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant227.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant228.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant229.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant23.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant230.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant231.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant232.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant233.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant234.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant235.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant236.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant237.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant238.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant239.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant24.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant240.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant241.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant242.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant243.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant244.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant245.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant246.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant247.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant248.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant249.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant25.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant250.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant251.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant252.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant253.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant254.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant255.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant256.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant257.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant258.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant259.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant26.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant260.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant261.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant262.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant263.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant264.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant265.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant266.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant267.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant268.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant269.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant27.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant270.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant271.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant272.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant273.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant274.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant275.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant276.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant277.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant278.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant279.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant28.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant280.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant281.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant282.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant283.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant284.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant285.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant286.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant287.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant288.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant289.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant29.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant290.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant291.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant292.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant293.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant294.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant295.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant296.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant297.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant298.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant299.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant3.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant30.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant300.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant301.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant302.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant303.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant304.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant305.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant306.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant307.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant308.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant309.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant31.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant310.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant311.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant312.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant313.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant314.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant315.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant316.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant317.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant318.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant319.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant32.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant320.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant321.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant322.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant323.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant324.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant325.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant326.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant327.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant328.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant329.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant33.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant330.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant331.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant332.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant333.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant34.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant35.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant36.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant37.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant38.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant39.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant4.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant40.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant41.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant42.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant43.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant44.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant45.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant46.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant47.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant48.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant49.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant5.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant50.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant51.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant52.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant53.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant54.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant55.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant56.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant57.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant58.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant59.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant6.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant60.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant61.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant62.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant63.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant64.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant65.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant66.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant67.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant68.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant69.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant7.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant70.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant71.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant72.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant73.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant74.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant75.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant76.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant77.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant78.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant79.txt delete mode 100644 
VariantValidator/testing/testOutputsMasterITS/variant8.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant80.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant81.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant82.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant83.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant84.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant85.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant86.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant87.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant88.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant89.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant9.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant90.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant91.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant92.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant93.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant94.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant95.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant96.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant97.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant98.txt delete mode 100644 VariantValidator/testing/testOutputsMasterITS/variant99.txt delete mode 100644 VariantValidator/testing/test_vv.py delete mode 100644 VariantValidator/testing/vvTestCompare.py delete mode 100644 VariantValidator/testing/vvTestFunctions.py delete mode 100644 VariantValidator/testing/vvTestSave.py diff --git 
a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index b89e4276..1c8ce395 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -2279,7 +2279,6 @@ def hgvs_alleles(self, variant_description,hn): merge.append(allele) for variant in each_allele: merged_alleles.append([variant]) - merged_alleles.append(merge) my_alleles = merged_alleles elif re.search('\(;\)', remainder): @@ -2330,7 +2329,6 @@ def hgvs_alleles(self, variant_description,hn): merge.append(allele) for variant in each_allele: merged_alleles.append([variant]) - merged_alleles.append(merge) my_alleles = merged_alleles # Extract alleles into strings diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 9b0fe6fa..8f08872e 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -745,19 +745,23 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Catch the trailing digits digits = re.search(r"(\d+$)", failed) digits = digits.group(1) + remove = str(digits) + 'end_anchor' + failed = failed + 'end_anchor' + failed = failed.replace(remove, '') + # Remove them so that the string SHOULD parse try: hgvs_failed = self.hp.parse_hgvs_variant(failed) except hgvs.exceptions.HGVSError as e: error = str(e) error = 'The syntax of the input variant description is invalid ' - if re.search(r'ins\d+', failed): + if re.search(r'ins$', failed): issue_link = 'http://varnomen.hgvs.org/recommendations/DNA/variant/insertion/' error = error + ' please refer to ' + issue_link validation['warnings'] = validation['warnings'] + error - logger.warning(error + " " + e) + logger.warning(str(error) + " " + str(e)) continue - hgvs_failed = self.hp.parse_hgvs_variant(failed) + hgvs_failed.posedit.edit = str(hgvs_failed.posedit.edit).replace(digits, '') failed = str(hgvs_failed) hgvs_failed = 
self.hp.parse_hgvs_variant(failed) diff --git a/VariantValidator/testing/testOutputsMasterITS/variant0.txt b/VariantValidator/testing/testOutputsMasterITS/variant0.txt deleted file mode 100644 index 4d5d0a22..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant0.txt +++ /dev/null @@ -1,171 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589G>T' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2:p.(Gly197Cys)' -p20 -sS'slr' -p21 -S'NP_000079.2:p.(G197C)' -p22 -ssS'submitted_variant' -p23 -S'NM_000088.3:c.589G>T' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_000088.3:c.589G>T' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000017.10:g.48275363C>A' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr17' -p39 -sS'ref' -p40 -VC -p41 -sS'pos' -p42 -S'48275363' -p43 -sS'alt' -p44 -VA -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000017.11:g.50198002C>A' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'50198002' -p50 -sg44 -g45 -sssS'grch37' -p51 -(dp52 -g34 -S'NC_000017.10:g.48275363C>A' -p53 -sg36 -(dp54 -g38 -S'17' -p55 -sg40 -g41 -sg42 -S'48275363' -p56 -sg44 -g45 -sssS'grch38' -p57 -(dp58 -g34 -S'NC_000017.11:g.50198002C>A' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'50198002' -p61 -sg44 -g45 -ssssS'reference_sequence_records' -p62 -(dp63 -S'protein' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p65 -sS'transcript' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p67 
-sssS'metadata' -p68 -(dp69 -S'variantvalidator_hgvs_version' -p70 -S'1.1.3' -p71 -sS'uta_schema' -p72 -S'uta_20180821' -p73 -sS'seqrepo_db' -p74 -S'2018-08-21' -p75 -sS'variantvalidator_version' -p76 -S'v0.2' -p77 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant1.txt b/VariantValidator/testing/testOutputsMasterITS/variant1.txt deleted file mode 100644 index c55d48e9..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant1.txt +++ /dev/null @@ -1,172 +0,0 @@ -(dp0 -S'NM_015120.4:c.35T>C' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA -p12 -sS'gene_symbol' -p13 -S'ALMS1' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_055935.4:p.(Leu12Pro)' -p18 -sS'slr' -p19 -S'NP_055935.4:p.(L12P)' -p20 -ssS'submitted_variant' -p21 -S'NM_015120.4:c.35T>C' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_015120.4:c.35T>C' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000002.11:g.73613031delinsCGGA' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr2' -p37 -sS'ref' -p38 -S'T' -p39 -sS'pos' -p40 -S'73613031' -p41 -sS'alt' -p42 -S'CGGA' -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000002.12:g.73385903delinsCGGA' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'73385903' -p48 -sg42 -S'CGGA' -p49 -sssS'grch37' -p50 -(dp51 -g32 -S'NC_000002.11:g.73613031delinsCGGA' -p52 -sg34 -(dp53 -g36 -S'2' -p54 -sg38 -g39 -sg40 -S'73613031' -p55 -sg42 -g43 -sssS'grch38' -p56 -(dp57 -g32 -S'NC_000002.12:g.73385903delinsCGGA' -p58 -sg34 -(dp59 -g36 -g54 -sg38 -g39 -sg40 
-S'73385903' -p60 -sg42 -g49 -ssssS'reference_sequence_records' -p61 -(dp62 -S'protein' -p63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4' -p64 -sS'transcript' -p65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4' -p66 -sssS'flag' -p67 -S'gene_variant' -p68 -sS'metadata' -p69 -(dp70 -S'variantvalidator_hgvs_version' -p71 -S'1.1.3' -p72 -sS'uta_schema' -p73 -S'uta_20180821' -p74 -sS'seqrepo_db' -p75 -S'2018-08-21' -p76 -sS'variantvalidator_version' -p77 -S'v0.2' -p78 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant10.txt b/VariantValidator/testing/testOutputsMasterITS/variant10.txt deleted file mode 100644 index 7c89309a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant10.txt +++ /dev/null @@ -1,80 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'COL5A1 is not part of genome build GRCh37' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'hgvs_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'COL5A1:c.5071A>T' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'hgvs_lrg_variant' -p20 -g4 -sS'hgvs_transcript_variant' -p21 -g4 -sS'hgvs_refseqgene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -sS'reference_sequence_records' -p25 -g4 -ssS'flag' -p26 -S'warning' -p27 -sS'metadata' -p28 -(dp29 -S'variantvalidator_hgvs_version' -p30 -S'1.1.3' -p31 -sS'uta_schema' -p32 -S'uta_20180821' -p33 -sS'seqrepo_db' -p34 -S'2018-08-21' -p35 -sS'variantvalidator_version' -p36 -S'v0.2' -p37 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant100.txt b/VariantValidator/testing/testOutputsMasterITS/variant100.txt deleted file mode 100644 index 8d42226f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant100.txt +++ /dev/null @@ -1,309 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_007121.5:c.515_516insT' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' -p19 -aS'NM_007121.5:c.515_517 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p20 -aS'Caution should be used when reporting the displayed variant descriptions' -p21 -aS'If you are unsure, please contact admin' -p22 -aS'RefSeqGene record not available' -p23 -asS'refseqgene_context_intronic_sequence' -p24 -g16 -sS'alt_genomic_loci' -p25 -(lp26 -sS'transcript_description' -p27 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA -p28 -sS'gene_symbol' -p29 -S'NR1H2' -p30 -sS'hgvs_predicted_protein_consequence' -p31 -(dp32 -S'tlr' -p33 -S'NP_009052.3:p.(Lys172AsnfsTer35)' -p34 -sS'slr' -p35 -S'NP_009052.3:p.(K172Nfs*35)' -p36 -ssS'submitted_variant' -p37 -S'NC_000019.10:g.50378564_50378565insTACA' -p38 -sS'genome_context_intronic_sequence' -p39 -g16 -sS'hgvs_lrg_variant' -p40 -g16 -sS'hgvs_transcript_variant' -p41 -S'NM_007121.5:c.515_516insT' -p42 -sS'hgvs_refseqgene_variant' -p43 -g16 -sS'primary_assembly_loci' -p44 -(dp45 -S'grch38' -p46 -(dp47 -S'hgvs_genomic_description' -p48 -S'NC_000019.10:g.50378564_50378565insTACA' -p49 -sS'vcf' -p50 -(dp51 -S'chr' -p52 -S'19' -p53 -sS'ref' -p54 -S'A' -p55 -sS'pos' -p56 -S'50378563' -p57 
-sS'alt' -p58 -VAATAC -p59 -sssS'grch37' -p60 -(dp61 -g48 -S'NC_000019.9:g.50881821_50881822insTACA' -p62 -sg50 -(dp63 -g52 -g53 -sg54 -g55 -sg56 -S'50881820' -p64 -sg58 -VAATAC -p65 -sssS'hg38' -p66 -(dp67 -g48 -S'NC_000019.10:g.50378564_50378565insTACA' -p68 -sg50 -(dp69 -g52 -S'chr19' -p70 -sg54 -g55 -sg56 -S'50378563' -p71 -sg58 -VAATAC -p72 -sssS'hg19' -p73 -(dp74 -g48 -S'NC_000019.9:g.50881821_50881822insTACA' -p75 -sg50 -(dp76 -g52 -g70 -sg54 -g55 -sg56 -S'50881820' -p77 -sg58 -VAATAC -p78 -ssssS'reference_sequence_records' -p79 -(dp80 -S'protein' -p81 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3' -p82 -sS'transcript' -p83 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5' -p84 -sssS'NM_001256647.1:c.224_225insT' -p85 -(dp86 -g15 -g16 -sg17 -(lp87 -S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' -p88 -aS'NM_001256647.1:c.224_226 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p89 -aS'Caution should be used when reporting the displayed variant descriptions' -p90 -aS'If you are unsure, please contact admin' -p91 -aS'RefSeqGene record not available' -p92 -asg24 -g16 -sg25 -(lp93 -sg27 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA -p94 -sg29 -S'NR1H2' -p95 -sg31 -(dp96 -g33 -S'NP_001243576.1:p.(Lys75AsnfsTer35)' -p97 -sg35 -S'NP_001243576.1:p.(K75Nfs*35)' -p98 -ssg37 -g38 -sg39 -g16 -sg40 -g16 -sg41 -S'NM_001256647.1:c.224_225insT' -p99 -sg43 -g16 -sg44 -(dp100 -S'grch38' -p101 -(dp102 -g48 -S'NC_000019.10:g.50378564_50378565insTACA' -p103 -sg50 -(dp104 -g52 -g53 -sg54 -g55 -sg56 -S'50378563' -p105 -sg58 -VAATAC -p106 -sssS'grch37' -p107 -(dp108 -g48 -S'NC_000019.9:g.50881821_50881822insTACA' -p109 -sg50 -(dp110 -g52 -g53 -sg54 -g55 -sg56 -S'50881820' -p111 -sg58 -VAATAC -p112 -sssg66 -(dp113 -g48 -S'NC_000019.10:g.50378564_50378565insTACA' -p114 -sg50 -(dp115 -g52 -g70 -sg54 -g55 -sg56 -S'50378563' -p116 -sg58 -VAATAC -p117 
-sssS'hg19' -p118 -(dp119 -g48 -S'NC_000019.9:g.50881821_50881822insTACA' -p120 -sg50 -(dp121 -g52 -g70 -sg54 -g55 -sg56 -S'50881820' -p122 -sg58 -VAATAC -p123 -ssssg79 -(dp124 -g81 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1' -p125 -sg83 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1' -p126 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant101.txt b/VariantValidator/testing/testOutputsMasterITS/variant101.txt deleted file mode 100644 index 58964b7c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant101.txt +++ /dev/null @@ -1,316 +0,0 @@ -(dp0 -S'NM_007121.5:c.514_520=' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' -p7 -aS'NM_007121.5:c.514_520 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'RefSeqGene record not available' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA -p16 -sS'gene_symbol' -p17 -S'NR1H2' -p18 -sS'hgvs_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_009052.3:p.(Lys172=)' -p22 -sS'slr' -p23 -S'NP_009052.3:p.(K172=)' -p24 -ssS'submitted_variant' -p25 -S'NC_000019.10:g.50378565_50378567dup' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'hgvs_lrg_variant' -p28 -g4 -sS'hgvs_transcript_variant' -p29 -S'NM_007121.5:c.514_520=' -p30 -sS'hgvs_refseqgene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'grch38' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000019.10:g.50378565_50378567dup' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'19' -p41 -sS'ref' -p42 -S'AAC' 
-p43 -sS'pos' -p44 -S'50378564' -p45 -sS'alt' -p46 -VAACAAC -p47 -sssS'grch37' -p48 -(dp49 -g36 -S'NC_000019.9:g.50881822_50881824dup' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'AAC' -p52 -sg44 -S'50881821' -p53 -sg46 -VAACAAC -p54 -sssS'hg38' -p55 -(dp56 -g36 -S'NC_000019.10:g.50378565_50378567dup' -p57 -sg38 -(dp58 -g40 -S'chr19' -p59 -sg42 -S'AAC' -p60 -sg44 -S'50378564' -p61 -sg46 -VAACAAC -p62 -sssS'hg19' -p63 -(dp64 -g36 -S'NC_000019.9:g.50881822_50881824dup' -p65 -sg38 -(dp66 -g40 -g59 -sg42 -S'AAC' -p67 -sg44 -S'50881821' -p68 -sg46 -VAACAAC -p69 -ssssS'reference_sequence_records' -p70 -(dp71 -S'protein' -p72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3' -p73 -sS'transcript' -p74 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5' -p75 -sssS'NM_001256647.1:c.223_229=' -p76 -(dp77 -g3 -g4 -sg5 -(lp78 -S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' -p79 -aS'NM_001256647.1:c.223_229 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p80 -aS'Caution should be used when reporting the displayed variant descriptions' -p81 -aS'If you are unsure, please contact admin' -p82 -aS'RefSeqGene record not available' -p83 -asg12 -g4 -sg13 -(lp84 -sg15 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA -p85 -sg17 -S'NR1H2' -p86 -sg19 -(dp87 -g21 -S'NP_001243576.1:p.(Lys75=)' -p88 -sg23 -S'NP_001243576.1:p.(K75=)' -p89 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_001256647.1:c.223_229=' -p90 -sg31 -g4 -sg32 -(dp91 -S'grch38' -p92 -(dp93 -g36 -S'NC_000019.10:g.50378565_50378567dup' -p94 -sg38 -(dp95 -g40 -g41 -sg42 -S'AAC' -p96 -sg44 -S'50378564' -p97 -sg46 -VAACAAC -p98 -sssS'grch37' -p99 -(dp100 -g36 -S'NC_000019.9:g.50881822_50881824dup' -p101 -sg38 -(dp102 -g40 -g41 -sg42 -S'AAC' -p103 -sg44 -S'50881821' -p104 -sg46 -VAACAAC -p105 -sssg55 -(dp106 -g36 -S'NC_000019.10:g.50378565_50378567dup' -p107 -sg38 -(dp108 -g40 -g59 -sg42 -S'AAC' -p109 -sg44 
-S'50378564' -p110 -sg46 -VAACAAC -p111 -sssS'hg19' -p112 -(dp113 -g36 -S'NC_000019.9:g.50881822_50881824dup' -p114 -sg38 -(dp115 -g40 -g59 -sg42 -S'AAC' -p116 -sg44 -S'50881821' -p117 -sg46 -VAACAAC -p118 -ssssg70 -(dp119 -g72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1' -p120 -sg74 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1' -p121 -sssS'flag' -p122 -S'gene_variant' -p123 -sS'metadata' -p124 -(dp125 -S'variantvalidator_hgvs_version' -p126 -S'1.1.3' -p127 -sS'uta_schema' -p128 -S'uta_20180821' -p129 -sS'seqrepo_db' -p130 -S'2018-08-21' -p131 -sS'variantvalidator_version' -p132 -S'v0.2' -p133 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant102.txt b/VariantValidator/testing/testOutputsMasterITS/variant102.txt deleted file mode 100644 index 788c4620..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant102.txt +++ /dev/null @@ -1,302 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_007121.5:c.519_521del' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' -p19 -aS'NM_007121.5:c.515_517 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p20 -aS'Caution should be used when reporting the displayed variant descriptions' -p21 -aS'If you are unsure, please contact admin' -p22 -aS'RefSeqGene record not available' -p23 -asS'refseqgene_context_intronic_sequence' -p24 -g16 -sS'alt_genomic_loci' -p25 -(lp26 -sS'transcript_description' -p27 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA -p28 -sS'gene_symbol' -p29 -S'NR1H2' -p30 -sS'hgvs_predicted_protein_consequence' -p31 
-(dp32 -S'tlr' -p33 -S'NP_009052.3:p.(Gln176del)' -p34 -sS'slr' -p35 -S'NP_009052.3:p.(Q176del)' -p36 -ssS'submitted_variant' -p37 -S'NC_000019.10:g.50378563_50378564=' -p38 -sS'genome_context_intronic_sequence' -p39 -g16 -sS'hgvs_lrg_variant' -p40 -g16 -sS'hgvs_transcript_variant' -p41 -S'NM_007121.5:c.519_521del' -p42 -sS'hgvs_refseqgene_variant' -p43 -g16 -sS'primary_assembly_loci' -p44 -(dp45 -S'grch38' -p46 -(dp47 -S'hgvs_genomic_description' -p48 -S'NC_000019.10:g.50378563_50378564=' -p49 -sS'vcf' -p50 -(dp51 -S'chr' -p52 -S'19' -p53 -sS'ref' -p54 -S'AA' -p55 -sS'pos' -p56 -S'50378563' -p57 -sS'alt' -p58 -g55 -sssS'grch37' -p59 -(dp60 -g48 -S'NC_000019.9:g.50881820_50881821=' -p61 -sg50 -(dp62 -g52 -g53 -sg54 -S'AA' -p63 -sg56 -S'50881820' -p64 -sg58 -g63 -sssS'hg38' -p65 -(dp66 -g48 -S'NC_000019.10:g.50378563_50378564=' -p67 -sg50 -(dp68 -g52 -S'chr19' -p69 -sg54 -g55 -sg56 -S'50378563' -p70 -sg58 -g55 -sssS'hg19' -p71 -(dp72 -g48 -S'NC_000019.9:g.50881820_50881821=' -p73 -sg50 -(dp74 -g52 -g69 -sg54 -g63 -sg56 -S'50881820' -p75 -sg58 -g63 -ssssS'reference_sequence_records' -p76 -(dp77 -S'protein' -p78 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3' -p79 -sS'transcript' -p80 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5' -p81 -sssS'NM_001256647.1:c.228_230del' -p82 -(dp83 -g15 -g16 -sg17 -(lp84 -S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' -p85 -aS'NM_001256647.1:c.224_226 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p86 -aS'Caution should be used when reporting the displayed variant descriptions' -p87 -aS'If you are unsure, please contact admin' -p88 -aS'RefSeqGene record not available' -p89 -asg24 -g16 -sg25 -(lp90 -sg27 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA -p91 -sg29 -S'NR1H2' -p92 -sg31 -(dp93 -g33 -S'NP_001243576.1:p.(Gln79del)' -p94 -sg35 -S'NP_001243576.1:p.(Q79del)' -p95 -ssg37 -g38 -sg39 -g16 -sg40 
-g16 -sg41 -S'NM_001256647.1:c.228_230del' -p96 -sg43 -g16 -sg44 -(dp97 -S'grch38' -p98 -(dp99 -g48 -S'NC_000019.10:g.50378563_50378564=' -p100 -sg50 -(dp101 -g52 -g53 -sg54 -g55 -sg56 -S'50378563' -p102 -sg58 -g55 -sssS'grch37' -p103 -(dp104 -g48 -S'NC_000019.9:g.50881820_50881821=' -p105 -sg50 -(dp106 -g52 -g53 -sg54 -g63 -sg56 -S'50881820' -p107 -sg58 -g63 -sssg65 -(dp108 -g48 -S'NC_000019.10:g.50378563_50378564=' -p109 -sg50 -(dp110 -g52 -g69 -sg54 -g55 -sg56 -S'50378563' -p111 -sg58 -g55 -sssS'hg19' -p112 -(dp113 -g48 -S'NC_000019.9:g.50881820_50881821=' -p114 -sg50 -(dp115 -g52 -g69 -sg54 -g63 -sg56 -S'50881820' -p116 -sg58 -g63 -ssssg76 -(dp117 -g78 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1' -p118 -sg80 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1' -p119 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant103.txt b/VariantValidator/testing/testOutputsMasterITS/variant103.txt deleted file mode 100644 index 69998578..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant103.txt +++ /dev/null @@ -1,309 +0,0 @@ -(dp0 -S'NM_001256647.1:c.224_226delinsTCGG' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' -p7 -aS'NM_001256647.1:c.224_226 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'RefSeqGene record not available' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA -p16 -sS'gene_symbol' -p17 -S'NR1H2' -p18 -sS'hgvs_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_001243576.1:p.(Lys75IlefsTer35)' -p22 
-sS'slr' -p23 -S'NP_001243576.1:p.(K75Ifs*35)' -p24 -ssS'submitted_variant' -p25 -S'NC_000019.10:g.50378563_50378564insTCGG' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'hgvs_lrg_variant' -p28 -g4 -sS'hgvs_transcript_variant' -p29 -S'NM_001256647.1:c.224_226delinsTCGG' -p30 -sS'hgvs_refseqgene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'grch38' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000019.10:g.50378563_50378564insTCGG' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'19' -p41 -sS'ref' -p42 -S'A' -p43 -sS'pos' -p44 -S'50378563' -p45 -sS'alt' -p46 -VATCGG -p47 -sssS'grch37' -p48 -(dp49 -g36 -S'NC_000019.9:g.50881820_50881821insTCGG' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -g43 -sg44 -S'50881820' -p52 -sg46 -VATCGG -p53 -sssS'hg38' -p54 -(dp55 -g36 -S'NC_000019.10:g.50378563_50378564insTCGG' -p56 -sg38 -(dp57 -g40 -S'chr19' -p58 -sg42 -g43 -sg44 -S'50378563' -p59 -sg46 -VATCGG -p60 -sssS'hg19' -p61 -(dp62 -g36 -S'NC_000019.9:g.50881820_50881821insTCGG' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -g43 -sg44 -S'50881820' -p65 -sg46 -VATCGG -p66 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1' -p72 -sssS'flag' -p73 -S'gene_variant' -p74 -sS'NM_007121.5:c.515_517delinsTCGG' -p75 -(dp76 -g3 -g4 -sg5 -(lp77 -S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' -p78 -aS'NM_007121.5:c.515_517 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p79 -aS'Caution should be used when reporting the displayed variant descriptions' -p80 -aS'If you are unsure, please contact admin' -p81 -aS'RefSeqGene record not available' -p82 -asg12 -g4 -sg13 -(lp83 -sg15 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA -p84 -sg17 -S'NR1H2' -p85 -sg19 -(dp86 -g21 -S'NP_009052.3:p.(Lys172IlefsTer35)' -p87 -sg23 
-S'NP_009052.3:p.(K172Ifs*35)' -p88 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_007121.5:c.515_517delinsTCGG' -p89 -sg31 -g4 -sg32 -(dp90 -S'grch38' -p91 -(dp92 -g36 -S'NC_000019.10:g.50378563_50378564insTCGG' -p93 -sg38 -(dp94 -g40 -g41 -sg42 -g43 -sg44 -S'50378563' -p95 -sg46 -VATCGG -p96 -sssS'grch37' -p97 -(dp98 -g36 -S'NC_000019.9:g.50881820_50881821insTCGG' -p99 -sg38 -(dp100 -g40 -g41 -sg42 -g43 -sg44 -S'50881820' -p101 -sg46 -VATCGG -p102 -sssg54 -(dp103 -g36 -S'NC_000019.10:g.50378563_50378564insTCGG' -p104 -sg38 -(dp105 -g40 -g58 -sg42 -g43 -sg44 -S'50378563' -p106 -sg46 -VATCGG -p107 -sssS'hg19' -p108 -(dp109 -g36 -S'NC_000019.9:g.50881820_50881821insTCGG' -p110 -sg38 -(dp111 -g40 -g58 -sg42 -g43 -sg44 -S'50881820' -p112 -sg46 -VATCGG -p113 -ssssg67 -(dp114 -g69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3' -p115 -sg71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5' -p116 -sssS'metadata' -p117 -(dp118 -S'variantvalidator_hgvs_version' -p119 -S'1.1.3' -p120 -sS'uta_schema' -p121 -S'uta_20180821' -p122 -sS'seqrepo_db' -p123 -S'2018-08-21' -p124 -sS'variantvalidator_version' -p125 -S'v0.2' -p126 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant104.txt b/VariantValidator/testing/testOutputsMasterITS/variant104.txt deleted file mode 100644 index e23d6793..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant104.txt +++ /dev/null @@ -1,289 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_007121.5:c.514_515inv' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'RefSeqGene record not available' -p19 -asS'refseqgene_context_intronic_sequence' -p20 -g16 -sS'alt_genomic_loci' -p21 -(lp22 -sS'transcript_description' -p23 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA -p24 -sS'gene_symbol' -p25 -S'NR1H2' -p26 -sS'hgvs_predicted_protein_consequence' -p27 -(dp28 -S'tlr' -p29 -S'NP_009052.3:p.(Lys172Leu)' -p30 -sS'slr' -p31 -S'NP_009052.3:p.(K172L)' -p32 -ssS'submitted_variant' -p33 -S'NC_000019.10:g.50378563delinsTTAC' -p34 -sS'genome_context_intronic_sequence' -p35 -g16 -sS'hgvs_lrg_variant' -p36 -g16 -sS'hgvs_transcript_variant' -p37 -S'NM_007121.5:c.514_515inv' -p38 -sS'hgvs_refseqgene_variant' -p39 -g16 -sS'primary_assembly_loci' -p40 -(dp41 -S'grch38' -p42 -(dp43 -S'hgvs_genomic_description' -p44 -S'NC_000019.10:g.50378563delinsTTAC' -p45 -sS'vcf' -p46 -(dp47 -S'chr' -p48 -S'19' -p49 -sS'ref' -p50 -S'A' -p51 -sS'pos' -p52 -S'50378563' -p53 -sS'alt' -p54 -S'TTAC' -p55 -sssS'grch37' -p56 -(dp57 -g44 -S'NC_000019.9:g.50881820delinsTTAC' -p58 -sg46 -(dp59 -g48 -g49 -sg50 -g51 -sg52 -S'50881820' -p60 -sg54 -S'TTAC' -p61 -sssS'hg38' -p62 -(dp63 -g44 -S'NC_000019.10:g.50378563delinsTTAC' -p64 -sg46 -(dp65 -g48 -S'chr19' -p66 -sg50 -g51 -sg52 -S'50378563' -p67 -sg54 -g55 -sssS'hg19' -p68 -(dp69 -g44 
-S'NC_000019.9:g.50881820delinsTTAC' -p70 -sg46 -(dp71 -g48 -g66 -sg50 -g51 -sg52 -S'50881820' -p72 -sg54 -g61 -ssssS'reference_sequence_records' -p73 -(dp74 -S'protein' -p75 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3' -p76 -sS'transcript' -p77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5' -p78 -sssS'NM_001256647.1:c.223_224inv' -p79 -(dp80 -g15 -g16 -sg17 -(lp81 -S'RefSeqGene record not available' -p82 -asg20 -g16 -sg21 -(lp83 -sg23 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA -p84 -sg25 -S'NR1H2' -p85 -sg27 -(dp86 -g29 -S'NP_001243576.1:p.(Lys75Leu)' -p87 -sg31 -S'NP_001243576.1:p.(K75L)' -p88 -ssg33 -g34 -sg35 -g16 -sg36 -g16 -sg37 -S'NM_001256647.1:c.223_224inv' -p89 -sg39 -g16 -sg40 -(dp90 -S'grch38' -p91 -(dp92 -g44 -S'NC_000019.10:g.50378563delinsTTAC' -p93 -sg46 -(dp94 -g48 -g49 -sg50 -g51 -sg52 -S'50378563' -p95 -sg54 -S'TTAC' -p96 -sssS'grch37' -p97 -(dp98 -g44 -S'NC_000019.9:g.50881820delinsTTAC' -p99 -sg46 -(dp100 -g48 -g49 -sg50 -g51 -sg52 -S'50881820' -p101 -sg54 -S'TTAC' -p102 -sssg62 -(dp103 -g44 -S'NC_000019.10:g.50378563delinsTTAC' -p104 -sg46 -(dp105 -g48 -g66 -sg50 -g51 -sg52 -S'50378563' -p106 -sg54 -g96 -sssS'hg19' -p107 -(dp108 -g44 -S'NC_000019.9:g.50881820delinsTTAC' -p109 -sg46 -(dp110 -g48 -g66 -sg50 -g51 -sg52 -S'50881820' -p111 -sg54 -g102 -ssssg73 -(dp112 -g75 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1' -p113 -sg77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1' -p114 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant105.txt b/VariantValidator/testing/testOutputsMasterITS/variant105.txt deleted file mode 100644 index 4fc3fb9a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant105.txt +++ /dev/null @@ -1,309 +0,0 @@ -(dp0 -S'NM_007121.5:c.514_515insT' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' -p7 -aS'NM_007121.5:c.514_515 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'RefSeqGene record not available' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA -p16 -sS'gene_symbol' -p17 -S'NR1H2' -p18 -sS'hgvs_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_009052.3:p.(Lys172IlefsTer35)' -p22 -sS'slr' -p23 -S'NP_009052.3:p.(K172Ifs*35)' -p24 -ssS'submitted_variant' -p25 -S'NC_000019.10:g.50378563_50378564insTAAC' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'hgvs_lrg_variant' -p28 -g4 -sS'hgvs_transcript_variant' -p29 -S'NM_007121.5:c.514_515insT' -p30 -sS'hgvs_refseqgene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'grch38' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000019.10:g.50378563_50378564insTAAC' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'19' -p41 -sS'ref' -p42 -S'A' -p43 -sS'pos' -p44 -S'50378563' -p45 -sS'alt' -p46 -S'ATAAC' -p47 -sssS'grch37' -p48 -(dp49 -g36 -S'NC_000019.9:g.50881820_50881821insTAAC' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -g43 -sg44 -S'50881820' -p52 -sg46 -S'ATAAC' -p53 -sssS'hg38' -p54 -(dp55 -g36 
-S'NC_000019.10:g.50378563_50378564insTAAC' -p56 -sg38 -(dp57 -g40 -S'chr19' -p58 -sg42 -g43 -sg44 -S'50378563' -p59 -sg46 -S'ATAAC' -p60 -sssS'hg19' -p61 -(dp62 -g36 -S'NC_000019.9:g.50881820_50881821insTAAC' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -g43 -sg44 -S'50881820' -p65 -sg46 -S'ATAAC' -p66 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5' -p72 -sssS'NM_001256647.1:c.223_224insT' -p73 -(dp74 -g3 -g4 -sg5 -(lp75 -S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' -p76 -aS'NM_001256647.1:c.223_224 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p77 -aS'Caution should be used when reporting the displayed variant descriptions' -p78 -aS'If you are unsure, please contact admin' -p79 -aS'RefSeqGene record not available' -p80 -asg12 -g4 -sg13 -(lp81 -sg15 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA -p82 -sg17 -S'NR1H2' -p83 -sg19 -(dp84 -g21 -S'NP_001243576.1:p.(Lys75IlefsTer35)' -p85 -sg23 -S'NP_001243576.1:p.(K75Ifs*35)' -p86 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_001256647.1:c.223_224insT' -p87 -sg31 -g4 -sg32 -(dp88 -S'grch38' -p89 -(dp90 -g36 -S'NC_000019.10:g.50378563_50378564insTAAC' -p91 -sg38 -(dp92 -g40 -g41 -sg42 -g43 -sg44 -S'50378563' -p93 -sg46 -S'ATAAC' -p94 -sssS'grch37' -p95 -(dp96 -g36 -S'NC_000019.9:g.50881820_50881821insTAAC' -p97 -sg38 -(dp98 -g40 -g41 -sg42 -g43 -sg44 -S'50881820' -p99 -sg46 -S'ATAAC' -p100 -sssg54 -(dp101 -g36 -S'NC_000019.10:g.50378563_50378564insTAAC' -p102 -sg38 -(dp103 -g40 -g58 -sg42 -g43 -sg44 -S'50378563' -p104 -sg46 -S'ATAAC' -p105 -sssS'hg19' -p106 -(dp107 -g36 -S'NC_000019.9:g.50881820_50881821insTAAC' -p108 -sg38 -(dp109 -g40 -g58 -sg42 -g43 -sg44 -S'50881820' -p110 -sg46 -S'ATAAC' -p111 -ssssg67 -(dp112 -g69 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1' -p113 -sg71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1' -p114 -sssS'flag' -p115 -S'gene_variant' -p116 -sS'metadata' -p117 -(dp118 -S'variantvalidator_hgvs_version' -p119 -S'1.1.3' -p120 -sS'uta_schema' -p121 -S'uta_20180821' -p122 -sS'seqrepo_db' -p123 -S'2018-08-21' -p124 -sS'variantvalidator_version' -p125 -S'v0.2' -p126 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant106.txt b/VariantValidator/testing/testOutputsMasterITS/variant106.txt deleted file mode 100644 index 829c9c64..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant106.txt +++ /dev/null @@ -1,309 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_001256647.1:c.222_228del' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' -p9 -aS'NM_001256647.1:c.222_228 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'refseqgene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA -p18 -sS'gene_symbol' -p19 -S'NR1H2' -p20 -sS'hgvs_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_001243576.1:p.(Lys75SerfsTer47)' -p24 -sS'slr' -p25 -S'NP_001243576.1:p.(K75Sfs*47)' -p26 -ssS'submitted_variant' -p27 -S'NC_000019.10:g.50378562_50378565del' -p28 -sS'genome_context_intronic_sequence' -p29 -g6 -sS'hgvs_lrg_variant' -p30 -g6 -sS'hgvs_transcript_variant' -p31 -S'NM_001256647.1:c.222_228del' -p32 -sS'hgvs_refseqgene_variant' -p33 -g6 -sS'primary_assembly_loci' -p34 -(dp35 -S'grch38' -p36 
-(dp37 -S'hgvs_genomic_description' -p38 -S'NC_000019.10:g.50378562_50378565del' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'19' -p43 -sS'ref' -p44 -S'GGAAA' -p45 -sS'pos' -p46 -S'50378561' -p47 -sS'alt' -p48 -S'G' -p49 -sssS'grch37' -p50 -(dp51 -g38 -S'NC_000019.9:g.50881819_50881822del' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'GGAAA' -p54 -sg46 -S'50881818' -p55 -sg48 -g49 -sssS'hg38' -p56 -(dp57 -g38 -S'NC_000019.10:g.50378562_50378565del' -p58 -sg40 -(dp59 -g42 -S'chr19' -p60 -sg44 -S'GGAAA' -p61 -sg46 -S'50378561' -p62 -sg48 -g49 -sssS'hg19' -p63 -(dp64 -g38 -S'NC_000019.9:g.50881819_50881822del' -p65 -sg40 -(dp66 -g42 -g60 -sg44 -S'GGAAA' -p67 -sg46 -S'50881818' -p68 -sg48 -g49 -ssssS'reference_sequence_records' -p69 -(dp70 -S'protein' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1' -p72 -sS'transcript' -p73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1' -p74 -sssS'NM_007121.5:c.513_519del' -p75 -(dp76 -g5 -g6 -sg7 -(lp77 -S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' -p78 -aS'NM_007121.5:c.513_519 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p79 -aS'Caution should be used when reporting the displayed variant descriptions' -p80 -aS'If you are unsure, please contact admin' -p81 -aS'RefSeqGene record not available' -p82 -asg14 -g6 -sg15 -(lp83 -sg17 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA -p84 -sg19 -S'NR1H2' -p85 -sg21 -(dp86 -g23 -S'NP_009052.3:p.(Lys172SerfsTer47)' -p87 -sg25 -S'NP_009052.3:p.(K172Sfs*47)' -p88 -ssg27 -g28 -sg29 -g6 -sg30 -g6 -sg31 -S'NM_007121.5:c.513_519del' -p89 -sg33 -g6 -sg34 -(dp90 -S'grch38' -p91 -(dp92 -g38 -S'NC_000019.10:g.50378562_50378565del' -p93 -sg40 -(dp94 -g42 -g43 -sg44 -S'GGAAA' -p95 -sg46 -S'50378561' -p96 -sg48 -g49 -sssS'grch37' -p97 -(dp98 -g38 -S'NC_000019.9:g.50881819_50881822del' -p99 -sg40 -(dp100 -g42 -g43 -sg44 -S'GGAAA' -p101 -sg46 -S'50881818' -p102 -sg48 -g49 
-sssg56 -(dp103 -g38 -S'NC_000019.10:g.50378562_50378565del' -p104 -sg40 -(dp105 -g42 -g60 -sg44 -S'GGAAA' -p106 -sg46 -S'50378561' -p107 -sg48 -g49 -sssS'hg19' -p108 -(dp109 -g38 -S'NC_000019.9:g.50881819_50881822del' -p110 -sg40 -(dp111 -g42 -g60 -sg44 -S'GGAAA' -p112 -sg46 -S'50881818' -p113 -sg48 -g49 -ssssg69 -(dp114 -g71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3' -p115 -sg73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5' -p116 -sssS'metadata' -p117 -(dp118 -S'variantvalidator_hgvs_version' -p119 -S'1.1.3' -p120 -sS'uta_schema' -p121 -S'uta_20180821' -p122 -sS'seqrepo_db' -p123 -S'2018-08-21' -p124 -sS'variantvalidator_version' -p125 -S'v0.2' -p126 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant107.txt b/VariantValidator/testing/testOutputsMasterITS/variant107.txt deleted file mode 100644 index e625cb1a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant107.txt +++ /dev/null @@ -1,312 +0,0 @@ -(dp0 -S'NM_001256647.1:c.222_228delinsTC' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' -p7 -aS'NM_001256647.1:c.222_228 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'RefSeqGene record not available' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA -p16 -sS'gene_symbol' -p17 -S'NR1H2' -p18 -sS'hgvs_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_001243576.1:p.(Lys75ProfsTer33)' -p22 -sS'slr' -p23 -S'NP_001243576.1:p.(K75Pfs*33)' -p24 -ssS'submitted_variant' -p25 
-S'NC_000019.10:g.50378562_50378565delinsTC' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'hgvs_lrg_variant' -p28 -g4 -sS'hgvs_transcript_variant' -p29 -S'NM_001256647.1:c.222_228delinsTC' -p30 -sS'hgvs_refseqgene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'grch38' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000019.10:g.50378562_50378565delinsTC' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'19' -p41 -sS'ref' -p42 -S'GAAA' -p43 -sS'pos' -p44 -S'50378562' -p45 -sS'alt' -p46 -S'TC' -p47 -sssS'grch37' -p48 -(dp49 -g36 -S'NC_000019.9:g.50881819_50881822delinsTC' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'GAAA' -p52 -sg44 -S'50881819' -p53 -sg46 -S'TC' -p54 -sssS'hg38' -p55 -(dp56 -g36 -S'NC_000019.10:g.50378562_50378565delinsTC' -p57 -sg38 -(dp58 -g40 -S'chr19' -p59 -sg42 -S'GAAA' -p60 -sg44 -S'50378562' -p61 -sg46 -g47 -sssS'hg19' -p62 -(dp63 -g36 -S'NC_000019.9:g.50881819_50881822delinsTC' -p64 -sg38 -(dp65 -g40 -g59 -sg42 -S'GAAA' -p66 -sg44 -S'50881819' -p67 -sg46 -g54 -ssssS'reference_sequence_records' -p68 -(dp69 -S'protein' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1' -p71 -sS'transcript' -p72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1' -p73 -sssS'flag' -p74 -S'gene_variant' -p75 -sS'NM_007121.5:c.513_519delinsTC' -p76 -(dp77 -g3 -g4 -sg5 -(lp78 -S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' -p79 -aS'NM_007121.5:c.513_519 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p80 -aS'Caution should be used when reporting the displayed variant descriptions' -p81 -aS'If you are unsure, please contact admin' -p82 -aS'RefSeqGene record not available' -p83 -asg12 -g4 -sg13 -(lp84 -sg15 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA -p85 -sg17 -S'NR1H2' -p86 -sg19 -(dp87 -g21 -S'NP_009052.3:p.(Lys172ProfsTer33)' -p88 -sg23 -S'NP_009052.3:p.(K172Pfs*33)' -p89 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 
-S'NM_007121.5:c.513_519delinsTC' -p90 -sg31 -g4 -sg32 -(dp91 -S'grch38' -p92 -(dp93 -g36 -S'NC_000019.10:g.50378562_50378565delinsTC' -p94 -sg38 -(dp95 -g40 -g41 -sg42 -S'GAAA' -p96 -sg44 -S'50378562' -p97 -sg46 -S'TC' -p98 -sssS'grch37' -p99 -(dp100 -g36 -S'NC_000019.9:g.50881819_50881822delinsTC' -p101 -sg38 -(dp102 -g40 -g41 -sg42 -S'GAAA' -p103 -sg44 -S'50881819' -p104 -sg46 -S'TC' -p105 -sssg55 -(dp106 -g36 -S'NC_000019.10:g.50378562_50378565delinsTC' -p107 -sg38 -(dp108 -g40 -g59 -sg42 -S'GAAA' -p109 -sg44 -S'50378562' -p110 -sg46 -g98 -sssS'hg19' -p111 -(dp112 -g36 -S'NC_000019.9:g.50881819_50881822delinsTC' -p113 -sg38 -(dp114 -g40 -g59 -sg42 -S'GAAA' -p115 -sg44 -S'50881819' -p116 -sg46 -g105 -ssssg68 -(dp117 -g70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3' -p118 -sg72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5' -p119 -sssS'metadata' -p120 -(dp121 -S'variantvalidator_hgvs_version' -p122 -S'1.1.3' -p123 -sS'uta_schema' -p124 -S'uta_20180821' -p125 -sS'seqrepo_db' -p126 -S'2018-08-21' -p127 -sS'variantvalidator_version' -p128 -S'v0.2' -p129 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant108.txt b/VariantValidator/testing/testOutputsMasterITS/variant108.txt deleted file mode 100644 index 3d361f21..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant108.txt +++ /dev/null @@ -1,183 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_198455.2:c.1115_1116insT' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_198455.2 with genome build GRCh37' -p9 -aS'Genome position NC_000007.13:g.149476667 aligns within a Requires Analysis-bp gap in transcript NM_198455.2 between positions c.1116_1117' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'refseqgene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens SCO-spondin (SSPO), mRNA -p18 -sS'gene_symbol' -p19 -S'SSPO' -p20 -sS'hgvs_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_940857.2:p.(Leu374ProfsTer16)' -p24 -sS'slr' -p25 -S'NP_940857.2:p.(L374Pfs*16)' -p26 -ssS'submitted_variant' -p27 -S'NC_000007.14:g.149779575_149779577delinsT' -p28 -sS'genome_context_intronic_sequence' -p29 -g6 -sS'hgvs_lrg_variant' -p30 -g6 -sS'hgvs_transcript_variant' -p31 -S'NM_198455.2:c.1115_1116insT' -p32 -sS'hgvs_refseqgene_variant' -p33 -g6 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'hgvs_genomic_description' -p38 -S'NC_000007.13:g.149476664_149476666delinsTC' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chr7' -p43 -sS'ref' -p44 -S'CAG' -p45 -sS'pos' -p46 -S'149476664' -p47 -sS'alt' -p48 -S'TC' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000007.14:g.149779575_149779577delinsT' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'CAG' -p54 -sg46 -S'149779575' -p55 -sg48 -S'T' -p56 -sssS'grch37' -p57 -(dp58 -g38 
-S'NC_000007.13:g.149476664_149476666delinsTC' -p59 -sg40 -(dp60 -g42 -S'7' -p61 -sg44 -S'CAG' -p62 -sg46 -S'149476664' -p63 -sg48 -g49 -sssS'grch38' -p64 -(dp65 -g38 -S'NC_000007.14:g.149779575_149779577delinsT' -p66 -sg40 -(dp67 -g42 -g61 -sg44 -S'CAG' -p68 -sg46 -S'149779575' -p69 -sg48 -g56 -ssssS'reference_sequence_records' -p70 -(dp71 -S'protein' -p72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2' -p73 -sS'transcript' -p74 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2' -p75 -sssS'metadata' -p76 -(dp77 -S'variantvalidator_hgvs_version' -p78 -S'1.1.3' -p79 -sS'uta_schema' -p80 -S'uta_20180821' -p81 -sS'seqrepo_db' -p82 -S'2018-08-21' -p83 -sS'variantvalidator_version' -p84 -S'v0.2' -p85 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant109.txt b/VariantValidator/testing/testOutputsMasterITS/variant109.txt deleted file mode 100644 index 6997f57a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant109.txt +++ /dev/null @@ -1,183 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_198455.2:c.1116_1118=' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_198455.2 with genome build GRCh37' -p9 -aS'Genome position NC_000007.13:g.149476667 aligns within a Requires Analysis-bp gap in transcript NM_198455.2 between positions c.1116_1117' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'refseqgene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens SCO-spondin (SSPO), mRNA -p18 -sS'gene_symbol' -p19 -S'SSPO' -p20 -sS'hgvs_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_940857.2:p.(Asp372=)' -p24 -sS'slr' -p25 -S'NP_940857.2:p.(D372=)' -p26 -ssS'submitted_variant' -p27 
-S'NC_000007.14:g.149779575_149779577=' -p28 -sS'genome_context_intronic_sequence' -p29 -g6 -sS'hgvs_lrg_variant' -p30 -g6 -sS'hgvs_transcript_variant' -p31 -S'NM_198455.2:c.1116_1118=' -p32 -sS'hgvs_refseqgene_variant' -p33 -g6 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'hgvs_genomic_description' -p38 -S'NC_000007.13:g.149476665_149476666del' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chr7' -p43 -sS'ref' -p44 -S'CAG' -p45 -sS'pos' -p46 -S'149476664' -p47 -sS'alt' -p48 -S'C' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000007.14:g.149779576_149779578del' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'ACAG' -p54 -sg46 -S'149779574' -p55 -sg48 -S'A' -p56 -sssS'grch37' -p57 -(dp58 -g38 -S'NC_000007.13:g.149476665_149476666del' -p59 -sg40 -(dp60 -g42 -S'7' -p61 -sg44 -S'CAG' -p62 -sg46 -S'149476664' -p63 -sg48 -g49 -sssS'grch38' -p64 -(dp65 -g38 -S'NC_000007.14:g.149779576_149779578del' -p66 -sg40 -(dp67 -g42 -g61 -sg44 -S'ACAG' -p68 -sg46 -S'149779574' -p69 -sg48 -g56 -ssssS'reference_sequence_records' -p70 -(dp71 -S'protein' -p72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2' -p73 -sS'transcript' -p74 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2' -p75 -sssS'metadata' -p76 -(dp77 -S'variantvalidator_hgvs_version' -p78 -S'1.1.3' -p79 -sS'uta_schema' -p80 -S'uta_20180821' -p81 -sS'seqrepo_db' -p82 -S'2018-08-21' -p83 -sS'variantvalidator_version' -p84 -S'v0.2' -p85 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant11.txt b/VariantValidator/testing/testOutputsMasterITS/variant11.txt deleted file mode 100644 index bd208042..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant11.txt +++ /dev/null @@ -1,80 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'A transcript reference sequence has not been provided e.g. 
NG_(NM_):c.PositionVariation' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'hgvs_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NG_007400.1:c.5071A>T' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'hgvs_lrg_variant' -p20 -g4 -sS'hgvs_transcript_variant' -p21 -g4 -sS'hgvs_refseqgene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -sS'reference_sequence_records' -p25 -g4 -ssS'flag' -p26 -S'warning' -p27 -sS'metadata' -p28 -(dp29 -S'variantvalidator_hgvs_version' -p30 -S'1.1.3' -p31 -sS'uta_schema' -p32 -S'uta_20180821' -p33 -sS'seqrepo_db' -p34 -S'2018-08-21' -p35 -sS'variantvalidator_version' -p36 -S'v0.2' -p37 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant110.txt b/VariantValidator/testing/testOutputsMasterITS/variant110.txt deleted file mode 100644 index 9b453b66..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant110.txt +++ /dev/null @@ -1,183 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_198455.2:c.1116_1118=' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_198455.2 with genome build GRCh37' -p9 -aS'Genome position NC_000007.13:g.149476667 aligns within a Requires Analysis-bp gap in transcript NM_198455.2 between positions c.1116_1117' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'refseqgene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens SCO-spondin (SSPO), mRNA -p18 -sS'gene_symbol' -p19 -S'SSPO' -p20 -sS'hgvs_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_940857.2:p.(Asp372=)' 
-p24 -sS'slr' -p25 -S'NP_940857.2:p.(D372=)' -p26 -ssS'submitted_variant' -p27 -S'NC_000007.14:g.149779576_149779578del' -p28 -sS'genome_context_intronic_sequence' -p29 -g6 -sS'hgvs_lrg_variant' -p30 -g6 -sS'hgvs_transcript_variant' -p31 -S'NM_198455.2:c.1116_1118=' -p32 -sS'hgvs_refseqgene_variant' -p33 -g6 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'hgvs_genomic_description' -p38 -S'NC_000007.13:g.149476665_149476666del' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chr7' -p43 -sS'ref' -p44 -S'CAG' -p45 -sS'pos' -p46 -S'149476664' -p47 -sS'alt' -p48 -S'C' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000007.14:g.149779576_149779578del' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'ACAG' -p54 -sg46 -S'149779574' -p55 -sg48 -S'A' -p56 -sssS'grch37' -p57 -(dp58 -g38 -S'NC_000007.13:g.149476665_149476666del' -p59 -sg40 -(dp60 -g42 -S'7' -p61 -sg44 -S'CAG' -p62 -sg46 -S'149476664' -p63 -sg48 -g49 -sssS'grch38' -p64 -(dp65 -g38 -S'NC_000007.14:g.149779576_149779578del' -p66 -sg40 -(dp67 -g42 -g61 -sg44 -S'ACAG' -p68 -sg46 -S'149779574' -p69 -sg48 -g56 -ssssS'reference_sequence_records' -p70 -(dp71 -S'protein' -p72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2' -p73 -sS'transcript' -p74 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2' -p75 -sssS'metadata' -p76 -(dp77 -S'variantvalidator_hgvs_version' -p78 -S'1.1.3' -p79 -sS'uta_schema' -p80 -S'uta_20180821' -p81 -sS'seqrepo_db' -p82 -S'2018-08-21' -p83 -sS'variantvalidator_version' -p84 -S'v0.2' -p85 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant111.txt b/VariantValidator/testing/testOutputsMasterITS/variant111.txt deleted file mode 100644 index d2cc31c7..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant111.txt +++ /dev/null @@ -1,182 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_198455.2:c.1115_1116dup' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_198455.2 with genome build GRCh37' -p9 -aS'NC_000007.13:g.149476666 is one of 2 genomic base(s) that fail to align to transcript NM_198455.2 between positions c.1116_1117' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'refseqgene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens SCO-spondin (SSPO), mRNA -p18 -sS'gene_symbol' -p19 -S'SSPO' -p20 -sS'hgvs_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_940857.2:p.(Pro373ThrfsTer6)' -p24 -sS'slr' -p25 -S'NP_940857.2:p.(P373Tfs*6)' -p26 -ssS'submitted_variant' -p27 -S'NC_000007.14:g.149779577del' -p28 -sS'genome_context_intronic_sequence' -p29 -g6 -sS'hgvs_lrg_variant' -p30 -g6 -sS'hgvs_transcript_variant' -p31 -S'NM_198455.2:c.1115_1116dup' -p32 -sS'hgvs_refseqgene_variant' -p33 -g6 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'hgvs_genomic_description' -p38 -S'NC_000007.13:g.149476666G>C' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chr7' -p43 -sS'ref' -p44 -S'G' -p45 -sS'pos' -p46 -S'149476666' -p47 -sS'alt' -p48 -S'C' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000007.14:g.149779577del' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'AG' -p54 -sg46 -S'149779576' -p55 -sg48 -S'A' -p56 -sssS'grch37' -p57 -(dp58 -g38 -S'NC_000007.13:g.149476666G>C' -p59 -sg40 -(dp60 -g42 -S'7' 
-p61 -sg44 -g45 -sg46 -S'149476666' -p62 -sg48 -g49 -sssS'grch38' -p63 -(dp64 -g38 -S'NC_000007.14:g.149779577del' -p65 -sg40 -(dp66 -g42 -g61 -sg44 -S'AG' -p67 -sg46 -S'149779576' -p68 -sg48 -g56 -ssssS'reference_sequence_records' -p69 -(dp70 -S'protein' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2' -p72 -sS'transcript' -p73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2' -p74 -sssS'metadata' -p75 -(dp76 -S'variantvalidator_hgvs_version' -p77 -S'1.1.3' -p78 -sS'uta_schema' -p79 -S'uta_20180821' -p80 -sS'seqrepo_db' -p81 -S'2018-08-21' -p82 -sS'variantvalidator_version' -p83 -S'v0.2' -p84 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant112.txt b/VariantValidator/testing/testOutputsMasterITS/variant112.txt deleted file mode 100644 index 3e53ec68..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant112.txt +++ /dev/null @@ -1,182 +0,0 @@ -(dp0 -S'NM_198455.2:c.1114_1117del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_198455.2 with genome build GRCh37' -p7 -aS'NC_000007.13:g.149476661_149476667 contains 2 genomic base(s) that fail to align to transcript NM_198455.2' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'RefSeqGene record not available' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens SCO-spondin (SSPO), mRNA -p16 -sS'gene_symbol' -p17 -S'SSPO' -p18 -sS'hgvs_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_940857.2:p.(Asp372ProfsTer5)' -p22 -sS'slr' -p23 -S'NP_940857.2:p.(D372Pfs*5)' -p24 -ssS'submitted_variant' -p25 -S'NC_000007.14:g.149779573_149779579del' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'hgvs_lrg_variant' -p28 -g4 -sS'hgvs_transcript_variant' -p29 
-S'NM_198455.2:c.1114_1117del' -p30 -sS'hgvs_refseqgene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000007.13:g.149476662_149476667del' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr7' -p41 -sS'ref' -p42 -S'TGACAGC' -p43 -sS'pos' -p44 -S'149476661' -p45 -sS'alt' -p46 -S'T' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000007.14:g.149779573_149779579del' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'TGACAGCC' -p52 -sg44 -S'149779572' -p53 -sg46 -g47 -sssS'grch37' -p54 -(dp55 -g36 -S'NC_000007.13:g.149476662_149476667del' -p56 -sg38 -(dp57 -g40 -S'7' -p58 -sg42 -S'TGACAGC' -p59 -sg44 -S'149476661' -p60 -sg46 -g47 -sssS'grch38' -p61 -(dp62 -g36 -S'NC_000007.14:g.149779573_149779579del' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -S'TGACAGCC' -p65 -sg44 -S'149779572' -p66 -sg46 -g47 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2' -p72 -sssS'flag' -p73 -S'gene_variant' -p74 -sS'metadata' -p75 -(dp76 -S'variantvalidator_hgvs_version' -p77 -S'1.1.3' -p78 -sS'uta_schema' -p79 -S'uta_20180821' -p80 -sS'seqrepo_db' -p81 -S'2018-08-21' -p82 -sS'variantvalidator_version' -p83 -S'v0.2' -p84 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant113.txt b/VariantValidator/testing/testOutputsMasterITS/variant113.txt deleted file mode 100644 index 83c5d575..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant113.txt +++ /dev/null @@ -1,183 +0,0 @@ -(dp0 -S'NM_198455.2:c.1114_1117delinsCA' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_198455.2 with genome build GRCh37' -p7 -aS'NC_000007.13:g.149476661_149476667 contains 2 genomic base(s) that fail to align to transcript NM_198455.2' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'RefSeqGene record not available' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens SCO-spondin (SSPO), mRNA -p16 -sS'gene_symbol' -p17 -S'SSPO' -p18 -sS'hgvs_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_940857.2:p.(Asp372HisfsTer17)' -p22 -sS'slr' -p23 -S'NP_940857.2:p.(D372Hfs*17)' -p24 -ssS'submitted_variant' -p25 -S'NC_000007.14:g.149779573_149779579delinsCA' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'hgvs_lrg_variant' -p28 -g4 -sS'hgvs_transcript_variant' -p29 -S'NM_198455.2:c.1114_1117delinsCA' -p30 -sS'hgvs_refseqgene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000007.13:g.149476662_149476667delinsCA' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr7' -p41 -sS'ref' -p42 -S'GACAGC' -p43 -sS'pos' -p44 -S'149476662' -p45 -sS'alt' -p46 -S'CA' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000007.14:g.149779573_149779579delinsCA' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'GACAGCC' -p52 -sg44 -S'149779573' -p53 -sg46 -S'CA' -p54 -sssS'grch37' -p55 -(dp56 -g36 -S'NC_000007.13:g.149476662_149476667delinsCA' -p57 
-sg38 -(dp58 -g40 -S'7' -p59 -sg42 -S'GACAGC' -p60 -sg44 -S'149476662' -p61 -sg46 -g47 -sssS'grch38' -p62 -(dp63 -g36 -S'NC_000007.14:g.149779573_149779579delinsCA' -p64 -sg38 -(dp65 -g40 -g59 -sg42 -S'GACAGCC' -p66 -sg44 -S'149779573' -p67 -sg46 -g54 -ssssS'reference_sequence_records' -p68 -(dp69 -S'protein' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2' -p71 -sS'transcript' -p72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2' -p73 -sssS'flag' -p74 -S'gene_variant' -p75 -sS'metadata' -p76 -(dp77 -S'variantvalidator_hgvs_version' -p78 -S'1.1.3' -p79 -sS'uta_schema' -p80 -S'uta_20180821' -p81 -sS'seqrepo_db' -p82 -S'2018-08-21' -p83 -sS'variantvalidator_version' -p84 -S'v0.2' -p85 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant114.txt b/VariantValidator/testing/testOutputsMasterITS/variant114.txt deleted file mode 100644 index 2dfbb5a9..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant114.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'NM_000088.3:c.590_591inv' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p12 -sS'gene_symbol' -p13 -S'COL1A1' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_000079.2:p.(Gly197Asp)' -p18 -sS'slr' -p19 -S'NP_000079.2:p.(G197D)' -p20 -ssS'submitted_variant' -p21 -S'NM_000088.3:c.590_591inv' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_000088.3:c.590_591inv' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000017.10:g.48275361_48275362inv' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr17' -p37 -sS'ref' 
-p38 -S'AC' -p39 -sS'pos' -p40 -S'48275361' -p41 -sS'alt' -p42 -S'GT' -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000017.11:g.50198000_50198001inv' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -S'AC' -p48 -sg40 -S'50198000' -p49 -sg42 -S'GT' -p50 -sssS'grch37' -p51 -(dp52 -g32 -S'NC_000017.10:g.48275361_48275362inv' -p53 -sg34 -(dp54 -g36 -S'17' -p55 -sg38 -S'AC' -p56 -sg40 -S'48275361' -p57 -sg42 -S'GT' -p58 -sssS'grch38' -p59 -(dp60 -g32 -S'NC_000017.11:g.50198000_50198001inv' -p61 -sg34 -(dp62 -g36 -g55 -sg38 -S'AC' -p63 -sg40 -S'50198000' -p64 -sg42 -S'GT' -p65 -ssssS'reference_sequence_records' -p66 -(dp67 -S'protein' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p69 -sS'transcript' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p71 -sssS'flag' -p72 -S'gene_variant' -p73 -sS'metadata' -p74 -(dp75 -S'variantvalidator_hgvs_version' -p76 -S'1.1.3' -p77 -sS'uta_schema' -p78 -S'uta_20180821' -p79 -sS'seqrepo_db' -p80 -S'2018-08-21' -p81 -sS'variantvalidator_version' -p82 -S'v0.2' -p83 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant115.txt b/VariantValidator/testing/testOutputsMasterITS/variant115.txt deleted file mode 100644 index 67f43ad9..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant115.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'NM_024989.3:c.1778_1779inv' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens post-GPI attachment to proteins 1 (PGAP1), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'PGAP1' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_079265.2:p.(Phe593Ter)' -p18 -sS'slr' -p19 -S'NP_079265.2:p.(F593*)' -p20 -ssS'submitted_variant' -p21 -S'NM_024989.3:c.1778_1779inv' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_024989.3:c.1778_1779inv' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000002.11:g.197729793_197729794inv' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr2' -p37 -sS'ref' -p38 -S'AA' -p39 -sS'pos' -p40 -S'197729793' -p41 -sS'alt' -p42 -S'TT' -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000002.12:g.196865069_196865070inv' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -S'AA' -p48 -sg40 -S'196865069' -p49 -sg42 -S'TT' -p50 -sssS'grch37' -p51 -(dp52 -g32 -S'NC_000002.11:g.197729793_197729794inv' -p53 -sg34 -(dp54 -g36 -S'2' -p55 -sg38 -S'AA' -p56 -sg40 -S'197729793' -p57 -sg42 -S'TT' -p58 -sssS'grch38' -p59 -(dp60 -g32 -S'NC_000002.12:g.196865069_196865070inv' -p61 -sg34 -(dp62 -g36 -g55 -sg38 -S'AA' -p63 -sg40 -S'196865069' -p64 -sg42 -S'TT' -p65 -ssssS'reference_sequence_records' -p66 -(dp67 -S'protein' -p68 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NP_079265.2' -p69 -sS'transcript' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_024989.3' -p71 -sssS'flag' -p72 -S'gene_variant' -p73 -sS'metadata' -p74 -(dp75 -S'variantvalidator_hgvs_version' -p76 -S'1.1.3' -p77 -sS'uta_schema' -p78 -S'uta_20180821' -p79 -sS'seqrepo_db' -p80 -S'2018-08-21' -p81 -sS'variantvalidator_version' -p82 -S'v0.2' -p83 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant116.txt b/VariantValidator/testing/testOutputsMasterITS/variant116.txt deleted file mode 100644 index b4fed108..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant116.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_032815.3:c.555_556inv' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens nuclear factor of activated T cells 2 interacting protein (NFATC2IP), mRNA -p14 -sS'gene_symbol' -p15 -S'NFATC2IP' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_116204.3:p.(Glu185_Glu186delinsAspTer)' -p20 -sS'slr' -p21 -S'NP_116204.3:p.(E185_E186delinsD*)' -p22 -ssS'submitted_variant' -p23 -S'NM_032815.3:c.555_556inv' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_032815.3:c.555_556inv' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'grch38' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000016.10:g.28954659_28954660inv' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'16' -p39 -sS'ref' -p40 -S'AG' -p41 -sS'pos' -p42 -S'28954659' -p43 -sS'alt' -p44 -S'CT' -p45 -sssS'grch37' -p46 -(dp47 -g34 -S'NC_000016.9:g.28965980_28965981inv' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'AG' -p50 -sg42 -S'28965980' -p51 -sg44 
-S'CT' -p52 -sssS'hg38' -p53 -(dp54 -g34 -S'NC_000016.10:g.28954659_28954660inv' -p55 -sg36 -(dp56 -g38 -S'chr16' -p57 -sg40 -S'AG' -p58 -sg42 -S'28954659' -p59 -sg44 -S'CT' -p60 -sssS'hg19' -p61 -(dp62 -g34 -S'NC_000016.9:g.28965980_28965981inv' -p63 -sg36 -(dp64 -g38 -g57 -sg40 -S'AG' -p65 -sg42 -S'28965980' -p66 -sg44 -S'CT' -p67 -ssssS'reference_sequence_records' -p68 -(dp69 -S'protein' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116204.3' -p71 -sS'transcript' -p72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032815.3' -p73 -sssS'metadata' -p74 -(dp75 -S'variantvalidator_hgvs_version' -p76 -S'1.1.3' -p77 -sS'uta_schema' -p78 -S'uta_20180821' -p79 -sS'seqrepo_db' -p80 -S'2018-08-21' -p81 -sS'variantvalidator_version' -p82 -S'v0.2' -p83 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant117.txt b/VariantValidator/testing/testOutputsMasterITS/variant117.txt deleted file mode 100644 index 406b2fbf..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant117.txt +++ /dev/null @@ -1,23 +0,0 @@ -(dp0 -S'flag' -p1 -NsS'metadata' -p2 -(dp3 -S'variantvalidator_hgvs_version' -p4 -S'1.1.3' -p5 -sS'uta_schema' -p6 -S'uta_20180821' -p7 -sS'seqrepo_db' -p8 -S'2018-08-21' -p9 -sS'variantvalidator_version' -p10 -S'v0.2' -p11 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant118.txt b/VariantValidator/testing/testOutputsMasterITS/variant118.txt deleted file mode 100644 index c5baab8b..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant118.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000038.5:c.3927_3928inv' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens APC, WNT signaling pathway regulator (APC), transcript variant 3, mRNA -p14 -sS'gene_symbol' -p15 -S'APC' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000029.2:p.(Glu1309_Lys1310delinsAspTer)' -p20 -sS'slr' -p21 -S'NP_000029.2:p.(E1309_K1310delinsD*)' -p22 -ssS'submitted_variant' -p23 -S'NM_000038.5:c.3927_3928delAAinsTT' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_000038.5:c.3927_3928inv' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'grch38' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000005.10:g.112839521_112839522inv' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'5' -p39 -sS'ref' -p40 -S'AA' -p41 -sS'pos' -p42 -S'112839521' -p43 -sS'alt' -p44 -S'TT' -p45 -sssS'grch37' -p46 -(dp47 -g34 -S'NC_000005.9:g.112175218_112175219inv' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'AA' -p50 -sg42 -S'112175218' -p51 -sg44 -S'TT' -p52 -sssS'hg38' -p53 -(dp54 -g34 -S'NC_000005.10:g.112839521_112839522inv' -p55 -sg36 -(dp56 -g38 -S'chr5' -p57 -sg40 -S'AA' -p58 -sg42 -S'112839521' -p59 -sg44 -S'TT' -p60 -sssS'hg19' -p61 -(dp62 -g34 -S'NC_000005.9:g.112175218_112175219inv' -p63 -sg36 -(dp64 -g38 -g57 -sg40 -S'AA' -p65 -sg42 -S'112175218' -p66 -sg44 -S'TT' -p67 -ssssS'reference_sequence_records' -p68 
-(dp69 -S'protein' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000029.2' -p71 -sS'transcript' -p72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000038.5' -p73 -sssS'metadata' -p74 -(dp75 -S'variantvalidator_hgvs_version' -p76 -S'1.1.3' -p77 -sS'uta_schema' -p78 -S'uta_20180821' -p79 -sS'seqrepo_db' -p80 -S'2018-08-21' -p81 -sS'variantvalidator_version' -p82 -S'v0.2' -p83 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant119.txt b/VariantValidator/testing/testOutputsMasterITS/variant119.txt deleted file mode 100644 index 83cfa2e6..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant119.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_001034853.1:c.2847_2848inv' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens retinitis pigmentosa GTPase regulator (RPGR), transcript variant C, mRNA -p14 -sS'gene_symbol' -p15 -S'RPGR' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_001030025.1:p.(Glu949_Glu950delinsAspTer)' -p20 -sS'slr' -p21 -S'NP_001030025.1:p.(E949_E950delinsD*)' -p22 -ssS'submitted_variant' -p23 -S'NM_001034853.1:c.2847_2848delAGinsCT' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_001034853.1:c.2847_2848inv' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000023.10:g.38145404_38145405inv' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chrX' -p39 -sS'ref' -p40 -S'CT' -p41 -sS'pos' -p42 -S'38145404' -p43 -sS'alt' -p44 -S'AG' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000023.11:g.38286151_38286152inv' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'CT' -p50 -sg42 -S'38286151' 
-p51 -sg44 -S'AG' -p52 -sssS'grch37' -p53 -(dp54 -g34 -S'NC_000023.10:g.38145404_38145405inv' -p55 -sg36 -(dp56 -g38 -S'X' -p57 -sg40 -S'CT' -p58 -sg42 -S'38145404' -p59 -sg44 -S'AG' -p60 -sssS'grch38' -p61 -(dp62 -g34 -S'NC_000023.11:g.38286151_38286152inv' -p63 -sg36 -(dp64 -g38 -g57 -sg40 -S'CT' -p65 -sg42 -S'38286151' -p66 -sg44 -S'AG' -p67 -ssssS'reference_sequence_records' -p68 -(dp69 -S'protein' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001030025.1' -p71 -sS'transcript' -p72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001034853.1' -p73 -sssS'metadata' -p74 -(dp75 -S'variantvalidator_hgvs_version' -p76 -S'1.1.3' -p77 -sS'uta_schema' -p78 -S'uta_20180821' -p79 -sS'seqrepo_db' -p80 -S'2018-08-21' -p81 -sS'variantvalidator_version' -p82 -S'v0.2' -p83 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant12.txt b/VariantValidator/testing/testOutputsMasterITS/variant12.txt deleted file mode 100644 index d6eccd94..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant12.txt +++ /dev/null @@ -1,704 +0,0 @@ -(dp0 -S'NM_002474.2:c.3034_3035inv' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -(dp11 -S'grch38' -p12 -(dp13 -S'hgvs_genomic_description' -p14 -S'NT_187607.1:g.1396662_1396663inv' -p15 -sS'vcf' -p16 -(dp17 -S'chr' -p18 -S'HSCHR16_1_CTG1' -p19 -sS'ref' -p20 -S'GT' -p21 -sS'pos' -p22 -S'1396662' -p23 -sS'alt' -p24 -S'AC' -p25 -sssa(dp26 -S'hg38' -p27 -(dp28 -g14 -S'NT_187607.1:g.1396662_1396663inv' -p29 -sg16 -(dp30 -g18 -S'chr16_KI270853v1_alt' -p31 -sg20 -S'GT' -p32 -sg22 -S'1396662' -p33 -sg24 -S'AC' -p34 -sssasS'transcript_description' -p35 -VHomo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1A, mRNA -p36 -sS'gene_symbol' -p37 -S'MYH11' -p38 -sS'hgvs_predicted_protein_consequence' -p39 -(dp40 -S'tlr' -p41 
-S'NP_002465.1:p.(Thr1012Val)' -p42 -sS'slr' -p43 -S'NP_002465.1:p.(T1012V)' -p44 -ssS'submitted_variant' -p45 -S'chr16:15832508_15832509delinsAC' -p46 -sS'genome_context_intronic_sequence' -p47 -g4 -sS'hgvs_lrg_variant' -p48 -g4 -sS'hgvs_transcript_variant' -p49 -S'NM_002474.2:c.3034_3035inv' -p50 -sS'hgvs_refseqgene_variant' -p51 -g4 -sS'primary_assembly_loci' -p52 -(dp53 -S'grch38' -p54 -(dp55 -g14 -S'NC_000016.10:g.15738651_15738652inv' -p56 -sg16 -(dp57 -g18 -S'16' -p58 -sg20 -S'GT' -p59 -sg22 -S'15738651' -p60 -sg24 -S'AC' -p61 -sssS'grch37' -p62 -(dp63 -g14 -S'NC_000016.9:g.15832508_15832509inv' -p64 -sg16 -(dp65 -g18 -g58 -sg20 -S'GT' -p66 -sg22 -S'15832508' -p67 -sg24 -S'AC' -p68 -sssg27 -(dp69 -g14 -S'NC_000016.10:g.15738651_15738652inv' -p70 -sg16 -(dp71 -g18 -S'chr16' -p72 -sg20 -S'GT' -p73 -sg22 -S'15738651' -p74 -sg24 -S'AC' -p75 -sssS'hg19' -p76 -(dp77 -g14 -S'NC_000016.9:g.15832508_15832509inv' -p78 -sg16 -(dp79 -g18 -g72 -sg20 -S'GT' -p80 -sg22 -S'15832508' -p81 -sg24 -S'AC' -p82 -ssssS'reference_sequence_records' -p83 -(dp84 -S'protein' -p85 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_002465.1' -p86 -sS'transcript' -p87 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_002474.2' -p88 -sssS'NM_022844.2:c.3034_3035inv' -p89 -(dp90 -g3 -g4 -sg5 -(lp91 -S'RefSeqGene record not available' -p92 -asg8 -g4 -sg9 -(lp93 -(dp94 -S'grch38' -p95 -(dp96 -g14 -S'NT_187607.1:g.1396662_1396663inv' -p97 -sg16 -(dp98 -g18 -g19 -sg20 -S'GT' -p99 -sg22 -S'1396662' -p100 -sg24 -S'AC' -p101 -sssa(dp102 -g27 -(dp103 -g14 -S'NT_187607.1:g.1396662_1396663inv' -p104 -sg16 -(dp105 -g18 -g31 -sg20 -S'GT' -p106 -sg22 -S'1396662' -p107 -sg24 -S'AC' -p108 -sssasg35 -VHomo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2A, mRNA -p109 -sg37 -S'MYH11' -p110 -sg39 -(dp111 -g41 -S'NP_074035.1:p.(Thr1012Val)' -p112 -sg43 -S'NP_074035.1:p.(T1012V)' -p113 -ssg45 -g46 -sg47 -g4 -sg48 -g4 -sg49 -S'NM_022844.2:c.3034_3035inv' -p114 -sg51 -g4 -sg52 -(dp115 -S'grch38' -p116 -(dp117 -g14 
-S'NC_000016.10:g.15738651_15738652inv' -p118 -sg16 -(dp119 -g18 -g58 -sg20 -S'GT' -p120 -sg22 -S'15738651' -p121 -sg24 -S'AC' -p122 -sssS'grch37' -p123 -(dp124 -g14 -S'NC_000016.9:g.15832508_15832509inv' -p125 -sg16 -(dp126 -g18 -g58 -sg20 -S'GT' -p127 -sg22 -S'15832508' -p128 -sg24 -S'AC' -p129 -sssg27 -(dp130 -g14 -S'NC_000016.10:g.15738651_15738652inv' -p131 -sg16 -(dp132 -g18 -g72 -sg20 -S'GT' -p133 -sg22 -S'15738651' -p134 -sg24 -S'AC' -p135 -sssS'hg19' -p136 -(dp137 -g14 -S'NC_000016.9:g.15832508_15832509inv' -p138 -sg16 -(dp139 -g18 -g72 -sg20 -S'GT' -p140 -sg22 -S'15832508' -p141 -sg24 -S'AC' -p142 -ssssg83 -(dp143 -g85 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_074035.1' -p144 -sg87 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_022844.2' -p145 -sssS'NM_001040114.1:c.3055_3056inv' -p146 -(dp147 -g3 -g4 -sg5 -(lp148 -S'RefSeqGene record not available' -p149 -asg8 -g4 -sg9 -(lp150 -(dp151 -S'grch38' -p152 -(dp153 -g14 -S'NT_187607.1:g.1396662_1396663inv' -p154 -sg16 -(dp155 -g18 -g19 -sg20 -S'GT' -p156 -sg22 -S'1396662' -p157 -sg24 -S'AC' -p158 -sssa(dp159 -g27 -(dp160 -g14 -S'NT_187607.1:g.1396662_1396663inv' -p161 -sg16 -(dp162 -g18 -g31 -sg20 -S'GT' -p163 -sg22 -S'1396662' -p164 -sg24 -S'AC' -p165 -sssasg35 -VHomo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1B, mRNA -p166 -sg37 -S'MYH11' -p167 -sg39 -(dp168 -g41 -S'NP_001035203.1:p.(Thr1019Val)' -p169 -sg43 -S'NP_001035203.1:p.(T1019V)' -p170 -ssg45 -g46 -sg47 -g4 -sg48 -g4 -sg49 -S'NM_001040114.1:c.3055_3056inv' -p171 -sg51 -g4 -sg52 -(dp172 -S'grch38' -p173 -(dp174 -g14 -S'NC_000016.10:g.15738651_15738652inv' -p175 -sg16 -(dp176 -g18 -g58 -sg20 -S'GT' -p177 -sg22 -S'15738651' -p178 -sg24 -S'AC' -p179 -sssS'grch37' -p180 -(dp181 -g14 -S'NC_000016.9:g.15832508_15832509inv' -p182 -sg16 -(dp183 -g18 -g58 -sg20 -S'GT' -p184 -sg22 -S'15832508' -p185 -sg24 -S'AC' -p186 -sssg27 -(dp187 -g14 -S'NC_000016.10:g.15738651_15738652inv' -p188 -sg16 -(dp189 -g18 -g72 -sg20 -S'GT' -p190 -sg22 -S'15738651' 
-p191 -sg24 -S'AC' -p192 -sssS'hg19' -p193 -(dp194 -g14 -S'NC_000016.9:g.15832508_15832509inv' -p195 -sg16 -(dp196 -g18 -g72 -sg20 -S'GT' -p197 -sg22 -S'15832508' -p198 -sg24 -S'AC' -p199 -ssssg83 -(dp200 -g85 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035203.1' -p201 -sg87 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040114.1' -p202 -sssS'flag' -p203 -S'gene_variant' -p204 -sS'NM_001040113.1:c.3055_3056inv' -p205 -(dp206 -g3 -g4 -sg5 -(lp207 -S'RefSeqGene record not available' -p208 -asg8 -g4 -sg9 -(lp209 -(dp210 -S'grch38' -p211 -(dp212 -g14 -S'NT_187607.1:g.1396662_1396663inv' -p213 -sg16 -(dp214 -g18 -g19 -sg20 -S'GT' -p215 -sg22 -S'1396662' -p216 -sg24 -S'AC' -p217 -sssa(dp218 -g27 -(dp219 -g14 -S'NT_187607.1:g.1396662_1396663inv' -p220 -sg16 -(dp221 -g18 -g31 -sg20 -S'GT' -p222 -sg22 -S'1396662' -p223 -sg24 -S'AC' -p224 -sssasg35 -VHomo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2B, mRNA -p225 -sg37 -S'MYH11' -p226 -sg39 -(dp227 -g41 -S'NP_001035202.1:p.(Thr1019Val)' -p228 -sg43 -S'NP_001035202.1:p.(T1019V)' -p229 -ssg45 -g46 -sg47 -g4 -sg48 -g4 -sg49 -S'NM_001040113.1:c.3055_3056inv' -p230 -sg51 -g4 -sg52 -(dp231 -S'grch38' -p232 -(dp233 -g14 -S'NC_000016.10:g.15738651_15738652inv' -p234 -sg16 -(dp235 -g18 -g58 -sg20 -S'GT' -p236 -sg22 -S'15738651' -p237 -sg24 -S'AC' -p238 -sssS'grch37' -p239 -(dp240 -g14 -S'NC_000016.9:g.15832508_15832509inv' -p241 -sg16 -(dp242 -g18 -g58 -sg20 -S'GT' -p243 -sg22 -S'15832508' -p244 -sg24 -S'AC' -p245 -sssg27 -(dp246 -g14 -S'NC_000016.10:g.15738651_15738652inv' -p247 -sg16 -(dp248 -g18 -g72 -sg20 -S'GT' -p249 -sg22 -S'15738651' -p250 -sg24 -S'AC' -p251 -sssS'hg19' -p252 -(dp253 -g14 -S'NC_000016.9:g.15832508_15832509inv' -p254 -sg16 -(dp255 -g18 -g72 -sg20 -S'GT' -p256 -sg22 -S'15832508' -p257 -sg24 -S'AC' -p258 -ssssg83 -(dp259 -g85 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035202.1' -p260 -sg87 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040113.1' -p261 -sssS'metadata' -p262 -(dp263 
-S'variantvalidator_hgvs_version' -p264 -S'1.1.3' -p265 -sS'uta_schema' -p266 -S'uta_20180821' -p267 -sS'seqrepo_db' -p268 -S'2018-08-21' -p269 -sS'variantvalidator_version' -p270 -S'v0.2' -p271 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant120.txt b/VariantValidator/testing/testOutputsMasterITS/variant120.txt deleted file mode 100644 index 725d40b4..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant120.txt +++ /dev/null @@ -1,179 +0,0 @@ -(dp0 -S'NM_000088.3:c.4394_4395inv' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NM_000088.3:c.4392_*2inv normalized to NM_000088.3:c.4394_4395inv' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p13 -sS'gene_symbol' -p14 -S'COL1A1' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_000079.2:p.(Ter1465PheextTer27)' -p19 -sS'slr' -p20 -S'NP_000079.2:p.(*1465Fext*27)' -p21 -ssS'submitted_variant' -p22 -S'NM_000088.3:c.4392_*2inv' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_000088.3:c.4394_4395inv' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000017.10:g.48262863_48262864inv' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr17' -p38 -sS'ref' -p39 -S'TT' -p40 -sS'pos' -p41 -S'48262863' -p42 -sS'alt' -p43 -S'AA' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000017.11:g.50185502_50185503inv' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'TT' -p49 -sg41 -S'50185502' -p50 -sg43 -S'AA' -p51 -sssS'grch37' -p52 -(dp53 -g33 -S'NC_000017.10:g.48262863_48262864inv' -p54 -sg35 -(dp55 -g37 -S'17' -p56 -sg39 -S'TT' -p57 -sg41 -S'48262863' -p58 -sg43 -S'AA' -p59 -sssS'grch38' 
-p60 -(dp61 -g33 -S'NC_000017.11:g.50185502_50185503inv' -p62 -sg35 -(dp63 -g37 -g56 -sg39 -S'TT' -p64 -sg41 -S'50185502' -p65 -sg43 -S'AA' -p66 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p72 -sssS'flag' -p73 -S'gene_variant' -p74 -sS'metadata' -p75 -(dp76 -S'variantvalidator_hgvs_version' -p77 -S'1.1.3' -p78 -sS'uta_schema' -p79 -S'uta_20180821' -p80 -sS'seqrepo_db' -p81 -S'2018-08-21' -p82 -sS'variantvalidator_version' -p83 -S'v0.2' -p84 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant121.txt b/VariantValidator/testing/testOutputsMasterITS/variant121.txt deleted file mode 100644 index 85118fd0..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant121.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_000088.3:c.4392_*5inv' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'RefSeqGene record not available' -p19 -asS'refseqgene_context_intronic_sequence' -p20 -g16 -sS'alt_genomic_loci' -p21 -(lp22 -sS'transcript_description' -p23 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p24 -sS'gene_symbol' -p25 -S'COL1A1' -p26 -sS'hgvs_predicted_protein_consequence' -p27 -(dp28 -S'tlr' -p29 -S'NP_000079.2:p.?' -p30 -sS'slr' -p31 -S'NP_000079.2:p.?' 
-p32 -ssS'submitted_variant' -p33 -S'NM_000088.3:c.4392_*5inv' -p34 -sS'genome_context_intronic_sequence' -p35 -g16 -sS'hgvs_lrg_variant' -p36 -g16 -sS'hgvs_transcript_variant' -p37 -S'NM_000088.3:c.4392_*5inv' -p38 -sS'hgvs_refseqgene_variant' -p39 -g16 -sS'primary_assembly_loci' -p40 -(dp41 -S'hg19' -p42 -(dp43 -S'hgvs_genomic_description' -p44 -S'NC_000017.10:g.48262858_48262866inv' -p45 -sS'vcf' -p46 -(dp47 -S'chr' -p48 -S'chr17' -p49 -sS'ref' -p50 -S'GAGTTTA' -p51 -sS'pos' -p52 -S'48262859' -p53 -sS'alt' -p54 -S'TAAACTC' -p55 -sssS'hg38' -p56 -(dp57 -g44 -S'NC_000017.11:g.50185497_50185505inv' -p58 -sg46 -(dp59 -g48 -g49 -sg50 -S'GAGTTTA' -p60 -sg52 -S'50185498' -p61 -sg54 -S'TAAACTC' -p62 -sssS'grch37' -p63 -(dp64 -g44 -S'NC_000017.10:g.48262858_48262866inv' -p65 -sg46 -(dp66 -g48 -S'17' -p67 -sg50 -S'GAGTTTA' -p68 -sg52 -S'48262859' -p69 -sg54 -S'TAAACTC' -p70 -sssS'grch38' -p71 -(dp72 -g44 -S'NC_000017.11:g.50185497_50185505inv' -p73 -sg46 -(dp74 -g48 -g67 -sg50 -S'GAGTTTA' -p75 -sg52 -S'50185498' -p76 -sg54 -S'TAAACTC' -p77 -ssssS'reference_sequence_records' -p78 -(dp79 -S'protein' -p80 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p81 -sS'transcript' -p82 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p83 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant122.txt b/VariantValidator/testing/testOutputsMasterITS/variant122.txt deleted file mode 100644 index 732ad2d7..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant122.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.4390_*7inv' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2:p.?' -p20 -sS'slr' -p21 -S'NP_000079.2:p.?' -p22 -ssS'submitted_variant' -p23 -S'NM_000088.3:c.4390_*7inv' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_000088.3:c.4390_*7inv' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000017.10:g.48262856_48262868inv' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr17' -p39 -sS'ref' -p40 -S'AGGGAGTTTACAG' -p41 -sS'pos' -p42 -S'48262856' -p43 -sS'alt' -p44 -S'CTGTAAACTCCCT' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000017.11:g.50185495_50185507inv' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'AGGGAGTTTACAG' -p50 -sg42 -S'50185495' -p51 -sg44 -S'CTGTAAACTCCCT' -p52 -sssS'grch37' -p53 -(dp54 -g34 -S'NC_000017.10:g.48262856_48262868inv' -p55 -sg36 -(dp56 -g38 -S'17' -p57 -sg40 -S'AGGGAGTTTACAG' -p58 -sg42 -S'48262856' -p59 -sg44 -S'CTGTAAACTCCCT' -p60 -sssS'grch38' -p61 -(dp62 -g34 -S'NC_000017.11:g.50185495_50185507inv' -p63 -sg36 -(dp64 -g38 -g57 -sg40 -S'AGGGAGTTTACAG' -p65 -sg42 -S'50185495' -p66 -sg44 -S'CTGTAAACTCCCT' -p67 -ssssS'reference_sequence_records' -p68 
-(dp69 -S'protein' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p71 -sS'transcript' -p72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p73 -sssS'metadata' -p74 -(dp75 -S'variantvalidator_hgvs_version' -p76 -S'1.1.3' -p77 -sS'uta_schema' -p78 -S'uta_20180821' -p79 -sS'seqrepo_db' -p80 -S'2018-08-21' -p81 -sS'variantvalidator_version' -p82 -S'v0.2' -p83 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant123.txt b/VariantValidator/testing/testOutputsMasterITS/variant123.txt deleted file mode 100644 index 0da4596a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant123.txt +++ /dev/null @@ -1,80 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'insertion length must be 1' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'hgvs_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NM_005732.3:c.2923-5insT' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'hgvs_lrg_variant' -p20 -g4 -sS'hgvs_transcript_variant' -p21 -g4 -sS'hgvs_refseqgene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -sS'reference_sequence_records' -p25 -g4 -ssS'flag' -p26 -S'warning' -p27 -sS'metadata' -p28 -(dp29 -S'variantvalidator_hgvs_version' -p30 -S'1.1.3' -p31 -sS'uta_schema' -p32 -S'uta_20180821' -p33 -sS'seqrepo_db' -p34 -S'2018-08-21' -p35 -sS'variantvalidator_version' -p36 -S'v0.2' -p37 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant124.txt b/VariantValidator/testing/testOutputsMasterITS/variant124.txt deleted file mode 100644 index bab14006..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant124.txt +++ /dev/null @@ -1,80 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The given coordinate is outside the bounds of the reference sequence.' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'hgvs_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NM_198283.1(EYS):c.*743120C>T' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'hgvs_lrg_variant' -p20 -g4 -sS'hgvs_transcript_variant' -p21 -g4 -sS'hgvs_refseqgene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -sS'reference_sequence_records' -p25 -g4 -ssS'flag' -p26 -S'warning' -p27 -sS'metadata' -p28 -(dp29 -S'variantvalidator_hgvs_version' -p30 -S'1.1.3' -p31 -sS'uta_schema' -p32 -S'uta_20180821' -p33 -sS'seqrepo_db' -p34 -S'2018-08-21' -p35 -sS'variantvalidator_version' -p36 -S'v0.2' -p37 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant125.txt b/VariantValidator/testing/testOutputsMasterITS/variant125.txt deleted file mode 100644 index 612a82b3..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant125.txt +++ /dev/null @@ -1,80 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'start or end or both are beyond the bounds of transcript record' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'hgvs_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NM_133379.4(TTN):c.*265+26591C>T' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'hgvs_lrg_variant' -p20 -g4 -sS'hgvs_transcript_variant' -p21 -g4 -sS'hgvs_refseqgene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -sS'reference_sequence_records' -p25 -g4 -ssS'flag' -p26 -S'warning' -p27 -sS'metadata' -p28 -(dp29 -S'variantvalidator_hgvs_version' -p30 -S'1.1.3' -p31 -sS'uta_schema' -p32 -S'uta_20180821' -p33 -sS'seqrepo_db' -p34 -S'2018-08-21' -p35 -sS'variantvalidator_version' -p36 -S'v0.2' -p37 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant126.txt b/VariantValidator/testing/testOutputsMasterITS/variant126.txt deleted file mode 100644 index a93ce03c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant126.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589-2_589-1delinsG' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000088.3:c.589-2_589-1AG>G automapped to NM_000088.3:c.589-2_589-1delAGinsG' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A1' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000079.2:p.?' -p21 -sS'slr' -p22 -S'NP_000079.2:p.?' -p23 -ssS'submitted_variant' -p24 -S'NM_000088.3:c.589-2_589-1AG>G' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000017.10(NM_000088.3):c.589-2_589-1delinsG' -p27 -sS'hgvs_lrg_variant' -p28 -g6 -sS'hgvs_transcript_variant' -p29 -S'NM_000088.3:c.589-2_589-1delinsG' -p30 -sS'hgvs_refseqgene_variant' -p31 -g6 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000017.10:g.48275364_48275365delinsC' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr17' -p41 -sS'ref' -p42 -S'CT' -p43 -sS'pos' -p44 -S'48275364' -p45 -sS'alt' -p46 -S'C' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000017.11:g.50198003_50198004delinsC' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'CT' -p52 -sg44 -S'50198003' -p53 -sg46 -g47 -sssS'grch37' -p54 -(dp55 -g36 -S'NC_000017.10:g.48275364_48275365delinsC' -p56 -sg38 -(dp57 -g40 -S'17' -p58 -sg42 -S'CT' -p59 -sg44 -S'48275364' -p60 -sg46 -g47 -sssS'grch38' -p61 -(dp62 -g36 -S'NC_000017.11:g.50198003_50198004delinsC' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -S'CT' -p65 -sg44 
-S'50198003' -p66 -sg46 -g47 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p72 -sssS'metadata' -p73 -(dp74 -S'variantvalidator_hgvs_version' -p75 -S'1.1.3' -p76 -sS'uta_schema' -p77 -S'uta_20180821' -p78 -sS'seqrepo_db' -p79 -S'2018-08-21' -p80 -sS'variantvalidator_version' -p81 -S'v0.2' -p82 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant127.txt b/VariantValidator/testing/testOutputsMasterITS/variant127.txt deleted file mode 100644 index 7921b40d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant127.txt +++ /dev/null @@ -1,175 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.642+1_642+2delinsG' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2:p.?' -p20 -sS'slr' -p21 -S'NP_000079.2:p.?' 
-p22 -ssS'submitted_variant' -p23 -S'NM_000088.3:c.642+1_642+2delGTinsG' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000017.10(NM_000088.3):c.642+1_642+2delinsG' -p26 -sS'hgvs_lrg_variant' -p27 -g6 -sS'hgvs_transcript_variant' -p28 -S'NM_000088.3:c.642+1_642+2delinsG' -p29 -sS'hgvs_refseqgene_variant' -p30 -g6 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'hgvs_genomic_description' -p35 -S'NC_000017.10:g.48275308_48275309delinsC' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr17' -p40 -sS'ref' -p41 -S'TA' -p42 -sS'pos' -p43 -S'48275307' -p44 -sS'alt' -p45 -S'T' -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000017.11:g.50197947_50197948delinsC' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -S'TA' -p51 -sg43 -S'50197946' -p52 -sg45 -g46 -sssS'grch37' -p53 -(dp54 -g35 -S'NC_000017.10:g.48275308_48275309delinsC' -p55 -sg37 -(dp56 -g39 -S'17' -p57 -sg41 -S'TA' -p58 -sg43 -S'48275307' -p59 -sg45 -g46 -sssS'grch38' -p60 -(dp61 -g35 -S'NC_000017.11:g.50197947_50197948delinsC' -p62 -sg37 -(dp63 -g39 -g57 -sg41 -S'TA' -p64 -sg43 -S'50197946' -p65 -sg45 -g46 -ssssS'reference_sequence_records' -p66 -(dp67 -S'protein' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p69 -sS'transcript' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p71 -sssS'metadata' -p72 -(dp73 -S'variantvalidator_hgvs_version' -p74 -S'1.1.3' -p75 -sS'uta_schema' -p76 -S'uta_20180821' -p77 -sS'seqrepo_db' -p78 -S'2018-08-21' -p79 -sS'variantvalidator_version' -p80 -S'v0.2' -p81 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant128.txt b/VariantValidator/testing/testOutputsMasterITS/variant128.txt deleted file mode 100644 index 06085851..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant128.txt +++ /dev/null @@ -1,80 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'insertion length must be 1' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'hgvs_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NM_004415.3:c.1-1insA' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'hgvs_lrg_variant' -p20 -g4 -sS'hgvs_transcript_variant' -p21 -g4 -sS'hgvs_refseqgene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -sS'reference_sequence_records' -p25 -g4 -ssS'flag' -p26 -S'warning' -p27 -sS'metadata' -p28 -(dp29 -S'variantvalidator_hgvs_version' -p30 -S'1.1.3' -p31 -sS'uta_schema' -p32 -S'uta_20180821' -p33 -sS'seqrepo_db' -p34 -S'2018-08-21' -p35 -sS'variantvalidator_version' -p36 -S'v0.2' -p37 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant129.txt b/VariantValidator/testing/testOutputsMasterITS/variant129.txt deleted file mode 100644 index 8f7b95f8..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant129.txt +++ /dev/null @@ -1,174 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_004415.3:c.-1_1insA' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens desmoplakin (DSP), transcript variant 1, mRNA -p14 -sS'gene_symbol' -p15 -S'DSP' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_004406.2:p.(Met1?)' -p20 -sS'slr' -p21 -S'NP_004406.2:p.(M1?)' -p22 -ssS'submitted_variant' -p23 -S'NM_004415.3:c.-1_1insA' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_004415.3:c.-1_1insA' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000006.11:g.7542148_7542149insA' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr6' -p39 -sS'ref' -p40 -S'A' -p41 -sS'pos' -p42 -S'7542149' -p43 -sS'alt' -p44 -S'AA' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000006.12:g.7541915_7541916insA' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'7541916' -p50 -sg44 -S'AA' -p51 -sssS'grch37' -p52 -(dp53 -g34 -S'NC_000006.11:g.7542148_7542149insA' -p54 -sg36 -(dp55 -g38 -S'6' -p56 -sg40 -g41 -sg42 -S'7542149' -p57 -sg44 -S'AA' -p58 -sssS'grch38' -p59 -(dp60 -g34 -S'NC_000006.12:g.7541915_7541916insA' -p61 -sg36 -(dp62 -g38 -g56 -sg40 -g41 -sg42 -S'7541916' -p63 -sg44 -S'AA' -p64 -ssssS'reference_sequence_records' -p65 -(dp66 -S'protein' -p67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004406.2' -p68 -sS'transcript' -p69 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004415.3' -p70 -sssS'metadata' -p71 -(dp72 -S'variantvalidator_hgvs_version' -p73 -S'1.1.3' -p74 -sS'uta_schema' -p75 -S'uta_20180821' -p76 -sS'seqrepo_db' -p77 -S'2018-08-21' -p78 -sS'variantvalidator_version' -p79 -S'v0.2' -p80 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant13.txt b/VariantValidator/testing/testOutputsMasterITS/variant13.txt deleted file mode 100644 index e91d2065..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant13.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589-1_589delinsG' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000088.3:c.589-1GG>G automapped to NM_000088.3:c.589-1_589delGGinsG' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A1' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000079.2:p.?' -p21 -sS'slr' -p22 -S'NP_000079.2:p.?' 
-p23 -ssS'submitted_variant' -p24 -S'NM_000088.3:c.589-1GG>G' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000017.10(NM_000088.3):c.589-1_589delinsG' -p27 -sS'hgvs_lrg_variant' -p28 -g6 -sS'hgvs_transcript_variant' -p29 -S'NM_000088.3:c.589-1_589delinsG' -p30 -sS'hgvs_refseqgene_variant' -p31 -g6 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000017.10:g.48275363_48275364delinsC' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr17' -p41 -sS'ref' -p42 -S'AC' -p43 -sS'pos' -p44 -S'48275361' -p45 -sS'alt' -p46 -S'A' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000017.11:g.50198002_50198003delinsC' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'AC' -p52 -sg44 -S'50198000' -p53 -sg46 -g47 -sssS'grch37' -p54 -(dp55 -g36 -S'NC_000017.10:g.48275363_48275364delinsC' -p56 -sg38 -(dp57 -g40 -S'17' -p58 -sg42 -S'AC' -p59 -sg44 -S'48275361' -p60 -sg46 -g47 -sssS'grch38' -p61 -(dp62 -g36 -S'NC_000017.11:g.50198002_50198003delinsC' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -S'AC' -p65 -sg44 -S'50198000' -p66 -sg46 -g47 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p72 -sssS'metadata' -p73 -(dp74 -S'variantvalidator_hgvs_version' -p75 -S'1.1.3' -p76 -sS'uta_schema' -p77 -S'uta_20180821' -p78 -sS'seqrepo_db' -p79 -S'2018-08-21' -p80 -sS'variantvalidator_version' -p81 -S'v0.2' -p82 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant130.txt b/VariantValidator/testing/testOutputsMasterITS/variant130.txt deleted file mode 100644 index 1efeccff..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant130.txt +++ /dev/null @@ -1,80 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'start or end or both are beyond the bounds of transcript record' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'hgvs_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NM_000273.2:c.1-5028_253del' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'hgvs_lrg_variant' -p20 -g4 -sS'hgvs_transcript_variant' -p21 -g4 -sS'hgvs_refseqgene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -sS'reference_sequence_records' -p25 -g4 -ssS'flag' -p26 -S'warning' -p27 -sS'metadata' -p28 -(dp29 -S'variantvalidator_hgvs_version' -p30 -S'1.1.3' -p31 -sS'uta_schema' -p32 -S'uta_20180821' -p33 -sS'seqrepo_db' -p34 -S'2018-08-21' -p35 -sS'variantvalidator_version' -p36 -S'v0.2' -p37 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant131.txt b/VariantValidator/testing/testOutputsMasterITS/variant131.txt deleted file mode 100644 index 9b95a8f7..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant131.txt +++ /dev/null @@ -1,141 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_002929.2:c.1006C>T' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -aS'NM_002929.2:c.1006C>T cannot be mapped directly to genome build GRCh37' -p10 -aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g6 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens G protein-coupled receptor kinase 1 (GRK1), mRNA -p16 -sS'gene_symbol' -p17 -S'GRK1' -p18 -sS'hgvs_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_002920.1:p.(Leu336Phe)' -p22 -sS'slr' -p23 -S'NP_002920.1:p.(L336F)' -p24 -ssS'submitted_variant' -p25 -S'NM_002929.2:c.1006C>T' -p26 -sS'genome_context_intronic_sequence' -p27 -g6 -sS'hgvs_lrg_variant' -p28 -g6 -sS'hgvs_transcript_variant' -p29 -S'NM_002929.2:c.1006C>T' -p30 -sS'hgvs_refseqgene_variant' -p31 -g6 -sS'primary_assembly_loci' -p32 -(dp33 -S'grch38' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000013.11:g.113723094C>T' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'13' -p41 -sS'ref' -p42 -VC -p43 -sS'pos' -p44 -S'113723094' -p45 -sS'alt' -p46 -VT -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000013.11:g.113723094C>T' -p50 -sg38 -(dp51 -g40 -S'chr13' -p52 -sg42 -g43 -sg44 -S'113723094' -p53 -sg46 -g47 -ssssS'reference_sequence_records' -p54 -(dp55 -S'protein' -p56 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_002920.1' -p57 -sS'transcript' -p58 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_002929.2' -p59 -sssS'metadata' -p60 -(dp61 -S'variantvalidator_hgvs_version' -p62 -S'1.1.3' -p63 
-sS'uta_schema' -p64 -S'uta_20180821' -p65 -sS'seqrepo_db' -p66 -S'2018-08-21' -p67 -sS'variantvalidator_version' -p68 -S'v0.2' -p69 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant132.txt b/VariantValidator/testing/testOutputsMasterITS/variant132.txt deleted file mode 100644 index 7fac199b..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant132.txt +++ /dev/null @@ -1,167 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NR_125367.1:n.167+18165G>A' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens myosin heavy chain gene cluster antisense RNA (MYHAS), long non-coding RNA -p14 -sS'gene_symbol' -p15 -S'MYHAS' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'Non-coding :n.' -p20 -sS'slr' -p21 -g20 -ssS'submitted_variant' -p22 -S'NR_125367.1:n.167+18165G>A' -p23 -sS'genome_context_intronic_sequence' -p24 -S'NC_000017.10(NR_125367.1):c.167+18165G>A' -p25 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NR_125367.1:n.167+18165G>A' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000017.10:g.10327720G>A' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr17' -p39 -sS'ref' -p40 -S'G' -p41 -sS'pos' -p42 -S'10327720' -p43 -sS'alt' -p44 -S'A' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000017.11:g.10424403G>A' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'10424403' -p50 -sg44 -g45 -sssS'grch37' -p51 -(dp52 -g34 -S'NC_000017.10:g.10327720G>A' -p53 -sg36 -(dp54 -g38 -S'17' -p55 -sg40 -g41 -sg42 -S'10327720' -p56 -sg44 -g45 -sssS'grch38' -p57 -(dp58 -g34 -S'NC_000017.11:g.10424403G>A' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'10424403' -p61 -sg44 -g45 
-ssssS'reference_sequence_records' -p62 -(dp63 -S'transcript' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_125367.1' -p65 -sssS'metadata' -p66 -(dp67 -S'variantvalidator_hgvs_version' -p68 -S'1.1.3' -p69 -sS'uta_schema' -p70 -S'uta_20180821' -p71 -sS'seqrepo_db' -p72 -S'2018-08-21' -p73 -sS'variantvalidator_version' -p74 -S'v0.2' -p75 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant133.txt b/VariantValidator/testing/testOutputsMasterITS/variant133.txt deleted file mode 100644 index b680c64b..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant133.txt +++ /dev/null @@ -1,82 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Variant coordinate is out of the bound of CDS region (CDS length ' -p7 -aS'2673)' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -g4 -sS'gene_symbol' -p13 -g4 -sS'hgvs_predicted_protein_consequence' -p14 -(dp15 -S'tlr' -p16 -g4 -sS'slr' -p17 -g4 -ssS'submitted_variant' -p18 -S'NM_006005.3:c.3071_3073delinsTTA' -p19 -sS'genome_context_intronic_sequence' -p20 -g4 -sS'hgvs_lrg_variant' -p21 -g4 -sS'hgvs_transcript_variant' -p22 -g4 -sS'hgvs_refseqgene_variant' -p23 -g4 -sS'primary_assembly_loci' -p24 -(dp25 -sS'reference_sequence_records' -p26 -g4 -ssS'flag' -p27 -S'warning' -p28 -sS'metadata' -p29 -(dp30 -S'variantvalidator_hgvs_version' -p31 -S'1.1.3' -p32 -sS'uta_schema' -p33 -S'uta_20180821' -p34 -sS'seqrepo_db' -p35 -S'2018-08-21' -p36 -sS'variantvalidator_version' -p37 -S'v0.2' -p38 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant134.txt b/VariantValidator/testing/testOutputsMasterITS/variant134.txt deleted file mode 100644 index c82897c8..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant134.txt +++ /dev/null @@ -1,80 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Coding transcript reference sequence input as non-coding transcript (n.) reference sequence. Did you mean NM_000089.3:c.1504_1506del?' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'hgvs_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NM_000089.3:n.1504_1506del' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'hgvs_lrg_variant' -p20 -g4 -sS'hgvs_transcript_variant' -p21 -g4 -sS'hgvs_refseqgene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -sS'reference_sequence_records' -p25 -g4 -ssS'flag' -p26 -S'warning' -p27 -sS'metadata' -p28 -(dp29 -S'variantvalidator_hgvs_version' -p30 -S'1.1.3' -p31 -sS'uta_schema' -p32 -S'uta_20180821' -p33 -sS'seqrepo_db' -p34 -S'2018-08-21' -p35 -sS'variantvalidator_version' -p36 -S'v0.2' -p37 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant135.txt b/VariantValidator/testing/testOutputsMasterITS/variant135.txt deleted file mode 100644 index 91a17310..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant135.txt +++ /dev/null @@ -1,157 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -sS'refseqgene_context_intronic_sequence' -p7 -g4 -sS'alt_genomic_loci' -p8 -(lp9 -sS'transcript_description' -p10 -S'Homo sapiens mitochondrion, complete genome' -p11 -sS'gene_symbol' -p12 -g4 -sS'hgvs_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NC_012920.1:m.1011C>T' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'hgvs_lrg_variant' -p20 -g4 -sS'hgvs_transcript_variant' -p21 -g4 -sS'hgvs_refseqgene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -S'hg19' -p25 -(dp26 -S'hgvs_genomic_description' -p27 -S'NC_012920.1:m.1011C>T' -p28 -sS'vcf' -p29 -(dp30 -S'chr' -p31 -S'chrM' -p32 -sS'ref' -p33 -S'C' -p34 -sS'pos' -p35 -S'1011' -p36 -sS'alt' -p37 -S'T' -p38 -sssS'hg38' -p39 -(dp40 -g27 -S'NC_012920.1:m.1011C>T' -p41 -sg29 -(dp42 -g31 -g32 -sg33 -g34 -sg35 -S'1011' -p43 -sg37 -g38 -sssS'grch37' -p44 -(dp45 -g27 -S'NC_012920.1:m.1011C>T' -p46 -sg29 -(dp47 -g31 -S'M' -p48 -sg33 -g34 -sg35 -S'1011' -p49 -sg37 -g38 -sssS'grch38' -p50 -(dp51 -g27 -S'NC_012920.1:m.1011C>T' -p52 -sg29 -(dp53 -g31 -g48 -sg33 -g34 -sg35 -S'1011' -p54 -sg37 -g38 -ssssS'reference_sequence_records' -p55 -g4 -ssS'flag' -p56 -S'warning' -p57 -sS'metadata' -p58 -(dp59 -S'variantvalidator_hgvs_version' -p60 -S'1.1.3' -p61 -sS'uta_schema' -p62 -S'uta_20180821' -p63 -sS'seqrepo_db' -p64 -S'2018-08-21' -p65 -sS'variantvalidator_version' -p66 -S'v0.2' -p67 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant136.txt b/VariantValidator/testing/testOutputsMasterITS/variant136.txt deleted file mode 100644 index ffd271ae..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant136.txt +++ /dev/null @@ -1,259 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_014611.1:c.9879T>C' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'A more recent version of the selected reference sequence NM_014611.1 is available (NM_014611.2)' -p19 -aS'NM_014611.2:c.9879C= MUST be fully validated prior to use in reports' -p20 -aS'select_variants=NM_014611.2:c.9879C=' -p21 -aS'RefSeqGene record not available' -p22 -asS'refseqgene_context_intronic_sequence' -p23 -g16 -sS'alt_genomic_loci' -p24 -(lp25 -sS'transcript_description' -p26 -VHomo sapiens MDN1, midasin homolog (yeast) (MDN1), mRNA -p27 -sS'gene_symbol' -p28 -S'MDN1' -p29 -sS'hgvs_predicted_protein_consequence' -p30 -(dp31 -S'tlr' -p32 -S'NP_055426.1:p.(Val3293=)' -p33 -sS'slr' -p34 -S'NP_055426.1:p.(V3293=)' -p35 -ssS'submitted_variant' -p36 -S'NC_000006.11:g.90403795G=' -p37 -sS'genome_context_intronic_sequence' -p38 -g16 -sS'hgvs_lrg_variant' -p39 -g16 -sS'hgvs_transcript_variant' -p40 -S'NM_014611.1:c.9879T>C' -p41 -sS'hgvs_refseqgene_variant' -p42 -g16 -sS'primary_assembly_loci' -p43 -(dp44 -S'hg19' -p45 -(dp46 -S'hgvs_genomic_description' -p47 -S'NC_000006.11:g.90403795G=' -p48 -sS'vcf' -p49 -(dp50 -S'chr' -p51 -S'chr6' -p52 -sS'ref' -p53 -S'G' -p54 -sS'pos' -p55 -S'90403795' -p56 -sS'alt' -p57 -g54 -sssS'grch37' -p58 -(dp59 -g47 -S'NC_000006.11:g.90403795G=' -p60 -sg49 -(dp61 -g51 -S'6' -p62 -sg53 -g54 -sg55 -S'90403795' -p63 -sg57 -g54 -ssssS'reference_sequence_records' 
-p64 -(dp65 -S'protein' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1' -p67 -sS'transcript' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.1' -p69 -sssS'NM_014611.2:c.9879C=' -p70 -(dp71 -g15 -g16 -sg17 -(lp72 -S'RefSeqGene record not available' -p73 -asg23 -g16 -sg24 -(lp74 -sg26 -VHomo sapiens midasin AAA ATPase 1 (MDN1), mRNA -p75 -sg28 -S'MDN1' -p76 -sg30 -(dp77 -g32 -S'NP_055426.1:p.(Val3293=)' -p78 -sg34 -S'NP_055426.1:p.(V3293=)' -p79 -ssg36 -g37 -sg38 -g16 -sg39 -g16 -sg40 -S'NM_014611.2:c.9879C=' -p80 -sg42 -g16 -sg43 -(dp81 -S'hg19' -p82 -(dp83 -g47 -S'NC_000006.11:g.90403795G=' -p84 -sg49 -(dp85 -g51 -g52 -sg53 -VG -p86 -sg55 -S'90403795' -p87 -sg57 -g86 -sssS'hg38' -p88 -(dp89 -g47 -S'NC_000006.12:g.89694076G=' -p90 -sg49 -(dp91 -g51 -g52 -sg53 -g86 -sg55 -S'89694076' -p92 -sg57 -g86 -sssS'grch37' -p93 -(dp94 -g47 -S'NC_000006.11:g.90403795G=' -p95 -sg49 -(dp96 -g51 -g62 -sg53 -g86 -sg55 -S'90403795' -p97 -sg57 -g86 -sssS'grch38' -p98 -(dp99 -g47 -S'NC_000006.12:g.89694076G=' -p100 -sg49 -(dp101 -g51 -g62 -sg53 -g86 -sg55 -S'89694076' -p102 -sg57 -g86 -ssssg64 -(dp103 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1' -p104 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.2' -p105 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant137.txt b/VariantValidator/testing/testOutputsMasterITS/variant137.txt deleted file mode 100644 index 3c3c9142..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant137.txt +++ /dev/null @@ -1,304 +0,0 @@ -(dp0 -S'NM_000130.4:c.1602del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Not stating ALT bases is ambiguous because VCF specification 4.0 would treat 1-169519049-T-. as a deletion whereas VCF specification 4.1 onwards would treat 1-169519049-T-. 
as ALT = REF' -p7 -aS'VariantValidator has output both alternatives' -p8 -aS'NC_000001.10:g.169519048TT>T automapped to NC_000001.10:g.169519049delT' -p9 -aS'NM_000130.4:c.1601del normalized to NM_000130.4:c.1602del' -p10 -aS'RefSeqGene record not available' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens coagulation factor V (F5), mRNA -p16 -sS'gene_symbol' -p17 -S'F5' -p18 -sS'hgvs_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_000121.2:p.(Arg534GlnfsTer40)' -p22 -sS'slr' -p23 -S'NP_000121.2:p.(R534Qfs*40)' -p24 -ssS'submitted_variant' -p25 -S'1-169519049-T-.' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'hgvs_lrg_variant' -p28 -g4 -sS'hgvs_transcript_variant' -p29 -S'NM_000130.4:c.1602del' -p30 -sS'hgvs_refseqgene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000001.10:g.169519048del' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr1' -p41 -sS'ref' -p42 -S'CT' -p43 -sS'pos' -p44 -S'169519047' -p45 -sS'alt' -p46 -S'C' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000001.11:g.169549810del' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'CT' -p52 -sg44 -S'169549809' -p53 -sg46 -g47 -sssS'grch37' -p54 -(dp55 -g36 -S'NC_000001.10:g.169519048del' -p56 -sg38 -(dp57 -g40 -S'1' -p58 -sg42 -S'CT' -p59 -sg44 -S'169519047' -p60 -sg46 -g47 -sssS'grch38' -p61 -(dp62 -g36 -S'NC_000001.11:g.169549810del' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -S'CT' -p65 -sg44 -S'169549809' -p66 -sg46 -g47 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000121.2' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000130.4' -p72 -sssS'flag' -p73 -S'gene_variant' -p74 -sS'NM_000130.4:c.1601G>A' -p75 -(dp76 -g3 -g4 -sg5 -(lp77 -S'Not stating ALT bases is ambiguous because VCF specification 4.0 would treat 1-169519049-T-. 
as a deletion whereas VCF specification 4.1 onwards would treat 1-169519049-T-. as ALT = REF' -p78 -aS'VariantValidator has output both alternatives' -p79 -aS'RefSeqGene record not available' -p80 -asg12 -g4 -sg13 -(lp81 -sg15 -VHomo sapiens coagulation factor V (F5), mRNA -p82 -sg17 -S'F5' -p83 -sg19 -(dp84 -g21 -S'NP_000121.2:p.(Arg534Gln)' -p85 -sg23 -S'NP_000121.2:p.(R534Q)' -p86 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_000130.4:c.1601G>A' -p87 -sg31 -g4 -sg32 -(dp88 -S'hg19' -p89 -(dp90 -g36 -S'NC_000001.10:g.169519049T=' -p91 -sg38 -(dp92 -g40 -g41 -sg42 -S'T' -p93 -sg44 -S'169519049' -p94 -sg46 -g93 -sssg48 -(dp95 -g36 -S'NC_000001.11:g.169549811C>T' -p96 -sg38 -(dp97 -g40 -g41 -sg42 -VC -p98 -sg44 -S'169549811' -p99 -sg46 -VT -p100 -sssS'grch37' -p101 -(dp102 -g36 -S'NC_000001.10:g.169519049T=' -p103 -sg38 -(dp104 -g40 -g58 -sg42 -g93 -sg44 -S'169519049' -p105 -sg46 -g93 -sssS'grch38' -p106 -(dp107 -g36 -S'NC_000001.11:g.169549811C>T' -p108 -sg38 -(dp109 -g40 -g58 -sg42 -g98 -sg44 -S'169549811' -p110 -sg46 -g100 -ssssg67 -(dp111 -g69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000121.2' -p112 -sg71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000130.4' -p113 -sssS'metadata' -p114 -(dp115 -S'variantvalidator_hgvs_version' -p116 -S'1.1.3' -p117 -sS'uta_schema' -p118 -S'uta_20180821' -p119 -sS'seqrepo_db' -p120 -S'2018-08-21' -p121 -sS'variantvalidator_version' -p122 -S'v0.2' -p123 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant138.txt b/VariantValidator/testing/testOutputsMasterITS/variant138.txt deleted file mode 100644 index d2442906..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant138.txt +++ /dev/null @@ -1,947 +0,0 @@ -(dp0 -S'NM_001204317.1:c.856-9155_856-9154=' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens prolactin receptor (PRLR), transcript variant 4, mRNA -p12 -sS'gene_symbol' -p13 -S'PRLR' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001191246.1:p.?' -p18 -sS'slr' -p19 -S'NP_001191246.1:p.?' -p20 -ssS'submitted_variant' -p21 -S'NC_000005.9:g.35058667_35058668AG=' -p22 -sS'genome_context_intronic_sequence' -p23 -S'NC_000005.9(NM_001204317.1):c.856-9155_856-9154=' -p24 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_001204317.1:c.856-9155_856-9154=' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'grch38' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000005.10:g.35058562_35058563=' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'5' -p38 -sS'ref' -p39 -S'AA' -p40 -sS'pos' -p41 -S'35058562' -p42 -sS'alt' -p43 -g40 -sssS'grch37' -p44 -(dp45 -g33 -S'NC_000005.9:g.35058667_35058668=' -p46 -sg35 -(dp47 -g37 -g38 -sg39 -S'AG' -p48 -sg41 -S'35058667' -p49 -sg43 -g48 -sssS'hg38' -p50 -(dp51 -g33 -S'NC_000005.10:g.35058562_35058563=' -p52 -sg35 -(dp53 -g37 -S'chr5' -p54 -sg39 -g40 -sg41 -S'35058562' -p55 -sg43 -g40 -sssS'hg19' -p56 -(dp57 -g33 -S'NC_000005.9:g.35058667_35058668=' -p58 -sg35 -(dp59 -g37 -g54 -sg39 -g48 -sg41 -S'35058667' -p60 -sg43 -g48 -ssssS'reference_sequence_records' -p61 -(dp62 -S'protein' -p63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191246.1' -p64 
-sS'transcript' -p65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204317.1' -p66 -sssS'NM_001204316.1:c.1009+7383_1009+7384=' -p67 -(dp68 -g3 -g4 -sg5 -(lp69 -S'RefSeqGene record not available' -p70 -asg8 -g4 -sg9 -(lp71 -sg11 -VHomo sapiens prolactin receptor (PRLR), transcript variant 3, mRNA -p72 -sg13 -S'PRLR' -p73 -sg15 -(dp74 -g17 -S'NP_001191245.1:p.?' -p75 -sg19 -S'NP_001191245.1:p.?' -p76 -ssg21 -g22 -sg23 -S'NC_000005.9(NM_001204316.1):c.1009+7383_1009+7384=' -p77 -sg25 -g4 -sg26 -S'NM_001204316.1:c.1009+7383_1009+7384=' -p78 -sg28 -g4 -sg29 -(dp79 -S'grch38' -p80 -(dp81 -g33 -S'NC_000005.10:g.35058565_35058566=' -p82 -sg35 -(dp83 -g37 -g38 -sg39 -S'AT' -p84 -sg41 -S'35058565' -p85 -sg43 -g84 -sssS'grch37' -p86 -(dp87 -g33 -S'NC_000005.9:g.35058667_35058668=' -p88 -sg35 -(dp89 -g37 -g38 -sg39 -g48 -sg41 -S'35058667' -p90 -sg43 -g48 -sssg50 -(dp91 -g33 -S'NC_000005.10:g.35058565_35058566=' -p92 -sg35 -(dp93 -g37 -g54 -sg39 -g84 -sg41 -S'35058565' -p94 -sg43 -g84 -sssS'hg19' -p95 -(dp96 -g33 -S'NC_000005.9:g.35058667_35058668=' -p97 -sg35 -(dp98 -g37 -g54 -sg39 -g48 -sg41 -S'35058667' -p99 -sg43 -g48 -ssssg61 -(dp100 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191245.1' -p101 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204316.1' -p102 -sssS'NM_001204314.2:c.*6528del' -p103 -(dp104 -g3 -g4 -sg5 -(lp105 -S'The displayed variants may be artefacts of aligning NM_001204314.2 with genome build GRCh37' -p106 -aS'NM_001204314.2:c.*6527_*6530 contains 1 transcript base(s) that fail to align to chromosome NC_000005.9' -p107 -aS'Caution should be used when reporting the displayed variant descriptions' -p108 -aS'If you are unsure, please contact admin' -p109 -aS'RefSeqGene record not available' -p110 -asg8 -g4 -sg9 -(lp111 -sg11 -VHomo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA -p112 -sg13 -S'PRLR' -p113 -sg15 -(dp114 -g17 -S'NP_001191243.1:p.?' -p115 -sg19 -S'NP_001191243.1:p.?' 
-p116 -ssg21 -g22 -sg23 -g4 -sg25 -g4 -sg26 -S'NM_001204314.2:c.*6528del' -p117 -sg28 -g4 -sg29 -(dp118 -S'grch38' -p119 -(dp120 -g33 -S'NC_000005.10:g.35058563del' -p121 -sg35 -(dp122 -g37 -g38 -sg39 -S'CA' -p123 -sg41 -S'35058560' -p124 -sg43 -S'C' -p125 -sssS'grch37' -p126 -(dp127 -g33 -S'NC_000005.9:g.35058662_35058668=' -p128 -sg35 -(dp129 -g37 -g38 -sg39 -S'AGACAAG' -p130 -sg41 -S'35058662' -p131 -sg43 -g130 -sssg50 -(dp132 -g33 -S'NC_000005.10:g.35058563del' -p133 -sg35 -(dp134 -g37 -g54 -sg39 -S'CA' -p135 -sg41 -S'35058560' -p136 -sg43 -g125 -sssS'hg19' -p137 -(dp138 -g33 -S'NC_000005.9:g.35058662_35058668=' -p139 -sg35 -(dp140 -g37 -g54 -sg39 -g130 -sg41 -S'35058662' -p141 -sg43 -g130 -ssssg61 -(dp142 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191243.1' -p143 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204314.2' -p144 -sssS'NM_001204318.1:c.686-9155_686-9154=' -p145 -(dp146 -g3 -g4 -sg5 -(lp147 -S'RefSeqGene record not available' -p148 -asg8 -g4 -sg9 -(lp149 -sg11 -VHomo sapiens prolactin receptor (PRLR), transcript variant 5, mRNA -p150 -sg13 -S'PRLR' -p151 -sg15 -(dp152 -g17 -S'NP_001191247.1:p.?' -p153 -sg19 -S'NP_001191247.1:p.?' 
-p154 -ssg21 -g22 -sg23 -S'NC_000005.9(NM_001204318.1):c.686-9155_686-9154=' -p155 -sg25 -g4 -sg26 -S'NM_001204318.1:c.686-9155_686-9154=' -p156 -sg28 -g4 -sg29 -(dp157 -S'grch38' -p158 -(dp159 -g33 -S'NC_000005.10:g.35058562_35058563=' -p160 -sg35 -(dp161 -g37 -g38 -sg39 -g40 -sg41 -S'35058562' -p162 -sg43 -g40 -sssS'grch37' -p163 -(dp164 -g33 -S'NC_000005.9:g.35058667_35058668=' -p165 -sg35 -(dp166 -g37 -g38 -sg39 -g48 -sg41 -S'35058667' -p167 -sg43 -g48 -sssg50 -(dp168 -g33 -S'NC_000005.10:g.35058562_35058563=' -p169 -sg35 -(dp170 -g37 -g54 -sg39 -g40 -sg41 -S'35058562' -p171 -sg43 -g40 -sssS'hg19' -p172 -(dp173 -g33 -S'NC_000005.9:g.35058667_35058668=' -p174 -sg35 -(dp175 -g37 -g54 -sg39 -g48 -sg41 -S'35058667' -p176 -sg43 -g48 -ssssg61 -(dp177 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191247.1' -p178 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204318.1' -p179 -sssS'NR_037910.1:n.828-9155_828-9154=' -p180 -(dp181 -g3 -g4 -sg5 -(lp182 -S'RefSeqGene record not available' -p183 -asg8 -g4 -sg9 -(lp184 -sg11 -VHomo sapiens prolactin receptor (PRLR), transcript variant 7, non-coding RNA -p185 -sg13 -S'PRLR' -p186 -sg15 -(dp187 -g17 -S'Non-coding :n.' 
-p188 -sg19 -g188 -ssg21 -g22 -sg23 -S'NC_000005.9(NR_037910.1):c.828-9155_828-9154=' -p189 -sg25 -g4 -sg26 -S'NR_037910.1:n.828-9155_828-9154=' -p190 -sg28 -g4 -sg29 -(dp191 -S'grch38' -p192 -(dp193 -g33 -S'NC_000005.10:g.35058562_35058563=' -p194 -sg35 -(dp195 -g37 -g38 -sg39 -g40 -sg41 -S'35058562' -p196 -sg43 -g40 -sssS'grch37' -p197 -(dp198 -g33 -S'NC_000005.9:g.35058667_35058668=' -p199 -sg35 -(dp200 -g37 -g38 -sg39 -g48 -sg41 -S'35058667' -p201 -sg43 -g48 -sssg50 -(dp202 -g33 -S'NC_000005.10:g.35058562_35058563=' -p203 -sg35 -(dp204 -g37 -g54 -sg39 -g40 -sg41 -S'35058562' -p205 -sg43 -g40 -sssS'hg19' -p206 -(dp207 -g33 -S'NC_000005.9:g.35058667_35058668=' -p208 -sg35 -(dp209 -g37 -g54 -sg39 -g48 -sg41 -S'35058667' -p210 -sg43 -g48 -ssssg61 -(dp211 -g65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_037910.1' -p212 -sssS'metadata' -p213 -(dp214 -S'variantvalidator_hgvs_version' -p215 -S'1.1.3' -p216 -sS'uta_schema' -p217 -S'uta_20180821' -p218 -sS'seqrepo_db' -p219 -S'2018-08-21' -p220 -sS'variantvalidator_version' -p221 -S'v0.2' -p222 -ssS'flag' -p223 -S'gene_variant' -p224 -sS'NM_000949.5:c.*6523_*6524=' -p225 -(dp226 -g3 -g4 -sg5 -(lp227 -S'A more recent version of the selected reference sequence NM_000949.5 is available (NM_000949.6)' -p228 -aS'NM_000949.6:c.*6523_*6524delATinsCT MUST be fully validated prior to use in reports' -p229 -aS'select_variants=NM_000949.6:c.*6523_*6524delinsCT' -p230 -aS'RefSeqGene record not available' -p231 -asg8 -g4 -sg9 -(lp232 -sg11 -VHomo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA -p233 -sg13 -S'PRLR' -p234 -sg15 -(dp235 -g17 -S'NP_000940.1:p.?' -p236 -sg19 -S'NP_000940.1:p.?' 
-p237 -ssg21 -g22 -sg23 -g4 -sg25 -g4 -sg26 -S'NM_000949.5:c.*6523_*6524=' -p238 -sg28 -g4 -sg29 -(dp239 -S'hg19' -p240 -(dp241 -g33 -S'NC_000005.9:g.35058667_35058668=' -p242 -sg35 -(dp243 -g37 -g54 -sg39 -VAG -p244 -sg41 -S'35058667' -p245 -sg43 -g244 -sssS'grch37' -p246 -(dp247 -g33 -S'NC_000005.9:g.35058667_35058668=' -p248 -sg35 -(dp249 -g37 -g38 -sg39 -g244 -sg41 -S'35058667' -p250 -sg43 -g244 -ssssg61 -(dp251 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000940.1' -p252 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000949.5' -p253 -sssS'NM_001204314.1:c.*6523_*6524=' -p254 -(dp255 -g3 -g4 -sg5 -(lp256 -S'A more recent version of the selected reference sequence NM_001204314.1 is available (NM_001204314.2)' -p257 -aS'NM_001204314.2:c.*6523_*6524delATinsCT MUST be fully validated prior to use in reports' -p258 -aS'select_variants=NM_001204314.2:c.*6523_*6524delinsCT' -p259 -aS'RefSeqGene record not available' -p260 -asg8 -g4 -sg9 -(lp261 -sg11 -VHomo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA -p262 -sg13 -S'PRLR' -p263 -sg15 -(dp264 -g17 -S'NP_001191243.1:p.?' -p265 -sg19 -S'NP_001191243.1:p.?' 
-p266 -ssg21 -g22 -sg23 -g4 -sg25 -g4 -sg26 -S'NM_001204314.1:c.*6523_*6524=' -p267 -sg28 -g4 -sg29 -(dp268 -S'hg19' -p269 -(dp270 -g33 -S'NC_000005.9:g.35058667_35058668=' -p271 -sg35 -(dp272 -g37 -g54 -sg39 -VAG -p273 -sg41 -S'35058667' -p274 -sg43 -g273 -sssS'grch37' -p275 -(dp276 -g33 -S'NC_000005.9:g.35058667_35058668=' -p277 -sg35 -(dp278 -g37 -g38 -sg39 -g273 -sg41 -S'35058667' -p279 -sg43 -g273 -ssssg61 -(dp280 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191243.1' -p281 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204314.1' -p282 -sssS'NM_000949.6:c.*6528del' -p283 -(dp284 -g3 -g4 -sg5 -(lp285 -S'The displayed variants may be artefacts of aligning NM_000949.6 with genome build GRCh37' -p286 -aS'NM_000949.6:c.*6527_*6530 contains 1 transcript base(s) that fail to align to chromosome NC_000005.9' -p287 -aS'Caution should be used when reporting the displayed variant descriptions' -p288 -aS'If you are unsure, please contact admin' -p289 -aS'RefSeqGene record not available' -p290 -asg8 -g4 -sg9 -(lp291 -sg11 -VHomo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA -p292 -sg13 -S'PRLR' -p293 -sg15 -(dp294 -g17 -S'NP_000940.1:p.?' -p295 -sg19 -S'NP_000940.1:p.?' 
-p296 -ssg21 -g22 -sg23 -g4 -sg25 -g4 -sg26 -S'NM_000949.6:c.*6528del' -p297 -sg28 -g4 -sg29 -(dp298 -S'grch38' -p299 -(dp300 -g33 -S'NC_000005.10:g.35058563del' -p301 -sg35 -(dp302 -g37 -g38 -sg39 -S'CA' -p303 -sg41 -S'35058560' -p304 -sg43 -g125 -sssS'grch37' -p305 -(dp306 -g33 -S'NC_000005.9:g.35058662_35058668=' -p307 -sg35 -(dp308 -g37 -g38 -sg39 -g130 -sg41 -S'35058662' -p309 -sg43 -g130 -sssg50 -(dp310 -g33 -S'NC_000005.10:g.35058563del' -p311 -sg35 -(dp312 -g37 -g54 -sg39 -S'CA' -p313 -sg41 -S'35058560' -p314 -sg43 -g125 -sssS'hg19' -p315 -(dp316 -g33 -S'NC_000005.9:g.35058662_35058668=' -p317 -sg35 -(dp318 -g37 -g54 -sg39 -g130 -sg41 -S'35058662' -p319 -sg43 -g130 -ssssg61 -(dp320 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000940.1' -p321 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000949.6' -p322 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant139.txt b/VariantValidator/testing/testOutputsMasterITS/variant139.txt deleted file mode 100644 index 1b7cf83d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant139.txt +++ /dev/null @@ -1,80 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Required information for NM_000251.1 is missing from the Universal Transcript Archive, please select an alternative version of NM_000251.1 by submitting NM_000251.1 or MSH2 to https://variantvalidator.org/ref_finder/, or select an alternative genome build' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'hgvs_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NM_000251.1:c.1296_1348del' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'hgvs_lrg_variant' -p20 -g4 -sS'hgvs_transcript_variant' -p21 -g4 -sS'hgvs_refseqgene_variant' -p22 -g4 
-sS'primary_assembly_loci' -p23 -(dp24 -sS'reference_sequence_records' -p25 -g4 -ssS'flag' -p26 -S'warning' -p27 -sS'metadata' -p28 -(dp29 -S'variantvalidator_hgvs_version' -p30 -S'1.1.3' -p31 -sS'uta_schema' -p32 -S'uta_20180821' -p33 -sS'seqrepo_db' -p34 -S'2018-08-21' -p35 -sS'variantvalidator_version' -p36 -S'v0.2' -p37 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant14.txt b/VariantValidator/testing/testOutputsMasterITS/variant14.txt deleted file mode 100644 index fbd6f6c2..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant14.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.642+1_642+2delinsG' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000088.3:c.642+1GT>G automapped to NM_000088.3:c.642+1_642+2delGTinsG' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A1' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000079.2:p.?' -p21 -sS'slr' -p22 -S'NP_000079.2:p.?' 
-p23 -ssS'submitted_variant' -p24 -S'NM_000088.3:c.642+1GT>G' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000017.10(NM_000088.3):c.642+1_642+2delinsG' -p27 -sS'hgvs_lrg_variant' -p28 -g6 -sS'hgvs_transcript_variant' -p29 -S'NM_000088.3:c.642+1_642+2delinsG' -p30 -sS'hgvs_refseqgene_variant' -p31 -g6 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000017.10:g.48275308_48275309delinsC' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr17' -p41 -sS'ref' -p42 -S'TA' -p43 -sS'pos' -p44 -S'48275307' -p45 -sS'alt' -p46 -S'T' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000017.11:g.50197947_50197948delinsC' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'TA' -p52 -sg44 -S'50197946' -p53 -sg46 -g47 -sssS'grch37' -p54 -(dp55 -g36 -S'NC_000017.10:g.48275308_48275309delinsC' -p56 -sg38 -(dp57 -g40 -S'17' -p58 -sg42 -S'TA' -p59 -sg44 -S'48275307' -p60 -sg46 -g47 -sssS'grch38' -p61 -(dp62 -g36 -S'NC_000017.11:g.50197947_50197948delinsC' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -S'TA' -p65 -sg44 -S'50197946' -p66 -sg46 -g47 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p72 -sssS'metadata' -p73 -(dp74 -S'variantvalidator_hgvs_version' -p75 -S'1.1.3' -p76 -sS'uta_schema' -p77 -S'uta_20180821' -p78 -sS'seqrepo_db' -p79 -S'2018-08-21' -p80 -sS'variantvalidator_version' -p81 -S'v0.2' -p82 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant140.txt b/VariantValidator/testing/testOutputsMasterITS/variant140.txt deleted file mode 100644 index 25517962..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant140.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.2024_2028+1del' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000088.3:c.2023_2028del normalized to NM_000088.3:c.2024_2028+1del' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A1' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000079.2:p.(Ala675_Arg676del)' -p21 -sS'slr' -p22 -S'NP_000079.2:p.(A675_R676del)' -p23 -ssS'submitted_variant' -p24 -S'NM_000088.3:c.2023_2028del' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000017.10(NM_000088.3):c.2024_2028+1del' -p27 -sS'hgvs_lrg_variant' -p28 -g6 -sS'hgvs_transcript_variant' -p29 -S'NM_000088.3:c.2024_2028+1del' -p30 -sS'hgvs_refseqgene_variant' -p31 -g6 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000017.10:g.48269340_48269345del' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr17' -p41 -sS'ref' -p42 -S'ACTCTTG' -p43 -sS'pos' -p44 -S'48269339' -p45 -sS'alt' -p46 -S'A' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000017.11:g.50191979_50191984del' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'ACTCTTG' -p52 -sg44 -S'50191978' -p53 -sg46 -g47 -sssS'grch37' -p54 -(dp55 -g36 -S'NC_000017.10:g.48269340_48269345del' -p56 -sg38 -(dp57 -g40 -S'17' -p58 -sg42 -S'ACTCTTG' -p59 -sg44 -S'48269339' -p60 -sg46 -g47 -sssS'grch38' -p61 -(dp62 -g36 -S'NC_000017.11:g.50191979_50191984del' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -S'ACTCTTG' -p65 
-sg44 -S'50191978' -p66 -sg46 -g47 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p72 -sssS'metadata' -p73 -(dp74 -S'variantvalidator_hgvs_version' -p75 -S'1.1.3' -p76 -sS'uta_schema' -p77 -S'uta_20180821' -p78 -sS'seqrepo_db' -p79 -S'2018-08-21' -p80 -sS'variantvalidator_version' -p81 -S'v0.2' -p82 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant141.txt b/VariantValidator/testing/testOutputsMasterITS/variant141.txt deleted file mode 100644 index aa04dcaf..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant141.txt +++ /dev/null @@ -1,175 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.2024_2028+1del' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2:p.?' -p20 -sS'slr' -p21 -S'NP_000079.2:p.?' 
-p22 -ssS'submitted_variant' -p23 -S'NM_000088.3:c.2024_2028+1del' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000017.10(NM_000088.3):c.2024_2028+1del' -p26 -sS'hgvs_lrg_variant' -p27 -g6 -sS'hgvs_transcript_variant' -p28 -S'NM_000088.3:c.2024_2028+1del' -p29 -sS'hgvs_refseqgene_variant' -p30 -g6 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'hgvs_genomic_description' -p35 -S'NC_000017.10:g.48269340_48269345del' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr17' -p40 -sS'ref' -p41 -S'ACTCTTG' -p42 -sS'pos' -p43 -S'48269339' -p44 -sS'alt' -p45 -S'A' -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000017.11:g.50191979_50191984del' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -S'ACTCTTG' -p51 -sg43 -S'50191978' -p52 -sg45 -g46 -sssS'grch37' -p53 -(dp54 -g35 -S'NC_000017.10:g.48269340_48269345del' -p55 -sg37 -(dp56 -g39 -S'17' -p57 -sg41 -S'ACTCTTG' -p58 -sg43 -S'48269339' -p59 -sg45 -g46 -sssS'grch38' -p60 -(dp61 -g35 -S'NC_000017.11:g.50191979_50191984del' -p62 -sg37 -(dp63 -g39 -g57 -sg41 -S'ACTCTTG' -p64 -sg43 -S'50191978' -p65 -sg45 -g46 -ssssS'reference_sequence_records' -p66 -(dp67 -S'protein' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p69 -sS'transcript' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p71 -sssS'metadata' -p72 -(dp73 -S'variantvalidator_hgvs_version' -p74 -S'1.1.3' -p75 -sS'uta_schema' -p76 -S'uta_20180821' -p77 -sS'seqrepo_db' -p78 -S'2018-08-21' -p79 -sS'variantvalidator_version' -p80 -S'v0.2' -p81 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant142.txt b/VariantValidator/testing/testOutputsMasterITS/variant142.txt deleted file mode 100644 index 09b8d9ec..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant142.txt +++ /dev/null @@ -1,80 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Unable to map ENST00000450616.1 to an equivalent RefSeq transcript' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'hgvs_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'ENST00000450616.1:n.31+1G>C' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'hgvs_lrg_variant' -p20 -g4 -sS'hgvs_transcript_variant' -p21 -g4 -sS'hgvs_refseqgene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -sS'reference_sequence_records' -p25 -g4 -ssS'flag' -p26 -S'warning' -p27 -sS'metadata' -p28 -(dp29 -S'variantvalidator_hgvs_version' -p30 -S'1.1.3' -p31 -sS'uta_schema' -p32 -S'uta_20180821' -p33 -sS'seqrepo_db' -p34 -S'2018-08-21' -p35 -sS'variantvalidator_version' -p36 -S'v0.2' -p37 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant143.txt b/VariantValidator/testing/testOutputsMasterITS/variant143.txt deleted file mode 100644 index 0c77bfde..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant143.txt +++ /dev/null @@ -1,80 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Unable to map ENST00000491747 to an equivalent RefSeq transcript' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'hgvs_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'ENST00000491747:c.5071A>T' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'hgvs_lrg_variant' -p20 -g4 -sS'hgvs_transcript_variant' -p21 -g4 -sS'hgvs_refseqgene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -sS'reference_sequence_records' -p25 -g4 -ssS'flag' -p26 -S'warning' -p27 -sS'metadata' -p28 -(dp29 -S'variantvalidator_hgvs_version' -p30 -S'1.1.3' -p31 -sS'uta_schema' -p32 -S'uta_20180821' -p33 -sS'seqrepo_db' -p34 -S'2018-08-21' -p35 -sS'variantvalidator_version' -p36 -S'v0.2' -p37 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant144.txt b/VariantValidator/testing/testOutputsMasterITS/variant144.txt deleted file mode 100644 index 54077e93..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant144.txt +++ /dev/null @@ -1,171 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589G>T' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2:p.(Gly197Cys)' -p20 -sS'slr' -p21 -S'NP_000079.2:p.(G197C)' -p22 -ssS'submitted_variant' -p23 -S'NG_007400.1:g.8638G>T' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_000088.3:c.589G>T' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000017.10:g.48275363C>A' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr17' -p39 -sS'ref' -p40 -VC -p41 -sS'pos' -p42 -S'48275363' -p43 -sS'alt' -p44 -VA -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000017.11:g.50198002C>A' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'50198002' -p50 -sg44 -g45 -sssS'grch37' -p51 -(dp52 -g34 -S'NC_000017.10:g.48275363C>A' -p53 -sg36 -(dp54 -g38 -S'17' -p55 -sg40 -g41 -sg42 -S'48275363' -p56 -sg44 -g45 -sssS'grch38' -p57 -(dp58 -g34 -S'NC_000017.11:g.50198002C>A' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'50198002' -p61 -sg44 -g45 -ssssS'reference_sequence_records' -p62 -(dp63 -S'protein' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p65 -sS'transcript' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p67 
-sssS'metadata' -p68 -(dp69 -S'variantvalidator_hgvs_version' -p70 -S'1.1.3' -p71 -sS'uta_schema' -p72 -S'uta_20180821' -p73 -sS'seqrepo_db' -p74 -S'2018-08-21' -p75 -sS'variantvalidator_version' -p76 -S'v0.2' -p77 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant145.txt b/VariantValidator/testing/testOutputsMasterITS/variant145.txt deleted file mode 100644 index a857e60f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant145.txt +++ /dev/null @@ -1,80 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Invalid reference sequence identifier (LRG_1)' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'hgvs_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'LRG_1:g.8638G>T' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'hgvs_lrg_variant' -p20 -g4 -sS'hgvs_transcript_variant' -p21 -g4 -sS'hgvs_refseqgene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -sS'reference_sequence_records' -p25 -g4 -ssS'flag' -p26 -S'warning' -p27 -sS'metadata' -p28 -(dp29 -S'variantvalidator_hgvs_version' -p30 -S'1.1.3' -p31 -sS'uta_schema' -p32 -S'uta_20180821' -p33 -sS'seqrepo_db' -p34 -S'2018-08-21' -p35 -sS'variantvalidator_version' -p36 -S'v0.2' -p37 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant146.txt b/VariantValidator/testing/testOutputsMasterITS/variant146.txt deleted file mode 100644 index 73f553f0..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant146.txt +++ /dev/null @@ -1,80 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'No transcript definition for (tx_ac=LRG_1t1)' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'hgvs_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'LRG_1t1:c.589G>T' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'hgvs_lrg_variant' -p20 -g4 -sS'hgvs_transcript_variant' -p21 -g4 -sS'hgvs_refseqgene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -sS'reference_sequence_records' -p25 -g4 -ssS'flag' -p26 -S'warning' -p27 -sS'metadata' -p28 -(dp29 -S'variantvalidator_hgvs_version' -p30 -S'1.1.3' -p31 -sS'uta_schema' -p32 -S'uta_20180821' -p33 -sS'seqrepo_db' -p34 -S'2018-08-21' -p35 -sS'variantvalidator_version' -p36 -S'v0.2' -p37 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant147.txt b/VariantValidator/testing/testOutputsMasterITS/variant147.txt deleted file mode 100644 index d1af95d0..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant147.txt +++ /dev/null @@ -1,704 +0,0 @@ -(dp0 -S'NM_002474.2:c.3034_3035inv' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -(dp11 -S'grch38' -p12 -(dp13 -S'hgvs_genomic_description' -p14 -S'NT_187607.1:g.1396662_1396663inv' -p15 -sS'vcf' -p16 -(dp17 -S'chr' -p18 -S'HSCHR16_1_CTG1' -p19 -sS'ref' -p20 -S'GT' -p21 -sS'pos' -p22 -S'1396662' -p23 -sS'alt' -p24 -S'AC' -p25 -sssa(dp26 -S'hg38' -p27 -(dp28 -g14 -S'NT_187607.1:g.1396662_1396663inv' -p29 -sg16 -(dp30 -g18 -S'chr16_KI270853v1_alt' -p31 -sg20 -S'GT' -p32 -sg22 -S'1396662' -p33 -sg24 -S'AC' -p34 -sssasS'transcript_description' -p35 -VHomo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1A, mRNA -p36 -sS'gene_symbol' -p37 -S'MYH11' -p38 -sS'hgvs_predicted_protein_consequence' -p39 -(dp40 -S'tlr' -p41 -S'NP_002465.1:p.(Thr1012Val)' -p42 -sS'slr' -p43 -S'NP_002465.1:p.(T1012V)' -p44 -ssS'submitted_variant' -p45 -S'chr16:g.15832508_15832509delinsAC' -p46 -sS'genome_context_intronic_sequence' -p47 -g4 -sS'hgvs_lrg_variant' -p48 -g4 -sS'hgvs_transcript_variant' -p49 -S'NM_002474.2:c.3034_3035inv' -p50 -sS'hgvs_refseqgene_variant' -p51 -g4 -sS'primary_assembly_loci' -p52 -(dp53 -S'grch38' -p54 -(dp55 -g14 -S'NC_000016.10:g.15738651_15738652inv' -p56 -sg16 -(dp57 -g18 -S'16' -p58 -sg20 -S'GT' -p59 -sg22 -S'15738651' -p60 -sg24 -S'AC' -p61 -sssS'grch37' -p62 -(dp63 -g14 -S'NC_000016.9:g.15832508_15832509inv' -p64 -sg16 -(dp65 -g18 -g58 -sg20 -S'GT' -p66 -sg22 -S'15832508' -p67 -sg24 -S'AC' -p68 -sssg27 -(dp69 -g14 -S'NC_000016.10:g.15738651_15738652inv' -p70 -sg16 -(dp71 
-g18 -S'chr16' -p72 -sg20 -S'GT' -p73 -sg22 -S'15738651' -p74 -sg24 -S'AC' -p75 -sssS'hg19' -p76 -(dp77 -g14 -S'NC_000016.9:g.15832508_15832509inv' -p78 -sg16 -(dp79 -g18 -g72 -sg20 -S'GT' -p80 -sg22 -S'15832508' -p81 -sg24 -S'AC' -p82 -ssssS'reference_sequence_records' -p83 -(dp84 -S'protein' -p85 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_002465.1' -p86 -sS'transcript' -p87 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_002474.2' -p88 -sssS'NM_022844.2:c.3034_3035inv' -p89 -(dp90 -g3 -g4 -sg5 -(lp91 -S'RefSeqGene record not available' -p92 -asg8 -g4 -sg9 -(lp93 -(dp94 -S'grch38' -p95 -(dp96 -g14 -S'NT_187607.1:g.1396662_1396663inv' -p97 -sg16 -(dp98 -g18 -g19 -sg20 -S'GT' -p99 -sg22 -S'1396662' -p100 -sg24 -S'AC' -p101 -sssa(dp102 -g27 -(dp103 -g14 -S'NT_187607.1:g.1396662_1396663inv' -p104 -sg16 -(dp105 -g18 -g31 -sg20 -S'GT' -p106 -sg22 -S'1396662' -p107 -sg24 -S'AC' -p108 -sssasg35 -VHomo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2A, mRNA -p109 -sg37 -S'MYH11' -p110 -sg39 -(dp111 -g41 -S'NP_074035.1:p.(Thr1012Val)' -p112 -sg43 -S'NP_074035.1:p.(T1012V)' -p113 -ssg45 -g46 -sg47 -g4 -sg48 -g4 -sg49 -S'NM_022844.2:c.3034_3035inv' -p114 -sg51 -g4 -sg52 -(dp115 -S'grch38' -p116 -(dp117 -g14 -S'NC_000016.10:g.15738651_15738652inv' -p118 -sg16 -(dp119 -g18 -g58 -sg20 -S'GT' -p120 -sg22 -S'15738651' -p121 -sg24 -S'AC' -p122 -sssS'grch37' -p123 -(dp124 -g14 -S'NC_000016.9:g.15832508_15832509inv' -p125 -sg16 -(dp126 -g18 -g58 -sg20 -S'GT' -p127 -sg22 -S'15832508' -p128 -sg24 -S'AC' -p129 -sssg27 -(dp130 -g14 -S'NC_000016.10:g.15738651_15738652inv' -p131 -sg16 -(dp132 -g18 -g72 -sg20 -S'GT' -p133 -sg22 -S'15738651' -p134 -sg24 -S'AC' -p135 -sssS'hg19' -p136 -(dp137 -g14 -S'NC_000016.9:g.15832508_15832509inv' -p138 -sg16 -(dp139 -g18 -g72 -sg20 -S'GT' -p140 -sg22 -S'15832508' -p141 -sg24 -S'AC' -p142 -ssssg83 -(dp143 -g85 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_074035.1' -p144 -sg87 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_022844.2' -p145 
-sssS'NM_001040114.1:c.3055_3056inv' -p146 -(dp147 -g3 -g4 -sg5 -(lp148 -S'RefSeqGene record not available' -p149 -asg8 -g4 -sg9 -(lp150 -(dp151 -S'grch38' -p152 -(dp153 -g14 -S'NT_187607.1:g.1396662_1396663inv' -p154 -sg16 -(dp155 -g18 -g19 -sg20 -S'GT' -p156 -sg22 -S'1396662' -p157 -sg24 -S'AC' -p158 -sssa(dp159 -g27 -(dp160 -g14 -S'NT_187607.1:g.1396662_1396663inv' -p161 -sg16 -(dp162 -g18 -g31 -sg20 -S'GT' -p163 -sg22 -S'1396662' -p164 -sg24 -S'AC' -p165 -sssasg35 -VHomo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1B, mRNA -p166 -sg37 -S'MYH11' -p167 -sg39 -(dp168 -g41 -S'NP_001035203.1:p.(Thr1019Val)' -p169 -sg43 -S'NP_001035203.1:p.(T1019V)' -p170 -ssg45 -g46 -sg47 -g4 -sg48 -g4 -sg49 -S'NM_001040114.1:c.3055_3056inv' -p171 -sg51 -g4 -sg52 -(dp172 -S'grch38' -p173 -(dp174 -g14 -S'NC_000016.10:g.15738651_15738652inv' -p175 -sg16 -(dp176 -g18 -g58 -sg20 -S'GT' -p177 -sg22 -S'15738651' -p178 -sg24 -S'AC' -p179 -sssS'grch37' -p180 -(dp181 -g14 -S'NC_000016.9:g.15832508_15832509inv' -p182 -sg16 -(dp183 -g18 -g58 -sg20 -S'GT' -p184 -sg22 -S'15832508' -p185 -sg24 -S'AC' -p186 -sssg27 -(dp187 -g14 -S'NC_000016.10:g.15738651_15738652inv' -p188 -sg16 -(dp189 -g18 -g72 -sg20 -S'GT' -p190 -sg22 -S'15738651' -p191 -sg24 -S'AC' -p192 -sssS'hg19' -p193 -(dp194 -g14 -S'NC_000016.9:g.15832508_15832509inv' -p195 -sg16 -(dp196 -g18 -g72 -sg20 -S'GT' -p197 -sg22 -S'15832508' -p198 -sg24 -S'AC' -p199 -ssssg83 -(dp200 -g85 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035203.1' -p201 -sg87 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040114.1' -p202 -sssS'flag' -p203 -S'gene_variant' -p204 -sS'NM_001040113.1:c.3055_3056inv' -p205 -(dp206 -g3 -g4 -sg5 -(lp207 -S'RefSeqGene record not available' -p208 -asg8 -g4 -sg9 -(lp209 -(dp210 -S'grch38' -p211 -(dp212 -g14 -S'NT_187607.1:g.1396662_1396663inv' -p213 -sg16 -(dp214 -g18 -g19 -sg20 -S'GT' -p215 -sg22 -S'1396662' -p216 -sg24 -S'AC' -p217 -sssa(dp218 -g27 -(dp219 -g14 -S'NT_187607.1:g.1396662_1396663inv' -p220 -sg16 
-(dp221 -g18 -g31 -sg20 -S'GT' -p222 -sg22 -S'1396662' -p223 -sg24 -S'AC' -p224 -sssasg35 -VHomo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2B, mRNA -p225 -sg37 -S'MYH11' -p226 -sg39 -(dp227 -g41 -S'NP_001035202.1:p.(Thr1019Val)' -p228 -sg43 -S'NP_001035202.1:p.(T1019V)' -p229 -ssg45 -g46 -sg47 -g4 -sg48 -g4 -sg49 -S'NM_001040113.1:c.3055_3056inv' -p230 -sg51 -g4 -sg52 -(dp231 -S'grch38' -p232 -(dp233 -g14 -S'NC_000016.10:g.15738651_15738652inv' -p234 -sg16 -(dp235 -g18 -g58 -sg20 -S'GT' -p236 -sg22 -S'15738651' -p237 -sg24 -S'AC' -p238 -sssS'grch37' -p239 -(dp240 -g14 -S'NC_000016.9:g.15832508_15832509inv' -p241 -sg16 -(dp242 -g18 -g58 -sg20 -S'GT' -p243 -sg22 -S'15832508' -p244 -sg24 -S'AC' -p245 -sssg27 -(dp246 -g14 -S'NC_000016.10:g.15738651_15738652inv' -p247 -sg16 -(dp248 -g18 -g72 -sg20 -S'GT' -p249 -sg22 -S'15738651' -p250 -sg24 -S'AC' -p251 -sssS'hg19' -p252 -(dp253 -g14 -S'NC_000016.9:g.15832508_15832509inv' -p254 -sg16 -(dp255 -g18 -g72 -sg20 -S'GT' -p256 -sg22 -S'15832508' -p257 -sg24 -S'AC' -p258 -ssssg83 -(dp259 -g85 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035202.1' -p260 -sg87 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040113.1' -p261 -sssS'metadata' -p262 -(dp263 -S'variantvalidator_hgvs_version' -p264 -S'1.1.3' -p265 -sS'uta_schema' -p266 -S'uta_20180821' -p267 -sS'seqrepo_db' -p268 -S'2018-08-21' -p269 -sS'variantvalidator_version' -p270 -S'v0.2' -p271 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant148.txt b/VariantValidator/testing/testOutputsMasterITS/variant148.txt deleted file mode 100644 index 79a095b8..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant148.txt +++ /dev/null @@ -1,535 +0,0 @@ -(dp0 -S'NM_001162427.1:c.210+1615dup' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 4, mRNA -p12 -sS'gene_symbol' -p13 -S'TSC1' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001155899.1:p.?' -p18 -sS'slr' -p19 -S'NP_001155899.1:p.?' -p20 -ssS'submitted_variant' -p21 -S'NG_012386.1:g.24048dupG' -p22 -sS'genome_context_intronic_sequence' -p23 -S'NC_000009.11(NM_001162427.1):c.210+1615dup' -p24 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_001162427.1:c.210+1615dup' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000009.11:g.135800973dup' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr9' -p38 -sS'ref' -p39 -S'C' -p40 -sS'pos' -p41 -S'135800973' -p42 -sS'alt' -p43 -S'CC' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000009.12:g.132925586dup' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'132925586' -p49 -sg43 -S'CC' -p50 -sssS'grch37' -p51 -(dp52 -g33 -S'NC_000009.11:g.135800973dup' -p53 -sg35 -(dp54 -g37 -S'9' -p55 -sg39 -g40 -sg41 -S'135800973' -p56 -sg43 -S'CC' -p57 -sssS'grch38' -p58 -(dp59 -g33 -S'NC_000009.12:g.132925586dup' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -g40 -sg41 -S'132925586' -p62 -sg43 -S'CC' -p63 -ssssS'reference_sequence_records' -p64 -(dp65 -S'protein' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155899.1' -p67 -sS'transcript' -p68 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162427.1' -p69 -sssS'NM_001162426.1:c.363+1dup' -p70 -(dp71 -g3 -g4 -sg5 -(lp72 -S'RefSeqGene record not available' -p73 -asg8 -g4 -sg9 -(lp74 -sg11 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 3, mRNA -p75 -sg13 -S'TSC1' -p76 -sg15 -(dp77 -g17 -S'NP_001155898.1:p.?' -p78 -sg19 -S'NP_001155898.1:p.?' -p79 -ssg21 -g22 -sg23 -S'NC_000009.11(NM_001162426.1):c.363+1dup' -p80 -sg25 -g4 -sg26 -S'NM_001162426.1:c.363+1dup' -p81 -sg28 -g4 -sg29 -(dp82 -S'hg19' -p83 -(dp84 -g33 -S'NC_000009.11:g.135800973dup' -p85 -sg35 -(dp86 -g37 -g38 -sg39 -g40 -sg41 -S'135800973' -p87 -sg43 -S'CC' -p88 -sssg45 -(dp89 -g33 -S'NC_000009.12:g.132925586dup' -p90 -sg35 -(dp91 -g37 -g38 -sg39 -g40 -sg41 -S'132925586' -p92 -sg43 -S'CC' -p93 -sssS'grch37' -p94 -(dp95 -g33 -S'NC_000009.11:g.135800973dup' -p96 -sg35 -(dp97 -g37 -g55 -sg39 -g40 -sg41 -S'135800973' -p98 -sg43 -S'CC' -p99 -sssS'grch38' -p100 -(dp101 -g33 -S'NC_000009.12:g.132925586dup' -p102 -sg35 -(dp103 -g37 -g55 -sg39 -g40 -sg41 -S'132925586' -p104 -sg43 -S'CC' -p105 -ssssg64 -(dp106 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155898.1' -p107 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162426.1' -p108 -sssS'flag' -p109 -S'gene_variant' -p110 -sS'NM_001362177.1:c.-1+1dup' -p111 -(dp112 -g3 -g4 -sg5 -(lp113 -S'RefSeqGene record not available' -p114 -asg8 -g4 -sg9 -(lp115 -sg11 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 5, mRNA -p116 -sg13 -S'TSC1' -p117 -sg15 -(dp118 -g17 -S'NP_001349106.1:p.?' -p119 -sg19 -S'NP_001349106.1:p.?' 
-p120 -ssg21 -g22 -sg23 -S'NC_000009.11(NM_001362177.1):c.-1+1dup' -p121 -sg25 -g4 -sg26 -S'NM_001362177.1:c.-1+1dup' -p122 -sg28 -g4 -sg29 -(dp123 -S'hg19' -p124 -(dp125 -g33 -S'NC_000009.11:g.135800973dup' -p126 -sg35 -(dp127 -g37 -g38 -sg39 -g40 -sg41 -S'135800973' -p128 -sg43 -S'CC' -p129 -sssg45 -(dp130 -g33 -S'NC_000009.12:g.132925586dup' -p131 -sg35 -(dp132 -g37 -g38 -sg39 -g40 -sg41 -S'132925586' -p133 -sg43 -S'CC' -p134 -sssS'grch37' -p135 -(dp136 -g33 -S'NC_000009.11:g.135800973dup' -p137 -sg35 -(dp138 -g37 -g55 -sg39 -g40 -sg41 -S'135800973' -p139 -sg43 -S'CC' -p140 -sssS'grch38' -p141 -(dp142 -g33 -S'NC_000009.12:g.132925586dup' -p143 -sg35 -(dp144 -g37 -g55 -sg39 -g40 -sg41 -S'132925586' -p145 -sg43 -S'CC' -p146 -ssssg64 -(dp147 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001349106.1' -p148 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001362177.1' -p149 -sssS'NM_000368.4:c.363+1dup' -p150 -(dp151 -g3 -g4 -sg5 -(lp152 -S'RefSeqGene record not available' -p153 -asg8 -g4 -sg9 -(lp154 -sg11 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA -p155 -sg13 -S'TSC1' -p156 -sg15 -(dp157 -g17 -S'NP_000359.1:p.?' -p158 -sg19 -S'NP_000359.1:p.?' 
-p159 -ssg21 -g22 -sg23 -S'NC_000009.11(NM_000368.4):c.363+1dup' -p160 -sg25 -g4 -sg26 -S'NM_000368.4:c.363+1dup' -p161 -sg28 -g4 -sg29 -(dp162 -S'hg19' -p163 -(dp164 -g33 -S'NC_000009.11:g.135800973dup' -p165 -sg35 -(dp166 -g37 -g38 -sg39 -g40 -sg41 -S'135800973' -p167 -sg43 -S'CC' -p168 -sssg45 -(dp169 -g33 -S'NC_000009.12:g.132925586dup' -p170 -sg35 -(dp171 -g37 -g38 -sg39 -g40 -sg41 -S'132925586' -p172 -sg43 -S'CC' -p173 -sssS'grch37' -p174 -(dp175 -g33 -S'NC_000009.11:g.135800973dup' -p176 -sg35 -(dp177 -g37 -g55 -sg39 -g40 -sg41 -S'135800973' -p178 -sg43 -S'CC' -p179 -sssS'grch38' -p180 -(dp181 -g33 -S'NC_000009.12:g.132925586dup' -p182 -sg35 -(dp183 -g37 -g55 -sg39 -g40 -sg41 -S'132925586' -p184 -sg43 -S'CC' -p185 -ssssg64 -(dp186 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1' -p187 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4' -p188 -sssS'metadata' -p189 -(dp190 -S'variantvalidator_hgvs_version' -p191 -S'1.1.3' -p192 -sS'uta_schema' -p193 -S'uta_20180821' -p194 -sS'seqrepo_db' -p195 -S'2018-08-21' -p196 -sS'variantvalidator_version' -p197 -S'v0.2' -p198 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant149.txt b/VariantValidator/testing/testOutputsMasterITS/variant149.txt deleted file mode 100644 index 6b333171..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant149.txt +++ /dev/null @@ -1,144 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_033517.1:c.1307_1309del' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'RefSeqGene record not available' -p19 -aS'NM_033517.1:c.1307_1309delCGA cannot be mapped directly to genome build GRCh37' -p20 -aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' -p21 -asS'refseqgene_context_intronic_sequence' -p22 -g16 -sS'alt_genomic_loci' -p23 -(lp24 -(dp25 -S'grch38' -p26 -(dp27 -S'hgvs_genomic_description' -p28 -S'NW_015148969.1:g.33721_33723del' -p29 -sS'vcf' -p30 -(dp31 -S'chr' -p32 -S'HG1311_PATCH' -p33 -sS'ref' -p34 -S'CCGA' -p35 -sS'pos' -p36 -S'33720' -p37 -sS'alt' -p38 -S'C' -p39 -sssa(dp40 -S'hg38' -p41 -(dp42 -g28 -S'NW_015148969.1:g.33721_33723del' -p43 -sg30 -(dp44 -g32 -S'NW_015148969.1' -p45 -sg34 -S'CCGA' -p46 -sg36 -S'33720' -p47 -sg38 -g39 -sssasS'transcript_description' -p48 -VHomo sapiens SH3 and multiple ankyrin repeat domains 3 (SHANK3), mRNA -p49 -sS'gene_symbol' -p50 -S'SHANK3' -p51 -sS'hgvs_predicted_protein_consequence' -p52 -(dp53 -S'tlr' -p54 -S'NP_277052.1:p.(Pro436_Ser437delinsArg)' -p55 -sS'slr' -p56 -S'NP_277052.1:p.(P436_S437delinsR)' -p57 -ssS'submitted_variant' -p58 -S'NM_033517.1:c.1307_1309delCGA' -p59 -sS'genome_context_intronic_sequence' -p60 -g16 -sS'hgvs_lrg_variant' -p61 -g16 -sS'hgvs_transcript_variant' -p62 -S'NM_033517.1:c.1307_1309del' -p63 -sS'hgvs_refseqgene_variant' -p64 -g16 
-sS'primary_assembly_loci' -p65 -(dp66 -sS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_277052.1' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_033517.1' -p72 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant15.txt b/VariantValidator/testing/testOutputsMasterITS/variant15.txt deleted file mode 100644 index f8cb933b..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant15.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589-2_589-1delinsG' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000088.3:c.589-2AG>G automapped to NM_000088.3:c.589-2_589-1delAGinsG' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A1' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000079.2:p.?' -p21 -sS'slr' -p22 -S'NP_000079.2:p.?' 
-p23 -ssS'submitted_variant' -p24 -S'NM_000088.3:c.589-2AG>G' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000017.10(NM_000088.3):c.589-2_589-1delinsG' -p27 -sS'hgvs_lrg_variant' -p28 -g6 -sS'hgvs_transcript_variant' -p29 -S'NM_000088.3:c.589-2_589-1delinsG' -p30 -sS'hgvs_refseqgene_variant' -p31 -g6 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000017.10:g.48275364_48275365delinsC' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr17' -p41 -sS'ref' -p42 -S'CT' -p43 -sS'pos' -p44 -S'48275364' -p45 -sS'alt' -p46 -S'C' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000017.11:g.50198003_50198004delinsC' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'CT' -p52 -sg44 -S'50198003' -p53 -sg46 -g47 -sssS'grch37' -p54 -(dp55 -g36 -S'NC_000017.10:g.48275364_48275365delinsC' -p56 -sg38 -(dp57 -g40 -S'17' -p58 -sg42 -S'CT' -p59 -sg44 -S'48275364' -p60 -sg46 -g47 -sssS'grch38' -p61 -(dp62 -g36 -S'NC_000017.11:g.50198003_50198004delinsC' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -S'CT' -p65 -sg44 -S'50198003' -p66 -sg46 -g47 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p72 -sssS'metadata' -p73 -(dp74 -S'variantvalidator_hgvs_version' -p75 -S'1.1.3' -p76 -sS'uta_schema' -p77 -S'uta_20180821' -p78 -sS'seqrepo_db' -p79 -S'2018-08-21' -p80 -sS'variantvalidator_version' -p81 -S'v0.2' -p82 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant150.txt b/VariantValidator/testing/testOutputsMasterITS/variant150.txt deleted file mode 100644 index 6487afa1..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant150.txt +++ /dev/null @@ -1,80 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'HG1311_PATCH is not part of genome build GRCh37' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'hgvs_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'HG1311_PATCH-33720-CCGA-C' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'hgvs_lrg_variant' -p20 -g4 -sS'hgvs_transcript_variant' -p21 -g4 -sS'hgvs_refseqgene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -sS'reference_sequence_records' -p25 -g4 -ssS'flag' -p26 -S'warning' -p27 -sS'metadata' -p28 -(dp29 -S'variantvalidator_hgvs_version' -p30 -S'1.1.3' -p31 -sS'uta_schema' -p32 -S'uta_20180821' -p33 -sS'seqrepo_db' -p34 -S'2018-08-21' -p35 -sS'variantvalidator_version' -p36 -S'v0.2' -p37 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant151.txt b/VariantValidator/testing/testOutputsMasterITS/variant151.txt deleted file mode 100644 index c900fb4d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant151.txt +++ /dev/null @@ -1,184 +0,0 @@ -(dp0 -S'NM_015120.4:c.1573_1579=' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000002.11:g.73675227TCTC>TCTCCTC automapped to NC_000002.11:g.73675228_73675230dupCTC' -p7 -aS'The displayed variants may be artefacts of aligning NM_015120.4 with genome build GRCh37' -p8 -aS'NM_015120.4:c.1573_1579 contains 3 transcript base(s) that fail to align to chromosome NC_000002.11' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -aS'RefSeqGene record not available' -p12 -asS'refseqgene_context_intronic_sequence' -p13 -g4 -sS'alt_genomic_loci' -p14 -(lp15 -sS'transcript_description' -p16 -VHomo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA -p17 -sS'gene_symbol' -p18 -S'ALMS1' -p19 -sS'hgvs_predicted_protein_consequence' -p20 -(dp21 -S'tlr' -p22 -S'NP_055935.4:p.(Ser525=)' -p23 -sS'slr' -p24 -S'NP_055935.4:p.(S525=)' -p25 -ssS'submitted_variant' -p26 -S'2-73675227-TCTC-TCTCCTC' -p27 -sS'genome_context_intronic_sequence' -p28 -g4 -sS'hgvs_lrg_variant' -p29 -g4 -sS'hgvs_transcript_variant' -p30 -S'NM_015120.4:c.1573_1579=' -p31 -sS'hgvs_refseqgene_variant' -p32 -g4 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'hgvs_genomic_description' -p37 -S'NC_000002.11:g.73675228_73675230dup' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr2' -p42 -sS'ref' -p43 -S'CTC' -p44 -sS'pos' -p45 -S'73675228' -p46 -sS'alt' -p47 -VCTCCTC -p48 -sssS'hg38' -p49 -(dp50 -g37 -S'NC_000002.12:g.73448097_73448103=' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -VTCTCCTC -p53 -sg45 -S'73448097' -p54 -sg47 -g53 -sssS'grch37' -p55 -(dp56 
-g37 -S'NC_000002.11:g.73675228_73675230dup' -p57 -sg39 -(dp58 -g41 -S'2' -p59 -sg43 -S'CTC' -p60 -sg45 -S'73675228' -p61 -sg47 -VCTCCTC -p62 -sssS'grch38' -p63 -(dp64 -g37 -S'NC_000002.12:g.73448097_73448103=' -p65 -sg39 -(dp66 -g41 -g59 -sg43 -g53 -sg45 -S'73448097' -p67 -sg47 -g53 -ssssS'reference_sequence_records' -p68 -(dp69 -S'protein' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4' -p71 -sS'transcript' -p72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4' -p73 -sssS'flag' -p74 -S'gene_variant' -p75 -sS'metadata' -p76 -(dp77 -S'variantvalidator_hgvs_version' -p78 -S'1.1.3' -p79 -sS'uta_schema' -p80 -S'uta_20180821' -p81 -sS'seqrepo_db' -p82 -S'2018-08-21' -p83 -sS'variantvalidator_version' -p84 -S'v0.2' -p85 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant152.txt b/VariantValidator/testing/testOutputsMasterITS/variant152.txt deleted file mode 100644 index be63fc92..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant152.txt +++ /dev/null @@ -1,183 +0,0 @@ -(dp0 -S'NM_015120.4:c.1577_1579del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000002.11:g.73675227TC>TC automapped to NC_000002.11:g.73675227_73675228TC=' -p7 -aS'The displayed variants may be artefacts of aligning NM_015120.4 with genome build GRCh37' -p8 -aS'NM_015120.4:c.1574_1576 contains 3 transcript base(s) that fail to align to chromosome NC_000002.11' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -aS'RefSeqGene record not available' -p12 -asS'refseqgene_context_intronic_sequence' -p13 -g4 -sS'alt_genomic_loci' -p14 -(lp15 -sS'transcript_description' -p16 -VHomo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA -p17 -sS'gene_symbol' -p18 -S'ALMS1' -p19 -sS'hgvs_predicted_protein_consequence' -p20 -(dp21 -S'tlr' -p22 -S'NP_055935.4:p.(Pro526del)' -p23 
-sS'slr' -p24 -S'NP_055935.4:p.(P526del)' -p25 -ssS'submitted_variant' -p26 -S'2-73675227-TC-TC' -p27 -sS'genome_context_intronic_sequence' -p28 -g4 -sS'hgvs_lrg_variant' -p29 -g4 -sS'hgvs_transcript_variant' -p30 -S'NM_015120.4:c.1577_1579del' -p31 -sS'hgvs_refseqgene_variant' -p32 -g4 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'hgvs_genomic_description' -p37 -S'NC_000002.11:g.73675227_73675228=' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr2' -p42 -sS'ref' -p43 -S'TC' -p44 -sS'pos' -p45 -S'73675227' -p46 -sS'alt' -p47 -g44 -sssS'hg38' -p48 -(dp49 -g37 -S'NC_000002.12:g.73448101_73448103del' -p50 -sg39 -(dp51 -g41 -g42 -sg43 -S'TCTC' -p52 -sg45 -S'73448097' -p53 -sg47 -S'T' -p54 -sssS'grch37' -p55 -(dp56 -g37 -S'NC_000002.11:g.73675227_73675228=' -p57 -sg39 -(dp58 -g41 -S'2' -p59 -sg43 -g44 -sg45 -S'73675227' -p60 -sg47 -g44 -sssS'grch38' -p61 -(dp62 -g37 -S'NC_000002.12:g.73448101_73448103del' -p63 -sg39 -(dp64 -g41 -g59 -sg43 -S'TCTC' -p65 -sg45 -S'73448097' -p66 -sg47 -g54 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4' -p72 -sssS'flag' -p73 -S'gene_variant' -p74 -sS'metadata' -p75 -(dp76 -S'variantvalidator_hgvs_version' -p77 -S'1.1.3' -p78 -sS'uta_schema' -p79 -S'uta_20180821' -p80 -sS'seqrepo_db' -p81 -S'2018-08-21' -p82 -sS'variantvalidator_version' -p83 -S'v0.2' -p84 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant153.txt b/VariantValidator/testing/testOutputsMasterITS/variant153.txt deleted file mode 100644 index ec8b6ddb..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant153.txt +++ /dev/null @@ -1,267 +0,0 @@ -(dp0 -S'NM_001080423.3:c.1016_1020=' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000003.11:g.14561627AG>AGG automapped to NC_000003.11:g.14561629dupG' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA -p13 -sS'gene_symbol' -p14 -S'GRIP2' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001073892.3:p.(Arg339=)' -p19 -sS'slr' -p20 -S'NP_001073892.3:p.(R339=)' -p21 -ssS'submitted_variant' -p22 -S'3-14561627-AG-AGG' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_001080423.3:c.1016_1020=' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000003.11:g.14561629dup' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr3' -p38 -sS'ref' -p39 -S'G' -p40 -sS'pos' -p41 -S'14561628' -p42 -sS'alt' -p43 -VGG -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000003.12:g.14520120_14520124=' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -VGGGCC -p49 -sg41 -S'14520120' -p50 -sg43 -g49 -sssS'grch37' -p51 -(dp52 -g33 -S'NC_000003.11:g.14561629dup' -p53 -sg35 -(dp54 -g37 -S'3' -p55 -sg39 -g40 -sg41 -S'14561628' -p56 -sg43 -VGG -p57 -sssS'grch38' -p58 -(dp59 -g33 -S'NC_000003.12:g.14520120_14520124=' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -g49 -sg41 -S'14520120' -p62 -sg43 -g49 -ssssS'reference_sequence_records' -p63 -(dp64 -S'protein' -p65 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.3' -p66 -sS'transcript' -p67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.3' -p68 -sssS'flag' -p69 -S'gene_variant' -p70 -sS'NM_001080423.2:c.1307_1311=' -p71 -(dp72 -g3 -g4 -sg5 -(lp73 -S'NC_000003.11:g.14561627AG>AGG automapped to NC_000003.11:g.14561629dupG' -p74 -aS'A more recent version of the selected reference sequence NM_001080423.2 is available (NM_001080423.3)' -p75 -aS'NM_001080423.3:c.1307_1311delinsGGCCC MUST be fully validated prior to use in reports' -p76 -aS'select_variants=NM_001080423.3:c.1307_1311delinsGGCCC' -p77 -aS'RefSeqGene record not available' -p78 -asg9 -g4 -sg10 -(lp79 -sg12 -VHomo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA -p80 -sg14 -S'GRIP2' -p81 -sg16 -(dp82 -g18 -S'NP_001073892.2:p.(Arg436=)' -p83 -sg20 -S'NP_001073892.2:p.(R436=)' -p84 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001080423.2:c.1307_1311=' -p85 -sg28 -g4 -sg29 -(dp86 -S'hg19' -p87 -(dp88 -g33 -S'NC_000003.11:g.14561629dup' -p89 -sg35 -(dp90 -g37 -g38 -sg39 -g40 -sg41 -S'14561628' -p91 -sg43 -VGG -p92 -sssS'grch37' -p93 -(dp94 -g33 -S'NC_000003.11:g.14561629dup' -p95 -sg35 -(dp96 -g37 -g55 -sg39 -g40 -sg41 -S'14561628' -p97 -sg43 -VGG -p98 -ssssg63 -(dp99 -g65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.2' -p100 -sg67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.2' -p101 -sssS'metadata' -p102 -(dp103 -S'variantvalidator_hgvs_version' -p104 -S'1.1.3' -p105 -sS'uta_schema' -p106 -S'uta_20180821' -p107 -sS'seqrepo_db' -p108 -S'2018-08-21' -p109 -sS'variantvalidator_version' -p110 -S'v0.2' -p111 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant154.txt b/VariantValidator/testing/testOutputsMasterITS/variant154.txt deleted file mode 100644 index 4ffeb83e..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant154.txt +++ /dev/null @@ -1,281 +0,0 @@ -(dp0 -S'NM_001080423.3:c.1020del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000003.11:g.14561630CC>CC automapped to NC_000003.11:g.14561630_14561631CC=' -p7 -aS'The displayed variants may be artefacts of aligning NM_001080423.3 with genome build GRCh37' -p8 -aS'NM_001080423.3:c.1019_1022 contains 1 transcript base(s) that fail to align to chromosome NC_000003.11' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -aS'RefSeqGene record not available' -p12 -asS'refseqgene_context_intronic_sequence' -p13 -g4 -sS'alt_genomic_loci' -p14 -(lp15 -sS'transcript_description' -p16 -VHomo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA -p17 -sS'gene_symbol' -p18 -S'GRIP2' -p19 -sS'hgvs_predicted_protein_consequence' -p20 -(dp21 -S'tlr' -p22 -S'NP_001073892.3:p.(Ser341GlnfsTer4)' -p23 -sS'slr' -p24 -S'NP_001073892.3:p.(S341Qfs*4)' -p25 -ssS'submitted_variant' -p26 -S'3-14561630-CC-CC' -p27 -sS'genome_context_intronic_sequence' -p28 -g4 -sS'hgvs_lrg_variant' -p29 -g4 -sS'hgvs_transcript_variant' -p30 -S'NM_001080423.3:c.1020del' -p31 -sS'hgvs_refseqgene_variant' -p32 -g4 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'hgvs_genomic_description' -p37 -S'NC_000003.11:g.14561624_14561630=' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr3' -p42 -sS'ref' -p43 -S'CTGAGGC' -p44 -sS'pos' -p45 -S'14561624' -p46 -sS'alt' -p47 -g44 -sssS'hg38' -p48 -(dp49 -g37 -S'NC_000003.12:g.14520122del' -p50 -sg39 -(dp51 -g41 -g42 -sg43 -S'AG' -p52 -sg45 -S'14520119' -p53 -sg47 -S'A' -p54 -sssS'grch37' -p55 -(dp56 -g37 
-S'NC_000003.11:g.14561624_14561630=' -p57 -sg39 -(dp58 -g41 -S'3' -p59 -sg43 -g44 -sg45 -S'14561624' -p60 -sg47 -g44 -sssS'grch38' -p61 -(dp62 -g37 -S'NC_000003.12:g.14520122del' -p63 -sg39 -(dp64 -g41 -g59 -sg43 -S'AG' -p65 -sg45 -S'14520119' -p66 -sg47 -g54 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.3' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.3' -p72 -sssS'flag' -p73 -S'gene_variant' -p74 -sS'NM_001080423.2:c.1311del' -p75 -(dp76 -g3 -g4 -sg5 -(lp77 -S'NC_000003.11:g.14561630CC>CC automapped to NC_000003.11:g.14561630_14561631CC=' -p78 -aS'The displayed variants may be artefacts of aligning NM_001080423.2 with genome build GRCh37' -p79 -aS'NM_001080423.2:c.1310_1313 contains 1 transcript base(s) that fail to align to chromosome NC_000003.11' -p80 -aS'Caution should be used when reporting the displayed variant descriptions' -p81 -aS'If you are unsure, please contact admin' -p82 -aS'A more recent version of the selected reference sequence NM_001080423.2 is available (NM_001080423.3)' -p83 -aS'NM_001080423.3:c.1311delG MUST be fully validated prior to use in reports' -p84 -aS'select_variants=NM_001080423.3:c.1311del' -p85 -aS'RefSeqGene record not available' -p86 -asg13 -g4 -sg14 -(lp87 -sg16 -VHomo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA -p88 -sg18 -S'GRIP2' -p89 -sg20 -(dp90 -g22 -S'NP_001073892.2:p.(Ser438GlnfsTer4)' -p91 -sg24 -S'NP_001073892.2:p.(S438Qfs*4)' -p92 -ssg26 -g27 -sg28 -g4 -sg29 -g4 -sg30 -S'NM_001080423.2:c.1311del' -p93 -sg32 -g4 -sg33 -(dp94 -S'hg19' -p95 -(dp96 -g37 -S'NC_000003.11:g.14561624_14561630=' -p97 -sg39 -(dp98 -g41 -g42 -sg43 -g44 -sg45 -S'14561624' -p99 -sg47 -g44 -sssS'grch37' -p100 -(dp101 -g37 -S'NC_000003.11:g.14561624_14561630=' -p102 -sg39 -(dp103 -g41 -g59 -sg43 -g44 -sg45 -S'14561624' -p104 -sg47 -g44 -ssssg67 -(dp105 -g69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.2' -p106 -sg71 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.2' -p107 -sssS'metadata' -p108 -(dp109 -S'variantvalidator_hgvs_version' -p110 -S'1.1.3' -p111 -sS'uta_schema' -p112 -S'uta_20180821' -p113 -sS'seqrepo_db' -p114 -S'2018-08-21' -p115 -sS'variantvalidator_version' -p116 -S'v0.2' -p117 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant155.txt b/VariantValidator/testing/testOutputsMasterITS/variant155.txt deleted file mode 100644 index c2441ec0..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant155.txt +++ /dev/null @@ -1,259 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_014611.1:c.9879T>C' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'A more recent version of the selected reference sequence NM_014611.1 is available (NM_014611.2)' -p19 -aS'NM_014611.2:c.9879C= MUST be fully validated prior to use in reports' -p20 -aS'select_variants=NM_014611.2:c.9879C=' -p21 -aS'RefSeqGene record not available' -p22 -asS'refseqgene_context_intronic_sequence' -p23 -g16 -sS'alt_genomic_loci' -p24 -(lp25 -sS'transcript_description' -p26 -VHomo sapiens MDN1, midasin homolog (yeast) (MDN1), mRNA -p27 -sS'gene_symbol' -p28 -S'MDN1' -p29 -sS'hgvs_predicted_protein_consequence' -p30 -(dp31 -S'tlr' -p32 -S'NP_055426.1:p.(Val3293=)' -p33 -sS'slr' -p34 -S'NP_055426.1:p.(V3293=)' -p35 -ssS'submitted_variant' -p36 -S'6-90403795-G-G' -p37 -sS'genome_context_intronic_sequence' -p38 -g16 -sS'hgvs_lrg_variant' -p39 -g16 -sS'hgvs_transcript_variant' -p40 -S'NM_014611.1:c.9879T>C' -p41 -sS'hgvs_refseqgene_variant' -p42 -g16 -sS'primary_assembly_loci' -p43 -(dp44 -S'hg19' -p45 -(dp46 -S'hgvs_genomic_description' -p47 -S'NC_000006.11:g.90403795G=' -p48 -sS'vcf' -p49 -(dp50 
-S'chr' -p51 -S'chr6' -p52 -sS'ref' -p53 -S'G' -p54 -sS'pos' -p55 -S'90403795' -p56 -sS'alt' -p57 -g54 -sssS'grch37' -p58 -(dp59 -g47 -S'NC_000006.11:g.90403795G=' -p60 -sg49 -(dp61 -g51 -S'6' -p62 -sg53 -g54 -sg55 -S'90403795' -p63 -sg57 -g54 -ssssS'reference_sequence_records' -p64 -(dp65 -S'protein' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1' -p67 -sS'transcript' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.1' -p69 -sssS'NM_014611.2:c.9879C=' -p70 -(dp71 -g15 -g16 -sg17 -(lp72 -S'RefSeqGene record not available' -p73 -asg23 -g16 -sg24 -(lp74 -sg26 -VHomo sapiens midasin AAA ATPase 1 (MDN1), mRNA -p75 -sg28 -S'MDN1' -p76 -sg30 -(dp77 -g32 -S'NP_055426.1:p.(Val3293=)' -p78 -sg34 -S'NP_055426.1:p.(V3293=)' -p79 -ssg36 -g37 -sg38 -g16 -sg39 -g16 -sg40 -S'NM_014611.2:c.9879C=' -p80 -sg42 -g16 -sg43 -(dp81 -S'hg19' -p82 -(dp83 -g47 -S'NC_000006.11:g.90403795G=' -p84 -sg49 -(dp85 -g51 -g52 -sg53 -VG -p86 -sg55 -S'90403795' -p87 -sg57 -g86 -sssS'hg38' -p88 -(dp89 -g47 -S'NC_000006.12:g.89694076G=' -p90 -sg49 -(dp91 -g51 -g52 -sg53 -g86 -sg55 -S'89694076' -p92 -sg57 -g86 -sssS'grch37' -p93 -(dp94 -g47 -S'NC_000006.11:g.90403795G=' -p95 -sg49 -(dp96 -g51 -g62 -sg53 -g86 -sg55 -S'90403795' -p97 -sg57 -g86 -sssS'grch38' -p98 -(dp99 -g47 -S'NC_000006.12:g.89694076G=' -p100 -sg49 -(dp101 -g51 -g62 -sg53 -g86 -sg55 -S'89694076' -p102 -sg57 -g86 -ssssg64 -(dp103 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1' -p104 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.2' -p105 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant156.txt b/VariantValidator/testing/testOutputsMasterITS/variant156.txt deleted file mode 100644 index 1ff95bea..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant156.txt +++ /dev/null @@ -1,260 +0,0 @@ -(dp0 -S'NM_014611.2:c.9879C>T' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens midasin AAA ATPase 1 (MDN1), mRNA -p12 -sS'gene_symbol' -p13 -S'MDN1' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_055426.1:p.(Val3293=)' -p18 -sS'slr' -p19 -S'NP_055426.1:p.(V3293=)' -p20 -ssS'submitted_variant' -p21 -S'6-90403795-G-A' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_014611.2:c.9879C>T' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000006.11:g.90403795G>A' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr6' -p37 -sS'ref' -p38 -VG -p39 -sS'pos' -p40 -S'90403795' -p41 -sS'alt' -p42 -VA -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000006.12:g.89694076G>A' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'89694076' -p48 -sg42 -g43 -sssS'grch37' -p49 -(dp50 -g32 -S'NC_000006.11:g.90403795G>A' -p51 -sg34 -(dp52 -g36 -S'6' -p53 -sg38 -g39 -sg40 -S'90403795' -p54 -sg42 -g43 -sssS'grch38' -p55 -(dp56 -g32 -S'NC_000006.12:g.89694076G>A' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'89694076' -p59 -sg42 -g43 -ssssS'reference_sequence_records' -p60 -(dp61 -S'protein' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1' -p63 -sS'transcript' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.2' -p65 -sssS'flag' -p66 -S'gene_variant' -p67 -sS'NM_014611.1:c.9879T=' 
-p68 -(dp69 -g3 -g4 -sg5 -(lp70 -S'A more recent version of the selected reference sequence NM_014611.1 is available (NM_014611.2)' -p71 -aS'NM_014611.2:c.9879C>T MUST be fully validated prior to use in reports' -p72 -aS'select_variants=NM_014611.2:c.9879C>T' -p73 -aS'RefSeqGene record not available' -p74 -asg8 -g4 -sg9 -(lp75 -sg11 -VHomo sapiens MDN1, midasin homolog (yeast) (MDN1), mRNA -p76 -sg13 -S'MDN1' -p77 -sg15 -(dp78 -g17 -S'NP_055426.1:p.(Val3293=)' -p79 -sg19 -S'NP_055426.1:p.(V3293=)' -p80 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_014611.1:c.9879T=' -p81 -sg27 -g4 -sg28 -(dp82 -S'hg19' -p83 -(dp84 -g32 -S'NC_000006.11:g.90403795G>A' -p85 -sg34 -(dp86 -g36 -g37 -sg38 -S'G' -p87 -sg40 -S'90403795' -p88 -sg42 -g43 -sssS'grch37' -p89 -(dp90 -g32 -S'NC_000006.11:g.90403795G>A' -p91 -sg34 -(dp92 -g36 -g53 -sg38 -g87 -sg40 -S'90403795' -p93 -sg42 -g43 -ssssg60 -(dp94 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1' -p95 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.1' -p96 -sssS'metadata' -p97 -(dp98 -S'variantvalidator_hgvs_version' -p99 -S'1.1.3' -p100 -sS'uta_schema' -p101 -S'uta_20180821' -p102 -sS'seqrepo_db' -p103 -S'2018-08-21' -p104 -sS'variantvalidator_version' -p105 -S'v0.2' -p106 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant157.txt b/VariantValidator/testing/testOutputsMasterITS/variant157.txt deleted file mode 100644 index 3cbb7339..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant157.txt +++ /dev/null @@ -1,1357 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_032470.3:c.4del' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000006.11:g.32012992CG>C automapped to NC_000006.11:g.32012993delG' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -(dp14 -S'grch37' -p15 -(dp16 -S'hgvs_genomic_description' -p17 -S'NT_113891.2:g.3483644del' -p18 -sS'vcf' -p19 -(dp20 -S'chr' -p21 -S'HSCHR6_MHC_COX_CTG1' -p22 -sS'ref' -p23 -S'CG' -p24 -sS'pos' -p25 -S'3483643' -p26 -sS'alt' -p27 -S'C' -p28 -sssa(dp29 -S'hg19' -p30 -(dp31 -g17 -S'NT_113891.2:g.3483644del' -p32 -sg19 -(dp33 -g21 -S'chr6_cox_hap2' -p34 -sg23 -S'CG' -p35 -sg25 -S'3483643' -p36 -sg27 -g28 -sssa(dp37 -S'grch38' -p38 -(dp39 -g17 -S'NT_113891.3:g.3483538del' -p40 -sg19 -(dp41 -g21 -g22 -sg23 -S'CG' -p42 -sg25 -S'3483537' -p43 -sg27 -g28 -sssa(dp44 -S'hg38' -p45 -(dp46 -g17 -S'NT_113891.3:g.3483538del' -p47 -sg19 -(dp48 -g21 -S'chr6_GL000251v2_alt' -p49 -sg23 -S'CG' -p50 -sg25 -S'3483537' -p51 -sg27 -g28 -sssa(dp52 -S'grch37' -p53 -(dp54 -g17 -S'NT_167245.1:g.3292210del' -p55 -sg19 -(dp56 -g21 -S'HSCHR6_MHC_DBB_CTG1' -p57 -sg23 -S'CG' -p58 -sg25 -S'3292209' -p59 -sg27 -g28 -sssa(dp60 -S'hg19' -p61 -(dp62 -g17 -S'NT_167245.1:g.3292210del' -p63 -sg19 -(dp64 -g21 -S'chr6_dbb_hap3' -p65 -sg23 -S'CG' -p66 -sg25 -S'3292209' -p67 -sg27 -g28 -sssa(dp68 -S'grch38' -p69 -(dp70 -g17 -S'NT_167245.2:g.3286625del' -p71 -sg19 -(dp72 -g21 -g57 -sg23 -S'CG' -p73 -sg25 -S'3286624' -p74 -sg27 -g28 -sssa(dp75 -g45 -(dp76 -g17 -S'NT_167245.2:g.3286625del' -p77 -sg19 -(dp78 -g21 -S'chr6_GL000252v2_alt' -p79 -sg23 
-S'CG' -p80 -sg25 -S'3286624' -p81 -sg27 -g28 -sssa(dp82 -S'grch37' -p83 -(dp84 -g17 -S'NT_167247.1:g.3392834del' -p85 -sg19 -(dp86 -g21 -S'HSCHR6_MHC_MCF_CTG1' -p87 -sg23 -S'CG' -p88 -sg25 -S'3392833' -p89 -sg27 -g28 -sssa(dp90 -S'hg19' -p91 -(dp92 -g17 -S'NT_167247.1:g.3392834del' -p93 -sg19 -(dp94 -g21 -S'chr6_mcf_hap5' -p95 -sg23 -S'CG' -p96 -sg25 -S'3392833' -p97 -sg27 -g28 -sssa(dp98 -S'grch38' -p99 -(dp100 -g17 -S'NT_167247.2:g.3387249del' -p101 -sg19 -(dp102 -g21 -g87 -sg23 -S'CG' -p103 -sg25 -S'3387248' -p104 -sg27 -g28 -sssa(dp105 -g45 -(dp106 -g17 -S'NT_167247.2:g.3387249del' -p107 -sg19 -(dp108 -g21 -S'chr6_GL000254v2_alt' -p109 -sg23 -S'CG' -p110 -sg25 -S'3387248' -p111 -sg27 -g28 -sssa(dp112 -S'grch37' -p113 -(dp114 -g17 -S'NT_167248.1:g.3274047del' -p115 -sg19 -(dp116 -g21 -S'HSCHR6_MHC_QBL_CTG1' -p117 -sg23 -S'CG' -p118 -sg25 -S'3274046' -p119 -sg27 -g28 -sssa(dp120 -S'hg19' -p121 -(dp122 -g17 -S'NT_167248.1:g.3274047del' -p123 -sg19 -(dp124 -g21 -S'chr6_qbl_hap6' -p125 -sg23 -S'CG' -p126 -sg25 -S'3274046' -p127 -sg27 -g28 -sssa(dp128 -S'grch38' -p129 -(dp130 -g17 -S'NT_167248.2:g.3268451del' -p131 -sg19 -(dp132 -g21 -g117 -sg23 -S'CG' -p133 -sg25 -S'3268450' -p134 -sg27 -g28 -sssa(dp135 -g45 -(dp136 -g17 -S'NT_167248.2:g.3268451del' -p137 -sg19 -(dp138 -g21 -S'chr6_GL000255v2_alt' -p139 -sg23 -S'CG' -p140 -sg25 -S'3268450' -p141 -sg27 -g28 -sssa(dp142 -S'grch37' -p143 -(dp144 -g17 -S'NT_167249.1:g.3345701del' -p145 -sg19 -(dp146 -g21 -S'HSCHR6_MHC_SSTO_CTG1' -p147 -sg23 -S'CG' -p148 -sg25 -S'3345700' -p149 -sg27 -g28 -sssa(dp150 -S'hg19' -p151 -(dp152 -g17 -S'NT_167249.1:g.3345701del' -p153 -sg19 -(dp154 -g21 -S'chr6_ssto_hap7' -p155 -sg23 -S'CG' -p156 -sg25 -S'3345700' -p157 -sg27 -g28 -sssa(dp158 -S'grch38' -p159 -(dp160 -g17 -S'NT_167249.2:g.3346403del' -p161 -sg19 -(dp162 -g21 -g147 -sg23 -S'CG' -p163 -sg25 -S'3346402' -p164 -sg27 -g28 -sssa(dp165 -g45 -(dp166 -g17 -S'NT_167249.2:g.3346403del' -p167 -sg19 -(dp168 -g21 -S'chr6_GL000256v2_alt' 
-p169 -sg23 -S'CG' -p170 -sg25 -S'3346402' -p171 -sg27 -g28 -sssasS'transcript_description' -p172 -VHomo sapiens tenascin XB (TNXB), transcript variant XB-S, mRNA -p173 -sS'gene_symbol' -p174 -S'TNXB' -p175 -sS'hgvs_predicted_protein_consequence' -p176 -(dp177 -S'tlr' -p178 -S'NP_115859.2:p.(Arg2AlafsTer91)' -p179 -sS'slr' -p180 -S'NP_115859.2:p.(R2Afs*91)' -p181 -ssS'submitted_variant' -p182 -S'6-32012992-CG-C' -p183 -sS'genome_context_intronic_sequence' -p184 -g6 -sS'hgvs_lrg_variant' -p185 -g6 -sS'hgvs_transcript_variant' -p186 -S'NM_032470.3:c.4del' -p187 -sS'hgvs_refseqgene_variant' -p188 -g6 -sS'primary_assembly_loci' -p189 -(dp190 -S'hg19' -p191 -(dp192 -g17 -S'NC_000006.11:g.32012993del' -p193 -sg19 -(dp194 -g21 -S'chr6' -p195 -sg23 -S'CG' -p196 -sg25 -S'32012992' -p197 -sg27 -g28 -sssg45 -(dp198 -g17 -S'NC_000006.12:g.32045216del' -p199 -sg19 -(dp200 -g21 -g195 -sg23 -S'CG' -p201 -sg25 -S'32045215' -p202 -sg27 -g28 -sssS'grch37' -p203 -(dp204 -g17 -S'NC_000006.11:g.32012993del' -p205 -sg19 -(dp206 -g21 -S'6' -p207 -sg23 -S'CG' -p208 -sg25 -S'32012992' -p209 -sg27 -g28 -sssS'grch38' -p210 -(dp211 -g17 -S'NC_000006.12:g.32045216del' -p212 -sg19 -(dp213 -g21 -g207 -sg23 -S'CG' -p214 -sg25 -S'32045215' -p215 -sg27 -g28 -ssssS'reference_sequence_records' -p216 -(dp217 -S'protein' -p218 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_115859.2' -p219 -sS'transcript' -p220 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032470.3' -p221 -sssS'NM_001365276.1:c.10717del' -p222 -(dp223 -g5 -g6 -sg7 -(lp224 -S'NC_000006.11:g.32012992CG>C automapped to NC_000006.11:g.32012993delG' -p225 -aS'RefSeqGene record not available' -p226 -asg11 -g6 -sg12 -(lp227 -(dp228 -S'grch37' -p229 -(dp230 -g17 -S'NT_113891.2:g.3483644del' -p231 -sg19 -(dp232 -g21 -g22 -sg23 -S'CG' -p233 -sg25 -S'3483643' -p234 -sg27 -g28 -sssa(dp235 -S'hg19' -p236 -(dp237 -g17 -S'NT_113891.2:g.3483644del' -p238 -sg19 -(dp239 -g21 -g34 -sg23 -S'CG' -p240 -sg25 -S'3483643' -p241 -sg27 -g28 -sssa(dp242 -S'grch37' -p243 
-(dp244 -g17 -S'NT_167245.1:g.3292210del' -p245 -sg19 -(dp246 -g21 -g57 -sg23 -S'CG' -p247 -sg25 -S'3292209' -p248 -sg27 -g28 -sssa(dp249 -S'hg19' -p250 -(dp251 -g17 -S'NT_167245.1:g.3292210del' -p252 -sg19 -(dp253 -g21 -g65 -sg23 -S'CG' -p254 -sg25 -S'3292209' -p255 -sg27 -g28 -sssa(dp256 -S'grch37' -p257 -(dp258 -g17 -S'NT_167247.1:g.3392834del' -p259 -sg19 -(dp260 -g21 -g87 -sg23 -S'CG' -p261 -sg25 -S'3392833' -p262 -sg27 -g28 -sssa(dp263 -S'hg19' -p264 -(dp265 -g17 -S'NT_167247.1:g.3392834del' -p266 -sg19 -(dp267 -g21 -g95 -sg23 -S'CG' -p268 -sg25 -S'3392833' -p269 -sg27 -g28 -sssasg172 -VHomo sapiens tenascin XB (TNXB), transcript variant 3, mRNA -p270 -sg174 -S'TNXB' -p271 -sg176 -(dp272 -g178 -S'NP_001352205.1:p.(Arg3573AlafsTer91)' -p273 -sg180 -S'NP_001352205.1:p.(R3573Afs*91)' -p274 -ssg182 -g183 -sg184 -g6 -sg185 -g6 -sg186 -S'NM_001365276.1:c.10717del' -p275 -sg188 -g6 -sg189 -(dp276 -S'hg19' -p277 -(dp278 -g17 -S'NC_000006.11:g.32012993del' -p279 -sg19 -(dp280 -g21 -g195 -sg23 -S'CG' -p281 -sg25 -S'32012992' -p282 -sg27 -g28 -sssS'grch37' -p283 -(dp284 -g17 -S'NC_000006.11:g.32012993del' -p285 -sg19 -(dp286 -g21 -g207 -sg23 -S'CG' -p287 -sg25 -S'32012992' -p288 -sg27 -g28 -ssssg216 -(dp289 -g218 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001352205.1' -p290 -sg220 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001365276.1' -p291 -sssS'NM_019105.7:c.10711del' -p292 -(dp293 -g5 -g6 -sg7 -(lp294 -S'NC_000006.11:g.32012992CG>C automapped to NC_000006.11:g.32012993delG' -p295 -aS'RefSeqGene record not available' -p296 -asg11 -g6 -sg12 -(lp297 -(dp298 -S'grch37' -p299 -(dp300 -g17 -S'NT_113891.2:g.3483644del' -p301 -sg19 -(dp302 -g21 -g22 -sg23 -S'CG' -p303 -sg25 -S'3483643' -p304 -sg27 -g28 -sssa(dp305 -S'hg19' -p306 -(dp307 -g17 -S'NT_113891.2:g.3483644del' -p308 -sg19 -(dp309 -g21 -g34 -sg23 -S'CG' -p310 -sg25 -S'3483643' -p311 -sg27 -g28 -sssa(dp312 -S'grch37' -p313 -(dp314 -g17 -S'NT_167245.1:g.3292210del' -p315 -sg19 -(dp316 -g21 -g57 -sg23 -S'CG' -p317 
-sg25 -S'3292209' -p318 -sg27 -g28 -sssa(dp319 -S'hg19' -p320 -(dp321 -g17 -S'NT_167245.1:g.3292210del' -p322 -sg19 -(dp323 -g21 -g65 -sg23 -S'CG' -p324 -sg25 -S'3292209' -p325 -sg27 -g28 -sssa(dp326 -S'grch37' -p327 -(dp328 -g17 -S'NT_167247.1:g.3392834del' -p329 -sg19 -(dp330 -g21 -g87 -sg23 -S'CG' -p331 -sg25 -S'3392833' -p332 -sg27 -g28 -sssa(dp333 -S'hg19' -p334 -(dp335 -g17 -S'NT_167247.1:g.3392834del' -p336 -sg19 -(dp337 -g21 -g95 -sg23 -S'CG' -p338 -sg25 -S'3392833' -p339 -sg27 -g28 -sssasg172 -VHomo sapiens tenascin XB (TNXB), transcript variant XB, mRNA -p340 -sg174 -S'TNXB' -p341 -sg176 -(dp342 -g178 -S'NP_061978.6:p.(Arg3571AlafsTer91)' -p343 -sg180 -S'NP_061978.6:p.(R3571Afs*91)' -p344 -ssg182 -g183 -sg184 -g6 -sg185 -g6 -sg186 -S'NM_019105.7:c.10711del' -p345 -sg188 -g6 -sg189 -(dp346 -S'hg19' -p347 -(dp348 -g17 -S'NC_000006.11:g.32012993del' -p349 -sg19 -(dp350 -g21 -g195 -sg23 -S'CG' -p351 -sg25 -S'32012992' -p352 -sg27 -g28 -sssS'grch37' -p353 -(dp354 -g17 -S'NC_000006.11:g.32012993del' -p355 -sg19 -(dp356 -g21 -g207 -sg23 -S'CG' -p357 -sg25 -S'32012992' -p358 -sg27 -g28 -ssssg216 -(dp359 -g218 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_061978.6' -p360 -sg220 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_019105.7' -p361 -sssS'NM_019105.6:c.10711del' -p362 -(dp363 -g5 -g6 -sg7 -(lp364 -S'NC_000006.11:g.32012992CG>C automapped to NC_000006.11:g.32012993delG' -p365 -aS'A more recent version of the selected reference sequence NM_019105.6 is available (NM_019105.7)' -p366 -aS'NM_019105.7:c.10711delC MUST be fully validated prior to use in reports' -p367 -aS'select_variants=NM_019105.7:c.10711del' -p368 -aS'RefSeqGene record not available' -p369 -asg11 -g6 -sg12 -(lp370 -(dp371 -S'grch37' -p372 -(dp373 -g17 -S'NT_113891.2:g.3483644del' -p374 -sg19 -(dp375 -g21 -g22 -sg23 -S'CG' -p376 -sg25 -S'3483643' -p377 -sg27 -g28 -sssa(dp378 -S'hg19' -p379 -(dp380 -g17 -S'NT_113891.2:g.3483644del' -p381 -sg19 -(dp382 -g21 -g34 -sg23 -S'CG' -p383 -sg25 -S'3483643' -p384 
-sg27 -g28 -sssa(dp385 -S'grch38' -p386 -(dp387 -g17 -S'NT_113891.3:g.3483538del' -p388 -sg19 -(dp389 -g21 -g22 -sg23 -S'CG' -p390 -sg25 -S'3483537' -p391 -sg27 -g28 -sssa(dp392 -g45 -(dp393 -g17 -S'NT_113891.3:g.3483538del' -p394 -sg19 -(dp395 -g21 -g49 -sg23 -S'CG' -p396 -sg25 -S'3483537' -p397 -sg27 -g28 -sssa(dp398 -S'grch37' -p399 -(dp400 -g17 -S'NT_167245.1:g.3292210del' -p401 -sg19 -(dp402 -g21 -g57 -sg23 -S'CG' -p403 -sg25 -S'3292209' -p404 -sg27 -g28 -sssa(dp405 -S'hg19' -p406 -(dp407 -g17 -S'NT_167245.1:g.3292210del' -p408 -sg19 -(dp409 -g21 -g65 -sg23 -S'CG' -p410 -sg25 -S'3292209' -p411 -sg27 -g28 -sssa(dp412 -S'grch38' -p413 -(dp414 -g17 -S'NT_167245.2:g.3286625del' -p415 -sg19 -(dp416 -g21 -g57 -sg23 -S'CG' -p417 -sg25 -S'3286624' -p418 -sg27 -g28 -sssa(dp419 -g45 -(dp420 -g17 -S'NT_167245.2:g.3286625del' -p421 -sg19 -(dp422 -g21 -g79 -sg23 -S'CG' -p423 -sg25 -S'3286624' -p424 -sg27 -g28 -sssa(dp425 -S'grch37' -p426 -(dp427 -g17 -S'NT_167247.1:g.3392834del' -p428 -sg19 -(dp429 -g21 -g87 -sg23 -S'CG' -p430 -sg25 -S'3392833' -p431 -sg27 -g28 -sssa(dp432 -S'hg19' -p433 -(dp434 -g17 -S'NT_167247.1:g.3392834del' -p435 -sg19 -(dp436 -g21 -g95 -sg23 -S'CG' -p437 -sg25 -S'3392833' -p438 -sg27 -g28 -sssa(dp439 -S'grch38' -p440 -(dp441 -g17 -S'NT_167247.2:g.3387249del' -p442 -sg19 -(dp443 -g21 -g87 -sg23 -S'CG' -p444 -sg25 -S'3387248' -p445 -sg27 -g28 -sssa(dp446 -g45 -(dp447 -g17 -S'NT_167247.2:g.3387249del' -p448 -sg19 -(dp449 -g21 -g109 -sg23 -S'CG' -p450 -sg25 -S'3387248' -p451 -sg27 -g28 -sssa(dp452 -S'grch37' -p453 -(dp454 -g17 -S'NT_167248.1:g.3271861del' -p455 -sg19 -(dp456 -g21 -g117 -sg23 -S'AG' -p457 -sg25 -S'3271858' -p458 -sg27 -S'A' -p459 -sssa(dp460 -S'hg19' -p461 -(dp462 -g17 -S'NT_167248.1:g.3271861del' -p463 -sg19 -(dp464 -g21 -g125 -sg23 -S'AG' -p465 -sg25 -S'3271858' -p466 -sg27 -g459 -sssasg172 -VHomo sapiens tenascin XB (TNXB), transcript variant XB, mRNA -p467 -sg174 -S'TNXB' -p468 -sg176 -(dp469 -g178 
-S'NP_061978.6:p.(Arg3571AlafsTer91)' -p470 -sg180 -S'NP_061978.6:p.(R3571Afs*91)' -p471 -ssg182 -g183 -sg184 -g6 -sg185 -g6 -sg186 -S'NM_019105.6:c.10711del' -p472 -sg188 -g6 -sg189 -(dp473 -S'hg19' -p474 -(dp475 -g17 -S'NC_000006.11:g.32012993del' -p476 -sg19 -(dp477 -g21 -g195 -sg23 -S'CG' -p478 -sg25 -S'32012992' -p479 -sg27 -g28 -sssg45 -(dp480 -g17 -S'NC_000006.12:g.32045216del' -p481 -sg19 -(dp482 -g21 -g195 -sg23 -S'CG' -p483 -sg25 -S'32045215' -p484 -sg27 -g28 -sssS'grch37' -p485 -(dp486 -g17 -S'NC_000006.11:g.32012993del' -p487 -sg19 -(dp488 -g21 -g207 -sg23 -S'CG' -p489 -sg25 -S'32012992' -p490 -sg27 -g28 -sssS'grch38' -p491 -(dp492 -g17 -S'NC_000006.12:g.32045216del' -p493 -sg19 -(dp494 -g21 -g207 -sg23 -S'CG' -p495 -sg25 -S'32045215' -p496 -sg27 -g28 -ssssg216 -(dp497 -g218 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_061978.6' -p498 -sg220 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_019105.6' -p499 -sssS'metadata' -p500 -(dp501 -S'variantvalidator_hgvs_version' -p502 -S'1.1.3' -p503 -sS'uta_schema' -p504 -S'uta_20180821' -p505 -sS'seqrepo_db' -p506 -S'2018-08-21' -p507 -sS'variantvalidator_version' -p508 -S'v0.2' -p509 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant158.txt b/VariantValidator/testing/testOutputsMasterITS/variant158.txt deleted file mode 100644 index 5508fb91..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant158.txt +++ /dev/null @@ -1,171 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589G>T' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2:p.(Gly197Cys)' -p20 -sS'slr' -p21 -S'NP_000079.2:p.(G197C)' -p22 -ssS'submitted_variant' -p23 -S'17-48275363-C-A' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_000088.3:c.589G>T' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000017.10:g.48275363C>A' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr17' -p39 -sS'ref' -p40 -VC -p41 -sS'pos' -p42 -S'48275363' -p43 -sS'alt' -p44 -VA -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000017.11:g.50198002C>A' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'50198002' -p50 -sg44 -g45 -sssS'grch37' -p51 -(dp52 -g34 -S'NC_000017.10:g.48275363C>A' -p53 -sg36 -(dp54 -g38 -S'17' -p55 -sg40 -g41 -sg42 -S'48275363' -p56 -sg44 -g45 -sssS'grch38' -p57 -(dp58 -g34 -S'NC_000017.11:g.50198002C>A' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'50198002' -p61 -sg44 -g45 -ssssS'reference_sequence_records' -p62 -(dp63 -S'protein' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p65 -sS'transcript' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p67 -sssS'metadata' 
-p68 -(dp69 -S'variantvalidator_hgvs_version' -p70 -S'1.1.3' -p71 -sS'uta_schema' -p72 -S'uta_20180821' -p73 -sS'seqrepo_db' -p74 -S'2018-08-21' -p75 -sS'variantvalidator_version' -p76 -S'v0.2' -p77 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant159.txt b/VariantValidator/testing/testOutputsMasterITS/variant159.txt deleted file mode 100644 index 475bc1f7..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant159.txt +++ /dev/null @@ -1,172 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589-1G>T' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2:p.?' -p20 -sS'slr' -p21 -S'NP_000079.2:p.?' 
-p22 -ssS'submitted_variant' -p23 -S'17-48275364-C-A' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000017.10(NM_000088.3):c.589-1G>T' -p26 -sS'hgvs_lrg_variant' -p27 -g6 -sS'hgvs_transcript_variant' -p28 -S'NM_000088.3:c.589-1G>T' -p29 -sS'hgvs_refseqgene_variant' -p30 -g6 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'hgvs_genomic_description' -p35 -S'NC_000017.10:g.48275364C>A' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr17' -p40 -sS'ref' -p41 -VC -p42 -sS'pos' -p43 -S'48275364' -p44 -sS'alt' -p45 -VA -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000017.11:g.50198003C>A' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -g42 -sg43 -S'50198003' -p51 -sg45 -g46 -sssS'grch37' -p52 -(dp53 -g35 -S'NC_000017.10:g.48275364C>A' -p54 -sg37 -(dp55 -g39 -S'17' -p56 -sg41 -g42 -sg43 -S'48275364' -p57 -sg45 -g46 -sssS'grch38' -p58 -(dp59 -g35 -S'NC_000017.11:g.50198003C>A' -p60 -sg37 -(dp61 -g39 -g56 -sg41 -g42 -sg43 -S'50198003' -p62 -sg45 -g46 -ssssS'reference_sequence_records' -p63 -(dp64 -S'protein' -p65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p66 -sS'transcript' -p67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p68 -sssS'metadata' -p69 -(dp70 -S'variantvalidator_hgvs_version' -p71 -S'1.1.3' -p72 -sS'uta_schema' -p73 -S'uta_20180821' -p74 -sS'seqrepo_db' -p75 -S'2018-08-21' -p76 -sS'variantvalidator_version' -p77 -S'v0.2' -p78 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant16.txt b/VariantValidator/testing/testOutputsMasterITS/variant16.txt deleted file mode 100644 index eb14f0ff..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant16.txt +++ /dev/null @@ -1,156 +0,0 @@ -(dp0 -S'flag' -p1 -S'intergenic' -p2 -sS'Intergenic_Variant_1' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'No transcripts found that fully overlap the described variation in the genomic sequence' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -g6 -sS'gene_symbol' -p14 -g6 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -g6 -sS'slr' -p18 -g6 -ssS'submitted_variant' -p19 -S'NC_000017.10:g.48279242G>T' -p20 -sS'genome_context_intronic_sequence' -p21 -g6 -sS'hgvs_lrg_variant' -p22 -g6 -sS'hgvs_transcript_variant' -p23 -g6 -sS'hgvs_refseqgene_variant' -p24 -g6 -sS'primary_assembly_loci' -p25 -(dp26 -S'hg19' -p27 -(dp28 -S'hgvs_genomic_description' -p29 -VNC_000017.10:g.48279242G>T -p30 -sS'vcf' -p31 -(dp32 -S'chr' -p33 -S'chr17' -p34 -sS'ref' -p35 -S'G' -p36 -sS'pos' -p37 -S'48279242' -p38 -sS'alt' -p39 -S'T' -p40 -sssS'grch37' -p41 -(dp42 -g29 -VNC_000017.10:g.48279242G>T -p43 -sg31 -(dp44 -g33 -S'17' -p45 -sg35 -g36 -sg37 -g38 -sg39 -g40 -sssS'hg38' -p46 -(dp47 -g29 -VNC_000017.11:g.50201881G>T -p48 -sg31 -(dp49 -g33 -g34 -sg35 -g36 -sg37 -S'50201881' -p50 -sg39 -g40 -sssS'grch38' -p51 -(dp52 -g29 -VNC_000017.11:g.50201881G>T -p53 -sg31 -(dp54 -g33 -g45 -sg35 -g36 -sg37 -g50 -sg39 -g40 -ssssS'reference_sequence_records' -p55 -g6 -ssS'metadata' -p56 -(dp57 -S'variantvalidator_hgvs_version' -p58 -S'1.1.3' -p59 -sS'uta_schema' -p60 -S'uta_20180821' -p61 -sS'seqrepo_db' -p62 -S'2018-08-21' -p63 -sS'variantvalidator_version' -p64 -S'v0.2' -p65 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant160.txt b/VariantValidator/testing/testOutputsMasterITS/variant160.txt deleted file mode 100644 index 54995ddf..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant160.txt +++ /dev/null @@ -1,179 +0,0 @@ -(dp0 -S'NM_000088.3:c.591_593inv' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000017.10:g.48275359GGA>TCC automapped to NC_000017.10:g.48275359_48275361inv' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p13 -sS'gene_symbol' -p14 -S'COL1A1' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_000079.2:p.(Pro198Asp)' -p19 -sS'slr' -p20 -S'NP_000079.2:p.(P198D)' -p21 -ssS'submitted_variant' -p22 -S'17-48275359-GGA-TCC' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_000088.3:c.591_593inv' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000017.10:g.48275359_48275361inv' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr17' -p38 -sS'ref' -p39 -S'GGA' -p40 -sS'pos' -p41 -S'48275359' -p42 -sS'alt' -p43 -S'TCC' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000017.11:g.50197998_50198000inv' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'GGA' -p49 -sg41 -S'50197998' -p50 -sg43 -S'TCC' -p51 -sssS'grch37' -p52 -(dp53 -g33 -S'NC_000017.10:g.48275359_48275361inv' -p54 -sg35 -(dp55 -g37 -S'17' -p56 -sg39 -S'GGA' -p57 -sg41 -S'48275359' -p58 -sg43 -S'TCC' -p59 -sssS'grch38' -p60 -(dp61 -g33 -S'NC_000017.11:g.50197998_50198000inv' -p62 -sg35 -(dp63 -g37 -g56 -sg39 -S'GGA' -p64 -sg41 -S'50197998' -p65 -sg43 -S'TCC' -p66 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' 
-p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p72 -sssS'flag' -p73 -S'gene_variant' -p74 -sS'metadata' -p75 -(dp76 -S'variantvalidator_hgvs_version' -p77 -S'1.1.3' -p78 -sS'uta_schema' -p79 -S'uta_20180821' -p80 -sS'seqrepo_db' -p81 -S'2018-08-21' -p82 -sS'variantvalidator_version' -p83 -S'v0.2' -p84 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant161.txt b/VariantValidator/testing/testOutputsMasterITS/variant161.txt deleted file mode 100644 index b707940c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant161.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000089.3:c.1035_1035+2del' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000007.13:g.94039128CTTG>C automapped to NC_000007.13:g.94039133_94039135delTGT' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 2 chain (COL1A2), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A2' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000080.2:p.(Val345del)' -p21 -sS'slr' -p22 -S'NP_000080.2:p.(V345del)' -p23 -ssS'submitted_variant' -p24 -S'7-94039128-CTTG-C' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000007.13(NM_000089.3):c.1035_1035+2del' -p27 -sS'hgvs_lrg_variant' -p28 -g6 -sS'hgvs_transcript_variant' -p29 -S'NM_000089.3:c.1035_1035+2del' -p30 -sS'hgvs_refseqgene_variant' -p31 -g6 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000007.13:g.94039133_94039135del' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr7' -p41 -sS'ref' -p42 -S'CTTG' -p43 -sS'pos' -p44 -S'94039128' -p45 -sS'alt' -p46 -S'C' -p47 -sssS'hg38' -p48 -(dp49 -g36 
-S'NC_000007.14:g.94409821_94409823del' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'CTTG' -p52 -sg44 -S'94409816' -p53 -sg46 -g47 -sssS'grch37' -p54 -(dp55 -g36 -S'NC_000007.13:g.94039133_94039135del' -p56 -sg38 -(dp57 -g40 -S'7' -p58 -sg42 -S'CTTG' -p59 -sg44 -S'94039128' -p60 -sg46 -g47 -sssS'grch38' -p61 -(dp62 -g36 -S'NC_000007.14:g.94409821_94409823del' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -S'CTTG' -p65 -sg44 -S'94409816' -p66 -sg46 -g47 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000080.2' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000089.3' -p72 -sssS'metadata' -p73 -(dp74 -S'variantvalidator_hgvs_version' -p75 -S'1.1.3' -p76 -sS'uta_schema' -p77 -S'uta_20180821' -p78 -sS'seqrepo_db' -p79 -S'2018-08-21' -p80 -sS'variantvalidator_version' -p81 -S'v0.2' -p82 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant162.txt b/VariantValidator/testing/testOutputsMasterITS/variant162.txt deleted file mode 100644 index bdf685d0..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant162.txt +++ /dev/null @@ -1,547 +0,0 @@ -(dp0 -S'NM_001162427.1:c.210+1615dup' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000009.11:g.135800972AC>ACC automapped to NC_000009.11:g.135800974dupC' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 4, mRNA -p13 -sS'gene_symbol' -p14 -S'TSC1' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001155899.1:p.?' -p19 -sS'slr' -p20 -S'NP_001155899.1:p.?' 
-p21 -ssS'submitted_variant' -p22 -S'9-135800972-AC-ACC' -p23 -sS'genome_context_intronic_sequence' -p24 -S'NC_000009.11(NM_001162427.1):c.210+1615dup' -p25 -sS'hgvs_lrg_variant' -p26 -g4 -sS'hgvs_transcript_variant' -p27 -S'NM_001162427.1:c.210+1615dup' -p28 -sS'hgvs_refseqgene_variant' -p29 -g4 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000009.11:g.135800973dup' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr9' -p39 -sS'ref' -p40 -S'C' -p41 -sS'pos' -p42 -S'135800973' -p43 -sS'alt' -p44 -S'CC' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000009.12:g.132925586dup' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'132925586' -p50 -sg44 -S'CC' -p51 -sssS'grch37' -p52 -(dp53 -g34 -S'NC_000009.11:g.135800973dup' -p54 -sg36 -(dp55 -g38 -S'9' -p56 -sg40 -g41 -sg42 -S'135800973' -p57 -sg44 -S'CC' -p58 -sssS'grch38' -p59 -(dp60 -g34 -S'NC_000009.12:g.132925586dup' -p61 -sg36 -(dp62 -g38 -g56 -sg40 -g41 -sg42 -S'132925586' -p63 -sg44 -S'CC' -p64 -ssssS'reference_sequence_records' -p65 -(dp66 -S'protein' -p67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155899.1' -p68 -sS'transcript' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162427.1' -p70 -sssS'NM_001162426.1:c.363+1dup' -p71 -(dp72 -g3 -g4 -sg5 -(lp73 -S'NC_000009.11:g.135800972AC>ACC automapped to NC_000009.11:g.135800974dupC' -p74 -aS'NM_001162426.1:c.363dup normalized to NM_001162426.1:c.363+1dup' -p75 -aS'RefSeqGene record not available' -p76 -asg9 -g4 -sg10 -(lp77 -sg12 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 3, mRNA -p78 -sg14 -S'TSC1' -p79 -sg16 -(dp80 -g18 -S'NP_001155898.1:p.(Met122AspfsTer4)' -p81 -sg20 -S'NP_001155898.1:p.(M122Dfs*4)' -p82 -ssg22 -g23 -sg24 -S'NC_000009.11(NM_001162426.1):c.363+1dup' -p83 -sg26 -g4 -sg27 -S'NM_001162426.1:c.363+1dup' -p84 -sg29 -g4 -sg30 -(dp85 -S'hg19' -p86 -(dp87 -g34 -S'NC_000009.11:g.135800973dup' -p88 -sg36 -(dp89 -g38 -g39 -sg40 -g41 -sg42 -S'135800973' -p90 -sg44 -S'CC' -p91 -sssg46 -(dp92 
-g34 -S'NC_000009.12:g.132925586dup' -p93 -sg36 -(dp94 -g38 -g39 -sg40 -g41 -sg42 -S'132925586' -p95 -sg44 -S'CC' -p96 -sssS'grch37' -p97 -(dp98 -g34 -S'NC_000009.11:g.135800973dup' -p99 -sg36 -(dp100 -g38 -g56 -sg40 -g41 -sg42 -S'135800973' -p101 -sg44 -S'CC' -p102 -sssS'grch38' -p103 -(dp104 -g34 -S'NC_000009.12:g.132925586dup' -p105 -sg36 -(dp106 -g38 -g56 -sg40 -g41 -sg42 -S'132925586' -p107 -sg44 -S'CC' -p108 -ssssg65 -(dp109 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155898.1' -p110 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162426.1' -p111 -sssS'flag' -p112 -S'gene_variant' -p113 -sS'NM_001362177.1:c.-1+1dup' -p114 -(dp115 -g3 -g4 -sg5 -(lp116 -S'NC_000009.11:g.135800972AC>ACC automapped to NC_000009.11:g.135800974dupC' -p117 -aS'RefSeqGene record not available' -p118 -asg9 -g4 -sg10 -(lp119 -sg12 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 5, mRNA -p120 -sg14 -S'TSC1' -p121 -sg16 -(dp122 -g18 -S'NP_001349106.1:p.?' -p123 -sg20 -S'NP_001349106.1:p.?' 
-p124 -ssg22 -g23 -sg24 -S'NC_000009.11(NM_001362177.1):c.-1+1dup' -p125 -sg26 -g4 -sg27 -S'NM_001362177.1:c.-1+1dup' -p126 -sg29 -g4 -sg30 -(dp127 -S'hg19' -p128 -(dp129 -g34 -S'NC_000009.11:g.135800973dup' -p130 -sg36 -(dp131 -g38 -g39 -sg40 -g41 -sg42 -S'135800973' -p132 -sg44 -S'CC' -p133 -sssg46 -(dp134 -g34 -S'NC_000009.12:g.132925586dup' -p135 -sg36 -(dp136 -g38 -g39 -sg40 -g41 -sg42 -S'132925586' -p137 -sg44 -S'CC' -p138 -sssS'grch37' -p139 -(dp140 -g34 -S'NC_000009.11:g.135800973dup' -p141 -sg36 -(dp142 -g38 -g56 -sg40 -g41 -sg42 -S'135800973' -p143 -sg44 -S'CC' -p144 -sssS'grch38' -p145 -(dp146 -g34 -S'NC_000009.12:g.132925586dup' -p147 -sg36 -(dp148 -g38 -g56 -sg40 -g41 -sg42 -S'132925586' -p149 -sg44 -S'CC' -p150 -ssssg65 -(dp151 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001349106.1' -p152 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001362177.1' -p153 -sssS'NM_000368.4:c.363+1dup' -p154 -(dp155 -g3 -g4 -sg5 -(lp156 -S'NC_000009.11:g.135800972AC>ACC automapped to NC_000009.11:g.135800974dupC' -p157 -aS'NM_000368.4:c.363dup normalized to NM_000368.4:c.363+1dup' -p158 -aS'RefSeqGene record not available' -p159 -asg9 -g4 -sg10 -(lp160 -sg12 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA -p161 -sg14 -S'TSC1' -p162 -sg16 -(dp163 -g18 -S'NP_000359.1:p.(Met122AspfsTer4)' -p164 -sg20 -S'NP_000359.1:p.(M122Dfs*4)' -p165 -ssg22 -g23 -sg24 -S'NC_000009.11(NM_000368.4):c.363+1dup' -p166 -sg26 -g4 -sg27 -S'NM_000368.4:c.363+1dup' -p167 -sg29 -g4 -sg30 -(dp168 -S'hg19' -p169 -(dp170 -g34 -S'NC_000009.11:g.135800973dup' -p171 -sg36 -(dp172 -g38 -g39 -sg40 -g41 -sg42 -S'135800973' -p173 -sg44 -S'CC' -p174 -sssg46 -(dp175 -g34 -S'NC_000009.12:g.132925586dup' -p176 -sg36 -(dp177 -g38 -g39 -sg40 -g41 -sg42 -S'132925586' -p178 -sg44 -S'CC' -p179 -sssS'grch37' -p180 -(dp181 -g34 -S'NC_000009.11:g.135800973dup' -p182 -sg36 -(dp183 -g38 -g56 -sg40 -g41 -sg42 -S'135800973' -p184 -sg44 -S'CC' -p185 -sssS'grch38' -p186 -(dp187 -g34 
-S'NC_000009.12:g.132925586dup' -p188 -sg36 -(dp189 -g38 -g56 -sg40 -g41 -sg42 -S'132925586' -p190 -sg44 -S'CC' -p191 -ssssg65 -(dp192 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1' -p193 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4' -p194 -sssS'metadata' -p195 -(dp196 -S'variantvalidator_hgvs_version' -p197 -S'1.1.3' -p198 -sS'uta_schema' -p199 -S'uta_20180821' -p200 -sS'seqrepo_db' -p201 -S'2018-08-21' -p202 -sS'variantvalidator_version' -p203 -S'v0.2' -p204 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant163.txt b/VariantValidator/testing/testOutputsMasterITS/variant163.txt deleted file mode 100644 index 8b0df0fb..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant163.txt +++ /dev/null @@ -1,402 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_001243246.1:c.2073G>A' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens prolyl 3-hydroxylase 1 (P3H1), transcript variant 3, mRNA -p14 -sS'gene_symbol' -p15 -S'P3H1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_001230175.1:p.(Ala691=)' -p20 -sS'slr' -p21 -S'NP_001230175.1:p.(A691=)' -p22 -ssS'submitted_variant' -p23 -S'1-43212925-C-T' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_001243246.1:c.2073G>A' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000001.10:g.43212925C>T' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr1' -p39 -sS'ref' -p40 -VC -p41 -sS'pos' -p42 -S'43212925' -p43 -sS'alt' -p44 -VT -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000001.11:g.42747254C>T' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 
-S'42747254' -p50 -sg44 -g45 -sssS'grch37' -p51 -(dp52 -g34 -S'NC_000001.10:g.43212925C>T' -p53 -sg36 -(dp54 -g38 -S'1' -p55 -sg40 -g41 -sg42 -S'43212925' -p56 -sg44 -g45 -sssS'grch38' -p57 -(dp58 -g34 -S'NC_000001.11:g.42747254C>T' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'42747254' -p61 -sg44 -g45 -ssssS'reference_sequence_records' -p62 -(dp63 -S'protein' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001230175.1' -p65 -sS'transcript' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001243246.1' -p67 -sssS'NM_001146289.1:c.2073G>A' -p68 -(dp69 -g5 -g6 -sg7 -(lp70 -S'RefSeqGene record not available' -p71 -asg10 -g6 -sg11 -(lp72 -sg13 -VHomo sapiens prolyl 3-hydroxylase 1 (P3H1), transcript variant 2, mRNA -p73 -sg15 -S'P3H1' -p74 -sg17 -(dp75 -g19 -S'NP_001139761.1:p.(Ala691=)' -p76 -sg21 -S'NP_001139761.1:p.(A691=)' -p77 -ssg23 -g24 -sg25 -g6 -sg26 -g6 -sg27 -S'NM_001146289.1:c.2073G>A' -p78 -sg29 -g6 -sg30 -(dp79 -S'hg19' -p80 -(dp81 -g34 -S'NC_000001.10:g.43212925C>T' -p82 -sg36 -(dp83 -g38 -g39 -sg40 -g41 -sg42 -S'43212925' -p84 -sg44 -g45 -sssg46 -(dp85 -g34 -S'NC_000001.11:g.42747254C>T' -p86 -sg36 -(dp87 -g38 -g39 -sg40 -g41 -sg42 -S'42747254' -p88 -sg44 -g45 -sssS'grch37' -p89 -(dp90 -g34 -S'NC_000001.10:g.43212925C>T' -p91 -sg36 -(dp92 -g38 -g55 -sg40 -g41 -sg42 -S'43212925' -p93 -sg44 -g45 -sssS'grch38' -p94 -(dp95 -g34 -S'NC_000001.11:g.42747254C>T' -p96 -sg36 -(dp97 -g38 -g55 -sg40 -g41 -sg42 -S'42747254' -p98 -sg44 -g45 -ssssg62 -(dp99 -g64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001139761.1' -p100 -sg66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001146289.1' -p101 -sssS'NM_022356.3:c.2055+18G>A' -p102 -(dp103 -g5 -g6 -sg7 -(lp104 -S'RefSeqGene record not available' -p105 -asg10 -g6 -sg11 -(lp106 -sg13 -VHomo sapiens prolyl 3-hydroxylase 1 (P3H1), transcript variant 1, mRNA -p107 -sg15 -S'P3H1' -p108 -sg17 -(dp109 -g19 -S'NP_071751.3:p.?' -p110 -sg21 -S'NP_071751.3:p.?' 
-p111 -ssg23 -g24 -sg25 -S'NC_000001.10(NM_022356.3):c.2055+18G>A' -p112 -sg26 -g6 -sg27 -S'NM_022356.3:c.2055+18G>A' -p113 -sg29 -g6 -sg30 -(dp114 -S'hg19' -p115 -(dp116 -g34 -S'NC_000001.10:g.43212925C>T' -p117 -sg36 -(dp118 -g38 -g39 -sg40 -g41 -sg42 -S'43212925' -p119 -sg44 -g45 -sssg46 -(dp120 -g34 -S'NC_000001.11:g.42747254C>T' -p121 -sg36 -(dp122 -g38 -g39 -sg40 -g41 -sg42 -S'42747254' -p123 -sg44 -g45 -sssS'grch37' -p124 -(dp125 -g34 -S'NC_000001.10:g.43212925C>T' -p126 -sg36 -(dp127 -g38 -g55 -sg40 -g41 -sg42 -S'43212925' -p128 -sg44 -g45 -sssS'grch38' -p129 -(dp130 -g34 -S'NC_000001.11:g.42747254C>T' -p131 -sg36 -(dp132 -g38 -g55 -sg40 -g41 -sg42 -S'42747254' -p133 -sg44 -g45 -ssssg62 -(dp134 -g64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_071751.3' -p135 -sg66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_022356.3' -p136 -sssS'metadata' -p137 -(dp138 -S'variantvalidator_hgvs_version' -p139 -S'1.1.3' -p140 -sS'uta_schema' -p141 -S'uta_20180821' -p142 -sS'seqrepo_db' -p143 -S'2018-08-21' -p144 -sS'variantvalidator_version' -p145 -S'v0.2' -p146 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant164.txt b/VariantValidator/testing/testOutputsMasterITS/variant164.txt deleted file mode 100644 index 668cb668..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant164.txt +++ /dev/null @@ -1,179 +0,0 @@ -(dp0 -S'NM_001194958.2:c.20C>A' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -aS'NM_001194958.2:c.20C>A cannot be mapped directly to genome build GRCh37' -p8 -aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g4 -sS'alt_genomic_loci' -p11 -(lp12 -(dp13 -S'grch37' -p14 -(dp15 -S'hgvs_genomic_description' -p16 -S'NW_003315950.2:g.355171C>A' -p17 -sS'vcf' -p18 -(dp19 -S'chr' -p20 -S'HG987_PATCH' -p21 -sS'ref' -p22 -S'C' -p23 -sS'pos' -p24 -S'355171' -p25 -sS'alt' -p26 -S'A' -p27 -sssa(dp28 -S'hg19' -p29 -(dp30 -g16 -S'NW_003315950.2:g.355171C>A' -p31 -sg18 -(dp32 -g20 -S'NW_003315950.2' -p33 -sg22 -g23 -sg24 -S'355171' -p34 -sg26 -g27 -sssasS'transcript_description' -p35 -VHomo sapiens potassium voltage-gated channel subfamily J member 18 (KCNJ18), mRNA -p36 -sS'gene_symbol' -p37 -S'KCNJ18' -p38 -sS'hgvs_predicted_protein_consequence' -p39 -(dp40 -S'tlr' -p41 -S'NP_001181887.2:p.(Ala7Asp)' -p42 -sS'slr' -p43 -S'NP_001181887.2:p.(A7D)' -p44 -ssS'submitted_variant' -p45 -S'HG987_PATCH-355171-C-A' -p46 -sS'genome_context_intronic_sequence' -p47 -g4 -sS'hgvs_lrg_variant' -p48 -g4 -sS'hgvs_transcript_variant' -p49 -S'NM_001194958.2:c.20C>A' -p50 -sS'hgvs_refseqgene_variant' -p51 -g4 -sS'primary_assembly_loci' -p52 -(dp53 -S'grch38' -p54 -(dp55 -g16 -S'NC_000017.11:g.21702806C>A' -p56 -sg18 -(dp57 -g20 -S'17' -p58 -sg22 -g23 -sg24 -S'21702806' -p59 -sg26 -g27 -sssS'hg38' -p60 -(dp61 -g16 -S'NC_000017.11:g.21702806C>A' -p62 -sg18 -(dp63 -g20 -S'chr17' -p64 -sg22 -g23 -sg24 -S'21702806' 
-p65 -sg26 -g27 -ssssS'reference_sequence_records' -p66 -(dp67 -S'protein' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001181887.2' -p69 -sS'transcript' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001194958.2' -p71 -sssS'flag' -p72 -S'gene_variant' -p73 -sS'metadata' -p74 -(dp75 -S'variantvalidator_hgvs_version' -p76 -S'1.1.3' -p77 -sS'uta_schema' -p78 -S'uta_20180821' -p79 -sS'seqrepo_db' -p80 -S'2018-08-21' -p81 -sS'variantvalidator_version' -p82 -S'v0.2' -p83 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant165.txt b/VariantValidator/testing/testOutputsMasterITS/variant165.txt deleted file mode 100644 index 7594875c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant165.txt +++ /dev/null @@ -1,600 +0,0 @@ -(dp0 -S'NM_000022.3:c.534A>G' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens adenosine deaminase (ADA), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'ADA' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_000013.2:p.(Val178=)' -p18 -sS'slr' -p19 -S'NP_000013.2:p.(V178=)' -p20 -ssS'submitted_variant' -p21 -S'20-43252915-T-C' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_000022.3:c.534A>G' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000020.10:g.43252915T>C' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr20' -p37 -sS'ref' -p38 -VT -p39 -sS'pos' -p40 -S'43252915' -p41 -sS'alt' -p42 -VC -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000020.11:g.44624274T>C' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'44624274' -p48 -sg42 -g43 -sssS'grch37' -p49 -(dp50 -g32 
-S'NC_000020.10:g.43252915T>C' -p51 -sg34 -(dp52 -g36 -S'20' -p53 -sg38 -g39 -sg40 -S'43252915' -p54 -sg42 -g43 -sssS'grch38' -p55 -(dp56 -g32 -S'NC_000020.11:g.44624274T>C' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'44624274' -p59 -sg42 -g43 -ssssS'reference_sequence_records' -p60 -(dp61 -S'protein' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000013.2' -p63 -sS'transcript' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000022.3' -p65 -sssS'NM_001322051.1:c.534A>G' -p66 -(dp67 -g3 -g4 -sg5 -(lp68 -S'RefSeqGene record not available' -p69 -asg8 -g4 -sg9 -(lp70 -sg11 -VHomo sapiens adenosine deaminase (ADA), transcript variant 3, mRNA -p71 -sg13 -S'ADA' -p72 -sg15 -(dp73 -g17 -S'NP_001308980.1:p.(Val178=)' -p74 -sg19 -S'NP_001308980.1:p.(V178=)' -p75 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001322051.1:c.534A>G' -p76 -sg27 -g4 -sg28 -(dp77 -S'hg19' -p78 -(dp79 -g32 -S'NC_000020.10:g.43252915T>C' -p80 -sg34 -(dp81 -g36 -g37 -sg38 -g39 -sg40 -S'43252915' -p82 -sg42 -g43 -sssg44 -(dp83 -g32 -S'NC_000020.11:g.44624274T>C' -p84 -sg34 -(dp85 -g36 -g37 -sg38 -g39 -sg40 -S'44624274' -p86 -sg42 -g43 -sssS'grch37' -p87 -(dp88 -g32 -S'NC_000020.10:g.43252915T>C' -p89 -sg34 -(dp90 -g36 -g53 -sg38 -g39 -sg40 -S'43252915' -p91 -sg42 -g43 -sssS'grch38' -p92 -(dp93 -g32 -S'NC_000020.11:g.44624274T>C' -p94 -sg34 -(dp95 -g36 -g53 -sg38 -g39 -sg40 -S'44624274' -p96 -sg42 -g43 -ssssg60 -(dp97 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308980.1' -p98 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322051.1' -p99 -sssS'NM_000022.2:c.534A>G' -p100 -(dp101 -g3 -g4 -sg5 -(lp102 -S'A more recent version of the selected reference sequence NM_000022.2 is available (NM_000022.3)' -p103 -aS'NM_000022.3:c.534A>G MUST be fully validated prior to use in reports' -p104 -aS'select_variants=NM_000022.3:c.534A>G' -p105 -aS'RefSeqGene record not available' -p106 -asg8 -g4 -sg9 -(lp107 -sg11 -VHomo sapiens adenosine deaminase (ADA), mRNA -p108 -sg13 -S'ADA' -p109 -sg15 -(dp110 -g17 
-S'NP_000013.2:p.(Val178=)' -p111 -sg19 -S'NP_000013.2:p.(V178=)' -p112 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_000022.2:c.534A>G' -p113 -sg27 -g4 -sg28 -(dp114 -S'hg19' -p115 -(dp116 -g32 -S'NC_000020.10:g.43252915T>C' -p117 -sg34 -(dp118 -g36 -g37 -sg38 -g39 -sg40 -S'43252915' -p119 -sg42 -g43 -sssS'grch37' -p120 -(dp121 -g32 -S'NC_000020.10:g.43252915T>C' -p122 -sg34 -(dp123 -g36 -g53 -sg38 -g39 -sg40 -S'43252915' -p124 -sg42 -g43 -ssssg60 -(dp125 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000013.2' -p126 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000022.2' -p127 -sssS'flag' -p128 -S'gene_variant' -p129 -sS'NM_001322050.1:c.129A>G' -p130 -(dp131 -g3 -g4 -sg5 -(lp132 -S'RefSeqGene record not available' -p133 -asg8 -g4 -sg9 -(lp134 -sg11 -VHomo sapiens adenosine deaminase (ADA), transcript variant 2, mRNA -p135 -sg13 -S'ADA' -p136 -sg15 -(dp137 -g17 -S'NP_001308979.1:p.(Val43=)' -p138 -sg19 -S'NP_001308979.1:p.(V43=)' -p139 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001322050.1:c.129A>G' -p140 -sg27 -g4 -sg28 -(dp141 -S'hg19' -p142 -(dp143 -g32 -S'NC_000020.10:g.43252915T>C' -p144 -sg34 -(dp145 -g36 -g37 -sg38 -g39 -sg40 -S'43252915' -p146 -sg42 -g43 -sssg44 -(dp147 -g32 -S'NC_000020.11:g.44624274T>C' -p148 -sg34 -(dp149 -g36 -g37 -sg38 -g39 -sg40 -S'44624274' -p150 -sg42 -g43 -sssS'grch37' -p151 -(dp152 -g32 -S'NC_000020.10:g.43252915T>C' -p153 -sg34 -(dp154 -g36 -g53 -sg38 -g39 -sg40 -S'43252915' -p155 -sg42 -g43 -sssS'grch38' -p156 -(dp157 -g32 -S'NC_000020.11:g.44624274T>C' -p158 -sg34 -(dp159 -g36 -g53 -sg38 -g39 -sg40 -S'44624274' -p160 -sg42 -g43 -ssssg60 -(dp161 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308979.1' -p162 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322050.1' -p163 -sssS'NR_136160.1:n.685A>G' -p164 -(dp165 -g3 -g4 -sg5 -(lp166 -S'RefSeqGene record not available' -p167 -asg8 -g4 -sg9 -(lp168 -sg11 -VHomo sapiens adenosine deaminase (ADA), transcript variant 4, non-coding RNA -p169 -sg13 -S'ADA' -p170 -sg15 
-(dp171 -g17 -S'Non-coding :n.' -p172 -sg19 -g172 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NR_136160.1:n.685A>G' -p173 -sg27 -g4 -sg28 -(dp174 -S'hg19' -p175 -(dp176 -g32 -S'NC_000020.10:g.43252915T>C' -p177 -sg34 -(dp178 -g36 -g37 -sg38 -g39 -sg40 -S'43252915' -p179 -sg42 -g43 -sssg44 -(dp180 -g32 -S'NC_000020.11:g.44624274T>C' -p181 -sg34 -(dp182 -g36 -g37 -sg38 -g39 -sg40 -S'44624274' -p183 -sg42 -g43 -sssS'grch37' -p184 -(dp185 -g32 -S'NC_000020.10:g.43252915T>C' -p186 -sg34 -(dp187 -g36 -g53 -sg38 -g39 -sg40 -S'43252915' -p188 -sg42 -g43 -sssS'grch38' -p189 -(dp190 -g32 -S'NC_000020.11:g.44624274T>C' -p191 -sg34 -(dp192 -g36 -g53 -sg38 -g39 -sg40 -S'44624274' -p193 -sg42 -g43 -ssssg60 -(dp194 -g64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_136160.1' -p195 -sssS'metadata' -p196 -(dp197 -S'variantvalidator_hgvs_version' -p198 -S'1.1.3' -p199 -sS'uta_schema' -p200 -S'uta_20180821' -p201 -sS'seqrepo_db' -p202 -S'2018-08-21' -p203 -sS'variantvalidator_version' -p204 -S'v0.2' -p205 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant166.txt b/VariantValidator/testing/testOutputsMasterITS/variant166.txt deleted file mode 100644 index e30960a5..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant166.txt +++ /dev/null @@ -1,171 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_206933.2:c.6317C>G' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens usherin (USH2A), transcript variant 2, mRNA -p14 -sS'gene_symbol' -p15 -S'USH2A' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_996816.2:p.(Thr2106Arg)' -p20 -sS'slr' -p21 -S'NP_996816.2:p.(T2106R)' -p22 -ssS'submitted_variant' -p23 -S'1-216219781-A-C' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 
-sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_206933.2:c.6317C>G' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000001.10:g.216219781A>C' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr1' -p39 -sS'ref' -p40 -S'A' -p41 -sS'pos' -p42 -S'216219781' -p43 -sS'alt' -p44 -VC -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000001.11:g.216046439A>C' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'216046439' -p50 -sg44 -g45 -sssS'grch37' -p51 -(dp52 -g34 -S'NC_000001.10:g.216219781A>C' -p53 -sg36 -(dp54 -g38 -S'1' -p55 -sg40 -g41 -sg42 -S'216219781' -p56 -sg44 -g45 -sssS'grch38' -p57 -(dp58 -g34 -S'NC_000001.11:g.216046439A>C' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'216046439' -p61 -sg44 -g45 -ssssS'reference_sequence_records' -p62 -(dp63 -S'protein' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_996816.2' -p65 -sS'transcript' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_206933.2' -p67 -sssS'metadata' -p68 -(dp69 -S'variantvalidator_hgvs_version' -p70 -S'1.1.3' -p71 -sS'uta_schema' -p72 -S'uta_20180821' -p73 -sS'seqrepo_db' -p74 -S'2018-08-21' -p75 -sS'variantvalidator_version' -p76 -S'v0.2' -p77 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant167.txt b/VariantValidator/testing/testOutputsMasterITS/variant167.txt deleted file mode 100644 index b1fff138..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant167.txt +++ /dev/null @@ -1,1405 +0,0 @@ -(dp0 -S'NM_005896.3:c.394C>G' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Multiple ALT sequences detected' -p7 -aS'auto-submitting all possible combinations' -p8 -aS'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g4 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 1, mRNA -p14 -sS'gene_symbol' -p15 -S'IDH1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_005887.2:p.(Arg132Gly)' -p20 -sS'slr' -p21 -S'NP_005887.2:p.(R132G)' -p22 -ssS'submitted_variant' -p23 -S'2-209113113-G-A,C,T' -p24 -sS'genome_context_intronic_sequence' -p25 -g4 -sS'hgvs_lrg_variant' -p26 -g4 -sS'hgvs_transcript_variant' -p27 -S'NM_005896.3:c.394C>G' -p28 -sS'hgvs_refseqgene_variant' -p29 -g4 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000002.11:g.209113113G>C' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr2' -p39 -sS'ref' -p40 -VG -p41 -sS'pos' -p42 -S'209113113' -p43 -sS'alt' -p44 -VC -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000002.12:g.208248389G>C' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'208248389' -p50 -sg44 -g45 -sssS'grch37' -p51 -(dp52 -g34 -S'NC_000002.11:g.209113113G>C' -p53 -sg36 -(dp54 -g38 -S'2' -p55 -sg40 -g41 -sg42 -S'209113113' -p56 -sg44 -g45 -sssS'grch38' -p57 -(dp58 -g34 -S'NC_000002.12:g.208248389G>C' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'208248389' -p61 -sg44 -g45 -ssssS'reference_sequence_records' -p62 -(dp63 -S'protein' -p64 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2' -p65 -sS'transcript' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.3' -p67 -sssS'NM_001282387.1:c.394C>G' -p68 -(dp69 -g3 -g4 -sg5 -(lp70 -S'Multiple ALT sequences detected' -p71 -aS'auto-submitting all possible combinations' -p72 -aS'RefSeqGene record not available' -p73 -asg10 -g4 -sg11 -(lp74 -sg13 -VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 3, mRNA -p75 -sg15 -S'IDH1' -p76 -sg17 -(dp77 -g19 -S'NP_001269316.1:p.(Arg132Gly)' -p78 -sg21 -S'NP_001269316.1:p.(R132G)' -p79 -ssg23 -g24 -sg25 -g4 -sg26 -g4 -sg27 -S'NM_001282387.1:c.394C>G' -p80 -sg29 -g4 -sg30 -(dp81 -S'hg19' -p82 -(dp83 -g34 -S'NC_000002.11:g.209113113G>C' -p84 -sg36 -(dp85 -g38 -g39 -sg40 -g41 -sg42 -S'209113113' -p86 -sg44 -g45 -sssg46 -(dp87 -g34 -S'NC_000002.12:g.208248389G>C' -p88 -sg36 -(dp89 -g38 -g39 -sg40 -g41 -sg42 -S'208248389' -p90 -sg44 -g45 -sssS'grch37' -p91 -(dp92 -g34 -S'NC_000002.11:g.209113113G>C' -p93 -sg36 -(dp94 -g38 -g55 -sg40 -g41 -sg42 -S'209113113' -p95 -sg44 -g45 -sssS'grch38' -p96 -(dp97 -g34 -S'NC_000002.12:g.208248389G>C' -p98 -sg36 -(dp99 -g38 -g55 -sg40 -g41 -sg42 -S'208248389' -p100 -sg44 -g45 -ssssg62 -(dp101 -g64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269316.1' -p102 -sg66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282387.1' -p103 -sssS'NM_001282387.1:c.394C>A' -p104 -(dp105 -g3 -g4 -sg5 -(lp106 -S'Multiple ALT sequences detected' -p107 -aS'auto-submitting all possible combinations' -p108 -aS'RefSeqGene record not available' -p109 -asg10 -g4 -sg11 -(lp110 -sg13 -VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 3, mRNA -p111 -sg15 -S'IDH1' -p112 -sg17 -(dp113 -g19 -S'NP_001269316.1:p.(Arg132Ser)' -p114 -sg21 -S'NP_001269316.1:p.(R132S)' -p115 -ssg23 -g24 -sg25 -g4 -sg26 -g4 -sg27 -S'NM_001282387.1:c.394C>A' -p116 -sg29 -g4 -sg30 -(dp117 -S'hg19' -p118 -(dp119 -g34 -S'NC_000002.11:g.209113113G>T' -p120 -sg36 
-(dp121 -g38 -g39 -sg40 -g41 -sg42 -S'209113113' -p122 -sg44 -VT -p123 -sssg46 -(dp124 -g34 -S'NC_000002.12:g.208248389G>T' -p125 -sg36 -(dp126 -g38 -g39 -sg40 -g41 -sg42 -S'208248389' -p127 -sg44 -g123 -sssS'grch37' -p128 -(dp129 -g34 -S'NC_000002.11:g.209113113G>T' -p130 -sg36 -(dp131 -g38 -g55 -sg40 -g41 -sg42 -S'209113113' -p132 -sg44 -g123 -sssS'grch38' -p133 -(dp134 -g34 -S'NC_000002.12:g.208248389G>T' -p135 -sg36 -(dp136 -g38 -g55 -sg40 -g41 -sg42 -S'208248389' -p137 -sg44 -g123 -ssssg62 -(dp138 -g64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269316.1' -p139 -sg66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282387.1' -p140 -sssS'NM_005896.3:c.394C>A' -p141 -(dp142 -g3 -g4 -sg5 -(lp143 -S'Multiple ALT sequences detected' -p144 -aS'auto-submitting all possible combinations' -p145 -aS'RefSeqGene record not available' -p146 -asg10 -g4 -sg11 -(lp147 -sg13 -VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 1, mRNA -p148 -sg15 -S'IDH1' -p149 -sg17 -(dp150 -g19 -S'NP_005887.2:p.(Arg132Ser)' -p151 -sg21 -S'NP_005887.2:p.(R132S)' -p152 -ssg23 -g24 -sg25 -g4 -sg26 -g4 -sg27 -S'NM_005896.3:c.394C>A' -p153 -sg29 -g4 -sg30 -(dp154 -S'hg19' -p155 -(dp156 -g34 -S'NC_000002.11:g.209113113G>T' -p157 -sg36 -(dp158 -g38 -g39 -sg40 -g41 -sg42 -S'209113113' -p159 -sg44 -g123 -sssg46 -(dp160 -g34 -S'NC_000002.12:g.208248389G>T' -p161 -sg36 -(dp162 -g38 -g39 -sg40 -g41 -sg42 -S'208248389' -p163 -sg44 -g123 -sssS'grch37' -p164 -(dp165 -g34 -S'NC_000002.11:g.209113113G>T' -p166 -sg36 -(dp167 -g38 -g55 -sg40 -g41 -sg42 -S'209113113' -p168 -sg44 -g123 -sssS'grch38' -p169 -(dp170 -g34 -S'NC_000002.12:g.208248389G>T' -p171 -sg36 -(dp172 -g38 -g55 -sg40 -g41 -sg42 -S'208248389' -p173 -sg44 -g123 -ssssg62 -(dp174 -g64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2' -p175 -sg66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.3' -p176 -sssS'NM_001282386.1:c.394C>T' -p177 -(dp178 -g3 -g4 -sg5 -(lp179 -S'Multiple ALT sequences detected' -p180 
-aS'auto-submitting all possible combinations' -p181 -aS'RefSeqGene record not available' -p182 -asg10 -g4 -sg11 -(lp183 -sg13 -VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 2, mRNA -p184 -sg15 -S'IDH1' -p185 -sg17 -(dp186 -g19 -S'NP_001269315.1:p.(Arg132Cys)' -p187 -sg21 -S'NP_001269315.1:p.(R132C)' -p188 -ssg23 -g24 -sg25 -g4 -sg26 -g4 -sg27 -S'NM_001282386.1:c.394C>T' -p189 -sg29 -g4 -sg30 -(dp190 -S'hg19' -p191 -(dp192 -g34 -S'NC_000002.11:g.209113113G>A' -p193 -sg36 -(dp194 -g38 -g39 -sg40 -g41 -sg42 -S'209113113' -p195 -sg44 -VA -p196 -sssg46 -(dp197 -g34 -S'NC_000002.12:g.208248389G>A' -p198 -sg36 -(dp199 -g38 -g39 -sg40 -g41 -sg42 -S'208248389' -p200 -sg44 -g196 -sssS'grch37' -p201 -(dp202 -g34 -S'NC_000002.11:g.209113113G>A' -p203 -sg36 -(dp204 -g38 -g55 -sg40 -g41 -sg42 -S'209113113' -p205 -sg44 -g196 -sssS'grch38' -p206 -(dp207 -g34 -S'NC_000002.12:g.208248389G>A' -p208 -sg36 -(dp209 -g38 -g55 -sg40 -g41 -sg42 -S'208248389' -p210 -sg44 -g196 -ssssg62 -(dp211 -g64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269315.1' -p212 -sg66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282386.1' -p213 -sssS'NM_005896.2:c.394C>A' -p214 -(dp215 -g3 -g4 -sg5 -(lp216 -S'Multiple ALT sequences detected' -p217 -aS'auto-submitting all possible combinations' -p218 -aS'A more recent version of the selected reference sequence NM_005896.2 is available (NM_005896.3)' -p219 -aS'NM_005896.3:c.394C>A MUST be fully validated prior to use in reports' -p220 -aS'select_variants=NM_005896.3:c.394C>A' -p221 -aS'RefSeqGene record not available' -p222 -asg10 -g4 -sg11 -(lp223 -sg13 -VHomo sapiens isocitrate dehydrogenase 1 (NADP+), soluble (IDH1), mRNA -p224 -sg15 -S'IDH1' -p225 -sg17 -(dp226 -g19 -S'NP_005887.2:p.(Arg132Ser)' -p227 -sg21 -S'NP_005887.2:p.(R132S)' -p228 -ssg23 -g24 -sg25 -g4 -sg26 -g4 -sg27 -S'NM_005896.2:c.394C>A' -p229 -sg29 -g4 -sg30 -(dp230 -S'hg19' -p231 -(dp232 -g34 -S'NC_000002.11:g.209113113G>T' -p233 -sg36 -(dp234 -g38 
-g39 -sg40 -g41 -sg42 -S'209113113' -p235 -sg44 -g123 -sssS'grch37' -p236 -(dp237 -g34 -S'NC_000002.11:g.209113113G>T' -p238 -sg36 -(dp239 -g38 -g55 -sg40 -g41 -sg42 -S'209113113' -p240 -sg44 -g123 -ssssg62 -(dp241 -g64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2' -p242 -sg66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.2' -p243 -sssS'NM_005896.2:c.394C>G' -p244 -(dp245 -g3 -g4 -sg5 -(lp246 -S'Multiple ALT sequences detected' -p247 -aS'auto-submitting all possible combinations' -p248 -aS'A more recent version of the selected reference sequence NM_005896.2 is available (NM_005896.3)' -p249 -aS'NM_005896.3:c.394C>G MUST be fully validated prior to use in reports' -p250 -aS'select_variants=NM_005896.3:c.394C>G' -p251 -aS'RefSeqGene record not available' -p252 -asg10 -g4 -sg11 -(lp253 -sg13 -VHomo sapiens isocitrate dehydrogenase 1 (NADP+), soluble (IDH1), mRNA -p254 -sg15 -S'IDH1' -p255 -sg17 -(dp256 -g19 -S'NP_005887.2:p.(Arg132Gly)' -p257 -sg21 -S'NP_005887.2:p.(R132G)' -p258 -ssg23 -g24 -sg25 -g4 -sg26 -g4 -sg27 -S'NM_005896.2:c.394C>G' -p259 -sg29 -g4 -sg30 -(dp260 -S'hg19' -p261 -(dp262 -g34 -S'NC_000002.11:g.209113113G>C' -p263 -sg36 -(dp264 -g38 -g39 -sg40 -g41 -sg42 -S'209113113' -p265 -sg44 -g45 -sssS'grch37' -p266 -(dp267 -g34 -S'NC_000002.11:g.209113113G>C' -p268 -sg36 -(dp269 -g38 -g55 -sg40 -g41 -sg42 -S'209113113' -p270 -sg44 -g45 -ssssg62 -(dp271 -g64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2' -p272 -sg66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.2' -p273 -sssS'flag' -p274 -S'gene_variant' -p275 -sS'NM_005896.3:c.394C>T' -p276 -(dp277 -g3 -g4 -sg5 -(lp278 -S'Multiple ALT sequences detected' -p279 -aS'auto-submitting all possible combinations' -p280 -aS'RefSeqGene record not available' -p281 -asg10 -g4 -sg11 -(lp282 -sg13 -VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 1, mRNA -p283 -sg15 -S'IDH1' -p284 -sg17 -(dp285 -g19 -S'NP_005887.2:p.(Arg132Cys)' -p286 -sg21 
-S'NP_005887.2:p.(R132C)' -p287 -ssg23 -g24 -sg25 -g4 -sg26 -g4 -sg27 -S'NM_005896.3:c.394C>T' -p288 -sg29 -g4 -sg30 -(dp289 -S'hg19' -p290 -(dp291 -g34 -S'NC_000002.11:g.209113113G>A' -p292 -sg36 -(dp293 -g38 -g39 -sg40 -g41 -sg42 -S'209113113' -p294 -sg44 -g196 -sssg46 -(dp295 -g34 -S'NC_000002.12:g.208248389G>A' -p296 -sg36 -(dp297 -g38 -g39 -sg40 -g41 -sg42 -S'208248389' -p298 -sg44 -g196 -sssS'grch37' -p299 -(dp300 -g34 -S'NC_000002.11:g.209113113G>A' -p301 -sg36 -(dp302 -g38 -g55 -sg40 -g41 -sg42 -S'209113113' -p303 -sg44 -g196 -sssS'grch38' -p304 -(dp305 -g34 -S'NC_000002.12:g.208248389G>A' -p306 -sg36 -(dp307 -g38 -g55 -sg40 -g41 -sg42 -S'208248389' -p308 -sg44 -g196 -ssssg62 -(dp309 -g64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2' -p310 -sg66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.3' -p311 -sssS'NM_001282387.1:c.394C>T' -p312 -(dp313 -g3 -g4 -sg5 -(lp314 -S'Multiple ALT sequences detected' -p315 -aS'auto-submitting all possible combinations' -p316 -aS'RefSeqGene record not available' -p317 -asg10 -g4 -sg11 -(lp318 -sg13 -VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 3, mRNA -p319 -sg15 -S'IDH1' -p320 -sg17 -(dp321 -g19 -S'NP_001269316.1:p.(Arg132Cys)' -p322 -sg21 -S'NP_001269316.1:p.(R132C)' -p323 -ssg23 -g24 -sg25 -g4 -sg26 -g4 -sg27 -S'NM_001282387.1:c.394C>T' -p324 -sg29 -g4 -sg30 -(dp325 -S'hg19' -p326 -(dp327 -g34 -S'NC_000002.11:g.209113113G>A' -p328 -sg36 -(dp329 -g38 -g39 -sg40 -g41 -sg42 -S'209113113' -p330 -sg44 -g196 -sssg46 -(dp331 -g34 -S'NC_000002.12:g.208248389G>A' -p332 -sg36 -(dp333 -g38 -g39 -sg40 -g41 -sg42 -S'208248389' -p334 -sg44 -g196 -sssS'grch37' -p335 -(dp336 -g34 -S'NC_000002.11:g.209113113G>A' -p337 -sg36 -(dp338 -g38 -g55 -sg40 -g41 -sg42 -S'209113113' -p339 -sg44 -g196 -sssS'grch38' -p340 -(dp341 -g34 -S'NC_000002.12:g.208248389G>A' -p342 -sg36 -(dp343 -g38 -g55 -sg40 -g41 -sg42 -S'208248389' -p344 -sg44 -g196 -ssssg62 -(dp345 -g64 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269316.1' -p346 -sg66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282387.1' -p347 -sssS'NM_001282386.1:c.394C>G' -p348 -(dp349 -g3 -g4 -sg5 -(lp350 -S'Multiple ALT sequences detected' -p351 -aS'auto-submitting all possible combinations' -p352 -aS'RefSeqGene record not available' -p353 -asg10 -g4 -sg11 -(lp354 -sg13 -VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 2, mRNA -p355 -sg15 -S'IDH1' -p356 -sg17 -(dp357 -g19 -S'NP_001269315.1:p.(Arg132Gly)' -p358 -sg21 -S'NP_001269315.1:p.(R132G)' -p359 -ssg23 -g24 -sg25 -g4 -sg26 -g4 -sg27 -S'NM_001282386.1:c.394C>G' -p360 -sg29 -g4 -sg30 -(dp361 -S'hg19' -p362 -(dp363 -g34 -S'NC_000002.11:g.209113113G>C' -p364 -sg36 -(dp365 -g38 -g39 -sg40 -g41 -sg42 -S'209113113' -p366 -sg44 -g45 -sssg46 -(dp367 -g34 -S'NC_000002.12:g.208248389G>C' -p368 -sg36 -(dp369 -g38 -g39 -sg40 -g41 -sg42 -S'208248389' -p370 -sg44 -g45 -sssS'grch37' -p371 -(dp372 -g34 -S'NC_000002.11:g.209113113G>C' -p373 -sg36 -(dp374 -g38 -g55 -sg40 -g41 -sg42 -S'209113113' -p375 -sg44 -g45 -sssS'grch38' -p376 -(dp377 -g34 -S'NC_000002.12:g.208248389G>C' -p378 -sg36 -(dp379 -g38 -g55 -sg40 -g41 -sg42 -S'208248389' -p380 -sg44 -g45 -ssssg62 -(dp381 -g64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269315.1' -p382 -sg66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282386.1' -p383 -sssS'NM_005896.2:c.394C>T' -p384 -(dp385 -g3 -g4 -sg5 -(lp386 -S'Multiple ALT sequences detected' -p387 -aS'auto-submitting all possible combinations' -p388 -aS'A more recent version of the selected reference sequence NM_005896.2 is available (NM_005896.3)' -p389 -aS'NM_005896.3:c.394C>T MUST be fully validated prior to use in reports' -p390 -aS'select_variants=NM_005896.3:c.394C>T' -p391 -aS'RefSeqGene record not available' -p392 -asg10 -g4 -sg11 -(lp393 -sg13 -VHomo sapiens isocitrate dehydrogenase 1 (NADP+), soluble (IDH1), mRNA -p394 -sg15 -S'IDH1' -p395 -sg17 -(dp396 -g19 
-S'NP_005887.2:p.(Arg132Cys)' -p397 -sg21 -S'NP_005887.2:p.(R132C)' -p398 -ssg23 -g24 -sg25 -g4 -sg26 -g4 -sg27 -S'NM_005896.2:c.394C>T' -p399 -sg29 -g4 -sg30 -(dp400 -S'hg19' -p401 -(dp402 -g34 -S'NC_000002.11:g.209113113G>A' -p403 -sg36 -(dp404 -g38 -g39 -sg40 -g41 -sg42 -S'209113113' -p405 -sg44 -g196 -sssS'grch37' -p406 -(dp407 -g34 -S'NC_000002.11:g.209113113G>A' -p408 -sg36 -(dp409 -g38 -g55 -sg40 -g41 -sg42 -S'209113113' -p410 -sg44 -g196 -ssssg62 -(dp411 -g64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2' -p412 -sg66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.2' -p413 -sssS'NM_001282386.1:c.394C>A' -p414 -(dp415 -g3 -g4 -sg5 -(lp416 -S'Multiple ALT sequences detected' -p417 -aS'auto-submitting all possible combinations' -p418 -aS'RefSeqGene record not available' -p419 -asg10 -g4 -sg11 -(lp420 -sg13 -VHomo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 2, mRNA -p421 -sg15 -S'IDH1' -p422 -sg17 -(dp423 -g19 -S'NP_001269315.1:p.(Arg132Ser)' -p424 -sg21 -S'NP_001269315.1:p.(R132S)' -p425 -ssg23 -g24 -sg25 -g4 -sg26 -g4 -sg27 -S'NM_001282386.1:c.394C>A' -p426 -sg29 -g4 -sg30 -(dp427 -S'hg19' -p428 -(dp429 -g34 -S'NC_000002.11:g.209113113G>T' -p430 -sg36 -(dp431 -g38 -g39 -sg40 -g41 -sg42 -S'209113113' -p432 -sg44 -g123 -sssg46 -(dp433 -g34 -S'NC_000002.12:g.208248389G>T' -p434 -sg36 -(dp435 -g38 -g39 -sg40 -g41 -sg42 -S'208248389' -p436 -sg44 -g123 -sssS'grch37' -p437 -(dp438 -g34 -S'NC_000002.11:g.209113113G>T' -p439 -sg36 -(dp440 -g38 -g55 -sg40 -g41 -sg42 -S'209113113' -p441 -sg44 -g123 -sssS'grch38' -p442 -(dp443 -g34 -S'NC_000002.12:g.208248389G>T' -p444 -sg36 -(dp445 -g38 -g55 -sg40 -g41 -sg42 -S'208248389' -p446 -sg44 -g123 -ssssg62 -(dp447 -g64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269315.1' -p448 -sg66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282386.1' -p449 -sssS'metadata' -p450 -(dp451 -S'variantvalidator_hgvs_version' -p452 -S'1.1.3' -p453 -sS'uta_schema' -p454 -S'uta_20180821' -p455 
-sS'seqrepo_db' -p456 -S'2018-08-21' -p457 -sS'variantvalidator_version' -p458 -S'v0.2' -p459 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant168.txt b/VariantValidator/testing/testOutputsMasterITS/variant168.txt deleted file mode 100644 index 9af81dc2..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant168.txt +++ /dev/null @@ -1,947 +0,0 @@ -(dp0 -S'NM_001204314.1:c.*6525_*6526=' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'A more recent version of the selected reference sequence NM_001204314.1 is available (NM_001204314.2)' -p7 -aS'NM_001204314.2:c.*6525_*6526delCTinsTG MUST be fully validated prior to use in reports' -p8 -aS'select_variants=NM_001204314.2:c.*6525_*6526delinsTG' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g4 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA -p15 -sS'gene_symbol' -p16 -S'PRLR' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_001191243.1:p.?' -p21 -sS'slr' -p22 -S'NP_001191243.1:p.?' 
-p23 -ssS'submitted_variant' -p24 -S'NC_000005.9:g.35058665_35058666CA=' -p25 -sS'genome_context_intronic_sequence' -p26 -g4 -sS'hgvs_lrg_variant' -p27 -g4 -sS'hgvs_transcript_variant' -p28 -S'NM_001204314.1:c.*6525_*6526=' -p29 -sS'hgvs_refseqgene_variant' -p30 -g4 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'hgvs_genomic_description' -p35 -S'NC_000005.9:g.35058665_35058666=' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr5' -p40 -sS'ref' -p41 -VCA -p42 -sS'pos' -p43 -S'35058665' -p44 -sS'alt' -p45 -g42 -sssS'grch37' -p46 -(dp47 -g35 -S'NC_000005.9:g.35058665_35058666=' -p48 -sg37 -(dp49 -g39 -S'5' -p50 -sg41 -g42 -sg43 -S'35058665' -p51 -sg45 -g42 -ssssS'reference_sequence_records' -p52 -(dp53 -S'protein' -p54 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191243.1' -p55 -sS'transcript' -p56 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204314.1' -p57 -sssS'NM_001204314.2:c.*6528del' -p58 -(dp59 -g3 -g4 -sg5 -(lp60 -S'The displayed variants may be artefacts of aligning NM_001204314.2 with genome build GRCh37' -p61 -aS'NM_001204314.2:c.*6527_*6530 contains 1 transcript base(s) that fail to align to chromosome NC_000005.9' -p62 -aS'Caution should be used when reporting the displayed variant descriptions' -p63 -aS'If you are unsure, please contact admin' -p64 -aS'RefSeqGene record not available' -p65 -asg11 -g4 -sg12 -(lp66 -sg14 -VHomo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA -p67 -sg16 -S'PRLR' -p68 -sg18 -(dp69 -g20 -S'NP_001191243.1:p.?' -p70 -sg22 -S'NP_001191243.1:p.?' 
-p71 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_001204314.2:c.*6528del' -p72 -sg30 -g4 -sg31 -(dp73 -S'grch38' -p74 -(dp75 -g35 -S'NC_000005.10:g.35058563del' -p76 -sg37 -(dp77 -g39 -g50 -sg41 -S'CA' -p78 -sg43 -S'35058560' -p79 -sg45 -S'C' -p80 -sssS'grch37' -p81 -(dp82 -g35 -S'NC_000005.9:g.35058662_35058668=' -p83 -sg37 -(dp84 -g39 -g50 -sg41 -S'AGACAAG' -p85 -sg43 -S'35058662' -p86 -sg45 -g85 -sssS'hg38' -p87 -(dp88 -g35 -S'NC_000005.10:g.35058563del' -p89 -sg37 -(dp90 -g39 -g40 -sg41 -S'CA' -p91 -sg43 -S'35058560' -p92 -sg45 -g80 -sssS'hg19' -p93 -(dp94 -g35 -S'NC_000005.9:g.35058662_35058668=' -p95 -sg37 -(dp96 -g39 -g40 -sg41 -g85 -sg43 -S'35058662' -p97 -sg45 -g85 -ssssg52 -(dp98 -g54 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191243.1' -p99 -sg56 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204314.2' -p100 -sssS'NM_001204317.1:c.856-9153_856-9152=' -p101 -(dp102 -g3 -g4 -sg5 -(lp103 -S'RefSeqGene record not available' -p104 -asg11 -g4 -sg12 -(lp105 -sg14 -VHomo sapiens prolactin receptor (PRLR), transcript variant 4, mRNA -p106 -sg16 -S'PRLR' -p107 -sg18 -(dp108 -g20 -S'NP_001191246.1:p.?' -p109 -sg22 -S'NP_001191246.1:p.?' 
-p110 -ssg24 -g25 -sg26 -S'NC_000005.9(NM_001204317.1):c.856-9153_856-9152=' -p111 -sg27 -g4 -sg28 -S'NM_001204317.1:c.856-9153_856-9152=' -p112 -sg30 -g4 -sg31 -(dp113 -S'grch38' -p114 -(dp115 -g35 -S'NC_000005.10:g.35058560_35058561=' -p116 -sg37 -(dp117 -g39 -g50 -sg41 -S'CA' -p118 -sg43 -S'35058560' -p119 -sg45 -g118 -sssS'grch37' -p120 -(dp121 -g35 -S'NC_000005.9:g.35058665_35058666=' -p122 -sg37 -(dp123 -g39 -g50 -sg41 -S'CA' -p124 -sg43 -S'35058665' -p125 -sg45 -g124 -sssg87 -(dp126 -g35 -S'NC_000005.10:g.35058560_35058561=' -p127 -sg37 -(dp128 -g39 -g40 -sg41 -g118 -sg43 -S'35058560' -p129 -sg45 -g118 -sssS'hg19' -p130 -(dp131 -g35 -S'NC_000005.9:g.35058665_35058666=' -p132 -sg37 -(dp133 -g39 -g40 -sg41 -g124 -sg43 -S'35058665' -p134 -sg45 -g124 -ssssg52 -(dp135 -g54 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191246.1' -p136 -sg56 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204317.1' -p137 -sssS'NM_001204316.1:c.1009+7385_1009+7386=' -p138 -(dp139 -g3 -g4 -sg5 -(lp140 -S'RefSeqGene record not available' -p141 -asg11 -g4 -sg12 -(lp142 -sg14 -VHomo sapiens prolactin receptor (PRLR), transcript variant 3, mRNA -p143 -sg16 -S'PRLR' -p144 -sg18 -(dp145 -g20 -S'NP_001191245.1:p.?' -p146 -sg22 -S'NP_001191245.1:p.?' 
-p147 -ssg24 -g25 -sg26 -S'NC_000005.9(NM_001204316.1):c.1009+7385_1009+7386=' -p148 -sg27 -g4 -sg28 -S'NM_001204316.1:c.1009+7385_1009+7386=' -p149 -sg30 -g4 -sg31 -(dp150 -S'grch38' -p151 -(dp152 -g35 -S'NC_000005.10:g.35058563_35058564=' -p153 -sg37 -(dp154 -g39 -g50 -sg41 -S'AG' -p155 -sg43 -S'35058563' -p156 -sg45 -g155 -sssS'grch37' -p157 -(dp158 -g35 -S'NC_000005.9:g.35058665_35058666=' -p159 -sg37 -(dp160 -g39 -g50 -sg41 -g124 -sg43 -S'35058665' -p161 -sg45 -g124 -sssg87 -(dp162 -g35 -S'NC_000005.10:g.35058563_35058564=' -p163 -sg37 -(dp164 -g39 -g40 -sg41 -g155 -sg43 -S'35058563' -p165 -sg45 -g155 -sssS'hg19' -p166 -(dp167 -g35 -S'NC_000005.9:g.35058665_35058666=' -p168 -sg37 -(dp169 -g39 -g40 -sg41 -g124 -sg43 -S'35058665' -p170 -sg45 -g124 -ssssg52 -(dp171 -g54 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191245.1' -p172 -sg56 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204316.1' -p173 -sssS'metadata' -p174 -(dp175 -S'variantvalidator_hgvs_version' -p176 -S'1.1.3' -p177 -sS'uta_schema' -p178 -S'uta_20180821' -p179 -sS'seqrepo_db' -p180 -S'2018-08-21' -p181 -sS'variantvalidator_version' -p182 -S'v0.2' -p183 -ssS'flag' -p184 -S'gene_variant' -p185 -sS'NR_037910.1:n.828-9153_828-9152=' -p186 -(dp187 -g3 -g4 -sg5 -(lp188 -S'RefSeqGene record not available' -p189 -asg11 -g4 -sg12 -(lp190 -sg14 -VHomo sapiens prolactin receptor (PRLR), transcript variant 7, non-coding RNA -p191 -sg16 -S'PRLR' -p192 -sg18 -(dp193 -g20 -S'Non-coding :n.' 
-p194 -sg22 -g194 -ssg24 -g25 -sg26 -S'NC_000005.9(NR_037910.1):c.828-9153_828-9152=' -p195 -sg27 -g4 -sg28 -S'NR_037910.1:n.828-9153_828-9152=' -p196 -sg30 -g4 -sg31 -(dp197 -S'grch38' -p198 -(dp199 -g35 -S'NC_000005.10:g.35058560_35058561=' -p200 -sg37 -(dp201 -g39 -g50 -sg41 -g118 -sg43 -S'35058560' -p202 -sg45 -g118 -sssS'grch37' -p203 -(dp204 -g35 -S'NC_000005.9:g.35058665_35058666=' -p205 -sg37 -(dp206 -g39 -g50 -sg41 -g124 -sg43 -S'35058665' -p207 -sg45 -g124 -sssg87 -(dp208 -g35 -S'NC_000005.10:g.35058560_35058561=' -p209 -sg37 -(dp210 -g39 -g40 -sg41 -g118 -sg43 -S'35058560' -p211 -sg45 -g118 -sssS'hg19' -p212 -(dp213 -g35 -S'NC_000005.9:g.35058665_35058666=' -p214 -sg37 -(dp215 -g39 -g40 -sg41 -g124 -sg43 -S'35058665' -p216 -sg45 -g124 -ssssg52 -(dp217 -g56 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_037910.1' -p218 -sssS'NM_001204318.1:c.686-9153_686-9152=' -p219 -(dp220 -g3 -g4 -sg5 -(lp221 -S'RefSeqGene record not available' -p222 -asg11 -g4 -sg12 -(lp223 -sg14 -VHomo sapiens prolactin receptor (PRLR), transcript variant 5, mRNA -p224 -sg16 -S'PRLR' -p225 -sg18 -(dp226 -g20 -S'NP_001191247.1:p.?' -p227 -sg22 -S'NP_001191247.1:p.?' 
-p228 -ssg24 -g25 -sg26 -S'NC_000005.9(NM_001204318.1):c.686-9153_686-9152=' -p229 -sg27 -g4 -sg28 -S'NM_001204318.1:c.686-9153_686-9152=' -p230 -sg30 -g4 -sg31 -(dp231 -S'grch38' -p232 -(dp233 -g35 -S'NC_000005.10:g.35058560_35058561=' -p234 -sg37 -(dp235 -g39 -g50 -sg41 -g118 -sg43 -S'35058560' -p236 -sg45 -g118 -sssS'grch37' -p237 -(dp238 -g35 -S'NC_000005.9:g.35058665_35058666=' -p239 -sg37 -(dp240 -g39 -g50 -sg41 -g124 -sg43 -S'35058665' -p241 -sg45 -g124 -sssg87 -(dp242 -g35 -S'NC_000005.10:g.35058560_35058561=' -p243 -sg37 -(dp244 -g39 -g40 -sg41 -g118 -sg43 -S'35058560' -p245 -sg45 -g118 -sssS'hg19' -p246 -(dp247 -g35 -S'NC_000005.9:g.35058665_35058666=' -p248 -sg37 -(dp249 -g39 -g40 -sg41 -g124 -sg43 -S'35058665' -p250 -sg45 -g124 -ssssg52 -(dp251 -g54 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191247.1' -p252 -sg56 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204318.1' -p253 -sssS'NM_000949.5:c.*6525_*6526=' -p254 -(dp255 -g3 -g4 -sg5 -(lp256 -S'A more recent version of the selected reference sequence NM_000949.5 is available (NM_000949.6)' -p257 -aS'NM_000949.6:c.*6525_*6526delCTinsTG MUST be fully validated prior to use in reports' -p258 -aS'select_variants=NM_000949.6:c.*6525_*6526delinsTG' -p259 -aS'RefSeqGene record not available' -p260 -asg11 -g4 -sg12 -(lp261 -sg14 -VHomo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA -p262 -sg16 -S'PRLR' -p263 -sg18 -(dp264 -g20 -S'NP_000940.1:p.?' -p265 -sg22 -S'NP_000940.1:p.?' 
-p266 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_000949.5:c.*6525_*6526=' -p267 -sg30 -g4 -sg31 -(dp268 -S'hg19' -p269 -(dp270 -g35 -S'NC_000005.9:g.35058665_35058666=' -p271 -sg37 -(dp272 -g39 -g40 -sg41 -VCA -p273 -sg43 -S'35058665' -p274 -sg45 -g273 -sssS'grch37' -p275 -(dp276 -g35 -S'NC_000005.9:g.35058665_35058666=' -p277 -sg37 -(dp278 -g39 -g50 -sg41 -g273 -sg43 -S'35058665' -p279 -sg45 -g273 -ssssg52 -(dp280 -g54 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000940.1' -p281 -sg56 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000949.5' -p282 -sssS'NM_000949.6:c.*6528del' -p283 -(dp284 -g3 -g4 -sg5 -(lp285 -S'The displayed variants may be artefacts of aligning NM_000949.6 with genome build GRCh37' -p286 -aS'NM_000949.6:c.*6527_*6530 contains 1 transcript base(s) that fail to align to chromosome NC_000005.9' -p287 -aS'Caution should be used when reporting the displayed variant descriptions' -p288 -aS'If you are unsure, please contact admin' -p289 -aS'RefSeqGene record not available' -p290 -asg11 -g4 -sg12 -(lp291 -sg14 -VHomo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA -p292 -sg16 -S'PRLR' -p293 -sg18 -(dp294 -g20 -S'NP_000940.1:p.?' -p295 -sg22 -S'NP_000940.1:p.?' 
-p296 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_000949.6:c.*6528del' -p297 -sg30 -g4 -sg31 -(dp298 -S'grch38' -p299 -(dp300 -g35 -S'NC_000005.10:g.35058563del' -p301 -sg37 -(dp302 -g39 -g50 -sg41 -S'CA' -p303 -sg43 -S'35058560' -p304 -sg45 -g80 -sssS'grch37' -p305 -(dp306 -g35 -S'NC_000005.9:g.35058662_35058668=' -p307 -sg37 -(dp308 -g39 -g50 -sg41 -g85 -sg43 -S'35058662' -p309 -sg45 -g85 -sssg87 -(dp310 -g35 -S'NC_000005.10:g.35058563del' -p311 -sg37 -(dp312 -g39 -g40 -sg41 -S'CA' -p313 -sg43 -S'35058560' -p314 -sg45 -g80 -sssS'hg19' -p315 -(dp316 -g35 -S'NC_000005.9:g.35058662_35058668=' -p317 -sg37 -(dp318 -g39 -g40 -sg41 -g85 -sg43 -S'35058662' -p319 -sg45 -g85 -ssssg52 -(dp320 -g54 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000940.1' -p321 -sg56 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000949.6' -p322 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant169.txt b/VariantValidator/testing/testOutputsMasterITS/variant169.txt deleted file mode 100644 index e1b7c706..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant169.txt +++ /dev/null @@ -1,174 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_015120.4:c.1580_1581insCCT' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA -p14 -sS'gene_symbol' -p15 -S'ALMS1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_055935.4:p.(Leu527dup)' -p20 -sS'slr' -p21 -S'NP_055935.4:p.(L527dup)' -p22 -ssS'submitted_variant' -p23 -S'NC_000002.11:g.73675227_73675229delTCTinsTCTCTC' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_015120.4:c.1580_1581insCCT' -p28 
-sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000002.11:g.73675231_73675232insCCT' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr2' -p39 -sS'ref' -p40 -S'T' -p41 -sS'pos' -p42 -S'73675229' -p43 -sS'alt' -p44 -VTCTC -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000002.12:g.73448104_73448105insCCT' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'73448102' -p50 -sg44 -VTCTC -p51 -sssS'grch37' -p52 -(dp53 -g34 -S'NC_000002.11:g.73675231_73675232insCCT' -p54 -sg36 -(dp55 -g38 -S'2' -p56 -sg40 -g41 -sg42 -S'73675229' -p57 -sg44 -VTCTC -p58 -sssS'grch38' -p59 -(dp60 -g34 -S'NC_000002.12:g.73448104_73448105insCCT' -p61 -sg36 -(dp62 -g38 -g56 -sg40 -g41 -sg42 -S'73448102' -p63 -sg44 -VTCTC -p64 -ssssS'reference_sequence_records' -p65 -(dp66 -S'protein' -p67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4' -p68 -sS'transcript' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4' -p70 -sssS'metadata' -p71 -(dp72 -S'variantvalidator_hgvs_version' -p73 -S'1.1.3' -p74 -sS'uta_schema' -p75 -S'uta_20180821' -p76 -sS'seqrepo_db' -p77 -S'2018-08-21' -p78 -sS'variantvalidator_version' -p79 -S'v0.2' -p80 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant17.txt b/VariantValidator/testing/testOutputsMasterITS/variant17.txt deleted file mode 100644 index f5670b1d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant17.txt +++ /dev/null @@ -1,80 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use NC_000006.11:g.32006074C>T' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'hgvs_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NM_000500.7:c.-107-19C>T' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'hgvs_lrg_variant' -p20 -g4 -sS'hgvs_transcript_variant' -p21 -g4 -sS'hgvs_refseqgene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -sS'reference_sequence_records' -p25 -g4 -ssS'flag' -p26 -S'warning' -p27 -sS'metadata' -p28 -(dp29 -S'variantvalidator_hgvs_version' -p30 -S'1.1.3' -p31 -sS'uta_schema' -p32 -S'uta_20180821' -p33 -sS'seqrepo_db' -p34 -S'2018-08-21' -p35 -sS'variantvalidator_version' -p36 -S'v0.2' -p37 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant170.txt b/VariantValidator/testing/testOutputsMasterITS/variant170.txt deleted file mode 100644 index 57a83ec5..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant170.txt +++ /dev/null @@ -1,183 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000828.4:c.-2dup' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37' -p9 -aS'NM_000828.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'refseqgene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA -p18 -sS'gene_symbol' -p19 -S'GRIA3' -p20 -sS'hgvs_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 
-S'NP_000819.3:p.?' -p24 -sS'slr' -p25 -S'NP_000819.3:p.?' -p26 -ssS'submitted_variant' -p27 -S'NM_000828.4:c.-2dupG' -p28 -sS'genome_context_intronic_sequence' -p29 -g6 -sS'hgvs_lrg_variant' -p30 -g6 -sS'hgvs_transcript_variant' -p31 -S'NM_000828.4:c.-2dup' -p32 -sS'hgvs_refseqgene_variant' -p33 -g6 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'hgvs_genomic_description' -p38 -S'NC_000023.10:g.122318386_122318387insGG' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chrX' -p43 -sS'ref' -p44 -S'A' -p45 -sS'pos' -p46 -S'122318386' -p47 -sS'alt' -p48 -VAGG -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000023.11:g.123184534dup' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'G' -p54 -sg46 -S'123184534' -p55 -sg48 -VGG -p56 -sssS'grch37' -p57 -(dp58 -g38 -S'NC_000023.10:g.122318386_122318387insGG' -p59 -sg40 -(dp60 -g42 -S'X' -p61 -sg44 -g45 -sg46 -S'122318386' -p62 -sg48 -VAGG -p63 -sssS'grch38' -p64 -(dp65 -g38 -S'NC_000023.11:g.123184534dup' -p66 -sg40 -(dp67 -g42 -g61 -sg44 -g54 -sg46 -S'123184534' -p68 -sg48 -VGG -p69 -ssssS'reference_sequence_records' -p70 -(dp71 -S'protein' -p72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3' -p73 -sS'transcript' -p74 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4' -p75 -sssS'metadata' -p76 -(dp77 -S'variantvalidator_hgvs_version' -p78 -S'1.1.3' -p79 -sS'uta_schema' -p80 -S'uta_20180821' -p81 -sS'seqrepo_db' -p82 -S'2018-08-21' -p83 -sS'variantvalidator_version' -p84 -S'v0.2' -p85 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant171.txt b/VariantValidator/testing/testOutputsMasterITS/variant171.txt deleted file mode 100644 index 6ae08973..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant171.txt +++ /dev/null @@ -1,443 +0,0 @@ -(dp0 -S'NM_007325.4:c.-2dup' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000023.10:g.122318386A>AGG automapped to NC_000023.10:g.122318386_122318387insGG' -p7 -aS'The displayed variants may be artefacts of aligning NM_007325.4 with genome build GRCh37' -p8 -aS'NM_007325.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -aS'RefSeqGene record not available' -p12 -asS'refseqgene_context_intronic_sequence' -p13 -g4 -sS'alt_genomic_loci' -p14 -(lp15 -sS'transcript_description' -p16 -VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 1, mRNA -p17 -sS'gene_symbol' -p18 -S'GRIA3' -p19 -sS'hgvs_predicted_protein_consequence' -p20 -(dp21 -S'tlr' -p22 -S'NP_015564.4:p.?' -p23 -sS'slr' -p24 -S'NP_015564.4:p.?' 
-p25 -ssS'submitted_variant' -p26 -S'X-122318386-A-AGG' -p27 -sS'genome_context_intronic_sequence' -p28 -g4 -sS'hgvs_lrg_variant' -p29 -g4 -sS'hgvs_transcript_variant' -p30 -S'NM_007325.4:c.-2dup' -p31 -sS'hgvs_refseqgene_variant' -p32 -g4 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'hgvs_genomic_description' -p37 -S'NC_000023.10:g.122318386_122318387insGG' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chrX' -p42 -sS'ref' -p43 -S'A' -p44 -sS'pos' -p45 -S'122318386' -p46 -sS'alt' -p47 -VAGG -p48 -sssS'hg38' -p49 -(dp50 -g37 -S'NC_000023.11:g.123184534dup' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -S'G' -p53 -sg45 -S'123184534' -p54 -sg47 -VGG -p55 -sssS'grch37' -p56 -(dp57 -g37 -S'NC_000023.10:g.122318386_122318387insGG' -p58 -sg39 -(dp59 -g41 -S'X' -p60 -sg43 -g44 -sg45 -S'122318386' -p61 -sg47 -VAGG -p62 -sssS'grch38' -p63 -(dp64 -g37 -S'NC_000023.11:g.123184534dup' -p65 -sg39 -(dp66 -g41 -g60 -sg43 -g53 -sg45 -S'123184534' -p67 -sg47 -VGG -p68 -ssssS'reference_sequence_records' -p69 -(dp70 -S'protein' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_015564.4' -p72 -sS'transcript' -p73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007325.4' -p74 -sssS'flag' -p75 -S'gene_variant' -p76 -sS'NM_001256743.1:c.-2dup' -p77 -(dp78 -g3 -g4 -sg5 -(lp79 -S'NC_000023.10:g.122318386A>AGG automapped to NC_000023.10:g.122318386_122318387insGG' -p80 -aS'The displayed variants may be artefacts of aligning NM_001256743.1 with genome build GRCh37' -p81 -aS'NM_001256743.1:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' -p82 -aS'Caution should be used when reporting the displayed variant descriptions' -p83 -aS'If you are unsure, please contact admin' -p84 -aS'RefSeqGene record not available' -p85 -asg13 -g4 -sg14 -(lp86 -sg16 -VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 3, mRNA -p87 -sg18 -S'GRIA3' -p88 -sg20 -(dp89 -g22 -S'NP_001243672.1:p.?' -p90 -sg24 -S'NP_001243672.1:p.?' 
-p91 -ssg26 -g27 -sg28 -g4 -sg29 -g4 -sg30 -S'NM_001256743.1:c.-2dup' -p92 -sg32 -g4 -sg33 -(dp93 -S'hg19' -p94 -(dp95 -g37 -S'NC_000023.10:g.122318386_122318387insGG' -p96 -sg39 -(dp97 -g41 -g42 -sg43 -g44 -sg45 -S'122318386' -p98 -sg47 -VAGG -p99 -sssg49 -(dp100 -g37 -S'NC_000023.11:g.123184534dup' -p101 -sg39 -(dp102 -g41 -g42 -sg43 -g53 -sg45 -S'123184534' -p103 -sg47 -VGG -p104 -sssS'grch37' -p105 -(dp106 -g37 -S'NC_000023.10:g.122318386_122318387insGG' -p107 -sg39 -(dp108 -g41 -g60 -sg43 -g44 -sg45 -S'122318386' -p109 -sg47 -VAGG -p110 -sssS'grch38' -p111 -(dp112 -g37 -S'NC_000023.11:g.123184534dup' -p113 -sg39 -(dp114 -g41 -g60 -sg43 -g53 -sg45 -S'123184534' -p115 -sg47 -VGG -p116 -ssssg69 -(dp117 -g71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243672.1' -p118 -sg73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256743.1' -p119 -sssS'NM_000828.4:c.-2dup' -p120 -(dp121 -g3 -g4 -sg5 -(lp122 -S'NC_000023.10:g.122318386A>AGG automapped to NC_000023.10:g.122318386_122318387insGG' -p123 -aS'The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37' -p124 -aS'NM_000828.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' -p125 -aS'Caution should be used when reporting the displayed variant descriptions' -p126 -aS'If you are unsure, please contact admin' -p127 -aS'RefSeqGene record not available' -p128 -asg13 -g4 -sg14 -(lp129 -sg16 -VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA -p130 -sg18 -S'GRIA3' -p131 -sg20 -(dp132 -g22 -S'NP_000819.3:p.?' -p133 -sg24 -S'NP_000819.3:p.?' 
-p134 -ssg26 -g27 -sg28 -g4 -sg29 -g4 -sg30 -S'NM_000828.4:c.-2dup' -p135 -sg32 -g4 -sg33 -(dp136 -S'hg19' -p137 -(dp138 -g37 -S'NC_000023.10:g.122318386_122318387insGG' -p139 -sg39 -(dp140 -g41 -g42 -sg43 -g44 -sg45 -S'122318386' -p141 -sg47 -VAGG -p142 -sssg49 -(dp143 -g37 -S'NC_000023.11:g.123184534dup' -p144 -sg39 -(dp145 -g41 -g42 -sg43 -g53 -sg45 -S'123184534' -p146 -sg47 -VGG -p147 -sssS'grch37' -p148 -(dp149 -g37 -S'NC_000023.10:g.122318386_122318387insGG' -p150 -sg39 -(dp151 -g41 -g60 -sg43 -g44 -sg45 -S'122318386' -p152 -sg47 -VAGG -p153 -sssS'grch38' -p154 -(dp155 -g37 -S'NC_000023.11:g.123184534dup' -p156 -sg39 -(dp157 -g41 -g60 -sg43 -g53 -sg45 -S'123184534' -p158 -sg47 -VGG -p159 -ssssg69 -(dp160 -g71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3' -p161 -sg73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4' -p162 -sssS'metadata' -p163 -(dp164 -S'variantvalidator_hgvs_version' -p165 -S'1.1.3' -p166 -sS'uta_schema' -p167 -S'uta_20180821' -p168 -sS'seqrepo_db' -p169 -S'2018-08-21' -p170 -sS'variantvalidator_version' -p171 -S'v0.2' -p172 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant172.txt b/VariantValidator/testing/testOutputsMasterITS/variant172.txt deleted file mode 100644 index 06ff7fd8..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant172.txt +++ /dev/null @@ -1,182 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000828.4:c.-2G>T' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37' -p9 -aS'NM_000828.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'refseqgene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA -p18 -sS'gene_symbol' -p19 -S'GRIA3' -p20 -sS'hgvs_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_000819.3:p.?' -p24 -sS'slr' -p25 -S'NP_000819.3:p.?' 
-p26 -ssS'submitted_variant' -p27 -S'NM_000828.4:c.-2G>T' -p28 -sS'genome_context_intronic_sequence' -p29 -g6 -sS'hgvs_lrg_variant' -p30 -g6 -sS'hgvs_transcript_variant' -p31 -S'NM_000828.4:c.-2G>T' -p32 -sS'hgvs_refseqgene_variant' -p33 -g6 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'hgvs_genomic_description' -p38 -S'NC_000023.10:g.122318386_122318387insT' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chrX' -p43 -sS'ref' -p44 -S'A' -p45 -sS'pos' -p46 -S'122318386' -p47 -sS'alt' -p48 -VAT -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000023.11:g.123184534G>T' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -VG -p54 -sg46 -S'123184534' -p55 -sg48 -VT -p56 -sssS'grch37' -p57 -(dp58 -g38 -S'NC_000023.10:g.122318386_122318387insT' -p59 -sg40 -(dp60 -g42 -S'X' -p61 -sg44 -g45 -sg46 -S'122318386' -p62 -sg48 -VAT -p63 -sssS'grch38' -p64 -(dp65 -g38 -S'NC_000023.11:g.123184534G>T' -p66 -sg40 -(dp67 -g42 -g61 -sg44 -g54 -sg46 -S'123184534' -p68 -sg48 -g56 -ssssS'reference_sequence_records' -p69 -(dp70 -S'protein' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3' -p72 -sS'transcript' -p73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4' -p74 -sssS'metadata' -p75 -(dp76 -S'variantvalidator_hgvs_version' -p77 -S'1.1.3' -p78 -sS'uta_schema' -p79 -S'uta_20180821' -p80 -sS'seqrepo_db' -p81 -S'2018-08-21' -p82 -sS'variantvalidator_version' -p83 -S'v0.2' -p84 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant173.txt b/VariantValidator/testing/testOutputsMasterITS/variant173.txt deleted file mode 100644 index 82ee8c2e..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant173.txt +++ /dev/null @@ -1,181 +0,0 @@ -(dp0 -S'NM_000828.4:c.-2G=' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37' -p7 -aS'NM_000828.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'RefSeqGene record not available' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA -p16 -sS'gene_symbol' -p17 -S'GRIA3' -p18 -sS'hgvs_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_000819.3:p.?' -p22 -sS'slr' -p23 -S'NP_000819.3:p.?' 
-p24 -ssS'submitted_variant' -p25 -S'NM_000828.4:c.-2G=' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'hgvs_lrg_variant' -p28 -g4 -sS'hgvs_transcript_variant' -p29 -S'NM_000828.4:c.-2G=' -p30 -sS'hgvs_refseqgene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000023.10:g.122318386_122318387insG' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chrX' -p41 -sS'ref' -p42 -S'A' -p43 -sS'pos' -p44 -S'122318386' -p45 -sS'alt' -p46 -VAG -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000023.11:g.123184534G=' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -VG -p52 -sg44 -S'123184534' -p53 -sg46 -g52 -sssS'grch37' -p54 -(dp55 -g36 -S'NC_000023.10:g.122318386_122318387insG' -p56 -sg38 -(dp57 -g40 -S'X' -p58 -sg42 -g43 -sg44 -S'122318386' -p59 -sg46 -VAG -p60 -sssS'grch38' -p61 -(dp62 -g36 -S'NC_000023.11:g.123184534G=' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -g52 -sg44 -S'123184534' -p65 -sg46 -g52 -ssssS'reference_sequence_records' -p66 -(dp67 -S'protein' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3' -p69 -sS'transcript' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4' -p71 -sssS'flag' -p72 -S'gene_variant' -p73 -sS'metadata' -p74 -(dp75 -S'variantvalidator_hgvs_version' -p76 -S'1.1.3' -p77 -sS'uta_schema' -p78 -S'uta_20180821' -p79 -sS'seqrepo_db' -p80 -S'2018-08-21' -p81 -sS'variantvalidator_version' -p82 -S'v0.2' -p83 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant174.txt b/VariantValidator/testing/testOutputsMasterITS/variant174.txt deleted file mode 100644 index 6f2fbe91..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant174.txt +++ /dev/null @@ -1,438 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_000828.4:c.-2G>T' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'NC_000023.10:g.122318386A>AT automapped to NC_000023.10:g.122318386_122318387insT' -p19 -aS'The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37' -p20 -aS'NM_000828.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' -p21 -aS'Caution should be used when reporting the displayed variant descriptions' -p22 -aS'If you are unsure, please contact admin' -p23 -aS'RefSeqGene record not available' -p24 -asS'refseqgene_context_intronic_sequence' -p25 -g16 -sS'alt_genomic_loci' -p26 -(lp27 -sS'transcript_description' -p28 -VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA -p29 -sS'gene_symbol' -p30 -S'GRIA3' -p31 -sS'hgvs_predicted_protein_consequence' -p32 -(dp33 -S'tlr' -p34 -S'NP_000819.3:p.?' -p35 -sS'slr' -p36 -S'NP_000819.3:p.?' 
-p37 -ssS'submitted_variant' -p38 -S'X-122318386-A-AT' -p39 -sS'genome_context_intronic_sequence' -p40 -g16 -sS'hgvs_lrg_variant' -p41 -g16 -sS'hgvs_transcript_variant' -p42 -S'NM_000828.4:c.-2G>T' -p43 -sS'hgvs_refseqgene_variant' -p44 -g16 -sS'primary_assembly_loci' -p45 -(dp46 -S'hg19' -p47 -(dp48 -S'hgvs_genomic_description' -p49 -S'NC_000023.10:g.122318386_122318387insT' -p50 -sS'vcf' -p51 -(dp52 -S'chr' -p53 -S'chrX' -p54 -sS'ref' -p55 -S'A' -p56 -sS'pos' -p57 -S'122318386' -p58 -sS'alt' -p59 -VAT -p60 -sssS'hg38' -p61 -(dp62 -g49 -S'NC_000023.11:g.123184534G>T' -p63 -sg51 -(dp64 -g53 -g54 -sg55 -VG -p65 -sg57 -S'123184534' -p66 -sg59 -VT -p67 -sssS'grch37' -p68 -(dp69 -g49 -S'NC_000023.10:g.122318386_122318387insT' -p70 -sg51 -(dp71 -g53 -S'X' -p72 -sg55 -g56 -sg57 -S'122318386' -p73 -sg59 -VAT -p74 -sssS'grch38' -p75 -(dp76 -g49 -S'NC_000023.11:g.123184534G>T' -p77 -sg51 -(dp78 -g53 -g72 -sg55 -g65 -sg57 -S'123184534' -p79 -sg59 -g67 -ssssS'reference_sequence_records' -p80 -(dp81 -S'protein' -p82 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3' -p83 -sS'transcript' -p84 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4' -p85 -sssS'NM_001256743.1:c.-2G>T' -p86 -(dp87 -g15 -g16 -sg17 -(lp88 -S'NC_000023.10:g.122318386A>AT automapped to NC_000023.10:g.122318386_122318387insT' -p89 -aS'The displayed variants may be artefacts of aligning NM_001256743.1 with genome build GRCh37' -p90 -aS'NM_001256743.1:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' -p91 -aS'Caution should be used when reporting the displayed variant descriptions' -p92 -aS'If you are unsure, please contact admin' -p93 -aS'RefSeqGene record not available' -p94 -asg25 -g16 -sg26 -(lp95 -sg28 -VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 3, mRNA -p96 -sg30 -S'GRIA3' -p97 -sg32 -(dp98 -g34 -S'NP_001243672.1:p.?' -p99 -sg36 -S'NP_001243672.1:p.?' 
-p100 -ssg38 -g39 -sg40 -g16 -sg41 -g16 -sg42 -S'NM_001256743.1:c.-2G>T' -p101 -sg44 -g16 -sg45 -(dp102 -S'hg19' -p103 -(dp104 -g49 -S'NC_000023.10:g.122318386_122318387insT' -p105 -sg51 -(dp106 -g53 -g54 -sg55 -g56 -sg57 -S'122318386' -p107 -sg59 -VAT -p108 -sssg61 -(dp109 -g49 -S'NC_000023.11:g.123184534G>T' -p110 -sg51 -(dp111 -g53 -g54 -sg55 -g65 -sg57 -S'123184534' -p112 -sg59 -g67 -sssS'grch37' -p113 -(dp114 -g49 -S'NC_000023.10:g.122318386_122318387insT' -p115 -sg51 -(dp116 -g53 -g72 -sg55 -g56 -sg57 -S'122318386' -p117 -sg59 -VAT -p118 -sssS'grch38' -p119 -(dp120 -g49 -S'NC_000023.11:g.123184534G>T' -p121 -sg51 -(dp122 -g53 -g72 -sg55 -g65 -sg57 -S'123184534' -p123 -sg59 -g67 -ssssg80 -(dp124 -g82 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243672.1' -p125 -sg84 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256743.1' -p126 -sssS'NM_007325.4:c.-2G>T' -p127 -(dp128 -g15 -g16 -sg17 -(lp129 -S'NC_000023.10:g.122318386A>AT automapped to NC_000023.10:g.122318386_122318387insT' -p130 -aS'The displayed variants may be artefacts of aligning NM_007325.4 with genome build GRCh37' -p131 -aS'NM_007325.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' -p132 -aS'Caution should be used when reporting the displayed variant descriptions' -p133 -aS'If you are unsure, please contact admin' -p134 -aS'RefSeqGene record not available' -p135 -asg25 -g16 -sg26 -(lp136 -sg28 -VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 1, mRNA -p137 -sg30 -S'GRIA3' -p138 -sg32 -(dp139 -g34 -S'NP_015564.4:p.?' -p140 -sg36 -S'NP_015564.4:p.?' 
-p141 -ssg38 -g39 -sg40 -g16 -sg41 -g16 -sg42 -S'NM_007325.4:c.-2G>T' -p142 -sg44 -g16 -sg45 -(dp143 -S'hg19' -p144 -(dp145 -g49 -S'NC_000023.10:g.122318386_122318387insT' -p146 -sg51 -(dp147 -g53 -g54 -sg55 -g56 -sg57 -S'122318386' -p148 -sg59 -VAT -p149 -sssg61 -(dp150 -g49 -S'NC_000023.11:g.123184534G>T' -p151 -sg51 -(dp152 -g53 -g54 -sg55 -g65 -sg57 -S'123184534' -p153 -sg59 -g67 -sssS'grch37' -p154 -(dp155 -g49 -S'NC_000023.10:g.122318386_122318387insT' -p156 -sg51 -(dp157 -g53 -g72 -sg55 -g56 -sg57 -S'122318386' -p158 -sg59 -VAT -p159 -sssS'grch38' -p160 -(dp161 -g49 -S'NC_000023.11:g.123184534G>T' -p162 -sg51 -(dp163 -g53 -g72 -sg55 -g65 -sg57 -S'123184534' -p164 -sg59 -g67 -ssssg80 -(dp165 -g82 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_015564.4' -p166 -sg84 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007325.4' -p167 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant175.txt b/VariantValidator/testing/testOutputsMasterITS/variant175.txt deleted file mode 100644 index fefdaa7b..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant175.txt +++ /dev/null @@ -1,183 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000828.4:c.-2_-1insT' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37' -p9 -aS'NM_000828.4:c.-2_-1 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'refseqgene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA -p18 -sS'gene_symbol' -p19 -S'GRIA3' -p20 -sS'hgvs_predicted_protein_consequence' -p21 
-(dp22 -S'tlr' -p23 -S'NP_000819.3:p.?' -p24 -sS'slr' -p25 -S'NP_000819.3:p.?' -p26 -ssS'submitted_variant' -p27 -S'NM_000828.4:c.-2_-1insT' -p28 -sS'genome_context_intronic_sequence' -p29 -g6 -sS'hgvs_lrg_variant' -p30 -g6 -sS'hgvs_transcript_variant' -p31 -S'NM_000828.4:c.-2_-1insT' -p32 -sS'hgvs_refseqgene_variant' -p33 -g6 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'hgvs_genomic_description' -p38 -S'NC_000023.10:g.122318386_122318387insGT' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chrX' -p43 -sS'ref' -p44 -S'A' -p45 -sS'pos' -p46 -S'122318386' -p47 -sS'alt' -p48 -S'AGT' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000023.11:g.123184534_123184535insT' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'G' -p54 -sg46 -S'123184534' -p55 -sg48 -VGT -p56 -sssS'grch37' -p57 -(dp58 -g38 -S'NC_000023.10:g.122318386_122318387insGT' -p59 -sg40 -(dp60 -g42 -S'X' -p61 -sg44 -g45 -sg46 -S'122318386' -p62 -sg48 -S'AGT' -p63 -sssS'grch38' -p64 -(dp65 -g38 -S'NC_000023.11:g.123184534_123184535insT' -p66 -sg40 -(dp67 -g42 -g61 -sg44 -g54 -sg46 -S'123184534' -p68 -sg48 -VGT -p69 -ssssS'reference_sequence_records' -p70 -(dp71 -S'protein' -p72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3' -p73 -sS'transcript' -p74 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4' -p75 -sssS'metadata' -p76 -(dp77 -S'variantvalidator_hgvs_version' -p78 -S'1.1.3' -p79 -sS'uta_schema' -p80 -S'uta_20180821' -p81 -sS'seqrepo_db' -p82 -S'2018-08-21' -p83 -sS'variantvalidator_version' -p84 -S'v0.2' -p85 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant176.txt b/VariantValidator/testing/testOutputsMasterITS/variant176.txt deleted file mode 100644 index 2b17950e..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant176.txt +++ /dev/null @@ -1,182 +0,0 @@ -(dp0 -S'NM_000828.4:c.-3_-2insT' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37' -p7 -aS'NM_000828.4:c.-3_-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'RefSeqGene record not available' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA -p16 -sS'gene_symbol' -p17 -S'GRIA3' -p18 -sS'hgvs_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_000819.3:p.?' -p22 -sS'slr' -p23 -S'NP_000819.3:p.?' 
-p24 -ssS'submitted_variant' -p25 -S'NM_000828.4:c.-3_-2insT' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'hgvs_lrg_variant' -p28 -g4 -sS'hgvs_transcript_variant' -p29 -S'NM_000828.4:c.-3_-2insT' -p30 -sS'hgvs_refseqgene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000023.10:g.122318386_122318387insTG' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chrX' -p41 -sS'ref' -p42 -S'A' -p43 -sS'pos' -p44 -S'122318386' -p45 -sS'alt' -p46 -S'ATG' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000023.11:g.123184533_123184534insT' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -g43 -sg44 -S'123184533' -p52 -sg46 -VAT -p53 -sssS'grch37' -p54 -(dp55 -g36 -S'NC_000023.10:g.122318386_122318387insTG' -p56 -sg38 -(dp57 -g40 -S'X' -p58 -sg42 -g43 -sg44 -S'122318386' -p59 -sg46 -S'ATG' -p60 -sssS'grch38' -p61 -(dp62 -g36 -S'NC_000023.11:g.123184533_123184534insT' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -g43 -sg44 -S'123184533' -p65 -sg46 -VAT -p66 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4' -p72 -sssS'flag' -p73 -S'gene_variant' -p74 -sS'metadata' -p75 -(dp76 -S'variantvalidator_hgvs_version' -p77 -S'1.1.3' -p78 -sS'uta_schema' -p79 -S'uta_20180821' -p80 -sS'seqrepo_db' -p81 -S'2018-08-21' -p82 -sS'variantvalidator_version' -p83 -S'v0.2' -p84 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant177.txt b/VariantValidator/testing/testOutputsMasterITS/variant177.txt deleted file mode 100644 index b0359a87..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant177.txt +++ /dev/null @@ -1,182 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000828.4:c.-2delinsTT' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37' -p9 -aS'NM_000828.4:c.-2 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'refseqgene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA -p18 -sS'gene_symbol' -p19 -S'GRIA3' -p20 -sS'hgvs_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_000819.3:p.?' -p24 -sS'slr' -p25 -S'NP_000819.3:p.?' 
-p26 -ssS'submitted_variant' -p27 -S'NM_000828.4:c.-2delGinsTT' -p28 -sS'genome_context_intronic_sequence' -p29 -g6 -sS'hgvs_lrg_variant' -p30 -g6 -sS'hgvs_transcript_variant' -p31 -S'NM_000828.4:c.-2delinsTT' -p32 -sS'hgvs_refseqgene_variant' -p33 -g6 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'hgvs_genomic_description' -p38 -S'NC_000023.10:g.122318386_122318387insTT' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chrX' -p43 -sS'ref' -p44 -S'A' -p45 -sS'pos' -p46 -S'122318386' -p47 -sS'alt' -p48 -VATT -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000023.11:g.123184534delinsTT' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'G' -p54 -sg46 -S'123184534' -p55 -sg48 -VTT -p56 -sssS'grch37' -p57 -(dp58 -g38 -S'NC_000023.10:g.122318386_122318387insTT' -p59 -sg40 -(dp60 -g42 -S'X' -p61 -sg44 -g45 -sg46 -S'122318386' -p62 -sg48 -VATT -p63 -sssS'grch38' -p64 -(dp65 -g38 -S'NC_000023.11:g.123184534delinsTT' -p66 -sg40 -(dp67 -g42 -g61 -sg44 -g54 -sg46 -S'123184534' -p68 -sg48 -g56 -ssssS'reference_sequence_records' -p69 -(dp70 -S'protein' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3' -p72 -sS'transcript' -p73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4' -p74 -sssS'metadata' -p75 -(dp76 -S'variantvalidator_hgvs_version' -p77 -S'1.1.3' -p78 -sS'uta_schema' -p79 -S'uta_20180821' -p80 -sS'seqrepo_db' -p81 -S'2018-08-21' -p82 -sS'variantvalidator_version' -p83 -S'v0.2' -p84 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant178.txt b/VariantValidator/testing/testOutputsMasterITS/variant178.txt deleted file mode 100644 index f6fee1b0..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant178.txt +++ /dev/null @@ -1,182 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000828.4:c.-2_-1delinsTT' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_000828.4 with genome build GRCh37' -p9 -aS'NM_000828.4:c.-2_-1 contains 1 transcript base(s) that fail to align to chromosome NC_000023.10' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'refseqgene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA -p18 -sS'gene_symbol' -p19 -S'GRIA3' -p20 -sS'hgvs_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_000819.3:p.?' -p24 -sS'slr' -p25 -S'NP_000819.3:p.?' 
-p26 -ssS'submitted_variant' -p27 -S'NM_000828.4:c.-2_-1delGCinsTT' -p28 -sS'genome_context_intronic_sequence' -p29 -g6 -sS'hgvs_lrg_variant' -p30 -g6 -sS'hgvs_transcript_variant' -p31 -S'NM_000828.4:c.-2_-1delinsTT' -p32 -sS'hgvs_refseqgene_variant' -p33 -g6 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'hgvs_genomic_description' -p38 -S'NC_000023.10:g.122318387delinsTT' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chrX' -p43 -sS'ref' -p44 -S'C' -p45 -sS'pos' -p46 -S'122318387' -p47 -sS'alt' -p48 -S'TT' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000023.11:g.123184534_123184535delinsTT' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'GC' -p54 -sg46 -S'123184534' -p55 -sg48 -VTT -p56 -sssS'grch37' -p57 -(dp58 -g38 -S'NC_000023.10:g.122318387delinsTT' -p59 -sg40 -(dp60 -g42 -S'X' -p61 -sg44 -g45 -sg46 -S'122318387' -p62 -sg48 -g49 -sssS'grch38' -p63 -(dp64 -g38 -S'NC_000023.11:g.123184534_123184535delinsTT' -p65 -sg40 -(dp66 -g42 -g61 -sg44 -S'GC' -p67 -sg46 -S'123184534' -p68 -sg48 -g56 -ssssS'reference_sequence_records' -p69 -(dp70 -S'protein' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3' -p72 -sS'transcript' -p73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4' -p74 -sssS'metadata' -p75 -(dp76 -S'variantvalidator_hgvs_version' -p77 -S'1.1.3' -p78 -sS'uta_schema' -p79 -S'uta_20180821' -p80 -sS'seqrepo_db' -p81 -S'2018-08-21' -p82 -sS'variantvalidator_version' -p83 -S'v0.2' -p84 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant179.txt b/VariantValidator/testing/testOutputsMasterITS/variant179.txt deleted file mode 100644 index 1a7d2161..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant179.txt +++ /dev/null @@ -1,174 +0,0 @@ -(dp0 -S'NM_000828.4:c.-3_-2delinsTT' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA -p12 -sS'gene_symbol' -p13 -S'GRIA3' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_000819.3:p.?' -p18 -sS'slr' -p19 -S'NP_000819.3:p.?' -p20 -ssS'submitted_variant' -p21 -S'NM_000828.4:c.-3_-2delAGinsTT' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_000828.4:c.-3_-2delinsTT' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000023.10:g.122318386delinsTT' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chrX' -p37 -sS'ref' -p38 -S'A' -p39 -sS'pos' -p40 -S'122318386' -p41 -sS'alt' -p42 -S'TT' -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000023.11:g.123184533_123184534delinsTT' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -S'AG' -p48 -sg40 -S'123184533' -p49 -sg42 -VTT -p50 -sssS'grch37' -p51 -(dp52 -g32 -S'NC_000023.10:g.122318386delinsTT' -p53 -sg34 -(dp54 -g36 -S'X' -p55 -sg38 -g39 -sg40 -S'122318386' -p56 -sg42 -g43 -sssS'grch38' -p57 -(dp58 -g32 -S'NC_000023.11:g.123184533_123184534delinsTT' -p59 -sg34 -(dp60 -g36 -g55 -sg38 -S'AG' -p61 -sg40 -S'123184533' -p62 -sg42 -g50 -ssssS'reference_sequence_records' -p63 -(dp64 -S'protein' -p65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3' -p66 
-sS'transcript' -p67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4' -p68 -sssS'flag' -p69 -S'gene_variant' -p70 -sS'metadata' -p71 -(dp72 -S'variantvalidator_hgvs_version' -p73 -S'1.1.3' -p74 -sS'uta_schema' -p75 -S'uta_20180821' -p76 -sS'seqrepo_db' -p77 -S'2018-08-21' -p78 -sS'variantvalidator_version' -p79 -S'v0.2' -p80 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant18.txt b/VariantValidator/testing/testOutputsMasterITS/variant18.txt deleted file mode 100644 index ae1a7d0d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant18.txt +++ /dev/null @@ -1,82 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant' -p7 -aS'Instead use NC_000011.9:g.5248381A=' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -g4 -sS'gene_symbol' -p13 -g4 -sS'hgvs_predicted_protein_consequence' -p14 -(dp15 -S'tlr' -p16 -g4 -sS'slr' -p17 -g4 -ssS'submitted_variant' -p18 -S'NM_000518.4:c.-130C>T' -p19 -sS'genome_context_intronic_sequence' -p20 -g4 -sS'hgvs_lrg_variant' -p21 -g4 -sS'hgvs_transcript_variant' -p22 -g4 -sS'hgvs_refseqgene_variant' -p23 -g4 -sS'primary_assembly_loci' -p24 -(dp25 -sS'reference_sequence_records' -p26 -g4 -ssS'flag' -p27 -S'warning' -p28 -sS'metadata' -p29 -(dp30 -S'variantvalidator_hgvs_version' -p31 -S'1.1.3' -p32 -sS'uta_schema' -p33 -S'uta_20180821' -p34 -sS'seqrepo_db' -p35 -S'2018-08-21' -p36 -sS'variantvalidator_version' -p37 -S'v0.2' -p38 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant180.txt b/VariantValidator/testing/testOutputsMasterITS/variant180.txt deleted file mode 100644 index a3671f32..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant180.txt +++ /dev/null @@ -1,506 +0,0 @@ -(dp0 -S'NM_014249.3:c.951dup' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_014249.3 with genome build GRCh37' -p7 -aS'NC_000015.9:g.72105926_72105932 contains 1 genomic base(s) that fail to align to transcript NM_014249.3' -p8 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_014249.3 between positions c.947_948' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -aS'RefSeqGene record not available' -p12 -asS'refseqgene_context_intronic_sequence' -p13 -g4 -sS'alt_genomic_loci' -p14 -(lp15 -sS'transcript_description' -p16 -VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA -p17 -sS'gene_symbol' -p18 -S'NR2E3' -p19 -sS'hgvs_predicted_protein_consequence' -p20 -(dp21 -S'tlr' -p22 -S'NP_055064.1:p.(Thr318HisfsTer23)' -p23 -sS'slr' -p24 -S'NP_055064.1:p.(T318Hfs*23)' -p25 -ssS'submitted_variant' -p26 -S'15-72105929-C-C' -p27 -sS'genome_context_intronic_sequence' -p28 -g4 -sS'hgvs_lrg_variant' -p29 -g4 -sS'hgvs_transcript_variant' -p30 -S'NM_014249.3:c.951dup' -p31 -sS'hgvs_refseqgene_variant' -p32 -g4 -sS'primary_assembly_loci' -p33 -(dp34 -S'grch38' -p35 -(dp36 -S'hgvs_genomic_description' -p37 -S'NC_000015.10:g.71813592dup' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'15' -p42 -sS'ref' -p43 -S'C' -p44 -sS'pos' -p45 -S'71813589' -p46 -sS'alt' -p47 -VCC -p48 -sssS'grch37' -p49 -(dp50 -g37 -S'NC_000015.9:g.72105924_72105934=' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -S'GTGGACCCCCA' -p53 -sg45 
-S'72105924' -p54 -sg47 -g53 -sssS'hg38' -p55 -(dp56 -g37 -S'NC_000015.10:g.71813592dup' -p57 -sg39 -(dp58 -g41 -S'chr15' -p59 -sg43 -g44 -sg45 -S'71813589' -p60 -sg47 -VCC -p61 -sssS'hg19' -p62 -(dp63 -g37 -S'NC_000015.9:g.72105924_72105934=' -p64 -sg39 -(dp65 -g41 -g59 -sg43 -g53 -sg45 -S'72105924' -p66 -sg47 -g53 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3' -p72 -sssS'NM_014249.2:c.951dup' -p73 -(dp74 -g3 -g4 -sg5 -(lp75 -S'The displayed variants may be artefacts of aligning NM_014249.2 with genome build GRCh37' -p76 -aS'NC_000015.9:g.72105926_72105932 contains 1 genomic base(s) that fail to align to transcript NM_014249.2' -p77 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_014249.2 between positions c.947_948' -p78 -aS'Caution should be used when reporting the displayed variant descriptions' -p79 -aS'If you are unsure, please contact admin' -p80 -aS'A more recent version of the selected reference sequence NM_014249.2 is available (NM_014249.3)' -p81 -aS'NM_014249.3:c.951dupC MUST be fully validated prior to use in reports' -p82 -aS'select_variants=NM_014249.3:c.951dup' -p83 -aS'RefSeqGene record not available' -p84 -asg13 -g4 -sg14 -(lp85 -sg16 -VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA -p86 -sg18 -S'NR2E3' -p87 -sg20 -(dp88 -g22 -S'NP_055064.1:p.(Thr318HisfsTer23)' -p89 -sg24 -S'NP_055064.1:p.(T318Hfs*23)' -p90 -ssg26 -g27 -sg28 -g4 -sg29 -g4 -sg30 -S'NM_014249.2:c.951dup' -p91 -sg32 -g4 -sg33 -(dp92 -S'hg19' -p93 -(dp94 -g37 -S'NC_000015.9:g.72105924_72105934=' -p95 -sg39 -(dp96 -g41 -g59 -sg43 -g53 -sg45 -S'72105924' -p97 -sg47 -g53 -sssS'grch37' -p98 -(dp99 -g37 -S'NC_000015.9:g.72105924_72105934=' -p100 -sg39 -(dp101 -g41 -g42 -sg43 -g53 -sg45 -S'72105924' -p102 -sg47 -g53 -ssssg67 -(dp103 -g69 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1' -p104 -sg71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2' -p105 -sssS'flag' -p106 -S'gene_variant' -p107 -sS'NM_016346.3:c.951dup' -p108 -(dp109 -g3 -g4 -sg5 -(lp110 -S'The displayed variants may be artefacts of aligning NM_016346.3 with genome build GRCh37' -p111 -aS'NC_000015.9:g.72105926_72105932 contains 1 genomic base(s) that fail to align to transcript NM_016346.3' -p112 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_016346.3 between positions c.947_948' -p113 -aS'Caution should be used when reporting the displayed variant descriptions' -p114 -aS'If you are unsure, please contact admin' -p115 -aS'RefSeqGene record not available' -p116 -asg13 -g4 -sg14 -(lp117 -sg16 -VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA -p118 -sg18 -S'NR2E3' -p119 -sg20 -(dp120 -g22 -S'NP_057430.1:p.(Thr318HisfsTer23)' -p121 -sg24 -S'NP_057430.1:p.(T318Hfs*23)' -p122 -ssg26 -g27 -sg28 -g4 -sg29 -g4 -sg30 -S'NM_016346.3:c.951dup' -p123 -sg32 -g4 -sg33 -(dp124 -S'grch38' -p125 -(dp126 -g37 -S'NC_000015.10:g.71813592dup' -p127 -sg39 -(dp128 -g41 -g42 -sg43 -g44 -sg45 -S'71813589' -p129 -sg47 -VCC -p130 -sssS'grch37' -p131 -(dp132 -g37 -S'NC_000015.9:g.72105924_72105934=' -p133 -sg39 -(dp134 -g41 -g42 -sg43 -g53 -sg45 -S'72105924' -p135 -sg47 -g53 -sssg55 -(dp136 -g37 -S'NC_000015.10:g.71813592dup' -p137 -sg39 -(dp138 -g41 -g59 -sg43 -g44 -sg45 -S'71813589' -p139 -sg47 -VCC -p140 -sssS'hg19' -p141 -(dp142 -g37 -S'NC_000015.9:g.72105924_72105934=' -p143 -sg39 -(dp144 -g41 -g59 -sg43 -g53 -sg45 -S'72105924' -p145 -sg47 -g53 -ssssg67 -(dp146 -g69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1' -p147 -sg71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3' -p148 -sssS'NM_016346.2:c.951dup' -p149 -(dp150 -g3 -g4 -sg5 -(lp151 -S'The displayed variants may be artefacts of aligning NM_016346.2 with genome build GRCh37' -p152 
-aS'NC_000015.9:g.72105926_72105932 contains 1 genomic base(s) that fail to align to transcript NM_016346.2' -p153 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_016346.2 between positions c.947_948' -p154 -aS'Caution should be used when reporting the displayed variant descriptions' -p155 -aS'If you are unsure, please contact admin' -p156 -aS'A more recent version of the selected reference sequence NM_016346.2 is available (NM_016346.3)' -p157 -aS'NM_016346.3:c.951dupC MUST be fully validated prior to use in reports' -p158 -aS'select_variants=NM_016346.3:c.951dup' -p159 -aS'RefSeqGene record not available' -p160 -asg13 -g4 -sg14 -(lp161 -sg16 -VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA -p162 -sg18 -S'NR2E3' -p163 -sg20 -(dp164 -g22 -S'NP_057430.1:p.(Thr318HisfsTer23)' -p165 -sg24 -S'NP_057430.1:p.(T318Hfs*23)' -p166 -ssg26 -g27 -sg28 -g4 -sg29 -g4 -sg30 -S'NM_016346.2:c.951dup' -p167 -sg32 -g4 -sg33 -(dp168 -S'hg19' -p169 -(dp170 -g37 -S'NC_000015.9:g.72105924_72105934=' -p171 -sg39 -(dp172 -g41 -g59 -sg43 -g53 -sg45 -S'72105924' -p173 -sg47 -g53 -sssS'grch37' -p174 -(dp175 -g37 -S'NC_000015.9:g.72105924_72105934=' -p176 -sg39 -(dp177 -g41 -g42 -sg43 -g53 -sg45 -S'72105924' -p178 -sg47 -g53 -ssssg67 -(dp179 -g69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1' -p180 -sg71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2' -p181 -sssS'metadata' -p182 -(dp183 -S'variantvalidator_hgvs_version' -p184 -S'1.1.3' -p185 -sS'uta_schema' -p186 -S'uta_20180821' -p187 -sS'seqrepo_db' -p188 -S'2018-08-21' -p189 -sS'variantvalidator_version' -p190 -S'v0.2' -p191 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant181.txt b/VariantValidator/testing/testOutputsMasterITS/variant181.txt deleted file mode 100644 index 5a30b469..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant181.txt +++ /dev/null @@ -1,510 +0,0 @@ -(dp0 -S'NM_014249.2:c.947_948insTT' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000015.9:g.72105928AC>ATT automapped to NC_000015.9:g.72105929delCinsTT' -p7 -aS'The displayed variants may be artefacts of aligning NM_014249.2 with genome build GRCh37' -p8 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_014249.2 between positions c.947_948' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -aS'A more recent version of the selected reference sequence NM_014249.2 is available (NM_014249.3)' -p12 -aS'NM_014249.3:c.947_948insTT MUST be fully validated prior to use in reports' -p13 -aS'select_variants=NM_014249.3:c.947_948insTT' -p14 -aS'RefSeqGene record not available' -p15 -asS'refseqgene_context_intronic_sequence' -p16 -g4 -sS'alt_genomic_loci' -p17 -(lp18 -sS'transcript_description' -p19 -VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA -p20 -sS'gene_symbol' -p21 -S'NR2E3' -p22 -sS'hgvs_predicted_protein_consequence' -p23 -(dp24 -S'tlr' -p25 -S'NP_055064.1:p.(Pro317SerfsTer8)' -p26 -sS'slr' -p27 -S'NP_055064.1:p.(P317Sfs*8)' -p28 -ssS'submitted_variant' -p29 -S'15-72105928-AC-ATT' -p30 -sS'genome_context_intronic_sequence' -p31 -g4 -sS'hgvs_lrg_variant' -p32 -g4 -sS'hgvs_transcript_variant' -p33 -S'NM_014249.2:c.947_948insTT' -p34 -sS'hgvs_refseqgene_variant' -p35 -g4 -sS'primary_assembly_loci' -p36 -(dp37 -S'hg19' -p38 -(dp39 -S'hgvs_genomic_description' -p40 -S'NC_000015.9:g.72105929delinsTT' -p41 -sS'vcf' -p42 -(dp43 
-S'chr' -p44 -S'chr15' -p45 -sS'ref' -p46 -S'C' -p47 -sS'pos' -p48 -S'72105929' -p49 -sS'alt' -p50 -S'TT' -p51 -sssS'grch37' -p52 -(dp53 -g40 -S'NC_000015.9:g.72105929delinsTT' -p54 -sg42 -(dp55 -g44 -S'15' -p56 -sg46 -g47 -sg48 -S'72105929' -p57 -sg50 -g51 -ssssS'reference_sequence_records' -p58 -(dp59 -S'protein' -p60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1' -p61 -sS'transcript' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2' -p63 -sssS'NM_016346.3:c.947_948insTT' -p64 -(dp65 -g3 -g4 -sg5 -(lp66 -S'NC_000015.9:g.72105928AC>ATT automapped to NC_000015.9:g.72105929delCinsTT' -p67 -aS'The displayed variants may be artefacts of aligning NM_016346.3 with genome build GRCh37' -p68 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_016346.3 between positions c.947_948' -p69 -aS'Caution should be used when reporting the displayed variant descriptions' -p70 -aS'If you are unsure, please contact admin' -p71 -aS'RefSeqGene record not available' -p72 -asg16 -g4 -sg17 -(lp73 -sg19 -VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA -p74 -sg21 -S'NR2E3' -p75 -sg23 -(dp76 -g25 -S'NP_057430.1:p.(Pro317SerfsTer8)' -p77 -sg27 -S'NP_057430.1:p.(P317Sfs*8)' -p78 -ssg29 -g30 -sg31 -g4 -sg32 -g4 -sg33 -S'NM_016346.3:c.947_948insTT' -p79 -sg35 -g4 -sg36 -(dp80 -S'grch38' -p81 -(dp82 -g40 -S'NC_000015.10:g.71813588_71813589insTT' -p83 -sg42 -(dp84 -g44 -g56 -sg46 -S'A' -p85 -sg48 -S'71813588' -p86 -sg50 -VATT -p87 -sssS'grch37' -p88 -(dp89 -g40 -S'NC_000015.9:g.72105929delinsTT' -p90 -sg42 -(dp91 -g44 -g56 -sg46 -g47 -sg48 -S'72105929' -p92 -sg50 -S'TT' -p93 -sssS'hg38' -p94 -(dp95 -g40 -S'NC_000015.10:g.71813588_71813589insTT' -p96 -sg42 -(dp97 -g44 -g45 -sg46 -g85 -sg48 -S'71813588' -p98 -sg50 -VATT -p99 -sssS'hg19' -p100 -(dp101 -g40 -S'NC_000015.9:g.72105929delinsTT' -p102 -sg42 -(dp103 -g44 -g45 -sg46 -g47 -sg48 -S'72105929' -p104 -sg50 -g93 -ssssg58 -(dp105 -g60 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1' -p106 -sg62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3' -p107 -sssS'flag' -p108 -S'gene_variant' -p109 -sS'NM_016346.2:c.947_948insTT' -p110 -(dp111 -g3 -g4 -sg5 -(lp112 -S'NC_000015.9:g.72105928AC>ATT automapped to NC_000015.9:g.72105929delCinsTT' -p113 -aS'The displayed variants may be artefacts of aligning NM_016346.2 with genome build GRCh37' -p114 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_016346.2 between positions c.947_948' -p115 -aS'Caution should be used when reporting the displayed variant descriptions' -p116 -aS'If you are unsure, please contact admin' -p117 -aS'A more recent version of the selected reference sequence NM_016346.2 is available (NM_016346.3)' -p118 -aS'NM_016346.3:c.947_948insTT MUST be fully validated prior to use in reports' -p119 -aS'select_variants=NM_016346.3:c.947_948insTT' -p120 -aS'RefSeqGene record not available' -p121 -asg16 -g4 -sg17 -(lp122 -sg19 -VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA -p123 -sg21 -S'NR2E3' -p124 -sg23 -(dp125 -g25 -S'NP_057430.1:p.(Pro317SerfsTer8)' -p126 -sg27 -S'NP_057430.1:p.(P317Sfs*8)' -p127 -ssg29 -g30 -sg31 -g4 -sg32 -g4 -sg33 -S'NM_016346.2:c.947_948insTT' -p128 -sg35 -g4 -sg36 -(dp129 -S'hg19' -p130 -(dp131 -g40 -S'NC_000015.9:g.72105929delinsTT' -p132 -sg42 -(dp133 -g44 -g45 -sg46 -g47 -sg48 -S'72105929' -p134 -sg50 -S'TT' -p135 -sssS'grch37' -p136 -(dp137 -g40 -S'NC_000015.9:g.72105929delinsTT' -p138 -sg42 -(dp139 -g44 -g56 -sg46 -g47 -sg48 -S'72105929' -p140 -sg50 -g135 -ssssg58 -(dp141 -g60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1' -p142 -sg62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2' -p143 -sssS'NM_014249.3:c.947_948insTT' -p144 -(dp145 -g3 -g4 -sg5 -(lp146 -S'NC_000015.9:g.72105928AC>ATT automapped to NC_000015.9:g.72105929delCinsTT' -p147 -aS'The displayed variants may be artefacts of aligning 
NM_014249.3 with genome build GRCh37' -p148 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_014249.3 between positions c.947_948' -p149 -aS'Caution should be used when reporting the displayed variant descriptions' -p150 -aS'If you are unsure, please contact admin' -p151 -aS'RefSeqGene record not available' -p152 -asg16 -g4 -sg17 -(lp153 -sg19 -VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA -p154 -sg21 -S'NR2E3' -p155 -sg23 -(dp156 -g25 -S'NP_055064.1:p.(Pro317SerfsTer8)' -p157 -sg27 -S'NP_055064.1:p.(P317Sfs*8)' -p158 -ssg29 -g30 -sg31 -g4 -sg32 -g4 -sg33 -S'NM_014249.3:c.947_948insTT' -p159 -sg35 -g4 -sg36 -(dp160 -S'grch38' -p161 -(dp162 -g40 -S'NC_000015.10:g.71813588_71813589insTT' -p163 -sg42 -(dp164 -g44 -g56 -sg46 -g85 -sg48 -S'71813588' -p165 -sg50 -VATT -p166 -sssS'grch37' -p167 -(dp168 -g40 -S'NC_000015.9:g.72105929delinsTT' -p169 -sg42 -(dp170 -g44 -g56 -sg46 -g47 -sg48 -S'72105929' -p171 -sg50 -S'TT' -p172 -sssg94 -(dp173 -g40 -S'NC_000015.10:g.71813588_71813589insTT' -p174 -sg42 -(dp175 -g44 -g45 -sg46 -g85 -sg48 -S'71813588' -p176 -sg50 -VATT -p177 -sssS'hg19' -p178 -(dp179 -g40 -S'NC_000015.9:g.72105929delinsTT' -p180 -sg42 -(dp181 -g44 -g45 -sg46 -g47 -sg48 -S'72105929' -p182 -sg50 -g172 -ssssg58 -(dp183 -g60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1' -p184 -sg62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3' -p185 -sssS'metadata' -p186 -(dp187 -S'variantvalidator_hgvs_version' -p188 -S'1.1.3' -p189 -sS'uta_schema' -p190 -S'uta_20180821' -p191 -sS'seqrepo_db' -p192 -S'2018-08-21' -p193 -sS'variantvalidator_version' -p194 -S'v0.2' -p195 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant182.txt b/VariantValidator/testing/testOutputsMasterITS/variant182.txt deleted file mode 100644 index 14c7d00c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant182.txt +++ /dev/null @@ -1,510 +0,0 @@ -(dp0 -S'NM_014249.2:c.947_948insTT' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000015.9:g.72105928ACC>ATT automapped to NC_000015.9:g.72105929_72105930delCCinsTT' -p7 -aS'The displayed variants may be artefacts of aligning NM_014249.2 with genome build GRCh37' -p8 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_014249.2 between positions c.947_948' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -aS'A more recent version of the selected reference sequence NM_014249.2 is available (NM_014249.3)' -p12 -aS'NM_014249.3:c.947_948insTT MUST be fully validated prior to use in reports' -p13 -aS'select_variants=NM_014249.3:c.947_948insTT' -p14 -aS'RefSeqGene record not available' -p15 -asS'refseqgene_context_intronic_sequence' -p16 -g4 -sS'alt_genomic_loci' -p17 -(lp18 -sS'transcript_description' -p19 -VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA -p20 -sS'gene_symbol' -p21 -S'NR2E3' -p22 -sS'hgvs_predicted_protein_consequence' -p23 -(dp24 -S'tlr' -p25 -S'NP_055064.1:p.(Pro317SerfsTer8)' -p26 -sS'slr' -p27 -S'NP_055064.1:p.(P317Sfs*8)' -p28 -ssS'submitted_variant' -p29 -S'15-72105928-ACC-ATT' -p30 -sS'genome_context_intronic_sequence' -p31 -g4 -sS'hgvs_lrg_variant' -p32 -g4 -sS'hgvs_transcript_variant' -p33 -S'NM_014249.2:c.947_948insTT' -p34 -sS'hgvs_refseqgene_variant' -p35 -g4 -sS'primary_assembly_loci' -p36 -(dp37 -S'hg19' -p38 -(dp39 -S'hgvs_genomic_description' -p40 -S'NC_000015.9:g.72105929delinsTT' -p41 -sS'vcf' 
-p42 -(dp43 -S'chr' -p44 -S'chr15' -p45 -sS'ref' -p46 -S'C' -p47 -sS'pos' -p48 -S'72105929' -p49 -sS'alt' -p50 -S'TT' -p51 -sssS'grch37' -p52 -(dp53 -g40 -S'NC_000015.9:g.72105929delinsTT' -p54 -sg42 -(dp55 -g44 -S'15' -p56 -sg46 -g47 -sg48 -S'72105929' -p57 -sg50 -g51 -ssssS'reference_sequence_records' -p58 -(dp59 -S'protein' -p60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1' -p61 -sS'transcript' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2' -p63 -sssS'NM_016346.3:c.947_948insTT' -p64 -(dp65 -g3 -g4 -sg5 -(lp66 -S'NC_000015.9:g.72105928ACC>ATT automapped to NC_000015.9:g.72105929_72105930delCCinsTT' -p67 -aS'The displayed variants may be artefacts of aligning NM_016346.3 with genome build GRCh37' -p68 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_016346.3 between positions c.947_948' -p69 -aS'Caution should be used when reporting the displayed variant descriptions' -p70 -aS'If you are unsure, please contact admin' -p71 -aS'RefSeqGene record not available' -p72 -asg16 -g4 -sg17 -(lp73 -sg19 -VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA -p74 -sg21 -S'NR2E3' -p75 -sg23 -(dp76 -g25 -S'NP_057430.1:p.(Pro317SerfsTer8)' -p77 -sg27 -S'NP_057430.1:p.(P317Sfs*8)' -p78 -ssg29 -g30 -sg31 -g4 -sg32 -g4 -sg33 -S'NM_016346.3:c.947_948insTT' -p79 -sg35 -g4 -sg36 -(dp80 -S'grch38' -p81 -(dp82 -g40 -S'NC_000015.10:g.71813588_71813589insTT' -p83 -sg42 -(dp84 -g44 -g56 -sg46 -S'A' -p85 -sg48 -S'71813588' -p86 -sg50 -VATT -p87 -sssS'grch37' -p88 -(dp89 -g40 -S'NC_000015.9:g.72105929delinsTT' -p90 -sg42 -(dp91 -g44 -g56 -sg46 -g47 -sg48 -S'72105929' -p92 -sg50 -S'TT' -p93 -sssS'hg38' -p94 -(dp95 -g40 -S'NC_000015.10:g.71813588_71813589insTT' -p96 -sg42 -(dp97 -g44 -g45 -sg46 -g85 -sg48 -S'71813588' -p98 -sg50 -VATT -p99 -sssS'hg19' -p100 -(dp101 -g40 -S'NC_000015.9:g.72105929delinsTT' -p102 -sg42 -(dp103 -g44 -g45 -sg46 -g47 -sg48 -S'72105929' -p104 -sg50 -g93 
-ssssg58 -(dp105 -g60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1' -p106 -sg62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3' -p107 -sssS'flag' -p108 -S'gene_variant' -p109 -sS'NM_016346.2:c.947_948insTT' -p110 -(dp111 -g3 -g4 -sg5 -(lp112 -S'NC_000015.9:g.72105928ACC>ATT automapped to NC_000015.9:g.72105929_72105930delCCinsTT' -p113 -aS'The displayed variants may be artefacts of aligning NM_016346.2 with genome build GRCh37' -p114 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_016346.2 between positions c.947_948' -p115 -aS'Caution should be used when reporting the displayed variant descriptions' -p116 -aS'If you are unsure, please contact admin' -p117 -aS'A more recent version of the selected reference sequence NM_016346.2 is available (NM_016346.3)' -p118 -aS'NM_016346.3:c.947_948insTT MUST be fully validated prior to use in reports' -p119 -aS'select_variants=NM_016346.3:c.947_948insTT' -p120 -aS'RefSeqGene record not available' -p121 -asg16 -g4 -sg17 -(lp122 -sg19 -VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA -p123 -sg21 -S'NR2E3' -p124 -sg23 -(dp125 -g25 -S'NP_057430.1:p.(Pro317SerfsTer8)' -p126 -sg27 -S'NP_057430.1:p.(P317Sfs*8)' -p127 -ssg29 -g30 -sg31 -g4 -sg32 -g4 -sg33 -S'NM_016346.2:c.947_948insTT' -p128 -sg35 -g4 -sg36 -(dp129 -S'hg19' -p130 -(dp131 -g40 -S'NC_000015.9:g.72105929delinsTT' -p132 -sg42 -(dp133 -g44 -g45 -sg46 -g47 -sg48 -S'72105929' -p134 -sg50 -S'TT' -p135 -sssS'grch37' -p136 -(dp137 -g40 -S'NC_000015.9:g.72105929delinsTT' -p138 -sg42 -(dp139 -g44 -g56 -sg46 -g47 -sg48 -S'72105929' -p140 -sg50 -g135 -ssssg58 -(dp141 -g60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1' -p142 -sg62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2' -p143 -sssS'NM_014249.3:c.947_948insTT' -p144 -(dp145 -g3 -g4 -sg5 -(lp146 -S'NC_000015.9:g.72105928ACC>ATT automapped to NC_000015.9:g.72105929_72105930delCCinsTT' -p147 -aS'The 
displayed variants may be artefacts of aligning NM_014249.3 with genome build GRCh37' -p148 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_014249.3 between positions c.947_948' -p149 -aS'Caution should be used when reporting the displayed variant descriptions' -p150 -aS'If you are unsure, please contact admin' -p151 -aS'RefSeqGene record not available' -p152 -asg16 -g4 -sg17 -(lp153 -sg19 -VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA -p154 -sg21 -S'NR2E3' -p155 -sg23 -(dp156 -g25 -S'NP_055064.1:p.(Pro317SerfsTer8)' -p157 -sg27 -S'NP_055064.1:p.(P317Sfs*8)' -p158 -ssg29 -g30 -sg31 -g4 -sg32 -g4 -sg33 -S'NM_014249.3:c.947_948insTT' -p159 -sg35 -g4 -sg36 -(dp160 -S'grch38' -p161 -(dp162 -g40 -S'NC_000015.10:g.71813588_71813589insTT' -p163 -sg42 -(dp164 -g44 -g56 -sg46 -g85 -sg48 -S'71813588' -p165 -sg50 -VATT -p166 -sssS'grch37' -p167 -(dp168 -g40 -S'NC_000015.9:g.72105929delinsTT' -p169 -sg42 -(dp170 -g44 -g56 -sg46 -g47 -sg48 -S'72105929' -p171 -sg50 -S'TT' -p172 -sssg94 -(dp173 -g40 -S'NC_000015.10:g.71813588_71813589insTT' -p174 -sg42 -(dp175 -g44 -g45 -sg46 -g85 -sg48 -S'71813588' -p176 -sg50 -VATT -p177 -sssS'hg19' -p178 -(dp179 -g40 -S'NC_000015.9:g.72105929delinsTT' -p180 -sg42 -(dp181 -g44 -g45 -sg46 -g47 -sg48 -S'72105929' -p182 -sg50 -g172 -ssssg58 -(dp183 -g60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1' -p184 -sg62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3' -p185 -sssS'metadata' -p186 -(dp187 -S'variantvalidator_hgvs_version' -p188 -S'1.1.3' -p189 -sS'uta_schema' -p190 -S'uta_20180821' -p191 -sS'seqrepo_db' -p192 -S'2018-08-21' -p193 -sS'variantvalidator_version' -p194 -S'v0.2' -p195 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant183.txt b/VariantValidator/testing/testOutputsMasterITS/variant183.txt deleted file mode 100644 index 5a9c75dd..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant183.txt +++ /dev/null @@ -1,515 +0,0 @@ -(dp0 -S'NM_014249.3:c.947delinsTT' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000015.9:g.72105927GACC>GTT automapped to NC_000015.9:g.72105928_72105930delACCinsTT' -p7 -aS'The displayed variants may be artefacts of aligning NM_014249.3 with genome build GRCh37' -p8 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_014249.3 between positions c.947_948' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -aS'RefSeqGene record not available' -p12 -asS'refseqgene_context_intronic_sequence' -p13 -g4 -sS'alt_genomic_loci' -p14 -(lp15 -sS'transcript_description' -p16 -VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA -p17 -sS'gene_symbol' -p18 -S'NR2E3' -p19 -sS'hgvs_predicted_protein_consequence' -p20 -(dp21 -S'tlr' -p22 -S'NP_055064.1:p.(Asp316ValfsTer25)' -p23 -sS'slr' -p24 -S'NP_055064.1:p.(D316Vfs*25)' -p25 -ssS'submitted_variant' -p26 -S'15-72105927-GACC-GTT' -p27 -sS'genome_context_intronic_sequence' -p28 -g4 -sS'hgvs_lrg_variant' -p29 -g4 -sS'hgvs_transcript_variant' -p30 -S'NM_014249.3:c.947delinsTT' -p31 -sS'hgvs_refseqgene_variant' -p32 -g4 -sS'primary_assembly_loci' -p33 -(dp34 -S'grch38' -p35 -(dp36 -S'hgvs_genomic_description' -p37 -S'NC_000015.10:g.71813588delinsTT' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'15' -p42 -sS'ref' -p43 -S'A' -p44 -sS'pos' -p45 -S'71813588' -p46 -sS'alt' -p47 -VTT -p48 -sssS'grch37' -p49 -(dp50 -g37 -S'NC_000015.9:g.72105928_72105929delinsTT' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -S'AC' -p53 
-sg45 -S'72105928' -p54 -sg47 -S'TT' -p55 -sssS'hg38' -p56 -(dp57 -g37 -S'NC_000015.10:g.71813588delinsTT' -p58 -sg39 -(dp59 -g41 -S'chr15' -p60 -sg43 -g44 -sg45 -S'71813588' -p61 -sg47 -g48 -sssS'hg19' -p62 -(dp63 -g37 -S'NC_000015.9:g.72105928_72105929delinsTT' -p64 -sg39 -(dp65 -g41 -g60 -sg43 -S'AC' -p66 -sg45 -S'72105928' -p67 -sg47 -g55 -ssssS'reference_sequence_records' -p68 -(dp69 -S'protein' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1' -p71 -sS'transcript' -p72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3' -p73 -sssS'NM_016346.2:c.947delinsTT' -p74 -(dp75 -g3 -g4 -sg5 -(lp76 -S'NC_000015.9:g.72105927GACC>GTT automapped to NC_000015.9:g.72105928_72105930delACCinsTT' -p77 -aS'The displayed variants may be artefacts of aligning NM_016346.2 with genome build GRCh37' -p78 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_016346.2 between positions c.947_948' -p79 -aS'Caution should be used when reporting the displayed variant descriptions' -p80 -aS'If you are unsure, please contact admin' -p81 -aS'A more recent version of the selected reference sequence NM_016346.2 is available (NM_016346.3)' -p82 -aS'NM_016346.3:c.947delAinsTT MUST be fully validated prior to use in reports' -p83 -aS'select_variants=NM_016346.3:c.947delinsTT' -p84 -aS'RefSeqGene record not available' -p85 -asg13 -g4 -sg14 -(lp86 -sg16 -VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA -p87 -sg18 -S'NR2E3' -p88 -sg20 -(dp89 -g22 -S'NP_057430.1:p.(Asp316ValfsTer25)' -p90 -sg24 -S'NP_057430.1:p.(D316Vfs*25)' -p91 -ssg26 -g27 -sg28 -g4 -sg29 -g4 -sg30 -S'NM_016346.2:c.947delinsTT' -p92 -sg32 -g4 -sg33 -(dp93 -S'hg19' -p94 -(dp95 -g37 -S'NC_000015.9:g.72105928_72105929delinsTT' -p96 -sg39 -(dp97 -g41 -g60 -sg43 -S'AC' -p98 -sg45 -S'72105928' -p99 -sg47 -S'TT' -p100 -sssS'grch37' -p101 -(dp102 -g37 -S'NC_000015.9:g.72105928_72105929delinsTT' -p103 -sg39 -(dp104 -g41 -g42 -sg43 
-S'AC' -p105 -sg45 -S'72105928' -p106 -sg47 -g100 -ssssg68 -(dp107 -g70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1' -p108 -sg72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2' -p109 -sssS'NM_014249.2:c.947delinsTT' -p110 -(dp111 -g3 -g4 -sg5 -(lp112 -S'NC_000015.9:g.72105927GACC>GTT automapped to NC_000015.9:g.72105928_72105930delACCinsTT' -p113 -aS'The displayed variants may be artefacts of aligning NM_014249.2 with genome build GRCh37' -p114 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_014249.2 between positions c.947_948' -p115 -aS'Caution should be used when reporting the displayed variant descriptions' -p116 -aS'If you are unsure, please contact admin' -p117 -aS'A more recent version of the selected reference sequence NM_014249.2 is available (NM_014249.3)' -p118 -aS'NM_014249.3:c.947delAinsTT MUST be fully validated prior to use in reports' -p119 -aS'select_variants=NM_014249.3:c.947delinsTT' -p120 -aS'RefSeqGene record not available' -p121 -asg13 -g4 -sg14 -(lp122 -sg16 -VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA -p123 -sg18 -S'NR2E3' -p124 -sg20 -(dp125 -g22 -S'NP_055064.1:p.(Asp316ValfsTer25)' -p126 -sg24 -S'NP_055064.1:p.(D316Vfs*25)' -p127 -ssg26 -g27 -sg28 -g4 -sg29 -g4 -sg30 -S'NM_014249.2:c.947delinsTT' -p128 -sg32 -g4 -sg33 -(dp129 -S'hg19' -p130 -(dp131 -g37 -S'NC_000015.9:g.72105928_72105929delinsTT' -p132 -sg39 -(dp133 -g41 -g60 -sg43 -S'AC' -p134 -sg45 -S'72105928' -p135 -sg47 -S'TT' -p136 -sssS'grch37' -p137 -(dp138 -g37 -S'NC_000015.9:g.72105928_72105929delinsTT' -p139 -sg39 -(dp140 -g41 -g42 -sg43 -S'AC' -p141 -sg45 -S'72105928' -p142 -sg47 -g136 -ssssg68 -(dp143 -g70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1' -p144 -sg72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2' -p145 -sssS'flag' -p146 -S'gene_variant' -p147 -sS'NM_016346.3:c.947delinsTT' -p148 -(dp149 -g3 -g4 -sg5 -(lp150 
-S'NC_000015.9:g.72105927GACC>GTT automapped to NC_000015.9:g.72105928_72105930delACCinsTT' -p151 -aS'The displayed variants may be artefacts of aligning NM_016346.3 with genome build GRCh37' -p152 -aS'Genome position NC_000015.9:g.72105930 aligns within a Requires Analysis-bp gap in transcript NM_016346.3 between positions c.947_948' -p153 -aS'Caution should be used when reporting the displayed variant descriptions' -p154 -aS'If you are unsure, please contact admin' -p155 -aS'RefSeqGene record not available' -p156 -asg13 -g4 -sg14 -(lp157 -sg16 -VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA -p158 -sg18 -S'NR2E3' -p159 -sg20 -(dp160 -g22 -S'NP_057430.1:p.(Asp316ValfsTer25)' -p161 -sg24 -S'NP_057430.1:p.(D316Vfs*25)' -p162 -ssg26 -g27 -sg28 -g4 -sg29 -g4 -sg30 -S'NM_016346.3:c.947delinsTT' -p163 -sg32 -g4 -sg33 -(dp164 -S'grch38' -p165 -(dp166 -g37 -S'NC_000015.10:g.71813588delinsTT' -p167 -sg39 -(dp168 -g41 -g42 -sg43 -g44 -sg45 -S'71813588' -p169 -sg47 -VTT -p170 -sssS'grch37' -p171 -(dp172 -g37 -S'NC_000015.9:g.72105928_72105929delinsTT' -p173 -sg39 -(dp174 -g41 -g42 -sg43 -S'AC' -p175 -sg45 -S'72105928' -p176 -sg47 -S'TT' -p177 -sssg56 -(dp178 -g37 -S'NC_000015.10:g.71813588delinsTT' -p179 -sg39 -(dp180 -g41 -g60 -sg43 -g44 -sg45 -S'71813588' -p181 -sg47 -g170 -sssS'hg19' -p182 -(dp183 -g37 -S'NC_000015.9:g.72105928_72105929delinsTT' -p184 -sg39 -(dp185 -g41 -g60 -sg43 -S'AC' -p186 -sg45 -S'72105928' -p187 -sg47 -g177 -ssssg68 -(dp188 -g70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1' -p189 -sg72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3' -p190 -sssS'metadata' -p191 -(dp192 -S'variantvalidator_hgvs_version' -p193 -S'1.1.3' -p194 -sS'uta_schema' -p195 -S'uta_20180821' -p196 -sS'seqrepo_db' -p197 -S'2018-08-21' -p198 -sS'variantvalidator_version' -p199 -S'v0.2' -p200 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant184.txt b/VariantValidator/testing/testOutputsMasterITS/variant184.txt deleted file mode 100644 index c8f4469f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant184.txt +++ /dev/null @@ -1,439 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_001042544.1:c.3233_3235=' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000019.9:g.41123093A>AG automapped to NC_000019.9:g.41123095dupG' -p9 -aS'The displayed variants may be artefacts of aligning NM_001042544.1 with genome build GRCh37' -p10 -aS'NM_001042544.1:c.3233_3235 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p11 -aS'Caution should be used when reporting the displayed variant descriptions' -p12 -aS'If you are unsure, please contact admin' -p13 -aS'RefSeqGene record not available' -p14 -asS'refseqgene_context_intronic_sequence' -p15 -g6 -sS'alt_genomic_loci' -p16 -(lp17 -sS'transcript_description' -p18 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA -p19 -sS'gene_symbol' -p20 -S'LTBP4' -p21 -sS'hgvs_predicted_protein_consequence' -p22 -(dp23 -S'tlr' -p24 -S'NP_001036009.1:p.(Gln1078=)' -p25 -sS'slr' -p26 -S'NP_001036009.1:p.(Q1078=)' -p27 -ssS'submitted_variant' -p28 -S'19-41123093-A-AG' -p29 -sS'genome_context_intronic_sequence' -p30 -g6 -sS'hgvs_lrg_variant' -p31 -g6 -sS'hgvs_transcript_variant' -p32 -S'NM_001042544.1:c.3233_3235=' -p33 -sS'hgvs_refseqgene_variant' -p34 -g6 -sS'primary_assembly_loci' -p35 -(dp36 -S'grch38' -p37 -(dp38 -S'hgvs_genomic_description' -p39 -S'NC_000019.10:g.40617187_40617189=' -p40 -sS'vcf' -p41 -(dp42 -S'chr' -p43 -S'19' -p44 -sS'ref' -p45 -VAGG -p46 -sS'pos' -p47 -S'40617187' -p48 -sS'alt' -p49 -g46 -sssS'grch37' -p50 -(dp51 -g39 -S'NC_000019.9:g.41123095dup' -p52 -sg41 -(dp53 -g43 -g44 -sg45 -S'G' -p54 -sg47 -S'41123094' -p55 
-sg49 -VGG -p56 -sssS'hg38' -p57 -(dp58 -g39 -S'NC_000019.10:g.40617187_40617189=' -p59 -sg41 -(dp60 -g43 -S'chr19' -p61 -sg45 -g46 -sg47 -S'40617187' -p62 -sg49 -g46 -sssS'hg19' -p63 -(dp64 -g39 -S'NC_000019.9:g.41123095dup' -p65 -sg41 -(dp66 -g43 -g61 -sg45 -g54 -sg47 -S'41123094' -p67 -sg49 -VGG -p68 -ssssS'reference_sequence_records' -p69 -(dp70 -S'protein' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1' -p72 -sS'transcript' -p73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1' -p74 -sssS'metadata' -p75 -(dp76 -S'variantvalidator_hgvs_version' -p77 -S'1.1.3' -p78 -sS'uta_schema' -p79 -S'uta_20180821' -p80 -sS'seqrepo_db' -p81 -S'2018-08-21' -p82 -sS'variantvalidator_version' -p83 -S'v0.2' -p84 -ssS'NM_001042545.1:c.3032_3034=' -p85 -(dp86 -g5 -g6 -sg7 -(lp87 -S'NC_000019.9:g.41123093A>AG automapped to NC_000019.9:g.41123095dupG' -p88 -aS'The displayed variants may be artefacts of aligning NM_001042545.1 with genome build GRCh37' -p89 -aS'NM_001042545.1:c.3032_3034 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p90 -aS'Caution should be used when reporting the displayed variant descriptions' -p91 -aS'If you are unsure, please contact admin' -p92 -aS'RefSeqGene record not available' -p93 -asg15 -g6 -sg16 -(lp94 -sg18 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA -p95 -sg20 -S'LTBP4' -p96 -sg22 -(dp97 -g24 -S'NP_001036010.1:p.(Gln1011=)' -p98 -sg26 -S'NP_001036010.1:p.(Q1011=)' -p99 -ssg28 -g29 -sg30 -g6 -sg31 -g6 -sg32 -S'NM_001042545.1:c.3032_3034=' -p100 -sg34 -g6 -sg35 -(dp101 -S'grch38' -p102 -(dp103 -g39 -S'NC_000019.10:g.40617187_40617189=' -p104 -sg41 -(dp105 -g43 -g44 -sg45 -VAGG -p106 -sg47 -S'40617187' -p107 -sg49 -g106 -sssS'grch37' -p108 -(dp109 -g39 -S'NC_000019.9:g.41123095dup' -p110 -sg41 -(dp111 -g43 -g44 -sg45 -g54 -sg47 -S'41123094' -p112 -sg49 -VGG -p113 -sssg57 -(dp114 -g39 -S'NC_000019.10:g.40617187_40617189=' -p115 -sg41 -(dp116 
-g43 -g61 -sg45 -g106 -sg47 -S'40617187' -p117 -sg49 -g106 -sssS'hg19' -p118 -(dp119 -g39 -S'NC_000019.9:g.41123095dup' -p120 -sg41 -(dp121 -g43 -g61 -sg45 -g54 -sg47 -S'41123094' -p122 -sg49 -VGG -p123 -ssssg69 -(dp124 -g71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1' -p125 -sg73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1' -p126 -sssS'NM_003573.2:c.3122_3124=' -p127 -(dp128 -g5 -g6 -sg7 -(lp129 -S'NC_000019.9:g.41123093A>AG automapped to NC_000019.9:g.41123095dupG' -p130 -aS'The displayed variants may be artefacts of aligning NM_003573.2 with genome build GRCh37' -p131 -aS'NM_003573.2:c.3122_3124 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p132 -aS'Caution should be used when reporting the displayed variant descriptions' -p133 -aS'If you are unsure, please contact admin' -p134 -aS'RefSeqGene record not available' -p135 -asg15 -g6 -sg16 -(lp136 -sg18 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA -p137 -sg20 -S'LTBP4' -p138 -sg22 -(dp139 -g24 -S'NP_003564.2:p.(Gln1041=)' -p140 -sg26 -S'NP_003564.2:p.(Q1041=)' -p141 -ssg28 -g29 -sg30 -g6 -sg31 -g6 -sg32 -S'NM_003573.2:c.3122_3124=' -p142 -sg34 -g6 -sg35 -(dp143 -S'grch38' -p144 -(dp145 -g39 -S'NC_000019.10:g.40617187_40617189=' -p146 -sg41 -(dp147 -g43 -g44 -sg45 -VAGG -p148 -sg47 -S'40617187' -p149 -sg49 -g148 -sssS'grch37' -p150 -(dp151 -g39 -S'NC_000019.9:g.41123095dup' -p152 -sg41 -(dp153 -g43 -g44 -sg45 -g54 -sg47 -S'41123094' -p154 -sg49 -VGG -p155 -sssg57 -(dp156 -g39 -S'NC_000019.10:g.40617187_40617189=' -p157 -sg41 -(dp158 -g43 -g61 -sg45 -g148 -sg47 -S'40617187' -p159 -sg49 -g148 -sssS'hg19' -p160 -(dp161 -g39 -S'NC_000019.9:g.41123095dup' -p162 -sg41 -(dp163 -g43 -g61 -sg45 -g54 -sg47 -S'41123094' -p164 -sg49 -VGG -p165 -ssssg69 -(dp166 -g71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2' -p167 -sg73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2' -p168 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant185.txt b/VariantValidator/testing/testOutputsMasterITS/variant185.txt deleted file mode 100644 index ee61fa54..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant185.txt +++ /dev/null @@ -1,438 +0,0 @@ -(dp0 -S'NM_003573.2:c.3123G>T' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000019.9:g.41123093A>AT automapped to NC_000019.9:g.41123093_41123094insT' -p7 -aS'The displayed variants may be artefacts of aligning NM_003573.2 with genome build GRCh37' -p8 -aS'NM_003573.2:c.3123 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -aS'RefSeqGene record not available' -p12 -asS'refseqgene_context_intronic_sequence' -p13 -g4 -sS'alt_genomic_loci' -p14 -(lp15 -sS'transcript_description' -p16 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA -p17 -sS'gene_symbol' -p18 -S'LTBP4' -p19 -sS'hgvs_predicted_protein_consequence' -p20 -(dp21 -S'tlr' -p22 -S'NP_003564.2:p.(Gln1041His)' -p23 -sS'slr' -p24 -S'NP_003564.2:p.(Q1041H)' -p25 -ssS'submitted_variant' -p26 -S'19-41123093-A-AT' -p27 -sS'genome_context_intronic_sequence' -p28 -g4 -sS'hgvs_lrg_variant' -p29 -g4 -sS'hgvs_transcript_variant' -p30 -S'NM_003573.2:c.3123G>T' -p31 -sS'hgvs_refseqgene_variant' -p32 -g4 -sS'primary_assembly_loci' -p33 -(dp34 -S'grch38' -p35 -(dp36 -S'hgvs_genomic_description' -p37 -S'NC_000019.10:g.40617188G>T' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'19' -p42 -sS'ref' -p43 -VG -p44 -sS'pos' -p45 -S'40617188' -p46 -sS'alt' -p47 -VT -p48 -sssS'grch37' -p49 -(dp50 -g37 -S'NC_000019.9:g.41123093_41123094insT' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -S'A' -p53 -sg45 -S'41123093' -p54 -sg47 -VAT -p55 -sssS'hg38' -p56 -(dp57 -g37 
-S'NC_000019.10:g.40617188G>T' -p58 -sg39 -(dp59 -g41 -S'chr19' -p60 -sg43 -g44 -sg45 -S'40617188' -p61 -sg47 -g48 -sssS'hg19' -p62 -(dp63 -g37 -S'NC_000019.9:g.41123093_41123094insT' -p64 -sg39 -(dp65 -g41 -g60 -sg43 -g53 -sg45 -S'41123093' -p66 -sg47 -VAT -p67 -ssssS'reference_sequence_records' -p68 -(dp69 -S'protein' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2' -p71 -sS'transcript' -p72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2' -p73 -sssS'flag' -p74 -S'gene_variant' -p75 -sS'metadata' -p76 -(dp77 -S'variantvalidator_hgvs_version' -p78 -S'1.1.3' -p79 -sS'uta_schema' -p80 -S'uta_20180821' -p81 -sS'seqrepo_db' -p82 -S'2018-08-21' -p83 -sS'variantvalidator_version' -p84 -S'v0.2' -p85 -ssS'NM_001042545.1:c.3033G>T' -p86 -(dp87 -g3 -g4 -sg5 -(lp88 -S'NC_000019.9:g.41123093A>AT automapped to NC_000019.9:g.41123093_41123094insT' -p89 -aS'The displayed variants may be artefacts of aligning NM_001042545.1 with genome build GRCh37' -p90 -aS'NM_001042545.1:c.3033 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p91 -aS'Caution should be used when reporting the displayed variant descriptions' -p92 -aS'If you are unsure, please contact admin' -p93 -aS'RefSeqGene record not available' -p94 -asg13 -g4 -sg14 -(lp95 -sg16 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA -p96 -sg18 -S'LTBP4' -p97 -sg20 -(dp98 -g22 -S'NP_001036010.1:p.(Gln1011His)' -p99 -sg24 -S'NP_001036010.1:p.(Q1011H)' -p100 -ssg26 -g27 -sg28 -g4 -sg29 -g4 -sg30 -S'NM_001042545.1:c.3033G>T' -p101 -sg32 -g4 -sg33 -(dp102 -S'grch38' -p103 -(dp104 -g37 -S'NC_000019.10:g.40617188G>T' -p105 -sg39 -(dp106 -g41 -g42 -sg43 -g44 -sg45 -S'40617188' -p107 -sg47 -g48 -sssS'grch37' -p108 -(dp109 -g37 -S'NC_000019.9:g.41123093_41123094insT' -p110 -sg39 -(dp111 -g41 -g42 -sg43 -g53 -sg45 -S'41123093' -p112 -sg47 -VAT -p113 -sssg56 -(dp114 -g37 -S'NC_000019.10:g.40617188G>T' -p115 -sg39 -(dp116 -g41 -g60 -sg43 -g44 
-sg45 -S'40617188' -p117 -sg47 -g48 -sssS'hg19' -p118 -(dp119 -g37 -S'NC_000019.9:g.41123093_41123094insT' -p120 -sg39 -(dp121 -g41 -g60 -sg43 -g53 -sg45 -S'41123093' -p122 -sg47 -VAT -p123 -ssssg68 -(dp124 -g70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1' -p125 -sg72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1' -p126 -sssS'NM_001042544.1:c.3234G>T' -p127 -(dp128 -g3 -g4 -sg5 -(lp129 -S'NC_000019.9:g.41123093A>AT automapped to NC_000019.9:g.41123093_41123094insT' -p130 -aS'The displayed variants may be artefacts of aligning NM_001042544.1 with genome build GRCh37' -p131 -aS'NM_001042544.1:c.3234 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p132 -aS'Caution should be used when reporting the displayed variant descriptions' -p133 -aS'If you are unsure, please contact admin' -p134 -aS'RefSeqGene record not available' -p135 -asg13 -g4 -sg14 -(lp136 -sg16 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA -p137 -sg18 -S'LTBP4' -p138 -sg20 -(dp139 -g22 -S'NP_001036009.1:p.(Gln1078His)' -p140 -sg24 -S'NP_001036009.1:p.(Q1078H)' -p141 -ssg26 -g27 -sg28 -g4 -sg29 -g4 -sg30 -S'NM_001042544.1:c.3234G>T' -p142 -sg32 -g4 -sg33 -(dp143 -S'grch38' -p144 -(dp145 -g37 -S'NC_000019.10:g.40617188G>T' -p146 -sg39 -(dp147 -g41 -g42 -sg43 -g44 -sg45 -S'40617188' -p148 -sg47 -g48 -sssS'grch37' -p149 -(dp150 -g37 -S'NC_000019.9:g.41123093_41123094insT' -p151 -sg39 -(dp152 -g41 -g42 -sg43 -g53 -sg45 -S'41123093' -p153 -sg47 -VAT -p154 -sssg56 -(dp155 -g37 -S'NC_000019.10:g.40617188G>T' -p156 -sg39 -(dp157 -g41 -g60 -sg43 -g44 -sg45 -S'40617188' -p158 -sg47 -g48 -sssS'hg19' -p159 -(dp160 -g37 -S'NC_000019.9:g.41123093_41123094insT' -p161 -sg39 -(dp162 -g41 -g60 -sg43 -g53 -sg45 -S'41123093' -p163 -sg47 -VAT -p164 -ssssg68 -(dp165 -g70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1' -p166 -sg72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1' -p167 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant186.txt b/VariantValidator/testing/testOutputsMasterITS/variant186.txt deleted file mode 100644 index 9dd11dde..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant186.txt +++ /dev/null @@ -1,442 +0,0 @@ -(dp0 -S'NM_001042544.1:c.3235_3236del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000019.9:g.41123093AG>A automapped to NC_000019.9:g.41123095delG' -p7 -aS'The displayed variants may be artefacts of aligning NM_001042544.1 with genome build GRCh37' -p8 -aS'NM_001042544.1:c.3234_3235 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -aS'RefSeqGene record not available' -p12 -asS'refseqgene_context_intronic_sequence' -p13 -g4 -sS'alt_genomic_loci' -p14 -(lp15 -sS'transcript_description' -p16 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA -p17 -sS'gene_symbol' -p18 -S'LTBP4' -p19 -sS'hgvs_predicted_protein_consequence' -p20 -(dp21 -S'tlr' -p22 -S'NP_001036009.1:p.(Gly1079LeufsTer17)' -p23 -sS'slr' -p24 -S'NP_001036009.1:p.(G1079Lfs*17)' -p25 -ssS'submitted_variant' -p26 -S'19-41123093-AG-A' -p27 -sS'genome_context_intronic_sequence' -p28 -g4 -sS'hgvs_lrg_variant' -p29 -g4 -sS'hgvs_transcript_variant' -p30 -S'NM_001042544.1:c.3235_3236del' -p31 -sS'hgvs_refseqgene_variant' -p32 -g4 -sS'primary_assembly_loci' -p33 -(dp34 -S'grch38' -p35 -(dp36 -S'hgvs_genomic_description' -p37 -S'NC_000019.10:g.40617189_40617190del' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'19' -p42 -sS'ref' -p43 -S'AGG' -p44 -sS'pos' -p45 -S'40617187' -p46 -sS'alt' -p47 -S'A' -p48 -sssS'grch37' -p49 -(dp50 -g37 -S'NC_000019.9:g.41123095del' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -S'AG' -p53 -sg45 -S'41123093' -p54 -sg47 -g48 
-sssS'hg38' -p55 -(dp56 -g37 -S'NC_000019.10:g.40617189_40617190del' -p57 -sg39 -(dp58 -g41 -S'chr19' -p59 -sg43 -S'AGG' -p60 -sg45 -S'40617187' -p61 -sg47 -g48 -sssS'hg19' -p62 -(dp63 -g37 -S'NC_000019.9:g.41123095del' -p64 -sg39 -(dp65 -g41 -g59 -sg43 -S'AG' -p66 -sg45 -S'41123093' -p67 -sg47 -g48 -ssssS'reference_sequence_records' -p68 -(dp69 -S'protein' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1' -p71 -sS'transcript' -p72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1' -p73 -sssS'flag' -p74 -S'gene_variant' -p75 -sS'NM_001042545.1:c.3034_3035del' -p76 -(dp77 -g3 -g4 -sg5 -(lp78 -S'NC_000019.9:g.41123093AG>A automapped to NC_000019.9:g.41123095delG' -p79 -aS'The displayed variants may be artefacts of aligning NM_001042545.1 with genome build GRCh37' -p80 -aS'NM_001042545.1:c.3033_3034 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p81 -aS'Caution should be used when reporting the displayed variant descriptions' -p82 -aS'If you are unsure, please contact admin' -p83 -aS'RefSeqGene record not available' -p84 -asg13 -g4 -sg14 -(lp85 -sg16 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA -p86 -sg18 -S'LTBP4' -p87 -sg20 -(dp88 -g22 -S'NP_001036010.1:p.(Gly1012LeufsTer17)' -p89 -sg24 -S'NP_001036010.1:p.(G1012Lfs*17)' -p90 -ssg26 -g27 -sg28 -g4 -sg29 -g4 -sg30 -S'NM_001042545.1:c.3034_3035del' -p91 -sg32 -g4 -sg33 -(dp92 -S'grch38' -p93 -(dp94 -g37 -S'NC_000019.10:g.40617189_40617190del' -p95 -sg39 -(dp96 -g41 -g42 -sg43 -S'AGG' -p97 -sg45 -S'40617187' -p98 -sg47 -g48 -sssS'grch37' -p99 -(dp100 -g37 -S'NC_000019.9:g.41123095del' -p101 -sg39 -(dp102 -g41 -g42 -sg43 -S'AG' -p103 -sg45 -S'41123093' -p104 -sg47 -g48 -sssg55 -(dp105 -g37 -S'NC_000019.10:g.40617189_40617190del' -p106 -sg39 -(dp107 -g41 -g59 -sg43 -S'AGG' -p108 -sg45 -S'40617187' -p109 -sg47 -g48 -sssS'hg19' -p110 -(dp111 -g37 -S'NC_000019.9:g.41123095del' -p112 -sg39 -(dp113 -g41 -g59 -sg43 
-S'AG' -p114 -sg45 -S'41123093' -p115 -sg47 -g48 -ssssg68 -(dp116 -g70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1' -p117 -sg72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1' -p118 -sssS'NM_003573.2:c.3124_3125del' -p119 -(dp120 -g3 -g4 -sg5 -(lp121 -S'NC_000019.9:g.41123093AG>A automapped to NC_000019.9:g.41123095delG' -p122 -aS'The displayed variants may be artefacts of aligning NM_003573.2 with genome build GRCh37' -p123 -aS'NM_003573.2:c.3123_3124 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p124 -aS'Caution should be used when reporting the displayed variant descriptions' -p125 -aS'If you are unsure, please contact admin' -p126 -aS'RefSeqGene record not available' -p127 -asg13 -g4 -sg14 -(lp128 -sg16 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA -p129 -sg18 -S'LTBP4' -p130 -sg20 -(dp131 -g22 -S'NP_003564.2:p.(Gly1042LeufsTer17)' -p132 -sg24 -S'NP_003564.2:p.(G1042Lfs*17)' -p133 -ssg26 -g27 -sg28 -g4 -sg29 -g4 -sg30 -S'NM_003573.2:c.3124_3125del' -p134 -sg32 -g4 -sg33 -(dp135 -S'grch38' -p136 -(dp137 -g37 -S'NC_000019.10:g.40617189_40617190del' -p138 -sg39 -(dp139 -g41 -g42 -sg43 -S'AGG' -p140 -sg45 -S'40617187' -p141 -sg47 -g48 -sssS'grch37' -p142 -(dp143 -g37 -S'NC_000019.9:g.41123095del' -p144 -sg39 -(dp145 -g41 -g42 -sg43 -S'AG' -p146 -sg45 -S'41123093' -p147 -sg47 -g48 -sssg55 -(dp148 -g37 -S'NC_000019.10:g.40617189_40617190del' -p149 -sg39 -(dp150 -g41 -g59 -sg43 -S'AGG' -p151 -sg45 -S'40617187' -p152 -sg47 -g48 -sssS'hg19' -p153 -(dp154 -g37 -S'NC_000019.9:g.41123095del' -p155 -sg39 -(dp156 -g41 -g59 -sg43 -S'AG' -p157 -sg45 -S'41123093' -p158 -sg47 -g48 -ssssg68 -(dp159 -g70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2' -p160 -sg72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2' -p161 -sssS'metadata' -p162 -(dp163 -S'variantvalidator_hgvs_version' -p164 -S'1.1.3' -p165 -sS'uta_schema' -p166 -S'uta_20180821' -p167 -sS'seqrepo_db' 
-p168 -S'2018-08-21' -p169 -sS'variantvalidator_version' -p170 -S'v0.2' -p171 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant187.txt b/VariantValidator/testing/testOutputsMasterITS/variant187.txt deleted file mode 100644 index 01c0951e..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant187.txt +++ /dev/null @@ -1,437 +0,0 @@ -(dp0 -S'NM_001042545.1:c.3035del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000019.9:g.41123093AG>AG automapped to NC_000019.9:g.41123093_41123094AG=' -p7 -aS'The displayed variants may be artefacts of aligning NM_001042545.1 with genome build GRCh37' -p8 -aS'NM_001042545.1:c.3033 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -aS'RefSeqGene record not available' -p12 -asS'refseqgene_context_intronic_sequence' -p13 -g4 -sS'alt_genomic_loci' -p14 -(lp15 -sS'transcript_description' -p16 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA -p17 -sS'gene_symbol' -p18 -S'LTBP4' -p19 -sS'hgvs_predicted_protein_consequence' -p20 -(dp21 -S'tlr' -p22 -S'NP_001036010.1:p.(Gly1012ValfsTer14)' -p23 -sS'slr' -p24 -S'NP_001036010.1:p.(G1012Vfs*14)' -p25 -ssS'submitted_variant' -p26 -S'19-41123093-AG-AG' -p27 -sS'genome_context_intronic_sequence' -p28 -g4 -sS'hgvs_lrg_variant' -p29 -g4 -sS'hgvs_transcript_variant' -p30 -S'NM_001042545.1:c.3035del' -p31 -sS'hgvs_refseqgene_variant' -p32 -g4 -sS'primary_assembly_loci' -p33 -(dp34 -S'grch38' -p35 -(dp36 -S'hgvs_genomic_description' -p37 -S'NC_000019.10:g.40617190del' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'19' -p42 -sS'ref' -p43 -S'AG' -p44 -sS'pos' -p45 -S'40617187' -p46 -sS'alt' -p47 -S'A' -p48 -sssS'grch37' -p49 -(dp50 -g37 -S'NC_000019.9:g.41123093_41123094=' -p51 
-sg39 -(dp52 -g41 -g42 -sg43 -S'AG' -p53 -sg45 -S'41123093' -p54 -sg47 -g53 -sssS'hg38' -p55 -(dp56 -g37 -S'NC_000019.10:g.40617190del' -p57 -sg39 -(dp58 -g41 -S'chr19' -p59 -sg43 -S'AG' -p60 -sg45 -S'40617187' -p61 -sg47 -g48 -sssS'hg19' -p62 -(dp63 -g37 -S'NC_000019.9:g.41123093_41123094=' -p64 -sg39 -(dp65 -g41 -g59 -sg43 -g53 -sg45 -S'41123093' -p66 -sg47 -g53 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1' -p72 -sssS'flag' -p73 -S'gene_variant' -p74 -sS'NM_001042544.1:c.3236del' -p75 -(dp76 -g3 -g4 -sg5 -(lp77 -S'NC_000019.9:g.41123093AG>AG automapped to NC_000019.9:g.41123093_41123094AG=' -p78 -aS'The displayed variants may be artefacts of aligning NM_001042544.1 with genome build GRCh37' -p79 -aS'NM_001042544.1:c.3234 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p80 -aS'Caution should be used when reporting the displayed variant descriptions' -p81 -aS'If you are unsure, please contact admin' -p82 -aS'RefSeqGene record not available' -p83 -asg13 -g4 -sg14 -(lp84 -sg16 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA -p85 -sg18 -S'LTBP4' -p86 -sg20 -(dp87 -g22 -S'NP_001036009.1:p.(Gly1079ValfsTer14)' -p88 -sg24 -S'NP_001036009.1:p.(G1079Vfs*14)' -p89 -ssg26 -g27 -sg28 -g4 -sg29 -g4 -sg30 -S'NM_001042544.1:c.3236del' -p90 -sg32 -g4 -sg33 -(dp91 -S'grch38' -p92 -(dp93 -g37 -S'NC_000019.10:g.40617190del' -p94 -sg39 -(dp95 -g41 -g42 -sg43 -S'AG' -p96 -sg45 -S'40617187' -p97 -sg47 -g48 -sssS'grch37' -p98 -(dp99 -g37 -S'NC_000019.9:g.41123093_41123094=' -p100 -sg39 -(dp101 -g41 -g42 -sg43 -g53 -sg45 -S'41123093' -p102 -sg47 -g53 -sssg55 -(dp103 -g37 -S'NC_000019.10:g.40617190del' -p104 -sg39 -(dp105 -g41 -g59 -sg43 -S'AG' -p106 -sg45 -S'40617187' -p107 -sg47 -g48 -sssS'hg19' -p108 -(dp109 -g37 
-S'NC_000019.9:g.41123093_41123094=' -p110 -sg39 -(dp111 -g41 -g59 -sg43 -g53 -sg45 -S'41123093' -p112 -sg47 -g53 -ssssg67 -(dp113 -g69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1' -p114 -sg71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1' -p115 -sssS'NM_003573.2:c.3125del' -p116 -(dp117 -g3 -g4 -sg5 -(lp118 -S'NC_000019.9:g.41123093AG>AG automapped to NC_000019.9:g.41123093_41123094AG=' -p119 -aS'The displayed variants may be artefacts of aligning NM_003573.2 with genome build GRCh37' -p120 -aS'NM_003573.2:c.3123 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p121 -aS'Caution should be used when reporting the displayed variant descriptions' -p122 -aS'If you are unsure, please contact admin' -p123 -aS'RefSeqGene record not available' -p124 -asg13 -g4 -sg14 -(lp125 -sg16 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA -p126 -sg18 -S'LTBP4' -p127 -sg20 -(dp128 -g22 -S'NP_003564.2:p.(Gly1042ValfsTer14)' -p129 -sg24 -S'NP_003564.2:p.(G1042Vfs*14)' -p130 -ssg26 -g27 -sg28 -g4 -sg29 -g4 -sg30 -S'NM_003573.2:c.3125del' -p131 -sg32 -g4 -sg33 -(dp132 -S'grch38' -p133 -(dp134 -g37 -S'NC_000019.10:g.40617190del' -p135 -sg39 -(dp136 -g41 -g42 -sg43 -S'AG' -p137 -sg45 -S'40617187' -p138 -sg47 -g48 -sssS'grch37' -p139 -(dp140 -g37 -S'NC_000019.9:g.41123093_41123094=' -p141 -sg39 -(dp142 -g41 -g42 -sg43 -g53 -sg45 -S'41123093' -p143 -sg47 -g53 -sssg55 -(dp144 -g37 -S'NC_000019.10:g.40617190del' -p145 -sg39 -(dp146 -g41 -g59 -sg43 -S'AG' -p147 -sg45 -S'40617187' -p148 -sg47 -g48 -sssS'hg19' -p149 -(dp150 -g37 -S'NC_000019.9:g.41123093_41123094=' -p151 -sg39 -(dp152 -g41 -g59 -sg43 -g53 -sg45 -S'41123093' -p153 -sg47 -g53 -ssssg67 -(dp154 -g69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2' -p155 -sg71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2' -p156 -sssS'metadata' -p157 -(dp158 -S'variantvalidator_hgvs_version' -p159 -S'1.1.3' -p160 -sS'uta_schema' -p161 
-S'uta_20180821' -p162 -sS'seqrepo_db' -p163 -S'2018-08-21' -p164 -sS'variantvalidator_version' -p165 -S'v0.2' -p166 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant188.txt b/VariantValidator/testing/testOutputsMasterITS/variant188.txt deleted file mode 100644 index 7c94f8d1..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant188.txt +++ /dev/null @@ -1,180 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_012309.4:c.913-5058G>A' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -aS'NM_012309.4:c.913-5058G>A cannot be mapped directly to genome build GRCh37' -p10 -aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g6 -sS'alt_genomic_loci' -p13 -(lp14 -(dp15 -S'grch37' -p16 -(dp17 -S'hgvs_genomic_description' -p18 -S'NW_004070871.1:g.574546C>T' -p19 -sS'vcf' -p20 -(dp21 -S'chr' -p22 -S'HG865_PATCH' -p23 -sS'ref' -p24 -VC -p25 -sS'pos' -p26 -S'574546' -p27 -sS'alt' -p28 -VT -p29 -sssa(dp30 -S'hg19' -p31 -(dp32 -g18 -S'NW_004070871.1:g.574546C>T' -p33 -sg20 -(dp34 -g22 -S'NW_004070871.1' -p35 -sg24 -g25 -sg26 -S'574546' -p36 -sg28 -g29 -sssasS'transcript_description' -p37 -VHomo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 1, mRNA -p38 -sS'gene_symbol' -p39 -S'SHANK2' -p40 -sS'hgvs_predicted_protein_consequence' -p41 -(dp42 -S'tlr' -p43 -S'NP_036441.2:p.?' -p44 -sS'slr' -p45 -S'NP_036441.2:p.?' 
-p46 -ssS'submitted_variant' -p47 -S'NM_012309.4:c.913-5058G>A' -p48 -sS'genome_context_intronic_sequence' -p49 -S'NC_000011.10(NM_012309.4):c.913-5058G>A' -p50 -sS'hgvs_lrg_variant' -p51 -g6 -sS'hgvs_transcript_variant' -p52 -S'NM_012309.4:c.913-5058G>A' -p53 -sS'hgvs_refseqgene_variant' -p54 -g6 -sS'primary_assembly_loci' -p55 -(dp56 -S'grch38' -p57 -(dp58 -g18 -S'NC_000011.10:g.71080333C>T' -p59 -sg20 -(dp60 -g22 -S'11' -p61 -sg24 -g25 -sg26 -S'71080333' -p62 -sg28 -g29 -sssS'hg38' -p63 -(dp64 -g18 -S'NC_000011.10:g.71080333C>T' -p65 -sg20 -(dp66 -g22 -S'chr11' -p67 -sg24 -g25 -sg26 -S'71080333' -p68 -sg28 -g29 -ssssS'reference_sequence_records' -p69 -(dp70 -S'protein' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_036441.2' -p72 -sS'transcript' -p73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_012309.4' -p74 -sssS'metadata' -p75 -(dp76 -S'variantvalidator_hgvs_version' -p77 -S'1.1.3' -p78 -sS'uta_schema' -p79 -S'uta_20180821' -p80 -sS'seqrepo_db' -p81 -S'2018-08-21' -p82 -sS'variantvalidator_version' -p83 -S'v0.2' -p84 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant189.txt b/VariantValidator/testing/testOutputsMasterITS/variant189.txt deleted file mode 100644 index 75351fd0..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant189.txt +++ /dev/null @@ -1,121 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Automap has extracted possible variant descriptions' -p7 -aS'No transcript definition for (tx_ac=LRG_199t1)' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -g4 -sS'gene_symbol' -p13 -g4 -sS'hgvs_predicted_protein_consequence' -p14 -(dp15 -S'tlr' -p16 -g4 -sS'slr' -p17 -g4 -ssS'submitted_variant' -p18 -S'LRG_199t1:c.2376[G>C];[G>C]' -p19 -sS'genome_context_intronic_sequence' -p20 -g4 -sS'hgvs_lrg_variant' -p21 -g4 -sS'hgvs_transcript_variant' -p22 -g4 -sS'hgvs_refseqgene_variant' -p23 -g4 -sS'primary_assembly_loci' -p24 -(dp25 -sS'reference_sequence_records' -p26 -g4 -ssS'flag' -p27 -S'warning' -p28 -sS'validation_warning_2' -p29 -(dp30 -g3 -g4 -sg5 -(lp31 -S'Automap has extracted possible variant descriptions' -p32 -aS'No transcript definition for (tx_ac=LRG_199t1)' -p33 -asg9 -g4 -sg10 -(lp34 -sg12 -g4 -sg13 -g4 -sg14 -(dp35 -g16 -g4 -sg17 -g4 -ssg18 -g19 -sg20 -g4 -sg21 -g4 -sg22 -g4 -sg23 -g4 -sg24 -(dp36 -sg26 -g4 -ssS'metadata' -p37 -(dp38 -S'variantvalidator_hgvs_version' -p39 -S'1.1.3' -p40 -sS'uta_schema' -p41 -S'uta_20180821' -p42 -sS'seqrepo_db' -p43 -S'2018-08-21' -p44 -sS'variantvalidator_version' -p45 -S'v0.2' -p46 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant19.txt b/VariantValidator/testing/testOutputsMasterITS/variant19.txt deleted file mode 100644 index 45468d9c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant19.txt +++ /dev/null @@ -1,80 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use NC_000011.9:g.5248381A=' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'hgvs_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NM_000518.4:c.-50-80C>T' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'hgvs_lrg_variant' -p20 -g4 -sS'hgvs_transcript_variant' -p21 -g4 -sS'hgvs_refseqgene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -sS'reference_sequence_records' -p25 -g4 -ssS'flag' -p26 -S'warning' -p27 -sS'metadata' -p28 -(dp29 -S'variantvalidator_hgvs_version' -p30 -S'1.1.3' -p31 -sS'uta_schema' -p32 -S'uta_20180821' -p33 -sS'seqrepo_db' -p34 -S'2018-08-21' -p35 -sS'variantvalidator_version' -p36 -S'v0.2' -p37 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant190.txt b/VariantValidator/testing/testOutputsMasterITS/variant190.txt deleted file mode 100644 index 406b2fbf..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant190.txt +++ /dev/null @@ -1,23 +0,0 @@ -(dp0 -S'flag' -p1 -NsS'metadata' -p2 -(dp3 -S'variantvalidator_hgvs_version' -p4 -S'1.1.3' -p5 -sS'uta_schema' -p6 -S'uta_20180821' -p7 -sS'seqrepo_db' -p8 -S'2018-08-21' -p9 -sS'variantvalidator_version' -p10 -S'v0.2' -p11 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant191.txt b/VariantValidator/testing/testOutputsMasterITS/variant191.txt deleted file mode 100644 index 406b2fbf..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant191.txt +++ /dev/null @@ -1,23 +0,0 @@ -(dp0 -S'flag' -p1 -NsS'metadata' -p2 -(dp3 -S'variantvalidator_hgvs_version' -p4 -S'1.1.3' -p5 -sS'uta_schema' -p6 -S'uta_20180821' -p7 -sS'seqrepo_db' -p8 -S'2018-08-21' -p9 -sS'variantvalidator_version' -p10 -S'v0.2' -p11 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant192.txt b/VariantValidator/testing/testOutputsMasterITS/variant192.txt deleted file mode 100644 index 6241f0af..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant192.txt +++ /dev/null @@ -1,121 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Automap has extracted possible variant descriptions' -p7 -aS'No transcript definition for (tx_ac=LRG_199t1)' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -g4 -sS'gene_symbol' -p13 -g4 -sS'hgvs_predicted_protein_consequence' -p14 -(dp15 -S'tlr' -p16 -g4 -sS'slr' -p17 -g4 -ssS'submitted_variant' -p18 -S'LRG_199t1:c.2376G>C(;)3103del' -p19 -sS'genome_context_intronic_sequence' -p20 -g4 -sS'hgvs_lrg_variant' -p21 -g4 -sS'hgvs_transcript_variant' -p22 -g4 -sS'hgvs_refseqgene_variant' -p23 -g4 -sS'primary_assembly_loci' -p24 -(dp25 -sS'reference_sequence_records' -p26 -g4 -ssS'flag' -p27 -S'warning' -p28 -sS'validation_warning_2' -p29 -(dp30 -g3 -g4 -sg5 -(lp31 -S'Automap has extracted possible variant descriptions' -p32 -aS'No transcript definition for (tx_ac=LRG_199t1)' -p33 -asg9 -g4 -sg10 -(lp34 -sg12 -g4 -sg13 -g4 -sg14 -(dp35 -g16 -g4 -sg17 -g4 -ssg18 -g19 -sg20 -g4 -sg21 -g4 -sg22 -g4 -sg23 -g4 -sg24 -(dp36 -sg26 -g4 
-ssS'metadata' -p37 -(dp38 -S'variantvalidator_hgvs_version' -p39 -S'1.1.3' -p40 -sS'uta_schema' -p41 -S'uta_20180821' -p42 -sS'seqrepo_db' -p43 -S'2018-08-21' -p44 -sS'variantvalidator_version' -p45 -S'v0.2' -p46 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant193.txt b/VariantValidator/testing/testOutputsMasterITS/variant193.txt deleted file mode 100644 index a6a49aff..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant193.txt +++ /dev/null @@ -1,82 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Automap has extracted possible variant descriptions' -p7 -aS'No transcript definition for (tx_ac=LRG_199t1)' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -g4 -sS'gene_symbol' -p13 -g4 -sS'hgvs_predicted_protein_consequence' -p14 -(dp15 -S'tlr' -p16 -g4 -sS'slr' -p17 -g4 -ssS'submitted_variant' -p18 -S'LRG_199t1:c.2376[G>C];[(G>C)]' -p19 -sS'genome_context_intronic_sequence' -p20 -g4 -sS'hgvs_lrg_variant' -p21 -g4 -sS'hgvs_transcript_variant' -p22 -g4 -sS'hgvs_refseqgene_variant' -p23 -g4 -sS'primary_assembly_loci' -p24 -(dp25 -sS'reference_sequence_records' -p26 -g4 -ssS'flag' -p27 -S'warning' -p28 -sS'metadata' -p29 -(dp30 -S'variantvalidator_hgvs_version' -p31 -S'1.1.3' -p32 -sS'uta_schema' -p33 -S'uta_20180821' -p34 -sS'seqrepo_db' -p35 -S'2018-08-21' -p36 -sS'variantvalidator_version' -p37 -S'v0.2' -p38 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant194.txt b/VariantValidator/testing/testOutputsMasterITS/variant194.txt deleted file mode 100644 index 406b2fbf..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant194.txt +++ /dev/null @@ -1,23 +0,0 @@ -(dp0 -S'flag' -p1 -NsS'metadata' -p2 -(dp3 -S'variantvalidator_hgvs_version' -p4 -S'1.1.3' -p5 -sS'uta_schema' -p6 -S'uta_20180821' -p7 -sS'seqrepo_db' -p8 -S'2018-08-21' -p9 -sS'variantvalidator_version' -p10 -S'v0.2' -p11 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant195.txt b/VariantValidator/testing/testOutputsMasterITS/variant195.txt deleted file mode 100644 index 406b2fbf..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant195.txt +++ /dev/null @@ -1,23 +0,0 @@ -(dp0 -S'flag' -p1 -NsS'metadata' -p2 -(dp3 -S'variantvalidator_hgvs_version' -p4 -S'1.1.3' -p5 -sS'uta_schema' -p6 -S'uta_20180821' -p7 -sS'seqrepo_db' -p8 -S'2018-08-21' -p9 -sS'variantvalidator_version' -p10 -S'v0.2' -p11 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant196.txt b/VariantValidator/testing/testOutputsMasterITS/variant196.txt deleted file mode 100644 index 406b2fbf..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant196.txt +++ /dev/null @@ -1,23 +0,0 @@ -(dp0 -S'flag' -p1 -NsS'metadata' -p2 -(dp3 -S'variantvalidator_hgvs_version' -p4 -S'1.1.3' -p5 -sS'uta_schema' -p6 -S'uta_20180821' -p7 -sS'seqrepo_db' -p8 -S'2018-08-21' -p9 -sS'variantvalidator_version' -p10 -S'v0.2' -p11 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant197.txt b/VariantValidator/testing/testOutputsMasterITS/variant197.txt deleted file mode 100644 index e1b8214b..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant197.txt +++ /dev/null @@ -1,80 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Intronic positions not supported for HGVS Allele descriptions' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'hgvs_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'LRG_199t1:c.[976-20T>A;976-17_976-1dup]' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'hgvs_lrg_variant' -p20 -g4 -sS'hgvs_transcript_variant' -p21 -g4 -sS'hgvs_refseqgene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -sS'reference_sequence_records' -p25 -g4 -ssS'flag' -p26 -S'warning' -p27 -sS'metadata' -p28 -(dp29 -S'variantvalidator_hgvs_version' -p30 -S'1.1.3' -p31 -sS'uta_schema' -p32 -S'uta_20180821' -p33 -sS'seqrepo_db' -p34 -S'2018-08-21' -p35 -sS'variantvalidator_version' -p36 -S'v0.2' -p37 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant198.txt b/VariantValidator/testing/testOutputsMasterITS/variant198.txt deleted file mode 100644 index e445129d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant198.txt +++ /dev/null @@ -1,606 +0,0 @@ -(dp0 -S'NM_015102.3:c.2818-2T>A' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'A more recent version of the selected reference sequence NM_015102.3 is available (NM_015102.4)' -p7 -aS'NM_015102.4:c.2818-2T>A MUST be fully validated prior to use in reports' -p8 -aS'select_variants=NM_015102.4:c.2818-2T>A' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g4 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens nephronophthisis 4 (NPHP4), mRNA -p15 -sS'gene_symbol' -p16 -S'NPHP4' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_055917.1:p.?' -p21 -sS'slr' -p22 -S'NP_055917.1:p.?' 
-p23 -ssS'submitted_variant' -p24 -S'1-5935162-A-T' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000001.10(NM_015102.3):c.2818-2T>A' -p27 -sS'hgvs_lrg_variant' -p28 -g4 -sS'hgvs_transcript_variant' -p29 -S'NM_015102.3:c.2818-2T>A' -p30 -sS'hgvs_refseqgene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000001.10:g.5935162A>T' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr1' -p41 -sS'ref' -p42 -VA -p43 -sS'pos' -p44 -S'5935162' -p45 -sS'alt' -p46 -VT -p47 -sssS'grch37' -p48 -(dp49 -g36 -S'NC_000001.10:g.5935162A>T' -p50 -sg38 -(dp51 -g40 -S'1' -p52 -sg42 -g43 -sg44 -S'5935162' -p53 -sg46 -g47 -ssssS'reference_sequence_records' -p54 -(dp55 -S'protein' -p56 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055917.1' -p57 -sS'transcript' -p58 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_015102.3' -p59 -sssS'NM_001291593.1:c.1279-2T>A' -p60 -(dp61 -g3 -g4 -sg5 -(lp62 -S'RefSeqGene record not available' -p63 -asg11 -g4 -sg12 -(lp64 -sg14 -VHomo sapiens nephrocystin 4 (NPHP4), transcript variant 2, mRNA -p65 -sg16 -S'NPHP4' -p66 -sg18 -(dp67 -g20 -S'NP_001278522.1:p.?' -p68 -sg22 -S'NP_001278522.1:p.?' 
-p69 -ssg24 -g25 -sg26 -S'NC_000001.10(NM_001291593.1):c.1279-2T>A' -p70 -sg28 -g4 -sg29 -S'NM_001291593.1:c.1279-2T>A' -p71 -sg31 -g4 -sg32 -(dp72 -S'hg19' -p73 -(dp74 -g36 -S'NC_000001.10:g.5935162A>T' -p75 -sg38 -(dp76 -g40 -g41 -sg42 -g43 -sg44 -S'5935162' -p77 -sg46 -g47 -sssS'hg38' -p78 -(dp79 -g36 -S'NC_000001.11:g.5875102T=' -p80 -sg38 -(dp81 -g40 -g41 -sg42 -S'T' -p82 -sg44 -S'5875102' -p83 -sg46 -g82 -sssS'grch37' -p84 -(dp85 -g36 -S'NC_000001.10:g.5935162A>T' -p86 -sg38 -(dp87 -g40 -g52 -sg42 -g43 -sg44 -S'5935162' -p88 -sg46 -g47 -sssS'grch38' -p89 -(dp90 -g36 -S'NC_000001.11:g.5875102T=' -p91 -sg38 -(dp92 -g40 -g52 -sg42 -g82 -sg44 -S'5875102' -p93 -sg46 -g82 -ssssg54 -(dp94 -g56 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278522.1' -p95 -sg58 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291593.1' -p96 -sssS'NM_015102.4:c.2818-2T>A' -p97 -(dp98 -g3 -g4 -sg5 -(lp99 -S'RefSeqGene record not available' -p100 -asg11 -g4 -sg12 -(lp101 -sg14 -VHomo sapiens nephrocystin 4 (NPHP4), transcript variant 1, mRNA -p102 -sg16 -S'NPHP4' -p103 -sg18 -(dp104 -g20 -S'NP_055917.1:p.?' -p105 -sg22 -S'NP_055917.1:p.?' 
-p106 -ssg24 -g25 -sg26 -S'NC_000001.10(NM_015102.4):c.2818-2T>A' -p107 -sg28 -g4 -sg29 -S'NM_015102.4:c.2818-2T>A' -p108 -sg31 -g4 -sg32 -(dp109 -S'hg19' -p110 -(dp111 -g36 -S'NC_000001.10:g.5935162A>T' -p112 -sg38 -(dp113 -g40 -g41 -sg42 -g43 -sg44 -S'5935162' -p114 -sg46 -g47 -sssg78 -(dp115 -g36 -S'NC_000001.11:g.5875102T=' -p116 -sg38 -(dp117 -g40 -g41 -sg42 -g82 -sg44 -S'5875102' -p118 -sg46 -g82 -sssS'grch37' -p119 -(dp120 -g36 -S'NC_000001.10:g.5935162A>T' -p121 -sg38 -(dp122 -g40 -g52 -sg42 -g43 -sg44 -S'5935162' -p123 -sg46 -g47 -sssS'grch38' -p124 -(dp125 -g36 -S'NC_000001.11:g.5875102T=' -p126 -sg38 -(dp127 -g40 -g52 -sg42 -g82 -sg44 -S'5875102' -p128 -sg46 -g82 -ssssg54 -(dp129 -g56 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055917.1' -p130 -sg58 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_015102.4' -p131 -sssS'NM_001291594.1:c.1282-2T>A' -p132 -(dp133 -g3 -g4 -sg5 -(lp134 -S'RefSeqGene record not available' -p135 -asg11 -g4 -sg12 -(lp136 -sg14 -VHomo sapiens nephrocystin 4 (NPHP4), transcript variant 3, mRNA -p137 -sg16 -S'NPHP4' -p138 -sg18 -(dp139 -g20 -S'NP_001278523.1:p.?' -p140 -sg22 -S'NP_001278523.1:p.?' 
-p141 -ssg24 -g25 -sg26 -S'NC_000001.10(NM_001291594.1):c.1282-2T>A' -p142 -sg28 -g4 -sg29 -S'NM_001291594.1:c.1282-2T>A' -p143 -sg31 -g4 -sg32 -(dp144 -S'hg19' -p145 -(dp146 -g36 -S'NC_000001.10:g.5935162A>T' -p147 -sg38 -(dp148 -g40 -g41 -sg42 -g43 -sg44 -S'5935162' -p149 -sg46 -g47 -sssg78 -(dp150 -g36 -S'NC_000001.11:g.5875102T=' -p151 -sg38 -(dp152 -g40 -g41 -sg42 -g82 -sg44 -S'5875102' -p153 -sg46 -g82 -sssS'grch37' -p154 -(dp155 -g36 -S'NC_000001.10:g.5935162A>T' -p156 -sg38 -(dp157 -g40 -g52 -sg42 -g43 -sg44 -S'5935162' -p158 -sg46 -g47 -sssS'grch38' -p159 -(dp160 -g36 -S'NC_000001.11:g.5875102T=' -p161 -sg38 -(dp162 -g40 -g52 -sg42 -g82 -sg44 -S'5875102' -p163 -sg46 -g82 -ssssg54 -(dp164 -g56 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278523.1' -p165 -sg58 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291594.1' -p166 -sssS'flag' -p167 -S'gene_variant' -p168 -sS'NR_111987.1:n.3633-2T>A' -p169 -(dp170 -g3 -g4 -sg5 -(lp171 -S'RefSeqGene record not available' -p172 -asg11 -g4 -sg12 -(lp173 -sg14 -VHomo sapiens nephrocystin 4 (NPHP4), transcript variant 4, non-coding RNA -p174 -sg16 -S'NPHP4' -p175 -sg18 -(dp176 -g20 -S'Non-coding :n.' 
-p177 -sg22 -g177 -ssg24 -g25 -sg26 -S'NC_000001.10(NR_111987.1):c.3633-2T>A' -p178 -sg28 -g4 -sg29 -S'NR_111987.1:n.3633-2T>A' -p179 -sg31 -g4 -sg32 -(dp180 -S'hg19' -p181 -(dp182 -g36 -S'NC_000001.10:g.5935162A>T' -p183 -sg38 -(dp184 -g40 -g41 -sg42 -g43 -sg44 -S'5935162' -p185 -sg46 -g47 -sssg78 -(dp186 -g36 -S'NC_000001.11:g.5875102T=' -p187 -sg38 -(dp188 -g40 -g41 -sg42 -g82 -sg44 -S'5875102' -p189 -sg46 -g82 -sssS'grch37' -p190 -(dp191 -g36 -S'NC_000001.10:g.5935162A>T' -p192 -sg38 -(dp193 -g40 -g52 -sg42 -g43 -sg44 -S'5935162' -p194 -sg46 -g47 -sssS'grch38' -p195 -(dp196 -g36 -S'NC_000001.11:g.5875102T=' -p197 -sg38 -(dp198 -g40 -g52 -sg42 -g82 -sg44 -S'5875102' -p199 -sg46 -g82 -ssssg54 -(dp200 -g58 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_111987.1' -p201 -sssS'metadata' -p202 -(dp203 -S'variantvalidator_hgvs_version' -p204 -S'1.1.3' -p205 -sS'uta_schema' -p206 -S'uta_20180821' -p207 -sS'seqrepo_db' -p208 -S'2018-08-21' -p209 -sS'variantvalidator_version' -p210 -S'v0.2' -p211 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant199.txt b/VariantValidator/testing/testOutputsMasterITS/variant199.txt deleted file mode 100644 index 7ff7a9a6..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant199.txt +++ /dev/null @@ -1,286 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_001127660.1:c.1676C>T' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens mitofusin 2 (MFN2), transcript variant 2, mRNA -p14 -sS'gene_symbol' -p15 -S'MFN2' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_001121132.1:p.(Pro559Leu)' -p20 -sS'slr' -p21 -S'NP_001121132.1:p.(P559L)' -p22 -ssS'submitted_variant' -p23 -S'1-12065948-C-T' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_001127660.1:c.1676C>T' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000001.10:g.12065948C>T' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr1' -p39 -sS'ref' -p40 -S'C' -p41 -sS'pos' -p42 -S'12065948' -p43 -sS'alt' -p44 -S'T' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000001.11:g.12005891C>T' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'12005891' -p50 -sg44 -g45 -sssS'grch37' -p51 -(dp52 -g34 -S'NC_000001.10:g.12065948C>T' -p53 -sg36 -(dp54 -g38 -S'1' -p55 -sg40 -g41 -sg42 -S'12065948' -p56 -sg44 -g45 -sssS'grch38' -p57 -(dp58 -g34 -S'NC_000001.11:g.12005891C>T' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'12005891' -p61 -sg44 -g45 -ssssS'reference_sequence_records' -p62 -(dp63 -S'protein' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001121132.1' -p65 -sS'transcript' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001127660.1' 
-p67 -sssS'NM_014874.3:c.1676C>T' -p68 -(dp69 -g5 -g6 -sg7 -(lp70 -S'RefSeqGene record not available' -p71 -asg10 -g6 -sg11 -(lp72 -sg13 -VHomo sapiens mitofusin 2 (MFN2), transcript variant 1, mRNA -p73 -sg15 -S'MFN2' -p74 -sg17 -(dp75 -g19 -S'NP_055689.1:p.(Pro559Leu)' -p76 -sg21 -S'NP_055689.1:p.(P559L)' -p77 -ssg23 -g24 -sg25 -g6 -sg26 -g6 -sg27 -S'NM_014874.3:c.1676C>T' -p78 -sg29 -g6 -sg30 -(dp79 -S'hg19' -p80 -(dp81 -g34 -S'NC_000001.10:g.12065948C>T' -p82 -sg36 -(dp83 -g38 -g39 -sg40 -g41 -sg42 -S'12065948' -p84 -sg44 -g45 -sssg46 -(dp85 -g34 -S'NC_000001.11:g.12005891C>T' -p86 -sg36 -(dp87 -g38 -g39 -sg40 -g41 -sg42 -S'12005891' -p88 -sg44 -g45 -sssS'grch37' -p89 -(dp90 -g34 -S'NC_000001.10:g.12065948C>T' -p91 -sg36 -(dp92 -g38 -g55 -sg40 -g41 -sg42 -S'12065948' -p93 -sg44 -g45 -sssS'grch38' -p94 -(dp95 -g34 -S'NC_000001.11:g.12005891C>T' -p96 -sg36 -(dp97 -g38 -g55 -sg40 -g41 -sg42 -S'12005891' -p98 -sg44 -g45 -ssssg62 -(dp99 -g64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055689.1' -p100 -sg66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014874.3' -p101 -sssS'metadata' -p102 -(dp103 -S'variantvalidator_hgvs_version' -p104 -S'1.1.3' -p105 -sS'uta_schema' -p106 -S'uta_20180821' -p107 -sS'seqrepo_db' -p108 -S'2018-08-21' -p109 -sS'variantvalidator_version' -p110 -S'v0.2' -p111 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant2.txt b/VariantValidator/testing/testOutputsMasterITS/variant2.txt deleted file mode 100644 index b4c0701c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant2.txt +++ /dev/null @@ -1,174 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_015120.4:c.39G>C' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA -p14 -sS'gene_symbol' -p15 -S'ALMS1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_055935.4:p.(Glu13Asp)' -p20 -sS'slr' -p21 -S'NP_055935.4:p.(E13D)' -p22 -ssS'submitted_variant' -p23 -S'NM_015120.4:c.39G>C' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_015120.4:c.39G>C' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000002.11:g.73613034_73613035insCGA' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr2' -p39 -sS'ref' -p40 -S'G' -p41 -sS'pos' -p42 -S'73613032' -p43 -sS'alt' -p44 -S'GGAC' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000002.12:g.73385906_73385907insCGA' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'73385904' -p50 -sg44 -S'GGAC' -p51 -sssS'grch37' -p52 -(dp53 -g34 -S'NC_000002.11:g.73613034_73613035insCGA' -p54 -sg36 -(dp55 -g38 -S'2' -p56 -sg40 -g41 -sg42 -S'73613032' -p57 -sg44 -S'GGAC' -p58 -sssS'grch38' -p59 -(dp60 -g34 -S'NC_000002.12:g.73385906_73385907insCGA' -p61 -sg36 -(dp62 -g38 -g56 -sg40 -g41 -sg42 -S'73385904' -p63 -sg44 -S'GGAC' -p64 -ssssS'reference_sequence_records' -p65 -(dp66 -S'protein' -p67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4' -p68 
-sS'transcript' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4' -p70 -sssS'metadata' -p71 -(dp72 -S'variantvalidator_hgvs_version' -p73 -S'1.1.3' -p74 -sS'uta_schema' -p75 -S'uta_20180821' -p76 -sS'seqrepo_db' -p77 -S'2018-08-21' -p78 -sS'variantvalidator_version' -p79 -S'v0.2' -p80 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant20.txt b/VariantValidator/testing/testOutputsMasterITS/variant20.txt deleted file mode 100644 index 0a044332..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant20.txt +++ /dev/null @@ -1,82 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant' -p7 -aS'Instead use NC_000011.9:g.5246486_5246956delinsAAGTAG' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -g4 -sS'gene_symbol' -p13 -g4 -sS'hgvs_predicted_protein_consequence' -p14 -(dp15 -S'tlr' -p16 -g4 -sS'slr' -p17 -g4 -ssS'submitted_variant' -p18 -S'NM_000518.4:c.316_*342delinsCTACTT' -p19 -sS'genome_context_intronic_sequence' -p20 -g4 -sS'hgvs_lrg_variant' -p21 -g4 -sS'hgvs_transcript_variant' -p22 -g4 -sS'hgvs_refseqgene_variant' -p23 -g4 -sS'primary_assembly_loci' -p24 -(dp25 -sS'reference_sequence_records' -p26 -g4 -ssS'flag' -p27 -S'warning' -p28 -sS'metadata' -p29 -(dp30 -S'variantvalidator_hgvs_version' -p31 -S'1.1.3' -p32 -sS'uta_schema' -p33 -S'uta_20180821' -p34 -sS'seqrepo_db' -p35 -S'2018-08-21' -p36 -sS'variantvalidator_version' -p37 -S'v0.2' -p38 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant200.txt b/VariantValidator/testing/testOutputsMasterITS/variant200.txt deleted file mode 100644 index 8f017a82..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant200.txt +++ /dev/null @@ -1,543 +0,0 @@ -(dp0 -S'NM_001290129.1:c.1829+5_1829+8del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000001.10:g.46655125CTCAC>C automapped to NC_000001.10:g.46655126_46655129del' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens protein O-linked mannose N-acetylglucosaminyltransferase 1 (beta 1,2-) (POMGNT1), transcript variant 3, mRNA -p13 -sS'gene_symbol' -p14 -S'POMGNT1' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001277058.1:p.?' -p19 -sS'slr' -p20 -S'NP_001277058.1:p.?' -p21 -ssS'submitted_variant' -p22 -S'1-46655125-CTCAC-C' -p23 -sS'genome_context_intronic_sequence' -p24 -S'NC_000001.10(NM_001290129.1):c.1829+5_1829+8del' -p25 -sS'hgvs_lrg_variant' -p26 -g4 -sS'hgvs_transcript_variant' -p27 -S'NM_001290129.1:c.1829+5_1829+8del' -p28 -sS'hgvs_refseqgene_variant' -p29 -g4 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000001.10:g.46655122_46655125del' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr1' -p39 -sS'ref' -p40 -S'GTCAC' -p41 -sS'pos' -p42 -S'46655121' -p43 -sS'alt' -p44 -S'G' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000001.11:g.46189450_46189453del' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'GTCAC' -p50 -sg42 -S'46189449' -p51 -sg44 -g45 -sssS'grch37' -p52 -(dp53 -g34 -S'NC_000001.10:g.46655122_46655125del' -p54 -sg36 -(dp55 -g38 -S'1' -p56 -sg40 -S'GTCAC' -p57 -sg42 -S'46655121' -p58 -sg44 -g45 -sssS'grch38' -p59 -(dp60 -g34 -S'NC_000001.11:g.46189450_46189453del' -p61 -sg36 -(dp62 -g38 -g56 -sg40 
-S'GTCAC' -p63 -sg42 -S'46189449' -p64 -sg44 -g45 -ssssS'reference_sequence_records' -p65 -(dp66 -S'protein' -p67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001277058.1' -p68 -sS'transcript' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001290129.1' -p70 -sssS'NM_001290130.1:c.1466+5_1466+8del' -p71 -(dp72 -g3 -g4 -sg5 -(lp73 -S'NC_000001.10:g.46655125CTCAC>C automapped to NC_000001.10:g.46655126_46655129del' -p74 -aS'RefSeqGene record not available' -p75 -asg9 -g4 -sg10 -(lp76 -sg12 -VHomo sapiens protein O-linked mannose N-acetylglucosaminyltransferase 1 (beta 1,2-) (POMGNT1), transcript variant 4, mRNA -p77 -sg14 -S'POMGNT1' -p78 -sg16 -(dp79 -g18 -S'NP_001277059.1:p.?' -p80 -sg20 -S'NP_001277059.1:p.?' -p81 -ssg22 -g23 -sg24 -S'NC_000001.10(NM_001290130.1):c.1466+5_1466+8del' -p82 -sg26 -g4 -sg27 -S'NM_001290130.1:c.1466+5_1466+8del' -p83 -sg29 -g4 -sg30 -(dp84 -S'hg19' -p85 -(dp86 -g34 -S'NC_000001.10:g.46655122_46655125del' -p87 -sg36 -(dp88 -g38 -g39 -sg40 -S'GTCAC' -p89 -sg42 -S'46655121' -p90 -sg44 -g45 -sssg46 -(dp91 -g34 -S'NC_000001.11:g.46189450_46189453del' -p92 -sg36 -(dp93 -g38 -g39 -sg40 -S'GTCAC' -p94 -sg42 -S'46189449' -p95 -sg44 -g45 -sssS'grch37' -p96 -(dp97 -g34 -S'NC_000001.10:g.46655122_46655125del' -p98 -sg36 -(dp99 -g38 -g56 -sg40 -S'GTCAC' -p100 -sg42 -S'46655121' -p101 -sg44 -g45 -sssS'grch38' -p102 -(dp103 -g34 -S'NC_000001.11:g.46189450_46189453del' -p104 -sg36 -(dp105 -g38 -g56 -sg40 -S'GTCAC' -p106 -sg42 -S'46189449' -p107 -sg44 -g45 -ssssg65 -(dp108 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001277059.1' -p109 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001290130.1' -p110 -sssS'NM_017739.3:c.1895+5_1895+8del' -p111 -(dp112 -g3 -g4 -sg5 -(lp113 -S'NC_000001.10:g.46655125CTCAC>C automapped to NC_000001.10:g.46655126_46655129del' -p114 -aS'RefSeqGene record not available' -p115 -asg9 -g4 -sg10 -(lp116 -sg12 -VHomo sapiens protein O-linked mannose N-acetylglucosaminyltransferase 1 (beta 1,2-) (POMGNT1), transcript variant 1, 
mRNA -p117 -sg14 -S'POMGNT1' -p118 -sg16 -(dp119 -g18 -S'NP_060209.3:p.?' -p120 -sg20 -S'NP_060209.3:p.?' -p121 -ssg22 -g23 -sg24 -S'NC_000001.10(NM_017739.3):c.1895+5_1895+8del' -p122 -sg26 -g4 -sg27 -S'NM_017739.3:c.1895+5_1895+8del' -p123 -sg29 -g4 -sg30 -(dp124 -S'hg19' -p125 -(dp126 -g34 -S'NC_000001.10:g.46655122_46655125del' -p127 -sg36 -(dp128 -g38 -g39 -sg40 -S'GTCAC' -p129 -sg42 -S'46655121' -p130 -sg44 -g45 -sssg46 -(dp131 -g34 -S'NC_000001.11:g.46189450_46189453del' -p132 -sg36 -(dp133 -g38 -g39 -sg40 -S'GTCAC' -p134 -sg42 -S'46189449' -p135 -sg44 -g45 -sssS'grch37' -p136 -(dp137 -g34 -S'NC_000001.10:g.46655122_46655125del' -p138 -sg36 -(dp139 -g38 -g56 -sg40 -S'GTCAC' -p140 -sg42 -S'46655121' -p141 -sg44 -g45 -sssS'grch38' -p142 -(dp143 -g34 -S'NC_000001.11:g.46189450_46189453del' -p144 -sg36 -(dp145 -g38 -g56 -sg40 -S'GTCAC' -p146 -sg42 -S'46189449' -p147 -sg44 -g45 -ssssg65 -(dp148 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_060209.3' -p149 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_017739.3' -p150 -sssS'NM_001243766.1:c.1869+31_1869+34del' -p151 -(dp152 -g3 -g4 -sg5 -(lp153 -S'NC_000001.10:g.46655125CTCAC>C automapped to NC_000001.10:g.46655126_46655129del' -p154 -aS'RefSeqGene record not available' -p155 -asg9 -g4 -sg10 -(lp156 -sg12 -VHomo sapiens protein O-linked mannose N-acetylglucosaminyltransferase 1 (beta 1,2-) (POMGNT1), transcript variant 2, mRNA -p157 -sg14 -S'POMGNT1' -p158 -sg16 -(dp159 -g18 -S'NP_001230695.1:p.?' -p160 -sg20 -S'NP_001230695.1:p.?' 
-p161 -ssg22 -g23 -sg24 -S'NC_000001.10(NM_001243766.1):c.1869+31_1869+34del' -p162 -sg26 -g4 -sg27 -S'NM_001243766.1:c.1869+31_1869+34del' -p163 -sg29 -g4 -sg30 -(dp164 -S'hg19' -p165 -(dp166 -g34 -S'NC_000001.10:g.46655122_46655125del' -p167 -sg36 -(dp168 -g38 -g39 -sg40 -S'GTCAC' -p169 -sg42 -S'46655121' -p170 -sg44 -g45 -sssg46 -(dp171 -g34 -S'NC_000001.11:g.46189450_46189453del' -p172 -sg36 -(dp173 -g38 -g39 -sg40 -S'GTCAC' -p174 -sg42 -S'46189449' -p175 -sg44 -g45 -sssS'grch37' -p176 -(dp177 -g34 -S'NC_000001.10:g.46655122_46655125del' -p178 -sg36 -(dp179 -g38 -g56 -sg40 -S'GTCAC' -p180 -sg42 -S'46655121' -p181 -sg44 -g45 -sssS'grch38' -p182 -(dp183 -g34 -S'NC_000001.11:g.46189450_46189453del' -p184 -sg36 -(dp185 -g38 -g56 -sg40 -S'GTCAC' -p186 -sg42 -S'46189449' -p187 -sg44 -g45 -ssssg65 -(dp188 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001230695.1' -p189 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001243766.1' -p190 -sssS'flag' -p191 -S'gene_variant' -p192 -sS'metadata' -p193 -(dp194 -S'variantvalidator_hgvs_version' -p195 -S'1.1.3' -p196 -sS'uta_schema' -p197 -S'uta_20180821' -p198 -sS'seqrepo_db' -p199 -S'2018-08-21' -p200 -sS'variantvalidator_version' -p201 -S'v0.2' -p202 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant201.txt b/VariantValidator/testing/testOutputsMasterITS/variant201.txt deleted file mode 100644 index 7ea368a7..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant201.txt +++ /dev/null @@ -1,176 +0,0 @@ -(dp0 -S'NM_000329.2:c.106_114del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000001.10:g.68912523TGAGCCAGAG>T automapped to NC_000001.10:g.68912525_68912533del' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens RPE65, retinoid isomerohydrolase (RPE65), mRNA -p13 -sS'gene_symbol' -p14 -S'RPE65' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_000320.1:p.(Leu36_Leu38del)' -p19 -sS'slr' -p20 -S'NP_000320.1:p.(L36_L38del)' -p21 -ssS'submitted_variant' -p22 -S'1-68912523-TGAGCCAGAG-T' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_000329.2:c.106_114del' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000001.10:g.68912524_68912532del' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr1' -p38 -sS'ref' -p39 -S'TGAGCCAGAG' -p40 -sS'pos' -p41 -S'68912523' -p42 -sS'alt' -p43 -S'T' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000001.11:g.68446841_68446849del' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'TGAGCCAGAG' -p49 -sg41 -S'68446840' -p50 -sg43 -g44 -sssS'grch37' -p51 -(dp52 -g33 -S'NC_000001.10:g.68912524_68912532del' -p53 -sg35 -(dp54 -g37 -S'1' -p55 -sg39 -S'TGAGCCAGAG' -p56 -sg41 -S'68912523' -p57 -sg43 -g44 -sssS'grch38' -p58 -(dp59 -g33 -S'NC_000001.11:g.68446841_68446849del' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -S'TGAGCCAGAG' -p62 -sg41 -S'68446840' -p63 -sg43 -g44 -ssssS'reference_sequence_records' -p64 
-(dp65 -S'protein' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000320.1' -p67 -sS'transcript' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000329.2' -p69 -sssS'flag' -p70 -S'gene_variant' -p71 -sS'metadata' -p72 -(dp73 -S'variantvalidator_hgvs_version' -p74 -S'1.1.3' -p75 -sS'uta_schema' -p76 -S'uta_20180821' -p77 -sS'seqrepo_db' -p78 -S'2018-08-21' -p79 -sS'variantvalidator_version' -p80 -S'v0.2' -p81 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant202.txt b/VariantValidator/testing/testOutputsMasterITS/variant202.txt deleted file mode 100644 index 7c710760..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant202.txt +++ /dev/null @@ -1,176 +0,0 @@ -(dp0 -S'NM_000329.2:c.109_114del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000001.10:g.68912526GCCAGAG>G automapped to NC_000001.10:g.68912527_68912532del' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens RPE65, retinoid isomerohydrolase (RPE65), mRNA -p13 -sS'gene_symbol' -p14 -S'RPE65' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_000320.1:p.(Trp37_Leu38del)' -p19 -sS'slr' -p20 -S'NP_000320.1:p.(W37_L38del)' -p21 -ssS'submitted_variant' -p22 -S'1-68912526-GCCAGAG-G' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_000329.2:c.109_114del' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000001.10:g.68912524_68912529del' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr1' -p38 -sS'ref' -p39 -S'TGAGCCA' -p40 -sS'pos' -p41 -S'68912523' -p42 -sS'alt' -p43 -S'T' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000001.11:g.68446841_68446846del' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'TGAGCCA' 
-p49 -sg41 -S'68446840' -p50 -sg43 -g44 -sssS'grch37' -p51 -(dp52 -g33 -S'NC_000001.10:g.68912524_68912529del' -p53 -sg35 -(dp54 -g37 -S'1' -p55 -sg39 -S'TGAGCCA' -p56 -sg41 -S'68912523' -p57 -sg43 -g44 -sssS'grch38' -p58 -(dp59 -g33 -S'NC_000001.11:g.68446841_68446846del' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -S'TGAGCCA' -p62 -sg41 -S'68446840' -p63 -sg43 -g44 -ssssS'reference_sequence_records' -p64 -(dp65 -S'protein' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000320.1' -p67 -sS'transcript' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000329.2' -p69 -sssS'flag' -p70 -S'gene_variant' -p71 -sS'metadata' -p72 -(dp73 -S'variantvalidator_hgvs_version' -p74 -S'1.1.3' -p75 -sS'uta_schema' -p76 -S'uta_20180821' -p77 -sS'seqrepo_db' -p78 -S'2018-08-21' -p79 -sS'variantvalidator_version' -p80 -S'v0.2' -p81 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant203.txt b/VariantValidator/testing/testOutputsMasterITS/variant203.txt deleted file mode 100644 index e99fdc71..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant203.txt +++ /dev/null @@ -1,171 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_001408.2:c.*919G>T' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens cadherin EGF LAG seven-pass G-type receptor 2 (CELSR2), mRNA -p14 -sS'gene_symbol' -p15 -S'CELSR2' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_001399.1:p.?' -p20 -sS'slr' -p21 -S'NP_001399.1:p.?' 
-p22 -ssS'submitted_variant' -p23 -S'1-109817590-G-T' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_001408.2:c.*919G>T' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000001.10:g.109817590G>T' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr1' -p39 -sS'ref' -p40 -S'G' -p41 -sS'pos' -p42 -S'109817590' -p43 -sS'alt' -p44 -S'T' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000001.11:g.109274968G>T' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'109274968' -p50 -sg44 -g45 -sssS'grch37' -p51 -(dp52 -g34 -S'NC_000001.10:g.109817590G>T' -p53 -sg36 -(dp54 -g38 -S'1' -p55 -sg40 -g41 -sg42 -S'109817590' -p56 -sg44 -g45 -sssS'grch38' -p57 -(dp58 -g34 -S'NC_000001.11:g.109274968G>T' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'109274968' -p61 -sg44 -g45 -ssssS'reference_sequence_records' -p62 -(dp63 -S'protein' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001399.1' -p65 -sS'transcript' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001408.2' -p67 -sssS'metadata' -p68 -(dp69 -S'variantvalidator_hgvs_version' -p70 -S'1.1.3' -p71 -sS'uta_schema' -p72 -S'uta_20180821' -p73 -sS'seqrepo_db' -p74 -S'2018-08-21' -p75 -sS'variantvalidator_version' -p76 -S'v0.2' -p77 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant204.txt b/VariantValidator/testing/testOutputsMasterITS/variant204.txt deleted file mode 100644 index 2f2f8599..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant204.txt +++ /dev/null @@ -1,511 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_006468.6:c.1070+35_1070+38del' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'NC_000001.10:g.145597475GAAGT>G automapped to NC_000001.10:g.145597477_145597480del' -p19 -aS'A more recent version of the selected reference sequence NM_006468.6 is available (NM_006468.7)' -p20 -aS'NM_006468.7:c.1070+35_1070+38del MUST be fully validated prior to use in reports' -p21 -aS'select_variants=NM_006468.7:c.1070+35_1070+38del' -p22 -aS'RefSeqGene record not available' -p23 -asS'refseqgene_context_intronic_sequence' -p24 -g16 -sS'alt_genomic_loci' -p25 -(lp26 -(dp27 -S'grch37' -p28 -(dp29 -S'hgvs_genomic_description' -p30 -S'NW_003871055.3:g.2653044_2653047del' -p31 -sS'vcf' -p32 -(dp33 -S'chr' -p34 -S'HG1287_PATCH' -p35 -sS'ref' -p36 -S'ATACT' -p37 -sS'pos' -p38 -S'2653042' -p39 -sS'alt' -p40 -S'A' -p41 -sssa(dp42 -S'hg19' -p43 -(dp44 -g30 -S'NW_003871055.3:g.2653044_2653047del' -p45 -sg32 -(dp46 -g34 -S'NW_003871055.3' -p47 -sg36 -S'ATACT' -p48 -sg38 -S'2653042' -p49 -sg40 -g41 -sssasS'transcript_description' -p50 -VHomo sapiens polymerase (RNA) III (DNA directed) polypeptide C (62kD) (POLR3C), mRNA -p51 -sS'gene_symbol' -p52 -S'POLR3C' -p53 -sS'hgvs_predicted_protein_consequence' -p54 -(dp55 -S'tlr' -p56 -S'NP_006459.3:p.?' -p57 -sS'slr' -p58 -S'NP_006459.3:p.?' 
-p59 -ssS'submitted_variant' -p60 -S'1-145597475-GAAGT-G' -p61 -sS'genome_context_intronic_sequence' -p62 -S'NC_000001.10(NM_006468.6):c.1070+35_1070+38del' -p63 -sS'hgvs_lrg_variant' -p64 -g16 -sS'hgvs_transcript_variant' -p65 -S'NM_006468.6:c.1070+35_1070+38del' -p66 -sS'hgvs_refseqgene_variant' -p67 -g16 -sS'primary_assembly_loci' -p68 -(dp69 -S'hg19' -p70 -(dp71 -g30 -S'NC_000001.10:g.145597477_145597480del' -p72 -sg32 -(dp73 -g34 -S'chr1' -p74 -sg36 -S'GAAGT' -p75 -sg38 -S'145597475' -p76 -sg40 -S'G' -p77 -sssS'grch37' -p78 -(dp79 -g30 -S'NC_000001.10:g.145597477_145597480del' -p80 -sg32 -(dp81 -g34 -S'1' -p82 -sg36 -S'GAAGT' -p83 -sg38 -S'145597475' -p84 -sg40 -g77 -ssssS'reference_sequence_records' -p85 -(dp86 -S'protein' -p87 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_006459.3' -p88 -sS'transcript' -p89 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_006468.6' -p90 -sssS'NM_001303456.1:c.1109+35_1109+38del' -p91 -(dp92 -g15 -g16 -sg17 -(lp93 -S'NC_000001.10:g.145597475GAAGT>G automapped to NC_000001.10:g.145597477_145597480del' -p94 -aS'RefSeqGene record not available' -p95 -asg24 -g16 -sg25 -(lp96 -(dp97 -S'grch37' -p98 -(dp99 -g30 -S'NW_003871055.3:g.2653044_2653047del' -p100 -sg32 -(dp101 -g34 -g35 -sg36 -S'ATACT' -p102 -sg38 -S'2653042' -p103 -sg40 -g41 -sssa(dp104 -S'hg19' -p105 -(dp106 -g30 -S'NW_003871055.3:g.2653044_2653047del' -p107 -sg32 -(dp108 -g34 -S'NW_003871055.3' -p109 -sg36 -S'ATACT' -p110 -sg38 -S'2653042' -p111 -sg40 -g41 -sssasg50 -VHomo sapiens RNA polymerase III subunit C (POLR3C), transcript variant 2, mRNA -p112 -sg52 -S'POLR3C' -p113 -sg54 -(dp114 -g56 -S'NP_001290385.1:p.?' -p115 -sg58 -S'NP_001290385.1:p.?' 
-p116 -ssg60 -g61 -sg62 -S'NC_000001.10(NM_001303456.1):c.1109+35_1109+38del' -p117 -sg64 -g16 -sg65 -S'NM_001303456.1:c.1109+35_1109+38del' -p118 -sg67 -g16 -sg68 -(dp119 -S'hg19' -p120 -(dp121 -g30 -S'NC_000001.10:g.145597477_145597480del' -p122 -sg32 -(dp123 -g34 -g74 -sg36 -S'GAAGT' -p124 -sg38 -S'145597475' -p125 -sg40 -g77 -sssS'hg38' -p126 -(dp127 -g30 -S'NC_000001.11:g.145837631_145837634del' -p128 -sg32 -(dp129 -g34 -g74 -sg36 -S'ATACT' -p130 -sg38 -S'145837629' -p131 -sg40 -g41 -sssS'grch37' -p132 -(dp133 -g30 -S'NC_000001.10:g.145597477_145597480del' -p134 -sg32 -(dp135 -g34 -g82 -sg36 -S'GAAGT' -p136 -sg38 -S'145597475' -p137 -sg40 -g77 -sssS'grch38' -p138 -(dp139 -g30 -S'NC_000001.11:g.145837631_145837634del' -p140 -sg32 -(dp141 -g34 -g82 -sg36 -S'ATACT' -p142 -sg38 -S'145837629' -p143 -sg40 -g41 -ssssg85 -(dp144 -g87 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001290385.1' -p145 -sg89 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001303456.1' -p146 -sssS'NM_006468.7:c.1070+35_1070+38del' -p147 -(dp148 -g15 -g16 -sg17 -(lp149 -S'NC_000001.10:g.145597475GAAGT>G automapped to NC_000001.10:g.145597477_145597480del' -p150 -aS'RefSeqGene record not available' -p151 -asg24 -g16 -sg25 -(lp152 -(dp153 -S'grch37' -p154 -(dp155 -g30 -S'NW_003871055.3:g.2653044_2653047del' -p156 -sg32 -(dp157 -g34 -g35 -sg36 -S'ATACT' -p158 -sg38 -S'2653042' -p159 -sg40 -g41 -sssa(dp160 -S'hg19' -p161 -(dp162 -g30 -S'NW_003871055.3:g.2653044_2653047del' -p163 -sg32 -(dp164 -g34 -S'NW_003871055.3' -p165 -sg36 -S'ATACT' -p166 -sg38 -S'2653042' -p167 -sg40 -g41 -sssasg50 -VHomo sapiens RNA polymerase III subunit C (POLR3C), transcript variant 1, mRNA -p168 -sg52 -S'POLR3C' -p169 -sg54 -(dp170 -g56 -S'NP_006459.3:p.?' -p171 -sg58 -S'NP_006459.3:p.?' 
-p172 -ssg60 -g61 -sg62 -S'NC_000001.10(NM_006468.7):c.1070+35_1070+38del' -p173 -sg64 -g16 -sg65 -S'NM_006468.7:c.1070+35_1070+38del' -p174 -sg67 -g16 -sg68 -(dp175 -S'hg19' -p176 -(dp177 -g30 -S'NC_000001.10:g.145597477_145597480del' -p178 -sg32 -(dp179 -g34 -g74 -sg36 -S'GAAGT' -p180 -sg38 -S'145597475' -p181 -sg40 -g77 -sssg126 -(dp182 -g30 -S'NC_000001.11:g.145837631_145837634del' -p183 -sg32 -(dp184 -g34 -g74 -sg36 -S'ATACT' -p185 -sg38 -S'145837629' -p186 -sg40 -g41 -sssS'grch37' -p187 -(dp188 -g30 -S'NC_000001.10:g.145597477_145597480del' -p189 -sg32 -(dp190 -g34 -g82 -sg36 -S'GAAGT' -p191 -sg38 -S'145597475' -p192 -sg40 -g77 -sssS'grch38' -p193 -(dp194 -g30 -S'NC_000001.11:g.145837631_145837634del' -p195 -sg32 -(dp196 -g34 -g82 -sg36 -S'ATACT' -p197 -sg38 -S'145837629' -p198 -sg40 -g41 -ssssg85 -(dp199 -g87 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_006459.3' -p200 -sg89 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_006468.7' -p201 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant205.txt b/VariantValidator/testing/testOutputsMasterITS/variant205.txt deleted file mode 100644 index f6eebae5..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant205.txt +++ /dev/null @@ -1,303 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_020699.2:c.562_563del' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000001.10:g.153791300CTG>C automapped to NC_000001.10:g.153791302_153791303delGT' -p9 -aS'A more recent version of the selected reference sequence NM_020699.2 is available (NM_020699.3)' -p10 -aS'NM_020699.3:c.562_563delCA MUST be fully validated prior to use in reports' -p11 -aS'select_variants=NM_020699.3:c.562_563del' -p12 -aS'RefSeqGene record not available' -p13 -asS'refseqgene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens GATA zinc finger domain containing 2B (GATAD2B), mRNA 
-p18 -sS'gene_symbol' -p19 -S'GATAD2B' -p20 -sS'hgvs_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_065750.1:p.(Gln188GlufsTer36)' -p24 -sS'slr' -p25 -S'NP_065750.1:p.(Q188Efs*36)' -p26 -ssS'submitted_variant' -p27 -S'1-153791300-CTG-C' -p28 -sS'genome_context_intronic_sequence' -p29 -g6 -sS'hgvs_lrg_variant' -p30 -g6 -sS'hgvs_transcript_variant' -p31 -S'NM_020699.2:c.562_563del' -p32 -sS'hgvs_refseqgene_variant' -p33 -g6 -sS'primary_assembly_loci' -p34 -(dp35 -S'hg19' -p36 -(dp37 -S'hgvs_genomic_description' -p38 -S'NC_000001.10:g.153791301_153791302del' -p39 -sS'vcf' -p40 -(dp41 -S'chr' -p42 -S'chr1' -p43 -sS'ref' -p44 -S'CTG' -p45 -sS'pos' -p46 -S'153791300' -p47 -sS'alt' -p48 -S'C' -p49 -sssS'hg38' -p50 -(dp51 -g38 -S'NC_000001.11:g.153818825_153818826del' -p52 -sg40 -(dp53 -g42 -g43 -sg44 -S'CTG' -p54 -sg46 -S'153818824' -p55 -sg48 -g49 -sssS'grch37' -p56 -(dp57 -g38 -S'NC_000001.10:g.153791301_153791302del' -p58 -sg40 -(dp59 -g42 -S'1' -p60 -sg44 -S'CTG' -p61 -sg46 -S'153791300' -p62 -sg48 -g49 -sssS'grch38' -p63 -(dp64 -g38 -S'NC_000001.11:g.153818825_153818826del' -p65 -sg40 -(dp66 -g42 -g60 -sg44 -S'CTG' -p67 -sg46 -S'153818824' -p68 -sg48 -g49 -ssssS'reference_sequence_records' -p69 -(dp70 -S'protein' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_065750.1' -p72 -sS'transcript' -p73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_020699.2' -p74 -sssS'NM_020699.3:c.562_563del' -p75 -(dp76 -g5 -g6 -sg7 -(lp77 -S'NC_000001.10:g.153791300CTG>C automapped to NC_000001.10:g.153791302_153791303delGT' -p78 -aS'RefSeqGene record not available' -p79 -asg14 -g6 -sg15 -(lp80 -sg17 -VHomo sapiens GATA zinc finger domain containing 2B (GATAD2B), mRNA -p81 -sg19 -S'GATAD2B' -p82 -sg21 -(dp83 -g23 -S'NP_065750.1:p.(Gln188GlufsTer36)' -p84 -sg25 -S'NP_065750.1:p.(Q188Efs*36)' -p85 -ssg27 -g28 -sg29 -g6 -sg30 -g6 -sg31 -S'NM_020699.3:c.562_563del' -p86 -sg33 -g6 -sg34 -(dp87 -S'hg19' -p88 -(dp89 -g38 -S'NC_000001.10:g.153791301_153791302del' -p90 -sg40 -(dp91 
-g42 -g43 -sg44 -S'CTG' -p92 -sg46 -S'153791300' -p93 -sg48 -g49 -sssg50 -(dp94 -g38 -S'NC_000001.11:g.153818825_153818826del' -p95 -sg40 -(dp96 -g42 -g43 -sg44 -S'CTG' -p97 -sg46 -S'153818824' -p98 -sg48 -g49 -sssS'grch37' -p99 -(dp100 -g38 -S'NC_000001.10:g.153791301_153791302del' -p101 -sg40 -(dp102 -g42 -g60 -sg44 -S'CTG' -p103 -sg46 -S'153791300' -p104 -sg48 -g49 -sssS'grch38' -p105 -(dp106 -g38 -S'NC_000001.11:g.153818825_153818826del' -p107 -sg40 -(dp108 -g42 -g60 -sg44 -S'CTG' -p109 -sg46 -S'153818824' -p110 -sg48 -g49 -ssssg69 -(dp111 -g71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_065750.1' -p112 -sg73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_020699.3' -p113 -sssS'metadata' -p114 -(dp115 -S'variantvalidator_hgvs_version' -p116 -S'1.1.3' -p117 -sS'uta_schema' -p118 -S'uta_20180821' -p119 -sS'seqrepo_db' -p120 -S'2018-08-21' -p121 -sS'variantvalidator_version' -p122 -S'v0.2' -p123 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant206.txt b/VariantValidator/testing/testOutputsMasterITS/variant206.txt deleted file mode 100644 index 3ba748e3..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant206.txt +++ /dev/null @@ -1,1001 +0,0 @@ -(dp0 -S'NM_005572.3:c.711_734delinsCCCC' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000001.10:g.156104666TTGAGAGCCGGCTGGCGGATGCGCT>TCCCC automapped to NC_000001.10:g.156104667_156104690delinsCCCC' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens lamin A/C (LMNA), transcript variant 2, mRNA -p13 -sS'gene_symbol' -p14 -S'LMNA' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_005563.1:p.(Glu238ProfsTer9)' -p19 -sS'slr' -p20 -S'NP_005563.1:p.(E238Pfs*9)' -p21 -ssS'submitted_variant' -p22 -S'1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' -p23 
-sS'genome_context_intronic_sequence' -p24 -g4 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_005572.3:c.711_734delinsCCCC' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr1' -p38 -sS'ref' -p39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p40 -sS'pos' -p41 -S'156104667' -p42 -sS'alt' -p43 -S'CCCC' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000001.11:g.156134876_156134899delinsCCCC' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p49 -sg41 -S'156134876' -p50 -sg43 -g44 -sssS'grch37' -p51 -(dp52 -g33 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p53 -sg35 -(dp54 -g37 -S'1' -p55 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p56 -sg41 -S'156104667' -p57 -sg43 -g44 -sssS'grch38' -p58 -(dp59 -g33 -S'NC_000001.11:g.156134876_156134899delinsCCCC' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p62 -sg41 -S'156134876' -p63 -sg43 -g44 -ssssS'reference_sequence_records' -p64 -(dp65 -S'protein' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005563.1' -p67 -sS'transcript' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005572.3' -p69 -sssS'NM_001257374.1:c.375_398delinsCCCC' -p70 -(dp71 -g3 -g4 -sg5 -(lp72 -S'NC_000001.10:g.156104666TTGAGAGCCGGCTGGCGGATGCGCT>TCCCC automapped to NC_000001.10:g.156104667_156104690delinsCCCC' -p73 -aS'A more recent version of the selected reference sequence NM_001257374.1 is available (NM_001257374.2)' -p74 -aS'NM_001257374.2:c.375_398delinsCCCC MUST be fully validated prior to use in reports' -p75 -aS'select_variants=NM_001257374.2:c.375_398delinsCCCC' -p76 -aS'RefSeqGene record not available' -p77 -asg9 -g4 -sg10 -(lp78 -sg12 -VHomo sapiens lamin A/C (LMNA), transcript variant 4, mRNA -p79 -sg14 -S'LMNA' -p80 -sg16 -(dp81 -g18 -S'NP_001244303.1:p.(Glu126ProfsTer9)' -p82 -sg20 -S'NP_001244303.1:p.(E126Pfs*9)' -p83 -ssg22 -g23 -sg24 
-g4 -sg25 -g4 -sg26 -S'NM_001257374.1:c.375_398delinsCCCC' -p84 -sg28 -g4 -sg29 -(dp85 -S'hg19' -p86 -(dp87 -g33 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p88 -sg35 -(dp89 -g37 -g38 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p90 -sg41 -S'156104667' -p91 -sg43 -S'CCCC' -p92 -sssS'grch37' -p93 -(dp94 -g33 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p95 -sg35 -(dp96 -g37 -g55 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p97 -sg41 -S'156104667' -p98 -sg43 -g92 -ssssg64 -(dp99 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244303.1' -p100 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257374.1' -p101 -sssS'NM_001257374.2:c.375_398delinsCCCC' -p102 -(dp103 -g3 -g4 -sg5 -(lp104 -S'NC_000001.10:g.156104666TTGAGAGCCGGCTGGCGGATGCGCT>TCCCC automapped to NC_000001.10:g.156104667_156104690delinsCCCC' -p105 -aS'RefSeqGene record not available' -p106 -asg9 -g4 -sg10 -(lp107 -sg12 -VHomo sapiens lamin A/C (LMNA), transcript variant 4, mRNA -p108 -sg14 -S'LMNA' -p109 -sg16 -(dp110 -g18 -S'NP_001244303.1:p.(Glu126ProfsTer9)' -p111 -sg20 -S'NP_001244303.1:p.(E126Pfs*9)' -p112 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001257374.2:c.375_398delinsCCCC' -p113 -sg28 -g4 -sg29 -(dp114 -S'hg19' -p115 -(dp116 -g33 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p117 -sg35 -(dp118 -g37 -g38 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p119 -sg41 -S'156104667' -p120 -sg43 -S'CCCC' -p121 -sssg45 -(dp122 -g33 -S'NC_000001.11:g.156134876_156134899delinsCCCC' -p123 -sg35 -(dp124 -g37 -g38 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p125 -sg41 -S'156134876' -p126 -sg43 -g121 -sssS'grch37' -p127 -(dp128 -g33 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p129 -sg35 -(dp130 -g37 -g55 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p131 -sg41 -S'156104667' -p132 -sg43 -g121 -sssS'grch38' -p133 -(dp134 -g33 -S'NC_000001.11:g.156134876_156134899delinsCCCC' -p135 -sg35 -(dp136 -g37 -g55 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p137 -sg41 -S'156134876' -p138 -sg43 -g121 -ssssg64 -(dp139 -g66 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244303.1' -p140 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257374.2' -p141 -sssS'NM_001282624.1:c.468_491delinsCCCC' -p142 -(dp143 -g3 -g4 -sg5 -(lp144 -S'NC_000001.10:g.156104666TTGAGAGCCGGCTGGCGGATGCGCT>TCCCC automapped to NC_000001.10:g.156104667_156104690delinsCCCC' -p145 -aS'RefSeqGene record not available' -p146 -asg9 -g4 -sg10 -(lp147 -sg12 -VHomo sapiens lamin A/C (LMNA), transcript variant 5, mRNA -p148 -sg14 -S'LMNA' -p149 -sg16 -(dp150 -g18 -S'NP_001269553.1:p.(Glu157ProfsTer9)' -p151 -sg20 -S'NP_001269553.1:p.(E157Pfs*9)' -p152 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001282624.1:c.468_491delinsCCCC' -p153 -sg28 -g4 -sg29 -(dp154 -S'hg19' -p155 -(dp156 -g33 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p157 -sg35 -(dp158 -g37 -g38 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p159 -sg41 -S'156104667' -p160 -sg43 -S'CCCC' -p161 -sssg45 -(dp162 -g33 -S'NC_000001.11:g.156134876_156134899delinsCCCC' -p163 -sg35 -(dp164 -g37 -g38 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p165 -sg41 -S'156134876' -p166 -sg43 -g161 -sssS'grch37' -p167 -(dp168 -g33 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p169 -sg35 -(dp170 -g37 -g55 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p171 -sg41 -S'156104667' -p172 -sg43 -g161 -sssS'grch38' -p173 -(dp174 -g33 -S'NC_000001.11:g.156134876_156134899delinsCCCC' -p175 -sg35 -(dp176 -g37 -g55 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p177 -sg41 -S'156134876' -p178 -sg43 -g161 -ssssg64 -(dp179 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269553.1' -p180 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282624.1' -p181 -sssS'flag' -p182 -S'gene_variant' -p183 -sS'NM_170708.3:c.711_734delinsCCCC' -p184 -(dp185 -g3 -g4 -sg5 -(lp186 -S'NC_000001.10:g.156104666TTGAGAGCCGGCTGGCGGATGCGCT>TCCCC automapped to NC_000001.10:g.156104667_156104690delinsCCCC' -p187 -aS'RefSeqGene record not available' -p188 -asg9 -g4 -sg10 -(lp189 -sg12 -VHomo sapiens lamin A/C (LMNA), transcript variant 3, mRNA -p190 -sg14 
-S'LMNA' -p191 -sg16 -(dp192 -g18 -S'NP_733822.1:p.(Glu238ProfsTer9)' -p193 -sg20 -S'NP_733822.1:p.(E238Pfs*9)' -p194 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_170708.3:c.711_734delinsCCCC' -p195 -sg28 -g4 -sg29 -(dp196 -S'hg19' -p197 -(dp198 -g33 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p199 -sg35 -(dp200 -g37 -g38 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p201 -sg41 -S'156104667' -p202 -sg43 -S'CCCC' -p203 -sssg45 -(dp204 -g33 -S'NC_000001.11:g.156134876_156134899delinsCCCC' -p205 -sg35 -(dp206 -g37 -g38 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p207 -sg41 -S'156134876' -p208 -sg43 -g203 -sssS'grch37' -p209 -(dp210 -g33 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p211 -sg35 -(dp212 -g37 -g55 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p213 -sg41 -S'156104667' -p214 -sg43 -g203 -sssS'grch38' -p215 -(dp216 -g33 -S'NC_000001.11:g.156134876_156134899delinsCCCC' -p217 -sg35 -(dp218 -g37 -g55 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p219 -sg41 -S'156134876' -p220 -sg43 -g203 -ssssg64 -(dp221 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_733822.1' -p222 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_170708.3' -p223 -sssS'NM_170707.3:c.711_734delinsCCCC' -p224 -(dp225 -g3 -g4 -sg5 -(lp226 -S'NC_000001.10:g.156104666TTGAGAGCCGGCTGGCGGATGCGCT>TCCCC automapped to NC_000001.10:g.156104667_156104690delinsCCCC' -p227 -aS'RefSeqGene record not available' -p228 -asg9 -g4 -sg10 -(lp229 -sg12 -VHomo sapiens lamin A/C (LMNA), transcript variant 1, mRNA -p230 -sg14 -S'LMNA' -p231 -sg16 -(dp232 -g18 -S'NP_733821.1:p.(Glu238ProfsTer9)' -p233 -sg20 -S'NP_733821.1:p.(E238Pfs*9)' -p234 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_170707.3:c.711_734delinsCCCC' -p235 -sg28 -g4 -sg29 -(dp236 -S'hg19' -p237 -(dp238 -g33 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p239 -sg35 -(dp240 -g37 -g38 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p241 -sg41 -S'156104667' -p242 -sg43 -S'CCCC' -p243 -sssg45 -(dp244 -g33 -S'NC_000001.11:g.156134876_156134899delinsCCCC' -p245 -sg35 -(dp246 -g37 -g38 -sg39 
-S'TGAGAGCCGGCTGGCGGATGCGCT' -p247 -sg41 -S'156134876' -p248 -sg43 -g243 -sssS'grch37' -p249 -(dp250 -g33 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p251 -sg35 -(dp252 -g37 -g55 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p253 -sg41 -S'156104667' -p254 -sg43 -g243 -sssS'grch38' -p255 -(dp256 -g33 -S'NC_000001.11:g.156134876_156134899delinsCCCC' -p257 -sg35 -(dp258 -g37 -g55 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p259 -sg41 -S'156134876' -p260 -sg43 -g243 -ssssg64 -(dp261 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_733821.1' -p262 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_170707.3' -p263 -sssS'metadata' -p264 -(dp265 -S'variantvalidator_hgvs_version' -p266 -S'1.1.3' -p267 -sS'uta_schema' -p268 -S'uta_20180821' -p269 -sS'seqrepo_db' -p270 -S'2018-08-21' -p271 -sS'variantvalidator_version' -p272 -S'v0.2' -p273 -ssS'NM_001282626.1:c.711_734delinsCCCC' -p274 -(dp275 -g3 -g4 -sg5 -(lp276 -S'NC_000001.10:g.156104666TTGAGAGCCGGCTGGCGGATGCGCT>TCCCC automapped to NC_000001.10:g.156104667_156104690delinsCCCC' -p277 -aS'RefSeqGene record not available' -p278 -asg9 -g4 -sg10 -(lp279 -sg12 -VHomo sapiens lamin A/C (LMNA), transcript variant 7, mRNA -p280 -sg14 -S'LMNA' -p281 -sg16 -(dp282 -g18 -S'NP_001269555.1:p.(Glu238ProfsTer9)' -p283 -sg20 -S'NP_001269555.1:p.(E238Pfs*9)' -p284 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001282626.1:c.711_734delinsCCCC' -p285 -sg28 -g4 -sg29 -(dp286 -S'hg19' -p287 -(dp288 -g33 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p289 -sg35 -(dp290 -g37 -g38 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p291 -sg41 -S'156104667' -p292 -sg43 -S'CCCC' -p293 -sssg45 -(dp294 -g33 -S'NC_000001.11:g.156134876_156134899delinsCCCC' -p295 -sg35 -(dp296 -g37 -g38 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p297 -sg41 -S'156134876' -p298 -sg43 -g293 -sssS'grch37' -p299 -(dp300 -g33 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p301 -sg35 -(dp302 -g37 -g55 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p303 -sg41 -S'156104667' -p304 -sg43 -g293 -sssS'grch38' -p305 -(dp306 
-g33 -S'NC_000001.11:g.156134876_156134899delinsCCCC' -p307 -sg35 -(dp308 -g37 -g55 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p309 -sg41 -S'156134876' -p310 -sg43 -g293 -ssssg64 -(dp311 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269555.1' -p312 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282626.1' -p313 -sssS'NM_001282625.1:c.711_734delinsCCCC' -p314 -(dp315 -g3 -g4 -sg5 -(lp316 -S'NC_000001.10:g.156104666TTGAGAGCCGGCTGGCGGATGCGCT>TCCCC automapped to NC_000001.10:g.156104667_156104690delinsCCCC' -p317 -aS'RefSeqGene record not available' -p318 -asg9 -g4 -sg10 -(lp319 -sg12 -VHomo sapiens lamin A/C (LMNA), transcript variant 6, mRNA -p320 -sg14 -S'LMNA' -p321 -sg16 -(dp322 -g18 -S'NP_001269554.1:p.(Glu238ProfsTer9)' -p323 -sg20 -S'NP_001269554.1:p.(E238Pfs*9)' -p324 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001282625.1:c.711_734delinsCCCC' -p325 -sg28 -g4 -sg29 -(dp326 -S'hg19' -p327 -(dp328 -g33 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p329 -sg35 -(dp330 -g37 -g38 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p331 -sg41 -S'156104667' -p332 -sg43 -S'CCCC' -p333 -sssg45 -(dp334 -g33 -S'NC_000001.11:g.156134876_156134899delinsCCCC' -p335 -sg35 -(dp336 -g37 -g38 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p337 -sg41 -S'156134876' -p338 -sg43 -g333 -sssS'grch37' -p339 -(dp340 -g33 -S'NC_000001.10:g.156104667_156104690delinsCCCC' -p341 -sg35 -(dp342 -g37 -g55 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p343 -sg41 -S'156104667' -p344 -sg43 -g333 -sssS'grch38' -p345 -(dp346 -g33 -S'NC_000001.11:g.156134876_156134899delinsCCCC' -p347 -sg35 -(dp348 -g37 -g55 -sg39 -S'TGAGAGCCGGCTGGCGGATGCGCT' -p349 -sg41 -S'156134876' -p350 -sg43 -g333 -ssssg64 -(dp351 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269554.1' -p352 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282625.1' -p353 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant207.txt b/VariantValidator/testing/testOutputsMasterITS/variant207.txt deleted file mode 100644 index b96d4ce1..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant207.txt +++ /dev/null @@ -1,632 +0,0 @@ -(dp0 -S'NM_170707.3:c.1961dup' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000001.10:g.156108541G>GG automapped to NC_000001.10:g.156108541dupG' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens lamin A/C (LMNA), transcript variant 1, mRNA -p13 -sS'gene_symbol' -p14 -S'LMNA' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_733821.1:p.(Thr655AsnfsTer49)' -p19 -sS'slr' -p20 -S'NP_733821.1:p.(T655Nfs*49)' -p21 -ssS'submitted_variant' -p22 -S'1-156108541-G-GG' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_170707.3:c.1961dup' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000001.10:g.156108541dup' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr1' -p38 -sS'ref' -p39 -S'G' -p40 -sS'pos' -p41 -S'156108541' -p42 -sS'alt' -p43 -S'GG' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000001.11:g.156138750dup' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'156138750' -p49 -sg43 -S'GG' -p50 -sssS'grch37' -p51 -(dp52 -g33 -S'NC_000001.10:g.156108541dup' -p53 -sg35 -(dp54 -g37 -S'1' -p55 -sg39 -g40 -sg41 -S'156108541' -p56 -sg43 -S'GG' -p57 -sssS'grch38' -p58 -(dp59 -g33 -S'NC_000001.11:g.156138750dup' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -g40 -sg41 -S'156138750' -p62 -sg43 -S'GG' -p63 -ssssS'reference_sequence_records' -p64 -(dp65 -S'protein' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_733821.1' -p67 
-sS'transcript' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_170707.3' -p69 -sssS'NM_001282626.1:c.1818+143dup' -p70 -(dp71 -g3 -g4 -sg5 -(lp72 -S'NC_000001.10:g.156108541G>GG automapped to NC_000001.10:g.156108541dupG' -p73 -aS'RefSeqGene record not available' -p74 -asg9 -g4 -sg10 -(lp75 -sg12 -VHomo sapiens lamin A/C (LMNA), transcript variant 7, mRNA -p76 -sg14 -S'LMNA' -p77 -sg16 -(dp78 -g18 -S'NP_001269555.1:p.?' -p79 -sg20 -S'NP_001269555.1:p.?' -p80 -ssg22 -g23 -sg24 -S'NC_000001.10(NM_001282626.1):c.1818+143dup' -p81 -sg25 -g4 -sg26 -S'NM_001282626.1:c.1818+143dup' -p82 -sg28 -g4 -sg29 -(dp83 -S'hg19' -p84 -(dp85 -g33 -S'NC_000001.10:g.156108541dup' -p86 -sg35 -(dp87 -g37 -g38 -sg39 -g40 -sg41 -S'156108541' -p88 -sg43 -S'GG' -p89 -sssg45 -(dp90 -g33 -S'NC_000001.11:g.156138750dup' -p91 -sg35 -(dp92 -g37 -g38 -sg39 -g40 -sg41 -S'156138750' -p93 -sg43 -S'GG' -p94 -sssS'grch37' -p95 -(dp96 -g33 -S'NC_000001.10:g.156108541dup' -p97 -sg35 -(dp98 -g37 -g55 -sg39 -g40 -sg41 -S'156108541' -p99 -sg43 -S'GG' -p100 -sssS'grch38' -p101 -(dp102 -g33 -S'NC_000001.11:g.156138750dup' -p103 -sg35 -(dp104 -g37 -g55 -sg39 -g40 -sg41 -S'156138750' -p105 -sg43 -S'GG' -p106 -ssssg64 -(dp107 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269555.1' -p108 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282626.1' -p109 -sssS'flag' -p110 -S'gene_variant' -p111 -sS'NM_001257374.2:c.1625dup' -p112 -(dp113 -g3 -g4 -sg5 -(lp114 -S'NC_000001.10:g.156108541G>GG automapped to NC_000001.10:g.156108541dupG' -p115 -aS'RefSeqGene record not available' -p116 -asg9 -g4 -sg10 -(lp117 -sg12 -VHomo sapiens lamin A/C (LMNA), transcript variant 4, mRNA -p118 -sg14 -S'LMNA' -p119 -sg16 -(dp120 -g18 -S'NP_001244303.1:p.(Thr543AsnfsTer90)' -p121 -sg20 -S'NP_001244303.1:p.(T543Nfs*90)' -p122 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001257374.2:c.1625dup' -p123 -sg28 -g4 -sg29 -(dp124 -S'hg19' -p125 -(dp126 -g33 -S'NC_000001.10:g.156108541dup' -p127 -sg35 -(dp128 -g37 -g38 -sg39 -g40 -sg41 
-S'156108541' -p129 -sg43 -S'GG' -p130 -sssg45 -(dp131 -g33 -S'NC_000001.11:g.156138750dup' -p132 -sg35 -(dp133 -g37 -g38 -sg39 -g40 -sg41 -S'156138750' -p134 -sg43 -S'GG' -p135 -sssS'grch37' -p136 -(dp137 -g33 -S'NC_000001.10:g.156108541dup' -p138 -sg35 -(dp139 -g37 -g55 -sg39 -g40 -sg41 -S'156108541' -p140 -sg43 -S'GG' -p141 -sssS'grch38' -p142 -(dp143 -g33 -S'NC_000001.11:g.156138750dup' -p144 -sg35 -(dp145 -g37 -g55 -sg39 -g40 -sg41 -S'156138750' -p146 -sg43 -S'GG' -p147 -ssssg64 -(dp148 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244303.1' -p149 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257374.2' -p150 -sssS'NM_170708.3:c.1871dup' -p151 -(dp152 -g3 -g4 -sg5 -(lp153 -S'NC_000001.10:g.156108541G>GG automapped to NC_000001.10:g.156108541dupG' -p154 -aS'RefSeqGene record not available' -p155 -asg9 -g4 -sg10 -(lp156 -sg12 -VHomo sapiens lamin A/C (LMNA), transcript variant 3, mRNA -p157 -sg14 -S'LMNA' -p158 -sg16 -(dp159 -g18 -S'NP_733822.1:p.(Thr625AsnfsTer49)' -p160 -sg20 -S'NP_733822.1:p.(T625Nfs*49)' -p161 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_170708.3:c.1871dup' -p162 -sg28 -g4 -sg29 -(dp163 -S'hg19' -p164 -(dp165 -g33 -S'NC_000001.10:g.156108541dup' -p166 -sg35 -(dp167 -g37 -g38 -sg39 -g40 -sg41 -S'156108541' -p168 -sg43 -S'GG' -p169 -sssg45 -(dp170 -g33 -S'NC_000001.11:g.156138750dup' -p171 -sg35 -(dp172 -g37 -g38 -sg39 -g40 -sg41 -S'156138750' -p173 -sg43 -S'GG' -p174 -sssS'grch37' -p175 -(dp176 -g33 -S'NC_000001.10:g.156108541dup' -p177 -sg35 -(dp178 -g37 -g55 -sg39 -g40 -sg41 -S'156108541' -p179 -sg43 -S'GG' -p180 -sssS'grch38' -p181 -(dp182 -g33 -S'NC_000001.11:g.156138750dup' -p183 -sg35 -(dp184 -g37 -g55 -sg39 -g40 -sg41 -S'156138750' -p185 -sg43 -S'GG' -p186 -ssssg64 -(dp187 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_733822.1' -p188 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_170708.3' -p189 -sssS'NM_001257374.1:c.1625dup' -p190 -(dp191 -g3 -g4 -sg5 -(lp192 -S'NC_000001.10:g.156108541G>GG automapped to 
NC_000001.10:g.156108541dupG' -p193 -aS'A more recent version of the selected reference sequence NM_001257374.1 is available (NM_001257374.2)' -p194 -aS'NM_001257374.2:c.1625dupG MUST be fully validated prior to use in reports' -p195 -aS'select_variants=NM_001257374.2:c.1625dup' -p196 -aS'RefSeqGene record not available' -p197 -asg9 -g4 -sg10 -(lp198 -sg12 -VHomo sapiens lamin A/C (LMNA), transcript variant 4, mRNA -p199 -sg14 -S'LMNA' -p200 -sg16 -(dp201 -g18 -S'NP_001244303.1:p.(Thr543AsnfsTer90)' -p202 -sg20 -S'NP_001244303.1:p.(T543Nfs*90)' -p203 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001257374.1:c.1625dup' -p204 -sg28 -g4 -sg29 -(dp205 -S'hg19' -p206 -(dp207 -g33 -S'NC_000001.10:g.156108541dup' -p208 -sg35 -(dp209 -g37 -g38 -sg39 -g40 -sg41 -S'156108541' -p210 -sg43 -S'GG' -p211 -sssS'grch37' -p212 -(dp213 -g33 -S'NC_000001.10:g.156108541dup' -p214 -sg35 -(dp215 -g37 -g55 -sg39 -g40 -sg41 -S'156108541' -p216 -sg43 -S'GG' -p217 -ssssg64 -(dp218 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244303.1' -p219 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257374.1' -p220 -sssS'metadata' -p221 -(dp222 -S'variantvalidator_hgvs_version' -p223 -S'1.1.3' -p224 -sS'uta_schema' -p225 -S'uta_20180821' -p226 -sS'seqrepo_db' -p227 -S'2018-08-21' -p228 -sS'variantvalidator_version' -p229 -S'v0.2' -p230 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant208.txt b/VariantValidator/testing/testOutputsMasterITS/variant208.txt deleted file mode 100644 index a236c2ce..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant208.txt +++ /dev/null @@ -1,374 +0,0 @@ -(dp0 -S'metadata' -p1 -(dp2 -S'variantvalidator_hgvs_version' -p3 -S'1.1.3' -p4 -sS'uta_schema' -p5 -S'uta_20180821' -p6 -sS'seqrepo_db' -p7 -S'2018-08-21' -p8 -sS'variantvalidator_version' -p9 -S'v0.2' -p10 -ssS'flag' -p11 -S'gene_variant' -p12 -sS'NM_001315491.1:c.1A>T' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'RefSeqGene record not available' -p19 -asS'refseqgene_context_intronic_sequence' -p20 -g16 -sS'alt_genomic_loci' -p21 -(lp22 -sS'transcript_description' -p23 -VHomo sapiens myelin protein zero (MPZ), transcript variant 1, mRNA -p24 -sS'gene_symbol' -p25 -S'MPZ' -p26 -sS'hgvs_predicted_protein_consequence' -p27 -(dp28 -S'tlr' -p29 -S'NP_001302420.1:p.(Met1?)' -p30 -sS'slr' -p31 -S'NP_001302420.1:p.(M1?)' -p32 -ssS'submitted_variant' -p33 -S'1-161279695-T-A' -p34 -sS'genome_context_intronic_sequence' -p35 -g16 -sS'hgvs_lrg_variant' -p36 -g16 -sS'hgvs_transcript_variant' -p37 -S'NM_001315491.1:c.1A>T' -p38 -sS'hgvs_refseqgene_variant' -p39 -g16 -sS'primary_assembly_loci' -p40 -(dp41 -S'hg19' -p42 -(dp43 -S'hgvs_genomic_description' -p44 -S'NC_000001.10:g.161279695T>A' -p45 -sS'vcf' -p46 -(dp47 -S'chr' -p48 -S'chr1' -p49 -sS'ref' -p50 -VT -p51 -sS'pos' -p52 -S'161279695' -p53 -sS'alt' -p54 -VA -p55 -sssS'hg38' -p56 -(dp57 -g44 -S'NC_000001.11:g.161309905T>A' -p58 -sg46 -(dp59 -g48 -g49 -sg50 -g51 -sg52 -S'161309905' -p60 -sg54 -g55 -sssS'grch37' -p61 -(dp62 -g44 -S'NC_000001.10:g.161279695T>A' -p63 -sg46 -(dp64 -g48 -S'1' -p65 -sg50 -g51 -sg52 -S'161279695' -p66 -sg54 -g55 -sssS'grch38' -p67 -(dp68 -g44 -S'NC_000001.11:g.161309905T>A' -p69 -sg46 -(dp70 -g48 -g65 -sg50 -g51 -sg52 -S'161309905' 
-p71 -sg54 -g55 -ssssS'reference_sequence_records' -p72 -(dp73 -S'protein' -p74 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001302420.1' -p75 -sS'transcript' -p76 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001315491.1' -p77 -sssS'NM_000530.7:c.1A>T' -p78 -(dp79 -g15 -g16 -sg17 -(lp80 -S'RefSeqGene record not available' -p81 -asg20 -g16 -sg21 -(lp82 -sg23 -VHomo sapiens myelin protein zero (MPZ), transcript variant 1, mRNA -p83 -sg25 -S'MPZ' -p84 -sg27 -(dp85 -g29 -S'NP_000521.2:p.(Met1?)' -p86 -sg31 -S'NP_000521.2:p.(M1?)' -p87 -ssg33 -g34 -sg35 -g16 -sg36 -g16 -sg37 -S'NM_000530.7:c.1A>T' -p88 -sg39 -g16 -sg40 -(dp89 -S'hg19' -p90 -(dp91 -g44 -S'NC_000001.10:g.161279695T>A' -p92 -sg46 -(dp93 -g48 -g49 -sg50 -g51 -sg52 -S'161279695' -p94 -sg54 -g55 -sssg56 -(dp95 -g44 -S'NC_000001.11:g.161309905T>A' -p96 -sg46 -(dp97 -g48 -g49 -sg50 -g51 -sg52 -S'161309905' -p98 -sg54 -g55 -sssS'grch37' -p99 -(dp100 -g44 -S'NC_000001.10:g.161279695T>A' -p101 -sg46 -(dp102 -g48 -g65 -sg50 -g51 -sg52 -S'161279695' -p103 -sg54 -g55 -sssS'grch38' -p104 -(dp105 -g44 -S'NC_000001.11:g.161309905T>A' -p106 -sg46 -(dp107 -g48 -g65 -sg50 -g51 -sg52 -S'161309905' -p108 -sg54 -g55 -ssssg72 -(dp109 -g74 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000521.2' -p110 -sg76 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000530.7' -p111 -sssS'NM_000530.6:c.1A>T' -p112 -(dp113 -g15 -g16 -sg17 -(lp114 -S'A more recent version of the selected reference sequence NM_000530.6 is available (NM_000530.7)' -p115 -aS'NM_000530.7:c.1A>T MUST be fully validated prior to use in reports' -p116 -aS'select_variants=NM_000530.7:c.1A>T' -p117 -aS'RefSeqGene record not available' -p118 -asg20 -g16 -sg21 -(lp119 -sg23 -VHomo sapiens myelin protein zero (MPZ), mRNA -p120 -sg25 -S'MPZ' -p121 -sg27 -(dp122 -g29 -S'NP_000521.2:p.(Met1?)' -p123 -sg31 -S'NP_000521.2:p.(M1?)' -p124 -ssg33 -g34 -sg35 -g16 -sg36 -g16 -sg37 -S'NM_000530.6:c.1A>T' -p125 -sg39 -g16 -sg40 -(dp126 -S'hg19' -p127 -(dp128 -g44 -S'NC_000001.10:g.161279695T>A' 
-p129 -sg46 -(dp130 -g48 -g49 -sg50 -g51 -sg52 -S'161279695' -p131 -sg54 -g55 -sssS'grch37' -p132 -(dp133 -g44 -S'NC_000001.10:g.161279695T>A' -p134 -sg46 -(dp135 -g48 -g65 -sg50 -g51 -sg52 -S'161279695' -p136 -sg54 -g55 -ssssg72 -(dp137 -g74 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000521.2' -p138 -sg76 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000530.6' -p139 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant209.txt b/VariantValidator/testing/testOutputsMasterITS/variant209.txt deleted file mode 100644 index f96f2257..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant209.txt +++ /dev/null @@ -1,172 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000130.4:c.1601G>A' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens coagulation factor V (F5), mRNA -p14 -sS'gene_symbol' -p15 -S'F5' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000121.2:p.(Arg534Gln)' -p20 -sS'slr' -p21 -S'NP_000121.2:p.(R534Q)' -p22 -ssS'submitted_variant' -p23 -S'1-169519049-T-T' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_000130.4:c.1601G>A' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000001.10:g.169519049T=' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr1' -p39 -sS'ref' -p40 -S'T' -p41 -sS'pos' -p42 -S'169519049' -p43 -sS'alt' -p44 -g41 -sssS'hg38' -p45 -(dp46 -g34 -S'NC_000001.11:g.169549811C>T' -p47 -sg36 -(dp48 -g38 -g39 -sg40 -VC -p49 -sg42 -S'169549811' -p50 -sg44 -VT -p51 -sssS'grch37' -p52 -(dp53 -g34 -S'NC_000001.10:g.169519049T=' -p54 -sg36 -(dp55 -g38 -S'1' -p56 -sg40 -g41 -sg42 -S'169519049' -p57 
-sg44 -g41 -sssS'grch38' -p58 -(dp59 -g34 -S'NC_000001.11:g.169549811C>T' -p60 -sg36 -(dp61 -g38 -g56 -sg40 -g49 -sg42 -S'169549811' -p62 -sg44 -g51 -ssssS'reference_sequence_records' -p63 -(dp64 -S'protein' -p65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000121.2' -p66 -sS'transcript' -p67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000130.4' -p68 -sssS'metadata' -p69 -(dp70 -S'variantvalidator_hgvs_version' -p71 -S'1.1.3' -p72 -sS'uta_schema' -p73 -S'uta_20180821' -p74 -sS'seqrepo_db' -p75 -S'2018-08-21' -p76 -sS'variantvalidator_version' -p77 -S'v0.2' -p78 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant21.txt b/VariantValidator/testing/testOutputsMasterITS/variant21.txt deleted file mode 100644 index 134f0811..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant21.txt +++ /dev/null @@ -1,180 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000518.4:c.316_*100del' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'A more recent version of the selected reference sequence NM_000518.4 is available (NM_000518.5)' -p9 -aS'NM_000518.5:c.316_*100del MUST be fully validated prior to use in reports' -p10 -aS'select_variants=NM_000518.5:c.316_*100del' -p11 -aS'RefSeqGene record not available' -p12 -asS'refseqgene_context_intronic_sequence' -p13 -g6 -sS'alt_genomic_loci' -p14 -(lp15 -sS'transcript_description' -p16 -VHomo sapiens hemoglobin subunit beta (HBB), mRNA -p17 -sS'gene_symbol' -p18 -S'HBB' -p19 -sS'hgvs_predicted_protein_consequence' -p20 -(dp21 -S'tlr' -p22 -S'NP_000509.1:p.(Leu106SerfsTer3)' -p23 -sS'slr' -p24 -S'NP_000509.1:p.(L106Sfs*3)' -p25 -ssS'submitted_variant' -p26 -S'NM_000518.4:c.316_*100del' -p27 -sS'genome_context_intronic_sequence' -p28 -g6 -sS'hgvs_lrg_variant' -p29 -g6 -sS'hgvs_transcript_variant' -p30 -S'NM_000518.4:c.316_*100del' -p31 -sS'hgvs_refseqgene_variant' -p32 -g6 -sS'primary_assembly_loci' -p33 -(dp34 -S'grch38' -p35 -(dp36 
-S'hgvs_genomic_description' -p37 -S'NC_000011.10:g.5225498_5225726del' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'11' -p42 -sS'ref' -p43 -S'AATCCAGATGCTCAAGGCCCTTCATAATATCCCCCAGTTTAGTAGTTGGACTTAGGGAACAAAGGAACCTTTAATAGAAATTGGACAGCAAGAAAGCGAGCTTAGTGATACTTGTGGGCCAGGGCATTAGCCACACCAGCCACCACTTTCTGATAGGCAGCCTGCACTGGTGGGGTGAATTCTTTGCCAAAGTGATGGGCCAGCACACAGACCAGCACGTTGCCCAGGAG' -p44 -sS'pos' -p45 -S'5225497' -p46 -sS'alt' -p47 -S'A' -p48 -sssS'grch37' -p49 -(dp50 -g37 -S'NC_000011.9:g.5246728_5246956del' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -S'AATCCAGATGCTCAAGGCCCTTCATAATATCCCCCAGTTTAGTAGTTGGACTTAGGGAACAAAGGAACCTTTAATAGAAATTGGACAGCAAGAAAGCGAGCTTAGTGATACTTGTGGGCCAGGGCATTAGCCACACCAGCCACCACTTTCTGATAGGCAGCCTGCACTGGTGGGGTGAATTCTTTGCCAAAGTGATGGGCCAGCACACAGACCAGCACGTTGCCCAGGAG' -p53 -sg45 -S'5246727' -p54 -sg47 -g48 -sssS'hg38' -p55 -(dp56 -g37 -S'NC_000011.10:g.5225498_5225726del' -p57 -sg39 -(dp58 -g41 -S'chr11' -p59 -sg43 -S'AATCCAGATGCTCAAGGCCCTTCATAATATCCCCCAGTTTAGTAGTTGGACTTAGGGAACAAAGGAACCTTTAATAGAAATTGGACAGCAAGAAAGCGAGCTTAGTGATACTTGTGGGCCAGGGCATTAGCCACACCAGCCACCACTTTCTGATAGGCAGCCTGCACTGGTGGGGTGAATTCTTTGCCAAAGTGATGGGCCAGCACACAGACCAGCACGTTGCCCAGGAG' -p60 -sg45 -S'5225497' -p61 -sg47 -g48 -sssS'hg19' -p62 -(dp63 -g37 -S'NC_000011.9:g.5246728_5246956del' -p64 -sg39 -(dp65 -g41 -g59 -sg43 -S'AATCCAGATGCTCAAGGCCCTTCATAATATCCCCCAGTTTAGTAGTTGGACTTAGGGAACAAAGGAACCTTTAATAGAAATTGGACAGCAAGAAAGCGAGCTTAGTGATACTTGTGGGCCAGGGCATTAGCCACACCAGCCACCACTTTCTGATAGGCAGCCTGCACTGGTGGGGTGAATTCTTTGCCAAAGTGATGGGCCAGCACACAGACCAGCACGTTGCCCAGGAG' -p66 -sg45 -S'5246727' -p67 -sg47 -g48 -ssssS'reference_sequence_records' -p68 -(dp69 -S'protein' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000509.1' -p71 -sS'transcript' -p72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000518.4' -p73 -sssS'metadata' -p74 -(dp75 -S'variantvalidator_hgvs_version' -p76 -S'1.1.3' -p77 -sS'uta_schema' -p78 -S'uta_20180821' -p79 -sS'seqrepo_db' -p80 -S'2018-08-21' -p81 -sS'variantvalidator_version' -p82 -S'v0.2' -p83 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant210.txt b/VariantValidator/testing/testOutputsMasterITS/variant210.txt deleted file mode 100644 index bde23683..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant210.txt +++ /dev/null @@ -1,528 +0,0 @@ -(dp0 -S'NM_003240.4:c.774C>T' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens left-right determination factor 2 (LEFTY2), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'LEFTY2' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_003231.2:p.(Thr258=)' -p18 -sS'slr' -p19 -S'NP_003231.2:p.(T258=)' -p20 -ssS'submitted_variant' -p21 -S'1-226125468-G-A' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_003240.4:c.774C>T' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000001.10:g.226125468G>A' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr1' -p37 -sS'ref' -p38 -VG -p39 -sS'pos' -p40 -S'226125468' -p41 -sS'alt' -p42 -VA -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000001.11:g.225937768G>A' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'225937768' -p48 -sg42 -g43 -sssS'grch37' -p49 -(dp50 -g32 -S'NC_000001.10:g.226125468G>A' -p51 -sg34 -(dp52 -g36 -S'1' -p53 -sg38 -g39 -sg40 -S'226125468' -p54 -sg42 -g43 -sssS'grch38' -p55 -(dp56 -g32 -S'NC_000001.11:g.225937768G>A' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'225937768' -p59 -sg42 -g43 -ssssS'reference_sequence_records' -p60 -(dp61 -S'protein' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003231.2' -p63 -sS'transcript' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003240.4' -p65 
-sssS'NM_003240.3:c.774C>T' -p66 -(dp67 -g3 -g4 -sg5 -(lp68 -S'A more recent version of the selected reference sequence NM_003240.3 is available (NM_003240.4)' -p69 -aS'NM_003240.4:c.774C>T MUST be fully validated prior to use in reports' -p70 -aS'select_variants=NM_003240.4:c.774C>T' -p71 -aS'RefSeqGene record not available' -p72 -asg8 -g4 -sg9 -(lp73 -sg11 -VHomo sapiens left-right determination factor 2 (LEFTY2), transcript variant 1, mRNA -p74 -sg13 -S'LEFTY2' -p75 -sg15 -(dp76 -g17 -S'NP_003231.2:p.(Thr258=)' -p77 -sg19 -S'NP_003231.2:p.(T258=)' -p78 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_003240.3:c.774C>T' -p79 -sg27 -g4 -sg28 -(dp80 -S'hg19' -p81 -(dp82 -g32 -S'NC_000001.10:g.226125468G>A' -p83 -sg34 -(dp84 -g36 -g37 -sg38 -g39 -sg40 -S'226125468' -p85 -sg42 -g43 -sssg44 -(dp86 -g32 -S'NC_000001.11:g.225937768G>A' -p87 -sg34 -(dp88 -g36 -g37 -sg38 -g39 -sg40 -S'225937768' -p89 -sg42 -g43 -sssS'grch37' -p90 -(dp91 -g32 -S'NC_000001.10:g.226125468G>A' -p92 -sg34 -(dp93 -g36 -g53 -sg38 -g39 -sg40 -S'226125468' -p94 -sg42 -g43 -sssS'grch38' -p95 -(dp96 -g32 -S'NC_000001.11:g.225937768G>A' -p97 -sg34 -(dp98 -g36 -g53 -sg38 -g39 -sg40 -S'225937768' -p99 -sg42 -g43 -ssssg60 -(dp100 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003231.2' -p101 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003240.3' -p102 -sssS'NM_001172425.1:c.672C>T' -p103 -(dp104 -g3 -g4 -sg5 -(lp105 -S'A more recent version of the selected reference sequence NM_001172425.1 is available (NM_001172425.2)' -p106 -aS'NM_001172425.2:c.672C>T MUST be fully validated prior to use in reports' -p107 -aS'select_variants=NM_001172425.2:c.672C>T' -p108 -aS'RefSeqGene record not available' -p109 -asg8 -g4 -sg9 -(lp110 -sg11 -VHomo sapiens left-right determination factor 2 (LEFTY2), transcript variant 2, mRNA -p111 -sg13 -S'LEFTY2' -p112 -sg15 -(dp113 -g17 -S'NP_001165896.1:p.(Thr224=)' -p114 -sg19 -S'NP_001165896.1:p.(T224=)' -p115 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001172425.1:c.672C>T' 
-p116 -sg27 -g4 -sg28 -(dp117 -S'hg19' -p118 -(dp119 -g32 -S'NC_000001.10:g.226125468G>A' -p120 -sg34 -(dp121 -g36 -g37 -sg38 -g39 -sg40 -S'226125468' -p122 -sg42 -g43 -sssg44 -(dp123 -g32 -S'NC_000001.11:g.225937768G>A' -p124 -sg34 -(dp125 -g36 -g37 -sg38 -g39 -sg40 -S'225937768' -p126 -sg42 -g43 -sssS'grch37' -p127 -(dp128 -g32 -S'NC_000001.10:g.226125468G>A' -p129 -sg34 -(dp130 -g36 -g53 -sg38 -g39 -sg40 -S'226125468' -p131 -sg42 -g43 -sssS'grch38' -p132 -(dp133 -g32 -S'NC_000001.11:g.225937768G>A' -p134 -sg34 -(dp135 -g36 -g53 -sg38 -g39 -sg40 -S'225937768' -p136 -sg42 -g43 -ssssg60 -(dp137 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001165896.1' -p138 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001172425.1' -p139 -sssS'NM_001172425.2:c.672C>T' -p140 -(dp141 -g3 -g4 -sg5 -(lp142 -S'RefSeqGene record not available' -p143 -asg8 -g4 -sg9 -(lp144 -sg11 -VHomo sapiens left-right determination factor 2 (LEFTY2), transcript variant 2, mRNA -p145 -sg13 -S'LEFTY2' -p146 -sg15 -(dp147 -g17 -S'NP_001165896.1:p.(Thr224=)' -p148 -sg19 -S'NP_001165896.1:p.(T224=)' -p149 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001172425.2:c.672C>T' -p150 -sg27 -g4 -sg28 -(dp151 -S'hg19' -p152 -(dp153 -g32 -S'NC_000001.10:g.226125468G>A' -p154 -sg34 -(dp155 -g36 -g37 -sg38 -g39 -sg40 -S'226125468' -p156 -sg42 -g43 -sssg44 -(dp157 -g32 -S'NC_000001.11:g.225937768G>A' -p158 -sg34 -(dp159 -g36 -g37 -sg38 -g39 -sg40 -S'225937768' -p160 -sg42 -g43 -sssS'grch37' -p161 -(dp162 -g32 -S'NC_000001.10:g.226125468G>A' -p163 -sg34 -(dp164 -g36 -g53 -sg38 -g39 -sg40 -S'226125468' -p165 -sg42 -g43 -sssS'grch38' -p166 -(dp167 -g32 -S'NC_000001.11:g.225937768G>A' -p168 -sg34 -(dp169 -g36 -g53 -sg38 -g39 -sg40 -S'225937768' -p170 -sg42 -g43 -ssssg60 -(dp171 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001165896.1' -p172 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001172425.2' -p173 -sssS'flag' -p174 -S'gene_variant' -p175 -sS'metadata' -p176 -(dp177 -S'variantvalidator_hgvs_version' -p178 
-S'1.1.3' -p179 -sS'uta_schema' -p180 -S'uta_20180821' -p181 -sS'seqrepo_db' -p182 -S'2018-08-21' -p183 -sS'variantvalidator_version' -p184 -S'v0.2' -p185 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant211.txt b/VariantValidator/testing/testOutputsMasterITS/variant211.txt deleted file mode 100644 index 29b689a2..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant211.txt +++ /dev/null @@ -1,215 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_001126049.1:c.-794_-792del' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000010.10:g.89623035CGCA>C automapped to NC_000010.10:g.89623039_89623041delGCA' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -(dp14 -S'grch38' -p15 -(dp16 -S'hgvs_genomic_description' -p17 -S'NW_013171807.1:g.79106_79108del' -p18 -sS'vcf' -p19 -(dp20 -S'chr' -p21 -S'HG2334_PATCH' -p22 -sS'ref' -p23 -S'CGCA' -p24 -sS'pos' -p25 -S'79102' -p26 -sS'alt' -p27 -S'C' -p28 -sssa(dp29 -S'hg38' -p30 -(dp31 -g17 -S'NW_013171807.1:g.79106_79108del' -p32 -sg19 -(dp33 -g21 -S'NW_013171807.1' -p34 -sg23 -S'CGCA' -p35 -sg25 -S'79102' -p36 -sg27 -g28 -sssasS'transcript_description' -p37 -VHomo sapiens killin, p53 regulated DNA replication inhibitor (KLLN), mRNA -p38 -sS'gene_symbol' -p39 -S'KLLN' -p40 -sS'hgvs_predicted_protein_consequence' -p41 -(dp42 -S'tlr' -p43 -S'NP_001119521.1:p.?' -p44 -sS'slr' -p45 -S'NP_001119521.1:p.?' 
-p46 -ssS'submitted_variant' -p47 -S'10-89623035-CGCA-C' -p48 -sS'genome_context_intronic_sequence' -p49 -g6 -sS'hgvs_lrg_variant' -p50 -g6 -sS'hgvs_transcript_variant' -p51 -S'NM_001126049.1:c.-794_-792del' -p52 -sS'hgvs_refseqgene_variant' -p53 -g6 -sS'primary_assembly_loci' -p54 -(dp55 -S'hg19' -p56 -(dp57 -g17 -S'NC_000010.10:g.89623039_89623041del' -p58 -sg19 -(dp59 -g21 -S'chr10' -p60 -sg23 -S'CGCA' -p61 -sg25 -S'89623035' -p62 -sg27 -g28 -sssg30 -(dp63 -g17 -S'NC_000010.11:g.87863282_87863284del' -p64 -sg19 -(dp65 -g21 -g60 -sg23 -S'CGCA' -p66 -sg25 -S'87863278' -p67 -sg27 -g28 -sssS'grch37' -p68 -(dp69 -g17 -S'NC_000010.10:g.89623039_89623041del' -p70 -sg19 -(dp71 -g21 -S'10' -p72 -sg23 -S'CGCA' -p73 -sg25 -S'89623035' -p74 -sg27 -g28 -sssS'grch38' -p75 -(dp76 -g17 -S'NC_000010.11:g.87863282_87863284del' -p77 -sg19 -(dp78 -g21 -g72 -sg23 -S'CGCA' -p79 -sg25 -S'87863278' -p80 -sg27 -g28 -ssssS'reference_sequence_records' -p81 -(dp82 -S'protein' -p83 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119521.1' -p84 -sS'transcript' -p85 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126049.1' -p86 -sssS'metadata' -p87 -(dp88 -S'variantvalidator_hgvs_version' -p89 -S'1.1.3' -p90 -sS'uta_schema' -p91 -S'uta_20180821' -p92 -sS'seqrepo_db' -p93 -S'2018-08-21' -p94 -sS'variantvalidator_version' -p95 -S'v0.2' -p96 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant212.txt b/VariantValidator/testing/testOutputsMasterITS/variant212.txt deleted file mode 100644 index da687b88..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant212.txt +++ /dev/null @@ -1,734 +0,0 @@ -(dp0 -S'NR_037946.1:n.3896G>T' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens HNRNPUL2-BSCL2 readthrough (NMD candidate) (HNRNPUL2-BSCL2), long non-coding RNA -p12 -sS'gene_symbol' -p13 -S'HNRNPUL2-BSCL2' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'Non-coding :n.' -p18 -sS'slr' -p19 -g18 -ssS'submitted_variant' -p20 -S'11-62457852-C-A' -p21 -sS'genome_context_intronic_sequence' -p22 -g4 -sS'hgvs_lrg_variant' -p23 -g4 -sS'hgvs_transcript_variant' -p24 -S'NR_037946.1:n.3896G>T' -p25 -sS'hgvs_refseqgene_variant' -p26 -g4 -sS'primary_assembly_loci' -p27 -(dp28 -S'grch38' -p29 -(dp30 -S'hgvs_genomic_description' -p31 -S'NC_000011.10:g.62690380C>A' -p32 -sS'vcf' -p33 -(dp34 -S'chr' -p35 -S'11' -p36 -sS'ref' -p37 -VC -p38 -sS'pos' -p39 -S'62690380' -p40 -sS'alt' -p41 -VA -p42 -sssS'grch37' -p43 -(dp44 -g31 -S'NC_000011.9:g.62457852C>A' -p45 -sg33 -(dp46 -g35 -g36 -sg37 -g38 -sg39 -S'62457852' -p47 -sg41 -g42 -sssS'hg38' -p48 -(dp49 -g31 -S'NC_000011.10:g.62690380C>A' -p50 -sg33 -(dp51 -g35 -S'chr11' -p52 -sg37 -g38 -sg39 -S'62690380' -p53 -sg41 -g42 -sssS'hg19' -p54 -(dp55 -g31 -S'NC_000011.9:g.62457852C>A' -p56 -sg33 -(dp57 -g35 -g52 -sg37 -g38 -sg39 -S'62457852' -p58 -sg41 -g42 -ssssS'reference_sequence_records' -p59 -(dp60 -S'transcript' -p61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_037946.1' -p62 -sssS'NM_032667.6:c.1184G>T' -p63 -(dp64 -g3 -g4 -sg5 -(lp65 -S'RefSeqGene record not available' -p66 -asg8 -g4 -sg9 -(lp67 
-sg11 -VHomo sapiens BSCL2, seipin lipid droplet biogenesis associated (BSCL2), transcript variant 2, mRNA -p68 -sg13 -S'BSCL2' -p69 -sg15 -(dp70 -g17 -S'NP_116056.3:p.(Cys395Phe)' -p71 -sg19 -S'NP_116056.3:p.(C395F)' -p72 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_032667.6:c.1184G>T' -p73 -sg26 -g4 -sg27 -(dp74 -S'grch38' -p75 -(dp76 -g31 -S'NC_000011.10:g.62690380C>A' -p77 -sg33 -(dp78 -g35 -g36 -sg37 -g38 -sg39 -S'62690380' -p79 -sg41 -g42 -sssS'grch37' -p80 -(dp81 -g31 -S'NC_000011.9:g.62457852C>A' -p82 -sg33 -(dp83 -g35 -g36 -sg37 -g38 -sg39 -S'62457852' -p84 -sg41 -g42 -sssg48 -(dp85 -g31 -S'NC_000011.10:g.62690380C>A' -p86 -sg33 -(dp87 -g35 -g52 -sg37 -g38 -sg39 -S'62690380' -p88 -sg41 -g42 -sssS'hg19' -p89 -(dp90 -g31 -S'NC_000011.9:g.62457852C>A' -p91 -sg33 -(dp92 -g35 -g52 -sg37 -g38 -sg39 -S'62457852' -p93 -sg41 -g42 -ssssg59 -(dp94 -S'protein' -p95 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116056.3' -p96 -sg61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032667.6' -p97 -sssS'NR_037949.1:n.1984G>T' -p98 -(dp99 -g3 -g4 -sg5 -(lp100 -S'RefSeqGene record not available' -p101 -asg8 -g4 -sg9 -(lp102 -sg11 -VHomo sapiens BSCL2, seipin lipid droplet biogenesis associated (BSCL2), transcript variant 5, non-coding RNA -p103 -sg13 -S'BSCL2' -p104 -sg15 -(dp105 -g17 -S'Non-coding :n.' 
-p106 -sg19 -g106 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NR_037949.1:n.1984G>T' -p107 -sg26 -g4 -sg27 -(dp108 -S'grch38' -p109 -(dp110 -g31 -S'NC_000011.10:g.62690380C>A' -p111 -sg33 -(dp112 -g35 -g36 -sg37 -g38 -sg39 -S'62690380' -p113 -sg41 -g42 -sssS'grch37' -p114 -(dp115 -g31 -S'NC_000011.9:g.62457852C>A' -p116 -sg33 -(dp117 -g35 -g36 -sg37 -g38 -sg39 -S'62457852' -p118 -sg41 -g42 -sssg48 -(dp119 -g31 -S'NC_000011.10:g.62690380C>A' -p120 -sg33 -(dp121 -g35 -g52 -sg37 -g38 -sg39 -S'62690380' -p122 -sg41 -g42 -sssS'hg19' -p123 -(dp124 -g31 -S'NC_000011.9:g.62457852C>A' -p125 -sg33 -(dp126 -g35 -g52 -sg37 -g38 -sg39 -S'62457852' -p127 -sg41 -g42 -ssssg59 -(dp128 -g61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_037949.1' -p129 -sssS'NR_037948.1:n.1978G>T' -p130 -(dp131 -g3 -g4 -sg5 -(lp132 -S'RefSeqGene record not available' -p133 -asg8 -g4 -sg9 -(lp134 -sg11 -VHomo sapiens BSCL2, seipin lipid droplet biogenesis associated (BSCL2), transcript variant 4, non-coding RNA -p135 -sg13 -S'BSCL2' -p136 -sg15 -(dp137 -g17 -S'Non-coding :n.' 
-p138 -sg19 -g138 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NR_037948.1:n.1978G>T' -p139 -sg26 -g4 -sg27 -(dp140 -S'grch38' -p141 -(dp142 -g31 -S'NC_000011.10:g.62690380C>A' -p143 -sg33 -(dp144 -g35 -g36 -sg37 -g38 -sg39 -S'62690380' -p145 -sg41 -g42 -sssS'grch37' -p146 -(dp147 -g31 -S'NC_000011.9:g.62457852C>A' -p148 -sg33 -(dp149 -g35 -g36 -sg37 -g38 -sg39 -S'62457852' -p150 -sg41 -g42 -sssg48 -(dp151 -g31 -S'NC_000011.10:g.62690380C>A' -p152 -sg33 -(dp153 -g35 -g52 -sg37 -g38 -sg39 -S'62690380' -p154 -sg41 -g42 -sssS'hg19' -p155 -(dp156 -g31 -S'NC_000011.9:g.62457852C>A' -p157 -sg33 -(dp158 -g35 -g52 -sg37 -g38 -sg39 -S'62457852' -p159 -sg41 -g42 -ssssg59 -(dp160 -g61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_037948.1' -p161 -sssS'NM_001122955.3:c.1376G>T' -p162 -(dp163 -g3 -g4 -sg5 -(lp164 -S'RefSeqGene record not available' -p165 -asg8 -g4 -sg9 -(lp166 -sg11 -VHomo sapiens BSCL2, seipin lipid droplet biogenesis associated (BSCL2), transcript variant 1, mRNA -p167 -sg13 -S'BSCL2' -p168 -sg15 -(dp169 -g17 -S'NP_001116427.1:p.(Cys459Phe)' -p170 -sg19 -S'NP_001116427.1:p.(C459F)' -p171 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_001122955.3:c.1376G>T' -p172 -sg26 -g4 -sg27 -(dp173 -S'grch38' -p174 -(dp175 -g31 -S'NC_000011.10:g.62690380C>A' -p176 -sg33 -(dp177 -g35 -g36 -sg37 -g38 -sg39 -S'62690380' -p178 -sg41 -g42 -sssS'grch37' -p179 -(dp180 -g31 -S'NC_000011.9:g.62457852C>A' -p181 -sg33 -(dp182 -g35 -g36 -sg37 -g38 -sg39 -S'62457852' -p183 -sg41 -g42 -sssg48 -(dp184 -g31 -S'NC_000011.10:g.62690380C>A' -p185 -sg33 -(dp186 -g35 -g52 -sg37 -g38 -sg39 -S'62690380' -p187 -sg41 -g42 -sssS'hg19' -p188 -(dp189 -g31 -S'NC_000011.9:g.62457852C>A' -p190 -sg33 -(dp191 -g35 -g52 -sg37 -g38 -sg39 -S'62457852' -p192 -sg41 -g42 -ssssg59 -(dp193 -g95 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001116427.1' -p194 -sg61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001122955.3' -p195 -sssS'flag' -p196 -S'gene_variant' -p197 -sS'NM_001130702.2:c.*178G>T' -p198 -(dp199 -g3 -g4 -sg5 
-(lp200 -S'RefSeqGene record not available' -p201 -asg8 -g4 -sg9 -(lp202 -sg11 -VHomo sapiens BSCL2, seipin lipid droplet biogenesis associated (BSCL2), transcript variant 3, mRNA -p203 -sg13 -S'BSCL2' -p204 -sg15 -(dp205 -g17 -S'NP_001124174.2:p.?' -p206 -sg19 -S'NP_001124174.2:p.?' -p207 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_001130702.2:c.*178G>T' -p208 -sg26 -g4 -sg27 -(dp209 -S'grch38' -p210 -(dp211 -g31 -S'NC_000011.10:g.62690380C>A' -p212 -sg33 -(dp213 -g35 -g36 -sg37 -g38 -sg39 -S'62690380' -p214 -sg41 -g42 -sssS'grch37' -p215 -(dp216 -g31 -S'NC_000011.9:g.62457852C>A' -p217 -sg33 -(dp218 -g35 -g36 -sg37 -g38 -sg39 -S'62457852' -p219 -sg41 -g42 -sssg48 -(dp220 -g31 -S'NC_000011.10:g.62690380C>A' -p221 -sg33 -(dp222 -g35 -g52 -sg37 -g38 -sg39 -S'62690380' -p223 -sg41 -g42 -sssS'hg19' -p224 -(dp225 -g31 -S'NC_000011.9:g.62457852C>A' -p226 -sg33 -(dp227 -g35 -g52 -sg37 -g38 -sg39 -S'62457852' -p228 -sg41 -g42 -ssssg59 -(dp229 -g95 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124174.2' -p230 -sg61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130702.2' -p231 -sssS'metadata' -p232 -(dp233 -S'variantvalidator_hgvs_version' -p234 -S'1.1.3' -p235 -sS'uta_schema' -p236 -S'uta_20180821' -p237 -sS'seqrepo_db' -p238 -S'2018-08-21' -p239 -sS'variantvalidator_version' -p240 -S'v0.2' -p241 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant213.txt b/VariantValidator/testing/testOutputsMasterITS/variant213.txt deleted file mode 100644 index f0758558..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant213.txt +++ /dev/null @@ -1,297 +0,0 @@ -(dp0 -S'NM_001351834.1:c.5761_5762insT' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000011.9:g.108178710A>AT automapped to NC_000011.9:g.108178710_108178711insT' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens ATM serine/threonine kinase (ATM), transcript variant 1, mRNA -p13 -sS'gene_symbol' -p14 -S'ATM' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001338763.1:p.(Arg1921MetfsTer9)' -p19 -sS'slr' -p20 -S'NP_001338763.1:p.(R1921Mfs*9)' -p21 -ssS'submitted_variant' -p22 -S'11-108178710-A-AT' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_001351834.1:c.5761_5762insT' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'grch38' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000011.10:g.108307983_108307984insT' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'11' -p38 -sS'ref' -p39 -S'A' -p40 -sS'pos' -p41 -S'108307983' -p42 -sS'alt' -p43 -S'AT' -p44 -sssS'grch37' -p45 -(dp46 -g33 -S'NC_000011.9:g.108178710_108178711insT' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'108178710' -p49 -sg43 -S'AT' -p50 -sssS'hg38' -p51 -(dp52 -g33 -S'NC_000011.10:g.108307983_108307984insT' -p53 -sg35 -(dp54 -g37 -S'chr11' -p55 -sg39 -g40 -sg41 -S'108307983' -p56 -sg43 -S'AT' -p57 -sssS'hg19' -p58 -(dp59 -g33 -S'NC_000011.9:g.108178710_108178711insT' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -g40 -sg41 -S'108178710' -p62 -sg43 -S'AT' -p63 -ssssS'reference_sequence_records' 
-p64 -(dp65 -S'protein' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001338763.1' -p67 -sS'transcript' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001351834.1' -p69 -sssS'flag' -p70 -S'gene_variant' -p71 -sS'metadata' -p72 -(dp73 -S'variantvalidator_hgvs_version' -p74 -S'1.1.3' -p75 -sS'uta_schema' -p76 -S'uta_20180821' -p77 -sS'seqrepo_db' -p78 -S'2018-08-21' -p79 -sS'variantvalidator_version' -p80 -S'v0.2' -p81 -ssS'NM_000051.3:c.5761_5762insT' -p82 -(dp83 -g3 -g4 -sg5 -(lp84 -S'NC_000011.9:g.108178710A>AT automapped to NC_000011.9:g.108178710_108178711insT' -p85 -aS'RefSeqGene record not available' -p86 -asg9 -g4 -sg10 -(lp87 -sg12 -VHomo sapiens ATM serine/threonine kinase (ATM), transcript variant 2, mRNA -p88 -sg14 -S'ATM' -p89 -sg16 -(dp90 -g18 -S'NP_000042.3:p.(Arg1921MetfsTer9)' -p91 -sg20 -S'NP_000042.3:p.(R1921Mfs*9)' -p92 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_000051.3:c.5761_5762insT' -p93 -sg28 -g4 -sg29 -(dp94 -S'grch38' -p95 -(dp96 -g33 -S'NC_000011.10:g.108307983_108307984insT' -p97 -sg35 -(dp98 -g37 -g38 -sg39 -g40 -sg41 -S'108307983' -p99 -sg43 -S'AT' -p100 -sssS'grch37' -p101 -(dp102 -g33 -S'NC_000011.9:g.108178710_108178711insT' -p103 -sg35 -(dp104 -g37 -g38 -sg39 -g40 -sg41 -S'108178710' -p105 -sg43 -S'AT' -p106 -sssg51 -(dp107 -g33 -S'NC_000011.10:g.108307983_108307984insT' -p108 -sg35 -(dp109 -g37 -g55 -sg39 -g40 -sg41 -S'108307983' -p110 -sg43 -S'AT' -p111 -sssS'hg19' -p112 -(dp113 -g33 -S'NC_000011.9:g.108178710_108178711insT' -p114 -sg35 -(dp115 -g37 -g55 -sg39 -g40 -sg41 -S'108178710' -p116 -sg43 -S'AT' -p117 -ssssg64 -(dp118 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000042.3' -p119 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000051.3' -p120 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant214.txt b/VariantValidator/testing/testOutputsMasterITS/variant214.txt deleted file mode 100644 index fc1fd9f6..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant214.txt +++ /dev/null @@ -1,3113 +0,0 @@ -(dp0 -S'NM_001352419.1:c.-108-7C>T' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -(dp11 -S'grch37' -p12 -(dp13 -S'hgvs_genomic_description' -p14 -S'NW_003871080.1:g.117249G>A' -p15 -sS'vcf' -p16 -(dp17 -S'chr' -p18 -S'HG388_HG400_PATCH' -p19 -sS'ref' -p20 -VG -p21 -sS'pos' -p22 -S'117249' -p23 -sS'alt' -p24 -VA -p25 -sssa(dp26 -S'hg19' -p27 -(dp28 -g14 -S'NW_003871080.1:g.117249G>A' -p29 -sg16 -(dp30 -g18 -S'NW_003871080.1' -p31 -sg20 -g21 -sg22 -S'117249' -p32 -sg24 -g25 -sssasS'transcript_description' -p33 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 15, mRNA -p34 -sS'gene_symbol' -p35 -S'ALG9' -p36 -sS'hgvs_predicted_protein_consequence' -p37 -(dp38 -S'tlr' -p39 -S'NP_001339348.1:p.?' -p40 -sS'slr' -p41 -S'NP_001339348.1:p.?' 
-p42 -ssS'submitted_variant' -p43 -S'11-111735981-G-A' -p44 -sS'genome_context_intronic_sequence' -p45 -S'NC_000011.9(NM_001352419.1):c.-108-7C>T' -p46 -sS'hgvs_lrg_variant' -p47 -g4 -sS'hgvs_transcript_variant' -p48 -S'NM_001352419.1:c.-108-7C>T' -p49 -sS'hgvs_refseqgene_variant' -p50 -g4 -sS'primary_assembly_loci' -p51 -(dp52 -S'grch38' -p53 -(dp54 -g14 -S'NC_000011.10:g.111865258G>A' -p55 -sg16 -(dp56 -g18 -S'11' -p57 -sg20 -g21 -sg22 -S'111865258' -p58 -sg24 -g25 -sssS'grch37' -p59 -(dp60 -g14 -S'NC_000011.9:g.111735981G>A' -p61 -sg16 -(dp62 -g18 -g57 -sg20 -g21 -sg22 -S'111735981' -p63 -sg24 -g25 -sssS'hg38' -p64 -(dp65 -g14 -S'NC_000011.10:g.111865258G>A' -p66 -sg16 -(dp67 -g18 -S'chr11' -p68 -sg20 -g21 -sg22 -S'111865258' -p69 -sg24 -g25 -sssS'hg19' -p70 -(dp71 -g14 -S'NC_000011.9:g.111735981G>A' -p72 -sg16 -(dp73 -g18 -g68 -sg20 -g21 -sg22 -S'111735981' -p74 -sg24 -g25 -ssssS'reference_sequence_records' -p75 -(dp76 -S'protein' -p77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339348.1' -p78 -sS'transcript' -p79 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352419.1' -p80 -sssS'NM_001352412.1:c.-108-7C>T' -p81 -(dp82 -g3 -g4 -sg5 -(lp83 -S'RefSeqGene record not available' -p84 -asg8 -g4 -sg9 -(lp85 -(dp86 -S'grch37' -p87 -(dp88 -g14 -S'NW_003871080.1:g.117249G>A' -p89 -sg16 -(dp90 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p91 -sg24 -g25 -sssa(dp92 -S'hg19' -p93 -(dp94 -g14 -S'NW_003871080.1:g.117249G>A' -p95 -sg16 -(dp96 -g18 -S'NW_003871080.1' -p97 -sg20 -g21 -sg22 -S'117249' -p98 -sg24 -g25 -sssasg33 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 8, mRNA -p99 -sg35 -S'ALG9' -p100 -sg37 -(dp101 -g39 -S'NP_001339341.1:p.?' -p102 -sg41 -S'NP_001339341.1:p.?' 
-p103 -ssg43 -g44 -sg45 -S'NC_000011.9(NM_001352412.1):c.-108-7C>T' -p104 -sg47 -g4 -sg48 -S'NM_001352412.1:c.-108-7C>T' -p105 -sg50 -g4 -sg51 -(dp106 -S'grch38' -p107 -(dp108 -g14 -S'NC_000011.10:g.111865258G>A' -p109 -sg16 -(dp110 -g18 -g57 -sg20 -g21 -sg22 -S'111865258' -p111 -sg24 -g25 -sssS'grch37' -p112 -(dp113 -g14 -S'NC_000011.9:g.111735981G>A' -p114 -sg16 -(dp115 -g18 -g57 -sg20 -g21 -sg22 -S'111735981' -p116 -sg24 -g25 -sssg64 -(dp117 -g14 -S'NC_000011.10:g.111865258G>A' -p118 -sg16 -(dp119 -g18 -g68 -sg20 -g21 -sg22 -S'111865258' -p120 -sg24 -g25 -sssS'hg19' -p121 -(dp122 -g14 -S'NC_000011.9:g.111735981G>A' -p123 -sg16 -(dp124 -g18 -g68 -sg20 -g21 -sg22 -S'111735981' -p125 -sg24 -g25 -ssssg75 -(dp126 -g77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339341.1' -p127 -sg79 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352412.1' -p128 -sssS'NM_001077692.1:c.-108-7C>T' -p129 -(dp130 -g3 -g4 -sg5 -(lp131 -S'RefSeqGene record not available' -p132 -asg8 -g4 -sg9 -(lp133 -(dp134 -S'grch37' -p135 -(dp136 -g14 -S'NW_003871080.1:g.117249G>A' -p137 -sg16 -(dp138 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p139 -sg24 -g25 -sssa(dp140 -S'hg19' -p141 -(dp142 -g14 -S'NW_003871080.1:g.117249G>A' -p143 -sg16 -(dp144 -g18 -S'NW_003871080.1' -p145 -sg20 -g21 -sg22 -S'117249' -p146 -sg24 -g25 -sssasg33 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 4, mRNA -p147 -sg35 -S'ALG9' -p148 -sg37 -(dp149 -g39 -S'NP_001071160.1:p.?' -p150 -sg41 -S'NP_001071160.1:p.?' 
-p151 -ssg43 -g44 -sg45 -S'NC_000011.9(NM_001077692.1):c.-108-7C>T' -p152 -sg47 -g4 -sg48 -S'NM_001077692.1:c.-108-7C>T' -p153 -sg50 -g4 -sg51 -(dp154 -S'grch38' -p155 -(dp156 -g14 -S'NC_000011.10:g.111865258G>A' -p157 -sg16 -(dp158 -g18 -g57 -sg20 -g21 -sg22 -S'111865258' -p159 -sg24 -g25 -sssS'grch37' -p160 -(dp161 -g14 -S'NC_000011.9:g.111735981G>A' -p162 -sg16 -(dp163 -g18 -g57 -sg20 -g21 -sg22 -S'111735981' -p164 -sg24 -g25 -sssg64 -(dp165 -g14 -S'NC_000011.10:g.111865258G>A' -p166 -sg16 -(dp167 -g18 -g68 -sg20 -g21 -sg22 -S'111865258' -p168 -sg24 -g25 -sssS'hg19' -p169 -(dp170 -g14 -S'NC_000011.9:g.111735981G>A' -p171 -sg16 -(dp172 -g18 -g68 -sg20 -g21 -sg22 -S'111735981' -p173 -sg24 -g25 -ssssg75 -(dp174 -g77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001071160.1' -p175 -sg79 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077692.1' -p176 -sssS'NM_001352418.1:c.406-7C>T' -p177 -(dp178 -g3 -g4 -sg5 -(lp179 -S'RefSeqGene record not available' -p180 -asg8 -g4 -sg9 -(lp181 -(dp182 -S'grch37' -p183 -(dp184 -g14 -S'NW_003871080.1:g.117249G>A' -p185 -sg16 -(dp186 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p187 -sg24 -g25 -sssa(dp188 -S'hg19' -p189 -(dp190 -g14 -S'NW_003871080.1:g.117249G>A' -p191 -sg16 -(dp192 -g18 -S'NW_003871080.1' -p193 -sg20 -g21 -sg22 -S'117249' -p194 -sg24 -g25 -sssasg33 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 14, mRNA -p195 -sg35 -S'ALG9' -p196 -sg37 -(dp197 -g39 -S'NP_001339347.1:p.?' -p198 -sg41 -S'NP_001339347.1:p.?' 
-p199 -ssg43 -g44 -sg45 -S'NC_000011.9(NM_001352418.1):c.406-7C>T' -p200 -sg47 -g4 -sg48 -S'NM_001352418.1:c.406-7C>T' -p201 -sg50 -g4 -sg51 -(dp202 -S'grch38' -p203 -(dp204 -g14 -S'NC_000011.10:g.111865258G>A' -p205 -sg16 -(dp206 -g18 -g57 -sg20 -g21 -sg22 -S'111865258' -p207 -sg24 -g25 -sssS'grch37' -p208 -(dp209 -g14 -S'NC_000011.9:g.111735981G>A' -p210 -sg16 -(dp211 -g18 -g57 -sg20 -g21 -sg22 -S'111735981' -p212 -sg24 -g25 -sssg64 -(dp213 -g14 -S'NC_000011.10:g.111865258G>A' -p214 -sg16 -(dp215 -g18 -g68 -sg20 -g21 -sg22 -S'111865258' -p216 -sg24 -g25 -sssS'hg19' -p217 -(dp218 -g14 -S'NC_000011.9:g.111735981G>A' -p219 -sg16 -(dp220 -g18 -g68 -sg20 -g21 -sg22 -S'111735981' -p221 -sg24 -g25 -ssssg75 -(dp222 -g77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339347.1' -p223 -sg79 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352418.1' -p224 -sssS'NM_001352423.1:c.-108-7C>T' -p225 -(dp226 -g3 -g4 -sg5 -(lp227 -S'RefSeqGene record not available' -p228 -asg8 -g4 -sg9 -(lp229 -(dp230 -S'grch37' -p231 -(dp232 -g14 -S'NW_003871080.1:g.117249G>A' -p233 -sg16 -(dp234 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p235 -sg24 -g25 -sssa(dp236 -S'hg19' -p237 -(dp238 -g14 -S'NW_003871080.1:g.117249G>A' -p239 -sg16 -(dp240 -g18 -S'NW_003871080.1' -p241 -sg20 -g21 -sg22 -S'117249' -p242 -sg24 -g25 -sssasg33 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 19, mRNA -p243 -sg35 -S'ALG9' -p244 -sg37 -(dp245 -g39 -S'NP_001339352.1:p.?' -p246 -sg41 -S'NP_001339352.1:p.?' 
-p247 -ssg43 -g44 -sg45 -S'NC_000011.9(NM_001352423.1):c.-108-7C>T' -p248 -sg47 -g4 -sg48 -S'NM_001352423.1:c.-108-7C>T' -p249 -sg50 -g4 -sg51 -(dp250 -S'grch38' -p251 -(dp252 -g14 -S'NC_000011.10:g.111865258G>A' -p253 -sg16 -(dp254 -g18 -g57 -sg20 -g21 -sg22 -S'111865258' -p255 -sg24 -g25 -sssS'grch37' -p256 -(dp257 -g14 -S'NC_000011.9:g.111735981G>A' -p258 -sg16 -(dp259 -g18 -g57 -sg20 -g21 -sg22 -S'111735981' -p260 -sg24 -g25 -sssg64 -(dp261 -g14 -S'NC_000011.10:g.111865258G>A' -p262 -sg16 -(dp263 -g18 -g68 -sg20 -g21 -sg22 -S'111865258' -p264 -sg24 -g25 -sssS'hg19' -p265 -(dp266 -g14 -S'NC_000011.9:g.111735981G>A' -p267 -sg16 -(dp268 -g18 -g68 -sg20 -g21 -sg22 -S'111735981' -p269 -sg24 -g25 -ssssg75 -(dp270 -g77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339352.1' -p271 -sg79 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352423.1' -p272 -sssS'NM_001352415.1:c.-108-7C>T' -p273 -(dp274 -g3 -g4 -sg5 -(lp275 -S'RefSeqGene record not available' -p276 -asg8 -g4 -sg9 -(lp277 -(dp278 -S'grch37' -p279 -(dp280 -g14 -S'NW_003871080.1:g.117249G>A' -p281 -sg16 -(dp282 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p283 -sg24 -g25 -sssa(dp284 -S'hg19' -p285 -(dp286 -g14 -S'NW_003871080.1:g.117249G>A' -p287 -sg16 -(dp288 -g18 -S'NW_003871080.1' -p289 -sg20 -g21 -sg22 -S'117249' -p290 -sg24 -g25 -sssasg33 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 11, mRNA -p291 -sg35 -S'ALG9' -p292 -sg37 -(dp293 -g39 -S'NP_001339344.1:p.?' -p294 -sg41 -S'NP_001339344.1:p.?' 
-p295 -ssg43 -g44 -sg45 -S'NC_000011.9(NM_001352415.1):c.-108-7C>T' -p296 -sg47 -g4 -sg48 -S'NM_001352415.1:c.-108-7C>T' -p297 -sg50 -g4 -sg51 -(dp298 -S'grch38' -p299 -(dp300 -g14 -S'NC_000011.10:g.111865258G>A' -p301 -sg16 -(dp302 -g18 -g57 -sg20 -g21 -sg22 -S'111865258' -p303 -sg24 -g25 -sssS'grch37' -p304 -(dp305 -g14 -S'NC_000011.9:g.111735981G>A' -p306 -sg16 -(dp307 -g18 -g57 -sg20 -g21 -sg22 -S'111735981' -p308 -sg24 -g25 -sssg64 -(dp309 -g14 -S'NC_000011.10:g.111865258G>A' -p310 -sg16 -(dp311 -g18 -g68 -sg20 -g21 -sg22 -S'111865258' -p312 -sg24 -g25 -sssS'hg19' -p313 -(dp314 -g14 -S'NC_000011.9:g.111735981G>A' -p315 -sg16 -(dp316 -g18 -g68 -sg20 -g21 -sg22 -S'111735981' -p317 -sg24 -g25 -ssssg75 -(dp318 -g77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339344.1' -p319 -sg79 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352415.1' -p320 -sssS'NM_001352421.1:c.-108-7C>T' -p321 -(dp322 -g3 -g4 -sg5 -(lp323 -S'RefSeqGene record not available' -p324 -asg8 -g4 -sg9 -(lp325 -(dp326 -S'grch37' -p327 -(dp328 -g14 -S'NW_003871080.1:g.117249G>A' -p329 -sg16 -(dp330 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p331 -sg24 -g25 -sssa(dp332 -S'hg19' -p333 -(dp334 -g14 -S'NW_003871080.1:g.117249G>A' -p335 -sg16 -(dp336 -g18 -S'NW_003871080.1' -p337 -sg20 -g21 -sg22 -S'117249' -p338 -sg24 -g25 -sssasg33 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 17, mRNA -p339 -sg35 -S'ALG9' -p340 -sg37 -(dp341 -g39 -S'NP_001339350.1:p.?' -p342 -sg41 -S'NP_001339350.1:p.?' 
-p343 -ssg43 -g44 -sg45 -S'NC_000011.9(NM_001352421.1):c.-108-7C>T' -p344 -sg47 -g4 -sg48 -S'NM_001352421.1:c.-108-7C>T' -p345 -sg50 -g4 -sg51 -(dp346 -S'grch38' -p347 -(dp348 -g14 -S'NC_000011.10:g.111865258G>A' -p349 -sg16 -(dp350 -g18 -g57 -sg20 -g21 -sg22 -S'111865258' -p351 -sg24 -g25 -sssS'grch37' -p352 -(dp353 -g14 -S'NC_000011.9:g.111735981G>A' -p354 -sg16 -(dp355 -g18 -g57 -sg20 -g21 -sg22 -S'111735981' -p356 -sg24 -g25 -sssg64 -(dp357 -g14 -S'NC_000011.10:g.111865258G>A' -p358 -sg16 -(dp359 -g18 -g68 -sg20 -g21 -sg22 -S'111865258' -p360 -sg24 -g25 -sssS'hg19' -p361 -(dp362 -g14 -S'NC_000011.9:g.111735981G>A' -p363 -sg16 -(dp364 -g18 -g68 -sg20 -g21 -sg22 -S'111735981' -p365 -sg24 -g25 -ssssg75 -(dp366 -g77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339350.1' -p367 -sg79 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352421.1' -p368 -sssS'NM_001352411.1:c.-108-7C>T' -p369 -(dp370 -g3 -g4 -sg5 -(lp371 -S'RefSeqGene record not available' -p372 -asg8 -g4 -sg9 -(lp373 -(dp374 -S'grch37' -p375 -(dp376 -g14 -S'NW_003871080.1:g.117249G>A' -p377 -sg16 -(dp378 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p379 -sg24 -g25 -sssa(dp380 -S'hg19' -p381 -(dp382 -g14 -S'NW_003871080.1:g.117249G>A' -p383 -sg16 -(dp384 -g18 -S'NW_003871080.1' -p385 -sg20 -g21 -sg22 -S'117249' -p386 -sg24 -g25 -sssasg33 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 7, mRNA -p387 -sg35 -S'ALG9' -p388 -sg37 -(dp389 -g39 -S'NP_001339340.1:p.?' -p390 -sg41 -S'NP_001339340.1:p.?' 
-p391 -ssg43 -g44 -sg45 -S'NC_000011.9(NM_001352411.1):c.-108-7C>T' -p392 -sg47 -g4 -sg48 -S'NM_001352411.1:c.-108-7C>T' -p393 -sg50 -g4 -sg51 -(dp394 -S'grch38' -p395 -(dp396 -g14 -S'NC_000011.10:g.111865258G>A' -p397 -sg16 -(dp398 -g18 -g57 -sg20 -g21 -sg22 -S'111865258' -p399 -sg24 -g25 -sssS'grch37' -p400 -(dp401 -g14 -S'NC_000011.9:g.111735981G>A' -p402 -sg16 -(dp403 -g18 -g57 -sg20 -g21 -sg22 -S'111735981' -p404 -sg24 -g25 -sssg64 -(dp405 -g14 -S'NC_000011.10:g.111865258G>A' -p406 -sg16 -(dp407 -g18 -g68 -sg20 -g21 -sg22 -S'111865258' -p408 -sg24 -g25 -sssS'hg19' -p409 -(dp410 -g14 -S'NC_000011.9:g.111735981G>A' -p411 -sg16 -(dp412 -g18 -g68 -sg20 -g21 -sg22 -S'111735981' -p413 -sg24 -g25 -ssssg75 -(dp414 -g77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339340.1' -p415 -sg79 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352411.1' -p416 -sssS'NR_147984.1:n.782-7C>T' -p417 -(dp418 -g3 -g4 -sg5 -(lp419 -S'RefSeqGene record not available' -p420 -asg8 -g4 -sg9 -(lp421 -(dp422 -S'grch37' -p423 -(dp424 -g14 -S'NW_003871080.1:g.117249G>A' -p425 -sg16 -(dp426 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p427 -sg24 -g25 -sssa(dp428 -S'hg19' -p429 -(dp430 -g14 -S'NW_003871080.1:g.117249G>A' -p431 -sg16 -(dp432 -g18 -S'NW_003871080.1' -p433 -sg20 -g21 -sg22 -S'117249' -p434 -sg24 -g25 -sssasg33 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 20, non-coding RNA -p435 -sg35 -S'ALG9' -p436 -sg37 -(dp437 -g39 -S'Non-coding :n.' 
-p438 -sg41 -g438 -ssg43 -g44 -sg45 -S'NC_000011.9(NR_147984.1):c.782-7C>T' -p439 -sg47 -g4 -sg48 -S'NR_147984.1:n.782-7C>T' -p440 -sg50 -g4 -sg51 -(dp441 -S'grch38' -p442 -(dp443 -g14 -S'NC_000011.10:g.111865258G>A' -p444 -sg16 -(dp445 -g18 -g57 -sg20 -g21 -sg22 -S'111865258' -p446 -sg24 -g25 -sssS'grch37' -p447 -(dp448 -g14 -S'NC_000011.9:g.111735981G>A' -p449 -sg16 -(dp450 -g18 -g57 -sg20 -g21 -sg22 -S'111735981' -p451 -sg24 -g25 -sssg64 -(dp452 -g14 -S'NC_000011.10:g.111865258G>A' -p453 -sg16 -(dp454 -g18 -g68 -sg20 -g21 -sg22 -S'111865258' -p455 -sg24 -g25 -sssS'hg19' -p456 -(dp457 -g14 -S'NC_000011.9:g.111735981G>A' -p458 -sg16 -(dp459 -g18 -g68 -sg20 -g21 -sg22 -S'111735981' -p460 -sg24 -g25 -ssssg75 -(dp461 -g79 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_147984.1' -p462 -sssS'NM_001077691.1:c.-108-7C>T' -p463 -(dp464 -g3 -g4 -sg5 -(lp465 -S'RefSeqGene record not available' -p466 -asg8 -g4 -sg9 -(lp467 -(dp468 -S'grch37' -p469 -(dp470 -g14 -S'NW_003871080.1:g.117249G>A' -p471 -sg16 -(dp472 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p473 -sg24 -g25 -sssa(dp474 -S'hg19' -p475 -(dp476 -g14 -S'NW_003871080.1:g.117249G>A' -p477 -sg16 -(dp478 -g18 -S'NW_003871080.1' -p479 -sg20 -g21 -sg22 -S'117249' -p480 -sg24 -g25 -sssasg33 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 3, mRNA -p481 -sg35 -S'ALG9' -p482 -sg37 -(dp483 -g39 -S'NP_001071159.1:p.?' -p484 -sg41 -S'NP_001071159.1:p.?' 
-p485 -ssg43 -g44 -sg45 -S'NC_000011.9(NM_001077691.1):c.-108-7C>T' -p486 -sg47 -g4 -sg48 -S'NM_001077691.1:c.-108-7C>T' -p487 -sg50 -g4 -sg51 -(dp488 -S'grch38' -p489 -(dp490 -g14 -S'NC_000011.10:g.111865258G>A' -p491 -sg16 -(dp492 -g18 -g57 -sg20 -g21 -sg22 -S'111865258' -p493 -sg24 -g25 -sssS'grch37' -p494 -(dp495 -g14 -S'NC_000011.9:g.111735981G>A' -p496 -sg16 -(dp497 -g18 -g57 -sg20 -g21 -sg22 -S'111735981' -p498 -sg24 -g25 -sssg64 -(dp499 -g14 -S'NC_000011.10:g.111865258G>A' -p500 -sg16 -(dp501 -g18 -g68 -sg20 -g21 -sg22 -S'111865258' -p502 -sg24 -g25 -sssS'hg19' -p503 -(dp504 -g14 -S'NC_000011.9:g.111735981G>A' -p505 -sg16 -(dp506 -g18 -g68 -sg20 -g21 -sg22 -S'111735981' -p507 -sg24 -g25 -ssssg75 -(dp508 -g77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001071159.1' -p509 -sg79 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077691.1' -p510 -sssS'metadata' -p511 -(dp512 -S'variantvalidator_hgvs_version' -p513 -S'1.1.3' -p514 -sS'uta_schema' -p515 -S'uta_20180821' -p516 -sS'seqrepo_db' -p517 -S'2018-08-21' -p518 -sS'variantvalidator_version' -p519 -S'v0.2' -p520 -ssS'NM_001352410.1:c.-108-7C>T' -p521 -(dp522 -g3 -g4 -sg5 -(lp523 -S'RefSeqGene record not available' -p524 -asg8 -g4 -sg9 -(lp525 -(dp526 -S'grch37' -p527 -(dp528 -g14 -S'NW_003871080.1:g.117249G>A' -p529 -sg16 -(dp530 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p531 -sg24 -g25 -sssa(dp532 -S'hg19' -p533 -(dp534 -g14 -S'NW_003871080.1:g.117249G>A' -p535 -sg16 -(dp536 -g18 -S'NW_003871080.1' -p537 -sg20 -g21 -sg22 -S'117249' -p538 -sg24 -g25 -sssasg33 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 6, mRNA -p539 -sg35 -S'ALG9' -p540 -sg37 -(dp541 -g39 -S'NP_001339339.1:p.?' -p542 -sg41 -S'NP_001339339.1:p.?' 
-p543 -ssg43 -g44 -sg45 -S'NC_000011.9(NM_001352410.1):c.-108-7C>T' -p544 -sg47 -g4 -sg48 -S'NM_001352410.1:c.-108-7C>T' -p545 -sg50 -g4 -sg51 -(dp546 -S'grch38' -p547 -(dp548 -g14 -S'NC_000011.10:g.111865258G>A' -p549 -sg16 -(dp550 -g18 -g57 -sg20 -g21 -sg22 -S'111865258' -p551 -sg24 -g25 -sssS'grch37' -p552 -(dp553 -g14 -S'NC_000011.9:g.111735981G>A' -p554 -sg16 -(dp555 -g18 -g57 -sg20 -g21 -sg22 -S'111735981' -p556 -sg24 -g25 -sssg64 -(dp557 -g14 -S'NC_000011.10:g.111865258G>A' -p558 -sg16 -(dp559 -g18 -g68 -sg20 -g21 -sg22 -S'111865258' -p560 -sg24 -g25 -sssS'hg19' -p561 -(dp562 -g14 -S'NC_000011.9:g.111735981G>A' -p563 -sg16 -(dp564 -g18 -g68 -sg20 -g21 -sg22 -S'111735981' -p565 -sg24 -g25 -ssssg75 -(dp566 -g77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339339.1' -p567 -sg79 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352410.1' -p568 -sssS'NM_001077690.1:c.406-7C>T' -p569 -(dp570 -g3 -g4 -sg5 -(lp571 -S'RefSeqGene record not available' -p572 -asg8 -g4 -sg9 -(lp573 -(dp574 -S'grch37' -p575 -(dp576 -g14 -S'NW_003871080.1:g.117249G>A' -p577 -sg16 -(dp578 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p579 -sg24 -g25 -sssa(dp580 -S'hg19' -p581 -(dp582 -g14 -S'NW_003871080.1:g.117249G>A' -p583 -sg16 -(dp584 -g18 -S'NW_003871080.1' -p585 -sg20 -g21 -sg22 -S'117249' -p586 -sg24 -g25 -sssasg33 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 2, mRNA -p587 -sg35 -S'ALG9' -p588 -sg37 -(dp589 -g39 -S'NP_001071158.1:p.?' -p590 -sg41 -S'NP_001071158.1:p.?' 
-p591 -ssg43 -g44 -sg45 -S'NC_000011.9(NM_001077690.1):c.406-7C>T' -p592 -sg47 -g4 -sg48 -S'NM_001077690.1:c.406-7C>T' -p593 -sg50 -g4 -sg51 -(dp594 -S'grch38' -p595 -(dp596 -g14 -S'NC_000011.10:g.111865258G>A' -p597 -sg16 -(dp598 -g18 -g57 -sg20 -g21 -sg22 -S'111865258' -p599 -sg24 -g25 -sssS'grch37' -p600 -(dp601 -g14 -S'NC_000011.9:g.111735981G>A' -p602 -sg16 -(dp603 -g18 -g57 -sg20 -g21 -sg22 -S'111735981' -p604 -sg24 -g25 -sssg64 -(dp605 -g14 -S'NC_000011.10:g.111865258G>A' -p606 -sg16 -(dp607 -g18 -g68 -sg20 -g21 -sg22 -S'111865258' -p608 -sg24 -g25 -sssS'hg19' -p609 -(dp610 -g14 -S'NC_000011.9:g.111735981G>A' -p611 -sg16 -(dp612 -g18 -g68 -sg20 -g21 -sg22 -S'111735981' -p613 -sg24 -g25 -ssssg75 -(dp614 -g77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001071158.1' -p615 -sg79 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077690.1' -p616 -sssS'flag' -p617 -S'gene_variant' -p618 -sS'NM_001352422.1:c.-326-7C>T' -p619 -(dp620 -g3 -g4 -sg5 -(lp621 -S'RefSeqGene record not available' -p622 -asg8 -g4 -sg9 -(lp623 -(dp624 -S'grch37' -p625 -(dp626 -g14 -S'NW_003871080.1:g.117249G>A' -p627 -sg16 -(dp628 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p629 -sg24 -g25 -sssa(dp630 -S'hg19' -p631 -(dp632 -g14 -S'NW_003871080.1:g.117249G>A' -p633 -sg16 -(dp634 -g18 -S'NW_003871080.1' -p635 -sg20 -g21 -sg22 -S'117249' -p636 -sg24 -g25 -sssasg33 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 18, mRNA -p637 -sg35 -S'ALG9' -p638 -sg37 -(dp639 -g39 -S'NP_001339351.1:p.?' -p640 -sg41 -S'NP_001339351.1:p.?' 
-p641 -ssg43 -g44 -sg45 -S'NC_000011.9(NM_001352422.1):c.-326-7C>T' -p642 -sg47 -g4 -sg48 -S'NM_001352422.1:c.-326-7C>T' -p643 -sg50 -g4 -sg51 -(dp644 -S'grch38' -p645 -(dp646 -g14 -S'NC_000011.10:g.111865258G>A' -p647 -sg16 -(dp648 -g18 -g57 -sg20 -g21 -sg22 -S'111865258' -p649 -sg24 -g25 -sssS'grch37' -p650 -(dp651 -g14 -S'NC_000011.9:g.111735981G>A' -p652 -sg16 -(dp653 -g18 -g57 -sg20 -g21 -sg22 -S'111735981' -p654 -sg24 -g25 -sssg64 -(dp655 -g14 -S'NC_000011.10:g.111865258G>A' -p656 -sg16 -(dp657 -g18 -g68 -sg20 -g21 -sg22 -S'111865258' -p658 -sg24 -g25 -sssS'hg19' -p659 -(dp660 -g14 -S'NC_000011.9:g.111735981G>A' -p661 -sg16 -(dp662 -g18 -g68 -sg20 -g21 -sg22 -S'111735981' -p663 -sg24 -g25 -ssssg75 -(dp664 -g77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339351.1' -p665 -sg79 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352422.1' -p666 -sssS'NM_001352416.1:c.-108-7C>T' -p667 -(dp668 -g3 -g4 -sg5 -(lp669 -S'RefSeqGene record not available' -p670 -asg8 -g4 -sg9 -(lp671 -(dp672 -S'grch37' -p673 -(dp674 -g14 -S'NW_003871080.1:g.117249G>A' -p675 -sg16 -(dp676 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p677 -sg24 -g25 -sssa(dp678 -S'hg19' -p679 -(dp680 -g14 -S'NW_003871080.1:g.117249G>A' -p681 -sg16 -(dp682 -g18 -S'NW_003871080.1' -p683 -sg20 -g21 -sg22 -S'117249' -p684 -sg24 -g25 -sssasg33 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 12, mRNA -p685 -sg35 -S'ALG9' -p686 -sg37 -(dp687 -g39 -S'NP_001339345.1:p.?' -p688 -sg41 -S'NP_001339345.1:p.?' 
-p689 -ssg43 -g44 -sg45 -S'NC_000011.9(NM_001352416.1):c.-108-7C>T' -p690 -sg47 -g4 -sg48 -S'NM_001352416.1:c.-108-7C>T' -p691 -sg50 -g4 -sg51 -(dp692 -S'grch38' -p693 -(dp694 -g14 -S'NC_000011.10:g.111865258G>A' -p695 -sg16 -(dp696 -g18 -g57 -sg20 -g21 -sg22 -S'111865258' -p697 -sg24 -g25 -sssS'grch37' -p698 -(dp699 -g14 -S'NC_000011.9:g.111735981G>A' -p700 -sg16 -(dp701 -g18 -g57 -sg20 -g21 -sg22 -S'111735981' -p702 -sg24 -g25 -sssg64 -(dp703 -g14 -S'NC_000011.10:g.111865258G>A' -p704 -sg16 -(dp705 -g18 -g68 -sg20 -g21 -sg22 -S'111865258' -p706 -sg24 -g25 -sssS'hg19' -p707 -(dp708 -g14 -S'NC_000011.9:g.111735981G>A' -p709 -sg16 -(dp710 -g18 -g68 -sg20 -g21 -sg22 -S'111735981' -p711 -sg24 -g25 -ssssg75 -(dp712 -g77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339345.1' -p713 -sg79 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352416.1' -p714 -sssS'NM_001352420.1:c.-108-7C>T' -p715 -(dp716 -g3 -g4 -sg5 -(lp717 -S'RefSeqGene record not available' -p718 -asg8 -g4 -sg9 -(lp719 -(dp720 -S'grch37' -p721 -(dp722 -g14 -S'NW_003871080.1:g.117249G>A' -p723 -sg16 -(dp724 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p725 -sg24 -g25 -sssa(dp726 -S'hg19' -p727 -(dp728 -g14 -S'NW_003871080.1:g.117249G>A' -p729 -sg16 -(dp730 -g18 -S'NW_003871080.1' -p731 -sg20 -g21 -sg22 -S'117249' -p732 -sg24 -g25 -sssasg33 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 16, mRNA -p733 -sg35 -S'ALG9' -p734 -sg37 -(dp735 -g39 -S'NP_001339349.1:p.?' -p736 -sg41 -S'NP_001339349.1:p.?' 
-p737 -ssg43 -g44 -sg45 -S'NC_000011.9(NM_001352420.1):c.-108-7C>T' -p738 -sg47 -g4 -sg48 -S'NM_001352420.1:c.-108-7C>T' -p739 -sg50 -g4 -sg51 -(dp740 -S'grch38' -p741 -(dp742 -g14 -S'NC_000011.10:g.111865258G>A' -p743 -sg16 -(dp744 -g18 -g57 -sg20 -g21 -sg22 -S'111865258' -p745 -sg24 -g25 -sssS'grch37' -p746 -(dp747 -g14 -S'NC_000011.9:g.111735981G>A' -p748 -sg16 -(dp749 -g18 -g57 -sg20 -g21 -sg22 -S'111735981' -p750 -sg24 -g25 -sssg64 -(dp751 -g14 -S'NC_000011.10:g.111865258G>A' -p752 -sg16 -(dp753 -g18 -g68 -sg20 -g21 -sg22 -S'111865258' -p754 -sg24 -g25 -sssS'hg19' -p755 -(dp756 -g14 -S'NC_000011.9:g.111735981G>A' -p757 -sg16 -(dp758 -g18 -g68 -sg20 -g21 -sg22 -S'111735981' -p759 -sg24 -g25 -ssssg75 -(dp760 -g77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339349.1' -p761 -sg79 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352420.1' -p762 -sssS'NM_024740.2:c.406-7C>T' -p763 -(dp764 -g3 -g4 -sg5 -(lp765 -S'RefSeqGene record not available' -p766 -asg8 -g4 -sg9 -(lp767 -(dp768 -S'grch37' -p769 -(dp770 -g14 -S'NW_003871080.1:g.117249G>A' -p771 -sg16 -(dp772 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p773 -sg24 -g25 -sssa(dp774 -S'hg19' -p775 -(dp776 -g14 -S'NW_003871080.1:g.117249G>A' -p777 -sg16 -(dp778 -g18 -S'NW_003871080.1' -p779 -sg20 -g21 -sg22 -S'117249' -p780 -sg24 -g25 -sssasg33 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 1, mRNA -p781 -sg35 -S'ALG9' -p782 -sg37 -(dp783 -g39 -S'NP_079016.2:p.?' -p784 -sg41 -S'NP_079016.2:p.?' 
-p785 -ssg43 -g44 -sg45 -S'NC_000011.9(NM_024740.2):c.406-7C>T' -p786 -sg47 -g4 -sg48 -S'NM_024740.2:c.406-7C>T' -p787 -sg50 -g4 -sg51 -(dp788 -S'grch38' -p789 -(dp790 -g14 -S'NC_000011.10:g.111865258G>A' -p791 -sg16 -(dp792 -g18 -g57 -sg20 -g21 -sg22 -S'111865258' -p793 -sg24 -g25 -sssS'grch37' -p794 -(dp795 -g14 -S'NC_000011.9:g.111735981G>A' -p796 -sg16 -(dp797 -g18 -g57 -sg20 -g21 -sg22 -S'111735981' -p798 -sg24 -g25 -sssg64 -(dp799 -g14 -S'NC_000011.10:g.111865258G>A' -p800 -sg16 -(dp801 -g18 -g68 -sg20 -g21 -sg22 -S'111865258' -p802 -sg24 -g25 -sssS'hg19' -p803 -(dp804 -g14 -S'NC_000011.9:g.111735981G>A' -p805 -sg16 -(dp806 -g18 -g68 -sg20 -g21 -sg22 -S'111735981' -p807 -sg24 -g25 -ssssg75 -(dp808 -g77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_079016.2' -p809 -sg79 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_024740.2' -p810 -sssS'NM_001352414.1:c.-108-7C>T' -p811 -(dp812 -g3 -g4 -sg5 -(lp813 -S'RefSeqGene record not available' -p814 -asg8 -g4 -sg9 -(lp815 -(dp816 -S'grch37' -p817 -(dp818 -g14 -S'NW_003871080.1:g.117249G>A' -p819 -sg16 -(dp820 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p821 -sg24 -g25 -sssa(dp822 -S'hg19' -p823 -(dp824 -g14 -S'NW_003871080.1:g.117249G>A' -p825 -sg16 -(dp826 -g18 -S'NW_003871080.1' -p827 -sg20 -g21 -sg22 -S'117249' -p828 -sg24 -g25 -sssasg33 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 10, mRNA -p829 -sg35 -S'ALG9' -p830 -sg37 -(dp831 -g39 -S'NP_001339343.1:p.?' -p832 -sg41 -S'NP_001339343.1:p.?' 
-p833 -ssg43 -g44 -sg45 -S'NC_000011.9(NM_001352414.1):c.-108-7C>T' -p834 -sg47 -g4 -sg48 -S'NM_001352414.1:c.-108-7C>T' -p835 -sg50 -g4 -sg51 -(dp836 -S'grch38' -p837 -(dp838 -g14 -S'NC_000011.10:g.111865258G>A' -p839 -sg16 -(dp840 -g18 -g57 -sg20 -g21 -sg22 -S'111865258' -p841 -sg24 -g25 -sssS'grch37' -p842 -(dp843 -g14 -S'NC_000011.9:g.111735981G>A' -p844 -sg16 -(dp845 -g18 -g57 -sg20 -g21 -sg22 -S'111735981' -p846 -sg24 -g25 -sssg64 -(dp847 -g14 -S'NC_000011.10:g.111865258G>A' -p848 -sg16 -(dp849 -g18 -g68 -sg20 -g21 -sg22 -S'111865258' -p850 -sg24 -g25 -sssS'hg19' -p851 -(dp852 -g14 -S'NC_000011.9:g.111735981G>A' -p853 -sg16 -(dp854 -g18 -g68 -sg20 -g21 -sg22 -S'111735981' -p855 -sg24 -g25 -ssssg75 -(dp856 -g77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339343.1' -p857 -sg79 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352414.1' -p858 -sssS'NM_001352417.1:c.406-7C>T' -p859 -(dp860 -g3 -g4 -sg5 -(lp861 -S'RefSeqGene record not available' -p862 -asg8 -g4 -sg9 -(lp863 -(dp864 -S'grch37' -p865 -(dp866 -g14 -S'NW_003871080.1:g.117249G>A' -p867 -sg16 -(dp868 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p869 -sg24 -g25 -sssa(dp870 -S'hg19' -p871 -(dp872 -g14 -S'NW_003871080.1:g.117249G>A' -p873 -sg16 -(dp874 -g18 -S'NW_003871080.1' -p875 -sg20 -g21 -sg22 -S'117249' -p876 -sg24 -g25 -sssasg33 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 13, mRNA -p877 -sg35 -S'ALG9' -p878 -sg37 -(dp879 -g39 -S'NP_001339346.1:p.?' -p880 -sg41 -S'NP_001339346.1:p.?' 
-p881 -ssg43 -g44 -sg45 -S'NC_000011.9(NM_001352417.1):c.406-7C>T' -p882 -sg47 -g4 -sg48 -S'NM_001352417.1:c.406-7C>T' -p883 -sg50 -g4 -sg51 -(dp884 -S'grch38' -p885 -(dp886 -g14 -S'NC_000011.10:g.111865258G>A' -p887 -sg16 -(dp888 -g18 -g57 -sg20 -g21 -sg22 -S'111865258' -p889 -sg24 -g25 -sssS'grch37' -p890 -(dp891 -g14 -S'NC_000011.9:g.111735981G>A' -p892 -sg16 -(dp893 -g18 -g57 -sg20 -g21 -sg22 -S'111735981' -p894 -sg24 -g25 -sssg64 -(dp895 -g14 -S'NC_000011.10:g.111865258G>A' -p896 -sg16 -(dp897 -g18 -g68 -sg20 -g21 -sg22 -S'111865258' -p898 -sg24 -g25 -sssS'hg19' -p899 -(dp900 -g14 -S'NC_000011.9:g.111735981G>A' -p901 -sg16 -(dp902 -g18 -g68 -sg20 -g21 -sg22 -S'111735981' -p903 -sg24 -g25 -ssssg75 -(dp904 -g77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339346.1' -p905 -sg79 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352417.1' -p906 -sssS'NM_001352409.1:c.-108-7C>T' -p907 -(dp908 -g3 -g4 -sg5 -(lp909 -S'RefSeqGene record not available' -p910 -asg8 -g4 -sg9 -(lp911 -(dp912 -S'grch37' -p913 -(dp914 -g14 -S'NW_003871080.1:g.117249G>A' -p915 -sg16 -(dp916 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p917 -sg24 -g25 -sssa(dp918 -S'hg19' -p919 -(dp920 -g14 -S'NW_003871080.1:g.117249G>A' -p921 -sg16 -(dp922 -g18 -S'NW_003871080.1' -p923 -sg20 -g21 -sg22 -S'117249' -p924 -sg24 -g25 -sssasg33 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 5, mRNA -p925 -sg35 -S'ALG9' -p926 -sg37 -(dp927 -g39 -S'NP_001339338.1:p.?' -p928 -sg41 -S'NP_001339338.1:p.?' 
-p929 -ssg43 -g44 -sg45 -S'NC_000011.9(NM_001352409.1):c.-108-7C>T' -p930 -sg47 -g4 -sg48 -S'NM_001352409.1:c.-108-7C>T' -p931 -sg50 -g4 -sg51 -(dp932 -S'grch38' -p933 -(dp934 -g14 -S'NC_000011.10:g.111865258G>A' -p935 -sg16 -(dp936 -g18 -g57 -sg20 -g21 -sg22 -S'111865258' -p937 -sg24 -g25 -sssS'grch37' -p938 -(dp939 -g14 -S'NC_000011.9:g.111735981G>A' -p940 -sg16 -(dp941 -g18 -g57 -sg20 -g21 -sg22 -S'111735981' -p942 -sg24 -g25 -sssg64 -(dp943 -g14 -S'NC_000011.10:g.111865258G>A' -p944 -sg16 -(dp945 -g18 -g68 -sg20 -g21 -sg22 -S'111865258' -p946 -sg24 -g25 -sssS'hg19' -p947 -(dp948 -g14 -S'NC_000011.9:g.111735981G>A' -p949 -sg16 -(dp950 -g18 -g68 -sg20 -g21 -sg22 -S'111735981' -p951 -sg24 -g25 -ssssg75 -(dp952 -g77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339338.1' -p953 -sg79 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352409.1' -p954 -sssS'NM_001352413.1:c.-108-7C>T' -p955 -(dp956 -g3 -g4 -sg5 -(lp957 -S'RefSeqGene record not available' -p958 -asg8 -g4 -sg9 -(lp959 -(dp960 -S'grch37' -p961 -(dp962 -g14 -S'NW_003871080.1:g.117249G>A' -p963 -sg16 -(dp964 -g18 -g19 -sg20 -g21 -sg22 -S'117249' -p965 -sg24 -g25 -sssa(dp966 -S'hg19' -p967 -(dp968 -g14 -S'NW_003871080.1:g.117249G>A' -p969 -sg16 -(dp970 -g18 -S'NW_003871080.1' -p971 -sg20 -g21 -sg22 -S'117249' -p972 -sg24 -g25 -sssasg33 -VHomo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 9, mRNA -p973 -sg35 -S'ALG9' -p974 -sg37 -(dp975 -g39 -S'NP_001339342.1:p.?' -p976 -sg41 -S'NP_001339342.1:p.?' 
-p977 -ssg43 -g44 -sg45 -S'NC_000011.9(NM_001352413.1):c.-108-7C>T' -p978 -sg47 -g4 -sg48 -S'NM_001352413.1:c.-108-7C>T' -p979 -sg50 -g4 -sg51 -(dp980 -S'grch38' -p981 -(dp982 -g14 -S'NC_000011.10:g.111865258G>A' -p983 -sg16 -(dp984 -g18 -g57 -sg20 -g21 -sg22 -S'111865258' -p985 -sg24 -g25 -sssS'grch37' -p986 -(dp987 -g14 -S'NC_000011.9:g.111735981G>A' -p988 -sg16 -(dp989 -g18 -g57 -sg20 -g21 -sg22 -S'111735981' -p990 -sg24 -g25 -sssg64 -(dp991 -g14 -S'NC_000011.10:g.111865258G>A' -p992 -sg16 -(dp993 -g18 -g68 -sg20 -g21 -sg22 -S'111865258' -p994 -sg24 -g25 -sssS'hg19' -p995 -(dp996 -g14 -S'NC_000011.9:g.111735981G>A' -p997 -sg16 -(dp998 -g18 -g68 -sg20 -g21 -sg22 -S'111735981' -p999 -sg24 -g25 -ssssg75 -(dp1000 -g77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339342.1' -p1001 -sg79 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352413.1' -p1002 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant215.txt b/VariantValidator/testing/testOutputsMasterITS/variant215.txt deleted file mode 100644 index 8941cc1c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant215.txt +++ /dev/null @@ -1,316 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NR_037918.2:n.1184+11736G>T' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -(dp13 -S'grch38' -p14 -(dp15 -S'hgvs_genomic_description' -p16 -S'NT_187658.1:g.69187C>A' -p17 -sS'vcf' -p18 -(dp19 -S'chr' -p20 -S'HSCHR12_3_CTG2' -p21 -sS'ref' -p22 -VC -p23 -sS'pos' -p24 -S'69187' -p25 -sS'alt' -p26 -VA -p27 -sssa(dp28 -S'hg38' -p29 -(dp30 -g16 -S'NT_187658.1:g.69187C>A' -p31 -sg18 -(dp32 -g20 -S'chr12_KI270904v1_alt' -p33 -sg22 -g23 -sg24 -S'69187' -p34 -sg26 -g27 -sssa(dp35 -S'grch37' -p36 -(dp37 -g16 -S'NW_003571047.1:g.69187C>A' -p38 -sg18 -(dp39 -g20 -S'HG1133_PATCH' -p40 -sg22 -g23 -sg24 -S'69187' 
-p41 -sg26 -g27 -sssa(dp42 -S'hg19' -p43 -(dp44 -g16 -S'NW_003571047.1:g.69187C>A' -p45 -sg18 -(dp46 -g20 -S'NW_003571047.1' -p47 -sg22 -g23 -sg24 -S'69187' -p48 -sg26 -g27 -sssa(dp49 -S'grch37' -p50 -(dp51 -g16 -S'NW_003571050.1:g.69187C>A' -p52 -sg18 -(dp53 -g20 -S'HSCHR12_2_CTG2' -p54 -sg22 -g23 -sg24 -S'69187' -p55 -sg26 -g27 -sssa(dp56 -S'hg19' -p57 -(dp58 -g16 -S'NW_003571050.1:g.69187C>A' -p59 -sg18 -(dp60 -g20 -S'NW_003571050.1' -p61 -sg22 -g23 -sg24 -S'69187' -p62 -sg26 -g27 -sssa(dp63 -S'grch38' -p64 -(dp65 -g16 -S'NW_003571050.1:g.69187C>A' -p66 -sg18 -(dp67 -g20 -g54 -sg22 -g23 -sg24 -S'69187' -p68 -sg26 -g27 -sssa(dp69 -g29 -(dp70 -g16 -S'NW_003571050.1:g.69187C>A' -p71 -sg18 -(dp72 -g20 -S'chr12_GL877876v1_alt' -p73 -sg22 -g23 -sg24 -S'69187' -p74 -sg26 -g27 -sssasS'transcript_description' -p75 -VHomo sapiens PRH1-PRR4 readthrough (PRH1-PRR4), long non-coding RNA -p76 -sS'gene_symbol' -p77 -S'PRH1-PRR4' -p78 -sS'hgvs_predicted_protein_consequence' -p79 -(dp80 -S'tlr' -p81 -S'Non-coding :n.' 
-p82 -sS'slr' -p83 -g82 -ssS'submitted_variant' -p84 -S'12-11023080-C-A' -p85 -sS'genome_context_intronic_sequence' -p86 -S'NC_000012.11(NR_037918.2):c.1184+11736G>T' -p87 -sS'hgvs_lrg_variant' -p88 -g6 -sS'hgvs_transcript_variant' -p89 -S'NR_037918.2:n.1184+11736G>T' -p90 -sS'hgvs_refseqgene_variant' -p91 -g6 -sS'primary_assembly_loci' -p92 -(dp93 -S'hg19' -p94 -(dp95 -g16 -S'NC_000012.11:g.11023080C>A' -p96 -sg18 -(dp97 -g20 -S'chr12' -p98 -sg22 -g23 -sg24 -S'11023080' -p99 -sg26 -g27 -sssg29 -(dp100 -g16 -S'NC_000012.12:g.10870481C>A' -p101 -sg18 -(dp102 -g20 -g98 -sg22 -g23 -sg24 -S'10870481' -p103 -sg26 -g27 -sssS'grch37' -p104 -(dp105 -g16 -S'NC_000012.11:g.11023080C>A' -p106 -sg18 -(dp107 -g20 -S'12' -p108 -sg22 -g23 -sg24 -S'11023080' -p109 -sg26 -g27 -sssS'grch38' -p110 -(dp111 -g16 -S'NC_000012.12:g.10870481C>A' -p112 -sg18 -(dp113 -g20 -g108 -sg22 -g23 -sg24 -S'10870481' -p114 -sg26 -g27 -ssssS'reference_sequence_records' -p115 -(dp116 -S'transcript' -p117 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_037918.2' -p118 -sssS'metadata' -p119 -(dp120 -S'variantvalidator_hgvs_version' -p121 -S'1.1.3' -p122 -sS'uta_schema' -p123 -S'uta_20180821' -p124 -sS'seqrepo_db' -p125 -S'2018-08-21' -p126 -sS'variantvalidator_version' -p127 -S'v0.2' -p128 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant216.txt b/VariantValidator/testing/testOutputsMasterITS/variant216.txt deleted file mode 100644 index aa954e68..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant216.txt +++ /dev/null @@ -1,485 +0,0 @@ -(dp0 -S'NM_020297.3:c.2199-1302del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000012.11:g.22018712TC>T automapped to NC_000012.11:g.22018713delC' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens ATP binding cassette subfamily C member 9 (ABCC9), transcript variant SUR2B, mRNA -p13 -sS'gene_symbol' -p14 -S'ABCC9' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_064693.2:p.?' -p19 -sS'slr' -p20 -S'NP_064693.2:p.?' -p21 -ssS'submitted_variant' -p22 -S'12-22018712-TC-T' -p23 -sS'genome_context_intronic_sequence' -p24 -S'NC_000012.11(NM_020297.3):c.2199-1302del' -p25 -sS'hgvs_lrg_variant' -p26 -g4 -sS'hgvs_transcript_variant' -p27 -S'NM_020297.3:c.2199-1302del' -p28 -sS'hgvs_refseqgene_variant' -p29 -g4 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000012.11:g.22018713del' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr12' -p39 -sS'ref' -p40 -S'TC' -p41 -sS'pos' -p42 -S'22018712' -p43 -sS'alt' -p44 -S'T' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000012.12:g.21865779del' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'TC' -p50 -sg42 -S'21865778' -p51 -sg44 -g45 -sssS'grch37' -p52 -(dp53 -g34 -S'NC_000012.11:g.22018713del' -p54 -sg36 -(dp55 -g38 -S'12' -p56 -sg40 -S'TC' -p57 -sg42 -S'22018712' -p58 -sg44 -g45 -sssS'grch38' -p59 -(dp60 -g34 -S'NC_000012.12:g.21865779del' -p61 -sg36 -(dp62 -g38 -g56 -sg40 -S'TC' -p63 -sg42 -S'21865778' -p64 -sg44 -g45 -ssssS'reference_sequence_records' -p65 -(dp66 -S'protein' -p67 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NP_064693.2' -p68 -sS'transcript' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_020297.3' -p70 -sssS'NM_005691.3:c.2199-1302del' -p71 -(dp72 -g3 -g4 -sg5 -(lp73 -S'NC_000012.11:g.22018712TC>T automapped to NC_000012.11:g.22018713delC' -p74 -aS'RefSeqGene record not available' -p75 -asg9 -g4 -sg10 -(lp76 -sg12 -VHomo sapiens ATP binding cassette subfamily C member 9 (ABCC9), transcript variant SUR2A, mRNA -p77 -sg14 -S'ABCC9' -p78 -sg16 -(dp79 -g18 -S'NP_005682.2:p.?' -p80 -sg20 -S'NP_005682.2:p.?' -p81 -ssg22 -g23 -sg24 -S'NC_000012.11(NM_005691.3):c.2199-1302del' -p82 -sg26 -g4 -sg27 -S'NM_005691.3:c.2199-1302del' -p83 -sg29 -g4 -sg30 -(dp84 -S'hg19' -p85 -(dp86 -g34 -S'NC_000012.11:g.22018713del' -p87 -sg36 -(dp88 -g38 -g39 -sg40 -S'TC' -p89 -sg42 -S'22018712' -p90 -sg44 -g45 -sssg46 -(dp91 -g34 -S'NC_000012.12:g.21865779del' -p92 -sg36 -(dp93 -g38 -g39 -sg40 -S'TC' -p94 -sg42 -S'21865778' -p95 -sg44 -g45 -sssS'grch37' -p96 -(dp97 -g34 -S'NC_000012.11:g.22018713del' -p98 -sg36 -(dp99 -g38 -g56 -sg40 -S'TC' -p100 -sg42 -S'22018712' -p101 -sg44 -g45 -sssS'grch38' -p102 -(dp103 -g34 -S'NC_000012.12:g.21865779del' -p104 -sg36 -(dp105 -g38 -g56 -sg40 -S'TC' -p106 -sg42 -S'21865778' -p107 -sg44 -g45 -ssssg65 -(dp108 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005682.2' -p109 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005691.3' -p110 -sssS'NM_020297.2:c.2199-1302del' -p111 -(dp112 -g3 -g4 -sg5 -(lp113 -S'NC_000012.11:g.22018712TC>T automapped to NC_000012.11:g.22018713delC' -p114 -aS'A more recent version of the selected reference sequence NM_020297.2 is available (NM_020297.3)' -p115 -aS'NM_020297.3:c.2199-1302delG MUST be fully validated prior to use in reports' -p116 -aS'select_variants=NM_020297.3:c.2199-1302del' -p117 -aS'RefSeqGene record not available' -p118 -asg9 -g4 -sg10 -(lp119 -sg12 -VHomo sapiens ATP-binding cassette, sub-family C (CFTR/MRP), member 9 (ABCC9), transcript variant SUR2B, mRNA -p120 -sg14 
-S'ABCC9' -p121 -sg16 -(dp122 -g18 -S'NP_064693.2:p.?' -p123 -sg20 -S'NP_064693.2:p.?' -p124 -ssg22 -g23 -sg24 -S'NC_000012.11(NM_020297.2):c.2199-1302del' -p125 -sg26 -g4 -sg27 -S'NM_020297.2:c.2199-1302del' -p126 -sg29 -g4 -sg30 -(dp127 -S'hg19' -p128 -(dp129 -g34 -S'NC_000012.11:g.22018713del' -p130 -sg36 -(dp131 -g38 -g39 -sg40 -S'TC' -p132 -sg42 -S'22018712' -p133 -sg44 -g45 -sssS'grch37' -p134 -(dp135 -g34 -S'NC_000012.11:g.22018713del' -p136 -sg36 -(dp137 -g38 -g56 -sg40 -S'TC' -p138 -sg42 -S'22018712' -p139 -sg44 -g45 -ssssg65 -(dp140 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_064693.2' -p141 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_020297.2' -p142 -sssS'flag' -p143 -S'gene_variant' -p144 -sS'NM_005691.2:c.2199-1302del' -p145 -(dp146 -g3 -g4 -sg5 -(lp147 -S'NC_000012.11:g.22018712TC>T automapped to NC_000012.11:g.22018713delC' -p148 -aS'A more recent version of the selected reference sequence NM_005691.2 is available (NM_005691.3)' -p149 -aS'NM_005691.3:c.2199-1302delG MUST be fully validated prior to use in reports' -p150 -aS'select_variants=NM_005691.3:c.2199-1302del' -p151 -aS'RefSeqGene record not available' -p152 -asg9 -g4 -sg10 -(lp153 -sg12 -VHomo sapiens ATP-binding cassette, sub-family C (CFTR/MRP), member 9 (ABCC9), transcript variant SUR2A, mRNA -p154 -sg14 -S'ABCC9' -p155 -sg16 -(dp156 -g18 -S'NP_005682.2:p.?' -p157 -sg20 -S'NP_005682.2:p.?' 
-p158 -ssg22 -g23 -sg24 -S'NC_000012.11(NM_005691.2):c.2199-1302del' -p159 -sg26 -g4 -sg27 -S'NM_005691.2:c.2199-1302del' -p160 -sg29 -g4 -sg30 -(dp161 -S'hg19' -p162 -(dp163 -g34 -S'NC_000012.11:g.22018713del' -p164 -sg36 -(dp165 -g38 -g39 -sg40 -S'TC' -p166 -sg42 -S'22018712' -p167 -sg44 -g45 -sssS'grch37' -p168 -(dp169 -g34 -S'NC_000012.11:g.22018713del' -p170 -sg36 -(dp171 -g38 -g56 -sg40 -S'TC' -p172 -sg42 -S'22018712' -p173 -sg44 -g45 -ssssg65 -(dp174 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005682.2' -p175 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005691.2' -p176 -sssS'metadata' -p177 -(dp178 -S'variantvalidator_hgvs_version' -p179 -S'1.1.3' -p180 -sS'uta_schema' -p181 -S'uta_20180821' -p182 -sS'seqrepo_db' -p183 -S'2018-08-21' -p184 -sS'variantvalidator_version' -p185 -S'v0.2' -p186 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant217.txt b/VariantValidator/testing/testOutputsMasterITS/variant217.txt deleted file mode 100644 index 97bc3f0a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant217.txt +++ /dev/null @@ -1,172 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000424.3:c.556-2A>G' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens keratin 5 (KRT5), mRNA -p14 -sS'gene_symbol' -p15 -S'KRT5' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000415.2:p.?' -p20 -sS'slr' -p21 -S'NP_000415.2:p.?' 
-p22 -ssS'submitted_variant' -p23 -S'12-52912946-T-C' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000012.11(NM_000424.3):c.556-2A>G' -p26 -sS'hgvs_lrg_variant' -p27 -g6 -sS'hgvs_transcript_variant' -p28 -S'NM_000424.3:c.556-2A>G' -p29 -sS'hgvs_refseqgene_variant' -p30 -g6 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'hgvs_genomic_description' -p35 -S'NC_000012.11:g.52912946T>C' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr12' -p40 -sS'ref' -p41 -VT -p42 -sS'pos' -p43 -S'52912946' -p44 -sS'alt' -p45 -VC -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000012.12:g.52519162T>C' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -g42 -sg43 -S'52519162' -p51 -sg45 -g46 -sssS'grch37' -p52 -(dp53 -g35 -S'NC_000012.11:g.52912946T>C' -p54 -sg37 -(dp55 -g39 -S'12' -p56 -sg41 -g42 -sg43 -S'52912946' -p57 -sg45 -g46 -sssS'grch38' -p58 -(dp59 -g35 -S'NC_000012.12:g.52519162T>C' -p60 -sg37 -(dp61 -g39 -g56 -sg41 -g42 -sg43 -S'52519162' -p62 -sg45 -g46 -ssssS'reference_sequence_records' -p63 -(dp64 -S'protein' -p65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000415.2' -p66 -sS'transcript' -p67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000424.3' -p68 -sssS'metadata' -p69 -(dp70 -S'variantvalidator_hgvs_version' -p71 -S'1.1.3' -p72 -sS'uta_schema' -p73 -S'uta_20180821' -p74 -sS'seqrepo_db' -p75 -S'2018-08-21' -p76 -sS'variantvalidator_version' -p77 -S'v0.2' -p78 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant218.txt b/VariantValidator/testing/testOutputsMasterITS/variant218.txt deleted file mode 100644 index 9a542b87..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant218.txt +++ /dev/null @@ -1,424 +0,0 @@ -(dp0 -S'NM_000277.2:c.1200del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000012.11:g.103234292TC>T automapped to NC_000012.11:g.103234294delC' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens phenylalanine hydroxylase (PAH), transcript variant 1, mRNA -p13 -sS'gene_symbol' -p14 -S'PAH' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_000268.1:p.(Asn401ThrfsTer51)' -p19 -sS'slr' -p20 -S'NP_000268.1:p.(N401Tfs*51)' -p21 -ssS'submitted_variant' -p22 -S'12-103234292-TC-T' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_000277.2:c.1200del' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000012.11:g.103234294del' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr12' -p38 -sS'ref' -p39 -S'TC' -p40 -sS'pos' -p41 -S'103234292' -p42 -sS'alt' -p43 -S'T' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000012.12:g.102840516del' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'TC' -p49 -sg41 -S'102840514' -p50 -sg43 -g44 -sssS'grch37' -p51 -(dp52 -g33 -S'NC_000012.11:g.103234294del' -p53 -sg35 -(dp54 -g37 -S'12' -p55 -sg39 -S'TC' -p56 -sg41 -S'103234292' -p57 -sg43 -g44 -sssS'grch38' -p58 -(dp59 -g33 -S'NC_000012.12:g.102840516del' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -S'TC' -p62 -sg41 -S'102840514' -p63 -sg43 -g44 -ssssS'reference_sequence_records' -p64 -(dp65 -S'protein' -p66 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000268.1' -p67 -sS'transcript' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000277.2' -p69 -sssS'NM_001354304.1:c.1200del' -p70 -(dp71 -g3 -g4 -sg5 -(lp72 -S'NC_000012.11:g.103234292TC>T automapped to NC_000012.11:g.103234294delC' -p73 -aS'RefSeqGene record not available' -p74 -asg9 -g4 -sg10 -(lp75 -sg12 -VHomo sapiens phenylalanine hydroxylase (PAH), transcript variant 2, mRNA -p76 -sg14 -S'PAH' -p77 -sg16 -(dp78 -g18 -S'NP_001341233.1:p.(Asn401ThrfsTer51)' -p79 -sg20 -S'NP_001341233.1:p.(N401Tfs*51)' -p80 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001354304.1:c.1200del' -p81 -sg28 -g4 -sg29 -(dp82 -S'hg19' -p83 -(dp84 -g33 -S'NC_000012.11:g.103234294del' -p85 -sg35 -(dp86 -g37 -g38 -sg39 -S'TC' -p87 -sg41 -S'103234292' -p88 -sg43 -g44 -sssg45 -(dp89 -g33 -S'NC_000012.12:g.102840516del' -p90 -sg35 -(dp91 -g37 -g38 -sg39 -S'TC' -p92 -sg41 -S'102840514' -p93 -sg43 -g44 -sssS'grch37' -p94 -(dp95 -g33 -S'NC_000012.11:g.103234294del' -p96 -sg35 -(dp97 -g37 -g55 -sg39 -S'TC' -p98 -sg41 -S'103234292' -p99 -sg43 -g44 -sssS'grch38' -p100 -(dp101 -g33 -S'NC_000012.12:g.102840516del' -p102 -sg35 -(dp103 -g37 -g55 -sg39 -S'TC' -p104 -sg41 -S'102840514' -p105 -sg43 -g44 -ssssg64 -(dp106 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341233.1' -p107 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354304.1' -p108 -sssS'flag' -p109 -S'gene_variant' -p110 -sS'metadata' -p111 -(dp112 -S'variantvalidator_hgvs_version' -p113 -S'1.1.3' -p114 -sS'uta_schema' -p115 -S'uta_20180821' -p116 -sS'seqrepo_db' -p117 -S'2018-08-21' -p118 -sS'variantvalidator_version' -p119 -S'v0.2' -p120 -ssS'NM_000277.1:c.1200del' -p121 -(dp122 -g3 -g4 -sg5 -(lp123 -S'NC_000012.11:g.103234292TC>T automapped to NC_000012.11:g.103234294delC' -p124 -aS'A more recent version of the selected reference sequence NM_000277.1 is available (NM_000277.2)' -p125 -aS'NM_000277.2:c.1200delG MUST be fully validated prior to use in reports' -p126 
-aS'select_variants=NM_000277.2:c.1200del' -p127 -aS'RefSeqGene record not available' -p128 -asg9 -g4 -sg10 -(lp129 -sg12 -VHomo sapiens phenylalanine hydroxylase (PAH), mRNA -p130 -sg14 -S'PAH' -p131 -sg16 -(dp132 -g18 -S'NP_000268.1:p.(Asn401ThrfsTer51)' -p133 -sg20 -S'NP_000268.1:p.(N401Tfs*51)' -p134 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_000277.1:c.1200del' -p135 -sg28 -g4 -sg29 -(dp136 -S'hg19' -p137 -(dp138 -g33 -S'NC_000012.11:g.103234294del' -p139 -sg35 -(dp140 -g37 -g38 -sg39 -S'TC' -p141 -sg41 -S'103234292' -p142 -sg43 -g44 -sssg45 -(dp143 -g33 -S'NC_000012.12:g.102840516del' -p144 -sg35 -(dp145 -g37 -g38 -sg39 -S'TC' -p146 -sg41 -S'102840514' -p147 -sg43 -g44 -sssS'grch37' -p148 -(dp149 -g33 -S'NC_000012.11:g.103234294del' -p150 -sg35 -(dp151 -g37 -g55 -sg39 -S'TC' -p152 -sg41 -S'103234292' -p153 -sg43 -g44 -sssS'grch38' -p154 -(dp155 -g33 -S'NC_000012.12:g.102840516del' -p156 -sg35 -(dp157 -g37 -g55 -sg39 -S'TC' -p158 -sg41 -S'102840514' -p159 -sg43 -g44 -ssssg64 -(dp160 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000268.1' -p161 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000277.1' -p162 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant219.txt b/VariantValidator/testing/testOutputsMasterITS/variant219.txt deleted file mode 100644 index 932fee13..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant219.txt +++ /dev/null @@ -1,408 +0,0 @@ -(dp0 -S'NM_001354304.1:c.-95-121A>G' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens phenylalanine hydroxylase (PAH), transcript variant 2, mRNA -p12 -sS'gene_symbol' -p13 -S'PAH' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001341233.1:p.?' -p18 -sS'slr' -p19 -S'NP_001341233.1:p.?' 
-p20 -ssS'submitted_variant' -p21 -S'12-103311124-T-C' -p22 -sS'genome_context_intronic_sequence' -p23 -S'NC_000012.11(NM_001354304.1):c.-95-121A>G' -p24 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_001354304.1:c.-95-121A>G' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000012.11:g.103311124T>C' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr12' -p38 -sS'ref' -p39 -VT -p40 -sS'pos' -p41 -S'103311124' -p42 -sS'alt' -p43 -VC -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000012.12:g.102917346T>C' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'102917346' -p49 -sg43 -g44 -sssS'grch37' -p50 -(dp51 -g33 -S'NC_000012.11:g.103311124T>C' -p52 -sg35 -(dp53 -g37 -S'12' -p54 -sg39 -g40 -sg41 -S'103311124' -p55 -sg43 -g44 -sssS'grch38' -p56 -(dp57 -g33 -S'NC_000012.12:g.102917346T>C' -p58 -sg35 -(dp59 -g37 -g54 -sg39 -g40 -sg41 -S'102917346' -p60 -sg43 -g44 -ssssS'reference_sequence_records' -p61 -(dp62 -S'protein' -p63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341233.1' -p64 -sS'transcript' -p65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354304.1' -p66 -sssS'flag' -p67 -S'gene_variant' -p68 -sS'NM_000277.2:c.-216A>G' -p69 -(dp70 -g3 -g4 -sg5 -(lp71 -S'RefSeqGene record not available' -p72 -asg8 -g4 -sg9 -(lp73 -sg11 -VHomo sapiens phenylalanine hydroxylase (PAH), transcript variant 1, mRNA -p74 -sg13 -S'PAH' -p75 -sg15 -(dp76 -g17 -S'NP_000268.1:p.?' -p77 -sg19 -S'NP_000268.1:p.?' 
-p78 -ssg21 -g22 -sg23 -g4 -sg25 -g4 -sg26 -S'NM_000277.2:c.-216A>G' -p79 -sg28 -g4 -sg29 -(dp80 -S'hg19' -p81 -(dp82 -g33 -S'NC_000012.11:g.103311124T>C' -p83 -sg35 -(dp84 -g37 -g38 -sg39 -g40 -sg41 -S'103311124' -p85 -sg43 -g44 -sssg45 -(dp86 -g33 -S'NC_000012.12:g.102917346T>C' -p87 -sg35 -(dp88 -g37 -g38 -sg39 -g40 -sg41 -S'102917346' -p89 -sg43 -g44 -sssS'grch37' -p90 -(dp91 -g33 -S'NC_000012.11:g.103311124T>C' -p92 -sg35 -(dp93 -g37 -g54 -sg39 -g40 -sg41 -S'103311124' -p94 -sg43 -g44 -sssS'grch38' -p95 -(dp96 -g33 -S'NC_000012.12:g.102917346T>C' -p97 -sg35 -(dp98 -g37 -g54 -sg39 -g40 -sg41 -S'102917346' -p99 -sg43 -g44 -ssssg61 -(dp100 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000268.1' -p101 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000277.2' -p102 -sssS'NM_000277.1:c.-215A>G' -p103 -(dp104 -g3 -g4 -sg5 -(lp105 -S'A more recent version of the selected reference sequence NM_000277.1 is available (NM_000277.2)' -p106 -aS'NM_000277.2:c.-215C>G MUST be fully validated prior to use in reports' -p107 -aS'select_variants=NM_000277.2:c.-215C>G' -p108 -aS'RefSeqGene record not available' -p109 -asg8 -g4 -sg9 -(lp110 -sg11 -VHomo sapiens phenylalanine hydroxylase (PAH), mRNA -p111 -sg13 -S'PAH' -p112 -sg15 -(dp113 -g17 -S'NP_000268.1:p.?' -p114 -sg19 -S'NP_000268.1:p.?' 
-p115 -ssg21 -g22 -sg23 -g4 -sg25 -g4 -sg26 -S'NM_000277.1:c.-215A>G' -p116 -sg28 -g4 -sg29 -(dp117 -S'hg19' -p118 -(dp119 -g33 -S'NC_000012.11:g.103311124T>C' -p120 -sg35 -(dp121 -g37 -g38 -sg39 -g40 -sg41 -S'103311124' -p122 -sg43 -g44 -sssg45 -(dp123 -g33 -S'NC_000012.12:g.102917346T>C' -p124 -sg35 -(dp125 -g37 -g38 -sg39 -g40 -sg41 -S'102917346' -p126 -sg43 -g44 -sssS'grch37' -p127 -(dp128 -g33 -S'NC_000012.11:g.103311124T>C' -p129 -sg35 -(dp130 -g37 -g54 -sg39 -g40 -sg41 -S'103311124' -p131 -sg43 -g44 -sssS'grch38' -p132 -(dp133 -g33 -S'NC_000012.12:g.102917346T>C' -p134 -sg35 -(dp135 -g37 -g54 -sg39 -g40 -sg41 -S'102917346' -p136 -sg43 -g44 -ssssg61 -(dp137 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000268.1' -p138 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000277.1' -p139 -sssS'metadata' -p140 -(dp141 -S'variantvalidator_hgvs_version' -p142 -S'1.1.3' -p143 -sS'uta_schema' -p144 -S'uta_20180821' -p145 -sS'seqrepo_db' -p146 -S'2018-08-21' -p147 -sS'variantvalidator_version' -p148 -S'v0.2' -p149 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant22.txt b/VariantValidator/testing/testOutputsMasterITS/variant22.txt deleted file mode 100644 index b100322f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant22.txt +++ /dev/null @@ -1,82 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant' -p7 -aS'Instead use NC_000011.9:g.5244828A=' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -g4 -sS'gene_symbol' -p13 -g4 -sS'hgvs_predicted_protein_consequence' -p14 -(dp15 -S'tlr' -p16 -g4 -sS'slr' -p17 -g4 -ssS'submitted_variant' -p18 -S'NM_000518.4:c.*2000C>T' -p19 -sS'genome_context_intronic_sequence' -p20 -g4 -sS'hgvs_lrg_variant' -p21 -g4 -sS'hgvs_transcript_variant' -p22 -g4 -sS'hgvs_refseqgene_variant' -p23 -g4 -sS'primary_assembly_loci' -p24 -(dp25 -sS'reference_sequence_records' -p26 -g4 -ssS'flag' -p27 -S'warning' -p28 -sS'metadata' -p29 -(dp30 -S'variantvalidator_hgvs_version' -p31 -S'1.1.3' -p32 -sS'uta_schema' -p33 -S'uta_20180821' -p34 -sS'seqrepo_db' -p35 -S'2018-08-21' -p36 -sS'variantvalidator_version' -p37 -S'v0.2' -p38 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant220.txt b/VariantValidator/testing/testOutputsMasterITS/variant220.txt deleted file mode 100644 index b7154ffc..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant220.txt +++ /dev/null @@ -1,1185 +0,0 @@ -(dp0 -S'NM_001319681.1:c.-366-1G>A' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 7, mRNA -p12 -sS'gene_symbol' -p13 -S'TCTN1' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001306610.1:p.?' -p18 -sS'slr' -p19 -S'NP_001306610.1:p.?' -p20 -ssS'submitted_variant' -p21 -S'12-111064166-G-A' -p22 -sS'genome_context_intronic_sequence' -p23 -S'NC_000012.11(NM_001319681.1):c.-366-1G>A' -p24 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_001319681.1:c.-366-1G>A' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000012.11:g.111064166G>A' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr12' -p38 -sS'ref' -p39 -S'G' -p40 -sS'pos' -p41 -S'111064166' -p42 -sS'alt' -p43 -S'A' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000012.12:g.110626361G>A' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'110626361' -p49 -sg43 -g44 -sssS'grch37' -p50 -(dp51 -g33 -S'NC_000012.11:g.111064166G>A' -p52 -sg35 -(dp53 -g37 -S'12' -p54 -sg39 -g40 -sg41 -S'111064166' -p55 -sg43 -g44 -sssS'grch38' -p56 -(dp57 -g33 -S'NC_000012.12:g.110626361G>A' -p58 -sg35 -(dp59 -g37 -g54 -sg39 -g40 -sg41 -S'110626361' -p60 -sg43 -g44 -ssssS'reference_sequence_records' -p61 -(dp62 -S'protein' -p63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001306610.1' -p64 -sS'transcript' -p65 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001319681.1' -p66 -sssS'NM_001319680.1:c.342-1G>A' -p67 -(dp68 -g3 -g4 -sg5 -(lp69 -S'RefSeqGene record not available' -p70 -asg8 -g4 -sg9 -(lp71 -sg11 -VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 6, mRNA -p72 -sg13 -S'TCTN1' -p73 -sg15 -(dp74 -g17 -S'NP_001306609.1:p.?' -p75 -sg19 -S'NP_001306609.1:p.?' -p76 -ssg21 -g22 -sg23 -S'NC_000012.11(NM_001319680.1):c.342-1G>A' -p77 -sg25 -g4 -sg26 -S'NM_001319680.1:c.342-1G>A' -p78 -sg28 -g4 -sg29 -(dp79 -S'hg19' -p80 -(dp81 -g33 -S'NC_000012.11:g.111064166G>A' -p82 -sg35 -(dp83 -g37 -g38 -sg39 -g40 -sg41 -S'111064166' -p84 -sg43 -g44 -sssg45 -(dp85 -g33 -S'NC_000012.12:g.110626361G>A' -p86 -sg35 -(dp87 -g37 -g38 -sg39 -g40 -sg41 -S'110626361' -p88 -sg43 -g44 -sssS'grch37' -p89 -(dp90 -g33 -S'NC_000012.11:g.111064166G>A' -p91 -sg35 -(dp92 -g37 -g54 -sg39 -g40 -sg41 -S'111064166' -p93 -sg43 -g44 -sssS'grch38' -p94 -(dp95 -g33 -S'NC_000012.12:g.110626361G>A' -p96 -sg35 -(dp97 -g37 -g54 -sg39 -g40 -sg41 -S'110626361' -p98 -sg43 -g44 -ssssg61 -(dp99 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001306609.1' -p100 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001319680.1' -p101 -sssS'NM_001082538.2:c.342-1G>A' -p102 -(dp103 -g3 -g4 -sg5 -(lp104 -S'RefSeqGene record not available' -p105 -asg8 -g4 -sg9 -(lp106 -sg11 -VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 1, mRNA -p107 -sg13 -S'TCTN1' -p108 -sg15 -(dp109 -g17 -S'NP_001076007.1:p.?' -p110 -sg19 -S'NP_001076007.1:p.?' 
-p111 -ssg21 -g22 -sg23 -S'NC_000012.11(NM_001082538.2):c.342-1G>A' -p112 -sg25 -g4 -sg26 -S'NM_001082538.2:c.342-1G>A' -p113 -sg28 -g4 -sg29 -(dp114 -S'hg19' -p115 -(dp116 -g33 -S'NC_000012.11:g.111064166G>A' -p117 -sg35 -(dp118 -g37 -g38 -sg39 -g40 -sg41 -S'111064166' -p119 -sg43 -g44 -sssg45 -(dp120 -g33 -S'NC_000012.12:g.110626361G>A' -p121 -sg35 -(dp122 -g37 -g38 -sg39 -g40 -sg41 -S'110626361' -p123 -sg43 -g44 -sssS'grch37' -p124 -(dp125 -g33 -S'NC_000012.11:g.111064166G>A' -p126 -sg35 -(dp127 -g37 -g54 -sg39 -g40 -sg41 -S'111064166' -p128 -sg43 -g44 -sssS'grch38' -p129 -(dp130 -g33 -S'NC_000012.12:g.110626361G>A' -p131 -sg35 -(dp132 -g37 -g54 -sg39 -g40 -sg41 -S'110626361' -p133 -sg43 -g44 -ssssg61 -(dp134 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001076007.1' -p135 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001082538.2' -p136 -sssS'metadata' -p137 -(dp138 -S'variantvalidator_hgvs_version' -p139 -S'1.1.3' -p140 -sS'uta_schema' -p141 -S'uta_20180821' -p142 -sS'seqrepo_db' -p143 -S'2018-08-21' -p144 -sS'variantvalidator_version' -p145 -S'v0.2' -p146 -ssS'NM_001173976.1:c.162-1G>A' -p147 -(dp148 -g3 -g4 -sg5 -(lp149 -S'RefSeqGene record not available' -p150 -asg8 -g4 -sg9 -(lp151 -sg11 -VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 5, mRNA -p152 -sg13 -S'TCTN1' -p153 -sg15 -(dp154 -g17 -S'NP_001167447.1:p.?' -p155 -sg19 -S'NP_001167447.1:p.?' 
-p156 -ssg21 -g22 -sg23 -S'NC_000012.11(NM_001173976.1):c.162-1G>A' -p157 -sg25 -g4 -sg26 -S'NM_001173976.1:c.162-1G>A' -p158 -sg28 -g4 -sg29 -(dp159 -S'hg19' -p160 -(dp161 -g33 -S'NC_000012.11:g.111064166G>A' -p162 -sg35 -(dp163 -g37 -g38 -sg39 -g40 -sg41 -S'111064166' -p164 -sg43 -g44 -sssg45 -(dp165 -g33 -S'NC_000012.12:g.110626361G>A' -p166 -sg35 -(dp167 -g37 -g38 -sg39 -g40 -sg41 -S'110626361' -p168 -sg43 -g44 -sssS'grch37' -p169 -(dp170 -g33 -S'NC_000012.11:g.111064166G>A' -p171 -sg35 -(dp172 -g37 -g54 -sg39 -g40 -sg41 -S'111064166' -p173 -sg43 -g44 -sssS'grch38' -p174 -(dp175 -g33 -S'NC_000012.12:g.110626361G>A' -p176 -sg35 -(dp177 -g37 -g54 -sg39 -g40 -sg41 -S'110626361' -p178 -sg43 -g44 -ssssg61 -(dp179 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167447.1' -p180 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001173976.1' -p181 -sssS'flag' -p182 -S'gene_variant' -p183 -sS'NM_001082537.2:c.342-1G>A' -p184 -(dp185 -g3 -g4 -sg5 -(lp186 -S'RefSeqGene record not available' -p187 -asg8 -g4 -sg9 -(lp188 -sg11 -VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 2, mRNA -p189 -sg13 -S'TCTN1' -p190 -sg15 -(dp191 -g17 -S'NP_001076006.1:p.?' -p192 -sg19 -S'NP_001076006.1:p.?' 
-p193 -ssg21 -g22 -sg23 -S'NC_000012.11(NM_001082537.2):c.342-1G>A' -p194 -sg25 -g4 -sg26 -S'NM_001082537.2:c.342-1G>A' -p195 -sg28 -g4 -sg29 -(dp196 -S'hg19' -p197 -(dp198 -g33 -S'NC_000012.11:g.111064166G>A' -p199 -sg35 -(dp200 -g37 -g38 -sg39 -g40 -sg41 -S'111064166' -p201 -sg43 -g44 -sssg45 -(dp202 -g33 -S'NC_000012.12:g.110626361G>A' -p203 -sg35 -(dp204 -g37 -g38 -sg39 -g40 -sg41 -S'110626361' -p205 -sg43 -g44 -sssS'grch37' -p206 -(dp207 -g33 -S'NC_000012.11:g.111064166G>A' -p208 -sg35 -(dp209 -g37 -g54 -sg39 -g40 -sg41 -S'111064166' -p210 -sg43 -g44 -sssS'grch38' -p211 -(dp212 -g33 -S'NC_000012.12:g.110626361G>A' -p213 -sg35 -(dp214 -g37 -g54 -sg39 -g40 -sg41 -S'110626361' -p215 -sg43 -g44 -ssssg61 -(dp216 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001076006.1' -p217 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001082537.2' -p218 -sssS'NR_135088.1:n.559-1G>A' -p219 -(dp220 -g3 -g4 -sg5 -(lp221 -S'RefSeqGene record not available' -p222 -asg8 -g4 -sg9 -(lp223 -sg11 -VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 9, non-coding RNA -p224 -sg13 -S'TCTN1' -p225 -sg15 -(dp226 -g17 -S'Non-coding :n.' 
-p227 -sg19 -g227 -ssg21 -g22 -sg23 -S'NC_000012.11(NR_135088.1):c.559-1G>A' -p228 -sg25 -g4 -sg26 -S'NR_135088.1:n.559-1G>A' -p229 -sg28 -g4 -sg29 -(dp230 -S'hg19' -p231 -(dp232 -g33 -S'NC_000012.11:g.111064166G>A' -p233 -sg35 -(dp234 -g37 -g38 -sg39 -g40 -sg41 -S'111064166' -p235 -sg43 -g44 -sssg45 -(dp236 -g33 -S'NC_000012.12:g.110626361G>A' -p237 -sg35 -(dp238 -g37 -g38 -sg39 -g40 -sg41 -S'110626361' -p239 -sg43 -g44 -sssS'grch37' -p240 -(dp241 -g33 -S'NC_000012.11:g.111064166G>A' -p242 -sg35 -(dp243 -g37 -g54 -sg39 -g40 -sg41 -S'111064166' -p244 -sg43 -g44 -sssS'grch38' -p245 -(dp246 -g33 -S'NC_000012.12:g.110626361G>A' -p247 -sg35 -(dp248 -g37 -g54 -sg39 -g40 -sg41 -S'110626361' -p249 -sg43 -g44 -ssssg61 -(dp250 -g65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_135088.1' -p251 -sssS'NM_024549.5:c.342-1G>A' -p252 -(dp253 -g3 -g4 -sg5 -(lp254 -S'RefSeqGene record not available' -p255 -asg8 -g4 -sg9 -(lp256 -sg11 -VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 3, mRNA -p257 -sg13 -S'TCTN1' -p258 -sg15 -(dp259 -g17 -S'NP_078825.2:p.?' -p260 -sg19 -S'NP_078825.2:p.?' 
-p261 -ssg21 -g22 -sg23 -S'NC_000012.11(NM_024549.5):c.342-1G>A' -p262 -sg25 -g4 -sg26 -S'NM_024549.5:c.342-1G>A' -p263 -sg28 -g4 -sg29 -(dp264 -S'hg19' -p265 -(dp266 -g33 -S'NC_000012.11:g.111064166G>A' -p267 -sg35 -(dp268 -g37 -g38 -sg39 -g40 -sg41 -S'111064166' -p269 -sg43 -g44 -sssg45 -(dp270 -g33 -S'NC_000012.12:g.110626361G>A' -p271 -sg35 -(dp272 -g37 -g38 -sg39 -g40 -sg41 -S'110626361' -p273 -sg43 -g44 -sssS'grch37' -p274 -(dp275 -g33 -S'NC_000012.11:g.111064166G>A' -p276 -sg35 -(dp277 -g37 -g54 -sg39 -g40 -sg41 -S'111064166' -p278 -sg43 -g44 -sssS'grch38' -p279 -(dp280 -g33 -S'NC_000012.12:g.110626361G>A' -p281 -sg35 -(dp282 -g37 -g54 -sg39 -g40 -sg41 -S'110626361' -p283 -sg43 -g44 -ssssg61 -(dp284 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_078825.2' -p285 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_024549.5' -p286 -sssS'NM_001319682.1:c.174-1G>A' -p287 -(dp288 -g3 -g4 -sg5 -(lp289 -S'RefSeqGene record not available' -p290 -asg8 -g4 -sg9 -(lp291 -sg11 -VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 8, mRNA -p292 -sg13 -S'TCTN1' -p293 -sg15 -(dp294 -g17 -S'NP_001306611.1:p.?' -p295 -sg19 -S'NP_001306611.1:p.?' 
-p296 -ssg21 -g22 -sg23 -S'NC_000012.11(NM_001319682.1):c.174-1G>A' -p297 -sg25 -g4 -sg26 -S'NM_001319682.1:c.174-1G>A' -p298 -sg28 -g4 -sg29 -(dp299 -S'hg19' -p300 -(dp301 -g33 -S'NC_000012.11:g.111064166G>A' -p302 -sg35 -(dp303 -g37 -g38 -sg39 -g40 -sg41 -S'111064166' -p304 -sg43 -g44 -sssg45 -(dp305 -g33 -S'NC_000012.12:g.110626361G>A' -p306 -sg35 -(dp307 -g37 -g38 -sg39 -g40 -sg41 -S'110626361' -p308 -sg43 -g44 -sssS'grch37' -p309 -(dp310 -g33 -S'NC_000012.11:g.111064166G>A' -p311 -sg35 -(dp312 -g37 -g54 -sg39 -g40 -sg41 -S'111064166' -p313 -sg43 -g44 -sssS'grch38' -p314 -(dp315 -g33 -S'NC_000012.12:g.110626361G>A' -p316 -sg35 -(dp317 -g37 -g54 -sg39 -g40 -sg41 -S'110626361' -p318 -sg43 -g44 -ssssg61 -(dp319 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001306611.1' -p320 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001319682.1' -p321 -sssS'NM_001173975.1:c.174-1G>A' -p322 -(dp323 -g3 -g4 -sg5 -(lp324 -S'A more recent version of the selected reference sequence NM_001173975.1 is available (NM_001173975.2)' -p325 -aS'NM_001173975.2:c.174-1G>A MUST be fully validated prior to use in reports' -p326 -aS'select_variants=NM_001173975.2:c.174-1G>A' -p327 -aS'RefSeqGene record not available' -p328 -asg8 -g4 -sg9 -(lp329 -sg11 -VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 4, mRNA -p330 -sg13 -S'TCTN1' -p331 -sg15 -(dp332 -g17 -S'NP_001167446.1:p.?' -p333 -sg19 -S'NP_001167446.1:p.?' 
-p334 -ssg21 -g22 -sg23 -S'NC_000012.11(NM_001173975.1):c.174-1G>A' -p335 -sg25 -g4 -sg26 -S'NM_001173975.1:c.174-1G>A' -p336 -sg28 -g4 -sg29 -(dp337 -S'hg19' -p338 -(dp339 -g33 -S'NC_000012.11:g.111064166G>A' -p340 -sg35 -(dp341 -g37 -g38 -sg39 -g40 -sg41 -S'111064166' -p342 -sg43 -g44 -sssS'grch37' -p343 -(dp344 -g33 -S'NC_000012.11:g.111064166G>A' -p345 -sg35 -(dp346 -g37 -g54 -sg39 -g40 -sg41 -S'111064166' -p347 -sg43 -g44 -ssssg61 -(dp348 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167446.1' -p349 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001173975.1' -p350 -sssS'NM_001173975.2:c.174-1G>A' -p351 -(dp352 -g3 -g4 -sg5 -(lp353 -S'RefSeqGene record not available' -p354 -asg8 -g4 -sg9 -(lp355 -sg11 -VHomo sapiens tectonic family member 1 (TCTN1), transcript variant 4, mRNA -p356 -sg13 -S'TCTN1' -p357 -sg15 -(dp358 -g17 -S'NP_001167446.1:p.?' -p359 -sg19 -S'NP_001167446.1:p.?' -p360 -ssg21 -g22 -sg23 -S'NC_000012.11(NM_001173975.2):c.174-1G>A' -p361 -sg25 -g4 -sg26 -S'NM_001173975.2:c.174-1G>A' -p362 -sg28 -g4 -sg29 -(dp363 -S'hg19' -p364 -(dp365 -g33 -S'NC_000012.11:g.111064166G>A' -p366 -sg35 -(dp367 -g37 -g38 -sg39 -g40 -sg41 -S'111064166' -p368 -sg43 -g44 -sssg45 -(dp369 -g33 -S'NC_000012.12:g.110626361G>A' -p370 -sg35 -(dp371 -g37 -g38 -sg39 -g40 -sg41 -S'110626361' -p372 -sg43 -g44 -sssS'grch37' -p373 -(dp374 -g33 -S'NC_000012.11:g.111064166G>A' -p375 -sg35 -(dp376 -g37 -g54 -sg39 -g40 -sg41 -S'111064166' -p377 -sg43 -g44 -sssS'grch38' -p378 -(dp379 -g33 -S'NC_000012.12:g.110626361G>A' -p380 -sg35 -(dp381 -g37 -g54 -sg39 -g40 -sg41 -S'110626361' -p382 -sg43 -g44 -ssssg61 -(dp383 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167446.1' -p384 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001173975.2' -p385 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant221.txt b/VariantValidator/testing/testOutputsMasterITS/variant221.txt deleted file mode 100644 index c712e68d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant221.txt +++ /dev/null @@ -1,418 +0,0 @@ -(dp0 -S'NM_001194995.1:c.210del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000012.11:g.123738430CA>C automapped to NC_000012.11:g.123738431delA' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens chromosome 12 open reading frame 65 (C12orf65), transcript variant 3, mRNA -p13 -sS'gene_symbol' -p14 -S'C12orf65' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001181924.1:p.(Gly72AlafsTer13)' -p19 -sS'slr' -p20 -S'NP_001181924.1:p.(G72Afs*13)' -p21 -ssS'submitted_variant' -p22 -S'12-123738430-CA-C' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_001194995.1:c.210del' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000012.11:g.123738431del' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr12' -p38 -sS'ref' -p39 -S'CA' -p40 -sS'pos' -p41 -S'123738430' -p42 -sS'alt' -p43 -S'C' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000012.12:g.123253884del' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'CA' -p49 -sg41 -S'123253883' -p50 -sg43 -g44 -sssS'grch37' -p51 -(dp52 -g33 -S'NC_000012.11:g.123738431del' -p53 -sg35 -(dp54 -g37 -S'12' -p55 -sg39 -S'CA' -p56 -sg41 -S'123738430' -p57 -sg43 -g44 -sssS'grch38' -p58 -(dp59 -g33 -S'NC_000012.12:g.123253884del' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -S'CA' -p62 -sg41 -S'123253883' -p63 -sg43 -g44 -ssssS'reference_sequence_records' -p64 -(dp65 -S'protein' -p66 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001181924.1' -p67 -sS'transcript' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001194995.1' -p69 -sssS'flag' -p70 -S'gene_variant' -p71 -sS'metadata' -p72 -(dp73 -S'variantvalidator_hgvs_version' -p74 -S'1.1.3' -p75 -sS'uta_schema' -p76 -S'uta_20180821' -p77 -sS'seqrepo_db' -p78 -S'2018-08-21' -p79 -sS'variantvalidator_version' -p80 -S'v0.2' -p81 -ssS'NM_152269.4:c.210del' -p82 -(dp83 -g3 -g4 -sg5 -(lp84 -S'NC_000012.11:g.123738430CA>C automapped to NC_000012.11:g.123738431delA' -p85 -aS'RefSeqGene record not available' -p86 -asg9 -g4 -sg10 -(lp87 -sg12 -VHomo sapiens chromosome 12 open reading frame 65 (C12orf65), transcript variant 1, mRNA -p88 -sg14 -S'C12orf65' -p89 -sg16 -(dp90 -g18 -S'NP_689482.1:p.(Gly72AlafsTer13)' -p91 -sg20 -S'NP_689482.1:p.(G72Afs*13)' -p92 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_152269.4:c.210del' -p93 -sg28 -g4 -sg29 -(dp94 -S'hg19' -p95 -(dp96 -g33 -S'NC_000012.11:g.123738431del' -p97 -sg35 -(dp98 -g37 -g38 -sg39 -S'CA' -p99 -sg41 -S'123738430' -p100 -sg43 -g44 -sssg45 -(dp101 -g33 -S'NC_000012.12:g.123253884del' -p102 -sg35 -(dp103 -g37 -g38 -sg39 -S'CA' -p104 -sg41 -S'123253883' -p105 -sg43 -g44 -sssS'grch37' -p106 -(dp107 -g33 -S'NC_000012.11:g.123738431del' -p108 -sg35 -(dp109 -g37 -g55 -sg39 -S'CA' -p110 -sg41 -S'123738430' -p111 -sg43 -g44 -sssS'grch38' -p112 -(dp113 -g33 -S'NC_000012.12:g.123253884del' -p114 -sg35 -(dp115 -g37 -g55 -sg39 -S'CA' -p116 -sg41 -S'123253883' -p117 -sg43 -g44 -ssssg64 -(dp118 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_689482.1' -p119 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_152269.4' -p120 -sssS'NM_001143905.2:c.210del' -p121 -(dp122 -g3 -g4 -sg5 -(lp123 -S'NC_000012.11:g.123738430CA>C automapped to NC_000012.11:g.123738431delA' -p124 -aS'RefSeqGene record not available' -p125 -asg9 -g4 -sg10 -(lp126 -sg12 -VHomo sapiens chromosome 12 open reading frame 65 (C12orf65), transcript variant 2, mRNA -p127 -sg14 -S'C12orf65' -p128 -sg16 -(dp129 
-g18 -S'NP_001137377.1:p.(Gly72AlafsTer13)' -p130 -sg20 -S'NP_001137377.1:p.(G72Afs*13)' -p131 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001143905.2:c.210del' -p132 -sg28 -g4 -sg29 -(dp133 -S'hg19' -p134 -(dp135 -g33 -S'NC_000012.11:g.123738431del' -p136 -sg35 -(dp137 -g37 -g38 -sg39 -S'CA' -p138 -sg41 -S'123738430' -p139 -sg43 -g44 -sssg45 -(dp140 -g33 -S'NC_000012.12:g.123253884del' -p141 -sg35 -(dp142 -g37 -g38 -sg39 -S'CA' -p143 -sg41 -S'123253883' -p144 -sg43 -g44 -sssS'grch37' -p145 -(dp146 -g33 -S'NC_000012.11:g.123738431del' -p147 -sg35 -(dp148 -g37 -g55 -sg39 -S'CA' -p149 -sg41 -S'123738430' -p150 -sg43 -g44 -sssS'grch38' -p151 -(dp152 -g33 -S'NC_000012.12:g.123253884del' -p153 -sg35 -(dp154 -g37 -g55 -sg39 -S'CA' -p155 -sg41 -S'123253883' -p156 -sg43 -g44 -ssssg64 -(dp157 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001137377.1' -p158 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001143905.2' -p159 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant222.txt b/VariantValidator/testing/testOutputsMasterITS/variant222.txt deleted file mode 100644 index f7b7223b..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant222.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_194318.3:c.71-5del' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000013.10:g.31789169CT>C automapped to NC_000013.10:g.31789183delT' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens beta 3-glucosyltransferase (B3GLCT), mRNA -p15 -sS'gene_symbol' -p16 -S'B3GLCT' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_919299.3:p.?' -p21 -sS'slr' -p22 -S'NP_919299.3:p.?' 
-p23 -ssS'submitted_variant' -p24 -S'13-31789169-CT-C' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000013.10(NM_194318.3):c.71-5del' -p27 -sS'hgvs_lrg_variant' -p28 -g6 -sS'hgvs_transcript_variant' -p29 -S'NM_194318.3:c.71-5del' -p30 -sS'hgvs_refseqgene_variant' -p31 -g6 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000013.10:g.31789183del' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr13' -p41 -sS'ref' -p42 -S'CT' -p43 -sS'pos' -p44 -S'31789169' -p45 -sS'alt' -p46 -S'C' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000013.11:g.31215046del' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'CT' -p52 -sg44 -S'31215032' -p53 -sg46 -g47 -sssS'grch37' -p54 -(dp55 -g36 -S'NC_000013.10:g.31789183del' -p56 -sg38 -(dp57 -g40 -S'13' -p58 -sg42 -S'CT' -p59 -sg44 -S'31789169' -p60 -sg46 -g47 -sssS'grch38' -p61 -(dp62 -g36 -S'NC_000013.11:g.31215046del' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -S'CT' -p65 -sg44 -S'31215032' -p66 -sg46 -g47 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_919299.3' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_194318.3' -p72 -sssS'metadata' -p73 -(dp74 -S'variantvalidator_hgvs_version' -p75 -S'1.1.3' -p76 -sS'uta_schema' -p77 -S'uta_20180821' -p78 -sS'seqrepo_db' -p79 -S'2018-08-21' -p80 -sS'variantvalidator_version' -p81 -S'v0.2' -p82 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant223.txt b/VariantValidator/testing/testOutputsMasterITS/variant223.txt deleted file mode 100644 index 451a92b5..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant223.txt +++ /dev/null @@ -1,515 +0,0 @@ -(dp0 -S'NR_144368.1:n.214-3552C>T' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens uncharacterized LOC105370526 (LOC105370526), long non-coding RNA -p12 -sS'gene_symbol' -p13 -S'LOC105370526' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'Non-coding :n.' -p18 -sS'slr' -p19 -g18 -ssS'submitted_variant' -p20 -S'14-62187287-G-A' -p21 -sS'genome_context_intronic_sequence' -p22 -S'NC_000014.8(NR_144368.1):c.214-3552C>T' -p23 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NR_144368.1:n.214-3552C>T' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000014.8:g.62187287G>A' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr14' -p37 -sS'ref' -p38 -VG -p39 -sS'pos' -p40 -S'62187287' -p41 -sS'alt' -p42 -VA -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000014.9:g.61720569G>A' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'61720569' -p48 -sg42 -g43 -sssS'grch37' -p49 -(dp50 -g32 -S'NC_000014.8:g.62187287G>A' -p51 -sg34 -(dp52 -g36 -S'14' -p53 -sg38 -g39 -sg40 -S'62187287' -p54 -sg42 -g43 -sssS'grch38' -p55 -(dp56 -g32 -S'NC_000014.9:g.61720569G>A' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'61720569' -p59 -sg42 -g43 -ssssS'reference_sequence_records' -p60 -(dp61 -S'transcript' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_144368.1' -p63 -sssS'NM_181054.2:c.223G>A' -p64 -(dp65 -g3 -g4 -sg5 -(lp66 -S'RefSeqGene record not 
available' -p67 -asg8 -g4 -sg9 -(lp68 -sg11 -VHomo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 2, mRNA -p69 -sg13 -S'HIF1A' -p70 -sg15 -(dp71 -g17 -S'NP_851397.1:p.(Ala75Thr)' -p72 -sg19 -S'NP_851397.1:p.(A75T)' -p73 -ssg20 -g21 -sg22 -g4 -sg24 -g4 -sg25 -S'NM_181054.2:c.223G>A' -p74 -sg27 -g4 -sg28 -(dp75 -S'hg19' -p76 -(dp77 -g32 -S'NC_000014.8:g.62187287G>A' -p78 -sg34 -(dp79 -g36 -g37 -sg38 -S'G' -p80 -sg40 -S'62187287' -p81 -sg42 -S'A' -p82 -sssg44 -(dp83 -g32 -S'NC_000014.9:g.61720569G>A' -p84 -sg34 -(dp85 -g36 -g37 -sg38 -g80 -sg40 -S'61720569' -p86 -sg42 -g82 -sssS'grch37' -p87 -(dp88 -g32 -S'NC_000014.8:g.62187287G>A' -p89 -sg34 -(dp90 -g36 -g53 -sg38 -g80 -sg40 -S'62187287' -p91 -sg42 -g82 -sssS'grch38' -p92 -(dp93 -g32 -S'NC_000014.9:g.61720569G>A' -p94 -sg34 -(dp95 -g36 -g53 -sg38 -g80 -sg40 -S'61720569' -p96 -sg42 -g82 -ssssg60 -(dp97 -S'protein' -p98 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_851397.1' -p99 -sg62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_181054.2' -p100 -sssS'flag' -p101 -S'gene_variant' -p102 -sS'NM_001243084.1:c.295G>A' -p103 -(dp104 -g3 -g4 -sg5 -(lp105 -S'RefSeqGene record not available' -p106 -asg8 -g4 -sg9 -(lp107 -sg11 -VHomo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 3, mRNA -p108 -sg13 -S'HIF1A' -p109 -sg15 -(dp110 -g17 -S'NP_001230013.1:p.(Ala99Thr)' -p111 -sg19 -S'NP_001230013.1:p.(A99T)' -p112 -ssg20 -g21 -sg22 -g4 -sg24 -g4 -sg25 -S'NM_001243084.1:c.295G>A' -p113 -sg27 -g4 -sg28 -(dp114 -S'hg19' -p115 -(dp116 -g32 -S'NC_000014.8:g.62187287G>A' -p117 -sg34 -(dp118 -g36 -g37 -sg38 -g80 -sg40 -S'62187287' -p119 -sg42 -g82 -sssg44 -(dp120 -g32 -S'NC_000014.9:g.61720569G>A' -p121 -sg34 -(dp122 -g36 -g37 -sg38 -g80 -sg40 -S'61720569' -p123 -sg42 -g82 -sssS'grch37' -p124 -(dp125 -g32 -S'NC_000014.8:g.62187287G>A' -p126 -sg34 -(dp127 -g36 -g53 -sg38 -g80 -sg40 -S'62187287' -p128 -sg42 -g82 -sssS'grch38' -p129 -(dp130 -g32 -S'NC_000014.9:g.61720569G>A' -p131 -sg34 
-(dp132 -g36 -g53 -sg38 -g80 -sg40 -S'61720569' -p133 -sg42 -g82 -ssssg60 -(dp134 -g98 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001230013.1' -p135 -sg62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001243084.1' -p136 -sssS'NM_001530.3:c.223G>A' -p137 -(dp138 -g3 -g4 -sg5 -(lp139 -S'RefSeqGene record not available' -p140 -asg8 -g4 -sg9 -(lp141 -sg11 -VHomo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 1, mRNA -p142 -sg13 -S'HIF1A' -p143 -sg15 -(dp144 -g17 -S'NP_001521.1:p.(Ala75Thr)' -p145 -sg19 -S'NP_001521.1:p.(A75T)' -p146 -ssg20 -g21 -sg22 -g4 -sg24 -g4 -sg25 -S'NM_001530.3:c.223G>A' -p147 -sg27 -g4 -sg28 -(dp148 -S'hg19' -p149 -(dp150 -g32 -S'NC_000014.8:g.62187287G>A' -p151 -sg34 -(dp152 -g36 -g37 -sg38 -g80 -sg40 -S'62187287' -p153 -sg42 -g82 -sssg44 -(dp154 -g32 -S'NC_000014.9:g.61720569G>A' -p155 -sg34 -(dp156 -g36 -g37 -sg38 -g80 -sg40 -S'61720569' -p157 -sg42 -g82 -sssS'grch37' -p158 -(dp159 -g32 -S'NC_000014.8:g.62187287G>A' -p160 -sg34 -(dp161 -g36 -g53 -sg38 -g80 -sg40 -S'62187287' -p162 -sg42 -g82 -sssS'grch38' -p163 -(dp164 -g32 -S'NC_000014.9:g.61720569G>A' -p165 -sg34 -(dp166 -g36 -g53 -sg38 -g80 -sg40 -S'61720569' -p167 -sg42 -g82 -ssssg60 -(dp168 -g98 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001521.1' -p169 -sg62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001530.3' -p170 -sssS'metadata' -p171 -(dp172 -S'variantvalidator_hgvs_version' -p173 -S'1.1.3' -p174 -sS'uta_schema' -p175 -S'uta_20180821' -p176 -sS'seqrepo_db' -p177 -S'2018-08-21' -p178 -sS'variantvalidator_version' -p179 -S'v0.2' -p180 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant224.txt b/VariantValidator/testing/testOutputsMasterITS/variant224.txt deleted file mode 100644 index 90c4acf7..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant224.txt +++ /dev/null @@ -1,540 +0,0 @@ -(dp0 -S'NR_144368.1:n.214-4497_214-4496delinsTC' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000014.8:g.62188231TT>GA automapped to NC_000014.8:g.62188231_62188232delTTinsGA' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens uncharacterized LOC105370526 (LOC105370526), long non-coding RNA -p13 -sS'gene_symbol' -p14 -S'LOC105370526' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'Non-coding :n.' -p19 -sS'slr' -p20 -g19 -ssS'submitted_variant' -p21 -S'14-62188231-TT-GA' -p22 -sS'genome_context_intronic_sequence' -p23 -S'NC_000014.8(NR_144368.1):c.214-4497_214-4496delinsTC' -p24 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NR_144368.1:n.214-4497_214-4496delinsTC' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000014.8:g.62188231_62188232delinsGA' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr14' -p38 -sS'ref' -p39 -S'TT' -p40 -sS'pos' -p41 -S'62188231' -p42 -sS'alt' -p43 -VGA -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000014.9:g.61721513_61721514delinsGA' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'TT' -p49 -sg41 -S'61721513' -p50 -sg43 -VGA -p51 -sssS'grch37' -p52 -(dp53 -g33 -S'NC_000014.8:g.62188231_62188232delinsGA' -p54 -sg35 -(dp55 -g37 -S'14' -p56 -sg39 -S'TT' -p57 -sg41 -S'62188231' -p58 -sg43 -g44 -sssS'grch38' -p59 -(dp60 -g33 -S'NC_000014.9:g.61721513_61721514delinsGA' -p61 -sg35 -(dp62 -g37 -g56 -sg39 -S'TT' -p63 -sg41 -S'61721513' -p64 
-sg43 -g51 -ssssS'reference_sequence_records' -p65 -(dp66 -S'transcript' -p67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_144368.1' -p68 -sssS'NM_001530.3:c.231_232delinsGA' -p69 -(dp70 -g3 -g4 -sg5 -(lp71 -S'NC_000014.8:g.62188231TT>GA automapped to NC_000014.8:g.62188231_62188232delTTinsGA' -p72 -aS'RefSeqGene record not available' -p73 -asg9 -g4 -sg10 -(lp74 -sg12 -VHomo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 1, mRNA -p75 -sg14 -S'HIF1A' -p76 -sg16 -(dp77 -g18 -S'NP_001521.1:p.(Asp77_Leu78delinsGluMet)' -p78 -sg20 -S'NP_001521.1:p.(D77_L78delinsEM)' -p79 -ssg21 -g22 -sg23 -g4 -sg25 -g4 -sg26 -S'NM_001530.3:c.231_232delinsGA' -p80 -sg28 -g4 -sg29 -(dp81 -S'hg19' -p82 -(dp83 -g33 -S'NC_000014.8:g.62188231_62188232delinsGA' -p84 -sg35 -(dp85 -g37 -g38 -sg39 -S'TT' -p86 -sg41 -S'62188231' -p87 -sg43 -S'GA' -p88 -sssg45 -(dp89 -g33 -S'NC_000014.9:g.61721513_61721514delinsGA' -p90 -sg35 -(dp91 -g37 -g38 -sg39 -S'TT' -p92 -sg41 -S'61721513' -p93 -sg43 -g88 -sssS'grch37' -p94 -(dp95 -g33 -S'NC_000014.8:g.62188231_62188232delinsGA' -p96 -sg35 -(dp97 -g37 -g56 -sg39 -S'TT' -p98 -sg41 -S'62188231' -p99 -sg43 -g88 -sssS'grch38' -p100 -(dp101 -g33 -S'NC_000014.9:g.61721513_61721514delinsGA' -p102 -sg35 -(dp103 -g37 -g56 -sg39 -S'TT' -p104 -sg41 -S'61721513' -p105 -sg43 -g88 -ssssg65 -(dp106 -S'protein' -p107 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001521.1' -p108 -sg67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001530.3' -p109 -sssS'flag' -p110 -S'gene_variant' -p111 -sS'NM_001243084.1:c.303_304delinsGA' -p112 -(dp113 -g3 -g4 -sg5 -(lp114 -S'NC_000014.8:g.62188231TT>GA automapped to NC_000014.8:g.62188231_62188232delTTinsGA' -p115 -aS'RefSeqGene record not available' -p116 -asg9 -g4 -sg10 -(lp117 -sg12 -VHomo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 3, mRNA -p118 -sg14 -S'HIF1A' -p119 -sg16 -(dp120 -g18 -S'NP_001230013.1:p.(Asp101_Leu102delinsGluMet)' -p121 -sg20 
-S'NP_001230013.1:p.(D101_L102delinsEM)' -p122 -ssg21 -g22 -sg23 -g4 -sg25 -g4 -sg26 -S'NM_001243084.1:c.303_304delinsGA' -p123 -sg28 -g4 -sg29 -(dp124 -S'hg19' -p125 -(dp126 -g33 -S'NC_000014.8:g.62188231_62188232delinsGA' -p127 -sg35 -(dp128 -g37 -g38 -sg39 -S'TT' -p129 -sg41 -S'62188231' -p130 -sg43 -S'GA' -p131 -sssg45 -(dp132 -g33 -S'NC_000014.9:g.61721513_61721514delinsGA' -p133 -sg35 -(dp134 -g37 -g38 -sg39 -S'TT' -p135 -sg41 -S'61721513' -p136 -sg43 -g131 -sssS'grch37' -p137 -(dp138 -g33 -S'NC_000014.8:g.62188231_62188232delinsGA' -p139 -sg35 -(dp140 -g37 -g56 -sg39 -S'TT' -p141 -sg41 -S'62188231' -p142 -sg43 -g131 -sssS'grch38' -p143 -(dp144 -g33 -S'NC_000014.9:g.61721513_61721514delinsGA' -p145 -sg35 -(dp146 -g37 -g56 -sg39 -S'TT' -p147 -sg41 -S'61721513' -p148 -sg43 -g131 -ssssg65 -(dp149 -g107 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001230013.1' -p150 -sg67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001243084.1' -p151 -sssS'NM_181054.2:c.231_232delinsGA' -p152 -(dp153 -g3 -g4 -sg5 -(lp154 -S'NC_000014.8:g.62188231TT>GA automapped to NC_000014.8:g.62188231_62188232delTTinsGA' -p155 -aS'RefSeqGene record not available' -p156 -asg9 -g4 -sg10 -(lp157 -sg12 -VHomo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 2, mRNA -p158 -sg14 -S'HIF1A' -p159 -sg16 -(dp160 -g18 -S'NP_851397.1:p.(Asp77_Leu78delinsGluMet)' -p161 -sg20 -S'NP_851397.1:p.(D77_L78delinsEM)' -p162 -ssg21 -g22 -sg23 -g4 -sg25 -g4 -sg26 -S'NM_181054.2:c.231_232delinsGA' -p163 -sg28 -g4 -sg29 -(dp164 -S'hg19' -p165 -(dp166 -g33 -S'NC_000014.8:g.62188231_62188232delinsGA' -p167 -sg35 -(dp168 -g37 -g38 -sg39 -S'TT' -p169 -sg41 -S'62188231' -p170 -sg43 -S'GA' -p171 -sssg45 -(dp172 -g33 -S'NC_000014.9:g.61721513_61721514delinsGA' -p173 -sg35 -(dp174 -g37 -g38 -sg39 -S'TT' -p175 -sg41 -S'61721513' -p176 -sg43 -g171 -sssS'grch37' -p177 -(dp178 -g33 -S'NC_000014.8:g.62188231_62188232delinsGA' -p179 -sg35 -(dp180 -g37 -g56 -sg39 -S'TT' -p181 -sg41 -S'62188231' -p182 -sg43 
-g171 -sssS'grch38' -p183 -(dp184 -g33 -S'NC_000014.9:g.61721513_61721514delinsGA' -p185 -sg35 -(dp186 -g37 -g56 -sg39 -S'TT' -p187 -sg41 -S'61721513' -p188 -sg43 -g171 -ssssg65 -(dp189 -g107 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_851397.1' -p190 -sg67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_181054.2' -p191 -sssS'metadata' -p192 -(dp193 -S'variantvalidator_hgvs_version' -p194 -S'1.1.3' -p195 -sS'uta_schema' -p196 -S'uta_20180821' -p197 -sS'seqrepo_db' -p198 -S'2018-08-21' -p199 -sS'variantvalidator_version' -p200 -S'v0.2' -p201 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant225.txt b/VariantValidator/testing/testOutputsMasterITS/variant225.txt deleted file mode 100644 index 69eaacd9..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant225.txt +++ /dev/null @@ -1,462 +0,0 @@ -(dp0 -S'NM_139318.3:c.2366G>T' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'A more recent version of the selected reference sequence NM_139318.3 is available (NM_139318.4)' -p7 -aS'NM_139318.4:c.2366G>T MUST be fully validated prior to use in reports' -p8 -aS'select_variants=NM_139318.4:c.2366G>T' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g4 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens potassium voltage-gated channel, subfamily H (eag-related), member 5 (KCNH5), transcript variant 1, mRNA -p15 -sS'gene_symbol' -p16 -S'KCNH5' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_647479.2:p.(Gly789Val)' -p21 -sS'slr' -p22 -S'NP_647479.2:p.(G789V)' -p23 -ssS'submitted_variant' -p24 -S'14-63174827-C-A' -p25 -sS'genome_context_intronic_sequence' -p26 -g4 -sS'hgvs_lrg_variant' -p27 -g4 -sS'hgvs_transcript_variant' -p28 -S'NM_139318.3:c.2366G>T' -p29 -sS'hgvs_refseqgene_variant' -p30 -g4 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 
-S'hgvs_genomic_description' -p35 -S'NC_000014.8:g.63174827C>A' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr14' -p40 -sS'ref' -p41 -VC -p42 -sS'pos' -p43 -S'63174827' -p44 -sS'alt' -p45 -VA -p46 -sssS'grch37' -p47 -(dp48 -g35 -S'NC_000014.8:g.63174827C>A' -p49 -sg37 -(dp50 -g39 -S'14' -p51 -sg41 -g42 -sg43 -S'63174827' -p52 -sg45 -g46 -ssssS'reference_sequence_records' -p53 -(dp54 -S'protein' -p55 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_647479.2' -p56 -sS'transcript' -p57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_139318.3' -p58 -sssS'NM_172375.1:c.*333G>T' -p59 -(dp60 -g3 -g4 -sg5 -(lp61 -S'A more recent version of the selected reference sequence NM_172375.1 is available (NM_172375.2)' -p62 -aS'NM_172375.2:c.*333G>T MUST be fully validated prior to use in reports' -p63 -aS'select_variants=NM_172375.2:c.*333G>T' -p64 -aS'RefSeqGene record not available' -p65 -asg11 -g4 -sg12 -(lp66 -sg14 -VHomo sapiens potassium voltage-gated channel, subfamily H (eag-related), member 5 (KCNH5), transcript variant 3, mRNA -p67 -sg16 -S'KCNH5' -p68 -sg18 -(dp69 -g20 -S'NP_758963.1:p.?' -p70 -sg22 -S'NP_758963.1:p.?' -p71 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_172375.1:c.*333G>T' -p72 -sg30 -g4 -sg31 -(dp73 -S'hg19' -p74 -(dp75 -g35 -S'NC_000014.8:g.63174827C>A' -p76 -sg37 -(dp77 -g39 -g40 -sg41 -g42 -sg43 -S'63174827' -p78 -sg45 -g46 -sssS'grch37' -p79 -(dp80 -g35 -S'NC_000014.8:g.63174827C>A' -p81 -sg37 -(dp82 -g39 -g51 -sg41 -g42 -sg43 -S'63174827' -p83 -sg45 -g46 -ssssg53 -(dp84 -g55 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_758963.1' -p85 -sg57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_172375.1' -p86 -sssS'NM_172375.2:c.*333G>T' -p87 -(dp88 -g3 -g4 -sg5 -(lp89 -S'RefSeqGene record not available' -p90 -asg11 -g4 -sg12 -(lp91 -sg14 -VHomo sapiens potassium voltage-gated channel subfamily H member 5 (KCNH5), transcript variant 3, mRNA -p92 -sg16 -S'KCNH5' -p93 -sg18 -(dp94 -g20 -S'NP_758963.1:p.?' -p95 -sg22 -S'NP_758963.1:p.?' 
-p96 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_172375.2:c.*333G>T' -p97 -sg30 -g4 -sg31 -(dp98 -S'hg19' -p99 -(dp100 -g35 -S'NC_000014.8:g.63174827C>A' -p101 -sg37 -(dp102 -g39 -g40 -sg41 -g42 -sg43 -S'63174827' -p103 -sg45 -g46 -sssS'hg38' -p104 -(dp105 -g35 -S'NC_000014.9:g.62708109C>A' -p106 -sg37 -(dp107 -g39 -g40 -sg41 -g42 -sg43 -S'62708109' -p108 -sg45 -g46 -sssS'grch37' -p109 -(dp110 -g35 -S'NC_000014.8:g.63174827C>A' -p111 -sg37 -(dp112 -g39 -g51 -sg41 -g42 -sg43 -S'63174827' -p113 -sg45 -g46 -sssS'grch38' -p114 -(dp115 -g35 -S'NC_000014.9:g.62708109C>A' -p116 -sg37 -(dp117 -g39 -g51 -sg41 -g42 -sg43 -S'62708109' -p118 -sg45 -g46 -ssssg53 -(dp119 -g55 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_758963.1' -p120 -sg57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_172375.2' -p121 -sssS'flag' -p122 -S'gene_variant' -p123 -sS'NM_139318.4:c.2366G>T' -p124 -(dp125 -g3 -g4 -sg5 -(lp126 -S'RefSeqGene record not available' -p127 -asg11 -g4 -sg12 -(lp128 -sg14 -VHomo sapiens potassium voltage-gated channel subfamily H member 5 (KCNH5), transcript variant 1, mRNA -p129 -sg16 -S'KCNH5' -p130 -sg18 -(dp131 -g20 -S'NP_647479.2:p.(Gly789Val)' -p132 -sg22 -S'NP_647479.2:p.(G789V)' -p133 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_139318.4:c.2366G>T' -p134 -sg30 -g4 -sg31 -(dp135 -S'hg19' -p136 -(dp137 -g35 -S'NC_000014.8:g.63174827C>A' -p138 -sg37 -(dp139 -g39 -g40 -sg41 -g42 -sg43 -S'63174827' -p140 -sg45 -g46 -sssg104 -(dp141 -g35 -S'NC_000014.9:g.62708109C>A' -p142 -sg37 -(dp143 -g39 -g40 -sg41 -g42 -sg43 -S'62708109' -p144 -sg45 -g46 -sssS'grch37' -p145 -(dp146 -g35 -S'NC_000014.8:g.63174827C>A' -p147 -sg37 -(dp148 -g39 -g51 -sg41 -g42 -sg43 -S'63174827' -p149 -sg45 -g46 -sssS'grch38' -p150 -(dp151 -g35 -S'NC_000014.9:g.62708109C>A' -p152 -sg37 -(dp153 -g39 -g51 -sg41 -g42 -sg43 -S'62708109' -p154 -sg45 -g46 -ssssg53 -(dp155 -g55 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_647479.2' -p156 -sg57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_139318.4' -p157 -sssS'metadata' 
-p158 -(dp159 -S'variantvalidator_hgvs_version' -p160 -S'1.1.3' -p161 -sS'uta_schema' -p162 -S'uta_20180821' -p163 -sS'seqrepo_db' -p164 -S'2018-08-21' -p165 -sS'variantvalidator_version' -p166 -S'v0.2' -p167 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant226.txt b/VariantValidator/testing/testOutputsMasterITS/variant226.txt deleted file mode 100644 index 1d9712ce..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant226.txt +++ /dev/null @@ -1,418 +0,0 @@ -(dp0 -S'NM_000070.2:c.550del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000015.9:g.42680000CA>C automapped to NC_000015.9:g.42680002delA' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 1, mRNA -p13 -sS'gene_symbol' -p14 -S'CAPN3' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_000061.1:p.(Thr184ArgfsTer36)' -p19 -sS'slr' -p20 -S'NP_000061.1:p.(T184Rfs*36)' -p21 -ssS'submitted_variant' -p22 -S'15-42680000-CA-C' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_000070.2:c.550del' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'grch38' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000015.10:g.42387804del' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'15' -p38 -sS'ref' -p39 -S'CA' -p40 -sS'pos' -p41 -S'42387802' -p42 -sS'alt' -p43 -S'C' -p44 -sssS'grch37' -p45 -(dp46 -g33 -S'NC_000015.9:g.42680002del' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'CA' -p49 -sg41 -S'42680000' -p50 -sg43 -g44 -sssS'hg38' -p51 -(dp52 -g33 -S'NC_000015.10:g.42387804del' -p53 -sg35 -(dp54 -g37 -S'chr15' -p55 -sg39 -S'CA' -p56 -sg41 -S'42387802' -p57 -sg43 -g44 -sssS'hg19' -p58 -(dp59 -g33 -S'NC_000015.9:g.42680002del' -p60 
-sg35 -(dp61 -g37 -g55 -sg39 -S'CA' -p62 -sg41 -S'42680000' -p63 -sg43 -g44 -ssssS'reference_sequence_records' -p64 -(dp65 -S'protein' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000061.1' -p67 -sS'transcript' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000070.2' -p69 -sssS'flag' -p70 -S'gene_variant' -p71 -sS'NM_024344.1:c.550del' -p72 -(dp73 -g3 -g4 -sg5 -(lp74 -S'NC_000015.9:g.42680000CA>C automapped to NC_000015.9:g.42680002delA' -p75 -aS'RefSeqGene record not available' -p76 -asg9 -g4 -sg10 -(lp77 -sg12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 2, mRNA -p78 -sg14 -S'CAPN3' -p79 -sg16 -(dp80 -g18 -S'NP_077320.1:p.(Thr184ArgfsTer36)' -p81 -sg20 -S'NP_077320.1:p.(T184Rfs*36)' -p82 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_024344.1:c.550del' -p83 -sg28 -g4 -sg29 -(dp84 -S'grch38' -p85 -(dp86 -g33 -S'NC_000015.10:g.42387804del' -p87 -sg35 -(dp88 -g37 -g38 -sg39 -S'CA' -p89 -sg41 -S'42387802' -p90 -sg43 -g44 -sssS'grch37' -p91 -(dp92 -g33 -S'NC_000015.9:g.42680002del' -p93 -sg35 -(dp94 -g37 -g38 -sg39 -S'CA' -p95 -sg41 -S'42680000' -p96 -sg43 -g44 -sssg51 -(dp97 -g33 -S'NC_000015.10:g.42387804del' -p98 -sg35 -(dp99 -g37 -g55 -sg39 -S'CA' -p100 -sg41 -S'42387802' -p101 -sg43 -g44 -sssS'hg19' -p102 -(dp103 -g33 -S'NC_000015.9:g.42680002del' -p104 -sg35 -(dp105 -g37 -g55 -sg39 -S'CA' -p106 -sg41 -S'42680000' -p107 -sg43 -g44 -ssssg64 -(dp108 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_077320.1' -p109 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_024344.1' -p110 -sssS'NM_173087.1:c.550del' -p111 -(dp112 -g3 -g4 -sg5 -(lp113 -S'NC_000015.9:g.42680000CA>C automapped to NC_000015.9:g.42680002delA' -p114 -aS'RefSeqGene record not available' -p115 -asg9 -g4 -sg10 -(lp116 -sg12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 3, mRNA -p117 -sg14 -S'CAPN3' -p118 -sg16 -(dp119 -g18 -S'NP_775110.1:p.(Thr184ArgfsTer36)' -p120 -sg20 -S'NP_775110.1:p.(T184Rfs*36)' -p121 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_173087.1:c.550del' -p122 -sg28 -g4 
-sg29 -(dp123 -S'grch38' -p124 -(dp125 -g33 -S'NC_000015.10:g.42387804del' -p126 -sg35 -(dp127 -g37 -g38 -sg39 -S'CA' -p128 -sg41 -S'42387802' -p129 -sg43 -g44 -sssS'grch37' -p130 -(dp131 -g33 -S'NC_000015.9:g.42680002del' -p132 -sg35 -(dp133 -g37 -g38 -sg39 -S'CA' -p134 -sg41 -S'42680000' -p135 -sg43 -g44 -sssg51 -(dp136 -g33 -S'NC_000015.10:g.42387804del' -p137 -sg35 -(dp138 -g37 -g55 -sg39 -S'CA' -p139 -sg41 -S'42387802' -p140 -sg43 -g44 -sssS'hg19' -p141 -(dp142 -g33 -S'NC_000015.9:g.42680002del' -p143 -sg35 -(dp144 -g37 -g55 -sg39 -S'CA' -p145 -sg41 -S'42680000' -p146 -sg43 -g44 -ssssg64 -(dp147 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_775110.1' -p148 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_173087.1' -p149 -sssS'metadata' -p150 -(dp151 -S'variantvalidator_hgvs_version' -p152 -S'1.1.3' -p153 -sS'uta_schema' -p154 -S'uta_20180821' -p155 -sS'seqrepo_db' -p156 -S'2018-08-21' -p157 -sS'variantvalidator_version' -p158 -S'v0.2' -p159 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant227.txt b/VariantValidator/testing/testOutputsMasterITS/variant227.txt deleted file mode 100644 index 234808b3..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant227.txt +++ /dev/null @@ -1,418 +0,0 @@ -(dp0 -S'NM_024344.1:c.550dup' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000015.9:g.42680000CA>CAA automapped to NC_000015.9:g.42680002dupA' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 2, mRNA -p13 -sS'gene_symbol' -p14 -S'CAPN3' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_077320.1:p.(Thr184AsnfsTer16)' -p19 -sS'slr' -p20 -S'NP_077320.1:p.(T184Nfs*16)' -p21 -ssS'submitted_variant' -p22 -S'15-42680000-CA-CAA' -p23 -sS'genome_context_intronic_sequence' -p24 
-g4 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_024344.1:c.550dup' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'grch38' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000015.10:g.42387804dup' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'15' -p38 -sS'ref' -p39 -S'A' -p40 -sS'pos' -p41 -S'42387803' -p42 -sS'alt' -p43 -S'AA' -p44 -sssS'grch37' -p45 -(dp46 -g33 -S'NC_000015.9:g.42680002dup' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'42680001' -p49 -sg43 -S'AA' -p50 -sssS'hg38' -p51 -(dp52 -g33 -S'NC_000015.10:g.42387804dup' -p53 -sg35 -(dp54 -g37 -S'chr15' -p55 -sg39 -g40 -sg41 -S'42387803' -p56 -sg43 -S'AA' -p57 -sssS'hg19' -p58 -(dp59 -g33 -S'NC_000015.9:g.42680002dup' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -g40 -sg41 -S'42680001' -p62 -sg43 -S'AA' -p63 -ssssS'reference_sequence_records' -p64 -(dp65 -S'protein' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_077320.1' -p67 -sS'transcript' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_024344.1' -p69 -sssS'NM_173087.1:c.550dup' -p70 -(dp71 -g3 -g4 -sg5 -(lp72 -S'NC_000015.9:g.42680000CA>CAA automapped to NC_000015.9:g.42680002dupA' -p73 -aS'RefSeqGene record not available' -p74 -asg9 -g4 -sg10 -(lp75 -sg12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 3, mRNA -p76 -sg14 -S'CAPN3' -p77 -sg16 -(dp78 -g18 -S'NP_775110.1:p.(Thr184AsnfsTer16)' -p79 -sg20 -S'NP_775110.1:p.(T184Nfs*16)' -p80 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_173087.1:c.550dup' -p81 -sg28 -g4 -sg29 -(dp82 -S'grch38' -p83 -(dp84 -g33 -S'NC_000015.10:g.42387804dup' -p85 -sg35 -(dp86 -g37 -g38 -sg39 -g40 -sg41 -S'42387803' -p87 -sg43 -S'AA' -p88 -sssS'grch37' -p89 -(dp90 -g33 -S'NC_000015.9:g.42680002dup' -p91 -sg35 -(dp92 -g37 -g38 -sg39 -g40 -sg41 -S'42680001' -p93 -sg43 -S'AA' -p94 -sssg51 -(dp95 -g33 -S'NC_000015.10:g.42387804dup' -p96 -sg35 -(dp97 -g37 -g55 -sg39 -g40 -sg41 -S'42387803' -p98 -sg43 -S'AA' -p99 -sssS'hg19' -p100 -(dp101 -g33 -S'NC_000015.9:g.42680002dup' 
-p102 -sg35 -(dp103 -g37 -g55 -sg39 -g40 -sg41 -S'42680001' -p104 -sg43 -S'AA' -p105 -ssssg64 -(dp106 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_775110.1' -p107 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_173087.1' -p108 -sssS'flag' -p109 -S'gene_variant' -p110 -sS'NM_000070.2:c.550dup' -p111 -(dp112 -g3 -g4 -sg5 -(lp113 -S'NC_000015.9:g.42680000CA>CAA automapped to NC_000015.9:g.42680002dupA' -p114 -aS'RefSeqGene record not available' -p115 -asg9 -g4 -sg10 -(lp116 -sg12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 1, mRNA -p117 -sg14 -S'CAPN3' -p118 -sg16 -(dp119 -g18 -S'NP_000061.1:p.(Thr184AsnfsTer16)' -p120 -sg20 -S'NP_000061.1:p.(T184Nfs*16)' -p121 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_000070.2:c.550dup' -p122 -sg28 -g4 -sg29 -(dp123 -S'grch38' -p124 -(dp125 -g33 -S'NC_000015.10:g.42387804dup' -p126 -sg35 -(dp127 -g37 -g38 -sg39 -g40 -sg41 -S'42387803' -p128 -sg43 -S'AA' -p129 -sssS'grch37' -p130 -(dp131 -g33 -S'NC_000015.9:g.42680002dup' -p132 -sg35 -(dp133 -g37 -g38 -sg39 -g40 -sg41 -S'42680001' -p134 -sg43 -S'AA' -p135 -sssg51 -(dp136 -g33 -S'NC_000015.10:g.42387804dup' -p137 -sg35 -(dp138 -g37 -g55 -sg39 -g40 -sg41 -S'42387803' -p139 -sg43 -S'AA' -p140 -sssS'hg19' -p141 -(dp142 -g33 -S'NC_000015.9:g.42680002dup' -p143 -sg35 -(dp144 -g37 -g55 -sg39 -g40 -sg41 -S'42680001' -p145 -sg43 -S'AA' -p146 -ssssg64 -(dp147 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000061.1' -p148 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000070.2' -p149 -sssS'metadata' -p150 -(dp151 -S'variantvalidator_hgvs_version' -p152 -S'1.1.3' -p153 -sS'uta_schema' -p154 -S'uta_20180821' -p155 -sS'seqrepo_db' -p156 -S'2018-08-21' -p157 -sS'variantvalidator_version' -p158 -S'v0.2' -p159 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant228.txt b/VariantValidator/testing/testOutputsMasterITS/variant228.txt deleted file mode 100644 index 23e460bb..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant228.txt +++ /dev/null @@ -1,781 +0,0 @@ -(dp0 -S'NM_173088.1:c.825_826insTCA' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000015.9:g.42703179T>TTCA automapped to NC_000015.9:g.42703179_42703180insTCA' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 4, mRNA -p13 -sS'gene_symbol' -p14 -S'CAPN3' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_775111.1:p.(Val275_Arg276insSer)' -p19 -sS'slr' -p20 -S'NP_775111.1:p.(V275_R276insS)' -p21 -ssS'submitted_variant' -p22 -S'15-42703179-T-TTCA' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_173088.1:c.825_826insTCA' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'grch38' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000015.10:g.42410981_42410982insTCA' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'15' -p38 -sS'ref' -p39 -S'T' -p40 -sS'pos' -p41 -S'42410981' -p42 -sS'alt' -p43 -S'TTCA' -p44 -sssS'grch37' -p45 -(dp46 -g33 -S'NC_000015.9:g.42703179_42703180insTCA' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'42703179' -p49 -sg43 -S'TTCA' -p50 -sssS'hg38' -p51 -(dp52 -g33 -S'NC_000015.10:g.42410981_42410982insTCA' -p53 -sg35 -(dp54 -g37 -S'chr15' -p55 -sg39 -g40 -sg41 -S'42410981' -p56 -sg43 -S'TTCA' -p57 -sssS'hg19' -p58 -(dp59 -g33 -S'NC_000015.9:g.42703179_42703180insTCA' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -g40 -sg41 -S'42703179' -p62 -sg43 -S'TTCA' -p63 -ssssS'reference_sequence_records' -p64 -(dp65 
-S'protein' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_775111.1' -p67 -sS'transcript' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_173088.1' -p69 -sssS'NM_173090.1:c.366_367insTCA' -p70 -(dp71 -g3 -g4 -sg5 -(lp72 -S'NC_000015.9:g.42703179T>TTCA automapped to NC_000015.9:g.42703179_42703180insTCA' -p73 -aS'RefSeqGene record not available' -p74 -asg9 -g4 -sg10 -(lp75 -sg12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 6, mRNA -p76 -sg14 -S'CAPN3' -p77 -sg16 -(dp78 -g18 -S'NP_775113.1:p.(Val122_Arg123insSer)' -p79 -sg20 -S'NP_775113.1:p.(V122_R123insS)' -p80 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_173090.1:c.366_367insTCA' -p81 -sg28 -g4 -sg29 -(dp82 -S'grch38' -p83 -(dp84 -g33 -S'NC_000015.10:g.42410981_42410982insTCA' -p85 -sg35 -(dp86 -g37 -g38 -sg39 -g40 -sg41 -S'42410981' -p87 -sg43 -S'TTCA' -p88 -sssS'grch37' -p89 -(dp90 -g33 -S'NC_000015.9:g.42703179_42703180insTCA' -p91 -sg35 -(dp92 -g37 -g38 -sg39 -g40 -sg41 -S'42703179' -p93 -sg43 -S'TTCA' -p94 -sssg51 -(dp95 -g33 -S'NC_000015.10:g.42410981_42410982insTCA' -p96 -sg35 -(dp97 -g37 -g55 -sg39 -g40 -sg41 -S'42410981' -p98 -sg43 -S'TTCA' -p99 -sssS'hg19' -p100 -(dp101 -g33 -S'NC_000015.9:g.42703179_42703180insTCA' -p102 -sg35 -(dp103 -g37 -g55 -sg39 -g40 -sg41 -S'42703179' -p104 -sg43 -S'TTCA' -p105 -ssssg64 -(dp106 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_775113.1' -p107 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_173090.1' -p108 -sssS'NM_173089.1:c.366_367insTCA' -p109 -(dp110 -g3 -g4 -sg5 -(lp111 -S'NC_000015.9:g.42703179T>TTCA automapped to NC_000015.9:g.42703179_42703180insTCA' -p112 -aS'RefSeqGene record not available' -p113 -asg9 -g4 -sg10 -(lp114 -sg12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 5, mRNA -p115 -sg14 -S'CAPN3' -p116 -sg16 -(dp117 -g18 -S'NP_775112.1:p.(Val122_Arg123insSer)' -p118 -sg20 -S'NP_775112.1:p.(V122_R123insS)' -p119 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_173089.1:c.366_367insTCA' -p120 -sg28 -g4 -sg29 -(dp121 -S'grch38' -p122 -(dp123 
-g33 -S'NC_000015.10:g.42410981_42410982insTCA' -p124 -sg35 -(dp125 -g37 -g38 -sg39 -g40 -sg41 -S'42410981' -p126 -sg43 -S'TTCA' -p127 -sssS'grch37' -p128 -(dp129 -g33 -S'NC_000015.9:g.42703179_42703180insTCA' -p130 -sg35 -(dp131 -g37 -g38 -sg39 -g40 -sg41 -S'42703179' -p132 -sg43 -S'TTCA' -p133 -sssg51 -(dp134 -g33 -S'NC_000015.10:g.42410981_42410982insTCA' -p135 -sg35 -(dp136 -g37 -g55 -sg39 -g40 -sg41 -S'42410981' -p137 -sg43 -S'TTCA' -p138 -sssS'hg19' -p139 -(dp140 -g33 -S'NC_000015.9:g.42703179_42703180insTCA' -p141 -sg35 -(dp142 -g37 -g55 -sg39 -g40 -sg41 -S'42703179' -p143 -sg43 -S'TTCA' -p144 -ssssg64 -(dp145 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_775112.1' -p146 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_173089.1' -p147 -sssS'NM_173087.1:c.2085_2086insTCA' -p148 -(dp149 -g3 -g4 -sg5 -(lp150 -S'NC_000015.9:g.42703179T>TTCA automapped to NC_000015.9:g.42703179_42703180insTCA' -p151 -aS'RefSeqGene record not available' -p152 -asg9 -g4 -sg10 -(lp153 -sg12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 3, mRNA -p154 -sg14 -S'CAPN3' -p155 -sg16 -(dp156 -g18 -S'NP_775110.1:p.(Val695_Arg696insSer)' -p157 -sg20 -S'NP_775110.1:p.(V695_R696insS)' -p158 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_173087.1:c.2085_2086insTCA' -p159 -sg28 -g4 -sg29 -(dp160 -S'grch38' -p161 -(dp162 -g33 -S'NC_000015.10:g.42410981_42410982insTCA' -p163 -sg35 -(dp164 -g37 -g38 -sg39 -g40 -sg41 -S'42410981' -p165 -sg43 -S'TTCA' -p166 -sssS'grch37' -p167 -(dp168 -g33 -S'NC_000015.9:g.42703179_42703180insTCA' -p169 -sg35 -(dp170 -g37 -g38 -sg39 -g40 -sg41 -S'42703179' -p171 -sg43 -S'TTCA' -p172 -sssg51 -(dp173 -g33 -S'NC_000015.10:g.42410981_42410982insTCA' -p174 -sg35 -(dp175 -g37 -g55 -sg39 -g40 -sg41 -S'42410981' -p176 -sg43 -S'TTCA' -p177 -sssS'hg19' -p178 -(dp179 -g33 -S'NC_000015.9:g.42703179_42703180insTCA' -p180 -sg35 -(dp181 -g37 -g55 -sg39 -g40 -sg41 -S'42703179' -p182 -sg43 -S'TTCA' -p183 -ssssg64 -(dp184 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_775110.1' 
-p185 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_173087.1' -p186 -sssS'flag' -p187 -S'gene_variant' -p188 -sS'NM_000070.2:c.2361_2362insTCA' -p189 -(dp190 -g3 -g4 -sg5 -(lp191 -S'NC_000015.9:g.42703179T>TTCA automapped to NC_000015.9:g.42703179_42703180insTCA' -p192 -aS'RefSeqGene record not available' -p193 -asg9 -g4 -sg10 -(lp194 -sg12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 1, mRNA -p195 -sg14 -S'CAPN3' -p196 -sg16 -(dp197 -g18 -S'NP_000061.1:p.(Val787_Arg788insSer)' -p198 -sg20 -S'NP_000061.1:p.(V787_R788insS)' -p199 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_000070.2:c.2361_2362insTCA' -p200 -sg28 -g4 -sg29 -(dp201 -S'grch38' -p202 -(dp203 -g33 -S'NC_000015.10:g.42410981_42410982insTCA' -p204 -sg35 -(dp205 -g37 -g38 -sg39 -g40 -sg41 -S'42410981' -p206 -sg43 -S'TTCA' -p207 -sssS'grch37' -p208 -(dp209 -g33 -S'NC_000015.9:g.42703179_42703180insTCA' -p210 -sg35 -(dp211 -g37 -g38 -sg39 -g40 -sg41 -S'42703179' -p212 -sg43 -S'TTCA' -p213 -sssg51 -(dp214 -g33 -S'NC_000015.10:g.42410981_42410982insTCA' -p215 -sg35 -(dp216 -g37 -g55 -sg39 -g40 -sg41 -S'42410981' -p217 -sg43 -S'TTCA' -p218 -sssS'hg19' -p219 -(dp220 -g33 -S'NC_000015.9:g.42703179_42703180insTCA' -p221 -sg35 -(dp222 -g37 -g55 -sg39 -g40 -sg41 -S'42703179' -p223 -sg43 -S'TTCA' -p224 -ssssg64 -(dp225 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000061.1' -p226 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000070.2' -p227 -sssS'NM_024344.1:c.2343_2344insTCA' -p228 -(dp229 -g3 -g4 -sg5 -(lp230 -S'NC_000015.9:g.42703179T>TTCA automapped to NC_000015.9:g.42703179_42703180insTCA' -p231 -aS'RefSeqGene record not available' -p232 -asg9 -g4 -sg10 -(lp233 -sg12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 2, mRNA -p234 -sg14 -S'CAPN3' -p235 -sg16 -(dp236 -g18 -S'NP_077320.1:p.(Val781_Arg782insSer)' -p237 -sg20 -S'NP_077320.1:p.(V781_R782insS)' -p238 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_024344.1:c.2343_2344insTCA' -p239 -sg28 -g4 -sg29 -(dp240 -S'grch38' -p241 -(dp242 -g33 
-S'NC_000015.10:g.42410981_42410982insTCA' -p243 -sg35 -(dp244 -g37 -g38 -sg39 -g40 -sg41 -S'42410981' -p245 -sg43 -S'TTCA' -p246 -sssS'grch37' -p247 -(dp248 -g33 -S'NC_000015.9:g.42703179_42703180insTCA' -p249 -sg35 -(dp250 -g37 -g38 -sg39 -g40 -sg41 -S'42703179' -p251 -sg43 -S'TTCA' -p252 -sssg51 -(dp253 -g33 -S'NC_000015.10:g.42410981_42410982insTCA' -p254 -sg35 -(dp255 -g37 -g55 -sg39 -g40 -sg41 -S'42410981' -p256 -sg43 -S'TTCA' -p257 -sssS'hg19' -p258 -(dp259 -g33 -S'NC_000015.9:g.42703179_42703180insTCA' -p260 -sg35 -(dp261 -g37 -g55 -sg39 -g40 -sg41 -S'42703179' -p262 -sg43 -S'TTCA' -p263 -ssssg64 -(dp264 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_077320.1' -p265 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_024344.1' -p266 -sssS'metadata' -p267 -(dp268 -S'variantvalidator_hgvs_version' -p269 -S'1.1.3' -p270 -sS'uta_schema' -p271 -S'uta_20180821' -p272 -sS'seqrepo_db' -p273 -S'2018-08-21' -p274 -sS'variantvalidator_version' -p275 -S'v0.2' -p276 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant229.txt b/VariantValidator/testing/testOutputsMasterITS/variant229.txt deleted file mode 100644 index dcc6a4e6..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant229.txt +++ /dev/null @@ -1,786 +0,0 @@ -(dp0 -S'NM_024344.1:c.2344_2345delinsTCATCT' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000015.9:g.42703179TAG>TTCATCT automapped to NC_000015.9:g.42703180_42703181delAGinsTCATCT' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 2, mRNA -p13 -sS'gene_symbol' -p14 -S'CAPN3' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_077320.1:p.(Arg782SerfsTer14)' -p19 -sS'slr' -p20 -S'NP_077320.1:p.(R782Sfs*14)' -p21 -ssS'submitted_variant' -p22 
-S'15-42703179-TAG-TTCATCT' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_024344.1:c.2344_2345delinsTCATCT' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'grch38' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000015.10:g.42410982_42410983delinsTCATCT' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'15' -p38 -sS'ref' -p39 -S'AG' -p40 -sS'pos' -p41 -S'42410982' -p42 -sS'alt' -p43 -S'TCATCT' -p44 -sssS'grch37' -p45 -(dp46 -g33 -S'NC_000015.9:g.42703180_42703181delinsTCATCT' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'AG' -p49 -sg41 -S'42703180' -p50 -sg43 -g44 -sssS'hg38' -p51 -(dp52 -g33 -S'NC_000015.10:g.42410982_42410983delinsTCATCT' -p53 -sg35 -(dp54 -g37 -S'chr15' -p55 -sg39 -S'AG' -p56 -sg41 -S'42410982' -p57 -sg43 -g44 -sssS'hg19' -p58 -(dp59 -g33 -S'NC_000015.9:g.42703180_42703181delinsTCATCT' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -S'AG' -p62 -sg41 -S'42703180' -p63 -sg43 -g44 -ssssS'reference_sequence_records' -p64 -(dp65 -S'protein' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_077320.1' -p67 -sS'transcript' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_024344.1' -p69 -sssS'NM_173090.1:c.367_368delinsTCATCT' -p70 -(dp71 -g3 -g4 -sg5 -(lp72 -S'NC_000015.9:g.42703179TAG>TTCATCT automapped to NC_000015.9:g.42703180_42703181delAGinsTCATCT' -p73 -aS'RefSeqGene record not available' -p74 -asg9 -g4 -sg10 -(lp75 -sg12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 6, mRNA -p76 -sg14 -S'CAPN3' -p77 -sg16 -(dp78 -g18 -S'NP_775113.1:p.(Arg123SerfsTer14)' -p79 -sg20 -S'NP_775113.1:p.(R123Sfs*14)' -p80 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_173090.1:c.367_368delinsTCATCT' -p81 -sg28 -g4 -sg29 -(dp82 -S'grch38' -p83 -(dp84 -g33 -S'NC_000015.10:g.42410982_42410983delinsTCATCT' -p85 -sg35 -(dp86 -g37 -g38 -sg39 -S'AG' -p87 -sg41 -S'42410982' -p88 -sg43 -S'TCATCT' -p89 -sssS'grch37' -p90 -(dp91 -g33 -S'NC_000015.9:g.42703180_42703181delinsTCATCT' -p92 -sg35 
-(dp93 -g37 -g38 -sg39 -S'AG' -p94 -sg41 -S'42703180' -p95 -sg43 -g89 -sssg51 -(dp96 -g33 -S'NC_000015.10:g.42410982_42410983delinsTCATCT' -p97 -sg35 -(dp98 -g37 -g55 -sg39 -S'AG' -p99 -sg41 -S'42410982' -p100 -sg43 -g89 -sssS'hg19' -p101 -(dp102 -g33 -S'NC_000015.9:g.42703180_42703181delinsTCATCT' -p103 -sg35 -(dp104 -g37 -g55 -sg39 -S'AG' -p105 -sg41 -S'42703180' -p106 -sg43 -g89 -ssssg64 -(dp107 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_775113.1' -p108 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_173090.1' -p109 -sssS'flag' -p110 -S'gene_variant' -p111 -sS'NM_000070.2:c.2362_2363delinsTCATCT' -p112 -(dp113 -g3 -g4 -sg5 -(lp114 -S'NC_000015.9:g.42703179TAG>TTCATCT automapped to NC_000015.9:g.42703180_42703181delAGinsTCATCT' -p115 -aS'RefSeqGene record not available' -p116 -asg9 -g4 -sg10 -(lp117 -sg12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 1, mRNA -p118 -sg14 -S'CAPN3' -p119 -sg16 -(dp120 -g18 -S'NP_000061.1:p.(Arg788SerfsTer14)' -p121 -sg20 -S'NP_000061.1:p.(R788Sfs*14)' -p122 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_000070.2:c.2362_2363delinsTCATCT' -p123 -sg28 -g4 -sg29 -(dp124 -S'grch38' -p125 -(dp126 -g33 -S'NC_000015.10:g.42410982_42410983delinsTCATCT' -p127 -sg35 -(dp128 -g37 -g38 -sg39 -S'AG' -p129 -sg41 -S'42410982' -p130 -sg43 -S'TCATCT' -p131 -sssS'grch37' -p132 -(dp133 -g33 -S'NC_000015.9:g.42703180_42703181delinsTCATCT' -p134 -sg35 -(dp135 -g37 -g38 -sg39 -S'AG' -p136 -sg41 -S'42703180' -p137 -sg43 -g131 -sssg51 -(dp138 -g33 -S'NC_000015.10:g.42410982_42410983delinsTCATCT' -p139 -sg35 -(dp140 -g37 -g55 -sg39 -S'AG' -p141 -sg41 -S'42410982' -p142 -sg43 -g131 -sssS'hg19' -p143 -(dp144 -g33 -S'NC_000015.9:g.42703180_42703181delinsTCATCT' -p145 -sg35 -(dp146 -g37 -g55 -sg39 -S'AG' -p147 -sg41 -S'42703180' -p148 -sg43 -g131 -ssssg64 -(dp149 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000061.1' -p150 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000070.2' -p151 -sssS'NM_173088.1:c.826_827delinsTCATCT' -p152 -(dp153 -g3 -g4 
-sg5 -(lp154 -S'NC_000015.9:g.42703179TAG>TTCATCT automapped to NC_000015.9:g.42703180_42703181delAGinsTCATCT' -p155 -aS'RefSeqGene record not available' -p156 -asg9 -g4 -sg10 -(lp157 -sg12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 4, mRNA -p158 -sg14 -S'CAPN3' -p159 -sg16 -(dp160 -g18 -S'NP_775111.1:p.(Arg276SerfsTer14)' -p161 -sg20 -S'NP_775111.1:p.(R276Sfs*14)' -p162 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_173088.1:c.826_827delinsTCATCT' -p163 -sg28 -g4 -sg29 -(dp164 -S'grch38' -p165 -(dp166 -g33 -S'NC_000015.10:g.42410982_42410983delinsTCATCT' -p167 -sg35 -(dp168 -g37 -g38 -sg39 -S'AG' -p169 -sg41 -S'42410982' -p170 -sg43 -S'TCATCT' -p171 -sssS'grch37' -p172 -(dp173 -g33 -S'NC_000015.9:g.42703180_42703181delinsTCATCT' -p174 -sg35 -(dp175 -g37 -g38 -sg39 -S'AG' -p176 -sg41 -S'42703180' -p177 -sg43 -g171 -sssg51 -(dp178 -g33 -S'NC_000015.10:g.42410982_42410983delinsTCATCT' -p179 -sg35 -(dp180 -g37 -g55 -sg39 -S'AG' -p181 -sg41 -S'42410982' -p182 -sg43 -g171 -sssS'hg19' -p183 -(dp184 -g33 -S'NC_000015.9:g.42703180_42703181delinsTCATCT' -p185 -sg35 -(dp186 -g37 -g55 -sg39 -S'AG' -p187 -sg41 -S'42703180' -p188 -sg43 -g171 -ssssg64 -(dp189 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_775111.1' -p190 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_173088.1' -p191 -sssS'NM_173089.1:c.367_368delinsTCATCT' -p192 -(dp193 -g3 -g4 -sg5 -(lp194 -S'NC_000015.9:g.42703179TAG>TTCATCT automapped to NC_000015.9:g.42703180_42703181delAGinsTCATCT' -p195 -aS'RefSeqGene record not available' -p196 -asg9 -g4 -sg10 -(lp197 -sg12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 5, mRNA -p198 -sg14 -S'CAPN3' -p199 -sg16 -(dp200 -g18 -S'NP_775112.1:p.(Arg123SerfsTer14)' -p201 -sg20 -S'NP_775112.1:p.(R123Sfs*14)' -p202 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_173089.1:c.367_368delinsTCATCT' -p203 -sg28 -g4 -sg29 -(dp204 -S'grch38' -p205 -(dp206 -g33 -S'NC_000015.10:g.42410982_42410983delinsTCATCT' -p207 -sg35 -(dp208 -g37 -g38 -sg39 -S'AG' -p209 -sg41 -S'42410982' 
-p210 -sg43 -S'TCATCT' -p211 -sssS'grch37' -p212 -(dp213 -g33 -S'NC_000015.9:g.42703180_42703181delinsTCATCT' -p214 -sg35 -(dp215 -g37 -g38 -sg39 -S'AG' -p216 -sg41 -S'42703180' -p217 -sg43 -g211 -sssg51 -(dp218 -g33 -S'NC_000015.10:g.42410982_42410983delinsTCATCT' -p219 -sg35 -(dp220 -g37 -g55 -sg39 -S'AG' -p221 -sg41 -S'42410982' -p222 -sg43 -g211 -sssS'hg19' -p223 -(dp224 -g33 -S'NC_000015.9:g.42703180_42703181delinsTCATCT' -p225 -sg35 -(dp226 -g37 -g55 -sg39 -S'AG' -p227 -sg41 -S'42703180' -p228 -sg43 -g211 -ssssg64 -(dp229 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_775112.1' -p230 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_173089.1' -p231 -sssS'NM_173087.1:c.2086_2087delinsTCATCT' -p232 -(dp233 -g3 -g4 -sg5 -(lp234 -S'NC_000015.9:g.42703179TAG>TTCATCT automapped to NC_000015.9:g.42703180_42703181delAGinsTCATCT' -p235 -aS'RefSeqGene record not available' -p236 -asg9 -g4 -sg10 -(lp237 -sg12 -VHomo sapiens calpain 3 (CAPN3), transcript variant 3, mRNA -p238 -sg14 -S'CAPN3' -p239 -sg16 -(dp240 -g18 -S'NP_775110.1:p.(Arg696SerfsTer14)' -p241 -sg20 -S'NP_775110.1:p.(R696Sfs*14)' -p242 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_173087.1:c.2086_2087delinsTCATCT' -p243 -sg28 -g4 -sg29 -(dp244 -S'grch38' -p245 -(dp246 -g33 -S'NC_000015.10:g.42410982_42410983delinsTCATCT' -p247 -sg35 -(dp248 -g37 -g38 -sg39 -S'AG' -p249 -sg41 -S'42410982' -p250 -sg43 -S'TCATCT' -p251 -sssS'grch37' -p252 -(dp253 -g33 -S'NC_000015.9:g.42703180_42703181delinsTCATCT' -p254 -sg35 -(dp255 -g37 -g38 -sg39 -S'AG' -p256 -sg41 -S'42703180' -p257 -sg43 -g251 -sssg51 -(dp258 -g33 -S'NC_000015.10:g.42410982_42410983delinsTCATCT' -p259 -sg35 -(dp260 -g37 -g55 -sg39 -S'AG' -p261 -sg41 -S'42410982' -p262 -sg43 -g251 -sssS'hg19' -p263 -(dp264 -g33 -S'NC_000015.9:g.42703180_42703181delinsTCATCT' -p265 -sg35 -(dp266 -g37 -g55 -sg39 -S'AG' -p267 -sg41 -S'42703180' -p268 -sg43 -g251 -ssssg64 -(dp269 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_775110.1' -p270 -sg68 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NM_173087.1' -p271 -sssS'metadata' -p272 -(dp273 -S'variantvalidator_hgvs_version' -p274 -S'1.1.3' -p275 -sS'uta_schema' -p276 -S'uta_20180821' -p277 -sS'seqrepo_db' -p278 -S'2018-08-21' -p279 -sS'variantvalidator_version' -p280 -S'v0.2' -p281 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant23.txt b/VariantValidator/testing/testOutputsMasterITS/variant23.txt deleted file mode 100644 index 924bec35..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant23.txt +++ /dev/null @@ -1,80 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'start or end or both are beyond the bounds of transcript record' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'hgvs_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NM_000518.4:c.*132+1868C>T' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'hgvs_lrg_variant' -p20 -g4 -sS'hgvs_transcript_variant' -p21 -g4 -sS'hgvs_refseqgene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -sS'reference_sequence_records' -p25 -g4 -ssS'flag' -p26 -S'warning' -p27 -sS'metadata' -p28 -(dp29 -S'variantvalidator_hgvs_version' -p30 -S'1.1.3' -p31 -sS'uta_schema' -p32 -S'uta_20180821' -p33 -sS'seqrepo_db' -p34 -S'2018-08-21' -p35 -sS'variantvalidator_version' -p36 -S'v0.2' -p37 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant230.txt b/VariantValidator/testing/testOutputsMasterITS/variant230.txt deleted file mode 100644 index ebbb67d3..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant230.txt +++ /dev/null @@ -1,171 +0,0 @@ -(dp0 -S'NM_000138.4:c.2927G>A' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens fibrillin 1 (FBN1), mRNA -p12 -sS'gene_symbol' -p13 -S'FBN1' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_000129.3:p.(Arg976His)' -p18 -sS'slr' -p19 -S'NP_000129.3:p.(R976H)' -p20 -ssS'submitted_variant' -p21 -S'15-48782203-C-T' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_000138.4:c.2927G>A' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'grch38' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000015.10:g.48490006C>T' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'15' -p37 -sS'ref' -p38 -VC -p39 -sS'pos' -p40 -S'48490006' -p41 -sS'alt' -p42 -VT -p43 -sssS'grch37' -p44 -(dp45 -g32 -S'NC_000015.9:g.48782203C>T' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'48782203' -p48 -sg42 -g43 -sssS'hg38' -p49 -(dp50 -g32 -S'NC_000015.10:g.48490006C>T' -p51 -sg34 -(dp52 -g36 -S'chr15' -p53 -sg38 -g39 -sg40 -S'48490006' -p54 -sg42 -g43 -sssS'hg19' -p55 -(dp56 -g32 -S'NC_000015.9:g.48782203C>T' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'48782203' -p59 -sg42 -g43 -ssssS'reference_sequence_records' -p60 -(dp61 -S'protein' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000129.3' -p63 -sS'transcript' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000138.4' -p65 -sssS'flag' -p66 -S'gene_variant' -p67 -sS'metadata' -p68 -(dp69 
-S'variantvalidator_hgvs_version' -p70 -S'1.1.3' -p71 -sS'uta_schema' -p72 -S'uta_20180821' -p73 -sS'seqrepo_db' -p74 -S'2018-08-21' -p75 -sS'variantvalidator_version' -p76 -S'v0.2' -p77 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant231.txt b/VariantValidator/testing/testOutputsMasterITS/variant231.txt deleted file mode 100644 index fc5a7dda..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant231.txt +++ /dev/null @@ -1,511 +0,0 @@ -(dp0 -S'NM_014249.2:c.946_949=' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000015.9:g.72105929CC>C automapped to NC_000015.9:g.72105933delC' -p7 -aS'The displayed variants may be artefacts of aligning NM_014249.2 with genome build GRCh37' -p8 -aS'NC_000015.9:g.72105928_72105929 contains 1 genomic base(s) that fail to align to transcript NM_014249.2' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -aS'A more recent version of the selected reference sequence NM_014249.2 is available (NM_014249.3)' -p12 -aS'NM_014249.3:c.946_949GACC= MUST be fully validated prior to use in reports' -p13 -aS'select_variants=NM_014249.3:c.946_949=' -p14 -aS'RefSeqGene record not available' -p15 -asS'refseqgene_context_intronic_sequence' -p16 -g4 -sS'alt_genomic_loci' -p17 -(lp18 -sS'transcript_description' -p19 -VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA -p20 -sS'gene_symbol' -p21 -S'NR2E3' -p22 -sS'hgvs_predicted_protein_consequence' -p23 -(dp24 -S'tlr' -p25 -S'NP_055064.1:p.(Asp316=)' -p26 -sS'slr' -p27 -S'NP_055064.1:p.(D316=)' -p28 -ssS'submitted_variant' -p29 -S'15-72105929-CC-C' -p30 -sS'genome_context_intronic_sequence' -p31 -g4 -sS'hgvs_lrg_variant' -p32 -g4 -sS'hgvs_transcript_variant' -p33 -S'NM_014249.2:c.946_949=' -p34 -sS'hgvs_refseqgene_variant' -p35 -g4 -sS'primary_assembly_loci' 
-p36 -(dp37 -S'hg19' -p38 -(dp39 -S'hgvs_genomic_description' -p40 -S'NC_000015.9:g.72105933del' -p41 -sS'vcf' -p42 -(dp43 -S'chr' -p44 -S'chr15' -p45 -sS'ref' -p46 -S'AC' -p47 -sS'pos' -p48 -S'72105928' -p49 -sS'alt' -p50 -S'A' -p51 -sssS'grch37' -p52 -(dp53 -g40 -S'NC_000015.9:g.72105933del' -p54 -sg42 -(dp55 -g44 -S'15' -p56 -sg46 -S'AC' -p57 -sg48 -S'72105928' -p58 -sg50 -g51 -ssssS'reference_sequence_records' -p59 -(dp60 -S'protein' -p61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1' -p62 -sS'transcript' -p63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2' -p64 -sssS'NM_016346.3:c.946_949=' -p65 -(dp66 -g3 -g4 -sg5 -(lp67 -S'NC_000015.9:g.72105929CC>C automapped to NC_000015.9:g.72105933delC' -p68 -aS'The displayed variants may be artefacts of aligning NM_016346.3 with genome build GRCh37' -p69 -aS'NC_000015.9:g.72105928_72105929 contains 1 genomic base(s) that fail to align to transcript NM_016346.3' -p70 -aS'Caution should be used when reporting the displayed variant descriptions' -p71 -aS'If you are unsure, please contact admin' -p72 -aS'RefSeqGene record not available' -p73 -asg16 -g4 -sg17 -(lp74 -sg19 -VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA -p75 -sg21 -S'NR2E3' -p76 -sg23 -(dp77 -g25 -S'NP_057430.1:p.(Asp316=)' -p78 -sg27 -S'NP_057430.1:p.(D316=)' -p79 -ssg29 -g30 -sg31 -g4 -sg32 -g4 -sg33 -S'NM_016346.3:c.946_949=' -p80 -sg35 -g4 -sg36 -(dp81 -S'grch38' -p82 -(dp83 -g40 -S'NC_000015.10:g.71813587_71813590=' -p84 -sg42 -(dp85 -g44 -g56 -sg46 -VGACC -p86 -sg48 -S'71813587' -p87 -sg50 -g86 -sssS'grch37' -p88 -(dp89 -g40 -S'NC_000015.9:g.72105933del' -p90 -sg42 -(dp91 -g44 -g56 -sg46 -S'AC' -p92 -sg48 -S'72105928' -p93 -sg50 -g51 -sssS'hg38' -p94 -(dp95 -g40 -S'NC_000015.10:g.71813587_71813590=' -p96 -sg42 -(dp97 -g44 -g45 -sg46 -g86 -sg48 -S'71813587' -p98 -sg50 -g86 -sssS'hg19' -p99 -(dp100 -g40 -S'NC_000015.9:g.72105933del' -p101 -sg42 -(dp102 -g44 -g45 -sg46 -S'AC' -p103 -sg48 
-S'72105928' -p104 -sg50 -g51 -ssssg59 -(dp105 -g61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1' -p106 -sg63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3' -p107 -sssS'flag' -p108 -S'gene_variant' -p109 -sS'NM_014249.3:c.946_949=' -p110 -(dp111 -g3 -g4 -sg5 -(lp112 -S'NC_000015.9:g.72105929CC>C automapped to NC_000015.9:g.72105933delC' -p113 -aS'The displayed variants may be artefacts of aligning NM_014249.3 with genome build GRCh37' -p114 -aS'NC_000015.9:g.72105928_72105929 contains 1 genomic base(s) that fail to align to transcript NM_014249.3' -p115 -aS'Caution should be used when reporting the displayed variant descriptions' -p116 -aS'If you are unsure, please contact admin' -p117 -aS'RefSeqGene record not available' -p118 -asg16 -g4 -sg17 -(lp119 -sg19 -VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA -p120 -sg21 -S'NR2E3' -p121 -sg23 -(dp122 -g25 -S'NP_055064.1:p.(Asp316=)' -p123 -sg27 -S'NP_055064.1:p.(D316=)' -p124 -ssg29 -g30 -sg31 -g4 -sg32 -g4 -sg33 -S'NM_014249.3:c.946_949=' -p125 -sg35 -g4 -sg36 -(dp126 -S'grch38' -p127 -(dp128 -g40 -S'NC_000015.10:g.71813587_71813590=' -p129 -sg42 -(dp130 -g44 -g56 -sg46 -VGACC -p131 -sg48 -S'71813587' -p132 -sg50 -g131 -sssS'grch37' -p133 -(dp134 -g40 -S'NC_000015.9:g.72105933del' -p135 -sg42 -(dp136 -g44 -g56 -sg46 -S'AC' -p137 -sg48 -S'72105928' -p138 -sg50 -g51 -sssg94 -(dp139 -g40 -S'NC_000015.10:g.71813587_71813590=' -p140 -sg42 -(dp141 -g44 -g45 -sg46 -g131 -sg48 -S'71813587' -p142 -sg50 -g131 -sssS'hg19' -p143 -(dp144 -g40 -S'NC_000015.9:g.72105933del' -p145 -sg42 -(dp146 -g44 -g45 -sg46 -S'AC' -p147 -sg48 -S'72105928' -p148 -sg50 -g51 -ssssg59 -(dp149 -g61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1' -p150 -sg63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3' -p151 -sssS'NM_016346.2:c.946_949=' -p152 -(dp153 -g3 -g4 -sg5 -(lp154 -S'NC_000015.9:g.72105929CC>C automapped to NC_000015.9:g.72105933delC' -p155 -aS'The displayed variants may 
be artefacts of aligning NM_016346.2 with genome build GRCh37' -p156 -aS'NC_000015.9:g.72105928_72105929 contains 1 genomic base(s) that fail to align to transcript NM_016346.2' -p157 -aS'Caution should be used when reporting the displayed variant descriptions' -p158 -aS'If you are unsure, please contact admin' -p159 -aS'A more recent version of the selected reference sequence NM_016346.2 is available (NM_016346.3)' -p160 -aS'NM_016346.3:c.946_949GACC= MUST be fully validated prior to use in reports' -p161 -aS'select_variants=NM_016346.3:c.946_949=' -p162 -aS'RefSeqGene record not available' -p163 -asg16 -g4 -sg17 -(lp164 -sg19 -VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA -p165 -sg21 -S'NR2E3' -p166 -sg23 -(dp167 -g25 -S'NP_057430.1:p.(Asp316=)' -p168 -sg27 -S'NP_057430.1:p.(D316=)' -p169 -ssg29 -g30 -sg31 -g4 -sg32 -g4 -sg33 -S'NM_016346.2:c.946_949=' -p170 -sg35 -g4 -sg36 -(dp171 -S'hg19' -p172 -(dp173 -g40 -S'NC_000015.9:g.72105933del' -p174 -sg42 -(dp175 -g44 -g45 -sg46 -S'AC' -p176 -sg48 -S'72105928' -p177 -sg50 -g51 -sssS'grch37' -p178 -(dp179 -g40 -S'NC_000015.9:g.72105933del' -p180 -sg42 -(dp181 -g44 -g56 -sg46 -S'AC' -p182 -sg48 -S'72105928' -p183 -sg50 -g51 -ssssg59 -(dp184 -g61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1' -p185 -sg63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2' -p186 -sssS'metadata' -p187 -(dp188 -S'variantvalidator_hgvs_version' -p189 -S'1.1.3' -p190 -sS'uta_schema' -p191 -S'uta_20180821' -p192 -sS'seqrepo_db' -p193 -S'2018-08-21' -p194 -sS'variantvalidator_version' -p195 -S'v0.2' -p196 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant232.txt b/VariantValidator/testing/testOutputsMasterITS/variant232.txt deleted file mode 100644 index cac71aaa..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant232.txt +++ /dev/null @@ -1,286 +0,0 @@ -(dp0 -S'NM_002693.2:c.752C>T' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens DNA polymerase gamma, catalytic subunit (POLG), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'POLG' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_002684.1:p.(Thr251Ile)' -p18 -sS'slr' -p19 -S'NP_002684.1:p.(T251I)' -p20 -ssS'submitted_variant' -p21 -S'15-89873415-G-A' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_002693.2:c.752C>T' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'grch38' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000015.10:g.89330184G>A' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'15' -p37 -sS'ref' -p38 -VG -p39 -sS'pos' -p40 -S'89330184' -p41 -sS'alt' -p42 -VA -p43 -sssS'grch37' -p44 -(dp45 -g32 -S'NC_000015.9:g.89873415G>A' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'89873415' -p48 -sg42 -g43 -sssS'hg38' -p49 -(dp50 -g32 -S'NC_000015.10:g.89330184G>A' -p51 -sg34 -(dp52 -g36 -S'chr15' -p53 -sg38 -g39 -sg40 -S'89330184' -p54 -sg42 -g43 -sssS'hg19' -p55 -(dp56 -g32 -S'NC_000015.9:g.89873415G>A' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'89873415' -p59 -sg42 -g43 -ssssS'reference_sequence_records' -p60 -(dp61 -S'protein' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_002684.1' -p63 -sS'transcript' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_002693.2' -p65 -sssS'flag' -p66 
-S'gene_variant' -p67 -sS'metadata' -p68 -(dp69 -S'variantvalidator_hgvs_version' -p70 -S'1.1.3' -p71 -sS'uta_schema' -p72 -S'uta_20180821' -p73 -sS'seqrepo_db' -p74 -S'2018-08-21' -p75 -sS'variantvalidator_version' -p76 -S'v0.2' -p77 -ssS'NM_001126131.1:c.752C>T' -p78 -(dp79 -g3 -g4 -sg5 -(lp80 -S'RefSeqGene record not available' -p81 -asg8 -g4 -sg9 -(lp82 -sg11 -VHomo sapiens DNA polymerase gamma, catalytic subunit (POLG), transcript variant 2, mRNA -p83 -sg13 -S'POLG' -p84 -sg15 -(dp85 -g17 -S'NP_001119603.1:p.(Thr251Ile)' -p86 -sg19 -S'NP_001119603.1:p.(T251I)' -p87 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001126131.1:c.752C>T' -p88 -sg27 -g4 -sg28 -(dp89 -S'grch38' -p90 -(dp91 -g32 -S'NC_000015.10:g.89330184G>A' -p92 -sg34 -(dp93 -g36 -g37 -sg38 -g39 -sg40 -S'89330184' -p94 -sg42 -g43 -sssS'grch37' -p95 -(dp96 -g32 -S'NC_000015.9:g.89873415G>A' -p97 -sg34 -(dp98 -g36 -g37 -sg38 -g39 -sg40 -S'89873415' -p99 -sg42 -g43 -sssg49 -(dp100 -g32 -S'NC_000015.10:g.89330184G>A' -p101 -sg34 -(dp102 -g36 -g53 -sg38 -g39 -sg40 -S'89330184' -p103 -sg42 -g43 -sssS'hg19' -p104 -(dp105 -g32 -S'NC_000015.9:g.89873415G>A' -p106 -sg34 -(dp107 -g36 -g53 -sg38 -g39 -sg40 -S'89873415' -p108 -sg42 -g43 -ssssg60 -(dp109 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119603.1' -p110 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126131.1' -p111 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant233.txt b/VariantValidator/testing/testOutputsMasterITS/variant233.txt deleted file mode 100644 index 0ef11703..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant233.txt +++ /dev/null @@ -1,1291 +0,0 @@ -(dp0 -S'NM_001318832.1:c.310C>T' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 9, mRNA -p12 -sS'gene_symbol' -p13 -S'TSC2' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001305761.1:p.(Arg104Trp)' -p18 -sS'slr' -p19 -S'NP_001305761.1:p.(R104W)' -p20 -ssS'submitted_variant' -p21 -S'16-2103394-C-T' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_001318832.1:c.310C>T' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'grch38' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000016.10:g.2053393C>T' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'16' -p37 -sS'ref' -p38 -S'C' -p39 -sS'pos' -p40 -S'2053393' -p41 -sS'alt' -p42 -S'T' -p43 -sssS'grch37' -p44 -(dp45 -g32 -S'NC_000016.9:g.2103394C>T' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'2103394' -p48 -sg42 -g43 -sssS'hg38' -p49 -(dp50 -g32 -S'NC_000016.10:g.2053393C>T' -p51 -sg34 -(dp52 -g36 -S'chr16' -p53 -sg38 -g39 -sg40 -S'2053393' -p54 -sg42 -g43 -sssS'hg19' -p55 -(dp56 -g32 -S'NC_000016.9:g.2103394C>T' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'2103394' -p59 -sg42 -g43 -ssssS'reference_sequence_records' -p60 -(dp61 -S'protein' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305761.1' -p63 -sS'transcript' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318832.1' -p65 -sssS'NM_000548.4:c.277C>T' -p66 
-(dp67 -g3 -g4 -sg5 -(lp68 -S'RefSeqGene record not available' -p69 -asg8 -g4 -sg9 -(lp70 -sg11 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 1, mRNA -p71 -sg13 -S'TSC2' -p72 -sg15 -(dp73 -g17 -S'NP_000539.2:p.(Arg93Trp)' -p74 -sg19 -S'NP_000539.2:p.(R93W)' -p75 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_000548.4:c.277C>T' -p76 -sg27 -g4 -sg28 -(dp77 -S'grch38' -p78 -(dp79 -g32 -S'NC_000016.10:g.2053393C>T' -p80 -sg34 -(dp81 -g36 -g37 -sg38 -g39 -sg40 -S'2053393' -p82 -sg42 -g43 -sssS'grch37' -p83 -(dp84 -g32 -S'NC_000016.9:g.2103394C>T' -p85 -sg34 -(dp86 -g36 -g37 -sg38 -g39 -sg40 -S'2103394' -p87 -sg42 -g43 -sssg49 -(dp88 -g32 -S'NC_000016.10:g.2053393C>T' -p89 -sg34 -(dp90 -g36 -g53 -sg38 -g39 -sg40 -S'2053393' -p91 -sg42 -g43 -sssS'hg19' -p92 -(dp93 -g32 -S'NC_000016.9:g.2103394C>T' -p94 -sg34 -(dp95 -g36 -g53 -sg38 -g39 -sg40 -S'2103394' -p96 -sg42 -g43 -ssssg60 -(dp97 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000539.2' -p98 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000548.4' -p99 -sssS'NM_001318829.1:c.130C>T' -p100 -(dp101 -g3 -g4 -sg5 -(lp102 -S'RefSeqGene record not available' -p103 -asg8 -g4 -sg9 -(lp104 -sg11 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 7, mRNA -p105 -sg13 -S'TSC2' -p106 -sg15 -(dp107 -g17 -S'NP_001305758.1:p.(Arg44Trp)' -p108 -sg19 -S'NP_001305758.1:p.(R44W)' -p109 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001318829.1:c.130C>T' -p110 -sg27 -g4 -sg28 -(dp111 -S'grch38' -p112 -(dp113 -g32 -S'NC_000016.10:g.2053393C>T' -p114 -sg34 -(dp115 -g36 -g37 -sg38 -g39 -sg40 -S'2053393' -p116 -sg42 -g43 -sssS'grch37' -p117 -(dp118 -g32 -S'NC_000016.9:g.2103394C>T' -p119 -sg34 -(dp120 -g36 -g37 -sg38 -g39 -sg40 -S'2103394' -p121 -sg42 -g43 -sssg49 -(dp122 -g32 -S'NC_000016.10:g.2053393C>T' -p123 -sg34 -(dp124 -g36 -g53 -sg38 -g39 -sg40 -S'2053393' -p125 -sg42 -g43 -sssS'hg19' -p126 -(dp127 -g32 -S'NC_000016.9:g.2103394C>T' -p128 -sg34 -(dp129 -g36 -g53 -sg38 -g39 -sg40 -S'2103394' -p130 -sg42 -g43 
-ssssg60 -(dp131 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305758.1' -p132 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318829.1' -p133 -sssS'NM_001077183.2:c.277C>T' -p134 -(dp135 -g3 -g4 -sg5 -(lp136 -S'RefSeqGene record not available' -p137 -asg8 -g4 -sg9 -(lp138 -sg11 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 4, mRNA -p139 -sg13 -S'TSC2' -p140 -sg15 -(dp141 -g17 -S'NP_001070651.1:p.(Arg93Trp)' -p142 -sg19 -S'NP_001070651.1:p.(R93W)' -p143 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001077183.2:c.277C>T' -p144 -sg27 -g4 -sg28 -(dp145 -S'grch38' -p146 -(dp147 -g32 -S'NC_000016.10:g.2053393C>T' -p148 -sg34 -(dp149 -g36 -g37 -sg38 -g39 -sg40 -S'2053393' -p150 -sg42 -g43 -sssS'grch37' -p151 -(dp152 -g32 -S'NC_000016.9:g.2103394C>T' -p153 -sg34 -(dp154 -g36 -g37 -sg38 -g39 -sg40 -S'2103394' -p155 -sg42 -g43 -sssg49 -(dp156 -g32 -S'NC_000016.10:g.2053393C>T' -p157 -sg34 -(dp158 -g36 -g53 -sg38 -g39 -sg40 -S'2053393' -p159 -sg42 -g43 -sssS'hg19' -p160 -(dp161 -g32 -S'NC_000016.9:g.2103394C>T' -p162 -sg34 -(dp163 -g36 -g53 -sg38 -g39 -sg40 -S'2103394' -p164 -sg42 -g43 -ssssg60 -(dp165 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001070651.1' -p166 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077183.2' -p167 -sssS'NM_001114382.1:c.277C>T' -p168 -(dp169 -g3 -g4 -sg5 -(lp170 -S'A more recent version of the selected reference sequence NM_001114382.1 is available (NM_001114382.2)' -p171 -aS'NM_001114382.2:c.277C>T MUST be fully validated prior to use in reports' -p172 -aS'select_variants=NM_001114382.2:c.277C>T' -p173 -aS'RefSeqGene record not available' -p174 -asg8 -g4 -sg9 -(lp175 -sg11 -VHomo sapiens tuberous sclerosis 2 (TSC2), transcript variant 5, mRNA -p176 -sg13 -S'TSC2' -p177 -sg15 -(dp178 -g17 -S'NP_001107854.1:p.(Arg93Trp)' -p179 -sg19 -S'NP_001107854.1:p.(R93W)' -p180 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001114382.1:c.277C>T' -p181 -sg27 -g4 -sg28 -(dp182 -S'hg19' -p183 -(dp184 -g32 
-S'NC_000016.9:g.2103394C>T' -p185 -sg34 -(dp186 -g36 -g53 -sg38 -g39 -sg40 -S'2103394' -p187 -sg42 -g43 -sssS'grch37' -p188 -(dp189 -g32 -S'NC_000016.9:g.2103394C>T' -p190 -sg34 -(dp191 -g36 -g37 -sg38 -g39 -sg40 -S'2103394' -p192 -sg42 -g43 -ssssg60 -(dp193 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001107854.1' -p194 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001114382.1' -p195 -sssS'NM_001077183.1:c.277C>T' -p196 -(dp197 -g3 -g4 -sg5 -(lp198 -S'A more recent version of the selected reference sequence NM_001077183.1 is available (NM_001077183.2)' -p199 -aS'NM_001077183.2:c.277C>T MUST be fully validated prior to use in reports' -p200 -aS'select_variants=NM_001077183.2:c.277C>T' -p201 -aS'RefSeqGene record not available' -p202 -asg8 -g4 -sg9 -(lp203 -sg11 -VHomo sapiens tuberous sclerosis 2 (TSC2), transcript variant 4, mRNA -p204 -sg13 -S'TSC2' -p205 -sg15 -(dp206 -g17 -S'NP_001070651.1:p.(Arg93Trp)' -p207 -sg19 -S'NP_001070651.1:p.(R93W)' -p208 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001077183.1:c.277C>T' -p209 -sg27 -g4 -sg28 -(dp210 -S'hg19' -p211 -(dp212 -g32 -S'NC_000016.9:g.2103394C>T' -p213 -sg34 -(dp214 -g36 -g53 -sg38 -g39 -sg40 -S'2103394' -p215 -sg42 -g43 -sssS'grch37' -p216 -(dp217 -g32 -S'NC_000016.9:g.2103394C>T' -p218 -sg34 -(dp219 -g36 -g37 -sg38 -g39 -sg40 -S'2103394' -p220 -sg42 -g43 -ssssg60 -(dp221 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001070651.1' -p222 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077183.1' -p223 -sssS'NM_001318827.1:c.226-903C>T' -p224 -(dp225 -g3 -g4 -sg5 -(lp226 -S'RefSeqGene record not available' -p227 -asg8 -g4 -sg9 -(lp228 -sg11 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 6, mRNA -p229 -sg13 -S'TSC2' -p230 -sg15 -(dp231 -g17 -S'NP_001305756.1:p.?' -p232 -sg19 -S'NP_001305756.1:p.?' 
-p233 -ssg21 -g22 -sg23 -S'NC_000016.9(NM_001318827.1):c.226-903C>T' -p234 -sg24 -g4 -sg25 -S'NM_001318827.1:c.226-903C>T' -p235 -sg27 -g4 -sg28 -(dp236 -S'grch38' -p237 -(dp238 -g32 -S'NC_000016.10:g.2053393C>T' -p239 -sg34 -(dp240 -g36 -g37 -sg38 -g39 -sg40 -S'2053393' -p241 -sg42 -g43 -sssS'grch37' -p242 -(dp243 -g32 -S'NC_000016.9:g.2103394C>T' -p244 -sg34 -(dp245 -g36 -g37 -sg38 -g39 -sg40 -S'2103394' -p246 -sg42 -g43 -sssg49 -(dp247 -g32 -S'NC_000016.10:g.2053393C>T' -p248 -sg34 -(dp249 -g36 -g53 -sg38 -g39 -sg40 -S'2053393' -p250 -sg42 -g43 -sssS'hg19' -p251 -(dp252 -g32 -S'NC_000016.9:g.2103394C>T' -p253 -sg34 -(dp254 -g36 -g53 -sg38 -g39 -sg40 -S'2103394' -p255 -sg42 -g43 -ssssg60 -(dp256 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305756.1' -p257 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318827.1' -p258 -sssS'flag' -p259 -S'gene_variant' -p260 -sS'NM_001114382.2:c.277C>T' -p261 -(dp262 -g3 -g4 -sg5 -(lp263 -S'RefSeqGene record not available' -p264 -asg8 -g4 -sg9 -(lp265 -sg11 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 5, mRNA -p266 -sg13 -S'TSC2' -p267 -sg15 -(dp268 -g17 -S'NP_001107854.1:p.(Arg93Trp)' -p269 -sg19 -S'NP_001107854.1:p.(R93W)' -p270 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001114382.2:c.277C>T' -p271 -sg27 -g4 -sg28 -(dp272 -S'grch38' -p273 -(dp274 -g32 -S'NC_000016.10:g.2053393C>T' -p275 -sg34 -(dp276 -g36 -g37 -sg38 -g39 -sg40 -S'2053393' -p277 -sg42 -g43 -sssS'grch37' -p278 -(dp279 -g32 -S'NC_000016.9:g.2103394C>T' -p280 -sg34 -(dp281 -g36 -g37 -sg38 -g39 -sg40 -S'2103394' -p282 -sg42 -g43 -sssg49 -(dp283 -g32 -S'NC_000016.10:g.2053393C>T' -p284 -sg34 -(dp285 -g36 -g53 -sg38 -g39 -sg40 -S'2053393' -p286 -sg42 -g43 -sssS'hg19' -p287 -(dp288 -g32 -S'NC_000016.9:g.2103394C>T' -p289 -sg34 -(dp290 -g36 -g53 -sg38 -g39 -sg40 -S'2103394' -p291 -sg42 -g43 -ssssg60 -(dp292 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001107854.1' -p293 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001114382.2' -p294 
-sssS'NM_001363528.1:c.277C>T' -p295 -(dp296 -g3 -g4 -sg5 -(lp297 -S'RefSeqGene record not available' -p298 -asg8 -g4 -sg9 -(lp299 -sg11 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 10, mRNA -p300 -sg13 -S'TSC2' -p301 -sg15 -(dp302 -g17 -S'NP_001350457.1:p.(Arg93Trp)' -p303 -sg19 -S'NP_001350457.1:p.(R93W)' -p304 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001363528.1:c.277C>T' -p305 -sg27 -g4 -sg28 -(dp306 -S'hg19' -p307 -(dp308 -g32 -S'NC_000016.9:g.2103394C>T' -p309 -sg34 -(dp310 -g36 -g53 -sg38 -g39 -sg40 -S'2103394' -p311 -sg42 -g43 -sssS'grch37' -p312 -(dp313 -g32 -S'NC_000016.9:g.2103394C>T' -p314 -sg34 -(dp315 -g36 -g37 -sg38 -g39 -sg40 -S'2103394' -p316 -sg42 -g43 -ssssg60 -(dp317 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350457.1' -p318 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363528.1' -p319 -sssS'NM_021055.2:c.277C>T' -p320 -(dp321 -g3 -g4 -sg5 -(lp322 -S'RefSeqGene record not available' -p323 -asg8 -g4 -sg9 -(lp324 -sg11 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 2, mRNA -p325 -sg13 -S'TSC2' -p326 -sg15 -(dp327 -g17 -S'NP_066399.2:p.(Arg93Trp)' -p328 -sg19 -S'NP_066399.2:p.(R93W)' -p329 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_021055.2:c.277C>T' -p330 -sg27 -g4 -sg28 -(dp331 -S'hg19' -p332 -(dp333 -g32 -S'NC_000016.9:g.2103394C>T' -p334 -sg34 -(dp335 -g36 -g53 -sg38 -g39 -sg40 -S'2103394' -p336 -sg42 -g43 -sssS'grch37' -p337 -(dp338 -g32 -S'NC_000016.9:g.2103394C>T' -p339 -sg34 -(dp340 -g36 -g37 -sg38 -g39 -sg40 -S'2103394' -p341 -sg42 -g43 -ssssg60 -(dp342 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_066399.2' -p343 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_021055.2' -p344 -sssS'NM_000548.3:c.277C>T' -p345 -(dp346 -g3 -g4 -sg5 -(lp347 -S'A more recent version of the selected reference sequence NM_000548.3 is available (NM_000548.4)' -p348 -aS'NM_000548.4:c.277C>T MUST be fully validated prior to use in reports' -p349 -aS'select_variants=NM_000548.4:c.277C>T' -p350 -aS'RefSeqGene 
record not available' -p351 -asg8 -g4 -sg9 -(lp352 -sg11 -VHomo sapiens tuberous sclerosis 2 (TSC2), transcript variant 1, mRNA -p353 -sg13 -S'TSC2' -p354 -sg15 -(dp355 -g17 -S'NP_000539.2:p.(Arg93Trp)' -p356 -sg19 -S'NP_000539.2:p.(R93W)' -p357 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_000548.3:c.277C>T' -p358 -sg27 -g4 -sg28 -(dp359 -S'hg19' -p360 -(dp361 -g32 -S'NC_000016.9:g.2103394C>T' -p362 -sg34 -(dp363 -g36 -g53 -sg38 -g39 -sg40 -S'2103394' -p364 -sg42 -g43 -sssS'grch37' -p365 -(dp366 -g32 -S'NC_000016.9:g.2103394C>T' -p367 -sg34 -(dp368 -g36 -g37 -sg38 -g39 -sg40 -S'2103394' -p369 -sg42 -g43 -ssssg60 -(dp370 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000539.2' -p371 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000548.3' -p372 -sssS'NM_001318831.1:c.-1-2803C>T' -p373 -(dp374 -g3 -g4 -sg5 -(lp375 -S'RefSeqGene record not available' -p376 -asg8 -g4 -sg9 -(lp377 -sg11 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 8, mRNA -p378 -sg13 -S'TSC2' -p379 -sg15 -(dp380 -g17 -S'NP_001305760.1:p.?' -p381 -sg19 -S'NP_001305760.1:p.?' 
-p382 -ssg21 -g22 -sg23 -S'NC_000016.9(NM_001318831.1):c.-1-2803C>T' -p383 -sg24 -g4 -sg25 -S'NM_001318831.1:c.-1-2803C>T' -p384 -sg27 -g4 -sg28 -(dp385 -S'grch38' -p386 -(dp387 -g32 -S'NC_000016.10:g.2053393C>T' -p388 -sg34 -(dp389 -g36 -g37 -sg38 -g39 -sg40 -S'2053393' -p390 -sg42 -g43 -sssS'grch37' -p391 -(dp392 -g32 -S'NC_000016.9:g.2103394C>T' -p393 -sg34 -(dp394 -g36 -g37 -sg38 -g39 -sg40 -S'2103394' -p395 -sg42 -g43 -sssg49 -(dp396 -g32 -S'NC_000016.10:g.2053393C>T' -p397 -sg34 -(dp398 -g36 -g53 -sg38 -g39 -sg40 -S'2053393' -p399 -sg42 -g43 -sssS'hg19' -p400 -(dp401 -g32 -S'NC_000016.9:g.2103394C>T' -p402 -sg34 -(dp403 -g36 -g53 -sg38 -g39 -sg40 -S'2103394' -p404 -sg42 -g43 -ssssg60 -(dp405 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305760.1' -p406 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318831.1' -p407 -sssS'metadata' -p408 -(dp409 -S'variantvalidator_hgvs_version' -p410 -S'1.1.3' -p411 -sS'uta_schema' -p412 -S'uta_20180821' -p413 -sS'seqrepo_db' -p414 -S'2018-08-21' -p415 -sS'variantvalidator_version' -p416 -S'v0.2' -p417 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant234.txt b/VariantValidator/testing/testOutputsMasterITS/variant234.txt deleted file mode 100644 index 3c5da003..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant234.txt +++ /dev/null @@ -1,286 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_001079846.1:c.5634G>C' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens CREB binding protein (CREBBP), transcript variant 2, mRNA -p14 -sS'gene_symbol' -p15 -S'CREBBP' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_001073315.1:p.(Met1878Ile)' -p20 -sS'slr' -p21 -S'NP_001073315.1:p.(M1878I)' -p22 -ssS'submitted_variant' -p23 -S'16-3779300-C-G' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_001079846.1:c.5634G>C' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'grch38' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000016.10:g.3729299C>G' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'16' -p39 -sS'ref' -p40 -VC -p41 -sS'pos' -p42 -S'3729299' -p43 -sS'alt' -p44 -VG -p45 -sssS'grch37' -p46 -(dp47 -g34 -S'NC_000016.9:g.3779300C>G' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'3779300' -p50 -sg44 -g45 -sssS'hg38' -p51 -(dp52 -g34 -S'NC_000016.10:g.3729299C>G' -p53 -sg36 -(dp54 -g38 -S'chr16' -p55 -sg40 -g41 -sg42 -S'3729299' -p56 -sg44 -g45 -sssS'hg19' -p57 -(dp58 -g34 -S'NC_000016.9:g.3779300C>G' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'3779300' -p61 -sg44 -g45 -ssssS'reference_sequence_records' -p62 -(dp63 -S'protein' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073315.1' -p65 -sS'transcript' -p66 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001079846.1' -p67 -sssS'NM_004380.2:c.5748G>C' -p68 -(dp69 -g5 -g6 -sg7 -(lp70 -S'RefSeqGene record not available' -p71 -asg10 -g6 -sg11 -(lp72 -sg13 -VHomo sapiens CREB binding protein (CREBBP), transcript variant 1, mRNA -p73 -sg15 -S'CREBBP' -p74 -sg17 -(dp75 -g19 -S'NP_004371.2:p.(Met1916Ile)' -p76 -sg21 -S'NP_004371.2:p.(M1916I)' -p77 -ssg23 -g24 -sg25 -g6 -sg26 -g6 -sg27 -S'NM_004380.2:c.5748G>C' -p78 -sg29 -g6 -sg30 -(dp79 -S'grch38' -p80 -(dp81 -g34 -S'NC_000016.10:g.3729299C>G' -p82 -sg36 -(dp83 -g38 -g39 -sg40 -g41 -sg42 -S'3729299' -p84 -sg44 -g45 -sssS'grch37' -p85 -(dp86 -g34 -S'NC_000016.9:g.3779300C>G' -p87 -sg36 -(dp88 -g38 -g39 -sg40 -g41 -sg42 -S'3779300' -p89 -sg44 -g45 -sssg51 -(dp90 -g34 -S'NC_000016.10:g.3729299C>G' -p91 -sg36 -(dp92 -g38 -g55 -sg40 -g41 -sg42 -S'3729299' -p93 -sg44 -g45 -sssS'hg19' -p94 -(dp95 -g34 -S'NC_000016.9:g.3779300C>G' -p96 -sg36 -(dp97 -g38 -g55 -sg40 -g41 -sg42 -S'3779300' -p98 -sg44 -g45 -ssssg62 -(dp99 -g64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004371.2' -p100 -sg66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004380.2' -p101 -sssS'metadata' -p102 -(dp103 -S'variantvalidator_hgvs_version' -p104 -S'1.1.3' -p105 -sS'uta_schema' -p106 -S'uta_20180821' -p107 -sS'seqrepo_db' -p108 -S'2018-08-21' -p109 -sS'variantvalidator_version' -p110 -S'v0.2' -p111 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant235.txt b/VariantValidator/testing/testOutputsMasterITS/variant235.txt deleted file mode 100644 index b637011e..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant235.txt +++ /dev/null @@ -1,286 +0,0 @@ -(dp0 -S'NM_019109.4:c.826C>G' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens ALG1, chitobiosyldiphosphodolichol beta-mannosyltransferase (ALG1), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'ALG1' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_061982.3:p.(Arg276Gly)' -p18 -sS'slr' -p19 -S'NP_061982.3:p.(R276G)' -p20 -ssS'submitted_variant' -p21 -S'16-5128843-C-G' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_019109.4:c.826C>G' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'grch38' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000016.10:g.5078842C>G' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'16' -p37 -sS'ref' -p38 -S'C' -p39 -sS'pos' -p40 -S'5078842' -p41 -sS'alt' -p42 -S'G' -p43 -sssS'grch37' -p44 -(dp45 -g32 -S'NC_000016.9:g.5128843C>G' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'5128843' -p48 -sg42 -g43 -sssS'hg38' -p49 -(dp50 -g32 -S'NC_000016.10:g.5078842C>G' -p51 -sg34 -(dp52 -g36 -S'chr16' -p53 -sg38 -g39 -sg40 -S'5078842' -p54 -sg42 -g43 -sssS'hg19' -p55 -(dp56 -g32 -S'NC_000016.9:g.5128843C>G' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'5128843' -p59 -sg42 -g43 -ssssS'reference_sequence_records' -p60 -(dp61 -S'protein' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_061982.3' -p63 -sS'transcript' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_019109.4' -p65 -sssS'flag' 
-p66 -S'gene_variant' -p67 -sS'metadata' -p68 -(dp69 -S'variantvalidator_hgvs_version' -p70 -S'1.1.3' -p71 -sS'uta_schema' -p72 -S'uta_20180821' -p73 -sS'seqrepo_db' -p74 -S'2018-08-21' -p75 -sS'variantvalidator_version' -p76 -S'v0.2' -p77 -ssS'NM_001330504.1:c.493C>G' -p78 -(dp79 -g3 -g4 -sg5 -(lp80 -S'RefSeqGene record not available' -p81 -asg8 -g4 -sg9 -(lp82 -sg11 -VHomo sapiens ALG1, chitobiosyldiphosphodolichol beta-mannosyltransferase (ALG1), transcript variant 2, mRNA -p83 -sg13 -S'ALG1' -p84 -sg15 -(dp85 -g17 -S'NP_001317433.1:p.(Arg165Gly)' -p86 -sg19 -S'NP_001317433.1:p.(R165G)' -p87 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330504.1:c.493C>G' -p88 -sg27 -g4 -sg28 -(dp89 -S'grch38' -p90 -(dp91 -g32 -S'NC_000016.10:g.5078842C>G' -p92 -sg34 -(dp93 -g36 -g37 -sg38 -g39 -sg40 -S'5078842' -p94 -sg42 -g43 -sssS'grch37' -p95 -(dp96 -g32 -S'NC_000016.9:g.5128843C>G' -p97 -sg34 -(dp98 -g36 -g37 -sg38 -g39 -sg40 -S'5128843' -p99 -sg42 -g43 -sssg49 -(dp100 -g32 -S'NC_000016.10:g.5078842C>G' -p101 -sg34 -(dp102 -g36 -g53 -sg38 -g39 -sg40 -S'5078842' -p103 -sg42 -g43 -sssS'hg19' -p104 -(dp105 -g32 -S'NC_000016.9:g.5128843C>G' -p106 -sg34 -(dp107 -g36 -g53 -sg38 -g39 -sg40 -S'5128843' -p108 -sg42 -g43 -ssssg60 -(dp109 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317433.1' -p110 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330504.1' -p111 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant236.txt b/VariantValidator/testing/testOutputsMasterITS/variant236.txt deleted file mode 100644 index d1db4531..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant236.txt +++ /dev/null @@ -1,171 +0,0 @@ -(dp0 -S'NM_024306.4:c.95G>A' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens fatty acid 2-hydroxylase (FA2H), mRNA -p12 -sS'gene_symbol' -p13 -S'FA2H' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_077282.3:p.(Arg32His)' -p18 -sS'slr' -p19 -S'NP_077282.3:p.(R32H)' -p20 -ssS'submitted_variant' -p21 -S'16-74808559-C-T' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_024306.4:c.95G>A' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'grch38' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000016.10:g.74774661C>T' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'16' -p37 -sS'ref' -p38 -VC -p39 -sS'pos' -p40 -S'74774661' -p41 -sS'alt' -p42 -VT -p43 -sssS'grch37' -p44 -(dp45 -g32 -S'NC_000016.9:g.74808559C>T' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'74808559' -p48 -sg42 -g43 -sssS'hg38' -p49 -(dp50 -g32 -S'NC_000016.10:g.74774661C>T' -p51 -sg34 -(dp52 -g36 -S'chr16' -p53 -sg38 -g39 -sg40 -S'74774661' -p54 -sg42 -g43 -sssS'hg19' -p55 -(dp56 -g32 -S'NC_000016.9:g.74808559C>T' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'74808559' -p59 -sg42 -g43 -ssssS'reference_sequence_records' -p60 -(dp61 -S'protein' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_077282.3' -p63 -sS'transcript' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_024306.4' -p65 -sssS'flag' -p66 -S'gene_variant' -p67 -sS'metadata' -p68 -(dp69 
-S'variantvalidator_hgvs_version' -p70 -S'1.1.3' -p71 -sS'uta_schema' -p72 -S'uta_20180821' -p73 -sS'seqrepo_db' -p74 -S'2018-08-21' -p75 -sS'variantvalidator_version' -p76 -S'v0.2' -p77 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant237.txt b/VariantValidator/testing/testOutputsMasterITS/variant237.txt deleted file mode 100644 index 31669e82..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant237.txt +++ /dev/null @@ -1,368 +0,0 @@ -(dp0 -S'NM_003119.3:c.-22C>A' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'SPG7' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_003110.1:p.?' -p18 -sS'slr' -p19 -S'NP_003110.1:p.?' 
-p20 -ssS'submitted_variant' -p21 -S'16-89574804-C-A' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_003119.3:c.-22C>A' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'grch38' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000016.10:g.89508396C>A' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'16' -p37 -sS'ref' -p38 -S'C' -p39 -sS'pos' -p40 -S'89508396' -p41 -sS'alt' -p42 -S'A' -p43 -sssS'grch37' -p44 -(dp45 -g32 -S'NC_000016.9:g.89574804C>A' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'89574804' -p48 -sg42 -g43 -sssS'hg38' -p49 -(dp50 -g32 -S'NC_000016.10:g.89508396C>A' -p51 -sg34 -(dp52 -g36 -S'chr16' -p53 -sg38 -g39 -sg40 -S'89508396' -p54 -sg42 -g43 -sssS'hg19' -p55 -(dp56 -g32 -S'NC_000016.9:g.89574804C>A' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'89574804' -p59 -sg42 -g43 -ssssS'reference_sequence_records' -p60 -(dp61 -S'protein' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p63 -sS'transcript' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' -p65 -sssS'flag' -p66 -S'gene_variant' -p67 -sS'NM_199367.2:c.-22C>A' -p68 -(dp69 -g3 -g4 -sg5 -(lp70 -S'RefSeqGene record not available' -p71 -asg8 -g4 -sg9 -(lp72 -sg11 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA -p73 -sg13 -S'SPG7' -p74 -sg15 -(dp75 -g17 -S'NP_955399.1:p.?' -p76 -sg19 -S'NP_955399.1:p.?' 
-p77 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_199367.2:c.-22C>A' -p78 -sg27 -g4 -sg28 -(dp79 -S'grch38' -p80 -(dp81 -g32 -S'NC_000016.10:g.89508396C>A' -p82 -sg34 -(dp83 -g36 -g37 -sg38 -g39 -sg40 -S'89508396' -p84 -sg42 -g43 -sssS'grch37' -p85 -(dp86 -g32 -S'NC_000016.9:g.89574804C>A' -p87 -sg34 -(dp88 -g36 -g37 -sg38 -g39 -sg40 -S'89574804' -p89 -sg42 -g43 -sssg49 -(dp90 -g32 -S'NC_000016.10:g.89508396C>A' -p91 -sg34 -(dp92 -g36 -g53 -sg38 -g39 -sg40 -S'89508396' -p93 -sg42 -g43 -sssS'hg19' -p94 -(dp95 -g32 -S'NC_000016.9:g.89574804C>A' -p96 -sg34 -(dp97 -g36 -g53 -sg38 -g39 -sg40 -S'89574804' -p98 -sg42 -g43 -ssssg60 -(dp99 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' -p100 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2' -p101 -sssS'NM_001363850.1:c.-22C>A' -p102 -(dp103 -g3 -g4 -sg5 -(lp104 -S'RefSeqGene record not available' -p105 -asg8 -g4 -sg9 -(lp106 -sg11 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p107 -sg13 -S'SPG7' -p108 -sg15 -(dp109 -g17 -S'NP_001350779.1:p.?' -p110 -sg19 -S'NP_001350779.1:p.?' -p111 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001363850.1:c.-22C>A' -p112 -sg27 -g4 -sg28 -(dp113 -S'hg19' -p114 -(dp115 -g32 -S'NC_000016.9:g.89574804C>A' -p116 -sg34 -(dp117 -g36 -g53 -sg38 -g39 -sg40 -S'89574804' -p118 -sg42 -g43 -sssS'grch37' -p119 -(dp120 -g32 -S'NC_000016.9:g.89574804C>A' -p121 -sg34 -(dp122 -g36 -g37 -sg38 -g39 -sg40 -S'89574804' -p123 -sg42 -g43 -ssssg60 -(dp124 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' -p125 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' -p126 -sssS'metadata' -p127 -(dp128 -S'variantvalidator_hgvs_version' -p129 -S'1.1.3' -p130 -sS'uta_schema' -p131 -S'uta_20180821' -p132 -sS'seqrepo_db' -p133 -S'2018-08-21' -p134 -sS'variantvalidator_version' -p135 -S'v0.2' -p136 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant238.txt b/VariantValidator/testing/testOutputsMasterITS/variant238.txt deleted file mode 100644 index 756d1d36..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant238.txt +++ /dev/null @@ -1,544 +0,0 @@ -(dp0 -S'NM_003119.2:c.1A>C' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' -p7 -aS'NM_003119.3:c.1A>C MUST be fully validated prior to use in reports' -p8 -aS'select_variants=NM_003119.3:c.1A>C' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g4 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p15 -sS'gene_symbol' -p16 -S'SPG7' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_003110.1:p.(Met1?)' -p21 -sS'slr' -p22 -S'NP_003110.1:p.(M1?)' -p23 -ssS'submitted_variant' -p24 -S'16-89574826-A-C' -p25 -sS'genome_context_intronic_sequence' -p26 -g4 -sS'hgvs_lrg_variant' -p27 -g4 -sS'hgvs_transcript_variant' -p28 -S'NM_003119.2:c.1A>C' -p29 -sS'hgvs_refseqgene_variant' -p30 -g4 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'hgvs_genomic_description' -p35 -S'NC_000016.9:g.89574826A>C' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr16' -p40 -sS'ref' -p41 -S'A' -p42 -sS'pos' -p43 -S'89574826' -p44 -sS'alt' -p45 -S'C' -p46 -sssS'grch37' -p47 -(dp48 -g35 -S'NC_000016.9:g.89574826A>C' -p49 -sg37 -(dp50 -g39 -S'16' -p51 -sg41 -g42 -sg43 -S'89574826' -p52 -sg45 -g46 -ssssS'reference_sequence_records' -p53 -(dp54 -S'protein' -p55 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p56 -sS'transcript' -p57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' -p58 -sssS'NM_199367.1:c.1A>C' -p59 -(dp60 -g3 -g4 -sg5 
-(lp61 -S'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' -p62 -aS'NM_199367.2:c.1A>C MUST be fully validated prior to use in reports' -p63 -aS'select_variants=NM_199367.2:c.1A>C' -p64 -aS'RefSeqGene record not available' -p65 -asg11 -g4 -sg12 -(lp66 -sg14 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA -p67 -sg16 -S'SPG7' -p68 -sg18 -(dp69 -g20 -S'NP_955399.1:p.(Met1?)' -p70 -sg22 -S'NP_955399.1:p.(M1?)' -p71 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_199367.1:c.1A>C' -p72 -sg30 -g4 -sg31 -(dp73 -S'hg19' -p74 -(dp75 -g35 -S'NC_000016.9:g.89574826A>C' -p76 -sg37 -(dp77 -g39 -g40 -sg41 -g42 -sg43 -S'89574826' -p78 -sg45 -g46 -sssS'grch37' -p79 -(dp80 -g35 -S'NC_000016.9:g.89574826A>C' -p81 -sg37 -(dp82 -g39 -g51 -sg41 -g42 -sg43 -S'89574826' -p83 -sg45 -g46 -ssssg53 -(dp84 -g55 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' -p85 -sg57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1' -p86 -sssS'NM_001363850.1:c.1A>C' -p87 -(dp88 -g3 -g4 -sg5 -(lp89 -S'RefSeqGene record not available' -p90 -asg11 -g4 -sg12 -(lp91 -sg14 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p92 -sg16 -S'SPG7' -p93 -sg18 -(dp94 -g20 -S'NP_001350779.1:p.(Met1?)' -p95 -sg22 -S'NP_001350779.1:p.(M1?)' -p96 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_001363850.1:c.1A>C' -p97 -sg30 -g4 -sg31 -(dp98 -S'hg19' -p99 -(dp100 -g35 -S'NC_000016.9:g.89574826A>C' -p101 -sg37 -(dp102 -g39 -g40 -sg41 -g42 -sg43 -S'89574826' -p103 -sg45 -g46 -sssS'grch37' -p104 -(dp105 -g35 -S'NC_000016.9:g.89574826A>C' -p106 -sg37 -(dp107 -g39 -g51 -sg41 -g42 -sg43 -S'89574826' -p108 -sg45 -g46 -ssssg53 -(dp109 -g55 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' -p110 -sg57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' -p111 -sssS'NM_199367.2:c.1A>C' -p112 -(dp113 -g3 -g4 -sg5 -(lp114 -S'RefSeqGene record not available' -p115 -asg11 
-g4 -sg12 -(lp116 -sg14 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA -p117 -sg16 -S'SPG7' -p118 -sg18 -(dp119 -g20 -S'NP_955399.1:p.(Met1?)' -p120 -sg22 -S'NP_955399.1:p.(M1?)' -p121 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_199367.2:c.1A>C' -p122 -sg30 -g4 -sg31 -(dp123 -S'grch38' -p124 -(dp125 -g35 -S'NC_000016.10:g.89508418A>C' -p126 -sg37 -(dp127 -g39 -g51 -sg41 -g42 -sg43 -S'89508418' -p128 -sg45 -g46 -sssS'grch37' -p129 -(dp130 -g35 -S'NC_000016.9:g.89574826A>C' -p131 -sg37 -(dp132 -g39 -g51 -sg41 -g42 -sg43 -S'89574826' -p133 -sg45 -g46 -sssS'hg38' -p134 -(dp135 -g35 -S'NC_000016.10:g.89508418A>C' -p136 -sg37 -(dp137 -g39 -g40 -sg41 -g42 -sg43 -S'89508418' -p138 -sg45 -g46 -sssS'hg19' -p139 -(dp140 -g35 -S'NC_000016.9:g.89574826A>C' -p141 -sg37 -(dp142 -g39 -g40 -sg41 -g42 -sg43 -S'89574826' -p143 -sg45 -g46 -ssssg53 -(dp144 -g55 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' -p145 -sg57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2' -p146 -sssS'flag' -p147 -S'gene_variant' -p148 -sS'NM_003119.3:c.1A>C' -p149 -(dp150 -g3 -g4 -sg5 -(lp151 -S'RefSeqGene record not available' -p152 -asg11 -g4 -sg12 -(lp153 -sg14 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p154 -sg16 -S'SPG7' -p155 -sg18 -(dp156 -g20 -S'NP_003110.1:p.(Met1?)' -p157 -sg22 -S'NP_003110.1:p.(M1?)' -p158 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_003119.3:c.1A>C' -p159 -sg30 -g4 -sg31 -(dp160 -S'grch38' -p161 -(dp162 -g35 -S'NC_000016.10:g.89508418A>C' -p163 -sg37 -(dp164 -g39 -g51 -sg41 -g42 -sg43 -S'89508418' -p165 -sg45 -g46 -sssS'grch37' -p166 -(dp167 -g35 -S'NC_000016.9:g.89574826A>C' -p168 -sg37 -(dp169 -g39 -g51 -sg41 -g42 -sg43 -S'89574826' -p170 -sg45 -g46 -sssg134 -(dp171 -g35 -S'NC_000016.10:g.89508418A>C' -p172 -sg37 -(dp173 -g39 -g40 -sg41 -g42 -sg43 -S'89508418' -p174 -sg45 -g46 -sssS'hg19' -p175 -(dp176 -g35 -S'NC_000016.9:g.89574826A>C' -p177 -sg37 -(dp178 -g39 -g40 
-sg41 -g42 -sg43 -S'89574826' -p179 -sg45 -g46 -ssssg53 -(dp180 -g55 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p181 -sg57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' -p182 -sssS'metadata' -p183 -(dp184 -S'variantvalidator_hgvs_version' -p185 -S'1.1.3' -p186 -sS'uta_schema' -p187 -S'uta_20180821' -p188 -sS'seqrepo_db' -p189 -S'2018-08-21' -p190 -sS'variantvalidator_version' -p191 -S'v0.2' -p192 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant239.txt b/VariantValidator/testing/testOutputsMasterITS/variant239.txt deleted file mode 100644 index f9178588..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant239.txt +++ /dev/null @@ -1,567 +0,0 @@ -(dp0 -S'NM_001363850.1:c.90dup' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000016.9:g.89574914G>GT automapped to NC_000016.9:g.89574915dupT' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p13 -sS'gene_symbol' -p14 -S'SPG7' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001350779.1:p.(Pro31SerfsTer43)' -p19 -sS'slr' -p20 -S'NP_001350779.1:p.(P31Sfs*43)' -p21 -ssS'submitted_variant' -p22 -S'16-89574914-G-GT' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_001363850.1:c.90dup' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000016.9:g.89574915dup' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr16' -p38 -sS'ref' -p39 -S'T' -p40 -sS'pos' -p41 -S'89574915' -p42 -sS'alt' -p43 -S'TT' -p44 -sssS'grch37' -p45 -(dp46 -g33 -S'NC_000016.9:g.89574915dup' -p47 -sg35 -(dp48 -g37 -S'16' -p49 -sg39 -g40 
-sg41 -S'89574915' -p50 -sg43 -S'TT' -p51 -ssssS'reference_sequence_records' -p52 -(dp53 -S'protein' -p54 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' -p55 -sS'transcript' -p56 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' -p57 -sssS'NM_199367.1:c.90dup' -p58 -(dp59 -g3 -g4 -sg5 -(lp60 -S'NC_000016.9:g.89574914G>GT automapped to NC_000016.9:g.89574915dupT' -p61 -aS'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' -p62 -aS'NM_199367.2:c.90dupT MUST be fully validated prior to use in reports' -p63 -aS'select_variants=NM_199367.2:c.90dup' -p64 -aS'RefSeqGene record not available' -p65 -asg9 -g4 -sg10 -(lp66 -sg12 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA -p67 -sg14 -S'SPG7' -p68 -sg16 -(dp69 -g18 -S'NP_955399.1:p.(Pro31SerfsTer43)' -p70 -sg20 -S'NP_955399.1:p.(P31Sfs*43)' -p71 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_199367.1:c.90dup' -p72 -sg28 -g4 -sg29 -(dp73 -S'hg19' -p74 -(dp75 -g33 -S'NC_000016.9:g.89574915dup' -p76 -sg35 -(dp77 -g37 -g38 -sg39 -g40 -sg41 -S'89574915' -p78 -sg43 -S'TT' -p79 -sssS'grch37' -p80 -(dp81 -g33 -S'NC_000016.9:g.89574915dup' -p82 -sg35 -(dp83 -g37 -g49 -sg39 -g40 -sg41 -S'89574915' -p84 -sg43 -S'TT' -p85 -ssssg52 -(dp86 -g54 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' -p87 -sg56 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1' -p88 -sssS'NM_003119.2:c.90dup' -p89 -(dp90 -g3 -g4 -sg5 -(lp91 -S'NC_000016.9:g.89574914G>GT automapped to NC_000016.9:g.89574915dupT' -p92 -aS'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' -p93 -aS'NM_003119.3:c.90dupT MUST be fully validated prior to use in reports' -p94 -aS'select_variants=NM_003119.3:c.90dup' -p95 -aS'RefSeqGene record not available' -p96 -asg9 -g4 -sg10 -(lp97 -sg12 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p98 
-sg14 -S'SPG7' -p99 -sg16 -(dp100 -g18 -S'NP_003110.1:p.(Pro31SerfsTer43)' -p101 -sg20 -S'NP_003110.1:p.(P31Sfs*43)' -p102 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_003119.2:c.90dup' -p103 -sg28 -g4 -sg29 -(dp104 -S'hg19' -p105 -(dp106 -g33 -S'NC_000016.9:g.89574915dup' -p107 -sg35 -(dp108 -g37 -g38 -sg39 -g40 -sg41 -S'89574915' -p109 -sg43 -S'TT' -p110 -sssS'grch37' -p111 -(dp112 -g33 -S'NC_000016.9:g.89574915dup' -p113 -sg35 -(dp114 -g37 -g49 -sg39 -g40 -sg41 -S'89574915' -p115 -sg43 -S'TT' -p116 -ssssg52 -(dp117 -g54 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p118 -sg56 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' -p119 -sssS'NM_199367.2:c.90dup' -p120 -(dp121 -g3 -g4 -sg5 -(lp122 -S'NC_000016.9:g.89574914G>GT automapped to NC_000016.9:g.89574915dupT' -p123 -aS'RefSeqGene record not available' -p124 -asg9 -g4 -sg10 -(lp125 -sg12 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA -p126 -sg14 -S'SPG7' -p127 -sg16 -(dp128 -g18 -S'NP_955399.1:p.(Pro31SerfsTer43)' -p129 -sg20 -S'NP_955399.1:p.(P31Sfs*43)' -p130 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_199367.2:c.90dup' -p131 -sg28 -g4 -sg29 -(dp132 -S'grch38' -p133 -(dp134 -g33 -S'NC_000016.10:g.89508507dup' -p135 -sg35 -(dp136 -g37 -g49 -sg39 -g40 -sg41 -S'89508507' -p137 -sg43 -S'TT' -p138 -sssS'grch37' -p139 -(dp140 -g33 -S'NC_000016.9:g.89574915dup' -p141 -sg35 -(dp142 -g37 -g49 -sg39 -g40 -sg41 -S'89574915' -p143 -sg43 -S'TT' -p144 -sssS'hg38' -p145 -(dp146 -g33 -S'NC_000016.10:g.89508507dup' -p147 -sg35 -(dp148 -g37 -g38 -sg39 -g40 -sg41 -S'89508507' -p149 -sg43 -S'TT' -p150 -sssS'hg19' -p151 -(dp152 -g33 -S'NC_000016.9:g.89574915dup' -p153 -sg35 -(dp154 -g37 -g38 -sg39 -g40 -sg41 -S'89574915' -p155 -sg43 -S'TT' -p156 -ssssg52 -(dp157 -g54 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' -p158 -sg56 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2' -p159 -sssS'flag' -p160 -S'gene_variant' -p161 -sS'NM_003119.3:c.90dup' -p162 -(dp163 -g3 
-g4 -sg5 -(lp164 -S'NC_000016.9:g.89574914G>GT automapped to NC_000016.9:g.89574915dupT' -p165 -aS'RefSeqGene record not available' -p166 -asg9 -g4 -sg10 -(lp167 -sg12 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p168 -sg14 -S'SPG7' -p169 -sg16 -(dp170 -g18 -S'NP_003110.1:p.(Pro31SerfsTer43)' -p171 -sg20 -S'NP_003110.1:p.(P31Sfs*43)' -p172 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_003119.3:c.90dup' -p173 -sg28 -g4 -sg29 -(dp174 -S'grch38' -p175 -(dp176 -g33 -S'NC_000016.10:g.89508507dup' -p177 -sg35 -(dp178 -g37 -g49 -sg39 -g40 -sg41 -S'89508507' -p179 -sg43 -S'TT' -p180 -sssS'grch37' -p181 -(dp182 -g33 -S'NC_000016.9:g.89574915dup' -p183 -sg35 -(dp184 -g37 -g49 -sg39 -g40 -sg41 -S'89574915' -p185 -sg43 -S'TT' -p186 -sssg145 -(dp187 -g33 -S'NC_000016.10:g.89508507dup' -p188 -sg35 -(dp189 -g37 -g38 -sg39 -g40 -sg41 -S'89508507' -p190 -sg43 -S'TT' -p191 -sssS'hg19' -p192 -(dp193 -g33 -S'NC_000016.9:g.89574915dup' -p194 -sg35 -(dp195 -g37 -g38 -sg39 -g40 -sg41 -S'89574915' -p196 -sg43 -S'TT' -p197 -ssssg52 -(dp198 -g54 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p199 -sg56 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' -p200 -sssS'metadata' -p201 -(dp202 -S'variantvalidator_hgvs_version' -p203 -S'1.1.3' -p204 -sS'uta_schema' -p205 -S'uta_20180821' -p206 -sS'seqrepo_db' -p207 -S'2018-08-21' -p208 -sS'variantvalidator_version' -p209 -S'v0.2' -p210 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant24.txt b/VariantValidator/testing/testOutputsMasterITS/variant24.txt deleted file mode 100644 index 1e278cbd..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant24.txt +++ /dev/null @@ -1,82 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant' -p7 -aS'Instead use NC_000011.9:g.5244828_5248381=' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -g4 -sS'gene_symbol' -p13 -g4 -sS'hgvs_predicted_protein_consequence' -p14 -(dp15 -S'tlr' -p16 -g4 -sS'slr' -p17 -g4 -ssS'submitted_variant' -p18 -S'NM_000518.4:c.-130_*2000=' -p19 -sS'genome_context_intronic_sequence' -p20 -g4 -sS'hgvs_lrg_variant' -p21 -g4 -sS'hgvs_transcript_variant' -p22 -g4 -sS'hgvs_refseqgene_variant' -p23 -g4 -sS'primary_assembly_loci' -p24 -(dp25 -sS'reference_sequence_records' -p26 -g4 -ssS'flag' -p27 -S'warning' -p28 -sS'metadata' -p29 -(dp30 -S'variantvalidator_hgvs_version' -p31 -S'1.1.3' -p32 -sS'uta_schema' -p33 -S'uta_20180821' -p34 -sS'seqrepo_db' -p35 -S'2018-08-21' -p36 -sS'variantvalidator_version' -p37 -S'v0.2' -p38 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant240.txt b/VariantValidator/testing/testOutputsMasterITS/variant240.txt deleted file mode 100644 index 3f8bc1a3..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant240.txt +++ /dev/null @@ -1,580 +0,0 @@ -(dp0 -S'NM_199367.2:c.89_91dup' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000016.9:g.89574916C>CGTC automapped to NC_000016.9:g.89574914_89574916dupGTC' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA -p13 -sS'gene_symbol' -p14 -S'SPG7' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_955399.1:p.(Ser30_Pro31insArg)' -p19 -sS'slr' -p20 -S'NP_955399.1:p.(S30_P31insR)' -p21 -ssS'submitted_variant' -p22 -S'16-89574916-C-CGTC' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_199367.2:c.89_91dup' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'grch38' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000016.10:g.89508506_89508508dup' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'16' -p38 -sS'ref' -p39 -S'GTC' -p40 -sS'pos' -p41 -S'89508506' -p42 -sS'alt' -p43 -S'GTCGTC' -p44 -sssS'grch37' -p45 -(dp46 -g33 -S'NC_000016.9:g.89574914_89574916dup' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'GTC' -p49 -sg41 -S'89574914' -p50 -sg43 -S'GTCGTC' -p51 -sssS'hg38' -p52 -(dp53 -g33 -S'NC_000016.10:g.89508506_89508508dup' -p54 -sg35 -(dp55 -g37 -S'chr16' -p56 -sg39 -S'GTC' -p57 -sg41 -S'89508506' -p58 -sg43 -S'GTCGTC' -p59 -sssS'hg19' -p60 -(dp61 -g33 -S'NC_000016.9:g.89574914_89574916dup' -p62 -sg35 -(dp63 -g37 -g56 -sg39 -S'GTC' -p64 -sg41 -S'89574914' -p65 -sg43 -S'GTCGTC' -p66 
-ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2' -p72 -sssS'NM_003119.3:c.89_91dup' -p73 -(dp74 -g3 -g4 -sg5 -(lp75 -S'NC_000016.9:g.89574916C>CGTC automapped to NC_000016.9:g.89574914_89574916dupGTC' -p76 -aS'RefSeqGene record not available' -p77 -asg9 -g4 -sg10 -(lp78 -sg12 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p79 -sg14 -S'SPG7' -p80 -sg16 -(dp81 -g18 -S'NP_003110.1:p.(Ser30_Pro31insArg)' -p82 -sg20 -S'NP_003110.1:p.(S30_P31insR)' -p83 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_003119.3:c.89_91dup' -p84 -sg28 -g4 -sg29 -(dp85 -S'grch38' -p86 -(dp87 -g33 -S'NC_000016.10:g.89508506_89508508dup' -p88 -sg35 -(dp89 -g37 -g38 -sg39 -S'GTC' -p90 -sg41 -S'89508506' -p91 -sg43 -S'GTCGTC' -p92 -sssS'grch37' -p93 -(dp94 -g33 -S'NC_000016.9:g.89574914_89574916dup' -p95 -sg35 -(dp96 -g37 -g38 -sg39 -S'GTC' -p97 -sg41 -S'89574914' -p98 -sg43 -S'GTCGTC' -p99 -sssg52 -(dp100 -g33 -S'NC_000016.10:g.89508506_89508508dup' -p101 -sg35 -(dp102 -g37 -g56 -sg39 -S'GTC' -p103 -sg41 -S'89508506' -p104 -sg43 -S'GTCGTC' -p105 -sssS'hg19' -p106 -(dp107 -g33 -S'NC_000016.9:g.89574914_89574916dup' -p108 -sg35 -(dp109 -g37 -g56 -sg39 -S'GTC' -p110 -sg41 -S'89574914' -p111 -sg43 -S'GTCGTC' -p112 -ssssg67 -(dp113 -g69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p114 -sg71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' -p115 -sssS'NM_001363850.1:c.89_91dup' -p116 -(dp117 -g3 -g4 -sg5 -(lp118 -S'NC_000016.9:g.89574916C>CGTC automapped to NC_000016.9:g.89574914_89574916dupGTC' -p119 -aS'RefSeqGene record not available' -p120 -asg9 -g4 -sg10 -(lp121 -sg12 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p122 -sg14 -S'SPG7' -p123 -sg16 -(dp124 -g18 -S'NP_001350779.1:p.(Ser30_Pro31insArg)' -p125 -sg20 
-S'NP_001350779.1:p.(S30_P31insR)' -p126 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001363850.1:c.89_91dup' -p127 -sg28 -g4 -sg29 -(dp128 -S'hg19' -p129 -(dp130 -g33 -S'NC_000016.9:g.89574914_89574916dup' -p131 -sg35 -(dp132 -g37 -g56 -sg39 -S'GTC' -p133 -sg41 -S'89574914' -p134 -sg43 -S'GTCGTC' -p135 -sssS'grch37' -p136 -(dp137 -g33 -S'NC_000016.9:g.89574914_89574916dup' -p138 -sg35 -(dp139 -g37 -g38 -sg39 -S'GTC' -p140 -sg41 -S'89574914' -p141 -sg43 -S'GTCGTC' -p142 -ssssg67 -(dp143 -g69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' -p144 -sg71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' -p145 -sssS'flag' -p146 -S'gene_variant' -p147 -sS'NM_199367.1:c.89_91dup' -p148 -(dp149 -g3 -g4 -sg5 -(lp150 -S'NC_000016.9:g.89574916C>CGTC automapped to NC_000016.9:g.89574914_89574916dupGTC' -p151 -aS'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' -p152 -aS'NM_199367.2:c.89_91dupGTC MUST be fully validated prior to use in reports' -p153 -aS'select_variants=NM_199367.2:c.89_91dup' -p154 -aS'RefSeqGene record not available' -p155 -asg9 -g4 -sg10 -(lp156 -sg12 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA -p157 -sg14 -S'SPG7' -p158 -sg16 -(dp159 -g18 -S'NP_955399.1:p.(Ser30_Pro31insArg)' -p160 -sg20 -S'NP_955399.1:p.(S30_P31insR)' -p161 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_199367.1:c.89_91dup' -p162 -sg28 -g4 -sg29 -(dp163 -S'hg19' -p164 -(dp165 -g33 -S'NC_000016.9:g.89574914_89574916dup' -p166 -sg35 -(dp167 -g37 -g56 -sg39 -S'GTC' -p168 -sg41 -S'89574914' -p169 -sg43 -S'GTCGTC' -p170 -sssS'grch37' -p171 -(dp172 -g33 -S'NC_000016.9:g.89574914_89574916dup' -p173 -sg35 -(dp174 -g37 -g38 -sg39 -S'GTC' -p175 -sg41 -S'89574914' -p176 -sg43 -S'GTCGTC' -p177 -ssssg67 -(dp178 -g69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' -p179 -sg71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1' -p180 -sssS'NM_003119.2:c.89_91dup' -p181 
-(dp182 -g3 -g4 -sg5 -(lp183 -S'NC_000016.9:g.89574916C>CGTC automapped to NC_000016.9:g.89574914_89574916dupGTC' -p184 -aS'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' -p185 -aS'NM_003119.3:c.89_91dupGTC MUST be fully validated prior to use in reports' -p186 -aS'select_variants=NM_003119.3:c.89_91dup' -p187 -aS'RefSeqGene record not available' -p188 -asg9 -g4 -sg10 -(lp189 -sg12 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p190 -sg14 -S'SPG7' -p191 -sg16 -(dp192 -g18 -S'NP_003110.1:p.(Ser30_Pro31insArg)' -p193 -sg20 -S'NP_003110.1:p.(S30_P31insR)' -p194 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_003119.2:c.89_91dup' -p195 -sg28 -g4 -sg29 -(dp196 -S'hg19' -p197 -(dp198 -g33 -S'NC_000016.9:g.89574914_89574916dup' -p199 -sg35 -(dp200 -g37 -g56 -sg39 -S'GTC' -p201 -sg41 -S'89574914' -p202 -sg43 -S'GTCGTC' -p203 -sssS'grch37' -p204 -(dp205 -g33 -S'NC_000016.9:g.89574914_89574916dup' -p206 -sg35 -(dp207 -g37 -g38 -sg39 -S'GTC' -p208 -sg41 -S'89574914' -p209 -sg43 -S'GTCGTC' -p210 -ssssg67 -(dp211 -g69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p212 -sg71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' -p213 -sssS'metadata' -p214 -(dp215 -S'variantvalidator_hgvs_version' -p216 -S'1.1.3' -p217 -sS'uta_schema' -p218 -S'uta_20180821' -p219 -sS'seqrepo_db' -p220 -S'2018-08-21' -p221 -sS'variantvalidator_version' -p222 -S'v0.2' -p223 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant241.txt b/VariantValidator/testing/testOutputsMasterITS/variant241.txt deleted file mode 100644 index bb3c2985..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant241.txt +++ /dev/null @@ -1,549 +0,0 @@ -(dp0 -S'NM_199367.2:c.183+1G>A' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA -p12 -sS'gene_symbol' -p13 -S'SPG7' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_955399.1:p.?' -p18 -sS'slr' -p19 -S'NP_955399.1:p.?' -p20 -ssS'submitted_variant' -p21 -S'16-89575009-G-A' -p22 -sS'genome_context_intronic_sequence' -p23 -S'NC_000016.9(NM_199367.2):c.183+1G>A' -p24 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_199367.2:c.183+1G>A' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'grch38' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000016.10:g.89508601G>A' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'16' -p38 -sS'ref' -p39 -S'G' -p40 -sS'pos' -p41 -S'89508601' -p42 -sS'alt' -p43 -S'A' -p44 -sssS'grch37' -p45 -(dp46 -g33 -S'NC_000016.9:g.89575009G>A' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'89575009' -p49 -sg43 -g44 -sssS'hg38' -p50 -(dp51 -g33 -S'NC_000016.10:g.89508601G>A' -p52 -sg35 -(dp53 -g37 -S'chr16' -p54 -sg39 -g40 -sg41 -S'89508601' -p55 -sg43 -g44 -sssS'hg19' -p56 -(dp57 -g33 -S'NC_000016.9:g.89575009G>A' -p58 -sg35 -(dp59 -g37 -g54 -sg39 -g40 -sg41 -S'89575009' -p60 -sg43 -g44 -ssssS'reference_sequence_records' -p61 -(dp62 -S'protein' -p63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' -p64 -sS'transcript' -p65 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2' -p66 -sssS'NM_003119.2:c.183+1G>A' -p67 -(dp68 -g3 -g4 -sg5 -(lp69 -S'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' -p70 -aS'NM_003119.3:c.183+1G>A MUST be fully validated prior to use in reports' -p71 -aS'select_variants=NM_003119.3:c.183+1G>A' -p72 -aS'RefSeqGene record not available' -p73 -asg8 -g4 -sg9 -(lp74 -sg11 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p75 -sg13 -S'SPG7' -p76 -sg15 -(dp77 -g17 -S'NP_003110.1:p.?' -p78 -sg19 -S'NP_003110.1:p.?' -p79 -ssg21 -g22 -sg23 -S'NC_000016.9(NM_003119.2):c.183+1G>A' -p80 -sg25 -g4 -sg26 -S'NM_003119.2:c.183+1G>A' -p81 -sg28 -g4 -sg29 -(dp82 -S'hg19' -p83 -(dp84 -g33 -S'NC_000016.9:g.89575009G>A' -p85 -sg35 -(dp86 -g37 -g54 -sg39 -g40 -sg41 -S'89575009' -p87 -sg43 -g44 -sssS'grch37' -p88 -(dp89 -g33 -S'NC_000016.9:g.89575009G>A' -p90 -sg35 -(dp91 -g37 -g38 -sg39 -g40 -sg41 -S'89575009' -p92 -sg43 -g44 -ssssg61 -(dp93 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p94 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' -p95 -sssS'flag' -p96 -S'gene_variant' -p97 -sS'NM_199367.1:c.183+1G>A' -p98 -(dp99 -g3 -g4 -sg5 -(lp100 -S'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' -p101 -aS'NM_199367.2:c.183+1G>A MUST be fully validated prior to use in reports' -p102 -aS'select_variants=NM_199367.2:c.183+1G>A' -p103 -aS'RefSeqGene record not available' -p104 -asg8 -g4 -sg9 -(lp105 -sg11 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA -p106 -sg13 -S'SPG7' -p107 -sg15 -(dp108 -g17 -S'NP_955399.1:p.?' -p109 -sg19 -S'NP_955399.1:p.?' 
-p110 -ssg21 -g22 -sg23 -S'NC_000016.9(NM_199367.1):c.183+1G>A' -p111 -sg25 -g4 -sg26 -S'NM_199367.1:c.183+1G>A' -p112 -sg28 -g4 -sg29 -(dp113 -S'hg19' -p114 -(dp115 -g33 -S'NC_000016.9:g.89575009G>A' -p116 -sg35 -(dp117 -g37 -g54 -sg39 -g40 -sg41 -S'89575009' -p118 -sg43 -g44 -sssS'grch37' -p119 -(dp120 -g33 -S'NC_000016.9:g.89575009G>A' -p121 -sg35 -(dp122 -g37 -g38 -sg39 -g40 -sg41 -S'89575009' -p123 -sg43 -g44 -ssssg61 -(dp124 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' -p125 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1' -p126 -sssS'NM_001363850.1:c.183+1G>A' -p127 -(dp128 -g3 -g4 -sg5 -(lp129 -S'RefSeqGene record not available' -p130 -asg8 -g4 -sg9 -(lp131 -sg11 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p132 -sg13 -S'SPG7' -p133 -sg15 -(dp134 -g17 -S'NP_001350779.1:p.?' -p135 -sg19 -S'NP_001350779.1:p.?' -p136 -ssg21 -g22 -sg23 -S'NC_000016.9(NM_001363850.1):c.183+1G>A' -p137 -sg25 -g4 -sg26 -S'NM_001363850.1:c.183+1G>A' -p138 -sg28 -g4 -sg29 -(dp139 -S'hg19' -p140 -(dp141 -g33 -S'NC_000016.9:g.89575009G>A' -p142 -sg35 -(dp143 -g37 -g54 -sg39 -g40 -sg41 -S'89575009' -p144 -sg43 -g44 -sssS'grch37' -p145 -(dp146 -g33 -S'NC_000016.9:g.89575009G>A' -p147 -sg35 -(dp148 -g37 -g38 -sg39 -g40 -sg41 -S'89575009' -p149 -sg43 -g44 -ssssg61 -(dp150 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' -p151 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' -p152 -sssS'NM_003119.3:c.183+1G>A' -p153 -(dp154 -g3 -g4 -sg5 -(lp155 -S'RefSeqGene record not available' -p156 -asg8 -g4 -sg9 -(lp157 -sg11 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p158 -sg13 -S'SPG7' -p159 -sg15 -(dp160 -g17 -S'NP_003110.1:p.?' -p161 -sg19 -S'NP_003110.1:p.?' 
-p162 -ssg21 -g22 -sg23 -S'NC_000016.9(NM_003119.3):c.183+1G>A' -p163 -sg25 -g4 -sg26 -S'NM_003119.3:c.183+1G>A' -p164 -sg28 -g4 -sg29 -(dp165 -S'grch38' -p166 -(dp167 -g33 -S'NC_000016.10:g.89508601G>A' -p168 -sg35 -(dp169 -g37 -g38 -sg39 -g40 -sg41 -S'89508601' -p170 -sg43 -g44 -sssS'grch37' -p171 -(dp172 -g33 -S'NC_000016.9:g.89575009G>A' -p173 -sg35 -(dp174 -g37 -g38 -sg39 -g40 -sg41 -S'89575009' -p175 -sg43 -g44 -sssg50 -(dp176 -g33 -S'NC_000016.10:g.89508601G>A' -p177 -sg35 -(dp178 -g37 -g54 -sg39 -g40 -sg41 -S'89508601' -p179 -sg43 -g44 -sssS'hg19' -p180 -(dp181 -g33 -S'NC_000016.9:g.89575009G>A' -p182 -sg35 -(dp183 -g37 -g54 -sg39 -g40 -sg41 -S'89575009' -p184 -sg43 -g44 -ssssg61 -(dp185 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p186 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' -p187 -sssS'metadata' -p188 -(dp189 -S'variantvalidator_hgvs_version' -p190 -S'1.1.3' -p191 -sS'uta_schema' -p192 -S'uta_20180821' -p193 -sS'seqrepo_db' -p194 -S'2018-08-21' -p195 -sS'variantvalidator_version' -p196 -S'v0.2' -p197 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant242.txt b/VariantValidator/testing/testOutputsMasterITS/variant242.txt deleted file mode 100644 index 59175b57..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant242.txt +++ /dev/null @@ -1,1106 +0,0 @@ -(dp0 -S'NM_199367.1:c.183+32_183+33insA' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Multiple ALT sequences detected' -p7 -aS'auto-submitting all possible combinations' -p8 -aS'NC_000016.9:g.89575040C>CA automapped to NC_000016.9:g.89575040_89575041insA' -p9 -aS'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' -p10 -aS'NM_199367.2:c.183+32_183+33insA MUST be fully validated prior to use in reports' -p11 -aS'select_variants=NM_199367.2:c.183+32_183+33insA' -p12 -aS'RefSeqGene record not available' -p13 -asS'refseqgene_context_intronic_sequence' -p14 -g4 -sS'alt_genomic_loci' -p15 -(lp16 -sS'transcript_description' -p17 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA -p18 -sS'gene_symbol' -p19 -S'SPG7' -p20 -sS'hgvs_predicted_protein_consequence' -p21 -(dp22 -S'tlr' -p23 -S'NP_955399.1:p.?' -p24 -sS'slr' -p25 -S'NP_955399.1:p.?' 
-p26 -ssS'submitted_variant' -p27 -S'16-89575040-C-A,CA' -p28 -sS'genome_context_intronic_sequence' -p29 -S'NC_000016.9(NM_199367.1):c.183+32_183+33insA' -p30 -sS'hgvs_lrg_variant' -p31 -g4 -sS'hgvs_transcript_variant' -p32 -S'NM_199367.1:c.183+32_183+33insA' -p33 -sS'hgvs_refseqgene_variant' -p34 -g4 -sS'primary_assembly_loci' -p35 -(dp36 -S'hg19' -p37 -(dp38 -S'hgvs_genomic_description' -p39 -S'NC_000016.9:g.89575040_89575041insA' -p40 -sS'vcf' -p41 -(dp42 -S'chr' -p43 -S'chr16' -p44 -sS'ref' -p45 -S'C' -p46 -sS'pos' -p47 -S'89575040' -p48 -sS'alt' -p49 -S'CA' -p50 -sssS'grch37' -p51 -(dp52 -g39 -S'NC_000016.9:g.89575040_89575041insA' -p53 -sg41 -(dp54 -g43 -S'16' -p55 -sg45 -g46 -sg47 -S'89575040' -p56 -sg49 -S'CA' -p57 -ssssS'reference_sequence_records' -p58 -(dp59 -S'protein' -p60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' -p61 -sS'transcript' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1' -p63 -sssS'NM_001363850.1:c.183+32C>A' -p64 -(dp65 -g3 -g4 -sg5 -(lp66 -S'Multiple ALT sequences detected' -p67 -aS'auto-submitting all possible combinations' -p68 -aS'RefSeqGene record not available' -p69 -asg14 -g4 -sg15 -(lp70 -sg17 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p71 -sg19 -S'SPG7' -p72 -sg21 -(dp73 -g23 -S'NP_001350779.1:p.?' -p74 -sg25 -S'NP_001350779.1:p.?' 
-p75 -ssg27 -g28 -sg29 -S'NC_000016.9(NM_001363850.1):c.183+32C>A' -p76 -sg31 -g4 -sg32 -S'NM_001363850.1:c.183+32C>A' -p77 -sg34 -g4 -sg35 -(dp78 -S'hg19' -p79 -(dp80 -g39 -S'NC_000016.9:g.89575040C>A' -p81 -sg41 -(dp82 -g43 -g44 -sg45 -g46 -sg47 -S'89575040' -p83 -sg49 -S'A' -p84 -sssS'grch37' -p85 -(dp86 -g39 -S'NC_000016.9:g.89575040C>A' -p87 -sg41 -(dp88 -g43 -g55 -sg45 -g46 -sg47 -S'89575040' -p89 -sg49 -g84 -ssssg58 -(dp90 -g60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' -p91 -sg62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' -p92 -sssS'NM_001363850.1:c.183+32_183+33insA' -p93 -(dp94 -g3 -g4 -sg5 -(lp95 -S'Multiple ALT sequences detected' -p96 -aS'auto-submitting all possible combinations' -p97 -aS'NC_000016.9:g.89575040C>CA automapped to NC_000016.9:g.89575040_89575041insA' -p98 -aS'RefSeqGene record not available' -p99 -asg14 -g4 -sg15 -(lp100 -sg17 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p101 -sg19 -S'SPG7' -p102 -sg21 -(dp103 -g23 -S'NP_001350779.1:p.?' -p104 -sg25 -S'NP_001350779.1:p.?' 
-p105 -ssg27 -g28 -sg29 -S'NC_000016.9(NM_001363850.1):c.183+32_183+33insA' -p106 -sg31 -g4 -sg32 -S'NM_001363850.1:c.183+32_183+33insA' -p107 -sg34 -g4 -sg35 -(dp108 -S'hg19' -p109 -(dp110 -g39 -S'NC_000016.9:g.89575040_89575041insA' -p111 -sg41 -(dp112 -g43 -g44 -sg45 -g46 -sg47 -S'89575040' -p113 -sg49 -S'CA' -p114 -sssS'grch37' -p115 -(dp116 -g39 -S'NC_000016.9:g.89575040_89575041insA' -p117 -sg41 -(dp118 -g43 -g55 -sg45 -g46 -sg47 -S'89575040' -p119 -sg49 -S'CA' -p120 -ssssg58 -(dp121 -g60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' -p122 -sg62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' -p123 -sssS'NM_199367.2:c.183+32C>A' -p124 -(dp125 -g3 -g4 -sg5 -(lp126 -S'Multiple ALT sequences detected' -p127 -aS'auto-submitting all possible combinations' -p128 -aS'RefSeqGene record not available' -p129 -asg14 -g4 -sg15 -(lp130 -sg17 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA -p131 -sg19 -S'SPG7' -p132 -sg21 -(dp133 -g23 -S'NP_955399.1:p.?' -p134 -sg25 -S'NP_955399.1:p.?' 
-p135 -ssg27 -g28 -sg29 -S'NC_000016.9(NM_199367.2):c.183+32C>A' -p136 -sg31 -g4 -sg32 -S'NM_199367.2:c.183+32C>A' -p137 -sg34 -g4 -sg35 -(dp138 -S'grch38' -p139 -(dp140 -g39 -S'NC_000016.10:g.89508632C>A' -p141 -sg41 -(dp142 -g43 -g55 -sg45 -g46 -sg47 -S'89508632' -p143 -sg49 -g84 -sssS'grch37' -p144 -(dp145 -g39 -S'NC_000016.9:g.89575040C>A' -p146 -sg41 -(dp147 -g43 -g55 -sg45 -g46 -sg47 -S'89575040' -p148 -sg49 -g84 -sssS'hg38' -p149 -(dp150 -g39 -S'NC_000016.10:g.89508632C>A' -p151 -sg41 -(dp152 -g43 -g44 -sg45 -g46 -sg47 -S'89508632' -p153 -sg49 -g84 -sssS'hg19' -p154 -(dp155 -g39 -S'NC_000016.9:g.89575040C>A' -p156 -sg41 -(dp157 -g43 -g44 -sg45 -g46 -sg47 -S'89575040' -p158 -sg49 -g84 -ssssg58 -(dp159 -g60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' -p160 -sg62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2' -p161 -sssS'NM_003119.3:c.183+32_183+33insA' -p162 -(dp163 -g3 -g4 -sg5 -(lp164 -S'Multiple ALT sequences detected' -p165 -aS'auto-submitting all possible combinations' -p166 -aS'NC_000016.9:g.89575040C>CA automapped to NC_000016.9:g.89575040_89575041insA' -p167 -aS'RefSeqGene record not available' -p168 -asg14 -g4 -sg15 -(lp169 -sg17 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p170 -sg19 -S'SPG7' -p171 -sg21 -(dp172 -g23 -S'NP_003110.1:p.?' -p173 -sg25 -S'NP_003110.1:p.?' 
-p174 -ssg27 -g28 -sg29 -S'NC_000016.9(NM_003119.3):c.183+32_183+33insA' -p175 -sg31 -g4 -sg32 -S'NM_003119.3:c.183+32_183+33insA' -p176 -sg34 -g4 -sg35 -(dp177 -S'grch38' -p178 -(dp179 -g39 -S'NC_000016.10:g.89508632_89508633insA' -p180 -sg41 -(dp181 -g43 -g55 -sg45 -g46 -sg47 -S'89508632' -p182 -sg49 -S'CA' -p183 -sssS'grch37' -p184 -(dp185 -g39 -S'NC_000016.9:g.89575040_89575041insA' -p186 -sg41 -(dp187 -g43 -g55 -sg45 -g46 -sg47 -S'89575040' -p188 -sg49 -S'CA' -p189 -sssg149 -(dp190 -g39 -S'NC_000016.10:g.89508632_89508633insA' -p191 -sg41 -(dp192 -g43 -g44 -sg45 -g46 -sg47 -S'89508632' -p193 -sg49 -S'CA' -p194 -sssS'hg19' -p195 -(dp196 -g39 -S'NC_000016.9:g.89575040_89575041insA' -p197 -sg41 -(dp198 -g43 -g44 -sg45 -g46 -sg47 -S'89575040' -p199 -sg49 -S'CA' -p200 -ssssg58 -(dp201 -g60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p202 -sg62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' -p203 -sssS'flag' -p204 -S'gene_variant' -p205 -sS'NM_003119.2:c.183+32_183+33insA' -p206 -(dp207 -g3 -g4 -sg5 -(lp208 -S'Multiple ALT sequences detected' -p209 -aS'auto-submitting all possible combinations' -p210 -aS'NC_000016.9:g.89575040C>CA automapped to NC_000016.9:g.89575040_89575041insA' -p211 -aS'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' -p212 -aS'NM_003119.3:c.183+32_183+33insA MUST be fully validated prior to use in reports' -p213 -aS'select_variants=NM_003119.3:c.183+32_183+33insA' -p214 -aS'RefSeqGene record not available' -p215 -asg14 -g4 -sg15 -(lp216 -sg17 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p217 -sg19 -S'SPG7' -p218 -sg21 -(dp219 -g23 -S'NP_003110.1:p.?' -p220 -sg25 -S'NP_003110.1:p.?' 
-p221 -ssg27 -g28 -sg29 -S'NC_000016.9(NM_003119.2):c.183+32_183+33insA' -p222 -sg31 -g4 -sg32 -S'NM_003119.2:c.183+32_183+33insA' -p223 -sg34 -g4 -sg35 -(dp224 -S'hg19' -p225 -(dp226 -g39 -S'NC_000016.9:g.89575040_89575041insA' -p227 -sg41 -(dp228 -g43 -g44 -sg45 -g46 -sg47 -S'89575040' -p229 -sg49 -S'CA' -p230 -sssS'grch37' -p231 -(dp232 -g39 -S'NC_000016.9:g.89575040_89575041insA' -p233 -sg41 -(dp234 -g43 -g55 -sg45 -g46 -sg47 -S'89575040' -p235 -sg49 -S'CA' -p236 -ssssg58 -(dp237 -g60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p238 -sg62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' -p239 -sssS'NM_199367.1:c.183+32C>A' -p240 -(dp241 -g3 -g4 -sg5 -(lp242 -S'Multiple ALT sequences detected' -p243 -aS'auto-submitting all possible combinations' -p244 -aS'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' -p245 -aS'NM_199367.2:c.183+32C>A MUST be fully validated prior to use in reports' -p246 -aS'select_variants=NM_199367.2:c.183+32C>A' -p247 -aS'RefSeqGene record not available' -p248 -asg14 -g4 -sg15 -(lp249 -sg17 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA -p250 -sg19 -S'SPG7' -p251 -sg21 -(dp252 -g23 -S'NP_955399.1:p.?' -p253 -sg25 -S'NP_955399.1:p.?' 
-p254 -ssg27 -g28 -sg29 -S'NC_000016.9(NM_199367.1):c.183+32C>A' -p255 -sg31 -g4 -sg32 -S'NM_199367.1:c.183+32C>A' -p256 -sg34 -g4 -sg35 -(dp257 -S'hg19' -p258 -(dp259 -g39 -S'NC_000016.9:g.89575040C>A' -p260 -sg41 -(dp261 -g43 -g44 -sg45 -g46 -sg47 -S'89575040' -p262 -sg49 -g84 -sssS'grch37' -p263 -(dp264 -g39 -S'NC_000016.9:g.89575040C>A' -p265 -sg41 -(dp266 -g43 -g55 -sg45 -g46 -sg47 -S'89575040' -p267 -sg49 -g84 -ssssg58 -(dp268 -g60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' -p269 -sg62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1' -p270 -sssS'NM_003119.3:c.183+32C>A' -p271 -(dp272 -g3 -g4 -sg5 -(lp273 -S'Multiple ALT sequences detected' -p274 -aS'auto-submitting all possible combinations' -p275 -aS'RefSeqGene record not available' -p276 -asg14 -g4 -sg15 -(lp277 -sg17 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p278 -sg19 -S'SPG7' -p279 -sg21 -(dp280 -g23 -S'NP_003110.1:p.?' -p281 -sg25 -S'NP_003110.1:p.?' -p282 -ssg27 -g28 -sg29 -S'NC_000016.9(NM_003119.3):c.183+32C>A' -p283 -sg31 -g4 -sg32 -S'NM_003119.3:c.183+32C>A' -p284 -sg34 -g4 -sg35 -(dp285 -S'grch38' -p286 -(dp287 -g39 -S'NC_000016.10:g.89508632C>A' -p288 -sg41 -(dp289 -g43 -g55 -sg45 -g46 -sg47 -S'89508632' -p290 -sg49 -g84 -sssS'grch37' -p291 -(dp292 -g39 -S'NC_000016.9:g.89575040C>A' -p293 -sg41 -(dp294 -g43 -g55 -sg45 -g46 -sg47 -S'89575040' -p295 -sg49 -g84 -sssg149 -(dp296 -g39 -S'NC_000016.10:g.89508632C>A' -p297 -sg41 -(dp298 -g43 -g44 -sg45 -g46 -sg47 -S'89508632' -p299 -sg49 -g84 -sssS'hg19' -p300 -(dp301 -g39 -S'NC_000016.9:g.89575040C>A' -p302 -sg41 -(dp303 -g43 -g44 -sg45 -g46 -sg47 -S'89575040' -p304 -sg49 -g84 -ssssg58 -(dp305 -g60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p306 -sg62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' -p307 -sssS'NM_199367.2:c.183+32_183+33insA' -p308 -(dp309 -g3 -g4 -sg5 -(lp310 -S'Multiple ALT sequences detected' -p311 -aS'auto-submitting all possible combinations' 
-p312 -aS'NC_000016.9:g.89575040C>CA automapped to NC_000016.9:g.89575040_89575041insA' -p313 -aS'RefSeqGene record not available' -p314 -asg14 -g4 -sg15 -(lp315 -sg17 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA -p316 -sg19 -S'SPG7' -p317 -sg21 -(dp318 -g23 -S'NP_955399.1:p.?' -p319 -sg25 -S'NP_955399.1:p.?' -p320 -ssg27 -g28 -sg29 -S'NC_000016.9(NM_199367.2):c.183+32_183+33insA' -p321 -sg31 -g4 -sg32 -S'NM_199367.2:c.183+32_183+33insA' -p322 -sg34 -g4 -sg35 -(dp323 -S'grch38' -p324 -(dp325 -g39 -S'NC_000016.10:g.89508632_89508633insA' -p326 -sg41 -(dp327 -g43 -g55 -sg45 -g46 -sg47 -S'89508632' -p328 -sg49 -S'CA' -p329 -sssS'grch37' -p330 -(dp331 -g39 -S'NC_000016.9:g.89575040_89575041insA' -p332 -sg41 -(dp333 -g43 -g55 -sg45 -g46 -sg47 -S'89575040' -p334 -sg49 -S'CA' -p335 -sssg149 -(dp336 -g39 -S'NC_000016.10:g.89508632_89508633insA' -p337 -sg41 -(dp338 -g43 -g44 -sg45 -g46 -sg47 -S'89508632' -p339 -sg49 -S'CA' -p340 -sssS'hg19' -p341 -(dp342 -g39 -S'NC_000016.9:g.89575040_89575041insA' -p343 -sg41 -(dp344 -g43 -g44 -sg45 -g46 -sg47 -S'89575040' -p345 -sg49 -S'CA' -p346 -ssssg58 -(dp347 -g60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' -p348 -sg62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2' -p349 -sssS'NM_003119.2:c.183+32C>A' -p350 -(dp351 -g3 -g4 -sg5 -(lp352 -S'Multiple ALT sequences detected' -p353 -aS'auto-submitting all possible combinations' -p354 -aS'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' -p355 -aS'NM_003119.3:c.183+32C>A MUST be fully validated prior to use in reports' -p356 -aS'select_variants=NM_003119.3:c.183+32C>A' -p357 -aS'RefSeqGene record not available' -p358 -asg14 -g4 -sg15 -(lp359 -sg17 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p360 -sg19 -S'SPG7' -p361 -sg21 -(dp362 -g23 -S'NP_003110.1:p.?' -p363 -sg25 -S'NP_003110.1:p.?' 
-p364 -ssg27 -g28 -sg29 -S'NC_000016.9(NM_003119.2):c.183+32C>A' -p365 -sg31 -g4 -sg32 -S'NM_003119.2:c.183+32C>A' -p366 -sg34 -g4 -sg35 -(dp367 -S'hg19' -p368 -(dp369 -g39 -S'NC_000016.9:g.89575040C>A' -p370 -sg41 -(dp371 -g43 -g44 -sg45 -g46 -sg47 -S'89575040' -p372 -sg49 -g84 -sssS'grch37' -p373 -(dp374 -g39 -S'NC_000016.9:g.89575040C>A' -p375 -sg41 -(dp376 -g43 -g55 -sg45 -g46 -sg47 -S'89575040' -p377 -sg49 -g84 -ssssg58 -(dp378 -g60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p379 -sg62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' -p380 -sssS'metadata' -p381 -(dp382 -S'variantvalidator_hgvs_version' -p383 -S'1.1.3' -p384 -sS'uta_schema' -p385 -S'uta_20180821' -p386 -sS'seqrepo_db' -p387 -S'2018-08-21' -p388 -sS'variantvalidator_version' -p389 -S'v0.2' -p390 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant243.txt b/VariantValidator/testing/testOutputsMasterITS/variant243.txt deleted file mode 100644 index 8f98830f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant243.txt +++ /dev/null @@ -1,549 +0,0 @@ -(dp0 -S'NM_199367.2:c.184-2A>C' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA -p12 -sS'gene_symbol' -p13 -S'SPG7' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_955399.1:p.?' -p18 -sS'slr' -p19 -S'NP_955399.1:p.?' 
-p20 -ssS'submitted_variant' -p21 -S'16-89576896-A-C' -p22 -sS'genome_context_intronic_sequence' -p23 -S'NC_000016.9(NM_199367.2):c.184-2A>C' -p24 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_199367.2:c.184-2A>C' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'grch38' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000016.10:g.89510488A>C' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'16' -p38 -sS'ref' -p39 -S'A' -p40 -sS'pos' -p41 -S'89510488' -p42 -sS'alt' -p43 -S'C' -p44 -sssS'grch37' -p45 -(dp46 -g33 -S'NC_000016.9:g.89576896A>C' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'89576896' -p49 -sg43 -g44 -sssS'hg38' -p50 -(dp51 -g33 -S'NC_000016.10:g.89510488A>C' -p52 -sg35 -(dp53 -g37 -S'chr16' -p54 -sg39 -g40 -sg41 -S'89510488' -p55 -sg43 -g44 -sssS'hg19' -p56 -(dp57 -g33 -S'NC_000016.9:g.89576896A>C' -p58 -sg35 -(dp59 -g37 -g54 -sg39 -g40 -sg41 -S'89576896' -p60 -sg43 -g44 -ssssS'reference_sequence_records' -p61 -(dp62 -S'protein' -p63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' -p64 -sS'transcript' -p65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2' -p66 -sssS'NM_003119.2:c.184-2A>C' -p67 -(dp68 -g3 -g4 -sg5 -(lp69 -S'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' -p70 -aS'NM_003119.3:c.184-2A>C MUST be fully validated prior to use in reports' -p71 -aS'select_variants=NM_003119.3:c.184-2A>C' -p72 -aS'RefSeqGene record not available' -p73 -asg8 -g4 -sg9 -(lp74 -sg11 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p75 -sg13 -S'SPG7' -p76 -sg15 -(dp77 -g17 -S'NP_003110.1:p.?' -p78 -sg19 -S'NP_003110.1:p.?' 
-p79 -ssg21 -g22 -sg23 -S'NC_000016.9(NM_003119.2):c.184-2A>C' -p80 -sg25 -g4 -sg26 -S'NM_003119.2:c.184-2A>C' -p81 -sg28 -g4 -sg29 -(dp82 -S'hg19' -p83 -(dp84 -g33 -S'NC_000016.9:g.89576896A>C' -p85 -sg35 -(dp86 -g37 -g54 -sg39 -g40 -sg41 -S'89576896' -p87 -sg43 -g44 -sssS'grch37' -p88 -(dp89 -g33 -S'NC_000016.9:g.89576896A>C' -p90 -sg35 -(dp91 -g37 -g38 -sg39 -g40 -sg41 -S'89576896' -p92 -sg43 -g44 -ssssg61 -(dp93 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p94 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' -p95 -sssS'NM_003119.3:c.184-2A>C' -p96 -(dp97 -g3 -g4 -sg5 -(lp98 -S'RefSeqGene record not available' -p99 -asg8 -g4 -sg9 -(lp100 -sg11 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p101 -sg13 -S'SPG7' -p102 -sg15 -(dp103 -g17 -S'NP_003110.1:p.?' -p104 -sg19 -S'NP_003110.1:p.?' -p105 -ssg21 -g22 -sg23 -S'NC_000016.9(NM_003119.3):c.184-2A>C' -p106 -sg25 -g4 -sg26 -S'NM_003119.3:c.184-2A>C' -p107 -sg28 -g4 -sg29 -(dp108 -S'grch38' -p109 -(dp110 -g33 -S'NC_000016.10:g.89510488A>C' -p111 -sg35 -(dp112 -g37 -g38 -sg39 -g40 -sg41 -S'89510488' -p113 -sg43 -g44 -sssS'grch37' -p114 -(dp115 -g33 -S'NC_000016.9:g.89576896A>C' -p116 -sg35 -(dp117 -g37 -g38 -sg39 -g40 -sg41 -S'89576896' -p118 -sg43 -g44 -sssg50 -(dp119 -g33 -S'NC_000016.10:g.89510488A>C' -p120 -sg35 -(dp121 -g37 -g54 -sg39 -g40 -sg41 -S'89510488' -p122 -sg43 -g44 -sssS'hg19' -p123 -(dp124 -g33 -S'NC_000016.9:g.89576896A>C' -p125 -sg35 -(dp126 -g37 -g54 -sg39 -g40 -sg41 -S'89576896' -p127 -sg43 -g44 -ssssg61 -(dp128 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p129 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' -p130 -sssS'NM_001363850.1:c.184-2A>C' -p131 -(dp132 -g3 -g4 -sg5 -(lp133 -S'RefSeqGene record not available' -p134 -asg8 -g4 -sg9 -(lp135 -sg11 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p136 -sg13 -S'SPG7' -p137 -sg15 -(dp138 -g17 
-S'NP_001350779.1:p.?' -p139 -sg19 -S'NP_001350779.1:p.?' -p140 -ssg21 -g22 -sg23 -S'NC_000016.9(NM_001363850.1):c.184-2A>C' -p141 -sg25 -g4 -sg26 -S'NM_001363850.1:c.184-2A>C' -p142 -sg28 -g4 -sg29 -(dp143 -S'hg19' -p144 -(dp145 -g33 -S'NC_000016.9:g.89576896A>C' -p146 -sg35 -(dp147 -g37 -g54 -sg39 -g40 -sg41 -S'89576896' -p148 -sg43 -g44 -sssS'grch37' -p149 -(dp150 -g33 -S'NC_000016.9:g.89576896A>C' -p151 -sg35 -(dp152 -g37 -g38 -sg39 -g40 -sg41 -S'89576896' -p153 -sg43 -g44 -ssssg61 -(dp154 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' -p155 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' -p156 -sssS'flag' -p157 -S'gene_variant' -p158 -sS'NM_199367.1:c.184-2A>C' -p159 -(dp160 -g3 -g4 -sg5 -(lp161 -S'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' -p162 -aS'NM_199367.2:c.184-2A>C MUST be fully validated prior to use in reports' -p163 -aS'select_variants=NM_199367.2:c.184-2A>C' -p164 -aS'RefSeqGene record not available' -p165 -asg8 -g4 -sg9 -(lp166 -sg11 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA -p167 -sg13 -S'SPG7' -p168 -sg15 -(dp169 -g17 -S'NP_955399.1:p.?' -p170 -sg19 -S'NP_955399.1:p.?' 
-p171 -ssg21 -g22 -sg23 -S'NC_000016.9(NM_199367.1):c.184-2A>C' -p172 -sg25 -g4 -sg26 -S'NM_199367.1:c.184-2A>C' -p173 -sg28 -g4 -sg29 -(dp174 -S'hg19' -p175 -(dp176 -g33 -S'NC_000016.9:g.89576896A>C' -p177 -sg35 -(dp178 -g37 -g54 -sg39 -g40 -sg41 -S'89576896' -p179 -sg43 -g44 -sssS'grch37' -p180 -(dp181 -g33 -S'NC_000016.9:g.89576896A>C' -p182 -sg35 -(dp183 -g37 -g38 -sg39 -g40 -sg41 -S'89576896' -p184 -sg43 -g44 -ssssg61 -(dp185 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' -p186 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1' -p187 -sssS'metadata' -p188 -(dp189 -S'variantvalidator_hgvs_version' -p190 -S'1.1.3' -p191 -sS'uta_schema' -p192 -S'uta_20180821' -p193 -sS'seqrepo_db' -p194 -S'2018-08-21' -p195 -sS'variantvalidator_version' -p196 -S'v0.2' -p197 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant244.txt b/VariantValidator/testing/testOutputsMasterITS/variant244.txt deleted file mode 100644 index 779881ba..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant244.txt +++ /dev/null @@ -1,1119 +0,0 @@ -(dp0 -S'NM_003119.3:c.216dup' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Multiple ALT sequences detected' -p7 -aS'auto-submitting all possible combinations' -p8 -aS'NC_000016.9:g.89576930T>TT automapped to NC_000016.9:g.89576930dupT' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g4 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p15 -sS'gene_symbol' -p16 -S'SPG7' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_003110.1:p.(Glu73Ter)' -p21 -sS'slr' -p22 -S'NP_003110.1:p.(E73*)' -p23 -ssS'submitted_variant' -p24 -S'16-89576930-T-TA,TT' -p25 -sS'genome_context_intronic_sequence' -p26 -g4 -sS'hgvs_lrg_variant' -p27 -g4 
-sS'hgvs_transcript_variant' -p28 -S'NM_003119.3:c.216dup' -p29 -sS'hgvs_refseqgene_variant' -p30 -g4 -sS'primary_assembly_loci' -p31 -(dp32 -S'grch38' -p33 -(dp34 -S'hgvs_genomic_description' -p35 -S'NC_000016.10:g.89510522dup' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'16' -p40 -sS'ref' -p41 -S'T' -p42 -sS'pos' -p43 -S'89510520' -p44 -sS'alt' -p45 -S'TT' -p46 -sssS'grch37' -p47 -(dp48 -g35 -S'NC_000016.9:g.89576930dup' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -g42 -sg43 -S'89576928' -p51 -sg45 -S'TT' -p52 -sssS'hg38' -p53 -(dp54 -g35 -S'NC_000016.10:g.89510522dup' -p55 -sg37 -(dp56 -g39 -S'chr16' -p57 -sg41 -g42 -sg43 -S'89510520' -p58 -sg45 -S'TT' -p59 -sssS'hg19' -p60 -(dp61 -g35 -S'NC_000016.9:g.89576930dup' -p62 -sg37 -(dp63 -g39 -g57 -sg41 -g42 -sg43 -S'89576928' -p64 -sg45 -S'TT' -p65 -ssssS'reference_sequence_records' -p66 -(dp67 -S'protein' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p69 -sS'transcript' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' -p71 -sssS'NM_003119.2:c.216_217insA' -p72 -(dp73 -g3 -g4 -sg5 -(lp74 -S'Multiple ALT sequences detected' -p75 -aS'auto-submitting all possible combinations' -p76 -aS'NC_000016.9:g.89576930T>TA automapped to NC_000016.9:g.89576930_89576931insA' -p77 -aS'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' -p78 -aS'NM_003119.3:c.216_217insA MUST be fully validated prior to use in reports' -p79 -aS'select_variants=NM_003119.3:c.216_217insA' -p80 -aS'RefSeqGene record not available' -p81 -asg11 -g4 -sg12 -(lp82 -sg14 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p83 -sg16 -S'SPG7' -p84 -sg18 -(dp85 -g20 -S'NP_003110.1:p.(Glu73ArgfsTer30)' -p86 -sg22 -S'NP_003110.1:p.(E73Rfs*30)' -p87 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_003119.2:c.216_217insA' -p88 -sg30 -g4 -sg31 -(dp89 -S'hg19' -p90 -(dp91 -g35 -S'NC_000016.9:g.89576930_89576931insA' -p92 -sg37 -(dp93 -g39 -g57 -sg41 -g42 
-sg43 -S'89576930' -p94 -sg45 -S'TA' -p95 -sssS'grch37' -p96 -(dp97 -g35 -S'NC_000016.9:g.89576930_89576931insA' -p98 -sg37 -(dp99 -g39 -g40 -sg41 -g42 -sg43 -S'89576930' -p100 -sg45 -S'TA' -p101 -ssssg66 -(dp102 -g68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p103 -sg70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' -p104 -sssS'NM_199367.2:c.216dup' -p105 -(dp106 -g3 -g4 -sg5 -(lp107 -S'Multiple ALT sequences detected' -p108 -aS'auto-submitting all possible combinations' -p109 -aS'NC_000016.9:g.89576930T>TT automapped to NC_000016.9:g.89576930dupT' -p110 -aS'RefSeqGene record not available' -p111 -asg11 -g4 -sg12 -(lp112 -sg14 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA -p113 -sg16 -S'SPG7' -p114 -sg18 -(dp115 -g20 -S'NP_955399.1:p.(Glu73Ter)' -p116 -sg22 -S'NP_955399.1:p.(E73*)' -p117 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_199367.2:c.216dup' -p118 -sg30 -g4 -sg31 -(dp119 -S'grch38' -p120 -(dp121 -g35 -S'NC_000016.10:g.89510522dup' -p122 -sg37 -(dp123 -g39 -g40 -sg41 -g42 -sg43 -S'89510520' -p124 -sg45 -S'TT' -p125 -sssS'grch37' -p126 -(dp127 -g35 -S'NC_000016.9:g.89576930dup' -p128 -sg37 -(dp129 -g39 -g40 -sg41 -g42 -sg43 -S'89576928' -p130 -sg45 -S'TT' -p131 -sssg53 -(dp132 -g35 -S'NC_000016.10:g.89510522dup' -p133 -sg37 -(dp134 -g39 -g57 -sg41 -g42 -sg43 -S'89510520' -p135 -sg45 -S'TT' -p136 -sssS'hg19' -p137 -(dp138 -g35 -S'NC_000016.9:g.89576930dup' -p139 -sg37 -(dp140 -g39 -g57 -sg41 -g42 -sg43 -S'89576928' -p141 -sg45 -S'TT' -p142 -ssssg66 -(dp143 -g68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' -p144 -sg70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2' -p145 -sssS'NM_199367.2:c.216_217insA' -p146 -(dp147 -g3 -g4 -sg5 -(lp148 -S'Multiple ALT sequences detected' -p149 -aS'auto-submitting all possible combinations' -p150 -aS'NC_000016.9:g.89576930T>TA automapped to NC_000016.9:g.89576930_89576931insA' -p151 -aS'RefSeqGene record not available' -p152 -asg11 -g4 -sg12 
-(lp153 -sg14 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA -p154 -sg16 -S'SPG7' -p155 -sg18 -(dp156 -g20 -S'NP_955399.1:p.(Glu73ArgfsTer30)' -p157 -sg22 -S'NP_955399.1:p.(E73Rfs*30)' -p158 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_199367.2:c.216_217insA' -p159 -sg30 -g4 -sg31 -(dp160 -S'grch38' -p161 -(dp162 -g35 -S'NC_000016.10:g.89510522_89510523insA' -p163 -sg37 -(dp164 -g39 -g40 -sg41 -g42 -sg43 -S'89510522' -p165 -sg45 -S'TA' -p166 -sssS'grch37' -p167 -(dp168 -g35 -S'NC_000016.9:g.89576930_89576931insA' -p169 -sg37 -(dp170 -g39 -g40 -sg41 -g42 -sg43 -S'89576930' -p171 -sg45 -S'TA' -p172 -sssg53 -(dp173 -g35 -S'NC_000016.10:g.89510522_89510523insA' -p174 -sg37 -(dp175 -g39 -g57 -sg41 -g42 -sg43 -S'89510522' -p176 -sg45 -S'TA' -p177 -sssS'hg19' -p178 -(dp179 -g35 -S'NC_000016.9:g.89576930_89576931insA' -p180 -sg37 -(dp181 -g39 -g57 -sg41 -g42 -sg43 -S'89576930' -p182 -sg45 -S'TA' -p183 -ssssg66 -(dp184 -g68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' -p185 -sg70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2' -p186 -sssS'NM_001363850.1:c.216dup' -p187 -(dp188 -g3 -g4 -sg5 -(lp189 -S'Multiple ALT sequences detected' -p190 -aS'auto-submitting all possible combinations' -p191 -aS'NC_000016.9:g.89576930T>TT automapped to NC_000016.9:g.89576930dupT' -p192 -aS'RefSeqGene record not available' -p193 -asg11 -g4 -sg12 -(lp194 -sg14 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p195 -sg16 -S'SPG7' -p196 -sg18 -(dp197 -g20 -S'NP_001350779.1:p.(Glu73Ter)' -p198 -sg22 -S'NP_001350779.1:p.(E73*)' -p199 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_001363850.1:c.216dup' -p200 -sg30 -g4 -sg31 -(dp201 -S'hg19' -p202 -(dp203 -g35 -S'NC_000016.9:g.89576930dup' -p204 -sg37 -(dp205 -g39 -g57 -sg41 -g42 -sg43 -S'89576928' -p206 -sg45 -S'TT' -p207 -sssS'grch37' -p208 -(dp209 -g35 -S'NC_000016.9:g.89576930dup' -p210 -sg37 -(dp211 -g39 -g40 -sg41 -g42 -sg43 -S'89576928' 
-p212 -sg45 -S'TT' -p213 -ssssg66 -(dp214 -g68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' -p215 -sg70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' -p216 -sssS'flag' -p217 -S'gene_variant' -p218 -sS'NM_001363850.1:c.216_217insA' -p219 -(dp220 -g3 -g4 -sg5 -(lp221 -S'Multiple ALT sequences detected' -p222 -aS'auto-submitting all possible combinations' -p223 -aS'NC_000016.9:g.89576930T>TA automapped to NC_000016.9:g.89576930_89576931insA' -p224 -aS'RefSeqGene record not available' -p225 -asg11 -g4 -sg12 -(lp226 -sg14 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p227 -sg16 -S'SPG7' -p228 -sg18 -(dp229 -g20 -S'NP_001350779.1:p.(Glu73ArgfsTer30)' -p230 -sg22 -S'NP_001350779.1:p.(E73Rfs*30)' -p231 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_001363850.1:c.216_217insA' -p232 -sg30 -g4 -sg31 -(dp233 -S'hg19' -p234 -(dp235 -g35 -S'NC_000016.9:g.89576930_89576931insA' -p236 -sg37 -(dp237 -g39 -g57 -sg41 -g42 -sg43 -S'89576930' -p238 -sg45 -S'TA' -p239 -sssS'grch37' -p240 -(dp241 -g35 -S'NC_000016.9:g.89576930_89576931insA' -p242 -sg37 -(dp243 -g39 -g40 -sg41 -g42 -sg43 -S'89576930' -p244 -sg45 -S'TA' -p245 -ssssg66 -(dp246 -g68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' -p247 -sg70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' -p248 -sssS'NM_199367.1:c.216_217insA' -p249 -(dp250 -g3 -g4 -sg5 -(lp251 -S'Multiple ALT sequences detected' -p252 -aS'auto-submitting all possible combinations' -p253 -aS'NC_000016.9:g.89576930T>TA automapped to NC_000016.9:g.89576930_89576931insA' -p254 -aS'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' -p255 -aS'NM_199367.2:c.216_217insA MUST be fully validated prior to use in reports' -p256 -aS'select_variants=NM_199367.2:c.216_217insA' -p257 -aS'RefSeqGene record not available' -p258 -asg11 -g4 -sg12 -(lp259 -sg14 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), 
transcript variant 2, mRNA -p260 -sg16 -S'SPG7' -p261 -sg18 -(dp262 -g20 -S'NP_955399.1:p.(Glu73ArgfsTer30)' -p263 -sg22 -S'NP_955399.1:p.(E73Rfs*30)' -p264 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_199367.1:c.216_217insA' -p265 -sg30 -g4 -sg31 -(dp266 -S'hg19' -p267 -(dp268 -g35 -S'NC_000016.9:g.89576930_89576931insA' -p269 -sg37 -(dp270 -g39 -g57 -sg41 -g42 -sg43 -S'89576930' -p271 -sg45 -S'TA' -p272 -sssS'grch37' -p273 -(dp274 -g35 -S'NC_000016.9:g.89576930_89576931insA' -p275 -sg37 -(dp276 -g39 -g40 -sg41 -g42 -sg43 -S'89576930' -p277 -sg45 -S'TA' -p278 -ssssg66 -(dp279 -g68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' -p280 -sg70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1' -p281 -sssS'metadata' -p282 -(dp283 -S'variantvalidator_hgvs_version' -p284 -S'1.1.3' -p285 -sS'uta_schema' -p286 -S'uta_20180821' -p287 -sS'seqrepo_db' -p288 -S'2018-08-21' -p289 -sS'variantvalidator_version' -p290 -S'v0.2' -p291 -ssS'NM_199367.1:c.216dup' -p292 -(dp293 -g3 -g4 -sg5 -(lp294 -S'Multiple ALT sequences detected' -p295 -aS'auto-submitting all possible combinations' -p296 -aS'NC_000016.9:g.89576930T>TT automapped to NC_000016.9:g.89576930dupT' -p297 -aS'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' -p298 -aS'NM_199367.2:c.216dupT MUST be fully validated prior to use in reports' -p299 -aS'select_variants=NM_199367.2:c.216dup' -p300 -aS'RefSeqGene record not available' -p301 -asg11 -g4 -sg12 -(lp302 -sg14 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA -p303 -sg16 -S'SPG7' -p304 -sg18 -(dp305 -g20 -S'NP_955399.1:p.(Glu73Ter)' -p306 -sg22 -S'NP_955399.1:p.(E73*)' -p307 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_199367.1:c.216dup' -p308 -sg30 -g4 -sg31 -(dp309 -S'hg19' -p310 -(dp311 -g35 -S'NC_000016.9:g.89576930dup' -p312 -sg37 -(dp313 -g39 -g57 -sg41 -g42 -sg43 -S'89576928' -p314 -sg45 -S'TT' -p315 -sssS'grch37' -p316 -(dp317 -g35 
-S'NC_000016.9:g.89576930dup' -p318 -sg37 -(dp319 -g39 -g40 -sg41 -g42 -sg43 -S'89576928' -p320 -sg45 -S'TT' -p321 -ssssg66 -(dp322 -g68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' -p323 -sg70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1' -p324 -sssS'NM_003119.3:c.216_217insA' -p325 -(dp326 -g3 -g4 -sg5 -(lp327 -S'Multiple ALT sequences detected' -p328 -aS'auto-submitting all possible combinations' -p329 -aS'NC_000016.9:g.89576930T>TA automapped to NC_000016.9:g.89576930_89576931insA' -p330 -aS'RefSeqGene record not available' -p331 -asg11 -g4 -sg12 -(lp332 -sg14 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p333 -sg16 -S'SPG7' -p334 -sg18 -(dp335 -g20 -S'NP_003110.1:p.(Glu73ArgfsTer30)' -p336 -sg22 -S'NP_003110.1:p.(E73Rfs*30)' -p337 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_003119.3:c.216_217insA' -p338 -sg30 -g4 -sg31 -(dp339 -S'grch38' -p340 -(dp341 -g35 -S'NC_000016.10:g.89510522_89510523insA' -p342 -sg37 -(dp343 -g39 -g40 -sg41 -g42 -sg43 -S'89510522' -p344 -sg45 -S'TA' -p345 -sssS'grch37' -p346 -(dp347 -g35 -S'NC_000016.9:g.89576930_89576931insA' -p348 -sg37 -(dp349 -g39 -g40 -sg41 -g42 -sg43 -S'89576930' -p350 -sg45 -S'TA' -p351 -sssg53 -(dp352 -g35 -S'NC_000016.10:g.89510522_89510523insA' -p353 -sg37 -(dp354 -g39 -g57 -sg41 -g42 -sg43 -S'89510522' -p355 -sg45 -S'TA' -p356 -sssS'hg19' -p357 -(dp358 -g35 -S'NC_000016.9:g.89576930_89576931insA' -p359 -sg37 -(dp360 -g39 -g57 -sg41 -g42 -sg43 -S'89576930' -p361 -sg45 -S'TA' -p362 -ssssg66 -(dp363 -g68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p364 -sg70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' -p365 -sssS'NM_003119.2:c.216dup' -p366 -(dp367 -g3 -g4 -sg5 -(lp368 -S'Multiple ALT sequences detected' -p369 -aS'auto-submitting all possible combinations' -p370 -aS'NC_000016.9:g.89576930T>TT automapped to NC_000016.9:g.89576930dupT' -p371 -aS'A more recent version of the selected reference sequence NM_003119.2 is available 
(NM_003119.3)' -p372 -aS'NM_003119.3:c.216dupT MUST be fully validated prior to use in reports' -p373 -aS'select_variants=NM_003119.3:c.216dup' -p374 -aS'RefSeqGene record not available' -p375 -asg11 -g4 -sg12 -(lp376 -sg14 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p377 -sg16 -S'SPG7' -p378 -sg18 -(dp379 -g20 -S'NP_003110.1:p.(Glu73Ter)' -p380 -sg22 -S'NP_003110.1:p.(E73*)' -p381 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_003119.2:c.216dup' -p382 -sg30 -g4 -sg31 -(dp383 -S'hg19' -p384 -(dp385 -g35 -S'NC_000016.9:g.89576930dup' -p386 -sg37 -(dp387 -g39 -g57 -sg41 -g42 -sg43 -S'89576928' -p388 -sg45 -S'TT' -p389 -sssS'grch37' -p390 -(dp391 -g35 -S'NC_000016.9:g.89576930dup' -p392 -sg37 -(dp393 -g39 -g40 -sg41 -g42 -sg43 -S'89576928' -p394 -sg45 -S'TT' -p395 -ssssg66 -(dp396 -g68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p397 -sg70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' -p398 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant245.txt b/VariantValidator/testing/testOutputsMasterITS/variant245.txt deleted file mode 100644 index ab31d77a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant245.txt +++ /dev/null @@ -1,580 +0,0 @@ -(dp0 -S'NM_199367.1:c.216_217dup' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000016.9:g.89576931G>GTG automapped to NC_000016.9:g.89576930_89576931dupTG' -p7 -aS'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' -p8 -aS'NM_199367.2:c.216_217dupTG MUST be fully validated prior to use in reports' -p9 -aS'select_variants=NM_199367.2:c.216_217dup' -p10 -aS'RefSeqGene record not available' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) 
(SPG7), transcript variant 2, mRNA -p16 -sS'gene_symbol' -p17 -S'SPG7' -p18 -sS'hgvs_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_955399.1:p.(Glu73ValfsTer9)' -p22 -sS'slr' -p23 -S'NP_955399.1:p.(E73Vfs*9)' -p24 -ssS'submitted_variant' -p25 -S'16-89576931-G-GTG' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'hgvs_lrg_variant' -p28 -g4 -sS'hgvs_transcript_variant' -p29 -S'NM_199367.1:c.216_217dup' -p30 -sS'hgvs_refseqgene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000016.9:g.89576930_89576931dup' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr16' -p41 -sS'ref' -p42 -S'TG' -p43 -sS'pos' -p44 -S'89576930' -p45 -sS'alt' -p46 -S'TGTG' -p47 -sssS'grch37' -p48 -(dp49 -g36 -S'NC_000016.9:g.89576930_89576931dup' -p50 -sg38 -(dp51 -g40 -S'16' -p52 -sg42 -S'TG' -p53 -sg44 -S'89576930' -p54 -sg46 -S'TGTG' -p55 -ssssS'reference_sequence_records' -p56 -(dp57 -S'protein' -p58 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' -p59 -sS'transcript' -p60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1' -p61 -sssS'NM_003119.3:c.216_217dup' -p62 -(dp63 -g3 -g4 -sg5 -(lp64 -S'NC_000016.9:g.89576931G>GTG automapped to NC_000016.9:g.89576930_89576931dupTG' -p65 -aS'RefSeqGene record not available' -p66 -asg12 -g4 -sg13 -(lp67 -sg15 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p68 -sg17 -S'SPG7' -p69 -sg19 -(dp70 -g21 -S'NP_003110.1:p.(Glu73ValfsTer9)' -p71 -sg23 -S'NP_003110.1:p.(E73Vfs*9)' -p72 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_003119.3:c.216_217dup' -p73 -sg31 -g4 -sg32 -(dp74 -S'grch38' -p75 -(dp76 -g36 -S'NC_000016.10:g.89510522_89510523dup' -p77 -sg38 -(dp78 -g40 -g52 -sg42 -S'TG' -p79 -sg44 -S'89510522' -p80 -sg46 -S'TGTG' -p81 -sssS'grch37' -p82 -(dp83 -g36 -S'NC_000016.9:g.89576930_89576931dup' -p84 -sg38 -(dp85 -g40 -g52 -sg42 -S'TG' -p86 -sg44 -S'89576930' -p87 -sg46 -S'TGTG' -p88 -sssS'hg38' -p89 -(dp90 -g36 
-S'NC_000016.10:g.89510522_89510523dup' -p91 -sg38 -(dp92 -g40 -g41 -sg42 -S'TG' -p93 -sg44 -S'89510522' -p94 -sg46 -S'TGTG' -p95 -sssS'hg19' -p96 -(dp97 -g36 -S'NC_000016.9:g.89576930_89576931dup' -p98 -sg38 -(dp99 -g40 -g41 -sg42 -S'TG' -p100 -sg44 -S'89576930' -p101 -sg46 -S'TGTG' -p102 -ssssg56 -(dp103 -g58 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p104 -sg60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' -p105 -sssS'NM_199367.2:c.216_217dup' -p106 -(dp107 -g3 -g4 -sg5 -(lp108 -S'NC_000016.9:g.89576931G>GTG automapped to NC_000016.9:g.89576930_89576931dupTG' -p109 -aS'RefSeqGene record not available' -p110 -asg12 -g4 -sg13 -(lp111 -sg15 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA -p112 -sg17 -S'SPG7' -p113 -sg19 -(dp114 -g21 -S'NP_955399.1:p.(Glu73ValfsTer9)' -p115 -sg23 -S'NP_955399.1:p.(E73Vfs*9)' -p116 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_199367.2:c.216_217dup' -p117 -sg31 -g4 -sg32 -(dp118 -S'grch38' -p119 -(dp120 -g36 -S'NC_000016.10:g.89510522_89510523dup' -p121 -sg38 -(dp122 -g40 -g52 -sg42 -S'TG' -p123 -sg44 -S'89510522' -p124 -sg46 -S'TGTG' -p125 -sssS'grch37' -p126 -(dp127 -g36 -S'NC_000016.9:g.89576930_89576931dup' -p128 -sg38 -(dp129 -g40 -g52 -sg42 -S'TG' -p130 -sg44 -S'89576930' -p131 -sg46 -S'TGTG' -p132 -sssg89 -(dp133 -g36 -S'NC_000016.10:g.89510522_89510523dup' -p134 -sg38 -(dp135 -g40 -g41 -sg42 -S'TG' -p136 -sg44 -S'89510522' -p137 -sg46 -S'TGTG' -p138 -sssS'hg19' -p139 -(dp140 -g36 -S'NC_000016.9:g.89576930_89576931dup' -p141 -sg38 -(dp142 -g40 -g41 -sg42 -S'TG' -p143 -sg44 -S'89576930' -p144 -sg46 -S'TGTG' -p145 -ssssg56 -(dp146 -g58 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' -p147 -sg60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2' -p148 -sssS'NM_003119.2:c.216_217dup' -p149 -(dp150 -g3 -g4 -sg5 -(lp151 -S'NC_000016.9:g.89576931G>GTG automapped to NC_000016.9:g.89576930_89576931dupTG' -p152 -aS'A more recent version of the selected reference 
sequence NM_003119.2 is available (NM_003119.3)' -p153 -aS'NM_003119.3:c.216_217dupTG MUST be fully validated prior to use in reports' -p154 -aS'select_variants=NM_003119.3:c.216_217dup' -p155 -aS'RefSeqGene record not available' -p156 -asg12 -g4 -sg13 -(lp157 -sg15 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p158 -sg17 -S'SPG7' -p159 -sg19 -(dp160 -g21 -S'NP_003110.1:p.(Glu73ValfsTer9)' -p161 -sg23 -S'NP_003110.1:p.(E73Vfs*9)' -p162 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_003119.2:c.216_217dup' -p163 -sg31 -g4 -sg32 -(dp164 -S'hg19' -p165 -(dp166 -g36 -S'NC_000016.9:g.89576930_89576931dup' -p167 -sg38 -(dp168 -g40 -g41 -sg42 -S'TG' -p169 -sg44 -S'89576930' -p170 -sg46 -S'TGTG' -p171 -sssS'grch37' -p172 -(dp173 -g36 -S'NC_000016.9:g.89576930_89576931dup' -p174 -sg38 -(dp175 -g40 -g52 -sg42 -S'TG' -p176 -sg44 -S'89576930' -p177 -sg46 -S'TGTG' -p178 -ssssg56 -(dp179 -g58 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p180 -sg60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' -p181 -sssS'flag' -p182 -S'gene_variant' -p183 -sS'NM_001363850.1:c.216_217dup' -p184 -(dp185 -g3 -g4 -sg5 -(lp186 -S'NC_000016.9:g.89576931G>GTG automapped to NC_000016.9:g.89576930_89576931dupTG' -p187 -aS'RefSeqGene record not available' -p188 -asg12 -g4 -sg13 -(lp189 -sg15 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p190 -sg17 -S'SPG7' -p191 -sg19 -(dp192 -g21 -S'NP_001350779.1:p.(Glu73ValfsTer9)' -p193 -sg23 -S'NP_001350779.1:p.(E73Vfs*9)' -p194 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_001363850.1:c.216_217dup' -p195 -sg31 -g4 -sg32 -(dp196 -S'hg19' -p197 -(dp198 -g36 -S'NC_000016.9:g.89576930_89576931dup' -p199 -sg38 -(dp200 -g40 -g41 -sg42 -S'TG' -p201 -sg44 -S'89576930' -p202 -sg46 -S'TGTG' -p203 -sssS'grch37' -p204 -(dp205 -g36 -S'NC_000016.9:g.89576930_89576931dup' -p206 -sg38 -(dp207 -g40 -g52 -sg42 -S'TG' -p208 -sg44 -S'89576930' -p209 -sg46 
-S'TGTG' -p210 -ssssg56 -(dp211 -g58 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' -p212 -sg60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' -p213 -sssS'metadata' -p214 -(dp215 -S'variantvalidator_hgvs_version' -p216 -S'1.1.3' -p217 -sS'uta_schema' -p218 -S'uta_20180821' -p219 -sS'seqrepo_db' -p220 -S'2018-08-21' -p221 -sS'variantvalidator_version' -p222 -S'v0.2' -p223 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant246.txt b/VariantValidator/testing/testOutputsMasterITS/variant246.txt deleted file mode 100644 index 6a45a8ca..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant246.txt +++ /dev/null @@ -1,567 +0,0 @@ -(dp0 -S'NM_199367.1:c.1046_1071del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000016.9:g.89598368CGGCCCCCCCGGCTGTGGGAAGACGCT>C automapped to NC_000016.9:g.89598370_89598395del' -p7 -aS'A more recent version of the selected reference sequence NM_199367.1 is available (NM_199367.2)' -p8 -aS'NM_199367.2:c.1046_1071del MUST be fully validated prior to use in reports' -p9 -aS'select_variants=NM_199367.2:c.1046_1071del' -p10 -aS'RefSeqGene record not available' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA -p16 -sS'gene_symbol' -p17 -S'SPG7' -p18 -sS'hgvs_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_955399.1:p.(Gly349AlafsTer38)' -p22 -sS'slr' -p23 -S'NP_955399.1:p.(G349Afs*38)' -p24 -ssS'submitted_variant' -p25 -S'16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'hgvs_lrg_variant' -p28 -g4 -sS'hgvs_transcript_variant' -p29 -S'NM_199367.1:c.1046_1071del' -p30 -sS'hgvs_refseqgene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 
-S'hgvs_genomic_description' -p36 -S'NC_000016.9:g.89598370_89598395del' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr16' -p41 -sS'ref' -p42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p43 -sS'pos' -p44 -S'89598368' -p45 -sS'alt' -p46 -S'C' -p47 -sssS'grch37' -p48 -(dp49 -g36 -S'NC_000016.9:g.89598370_89598395del' -p50 -sg38 -(dp51 -g40 -S'16' -p52 -sg42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p53 -sg44 -S'89598368' -p54 -sg46 -g47 -ssssS'reference_sequence_records' -p55 -(dp56 -S'protein' -p57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' -p58 -sS'transcript' -p59 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1' -p60 -sssS'NM_001363850.1:c.1046_1071del' -p61 -(dp62 -g3 -g4 -sg5 -(lp63 -S'NC_000016.9:g.89598368CGGCCCCCCCGGCTGTGGGAAGACGCT>C automapped to NC_000016.9:g.89598370_89598395del' -p64 -aS'RefSeqGene record not available' -p65 -asg12 -g4 -sg13 -(lp66 -sg15 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p67 -sg17 -S'SPG7' -p68 -sg19 -(dp69 -g21 -S'NP_001350779.1:p.(Gly349AlafsTer38)' -p70 -sg23 -S'NP_001350779.1:p.(G349Afs*38)' -p71 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_001363850.1:c.1046_1071del' -p72 -sg31 -g4 -sg32 -(dp73 -S'hg19' -p74 -(dp75 -g36 -S'NC_000016.9:g.89598370_89598395del' -p76 -sg38 -(dp77 -g40 -g41 -sg42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p78 -sg44 -S'89598368' -p79 -sg46 -g47 -sssS'grch37' -p80 -(dp81 -g36 -S'NC_000016.9:g.89598370_89598395del' -p82 -sg38 -(dp83 -g40 -g52 -sg42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p84 -sg44 -S'89598368' -p85 -sg46 -g47 -ssssg55 -(dp86 -g57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' -p87 -sg59 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' -p88 -sssS'NM_199367.2:c.1046_1071del' -p89 -(dp90 -g3 -g4 -sg5 -(lp91 -S'NC_000016.9:g.89598368CGGCCCCCCCGGCTGTGGGAAGACGCT>C automapped to NC_000016.9:g.89598370_89598395del' -p92 -aS'RefSeqGene record not available' -p93 -asg12 -g4 -sg13 -(lp94 -sg15 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase 
subunit (SPG7), transcript variant 2, mRNA -p95 -sg17 -S'SPG7' -p96 -sg19 -(dp97 -g21 -S'NP_955399.1:p.(Gly349AlafsTer38)' -p98 -sg23 -S'NP_955399.1:p.(G349Afs*38)' -p99 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_199367.2:c.1046_1071del' -p100 -sg31 -g4 -sg32 -(dp101 -S'grch38' -p102 -(dp103 -g36 -S'NC_000016.10:g.89531962_89531987del' -p104 -sg38 -(dp105 -g40 -g52 -sg42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p106 -sg44 -S'89531960' -p107 -sg46 -g47 -sssS'grch37' -p108 -(dp109 -g36 -S'NC_000016.9:g.89598370_89598395del' -p110 -sg38 -(dp111 -g40 -g52 -sg42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p112 -sg44 -S'89598368' -p113 -sg46 -g47 -sssS'hg38' -p114 -(dp115 -g36 -S'NC_000016.10:g.89531962_89531987del' -p116 -sg38 -(dp117 -g40 -g41 -sg42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p118 -sg44 -S'89531960' -p119 -sg46 -g47 -sssS'hg19' -p120 -(dp121 -g36 -S'NC_000016.9:g.89598370_89598395del' -p122 -sg38 -(dp123 -g40 -g41 -sg42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p124 -sg44 -S'89598368' -p125 -sg46 -g47 -ssssg55 -(dp126 -g57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1' -p127 -sg59 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2' -p128 -sssS'flag' -p129 -S'gene_variant' -p130 -sS'NM_003119.2:c.1046_1071del' -p131 -(dp132 -g3 -g4 -sg5 -(lp133 -S'NC_000016.9:g.89598368CGGCCCCCCCGGCTGTGGGAAGACGCT>C automapped to NC_000016.9:g.89598370_89598395del' -p134 -aS'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' -p135 -aS'NM_003119.3:c.1046_1071del MUST be fully validated prior to use in reports' -p136 -aS'select_variants=NM_003119.3:c.1046_1071del' -p137 -aS'RefSeqGene record not available' -p138 -asg12 -g4 -sg13 -(lp139 -sg15 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p140 -sg17 -S'SPG7' -p141 -sg19 -(dp142 -g21 -S'NP_003110.1:p.(Gly349AlafsTer38)' -p143 -sg23 -S'NP_003110.1:p.(G349Afs*38)' -p144 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_003119.2:c.1046_1071del' -p145 
-sg31 -g4 -sg32 -(dp146 -S'hg19' -p147 -(dp148 -g36 -S'NC_000016.9:g.89598370_89598395del' -p149 -sg38 -(dp150 -g40 -g41 -sg42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p151 -sg44 -S'89598368' -p152 -sg46 -g47 -sssS'grch37' -p153 -(dp154 -g36 -S'NC_000016.9:g.89598370_89598395del' -p155 -sg38 -(dp156 -g40 -g52 -sg42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p157 -sg44 -S'89598368' -p158 -sg46 -g47 -ssssg55 -(dp159 -g57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p160 -sg59 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' -p161 -sssS'NM_003119.3:c.1046_1071del' -p162 -(dp163 -g3 -g4 -sg5 -(lp164 -S'NC_000016.9:g.89598368CGGCCCCCCCGGCTGTGGGAAGACGCT>C automapped to NC_000016.9:g.89598370_89598395del' -p165 -aS'RefSeqGene record not available' -p166 -asg12 -g4 -sg13 -(lp167 -sg15 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p168 -sg17 -S'SPG7' -p169 -sg19 -(dp170 -g21 -S'NP_003110.1:p.(Gly349AlafsTer38)' -p171 -sg23 -S'NP_003110.1:p.(G349Afs*38)' -p172 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_003119.3:c.1046_1071del' -p173 -sg31 -g4 -sg32 -(dp174 -S'grch38' -p175 -(dp176 -g36 -S'NC_000016.10:g.89531962_89531987del' -p177 -sg38 -(dp178 -g40 -g52 -sg42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p179 -sg44 -S'89531960' -p180 -sg46 -g47 -sssS'grch37' -p181 -(dp182 -g36 -S'NC_000016.9:g.89598370_89598395del' -p183 -sg38 -(dp184 -g40 -g52 -sg42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p185 -sg44 -S'89598368' -p186 -sg46 -g47 -sssg114 -(dp187 -g36 -S'NC_000016.10:g.89531962_89531987del' -p188 -sg38 -(dp189 -g40 -g41 -sg42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p190 -sg44 -S'89531960' -p191 -sg46 -g47 -sssS'hg19' -p192 -(dp193 -g36 -S'NC_000016.9:g.89598370_89598395del' -p194 -sg38 -(dp195 -g40 -g41 -sg42 -S'CGGCCCCCCCGGCTGTGGGAAGACGCT' -p196 -sg44 -S'89598368' -p197 -sg46 -g47 -ssssg55 -(dp198 -g57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p199 -sg59 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' -p200 -sssS'metadata' -p201 -(dp202 
-S'variantvalidator_hgvs_version' -p203 -S'1.1.3' -p204 -sS'uta_schema' -p205 -S'uta_20180821' -p206 -sS'seqrepo_db' -p207 -S'2018-08-21' -p208 -sS'variantvalidator_version' -p209 -S'v0.2' -p210 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant247.txt b/VariantValidator/testing/testOutputsMasterITS/variant247.txt deleted file mode 100644 index 85637161..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant247.txt +++ /dev/null @@ -1,357 +0,0 @@ -(dp0 -S'NM_001363850.1:c.1450-1_1457delinsT' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000016.9:g.89613064AGGAGAGGCG>AT automapped to NC_000016.9:g.89613065_89613073delinsT' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p13 -sS'gene_symbol' -p14 -S'SPG7' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001350779.1:p.?' -p19 -sS'slr' -p20 -S'NP_001350779.1:p.?' 
-p21 -ssS'submitted_variant' -p22 -S'16-89613064-AGGAGAGGCG-AT' -p23 -sS'genome_context_intronic_sequence' -p24 -S'NC_000016.9(NM_001363850.1):c.1450-1_1457delinsT' -p25 -sS'hgvs_lrg_variant' -p26 -g4 -sS'hgvs_transcript_variant' -p27 -S'NM_001363850.1:c.1450-1_1457delinsT' -p28 -sS'hgvs_refseqgene_variant' -p29 -g4 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000016.9:g.89613065_89613073delinsT' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr16' -p39 -sS'ref' -p40 -S'GGAGAGGCG' -p41 -sS'pos' -p42 -S'89613065' -p43 -sS'alt' -p44 -S'T' -p45 -sssS'grch37' -p46 -(dp47 -g34 -S'NC_000016.9:g.89613065_89613073delinsT' -p48 -sg36 -(dp49 -g38 -S'16' -p50 -sg40 -S'GGAGAGGCG' -p51 -sg42 -S'89613065' -p52 -sg44 -g45 -ssssS'reference_sequence_records' -p53 -(dp54 -S'protein' -p55 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' -p56 -sS'transcript' -p57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' -p58 -sssS'flag' -p59 -S'gene_variant' -p60 -sS'metadata' -p61 -(dp62 -S'variantvalidator_hgvs_version' -p63 -S'1.1.3' -p64 -sS'uta_schema' -p65 -S'uta_20180821' -p66 -sS'seqrepo_db' -p67 -S'2018-08-21' -p68 -sS'variantvalidator_version' -p69 -S'v0.2' -p70 -ssS'NM_003119.2:c.1450-1_1457delinsT' -p71 -(dp72 -g3 -g4 -sg5 -(lp73 -S'NC_000016.9:g.89613064AGGAGAGGCG>AT automapped to NC_000016.9:g.89613065_89613073delinsT' -p74 -aS'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' -p75 -aS'NM_003119.3:c.1450-1_1457delinsT MUST be fully validated prior to use in reports' -p76 -aS'select_variants=NM_003119.3:c.1450-1_1457delinsT' -p77 -aS'RefSeqGene record not available' -p78 -asg9 -g4 -sg10 -(lp79 -sg12 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p80 -sg14 -S'SPG7' -p81 -sg16 -(dp82 -g18 -S'NP_003110.1:p.?' -p83 -sg20 -S'NP_003110.1:p.?' 
-p84 -ssg22 -g23 -sg24 -S'NC_000016.9(NM_003119.2):c.1450-1_1457delinsT' -p85 -sg26 -g4 -sg27 -S'NM_003119.2:c.1450-1_1457delinsT' -p86 -sg29 -g4 -sg30 -(dp87 -S'hg19' -p88 -(dp89 -g34 -S'NC_000016.9:g.89613065_89613073delinsT' -p90 -sg36 -(dp91 -g38 -g39 -sg40 -S'GGAGAGGCG' -p92 -sg42 -S'89613065' -p93 -sg44 -g45 -sssS'grch37' -p94 -(dp95 -g34 -S'NC_000016.9:g.89613065_89613073delinsT' -p96 -sg36 -(dp97 -g38 -g50 -sg40 -S'GGAGAGGCG' -p98 -sg42 -S'89613065' -p99 -sg44 -g45 -ssssg53 -(dp100 -g55 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p101 -sg57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' -p102 -sssS'NM_003119.3:c.1450-1_1457delinsT' -p103 -(dp104 -g3 -g4 -sg5 -(lp105 -S'NC_000016.9:g.89613064AGGAGAGGCG>AT automapped to NC_000016.9:g.89613065_89613073delinsT' -p106 -aS'RefSeqGene record not available' -p107 -asg9 -g4 -sg10 -(lp108 -sg12 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p109 -sg14 -S'SPG7' -p110 -sg16 -(dp111 -g18 -S'NP_003110.1:p.?' -p112 -sg20 -S'NP_003110.1:p.?' 
-p113 -ssg22 -g23 -sg24 -S'NC_000016.9(NM_003119.3):c.1450-1_1457delinsT' -p114 -sg26 -g4 -sg27 -S'NM_003119.3:c.1450-1_1457delinsT' -p115 -sg29 -g4 -sg30 -(dp116 -S'grch38' -p117 -(dp118 -g34 -S'NC_000016.10:g.89546657_89546665delinsT' -p119 -sg36 -(dp120 -g38 -g50 -sg40 -S'GGAGAGGCG' -p121 -sg42 -S'89546657' -p122 -sg44 -g45 -sssS'grch37' -p123 -(dp124 -g34 -S'NC_000016.9:g.89613065_89613073delinsT' -p125 -sg36 -(dp126 -g38 -g50 -sg40 -S'GGAGAGGCG' -p127 -sg42 -S'89613065' -p128 -sg44 -g45 -sssS'hg38' -p129 -(dp130 -g34 -S'NC_000016.10:g.89546657_89546665delinsT' -p131 -sg36 -(dp132 -g38 -g39 -sg40 -S'GGAGAGGCG' -p133 -sg42 -S'89546657' -p134 -sg44 -g45 -sssS'hg19' -p135 -(dp136 -g34 -S'NC_000016.9:g.89613065_89613073delinsT' -p137 -sg36 -(dp138 -g38 -g39 -sg40 -S'GGAGAGGCG' -p139 -sg42 -S'89613065' -p140 -sg44 -g45 -ssssg53 -(dp141 -g55 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p142 -sg57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' -p143 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant248.txt b/VariantValidator/testing/testOutputsMasterITS/variant248.txt deleted file mode 100644 index 43bdb4bb..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant248.txt +++ /dev/null @@ -1,354 +0,0 @@ -(dp0 -S'NM_003119.2:c.1454_1462delinsT' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000016.9:g.89613069AGGCGGGAGA>AT automapped to NC_000016.9:g.89613070_89613078delinsT' -p7 -aS'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' -p8 -aS'NM_003119.3:c.1454_1462delinsT MUST be fully validated prior to use in reports' -p9 -aS'select_variants=NM_003119.3:c.1454_1462delinsT' -p10 -aS'RefSeqGene record not available' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens spastic paraplegia 7 (pure and complicated 
autosomal recessive) (SPG7), transcript variant 1, mRNA -p16 -sS'gene_symbol' -p17 -S'SPG7' -p18 -sS'hgvs_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_003110.1:p.(Arg485IlefsTer3)' -p22 -sS'slr' -p23 -S'NP_003110.1:p.(R485Ifs*3)' -p24 -ssS'submitted_variant' -p25 -S'16-89613069-AGGCGGGAGA-AT' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'hgvs_lrg_variant' -p28 -g4 -sS'hgvs_transcript_variant' -p29 -S'NM_003119.2:c.1454_1462delinsT' -p30 -sS'hgvs_refseqgene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000016.9:g.89613070_89613078delinsT' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr16' -p41 -sS'ref' -p42 -S'GGCGGGAGA' -p43 -sS'pos' -p44 -S'89613070' -p45 -sS'alt' -p46 -S'T' -p47 -sssS'grch37' -p48 -(dp49 -g36 -S'NC_000016.9:g.89613070_89613078delinsT' -p50 -sg38 -(dp51 -g40 -S'16' -p52 -sg42 -S'GGCGGGAGA' -p53 -sg44 -S'89613070' -p54 -sg46 -g47 -ssssS'reference_sequence_records' -p55 -(dp56 -S'protein' -p57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p58 -sS'transcript' -p59 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' -p60 -sssS'flag' -p61 -S'gene_variant' -p62 -sS'metadata' -p63 -(dp64 -S'variantvalidator_hgvs_version' -p65 -S'1.1.3' -p66 -sS'uta_schema' -p67 -S'uta_20180821' -p68 -sS'seqrepo_db' -p69 -S'2018-08-21' -p70 -sS'variantvalidator_version' -p71 -S'v0.2' -p72 -ssS'NM_001363850.1:c.1454_1462delinsT' -p73 -(dp74 -g3 -g4 -sg5 -(lp75 -S'NC_000016.9:g.89613069AGGCGGGAGA>AT automapped to NC_000016.9:g.89613070_89613078delinsT' -p76 -aS'RefSeqGene record not available' -p77 -asg12 -g4 -sg13 -(lp78 -sg15 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p79 -sg17 -S'SPG7' -p80 -sg19 -(dp81 -g21 -S'NP_001350779.1:p.(Arg485IlefsTer3)' -p82 -sg23 -S'NP_001350779.1:p.(R485Ifs*3)' -p83 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_001363850.1:c.1454_1462delinsT' -p84 -sg31 -g4 -sg32 -(dp85 -S'hg19' -p86 
-(dp87 -g36 -S'NC_000016.9:g.89613070_89613078delinsT' -p88 -sg38 -(dp89 -g40 -g41 -sg42 -S'GGCGGGAGA' -p90 -sg44 -S'89613070' -p91 -sg46 -g47 -sssS'grch37' -p92 -(dp93 -g36 -S'NC_000016.9:g.89613070_89613078delinsT' -p94 -sg38 -(dp95 -g40 -g52 -sg42 -S'GGCGGGAGA' -p96 -sg44 -S'89613070' -p97 -sg46 -g47 -ssssg55 -(dp98 -g57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' -p99 -sg59 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' -p100 -sssS'NM_003119.3:c.1454_1462delinsT' -p101 -(dp102 -g3 -g4 -sg5 -(lp103 -S'NC_000016.9:g.89613069AGGCGGGAGA>AT automapped to NC_000016.9:g.89613070_89613078delinsT' -p104 -aS'RefSeqGene record not available' -p105 -asg12 -g4 -sg13 -(lp106 -sg15 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p107 -sg17 -S'SPG7' -p108 -sg19 -(dp109 -g21 -S'NP_003110.1:p.(Arg485IlefsTer3)' -p110 -sg23 -S'NP_003110.1:p.(R485Ifs*3)' -p111 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_003119.3:c.1454_1462delinsT' -p112 -sg31 -g4 -sg32 -(dp113 -S'grch38' -p114 -(dp115 -g36 -S'NC_000016.10:g.89546662_89546670delinsT' -p116 -sg38 -(dp117 -g40 -g52 -sg42 -S'GGCGGGAGA' -p118 -sg44 -S'89546662' -p119 -sg46 -g47 -sssS'grch37' -p120 -(dp121 -g36 -S'NC_000016.9:g.89613070_89613078delinsT' -p122 -sg38 -(dp123 -g40 -g52 -sg42 -S'GGCGGGAGA' -p124 -sg44 -S'89613070' -p125 -sg46 -g47 -sssS'hg38' -p126 -(dp127 -g36 -S'NC_000016.10:g.89546662_89546670delinsT' -p128 -sg38 -(dp129 -g40 -g41 -sg42 -S'GGCGGGAGA' -p130 -sg44 -S'89546662' -p131 -sg46 -g47 -sssS'hg19' -p132 -(dp133 -g36 -S'NC_000016.9:g.89613070_89613078delinsT' -p134 -sg38 -(dp135 -g40 -g41 -sg42 -S'GGCGGGAGA' -p136 -sg44 -S'89613070' -p137 -sg46 -g47 -ssssg55 -(dp138 -g57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p139 -sg59 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' -p140 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant249.txt b/VariantValidator/testing/testOutputsMasterITS/variant249.txt deleted file mode 100644 index 49d55a90..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant249.txt +++ /dev/null @@ -1,341 +0,0 @@ -(dp0 -S'NM_001363850.1:c.1529C>T' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA -p12 -sS'gene_symbol' -p13 -S'SPG7' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001350779.1:p.(Ala510Val)' -p18 -sS'slr' -p19 -S'NP_001350779.1:p.(A510V)' -p20 -ssS'submitted_variant' -p21 -S'16-89613145-C-T' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_001363850.1:c.1529C>T' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000016.9:g.89613145C>T' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr16' -p37 -sS'ref' -p38 -S'C' -p39 -sS'pos' -p40 -S'89613145' -p41 -sS'alt' -p42 -S'T' -p43 -sssS'grch37' -p44 -(dp45 -g32 -S'NC_000016.9:g.89613145C>T' -p46 -sg34 -(dp47 -g36 -S'16' -p48 -sg38 -g39 -sg40 -S'89613145' -p49 -sg42 -g43 -ssssS'reference_sequence_records' -p50 -(dp51 -S'protein' -p52 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1' -p53 -sS'transcript' -p54 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1' -p55 -sssS'NM_003119.3:c.1529C>T' -p56 -(dp57 -g3 -g4 -sg5 -(lp58 -S'RefSeqGene record not available' -p59 -asg8 -g4 -sg9 -(lp60 -sg11 -VHomo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA -p61 -sg13 -S'SPG7' -p62 -sg15 -(dp63 -g17 
-S'NP_003110.1:p.(Ala510Val)' -p64 -sg19 -S'NP_003110.1:p.(A510V)' -p65 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_003119.3:c.1529C>T' -p66 -sg27 -g4 -sg28 -(dp67 -S'grch38' -p68 -(dp69 -g32 -S'NC_000016.10:g.89546737C>T' -p70 -sg34 -(dp71 -g36 -g48 -sg38 -g39 -sg40 -S'89546737' -p72 -sg42 -g43 -sssS'grch37' -p73 -(dp74 -g32 -S'NC_000016.9:g.89613145C>T' -p75 -sg34 -(dp76 -g36 -g48 -sg38 -g39 -sg40 -S'89613145' -p77 -sg42 -g43 -sssS'hg38' -p78 -(dp79 -g32 -S'NC_000016.10:g.89546737C>T' -p80 -sg34 -(dp81 -g36 -g37 -sg38 -g39 -sg40 -S'89546737' -p82 -sg42 -g43 -sssS'hg19' -p83 -(dp84 -g32 -S'NC_000016.9:g.89613145C>T' -p85 -sg34 -(dp86 -g36 -g37 -sg38 -g39 -sg40 -S'89613145' -p87 -sg42 -g43 -ssssg50 -(dp88 -g52 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p89 -sg54 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3' -p90 -sssS'flag' -p91 -S'gene_variant' -p92 -sS'NM_003119.2:c.1529C>T' -p93 -(dp94 -g3 -g4 -sg5 -(lp95 -S'A more recent version of the selected reference sequence NM_003119.2 is available (NM_003119.3)' -p96 -aS'NM_003119.3:c.1529C>T MUST be fully validated prior to use in reports' -p97 -aS'select_variants=NM_003119.3:c.1529C>T' -p98 -aS'RefSeqGene record not available' -p99 -asg8 -g4 -sg9 -(lp100 -sg11 -VHomo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA -p101 -sg13 -S'SPG7' -p102 -sg15 -(dp103 -g17 -S'NP_003110.1:p.(Ala510Val)' -p104 -sg19 -S'NP_003110.1:p.(A510V)' -p105 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_003119.2:c.1529C>T' -p106 -sg27 -g4 -sg28 -(dp107 -S'hg19' -p108 -(dp109 -g32 -S'NC_000016.9:g.89613145C>T' -p110 -sg34 -(dp111 -g36 -g37 -sg38 -g39 -sg40 -S'89613145' -p112 -sg42 -g43 -sssS'grch37' -p113 -(dp114 -g32 -S'NC_000016.9:g.89613145C>T' -p115 -sg34 -(dp116 -g36 -g48 -sg38 -g39 -sg40 -S'89613145' -p117 -sg42 -g43 -ssssg50 -(dp118 -g52 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1' -p119 -sg54 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2' -p120 
-sssS'metadata' -p121 -(dp122 -S'variantvalidator_hgvs_version' -p123 -S'1.1.3' -p124 -sS'uta_schema' -p125 -S'uta_20180821' -p126 -sS'seqrepo_db' -p127 -S'2018-08-21' -p128 -sS'variantvalidator_version' -p129 -S'v0.2' -p130 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant25.txt b/VariantValidator/testing/testOutputsMasterITS/variant25.txt deleted file mode 100644 index 1bf85b0a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant25.txt +++ /dev/null @@ -1,80 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use NC_000011.9:g.5244828_5248381=' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'hgvs_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NM_000518.4:c.-50-80_*132+1868=' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'hgvs_lrg_variant' -p20 -g4 -sS'hgvs_transcript_variant' -p21 -g4 -sS'hgvs_refseqgene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -sS'reference_sequence_records' -p25 -g4 -ssS'flag' -p26 -S'warning' -p27 -sS'metadata' -p28 -(dp29 -S'variantvalidator_hgvs_version' -p30 -S'1.1.3' -p31 -sS'uta_schema' -p32 -S'uta_20180821' -p33 -sS'seqrepo_db' -p34 -S'2018-08-21' -p35 -sS'variantvalidator_version' -p36 -S'v0.2' -p37 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant250.txt b/VariantValidator/testing/testOutputsMasterITS/variant250.txt deleted file mode 100644 index b886e7b1..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant250.txt +++ /dev/null @@ -1,1870 +0,0 @@ -(dp0 -S'NM_001276695.1:c.535_537del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 4, mRNA -p13 -sS'gene_symbol' -p14 -S'TP53' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001263624.1:p.(Val179del)' -p19 -sS'slr' -p20 -S'NP_001263624.1:p.(V179del)' -p21 -ssS'submitted_variant' -p22 -S'17-7578194-GCAC-G' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_001276695.1:c.535_537del' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000017.10:g.7578195_7578197del' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr17' -p38 -sS'ref' -p39 -S'GCAC' -p40 -sS'pos' -p41 -S'7578194' -p42 -sS'alt' -p43 -S'G' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000017.11:g.7674877_7674879del' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'GCAC' -p49 -sg41 -S'7674876' -p50 -sg43 -g44 -sssS'grch37' -p51 -(dp52 -g33 -S'NC_000017.10:g.7578195_7578197del' -p53 -sg35 -(dp54 -g37 -S'17' -p55 -sg39 -S'GCAC' -p56 -sg41 -S'7578194' -p57 -sg43 -g44 -sssS'grch38' -p58 -(dp59 -g33 -S'NC_000017.11:g.7674877_7674879del' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -S'GCAC' -p62 -sg41 -S'7674876' -p63 -sg43 -g44 -ssssS'reference_sequence_records' -p64 -(dp65 -S'protein' -p66 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263624.1' -p67 -sS'transcript' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276695.1' -p69 -sssS'NM_001126113.2:c.652_654del' -p70 -(dp71 -g3 -g4 -sg5 -(lp72 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p73 -aS'RefSeqGene record not available' -p74 -asg9 -g4 -sg10 -(lp75 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 4, mRNA -p76 -sg14 -S'TP53' -p77 -sg16 -(dp78 -g18 -S'NP_001119585.1:p.(Val218del)' -p79 -sg20 -S'NP_001119585.1:p.(V218del)' -p80 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001126113.2:c.652_654del' -p81 -sg28 -g4 -sg29 -(dp82 -S'hg19' -p83 -(dp84 -g33 -S'NC_000017.10:g.7578195_7578197del' -p85 -sg35 -(dp86 -g37 -g38 -sg39 -S'GCAC' -p87 -sg41 -S'7578194' -p88 -sg43 -g44 -sssg45 -(dp89 -g33 -S'NC_000017.11:g.7674877_7674879del' -p90 -sg35 -(dp91 -g37 -g38 -sg39 -S'GCAC' -p92 -sg41 -S'7674876' -p93 -sg43 -g44 -sssS'grch37' -p94 -(dp95 -g33 -S'NC_000017.10:g.7578195_7578197del' -p96 -sg35 -(dp97 -g37 -g55 -sg39 -S'GCAC' -p98 -sg41 -S'7578194' -p99 -sg43 -g44 -sssS'grch38' -p100 -(dp101 -g33 -S'NC_000017.11:g.7674877_7674879del' -p102 -sg35 -(dp103 -g37 -g55 -sg39 -S'GCAC' -p104 -sg41 -S'7674876' -p105 -sg43 -g44 -ssssg64 -(dp106 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119585.1' -p107 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126113.2' -p108 -sssS'NM_001126118.1:c.535_537del' -p109 -(dp110 -g3 -g4 -sg5 -(lp111 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p112 -aS'RefSeqGene record not available' -p113 -asg9 -g4 -sg10 -(lp114 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 8, mRNA -p115 -sg14 -S'TP53' -p116 -sg16 -(dp117 -g18 -S'NP_001119590.1:p.(Val179del)' -p118 -sg20 -S'NP_001119590.1:p.(V179del)' -p119 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001126118.1:c.535_537del' -p120 -sg28 -g4 -sg29 -(dp121 -S'hg19' -p122 -(dp123 -g33 -S'NC_000017.10:g.7578195_7578197del' 
-p124 -sg35 -(dp125 -g37 -g38 -sg39 -S'GCAC' -p126 -sg41 -S'7578194' -p127 -sg43 -g44 -sssg45 -(dp128 -g33 -S'NC_000017.11:g.7674877_7674879del' -p129 -sg35 -(dp130 -g37 -g38 -sg39 -S'GCAC' -p131 -sg41 -S'7674876' -p132 -sg43 -g44 -sssS'grch37' -p133 -(dp134 -g33 -S'NC_000017.10:g.7578195_7578197del' -p135 -sg35 -(dp136 -g37 -g55 -sg39 -S'GCAC' -p137 -sg41 -S'7578194' -p138 -sg43 -g44 -sssS'grch38' -p139 -(dp140 -g33 -S'NC_000017.11:g.7674877_7674879del' -p141 -sg35 -(dp142 -g37 -g55 -sg39 -S'GCAC' -p143 -sg41 -S'7674876' -p144 -sg43 -g44 -ssssg64 -(dp145 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119590.1' -p146 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126118.1' -p147 -sssS'NM_001126116.1:c.256_258del' -p148 -(dp149 -g3 -g4 -sg5 -(lp150 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p151 -aS'RefSeqGene record not available' -p152 -asg9 -g4 -sg10 -(lp153 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 6, mRNA -p154 -sg14 -S'TP53' -p155 -sg16 -(dp156 -g18 -S'NP_001119588.1:p.(Val86del)' -p157 -sg20 -S'NP_001119588.1:p.(V86del)' -p158 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001126116.1:c.256_258del' -p159 -sg28 -g4 -sg29 -(dp160 -S'hg19' -p161 -(dp162 -g33 -S'NC_000017.10:g.7578195_7578197del' -p163 -sg35 -(dp164 -g37 -g38 -sg39 -S'GCAC' -p165 -sg41 -S'7578194' -p166 -sg43 -g44 -sssg45 -(dp167 -g33 -S'NC_000017.11:g.7674877_7674879del' -p168 -sg35 -(dp169 -g37 -g38 -sg39 -S'GCAC' -p170 -sg41 -S'7674876' -p171 -sg43 -g44 -sssS'grch37' -p172 -(dp173 -g33 -S'NC_000017.10:g.7578195_7578197del' -p174 -sg35 -(dp175 -g37 -g55 -sg39 -S'GCAC' -p176 -sg41 -S'7578194' -p177 -sg43 -g44 -sssS'grch38' -p178 -(dp179 -g33 -S'NC_000017.11:g.7674877_7674879del' -p180 -sg35 -(dp181 -g37 -g55 -sg39 -S'GCAC' -p182 -sg41 -S'7674876' -p183 -sg43 -g44 -ssssg64 -(dp184 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119588.1' -p185 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126116.1' -p186 
-sssS'NM_001126117.1:c.256_258del' -p187 -(dp188 -g3 -g4 -sg5 -(lp189 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p190 -aS'RefSeqGene record not available' -p191 -asg9 -g4 -sg10 -(lp192 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 7, mRNA -p193 -sg14 -S'TP53' -p194 -sg16 -(dp195 -g18 -S'NP_001119589.1:p.(Val86del)' -p196 -sg20 -S'NP_001119589.1:p.(V86del)' -p197 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001126117.1:c.256_258del' -p198 -sg28 -g4 -sg29 -(dp199 -S'hg19' -p200 -(dp201 -g33 -S'NC_000017.10:g.7578195_7578197del' -p202 -sg35 -(dp203 -g37 -g38 -sg39 -S'GCAC' -p204 -sg41 -S'7578194' -p205 -sg43 -g44 -sssg45 -(dp206 -g33 -S'NC_000017.11:g.7674877_7674879del' -p207 -sg35 -(dp208 -g37 -g38 -sg39 -S'GCAC' -p209 -sg41 -S'7674876' -p210 -sg43 -g44 -sssS'grch37' -p211 -(dp212 -g33 -S'NC_000017.10:g.7578195_7578197del' -p213 -sg35 -(dp214 -g37 -g55 -sg39 -S'GCAC' -p215 -sg41 -S'7578194' -p216 -sg43 -g44 -sssS'grch38' -p217 -(dp218 -g33 -S'NC_000017.11:g.7674877_7674879del' -p219 -sg35 -(dp220 -g37 -g55 -sg39 -S'GCAC' -p221 -sg41 -S'7674876' -p222 -sg43 -g44 -ssssg64 -(dp223 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119589.1' -p224 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126117.1' -p225 -sssS'NM_001276761.1:c.535_537del' -p226 -(dp227 -g3 -g4 -sg5 -(lp228 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p229 -aS'RefSeqGene record not available' -p230 -asg9 -g4 -sg10 -(lp231 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 2, mRNA -p232 -sg14 -S'TP53' -p233 -sg16 -(dp234 -g18 -S'NP_001263690.1:p.(Val179del)' -p235 -sg20 -S'NP_001263690.1:p.(V179del)' -p236 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001276761.1:c.535_537del' -p237 -sg28 -g4 -sg29 -(dp238 -S'hg19' -p239 -(dp240 -g33 -S'NC_000017.10:g.7578195_7578197del' -p241 -sg35 -(dp242 -g37 -g38 -sg39 -S'GCAC' -p243 -sg41 -S'7578194' -p244 -sg43 -g44 -sssg45 -(dp245 -g33 
-S'NC_000017.11:g.7674877_7674879del' -p246 -sg35 -(dp247 -g37 -g38 -sg39 -S'GCAC' -p248 -sg41 -S'7674876' -p249 -sg43 -g44 -sssS'grch37' -p250 -(dp251 -g33 -S'NC_000017.10:g.7578195_7578197del' -p252 -sg35 -(dp253 -g37 -g55 -sg39 -S'GCAC' -p254 -sg41 -S'7578194' -p255 -sg43 -g44 -sssS'grch38' -p256 -(dp257 -g33 -S'NC_000017.11:g.7674877_7674879del' -p258 -sg35 -(dp259 -g37 -g55 -sg39 -S'GCAC' -p260 -sg41 -S'7674876' -p261 -sg43 -g44 -ssssg64 -(dp262 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263690.1' -p263 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276761.1' -p264 -sssS'NM_001126112.2:c.652_654del' -p265 -(dp266 -g3 -g4 -sg5 -(lp267 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p268 -aS'RefSeqGene record not available' -p269 -asg9 -g4 -sg10 -(lp270 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 2, mRNA -p271 -sg14 -S'TP53' -p272 -sg16 -(dp273 -g18 -S'NP_001119584.1:p.(Val218del)' -p274 -sg20 -S'NP_001119584.1:p.(V218del)' -p275 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001126112.2:c.652_654del' -p276 -sg28 -g4 -sg29 -(dp277 -S'hg19' -p278 -(dp279 -g33 -S'NC_000017.10:g.7578195_7578197del' -p280 -sg35 -(dp281 -g37 -g38 -sg39 -S'GCAC' -p282 -sg41 -S'7578194' -p283 -sg43 -g44 -sssg45 -(dp284 -g33 -S'NC_000017.11:g.7674877_7674879del' -p285 -sg35 -(dp286 -g37 -g38 -sg39 -S'GCAC' -p287 -sg41 -S'7674876' -p288 -sg43 -g44 -sssS'grch37' -p289 -(dp290 -g33 -S'NC_000017.10:g.7578195_7578197del' -p291 -sg35 -(dp292 -g37 -g55 -sg39 -S'GCAC' -p293 -sg41 -S'7578194' -p294 -sg43 -g44 -sssS'grch38' -p295 -(dp296 -g33 -S'NC_000017.11:g.7674877_7674879del' -p297 -sg35 -(dp298 -g37 -g55 -sg39 -S'GCAC' -p299 -sg41 -S'7674876' -p300 -sg43 -g44 -ssssg64 -(dp301 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119584.1' -p302 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126112.2' -p303 -sssS'flag' -p304 -S'gene_variant' -p305 -sS'NM_001276697.1:c.175_177del' -p306 -(dp307 -g3 -g4 -sg5 -(lp308 
-S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p309 -aS'RefSeqGene record not available' -p310 -asg9 -g4 -sg10 -(lp311 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 5, mRNA -p312 -sg14 -S'TP53' -p313 -sg16 -(dp314 -g18 -S'NP_001263626.1:p.(Val59del)' -p315 -sg20 -S'NP_001263626.1:p.(V59del)' -p316 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001276697.1:c.175_177del' -p317 -sg28 -g4 -sg29 -(dp318 -S'hg19' -p319 -(dp320 -g33 -S'NC_000017.10:g.7578195_7578197del' -p321 -sg35 -(dp322 -g37 -g38 -sg39 -S'GCAC' -p323 -sg41 -S'7578194' -p324 -sg43 -g44 -sssg45 -(dp325 -g33 -S'NC_000017.11:g.7674877_7674879del' -p326 -sg35 -(dp327 -g37 -g38 -sg39 -S'GCAC' -p328 -sg41 -S'7674876' -p329 -sg43 -g44 -sssS'grch37' -p330 -(dp331 -g33 -S'NC_000017.10:g.7578195_7578197del' -p332 -sg35 -(dp333 -g37 -g55 -sg39 -S'GCAC' -p334 -sg41 -S'7578194' -p335 -sg43 -g44 -sssS'grch38' -p336 -(dp337 -g33 -S'NC_000017.11:g.7674877_7674879del' -p338 -sg35 -(dp339 -g37 -g55 -sg39 -S'GCAC' -p340 -sg41 -S'7674876' -p341 -sg43 -g44 -ssssg64 -(dp342 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263626.1' -p343 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276697.1' -p344 -sssS'NM_001276696.1:c.535_537del' -p345 -(dp346 -g3 -g4 -sg5 -(lp347 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p348 -aS'RefSeqGene record not available' -p349 -asg9 -g4 -sg10 -(lp350 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 3, mRNA -p351 -sg14 -S'TP53' -p352 -sg16 -(dp353 -g18 -S'NP_001263625.1:p.(Val179del)' -p354 -sg20 -S'NP_001263625.1:p.(V179del)' -p355 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001276696.1:c.535_537del' -p356 -sg28 -g4 -sg29 -(dp357 -S'hg19' -p358 -(dp359 -g33 -S'NC_000017.10:g.7578195_7578197del' -p360 -sg35 -(dp361 -g37 -g38 -sg39 -S'GCAC' -p362 -sg41 -S'7578194' -p363 -sg43 -g44 -sssg45 -(dp364 -g33 -S'NC_000017.11:g.7674877_7674879del' -p365 -sg35 -(dp366 -g37 -g38 -sg39 
-S'GCAC' -p367 -sg41 -S'7674876' -p368 -sg43 -g44 -sssS'grch37' -p369 -(dp370 -g33 -S'NC_000017.10:g.7578195_7578197del' -p371 -sg35 -(dp372 -g37 -g55 -sg39 -S'GCAC' -p373 -sg41 -S'7578194' -p374 -sg43 -g44 -sssS'grch38' -p375 -(dp376 -g33 -S'NC_000017.11:g.7674877_7674879del' -p377 -sg35 -(dp378 -g37 -g55 -sg39 -S'GCAC' -p379 -sg41 -S'7674876' -p380 -sg43 -g44 -ssssg64 -(dp381 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263625.1' -p382 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276696.1' -p383 -sssS'NM_001276698.1:c.175_177del' -p384 -(dp385 -g3 -g4 -sg5 -(lp386 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p387 -aS'RefSeqGene record not available' -p388 -asg9 -g4 -sg10 -(lp389 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 6, mRNA -p390 -sg14 -S'TP53' -p391 -sg16 -(dp392 -g18 -S'NP_001263627.1:p.(Val59del)' -p393 -sg20 -S'NP_001263627.1:p.(V59del)' -p394 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001276698.1:c.175_177del' -p395 -sg28 -g4 -sg29 -(dp396 -S'hg19' -p397 -(dp398 -g33 -S'NC_000017.10:g.7578195_7578197del' -p399 -sg35 -(dp400 -g37 -g38 -sg39 -S'GCAC' -p401 -sg41 -S'7578194' -p402 -sg43 -g44 -sssg45 -(dp403 -g33 -S'NC_000017.11:g.7674877_7674879del' -p404 -sg35 -(dp405 -g37 -g38 -sg39 -S'GCAC' -p406 -sg41 -S'7674876' -p407 -sg43 -g44 -sssS'grch37' -p408 -(dp409 -g33 -S'NC_000017.10:g.7578195_7578197del' -p410 -sg35 -(dp411 -g37 -g55 -sg39 -S'GCAC' -p412 -sg41 -S'7578194' -p413 -sg43 -g44 -sssS'grch38' -p414 -(dp415 -g33 -S'NC_000017.11:g.7674877_7674879del' -p416 -sg35 -(dp417 -g37 -g55 -sg39 -S'GCAC' -p418 -sg41 -S'7674876' -p419 -sg43 -g44 -ssssg64 -(dp420 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263627.1' -p421 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276698.1' -p422 -sssS'NM_001126115.1:c.256_258del' -p423 -(dp424 -g3 -g4 -sg5 -(lp425 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p426 -aS'RefSeqGene record not 
available' -p427 -asg9 -g4 -sg10 -(lp428 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 5, mRNA -p429 -sg14 -S'TP53' -p430 -sg16 -(dp431 -g18 -S'NP_001119587.1:p.(Val86del)' -p432 -sg20 -S'NP_001119587.1:p.(V86del)' -p433 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001126115.1:c.256_258del' -p434 -sg28 -g4 -sg29 -(dp435 -S'hg19' -p436 -(dp437 -g33 -S'NC_000017.10:g.7578195_7578197del' -p438 -sg35 -(dp439 -g37 -g38 -sg39 -S'GCAC' -p440 -sg41 -S'7578194' -p441 -sg43 -g44 -sssg45 -(dp442 -g33 -S'NC_000017.11:g.7674877_7674879del' -p443 -sg35 -(dp444 -g37 -g38 -sg39 -S'GCAC' -p445 -sg41 -S'7674876' -p446 -sg43 -g44 -sssS'grch37' -p447 -(dp448 -g33 -S'NC_000017.10:g.7578195_7578197del' -p449 -sg35 -(dp450 -g37 -g55 -sg39 -S'GCAC' -p451 -sg41 -S'7578194' -p452 -sg43 -g44 -sssS'grch38' -p453 -(dp454 -g33 -S'NC_000017.11:g.7674877_7674879del' -p455 -sg35 -(dp456 -g37 -g55 -sg39 -S'GCAC' -p457 -sg41 -S'7674876' -p458 -sg43 -g44 -ssssg64 -(dp459 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119587.1' -p460 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126115.1' -p461 -sssS'NM_001126114.2:c.652_654del' -p462 -(dp463 -g3 -g4 -sg5 -(lp464 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p465 -aS'RefSeqGene record not available' -p466 -asg9 -g4 -sg10 -(lp467 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 3, mRNA -p468 -sg14 -S'TP53' -p469 -sg16 -(dp470 -g18 -S'NP_001119586.1:p.(Val218del)' -p471 -sg20 -S'NP_001119586.1:p.(V218del)' -p472 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001126114.2:c.652_654del' -p473 -sg28 -g4 -sg29 -(dp474 -S'hg19' -p475 -(dp476 -g33 -S'NC_000017.10:g.7578195_7578197del' -p477 -sg35 -(dp478 -g37 -g38 -sg39 -S'GCAC' -p479 -sg41 -S'7578194' -p480 -sg43 -g44 -sssg45 -(dp481 -g33 -S'NC_000017.11:g.7674877_7674879del' -p482 -sg35 -(dp483 -g37 -g38 -sg39 -S'GCAC' -p484 -sg41 -S'7674876' -p485 -sg43 -g44 -sssS'grch37' -p486 -(dp487 -g33 
-S'NC_000017.10:g.7578195_7578197del' -p488 -sg35 -(dp489 -g37 -g55 -sg39 -S'GCAC' -p490 -sg41 -S'7578194' -p491 -sg43 -g44 -sssS'grch38' -p492 -(dp493 -g33 -S'NC_000017.11:g.7674877_7674879del' -p494 -sg35 -(dp495 -g37 -g55 -sg39 -S'GCAC' -p496 -sg41 -S'7674876' -p497 -sg43 -g44 -ssssg64 -(dp498 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119586.1' -p499 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126114.2' -p500 -sssS'metadata' -p501 -(dp502 -S'variantvalidator_hgvs_version' -p503 -S'1.1.3' -p504 -sS'uta_schema' -p505 -S'uta_20180821' -p506 -sS'seqrepo_db' -p507 -S'2018-08-21' -p508 -sS'variantvalidator_version' -p509 -S'v0.2' -p510 -ssS'NM_001276699.1:c.175_177del' -p511 -(dp512 -g3 -g4 -sg5 -(lp513 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p514 -aS'RefSeqGene record not available' -p515 -asg9 -g4 -sg10 -(lp516 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 7, mRNA -p517 -sg14 -S'TP53' -p518 -sg16 -(dp519 -g18 -S'NP_001263628.1:p.(Val59del)' -p520 -sg20 -S'NP_001263628.1:p.(V59del)' -p521 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001276699.1:c.175_177del' -p522 -sg28 -g4 -sg29 -(dp523 -S'hg19' -p524 -(dp525 -g33 -S'NC_000017.10:g.7578195_7578197del' -p526 -sg35 -(dp527 -g37 -g38 -sg39 -S'GCAC' -p528 -sg41 -S'7578194' -p529 -sg43 -g44 -sssg45 -(dp530 -g33 -S'NC_000017.11:g.7674877_7674879del' -p531 -sg35 -(dp532 -g37 -g38 -sg39 -S'GCAC' -p533 -sg41 -S'7674876' -p534 -sg43 -g44 -sssS'grch37' -p535 -(dp536 -g33 -S'NC_000017.10:g.7578195_7578197del' -p537 -sg35 -(dp538 -g37 -g55 -sg39 -S'GCAC' -p539 -sg41 -S'7578194' -p540 -sg43 -g44 -sssS'grch38' -p541 -(dp542 -g33 -S'NC_000017.11:g.7674877_7674879del' -p543 -sg35 -(dp544 -g37 -g55 -sg39 -S'GCAC' -p545 -sg41 -S'7674876' -p546 -sg43 -g44 -ssssg64 -(dp547 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263628.1' -p548 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276699.1' -p549 -sssS'NM_001276760.1:c.535_537del' -p550 -(dp551 -g3 
-g4 -sg5 -(lp552 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p553 -aS'RefSeqGene record not available' -p554 -asg9 -g4 -sg10 -(lp555 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 1, mRNA -p556 -sg14 -S'TP53' -p557 -sg16 -(dp558 -g18 -S'NP_001263689.1:p.(Val179del)' -p559 -sg20 -S'NP_001263689.1:p.(V179del)' -p560 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001276760.1:c.535_537del' -p561 -sg28 -g4 -sg29 -(dp562 -S'hg19' -p563 -(dp564 -g33 -S'NC_000017.10:g.7578195_7578197del' -p565 -sg35 -(dp566 -g37 -g38 -sg39 -S'GCAC' -p567 -sg41 -S'7578194' -p568 -sg43 -g44 -sssg45 -(dp569 -g33 -S'NC_000017.11:g.7674877_7674879del' -p570 -sg35 -(dp571 -g37 -g38 -sg39 -S'GCAC' -p572 -sg41 -S'7674876' -p573 -sg43 -g44 -sssS'grch37' -p574 -(dp575 -g33 -S'NC_000017.10:g.7578195_7578197del' -p576 -sg35 -(dp577 -g37 -g55 -sg39 -S'GCAC' -p578 -sg41 -S'7578194' -p579 -sg43 -g44 -sssS'grch38' -p580 -(dp581 -g33 -S'NC_000017.11:g.7674877_7674879del' -p582 -sg35 -(dp583 -g37 -g55 -sg39 -S'GCAC' -p584 -sg41 -S'7674876' -p585 -sg43 -g44 -ssssg64 -(dp586 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263689.1' -p587 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276760.1' -p588 -sssS'NM_000546.5:c.652_654del' -p589 -(dp590 -g3 -g4 -sg5 -(lp591 -S'NC_000017.10:g.7578194GCAC>G automapped to NC_000017.10:g.7578201_7578203delCAC' -p592 -aS'RefSeqGene record not available' -p593 -asg9 -g4 -sg10 -(lp594 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 1, mRNA -p595 -sg14 -S'TP53' -p596 -sg16 -(dp597 -g18 -S'NP_000537.3:p.(Val218del)' -p598 -sg20 -S'NP_000537.3:p.(V218del)' -p599 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_000546.5:c.652_654del' -p600 -sg28 -g4 -sg29 -(dp601 -S'hg19' -p602 -(dp603 -g33 -S'NC_000017.10:g.7578195_7578197del' -p604 -sg35 -(dp605 -g37 -g38 -sg39 -S'GCAC' -p606 -sg41 -S'7578194' -p607 -sg43 -g44 -sssg45 -(dp608 -g33 -S'NC_000017.11:g.7674877_7674879del' -p609 -sg35 -(dp610 -g37 -g38 
-sg39 -S'GCAC' -p611 -sg41 -S'7674876' -p612 -sg43 -g44 -sssS'grch37' -p613 -(dp614 -g33 -S'NC_000017.10:g.7578195_7578197del' -p615 -sg35 -(dp616 -g37 -g55 -sg39 -S'GCAC' -p617 -sg41 -S'7578194' -p618 -sg43 -g44 -sssS'grch38' -p619 -(dp620 -g33 -S'NC_000017.11:g.7674877_7674879del' -p621 -sg35 -(dp622 -g37 -g55 -sg39 -S'GCAC' -p623 -sg41 -S'7674876' -p624 -sg43 -g44 -ssssg64 -(dp625 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000537.3' -p626 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000546.5' -p627 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant251.txt b/VariantValidator/testing/testOutputsMasterITS/variant251.txt deleted file mode 100644 index 67533096..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant251.txt +++ /dev/null @@ -1,1870 +0,0 @@ -(dp0 -S'NM_001276760.1:c.289dup' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 1, mRNA -p13 -sS'gene_symbol' -p14 -S'TP53' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001263689.1:p.(Gln97ProfsTer13)' -p19 -sS'slr' -p20 -S'NP_001263689.1:p.(Q97Pfs*13)' -p21 -ssS'submitted_variant' -p22 -S'17-7578523-T-TG' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_001276760.1:c.289dup' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000017.10:g.7578524dup' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr17' -p38 -sS'ref' -p39 -S'G' -p40 -sS'pos' -p41 -S'7578524' -p42 -sS'alt' -p43 -VGG -p44 -sssS'hg38' -p45 -(dp46 -g33 
-S'NC_000017.11:g.7675206dup' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p49 -sg43 -VGG -p50 -sssS'grch37' -p51 -(dp52 -g33 -S'NC_000017.10:g.7578524dup' -p53 -sg35 -(dp54 -g37 -S'17' -p55 -sg39 -g40 -sg41 -S'7578524' -p56 -sg43 -VGG -p57 -sssS'grch38' -p58 -(dp59 -g33 -S'NC_000017.11:g.7675206dup' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p62 -sg43 -VGG -p63 -ssssS'reference_sequence_records' -p64 -(dp65 -S'protein' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263689.1' -p67 -sS'transcript' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276760.1' -p69 -sssS'NM_001126118.1:c.289dup' -p70 -(dp71 -g3 -g4 -sg5 -(lp72 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p73 -aS'RefSeqGene record not available' -p74 -asg9 -g4 -sg10 -(lp75 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 8, mRNA -p76 -sg14 -S'TP53' -p77 -sg16 -(dp78 -g18 -S'NP_001119590.1:p.(Gln97ProfsTer13)' -p79 -sg20 -S'NP_001119590.1:p.(Q97Pfs*13)' -p80 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001126118.1:c.289dup' -p81 -sg28 -g4 -sg29 -(dp82 -S'hg19' -p83 -(dp84 -g33 -S'NC_000017.10:g.7578524dup' -p85 -sg35 -(dp86 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p87 -sg43 -VGG -p88 -sssg45 -(dp89 -g33 -S'NC_000017.11:g.7675206dup' -p90 -sg35 -(dp91 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p92 -sg43 -VGG -p93 -sssS'grch37' -p94 -(dp95 -g33 -S'NC_000017.10:g.7578524dup' -p96 -sg35 -(dp97 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p98 -sg43 -VGG -p99 -sssS'grch38' -p100 -(dp101 -g33 -S'NC_000017.11:g.7675206dup' -p102 -sg35 -(dp103 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p104 -sg43 -VGG -p105 -ssssg64 -(dp106 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119590.1' -p107 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126118.1' -p108 -sssS'NM_001276695.1:c.289dup' -p109 -(dp110 -g3 -g4 -sg5 -(lp111 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p112 -aS'RefSeqGene record not available' -p113 -asg9 -g4 
-sg10 -(lp114 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 4, mRNA -p115 -sg14 -S'TP53' -p116 -sg16 -(dp117 -g18 -S'NP_001263624.1:p.(Gln97ProfsTer13)' -p118 -sg20 -S'NP_001263624.1:p.(Q97Pfs*13)' -p119 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001276695.1:c.289dup' -p120 -sg28 -g4 -sg29 -(dp121 -S'hg19' -p122 -(dp123 -g33 -S'NC_000017.10:g.7578524dup' -p124 -sg35 -(dp125 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p126 -sg43 -VGG -p127 -sssg45 -(dp128 -g33 -S'NC_000017.11:g.7675206dup' -p129 -sg35 -(dp130 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p131 -sg43 -VGG -p132 -sssS'grch37' -p133 -(dp134 -g33 -S'NC_000017.10:g.7578524dup' -p135 -sg35 -(dp136 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p137 -sg43 -VGG -p138 -sssS'grch38' -p139 -(dp140 -g33 -S'NC_000017.11:g.7675206dup' -p141 -sg35 -(dp142 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p143 -sg43 -VGG -p144 -ssssg64 -(dp145 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263624.1' -p146 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276695.1' -p147 -sssS'NM_001276699.1:c.-72dup' -p148 -(dp149 -g3 -g4 -sg5 -(lp150 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p151 -aS'RefSeqGene record not available' -p152 -asg9 -g4 -sg10 -(lp153 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 7, mRNA -p154 -sg14 -S'TP53' -p155 -sg16 -(dp156 -g18 -S'NP_001263628.1:p.?' -p157 -sg20 -S'NP_001263628.1:p.?' 
-p158 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001276699.1:c.-72dup' -p159 -sg28 -g4 -sg29 -(dp160 -S'hg19' -p161 -(dp162 -g33 -S'NC_000017.10:g.7578524dup' -p163 -sg35 -(dp164 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p165 -sg43 -VGG -p166 -sssg45 -(dp167 -g33 -S'NC_000017.11:g.7675206dup' -p168 -sg35 -(dp169 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p170 -sg43 -VGG -p171 -sssS'grch37' -p172 -(dp173 -g33 -S'NC_000017.10:g.7578524dup' -p174 -sg35 -(dp175 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p176 -sg43 -VGG -p177 -sssS'grch38' -p178 -(dp179 -g33 -S'NC_000017.11:g.7675206dup' -p180 -sg35 -(dp181 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p182 -sg43 -VGG -p183 -ssssg64 -(dp184 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263628.1' -p185 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276699.1' -p186 -sssS'NM_001126115.1:c.10dup' -p187 -(dp188 -g3 -g4 -sg5 -(lp189 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p190 -aS'RefSeqGene record not available' -p191 -asg9 -g4 -sg10 -(lp192 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 5, mRNA -p193 -sg14 -S'TP53' -p194 -sg16 -(dp195 -g18 -S'NP_001119587.1:p.(Gln4ProfsTer13)' -p196 -sg20 -S'NP_001119587.1:p.(Q4Pfs*13)' -p197 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001126115.1:c.10dup' -p198 -sg28 -g4 -sg29 -(dp199 -S'hg19' -p200 -(dp201 -g33 -S'NC_000017.10:g.7578524dup' -p202 -sg35 -(dp203 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p204 -sg43 -VGG -p205 -sssg45 -(dp206 -g33 -S'NC_000017.11:g.7675206dup' -p207 -sg35 -(dp208 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p209 -sg43 -VGG -p210 -sssS'grch37' -p211 -(dp212 -g33 -S'NC_000017.10:g.7578524dup' -p213 -sg35 -(dp214 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p215 -sg43 -VGG -p216 -sssS'grch38' -p217 -(dp218 -g33 -S'NC_000017.11:g.7675206dup' -p219 -sg35 -(dp220 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p221 -sg43 -VGG -p222 -ssssg64 -(dp223 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119587.1' -p224 -sg68 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126115.1' -p225 -sssS'NM_001276697.1:c.-72dup' -p226 -(dp227 -g3 -g4 -sg5 -(lp228 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p229 -aS'RefSeqGene record not available' -p230 -asg9 -g4 -sg10 -(lp231 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 5, mRNA -p232 -sg14 -S'TP53' -p233 -sg16 -(dp234 -g18 -S'NP_001263626.1:p.?' -p235 -sg20 -S'NP_001263626.1:p.?' -p236 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001276697.1:c.-72dup' -p237 -sg28 -g4 -sg29 -(dp238 -S'hg19' -p239 -(dp240 -g33 -S'NC_000017.10:g.7578524dup' -p241 -sg35 -(dp242 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p243 -sg43 -VGG -p244 -sssg45 -(dp245 -g33 -S'NC_000017.11:g.7675206dup' -p246 -sg35 -(dp247 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p248 -sg43 -VGG -p249 -sssS'grch37' -p250 -(dp251 -g33 -S'NC_000017.10:g.7578524dup' -p252 -sg35 -(dp253 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p254 -sg43 -VGG -p255 -sssS'grch38' -p256 -(dp257 -g33 -S'NC_000017.11:g.7675206dup' -p258 -sg35 -(dp259 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p260 -sg43 -VGG -p261 -ssssg64 -(dp262 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263626.1' -p263 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276697.1' -p264 -sssS'NM_001126117.1:c.10dup' -p265 -(dp266 -g3 -g4 -sg5 -(lp267 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p268 -aS'RefSeqGene record not available' -p269 -asg9 -g4 -sg10 -(lp270 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 7, mRNA -p271 -sg14 -S'TP53' -p272 -sg16 -(dp273 -g18 -S'NP_001119589.1:p.(Gln4ProfsTer13)' -p274 -sg20 -S'NP_001119589.1:p.(Q4Pfs*13)' -p275 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001126117.1:c.10dup' -p276 -sg28 -g4 -sg29 -(dp277 -S'hg19' -p278 -(dp279 -g33 -S'NC_000017.10:g.7578524dup' -p280 -sg35 -(dp281 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p282 -sg43 -VGG -p283 -sssg45 -(dp284 -g33 -S'NC_000017.11:g.7675206dup' -p285 -sg35 -(dp286 -g37 
-g38 -sg39 -g40 -sg41 -S'7675206' -p287 -sg43 -VGG -p288 -sssS'grch37' -p289 -(dp290 -g33 -S'NC_000017.10:g.7578524dup' -p291 -sg35 -(dp292 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p293 -sg43 -VGG -p294 -sssS'grch38' -p295 -(dp296 -g33 -S'NC_000017.11:g.7675206dup' -p297 -sg35 -(dp298 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p299 -sg43 -VGG -p300 -ssssg64 -(dp301 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119589.1' -p302 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126117.1' -p303 -sssS'NM_000546.5:c.406dup' -p304 -(dp305 -g3 -g4 -sg5 -(lp306 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p307 -aS'RefSeqGene record not available' -p308 -asg9 -g4 -sg10 -(lp309 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 1, mRNA -p310 -sg14 -S'TP53' -p311 -sg16 -(dp312 -g18 -S'NP_000537.3:p.(Gln136ProfsTer13)' -p313 -sg20 -S'NP_000537.3:p.(Q136Pfs*13)' -p314 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_000546.5:c.406dup' -p315 -sg28 -g4 -sg29 -(dp316 -S'hg19' -p317 -(dp318 -g33 -S'NC_000017.10:g.7578524dup' -p319 -sg35 -(dp320 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p321 -sg43 -VGG -p322 -sssg45 -(dp323 -g33 -S'NC_000017.11:g.7675206dup' -p324 -sg35 -(dp325 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p326 -sg43 -VGG -p327 -sssS'grch37' -p328 -(dp329 -g33 -S'NC_000017.10:g.7578524dup' -p330 -sg35 -(dp331 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p332 -sg43 -VGG -p333 -sssS'grch38' -p334 -(dp335 -g33 -S'NC_000017.11:g.7675206dup' -p336 -sg35 -(dp337 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p338 -sg43 -VGG -p339 -ssssg64 -(dp340 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000537.3' -p341 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000546.5' -p342 -sssS'flag' -p343 -S'gene_variant' -p344 -sS'NM_001276696.1:c.289dup' -p345 -(dp346 -g3 -g4 -sg5 -(lp347 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p348 -aS'RefSeqGene record not available' -p349 -asg9 -g4 -sg10 -(lp350 -sg12 -VHomo sapiens tumor protein 
p53 (TP53), transcript variant 3, mRNA -p351 -sg14 -S'TP53' -p352 -sg16 -(dp353 -g18 -S'NP_001263625.1:p.(Gln97ProfsTer13)' -p354 -sg20 -S'NP_001263625.1:p.(Q97Pfs*13)' -p355 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001276696.1:c.289dup' -p356 -sg28 -g4 -sg29 -(dp357 -S'hg19' -p358 -(dp359 -g33 -S'NC_000017.10:g.7578524dup' -p360 -sg35 -(dp361 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p362 -sg43 -VGG -p363 -sssg45 -(dp364 -g33 -S'NC_000017.11:g.7675206dup' -p365 -sg35 -(dp366 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p367 -sg43 -VGG -p368 -sssS'grch37' -p369 -(dp370 -g33 -S'NC_000017.10:g.7578524dup' -p371 -sg35 -(dp372 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p373 -sg43 -VGG -p374 -sssS'grch38' -p375 -(dp376 -g33 -S'NC_000017.11:g.7675206dup' -p377 -sg35 -(dp378 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p379 -sg43 -VGG -p380 -ssssg64 -(dp381 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263625.1' -p382 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276696.1' -p383 -sssS'NM_001276698.1:c.-72dup' -p384 -(dp385 -g3 -g4 -sg5 -(lp386 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p387 -aS'RefSeqGene record not available' -p388 -asg9 -g4 -sg10 -(lp389 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 6, mRNA -p390 -sg14 -S'TP53' -p391 -sg16 -(dp392 -g18 -S'NP_001263627.1:p.?' -p393 -sg20 -S'NP_001263627.1:p.?' 
-p394 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001276698.1:c.-72dup' -p395 -sg28 -g4 -sg29 -(dp396 -S'hg19' -p397 -(dp398 -g33 -S'NC_000017.10:g.7578524dup' -p399 -sg35 -(dp400 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p401 -sg43 -VGG -p402 -sssg45 -(dp403 -g33 -S'NC_000017.11:g.7675206dup' -p404 -sg35 -(dp405 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p406 -sg43 -VGG -p407 -sssS'grch37' -p408 -(dp409 -g33 -S'NC_000017.10:g.7578524dup' -p410 -sg35 -(dp411 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p412 -sg43 -VGG -p413 -sssS'grch38' -p414 -(dp415 -g33 -S'NC_000017.11:g.7675206dup' -p416 -sg35 -(dp417 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p418 -sg43 -VGG -p419 -ssssg64 -(dp420 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263627.1' -p421 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276698.1' -p422 -sssS'NM_001126114.2:c.406dup' -p423 -(dp424 -g3 -g4 -sg5 -(lp425 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p426 -aS'RefSeqGene record not available' -p427 -asg9 -g4 -sg10 -(lp428 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 3, mRNA -p429 -sg14 -S'TP53' -p430 -sg16 -(dp431 -g18 -S'NP_001119586.1:p.(Gln136ProfsTer13)' -p432 -sg20 -S'NP_001119586.1:p.(Q136Pfs*13)' -p433 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001126114.2:c.406dup' -p434 -sg28 -g4 -sg29 -(dp435 -S'hg19' -p436 -(dp437 -g33 -S'NC_000017.10:g.7578524dup' -p438 -sg35 -(dp439 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p440 -sg43 -VGG -p441 -sssg45 -(dp442 -g33 -S'NC_000017.11:g.7675206dup' -p443 -sg35 -(dp444 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p445 -sg43 -VGG -p446 -sssS'grch37' -p447 -(dp448 -g33 -S'NC_000017.10:g.7578524dup' -p449 -sg35 -(dp450 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p451 -sg43 -VGG -p452 -sssS'grch38' -p453 -(dp454 -g33 -S'NC_000017.11:g.7675206dup' -p455 -sg35 -(dp456 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p457 -sg43 -VGG -p458 -ssssg64 -(dp459 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119586.1' -p460 -sg68 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126114.2' -p461 -sssS'NM_001276761.1:c.289dup' -p462 -(dp463 -g3 -g4 -sg5 -(lp464 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p465 -aS'RefSeqGene record not available' -p466 -asg9 -g4 -sg10 -(lp467 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 2, mRNA -p468 -sg14 -S'TP53' -p469 -sg16 -(dp470 -g18 -S'NP_001263690.1:p.(Gln97ProfsTer13)' -p471 -sg20 -S'NP_001263690.1:p.(Q97Pfs*13)' -p472 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001276761.1:c.289dup' -p473 -sg28 -g4 -sg29 -(dp474 -S'hg19' -p475 -(dp476 -g33 -S'NC_000017.10:g.7578524dup' -p477 -sg35 -(dp478 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p479 -sg43 -VGG -p480 -sssg45 -(dp481 -g33 -S'NC_000017.11:g.7675206dup' -p482 -sg35 -(dp483 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p484 -sg43 -VGG -p485 -sssS'grch37' -p486 -(dp487 -g33 -S'NC_000017.10:g.7578524dup' -p488 -sg35 -(dp489 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p490 -sg43 -VGG -p491 -sssS'grch38' -p492 -(dp493 -g33 -S'NC_000017.11:g.7675206dup' -p494 -sg35 -(dp495 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p496 -sg43 -VGG -p497 -ssssg64 -(dp498 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263690.1' -p499 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276761.1' -p500 -sssS'NM_001126113.2:c.406dup' -p501 -(dp502 -g3 -g4 -sg5 -(lp503 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p504 -aS'RefSeqGene record not available' -p505 -asg9 -g4 -sg10 -(lp506 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 4, mRNA -p507 -sg14 -S'TP53' -p508 -sg16 -(dp509 -g18 -S'NP_001119585.1:p.(Gln136ProfsTer13)' -p510 -sg20 -S'NP_001119585.1:p.(Q136Pfs*13)' -p511 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001126113.2:c.406dup' -p512 -sg28 -g4 -sg29 -(dp513 -S'hg19' -p514 -(dp515 -g33 -S'NC_000017.10:g.7578524dup' -p516 -sg35 -(dp517 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p518 -sg43 -VGG -p519 -sssg45 -(dp520 -g33 
-S'NC_000017.11:g.7675206dup' -p521 -sg35 -(dp522 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p523 -sg43 -VGG -p524 -sssS'grch37' -p525 -(dp526 -g33 -S'NC_000017.10:g.7578524dup' -p527 -sg35 -(dp528 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p529 -sg43 -VGG -p530 -sssS'grch38' -p531 -(dp532 -g33 -S'NC_000017.11:g.7675206dup' -p533 -sg35 -(dp534 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p535 -sg43 -VGG -p536 -ssssg64 -(dp537 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119585.1' -p538 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126113.2' -p539 -sssS'NM_001126116.1:c.10dup' -p540 -(dp541 -g3 -g4 -sg5 -(lp542 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p543 -aS'RefSeqGene record not available' -p544 -asg9 -g4 -sg10 -(lp545 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 6, mRNA -p546 -sg14 -S'TP53' -p547 -sg16 -(dp548 -g18 -S'NP_001119588.1:p.(Gln4ProfsTer13)' -p549 -sg20 -S'NP_001119588.1:p.(Q4Pfs*13)' -p550 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001126116.1:c.10dup' -p551 -sg28 -g4 -sg29 -(dp552 -S'hg19' -p553 -(dp554 -g33 -S'NC_000017.10:g.7578524dup' -p555 -sg35 -(dp556 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p557 -sg43 -VGG -p558 -sssg45 -(dp559 -g33 -S'NC_000017.11:g.7675206dup' -p560 -sg35 -(dp561 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p562 -sg43 -VGG -p563 -sssS'grch37' -p564 -(dp565 -g33 -S'NC_000017.10:g.7578524dup' -p566 -sg35 -(dp567 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p568 -sg43 -VGG -p569 -sssS'grch38' -p570 -(dp571 -g33 -S'NC_000017.11:g.7675206dup' -p572 -sg35 -(dp573 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p574 -sg43 -VGG -p575 -ssssg64 -(dp576 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119588.1' -p577 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126116.1' -p578 -sssS'metadata' -p579 -(dp580 -S'variantvalidator_hgvs_version' -p581 -S'1.1.3' -p582 -sS'uta_schema' -p583 -S'uta_20180821' -p584 -sS'seqrepo_db' -p585 -S'2018-08-21' -p586 -sS'variantvalidator_version' -p587 
-S'v0.2' -p588 -ssS'NM_001126112.2:c.406dup' -p589 -(dp590 -g3 -g4 -sg5 -(lp591 -S'NC_000017.10:g.7578523T>TG automapped to NC_000017.10:g.7578525dupG' -p592 -aS'RefSeqGene record not available' -p593 -asg9 -g4 -sg10 -(lp594 -sg12 -VHomo sapiens tumor protein p53 (TP53), transcript variant 2, mRNA -p595 -sg14 -S'TP53' -p596 -sg16 -(dp597 -g18 -S'NP_001119584.1:p.(Gln136ProfsTer13)' -p598 -sg20 -S'NP_001119584.1:p.(Q136Pfs*13)' -p599 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001126112.2:c.406dup' -p600 -sg28 -g4 -sg29 -(dp601 -S'hg19' -p602 -(dp603 -g33 -S'NC_000017.10:g.7578524dup' -p604 -sg35 -(dp605 -g37 -g38 -sg39 -g40 -sg41 -S'7578524' -p606 -sg43 -VGG -p607 -sssg45 -(dp608 -g33 -S'NC_000017.11:g.7675206dup' -p609 -sg35 -(dp610 -g37 -g38 -sg39 -g40 -sg41 -S'7675206' -p611 -sg43 -VGG -p612 -sssS'grch37' -p613 -(dp614 -g33 -S'NC_000017.10:g.7578524dup' -p615 -sg35 -(dp616 -g37 -g55 -sg39 -g40 -sg41 -S'7578524' -p617 -sg43 -VGG -p618 -sssS'grch38' -p619 -(dp620 -g33 -S'NC_000017.11:g.7675206dup' -p621 -sg35 -(dp622 -g37 -g55 -sg39 -g40 -sg41 -S'7675206' -p623 -sg43 -VGG -p624 -ssssg64 -(dp625 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119584.1' -p626 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126112.2' -p627 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant252.txt b/VariantValidator/testing/testOutputsMasterITS/variant252.txt deleted file mode 100644 index 86e02399..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant252.txt +++ /dev/null @@ -1,642 +0,0 @@ -(dp0 -S'NM_144997.6:c.1300+2T>G' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens folliculin (FLCN), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'FLCN' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_659434.2:p.?' -p18 -sS'slr' -p19 -S'NP_659434.2:p.?' -p20 -ssS'submitted_variant' -p21 -S'17-17119692-A-C' -p22 -sS'genome_context_intronic_sequence' -p23 -S'NC_000017.10(NM_144997.6):c.1300+2T>G' -p24 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_144997.6:c.1300+2T>G' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000017.10:g.17119692A>C' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr17' -p38 -sS'ref' -p39 -VA -p40 -sS'pos' -p41 -S'17119692' -p42 -sS'alt' -p43 -VC -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000017.11:g.17216378A>C' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'17216378' -p49 -sg43 -g44 -sssS'grch37' -p50 -(dp51 -g33 -S'NC_000017.10:g.17119692A>C' -p52 -sg35 -(dp53 -g37 -S'17' -p54 -sg39 -g40 -sg41 -S'17119692' -p55 -sg43 -g44 -sssS'grch38' -p56 -(dp57 -g33 -S'NC_000017.11:g.17216378A>C' -p58 -sg35 -(dp59 -g37 -g54 -sg39 -g40 -sg41 -S'17216378' -p60 -sg43 -g44 -ssssS'reference_sequence_records' -p61 -(dp62 -S'protein' -p63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_659434.2' -p64 -sS'transcript' -p65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_144997.6' -p66 
-sssS'NM_001353230.1:c.1300+2T>G' -p67 -(dp68 -g3 -g4 -sg5 -(lp69 -S'RefSeqGene record not available' -p70 -asg8 -g4 -sg9 -(lp71 -sg11 -VHomo sapiens folliculin (FLCN), transcript variant 4, mRNA -p72 -sg13 -S'FLCN' -p73 -sg15 -(dp74 -g17 -S'NP_001340159.1:p.?' -p75 -sg19 -S'NP_001340159.1:p.?' -p76 -ssg21 -g22 -sg23 -S'NC_000017.10(NM_001353230.1):c.1300+2T>G' -p77 -sg25 -g4 -sg26 -S'NM_001353230.1:c.1300+2T>G' -p78 -sg28 -g4 -sg29 -(dp79 -S'hg19' -p80 -(dp81 -g33 -S'NC_000017.10:g.17119692A>C' -p82 -sg35 -(dp83 -g37 -g38 -sg39 -g40 -sg41 -S'17119692' -p84 -sg43 -g44 -sssg45 -(dp85 -g33 -S'NC_000017.11:g.17216378A>C' -p86 -sg35 -(dp87 -g37 -g38 -sg39 -g40 -sg41 -S'17216378' -p88 -sg43 -g44 -sssS'grch37' -p89 -(dp90 -g33 -S'NC_000017.10:g.17119692A>C' -p91 -sg35 -(dp92 -g37 -g54 -sg39 -g40 -sg41 -S'17119692' -p93 -sg43 -g44 -sssS'grch38' -p94 -(dp95 -g33 -S'NC_000017.11:g.17216378A>C' -p96 -sg35 -(dp97 -g37 -g54 -sg39 -g40 -sg41 -S'17216378' -p98 -sg43 -g44 -ssssg61 -(dp99 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340159.1' -p100 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353230.1' -p101 -sssS'NM_001353229.1:c.1354+2T>G' -p102 -(dp103 -g3 -g4 -sg5 -(lp104 -S'RefSeqGene record not available' -p105 -asg8 -g4 -sg9 -(lp106 -sg11 -VHomo sapiens folliculin (FLCN), transcript variant 3, mRNA -p107 -sg13 -S'FLCN' -p108 -sg15 -(dp109 -g17 -S'NP_001340158.1:p.?' -p110 -sg19 -S'NP_001340158.1:p.?' 
-p111 -ssg21 -g22 -sg23 -S'NC_000017.10(NM_001353229.1):c.1354+2T>G' -p112 -sg25 -g4 -sg26 -S'NM_001353229.1:c.1354+2T>G' -p113 -sg28 -g4 -sg29 -(dp114 -S'hg19' -p115 -(dp116 -g33 -S'NC_000017.10:g.17119692A>C' -p117 -sg35 -(dp118 -g37 -g38 -sg39 -g40 -sg41 -S'17119692' -p119 -sg43 -g44 -sssg45 -(dp120 -g33 -S'NC_000017.11:g.17216378A>C' -p121 -sg35 -(dp122 -g37 -g38 -sg39 -g40 -sg41 -S'17216378' -p123 -sg43 -g44 -sssS'grch37' -p124 -(dp125 -g33 -S'NC_000017.10:g.17119692A>C' -p126 -sg35 -(dp127 -g37 -g54 -sg39 -g40 -sg41 -S'17119692' -p128 -sg43 -g44 -sssS'grch38' -p129 -(dp130 -g33 -S'NC_000017.11:g.17216378A>C' -p131 -sg35 -(dp132 -g37 -g54 -sg39 -g40 -sg41 -S'17216378' -p133 -sg43 -g44 -ssssg61 -(dp134 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340158.1' -p135 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353229.1' -p136 -sssS'flag' -p137 -S'gene_variant' -p138 -sS'NM_144997.5:c.1300+2T>G' -p139 -(dp140 -g3 -g4 -sg5 -(lp141 -S'A more recent version of the selected reference sequence NM_144997.5 is available (NM_144997.6)' -p142 -aS'NM_144997.6:c.1300+2T>G MUST be fully validated prior to use in reports' -p143 -aS'select_variants=NM_144997.6:c.1300+2T>G' -p144 -aS'RefSeqGene record not available' -p145 -asg8 -g4 -sg9 -(lp146 -sg11 -VHomo sapiens folliculin (FLCN), transcript variant 1, mRNA -p147 -sg13 -S'FLCN' -p148 -sg15 -(dp149 -g17 -S'NP_659434.2:p.?' -p150 -sg19 -S'NP_659434.2:p.?' 
-p151 -ssg21 -g22 -sg23 -S'NC_000017.10(NM_144997.5):c.1300+2T>G' -p152 -sg25 -g4 -sg26 -S'NM_144997.5:c.1300+2T>G' -p153 -sg28 -g4 -sg29 -(dp154 -S'hg19' -p155 -(dp156 -g33 -S'NC_000017.10:g.17119692A>C' -p157 -sg35 -(dp158 -g37 -g38 -sg39 -g40 -sg41 -S'17119692' -p159 -sg43 -g44 -sssg45 -(dp160 -g33 -S'NC_000017.11:g.17216378A>C' -p161 -sg35 -(dp162 -g37 -g38 -sg39 -g40 -sg41 -S'17216378' -p163 -sg43 -g44 -sssS'grch37' -p164 -(dp165 -g33 -S'NC_000017.10:g.17119692A>C' -p166 -sg35 -(dp167 -g37 -g54 -sg39 -g40 -sg41 -S'17119692' -p168 -sg43 -g44 -sssS'grch38' -p169 -(dp170 -g33 -S'NC_000017.11:g.17216378A>C' -p171 -sg35 -(dp172 -g37 -g54 -sg39 -g40 -sg41 -S'17216378' -p173 -sg43 -g44 -ssssg61 -(dp174 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_659434.2' -p175 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_144997.5' -p176 -sssS'NM_001353231.1:c.1300+2T>G' -p177 -(dp178 -g3 -g4 -sg5 -(lp179 -S'RefSeqGene record not available' -p180 -asg8 -g4 -sg9 -(lp181 -sg11 -VHomo sapiens folliculin (FLCN), transcript variant 5, mRNA -p182 -sg13 -S'FLCN' -p183 -sg15 -(dp184 -g17 -S'NP_001340160.1:p.?' -p185 -sg19 -S'NP_001340160.1:p.?' 
-p186 -ssg21 -g22 -sg23 -S'NC_000017.10(NM_001353231.1):c.1300+2T>G' -p187 -sg25 -g4 -sg26 -S'NM_001353231.1:c.1300+2T>G' -p188 -sg28 -g4 -sg29 -(dp189 -S'hg19' -p190 -(dp191 -g33 -S'NC_000017.10:g.17119692A>C' -p192 -sg35 -(dp193 -g37 -g38 -sg39 -g40 -sg41 -S'17119692' -p194 -sg43 -g44 -sssg45 -(dp195 -g33 -S'NC_000017.11:g.17216378A>C' -p196 -sg35 -(dp197 -g37 -g38 -sg39 -g40 -sg41 -S'17216378' -p198 -sg43 -g44 -sssS'grch37' -p199 -(dp200 -g33 -S'NC_000017.10:g.17119692A>C' -p201 -sg35 -(dp202 -g37 -g54 -sg39 -g40 -sg41 -S'17119692' -p203 -sg43 -g44 -sssS'grch38' -p204 -(dp205 -g33 -S'NC_000017.11:g.17216378A>C' -p206 -sg35 -(dp207 -g37 -g54 -sg39 -g40 -sg41 -S'17216378' -p208 -sg43 -g44 -ssssg61 -(dp209 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340160.1' -p210 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353231.1' -p211 -sssS'metadata' -p212 -(dp213 -S'variantvalidator_hgvs_version' -p214 -S'1.1.3' -p215 -sS'uta_schema' -p216 -S'uta_20180821' -p217 -sS'seqrepo_db' -p218 -S'2018-08-21' -p219 -sS'variantvalidator_version' -p220 -S'v0.2' -p221 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant253.txt b/VariantValidator/testing/testOutputsMasterITS/variant253.txt deleted file mode 100644 index d9d8f736..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant253.txt +++ /dev/null @@ -1,777 +0,0 @@ -(dp0 -S'NM_007294.3:c.*103_*106del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000017.10:g.41197588GGACA>G automapped to NC_000017.10:g.41197590_41197593del' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 1, mRNA -p13 -sS'gene_symbol' -p14 -S'BRCA1' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_009225.1:p.?' 
-p19 -sS'slr' -p20 -S'NP_009225.1:p.?' -p21 -ssS'submitted_variant' -p22 -S'17-41197588-GGACA-G' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_007294.3:c.*103_*106del' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000017.10:g.41197589_41197592del' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr17' -p38 -sS'ref' -p39 -S'GGACA' -p40 -sS'pos' -p41 -S'41197588' -p42 -sS'alt' -p43 -S'G' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000017.11:g.43045572_43045575del' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'GGACA' -p49 -sg41 -S'43045571' -p50 -sg43 -g44 -sssS'grch37' -p51 -(dp52 -g33 -S'NC_000017.10:g.41197589_41197592del' -p53 -sg35 -(dp54 -g37 -S'17' -p55 -sg39 -S'GGACA' -p56 -sg41 -S'41197588' -p57 -sg43 -g44 -sssS'grch38' -p58 -(dp59 -g33 -S'NC_000017.11:g.43045572_43045575del' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -S'GGACA' -p62 -sg41 -S'43045571' -p63 -sg43 -g44 -ssssS'reference_sequence_records' -p64 -(dp65 -S'protein' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009225.1' -p67 -sS'transcript' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007294.3' -p69 -sssS'NM_007297.3:c.*103_*106del' -p70 -(dp71 -g3 -g4 -sg5 -(lp72 -S'NC_000017.10:g.41197588GGACA>G automapped to NC_000017.10:g.41197590_41197593del' -p73 -aS'RefSeqGene record not available' -p74 -asg9 -g4 -sg10 -(lp75 -sg12 -VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 3, mRNA -p76 -sg14 -S'BRCA1' -p77 -sg16 -(dp78 -g18 -S'NP_009228.2:p.?' -p79 -sg20 -S'NP_009228.2:p.?' 
-p80 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_007297.3:c.*103_*106del' -p81 -sg28 -g4 -sg29 -(dp82 -S'hg19' -p83 -(dp84 -g33 -S'NC_000017.10:g.41197589_41197592del' -p85 -sg35 -(dp86 -g37 -g38 -sg39 -S'GGACA' -p87 -sg41 -S'41197588' -p88 -sg43 -g44 -sssg45 -(dp89 -g33 -S'NC_000017.11:g.43045572_43045575del' -p90 -sg35 -(dp91 -g37 -g38 -sg39 -S'GGACA' -p92 -sg41 -S'43045571' -p93 -sg43 -g44 -sssS'grch37' -p94 -(dp95 -g33 -S'NC_000017.10:g.41197589_41197592del' -p96 -sg35 -(dp97 -g37 -g55 -sg39 -S'GGACA' -p98 -sg41 -S'41197588' -p99 -sg43 -g44 -sssS'grch38' -p100 -(dp101 -g33 -S'NC_000017.11:g.43045572_43045575del' -p102 -sg35 -(dp103 -g37 -g55 -sg39 -S'GGACA' -p104 -sg41 -S'43045571' -p105 -sg43 -g44 -ssssg64 -(dp106 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009228.2' -p107 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007297.3' -p108 -sssS'NR_027676.1:n.5831_5834del' -p109 -(dp110 -g3 -g4 -sg5 -(lp111 -S'NC_000017.10:g.41197588GGACA>G automapped to NC_000017.10:g.41197590_41197593del' -p112 -aS'RefSeqGene record not available' -p113 -asg9 -g4 -sg10 -(lp114 -sg12 -VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 6, non-coding RNA -p115 -sg14 -S'BRCA1' -p116 -sg16 -(dp117 -g18 -S'Non-coding :n.' 
-p118 -sg20 -g118 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NR_027676.1:n.5831_5834del' -p119 -sg28 -g4 -sg29 -(dp120 -S'hg19' -p121 -(dp122 -g33 -S'NC_000017.10:g.41197589_41197592del' -p123 -sg35 -(dp124 -g37 -g38 -sg39 -S'GGACA' -p125 -sg41 -S'41197588' -p126 -sg43 -g44 -sssg45 -(dp127 -g33 -S'NC_000017.11:g.43045572_43045575del' -p128 -sg35 -(dp129 -g37 -g38 -sg39 -S'GGACA' -p130 -sg41 -S'43045571' -p131 -sg43 -g44 -sssS'grch37' -p132 -(dp133 -g33 -S'NC_000017.10:g.41197589_41197592del' -p134 -sg35 -(dp135 -g37 -g55 -sg39 -S'GGACA' -p136 -sg41 -S'41197588' -p137 -sg43 -g44 -sssS'grch38' -p138 -(dp139 -g33 -S'NC_000017.11:g.43045572_43045575del' -p140 -sg35 -(dp141 -g37 -g55 -sg39 -S'GGACA' -p142 -sg41 -S'43045571' -p143 -sg43 -g44 -ssssg64 -(dp144 -g68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_027676.1' -p145 -sssS'NM_007300.3:c.*103_*106del' -p146 -(dp147 -g3 -g4 -sg5 -(lp148 -S'NC_000017.10:g.41197588GGACA>G automapped to NC_000017.10:g.41197590_41197593del' -p149 -aS'RefSeqGene record not available' -p150 -asg9 -g4 -sg10 -(lp151 -sg12 -VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 2, mRNA -p152 -sg14 -S'BRCA1' -p153 -sg16 -(dp154 -g18 -S'NP_009231.2:p.?' -p155 -sg20 -S'NP_009231.2:p.?' 
-p156 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_007300.3:c.*103_*106del' -p157 -sg28 -g4 -sg29 -(dp158 -S'hg19' -p159 -(dp160 -g33 -S'NC_000017.10:g.41197589_41197592del' -p161 -sg35 -(dp162 -g37 -g38 -sg39 -S'GGACA' -p163 -sg41 -S'41197588' -p164 -sg43 -g44 -sssg45 -(dp165 -g33 -S'NC_000017.11:g.43045572_43045575del' -p166 -sg35 -(dp167 -g37 -g38 -sg39 -S'GGACA' -p168 -sg41 -S'43045571' -p169 -sg43 -g44 -sssS'grch37' -p170 -(dp171 -g33 -S'NC_000017.10:g.41197589_41197592del' -p172 -sg35 -(dp173 -g37 -g55 -sg39 -S'GGACA' -p174 -sg41 -S'41197588' -p175 -sg43 -g44 -sssS'grch38' -p176 -(dp177 -g33 -S'NC_000017.11:g.43045572_43045575del' -p178 -sg35 -(dp179 -g37 -g55 -sg39 -S'GGACA' -p180 -sg41 -S'43045571' -p181 -sg43 -g44 -ssssg64 -(dp182 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009231.2' -p183 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007300.3' -p184 -sssS'flag' -p185 -S'gene_variant' -p186 -sS'NM_007299.3:c.*209_*212del' -p187 -(dp188 -g3 -g4 -sg5 -(lp189 -S'NC_000017.10:g.41197588GGACA>G automapped to NC_000017.10:g.41197590_41197593del' -p190 -aS'RefSeqGene record not available' -p191 -asg9 -g4 -sg10 -(lp192 -sg12 -VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 5, mRNA -p193 -sg14 -S'BRCA1' -p194 -sg16 -(dp195 -g18 -S'NP_009230.2:p.?' -p196 -sg20 -S'NP_009230.2:p.?' 
-p197 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_007299.3:c.*209_*212del' -p198 -sg28 -g4 -sg29 -(dp199 -S'hg19' -p200 -(dp201 -g33 -S'NC_000017.10:g.41197589_41197592del' -p202 -sg35 -(dp203 -g37 -g38 -sg39 -S'GGACA' -p204 -sg41 -S'41197588' -p205 -sg43 -g44 -sssg45 -(dp206 -g33 -S'NC_000017.11:g.43045572_43045575del' -p207 -sg35 -(dp208 -g37 -g38 -sg39 -S'GGACA' -p209 -sg41 -S'43045571' -p210 -sg43 -g44 -sssS'grch37' -p211 -(dp212 -g33 -S'NC_000017.10:g.41197589_41197592del' -p213 -sg35 -(dp214 -g37 -g55 -sg39 -S'GGACA' -p215 -sg41 -S'41197588' -p216 -sg43 -g44 -sssS'grch38' -p217 -(dp218 -g33 -S'NC_000017.11:g.43045572_43045575del' -p219 -sg35 -(dp220 -g37 -g55 -sg39 -S'GGACA' -p221 -sg41 -S'43045571' -p222 -sg43 -g44 -ssssg64 -(dp223 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009230.2' -p224 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007299.3' -p225 -sssS'metadata' -p226 -(dp227 -S'variantvalidator_hgvs_version' -p228 -S'1.1.3' -p229 -sS'uta_schema' -p230 -S'uta_20180821' -p231 -sS'seqrepo_db' -p232 -S'2018-08-21' -p233 -sS'variantvalidator_version' -p234 -S'v0.2' -p235 -ssS'NM_007298.3:c.*103_*106del' -p236 -(dp237 -g3 -g4 -sg5 -(lp238 -S'NC_000017.10:g.41197588GGACA>G automapped to NC_000017.10:g.41197590_41197593del' -p239 -aS'RefSeqGene record not available' -p240 -asg9 -g4 -sg10 -(lp241 -sg12 -VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 4, mRNA -p242 -sg14 -S'BRCA1' -p243 -sg16 -(dp244 -g18 -S'NP_009229.2:p.?' -p245 -sg20 -S'NP_009229.2:p.?' 
-p246 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_007298.3:c.*103_*106del' -p247 -sg28 -g4 -sg29 -(dp248 -S'hg19' -p249 -(dp250 -g33 -S'NC_000017.10:g.41197589_41197592del' -p251 -sg35 -(dp252 -g37 -g38 -sg39 -S'GGACA' -p253 -sg41 -S'41197588' -p254 -sg43 -g44 -sssg45 -(dp255 -g33 -S'NC_000017.11:g.43045572_43045575del' -p256 -sg35 -(dp257 -g37 -g38 -sg39 -S'GGACA' -p258 -sg41 -S'43045571' -p259 -sg43 -g44 -sssS'grch37' -p260 -(dp261 -g33 -S'NC_000017.10:g.41197589_41197592del' -p262 -sg35 -(dp263 -g37 -g55 -sg39 -S'GGACA' -p264 -sg41 -S'41197588' -p265 -sg43 -g44 -sssS'grch38' -p266 -(dp267 -g33 -S'NC_000017.11:g.43045572_43045575del' -p268 -sg35 -(dp269 -g37 -g55 -sg39 -S'GGACA' -p270 -sg41 -S'43045571' -p271 -sg43 -g44 -ssssg64 -(dp272 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009229.2' -p273 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007298.3' -p274 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant254.txt b/VariantValidator/testing/testOutputsMasterITS/variant254.txt deleted file mode 100644 index 58431335..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant254.txt +++ /dev/null @@ -1,748 +0,0 @@ -(dp0 -S'NM_007299.3:c.301+1G>C' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 5, mRNA -p12 -sS'gene_symbol' -p13 -S'BRCA1' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_009230.2:p.?' -p18 -sS'slr' -p19 -S'NP_009230.2:p.?' 
-p20 -ssS'submitted_variant' -p21 -S'17-41256884-C-G' -p22 -sS'genome_context_intronic_sequence' -p23 -S'NC_000017.10(NM_007299.3):c.301+1G>C' -p24 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_007299.3:c.301+1G>C' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000017.10:g.41256884C>G' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr17' -p38 -sS'ref' -p39 -VC -p40 -sS'pos' -p41 -S'41256884' -p42 -sS'alt' -p43 -VG -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000017.11:g.43104867C>G' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'43104867' -p49 -sg43 -g44 -sssS'grch37' -p50 -(dp51 -g33 -S'NC_000017.10:g.41256884C>G' -p52 -sg35 -(dp53 -g37 -S'17' -p54 -sg39 -g40 -sg41 -S'41256884' -p55 -sg43 -g44 -sssS'grch38' -p56 -(dp57 -g33 -S'NC_000017.11:g.43104867C>G' -p58 -sg35 -(dp59 -g37 -g54 -sg39 -g40 -sg41 -S'43104867' -p60 -sg43 -g44 -ssssS'reference_sequence_records' -p61 -(dp62 -S'protein' -p63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009230.2' -p64 -sS'transcript' -p65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007299.3' -p66 -sssS'NR_027676.1:n.440+1G>C' -p67 -(dp68 -g3 -g4 -sg5 -(lp69 -S'RefSeqGene record not available' -p70 -asg8 -g4 -sg9 -(lp71 -sg11 -VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 6, non-coding RNA -p72 -sg13 -S'BRCA1' -p73 -sg15 -(dp74 -g17 -S'Non-coding :n.' 
-p75 -sg19 -g75 -ssg21 -g22 -sg23 -S'NC_000017.10(NR_027676.1):c.440+1G>C' -p76 -sg25 -g4 -sg26 -S'NR_027676.1:n.440+1G>C' -p77 -sg28 -g4 -sg29 -(dp78 -S'hg19' -p79 -(dp80 -g33 -S'NC_000017.10:g.41256884C>G' -p81 -sg35 -(dp82 -g37 -g38 -sg39 -g40 -sg41 -S'41256884' -p83 -sg43 -g44 -sssg45 -(dp84 -g33 -S'NC_000017.11:g.43104867C>G' -p85 -sg35 -(dp86 -g37 -g38 -sg39 -g40 -sg41 -S'43104867' -p87 -sg43 -g44 -sssS'grch37' -p88 -(dp89 -g33 -S'NC_000017.10:g.41256884C>G' -p90 -sg35 -(dp91 -g37 -g54 -sg39 -g40 -sg41 -S'41256884' -p92 -sg43 -g44 -sssS'grch38' -p93 -(dp94 -g33 -S'NC_000017.11:g.43104867C>G' -p95 -sg35 -(dp96 -g37 -g54 -sg39 -g40 -sg41 -S'43104867' -p97 -sg43 -g44 -ssssg61 -(dp98 -g65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_027676.1' -p99 -sssS'NM_007300.3:c.301+1G>C' -p100 -(dp101 -g3 -g4 -sg5 -(lp102 -S'RefSeqGene record not available' -p103 -asg8 -g4 -sg9 -(lp104 -sg11 -VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 2, mRNA -p105 -sg13 -S'BRCA1' -p106 -sg15 -(dp107 -g17 -S'NP_009231.2:p.?' -p108 -sg19 -S'NP_009231.2:p.?' 
-p109 -ssg21 -g22 -sg23 -S'NC_000017.10(NM_007300.3):c.301+1G>C' -p110 -sg25 -g4 -sg26 -S'NM_007300.3:c.301+1G>C' -p111 -sg28 -g4 -sg29 -(dp112 -S'hg19' -p113 -(dp114 -g33 -S'NC_000017.10:g.41256884C>G' -p115 -sg35 -(dp116 -g37 -g38 -sg39 -g40 -sg41 -S'41256884' -p117 -sg43 -g44 -sssg45 -(dp118 -g33 -S'NC_000017.11:g.43104867C>G' -p119 -sg35 -(dp120 -g37 -g38 -sg39 -g40 -sg41 -S'43104867' -p121 -sg43 -g44 -sssS'grch37' -p122 -(dp123 -g33 -S'NC_000017.10:g.41256884C>G' -p124 -sg35 -(dp125 -g37 -g54 -sg39 -g40 -sg41 -S'41256884' -p126 -sg43 -g44 -sssS'grch38' -p127 -(dp128 -g33 -S'NC_000017.11:g.43104867C>G' -p129 -sg35 -(dp130 -g37 -g54 -sg39 -g40 -sg41 -S'43104867' -p131 -sg43 -g44 -ssssg61 -(dp132 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009231.2' -p133 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007300.3' -p134 -sssS'NM_007298.3:c.301+1G>C' -p135 -(dp136 -g3 -g4 -sg5 -(lp137 -S'RefSeqGene record not available' -p138 -asg8 -g4 -sg9 -(lp139 -sg11 -VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 4, mRNA -p140 -sg13 -S'BRCA1' -p141 -sg15 -(dp142 -g17 -S'NP_009229.2:p.?' -p143 -sg19 -S'NP_009229.2:p.?' 
-p144 -ssg21 -g22 -sg23 -S'NC_000017.10(NM_007298.3):c.301+1G>C' -p145 -sg25 -g4 -sg26 -S'NM_007298.3:c.301+1G>C' -p146 -sg28 -g4 -sg29 -(dp147 -S'hg19' -p148 -(dp149 -g33 -S'NC_000017.10:g.41256884C>G' -p150 -sg35 -(dp151 -g37 -g38 -sg39 -g40 -sg41 -S'41256884' -p152 -sg43 -g44 -sssg45 -(dp153 -g33 -S'NC_000017.11:g.43104867C>G' -p154 -sg35 -(dp155 -g37 -g38 -sg39 -g40 -sg41 -S'43104867' -p156 -sg43 -g44 -sssS'grch37' -p157 -(dp158 -g33 -S'NC_000017.10:g.41256884C>G' -p159 -sg35 -(dp160 -g37 -g54 -sg39 -g40 -sg41 -S'41256884' -p161 -sg43 -g44 -sssS'grch38' -p162 -(dp163 -g33 -S'NC_000017.11:g.43104867C>G' -p164 -sg35 -(dp165 -g37 -g54 -sg39 -g40 -sg41 -S'43104867' -p166 -sg43 -g44 -ssssg61 -(dp167 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009229.2' -p168 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007298.3' -p169 -sssS'NM_007297.3:c.160+1G>C' -p170 -(dp171 -g3 -g4 -sg5 -(lp172 -S'RefSeqGene record not available' -p173 -asg8 -g4 -sg9 -(lp174 -sg11 -VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 3, mRNA -p175 -sg13 -S'BRCA1' -p176 -sg15 -(dp177 -g17 -S'NP_009228.2:p.?' -p178 -sg19 -S'NP_009228.2:p.?' 
-p179 -ssg21 -g22 -sg23 -S'NC_000017.10(NM_007297.3):c.160+1G>C' -p180 -sg25 -g4 -sg26 -S'NM_007297.3:c.160+1G>C' -p181 -sg28 -g4 -sg29 -(dp182 -S'hg19' -p183 -(dp184 -g33 -S'NC_000017.10:g.41256884C>G' -p185 -sg35 -(dp186 -g37 -g38 -sg39 -g40 -sg41 -S'41256884' -p187 -sg43 -g44 -sssg45 -(dp188 -g33 -S'NC_000017.11:g.43104867C>G' -p189 -sg35 -(dp190 -g37 -g38 -sg39 -g40 -sg41 -S'43104867' -p191 -sg43 -g44 -sssS'grch37' -p192 -(dp193 -g33 -S'NC_000017.10:g.41256884C>G' -p194 -sg35 -(dp195 -g37 -g54 -sg39 -g40 -sg41 -S'41256884' -p196 -sg43 -g44 -sssS'grch38' -p197 -(dp198 -g33 -S'NC_000017.11:g.43104867C>G' -p199 -sg35 -(dp200 -g37 -g54 -sg39 -g40 -sg41 -S'43104867' -p201 -sg43 -g44 -ssssg61 -(dp202 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009228.2' -p203 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007297.3' -p204 -sssS'flag' -p205 -S'gene_variant' -p206 -sS'NM_007294.3:c.301+1G>C' -p207 -(dp208 -g3 -g4 -sg5 -(lp209 -S'RefSeqGene record not available' -p210 -asg8 -g4 -sg9 -(lp211 -sg11 -VHomo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 1, mRNA -p212 -sg13 -S'BRCA1' -p213 -sg15 -(dp214 -g17 -S'NP_009225.1:p.?' -p215 -sg19 -S'NP_009225.1:p.?' 
-p216 -ssg21 -g22 -sg23 -S'NC_000017.10(NM_007294.3):c.301+1G>C' -p217 -sg25 -g4 -sg26 -S'NM_007294.3:c.301+1G>C' -p218 -sg28 -g4 -sg29 -(dp219 -S'hg19' -p220 -(dp221 -g33 -S'NC_000017.10:g.41256884C>G' -p222 -sg35 -(dp223 -g37 -g38 -sg39 -g40 -sg41 -S'41256884' -p224 -sg43 -g44 -sssg45 -(dp225 -g33 -S'NC_000017.11:g.43104867C>G' -p226 -sg35 -(dp227 -g37 -g38 -sg39 -g40 -sg41 -S'43104867' -p228 -sg43 -g44 -sssS'grch37' -p229 -(dp230 -g33 -S'NC_000017.10:g.41256884C>G' -p231 -sg35 -(dp232 -g37 -g54 -sg39 -g40 -sg41 -S'41256884' -p233 -sg43 -g44 -sssS'grch38' -p234 -(dp235 -g33 -S'NC_000017.11:g.43104867C>G' -p236 -sg35 -(dp237 -g37 -g54 -sg39 -g40 -sg41 -S'43104867' -p238 -sg43 -g44 -ssssg61 -(dp239 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009225.1' -p240 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007294.3' -p241 -sssS'metadata' -p242 -(dp243 -S'variantvalidator_hgvs_version' -p244 -S'1.1.3' -p245 -sS'uta_schema' -p246 -S'uta_20180821' -p247 -sS'seqrepo_db' -p248 -S'2018-08-21' -p249 -sS'variantvalidator_version' -p250 -S'v0.2' -p251 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant255.txt b/VariantValidator/testing/testOutputsMasterITS/variant255.txt deleted file mode 100644 index cdbcd27b..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant255.txt +++ /dev/null @@ -1,483 +0,0 @@ -(dp0 -S'NM_001363846.1:c.490G>T' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens glial fibrillary acidic protein (GFAP), transcript variant 4, mRNA -p12 -sS'gene_symbol' -p13 -S'GFAP' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001350775.1:p.(Glu164Ter)' -p18 -sS'slr' -p19 -S'NP_001350775.1:p.(E164*)' -p20 -ssS'submitted_variant' -p21 -S'17-42991428-C-A' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_001363846.1:c.490G>T' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000017.10:g.42991428C>A' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr17' -p37 -sS'ref' -p38 -VC -p39 -sS'pos' -p40 -S'42991428' -p41 -sS'alt' -p42 -VA -p43 -sssS'grch37' -p44 -(dp45 -g32 -S'NC_000017.10:g.42991428C>A' -p46 -sg34 -(dp47 -g36 -S'17' -p48 -sg38 -g39 -sg40 -S'42991428' -p49 -sg42 -g43 -ssssS'reference_sequence_records' -p50 -(dp51 -S'protein' -p52 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350775.1' -p53 -sS'transcript' -p54 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363846.1' -p55 -sssS'NM_001131019.2:c.490G>T' -p56 -(dp57 -g3 -g4 -sg5 -(lp58 -S'RefSeqGene record not available' -p59 -asg8 -g4 -sg9 -(lp60 -sg11 -VHomo sapiens glial fibrillary acidic protein (GFAP), transcript variant 2, mRNA -p61 -sg13 -S'GFAP' -p62 -sg15 -(dp63 -g17 
-S'NP_001124491.1:p.(Glu164Ter)' -p64 -sg19 -S'NP_001124491.1:p.(E164*)' -p65 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001131019.2:c.490G>T' -p66 -sg27 -g4 -sg28 -(dp67 -S'hg19' -p68 -(dp69 -g32 -S'NC_000017.10:g.42991428C>A' -p70 -sg34 -(dp71 -g36 -g37 -sg38 -g39 -sg40 -S'42991428' -p72 -sg42 -g43 -sssS'hg38' -p73 -(dp74 -g32 -S'NC_000017.11:g.44914060C>A' -p75 -sg34 -(dp76 -g36 -g37 -sg38 -g39 -sg40 -S'44914060' -p77 -sg42 -g43 -sssS'grch37' -p78 -(dp79 -g32 -S'NC_000017.10:g.42991428C>A' -p80 -sg34 -(dp81 -g36 -g48 -sg38 -g39 -sg40 -S'42991428' -p82 -sg42 -g43 -sssS'grch38' -p83 -(dp84 -g32 -S'NC_000017.11:g.44914060C>A' -p85 -sg34 -(dp86 -g36 -g48 -sg38 -g39 -sg40 -S'44914060' -p87 -sg42 -g43 -ssssg50 -(dp88 -g52 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124491.1' -p89 -sg54 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001131019.2' -p90 -sssS'flag' -p91 -S'gene_variant' -p92 -sS'NM_001242376.1:c.490G>T' -p93 -(dp94 -g3 -g4 -sg5 -(lp95 -S'RefSeqGene record not available' -p96 -asg8 -g4 -sg9 -(lp97 -sg11 -VHomo sapiens glial fibrillary acidic protein (GFAP), transcript variant 3, mRNA -p98 -sg13 -S'GFAP' -p99 -sg15 -(dp100 -g17 -S'NP_001229305.1:p.(Glu164Ter)' -p101 -sg19 -S'NP_001229305.1:p.(E164*)' -p102 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001242376.1:c.490G>T' -p103 -sg27 -g4 -sg28 -(dp104 -S'hg19' -p105 -(dp106 -g32 -S'NC_000017.10:g.42991428C>A' -p107 -sg34 -(dp108 -g36 -g37 -sg38 -g39 -sg40 -S'42991428' -p109 -sg42 -g43 -sssg73 -(dp110 -g32 -S'NC_000017.11:g.44914060C>A' -p111 -sg34 -(dp112 -g36 -g37 -sg38 -g39 -sg40 -S'44914060' -p113 -sg42 -g43 -sssS'grch37' -p114 -(dp115 -g32 -S'NC_000017.10:g.42991428C>A' -p116 -sg34 -(dp117 -g36 -g48 -sg38 -g39 -sg40 -S'42991428' -p118 -sg42 -g43 -sssS'grch38' -p119 -(dp120 -g32 -S'NC_000017.11:g.44914060C>A' -p121 -sg34 -(dp122 -g36 -g48 -sg38 -g39 -sg40 -S'44914060' -p123 -sg42 -g43 -ssssg50 -(dp124 -g52 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001229305.1' -p125 -sg54 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001242376.1' -p126 -sssS'NM_002055.4:c.490G>T' -p127 -(dp128 -g3 -g4 -sg5 -(lp129 -S'RefSeqGene record not available' -p130 -asg8 -g4 -sg9 -(lp131 -sg11 -VHomo sapiens glial fibrillary acidic protein (GFAP), transcript variant 1, mRNA -p132 -sg13 -S'GFAP' -p133 -sg15 -(dp134 -g17 -S'NP_002046.1:p.(Glu164Ter)' -p135 -sg19 -S'NP_002046.1:p.(E164*)' -p136 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_002055.4:c.490G>T' -p137 -sg27 -g4 -sg28 -(dp138 -S'hg19' -p139 -(dp140 -g32 -S'NC_000017.10:g.42991428C>A' -p141 -sg34 -(dp142 -g36 -g37 -sg38 -g39 -sg40 -S'42991428' -p143 -sg42 -g43 -sssg73 -(dp144 -g32 -S'NC_000017.11:g.44914060C>A' -p145 -sg34 -(dp146 -g36 -g37 -sg38 -g39 -sg40 -S'44914060' -p147 -sg42 -g43 -sssS'grch37' -p148 -(dp149 -g32 -S'NC_000017.10:g.42991428C>A' -p150 -sg34 -(dp151 -g36 -g48 -sg38 -g39 -sg40 -S'42991428' -p152 -sg42 -g43 -sssS'grch38' -p153 -(dp154 -g32 -S'NC_000017.11:g.44914060C>A' -p155 -sg34 -(dp156 -g36 -g48 -sg38 -g39 -sg40 -S'44914060' -p157 -sg42 -g43 -ssssg50 -(dp158 -g52 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_002046.1' -p159 -sg54 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_002055.4' -p160 -sssS'metadata' -p161 -(dp162 -S'variantvalidator_hgvs_version' -p163 -S'1.1.3' -p164 -sS'uta_schema' -p165 -S'uta_20180821' -p166 -sS'seqrepo_db' -p167 -S'2018-08-21' -p168 -sS'variantvalidator_version' -p169 -S'v0.2' -p170 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant256.txt b/VariantValidator/testing/testOutputsMasterITS/variant256.txt deleted file mode 100644 index f30fee0b..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant256.txt +++ /dev/null @@ -1,573 +0,0 @@ -(dp0 -S'NM_001135697.1:c.*11A>T' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'A more recent version of the selected reference sequence NM_001135697.1 is available (NM_001135697.2)' -p7 -aS'NM_001135697.2:c.*11A>T MUST be fully validated prior to use in reports' -p8 -aS'select_variants=NM_001135697.2:c.*11A>T' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g4 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens sarcoglycan alpha (SGCA), transcript variant 2, mRNA -p15 -sS'gene_symbol' -p16 -S'SGCA' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_001129169.1:p.?' -p21 -sS'slr' -p22 -S'NP_001129169.1:p.?' 
-p23 -ssS'submitted_variant' -p24 -S'17-48252809-A-T' -p25 -sS'genome_context_intronic_sequence' -p26 -g4 -sS'hgvs_lrg_variant' -p27 -g4 -sS'hgvs_transcript_variant' -p28 -S'NM_001135697.1:c.*11A>T' -p29 -sS'hgvs_refseqgene_variant' -p30 -g4 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'hgvs_genomic_description' -p35 -S'NC_000017.10:g.48252809A>T' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr17' -p40 -sS'ref' -p41 -S'A' -p42 -sS'pos' -p43 -S'48252809' -p44 -sS'alt' -p45 -S'T' -p46 -sssS'grch37' -p47 -(dp48 -g35 -S'NC_000017.10:g.48252809A>T' -p49 -sg37 -(dp50 -g39 -S'17' -p51 -sg41 -g42 -sg43 -S'48252809' -p52 -sg45 -g46 -ssssS'reference_sequence_records' -p53 -(dp54 -S'protein' -p55 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129169.1' -p56 -sS'transcript' -p57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135697.1' -p58 -sssS'flag' -p59 -S'gene_variant' -p60 -sS'NR_135553.1:n.1022A>T' -p61 -(dp62 -g3 -g4 -sg5 -(lp63 -S'RefSeqGene record not available' -p64 -asg11 -g4 -sg12 -(lp65 -sg14 -VHomo sapiens sarcoglycan alpha (SGCA), transcript variant 3, non-coding RNA -p66 -sg16 -S'SGCA' -p67 -sg18 -(dp68 -g20 -S'Non-coding :n.' 
-p69 -sg22 -g69 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NR_135553.1:n.1022A>T' -p70 -sg30 -g4 -sg31 -(dp71 -S'hg19' -p72 -(dp73 -g35 -S'NC_000017.10:g.48252809A>T' -p74 -sg37 -(dp75 -g39 -g40 -sg41 -g42 -sg43 -S'48252809' -p76 -sg45 -g46 -sssS'hg38' -p77 -(dp78 -g35 -S'NC_000017.11:g.50175448A>T' -p79 -sg37 -(dp80 -g39 -g40 -sg41 -g42 -sg43 -S'50175448' -p81 -sg45 -g46 -sssS'grch37' -p82 -(dp83 -g35 -S'NC_000017.10:g.48252809A>T' -p84 -sg37 -(dp85 -g39 -g51 -sg41 -g42 -sg43 -S'48252809' -p86 -sg45 -g46 -sssS'grch38' -p87 -(dp88 -g35 -S'NC_000017.11:g.50175448A>T' -p89 -sg37 -(dp90 -g39 -g51 -sg41 -g42 -sg43 -S'50175448' -p91 -sg45 -g46 -ssssg53 -(dp92 -g57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_135553.1' -p93 -sssS'NM_001135697.2:c.*11A>T' -p94 -(dp95 -g3 -g4 -sg5 -(lp96 -S'RefSeqGene record not available' -p97 -asg11 -g4 -sg12 -(lp98 -sg14 -VHomo sapiens sarcoglycan alpha (SGCA), transcript variant 2, mRNA -p99 -sg16 -S'SGCA' -p100 -sg18 -(dp101 -g20 -S'NP_001129169.1:p.?' -p102 -sg22 -S'NP_001129169.1:p.?' 
-p103 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_001135697.2:c.*11A>T' -p104 -sg30 -g4 -sg31 -(dp105 -S'hg19' -p106 -(dp107 -g35 -S'NC_000017.10:g.48252809A>T' -p108 -sg37 -(dp109 -g39 -g40 -sg41 -g42 -sg43 -S'48252809' -p110 -sg45 -g46 -sssg77 -(dp111 -g35 -S'NC_000017.11:g.50175448A>T' -p112 -sg37 -(dp113 -g39 -g40 -sg41 -g42 -sg43 -S'50175448' -p114 -sg45 -g46 -sssS'grch37' -p115 -(dp116 -g35 -S'NC_000017.10:g.48252809A>T' -p117 -sg37 -(dp118 -g39 -g51 -sg41 -g42 -sg43 -S'48252809' -p119 -sg45 -g46 -sssS'grch38' -p120 -(dp121 -g35 -S'NC_000017.11:g.50175448A>T' -p122 -sg37 -(dp123 -g39 -g51 -sg41 -g42 -sg43 -S'50175448' -p124 -sg45 -g46 -ssssg53 -(dp125 -g55 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129169.1' -p126 -sg57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135697.2' -p127 -sssS'NM_000023.3:c.*11A>T' -p128 -(dp129 -g3 -g4 -sg5 -(lp130 -S'RefSeqGene record not available' -p131 -asg11 -g4 -sg12 -(lp132 -sg14 -VHomo sapiens sarcoglycan alpha (SGCA), transcript variant 1, mRNA -p133 -sg16 -S'SGCA' -p134 -sg18 -(dp135 -g20 -S'NP_000014.1:p.?' -p136 -sg22 -S'NP_000014.1:p.?' 
-p137 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_000023.3:c.*11A>T' -p138 -sg30 -g4 -sg31 -(dp139 -S'hg19' -p140 -(dp141 -g35 -S'NC_000017.10:g.48252809A>T' -p142 -sg37 -(dp143 -g39 -g40 -sg41 -g42 -sg43 -S'48252809' -p144 -sg45 -g46 -sssg77 -(dp145 -g35 -S'NC_000017.11:g.50175448A>T' -p146 -sg37 -(dp147 -g39 -g40 -sg41 -g42 -sg43 -S'50175448' -p148 -sg45 -g46 -sssS'grch37' -p149 -(dp150 -g35 -S'NC_000017.10:g.48252809A>T' -p151 -sg37 -(dp152 -g39 -g51 -sg41 -g42 -sg43 -S'48252809' -p153 -sg45 -g46 -sssS'grch38' -p154 -(dp155 -g35 -S'NC_000017.11:g.50175448A>T' -p156 -sg37 -(dp157 -g39 -g51 -sg41 -g42 -sg43 -S'50175448' -p158 -sg45 -g46 -ssssg53 -(dp159 -g55 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000014.1' -p160 -sg57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000023.3' -p161 -sssS'NM_000023.2:c.*11A>T' -p162 -(dp163 -g3 -g4 -sg5 -(lp164 -S'A more recent version of the selected reference sequence NM_000023.2 is available (NM_000023.3)' -p165 -aS'NM_000023.3:c.*11A>T MUST be fully validated prior to use in reports' -p166 -aS'select_variants=NM_000023.3:c.*11A>T' -p167 -aS'RefSeqGene record not available' -p168 -asg11 -g4 -sg12 -(lp169 -sg14 -VHomo sapiens sarcoglycan alpha (SGCA), transcript variant 1, mRNA -p170 -sg16 -S'SGCA' -p171 -sg18 -(dp172 -g20 -S'NP_000014.1:p.?' -p173 -sg22 -S'NP_000014.1:p.?' 
-p174 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_000023.2:c.*11A>T' -p175 -sg30 -g4 -sg31 -(dp176 -S'hg19' -p177 -(dp178 -g35 -S'NC_000017.10:g.48252809A>T' -p179 -sg37 -(dp180 -g39 -g40 -sg41 -g42 -sg43 -S'48252809' -p181 -sg45 -g46 -sssS'grch37' -p182 -(dp183 -g35 -S'NC_000017.10:g.48252809A>T' -p184 -sg37 -(dp185 -g39 -g51 -sg41 -g42 -sg43 -S'48252809' -p186 -sg45 -g46 -ssssg53 -(dp187 -g55 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000014.1' -p188 -sg57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000023.2' -p189 -sssS'metadata' -p190 -(dp191 -S'variantvalidator_hgvs_version' -p192 -S'1.1.3' -p193 -sS'uta_schema' -p194 -S'uta_20180821' -p195 -sS'seqrepo_db' -p196 -S'2018-08-21' -p197 -sS'variantvalidator_version' -p198 -S'v0.2' -p199 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant257.txt b/VariantValidator/testing/testOutputsMasterITS/variant257.txt deleted file mode 100644 index 34d5b066..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant257.txt +++ /dev/null @@ -1,180 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000334.4:c.3720+9_3720+10dup' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000017.10:g.62022709G>GTC automapped to NC_000017.10:g.62022710_62022711dupTC' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens sodium voltage-gated channel alpha subunit 4 (SCN4A), mRNA -p15 -sS'gene_symbol' -p16 -S'SCN4A' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000325.4:p.?' -p21 -sS'slr' -p22 -S'NP_000325.4:p.?' 
-p23 -ssS'submitted_variant' -p24 -S'17-62022709-G-GTC' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000017.10(NM_000334.4):c.3720+9_3720+10dup' -p27 -sS'hgvs_lrg_variant' -p28 -g6 -sS'hgvs_transcript_variant' -p29 -S'NM_000334.4:c.3720+9_3720+10dup' -p30 -sS'hgvs_refseqgene_variant' -p31 -g6 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000017.10:g.62022710_62022711dup' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr17' -p41 -sS'ref' -p42 -S'TC' -p43 -sS'pos' -p44 -S'62022710' -p45 -sS'alt' -p46 -S'TCTC' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000017.11:g.63945350_63945351dup' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'TC' -p52 -sg44 -S'63945350' -p53 -sg46 -S'TCTC' -p54 -sssS'grch37' -p55 -(dp56 -g36 -S'NC_000017.10:g.62022710_62022711dup' -p57 -sg38 -(dp58 -g40 -S'17' -p59 -sg42 -S'TC' -p60 -sg44 -S'62022710' -p61 -sg46 -S'TCTC' -p62 -sssS'grch38' -p63 -(dp64 -g36 -S'NC_000017.11:g.63945350_63945351dup' -p65 -sg38 -(dp66 -g40 -g59 -sg42 -S'TC' -p67 -sg44 -S'63945350' -p68 -sg46 -S'TCTC' -p69 -ssssS'reference_sequence_records' -p70 -(dp71 -S'protein' -p72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4' -p73 -sS'transcript' -p74 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4' -p75 -sssS'metadata' -p76 -(dp77 -S'variantvalidator_hgvs_version' -p78 -S'1.1.3' -p79 -sS'uta_schema' -p80 -S'uta_20180821' -p81 -sS'seqrepo_db' -p82 -S'2018-08-21' -p83 -sS'variantvalidator_version' -p84 -S'v0.2' -p85 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant258.txt b/VariantValidator/testing/testOutputsMasterITS/variant258.txt deleted file mode 100644 index 1ee1bcfb..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant258.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_000334.4:c.3720+8_3720+9insA' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'NC_000017.10:g.62022711C>CT automapped to NC_000017.10:g.62022711_62022712insT' -p19 -aS'RefSeqGene record not available' -p20 -asS'refseqgene_context_intronic_sequence' -p21 -g16 -sS'alt_genomic_loci' -p22 -(lp23 -sS'transcript_description' -p24 -VHomo sapiens sodium voltage-gated channel alpha subunit 4 (SCN4A), mRNA -p25 -sS'gene_symbol' -p26 -S'SCN4A' -p27 -sS'hgvs_predicted_protein_consequence' -p28 -(dp29 -S'tlr' -p30 -S'NP_000325.4:p.?' -p31 -sS'slr' -p32 -S'NP_000325.4:p.?' 
-p33 -ssS'submitted_variant' -p34 -S'17-62022711-C-CT' -p35 -sS'genome_context_intronic_sequence' -p36 -S'NC_000017.10(NM_000334.4):c.3720+8_3720+9insA' -p37 -sS'hgvs_lrg_variant' -p38 -g16 -sS'hgvs_transcript_variant' -p39 -S'NM_000334.4:c.3720+8_3720+9insA' -p40 -sS'hgvs_refseqgene_variant' -p41 -g16 -sS'primary_assembly_loci' -p42 -(dp43 -S'hg19' -p44 -(dp45 -S'hgvs_genomic_description' -p46 -S'NC_000017.10:g.62022711_62022712insT' -p47 -sS'vcf' -p48 -(dp49 -S'chr' -p50 -S'chr17' -p51 -sS'ref' -p52 -S'C' -p53 -sS'pos' -p54 -S'62022711' -p55 -sS'alt' -p56 -VCT -p57 -sssS'hg38' -p58 -(dp59 -g46 -S'NC_000017.11:g.63945351_63945352insT' -p60 -sg48 -(dp61 -g50 -g51 -sg52 -g53 -sg54 -S'63945351' -p62 -sg56 -VCT -p63 -sssS'grch37' -p64 -(dp65 -g46 -S'NC_000017.10:g.62022711_62022712insT' -p66 -sg48 -(dp67 -g50 -S'17' -p68 -sg52 -g53 -sg54 -S'62022711' -p69 -sg56 -VCT -p70 -sssS'grch38' -p71 -(dp72 -g46 -S'NC_000017.11:g.63945351_63945352insT' -p73 -sg48 -(dp74 -g50 -g68 -sg52 -g53 -sg54 -S'63945351' -p75 -sg56 -VCT -p76 -ssssS'reference_sequence_records' -p77 -(dp78 -S'protein' -p79 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4' -p80 -sS'transcript' -p81 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4' -p82 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant259.txt b/VariantValidator/testing/testOutputsMasterITS/variant259.txt deleted file mode 100644 index 9ec16ffc..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant259.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000334.4:c.3442-8_3442-7insGC' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000017.10:g.62023005G>GGC automapped to NC_000017.10:g.62023005_62023006insGC' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens sodium voltage-gated channel alpha subunit 4 (SCN4A), mRNA -p15 -sS'gene_symbol' -p16 -S'SCN4A' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000325.4:p.?' -p21 -sS'slr' -p22 -S'NP_000325.4:p.?' -p23 -ssS'submitted_variant' -p24 -S'17-62023005-G-GGC' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000017.10(NM_000334.4):c.3442-8_3442-7insGC' -p27 -sS'hgvs_lrg_variant' -p28 -g6 -sS'hgvs_transcript_variant' -p29 -S'NM_000334.4:c.3442-8_3442-7insGC' -p30 -sS'hgvs_refseqgene_variant' -p31 -g6 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000017.10:g.62023005_62023006insGC' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr17' -p41 -sS'ref' -p42 -S'G' -p43 -sS'pos' -p44 -S'62023005' -p45 -sS'alt' -p46 -VGGC -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000017.11:g.63945645_63945646insGC' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -g43 -sg44 -S'63945645' -p52 -sg46 -VGGC -p53 -sssS'grch37' -p54 -(dp55 -g36 -S'NC_000017.10:g.62023005_62023006insGC' -p56 -sg38 -(dp57 -g40 -S'17' -p58 -sg42 -g43 -sg44 -S'62023005' -p59 -sg46 -VGGC -p60 -sssS'grch38' -p61 -(dp62 -g36 -S'NC_000017.11:g.63945645_63945646insGC' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -g43 -sg44 -S'63945645' -p65 
-sg46 -VGGC -p66 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4' -p72 -sssS'metadata' -p73 -(dp74 -S'variantvalidator_hgvs_version' -p75 -S'1.1.3' -p76 -sS'uta_schema' -p77 -S'uta_20180821' -p78 -sS'seqrepo_db' -p79 -S'2018-08-21' -p80 -sS'variantvalidator_version' -p81 -S'v0.2' -p82 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant26.txt b/VariantValidator/testing/testOutputsMasterITS/variant26.txt deleted file mode 100644 index dcc46545..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant26.txt +++ /dev/null @@ -1,80 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The given coordinate is outside the bounds of the reference sequence.' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'hgvs_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NR_138595.1:n.-810C>T' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'hgvs_lrg_variant' -p20 -g4 -sS'hgvs_transcript_variant' -p21 -g4 -sS'hgvs_refseqgene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -sS'reference_sequence_records' -p25 -g4 -ssS'flag' -p26 -S'warning' -p27 -sS'metadata' -p28 -(dp29 -S'variantvalidator_hgvs_version' -p30 -S'1.1.3' -p31 -sS'uta_schema' -p32 -S'uta_20180821' -p33 -sS'seqrepo_db' -p34 -S'2018-08-21' -p35 -sS'variantvalidator_version' -p36 -S'v0.2' -p37 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant260.txt b/VariantValidator/testing/testOutputsMasterITS/variant260.txt deleted file mode 100644 index 7945a902..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant260.txt +++ /dev/null @@ -1,172 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_000334.4:c.3442-8G>T' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'RefSeqGene record not available' -p19 -asS'refseqgene_context_intronic_sequence' -p20 -g16 -sS'alt_genomic_loci' -p21 -(lp22 -sS'transcript_description' -p23 -VHomo sapiens sodium voltage-gated channel alpha subunit 4 (SCN4A), mRNA -p24 -sS'gene_symbol' -p25 -S'SCN4A' -p26 -sS'hgvs_predicted_protein_consequence' -p27 -(dp28 -S'tlr' -p29 -S'NP_000325.4:p.?' -p30 -sS'slr' -p31 -S'NP_000325.4:p.?' 
-p32 -ssS'submitted_variant' -p33 -S'17-62023006-C-A' -p34 -sS'genome_context_intronic_sequence' -p35 -S'NC_000017.10(NM_000334.4):c.3442-8G>T' -p36 -sS'hgvs_lrg_variant' -p37 -g16 -sS'hgvs_transcript_variant' -p38 -S'NM_000334.4:c.3442-8G>T' -p39 -sS'hgvs_refseqgene_variant' -p40 -g16 -sS'primary_assembly_loci' -p41 -(dp42 -S'hg19' -p43 -(dp44 -S'hgvs_genomic_description' -p45 -S'NC_000017.10:g.62023006C>A' -p46 -sS'vcf' -p47 -(dp48 -S'chr' -p49 -S'chr17' -p50 -sS'ref' -p51 -VC -p52 -sS'pos' -p53 -S'62023006' -p54 -sS'alt' -p55 -VA -p56 -sssS'hg38' -p57 -(dp58 -g45 -S'NC_000017.11:g.63945646C>A' -p59 -sg47 -(dp60 -g49 -g50 -sg51 -g52 -sg53 -S'63945646' -p61 -sg55 -g56 -sssS'grch37' -p62 -(dp63 -g45 -S'NC_000017.10:g.62023006C>A' -p64 -sg47 -(dp65 -g49 -S'17' -p66 -sg51 -g52 -sg53 -S'62023006' -p67 -sg55 -g56 -sssS'grch38' -p68 -(dp69 -g45 -S'NC_000017.11:g.63945646C>A' -p70 -sg47 -(dp71 -g49 -g66 -sg51 -g52 -sg53 -S'63945646' -p72 -sg55 -g56 -ssssS'reference_sequence_records' -p73 -(dp74 -S'protein' -p75 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4' -p76 -sS'transcript' -p77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4' -p78 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant261.txt b/VariantValidator/testing/testOutputsMasterITS/variant261.txt deleted file mode 100644 index 9384931b..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant261.txt +++ /dev/null @@ -1,171 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000334.4:c.2111C>T' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens sodium voltage-gated channel alpha subunit 4 (SCN4A), mRNA -p14 -sS'gene_symbol' -p15 -S'SCN4A' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000325.4:p.(Thr704Met)' -p20 -sS'slr' -p21 -S'NP_000325.4:p.(T704M)' -p22 -ssS'submitted_variant' -p23 -S'17-62034787-G-A' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_000334.4:c.2111C>T' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000017.10:g.62034787G>A' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr17' -p39 -sS'ref' -p40 -VG -p41 -sS'pos' -p42 -S'62034787' -p43 -sS'alt' -p44 -VA -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000017.11:g.63957427G>A' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'63957427' -p50 -sg44 -g45 -sssS'grch37' -p51 -(dp52 -g34 -S'NC_000017.10:g.62034787G>A' -p53 -sg36 -(dp54 -g38 -S'17' -p55 -sg40 -g41 -sg42 -S'62034787' -p56 -sg44 -g45 -sssS'grch38' -p57 -(dp58 -g34 -S'NC_000017.11:g.63957427G>A' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'63957427' -p61 -sg44 -g45 -ssssS'reference_sequence_records' -p62 -(dp63 -S'protein' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4' -p65 -sS'transcript' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4' -p67 
-sssS'metadata' -p68 -(dp69 -S'variantvalidator_hgvs_version' -p70 -S'1.1.3' -p71 -sS'uta_schema' -p72 -S'uta_20180821' -p73 -sS'seqrepo_db' -p74 -S'2018-08-21' -p75 -sS'variantvalidator_version' -p76 -S'v0.2' -p77 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant262.txt b/VariantValidator/testing/testOutputsMasterITS/variant262.txt deleted file mode 100644 index 517193d1..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant262.txt +++ /dev/null @@ -1,914 +0,0 @@ -(dp0 -S'NM_001351443.1:c.-16+941_-16+946del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000018.9:g.24128261GTCCTCC>G automapped to NC_000018.9:g.24128273_24128278del' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 6, mRNA -p13 -sS'gene_symbol' -p14 -S'KCTD1' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001338372.1:p.?' -p19 -sS'slr' -p20 -S'NP_001338372.1:p.?' 
-p21 -ssS'submitted_variant' -p22 -S'18-24128261-GTCCTCC-G' -p23 -sS'genome_context_intronic_sequence' -p24 -S'NC_000018.9(NM_001351443.1):c.-16+941_-16+946del' -p25 -sS'hgvs_lrg_variant' -p26 -g4 -sS'hgvs_transcript_variant' -p27 -S'NM_001351443.1:c.-16+941_-16+946del' -p28 -sS'hgvs_refseqgene_variant' -p29 -g4 -sS'primary_assembly_loci' -p30 -(dp31 -S'grch38' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000018.10:g.26548298_26548303del' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'18' -p39 -sS'ref' -p40 -S'GTCCTCC' -p41 -sS'pos' -p42 -S'26548297' -p43 -sS'alt' -p44 -S'G' -p45 -sssS'grch37' -p46 -(dp47 -g34 -S'NC_000018.9:g.24128262_24128267del' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'GTCCTCC' -p50 -sg42 -S'24128261' -p51 -sg44 -g45 -sssS'hg38' -p52 -(dp53 -g34 -S'NC_000018.10:g.26548298_26548303del' -p54 -sg36 -(dp55 -g38 -S'chr18' -p56 -sg40 -S'GTCCTCC' -p57 -sg42 -S'26548297' -p58 -sg44 -g45 -sssS'hg19' -p59 -(dp60 -g34 -S'NC_000018.9:g.24128262_24128267del' -p61 -sg36 -(dp62 -g38 -g56 -sg40 -S'GTCCTCC' -p63 -sg42 -S'24128261' -p64 -sg44 -g45 -ssssS'reference_sequence_records' -p65 -(dp66 -S'protein' -p67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001338372.1' -p68 -sS'transcript' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001351443.1' -p70 -sssS'NM_001258222.1:c.10-47053_10-47048del' -p71 -(dp72 -g3 -g4 -sg5 -(lp73 -S'NC_000018.9:g.24128261GTCCTCC>G automapped to NC_000018.9:g.24128273_24128278del' -p74 -aS'A more recent version of the selected reference sequence NM_001258222.1 is available (NM_001258222.2)' -p75 -aS'NM_001258222.2:c.10-47053_10-47048del MUST be fully validated prior to use in reports' -p76 -aS'select_variants=NM_001258222.2:c.10-47053_10-47048del' -p77 -aS'RefSeqGene record not available' -p78 -asg9 -g4 -sg10 -(lp79 -sg12 -VHomo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 5, mRNA -p80 -sg14 -S'KCTD1' -p81 -sg16 -(dp82 -g18 -S'NP_001245151.1:p.?' -p83 -sg20 -S'NP_001245151.1:p.?' 
-p84 -ssg22 -g23 -sg24 -S'NC_000018.9(NM_001258222.1):c.10-47053_10-47048del' -p85 -sg26 -g4 -sg27 -S'NM_001258222.1:c.10-47053_10-47048del' -p86 -sg29 -g4 -sg30 -(dp87 -S'grch38' -p88 -(dp89 -g34 -S'NC_000018.10:g.26548298_26548303del' -p90 -sg36 -(dp91 -g38 -g39 -sg40 -S'GTCCTCC' -p92 -sg42 -S'26548297' -p93 -sg44 -g45 -sssS'grch37' -p94 -(dp95 -g34 -S'NC_000018.9:g.24128262_24128267del' -p96 -sg36 -(dp97 -g38 -g39 -sg40 -S'GTCCTCC' -p98 -sg42 -S'24128261' -p99 -sg44 -g45 -sssg52 -(dp100 -g34 -S'NC_000018.10:g.26548298_26548303del' -p101 -sg36 -(dp102 -g38 -g56 -sg40 -S'GTCCTCC' -p103 -sg42 -S'26548297' -p104 -sg44 -g45 -sssS'hg19' -p105 -(dp106 -g34 -S'NC_000018.9:g.24128262_24128267del' -p107 -sg36 -(dp108 -g38 -g56 -sg40 -S'GTCCTCC' -p109 -sg42 -S'24128261' -p110 -sg44 -g45 -ssssg65 -(dp111 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001245151.1' -p112 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001258222.1' -p113 -sssS'NM_001258221.1:c.-16+1426_-16+1431del' -p114 -(dp115 -g3 -g4 -sg5 -(lp116 -S'NC_000018.9:g.24128261GTCCTCC>G automapped to NC_000018.9:g.24128273_24128278del' -p117 -aS'RefSeqGene record not available' -p118 -asg9 -g4 -sg10 -(lp119 -sg12 -VHomo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 4, mRNA -p120 -sg14 -S'KCTD1' -p121 -sg16 -(dp122 -g18 -S'NP_001245150.1:p.?' -p123 -sg20 -S'NP_001245150.1:p.?' 
-p124 -ssg22 -g23 -sg24 -S'NC_000018.9(NM_001258221.1):c.-16+1426_-16+1431del' -p125 -sg26 -g4 -sg27 -S'NM_001258221.1:c.-16+1426_-16+1431del' -p126 -sg29 -g4 -sg30 -(dp127 -S'grch38' -p128 -(dp129 -g34 -S'NC_000018.10:g.26548298_26548303del' -p130 -sg36 -(dp131 -g38 -g39 -sg40 -S'GTCCTCC' -p132 -sg42 -S'26548297' -p133 -sg44 -g45 -sssS'grch37' -p134 -(dp135 -g34 -S'NC_000018.9:g.24128262_24128267del' -p136 -sg36 -(dp137 -g38 -g39 -sg40 -S'GTCCTCC' -p138 -sg42 -S'24128261' -p139 -sg44 -g45 -sssg52 -(dp140 -g34 -S'NC_000018.10:g.26548298_26548303del' -p141 -sg36 -(dp142 -g38 -g56 -sg40 -S'GTCCTCC' -p143 -sg42 -S'26548297' -p144 -sg44 -g45 -sssS'hg19' -p145 -(dp146 -g34 -S'NC_000018.9:g.24128262_24128267del' -p147 -sg36 -(dp148 -g38 -g56 -sg40 -S'GTCCTCC' -p149 -sg42 -S'24128261' -p150 -sg44 -g45 -ssssg65 -(dp151 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001245150.1' -p152 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001258221.1' -p153 -sssS'NM_001258222.2:c.10-47053_10-47048del' -p154 -(dp155 -g3 -g4 -sg5 -(lp156 -S'NC_000018.9:g.24128261GTCCTCC>G automapped to NC_000018.9:g.24128273_24128278del' -p157 -aS'RefSeqGene record not available' -p158 -asg9 -g4 -sg10 -(lp159 -sg12 -VHomo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 5, mRNA -p160 -sg14 -S'KCTD1' -p161 -sg16 -(dp162 -g18 -S'NP_001245151.1:p.?' -p163 -sg20 -S'NP_001245151.1:p.?' 
-p164 -ssg22 -g23 -sg24 -S'NC_000018.9(NM_001258222.2):c.10-47053_10-47048del' -p165 -sg26 -g4 -sg27 -S'NM_001258222.2:c.10-47053_10-47048del' -p166 -sg29 -g4 -sg30 -(dp167 -S'grch38' -p168 -(dp169 -g34 -S'NC_000018.10:g.26548298_26548303del' -p170 -sg36 -(dp171 -g38 -g39 -sg40 -S'GTCCTCC' -p172 -sg42 -S'26548297' -p173 -sg44 -g45 -sssS'grch37' -p174 -(dp175 -g34 -S'NC_000018.9:g.24128262_24128267del' -p176 -sg36 -(dp177 -g38 -g39 -sg40 -S'GTCCTCC' -p178 -sg42 -S'24128261' -p179 -sg44 -g45 -sssg52 -(dp180 -g34 -S'NC_000018.10:g.26548298_26548303del' -p181 -sg36 -(dp182 -g38 -g56 -sg40 -S'GTCCTCC' -p183 -sg42 -S'26548297' -p184 -sg44 -g45 -sssS'hg19' -p185 -(dp186 -g34 -S'NC_000018.9:g.24128262_24128267del' -p187 -sg36 -(dp188 -g38 -g56 -sg40 -S'GTCCTCC' -p189 -sg42 -S'24128261' -p190 -sg44 -g45 -ssssg65 -(dp191 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001245151.1' -p192 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001258222.2' -p193 -sssS'flag' -p194 -S'gene_variant' -p195 -sS'NM_001136205.2:c.-16+588_-16+593del' -p196 -(dp197 -g3 -g4 -sg5 -(lp198 -S'NC_000018.9:g.24128261GTCCTCC>G automapped to NC_000018.9:g.24128273_24128278del' -p199 -aS'RefSeqGene record not available' -p200 -asg9 -g4 -sg10 -(lp201 -sg12 -VHomo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 1, mRNA -p202 -sg14 -S'KCTD1' -p203 -sg16 -(dp204 -g18 -S'NP_001129677.1:p.?' -p205 -sg20 -S'NP_001129677.1:p.?' 
-p206 -ssg22 -g23 -sg24 -S'NC_000018.9(NM_001136205.2):c.-16+588_-16+593del' -p207 -sg26 -g4 -sg27 -S'NM_001136205.2:c.-16+588_-16+593del' -p208 -sg29 -g4 -sg30 -(dp209 -S'grch38' -p210 -(dp211 -g34 -S'NC_000018.10:g.26548298_26548303del' -p212 -sg36 -(dp213 -g38 -g39 -sg40 -S'GTCCTCC' -p214 -sg42 -S'26548297' -p215 -sg44 -g45 -sssS'grch37' -p216 -(dp217 -g34 -S'NC_000018.9:g.24128262_24128267del' -p218 -sg36 -(dp219 -g38 -g39 -sg40 -S'GTCCTCC' -p220 -sg42 -S'24128261' -p221 -sg44 -g45 -sssg52 -(dp222 -g34 -S'NC_000018.10:g.26548298_26548303del' -p223 -sg36 -(dp224 -g38 -g56 -sg40 -S'GTCCTCC' -p225 -sg42 -S'26548297' -p226 -sg44 -g45 -sssS'hg19' -p227 -(dp228 -g34 -S'NC_000018.9:g.24128262_24128267del' -p229 -sg36 -(dp230 -g38 -g56 -sg40 -S'GTCCTCC' -p231 -sg42 -S'24128261' -p232 -sg44 -g45 -ssssg65 -(dp233 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129677.1' -p234 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001136205.2' -p235 -sssS'NM_198991.3:c.-15-47053_-15-47048del' -p236 -(dp237 -g3 -g4 -sg5 -(lp238 -S'NC_000018.9:g.24128261GTCCTCC>G automapped to NC_000018.9:g.24128273_24128278del' -p239 -aS'RefSeqGene record not available' -p240 -asg9 -g4 -sg10 -(lp241 -sg12 -VHomo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 2, mRNA -p242 -sg14 -S'KCTD1' -p243 -sg16 -(dp244 -g18 -S'NP_945342.1:p.?' -p245 -sg20 -S'NP_945342.1:p.?' 
-p246 -ssg22 -g23 -sg24 -S'NC_000018.9(NM_198991.3):c.-15-47053_-15-47048del' -p247 -sg26 -g4 -sg27 -S'NM_198991.3:c.-15-47053_-15-47048del' -p248 -sg29 -g4 -sg30 -(dp249 -S'grch38' -p250 -(dp251 -g34 -S'NC_000018.10:g.26548298_26548303del' -p252 -sg36 -(dp253 -g38 -g39 -sg40 -S'GTCCTCC' -p254 -sg42 -S'26548297' -p255 -sg44 -g45 -sssS'grch37' -p256 -(dp257 -g34 -S'NC_000018.9:g.24128262_24128267del' -p258 -sg36 -(dp259 -g38 -g39 -sg40 -S'GTCCTCC' -p260 -sg42 -S'24128261' -p261 -sg44 -g45 -sssg52 -(dp262 -g34 -S'NC_000018.10:g.26548298_26548303del' -p263 -sg36 -(dp264 -g38 -g56 -sg40 -S'GTCCTCC' -p265 -sg42 -S'26548297' -p266 -sg44 -g45 -sssS'hg19' -p267 -(dp268 -g34 -S'NC_000018.9:g.24128262_24128267del' -p269 -sg36 -(dp270 -g38 -g56 -sg40 -S'GTCCTCC' -p271 -sg42 -S'24128261' -p272 -sg44 -g45 -ssssg65 -(dp273 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_945342.1' -p274 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_198991.3' -p275 -sssS'NM_001142730.2:c.234_239del' -p276 -(dp277 -g3 -g4 -sg5 -(lp278 -S'NC_000018.9:g.24128261GTCCTCC>G automapped to NC_000018.9:g.24128273_24128278del' -p279 -aS'RefSeqGene record not available' -p280 -asg9 -g4 -sg10 -(lp281 -sg12 -VHomo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 3, mRNA -p282 -sg14 -S'KCTD1' -p283 -sg16 -(dp284 -g18 -S'NP_001136202.1:p.(Glu78_Glu79del)' -p285 -sg20 -S'NP_001136202.1:p.(E78_E79del)' -p286 -ssg22 -g23 -sg24 -g4 -sg26 -g4 -sg27 -S'NM_001142730.2:c.234_239del' -p287 -sg29 -g4 -sg30 -(dp288 -S'grch38' -p289 -(dp290 -g34 -S'NC_000018.10:g.26548298_26548303del' -p291 -sg36 -(dp292 -g38 -g39 -sg40 -S'GTCCTCC' -p293 -sg42 -S'26548297' -p294 -sg44 -g45 -sssS'grch37' -p295 -(dp296 -g34 -S'NC_000018.9:g.24128262_24128267del' -p297 -sg36 -(dp298 -g38 -g39 -sg40 -S'GTCCTCC' -p299 -sg42 -S'24128261' -p300 -sg44 -g45 -sssg52 -(dp301 -g34 -S'NC_000018.10:g.26548298_26548303del' -p302 -sg36 -(dp303 -g38 -g56 -sg40 -S'GTCCTCC' -p304 -sg42 -S'26548297' -p305 -sg44 -g45 
-sssS'hg19' -p306 -(dp307 -g34 -S'NC_000018.9:g.24128262_24128267del' -p308 -sg36 -(dp309 -g38 -g56 -sg40 -S'GTCCTCC' -p310 -sg42 -S'24128261' -p311 -sg44 -g45 -ssssg65 -(dp312 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001136202.1' -p313 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001142730.2' -p314 -sssS'metadata' -p315 -(dp316 -S'variantvalidator_hgvs_version' -p317 -S'1.1.3' -p318 -sS'uta_schema' -p319 -S'uta_20180821' -p320 -sS'seqrepo_db' -p321 -S'2018-08-21' -p322 -sS'variantvalidator_version' -p323 -S'v0.2' -p324 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant263.txt b/VariantValidator/testing/testOutputsMasterITS/variant263.txt deleted file mode 100644 index 56a73641..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant263.txt +++ /dev/null @@ -1,171 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_000435.2:c.2992C>T' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'RefSeqGene record not available' -p19 -asS'refseqgene_context_intronic_sequence' -p20 -g16 -sS'alt_genomic_loci' -p21 -(lp22 -sS'transcript_description' -p23 -VHomo sapiens notch 3 (NOTCH3), mRNA -p24 -sS'gene_symbol' -p25 -S'NOTCH3' -p26 -sS'hgvs_predicted_protein_consequence' -p27 -(dp28 -S'tlr' -p29 -S'NP_000426.2:p.(Gln998Ter)' -p30 -sS'slr' -p31 -S'NP_000426.2:p.(Q998*)' -p32 -ssS'submitted_variant' -p33 -S'19-15291774-G-A' -p34 -sS'genome_context_intronic_sequence' -p35 -g16 -sS'hgvs_lrg_variant' -p36 -g16 -sS'hgvs_transcript_variant' -p37 -S'NM_000435.2:c.2992C>T' -p38 -sS'hgvs_refseqgene_variant' -p39 -g16 -sS'primary_assembly_loci' -p40 -(dp41 -S'grch38' -p42 -(dp43 -S'hgvs_genomic_description' -p44 -S'NC_000019.10:g.15180963G>A' -p45 -sS'vcf' -p46 
-(dp47 -S'chr' -p48 -S'19' -p49 -sS'ref' -p50 -VG -p51 -sS'pos' -p52 -S'15180963' -p53 -sS'alt' -p54 -VA -p55 -sssS'grch37' -p56 -(dp57 -g44 -S'NC_000019.9:g.15291774G>A' -p58 -sg46 -(dp59 -g48 -g49 -sg50 -g51 -sg52 -S'15291774' -p60 -sg54 -g55 -sssS'hg38' -p61 -(dp62 -g44 -S'NC_000019.10:g.15180963G>A' -p63 -sg46 -(dp64 -g48 -S'chr19' -p65 -sg50 -g51 -sg52 -S'15180963' -p66 -sg54 -g55 -sssS'hg19' -p67 -(dp68 -g44 -S'NC_000019.9:g.15291774G>A' -p69 -sg46 -(dp70 -g48 -g65 -sg50 -g51 -sg52 -S'15291774' -p71 -sg54 -g55 -ssssS'reference_sequence_records' -p72 -(dp73 -S'protein' -p74 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000426.2' -p75 -sS'transcript' -p76 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000435.2' -p77 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant264.txt b/VariantValidator/testing/testOutputsMasterITS/variant264.txt deleted file mode 100644 index d197c6d9..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant264.txt +++ /dev/null @@ -1,156 +0,0 @@ -(dp0 -S'flag' -p1 -S'intergenic' -p2 -sS'Intergenic_Variant_1' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'No transcripts found that fully overlap the described variation in the genomic sequence' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -g6 -sS'gene_symbol' -p14 -g6 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -g6 -sS'slr' -p18 -g6 -ssS'submitted_variant' -p19 -S'19-15311794-A-G' -p20 -sS'genome_context_intronic_sequence' -p21 -g6 -sS'hgvs_lrg_variant' -p22 -g6 -sS'hgvs_transcript_variant' -p23 -g6 -sS'hgvs_refseqgene_variant' -p24 -g6 -sS'primary_assembly_loci' -p25 -(dp26 -S'hg19' -p27 -(dp28 -S'hgvs_genomic_description' -p29 -VNC_000019.9:g.15311794A>G -p30 -sS'vcf' -p31 -(dp32 -S'chr' -p33 -S'chr19' -p34 -sS'ref' -p35 -S'A' -p36 -sS'pos' -p37 -S'15311794' -p38 -sS'alt' -p39 -S'G' -p40 
-sssS'grch37' -p41 -(dp42 -g29 -VNC_000019.9:g.15311794A>G -p43 -sg31 -(dp44 -g33 -S'19' -p45 -sg35 -g36 -sg37 -g38 -sg39 -g40 -sssS'hg38' -p46 -(dp47 -g29 -VNC_000019.10:g.15200983A>G -p48 -sg31 -(dp49 -g33 -g34 -sg35 -g36 -sg37 -S'15200983' -p50 -sg39 -g40 -sssS'grch38' -p51 -(dp52 -g29 -VNC_000019.10:g.15200983A>G -p53 -sg31 -(dp54 -g33 -g45 -sg35 -g36 -sg37 -g50 -sg39 -g40 -ssssS'reference_sequence_records' -p55 -g6 -ssS'metadata' -p56 -(dp57 -S'variantvalidator_hgvs_version' -p58 -S'1.1.3' -p59 -sS'uta_schema' -p60 -S'uta_20180821' -p61 -sS'seqrepo_db' -p62 -S'2018-08-21' -p63 -sS'variantvalidator_version' -p64 -S'v0.2' -p65 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant265.txt b/VariantValidator/testing/testOutputsMasterITS/variant265.txt deleted file mode 100644 index a7e72c16..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant265.txt +++ /dev/null @@ -1,286 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000540.2:c.14818G>A' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens ryanodine receptor 1 (RYR1), transcript variant 1, mRNA -p14 -sS'gene_symbol' -p15 -S'RYR1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000531.2:p.(Ala4940Thr)' -p20 -sS'slr' -p21 -S'NP_000531.2:p.(A4940T)' -p22 -ssS'submitted_variant' -p23 -S'19-39076592-G-A' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_000540.2:c.14818G>A' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'grch38' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000019.10:g.38585952G>A' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'19' -p39 -sS'ref' -p40 -S'G' -p41 -sS'pos' -p42 -S'38585952' 
-p43 -sS'alt' -p44 -S'A' -p45 -sssS'grch37' -p46 -(dp47 -g34 -S'NC_000019.9:g.39076592G>A' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'39076592' -p50 -sg44 -g45 -sssS'hg38' -p51 -(dp52 -g34 -S'NC_000019.10:g.38585952G>A' -p53 -sg36 -(dp54 -g38 -S'chr19' -p55 -sg40 -g41 -sg42 -S'38585952' -p56 -sg44 -g45 -sssS'hg19' -p57 -(dp58 -g34 -S'NC_000019.9:g.39076592G>A' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'39076592' -p61 -sg44 -g45 -ssssS'reference_sequence_records' -p62 -(dp63 -S'protein' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000531.2' -p65 -sS'transcript' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000540.2' -p67 -sssS'NM_001042723.1:c.14803G>A' -p68 -(dp69 -g5 -g6 -sg7 -(lp70 -S'RefSeqGene record not available' -p71 -asg10 -g6 -sg11 -(lp72 -sg13 -VHomo sapiens ryanodine receptor 1 (RYR1), transcript variant 2, mRNA -p73 -sg15 -S'RYR1' -p74 -sg17 -(dp75 -g19 -S'NP_001036188.1:p.(Ala4935Thr)' -p76 -sg21 -S'NP_001036188.1:p.(A4935T)' -p77 -ssg23 -g24 -sg25 -g6 -sg26 -g6 -sg27 -S'NM_001042723.1:c.14803G>A' -p78 -sg29 -g6 -sg30 -(dp79 -S'grch38' -p80 -(dp81 -g34 -S'NC_000019.10:g.38585952G>A' -p82 -sg36 -(dp83 -g38 -g39 -sg40 -g41 -sg42 -S'38585952' -p84 -sg44 -g45 -sssS'grch37' -p85 -(dp86 -g34 -S'NC_000019.9:g.39076592G>A' -p87 -sg36 -(dp88 -g38 -g39 -sg40 -g41 -sg42 -S'39076592' -p89 -sg44 -g45 -sssg51 -(dp90 -g34 -S'NC_000019.10:g.38585952G>A' -p91 -sg36 -(dp92 -g38 -g55 -sg40 -g41 -sg42 -S'38585952' -p93 -sg44 -g45 -sssS'hg19' -p94 -(dp95 -g34 -S'NC_000019.9:g.39076592G>A' -p96 -sg36 -(dp97 -g38 -g55 -sg40 -g41 -sg42 -S'39076592' -p98 -sg44 -g45 -ssssg62 -(dp99 -g64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036188.1' -p100 -sg66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042723.1' -p101 -sssS'metadata' -p102 -(dp103 -S'variantvalidator_hgvs_version' -p104 -S'1.1.3' -p105 -sS'uta_schema' -p106 -S'uta_20180821' -p107 -sS'seqrepo_db' -p108 -S'2018-08-21' -p109 -sS'variantvalidator_version' -p110 -S'v0.2' -p111 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant266.txt b/VariantValidator/testing/testOutputsMasterITS/variant266.txt deleted file mode 100644 index fc9a1170..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant266.txt +++ /dev/null @@ -1,3076 +0,0 @@ -(dp0 -S'NM_001330086.1:c.4245A>G' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha9, mRNA -p12 -sS'gene_symbol' -p13 -S'NRXN1' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001317015.1:p.(Pro1415=)' -p18 -sS'slr' -p19 -S'NP_001317015.1:p.(P1415=)' -p20 -ssS'submitted_variant' -p21 -S'2-50149352-T-C' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_001330086.1:c.4245A>G' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000002.11:g.50149352T>C' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr2' -p37 -sS'ref' -p38 -VT -p39 -sS'pos' -p40 -S'50149352' -p41 -sS'alt' -p42 -VC -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000002.12:g.49922214T>C' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p48 -sg42 -g43 -sssS'grch37' -p49 -(dp50 -g32 -S'NC_000002.11:g.50149352T>C' -p51 -sg34 -(dp52 -g36 -S'2' -p53 -sg38 -g39 -sg40 -S'50149352' -p54 -sg42 -g43 -sssS'grch38' -p55 -(dp56 -g32 -S'NC_000002.12:g.49922214T>C' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p59 -sg42 -g43 -ssssS'reference_sequence_records' -p60 -(dp61 -S'protein' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317015.1' -p63 -sS'transcript' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330086.1' -p65 
-sssS'NM_001330083.1:c.4089A>G' -p66 -(dp67 -g3 -g4 -sg5 -(lp68 -S'RefSeqGene record not available' -p69 -asg8 -g4 -sg9 -(lp70 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha6, mRNA -p71 -sg13 -S'NRXN1' -p72 -sg15 -(dp73 -g17 -S'NP_001317012.1:p.(Pro1363=)' -p74 -sg19 -S'NP_001317012.1:p.(P1363=)' -p75 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330083.1:c.4089A>G' -p76 -sg27 -g4 -sg28 -(dp77 -S'hg19' -p78 -(dp79 -g32 -S'NC_000002.11:g.50149352T>C' -p80 -sg34 -(dp81 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p82 -sg42 -g43 -sssg44 -(dp83 -g32 -S'NC_000002.12:g.49922214T>C' -p84 -sg34 -(dp85 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p86 -sg42 -g43 -sssS'grch37' -p87 -(dp88 -g32 -S'NC_000002.11:g.50149352T>C' -p89 -sg34 -(dp90 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p91 -sg42 -g43 -sssS'grch38' -p92 -(dp93 -g32 -S'NC_000002.12:g.49922214T>C' -p94 -sg34 -(dp95 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p96 -sg42 -g43 -ssssg60 -(dp97 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317012.1' -p98 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330083.1' -p99 -sssS'NM_001330095.1:c.4113A>G' -p100 -(dp101 -g3 -g4 -sg5 -(lp102 -S'RefSeqGene record not available' -p103 -asg8 -g4 -sg9 -(lp104 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha14, mRNA -p105 -sg13 -S'NRXN1' -p106 -sg15 -(dp107 -g17 -S'NP_001317024.1:p.(Pro1371=)' -p108 -sg19 -S'NP_001317024.1:p.(P1371=)' -p109 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330095.1:c.4113A>G' -p110 -sg27 -g4 -sg28 -(dp111 -S'hg19' -p112 -(dp113 -g32 -S'NC_000002.11:g.50149352T>C' -p114 -sg34 -(dp115 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p116 -sg42 -g43 -sssg44 -(dp117 -g32 -S'NC_000002.12:g.49922214T>C' -p118 -sg34 -(dp119 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p120 -sg42 -g43 -sssS'grch37' -p121 -(dp122 -g32 -S'NC_000002.11:g.50149352T>C' -p123 -sg34 -(dp124 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p125 -sg42 -g43 -sssS'grch38' -p126 -(dp127 -g32 -S'NC_000002.12:g.49922214T>C' -p128 
-sg34 -(dp129 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p130 -sg42 -g43 -ssssg60 -(dp131 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317024.1' -p132 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330095.1' -p133 -sssS'NM_138735.2:c.1059A>G' -p134 -(dp135 -g3 -g4 -sg5 -(lp136 -S'A more recent version of the selected reference sequence NM_138735.2 is available (NM_138735.4)' -p137 -aS'NM_138735.4:c.1059A>G MUST be fully validated prior to use in reports' -p138 -aS'select_variants=NM_138735.4:c.1059A>G' -p139 -aS'RefSeqGene record not available' -p140 -asg8 -g4 -sg9 -(lp141 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant beta, mRNA -p142 -sg13 -S'NRXN1' -p143 -sg15 -(dp144 -g17 -S'NP_620072.1:p.(Pro353=)' -p145 -sg19 -S'NP_620072.1:p.(P353=)' -p146 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_138735.2:c.1059A>G' -p147 -sg27 -g4 -sg28 -(dp148 -S'hg19' -p149 -(dp150 -g32 -S'NC_000002.11:g.50149352T>C' -p151 -sg34 -(dp152 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p153 -sg42 -g43 -sssg44 -(dp154 -g32 -S'NC_000002.12:g.49922214T>C' -p155 -sg34 -(dp156 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p157 -sg42 -g43 -sssS'grch37' -p158 -(dp159 -g32 -S'NC_000002.11:g.50149352T>C' -p160 -sg34 -(dp161 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p162 -sg42 -g43 -sssS'grch38' -p163 -(dp164 -g32 -S'NC_000002.12:g.49922214T>C' -p165 -sg34 -(dp166 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p167 -sg42 -g43 -ssssg60 -(dp168 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_620072.1' -p169 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_138735.2' -p170 -sssS'NM_001330078.1:c.4254A>G' -p171 -(dp172 -g3 -g4 -sg5 -(lp173 -S'RefSeqGene record not available' -p174 -asg8 -g4 -sg9 -(lp175 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha4, mRNA -p176 -sg13 -S'NRXN1' -p177 -sg15 -(dp178 -g17 -S'NP_001317007.1:p.(Pro1418=)' -p179 -sg19 -S'NP_001317007.1:p.(P1418=)' -p180 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330078.1:c.4254A>G' -p181 -sg27 -g4 -sg28 -(dp182 
-S'hg19' -p183 -(dp184 -g32 -S'NC_000002.11:g.50149352T>C' -p185 -sg34 -(dp186 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p187 -sg42 -g43 -sssg44 -(dp188 -g32 -S'NC_000002.12:g.49922214T>C' -p189 -sg34 -(dp190 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p191 -sg42 -g43 -sssS'grch37' -p192 -(dp193 -g32 -S'NC_000002.11:g.50149352T>C' -p194 -sg34 -(dp195 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p196 -sg42 -g43 -sssS'grch38' -p197 -(dp198 -g32 -S'NC_000002.12:g.49922214T>C' -p199 -sg34 -(dp200 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p201 -sg42 -g43 -ssssg60 -(dp202 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317007.1' -p203 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330078.1' -p204 -sssS'NM_001330094.1:c.4233A>G' -p205 -(dp206 -g3 -g4 -sg5 -(lp207 -S'RefSeqGene record not available' -p208 -asg8 -g4 -sg9 -(lp209 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha13, mRNA -p210 -sg13 -S'NRXN1' -p211 -sg15 -(dp212 -g17 -S'NP_001317023.1:p.(Pro1411=)' -p213 -sg19 -S'NP_001317023.1:p.(P1411=)' -p214 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330094.1:c.4233A>G' -p215 -sg27 -g4 -sg28 -(dp216 -S'hg19' -p217 -(dp218 -g32 -S'NC_000002.11:g.50149352T>C' -p219 -sg34 -(dp220 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p221 -sg42 -g43 -sssg44 -(dp222 -g32 -S'NC_000002.12:g.49922214T>C' -p223 -sg34 -(dp224 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p225 -sg42 -g43 -sssS'grch37' -p226 -(dp227 -g32 -S'NC_000002.11:g.50149352T>C' -p228 -sg34 -(dp229 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p230 -sg42 -g43 -sssS'grch38' -p231 -(dp232 -g32 -S'NC_000002.12:g.49922214T>C' -p233 -sg34 -(dp234 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p235 -sg42 -g43 -ssssg60 -(dp236 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317023.1' -p237 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330094.1' -p238 -sssS'NM_001320157.3:c.150A>G' -p239 -(dp240 -g3 -g4 -sg5 -(lp241 -S'RefSeqGene record not available' -p242 -asg8 -g4 -sg9 -(lp243 -sg11 -VHomo sapiens neurexin 1 (NRXN1), 
transcript variant gamma2, mRNA -p244 -sg13 -S'NRXN1' -p245 -sg15 -(dp246 -g17 -S'NP_001307086.1:p.(Pro50=)' -p247 -sg19 -S'NP_001307086.1:p.(P50=)' -p248 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001320157.3:c.150A>G' -p249 -sg27 -g4 -sg28 -(dp250 -S'hg19' -p251 -(dp252 -g32 -S'NC_000002.11:g.50149352T>C' -p253 -sg34 -(dp254 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p255 -sg42 -g43 -sssg44 -(dp256 -g32 -S'NC_000002.12:g.49922214T>C' -p257 -sg34 -(dp258 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p259 -sg42 -g43 -sssS'grch37' -p260 -(dp261 -g32 -S'NC_000002.11:g.50149352T>C' -p262 -sg34 -(dp263 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p264 -sg42 -g43 -sssS'grch38' -p265 -(dp266 -g32 -S'NC_000002.12:g.49922214T>C' -p267 -sg34 -(dp268 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p269 -sg42 -g43 -ssssg60 -(dp270 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001307086.1' -p271 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001320157.3' -p272 -sssS'NM_001330088.1:c.4074A>G' -p273 -(dp274 -g3 -g4 -sg5 -(lp275 -S'RefSeqGene record not available' -p276 -asg8 -g4 -sg9 -(lp277 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha11, mRNA -p278 -sg13 -S'NRXN1' -p279 -sg15 -(dp280 -g17 -S'NP_001317017.1:p.(Pro1358=)' -p281 -sg19 -S'NP_001317017.1:p.(P1358=)' -p282 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330088.1:c.4074A>G' -p283 -sg27 -g4 -sg28 -(dp284 -S'hg19' -p285 -(dp286 -g32 -S'NC_000002.11:g.50149352T>C' -p287 -sg34 -(dp288 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p289 -sg42 -g43 -sssg44 -(dp290 -g32 -S'NC_000002.12:g.49922214T>C' -p291 -sg34 -(dp292 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p293 -sg42 -g43 -sssS'grch37' -p294 -(dp295 -g32 -S'NC_000002.11:g.50149352T>C' -p296 -sg34 -(dp297 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p298 -sg42 -g43 -sssS'grch38' -p299 -(dp300 -g32 -S'NC_000002.12:g.49922214T>C' -p301 -sg34 -(dp302 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p303 -sg42 -g43 -ssssg60 -(dp304 -g62 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317017.1' -p305 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330088.1' -p306 -sssS'NM_001330092.1:c.1149A>G' -p307 -(dp308 -g3 -g4 -sg5 -(lp309 -S'RefSeqGene record not available' -p310 -asg8 -g4 -sg9 -(lp311 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant beta3, mRNA -p312 -sg13 -S'NRXN1' -p313 -sg15 -(dp314 -g17 -S'NP_001317021.1:p.(Pro383=)' -p315 -sg19 -S'NP_001317021.1:p.(P383=)' -p316 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330092.1:c.1149A>G' -p317 -sg27 -g4 -sg28 -(dp318 -S'hg19' -p319 -(dp320 -g32 -S'NC_000002.11:g.50149352T>C' -p321 -sg34 -(dp322 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p323 -sg42 -g43 -sssg44 -(dp324 -g32 -S'NC_000002.12:g.49922214T>C' -p325 -sg34 -(dp326 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p327 -sg42 -g43 -sssS'grch37' -p328 -(dp329 -g32 -S'NC_000002.11:g.50149352T>C' -p330 -sg34 -(dp331 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p332 -sg42 -g43 -sssS'grch38' -p333 -(dp334 -g32 -S'NC_000002.12:g.49922214T>C' -p335 -sg34 -(dp336 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p337 -sg42 -g43 -ssssg60 -(dp338 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317021.1' -p339 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330092.1' -p340 -sssS'NM_138735.4:c.1059A>G' -p341 -(dp342 -g3 -g4 -sg5 -(lp343 -S'RefSeqGene record not available' -p344 -asg8 -g4 -sg9 -(lp345 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant beta1, mRNA -p346 -sg13 -S'NRXN1' -p347 -sg15 -(dp348 -g17 -S'NP_620072.1:p.(Pro353=)' -p349 -sg19 -S'NP_620072.1:p.(P353=)' -p350 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_138735.4:c.1059A>G' -p351 -sg27 -g4 -sg28 -(dp352 -S'hg19' -p353 -(dp354 -g32 -S'NC_000002.11:g.50149352T>C' -p355 -sg34 -(dp356 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p357 -sg42 -g43 -sssg44 -(dp358 -g32 -S'NC_000002.12:g.49922214T>C' -p359 -sg34 -(dp360 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p361 -sg42 -g43 -sssS'grch37' -p362 -(dp363 -g32 -S'NC_000002.11:g.50149352T>C' 
-p364 -sg34 -(dp365 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p366 -sg42 -g43 -sssS'grch38' -p367 -(dp368 -g32 -S'NC_000002.12:g.49922214T>C' -p369 -sg34 -(dp370 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p371 -sg42 -g43 -ssssg60 -(dp372 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_620072.1' -p373 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_138735.4' -p374 -sssS'NM_001330096.1:c.4044A>G' -p375 -(dp376 -g3 -g4 -sg5 -(lp377 -S'RefSeqGene record not available' -p378 -asg8 -g4 -sg9 -(lp379 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha15, mRNA -p380 -sg13 -S'NRXN1' -p381 -sg15 -(dp382 -g17 -S'NP_001317025.1:p.(Pro1348=)' -p383 -sg19 -S'NP_001317025.1:p.(P1348=)' -p384 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330096.1:c.4044A>G' -p385 -sg27 -g4 -sg28 -(dp386 -S'hg19' -p387 -(dp388 -g32 -S'NC_000002.11:g.50149352T>C' -p389 -sg34 -(dp390 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p391 -sg42 -g43 -sssg44 -(dp392 -g32 -S'NC_000002.12:g.49922214T>C' -p393 -sg34 -(dp394 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p395 -sg42 -g43 -sssS'grch37' -p396 -(dp397 -g32 -S'NC_000002.11:g.50149352T>C' -p398 -sg34 -(dp399 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p400 -sg42 -g43 -sssS'grch38' -p401 -(dp402 -g32 -S'NC_000002.12:g.49922214T>C' -p403 -sg34 -(dp404 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p405 -sg42 -g43 -ssssg60 -(dp406 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317025.1' -p407 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330096.1' -p408 -sssS'NM_001135659.2:c.4374A>G' -p409 -(dp410 -g3 -g4 -sg5 -(lp411 -S'RefSeqGene record not available' -p412 -asg8 -g4 -sg9 -(lp413 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha2, mRNA -p414 -sg13 -S'NRXN1' -p415 -sg15 -(dp416 -g17 -S'NP_001129131.1:p.(Pro1458=)' -p417 -sg19 -S'NP_001129131.1:p.(P1458=)' -p418 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001135659.2:c.4374A>G' -p419 -sg27 -g4 -sg28 -(dp420 -S'hg19' -p421 -(dp422 -g32 -S'NC_000002.11:g.50149352T>C' -p423 -sg34 
-(dp424 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p425 -sg42 -g43 -sssg44 -(dp426 -g32 -S'NC_000002.12:g.49922214T>C' -p427 -sg34 -(dp428 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p429 -sg42 -g43 -sssS'grch37' -p430 -(dp431 -g32 -S'NC_000002.11:g.50149352T>C' -p432 -sg34 -(dp433 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p434 -sg42 -g43 -sssS'grch38' -p435 -(dp436 -g32 -S'NC_000002.12:g.49922214T>C' -p437 -sg34 -(dp438 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p439 -sg42 -g43 -ssssg60 -(dp440 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129131.1' -p441 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135659.2' -p442 -sssS'NM_001330085.1:c.4227A>G' -p443 -(dp444 -g3 -g4 -sg5 -(lp445 -S'RefSeqGene record not available' -p446 -asg8 -g4 -sg9 -(lp447 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha8, mRNA -p448 -sg13 -S'NRXN1' -p449 -sg15 -(dp450 -g17 -S'NP_001317014.1:p.(Pro1409=)' -p451 -sg19 -S'NP_001317014.1:p.(P1409=)' -p452 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330085.1:c.4227A>G' -p453 -sg27 -g4 -sg28 -(dp454 -S'hg19' -p455 -(dp456 -g32 -S'NC_000002.11:g.50149352T>C' -p457 -sg34 -(dp458 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p459 -sg42 -g43 -sssg44 -(dp460 -g32 -S'NC_000002.12:g.49922214T>C' -p461 -sg34 -(dp462 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p463 -sg42 -g43 -sssS'grch37' -p464 -(dp465 -g32 -S'NC_000002.11:g.50149352T>C' -p466 -sg34 -(dp467 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p468 -sg42 -g43 -sssS'grch38' -p469 -(dp470 -g32 -S'NC_000002.12:g.49922214T>C' -p471 -sg34 -(dp472 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p473 -sg42 -g43 -ssssg60 -(dp474 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317014.1' -p475 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330085.1' -p476 -sssS'metadata' -p477 -(dp478 -S'variantvalidator_hgvs_version' -p479 -S'1.1.3' -p480 -sS'uta_schema' -p481 -S'uta_20180821' -p482 -sS'seqrepo_db' -p483 -S'2018-08-21' -p484 -sS'variantvalidator_version' -p485 -S'v0.2' -p486 
-ssS'NM_001320156.1:c.159A>G' -p487 -(dp488 -g3 -g4 -sg5 -(lp489 -S'A more recent version of the selected reference sequence NM_001320156.1 is available (NM_001320156.3)' -p490 -aS'NM_001320156.3:c.159A>G MUST be fully validated prior to use in reports' -p491 -aS'select_variants=NM_001320156.3:c.159A>G' -p492 -aS'RefSeqGene record not available' -p493 -asg8 -g4 -sg9 -(lp494 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant gamma1, mRNA -p495 -sg13 -S'NRXN1' -p496 -sg15 -(dp497 -g17 -S'NP_001307085.1:p.(Pro53=)' -p498 -sg19 -S'NP_001307085.1:p.(P53=)' -p499 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001320156.1:c.159A>G' -p500 -sg27 -g4 -sg28 -(dp501 -S'hg19' -p502 -(dp503 -g32 -S'NC_000002.11:g.50149352T>C' -p504 -sg34 -(dp505 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p506 -sg42 -g43 -sssg44 -(dp507 -g32 -S'NC_000002.12:g.49922214T>C' -p508 -sg34 -(dp509 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p510 -sg42 -g43 -sssS'grch37' -p511 -(dp512 -g32 -S'NC_000002.11:g.50149352T>C' -p513 -sg34 -(dp514 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p515 -sg42 -g43 -sssS'grch38' -p516 -(dp517 -g32 -S'NC_000002.12:g.49922214T>C' -p518 -sg34 -(dp519 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p520 -sg42 -g43 -ssssg60 -(dp521 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001307085.1' -p522 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001320156.1' -p523 -sssS'NM_001330077.1:c.4230A>G' -p524 -(dp525 -g3 -g4 -sg5 -(lp526 -S'RefSeqGene record not available' -p527 -asg8 -g4 -sg9 -(lp528 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha3, mRNA -p529 -sg13 -S'NRXN1' -p530 -sg15 -(dp531 -g17 -S'NP_001317006.1:p.(Pro1410=)' -p532 -sg19 -S'NP_001317006.1:p.(P1410=)' -p533 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330077.1:c.4230A>G' -p534 -sg27 -g4 -sg28 -(dp535 -S'hg19' -p536 -(dp537 -g32 -S'NC_000002.11:g.50149352T>C' -p538 -sg34 -(dp539 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p540 -sg42 -g43 -sssg44 -(dp541 -g32 -S'NC_000002.12:g.49922214T>C' -p542 -sg34 
-(dp543 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p544 -sg42 -g43 -sssS'grch37' -p545 -(dp546 -g32 -S'NC_000002.11:g.50149352T>C' -p547 -sg34 -(dp548 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p549 -sg42 -g43 -sssS'grch38' -p550 -(dp551 -g32 -S'NC_000002.12:g.49922214T>C' -p552 -sg34 -(dp553 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p554 -sg42 -g43 -ssssg60 -(dp555 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317006.1' -p556 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330077.1' -p557 -sssS'NM_001330093.1:c.4251A>G' -p558 -(dp559 -g3 -g4 -sg5 -(lp560 -S'RefSeqGene record not available' -p561 -asg8 -g4 -sg9 -(lp562 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha12, mRNA -p563 -sg13 -S'NRXN1' -p564 -sg15 -(dp565 -g17 -S'NP_001317022.1:p.(Pro1417=)' -p566 -sg19 -S'NP_001317022.1:p.(P1417=)' -p567 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330093.1:c.4251A>G' -p568 -sg27 -g4 -sg28 -(dp569 -S'hg19' -p570 -(dp571 -g32 -S'NC_000002.11:g.50149352T>C' -p572 -sg34 -(dp573 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p574 -sg42 -g43 -sssg44 -(dp575 -g32 -S'NC_000002.12:g.49922214T>C' -p576 -sg34 -(dp577 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p578 -sg42 -g43 -sssS'grch37' -p579 -(dp580 -g32 -S'NC_000002.11:g.50149352T>C' -p581 -sg34 -(dp582 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p583 -sg42 -g43 -sssS'grch38' -p584 -(dp585 -g32 -S'NC_000002.12:g.49922214T>C' -p586 -sg34 -(dp587 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p588 -sg42 -g43 -ssssg60 -(dp589 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317022.1' -p590 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330093.1' -p591 -sssS'NM_001135659.1:c.4374A>G' -p592 -(dp593 -g3 -g4 -sg5 -(lp594 -S'A more recent version of the selected reference sequence NM_001135659.1 is available (NM_001135659.2)' -p595 -aS'NM_001135659.2:c.4374A>G MUST be fully validated prior to use in reports' -p596 -aS'select_variants=NM_001135659.2:c.4374A>G' -p597 -aS'RefSeqGene record not available' -p598 -asg8 -g4 
-sg9 -(lp599 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha2, mRNA -p600 -sg13 -S'NRXN1' -p601 -sg15 -(dp602 -g17 -S'NP_001129131.1:p.(Pro1458=)' -p603 -sg19 -S'NP_001129131.1:p.(P1458=)' -p604 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001135659.1:c.4374A>G' -p605 -sg27 -g4 -sg28 -(dp606 -S'hg19' -p607 -(dp608 -g32 -S'NC_000002.11:g.50149352T>C' -p609 -sg34 -(dp610 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p611 -sg42 -g43 -sssg44 -(dp612 -g32 -S'NC_000002.12:g.49922214T>C' -p613 -sg34 -(dp614 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p615 -sg42 -g43 -sssS'grch37' -p616 -(dp617 -g32 -S'NC_000002.11:g.50149352T>C' -p618 -sg34 -(dp619 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p620 -sg42 -g43 -sssS'grch38' -p621 -(dp622 -g32 -S'NC_000002.12:g.49922214T>C' -p623 -sg34 -(dp624 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p625 -sg42 -g43 -ssssg60 -(dp626 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129131.1' -p627 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135659.1' -p628 -sssS'NM_001320157.1:c.150A>G' -p629 -(dp630 -g3 -g4 -sg5 -(lp631 -S'A more recent version of the selected reference sequence NM_001320157.1 is available (NM_001320157.3)' -p632 -aS'NM_001320157.3:c.150A>G MUST be fully validated prior to use in reports' -p633 -aS'select_variants=NM_001320157.3:c.150A>G' -p634 -aS'RefSeqGene record not available' -p635 -asg8 -g4 -sg9 -(lp636 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant gamma2, mRNA -p637 -sg13 -S'NRXN1' -p638 -sg15 -(dp639 -g17 -S'NP_001307086.1:p.(Pro50=)' -p640 -sg19 -S'NP_001307086.1:p.(P50=)' -p641 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001320157.1:c.150A>G' -p642 -sg27 -g4 -sg28 -(dp643 -S'hg19' -p644 -(dp645 -g32 -S'NC_000002.11:g.50149352T>C' -p646 -sg34 -(dp647 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p648 -sg42 -g43 -sssg44 -(dp649 -g32 -S'NC_000002.12:g.49922214T>C' -p650 -sg34 -(dp651 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p652 -sg42 -g43 -sssS'grch37' -p653 -(dp654 -g32 
-S'NC_000002.11:g.50149352T>C' -p655 -sg34 -(dp656 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p657 -sg42 -g43 -sssS'grch38' -p658 -(dp659 -g32 -S'NC_000002.12:g.49922214T>C' -p660 -sg34 -(dp661 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p662 -sg42 -g43 -ssssg60 -(dp663 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001307086.1' -p664 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001320157.1' -p665 -sssS'NM_001330084.1:c.4188A>G' -p666 -(dp667 -g3 -g4 -sg5 -(lp668 -S'RefSeqGene record not available' -p669 -asg8 -g4 -sg9 -(lp670 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha7, mRNA -p671 -sg13 -S'NRXN1' -p672 -sg15 -(dp673 -g17 -S'NP_001317013.1:p.(Pro1396=)' -p674 -sg19 -S'NP_001317013.1:p.(P1396=)' -p675 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330084.1:c.4188A>G' -p676 -sg27 -g4 -sg28 -(dp677 -S'hg19' -p678 -(dp679 -g32 -S'NC_000002.11:g.50149352T>C' -p680 -sg34 -(dp681 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p682 -sg42 -g43 -sssg44 -(dp683 -g32 -S'NC_000002.12:g.49922214T>C' -p684 -sg34 -(dp685 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p686 -sg42 -g43 -sssS'grch37' -p687 -(dp688 -g32 -S'NC_000002.11:g.50149352T>C' -p689 -sg34 -(dp690 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p691 -sg42 -g43 -sssS'grch38' -p692 -(dp693 -g32 -S'NC_000002.12:g.49922214T>C' -p694 -sg34 -(dp695 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p696 -sg42 -g43 -ssssg60 -(dp697 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317013.1' -p698 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330084.1' -p699 -sssS'NM_004801.4:c.4164A>G' -p700 -(dp701 -g3 -g4 -sg5 -(lp702 -S'A more recent version of the selected reference sequence NM_004801.4 is available (NM_004801.5)' -p703 -aS'NM_004801.5:c.4164A>G MUST be fully validated prior to use in reports' -p704 -aS'select_variants=NM_004801.5:c.4164A>G' -p705 -aS'RefSeqGene record not available' -p706 -asg8 -g4 -sg9 -(lp707 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha1, mRNA -p708 -sg13 -S'NRXN1' -p709 
-sg15 -(dp710 -g17 -S'NP_004792.1:p.(Pro1388=)' -p711 -sg19 -S'NP_004792.1:p.(P1388=)' -p712 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_004801.4:c.4164A>G' -p713 -sg27 -g4 -sg28 -(dp714 -S'hg19' -p715 -(dp716 -g32 -S'NC_000002.11:g.50149352T>C' -p717 -sg34 -(dp718 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p719 -sg42 -g43 -sssg44 -(dp720 -g32 -S'NC_000002.12:g.49922214T>C' -p721 -sg34 -(dp722 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p723 -sg42 -g43 -sssS'grch37' -p724 -(dp725 -g32 -S'NC_000002.11:g.50149352T>C' -p726 -sg34 -(dp727 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p728 -sg42 -g43 -sssS'grch38' -p729 -(dp730 -g32 -S'NC_000002.12:g.49922214T>C' -p731 -sg34 -(dp732 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p733 -sg42 -g43 -ssssg60 -(dp734 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004792.1' -p735 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004801.4' -p736 -sssS'NM_001330082.1:c.4221A>G' -p737 -(dp738 -g3 -g4 -sg5 -(lp739 -S'RefSeqGene record not available' -p740 -asg8 -g4 -sg9 -(lp741 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha5, mRNA -p742 -sg13 -S'NRXN1' -p743 -sg15 -(dp744 -g17 -S'NP_001317011.1:p.(Pro1407=)' -p745 -sg19 -S'NP_001317011.1:p.(P1407=)' -p746 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330082.1:c.4221A>G' -p747 -sg27 -g4 -sg28 -(dp748 -S'hg19' -p749 -(dp750 -g32 -S'NC_000002.11:g.50149352T>C' -p751 -sg34 -(dp752 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p753 -sg42 -g43 -sssg44 -(dp754 -g32 -S'NC_000002.12:g.49922214T>C' -p755 -sg34 -(dp756 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p757 -sg42 -g43 -sssS'grch37' -p758 -(dp759 -g32 -S'NC_000002.11:g.50149352T>C' -p760 -sg34 -(dp761 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p762 -sg42 -g43 -sssS'grch38' -p763 -(dp764 -g32 -S'NC_000002.12:g.49922214T>C' -p765 -sg34 -(dp766 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p767 -sg42 -g43 -ssssg60 -(dp768 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317011.1' -p769 -sg64 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330082.1' -p770 -sssS'flag' -p771 -S'gene_variant' -p772 -sS'NM_001330091.1:c.1140A>G' -p773 -(dp774 -g3 -g4 -sg5 -(lp775 -S'RefSeqGene record not available' -p776 -asg8 -g4 -sg9 -(lp777 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant beta2, mRNA -p778 -sg13 -S'NRXN1' -p779 -sg15 -(dp780 -g17 -S'NP_001317020.1:p.(Pro380=)' -p781 -sg19 -S'NP_001317020.1:p.(P380=)' -p782 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330091.1:c.1140A>G' -p783 -sg27 -g4 -sg28 -(dp784 -S'hg19' -p785 -(dp786 -g32 -S'NC_000002.11:g.50149352T>C' -p787 -sg34 -(dp788 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p789 -sg42 -g43 -sssg44 -(dp790 -g32 -S'NC_000002.12:g.49922214T>C' -p791 -sg34 -(dp792 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p793 -sg42 -g43 -sssS'grch37' -p794 -(dp795 -g32 -S'NC_000002.11:g.50149352T>C' -p796 -sg34 -(dp797 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p798 -sg42 -g43 -sssS'grch38' -p799 -(dp800 -g32 -S'NC_000002.12:g.49922214T>C' -p801 -sg34 -(dp802 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p803 -sg42 -g43 -ssssg60 -(dp804 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317020.1' -p805 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330091.1' -p806 -sssS'NM_001320156.3:c.159A>G' -p807 -(dp808 -g3 -g4 -sg5 -(lp809 -S'RefSeqGene record not available' -p810 -asg8 -g4 -sg9 -(lp811 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant gamma1, mRNA -p812 -sg13 -S'NRXN1' -p813 -sg15 -(dp814 -g17 -S'NP_001307085.1:p.(Pro53=)' -p815 -sg19 -S'NP_001307085.1:p.(P53=)' -p816 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001320156.3:c.159A>G' -p817 -sg27 -g4 -sg28 -(dp818 -S'hg19' -p819 -(dp820 -g32 -S'NC_000002.11:g.50149352T>C' -p821 -sg34 -(dp822 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p823 -sg42 -g43 -sssg44 -(dp824 -g32 -S'NC_000002.12:g.49922214T>C' -p825 -sg34 -(dp826 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p827 -sg42 -g43 -sssS'grch37' -p828 -(dp829 -g32 -S'NC_000002.11:g.50149352T>C' -p830 -sg34 -(dp831 
-g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p832 -sg42 -g43 -sssS'grch38' -p833 -(dp834 -g32 -S'NC_000002.12:g.49922214T>C' -p835 -sg34 -(dp836 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p837 -sg42 -g43 -ssssg60 -(dp838 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001307085.1' -p839 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001320156.3' -p840 -sssS'NM_001330087.1:c.4053A>G' -p841 -(dp842 -g3 -g4 -sg5 -(lp843 -S'RefSeqGene record not available' -p844 -asg8 -g4 -sg9 -(lp845 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha10, mRNA -p846 -sg13 -S'NRXN1' -p847 -sg15 -(dp848 -g17 -S'NP_001317016.1:p.(Pro1351=)' -p849 -sg19 -S'NP_001317016.1:p.(P1351=)' -p850 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330087.1:c.4053A>G' -p851 -sg27 -g4 -sg28 -(dp852 -S'hg19' -p853 -(dp854 -g32 -S'NC_000002.11:g.50149352T>C' -p855 -sg34 -(dp856 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p857 -sg42 -g43 -sssg44 -(dp858 -g32 -S'NC_000002.12:g.49922214T>C' -p859 -sg34 -(dp860 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p861 -sg42 -g43 -sssS'grch37' -p862 -(dp863 -g32 -S'NC_000002.11:g.50149352T>C' -p864 -sg34 -(dp865 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p866 -sg42 -g43 -sssS'grch38' -p867 -(dp868 -g32 -S'NC_000002.12:g.49922214T>C' -p869 -sg34 -(dp870 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p871 -sg42 -g43 -ssssg60 -(dp872 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317016.1' -p873 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330087.1' -p874 -sssS'NM_001330097.1:c.1050A>G' -p875 -(dp876 -g3 -g4 -sg5 -(lp877 -S'RefSeqGene record not available' -p878 -asg8 -g4 -sg9 -(lp879 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant beta4, mRNA -p880 -sg13 -S'NRXN1' -p881 -sg15 -(dp882 -g17 -S'NP_001317026.1:p.(Pro350=)' -p883 -sg19 -S'NP_001317026.1:p.(P350=)' -p884 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330097.1:c.1050A>G' -p885 -sg27 -g4 -sg28 -(dp886 -S'hg19' -p887 -(dp888 -g32 -S'NC_000002.11:g.50149352T>C' -p889 -sg34 -(dp890 -g36 -g37 -sg38 
-g39 -sg40 -S'50149352' -p891 -sg42 -g43 -sssg44 -(dp892 -g32 -S'NC_000002.12:g.49922214T>C' -p893 -sg34 -(dp894 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p895 -sg42 -g43 -sssS'grch37' -p896 -(dp897 -g32 -S'NC_000002.11:g.50149352T>C' -p898 -sg34 -(dp899 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p900 -sg42 -g43 -sssS'grch38' -p901 -(dp902 -g32 -S'NC_000002.12:g.49922214T>C' -p903 -sg34 -(dp904 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p905 -sg42 -g43 -ssssg60 -(dp906 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317026.1' -p907 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330097.1' -p908 -sssS'NM_004801.5:c.4164A>G' -p909 -(dp910 -g3 -g4 -sg5 -(lp911 -S'RefSeqGene record not available' -p912 -asg8 -g4 -sg9 -(lp913 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha1, mRNA -p914 -sg13 -S'NRXN1' -p915 -sg15 -(dp916 -g17 -S'NP_004792.1:p.(Pro1388=)' -p917 -sg19 -S'NP_004792.1:p.(P1388=)' -p918 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_004801.5:c.4164A>G' -p919 -sg27 -g4 -sg28 -(dp920 -S'hg19' -p921 -(dp922 -g32 -S'NC_000002.11:g.50149352T>C' -p923 -sg34 -(dp924 -g36 -g37 -sg38 -g39 -sg40 -S'50149352' -p925 -sg42 -g43 -sssg44 -(dp926 -g32 -S'NC_000002.12:g.49922214T>C' -p927 -sg34 -(dp928 -g36 -g37 -sg38 -g39 -sg40 -S'49922214' -p929 -sg42 -g43 -sssS'grch37' -p930 -(dp931 -g32 -S'NC_000002.11:g.50149352T>C' -p932 -sg34 -(dp933 -g36 -g53 -sg38 -g39 -sg40 -S'50149352' -p934 -sg42 -g43 -sssS'grch38' -p935 -(dp936 -g32 -S'NC_000002.12:g.49922214T>C' -p937 -sg34 -(dp938 -g36 -g53 -sg38 -g39 -sg40 -S'49922214' -p939 -sg42 -g43 -ssssg60 -(dp940 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004792.1' -p941 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004801.5' -p942 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant267.txt b/VariantValidator/testing/testOutputsMasterITS/variant267.txt deleted file mode 100644 index 742727c3..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant267.txt +++ /dev/null @@ -1,2023 +0,0 @@ -(dp0 -S'NM_001330096.1:c.1201C>T' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha15, mRNA -p12 -sS'gene_symbol' -p13 -S'NRXN1' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001317025.1:p.(Pro401Ser)' -p18 -sS'slr' -p19 -S'NP_001317025.1:p.(P401S)' -p20 -ssS'submitted_variant' -p21 -S'2-50847195-G-A' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_001330096.1:c.1201C>T' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000002.11:g.50847195G>A' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr2' -p37 -sS'ref' -p38 -VG -p39 -sS'pos' -p40 -S'50847195' -p41 -sS'alt' -p42 -VA -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000002.12:g.50620057G>A' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p48 -sg42 -g43 -sssS'grch37' -p49 -(dp50 -g32 -S'NC_000002.11:g.50847195G>A' -p51 -sg34 -(dp52 -g36 -S'2' -p53 -sg38 -g39 -sg40 -S'50847195' -p54 -sg42 -g43 -sssS'grch38' -p55 -(dp56 -g32 -S'NC_000002.12:g.50620057G>A' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p59 -sg42 -g43 -ssssS'reference_sequence_records' -p60 -(dp61 -S'protein' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317025.1' -p63 -sS'transcript' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330096.1' -p65 
-sssS'NM_001330084.1:c.1246C>T' -p66 -(dp67 -g3 -g4 -sg5 -(lp68 -S'RefSeqGene record not available' -p69 -asg8 -g4 -sg9 -(lp70 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha7, mRNA -p71 -sg13 -S'NRXN1' -p72 -sg15 -(dp73 -g17 -S'NP_001317013.1:p.(Pro416Ser)' -p74 -sg19 -S'NP_001317013.1:p.(P416S)' -p75 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330084.1:c.1246C>T' -p76 -sg27 -g4 -sg28 -(dp77 -S'hg19' -p78 -(dp79 -g32 -S'NC_000002.11:g.50847195G>A' -p80 -sg34 -(dp81 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p82 -sg42 -g43 -sssg44 -(dp83 -g32 -S'NC_000002.12:g.50620057G>A' -p84 -sg34 -(dp85 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p86 -sg42 -g43 -sssS'grch37' -p87 -(dp88 -g32 -S'NC_000002.11:g.50847195G>A' -p89 -sg34 -(dp90 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p91 -sg42 -g43 -sssS'grch38' -p92 -(dp93 -g32 -S'NC_000002.12:g.50620057G>A' -p94 -sg34 -(dp95 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p96 -sg42 -g43 -ssssg60 -(dp97 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317013.1' -p98 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330084.1' -p99 -sssS'NM_001330077.1:c.1261C>T' -p100 -(dp101 -g3 -g4 -sg5 -(lp102 -S'RefSeqGene record not available' -p103 -asg8 -g4 -sg9 -(lp104 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha3, mRNA -p105 -sg13 -S'NRXN1' -p106 -sg15 -(dp107 -g17 -S'NP_001317006.1:p.(Pro421Ser)' -p108 -sg19 -S'NP_001317006.1:p.(P421S)' -p109 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330077.1:c.1261C>T' -p110 -sg27 -g4 -sg28 -(dp111 -S'hg19' -p112 -(dp113 -g32 -S'NC_000002.11:g.50847195G>A' -p114 -sg34 -(dp115 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p116 -sg42 -g43 -sssg44 -(dp117 -g32 -S'NC_000002.12:g.50620057G>A' -p118 -sg34 -(dp119 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p120 -sg42 -g43 -sssS'grch37' -p121 -(dp122 -g32 -S'NC_000002.11:g.50847195G>A' -p123 -sg34 -(dp124 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p125 -sg42 -g43 -sssS'grch38' -p126 -(dp127 -g32 -S'NC_000002.12:g.50620057G>A' -p128 
-sg34 -(dp129 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p130 -sg42 -g43 -ssssg60 -(dp131 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317006.1' -p132 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330077.1' -p133 -sssS'NM_001330086.1:c.1285C>T' -p134 -(dp135 -g3 -g4 -sg5 -(lp136 -S'RefSeqGene record not available' -p137 -asg8 -g4 -sg9 -(lp138 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha9, mRNA -p139 -sg13 -S'NRXN1' -p140 -sg15 -(dp141 -g17 -S'NP_001317015.1:p.(Pro429Ser)' -p142 -sg19 -S'NP_001317015.1:p.(P429S)' -p143 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330086.1:c.1285C>T' -p144 -sg27 -g4 -sg28 -(dp145 -S'hg19' -p146 -(dp147 -g32 -S'NC_000002.11:g.50847195G>A' -p148 -sg34 -(dp149 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p150 -sg42 -g43 -sssg44 -(dp151 -g32 -S'NC_000002.12:g.50620057G>A' -p152 -sg34 -(dp153 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p154 -sg42 -g43 -sssS'grch37' -p155 -(dp156 -g32 -S'NC_000002.11:g.50847195G>A' -p157 -sg34 -(dp158 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p159 -sg42 -g43 -sssS'grch38' -p160 -(dp161 -g32 -S'NC_000002.12:g.50620057G>A' -p162 -sg34 -(dp163 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p164 -sg42 -g43 -ssssg60 -(dp165 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317015.1' -p166 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330086.1' -p167 -sssS'NM_001330088.1:c.1231C>T' -p168 -(dp169 -g3 -g4 -sg5 -(lp170 -S'RefSeqGene record not available' -p171 -asg8 -g4 -sg9 -(lp172 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha11, mRNA -p173 -sg13 -S'NRXN1' -p174 -sg15 -(dp175 -g17 -S'NP_001317017.1:p.(Pro411Ser)' -p176 -sg19 -S'NP_001317017.1:p.(P411S)' -p177 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330088.1:c.1231C>T' -p178 -sg27 -g4 -sg28 -(dp179 -S'hg19' -p180 -(dp181 -g32 -S'NC_000002.11:g.50847195G>A' -p182 -sg34 -(dp183 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p184 -sg42 -g43 -sssg44 -(dp185 -g32 -S'NC_000002.12:g.50620057G>A' -p186 -sg34 -(dp187 -g36 -g37 
-sg38 -g39 -sg40 -S'50620057' -p188 -sg42 -g43 -sssS'grch37' -p189 -(dp190 -g32 -S'NC_000002.11:g.50847195G>A' -p191 -sg34 -(dp192 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p193 -sg42 -g43 -sssS'grch38' -p194 -(dp195 -g32 -S'NC_000002.12:g.50620057G>A' -p196 -sg34 -(dp197 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p198 -sg42 -g43 -ssssg60 -(dp199 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317017.1' -p200 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330088.1' -p201 -sssS'NM_001330093.1:c.1282C>T' -p202 -(dp203 -g3 -g4 -sg5 -(lp204 -S'RefSeqGene record not available' -p205 -asg8 -g4 -sg9 -(lp206 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha12, mRNA -p207 -sg13 -S'NRXN1' -p208 -sg15 -(dp209 -g17 -S'NP_001317022.1:p.(Pro428Ser)' -p210 -sg19 -S'NP_001317022.1:p.(P428S)' -p211 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330093.1:c.1282C>T' -p212 -sg27 -g4 -sg28 -(dp213 -S'hg19' -p214 -(dp215 -g32 -S'NC_000002.11:g.50847195G>A' -p216 -sg34 -(dp217 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p218 -sg42 -g43 -sssg44 -(dp219 -g32 -S'NC_000002.12:g.50620057G>A' -p220 -sg34 -(dp221 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p222 -sg42 -g43 -sssS'grch37' -p223 -(dp224 -g32 -S'NC_000002.11:g.50847195G>A' -p225 -sg34 -(dp226 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p227 -sg42 -g43 -sssS'grch38' -p228 -(dp229 -g32 -S'NC_000002.12:g.50620057G>A' -p230 -sg34 -(dp231 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p232 -sg42 -g43 -ssssg60 -(dp233 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317022.1' -p234 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330093.1' -p235 -sssS'NM_001330087.1:c.1201C>T' -p236 -(dp237 -g3 -g4 -sg5 -(lp238 -S'RefSeqGene record not available' -p239 -asg8 -g4 -sg9 -(lp240 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha10, mRNA -p241 -sg13 -S'NRXN1' -p242 -sg15 -(dp243 -g17 -S'NP_001317016.1:p.(Pro401Ser)' -p244 -sg19 -S'NP_001317016.1:p.(P401S)' -p245 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 
-S'NM_001330087.1:c.1201C>T' -p246 -sg27 -g4 -sg28 -(dp247 -S'hg19' -p248 -(dp249 -g32 -S'NC_000002.11:g.50847195G>A' -p250 -sg34 -(dp251 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p252 -sg42 -g43 -sssg44 -(dp253 -g32 -S'NC_000002.12:g.50620057G>A' -p254 -sg34 -(dp255 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p256 -sg42 -g43 -sssS'grch37' -p257 -(dp258 -g32 -S'NC_000002.11:g.50847195G>A' -p259 -sg34 -(dp260 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p261 -sg42 -g43 -sssS'grch38' -p262 -(dp263 -g32 -S'NC_000002.12:g.50620057G>A' -p264 -sg34 -(dp265 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p266 -sg42 -g43 -ssssg60 -(dp267 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317016.1' -p268 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330087.1' -p269 -sssS'NM_001330082.1:c.1261C>T' -p270 -(dp271 -g3 -g4 -sg5 -(lp272 -S'RefSeqGene record not available' -p273 -asg8 -g4 -sg9 -(lp274 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha5, mRNA -p275 -sg13 -S'NRXN1' -p276 -sg15 -(dp277 -g17 -S'NP_001317011.1:p.(Pro421Ser)' -p278 -sg19 -S'NP_001317011.1:p.(P421S)' -p279 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330082.1:c.1261C>T' -p280 -sg27 -g4 -sg28 -(dp281 -S'hg19' -p282 -(dp283 -g32 -S'NC_000002.11:g.50847195G>A' -p284 -sg34 -(dp285 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p286 -sg42 -g43 -sssg44 -(dp287 -g32 -S'NC_000002.12:g.50620057G>A' -p288 -sg34 -(dp289 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p290 -sg42 -g43 -sssS'grch37' -p291 -(dp292 -g32 -S'NC_000002.11:g.50847195G>A' -p293 -sg34 -(dp294 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p295 -sg42 -g43 -sssS'grch38' -p296 -(dp297 -g32 -S'NC_000002.12:g.50620057G>A' -p298 -sg34 -(dp299 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p300 -sg42 -g43 -ssssg60 -(dp301 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317011.1' -p302 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330082.1' -p303 -sssS'NM_001330078.1:c.1285C>T' -p304 -(dp305 -g3 -g4 -sg5 -(lp306 -S'RefSeqGene record not available' -p307 
-asg8 -g4 -sg9 -(lp308 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha4, mRNA -p309 -sg13 -S'NRXN1' -p310 -sg15 -(dp311 -g17 -S'NP_001317007.1:p.(Pro429Ser)' -p312 -sg19 -S'NP_001317007.1:p.(P429S)' -p313 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330078.1:c.1285C>T' -p314 -sg27 -g4 -sg28 -(dp315 -S'hg19' -p316 -(dp317 -g32 -S'NC_000002.11:g.50847195G>A' -p318 -sg34 -(dp319 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p320 -sg42 -g43 -sssg44 -(dp321 -g32 -S'NC_000002.12:g.50620057G>A' -p322 -sg34 -(dp323 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p324 -sg42 -g43 -sssS'grch37' -p325 -(dp326 -g32 -S'NC_000002.11:g.50847195G>A' -p327 -sg34 -(dp328 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p329 -sg42 -g43 -sssS'grch38' -p330 -(dp331 -g32 -S'NC_000002.12:g.50620057G>A' -p332 -sg34 -(dp333 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p334 -sg42 -g43 -ssssg60 -(dp335 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317007.1' -p336 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330078.1' -p337 -sssS'NM_001330094.1:c.1273C>T' -p338 -(dp339 -g3 -g4 -sg5 -(lp340 -S'RefSeqGene record not available' -p341 -asg8 -g4 -sg9 -(lp342 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha13, mRNA -p343 -sg13 -S'NRXN1' -p344 -sg15 -(dp345 -g17 -S'NP_001317023.1:p.(Pro425Ser)' -p346 -sg19 -S'NP_001317023.1:p.(P425S)' -p347 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330094.1:c.1273C>T' -p348 -sg27 -g4 -sg28 -(dp349 -S'hg19' -p350 -(dp351 -g32 -S'NC_000002.11:g.50847195G>A' -p352 -sg34 -(dp353 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p354 -sg42 -g43 -sssg44 -(dp355 -g32 -S'NC_000002.12:g.50620057G>A' -p356 -sg34 -(dp357 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p358 -sg42 -g43 -sssS'grch37' -p359 -(dp360 -g32 -S'NC_000002.11:g.50847195G>A' -p361 -sg34 -(dp362 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p363 -sg42 -g43 -sssS'grch38' -p364 -(dp365 -g32 -S'NC_000002.12:g.50620057G>A' -p366 -sg34 -(dp367 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p368 -sg42 -g43 
-ssssg60 -(dp369 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317023.1' -p370 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330094.1' -p371 -sssS'flag' -p372 -S'gene_variant' -p373 -sS'NM_001135659.2:c.1405C>T' -p374 -(dp375 -g3 -g4 -sg5 -(lp376 -S'RefSeqGene record not available' -p377 -asg8 -g4 -sg9 -(lp378 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha2, mRNA -p379 -sg13 -S'NRXN1' -p380 -sg15 -(dp381 -g17 -S'NP_001129131.1:p.(Pro469Ser)' -p382 -sg19 -S'NP_001129131.1:p.(P469S)' -p383 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001135659.2:c.1405C>T' -p384 -sg27 -g4 -sg28 -(dp385 -S'hg19' -p386 -(dp387 -g32 -S'NC_000002.11:g.50847195G>A' -p388 -sg34 -(dp389 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p390 -sg42 -g43 -sssg44 -(dp391 -g32 -S'NC_000002.12:g.50620057G>A' -p392 -sg34 -(dp393 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p394 -sg42 -g43 -sssS'grch37' -p395 -(dp396 -g32 -S'NC_000002.11:g.50847195G>A' -p397 -sg34 -(dp398 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p399 -sg42 -g43 -sssS'grch38' -p400 -(dp401 -g32 -S'NC_000002.12:g.50620057G>A' -p402 -sg34 -(dp403 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p404 -sg42 -g43 -ssssg60 -(dp405 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129131.1' -p406 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135659.2' -p407 -sssS'NM_001330083.1:c.1246C>T' -p408 -(dp409 -g3 -g4 -sg5 -(lp410 -S'RefSeqGene record not available' -p411 -asg8 -g4 -sg9 -(lp412 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha6, mRNA -p413 -sg13 -S'NRXN1' -p414 -sg15 -(dp415 -g17 -S'NP_001317012.1:p.(Pro416Ser)' -p416 -sg19 -S'NP_001317012.1:p.(P416S)' -p417 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330083.1:c.1246C>T' -p418 -sg27 -g4 -sg28 -(dp419 -S'hg19' -p420 -(dp421 -g32 -S'NC_000002.11:g.50847195G>A' -p422 -sg34 -(dp423 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p424 -sg42 -g43 -sssg44 -(dp425 -g32 -S'NC_000002.12:g.50620057G>A' -p426 -sg34 -(dp427 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' 
-p428 -sg42 -g43 -sssS'grch37' -p429 -(dp430 -g32 -S'NC_000002.11:g.50847195G>A' -p431 -sg34 -(dp432 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p433 -sg42 -g43 -sssS'grch38' -p434 -(dp435 -g32 -S'NC_000002.12:g.50620057G>A' -p436 -sg34 -(dp437 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p438 -sg42 -g43 -ssssg60 -(dp439 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317012.1' -p440 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330083.1' -p441 -sssS'NM_004801.5:c.1285C>T' -p442 -(dp443 -g3 -g4 -sg5 -(lp444 -S'RefSeqGene record not available' -p445 -asg8 -g4 -sg9 -(lp446 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha1, mRNA -p447 -sg13 -S'NRXN1' -p448 -sg15 -(dp449 -g17 -S'NP_004792.1:p.(Pro429Ser)' -p450 -sg19 -S'NP_004792.1:p.(P429S)' -p451 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_004801.5:c.1285C>T' -p452 -sg27 -g4 -sg28 -(dp453 -S'hg19' -p454 -(dp455 -g32 -S'NC_000002.11:g.50847195G>A' -p456 -sg34 -(dp457 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p458 -sg42 -g43 -sssg44 -(dp459 -g32 -S'NC_000002.12:g.50620057G>A' -p460 -sg34 -(dp461 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p462 -sg42 -g43 -sssS'grch37' -p463 -(dp464 -g32 -S'NC_000002.11:g.50847195G>A' -p465 -sg34 -(dp466 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p467 -sg42 -g43 -sssS'grch38' -p468 -(dp469 -g32 -S'NC_000002.12:g.50620057G>A' -p470 -sg34 -(dp471 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p472 -sg42 -g43 -ssssg60 -(dp473 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004792.1' -p474 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004801.5' -p475 -sssS'NM_001330085.1:c.1285C>T' -p476 -(dp477 -g3 -g4 -sg5 -(lp478 -S'RefSeqGene record not available' -p479 -asg8 -g4 -sg9 -(lp480 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha8, mRNA -p481 -sg13 -S'NRXN1' -p482 -sg15 -(dp483 -g17 -S'NP_001317014.1:p.(Pro429Ser)' -p484 -sg19 -S'NP_001317014.1:p.(P429S)' -p485 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330085.1:c.1285C>T' -p486 -sg27 -g4 -sg28 -(dp487 -S'hg19' 
-p488 -(dp489 -g32 -S'NC_000002.11:g.50847195G>A' -p490 -sg34 -(dp491 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p492 -sg42 -g43 -sssg44 -(dp493 -g32 -S'NC_000002.12:g.50620057G>A' -p494 -sg34 -(dp495 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p496 -sg42 -g43 -sssS'grch37' -p497 -(dp498 -g32 -S'NC_000002.11:g.50847195G>A' -p499 -sg34 -(dp500 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p501 -sg42 -g43 -sssS'grch38' -p502 -(dp503 -g32 -S'NC_000002.12:g.50620057G>A' -p504 -sg34 -(dp505 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p506 -sg42 -g43 -ssssg60 -(dp507 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317014.1' -p508 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330085.1' -p509 -sssS'NM_001330095.1:c.1261C>T' -p510 -(dp511 -g3 -g4 -sg5 -(lp512 -S'RefSeqGene record not available' -p513 -asg8 -g4 -sg9 -(lp514 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha14, mRNA -p515 -sg13 -S'NRXN1' -p516 -sg15 -(dp517 -g17 -S'NP_001317024.1:p.(Pro421Ser)' -p518 -sg19 -S'NP_001317024.1:p.(P421S)' -p519 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001330095.1:c.1261C>T' -p520 -sg27 -g4 -sg28 -(dp521 -S'hg19' -p522 -(dp523 -g32 -S'NC_000002.11:g.50847195G>A' -p524 -sg34 -(dp525 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p526 -sg42 -g43 -sssg44 -(dp527 -g32 -S'NC_000002.12:g.50620057G>A' -p528 -sg34 -(dp529 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p530 -sg42 -g43 -sssS'grch37' -p531 -(dp532 -g32 -S'NC_000002.11:g.50847195G>A' -p533 -sg34 -(dp534 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p535 -sg42 -g43 -sssS'grch38' -p536 -(dp537 -g32 -S'NC_000002.12:g.50620057G>A' -p538 -sg34 -(dp539 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p540 -sg42 -g43 -ssssg60 -(dp541 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317024.1' -p542 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330095.1' -p543 -sssS'NM_004801.4:c.1285C>T' -p544 -(dp545 -g3 -g4 -sg5 -(lp546 -S'A more recent version of the selected reference sequence NM_004801.4 is available (NM_004801.5)' -p547 
-aS'NM_004801.5:c.1285C>T MUST be fully validated prior to use in reports' -p548 -aS'select_variants=NM_004801.5:c.1285C>T' -p549 -aS'RefSeqGene record not available' -p550 -asg8 -g4 -sg9 -(lp551 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha1, mRNA -p552 -sg13 -S'NRXN1' -p553 -sg15 -(dp554 -g17 -S'NP_004792.1:p.(Pro429Ser)' -p555 -sg19 -S'NP_004792.1:p.(P429S)' -p556 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_004801.4:c.1285C>T' -p557 -sg27 -g4 -sg28 -(dp558 -S'hg19' -p559 -(dp560 -g32 -S'NC_000002.11:g.50847195G>A' -p561 -sg34 -(dp562 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p563 -sg42 -g43 -sssg44 -(dp564 -g32 -S'NC_000002.12:g.50620057G>A' -p565 -sg34 -(dp566 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p567 -sg42 -g43 -sssS'grch37' -p568 -(dp569 -g32 -S'NC_000002.11:g.50847195G>A' -p570 -sg34 -(dp571 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p572 -sg42 -g43 -sssS'grch38' -p573 -(dp574 -g32 -S'NC_000002.12:g.50620057G>A' -p575 -sg34 -(dp576 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p577 -sg42 -g43 -ssssg60 -(dp578 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004792.1' -p579 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004801.4' -p580 -sssS'NM_001135659.1:c.1405C>T' -p581 -(dp582 -g3 -g4 -sg5 -(lp583 -S'A more recent version of the selected reference sequence NM_001135659.1 is available (NM_001135659.2)' -p584 -aS'NM_001135659.2:c.1405C>T MUST be fully validated prior to use in reports' -p585 -aS'select_variants=NM_001135659.2:c.1405C>T' -p586 -aS'RefSeqGene record not available' -p587 -asg8 -g4 -sg9 -(lp588 -sg11 -VHomo sapiens neurexin 1 (NRXN1), transcript variant alpha2, mRNA -p589 -sg13 -S'NRXN1' -p590 -sg15 -(dp591 -g17 -S'NP_001129131.1:p.(Pro469Ser)' -p592 -sg19 -S'NP_001129131.1:p.(P469S)' -p593 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001135659.1:c.1405C>T' -p594 -sg27 -g4 -sg28 -(dp595 -S'hg19' -p596 -(dp597 -g32 -S'NC_000002.11:g.50847195G>A' -p598 -sg34 -(dp599 -g36 -g37 -sg38 -g39 -sg40 -S'50847195' -p600 -sg42 -g43 -sssg44 
-(dp601 -g32 -S'NC_000002.12:g.50620057G>A' -p602 -sg34 -(dp603 -g36 -g37 -sg38 -g39 -sg40 -S'50620057' -p604 -sg42 -g43 -sssS'grch37' -p605 -(dp606 -g32 -S'NC_000002.11:g.50847195G>A' -p607 -sg34 -(dp608 -g36 -g53 -sg38 -g39 -sg40 -S'50847195' -p609 -sg42 -g43 -sssS'grch38' -p610 -(dp611 -g32 -S'NC_000002.12:g.50620057G>A' -p612 -sg34 -(dp613 -g36 -g53 -sg38 -g39 -sg40 -S'50620057' -p614 -sg42 -g43 -ssssg60 -(dp615 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129131.1' -p616 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135659.1' -p617 -sssS'metadata' -p618 -(dp619 -S'variantvalidator_hgvs_version' -p620 -S'1.1.3' -p621 -sS'uta_schema' -p622 -S'uta_20180821' -p623 -sS'seqrepo_db' -p624 -S'2018-08-21' -p625 -sS'variantvalidator_version' -p626 -S'v0.2' -p627 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant268.txt b/VariantValidator/testing/testOutputsMasterITS/variant268.txt deleted file mode 100644 index 31ebad6d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant268.txt +++ /dev/null @@ -1,1666 +0,0 @@ -(dp0 -S'NM_001130986.1:c.3585C>G' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens dysferlin (DYSF), transcript variant 3, mRNA -p12 -sS'gene_symbol' -p13 -S'DYSF' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001124458.1:p.(Ile1195Met)' -p18 -sS'slr' -p19 -S'NP_001124458.1:p.(I1195M)' -p20 -ssS'submitted_variant' -p21 -S'2-71825797-C-G' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_001130986.1:c.3585C>G' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000002.11:g.71825797C>G' 
-p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr2' -p37 -sS'ref' -p38 -S'C' -p39 -sS'pos' -p40 -S'71825797' -p41 -sS'alt' -p42 -S'G' -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000002.12:g.71598667C>G' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p48 -sg42 -g43 -sssS'grch37' -p49 -(dp50 -g32 -S'NC_000002.11:g.71825797C>G' -p51 -sg34 -(dp52 -g36 -S'2' -p53 -sg38 -g39 -sg40 -S'71825797' -p54 -sg42 -g43 -sssS'grch38' -p55 -(dp56 -g32 -S'NC_000002.12:g.71598667C>G' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p59 -sg42 -g43 -ssssS'reference_sequence_records' -p60 -(dp61 -S'protein' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124458.1' -p63 -sS'transcript' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130986.1' -p65 -sssS'NM_001130976.1:c.3582C>G' -p66 -(dp67 -g3 -g4 -sg5 -(lp68 -S'RefSeqGene record not available' -p69 -asg8 -g4 -sg9 -(lp70 -sg11 -VHomo sapiens dysferlin (DYSF), transcript variant 9, mRNA -p71 -sg13 -S'DYSF' -p72 -sg15 -(dp73 -g17 -S'NP_001124448.1:p.(Ile1194Met)' -p74 -sg19 -S'NP_001124448.1:p.(I1194M)' -p75 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001130976.1:c.3582C>G' -p76 -sg27 -g4 -sg28 -(dp77 -S'hg19' -p78 -(dp79 -g32 -S'NC_000002.11:g.71825797C>G' -p80 -sg34 -(dp81 -g36 -g37 -sg38 -g39 -sg40 -S'71825797' -p82 -sg42 -g43 -sssg44 -(dp83 -g32 -S'NC_000002.12:g.71598667C>G' -p84 -sg34 -(dp85 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p86 -sg42 -g43 -sssS'grch37' -p87 -(dp88 -g32 -S'NC_000002.11:g.71825797C>G' -p89 -sg34 -(dp90 -g36 -g53 -sg38 -g39 -sg40 -S'71825797' -p91 -sg42 -g43 -sssS'grch38' -p92 -(dp93 -g32 -S'NC_000002.12:g.71598667C>G' -p94 -sg34 -(dp95 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p96 -sg42 -g43 -ssssg60 -(dp97 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124448.1' -p98 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130976.1' -p99 -sssS'NM_001130981.1:c.3675C>G' -p100 -(dp101 -g3 -g4 -sg5 -(lp102 -S'RefSeqGene record not available' -p103 -asg8 -g4 -sg9 -(lp104 -sg11 -VHomo sapiens 
dysferlin (DYSF), transcript variant 14, mRNA -p105 -sg13 -S'DYSF' -p106 -sg15 -(dp107 -g17 -S'NP_001124453.1:p.(Ile1225Met)' -p108 -sg19 -S'NP_001124453.1:p.(I1225M)' -p109 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001130981.1:c.3675C>G' -p110 -sg27 -g4 -sg28 -(dp111 -S'hg19' -p112 -(dp113 -g32 -S'NC_000002.11:g.71825797C>G' -p114 -sg34 -(dp115 -g36 -g37 -sg38 -g39 -sg40 -S'71825797' -p116 -sg42 -g43 -sssg44 -(dp117 -g32 -S'NC_000002.12:g.71598667C>G' -p118 -sg34 -(dp119 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p120 -sg42 -g43 -sssS'grch37' -p121 -(dp122 -g32 -S'NC_000002.11:g.71825797C>G' -p123 -sg34 -(dp124 -g36 -g53 -sg38 -g39 -sg40 -S'71825797' -p125 -sg42 -g43 -sssS'grch38' -p126 -(dp127 -g32 -S'NC_000002.12:g.71598667C>G' -p128 -sg34 -(dp129 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p130 -sg42 -g43 -ssssg60 -(dp131 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124453.1' -p132 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130981.1' -p133 -sssS'NM_003494.3:c.3624C>G' -p134 -(dp135 -g3 -g4 -sg5 -(lp136 -S'RefSeqGene record not available' -p137 -asg8 -g4 -sg9 -(lp138 -sg11 -VHomo sapiens dysferlin (DYSF), transcript variant 8, mRNA -p139 -sg13 -S'DYSF' -p140 -sg15 -(dp141 -g17 -S'NP_003485.1:p.(Ile1208Met)' -p142 -sg19 -S'NP_003485.1:p.(I1208M)' -p143 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_003494.3:c.3624C>G' -p144 -sg27 -g4 -sg28 -(dp145 -S'hg19' -p146 -(dp147 -g32 -S'NC_000002.11:g.71825797C>G' -p148 -sg34 -(dp149 -g36 -g37 -sg38 -g39 -sg40 -S'71825797' -p150 -sg42 -g43 -sssg44 -(dp151 -g32 -S'NC_000002.12:g.71598667C>G' -p152 -sg34 -(dp153 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p154 -sg42 -g43 -sssS'grch37' -p155 -(dp156 -g32 -S'NC_000002.11:g.71825797C>G' -p157 -sg34 -(dp158 -g36 -g53 -sg38 -g39 -sg40 -S'71825797' -p159 -sg42 -g43 -sssS'grch38' -p160 -(dp161 -g32 -S'NC_000002.12:g.71598667C>G' -p162 -sg34 -(dp163 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p164 -sg42 -g43 -ssssg60 -(dp165 -g62 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003485.1' -p166 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003494.3' -p167 -sssS'NM_001130985.1:c.3678C>G' -p168 -(dp169 -g3 -g4 -sg5 -(lp170 -S'RefSeqGene record not available' -p171 -asg8 -g4 -sg9 -(lp172 -sg11 -VHomo sapiens dysferlin (DYSF), transcript variant 4, mRNA -p173 -sg13 -S'DYSF' -p174 -sg15 -(dp175 -g17 -S'NP_001124457.1:p.(Ile1226Met)' -p176 -sg19 -S'NP_001124457.1:p.(I1226M)' -p177 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001130985.1:c.3678C>G' -p178 -sg27 -g4 -sg28 -(dp179 -S'hg19' -p180 -(dp181 -g32 -S'NC_000002.11:g.71825797C>G' -p182 -sg34 -(dp183 -g36 -g37 -sg38 -g39 -sg40 -S'71825797' -p184 -sg42 -g43 -sssg44 -(dp185 -g32 -S'NC_000002.12:g.71598667C>G' -p186 -sg34 -(dp187 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p188 -sg42 -g43 -sssS'grch37' -p189 -(dp190 -g32 -S'NC_000002.11:g.71825797C>G' -p191 -sg34 -(dp192 -g36 -g53 -sg38 -g39 -sg40 -S'71825797' -p193 -sg42 -g43 -sssS'grch38' -p194 -(dp195 -g32 -S'NC_000002.12:g.71598667C>G' -p196 -sg34 -(dp197 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p198 -sg42 -g43 -ssssg60 -(dp199 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124457.1' -p200 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130985.1' -p201 -sssS'NM_001130983.1:c.3627C>G' -p202 -(dp203 -g3 -g4 -sg5 -(lp204 -S'RefSeqGene record not available' -p205 -asg8 -g4 -sg9 -(lp206 -sg11 -VHomo sapiens dysferlin (DYSF), transcript variant 6, mRNA -p207 -sg13 -S'DYSF' -p208 -sg15 -(dp209 -g17 -S'NP_001124455.1:p.(Ile1209Met)' -p210 -sg19 -S'NP_001124455.1:p.(I1209M)' -p211 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001130983.1:c.3627C>G' -p212 -sg27 -g4 -sg28 -(dp213 -S'hg19' -p214 -(dp215 -g32 -S'NC_000002.11:g.71825797C>G' -p216 -sg34 -(dp217 -g36 -g37 -sg38 -g39 -sg40 -S'71825797' -p218 -sg42 -g43 -sssg44 -(dp219 -g32 -S'NC_000002.12:g.71598667C>G' -p220 -sg34 -(dp221 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p222 -sg42 -g43 -sssS'grch37' -p223 -(dp224 -g32 -S'NC_000002.11:g.71825797C>G' 
-p225 -sg34 -(dp226 -g36 -g53 -sg38 -g39 -sg40 -S'71825797' -p227 -sg42 -g43 -sssS'grch38' -p228 -(dp229 -g32 -S'NC_000002.12:g.71598667C>G' -p230 -sg34 -(dp231 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p232 -sg42 -g43 -ssssg60 -(dp233 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124455.1' -p234 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130983.1' -p235 -sssS'NM_001130987.1:c.3678C>G' -p236 -(dp237 -g3 -g4 -sg5 -(lp238 -S'RefSeqGene record not available' -p239 -asg8 -g4 -sg9 -(lp240 -sg11 -VHomo sapiens dysferlin (DYSF), transcript variant 1, mRNA -p241 -sg13 -S'DYSF' -p242 -sg15 -(dp243 -g17 -S'NP_001124459.1:p.(Ile1226Met)' -p244 -sg19 -S'NP_001124459.1:p.(I1226M)' -p245 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001130987.1:c.3678C>G' -p246 -sg27 -g4 -sg28 -(dp247 -S'hg19' -p248 -(dp249 -g32 -S'NC_000002.11:g.71825797C>G' -p250 -sg34 -(dp251 -g36 -g37 -sg38 -g39 -sg40 -S'71825797' -p252 -sg42 -g43 -sssg44 -(dp253 -g32 -S'NC_000002.12:g.71598667C>G' -p254 -sg34 -(dp255 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p256 -sg42 -g43 -sssS'grch37' -p257 -(dp258 -g32 -S'NC_000002.11:g.71825797C>G' -p259 -sg34 -(dp260 -g36 -g53 -sg38 -g39 -sg40 -S'71825797' -p261 -sg42 -g43 -sssS'grch38' -p262 -(dp263 -g32 -S'NC_000002.12:g.71598667C>G' -p264 -sg34 -(dp265 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p266 -sg42 -g43 -ssssg60 -(dp267 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124459.1' -p268 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130987.1' -p269 -sssS'flag' -p270 -S'gene_variant' -p271 -sS'NM_001130980.1:c.3675C>G' -p272 -(dp273 -g3 -g4 -sg5 -(lp274 -S'RefSeqGene record not available' -p275 -asg8 -g4 -sg9 -(lp276 -sg11 -VHomo sapiens dysferlin (DYSF), transcript variant 13, mRNA -p277 -sg13 -S'DYSF' -p278 -sg15 -(dp279 -g17 -S'NP_001124452.1:p.(Ile1225Met)' -p280 -sg19 -S'NP_001124452.1:p.(I1225M)' -p281 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001130980.1:c.3675C>G' -p282 -sg27 -g4 -sg28 -(dp283 -S'hg19' -p284 -(dp285 -g32 
-S'NC_000002.11:g.71825797C>G' -p286 -sg34 -(dp287 -g36 -g37 -sg38 -g39 -sg40 -S'71825797' -p288 -sg42 -g43 -sssg44 -(dp289 -g32 -S'NC_000002.12:g.71598667C>G' -p290 -sg34 -(dp291 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p292 -sg42 -g43 -sssS'grch37' -p293 -(dp294 -g32 -S'NC_000002.11:g.71825797C>G' -p295 -sg34 -(dp296 -g36 -g53 -sg38 -g39 -sg40 -S'71825797' -p297 -sg42 -g43 -sssS'grch38' -p298 -(dp299 -g32 -S'NC_000002.12:g.71598667C>G' -p300 -sg34 -(dp301 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p302 -sg42 -g43 -ssssg60 -(dp303 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124452.1' -p304 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130980.1' -p305 -sssS'NM_001130979.1:c.3717C>G' -p306 -(dp307 -g3 -g4 -sg5 -(lp308 -S'RefSeqGene record not available' -p309 -asg8 -g4 -sg9 -(lp310 -sg11 -VHomo sapiens dysferlin (DYSF), transcript variant 12, mRNA -p311 -sg13 -S'DYSF' -p312 -sg15 -(dp313 -g17 -S'NP_001124451.1:p.(Ile1239Met)' -p314 -sg19 -S'NP_001124451.1:p.(I1239M)' -p315 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001130979.1:c.3717C>G' -p316 -sg27 -g4 -sg28 -(dp317 -S'hg19' -p318 -(dp319 -g32 -S'NC_000002.11:g.71825797C>G' -p320 -sg34 -(dp321 -g36 -g37 -sg38 -g39 -sg40 -S'71825797' -p322 -sg42 -g43 -sssg44 -(dp323 -g32 -S'NC_000002.12:g.71598667C>G' -p324 -sg34 -(dp325 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p326 -sg42 -g43 -sssS'grch37' -p327 -(dp328 -g32 -S'NC_000002.11:g.71825797C>G' -p329 -sg34 -(dp330 -g36 -g53 -sg38 -g39 -sg40 -S'71825797' -p331 -sg42 -g43 -sssS'grch38' -p332 -(dp333 -g32 -S'NC_000002.12:g.71598667C>G' -p334 -sg34 -(dp335 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p336 -sg42 -g43 -ssssg60 -(dp337 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124451.1' -p338 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130979.1' -p339 -sssS'NM_001130984.1:c.3585C>G' -p340 -(dp341 -g3 -g4 -sg5 -(lp342 -S'RefSeqGene record not available' -p343 -asg8 -g4 -sg9 -(lp344 -sg11 -VHomo sapiens dysferlin (DYSF), transcript variant 5, mRNA -p345 
-sg13 -S'DYSF' -p346 -sg15 -(dp347 -g17 -S'NP_001124456.1:p.(Ile1195Met)' -p348 -sg19 -S'NP_001124456.1:p.(I1195M)' -p349 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001130984.1:c.3585C>G' -p350 -sg27 -g4 -sg28 -(dp351 -S'hg19' -p352 -(dp353 -g32 -S'NC_000002.11:g.71825797C>G' -p354 -sg34 -(dp355 -g36 -g37 -sg38 -g39 -sg40 -S'71825797' -p356 -sg42 -g43 -sssg44 -(dp357 -g32 -S'NC_000002.12:g.71598667C>G' -p358 -sg34 -(dp359 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p360 -sg42 -g43 -sssS'grch37' -p361 -(dp362 -g32 -S'NC_000002.11:g.71825797C>G' -p363 -sg34 -(dp364 -g36 -g53 -sg38 -g39 -sg40 -S'71825797' -p365 -sg42 -g43 -sssS'grch38' -p366 -(dp367 -g32 -S'NC_000002.12:g.71598667C>G' -p368 -sg34 -(dp369 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p370 -sg42 -g43 -ssssg60 -(dp371 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124456.1' -p372 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130984.1' -p373 -sssS'NM_001130977.1:c.3582C>G' -p374 -(dp375 -g3 -g4 -sg5 -(lp376 -S'RefSeqGene record not available' -p377 -asg8 -g4 -sg9 -(lp378 -sg11 -VHomo sapiens dysferlin (DYSF), transcript variant 10, mRNA -p379 -sg13 -S'DYSF' -p380 -sg15 -(dp381 -g17 -S'NP_001124449.1:p.(Ile1194Met)' -p382 -sg19 -S'NP_001124449.1:p.(I1194M)' -p383 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001130977.1:c.3582C>G' -p384 -sg27 -g4 -sg28 -(dp385 -S'hg19' -p386 -(dp387 -g32 -S'NC_000002.11:g.71825797C>G' -p388 -sg34 -(dp389 -g36 -g37 -sg38 -g39 -sg40 -S'71825797' -p390 -sg42 -g43 -sssg44 -(dp391 -g32 -S'NC_000002.12:g.71598667C>G' -p392 -sg34 -(dp393 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p394 -sg42 -g43 -sssS'grch37' -p395 -(dp396 -g32 -S'NC_000002.11:g.71825797C>G' -p397 -sg34 -(dp398 -g36 -g53 -sg38 -g39 -sg40 -S'71825797' -p399 -sg42 -g43 -sssS'grch38' -p400 -(dp401 -g32 -S'NC_000002.12:g.71598667C>G' -p402 -sg34 -(dp403 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p404 -sg42 -g43 -ssssg60 -(dp405 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124449.1' -p406 -sg64 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130977.1' -p407 -sssS'NM_001130455.1:c.3627C>G' -p408 -(dp409 -g3 -g4 -sg5 -(lp410 -S'RefSeqGene record not available' -p411 -asg8 -g4 -sg9 -(lp412 -sg11 -VHomo sapiens dysferlin (DYSF), transcript variant 2, mRNA -p413 -sg13 -S'DYSF' -p414 -sg15 -(dp415 -g17 -S'NP_001123927.1:p.(Ile1209Met)' -p416 -sg19 -S'NP_001123927.1:p.(I1209M)' -p417 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001130455.1:c.3627C>G' -p418 -sg27 -g4 -sg28 -(dp419 -S'hg19' -p420 -(dp421 -g32 -S'NC_000002.11:g.71825797C>G' -p422 -sg34 -(dp423 -g36 -g37 -sg38 -g39 -sg40 -S'71825797' -p424 -sg42 -g43 -sssg44 -(dp425 -g32 -S'NC_000002.12:g.71598667C>G' -p426 -sg34 -(dp427 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p428 -sg42 -g43 -sssS'grch37' -p429 -(dp430 -g32 -S'NC_000002.11:g.71825797C>G' -p431 -sg34 -(dp432 -g36 -g53 -sg38 -g39 -sg40 -S'71825797' -p433 -sg42 -g43 -sssS'grch38' -p434 -(dp435 -g32 -S'NC_000002.12:g.71598667C>G' -p436 -sg34 -(dp437 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p438 -sg42 -g43 -ssssg60 -(dp439 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001123927.1' -p440 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130455.1' -p441 -sssS'metadata' -p442 -(dp443 -S'variantvalidator_hgvs_version' -p444 -S'1.1.3' -p445 -sS'uta_schema' -p446 -S'uta_20180821' -p447 -sS'seqrepo_db' -p448 -S'2018-08-21' -p449 -sS'variantvalidator_version' -p450 -S'v0.2' -p451 -ssS'NM_001130982.1:c.3720C>G' -p452 -(dp453 -g3 -g4 -sg5 -(lp454 -S'RefSeqGene record not available' -p455 -asg8 -g4 -sg9 -(lp456 -sg11 -VHomo sapiens dysferlin (DYSF), transcript variant 7, mRNA -p457 -sg13 -S'DYSF' -p458 -sg15 -(dp459 -g17 -S'NP_001124454.1:p.(Ile1240Met)' -p460 -sg19 -S'NP_001124454.1:p.(I1240M)' -p461 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001130982.1:c.3720C>G' -p462 -sg27 -g4 -sg28 -(dp463 -S'hg19' -p464 -(dp465 -g32 -S'NC_000002.11:g.71825797C>G' -p466 -sg34 -(dp467 -g36 -g37 -sg38 -g39 -sg40 -S'71825797' -p468 -sg42 -g43 -sssg44 -(dp469 -g32 
-S'NC_000002.12:g.71598667C>G' -p470 -sg34 -(dp471 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p472 -sg42 -g43 -sssS'grch37' -p473 -(dp474 -g32 -S'NC_000002.11:g.71825797C>G' -p475 -sg34 -(dp476 -g36 -g53 -sg38 -g39 -sg40 -S'71825797' -p477 -sg42 -g43 -sssS'grch38' -p478 -(dp479 -g32 -S'NC_000002.12:g.71598667C>G' -p480 -sg34 -(dp481 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p482 -sg42 -g43 -ssssg60 -(dp483 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124454.1' -p484 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130982.1' -p485 -sssS'NM_001130978.1:c.3624C>G' -p486 -(dp487 -g3 -g4 -sg5 -(lp488 -S'RefSeqGene record not available' -p489 -asg8 -g4 -sg9 -(lp490 -sg11 -VHomo sapiens dysferlin (DYSF), transcript variant 11, mRNA -p491 -sg13 -S'DYSF' -p492 -sg15 -(dp493 -g17 -S'NP_001124450.1:p.(Ile1208Met)' -p494 -sg19 -S'NP_001124450.1:p.(I1208M)' -p495 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001130978.1:c.3624C>G' -p496 -sg27 -g4 -sg28 -(dp497 -S'hg19' -p498 -(dp499 -g32 -S'NC_000002.11:g.71825797C>G' -p500 -sg34 -(dp501 -g36 -g37 -sg38 -g39 -sg40 -S'71825797' -p502 -sg42 -g43 -sssg44 -(dp503 -g32 -S'NC_000002.12:g.71598667C>G' -p504 -sg34 -(dp505 -g36 -g37 -sg38 -g39 -sg40 -S'71598667' -p506 -sg42 -g43 -sssS'grch37' -p507 -(dp508 -g32 -S'NC_000002.11:g.71825797C>G' -p509 -sg34 -(dp510 -g36 -g53 -sg38 -g39 -sg40 -S'71825797' -p511 -sg42 -g43 -sssS'grch38' -p512 -(dp513 -g32 -S'NC_000002.12:g.71598667C>G' -p514 -sg34 -(dp515 -g36 -g53 -sg38 -g39 -sg40 -S'71598667' -p516 -sg42 -g43 -ssssg60 -(dp517 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124450.1' -p518 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130978.1' -p519 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant269.txt b/VariantValidator/testing/testOutputsMasterITS/variant269.txt deleted file mode 100644 index 44271a5c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant269.txt +++ /dev/null @@ -1,401 +0,0 @@ -(dp0 -S'NM_021007.2:c.1718G>C' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'SCN2A' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_066287.2:p.(Ser573Thr)' -p18 -sS'slr' -p19 -S'NP_066287.2:p.(S573T)' -p20 -ssS'submitted_variant' -p21 -S'2-166179712-G-C' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_021007.2:c.1718G>C' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000002.11:g.166179712G>C' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr2' -p37 -sS'ref' -p38 -S'G' -p39 -sS'pos' -p40 -S'166179712' -p41 -sS'alt' -p42 -S'C' -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000002.12:g.165323202G>C' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'165323202' -p48 -sg42 -g43 -sssS'grch37' -p49 -(dp50 -g32 -S'NC_000002.11:g.166179712G>C' -p51 -sg34 -(dp52 -g36 -S'2' -p53 -sg38 -g39 -sg40 -S'166179712' -p54 -sg42 -g43 -sssS'grch38' -p55 -(dp56 -g32 -S'NC_000002.12:g.165323202G>C' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'165323202' -p59 -sg42 -g43 -ssssS'reference_sequence_records' -p60 -(dp61 -S'protein' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_066287.2' -p63 -sS'transcript' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_021007.2' -p65 
-sssS'flag' -p66 -S'gene_variant' -p67 -sS'metadata' -p68 -(dp69 -S'variantvalidator_hgvs_version' -p70 -S'1.1.3' -p71 -sS'uta_schema' -p72 -S'uta_20180821' -p73 -sS'seqrepo_db' -p74 -S'2018-08-21' -p75 -sS'variantvalidator_version' -p76 -S'v0.2' -p77 -ssS'NM_001040143.1:c.1718G>C' -p78 -(dp79 -g3 -g4 -sg5 -(lp80 -S'RefSeqGene record not available' -p81 -asg8 -g4 -sg9 -(lp82 -sg11 -VHomo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 3, mRNA -p83 -sg13 -S'SCN2A' -p84 -sg15 -(dp85 -g17 -S'NP_001035233.1:p.(Ser573Thr)' -p86 -sg19 -S'NP_001035233.1:p.(S573T)' -p87 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001040143.1:c.1718G>C' -p88 -sg27 -g4 -sg28 -(dp89 -S'hg19' -p90 -(dp91 -g32 -S'NC_000002.11:g.166179712G>C' -p92 -sg34 -(dp93 -g36 -g37 -sg38 -g39 -sg40 -S'166179712' -p94 -sg42 -g43 -sssg44 -(dp95 -g32 -S'NC_000002.12:g.165323202G>C' -p96 -sg34 -(dp97 -g36 -g37 -sg38 -g39 -sg40 -S'165323202' -p98 -sg42 -g43 -sssS'grch37' -p99 -(dp100 -g32 -S'NC_000002.11:g.166179712G>C' -p101 -sg34 -(dp102 -g36 -g53 -sg38 -g39 -sg40 -S'166179712' -p103 -sg42 -g43 -sssS'grch38' -p104 -(dp105 -g32 -S'NC_000002.12:g.165323202G>C' -p106 -sg34 -(dp107 -g36 -g53 -sg38 -g39 -sg40 -S'165323202' -p108 -sg42 -g43 -ssssg60 -(dp109 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035233.1' -p110 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040143.1' -p111 -sssS'NM_001040142.1:c.1718G>C' -p112 -(dp113 -g3 -g4 -sg5 -(lp114 -S'RefSeqGene record not available' -p115 -asg8 -g4 -sg9 -(lp116 -sg11 -VHomo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 2, mRNA -p117 -sg13 -S'SCN2A' -p118 -sg15 -(dp119 -g17 -S'NP_001035232.1:p.(Ser573Thr)' -p120 -sg19 -S'NP_001035232.1:p.(S573T)' -p121 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001040142.1:c.1718G>C' -p122 -sg27 -g4 -sg28 -(dp123 -S'hg19' -p124 -(dp125 -g32 -S'NC_000002.11:g.166179712G>C' -p126 -sg34 -(dp127 -g36 -g37 -sg38 -g39 -sg40 -S'166179712' -p128 -sg42 -g43 -sssg44 
-(dp129 -g32 -S'NC_000002.12:g.165323202G>C' -p130 -sg34 -(dp131 -g36 -g37 -sg38 -g39 -sg40 -S'165323202' -p132 -sg42 -g43 -sssS'grch37' -p133 -(dp134 -g32 -S'NC_000002.11:g.166179712G>C' -p135 -sg34 -(dp136 -g36 -g53 -sg38 -g39 -sg40 -S'166179712' -p137 -sg42 -g43 -sssS'grch38' -p138 -(dp139 -g32 -S'NC_000002.12:g.165323202G>C' -p140 -sg34 -(dp141 -g36 -g53 -sg38 -g39 -sg40 -S'165323202' -p142 -sg42 -g43 -ssssg60 -(dp143 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035232.1' -p144 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040142.1' -p145 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant27.txt b/VariantValidator/testing/testOutputsMasterITS/variant27.txt deleted file mode 100644 index da9bb27b..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant27.txt +++ /dev/null @@ -1,80 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use NC_000014.8:g.36989536G>A' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'hgvs_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NR_138595.1:n.1-810C>T' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'hgvs_lrg_variant' -p20 -g4 -sS'hgvs_transcript_variant' -p21 -g4 -sS'hgvs_refseqgene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -sS'reference_sequence_records' -p25 -g4 -ssS'flag' -p26 -S'warning' -p27 -sS'metadata' -p28 -(dp29 -S'variantvalidator_hgvs_version' -p30 -S'1.1.3' -p31 -sS'uta_schema' -p32 -S'uta_20180821' -p33 -sS'seqrepo_db' -p34 -S'2018-08-21' -p35 -sS'variantvalidator_version' -p36 -S'v0.2' -p37 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant270.txt b/VariantValidator/testing/testOutputsMasterITS/variant270.txt deleted file mode 100644 index 5a0d9e7b..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant270.txt +++ /dev/null @@ -1,401 +0,0 @@ -(dp0 -S'NM_021007.2:c.2026A>G' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'SCN2A' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_066287.2:p.(Thr676Ala)' -p18 -sS'slr' -p19 -S'NP_066287.2:p.(T676A)' -p20 -ssS'submitted_variant' -p21 -S'2-166183371-A-G' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_021007.2:c.2026A>G' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000002.11:g.166183371A>G' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr2' -p37 -sS'ref' -p38 -S'A' -p39 -sS'pos' -p40 -S'166183371' -p41 -sS'alt' -p42 -S'G' -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000002.12:g.165326861A>G' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'165326861' -p48 -sg42 -g43 -sssS'grch37' -p49 -(dp50 -g32 -S'NC_000002.11:g.166183371A>G' -p51 -sg34 -(dp52 -g36 -S'2' -p53 -sg38 -g39 -sg40 -S'166183371' -p54 -sg42 -g43 -sssS'grch38' -p55 -(dp56 -g32 -S'NC_000002.12:g.165326861A>G' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'165326861' -p59 -sg42 -g43 -ssssS'reference_sequence_records' -p60 -(dp61 -S'protein' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_066287.2' -p63 -sS'transcript' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_021007.2' -p65 
-sssS'flag' -p66 -S'gene_variant' -p67 -sS'NM_001040143.1:c.2026A>G' -p68 -(dp69 -g3 -g4 -sg5 -(lp70 -S'RefSeqGene record not available' -p71 -asg8 -g4 -sg9 -(lp72 -sg11 -VHomo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 3, mRNA -p73 -sg13 -S'SCN2A' -p74 -sg15 -(dp75 -g17 -S'NP_001035233.1:p.(Thr676Ala)' -p76 -sg19 -S'NP_001035233.1:p.(T676A)' -p77 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001040143.1:c.2026A>G' -p78 -sg27 -g4 -sg28 -(dp79 -S'hg19' -p80 -(dp81 -g32 -S'NC_000002.11:g.166183371A>G' -p82 -sg34 -(dp83 -g36 -g37 -sg38 -g39 -sg40 -S'166183371' -p84 -sg42 -g43 -sssg44 -(dp85 -g32 -S'NC_000002.12:g.165326861A>G' -p86 -sg34 -(dp87 -g36 -g37 -sg38 -g39 -sg40 -S'165326861' -p88 -sg42 -g43 -sssS'grch37' -p89 -(dp90 -g32 -S'NC_000002.11:g.166183371A>G' -p91 -sg34 -(dp92 -g36 -g53 -sg38 -g39 -sg40 -S'166183371' -p93 -sg42 -g43 -sssS'grch38' -p94 -(dp95 -g32 -S'NC_000002.12:g.165326861A>G' -p96 -sg34 -(dp97 -g36 -g53 -sg38 -g39 -sg40 -S'165326861' -p98 -sg42 -g43 -ssssg60 -(dp99 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035233.1' -p100 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040143.1' -p101 -sssS'NM_001040142.1:c.2026A>G' -p102 -(dp103 -g3 -g4 -sg5 -(lp104 -S'RefSeqGene record not available' -p105 -asg8 -g4 -sg9 -(lp106 -sg11 -VHomo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 2, mRNA -p107 -sg13 -S'SCN2A' -p108 -sg15 -(dp109 -g17 -S'NP_001035232.1:p.(Thr676Ala)' -p110 -sg19 -S'NP_001035232.1:p.(T676A)' -p111 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001040142.1:c.2026A>G' -p112 -sg27 -g4 -sg28 -(dp113 -S'hg19' -p114 -(dp115 -g32 -S'NC_000002.11:g.166183371A>G' -p116 -sg34 -(dp117 -g36 -g37 -sg38 -g39 -sg40 -S'166183371' -p118 -sg42 -g43 -sssg44 -(dp119 -g32 -S'NC_000002.12:g.165326861A>G' -p120 -sg34 -(dp121 -g36 -g37 -sg38 -g39 -sg40 -S'165326861' -p122 -sg42 -g43 -sssS'grch37' -p123 -(dp124 -g32 -S'NC_000002.11:g.166183371A>G' -p125 -sg34 -(dp126 -g36 -g53 -sg38 
-g39 -sg40 -S'166183371' -p127 -sg42 -g43 -sssS'grch38' -p128 -(dp129 -g32 -S'NC_000002.12:g.165326861A>G' -p130 -sg34 -(dp131 -g36 -g53 -sg38 -g39 -sg40 -S'165326861' -p132 -sg42 -g43 -ssssg60 -(dp133 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035232.1' -p134 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040142.1' -p135 -sssS'metadata' -p136 -(dp137 -S'variantvalidator_hgvs_version' -p138 -S'1.1.3' -p139 -sS'uta_schema' -p140 -S'uta_20180821' -p141 -sS'seqrepo_db' -p142 -S'2018-08-21' -p143 -sS'variantvalidator_version' -p144 -S'v0.2' -p145 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant271.txt b/VariantValidator/testing/testOutputsMasterITS/variant271.txt deleted file mode 100644 index 636623b9..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant271.txt +++ /dev/null @@ -1,2534 +0,0 @@ -(dp0 -S'NM_001353951.1:c.233_242delinsGT' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 8, mRNA -p13 -sS'gene_symbol' -p14 -S'SCN1A' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001340880.1:p.(Glu78GlyfsTer7)' -p19 -sS'slr' -p20 -S'NP_001340880.1:p.(E78Gfs*7)' -p21 -ssS'submitted_variant' -p22 -S'2-166929889-GTCCAGGTCCT-GAC' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_001353951.1:c.233_242delinsGT' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p34 -sS'vcf' -p35 
-(dp36 -S'chr' -p37 -S'chr2' -p38 -sS'ref' -p39 -S'TCCAGGTCCT' -p40 -sS'pos' -p41 -S'166929890' -p42 -sS'alt' -p43 -VAC -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p49 -sg41 -S'166073380' -p50 -sg43 -VAC -p51 -sssS'grch37' -p52 -(dp53 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p54 -sg35 -(dp55 -g37 -S'2' -p56 -sg39 -S'TCCAGGTCCT' -p57 -sg41 -S'166929890' -p58 -sg43 -g44 -sssS'grch38' -p59 -(dp60 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p61 -sg35 -(dp62 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p63 -sg41 -S'166073380' -p64 -sg43 -g51 -ssssS'reference_sequence_records' -p65 -(dp66 -S'protein' -p67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340880.1' -p68 -sS'transcript' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353951.1' -p70 -sssS'NM_001353958.1:c.233_242delinsGT' -p71 -(dp72 -g3 -g4 -sg5 -(lp73 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p74 -aS'RefSeqGene record not available' -p75 -asg9 -g4 -sg10 -(lp76 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 13, mRNA -p77 -sg14 -S'SCN1A' -p78 -sg16 -(dp79 -g18 -S'NP_001340887.1:p.(Glu78GlyfsTer7)' -p80 -sg20 -S'NP_001340887.1:p.(E78Gfs*7)' -p81 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353958.1:c.233_242delinsGT' -p82 -sg28 -g4 -sg29 -(dp83 -S'hg19' -p84 -(dp85 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p86 -sg35 -(dp87 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p88 -sg41 -S'166929890' -p89 -sg43 -VAC -p90 -sssg45 -(dp91 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p92 -sg35 -(dp93 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p94 -sg41 -S'166073380' -p95 -sg43 -VAC -p96 -sssS'grch37' -p97 -(dp98 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p99 -sg35 -(dp100 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p101 -sg41 -S'166929890' -p102 -sg43 -g90 -sssS'grch38' -p103 -(dp104 -g33 
-S'NC_000002.12:g.166073380_166073389delinsAC' -p105 -sg35 -(dp106 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p107 -sg41 -S'166073380' -p108 -sg43 -g96 -ssssg65 -(dp109 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340887.1' -p110 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353958.1' -p111 -sssS'NM_001202435.1:c.233_242delinsGT' -p112 -(dp113 -g3 -g4 -sg5 -(lp114 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p115 -aS'A more recent version of the selected reference sequence NM_001202435.1 is available (NM_001202435.2)' -p116 -aS'NM_001202435.2:c.233_242delinsGT MUST be fully validated prior to use in reports' -p117 -aS'select_variants=NM_001202435.2:c.233_242delinsGT' -p118 -aS'RefSeqGene record not available' -p119 -asg9 -g4 -sg10 -(lp120 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 4, mRNA -p121 -sg14 -S'SCN1A' -p122 -sg16 -(dp123 -g18 -S'NP_001189364.1:p.(Glu78GlyfsTer7)' -p124 -sg20 -S'NP_001189364.1:p.(E78Gfs*7)' -p125 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001202435.1:c.233_242delinsGT' -p126 -sg28 -g4 -sg29 -(dp127 -S'hg19' -p128 -(dp129 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p130 -sg35 -(dp131 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p132 -sg41 -S'166929890' -p133 -sg43 -VAC -p134 -sssg45 -(dp135 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p136 -sg35 -(dp137 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p138 -sg41 -S'166073380' -p139 -sg43 -VAC -p140 -sssS'grch37' -p141 -(dp142 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p143 -sg35 -(dp144 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p145 -sg41 -S'166929890' -p146 -sg43 -g134 -sssS'grch38' -p147 -(dp148 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p149 -sg35 -(dp150 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p151 -sg41 -S'166073380' -p152 -sg43 -g140 -ssssg65 -(dp153 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001189364.1' -p154 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001202435.1' -p155 
-sssS'NR_148667.1:n.638_647delinsGT' -p156 -(dp157 -g3 -g4 -sg5 -(lp158 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p159 -aS'RefSeqGene record not available' -p160 -asg9 -g4 -sg10 -(lp161 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 16, non-coding RNA -p162 -sg14 -S'SCN1A' -p163 -sg16 -(dp164 -g18 -S'Non-coding :n.' -p165 -sg20 -g165 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NR_148667.1:n.638_647delinsGT' -p166 -sg28 -g4 -sg29 -(dp167 -S'hg19' -p168 -(dp169 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p170 -sg35 -(dp171 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p172 -sg41 -S'166929890' -p173 -sg43 -VAC -p174 -sssg45 -(dp175 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p176 -sg35 -(dp177 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p178 -sg41 -S'166073380' -p179 -sg43 -VAC -p180 -sssS'grch37' -p181 -(dp182 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p183 -sg35 -(dp184 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p185 -sg41 -S'166929890' -p186 -sg43 -g174 -sssS'grch38' -p187 -(dp188 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p189 -sg35 -(dp190 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p191 -sg41 -S'166073380' -p192 -sg43 -g180 -ssssg65 -(dp193 -g69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_148667.1' -p194 -sssS'NM_001165964.1:c.233_242delinsGT' -p195 -(dp196 -g3 -g4 -sg5 -(lp197 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p198 -aS'A more recent version of the selected reference sequence NM_001165964.1 is available (NM_001165964.2)' -p199 -aS'NM_001165964.2:c.233_242delinsGT MUST be fully validated prior to use in reports' -p200 -aS'select_variants=NM_001165964.2:c.233_242delinsGT' -p201 -aS'RefSeqGene record not available' -p202 -asg9 -g4 -sg10 -(lp203 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 3, mRNA -p204 -sg14 -S'SCN1A' -p205 -sg16 -(dp206 -g18 
-S'NP_001159436.1:p.(Glu78GlyfsTer7)' -p207 -sg20 -S'NP_001159436.1:p.(E78Gfs*7)' -p208 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001165964.1:c.233_242delinsGT' -p209 -sg28 -g4 -sg29 -(dp210 -S'hg19' -p211 -(dp212 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p213 -sg35 -(dp214 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p215 -sg41 -S'166929890' -p216 -sg43 -VAC -p217 -sssg45 -(dp218 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p219 -sg35 -(dp220 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p221 -sg41 -S'166073380' -p222 -sg43 -VAC -p223 -sssS'grch37' -p224 -(dp225 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p226 -sg35 -(dp227 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p228 -sg41 -S'166929890' -p229 -sg43 -g217 -sssS'grch38' -p230 -(dp231 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p232 -sg35 -(dp233 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p234 -sg41 -S'166073380' -p235 -sg43 -g223 -ssssg65 -(dp236 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159436.1' -p237 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165964.1' -p238 -sssS'NM_001202435.2:c.233_242delinsGT' -p239 -(dp240 -g3 -g4 -sg5 -(lp241 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p242 -aS'RefSeqGene record not available' -p243 -asg9 -g4 -sg10 -(lp244 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 4, mRNA -p245 -sg14 -S'SCN1A' -p246 -sg16 -(dp247 -g18 -S'NP_001189364.1:p.(Glu78GlyfsTer7)' -p248 -sg20 -S'NP_001189364.1:p.(E78Gfs*7)' -p249 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001202435.2:c.233_242delinsGT' -p250 -sg28 -g4 -sg29 -(dp251 -S'hg19' -p252 -(dp253 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p254 -sg35 -(dp255 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p256 -sg41 -S'166929890' -p257 -sg43 -VAC -p258 -sssg45 -(dp259 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p260 -sg35 -(dp261 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p262 -sg41 -S'166073380' -p263 -sg43 -VAC -p264 -sssS'grch37' -p265 -(dp266 
-g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p267 -sg35 -(dp268 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p269 -sg41 -S'166929890' -p270 -sg43 -g258 -sssS'grch38' -p271 -(dp272 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p273 -sg35 -(dp274 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p275 -sg41 -S'166073380' -p276 -sg43 -g264 -ssssg65 -(dp277 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001189364.1' -p278 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001202435.2' -p279 -sssS'NM_006920.5:c.233_242delinsGT' -p280 -(dp281 -g3 -g4 -sg5 -(lp282 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p283 -aS'RefSeqGene record not available' -p284 -asg9 -g4 -sg10 -(lp285 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 2, mRNA -p286 -sg14 -S'SCN1A' -p287 -sg16 -(dp288 -g18 -S'NP_008851.3:p.(Glu78GlyfsTer7)' -p289 -sg20 -S'NP_008851.3:p.(E78Gfs*7)' -p290 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_006920.5:c.233_242delinsGT' -p291 -sg28 -g4 -sg29 -(dp292 -S'hg19' -p293 -(dp294 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p295 -sg35 -(dp296 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p297 -sg41 -S'166929890' -p298 -sg43 -VAC -p299 -sssg45 -(dp300 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p301 -sg35 -(dp302 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p303 -sg41 -S'166073380' -p304 -sg43 -VAC -p305 -sssS'grch37' -p306 -(dp307 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p308 -sg35 -(dp309 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p310 -sg41 -S'166929890' -p311 -sg43 -g299 -sssS'grch38' -p312 -(dp313 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p314 -sg35 -(dp315 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p316 -sg41 -S'166073380' -p317 -sg43 -g305 -ssssg65 -(dp318 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_008851.3' -p319 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_006920.5' -p320 -sssS'NM_001165963.1:c.233_242delinsGT' -p321 -(dp322 -g3 -g4 -sg5 -(lp323 
-S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p324 -aS'A more recent version of the selected reference sequence NM_001165963.1 is available (NM_001165963.2)' -p325 -aS'NM_001165963.2:c.233_242delinsGT MUST be fully validated prior to use in reports' -p326 -aS'select_variants=NM_001165963.2:c.233_242delinsGT' -p327 -aS'RefSeqGene record not available' -p328 -asg9 -g4 -sg10 -(lp329 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 1, mRNA -p330 -sg14 -S'SCN1A' -p331 -sg16 -(dp332 -g18 -S'NP_001159435.1:p.(Glu78GlyfsTer7)' -p333 -sg20 -S'NP_001159435.1:p.(E78Gfs*7)' -p334 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001165963.1:c.233_242delinsGT' -p335 -sg28 -g4 -sg29 -(dp336 -S'hg19' -p337 -(dp338 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p339 -sg35 -(dp340 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p341 -sg41 -S'166929890' -p342 -sg43 -VAC -p343 -sssg45 -(dp344 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p345 -sg35 -(dp346 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p347 -sg41 -S'166073380' -p348 -sg43 -VAC -p349 -sssS'grch37' -p350 -(dp351 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p352 -sg35 -(dp353 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p354 -sg41 -S'166929890' -p355 -sg43 -g343 -sssS'grch38' -p356 -(dp357 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p358 -sg35 -(dp359 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p360 -sg41 -S'166073380' -p361 -sg43 -g349 -ssssg65 -(dp362 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159435.1' -p363 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165963.1' -p364 -sssS'NM_001353955.1:c.233_242delinsGT' -p365 -(dp366 -g3 -g4 -sg5 -(lp367 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p368 -aS'RefSeqGene record not available' -p369 -asg9 -g4 -sg10 -(lp370 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 11, mRNA -p371 -sg14 -S'SCN1A' 
-p372 -sg16 -(dp373 -g18 -S'NP_001340884.1:p.(Glu78GlyfsTer7)' -p374 -sg20 -S'NP_001340884.1:p.(E78Gfs*7)' -p375 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353955.1:c.233_242delinsGT' -p376 -sg28 -g4 -sg29 -(dp377 -S'hg19' -p378 -(dp379 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p380 -sg35 -(dp381 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p382 -sg41 -S'166929890' -p383 -sg43 -VAC -p384 -sssg45 -(dp385 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p386 -sg35 -(dp387 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p388 -sg41 -S'166073380' -p389 -sg43 -VAC -p390 -sssS'grch37' -p391 -(dp392 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p393 -sg35 -(dp394 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p395 -sg41 -S'166929890' -p396 -sg43 -g384 -sssS'grch38' -p397 -(dp398 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p399 -sg35 -(dp400 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p401 -sg41 -S'166073380' -p402 -sg43 -g390 -ssssg65 -(dp403 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340884.1' -p404 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353955.1' -p405 -sssS'NM_001353961.1:c.-2193_-2184delinsGT' -p406 -(dp407 -g3 -g4 -sg5 -(lp408 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p409 -aS'RefSeqGene record not available' -p410 -asg9 -g4 -sg10 -(lp411 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 15, mRNA -p412 -sg14 -S'SCN1A' -p413 -sg16 -(dp414 -g18 -S'NP_001340890.1:p.?' -p415 -sg20 -S'NP_001340890.1:p.?' 
-p416 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353961.1:c.-2193_-2184delinsGT' -p417 -sg28 -g4 -sg29 -(dp418 -S'hg19' -p419 -(dp420 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p421 -sg35 -(dp422 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p423 -sg41 -S'166929890' -p424 -sg43 -VAC -p425 -sssg45 -(dp426 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p427 -sg35 -(dp428 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p429 -sg41 -S'166073380' -p430 -sg43 -VAC -p431 -sssS'grch37' -p432 -(dp433 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p434 -sg35 -(dp435 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p436 -sg41 -S'166929890' -p437 -sg43 -g425 -sssS'grch38' -p438 -(dp439 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p440 -sg35 -(dp441 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p442 -sg41 -S'166073380' -p443 -sg43 -g431 -ssssg65 -(dp444 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340890.1' -p445 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353961.1' -p446 -sssS'metadata' -p447 -(dp448 -S'variantvalidator_hgvs_version' -p449 -S'1.1.3' -p450 -sS'uta_schema' -p451 -S'uta_20180821' -p452 -sS'seqrepo_db' -p453 -S'2018-08-21' -p454 -sS'variantvalidator_version' -p455 -S'v0.2' -p456 -ssS'NM_001165963.2:c.233_242delinsGT' -p457 -(dp458 -g3 -g4 -sg5 -(lp459 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p460 -aS'RefSeqGene record not available' -p461 -asg9 -g4 -sg10 -(lp462 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 1, mRNA -p463 -sg14 -S'SCN1A' -p464 -sg16 -(dp465 -g18 -S'NP_001159435.1:p.(Glu78GlyfsTer7)' -p466 -sg20 -S'NP_001159435.1:p.(E78Gfs*7)' -p467 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001165963.2:c.233_242delinsGT' -p468 -sg28 -g4 -sg29 -(dp469 -S'hg19' -p470 -(dp471 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p472 -sg35 -(dp473 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p474 -sg41 -S'166929890' -p475 -sg43 -VAC -p476 -sssg45 -(dp477 -g33 
-S'NC_000002.12:g.166073380_166073389delinsAC' -p478 -sg35 -(dp479 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p480 -sg41 -S'166073380' -p481 -sg43 -VAC -p482 -sssS'grch37' -p483 -(dp484 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p485 -sg35 -(dp486 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p487 -sg41 -S'166929890' -p488 -sg43 -g476 -sssS'grch38' -p489 -(dp490 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p491 -sg35 -(dp492 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p493 -sg41 -S'166073380' -p494 -sg43 -g482 -ssssg65 -(dp495 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159435.1' -p496 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165963.2' -p497 -sssS'NM_001353950.1:c.233_242delinsGT' -p498 -(dp499 -g3 -g4 -sg5 -(lp500 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p501 -aS'RefSeqGene record not available' -p502 -asg9 -g4 -sg10 -(lp503 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 7, mRNA -p504 -sg14 -S'SCN1A' -p505 -sg16 -(dp506 -g18 -S'NP_001340879.1:p.(Glu78GlyfsTer7)' -p507 -sg20 -S'NP_001340879.1:p.(E78Gfs*7)' -p508 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353950.1:c.233_242delinsGT' -p509 -sg28 -g4 -sg29 -(dp510 -S'hg19' -p511 -(dp512 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p513 -sg35 -(dp514 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p515 -sg41 -S'166929890' -p516 -sg43 -VAC -p517 -sssg45 -(dp518 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p519 -sg35 -(dp520 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p521 -sg41 -S'166073380' -p522 -sg43 -VAC -p523 -sssS'grch37' -p524 -(dp525 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p526 -sg35 -(dp527 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p528 -sg41 -S'166929890' -p529 -sg43 -g517 -sssS'grch38' -p530 -(dp531 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p532 -sg35 -(dp533 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p534 -sg41 -S'166073380' -p535 -sg43 -g523 -ssssg65 -(dp536 -g67 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340879.1' -p537 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353950.1' -p538 -sssS'flag' -p539 -S'gene_variant' -p540 -sS'NM_001353948.1:c.233_242delinsGT' -p541 -(dp542 -g3 -g4 -sg5 -(lp543 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p544 -aS'RefSeqGene record not available' -p545 -asg9 -g4 -sg10 -(lp546 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 5, mRNA -p547 -sg14 -S'SCN1A' -p548 -sg16 -(dp549 -g18 -S'NP_001340877.1:p.(Glu78GlyfsTer7)' -p550 -sg20 -S'NP_001340877.1:p.(E78Gfs*7)' -p551 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353948.1:c.233_242delinsGT' -p552 -sg28 -g4 -sg29 -(dp553 -S'hg19' -p554 -(dp555 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p556 -sg35 -(dp557 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p558 -sg41 -S'166929890' -p559 -sg43 -VAC -p560 -sssg45 -(dp561 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p562 -sg35 -(dp563 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p564 -sg41 -S'166073380' -p565 -sg43 -VAC -p566 -sssS'grch37' -p567 -(dp568 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p569 -sg35 -(dp570 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p571 -sg41 -S'166929890' -p572 -sg43 -g560 -sssS'grch38' -p573 -(dp574 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p575 -sg35 -(dp576 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p577 -sg41 -S'166073380' -p578 -sg43 -g566 -ssssg65 -(dp579 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340877.1' -p580 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353948.1' -p581 -sssS'NM_001353949.1:c.233_242delinsGT' -p582 -(dp583 -g3 -g4 -sg5 -(lp584 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p585 -aS'RefSeqGene record not available' -p586 -asg9 -g4 -sg10 -(lp587 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 6, mRNA -p588 -sg14 -S'SCN1A' -p589 -sg16 -(dp590 
-g18 -S'NP_001340878.1:p.(Glu78GlyfsTer7)' -p591 -sg20 -S'NP_001340878.1:p.(E78Gfs*7)' -p592 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353949.1:c.233_242delinsGT' -p593 -sg28 -g4 -sg29 -(dp594 -S'hg19' -p595 -(dp596 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p597 -sg35 -(dp598 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p599 -sg41 -S'166929890' -p600 -sg43 -VAC -p601 -sssg45 -(dp602 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p603 -sg35 -(dp604 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p605 -sg41 -S'166073380' -p606 -sg43 -VAC -p607 -sssS'grch37' -p608 -(dp609 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p610 -sg35 -(dp611 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p612 -sg41 -S'166929890' -p613 -sg43 -g601 -sssS'grch38' -p614 -(dp615 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p616 -sg35 -(dp617 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p618 -sg41 -S'166073380' -p619 -sg43 -g607 -ssssg65 -(dp620 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340878.1' -p621 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353949.1' -p622 -sssS'NM_001353957.1:c.233_242delinsGT' -p623 -(dp624 -g3 -g4 -sg5 -(lp625 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p626 -aS'RefSeqGene record not available' -p627 -asg9 -g4 -sg10 -(lp628 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 12, mRNA -p629 -sg14 -S'SCN1A' -p630 -sg16 -(dp631 -g18 -S'NP_001340886.1:p.(Glu78GlyfsTer7)' -p632 -sg20 -S'NP_001340886.1:p.(E78Gfs*7)' -p633 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353957.1:c.233_242delinsGT' -p634 -sg28 -g4 -sg29 -(dp635 -S'hg19' -p636 -(dp637 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p638 -sg35 -(dp639 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p640 -sg41 -S'166929890' -p641 -sg43 -VAC -p642 -sssg45 -(dp643 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p644 -sg35 -(dp645 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p646 -sg41 -S'166073380' -p647 -sg43 -VAC -p648 -sssS'grch37' -p649 
-(dp650 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p651 -sg35 -(dp652 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p653 -sg41 -S'166929890' -p654 -sg43 -g642 -sssS'grch38' -p655 -(dp656 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p657 -sg35 -(dp658 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p659 -sg41 -S'166073380' -p660 -sg43 -g648 -ssssg65 -(dp661 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340886.1' -p662 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353957.1' -p663 -sssS'NM_001353952.1:c.233_242delinsGT' -p664 -(dp665 -g3 -g4 -sg5 -(lp666 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p667 -aS'RefSeqGene record not available' -p668 -asg9 -g4 -sg10 -(lp669 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 9, mRNA -p670 -sg14 -S'SCN1A' -p671 -sg16 -(dp672 -g18 -S'NP_001340881.1:p.(Glu78GlyfsTer7)' -p673 -sg20 -S'NP_001340881.1:p.(E78Gfs*7)' -p674 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353952.1:c.233_242delinsGT' -p675 -sg28 -g4 -sg29 -(dp676 -S'hg19' -p677 -(dp678 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p679 -sg35 -(dp680 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p681 -sg41 -S'166929890' -p682 -sg43 -VAC -p683 -sssg45 -(dp684 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p685 -sg35 -(dp686 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p687 -sg41 -S'166073380' -p688 -sg43 -VAC -p689 -sssS'grch37' -p690 -(dp691 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p692 -sg35 -(dp693 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p694 -sg41 -S'166929890' -p695 -sg43 -g683 -sssS'grch38' -p696 -(dp697 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p698 -sg35 -(dp699 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p700 -sg41 -S'166073380' -p701 -sg43 -g689 -ssssg65 -(dp702 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340881.1' -p703 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353952.1' -p704 -sssS'NM_001353954.1:c.233_242delinsGT' -p705 -(dp706 -g3 -g4 -sg5 -(lp707 
-S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p708 -aS'RefSeqGene record not available' -p709 -asg9 -g4 -sg10 -(lp710 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 10, mRNA -p711 -sg14 -S'SCN1A' -p712 -sg16 -(dp713 -g18 -S'NP_001340883.1:p.(Glu78GlyfsTer7)' -p714 -sg20 -S'NP_001340883.1:p.(E78Gfs*7)' -p715 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353954.1:c.233_242delinsGT' -p716 -sg28 -g4 -sg29 -(dp717 -S'hg19' -p718 -(dp719 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p720 -sg35 -(dp721 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p722 -sg41 -S'166929890' -p723 -sg43 -VAC -p724 -sssg45 -(dp725 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p726 -sg35 -(dp727 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p728 -sg41 -S'166073380' -p729 -sg43 -VAC -p730 -sssS'grch37' -p731 -(dp732 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p733 -sg35 -(dp734 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p735 -sg41 -S'166929890' -p736 -sg43 -g724 -sssS'grch38' -p737 -(dp738 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p739 -sg35 -(dp740 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p741 -sg41 -S'166073380' -p742 -sg43 -g730 -ssssg65 -(dp743 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340883.1' -p744 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353954.1' -p745 -sssS'NM_006920.4:c.233_242delinsGT' -p746 -(dp747 -g3 -g4 -sg5 -(lp748 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p749 -aS'A more recent version of the selected reference sequence NM_006920.4 is available (NM_006920.5)' -p750 -aS'NM_006920.5:c.233_242delinsGT MUST be fully validated prior to use in reports' -p751 -aS'select_variants=NM_006920.5:c.233_242delinsGT' -p752 -aS'RefSeqGene record not available' -p753 -asg9 -g4 -sg10 -(lp754 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 2, mRNA -p755 -sg14 -S'SCN1A' -p756 -sg16 
-(dp757 -g18 -S'NP_008851.3:p.(Glu78GlyfsTer7)' -p758 -sg20 -S'NP_008851.3:p.(E78Gfs*7)' -p759 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_006920.4:c.233_242delinsGT' -p760 -sg28 -g4 -sg29 -(dp761 -S'hg19' -p762 -(dp763 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p764 -sg35 -(dp765 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p766 -sg41 -S'166929890' -p767 -sg43 -VAC -p768 -sssg45 -(dp769 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p770 -sg35 -(dp771 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p772 -sg41 -S'166073380' -p773 -sg43 -VAC -p774 -sssS'grch37' -p775 -(dp776 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p777 -sg35 -(dp778 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p779 -sg41 -S'166929890' -p780 -sg43 -g768 -sssS'grch38' -p781 -(dp782 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p783 -sg35 -(dp784 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p785 -sg41 -S'166073380' -p786 -sg43 -g774 -ssssg65 -(dp787 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_008851.3' -p788 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_006920.4' -p789 -sssS'NM_001353960.1:c.233_242delinsGT' -p790 -(dp791 -g3 -g4 -sg5 -(lp792 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p793 -aS'RefSeqGene record not available' -p794 -asg9 -g4 -sg10 -(lp795 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 14, mRNA -p796 -sg14 -S'SCN1A' -p797 -sg16 -(dp798 -g18 -S'NP_001340889.1:p.(Glu78GlyfsTer7)' -p799 -sg20 -S'NP_001340889.1:p.(E78Gfs*7)' -p800 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001353960.1:c.233_242delinsGT' -p801 -sg28 -g4 -sg29 -(dp802 -S'hg19' -p803 -(dp804 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p805 -sg35 -(dp806 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p807 -sg41 -S'166929890' -p808 -sg43 -VAC -p809 -sssg45 -(dp810 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p811 -sg35 -(dp812 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p813 -sg41 -S'166073380' -p814 -sg43 -VAC -p815 -sssS'grch37' -p816 -(dp817 
-g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p818 -sg35 -(dp819 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p820 -sg41 -S'166929890' -p821 -sg43 -g809 -sssS'grch38' -p822 -(dp823 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p824 -sg35 -(dp825 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p826 -sg41 -S'166073380' -p827 -sg43 -g815 -ssssg65 -(dp828 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340889.1' -p829 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353960.1' -p830 -sssS'NM_001165964.2:c.233_242delinsGT' -p831 -(dp832 -g3 -g4 -sg5 -(lp833 -S'NC_000002.11:g.166929889GTCCAGGTCCT>GAC automapped to NC_000002.11:g.166929890_166929899delinsAC' -p834 -aS'RefSeqGene record not available' -p835 -asg9 -g4 -sg10 -(lp836 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 3, mRNA -p837 -sg14 -S'SCN1A' -p838 -sg16 -(dp839 -g18 -S'NP_001159436.1:p.(Glu78GlyfsTer7)' -p840 -sg20 -S'NP_001159436.1:p.(E78Gfs*7)' -p841 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001165964.2:c.233_242delinsGT' -p842 -sg28 -g4 -sg29 -(dp843 -S'hg19' -p844 -(dp845 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p846 -sg35 -(dp847 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p848 -sg41 -S'166929890' -p849 -sg43 -VAC -p850 -sssg45 -(dp851 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p852 -sg35 -(dp853 -g37 -g38 -sg39 -S'TCCAGGTCCT' -p854 -sg41 -S'166073380' -p855 -sg43 -VAC -p856 -sssS'grch37' -p857 -(dp858 -g33 -S'NC_000002.11:g.166929890_166929899delinsAC' -p859 -sg35 -(dp860 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p861 -sg41 -S'166929890' -p862 -sg43 -g850 -sssS'grch38' -p863 -(dp864 -g33 -S'NC_000002.12:g.166073380_166073389delinsAC' -p865 -sg35 -(dp866 -g37 -g56 -sg39 -S'TCCAGGTCCT' -p867 -sg41 -S'166073380' -p868 -sg43 -g856 -ssssg65 -(dp869 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159436.1' -p870 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165964.2' -p871 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant272.txt b/VariantValidator/testing/testOutputsMasterITS/variant272.txt deleted file mode 100644 index 4a1c9595..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant272.txt +++ /dev/null @@ -1,2495 +0,0 @@ -(dp0 -S'NR_148667.1:n.638_645del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 16, non-coding RNA -p13 -sS'gene_symbol' -p14 -S'SCN1A' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'Non-coding :n.' -p19 -sS'slr' -p20 -g19 -ssS'submitted_variant' -p21 -S'2-166929891-CCAGGTCCT-C' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NR_148667.1:n.638_645del' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000002.11:g.166929892_166929899del' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr2' -p37 -sS'ref' -p38 -S'CCAGGTCCT' -p39 -sS'pos' -p40 -S'166929891' -p41 -sS'alt' -p42 -S'C' -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000002.12:g.166073382_166073389del' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -S'CCAGGTCCT' -p48 -sg40 -S'166073381' -p49 -sg42 -g43 -sssS'grch37' -p50 -(dp51 -g32 -S'NC_000002.11:g.166929892_166929899del' -p52 -sg34 -(dp53 -g36 -S'2' -p54 -sg38 -S'CCAGGTCCT' -p55 -sg40 -S'166929891' -p56 -sg42 -g43 -sssS'grch38' -p57 -(dp58 -g32 -S'NC_000002.12:g.166073382_166073389del' -p59 -sg34 -(dp60 -g36 -g54 -sg38 -S'CCAGGTCCT' -p61 -sg40 -S'166073381' -p62 -sg42 -g43 
-ssssS'reference_sequence_records' -p63 -(dp64 -S'transcript' -p65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_148667.1' -p66 -sssS'NM_001165964.2:c.233_240del' -p67 -(dp68 -g3 -g4 -sg5 -(lp69 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p70 -aS'RefSeqGene record not available' -p71 -asg9 -g4 -sg10 -(lp72 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 3, mRNA -p73 -sg14 -S'SCN1A' -p74 -sg16 -(dp75 -g18 -S'NP_001159436.1:p.(Glu78GlyfsTer7)' -p76 -sg20 -S'NP_001159436.1:p.(E78Gfs*7)' -p77 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001165964.2:c.233_240del' -p78 -sg27 -g4 -sg28 -(dp79 -S'hg19' -p80 -(dp81 -g32 -S'NC_000002.11:g.166929892_166929899del' -p82 -sg34 -(dp83 -g36 -g37 -sg38 -S'CCAGGTCCT' -p84 -sg40 -S'166929891' -p85 -sg42 -g43 -sssg44 -(dp86 -g32 -S'NC_000002.12:g.166073382_166073389del' -p87 -sg34 -(dp88 -g36 -g37 -sg38 -S'CCAGGTCCT' -p89 -sg40 -S'166073381' -p90 -sg42 -g43 -sssS'grch37' -p91 -(dp92 -g32 -S'NC_000002.11:g.166929892_166929899del' -p93 -sg34 -(dp94 -g36 -g54 -sg38 -S'CCAGGTCCT' -p95 -sg40 -S'166929891' -p96 -sg42 -g43 -sssS'grch38' -p97 -(dp98 -g32 -S'NC_000002.12:g.166073382_166073389del' -p99 -sg34 -(dp100 -g36 -g54 -sg38 -S'CCAGGTCCT' -p101 -sg40 -S'166073381' -p102 -sg42 -g43 -ssssg63 -(dp103 -S'protein' -p104 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159436.1' -p105 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165964.2' -p106 -sssS'NM_001353951.1:c.233_240del' -p107 -(dp108 -g3 -g4 -sg5 -(lp109 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p110 -aS'RefSeqGene record not available' -p111 -asg9 -g4 -sg10 -(lp112 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 8, mRNA -p113 -sg14 -S'SCN1A' -p114 -sg16 -(dp115 -g18 -S'NP_001340880.1:p.(Glu78GlyfsTer7)' -p116 -sg20 -S'NP_001340880.1:p.(E78Gfs*7)' -p117 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 
-S'NM_001353951.1:c.233_240del' -p118 -sg27 -g4 -sg28 -(dp119 -S'hg19' -p120 -(dp121 -g32 -S'NC_000002.11:g.166929892_166929899del' -p122 -sg34 -(dp123 -g36 -g37 -sg38 -S'CCAGGTCCT' -p124 -sg40 -S'166929891' -p125 -sg42 -g43 -sssg44 -(dp126 -g32 -S'NC_000002.12:g.166073382_166073389del' -p127 -sg34 -(dp128 -g36 -g37 -sg38 -S'CCAGGTCCT' -p129 -sg40 -S'166073381' -p130 -sg42 -g43 -sssS'grch37' -p131 -(dp132 -g32 -S'NC_000002.11:g.166929892_166929899del' -p133 -sg34 -(dp134 -g36 -g54 -sg38 -S'CCAGGTCCT' -p135 -sg40 -S'166929891' -p136 -sg42 -g43 -sssS'grch38' -p137 -(dp138 -g32 -S'NC_000002.12:g.166073382_166073389del' -p139 -sg34 -(dp140 -g36 -g54 -sg38 -S'CCAGGTCCT' -p141 -sg40 -S'166073381' -p142 -sg42 -g43 -ssssg63 -(dp143 -g104 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340880.1' -p144 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353951.1' -p145 -sssS'NM_001353954.1:c.233_240del' -p146 -(dp147 -g3 -g4 -sg5 -(lp148 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p149 -aS'RefSeqGene record not available' -p150 -asg9 -g4 -sg10 -(lp151 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 10, mRNA -p152 -sg14 -S'SCN1A' -p153 -sg16 -(dp154 -g18 -S'NP_001340883.1:p.(Glu78GlyfsTer7)' -p155 -sg20 -S'NP_001340883.1:p.(E78Gfs*7)' -p156 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001353954.1:c.233_240del' -p157 -sg27 -g4 -sg28 -(dp158 -S'hg19' -p159 -(dp160 -g32 -S'NC_000002.11:g.166929892_166929899del' -p161 -sg34 -(dp162 -g36 -g37 -sg38 -S'CCAGGTCCT' -p163 -sg40 -S'166929891' -p164 -sg42 -g43 -sssg44 -(dp165 -g32 -S'NC_000002.12:g.166073382_166073389del' -p166 -sg34 -(dp167 -g36 -g37 -sg38 -S'CCAGGTCCT' -p168 -sg40 -S'166073381' -p169 -sg42 -g43 -sssS'grch37' -p170 -(dp171 -g32 -S'NC_000002.11:g.166929892_166929899del' -p172 -sg34 -(dp173 -g36 -g54 -sg38 -S'CCAGGTCCT' -p174 -sg40 -S'166929891' -p175 -sg42 -g43 -sssS'grch38' -p176 -(dp177 -g32 
-S'NC_000002.12:g.166073382_166073389del' -p178 -sg34 -(dp179 -g36 -g54 -sg38 -S'CCAGGTCCT' -p180 -sg40 -S'166073381' -p181 -sg42 -g43 -ssssg63 -(dp182 -g104 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340883.1' -p183 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353954.1' -p184 -sssS'NM_001353961.1:c.-2193_-2186del' -p185 -(dp186 -g3 -g4 -sg5 -(lp187 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p188 -aS'RefSeqGene record not available' -p189 -asg9 -g4 -sg10 -(lp190 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 15, mRNA -p191 -sg14 -S'SCN1A' -p192 -sg16 -(dp193 -g18 -S'NP_001340890.1:p.?' -p194 -sg20 -S'NP_001340890.1:p.?' -p195 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001353961.1:c.-2193_-2186del' -p196 -sg27 -g4 -sg28 -(dp197 -S'hg19' -p198 -(dp199 -g32 -S'NC_000002.11:g.166929892_166929899del' -p200 -sg34 -(dp201 -g36 -g37 -sg38 -S'CCAGGTCCT' -p202 -sg40 -S'166929891' -p203 -sg42 -g43 -sssg44 -(dp204 -g32 -S'NC_000002.12:g.166073382_166073389del' -p205 -sg34 -(dp206 -g36 -g37 -sg38 -S'CCAGGTCCT' -p207 -sg40 -S'166073381' -p208 -sg42 -g43 -sssS'grch37' -p209 -(dp210 -g32 -S'NC_000002.11:g.166929892_166929899del' -p211 -sg34 -(dp212 -g36 -g54 -sg38 -S'CCAGGTCCT' -p213 -sg40 -S'166929891' -p214 -sg42 -g43 -sssS'grch38' -p215 -(dp216 -g32 -S'NC_000002.12:g.166073382_166073389del' -p217 -sg34 -(dp218 -g36 -g54 -sg38 -S'CCAGGTCCT' -p219 -sg40 -S'166073381' -p220 -sg42 -g43 -ssssg63 -(dp221 -g104 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340890.1' -p222 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353961.1' -p223 -sssS'NM_001353948.1:c.233_240del' -p224 -(dp225 -g3 -g4 -sg5 -(lp226 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p227 -aS'RefSeqGene record not available' -p228 -asg9 -g4 -sg10 -(lp229 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 5, mRNA -p230 
-sg14 -S'SCN1A' -p231 -sg16 -(dp232 -g18 -S'NP_001340877.1:p.(Glu78GlyfsTer7)' -p233 -sg20 -S'NP_001340877.1:p.(E78Gfs*7)' -p234 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001353948.1:c.233_240del' -p235 -sg27 -g4 -sg28 -(dp236 -S'hg19' -p237 -(dp238 -g32 -S'NC_000002.11:g.166929892_166929899del' -p239 -sg34 -(dp240 -g36 -g37 -sg38 -S'CCAGGTCCT' -p241 -sg40 -S'166929891' -p242 -sg42 -g43 -sssg44 -(dp243 -g32 -S'NC_000002.12:g.166073382_166073389del' -p244 -sg34 -(dp245 -g36 -g37 -sg38 -S'CCAGGTCCT' -p246 -sg40 -S'166073381' -p247 -sg42 -g43 -sssS'grch37' -p248 -(dp249 -g32 -S'NC_000002.11:g.166929892_166929899del' -p250 -sg34 -(dp251 -g36 -g54 -sg38 -S'CCAGGTCCT' -p252 -sg40 -S'166929891' -p253 -sg42 -g43 -sssS'grch38' -p254 -(dp255 -g32 -S'NC_000002.12:g.166073382_166073389del' -p256 -sg34 -(dp257 -g36 -g54 -sg38 -S'CCAGGTCCT' -p258 -sg40 -S'166073381' -p259 -sg42 -g43 -ssssg63 -(dp260 -g104 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340877.1' -p261 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353948.1' -p262 -sssS'NM_001353960.1:c.233_240del' -p263 -(dp264 -g3 -g4 -sg5 -(lp265 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p266 -aS'RefSeqGene record not available' -p267 -asg9 -g4 -sg10 -(lp268 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 14, mRNA -p269 -sg14 -S'SCN1A' -p270 -sg16 -(dp271 -g18 -S'NP_001340889.1:p.(Glu78GlyfsTer7)' -p272 -sg20 -S'NP_001340889.1:p.(E78Gfs*7)' -p273 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001353960.1:c.233_240del' -p274 -sg27 -g4 -sg28 -(dp275 -S'hg19' -p276 -(dp277 -g32 -S'NC_000002.11:g.166929892_166929899del' -p278 -sg34 -(dp279 -g36 -g37 -sg38 -S'CCAGGTCCT' -p280 -sg40 -S'166929891' -p281 -sg42 -g43 -sssg44 -(dp282 -g32 -S'NC_000002.12:g.166073382_166073389del' -p283 -sg34 -(dp284 -g36 -g37 -sg38 -S'CCAGGTCCT' -p285 -sg40 -S'166073381' -p286 -sg42 -g43 -sssS'grch37' -p287 -(dp288 -g32 
-S'NC_000002.11:g.166929892_166929899del' -p289 -sg34 -(dp290 -g36 -g54 -sg38 -S'CCAGGTCCT' -p291 -sg40 -S'166929891' -p292 -sg42 -g43 -sssS'grch38' -p293 -(dp294 -g32 -S'NC_000002.12:g.166073382_166073389del' -p295 -sg34 -(dp296 -g36 -g54 -sg38 -S'CCAGGTCCT' -p297 -sg40 -S'166073381' -p298 -sg42 -g43 -ssssg63 -(dp299 -g104 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340889.1' -p300 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353960.1' -p301 -sssS'NM_001202435.1:c.233_240del' -p302 -(dp303 -g3 -g4 -sg5 -(lp304 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p305 -aS'A more recent version of the selected reference sequence NM_001202435.1 is available (NM_001202435.2)' -p306 -aS'NM_001202435.2:c.233_240del MUST be fully validated prior to use in reports' -p307 -aS'select_variants=NM_001202435.2:c.233_240del' -p308 -aS'RefSeqGene record not available' -p309 -asg9 -g4 -sg10 -(lp310 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 4, mRNA -p311 -sg14 -S'SCN1A' -p312 -sg16 -(dp313 -g18 -S'NP_001189364.1:p.(Glu78GlyfsTer7)' -p314 -sg20 -S'NP_001189364.1:p.(E78Gfs*7)' -p315 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001202435.1:c.233_240del' -p316 -sg27 -g4 -sg28 -(dp317 -S'hg19' -p318 -(dp319 -g32 -S'NC_000002.11:g.166929892_166929899del' -p320 -sg34 -(dp321 -g36 -g37 -sg38 -S'CCAGGTCCT' -p322 -sg40 -S'166929891' -p323 -sg42 -g43 -sssg44 -(dp324 -g32 -S'NC_000002.12:g.166073382_166073389del' -p325 -sg34 -(dp326 -g36 -g37 -sg38 -S'CCAGGTCCT' -p327 -sg40 -S'166073381' -p328 -sg42 -g43 -sssS'grch37' -p329 -(dp330 -g32 -S'NC_000002.11:g.166929892_166929899del' -p331 -sg34 -(dp332 -g36 -g54 -sg38 -S'CCAGGTCCT' -p333 -sg40 -S'166929891' -p334 -sg42 -g43 -sssS'grch38' -p335 -(dp336 -g32 -S'NC_000002.12:g.166073382_166073389del' -p337 -sg34 -(dp338 -g36 -g54 -sg38 -S'CCAGGTCCT' -p339 -sg40 -S'166073381' -p340 -sg42 -g43 -ssssg63 -(dp341 -g104 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001189364.1' -p342 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001202435.1' -p343 -sssS'NM_001202435.2:c.233_240del' -p344 -(dp345 -g3 -g4 -sg5 -(lp346 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p347 -aS'RefSeqGene record not available' -p348 -asg9 -g4 -sg10 -(lp349 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 4, mRNA -p350 -sg14 -S'SCN1A' -p351 -sg16 -(dp352 -g18 -S'NP_001189364.1:p.(Glu78GlyfsTer7)' -p353 -sg20 -S'NP_001189364.1:p.(E78Gfs*7)' -p354 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001202435.2:c.233_240del' -p355 -sg27 -g4 -sg28 -(dp356 -S'hg19' -p357 -(dp358 -g32 -S'NC_000002.11:g.166929892_166929899del' -p359 -sg34 -(dp360 -g36 -g37 -sg38 -S'CCAGGTCCT' -p361 -sg40 -S'166929891' -p362 -sg42 -g43 -sssg44 -(dp363 -g32 -S'NC_000002.12:g.166073382_166073389del' -p364 -sg34 -(dp365 -g36 -g37 -sg38 -S'CCAGGTCCT' -p366 -sg40 -S'166073381' -p367 -sg42 -g43 -sssS'grch37' -p368 -(dp369 -g32 -S'NC_000002.11:g.166929892_166929899del' -p370 -sg34 -(dp371 -g36 -g54 -sg38 -S'CCAGGTCCT' -p372 -sg40 -S'166929891' -p373 -sg42 -g43 -sssS'grch38' -p374 -(dp375 -g32 -S'NC_000002.12:g.166073382_166073389del' -p376 -sg34 -(dp377 -g36 -g54 -sg38 -S'CCAGGTCCT' -p378 -sg40 -S'166073381' -p379 -sg42 -g43 -ssssg63 -(dp380 -g104 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001189364.1' -p381 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001202435.2' -p382 -sssS'metadata' -p383 -(dp384 -S'variantvalidator_hgvs_version' -p385 -S'1.1.3' -p386 -sS'uta_schema' -p387 -S'uta_20180821' -p388 -sS'seqrepo_db' -p389 -S'2018-08-21' -p390 -sS'variantvalidator_version' -p391 -S'v0.2' -p392 -ssS'NM_006920.5:c.233_240del' -p393 -(dp394 -g3 -g4 -sg5 -(lp395 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p396 -aS'RefSeqGene record not available' -p397 -asg9 -g4 -sg10 -(lp398 -sg12 -VHomo sapiens sodium 
voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 2, mRNA -p399 -sg14 -S'SCN1A' -p400 -sg16 -(dp401 -g18 -S'NP_008851.3:p.(Glu78GlyfsTer7)' -p402 -sg20 -S'NP_008851.3:p.(E78Gfs*7)' -p403 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_006920.5:c.233_240del' -p404 -sg27 -g4 -sg28 -(dp405 -S'hg19' -p406 -(dp407 -g32 -S'NC_000002.11:g.166929892_166929899del' -p408 -sg34 -(dp409 -g36 -g37 -sg38 -S'CCAGGTCCT' -p410 -sg40 -S'166929891' -p411 -sg42 -g43 -sssg44 -(dp412 -g32 -S'NC_000002.12:g.166073382_166073389del' -p413 -sg34 -(dp414 -g36 -g37 -sg38 -S'CCAGGTCCT' -p415 -sg40 -S'166073381' -p416 -sg42 -g43 -sssS'grch37' -p417 -(dp418 -g32 -S'NC_000002.11:g.166929892_166929899del' -p419 -sg34 -(dp420 -g36 -g54 -sg38 -S'CCAGGTCCT' -p421 -sg40 -S'166929891' -p422 -sg42 -g43 -sssS'grch38' -p423 -(dp424 -g32 -S'NC_000002.12:g.166073382_166073389del' -p425 -sg34 -(dp426 -g36 -g54 -sg38 -S'CCAGGTCCT' -p427 -sg40 -S'166073381' -p428 -sg42 -g43 -ssssg63 -(dp429 -g104 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_008851.3' -p430 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_006920.5' -p431 -sssS'NM_001353955.1:c.233_240del' -p432 -(dp433 -g3 -g4 -sg5 -(lp434 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p435 -aS'RefSeqGene record not available' -p436 -asg9 -g4 -sg10 -(lp437 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 11, mRNA -p438 -sg14 -S'SCN1A' -p439 -sg16 -(dp440 -g18 -S'NP_001340884.1:p.(Glu78GlyfsTer7)' -p441 -sg20 -S'NP_001340884.1:p.(E78Gfs*7)' -p442 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001353955.1:c.233_240del' -p443 -sg27 -g4 -sg28 -(dp444 -S'hg19' -p445 -(dp446 -g32 -S'NC_000002.11:g.166929892_166929899del' -p447 -sg34 -(dp448 -g36 -g37 -sg38 -S'CCAGGTCCT' -p449 -sg40 -S'166929891' -p450 -sg42 -g43 -sssg44 -(dp451 -g32 -S'NC_000002.12:g.166073382_166073389del' -p452 -sg34 -(dp453 -g36 -g37 -sg38 -S'CCAGGTCCT' -p454 -sg40 -S'166073381' -p455 -sg42 -g43 
-sssS'grch37' -p456 -(dp457 -g32 -S'NC_000002.11:g.166929892_166929899del' -p458 -sg34 -(dp459 -g36 -g54 -sg38 -S'CCAGGTCCT' -p460 -sg40 -S'166929891' -p461 -sg42 -g43 -sssS'grch38' -p462 -(dp463 -g32 -S'NC_000002.12:g.166073382_166073389del' -p464 -sg34 -(dp465 -g36 -g54 -sg38 -S'CCAGGTCCT' -p466 -sg40 -S'166073381' -p467 -sg42 -g43 -ssssg63 -(dp468 -g104 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340884.1' -p469 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353955.1' -p470 -sssS'NM_001353952.1:c.233_240del' -p471 -(dp472 -g3 -g4 -sg5 -(lp473 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p474 -aS'RefSeqGene record not available' -p475 -asg9 -g4 -sg10 -(lp476 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 9, mRNA -p477 -sg14 -S'SCN1A' -p478 -sg16 -(dp479 -g18 -S'NP_001340881.1:p.(Glu78GlyfsTer7)' -p480 -sg20 -S'NP_001340881.1:p.(E78Gfs*7)' -p481 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001353952.1:c.233_240del' -p482 -sg27 -g4 -sg28 -(dp483 -S'hg19' -p484 -(dp485 -g32 -S'NC_000002.11:g.166929892_166929899del' -p486 -sg34 -(dp487 -g36 -g37 -sg38 -S'CCAGGTCCT' -p488 -sg40 -S'166929891' -p489 -sg42 -g43 -sssg44 -(dp490 -g32 -S'NC_000002.12:g.166073382_166073389del' -p491 -sg34 -(dp492 -g36 -g37 -sg38 -S'CCAGGTCCT' -p493 -sg40 -S'166073381' -p494 -sg42 -g43 -sssS'grch37' -p495 -(dp496 -g32 -S'NC_000002.11:g.166929892_166929899del' -p497 -sg34 -(dp498 -g36 -g54 -sg38 -S'CCAGGTCCT' -p499 -sg40 -S'166929891' -p500 -sg42 -g43 -sssS'grch38' -p501 -(dp502 -g32 -S'NC_000002.12:g.166073382_166073389del' -p503 -sg34 -(dp504 -g36 -g54 -sg38 -S'CCAGGTCCT' -p505 -sg40 -S'166073381' -p506 -sg42 -g43 -ssssg63 -(dp507 -g104 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340881.1' -p508 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353952.1' -p509 -sssS'NM_001353957.1:c.233_240del' -p510 -(dp511 -g3 -g4 -sg5 -(lp512 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to 
NC_000002.11:g.166929893_166929900del' -p513 -aS'RefSeqGene record not available' -p514 -asg9 -g4 -sg10 -(lp515 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 12, mRNA -p516 -sg14 -S'SCN1A' -p517 -sg16 -(dp518 -g18 -S'NP_001340886.1:p.(Glu78GlyfsTer7)' -p519 -sg20 -S'NP_001340886.1:p.(E78Gfs*7)' -p520 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001353957.1:c.233_240del' -p521 -sg27 -g4 -sg28 -(dp522 -S'hg19' -p523 -(dp524 -g32 -S'NC_000002.11:g.166929892_166929899del' -p525 -sg34 -(dp526 -g36 -g37 -sg38 -S'CCAGGTCCT' -p527 -sg40 -S'166929891' -p528 -sg42 -g43 -sssg44 -(dp529 -g32 -S'NC_000002.12:g.166073382_166073389del' -p530 -sg34 -(dp531 -g36 -g37 -sg38 -S'CCAGGTCCT' -p532 -sg40 -S'166073381' -p533 -sg42 -g43 -sssS'grch37' -p534 -(dp535 -g32 -S'NC_000002.11:g.166929892_166929899del' -p536 -sg34 -(dp537 -g36 -g54 -sg38 -S'CCAGGTCCT' -p538 -sg40 -S'166929891' -p539 -sg42 -g43 -sssS'grch38' -p540 -(dp541 -g32 -S'NC_000002.12:g.166073382_166073389del' -p542 -sg34 -(dp543 -g36 -g54 -sg38 -S'CCAGGTCCT' -p544 -sg40 -S'166073381' -p545 -sg42 -g43 -ssssg63 -(dp546 -g104 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340886.1' -p547 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353957.1' -p548 -sssS'flag' -p549 -S'gene_variant' -p550 -sS'NM_006920.4:c.233_240del' -p551 -(dp552 -g3 -g4 -sg5 -(lp553 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p554 -aS'A more recent version of the selected reference sequence NM_006920.4 is available (NM_006920.5)' -p555 -aS'NM_006920.5:c.233_240del MUST be fully validated prior to use in reports' -p556 -aS'select_variants=NM_006920.5:c.233_240del' -p557 -aS'RefSeqGene record not available' -p558 -asg9 -g4 -sg10 -(lp559 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 2, mRNA -p560 -sg14 -S'SCN1A' -p561 -sg16 -(dp562 -g18 -S'NP_008851.3:p.(Glu78GlyfsTer7)' -p563 -sg20 -S'NP_008851.3:p.(E78Gfs*7)' 
-p564 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_006920.4:c.233_240del' -p565 -sg27 -g4 -sg28 -(dp566 -S'hg19' -p567 -(dp568 -g32 -S'NC_000002.11:g.166929892_166929899del' -p569 -sg34 -(dp570 -g36 -g37 -sg38 -S'CCAGGTCCT' -p571 -sg40 -S'166929891' -p572 -sg42 -g43 -sssg44 -(dp573 -g32 -S'NC_000002.12:g.166073382_166073389del' -p574 -sg34 -(dp575 -g36 -g37 -sg38 -S'CCAGGTCCT' -p576 -sg40 -S'166073381' -p577 -sg42 -g43 -sssS'grch37' -p578 -(dp579 -g32 -S'NC_000002.11:g.166929892_166929899del' -p580 -sg34 -(dp581 -g36 -g54 -sg38 -S'CCAGGTCCT' -p582 -sg40 -S'166929891' -p583 -sg42 -g43 -sssS'grch38' -p584 -(dp585 -g32 -S'NC_000002.12:g.166073382_166073389del' -p586 -sg34 -(dp587 -g36 -g54 -sg38 -S'CCAGGTCCT' -p588 -sg40 -S'166073381' -p589 -sg42 -g43 -ssssg63 -(dp590 -g104 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_008851.3' -p591 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_006920.4' -p592 -sssS'NM_001353950.1:c.233_240del' -p593 -(dp594 -g3 -g4 -sg5 -(lp595 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p596 -aS'RefSeqGene record not available' -p597 -asg9 -g4 -sg10 -(lp598 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 7, mRNA -p599 -sg14 -S'SCN1A' -p600 -sg16 -(dp601 -g18 -S'NP_001340879.1:p.(Glu78GlyfsTer7)' -p602 -sg20 -S'NP_001340879.1:p.(E78Gfs*7)' -p603 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001353950.1:c.233_240del' -p604 -sg27 -g4 -sg28 -(dp605 -S'hg19' -p606 -(dp607 -g32 -S'NC_000002.11:g.166929892_166929899del' -p608 -sg34 -(dp609 -g36 -g37 -sg38 -S'CCAGGTCCT' -p610 -sg40 -S'166929891' -p611 -sg42 -g43 -sssg44 -(dp612 -g32 -S'NC_000002.12:g.166073382_166073389del' -p613 -sg34 -(dp614 -g36 -g37 -sg38 -S'CCAGGTCCT' -p615 -sg40 -S'166073381' -p616 -sg42 -g43 -sssS'grch37' -p617 -(dp618 -g32 -S'NC_000002.11:g.166929892_166929899del' -p619 -sg34 -(dp620 -g36 -g54 -sg38 -S'CCAGGTCCT' -p621 -sg40 -S'166929891' -p622 -sg42 -g43 -sssS'grch38' -p623 -(dp624 -g32 
-S'NC_000002.12:g.166073382_166073389del' -p625 -sg34 -(dp626 -g36 -g54 -sg38 -S'CCAGGTCCT' -p627 -sg40 -S'166073381' -p628 -sg42 -g43 -ssssg63 -(dp629 -g104 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340879.1' -p630 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353950.1' -p631 -sssS'NM_001165963.2:c.233_240del' -p632 -(dp633 -g3 -g4 -sg5 -(lp634 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p635 -aS'RefSeqGene record not available' -p636 -asg9 -g4 -sg10 -(lp637 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 1, mRNA -p638 -sg14 -S'SCN1A' -p639 -sg16 -(dp640 -g18 -S'NP_001159435.1:p.(Glu78GlyfsTer7)' -p641 -sg20 -S'NP_001159435.1:p.(E78Gfs*7)' -p642 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001165963.2:c.233_240del' -p643 -sg27 -g4 -sg28 -(dp644 -S'hg19' -p645 -(dp646 -g32 -S'NC_000002.11:g.166929892_166929899del' -p647 -sg34 -(dp648 -g36 -g37 -sg38 -S'CCAGGTCCT' -p649 -sg40 -S'166929891' -p650 -sg42 -g43 -sssg44 -(dp651 -g32 -S'NC_000002.12:g.166073382_166073389del' -p652 -sg34 -(dp653 -g36 -g37 -sg38 -S'CCAGGTCCT' -p654 -sg40 -S'166073381' -p655 -sg42 -g43 -sssS'grch37' -p656 -(dp657 -g32 -S'NC_000002.11:g.166929892_166929899del' -p658 -sg34 -(dp659 -g36 -g54 -sg38 -S'CCAGGTCCT' -p660 -sg40 -S'166929891' -p661 -sg42 -g43 -sssS'grch38' -p662 -(dp663 -g32 -S'NC_000002.12:g.166073382_166073389del' -p664 -sg34 -(dp665 -g36 -g54 -sg38 -S'CCAGGTCCT' -p666 -sg40 -S'166073381' -p667 -sg42 -g43 -ssssg63 -(dp668 -g104 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159435.1' -p669 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165963.2' -p670 -sssS'NM_001165963.1:c.233_240del' -p671 -(dp672 -g3 -g4 -sg5 -(lp673 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p674 -aS'A more recent version of the selected reference sequence NM_001165963.1 is available (NM_001165963.2)' -p675 -aS'NM_001165963.2:c.233_240del MUST be fully 
validated prior to use in reports' -p676 -aS'select_variants=NM_001165963.2:c.233_240del' -p677 -aS'RefSeqGene record not available' -p678 -asg9 -g4 -sg10 -(lp679 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 1, mRNA -p680 -sg14 -S'SCN1A' -p681 -sg16 -(dp682 -g18 -S'NP_001159435.1:p.(Glu78GlyfsTer7)' -p683 -sg20 -S'NP_001159435.1:p.(E78Gfs*7)' -p684 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001165963.1:c.233_240del' -p685 -sg27 -g4 -sg28 -(dp686 -S'hg19' -p687 -(dp688 -g32 -S'NC_000002.11:g.166929892_166929899del' -p689 -sg34 -(dp690 -g36 -g37 -sg38 -S'CCAGGTCCT' -p691 -sg40 -S'166929891' -p692 -sg42 -g43 -sssg44 -(dp693 -g32 -S'NC_000002.12:g.166073382_166073389del' -p694 -sg34 -(dp695 -g36 -g37 -sg38 -S'CCAGGTCCT' -p696 -sg40 -S'166073381' -p697 -sg42 -g43 -sssS'grch37' -p698 -(dp699 -g32 -S'NC_000002.11:g.166929892_166929899del' -p700 -sg34 -(dp701 -g36 -g54 -sg38 -S'CCAGGTCCT' -p702 -sg40 -S'166929891' -p703 -sg42 -g43 -sssS'grch38' -p704 -(dp705 -g32 -S'NC_000002.12:g.166073382_166073389del' -p706 -sg34 -(dp707 -g36 -g54 -sg38 -S'CCAGGTCCT' -p708 -sg40 -S'166073381' -p709 -sg42 -g43 -ssssg63 -(dp710 -g104 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159435.1' -p711 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165963.1' -p712 -sssS'NM_001165964.1:c.233_240del' -p713 -(dp714 -g3 -g4 -sg5 -(lp715 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p716 -aS'A more recent version of the selected reference sequence NM_001165964.1 is available (NM_001165964.2)' -p717 -aS'NM_001165964.2:c.233_240del MUST be fully validated prior to use in reports' -p718 -aS'select_variants=NM_001165964.2:c.233_240del' -p719 -aS'RefSeqGene record not available' -p720 -asg9 -g4 -sg10 -(lp721 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 3, mRNA -p722 -sg14 -S'SCN1A' -p723 -sg16 -(dp724 -g18 -S'NP_001159436.1:p.(Glu78GlyfsTer7)' -p725 -sg20 
-S'NP_001159436.1:p.(E78Gfs*7)' -p726 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001165964.1:c.233_240del' -p727 -sg27 -g4 -sg28 -(dp728 -S'hg19' -p729 -(dp730 -g32 -S'NC_000002.11:g.166929892_166929899del' -p731 -sg34 -(dp732 -g36 -g37 -sg38 -S'CCAGGTCCT' -p733 -sg40 -S'166929891' -p734 -sg42 -g43 -sssg44 -(dp735 -g32 -S'NC_000002.12:g.166073382_166073389del' -p736 -sg34 -(dp737 -g36 -g37 -sg38 -S'CCAGGTCCT' -p738 -sg40 -S'166073381' -p739 -sg42 -g43 -sssS'grch37' -p740 -(dp741 -g32 -S'NC_000002.11:g.166929892_166929899del' -p742 -sg34 -(dp743 -g36 -g54 -sg38 -S'CCAGGTCCT' -p744 -sg40 -S'166929891' -p745 -sg42 -g43 -sssS'grch38' -p746 -(dp747 -g32 -S'NC_000002.12:g.166073382_166073389del' -p748 -sg34 -(dp749 -g36 -g54 -sg38 -S'CCAGGTCCT' -p750 -sg40 -S'166073381' -p751 -sg42 -g43 -ssssg63 -(dp752 -g104 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159436.1' -p753 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165964.1' -p754 -sssS'NM_001353958.1:c.233_240del' -p755 -(dp756 -g3 -g4 -sg5 -(lp757 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p758 -aS'RefSeqGene record not available' -p759 -asg9 -g4 -sg10 -(lp760 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 13, mRNA -p761 -sg14 -S'SCN1A' -p762 -sg16 -(dp763 -g18 -S'NP_001340887.1:p.(Glu78GlyfsTer7)' -p764 -sg20 -S'NP_001340887.1:p.(E78Gfs*7)' -p765 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001353958.1:c.233_240del' -p766 -sg27 -g4 -sg28 -(dp767 -S'hg19' -p768 -(dp769 -g32 -S'NC_000002.11:g.166929892_166929899del' -p770 -sg34 -(dp771 -g36 -g37 -sg38 -S'CCAGGTCCT' -p772 -sg40 -S'166929891' -p773 -sg42 -g43 -sssg44 -(dp774 -g32 -S'NC_000002.12:g.166073382_166073389del' -p775 -sg34 -(dp776 -g36 -g37 -sg38 -S'CCAGGTCCT' -p777 -sg40 -S'166073381' -p778 -sg42 -g43 -sssS'grch37' -p779 -(dp780 -g32 -S'NC_000002.11:g.166929892_166929899del' -p781 -sg34 -(dp782 -g36 -g54 -sg38 -S'CCAGGTCCT' -p783 -sg40 -S'166929891' -p784 
-sg42 -g43 -sssS'grch38' -p785 -(dp786 -g32 -S'NC_000002.12:g.166073382_166073389del' -p787 -sg34 -(dp788 -g36 -g54 -sg38 -S'CCAGGTCCT' -p789 -sg40 -S'166073381' -p790 -sg42 -g43 -ssssg63 -(dp791 -g104 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340887.1' -p792 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353958.1' -p793 -sssS'NM_001353949.1:c.233_240del' -p794 -(dp795 -g3 -g4 -sg5 -(lp796 -S'NC_000002.11:g.166929891CCAGGTCCT>C automapped to NC_000002.11:g.166929893_166929900del' -p797 -aS'RefSeqGene record not available' -p798 -asg9 -g4 -sg10 -(lp799 -sg12 -VHomo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 6, mRNA -p800 -sg14 -S'SCN1A' -p801 -sg16 -(dp802 -g18 -S'NP_001340878.1:p.(Glu78GlyfsTer7)' -p803 -sg20 -S'NP_001340878.1:p.(E78Gfs*7)' -p804 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001353949.1:c.233_240del' -p805 -sg27 -g4 -sg28 -(dp806 -S'hg19' -p807 -(dp808 -g32 -S'NC_000002.11:g.166929892_166929899del' -p809 -sg34 -(dp810 -g36 -g37 -sg38 -S'CCAGGTCCT' -p811 -sg40 -S'166929891' -p812 -sg42 -g43 -sssg44 -(dp813 -g32 -S'NC_000002.12:g.166073382_166073389del' -p814 -sg34 -(dp815 -g36 -g37 -sg38 -S'CCAGGTCCT' -p816 -sg40 -S'166073381' -p817 -sg42 -g43 -sssS'grch37' -p818 -(dp819 -g32 -S'NC_000002.11:g.166929892_166929899del' -p820 -sg34 -(dp821 -g36 -g54 -sg38 -S'CCAGGTCCT' -p822 -sg40 -S'166929891' -p823 -sg42 -g43 -sssS'grch38' -p824 -(dp825 -g32 -S'NC_000002.12:g.166073382_166073389del' -p826 -sg34 -(dp827 -g36 -g54 -sg38 -S'CCAGGTCCT' -p828 -sg40 -S'166073381' -p829 -sg42 -g43 -ssssg63 -(dp830 -g104 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340878.1' -p831 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353949.1' -p832 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant273.txt b/VariantValidator/testing/testOutputsMasterITS/variant273.txt deleted file mode 100644 index 7433d100..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant273.txt +++ /dev/null @@ -1,1148 +0,0 @@ -(dp0 -S'NM_001256850.1:c.102051C>A' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens titin (TTN), transcript variant N2BA, mRNA -p12 -sS'gene_symbol' -p13 -S'TTN' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001243779.1:p.(Ser34017Arg)' -p18 -sS'slr' -p19 -S'NP_001243779.1:p.(S34017R)' -p20 -ssS'submitted_variant' -p21 -S'2-179393504-G-T' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_001256850.1:c.102051C>A' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000002.11:g.179393504G>T' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr2' -p37 -sS'ref' -p38 -VG -p39 -sS'pos' -p40 -S'179393504' -p41 -sS'alt' -p42 -VT -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000002.12:g.178528777G>T' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'178528777' -p48 -sg42 -g43 -sssS'grch37' -p49 -(dp50 -g32 -S'NC_000002.11:g.179393504G>T' -p51 -sg34 -(dp52 -g36 -S'2' -p53 -sg38 -g39 -sg40 -S'179393504' -p54 -sg42 -g43 -sssS'grch38' -p55 -(dp56 -g32 -S'NC_000002.12:g.178528777G>T' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'178528777' -p59 -sg42 -g43 -ssssS'reference_sequence_records' -p60 -(dp61 -S'protein' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243779.1' -p63 -sS'transcript' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256850.1' -p65 
-sssS'NM_133378.4:c.99270C>A' -p66 -(dp67 -g3 -g4 -sg5 -(lp68 -S'RefSeqGene record not available' -p69 -asg8 -g4 -sg9 -(lp70 -sg11 -VHomo sapiens titin (TTN), transcript variant N2-A, mRNA -p71 -sg13 -S'TTN' -p72 -sg15 -(dp73 -g17 -S'NP_596869.4:p.(Ser33090Arg)' -p74 -sg19 -S'NP_596869.4:p.(S33090R)' -p75 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_133378.4:c.99270C>A' -p76 -sg27 -g4 -sg28 -(dp77 -S'hg19' -p78 -(dp79 -g32 -S'NC_000002.11:g.179393504G>T' -p80 -sg34 -(dp81 -g36 -g37 -sg38 -g39 -sg40 -S'179393504' -p82 -sg42 -g43 -sssg44 -(dp83 -g32 -S'NC_000002.12:g.178528777G>T' -p84 -sg34 -(dp85 -g36 -g37 -sg38 -g39 -sg40 -S'178528777' -p86 -sg42 -g43 -sssS'grch37' -p87 -(dp88 -g32 -S'NC_000002.11:g.179393504G>T' -p89 -sg34 -(dp90 -g36 -g53 -sg38 -g39 -sg40 -S'179393504' -p91 -sg42 -g43 -sssS'grch38' -p92 -(dp93 -g32 -S'NC_000002.12:g.178528777G>T' -p94 -sg34 -(dp95 -g36 -g53 -sg38 -g39 -sg40 -S'178528777' -p96 -sg42 -g43 -ssssg60 -(dp97 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_596869.4' -p98 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_133378.4' -p99 -sssS'NM_133432.3:c.80154C>A' -p100 -(dp101 -g3 -g4 -sg5 -(lp102 -S'RefSeqGene record not available' -p103 -asg8 -g4 -sg9 -(lp104 -sg11 -VHomo sapiens titin (TTN), transcript variant novex-1, mRNA -p105 -sg13 -S'TTN' -p106 -sg15 -(dp107 -g17 -S'NP_597676.3:p.(Ser26718Arg)' -p108 -sg19 -S'NP_597676.3:p.(S26718R)' -p109 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_133432.3:c.80154C>A' -p110 -sg27 -g4 -sg28 -(dp111 -S'hg19' -p112 -(dp113 -g32 -S'NC_000002.11:g.179393504G>T' -p114 -sg34 -(dp115 -g36 -g37 -sg38 -g39 -sg40 -S'179393504' -p116 -sg42 -g43 -sssg44 -(dp117 -g32 -S'NC_000002.12:g.178528777G>T' -p118 -sg34 -(dp119 -g36 -g37 -sg38 -g39 -sg40 -S'178528777' -p120 -sg42 -g43 -sssS'grch37' -p121 -(dp122 -g32 -S'NC_000002.11:g.179393504G>T' -p123 -sg34 -(dp124 -g36 -g53 -sg38 -g39 -sg40 -S'179393504' -p125 -sg42 -g43 -sssS'grch38' -p126 -(dp127 -g32 -S'NC_000002.12:g.178528777G>T' -p128 -sg34 -(dp129 -g36 
-g53 -sg38 -g39 -sg40 -S'178528777' -p130 -sg42 -g43 -ssssg60 -(dp131 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_597676.3' -p132 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_133432.3' -p133 -sssS'NM_001267550.1:c.106974C>A' -p134 -(dp135 -g3 -g4 -sg5 -(lp136 -S'A more recent version of the selected reference sequence NM_001267550.1 is available (NM_001267550.2)' -p137 -aS'NM_001267550.2:c.106974C>A MUST be fully validated prior to use in reports' -p138 -aS'select_variants=NM_001267550.2:c.106974C>A' -p139 -aS'RefSeqGene record not available' -p140 -asg8 -g4 -sg9 -(lp141 -sg11 -VHomo sapiens titin (TTN), transcript variant IC, mRNA -p142 -sg13 -S'TTN' -p143 -sg15 -(dp144 -g17 -S'NP_001254479.1:p.(Ser35658Arg)' -p145 -sg19 -S'NP_001254479.1:p.(S35658R)' -p146 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001267550.1:c.106974C>A' -p147 -sg27 -g4 -sg28 -(dp148 -S'hg19' -p149 -(dp150 -g32 -S'NC_000002.11:g.179393504G>T' -p151 -sg34 -(dp152 -g36 -g37 -sg38 -g39 -sg40 -S'179393504' -p153 -sg42 -g43 -sssS'grch37' -p154 -(dp155 -g32 -S'NC_000002.11:g.179393504G>T' -p156 -sg34 -(dp157 -g36 -g53 -sg38 -g39 -sg40 -S'179393504' -p158 -sg42 -g43 -ssssg60 -(dp159 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001254479.1' -p160 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001267550.1' -p161 -sssS'NR_038272.1:n.219+5141G>T' -p162 -(dp163 -g3 -g4 -sg5 -(lp164 -S'RefSeqGene record not available' -p165 -asg8 -g4 -sg9 -(lp166 -sg11 -VHomo sapiens TTN antisense RNA 1 (TTN-AS1), transcript variant 1, long non-coding RNA -p167 -sg13 -S'TTN-AS1' -p168 -sg15 -(dp169 -g17 -S'Non-coding :n.' 
-p170 -sg19 -g170 -ssg21 -g22 -sg23 -S'NC_000002.11(NR_038272.1):c.219+5141G>T' -p171 -sg24 -g4 -sg25 -S'NR_038272.1:n.219+5141G>T' -p172 -sg27 -g4 -sg28 -(dp173 -S'hg19' -p174 -(dp175 -g32 -S'NC_000002.11:g.179393504G>T' -p176 -sg34 -(dp177 -g36 -g37 -sg38 -S'G' -p178 -sg40 -S'179393504' -p179 -sg42 -S'T' -p180 -sssg44 -(dp181 -g32 -S'NC_000002.12:g.178528777G>T' -p182 -sg34 -(dp183 -g36 -g37 -sg38 -g178 -sg40 -S'178528777' -p184 -sg42 -g180 -sssS'grch37' -p185 -(dp186 -g32 -S'NC_000002.11:g.179393504G>T' -p187 -sg34 -(dp188 -g36 -g53 -sg38 -g178 -sg40 -S'179393504' -p189 -sg42 -g180 -sssS'grch38' -p190 -(dp191 -g32 -S'NC_000002.12:g.178528777G>T' -p192 -sg34 -(dp193 -g36 -g53 -sg38 -g178 -sg40 -S'178528777' -p194 -sg42 -g180 -ssssg60 -(dp195 -g64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_038272.1' -p196 -sssS'NM_133437.4:c.80355C>A' -p197 -(dp198 -g3 -g4 -sg5 -(lp199 -S'RefSeqGene record not available' -p200 -asg8 -g4 -sg9 -(lp201 -sg11 -VHomo sapiens titin (TTN), transcript variant novex-2, mRNA -p202 -sg13 -S'TTN' -p203 -sg15 -(dp204 -g17 -S'NP_597681.4:p.(Ser26785Arg)' -p205 -sg19 -S'NP_597681.4:p.(S26785R)' -p206 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_133437.4:c.80355C>A' -p207 -sg27 -g4 -sg28 -(dp208 -S'hg19' -p209 -(dp210 -g32 -S'NC_000002.11:g.179393504G>T' -p211 -sg34 -(dp212 -g36 -g37 -sg38 -g39 -sg40 -S'179393504' -p213 -sg42 -g43 -sssg44 -(dp214 -g32 -S'NC_000002.12:g.178528777G>T' -p215 -sg34 -(dp216 -g36 -g37 -sg38 -g39 -sg40 -S'178528777' -p217 -sg42 -g43 -sssS'grch37' -p218 -(dp219 -g32 -S'NC_000002.11:g.179393504G>T' -p220 -sg34 -(dp221 -g36 -g53 -sg38 -g39 -sg40 -S'179393504' -p222 -sg42 -g43 -sssS'grch38' -p223 -(dp224 -g32 -S'NC_000002.12:g.178528777G>T' -p225 -sg34 -(dp226 -g36 -g53 -sg38 -g39 -sg40 -S'178528777' -p227 -sg42 -g43 -ssssg60 -(dp228 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_597681.4' -p229 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_133437.4' -p230 -sssS'flag' -p231 -S'gene_variant' -p232 
-sS'NR_038271.1:n.446+5141G>T' -p233 -(dp234 -g3 -g4 -sg5 -(lp235 -S'RefSeqGene record not available' -p236 -asg8 -g4 -sg9 -(lp237 -sg11 -VHomo sapiens TTN antisense RNA 1 (TTN-AS1), transcript variant 2, long non-coding RNA -p238 -sg13 -S'TTN-AS1' -p239 -sg15 -(dp240 -g17 -S'Non-coding :n.' -p241 -sg19 -g241 -ssg21 -g22 -sg23 -S'NC_000002.11(NR_038271.1):c.446+5141G>T' -p242 -sg24 -g4 -sg25 -S'NR_038271.1:n.446+5141G>T' -p243 -sg27 -g4 -sg28 -(dp244 -S'hg19' -p245 -(dp246 -g32 -S'NC_000002.11:g.179393504G>T' -p247 -sg34 -(dp248 -g36 -g37 -sg38 -g178 -sg40 -S'179393504' -p249 -sg42 -g180 -sssg44 -(dp250 -g32 -S'NC_000002.12:g.178528777G>T' -p251 -sg34 -(dp252 -g36 -g37 -sg38 -g178 -sg40 -S'178528777' -p253 -sg42 -g180 -sssS'grch37' -p254 -(dp255 -g32 -S'NC_000002.11:g.179393504G>T' -p256 -sg34 -(dp257 -g36 -g53 -sg38 -g178 -sg40 -S'179393504' -p258 -sg42 -g180 -sssS'grch38' -p259 -(dp260 -g32 -S'NC_000002.12:g.178528777G>T' -p261 -sg34 -(dp262 -g36 -g53 -sg38 -g178 -sg40 -S'178528777' -p263 -sg42 -g180 -ssssg60 -(dp264 -g64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_038271.1' -p265 -sssS'NM_001267550.2:c.106974C>A' -p266 -(dp267 -g3 -g4 -sg5 -(lp268 -S'RefSeqGene record not available' -p269 -asg8 -g4 -sg9 -(lp270 -sg11 -VHomo sapiens titin (TTN), transcript variant IC, mRNA -p271 -sg13 -S'TTN' -p272 -sg15 -(dp273 -g17 -S'NP_001254479.2:p.(Ser35658Arg)' -p274 -sg19 -S'NP_001254479.2:p.(S35658R)' -p275 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001267550.2:c.106974C>A' -p276 -sg27 -g4 -sg28 -(dp277 -S'hg19' -p278 -(dp279 -g32 -S'NC_000002.11:g.179393504G>T' -p280 -sg34 -(dp281 -g36 -g37 -sg38 -g39 -sg40 -S'179393504' -p282 -sg42 -g43 -sssg44 -(dp283 -g32 -S'NC_000002.12:g.178528777G>T' -p284 -sg34 -(dp285 -g36 -g37 -sg38 -g39 -sg40 -S'178528777' -p286 -sg42 -g43 -sssS'grch37' -p287 -(dp288 -g32 -S'NC_000002.11:g.179393504G>T' -p289 -sg34 -(dp290 -g36 -g53 -sg38 -g39 -sg40 -S'179393504' -p291 -sg42 -g43 -sssS'grch38' -p292 -(dp293 -g32 -S'NC_000002.12:g.178528777G>T' 
-p294 -sg34 -(dp295 -g36 -g53 -sg38 -g39 -sg40 -S'178528777' -p296 -sg42 -g43 -ssssg60 -(dp297 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001254479.2' -p298 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001267550.2' -p299 -sssS'NM_133437.3:c.80355C>A' -p300 -(dp301 -g3 -g4 -sg5 -(lp302 -S'A more recent version of the selected reference sequence NM_133437.3 is available (NM_133437.4)' -p303 -aS'NM_133437.4:c.80355C>A MUST be fully validated prior to use in reports' -p304 -aS'select_variants=NM_133437.4:c.80355C>A' -p305 -aS'RefSeqGene record not available' -p306 -asg8 -g4 -sg9 -(lp307 -sg11 -VHomo sapiens titin (TTN), transcript variant novex-2, mRNA -p308 -sg13 -S'TTN' -p309 -sg15 -(dp310 -g17 -S'NP_597681.3:p.(Ser26785Arg)' -p311 -sg19 -S'NP_597681.3:p.(S26785R)' -p312 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_133437.3:c.80355C>A' -p313 -sg27 -g4 -sg28 -(dp314 -S'hg19' -p315 -(dp316 -g32 -S'NC_000002.11:g.179393504G>T' -p317 -sg34 -(dp318 -g36 -g37 -sg38 -g39 -sg40 -S'179393504' -p319 -sg42 -g43 -sssS'grch37' -p320 -(dp321 -g32 -S'NC_000002.11:g.179393504G>T' -p322 -sg34 -(dp323 -g36 -g53 -sg38 -g39 -sg40 -S'179393504' -p324 -sg42 -g43 -ssssg60 -(dp325 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_597681.3' -p326 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_133437.3' -p327 -sssS'NM_003319.4:c.79779C>A' -p328 -(dp329 -g3 -g4 -sg5 -(lp330 -S'RefSeqGene record not available' -p331 -asg8 -g4 -sg9 -(lp332 -sg11 -VHomo sapiens titin (TTN), transcript variant N2-B, mRNA -p333 -sg13 -S'TTN' -p334 -sg15 -(dp335 -g17 -S'NP_003310.4:p.(Ser26593Arg)' -p336 -sg19 -S'NP_003310.4:p.(S26593R)' -p337 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_003319.4:c.79779C>A' -p338 -sg27 -g4 -sg28 -(dp339 -S'hg19' -p340 -(dp341 -g32 -S'NC_000002.11:g.179393504G>T' -p342 -sg34 -(dp343 -g36 -g37 -sg38 -g39 -sg40 -S'179393504' -p344 -sg42 -g43 -sssg44 -(dp345 -g32 -S'NC_000002.12:g.178528777G>T' -p346 -sg34 -(dp347 -g36 -g37 -sg38 -g39 -sg40 -S'178528777' -p348 -sg42 -g43 
-sssS'grch37' -p349 -(dp350 -g32 -S'NC_000002.11:g.179393504G>T' -p351 -sg34 -(dp352 -g36 -g53 -sg38 -g39 -sg40 -S'179393504' -p353 -sg42 -g43 -sssS'grch38' -p354 -(dp355 -g32 -S'NC_000002.12:g.178528777G>T' -p356 -sg34 -(dp357 -g36 -g53 -sg38 -g39 -sg40 -S'178528777' -p358 -sg42 -g43 -ssssg60 -(dp359 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003310.4' -p360 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003319.4' -p361 -sssS'metadata' -p362 -(dp363 -S'variantvalidator_hgvs_version' -p364 -S'1.1.3' -p365 -sS'uta_schema' -p366 -S'uta_20180821' -p367 -sS'seqrepo_db' -p368 -S'2018-08-21' -p369 -sS'variantvalidator_version' -p370 -S'v0.2' -p371 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant274.txt b/VariantValidator/testing/testOutputsMasterITS/variant274.txt deleted file mode 100644 index 132314ce..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant274.txt +++ /dev/null @@ -1,176 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_194250.1:c.3324_3347del' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000002.11:g.185803444TGCAGCTGCTGCAGCTGCAGCTGCA>T automapped to NC_000002.11:g.185803447_185803470del' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens zinc finger protein 804A (ZNF804A), mRNA -p15 -sS'gene_symbol' -p16 -S'ZNF804A' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_919226.1:p.(Ala1112_Ala1119del)' -p21 -sS'slr' -p22 -S'NP_919226.1:p.(A1112_A1119del)' -p23 -ssS'submitted_variant' -p24 -S'2-185803444-TGCAGCTGCTGCAGCTGCAGCTGCA-T' -p25 -sS'genome_context_intronic_sequence' -p26 -g6 -sS'hgvs_lrg_variant' -p27 -g6 -sS'hgvs_transcript_variant' -p28 -S'NM_194250.1:c.3324_3347del' -p29 -sS'hgvs_refseqgene_variant' -p30 -g6 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 
-S'hgvs_genomic_description' -p35 -S'NC_000002.11:g.185803447_185803470del' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr2' -p40 -sS'ref' -p41 -S'TGCAGCTGCTGCAGCTGCAGCTGCA' -p42 -sS'pos' -p43 -S'185803444' -p44 -sS'alt' -p45 -S'T' -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000002.12:g.184938720_184938743del' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -S'TGCAGCTGCTGCAGCTGCAGCTGCA' -p51 -sg43 -S'184938717' -p52 -sg45 -g46 -sssS'grch37' -p53 -(dp54 -g35 -S'NC_000002.11:g.185803447_185803470del' -p55 -sg37 -(dp56 -g39 -S'2' -p57 -sg41 -S'TGCAGCTGCTGCAGCTGCAGCTGCA' -p58 -sg43 -S'185803444' -p59 -sg45 -g46 -sssS'grch38' -p60 -(dp61 -g35 -S'NC_000002.12:g.184938720_184938743del' -p62 -sg37 -(dp63 -g39 -g57 -sg41 -S'TGCAGCTGCTGCAGCTGCAGCTGCA' -p64 -sg43 -S'184938717' -p65 -sg45 -g46 -ssssS'reference_sequence_records' -p66 -(dp67 -S'protein' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_919226.1' -p69 -sS'transcript' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_194250.1' -p71 -sssS'metadata' -p72 -(dp73 -S'variantvalidator_hgvs_version' -p74 -S'1.1.3' -p75 -sS'uta_schema' -p76 -S'uta_20180821' -p77 -sS'seqrepo_db' -p78 -S'2018-08-21' -p79 -sS'variantvalidator_version' -p80 -S'v0.2' -p81 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant275.txt b/VariantValidator/testing/testOutputsMasterITS/variant275.txt deleted file mode 100644 index 2d7b7e0e..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant275.txt +++ /dev/null @@ -1,286 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_002491.2:c.208G>T' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens NADH:ubiquinone oxidoreductase subunit B3 (NDUFB3), transcript variant 1, mRNA -p14 -sS'gene_symbol' -p15 -S'NDUFB3' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_002482.1:p.(Gly70Ter)' -p20 -sS'slr' -p21 -S'NP_002482.1:p.(G70*)' -p22 -ssS'submitted_variant' -p23 -S'2-201950249-G-T' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_002491.2:c.208G>T' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000002.11:g.201950249G>T' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr2' -p39 -sS'ref' -p40 -S'G' -p41 -sS'pos' -p42 -S'201950249' -p43 -sS'alt' -p44 -S'T' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000002.12:g.201085526G>T' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'201085526' -p50 -sg44 -g45 -sssS'grch37' -p51 -(dp52 -g34 -S'NC_000002.11:g.201950249G>T' -p53 -sg36 -(dp54 -g38 -S'2' -p55 -sg40 -g41 -sg42 -S'201950249' -p56 -sg44 -g45 -sssS'grch38' -p57 -(dp58 -g34 -S'NC_000002.12:g.201085526G>T' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'201085526' -p61 -sg44 -g45 -ssssS'reference_sequence_records' -p62 -(dp63 -S'protein' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_002482.1' -p65 -sS'transcript' -p66 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NM_002491.2' -p67 -sssS'NM_001257102.1:c.208G>T' -p68 -(dp69 -g5 -g6 -sg7 -(lp70 -S'RefSeqGene record not available' -p71 -asg10 -g6 -sg11 -(lp72 -sg13 -VHomo sapiens NADH:ubiquinone oxidoreductase subunit B3 (NDUFB3), transcript variant 2, mRNA -p73 -sg15 -S'NDUFB3' -p74 -sg17 -(dp75 -g19 -S'NP_001244031.1:p.(Gly70Ter)' -p76 -sg21 -S'NP_001244031.1:p.(G70*)' -p77 -ssg23 -g24 -sg25 -g6 -sg26 -g6 -sg27 -S'NM_001257102.1:c.208G>T' -p78 -sg29 -g6 -sg30 -(dp79 -S'hg19' -p80 -(dp81 -g34 -S'NC_000002.11:g.201950249G>T' -p82 -sg36 -(dp83 -g38 -g39 -sg40 -g41 -sg42 -S'201950249' -p84 -sg44 -g45 -sssg46 -(dp85 -g34 -S'NC_000002.12:g.201085526G>T' -p86 -sg36 -(dp87 -g38 -g39 -sg40 -g41 -sg42 -S'201085526' -p88 -sg44 -g45 -sssS'grch37' -p89 -(dp90 -g34 -S'NC_000002.11:g.201950249G>T' -p91 -sg36 -(dp92 -g38 -g55 -sg40 -g41 -sg42 -S'201950249' -p93 -sg44 -g45 -sssS'grch38' -p94 -(dp95 -g34 -S'NC_000002.12:g.201085526G>T' -p96 -sg36 -(dp97 -g38 -g55 -sg40 -g41 -sg42 -S'201085526' -p98 -sg44 -g45 -ssssg62 -(dp99 -g64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244031.1' -p100 -sg66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257102.1' -p101 -sssS'metadata' -p102 -(dp103 -S'variantvalidator_hgvs_version' -p104 -S'1.1.3' -p105 -sS'uta_schema' -p106 -S'uta_20180821' -p107 -sS'seqrepo_db' -p108 -S'2018-08-21' -p109 -sS'variantvalidator_version' -p110 -S'v0.2' -p111 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant276.txt b/VariantValidator/testing/testOutputsMasterITS/variant276.txt deleted file mode 100644 index c3103bb5..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant276.txt +++ /dev/null @@ -1,404 +0,0 @@ -(dp0 -S'NM_004369.3:c.6282+1G>T' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens collagen type VI alpha 3 chain (COL6A3), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'COL6A3' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_004360.2:p.?' -p18 -sS'slr' -p19 -S'NP_004360.2:p.?' -p20 -ssS'submitted_variant' -p21 -S'2-238268730-C-A' -p22 -sS'genome_context_intronic_sequence' -p23 -S'NC_000002.11(NM_004369.3):c.6282+1G>T' -p24 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_004369.3:c.6282+1G>T' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000002.11:g.238268730C>A' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr2' -p38 -sS'ref' -p39 -VC -p40 -sS'pos' -p41 -S'238268730' -p42 -sS'alt' -p43 -VA -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000002.12:g.237360087C>A' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'237360087' -p49 -sg43 -g44 -sssS'grch37' -p50 -(dp51 -g33 -S'NC_000002.11:g.238268730C>A' -p52 -sg35 -(dp53 -g37 -S'2' -p54 -sg39 -g40 -sg41 -S'238268730' -p55 -sg43 -g44 -sssS'grch38' -p56 -(dp57 -g33 -S'NC_000002.12:g.237360087C>A' -p58 -sg35 -(dp59 -g37 -g54 -sg39 -g40 -sg41 -S'237360087' -p60 -sg43 -g44 -ssssS'reference_sequence_records' -p61 -(dp62 -S'protein' -p63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004360.2' -p64 -sS'transcript' -p65 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004369.3' -p66 -sssS'flag' -p67 -S'gene_variant' -p68 -sS'metadata' -p69 -(dp70 -S'variantvalidator_hgvs_version' -p71 -S'1.1.3' -p72 -sS'uta_schema' -p73 -S'uta_20180821' -p74 -sS'seqrepo_db' -p75 -S'2018-08-21' -p76 -sS'variantvalidator_version' -p77 -S'v0.2' -p78 -ssS'NM_057166.4:c.4461+1G>T' -p79 -(dp80 -g3 -g4 -sg5 -(lp81 -S'RefSeqGene record not available' -p82 -asg8 -g4 -sg9 -(lp83 -sg11 -VHomo sapiens collagen type VI alpha 3 chain (COL6A3), transcript variant 4, mRNA -p84 -sg13 -S'COL6A3' -p85 -sg15 -(dp86 -g17 -S'NP_476507.3:p.?' -p87 -sg19 -S'NP_476507.3:p.?' -p88 -ssg21 -g22 -sg23 -S'NC_000002.11(NM_057166.4):c.4461+1G>T' -p89 -sg25 -g4 -sg26 -S'NM_057166.4:c.4461+1G>T' -p90 -sg28 -g4 -sg29 -(dp91 -S'hg19' -p92 -(dp93 -g33 -S'NC_000002.11:g.238268730C>A' -p94 -sg35 -(dp95 -g37 -g38 -sg39 -g40 -sg41 -S'238268730' -p96 -sg43 -g44 -sssg45 -(dp97 -g33 -S'NC_000002.12:g.237360087C>A' -p98 -sg35 -(dp99 -g37 -g38 -sg39 -g40 -sg41 -S'237360087' -p100 -sg43 -g44 -sssS'grch37' -p101 -(dp102 -g33 -S'NC_000002.11:g.238268730C>A' -p103 -sg35 -(dp104 -g37 -g54 -sg39 -g40 -sg41 -S'238268730' -p105 -sg43 -g44 -sssS'grch38' -p106 -(dp107 -g33 -S'NC_000002.12:g.237360087C>A' -p108 -sg35 -(dp109 -g37 -g54 -sg39 -g40 -sg41 -S'237360087' -p110 -sg43 -g44 -ssssg61 -(dp111 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_476507.3' -p112 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_057166.4' -p113 -sssS'NM_057167.3:c.5664+1G>T' -p114 -(dp115 -g3 -g4 -sg5 -(lp116 -S'RefSeqGene record not available' -p117 -asg8 -g4 -sg9 -(lp118 -sg11 -VHomo sapiens collagen type VI alpha 3 chain (COL6A3), transcript variant 5, mRNA -p119 -sg13 -S'COL6A3' -p120 -sg15 -(dp121 -g17 -S'NP_476508.2:p.?' -p122 -sg19 -S'NP_476508.2:p.?' 
-p123 -ssg21 -g22 -sg23 -S'NC_000002.11(NM_057167.3):c.5664+1G>T' -p124 -sg25 -g4 -sg26 -S'NM_057167.3:c.5664+1G>T' -p125 -sg28 -g4 -sg29 -(dp126 -S'hg19' -p127 -(dp128 -g33 -S'NC_000002.11:g.238268730C>A' -p129 -sg35 -(dp130 -g37 -g38 -sg39 -g40 -sg41 -S'238268730' -p131 -sg43 -g44 -sssg45 -(dp132 -g33 -S'NC_000002.12:g.237360087C>A' -p133 -sg35 -(dp134 -g37 -g38 -sg39 -g40 -sg41 -S'237360087' -p135 -sg43 -g44 -sssS'grch37' -p136 -(dp137 -g33 -S'NC_000002.11:g.238268730C>A' -p138 -sg35 -(dp139 -g37 -g54 -sg39 -g40 -sg41 -S'238268730' -p140 -sg43 -g44 -sssS'grch38' -p141 -(dp142 -g33 -S'NC_000002.12:g.237360087C>A' -p143 -sg35 -(dp144 -g37 -g54 -sg39 -g40 -sg41 -S'237360087' -p145 -sg43 -g44 -ssssg61 -(dp146 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_476508.2' -p147 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_057167.3' -p148 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant277.txt b/VariantValidator/testing/testOutputsMasterITS/variant277.txt deleted file mode 100644 index f13649e5..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant277.txt +++ /dev/null @@ -1,377 +0,0 @@ -(dp0 -S'NM_080860.2:c.727+5G>A' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'A more recent version of the selected reference sequence NM_080860.2 is available (NM_080860.3)' -p7 -aS'NM_080860.3:c.727+5G>A MUST be fully validated prior to use in reports' -p8 -aS'select_variants=NM_080860.3:c.727+5G>A' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g4 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens radial spoke head 1 homolog (Chlamydomonas) (RSPH1), mRNA -p15 -sS'gene_symbol' -p16 -S'RSPH1' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_543136.1:p.?' -p21 -sS'slr' -p22 -S'NP_543136.1:p.?' 
-p23 -ssS'submitted_variant' -p24 -S'21-43897396-C-T' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000021.8(NM_080860.2):c.727+5G>A' -p27 -sS'hgvs_lrg_variant' -p28 -g4 -sS'hgvs_transcript_variant' -p29 -S'NM_080860.2:c.727+5G>A' -p30 -sS'hgvs_refseqgene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000021.8:g.43897396C>T' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr21' -p41 -sS'ref' -p42 -VC -p43 -sS'pos' -p44 -S'43897396' -p45 -sS'alt' -p46 -VT -p47 -sssS'grch37' -p48 -(dp49 -g36 -S'NC_000021.8:g.43897396C>T' -p50 -sg38 -(dp51 -g40 -S'21' -p52 -sg42 -g43 -sg44 -S'43897396' -p53 -sg46 -g47 -ssssS'reference_sequence_records' -p54 -(dp55 -S'protein' -p56 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_543136.1' -p57 -sS'transcript' -p58 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_080860.2' -p59 -sssS'flag' -p60 -S'gene_variant' -p61 -sS'metadata' -p62 -(dp63 -S'variantvalidator_hgvs_version' -p64 -S'1.1.3' -p65 -sS'uta_schema' -p66 -S'uta_20180821' -p67 -sS'seqrepo_db' -p68 -S'2018-08-21' -p69 -sS'variantvalidator_version' -p70 -S'v0.2' -p71 -ssS'NM_080860.3:c.727+5G>A' -p72 -(dp73 -g3 -g4 -sg5 -(lp74 -S'RefSeqGene record not available' -p75 -asg11 -g4 -sg12 -(lp76 -sg14 -VHomo sapiens radial spoke head component 1 (RSPH1), transcript variant 1, mRNA -p77 -sg16 -S'RSPH1' -p78 -sg18 -(dp79 -g20 -S'NP_543136.1:p.?' -p80 -sg22 -S'NP_543136.1:p.?' 
-p81 -ssg24 -g25 -sg26 -S'NC_000021.8(NM_080860.3):c.727+5G>A' -p82 -sg28 -g4 -sg29 -S'NM_080860.3:c.727+5G>A' -p83 -sg31 -g4 -sg32 -(dp84 -S'hg19' -p85 -(dp86 -g36 -S'NC_000021.8:g.43897396C>T' -p87 -sg38 -(dp88 -g40 -g41 -sg42 -g43 -sg44 -S'43897396' -p89 -sg46 -g47 -sssS'hg38' -p90 -(dp91 -g36 -S'NC_000021.9:g.42477286C>T' -p92 -sg38 -(dp93 -g40 -g41 -sg42 -g43 -sg44 -S'42477286' -p94 -sg46 -g47 -sssS'grch37' -p95 -(dp96 -g36 -S'NC_000021.8:g.43897396C>T' -p97 -sg38 -(dp98 -g40 -g52 -sg42 -g43 -sg44 -S'43897396' -p99 -sg46 -g47 -sssS'grch38' -p100 -(dp101 -g36 -S'NC_000021.9:g.42477286C>T' -p102 -sg38 -(dp103 -g40 -g52 -sg42 -g43 -sg44 -S'42477286' -p104 -sg46 -g47 -ssssg54 -(dp105 -g56 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_543136.1' -p106 -sg58 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_080860.3' -p107 -sssS'NM_001286506.1:c.613+5G>A' -p108 -(dp109 -g3 -g4 -sg5 -(lp110 -S'RefSeqGene record not available' -p111 -asg11 -g4 -sg12 -(lp112 -sg14 -VHomo sapiens radial spoke head component 1 (RSPH1), transcript variant 2, mRNA -p113 -sg16 -S'RSPH1' -p114 -sg18 -(dp115 -g20 -S'NP_001273435.1:p.?' -p116 -sg22 -S'NP_001273435.1:p.?' -p117 -ssg24 -g25 -sg26 -S'NC_000021.8(NM_001286506.1):c.613+5G>A' -p118 -sg28 -g4 -sg29 -S'NM_001286506.1:c.613+5G>A' -p119 -sg31 -g4 -sg32 -(dp120 -S'hg19' -p121 -(dp122 -g36 -S'NC_000021.8:g.43897396C>T' -p123 -sg38 -(dp124 -g40 -g41 -sg42 -g43 -sg44 -S'43897396' -p125 -sg46 -g47 -sssg90 -(dp126 -g36 -S'NC_000021.9:g.42477286C>T' -p127 -sg38 -(dp128 -g40 -g41 -sg42 -g43 -sg44 -S'42477286' -p129 -sg46 -g47 -sssS'grch37' -p130 -(dp131 -g36 -S'NC_000021.8:g.43897396C>T' -p132 -sg38 -(dp133 -g40 -g52 -sg42 -g43 -sg44 -S'43897396' -p134 -sg46 -g47 -sssS'grch38' -p135 -(dp136 -g36 -S'NC_000021.9:g.42477286C>T' -p137 -sg38 -(dp138 -g40 -g52 -sg42 -g43 -sg44 -S'42477286' -p139 -sg46 -g47 -ssssg54 -(dp140 -g56 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001273435.1' -p141 -sg58 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001286506.1' -p142 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant278.txt b/VariantValidator/testing/testOutputsMasterITS/variant278.txt deleted file mode 100644 index b15685b4..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant278.txt +++ /dev/null @@ -1,1227 +0,0 @@ -(dp0 -S'NM_000268.3:c.924_925insCGACGC' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens neurofibromin 2 (NF2), transcript variant 1, mRNA -p13 -sS'gene_symbol' -p14 -S'NF2' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_000259.1:p.(Arg310_Arg311dup)' -p19 -sS'slr' -p20 -S'NP_000259.1:p.(R310_R311dup)' -p21 -ssS'submitted_variant' -p22 -S'22-30064360-G-GCGACGC' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_000268.3:c.924_925insCGACGC' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr22' -p38 -sS'ref' -p39 -S'G' -p40 -sS'pos' -p41 -S'30064360' -p42 -sS'alt' -p43 -S'GCGACGC' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'29668371' -p49 -sg43 -S'GCGACGC' -p50 -sssS'grch37' -p51 -(dp52 -g33 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p53 -sg35 -(dp54 -g37 -S'22' -p55 -sg39 -g40 -sg41 -S'30064360' -p56 -sg43 -S'GCGACGC' -p57 -sssS'grch38' -p58 -(dp59 -g33 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -g40 -sg41 -S'29668371' -p62 -sg43 -S'GCGACGC' -p63 
-ssssS'reference_sequence_records' -p64 -(dp65 -S'protein' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000259.1' -p67 -sS'transcript' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000268.3' -p69 -sssS'NM_181828.2:c.798_799insCGACGC' -p70 -(dp71 -g3 -g4 -sg5 -(lp72 -S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' -p73 -aS'RefSeqGene record not available' -p74 -asg9 -g4 -sg10 -(lp75 -sg12 -VHomo sapiens neurofibromin 2 (NF2), transcript variant 5, mRNA -p76 -sg14 -S'NF2' -p77 -sg16 -(dp78 -g18 -S'NP_861966.1:p.(Arg268_Arg269dup)' -p79 -sg20 -S'NP_861966.1:p.(R268_R269dup)' -p80 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_181828.2:c.798_799insCGACGC' -p81 -sg28 -g4 -sg29 -(dp82 -S'hg19' -p83 -(dp84 -g33 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p85 -sg35 -(dp86 -g37 -g38 -sg39 -g40 -sg41 -S'30064360' -p87 -sg43 -S'GCGACGC' -p88 -sssg45 -(dp89 -g33 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p90 -sg35 -(dp91 -g37 -g38 -sg39 -g40 -sg41 -S'29668371' -p92 -sg43 -S'GCGACGC' -p93 -sssS'grch37' -p94 -(dp95 -g33 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p96 -sg35 -(dp97 -g37 -g55 -sg39 -g40 -sg41 -S'30064360' -p98 -sg43 -S'GCGACGC' -p99 -sssS'grch38' -p100 -(dp101 -g33 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p102 -sg35 -(dp103 -g37 -g55 -sg39 -g40 -sg41 -S'29668371' -p104 -sg43 -S'GCGACGC' -p105 -ssssg64 -(dp106 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_861966.1' -p107 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_181828.2' -p108 -sssS'NM_181830.2:c.675_676insCGACGC' -p109 -(dp110 -g3 -g4 -sg5 -(lp111 -S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' -p112 -aS'RefSeqGene record not available' -p113 -asg9 -g4 -sg10 -(lp114 -sg12 -VHomo sapiens neurofibromin 2 (NF2), transcript variant 7, mRNA -p115 -sg14 -S'NF2' -p116 -sg16 -(dp117 -g18 -S'NP_861968.1:p.(Arg227_Arg228dup)' -p118 -sg20 -S'NP_861968.1:p.(R227_R228dup)' -p119 -ssg22 -g23 -sg24 -g4 -sg25 -g4 
-sg26 -S'NM_181830.2:c.675_676insCGACGC' -p120 -sg28 -g4 -sg29 -(dp121 -S'hg19' -p122 -(dp123 -g33 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p124 -sg35 -(dp125 -g37 -g38 -sg39 -g40 -sg41 -S'30064360' -p126 -sg43 -S'GCGACGC' -p127 -sssg45 -(dp128 -g33 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p129 -sg35 -(dp130 -g37 -g38 -sg39 -g40 -sg41 -S'29668371' -p131 -sg43 -S'GCGACGC' -p132 -sssS'grch37' -p133 -(dp134 -g33 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p135 -sg35 -(dp136 -g37 -g55 -sg39 -g40 -sg41 -S'30064360' -p137 -sg43 -S'GCGACGC' -p138 -sssS'grch38' -p139 -(dp140 -g33 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p141 -sg35 -(dp142 -g37 -g55 -sg39 -g40 -sg41 -S'29668371' -p143 -sg43 -S'GCGACGC' -p144 -ssssg64 -(dp145 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_861968.1' -p146 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_181830.2' -p147 -sssS'NM_181825.2:c.924_925insCGACGC' -p148 -(dp149 -g3 -g4 -sg5 -(lp150 -S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' -p151 -aS'RefSeqGene record not available' -p152 -asg9 -g4 -sg10 -(lp153 -sg12 -VHomo sapiens neurofibromin 2 (NF2), transcript variant 12, mRNA -p154 -sg14 -S'NF2' -p155 -sg16 -(dp156 -g18 -S'NP_861546.1:p.(Arg310_Arg311dup)' -p157 -sg20 -S'NP_861546.1:p.(R310_R311dup)' -p158 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_181825.2:c.924_925insCGACGC' -p159 -sg28 -g4 -sg29 -(dp160 -S'hg19' -p161 -(dp162 -g33 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p163 -sg35 -(dp164 -g37 -g38 -sg39 -g40 -sg41 -S'30064360' -p165 -sg43 -S'GCGACGC' -p166 -sssg45 -(dp167 -g33 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p168 -sg35 -(dp169 -g37 -g38 -sg39 -g40 -sg41 -S'29668371' -p170 -sg43 -S'GCGACGC' -p171 -sssS'grch37' -p172 -(dp173 -g33 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p174 -sg35 -(dp175 -g37 -g55 -sg39 -g40 -sg41 -S'30064360' -p176 -sg43 -S'GCGACGC' -p177 -sssS'grch38' -p178 -(dp179 -g33 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p180 
-sg35 -(dp181 -g37 -g55 -sg39 -g40 -sg41 -S'29668371' -p182 -sg43 -S'GCGACGC' -p183 -ssssg64 -(dp184 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_861546.1' -p185 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_181825.2' -p186 -sssS'NM_181832.2:c.924_925insCGACGC' -p187 -(dp188 -g3 -g4 -sg5 -(lp189 -S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' -p190 -aS'RefSeqGene record not available' -p191 -asg9 -g4 -sg10 -(lp192 -sg12 -VHomo sapiens neurofibromin 2 (NF2), transcript variant 8, mRNA -p193 -sg14 -S'NF2' -p194 -sg16 -(dp195 -g18 -S'NP_861970.1:p.(Arg310_Arg311dup)' -p196 -sg20 -S'NP_861970.1:p.(R310_R311dup)' -p197 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_181832.2:c.924_925insCGACGC' -p198 -sg28 -g4 -sg29 -(dp199 -S'hg19' -p200 -(dp201 -g33 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p202 -sg35 -(dp203 -g37 -g38 -sg39 -g40 -sg41 -S'30064360' -p204 -sg43 -S'GCGACGC' -p205 -sssg45 -(dp206 -g33 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p207 -sg35 -(dp208 -g37 -g38 -sg39 -g40 -sg41 -S'29668371' -p209 -sg43 -S'GCGACGC' -p210 -sssS'grch37' -p211 -(dp212 -g33 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p213 -sg35 -(dp214 -g37 -g55 -sg39 -g40 -sg41 -S'30064360' -p215 -sg43 -S'GCGACGC' -p216 -sssS'grch38' -p217 -(dp218 -g33 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p219 -sg35 -(dp220 -g37 -g55 -sg39 -g40 -sg41 -S'29668371' -p221 -sg43 -S'GCGACGC' -p222 -ssssg64 -(dp223 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_861970.1' -p224 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_181832.2' -p225 -sssS'NM_181833.2:c.447+26086_447+26087insCGACGC' -p226 -(dp227 -g3 -g4 -sg5 -(lp228 -S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' -p229 -aS'RefSeqGene record not available' -p230 -asg9 -g4 -sg10 -(lp231 -sg12 -VHomo sapiens neurofibromin 2 (NF2), transcript variant 9, mRNA -p232 -sg14 -S'NF2' -p233 -sg16 -(dp234 -g18 -S'NP_861971.1:p.?' -p235 -sg20 -S'NP_861971.1:p.?' 
-p236 -ssg22 -g23 -sg24 -S'NC_000022.10(NM_181833.2):c.447+26086_447+26087insCGACGC' -p237 -sg25 -g4 -sg26 -S'NM_181833.2:c.447+26086_447+26087insCGACGC' -p238 -sg28 -g4 -sg29 -(dp239 -S'hg19' -p240 -(dp241 -g33 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p242 -sg35 -(dp243 -g37 -g38 -sg39 -g40 -sg41 -S'30064360' -p244 -sg43 -S'GCGACGC' -p245 -sssg45 -(dp246 -g33 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p247 -sg35 -(dp248 -g37 -g38 -sg39 -g40 -sg41 -S'29668371' -p249 -sg43 -S'GCGACGC' -p250 -sssS'grch37' -p251 -(dp252 -g33 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p253 -sg35 -(dp254 -g37 -g55 -sg39 -g40 -sg41 -S'30064360' -p255 -sg43 -S'GCGACGC' -p256 -sssS'grch38' -p257 -(dp258 -g33 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p259 -sg35 -(dp260 -g37 -g55 -sg39 -g40 -sg41 -S'29668371' -p261 -sg43 -S'GCGACGC' -p262 -ssssg64 -(dp263 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_861971.1' -p264 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_181833.2' -p265 -sssS'NM_016418.5:c.924_925insCGACGC' -p266 -(dp267 -g3 -g4 -sg5 -(lp268 -S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' -p269 -aS'RefSeqGene record not available' -p270 -asg9 -g4 -sg10 -(lp271 -sg12 -VHomo sapiens neurofibromin 2 (NF2), transcript variant 2, mRNA -p272 -sg14 -S'NF2' -p273 -sg16 -(dp274 -g18 -S'NP_057502.2:p.(Arg310_Arg311dup)' -p275 -sg20 -S'NP_057502.2:p.(R310_R311dup)' -p276 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_016418.5:c.924_925insCGACGC' -p277 -sg28 -g4 -sg29 -(dp278 -S'hg19' -p279 -(dp280 -g33 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p281 -sg35 -(dp282 -g37 -g38 -sg39 -g40 -sg41 -S'30064360' -p283 -sg43 -S'GCGACGC' -p284 -sssg45 -(dp285 -g33 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p286 -sg35 -(dp287 -g37 -g38 -sg39 -g40 -sg41 -S'29668371' -p288 -sg43 -S'GCGACGC' -p289 -sssS'grch37' -p290 -(dp291 -g33 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p292 -sg35 -(dp293 -g37 -g55 -sg39 -g40 -sg41 -S'30064360' 
-p294 -sg43 -S'GCGACGC' -p295 -sssS'grch38' -p296 -(dp297 -g33 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p298 -sg35 -(dp299 -g37 -g55 -sg39 -g40 -sg41 -S'29668371' -p300 -sg43 -S'GCGACGC' -p301 -ssssg64 -(dp302 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_057502.2' -p303 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_016418.5' -p304 -sssS'NM_181829.2:c.801_802insCGACGC' -p305 -(dp306 -g3 -g4 -sg5 -(lp307 -S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' -p308 -aS'RefSeqGene record not available' -p309 -asg9 -g4 -sg10 -(lp310 -sg12 -VHomo sapiens neurofibromin 2 (NF2), transcript variant 6, mRNA -p311 -sg14 -S'NF2' -p312 -sg16 -(dp313 -g18 -S'NP_861967.1:p.(Arg269_Arg270dup)' -p314 -sg20 -S'NP_861967.1:p.(R269_R270dup)' -p315 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_181829.2:c.801_802insCGACGC' -p316 -sg28 -g4 -sg29 -(dp317 -S'hg19' -p318 -(dp319 -g33 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p320 -sg35 -(dp321 -g37 -g38 -sg39 -g40 -sg41 -S'30064360' -p322 -sg43 -S'GCGACGC' -p323 -sssg45 -(dp324 -g33 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p325 -sg35 -(dp326 -g37 -g38 -sg39 -g40 -sg41 -S'29668371' -p327 -sg43 -S'GCGACGC' -p328 -sssS'grch37' -p329 -(dp330 -g33 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p331 -sg35 -(dp332 -g37 -g55 -sg39 -g40 -sg41 -S'30064360' -p333 -sg43 -S'GCGACGC' -p334 -sssS'grch38' -p335 -(dp336 -g33 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p337 -sg35 -(dp338 -g37 -g55 -sg39 -g40 -sg41 -S'29668371' -p339 -sg43 -S'GCGACGC' -p340 -ssssg64 -(dp341 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_861967.1' -p342 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_181829.2' -p343 -sssS'flag' -p344 -S'gene_variant' -p345 -sS'NR_156186.1:n.1483_1484insCGACGC' -p346 -(dp347 -g3 -g4 -sg5 -(lp348 -S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' -p349 -aS'RefSeqGene record not available' -p350 -asg9 -g4 -sg10 -(lp351 -sg12 -VHomo sapiens 
neurofibromin 2 (NF2), transcript variant 14, non-coding RNA -p352 -sg14 -S'NF2' -p353 -sg16 -(dp354 -g18 -S'Non-coding :n.' -p355 -sg20 -g355 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NR_156186.1:n.1483_1484insCGACGC' -p356 -sg28 -g4 -sg29 -(dp357 -S'hg19' -p358 -(dp359 -g33 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p360 -sg35 -(dp361 -g37 -g38 -sg39 -g40 -sg41 -S'30064360' -p362 -sg43 -S'GCGACGC' -p363 -sssS'grch37' -p364 -(dp365 -g33 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p366 -sg35 -(dp367 -g37 -g55 -sg39 -g40 -sg41 -S'30064360' -p368 -sg43 -S'GCGACGC' -p369 -ssssg64 -(dp370 -g68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_156186.1' -p371 -sssS'NM_181831.2:c.675_676insCGACGC' -p372 -(dp373 -g3 -g4 -sg5 -(lp374 -S'NC_000022.10:g.30064360G>GCGACGC automapped to NC_000022.10:g.30064360_30064361insCGACGC' -p375 -aS'RefSeqGene record not available' -p376 -asg9 -g4 -sg10 -(lp377 -sg12 -VHomo sapiens neurofibromin 2 (NF2), transcript variant 13, mRNA -p378 -sg14 -S'NF2' -p379 -sg16 -(dp380 -g18 -S'NP_861969.1:p.(Arg227_Arg228dup)' -p381 -sg20 -S'NP_861969.1:p.(R227_R228dup)' -p382 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_181831.2:c.675_676insCGACGC' -p383 -sg28 -g4 -sg29 -(dp384 -S'hg19' -p385 -(dp386 -g33 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p387 -sg35 -(dp388 -g37 -g38 -sg39 -g40 -sg41 -S'30064360' -p389 -sg43 -S'GCGACGC' -p390 -sssg45 -(dp391 -g33 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p392 -sg35 -(dp393 -g37 -g38 -sg39 -g40 -sg41 -S'29668371' -p394 -sg43 -S'GCGACGC' -p395 -sssS'grch37' -p396 -(dp397 -g33 -S'NC_000022.10:g.30064360_30064361insCGACGC' -p398 -sg35 -(dp399 -g37 -g55 -sg39 -g40 -sg41 -S'30064360' -p400 -sg43 -S'GCGACGC' -p401 -sssS'grch38' -p402 -(dp403 -g33 -S'NC_000022.11:g.29668371_29668372insCGACGC' -p404 -sg35 -(dp405 -g37 -g55 -sg39 -g40 -sg41 -S'29668371' -p406 -sg43 -S'GCGACGC' -p407 -ssssg64 -(dp408 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_861969.1' -p409 -sg68 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NM_181831.2' -p410 -sssS'metadata' -p411 -(dp412 -S'variantvalidator_hgvs_version' -p413 -S'1.1.3' -p414 -sS'uta_schema' -p415 -S'uta_20180821' -p416 -sS'seqrepo_db' -p417 -S'2018-08-21' -p418 -sS'variantvalidator_version' -p419 -S'v0.2' -p420 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant279.txt b/VariantValidator/testing/testOutputsMasterITS/variant279.txt deleted file mode 100644 index 9f4a65d8..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant279.txt +++ /dev/null @@ -1,421 +0,0 @@ -(dp0 -S'NM_198156.2:c.341-3280_341-3271del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000003.11:g.10188187TGTCCCGATAG>T automapped to NC_000003.11:g.10188191_10188200del' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens von Hippel-Lindau tumor suppressor (VHL), transcript variant 2, mRNA -p13 -sS'gene_symbol' -p14 -S'VHL' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_937799.1:p.?' -p19 -sS'slr' -p20 -S'NP_937799.1:p.?' 
-p21 -ssS'submitted_variant' -p22 -S'3-10188187-TGTCCCGATAG-T' -p23 -sS'genome_context_intronic_sequence' -p24 -S'NC_000003.11(NM_198156.2):c.341-3280_341-3271del' -p25 -sS'hgvs_lrg_variant' -p26 -g4 -sS'hgvs_transcript_variant' -p27 -S'NM_198156.2:c.341-3280_341-3271del' -p28 -sS'hgvs_refseqgene_variant' -p29 -g4 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000003.11:g.10188191_10188200del' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr3' -p39 -sS'ref' -p40 -S'TGTCCCGATAG' -p41 -sS'pos' -p42 -S'10188187' -p43 -sS'alt' -p44 -S'T' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000003.12:g.10146507_10146516del' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'TGTCCCGATAG' -p50 -sg42 -S'10146503' -p51 -sg44 -g45 -sssS'grch37' -p52 -(dp53 -g34 -S'NC_000003.11:g.10188191_10188200del' -p54 -sg36 -(dp55 -g38 -S'3' -p56 -sg40 -S'TGTCCCGATAG' -p57 -sg42 -S'10188187' -p58 -sg44 -g45 -sssS'grch38' -p59 -(dp60 -g34 -S'NC_000003.12:g.10146507_10146516del' -p61 -sg36 -(dp62 -g38 -g56 -sg40 -S'TGTCCCGATAG' -p63 -sg42 -S'10146503' -p64 -sg44 -g45 -ssssS'reference_sequence_records' -p65 -(dp66 -S'protein' -p67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_937799.1' -p68 -sS'transcript' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_198156.2' -p70 -sssS'flag' -p71 -S'gene_variant' -p72 -sS'metadata' -p73 -(dp74 -S'variantvalidator_hgvs_version' -p75 -S'1.1.3' -p76 -sS'uta_schema' -p77 -S'uta_20180821' -p78 -sS'seqrepo_db' -p79 -S'2018-08-21' -p80 -sS'variantvalidator_version' -p81 -S'v0.2' -p82 -ssS'NM_001354723.1:c.*18-3280_*18-3271del' -p83 -(dp84 -g3 -g4 -sg5 -(lp85 -S'NC_000003.11:g.10188187TGTCCCGATAG>T automapped to NC_000003.11:g.10188191_10188200del' -p86 -aS'RefSeqGene record not available' -p87 -asg9 -g4 -sg10 -(lp88 -sg12 -VHomo sapiens von Hippel-Lindau tumor suppressor (VHL), transcript variant 3, mRNA -p89 -sg14 -S'VHL' -p90 -sg16 -(dp91 -g18 -S'NP_001341652.1:p.?' -p92 -sg20 -S'NP_001341652.1:p.?' 
-p93 -ssg22 -g23 -sg24 -S'NC_000003.11(NM_001354723.1):c.*18-3280_*18-3271del' -p94 -sg26 -g4 -sg27 -S'NM_001354723.1:c.*18-3280_*18-3271del' -p95 -sg29 -g4 -sg30 -(dp96 -S'hg19' -p97 -(dp98 -g34 -S'NC_000003.11:g.10188191_10188200del' -p99 -sg36 -(dp100 -g38 -g39 -sg40 -S'TGTCCCGATAG' -p101 -sg42 -S'10188187' -p102 -sg44 -g45 -sssg46 -(dp103 -g34 -S'NC_000003.12:g.10146507_10146516del' -p104 -sg36 -(dp105 -g38 -g39 -sg40 -S'TGTCCCGATAG' -p106 -sg42 -S'10146503' -p107 -sg44 -g45 -sssS'grch37' -p108 -(dp109 -g34 -S'NC_000003.11:g.10188191_10188200del' -p110 -sg36 -(dp111 -g38 -g56 -sg40 -S'TGTCCCGATAG' -p112 -sg42 -S'10188187' -p113 -sg44 -g45 -sssS'grch38' -p114 -(dp115 -g34 -S'NC_000003.12:g.10146507_10146516del' -p116 -sg36 -(dp117 -g38 -g56 -sg40 -S'TGTCCCGATAG' -p118 -sg42 -S'10146503' -p119 -sg44 -g45 -ssssg65 -(dp120 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341652.1' -p121 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354723.1' -p122 -sssS'NM_000551.3:c.341-7_343del' -p123 -(dp124 -g3 -g4 -sg5 -(lp125 -S'NC_000003.11:g.10188187TGTCCCGATAG>T automapped to NC_000003.11:g.10188191_10188200del' -p126 -aS'RefSeqGene record not available' -p127 -asg9 -g4 -sg10 -(lp128 -sg12 -VHomo sapiens von Hippel-Lindau tumor suppressor (VHL), transcript variant 1, mRNA -p129 -sg14 -S'VHL' -p130 -sg16 -(dp131 -g18 -S'NP_000542.1:p.?' -p132 -sg20 -S'NP_000542.1:p.?' 
-p133 -ssg22 -g23 -sg24 -S'NC_000003.11(NM_000551.3):c.341-7_343del' -p134 -sg26 -g4 -sg27 -S'NM_000551.3:c.341-7_343del' -p135 -sg29 -g4 -sg30 -(dp136 -S'hg19' -p137 -(dp138 -g34 -S'NC_000003.11:g.10188191_10188200del' -p139 -sg36 -(dp140 -g38 -g39 -sg40 -S'TGTCCCGATAG' -p141 -sg42 -S'10188187' -p142 -sg44 -g45 -sssg46 -(dp143 -g34 -S'NC_000003.12:g.10146507_10146516del' -p144 -sg36 -(dp145 -g38 -g39 -sg40 -S'TGTCCCGATAG' -p146 -sg42 -S'10146503' -p147 -sg44 -g45 -sssS'grch37' -p148 -(dp149 -g34 -S'NC_000003.11:g.10188191_10188200del' -p150 -sg36 -(dp151 -g38 -g56 -sg40 -S'TGTCCCGATAG' -p152 -sg42 -S'10188187' -p153 -sg44 -g45 -sssS'grch38' -p154 -(dp155 -g34 -S'NC_000003.12:g.10146507_10146516del' -p156 -sg36 -(dp157 -g38 -g56 -sg40 -S'TGTCCCGATAG' -p158 -sg42 -S'10146503' -p159 -sg44 -g45 -ssssg65 -(dp160 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000542.1' -p161 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000551.3' -p162 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant28.txt b/VariantValidator/testing/testOutputsMasterITS/variant28.txt deleted file mode 100644 index 406b2fbf..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant28.txt +++ /dev/null @@ -1,23 +0,0 @@ -(dp0 -S'flag' -p1 -NsS'metadata' -p2 -(dp3 -S'variantvalidator_hgvs_version' -p4 -S'1.1.3' -p5 -sS'uta_schema' -p6 -S'uta_20180821' -p7 -sS'seqrepo_db' -p8 -S'2018-08-21' -p9 -sS'variantvalidator_version' -p10 -S'v0.2' -p11 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant280.txt b/VariantValidator/testing/testOutputsMasterITS/variant280.txt deleted file mode 100644 index 7853a80a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant280.txt +++ /dev/null @@ -1,894 +0,0 @@ -(dp0 -S'NM_001005505.2:c.3408A>C' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'CACNA2D2' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001005505.1:p.(Gln1136His)' -p18 -sS'slr' -p19 -S'NP_001005505.1:p.(Q1136H)' -p20 -ssS'submitted_variant' -p21 -S'3-50402127-T-G' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_001005505.2:c.3408A>C' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000003.11:g.50402127T>G' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr3' -p37 -sS'ref' -p38 -VT -p39 -sS'pos' -p40 -S'50402127' -p41 -sS'alt' -p42 -VG -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000003.12:g.50364696T>G' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'50364696' -p48 -sg42 -g43 -sssS'grch37' -p49 -(dp50 -g32 -S'NC_000003.11:g.50402127T>G' -p51 -sg34 -(dp52 -g36 -S'3' -p53 -sg38 -g39 -sg40 -S'50402127' -p54 -sg42 -g43 -sssS'grch38' -p55 -(dp56 -g32 -S'NC_000003.12:g.50364696T>G' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'50364696' -p59 -sg42 -g43 -ssssS'reference_sequence_records' -p60 -(dp61 -S'protein' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001005505.1' -p63 -sS'transcript' -p64 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001005505.2' -p65 -sssS'NM_006030.2:c.3402A>C' -p66 -(dp67 -g3 -g4 -sg5 -(lp68 -S'A more recent version of the selected reference sequence NM_006030.2 is available (NM_006030.3)' -p69 -aS'NM_006030.3:c.3402A>C MUST be fully validated prior to use in reports' -p70 -aS'select_variants=NM_006030.3:c.3402A>C' -p71 -aS'RefSeqGene record not available' -p72 -asg8 -g4 -sg9 -(lp73 -sg11 -VHomo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 2, mRNA -p74 -sg13 -S'CACNA2D2' -p75 -sg15 -(dp76 -g17 -S'NP_006021.2:p.(Gln1134His)' -p77 -sg19 -S'NP_006021.2:p.(Q1134H)' -p78 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_006030.2:c.3402A>C' -p79 -sg27 -g4 -sg28 -(dp80 -S'hg19' -p81 -(dp82 -g32 -S'NC_000003.11:g.50402127T>G' -p83 -sg34 -(dp84 -g36 -g37 -sg38 -g39 -sg40 -S'50402127' -p85 -sg42 -g43 -sssS'grch37' -p86 -(dp87 -g32 -S'NC_000003.11:g.50402127T>G' -p88 -sg34 -(dp89 -g36 -g53 -sg38 -g39 -sg40 -S'50402127' -p90 -sg42 -g43 -ssssg60 -(dp91 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_006021.2' -p92 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_006030.2' -p93 -sssS'NM_001174051.1:c.3423A>C' -p94 -(dp95 -g3 -g4 -sg5 -(lp96 -S'A more recent version of the selected reference sequence NM_001174051.1 is available (NM_001174051.2)' -p97 -aS'NM_001174051.2:c.3423A>C MUST be fully validated prior to use in reports' -p98 -aS'select_variants=NM_001174051.2:c.3423A>C' -p99 -aS'RefSeqGene record not available' -p100 -asg8 -g4 -sg9 -(lp101 -sg11 -VHomo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 3, mRNA -p102 -sg13 -S'CACNA2D2' -p103 -sg15 -(dp104 -g17 -S'NP_001167522.1:p.(Gln1141His)' -p105 -sg19 -S'NP_001167522.1:p.(Q1141H)' -p106 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001174051.1:c.3423A>C' -p107 -sg27 -g4 -sg28 -(dp108 -S'hg19' -p109 -(dp110 -g32 -S'NC_000003.11:g.50402127T>G' -p111 -sg34 -(dp112 -g36 -g37 -sg38 -g39 -sg40 
-S'50402127' -p113 -sg42 -g43 -sssS'grch37' -p114 -(dp115 -g32 -S'NC_000003.11:g.50402127T>G' -p116 -sg34 -(dp117 -g36 -g53 -sg38 -g39 -sg40 -S'50402127' -p118 -sg42 -g43 -ssssg60 -(dp119 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167522.1' -p120 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001174051.1' -p121 -sssS'NM_001174051.2:c.3423A>C' -p122 -(dp123 -g3 -g4 -sg5 -(lp124 -S'RefSeqGene record not available' -p125 -asg8 -g4 -sg9 -(lp126 -sg11 -VHomo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 3, mRNA -p127 -sg13 -S'CACNA2D2' -p128 -sg15 -(dp129 -g17 -S'NP_001167522.1:p.(Gln1141His)' -p130 -sg19 -S'NP_001167522.1:p.(Q1141H)' -p131 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001174051.2:c.3423A>C' -p132 -sg27 -g4 -sg28 -(dp133 -S'hg19' -p134 -(dp135 -g32 -S'NC_000003.11:g.50402127T>G' -p136 -sg34 -(dp137 -g36 -g37 -sg38 -g39 -sg40 -S'50402127' -p138 -sg42 -g43 -sssg44 -(dp139 -g32 -S'NC_000003.12:g.50364696T>G' -p140 -sg34 -(dp141 -g36 -g37 -sg38 -g39 -sg40 -S'50364696' -p142 -sg42 -g43 -sssS'grch37' -p143 -(dp144 -g32 -S'NC_000003.11:g.50402127T>G' -p145 -sg34 -(dp146 -g36 -g53 -sg38 -g39 -sg40 -S'50402127' -p147 -sg42 -g43 -sssS'grch38' -p148 -(dp149 -g32 -S'NC_000003.12:g.50364696T>G' -p150 -sg34 -(dp151 -g36 -g53 -sg38 -g39 -sg40 -S'50364696' -p152 -sg42 -g43 -ssssg60 -(dp153 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167522.1' -p154 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001174051.2' -p155 -sssS'metadata' -p156 -(dp157 -S'variantvalidator_hgvs_version' -p158 -S'1.1.3' -p159 -sS'uta_schema' -p160 -S'uta_20180821' -p161 -sS'seqrepo_db' -p162 -S'2018-08-21' -p163 -sS'variantvalidator_version' -p164 -S'v0.2' -p165 -ssS'NM_006030.3:c.3402A>C' -p166 -(dp167 -g3 -g4 -sg5 -(lp168 -S'RefSeqGene record not available' -p169 -asg8 -g4 -sg9 -(lp170 -sg11 -VHomo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 2, mRNA -p171 -sg13 
-S'CACNA2D2' -p172 -sg15 -(dp173 -g17 -S'NP_006021.2:p.(Gln1134His)' -p174 -sg19 -S'NP_006021.2:p.(Q1134H)' -p175 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_006030.3:c.3402A>C' -p176 -sg27 -g4 -sg28 -(dp177 -S'hg19' -p178 -(dp179 -g32 -S'NC_000003.11:g.50402127T>G' -p180 -sg34 -(dp181 -g36 -g37 -sg38 -g39 -sg40 -S'50402127' -p182 -sg42 -g43 -sssg44 -(dp183 -g32 -S'NC_000003.12:g.50364696T>G' -p184 -sg34 -(dp185 -g36 -g37 -sg38 -g39 -sg40 -S'50364696' -p186 -sg42 -g43 -sssS'grch37' -p187 -(dp188 -g32 -S'NC_000003.11:g.50402127T>G' -p189 -sg34 -(dp190 -g36 -g53 -sg38 -g39 -sg40 -S'50402127' -p191 -sg42 -g43 -sssS'grch38' -p192 -(dp193 -g32 -S'NC_000003.12:g.50364696T>G' -p194 -sg34 -(dp195 -g36 -g53 -sg38 -g39 -sg40 -S'50364696' -p196 -sg42 -g43 -ssssg60 -(dp197 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_006021.2' -p198 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_006030.3' -p199 -sssS'NM_001291101.1:c.3201A>C' -p200 -(dp201 -g3 -g4 -sg5 -(lp202 -S'RefSeqGene record not available' -p203 -asg8 -g4 -sg9 -(lp204 -sg11 -VHomo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 4, mRNA -p205 -sg13 -S'CACNA2D2' -p206 -sg15 -(dp207 -g17 -S'NP_001278030.1:p.(Gln1067His)' -p208 -sg19 -S'NP_001278030.1:p.(Q1067H)' -p209 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001291101.1:c.3201A>C' -p210 -sg27 -g4 -sg28 -(dp211 -S'hg19' -p212 -(dp213 -g32 -S'NC_000003.11:g.50402127T>G' -p214 -sg34 -(dp215 -g36 -g37 -sg38 -g39 -sg40 -S'50402127' -p216 -sg42 -g43 -sssg44 -(dp217 -g32 -S'NC_000003.12:g.50364696T>G' -p218 -sg34 -(dp219 -g36 -g37 -sg38 -g39 -sg40 -S'50364696' -p220 -sg42 -g43 -sssS'grch37' -p221 -(dp222 -g32 -S'NC_000003.11:g.50402127T>G' -p223 -sg34 -(dp224 -g36 -g53 -sg38 -g39 -sg40 -S'50402127' -p225 -sg42 -g43 -sssS'grch38' -p226 -(dp227 -g32 -S'NC_000003.12:g.50364696T>G' -p228 -sg34 -(dp229 -g36 -g53 -sg38 -g39 -sg40 -S'50364696' -p230 -sg42 -g43 -ssssg60 -(dp231 -g62 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278030.1' -p232 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291101.1' -p233 -sssS'flag' -p234 -S'gene_variant' -p235 -sS'NR_111912.1:n.443-1601T>G' -p236 -(dp237 -g3 -g4 -sg5 -(lp238 -S'RefSeqGene record not available' -p239 -asg8 -g4 -sg9 -(lp240 -sg11 -VHomo sapiens cytochrome b561 family member D2 (CYB561D2), transcript variant 3, non-coding RNA -p241 -sg13 -S'CYB561D2' -p242 -sg15 -(dp243 -g17 -S'Non-coding :n.' -p244 -sg19 -g244 -ssg21 -g22 -sg23 -S'NC_000003.11(NR_111912.1):c.443-1601T>G' -p245 -sg24 -g4 -sg25 -S'NR_111912.1:n.443-1601T>G' -p246 -sg27 -g4 -sg28 -(dp247 -S'hg19' -p248 -(dp249 -g32 -S'NC_000003.11:g.50402127T>G' -p250 -sg34 -(dp251 -g36 -g37 -sg38 -S'T' -p252 -sg40 -S'50402127' -p253 -sg42 -S'G' -p254 -sssg44 -(dp255 -g32 -S'NC_000003.12:g.50364696T>G' -p256 -sg34 -(dp257 -g36 -g37 -sg38 -g252 -sg40 -S'50364696' -p258 -sg42 -g254 -sssS'grch37' -p259 -(dp260 -g32 -S'NC_000003.11:g.50402127T>G' -p261 -sg34 -(dp262 -g36 -g53 -sg38 -g252 -sg40 -S'50402127' -p263 -sg42 -g254 -sssS'grch38' -p264 -(dp265 -g32 -S'NC_000003.12:g.50364696T>G' -p266 -sg34 -(dp267 -g36 -g53 -sg38 -g252 -sg40 -S'50364696' -p268 -sg42 -g254 -ssssg60 -(dp269 -g64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_111912.1' -p270 -sssS'NM_001005505.1:c.3408A>C' -p271 -(dp272 -g3 -g4 -sg5 -(lp273 -S'A more recent version of the selected reference sequence NM_001005505.1 is available (NM_001005505.2)' -p274 -aS'NM_001005505.2:c.3408A>C MUST be fully validated prior to use in reports' -p275 -aS'select_variants=NM_001005505.2:c.3408A>C' -p276 -aS'RefSeqGene record not available' -p277 -asg8 -g4 -sg9 -(lp278 -sg11 -VHomo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 1, mRNA -p279 -sg13 -S'CACNA2D2' -p280 -sg15 -(dp281 -g17 -S'NP_001005505.1:p.(Gln1136His)' -p282 -sg19 -S'NP_001005505.1:p.(Q1136H)' -p283 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001005505.1:c.3408A>C' -p284 -sg27 -g4 
-sg28 -(dp285 -S'hg19' -p286 -(dp287 -g32 -S'NC_000003.11:g.50402127T>G' -p288 -sg34 -(dp289 -g36 -g37 -sg38 -g39 -sg40 -S'50402127' -p290 -sg42 -g43 -sssS'grch37' -p291 -(dp292 -g32 -S'NC_000003.11:g.50402127T>G' -p293 -sg34 -(dp294 -g36 -g53 -sg38 -g39 -sg40 -S'50402127' -p295 -sg42 -g43 -ssssg60 -(dp296 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001005505.1' -p297 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001005505.1' -p298 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant281.txt b/VariantValidator/testing/testOutputsMasterITS/variant281.txt deleted file mode 100644 index 5d933d13..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant281.txt +++ /dev/null @@ -1,1116 +0,0 @@ -(dp0 -S'NR_111913.1:n.126G>A' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens cytochrome b561 family member D2 (CYB561D2), transcript variant 4, non-coding RNA -p12 -sS'gene_symbol' -p13 -S'CYB561D2' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'Non-coding :n.' 
-p18 -sS'slr' -p19 -g18 -ssS'submitted_variant' -p20 -S'3-50402890-G-A' -p21 -sS'genome_context_intronic_sequence' -p22 -g4 -sS'hgvs_lrg_variant' -p23 -g4 -sS'hgvs_transcript_variant' -p24 -S'NR_111913.1:n.126G>A' -p25 -sS'hgvs_refseqgene_variant' -p26 -g4 -sS'primary_assembly_loci' -p27 -(dp28 -S'hg19' -p29 -(dp30 -S'hgvs_genomic_description' -p31 -S'NC_000003.11:g.50402890G>A' -p32 -sS'vcf' -p33 -(dp34 -S'chr' -p35 -S'chr3' -p36 -sS'ref' -p37 -S'G' -p38 -sS'pos' -p39 -S'50402890' -p40 -sS'alt' -p41 -S'A' -p42 -sssS'hg38' -p43 -(dp44 -g31 -S'NC_000003.12:g.50365459G>A' -p45 -sg33 -(dp46 -g35 -g36 -sg37 -g38 -sg39 -S'50365459' -p47 -sg41 -g42 -sssS'grch37' -p48 -(dp49 -g31 -S'NC_000003.11:g.50402890G>A' -p50 -sg33 -(dp51 -g35 -S'3' -p52 -sg37 -g38 -sg39 -S'50402890' -p53 -sg41 -g42 -sssS'grch38' -p54 -(dp55 -g31 -S'NC_000003.12:g.50365459G>A' -p56 -sg33 -(dp57 -g35 -g52 -sg37 -g38 -sg39 -S'50365459' -p58 -sg41 -g42 -ssssS'reference_sequence_records' -p59 -(dp60 -S'transcript' -p61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_111913.1' -p62 -sssS'NR_111912.1:n.443-838G>A' -p63 -(dp64 -g3 -g4 -sg5 -(lp65 -S'RefSeqGene record not available' -p66 -asg8 -g4 -sg9 -(lp67 -sg11 -VHomo sapiens cytochrome b561 family member D2 (CYB561D2), transcript variant 3, non-coding RNA -p68 -sg13 -S'CYB561D2' -p69 -sg15 -(dp70 -g17 -S'Non-coding :n.' 
-p71 -sg19 -g71 -ssg20 -g21 -sg22 -S'NC_000003.11(NR_111912.1):c.443-838G>A' -p72 -sg23 -g4 -sg24 -S'NR_111912.1:n.443-838G>A' -p73 -sg26 -g4 -sg27 -(dp74 -S'hg19' -p75 -(dp76 -g31 -S'NC_000003.11:g.50402890G>A' -p77 -sg33 -(dp78 -g35 -g36 -sg37 -g38 -sg39 -S'50402890' -p79 -sg41 -g42 -sssg43 -(dp80 -g31 -S'NC_000003.12:g.50365459G>A' -p81 -sg33 -(dp82 -g35 -g36 -sg37 -g38 -sg39 -S'50365459' -p83 -sg41 -g42 -sssS'grch37' -p84 -(dp85 -g31 -S'NC_000003.11:g.50402890G>A' -p86 -sg33 -(dp87 -g35 -g52 -sg37 -g38 -sg39 -S'50402890' -p88 -sg41 -g42 -sssS'grch38' -p89 -(dp90 -g31 -S'NC_000003.12:g.50365459G>A' -p91 -sg33 -(dp92 -g35 -g52 -sg37 -g38 -sg39 -S'50365459' -p93 -sg41 -g42 -ssssg59 -(dp94 -g61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_111912.1' -p95 -sssS'NM_001291101.1:c.2788C>T' -p96 -(dp97 -g3 -g4 -sg5 -(lp98 -S'RefSeqGene record not available' -p99 -asg8 -g4 -sg9 -(lp100 -sg11 -VHomo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 4, mRNA -p101 -sg13 -S'CACNA2D2' -p102 -sg15 -(dp103 -g17 -S'NP_001278030.1:p.(Pro930Ser)' -p104 -sg19 -S'NP_001278030.1:p.(P930S)' -p105 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_001291101.1:c.2788C>T' -p106 -sg26 -g4 -sg27 -(dp107 -S'hg19' -p108 -(dp109 -g31 -S'NC_000003.11:g.50402890G>A' -p110 -sg33 -(dp111 -g35 -g36 -sg37 -VG -p112 -sg39 -S'50402890' -p113 -sg41 -VA -p114 -sssg43 -(dp115 -g31 -S'NC_000003.12:g.50365459G>A' -p116 -sg33 -(dp117 -g35 -g36 -sg37 -g112 -sg39 -S'50365459' -p118 -sg41 -g114 -sssS'grch37' -p119 -(dp120 -g31 -S'NC_000003.11:g.50402890G>A' -p121 -sg33 -(dp122 -g35 -g52 -sg37 -g112 -sg39 -S'50402890' -p123 -sg41 -g114 -sssS'grch38' -p124 -(dp125 -g31 -S'NC_000003.12:g.50365459G>A' -p126 -sg33 -(dp127 -g35 -g52 -sg37 -g112 -sg39 -S'50365459' -p128 -sg41 -g114 -ssssg59 -(dp129 -S'protein' -p130 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278030.1' -p131 -sg61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291101.1' -p132 -sssS'NM_006030.2:c.2995C>T' 
-p133 -(dp134 -g3 -g4 -sg5 -(lp135 -S'A more recent version of the selected reference sequence NM_006030.2 is available (NM_006030.3)' -p136 -aS'NM_006030.3:c.2995C>T MUST be fully validated prior to use in reports' -p137 -aS'select_variants=NM_006030.3:c.2995C>T' -p138 -aS'RefSeqGene record not available' -p139 -asg8 -g4 -sg9 -(lp140 -sg11 -VHomo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 2, mRNA -p141 -sg13 -S'CACNA2D2' -p142 -sg15 -(dp143 -g17 -S'NP_006021.2:p.(Pro999Ser)' -p144 -sg19 -S'NP_006021.2:p.(P999S)' -p145 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_006030.2:c.2995C>T' -p146 -sg26 -g4 -sg27 -(dp147 -S'hg19' -p148 -(dp149 -g31 -S'NC_000003.11:g.50402890G>A' -p150 -sg33 -(dp151 -g35 -g36 -sg37 -g112 -sg39 -S'50402890' -p152 -sg41 -g114 -sssS'grch37' -p153 -(dp154 -g31 -S'NC_000003.11:g.50402890G>A' -p155 -sg33 -(dp156 -g35 -g52 -sg37 -g112 -sg39 -S'50402890' -p157 -sg41 -g114 -ssssg59 -(dp158 -g130 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_006021.2' -p159 -sg61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_006030.2' -p160 -sssS'NR_111914.1:n.126G>A' -p161 -(dp162 -g3 -g4 -sg5 -(lp163 -S'RefSeqGene record not available' -p164 -asg8 -g4 -sg9 -(lp165 -sg11 -VHomo sapiens cytochrome b561 family member D2 (CYB561D2), transcript variant 5, non-coding RNA -p166 -sg13 -S'CYB561D2' -p167 -sg15 -(dp168 -g17 -S'Non-coding :n.' 
-p169 -sg19 -g169 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NR_111914.1:n.126G>A' -p170 -sg26 -g4 -sg27 -(dp171 -S'hg19' -p172 -(dp173 -g31 -S'NC_000003.11:g.50402890G>A' -p174 -sg33 -(dp175 -g35 -g36 -sg37 -g38 -sg39 -S'50402890' -p176 -sg41 -g42 -sssg43 -(dp177 -g31 -S'NC_000003.12:g.50365459G>A' -p178 -sg33 -(dp179 -g35 -g36 -sg37 -g38 -sg39 -S'50365459' -p180 -sg41 -g42 -sssS'grch37' -p181 -(dp182 -g31 -S'NC_000003.11:g.50402890G>A' -p183 -sg33 -(dp184 -g35 -g52 -sg37 -g38 -sg39 -S'50402890' -p185 -sg41 -g42 -sssS'grch38' -p186 -(dp187 -g31 -S'NC_000003.12:g.50365459G>A' -p188 -sg33 -(dp189 -g35 -g52 -sg37 -g38 -sg39 -S'50365459' -p190 -sg41 -g42 -ssssg59 -(dp191 -g61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_111914.1' -p192 -sssS'NM_001005505.2:c.2995C>T' -p193 -(dp194 -g3 -g4 -sg5 -(lp195 -S'RefSeqGene record not available' -p196 -asg8 -g4 -sg9 -(lp197 -sg11 -VHomo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 1, mRNA -p198 -sg13 -S'CACNA2D2' -p199 -sg15 -(dp200 -g17 -S'NP_001005505.1:p.(Pro999Ser)' -p201 -sg19 -S'NP_001005505.1:p.(P999S)' -p202 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_001005505.2:c.2995C>T' -p203 -sg26 -g4 -sg27 -(dp204 -S'hg19' -p205 -(dp206 -g31 -S'NC_000003.11:g.50402890G>A' -p207 -sg33 -(dp208 -g35 -g36 -sg37 -g112 -sg39 -S'50402890' -p209 -sg41 -g114 -sssg43 -(dp210 -g31 -S'NC_000003.12:g.50365459G>A' -p211 -sg33 -(dp212 -g35 -g36 -sg37 -g112 -sg39 -S'50365459' -p213 -sg41 -g114 -sssS'grch37' -p214 -(dp215 -g31 -S'NC_000003.11:g.50402890G>A' -p216 -sg33 -(dp217 -g35 -g52 -sg37 -g112 -sg39 -S'50402890' -p218 -sg41 -g114 -sssS'grch38' -p219 -(dp220 -g31 -S'NC_000003.12:g.50365459G>A' -p221 -sg33 -(dp222 -g35 -g52 -sg37 -g112 -sg39 -S'50365459' -p223 -sg41 -g114 -ssssg59 -(dp224 -g130 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001005505.1' -p225 -sg61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001005505.2' -p226 -sssS'flag' -p227 -S'gene_variant' -p228 -sS'NM_001174051.1:c.3016C>T' 
-p229 -(dp230 -g3 -g4 -sg5 -(lp231 -S'A more recent version of the selected reference sequence NM_001174051.1 is available (NM_001174051.2)' -p232 -aS'NM_001174051.2:c.3016C>T MUST be fully validated prior to use in reports' -p233 -aS'select_variants=NM_001174051.2:c.3016C>T' -p234 -aS'RefSeqGene record not available' -p235 -asg8 -g4 -sg9 -(lp236 -sg11 -VHomo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 3, mRNA -p237 -sg13 -S'CACNA2D2' -p238 -sg15 -(dp239 -g17 -S'NP_001167522.1:p.(Pro1006Ser)' -p240 -sg19 -S'NP_001167522.1:p.(P1006S)' -p241 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_001174051.1:c.3016C>T' -p242 -sg26 -g4 -sg27 -(dp243 -S'hg19' -p244 -(dp245 -g31 -S'NC_000003.11:g.50402890G>A' -p246 -sg33 -(dp247 -g35 -g36 -sg37 -g112 -sg39 -S'50402890' -p248 -sg41 -g114 -sssS'grch37' -p249 -(dp250 -g31 -S'NC_000003.11:g.50402890G>A' -p251 -sg33 -(dp252 -g35 -g52 -sg37 -g112 -sg39 -S'50402890' -p253 -sg41 -g114 -ssssg59 -(dp254 -g130 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167522.1' -p255 -sg61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001174051.1' -p256 -sssS'NM_001174051.2:c.3016C>T' -p257 -(dp258 -g3 -g4 -sg5 -(lp259 -S'RefSeqGene record not available' -p260 -asg8 -g4 -sg9 -(lp261 -sg11 -VHomo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 3, mRNA -p262 -sg13 -S'CACNA2D2' -p263 -sg15 -(dp264 -g17 -S'NP_001167522.1:p.(Pro1006Ser)' -p265 -sg19 -S'NP_001167522.1:p.(P1006S)' -p266 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_001174051.2:c.3016C>T' -p267 -sg26 -g4 -sg27 -(dp268 -S'hg19' -p269 -(dp270 -g31 -S'NC_000003.11:g.50402890G>A' -p271 -sg33 -(dp272 -g35 -g36 -sg37 -g112 -sg39 -S'50402890' -p273 -sg41 -g114 -sssg43 -(dp274 -g31 -S'NC_000003.12:g.50365459G>A' -p275 -sg33 -(dp276 -g35 -g36 -sg37 -g112 -sg39 -S'50365459' -p277 -sg41 -g114 -sssS'grch37' -p278 -(dp279 -g31 -S'NC_000003.11:g.50402890G>A' -p280 -sg33 -(dp281 -g35 -g52 -sg37 -g112 -sg39 
-S'50402890' -p282 -sg41 -g114 -sssS'grch38' -p283 -(dp284 -g31 -S'NC_000003.12:g.50365459G>A' -p285 -sg33 -(dp286 -g35 -g52 -sg37 -g112 -sg39 -S'50365459' -p287 -sg41 -g114 -ssssg59 -(dp288 -g130 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167522.1' -p289 -sg61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001174051.2' -p290 -sssS'NM_006030.3:c.2995C>T' -p291 -(dp292 -g3 -g4 -sg5 -(lp293 -S'RefSeqGene record not available' -p294 -asg8 -g4 -sg9 -(lp295 -sg11 -VHomo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 2, mRNA -p296 -sg13 -S'CACNA2D2' -p297 -sg15 -(dp298 -g17 -S'NP_006021.2:p.(Pro999Ser)' -p299 -sg19 -S'NP_006021.2:p.(P999S)' -p300 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_006030.3:c.2995C>T' -p301 -sg26 -g4 -sg27 -(dp302 -S'hg19' -p303 -(dp304 -g31 -S'NC_000003.11:g.50402890G>A' -p305 -sg33 -(dp306 -g35 -g36 -sg37 -g112 -sg39 -S'50402890' -p307 -sg41 -g114 -sssg43 -(dp308 -g31 -S'NC_000003.12:g.50365459G>A' -p309 -sg33 -(dp310 -g35 -g36 -sg37 -g112 -sg39 -S'50365459' -p311 -sg41 -g114 -sssS'grch37' -p312 -(dp313 -g31 -S'NC_000003.11:g.50402890G>A' -p314 -sg33 -(dp315 -g35 -g52 -sg37 -g112 -sg39 -S'50402890' -p316 -sg41 -g114 -sssS'grch38' -p317 -(dp318 -g31 -S'NC_000003.12:g.50365459G>A' -p319 -sg33 -(dp320 -g35 -g52 -sg37 -g112 -sg39 -S'50365459' -p321 -sg41 -g114 -ssssg59 -(dp322 -g130 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_006021.2' -p323 -sg61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_006030.3' -p324 -sssS'NM_001005505.1:c.2995C>T' -p325 -(dp326 -g3 -g4 -sg5 -(lp327 -S'A more recent version of the selected reference sequence NM_001005505.1 is available (NM_001005505.2)' -p328 -aS'NM_001005505.2:c.2995C>T MUST be fully validated prior to use in reports' -p329 -aS'select_variants=NM_001005505.2:c.2995C>T' -p330 -aS'RefSeqGene record not available' -p331 -asg8 -g4 -sg9 -(lp332 -sg11 -VHomo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 1, mRNA 
-p333 -sg13 -S'CACNA2D2' -p334 -sg15 -(dp335 -g17 -S'NP_001005505.1:p.(Pro999Ser)' -p336 -sg19 -S'NP_001005505.1:p.(P999S)' -p337 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_001005505.1:c.2995C>T' -p338 -sg26 -g4 -sg27 -(dp339 -S'hg19' -p340 -(dp341 -g31 -S'NC_000003.11:g.50402890G>A' -p342 -sg33 -(dp343 -g35 -g36 -sg37 -g112 -sg39 -S'50402890' -p344 -sg41 -g114 -sssS'grch37' -p345 -(dp346 -g31 -S'NC_000003.11:g.50402890G>A' -p347 -sg33 -(dp348 -g35 -g52 -sg37 -g112 -sg39 -S'50402890' -p349 -sg41 -g114 -ssssg59 -(dp350 -g130 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001005505.1' -p351 -sg61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001005505.1' -p352 -sssS'metadata' -p353 -(dp354 -S'variantvalidator_hgvs_version' -p355 -S'1.1.3' -p356 -sS'uta_schema' -p357 -S'uta_20180821' -p358 -sS'seqrepo_db' -p359 -S'2018-08-21' -p360 -sS'variantvalidator_version' -p361 -S'v0.2' -p362 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant282.txt b/VariantValidator/testing/testOutputsMasterITS/variant282.txt deleted file mode 100644 index bfa4b9ad..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant282.txt +++ /dev/null @@ -1,553 +0,0 @@ -(dp0 -S'NM_007159.4:c.1135+565del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000003.11:g.57851007AG>A automapped to NC_000003.11:g.57851008delG' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens sarcolemma associated protein (SLMAP), transcript variant 2, mRNA -p13 -sS'gene_symbol' -p14 -S'SLMAP' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_009090.2:p.?' -p19 -sS'slr' -p20 -S'NP_009090.2:p.?' 
-p21 -ssS'submitted_variant' -p22 -S'3-57851007-AG-A' -p23 -sS'genome_context_intronic_sequence' -p24 -S'NC_000003.11(NM_007159.4):c.1135+565del' -p25 -sS'hgvs_lrg_variant' -p26 -g4 -sS'hgvs_transcript_variant' -p27 -S'NM_007159.4:c.1135+565del' -p28 -sS'hgvs_refseqgene_variant' -p29 -g4 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000003.11:g.57851008del' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr3' -p39 -sS'ref' -p40 -S'AG' -p41 -sS'pos' -p42 -S'57851007' -p43 -sS'alt' -p44 -S'A' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000003.12:g.57865281del' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'AG' -p50 -sg42 -S'57865280' -p51 -sg44 -g45 -sssS'grch37' -p52 -(dp53 -g34 -S'NC_000003.11:g.57851008del' -p54 -sg36 -(dp55 -g38 -S'3' -p56 -sg40 -S'AG' -p57 -sg42 -S'57851007' -p58 -sg44 -g45 -sssS'grch38' -p59 -(dp60 -g34 -S'NC_000003.12:g.57865281del' -p61 -sg36 -(dp62 -g38 -g56 -sg40 -S'AG' -p63 -sg42 -S'57865280' -p64 -sg44 -g45 -ssssS'reference_sequence_records' -p65 -(dp66 -S'protein' -p67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009090.2' -p68 -sS'transcript' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007159.4' -p70 -sssS'' -p71 -(dp72 -g3 -g4 -sg5 -(lp73 -S'NC_000003.11:g.57851007AG>A automapped to NC_000003.11:g.57851008delG' -p74 -aS'Unable to assign transcript identity records to NM_001304421.1, potentially an obsolete record :' -p75 -asg9 -g4 -sg10 -(lp76 -sg12 -g4 -sg14 -g4 -sg16 -(dp77 -g18 -g4 -sg20 -g4 -ssg22 -g23 -sg24 -g4 -sg26 -g4 -sg27 -g4 -sg29 -g4 -sg30 -(dp78 -sg65 -g4 -ssS'NM_001304421.2:c.1135+565del' -p79 -(dp80 -g3 -g4 -sg5 -(lp81 -S'NC_000003.11:g.57851007AG>A automapped to NC_000003.11:g.57851008delG' -p82 -aS'RefSeqGene record not available' -p83 -asg9 -g4 -sg10 -(lp84 -sg12 -VHomo sapiens sarcolemma associated protein (SLMAP), transcript variant 3, mRNA -p85 -sg14 -S'SLMAP' -p86 -sg16 -(dp87 -g18 -S'NP_001291350.1:p.?' -p88 -sg20 -S'NP_001291350.1:p.?' 
-p89 -ssg22 -g23 -sg24 -S'NC_000003.11(NM_001304421.2):c.1135+565del' -p90 -sg26 -g4 -sg27 -S'NM_001304421.2:c.1135+565del' -p91 -sg29 -g4 -sg30 -(dp92 -S'hg19' -p93 -(dp94 -g34 -S'NC_000003.11:g.57851008del' -p95 -sg36 -(dp96 -g38 -g39 -sg40 -S'AG' -p97 -sg42 -S'57851007' -p98 -sg44 -g45 -sssg46 -(dp99 -g34 -S'NC_000003.12:g.57865281del' -p100 -sg36 -(dp101 -g38 -g39 -sg40 -S'AG' -p102 -sg42 -S'57865280' -p103 -sg44 -g45 -sssS'grch37' -p104 -(dp105 -g34 -S'NC_000003.11:g.57851008del' -p106 -sg36 -(dp107 -g38 -g56 -sg40 -S'AG' -p108 -sg42 -S'57851007' -p109 -sg44 -g45 -sssS'grch38' -p110 -(dp111 -g34 -S'NC_000003.12:g.57865281del' -p112 -sg36 -(dp113 -g38 -g56 -sg40 -S'AG' -p114 -sg42 -S'57865280' -p115 -sg44 -g45 -ssssg65 -(dp116 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001291350.1' -p117 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001304421.2' -p118 -sssS'NM_001304420.2:c.1186+424del' -p119 -(dp120 -g3 -g4 -sg5 -(lp121 -S'NC_000003.11:g.57851007AG>A automapped to NC_000003.11:g.57851008delG' -p122 -aS'RefSeqGene record not available' -p123 -asg9 -g4 -sg10 -(lp124 -sg12 -VHomo sapiens sarcolemma associated protein (SLMAP), transcript variant 1, mRNA -p125 -sg14 -S'SLMAP' -p126 -sg16 -(dp127 -g18 -S'NP_001291349.1:p.?' -p128 -sg20 -S'NP_001291349.1:p.?' 
-p129 -ssg22 -g23 -sg24 -S'NC_000003.11(NM_001304420.2):c.1186+424del' -p130 -sg26 -g4 -sg27 -S'NM_001304420.2:c.1186+424del' -p131 -sg29 -g4 -sg30 -(dp132 -S'hg19' -p133 -(dp134 -g34 -S'NC_000003.11:g.57851008del' -p135 -sg36 -(dp136 -g38 -g39 -sg40 -S'AG' -p137 -sg42 -S'57851007' -p138 -sg44 -g45 -sssg46 -(dp139 -g34 -S'NC_000003.12:g.57865281del' -p140 -sg36 -(dp141 -g38 -g39 -sg40 -S'AG' -p142 -sg42 -S'57865280' -p143 -sg44 -g45 -sssS'grch37' -p144 -(dp145 -g34 -S'NC_000003.11:g.57851008del' -p146 -sg36 -(dp147 -g38 -g56 -sg40 -S'AG' -p148 -sg42 -S'57851007' -p149 -sg44 -g45 -sssS'grch38' -p150 -(dp151 -g34 -S'NC_000003.12:g.57865281del' -p152 -sg36 -(dp153 -g38 -g56 -sg40 -S'AG' -p154 -sg42 -S'57865280' -p155 -sg44 -g45 -ssssg65 -(dp156 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001291349.1' -p157 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001304420.2' -p158 -sssS'flag' -p159 -S'gene_variant' -p160 -sS'NM_007159.2:c.1135+565del' -p161 -(dp162 -g3 -g4 -sg5 -(lp163 -S'NC_000003.11:g.57851007AG>A automapped to NC_000003.11:g.57851008delG' -p164 -aS'A more recent version of the selected reference sequence NM_007159.2 is available (NM_007159.4)' -p165 -aS'NM_007159.4:c.1135+565delG MUST be fully validated prior to use in reports' -p166 -aS'select_variants=NM_007159.4:c.1135+565del' -p167 -aS'RefSeqGene record not available' -p168 -asg9 -g4 -sg10 -(lp169 -sg12 -VHomo sapiens sarcolemma associated protein (SLMAP), mRNA -p170 -sg14 -S'SLMAP' -p171 -sg16 -(dp172 -g18 -S'NP_009090.2:p.?' -p173 -sg20 -S'NP_009090.2:p.?' 
-p174 -ssg22 -g23 -sg24 -S'NC_000003.11(NM_007159.2):c.1135+565del' -p175 -sg26 -g4 -sg27 -S'NM_007159.2:c.1135+565del' -p176 -sg29 -g4 -sg30 -(dp177 -S'hg19' -p178 -(dp179 -g34 -S'NC_000003.11:g.57851008del' -p180 -sg36 -(dp181 -g38 -g39 -sg40 -S'AG' -p182 -sg42 -S'57851007' -p183 -sg44 -g45 -sssS'grch37' -p184 -(dp185 -g34 -S'NC_000003.11:g.57851008del' -p186 -sg36 -(dp187 -g38 -g56 -sg40 -S'AG' -p188 -sg42 -S'57851007' -p189 -sg44 -g45 -ssssg65 -(dp190 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009090.2' -p191 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007159.2' -p192 -sssS'metadata' -p193 -(dp194 -S'variantvalidator_hgvs_version' -p195 -S'1.1.3' -p196 -sS'uta_schema' -p197 -S'uta_20180821' -p198 -sS'seqrepo_db' -p199 -S'2018-08-21' -p200 -sS'variantvalidator_version' -p201 -S'v0.2' -p202 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant283.txt b/VariantValidator/testing/testOutputsMasterITS/variant283.txt deleted file mode 100644 index 3416aa30..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant283.txt +++ /dev/null @@ -1,286 +0,0 @@ -(dp0 -S'NM_001178065.1:c.3061C=' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens calcium sensing receptor (CASR), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'CASR' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001171536.1:p.(Gln1021=)' -p18 -sS'slr' -p19 -S'NP_001171536.1:p.(Q1021=)' -p20 -ssS'submitted_variant' -p21 -S'3-122003832-G-C' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_001178065.1:c.3061C=' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 
-S'hgvs_genomic_description' -p32 -S'NC_000003.11:g.122003832G>C' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr3' -p37 -sS'ref' -p38 -S'G' -p39 -sS'pos' -p40 -S'122003832' -p41 -sS'alt' -p42 -S'C' -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000003.12:g.122284985G>C' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'122284985' -p48 -sg42 -g43 -sssS'grch37' -p49 -(dp50 -g32 -S'NC_000003.11:g.122003832G>C' -p51 -sg34 -(dp52 -g36 -S'3' -p53 -sg38 -g39 -sg40 -S'122003832' -p54 -sg42 -g43 -sssS'grch38' -p55 -(dp56 -g32 -S'NC_000003.12:g.122284985G>C' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'122284985' -p59 -sg42 -g43 -ssssS'reference_sequence_records' -p60 -(dp61 -S'protein' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001171536.1' -p63 -sS'transcript' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001178065.1' -p65 -sssS'flag' -p66 -S'gene_variant' -p67 -sS'NM_000388.3:c.3031C=' -p68 -(dp69 -g3 -g4 -sg5 -(lp70 -S'RefSeqGene record not available' -p71 -asg8 -g4 -sg9 -(lp72 -sg11 -VHomo sapiens calcium sensing receptor (CASR), transcript variant 2, mRNA -p73 -sg13 -S'CASR' -p74 -sg15 -(dp75 -g17 -S'NP_000379.2:p.(Gln1011=)' -p76 -sg19 -S'NP_000379.2:p.(Q1011=)' -p77 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_000388.3:c.3031C=' -p78 -sg27 -g4 -sg28 -(dp79 -S'hg19' -p80 -(dp81 -g32 -S'NC_000003.11:g.122003832G>C' -p82 -sg34 -(dp83 -g36 -g37 -sg38 -g39 -sg40 -S'122003832' -p84 -sg42 -g43 -sssg44 -(dp85 -g32 -S'NC_000003.12:g.122284985G>C' -p86 -sg34 -(dp87 -g36 -g37 -sg38 -g39 -sg40 -S'122284985' -p88 -sg42 -g43 -sssS'grch37' -p89 -(dp90 -g32 -S'NC_000003.11:g.122003832G>C' -p91 -sg34 -(dp92 -g36 -g53 -sg38 -g39 -sg40 -S'122003832' -p93 -sg42 -g43 -sssS'grch38' -p94 -(dp95 -g32 -S'NC_000003.12:g.122284985G>C' -p96 -sg34 -(dp97 -g36 -g53 -sg38 -g39 -sg40 -S'122284985' -p98 -sg42 -g43 -ssssg60 -(dp99 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000379.2' -p100 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000388.3' -p101 -sssS'metadata' -p102 -(dp103 
-S'variantvalidator_hgvs_version' -p104 -S'1.1.3' -p105 -sS'uta_schema' -p106 -S'uta_20180821' -p107 -sS'seqrepo_db' -p108 -S'2018-08-21' -p109 -sS'variantvalidator_version' -p110 -S'v0.2' -p111 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant284.txt b/VariantValidator/testing/testOutputsMasterITS/variant284.txt deleted file mode 100644 index bd3204c6..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant284.txt +++ /dev/null @@ -1,510 +0,0 @@ -(dp0 -S'NM_001349798.1:c.45_46insCCT' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000004.11:g.153332910C>CAGG automapped to NC_000004.11:g.153332912_153332913insGAG' -p7 -aS'A more recent version of the selected reference sequence NM_001349798.1 is available (NM_001349798.2)' -p8 -aS'NM_001349798.2:c.45_46insCCT MUST be fully validated prior to use in reports' -p9 -aS'select_variants=NM_001349798.2:c.45_46insCCT' -p10 -aS'RefSeqGene record not available' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens F-box and WD repeat domain containing 7 (FBXW7), transcript variant 5, mRNA -p16 -sS'gene_symbol' -p17 -S'FBXW7' -p18 -sS'hgvs_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_361014.1:p.(Thr15_Gly16insPro)' -p22 -sS'slr' -p23 -S'NP_361014.1:p.(T15_G16insP)' -p24 -ssS'submitted_variant' -p25 -S'4-153332910-C-CAGG' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'hgvs_lrg_variant' -p28 -g4 -sS'hgvs_transcript_variant' -p29 -S'NM_001349798.1:c.45_46insCCT' -p30 -sS'hgvs_refseqgene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000004.11:g.153332910_153332911insAGG' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr4' -p41 -sS'ref' -p42 -S'C' -p43 -sS'pos' -p44 -S'153332910' -p45 -sS'alt' -p46 -VCAGG -p47 -sssS'hg38' -p48 -(dp49 -g36 
-S'NC_000004.12:g.152411758_152411759insAGG' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -g43 -sg44 -S'152411758' -p52 -sg46 -VCAGG -p53 -sssS'grch37' -p54 -(dp55 -g36 -S'NC_000004.11:g.153332910_153332911insAGG' -p56 -sg38 -(dp57 -g40 -S'4' -p58 -sg42 -g43 -sg44 -S'153332910' -p59 -sg46 -VCAGG -p60 -sssS'grch38' -p61 -(dp62 -g36 -S'NC_000004.12:g.152411758_152411759insAGG' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -g43 -sg44 -S'152411758' -p65 -sg46 -VCAGG -p66 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_361014.1' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001349798.1' -p72 -sssS'NM_033632.3:c.45_46insCCT' -p73 -(dp74 -g3 -g4 -sg5 -(lp75 -S'NC_000004.11:g.153332910C>CAGG automapped to NC_000004.11:g.153332912_153332913insGAG' -p76 -aS'RefSeqGene record not available' -p77 -asg12 -g4 -sg13 -(lp78 -sg15 -VHomo sapiens F-box and WD repeat domain containing 7 (FBXW7), transcript variant 1, mRNA -p79 -sg17 -S'FBXW7' -p80 -sg19 -(dp81 -g21 -S'NP_361014.1:p.(Thr15_Gly16insPro)' -p82 -sg23 -S'NP_361014.1:p.(T15_G16insP)' -p83 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_033632.3:c.45_46insCCT' -p84 -sg31 -g4 -sg32 -(dp85 -S'hg19' -p86 -(dp87 -g36 -S'NC_000004.11:g.153332910_153332911insAGG' -p88 -sg38 -(dp89 -g40 -g41 -sg42 -g43 -sg44 -S'153332910' -p90 -sg46 -VCAGG -p91 -sssg48 -(dp92 -g36 -S'NC_000004.12:g.152411758_152411759insAGG' -p93 -sg38 -(dp94 -g40 -g41 -sg42 -g43 -sg44 -S'152411758' -p95 -sg46 -VCAGG -p96 -sssS'grch37' -p97 -(dp98 -g36 -S'NC_000004.11:g.153332910_153332911insAGG' -p99 -sg38 -(dp100 -g40 -g58 -sg42 -g43 -sg44 -S'153332910' -p101 -sg46 -VCAGG -p102 -sssS'grch38' -p103 -(dp104 -g36 -S'NC_000004.12:g.152411758_152411759insAGG' -p105 -sg38 -(dp106 -g40 -g58 -sg42 -g43 -sg44 -S'152411758' -p107 -sg46 -VCAGG -p108 -ssssg67 -(dp109 -g69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_361014.1' -p110 -sg71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_033632.3' -p111 
-sssS'NM_001257069.1:c.45_46insCCT' -p112 -(dp113 -g3 -g4 -sg5 -(lp114 -S'NC_000004.11:g.153332910C>CAGG automapped to NC_000004.11:g.153332912_153332913insGAG' -p115 -aS'RefSeqGene record not available' -p116 -asg12 -g4 -sg13 -(lp117 -sg15 -VHomo sapiens F-box and WD repeat domain containing 7 (FBXW7), transcript variant 4, mRNA -p118 -sg17 -S'FBXW7' -p119 -sg19 -(dp120 -g21 -S'NP_001243998.1:p.(Thr15_Gly16insPro)' -p121 -sg23 -S'NP_001243998.1:p.(T15_G16insP)' -p122 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_001257069.1:c.45_46insCCT' -p123 -sg31 -g4 -sg32 -(dp124 -S'hg19' -p125 -(dp126 -g36 -S'NC_000004.11:g.153332910_153332911insAGG' -p127 -sg38 -(dp128 -g40 -g41 -sg42 -g43 -sg44 -S'153332910' -p129 -sg46 -VCAGG -p130 -sssg48 -(dp131 -g36 -S'NC_000004.12:g.152411758_152411759insAGG' -p132 -sg38 -(dp133 -g40 -g41 -sg42 -g43 -sg44 -S'152411758' -p134 -sg46 -VCAGG -p135 -sssS'grch37' -p136 -(dp137 -g36 -S'NC_000004.11:g.153332910_153332911insAGG' -p138 -sg38 -(dp139 -g40 -g58 -sg42 -g43 -sg44 -S'153332910' -p140 -sg46 -VCAGG -p141 -sssS'grch38' -p142 -(dp143 -g36 -S'NC_000004.12:g.152411758_152411759insAGG' -p144 -sg38 -(dp145 -g40 -g58 -sg42 -g43 -sg44 -S'152411758' -p146 -sg46 -VCAGG -p147 -ssssg67 -(dp148 -g69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243998.1' -p149 -sg71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257069.1' -p150 -sssS'flag' -p151 -S'gene_variant' -p152 -sS'NM_001349798.2:c.45_46insCCT' -p153 -(dp154 -g3 -g4 -sg5 -(lp155 -S'NC_000004.11:g.153332910C>CAGG automapped to NC_000004.11:g.153332912_153332913insGAG' -p156 -aS'RefSeqGene record not available' -p157 -asg12 -g4 -sg13 -(lp158 -sg15 -VHomo sapiens F-box and WD repeat domain containing 7 (FBXW7), transcript variant 5, mRNA -p159 -sg17 -S'FBXW7' -p160 -sg19 -(dp161 -g21 -S'NP_001336727.1:p.(Thr15_Gly16insPro)' -p162 -sg23 -S'NP_001336727.1:p.(T15_G16insP)' -p163 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_001349798.2:c.45_46insCCT' -p164 -sg31 -g4 -sg32 -(dp165 -S'hg19' -p166 
-(dp167 -g36 -S'NC_000004.11:g.153332910_153332911insAGG' -p168 -sg38 -(dp169 -g40 -g41 -sg42 -g43 -sg44 -S'153332910' -p170 -sg46 -VCAGG -p171 -sssS'grch37' -p172 -(dp173 -g36 -S'NC_000004.11:g.153332910_153332911insAGG' -p174 -sg38 -(dp175 -g40 -g58 -sg42 -g43 -sg44 -S'153332910' -p176 -sg46 -VCAGG -p177 -ssssg67 -(dp178 -g69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001336727.1' -p179 -sg71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001349798.2' -p180 -sssS'metadata' -p181 -(dp182 -S'variantvalidator_hgvs_version' -p183 -S'1.1.3' -p184 -sS'uta_schema' -p185 -S'uta_20180821' -p186 -sS'seqrepo_db' -p187 -S'2018-08-21' -p188 -sS'variantvalidator_version' -p189 -S'v0.2' -p190 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant285.txt b/VariantValidator/testing/testOutputsMasterITS/variant285.txt deleted file mode 100644 index 345bc094..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant285.txt +++ /dev/null @@ -1,156 +0,0 @@ -(dp0 -S'flag' -p1 -S'intergenic' -p2 -sS'Intergenic_Variant_1' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'No transcripts found that fully overlap the described variation in the genomic sequence' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -g6 -sS'gene_symbol' -p14 -g6 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -g6 -sS'slr' -p18 -g6 -ssS'submitted_variant' -p19 -S'5-1295183-G-A' -p20 -sS'genome_context_intronic_sequence' -p21 -g6 -sS'hgvs_lrg_variant' -p22 -g6 -sS'hgvs_transcript_variant' -p23 -g6 -sS'hgvs_refseqgene_variant' -p24 -g6 -sS'primary_assembly_loci' -p25 -(dp26 -S'hg19' -p27 -(dp28 -S'hgvs_genomic_description' -p29 -VNC_000005.9:g.1295183G>A -p30 -sS'vcf' -p31 -(dp32 -S'chr' -p33 -S'chr5' -p34 -sS'ref' -p35 -S'G' -p36 -sS'pos' -p37 -S'1295183' -p38 -sS'alt' -p39 -S'A' -p40 -sssS'grch37' -p41 -(dp42 -g29 
-VNC_000005.9:g.1295183G>A -p43 -sg31 -(dp44 -g33 -S'5' -p45 -sg35 -g36 -sg37 -g38 -sg39 -g40 -sssS'hg38' -p46 -(dp47 -g29 -VNC_000005.10:g.1295068G>A -p48 -sg31 -(dp49 -g33 -g34 -sg35 -g36 -sg37 -S'1295068' -p50 -sg39 -g40 -sssS'grch38' -p51 -(dp52 -g29 -VNC_000005.10:g.1295068G>A -p53 -sg31 -(dp54 -g33 -g45 -sg35 -g36 -sg37 -g50 -sg39 -g40 -ssssS'reference_sequence_records' -p55 -g6 -ssS'metadata' -p56 -(dp57 -S'variantvalidator_hgvs_version' -p58 -S'1.1.3' -p59 -sS'uta_schema' -p60 -S'uta_20180821' -p61 -sS'seqrepo_db' -p62 -S'2018-08-21' -p63 -sS'variantvalidator_version' -p64 -S'v0.2' -p65 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant286.txt b/VariantValidator/testing/testOutputsMasterITS/variant286.txt deleted file mode 100644 index ebc21b1a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant286.txt +++ /dev/null @@ -1,389 +0,0 @@ -(dp0 -S'NM_003664.4:c.2409_2411del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000005.9:g.77396835TTTC>T automapped to NC_000005.9:g.77396838_77396840delCTT' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens adaptor related protein complex 3 subunit beta 1 (AP3B1), transcript variant 1, mRNA -p13 -sS'gene_symbol' -p14 -S'AP3B1' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_003655.3:p.(Lys804del)' -p19 -sS'slr' -p20 -S'NP_003655.3:p.(K804del)' -p21 -ssS'submitted_variant' -p22 -S'5-77396835-TTTC-T' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_003664.4:c.2409_2411del' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'grch38' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000005.10:g.78101012_78101014del' -p34 -sS'vcf' -p35 -(dp36 -S'chr' 
-p37 -S'5' -p38 -sS'ref' -p39 -S'TTTC' -p40 -sS'pos' -p41 -S'78101011' -p42 -sS'alt' -p43 -S'T' -p44 -sssS'grch37' -p45 -(dp46 -g33 -S'NC_000005.9:g.77396836_77396838del' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'TTTC' -p49 -sg41 -S'77396835' -p50 -sg43 -g44 -sssS'hg38' -p51 -(dp52 -g33 -S'NC_000005.10:g.78101012_78101014del' -p53 -sg35 -(dp54 -g37 -S'chr5' -p55 -sg39 -S'TTTC' -p56 -sg41 -S'78101011' -p57 -sg43 -g44 -sssS'hg19' -p58 -(dp59 -g33 -S'NC_000005.9:g.77396836_77396838del' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -S'TTTC' -p62 -sg41 -S'77396835' -p63 -sg43 -g44 -ssssS'reference_sequence_records' -p64 -(dp65 -S'protein' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003655.3' -p67 -sS'transcript' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003664.4' -p69 -sssS'flag' -p70 -S'gene_variant' -p71 -sS'NM_003664.3:c.2409_2411del' -p72 -(dp73 -g3 -g4 -sg5 -(lp74 -S'NC_000005.9:g.77396835TTTC>T automapped to NC_000005.9:g.77396838_77396840delCTT' -p75 -aS'A more recent version of the selected reference sequence NM_003664.3 is available (NM_003664.4)' -p76 -aS'NM_003664.4:c.2409_2411delGAA MUST be fully validated prior to use in reports' -p77 -aS'select_variants=NM_003664.4:c.2409_2411del' -p78 -aS'RefSeqGene record not available' -p79 -asg9 -g4 -sg10 -(lp80 -sg12 -VHomo sapiens adaptor-related protein complex 3, beta 1 subunit (AP3B1), mRNA -p81 -sg14 -S'AP3B1' -p82 -sg16 -(dp83 -g18 -S'NP_003655.3:p.(Lys804del)' -p84 -sg20 -S'NP_003655.3:p.(K804del)' -p85 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_003664.3:c.2409_2411del' -p86 -sg28 -g4 -sg29 -(dp87 -S'hg19' -p88 -(dp89 -g33 -S'NC_000005.9:g.77396836_77396838del' -p90 -sg35 -(dp91 -g37 -g55 -sg39 -S'TTTC' -p92 -sg41 -S'77396835' -p93 -sg43 -g44 -sssS'grch37' -p94 -(dp95 -g33 -S'NC_000005.9:g.77396836_77396838del' -p96 -sg35 -(dp97 -g37 -g38 -sg39 -S'TTTC' -p98 -sg41 -S'77396835' -p99 -sg43 -g44 -ssssg64 -(dp100 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003655.3' -p101 -sg68 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003664.3' -p102 -sssS'NM_001271769.1:c.2262_2264del' -p103 -(dp104 -g3 -g4 -sg5 -(lp105 -S'NC_000005.9:g.77396835TTTC>T automapped to NC_000005.9:g.77396838_77396840delCTT' -p106 -aS'RefSeqGene record not available' -p107 -asg9 -g4 -sg10 -(lp108 -sg12 -VHomo sapiens adaptor related protein complex 3 subunit beta 1 (AP3B1), transcript variant 2, mRNA -p109 -sg14 -S'AP3B1' -p110 -sg16 -(dp111 -g18 -S'NP_001258698.1:p.(Lys755del)' -p112 -sg20 -S'NP_001258698.1:p.(K755del)' -p113 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001271769.1:c.2262_2264del' -p114 -sg28 -g4 -sg29 -(dp115 -S'grch38' -p116 -(dp117 -g33 -S'NC_000005.10:g.78101012_78101014del' -p118 -sg35 -(dp119 -g37 -g38 -sg39 -S'TTTC' -p120 -sg41 -S'78101011' -p121 -sg43 -g44 -sssS'grch37' -p122 -(dp123 -g33 -S'NC_000005.9:g.77396836_77396838del' -p124 -sg35 -(dp125 -g37 -g38 -sg39 -S'TTTC' -p126 -sg41 -S'77396835' -p127 -sg43 -g44 -sssg51 -(dp128 -g33 -S'NC_000005.10:g.78101012_78101014del' -p129 -sg35 -(dp130 -g37 -g55 -sg39 -S'TTTC' -p131 -sg41 -S'78101011' -p132 -sg43 -g44 -sssS'hg19' -p133 -(dp134 -g33 -S'NC_000005.9:g.77396836_77396838del' -p135 -sg35 -(dp136 -g37 -g55 -sg39 -S'TTTC' -p137 -sg41 -S'77396835' -p138 -sg43 -g44 -ssssg64 -(dp139 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001258698.1' -p140 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001271769.1' -p141 -sssS'metadata' -p142 -(dp143 -S'variantvalidator_hgvs_version' -p144 -S'1.1.3' -p145 -sS'uta_schema' -p146 -S'uta_20180821' -p147 -sS'seqrepo_db' -p148 -S'2018-08-21' -p149 -sS'variantvalidator_version' -p150 -S'v0.2' -p151 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant287.txt b/VariantValidator/testing/testOutputsMasterITS/variant287.txt deleted file mode 100644 index 30ff8d6b..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant287.txt +++ /dev/null @@ -1,758 +0,0 @@ -(dp0 -S'NM_000414.3:c.302+3_302+6del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000005.9:g.118811422GGTGA>G automapped to NC_000005.9:g.118811425_118811428del' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 2, mRNA -p13 -sS'gene_symbol' -p14 -S'HSD17B4' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_000405.1:p.?' -p19 -sS'slr' -p20 -S'NP_000405.1:p.?' -p21 -ssS'submitted_variant' -p22 -S'5-118811422-GGTGA-G' -p23 -sS'genome_context_intronic_sequence' -p24 -S'NC_000005.9(NM_000414.3):c.302+3_302+6del' -p25 -sS'hgvs_lrg_variant' -p26 -g4 -sS'hgvs_transcript_variant' -p27 -S'NM_000414.3:c.302+3_302+6del' -p28 -sS'hgvs_refseqgene_variant' -p29 -g4 -sS'primary_assembly_loci' -p30 -(dp31 -S'grch38' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000005.10:g.119475730_119475733del' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'5' -p39 -sS'ref' -p40 -S'GGTGA' -p41 -sS'pos' -p42 -S'119475727' -p43 -sS'alt' -p44 -S'G' -p45 -sssS'grch37' -p46 -(dp47 -g34 -S'NC_000005.9:g.118811425_118811428del' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'GGTGA' -p50 -sg42 -S'118811422' -p51 -sg44 -g45 -sssS'hg38' -p52 -(dp53 -g34 -S'NC_000005.10:g.119475730_119475733del' -p54 -sg36 -(dp55 -g38 -S'chr5' -p56 -sg40 -S'GGTGA' -p57 -sg42 -S'119475727' -p58 -sg44 -g45 -sssS'hg19' -p59 -(dp60 -g34 -S'NC_000005.9:g.118811425_118811428del' -p61 -sg36 -(dp62 -g38 -g56 -sg40 -S'GGTGA' -p63 -sg42 -S'118811422' -p64 -sg44 
-g45 -ssssS'reference_sequence_records' -p65 -(dp66 -S'protein' -p67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000405.1' -p68 -sS'transcript' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000414.3' -p70 -sssS'NM_001292028.1:c.-110+3_-110+6del' -p71 -(dp72 -g3 -g4 -sg5 -(lp73 -S'NC_000005.9:g.118811422GGTGA>G automapped to NC_000005.9:g.118811425_118811428del' -p74 -aS'RefSeqGene record not available' -p75 -asg9 -g4 -sg10 -(lp76 -sg12 -VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 5, mRNA -p77 -sg14 -S'HSD17B4' -p78 -sg16 -(dp79 -g18 -S'NP_001278957.1:p.?' -p80 -sg20 -S'NP_001278957.1:p.?' -p81 -ssg22 -g23 -sg24 -S'NC_000005.9(NM_001292028.1):c.-110+3_-110+6del' -p82 -sg26 -g4 -sg27 -S'NM_001292028.1:c.-110+3_-110+6del' -p83 -sg29 -g4 -sg30 -(dp84 -S'grch38' -p85 -(dp86 -g34 -S'NC_000005.10:g.119475730_119475733del' -p87 -sg36 -(dp88 -g38 -g39 -sg40 -S'GGTGA' -p89 -sg42 -S'119475727' -p90 -sg44 -g45 -sssS'grch37' -p91 -(dp92 -g34 -S'NC_000005.9:g.118811425_118811428del' -p93 -sg36 -(dp94 -g38 -g39 -sg40 -S'GGTGA' -p95 -sg42 -S'118811422' -p96 -sg44 -g45 -sssg52 -(dp97 -g34 -S'NC_000005.10:g.119475730_119475733del' -p98 -sg36 -(dp99 -g38 -g56 -sg40 -S'GGTGA' -p100 -sg42 -S'119475727' -p101 -sg44 -g45 -sssS'hg19' -p102 -(dp103 -g34 -S'NC_000005.9:g.118811425_118811428del' -p104 -sg36 -(dp105 -g38 -g56 -sg40 -S'GGTGA' -p106 -sg42 -S'118811422' -p107 -sg44 -g45 -ssssg65 -(dp108 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278957.1' -p109 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001292028.1' -p110 -sssS'NM_001199291.2:c.377+3_377+6del' -p111 -(dp112 -g3 -g4 -sg5 -(lp113 -S'NC_000005.9:g.118811422GGTGA>G automapped to NC_000005.9:g.118811425_118811428del' -p114 -aS'RefSeqGene record not available' -p115 -asg9 -g4 -sg10 -(lp116 -sg12 -VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 1, mRNA -p117 -sg14 -S'HSD17B4' -p118 -sg16 -(dp119 -g18 -S'NP_001186220.1:p.?' 
-p120 -sg20 -S'NP_001186220.1:p.?' -p121 -ssg22 -g23 -sg24 -S'NC_000005.9(NM_001199291.2):c.377+3_377+6del' -p122 -sg26 -g4 -sg27 -S'NM_001199291.2:c.377+3_377+6del' -p123 -sg29 -g4 -sg30 -(dp124 -S'grch38' -p125 -(dp126 -g34 -S'NC_000005.10:g.119475730_119475733del' -p127 -sg36 -(dp128 -g38 -g39 -sg40 -S'GGTGA' -p129 -sg42 -S'119475727' -p130 -sg44 -g45 -sssS'grch37' -p131 -(dp132 -g34 -S'NC_000005.9:g.118811425_118811428del' -p133 -sg36 -(dp134 -g38 -g39 -sg40 -S'GGTGA' -p135 -sg42 -S'118811422' -p136 -sg44 -g45 -sssg52 -(dp137 -g34 -S'NC_000005.10:g.119475730_119475733del' -p138 -sg36 -(dp139 -g38 -g56 -sg40 -S'GGTGA' -p140 -sg42 -S'119475727' -p141 -sg44 -g45 -sssS'hg19' -p142 -(dp143 -g34 -S'NC_000005.9:g.118811425_118811428del' -p144 -sg36 -(dp145 -g38 -g56 -sg40 -S'GGTGA' -p146 -sg42 -S'118811422' -p147 -sg44 -g45 -ssssg65 -(dp148 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186220.1' -p149 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199291.2' -p150 -sssS'flag' -p151 -S'gene_variant' -p152 -sS'NM_001292027.1:c.230+3_230+6del' -p153 -(dp154 -g3 -g4 -sg5 -(lp155 -S'NC_000005.9:g.118811422GGTGA>G automapped to NC_000005.9:g.118811425_118811428del' -p156 -aS'RefSeqGene record not available' -p157 -asg9 -g4 -sg10 -(lp158 -sg12 -VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 4, mRNA -p159 -sg14 -S'HSD17B4' -p160 -sg16 -(dp161 -g18 -S'NP_001278956.1:p.?' -p162 -sg20 -S'NP_001278956.1:p.?' 
-p163 -ssg22 -g23 -sg24 -S'NC_000005.9(NM_001292027.1):c.230+3_230+6del' -p164 -sg26 -g4 -sg27 -S'NM_001292027.1:c.230+3_230+6del' -p165 -sg29 -g4 -sg30 -(dp166 -S'grch38' -p167 -(dp168 -g34 -S'NC_000005.10:g.119475730_119475733del' -p169 -sg36 -(dp170 -g38 -g39 -sg40 -S'GGTGA' -p171 -sg42 -S'119475727' -p172 -sg44 -g45 -sssS'grch37' -p173 -(dp174 -g34 -S'NC_000005.9:g.118811425_118811428del' -p175 -sg36 -(dp176 -g38 -g39 -sg40 -S'GGTGA' -p177 -sg42 -S'118811422' -p178 -sg44 -g45 -sssg52 -(dp179 -g34 -S'NC_000005.10:g.119475730_119475733del' -p180 -sg36 -(dp181 -g38 -g56 -sg40 -S'GGTGA' -p182 -sg42 -S'119475727' -p183 -sg44 -g45 -sssS'hg19' -p184 -(dp185 -g34 -S'NC_000005.9:g.118811425_118811428del' -p186 -sg36 -(dp187 -g38 -g56 -sg40 -S'GGTGA' -p188 -sg42 -S'118811422' -p189 -sg44 -g45 -ssssg65 -(dp190 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278956.1' -p191 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001292027.1' -p192 -sssS'NM_001199291.1:c.377+3_377+6del' -p193 -(dp194 -g3 -g4 -sg5 -(lp195 -S'NC_000005.9:g.118811422GGTGA>G automapped to NC_000005.9:g.118811425_118811428del' -p196 -aS'A more recent version of the selected reference sequence NM_001199291.1 is available (NM_001199291.2)' -p197 -aS'NM_001199291.2:c.377+3_377+6del MUST be fully validated prior to use in reports' -p198 -aS'select_variants=NM_001199291.2:c.377+3_377+6del' -p199 -aS'RefSeqGene record not available' -p200 -asg9 -g4 -sg10 -(lp201 -sg12 -VHomo sapiens hydroxysteroid (17-beta) dehydrogenase 4 (HSD17B4), transcript variant 1, mRNA -p202 -sg14 -S'HSD17B4' -p203 -sg16 -(dp204 -g18 -S'NP_001186220.1:p.?' -p205 -sg20 -S'NP_001186220.1:p.?' 
-p206 -ssg22 -g23 -sg24 -S'NC_000005.9(NM_001199291.1):c.377+3_377+6del' -p207 -sg26 -g4 -sg27 -S'NM_001199291.1:c.377+3_377+6del' -p208 -sg29 -g4 -sg30 -(dp209 -S'hg19' -p210 -(dp211 -g34 -S'NC_000005.9:g.118811425_118811428del' -p212 -sg36 -(dp213 -g38 -g56 -sg40 -S'GGTGA' -p214 -sg42 -S'118811422' -p215 -sg44 -g45 -sssS'grch37' -p216 -(dp217 -g34 -S'NC_000005.9:g.118811425_118811428del' -p218 -sg36 -(dp219 -g38 -g39 -sg40 -S'GGTGA' -p220 -sg42 -S'118811422' -p221 -sg44 -g45 -ssssg65 -(dp222 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186220.1' -p223 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199291.1' -p224 -sssS'metadata' -p225 -(dp226 -S'variantvalidator_hgvs_version' -p227 -S'1.1.3' -p228 -sS'uta_schema' -p229 -S'uta_20180821' -p230 -sS'seqrepo_db' -p231 -S'2018-08-21' -p232 -sS'variantvalidator_version' -p233 -S'v0.2' -p234 -ssS'NM_001199292.1:c.248+3_248+6del' -p235 -(dp236 -g3 -g4 -sg5 -(lp237 -S'NC_000005.9:g.118811422GGTGA>G automapped to NC_000005.9:g.118811425_118811428del' -p238 -aS'RefSeqGene record not available' -p239 -asg9 -g4 -sg10 -(lp240 -sg12 -VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 3, mRNA -p241 -sg14 -S'HSD17B4' -p242 -sg16 -(dp243 -g18 -S'NP_001186221.1:p.?' -p244 -sg20 -S'NP_001186221.1:p.?' 
-p245 -ssg22 -g23 -sg24 -S'NC_000005.9(NM_001199292.1):c.248+3_248+6del' -p246 -sg26 -g4 -sg27 -S'NM_001199292.1:c.248+3_248+6del' -p247 -sg29 -g4 -sg30 -(dp248 -S'grch38' -p249 -(dp250 -g34 -S'NC_000005.10:g.119475730_119475733del' -p251 -sg36 -(dp252 -g38 -g39 -sg40 -S'GGTGA' -p253 -sg42 -S'119475727' -p254 -sg44 -g45 -sssS'grch37' -p255 -(dp256 -g34 -S'NC_000005.9:g.118811425_118811428del' -p257 -sg36 -(dp258 -g38 -g39 -sg40 -S'GGTGA' -p259 -sg42 -S'118811422' -p260 -sg44 -g45 -sssg52 -(dp261 -g34 -S'NC_000005.10:g.119475730_119475733del' -p262 -sg36 -(dp263 -g38 -g56 -sg40 -S'GGTGA' -p264 -sg42 -S'119475727' -p265 -sg44 -g45 -sssS'hg19' -p266 -(dp267 -g34 -S'NC_000005.9:g.118811425_118811428del' -p268 -sg36 -(dp269 -g38 -g56 -sg40 -S'GGTGA' -p270 -sg42 -S'118811422' -p271 -sg44 -g45 -ssssg65 -(dp272 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186221.1' -p273 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199292.1' -p274 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant288.txt b/VariantValidator/testing/testOutputsMasterITS/variant288.txt deleted file mode 100644 index b855ac72..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant288.txt +++ /dev/null @@ -1,758 +0,0 @@ -(dp0 -S'NM_001292028.1:c.-110+1_-110+5del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000005.9:g.118811422GGTGAG>G automapped to NC_000005.9:g.118811423_118811427del' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 5, mRNA -p13 -sS'gene_symbol' -p14 -S'HSD17B4' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001278957.1:p.?' -p19 -sS'slr' -p20 -S'NP_001278957.1:p.?' 
-p21 -ssS'submitted_variant' -p22 -S'5-118811422-GGTGAG-G' -p23 -sS'genome_context_intronic_sequence' -p24 -S'NC_000005.9(NM_001292028.1):c.-110+1_-110+5del' -p25 -sS'hgvs_lrg_variant' -p26 -g4 -sS'hgvs_transcript_variant' -p27 -S'NM_001292028.1:c.-110+1_-110+5del' -p28 -sS'hgvs_refseqgene_variant' -p29 -g4 -sS'primary_assembly_loci' -p30 -(dp31 -S'grch38' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000005.10:g.119475728_119475732del' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'5' -p39 -sS'ref' -p40 -S'GGGTGA' -p41 -sS'pos' -p42 -S'119475726' -p43 -sS'alt' -p44 -S'G' -p45 -sssS'grch37' -p46 -(dp47 -g34 -S'NC_000005.9:g.118811423_118811427del' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'GGGTGA' -p50 -sg42 -S'118811421' -p51 -sg44 -g45 -sssS'hg38' -p52 -(dp53 -g34 -S'NC_000005.10:g.119475728_119475732del' -p54 -sg36 -(dp55 -g38 -S'chr5' -p56 -sg40 -S'GGGTGA' -p57 -sg42 -S'119475726' -p58 -sg44 -g45 -sssS'hg19' -p59 -(dp60 -g34 -S'NC_000005.9:g.118811423_118811427del' -p61 -sg36 -(dp62 -g38 -g56 -sg40 -S'GGGTGA' -p63 -sg42 -S'118811421' -p64 -sg44 -g45 -ssssS'reference_sequence_records' -p65 -(dp66 -S'protein' -p67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278957.1' -p68 -sS'transcript' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001292028.1' -p70 -sssS'NM_000414.3:c.302+1_302+5del' -p71 -(dp72 -g3 -g4 -sg5 -(lp73 -S'NC_000005.9:g.118811422GGTGAG>G automapped to NC_000005.9:g.118811423_118811427del' -p74 -aS'RefSeqGene record not available' -p75 -asg9 -g4 -sg10 -(lp76 -sg12 -VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 2, mRNA -p77 -sg14 -S'HSD17B4' -p78 -sg16 -(dp79 -g18 -S'NP_000405.1:p.?' -p80 -sg20 -S'NP_000405.1:p.?' 
-p81 -ssg22 -g23 -sg24 -S'NC_000005.9(NM_000414.3):c.302+1_302+5del' -p82 -sg26 -g4 -sg27 -S'NM_000414.3:c.302+1_302+5del' -p83 -sg29 -g4 -sg30 -(dp84 -S'grch38' -p85 -(dp86 -g34 -S'NC_000005.10:g.119475728_119475732del' -p87 -sg36 -(dp88 -g38 -g39 -sg40 -S'GGGTGA' -p89 -sg42 -S'119475726' -p90 -sg44 -g45 -sssS'grch37' -p91 -(dp92 -g34 -S'NC_000005.9:g.118811423_118811427del' -p93 -sg36 -(dp94 -g38 -g39 -sg40 -S'GGGTGA' -p95 -sg42 -S'118811421' -p96 -sg44 -g45 -sssg52 -(dp97 -g34 -S'NC_000005.10:g.119475728_119475732del' -p98 -sg36 -(dp99 -g38 -g56 -sg40 -S'GGGTGA' -p100 -sg42 -S'119475726' -p101 -sg44 -g45 -sssS'hg19' -p102 -(dp103 -g34 -S'NC_000005.9:g.118811423_118811427del' -p104 -sg36 -(dp105 -g38 -g56 -sg40 -S'GGGTGA' -p106 -sg42 -S'118811421' -p107 -sg44 -g45 -ssssg65 -(dp108 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000405.1' -p109 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000414.3' -p110 -sssS'NM_001199291.2:c.377+1_377+5del' -p111 -(dp112 -g3 -g4 -sg5 -(lp113 -S'NC_000005.9:g.118811422GGTGAG>G automapped to NC_000005.9:g.118811423_118811427del' -p114 -aS'RefSeqGene record not available' -p115 -asg9 -g4 -sg10 -(lp116 -sg12 -VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 1, mRNA -p117 -sg14 -S'HSD17B4' -p118 -sg16 -(dp119 -g18 -S'NP_001186220.1:p.?' -p120 -sg20 -S'NP_001186220.1:p.?' 
-p121 -ssg22 -g23 -sg24 -S'NC_000005.9(NM_001199291.2):c.377+1_377+5del' -p122 -sg26 -g4 -sg27 -S'NM_001199291.2:c.377+1_377+5del' -p123 -sg29 -g4 -sg30 -(dp124 -S'grch38' -p125 -(dp126 -g34 -S'NC_000005.10:g.119475728_119475732del' -p127 -sg36 -(dp128 -g38 -g39 -sg40 -S'GGGTGA' -p129 -sg42 -S'119475726' -p130 -sg44 -g45 -sssS'grch37' -p131 -(dp132 -g34 -S'NC_000005.9:g.118811423_118811427del' -p133 -sg36 -(dp134 -g38 -g39 -sg40 -S'GGGTGA' -p135 -sg42 -S'118811421' -p136 -sg44 -g45 -sssg52 -(dp137 -g34 -S'NC_000005.10:g.119475728_119475732del' -p138 -sg36 -(dp139 -g38 -g56 -sg40 -S'GGGTGA' -p140 -sg42 -S'119475726' -p141 -sg44 -g45 -sssS'hg19' -p142 -(dp143 -g34 -S'NC_000005.9:g.118811423_118811427del' -p144 -sg36 -(dp145 -g38 -g56 -sg40 -S'GGGTGA' -p146 -sg42 -S'118811421' -p147 -sg44 -g45 -ssssg65 -(dp148 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186220.1' -p149 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199291.2' -p150 -sssS'NM_001199292.1:c.248+1_248+5del' -p151 -(dp152 -g3 -g4 -sg5 -(lp153 -S'NC_000005.9:g.118811422GGTGAG>G automapped to NC_000005.9:g.118811423_118811427del' -p154 -aS'RefSeqGene record not available' -p155 -asg9 -g4 -sg10 -(lp156 -sg12 -VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 3, mRNA -p157 -sg14 -S'HSD17B4' -p158 -sg16 -(dp159 -g18 -S'NP_001186221.1:p.?' -p160 -sg20 -S'NP_001186221.1:p.?' 
-p161 -ssg22 -g23 -sg24 -S'NC_000005.9(NM_001199292.1):c.248+1_248+5del' -p162 -sg26 -g4 -sg27 -S'NM_001199292.1:c.248+1_248+5del' -p163 -sg29 -g4 -sg30 -(dp164 -S'grch38' -p165 -(dp166 -g34 -S'NC_000005.10:g.119475728_119475732del' -p167 -sg36 -(dp168 -g38 -g39 -sg40 -S'GGGTGA' -p169 -sg42 -S'119475726' -p170 -sg44 -g45 -sssS'grch37' -p171 -(dp172 -g34 -S'NC_000005.9:g.118811423_118811427del' -p173 -sg36 -(dp174 -g38 -g39 -sg40 -S'GGGTGA' -p175 -sg42 -S'118811421' -p176 -sg44 -g45 -sssg52 -(dp177 -g34 -S'NC_000005.10:g.119475728_119475732del' -p178 -sg36 -(dp179 -g38 -g56 -sg40 -S'GGGTGA' -p180 -sg42 -S'119475726' -p181 -sg44 -g45 -sssS'hg19' -p182 -(dp183 -g34 -S'NC_000005.9:g.118811423_118811427del' -p184 -sg36 -(dp185 -g38 -g56 -sg40 -S'GGGTGA' -p186 -sg42 -S'118811421' -p187 -sg44 -g45 -ssssg65 -(dp188 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186221.1' -p189 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199292.1' -p190 -sssS'flag' -p191 -S'gene_variant' -p192 -sS'NM_001199291.1:c.377+1_377+5del' -p193 -(dp194 -g3 -g4 -sg5 -(lp195 -S'NC_000005.9:g.118811422GGTGAG>G automapped to NC_000005.9:g.118811423_118811427del' -p196 -aS'A more recent version of the selected reference sequence NM_001199291.1 is available (NM_001199291.2)' -p197 -aS'NM_001199291.2:c.377+1_377+5del MUST be fully validated prior to use in reports' -p198 -aS'select_variants=NM_001199291.2:c.377+1_377+5del' -p199 -aS'RefSeqGene record not available' -p200 -asg9 -g4 -sg10 -(lp201 -sg12 -VHomo sapiens hydroxysteroid (17-beta) dehydrogenase 4 (HSD17B4), transcript variant 1, mRNA -p202 -sg14 -S'HSD17B4' -p203 -sg16 -(dp204 -g18 -S'NP_001186220.1:p.?' -p205 -sg20 -S'NP_001186220.1:p.?' 
-p206 -ssg22 -g23 -sg24 -S'NC_000005.9(NM_001199291.1):c.377+1_377+5del' -p207 -sg26 -g4 -sg27 -S'NM_001199291.1:c.377+1_377+5del' -p208 -sg29 -g4 -sg30 -(dp209 -S'hg19' -p210 -(dp211 -g34 -S'NC_000005.9:g.118811423_118811427del' -p212 -sg36 -(dp213 -g38 -g56 -sg40 -S'GGGTGA' -p214 -sg42 -S'118811421' -p215 -sg44 -g45 -sssS'grch37' -p216 -(dp217 -g34 -S'NC_000005.9:g.118811423_118811427del' -p218 -sg36 -(dp219 -g38 -g39 -sg40 -S'GGGTGA' -p220 -sg42 -S'118811421' -p221 -sg44 -g45 -ssssg65 -(dp222 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186220.1' -p223 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199291.1' -p224 -sssS'NM_001292027.1:c.230+1_230+5del' -p225 -(dp226 -g3 -g4 -sg5 -(lp227 -S'NC_000005.9:g.118811422GGTGAG>G automapped to NC_000005.9:g.118811423_118811427del' -p228 -aS'RefSeqGene record not available' -p229 -asg9 -g4 -sg10 -(lp230 -sg12 -VHomo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 4, mRNA -p231 -sg14 -S'HSD17B4' -p232 -sg16 -(dp233 -g18 -S'NP_001278956.1:p.?' -p234 -sg20 -S'NP_001278956.1:p.?' 
-p235 -ssg22 -g23 -sg24 -S'NC_000005.9(NM_001292027.1):c.230+1_230+5del' -p236 -sg26 -g4 -sg27 -S'NM_001292027.1:c.230+1_230+5del' -p237 -sg29 -g4 -sg30 -(dp238 -S'grch38' -p239 -(dp240 -g34 -S'NC_000005.10:g.119475728_119475732del' -p241 -sg36 -(dp242 -g38 -g39 -sg40 -S'GGGTGA' -p243 -sg42 -S'119475726' -p244 -sg44 -g45 -sssS'grch37' -p245 -(dp246 -g34 -S'NC_000005.9:g.118811423_118811427del' -p247 -sg36 -(dp248 -g38 -g39 -sg40 -S'GGGTGA' -p249 -sg42 -S'118811421' -p250 -sg44 -g45 -sssg52 -(dp251 -g34 -S'NC_000005.10:g.119475728_119475732del' -p252 -sg36 -(dp253 -g38 -g56 -sg40 -S'GGGTGA' -p254 -sg42 -S'119475726' -p255 -sg44 -g45 -sssS'hg19' -p256 -(dp257 -g34 -S'NC_000005.9:g.118811423_118811427del' -p258 -sg36 -(dp259 -g38 -g56 -sg40 -S'GGGTGA' -p260 -sg42 -S'118811421' -p261 -sg44 -g45 -ssssg65 -(dp262 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278956.1' -p263 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001292027.1' -p264 -sssS'metadata' -p265 -(dp266 -S'variantvalidator_hgvs_version' -p267 -S'1.1.3' -p268 -sS'uta_schema' -p269 -S'uta_20180821' -p270 -sS'seqrepo_db' -p271 -S'2018-08-21' -p272 -sS'variantvalidator_version' -p273 -S'v0.2' -p274 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant289.txt b/VariantValidator/testing/testOutputsMasterITS/variant289.txt deleted file mode 100644 index 0415f3d6..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant289.txt +++ /dev/null @@ -1,414 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NR_110997.1:n.21del' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000005.9:g.131705587CG>C automapped to NC_000005.9:g.131705590delG' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens MIR3936 host gene (MIR3936HG), long non-coding RNA -p15 -sS'gene_symbol' -p16 -S'MIR3936HG' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'Non-coding :n.' -p21 -sS'slr' -p22 -g21 -ssS'submitted_variant' -p23 -S'5-131705587-CG-C' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NR_110997.1:n.21del' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'grch38' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000005.10:g.132369898del' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'5' -p39 -sS'ref' -p40 -S'CG' -p41 -sS'pos' -p42 -S'132369895' -p43 -sS'alt' -p44 -S'C' -p45 -sssS'grch37' -p46 -(dp47 -g34 -S'NC_000005.9:g.131705590del' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'CG' -p50 -sg42 -S'131705587' -p51 -sg44 -g45 -sssS'hg38' -p52 -(dp53 -g34 -S'NC_000005.10:g.132369898del' -p54 -sg36 -(dp55 -g38 -S'chr5' -p56 -sg40 -S'CG' -p57 -sg42 -S'132369895' -p58 -sg44 -g45 -sssS'hg19' -p59 -(dp60 -g34 -S'NC_000005.9:g.131705590del' -p61 -sg36 -(dp62 -g38 -g56 -sg40 -S'CG' -p63 -sg42 -S'131705587' -p64 -sg44 -g45 -ssssS'reference_sequence_records' -p65 -(dp66 -S'transcript' -p67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_110997.1' -p68 
-sssS'NM_003060.3:c.-75del' -p69 -(dp70 -g5 -g6 -sg7 -(lp71 -S'NC_000005.9:g.131705587CG>C automapped to NC_000005.9:g.131705590delG' -p72 -aS'RefSeqGene record not available' -p73 -asg11 -g6 -sg12 -(lp74 -sg14 -VHomo sapiens solute carrier family 22 member 5 (SLC22A5), transcript variant 2, mRNA -p75 -sg16 -S'SLC22A5' -p76 -sg18 -(dp77 -g20 -S'NP_003051.1:p.?' -p78 -sg22 -S'NP_003051.1:p.?' -p79 -ssg23 -g24 -sg25 -g6 -sg26 -g6 -sg27 -S'NM_003060.3:c.-75del' -p80 -sg29 -g6 -sg30 -(dp81 -S'grch38' -p82 -(dp83 -g34 -S'NC_000005.10:g.132369898del' -p84 -sg36 -(dp85 -g38 -g39 -sg40 -S'CG' -p86 -sg42 -S'132369895' -p87 -sg44 -g45 -sssS'grch37' -p88 -(dp89 -g34 -S'NC_000005.9:g.131705590del' -p90 -sg36 -(dp91 -g38 -g39 -sg40 -S'CG' -p92 -sg42 -S'131705587' -p93 -sg44 -g45 -sssg52 -(dp94 -g34 -S'NC_000005.10:g.132369898del' -p95 -sg36 -(dp96 -g38 -g56 -sg40 -S'CG' -p97 -sg42 -S'132369895' -p98 -sg44 -g45 -sssS'hg19' -p99 -(dp100 -g34 -S'NC_000005.9:g.131705590del' -p101 -sg36 -(dp102 -g38 -g56 -sg40 -S'CG' -p103 -sg42 -S'131705587' -p104 -sg44 -g45 -ssssg65 -(dp105 -S'protein' -p106 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003051.1' -p107 -sg67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003060.3' -p108 -sssS'NM_001308122.1:c.-75del' -p109 -(dp110 -g5 -g6 -sg7 -(lp111 -S'NC_000005.9:g.131705587CG>C automapped to NC_000005.9:g.131705590delG' -p112 -aS'RefSeqGene record not available' -p113 -asg11 -g6 -sg12 -(lp114 -sg14 -VHomo sapiens solute carrier family 22 member 5 (SLC22A5), transcript variant 1, mRNA -p115 -sg16 -S'SLC22A5' -p116 -sg18 -(dp117 -g20 -S'NP_001295051.1:p.?' -p118 -sg22 -S'NP_001295051.1:p.?' 
-p119 -ssg23 -g24 -sg25 -g6 -sg26 -g6 -sg27 -S'NM_001308122.1:c.-75del' -p120 -sg29 -g6 -sg30 -(dp121 -S'grch38' -p122 -(dp123 -g34 -S'NC_000005.10:g.132369898del' -p124 -sg36 -(dp125 -g38 -g39 -sg40 -S'CG' -p126 -sg42 -S'132369895' -p127 -sg44 -g45 -sssS'grch37' -p128 -(dp129 -g34 -S'NC_000005.9:g.131705590del' -p130 -sg36 -(dp131 -g38 -g39 -sg40 -S'CG' -p132 -sg42 -S'131705587' -p133 -sg44 -g45 -sssg52 -(dp134 -g34 -S'NC_000005.10:g.132369898del' -p135 -sg36 -(dp136 -g38 -g56 -sg40 -S'CG' -p137 -sg42 -S'132369895' -p138 -sg44 -g45 -sssS'hg19' -p139 -(dp140 -g34 -S'NC_000005.9:g.131705590del' -p141 -sg36 -(dp142 -g38 -g56 -sg40 -S'CG' -p143 -sg42 -S'131705587' -p144 -sg44 -g45 -ssssg65 -(dp145 -g106 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001295051.1' -p146 -sg67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001308122.1' -p147 -sssS'metadata' -p148 -(dp149 -S'variantvalidator_hgvs_version' -p150 -S'1.1.3' -p151 -sS'uta_schema' -p152 -S'uta_20180821' -p153 -sS'seqrepo_db' -p154 -S'2018-08-21' -p155 -sS'variantvalidator_version' -p156 -S'v0.2' -p157 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant29.txt b/VariantValidator/testing/testOutputsMasterITS/variant29.txt deleted file mode 100644 index a385ce0a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant29.txt +++ /dev/null @@ -1,80 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The given coordinate is outside the bounds of the reference sequence.' 
-p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'hgvs_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NR_138595.1:n.-810_1071+1=' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'hgvs_lrg_variant' -p20 -g4 -sS'hgvs_transcript_variant' -p21 -g4 -sS'hgvs_refseqgene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -sS'reference_sequence_records' -p25 -g4 -ssS'flag' -p26 -S'warning' -p27 -sS'metadata' -p28 -(dp29 -S'variantvalidator_hgvs_version' -p30 -S'1.1.3' -p31 -sS'uta_schema' -p32 -S'uta_20180821' -p33 -sS'seqrepo_db' -p34 -S'2018-08-21' -p35 -sS'variantvalidator_version' -p36 -S'v0.2' -p37 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant290.txt b/VariantValidator/testing/testOutputsMasterITS/variant290.txt deleted file mode 100644 index e7d08e3b..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant290.txt +++ /dev/null @@ -1,171 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_024577.3:c.2813A>G' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens SH3 domain and tetratricopeptide repeats 2 (SH3TC2), mRNA -p14 -sS'gene_symbol' -p15 -S'SH3TC2' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_078853.2:p.(His938Arg)' -p20 -sS'slr' -p21 -S'NP_078853.2:p.(H938R)' -p22 -ssS'submitted_variant' -p23 -S'5-148406482-T-C' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_024577.3:c.2813A>G' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'grch38' -p32 -(dp33 
-S'hgvs_genomic_description' -p34 -S'NC_000005.10:g.149026919T>C' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'5' -p39 -sS'ref' -p40 -VT -p41 -sS'pos' -p42 -S'149026919' -p43 -sS'alt' -p44 -VC -p45 -sssS'grch37' -p46 -(dp47 -g34 -S'NC_000005.9:g.148406482T>C' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'148406482' -p50 -sg44 -g45 -sssS'hg38' -p51 -(dp52 -g34 -S'NC_000005.10:g.149026919T>C' -p53 -sg36 -(dp54 -g38 -S'chr5' -p55 -sg40 -g41 -sg42 -S'149026919' -p56 -sg44 -g45 -sssS'hg19' -p57 -(dp58 -g34 -S'NC_000005.9:g.148406482T>C' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'148406482' -p61 -sg44 -g45 -ssssS'reference_sequence_records' -p62 -(dp63 -S'protein' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_078853.2' -p65 -sS'transcript' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_024577.3' -p67 -sssS'metadata' -p68 -(dp69 -S'variantvalidator_hgvs_version' -p70 -S'1.1.3' -p71 -sS'uta_schema' -p72 -S'uta_20180821' -p73 -sS'seqrepo_db' -p74 -S'2018-08-21' -p75 -sS'variantvalidator_version' -p76 -S'v0.2' -p77 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant291.txt b/VariantValidator/testing/testOutputsMasterITS/variant291.txt deleted file mode 100644 index 6fb68886..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant291.txt +++ /dev/null @@ -1,176 +0,0 @@ -(dp0 -S'NM_014845.5:c.123_124insCAG' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000006.11:g.110036337T>TCAG automapped to NC_000006.11:g.110036337_110036338insCAG' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens FIG4 phosphoinositide 5-phosphatase (FIG4), mRNA -p13 -sS'gene_symbol' -p14 -S'FIG4' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_055660.1:p.(Ile41_Asp42insGln)' -p19 -sS'slr' -p20 -S'NP_055660.1:p.(I41_D42insQ)' -p21 -ssS'submitted_variant' -p22 -S'6-110036337-T-TCAG' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_014845.5:c.123_124insCAG' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000006.11:g.110036337_110036338insCAG' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr6' -p38 -sS'ref' -p39 -S'T' -p40 -sS'pos' -p41 -S'110036337' -p42 -sS'alt' -p43 -S'TCAG' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000006.12:g.109715134_109715135insCAG' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'109715134' -p49 -sg43 -S'TCAG' -p50 -sssS'grch37' -p51 -(dp52 -g33 -S'NC_000006.11:g.110036337_110036338insCAG' -p53 -sg35 -(dp54 -g37 -S'6' -p55 -sg39 -g40 -sg41 -S'110036337' -p56 -sg43 -S'TCAG' -p57 -sssS'grch38' -p58 -(dp59 -g33 -S'NC_000006.12:g.109715134_109715135insCAG' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -g40 -sg41 -S'109715134' -p62 -sg43 -S'TCAG' -p63 -ssssS'reference_sequence_records' 
-p64 -(dp65 -S'protein' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055660.1' -p67 -sS'transcript' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014845.5' -p69 -sssS'flag' -p70 -S'gene_variant' -p71 -sS'metadata' -p72 -(dp73 -S'variantvalidator_hgvs_version' -p74 -S'1.1.3' -p75 -sS'uta_schema' -p76 -S'uta_20180821' -p77 -sS'seqrepo_db' -p78 -S'2018-08-21' -p79 -sS'variantvalidator_version' -p80 -S'v0.2' -p81 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant292.txt b/VariantValidator/testing/testOutputsMasterITS/variant292.txt deleted file mode 100644 index bf03b80b..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant292.txt +++ /dev/null @@ -1,176 +0,0 @@ -(dp0 -S'NM_014845.5:c.124_126del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000006.11:g.110036337TGAT>T automapped to NC_000006.11:g.110036338_110036340delGAT' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens FIG4 phosphoinositide 5-phosphatase (FIG4), mRNA -p13 -sS'gene_symbol' -p14 -S'FIG4' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_055660.1:p.(Asp42del)' -p19 -sS'slr' -p20 -S'NP_055660.1:p.(D42del)' -p21 -ssS'submitted_variant' -p22 -S'6-110036337-TGAT-T' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_014845.5:c.124_126del' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000006.11:g.110036338_110036340del' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr6' -p38 -sS'ref' -p39 -S'TTGA' -p40 -sS'pos' -p41 -S'110036336' -p42 -sS'alt' -p43 -S'T' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000006.12:g.109715135_109715137del' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'TTGA' -p49 
-sg41 -S'109715133' -p50 -sg43 -g44 -sssS'grch37' -p51 -(dp52 -g33 -S'NC_000006.11:g.110036338_110036340del' -p53 -sg35 -(dp54 -g37 -S'6' -p55 -sg39 -S'TTGA' -p56 -sg41 -S'110036336' -p57 -sg43 -g44 -sssS'grch38' -p58 -(dp59 -g33 -S'NC_000006.12:g.109715135_109715137del' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -S'TTGA' -p62 -sg41 -S'109715133' -p63 -sg43 -g44 -ssssS'reference_sequence_records' -p64 -(dp65 -S'protein' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055660.1' -p67 -sS'transcript' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014845.5' -p69 -sssS'flag' -p70 -S'gene_variant' -p71 -sS'metadata' -p72 -(dp73 -S'variantvalidator_hgvs_version' -p74 -S'1.1.3' -p75 -sS'uta_schema' -p76 -S'uta_20180821' -p77 -sS'seqrepo_db' -p78 -S'2018-08-21' -p79 -sS'variantvalidator_version' -p80 -S'v0.2' -p81 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant293.txt b/VariantValidator/testing/testOutputsMasterITS/variant293.txt deleted file mode 100644 index 0540b605..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant293.txt +++ /dev/null @@ -1,286 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_182961.3:c.14018G>T' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens spectrin repeat containing nuclear envelope protein 1 (SYNE1), transcript variant 1, mRNA -p14 -sS'gene_symbol' -p15 -S'SYNE1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_892006.3:p.(Arg4673Leu)' -p20 -sS'slr' -p21 -S'NP_892006.3:p.(R4673L)' -p22 -ssS'submitted_variant' -p23 -S'6-152651802-C-A' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_182961.3:c.14018G>T' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 
-sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000006.11:g.152651802C>A' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr6' -p39 -sS'ref' -p40 -VC -p41 -sS'pos' -p42 -S'152651802' -p43 -sS'alt' -p44 -VA -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000006.12:g.152330667C>A' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'152330667' -p50 -sg44 -g45 -sssS'grch37' -p51 -(dp52 -g34 -S'NC_000006.11:g.152651802C>A' -p53 -sg36 -(dp54 -g38 -S'6' -p55 -sg40 -g41 -sg42 -S'152651802' -p56 -sg44 -g45 -sssS'grch38' -p57 -(dp58 -g34 -S'NC_000006.12:g.152330667C>A' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'152330667' -p61 -sg44 -g45 -ssssS'reference_sequence_records' -p62 -(dp63 -S'protein' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_892006.3' -p65 -sS'transcript' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_182961.3' -p67 -sssS'NM_033071.3:c.13805G>T' -p68 -(dp69 -g5 -g6 -sg7 -(lp70 -S'RefSeqGene record not available' -p71 -asg10 -g6 -sg11 -(lp72 -sg13 -VHomo sapiens spectrin repeat containing nuclear envelope protein 1 (SYNE1), transcript variant 2, mRNA -p73 -sg15 -S'SYNE1' -p74 -sg17 -(dp75 -g19 -S'NP_149062.1:p.(Arg4602Leu)' -p76 -sg21 -S'NP_149062.1:p.(R4602L)' -p77 -ssg23 -g24 -sg25 -g6 -sg26 -g6 -sg27 -S'NM_033071.3:c.13805G>T' -p78 -sg29 -g6 -sg30 -(dp79 -S'hg19' -p80 -(dp81 -g34 -S'NC_000006.11:g.152651802C>A' -p82 -sg36 -(dp83 -g38 -g39 -sg40 -g41 -sg42 -S'152651802' -p84 -sg44 -g45 -sssg46 -(dp85 -g34 -S'NC_000006.12:g.152330667C>A' -p86 -sg36 -(dp87 -g38 -g39 -sg40 -g41 -sg42 -S'152330667' -p88 -sg44 -g45 -sssS'grch37' -p89 -(dp90 -g34 -S'NC_000006.11:g.152651802C>A' -p91 -sg36 -(dp92 -g38 -g55 -sg40 -g41 -sg42 -S'152651802' -p93 -sg44 -g45 -sssS'grch38' -p94 -(dp95 -g34 -S'NC_000006.12:g.152330667C>A' -p96 -sg36 -(dp97 -g38 -g55 -sg40 -g41 -sg42 -S'152330667' -p98 -sg44 -g45 -ssssg62 -(dp99 -g64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_149062.1' -p100 -sg66 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NM_033071.3' -p101 -sssS'metadata' -p102 -(dp103 -S'variantvalidator_hgvs_version' -p104 -S'1.1.3' -p105 -sS'uta_schema' -p106 -S'uta_20180821' -p107 -sS'seqrepo_db' -p108 -S'2018-08-21' -p109 -sS'variantvalidator_version' -p110 -S'v0.2' -p111 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant294.txt b/VariantValidator/testing/testOutputsMasterITS/variant294.txt deleted file mode 100644 index 662b8068..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant294.txt +++ /dev/null @@ -1,286 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_033071.3:c.5950G>C' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'RefSeqGene record not available' -p19 -asS'refseqgene_context_intronic_sequence' -p20 -g16 -sS'alt_genomic_loci' -p21 -(lp22 -sS'transcript_description' -p23 -VHomo sapiens spectrin repeat containing nuclear envelope protein 1 (SYNE1), transcript variant 2, mRNA -p24 -sS'gene_symbol' -p25 -S'SYNE1' -p26 -sS'hgvs_predicted_protein_consequence' -p27 -(dp28 -S'tlr' -p29 -S'NP_149062.1:p.(Ala1984Pro)' -p30 -sS'slr' -p31 -S'NP_149062.1:p.(A1984P)' -p32 -ssS'submitted_variant' -p33 -S'6-152737643-C-G' -p34 -sS'genome_context_intronic_sequence' -p35 -g16 -sS'hgvs_lrg_variant' -p36 -g16 -sS'hgvs_transcript_variant' -p37 -S'NM_033071.3:c.5950G>C' -p38 -sS'hgvs_refseqgene_variant' -p39 -g16 -sS'primary_assembly_loci' -p40 -(dp41 -S'hg19' -p42 -(dp43 -S'hgvs_genomic_description' -p44 -S'NC_000006.11:g.152737643C>G' -p45 -sS'vcf' -p46 -(dp47 -S'chr' -p48 -S'chr6' -p49 -sS'ref' -p50 -VC -p51 -sS'pos' -p52 -S'152737643' -p53 -sS'alt' -p54 -VG -p55 -sssS'hg38' -p56 -(dp57 -g44 -S'NC_000006.12:g.152416508C>G' -p58 -sg46 
-(dp59 -g48 -g49 -sg50 -g51 -sg52 -S'152416508' -p60 -sg54 -g55 -sssS'grch37' -p61 -(dp62 -g44 -S'NC_000006.11:g.152737643C>G' -p63 -sg46 -(dp64 -g48 -S'6' -p65 -sg50 -g51 -sg52 -S'152737643' -p66 -sg54 -g55 -sssS'grch38' -p67 -(dp68 -g44 -S'NC_000006.12:g.152416508C>G' -p69 -sg46 -(dp70 -g48 -g65 -sg50 -g51 -sg52 -S'152416508' -p71 -sg54 -g55 -ssssS'reference_sequence_records' -p72 -(dp73 -S'protein' -p74 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_149062.1' -p75 -sS'transcript' -p76 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_033071.3' -p77 -sssS'NM_182961.3:c.5929G>C' -p78 -(dp79 -g15 -g16 -sg17 -(lp80 -S'RefSeqGene record not available' -p81 -asg20 -g16 -sg21 -(lp82 -sg23 -VHomo sapiens spectrin repeat containing nuclear envelope protein 1 (SYNE1), transcript variant 1, mRNA -p83 -sg25 -S'SYNE1' -p84 -sg27 -(dp85 -g29 -S'NP_892006.3:p.(Ala1977Pro)' -p86 -sg31 -S'NP_892006.3:p.(A1977P)' -p87 -ssg33 -g34 -sg35 -g16 -sg36 -g16 -sg37 -S'NM_182961.3:c.5929G>C' -p88 -sg39 -g16 -sg40 -(dp89 -S'hg19' -p90 -(dp91 -g44 -S'NC_000006.11:g.152737643C>G' -p92 -sg46 -(dp93 -g48 -g49 -sg50 -g51 -sg52 -S'152737643' -p94 -sg54 -g55 -sssg56 -(dp95 -g44 -S'NC_000006.12:g.152416508C>G' -p96 -sg46 -(dp97 -g48 -g49 -sg50 -g51 -sg52 -S'152416508' -p98 -sg54 -g55 -sssS'grch37' -p99 -(dp100 -g44 -S'NC_000006.11:g.152737643C>G' -p101 -sg46 -(dp102 -g48 -g65 -sg50 -g51 -sg52 -S'152737643' -p103 -sg54 -g55 -sssS'grch38' -p104 -(dp105 -g44 -S'NC_000006.12:g.152416508C>G' -p106 -sg46 -(dp107 -g48 -g65 -sg50 -g51 -sg52 -S'152416508' -p108 -sg54 -g55 -ssssg72 -(dp109 -g74 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_892006.3' -p110 -sg76 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_182961.3' -p111 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant295.txt b/VariantValidator/testing/testOutputsMasterITS/variant295.txt deleted file mode 100644 index 11551fc0..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant295.txt +++ /dev/null @@ -1,1944 +0,0 @@ -(dp0 -S'NM_001322005.1:c.1216A>G' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 4, mRNA -p12 -sS'gene_symbol' -p13 -S'PMS2' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001308934.1:p.(Lys406Glu)' -p18 -sS'slr' -p19 -S'NP_001308934.1:p.(K406E)' -p20 -ssS'submitted_variant' -p21 -S'7-6026775-T-C' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_001322005.1:c.1216A>G' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000007.13:g.6026775T>C' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr7' -p37 -sS'ref' -p38 -VT -p39 -sS'pos' -p40 -S'6026775' -p41 -sS'alt' -p42 -VC -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000007.14:g.5987144T>C' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p48 -sg42 -g43 -sssS'grch37' -p49 -(dp50 -g32 -S'NC_000007.13:g.6026775T>C' -p51 -sg34 -(dp52 -g36 -S'7' -p53 -sg38 -g39 -sg40 -S'6026775' -p54 -sg42 -g43 -sssS'grch38' -p55 -(dp56 -g32 -S'NC_000007.14:g.5987144T>C' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p59 -sg42 -g43 -ssssS'reference_sequence_records' -p60 -(dp61 -S'protein' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308934.1' -p63 -sS'transcript' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322005.1' -p65 
-sssS'NM_001322012.1:c.688A>G' -p66 -(dp67 -g3 -g4 -sg5 -(lp68 -S'RefSeqGene record not available' -p69 -asg8 -g4 -sg9 -(lp70 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 11, mRNA -p71 -sg13 -S'PMS2' -p72 -sg15 -(dp73 -g17 -S'NP_001308941.1:p.(Lys230Glu)' -p74 -sg19 -S'NP_001308941.1:p.(K230E)' -p75 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001322012.1:c.688A>G' -p76 -sg27 -g4 -sg28 -(dp77 -S'hg19' -p78 -(dp79 -g32 -S'NC_000007.13:g.6026775T>C' -p80 -sg34 -(dp81 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p82 -sg42 -g43 -sssg44 -(dp83 -g32 -S'NC_000007.14:g.5987144T>C' -p84 -sg34 -(dp85 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p86 -sg42 -g43 -sssS'grch37' -p87 -(dp88 -g32 -S'NC_000007.13:g.6026775T>C' -p89 -sg34 -(dp90 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p91 -sg42 -g43 -sssS'grch38' -p92 -(dp93 -g32 -S'NC_000007.14:g.5987144T>C' -p94 -sg34 -(dp95 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p96 -sg42 -g43 -ssssg60 -(dp97 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308941.1' -p98 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322012.1' -p99 -sssS'NM_001322010.1:c.1060A>G' -p100 -(dp101 -g3 -g4 -sg5 -(lp102 -S'RefSeqGene record not available' -p103 -asg8 -g4 -sg9 -(lp104 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 9, mRNA -p105 -sg13 -S'PMS2' -p106 -sg15 -(dp107 -g17 -S'NP_001308939.1:p.(Lys354Glu)' -p108 -sg19 -S'NP_001308939.1:p.(K354E)' -p109 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001322010.1:c.1060A>G' -p110 -sg27 -g4 -sg28 -(dp111 -S'hg19' -p112 -(dp113 -g32 -S'NC_000007.13:g.6026775T>C' -p114 -sg34 -(dp115 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p116 -sg42 -g43 -sssg44 -(dp117 -g32 -S'NC_000007.14:g.5987144T>C' -p118 -sg34 -(dp119 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p120 -sg42 -g43 -sssS'grch37' -p121 -(dp122 -g32 -S'NC_000007.13:g.6026775T>C' -p123 -sg34 -(dp124 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p125 -sg42 -g43 -sssS'grch38' -p126 
-(dp127 -g32 -S'NC_000007.14:g.5987144T>C' -p128 -sg34 -(dp129 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p130 -sg42 -g43 -ssssg60 -(dp131 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308939.1' -p132 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322010.1' -p133 -sssS'NM_001322015.1:c.1312A>G' -p134 -(dp135 -g3 -g4 -sg5 -(lp136 -S'RefSeqGene record not available' -p137 -asg8 -g4 -sg9 -(lp138 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 14, mRNA -p139 -sg13 -S'PMS2' -p140 -sg15 -(dp141 -g17 -S'NP_001308944.1:p.(Lys438Glu)' -p142 -sg19 -S'NP_001308944.1:p.(K438E)' -p143 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001322015.1:c.1312A>G' -p144 -sg27 -g4 -sg28 -(dp145 -S'hg19' -p146 -(dp147 -g32 -S'NC_000007.13:g.6026775T>C' -p148 -sg34 -(dp149 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p150 -sg42 -g43 -sssg44 -(dp151 -g32 -S'NC_000007.14:g.5987144T>C' -p152 -sg34 -(dp153 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p154 -sg42 -g43 -sssS'grch37' -p155 -(dp156 -g32 -S'NC_000007.13:g.6026775T>C' -p157 -sg34 -(dp158 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p159 -sg42 -g43 -sssS'grch38' -p160 -(dp161 -g32 -S'NC_000007.14:g.5987144T>C' -p162 -sg34 -(dp163 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p164 -sg42 -g43 -ssssg60 -(dp165 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308944.1' -p166 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322015.1' -p167 -sssS'NM_001322003.1:c.1216A>G' -p168 -(dp169 -g3 -g4 -sg5 -(lp170 -S'RefSeqGene record not available' -p171 -asg8 -g4 -sg9 -(lp172 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 2, mRNA -p173 -sg13 -S'PMS2' -p174 -sg15 -(dp175 -g17 -S'NP_001308932.1:p.(Lys406Glu)' -p176 -sg19 -S'NP_001308932.1:p.(K406E)' -p177 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001322003.1:c.1216A>G' -p178 -sg27 -g4 -sg28 -(dp179 -S'hg19' -p180 -(dp181 -g32 -S'NC_000007.13:g.6026775T>C' -p182 -sg34 -(dp183 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' 
-p184 -sg42 -g43 -sssg44 -(dp185 -g32 -S'NC_000007.14:g.5987144T>C' -p186 -sg34 -(dp187 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p188 -sg42 -g43 -sssS'grch37' -p189 -(dp190 -g32 -S'NC_000007.13:g.6026775T>C' -p191 -sg34 -(dp192 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p193 -sg42 -g43 -sssS'grch38' -p194 -(dp195 -g32 -S'NC_000007.14:g.5987144T>C' -p196 -sg34 -(dp197 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p198 -sg42 -g43 -ssssg60 -(dp199 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308932.1' -p200 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322003.1' -p201 -sssS'NM_001322014.1:c.1621A>G' -p202 -(dp203 -g3 -g4 -sg5 -(lp204 -S'RefSeqGene record not available' -p205 -asg8 -g4 -sg9 -(lp206 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 13, mRNA -p207 -sg13 -S'PMS2' -p208 -sg15 -(dp209 -g17 -S'NP_001308943.1:p.(Lys541Glu)' -p210 -sg19 -S'NP_001308943.1:p.(K541E)' -p211 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001322014.1:c.1621A>G' -p212 -sg27 -g4 -sg28 -(dp213 -S'hg19' -p214 -(dp215 -g32 -S'NC_000007.13:g.6026775T>C' -p216 -sg34 -(dp217 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p218 -sg42 -g43 -sssg44 -(dp219 -g32 -S'NC_000007.14:g.5987144T>C' -p220 -sg34 -(dp221 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p222 -sg42 -g43 -sssS'grch37' -p223 -(dp224 -g32 -S'NC_000007.13:g.6026775T>C' -p225 -sg34 -(dp226 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p227 -sg42 -g43 -sssS'grch38' -p228 -(dp229 -g32 -S'NC_000007.14:g.5987144T>C' -p230 -sg34 -(dp231 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p232 -sg42 -g43 -ssssg60 -(dp233 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308943.1' -p234 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322014.1' -p235 -sssS'NM_001322004.1:c.1216A>G' -p236 -(dp237 -g3 -g4 -sg5 -(lp238 -S'RefSeqGene record not available' -p239 -asg8 -g4 -sg9 -(lp240 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 3, mRNA -p241 -sg13 -S'PMS2' -p242 -sg15 
-(dp243 -g17 -S'NP_001308933.1:p.(Lys406Glu)' -p244 -sg19 -S'NP_001308933.1:p.(K406E)' -p245 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001322004.1:c.1216A>G' -p246 -sg27 -g4 -sg28 -(dp247 -S'hg19' -p248 -(dp249 -g32 -S'NC_000007.13:g.6026775T>C' -p250 -sg34 -(dp251 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p252 -sg42 -g43 -sssg44 -(dp253 -g32 -S'NC_000007.14:g.5987144T>C' -p254 -sg34 -(dp255 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p256 -sg42 -g43 -sssS'grch37' -p257 -(dp258 -g32 -S'NC_000007.13:g.6026775T>C' -p259 -sg34 -(dp260 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p261 -sg42 -g43 -sssS'grch38' -p262 -(dp263 -g32 -S'NC_000007.14:g.5987144T>C' -p264 -sg34 -(dp265 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p266 -sg42 -g43 -ssssg60 -(dp267 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308933.1' -p268 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322004.1' -p269 -sssS'NM_001322008.1:c.1303A>G' -p270 -(dp271 -g3 -g4 -sg5 -(lp272 -S'RefSeqGene record not available' -p273 -asg8 -g4 -sg9 -(lp274 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 7, mRNA -p275 -sg13 -S'PMS2' -p276 -sg15 -(dp277 -g17 -S'NP_001308937.1:p.(Lys435Glu)' -p278 -sg19 -S'NP_001308937.1:p.(K435E)' -p279 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001322008.1:c.1303A>G' -p280 -sg27 -g4 -sg28 -(dp281 -S'hg19' -p282 -(dp283 -g32 -S'NC_000007.13:g.6026775T>C' -p284 -sg34 -(dp285 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p286 -sg42 -g43 -sssg44 -(dp287 -g32 -S'NC_000007.14:g.5987144T>C' -p288 -sg34 -(dp289 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p290 -sg42 -g43 -sssS'grch37' -p291 -(dp292 -g32 -S'NC_000007.13:g.6026775T>C' -p293 -sg34 -(dp294 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p295 -sg42 -g43 -sssS'grch38' -p296 -(dp297 -g32 -S'NC_000007.14:g.5987144T>C' -p298 -sg34 -(dp299 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p300 -sg42 -g43 -ssssg60 -(dp301 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308937.1' -p302 -sg64 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322008.1' -p303 -sssS'NM_001322006.1:c.1465A>G' -p304 -(dp305 -g3 -g4 -sg5 -(lp306 -S'RefSeqGene record not available' -p307 -asg8 -g4 -sg9 -(lp308 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 5, mRNA -p309 -sg13 -S'PMS2' -p310 -sg15 -(dp311 -g17 -S'NP_001308935.1:p.(Lys489Glu)' -p312 -sg19 -S'NP_001308935.1:p.(K489E)' -p313 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001322006.1:c.1465A>G' -p314 -sg27 -g4 -sg28 -(dp315 -S'hg19' -p316 -(dp317 -g32 -S'NC_000007.13:g.6026775T>C' -p318 -sg34 -(dp319 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p320 -sg42 -g43 -sssg44 -(dp321 -g32 -S'NC_000007.14:g.5987144T>C' -p322 -sg34 -(dp323 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p324 -sg42 -g43 -sssS'grch37' -p325 -(dp326 -g32 -S'NC_000007.13:g.6026775T>C' -p327 -sg34 -(dp328 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p329 -sg42 -g43 -sssS'grch38' -p330 -(dp331 -g32 -S'NC_000007.14:g.5987144T>C' -p332 -sg34 -(dp333 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p334 -sg42 -g43 -ssssg60 -(dp335 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308935.1' -p336 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322006.1' -p337 -sssS'NM_001322013.1:c.1048A>G' -p338 -(dp339 -g3 -g4 -sg5 -(lp340 -S'RefSeqGene record not available' -p341 -asg8 -g4 -sg9 -(lp342 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 12, mRNA -p343 -sg13 -S'PMS2' -p344 -sg15 -(dp345 -g17 -S'NP_001308942.1:p.(Lys350Glu)' -p346 -sg19 -S'NP_001308942.1:p.(K350E)' -p347 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001322013.1:c.1048A>G' -p348 -sg27 -g4 -sg28 -(dp349 -S'hg19' -p350 -(dp351 -g32 -S'NC_000007.13:g.6026775T>C' -p352 -sg34 -(dp353 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p354 -sg42 -g43 -sssg44 -(dp355 -g32 -S'NC_000007.14:g.5987144T>C' -p356 -sg34 -(dp357 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p358 -sg42 -g43 -sssS'grch37' -p359 -(dp360 -g32 -S'NC_000007.13:g.6026775T>C' 
-p361 -sg34 -(dp362 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p363 -sg42 -g43 -sssS'grch38' -p364 -(dp365 -g32 -S'NC_000007.14:g.5987144T>C' -p366 -sg34 -(dp367 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p368 -sg42 -g43 -ssssg60 -(dp369 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308942.1' -p370 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322013.1' -p371 -sssS'NM_001322009.1:c.1216A>G' -p372 -(dp373 -g3 -g4 -sg5 -(lp374 -S'RefSeqGene record not available' -p375 -asg8 -g4 -sg9 -(lp376 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 8, mRNA -p377 -sg13 -S'PMS2' -p378 -sg15 -(dp379 -g17 -S'NP_001308938.1:p.(Lys406Glu)' -p380 -sg19 -S'NP_001308938.1:p.(K406E)' -p381 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001322009.1:c.1216A>G' -p382 -sg27 -g4 -sg28 -(dp383 -S'hg19' -p384 -(dp385 -g32 -S'NC_000007.13:g.6026775T>C' -p386 -sg34 -(dp387 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p388 -sg42 -g43 -sssg44 -(dp389 -g32 -S'NC_000007.14:g.5987144T>C' -p390 -sg34 -(dp391 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p392 -sg42 -g43 -sssS'grch37' -p393 -(dp394 -g32 -S'NC_000007.13:g.6026775T>C' -p395 -sg34 -(dp396 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p397 -sg42 -g43 -sssS'grch38' -p398 -(dp399 -g32 -S'NC_000007.14:g.5987144T>C' -p400 -sg34 -(dp401 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p402 -sg42 -g43 -ssssg60 -(dp403 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308938.1' -p404 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322009.1' -p405 -sssS'NR_003085.2:n.1703G=' -p406 -(dp407 -g3 -g4 -sg5 -(lp408 -S'RefSeqGene record not available' -p409 -asg8 -g4 -sg9 -(lp410 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 2, non-coding RNA -p411 -sg13 -S'PMS2' -p412 -sg15 -(dp413 -g17 -S'Non-coding :n.' 
-p414 -sg19 -g414 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NR_003085.2:n.1703G=' -p415 -sg27 -g4 -sg28 -(dp416 -S'hg19' -p417 -(dp418 -g32 -S'NC_000007.13:g.6026775T>C' -p419 -sg34 -(dp420 -g36 -g37 -sg38 -S'T' -p421 -sg40 -S'6026775' -p422 -sg42 -g43 -sssS'grch37' -p423 -(dp424 -g32 -S'NC_000007.13:g.6026775T>C' -p425 -sg34 -(dp426 -g36 -g53 -sg38 -g421 -sg40 -S'6026775' -p427 -sg42 -g43 -ssssg60 -(dp428 -g64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_003085.2' -p429 -sssS'flag' -p430 -S'gene_variant' -p431 -sS'NM_000535.5:c.1621G=' -p432 -(dp433 -g3 -g4 -sg5 -(lp434 -S'A more recent version of the selected reference sequence NM_000535.5 is available (NM_000535.6)' -p435 -aS'NM_000535.6:c.1621A>G MUST be fully validated prior to use in reports' -p436 -aS'select_variants=NM_000535.6:c.1621A>G' -p437 -aS'RefSeqGene record not available' -p438 -asg8 -g4 -sg9 -(lp439 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 1, mRNA -p440 -sg13 -S'PMS2' -p441 -sg15 -(dp442 -g17 -S'NP_000526.1:p.(Glu541=)' -p443 -sg19 -S'NP_000526.1:p.(E541=)' -p444 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_000535.5:c.1621G=' -p445 -sg27 -g4 -sg28 -(dp446 -S'hg19' -p447 -(dp448 -g32 -S'NC_000007.13:g.6026775T>C' -p449 -sg34 -(dp450 -g36 -g37 -sg38 -g421 -sg40 -S'6026775' -p451 -sg42 -g43 -sssS'grch37' -p452 -(dp453 -g32 -S'NC_000007.13:g.6026775T>C' -p454 -sg34 -(dp455 -g36 -g53 -sg38 -g421 -sg40 -S'6026775' -p456 -sg42 -g43 -ssssg60 -(dp457 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000526.1' -p458 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000535.5' -p459 -sssS'NM_001322007.1:c.1303A>G' -p460 -(dp461 -g3 -g4 -sg5 -(lp462 -S'RefSeqGene record not available' -p463 -asg8 -g4 -sg9 -(lp464 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 6, mRNA -p465 -sg13 -S'PMS2' -p466 -sg15 -(dp467 -g17 -S'NP_001308936.1:p.(Lys435Glu)' -p468 -sg19 -S'NP_001308936.1:p.(K435E)' -p469 -ssg21 -g22 -sg23 -g4 
-sg24 -g4 -sg25 -S'NM_001322007.1:c.1303A>G' -p470 -sg27 -g4 -sg28 -(dp471 -S'hg19' -p472 -(dp473 -g32 -S'NC_000007.13:g.6026775T>C' -p474 -sg34 -(dp475 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p476 -sg42 -g43 -sssg44 -(dp477 -g32 -S'NC_000007.14:g.5987144T>C' -p478 -sg34 -(dp479 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p480 -sg42 -g43 -sssS'grch37' -p481 -(dp482 -g32 -S'NC_000007.13:g.6026775T>C' -p483 -sg34 -(dp484 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p485 -sg42 -g43 -sssS'grch38' -p486 -(dp487 -g32 -S'NC_000007.14:g.5987144T>C' -p488 -sg34 -(dp489 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p490 -sg42 -g43 -ssssg60 -(dp491 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308936.1' -p492 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322007.1' -p493 -sssS'NR_136154.1:n.1708A>G' -p494 -(dp495 -g3 -g4 -sg5 -(lp496 -S'RefSeqGene record not available' -p497 -asg8 -g4 -sg9 -(lp498 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 15, non-coding RNA -p499 -sg13 -S'PMS2' -p500 -sg15 -(dp501 -g17 -S'Non-coding :n.' 
-p502 -sg19 -g502 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NR_136154.1:n.1708A>G' -p503 -sg27 -g4 -sg28 -(dp504 -S'hg19' -p505 -(dp506 -g32 -S'NC_000007.13:g.6026775T>C' -p507 -sg34 -(dp508 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p509 -sg42 -g43 -sssg44 -(dp510 -g32 -S'NC_000007.14:g.5987144T>C' -p511 -sg34 -(dp512 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p513 -sg42 -g43 -sssS'grch37' -p514 -(dp515 -g32 -S'NC_000007.13:g.6026775T>C' -p516 -sg34 -(dp517 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p518 -sg42 -g43 -sssS'grch38' -p519 -(dp520 -g32 -S'NC_000007.14:g.5987144T>C' -p521 -sg34 -(dp522 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p523 -sg42 -g43 -ssssg60 -(dp524 -g64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_136154.1' -p525 -sssS'NM_001322011.1:c.688A>G' -p526 -(dp527 -g3 -g4 -sg5 -(lp528 -S'RefSeqGene record not available' -p529 -asg8 -g4 -sg9 -(lp530 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 10, mRNA -p531 -sg13 -S'PMS2' -p532 -sg15 -(dp533 -g17 -S'NP_001308940.1:p.(Lys230Glu)' -p534 -sg19 -S'NP_001308940.1:p.(K230E)' -p535 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001322011.1:c.688A>G' -p536 -sg27 -g4 -sg28 -(dp537 -S'hg19' -p538 -(dp539 -g32 -S'NC_000007.13:g.6026775T>C' -p540 -sg34 -(dp541 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p542 -sg42 -g43 -sssg44 -(dp543 -g32 -S'NC_000007.14:g.5987144T>C' -p544 -sg34 -(dp545 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p546 -sg42 -g43 -sssS'grch37' -p547 -(dp548 -g32 -S'NC_000007.13:g.6026775T>C' -p549 -sg34 -(dp550 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p551 -sg42 -g43 -sssS'grch38' -p552 -(dp553 -g32 -S'NC_000007.14:g.5987144T>C' -p554 -sg34 -(dp555 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p556 -sg42 -g43 -ssssg60 -(dp557 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308940.1' -p558 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322011.1' -p559 -sssS'NM_000535.6:c.1621A>G' -p560 -(dp561 -g3 -g4 -sg5 -(lp562 -S'RefSeqGene record not available' -p563 -asg8 -g4 
-sg9 -(lp564 -sg11 -VHomo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 1, mRNA -p565 -sg13 -S'PMS2' -p566 -sg15 -(dp567 -g17 -S'NP_000526.2:p.(Lys541Glu)' -p568 -sg19 -S'NP_000526.2:p.(K541E)' -p569 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_000535.6:c.1621A>G' -p570 -sg27 -g4 -sg28 -(dp571 -S'hg19' -p572 -(dp573 -g32 -S'NC_000007.13:g.6026775T>C' -p574 -sg34 -(dp575 -g36 -g37 -sg38 -g39 -sg40 -S'6026775' -p576 -sg42 -g43 -sssg44 -(dp577 -g32 -S'NC_000007.14:g.5987144T>C' -p578 -sg34 -(dp579 -g36 -g37 -sg38 -g39 -sg40 -S'5987144' -p580 -sg42 -g43 -sssS'grch37' -p581 -(dp582 -g32 -S'NC_000007.13:g.6026775T>C' -p583 -sg34 -(dp584 -g36 -g53 -sg38 -g39 -sg40 -S'6026775' -p585 -sg42 -g43 -sssS'grch38' -p586 -(dp587 -g32 -S'NC_000007.14:g.5987144T>C' -p588 -sg34 -(dp589 -g36 -g53 -sg38 -g39 -sg40 -S'5987144' -p590 -sg42 -g43 -ssssg60 -(dp591 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000526.2' -p592 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000535.6' -p593 -sssS'metadata' -p594 -(dp595 -S'variantvalidator_hgvs_version' -p596 -S'1.1.3' -p597 -sS'uta_schema' -p598 -S'uta_20180821' -p599 -sS'seqrepo_db' -p600 -S'2018-08-21' -p601 -sS'variantvalidator_version' -p602 -S'v0.2' -p603 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant296.txt b/VariantValidator/testing/testOutputsMasterITS/variant296.txt deleted file mode 100644 index 479838ba..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant296.txt +++ /dev/null @@ -1,908 +0,0 @@ -(dp0 -S'NM_001346900.1:c.2077_2091del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000007.13:g.55242465GGAATTAAGAGAAGCA>G automapped to NC_000007.13:g.55242466_55242480del' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 8, mRNA -p13 -sS'gene_symbol' -p14 -S'EGFR' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001333829.1:p.(Glu693_Ala697del)' -p19 -sS'slr' -p20 -S'NP_001333829.1:p.(E693_A697del)' -p21 -ssS'submitted_variant' -p22 -S'7-55242465-GGAATTAAGAGAAGCA-G' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_001346900.1:c.2077_2091del' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000007.13:g.55242466_55242480del' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr7' -p38 -sS'ref' -p39 -S'GGAATTAAGAGAAGCA' -p40 -sS'pos' -p41 -S'55242465' -p42 -sS'alt' -p43 -S'G' -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000007.14:g.55174773_55174787del' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -S'GGAATTAAGAGAAGCA' -p49 -sg41 -S'55174772' -p50 -sg43 -g44 -sssS'grch37' -p51 -(dp52 -g33 -S'NC_000007.13:g.55242466_55242480del' -p53 -sg35 -(dp54 -g37 -S'7' -p55 -sg39 -S'GGAATTAAGAGAAGCA' -p56 -sg41 -S'55242465' -p57 -sg43 -g44 -sssS'grch38' -p58 -(dp59 -g33 -S'NC_000007.14:g.55174773_55174787del' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -S'GGAATTAAGAGAAGCA' -p62 
-sg41 -S'55174772' -p63 -sg43 -g44 -ssssS'reference_sequence_records' -p64 -(dp65 -S'protein' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333829.1' -p67 -sS'transcript' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346900.1' -p69 -sssS'NM_001346898.1:c.2236_2250del' -p70 -(dp71 -g3 -g4 -sg5 -(lp72 -S'NC_000007.13:g.55242465GGAATTAAGAGAAGCA>G automapped to NC_000007.13:g.55242466_55242480del' -p73 -aS'RefSeqGene record not available' -p74 -asg9 -g4 -sg10 -(lp75 -sg12 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 6, mRNA -p76 -sg14 -S'EGFR' -p77 -sg16 -(dp78 -g18 -S'NP_001333827.1:p.(Glu746_Ala750del)' -p79 -sg20 -S'NP_001333827.1:p.(E746_A750del)' -p80 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001346898.1:c.2236_2250del' -p81 -sg28 -g4 -sg29 -(dp82 -S'hg19' -p83 -(dp84 -g33 -S'NC_000007.13:g.55242466_55242480del' -p85 -sg35 -(dp86 -g37 -g38 -sg39 -S'GGAATTAAGAGAAGCA' -p87 -sg41 -S'55242465' -p88 -sg43 -g44 -sssg45 -(dp89 -g33 -S'NC_000007.14:g.55174773_55174787del' -p90 -sg35 -(dp91 -g37 -g38 -sg39 -S'GGAATTAAGAGAAGCA' -p92 -sg41 -S'55174772' -p93 -sg43 -g44 -sssS'grch37' -p94 -(dp95 -g33 -S'NC_000007.13:g.55242466_55242480del' -p96 -sg35 -(dp97 -g37 -g55 -sg39 -S'GGAATTAAGAGAAGCA' -p98 -sg41 -S'55242465' -p99 -sg43 -g44 -sssS'grch38' -p100 -(dp101 -g33 -S'NC_000007.14:g.55174773_55174787del' -p102 -sg35 -(dp103 -g37 -g55 -sg39 -S'GGAATTAAGAGAAGCA' -p104 -sg41 -S'55174772' -p105 -sg43 -g44 -ssssg64 -(dp106 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333827.1' -p107 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346898.1' -p108 -sssS'NM_001346941.1:c.1435_1449del' -p109 -(dp110 -g3 -g4 -sg5 -(lp111 -S'NC_000007.13:g.55242465GGAATTAAGAGAAGCA>G automapped to NC_000007.13:g.55242466_55242480del' -p112 -aS'RefSeqGene record not available' -p113 -asg9 -g4 -sg10 -(lp114 -sg12 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant EGFRvIII, mRNA -p115 -sg14 -S'EGFR' -p116 -sg16 -(dp117 -g18 
-S'NP_001333870.1:p.(Glu479_Ala483del)' -p118 -sg20 -S'NP_001333870.1:p.(E479_A483del)' -p119 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001346941.1:c.1435_1449del' -p120 -sg28 -g4 -sg29 -(dp121 -S'hg19' -p122 -(dp123 -g33 -S'NC_000007.13:g.55242466_55242480del' -p124 -sg35 -(dp125 -g37 -g38 -sg39 -S'GGAATTAAGAGAAGCA' -p126 -sg41 -S'55242465' -p127 -sg43 -g44 -sssg45 -(dp128 -g33 -S'NC_000007.14:g.55174773_55174787del' -p129 -sg35 -(dp130 -g37 -g38 -sg39 -S'GGAATTAAGAGAAGCA' -p131 -sg41 -S'55174772' -p132 -sg43 -g44 -sssS'grch37' -p133 -(dp134 -g33 -S'NC_000007.13:g.55242466_55242480del' -p135 -sg35 -(dp136 -g37 -g55 -sg39 -S'GGAATTAAGAGAAGCA' -p137 -sg41 -S'55242465' -p138 -sg43 -g44 -sssS'grch38' -p139 -(dp140 -g33 -S'NC_000007.14:g.55174773_55174787del' -p141 -sg35 -(dp142 -g37 -g55 -sg39 -S'GGAATTAAGAGAAGCA' -p143 -sg41 -S'55174772' -p144 -sg43 -g44 -ssssg64 -(dp145 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333870.1' -p146 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346941.1' -p147 -sssS'flag' -p148 -S'gene_variant' -p149 -sS'NM_001346899.1:c.2101_2115del' -p150 -(dp151 -g3 -g4 -sg5 -(lp152 -S'NC_000007.13:g.55242465GGAATTAAGAGAAGCA>G automapped to NC_000007.13:g.55242466_55242480del' -p153 -aS'RefSeqGene record not available' -p154 -asg9 -g4 -sg10 -(lp155 -sg12 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 7, mRNA -p156 -sg14 -S'EGFR' -p157 -sg16 -(dp158 -g18 -S'NP_001333828.1:p.(Glu701_Ala705del)' -p159 -sg20 -S'NP_001333828.1:p.(E701_A705del)' -p160 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001346899.1:c.2101_2115del' -p161 -sg28 -g4 -sg29 -(dp162 -S'hg19' -p163 -(dp164 -g33 -S'NC_000007.13:g.55242466_55242480del' -p165 -sg35 -(dp166 -g37 -g38 -sg39 -S'GGAATTAAGAGAAGCA' -p167 -sg41 -S'55242465' -p168 -sg43 -g44 -sssg45 -(dp169 -g33 -S'NC_000007.14:g.55174773_55174787del' -p170 -sg35 -(dp171 -g37 -g38 -sg39 -S'GGAATTAAGAGAAGCA' -p172 -sg41 -S'55174772' -p173 -sg43 -g44 -sssS'grch37' -p174 -(dp175 -g33 
-S'NC_000007.13:g.55242466_55242480del' -p176 -sg35 -(dp177 -g37 -g55 -sg39 -S'GGAATTAAGAGAAGCA' -p178 -sg41 -S'55242465' -p179 -sg43 -g44 -sssS'grch38' -p180 -(dp181 -g33 -S'NC_000007.14:g.55174773_55174787del' -p182 -sg35 -(dp183 -g37 -g55 -sg39 -S'GGAATTAAGAGAAGCA' -p184 -sg41 -S'55174772' -p185 -sg43 -g44 -ssssg64 -(dp186 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333828.1' -p187 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346899.1' -p188 -sssS'NM_001346897.1:c.2101_2115del' -p189 -(dp190 -g3 -g4 -sg5 -(lp191 -S'NC_000007.13:g.55242465GGAATTAAGAGAAGCA>G automapped to NC_000007.13:g.55242466_55242480del' -p192 -aS'RefSeqGene record not available' -p193 -asg9 -g4 -sg10 -(lp194 -sg12 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 5, mRNA -p195 -sg14 -S'EGFR' -p196 -sg16 -(dp197 -g18 -S'NP_001333826.1:p.(Glu701_Ala705del)' -p198 -sg20 -S'NP_001333826.1:p.(E701_A705del)' -p199 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001346897.1:c.2101_2115del' -p200 -sg28 -g4 -sg29 -(dp201 -S'hg19' -p202 -(dp203 -g33 -S'NC_000007.13:g.55242466_55242480del' -p204 -sg35 -(dp205 -g37 -g38 -sg39 -S'GGAATTAAGAGAAGCA' -p206 -sg41 -S'55242465' -p207 -sg43 -g44 -sssg45 -(dp208 -g33 -S'NC_000007.14:g.55174773_55174787del' -p209 -sg35 -(dp210 -g37 -g38 -sg39 -S'GGAATTAAGAGAAGCA' -p211 -sg41 -S'55174772' -p212 -sg43 -g44 -sssS'grch37' -p213 -(dp214 -g33 -S'NC_000007.13:g.55242466_55242480del' -p215 -sg35 -(dp216 -g37 -g55 -sg39 -S'GGAATTAAGAGAAGCA' -p217 -sg41 -S'55242465' -p218 -sg43 -g44 -sssS'grch38' -p219 -(dp220 -g33 -S'NC_000007.14:g.55174773_55174787del' -p221 -sg35 -(dp222 -g37 -g55 -sg39 -S'GGAATTAAGAGAAGCA' -p223 -sg41 -S'55174772' -p224 -sg43 -g44 -ssssg64 -(dp225 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333826.1' -p226 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346897.1' -p227 -sssS'NM_005228.3:c.2236_2250del' -p228 -(dp229 -g3 -g4 -sg5 -(lp230 -S'NC_000007.13:g.55242465GGAATTAAGAGAAGCA>G automapped to 
NC_000007.13:g.55242466_55242480del' -p231 -aS'A more recent version of the selected reference sequence NM_005228.3 is available (NM_005228.4)' -p232 -aS'NM_005228.4:c.2236_2250del MUST be fully validated prior to use in reports' -p233 -aS'select_variants=NM_005228.4:c.2236_2250del' -p234 -aS'RefSeqGene record not available' -p235 -asg9 -g4 -sg10 -(lp236 -sg12 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 1, mRNA -p237 -sg14 -S'EGFR' -p238 -sg16 -(dp239 -g18 -S'NP_005219.2:p.(Glu746_Ala750del)' -p240 -sg20 -S'NP_005219.2:p.(E746_A750del)' -p241 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_005228.3:c.2236_2250del' -p242 -sg28 -g4 -sg29 -(dp243 -S'hg19' -p244 -(dp245 -g33 -S'NC_000007.13:g.55242466_55242480del' -p246 -sg35 -(dp247 -g37 -g38 -sg39 -S'GGAATTAAGAGAAGCA' -p248 -sg41 -S'55242465' -p249 -sg43 -g44 -sssg45 -(dp250 -g33 -S'NC_000007.14:g.55174773_55174787del' -p251 -sg35 -(dp252 -g37 -g38 -sg39 -S'GGAATTAAGAGAAGCA' -p253 -sg41 -S'55174772' -p254 -sg43 -g44 -sssS'grch37' -p255 -(dp256 -g33 -S'NC_000007.13:g.55242466_55242480del' -p257 -sg35 -(dp258 -g37 -g55 -sg39 -S'GGAATTAAGAGAAGCA' -p259 -sg41 -S'55242465' -p260 -sg43 -g44 -sssS'grch38' -p261 -(dp262 -g33 -S'NC_000007.14:g.55174773_55174787del' -p263 -sg35 -(dp264 -g37 -g55 -sg39 -S'GGAATTAAGAGAAGCA' -p265 -sg41 -S'55174772' -p266 -sg43 -g44 -ssssg64 -(dp267 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005219.2' -p268 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005228.3' -p269 -sssS'NM_005228.4:c.2236_2250del' -p270 -(dp271 -g3 -g4 -sg5 -(lp272 -S'NC_000007.13:g.55242465GGAATTAAGAGAAGCA>G automapped to NC_000007.13:g.55242466_55242480del' -p273 -aS'RefSeqGene record not available' -p274 -asg9 -g4 -sg10 -(lp275 -sg12 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 1, mRNA -p276 -sg14 -S'EGFR' -p277 -sg16 -(dp278 -g18 -S'NP_005219.2:p.(Glu746_Ala750del)' -p279 -sg20 -S'NP_005219.2:p.(E746_A750del)' -p280 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 
-S'NM_005228.4:c.2236_2250del' -p281 -sg28 -g4 -sg29 -(dp282 -S'hg19' -p283 -(dp284 -g33 -S'NC_000007.13:g.55242466_55242480del' -p285 -sg35 -(dp286 -g37 -g38 -sg39 -S'GGAATTAAGAGAAGCA' -p287 -sg41 -S'55242465' -p288 -sg43 -g44 -sssg45 -(dp289 -g33 -S'NC_000007.14:g.55174773_55174787del' -p290 -sg35 -(dp291 -g37 -g38 -sg39 -S'GGAATTAAGAGAAGCA' -p292 -sg41 -S'55174772' -p293 -sg43 -g44 -sssS'grch37' -p294 -(dp295 -g33 -S'NC_000007.13:g.55242466_55242480del' -p296 -sg35 -(dp297 -g37 -g55 -sg39 -S'GGAATTAAGAGAAGCA' -p298 -sg41 -S'55242465' -p299 -sg43 -g44 -sssS'grch38' -p300 -(dp301 -g33 -S'NC_000007.14:g.55174773_55174787del' -p302 -sg35 -(dp303 -g37 -g55 -sg39 -S'GGAATTAAGAGAAGCA' -p304 -sg41 -S'55174772' -p305 -sg43 -g44 -ssssg64 -(dp306 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005219.2' -p307 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005228.4' -p308 -sssS'metadata' -p309 -(dp310 -S'variantvalidator_hgvs_version' -p311 -S'1.1.3' -p312 -sS'uta_schema' -p313 -S'uta_20180821' -p314 -sS'seqrepo_db' -p315 -S'2018-08-21' -p316 -sS'variantvalidator_version' -p317 -S'v0.2' -p318 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant297.txt b/VariantValidator/testing/testOutputsMasterITS/variant297.txt deleted file mode 100644 index 14000cc1..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant297.txt +++ /dev/null @@ -1,1063 +0,0 @@ -(dp0 -S'NM_005228.3:c.2284-5_2290dup' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000007.13:g.55248992T>TTCCAGGAAGCCT automapped to NC_000007.13:g.55248981_55248992dupTCCAGGAAGCCT' -p7 -aS'A more recent version of the selected reference sequence NM_005228.3 is available (NM_005228.4)' -p8 -aS'NM_005228.4:c.2284-5_2290dupTCCAGGAAGCCT MUST be fully validated prior to use in reports' -p9 -aS'select_variants=NM_005228.4:c.2284-5_2290dup' -p10 -aS'RefSeqGene record not available' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 1, mRNA -p16 -sS'gene_symbol' -p17 -S'EGFR' -p18 -sS'hgvs_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_005219.2:p.?' -p22 -sS'slr' -p23 -S'NP_005219.2:p.?' 
-p24 -ssS'submitted_variant' -p25 -S'7-55248992-T-TTCCAGGAAGCCT' -p26 -sS'genome_context_intronic_sequence' -p27 -S'NC_000007.13(NM_005228.3):c.2284-5_2290dup' -p28 -sS'hgvs_lrg_variant' -p29 -g4 -sS'hgvs_transcript_variant' -p30 -S'NM_005228.3:c.2284-5_2290dup' -p31 -sS'hgvs_refseqgene_variant' -p32 -g4 -sS'primary_assembly_loci' -p33 -(dp34 -S'hg19' -p35 -(dp36 -S'hgvs_genomic_description' -p37 -S'NC_000007.13:g.55248981_55248992dup' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'chr7' -p42 -sS'ref' -p43 -S'TCCAGGAAGCCT' -p44 -sS'pos' -p45 -S'55248981' -p46 -sS'alt' -p47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p48 -sssS'hg38' -p49 -(dp50 -g37 -S'NC_000007.14:g.55181288_55181299dup' -p51 -sg39 -(dp52 -g41 -g42 -sg43 -S'TCCAGGAAGCCT' -p53 -sg45 -S'55181288' -p54 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p55 -sssS'grch37' -p56 -(dp57 -g37 -S'NC_000007.13:g.55248981_55248992dup' -p58 -sg39 -(dp59 -g41 -S'7' -p60 -sg43 -S'TCCAGGAAGCCT' -p61 -sg45 -S'55248981' -p62 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p63 -sssS'grch38' -p64 -(dp65 -g37 -S'NC_000007.14:g.55181288_55181299dup' -p66 -sg39 -(dp67 -g41 -g60 -sg43 -S'TCCAGGAAGCCT' -p68 -sg45 -S'55181288' -p69 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p70 -ssssS'reference_sequence_records' -p71 -(dp72 -S'protein' -p73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005219.2' -p74 -sS'transcript' -p75 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005228.3' -p76 -sssS'NM_001346899.1:c.2149-5_2155dup' -p77 -(dp78 -g3 -g4 -sg5 -(lp79 -S'NC_000007.13:g.55248992T>TTCCAGGAAGCCT automapped to NC_000007.13:g.55248981_55248992dupTCCAGGAAGCCT' -p80 -aS'RefSeqGene record not available' -p81 -asg12 -g4 -sg13 -(lp82 -sg15 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 7, mRNA -p83 -sg17 -S'EGFR' -p84 -sg19 -(dp85 -g21 -S'NP_001333828.1:p.?' -p86 -sg23 -S'NP_001333828.1:p.?' 
-p87 -ssg25 -g26 -sg27 -S'NC_000007.13(NM_001346899.1):c.2149-5_2155dup' -p88 -sg29 -g4 -sg30 -S'NM_001346899.1:c.2149-5_2155dup' -p89 -sg32 -g4 -sg33 -(dp90 -S'hg19' -p91 -(dp92 -g37 -S'NC_000007.13:g.55248981_55248992dup' -p93 -sg39 -(dp94 -g41 -g42 -sg43 -S'TCCAGGAAGCCT' -p95 -sg45 -S'55248981' -p96 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p97 -sssg49 -(dp98 -g37 -S'NC_000007.14:g.55181288_55181299dup' -p99 -sg39 -(dp100 -g41 -g42 -sg43 -S'TCCAGGAAGCCT' -p101 -sg45 -S'55181288' -p102 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p103 -sssS'grch37' -p104 -(dp105 -g37 -S'NC_000007.13:g.55248981_55248992dup' -p106 -sg39 -(dp107 -g41 -g60 -sg43 -S'TCCAGGAAGCCT' -p108 -sg45 -S'55248981' -p109 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p110 -sssS'grch38' -p111 -(dp112 -g37 -S'NC_000007.14:g.55181288_55181299dup' -p113 -sg39 -(dp114 -g41 -g60 -sg43 -S'TCCAGGAAGCCT' -p115 -sg45 -S'55181288' -p116 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p117 -ssssg71 -(dp118 -g73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333828.1' -p119 -sg75 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346899.1' -p120 -sssS'NM_005228.4:c.2284-5_2290dup' -p121 -(dp122 -g3 -g4 -sg5 -(lp123 -S'NC_000007.13:g.55248992T>TTCCAGGAAGCCT automapped to NC_000007.13:g.55248981_55248992dupTCCAGGAAGCCT' -p124 -aS'RefSeqGene record not available' -p125 -asg12 -g4 -sg13 -(lp126 -sg15 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 1, mRNA -p127 -sg17 -S'EGFR' -p128 -sg19 -(dp129 -g21 -S'NP_005219.2:p.?' -p130 -sg23 -S'NP_005219.2:p.?' 
-p131 -ssg25 -g26 -sg27 -S'NC_000007.13(NM_005228.4):c.2284-5_2290dup' -p132 -sg29 -g4 -sg30 -S'NM_005228.4:c.2284-5_2290dup' -p133 -sg32 -g4 -sg33 -(dp134 -S'hg19' -p135 -(dp136 -g37 -S'NC_000007.13:g.55248981_55248992dup' -p137 -sg39 -(dp138 -g41 -g42 -sg43 -S'TCCAGGAAGCCT' -p139 -sg45 -S'55248981' -p140 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p141 -sssg49 -(dp142 -g37 -S'NC_000007.14:g.55181288_55181299dup' -p143 -sg39 -(dp144 -g41 -g42 -sg43 -S'TCCAGGAAGCCT' -p145 -sg45 -S'55181288' -p146 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p147 -sssS'grch37' -p148 -(dp149 -g37 -S'NC_000007.13:g.55248981_55248992dup' -p150 -sg39 -(dp151 -g41 -g60 -sg43 -S'TCCAGGAAGCCT' -p152 -sg45 -S'55248981' -p153 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p154 -sssS'grch38' -p155 -(dp156 -g37 -S'NC_000007.14:g.55181288_55181299dup' -p157 -sg39 -(dp158 -g41 -g60 -sg43 -S'TCCAGGAAGCCT' -p159 -sg45 -S'55181288' -p160 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p161 -ssssg71 -(dp162 -g73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005219.2' -p163 -sg75 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005228.4' -p164 -sssS'NM_001346898.1:c.2284-5_2290dup' -p165 -(dp166 -g3 -g4 -sg5 -(lp167 -S'NC_000007.13:g.55248992T>TTCCAGGAAGCCT automapped to NC_000007.13:g.55248981_55248992dupTCCAGGAAGCCT' -p168 -aS'RefSeqGene record not available' -p169 -asg12 -g4 -sg13 -(lp170 -sg15 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 6, mRNA -p171 -sg17 -S'EGFR' -p172 -sg19 -(dp173 -g21 -S'NP_001333827.1:p.?' -p174 -sg23 -S'NP_001333827.1:p.?' 
-p175 -ssg25 -g26 -sg27 -S'NC_000007.13(NM_001346898.1):c.2284-5_2290dup' -p176 -sg29 -g4 -sg30 -S'NM_001346898.1:c.2284-5_2290dup' -p177 -sg32 -g4 -sg33 -(dp178 -S'hg19' -p179 -(dp180 -g37 -S'NC_000007.13:g.55248981_55248992dup' -p181 -sg39 -(dp182 -g41 -g42 -sg43 -S'TCCAGGAAGCCT' -p183 -sg45 -S'55248981' -p184 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p185 -sssg49 -(dp186 -g37 -S'NC_000007.14:g.55181288_55181299dup' -p187 -sg39 -(dp188 -g41 -g42 -sg43 -S'TCCAGGAAGCCT' -p189 -sg45 -S'55181288' -p190 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p191 -sssS'grch37' -p192 -(dp193 -g37 -S'NC_000007.13:g.55248981_55248992dup' -p194 -sg39 -(dp195 -g41 -g60 -sg43 -S'TCCAGGAAGCCT' -p196 -sg45 -S'55248981' -p197 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p198 -sssS'grch38' -p199 -(dp200 -g37 -S'NC_000007.14:g.55181288_55181299dup' -p201 -sg39 -(dp202 -g41 -g60 -sg43 -S'TCCAGGAAGCCT' -p203 -sg45 -S'55181288' -p204 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p205 -ssssg71 -(dp206 -g73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333827.1' -p207 -sg75 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346898.1' -p208 -sssS'NM_001346941.1:c.1483-5_1489dup' -p209 -(dp210 -g3 -g4 -sg5 -(lp211 -S'NC_000007.13:g.55248992T>TTCCAGGAAGCCT automapped to NC_000007.13:g.55248981_55248992dupTCCAGGAAGCCT' -p212 -aS'RefSeqGene record not available' -p213 -asg12 -g4 -sg13 -(lp214 -sg15 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant EGFRvIII, mRNA -p215 -sg17 -S'EGFR' -p216 -sg19 -(dp217 -g21 -S'NP_001333870.1:p.?' -p218 -sg23 -S'NP_001333870.1:p.?' 
-p219 -ssg25 -g26 -sg27 -S'NC_000007.13(NM_001346941.1):c.1483-5_1489dup' -p220 -sg29 -g4 -sg30 -S'NM_001346941.1:c.1483-5_1489dup' -p221 -sg32 -g4 -sg33 -(dp222 -S'hg19' -p223 -(dp224 -g37 -S'NC_000007.13:g.55248981_55248992dup' -p225 -sg39 -(dp226 -g41 -g42 -sg43 -S'TCCAGGAAGCCT' -p227 -sg45 -S'55248981' -p228 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p229 -sssg49 -(dp230 -g37 -S'NC_000007.14:g.55181288_55181299dup' -p231 -sg39 -(dp232 -g41 -g42 -sg43 -S'TCCAGGAAGCCT' -p233 -sg45 -S'55181288' -p234 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p235 -sssS'grch37' -p236 -(dp237 -g37 -S'NC_000007.13:g.55248981_55248992dup' -p238 -sg39 -(dp239 -g41 -g60 -sg43 -S'TCCAGGAAGCCT' -p240 -sg45 -S'55248981' -p241 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p242 -sssS'grch38' -p243 -(dp244 -g37 -S'NC_000007.14:g.55181288_55181299dup' -p245 -sg39 -(dp246 -g41 -g60 -sg43 -S'TCCAGGAAGCCT' -p247 -sg45 -S'55181288' -p248 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p249 -ssssg71 -(dp250 -g73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333870.1' -p251 -sg75 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346941.1' -p252 -sssS'flag' -p253 -S'gene_variant' -p254 -sS'NM_001346900.1:c.2125-5_2131dup' -p255 -(dp256 -g3 -g4 -sg5 -(lp257 -S'NC_000007.13:g.55248992T>TTCCAGGAAGCCT automapped to NC_000007.13:g.55248981_55248992dupTCCAGGAAGCCT' -p258 -aS'RefSeqGene record not available' -p259 -asg12 -g4 -sg13 -(lp260 -sg15 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 8, mRNA -p261 -sg17 -S'EGFR' -p262 -sg19 -(dp263 -g21 -S'NP_001333829.1:p.?' -p264 -sg23 -S'NP_001333829.1:p.?' 
-p265 -ssg25 -g26 -sg27 -S'NC_000007.13(NM_001346900.1):c.2125-5_2131dup' -p266 -sg29 -g4 -sg30 -S'NM_001346900.1:c.2125-5_2131dup' -p267 -sg32 -g4 -sg33 -(dp268 -S'hg19' -p269 -(dp270 -g37 -S'NC_000007.13:g.55248981_55248992dup' -p271 -sg39 -(dp272 -g41 -g42 -sg43 -S'TCCAGGAAGCCT' -p273 -sg45 -S'55248981' -p274 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p275 -sssg49 -(dp276 -g37 -S'NC_000007.14:g.55181288_55181299dup' -p277 -sg39 -(dp278 -g41 -g42 -sg43 -S'TCCAGGAAGCCT' -p279 -sg45 -S'55181288' -p280 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p281 -sssS'grch37' -p282 -(dp283 -g37 -S'NC_000007.13:g.55248981_55248992dup' -p284 -sg39 -(dp285 -g41 -g60 -sg43 -S'TCCAGGAAGCCT' -p286 -sg45 -S'55248981' -p287 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p288 -sssS'grch38' -p289 -(dp290 -g37 -S'NC_000007.14:g.55181288_55181299dup' -p291 -sg39 -(dp292 -g41 -g60 -sg43 -S'TCCAGGAAGCCT' -p293 -sg45 -S'55181288' -p294 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p295 -ssssg71 -(dp296 -g73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333829.1' -p297 -sg75 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346900.1' -p298 -sssS'NR_047551.1:n.1272_1283dup' -p299 -(dp300 -g3 -g4 -sg5 -(lp301 -S'NC_000007.13:g.55248992T>TTCCAGGAAGCCT automapped to NC_000007.13:g.55248981_55248992dupTCCAGGAAGCCT' -p302 -aS'RefSeqGene record not available' -p303 -asg12 -g4 -sg13 -(lp304 -sg15 -VHomo sapiens EGFR antisense RNA 1 (EGFR-AS1), long non-coding RNA -p305 -sg17 -S'EGFR-AS1' -p306 -sg19 -(dp307 -g21 -S'Non-coding :n.' 
-p308 -sg23 -g308 -ssg25 -g26 -sg27 -g4 -sg29 -g4 -sg30 -S'NR_047551.1:n.1272_1283dup' -p309 -sg32 -g4 -sg33 -(dp310 -S'hg19' -p311 -(dp312 -g37 -S'NC_000007.13:g.55248981_55248992dup' -p313 -sg39 -(dp314 -g41 -g42 -sg43 -S'TCCAGGAAGCCT' -p315 -sg45 -S'55248981' -p316 -sg47 -VTCCAGGAAGCCTTCCAGGAAGCCT -p317 -sssg49 -(dp318 -g37 -S'NC_000007.14:g.55181288_55181299dup' -p319 -sg39 -(dp320 -g41 -g42 -sg43 -S'TCCAGGAAGCCT' -p321 -sg45 -S'55181288' -p322 -sg47 -VTCCAGGAAGCCTTCCAGGAAGCCT -p323 -sssS'grch37' -p324 -(dp325 -g37 -S'NC_000007.13:g.55248981_55248992dup' -p326 -sg39 -(dp327 -g41 -g60 -sg43 -S'TCCAGGAAGCCT' -p328 -sg45 -S'55248981' -p329 -sg47 -VTCCAGGAAGCCTTCCAGGAAGCCT -p330 -sssS'grch38' -p331 -(dp332 -g37 -S'NC_000007.14:g.55181288_55181299dup' -p333 -sg39 -(dp334 -g41 -g60 -sg43 -S'TCCAGGAAGCCT' -p335 -sg45 -S'55181288' -p336 -sg47 -VTCCAGGAAGCCTTCCAGGAAGCCT -p337 -ssssg71 -(dp338 -g75 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_047551.1' -p339 -sssS'NM_001346897.1:c.2149-5_2155dup' -p340 -(dp341 -g3 -g4 -sg5 -(lp342 -S'NC_000007.13:g.55248992T>TTCCAGGAAGCCT automapped to NC_000007.13:g.55248981_55248992dupTCCAGGAAGCCT' -p343 -aS'RefSeqGene record not available' -p344 -asg12 -g4 -sg13 -(lp345 -sg15 -VHomo sapiens epidermal growth factor receptor (EGFR), transcript variant 5, mRNA -p346 -sg17 -S'EGFR' -p347 -sg19 -(dp348 -g21 -S'NP_001333826.1:p.?' -p349 -sg23 -S'NP_001333826.1:p.?' 
-p350 -ssg25 -g26 -sg27 -S'NC_000007.13(NM_001346897.1):c.2149-5_2155dup' -p351 -sg29 -g4 -sg30 -S'NM_001346897.1:c.2149-5_2155dup' -p352 -sg32 -g4 -sg33 -(dp353 -S'hg19' -p354 -(dp355 -g37 -S'NC_000007.13:g.55248981_55248992dup' -p356 -sg39 -(dp357 -g41 -g42 -sg43 -S'TCCAGGAAGCCT' -p358 -sg45 -S'55248981' -p359 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p360 -sssg49 -(dp361 -g37 -S'NC_000007.14:g.55181288_55181299dup' -p362 -sg39 -(dp363 -g41 -g42 -sg43 -S'TCCAGGAAGCCT' -p364 -sg45 -S'55181288' -p365 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p366 -sssS'grch37' -p367 -(dp368 -g37 -S'NC_000007.13:g.55248981_55248992dup' -p369 -sg39 -(dp370 -g41 -g60 -sg43 -S'TCCAGGAAGCCT' -p371 -sg45 -S'55248981' -p372 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p373 -sssS'grch38' -p374 -(dp375 -g37 -S'NC_000007.14:g.55181288_55181299dup' -p376 -sg39 -(dp377 -g41 -g60 -sg43 -S'TCCAGGAAGCCT' -p378 -sg45 -S'55181288' -p379 -sg47 -S'TCCAGGAAGCCTTCCAGGAAGCCT' -p380 -ssssg71 -(dp381 -g73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333826.1' -p382 -sg75 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346897.1' -p383 -sssS'metadata' -p384 -(dp385 -S'variantvalidator_hgvs_version' -p386 -S'1.1.3' -p387 -sS'uta_schema' -p388 -S'uta_20180821' -p389 -sS'seqrepo_db' -p390 -S'2018-08-21' -p391 -sS'variantvalidator_version' -p392 -S'v0.2' -p393 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant298.txt b/VariantValidator/testing/testOutputsMasterITS/variant298.txt deleted file mode 100644 index 7951ff48..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant298.txt +++ /dev/null @@ -1,292 +0,0 @@ -(dp0 -S'NM_001540.4:c.82C>A' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens heat shock protein family B (small) member 1 (HSPB1), mRNA -p12 -sS'gene_symbol' -p13 -S'HSPB1' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001531.1:p.(Leu28Ile)' -p18 -sS'slr' -p19 -S'NP_001531.1:p.(L28I)' -p20 -ssS'submitted_variant' -p21 -S'7-75932111-C-A' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_001540.4:c.82C>A' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000007.13:g.75932111C>A' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr7' -p37 -sS'ref' -p38 -S'C' -p39 -sS'pos' -p40 -S'75932111' -p41 -sS'alt' -p42 -S'A' -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000007.14:g.76302794C>A' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'76302794' -p48 -sg42 -g43 -sssS'grch37' -p49 -(dp50 -g32 -S'NC_000007.13:g.75932111C>A' -p51 -sg34 -(dp52 -g36 -S'7' -p53 -sg38 -g39 -sg40 -S'75932111' -p54 -sg42 -g43 -sssS'grch38' -p55 -(dp56 -g32 -S'NC_000007.14:g.76302794C>A' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'76302794' -p59 -sg42 -g43 -ssssS'reference_sequence_records' -p60 -(dp61 -S'protein' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001531.1' -p63 -sS'transcript' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001540.4' -p65 -sssS'flag' -p66 -S'gene_variant' -p67 
-sS'metadata' -p68 -(dp69 -S'variantvalidator_hgvs_version' -p70 -S'1.1.3' -p71 -sS'uta_schema' -p72 -S'uta_20180821' -p73 -sS'seqrepo_db' -p74 -S'2018-08-21' -p75 -sS'variantvalidator_version' -p76 -S'v0.2' -p77 -ssS'NM_001540.3:c.82C>A' -p78 -(dp79 -g3 -g4 -sg5 -(lp80 -S'A more recent version of the selected reference sequence NM_001540.3 is available (NM_001540.4)' -p81 -aS'NM_001540.4:c.82C>A MUST be fully validated prior to use in reports' -p82 -aS'select_variants=NM_001540.4:c.82C>A' -p83 -aS'RefSeqGene record not available' -p84 -asg8 -g4 -sg9 -(lp85 -sg11 -VHomo sapiens heat shock protein family B (small) member 1 (HSPB1), mRNA -p86 -sg13 -S'HSPB1' -p87 -sg15 -(dp88 -g17 -S'NP_001531.1:p.(Leu28Ile)' -p89 -sg19 -S'NP_001531.1:p.(L28I)' -p90 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001540.3:c.82C>A' -p91 -sg27 -g4 -sg28 -(dp92 -S'hg19' -p93 -(dp94 -g32 -S'NC_000007.13:g.75932111C>A' -p95 -sg34 -(dp96 -g36 -g37 -sg38 -g39 -sg40 -S'75932111' -p97 -sg42 -g43 -sssg44 -(dp98 -g32 -S'NC_000007.14:g.76302794C>A' -p99 -sg34 -(dp100 -g36 -g37 -sg38 -g39 -sg40 -S'76302794' -p101 -sg42 -g43 -sssS'grch37' -p102 -(dp103 -g32 -S'NC_000007.13:g.75932111C>A' -p104 -sg34 -(dp105 -g36 -g53 -sg38 -g39 -sg40 -S'75932111' -p106 -sg42 -g43 -sssS'grch38' -p107 -(dp108 -g32 -S'NC_000007.14:g.76302794C>A' -p109 -sg34 -(dp110 -g36 -g53 -sg38 -g39 -sg40 -S'76302794' -p111 -sg42 -g43 -ssssg60 -(dp112 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001531.1' -p113 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001540.3' -p114 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant299.txt b/VariantValidator/testing/testOutputsMasterITS/variant299.txt deleted file mode 100644 index d507da8c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant299.txt +++ /dev/null @@ -1,304 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_005751.4:c.4004_4006dup' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'NC_000007.13:g.91652178A>AAAC automapped to NC_000007.13:g.91652179_91652181dupAAC' -p19 -aS'RefSeqGene record not available' -p20 -asS'refseqgene_context_intronic_sequence' -p21 -g16 -sS'alt_genomic_loci' -p22 -(lp23 -sS'transcript_description' -p24 -VHomo sapiens A-kinase anchoring protein 9 (AKAP9), transcript variant 2, mRNA -p25 -sS'gene_symbol' -p26 -S'AKAP9' -p27 -sS'hgvs_predicted_protein_consequence' -p28 -(dp29 -S'tlr' -p30 -S'NP_005742.4:p.(Lys1335_Leu1336insGln)' -p31 -sS'slr' -p32 -S'NP_005742.4:p.(K1335_L1336insQ)' -p33 -ssS'submitted_variant' -p34 -S'7-91652178-A-AAAC' -p35 -sS'genome_context_intronic_sequence' -p36 -g16 -sS'hgvs_lrg_variant' -p37 -g16 -sS'hgvs_transcript_variant' -p38 -S'NM_005751.4:c.4004_4006dup' -p39 -sS'hgvs_refseqgene_variant' -p40 -g16 -sS'primary_assembly_loci' -p41 -(dp42 -S'hg19' -p43 -(dp44 -S'hgvs_genomic_description' -p45 -S'NC_000007.13:g.91652179_91652181dup' -p46 -sS'vcf' -p47 -(dp48 -S'chr' -p49 -S'chr7' -p50 -sS'ref' -p51 -S'AAC' -p52 -sS'pos' -p53 -S'91652179' -p54 -sS'alt' -p55 -S'AACAAC' -p56 -sssS'hg38' -p57 -(dp58 -g45 -S'NC_000007.14:g.92022865_92022867dup' -p59 -sg47 -(dp60 -g49 -g50 -sg51 -S'AAC' -p61 -sg53 -S'92022865' -p62 -sg55 -S'AACAAC' -p63 -sssS'grch37' -p64 -(dp65 -g45 -S'NC_000007.13:g.91652179_91652181dup' -p66 -sg47 
-(dp67 -g49 -S'7' -p68 -sg51 -S'AAC' -p69 -sg53 -S'91652179' -p70 -sg55 -S'AACAAC' -p71 -sssS'grch38' -p72 -(dp73 -g45 -S'NC_000007.14:g.92022865_92022867dup' -p74 -sg47 -(dp75 -g49 -g68 -sg51 -S'AAC' -p76 -sg53 -S'92022865' -p77 -sg55 -S'AACAAC' -p78 -ssssS'reference_sequence_records' -p79 -(dp80 -S'protein' -p81 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005742.4' -p82 -sS'transcript' -p83 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005751.4' -p84 -sssS'NM_147185.2:c.4004_4006dup' -p85 -(dp86 -g15 -g16 -sg17 -(lp87 -S'NC_000007.13:g.91652178A>AAAC automapped to NC_000007.13:g.91652179_91652181dupAAC' -p88 -aS'RefSeqGene record not available' -p89 -asg21 -g16 -sg22 -(lp90 -sg24 -VHomo sapiens A-kinase anchoring protein 9 (AKAP9), transcript variant 3, mRNA -p91 -sg26 -S'AKAP9' -p92 -sg28 -(dp93 -g30 -S'NP_671714.1:p.(Lys1335_Leu1336insGln)' -p94 -sg32 -S'NP_671714.1:p.(K1335_L1336insQ)' -p95 -ssg34 -g35 -sg36 -g16 -sg37 -g16 -sg38 -S'NM_147185.2:c.4004_4006dup' -p96 -sg40 -g16 -sg41 -(dp97 -S'hg19' -p98 -(dp99 -g45 -S'NC_000007.13:g.91652179_91652181dup' -p100 -sg47 -(dp101 -g49 -g50 -sg51 -S'AAC' -p102 -sg53 -S'91652179' -p103 -sg55 -S'AACAAC' -p104 -sssg57 -(dp105 -g45 -S'NC_000007.14:g.92022865_92022867dup' -p106 -sg47 -(dp107 -g49 -g50 -sg51 -S'AAC' -p108 -sg53 -S'92022865' -p109 -sg55 -S'AACAAC' -p110 -sssS'grch37' -p111 -(dp112 -g45 -S'NC_000007.13:g.91652179_91652181dup' -p113 -sg47 -(dp114 -g49 -g68 -sg51 -S'AAC' -p115 -sg53 -S'91652179' -p116 -sg55 -S'AACAAC' -p117 -sssS'grch38' -p118 -(dp119 -g45 -S'NC_000007.14:g.92022865_92022867dup' -p120 -sg47 -(dp121 -g49 -g68 -sg51 -S'AAC' -p122 -sg53 -S'92022865' -p123 -sg55 -S'AACAAC' -p124 -ssssg79 -(dp125 -g81 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_671714.1' -p126 -sg83 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_147185.2' -p127 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant3.txt b/VariantValidator/testing/testOutputsMasterITS/variant3.txt deleted file mode 100644 index 7efe2ac2..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant3.txt +++ /dev/null @@ -1,171 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_015120.4:c.34C>T' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'RefSeqGene record not available' -p19 -asS'refseqgene_context_intronic_sequence' -p20 -g16 -sS'alt_genomic_loci' -p21 -(lp22 -sS'transcript_description' -p23 -VHomo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA -p24 -sS'gene_symbol' -p25 -S'ALMS1' -p26 -sS'hgvs_predicted_protein_consequence' -p27 -(dp28 -S'tlr' -p29 -S'NP_055935.4:p.(Leu12=)' -p30 -sS'slr' -p31 -S'NP_055935.4:p.(L12=)' -p32 -ssS'submitted_variant' -p33 -S'NM_015120.4:c.34C>T' -p34 -sS'genome_context_intronic_sequence' -p35 -g16 -sS'hgvs_lrg_variant' -p36 -g16 -sS'hgvs_transcript_variant' -p37 -S'NM_015120.4:c.34C>T' -p38 -sS'hgvs_refseqgene_variant' -p39 -g16 -sS'primary_assembly_loci' -p40 -(dp41 -S'hg19' -p42 -(dp43 -S'hgvs_genomic_description' -p44 -S'NC_000002.11:g.73613030C>T' -p45 -sS'vcf' -p46 -(dp47 -S'chr' -p48 -S'chr2' -p49 -sS'ref' -p50 -VC -p51 -sS'pos' -p52 -S'73613030' -p53 -sS'alt' -p54 -VT -p55 -sssS'hg38' -p56 -(dp57 -g44 -S'NC_000002.12:g.73385902C>T' -p58 -sg46 -(dp59 -g48 -g49 -sg50 -g51 -sg52 -S'73385902' -p60 -sg54 -g55 -sssS'grch37' -p61 -(dp62 -g44 -S'NC_000002.11:g.73613030C>T' -p63 -sg46 -(dp64 -g48 -S'2' -p65 -sg50 -g51 -sg52 -S'73613030' -p66 -sg54 -g55 -sssS'grch38' -p67 -(dp68 -g44 -S'NC_000002.12:g.73385902C>T' -p69 -sg46 -(dp70 -g48 -g65 -sg50 -g51 -sg52 -S'73385902' -p71 
-sg54 -g55 -ssssS'reference_sequence_records' -p72 -(dp73 -S'protein' -p74 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4' -p75 -sS'transcript' -p76 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4' -p77 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant30.txt b/VariantValidator/testing/testOutputsMasterITS/variant30.txt deleted file mode 100644 index 277e90b3..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant30.txt +++ /dev/null @@ -1,171 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.*1400_*1406=' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2:p.?' -p20 -sS'slr' -p21 -S'NP_000079.2:p.?' 
-p22 -ssS'submitted_variant' -p23 -S'NC_000017.10:g.48261457_48261463TTATGTT=' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_000088.3:c.*1400_*1406=' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000017.10:g.48261457_48261463=' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr17' -p39 -sS'ref' -p40 -VTTATGTT -p41 -sS'pos' -p42 -S'48261457' -p43 -sS'alt' -p44 -g41 -sssS'hg38' -p45 -(dp46 -g34 -S'NC_000017.11:g.50184096_50184102=' -p47 -sg36 -(dp48 -g38 -g39 -sg40 -VTTATGTT -p49 -sg42 -S'50184096' -p50 -sg44 -g49 -sssS'grch37' -p51 -(dp52 -g34 -S'NC_000017.10:g.48261457_48261463=' -p53 -sg36 -(dp54 -g38 -S'17' -p55 -sg40 -g41 -sg42 -S'48261457' -p56 -sg44 -g41 -sssS'grch38' -p57 -(dp58 -g34 -S'NC_000017.11:g.50184096_50184102=' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g49 -sg42 -S'50184096' -p61 -sg44 -g49 -ssssS'reference_sequence_records' -p62 -(dp63 -S'protein' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p65 -sS'transcript' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p67 -sssS'metadata' -p68 -(dp69 -S'variantvalidator_hgvs_version' -p70 -S'1.1.3' -p71 -sS'uta_schema' -p72 -S'uta_20180821' -p73 -sS'seqrepo_db' -p74 -S'2018-08-21' -p75 -sS'variantvalidator_version' -p76 -S'v0.2' -p77 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant300.txt b/VariantValidator/testing/testOutputsMasterITS/variant300.txt deleted file mode 100644 index e68159d5..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant300.txt +++ /dev/null @@ -1,294 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NR_149084.1:n.221+1140_221+1142del' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'NC_000007.13:g.117199644ATCT>A automapped to NC_000007.13:g.117199646_117199648delCTT' -p19 -aS'RefSeqGene record not available' -p20 -asS'refseqgene_context_intronic_sequence' -p21 -g16 -sS'alt_genomic_loci' -p22 -(lp23 -sS'transcript_description' -p24 -VHomo sapiens CFTR antisense RNA 1 (CFTR-AS1), long non-coding RNA -p25 -sS'gene_symbol' -p26 -S'CFTR-AS1' -p27 -sS'hgvs_predicted_protein_consequence' -p28 -(dp29 -S'tlr' -p30 -S'Non-coding :n.' 
-p31 -sS'slr' -p32 -g31 -ssS'submitted_variant' -p33 -S'7-117199644-ATCT-A' -p34 -sS'genome_context_intronic_sequence' -p35 -S'NC_000007.13(NR_149084.1):c.221+1140_221+1142del' -p36 -sS'hgvs_lrg_variant' -p37 -g16 -sS'hgvs_transcript_variant' -p38 -S'NR_149084.1:n.221+1140_221+1142del' -p39 -sS'hgvs_refseqgene_variant' -p40 -g16 -sS'primary_assembly_loci' -p41 -(dp42 -S'hg19' -p43 -(dp44 -S'hgvs_genomic_description' -p45 -S'NC_000007.13:g.117199645_117199647del' -p46 -sS'vcf' -p47 -(dp48 -S'chr' -p49 -S'chr7' -p50 -sS'ref' -p51 -S'ATCT' -p52 -sS'pos' -p53 -S'117199644' -p54 -sS'alt' -p55 -S'A' -p56 -sssS'hg38' -p57 -(dp58 -g45 -S'NC_000007.14:g.117559591_117559593del' -p59 -sg47 -(dp60 -g49 -g50 -sg51 -S'ATCT' -p61 -sg53 -S'117559590' -p62 -sg55 -g56 -sssS'grch37' -p63 -(dp64 -g45 -S'NC_000007.13:g.117199645_117199647del' -p65 -sg47 -(dp66 -g49 -S'7' -p67 -sg51 -S'ATCT' -p68 -sg53 -S'117199644' -p69 -sg55 -g56 -sssS'grch38' -p70 -(dp71 -g45 -S'NC_000007.14:g.117559591_117559593del' -p72 -sg47 -(dp73 -g49 -g67 -sg51 -S'ATCT' -p74 -sg53 -S'117559590' -p75 -sg55 -g56 -ssssS'reference_sequence_records' -p76 -(dp77 -S'transcript' -p78 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_149084.1' -p79 -sssS'NM_000492.3:c.1521_1523del' -p80 -(dp81 -g15 -g16 -sg17 -(lp82 -S'NC_000007.13:g.117199644ATCT>A automapped to NC_000007.13:g.117199646_117199648delCTT' -p83 -aS'RefSeqGene record not available' -p84 -asg21 -g16 -sg22 -(lp85 -sg24 -VHomo sapiens cystic fibrosis transmembrane conductance regulator (CFTR), mRNA -p86 -sg26 -S'CFTR' -p87 -sg28 -(dp88 -g30 -S'NP_000483.3:p.(Phe508del)' -p89 -sg32 -S'NP_000483.3:p.(F508del)' -p90 -ssg33 -g34 -sg35 -g16 -sg37 -g16 -sg38 -S'NM_000492.3:c.1521_1523del' -p91 -sg40 -g16 -sg41 -(dp92 -S'hg19' -p93 -(dp94 -g45 -S'NC_000007.13:g.117199646_117199648del' -p95 -sg47 -(dp96 -g49 -g50 -sg51 -S'ATCT' -p97 -sg53 -S'117199644' -p98 -sg55 -g56 -sssg57 -(dp99 -g45 -S'NC_000007.14:g.117559592_117559594del' -p100 -sg47 -(dp101 -g49 -g50 -sg51 -S'ATCT' 
-p102 -sg53 -S'117559590' -p103 -sg55 -g56 -sssS'grch37' -p104 -(dp105 -g45 -S'NC_000007.13:g.117199646_117199648del' -p106 -sg47 -(dp107 -g49 -g67 -sg51 -S'ATCT' -p108 -sg53 -S'117199644' -p109 -sg55 -g56 -sssS'grch38' -p110 -(dp111 -g45 -S'NC_000007.14:g.117559592_117559594del' -p112 -sg47 -(dp113 -g49 -g67 -sg51 -S'ATCT' -p114 -sg53 -S'117559590' -p115 -sg55 -g56 -ssssg76 -(dp116 -S'protein' -p117 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000483.3' -p118 -sg78 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000492.3' -p119 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant301.txt b/VariantValidator/testing/testOutputsMasterITS/variant301.txt deleted file mode 100644 index 6cd8e292..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant301.txt +++ /dev/null @@ -1,548 +0,0 @@ -(dp0 -S'NR_148928.1:n.2896_2897delinsAG' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000007.13:g.140453136AC>CT automapped to NC_000007.13:g.140453136_140453137delACinsCT' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 3, non-coding RNA -p13 -sS'gene_symbol' -p14 -S'BRAF' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'Non-coding :n.' 
-p19 -sS'slr' -p20 -g19 -ssS'submitted_variant' -p21 -S'7-140453136-AC-CT' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NR_148928.1:n.2896_2897delinsAG' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000007.13:g.140453136_140453137delinsCT' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr7' -p37 -sS'ref' -p38 -S'AC' -p39 -sS'pos' -p40 -S'140453136' -p41 -sS'alt' -p42 -VCT -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000007.14:g.140753336_140753337delinsCT' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -S'AC' -p48 -sg40 -S'140753336' -p49 -sg42 -VCT -p50 -sssS'grch37' -p51 -(dp52 -g32 -S'NC_000007.13:g.140453136_140453137delinsCT' -p53 -sg34 -(dp54 -g36 -S'7' -p55 -sg38 -S'AC' -p56 -sg40 -S'140453136' -p57 -sg42 -g43 -sssS'grch38' -p58 -(dp59 -g32 -S'NC_000007.14:g.140753336_140753337delinsCT' -p60 -sg34 -(dp61 -g36 -g55 -sg38 -S'AC' -p62 -sg40 -S'140753336' -p63 -sg42 -g50 -ssssS'reference_sequence_records' -p64 -(dp65 -S'transcript' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_148928.1' -p67 -sssS'NM_004333.4:c.1798_1799delinsAG' -p68 -(dp69 -g3 -g4 -sg5 -(lp70 -S'NC_000007.13:g.140453136AC>CT automapped to NC_000007.13:g.140453136_140453137delACinsCT' -p71 -aS'A more recent version of the selected reference sequence NM_004333.4 is available (NM_004333.5)' -p72 -aS'NM_004333.5:c.1798_1799delGTinsAG MUST be fully validated prior to use in reports' -p73 -aS'select_variants=NM_004333.5:c.1798_1799delinsAG' -p74 -aS'RefSeqGene record not available' -p75 -asg9 -g4 -sg10 -(lp76 -sg12 -VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), mRNA -p77 -sg14 -S'BRAF' -p78 -sg16 -(dp79 -g18 -S'NP_004324.2:p.(Val600Arg)' -p80 -sg20 -S'NP_004324.2:p.(V600R)' -p81 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_004333.4:c.1798_1799delinsAG' -p82 -sg27 -g4 -sg28 -(dp83 -S'hg19' -p84 -(dp85 -g32 
-S'NC_000007.13:g.140453136_140453137delinsCT' -p86 -sg34 -(dp87 -g36 -g37 -sg38 -S'AC' -p88 -sg40 -S'140453136' -p89 -sg42 -VCT -p90 -sssg44 -(dp91 -g32 -S'NC_000007.14:g.140753336_140753337delinsCT' -p92 -sg34 -(dp93 -g36 -g37 -sg38 -S'AC' -p94 -sg40 -S'140753336' -p95 -sg42 -VCT -p96 -sssS'grch37' -p97 -(dp98 -g32 -S'NC_000007.13:g.140453136_140453137delinsCT' -p99 -sg34 -(dp100 -g36 -g55 -sg38 -S'AC' -p101 -sg40 -S'140453136' -p102 -sg42 -g90 -sssS'grch38' -p103 -(dp104 -g32 -S'NC_000007.14:g.140753336_140753337delinsCT' -p105 -sg34 -(dp106 -g36 -g55 -sg38 -S'AC' -p107 -sg40 -S'140753336' -p108 -sg42 -g96 -ssssg64 -(dp109 -S'protein' -p110 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2' -p111 -sg66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.4' -p112 -sssS'NM_004333.5:c.1798_1799delinsAG' -p113 -(dp114 -g3 -g4 -sg5 -(lp115 -S'NC_000007.13:g.140453136AC>CT automapped to NC_000007.13:g.140453136_140453137delACinsCT' -p116 -aS'RefSeqGene record not available' -p117 -asg9 -g4 -sg10 -(lp118 -sg12 -VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 1, mRNA -p119 -sg14 -S'BRAF' -p120 -sg16 -(dp121 -g18 -S'NP_004324.2:p.(Val600Arg)' -p122 -sg20 -S'NP_004324.2:p.(V600R)' -p123 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_004333.5:c.1798_1799delinsAG' -p124 -sg27 -g4 -sg28 -(dp125 -S'hg19' -p126 -(dp127 -g32 -S'NC_000007.13:g.140453136_140453137delinsCT' -p128 -sg34 -(dp129 -g36 -g37 -sg38 -S'AC' -p130 -sg40 -S'140453136' -p131 -sg42 -VCT -p132 -sssg44 -(dp133 -g32 -S'NC_000007.14:g.140753336_140753337delinsCT' -p134 -sg34 -(dp135 -g36 -g37 -sg38 -S'AC' -p136 -sg40 -S'140753336' -p137 -sg42 -VCT -p138 -sssS'grch37' -p139 -(dp140 -g32 -S'NC_000007.13:g.140453136_140453137delinsCT' -p141 -sg34 -(dp142 -g36 -g55 -sg38 -S'AC' -p143 -sg40 -S'140453136' -p144 -sg42 -g132 -sssS'grch38' -p145 -(dp146 -g32 -S'NC_000007.14:g.140753336_140753337delinsCT' -p147 -sg34 -(dp148 -g36 -g55 -sg38 -S'AC' -p149 -sg40 -S'140753336' -p150 -sg42 
-g138 -ssssg64 -(dp151 -g110 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2' -p152 -sg66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.5' -p153 -sssS'flag' -p154 -S'gene_variant' -p155 -sS'NM_001354609.1:c.1798_1799delinsAG' -p156 -(dp157 -g3 -g4 -sg5 -(lp158 -S'NC_000007.13:g.140453136AC>CT automapped to NC_000007.13:g.140453136_140453137delACinsCT' -p159 -aS'RefSeqGene record not available' -p160 -asg9 -g4 -sg10 -(lp161 -sg12 -VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 2, mRNA -p162 -sg14 -S'BRAF' -p163 -sg16 -(dp164 -g18 -S'NP_001341538.1:p.(Val600Arg)' -p165 -sg20 -S'NP_001341538.1:p.(V600R)' -p166 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001354609.1:c.1798_1799delinsAG' -p167 -sg27 -g4 -sg28 -(dp168 -S'hg19' -p169 -(dp170 -g32 -S'NC_000007.13:g.140453136_140453137delinsCT' -p171 -sg34 -(dp172 -g36 -g37 -sg38 -S'AC' -p173 -sg40 -S'140453136' -p174 -sg42 -VCT -p175 -sssg44 -(dp176 -g32 -S'NC_000007.14:g.140753336_140753337delinsCT' -p177 -sg34 -(dp178 -g36 -g37 -sg38 -S'AC' -p179 -sg40 -S'140753336' -p180 -sg42 -VCT -p181 -sssS'grch37' -p182 -(dp183 -g32 -S'NC_000007.13:g.140453136_140453137delinsCT' -p184 -sg34 -(dp185 -g36 -g55 -sg38 -S'AC' -p186 -sg40 -S'140453136' -p187 -sg42 -g175 -sssS'grch38' -p188 -(dp189 -g32 -S'NC_000007.14:g.140753336_140753337delinsCT' -p190 -sg34 -(dp191 -g36 -g55 -sg38 -S'AC' -p192 -sg40 -S'140753336' -p193 -sg42 -g181 -ssssg64 -(dp194 -g110 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341538.1' -p195 -sg66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354609.1' -p196 -sssS'metadata' -p197 -(dp198 -S'variantvalidator_hgvs_version' -p199 -S'1.1.3' -p200 -sS'uta_schema' -p201 -S'uta_20180821' -p202 -sS'seqrepo_db' -p203 -S'2018-08-21' -p204 -sS'variantvalidator_version' -p205 -S'v0.2' -p206 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant302.txt b/VariantValidator/testing/testOutputsMasterITS/variant302.txt deleted file mode 100644 index 9977ca2c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant302.txt +++ /dev/null @@ -1,518 +0,0 @@ -(dp0 -S'NM_001354609.1:c.1799T>A' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 2, mRNA -p12 -sS'gene_symbol' -p13 -S'BRAF' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001341538.1:p.(Val600Glu)' -p18 -sS'slr' -p19 -S'NP_001341538.1:p.(V600E)' -p20 -ssS'submitted_variant' -p21 -S'7-140453136-A-T' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_001354609.1:c.1799T>A' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000007.13:g.140453136A>T' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr7' -p37 -sS'ref' -p38 -VA -p39 -sS'pos' -p40 -S'140453136' -p41 -sS'alt' -p42 -VT -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000007.14:g.140753336A>T' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'140753336' -p48 -sg42 -g43 -sssS'grch37' -p49 -(dp50 -g32 -S'NC_000007.13:g.140453136A>T' -p51 -sg34 -(dp52 -g36 -S'7' -p53 -sg38 -g39 -sg40 -S'140453136' -p54 -sg42 -g43 -sssS'grch38' -p55 -(dp56 -g32 -S'NC_000007.14:g.140753336A>T' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'140753336' -p59 -sg42 -g43 -ssssS'reference_sequence_records' -p60 -(dp61 -S'protein' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341538.1' -p63 -sS'transcript' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354609.1' 
-p65 -sssS'NR_148928.1:n.2897T>A' -p66 -(dp67 -g3 -g4 -sg5 -(lp68 -S'RefSeqGene record not available' -p69 -asg8 -g4 -sg9 -(lp70 -sg11 -VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 3, non-coding RNA -p71 -sg13 -S'BRAF' -p72 -sg15 -(dp73 -g17 -S'Non-coding :n.' -p74 -sg19 -g74 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NR_148928.1:n.2897T>A' -p75 -sg27 -g4 -sg28 -(dp76 -S'hg19' -p77 -(dp78 -g32 -S'NC_000007.13:g.140453136A>T' -p79 -sg34 -(dp80 -g36 -g37 -sg38 -g39 -sg40 -S'140453136' -p81 -sg42 -g43 -sssg44 -(dp82 -g32 -S'NC_000007.14:g.140753336A>T' -p83 -sg34 -(dp84 -g36 -g37 -sg38 -g39 -sg40 -S'140753336' -p85 -sg42 -g43 -sssS'grch37' -p86 -(dp87 -g32 -S'NC_000007.13:g.140453136A>T' -p88 -sg34 -(dp89 -g36 -g53 -sg38 -g39 -sg40 -S'140453136' -p90 -sg42 -g43 -sssS'grch38' -p91 -(dp92 -g32 -S'NC_000007.14:g.140753336A>T' -p93 -sg34 -(dp94 -g36 -g53 -sg38 -g39 -sg40 -S'140753336' -p95 -sg42 -g43 -ssssg60 -(dp96 -g64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_148928.1' -p97 -sssS'NM_004333.5:c.1799T>A' -p98 -(dp99 -g3 -g4 -sg5 -(lp100 -S'RefSeqGene record not available' -p101 -asg8 -g4 -sg9 -(lp102 -sg11 -VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 1, mRNA -p103 -sg13 -S'BRAF' -p104 -sg15 -(dp105 -g17 -S'NP_004324.2:p.(Val600Glu)' -p106 -sg19 -S'NP_004324.2:p.(V600E)' -p107 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_004333.5:c.1799T>A' -p108 -sg27 -g4 -sg28 -(dp109 -S'hg19' -p110 -(dp111 -g32 -S'NC_000007.13:g.140453136A>T' -p112 -sg34 -(dp113 -g36 -g37 -sg38 -g39 -sg40 -S'140453136' -p114 -sg42 -g43 -sssg44 -(dp115 -g32 -S'NC_000007.14:g.140753336A>T' -p116 -sg34 -(dp117 -g36 -g37 -sg38 -g39 -sg40 -S'140753336' -p118 -sg42 -g43 -sssS'grch37' -p119 -(dp120 -g32 -S'NC_000007.13:g.140453136A>T' -p121 -sg34 -(dp122 -g36 -g53 -sg38 -g39 -sg40 -S'140453136' -p123 -sg42 -g43 -sssS'grch38' -p124 -(dp125 -g32 -S'NC_000007.14:g.140753336A>T' -p126 -sg34 -(dp127 -g36 -g53 -sg38 -g39 -sg40 
-S'140753336' -p128 -sg42 -g43 -ssssg60 -(dp129 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2' -p130 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.5' -p131 -sssS'flag' -p132 -S'gene_variant' -p133 -sS'NM_004333.4:c.1799T>A' -p134 -(dp135 -g3 -g4 -sg5 -(lp136 -S'A more recent version of the selected reference sequence NM_004333.4 is available (NM_004333.5)' -p137 -aS'NM_004333.5:c.1799T>A MUST be fully validated prior to use in reports' -p138 -aS'select_variants=NM_004333.5:c.1799T>A' -p139 -aS'RefSeqGene record not available' -p140 -asg8 -g4 -sg9 -(lp141 -sg11 -VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), mRNA -p142 -sg13 -S'BRAF' -p143 -sg15 -(dp144 -g17 -S'NP_004324.2:p.(Val600Glu)' -p145 -sg19 -S'NP_004324.2:p.(V600E)' -p146 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_004333.4:c.1799T>A' -p147 -sg27 -g4 -sg28 -(dp148 -S'hg19' -p149 -(dp150 -g32 -S'NC_000007.13:g.140453136A>T' -p151 -sg34 -(dp152 -g36 -g37 -sg38 -g39 -sg40 -S'140453136' -p153 -sg42 -g43 -sssg44 -(dp154 -g32 -S'NC_000007.14:g.140753336A>T' -p155 -sg34 -(dp156 -g36 -g37 -sg38 -g39 -sg40 -S'140753336' -p157 -sg42 -g43 -sssS'grch37' -p158 -(dp159 -g32 -S'NC_000007.13:g.140453136A>T' -p160 -sg34 -(dp161 -g36 -g53 -sg38 -g39 -sg40 -S'140453136' -p162 -sg42 -g43 -sssS'grch38' -p163 -(dp164 -g32 -S'NC_000007.14:g.140753336A>T' -p165 -sg34 -(dp166 -g36 -g53 -sg38 -g39 -sg40 -S'140753336' -p167 -sg42 -g43 -ssssg60 -(dp168 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2' -p169 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.4' -p170 -sssS'metadata' -p171 -(dp172 -S'variantvalidator_hgvs_version' -p173 -S'1.1.3' -p174 -sS'uta_schema' -p175 -S'uta_20180821' -p176 -sS'seqrepo_db' -p177 -S'2018-08-21' -p178 -sS'variantvalidator_version' -p179 -S'v0.2' -p180 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant303.txt b/VariantValidator/testing/testOutputsMasterITS/variant303.txt deleted file mode 100644 index f7a06ef7..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant303.txt +++ /dev/null @@ -1,518 +0,0 @@ -(dp0 -S'NR_148928.1:n.2896G>A' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 3, non-coding RNA -p12 -sS'gene_symbol' -p13 -S'BRAF' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'Non-coding :n.' -p18 -sS'slr' -p19 -g18 -ssS'submitted_variant' -p20 -S'7-140453137-C-T' -p21 -sS'genome_context_intronic_sequence' -p22 -g4 -sS'hgvs_lrg_variant' -p23 -g4 -sS'hgvs_transcript_variant' -p24 -S'NR_148928.1:n.2896G>A' -p25 -sS'hgvs_refseqgene_variant' -p26 -g4 -sS'primary_assembly_loci' -p27 -(dp28 -S'hg19' -p29 -(dp30 -S'hgvs_genomic_description' -p31 -S'NC_000007.13:g.140453137C>T' -p32 -sS'vcf' -p33 -(dp34 -S'chr' -p35 -S'chr7' -p36 -sS'ref' -p37 -VC -p38 -sS'pos' -p39 -S'140453137' -p40 -sS'alt' -p41 -VT -p42 -sssS'hg38' -p43 -(dp44 -g31 -S'NC_000007.14:g.140753337C>T' -p45 -sg33 -(dp46 -g35 -g36 -sg37 -g38 -sg39 -S'140753337' -p47 -sg41 -g42 -sssS'grch37' -p48 -(dp49 -g31 -S'NC_000007.13:g.140453137C>T' -p50 -sg33 -(dp51 -g35 -S'7' -p52 -sg37 -g38 -sg39 -S'140453137' -p53 -sg41 -g42 -sssS'grch38' -p54 -(dp55 -g31 -S'NC_000007.14:g.140753337C>T' -p56 -sg33 -(dp57 -g35 -g52 -sg37 -g38 -sg39 -S'140753337' -p58 -sg41 -g42 -ssssS'reference_sequence_records' -p59 -(dp60 -S'transcript' -p61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_148928.1' -p62 -sssS'NM_004333.5:c.1798G>A' -p63 -(dp64 -g3 -g4 -sg5 -(lp65 -S'RefSeqGene record not available' -p66 -asg8 -g4 
-sg9 -(lp67 -sg11 -VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 1, mRNA -p68 -sg13 -S'BRAF' -p69 -sg15 -(dp70 -g17 -S'NP_004324.2:p.(Val600Met)' -p71 -sg19 -S'NP_004324.2:p.(V600M)' -p72 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_004333.5:c.1798G>A' -p73 -sg26 -g4 -sg27 -(dp74 -S'hg19' -p75 -(dp76 -g31 -S'NC_000007.13:g.140453137C>T' -p77 -sg33 -(dp78 -g35 -g36 -sg37 -g38 -sg39 -S'140453137' -p79 -sg41 -g42 -sssg43 -(dp80 -g31 -S'NC_000007.14:g.140753337C>T' -p81 -sg33 -(dp82 -g35 -g36 -sg37 -g38 -sg39 -S'140753337' -p83 -sg41 -g42 -sssS'grch37' -p84 -(dp85 -g31 -S'NC_000007.13:g.140453137C>T' -p86 -sg33 -(dp87 -g35 -g52 -sg37 -g38 -sg39 -S'140453137' -p88 -sg41 -g42 -sssS'grch38' -p89 -(dp90 -g31 -S'NC_000007.14:g.140753337C>T' -p91 -sg33 -(dp92 -g35 -g52 -sg37 -g38 -sg39 -S'140753337' -p93 -sg41 -g42 -ssssg59 -(dp94 -S'protein' -p95 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2' -p96 -sg61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.5' -p97 -sssS'NM_004333.4:c.1798G>A' -p98 -(dp99 -g3 -g4 -sg5 -(lp100 -S'A more recent version of the selected reference sequence NM_004333.4 is available (NM_004333.5)' -p101 -aS'NM_004333.5:c.1798G>A MUST be fully validated prior to use in reports' -p102 -aS'select_variants=NM_004333.5:c.1798G>A' -p103 -aS'RefSeqGene record not available' -p104 -asg8 -g4 -sg9 -(lp105 -sg11 -VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), mRNA -p106 -sg13 -S'BRAF' -p107 -sg15 -(dp108 -g17 -S'NP_004324.2:p.(Val600Met)' -p109 -sg19 -S'NP_004324.2:p.(V600M)' -p110 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_004333.4:c.1798G>A' -p111 -sg26 -g4 -sg27 -(dp112 -S'hg19' -p113 -(dp114 -g31 -S'NC_000007.13:g.140453137C>T' -p115 -sg33 -(dp116 -g35 -g36 -sg37 -g38 -sg39 -S'140453137' -p117 -sg41 -g42 -sssg43 -(dp118 -g31 -S'NC_000007.14:g.140753337C>T' -p119 -sg33 -(dp120 -g35 -g36 -sg37 -g38 -sg39 -S'140753337' -p121 -sg41 -g42 -sssS'grch37' -p122 -(dp123 -g31 
-S'NC_000007.13:g.140453137C>T' -p124 -sg33 -(dp125 -g35 -g52 -sg37 -g38 -sg39 -S'140453137' -p126 -sg41 -g42 -sssS'grch38' -p127 -(dp128 -g31 -S'NC_000007.14:g.140753337C>T' -p129 -sg33 -(dp130 -g35 -g52 -sg37 -g38 -sg39 -S'140753337' -p131 -sg41 -g42 -ssssg59 -(dp132 -g95 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2' -p133 -sg61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.4' -p134 -sssS'NM_001354609.1:c.1798G>A' -p135 -(dp136 -g3 -g4 -sg5 -(lp137 -S'RefSeqGene record not available' -p138 -asg8 -g4 -sg9 -(lp139 -sg11 -VHomo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 2, mRNA -p140 -sg13 -S'BRAF' -p141 -sg15 -(dp142 -g17 -S'NP_001341538.1:p.(Val600Met)' -p143 -sg19 -S'NP_001341538.1:p.(V600M)' -p144 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_001354609.1:c.1798G>A' -p145 -sg26 -g4 -sg27 -(dp146 -S'hg19' -p147 -(dp148 -g31 -S'NC_000007.13:g.140453137C>T' -p149 -sg33 -(dp150 -g35 -g36 -sg37 -g38 -sg39 -S'140453137' -p151 -sg41 -g42 -sssg43 -(dp152 -g31 -S'NC_000007.14:g.140753337C>T' -p153 -sg33 -(dp154 -g35 -g36 -sg37 -g38 -sg39 -S'140753337' -p155 -sg41 -g42 -sssS'grch37' -p156 -(dp157 -g31 -S'NC_000007.13:g.140453137C>T' -p158 -sg33 -(dp159 -g35 -g52 -sg37 -g38 -sg39 -S'140453137' -p160 -sg41 -g42 -sssS'grch38' -p161 -(dp162 -g31 -S'NC_000007.14:g.140753337C>T' -p163 -sg33 -(dp164 -g35 -g52 -sg37 -g38 -sg39 -S'140753337' -p165 -sg41 -g42 -ssssg59 -(dp166 -g95 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341538.1' -p167 -sg61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354609.1' -p168 -sssS'flag' -p169 -S'gene_variant' -p170 -sS'metadata' -p171 -(dp172 -S'variantvalidator_hgvs_version' -p173 -S'1.1.3' -p174 -sS'uta_schema' -p175 -S'uta_20180821' -p176 -sS'seqrepo_db' -p177 -S'2018-08-21' -p178 -sS'variantvalidator_version' -p179 -S'v0.2' -p180 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant304.txt b/VariantValidator/testing/testOutputsMasterITS/variant304.txt deleted file mode 100644 index 150eff4f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant304.txt +++ /dev/null @@ -1,284 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000083.2:c.180+3A>T' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 1, mRNA -p14 -sS'gene_symbol' -p15 -S'CLCN1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000074.2:p.?' -p20 -sS'slr' -p21 -S'NP_000074.2:p.?' -p22 -ssS'submitted_variant' -p23 -S'7-143013488-A-T' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000007.13(NM_000083.2):c.180+3A>T' -p26 -sS'hgvs_lrg_variant' -p27 -g6 -sS'hgvs_transcript_variant' -p28 -S'NM_000083.2:c.180+3A>T' -p29 -sS'hgvs_refseqgene_variant' -p30 -g6 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'hgvs_genomic_description' -p35 -S'NC_000007.13:g.143013488A>T' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr7' -p40 -sS'ref' -p41 -S'A' -p42 -sS'pos' -p43 -S'143013488' -p44 -sS'alt' -p45 -S'T' -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000007.14:g.143316395A>T' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -g42 -sg43 -S'143316395' -p51 -sg45 -g46 -sssS'grch37' -p52 -(dp53 -g35 -S'NC_000007.13:g.143013488A>T' -p54 -sg37 -(dp55 -g39 -S'7' -p56 -sg41 -g42 -sg43 -S'143013488' -p57 -sg45 -g46 -sssS'grch38' -p58 -(dp59 -g35 -S'NC_000007.14:g.143316395A>T' -p60 -sg37 -(dp61 -g39 -g56 -sg41 -g42 -sg43 -S'143316395' -p62 -sg45 -g46 -ssssS'reference_sequence_records' -p63 -(dp64 -S'protein' -p65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000074.2' -p66 -sS'transcript' -p67 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000083.2' -p68 -sssS'NR_046453.1:n.267+3A>T' -p69 -(dp70 -g5 -g6 -sg7 -(lp71 -S'RefSeqGene record not available' -p72 -asg10 -g6 -sg11 -(lp73 -sg13 -VHomo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 2, non-coding RNA -p74 -sg15 -S'CLCN1' -p75 -sg17 -(dp76 -g19 -S'Non-coding :n.' -p77 -sg21 -g77 -ssg23 -g24 -sg25 -S'NC_000007.13(NR_046453.1):c.267+3A>T' -p78 -sg27 -g6 -sg28 -S'NR_046453.1:n.267+3A>T' -p79 -sg30 -g6 -sg31 -(dp80 -S'hg19' -p81 -(dp82 -g35 -S'NC_000007.13:g.143013488A>T' -p83 -sg37 -(dp84 -g39 -g40 -sg41 -g42 -sg43 -S'143013488' -p85 -sg45 -g46 -sssg47 -(dp86 -g35 -S'NC_000007.14:g.143316395A>T' -p87 -sg37 -(dp88 -g39 -g40 -sg41 -g42 -sg43 -S'143316395' -p89 -sg45 -g46 -sssS'grch37' -p90 -(dp91 -g35 -S'NC_000007.13:g.143013488A>T' -p92 -sg37 -(dp93 -g39 -g56 -sg41 -g42 -sg43 -S'143013488' -p94 -sg45 -g46 -sssS'grch38' -p95 -(dp96 -g35 -S'NC_000007.14:g.143316395A>T' -p97 -sg37 -(dp98 -g39 -g56 -sg41 -g42 -sg43 -S'143316395' -p99 -sg45 -g46 -ssssg63 -(dp100 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_046453.1' -p101 -sssS'metadata' -p102 -(dp103 -S'variantvalidator_hgvs_version' -p104 -S'1.1.3' -p105 -sS'uta_schema' -p106 -S'uta_20180821' -p107 -sS'seqrepo_db' -p108 -S'2018-08-21' -p109 -sS'variantvalidator_version' -p110 -S'v0.2' -p111 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant305.txt b/VariantValidator/testing/testOutputsMasterITS/variant305.txt deleted file mode 100644 index cec1f10a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant305.txt +++ /dev/null @@ -1,282 +0,0 @@ -(dp0 -S'NR_046453.1:n.776G>A' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 2, non-coding RNA -p12 -sS'gene_symbol' -p13 -S'CLCN1' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'Non-coding :n.' -p18 -sS'slr' -p19 -g18 -ssS'submitted_variant' -p20 -S'7-143018934-G-A' -p21 -sS'genome_context_intronic_sequence' -p22 -g4 -sS'hgvs_lrg_variant' -p23 -g4 -sS'hgvs_transcript_variant' -p24 -S'NR_046453.1:n.776G>A' -p25 -sS'hgvs_refseqgene_variant' -p26 -g4 -sS'primary_assembly_loci' -p27 -(dp28 -S'hg19' -p29 -(dp30 -S'hgvs_genomic_description' -p31 -S'NC_000007.13:g.143018934G>A' -p32 -sS'vcf' -p33 -(dp34 -S'chr' -p35 -S'chr7' -p36 -sS'ref' -p37 -S'G' -p38 -sS'pos' -p39 -S'143018934' -p40 -sS'alt' -p41 -S'A' -p42 -sssS'hg38' -p43 -(dp44 -g31 -S'NC_000007.14:g.143321841G>A' -p45 -sg33 -(dp46 -g35 -g36 -sg37 -g38 -sg39 -S'143321841' -p47 -sg41 -g42 -sssS'grch37' -p48 -(dp49 -g31 -S'NC_000007.13:g.143018934G>A' -p50 -sg33 -(dp51 -g35 -S'7' -p52 -sg37 -g38 -sg39 -S'143018934' -p53 -sg41 -g42 -sssS'grch38' -p54 -(dp55 -g31 -S'NC_000007.14:g.143321841G>A' -p56 -sg33 -(dp57 -g35 -g52 -sg37 -g38 -sg39 -S'143321841' -p58 -sg41 -g42 -ssssS'reference_sequence_records' -p59 -(dp60 -S'transcript' -p61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_046453.1' -p62 -sssS'flag' -p63 -S'gene_variant' -p64 -sS'NM_000083.2:c.689G>A' -p65 -(dp66 -g3 -g4 -sg5 -(lp67 -S'RefSeqGene record not 
available' -p68 -asg8 -g4 -sg9 -(lp69 -sg11 -VHomo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 1, mRNA -p70 -sg13 -S'CLCN1' -p71 -sg15 -(dp72 -g17 -S'NP_000074.2:p.(Gly230Glu)' -p73 -sg19 -S'NP_000074.2:p.(G230E)' -p74 -ssg20 -g21 -sg22 -g4 -sg23 -g4 -sg24 -S'NM_000083.2:c.689G>A' -p75 -sg26 -g4 -sg27 -(dp76 -S'hg19' -p77 -(dp78 -g31 -S'NC_000007.13:g.143018934G>A' -p79 -sg33 -(dp80 -g35 -g36 -sg37 -g38 -sg39 -S'143018934' -p81 -sg41 -g42 -sssg43 -(dp82 -g31 -S'NC_000007.14:g.143321841G>A' -p83 -sg33 -(dp84 -g35 -g36 -sg37 -g38 -sg39 -S'143321841' -p85 -sg41 -g42 -sssS'grch37' -p86 -(dp87 -g31 -S'NC_000007.13:g.143018934G>A' -p88 -sg33 -(dp89 -g35 -g52 -sg37 -g38 -sg39 -S'143018934' -p90 -sg41 -g42 -sssS'grch38' -p91 -(dp92 -g31 -S'NC_000007.14:g.143321841G>A' -p93 -sg33 -(dp94 -g35 -g52 -sg37 -g38 -sg39 -S'143321841' -p95 -sg41 -g42 -ssssg59 -(dp96 -S'protein' -p97 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000074.2' -p98 -sg61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000083.2' -p99 -sssS'metadata' -p100 -(dp101 -S'variantvalidator_hgvs_version' -p102 -S'1.1.3' -p103 -sS'uta_schema' -p104 -S'uta_20180821' -p105 -sS'seqrepo_db' -p106 -S'2018-08-21' -p107 -sS'variantvalidator_version' -p108 -S'v0.2' -p109 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant306.txt b/VariantValidator/testing/testOutputsMasterITS/variant306.txt deleted file mode 100644 index 0e383f0a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant306.txt +++ /dev/null @@ -1,282 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NR_046453.1:n.2620C>T' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'RefSeqGene record not available' -p19 -asS'refseqgene_context_intronic_sequence' -p20 -g16 -sS'alt_genomic_loci' -p21 -(lp22 -sS'transcript_description' -p23 -VHomo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 2, non-coding RNA -p24 -sS'gene_symbol' -p25 -S'CLCN1' -p26 -sS'hgvs_predicted_protein_consequence' -p27 -(dp28 -S'tlr' -p29 -S'Non-coding :n.' 
-p30 -sS'slr' -p31 -g30 -ssS'submitted_variant' -p32 -S'7-143048771-C-T' -p33 -sS'genome_context_intronic_sequence' -p34 -g16 -sS'hgvs_lrg_variant' -p35 -g16 -sS'hgvs_transcript_variant' -p36 -S'NR_046453.1:n.2620C>T' -p37 -sS'hgvs_refseqgene_variant' -p38 -g16 -sS'primary_assembly_loci' -p39 -(dp40 -S'hg19' -p41 -(dp42 -S'hgvs_genomic_description' -p43 -S'NC_000007.13:g.143048771C>T' -p44 -sS'vcf' -p45 -(dp46 -S'chr' -p47 -S'chr7' -p48 -sS'ref' -p49 -S'C' -p50 -sS'pos' -p51 -S'143048771' -p52 -sS'alt' -p53 -S'T' -p54 -sssS'hg38' -p55 -(dp56 -g43 -S'NC_000007.14:g.143351678C>T' -p57 -sg45 -(dp58 -g47 -g48 -sg49 -g50 -sg51 -S'143351678' -p59 -sg53 -g54 -sssS'grch37' -p60 -(dp61 -g43 -S'NC_000007.13:g.143048771C>T' -p62 -sg45 -(dp63 -g47 -S'7' -p64 -sg49 -g50 -sg51 -S'143048771' -p65 -sg53 -g54 -sssS'grch38' -p66 -(dp67 -g43 -S'NC_000007.14:g.143351678C>T' -p68 -sg45 -(dp69 -g47 -g64 -sg49 -g50 -sg51 -S'143351678' -p70 -sg53 -g54 -ssssS'reference_sequence_records' -p71 -(dp72 -S'transcript' -p73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_046453.1' -p74 -sssS'NM_000083.2:c.2680C>T' -p75 -(dp76 -g15 -g16 -sg17 -(lp77 -S'RefSeqGene record not available' -p78 -asg20 -g16 -sg21 -(lp79 -sg23 -VHomo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 1, mRNA -p80 -sg25 -S'CLCN1' -p81 -sg27 -(dp82 -g29 -S'NP_000074.2:p.(Arg894Ter)' -p83 -sg31 -S'NP_000074.2:p.(R894*)' -p84 -ssg32 -g33 -sg34 -g16 -sg35 -g16 -sg36 -S'NM_000083.2:c.2680C>T' -p85 -sg38 -g16 -sg39 -(dp86 -S'hg19' -p87 -(dp88 -g43 -S'NC_000007.13:g.143048771C>T' -p89 -sg45 -(dp90 -g47 -g48 -sg49 -g50 -sg51 -S'143048771' -p91 -sg53 -g54 -sssg55 -(dp92 -g43 -S'NC_000007.14:g.143351678C>T' -p93 -sg45 -(dp94 -g47 -g48 -sg49 -g50 -sg51 -S'143351678' -p95 -sg53 -g54 -sssS'grch37' -p96 -(dp97 -g43 -S'NC_000007.13:g.143048771C>T' -p98 -sg45 -(dp99 -g47 -g64 -sg49 -g50 -sg51 -S'143048771' -p100 -sg53 -g54 -sssS'grch38' -p101 -(dp102 -g43 -S'NC_000007.14:g.143351678C>T' -p103 -sg45 -(dp104 -g47 -g64 -sg49 
-g50 -sg51 -S'143351678' -p105 -sg53 -g54 -ssssg71 -(dp106 -S'protein' -p107 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000074.2' -p108 -sg73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000083.2' -p109 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant307.txt b/VariantValidator/testing/testOutputsMasterITS/variant307.txt deleted file mode 100644 index 219b49d9..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant307.txt +++ /dev/null @@ -1,596 +0,0 @@ -(dp0 -S'NM_014629.3:c.2399C>T' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -(dp11 -S'grch38' -p12 -(dp13 -S'hgvs_genomic_description' -p14 -S'NT_187576.1:g.107161C>T' -p15 -sS'vcf' -p16 -(dp17 -S'chr' -p18 -S'HSCHR8_8_CTG1' -p19 -sS'ref' -p20 -S'C' -p21 -sS'pos' -p22 -S'107161' -p23 -sS'alt' -p24 -S'T' -p25 -sssa(dp26 -S'hg38' -p27 -(dp28 -g14 -S'NT_187576.1:g.107161C>T' -p29 -sg16 -(dp30 -g18 -S'chr8_KI270821v1_alt' -p31 -sg20 -g21 -sg22 -S'107161' -p32 -sg24 -g25 -sssasS'transcript_description' -p33 -VHomo sapiens Rho guanine nucleotide exchange factor 10 (ARHGEF10), transcript variant 1, mRNA -p34 -sS'gene_symbol' -p35 -S'ARHGEF10' -p36 -sS'hgvs_predicted_protein_consequence' -p37 -(dp38 -S'tlr' -p39 -S'NP_055444.2:p.(Pro800Leu)' -p40 -sS'slr' -p41 -S'NP_055444.2:p.(P800L)' -p42 -ssS'submitted_variant' -p43 -S'8-1871951-C-T' -p44 -sS'genome_context_intronic_sequence' -p45 -g4 -sS'hgvs_lrg_variant' -p46 -g4 -sS'hgvs_transcript_variant' -p47 -S'NM_014629.3:c.2399C>T' -p48 -sS'hgvs_refseqgene_variant' -p49 -g4 -sS'primary_assembly_loci' -p50 -(dp51 -S'hg19' -p52 -(dp53 -g14 -S'NC_000008.10:g.1871951C>T' -p54 -sg16 -(dp55 -g18 -S'chr8' -p56 -sg20 -g21 -sg22 -S'1871951' -p57 -sg24 -g25 -sssg27 -(dp58 -g14 -S'NC_000008.11:g.1923785C>T' -p59 -sg16 -(dp60 -g18 -g56 -sg20 -g21 -sg22 
-S'1923785' -p61 -sg24 -g25 -sssS'grch37' -p62 -(dp63 -g14 -S'NC_000008.10:g.1871951C>T' -p64 -sg16 -(dp65 -g18 -S'8' -p66 -sg20 -g21 -sg22 -S'1871951' -p67 -sg24 -g25 -sssS'grch38' -p68 -(dp69 -g14 -S'NC_000008.11:g.1923785C>T' -p70 -sg16 -(dp71 -g18 -g66 -sg20 -g21 -sg22 -S'1923785' -p72 -sg24 -g25 -ssssS'reference_sequence_records' -p73 -(dp74 -S'protein' -p75 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055444.2' -p76 -sS'transcript' -p77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014629.3' -p78 -sssS'NM_014629.2:c.2399C>T' -p79 -(dp80 -g3 -g4 -sg5 -(lp81 -S'A more recent version of the selected reference sequence NM_014629.2 is available (NM_014629.3)' -p82 -aS'NM_014629.3:c.2399C>T MUST be fully validated prior to use in reports' -p83 -aS'select_variants=NM_014629.3:c.2399C>T' -p84 -aS'RefSeqGene record not available' -p85 -asg8 -g4 -sg9 -(lp86 -sg33 -VHomo sapiens Rho guanine nucleotide exchange factor (GEF) 10 (ARHGEF10), mRNA -p87 -sg35 -S'ARHGEF10' -p88 -sg37 -(dp89 -g39 -S'NP_055444.2:p.(Pro800Leu)' -p90 -sg41 -S'NP_055444.2:p.(P800L)' -p91 -ssg43 -g44 -sg45 -g4 -sg46 -g4 -sg47 -S'NM_014629.2:c.2399C>T' -p92 -sg49 -g4 -sg50 -(dp93 -S'hg19' -p94 -(dp95 -g14 -S'NC_000008.10:g.1871951C>T' -p96 -sg16 -(dp97 -g18 -g56 -sg20 -g21 -sg22 -S'1871951' -p98 -sg24 -g25 -sssS'grch37' -p99 -(dp100 -g14 -S'NC_000008.10:g.1871951C>T' -p101 -sg16 -(dp102 -g18 -g66 -sg20 -g21 -sg22 -S'1871951' -p103 -sg24 -g25 -ssssg73 -(dp104 -g75 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055444.2' -p105 -sg77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014629.2' -p106 -sssS'NM_001308153.1:c.2471C>T' -p107 -(dp108 -g3 -g4 -sg5 -(lp109 -S'RefSeqGene record not available' -p110 -asg8 -g4 -sg9 -(lp111 -(dp112 -S'grch38' -p113 -(dp114 -g14 -S'NT_187576.1:g.107161C>T' -p115 -sg16 -(dp116 -g18 -g19 -sg20 -g21 -sg22 -S'107161' -p117 -sg24 -g25 -sssa(dp118 -g27 -(dp119 -g14 -S'NT_187576.1:g.107161C>T' -p120 -sg16 -(dp121 -g18 -g31 -sg20 -g21 -sg22 -S'107161' -p122 -sg24 -g25 -sssasg33 -VHomo 
sapiens Rho guanine nucleotide exchange factor 10 (ARHGEF10), transcript variant 3, mRNA -p123 -sg35 -S'ARHGEF10' -p124 -sg37 -(dp125 -g39 -S'NP_001295082.1:p.(Pro824Leu)' -p126 -sg41 -S'NP_001295082.1:p.(P824L)' -p127 -ssg43 -g44 -sg45 -g4 -sg46 -g4 -sg47 -S'NM_001308153.1:c.2471C>T' -p128 -sg49 -g4 -sg50 -(dp129 -S'hg19' -p130 -(dp131 -g14 -S'NC_000008.10:g.1871951C>T' -p132 -sg16 -(dp133 -g18 -g56 -sg20 -g21 -sg22 -S'1871951' -p134 -sg24 -g25 -sssg27 -(dp135 -g14 -S'NC_000008.11:g.1923785C>T' -p136 -sg16 -(dp137 -g18 -g56 -sg20 -g21 -sg22 -S'1923785' -p138 -sg24 -g25 -sssS'grch37' -p139 -(dp140 -g14 -S'NC_000008.10:g.1871951C>T' -p141 -sg16 -(dp142 -g18 -g66 -sg20 -g21 -sg22 -S'1871951' -p143 -sg24 -g25 -sssS'grch38' -p144 -(dp145 -g14 -S'NC_000008.11:g.1923785C>T' -p146 -sg16 -(dp147 -g18 -g66 -sg20 -g21 -sg22 -S'1923785' -p148 -sg24 -g25 -ssssg73 -(dp149 -g75 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001295082.1' -p150 -sg77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001308153.1' -p151 -sssS'flag' -p152 -S'gene_variant' -p153 -sS'NM_001308152.1:c.2285C>T' -p154 -(dp155 -g3 -g4 -sg5 -(lp156 -S'RefSeqGene record not available' -p157 -asg8 -g4 -sg9 -(lp158 -(dp159 -S'grch38' -p160 -(dp161 -g14 -S'NT_187576.1:g.107161C>T' -p162 -sg16 -(dp163 -g18 -g19 -sg20 -g21 -sg22 -S'107161' -p164 -sg24 -g25 -sssa(dp165 -g27 -(dp166 -g14 -S'NT_187576.1:g.107161C>T' -p167 -sg16 -(dp168 -g18 -g31 -sg20 -g21 -sg22 -S'107161' -p169 -sg24 -g25 -sssasg33 -VHomo sapiens Rho guanine nucleotide exchange factor 10 (ARHGEF10), transcript variant 2, mRNA -p170 -sg35 -S'ARHGEF10' -p171 -sg37 -(dp172 -g39 -S'NP_001295081.1:p.(Pro762Leu)' -p173 -sg41 -S'NP_001295081.1:p.(P762L)' -p174 -ssg43 -g44 -sg45 -g4 -sg46 -g4 -sg47 -S'NM_001308152.1:c.2285C>T' -p175 -sg49 -g4 -sg50 -(dp176 -S'hg19' -p177 -(dp178 -g14 -S'NC_000008.10:g.1871951C>T' -p179 -sg16 -(dp180 -g18 -g56 -sg20 -g21 -sg22 -S'1871951' -p181 -sg24 -g25 -sssg27 -(dp182 -g14 -S'NC_000008.11:g.1923785C>T' -p183 -sg16 -(dp184 -g18 -g56 
-sg20 -g21 -sg22 -S'1923785' -p185 -sg24 -g25 -sssS'grch37' -p186 -(dp187 -g14 -S'NC_000008.10:g.1871951C>T' -p188 -sg16 -(dp189 -g18 -g66 -sg20 -g21 -sg22 -S'1871951' -p190 -sg24 -g25 -sssS'grch38' -p191 -(dp192 -g14 -S'NC_000008.11:g.1923785C>T' -p193 -sg16 -(dp194 -g18 -g66 -sg20 -g21 -sg22 -S'1923785' -p195 -sg24 -g25 -ssssg73 -(dp196 -g75 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001295081.1' -p197 -sg77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001308152.1' -p198 -sssS'metadata' -p199 -(dp200 -S'variantvalidator_hgvs_version' -p201 -S'1.1.3' -p202 -sS'uta_schema' -p203 -S'uta_20180821' -p204 -sS'seqrepo_db' -p205 -S'2018-08-21' -p206 -sS'variantvalidator_version' -p207 -S'v0.2' -p208 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant308.txt b/VariantValidator/testing/testOutputsMasterITS/variant308.txt deleted file mode 100644 index 2dcae498..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant308.txt +++ /dev/null @@ -1,539 +0,0 @@ -(dp0 -S'NM_001261407.1:c.5504dup' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000009.11:g.13112056T>TG automapped to NC_000009.11:g.13112059dupG' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens multiple PDZ domain crumbs cell polarity complex component (MPDZ), transcript variant 3, mRNA -p13 -sS'gene_symbol' -p14 -S'MPDZ' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001248336.1:p.(Thr1836AsnfsTer15)' -p19 -sS'slr' -p20 -S'NP_001248336.1:p.(T1836Nfs*15)' -p21 -ssS'submitted_variant' -p22 -S'9-13112056-T-TG' -p23 -sS'genome_context_intronic_sequence' -p24 -g4 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_001261407.1:c.5504dup' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 
-S'hgvs_genomic_description' -p33 -S'NC_000009.11:g.13112057dup' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr9' -p38 -sS'ref' -p39 -S'G' -p40 -sS'pos' -p41 -S'13112057' -p42 -sS'alt' -p43 -VGG -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000009.12:g.13112058dup' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'13112058' -p49 -sg43 -VGG -p50 -sssS'grch37' -p51 -(dp52 -g33 -S'NC_000009.11:g.13112057dup' -p53 -sg35 -(dp54 -g37 -S'9' -p55 -sg39 -g40 -sg41 -S'13112057' -p56 -sg43 -VGG -p57 -sssS'grch38' -p58 -(dp59 -g33 -S'NC_000009.12:g.13112058dup' -p60 -sg35 -(dp61 -g37 -g55 -sg39 -g40 -sg41 -S'13112058' -p62 -sg43 -VGG -p63 -ssssS'reference_sequence_records' -p64 -(dp65 -S'protein' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001248336.1' -p67 -sS'transcript' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001261407.1' -p69 -sssS'NM_001330637.1:c.5690dup' -p70 -(dp71 -g3 -g4 -sg5 -(lp72 -S'NC_000009.11:g.13112056T>TG automapped to NC_000009.11:g.13112059dupG' -p73 -aS'RefSeqGene record not available' -p74 -asg9 -g4 -sg10 -(lp75 -sg12 -VHomo sapiens multiple PDZ domain crumbs cell polarity complex component (MPDZ), transcript variant 4, mRNA -p76 -sg14 -S'MPDZ' -p77 -sg16 -(dp78 -g18 -S'NP_001317566.1:p.(Thr1898AsnfsTer15)' -p79 -sg20 -S'NP_001317566.1:p.(T1898Nfs*15)' -p80 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001330637.1:c.5690dup' -p81 -sg28 -g4 -sg29 -(dp82 -S'hg19' -p83 -(dp84 -g33 -S'NC_000009.11:g.13112057dup' -p85 -sg35 -(dp86 -g37 -g38 -sg39 -g40 -sg41 -S'13112057' -p87 -sg43 -VGG -p88 -sssg45 -(dp89 -g33 -S'NC_000009.12:g.13112058dup' -p90 -sg35 -(dp91 -g37 -g38 -sg39 -g40 -sg41 -S'13112058' -p92 -sg43 -VGG -p93 -sssS'grch37' -p94 -(dp95 -g33 -S'NC_000009.11:g.13112057dup' -p96 -sg35 -(dp97 -g37 -g55 -sg39 -g40 -sg41 -S'13112057' -p98 -sg43 -VGG -p99 -sssS'grch38' -p100 -(dp101 -g33 -S'NC_000009.12:g.13112058dup' -p102 -sg35 -(dp103 -g37 -g55 -sg39 -g40 -sg41 -S'13112058' -p104 -sg43 -VGG -p105 -ssssg64 -(dp106 -g66 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317566.1' -p107 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330637.1' -p108 -sssS'NM_001261406.1:c.5591dup' -p109 -(dp110 -g3 -g4 -sg5 -(lp111 -S'NC_000009.11:g.13112056T>TG automapped to NC_000009.11:g.13112059dupG' -p112 -aS'RefSeqGene record not available' -p113 -asg9 -g4 -sg10 -(lp114 -sg12 -VHomo sapiens multiple PDZ domain crumbs cell polarity complex component (MPDZ), transcript variant 2, mRNA -p115 -sg14 -S'MPDZ' -p116 -sg16 -(dp117 -g18 -S'NP_001248335.1:p.(Thr1865AsnfsTer15)' -p118 -sg20 -S'NP_001248335.1:p.(T1865Nfs*15)' -p119 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_001261406.1:c.5591dup' -p120 -sg28 -g4 -sg29 -(dp121 -S'hg19' -p122 -(dp123 -g33 -S'NC_000009.11:g.13112057dup' -p124 -sg35 -(dp125 -g37 -g38 -sg39 -g40 -sg41 -S'13112057' -p126 -sg43 -VGG -p127 -sssg45 -(dp128 -g33 -S'NC_000009.12:g.13112058dup' -p129 -sg35 -(dp130 -g37 -g38 -sg39 -g40 -sg41 -S'13112058' -p131 -sg43 -VGG -p132 -sssS'grch37' -p133 -(dp134 -g33 -S'NC_000009.11:g.13112057dup' -p135 -sg35 -(dp136 -g37 -g55 -sg39 -g40 -sg41 -S'13112057' -p137 -sg43 -VGG -p138 -sssS'grch38' -p139 -(dp140 -g33 -S'NC_000009.12:g.13112058dup' -p141 -sg35 -(dp142 -g37 -g55 -sg39 -g40 -sg41 -S'13112058' -p143 -sg43 -VGG -p144 -ssssg64 -(dp145 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001248335.1' -p146 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001261406.1' -p147 -sssS'flag' -p148 -S'gene_variant' -p149 -sS'NM_003829.4:c.5603dup' -p150 -(dp151 -g3 -g4 -sg5 -(lp152 -S'NC_000009.11:g.13112056T>TG automapped to NC_000009.11:g.13112059dupG' -p153 -aS'RefSeqGene record not available' -p154 -asg9 -g4 -sg10 -(lp155 -sg12 -VHomo sapiens multiple PDZ domain crumbs cell polarity complex component (MPDZ), transcript variant 1, mRNA -p156 -sg14 -S'MPDZ' -p157 -sg16 -(dp158 -g18 -S'NP_003820.2:p.(Thr1869AsnfsTer15)' -p159 -sg20 -S'NP_003820.2:p.(T1869Nfs*15)' -p160 -ssg22 -g23 -sg24 -g4 -sg25 -g4 -sg26 -S'NM_003829.4:c.5603dup' -p161 -sg28 -g4 
-sg29 -(dp162 -S'hg19' -p163 -(dp164 -g33 -S'NC_000009.11:g.13112057dup' -p165 -sg35 -(dp166 -g37 -g38 -sg39 -g40 -sg41 -S'13112057' -p167 -sg43 -VGG -p168 -sssg45 -(dp169 -g33 -S'NC_000009.12:g.13112058dup' -p170 -sg35 -(dp171 -g37 -g38 -sg39 -g40 -sg41 -S'13112058' -p172 -sg43 -VGG -p173 -sssS'grch37' -p174 -(dp175 -g33 -S'NC_000009.11:g.13112057dup' -p176 -sg35 -(dp177 -g37 -g55 -sg39 -g40 -sg41 -S'13112057' -p178 -sg43 -VGG -p179 -sssS'grch38' -p180 -(dp181 -g33 -S'NC_000009.12:g.13112058dup' -p182 -sg35 -(dp183 -g37 -g55 -sg39 -g40 -sg41 -S'13112058' -p184 -sg43 -VGG -p185 -ssssg64 -(dp186 -g66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003820.2' -p187 -sg68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003829.4' -p188 -sssS'metadata' -p189 -(dp190 -S'variantvalidator_hgvs_version' -p191 -S'1.1.3' -p192 -sS'uta_schema' -p193 -S'uta_20180821' -p194 -sS'seqrepo_db' -p195 -S'2018-08-21' -p196 -sS'variantvalidator_version' -p197 -S'v0.2' -p198 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant309.txt b/VariantValidator/testing/testOutputsMasterITS/variant309.txt deleted file mode 100644 index 9798130c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant309.txt +++ /dev/null @@ -1,603 +0,0 @@ -(dp0 -S'NM_058197.4:c.*74-1G>T' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens cyclin dependent kinase inhibitor 2A (CDKN2A), transcript variant 3, mRNA -p12 -sS'gene_symbol' -p13 -S'CDKN2A' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_478104.2:p.?' -p18 -sS'slr' -p19 -S'NP_478104.2:p.?' 
-p20 -ssS'submitted_variant' -p21 -S'9-21971208-C-A' -p22 -sS'genome_context_intronic_sequence' -p23 -S'NC_000009.11(NM_058197.4):c.*74-1G>T' -p24 -sS'hgvs_lrg_variant' -p25 -g4 -sS'hgvs_transcript_variant' -p26 -S'NM_058197.4:c.*74-1G>T' -p27 -sS'hgvs_refseqgene_variant' -p28 -g4 -sS'primary_assembly_loci' -p29 -(dp30 -S'hg19' -p31 -(dp32 -S'hgvs_genomic_description' -p33 -S'NC_000009.11:g.21971208C>A' -p34 -sS'vcf' -p35 -(dp36 -S'chr' -p37 -S'chr9' -p38 -sS'ref' -p39 -VC -p40 -sS'pos' -p41 -S'21971208' -p42 -sS'alt' -p43 -VA -p44 -sssS'hg38' -p45 -(dp46 -g33 -S'NC_000009.12:g.21971209C>A' -p47 -sg35 -(dp48 -g37 -g38 -sg39 -g40 -sg41 -S'21971209' -p49 -sg43 -g44 -sssS'grch37' -p50 -(dp51 -g33 -S'NC_000009.11:g.21971208C>A' -p52 -sg35 -(dp53 -g37 -S'9' -p54 -sg39 -g40 -sg41 -S'21971208' -p55 -sg43 -g44 -sssS'grch38' -p56 -(dp57 -g33 -S'NC_000009.12:g.21971209C>A' -p58 -sg35 -(dp59 -g37 -g54 -sg39 -g40 -sg41 -S'21971209' -p60 -sg43 -g44 -ssssS'reference_sequence_records' -p61 -(dp62 -S'protein' -p63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_478104.2' -p64 -sS'transcript' -p65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_058197.4' -p66 -sssS'NM_000077.4:c.151-1G>T' -p67 -(dp68 -g3 -g4 -sg5 -(lp69 -S'RefSeqGene record not available' -p70 -asg8 -g4 -sg9 -(lp71 -sg11 -VHomo sapiens cyclin dependent kinase inhibitor 2A (CDKN2A), transcript variant 1, mRNA -p72 -sg13 -S'CDKN2A' -p73 -sg15 -(dp74 -g17 -S'NP_000068.1:p.?' -p75 -sg19 -S'NP_000068.1:p.?' 
-p76 -ssg21 -g22 -sg23 -S'NC_000009.11(NM_000077.4):c.151-1G>T' -p77 -sg25 -g4 -sg26 -S'NM_000077.4:c.151-1G>T' -p78 -sg28 -g4 -sg29 -(dp79 -S'hg19' -p80 -(dp81 -g33 -S'NC_000009.11:g.21971208C>A' -p82 -sg35 -(dp83 -g37 -g38 -sg39 -g40 -sg41 -S'21971208' -p84 -sg43 -g44 -sssg45 -(dp85 -g33 -S'NC_000009.12:g.21971209C>A' -p86 -sg35 -(dp87 -g37 -g38 -sg39 -g40 -sg41 -S'21971209' -p88 -sg43 -g44 -sssS'grch37' -p89 -(dp90 -g33 -S'NC_000009.11:g.21971208C>A' -p91 -sg35 -(dp92 -g37 -g54 -sg39 -g40 -sg41 -S'21971208' -p93 -sg43 -g44 -sssS'grch38' -p94 -(dp95 -g33 -S'NC_000009.12:g.21971209C>A' -p96 -sg35 -(dp97 -g37 -g54 -sg39 -g40 -sg41 -S'21971209' -p98 -sg43 -g44 -ssssg61 -(dp99 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000068.1' -p100 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000077.4' -p101 -sssS'NM_001363763.1:c.-3-1G>T' -p102 -(dp103 -g3 -g4 -sg5 -(lp104 -S'RefSeqGene record not available' -p105 -asg8 -g4 -sg9 -(lp106 -sg11 -VHomo sapiens cyclin dependent kinase inhibitor 2A (CDKN2A), transcript variant 6, mRNA -p107 -sg13 -S'CDKN2A' -p108 -sg15 -(dp109 -g17 -S'NP_001350692.1:p.?' -p110 -sg19 -S'NP_001350692.1:p.?' 
-p111 -ssg21 -g22 -sg23 -S'NC_000009.11(NM_001363763.1):c.-3-1G>T' -p112 -sg25 -g4 -sg26 -S'NM_001363763.1:c.-3-1G>T' -p113 -sg28 -g4 -sg29 -(dp114 -S'hg19' -p115 -(dp116 -g33 -S'NC_000009.11:g.21971208C>A' -p117 -sg35 -(dp118 -g37 -g38 -sg39 -g40 -sg41 -S'21971208' -p119 -sg43 -g44 -sssS'grch37' -p120 -(dp121 -g33 -S'NC_000009.11:g.21971208C>A' -p122 -sg35 -(dp123 -g37 -g54 -sg39 -g40 -sg41 -S'21971208' -p124 -sg43 -g44 -ssssg61 -(dp125 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350692.1' -p126 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363763.1' -p127 -sssS'NM_001195132.1:c.151-1G>T' -p128 -(dp129 -g3 -g4 -sg5 -(lp130 -S'RefSeqGene record not available' -p131 -asg8 -g4 -sg9 -(lp132 -sg11 -VHomo sapiens cyclin dependent kinase inhibitor 2A (CDKN2A), transcript variant 5, mRNA -p133 -sg13 -S'CDKN2A' -p134 -sg15 -(dp135 -g17 -S'NP_001182061.1:p.?' -p136 -sg19 -S'NP_001182061.1:p.?' -p137 -ssg21 -g22 -sg23 -S'NC_000009.11(NM_001195132.1):c.151-1G>T' -p138 -sg25 -g4 -sg26 -S'NM_001195132.1:c.151-1G>T' -p139 -sg28 -g4 -sg29 -(dp140 -S'hg19' -p141 -(dp142 -g33 -S'NC_000009.11:g.21971208C>A' -p143 -sg35 -(dp144 -g37 -g38 -sg39 -g40 -sg41 -S'21971208' -p145 -sg43 -g44 -sssg45 -(dp146 -g33 -S'NC_000009.12:g.21971209C>A' -p147 -sg35 -(dp148 -g37 -g38 -sg39 -g40 -sg41 -S'21971209' -p149 -sg43 -g44 -sssS'grch37' -p150 -(dp151 -g33 -S'NC_000009.11:g.21971208C>A' -p152 -sg35 -(dp153 -g37 -g54 -sg39 -g40 -sg41 -S'21971208' -p154 -sg43 -g44 -sssS'grch38' -p155 -(dp156 -g33 -S'NC_000009.12:g.21971209C>A' -p157 -sg35 -(dp158 -g37 -g54 -sg39 -g40 -sg41 -S'21971209' -p159 -sg43 -g44 -ssssg61 -(dp160 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001182061.1' -p161 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001195132.1' -p162 -sssS'NM_058195.3:c.194-1G>T' -p163 -(dp164 -g3 -g4 -sg5 -(lp165 -S'RefSeqGene record not available' -p166 -asg8 -g4 -sg9 -(lp167 -sg11 -VHomo sapiens cyclin dependent kinase inhibitor 2A (CDKN2A), transcript variant 4, mRNA -p168 -sg13 
-S'CDKN2A' -p169 -sg15 -(dp170 -g17 -S'NP_478102.2:p.?' -p171 -sg19 -S'NP_478102.2:p.?' -p172 -ssg21 -g22 -sg23 -S'NC_000009.11(NM_058195.3):c.194-1G>T' -p173 -sg25 -g4 -sg26 -S'NM_058195.3:c.194-1G>T' -p174 -sg28 -g4 -sg29 -(dp175 -S'hg19' -p176 -(dp177 -g33 -S'NC_000009.11:g.21971208C>A' -p178 -sg35 -(dp179 -g37 -g38 -sg39 -g40 -sg41 -S'21971208' -p180 -sg43 -g44 -sssg45 -(dp181 -g33 -S'NC_000009.12:g.21971209C>A' -p182 -sg35 -(dp183 -g37 -g38 -sg39 -g40 -sg41 -S'21971209' -p184 -sg43 -g44 -sssS'grch37' -p185 -(dp186 -g33 -S'NC_000009.11:g.21971208C>A' -p187 -sg35 -(dp188 -g37 -g54 -sg39 -g40 -sg41 -S'21971208' -p189 -sg43 -g44 -sssS'grch38' -p190 -(dp191 -g33 -S'NC_000009.12:g.21971209C>A' -p192 -sg35 -(dp193 -g37 -g54 -sg39 -g40 -sg41 -S'21971209' -p194 -sg43 -g44 -ssssg61 -(dp195 -g63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_478102.2' -p196 -sg65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_058195.3' -p197 -sssS'flag' -p198 -S'gene_variant' -p199 -sS'metadata' -p200 -(dp201 -S'variantvalidator_hgvs_version' -p202 -S'1.1.3' -p203 -sS'uta_schema' -p204 -S'uta_20180821' -p205 -sS'seqrepo_db' -p206 -S'2018-08-21' -p207 -sS'variantvalidator_version' -p208 -S'v0.2' -p209 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant31.txt b/VariantValidator/testing/testOutputsMasterITS/variant31.txt deleted file mode 100644 index ff4da964..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant31.txt +++ /dev/null @@ -1,171 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589G>T' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2:p.(Gly197Cys)' -p20 -sS'slr' -p21 -S'NP_000079.2:p.(G197C)' -p22 -ssS'submitted_variant' -p23 -S'NC_000017.10:g.48275363C>A' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_000088.3:c.589G>T' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000017.10:g.48275363C>A' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr17' -p39 -sS'ref' -p40 -VC -p41 -sS'pos' -p42 -S'48275363' -p43 -sS'alt' -p44 -VA -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000017.11:g.50198002C>A' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'50198002' -p50 -sg44 -g45 -sssS'grch37' -p51 -(dp52 -g34 -S'NC_000017.10:g.48275363C>A' -p53 -sg36 -(dp54 -g38 -S'17' -p55 -sg40 -g41 -sg42 -S'48275363' -p56 -sg44 -g45 -sssS'grch38' -p57 -(dp58 -g34 -S'NC_000017.11:g.50198002C>A' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'50198002' -p61 -sg44 -g45 -ssssS'reference_sequence_records' -p62 -(dp63 -S'protein' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p65 -sS'transcript' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p67 
-sssS'metadata' -p68 -(dp69 -S'variantvalidator_hgvs_version' -p70 -S'1.1.3' -p71 -sS'uta_schema' -p72 -S'uta_20180821' -p73 -sS'seqrepo_db' -p74 -S'2018-08-21' -p75 -sS'variantvalidator_version' -p76 -S'v0.2' -p77 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant310.txt b/VariantValidator/testing/testOutputsMasterITS/variant310.txt deleted file mode 100644 index ea4a8be6..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant310.txt +++ /dev/null @@ -1,543 +0,0 @@ -(dp0 -S'NM_001301227.1:c.773-3dup' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000009.11:g.35683240T>TG automapped to NC_000009.11:g.35683248dupG' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens tropomyosin 2 (TPM2), transcript variant Tpm2.4, mRNA -p13 -sS'gene_symbol' -p14 -S'TPM2' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001288156.1:p.?' -p19 -sS'slr' -p20 -S'NP_001288156.1:p.?' 
-p21 -ssS'submitted_variant' -p22 -S'9-35683240-T-TG' -p23 -sS'genome_context_intronic_sequence' -p24 -S'NC_000009.11(NM_001301227.1):c.773-3dup' -p25 -sS'hgvs_lrg_variant' -p26 -g4 -sS'hgvs_transcript_variant' -p27 -S'NM_001301227.1:c.773-3dup' -p28 -sS'hgvs_refseqgene_variant' -p29 -g4 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000009.11:g.35683241dup' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr9' -p39 -sS'ref' -p40 -S'G' -p41 -sS'pos' -p42 -S'35683241' -p43 -sS'alt' -p44 -S'GG' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000009.12:g.35683244dup' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'35683244' -p50 -sg44 -S'GG' -p51 -sssS'grch37' -p52 -(dp53 -g34 -S'NC_000009.11:g.35683241dup' -p54 -sg36 -(dp55 -g38 -S'9' -p56 -sg40 -g41 -sg42 -S'35683241' -p57 -sg44 -S'GG' -p58 -sssS'grch38' -p59 -(dp60 -g34 -S'NC_000009.12:g.35683244dup' -p61 -sg36 -(dp62 -g38 -g56 -sg40 -g41 -sg42 -S'35683244' -p63 -sg44 -S'GG' -p64 -ssssS'reference_sequence_records' -p65 -(dp66 -S'protein' -p67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001288156.1' -p68 -sS'transcript' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001301227.1' -p70 -sssS'NM_001301226.1:c.772+1002dup' -p71 -(dp72 -g3 -g4 -sg5 -(lp73 -S'NC_000009.11:g.35683240T>TG automapped to NC_000009.11:g.35683248dupG' -p74 -aS'RefSeqGene record not available' -p75 -asg9 -g4 -sg10 -(lp76 -sg12 -VHomo sapiens tropomyosin 2 (TPM2), transcript variant Tpm2.3, mRNA -p77 -sg14 -S'TPM2' -p78 -sg16 -(dp79 -g18 -S'NP_001288155.1:p.?' -p80 -sg20 -S'NP_001288155.1:p.?' 
-p81 -ssg22 -g23 -sg24 -S'NC_000009.11(NM_001301226.1):c.772+1002dup' -p82 -sg26 -g4 -sg27 -S'NM_001301226.1:c.772+1002dup' -p83 -sg29 -g4 -sg30 -(dp84 -S'hg19' -p85 -(dp86 -g34 -S'NC_000009.11:g.35683241dup' -p87 -sg36 -(dp88 -g38 -g39 -sg40 -g41 -sg42 -S'35683241' -p89 -sg44 -S'GG' -p90 -sssg46 -(dp91 -g34 -S'NC_000009.12:g.35683244dup' -p92 -sg36 -(dp93 -g38 -g39 -sg40 -g41 -sg42 -S'35683244' -p94 -sg44 -S'GG' -p95 -sssS'grch37' -p96 -(dp97 -g34 -S'NC_000009.11:g.35683241dup' -p98 -sg36 -(dp99 -g38 -g56 -sg40 -g41 -sg42 -S'35683241' -p100 -sg44 -S'GG' -p101 -sssS'grch38' -p102 -(dp103 -g34 -S'NC_000009.12:g.35683244dup' -p104 -sg36 -(dp105 -g38 -g56 -sg40 -g41 -sg42 -S'35683244' -p106 -sg44 -S'GG' -p107 -ssssg65 -(dp108 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001288155.1' -p109 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001301226.1' -p110 -sssS'NM_213674.1:c.772+1002dup' -p111 -(dp112 -g3 -g4 -sg5 -(lp113 -S'NC_000009.11:g.35683240T>TG automapped to NC_000009.11:g.35683248dupG' -p114 -aS'RefSeqGene record not available' -p115 -asg9 -g4 -sg10 -(lp116 -sg12 -VHomo sapiens tropomyosin 2 (TPM2), transcript variant Tpm2.1, mRNA -p117 -sg14 -S'TPM2' -p118 -sg16 -(dp119 -g18 -S'NP_998839.1:p.?' -p120 -sg20 -S'NP_998839.1:p.?' 
-p121 -ssg22 -g23 -sg24 -S'NC_000009.11(NM_213674.1):c.772+1002dup' -p122 -sg26 -g4 -sg27 -S'NM_213674.1:c.772+1002dup' -p123 -sg29 -g4 -sg30 -(dp124 -S'hg19' -p125 -(dp126 -g34 -S'NC_000009.11:g.35683241dup' -p127 -sg36 -(dp128 -g38 -g39 -sg40 -g41 -sg42 -S'35683241' -p129 -sg44 -S'GG' -p130 -sssg46 -(dp131 -g34 -S'NC_000009.12:g.35683244dup' -p132 -sg36 -(dp133 -g38 -g39 -sg40 -g41 -sg42 -S'35683244' -p134 -sg44 -S'GG' -p135 -sssS'grch37' -p136 -(dp137 -g34 -S'NC_000009.11:g.35683241dup' -p138 -sg36 -(dp139 -g38 -g56 -sg40 -g41 -sg42 -S'35683241' -p140 -sg44 -S'GG' -p141 -sssS'grch38' -p142 -(dp143 -g34 -S'NC_000009.12:g.35683244dup' -p144 -sg36 -(dp145 -g38 -g56 -sg40 -g41 -sg42 -S'35683244' -p146 -sg44 -S'GG' -p147 -ssssg65 -(dp148 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_998839.1' -p149 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_213674.1' -p150 -sssS'NM_003289.3:c.773-3dup' -p151 -(dp152 -g3 -g4 -sg5 -(lp153 -S'NC_000009.11:g.35683240T>TG automapped to NC_000009.11:g.35683248dupG' -p154 -aS'RefSeqGene record not available' -p155 -asg9 -g4 -sg10 -(lp156 -sg12 -VHomo sapiens tropomyosin 2 (TPM2), transcript variant Tpm2.2, mRNA -p157 -sg14 -S'TPM2' -p158 -sg16 -(dp159 -g18 -S'NP_003280.2:p.?' -p160 -sg20 -S'NP_003280.2:p.?' 
-p161 -ssg22 -g23 -sg24 -S'NC_000009.11(NM_003289.3):c.773-3dup' -p162 -sg26 -g4 -sg27 -S'NM_003289.3:c.773-3dup' -p163 -sg29 -g4 -sg30 -(dp164 -S'hg19' -p165 -(dp166 -g34 -S'NC_000009.11:g.35683241dup' -p167 -sg36 -(dp168 -g38 -g39 -sg40 -g41 -sg42 -S'35683241' -p169 -sg44 -S'GG' -p170 -sssg46 -(dp171 -g34 -S'NC_000009.12:g.35683244dup' -p172 -sg36 -(dp173 -g38 -g39 -sg40 -g41 -sg42 -S'35683244' -p174 -sg44 -S'GG' -p175 -sssS'grch37' -p176 -(dp177 -g34 -S'NC_000009.11:g.35683241dup' -p178 -sg36 -(dp179 -g38 -g56 -sg40 -g41 -sg42 -S'35683241' -p180 -sg44 -S'GG' -p181 -sssS'grch38' -p182 -(dp183 -g34 -S'NC_000009.12:g.35683244dup' -p184 -sg36 -(dp185 -g38 -g56 -sg40 -g41 -sg42 -S'35683244' -p186 -sg44 -S'GG' -p187 -ssssg65 -(dp188 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003280.2' -p189 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003289.3' -p190 -sssS'flag' -p191 -S'gene_variant' -p192 -sS'metadata' -p193 -(dp194 -S'variantvalidator_hgvs_version' -p195 -S'1.1.3' -p196 -sS'uta_schema' -p197 -S'uta_20180821' -p198 -sS'seqrepo_db' -p199 -S'2018-08-21' -p200 -sS'variantvalidator_version' -p201 -S'v0.2' -p202 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant311.txt b/VariantValidator/testing/testOutputsMasterITS/variant311.txt deleted file mode 100644 index 5866371f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant311.txt +++ /dev/null @@ -1,516 +0,0 @@ -(dp0 -S'NM_000368.4:c.733C>T' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'TSC1' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_000359.1:p.(Arg245Ter)' -p18 -sS'slr' -p19 -S'NP_000359.1:p.(R245*)' -p20 -ssS'submitted_variant' -p21 -S'9-135796754-G-A' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_000368.4:c.733C>T' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000009.11:g.135796754G>A' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr9' -p37 -sS'ref' -p38 -VG -p39 -sS'pos' -p40 -S'135796754' -p41 -sS'alt' -p42 -VA -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000009.12:g.132921367G>A' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'132921367' -p48 -sg42 -g43 -sssS'grch37' -p49 -(dp50 -g32 -S'NC_000009.11:g.135796754G>A' -p51 -sg34 -(dp52 -g36 -S'9' -p53 -sg38 -g39 -sg40 -S'135796754' -p54 -sg42 -g43 -sssS'grch38' -p55 -(dp56 -g32 -S'NC_000009.12:g.132921367G>A' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'132921367' -p59 -sg42 -g43 -ssssS'reference_sequence_records' -p60 -(dp61 -S'protein' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1' -p63 -sS'transcript' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4' -p65 -sssS'NM_001162426.1:c.733C>T' -p66 
-(dp67 -g3 -g4 -sg5 -(lp68 -S'RefSeqGene record not available' -p69 -asg8 -g4 -sg9 -(lp70 -sg11 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 3, mRNA -p71 -sg13 -S'TSC1' -p72 -sg15 -(dp73 -g17 -S'NP_001155898.1:p.(Arg245Ter)' -p74 -sg19 -S'NP_001155898.1:p.(R245*)' -p75 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001162426.1:c.733C>T' -p76 -sg27 -g4 -sg28 -(dp77 -S'hg19' -p78 -(dp79 -g32 -S'NC_000009.11:g.135796754G>A' -p80 -sg34 -(dp81 -g36 -g37 -sg38 -g39 -sg40 -S'135796754' -p82 -sg42 -g43 -sssg44 -(dp83 -g32 -S'NC_000009.12:g.132921367G>A' -p84 -sg34 -(dp85 -g36 -g37 -sg38 -g39 -sg40 -S'132921367' -p86 -sg42 -g43 -sssS'grch37' -p87 -(dp88 -g32 -S'NC_000009.11:g.135796754G>A' -p89 -sg34 -(dp90 -g36 -g53 -sg38 -g39 -sg40 -S'135796754' -p91 -sg42 -g43 -sssS'grch38' -p92 -(dp93 -g32 -S'NC_000009.12:g.132921367G>A' -p94 -sg34 -(dp95 -g36 -g53 -sg38 -g39 -sg40 -S'132921367' -p96 -sg42 -g43 -ssssg60 -(dp97 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155898.1' -p98 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162426.1' -p99 -sssS'flag' -p100 -S'gene_variant' -p101 -sS'NM_001362177.1:c.370C>T' -p102 -(dp103 -g3 -g4 -sg5 -(lp104 -S'RefSeqGene record not available' -p105 -asg8 -g4 -sg9 -(lp106 -sg11 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 5, mRNA -p107 -sg13 -S'TSC1' -p108 -sg15 -(dp109 -g17 -S'NP_001349106.1:p.(Arg124Ter)' -p110 -sg19 -S'NP_001349106.1:p.(R124*)' -p111 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001362177.1:c.370C>T' -p112 -sg27 -g4 -sg28 -(dp113 -S'hg19' -p114 -(dp115 -g32 -S'NC_000009.11:g.135796754G>A' -p116 -sg34 -(dp117 -g36 -g37 -sg38 -g39 -sg40 -S'135796754' -p118 -sg42 -g43 -sssg44 -(dp119 -g32 -S'NC_000009.12:g.132921367G>A' -p120 -sg34 -(dp121 -g36 -g37 -sg38 -g39 -sg40 -S'132921367' -p122 -sg42 -g43 -sssS'grch37' -p123 -(dp124 -g32 -S'NC_000009.11:g.135796754G>A' -p125 -sg34 -(dp126 -g36 -g53 -sg38 -g39 -sg40 -S'135796754' -p127 -sg42 -g43 -sssS'grch38' -p128 -(dp129 -g32 
-S'NC_000009.12:g.132921367G>A' -p130 -sg34 -(dp131 -g36 -g53 -sg38 -g39 -sg40 -S'132921367' -p132 -sg42 -g43 -ssssg60 -(dp133 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001349106.1' -p134 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001362177.1' -p135 -sssS'NM_001162427.1:c.580C>T' -p136 -(dp137 -g3 -g4 -sg5 -(lp138 -S'RefSeqGene record not available' -p139 -asg8 -g4 -sg9 -(lp140 -sg11 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 4, mRNA -p141 -sg13 -S'TSC1' -p142 -sg15 -(dp143 -g17 -S'NP_001155899.1:p.(Arg194Ter)' -p144 -sg19 -S'NP_001155899.1:p.(R194*)' -p145 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001162427.1:c.580C>T' -p146 -sg27 -g4 -sg28 -(dp147 -S'hg19' -p148 -(dp149 -g32 -S'NC_000009.11:g.135796754G>A' -p150 -sg34 -(dp151 -g36 -g37 -sg38 -g39 -sg40 -S'135796754' -p152 -sg42 -g43 -sssg44 -(dp153 -g32 -S'NC_000009.12:g.132921367G>A' -p154 -sg34 -(dp155 -g36 -g37 -sg38 -g39 -sg40 -S'132921367' -p156 -sg42 -g43 -sssS'grch37' -p157 -(dp158 -g32 -S'NC_000009.11:g.135796754G>A' -p159 -sg34 -(dp160 -g36 -g53 -sg38 -g39 -sg40 -S'135796754' -p161 -sg42 -g43 -sssS'grch38' -p162 -(dp163 -g32 -S'NC_000009.12:g.132921367G>A' -p164 -sg34 -(dp165 -g36 -g53 -sg38 -g39 -sg40 -S'132921367' -p166 -sg42 -g43 -ssssg60 -(dp167 -g62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155899.1' -p168 -sg64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162427.1' -p169 -sssS'metadata' -p170 -(dp171 -S'variantvalidator_hgvs_version' -p172 -S'1.1.3' -p173 -sS'uta_schema' -p174 -S'uta_20180821' -p175 -sS'seqrepo_db' -p176 -S'2018-08-21' -p177 -sS'variantvalidator_version' -p178 -S'v0.2' -p179 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant312.txt b/VariantValidator/testing/testOutputsMasterITS/variant312.txt deleted file mode 100644 index 0ccd7dc4..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant312.txt +++ /dev/null @@ -1,216 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_005247.2:c.616del' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'NW_003571046.1:g.10391AC>A automapped to NW_003571046.1:g.10396delC' -p19 -aS'RefSeqGene record not available' -p20 -asS'refseqgene_context_intronic_sequence' -p21 -g16 -sS'alt_genomic_loci' -p22 -(lp23 -(dp24 -S'grch37' -p25 -(dp26 -S'hgvs_genomic_description' -p27 -S'NW_003571046.1:g.10392del' -p28 -sS'vcf' -p29 -(dp30 -S'chr' -p31 -S'HG536_PATCH' -p32 -sS'ref' -p33 -S'AC' -p34 -sS'pos' -p35 -S'10391' -p36 -sS'alt' -p37 -S'A' -p38 -sssa(dp39 -S'hg19' -p40 -(dp41 -g27 -S'NW_003571046.1:g.10392del' -p42 -sg29 -(dp43 -g31 -S'NW_003571046.1' -p44 -sg33 -S'AC' -p45 -sg35 -S'10391' -p46 -sg37 -g38 -sssasS'transcript_description' -p47 -VHomo sapiens fibroblast growth factor 3 (FGF3), mRNA -p48 -sS'gene_symbol' -p49 -S'FGF3' -p50 -sS'hgvs_predicted_protein_consequence' -p51 -(dp52 -S'tlr' -p53 -S'NP_005238.1:p.(Val206SerfsTer117)' -p54 -sS'slr' -p55 -S'NP_005238.1:p.(V206Sfs*117)' -p56 -ssS'submitted_variant' -p57 -S'HG536_PATCH-10391-AC-A' -p58 -sS'genome_context_intronic_sequence' -p59 -g16 -sS'hgvs_lrg_variant' -p60 -g16 -sS'hgvs_transcript_variant' -p61 -S'NM_005247.2:c.616del' -p62 -sS'hgvs_refseqgene_variant' -p63 -g16 -sS'primary_assembly_loci' -p64 -(dp65 -S'grch38' -p66 -(dp67 -g27 -S'NC_000011.10:g.69810409del' -p68 -sg29 -(dp69 -g31 -S'11' -p70 -sg33 -S'AC' -p71 -sg35 -S'69810408' -p72 
-sg37 -g38 -sssS'grch37' -p73 -(dp74 -g27 -S'NC_000011.9:g.69625177del' -p75 -sg29 -(dp76 -g31 -g70 -sg33 -S'AC' -p77 -sg35 -S'69625176' -p78 -sg37 -g38 -sssS'hg38' -p79 -(dp80 -g27 -S'NC_000011.10:g.69810409del' -p81 -sg29 -(dp82 -g31 -S'chr11' -p83 -sg33 -S'AC' -p84 -sg35 -S'69810408' -p85 -sg37 -g38 -sssS'hg19' -p86 -(dp87 -g27 -S'NC_000011.9:g.69625177del' -p88 -sg29 -(dp89 -g31 -g83 -sg33 -S'AC' -p90 -sg35 -S'69625176' -p91 -sg37 -g38 -ssssS'reference_sequence_records' -p92 -(dp93 -S'protein' -p94 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_005238.1' -p95 -sS'transcript' -p96 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_005247.2' -p97 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant313.txt b/VariantValidator/testing/testOutputsMasterITS/variant313.txt deleted file mode 100644 index 7149a06b..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant313.txt +++ /dev/null @@ -1,635 +0,0 @@ -(dp0 -S'NR_110766.1:n.833+969C>T' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -(dp11 -S'grch37' -p12 -(dp13 -S'hgvs_genomic_description' -p14 -S'NW_004070871.1:g.33547G>A' -p15 -sS'vcf' -p16 -(dp17 -S'chr' -p18 -S'HG865_PATCH' -p19 -sS'ref' -p20 -VG -p21 -sS'pos' -p22 -S'33547' -p23 -sS'alt' -p24 -VA -p25 -sssa(dp26 -S'hg19' -p27 -(dp28 -g14 -S'NW_004070871.1:g.33547G>A' -p29 -sg16 -(dp30 -g18 -S'NW_004070871.1' -p31 -sg20 -g21 -sg22 -S'33547' -p32 -sg24 -g25 -sssasS'transcript_description' -p33 -VHomo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 3, non-coding RNA -p34 -sS'gene_symbol' -p35 -S'SHANK2' -p36 -sS'hgvs_predicted_protein_consequence' -p37 -(dp38 -S'tlr' -p39 -S'Non-coding :n.' 
-p40 -sS'slr' -p41 -g40 -ssS'submitted_variant' -p42 -S'HG865_PATCH-33547-G-A' -p43 -sS'genome_context_intronic_sequence' -p44 -S'NC_000011.9(NR_110766.1):c.833+969C>T' -p45 -sS'hgvs_lrg_variant' -p46 -g4 -sS'hgvs_transcript_variant' -p47 -S'NR_110766.1:n.833+969C>T' -p48 -sS'hgvs_refseqgene_variant' -p49 -g4 -sS'primary_assembly_loci' -p50 -(dp51 -S'grch38' -p52 -(dp53 -g14 -S'NC_000011.10:g.70489334G>A' -p54 -sg16 -(dp55 -g18 -S'11' -p56 -sg20 -g21 -sg22 -S'70489334' -p57 -sg24 -g25 -sssS'grch37' -p58 -(dp59 -g14 -S'NC_000011.9:g.70335439G>A' -p60 -sg16 -(dp61 -g18 -g56 -sg20 -g21 -sg22 -S'70335439' -p62 -sg24 -g25 -sssS'hg38' -p63 -(dp64 -g14 -S'NC_000011.10:g.70489334G>A' -p65 -sg16 -(dp66 -g18 -S'chr11' -p67 -sg20 -g21 -sg22 -S'70489334' -p68 -sg24 -g25 -sssS'hg19' -p69 -(dp70 -g14 -S'NC_000011.9:g.70335439G>A' -p71 -sg16 -(dp72 -g18 -g67 -sg20 -g21 -sg22 -S'70335439' -p73 -sg24 -g25 -ssssS'reference_sequence_records' -p74 -(dp75 -S'transcript' -p76 -S'https://www.ncbi.nlm.nih.gov/nuccore/NR_110766.1' -p77 -sssS'NM_012309.4:c.2566C>T' -p78 -(dp79 -g3 -g4 -sg5 -(lp80 -S'RefSeqGene record not available' -p81 -asg8 -g4 -sg9 -(lp82 -(dp83 -S'grch37' -p84 -(dp85 -g14 -S'NW_004070871.1:g.33547G>A' -p86 -sg16 -(dp87 -g18 -g19 -sg20 -g21 -sg22 -S'33547' -p88 -sg24 -g25 -sssa(dp89 -S'hg19' -p90 -(dp91 -g14 -S'NW_004070871.1:g.33547G>A' -p92 -sg16 -(dp93 -g18 -S'NW_004070871.1' -p94 -sg20 -g21 -sg22 -S'33547' -p95 -sg24 -g25 -sssasg33 -VHomo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 1, mRNA -p96 -sg35 -S'SHANK2' -p97 -sg37 -(dp98 -g39 -S'NP_036441.2:p.(Leu856=)' -p99 -sg41 -S'NP_036441.2:p.(L856=)' -p100 -ssg42 -g43 -sg44 -g4 -sg46 -g4 -sg47 -S'NM_012309.4:c.2566C>T' -p101 -sg49 -g4 -sg50 -(dp102 -S'grch38' -p103 -(dp104 -g14 -S'NC_000011.10:g.70489334G>A' -p105 -sg16 -(dp106 -g18 -g56 -sg20 -g21 -sg22 -S'70489334' -p107 -sg24 -g25 -sssS'grch37' -p108 -(dp109 -g14 -S'NC_000011.9:g.70336423G>A' -p110 -sg16 -(dp111 -g18 -g56 -sg20 -g21 
-sg22 -S'70336423' -p112 -sg24 -g25 -sssg63 -(dp113 -g14 -S'NC_000011.10:g.70489334G>A' -p114 -sg16 -(dp115 -g18 -g67 -sg20 -g21 -sg22 -S'70489334' -p116 -sg24 -g25 -sssS'hg19' -p117 -(dp118 -g14 -S'NC_000011.9:g.70336423G>A' -p119 -sg16 -(dp120 -g18 -g67 -sg20 -g21 -sg22 -S'70336423' -p121 -sg24 -g25 -ssssg74 -(dp122 -S'protein' -p123 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_036441.2' -p124 -sg76 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_012309.4' -p125 -sssS'NM_133266.4:c.802C>T' -p126 -(dp127 -g3 -g4 -sg5 -(lp128 -S'RefSeqGene record not available' -p129 -asg8 -g4 -sg9 -(lp130 -(dp131 -S'grch37' -p132 -(dp133 -g14 -S'NW_004070871.1:g.33547G>A' -p134 -sg16 -(dp135 -g18 -g19 -sg20 -g21 -sg22 -S'33547' -p136 -sg24 -g25 -sssa(dp137 -S'hg19' -p138 -(dp139 -g14 -S'NW_004070871.1:g.33547G>A' -p140 -sg16 -(dp141 -g18 -S'NW_004070871.1' -p142 -sg20 -g21 -sg22 -S'33547' -p143 -sg24 -g25 -sssasg33 -VHomo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 2, mRNA -p144 -sg35 -S'SHANK2' -p145 -sg37 -(dp146 -g39 -S'NP_573573.2:p.(Leu268=)' -p147 -sg41 -S'NP_573573.2:p.(L268=)' -p148 -ssg42 -g43 -sg44 -g4 -sg46 -g4 -sg47 -S'NM_133266.4:c.802C>T' -p149 -sg49 -g4 -sg50 -(dp150 -S'grch38' -p151 -(dp152 -g14 -S'NC_000011.10:g.70489334G>A' -p153 -sg16 -(dp154 -g18 -g56 -sg20 -g21 -sg22 -S'70489334' -p155 -sg24 -g25 -sssS'grch37' -p156 -(dp157 -g14 -S'NC_000011.9:g.70335439G>A' -p158 -sg16 -(dp159 -g18 -g56 -sg20 -g21 -sg22 -S'70335439' -p160 -sg24 -g25 -sssg63 -(dp161 -g14 -S'NC_000011.10:g.70489334G>A' -p162 -sg16 -(dp163 -g18 -g67 -sg20 -g21 -sg22 -S'70489334' -p164 -sg24 -g25 -sssS'hg19' -p165 -(dp166 -g14 -S'NC_000011.9:g.70335439G>A' -p167 -sg16 -(dp168 -g18 -g67 -sg20 -g21 -sg22 -S'70335439' -p169 -sg24 -g25 -ssssg74 -(dp170 -g123 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_573573.2' -p171 -sg76 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_133266.4' -p172 -sssS'flag' -p173 -S'gene_variant' -p174 -sS'NM_133266.3:c.802C>T' -p175 -(dp176 -g3 -g4 -sg5 
-(lp177 -S'A more recent version of the selected reference sequence NM_133266.3 is available (NM_133266.4)' -p178 -aS'NM_133266.4:c.802C>T MUST be fully validated prior to use in reports' -p179 -aS'select_variants=NM_133266.4:c.802C>T' -p180 -aS'RefSeqGene record not available' -p181 -asg8 -g4 -sg9 -(lp182 -(dp183 -S'grch37' -p184 -(dp185 -g14 -S'NW_004070871.1:g.33547G>A' -p186 -sg16 -(dp187 -g18 -g19 -sg20 -g21 -sg22 -S'33547' -p188 -sg24 -g25 -sssa(dp189 -S'hg19' -p190 -(dp191 -g14 -S'NW_004070871.1:g.33547G>A' -p192 -sg16 -(dp193 -g18 -S'NW_004070871.1' -p194 -sg20 -g21 -sg22 -S'33547' -p195 -sg24 -g25 -sssasg33 -VHomo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 2, mRNA -p196 -sg35 -S'SHANK2' -p197 -sg37 -(dp198 -g39 -S'NP_573573.2:p.(Leu268=)' -p199 -sg41 -S'NP_573573.2:p.(L268=)' -p200 -ssg42 -g43 -sg44 -g4 -sg46 -g4 -sg47 -S'NM_133266.3:c.802C>T' -p201 -sg49 -g4 -sg50 -(dp202 -S'hg19' -p203 -(dp204 -g14 -S'NC_000011.9:g.70335439G>A' -p205 -sg16 -(dp206 -g18 -g67 -sg20 -g21 -sg22 -S'70335439' -p207 -sg24 -g25 -sssS'grch37' -p208 -(dp209 -g14 -S'NC_000011.9:g.70335439G>A' -p210 -sg16 -(dp211 -g18 -g56 -sg20 -g21 -sg22 -S'70335439' -p212 -sg24 -g25 -ssssg74 -(dp213 -g123 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_573573.2' -p214 -sg76 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_133266.3' -p215 -sssS'metadata' -p216 -(dp217 -S'variantvalidator_hgvs_version' -p218 -S'1.1.3' -p219 -sS'uta_schema' -p220 -S'uta_20180821' -p221 -sS'seqrepo_db' -p222 -S'2018-08-21' -p223 -sS'variantvalidator_version' -p224 -S'v0.2' -p225 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant314.txt b/VariantValidator/testing/testOutputsMasterITS/variant314.txt deleted file mode 100644 index 42975a4a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant314.txt +++ /dev/null @@ -1,179 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_012309.4:c.960C>A' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -aS'NM_012309.4:c.960C>A cannot be mapped directly to genome build GRCh37' -p10 -aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g6 -sS'alt_genomic_loci' -p13 -(lp14 -(dp15 -S'grch37' -p16 -(dp17 -S'hgvs_genomic_description' -p18 -S'NW_004070871.1:g.569441G>T' -p19 -sS'vcf' -p20 -(dp21 -S'chr' -p22 -S'HG865_PATCH' -p23 -sS'ref' -p24 -VG -p25 -sS'pos' -p26 -S'569441' -p27 -sS'alt' -p28 -VT -p29 -sssa(dp30 -S'hg19' -p31 -(dp32 -g18 -S'NW_004070871.1:g.569441G>T' -p33 -sg20 -(dp34 -g22 -S'NW_004070871.1' -p35 -sg24 -g25 -sg26 -S'569441' -p36 -sg28 -g29 -sssasS'transcript_description' -p37 -VHomo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 1, mRNA -p38 -sS'gene_symbol' -p39 -S'SHANK2' -p40 -sS'hgvs_predicted_protein_consequence' -p41 -(dp42 -S'tlr' -p43 -S'NP_036441.2:p.(Tyr320Ter)' -p44 -sS'slr' -p45 -S'NP_036441.2:p.(Y320*)' -p46 -ssS'submitted_variant' -p47 -S'HG865_PATCH-569441-G-T' -p48 -sS'genome_context_intronic_sequence' -p49 -g6 -sS'hgvs_lrg_variant' -p50 -g6 -sS'hgvs_transcript_variant' -p51 -S'NM_012309.4:c.960C>A' -p52 -sS'hgvs_refseqgene_variant' -p53 -g6 -sS'primary_assembly_loci' -p54 -(dp55 -S'grch38' -p56 -(dp57 -g18 -S'NC_000011.10:g.71075228G>T' -p58 -sg20 -(dp59 -g22 -S'11' -p60 -sg24 -g25 -sg26 -S'71075228' -p61 -sg28 -g29 -sssS'hg38' -p62 -(dp63 -g18 -S'NC_000011.10:g.71075228G>T' -p64 -sg20 -(dp65 -g22 -S'chr11' 
-p66 -sg24 -g25 -sg26 -S'71075228' -p67 -sg28 -g29 -ssssS'reference_sequence_records' -p68 -(dp69 -S'protein' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_036441.2' -p71 -sS'transcript' -p72 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_012309.4' -p73 -sssS'metadata' -p74 -(dp75 -S'variantvalidator_hgvs_version' -p76 -S'1.1.3' -p77 -sS'uta_schema' -p78 -S'uta_20180821' -p79 -sS'seqrepo_db' -p80 -S'2018-08-21' -p81 -sS'variantvalidator_version' -p82 -S'v0.2' -p83 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant315.txt b/VariantValidator/testing/testOutputsMasterITS/variant315.txt deleted file mode 100644 index 9601b8bd..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant315.txt +++ /dev/null @@ -1,180 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_012309.4:c.913-5058G>A' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -aS'NM_012309.4:c.913-5058G>A cannot be mapped directly to genome build GRCh37' -p10 -aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g6 -sS'alt_genomic_loci' -p13 -(lp14 -(dp15 -S'grch37' -p16 -(dp17 -S'hgvs_genomic_description' -p18 -S'NW_004070871.1:g.574546C>T' -p19 -sS'vcf' -p20 -(dp21 -S'chr' -p22 -S'HG865_PATCH' -p23 -sS'ref' -p24 -VC -p25 -sS'pos' -p26 -S'574546' -p27 -sS'alt' -p28 -VT -p29 -sssa(dp30 -S'hg19' -p31 -(dp32 -g18 -S'NW_004070871.1:g.574546C>T' -p33 -sg20 -(dp34 -g22 -S'NW_004070871.1' -p35 -sg24 -g25 -sg26 -S'574546' -p36 -sg28 -g29 -sssasS'transcript_description' -p37 -VHomo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 1, mRNA -p38 -sS'gene_symbol' -p39 -S'SHANK2' -p40 -sS'hgvs_predicted_protein_consequence' -p41 -(dp42 -S'tlr' -p43 -S'NP_036441.2:p.?' -p44 -sS'slr' -p45 -S'NP_036441.2:p.?' 
-p46 -ssS'submitted_variant' -p47 -S'HG865_PATCH-574546-C-T' -p48 -sS'genome_context_intronic_sequence' -p49 -S'NC_000011.10(NM_012309.4):c.913-5058G>A' -p50 -sS'hgvs_lrg_variant' -p51 -g6 -sS'hgvs_transcript_variant' -p52 -S'NM_012309.4:c.913-5058G>A' -p53 -sS'hgvs_refseqgene_variant' -p54 -g6 -sS'primary_assembly_loci' -p55 -(dp56 -S'grch38' -p57 -(dp58 -g18 -S'NC_000011.10:g.71080333C>T' -p59 -sg20 -(dp60 -g22 -S'11' -p61 -sg24 -g25 -sg26 -S'71080333' -p62 -sg28 -g29 -sssS'hg38' -p63 -(dp64 -g18 -S'NC_000011.10:g.71080333C>T' -p65 -sg20 -(dp66 -g22 -S'chr11' -p67 -sg24 -g25 -sg26 -S'71080333' -p68 -sg28 -g29 -ssssS'reference_sequence_records' -p69 -(dp70 -S'protein' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_036441.2' -p72 -sS'transcript' -p73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_012309.4' -p74 -sssS'metadata' -p75 -(dp76 -S'variantvalidator_hgvs_version' -p77 -S'1.1.3' -p78 -sS'uta_schema' -p79 -S'uta_20180821' -p80 -sS'seqrepo_db' -p81 -S'2018-08-21' -p82 -sS'variantvalidator_version' -p83 -S'v0.2' -p84 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant316.txt b/VariantValidator/testing/testOutputsMasterITS/variant316.txt deleted file mode 100644 index e6ec98b8..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant316.txt +++ /dev/null @@ -1,182 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_020699.2:c.802_803insTT' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'NW_003315905.1:g.133178TAG>T automapped to NW_003315905.1:g.133179_133180delAG' -p19 -aS'A more recent version of the selected reference sequence NM_020699.2 is available (NM_020699.3)' -p20 -aS'NM_020699.3:c.802_803insTT MUST be fully validated prior to use in reports' -p21 -aS'select_variants=NM_020699.3:c.802_803insTT' -p22 -aS'RefSeqGene record not available' -p23 -asS'refseqgene_context_intronic_sequence' -p24 -g16 -sS'alt_genomic_loci' -p25 -(lp26 -sS'transcript_description' -p27 -VHomo sapiens GATA zinc finger domain containing 2B (GATAD2B), mRNA -p28 -sS'gene_symbol' -p29 -S'GATAD2B' -p30 -sS'hgvs_predicted_protein_consequence' -p31 -(dp32 -S'tlr' -p33 -S'NP_065750.1:p.(Pro268LeufsTer26)' -p34 -sS'slr' -p35 -S'NP_065750.1:p.(P268Lfs*26)' -p36 -ssS'submitted_variant' -p37 -S'HSCHR1_1_CTG31-133178-TAG-T' -p38 -sS'genome_context_intronic_sequence' -p39 -g16 -sS'hgvs_lrg_variant' -p40 -g16 -sS'hgvs_transcript_variant' -p41 -S'NM_020699.2:c.802_803insTT' -p42 -sS'hgvs_refseqgene_variant' -p43 -g16 -sS'primary_assembly_loci' -p44 -(dp45 -S'hg19' -p46 -(dp47 -S'hgvs_genomic_description' -p48 -S'NC_000001.10:g.153789945_153789946delinsGAAG' -p49 -sS'vcf' -p50 -(dp51 -S'chr' -p52 -S'chr1' -p53 -sS'ref' -p54 -S'G' -p55 -sS'pos' -p56 -S'153789945' -p57 -sS'alt' -p58 -VGAA -p59 -sssS'hg38' 
-p60 -(dp61 -g48 -S'NC_000001.11:g.153817469_153817470insAA' -p62 -sg50 -(dp63 -g52 -g53 -sg54 -g55 -sg56 -S'153817469' -p64 -sg58 -VGAA -p65 -sssS'grch37' -p66 -(dp67 -g48 -S'NC_000001.10:g.153789945_153789946delinsGAAG' -p68 -sg50 -(dp69 -g52 -S'1' -p70 -sg54 -g55 -sg56 -S'153789945' -p71 -sg58 -VGAA -p72 -sssS'grch38' -p73 -(dp74 -g48 -S'NC_000001.11:g.153817469_153817470insAA' -p75 -sg50 -(dp76 -g52 -g70 -sg54 -g55 -sg56 -S'153817469' -p77 -sg58 -VGAA -p78 -ssssS'reference_sequence_records' -p79 -(dp80 -S'protein' -p81 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_065750.1' -p82 -sS'transcript' -p83 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_020699.2' -p84 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant317.txt b/VariantValidator/testing/testOutputsMasterITS/variant317.txt deleted file mode 100644 index 6c1b3a14..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant317.txt +++ /dev/null @@ -1,293 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_021983.4:c.490G>C' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'RefSeqGene record not available' -p19 -aS'NM_021983.4:c.490G>C cannot be mapped directly to genome build GRCh37' -p20 -aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' -p21 -asS'refseqgene_context_intronic_sequence' -p22 -g16 -sS'alt_genomic_loci' -p23 -(lp24 -(dp25 -S'grch37' -p26 -(dp27 -S'hgvs_genomic_description' -p28 -S'NT_167246.1:g.3848158T>G' -p29 -sS'vcf' -p30 -(dp31 -S'chr' -p32 -S'HSCHR6_MHC_MANN_CTG1' -p33 -sS'ref' -p34 -S'T' -p35 -sS'pos' -p36 -S'3848158' -p37 -sS'alt' -p38 -VG -p39 -sssa(dp40 -S'hg19' -p41 -(dp42 -g28 -S'NT_167246.1:g.3848158T>G' -p43 -sg30 -(dp44 -g32 
-S'chr6_mann_hap4' -p45 -sg34 -g35 -sg36 -S'3848158' -p46 -sg38 -g39 -sssa(dp47 -S'grch38' -p48 -(dp49 -g28 -S'NT_167246.2:g.3842538T>G' -p50 -sg30 -(dp51 -g32 -g33 -sg34 -g35 -sg36 -S'3842538' -p52 -sg38 -g39 -sssa(dp53 -S'hg38' -p54 -(dp55 -g28 -S'NT_167246.2:g.3842538T>G' -p56 -sg30 -(dp57 -g32 -S'chr6_GL000253v2_alt' -p58 -sg34 -g35 -sg36 -S'3842538' -p59 -sg38 -g39 -sssa(dp60 -S'grch37' -p61 -(dp62 -g28 -S'NT_167247.1:g.3884432C>G' -p63 -sg30 -(dp64 -g32 -S'HSCHR6_MHC_MCF_CTG1' -p65 -sg34 -VC -p66 -sg36 -S'3884432' -p67 -sg38 -g39 -sssa(dp68 -S'hg19' -p69 -(dp70 -g28 -S'NT_167247.1:g.3884432C>G' -p71 -sg30 -(dp72 -g32 -S'chr6_mcf_hap5' -p73 -sg34 -g66 -sg36 -S'3884432' -p74 -sg38 -g39 -sssa(dp75 -S'grch37' -p76 -(dp77 -g28 -S'NT_167249.1:g.3852542C>G' -p78 -sg30 -(dp79 -g32 -S'HSCHR6_MHC_SSTO_CTG1' -p80 -sg34 -g66 -sg36 -S'3852542' -p81 -sg38 -g39 -sssa(dp82 -S'hg19' -p83 -(dp84 -g28 -S'NT_167249.1:g.3852542C>G' -p85 -sg30 -(dp86 -g32 -S'chr6_ssto_hap7' -p87 -sg34 -g66 -sg36 -S'3852542' -p88 -sg38 -g39 -sssa(dp89 -S'grch38' -p90 -(dp91 -g28 -S'NT_167249.2:g.3853244C>G' -p92 -sg30 -(dp93 -g32 -g80 -sg34 -g66 -sg36 -S'3853244' -p94 -sg38 -g39 -sssa(dp95 -g54 -(dp96 -g28 -S'NT_167249.2:g.3853244C>G' -p97 -sg30 -(dp98 -g32 -S'chr6_GL000256v2_alt' -p99 -sg34 -g66 -sg36 -S'3853244' -p100 -sg38 -g39 -sssasS'transcript_description' -p101 -VHomo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA -p102 -sS'gene_symbol' -p103 -S'HLA-DRB4' -p104 -sS'hgvs_predicted_protein_consequence' -p105 -(dp106 -S'tlr' -p107 -S'NP_068818.4:p.(Gly164Arg)' -p108 -sS'slr' -p109 -S'NP_068818.4:p.(G164R)' -p110 -ssS'submitted_variant' -p111 -S'HSCHR6_MHC_MANN_CTG1-3848158-T-G' -p112 -sS'genome_context_intronic_sequence' -p113 -g16 -sS'hgvs_lrg_variant' -p114 -g16 -sS'hgvs_transcript_variant' -p115 -S'NM_021983.4:c.490G>C' -p116 -sS'hgvs_refseqgene_variant' -p117 -g16 -sS'primary_assembly_loci' -p118 -(dp119 -sS'reference_sequence_records' -p120 -(dp121 
-S'protein' -p122 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_068818.4' -p123 -sS'transcript' -p124 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_021983.4' -p125 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant318.txt b/VariantValidator/testing/testOutputsMasterITS/variant318.txt deleted file mode 100644 index 49d77588..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant318.txt +++ /dev/null @@ -1,292 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_021983.4:c.346G>T' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -aS'NM_021983.4:c.346G>T cannot be mapped directly to genome build GRCh37' -p10 -aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g6 -sS'alt_genomic_loci' -p13 -(lp14 -(dp15 -S'grch37' -p16 -(dp17 -S'hgvs_genomic_description' -p18 -S'NT_167246.1:g.3851043C>A' -p19 -sS'vcf' -p20 -(dp21 -S'chr' -p22 -S'HSCHR6_MHC_MANN_CTG1' -p23 -sS'ref' -p24 -VC -p25 -sS'pos' -p26 -S'3851043' -p27 -sS'alt' -p28 -VA -p29 -sssa(dp30 -S'hg19' -p31 -(dp32 -g18 -S'NT_167246.1:g.3851043C>A' -p33 -sg20 -(dp34 -g22 -S'chr6_mann_hap4' -p35 -sg24 -g25 -sg26 -S'3851043' -p36 -sg28 -g29 -sssa(dp37 -S'grch38' -p38 -(dp39 -g18 -S'NT_167246.2:g.3845423C>A' -p40 -sg20 -(dp41 -g22 -g23 -sg24 -g25 -sg26 -S'3845423' -p42 -sg28 -g29 -sssa(dp43 -S'hg38' -p44 -(dp45 -g18 -S'NT_167246.2:g.3845423C>A' -p46 -sg20 -(dp47 -g22 -S'chr6_GL000253v2_alt' -p48 -sg24 -g25 -sg26 -S'3845423' -p49 -sg28 -g29 -sssa(dp50 -S'grch37' -p51 -(dp52 -g18 -S'NT_167247.1:g.3887313C>A' -p53 -sg20 -(dp54 -g22 -S'HSCHR6_MHC_MCF_CTG1' -p55 -sg24 -g25 -sg26 -S'3887313' -p56 -sg28 -g29 -sssa(dp57 -S'hg19' -p58 -(dp59 -g18 -S'NT_167247.1:g.3887313C>A' -p60 -sg20 -(dp61 -g22 -S'chr6_mcf_hap5' -p62 -sg24 -g25 -sg26 -S'3887313' -p63 -sg28 -g29 -sssa(dp64 -S'grch37' -p65 -(dp66 -g18 
-S'NT_167249.1:g.3855423C>A' -p67 -sg20 -(dp68 -g22 -S'HSCHR6_MHC_SSTO_CTG1' -p69 -sg24 -g25 -sg26 -S'3855423' -p70 -sg28 -g29 -sssa(dp71 -S'hg19' -p72 -(dp73 -g18 -S'NT_167249.1:g.3855423C>A' -p74 -sg20 -(dp75 -g22 -S'chr6_ssto_hap7' -p76 -sg24 -g25 -sg26 -S'3855423' -p77 -sg28 -g29 -sssa(dp78 -S'grch38' -p79 -(dp80 -g18 -S'NT_167249.2:g.3856125C>A' -p81 -sg20 -(dp82 -g22 -g69 -sg24 -g25 -sg26 -S'3856125' -p83 -sg28 -g29 -sssa(dp84 -g44 -(dp85 -g18 -S'NT_167249.2:g.3856125C>A' -p86 -sg20 -(dp87 -g22 -S'chr6_GL000256v2_alt' -p88 -sg24 -g25 -sg26 -S'3856125' -p89 -sg28 -g29 -sssasS'transcript_description' -p90 -VHomo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA -p91 -sS'gene_symbol' -p92 -S'HLA-DRB4' -p93 -sS'hgvs_predicted_protein_consequence' -p94 -(dp95 -S'tlr' -p96 -S'NP_068818.4:p.(Glu116Ter)' -p97 -sS'slr' -p98 -S'NP_068818.4:p.(E116*)' -p99 -ssS'submitted_variant' -p100 -S'HSCHR6_MHC_MANN_CTG1-3851043-C-A' -p101 -sS'genome_context_intronic_sequence' -p102 -g6 -sS'hgvs_lrg_variant' -p103 -g6 -sS'hgvs_transcript_variant' -p104 -S'NM_021983.4:c.346G>T' -p105 -sS'hgvs_refseqgene_variant' -p106 -g6 -sS'primary_assembly_loci' -p107 -(dp108 -sS'reference_sequence_records' -p109 -(dp110 -S'protein' -p111 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_068818.4' -p112 -sS'transcript' -p113 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_021983.4' -p114 -sssS'metadata' -p115 -(dp116 -S'variantvalidator_hgvs_version' -p117 -S'1.1.3' -p118 -sS'uta_schema' -p119 -S'uta_20180821' -p120 -sS'seqrepo_db' -p121 -S'2018-08-21' -p122 -sS'variantvalidator_version' -p123 -S'v0.2' -p124 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant319.txt b/VariantValidator/testing/testOutputsMasterITS/variant319.txt deleted file mode 100644 index d663d806..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant319.txt +++ /dev/null @@ -1,287 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_001097642.2:c.-16-441C>T' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens gap junction protein beta 1 (GJB1), transcript variant 1, mRNA -p14 -sS'gene_symbol' -p15 -S'GJB1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_001091111.1:p.?' -p20 -sS'slr' -p21 -S'NP_001091111.1:p.?' -p22 -ssS'submitted_variant' -p23 -S'X-70443101-C-T' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000023.10(NM_001097642.2):c.-16-441C>T' -p26 -sS'hgvs_lrg_variant' -p27 -g6 -sS'hgvs_transcript_variant' -p28 -S'NM_001097642.2:c.-16-441C>T' -p29 -sS'hgvs_refseqgene_variant' -p30 -g6 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'hgvs_genomic_description' -p35 -S'NC_000023.10:g.70443101C>T' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chrX' -p40 -sS'ref' -p41 -S'C' -p42 -sS'pos' -p43 -S'70443101' -p44 -sS'alt' -p45 -S'T' -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000023.11:g.71223251C>T' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -g42 -sg43 -S'71223251' -p51 -sg45 -g46 -sssS'grch37' -p52 -(dp53 -g35 -S'NC_000023.10:g.70443101C>T' -p54 -sg37 -(dp55 -g39 -S'X' -p56 -sg41 -g42 -sg43 -S'70443101' -p57 -sg45 -g46 -sssS'grch38' -p58 -(dp59 -g35 -S'NC_000023.11:g.71223251C>T' -p60 -sg37 -(dp61 -g39 -g56 -sg41 -g42 -sg43 -S'71223251' -p62 -sg45 -g46 -ssssS'reference_sequence_records' -p63 -(dp64 -S'protein' -p65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001091111.1' -p66 -sS'transcript' -p67 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001097642.2' -p68 -sssS'NM_000166.5:c.-101C>T' -p69 -(dp70 -g5 -g6 -sg7 -(lp71 -S'RefSeqGene record not available' -p72 -asg10 -g6 -sg11 -(lp73 -sg13 -VHomo sapiens gap junction protein beta 1 (GJB1), transcript variant 2, mRNA -p74 -sg15 -S'GJB1' -p75 -sg17 -(dp76 -g19 -S'NP_000157.1:p.?' -p77 -sg21 -S'NP_000157.1:p.?' -p78 -ssg23 -g24 -sg25 -g6 -sg27 -g6 -sg28 -S'NM_000166.5:c.-101C>T' -p79 -sg30 -g6 -sg31 -(dp80 -S'hg19' -p81 -(dp82 -g35 -S'NC_000023.10:g.70443101C>T' -p83 -sg37 -(dp84 -g39 -g40 -sg41 -g42 -sg43 -S'70443101' -p85 -sg45 -g46 -sssg47 -(dp86 -g35 -S'NC_000023.11:g.71223251C>T' -p87 -sg37 -(dp88 -g39 -g40 -sg41 -g42 -sg43 -S'71223251' -p89 -sg45 -g46 -sssS'grch37' -p90 -(dp91 -g35 -S'NC_000023.10:g.70443101C>T' -p92 -sg37 -(dp93 -g39 -g56 -sg41 -g42 -sg43 -S'70443101' -p94 -sg45 -g46 -sssS'grch38' -p95 -(dp96 -g35 -S'NC_000023.11:g.71223251C>T' -p97 -sg37 -(dp98 -g39 -g56 -sg41 -g42 -sg43 -S'71223251' -p99 -sg45 -g46 -ssssg63 -(dp100 -g65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000157.1' -p101 -sg67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000166.5' -p102 -sssS'metadata' -p103 -(dp104 -S'variantvalidator_hgvs_version' -p105 -S'1.1.3' -p106 -sS'uta_schema' -p107 -S'uta_20180821' -p108 -sS'seqrepo_db' -p109 -S'2018-08-21' -p110 -sS'variantvalidator_version' -p111 -S'v0.2' -p112 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant32.txt b/VariantValidator/testing/testOutputsMasterITS/variant32.txt deleted file mode 100644 index 54d25dc8..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant32.txt +++ /dev/null @@ -1,172 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589-1G>T' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2:p.?' -p20 -sS'slr' -p21 -S'NP_000079.2:p.?' -p22 -ssS'submitted_variant' -p23 -S'NM_000088.3:c.589-1G>T' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000017.10(NM_000088.3):c.589-1G>T' -p26 -sS'hgvs_lrg_variant' -p27 -g6 -sS'hgvs_transcript_variant' -p28 -S'NM_000088.3:c.589-1G>T' -p29 -sS'hgvs_refseqgene_variant' -p30 -g6 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'hgvs_genomic_description' -p35 -S'NC_000017.10:g.48275364C>A' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr17' -p40 -sS'ref' -p41 -VC -p42 -sS'pos' -p43 -S'48275364' -p44 -sS'alt' -p45 -VA -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000017.11:g.50198003C>A' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -g42 -sg43 -S'50198003' -p51 -sg45 -g46 -sssS'grch37' -p52 -(dp53 -g35 -S'NC_000017.10:g.48275364C>A' -p54 -sg37 -(dp55 -g39 -S'17' -p56 -sg41 -g42 -sg43 -S'48275364' -p57 -sg45 -g46 -sssS'grch38' -p58 -(dp59 -g35 -S'NC_000017.11:g.50198003C>A' -p60 -sg37 -(dp61 -g39 -g56 -sg41 -g42 -sg43 -S'50198003' -p62 -sg45 -g46 -ssssS'reference_sequence_records' -p63 -(dp64 -S'protein' -p65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p66 -sS'transcript' -p67 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p68 -sssS'metadata' -p69 -(dp70 -S'variantvalidator_hgvs_version' -p71 -S'1.1.3' -p72 -sS'uta_schema' -p73 -S'uta_20180821' -p74 -sS'seqrepo_db' -p75 -S'2018-08-21' -p76 -sS'variantvalidator_version' -p77 -S'v0.2' -p78 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant320.txt b/VariantValidator/testing/testOutputsMasterITS/variant320.txt deleted file mode 100644 index 2bd34839..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant320.txt +++ /dev/null @@ -1,555 +0,0 @@ -(dp0 -S'NM_033380.2:c.2130_2135del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'Multiple ALT sequences detected' -p7 -aS'auto-submitting all possible combinations' -p8 -aS'NC_000023.10:g.107845202GACCACC>G automapped to NC_000023.10:g.107845203_107845208del' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g4 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type IV alpha 5 chain (COL4A5), transcript variant 2, mRNA -p15 -sS'gene_symbol' -p16 -S'COL4A5' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_203699.1:p.(Pro711_Pro712del)' -p21 -sS'slr' -p22 -S'NP_203699.1:p.(P711_P712del)' -p23 -ssS'submitted_variant' -p24 -S'X-107845202-GACCACC-GACC,G' -p25 -sS'genome_context_intronic_sequence' -p26 -g4 -sS'hgvs_lrg_variant' -p27 -g4 -sS'hgvs_transcript_variant' -p28 -S'NM_033380.2:c.2130_2135del' -p29 -sS'hgvs_refseqgene_variant' -p30 -g4 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'hgvs_genomic_description' -p35 -S'NC_000023.10:g.107845203_107845208del' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chrX' -p40 -sS'ref' -p41 -S'GACCACC' -p42 -sS'pos' -p43 -S'107845202' -p44 -sS'alt' -p45 -S'G' -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000023.11:g.108601973_108601978del' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -S'GACCACC' 
-p51 -sg43 -S'108601972' -p52 -sg45 -g46 -sssS'grch37' -p53 -(dp54 -g35 -S'NC_000023.10:g.107845203_107845208del' -p55 -sg37 -(dp56 -g39 -S'X' -p57 -sg41 -S'GACCACC' -p58 -sg43 -S'107845202' -p59 -sg45 -g46 -sssS'grch38' -p60 -(dp61 -g35 -S'NC_000023.11:g.108601973_108601978del' -p62 -sg37 -(dp63 -g39 -g57 -sg41 -S'GACCACC' -p64 -sg43 -S'108601972' -p65 -sg45 -g46 -ssssS'reference_sequence_records' -p66 -(dp67 -S'protein' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_203699.1' -p69 -sS'transcript' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_033380.2' -p71 -sssS'NM_000495.4:c.2130_2135del' -p72 -(dp73 -g3 -g4 -sg5 -(lp74 -S'Multiple ALT sequences detected' -p75 -aS'auto-submitting all possible combinations' -p76 -aS'NC_000023.10:g.107845202GACCACC>G automapped to NC_000023.10:g.107845203_107845208del' -p77 -aS'RefSeqGene record not available' -p78 -asg11 -g4 -sg12 -(lp79 -sg14 -VHomo sapiens collagen type IV alpha 5 chain (COL4A5), transcript variant 1, mRNA -p80 -sg16 -S'COL4A5' -p81 -sg18 -(dp82 -g20 -S'NP_000486.1:p.(Pro711_Pro712del)' -p83 -sg22 -S'NP_000486.1:p.(P711_P712del)' -p84 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_000495.4:c.2130_2135del' -p85 -sg30 -g4 -sg31 -(dp86 -S'hg19' -p87 -(dp88 -g35 -S'NC_000023.10:g.107845203_107845208del' -p89 -sg37 -(dp90 -g39 -g40 -sg41 -S'GACCACC' -p91 -sg43 -S'107845202' -p92 -sg45 -g46 -sssg47 -(dp93 -g35 -S'NC_000023.11:g.108601973_108601978del' -p94 -sg37 -(dp95 -g39 -g40 -sg41 -S'GACCACC' -p96 -sg43 -S'108601972' -p97 -sg45 -g46 -sssS'grch37' -p98 -(dp99 -g35 -S'NC_000023.10:g.107845203_107845208del' -p100 -sg37 -(dp101 -g39 -g57 -sg41 -S'GACCACC' -p102 -sg43 -S'107845202' -p103 -sg45 -g46 -sssS'grch38' -p104 -(dp105 -g35 -S'NC_000023.11:g.108601973_108601978del' -p106 -sg37 -(dp107 -g39 -g57 -sg41 -S'GACCACC' -p108 -sg43 -S'108601972' -p109 -sg45 -g46 -ssssg66 -(dp110 -g68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000486.1' -p111 -sg70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000495.4' -p112 -sssS'flag' 
-p113 -S'gene_variant' -p114 -sS'NM_000495.4:c.2133_2135del' -p115 -(dp116 -g3 -g4 -sg5 -(lp117 -S'Multiple ALT sequences detected' -p118 -aS'auto-submitting all possible combinations' -p119 -aS'NC_000023.10:g.107845202GACCACC>GACC automapped to NC_000023.10:g.107845206_107845208delACC' -p120 -aS'RefSeqGene record not available' -p121 -asg11 -g4 -sg12 -(lp122 -sg14 -VHomo sapiens collagen type IV alpha 5 chain (COL4A5), transcript variant 1, mRNA -p123 -sg16 -S'COL4A5' -p124 -sg18 -(dp125 -g20 -S'NP_000486.1:p.(Pro712del)' -p126 -sg22 -S'NP_000486.1:p.(P712del)' -p127 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_000495.4:c.2133_2135del' -p128 -sg30 -g4 -sg31 -(dp129 -S'hg19' -p130 -(dp131 -g35 -S'NC_000023.10:g.107845206_107845208del' -p132 -sg37 -(dp133 -g39 -g40 -sg41 -S'GACC' -p134 -sg43 -S'107845202' -p135 -sg45 -g46 -sssg47 -(dp136 -g35 -S'NC_000023.11:g.108601976_108601978del' -p137 -sg37 -(dp138 -g39 -g40 -sg41 -S'GACC' -p139 -sg43 -S'108601972' -p140 -sg45 -g46 -sssS'grch37' -p141 -(dp142 -g35 -S'NC_000023.10:g.107845206_107845208del' -p143 -sg37 -(dp144 -g39 -g57 -sg41 -S'GACC' -p145 -sg43 -S'107845202' -p146 -sg45 -g46 -sssS'grch38' -p147 -(dp148 -g35 -S'NC_000023.11:g.108601976_108601978del' -p149 -sg37 -(dp150 -g39 -g57 -sg41 -S'GACC' -p151 -sg43 -S'108601972' -p152 -sg45 -g46 -ssssg66 -(dp153 -g68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000486.1' -p154 -sg70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000495.4' -p155 -sssS'NM_033380.2:c.2133_2135del' -p156 -(dp157 -g3 -g4 -sg5 -(lp158 -S'Multiple ALT sequences detected' -p159 -aS'auto-submitting all possible combinations' -p160 -aS'NC_000023.10:g.107845202GACCACC>GACC automapped to NC_000023.10:g.107845206_107845208delACC' -p161 -aS'RefSeqGene record not available' -p162 -asg11 -g4 -sg12 -(lp163 -sg14 -VHomo sapiens collagen type IV alpha 5 chain (COL4A5), transcript variant 2, mRNA -p164 -sg16 -S'COL4A5' -p165 -sg18 -(dp166 -g20 -S'NP_203699.1:p.(Pro712del)' -p167 -sg22 -S'NP_203699.1:p.(P712del)' 
-p168 -ssg24 -g25 -sg26 -g4 -sg27 -g4 -sg28 -S'NM_033380.2:c.2133_2135del' -p169 -sg30 -g4 -sg31 -(dp170 -S'hg19' -p171 -(dp172 -g35 -S'NC_000023.10:g.107845206_107845208del' -p173 -sg37 -(dp174 -g39 -g40 -sg41 -S'GACC' -p175 -sg43 -S'107845202' -p176 -sg45 -g46 -sssg47 -(dp177 -g35 -S'NC_000023.11:g.108601976_108601978del' -p178 -sg37 -(dp179 -g39 -g40 -sg41 -S'GACC' -p180 -sg43 -S'108601972' -p181 -sg45 -g46 -sssS'grch37' -p182 -(dp183 -g35 -S'NC_000023.10:g.107845206_107845208del' -p184 -sg37 -(dp185 -g39 -g57 -sg41 -S'GACC' -p186 -sg43 -S'107845202' -p187 -sg45 -g46 -sssS'grch38' -p188 -(dp189 -g35 -S'NC_000023.11:g.108601976_108601978del' -p190 -sg37 -(dp191 -g39 -g57 -sg41 -S'GACC' -p192 -sg43 -S'108601972' -p193 -sg45 -g46 -ssssg66 -(dp194 -g68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_203699.1' -p195 -sg70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_033380.2' -p196 -sssS'metadata' -p197 -(dp198 -S'variantvalidator_hgvs_version' -p199 -S'1.1.3' -p200 -sS'uta_schema' -p201 -S'uta_20180821' -p202 -sS'seqrepo_db' -p203 -S'2018-08-21' -p204 -sS'variantvalidator_version' -p205 -S'v0.2' -p206 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant321.txt b/VariantValidator/testing/testOutputsMasterITS/variant321.txt deleted file mode 100644 index f3204457..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant321.txt +++ /dev/null @@ -1,513 +0,0 @@ -(dp0 -S'NM_004992.3:c.502C>T' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -(dp11 -S'grch37' -p12 -(dp13 -S'hgvs_genomic_description' -p14 -S'NW_003871103.3:g.1465305G>A' -p15 -sS'vcf' -p16 -(dp17 -S'chr' -p18 -S'HG1497_PATCH' -p19 -sS'ref' -p20 -VG -p21 -sS'pos' -p22 -S'1465305' -p23 -sS'alt' -p24 -VA -p25 -sssa(dp26 -S'hg19' -p27 -(dp28 -g14 -S'NW_003871103.3:g.1465305G>A' -p29 -sg16 -(dp30 -g18 -S'NW_003871103.3' -p31 -sg20 -g21 -sg22 -S'1465305' -p32 -sg24 -g25 -sssasS'transcript_description' -p33 -VHomo sapiens methyl-CpG binding protein 2 (MECP2), transcript variant 1, mRNA -p34 -sS'gene_symbol' -p35 -S'MECP2' -p36 -sS'hgvs_predicted_protein_consequence' -p37 -(dp38 -S'tlr' -p39 -S'NP_004983.1:p.(Arg168Ter)' -p40 -sS'slr' -p41 -S'NP_004983.1:p.(R168*)' -p42 -ssS'submitted_variant' -p43 -S'X-153296777-G-A' -p44 -sS'genome_context_intronic_sequence' -p45 -g4 -sS'hgvs_lrg_variant' -p46 -g4 -sS'hgvs_transcript_variant' -p47 -S'NM_004992.3:c.502C>T' -p48 -sS'hgvs_refseqgene_variant' -p49 -g4 -sS'primary_assembly_loci' -p50 -(dp51 -S'hg19' -p52 -(dp53 -g14 -S'NC_000023.10:g.153296777G>A' -p54 -sg16 -(dp55 -g18 -S'chrX' -p56 -sg20 -g21 -sg22 -S'153296777' -p57 -sg24 -g25 -sssS'hg38' -p58 -(dp59 -g14 -S'NC_000023.11:g.154031326G>A' -p60 -sg16 -(dp61 -g18 -g56 -sg20 -g21 -sg22 -S'154031326' -p62 -sg24 -g25 -sssS'grch37' -p63 -(dp64 -g14 -S'NC_000023.10:g.153296777G>A' -p65 -sg16 -(dp66 -g18 -S'X' -p67 -sg20 -g21 -sg22 -S'153296777' -p68 -sg24 -g25 -sssS'grch38' -p69 -(dp70 -g14 
-S'NC_000023.11:g.154031326G>A' -p71 -sg16 -(dp72 -g18 -g67 -sg20 -g21 -sg22 -S'154031326' -p73 -sg24 -g25 -ssssS'reference_sequence_records' -p74 -(dp75 -S'protein' -p76 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004983.1' -p77 -sS'transcript' -p78 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004992.3' -p79 -sssS'flag' -p80 -S'gene_variant' -p81 -sS'NM_001316337.1:c.223C>T' -p82 -(dp83 -g3 -g4 -sg5 -(lp84 -S'RefSeqGene record not available' -p85 -asg8 -g4 -sg9 -(lp86 -(dp87 -S'grch37' -p88 -(dp89 -g14 -S'NW_003871103.3:g.1465305G>A' -p90 -sg16 -(dp91 -g18 -g19 -sg20 -g21 -sg22 -S'1465305' -p92 -sg24 -g25 -sssa(dp93 -S'hg19' -p94 -(dp95 -g14 -S'NW_003871103.3:g.1465305G>A' -p96 -sg16 -(dp97 -g18 -S'NW_003871103.3' -p98 -sg20 -g21 -sg22 -S'1465305' -p99 -sg24 -g25 -sssasg33 -VHomo sapiens methyl-CpG binding protein 2 (MECP2), transcript variant 3, mRNA -p100 -sg35 -S'MECP2' -p101 -sg37 -(dp102 -g39 -S'NP_001303266.1:p.(Arg75Ter)' -p103 -sg41 -S'NP_001303266.1:p.(R75*)' -p104 -ssg43 -g44 -sg45 -g4 -sg46 -g4 -sg47 -S'NM_001316337.1:c.223C>T' -p105 -sg49 -g4 -sg50 -(dp106 -S'hg19' -p107 -(dp108 -g14 -S'NC_000023.10:g.153296777G>A' -p109 -sg16 -(dp110 -g18 -g56 -sg20 -g21 -sg22 -S'153296777' -p111 -sg24 -g25 -sssg58 -(dp112 -g14 -S'NC_000023.11:g.154031326G>A' -p113 -sg16 -(dp114 -g18 -g56 -sg20 -g21 -sg22 -S'154031326' -p115 -sg24 -g25 -sssS'grch37' -p116 -(dp117 -g14 -S'NC_000023.10:g.153296777G>A' -p118 -sg16 -(dp119 -g18 -g67 -sg20 -g21 -sg22 -S'153296777' -p120 -sg24 -g25 -sssS'grch38' -p121 -(dp122 -g14 -S'NC_000023.11:g.154031326G>A' -p123 -sg16 -(dp124 -g18 -g67 -sg20 -g21 -sg22 -S'154031326' -p125 -sg24 -g25 -ssssg74 -(dp126 -g76 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001303266.1' -p127 -sg78 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001316337.1' -p128 -sssS'metadata' -p129 -(dp130 -S'variantvalidator_hgvs_version' -p131 -S'1.1.3' -p132 -sS'uta_schema' -p133 -S'uta_20180821' -p134 -sS'seqrepo_db' -p135 -S'2018-08-21' -p136 -sS'variantvalidator_version' -p137 
-S'v0.2' -p138 -ssS'NM_001110792.1:c.538C>T' -p139 -(dp140 -g3 -g4 -sg5 -(lp141 -S'RefSeqGene record not available' -p142 -asg8 -g4 -sg9 -(lp143 -(dp144 -S'grch37' -p145 -(dp146 -g14 -S'NW_003871103.3:g.1465305G>A' -p147 -sg16 -(dp148 -g18 -g19 -sg20 -g21 -sg22 -S'1465305' -p149 -sg24 -g25 -sssa(dp150 -S'hg19' -p151 -(dp152 -g14 -S'NW_003871103.3:g.1465305G>A' -p153 -sg16 -(dp154 -g18 -S'NW_003871103.3' -p155 -sg20 -g21 -sg22 -S'1465305' -p156 -sg24 -g25 -sssasg33 -VHomo sapiens methyl-CpG binding protein 2 (MECP2), transcript variant 2, mRNA -p157 -sg35 -S'MECP2' -p158 -sg37 -(dp159 -g39 -S'NP_001104262.1:p.(Arg180Ter)' -p160 -sg41 -S'NP_001104262.1:p.(R180*)' -p161 -ssg43 -g44 -sg45 -g4 -sg46 -g4 -sg47 -S'NM_001110792.1:c.538C>T' -p162 -sg49 -g4 -sg50 -(dp163 -S'hg19' -p164 -(dp165 -g14 -S'NC_000023.10:g.153296777G>A' -p166 -sg16 -(dp167 -g18 -g56 -sg20 -g21 -sg22 -S'153296777' -p168 -sg24 -g25 -sssg58 -(dp169 -g14 -S'NC_000023.11:g.154031326G>A' -p170 -sg16 -(dp171 -g18 -g56 -sg20 -g21 -sg22 -S'154031326' -p172 -sg24 -g25 -sssS'grch37' -p173 -(dp174 -g14 -S'NC_000023.10:g.153296777G>A' -p175 -sg16 -(dp176 -g18 -g67 -sg20 -g21 -sg22 -S'153296777' -p177 -sg24 -g25 -sssS'grch38' -p178 -(dp179 -g14 -S'NC_000023.11:g.154031326G>A' -p180 -sg16 -(dp181 -g18 -g67 -sg20 -g21 -sg22 -S'154031326' -p182 -sg24 -g25 -ssssg74 -(dp183 -g76 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001104262.1' -p184 -sg78 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001110792.1' -p185 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant322.txt b/VariantValidator/testing/testOutputsMasterITS/variant322.txt deleted file mode 100644 index ea61adba..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant322.txt +++ /dev/null @@ -1,176 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_198180.2:c.408_410del' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'Cannot identify an in-frame Termination codon in the variant mRNA sequence' -p19 -aS'RefSeqGene record not available' -p20 -asS'refseqgene_context_intronic_sequence' -p21 -g16 -sS'alt_genomic_loci' -p22 -(lp23 -sS'transcript_description' -p24 -VHomo sapiens pyroglutamylated RFamide peptide (QRFP), mRNA -p25 -sS'gene_symbol' -p26 -S'QRFP' -p27 -sS'hgvs_predicted_protein_consequence' -p28 -(dp29 -S'tlr' -p30 -S'NP_937823.1:p.?' -p31 -sS'slr' -p32 -S'NP_937823.1:p.?' 
-p33 -ssS'submitted_variant' -p34 -S'NM_198180.2:c.408_410delGTG' -p35 -sS'genome_context_intronic_sequence' -p36 -g16 -sS'hgvs_lrg_variant' -p37 -g16 -sS'hgvs_transcript_variant' -p38 -S'NM_198180.2:c.408_410del' -p39 -sS'hgvs_refseqgene_variant' -p40 -g16 -sS'primary_assembly_loci' -p41 -(dp42 -S'hg19' -p43 -(dp44 -S'hgvs_genomic_description' -p45 -S'NC_000009.11:g.133768816_133768818del' -p46 -sS'vcf' -p47 -(dp48 -S'chr' -p49 -S'chr9' -p50 -sS'ref' -p51 -S'TCAC' -p52 -sS'pos' -p53 -S'133768815' -p54 -sS'alt' -p55 -S'T' -p56 -sssS'hg38' -p57 -(dp58 -g45 -S'NC_000009.12:g.130893429_130893431del' -p59 -sg47 -(dp60 -g49 -g50 -sg51 -S'TCAC' -p61 -sg53 -S'130893428' -p62 -sg55 -g56 -sssS'grch37' -p63 -(dp64 -g45 -S'NC_000009.11:g.133768816_133768818del' -p65 -sg47 -(dp66 -g49 -S'9' -p67 -sg51 -S'TCAC' -p68 -sg53 -S'133768815' -p69 -sg55 -g56 -sssS'grch38' -p70 -(dp71 -g45 -S'NC_000009.12:g.130893429_130893431del' -p72 -sg47 -(dp73 -g49 -g67 -sg51 -S'TCAC' -p74 -sg53 -S'130893428' -p75 -sg55 -g56 -ssssS'reference_sequence_records' -p76 -(dp77 -S'protein' -p78 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_937823.1' -p79 -sS'transcript' -p80 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_198180.2' -p81 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant323.txt b/VariantValidator/testing/testOutputsMasterITS/variant323.txt deleted file mode 100644 index b12158b2..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant323.txt +++ /dev/null @@ -1,174 +0,0 @@ -(dp0 -S'NM_080877.2:c.1733_1735delinsTTT' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens solute carrier family 34 member 3 (SLC34A3), transcript variant 3, mRNA -p12 -sS'gene_symbol' -p13 -S'SLC34A3' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_543153.1:p.(Pro578_Lys579delinsLeuTer)' -p18 -sS'slr' -p19 -S'NP_543153.1:p.(P578_K579delinsL*)' -p20 -ssS'submitted_variant' -p21 -S'NM_080877.2:c.1733_1735delinsTTT' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_080877.2:c.1733_1735delinsTTT' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000009.11:g.140130801_140130803delinsTTT' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr9' -p37 -sS'ref' -p38 -S'CGA' -p39 -sS'pos' -p40 -S'140130801' -p41 -sS'alt' -p42 -S'TTT' -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000009.12:g.137236349_137236351delinsTTT' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -S'CGA' -p48 -sg40 -S'137236349' -p49 -sg42 -g43 -sssS'grch37' -p50 -(dp51 -g32 -S'NC_000009.11:g.140130801_140130803delinsTTT' -p52 -sg34 -(dp53 -g36 -S'9' -p54 -sg38 -S'CGA' -p55 -sg40 -S'140130801' -p56 -sg42 -g43 -sssS'grch38' -p57 -(dp58 -g32 -S'NC_000009.12:g.137236349_137236351delinsTTT' -p59 -sg34 -(dp60 -g36 -g54 -sg38 -S'CGA' -p61 -sg40 -S'137236349' -p62 -sg42 -g43 -ssssS'reference_sequence_records' -p63 -(dp64 
-S'protein' -p65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_543153.1' -p66 -sS'transcript' -p67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_080877.2' -p68 -sssS'flag' -p69 -S'gene_variant' -p70 -sS'metadata' -p71 -(dp72 -S'variantvalidator_hgvs_version' -p73 -S'1.1.3' -p74 -sS'uta_schema' -p75 -S'uta_20180821' -p76 -sS'seqrepo_db' -p77 -S'2018-08-21' -p78 -sS'variantvalidator_version' -p79 -S'v0.2' -p80 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant324.txt b/VariantValidator/testing/testOutputsMasterITS/variant324.txt deleted file mode 100644 index a7f2bd3d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant324.txt +++ /dev/null @@ -1,174 +0,0 @@ -(dp0 -S'NM_080877.2:c.1735_1737delinsTGA' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens solute carrier family 34 member 3 (SLC34A3), transcript variant 3, mRNA -p12 -sS'gene_symbol' -p13 -S'SLC34A3' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_543153.1:p.(Lys579Ter)' -p18 -sS'slr' -p19 -S'NP_543153.1:p.(K579*)' -p20 -ssS'submitted_variant' -p21 -S'NM_080877.2:c.1735_1737delinsTGA' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_080877.2:c.1735_1737delinsTGA' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000009.11:g.140130803_140130805delinsTGA' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr9' -p37 -sS'ref' -p38 -S'AAG' -p39 -sS'pos' -p40 -S'140130803' -p41 -sS'alt' -p42 -S'TGA' -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000009.12:g.137236351_137236353delinsTGA' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -S'AAG' -p48 -sg40 -S'137236351' -p49 -sg42 -g43 
-sssS'grch37' -p50 -(dp51 -g32 -S'NC_000009.11:g.140130803_140130805delinsTGA' -p52 -sg34 -(dp53 -g36 -S'9' -p54 -sg38 -S'AAG' -p55 -sg40 -S'140130803' -p56 -sg42 -g43 -sssS'grch38' -p57 -(dp58 -g32 -S'NC_000009.12:g.137236351_137236353delinsTGA' -p59 -sg34 -(dp60 -g36 -g54 -sg38 -S'AAG' -p61 -sg40 -S'137236351' -p62 -sg42 -g43 -ssssS'reference_sequence_records' -p63 -(dp64 -S'protein' -p65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_543153.1' -p66 -sS'transcript' -p67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_080877.2' -p68 -sssS'flag' -p69 -S'gene_variant' -p70 -sS'metadata' -p71 -(dp72 -S'variantvalidator_hgvs_version' -p73 -S'1.1.3' -p74 -sS'uta_schema' -p75 -S'uta_20180821' -p76 -sS'seqrepo_db' -p77 -S'2018-08-21' -p78 -sS'variantvalidator_version' -p79 -S'v0.2' -p80 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant325.txt b/VariantValidator/testing/testOutputsMasterITS/variant325.txt deleted file mode 100644 index 984fed33..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant325.txt +++ /dev/null @@ -1,174 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_080877.2:c.1735_1737delinsTAATTGTTC' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'RefSeqGene record not available' -p19 -asS'refseqgene_context_intronic_sequence' -p20 -g16 -sS'alt_genomic_loci' -p21 -(lp22 -sS'transcript_description' -p23 -VHomo sapiens solute carrier family 34 member 3 (SLC34A3), transcript variant 3, mRNA -p24 -sS'gene_symbol' -p25 -S'SLC34A3' -p26 -sS'hgvs_predicted_protein_consequence' -p27 -(dp28 -S'tlr' -p29 -S'NP_543153.1:p.(Lys579Ter)' -p30 -sS'slr' -p31 -S'NP_543153.1:p.(K579*)' -p32 -ssS'submitted_variant' -p33 -S'NM_080877.2:c.1735_1737delinsTAATTGTTC' 
-p34 -sS'genome_context_intronic_sequence' -p35 -g16 -sS'hgvs_lrg_variant' -p36 -g16 -sS'hgvs_transcript_variant' -p37 -S'NM_080877.2:c.1735_1737delinsTAATTGTTC' -p38 -sS'hgvs_refseqgene_variant' -p39 -g16 -sS'primary_assembly_loci' -p40 -(dp41 -S'hg19' -p42 -(dp43 -S'hgvs_genomic_description' -p44 -S'NC_000009.11:g.140130803_140130805delinsTAATTGTTC' -p45 -sS'vcf' -p46 -(dp47 -S'chr' -p48 -S'chr9' -p49 -sS'ref' -p50 -S'AAG' -p51 -sS'pos' -p52 -S'140130803' -p53 -sS'alt' -p54 -S'TAATTGTTC' -p55 -sssS'hg38' -p56 -(dp57 -g44 -S'NC_000009.12:g.137236351_137236353delinsTAATTGTTC' -p58 -sg46 -(dp59 -g48 -g49 -sg50 -S'AAG' -p60 -sg52 -S'137236351' -p61 -sg54 -g55 -sssS'grch37' -p62 -(dp63 -g44 -S'NC_000009.11:g.140130803_140130805delinsTAATTGTTC' -p64 -sg46 -(dp65 -g48 -S'9' -p66 -sg50 -S'AAG' -p67 -sg52 -S'140130803' -p68 -sg54 -g55 -sssS'grch38' -p69 -(dp70 -g44 -S'NC_000009.12:g.137236351_137236353delinsTAATTGTTC' -p71 -sg46 -(dp72 -g48 -g66 -sg50 -S'AAG' -p73 -sg52 -S'137236351' -p74 -sg54 -g55 -ssssS'reference_sequence_records' -p75 -(dp76 -S'protein' -p77 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_543153.1' -p78 -sS'transcript' -p79 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_080877.2' -p80 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant326.txt b/VariantValidator/testing/testOutputsMasterITS/variant326.txt deleted file mode 100644 index 91ec5c8c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant326.txt +++ /dev/null @@ -1,171 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_080877.2:c.1737delinsATTGTTC' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens solute carrier family 34 member 3 (SLC34A3), transcript variant 3, mRNA -p14 -sS'gene_symbol' -p15 -S'SLC34A3' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_543153.1:p.(Lys579_Ala580insLeuPhe)' -p20 -sS'slr' -p21 -S'NP_543153.1:p.(K579_A580insLF)' -p22 -ssS'submitted_variant' -p23 -S'NM_080877.2:c.1737delinsATTGTTC' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_080877.2:c.1737delinsATTGTTC' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000009.11:g.140130805delinsATTGTTC' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr9' -p39 -sS'ref' -p40 -S'G' -p41 -sS'pos' -p42 -S'140130805' -p43 -sS'alt' -p44 -S'ATTGTTC' -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000009.12:g.137236353delinsATTGTTC' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'137236353' -p50 -sg44 -g45 -sssS'grch37' -p51 -(dp52 -g34 -S'NC_000009.11:g.140130805delinsATTGTTC' -p53 -sg36 -(dp54 -g38 -S'9' -p55 -sg40 -g41 -sg42 -S'140130805' -p56 -sg44 -g45 -sssS'grch38' -p57 -(dp58 -g34 -S'NC_000009.12:g.137236353delinsATTGTTC' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'137236353' -p61 -sg44 -g45 -ssssS'reference_sequence_records' -p62 -(dp63 -S'protein' -p64 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NP_543153.1' -p65 -sS'transcript' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_080877.2' -p67 -sssS'metadata' -p68 -(dp69 -S'variantvalidator_hgvs_version' -p70 -S'1.1.3' -p71 -sS'uta_schema' -p72 -S'uta_20180821' -p73 -sS'seqrepo_db' -p74 -S'2018-08-21' -p75 -sS'variantvalidator_version' -p76 -S'v0.2' -p77 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant327.txt b/VariantValidator/testing/testOutputsMasterITS/variant327.txt deleted file mode 100644 index f2da1be1..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant327.txt +++ /dev/null @@ -1,175 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.4392_*2delinsAGAG' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2:p.(Ter1465GluextTer84)' -p20 -sS'slr' -p21 -S'NP_000079.2:p.(*1465Eext*84)' -p22 -ssS'submitted_variant' -p23 -S'NM_000088.3:c.4392_*2delinsAGAG' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_000088.3:c.4392_*2delinsAGAG' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000017.10:g.48262861_48262866delinsCTCT' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr17' -p39 -sS'ref' -p40 -S'GTTTAC' -p41 -sS'pos' -p42 -S'48262861' -p43 -sS'alt' -p44 -VCTCT -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000017.11:g.50185500_50185505delinsCTCT' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'GTTTAC' -p50 -sg42 -S'50185500' -p51 -sg44 -VCTCT -p52 -sssS'grch37' -p53 -(dp54 
-g34 -S'NC_000017.10:g.48262861_48262866delinsCTCT' -p55 -sg36 -(dp56 -g38 -S'17' -p57 -sg40 -S'GTTTAC' -p58 -sg42 -S'48262861' -p59 -sg44 -g45 -sssS'grch38' -p60 -(dp61 -g34 -S'NC_000017.11:g.50185500_50185505delinsCTCT' -p62 -sg36 -(dp63 -g38 -g57 -sg40 -S'GTTTAC' -p64 -sg42 -S'50185500' -p65 -sg44 -g52 -ssssS'reference_sequence_records' -p66 -(dp67 -S'protein' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p69 -sS'transcript' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p71 -sssS'metadata' -p72 -(dp73 -S'variantvalidator_hgvs_version' -p74 -S'1.1.3' -p75 -sS'uta_schema' -p76 -S'uta_20180821' -p77 -sS'seqrepo_db' -p78 -S'2018-08-21' -p79 -sS'variantvalidator_version' -p80 -S'v0.2' -p81 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant328.txt b/VariantValidator/testing/testOutputsMasterITS/variant328.txt deleted file mode 100644 index 6151bb5b..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant328.txt +++ /dev/null @@ -1,175 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589_591delinsAGAAGC' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2:p.(Gly197delinsArgSer)' -p20 -sS'slr' -p21 -S'NP_000079.2:p.(G197delinsRS)' -p22 -ssS'submitted_variant' -p23 -S'NM_000088.3:c.589_591delinsAGAAGC' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_000088.3:c.589_591delinsAGAAGC' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 
-S'NC_000017.10:g.48275361_48275363delinsGCTTCT' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr17' -p39 -sS'ref' -p40 -S'ACC' -p41 -sS'pos' -p42 -S'48275361' -p43 -sS'alt' -p44 -VGCTTCT -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000017.11:g.50198000_50198002delinsGCTTCT' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'ACC' -p50 -sg42 -S'50198000' -p51 -sg44 -VGCTTCT -p52 -sssS'grch37' -p53 -(dp54 -g34 -S'NC_000017.10:g.48275361_48275363delinsGCTTCT' -p55 -sg36 -(dp56 -g38 -S'17' -p57 -sg40 -S'ACC' -p58 -sg42 -S'48275361' -p59 -sg44 -g45 -sssS'grch38' -p60 -(dp61 -g34 -S'NC_000017.11:g.50198000_50198002delinsGCTTCT' -p62 -sg36 -(dp63 -g38 -g57 -sg40 -S'ACC' -p64 -sg42 -S'50198000' -p65 -sg44 -g52 -ssssS'reference_sequence_records' -p66 -(dp67 -S'protein' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p69 -sS'transcript' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p71 -sssS'metadata' -p72 -(dp73 -S'variantvalidator_hgvs_version' -p74 -S'1.1.3' -p75 -sS'uta_schema' -p76 -S'uta_20180821' -p77 -sS'seqrepo_db' -p78 -S'2018-08-21' -p79 -sS'variantvalidator_version' -p80 -S'v0.2' -p81 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant329.txt b/VariantValidator/testing/testOutputsMasterITS/variant329.txt deleted file mode 100644 index 42b8acfb..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant329.txt +++ /dev/null @@ -1,171 +0,0 @@ -(dp0 -S'NM_000885.5:c.*2536delinsAGAAAAATCA' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens integrin subunit alpha 4 (ITGA4), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'ITGA4' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_000876.3:p.?' -p18 -sS'slr' -p19 -S'NP_000876.3:p.?' 
-p20 -ssS'submitted_variant' -p21 -S'NM_000885.5:c.*2536delinsAGAAAAATCA' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_000885.5:c.*2536delinsAGAAAAATCA' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000002.11:g.182402790delinsAGAAAAATCA' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr2' -p37 -sS'ref' -p38 -S'G' -p39 -sS'pos' -p40 -S'182402790' -p41 -sS'alt' -p42 -S'AGAAAAATCA' -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000002.12:g.181538063delinsAGAAAAATCA' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'181538063' -p48 -sg42 -g43 -sssS'grch37' -p49 -(dp50 -g32 -S'NC_000002.11:g.182402790delinsAGAAAAATCA' -p51 -sg34 -(dp52 -g36 -S'2' -p53 -sg38 -g39 -sg40 -S'182402790' -p54 -sg42 -g43 -sssS'grch38' -p55 -(dp56 -g32 -S'NC_000002.12:g.181538063delinsAGAAAAATCA' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'181538063' -p59 -sg42 -g43 -ssssS'reference_sequence_records' -p60 -(dp61 -S'protein' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000876.3' -p63 -sS'transcript' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000885.5' -p65 -sssS'flag' -p66 -S'gene_variant' -p67 -sS'metadata' -p68 -(dp69 -S'variantvalidator_hgvs_version' -p70 -S'1.1.3' -p71 -sS'uta_schema' -p72 -S'uta_20180821' -p73 -sS'seqrepo_db' -p74 -S'2018-08-21' -p75 -sS'variantvalidator_version' -p76 -S'v0.2' -p77 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant33.txt b/VariantValidator/testing/testOutputsMasterITS/variant33.txt deleted file mode 100644 index 79f857c0..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant33.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'NM_000088.3:c.591_593inv' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p12 -sS'gene_symbol' -p13 -S'COL1A1' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_000079.2:p.(Pro198Asp)' -p18 -sS'slr' -p19 -S'NP_000079.2:p.(P198D)' -p20 -ssS'submitted_variant' -p21 -S'NM_000088.3:c.591_593inv' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_000088.3:c.591_593inv' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000017.10:g.48275359_48275361inv' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr17' -p37 -sS'ref' -p38 -S'GGA' -p39 -sS'pos' -p40 -S'48275359' -p41 -sS'alt' -p42 -S'TCC' -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000017.11:g.50197998_50198000inv' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -S'GGA' -p48 -sg40 -S'50197998' -p49 -sg42 -S'TCC' -p50 -sssS'grch37' -p51 -(dp52 -g32 -S'NC_000017.10:g.48275359_48275361inv' -p53 -sg34 -(dp54 -g36 -S'17' -p55 -sg38 -S'GGA' -p56 -sg40 -S'48275359' -p57 -sg42 -S'TCC' -p58 -sssS'grch38' -p59 -(dp60 -g32 -S'NC_000017.11:g.50197998_50198000inv' -p61 -sg34 -(dp62 -g36 -g55 -sg38 -S'GGA' -p63 -sg40 -S'50197998' -p64 -sg42 -S'TCC' -p65 -ssssS'reference_sequence_records' -p66 -(dp67 -S'protein' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p69 -sS'transcript' -p70 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p71 -sssS'flag' -p72 -S'gene_variant' -p73 -sS'metadata' -p74 -(dp75 -S'variantvalidator_hgvs_version' -p76 -S'1.1.3' -p77 -sS'uta_schema' -p78 -S'uta_20180821' -p79 -sS'seqrepo_db' -p80 -S'2018-08-21' -p81 -sS'variantvalidator_version' -p82 -S'v0.2' -p83 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant330.txt b/VariantValidator/testing/testOutputsMasterITS/variant330.txt deleted file mode 100644 index 6f4cace8..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant330.txt +++ /dev/null @@ -1,175 +0,0 @@ -(dp0 -S'NM_002693.2:c.-186_-185delinsCC' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens DNA polymerase gamma, catalytic subunit (POLG), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'POLG' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_002684.1:p.?' -p18 -sS'slr' -p19 -S'NP_002684.1:p.?' 
-p20 -ssS'submitted_variant' -p21 -S'NM_002693.2:c.-186_-185delinsCC' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_002693.2:c.-186_-185delinsCC' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'grch38' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000015.10:g.89334698_89334699delinsGG' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'15' -p37 -sS'ref' -p38 -S'CT' -p39 -sS'pos' -p40 -S'89334698' -p41 -sS'alt' -p42 -VGG -p43 -sssS'grch37' -p44 -(dp45 -g32 -S'NC_000015.9:g.89877929_89877930delinsGG' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -S'CT' -p48 -sg40 -S'89877929' -p49 -sg42 -VGG -p50 -sssS'hg38' -p51 -(dp52 -g32 -S'NC_000015.10:g.89334698_89334699delinsGG' -p53 -sg34 -(dp54 -g36 -S'chr15' -p55 -sg38 -S'CT' -p56 -sg40 -S'89334698' -p57 -sg42 -g43 -sssS'hg19' -p58 -(dp59 -g32 -S'NC_000015.9:g.89877929_89877930delinsGG' -p60 -sg34 -(dp61 -g36 -g55 -sg38 -S'CT' -p62 -sg40 -S'89877929' -p63 -sg42 -g50 -ssssS'reference_sequence_records' -p64 -(dp65 -S'protein' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_002684.1' -p67 -sS'transcript' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_002693.2' -p69 -sssS'flag' -p70 -S'gene_variant' -p71 -sS'metadata' -p72 -(dp73 -S'variantvalidator_hgvs_version' -p74 -S'1.1.3' -p75 -sS'uta_schema' -p76 -S'uta_20180821' -p77 -sS'seqrepo_db' -p78 -S'2018-08-21' -p79 -sS'variantvalidator_version' -p80 -S'v0.2' -p81 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant331.txt b/VariantValidator/testing/testOutputsMasterITS/variant331.txt deleted file mode 100644 index daa65a33..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant331.txt +++ /dev/null @@ -1,533 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_001287344.1:c.690_690+1insCTACATAG' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'RefSeqGene record not available' -p19 -asS'refseqgene_context_intronic_sequence' -p20 -g16 -sS'alt_genomic_loci' -p21 -(lp22 -(dp23 -S'grch37' -p24 -(dp25 -S'hgvs_genomic_description' -p26 -S'NW_004070883.1:g.43848_43849insCTATGTAG' -p27 -sS'vcf' -p28 -(dp29 -S'chr' -p30 -S'HG1439_PATCH' -p31 -sS'ref' -p32 -S'C' -p33 -sS'pos' -p34 -S'43848' -p35 -sS'alt' -p36 -VCCTATGTAG -p37 -sssa(dp38 -S'hg19' -p39 -(dp40 -g26 -S'NW_004070883.1:g.43848_43849insCTATGTAG' -p41 -sg28 -(dp42 -g30 -S'NW_004070883.1' -p43 -sg32 -g33 -sg34 -S'43848' -p44 -sg36 -VCCTATGTAG -p45 -sssasS'transcript_description' -p46 -VHomo sapiens Bruton tyrosine kinase (BTK), transcript variant 3, mRNA -p47 -sS'gene_symbol' -p48 -S'BTK' -p49 -sS'hgvs_predicted_protein_consequence' -p50 -(dp51 -S'tlr' -p52 -S'NP_001274273.1:p.?' -p53 -sS'slr' -p54 -S'NP_001274273.1:p.?' 
-p55 -ssS'submitted_variant' -p56 -S'NG_009616.1:g.29052_29053insCTACATAG' -p57 -sS'genome_context_intronic_sequence' -p58 -S'NC_000023.10(NM_001287344.1):c.690_690+1insCTACATAG' -p59 -sS'hgvs_lrg_variant' -p60 -g16 -sS'hgvs_transcript_variant' -p61 -S'NM_001287344.1:c.690_690+1insCTACATAG' -p62 -sS'hgvs_refseqgene_variant' -p63 -g16 -sS'primary_assembly_loci' -p64 -(dp65 -S'hg19' -p66 -(dp67 -g26 -S'NC_000023.10:g.100617160_100617161insCTATGTAG' -p68 -sg28 -(dp69 -g30 -S'chrX' -p70 -sg32 -g33 -sg34 -S'100617160' -p71 -sg36 -VCCTATGTAG -p72 -sssS'hg38' -p73 -(dp74 -g26 -S'NC_000023.11:g.101362172_101362173insCTATGTAG' -p75 -sg28 -(dp76 -g30 -g70 -sg32 -g33 -sg34 -S'101362172' -p77 -sg36 -VCCTATGTAG -p78 -sssS'grch37' -p79 -(dp80 -g26 -S'NC_000023.10:g.100617160_100617161insCTATGTAG' -p81 -sg28 -(dp82 -g30 -S'X' -p83 -sg32 -g33 -sg34 -S'100617160' -p84 -sg36 -VCCTATGTAG -p85 -sssS'grch38' -p86 -(dp87 -g26 -S'NC_000023.11:g.101362172_101362173insCTATGTAG' -p88 -sg28 -(dp89 -g30 -g83 -sg32 -g33 -sg34 -S'101362172' -p90 -sg36 -VCCTATGTAG -p91 -ssssS'reference_sequence_records' -p92 -(dp93 -S'protein' -p94 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001274273.1' -p95 -sS'transcript' -p96 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001287344.1' -p97 -sssS'NM_001287345.1:c.588_588+1insCTACATAG' -p98 -(dp99 -g15 -g16 -sg17 -(lp100 -S'RefSeqGene record not available' -p101 -asg20 -g16 -sg21 -(lp102 -(dp103 -S'grch37' -p104 -(dp105 -g26 -S'NW_004070883.1:g.43848_43849insCTATGTAG' -p106 -sg28 -(dp107 -g30 -g31 -sg32 -g33 -sg34 -S'43848' -p108 -sg36 -VCCTATGTAG -p109 -sssa(dp110 -S'hg19' -p111 -(dp112 -g26 -S'NW_004070883.1:g.43848_43849insCTATGTAG' -p113 -sg28 -(dp114 -g30 -S'NW_004070883.1' -p115 -sg32 -g33 -sg34 -S'43848' -p116 -sg36 -VCCTATGTAG -p117 -sssasg46 -VHomo sapiens Bruton tyrosine kinase (BTK), transcript variant 2, mRNA -p118 -sg48 -S'BTK' -p119 -sg50 -(dp120 -g52 -S'NP_001274274.1:p.?' -p121 -sg54 -S'NP_001274274.1:p.?' 
-p122 -ssg56 -g57 -sg58 -S'NC_000023.10(NM_001287345.1):c.588_588+1insCTACATAG' -p123 -sg60 -g16 -sg61 -S'NM_001287345.1:c.588_588+1insCTACATAG' -p124 -sg63 -g16 -sg64 -(dp125 -S'hg19' -p126 -(dp127 -g26 -S'NC_000023.10:g.100617160_100617161insCTATGTAG' -p128 -sg28 -(dp129 -g30 -g70 -sg32 -g33 -sg34 -S'100617160' -p130 -sg36 -VCCTATGTAG -p131 -sssg73 -(dp132 -g26 -S'NC_000023.11:g.101362172_101362173insCTATGTAG' -p133 -sg28 -(dp134 -g30 -g70 -sg32 -g33 -sg34 -S'101362172' -p135 -sg36 -VCCTATGTAG -p136 -sssS'grch37' -p137 -(dp138 -g26 -S'NC_000023.10:g.100617160_100617161insCTATGTAG' -p139 -sg28 -(dp140 -g30 -g83 -sg32 -g33 -sg34 -S'100617160' -p141 -sg36 -VCCTATGTAG -p142 -sssS'grch38' -p143 -(dp144 -g26 -S'NC_000023.11:g.101362172_101362173insCTATGTAG' -p145 -sg28 -(dp146 -g30 -g83 -sg32 -g33 -sg34 -S'101362172' -p147 -sg36 -VCCTATGTAG -p148 -ssssg92 -(dp149 -g94 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001274274.1' -p150 -sg96 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001287345.1' -p151 -sssS'NM_000061.2:c.588_588+1insCTACATAG' -p152 -(dp153 -g15 -g16 -sg17 -(lp154 -S'RefSeqGene record not available' -p155 -asg20 -g16 -sg21 -(lp156 -(dp157 -S'grch37' -p158 -(dp159 -g26 -S'NW_004070883.1:g.43848_43849insCTATGTAG' -p160 -sg28 -(dp161 -g30 -g31 -sg32 -g33 -sg34 -S'43848' -p162 -sg36 -VCCTATGTAG -p163 -sssa(dp164 -S'hg19' -p165 -(dp166 -g26 -S'NW_004070883.1:g.43848_43849insCTATGTAG' -p167 -sg28 -(dp168 -g30 -S'NW_004070883.1' -p169 -sg32 -g33 -sg34 -S'43848' -p170 -sg36 -VCCTATGTAG -p171 -sssasg46 -VHomo sapiens Bruton tyrosine kinase (BTK), transcript variant 1, mRNA -p172 -sg48 -S'BTK' -p173 -sg50 -(dp174 -g52 -S'NP_000052.1:p.?' -p175 -sg54 -S'NP_000052.1:p.?' 
-p176 -ssg56 -g57 -sg58 -S'NC_000023.10(NM_000061.2):c.588_588+1insCTACATAG' -p177 -sg60 -g16 -sg61 -S'NM_000061.2:c.588_588+1insCTACATAG' -p178 -sg63 -g16 -sg64 -(dp179 -S'hg19' -p180 -(dp181 -g26 -S'NC_000023.10:g.100617160_100617161insCTATGTAG' -p182 -sg28 -(dp183 -g30 -g70 -sg32 -g33 -sg34 -S'100617160' -p184 -sg36 -VCCTATGTAG -p185 -sssg73 -(dp186 -g26 -S'NC_000023.11:g.101362172_101362173insCTATGTAG' -p187 -sg28 -(dp188 -g30 -g70 -sg32 -g33 -sg34 -S'101362172' -p189 -sg36 -VCCTATGTAG -p190 -sssS'grch37' -p191 -(dp192 -g26 -S'NC_000023.10:g.100617160_100617161insCTATGTAG' -p193 -sg28 -(dp194 -g30 -g83 -sg32 -g33 -sg34 -S'100617160' -p195 -sg36 -VCCTATGTAG -p196 -sssS'grch38' -p197 -(dp198 -g26 -S'NC_000023.11:g.101362172_101362173insCTATGTAG' -p199 -sg28 -(dp200 -g30 -g83 -sg32 -g33 -sg34 -S'101362172' -p201 -sg36 -VCCTATGTAG -p202 -ssssg92 -(dp203 -g94 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000052.1' -p204 -sg96 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000061.2' -p205 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant332.txt b/VariantValidator/testing/testOutputsMasterITS/variant332.txt deleted file mode 100644 index d21731b7..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant332.txt +++ /dev/null @@ -1,215 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000061.2:c.588_588+1insCTACATAG' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -(dp13 -S'grch37' -p14 -(dp15 -S'hgvs_genomic_description' -p16 -S'NW_004070883.1:g.43848_43849insCTATGTAG' -p17 -sS'vcf' -p18 -(dp19 -S'chr' -p20 -S'HG1439_PATCH' -p21 -sS'ref' -p22 -S'C' -p23 -sS'pos' -p24 -S'43848' -p25 -sS'alt' -p26 -VCCTATGTAG -p27 -sssa(dp28 -S'hg19' -p29 -(dp30 -g16 -S'NW_004070883.1:g.43848_43849insCTATGTAG' -p31 -sg18 -(dp32 -g20 -S'NW_004070883.1' -p33 -sg22 
-g23 -sg24 -S'43848' -p34 -sg26 -VCCTATGTAG -p35 -sssasS'transcript_description' -p36 -VHomo sapiens Bruton tyrosine kinase (BTK), transcript variant 1, mRNA -p37 -sS'gene_symbol' -p38 -S'BTK' -p39 -sS'hgvs_predicted_protein_consequence' -p40 -(dp41 -S'tlr' -p42 -S'NP_000052.1:p.?' -p43 -sS'slr' -p44 -S'NP_000052.1:p.?' -p45 -ssS'submitted_variant' -p46 -S'NM_000061.2:c.588_588+1insCTACATAG' -p47 -sS'genome_context_intronic_sequence' -p48 -S'NC_000023.10(NM_000061.2):c.588_588+1insCTACATAG' -p49 -sS'hgvs_lrg_variant' -p50 -g6 -sS'hgvs_transcript_variant' -p51 -S'NM_000061.2:c.588_588+1insCTACATAG' -p52 -sS'hgvs_refseqgene_variant' -p53 -g6 -sS'primary_assembly_loci' -p54 -(dp55 -S'hg19' -p56 -(dp57 -g16 -S'NC_000023.10:g.100617160_100617161insCTATGTAG' -p58 -sg18 -(dp59 -g20 -S'chrX' -p60 -sg22 -g23 -sg24 -S'100617160' -p61 -sg26 -VCCTATGTAG -p62 -sssS'hg38' -p63 -(dp64 -g16 -S'NC_000023.11:g.101362172_101362173insCTATGTAG' -p65 -sg18 -(dp66 -g20 -g60 -sg22 -g23 -sg24 -S'101362172' -p67 -sg26 -VCCTATGTAG -p68 -sssS'grch37' -p69 -(dp70 -g16 -S'NC_000023.10:g.100617160_100617161insCTATGTAG' -p71 -sg18 -(dp72 -g20 -S'X' -p73 -sg22 -g23 -sg24 -S'100617160' -p74 -sg26 -VCCTATGTAG -p75 -sssS'grch38' -p76 -(dp77 -g16 -S'NC_000023.11:g.101362172_101362173insCTATGTAG' -p78 -sg18 -(dp79 -g20 -g73 -sg22 -g23 -sg24 -S'101362172' -p80 -sg26 -VCCTATGTAG -p81 -ssssS'reference_sequence_records' -p82 -(dp83 -S'protein' -p84 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000052.1' -p85 -sS'transcript' -p86 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000061.2' -p87 -sssS'metadata' -p88 -(dp89 -S'variantvalidator_hgvs_version' -p90 -S'1.1.3' -p91 -sS'uta_schema' -p92 -S'uta_20180821' -p93 -sS'seqrepo_db' -p94 -S'2018-08-21' -p95 -sS'variantvalidator_version' -p96 -S'v0.2' -p97 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant333.txt b/VariantValidator/testing/testOutputsMasterITS/variant333.txt deleted file mode 100644 index fa35e680..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant333.txt +++ /dev/null @@ -1,142 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000061.2:c.588_589insCTACATAG' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'This coding sequence variant description spans at least one intron' -p9 -aS'Use of the corresponding genomic sequence variant descriptions may be invalid. Please refer to https://www35.lamp.le.ac.uk/recommendations/' -p10 -aS'RefSeqGene record not available' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g6 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens Bruton tyrosine kinase (BTK), transcript variant 1, mRNA -p16 -sS'gene_symbol' -p17 -S'BTK' -p18 -sS'hgvs_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_000052.1:p.(Ile197LeufsTer5)' -p22 -sS'slr' -p23 -S'NP_000052.1:p.(I197Lfs*5)' -p24 -ssS'submitted_variant' -p25 -S'NM_000061.2:c.588_589insCTACATAG' -p26 -sS'genome_context_intronic_sequence' -p27 -g6 -sS'hgvs_lrg_variant' -p28 -g6 -sS'hgvs_transcript_variant' -p29 -S'NM_000061.2:c.588_589insCTACATAG' -p30 -sS'hgvs_refseqgene_variant' -p31 -g6 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000023.10:g.100615743_100617161delinsTCTATGTAGC' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chrX' -p41 -sS'ref' -p42 
-S'GTTAGGAGAAAAGGTAGGAGGGTTTGTCAAGATACCAAGCACTCTTCTCTTCTCTCCCAACTCTCTGGCTTACTCAAGACACCCAAATCAGGCATACTAAAATATTACTCAGCAGTCATTCAACAACCATTTTTAAGCACCAGTGCAGGAGTTCTCAGCCTTGCACACATATAAAGACCATGTATGGAACTTTTAAATTCCAATGTACTTTCGGAGGCCAAGGCGGGCGGATCAGTTGAGGCCAGGAGTTCGAGACCAGCCTGGCCAACGTGACGAAACCCCATCTCTACTAAAAATGCAAAAATCAGCTGGGCATGGTAGTGTGTGCATATAGCCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATAACTTGACCCCAGGAGGCGGAGGTTGTAGTGAGCCAAGACCACGCCACTGCACTCCAGCCTGGGTGACAGAGTGAGACTGTCTCAAAAAAAAACCCACAAAAAACAAAAAACCAATTCCAATACCTAGTCAGTTTCCTCACAGACCAATTACATCAAAATCAAACTCTCAGGAATGGGACCCAAACATTACTATTTTTAAAGCTCACTAGACAAAAACCATTTATAGCTAAGGTCAGGAAACCGGCTTGGCACTAAACTTGTACGTGAATCTACTAAGTGGCTCAGAACCTTGGTTTCCTTCTTTGTAAAATGAGTATAATAATACCTGCTCTACTTACTTACAATATGTGAGAAAGGGCTTTCTAGCCCTCAAGAAGGAACCAAAAAAAAAAAAAAAACTTCTGAAGTGTTAGTGATAGGTGGTGTTAGTGCTAAGTGTTGAGTATGTTGGTATTAAGTGTTAAATTCTTCTAACTTTACTGTATGTTTGAAAATACTTTCCAGCTGGGCACGGTGGCTCACGCCTGTAATCCCAGCACTTTAGGAGGCCGAGGCGGGTTCGAGACCAGCCTGAACAACATGGTGACACCCCCATCTCTACTAAAAATACAAAAATTAGCTGGGCGTGGTGGTGCATGCCTGTAATCCCAGCTACTCAGGAGGCTGAGACAGGAGAATCACTTGAATCTGGGAGGCGGAGGTTGCAGTGAGCCGAGATCATGCCATTACACTCGAACCTGGGCGACAGAGCGAGACTCCGTCTCAAAACAAAAACAAAAGCAAAAACAAAACAACAACAAAAACTTTACATTAAAAAAATCAGGTTTTGTTCTAAACAGGTGATTGGATTACATGGTTGCTGAGAGCCTTCTATCTTTCCATCGAGGAGGAAATCCTAATTAGAAGAACAAATCCCCCATCTTAGCAAGAATACCAATTAACACTGCCAAGTCCCAGGGTAATTCTAAGACTCTAGTGTGTTCTTAGGGCTTGACTATAAGTTTCCATTTAAGCAGTGGCAGCACCCAGTTTCCCTGTATAC' -p43 -sS'pos' -p44 -S'100615751' -p45 -sS'alt' -p46 -S'G' -p47 -sssS'grch37' -p48 -(dp49 -g36 -S'NC_000023.10:g.100615743_100617161delinsTCTATGTAGC' -p50 -sg38 -(dp51 -g40 -S'X' -p52 -sg42 
-S'GTTAGGAGAAAAGGTAGGAGGGTTTGTCAAGATACCAAGCACTCTTCTCTTCTCTCCCAACTCTCTGGCTTACTCAAGACACCCAAATCAGGCATACTAAAATATTACTCAGCAGTCATTCAACAACCATTTTTAAGCACCAGTGCAGGAGTTCTCAGCCTTGCACACATATAAAGACCATGTATGGAACTTTTAAATTCCAATGTACTTTCGGAGGCCAAGGCGGGCGGATCAGTTGAGGCCAGGAGTTCGAGACCAGCCTGGCCAACGTGACGAAACCCCATCTCTACTAAAAATGCAAAAATCAGCTGGGCATGGTAGTGTGTGCATATAGCCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATAACTTGACCCCAGGAGGCGGAGGTTGTAGTGAGCCAAGACCACGCCACTGCACTCCAGCCTGGGTGACAGAGTGAGACTGTCTCAAAAAAAAACCCACAAAAAACAAAAAACCAATTCCAATACCTAGTCAGTTTCCTCACAGACCAATTACATCAAAATCAAACTCTCAGGAATGGGACCCAAACATTACTATTTTTAAAGCTCACTAGACAAAAACCATTTATAGCTAAGGTCAGGAAACCGGCTTGGCACTAAACTTGTACGTGAATCTACTAAGTGGCTCAGAACCTTGGTTTCCTTCTTTGTAAAATGAGTATAATAATACCTGCTCTACTTACTTACAATATGTGAGAAAGGGCTTTCTAGCCCTCAAGAAGGAACCAAAAAAAAAAAAAAAACTTCTGAAGTGTTAGTGATAGGTGGTGTTAGTGCTAAGTGTTGAGTATGTTGGTATTAAGTGTTAAATTCTTCTAACTTTACTGTATGTTTGAAAATACTTTCCAGCTGGGCACGGTGGCTCACGCCTGTAATCCCAGCACTTTAGGAGGCCGAGGCGGGTTCGAGACCAGCCTGAACAACATGGTGACACCCCCATCTCTACTAAAAATACAAAAATTAGCTGGGCGTGGTGGTGCATGCCTGTAATCCCAGCTACTCAGGAGGCTGAGACAGGAGAATCACTTGAATCTGGGAGGCGGAGGTTGCAGTGAGCCGAGATCATGCCATTACACTCGAACCTGGGCGACAGAGCGAGACTCCGTCTCAAAACAAAAACAAAAGCAAAAACAAAACAACAACAAAAACTTTACATTAAAAAAATCAGGTTTTGTTCTAAACAGGTGATTGGATTACATGGTTGCTGAGAGCCTTCTATCTTTCCATCGAGGAGGAAATCCTAATTAGAAGAACAAATCCCCCATCTTAGCAAGAATACCAATTAACACTGCCAAGTCCCAGGGTAATTCTAAGACTCTAGTGTGTTCTTAGGGCTTGACTATAAGTTTCCATTTAAGCAGTGGCAGCACCCAGTTTCCCTGTATAC' -p53 -sg44 -S'100615751' -p54 -sg46 -g47 -ssssS'reference_sequence_records' -p55 -(dp56 -S'protein' -p57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000052.1' -p58 -sS'transcript' -p59 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000061.2' -p60 -sssS'metadata' -p61 -(dp62 -S'variantvalidator_hgvs_version' -p63 -S'1.1.3' -p64 -sS'uta_schema' -p65 -S'uta_20180821' -p66 -sS'seqrepo_db' -p67 -S'2018-08-21' -p68 -sS'variantvalidator_version' -p69 -S'v0.2' -p70 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant34.txt b/VariantValidator/testing/testOutputsMasterITS/variant34.txt deleted file mode 100644 index a1dc82a1..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant34.txt +++ /dev/null @@ -1,259 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_000518.5:c.20A>T' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'RefSeqGene record not available' -p19 -asS'refseqgene_context_intronic_sequence' -p20 -g16 -sS'alt_genomic_loci' -p21 -(lp22 -sS'transcript_description' -p23 -VHomo sapiens hemoglobin subunit beta (HBB), mRNA -p24 -sS'gene_symbol' -p25 -S'HBB' -p26 -sS'hgvs_predicted_protein_consequence' -p27 -(dp28 -S'tlr' -p29 -S'NP_000509.1:p.(Glu7Val)' -p30 -sS'slr' -p31 -S'NP_000509.1:p.(E7V)' -p32 -ssS'submitted_variant' -p33 -S'11-5248232-T-A' -p34 -sS'genome_context_intronic_sequence' -p35 -g16 -sS'hgvs_lrg_variant' -p36 -g16 -sS'hgvs_transcript_variant' -p37 -S'NM_000518.5:c.20A>T' -p38 -sS'hgvs_refseqgene_variant' -p39 -g16 -sS'primary_assembly_loci' -p40 -(dp41 -S'hg19' -p42 -(dp43 -S'hgvs_genomic_description' -p44 -S'NC_000011.9:g.5248232T>A' -p45 -sS'vcf' -p46 -(dp47 -S'chr' -p48 -S'chr11' -p49 -sS'ref' -p50 -VT -p51 -sS'pos' -p52 -S'5248232' -p53 -sS'alt' -p54 -VA -p55 -sssS'grch37' -p56 -(dp57 -g44 -S'NC_000011.9:g.5248232T>A' -p58 -sg46 -(dp59 -g48 -S'11' -p60 -sg50 -g51 -sg52 -S'5248232' -p61 -sg54 -g55 -ssssS'reference_sequence_records' -p62 -(dp63 -S'protein' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000509.1' -p65 -sS'transcript' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000518.5' -p67 -sssS'NM_000518.4:c.20A>T' -p68 -(dp69 -g15 -g16 -sg17 -(lp70 -S'A more recent version of the 
selected reference sequence NM_000518.4 is available (NM_000518.5)' -p71 -aS'NM_000518.5:c.20A>T MUST be fully validated prior to use in reports' -p72 -aS'select_variants=NM_000518.5:c.20A>T' -p73 -aS'RefSeqGene record not available' -p74 -asg20 -g16 -sg21 -(lp75 -sg23 -VHomo sapiens hemoglobin subunit beta (HBB), mRNA -p76 -sg25 -S'HBB' -p77 -sg27 -(dp78 -g29 -S'NP_000509.1:p.(Glu7Val)' -p79 -sg31 -S'NP_000509.1:p.(E7V)' -p80 -ssg33 -g34 -sg35 -g16 -sg36 -g16 -sg37 -S'NM_000518.4:c.20A>T' -p81 -sg39 -g16 -sg40 -(dp82 -S'grch38' -p83 -(dp84 -g44 -S'NC_000011.10:g.5227002T>A' -p85 -sg46 -(dp86 -g48 -g60 -sg50 -g51 -sg52 -S'5227002' -p87 -sg54 -g55 -sssS'grch37' -p88 -(dp89 -g44 -S'NC_000011.9:g.5248232T>A' -p90 -sg46 -(dp91 -g48 -g60 -sg50 -g51 -sg52 -S'5248232' -p92 -sg54 -g55 -sssS'hg38' -p93 -(dp94 -g44 -S'NC_000011.10:g.5227002T>A' -p95 -sg46 -(dp96 -g48 -g49 -sg50 -g51 -sg52 -S'5227002' -p97 -sg54 -g55 -sssS'hg19' -p98 -(dp99 -g44 -S'NC_000011.9:g.5248232T>A' -p100 -sg46 -(dp101 -g48 -g49 -sg50 -g51 -sg52 -S'5248232' -p102 -sg54 -g55 -ssssg62 -(dp103 -g64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000509.1' -p104 -sg66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000518.4' -p105 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant35.txt b/VariantValidator/testing/testOutputsMasterITS/variant35.txt deleted file mode 100644 index 26ca558f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant35.txt +++ /dev/null @@ -1,172 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589-1G>T' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2:p.?' -p20 -sS'slr' -p21 -S'NP_000079.2:p.?' -p22 -ssS'submitted_variant' -p23 -S'NG_007400.1(NM_000088.3):c.589-1G>T' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000017.10(NM_000088.3):c.589-1G>T' -p26 -sS'hgvs_lrg_variant' -p27 -g6 -sS'hgvs_transcript_variant' -p28 -S'NM_000088.3:c.589-1G>T' -p29 -sS'hgvs_refseqgene_variant' -p30 -g6 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'hgvs_genomic_description' -p35 -S'NC_000017.10:g.48275364C>A' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr17' -p40 -sS'ref' -p41 -VC -p42 -sS'pos' -p43 -S'48275364' -p44 -sS'alt' -p45 -VA -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000017.11:g.50198003C>A' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -g42 -sg43 -S'50198003' -p51 -sg45 -g46 -sssS'grch37' -p52 -(dp53 -g35 -S'NC_000017.10:g.48275364C>A' -p54 -sg37 -(dp55 -g39 -S'17' -p56 -sg41 -g42 -sg43 -S'48275364' -p57 -sg45 -g46 -sssS'grch38' -p58 -(dp59 -g35 -S'NC_000017.11:g.50198003C>A' -p60 -sg37 -(dp61 -g39 -g56 -sg41 -g42 -sg43 -S'50198003' -p62 -sg45 -g46 -ssssS'reference_sequence_records' -p63 -(dp64 -S'protein' -p65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p66 -sS'transcript' -p67 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p68 -sssS'metadata' -p69 -(dp70 -S'variantvalidator_hgvs_version' -p71 -S'1.1.3' -p72 -sS'uta_schema' -p73 -S'uta_20180821' -p74 -sS'seqrepo_db' -p75 -S'2018-08-21' -p76 -sS'variantvalidator_version' -p77 -S'v0.2' -p78 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant36.txt b/VariantValidator/testing/testOutputsMasterITS/variant36.txt deleted file mode 100644 index 38461988..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant36.txt +++ /dev/null @@ -1,402 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_182763.2:c.688+403C>T' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 2, mRNA -p14 -sS'gene_symbol' -p15 -S'MCL1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_877495.1:p.?' -p20 -sS'slr' -p21 -S'NP_877495.1:p.?' 
-p22 -ssS'submitted_variant' -p23 -S'1:150550916G>A' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000001.10(NM_182763.2):c.688+403C>T' -p26 -sS'hgvs_lrg_variant' -p27 -g6 -sS'hgvs_transcript_variant' -p28 -S'NM_182763.2:c.688+403C>T' -p29 -sS'hgvs_refseqgene_variant' -p30 -g6 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'hgvs_genomic_description' -p35 -S'NC_000001.10:g.150550916G>A' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr1' -p40 -sS'ref' -p41 -VG -p42 -sS'pos' -p43 -S'150550916' -p44 -sS'alt' -p45 -VA -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000001.11:g.150578440G>A' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -g42 -sg43 -S'150578440' -p51 -sg45 -g46 -sssS'grch37' -p52 -(dp53 -g35 -S'NC_000001.10:g.150550916G>A' -p54 -sg37 -(dp55 -g39 -S'1' -p56 -sg41 -g42 -sg43 -S'150550916' -p57 -sg45 -g46 -sssS'grch38' -p58 -(dp59 -g35 -S'NC_000001.11:g.150578440G>A' -p60 -sg37 -(dp61 -g39 -g56 -sg41 -g42 -sg43 -S'150578440' -p62 -sg45 -g46 -ssssS'reference_sequence_records' -p63 -(dp64 -S'protein' -p65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_877495.1' -p66 -sS'transcript' -p67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_182763.2' -p68 -sssS'NM_001197320.1:c.281C>T' -p69 -(dp70 -g5 -g6 -sg7 -(lp71 -S'RefSeqGene record not available' -p72 -asg10 -g6 -sg11 -(lp73 -sg13 -VHomo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 3, mRNA -p74 -sg15 -S'MCL1' -p75 -sg17 -(dp76 -g19 -S'NP_001184249.1:p.(Ser94Phe)' -p77 -sg21 -S'NP_001184249.1:p.(S94F)' -p78 -ssg23 -g24 -sg25 -g6 -sg27 -g6 -sg28 -S'NM_001197320.1:c.281C>T' -p79 -sg30 -g6 -sg31 -(dp80 -S'hg19' -p81 -(dp82 -g35 -S'NC_000001.10:g.150550916G>A' -p83 -sg37 -(dp84 -g39 -g40 -sg41 -g42 -sg43 -S'150550916' -p85 -sg45 -g46 -sssg47 -(dp86 -g35 -S'NC_000001.11:g.150578440G>A' -p87 -sg37 -(dp88 -g39 -g40 -sg41 -g42 -sg43 -S'150578440' -p89 -sg45 -g46 -sssS'grch37' -p90 -(dp91 -g35 -S'NC_000001.10:g.150550916G>A' -p92 -sg37 -(dp93 -g39 -g56 -sg41 -g42 -sg43 -S'150550916' -p94 -sg45 -g46 
-sssS'grch38' -p95 -(dp96 -g35 -S'NC_000001.11:g.150578440G>A' -p97 -sg37 -(dp98 -g39 -g56 -sg41 -g42 -sg43 -S'150578440' -p99 -sg45 -g46 -ssssg63 -(dp100 -g65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001184249.1' -p101 -sg67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001197320.1' -p102 -sssS'metadata' -p103 -(dp104 -S'variantvalidator_hgvs_version' -p105 -S'1.1.3' -p106 -sS'uta_schema' -p107 -S'uta_20180821' -p108 -sS'seqrepo_db' -p109 -S'2018-08-21' -p110 -sS'variantvalidator_version' -p111 -S'v0.2' -p112 -ssS'NM_021960.4:c.740C>T' -p113 -(dp114 -g5 -g6 -sg7 -(lp115 -S'RefSeqGene record not available' -p116 -asg10 -g6 -sg11 -(lp117 -sg13 -VHomo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 1, mRNA -p118 -sg15 -S'MCL1' -p119 -sg17 -(dp120 -g19 -S'NP_068779.1:p.(Ser247Phe)' -p121 -sg21 -S'NP_068779.1:p.(S247F)' -p122 -ssg23 -g24 -sg25 -g6 -sg27 -g6 -sg28 -S'NM_021960.4:c.740C>T' -p123 -sg30 -g6 -sg31 -(dp124 -S'hg19' -p125 -(dp126 -g35 -S'NC_000001.10:g.150550916G>A' -p127 -sg37 -(dp128 -g39 -g40 -sg41 -g42 -sg43 -S'150550916' -p129 -sg45 -g46 -sssg47 -(dp130 -g35 -S'NC_000001.11:g.150578440G>A' -p131 -sg37 -(dp132 -g39 -g40 -sg41 -g42 -sg43 -S'150578440' -p133 -sg45 -g46 -sssS'grch37' -p134 -(dp135 -g35 -S'NC_000001.10:g.150550916G>A' -p136 -sg37 -(dp137 -g39 -g56 -sg41 -g42 -sg43 -S'150550916' -p138 -sg45 -g46 -sssS'grch38' -p139 -(dp140 -g35 -S'NC_000001.11:g.150578440G>A' -p141 -sg37 -(dp142 -g39 -g56 -sg41 -g42 -sg43 -S'150578440' -p143 -sg45 -g46 -ssssg63 -(dp144 -g65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_068779.1' -p145 -sg67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_021960.4' -p146 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant37.txt b/VariantValidator/testing/testOutputsMasterITS/variant37.txt deleted file mode 100644 index 4337a37d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant37.txt +++ /dev/null @@ -1,402 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_182763.2:c.688+403C>T' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 2, mRNA -p14 -sS'gene_symbol' -p15 -S'MCL1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_877495.1:p.?' -p20 -sS'slr' -p21 -S'NP_877495.1:p.?' -p22 -ssS'submitted_variant' -p23 -S'1-150550916-G-A' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000001.10(NM_182763.2):c.688+403C>T' -p26 -sS'hgvs_lrg_variant' -p27 -g6 -sS'hgvs_transcript_variant' -p28 -S'NM_182763.2:c.688+403C>T' -p29 -sS'hgvs_refseqgene_variant' -p30 -g6 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'hgvs_genomic_description' -p35 -S'NC_000001.10:g.150550916G>A' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr1' -p40 -sS'ref' -p41 -VG -p42 -sS'pos' -p43 -S'150550916' -p44 -sS'alt' -p45 -VA -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000001.11:g.150578440G>A' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -g42 -sg43 -S'150578440' -p51 -sg45 -g46 -sssS'grch37' -p52 -(dp53 -g35 -S'NC_000001.10:g.150550916G>A' -p54 -sg37 -(dp55 -g39 -S'1' -p56 -sg41 -g42 -sg43 -S'150550916' -p57 -sg45 -g46 -sssS'grch38' -p58 -(dp59 -g35 -S'NC_000001.11:g.150578440G>A' -p60 -sg37 -(dp61 -g39 -g56 -sg41 -g42 -sg43 -S'150578440' -p62 -sg45 -g46 -ssssS'reference_sequence_records' -p63 -(dp64 -S'protein' -p65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_877495.1' -p66 -sS'transcript' -p67 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NM_182763.2' -p68 -sssS'NM_001197320.1:c.281C>T' -p69 -(dp70 -g5 -g6 -sg7 -(lp71 -S'RefSeqGene record not available' -p72 -asg10 -g6 -sg11 -(lp73 -sg13 -VHomo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 3, mRNA -p74 -sg15 -S'MCL1' -p75 -sg17 -(dp76 -g19 -S'NP_001184249.1:p.(Ser94Phe)' -p77 -sg21 -S'NP_001184249.1:p.(S94F)' -p78 -ssg23 -g24 -sg25 -g6 -sg27 -g6 -sg28 -S'NM_001197320.1:c.281C>T' -p79 -sg30 -g6 -sg31 -(dp80 -S'hg19' -p81 -(dp82 -g35 -S'NC_000001.10:g.150550916G>A' -p83 -sg37 -(dp84 -g39 -g40 -sg41 -g42 -sg43 -S'150550916' -p85 -sg45 -g46 -sssg47 -(dp86 -g35 -S'NC_000001.11:g.150578440G>A' -p87 -sg37 -(dp88 -g39 -g40 -sg41 -g42 -sg43 -S'150578440' -p89 -sg45 -g46 -sssS'grch37' -p90 -(dp91 -g35 -S'NC_000001.10:g.150550916G>A' -p92 -sg37 -(dp93 -g39 -g56 -sg41 -g42 -sg43 -S'150550916' -p94 -sg45 -g46 -sssS'grch38' -p95 -(dp96 -g35 -S'NC_000001.11:g.150578440G>A' -p97 -sg37 -(dp98 -g39 -g56 -sg41 -g42 -sg43 -S'150578440' -p99 -sg45 -g46 -ssssg63 -(dp100 -g65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001184249.1' -p101 -sg67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001197320.1' -p102 -sssS'metadata' -p103 -(dp104 -S'variantvalidator_hgvs_version' -p105 -S'1.1.3' -p106 -sS'uta_schema' -p107 -S'uta_20180821' -p108 -sS'seqrepo_db' -p109 -S'2018-08-21' -p110 -sS'variantvalidator_version' -p111 -S'v0.2' -p112 -ssS'NM_021960.4:c.740C>T' -p113 -(dp114 -g5 -g6 -sg7 -(lp115 -S'RefSeqGene record not available' -p116 -asg10 -g6 -sg11 -(lp117 -sg13 -VHomo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 1, mRNA -p118 -sg15 -S'MCL1' -p119 -sg17 -(dp120 -g19 -S'NP_068779.1:p.(Ser247Phe)' -p121 -sg21 -S'NP_068779.1:p.(S247F)' -p122 -ssg23 -g24 -sg25 -g6 -sg27 -g6 -sg28 -S'NM_021960.4:c.740C>T' -p123 -sg30 -g6 -sg31 -(dp124 -S'hg19' -p125 -(dp126 -g35 -S'NC_000001.10:g.150550916G>A' -p127 -sg37 -(dp128 -g39 -g40 -sg41 -g42 -sg43 -S'150550916' -p129 -sg45 -g46 -sssg47 -(dp130 
-g35 -S'NC_000001.11:g.150578440G>A' -p131 -sg37 -(dp132 -g39 -g40 -sg41 -g42 -sg43 -S'150578440' -p133 -sg45 -g46 -sssS'grch37' -p134 -(dp135 -g35 -S'NC_000001.10:g.150550916G>A' -p136 -sg37 -(dp137 -g39 -g56 -sg41 -g42 -sg43 -S'150550916' -p138 -sg45 -g46 -sssS'grch38' -p139 -(dp140 -g35 -S'NC_000001.11:g.150578440G>A' -p141 -sg37 -(dp142 -g39 -g56 -sg41 -g42 -sg43 -S'150578440' -p143 -sg45 -g46 -ssssg63 -(dp144 -g65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_068779.1' -p145 -sg67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_021960.4' -p146 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant38.txt b/VariantValidator/testing/testOutputsMasterITS/variant38.txt deleted file mode 100644 index 2cbceff8..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant38.txt +++ /dev/null @@ -1,82 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. 
NG_(NM_):c.PositionVariation' -p7 -aS'For additional assistance, submit NG_008123.1:c.2055+18G>A to VariantValidator' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -g4 -sS'gene_symbol' -p13 -g4 -sS'hgvs_predicted_protein_consequence' -p14 -(dp15 -S'tlr' -p16 -g4 -sS'slr' -p17 -g4 -ssS'submitted_variant' -p18 -S'NG_008123.1(LEPRE1_v003):c.2055+18G>A' -p19 -sS'genome_context_intronic_sequence' -p20 -g4 -sS'hgvs_lrg_variant' -p21 -g4 -sS'hgvs_transcript_variant' -p22 -g4 -sS'hgvs_refseqgene_variant' -p23 -g4 -sS'primary_assembly_loci' -p24 -(dp25 -sS'reference_sequence_records' -p26 -g4 -ssS'flag' -p27 -S'warning' -p28 -sS'metadata' -p29 -(dp30 -S'variantvalidator_hgvs_version' -p31 -S'1.1.3' -p32 -sS'uta_schema' -p33 -S'uta_20180821' -p34 -sS'seqrepo_db' -p35 -S'2018-08-21' -p36 -sS'variantvalidator_version' -p37 -S'v0.2' -p38 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant39.txt b/VariantValidator/testing/testOutputsMasterITS/variant39.txt deleted file mode 100644 index c07a1c1d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant39.txt +++ /dev/null @@ -1,80 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'A transcript reference sequence has not been provided e.g. 
NG_(NM_):c.PositionVariation' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -g4 -sS'gene_symbol' -p12 -g4 -sS'hgvs_predicted_protein_consequence' -p13 -(dp14 -S'tlr' -p15 -g4 -sS'slr' -p16 -g4 -ssS'submitted_variant' -p17 -S'NG_008123.1:c.2055+18G>A' -p18 -sS'genome_context_intronic_sequence' -p19 -g4 -sS'hgvs_lrg_variant' -p20 -g4 -sS'hgvs_transcript_variant' -p21 -g4 -sS'hgvs_refseqgene_variant' -p22 -g4 -sS'primary_assembly_loci' -p23 -(dp24 -sS'reference_sequence_records' -p25 -g4 -ssS'flag' -p26 -S'warning' -p27 -sS'metadata' -p28 -(dp29 -S'variantvalidator_hgvs_version' -p30 -S'1.1.3' -p31 -sS'uta_schema' -p32 -S'uta_20180821' -p33 -sS'seqrepo_db' -p34 -S'2018-08-21' -p35 -sS'variantvalidator_version' -p36 -S'v0.2' -p37 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant4.txt b/VariantValidator/testing/testOutputsMasterITS/variant4.txt deleted file mode 100644 index 9ff6b60e..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant4.txt +++ /dev/null @@ -1,171 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_015120.4:c.34C>T' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'RefSeqGene record not available' -p19 -asS'refseqgene_context_intronic_sequence' -p20 -g16 -sS'alt_genomic_loci' -p21 -(lp22 -sS'transcript_description' -p23 -VHomo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA -p24 -sS'gene_symbol' -p25 -S'ALMS1' -p26 -sS'hgvs_predicted_protein_consequence' -p27 -(dp28 -S'tlr' -p29 -S'NP_055935.4:p.(Leu12=)' -p30 -sS'slr' -p31 -S'NP_055935.4:p.(L12=)' -p32 -ssS'submitted_variant' -p33 -S'NC_000002.11:g.73613030C>T' -p34 
-sS'genome_context_intronic_sequence' -p35 -g16 -sS'hgvs_lrg_variant' -p36 -g16 -sS'hgvs_transcript_variant' -p37 -S'NM_015120.4:c.34C>T' -p38 -sS'hgvs_refseqgene_variant' -p39 -g16 -sS'primary_assembly_loci' -p40 -(dp41 -S'hg19' -p42 -(dp43 -S'hgvs_genomic_description' -p44 -S'NC_000002.11:g.73613030C>T' -p45 -sS'vcf' -p46 -(dp47 -S'chr' -p48 -S'chr2' -p49 -sS'ref' -p50 -VC -p51 -sS'pos' -p52 -S'73613030' -p53 -sS'alt' -p54 -VT -p55 -sssS'hg38' -p56 -(dp57 -g44 -S'NC_000002.12:g.73385902C>T' -p58 -sg46 -(dp59 -g48 -g49 -sg50 -g51 -sg52 -S'73385902' -p60 -sg54 -g55 -sssS'grch37' -p61 -(dp62 -g44 -S'NC_000002.11:g.73613030C>T' -p63 -sg46 -(dp64 -g48 -S'2' -p65 -sg50 -g51 -sg52 -S'73613030' -p66 -sg54 -g55 -sssS'grch38' -p67 -(dp68 -g44 -S'NC_000002.12:g.73385902C>T' -p69 -sg46 -(dp70 -g48 -g65 -sg50 -g51 -sg52 -S'73385902' -p71 -sg54 -g55 -ssssS'reference_sequence_records' -p72 -(dp73 -S'protein' -p74 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4' -p75 -sS'transcript' -p76 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4' -p77 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant40.txt b/VariantValidator/testing/testOutputsMasterITS/variant40.txt deleted file mode 100644 index f5bc3f75..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant40.txt +++ /dev/null @@ -1,172 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_022356.3:c.2055+18G>A' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens prolyl 3-hydroxylase 1 (P3H1), transcript variant 1, mRNA -p14 -sS'gene_symbol' -p15 -S'P3H1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_071751.3:p.?' -p20 -sS'slr' -p21 -S'NP_071751.3:p.?' 
-p22 -ssS'submitted_variant' -p23 -S'NG_008123.1(NM_022356.3):c.2055+18G>A' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000001.10(NM_022356.3):c.2055+18G>A' -p26 -sS'hgvs_lrg_variant' -p27 -g6 -sS'hgvs_transcript_variant' -p28 -S'NM_022356.3:c.2055+18G>A' -p29 -sS'hgvs_refseqgene_variant' -p30 -g6 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'hgvs_genomic_description' -p35 -S'NC_000001.10:g.43212925C>T' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr1' -p40 -sS'ref' -p41 -VC -p42 -sS'pos' -p43 -S'43212925' -p44 -sS'alt' -p45 -VT -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000001.11:g.42747254C>T' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -g42 -sg43 -S'42747254' -p51 -sg45 -g46 -sssS'grch37' -p52 -(dp53 -g35 -S'NC_000001.10:g.43212925C>T' -p54 -sg37 -(dp55 -g39 -S'1' -p56 -sg41 -g42 -sg43 -S'43212925' -p57 -sg45 -g46 -sssS'grch38' -p58 -(dp59 -g35 -S'NC_000001.11:g.42747254C>T' -p60 -sg37 -(dp61 -g39 -g56 -sg41 -g42 -sg43 -S'42747254' -p62 -sg45 -g46 -ssssS'reference_sequence_records' -p63 -(dp64 -S'protein' -p65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_071751.3' -p66 -sS'transcript' -p67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_022356.3' -p68 -sssS'metadata' -p69 -(dp70 -S'variantvalidator_hgvs_version' -p71 -S'1.1.3' -p72 -sS'uta_schema' -p73 -S'uta_20180821' -p74 -sS'seqrepo_db' -p75 -S'2018-08-21' -p76 -sS'variantvalidator_version' -p77 -S'v0.2' -p78 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant41.txt b/VariantValidator/testing/testOutputsMasterITS/variant41.txt deleted file mode 100644 index 9d99ffdb..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant41.txt +++ /dev/null @@ -1,293 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_021983.4:c.490G>C' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'RefSeqGene record not available' -p19 -aS'NM_021983.4:c.490G>C cannot be mapped directly to genome build GRCh37' -p20 -aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' -p21 -asS'refseqgene_context_intronic_sequence' -p22 -g16 -sS'alt_genomic_loci' -p23 -(lp24 -(dp25 -S'grch37' -p26 -(dp27 -S'hgvs_genomic_description' -p28 -S'NT_167246.1:g.3848158T>G' -p29 -sS'vcf' -p30 -(dp31 -S'chr' -p32 -S'HSCHR6_MHC_MANN_CTG1' -p33 -sS'ref' -p34 -S'T' -p35 -sS'pos' -p36 -S'3848158' -p37 -sS'alt' -p38 -VG -p39 -sssa(dp40 -S'hg19' -p41 -(dp42 -g28 -S'NT_167246.1:g.3848158T>G' -p43 -sg30 -(dp44 -g32 -S'chr6_mann_hap4' -p45 -sg34 -g35 -sg36 -S'3848158' -p46 -sg38 -g39 -sssa(dp47 -S'grch38' -p48 -(dp49 -g28 -S'NT_167246.2:g.3842538T>G' -p50 -sg30 -(dp51 -g32 -g33 -sg34 -g35 -sg36 -S'3842538' -p52 -sg38 -g39 -sssa(dp53 -S'hg38' -p54 -(dp55 -g28 -S'NT_167246.2:g.3842538T>G' -p56 -sg30 -(dp57 -g32 -S'chr6_GL000253v2_alt' -p58 -sg34 -g35 -sg36 -S'3842538' -p59 -sg38 -g39 -sssa(dp60 -S'grch37' -p61 -(dp62 -g28 -S'NT_167247.1:g.3884432C>G' -p63 -sg30 -(dp64 -g32 -S'HSCHR6_MHC_MCF_CTG1' -p65 -sg34 -VC -p66 -sg36 -S'3884432' -p67 -sg38 -g39 -sssa(dp68 -S'hg19' -p69 -(dp70 -g28 -S'NT_167247.1:g.3884432C>G' -p71 -sg30 -(dp72 -g32 -S'chr6_mcf_hap5' -p73 -sg34 -g66 -sg36 -S'3884432' 
-p74 -sg38 -g39 -sssa(dp75 -S'grch37' -p76 -(dp77 -g28 -S'NT_167249.1:g.3852542C>G' -p78 -sg30 -(dp79 -g32 -S'HSCHR6_MHC_SSTO_CTG1' -p80 -sg34 -g66 -sg36 -S'3852542' -p81 -sg38 -g39 -sssa(dp82 -S'hg19' -p83 -(dp84 -g28 -S'NT_167249.1:g.3852542C>G' -p85 -sg30 -(dp86 -g32 -S'chr6_ssto_hap7' -p87 -sg34 -g66 -sg36 -S'3852542' -p88 -sg38 -g39 -sssa(dp89 -S'grch38' -p90 -(dp91 -g28 -S'NT_167249.2:g.3853244C>G' -p92 -sg30 -(dp93 -g32 -g80 -sg34 -g66 -sg36 -S'3853244' -p94 -sg38 -g39 -sssa(dp95 -g54 -(dp96 -g28 -S'NT_167249.2:g.3853244C>G' -p97 -sg30 -(dp98 -g32 -S'chr6_GL000256v2_alt' -p99 -sg34 -g66 -sg36 -S'3853244' -p100 -sg38 -g39 -sssasS'transcript_description' -p101 -VHomo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA -p102 -sS'gene_symbol' -p103 -S'HLA-DRB4' -p104 -sS'hgvs_predicted_protein_consequence' -p105 -(dp106 -S'tlr' -p107 -S'NP_068818.4:p.(Gly164Arg)' -p108 -sS'slr' -p109 -S'NP_068818.4:p.(G164R)' -p110 -ssS'submitted_variant' -p111 -S'NM_021983.4:c.490G>C' -p112 -sS'genome_context_intronic_sequence' -p113 -g16 -sS'hgvs_lrg_variant' -p114 -g16 -sS'hgvs_transcript_variant' -p115 -S'NM_021983.4:c.490G>C' -p116 -sS'hgvs_refseqgene_variant' -p117 -g16 -sS'primary_assembly_loci' -p118 -(dp119 -sS'reference_sequence_records' -p120 -(dp121 -S'protein' -p122 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_068818.4' -p123 -sS'transcript' -p124 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_021983.4' -p125 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant42.txt b/VariantValidator/testing/testOutputsMasterITS/variant42.txt deleted file mode 100644 index 9b6cb56e..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant42.txt +++ /dev/null @@ -1,564 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_032470.3:c.4del' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -(dp13 -S'grch37' -p14 -(dp15 -S'hgvs_genomic_description' -p16 -S'NT_113891.2:g.3483644del' -p17 -sS'vcf' -p18 -(dp19 -S'chr' -p20 -S'HSCHR6_MHC_COX_CTG1' -p21 -sS'ref' -p22 -S'CG' -p23 -sS'pos' -p24 -S'3483643' -p25 -sS'alt' -p26 -S'C' -p27 -sssa(dp28 -S'hg19' -p29 -(dp30 -g16 -S'NT_113891.2:g.3483644del' -p31 -sg18 -(dp32 -g20 -S'chr6_cox_hap2' -p33 -sg22 -S'CG' -p34 -sg24 -S'3483643' -p35 -sg26 -g27 -sssa(dp36 -S'grch38' -p37 -(dp38 -g16 -S'NT_113891.3:g.3483538del' -p39 -sg18 -(dp40 -g20 -g21 -sg22 -S'CG' -p41 -sg24 -S'3483537' -p42 -sg26 -g27 -sssa(dp43 -S'hg38' -p44 -(dp45 -g16 -S'NT_113891.3:g.3483538del' -p46 -sg18 -(dp47 -g20 -S'chr6_GL000251v2_alt' -p48 -sg22 -S'CG' -p49 -sg24 -S'3483537' -p50 -sg26 -g27 -sssa(dp51 -S'grch37' -p52 -(dp53 -g16 -S'NT_167245.1:g.3292210del' -p54 -sg18 -(dp55 -g20 -S'HSCHR6_MHC_DBB_CTG1' -p56 -sg22 -S'CG' -p57 -sg24 -S'3292209' -p58 -sg26 -g27 -sssa(dp59 -S'hg19' -p60 -(dp61 -g16 -S'NT_167245.1:g.3292210del' -p62 -sg18 -(dp63 -g20 -S'chr6_dbb_hap3' -p64 -sg22 -S'CG' -p65 -sg24 -S'3292209' -p66 -sg26 -g27 -sssa(dp67 -S'grch38' -p68 -(dp69 -g16 -S'NT_167245.2:g.3286625del' -p70 -sg18 -(dp71 -g20 -g56 -sg22 -S'CG' -p72 -sg24 -S'3286624' -p73 -sg26 -g27 -sssa(dp74 -g44 -(dp75 -g16 -S'NT_167245.2:g.3286625del' -p76 -sg18 -(dp77 -g20 -S'chr6_GL000252v2_alt' -p78 -sg22 -S'CG' -p79 -sg24 -S'3286624' -p80 -sg26 -g27 -sssa(dp81 -S'grch37' -p82 -(dp83 -g16 
-S'NT_167247.1:g.3392834del' -p84 -sg18 -(dp85 -g20 -S'HSCHR6_MHC_MCF_CTG1' -p86 -sg22 -S'CG' -p87 -sg24 -S'3392833' -p88 -sg26 -g27 -sssa(dp89 -S'hg19' -p90 -(dp91 -g16 -S'NT_167247.1:g.3392834del' -p92 -sg18 -(dp93 -g20 -S'chr6_mcf_hap5' -p94 -sg22 -S'CG' -p95 -sg24 -S'3392833' -p96 -sg26 -g27 -sssa(dp97 -S'grch38' -p98 -(dp99 -g16 -S'NT_167247.2:g.3387249del' -p100 -sg18 -(dp101 -g20 -g86 -sg22 -S'CG' -p102 -sg24 -S'3387248' -p103 -sg26 -g27 -sssa(dp104 -g44 -(dp105 -g16 -S'NT_167247.2:g.3387249del' -p106 -sg18 -(dp107 -g20 -S'chr6_GL000254v2_alt' -p108 -sg22 -S'CG' -p109 -sg24 -S'3387248' -p110 -sg26 -g27 -sssa(dp111 -S'grch37' -p112 -(dp113 -g16 -S'NT_167248.1:g.3274047del' -p114 -sg18 -(dp115 -g20 -S'HSCHR6_MHC_QBL_CTG1' -p116 -sg22 -S'CG' -p117 -sg24 -S'3274046' -p118 -sg26 -g27 -sssa(dp119 -S'hg19' -p120 -(dp121 -g16 -S'NT_167248.1:g.3274047del' -p122 -sg18 -(dp123 -g20 -S'chr6_qbl_hap6' -p124 -sg22 -S'CG' -p125 -sg24 -S'3274046' -p126 -sg26 -g27 -sssa(dp127 -S'grch38' -p128 -(dp129 -g16 -S'NT_167248.2:g.3268451del' -p130 -sg18 -(dp131 -g20 -g116 -sg22 -S'CG' -p132 -sg24 -S'3268450' -p133 -sg26 -g27 -sssa(dp134 -g44 -(dp135 -g16 -S'NT_167248.2:g.3268451del' -p136 -sg18 -(dp137 -g20 -S'chr6_GL000255v2_alt' -p138 -sg22 -S'CG' -p139 -sg24 -S'3268450' -p140 -sg26 -g27 -sssa(dp141 -S'grch37' -p142 -(dp143 -g16 -S'NT_167249.1:g.3345701del' -p144 -sg18 -(dp145 -g20 -S'HSCHR6_MHC_SSTO_CTG1' -p146 -sg22 -S'CG' -p147 -sg24 -S'3345700' -p148 -sg26 -g27 -sssa(dp149 -S'hg19' -p150 -(dp151 -g16 -S'NT_167249.1:g.3345701del' -p152 -sg18 -(dp153 -g20 -S'chr6_ssto_hap7' -p154 -sg22 -S'CG' -p155 -sg24 -S'3345700' -p156 -sg26 -g27 -sssa(dp157 -S'grch38' -p158 -(dp159 -g16 -S'NT_167249.2:g.3346403del' -p160 -sg18 -(dp161 -g20 -g146 -sg22 -S'CG' -p162 -sg24 -S'3346402' -p163 -sg26 -g27 -sssa(dp164 -g44 -(dp165 -g16 -S'NT_167249.2:g.3346403del' -p166 -sg18 -(dp167 -g20 -S'chr6_GL000256v2_alt' -p168 -sg22 -S'CG' -p169 -sg24 -S'3346402' -p170 -sg26 -g27 
-sssasS'transcript_description' -p171 -VHomo sapiens tenascin XB (TNXB), transcript variant XB-S, mRNA -p172 -sS'gene_symbol' -p173 -S'TNXB' -p174 -sS'hgvs_predicted_protein_consequence' -p175 -(dp176 -S'tlr' -p177 -S'NP_115859.2:p.(Arg2AlafsTer91)' -p178 -sS'slr' -p179 -S'NP_115859.2:p.(R2Afs*91)' -p180 -ssS'submitted_variant' -p181 -S'NM_032470.3:c.4del' -p182 -sS'genome_context_intronic_sequence' -p183 -g6 -sS'hgvs_lrg_variant' -p184 -g6 -sS'hgvs_transcript_variant' -p185 -S'NM_032470.3:c.4del' -p186 -sS'hgvs_refseqgene_variant' -p187 -g6 -sS'primary_assembly_loci' -p188 -(dp189 -S'hg19' -p190 -(dp191 -g16 -S'NC_000006.11:g.32012993del' -p192 -sg18 -(dp193 -g20 -S'chr6' -p194 -sg22 -S'CG' -p195 -sg24 -S'32012992' -p196 -sg26 -g27 -sssg44 -(dp197 -g16 -S'NC_000006.12:g.32045216del' -p198 -sg18 -(dp199 -g20 -g194 -sg22 -S'CG' -p200 -sg24 -S'32045215' -p201 -sg26 -g27 -sssS'grch37' -p202 -(dp203 -g16 -S'NC_000006.11:g.32012993del' -p204 -sg18 -(dp205 -g20 -S'6' -p206 -sg22 -S'CG' -p207 -sg24 -S'32012992' -p208 -sg26 -g27 -sssS'grch38' -p209 -(dp210 -g16 -S'NC_000006.12:g.32045216del' -p211 -sg18 -(dp212 -g20 -g206 -sg22 -S'CG' -p213 -sg24 -S'32045215' -p214 -sg26 -g27 -ssssS'reference_sequence_records' -p215 -(dp216 -S'protein' -p217 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_115859.2' -p218 -sS'transcript' -p219 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032470.3' -p220 -sssS'metadata' -p221 -(dp222 -S'variantvalidator_hgvs_version' -p223 -S'1.1.3' -p224 -sS'uta_schema' -p225 -S'uta_20180821' -p226 -sS'seqrepo_db' -p227 -S'2018-08-21' -p228 -sS'variantvalidator_version' -p229 -S'v0.2' -p230 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant43.txt b/VariantValidator/testing/testOutputsMasterITS/variant43.txt deleted file mode 100644 index 6e3ef37b..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant43.txt +++ /dev/null @@ -1,179 +0,0 @@ -(dp0 -S'NM_001194958.2:c.20C>A' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -aS'NM_001194958.2:c.20C>A cannot be mapped directly to genome build GRCh37' -p8 -aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g4 -sS'alt_genomic_loci' -p11 -(lp12 -(dp13 -S'grch37' -p14 -(dp15 -S'hgvs_genomic_description' -p16 -S'NW_003315950.2:g.355171C>A' -p17 -sS'vcf' -p18 -(dp19 -S'chr' -p20 -S'HG987_PATCH' -p21 -sS'ref' -p22 -S'C' -p23 -sS'pos' -p24 -S'355171' -p25 -sS'alt' -p26 -S'A' -p27 -sssa(dp28 -S'hg19' -p29 -(dp30 -g16 -S'NW_003315950.2:g.355171C>A' -p31 -sg18 -(dp32 -g20 -S'NW_003315950.2' -p33 -sg22 -g23 -sg24 -S'355171' -p34 -sg26 -g27 -sssasS'transcript_description' -p35 -VHomo sapiens potassium voltage-gated channel subfamily J member 18 (KCNJ18), mRNA -p36 -sS'gene_symbol' -p37 -S'KCNJ18' -p38 -sS'hgvs_predicted_protein_consequence' -p39 -(dp40 -S'tlr' -p41 -S'NP_001181887.2:p.(Ala7Asp)' -p42 -sS'slr' -p43 -S'NP_001181887.2:p.(A7D)' -p44 -ssS'submitted_variant' -p45 -S'NM_001194958.2:c.20C>A' -p46 -sS'genome_context_intronic_sequence' -p47 -g4 -sS'hgvs_lrg_variant' -p48 -g4 -sS'hgvs_transcript_variant' -p49 -S'NM_001194958.2:c.20C>A' -p50 -sS'hgvs_refseqgene_variant' -p51 -g4 -sS'primary_assembly_loci' -p52 -(dp53 -S'grch38' -p54 -(dp55 -g16 -S'NC_000017.11:g.21702806C>A' -p56 -sg18 -(dp57 -g20 -S'17' -p58 -sg22 -g23 -sg24 -S'21702806' -p59 -sg26 -g27 -sssS'hg38' -p60 -(dp61 -g16 -S'NC_000017.11:g.21702806C>A' -p62 -sg18 -(dp63 -g20 -S'chr17' -p64 -sg22 -g23 -sg24 -S'21702806' -p65 
-sg26 -g27 -ssssS'reference_sequence_records' -p66 -(dp67 -S'protein' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001181887.2' -p69 -sS'transcript' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001194958.2' -p71 -sssS'flag' -p72 -S'gene_variant' -p73 -sS'metadata' -p74 -(dp75 -S'variantvalidator_hgvs_version' -p76 -S'1.1.3' -p77 -sS'uta_schema' -p78 -S'uta_20180821' -p79 -sS'seqrepo_db' -p80 -S'2018-08-21' -p81 -sS'variantvalidator_version' -p82 -S'v0.2' -p83 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant44.txt b/VariantValidator/testing/testOutputsMasterITS/variant44.txt deleted file mode 100644 index f156b082..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant44.txt +++ /dev/null @@ -1,143 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_000022.2:c.534A>G' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'A more recent version of the selected reference sequence NM_000022.2 is available (NM_000022.3)' -p19 -aS'NM_000022.3:c.534A>G MUST be fully validated prior to use in reports' -p20 -aS'select_variants=NM_000022.3:c.534A>G' -p21 -aS'RefSeqGene record not available' -p22 -asS'refseqgene_context_intronic_sequence' -p23 -g16 -sS'alt_genomic_loci' -p24 -(lp25 -sS'transcript_description' -p26 -VHomo sapiens adenosine deaminase (ADA), mRNA -p27 -sS'gene_symbol' -p28 -S'ADA' -p29 -sS'hgvs_predicted_protein_consequence' -p30 -(dp31 -S'tlr' -p32 -S'NP_000013.2:p.(Val178=)' -p33 -sS'slr' -p34 -S'NP_000013.2:p.(V178=)' -p35 -ssS'submitted_variant' -p36 -S'NM_000022.2:c.534A>G' -p37 -sS'genome_context_intronic_sequence' -p38 -g16 -sS'hgvs_lrg_variant' -p39 -g16 -sS'hgvs_transcript_variant' -p40 -S'NM_000022.2:c.534A>G' -p41 
-sS'hgvs_refseqgene_variant' -p42 -g16 -sS'primary_assembly_loci' -p43 -(dp44 -S'hg19' -p45 -(dp46 -S'hgvs_genomic_description' -p47 -S'NC_000020.10:g.43252915T>C' -p48 -sS'vcf' -p49 -(dp50 -S'chr' -p51 -S'chr20' -p52 -sS'ref' -p53 -VT -p54 -sS'pos' -p55 -S'43252915' -p56 -sS'alt' -p57 -VC -p58 -sssS'grch37' -p59 -(dp60 -g47 -S'NC_000020.10:g.43252915T>C' -p61 -sg49 -(dp62 -g51 -S'20' -p63 -sg53 -g54 -sg55 -S'43252915' -p64 -sg57 -g58 -ssssS'reference_sequence_records' -p65 -(dp66 -S'protein' -p67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000013.2' -p68 -sS'transcript' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000022.2' -p70 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant45.txt b/VariantValidator/testing/testOutputsMasterITS/variant45.txt deleted file mode 100644 index 3fcafa99..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant45.txt +++ /dev/null @@ -1,293 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_021983.4:c.490G>C' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'RefSeqGene record not available' -p19 -aS'NM_021983.4:c.490G>C cannot be mapped directly to genome build GRCh37' -p20 -aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' -p21 -asS'refseqgene_context_intronic_sequence' -p22 -g16 -sS'alt_genomic_loci' -p23 -(lp24 -(dp25 -S'grch37' -p26 -(dp27 -S'hgvs_genomic_description' -p28 -S'NT_167246.1:g.3848158T>G' -p29 -sS'vcf' -p30 -(dp31 -S'chr' -p32 -S'HSCHR6_MHC_MANN_CTG1' -p33 -sS'ref' -p34 -S'T' -p35 -sS'pos' -p36 -S'3848158' -p37 -sS'alt' -p38 -VG -p39 -sssa(dp40 -S'hg19' -p41 -(dp42 -g28 -S'NT_167246.1:g.3848158T>G' -p43 -sg30 -(dp44 -g32 -S'chr6_mann_hap4' -p45 -sg34 -g35 
-sg36 -S'3848158' -p46 -sg38 -g39 -sssa(dp47 -S'grch38' -p48 -(dp49 -g28 -S'NT_167246.2:g.3842538T>G' -p50 -sg30 -(dp51 -g32 -g33 -sg34 -g35 -sg36 -S'3842538' -p52 -sg38 -g39 -sssa(dp53 -S'hg38' -p54 -(dp55 -g28 -S'NT_167246.2:g.3842538T>G' -p56 -sg30 -(dp57 -g32 -S'chr6_GL000253v2_alt' -p58 -sg34 -g35 -sg36 -S'3842538' -p59 -sg38 -g39 -sssa(dp60 -S'grch37' -p61 -(dp62 -g28 -S'NT_167247.1:g.3884432C>G' -p63 -sg30 -(dp64 -g32 -S'HSCHR6_MHC_MCF_CTG1' -p65 -sg34 -VC -p66 -sg36 -S'3884432' -p67 -sg38 -g39 -sssa(dp68 -S'hg19' -p69 -(dp70 -g28 -S'NT_167247.1:g.3884432C>G' -p71 -sg30 -(dp72 -g32 -S'chr6_mcf_hap5' -p73 -sg34 -g66 -sg36 -S'3884432' -p74 -sg38 -g39 -sssa(dp75 -S'grch37' -p76 -(dp77 -g28 -S'NT_167249.1:g.3852542C>G' -p78 -sg30 -(dp79 -g32 -S'HSCHR6_MHC_SSTO_CTG1' -p80 -sg34 -g66 -sg36 -S'3852542' -p81 -sg38 -g39 -sssa(dp82 -S'hg19' -p83 -(dp84 -g28 -S'NT_167249.1:g.3852542C>G' -p85 -sg30 -(dp86 -g32 -S'chr6_ssto_hap7' -p87 -sg34 -g66 -sg36 -S'3852542' -p88 -sg38 -g39 -sssa(dp89 -S'grch38' -p90 -(dp91 -g28 -S'NT_167249.2:g.3853244C>G' -p92 -sg30 -(dp93 -g32 -g80 -sg34 -g66 -sg36 -S'3853244' -p94 -sg38 -g39 -sssa(dp95 -g54 -(dp96 -g28 -S'NT_167249.2:g.3853244C>G' -p97 -sg30 -(dp98 -g32 -S'chr6_GL000256v2_alt' -p99 -sg34 -g66 -sg36 -S'3853244' -p100 -sg38 -g39 -sssasS'transcript_description' -p101 -VHomo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA -p102 -sS'gene_symbol' -p103 -S'HLA-DRB4' -p104 -sS'hgvs_predicted_protein_consequence' -p105 -(dp106 -S'tlr' -p107 -S'NP_068818.4:p.(Gly164Arg)' -p108 -sS'slr' -p109 -S'NP_068818.4:p.(G164R)' -p110 -ssS'submitted_variant' -p111 -S'HSCHR6_MHC_SSTO_CTG1-3852542-C-G' -p112 -sS'genome_context_intronic_sequence' -p113 -g16 -sS'hgvs_lrg_variant' -p114 -g16 -sS'hgvs_transcript_variant' -p115 -S'NM_021983.4:c.490G>C' -p116 -sS'hgvs_refseqgene_variant' -p117 -g16 -sS'primary_assembly_loci' -p118 -(dp119 -sS'reference_sequence_records' -p120 -(dp121 -S'protein' -p122 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NP_068818.4' -p123 -sS'transcript' -p124 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_021983.4' -p125 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant46.txt b/VariantValidator/testing/testOutputsMasterITS/variant46.txt deleted file mode 100644 index a27a5070..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant46.txt +++ /dev/null @@ -1,175 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000368.4:c.363+1dup' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA -p14 -sS'gene_symbol' -p15 -S'TSC1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000359.1:p.?' -p20 -sS'slr' -p21 -S'NP_000359.1:p.?' 
-p22 -ssS'submitted_variant' -p23 -S'NM_000368.4:c.363+1dupG' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000009.11(NM_000368.4):c.363+1dup' -p26 -sS'hgvs_lrg_variant' -p27 -g6 -sS'hgvs_transcript_variant' -p28 -S'NM_000368.4:c.363+1dup' -p29 -sS'hgvs_refseqgene_variant' -p30 -g6 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'hgvs_genomic_description' -p35 -S'NC_000009.11:g.135800973dup' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr9' -p40 -sS'ref' -p41 -S'C' -p42 -sS'pos' -p43 -S'135800973' -p44 -sS'alt' -p45 -S'CC' -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000009.12:g.132925586dup' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -g42 -sg43 -S'132925586' -p51 -sg45 -S'CC' -p52 -sssS'grch37' -p53 -(dp54 -g35 -S'NC_000009.11:g.135800973dup' -p55 -sg37 -(dp56 -g39 -S'9' -p57 -sg41 -g42 -sg43 -S'135800973' -p58 -sg45 -S'CC' -p59 -sssS'grch38' -p60 -(dp61 -g35 -S'NC_000009.12:g.132925586dup' -p62 -sg37 -(dp63 -g39 -g57 -sg41 -g42 -sg43 -S'132925586' -p64 -sg45 -S'CC' -p65 -ssssS'reference_sequence_records' -p66 -(dp67 -S'protein' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1' -p69 -sS'transcript' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4' -p71 -sssS'metadata' -p72 -(dp73 -S'variantvalidator_hgvs_version' -p74 -S'1.1.3' -p75 -sS'uta_schema' -p76 -S'uta_20180821' -p77 -sS'seqrepo_db' -p78 -S'2018-08-21' -p79 -sS'variantvalidator_version' -p80 -S'v0.2' -p81 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant47.txt b/VariantValidator/testing/testOutputsMasterITS/variant47.txt deleted file mode 100644 index bf747895..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant47.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000368.4:c.363+1dup' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000368.4:c.363dup normalized to NM_000368.4:c.363+1dup' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA -p15 -sS'gene_symbol' -p16 -S'TSC1' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000359.1:p.(Met122AspfsTer4)' -p21 -sS'slr' -p22 -S'NP_000359.1:p.(M122Dfs*4)' -p23 -ssS'submitted_variant' -p24 -S'NM_000368.4:c.363dupG' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000009.11(NM_000368.4):c.363+1dup' -p27 -sS'hgvs_lrg_variant' -p28 -g6 -sS'hgvs_transcript_variant' -p29 -S'NM_000368.4:c.363+1dup' -p30 -sS'hgvs_refseqgene_variant' -p31 -g6 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000009.11:g.135800973dup' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr9' -p41 -sS'ref' -p42 -S'C' -p43 -sS'pos' -p44 -S'135800973' -p45 -sS'alt' -p46 -S'CC' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000009.12:g.132925586dup' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -g43 -sg44 -S'132925586' -p52 -sg46 -S'CC' -p53 -sssS'grch37' -p54 -(dp55 -g36 -S'NC_000009.11:g.135800973dup' -p56 -sg38 -(dp57 -g40 -S'9' -p58 -sg42 -g43 -sg44 -S'135800973' -p59 -sg46 -S'CC' -p60 -sssS'grch38' -p61 -(dp62 -g36 -S'NC_000009.12:g.132925586dup' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -g43 -sg44 -S'132925586' -p65 -sg46 -S'CC' -p66 -ssssS'reference_sequence_records' -p67 -(dp68 
-S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4' -p72 -sssS'metadata' -p73 -(dp74 -S'variantvalidator_hgvs_version' -p75 -S'1.1.3' -p76 -sS'uta_schema' -p77 -S'uta_20180821' -p78 -sS'seqrepo_db' -p79 -S'2018-08-21' -p80 -sS'variantvalidator_version' -p81 -S'v0.2' -p82 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant48.txt b/VariantValidator/testing/testOutputsMasterITS/variant48.txt deleted file mode 100644 index a4f8aedf..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant48.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000089.3:c.1035_1035+2del' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000089.3:c.1033_1035del normalized to NM_000089.3:c.1035_1035+2del' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 2 chain (COL1A2), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A2' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000080.2:p.(Val345del)' -p21 -sS'slr' -p22 -S'NP_000080.2:p.(V345del)' -p23 -ssS'submitted_variant' -p24 -S'NM_000089.3:c.1033_1035delGTT' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000007.13(NM_000089.3):c.1035_1035+2del' -p27 -sS'hgvs_lrg_variant' -p28 -g6 -sS'hgvs_transcript_variant' -p29 -S'NM_000089.3:c.1035_1035+2del' -p30 -sS'hgvs_refseqgene_variant' -p31 -g6 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000007.13:g.94039133_94039135del' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr7' -p41 -sS'ref' -p42 -S'CTTG' -p43 -sS'pos' -p44 -S'94039128' -p45 -sS'alt' -p46 -S'C' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000007.14:g.94409821_94409823del' -p50 -sg38 
-(dp51 -g40 -g41 -sg42 -S'CTTG' -p52 -sg44 -S'94409816' -p53 -sg46 -g47 -sssS'grch37' -p54 -(dp55 -g36 -S'NC_000007.13:g.94039133_94039135del' -p56 -sg38 -(dp57 -g40 -S'7' -p58 -sg42 -S'CTTG' -p59 -sg44 -S'94039128' -p60 -sg46 -g47 -sssS'grch38' -p61 -(dp62 -g36 -S'NC_000007.14:g.94409821_94409823del' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -S'CTTG' -p65 -sg44 -S'94409816' -p66 -sg46 -g47 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000080.2' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000089.3' -p72 -sssS'metadata' -p73 -(dp74 -S'variantvalidator_hgvs_version' -p75 -S'1.1.3' -p76 -sS'uta_schema' -p77 -S'uta_20180821' -p78 -sS'seqrepo_db' -p79 -S'2018-08-21' -p80 -sS'variantvalidator_version' -p81 -S'v0.2' -p82 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant49.txt b/VariantValidator/testing/testOutputsMasterITS/variant49.txt deleted file mode 100644 index 9f06f84b..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant49.txt +++ /dev/null @@ -1,175 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000089.3:c.1035_1035+2del' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 2 chain (COL1A2), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A2' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000080.2:p.(Val345del)' -p20 -sS'slr' -p21 -S'NP_000080.2:p.(V345del)' -p22 -ssS'submitted_variant' -p23 -S'NM_000089.3:c.1035_1035+2delTGT' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000007.13(NM_000089.3):c.1035_1035+2del' -p26 -sS'hgvs_lrg_variant' -p27 -g6 -sS'hgvs_transcript_variant' -p28 -S'NM_000089.3:c.1035_1035+2del' -p29 -sS'hgvs_refseqgene_variant' -p30 -g6 
-sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'hgvs_genomic_description' -p35 -S'NC_000007.13:g.94039133_94039135del' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr7' -p40 -sS'ref' -p41 -S'CTTG' -p42 -sS'pos' -p43 -S'94039128' -p44 -sS'alt' -p45 -S'C' -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000007.14:g.94409821_94409823del' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -S'CTTG' -p51 -sg43 -S'94409816' -p52 -sg45 -g46 -sssS'grch37' -p53 -(dp54 -g35 -S'NC_000007.13:g.94039133_94039135del' -p55 -sg37 -(dp56 -g39 -S'7' -p57 -sg41 -S'CTTG' -p58 -sg43 -S'94039128' -p59 -sg45 -g46 -sssS'grch38' -p60 -(dp61 -g35 -S'NC_000007.14:g.94409821_94409823del' -p62 -sg37 -(dp63 -g39 -g57 -sg41 -S'CTTG' -p64 -sg43 -S'94409816' -p65 -sg45 -g46 -ssssS'reference_sequence_records' -p66 -(dp67 -S'protein' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000080.2' -p69 -sS'transcript' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000089.3' -p71 -sssS'metadata' -p72 -(dp73 -S'variantvalidator_hgvs_version' -p74 -S'1.1.3' -p75 -sS'uta_schema' -p76 -S'uta_20180821' -p77 -sS'seqrepo_db' -p78 -S'2018-08-21' -p79 -sS'variantvalidator_version' -p80 -S'v0.2' -p81 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant5.txt b/VariantValidator/testing/testOutputsMasterITS/variant5.txt deleted file mode 100644 index 52ecc3f7..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant5.txt +++ /dev/null @@ -1,287 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000109.3:c.7+127703T>A' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens dystrophin (DMD), transcript variant Dp427c, mRNA -p14 -sS'gene_symbol' -p15 -S'DMD' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000100.2:p.?' 
-p20 -sS'slr' -p21 -S'NP_000100.2:p.?' -p22 -ssS'submitted_variant' -p23 -S'NC_000023.10:g.33229673A>T' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000023.10(NM_000109.3):c.7+127703T>A' -p26 -sS'hgvs_lrg_variant' -p27 -g6 -sS'hgvs_transcript_variant' -p28 -S'NM_000109.3:c.7+127703T>A' -p29 -sS'hgvs_refseqgene_variant' -p30 -g6 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'hgvs_genomic_description' -p35 -S'NC_000023.10:g.33229673A>T' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chrX' -p40 -sS'ref' -p41 -VA -p42 -sS'pos' -p43 -S'33229673' -p44 -sS'alt' -p45 -VT -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000023.11:g.33211556A>T' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -g42 -sg43 -S'33211556' -p51 -sg45 -g46 -sssS'grch37' -p52 -(dp53 -g35 -S'NC_000023.10:g.33229673A>T' -p54 -sg37 -(dp55 -g39 -S'X' -p56 -sg41 -g42 -sg43 -S'33229673' -p57 -sg45 -g46 -sssS'grch38' -p58 -(dp59 -g35 -S'NC_000023.11:g.33211556A>T' -p60 -sg37 -(dp61 -g39 -g56 -sg41 -g42 -sg43 -S'33211556' -p62 -sg45 -g46 -ssssS'reference_sequence_records' -p63 -(dp64 -S'protein' -p65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000100.2' -p66 -sS'transcript' -p67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000109.3' -p68 -sssS'NM_004006.2:c.-244T>A' -p69 -(dp70 -g5 -g6 -sg7 -(lp71 -S'RefSeqGene record not available' -p72 -asg10 -g6 -sg11 -(lp73 -sg13 -VHomo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA -p74 -sg15 -S'DMD' -p75 -sg17 -(dp76 -g19 -S'NP_003997.1:p.?' -p77 -sg21 -S'NP_003997.1:p.?' 
-p78 -ssg23 -g24 -sg25 -g6 -sg27 -g6 -sg28 -S'NM_004006.2:c.-244T>A' -p79 -sg30 -g6 -sg31 -(dp80 -S'hg19' -p81 -(dp82 -g35 -S'NC_000023.10:g.33229673A>T' -p83 -sg37 -(dp84 -g39 -g40 -sg41 -g42 -sg43 -S'33229673' -p85 -sg45 -g46 -sssg47 -(dp86 -g35 -S'NC_000023.11:g.33211556A>T' -p87 -sg37 -(dp88 -g39 -g40 -sg41 -g42 -sg43 -S'33211556' -p89 -sg45 -g46 -sssS'grch37' -p90 -(dp91 -g35 -S'NC_000023.10:g.33229673A>T' -p92 -sg37 -(dp93 -g39 -g56 -sg41 -g42 -sg43 -S'33229673' -p94 -sg45 -g46 -sssS'grch38' -p95 -(dp96 -g35 -S'NC_000023.11:g.33211556A>T' -p97 -sg37 -(dp98 -g39 -g56 -sg41 -g42 -sg43 -S'33211556' -p99 -sg45 -g46 -ssssg63 -(dp100 -g65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1' -p101 -sg67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2' -p102 -sssS'metadata' -p103 -(dp104 -S'variantvalidator_hgvs_version' -p105 -S'1.1.3' -p106 -sS'uta_schema' -p107 -S'uta_20180821' -p108 -sS'seqrepo_db' -p109 -S'2018-08-21' -p110 -sS'variantvalidator_version' -p111 -S'v0.2' -p112 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant50.txt b/VariantValidator/testing/testOutputsMasterITS/variant50.txt deleted file mode 100644 index 91540969..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant50.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.2024_2028+1del' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000088.3:c.2023_2028del normalized to NM_000088.3:c.2024_2028+1del' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A1' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000079.2:p.(Ala675_Arg676del)' -p21 -sS'slr' -p22 -S'NP_000079.2:p.(A675_R676del)' -p23 
-ssS'submitted_variant' -p24 -S'NM_000088.3:c.2023_2028delGCAAGA' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000017.10(NM_000088.3):c.2024_2028+1del' -p27 -sS'hgvs_lrg_variant' -p28 -g6 -sS'hgvs_transcript_variant' -p29 -S'NM_000088.3:c.2024_2028+1del' -p30 -sS'hgvs_refseqgene_variant' -p31 -g6 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000017.10:g.48269340_48269345del' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr17' -p41 -sS'ref' -p42 -S'ACTCTTG' -p43 -sS'pos' -p44 -S'48269339' -p45 -sS'alt' -p46 -S'A' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000017.11:g.50191979_50191984del' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'ACTCTTG' -p52 -sg44 -S'50191978' -p53 -sg46 -g47 -sssS'grch37' -p54 -(dp55 -g36 -S'NC_000017.10:g.48269340_48269345del' -p56 -sg38 -(dp57 -g40 -S'17' -p58 -sg42 -S'ACTCTTG' -p59 -sg44 -S'48269339' -p60 -sg46 -g47 -sssS'grch38' -p61 -(dp62 -g36 -S'NC_000017.11:g.50191979_50191984del' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -S'ACTCTTG' -p65 -sg44 -S'50191978' -p66 -sg46 -g47 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p72 -sssS'metadata' -p73 -(dp74 -S'variantvalidator_hgvs_version' -p75 -S'1.1.3' -p76 -sS'uta_schema' -p77 -S'uta_20180821' -p78 -sS'seqrepo_db' -p79 -S'2018-08-21' -p80 -sS'variantvalidator_version' -p81 -S'v0.2' -p82 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant51.txt b/VariantValidator/testing/testOutputsMasterITS/variant51.txt deleted file mode 100644 index eed5dbf9..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant51.txt +++ /dev/null @@ -1,176 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000089.3:c.938del' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000089.3:c.938-1del automapped to NM_000089.3:c.938del' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 2 chain (COL1A2), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A2' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000080.2:p.(Gly313AlafsTer86)' -p21 -sS'slr' -p22 -S'NP_000080.2:p.(G313Afs*86)' -p23 -ssS'submitted_variant' -p24 -S'NM_000089.3:c.938-1delG' -p25 -sS'genome_context_intronic_sequence' -p26 -g6 -sS'hgvs_lrg_variant' -p27 -g6 -sS'hgvs_transcript_variant' -p28 -S'NM_000089.3:c.938del' -p29 -sS'hgvs_refseqgene_variant' -p30 -g6 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'hgvs_genomic_description' -p35 -S'NC_000007.13:g.94039036del' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr7' -p40 -sS'ref' -p41 -S'AG' -p42 -sS'pos' -p43 -S'94039033' -p44 -sS'alt' -p45 -S'A' -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000007.14:g.94409724del' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -S'AG' -p51 -sg43 -S'94409721' -p52 -sg45 -g46 -sssS'grch37' -p53 -(dp54 -g35 -S'NC_000007.13:g.94039036del' -p55 -sg37 -(dp56 -g39 -S'7' -p57 -sg41 -S'AG' -p58 -sg43 -S'94039033' -p59 -sg45 -g46 -sssS'grch38' -p60 -(dp61 -g35 -S'NC_000007.14:g.94409724del' -p62 -sg37 -(dp63 -g39 -g57 -sg41 -S'AG' -p64 -sg43 -S'94409721' -p65 -sg45 -g46 -ssssS'reference_sequence_records' -p66 -(dp67 -S'protein' -p68 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000080.2' -p69 -sS'transcript' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000089.3' -p71 -sssS'metadata' -p72 -(dp73 -S'variantvalidator_hgvs_version' -p74 -S'1.1.3' -p75 -sS'uta_schema' -p76 -S'uta_20180821' -p77 -sS'seqrepo_db' -p78 -S'2018-08-21' -p79 -sS'variantvalidator_version' -p80 -S'v0.2' -p81 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant52.txt b/VariantValidator/testing/testOutputsMasterITS/variant52.txt deleted file mode 100644 index af719c3c..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant52.txt +++ /dev/null @@ -1,170 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_000088.3:c.589G=' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'RefSeqGene record not available' -p19 -asS'refseqgene_context_intronic_sequence' -p20 -g16 -sS'alt_genomic_loci' -p21 -(lp22 -sS'transcript_description' -p23 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p24 -sS'gene_symbol' -p25 -S'COL1A1' -p26 -sS'hgvs_predicted_protein_consequence' -p27 -(dp28 -S'tlr' -p29 -S'NP_000079.2:p.(Gly197=)' -p30 -sS'slr' -p31 -S'NP_000079.2:p.(G197=)' -p32 -ssS'submitted_variant' -p33 -S'NM_000088.3:c.589G=' -p34 -sS'genome_context_intronic_sequence' -p35 -g16 -sS'hgvs_lrg_variant' -p36 -g16 -sS'hgvs_transcript_variant' -p37 -S'NM_000088.3:c.589G=' -p38 -sS'hgvs_refseqgene_variant' -p39 -g16 -sS'primary_assembly_loci' -p40 -(dp41 -S'hg19' -p42 -(dp43 -S'hgvs_genomic_description' -p44 -S'NC_000017.10:g.48275363C=' -p45 -sS'vcf' -p46 -(dp47 -S'chr' -p48 -S'chr17' -p49 -sS'ref' -p50 -VC -p51 -sS'pos' -p52 -S'48275363' -p53 -sS'alt' -p54 -g51 -sssS'hg38' -p55 -(dp56 -g44 -S'NC_000017.11:g.50198002C=' -p57 
-sg46 -(dp58 -g48 -g49 -sg50 -g51 -sg52 -S'50198002' -p59 -sg54 -g51 -sssS'grch37' -p60 -(dp61 -g44 -S'NC_000017.10:g.48275363C=' -p62 -sg46 -(dp63 -g48 -S'17' -p64 -sg50 -g51 -sg52 -S'48275363' -p65 -sg54 -g51 -sssS'grch38' -p66 -(dp67 -g44 -S'NC_000017.11:g.50198002C=' -p68 -sg46 -(dp69 -g48 -g64 -sg50 -g51 -sg52 -S'50198002' -p70 -sg54 -g51 -ssssS'reference_sequence_records' -p71 -(dp72 -S'protein' -p73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p74 -sS'transcript' -p75 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p76 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant53.txt b/VariantValidator/testing/testOutputsMasterITS/variant53.txt deleted file mode 100644 index 82bb9336..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant53.txt +++ /dev/null @@ -1,170 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.642A=' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2:p.(Ser214=)' -p20 -sS'slr' -p21 -S'NP_000079.2:p.(S214=)' -p22 -ssS'submitted_variant' -p23 -S'NM_000088.3:c.642A=' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_000088.3:c.642A=' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000017.10:g.48275310T=' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr17' -p39 -sS'ref' -p40 -VT -p41 -sS'pos' -p42 -S'48275310' -p43 -sS'alt' -p44 -g41 -sssS'hg38' -p45 -(dp46 -g34 -S'NC_000017.11:g.50197949T=' -p47 -sg36 -(dp48 -g38 -g39 
-sg40 -g41 -sg42 -S'50197949' -p49 -sg44 -g41 -sssS'grch37' -p50 -(dp51 -g34 -S'NC_000017.10:g.48275310T=' -p52 -sg36 -(dp53 -g38 -S'17' -p54 -sg40 -g41 -sg42 -S'48275310' -p55 -sg44 -g41 -sssS'grch38' -p56 -(dp57 -g34 -S'NC_000017.11:g.50197949T=' -p58 -sg36 -(dp59 -g38 -g54 -sg40 -g41 -sg42 -S'50197949' -p60 -sg44 -g41 -ssssS'reference_sequence_records' -p61 -(dp62 -S'protein' -p63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p64 -sS'transcript' -p65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p66 -sssS'metadata' -p67 -(dp68 -S'variantvalidator_hgvs_version' -p69 -S'1.1.3' -p70 -sS'uta_schema' -p71 -S'uta_20180821' -p72 -sS'seqrepo_db' -p73 -S'2018-08-21' -p74 -sS'variantvalidator_version' -p75 -S'v0.2' -p76 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant54.txt b/VariantValidator/testing/testOutputsMasterITS/variant54.txt deleted file mode 100644 index 06d041ef..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant54.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.642+1_642+2delinsG' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000088.3:c.642+1GG>G automapped to NM_000088.3:c.642+1_642+2delGGinsG' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A1' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000079.2:p.?' -p21 -sS'slr' -p22 -S'NP_000079.2:p.?' 
-p23 -ssS'submitted_variant' -p24 -S'NM_000088.3:c.642+1GG>G' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000017.10(NM_000088.3):c.642+1_642+2delinsG' -p27 -sS'hgvs_lrg_variant' -p28 -g6 -sS'hgvs_transcript_variant' -p29 -S'NM_000088.3:c.642+1_642+2delinsG' -p30 -sS'hgvs_refseqgene_variant' -p31 -g6 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000017.10:g.48275308_48275309delinsC' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr17' -p41 -sS'ref' -p42 -S'TA' -p43 -sS'pos' -p44 -S'48275307' -p45 -sS'alt' -p46 -S'T' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000017.11:g.50197947_50197948delinsC' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'TA' -p52 -sg44 -S'50197946' -p53 -sg46 -g47 -sssS'grch37' -p54 -(dp55 -g36 -S'NC_000017.10:g.48275308_48275309delinsC' -p56 -sg38 -(dp57 -g40 -S'17' -p58 -sg42 -S'TA' -p59 -sg44 -S'48275307' -p60 -sg46 -g47 -sssS'grch38' -p61 -(dp62 -g36 -S'NC_000017.11:g.50197947_50197948delinsC' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -S'TA' -p65 -sg44 -S'50197946' -p66 -sg46 -g47 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p72 -sssS'metadata' -p73 -(dp74 -S'variantvalidator_hgvs_version' -p75 -S'1.1.3' -p76 -sS'uta_schema' -p77 -S'uta_20180821' -p78 -sS'seqrepo_db' -p79 -S'2018-08-21' -p80 -sS'variantvalidator_version' -p81 -S'v0.2' -p82 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant55.txt b/VariantValidator/testing/testOutputsMasterITS/variant55.txt deleted file mode 100644 index f9a61c92..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant55.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589-2_589-1delinsG' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000088.3:c.589-2GG>G automapped to NM_000088.3:c.589-2_589-1delGGinsG' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A1' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000079.2:p.?' -p21 -sS'slr' -p22 -S'NP_000079.2:p.?' -p23 -ssS'submitted_variant' -p24 -S'NM_000088.3:c.589-2GG>G' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000017.10(NM_000088.3):c.589-2_589-1delinsG' -p27 -sS'hgvs_lrg_variant' -p28 -g6 -sS'hgvs_transcript_variant' -p29 -S'NM_000088.3:c.589-2_589-1delinsG' -p30 -sS'hgvs_refseqgene_variant' -p31 -g6 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000017.10:g.48275364_48275365delinsC' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr17' -p41 -sS'ref' -p42 -S'CT' -p43 -sS'pos' -p44 -S'48275364' -p45 -sS'alt' -p46 -S'C' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000017.11:g.50198003_50198004delinsC' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'CT' -p52 -sg44 -S'50198003' -p53 -sg46 -g47 -sssS'grch37' -p54 -(dp55 -g36 -S'NC_000017.10:g.48275364_48275365delinsC' -p56 -sg38 -(dp57 -g40 -S'17' -p58 -sg42 -S'CT' -p59 -sg44 -S'48275364' -p60 -sg46 -g47 -sssS'grch38' -p61 -(dp62 -g36 -S'NC_000017.11:g.50198003_50198004delinsC' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -S'CT' -p65 -sg44 -S'50198003' -p66 
-sg46 -g47 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p72 -sssS'metadata' -p73 -(dp74 -S'variantvalidator_hgvs_version' -p75 -S'1.1.3' -p76 -sS'uta_schema' -p77 -S'uta_20180821' -p78 -sS'seqrepo_db' -p79 -S'2018-08-21' -p80 -sS'variantvalidator_version' -p81 -S'v0.2' -p82 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant56.txt b/VariantValidator/testing/testOutputsMasterITS/variant56.txt deleted file mode 100644 index e22ae1d7..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant56.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589-5_589-4insTTTT' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000088.3:c.589-6_589-5insTTTT normalized to NM_000088.3:c.589-5_589-4insTTTT' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A1' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000079.2:p.?' -p21 -sS'slr' -p22 -S'NP_000079.2:p.?' 
-p23 -ssS'submitted_variant' -p24 -S'NM_000088.3:c.589-6_589-5insTTTT' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000017.10(NM_000088.3):c.589-5_589-4insTTTT' -p27 -sS'hgvs_lrg_variant' -p28 -g6 -sS'hgvs_transcript_variant' -p29 -S'NM_000088.3:c.589-5_589-4insTTTT' -p30 -sS'hgvs_refseqgene_variant' -p31 -g6 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000017.10:g.48275367_48275368insAAAA' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr17' -p41 -sS'ref' -p42 -S'G' -p43 -sS'pos' -p44 -S'48275367' -p45 -sS'alt' -p46 -VGAAAA -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000017.11:g.50198006_50198007insAAAA' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -g43 -sg44 -S'50198006' -p52 -sg46 -VGAAAA -p53 -sssS'grch37' -p54 -(dp55 -g36 -S'NC_000017.10:g.48275367_48275368insAAAA' -p56 -sg38 -(dp57 -g40 -S'17' -p58 -sg42 -g43 -sg44 -S'48275367' -p59 -sg46 -VGAAAA -p60 -sssS'grch38' -p61 -(dp62 -g36 -S'NC_000017.11:g.50198006_50198007insAAAA' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -g43 -sg44 -S'50198006' -p65 -sg46 -VGAAAA -p66 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p72 -sssS'metadata' -p73 -(dp74 -S'variantvalidator_hgvs_version' -p75 -S'1.1.3' -p76 -sS'uta_schema' -p77 -S'uta_20180821' -p78 -sS'seqrepo_db' -p79 -S'2018-08-21' -p80 -sS'variantvalidator_version' -p81 -S'v0.2' -p82 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant57.txt b/VariantValidator/testing/testOutputsMasterITS/variant57.txt deleted file mode 100644 index a1f9b37a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant57.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.642+4_642+5insAAAA' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000088.3:c.642+3_642+4insAAAA normalized to NM_000088.3:c.642+4_642+5insAAAA' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A1' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000079.2:p.?' -p21 -sS'slr' -p22 -S'NP_000079.2:p.?' -p23 -ssS'submitted_variant' -p24 -S'NM_000088.3:c.642+3_642+4insAAAA' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000017.10(NM_000088.3):c.642+4_642+5insAAAA' -p27 -sS'hgvs_lrg_variant' -p28 -g6 -sS'hgvs_transcript_variant' -p29 -S'NM_000088.3:c.642+4_642+5insAAAA' -p30 -sS'hgvs_refseqgene_variant' -p31 -g6 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000017.10:g.48275305_48275306insTTTT' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr17' -p41 -sS'ref' -p42 -S'C' -p43 -sS'pos' -p44 -S'48275305' -p45 -sS'alt' -p46 -VCTTTT -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000017.11:g.50197944_50197945insTTTT' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -g43 -sg44 -S'50197944' -p52 -sg46 -VCTTTT -p53 -sssS'grch37' -p54 -(dp55 -g36 -S'NC_000017.10:g.48275305_48275306insTTTT' -p56 -sg38 -(dp57 -g40 -S'17' -p58 -sg42 -g43 -sg44 -S'48275305' -p59 -sg46 -VCTTTT -p60 -sssS'grch38' -p61 -(dp62 -g36 -S'NC_000017.11:g.50197944_50197945insTTTT' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -g43 -sg44 
-S'50197944' -p65 -sg46 -VCTTTT -p66 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p72 -sssS'metadata' -p73 -(dp74 -S'variantvalidator_hgvs_version' -p75 -S'1.1.3' -p76 -sS'uta_schema' -p77 -S'uta_20180821' -p78 -sS'seqrepo_db' -p79 -S'2018-08-21' -p80 -sS'variantvalidator_version' -p81 -S'v0.2' -p82 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant58.txt b/VariantValidator/testing/testOutputsMasterITS/variant58.txt deleted file mode 100644 index e75142b4..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant58.txt +++ /dev/null @@ -1,175 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589-4_589-3insTT' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p14 -sS'gene_symbol' -p15 -S'COL1A1' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_000079.2:p.?' -p20 -sS'slr' -p21 -S'NP_000079.2:p.?' 
-p22 -ssS'submitted_variant' -p23 -S'NM_000088.3:c.589-4_589-3insTT' -p24 -sS'genome_context_intronic_sequence' -p25 -S'NC_000017.10(NM_000088.3):c.589-4_589-3insTT' -p26 -sS'hgvs_lrg_variant' -p27 -g6 -sS'hgvs_transcript_variant' -p28 -S'NM_000088.3:c.589-4_589-3insTT' -p29 -sS'hgvs_refseqgene_variant' -p30 -g6 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'hgvs_genomic_description' -p35 -S'NC_000017.10:g.48275366_48275367insAA' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr17' -p40 -sS'ref' -p41 -S'T' -p42 -sS'pos' -p43 -S'48275366' -p44 -sS'alt' -p45 -VTAA -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000017.11:g.50198005_50198006insAA' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -g42 -sg43 -S'50198005' -p51 -sg45 -VTAA -p52 -sssS'grch37' -p53 -(dp54 -g35 -S'NC_000017.10:g.48275366_48275367insAA' -p55 -sg37 -(dp56 -g39 -S'17' -p57 -sg41 -g42 -sg43 -S'48275366' -p58 -sg45 -VTAA -p59 -sssS'grch38' -p60 -(dp61 -g35 -S'NC_000017.11:g.50198005_50198006insAA' -p62 -sg37 -(dp63 -g39 -g57 -sg41 -g42 -sg43 -S'50198005' -p64 -sg45 -VTAA -p65 -ssssS'reference_sequence_records' -p66 -(dp67 -S'protein' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p69 -sS'transcript' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p71 -sssS'metadata' -p72 -(dp73 -S'variantvalidator_hgvs_version' -p74 -S'1.1.3' -p75 -sS'uta_schema' -p76 -S'uta_20180821' -p77 -sS'seqrepo_db' -p78 -S'2018-08-21' -p79 -sS'variantvalidator_version' -p80 -S'v0.2' -p81 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant59.txt b/VariantValidator/testing/testOutputsMasterITS/variant59.txt deleted file mode 100644 index ea974b83..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant59.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589-7del' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000088.3:c.589-8del normalized to NM_000088.3:c.589-7del' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A1' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000079.2:p.?' -p21 -sS'slr' -p22 -S'NP_000079.2:p.?' -p23 -ssS'submitted_variant' -p24 -S'NM_000088.3:c.589-8del' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000017.10(NM_000088.3):c.589-7del' -p27 -sS'hgvs_lrg_variant' -p28 -g6 -sS'hgvs_transcript_variant' -p29 -S'NM_000088.3:c.589-7del' -p30 -sS'hgvs_refseqgene_variant' -p31 -g6 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000017.10:g.48275370del' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr17' -p41 -sS'ref' -p42 -S'GA' -p43 -sS'pos' -p44 -S'48275369' -p45 -sS'alt' -p46 -S'G' -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000017.11:g.50198009del' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -S'GA' -p52 -sg44 -S'50198008' -p53 -sg46 -g47 -sssS'grch37' -p54 -(dp55 -g36 -S'NC_000017.10:g.48275370del' -p56 -sg38 -(dp57 -g40 -S'17' -p58 -sg42 -S'GA' -p59 -sg44 -S'48275369' -p60 -sg46 -g47 -sssS'grch38' -p61 -(dp62 -g36 -S'NC_000017.11:g.50198009del' -p63 -sg38 -(dp64 -g40 -g58 -sg42 -S'GA' -p65 -sg44 -S'50198008' -p66 -sg46 -g47 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p72 -sssS'metadata' -p73 -(dp74 -S'variantvalidator_hgvs_version' -p75 -S'1.1.3' -p76 -sS'uta_schema' -p77 -S'uta_20180821' -p78 -sS'seqrepo_db' -p79 -S'2018-08-21' -p80 -sS'variantvalidator_version' -p81 -S'v0.2' -p82 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant6.txt b/VariantValidator/testing/testOutputsMasterITS/variant6.txt deleted file mode 100644 index 7319021d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant6.txt +++ /dev/null @@ -1,143 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_001145026.1:c.715A>G' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -aS'Suspected incomplete alignment between transcript NM_001145026.1 and genomic reference sequence NC_000012.11' -p10 -aS'NM_001145026.1:c.715A>G cannot be mapped directly to genome build GRCh37' -p11 -aS'See alternative genomic loci or alternative genome builds for aligned genomic positions' -p12 -asS'refseqgene_context_intronic_sequence' -p13 -g6 -sS'alt_genomic_loci' -p14 -(lp15 -sS'transcript_description' -p16 -VHomo sapiens protein tyrosine phosphatase, receptor type Q (PTPRQ), mRNA -p17 -sS'gene_symbol' -p18 -S'PTPRQ' -p19 -sS'hgvs_predicted_protein_consequence' -p20 -(dp21 -S'tlr' -p22 -S'NP_001138498.1:p.(Arg239Gly)' -p23 -sS'slr' -p24 -S'NP_001138498.1:p.(R239G)' -p25 -ssS'submitted_variant' -p26 -S'NM_001145026.1:c.715A>G' -p27 -sS'genome_context_intronic_sequence' -p28 -g6 -sS'hgvs_lrg_variant' -p29 -g6 -sS'hgvs_transcript_variant' -p30 -S'NM_001145026.1:c.715A>G' -p31 -sS'hgvs_refseqgene_variant' -p32 -g6 -sS'primary_assembly_loci' -p33 -(dp34 -S'grch38' -p35 -(dp36 -S'hgvs_genomic_description' -p37 -S'NC_000012.12:g.80460707A>G' -p38 -sS'vcf' -p39 -(dp40 -S'chr' -p41 -S'12' -p42 -sS'ref' -p43 -VA -p44 
-sS'pos' -p45 -S'80460707' -p46 -sS'alt' -p47 -VG -p48 -sssS'hg38' -p49 -(dp50 -g37 -S'NC_000012.12:g.80460707A>G' -p51 -sg39 -(dp52 -g41 -S'chr12' -p53 -sg43 -g44 -sg45 -S'80460707' -p54 -sg47 -g48 -ssssS'reference_sequence_records' -p55 -(dp56 -S'protein' -p57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001138498.1' -p58 -sS'transcript' -p59 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001145026.1' -p60 -sssS'metadata' -p61 -(dp62 -S'variantvalidator_hgvs_version' -p63 -S'1.1.3' -p64 -sS'uta_schema' -p65 -S'uta_20180821' -p66 -sS'seqrepo_db' -p67 -S'2018-08-21' -p68 -sS'variantvalidator_version' -p69 -S'v0.2' -p70 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant60.txt b/VariantValidator/testing/testOutputsMasterITS/variant60.txt deleted file mode 100644 index 4eb27550..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant60.txt +++ /dev/null @@ -1,174 +0,0 @@ -(dp0 -S'NM_000527.4:c.-187_-185del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens low density lipoprotein receptor (LDLR), transcript variant 1, mRNA -p12 -sS'gene_symbol' -p13 -S'LDLR' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_000518.1:p.?' -p18 -sS'slr' -p19 -S'NP_000518.1:p.?' 
-p20 -ssS'submitted_variant' -p21 -S'NM_000527.4:c.-187_-185delCTC' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_000527.4:c.-187_-185del' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'grch38' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000019.10:g.11089362_11089364del' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'19' -p37 -sS'ref' -p38 -S'ACTC' -p39 -sS'pos' -p40 -S'11089355' -p41 -sS'alt' -p42 -S'A' -p43 -sssS'grch37' -p44 -(dp45 -g32 -S'NC_000019.9:g.11200038_11200040del' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -S'ACTC' -p48 -sg40 -S'11200031' -p49 -sg42 -g43 -sssS'hg38' -p50 -(dp51 -g32 -S'NC_000019.10:g.11089362_11089364del' -p52 -sg34 -(dp53 -g36 -S'chr19' -p54 -sg38 -S'ACTC' -p55 -sg40 -S'11089355' -p56 -sg42 -g43 -sssS'hg19' -p57 -(dp58 -g32 -S'NC_000019.9:g.11200038_11200040del' -p59 -sg34 -(dp60 -g36 -g54 -sg38 -S'ACTC' -p61 -sg40 -S'11200031' -p62 -sg42 -g43 -ssssS'reference_sequence_records' -p63 -(dp64 -S'protein' -p65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000518.1' -p66 -sS'transcript' -p67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000527.4' -p68 -sssS'flag' -p69 -S'gene_variant' -p70 -sS'metadata' -p71 -(dp72 -S'variantvalidator_hgvs_version' -p73 -S'1.1.3' -p74 -sS'uta_schema' -p75 -S'uta_20180821' -p76 -sS'seqrepo_db' -p77 -S'2018-08-21' -p78 -sS'variantvalidator_version' -p79 -S'v0.2' -p80 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant61.txt b/VariantValidator/testing/testOutputsMasterITS/variant61.txt deleted file mode 100644 index 413e615a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant61.txt +++ /dev/null @@ -1,171 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_206933.2:c.6317C>G' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens usherin (USH2A), transcript variant 2, mRNA -p14 -sS'gene_symbol' -p15 -S'USH2A' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_996816.2:p.(Thr2106Arg)' -p20 -sS'slr' -p21 -S'NP_996816.2:p.(T2106R)' -p22 -ssS'submitted_variant' -p23 -S'NM_206933.2:c.6317C>G' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_206933.2:c.6317C>G' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000001.10:g.216219781A>C' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr1' -p39 -sS'ref' -p40 -S'A' -p41 -sS'pos' -p42 -S'216219781' -p43 -sS'alt' -p44 -VC -p45 -sssS'hg38' -p46 -(dp47 -g34 -S'NC_000001.11:g.216046439A>C' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -g41 -sg42 -S'216046439' -p50 -sg44 -g45 -sssS'grch37' -p51 -(dp52 -g34 -S'NC_000001.10:g.216219781A>C' -p53 -sg36 -(dp54 -g38 -S'1' -p55 -sg40 -g41 -sg42 -S'216219781' -p56 -sg44 -g45 -sssS'grch38' -p57 -(dp58 -g34 -S'NC_000001.11:g.216046439A>C' -p59 -sg36 -(dp60 -g38 -g55 -sg40 -g41 -sg42 -S'216046439' -p61 -sg44 -g45 -ssssS'reference_sequence_records' -p62 -(dp63 -S'protein' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_996816.2' -p65 -sS'transcript' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_206933.2' -p67 
-sssS'metadata' -p68 -(dp69 -S'variantvalidator_hgvs_version' -p70 -S'1.1.3' -p71 -sS'uta_schema' -p72 -S'uta_20180821' -p73 -sS'seqrepo_db' -p74 -S'2018-08-21' -p75 -sS'variantvalidator_version' -p76 -S'v0.2' -p77 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant62.txt b/VariantValidator/testing/testOutputsMasterITS/variant62.txt deleted file mode 100644 index a7821cbb..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant62.txt +++ /dev/null @@ -1,171 +0,0 @@ -(dp0 -S'NM_000059.3:c.7397C=' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens BRCA2, DNA repair associated (BRCA2), mRNA -p12 -sS'gene_symbol' -p13 -S'BRCA2' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_000050.2:p.(Ala2466=)' -p18 -sS'slr' -p19 -S'NP_000050.2:p.(A2466=)' -p20 -ssS'submitted_variant' -p21 -S'NC_000013.10:g.32929387T>C' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_000059.3:c.7397C=' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000013.10:g.32929387T>C' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr13' -p37 -sS'ref' -p38 -S'T' -p39 -sS'pos' -p40 -S'32929387' -p41 -sS'alt' -p42 -S'C' -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000013.11:g.32355250T>C' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -g39 -sg40 -S'32355250' -p48 -sg42 -g43 -sssS'grch37' -p49 -(dp50 -g32 -S'NC_000013.10:g.32929387T>C' -p51 -sg34 -(dp52 -g36 -S'13' -p53 -sg38 -g39 -sg40 -S'32929387' -p54 -sg42 -g43 -sssS'grch38' -p55 -(dp56 -g32 -S'NC_000013.11:g.32355250T>C' -p57 -sg34 -(dp58 -g36 -g53 -sg38 -g39 -sg40 -S'32355250' -p59 -sg42 -g43 
-ssssS'reference_sequence_records' -p60 -(dp61 -S'protein' -p62 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000050.2' -p63 -sS'transcript' -p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000059.3' -p65 -sssS'flag' -p66 -S'gene_variant' -p67 -sS'metadata' -p68 -(dp69 -S'variantvalidator_hgvs_version' -p70 -S'1.1.3' -p71 -sS'uta_schema' -p72 -S'uta_20180821' -p73 -sS'seqrepo_db' -p74 -S'2018-08-21' -p75 -sS'variantvalidator_version' -p76 -S'v0.2' -p77 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant63.txt b/VariantValidator/testing/testOutputsMasterITS/variant63.txt deleted file mode 100644 index 06210e37..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant63.txt +++ /dev/null @@ -1,144 +0,0 @@ -(dp0 -S'NM_015102.3:c.2818-2T>A' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'A more recent version of the selected reference sequence NM_015102.3 is available (NM_015102.4)' -p7 -aS'NM_015102.4:c.2818-2T>A MUST be fully validated prior to use in reports' -p8 -aS'select_variants=NM_015102.4:c.2818-2T>A' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g4 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens nephronophthisis 4 (NPHP4), mRNA -p15 -sS'gene_symbol' -p16 -S'NPHP4' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_055917.1:p.?' -p21 -sS'slr' -p22 -S'NP_055917.1:p.?' 
-p23 -ssS'submitted_variant' -p24 -S'NM_015102.3:c.2818-2T>A' -p25 -sS'genome_context_intronic_sequence' -p26 -S'NC_000001.10(NM_015102.3):c.2818-2T>A' -p27 -sS'hgvs_lrg_variant' -p28 -g4 -sS'hgvs_transcript_variant' -p29 -S'NM_015102.3:c.2818-2T>A' -p30 -sS'hgvs_refseqgene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000001.10:g.5935162A>T' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr1' -p41 -sS'ref' -p42 -VA -p43 -sS'pos' -p44 -S'5935162' -p45 -sS'alt' -p46 -VT -p47 -sssS'grch37' -p48 -(dp49 -g36 -S'NC_000001.10:g.5935162A>T' -p50 -sg38 -(dp51 -g40 -S'1' -p52 -sg42 -g43 -sg44 -S'5935162' -p53 -sg46 -g47 -ssssS'reference_sequence_records' -p54 -(dp55 -S'protein' -p56 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055917.1' -p57 -sS'transcript' -p58 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_015102.3' -p59 -sssS'flag' -p60 -S'gene_variant' -p61 -sS'metadata' -p62 -(dp63 -S'variantvalidator_hgvs_version' -p64 -S'1.1.3' -p65 -sS'uta_schema' -p66 -S'uta_20180821' -p67 -sS'seqrepo_db' -p68 -S'2018-08-21' -p69 -sS'variantvalidator_version' -p70 -S'v0.2' -p71 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant64.txt b/VariantValidator/testing/testOutputsMasterITS/variant64.txt deleted file mode 100644 index 5e367564..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant64.txt +++ /dev/null @@ -1,439 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_001042544.1:c.3233_3235=' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000019.9:g.41123094G>GG automapped to NC_000019.9:g.41123095dupG' -p9 -aS'The displayed variants may be artefacts of aligning NM_001042544.1 with genome build GRCh37' -p10 -aS'NM_001042544.1:c.3233_3235 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p11 -aS'Caution should be used when reporting the displayed variant descriptions' -p12 -aS'If you are unsure, please contact admin' -p13 -aS'RefSeqGene record not available' -p14 -asS'refseqgene_context_intronic_sequence' -p15 -g6 -sS'alt_genomic_loci' -p16 -(lp17 -sS'transcript_description' -p18 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA -p19 -sS'gene_symbol' -p20 -S'LTBP4' -p21 -sS'hgvs_predicted_protein_consequence' -p22 -(dp23 -S'tlr' -p24 -S'NP_001036009.1:p.(Gln1078=)' -p25 -sS'slr' -p26 -S'NP_001036009.1:p.(Q1078=)' -p27 -ssS'submitted_variant' -p28 -S'19-41123094-G-GG' -p29 -sS'genome_context_intronic_sequence' -p30 -g6 -sS'hgvs_lrg_variant' -p31 -g6 -sS'hgvs_transcript_variant' -p32 -S'NM_001042544.1:c.3233_3235=' -p33 -sS'hgvs_refseqgene_variant' -p34 -g6 -sS'primary_assembly_loci' -p35 -(dp36 -S'grch38' -p37 -(dp38 -S'hgvs_genomic_description' -p39 -S'NC_000019.10:g.40617187_40617189=' -p40 -sS'vcf' -p41 -(dp42 -S'chr' -p43 -S'19' -p44 -sS'ref' -p45 -VAGG -p46 -sS'pos' -p47 -S'40617187' -p48 -sS'alt' -p49 -g46 -sssS'grch37' -p50 -(dp51 -g39 -S'NC_000019.9:g.41123095dup' -p52 -sg41 -(dp53 -g43 -g44 -sg45 -S'G' -p54 -sg47 -S'41123094' -p55 -sg49 
-VGG -p56 -sssS'hg38' -p57 -(dp58 -g39 -S'NC_000019.10:g.40617187_40617189=' -p59 -sg41 -(dp60 -g43 -S'chr19' -p61 -sg45 -g46 -sg47 -S'40617187' -p62 -sg49 -g46 -sssS'hg19' -p63 -(dp64 -g39 -S'NC_000019.9:g.41123095dup' -p65 -sg41 -(dp66 -g43 -g61 -sg45 -g54 -sg47 -S'41123094' -p67 -sg49 -VGG -p68 -ssssS'reference_sequence_records' -p69 -(dp70 -S'protein' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1' -p72 -sS'transcript' -p73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1' -p74 -sssS'metadata' -p75 -(dp76 -S'variantvalidator_hgvs_version' -p77 -S'1.1.3' -p78 -sS'uta_schema' -p79 -S'uta_20180821' -p80 -sS'seqrepo_db' -p81 -S'2018-08-21' -p82 -sS'variantvalidator_version' -p83 -S'v0.2' -p84 -ssS'NM_001042545.1:c.3032_3034=' -p85 -(dp86 -g5 -g6 -sg7 -(lp87 -S'NC_000019.9:g.41123094G>GG automapped to NC_000019.9:g.41123095dupG' -p88 -aS'The displayed variants may be artefacts of aligning NM_001042545.1 with genome build GRCh37' -p89 -aS'NM_001042545.1:c.3032_3034 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p90 -aS'Caution should be used when reporting the displayed variant descriptions' -p91 -aS'If you are unsure, please contact admin' -p92 -aS'RefSeqGene record not available' -p93 -asg15 -g6 -sg16 -(lp94 -sg18 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA -p95 -sg20 -S'LTBP4' -p96 -sg22 -(dp97 -g24 -S'NP_001036010.1:p.(Gln1011=)' -p98 -sg26 -S'NP_001036010.1:p.(Q1011=)' -p99 -ssg28 -g29 -sg30 -g6 -sg31 -g6 -sg32 -S'NM_001042545.1:c.3032_3034=' -p100 -sg34 -g6 -sg35 -(dp101 -S'grch38' -p102 -(dp103 -g39 -S'NC_000019.10:g.40617187_40617189=' -p104 -sg41 -(dp105 -g43 -g44 -sg45 -VAGG -p106 -sg47 -S'40617187' -p107 -sg49 -g106 -sssS'grch37' -p108 -(dp109 -g39 -S'NC_000019.9:g.41123095dup' -p110 -sg41 -(dp111 -g43 -g44 -sg45 -g54 -sg47 -S'41123094' -p112 -sg49 -VGG -p113 -sssg57 -(dp114 -g39 -S'NC_000019.10:g.40617187_40617189=' -p115 -sg41 -(dp116 -g43 
-g61 -sg45 -g106 -sg47 -S'40617187' -p117 -sg49 -g106 -sssS'hg19' -p118 -(dp119 -g39 -S'NC_000019.9:g.41123095dup' -p120 -sg41 -(dp121 -g43 -g61 -sg45 -g54 -sg47 -S'41123094' -p122 -sg49 -VGG -p123 -ssssg69 -(dp124 -g71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1' -p125 -sg73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1' -p126 -sssS'NM_003573.2:c.3122_3124=' -p127 -(dp128 -g5 -g6 -sg7 -(lp129 -S'NC_000019.9:g.41123094G>GG automapped to NC_000019.9:g.41123095dupG' -p130 -aS'The displayed variants may be artefacts of aligning NM_003573.2 with genome build GRCh37' -p131 -aS'NM_003573.2:c.3122_3124 contains 1 transcript base(s) that fail to align to chromosome NC_000019.9' -p132 -aS'Caution should be used when reporting the displayed variant descriptions' -p133 -aS'If you are unsure, please contact admin' -p134 -aS'RefSeqGene record not available' -p135 -asg15 -g6 -sg16 -(lp136 -sg18 -VHomo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA -p137 -sg20 -S'LTBP4' -p138 -sg22 -(dp139 -g24 -S'NP_003564.2:p.(Gln1041=)' -p140 -sg26 -S'NP_003564.2:p.(Q1041=)' -p141 -ssg28 -g29 -sg30 -g6 -sg31 -g6 -sg32 -S'NM_003573.2:c.3122_3124=' -p142 -sg34 -g6 -sg35 -(dp143 -S'grch38' -p144 -(dp145 -g39 -S'NC_000019.10:g.40617187_40617189=' -p146 -sg41 -(dp147 -g43 -g44 -sg45 -VAGG -p148 -sg47 -S'40617187' -p149 -sg49 -g148 -sssS'grch37' -p150 -(dp151 -g39 -S'NC_000019.9:g.41123095dup' -p152 -sg41 -(dp153 -g43 -g44 -sg45 -g54 -sg47 -S'41123094' -p154 -sg49 -VGG -p155 -sssg57 -(dp156 -g39 -S'NC_000019.10:g.40617187_40617189=' -p157 -sg41 -(dp158 -g43 -g61 -sg45 -g148 -sg47 -S'40617187' -p159 -sg49 -g148 -sssS'hg19' -p160 -(dp161 -g39 -S'NC_000019.9:g.41123095dup' -p162 -sg41 -(dp163 -g43 -g61 -sg45 -g54 -sg47 -S'41123094' -p164 -sg49 -VGG -p165 -ssssg69 -(dp166 -g71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2' -p167 -sg73 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2' -p168 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant65.txt b/VariantValidator/testing/testOutputsMasterITS/variant65.txt deleted file mode 100644 index 22ec5a84..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant65.txt +++ /dev/null @@ -1,511 +0,0 @@ -(dp0 -S'NM_014249.2:c.946_949=' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC' -p7 -aS'The displayed variants may be artefacts of aligning NM_014249.2 with genome build GRCh37' -p8 -aS'NC_000015.9:g.72105928_72105929 contains 1 genomic base(s) that fail to align to transcript NM_014249.2' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -aS'A more recent version of the selected reference sequence NM_014249.2 is available (NM_014249.3)' -p12 -aS'NM_014249.3:c.946_949GACC= MUST be fully validated prior to use in reports' -p13 -aS'select_variants=NM_014249.3:c.946_949=' -p14 -aS'RefSeqGene record not available' -p15 -asS'refseqgene_context_intronic_sequence' -p16 -g4 -sS'alt_genomic_loci' -p17 -(lp18 -sS'transcript_description' -p19 -VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA -p20 -sS'gene_symbol' -p21 -S'NR2E3' -p22 -sS'hgvs_predicted_protein_consequence' -p23 -(dp24 -S'tlr' -p25 -S'NP_055064.1:p.(Asp316=)' -p26 -sS'slr' -p27 -S'NP_055064.1:p.(D316=)' -p28 -ssS'submitted_variant' -p29 -S'15-72105928-AC-A' -p30 -sS'genome_context_intronic_sequence' -p31 -g4 -sS'hgvs_lrg_variant' -p32 -g4 -sS'hgvs_transcript_variant' -p33 -S'NM_014249.2:c.946_949=' -p34 -sS'hgvs_refseqgene_variant' -p35 -g4 -sS'primary_assembly_loci' -p36 -(dp37 -S'hg19' -p38 -(dp39 -S'hgvs_genomic_description' -p40 -S'NC_000015.9:g.72105933del' -p41 -sS'vcf' -p42 -(dp43 -S'chr' -p44 -S'chr15' -p45 -sS'ref' -p46 -S'AC' -p47 -sS'pos' -p48 
-S'72105928' -p49 -sS'alt' -p50 -S'A' -p51 -sssS'grch37' -p52 -(dp53 -g40 -S'NC_000015.9:g.72105933del' -p54 -sg42 -(dp55 -g44 -S'15' -p56 -sg46 -S'AC' -p57 -sg48 -S'72105928' -p58 -sg50 -g51 -ssssS'reference_sequence_records' -p59 -(dp60 -S'protein' -p61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1' -p62 -sS'transcript' -p63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2' -p64 -sssS'NM_016346.3:c.946_949=' -p65 -(dp66 -g3 -g4 -sg5 -(lp67 -S'NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC' -p68 -aS'The displayed variants may be artefacts of aligning NM_016346.3 with genome build GRCh37' -p69 -aS'NC_000015.9:g.72105928_72105929 contains 1 genomic base(s) that fail to align to transcript NM_016346.3' -p70 -aS'Caution should be used when reporting the displayed variant descriptions' -p71 -aS'If you are unsure, please contact admin' -p72 -aS'RefSeqGene record not available' -p73 -asg16 -g4 -sg17 -(lp74 -sg19 -VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA -p75 -sg21 -S'NR2E3' -p76 -sg23 -(dp77 -g25 -S'NP_057430.1:p.(Asp316=)' -p78 -sg27 -S'NP_057430.1:p.(D316=)' -p79 -ssg29 -g30 -sg31 -g4 -sg32 -g4 -sg33 -S'NM_016346.3:c.946_949=' -p80 -sg35 -g4 -sg36 -(dp81 -S'grch38' -p82 -(dp83 -g40 -S'NC_000015.10:g.71813587_71813590=' -p84 -sg42 -(dp85 -g44 -g56 -sg46 -VGACC -p86 -sg48 -S'71813587' -p87 -sg50 -g86 -sssS'grch37' -p88 -(dp89 -g40 -S'NC_000015.9:g.72105933del' -p90 -sg42 -(dp91 -g44 -g56 -sg46 -S'AC' -p92 -sg48 -S'72105928' -p93 -sg50 -g51 -sssS'hg38' -p94 -(dp95 -g40 -S'NC_000015.10:g.71813587_71813590=' -p96 -sg42 -(dp97 -g44 -g45 -sg46 -g86 -sg48 -S'71813587' -p98 -sg50 -g86 -sssS'hg19' -p99 -(dp100 -g40 -S'NC_000015.9:g.72105933del' -p101 -sg42 -(dp102 -g44 -g45 -sg46 -S'AC' -p103 -sg48 -S'72105928' -p104 -sg50 -g51 -ssssg59 -(dp105 -g61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1' -p106 -sg63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3' -p107 -sssS'flag' -p108 
-S'gene_variant' -p109 -sS'NM_014249.3:c.946_949=' -p110 -(dp111 -g3 -g4 -sg5 -(lp112 -S'NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC' -p113 -aS'The displayed variants may be artefacts of aligning NM_014249.3 with genome build GRCh37' -p114 -aS'NC_000015.9:g.72105928_72105929 contains 1 genomic base(s) that fail to align to transcript NM_014249.3' -p115 -aS'Caution should be used when reporting the displayed variant descriptions' -p116 -aS'If you are unsure, please contact admin' -p117 -aS'RefSeqGene record not available' -p118 -asg16 -g4 -sg17 -(lp119 -sg19 -VHomo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA -p120 -sg21 -S'NR2E3' -p121 -sg23 -(dp122 -g25 -S'NP_055064.1:p.(Asp316=)' -p123 -sg27 -S'NP_055064.1:p.(D316=)' -p124 -ssg29 -g30 -sg31 -g4 -sg32 -g4 -sg33 -S'NM_014249.3:c.946_949=' -p125 -sg35 -g4 -sg36 -(dp126 -S'grch38' -p127 -(dp128 -g40 -S'NC_000015.10:g.71813587_71813590=' -p129 -sg42 -(dp130 -g44 -g56 -sg46 -VGACC -p131 -sg48 -S'71813587' -p132 -sg50 -g131 -sssS'grch37' -p133 -(dp134 -g40 -S'NC_000015.9:g.72105933del' -p135 -sg42 -(dp136 -g44 -g56 -sg46 -S'AC' -p137 -sg48 -S'72105928' -p138 -sg50 -g51 -sssg94 -(dp139 -g40 -S'NC_000015.10:g.71813587_71813590=' -p140 -sg42 -(dp141 -g44 -g45 -sg46 -g131 -sg48 -S'71813587' -p142 -sg50 -g131 -sssS'hg19' -p143 -(dp144 -g40 -S'NC_000015.9:g.72105933del' -p145 -sg42 -(dp146 -g44 -g45 -sg46 -S'AC' -p147 -sg48 -S'72105928' -p148 -sg50 -g51 -ssssg59 -(dp149 -g61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1' -p150 -sg63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3' -p151 -sssS'NM_016346.2:c.946_949=' -p152 -(dp153 -g3 -g4 -sg5 -(lp154 -S'NC_000015.9:g.72105928AC>A automapped to NC_000015.9:g.72105933delC' -p155 -aS'The displayed variants may be artefacts of aligning NM_016346.2 with genome build GRCh37' -p156 -aS'NC_000015.9:g.72105928_72105929 contains 1 genomic base(s) that fail to align to transcript NM_016346.2' -p157 
-aS'Caution should be used when reporting the displayed variant descriptions' -p158 -aS'If you are unsure, please contact admin' -p159 -aS'A more recent version of the selected reference sequence NM_016346.2 is available (NM_016346.3)' -p160 -aS'NM_016346.3:c.946_949GACC= MUST be fully validated prior to use in reports' -p161 -aS'select_variants=NM_016346.3:c.946_949=' -p162 -aS'RefSeqGene record not available' -p163 -asg16 -g4 -sg17 -(lp164 -sg19 -VHomo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA -p165 -sg21 -S'NR2E3' -p166 -sg23 -(dp167 -g25 -S'NP_057430.1:p.(Asp316=)' -p168 -sg27 -S'NP_057430.1:p.(D316=)' -p169 -ssg29 -g30 -sg31 -g4 -sg32 -g4 -sg33 -S'NM_016346.2:c.946_949=' -p170 -sg35 -g4 -sg36 -(dp171 -S'hg19' -p172 -(dp173 -g40 -S'NC_000015.9:g.72105933del' -p174 -sg42 -(dp175 -g44 -g45 -sg46 -S'AC' -p176 -sg48 -S'72105928' -p177 -sg50 -g51 -sssS'grch37' -p178 -(dp179 -g40 -S'NC_000015.9:g.72105933del' -p180 -sg42 -(dp181 -g44 -g56 -sg46 -S'AC' -p182 -sg48 -S'72105928' -p183 -sg50 -g51 -ssssg59 -(dp184 -g61 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1' -p185 -sg63 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2' -p186 -sssS'metadata' -p187 -(dp188 -S'variantvalidator_hgvs_version' -p189 -S'1.1.3' -p190 -sS'uta_schema' -p191 -S'uta_20180821' -p192 -sS'seqrepo_db' -p193 -S'2018-08-21' -p194 -sS'variantvalidator_version' -p195 -S'v0.2' -p196 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant66.txt b/VariantValidator/testing/testOutputsMasterITS/variant66.txt deleted file mode 100644 index 86a77815..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant66.txt +++ /dev/null @@ -1,223 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_032790.3:c.126_128=' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000012.11:g.122064773CCCGCCA>C automapped to NC_000012.11:g.122064785_122064790del' -p9 -aS'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' -p10 -aS'Genome position NC_000012.11:g.122064780 aligns within a Requires Analysis-bp gap in transcript NM_032790.3 between positions c.126_127' -p11 -aS'Caution should be used when reporting the displayed variant descriptions' -p12 -aS'If you are unsure, please contact admin' -p13 -aS'RefSeqGene record not available' -p14 -asS'refseqgene_context_intronic_sequence' -p15 -g6 -sS'alt_genomic_loci' -p16 -(lp17 -(dp18 -S'grch37' -p19 -(dp20 -S'hgvs_genomic_description' -p21 -S'NW_004504303.2:g.302883_302888del' -p22 -sS'vcf' -p23 -(dp24 -S'chr' -p25 -S'HG1595_PATCH' -p26 -sS'ref' -p27 -S'CCCGCCA' -p28 -sS'pos' -p29 -S'302871' -p30 -sS'alt' -p31 -S'C' -p32 -sssa(dp33 -S'hg19' -p34 -(dp35 -g21 -S'NW_004504303.2:g.302883_302888del' -p36 -sg23 -(dp37 -g25 -S'NW_004504303.2' -p38 -sg27 -S'CCCGCCA' -p39 -sg29 -S'302871' -p40 -sg31 -g32 -sssasS'transcript_description' -p41 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p42 -sS'gene_symbol' -p43 -S'ORAI1' -p44 -sS'hgvs_predicted_protein_consequence' -p45 -(dp46 -S'tlr' -p47 -S'NP_116179.2:p.(Ala42=)' -p48 -sS'slr' -p49 -S'NP_116179.2:p.(A42=)' -p50 -ssS'submitted_variant' -p51 -S'12-122064773-CCCGCCA-C' -p52 -sS'genome_context_intronic_sequence' -p53 -g6 -sS'hgvs_lrg_variant' -p54 -g6 -sS'hgvs_transcript_variant' -p55 -S'NM_032790.3:c.126_128=' 
-p56 -sS'hgvs_refseqgene_variant' -p57 -g6 -sS'primary_assembly_loci' -p58 -(dp59 -S'hg19' -p60 -(dp61 -g21 -S'NC_000012.11:g.122064785_122064790del' -p62 -sg23 -(dp63 -g25 -S'chr12' -p64 -sg27 -S'CCCGCCA' -p65 -sg29 -S'122064773' -p66 -sg31 -g32 -sssS'hg38' -p67 -(dp68 -g21 -S'NC_000012.12:g.121626873_121626875=' -p69 -sg23 -(dp70 -g25 -g64 -sg27 -VCCC -p71 -sg29 -S'121626873' -p72 -sg31 -g71 -sssS'grch37' -p73 -(dp74 -g21 -S'NC_000012.11:g.122064785_122064790del' -p75 -sg23 -(dp76 -g25 -S'12' -p77 -sg27 -S'CCCGCCA' -p78 -sg29 -S'122064773' -p79 -sg31 -g32 -sssS'grch38' -p80 -(dp81 -g21 -S'NC_000012.12:g.121626873_121626875=' -p82 -sg23 -(dp83 -g25 -g77 -sg27 -g71 -sg29 -S'121626873' -p84 -sg31 -g71 -ssssS'reference_sequence_records' -p85 -(dp86 -S'protein' -p87 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' -p88 -sS'transcript' -p89 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' -p90 -sssS'metadata' -p91 -(dp92 -S'variantvalidator_hgvs_version' -p93 -S'1.1.3' -p94 -sS'uta_schema' -p95 -S'uta_20180821' -p96 -sS'seqrepo_db' -p97 -S'2018-08-21' -p98 -sS'variantvalidator_version' -p99 -S'v0.2' -p100 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant67.txt b/VariantValidator/testing/testOutputsMasterITS/variant67.txt deleted file mode 100644 index ad633cd9..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant67.txt +++ /dev/null @@ -1,223 +0,0 @@ -(dp0 -S'NM_032790.3:c.132_137dup' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000012.11:g.122064774CCGCCA>CCGCCA automapped to NC_000012.11:g.122064774_122064779CCGCCA=' -p7 -aS'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' -p8 -aS'NC_000012.11:g.122064773_122064779 contains 6 genomic base(s) that fail to align to transcript NM_032790.3' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -aS'RefSeqGene record not available' -p12 -asS'refseqgene_context_intronic_sequence' -p13 -g4 -sS'alt_genomic_loci' -p14 -(lp15 -(dp16 -S'grch37' -p17 -(dp18 -S'hgvs_genomic_description' -p19 -S'NW_004504303.2:g.302868_302887=' -p20 -sS'vcf' -p21 -(dp22 -S'chr' -p23 -S'HG1595_PATCH' -p24 -sS'ref' -p25 -S'GGCCCCGCCACCGCCACCGC' -p26 -sS'pos' -p27 -S'302868' -p28 -sS'alt' -p29 -g26 -sssa(dp30 -S'hg19' -p31 -(dp32 -g19 -S'NW_004504303.2:g.302868_302887=' -p33 -sg21 -(dp34 -g23 -S'NW_004504303.2' -p35 -sg25 -g26 -sg27 -S'302868' -p36 -sg29 -g26 -sssasS'transcript_description' -p37 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p38 -sS'gene_symbol' -p39 -S'ORAI1' -p40 -sS'hgvs_predicted_protein_consequence' -p41 -(dp42 -S'tlr' -p43 -S'NP_116179.2:p.(Pro46_Pro47dup)' -p44 -sS'slr' -p45 -S'NP_116179.2:p.(P46_P47dup)' -p46 -ssS'submitted_variant' -p47 -S'12-122064774-CCGCCA-CCGCCA' -p48 -sS'genome_context_intronic_sequence' -p49 -g4 -sS'hgvs_lrg_variant' -p50 -g4 -sS'hgvs_transcript_variant' -p51 -S'NM_032790.3:c.132_137dup' -p52 -sS'hgvs_refseqgene_variant' -p53 -g4 
-sS'primary_assembly_loci' -p54 -(dp55 -S'hg19' -p56 -(dp57 -g19 -S'NC_000012.11:g.122064770_122064789=' -p58 -sg21 -(dp59 -g23 -S'chr12' -p60 -sg25 -S'GGCCCCGCCACCGCCACCGC' -p61 -sg27 -S'122064770' -p62 -sg29 -g61 -sssS'hg38' -p63 -(dp64 -g19 -S'NC_000012.12:g.121626879_121626884dup' -p65 -sg21 -(dp66 -g23 -g60 -sg25 -S'CCGCCA' -p67 -sg27 -S'121626874' -p68 -sg29 -VCCGCCACCGCCA -p69 -sssS'grch37' -p70 -(dp71 -g19 -S'NC_000012.11:g.122064770_122064789=' -p72 -sg21 -(dp73 -g23 -S'12' -p74 -sg25 -g61 -sg27 -S'122064770' -p75 -sg29 -g61 -sssS'grch38' -p76 -(dp77 -g19 -S'NC_000012.12:g.121626879_121626884dup' -p78 -sg21 -(dp79 -g23 -g74 -sg25 -S'CCGCCA' -p80 -sg27 -S'121626874' -p81 -sg29 -VCCGCCACCGCCA -p82 -ssssS'reference_sequence_records' -p83 -(dp84 -S'protein' -p85 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' -p86 -sS'transcript' -p87 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' -p88 -sssS'flag' -p89 -S'gene_variant' -p90 -sS'metadata' -p91 -(dp92 -S'variantvalidator_hgvs_version' -p93 -S'1.1.3' -p94 -sS'uta_schema' -p95 -S'uta_20180821' -p96 -sS'seqrepo_db' -p97 -S'2018-08-21' -p98 -sS'variantvalidator_version' -p99 -S'v0.2' -p100 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant68.txt b/VariantValidator/testing/testOutputsMasterITS/variant68.txt deleted file mode 100644 index 759d2009..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant68.txt +++ /dev/null @@ -1,218 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_032790.3:c.132_135delinsGCCGT' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000012.11:g.122064773CCCGCCACCGCCACCGC>CCCGCCACCGCCGCCGTC automapped to NC_000012.11:g.122064785_122064788delinsGCCGT' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -(dp14 -S'grch37' -p15 -(dp16 -S'hgvs_genomic_description' -p17 -S'NW_004504303.2:g.302883_302886delinsGCCGT' -p18 -sS'vcf' -p19 -(dp20 -S'chr' -p21 -S'HG1595_PATCH' -p22 -sS'ref' -p23 -S'ACCG' -p24 -sS'pos' -p25 -S'302883' -p26 -sS'alt' -p27 -VGCCGT -p28 -sssa(dp29 -S'hg19' -p30 -(dp31 -g17 -S'NW_004504303.2:g.302883_302886delinsGCCGT' -p32 -sg19 -(dp33 -g21 -S'NW_004504303.2' -p34 -sg23 -S'ACCG' -p35 -sg25 -S'302883' -p36 -sg27 -g28 -sssasS'transcript_description' -p37 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p38 -sS'gene_symbol' -p39 -S'ORAI1' -p40 -sS'hgvs_predicted_protein_consequence' -p41 -(dp42 -S'tlr' -p43 -S'NP_116179.2:p.(Pro46SerfsTer42)' -p44 -sS'slr' -p45 -S'NP_116179.2:p.(P46Sfs*42)' -p46 -ssS'submitted_variant' -p47 -S'12-122064773-CCCGCCACCGCCACCGC-CCCGCCACCGCCGCCGTC' -p48 -sS'genome_context_intronic_sequence' -p49 -g6 -sS'hgvs_lrg_variant' -p50 -g6 -sS'hgvs_transcript_variant' -p51 -S'NM_032790.3:c.132_135delinsGCCGT' -p52 -sS'hgvs_refseqgene_variant' -p53 -g6 -sS'primary_assembly_loci' -p54 -(dp55 -S'hg19' -p56 -(dp57 -g17 -S'NC_000012.11:g.122064785_122064788delinsGCCGT' -p58 -sg19 -(dp59 -g21 -S'chr12' -p60 -sg23 -S'ACCG' -p61 -sg25 -S'122064785' -p62 -sg27 -VGCCGT -p63 -sssS'hg38' 
-p64 -(dp65 -g17 -S'NC_000012.12:g.121626879_121626882delinsGCCGT' -p66 -sg19 -(dp67 -g21 -g60 -sg23 -S'ACCG' -p68 -sg25 -S'121626879' -p69 -sg27 -VGCCGT -p70 -sssS'grch37' -p71 -(dp72 -g17 -S'NC_000012.11:g.122064785_122064788delinsGCCGT' -p73 -sg19 -(dp74 -g21 -S'12' -p75 -sg23 -S'ACCG' -p76 -sg25 -S'122064785' -p77 -sg27 -g63 -sssS'grch38' -p78 -(dp79 -g17 -S'NC_000012.12:g.121626879_121626882delinsGCCGT' -p80 -sg19 -(dp81 -g21 -g75 -sg23 -S'ACCG' -p82 -sg25 -S'121626879' -p83 -sg27 -g70 -ssssS'reference_sequence_records' -p84 -(dp85 -S'protein' -p86 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' -p87 -sS'transcript' -p88 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' -p89 -sssS'metadata' -p90 -(dp91 -S'variantvalidator_hgvs_version' -p92 -S'1.1.3' -p93 -sS'uta_schema' -p94 -S'uta_20180821' -p95 -sS'seqrepo_db' -p96 -S'2018-08-21' -p97 -sS'variantvalidator_version' -p98 -S'v0.2' -p99 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant69.txt b/VariantValidator/testing/testOutputsMasterITS/variant69.txt deleted file mode 100644 index 5e82de2a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant69.txt +++ /dev/null @@ -1,219 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_032790.3:c.129_130insACACCG' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' -p9 -aS'NC_000012.11:g.122064777 is one of 6 genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'refseqgene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -(dp17 -S'grch37' -p18 -(dp19 -S'hgvs_genomic_description' -p20 -S'NW_004504303.2:g.302875C>A' -p21 -sS'vcf' 
-p22 -(dp23 -S'chr' -p24 -S'HG1595_PATCH' -p25 -sS'ref' -p26 -S'C' -p27 -sS'pos' -p28 -S'302875' -p29 -sS'alt' -p30 -S'A' -p31 -sssa(dp32 -S'hg19' -p33 -(dp34 -g20 -S'NW_004504303.2:g.302875C>A' -p35 -sg22 -(dp36 -g24 -S'NW_004504303.2' -p37 -sg26 -g27 -sg28 -S'302875' -p38 -sg30 -g31 -sssasS'transcript_description' -p39 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p40 -sS'gene_symbol' -p41 -S'ORAI1' -p42 -sS'hgvs_predicted_protein_consequence' -p43 -(dp44 -S'tlr' -p45 -S'NP_116179.2:p.(Pro43_Pro44insThrPro)' -p46 -sS'slr' -p47 -S'NP_116179.2:p.(P43_P44insTP)' -p48 -ssS'submitted_variant' -p49 -S'NC_000012.11:g.122064777C>A' -p50 -sS'genome_context_intronic_sequence' -p51 -g6 -sS'hgvs_lrg_variant' -p52 -g6 -sS'hgvs_transcript_variant' -p53 -S'NM_032790.3:c.129_130insACACCG' -p54 -sS'hgvs_refseqgene_variant' -p55 -g6 -sS'primary_assembly_loci' -p56 -(dp57 -S'hg19' -p58 -(dp59 -g20 -S'NC_000012.11:g.122064777C>A' -p60 -sg22 -(dp61 -g24 -S'chr12' -p62 -sg26 -g27 -sg28 -S'122064777' -p63 -sg30 -g31 -sssS'hg38' -p64 -(dp65 -g20 -S'NC_000012.12:g.121626876_121626877insACACCG' -p66 -sg22 -(dp67 -g24 -g62 -sg26 -g27 -sg28 -S'121626873' -p68 -sg30 -VCCCGACA -p69 -sssS'grch37' -p70 -(dp71 -g20 -S'NC_000012.11:g.122064777C>A' -p72 -sg22 -(dp73 -g24 -S'12' -p74 -sg26 -g27 -sg28 -S'122064777' -p75 -sg30 -g31 -sssS'grch38' -p76 -(dp77 -g20 -S'NC_000012.12:g.121626876_121626877insACACCG' -p78 -sg22 -(dp79 -g24 -g74 -sg26 -g27 -sg28 -S'121626873' -p80 -sg30 -VCCCGACA -p81 -ssssS'reference_sequence_records' -p82 -(dp83 -S'protein' -p84 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' -p85 -sS'transcript' -p86 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' -p87 -sssS'metadata' -p88 -(dp89 -S'variantvalidator_hgvs_version' -p90 -S'1.1.3' -p91 -sS'uta_schema' -p92 -S'uta_20180821' -p93 -sS'seqrepo_db' -p94 -S'2018-08-21' -p95 -sS'variantvalidator_version' -p96 -S'v0.2' -p97 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant7.txt b/VariantValidator/testing/testOutputsMasterITS/variant7.txt deleted file mode 100644 index 3ddbed10..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant7.txt +++ /dev/null @@ -1,1362 +0,0 @@ -(dp0 -S'NM_001077183.2:c.138+821del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' -p7 -aS'RefSeqGene record not available' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 4, mRNA -p13 -sS'gene_symbol' -p14 -S'TSC2' -p15 -sS'hgvs_predicted_protein_consequence' -p16 -(dp17 -S'tlr' -p18 -S'NP_001070651.1:p.?' -p19 -sS'slr' -p20 -S'NP_001070651.1:p.?' -p21 -ssS'submitted_variant' -p22 -S'NC_000016.9:g.2099572TC>T' -p23 -sS'genome_context_intronic_sequence' -p24 -S'NC_000016.9(NM_001077183.2):c.138+821del' -p25 -sS'hgvs_lrg_variant' -p26 -g4 -sS'hgvs_transcript_variant' -p27 -S'NM_001077183.2:c.138+821del' -p28 -sS'hgvs_refseqgene_variant' -p29 -g4 -sS'primary_assembly_loci' -p30 -(dp31 -S'grch38' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000016.10:g.2049574del' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'16' -p39 -sS'ref' -p40 -S'TC' -p41 -sS'pos' -p42 -S'2049571' -p43 -sS'alt' -p44 -S'T' -p45 -sssS'grch37' -p46 -(dp47 -g34 -S'NC_000016.9:g.2099575del' -p48 -sg36 -(dp49 -g38 -g39 -sg40 -S'TC' -p50 -sg42 -S'2099572' -p51 -sg44 -g45 -sssS'hg38' -p52 -(dp53 -g34 -S'NC_000016.10:g.2049574del' -p54 -sg36 -(dp55 -g38 -S'chr16' -p56 -sg40 -S'TC' -p57 -sg42 -S'2049571' -p58 -sg44 -g45 -sssS'hg19' -p59 -(dp60 -g34 -S'NC_000016.9:g.2099575del' -p61 -sg36 -(dp62 -g38 -g56 -sg40 -S'TC' -p63 -sg42 -S'2099572' -p64 -sg44 -g45 -ssssS'reference_sequence_records' -p65 -(dp66 -S'protein' -p67 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001070651.1' -p68 -sS'transcript' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077183.2' -p70 -sssS'NM_001318831.1:c.-89+821del' -p71 -(dp72 -g3 -g4 -sg5 -(lp73 -S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' -p74 -aS'RefSeqGene record not available' -p75 -asg9 -g4 -sg10 -(lp76 -sg12 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 8, mRNA -p77 -sg14 -S'TSC2' -p78 -sg16 -(dp79 -g18 -S'NP_001305760.1:p.?' -p80 -sg20 -S'NP_001305760.1:p.?' -p81 -ssg22 -g23 -sg24 -S'NC_000016.9(NM_001318831.1):c.-89+821del' -p82 -sg26 -g4 -sg27 -S'NM_001318831.1:c.-89+821del' -p83 -sg29 -g4 -sg30 -(dp84 -S'grch38' -p85 -(dp86 -g34 -S'NC_000016.10:g.2049574del' -p87 -sg36 -(dp88 -g38 -g39 -sg40 -S'TC' -p89 -sg42 -S'2049571' -p90 -sg44 -g45 -sssS'grch37' -p91 -(dp92 -g34 -S'NC_000016.9:g.2099575del' -p93 -sg36 -(dp94 -g38 -g39 -sg40 -S'TC' -p95 -sg42 -S'2099572' -p96 -sg44 -g45 -sssg52 -(dp97 -g34 -S'NC_000016.10:g.2049574del' -p98 -sg36 -(dp99 -g38 -g56 -sg40 -S'TC' -p100 -sg42 -S'2049571' -p101 -sg44 -g45 -sssS'hg19' -p102 -(dp103 -g34 -S'NC_000016.9:g.2099575del' -p104 -sg36 -(dp105 -g38 -g56 -sg40 -S'TC' -p106 -sg42 -S'2099572' -p107 -sg44 -g45 -ssssg65 -(dp108 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305760.1' -p109 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318831.1' -p110 -sssS'NM_021055.2:c.138+821del' -p111 -(dp112 -g3 -g4 -sg5 -(lp113 -S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' -p114 -aS'RefSeqGene record not available' -p115 -asg9 -g4 -sg10 -(lp116 -sg12 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 2, mRNA -p117 -sg14 -S'TSC2' -p118 -sg16 -(dp119 -g18 -S'NP_066399.2:p.?' -p120 -sg20 -S'NP_066399.2:p.?' 
-p121 -ssg22 -g23 -sg24 -S'NC_000016.9(NM_021055.2):c.138+821del' -p122 -sg26 -g4 -sg27 -S'NM_021055.2:c.138+821del' -p123 -sg29 -g4 -sg30 -(dp124 -S'hg19' -p125 -(dp126 -g34 -S'NC_000016.9:g.2099575del' -p127 -sg36 -(dp128 -g38 -g56 -sg40 -S'TC' -p129 -sg42 -S'2099572' -p130 -sg44 -g45 -sssS'grch37' -p131 -(dp132 -g34 -S'NC_000016.9:g.2099575del' -p133 -sg36 -(dp134 -g38 -g39 -sg40 -S'TC' -p135 -sg42 -S'2099572' -p136 -sg44 -g45 -ssssg65 -(dp137 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_066399.2' -p138 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_021055.2' -p139 -sssS'NM_001318832.1:c.171+821del' -p140 -(dp141 -g3 -g4 -sg5 -(lp142 -S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' -p143 -aS'RefSeqGene record not available' -p144 -asg9 -g4 -sg10 -(lp145 -sg12 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 9, mRNA -p146 -sg14 -S'TSC2' -p147 -sg16 -(dp148 -g18 -S'NP_001305761.1:p.?' -p149 -sg20 -S'NP_001305761.1:p.?' -p150 -ssg22 -g23 -sg24 -S'NC_000016.9(NM_001318832.1):c.171+821del' -p151 -sg26 -g4 -sg27 -S'NM_001318832.1:c.171+821del' -p152 -sg29 -g4 -sg30 -(dp153 -S'grch38' -p154 -(dp155 -g34 -S'NC_000016.10:g.2049574del' -p156 -sg36 -(dp157 -g38 -g39 -sg40 -S'TC' -p158 -sg42 -S'2049571' -p159 -sg44 -g45 -sssS'grch37' -p160 -(dp161 -g34 -S'NC_000016.9:g.2099575del' -p162 -sg36 -(dp163 -g38 -g39 -sg40 -S'TC' -p164 -sg42 -S'2099572' -p165 -sg44 -g45 -sssg52 -(dp166 -g34 -S'NC_000016.10:g.2049574del' -p167 -sg36 -(dp168 -g38 -g56 -sg40 -S'TC' -p169 -sg42 -S'2049571' -p170 -sg44 -g45 -sssS'hg19' -p171 -(dp172 -g34 -S'NC_000016.9:g.2099575del' -p173 -sg36 -(dp174 -g38 -g56 -sg40 -S'TC' -p175 -sg42 -S'2099572' -p176 -sg44 -g45 -ssssg65 -(dp177 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305761.1' -p178 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318832.1' -p179 -sssS'NM_001114382.1:c.138+821del' -p180 -(dp181 -g3 -g4 -sg5 -(lp182 -S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' -p183 
-aS'A more recent version of the selected reference sequence NM_001114382.1 is available (NM_001114382.2)' -p184 -aS'NM_001114382.2:c.138+821delC MUST be fully validated prior to use in reports' -p185 -aS'select_variants=NM_001114382.2:c.138+821del' -p186 -aS'RefSeqGene record not available' -p187 -asg9 -g4 -sg10 -(lp188 -sg12 -VHomo sapiens tuberous sclerosis 2 (TSC2), transcript variant 5, mRNA -p189 -sg14 -S'TSC2' -p190 -sg16 -(dp191 -g18 -S'NP_001107854.1:p.?' -p192 -sg20 -S'NP_001107854.1:p.?' -p193 -ssg22 -g23 -sg24 -S'NC_000016.9(NM_001114382.1):c.138+821del' -p194 -sg26 -g4 -sg27 -S'NM_001114382.1:c.138+821del' -p195 -sg29 -g4 -sg30 -(dp196 -S'hg19' -p197 -(dp198 -g34 -S'NC_000016.9:g.2099575del' -p199 -sg36 -(dp200 -g38 -g56 -sg40 -S'TC' -p201 -sg42 -S'2099572' -p202 -sg44 -g45 -sssS'grch37' -p203 -(dp204 -g34 -S'NC_000016.9:g.2099575del' -p205 -sg36 -(dp206 -g38 -g39 -sg40 -S'TC' -p207 -sg42 -S'2099572' -p208 -sg44 -g45 -ssssg65 -(dp209 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001107854.1' -p210 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001114382.1' -p211 -sssS'NM_000548.4:c.138+821del' -p212 -(dp213 -g3 -g4 -sg5 -(lp214 -S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' -p215 -aS'RefSeqGene record not available' -p216 -asg9 -g4 -sg10 -(lp217 -sg12 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 1, mRNA -p218 -sg14 -S'TSC2' -p219 -sg16 -(dp220 -g18 -S'NP_000539.2:p.?' -p221 -sg20 -S'NP_000539.2:p.?' 
-p222 -ssg22 -g23 -sg24 -S'NC_000016.9(NM_000548.4):c.138+821del' -p223 -sg26 -g4 -sg27 -S'NM_000548.4:c.138+821del' -p224 -sg29 -g4 -sg30 -(dp225 -S'grch38' -p226 -(dp227 -g34 -S'NC_000016.10:g.2049574del' -p228 -sg36 -(dp229 -g38 -g39 -sg40 -S'TC' -p230 -sg42 -S'2049571' -p231 -sg44 -g45 -sssS'grch37' -p232 -(dp233 -g34 -S'NC_000016.9:g.2099575del' -p234 -sg36 -(dp235 -g38 -g39 -sg40 -S'TC' -p236 -sg42 -S'2099572' -p237 -sg44 -g45 -sssg52 -(dp238 -g34 -S'NC_000016.10:g.2049574del' -p239 -sg36 -(dp240 -g38 -g56 -sg40 -S'TC' -p241 -sg42 -S'2049571' -p242 -sg44 -g45 -sssS'hg19' -p243 -(dp244 -g34 -S'NC_000016.9:g.2099575del' -p245 -sg36 -(dp246 -g38 -g56 -sg40 -S'TC' -p247 -sg42 -S'2099572' -p248 -sg44 -g45 -ssssg65 -(dp249 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000539.2' -p250 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000548.4' -p251 -sssS'NM_001363528.1:c.138+821del' -p252 -(dp253 -g3 -g4 -sg5 -(lp254 -S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' -p255 -aS'RefSeqGene record not available' -p256 -asg9 -g4 -sg10 -(lp257 -sg12 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 10, mRNA -p258 -sg14 -S'TSC2' -p259 -sg16 -(dp260 -g18 -S'NP_001350457.1:p.?' -p261 -sg20 -S'NP_001350457.1:p.?' 
-p262 -ssg22 -g23 -sg24 -S'NC_000016.9(NM_001363528.1):c.138+821del' -p263 -sg26 -g4 -sg27 -S'NM_001363528.1:c.138+821del' -p264 -sg29 -g4 -sg30 -(dp265 -S'hg19' -p266 -(dp267 -g34 -S'NC_000016.9:g.2099575del' -p268 -sg36 -(dp269 -g38 -g56 -sg40 -S'TC' -p270 -sg42 -S'2099572' -p271 -sg44 -g45 -sssS'grch37' -p272 -(dp273 -g34 -S'NC_000016.9:g.2099575del' -p274 -sg36 -(dp275 -g38 -g39 -sg40 -S'TC' -p276 -sg42 -S'2099572' -p277 -sg44 -g45 -ssssg65 -(dp278 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350457.1' -p279 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363528.1' -p280 -sssS'flag' -p281 -S'gene_variant' -p282 -sS'NM_001077183.1:c.138+821del' -p283 -(dp284 -g3 -g4 -sg5 -(lp285 -S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' -p286 -aS'A more recent version of the selected reference sequence NM_001077183.1 is available (NM_001077183.2)' -p287 -aS'NM_001077183.2:c.138+821delC MUST be fully validated prior to use in reports' -p288 -aS'select_variants=NM_001077183.2:c.138+821del' -p289 -aS'RefSeqGene record not available' -p290 -asg9 -g4 -sg10 -(lp291 -sg12 -VHomo sapiens tuberous sclerosis 2 (TSC2), transcript variant 4, mRNA -p292 -sg14 -S'TSC2' -p293 -sg16 -(dp294 -g18 -S'NP_001070651.1:p.?' -p295 -sg20 -S'NP_001070651.1:p.?' 
-p296 -ssg22 -g23 -sg24 -S'NC_000016.9(NM_001077183.1):c.138+821del' -p297 -sg26 -g4 -sg27 -S'NM_001077183.1:c.138+821del' -p298 -sg29 -g4 -sg30 -(dp299 -S'hg19' -p300 -(dp301 -g34 -S'NC_000016.9:g.2099575del' -p302 -sg36 -(dp303 -g38 -g56 -sg40 -S'TC' -p304 -sg42 -S'2099572' -p305 -sg44 -g45 -sssS'grch37' -p306 -(dp307 -g34 -S'NC_000016.9:g.2099575del' -p308 -sg36 -(dp309 -g38 -g39 -sg40 -S'TC' -p310 -sg42 -S'2099572' -p311 -sg44 -g45 -ssssg65 -(dp312 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001070651.1' -p313 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077183.1' -p314 -sssS'NM_001318827.1:c.138+821del' -p315 -(dp316 -g3 -g4 -sg5 -(lp317 -S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' -p318 -aS'RefSeqGene record not available' -p319 -asg9 -g4 -sg10 -(lp320 -sg12 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 6, mRNA -p321 -sg14 -S'TSC2' -p322 -sg16 -(dp323 -g18 -S'NP_001305756.1:p.?' -p324 -sg20 -S'NP_001305756.1:p.?' -p325 -ssg22 -g23 -sg24 -S'NC_000016.9(NM_001318827.1):c.138+821del' -p326 -sg26 -g4 -sg27 -S'NM_001318827.1:c.138+821del' -p327 -sg29 -g4 -sg30 -(dp328 -S'grch38' -p329 -(dp330 -g34 -S'NC_000016.10:g.2049574del' -p331 -sg36 -(dp332 -g38 -g39 -sg40 -S'TC' -p333 -sg42 -S'2049571' -p334 -sg44 -g45 -sssS'grch37' -p335 -(dp336 -g34 -S'NC_000016.9:g.2099575del' -p337 -sg36 -(dp338 -g38 -g39 -sg40 -S'TC' -p339 -sg42 -S'2099572' -p340 -sg44 -g45 -sssg52 -(dp341 -g34 -S'NC_000016.10:g.2049574del' -p342 -sg36 -(dp343 -g38 -g56 -sg40 -S'TC' -p344 -sg42 -S'2049571' -p345 -sg44 -g45 -sssS'hg19' -p346 -(dp347 -g34 -S'NC_000016.9:g.2099575del' -p348 -sg36 -(dp349 -g38 -g56 -sg40 -S'TC' -p350 -sg42 -S'2099572' -p351 -sg44 -g45 -ssssg65 -(dp352 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305756.1' -p353 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318827.1' -p354 -sssS'NM_000548.3:c.138+821del' -p355 -(dp356 -g3 -g4 -sg5 -(lp357 -S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' 
-p358 -aS'A more recent version of the selected reference sequence NM_000548.3 is available (NM_000548.4)' -p359 -aS'NM_000548.4:c.138+821delC MUST be fully validated prior to use in reports' -p360 -aS'select_variants=NM_000548.4:c.138+821del' -p361 -aS'RefSeqGene record not available' -p362 -asg9 -g4 -sg10 -(lp363 -sg12 -VHomo sapiens tuberous sclerosis 2 (TSC2), transcript variant 1, mRNA -p364 -sg14 -S'TSC2' -p365 -sg16 -(dp366 -g18 -S'NP_000539.2:p.?' -p367 -sg20 -S'NP_000539.2:p.?' -p368 -ssg22 -g23 -sg24 -S'NC_000016.9(NM_000548.3):c.138+821del' -p369 -sg26 -g4 -sg27 -S'NM_000548.3:c.138+821del' -p370 -sg29 -g4 -sg30 -(dp371 -S'hg19' -p372 -(dp373 -g34 -S'NC_000016.9:g.2099575del' -p374 -sg36 -(dp375 -g38 -g56 -sg40 -S'TC' -p376 -sg42 -S'2099572' -p377 -sg44 -g45 -sssS'grch37' -p378 -(dp379 -g34 -S'NC_000016.9:g.2099575del' -p380 -sg36 -(dp381 -g38 -g39 -sg40 -S'TC' -p382 -sg42 -S'2099572' -p383 -sg44 -g45 -ssssg65 -(dp384 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000539.2' -p385 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000548.3' -p386 -sssS'NM_001114382.2:c.138+821del' -p387 -(dp388 -g3 -g4 -sg5 -(lp389 -S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' -p390 -aS'RefSeqGene record not available' -p391 -asg9 -g4 -sg10 -(lp392 -sg12 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 5, mRNA -p393 -sg14 -S'TSC2' -p394 -sg16 -(dp395 -g18 -S'NP_001107854.1:p.?' -p396 -sg20 -S'NP_001107854.1:p.?' 
-p397 -ssg22 -g23 -sg24 -S'NC_000016.9(NM_001114382.2):c.138+821del' -p398 -sg26 -g4 -sg27 -S'NM_001114382.2:c.138+821del' -p399 -sg29 -g4 -sg30 -(dp400 -S'grch38' -p401 -(dp402 -g34 -S'NC_000016.10:g.2049574del' -p403 -sg36 -(dp404 -g38 -g39 -sg40 -S'TC' -p405 -sg42 -S'2049571' -p406 -sg44 -g45 -sssS'grch37' -p407 -(dp408 -g34 -S'NC_000016.9:g.2099575del' -p409 -sg36 -(dp410 -g38 -g39 -sg40 -S'TC' -p411 -sg42 -S'2099572' -p412 -sg44 -g45 -sssg52 -(dp413 -g34 -S'NC_000016.10:g.2049574del' -p414 -sg36 -(dp415 -g38 -g56 -sg40 -S'TC' -p416 -sg42 -S'2049571' -p417 -sg44 -g45 -sssS'hg19' -p418 -(dp419 -g34 -S'NC_000016.9:g.2099575del' -p420 -sg36 -(dp421 -g38 -g56 -sg40 -S'TC' -p422 -sg42 -S'2099572' -p423 -sg44 -g45 -ssssg65 -(dp424 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001107854.1' -p425 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001114382.2' -p426 -sssS'NM_001318829.1:c.-9-826del' -p427 -(dp428 -g3 -g4 -sg5 -(lp429 -S'NC_000016.9:g.2099572TC>T automapped to NC_000016.9:g.2099575delC' -p430 -aS'RefSeqGene record not available' -p431 -asg9 -g4 -sg10 -(lp432 -sg12 -VHomo sapiens TSC complex subunit 2 (TSC2), transcript variant 7, mRNA -p433 -sg14 -S'TSC2' -p434 -sg16 -(dp435 -g18 -S'NP_001305758.1:p.?' -p436 -sg20 -S'NP_001305758.1:p.?' 
-p437 -ssg22 -g23 -sg24 -S'NC_000016.9(NM_001318829.1):c.-9-826del' -p438 -sg26 -g4 -sg27 -S'NM_001318829.1:c.-9-826del' -p439 -sg29 -g4 -sg30 -(dp440 -S'grch38' -p441 -(dp442 -g34 -S'NC_000016.10:g.2049574del' -p443 -sg36 -(dp444 -g38 -g39 -sg40 -S'TC' -p445 -sg42 -S'2049571' -p446 -sg44 -g45 -sssS'grch37' -p447 -(dp448 -g34 -S'NC_000016.9:g.2099575del' -p449 -sg36 -(dp450 -g38 -g39 -sg40 -S'TC' -p451 -sg42 -S'2099572' -p452 -sg44 -g45 -sssg52 -(dp453 -g34 -S'NC_000016.10:g.2049574del' -p454 -sg36 -(dp455 -g38 -g56 -sg40 -S'TC' -p456 -sg42 -S'2049571' -p457 -sg44 -g45 -sssS'hg19' -p458 -(dp459 -g34 -S'NC_000016.9:g.2099575del' -p460 -sg36 -(dp461 -g38 -g56 -sg40 -S'TC' -p462 -sg42 -S'2099572' -p463 -sg44 -g45 -ssssg65 -(dp464 -g67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305758.1' -p465 -sg69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318829.1' -p466 -sssS'metadata' -p467 -(dp468 -S'variantvalidator_hgvs_version' -p469 -S'1.1.3' -p470 -sS'uta_schema' -p471 -S'uta_20180821' -p472 -sS'seqrepo_db' -p473 -S'2018-08-21' -p474 -sS'variantvalidator_version' -p475 -S'v0.2' -p476 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant70.txt b/VariantValidator/testing/testOutputsMasterITS/variant70.txt deleted file mode 100644 index 25f99e45..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant70.txt +++ /dev/null @@ -1,224 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_032790.3:c.128_129insCCACC' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' -p9 -aS'NC_000012.11:g.122064775 is one of 6 genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127' -p10 -aS'NC_000012.11:g.122064776 is one of Requires Analysis genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127' -p11 -aS'Caution should be used when reporting the displayed variant descriptions' -p12 -aS'If you are unsure, please contact admin' -p13 -aS'RefSeqGene record not available' -p14 -asS'refseqgene_context_intronic_sequence' -p15 -g6 -sS'alt_genomic_loci' -p16 -(lp17 -(dp18 -S'grch37' -p19 -(dp20 -S'hgvs_genomic_description' -p21 -S'NW_004504303.2:g.302874del' -p22 -sS'vcf' -p23 -(dp24 -S'chr' -p25 -S'HG1595_PATCH' -p26 -sS'ref' -p27 -S'CG' -p28 -sS'pos' -p29 -S'302873' -p30 -sS'alt' -p31 -S'C' -p32 -sssa(dp33 -S'hg19' -p34 -(dp35 -g21 -S'NW_004504303.2:g.302874del' -p36 -sg23 -(dp37 -g25 -S'NW_004504303.2' -p38 -sg27 -S'CG' -p39 -sg29 -S'302873' -p40 -sg31 -g32 -sssasS'transcript_description' -p41 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p42 -sS'gene_symbol' -p43 -S'ORAI1' -p44 -sS'hgvs_predicted_protein_consequence' -p45 -(dp46 -S'tlr' -p47 -S'NP_116179.2:p.(Pro44HisfsTer22)' -p48 -sS'slr' -p49 -S'NP_116179.2:p.(P44Hfs*22)' -p50 -ssS'submitted_variant' -p51 -S'NC_000012.11:g.122064776delG' -p52 -sS'genome_context_intronic_sequence' -p53 -g6 -sS'hgvs_lrg_variant' -p54 -g6 
-sS'hgvs_transcript_variant' -p55 -S'NM_032790.3:c.128_129insCCACC' -p56 -sS'hgvs_refseqgene_variant' -p57 -g6 -sS'primary_assembly_loci' -p58 -(dp59 -S'hg19' -p60 -(dp61 -g21 -S'NC_000012.11:g.122064776del' -p62 -sg23 -(dp63 -g25 -S'chr12' -p64 -sg27 -S'CG' -p65 -sg29 -S'122064775' -p66 -sg31 -g32 -sssS'hg38' -p67 -(dp68 -g21 -S'NC_000012.12:g.121626875_121626876insCCACC' -p69 -sg23 -(dp70 -g25 -g64 -sg27 -g32 -sg29 -S'121626873' -p71 -sg31 -VCCCCCA -p72 -sssS'grch37' -p73 -(dp74 -g21 -S'NC_000012.11:g.122064776del' -p75 -sg23 -(dp76 -g25 -S'12' -p77 -sg27 -S'CG' -p78 -sg29 -S'122064775' -p79 -sg31 -g32 -sssS'grch38' -p80 -(dp81 -g21 -S'NC_000012.12:g.121626875_121626876insCCACC' -p82 -sg23 -(dp83 -g25 -g77 -sg27 -g32 -sg29 -S'121626873' -p84 -sg31 -VCCCCCA -p85 -ssssS'reference_sequence_records' -p86 -(dp87 -S'protein' -p88 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' -p89 -sS'transcript' -p90 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' -p91 -sssS'metadata' -p92 -(dp93 -S'variantvalidator_hgvs_version' -p94 -S'1.1.3' -p95 -sS'uta_schema' -p96 -S'uta_20180821' -p97 -sS'seqrepo_db' -p98 -S'2018-08-21' -p99 -sS'variantvalidator_version' -p100 -S'v0.2' -p101 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant71.txt b/VariantValidator/testing/testOutputsMasterITS/variant71.txt deleted file mode 100644 index ef02ced7..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant71.txt +++ /dev/null @@ -1,225 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_032790.3:c.129_130insGCCACCG' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' -p9 -aS'NC_000012.11:g.122064775 is one of 6 genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127' -p10 -aS'NC_000012.11:g.122064775 is one of Requires Analysis genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127' -p11 -aS'Caution should be used when reporting the displayed variant descriptions' -p12 -aS'If you are unsure, please contact admin' -p13 -aS'RefSeqGene record not available' -p14 -asS'refseqgene_context_intronic_sequence' -p15 -g6 -sS'alt_genomic_loci' -p16 -(lp17 -(dp18 -S'grch37' -p19 -(dp20 -S'hgvs_genomic_description' -p21 -S'NW_004504303.2:g.302874dup' -p22 -sS'vcf' -p23 -(dp24 -S'chr' -p25 -S'HG1595_PATCH' -p26 -sS'ref' -p27 -S'G' -p28 -sS'pos' -p29 -S'302874' -p30 -sS'alt' -p31 -S'GG' -p32 -sssa(dp33 -S'hg19' -p34 -(dp35 -g21 -S'NW_004504303.2:g.302874dup' -p36 -sg23 -(dp37 -g25 -S'NW_004504303.2' -p38 -sg27 -g28 -sg29 -S'302874' -p39 -sg31 -S'GG' -p40 -sssasS'transcript_description' -p41 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p42 -sS'gene_symbol' -p43 -S'ORAI1' -p44 -sS'hgvs_predicted_protein_consequence' -p45 -(dp46 -S'tlr' -p47 -S'NP_116179.2:p.(Pro44AlafsTer46)' -p48 -sS'slr' -p49 -S'NP_116179.2:p.(P44Afs*46)' -p50 -ssS'submitted_variant' -p51 -S'NC_000012.11:g.122064776dupG' -p52 -sS'genome_context_intronic_sequence' -p53 -g6 -sS'hgvs_lrg_variant' -p54 -g6 
-sS'hgvs_transcript_variant' -p55 -S'NM_032790.3:c.129_130insGCCACCG' -p56 -sS'hgvs_refseqgene_variant' -p57 -g6 -sS'primary_assembly_loci' -p58 -(dp59 -S'hg19' -p60 -(dp61 -g21 -S'NC_000012.11:g.122064776dup' -p62 -sg23 -(dp63 -g25 -S'chr12' -p64 -sg27 -g28 -sg29 -S'122064776' -p65 -sg31 -S'GG' -p66 -sssS'hg38' -p67 -(dp68 -g21 -S'NC_000012.12:g.121626876_121626877insGCCACCG' -p69 -sg23 -(dp70 -g25 -g64 -sg27 -S'C' -p71 -sg29 -S'121626873' -p72 -sg31 -VCCCGGCCA -p73 -sssS'grch37' -p74 -(dp75 -g21 -S'NC_000012.11:g.122064776dup' -p76 -sg23 -(dp77 -g25 -S'12' -p78 -sg27 -g28 -sg29 -S'122064776' -p79 -sg31 -S'GG' -p80 -sssS'grch38' -p81 -(dp82 -g21 -S'NC_000012.12:g.121626876_121626877insGCCACCG' -p83 -sg23 -(dp84 -g25 -g78 -sg27 -g71 -sg29 -S'121626873' -p85 -sg31 -VCCCGGCCA -p86 -ssssS'reference_sequence_records' -p87 -(dp88 -S'protein' -p89 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' -p90 -sS'transcript' -p91 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' -p92 -sssS'metadata' -p93 -(dp94 -S'variantvalidator_hgvs_version' -p95 -S'1.1.3' -p96 -sS'uta_schema' -p97 -S'uta_20180821' -p98 -sS'seqrepo_db' -p99 -S'2018-08-21' -p100 -sS'variantvalidator_version' -p101 -S'v0.2' -p102 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant72.txt b/VariantValidator/testing/testOutputsMasterITS/variant72.txt deleted file mode 100644 index 0ab2ff7d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant72.txt +++ /dev/null @@ -1,223 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_032790.3:c.129_130insTTTCCACCG' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' -p9 -aS'NC_000012.11:g.122064776 is one of 7 genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'refseqgene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -(dp17 -S'grch37' -p18 -(dp19 -S'hgvs_genomic_description' -p20 -S'NW_004504303.2:g.302874_302875insTTT' -p21 -sS'vcf' -p22 -(dp23 -S'chr' -p24 -S'HG1595_PATCH' -p25 -sS'ref' -p26 -S'G' -p27 -sS'pos' -p28 -S'302874' -p29 -sS'alt' -p30 -S'GTTT' -p31 -sssa(dp32 -S'hg19' -p33 -(dp34 -g20 -S'NW_004504303.2:g.302874_302875insTTT' -p35 -sg22 -(dp36 -g24 -S'NW_004504303.2' -p37 -sg26 -g27 -sg28 -S'302874' -p38 -sg30 -S'GTTT' -p39 -sssasS'transcript_description' -p40 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p41 -sS'gene_symbol' -p42 -S'ORAI1' -p43 -sS'hgvs_predicted_protein_consequence' -p44 -(dp45 -S'tlr' -p46 -S'NP_116179.2:p.(Pro43_Pro44insPheProPro)' -p47 -sS'slr' -p48 -S'NP_116179.2:p.(P43_P44insFPP)' -p49 -ssS'submitted_variant' -p50 -S'NC_000012.11:g.122064776_122064777insTTT' -p51 -sS'genome_context_intronic_sequence' -p52 -g6 -sS'hgvs_lrg_variant' -p53 -g6 -sS'hgvs_transcript_variant' -p54 -S'NM_032790.3:c.129_130insTTTCCACCG' -p55 -sS'hgvs_refseqgene_variant' -p56 -g6 
-sS'primary_assembly_loci' -p57 -(dp58 -S'hg19' -p59 -(dp60 -g20 -S'NC_000012.11:g.122064776_122064777insTTT' -p61 -sg22 -(dp62 -g24 -S'chr12' -p63 -sg26 -g27 -sg28 -S'122064776' -p64 -sg30 -S'GTTT' -p65 -sssS'hg38' -p66 -(dp67 -g20 -S'NC_000012.12:g.121626876_121626877insTTTCCACCG' -p68 -sg22 -(dp69 -g24 -g63 -sg26 -S'C' -p70 -sg28 -S'121626873' -p71 -sg30 -VCCCGTTTCCA -p72 -sssS'grch37' -p73 -(dp74 -g20 -S'NC_000012.11:g.122064776_122064777insTTT' -p75 -sg22 -(dp76 -g24 -S'12' -p77 -sg26 -g27 -sg28 -S'122064776' -p78 -sg30 -S'GTTT' -p79 -sssS'grch38' -p80 -(dp81 -g20 -S'NC_000012.12:g.121626876_121626877insTTTCCACCG' -p82 -sg22 -(dp83 -g24 -g77 -sg26 -g70 -sg28 -S'121626873' -p84 -sg30 -VCCCGTTTCCA -p85 -ssssS'reference_sequence_records' -p86 -(dp87 -S'protein' -p88 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' -p89 -sS'transcript' -p90 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' -p91 -sssS'metadata' -p92 -(dp93 -S'variantvalidator_hgvs_version' -p94 -S'1.1.3' -p95 -sS'uta_schema' -p96 -S'uta_20180821' -p97 -sS'seqrepo_db' -p98 -S'2018-08-21' -p99 -sS'variantvalidator_version' -p100 -S'v0.2' -p101 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant73.txt b/VariantValidator/testing/testOutputsMasterITS/variant73.txt deleted file mode 100644 index e5812afa..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant73.txt +++ /dev/null @@ -1,223 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_032790.3:c.125_126delinsGCCA' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' -p9 -aS'Genome position NC_000012.11:g.122064776 aligns within a Requires Analysis-bp gap in transcript NM_032790.3 between positions c.126_127' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'refseqgene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -(dp17 -S'grch37' -p18 -(dp19 -S'hgvs_genomic_description' -p20 -S'NW_004504303.2:g.302870_302873del' -p21 -sS'vcf' -p22 -(dp23 -S'chr' -p24 -S'HG1595_PATCH' -p25 -sS'ref' -p26 -S'GCCCC' -p27 -sS'pos' -p28 -S'302869' -p29 -sS'alt' -p30 -S'G' -p31 -sssa(dp32 -S'hg19' -p33 -(dp34 -g20 -S'NW_004504303.2:g.302870_302873del' -p35 -sg22 -(dp36 -g24 -S'NW_004504303.2' -p37 -sg26 -S'GCCCC' -p38 -sg28 -S'302869' -p39 -sg30 -g31 -sssasS'transcript_description' -p40 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p41 -sS'gene_symbol' -p42 -S'ORAI1' -p43 -sS'hgvs_predicted_protein_consequence' -p44 -(dp45 -S'tlr' -p46 -S'NP_116179.2:p.(Ala42GlyfsTer23)' -p47 -sS'slr' -p48 -S'NP_116179.2:p.(A42Gfs*23)' -p49 -ssS'submitted_variant' -p50 -S'NC_000012.11:g.122064772_122064775del' -p51 -sS'genome_context_intronic_sequence' -p52 -g6 -sS'hgvs_lrg_variant' -p53 -g6 -sS'hgvs_transcript_variant' -p54 -S'NM_032790.3:c.125_126delinsGCCA' -p55 -sS'hgvs_refseqgene_variant' -p56 -g6 
-sS'primary_assembly_loci' -p57 -(dp58 -S'hg19' -p59 -(dp60 -g20 -S'NC_000012.11:g.122064772_122064775del' -p61 -sg22 -(dp62 -g24 -S'chr12' -p63 -sg26 -S'GCCCC' -p64 -sg28 -S'122064771' -p65 -sg30 -g31 -sssS'hg38' -p66 -(dp67 -g20 -S'NC_000012.12:g.121626867_121626873delinsGCCA' -p68 -sg22 -(dp69 -g24 -g63 -sg26 -S'CCCCGCC' -p70 -sg28 -S'121626867' -p71 -sg30 -S'GCCA' -p72 -sssS'grch37' -p73 -(dp74 -g20 -S'NC_000012.11:g.122064772_122064775del' -p75 -sg22 -(dp76 -g24 -S'12' -p77 -sg26 -S'GCCCC' -p78 -sg28 -S'122064771' -p79 -sg30 -g31 -sssS'grch38' -p80 -(dp81 -g20 -S'NC_000012.12:g.121626867_121626873delinsGCCA' -p82 -sg22 -(dp83 -g24 -g77 -sg26 -S'CCCCGCC' -p84 -sg28 -S'121626867' -p85 -sg30 -g72 -ssssS'reference_sequence_records' -p86 -(dp87 -S'protein' -p88 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' -p89 -sS'transcript' -p90 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' -p91 -sssS'metadata' -p92 -(dp93 -S'variantvalidator_hgvs_version' -p94 -S'1.1.3' -p95 -sS'uta_schema' -p96 -S'uta_20180821' -p97 -sS'seqrepo_db' -p98 -S'2018-08-21' -p99 -sS'variantvalidator_version' -p100 -S'v0.2' -p101 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant74.txt b/VariantValidator/testing/testOutputsMasterITS/variant74.txt deleted file mode 100644 index f6142300..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant74.txt +++ /dev/null @@ -1,228 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_032790.3:c.128_129insCCCCGCCACC' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' -p9 -aS'Genome position NC_000012.11:g.122064782 aligns within a 6-bp gap in transcript NM_032790.3 between positions c.126_127' -p10 -aS'Genome position NC_000012.11:g.122064776 aligns within a 6-bp gap in transcript NM_032790.3 between positions c.126_127' -p11 -aS'Caution should be used when reporting the displayed variant descriptions' -p12 -aS'If you are unsure, please contact admin' -p13 -aS'RefSeqGene record not available' -p14 -asS'refseqgene_context_intronic_sequence' -p15 -g6 -sS'alt_genomic_loci' -p16 -(lp17 -(dp18 -S'grch37' -p19 -(dp20 -S'hgvs_genomic_description' -p21 -S'NW_004504303.2:g.302870_302873dup' -p22 -sS'vcf' -p23 -(dp24 -S'chr' -p25 -S'HG1595_PATCH' -p26 -sS'ref' -p27 -S'CCCC' -p28 -sS'pos' -p29 -S'302870' -p30 -sS'alt' -p31 -S'CCCCCCCC' -p32 -sssa(dp33 -S'hg19' -p34 -(dp35 -g21 -S'NW_004504303.2:g.302870_302873dup' -p36 -sg23 -(dp37 -g25 -S'NW_004504303.2' -p38 -sg27 -S'CCCC' -p39 -sg29 -S'302870' -p40 -sg31 -S'CCCCCCCC' -p41 -sssasS'transcript_description' -p42 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p43 -sS'gene_symbol' -p44 -S'ORAI1' -p45 -sS'hgvs_predicted_protein_consequence' -p46 -(dp47 -S'tlr' -p48 -S'NP_116179.2:p.(Pro45AlafsTer46)' -p49 -sS'slr' -p50 -S'NP_116179.2:p.(P45Afs*46)' -p51 -ssS'submitted_variant' -p52 -S'NC_000012.11:g.122064772_122064775dup' -p53 -sS'genome_context_intronic_sequence' -p54 -g6 -sS'hgvs_lrg_variant' 
-p55 -g6 -sS'hgvs_transcript_variant' -p56 -S'NM_032790.3:c.128_129insCCCCGCCACC' -p57 -sS'hgvs_refseqgene_variant' -p58 -g6 -sS'primary_assembly_loci' -p59 -(dp60 -S'hg19' -p61 -(dp62 -g21 -S'NC_000012.11:g.122064772_122064775dup' -p63 -sg23 -(dp64 -g25 -S'chr12' -p65 -sg27 -S'CCCC' -p66 -sg29 -S'122064772' -p67 -sg31 -S'CCCCCCCC' -p68 -sssS'hg38' -p69 -(dp70 -g21 -S'NC_000012.12:g.121626875_121626876insCCCCGCCACC' -p71 -sg23 -(dp72 -g25 -g65 -sg27 -S'C' -p73 -sg29 -S'121626873' -p74 -sg31 -VCCCCCCCGCCA -p75 -sssS'grch37' -p76 -(dp77 -g21 -S'NC_000012.11:g.122064772_122064775dup' -p78 -sg23 -(dp79 -g25 -S'12' -p80 -sg27 -S'CCCC' -p81 -sg29 -S'122064772' -p82 -sg31 -S'CCCCCCCC' -p83 -sssS'grch38' -p84 -(dp85 -g21 -S'NC_000012.12:g.121626875_121626876insCCCCGCCACC' -p86 -sg23 -(dp87 -g25 -g80 -sg27 -g73 -sg29 -S'121626873' -p88 -sg31 -VCCCCCCCGCCA -p89 -ssssS'reference_sequence_records' -p90 -(dp91 -S'protein' -p92 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' -p93 -sS'transcript' -p94 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' -p95 -sssS'metadata' -p96 -(dp97 -S'variantvalidator_hgvs_version' -p98 -S'1.1.3' -p99 -sS'uta_schema' -p100 -S'uta_20180821' -p101 -sS'seqrepo_db' -p102 -S'2018-08-21' -p103 -sS'variantvalidator_version' -p104 -S'v0.2' -p105 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant75.txt b/VariantValidator/testing/testOutputsMasterITS/variant75.txt deleted file mode 100644 index d43f63f8..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant75.txt +++ /dev/null @@ -1,222 +0,0 @@ -(dp0 -S'NM_032790.3:c.126_127insTTTTCCGCCA' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' -p7 -aS'Genome position NC_000012.11:g.122064774 aligns within a Requires Analysis-bp gap in transcript NM_032790.3 between positions c.126_127' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'RefSeqGene record not available' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -(dp15 -S'grch37' -p16 -(dp17 -S'hgvs_genomic_description' -p18 -S'NW_004504303.2:g.302871_302872insTTTT' -p19 -sS'vcf' -p20 -(dp21 -S'chr' -p22 -S'HG1595_PATCH' -p23 -sS'ref' -p24 -S'C' -p25 -sS'pos' -p26 -S'302871' -p27 -sS'alt' -p28 -S'CTTTT' -p29 -sssa(dp30 -S'hg19' -p31 -(dp32 -g18 -S'NW_004504303.2:g.302871_302872insTTTT' -p33 -sg20 -(dp34 -g22 -S'NW_004504303.2' -p35 -sg24 -g25 -sg26 -S'302871' -p36 -sg28 -S'CTTTT' -p37 -sssasS'transcript_description' -p38 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p39 -sS'gene_symbol' -p40 -S'ORAI1' -p41 -sS'hgvs_predicted_protein_consequence' -p42 -(dp43 -S'tlr' -p44 -S'NP_116179.2:p.(Pro43PhefsTer48)' -p45 -sS'slr' -p46 -S'NP_116179.2:p.(P43Ffs*48)' -p47 -ssS'submitted_variant' -p48 -S'NC_000012.11:g.122064773_122064774insTTTT' -p49 -sS'genome_context_intronic_sequence' -p50 -g4 -sS'hgvs_lrg_variant' -p51 -g4 -sS'hgvs_transcript_variant' -p52 -S'NM_032790.3:c.126_127insTTTTCCGCCA' -p53 -sS'hgvs_refseqgene_variant' -p54 -g4 -sS'primary_assembly_loci' -p55 
-(dp56 -S'hg19' -p57 -(dp58 -g18 -S'NC_000012.11:g.122064773_122064774insTTTT' -p59 -sg20 -(dp60 -g22 -S'chr12' -p61 -sg24 -g25 -sg26 -S'122064773' -p62 -sg28 -S'CTTTT' -p63 -sssS'hg38' -p64 -(dp65 -g18 -S'NC_000012.12:g.121626873_121626874insTTTTCCGCCA' -p66 -sg20 -(dp67 -g22 -g61 -sg24 -g25 -sg26 -S'121626873' -p68 -sg28 -VCTTTTCCGCCA -p69 -sssS'grch37' -p70 -(dp71 -g18 -S'NC_000012.11:g.122064773_122064774insTTTT' -p72 -sg20 -(dp73 -g22 -S'12' -p74 -sg24 -g25 -sg26 -S'122064773' -p75 -sg28 -S'CTTTT' -p76 -sssS'grch38' -p77 -(dp78 -g18 -S'NC_000012.12:g.121626873_121626874insTTTTCCGCCA' -p79 -sg20 -(dp80 -g22 -g74 -sg24 -g25 -sg26 -S'121626873' -p81 -sg28 -VCTTTTCCGCCA -p82 -ssssS'reference_sequence_records' -p83 -(dp84 -S'protein' -p85 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' -p86 -sS'transcript' -p87 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' -p88 -sssS'flag' -p89 -S'gene_variant' -p90 -sS'metadata' -p91 -(dp92 -S'variantvalidator_hgvs_version' -p93 -S'1.1.3' -p94 -sS'uta_schema' -p95 -S'uta_20180821' -p96 -sS'seqrepo_db' -p97 -S'2018-08-21' -p98 -sS'variantvalidator_version' -p99 -S'v0.2' -p100 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant76.txt b/VariantValidator/testing/testOutputsMasterITS/variant76.txt deleted file mode 100644 index 1892999a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant76.txt +++ /dev/null @@ -1,222 +0,0 @@ -(dp0 -S'NM_032790.3:c.126C>A' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' -p7 -aS'Genome position NC_000012.11:g.122064778 aligns within a Requires Analysis-bp gap in transcript NM_032790.3 between positions c.126_127' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'RefSeqGene record not available' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -(dp15 -S'grch37' -p16 -(dp17 -S'hgvs_genomic_description' -p18 -S'NW_004504303.2:g.302871_302876del' -p19 -sS'vcf' -p20 -(dp21 -S'chr' -p22 -S'HG1595_PATCH' -p23 -sS'ref' -p24 -S'GCCCCGC' -p25 -sS'pos' -p26 -S'302869' -p27 -sS'alt' -p28 -S'G' -p29 -sssa(dp30 -S'hg19' -p31 -(dp32 -g18 -S'NW_004504303.2:g.302871_302876del' -p33 -sg20 -(dp34 -g22 -S'NW_004504303.2' -p35 -sg24 -S'GCCCCGC' -p36 -sg26 -S'302869' -p37 -sg28 -g29 -sssasS'transcript_description' -p38 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p39 -sS'gene_symbol' -p40 -S'ORAI1' -p41 -sS'hgvs_predicted_protein_consequence' -p42 -(dp43 -S'tlr' -p44 -S'NP_116179.2:p.(Ala42=)' -p45 -sS'slr' -p46 -S'NP_116179.2:p.(A42=)' -p47 -ssS'submitted_variant' -p48 -S'NC_000012.11:g.122064772_122064777del' -p49 -sS'genome_context_intronic_sequence' -p50 -g4 -sS'hgvs_lrg_variant' -p51 -g4 -sS'hgvs_transcript_variant' -p52 -S'NM_032790.3:c.126C>A' -p53 -sS'hgvs_refseqgene_variant' -p54 -g4 -sS'primary_assembly_loci' -p55 -(dp56 -S'hg19' -p57 -(dp58 -g18 
-S'NC_000012.11:g.122064773_122064778del' -p59 -sg20 -(dp60 -g22 -S'chr12' -p61 -sg24 -S'GCCCCGC' -p62 -sg26 -S'122064771' -p63 -sg28 -g29 -sssS'hg38' -p64 -(dp65 -g18 -S'NC_000012.12:g.121626873C>A' -p66 -sg20 -(dp67 -g22 -g61 -sg24 -VC -p68 -sg26 -S'121626873' -p69 -sg28 -VA -p70 -sssS'grch37' -p71 -(dp72 -g18 -S'NC_000012.11:g.122064773_122064778del' -p73 -sg20 -(dp74 -g22 -S'12' -p75 -sg24 -S'GCCCCGC' -p76 -sg26 -S'122064771' -p77 -sg28 -g29 -sssS'grch38' -p78 -(dp79 -g18 -S'NC_000012.12:g.121626873C>A' -p80 -sg20 -(dp81 -g22 -g75 -sg24 -g68 -sg26 -S'121626873' -p82 -sg28 -g70 -ssssS'reference_sequence_records' -p83 -(dp84 -S'protein' -p85 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' -p86 -sS'transcript' -p87 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' -p88 -sssS'flag' -p89 -S'gene_variant' -p90 -sS'metadata' -p91 -(dp92 -S'variantvalidator_hgvs_version' -p93 -S'1.1.3' -p94 -sS'uta_schema' -p95 -S'uta_20180821' -p96 -sS'seqrepo_db' -p97 -S'2018-08-21' -p98 -sS'variantvalidator_version' -p99 -S'v0.2' -p100 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant77.txt b/VariantValidator/testing/testOutputsMasterITS/variant77.txt deleted file mode 100644 index d42f6ac0..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant77.txt +++ /dev/null @@ -1,228 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_032790.3:c.131_132insCCCGCCACCGCC' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' -p9 -aS'Genome position NC_000012.11:g.122064778 aligns within a 6-bp gap in transcript NM_032790.3 between positions c.126_127' -p10 -aS'Genome position NC_000012.11:g.122064784 aligns within a 6-bp gap in transcript NM_032790.3 between positions c.126_127' -p11 -aS'Caution should be used when reporting the displayed variant descriptions' -p12 -aS'If you are unsure, please contact admin' -p13 -aS'RefSeqGene record not available' -p14 -asS'refseqgene_context_intronic_sequence' -p15 -g6 -sS'alt_genomic_loci' -p16 -(lp17 -(dp18 -S'grch37' -p19 -(dp20 -S'hgvs_genomic_description' -p21 -S'NW_004504303.2:g.302871_302876dup' -p22 -sS'vcf' -p23 -(dp24 -S'chr' -p25 -S'HG1595_PATCH' -p26 -sS'ref' -p27 -S'CCCCGC' -p28 -sS'pos' -p29 -S'302870' -p30 -sS'alt' -p31 -S'CCCCGCCCCCGC' -p32 -sssa(dp33 -S'hg19' -p34 -(dp35 -g21 -S'NW_004504303.2:g.302871_302876dup' -p36 -sg23 -(dp37 -g25 -S'NW_004504303.2' -p38 -sg27 -S'CCCCGC' -p39 -sg29 -S'302870' -p40 -sg31 -S'CCCCGCCCCCGC' -p41 -sssasS'transcript_description' -p42 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p43 -sS'gene_symbol' -p44 -S'ORAI1' -p45 -sS'hgvs_predicted_protein_consequence' -p46 -(dp47 -S'tlr' -p48 -S'NP_116179.2:p.(Pro44_Pro47dup)' -p49 -sS'slr' -p50 -S'NP_116179.2:p.(P44_P47dup)' -p51 -ssS'submitted_variant' -p52 -S'NC_000012.11:g.122064772_122064777dup' -p53 -sS'genome_context_intronic_sequence' -p54 -g6 
-sS'hgvs_lrg_variant' -p55 -g6 -sS'hgvs_transcript_variant' -p56 -S'NM_032790.3:c.131_132insCCCGCCACCGCC' -p57 -sS'hgvs_refseqgene_variant' -p58 -g6 -sS'primary_assembly_loci' -p59 -(dp60 -S'hg19' -p61 -(dp62 -g21 -S'NC_000012.11:g.122064773_122064778dup' -p63 -sg23 -(dp64 -g25 -S'chr12' -p65 -sg27 -S'CCCCGC' -p66 -sg29 -S'122064772' -p67 -sg31 -S'CCCCGCCCCCGC' -p68 -sssS'hg38' -p69 -(dp70 -g21 -S'NC_000012.12:g.121626878_121626879insCCCGCCACCGCC' -p71 -sg23 -(dp72 -g25 -g65 -sg27 -S'C' -p73 -sg29 -S'121626873' -p74 -sg31 -VCCCGCCCCCGCCA -p75 -sssS'grch37' -p76 -(dp77 -g21 -S'NC_000012.11:g.122064773_122064778dup' -p78 -sg23 -(dp79 -g25 -S'12' -p80 -sg27 -S'CCCCGC' -p81 -sg29 -S'122064772' -p82 -sg31 -S'CCCCGCCCCCGC' -p83 -sssS'grch38' -p84 -(dp85 -g21 -S'NC_000012.12:g.121626878_121626879insCCCGCCACCGCC' -p86 -sg23 -(dp87 -g25 -g80 -sg27 -g73 -sg29 -S'121626873' -p88 -sg31 -VCCCGCCCCCGCCA -p89 -ssssS'reference_sequence_records' -p90 -(dp91 -S'protein' -p92 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' -p93 -sS'transcript' -p94 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' -p95 -sssS'metadata' -p96 -(dp97 -S'variantvalidator_hgvs_version' -p98 -S'1.1.3' -p99 -sS'uta_schema' -p100 -S'uta_20180821' -p101 -sS'seqrepo_db' -p102 -S'2018-08-21' -p103 -sS'variantvalidator_version' -p104 -S'v0.2' -p105 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant78.txt b/VariantValidator/testing/testOutputsMasterITS/variant78.txt deleted file mode 100644 index 07aa6f8d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant78.txt +++ /dev/null @@ -1,226 +0,0 @@ -(dp0 -S'NM_032790.3:c.135_136insACCGCCACCG' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' -p7 -aS'NC_000012.11:g.122064778 is one of Requires Analysis genomic base(s) that fail to align to transcript NM_032790.3 between positions c.126_127' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'RefSeqGene record not available' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -(dp15 -S'grch37' -p16 -(dp17 -S'hgvs_genomic_description' -p18 -S'NW_004504303.2:g.302877_302880dup' -p19 -sS'vcf' -p20 -(dp21 -S'chr' -p22 -S'HG1595_PATCH' -p23 -sS'ref' -p24 -S'ACCG' -p25 -sS'pos' -p26 -S'302877' -p27 -sS'alt' -p28 -S'ACCGACCG' -p29 -sssa(dp30 -S'hg19' -p31 -(dp32 -g18 -S'NW_004504303.2:g.302877_302880dup' -p33 -sg20 -(dp34 -g22 -S'NW_004504303.2' -p35 -sg24 -S'ACCG' -p36 -sg26 -S'302877' -p37 -sg28 -S'ACCGACCG' -p38 -sssasS'transcript_description' -p39 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p40 -sS'gene_symbol' -p41 -S'ORAI1' -p42 -sS'hgvs_predicted_protein_consequence' -p43 -(dp44 -S'tlr' -p45 -S'NP_116179.2:p.(Pro46ThrfsTer45)' -p46 -sS'slr' -p47 -S'NP_116179.2:p.(P46Tfs*45)' -p48 -ssS'submitted_variant' -p49 -S'NC_000012.11:g.122064779_122064782dup' -p50 -sS'genome_context_intronic_sequence' -p51 -g4 -sS'hgvs_lrg_variant' -p52 -g4 -sS'hgvs_transcript_variant' -p53 -S'NM_032790.3:c.135_136insACCGCCACCG' -p54 -sS'hgvs_refseqgene_variant' -p55 -g4 
-sS'primary_assembly_loci' -p56 -(dp57 -S'hg19' -p58 -(dp59 -g18 -S'NC_000012.11:g.122064779_122064782dup' -p60 -sg20 -(dp61 -g22 -S'chr12' -p62 -sg24 -S'ACCG' -p63 -sg26 -S'122064779' -p64 -sg28 -S'ACCGACCG' -p65 -sssS'hg38' -p66 -(dp67 -g18 -S'NC_000012.12:g.121626882_121626883insACCGCCACCG' -p68 -sg20 -(dp69 -g22 -g62 -sg24 -S'C' -p70 -sg26 -S'121626873' -p71 -sg28 -VCCCGCCACCGA -p72 -sssS'grch37' -p73 -(dp74 -g18 -S'NC_000012.11:g.122064779_122064782dup' -p75 -sg20 -(dp76 -g22 -S'12' -p77 -sg24 -S'ACCG' -p78 -sg26 -S'122064779' -p79 -sg28 -S'ACCGACCG' -p80 -sssS'grch38' -p81 -(dp82 -g18 -S'NC_000012.12:g.121626882_121626883insACCGCCACCG' -p83 -sg20 -(dp84 -g22 -g77 -sg24 -g70 -sg26 -S'121626873' -p85 -sg28 -VCCCGCCACCGA -p86 -ssssS'reference_sequence_records' -p87 -(dp88 -S'protein' -p89 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' -p90 -sS'transcript' -p91 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' -p92 -sssS'flag' -p93 -S'gene_variant' -p94 -sS'metadata' -p95 -(dp96 -S'variantvalidator_hgvs_version' -p97 -S'1.1.3' -p98 -sS'uta_schema' -p99 -S'uta_20180821' -p100 -sS'seqrepo_db' -p101 -S'2018-08-21' -p102 -sS'variantvalidator_version' -p103 -S'v0.2' -p104 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant79.txt b/VariantValidator/testing/testOutputsMasterITS/variant79.txt deleted file mode 100644 index b484c2d4..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant79.txt +++ /dev/null @@ -1,223 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_032790.3:c.126_127insA' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_032790.3 with genome build GRCh37' -p9 -aS'Genome position NC_000012.11:g.122064776 aligns within a Requires Analysis-bp gap in transcript NM_032790.3 between positions c.126_127' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'refseqgene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -(dp17 -S'grch37' -p18 -(dp19 -S'hgvs_genomic_description' -p20 -S'NW_004504303.2:g.302872_302876del' -p21 -sS'vcf' -p22 -(dp23 -S'chr' -p24 -S'HG1595_PATCH' -p25 -sS'ref' -p26 -S'GGCCCC' -p27 -sS'pos' -p28 -S'302868' -p29 -sS'alt' -p30 -S'G' -p31 -sssa(dp32 -S'hg19' -p33 -(dp34 -g20 -S'NW_004504303.2:g.302872_302876del' -p35 -sg22 -(dp36 -g24 -S'NW_004504303.2' -p37 -sg26 -S'GGCCCC' -p38 -sg28 -S'302868' -p39 -sg30 -g31 -sssasS'transcript_description' -p40 -VHomo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA -p41 -sS'gene_symbol' -p42 -S'ORAI1' -p43 -sS'hgvs_predicted_protein_consequence' -p44 -(dp45 -S'tlr' -p46 -S'NP_116179.2:p.(Pro43ThrfsTer45)' -p47 -sS'slr' -p48 -S'NP_116179.2:p.(P43Tfs*45)' -p49 -ssS'submitted_variant' -p50 -S'NC_000012.11:g.122064772_122064782del' -p51 -sS'genome_context_intronic_sequence' -p52 -g6 -sS'hgvs_lrg_variant' -p53 -g6 -sS'hgvs_transcript_variant' -p54 -S'NM_032790.3:c.126_127insA' -p55 -sS'hgvs_refseqgene_variant' -p56 -g6 
-sS'primary_assembly_loci' -p57 -(dp58 -S'hg19' -p59 -(dp60 -g20 -S'NC_000012.11:g.122064774_122064778del' -p61 -sg22 -(dp62 -g24 -S'chr12' -p63 -sg26 -S'GGCCCC' -p64 -sg28 -S'122064770' -p65 -sg30 -g31 -sssS'hg38' -p66 -(dp67 -g20 -S'NC_000012.12:g.121626873_121626874insA' -p68 -sg22 -(dp69 -g24 -g63 -sg26 -S'C' -p70 -sg28 -S'121626873' -p71 -sg30 -VCA -p72 -sssS'grch37' -p73 -(dp74 -g20 -S'NC_000012.11:g.122064774_122064778del' -p75 -sg22 -(dp76 -g24 -S'12' -p77 -sg26 -S'GGCCCC' -p78 -sg28 -S'122064770' -p79 -sg30 -g31 -sssS'grch38' -p80 -(dp81 -g20 -S'NC_000012.12:g.121626873_121626874insA' -p82 -sg22 -(dp83 -g24 -g77 -sg26 -g70 -sg28 -S'121626873' -p84 -sg30 -VCA -p85 -ssssS'reference_sequence_records' -p86 -(dp87 -S'protein' -p88 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2' -p89 -sS'transcript' -p90 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3' -p91 -sssS'metadata' -p92 -(dp93 -S'variantvalidator_hgvs_version' -p94 -S'1.1.3' -p95 -sS'uta_schema' -p96 -S'uta_20180821' -p97 -sS'seqrepo_db' -p98 -S'2018-08-21' -p99 -sS'variantvalidator_version' -p100 -S'v0.2' -p101 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant8.txt b/VariantValidator/testing/testOutputsMasterITS/variant8.txt deleted file mode 100644 index 836f91e7..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant8.txt +++ /dev/null @@ -1,177 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_000088.3:c.589_590delinsCT' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_000088.3:c.589GG>CT automapped to NM_000088.3:c.589_590delGGinsCT' -p9 -aS'RefSeqGene record not available' -p10 -asS'refseqgene_context_intronic_sequence' -p11 -g6 -sS'alt_genomic_loci' -p12 -(lp13 -sS'transcript_description' -p14 -VHomo sapiens collagen type I alpha 1 chain (COL1A1), mRNA -p15 -sS'gene_symbol' -p16 -S'COL1A1' -p17 -sS'hgvs_predicted_protein_consequence' -p18 -(dp19 -S'tlr' -p20 -S'NP_000079.2:p.(Gly197Leu)' -p21 -sS'slr' -p22 -S'NP_000079.2:p.(G197L)' -p23 -ssS'submitted_variant' -p24 -S'NM_000088.3:c.589GG>CT' -p25 -sS'genome_context_intronic_sequence' -p26 -g6 -sS'hgvs_lrg_variant' -p27 -g6 -sS'hgvs_transcript_variant' -p28 -S'NM_000088.3:c.589_590delinsCT' -p29 -sS'hgvs_refseqgene_variant' -p30 -g6 -sS'primary_assembly_loci' -p31 -(dp32 -S'hg19' -p33 -(dp34 -S'hgvs_genomic_description' -p35 -S'NC_000017.10:g.48275362_48275363delinsAG' -p36 -sS'vcf' -p37 -(dp38 -S'chr' -p39 -S'chr17' -p40 -sS'ref' -p41 -S'CC' -p42 -sS'pos' -p43 -S'48275362' -p44 -sS'alt' -p45 -VAG -p46 -sssS'hg38' -p47 -(dp48 -g35 -S'NC_000017.11:g.50198001_50198002delinsAG' -p49 -sg37 -(dp50 -g39 -g40 -sg41 -S'CC' -p51 -sg43 -S'50198001' -p52 -sg45 -VAG -p53 -sssS'grch37' -p54 -(dp55 -g35 -S'NC_000017.10:g.48275362_48275363delinsAG' -p56 -sg37 -(dp57 -g39 -S'17' -p58 -sg41 -S'CC' -p59 -sg43 -S'48275362' -p60 -sg45 -g46 -sssS'grch38' -p61 -(dp62 -g35 -S'NC_000017.11:g.50198001_50198002delinsAG' -p63 -sg37 -(dp64 -g39 -g58 -sg41 -S'CC' -p65 -sg43 -S'50198001' -p66 -sg45 -g53 -ssssS'reference_sequence_records' 
-p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3' -p72 -sssS'metadata' -p73 -(dp74 -S'variantvalidator_hgvs_version' -p75 -S'1.1.3' -p76 -sS'uta_schema' -p77 -S'uta_20180821' -p78 -sS'seqrepo_db' -p79 -S'2018-08-21' -p80 -sS'variantvalidator_version' -p81 -S'v0.2' -p82 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant80.txt b/VariantValidator/testing/testOutputsMasterITS/variant80.txt deleted file mode 100644 index 725ebe82..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant80.txt +++ /dev/null @@ -1,882 +0,0 @@ -(dp0 -S'NM_021088.3:c.471_473dup' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_021088.3 with genome build GRCh37' -p7 -aS'NC_000002.11:g.95847040_95847043 contains 3 genomic base(s) that fail to align to transcript NM_021088.3' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'RefSeqGene record not available' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens zinc finger protein 2 (ZNF2), transcript variant 1, mRNA -p16 -sS'gene_symbol' -p17 -S'ZNF2' -p18 -sS'hgvs_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_066574.2:p.(Arg159dup)' -p22 -sS'slr' -p23 -S'NP_066574.2:p.(R159dup)' -p24 -ssS'submitted_variant' -p25 -S'NC_000002.11:g.95847041_95847043GCG=' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'hgvs_lrg_variant' -p28 -g4 -sS'hgvs_transcript_variant' -p29 -S'NM_021088.3:c.471_473dup' -p30 -sS'hgvs_refseqgene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000002.11:g.95847037_95847050=' -p37 -sS'vcf' -p38 
-(dp39 -S'chr' -p40 -S'chr2' -p41 -sS'ref' -p42 -S'GCTTGCGGCGGCGA' -p43 -sS'pos' -p44 -S'95847037' -p45 -sS'alt' -p46 -g43 -sssS'hg38' -p47 -(dp48 -g36 -S'NC_000002.12:g.95181299_95181301dup' -p49 -sg38 -(dp50 -g40 -g41 -sg42 -S'GCG' -p51 -sg44 -S'95181296' -p52 -sg46 -VGCGGCG -p53 -sssS'grch37' -p54 -(dp55 -g36 -S'NC_000002.11:g.95847037_95847050=' -p56 -sg38 -(dp57 -g40 -S'2' -p58 -sg42 -g43 -sg44 -S'95847037' -p59 -sg46 -g43 -sssS'grch38' -p60 -(dp61 -g36 -S'NC_000002.12:g.95181299_95181301dup' -p62 -sg38 -(dp63 -g40 -g58 -sg42 -S'GCG' -p64 -sg44 -S'95181296' -p65 -sg46 -VGCGGCG -p66 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_066574.2' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_021088.3' -p72 -sssS'NM_001291605.1:c.510_512dup' -p73 -(dp74 -g3 -g4 -sg5 -(lp75 -S'The displayed variants may be artefacts of aligning NM_001291605.1 with genome build GRCh37' -p76 -aS'NC_000002.11:g.95847040_95847043 contains 3 genomic base(s) that fail to align to transcript NM_001291605.1' -p77 -aS'Caution should be used when reporting the displayed variant descriptions' -p78 -aS'If you are unsure, please contact admin' -p79 -aS'RefSeqGene record not available' -p80 -asg12 -g4 -sg13 -(lp81 -sg15 -VHomo sapiens zinc finger protein 2 (ZNF2), transcript variant 5, mRNA -p82 -sg17 -S'ZNF2' -p83 -sg19 -(dp84 -g21 -S'NP_001278534.1:p.(Arg172dup)' -p85 -sg23 -S'NP_001278534.1:p.(R172dup)' -p86 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_001291605.1:c.510_512dup' -p87 -sg31 -g4 -sg32 -(dp88 -S'hg19' -p89 -(dp90 -g36 -S'NC_000002.11:g.95847037_95847050=' -p91 -sg38 -(dp92 -g40 -g41 -sg42 -g43 -sg44 -S'95847037' -p93 -sg46 -g43 -sssg47 -(dp94 -g36 -S'NC_000002.12:g.95181299_95181301dup' -p95 -sg38 -(dp96 -g40 -g41 -sg42 -S'GCG' -p97 -sg44 -S'95181296' -p98 -sg46 -VGCGGCG -p99 -sssS'grch37' -p100 -(dp101 -g36 -S'NC_000002.11:g.95847037_95847050=' -p102 -sg38 -(dp103 -g40 -g58 -sg42 -g43 -sg44 -S'95847037' 
-p104 -sg46 -g43 -sssS'grch38' -p105 -(dp106 -g36 -S'NC_000002.12:g.95181299_95181301dup' -p107 -sg38 -(dp108 -g40 -g58 -sg42 -S'GCG' -p109 -sg44 -S'95181296' -p110 -sg46 -VGCGGCG -p111 -ssssg67 -(dp112 -g69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278534.1' -p113 -sg71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291605.1' -p114 -sssS'NM_001017396.2:c.345_347dup' -p115 -(dp116 -g3 -g4 -sg5 -(lp117 -S'The displayed variants may be artefacts of aligning NM_001017396.2 with genome build GRCh37' -p118 -aS'NC_000002.11:g.95847040_95847043 contains 3 genomic base(s) that fail to align to transcript NM_001017396.2' -p119 -aS'Caution should be used when reporting the displayed variant descriptions' -p120 -aS'If you are unsure, please contact admin' -p121 -aS'RefSeqGene record not available' -p122 -asg12 -g4 -sg13 -(lp123 -sg15 -VHomo sapiens zinc finger protein 2 (ZNF2), transcript variant 2, mRNA -p124 -sg17 -S'ZNF2' -p125 -sg19 -(dp126 -g21 -S'NP_001017396.1:p.(Arg117dup)' -p127 -sg23 -S'NP_001017396.1:p.(R117dup)' -p128 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_001017396.2:c.345_347dup' -p129 -sg31 -g4 -sg32 -(dp130 -S'hg19' -p131 -(dp132 -g36 -S'NC_000002.11:g.95847037_95847050=' -p133 -sg38 -(dp134 -g40 -g41 -sg42 -g43 -sg44 -S'95847037' -p135 -sg46 -g43 -sssg47 -(dp136 -g36 -S'NC_000002.12:g.95181299_95181301dup' -p137 -sg38 -(dp138 -g40 -g41 -sg42 -S'GCG' -p139 -sg44 -S'95181296' -p140 -sg46 -VGCGGCG -p141 -sssS'grch37' -p142 -(dp143 -g36 -S'NC_000002.11:g.95847037_95847050=' -p144 -sg38 -(dp145 -g40 -g58 -sg42 -g43 -sg44 -S'95847037' -p146 -sg46 -g43 -sssS'grch38' -p147 -(dp148 -g36 -S'NC_000002.12:g.95181299_95181301dup' -p149 -sg38 -(dp150 -g40 -g58 -sg42 -S'GCG' -p151 -sg44 -S'95181296' -p152 -sg46 -VGCGGCG -p153 -ssssg67 -(dp154 -g69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001017396.1' -p155 -sg71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001017396.2' -p156 -sssS'NM_001282398.1:c.357_359dup' -p157 -(dp158 -g3 -g4 -sg5 -(lp159 -S'The displayed 
variants may be artefacts of aligning NM_001282398.1 with genome build GRCh37' -p160 -aS'NC_000002.11:g.95847040_95847043 contains 3 genomic base(s) that fail to align to transcript NM_001282398.1' -p161 -aS'Caution should be used when reporting the displayed variant descriptions' -p162 -aS'If you are unsure, please contact admin' -p163 -aS'RefSeqGene record not available' -p164 -asg12 -g4 -sg13 -(lp165 -sg15 -VHomo sapiens zinc finger protein 2 (ZNF2), transcript variant 3, mRNA -p166 -sg17 -S'ZNF2' -p167 -sg19 -(dp168 -g21 -S'NP_001269327.1:p.(Arg121dup)' -p169 -sg23 -S'NP_001269327.1:p.(R121dup)' -p170 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_001282398.1:c.357_359dup' -p171 -sg31 -g4 -sg32 -(dp172 -S'hg19' -p173 -(dp174 -g36 -S'NC_000002.11:g.95847037_95847050=' -p175 -sg38 -(dp176 -g40 -g41 -sg42 -g43 -sg44 -S'95847037' -p177 -sg46 -g43 -sssg47 -(dp178 -g36 -S'NC_000002.12:g.95181299_95181301dup' -p179 -sg38 -(dp180 -g40 -g41 -sg42 -S'GCG' -p181 -sg44 -S'95181296' -p182 -sg46 -VGCGGCG -p183 -sssS'grch37' -p184 -(dp185 -g36 -S'NC_000002.11:g.95847037_95847050=' -p186 -sg38 -(dp187 -g40 -g58 -sg42 -g43 -sg44 -S'95847037' -p188 -sg46 -g43 -sssS'grch38' -p189 -(dp190 -g36 -S'NC_000002.12:g.95181299_95181301dup' -p191 -sg38 -(dp192 -g40 -g58 -sg42 -S'GCG' -p193 -sg44 -S'95181296' -p194 -sg46 -VGCGGCG -p195 -ssssg67 -(dp196 -g69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269327.1' -p197 -sg71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282398.1' -p198 -sssS'flag' -p199 -S'gene_variant' -p200 -sS'NM_001291604.1:c.231_233dup' -p201 -(dp202 -g3 -g4 -sg5 -(lp203 -S'The displayed variants may be artefacts of aligning NM_001291604.1 with genome build GRCh37' -p204 -aS'NC_000002.11:g.95847040_95847043 contains 3 genomic base(s) that fail to align to transcript NM_001291604.1' -p205 -aS'Caution should be used when reporting the displayed variant descriptions' -p206 -aS'If you are unsure, please contact admin' -p207 -aS'RefSeqGene record not available' -p208 -asg12 
-g4 -sg13 -(lp209 -sg15 -VHomo sapiens zinc finger protein 2 (ZNF2), transcript variant 4, mRNA -p210 -sg17 -S'ZNF2' -p211 -sg19 -(dp212 -g21 -S'NP_001278533.1:p.(Arg79dup)' -p213 -sg23 -S'NP_001278533.1:p.(R79dup)' -p214 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_001291604.1:c.231_233dup' -p215 -sg31 -g4 -sg32 -(dp216 -S'hg19' -p217 -(dp218 -g36 -S'NC_000002.11:g.95847037_95847050=' -p219 -sg38 -(dp220 -g40 -g41 -sg42 -g43 -sg44 -S'95847037' -p221 -sg46 -g43 -sssg47 -(dp222 -g36 -S'NC_000002.12:g.95181299_95181301dup' -p223 -sg38 -(dp224 -g40 -g41 -sg42 -S'GCG' -p225 -sg44 -S'95181296' -p226 -sg46 -VGCGGCG -p227 -sssS'grch37' -p228 -(dp229 -g36 -S'NC_000002.11:g.95847037_95847050=' -p230 -sg38 -(dp231 -g40 -g58 -sg42 -g43 -sg44 -S'95847037' -p232 -sg46 -g43 -sssS'grch38' -p233 -(dp234 -g36 -S'NC_000002.12:g.95181299_95181301dup' -p235 -sg38 -(dp236 -g40 -g58 -sg42 -S'GCG' -p237 -sg44 -S'95181296' -p238 -sg46 -VGCGGCG -p239 -ssssg67 -(dp240 -g69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278533.1' -p241 -sg71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291604.1' -p242 -sssS'NM_021088.2:c.471_473dup' -p243 -(dp244 -g3 -g4 -sg5 -(lp245 -S'The displayed variants may be artefacts of aligning NM_021088.2 with genome build GRCh37' -p246 -aS'NC_000002.11:g.95847040_95847043 contains 3 genomic base(s) that fail to align to transcript NM_021088.2' -p247 -aS'Caution should be used when reporting the displayed variant descriptions' -p248 -aS'If you are unsure, please contact admin' -p249 -aS'A more recent version of the selected reference sequence NM_021088.2 is available (NM_021088.3)' -p250 -aS'NM_021088.3:c.471_473dupGCG MUST be fully validated prior to use in reports' -p251 -aS'select_variants=NM_021088.3:c.471_473dup' -p252 -aS'RefSeqGene record not available' -p253 -asg12 -g4 -sg13 -(lp254 -sg15 -VHomo sapiens zinc finger protein 2 (ZNF2), transcript variant 1, mRNA -p255 -sg17 -S'ZNF2' -p256 -sg19 -(dp257 -g21 -S'NP_066574.2:p.(Arg159dup)' -p258 -sg23 
-S'NP_066574.2:p.(R159dup)' -p259 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_021088.2:c.471_473dup' -p260 -sg31 -g4 -sg32 -(dp261 -S'hg19' -p262 -(dp263 -g36 -S'NC_000002.11:g.95847037_95847050=' -p264 -sg38 -(dp265 -g40 -g41 -sg42 -g43 -sg44 -S'95847037' -p266 -sg46 -g43 -sssS'grch37' -p267 -(dp268 -g36 -S'NC_000002.11:g.95847037_95847050=' -p269 -sg38 -(dp270 -g40 -g58 -sg42 -g43 -sg44 -S'95847037' -p271 -sg46 -g43 -ssssg67 -(dp272 -g69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_066574.2' -p273 -sg71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_021088.2' -p274 -sssS'NM_001017396.1:c.345_347dup' -p275 -(dp276 -g3 -g4 -sg5 -(lp277 -S'The displayed variants may be artefacts of aligning NM_001017396.1 with genome build GRCh37' -p278 -aS'NC_000002.11:g.95847040_95847043 contains 3 genomic base(s) that fail to align to transcript NM_001017396.1' -p279 -aS'Caution should be used when reporting the displayed variant descriptions' -p280 -aS'If you are unsure, please contact admin' -p281 -aS'A more recent version of the selected reference sequence NM_001017396.1 is available (NM_001017396.2)' -p282 -aS'NM_001017396.2:c.345_347dupGCG MUST be fully validated prior to use in reports' -p283 -aS'select_variants=NM_001017396.2:c.345_347dup' -p284 -aS'RefSeqGene record not available' -p285 -asg12 -g4 -sg13 -(lp286 -sg15 -VHomo sapiens zinc finger protein 2 (ZNF2), transcript variant 2, mRNA -p287 -sg17 -S'ZNF2' -p288 -sg19 -(dp289 -g21 -S'NP_001017396.1:p.(Arg117dup)' -p290 -sg23 -S'NP_001017396.1:p.(R117dup)' -p291 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_001017396.1:c.345_347dup' -p292 -sg31 -g4 -sg32 -(dp293 -S'hg19' -p294 -(dp295 -g36 -S'NC_000002.11:g.95847037_95847050=' -p296 -sg38 -(dp297 -g40 -g41 -sg42 -g43 -sg44 -S'95847037' -p298 -sg46 -g43 -sssS'grch37' -p299 -(dp300 -g36 -S'NC_000002.11:g.95847037_95847050=' -p301 -sg38 -(dp302 -g40 -g58 -sg42 -g43 -sg44 -S'95847037' -p303 -sg46 -g43 -ssssg67 -(dp304 -g69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001017396.1' 
-p305 -sg71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001017396.1' -p306 -sssS'metadata' -p307 -(dp308 -S'variantvalidator_hgvs_version' -p309 -S'1.1.3' -p310 -sS'uta_schema' -p311 -S'uta_20180821' -p312 -sS'seqrepo_db' -p313 -S'2018-08-21' -p314 -sS'variantvalidator_version' -p315 -S'v0.2' -p316 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant81.txt b/VariantValidator/testing/testOutputsMasterITS/variant81.txt deleted file mode 100644 index cceaacd1..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant81.txt +++ /dev/null @@ -1,628 +0,0 @@ -(dp0 -S'NM_001083585.1:c.*344_*368dup' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_001083585.1 with genome build GRCh37' -p7 -aS'NC_000017.10:g.5286859_5286913 contains 25 genomic base(s) that fail to align to transcript NM_001083585.1' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'A more recent version of the selected reference sequence NM_001083585.1 is available (NM_001083585.2)' -p11 -aS'NM_001083585.2:c.*344_*368dupTAGTGTTTGGAATTTTCTGTTCATA MUST be fully validated prior to use in reports' -p12 -aS'select_variants=NM_001083585.2:c.*344_*368dup' -p13 -aS'RefSeqGene record not available' -p14 -asS'refseqgene_context_intronic_sequence' -p15 -g4 -sS'alt_genomic_loci' -p16 -(lp17 -sS'transcript_description' -p18 -VHomo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 2, mRNA -p19 -sS'gene_symbol' -p20 -S'RABEP1' -p21 -sS'hgvs_predicted_protein_consequence' -p22 -(dp23 -S'tlr' -p24 -S'NP_001077054.1:p.?' -p25 -sS'slr' -p26 -S'NP_001077054.1:p.?' 
-p27 -ssS'submitted_variant' -p28 -S'NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=' -p29 -sS'genome_context_intronic_sequence' -p30 -g4 -sS'hgvs_lrg_variant' -p31 -g4 -sS'hgvs_transcript_variant' -p32 -S'NM_001083585.1:c.*344_*368dup' -p33 -sS'hgvs_refseqgene_variant' -p34 -g4 -sS'primary_assembly_loci' -p35 -(dp36 -S'hg19' -p37 -(dp38 -S'hgvs_genomic_description' -p39 -S'NC_000017.10:g.5286857_5286915=' -p40 -sS'vcf' -p41 -(dp42 -S'chr' -p43 -S'chr17' -p44 -sS'ref' -p45 -S'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA' -p46 -sS'pos' -p47 -S'5286857' -p48 -sS'alt' -p49 -g46 -sssS'grch37' -p50 -(dp51 -g39 -S'NC_000017.10:g.5286857_5286915=' -p52 -sg41 -(dp53 -g43 -S'17' -p54 -sg45 -g46 -sg47 -S'5286857' -p55 -sg49 -g46 -ssssS'reference_sequence_records' -p56 -(dp57 -S'protein' -p58 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001077054.1' -p59 -sS'transcript' -p60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001083585.1' -p61 -sssS'NM_004703.5:c.*344_*368dup' -p62 -(dp63 -g3 -g4 -sg5 -(lp64 -S'The displayed variants may be artefacts of aligning NM_004703.5 with genome build GRCh37' -p65 -aS'NC_000017.10:g.5286859_5286913 contains 25 genomic base(s) that fail to align to transcript NM_004703.5' -p66 -aS'Caution should be used when reporting the displayed variant descriptions' -p67 -aS'If you are unsure, please contact admin' -p68 -aS'RefSeqGene record not available' -p69 -asg15 -g4 -sg16 -(lp70 -sg18 -VHomo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 1, mRNA -p71 -sg20 -S'RABEP1' -p72 -sg22 -(dp73 -g24 -S'NP_004694.2:p.?' -p74 -sg26 -S'NP_004694.2:p.?' 
-p75 -ssg28 -g29 -sg30 -g4 -sg31 -g4 -sg32 -S'NM_004703.5:c.*344_*368dup' -p76 -sg34 -g4 -sg35 -(dp77 -S'hg19' -p78 -(dp79 -g39 -S'NC_000017.10:g.5286857_5286915=' -p80 -sg41 -(dp81 -g43 -g44 -sg45 -g46 -sg47 -S'5286857' -p82 -sg49 -g46 -sssS'hg38' -p83 -(dp84 -g39 -S'NC_000017.11:g.5383567_5383591dup' -p85 -sg41 -(dp86 -g43 -g44 -sg45 -S'TAGTGTTTGGAATTTTCTGTTCATA' -p87 -sg47 -S'5383567' -p88 -sg49 -VTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATA -p89 -sssS'grch37' -p90 -(dp91 -g39 -S'NC_000017.10:g.5286857_5286915=' -p92 -sg41 -(dp93 -g43 -g54 -sg45 -g46 -sg47 -S'5286857' -p94 -sg49 -g46 -sssS'grch38' -p95 -(dp96 -g39 -S'NC_000017.11:g.5383567_5383591dup' -p97 -sg41 -(dp98 -g43 -g54 -sg45 -S'TAGTGTTTGGAATTTTCTGTTCATA' -p99 -sg47 -S'5383567' -p100 -sg49 -VTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATA -p101 -ssssg56 -(dp102 -g58 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004694.2' -p103 -sg60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004703.5' -p104 -sssS'NM_004703.4:c.*344_*368dup' -p105 -(dp106 -g3 -g4 -sg5 -(lp107 -S'The displayed variants may be artefacts of aligning NM_004703.4 with genome build GRCh37' -p108 -aS'NC_000017.10:g.5286859_5286913 contains 25 genomic base(s) that fail to align to transcript NM_004703.4' -p109 -aS'Caution should be used when reporting the displayed variant descriptions' -p110 -aS'If you are unsure, please contact admin' -p111 -aS'A more recent version of the selected reference sequence NM_004703.4 is available (NM_004703.5)' -p112 -aS'NM_004703.5:c.*344_*368dupTAGTGTTTGGAATTTTCTGTTCATA MUST be fully validated prior to use in reports' -p113 -aS'select_variants=NM_004703.5:c.*344_*368dup' -p114 -aS'RefSeqGene record not available' -p115 -asg15 -g4 -sg16 -(lp116 -sg18 -VHomo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 1, mRNA -p117 -sg20 -S'RABEP1' -p118 -sg22 -(dp119 -g24 -S'NP_004694.2:p.?' -p120 -sg26 -S'NP_004694.2:p.?' 
-p121 -ssg28 -g29 -sg30 -g4 -sg31 -g4 -sg32 -S'NM_004703.4:c.*344_*368dup' -p122 -sg34 -g4 -sg35 -(dp123 -S'hg19' -p124 -(dp125 -g39 -S'NC_000017.10:g.5286857_5286915=' -p126 -sg41 -(dp127 -g43 -g44 -sg45 -g46 -sg47 -S'5286857' -p128 -sg49 -g46 -sssS'grch37' -p129 -(dp130 -g39 -S'NC_000017.10:g.5286857_5286915=' -p131 -sg41 -(dp132 -g43 -g54 -sg45 -g46 -sg47 -S'5286857' -p133 -sg49 -g46 -ssssg56 -(dp134 -g58 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_004694.2' -p135 -sg60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_004703.4' -p136 -sssS'flag' -p137 -S'gene_variant' -p138 -sS'NM_001291581.1:c.*344_*368dup' -p139 -(dp140 -g3 -g4 -sg5 -(lp141 -S'The displayed variants may be artefacts of aligning NM_001291581.1 with genome build GRCh37' -p142 -aS'NC_000017.10:g.5286859_5286913 contains 25 genomic base(s) that fail to align to transcript NM_001291581.1' -p143 -aS'Caution should be used when reporting the displayed variant descriptions' -p144 -aS'If you are unsure, please contact admin' -p145 -aS'RefSeqGene record not available' -p146 -asg15 -g4 -sg16 -(lp147 -sg18 -VHomo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 3, mRNA -p148 -sg20 -S'RABEP1' -p149 -sg22 -(dp150 -g24 -S'NP_001278510.1:p.?' -p151 -sg26 -S'NP_001278510.1:p.?' 
-p152 -ssg28 -g29 -sg30 -g4 -sg31 -g4 -sg32 -S'NM_001291581.1:c.*344_*368dup' -p153 -sg34 -g4 -sg35 -(dp154 -S'hg19' -p155 -(dp156 -g39 -S'NC_000017.10:g.5286857_5286915=' -p157 -sg41 -(dp158 -g43 -g44 -sg45 -g46 -sg47 -S'5286857' -p159 -sg49 -g46 -sssg83 -(dp160 -g39 -S'NC_000017.11:g.5383567_5383591dup' -p161 -sg41 -(dp162 -g43 -g44 -sg45 -S'TAGTGTTTGGAATTTTCTGTTCATA' -p163 -sg47 -S'5383567' -p164 -sg49 -VTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATA -p165 -sssS'grch37' -p166 -(dp167 -g39 -S'NC_000017.10:g.5286857_5286915=' -p168 -sg41 -(dp169 -g43 -g54 -sg45 -g46 -sg47 -S'5286857' -p170 -sg49 -g46 -sssS'grch38' -p171 -(dp172 -g39 -S'NC_000017.11:g.5383567_5383591dup' -p173 -sg41 -(dp174 -g43 -g54 -sg45 -S'TAGTGTTTGGAATTTTCTGTTCATA' -p175 -sg47 -S'5383567' -p176 -sg49 -VTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATA -p177 -ssssg56 -(dp178 -g58 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278510.1' -p179 -sg60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291581.1' -p180 -sssS'NM_001083585.2:c.*344_*368dup' -p181 -(dp182 -g3 -g4 -sg5 -(lp183 -S'The displayed variants may be artefacts of aligning NM_001083585.2 with genome build GRCh37' -p184 -aS'NC_000017.10:g.5286859_5286913 contains 25 genomic base(s) that fail to align to transcript NM_001083585.2' -p185 -aS'Caution should be used when reporting the displayed variant descriptions' -p186 -aS'If you are unsure, please contact admin' -p187 -aS'RefSeqGene record not available' -p188 -asg15 -g4 -sg16 -(lp189 -sg18 -VHomo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 2, mRNA -p190 -sg20 -S'RABEP1' -p191 -sg22 -(dp192 -g24 -S'NP_001077054.1:p.?' -p193 -sg26 -S'NP_001077054.1:p.?' 
-p194 -ssg28 -g29 -sg30 -g4 -sg31 -g4 -sg32 -S'NM_001083585.2:c.*344_*368dup' -p195 -sg34 -g4 -sg35 -(dp196 -S'hg19' -p197 -(dp198 -g39 -S'NC_000017.10:g.5286857_5286915=' -p199 -sg41 -(dp200 -g43 -g44 -sg45 -g46 -sg47 -S'5286857' -p201 -sg49 -g46 -sssg83 -(dp202 -g39 -S'NC_000017.11:g.5383567_5383591dup' -p203 -sg41 -(dp204 -g43 -g44 -sg45 -S'TAGTGTTTGGAATTTTCTGTTCATA' -p205 -sg47 -S'5383567' -p206 -sg49 -VTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATA -p207 -sssS'grch37' -p208 -(dp209 -g39 -S'NC_000017.10:g.5286857_5286915=' -p210 -sg41 -(dp211 -g43 -g54 -sg45 -g46 -sg47 -S'5286857' -p212 -sg49 -g46 -sssS'grch38' -p213 -(dp214 -g39 -S'NC_000017.11:g.5383567_5383591dup' -p215 -sg41 -(dp216 -g43 -g54 -sg45 -S'TAGTGTTTGGAATTTTCTGTTCATA' -p217 -sg47 -S'5383567' -p218 -sg49 -VTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATA -p219 -ssssg56 -(dp220 -g58 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001077054.1' -p221 -sg60 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001083585.2' -p222 -sssS'metadata' -p223 -(dp224 -S'variantvalidator_hgvs_version' -p225 -S'1.1.3' -p226 -sS'uta_schema' -p227 -S'uta_20180821' -p228 -sS'seqrepo_db' -p229 -S'2018-08-21' -p230 -sS'variantvalidator_version' -p231 -S'v0.2' -p232 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant82.txt b/VariantValidator/testing/testOutputsMasterITS/variant82.txt deleted file mode 100644 index dfcc0f30..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant82.txt +++ /dev/null @@ -1,277 +0,0 @@ -(dp0 -S'NM_001080423.3:c.1020del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_001080423.3 with genome build GRCh37' -p7 -aS'NM_001080423.3:c.1019_1022 contains 1 transcript base(s) that fail to align to chromosome NC_000003.11' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'RefSeqGene record not available' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA -p16 -sS'gene_symbol' -p17 -S'GRIP2' -p18 -sS'hgvs_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_001073892.3:p.(Ser341GlnfsTer4)' -p22 -sS'slr' -p23 -S'NP_001073892.3:p.(S341Qfs*4)' -p24 -ssS'submitted_variant' -p25 -S'NC_000003.11:g.14561629_14561630GC=' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'hgvs_lrg_variant' -p28 -g4 -sS'hgvs_transcript_variant' -p29 -S'NM_001080423.3:c.1020del' -p30 -sS'hgvs_refseqgene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000003.11:g.14561624_14561630=' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr3' -p41 -sS'ref' -p42 -S'CTGAGGC' -p43 -sS'pos' -p44 -S'14561624' -p45 -sS'alt' -p46 -g43 -sssS'hg38' -p47 -(dp48 -g36 -S'NC_000003.12:g.14520122del' -p49 -sg38 -(dp50 -g40 -g41 -sg42 -S'AG' -p51 -sg44 -S'14520119' -p52 -sg46 -S'A' -p53 -sssS'grch37' -p54 -(dp55 -g36 -S'NC_000003.11:g.14561624_14561630=' -p56 -sg38 -(dp57 -g40 -S'3' -p58 -sg42 -g43 
-sg44 -S'14561624' -p59 -sg46 -g43 -sssS'grch38' -p60 -(dp61 -g36 -S'NC_000003.12:g.14520122del' -p62 -sg38 -(dp63 -g40 -g58 -sg42 -S'AG' -p64 -sg44 -S'14520119' -p65 -sg46 -g53 -ssssS'reference_sequence_records' -p66 -(dp67 -S'protein' -p68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.3' -p69 -sS'transcript' -p70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.3' -p71 -sssS'flag' -p72 -S'gene_variant' -p73 -sS'NM_001080423.2:c.1311del' -p74 -(dp75 -g3 -g4 -sg5 -(lp76 -S'The displayed variants may be artefacts of aligning NM_001080423.2 with genome build GRCh37' -p77 -aS'NM_001080423.2:c.1310_1313 contains 1 transcript base(s) that fail to align to chromosome NC_000003.11' -p78 -aS'Caution should be used when reporting the displayed variant descriptions' -p79 -aS'If you are unsure, please contact admin' -p80 -aS'A more recent version of the selected reference sequence NM_001080423.2 is available (NM_001080423.3)' -p81 -aS'NM_001080423.3:c.1311delG MUST be fully validated prior to use in reports' -p82 -aS'select_variants=NM_001080423.3:c.1311del' -p83 -aS'RefSeqGene record not available' -p84 -asg12 -g4 -sg13 -(lp85 -sg15 -VHomo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA -p86 -sg17 -S'GRIP2' -p87 -sg19 -(dp88 -g21 -S'NP_001073892.2:p.(Ser438GlnfsTer4)' -p89 -sg23 -S'NP_001073892.2:p.(S438Qfs*4)' -p90 -ssg25 -g26 -sg27 -g4 -sg28 -g4 -sg29 -S'NM_001080423.2:c.1311del' -p91 -sg31 -g4 -sg32 -(dp92 -S'hg19' -p93 -(dp94 -g36 -S'NC_000003.11:g.14561624_14561630=' -p95 -sg38 -(dp96 -g40 -g41 -sg42 -g43 -sg44 -S'14561624' -p97 -sg46 -g43 -sssS'grch37' -p98 -(dp99 -g36 -S'NC_000003.11:g.14561624_14561630=' -p100 -sg38 -(dp101 -g40 -g58 -sg42 -g43 -sg44 -S'14561624' -p102 -sg46 -g43 -ssssg66 -(dp103 -g68 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.2' -p104 -sg70 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.2' -p105 -sssS'metadata' -p106 -(dp107 -S'variantvalidator_hgvs_version' -p108 -S'1.1.3' -p109 -sS'uta_schema' -p110 
-S'uta_20180821' -p111 -sS'seqrepo_db' -p112 -S'2018-08-21' -p113 -sS'variantvalidator_version' -p114 -S'v0.2' -p115 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant83.txt b/VariantValidator/testing/testOutputsMasterITS/variant83.txt deleted file mode 100644 index 6e4d3190..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant83.txt +++ /dev/null @@ -1,263 +0,0 @@ -(dp0 -S'NM_001080423.3:c.1016_1020=' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA -p12 -sS'gene_symbol' -p13 -S'GRIP2' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_001073892.3:p.(Arg339=)' -p18 -sS'slr' -p19 -S'NP_001073892.3:p.(R339=)' -p20 -ssS'submitted_variant' -p21 -S'NC_000003.11:g.14561629_14561630insG' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_001080423.3:c.1016_1020=' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000003.11:g.14561629dup' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr3' -p37 -sS'ref' -p38 -S'G' -p39 -sS'pos' -p40 -S'14561628' -p41 -sS'alt' -p42 -VGG -p43 -sssS'hg38' -p44 -(dp45 -g32 -S'NC_000003.12:g.14520120_14520124=' -p46 -sg34 -(dp47 -g36 -g37 -sg38 -VGGGCC -p48 -sg40 -S'14520120' -p49 -sg42 -g48 -sssS'grch37' -p50 -(dp51 -g32 -S'NC_000003.11:g.14561629dup' -p52 -sg34 -(dp53 -g36 -S'3' -p54 -sg38 -g39 -sg40 -S'14561628' -p55 -sg42 -VGG -p56 -sssS'grch38' -p57 -(dp58 -g32 -S'NC_000003.12:g.14520120_14520124=' -p59 -sg34 -(dp60 -g36 -g54 -sg38 -g48 -sg40 -S'14520120' -p61 -sg42 -g48 -ssssS'reference_sequence_records' -p62 -(dp63 -S'protein' 
-p64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.3' -p65 -sS'transcript' -p66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.3' -p67 -sssS'flag' -p68 -S'gene_variant' -p69 -sS'NM_001080423.2:c.1307_1311=' -p70 -(dp71 -g3 -g4 -sg5 -(lp72 -S'A more recent version of the selected reference sequence NM_001080423.2 is available (NM_001080423.3)' -p73 -aS'NM_001080423.3:c.1307_1311delinsGGCCC MUST be fully validated prior to use in reports' -p74 -aS'select_variants=NM_001080423.3:c.1307_1311delinsGGCCC' -p75 -aS'RefSeqGene record not available' -p76 -asg8 -g4 -sg9 -(lp77 -sg11 -VHomo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA -p78 -sg13 -S'GRIP2' -p79 -sg15 -(dp80 -g17 -S'NP_001073892.2:p.(Arg436=)' -p81 -sg19 -S'NP_001073892.2:p.(R436=)' -p82 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_001080423.2:c.1307_1311=' -p83 -sg27 -g4 -sg28 -(dp84 -S'hg19' -p85 -(dp86 -g32 -S'NC_000003.11:g.14561629dup' -p87 -sg34 -(dp88 -g36 -g37 -sg38 -g39 -sg40 -S'14561628' -p89 -sg42 -VGG -p90 -sssS'grch37' -p91 -(dp92 -g32 -S'NC_000003.11:g.14561629dup' -p93 -sg34 -(dp94 -g36 -g54 -sg38 -g39 -sg40 -S'14561628' -p95 -sg42 -VGG -p96 -ssssg62 -(dp97 -g64 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.2' -p98 -sg66 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.2' -p99 -sssS'metadata' -p100 -(dp101 -S'variantvalidator_hgvs_version' -p102 -S'1.1.3' -p103 -sS'uta_schema' -p104 -S'uta_20180821' -p105 -sS'seqrepo_db' -p106 -S'2018-08-21' -p107 -sS'variantvalidator_version' -p108 -S'v0.2' -p109 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant84.txt b/VariantValidator/testing/testOutputsMasterITS/variant84.txt deleted file mode 100644 index 341be722..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant84.txt +++ /dev/null @@ -1,272 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_018717.5:c.1515_1526del' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'RefSeqGene record not available' -p19 -asS'refseqgene_context_intronic_sequence' -p20 -g16 -sS'alt_genomic_loci' -p21 -(lp22 -sS'transcript_description' -p23 -VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA -p24 -sS'gene_symbol' -p25 -S'MAML3' -p26 -sS'hgvs_predicted_protein_consequence' -p27 -(dp28 -S'tlr' -p29 -S'NP_061187.3:p.(Gln507_Gln510del)' -p30 -sS'slr' -p31 -S'NP_061187.3:p.(Q507_Q510del)' -p32 -ssS'submitted_variant' -p33 -S'NC_000004.11:g.140811111_140811122del' -p34 -sS'genome_context_intronic_sequence' -p35 -g16 -sS'hgvs_lrg_variant' -p36 -g16 -sS'hgvs_transcript_variant' -p37 -S'NM_018717.5:c.1515_1526del' -p38 -sS'hgvs_refseqgene_variant' -p39 -g16 -sS'primary_assembly_loci' -p40 -(dp41 -S'hg19' -p42 -(dp43 -S'hgvs_genomic_description' -p44 -S'NC_000004.11:g.140811111_140811122del' -p45 -sS'vcf' -p46 -(dp47 -S'chr' -p48 -S'chr4' -p49 -sS'ref' -p50 -S'TTGCTGCTGCTGC' -p51 -sS'pos' -p52 -S'140811063' -p53 -sS'alt' -p54 -S'T' -p55 -sssS'grch37' -p56 -(dp57 -g44 -S'NC_000004.11:g.140811111_140811122del' -p58 -sg46 -(dp59 -g48 -S'4' -p60 -sg50 -S'TTGCTGCTGCTGC' -p61 -sg52 -S'140811063' -p62 -sg54 -g55 -ssssS'reference_sequence_records' -p63 -(dp64 -S'protein' -p65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3' -p66 -sS'transcript' -p67 
-S'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5' -p68 -sssS'NM_018717.4:c.1465_1469=' -p69 -(dp70 -g15 -g16 -sg17 -(lp71 -S'The displayed variants may be artefacts of aligning NM_018717.4 with genome build GRCh37' -p72 -aS'NC_000004.11:g.140811063_140811075 contains 12 genomic base(s) that fail to align to transcript NM_018717.4' -p73 -aS'Caution should be used when reporting the displayed variant descriptions' -p74 -aS'If you are unsure, please contact admin' -p75 -aS'A more recent version of the selected reference sequence NM_018717.4 is available (NM_018717.5)' -p76 -aS'NM_018717.5:c.1465_1469CAACA= MUST be fully validated prior to use in reports' -p77 -aS'select_variants=NM_018717.5:c.1465_1469=' -p78 -aS'RefSeqGene record not available' -p79 -asg20 -g16 -sg21 -(lp80 -sg23 -VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA -p81 -sg25 -S'MAML3' -p82 -sg27 -(dp83 -g29 -S'NP_061187.2:p.(Gln489=)' -p84 -sg31 -S'NP_061187.2:p.(Q489=)' -p85 -ssg33 -g34 -sg35 -g16 -sg36 -g16 -sg37 -S'NM_018717.4:c.1465_1469=' -p86 -sg39 -g16 -sg40 -(dp87 -S'hg19' -p88 -(dp89 -g44 -S'NC_000004.11:g.140811111_140811122del' -p90 -sg46 -(dp91 -g48 -g49 -sg50 -S'TTGCTGCTGCTGC' -p92 -sg52 -S'140811063' -p93 -sg54 -g55 -sssS'hg38' -p94 -(dp95 -g44 -S'NC_000004.12:g.139889957_139889968del' -p96 -sg46 -(dp97 -g48 -g49 -sg50 -S'TTGCTGCTGCTGC' -p98 -sg52 -S'139889909' -p99 -sg54 -g55 -sssS'grch37' -p100 -(dp101 -g44 -S'NC_000004.11:g.140811111_140811122del' -p102 -sg46 -(dp103 -g48 -g60 -sg50 -S'TTGCTGCTGCTGC' -p104 -sg52 -S'140811063' -p105 -sg54 -g55 -sssS'grch38' -p106 -(dp107 -g44 -S'NC_000004.12:g.139889957_139889968del' -p108 -sg46 -(dp109 -g48 -g60 -sg50 -S'TTGCTGCTGCTGC' -p110 -sg52 -S'139889909' -p111 -sg54 -g55 -ssssg63 -(dp112 -g65 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.2' -p113 -sg67 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.4' -p114 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant85.txt b/VariantValidator/testing/testOutputsMasterITS/variant85.txt deleted file mode 100644 index f37d4e0f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant85.txt +++ /dev/null @@ -1,268 +0,0 @@ -(dp0 -S'NM_018717.5:c.1468_1479=' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA -p12 -sS'gene_symbol' -p13 -S'MAML3' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_061187.3:p.(Gln490=)' -p18 -sS'slr' -p19 -S'NP_061187.3:p.(Q490=)' -p20 -ssS'submitted_variant' -p21 -S'NC_000004.11:g.140811111_140811122CTGCTGCTGCTG=' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_018717.5:c.1468_1479=' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000004.11:g.140811111_140811122=' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr4' -p37 -sS'ref' -p38 -VCTGCTGCTGCTG -p39 -sS'pos' -p40 -S'140811111' -p41 -sS'alt' -p42 -g39 -sssS'grch37' -p43 -(dp44 -g32 -S'NC_000004.11:g.140811111_140811122=' -p45 -sg34 -(dp46 -g36 -S'4' -p47 -sg38 -g39 -sg40 -S'140811111' -p48 -sg42 -g39 -ssssS'reference_sequence_records' -p49 -(dp50 -S'protein' -p51 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3' -p52 -sS'transcript' -p53 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5' -p54 -sssS'flag' -p55 -S'gene_variant' -p56 -sS'NM_018717.4:c.1503_1514dup' -p57 -(dp58 -g3 -g4 -sg5 -(lp59 -S'The displayed variants may be artefacts of aligning NM_018717.4 with genome build GRCh37' -p60 -aS'NC_000004.11:g.140811063_140811075 contains 
12 genomic base(s) that fail to align to transcript NM_018717.4' -p61 -aS'Caution should be used when reporting the displayed variant descriptions' -p62 -aS'If you are unsure, please contact admin' -p63 -aS'A more recent version of the selected reference sequence NM_018717.4 is available (NM_018717.5)' -p64 -aS'NM_018717.5:c.1503_1514dupGCAGCAGCAGCA MUST be fully validated prior to use in reports' -p65 -aS'select_variants=NM_018717.5:c.1503_1514dup' -p66 -aS'RefSeqGene record not available' -p67 -asg8 -g4 -sg9 -(lp68 -sg11 -VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA -p69 -sg13 -S'MAML3' -p70 -sg15 -(dp71 -g17 -S'NP_061187.2:p.(Gln503_Gln506dup)' -p72 -sg19 -S'NP_061187.2:p.(Q503_Q506dup)' -p73 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_018717.4:c.1503_1514dup' -p74 -sg27 -g4 -sg28 -(dp75 -S'hg19' -p76 -(dp77 -g32 -S'NC_000004.11:g.140811095_140811128=' -p78 -sg34 -(dp79 -g36 -g37 -sg38 -S'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG' -p80 -sg40 -S'140811095' -p81 -sg42 -g80 -sssS'hg38' -p82 -(dp83 -g32 -S'NC_000004.12:g.139889941_139889974=' -p84 -sg34 -(dp85 -g36 -g37 -sg38 -S'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG' -p86 -sg40 -S'139889941' -p87 -sg42 -g86 -sssS'grch37' -p88 -(dp89 -g32 -S'NC_000004.11:g.140811095_140811128=' -p90 -sg34 -(dp91 -g36 -g47 -sg38 -g80 -sg40 -S'140811095' -p92 -sg42 -g80 -sssS'grch38' -p93 -(dp94 -g32 -S'NC_000004.12:g.139889941_139889974=' -p95 -sg34 -(dp96 -g36 -g47 -sg38 -g86 -sg40 -S'139889941' -p97 -sg42 -g86 -ssssg49 -(dp98 -g51 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.2' -p99 -sg53 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.4' -p100 -sssS'metadata' -p101 -(dp102 -S'variantvalidator_hgvs_version' -p103 -S'1.1.3' -p104 -sS'uta_schema' -p105 -S'uta_20180821' -p106 -sS'seqrepo_db' -p107 -S'2018-08-21' -p108 -sS'variantvalidator_version' -p109 -S'v0.2' -p110 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant86.txt b/VariantValidator/testing/testOutputsMasterITS/variant86.txt deleted file mode 100644 index 3ad98372..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant86.txt +++ /dev/null @@ -1,272 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_018717.5:c.1521_1526del' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'RefSeqGene record not available' -p9 -asS'refseqgene_context_intronic_sequence' -p10 -g6 -sS'alt_genomic_loci' -p11 -(lp12 -sS'transcript_description' -p13 -VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA -p14 -sS'gene_symbol' -p15 -S'MAML3' -p16 -sS'hgvs_predicted_protein_consequence' -p17 -(dp18 -S'tlr' -p19 -S'NP_061187.3:p.(Gln509_Gln510del)' -p20 -sS'slr' -p21 -S'NP_061187.3:p.(Q509_Q510del)' -p22 -ssS'submitted_variant' -p23 -S'NC_000004.11:g.140811117_140811122del' -p24 -sS'genome_context_intronic_sequence' -p25 -g6 -sS'hgvs_lrg_variant' -p26 -g6 -sS'hgvs_transcript_variant' -p27 -S'NM_018717.5:c.1521_1526del' -p28 -sS'hgvs_refseqgene_variant' -p29 -g6 -sS'primary_assembly_loci' -p30 -(dp31 -S'hg19' -p32 -(dp33 -S'hgvs_genomic_description' -p34 -S'NC_000004.11:g.140811117_140811122del' -p35 -sS'vcf' -p36 -(dp37 -S'chr' -p38 -S'chr4' -p39 -sS'ref' -p40 -S'TTGCTGC' -p41 -sS'pos' -p42 -S'140811063' -p43 -sS'alt' -p44 -S'T' -p45 -sssS'grch37' -p46 -(dp47 -g34 -S'NC_000004.11:g.140811117_140811122del' -p48 -sg36 -(dp49 -g38 -S'4' -p50 -sg40 -S'TTGCTGC' -p51 -sg42 -S'140811063' -p52 -sg44 -g45 -ssssS'reference_sequence_records' -p53 -(dp54 -S'protein' -p55 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3' -p56 -sS'transcript' -p57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5' -p58 -sssS'NM_018717.4:c.1509_1514dup' -p59 -(dp60 -g5 -g6 -sg7 -(lp61 -S'The displayed variants may be artefacts of aligning NM_018717.4 with genome build GRCh37' -p62 
-aS'NC_000004.11:g.140811063 is one of 12 genomic base(s) that fail to align to transcript NM_018717.4 between positions c.1467_1468' -p63 -aS'Caution should be used when reporting the displayed variant descriptions' -p64 -aS'If you are unsure, please contact admin' -p65 -aS'A more recent version of the selected reference sequence NM_018717.4 is available (NM_018717.5)' -p66 -aS'NM_018717.5:c.1509_1514dupGCAGCA MUST be fully validated prior to use in reports' -p67 -aS'select_variants=NM_018717.5:c.1509_1514dup' -p68 -aS'RefSeqGene record not available' -p69 -asg10 -g6 -sg11 -(lp70 -sg13 -VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA -p71 -sg15 -S'MAML3' -p72 -sg17 -(dp73 -g19 -S'NP_061187.2:p.(Gln505_Gln506dup)' -p74 -sg21 -S'NP_061187.2:p.(Q505_Q506dup)' -p75 -ssg23 -g24 -sg25 -g6 -sg26 -g6 -sg27 -S'NM_018717.4:c.1509_1514dup' -p76 -sg29 -g6 -sg30 -(dp77 -S'hg19' -p78 -(dp79 -g34 -S'NC_000004.11:g.140811117_140811122del' -p80 -sg36 -(dp81 -g38 -g39 -sg40 -S'TTGCTGC' -p82 -sg42 -S'140811063' -p83 -sg44 -g45 -sssS'hg38' -p84 -(dp85 -g34 -S'NC_000004.12:g.139889963_139889968del' -p86 -sg36 -(dp87 -g38 -g39 -sg40 -S'TTGCTGC' -p88 -sg42 -S'139889909' -p89 -sg44 -g45 -sssS'grch37' -p90 -(dp91 -g34 -S'NC_000004.11:g.140811117_140811122del' -p92 -sg36 -(dp93 -g38 -g50 -sg40 -S'TTGCTGC' -p94 -sg42 -S'140811063' -p95 -sg44 -g45 -sssS'grch38' -p96 -(dp97 -g34 -S'NC_000004.12:g.139889963_139889968del' -p98 -sg36 -(dp99 -g38 -g50 -sg40 -S'TTGCTGC' -p100 -sg42 -S'139889909' -p101 -sg44 -g45 -ssssg53 -(dp102 -g55 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.2' -p103 -sg57 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.4' -p104 -sssS'metadata' -p105 -(dp106 -S'variantvalidator_hgvs_version' -p107 -S'1.1.3' -p108 -sS'uta_schema' -p109 -S'uta_20180821' -p110 -sS'seqrepo_db' -p111 -S'2018-08-21' -p112 -sS'variantvalidator_version' -p113 -S'v0.2' -p114 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant87.txt b/VariantValidator/testing/testOutputsMasterITS/variant87.txt deleted file mode 100644 index 6ff8ccb4..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant87.txt +++ /dev/null @@ -1,272 +0,0 @@ -(dp0 -S'NM_018717.5:c.1473_1479del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'RefSeqGene record not available' -p7 -asS'refseqgene_context_intronic_sequence' -p8 -g4 -sS'alt_genomic_loci' -p9 -(lp10 -sS'transcript_description' -p11 -VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA -p12 -sS'gene_symbol' -p13 -S'MAML3' -p14 -sS'hgvs_predicted_protein_consequence' -p15 -(dp16 -S'tlr' -p17 -S'NP_061187.3:p.(Gln491HisfsTer29)' -p18 -sS'slr' -p19 -S'NP_061187.3:p.(Q491Hfs*29)' -p20 -ssS'submitted_variant' -p21 -S'NC_000004.11:g.140811111_140811117del' -p22 -sS'genome_context_intronic_sequence' -p23 -g4 -sS'hgvs_lrg_variant' -p24 -g4 -sS'hgvs_transcript_variant' -p25 -S'NM_018717.5:c.1473_1479del' -p26 -sS'hgvs_refseqgene_variant' -p27 -g4 -sS'primary_assembly_loci' -p28 -(dp29 -S'hg19' -p30 -(dp31 -S'hgvs_genomic_description' -p32 -S'NC_000004.11:g.140811111_140811117del' -p33 -sS'vcf' -p34 -(dp35 -S'chr' -p36 -S'chr4' -p37 -sS'ref' -p38 -S'GCTGCTGC' -p39 -sS'pos' -p40 -S'140811110' -p41 -sS'alt' -p42 -S'G' -p43 -sssS'grch37' -p44 -(dp45 -g32 -S'NC_000004.11:g.140811111_140811117del' -p46 -sg34 -(dp47 -g36 -S'4' -p48 -sg38 -S'GCTGCTGC' -p49 -sg40 -S'140811110' -p50 -sg42 -g43 -ssssS'reference_sequence_records' -p51 -(dp52 -S'protein' -p53 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3' -p54 -sS'transcript' -p55 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5' -p56 -sssS'flag' -p57 -S'gene_variant' -p58 -sS'NM_018717.4:c.1468_1472dup' -p59 -(dp60 -g3 -g4 -sg5 -(lp61 -S'The displayed variants may be artefacts of aligning NM_018717.4 with genome build GRCh37' -p62 
-aS'NC_000004.11:g.140811110 is one of Requires Analysis genomic base(s) that fail to align to transcript NM_018717.4 between positions c.1467_1468' -p63 -aS'Caution should be used when reporting the displayed variant descriptions' -p64 -aS'If you are unsure, please contact admin' -p65 -aS'A more recent version of the selected reference sequence NM_018717.4 is available (NM_018717.5)' -p66 -aS'NM_018717.5:c.1468_1472dupCAGCA MUST be fully validated prior to use in reports' -p67 -aS'select_variants=NM_018717.5:c.1468_1472dup' -p68 -aS'RefSeqGene record not available' -p69 -asg8 -g4 -sg9 -(lp70 -sg11 -VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA -p71 -sg13 -S'MAML3' -p72 -sg15 -(dp73 -g17 -S'NP_061187.2:p.(Gln491HisfsTer29)' -p74 -sg19 -S'NP_061187.2:p.(Q491Hfs*29)' -p75 -ssg21 -g22 -sg23 -g4 -sg24 -g4 -sg25 -S'NM_018717.4:c.1468_1472dup' -p76 -sg27 -g4 -sg28 -(dp77 -S'hg19' -p78 -(dp79 -g32 -S'NC_000004.11:g.140811111_140811117del' -p80 -sg34 -(dp81 -g36 -g37 -sg38 -S'GCTGCTGC' -p82 -sg40 -S'140811110' -p83 -sg42 -g43 -sssS'hg38' -p84 -(dp85 -g32 -S'NC_000004.12:g.139889957_139889963del' -p86 -sg34 -(dp87 -g36 -g37 -sg38 -S'GCTGCTGC' -p88 -sg40 -S'139889956' -p89 -sg42 -g43 -sssS'grch37' -p90 -(dp91 -g32 -S'NC_000004.11:g.140811111_140811117del' -p92 -sg34 -(dp93 -g36 -g48 -sg38 -S'GCTGCTGC' -p94 -sg40 -S'140811110' -p95 -sg42 -g43 -sssS'grch38' -p96 -(dp97 -g32 -S'NC_000004.12:g.139889957_139889963del' -p98 -sg34 -(dp99 -g36 -g48 -sg38 -S'GCTGCTGC' -p100 -sg40 -S'139889956' -p101 -sg42 -g43 -ssssg51 -(dp102 -g53 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.2' -p103 -sg55 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.4' -p104 -sssS'metadata' -p105 -(dp106 -S'variantvalidator_hgvs_version' -p107 -S'1.1.3' -p108 -sS'uta_schema' -p109 -S'uta_20180821' -p110 -sS'seqrepo_db' -p111 -S'2018-08-21' -p112 -sS'variantvalidator_version' -p113 -S'v0.2' -p114 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant88.txt b/VariantValidator/testing/testOutputsMasterITS/variant88.txt deleted file mode 100644 index d21ee799..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant88.txt +++ /dev/null @@ -1,268 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_018717.4:c.1472_1473insTCAGCAGCAGCA' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'The displayed variants may be artefacts of aligning NM_018717.4 with genome build GRCh37' -p19 -aS'NC_000004.11:g.140811117 is one of 12 genomic base(s) that fail to align to transcript NM_018717.4 between positions c.1467_1468' -p20 -aS'Caution should be used when reporting the displayed variant descriptions' -p21 -aS'If you are unsure, please contact admin' -p22 -aS'A more recent version of the selected reference sequence NM_018717.4 is available (NM_018717.5)' -p23 -aS'NM_018717.5:c.1472_1473insTCAGCAGCAGCA MUST be fully validated prior to use in reports' -p24 -aS'select_variants=NM_018717.5:c.1472_1473insTCAGCAGCAGCA' -p25 -aS'RefSeqGene record not available' -p26 -asS'refseqgene_context_intronic_sequence' -p27 -g16 -sS'alt_genomic_loci' -p28 -(lp29 -sS'transcript_description' -p30 -VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA -p31 -sS'gene_symbol' -p32 -S'MAML3' -p33 -sS'hgvs_predicted_protein_consequence' -p34 -(dp35 -S'tlr' -p36 -S'NP_061187.2:p.(Gln490_Gln491insHisGlnGlnGln)' -p37 -sS'slr' -p38 -S'NP_061187.2:p.(Q490_Q491insHQQQ)' -p39 -ssS'submitted_variant' -p40 -S'NC_000004.11:g.140811117C>A' -p41 -sS'genome_context_intronic_sequence' -p42 -g16 -sS'hgvs_lrg_variant' -p43 -g16 -sS'hgvs_transcript_variant' -p44 
-S'NM_018717.4:c.1472_1473insTCAGCAGCAGCA' -p45 -sS'hgvs_refseqgene_variant' -p46 -g16 -sS'primary_assembly_loci' -p47 -(dp48 -S'hg19' -p49 -(dp50 -S'hgvs_genomic_description' -p51 -S'NC_000004.11:g.140811117C>A' -p52 -sS'vcf' -p53 -(dp54 -S'chr' -p55 -S'chr4' -p56 -sS'ref' -p57 -S'C' -p58 -sS'pos' -p59 -S'140811117' -p60 -sS'alt' -p61 -VA -p62 -sssS'hg38' -p63 -(dp64 -g51 -S'NC_000004.12:g.139889963C>A' -p65 -sg53 -(dp66 -g55 -g56 -sg57 -g58 -sg59 -S'139889963' -p67 -sg61 -g62 -sssS'grch37' -p68 -(dp69 -g51 -S'NC_000004.11:g.140811117C>A' -p70 -sg53 -(dp71 -g55 -S'4' -p72 -sg57 -g58 -sg59 -S'140811117' -p73 -sg61 -g62 -sssS'grch38' -p74 -(dp75 -g51 -S'NC_000004.12:g.139889963C>A' -p76 -sg53 -(dp77 -g55 -g72 -sg57 -g58 -sg59 -S'139889963' -p78 -sg61 -g62 -ssssS'reference_sequence_records' -p79 -(dp80 -S'protein' -p81 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.2' -p82 -sS'transcript' -p83 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.4' -p84 -sssS'NM_018717.5:c.1473G>T' -p85 -(dp86 -g15 -g16 -sg17 -(lp87 -S'RefSeqGene record not available' -p88 -asg27 -g16 -sg28 -(lp89 -sg30 -VHomo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA -p90 -sg32 -S'MAML3' -p91 -sg34 -(dp92 -g36 -S'NP_061187.3:p.(Gln491His)' -p93 -sg38 -S'NP_061187.3:p.(Q491H)' -p94 -ssg40 -g41 -sg42 -g16 -sg43 -g16 -sg44 -S'NM_018717.5:c.1473G>T' -p95 -sg46 -g16 -sg47 -(dp96 -S'hg19' -p97 -(dp98 -g51 -S'NC_000004.11:g.140811117C>A' -p99 -sg53 -(dp100 -g55 -g56 -sg57 -VC -p101 -sg59 -S'140811117' -p102 -sg61 -g62 -sssS'grch37' -p103 -(dp104 -g51 -S'NC_000004.11:g.140811117C>A' -p105 -sg53 -(dp106 -g55 -g72 -sg57 -g101 -sg59 -S'140811117' -p107 -sg61 -g62 -ssssg79 -(dp108 -g81 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3' -p109 -sg83 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5' -p110 -sss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant89.txt b/VariantValidator/testing/testOutputsMasterITS/variant89.txt deleted file mode 100644 index dda98ce8..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant89.txt +++ /dev/null @@ -1,182 +0,0 @@ -(dp0 -S'NM_015120.4:c.1573_1579=' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_015120.4 with genome build GRCh37' -p7 -aS'NM_015120.4:c.1573_1579 contains 3 transcript base(s) that fail to align to chromosome NC_000002.11' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'RefSeqGene record not available' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -sS'transcript_description' -p15 -VHomo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA -p16 -sS'gene_symbol' -p17 -S'ALMS1' -p18 -sS'hgvs_predicted_protein_consequence' -p19 -(dp20 -S'tlr' -p21 -S'NP_055935.4:p.(Ser525=)' -p22 -sS'slr' -p23 -S'NP_055935.4:p.(S525=)' -p24 -ssS'submitted_variant' -p25 -S'NC_000002.11:g.73675227_73675228insCTC' -p26 -sS'genome_context_intronic_sequence' -p27 -g4 -sS'hgvs_lrg_variant' -p28 -g4 -sS'hgvs_transcript_variant' -p29 -S'NM_015120.4:c.1573_1579=' -p30 -sS'hgvs_refseqgene_variant' -p31 -g4 -sS'primary_assembly_loci' -p32 -(dp33 -S'hg19' -p34 -(dp35 -S'hgvs_genomic_description' -p36 -S'NC_000002.11:g.73675228_73675230dup' -p37 -sS'vcf' -p38 -(dp39 -S'chr' -p40 -S'chr2' -p41 -sS'ref' -p42 -S'CTC' -p43 -sS'pos' -p44 -S'73675228' -p45 -sS'alt' -p46 -VCTCCTC -p47 -sssS'hg38' -p48 -(dp49 -g36 -S'NC_000002.12:g.73448097_73448103=' -p50 -sg38 -(dp51 -g40 -g41 -sg42 -VTCTCCTC -p52 -sg44 -S'73448097' -p53 -sg46 -g52 -sssS'grch37' -p54 -(dp55 -g36 -S'NC_000002.11:g.73675228_73675230dup' -p56 -sg38 -(dp57 -g40 -S'2' -p58 -sg42 
-S'CTC' -p59 -sg44 -S'73675228' -p60 -sg46 -VCTCCTC -p61 -sssS'grch38' -p62 -(dp63 -g36 -S'NC_000002.12:g.73448097_73448103=' -p64 -sg38 -(dp65 -g40 -g58 -sg42 -g52 -sg44 -S'73448097' -p66 -sg46 -g52 -ssssS'reference_sequence_records' -p67 -(dp68 -S'protein' -p69 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4' -p70 -sS'transcript' -p71 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4' -p72 -sssS'flag' -p73 -S'gene_variant' -p74 -sS'metadata' -p75 -(dp76 -S'variantvalidator_hgvs_version' -p77 -S'1.1.3' -p78 -sS'uta_schema' -p79 -S'uta_20180821' -p80 -sS'seqrepo_db' -p81 -S'2018-08-21' -p82 -sS'variantvalidator_version' -p83 -S'v0.2' -p84 -ss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant9.txt b/VariantValidator/testing/testOutputsMasterITS/variant9.txt deleted file mode 100644 index 3097623f..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant9.txt +++ /dev/null @@ -1,82 +0,0 @@ -(dp0 -S'validation_warning_1' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'base start position must be <= end position' -p7 -aS'Did you mean NM_000094.3:c.6751-3_6751-2del?' -p8 -asS'refseqgene_context_intronic_sequence' -p9 -g4 -sS'alt_genomic_loci' -p10 -(lp11 -sS'transcript_description' -p12 -g4 -sS'gene_symbol' -p13 -g4 -sS'hgvs_predicted_protein_consequence' -p14 -(dp15 -S'tlr' -p16 -g4 -sS'slr' -p17 -g4 -ssS'submitted_variant' -p18 -S'NM_000094.3:c.6751-2_6751-3del' -p19 -sS'genome_context_intronic_sequence' -p20 -g4 -sS'hgvs_lrg_variant' -p21 -g4 -sS'hgvs_transcript_variant' -p22 -g4 -sS'hgvs_refseqgene_variant' -p23 -g4 -sS'primary_assembly_loci' -p24 -(dp25 -sS'reference_sequence_records' -p26 -g4 -ssS'flag' -p27 -S'warning' -p28 -sS'metadata' -p29 -(dp30 -S'variantvalidator_hgvs_version' -p31 -S'1.1.3' -p32 -sS'uta_schema' -p33 -S'uta_20180821' -p34 -sS'seqrepo_db' -p35 -S'2018-08-21' -p36 -sS'variantvalidator_version' -p37 -S'v0.2' -p38 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant90.txt b/VariantValidator/testing/testOutputsMasterITS/variant90.txt deleted file mode 100644 index 8601fc9d..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant90.txt +++ /dev/null @@ -1,261 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_020469.2:c.260_262=' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000009.11:g.136132908T>TC automapped to NC_000009.11:g.136132908_136132909insC' -p9 -aS'The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37' -p10 -aS'NM_020469.2:c.260_262 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11' -p11 -aS'Caution should be used when reporting the displayed variant descriptions' -p12 -aS'If you are unsure, please contact admin' -p13 -aS'RefSeqGene record not available' -p14 -asS'refseqgene_context_intronic_sequence' -p15 -g6 -sS'alt_genomic_loci' -p16 -(lp17 -(dp18 -S'grch37' -p19 -(dp20 -S'hgvs_genomic_description' -p21 -S'NW_003315925.1:g.83614_83616=' -p22 -sS'vcf' -p23 -(dp24 -S'chr' -p25 -S'HG79_PATCH' -p26 -sS'ref' -p27 -VTCA -p28 -sS'pos' -p29 -S'83614' -p30 -sS'alt' -p31 -g28 -sssa(dp32 -S'hg19' -p33 -(dp34 -g21 -S'NW_003315925.1:g.83614_83616=' -p35 -sg23 -(dp36 -g25 -S'NW_003315925.1' -p37 -sg27 -g28 -sg29 -S'83614' -p38 -sg31 -g28 -sssa(dp39 -S'grch38' -p40 -(dp41 -g21 -S'NW_009646201.1:g.83614_83616=' -p42 -sg23 -(dp43 -g25 -S'HG2030_PATCH' -p44 -sg27 -VTCA -p45 -sg29 -S'83614' -p46 -sg31 -g45 -sssa(dp47 -S'hg38' -p48 -(dp49 -g21 -S'NW_009646201.1:g.83614_83616=' -p50 -sg23 -(dp51 -g25 -S'NW_009646201.1' -p52 -sg27 -g45 -sg29 -S'83614' -p53 -sg31 -g45 -sssasS'transcript_description' -p54 -VHomo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA -p55 -sS'gene_symbol' -p56 -S'ABO' -p57 -sS'hgvs_predicted_protein_consequence' -p58 -(dp59 -S'tlr' -p60 
-S'NP_065202.2:p.(Val87=)' -p61 -sS'slr' -p62 -S'NP_065202.2:p.(V87=)' -p63 -ssS'submitted_variant' -p64 -S'9-136132908-T-TC' -p65 -sS'genome_context_intronic_sequence' -p66 -g6 -sS'hgvs_lrg_variant' -p67 -g6 -sS'hgvs_transcript_variant' -p68 -S'NM_020469.2:c.260_262=' -p69 -sS'hgvs_refseqgene_variant' -p70 -g6 -sS'primary_assembly_loci' -p71 -(dp72 -S'hg19' -p73 -(dp74 -g21 -S'NC_000009.11:g.136132908_136132909insC' -p75 -sg23 -(dp76 -g25 -S'chr9' -p77 -sg27 -S'T' -p78 -sg29 -S'136132908' -p79 -sg31 -VTC -p80 -sssg48 -(dp81 -g21 -S'NC_000009.12:g.133257521_133257522insC' -p82 -sg23 -(dp83 -g25 -g77 -sg27 -g78 -sg29 -S'133257521' -p84 -sg31 -VTC -p85 -sssS'grch37' -p86 -(dp87 -g21 -S'NC_000009.11:g.136132908_136132909insC' -p88 -sg23 -(dp89 -g25 -S'9' -p90 -sg27 -g78 -sg29 -S'136132908' -p91 -sg31 -VTC -p92 -sssS'grch38' -p93 -(dp94 -g21 -S'NC_000009.12:g.133257521_133257522insC' -p95 -sg23 -(dp96 -g25 -g90 -sg27 -g78 -sg29 -S'133257521' -p97 -sg31 -VTC -p98 -ssssS'reference_sequence_records' -p99 -(dp100 -S'protein' -p101 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2' -p102 -sS'transcript' -p103 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2' -p104 -sssS'metadata' -p105 -(dp106 -S'variantvalidator_hgvs_version' -p107 -S'1.1.3' -p108 -sS'uta_schema' -p109 -S'uta_20180821' -p110 -sS'seqrepo_db' -p111 -S'2018-08-21' -p112 -sS'variantvalidator_version' -p113 -S'v0.2' -p114 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant91.txt b/VariantValidator/testing/testOutputsMasterITS/variant91.txt deleted file mode 100644 index 05e0fda3..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant91.txt +++ /dev/null @@ -1,265 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_020469.2:c.259del' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NC_000009.11:g.136132908TAC>TCA automapped to NC_000009.11:g.136132909_136132910delACinsCA' -p9 -aS'The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37' -p10 -aS'NM_020469.2:c.258_261 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11' -p11 -aS'Caution should be used when reporting the displayed variant descriptions' -p12 -aS'If you are unsure, please contact admin' -p13 -aS'RefSeqGene record not available' -p14 -asS'refseqgene_context_intronic_sequence' -p15 -g6 -sS'alt_genomic_loci' -p16 -(lp17 -(dp18 -S'grch37' -p19 -(dp20 -S'hgvs_genomic_description' -p21 -S'NW_003315925.1:g.83618del' -p22 -sS'vcf' -p23 -(dp24 -S'chr' -p25 -S'HG79_PATCH' -p26 -sS'ref' -p27 -S'AC' -p28 -sS'pos' -p29 -S'83616' -p30 -sS'alt' -p31 -S'A' -p32 -sssa(dp33 -S'hg19' -p34 -(dp35 -g21 -S'NW_003315925.1:g.83618del' -p36 -sg23 -(dp37 -g25 -S'NW_003315925.1' -p38 -sg27 -S'AC' -p39 -sg29 -S'83616' -p40 -sg31 -g32 -sssa(dp41 -S'grch38' -p42 -(dp43 -g21 -S'NW_009646201.1:g.83618del' -p44 -sg23 -(dp45 -g25 -S'HG2030_PATCH' -p46 -sg27 -S'AC' -p47 -sg29 -S'83616' -p48 -sg31 -g32 -sssa(dp49 -S'hg38' -p50 -(dp51 -g21 -S'NW_009646201.1:g.83618del' -p52 -sg23 -(dp53 -g25 -S'NW_009646201.1' -p54 -sg27 -S'AC' -p55 -sg29 -S'83616' -p56 -sg31 -g32 -sssasS'transcript_description' -p57 -VHomo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA -p58 -sS'gene_symbol' -p59 -S'ABO' -p60 -sS'hgvs_predicted_protein_consequence' -p61 -(dp62 
-S'tlr' -p63 -S'NP_065202.2:p.(Val87Ter)' -p64 -sS'slr' -p65 -S'NP_065202.2:p.(V87*)' -p66 -ssS'submitted_variant' -p67 -S'9-136132908-TAC-TCA' -p68 -sS'genome_context_intronic_sequence' -p69 -g6 -sS'hgvs_lrg_variant' -p70 -g6 -sS'hgvs_transcript_variant' -p71 -S'NM_020469.2:c.259del' -p72 -sS'hgvs_refseqgene_variant' -p73 -g6 -sS'primary_assembly_loci' -p74 -(dp75 -S'hg19' -p76 -(dp77 -g21 -S'NC_000009.11:g.136132909_136132910delinsCA' -p78 -sg23 -(dp79 -g25 -S'chr9' -p80 -sg27 -S'AC' -p81 -sg29 -S'136132909' -p82 -sg31 -VCA -p83 -sssg50 -(dp84 -g21 -S'NC_000009.12:g.133257522_133257523delinsCA' -p85 -sg23 -(dp86 -g25 -g80 -sg27 -S'AC' -p87 -sg29 -S'133257522' -p88 -sg31 -VCA -p89 -sssS'grch37' -p90 -(dp91 -g21 -S'NC_000009.11:g.136132909_136132910delinsCA' -p92 -sg23 -(dp93 -g25 -S'9' -p94 -sg27 -S'AC' -p95 -sg29 -S'136132909' -p96 -sg31 -g83 -sssS'grch38' -p97 -(dp98 -g21 -S'NC_000009.12:g.133257522_133257523delinsCA' -p99 -sg23 -(dp100 -g25 -g94 -sg27 -S'AC' -p101 -sg29 -S'133257522' -p102 -sg31 -g89 -ssssS'reference_sequence_records' -p103 -(dp104 -S'protein' -p105 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2' -p106 -sS'transcript' -p107 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2' -p108 -sssS'metadata' -p109 -(dp110 -S'variantvalidator_hgvs_version' -p111 -S'1.1.3' -p112 -sS'uta_schema' -p113 -S'uta_20180821' -p114 -sS'seqrepo_db' -p115 -S'2018-08-21' -p116 -sS'variantvalidator_version' -p117 -S'v0.2' -p118 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant92.txt b/VariantValidator/testing/testOutputsMasterITS/variant92.txt deleted file mode 100644 index d999d667..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant92.txt +++ /dev/null @@ -1,261 +0,0 @@ -(dp0 -S'NM_020469.2:c.261del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'NC_000009.11:g.136132908TA>TA automapped to NC_000009.11:g.136132908_136132909TA=' -p7 -aS'The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37' -p8 -aS'NM_020469.2:c.261 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11' -p9 -aS'Caution should be used when reporting the displayed variant descriptions' -p10 -aS'If you are unsure, please contact admin' -p11 -aS'RefSeqGene record not available' -p12 -asS'refseqgene_context_intronic_sequence' -p13 -g4 -sS'alt_genomic_loci' -p14 -(lp15 -(dp16 -S'grch37' -p17 -(dp18 -S'hgvs_genomic_description' -p19 -S'NW_003315925.1:g.83615del' -p20 -sS'vcf' -p21 -(dp22 -S'chr' -p23 -S'HG79_PATCH' -p24 -sS'ref' -p25 -S'TC' -p26 -sS'pos' -p27 -S'83614' -p28 -sS'alt' -p29 -S'T' -p30 -sssa(dp31 -S'hg19' -p32 -(dp33 -g19 -S'NW_003315925.1:g.83615del' -p34 -sg21 -(dp35 -g23 -S'NW_003315925.1' -p36 -sg25 -S'TC' -p37 -sg27 -S'83614' -p38 -sg29 -g30 -sssa(dp39 -S'grch38' -p40 -(dp41 -g19 -S'NW_009646201.1:g.83615del' -p42 -sg21 -(dp43 -g23 -S'HG2030_PATCH' -p44 -sg25 -S'TC' -p45 -sg27 -S'83614' -p46 -sg29 -g30 -sssa(dp47 -S'hg38' -p48 -(dp49 -g19 -S'NW_009646201.1:g.83615del' -p50 -sg21 -(dp51 -g23 -S'NW_009646201.1' -p52 -sg25 -S'TC' -p53 -sg27 -S'83614' -p54 -sg29 -g30 -sssasS'transcript_description' -p55 -VHomo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA -p56 -sS'gene_symbol' -p57 -S'ABO' -p58 -sS'hgvs_predicted_protein_consequence' -p59 -(dp60 -S'tlr' -p61 -S'NP_065202.2:p.(Thr88ProfsTer31)' 
-p62 -sS'slr' -p63 -S'NP_065202.2:p.(T88Pfs*31)' -p64 -ssS'submitted_variant' -p65 -S'9-136132908-TA-TA' -p66 -sS'genome_context_intronic_sequence' -p67 -g4 -sS'hgvs_lrg_variant' -p68 -g4 -sS'hgvs_transcript_variant' -p69 -S'NM_020469.2:c.261del' -p70 -sS'hgvs_refseqgene_variant' -p71 -g4 -sS'primary_assembly_loci' -p72 -(dp73 -S'hg19' -p74 -(dp75 -g19 -S'NC_000009.11:g.136132908_136132909=' -p76 -sg21 -(dp77 -g23 -S'chr9' -p78 -sg25 -S'TA' -p79 -sg27 -S'136132908' -p80 -sg29 -g79 -sssg48 -(dp81 -g19 -S'NC_000009.12:g.133257521_133257522=' -p82 -sg21 -(dp83 -g23 -g78 -sg25 -S'TA' -p84 -sg27 -S'133257521' -p85 -sg29 -g84 -sssS'grch37' -p86 -(dp87 -g19 -S'NC_000009.11:g.136132908_136132909=' -p88 -sg21 -(dp89 -g23 -S'9' -p90 -sg25 -g79 -sg27 -S'136132908' -p91 -sg29 -g79 -sssS'grch38' -p92 -(dp93 -g19 -S'NC_000009.12:g.133257521_133257522=' -p94 -sg21 -(dp95 -g23 -g90 -sg25 -g84 -sg27 -S'133257521' -p96 -sg29 -g84 -ssssS'reference_sequence_records' -p97 -(dp98 -S'protein' -p99 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2' -p100 -sS'transcript' -p101 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2' -p102 -sssS'flag' -p103 -S'gene_variant' -p104 -sS'metadata' -p105 -(dp106 -S'variantvalidator_hgvs_version' -p107 -S'1.1.3' -p108 -sS'uta_schema' -p109 -S'uta_20180821' -p110 -sS'seqrepo_db' -p111 -S'2018-08-21' -p112 -sS'variantvalidator_version' -p113 -S'v0.2' -p114 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant93.txt b/VariantValidator/testing/testOutputsMasterITS/variant93.txt deleted file mode 100644 index af56eddc..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant93.txt +++ /dev/null @@ -1,265 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_020469.2:c.259del' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'NM_020469.2:c.258delG automapped to NM_020469.2:c.259delG' -p9 -aS'The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37' -p10 -aS'NM_020469.2:c.258_261 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11' -p11 -aS'Caution should be used when reporting the displayed variant descriptions' -p12 -aS'If you are unsure, please contact admin' -p13 -aS'RefSeqGene record not available' -p14 -asS'refseqgene_context_intronic_sequence' -p15 -g6 -sS'alt_genomic_loci' -p16 -(lp17 -(dp18 -S'grch37' -p19 -(dp20 -S'hgvs_genomic_description' -p21 -S'NW_003315925.1:g.83618del' -p22 -sS'vcf' -p23 -(dp24 -S'chr' -p25 -S'HG79_PATCH' -p26 -sS'ref' -p27 -S'AC' -p28 -sS'pos' -p29 -S'83616' -p30 -sS'alt' -p31 -S'A' -p32 -sssa(dp33 -S'hg19' -p34 -(dp35 -g21 -S'NW_003315925.1:g.83618del' -p36 -sg23 -(dp37 -g25 -S'NW_003315925.1' -p38 -sg27 -S'AC' -p39 -sg29 -S'83616' -p40 -sg31 -g32 -sssa(dp41 -S'grch38' -p42 -(dp43 -g21 -S'NW_009646201.1:g.83618del' -p44 -sg23 -(dp45 -g25 -S'HG2030_PATCH' -p46 -sg27 -S'AC' -p47 -sg29 -S'83616' -p48 -sg31 -g32 -sssa(dp49 -S'hg38' -p50 -(dp51 -g21 -S'NW_009646201.1:g.83618del' -p52 -sg23 -(dp53 -g25 -S'NW_009646201.1' -p54 -sg27 -S'AC' -p55 -sg29 -S'83616' -p56 -sg31 -g32 -sssasS'transcript_description' -p57 -VHomo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA -p58 -sS'gene_symbol' -p59 -S'ABO' -p60 -sS'hgvs_predicted_protein_consequence' -p61 -(dp62 -S'tlr' -p63 
-S'NP_065202.2:p.(Val87Ter)' -p64 -sS'slr' -p65 -S'NP_065202.2:p.(V87*)' -p66 -ssS'submitted_variant' -p67 -S'NM_020469.2:c.258delG' -p68 -sS'genome_context_intronic_sequence' -p69 -g6 -sS'hgvs_lrg_variant' -p70 -g6 -sS'hgvs_transcript_variant' -p71 -S'NM_020469.2:c.259del' -p72 -sS'hgvs_refseqgene_variant' -p73 -g6 -sS'primary_assembly_loci' -p74 -(dp75 -S'hg19' -p76 -(dp77 -g21 -S'NC_000009.11:g.136132909_136132910delinsCA' -p78 -sg23 -(dp79 -g25 -S'chr9' -p80 -sg27 -S'AC' -p81 -sg29 -S'136132909' -p82 -sg31 -VCA -p83 -sssg50 -(dp84 -g21 -S'NC_000009.12:g.133257522_133257523delinsCA' -p85 -sg23 -(dp86 -g25 -g80 -sg27 -S'AC' -p87 -sg29 -S'133257522' -p88 -sg31 -VCA -p89 -sssS'grch37' -p90 -(dp91 -g21 -S'NC_000009.11:g.136132909_136132910delinsCA' -p92 -sg23 -(dp93 -g25 -S'9' -p94 -sg27 -S'AC' -p95 -sg29 -S'136132909' -p96 -sg31 -g83 -sssS'grch38' -p97 -(dp98 -g21 -S'NC_000009.12:g.133257522_133257523delinsCA' -p99 -sg23 -(dp100 -g25 -g94 -sg27 -S'AC' -p101 -sg29 -S'133257522' -p102 -sg31 -g89 -ssssS'reference_sequence_records' -p103 -(dp104 -S'protein' -p105 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2' -p106 -sS'transcript' -p107 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2' -p108 -sssS'metadata' -p109 -(dp110 -S'variantvalidator_hgvs_version' -p111 -S'1.1.3' -p112 -sS'uta_schema' -p113 -S'uta_20180821' -p114 -sS'seqrepo_db' -p115 -S'2018-08-21' -p116 -sS'variantvalidator_version' -p117 -S'v0.2' -p118 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant94.txt b/VariantValidator/testing/testOutputsMasterITS/variant94.txt deleted file mode 100644 index d0b9f8b2..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant94.txt +++ /dev/null @@ -1,259 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_020469.2:c.260_262=' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37' -p9 -aS'NM_020469.2:c.260_262 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'refseqgene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -(dp17 -S'grch37' -p18 -(dp19 -S'hgvs_genomic_description' -p20 -S'NW_003315925.1:g.83614_83616=' -p21 -sS'vcf' -p22 -(dp23 -S'chr' -p24 -S'HG79_PATCH' -p25 -sS'ref' -p26 -VTCA -p27 -sS'pos' -p28 -S'83614' -p29 -sS'alt' -p30 -g27 -sssa(dp31 -S'hg19' -p32 -(dp33 -g20 -S'NW_003315925.1:g.83614_83616=' -p34 -sg22 -(dp35 -g24 -S'NW_003315925.1' -p36 -sg26 -g27 -sg28 -S'83614' -p37 -sg30 -g27 -sssa(dp38 -S'grch38' -p39 -(dp40 -g20 -S'NW_009646201.1:g.83614_83616=' -p41 -sg22 -(dp42 -g24 -S'HG2030_PATCH' -p43 -sg26 -VTCA -p44 -sg28 -S'83614' -p45 -sg30 -g44 -sssa(dp46 -S'hg38' -p47 -(dp48 -g20 -S'NW_009646201.1:g.83614_83616=' -p49 -sg22 -(dp50 -g24 -S'NW_009646201.1' -p51 -sg26 -g44 -sg28 -S'83614' -p52 -sg30 -g44 -sssasS'transcript_description' -p53 -VHomo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA -p54 -sS'gene_symbol' -p55 -S'ABO' -p56 -sS'hgvs_predicted_protein_consequence' -p57 -(dp58 -S'tlr' -p59 -S'NP_065202.2:p.(Val87=)' -p60 -sS'slr' -p61 -S'NP_065202.2:p.(V87=)' -p62 
-ssS'submitted_variant' -p63 -S'NM_020469.2:c.260_262TGA=' -p64 -sS'genome_context_intronic_sequence' -p65 -g6 -sS'hgvs_lrg_variant' -p66 -g6 -sS'hgvs_transcript_variant' -p67 -S'NM_020469.2:c.260_262=' -p68 -sS'hgvs_refseqgene_variant' -p69 -g6 -sS'primary_assembly_loci' -p70 -(dp71 -S'hg19' -p72 -(dp73 -g20 -S'NC_000009.11:g.136132908_136132909insC' -p74 -sg22 -(dp75 -g24 -S'chr9' -p76 -sg26 -S'T' -p77 -sg28 -S'136132908' -p78 -sg30 -VTC -p79 -sssg47 -(dp80 -g20 -S'NC_000009.12:g.133257521_133257522insC' -p81 -sg22 -(dp82 -g24 -g76 -sg26 -g77 -sg28 -S'133257521' -p83 -sg30 -VTC -p84 -sssS'grch37' -p85 -(dp86 -g20 -S'NC_000009.11:g.136132908_136132909insC' -p87 -sg22 -(dp88 -g24 -S'9' -p89 -sg26 -g77 -sg28 -S'136132908' -p90 -sg30 -VTC -p91 -sssS'grch38' -p92 -(dp93 -g20 -S'NC_000009.12:g.133257521_133257522insC' -p94 -sg22 -(dp95 -g24 -g89 -sg26 -g77 -sg28 -S'133257521' -p96 -sg30 -VTC -p97 -ssssS'reference_sequence_records' -p98 -(dp99 -S'protein' -p100 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2' -p101 -sS'transcript' -p102 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2' -p103 -sssS'metadata' -p104 -(dp105 -S'variantvalidator_hgvs_version' -p106 -S'1.1.3' -p107 -sS'uta_schema' -p108 -S'uta_20180821' -p109 -sS'seqrepo_db' -p110 -S'2018-08-21' -p111 -sS'variantvalidator_version' -p112 -S'v0.2' -p113 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant95.txt b/VariantValidator/testing/testOutputsMasterITS/variant95.txt deleted file mode 100644 index f2a0e24e..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant95.txt +++ /dev/null @@ -1,259 +0,0 @@ -(dp0 -S'NM_020469.2:c.261del' -p1 -(dp2 -S'hgvs_lrg_transcript_variant' -p3 -S'' -p4 -sS'validation_warnings' -p5 -(lp6 -S'The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37' -p7 -aS'NM_020469.2:c.261 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11' -p8 -aS'Caution should be used when reporting the displayed variant descriptions' -p9 -aS'If you are unsure, please contact admin' -p10 -aS'RefSeqGene record not available' -p11 -asS'refseqgene_context_intronic_sequence' -p12 -g4 -sS'alt_genomic_loci' -p13 -(lp14 -(dp15 -S'grch37' -p16 -(dp17 -S'hgvs_genomic_description' -p18 -S'NW_003315925.1:g.83615del' -p19 -sS'vcf' -p20 -(dp21 -S'chr' -p22 -S'HG79_PATCH' -p23 -sS'ref' -p24 -S'TC' -p25 -sS'pos' -p26 -S'83614' -p27 -sS'alt' -p28 -S'T' -p29 -sssa(dp30 -S'hg19' -p31 -(dp32 -g18 -S'NW_003315925.1:g.83615del' -p33 -sg20 -(dp34 -g22 -S'NW_003315925.1' -p35 -sg24 -S'TC' -p36 -sg26 -S'83614' -p37 -sg28 -g29 -sssa(dp38 -S'grch38' -p39 -(dp40 -g18 -S'NW_009646201.1:g.83615del' -p41 -sg20 -(dp42 -g22 -S'HG2030_PATCH' -p43 -sg24 -S'TC' -p44 -sg26 -S'83614' -p45 -sg28 -g29 -sssa(dp46 -S'hg38' -p47 -(dp48 -g18 -S'NW_009646201.1:g.83615del' -p49 -sg20 -(dp50 -g22 -S'NW_009646201.1' -p51 -sg24 -S'TC' -p52 -sg26 -S'83614' -p53 -sg28 -g29 -sssasS'transcript_description' -p54 -VHomo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA -p55 -sS'gene_symbol' -p56 -S'ABO' -p57 -sS'hgvs_predicted_protein_consequence' -p58 -(dp59 -S'tlr' -p60 -S'NP_065202.2:p.(Thr88ProfsTer31)' -p61 -sS'slr' -p62 -S'NP_065202.2:p.(T88Pfs*31)' -p63 -ssS'submitted_variant' -p64 
-S'NM_020469.2:c.261delG' -p65 -sS'genome_context_intronic_sequence' -p66 -g4 -sS'hgvs_lrg_variant' -p67 -g4 -sS'hgvs_transcript_variant' -p68 -S'NM_020469.2:c.261del' -p69 -sS'hgvs_refseqgene_variant' -p70 -g4 -sS'primary_assembly_loci' -p71 -(dp72 -S'hg19' -p73 -(dp74 -g18 -S'NC_000009.11:g.136132908_136132909=' -p75 -sg20 -(dp76 -g22 -S'chr9' -p77 -sg24 -S'TA' -p78 -sg26 -S'136132908' -p79 -sg28 -g78 -sssg47 -(dp80 -g18 -S'NC_000009.12:g.133257521_133257522=' -p81 -sg20 -(dp82 -g22 -g77 -sg24 -S'TA' -p83 -sg26 -S'133257521' -p84 -sg28 -g83 -sssS'grch37' -p85 -(dp86 -g18 -S'NC_000009.11:g.136132908_136132909=' -p87 -sg20 -(dp88 -g22 -S'9' -p89 -sg24 -g78 -sg26 -S'136132908' -p90 -sg28 -g78 -sssS'grch38' -p91 -(dp92 -g18 -S'NC_000009.12:g.133257521_133257522=' -p93 -sg20 -(dp94 -g22 -g89 -sg24 -g83 -sg26 -S'133257521' -p95 -sg28 -g83 -ssssS'reference_sequence_records' -p96 -(dp97 -S'protein' -p98 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2' -p99 -sS'transcript' -p100 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2' -p101 -sssS'flag' -p102 -S'gene_variant' -p103 -sS'metadata' -p104 -(dp105 -S'variantvalidator_hgvs_version' -p106 -S'1.1.3' -p107 -sS'uta_schema' -p108 -S'uta_20180821' -p109 -sS'seqrepo_db' -p110 -S'2018-08-21' -p111 -sS'variantvalidator_version' -p112 -S'v0.2' -p113 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant96.txt b/VariantValidator/testing/testOutputsMasterITS/variant96.txt deleted file mode 100644 index 89f2fb9a..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant96.txt +++ /dev/null @@ -1,262 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_020469.2:c.261dup' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37' -p9 -aS'NM_020469.2:c.261 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'refseqgene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -(dp17 -S'grch37' -p18 -(dp19 -S'hgvs_genomic_description' -p20 -S'NW_003315925.1:g.83615dup' -p21 -sS'vcf' -p22 -(dp23 -S'chr' -p24 -S'HG79_PATCH' -p25 -sS'ref' -p26 -S'C' -p27 -sS'pos' -p28 -S'83615' -p29 -sS'alt' -p30 -VCC -p31 -sssa(dp32 -S'hg19' -p33 -(dp34 -g20 -S'NW_003315925.1:g.83615dup' -p35 -sg22 -(dp36 -g24 -S'NW_003315925.1' -p37 -sg26 -g27 -sg28 -S'83615' -p38 -sg30 -VCC -p39 -sssa(dp40 -S'grch38' -p41 -(dp42 -g20 -S'NW_009646201.1:g.83615dup' -p43 -sg22 -(dp44 -g24 -S'HG2030_PATCH' -p45 -sg26 -g27 -sg28 -S'83615' -p46 -sg30 -VCC -p47 -sssa(dp48 -S'hg38' -p49 -(dp50 -g20 -S'NW_009646201.1:g.83615dup' -p51 -sg22 -(dp52 -g24 -S'NW_009646201.1' -p53 -sg26 -g27 -sg28 -S'83615' -p54 -sg30 -VCC -p55 -sssasS'transcript_description' -p56 -VHomo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA -p57 -sS'gene_symbol' -p58 -S'ABO' -p59 -sS'hgvs_predicted_protein_consequence' -p60 -(dp61 -S'tlr' -p62 -S'NP_065202.2:p.(Thr88AspfsTer107)' -p63 -sS'slr' -p64 -S'NP_065202.2:p.(T88Dfs*107)' -p65 
-ssS'submitted_variant' -p66 -S'NM_020469.2:c.261dupG' -p67 -sS'genome_context_intronic_sequence' -p68 -g6 -sS'hgvs_lrg_variant' -p69 -g6 -sS'hgvs_transcript_variant' -p70 -S'NM_020469.2:c.261dup' -p71 -sS'hgvs_refseqgene_variant' -p72 -g6 -sS'primary_assembly_loci' -p73 -(dp74 -S'hg19' -p75 -(dp76 -g20 -S'NC_000009.11:g.136132908_136132909insCC' -p77 -sg22 -(dp78 -g24 -S'chr9' -p79 -sg26 -S'T' -p80 -sg28 -S'136132908' -p81 -sg30 -VTCC -p82 -sssg49 -(dp83 -g20 -S'NC_000009.12:g.133257521_133257522insCC' -p84 -sg22 -(dp85 -g24 -g79 -sg26 -g80 -sg28 -S'133257521' -p86 -sg30 -VTCC -p87 -sssS'grch37' -p88 -(dp89 -g20 -S'NC_000009.11:g.136132908_136132909insCC' -p90 -sg22 -(dp91 -g24 -S'9' -p92 -sg26 -g80 -sg28 -S'136132908' -p93 -sg30 -VTCC -p94 -sssS'grch38' -p95 -(dp96 -g20 -S'NC_000009.12:g.133257521_133257522insCC' -p97 -sg22 -(dp98 -g24 -g92 -sg26 -g80 -sg28 -S'133257521' -p99 -sg30 -VTCC -p100 -ssssS'reference_sequence_records' -p101 -(dp102 -S'protein' -p103 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2' -p104 -sS'transcript' -p105 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2' -p106 -sssS'metadata' -p107 -(dp108 -S'variantvalidator_hgvs_version' -p109 -S'1.1.3' -p110 -sS'uta_schema' -p111 -S'uta_20180821' -p112 -sS'seqrepo_db' -p113 -S'2018-08-21' -p114 -sS'variantvalidator_version' -p115 -S'v0.2' -p116 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant97.txt b/VariantValidator/testing/testOutputsMasterITS/variant97.txt deleted file mode 100644 index 9b6e02f9..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant97.txt +++ /dev/null @@ -1,261 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'NM_020469.2:c.261_262insTT' -p3 -(dp4 -S'hgvs_lrg_transcript_variant' -p5 -S'' -p6 -sS'validation_warnings' -p7 -(lp8 -S'The displayed variants may be artefacts of aligning NM_020469.2 with genome build GRCh37' -p9 -aS'NM_020469.2:c.261_262 contains 1 transcript base(s) that fail to align to chromosome NC_000009.11' -p10 -aS'Caution should be used when reporting the displayed variant descriptions' -p11 -aS'If you are unsure, please contact admin' -p12 -aS'RefSeqGene record not available' -p13 -asS'refseqgene_context_intronic_sequence' -p14 -g6 -sS'alt_genomic_loci' -p15 -(lp16 -(dp17 -S'grch37' -p18 -(dp19 -S'hgvs_genomic_description' -p20 -S'NW_003315925.1:g.83614_83615insAA' -p21 -sS'vcf' -p22 -(dp23 -S'chr' -p24 -S'HG79_PATCH' -p25 -sS'ref' -p26 -S'T' -p27 -sS'pos' -p28 -S'83614' -p29 -sS'alt' -p30 -VTAA -p31 -sssa(dp32 -S'hg19' -p33 -(dp34 -g20 -S'NW_003315925.1:g.83614_83615insAA' -p35 -sg22 -(dp36 -g24 -S'NW_003315925.1' -p37 -sg26 -g27 -sg28 -S'83614' -p38 -sg30 -VTAA -p39 -sssa(dp40 -S'grch38' -p41 -(dp42 -g20 -S'NW_009646201.1:g.83614_83615insAA' -p43 -sg22 -(dp44 -g24 -S'HG2030_PATCH' -p45 -sg26 -g27 -sg28 -S'83614' -p46 -sg30 -VTAA -p47 -sssa(dp48 -S'hg38' -p49 -(dp50 -g20 -S'NW_009646201.1:g.83614_83615insAA' -p51 -sg22 -(dp52 -g24 -S'NW_009646201.1' -p53 -sg26 -g27 -sg28 -S'83614' -p54 -sg30 -VTAA -p55 -sssasS'transcript_description' -p56 -VHomo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA -p57 -sS'gene_symbol' -p58 -S'ABO' -p59 -sS'hgvs_predicted_protein_consequence' -p60 -(dp61 -S'tlr' -p62 -S'NP_065202.2:p.(Thr88LeufsTer32)' -p63 -sS'slr' 
-p64 -S'NP_065202.2:p.(T88Lfs*32)' -p65 -ssS'submitted_variant' -p66 -S'NM_020469.2:c.261_262insTT' -p67 -sS'genome_context_intronic_sequence' -p68 -g6 -sS'hgvs_lrg_variant' -p69 -g6 -sS'hgvs_transcript_variant' -p70 -S'NM_020469.2:c.261_262insTT' -p71 -sS'hgvs_refseqgene_variant' -p72 -g6 -sS'primary_assembly_loci' -p73 -(dp74 -S'hg19' -p75 -(dp76 -g20 -S'NC_000009.11:g.136132909_136132910insACA' -p77 -sg22 -(dp78 -g24 -S'chr9' -p79 -sg26 -g27 -sg28 -S'136132908' -p80 -sg30 -VTAAC -p81 -sssg49 -(dp82 -g20 -S'NC_000009.12:g.133257522_133257523insACA' -p83 -sg22 -(dp84 -g24 -g79 -sg26 -g27 -sg28 -S'133257521' -p85 -sg30 -VTAAC -p86 -sssS'grch37' -p87 -(dp88 -g20 -S'NC_000009.11:g.136132909_136132910insACA' -p89 -sg22 -(dp90 -g24 -S'9' -p91 -sg26 -g27 -sg28 -S'136132908' -p92 -sg30 -VTAAC -p93 -sssS'grch38' -p94 -(dp95 -g20 -S'NC_000009.12:g.133257522_133257523insACA' -p96 -sg22 -(dp97 -g24 -g91 -sg26 -g27 -sg28 -S'133257521' -p98 -sg30 -VTAAC -p99 -ssssS'reference_sequence_records' -p100 -(dp101 -S'protein' -p102 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2' -p103 -sS'transcript' -p104 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2' -p105 -sssS'metadata' -p106 -(dp107 -S'variantvalidator_hgvs_version' -p108 -S'1.1.3' -p109 -sS'uta_schema' -p110 -S'uta_20180821' -p111 -sS'seqrepo_db' -p112 -S'2018-08-21' -p113 -sS'variantvalidator_version' -p114 -S'v0.2' -p115 -ss. 
\ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant98.txt b/VariantValidator/testing/testOutputsMasterITS/variant98.txt deleted file mode 100644 index 37a32b2b..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant98.txt +++ /dev/null @@ -1,309 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_007121.5:c.515A>T' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' -p19 -aS'NM_007121.5:c.515_517 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p20 -aS'Caution should be used when reporting the displayed variant descriptions' -p21 -aS'If you are unsure, please contact admin' -p22 -aS'RefSeqGene record not available' -p23 -asS'refseqgene_context_intronic_sequence' -p24 -g16 -sS'alt_genomic_loci' -p25 -(lp26 -sS'transcript_description' -p27 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA -p28 -sS'gene_symbol' -p29 -S'NR1H2' -p30 -sS'hgvs_predicted_protein_consequence' -p31 -(dp32 -S'tlr' -p33 -S'NP_009052.3:p.(Lys172Ile)' -p34 -sS'slr' -p35 -S'NP_009052.3:p.(K172I)' -p36 -ssS'submitted_variant' -p37 -S'NC_000019.10:g.50378563_50378564insTAC' -p38 -sS'genome_context_intronic_sequence' -p39 -g16 -sS'hgvs_lrg_variant' -p40 -g16 -sS'hgvs_transcript_variant' -p41 -S'NM_007121.5:c.515A>T' -p42 -sS'hgvs_refseqgene_variant' -p43 -g16 -sS'primary_assembly_loci' -p44 -(dp45 -S'grch38' -p46 -(dp47 -S'hgvs_genomic_description' -p48 -S'NC_000019.10:g.50378563_50378564insTAC' -p49 -sS'vcf' -p50 -(dp51 -S'chr' -p52 -S'19' -p53 -sS'ref' -p54 -S'A' -p55 -sS'pos' -p56 -S'50378563' -p57 -sS'alt' -p58 -VATAC -p59 
-sssS'grch37' -p60 -(dp61 -g48 -S'NC_000019.9:g.50881820_50881821insTAC' -p62 -sg50 -(dp63 -g52 -g53 -sg54 -g55 -sg56 -S'50881820' -p64 -sg58 -VATAC -p65 -sssS'hg38' -p66 -(dp67 -g48 -S'NC_000019.10:g.50378563_50378564insTAC' -p68 -sg50 -(dp69 -g52 -S'chr19' -p70 -sg54 -g55 -sg56 -S'50378563' -p71 -sg58 -VATAC -p72 -sssS'hg19' -p73 -(dp74 -g48 -S'NC_000019.9:g.50881820_50881821insTAC' -p75 -sg50 -(dp76 -g52 -g70 -sg54 -g55 -sg56 -S'50881820' -p77 -sg58 -VATAC -p78 -ssssS'reference_sequence_records' -p79 -(dp80 -S'protein' -p81 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3' -p82 -sS'transcript' -p83 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5' -p84 -sssS'NM_001256647.1:c.224A>T' -p85 -(dp86 -g15 -g16 -sg17 -(lp87 -S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' -p88 -aS'NM_001256647.1:c.224_226 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p89 -aS'Caution should be used when reporting the displayed variant descriptions' -p90 -aS'If you are unsure, please contact admin' -p91 -aS'RefSeqGene record not available' -p92 -asg24 -g16 -sg25 -(lp93 -sg27 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA -p94 -sg29 -S'NR1H2' -p95 -sg31 -(dp96 -g33 -S'NP_001243576.1:p.(Lys75Ile)' -p97 -sg35 -S'NP_001243576.1:p.(K75I)' -p98 -ssg37 -g38 -sg39 -g16 -sg40 -g16 -sg41 -S'NM_001256647.1:c.224A>T' -p99 -sg43 -g16 -sg44 -(dp100 -S'grch38' -p101 -(dp102 -g48 -S'NC_000019.10:g.50378563_50378564insTAC' -p103 -sg50 -(dp104 -g52 -g53 -sg54 -g55 -sg56 -S'50378563' -p105 -sg58 -VATAC -p106 -sssS'grch37' -p107 -(dp108 -g48 -S'NC_000019.9:g.50881820_50881821insTAC' -p109 -sg50 -(dp110 -g52 -g53 -sg54 -g55 -sg56 -S'50881820' -p111 -sg58 -VATAC -p112 -sssg66 -(dp113 -g48 -S'NC_000019.10:g.50378563_50378564insTAC' -p114 -sg50 -(dp115 -g52 -g70 -sg54 -g55 -sg56 -S'50378563' -p116 -sg58 -VATAC -p117 -sssS'hg19' -p118 -(dp119 -g48 
-S'NC_000019.9:g.50881820_50881821insTAC' -p120 -sg50 -(dp121 -g52 -g70 -sg54 -g55 -sg56 -S'50881820' -p122 -sg58 -VATAC -p123 -ssssg79 -(dp124 -g81 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1' -p125 -sg83 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1' -p126 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/testOutputsMasterITS/variant99.txt b/VariantValidator/testing/testOutputsMasterITS/variant99.txt deleted file mode 100644 index 2d9a43c8..00000000 --- a/VariantValidator/testing/testOutputsMasterITS/variant99.txt +++ /dev/null @@ -1,309 +0,0 @@ -(dp0 -S'flag' -p1 -S'gene_variant' -p2 -sS'metadata' -p3 -(dp4 -S'variantvalidator_hgvs_version' -p5 -S'1.1.3' -p6 -sS'uta_schema' -p7 -S'uta_20180821' -p8 -sS'seqrepo_db' -p9 -S'2018-08-21' -p10 -sS'variantvalidator_version' -p11 -S'v0.2' -p12 -ssS'NM_007121.5:c.515_516del' -p13 -(dp14 -S'hgvs_lrg_transcript_variant' -p15 -S'' -p16 -sS'validation_warnings' -p17 -(lp18 -S'The displayed variants may be artefacts of aligning NM_007121.5 with genome build GRCh37' -p19 -aS'NM_007121.5:c.514_515 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p20 -aS'Caution should be used when reporting the displayed variant descriptions' -p21 -aS'If you are unsure, please contact admin' -p22 -aS'RefSeqGene record not available' -p23 -asS'refseqgene_context_intronic_sequence' -p24 -g16 -sS'alt_genomic_loci' -p25 -(lp26 -sS'transcript_description' -p27 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA -p28 -sS'gene_symbol' -p29 -S'NR1H2' -p30 -sS'hgvs_predicted_protein_consequence' -p31 -(dp32 -S'tlr' -p33 -S'NP_009052.3:p.(Lys172ThrfsTer34)' -p34 -sS'slr' -p35 -S'NP_009052.3:p.(K172Tfs*34)' -p36 -ssS'submitted_variant' -p37 -S'NC_000019.10:g.50378563_50378564insC' -p38 -sS'genome_context_intronic_sequence' -p39 -g16 -sS'hgvs_lrg_variant' -p40 -g16 -sS'hgvs_transcript_variant' -p41 -S'NM_007121.5:c.515_516del' -p42 
-sS'hgvs_refseqgene_variant' -p43 -g16 -sS'primary_assembly_loci' -p44 -(dp45 -S'grch38' -p46 -(dp47 -S'hgvs_genomic_description' -p48 -S'NC_000019.10:g.50378563_50378564insC' -p49 -sS'vcf' -p50 -(dp51 -S'chr' -p52 -S'19' -p53 -sS'ref' -p54 -S'A' -p55 -sS'pos' -p56 -S'50378563' -p57 -sS'alt' -p58 -S'AC' -p59 -sssS'grch37' -p60 -(dp61 -g48 -S'NC_000019.9:g.50881820_50881821insC' -p62 -sg50 -(dp63 -g52 -g53 -sg54 -g55 -sg56 -S'50881820' -p64 -sg58 -S'AC' -p65 -sssS'hg38' -p66 -(dp67 -g48 -S'NC_000019.10:g.50378563_50378564insC' -p68 -sg50 -(dp69 -g52 -S'chr19' -p70 -sg54 -g55 -sg56 -S'50378563' -p71 -sg58 -S'AC' -p72 -sssS'hg19' -p73 -(dp74 -g48 -S'NC_000019.9:g.50881820_50881821insC' -p75 -sg50 -(dp76 -g52 -g70 -sg54 -g55 -sg56 -S'50881820' -p77 -sg58 -S'AC' -p78 -ssssS'reference_sequence_records' -p79 -(dp80 -S'protein' -p81 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3' -p82 -sS'transcript' -p83 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5' -p84 -sssS'NM_001256647.1:c.224_225del' -p85 -(dp86 -g15 -g16 -sg17 -(lp87 -S'The displayed variants may be artefacts of aligning NM_001256647.1 with genome build GRCh37' -p88 -aS'NM_001256647.1:c.223_224 contains 3 transcript base(s) that fail to align to chromosome NC_000019.9' -p89 -aS'Caution should be used when reporting the displayed variant descriptions' -p90 -aS'If you are unsure, please contact admin' -p91 -aS'RefSeqGene record not available' -p92 -asg24 -g16 -sg25 -(lp93 -sg27 -VHomo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA -p94 -sg29 -S'NR1H2' -p95 -sg31 -(dp96 -g33 -S'NP_001243576.1:p.(Lys75ThrfsTer34)' -p97 -sg35 -S'NP_001243576.1:p.(K75Tfs*34)' -p98 -ssg37 -g38 -sg39 -g16 -sg40 -g16 -sg41 -S'NM_001256647.1:c.224_225del' -p99 -sg43 -g16 -sg44 -(dp100 -S'grch38' -p101 -(dp102 -g48 -S'NC_000019.10:g.50378563_50378564insC' -p103 -sg50 -(dp104 -g52 -g53 -sg54 -g55 -sg56 -S'50378563' -p105 -sg58 -S'AC' -p106 -sssS'grch37' -p107 -(dp108 -g48 
-S'NC_000019.9:g.50881820_50881821insC' -p109 -sg50 -(dp110 -g52 -g53 -sg54 -g55 -sg56 -S'50881820' -p111 -sg58 -S'AC' -p112 -sssg66 -(dp113 -g48 -S'NC_000019.10:g.50378563_50378564insC' -p114 -sg50 -(dp115 -g52 -g70 -sg54 -g55 -sg56 -S'50378563' -p116 -sg58 -S'AC' -p117 -sssS'hg19' -p118 -(dp119 -g48 -S'NC_000019.9:g.50881820_50881821insC' -p120 -sg50 -(dp121 -g52 -g70 -sg54 -g55 -sg56 -S'50881820' -p122 -sg58 -S'AC' -p123 -ssssg79 -(dp124 -g81 -S'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1' -p125 -sg83 -S'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1' -p126 -sss. \ No newline at end of file diff --git a/VariantValidator/testing/test_vv.py b/VariantValidator/testing/test_vv.py deleted file mode 100644 index 4032f77b..00000000 --- a/VariantValidator/testing/test_vv.py +++ /dev/null @@ -1,57 +0,0 @@ -#Why isn't this discovered I wonder. -import os -import pytest -import vvTestFunctions as fn -from VariantValidator import Validator - -inputVariants=fn.loadVariantFile(os.path.join(os.path.dirname(__file__), "inputVariants.txt")) - -''' -print("Configuring for personal linux") -seqrepo_current_version='2018-08-21' -HGVS_SEQREPO_DIR='/home/buran/documents/workspace/ITS/seqrepo/'+seqrepo_current_version -os.environ['HGVS_SEQREPO_DIR']=HGVS_SEQREPO_DIR -uta_current_version='uta_20180821' -UTA_DB_URL='postgresql://uta_admin:uta_admin@127.0.0.1/uta/' + uta_current_version -os.environ['UTA_DB_URL']=UTA_DB_URL -from VariantValidator import variantValidator as vv -vv.my_config() -''' - - -@pytest.fixture(params=inputVariants[:]) -def constructValidation(request): - val=Validator() -# print request.param - selectTranscripts='all' - selectedAssembly='GRCh37' - out=val.validate(request.param,selectedAssembly,selectTranscripts) - del val.db - del val - return out - -@pytest.mark.skip(reason="old test") -def test_validation_output(constructValidation): - v=constructValidation - assert v!=None - -@pytest.mark.skip(reason="old test") -def 
test_validation_errors(constructValidation): - v=constructValidation - logs=v["metadata"]["logs"].split("\n") - e=0 - for l in logs: - if "ERROR:" in l: - e+=1 - assert e==0 - -@pytest.mark.skip(reason="old test") -def test_validation_criticals(constructValidation): - v=constructValidation - logs=v["metadata"]["logs"].split("\n") - c=0 - for l in logs: - if "CRIT:" in l: - c+=1 - assert c==0 - diff --git a/VariantValidator/testing/vvTestCompare.py b/VariantValidator/testing/vvTestCompare.py deleted file mode 100644 index 1887342a..00000000 --- a/VariantValidator/testing/vvTestCompare.py +++ /dev/null @@ -1,11 +0,0 @@ -#Saving script - -import vvTestFunctions as fn - -masterDirectory="testOutputsMasterITS " -testDirectories=["testOutputs"] - -for d in testDirectories: - print(("Comparing "+masterDirectory+" and "+d)) - fn.compareBatches(masterDirectory,d) - diff --git a/VariantValidator/testing/vvTestFunctions.py b/VariantValidator/testing/vvTestFunctions.py deleted file mode 100644 index 86f7e02d..00000000 --- a/VariantValidator/testing/vvTestFunctions.py +++ /dev/null @@ -1,190 +0,0 @@ -#PJDP testing suite for variant validator - -#Run this test to validate all variants and SAVE the results for comparison with a different version. -#The input variants file should contain a bunch of variants on each line in quotes. Anything outside the -#quotes is discarded. - -import os -import pickle -import json -import sys - -import sqlite3 -import logging - -logConsoleHandler = logging.StreamHandler() -logConsoleHandler.setLevel(logging.DEBUG) -#Debug -hl=logging.getLogger("hgvs.dataproviders.uta") -hl.addHandler(logConsoleHandler) - -def generateTestFolder(path, inputVariants, validator): - #Saves the results of running inputVariants to a folder given in saveDirectory. - if not os.path.isdir(path): - os.mkdir(path) - variantArray=loadVariantFile(inputVariants) - #Go through the variant array, validating, and save the results. 
- batch=validateBatch(variantArray,validator) - #Save copy of the resulting dictionary - saveValidationsAsFolder(path,batch) - -def generateTestJSON(path, inputVariants,sysOut): - variantArray=loadVariantFile(inputVariants) - #Go through the variant array, validating, and save the results. - batch=validateBatch(variantArray) - #batch.append(sysOut.getvalue()) - #Save copy of the resulting dictionary - saveValidationsAsJSON(path,batch) - -def saveValidationsAsFolder(path, validations): - #Pickles validation dictionaries into the given folder. - for i,v in enumerate(validations): - with open(os.path.join(path,"variant"+str(i)+".txt") ,"w") as f: - pickle.dump(v,f) - -def saveValidationsAsJSON(path,validations): - #Saves a set of validations (v is a list of dictionaries) or a bunch of validations (v is a list of dictionaries) - #as the json given in path. The name of the file will be that of the input variant string. - jOut=json.dumps(validations) - with open(path,"w") as f: - f.write(jOut) - print(("JSON saved to "+path)) - -def loadVariantFile(path): - out=[] - #Load up the input variant file, should be passed in path.txt. Extra space, commas and quotes will be stripped. - with open(path) as f: - for l in f.readlines(): - l=l.strip() - if len(l)>3: - if l[-1]==",": - l=l[:-1] - if l[-1]=='"': - l=l[:-1] - if l[0]=='"': - l=l[1:] - out.append(l) - return out - -def saveVariantFile(path, variants): - #Saves a variant input array (a bunch of strings) into a new text file given by path. - with open(path,"w") as f: - for v in variants: - f.write(v+"\n") - -def mergeVariantList(variants1,variants2): - #Merges two lists of variants, avoiding duplicants. - out=[] - for v in variants1: - if not v in out: - out.append(v) - for v in variants2: - if not v in out: - out.append(v) - return out - -def loadValidations(path): - #Loads a set of validations from the folder given in path. 
- out=[] - for paths,dirs,files in os.walk(path): - for filePath in files: - with open(os.path.join(paths,filePath)) as f: - out.append(pickle.load(f)) - #print(type(out[-1])) - return out - -def validateBatch(variantArray,val): - #Returns an array of validations (themselves dictionary objects). - out=[] - selectTranscripts='all' - selectedAssembly='GRCh37' - for i,v in enumerate(variantArray): - print(("VALIDATING Variant"+str(i)+" "+str(i+1)+"/"+str(len(variantArray))+" "+str(v))) - try: - out.append(val.validate(v,selectedAssembly,selectTranscripts)) - except KeyboardInterrupt: - print("Exiting...") - sys.exit() - except Exception as e: - print(("FATAL error processing variant: "+str(e))) - out.append({"ERROR":str(e)}) - raise #debug - uncomment this line to ensure the test leaves a traceback and fails the first time there's a critical error. - return out - -def retrieveVariant(validation): - #Returns the variant string (if possible) from a validation. - out=None - for v in list(validation.values()): - try: - if type(v)==type({}) and "submitted_variant" in list(v.keys()): - out=v["submitted_variant"] - return out - except (KeyError, TypeError, AttributeError): - pass - raise AttributeError("Validation does not contain the original variant string") - -def compareValidations(v1,v2,id): - #print(v1,v2) - #Remove metadata - v1Keys=list(v1.keys()) - if "metadata" in v1Keys: - v1Keys.remove("metadata") - else: - print(("Variant "+str(id)+": metadata not found in first variant")) - v2Keys=list(v2.keys()) - if "metadata" in v2Keys: - v2Keys.remove("metadata") - else: - print(("Variant "+str(id)+": metadata not found in second variant")) - for vk in v1Keys: - if not (vk in v2Keys): -# print("tag "+vk+" : "+str(v1[vk])+" not found in second variant") - print(("Variant "+str(id)+": Tag "+vk+" not found in second variant")) - return False - for vk in v2Keys: - if not (vk in v1Keys): -# print("tag "+vk+" : "+str(v2[vk])+" not found in first variant") - print(("Variant 
"+str(id)+": Tag "+vk+" not found in first variant")) - return False - for vk in v1Keys: - if not (v1[vk]==v2[vk]): - if type(v1[vk])==type(dict()) or type(v2[vk])==type(dict()): - print(("Variant " + str(id) + ": Different tag values for key " + str(vk))) - else: - print(("Variant "+str(id)+": Different tag values - "+str(vk)+" : "+str(v1[vk])+" vs. "+str(vk)+" : "+str(v2[vk]))) - return False - return True - -def compareBatches(v1path,v2path): - #Loads all files in validations folder and compares them - outFlags=[] - passScore=0 - v1batch=loadValidations(v1path) - v2batch=loadValidations(v2path) - print("Comparing validation sets...") - for i,v in enumerate(v1batch): -# print("Comparing validation "+str(i)) - outFlags.append(compareValidations(v1batch[i],v2batch[i],i)) - if outFlags[-1]: - passScore+=1 - if passScore==len(v1batch): - #Test passed. - print(("Validation sets are identical, "+str(passScore)+" passed")) - return True - else: - print(("Validation sets are NOT identical, passed " + str(passScore) + "/" + str(len(v1batch)))) - #for i,v in enumerate(v1batch): - #if not outFlags[i]: - #print("Mismatch in validation "+str(i)) - #print(v1batch[i]) - #print("Verses") - #print(v2batch[i]) - return False - -if __name__=="__main__": - - inputVariants="inputVariants.txt" - #saveOut="testJSON.json" - - #fn.generateTestJSON(saveOut,inputVariants,sysOut) - generateTestFolder("testOutputs",inputVariants) diff --git a/VariantValidator/testing/vvTestSave.py b/VariantValidator/testing/vvTestSave.py deleted file mode 100644 index 7228505c..00000000 --- a/VariantValidator/testing/vvTestSave.py +++ /dev/null @@ -1,10 +0,0 @@ -#Saving script - -import vvTestFunctions as fn -#from VariantValidator import Validator -import VariantValidator as vv -import os - -val=vv.Validator() -os.environ["ADD_LOGS"]="True" -fn.generateTestFolder("testOutputs","inputVariants.txt",val) diff --git a/test/test_inputs.py b/test/test_inputs.py index 2fdf5cbe..42bfdbc0 100644 --- 
a/test/test_inputs.py +++ b/test/test_inputs.py @@ -1,6 +1,7 @@ from VariantValidator import Validator +from unittest import TestCase -class TestVariants(object): +class TestVariants(TestCase): @classmethod def setup_class(cls): @@ -14,8 +15,7 @@ def test_variant1(self): assert 'NM_015120.4:c.35T>C' in list(results.keys()) assert results['NM_015120.4:c.35T>C']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.35T>C' assert results['NM_015120.4:c.35T>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_015120.4:c.35T>C']['alt_genomic_loci'] == [] - assert results['NM_015120.4:c.35T>C']['transcript_description'] == 'Homo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA' + self.assertCountEqual(results['NM_015120.4:c.35T>C']['alt_genomic_loci'], []) assert results['NM_015120.4:c.35T>C']['gene_symbol'] == 'ALMS1' assert results['NM_015120.4:c.35T>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Leu12Pro)', 'slr': 'NP_055935.4:p.(L12P)'} assert results['NM_015120.4:c.35T>C']['submitted_variant'] == 'NM_015120.4:c.35T>C' @@ -40,8 +40,7 @@ def test_variant2(self): assert 'NM_015120.4:c.39G>C' in list(results.keys()) assert results['NM_015120.4:c.39G>C']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.39G>C' assert results['NM_015120.4:c.39G>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_015120.4:c.39G>C']['alt_genomic_loci'] == [] - assert results['NM_015120.4:c.39G>C']['transcript_description'] == 'Homo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA' + self.assertCountEqual(results['NM_015120.4:c.39G>C']['alt_genomic_loci'], []) assert results['NM_015120.4:c.39G>C']['gene_symbol'] == 'ALMS1' assert results['NM_015120.4:c.39G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Glu13Asp)', 'slr': 'NP_055935.4:p.(E13D)'} assert results['NM_015120.4:c.39G>C']['submitted_variant'] == 'NM_015120.4:c.39G>C' @@ -65,8 +64,7 @@ def 
test_variant3(self): assert 'NM_015120.4:c.34C>T' in list(results.keys()) assert results['NM_015120.4:c.34C>T']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.34C>T' assert results['NM_015120.4:c.34C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_015120.4:c.34C>T']['alt_genomic_loci'] == [] - assert results['NM_015120.4:c.34C>T']['transcript_description'] == 'Homo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA' + self.assertCountEqual(results['NM_015120.4:c.34C>T']['alt_genomic_loci'], []) assert results['NM_015120.4:c.34C>T']['gene_symbol'] == 'ALMS1' assert results['NM_015120.4:c.34C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Leu12=)', 'slr': 'NP_055935.4:p.(L12=)'} assert results['NM_015120.4:c.34C>T']['submitted_variant'] == 'NM_015120.4:c.34C>T' @@ -90,8 +88,7 @@ def test_variant4(self): assert 'NM_015120.4:c.34C>T' in list(results.keys()) assert results['NM_015120.4:c.34C>T']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.34C>T' assert results['NM_015120.4:c.34C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_015120.4:c.34C>T']['alt_genomic_loci'] == [] - assert results['NM_015120.4:c.34C>T']['transcript_description'] == 'Homo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA' + self.assertCountEqual(results['NM_015120.4:c.34C>T']['alt_genomic_loci'], []) assert results['NM_015120.4:c.34C>T']['gene_symbol'] == 'ALMS1' assert results['NM_015120.4:c.34C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Leu12=)', 'slr': 'NP_055935.4:p.(L12=)'} assert results['NM_015120.4:c.34C>T']['submitted_variant'] == 'NC_000002.11:g.73613030C>T' @@ -115,8 +112,7 @@ def test_variant5(self): assert 'NM_000109.3:c.7+127703T>A' in list(results.keys()) assert results['NM_000109.3:c.7+127703T>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000109.3:c.7+127703T>A']['refseqgene_context_intronic_sequence'] == 
'' - assert results['NM_000109.3:c.7+127703T>A']['alt_genomic_loci'] == [] - assert results['NM_000109.3:c.7+127703T>A']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427c, mRNA' + self.assertCountEqual(results['NM_000109.3:c.7+127703T>A']['alt_genomic_loci'], []) assert results['NM_000109.3:c.7+127703T>A']['gene_symbol'] == 'DMD' assert results['NM_000109.3:c.7+127703T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000100.2:p.?', 'slr': 'NP_000100.2:p.?'} assert results['NM_000109.3:c.7+127703T>A']['submitted_variant'] == 'NC_000023.10:g.33229673A>T' @@ -133,8 +129,7 @@ def test_variant5(self): assert 'NM_004006.2:c.-244T>A' in list(results.keys()) assert results['NM_004006.2:c.-244T>A']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.-244T>A' assert results['NM_004006.2:c.-244T>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.-244T>A']['alt_genomic_loci'] == [] - assert results['NM_004006.2:c.-244T>A']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + self.assertCountEqual(results['NM_004006.2:c.-244T>A']['alt_genomic_loci'], []) assert results['NM_004006.2:c.-244T>A']['gene_symbol'] == 'DMD' assert results['NM_004006.2:c.-244T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.?', 'slr': 'NP_003997.1:p.?'} assert results['NM_004006.2:c.-244T>A']['submitted_variant'] == 'NC_000023.10:g.33229673A>T' @@ -158,8 +153,7 @@ def test_variant6(self): assert 'NM_001145026.1:c.715A>G' in list(results.keys()) assert results['NM_001145026.1:c.715A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001145026.1:c.715A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001145026.1:c.715A>G']['alt_genomic_loci'] == [] - assert results['NM_001145026.1:c.715A>G']['transcript_description'] == 'Homo sapiens protein tyrosine phosphatase, receptor type Q (PTPRQ), mRNA' + 
self.assertCountEqual(results['NM_001145026.1:c.715A>G']['alt_genomic_loci'], []) assert results['NM_001145026.1:c.715A>G']['gene_symbol'] == 'PTPRQ' assert results['NM_001145026.1:c.715A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001138498.1:p.(Arg239Gly)', 'slr': 'NP_001138498.1:p.(R239G)'} assert results['NM_001145026.1:c.715A>G']['submitted_variant'] == 'NM_001145026.1:c.715A>G' @@ -182,8 +176,7 @@ def test_variant7(self): assert 'NM_000548.4:c.138+821del' in list(results.keys()) assert results['NM_000548.4:c.138+821del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000548.4:c.138+821del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000548.4:c.138+821del']['alt_genomic_loci'] == [] - assert results['NM_000548.4:c.138+821del']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000548.4:c.138+821del']['alt_genomic_loci'], []) assert results['NM_000548.4:c.138+821del']['gene_symbol'] == 'TSC2' assert results['NM_000548.4:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000539.2(LRG_487p1):p.?', 'slr': 'NP_000539.2:p.?'} assert results['NM_000548.4:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' @@ -200,8 +193,7 @@ def test_variant7(self): assert 'NM_001077183.2:c.138+821del' in list(results.keys()) assert results['NM_001077183.2:c.138+821del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001077183.2:c.138+821del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001077183.2:c.138+821del']['alt_genomic_loci'] == [] - assert results['NM_001077183.2:c.138+821del']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001077183.2:c.138+821del']['alt_genomic_loci'], []) assert results['NM_001077183.2:c.138+821del']['gene_symbol'] == 'TSC2' assert 
results['NM_001077183.2:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001070651.1:p.?', 'slr': 'NP_001070651.1:p.?'} assert results['NM_001077183.2:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' @@ -218,8 +210,7 @@ def test_variant7(self): assert 'NM_001318831.1:c.-89+821del' in list(results.keys()) assert results['NM_001318831.1:c.-89+821del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001318831.1:c.-89+821del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001318831.1:c.-89+821del']['alt_genomic_loci'] == [] - assert results['NM_001318831.1:c.-89+821del']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 8, mRNA' + self.assertCountEqual(results['NM_001318831.1:c.-89+821del']['alt_genomic_loci'], []) assert results['NM_001318831.1:c.-89+821del']['gene_symbol'] == 'TSC2' assert results['NM_001318831.1:c.-89+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305760.1:p.?', 'slr': 'NP_001305760.1:p.?'} assert results['NM_001318831.1:c.-89+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' @@ -236,8 +227,7 @@ def test_variant7(self): assert 'NM_000548.3:c.138+821del' in list(results.keys()) assert results['NM_000548.3:c.138+821del']['hgvs_lrg_transcript_variant'] == 'LRG_487t1:c.138+821del' assert results['NM_000548.3:c.138+821del']['refseqgene_context_intronic_sequence'] == 'NG_005895.1(NM_000548.3):c.138+821del' - assert results['NM_000548.3:c.138+821del']['alt_genomic_loci'] == [] - assert results['NM_000548.3:c.138+821del']['transcript_description'] == 'Homo sapiens tuberous sclerosis 2 (TSC2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000548.3:c.138+821del']['alt_genomic_loci'], []) assert results['NM_000548.3:c.138+821del']['gene_symbol'] == 'TSC2' assert results['NM_000548.3:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000539.2(LRG_487p1):p.?', 'slr': 'NP_000539.2:p.?'} assert 
results['NM_000548.3:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' @@ -254,8 +244,7 @@ def test_variant7(self): assert 'NM_001114382.1:c.138+821del' in list(results.keys()) assert results['NM_001114382.1:c.138+821del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001114382.1:c.138+821del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001114382.1:c.138+821del']['alt_genomic_loci'] == [] - assert results['NM_001114382.1:c.138+821del']['transcript_description'] == 'Homo sapiens tuberous sclerosis 2 (TSC2), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001114382.1:c.138+821del']['alt_genomic_loci'], []) assert results['NM_001114382.1:c.138+821del']['gene_symbol'] == 'TSC2' assert results['NM_001114382.1:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001107854.1:p.?', 'slr': 'NP_001107854.1:p.?'} assert results['NM_001114382.1:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' @@ -272,8 +261,7 @@ def test_variant7(self): assert 'NM_001318832.1:c.171+821del' in list(results.keys()) assert results['NM_001318832.1:c.171+821del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001318832.1:c.171+821del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001318832.1:c.171+821del']['alt_genomic_loci'] == [] - assert results['NM_001318832.1:c.171+821del']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 9, mRNA' + self.assertCountEqual(results['NM_001318832.1:c.171+821del']['alt_genomic_loci'], []) assert results['NM_001318832.1:c.171+821del']['gene_symbol'] == 'TSC2' assert results['NM_001318832.1:c.171+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305761.1:p.?', 'slr': 'NP_001305761.1:p.?'} assert results['NM_001318832.1:c.171+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' @@ -290,8 +278,7 @@ def test_variant7(self): assert 'NM_001363528.1:c.138+821del' in 
list(results.keys()) assert results['NM_001363528.1:c.138+821del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363528.1:c.138+821del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001363528.1:c.138+821del']['alt_genomic_loci'] == [] - assert results['NM_001363528.1:c.138+821del']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 10, mRNA' + self.assertCountEqual(results['NM_001363528.1:c.138+821del']['alt_genomic_loci'], []) assert results['NM_001363528.1:c.138+821del']['gene_symbol'] == 'TSC2' assert results['NM_001363528.1:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350457.1:p.?', 'slr': 'NP_001350457.1:p.?'} assert results['NM_001363528.1:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' @@ -309,8 +296,7 @@ def test_variant7(self): assert 'NM_021055.2:c.138+821del' in list(results.keys()) assert results['NM_021055.2:c.138+821del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_021055.2:c.138+821del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_021055.2:c.138+821del']['alt_genomic_loci'] == [] - assert results['NM_021055.2:c.138+821del']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_021055.2:c.138+821del']['alt_genomic_loci'], []) assert results['NM_021055.2:c.138+821del']['gene_symbol'] == 'TSC2' assert results['NM_021055.2:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_066399.2:p.?', 'slr': 'NP_066399.2:p.?'} assert results['NM_021055.2:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' @@ -327,8 +313,7 @@ def test_variant7(self): assert 'NM_001077183.1:c.138+821del' in list(results.keys()) assert results['NM_001077183.1:c.138+821del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001077183.1:c.138+821del']['refseqgene_context_intronic_sequence'] == '' - assert 
results['NM_001077183.1:c.138+821del']['alt_genomic_loci'] == [] - assert results['NM_001077183.1:c.138+821del']['transcript_description'] == 'Homo sapiens tuberous sclerosis 2 (TSC2), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001077183.1:c.138+821del']['alt_genomic_loci'], []) assert results['NM_001077183.1:c.138+821del']['gene_symbol'] == 'TSC2' assert results['NM_001077183.1:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001070651.1:p.?', 'slr': 'NP_001070651.1:p.?'} assert results['NM_001077183.1:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' @@ -345,8 +330,7 @@ def test_variant7(self): assert 'NM_001318827.1:c.138+821del' in list(results.keys()) assert results['NM_001318827.1:c.138+821del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001318827.1:c.138+821del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001318827.1:c.138+821del']['alt_genomic_loci'] == [] - assert results['NM_001318827.1:c.138+821del']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 6, mRNA' + self.assertCountEqual(results['NM_001318827.1:c.138+821del']['alt_genomic_loci'], []) assert results['NM_001318827.1:c.138+821del']['gene_symbol'] == 'TSC2' assert results['NM_001318827.1:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305756.1:p.?', 'slr': 'NP_001305756.1:p.?'} assert results['NM_001318827.1:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' @@ -363,8 +347,7 @@ def test_variant7(self): assert 'NM_001114382.2:c.138+821del' in list(results.keys()) assert results['NM_001114382.2:c.138+821del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001114382.2:c.138+821del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001114382.2:c.138+821del']['alt_genomic_loci'] == [] - assert results['NM_001114382.2:c.138+821del']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), 
transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001114382.2:c.138+821del']['alt_genomic_loci'], []) assert results['NM_001114382.2:c.138+821del']['gene_symbol'] == 'TSC2' assert results['NM_001114382.2:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001107854.1:p.?', 'slr': 'NP_001107854.1:p.?'} assert results['NM_001114382.2:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' @@ -381,8 +364,7 @@ def test_variant7(self): assert 'NM_001318829.1:c.-9-826del' in list(results.keys()) assert results['NM_001318829.1:c.-9-826del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001318829.1:c.-9-826del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001318829.1:c.-9-826del']['alt_genomic_loci'] == [] - assert results['NM_001318829.1:c.-9-826del']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 7, mRNA' + self.assertCountEqual(results['NM_001318829.1:c.-9-826del']['alt_genomic_loci'], []) assert results['NM_001318829.1:c.-9-826del']['gene_symbol'] == 'TSC2' assert results['NM_001318829.1:c.-9-826del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305758.1:p.?', 'slr': 'NP_001305758.1:p.?'} assert results['NM_001318829.1:c.-9-826del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' @@ -406,8 +388,7 @@ def test_variant8(self): assert 'NM_000088.3:c.589_590delinsCT' in list(results.keys()) assert results['NM_000088.3:c.589_590delinsCT']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589_590delinsCT' assert results['NM_000088.3:c.589_590delinsCT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.589_590delinsCT']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.589_590delinsCT']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.589_590delinsCT']['alt_genomic_loci'], []) assert 
results['NM_000088.3:c.589_590delinsCT']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.589_590delinsCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Leu)', 'slr': 'NP_000079.2:p.(G197L)'} assert results['NM_000088.3:c.589_590delinsCT']['submitted_variant'] == 'NM_000088.3:c.589GG>CT' @@ -430,8 +411,7 @@ def test_variant9(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_000094.3:c.6751-2_6751-3del' @@ -455,8 +435,7 @@ def test_variant10(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'COL5A1:c.5071A>T' @@ -480,8 +459,7 @@ def test_variant11(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert 
results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NG_007400.1:c.5071A>T' @@ -505,8 +483,7 @@ def test_variant12(self): assert 'NM_002474.2:c.3034_3035inv' in list(results.keys()) assert results['NM_002474.2:c.3034_3035inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_002474.2:c.3034_3035inv']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_002474.2:c.3034_3035inv']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}] - assert results['NM_002474.2:c.3034_3035inv']['transcript_description'] == 'Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1A, mRNA' + self.assertCountEqual(results['NM_002474.2:c.3034_3035inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}]) assert results['NM_002474.2:c.3034_3035inv']['gene_symbol'] == 'MYH11' assert results['NM_002474.2:c.3034_3035inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002465.1:p.(Thr1012Val)', 'slr': 'NP_002465.1:p.(T1012V)'} 
assert results['NM_002474.2:c.3034_3035inv']['submitted_variant'] == 'chr16:15832508_15832509delinsAC' @@ -523,8 +500,7 @@ def test_variant12(self): assert 'NM_022844.2:c.3034_3035inv' in list(results.keys()) assert results['NM_022844.2:c.3034_3035inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_022844.2:c.3034_3035inv']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_022844.2:c.3034_3035inv']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}] - assert results['NM_022844.2:c.3034_3035inv']['transcript_description'] == 'Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2A, mRNA' + self.assertCountEqual(results['NM_022844.2:c.3034_3035inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}]) assert results['NM_022844.2:c.3034_3035inv']['gene_symbol'] == 'MYH11' assert results['NM_022844.2:c.3034_3035inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_074035.1:p.(Thr1012Val)', 'slr': 'NP_074035.1:p.(T1012V)'} assert results['NM_022844.2:c.3034_3035inv']['submitted_variant'] == 'chr16:15832508_15832509delinsAC' @@ -541,8 +517,7 @@ def test_variant12(self): assert 'NM_001040114.1:c.3055_3056inv' in list(results.keys()) assert results['NM_001040114.1:c.3055_3056inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001040114.1:c.3055_3056inv']['refseqgene_context_intronic_sequence'] == '' - assert 
results['NM_001040114.1:c.3055_3056inv']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}] - assert results['NM_001040114.1:c.3055_3056inv']['transcript_description'] == 'Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1B, mRNA' + self.assertCountEqual(results['NM_001040114.1:c.3055_3056inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}]) assert results['NM_001040114.1:c.3055_3056inv']['gene_symbol'] == 'MYH11' assert results['NM_001040114.1:c.3055_3056inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035203.1:p.(Thr1019Val)', 'slr': 'NP_001035203.1:p.(T1019V)'} assert results['NM_001040114.1:c.3055_3056inv']['submitted_variant'] == 'chr16:15832508_15832509delinsAC' @@ -560,8 +535,7 @@ def test_variant12(self): assert 'NM_001040113.1:c.3055_3056inv' in list(results.keys()) assert results['NM_001040113.1:c.3055_3056inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001040113.1:c.3055_3056inv']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001040113.1:c.3055_3056inv']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}] - assert 
results['NM_001040113.1:c.3055_3056inv']['transcript_description'] == 'Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2B, mRNA' + self.assertCountEqual(results['NM_001040113.1:c.3055_3056inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}]) assert results['NM_001040113.1:c.3055_3056inv']['gene_symbol'] == 'MYH11' assert results['NM_001040113.1:c.3055_3056inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035202.1:p.(Thr1019Val)', 'slr': 'NP_001035202.1:p.(T1019V)'} assert results['NM_001040113.1:c.3055_3056inv']['submitted_variant'] == 'chr16:15832508_15832509delinsAC' @@ -585,8 +559,7 @@ def test_variant13(self): assert 'NM_000088.3:c.589-1_589delinsG' in list(results.keys()) assert results['NM_000088.3:c.589-1_589delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.590del' assert results['NM_000088.3:c.589-1_589delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.590del' - assert results['NM_000088.3:c.589-1_589delinsG']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.589-1_589delinsG']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.589-1_589delinsG']['alt_genomic_loci'], []) assert results['NM_000088.3:c.589-1_589delinsG']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.589-1_589delinsG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.589-1_589delinsG']['submitted_variant'] == 'NM_000088.3:c.589-1GG>G' @@ -610,8 +583,7 @@ def test_variant14(self): assert 'NM_000088.3:c.642+1_642+2delinsG' in list(results.keys()) assert 
results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642+2del' assert results['NM_000088.3:c.642+1_642+2delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.642+2del' - assert results['NM_000088.3:c.642+1_642+2delinsG']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.642+1_642+2delinsG']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.642+1_642+2delinsG']['alt_genomic_loci'], []) assert results['NM_000088.3:c.642+1_642+2delinsG']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.642+1_642+2delinsG']['submitted_variant'] == 'NM_000088.3:c.642+1GT>G' @@ -635,8 +607,7 @@ def test_variant15(self): assert 'NM_000088.3:c.589-2_589-1delinsG' in list(results.keys()) assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-2del' assert results['NM_000088.3:c.589-2_589-1delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-2del' - assert results['NM_000088.3:c.589-2_589-1delinsG']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.589-2_589-1delinsG']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.589-2_589-1delinsG']['alt_genomic_loci'], []) assert results['NM_000088.3:c.589-2_589-1delinsG']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.589-2_589-1delinsG']['submitted_variant'] == 'NM_000088.3:c.589-2AG>G' @@ -660,8 +631,7 @@ def test_variant16(self): assert 'Intergenic_Variant_1' in list(results.keys()) assert 
results['Intergenic_Variant_1']['hgvs_lrg_transcript_variant'] == '' assert results['Intergenic_Variant_1']['refseqgene_context_intronic_sequence'] == '' - assert results['Intergenic_Variant_1']['alt_genomic_loci'] == [] - assert results['Intergenic_Variant_1']['transcript_description'] == '' + self.assertCountEqual(results['Intergenic_Variant_1']['alt_genomic_loci'], []) assert results['Intergenic_Variant_1']['gene_symbol'] == '' assert results['Intergenic_Variant_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['Intergenic_Variant_1']['submitted_variant'] == 'NC_000017.10:g.48279242G>T' @@ -684,8 +654,7 @@ def test_variant17(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_000500.7:c.-107-19C>T' @@ -709,8 +678,7 @@ def test_variant18(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 
'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_000518.4:c.-130C>T' @@ -734,8 +702,7 @@ def test_variant19(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_000518.4:c.-50-80C>T' @@ -759,8 +726,7 @@ def test_variant20(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_000518.4:c.316_*342delinsCTACTT' @@ -785,8 +751,7 @@ def test_variant21(self): assert 'NM_000518.4:c.316_*100del' in list(results.keys()) assert results['NM_000518.4:c.316_*100del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000518.4:c.316_*100del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000518.4:c.316_*100del']['alt_genomic_loci'] == [] - assert results['NM_000518.4:c.316_*100del']['transcript_description'] == 
'Homo sapiens hemoglobin subunit beta (HBB), mRNA' + self.assertCountEqual(results['NM_000518.4:c.316_*100del']['alt_genomic_loci'], []) assert results['NM_000518.4:c.316_*100del']['gene_symbol'] == 'HBB' assert results['NM_000518.4:c.316_*100del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000509.1(LRG_1232p1):p.(Leu106SerfsTer3)', 'slr': 'NP_000509.1:p.(L106Sfs*3)'} assert results['NM_000518.4:c.316_*100del']['submitted_variant'] == 'NM_000518.4:c.316_*100del' @@ -809,8 +774,7 @@ def test_variant22(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_000518.4:c.*2000C>T' @@ -834,8 +798,7 @@ def test_variant23(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_000518.4:c.*132+1868C>T' @@ -859,8 +822,7 @@ def test_variant24(self): assert 
'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_000518.4:c.-130_*2000=' @@ -884,8 +846,7 @@ def test_variant25(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_000518.4:c.-50-80_*132+1868=' @@ -909,8 +870,7 @@ def test_variant26(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert 
results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NR_138595.1:n.-810C>T' @@ -934,8 +894,7 @@ def test_variant27(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NR_138595.1:n.1-810C>T' @@ -959,8 +918,7 @@ def test_variant28(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NR_138595.1:n.1071+1A=' @@ -983,8 +941,7 @@ def test_variant29(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert 
results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NR_138595.1:n.-810_1071+1=' @@ -1009,8 +966,7 @@ def test_variant30(self): assert 'NM_000088.3:c.*1400_*1406=' in list(results.keys()) assert results['NM_000088.3:c.*1400_*1406=']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.*1400_*1406=' assert results['NM_000088.3:c.*1400_*1406=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.*1400_*1406=']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.*1400_*1406=']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.*1400_*1406=']['alt_genomic_loci'], []) assert results['NM_000088.3:c.*1400_*1406=']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.*1400_*1406=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.*1400_*1406=']['submitted_variant'] == 'NC_000017.10:g.48261457_48261463TTATGTT=' @@ -1034,8 +990,7 @@ def test_variant31(self): assert 'NM_000088.3:c.589G>T' in list(results.keys()) assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.589G>T']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.589G>T']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.589G>T']['alt_genomic_loci'], []) assert results['NM_000088.3:c.589G>T']['gene_symbol'] == 'COL1A1' assert 
results['NM_000088.3:c.589G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)'} assert results['NM_000088.3:c.589G>T']['submitted_variant'] == 'NC_000017.10:g.48275363C>A' @@ -1059,8 +1014,7 @@ def test_variant32(self): assert 'NM_000088.3:c.589-1G>T' in list(results.keys()) assert results['NM_000088.3:c.589-1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-1G>T' assert results['NM_000088.3:c.589-1G>T']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-1G>T' - assert results['NM_000088.3:c.589-1G>T']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.589-1G>T']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.589-1G>T']['alt_genomic_loci'], []) assert results['NM_000088.3:c.589-1G>T']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.589-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.589-1G>T']['submitted_variant'] == 'NM_000088.3:c.589-1G>T' @@ -1083,8 +1037,7 @@ def test_variant33(self): assert 'NM_000088.3:c.591_593inv' in list(results.keys()) assert results['NM_000088.3:c.591_593inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.591_593inv' assert results['NM_000088.3:c.591_593inv']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.591_593inv']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.591_593inv']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.591_593inv']['alt_genomic_loci'], []) assert results['NM_000088.3:c.591_593inv']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.591_593inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Pro198Asp)', 'slr': 'NP_000079.2:p.(P198D)'} assert 
results['NM_000088.3:c.591_593inv']['submitted_variant'] == 'NM_000088.3:c.591_593inv' @@ -1109,8 +1062,7 @@ def test_variant34(self): assert 'NM_000518.5:c.20A>T' in list(results.keys()) assert results['NM_000518.5:c.20A>T']['hgvs_lrg_transcript_variant'] == 'LRG_1232t1:c.20A>T' assert results['NM_000518.5:c.20A>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000518.5:c.20A>T']['alt_genomic_loci'] == [] - assert results['NM_000518.5:c.20A>T']['transcript_description'] == 'Homo sapiens hemoglobin subunit beta (HBB), mRNA' + self.assertCountEqual(results['NM_000518.5:c.20A>T']['alt_genomic_loci'], []) assert results['NM_000518.5:c.20A>T']['gene_symbol'] == 'HBB' assert results['NM_000518.5:c.20A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000509.1(LRG_1232p1):p.(Glu7Val)', 'slr': 'NP_000509.1:p.(E7V)'} assert results['NM_000518.5:c.20A>T']['submitted_variant'] == '11-5248232-T-A' @@ -1127,8 +1079,7 @@ def test_variant34(self): assert 'NM_000518.4:c.20A>T' in list(results.keys()) assert results['NM_000518.4:c.20A>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000518.4:c.20A>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000518.4:c.20A>T']['alt_genomic_loci'] == [] - assert results['NM_000518.4:c.20A>T']['transcript_description'] == 'Homo sapiens hemoglobin subunit beta (HBB), mRNA' + self.assertCountEqual(results['NM_000518.4:c.20A>T']['alt_genomic_loci'], []) assert results['NM_000518.4:c.20A>T']['gene_symbol'] == 'HBB' assert results['NM_000518.4:c.20A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000509.1(LRG_1232p1):p.(Glu7Val)', 'slr': 'NP_000509.1:p.(E7V)'} assert results['NM_000518.4:c.20A>T']['submitted_variant'] == '11-5248232-T-A' @@ -1152,8 +1103,7 @@ def test_variant35(self): assert 'NM_000088.3:c.589-1G>T' in list(results.keys()) assert results['NM_000088.3:c.589-1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-1G>T' assert 
results['NM_000088.3:c.589-1G>T']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-1G>T' - assert results['NM_000088.3:c.589-1G>T']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.589-1G>T']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.589-1G>T']['alt_genomic_loci'], []) assert results['NM_000088.3:c.589-1G>T']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.589-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.589-1G>T']['submitted_variant'] == 'NG_007400.1(NM_000088.3):c.589-1G>T' @@ -1177,8 +1127,7 @@ def test_variant36(self): assert 'NM_182763.2:c.688+403C>T' in list(results.keys()) assert results['NM_182763.2:c.688+403C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_182763.2:c.688+403C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_182763.2:c.688+403C>T']['alt_genomic_loci'] == [] - assert results['NM_182763.2:c.688+403C>T']['transcript_description'] == 'Homo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_182763.2:c.688+403C>T']['alt_genomic_loci'], []) assert results['NM_182763.2:c.688+403C>T']['gene_symbol'] == 'MCL1' assert results['NM_182763.2:c.688+403C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_877495.1:p.?', 'slr': 'NP_877495.1:p.?'} assert results['NM_182763.2:c.688+403C>T']['submitted_variant'] == '1:150550916G>A' @@ -1195,8 +1144,7 @@ def test_variant36(self): assert 'NM_001197320.1:c.281C>T' in list(results.keys()) assert results['NM_001197320.1:c.281C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001197320.1:c.281C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001197320.1:c.281C>T']['alt_genomic_loci'] == [] - assert 
results['NM_001197320.1:c.281C>T']['transcript_description'] == 'Homo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001197320.1:c.281C>T']['alt_genomic_loci'], []) assert results['NM_001197320.1:c.281C>T']['gene_symbol'] == 'MCL1' assert results['NM_001197320.1:c.281C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001184249.1:p.(Ser94Phe)', 'slr': 'NP_001184249.1:p.(S94F)'} assert results['NM_001197320.1:c.281C>T']['submitted_variant'] == '1:150550916G>A' @@ -1213,8 +1161,7 @@ def test_variant36(self): assert 'NM_021960.4:c.740C>T' in list(results.keys()) assert results['NM_021960.4:c.740C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_021960.4:c.740C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_021960.4:c.740C>T']['alt_genomic_loci'] == [] - assert results['NM_021960.4:c.740C>T']['transcript_description'] == 'Homo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_021960.4:c.740C>T']['alt_genomic_loci'], []) assert results['NM_021960.4:c.740C>T']['gene_symbol'] == 'MCL1' assert results['NM_021960.4:c.740C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068779.1:p.(Ser247Phe)', 'slr': 'NP_068779.1:p.(S247F)'} assert results['NM_021960.4:c.740C>T']['submitted_variant'] == '1:150550916G>A' @@ -1238,8 +1185,7 @@ def test_variant37(self): assert 'NM_182763.2:c.688+403C>T' in list(results.keys()) assert results['NM_182763.2:c.688+403C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_182763.2:c.688+403C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_182763.2:c.688+403C>T']['alt_genomic_loci'] == [] - assert results['NM_182763.2:c.688+403C>T']['transcript_description'] == 'Homo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_182763.2:c.688+403C>T']['alt_genomic_loci'], []) 
assert results['NM_182763.2:c.688+403C>T']['gene_symbol'] == 'MCL1' assert results['NM_182763.2:c.688+403C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_877495.1:p.?', 'slr': 'NP_877495.1:p.?'} assert results['NM_182763.2:c.688+403C>T']['submitted_variant'] == '1-150550916-G-A' @@ -1256,8 +1202,7 @@ def test_variant37(self): assert 'NM_001197320.1:c.281C>T' in list(results.keys()) assert results['NM_001197320.1:c.281C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001197320.1:c.281C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001197320.1:c.281C>T']['alt_genomic_loci'] == [] - assert results['NM_001197320.1:c.281C>T']['transcript_description'] == 'Homo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001197320.1:c.281C>T']['alt_genomic_loci'], []) assert results['NM_001197320.1:c.281C>T']['gene_symbol'] == 'MCL1' assert results['NM_001197320.1:c.281C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001184249.1:p.(Ser94Phe)', 'slr': 'NP_001184249.1:p.(S94F)'} assert results['NM_001197320.1:c.281C>T']['submitted_variant'] == '1-150550916-G-A' @@ -1274,8 +1219,7 @@ def test_variant37(self): assert 'NM_021960.4:c.740C>T' in list(results.keys()) assert results['NM_021960.4:c.740C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_021960.4:c.740C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_021960.4:c.740C>T']['alt_genomic_loci'] == [] - assert results['NM_021960.4:c.740C>T']['transcript_description'] == 'Homo sapiens MCL1, BCL2 family apoptosis regulator (MCL1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_021960.4:c.740C>T']['alt_genomic_loci'], []) assert results['NM_021960.4:c.740C>T']['gene_symbol'] == 'MCL1' assert results['NM_021960.4:c.740C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068779.1:p.(Ser247Phe)', 'slr': 'NP_068779.1:p.(S247F)'} assert 
results['NM_021960.4:c.740C>T']['submitted_variant'] == '1-150550916-G-A' @@ -1298,8 +1242,7 @@ def test_variant38(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NG_008123.1(LEPRE1_v003):c.2055+18G>A' @@ -1323,8 +1266,7 @@ def test_variant39(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NG_008123.1:c.2055+18G>A' @@ -1349,8 +1291,7 @@ def test_variant40(self): assert 'NM_022356.3:c.2055+18G>A' in list(results.keys()) assert results['NM_022356.3:c.2055+18G>A']['hgvs_lrg_transcript_variant'] == 'LRG_5t1:c.2055+18G>A' assert results['NM_022356.3:c.2055+18G>A']['refseqgene_context_intronic_sequence'] == 'NG_008123.1(NM_022356.3):c.2055+18G>A' - assert results['NM_022356.3:c.2055+18G>A']['alt_genomic_loci'] == [] - assert 
results['NM_022356.3:c.2055+18G>A']['transcript_description'] == 'Homo sapiens prolyl 3-hydroxylase 1 (P3H1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_022356.3:c.2055+18G>A']['alt_genomic_loci'], []) assert results['NM_022356.3:c.2055+18G>A']['gene_symbol'] == 'P3H1' assert results['NM_022356.3:c.2055+18G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_071751.3(LRG_5p1):p.?', 'slr': 'NP_071751.3:p.?'} assert results['NM_022356.3:c.2055+18G>A']['submitted_variant'] == 'NG_008123.1(NM_022356.3):c.2055+18G>A' @@ -1374,8 +1315,7 @@ def test_variant41(self): assert 'NM_021983.4:c.490G>C' in list(results.keys()) assert results['NM_021983.4:c.490G>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_021983.4:c.490G>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_021983.4:c.490G>C']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3848158', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'chr6_mann_hap4', 'ref': 'T', 'pos': '3848158', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3842538', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'ref': 'T', 'pos': '3842538', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'C', 'pos': '3884432', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'C', 'pos': '3884432', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3852542', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 
'chr6_ssto_hap7', 'ref': 'C', 'pos': '3852542', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3853244', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': 'C', 'pos': '3853244', 'alt': 'G'}}}] - assert results['NM_021983.4:c.490G>C']['transcript_description'] == 'Homo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA' + self.assertCountEqual(results['NM_021983.4:c.490G>C']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3848158', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'chr6_mann_hap4', 'ref': 'T', 'pos': '3848158', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3842538', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'ref': 'T', 'pos': '3842538', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'C', 'pos': '3884432', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'C', 'pos': '3884432', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3852542', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': 'C', 'pos': '3852542', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3853244', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 
'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': 'C', 'pos': '3853244', 'alt': 'G'}}}]) assert results['NM_021983.4:c.490G>C']['gene_symbol'] == 'HLA-DRB4' assert results['NM_021983.4:c.490G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068818.4:p.(Gly164Arg)', 'slr': 'NP_068818.4:p.(G164R)'} assert results['NM_021983.4:c.490G>C']['submitted_variant'] == 'NM_021983.4:c.490G>C' @@ -1399,8 +1339,7 @@ def test_variant42(self): assert 'NM_032470.3:c.4del' in list(results.keys()) assert results['NM_032470.3:c.4del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_032470.3:c.4del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_032470.3:c.4del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'chr6_GL000251v2_alt', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'chr6_GL000252v2_alt', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 
'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'chr6_GL000254v2_alt', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167248.1:g.3274047del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'ref': 'CG', 'pos': '3274046', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167248.1:g.3274047del', 'vcf': {'chr': 'chr6_qbl_hap6', 'ref': 'CG', 'pos': '3274046', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167248.2:g.3268451del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'ref': 'CG', 'pos': '3268450', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167248.2:g.3268451del', 'vcf': {'chr': 'chr6_GL000255v2_alt', 'ref': 'CG', 'pos': '3268450', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3345701del', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'CG', 'pos': '3345700', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3345701del', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': 'CG', 'pos': '3345700', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3346403del', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'CG', 'pos': '3346402', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3346403del', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': 'CG', 'pos': '3346402', 'alt': 'C'}}}] - assert results['NM_032470.3:c.4del']['transcript_description'] == 'Homo sapiens tenascin XB (TNXB), transcript variant XB-S, mRNA' + self.assertCountEqual(results['NM_032470.3:c.4del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 
'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'chr6_GL000251v2_alt', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'chr6_GL000252v2_alt', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'chr6_GL000254v2_alt', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167248.1:g.3274047del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'ref': 'CG', 'pos': '3274046', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167248.1:g.3274047del', 'vcf': {'chr': 
'chr6_qbl_hap6', 'ref': 'CG', 'pos': '3274046', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167248.2:g.3268451del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'ref': 'CG', 'pos': '3268450', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167248.2:g.3268451del', 'vcf': {'chr': 'chr6_GL000255v2_alt', 'ref': 'CG', 'pos': '3268450', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3345701del', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'CG', 'pos': '3345700', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3345701del', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': 'CG', 'pos': '3345700', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3346403del', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'CG', 'pos': '3346402', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3346403del', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': 'CG', 'pos': '3346402', 'alt': 'C'}}}]) assert results['NM_032470.3:c.4del']['gene_symbol'] == 'TNXB' assert results['NM_032470.3:c.4del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_115859.2:p.(Arg2AlafsTer91)', 'slr': 'NP_115859.2:p.(R2Afs*91)'} assert results['NM_032470.3:c.4del']['submitted_variant'] == 'NM_032470.3:c.4del' @@ -1423,8 +1362,7 @@ def test_variant43(self): assert 'NM_001194958.2:c.20C>A' in list(results.keys()) assert results['NM_001194958.2:c.20C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001194958.2:c.20C>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001194958.2:c.20C>A']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315950.2:g.355171C>A', 'vcf': {'chr': 'HG987_PATCH', 'ref': 'C', 'pos': '355171', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315950.2:g.355171C>A', 'vcf': {'chr': 'NW_003315950.2', 'ref': 'C', 'pos': '355171', 'alt': 'A'}}}] - assert results['NM_001194958.2:c.20C>A']['transcript_description'] == 'Homo sapiens potassium 
voltage-gated channel subfamily J member 18 (KCNJ18), mRNA' + self.assertCountEqual(results['NM_001194958.2:c.20C>A']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315950.2:g.355171C>A', 'vcf': {'chr': 'HG987_PATCH', 'ref': 'C', 'pos': '355171', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315950.2:g.355171C>A', 'vcf': {'chr': 'NW_003315950.2', 'ref': 'C', 'pos': '355171', 'alt': 'A'}}}]) assert results['NM_001194958.2:c.20C>A']['gene_symbol'] == 'KCNJ18' assert results['NM_001194958.2:c.20C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001181887.2:p.(Ala7Asp)', 'slr': 'NP_001181887.2:p.(A7D)'} assert results['NM_001194958.2:c.20C>A']['submitted_variant'] == 'NM_001194958.2:c.20C>A' @@ -1449,8 +1387,7 @@ def test_variant44(self): assert 'NM_000022.2:c.534A>G' in list(results.keys()) assert results['NM_000022.2:c.534A>G']['hgvs_lrg_transcript_variant'] == 'LRG_16t1:c.534A>G' assert results['NM_000022.2:c.534A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000022.2:c.534A>G']['alt_genomic_loci'] == [] - assert results['NM_000022.2:c.534A>G']['transcript_description'] == 'Homo sapiens adenosine deaminase (ADA), mRNA' + self.assertCountEqual(results['NM_000022.2:c.534A>G']['alt_genomic_loci'], []) assert results['NM_000022.2:c.534A>G']['gene_symbol'] == 'ADA' assert results['NM_000022.2:c.534A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000013.2(LRG_16p1):p.(Val178=)', 'slr': 'NP_000013.2:p.(V178=)'} assert results['NM_000022.2:c.534A>G']['submitted_variant'] == 'NM_000022.2:c.534A>G' @@ -1474,8 +1411,7 @@ def test_variant45(self): assert 'NM_021983.4:c.490G>C' in list(results.keys()) assert results['NM_021983.4:c.490G>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_021983.4:c.490G>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_021983.4:c.490G>C']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': 
{'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3848158', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'chr6_mann_hap4', 'ref': 'T', 'pos': '3848158', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3842538', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'ref': 'T', 'pos': '3842538', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'C', 'pos': '3884432', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'C', 'pos': '3884432', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3852542', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': 'C', 'pos': '3852542', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3853244', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': 'C', 'pos': '3853244', 'alt': 'G'}}}] - assert results['NM_021983.4:c.490G>C']['transcript_description'] == 'Homo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA' + self.assertCountEqual(results['NM_021983.4:c.490G>C']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3848158', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'chr6_mann_hap4', 'ref': 'T', 'pos': '3848158', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 
'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3842538', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'ref': 'T', 'pos': '3842538', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'C', 'pos': '3884432', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'C', 'pos': '3884432', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3852542', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': 'C', 'pos': '3852542', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3853244', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': 'C', 'pos': '3853244', 'alt': 'G'}}}]) assert results['NM_021983.4:c.490G>C']['gene_symbol'] == 'HLA-DRB4' assert results['NM_021983.4:c.490G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068818.4:p.(Gly164Arg)', 'slr': 'NP_068818.4:p.(G164R)'} assert results['NM_021983.4:c.490G>C']['submitted_variant'] == 'HSCHR6_MHC_SSTO_CTG1-3852542-C-G' @@ -1499,8 +1435,7 @@ def test_variant46(self): assert 'NM_000368.4:c.363+1dup' in list(results.keys()) assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_transcript_variant'] == 'LRG_486t1:c.363+1dup' assert results['NM_000368.4:c.363+1dup']['refseqgene_context_intronic_sequence'] == 'NG_012386.1(NM_000368.4):c.363+1dup' - assert results['NM_000368.4:c.363+1dup']['alt_genomic_loci'] == [] - assert results['NM_000368.4:c.363+1dup']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, 
mRNA' + self.assertCountEqual(results['NM_000368.4:c.363+1dup']['alt_genomic_loci'], []) assert results['NM_000368.4:c.363+1dup']['gene_symbol'] == 'TSC1' assert results['NM_000368.4:c.363+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000359.1(LRG_486p1):p.?', 'slr': 'NP_000359.1:p.?'} assert results['NM_000368.4:c.363+1dup']['submitted_variant'] == 'NM_000368.4:c.363+1dupG' @@ -1524,8 +1459,7 @@ def test_variant47(self): assert 'NM_000368.4:c.363+1dup' in list(results.keys()) assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_transcript_variant'] == 'LRG_486t1:c.363+1dup' assert results['NM_000368.4:c.363+1dup']['refseqgene_context_intronic_sequence'] == 'NG_012386.1(NM_000368.4):c.363+1dup' - assert results['NM_000368.4:c.363+1dup']['alt_genomic_loci'] == [] - assert results['NM_000368.4:c.363+1dup']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000368.4:c.363+1dup']['alt_genomic_loci'], []) assert results['NM_000368.4:c.363+1dup']['gene_symbol'] == 'TSC1' assert results['NM_000368.4:c.363+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000359.1(LRG_486p1):p.(Met122AspfsTer4)', 'slr': 'NP_000359.1:p.(M122Dfs*4)'} assert results['NM_000368.4:c.363+1dup']['submitted_variant'] == 'NM_000368.4:c.363dupG' @@ -1549,8 +1483,7 @@ def test_variant48(self): assert 'NM_000089.3:c.1035_1035+2del' in list(results.keys()) assert results['NM_000089.3:c.1035_1035+2del']['hgvs_lrg_transcript_variant'] == 'LRG_2t1:c.1035_1035+2del' assert results['NM_000089.3:c.1035_1035+2del']['refseqgene_context_intronic_sequence'] == 'NG_007405.1(NM_000089.3):c.1035_1035+2del' - assert results['NM_000089.3:c.1035_1035+2del']['alt_genomic_loci'] == [] - assert results['NM_000089.3:c.1035_1035+2del']['transcript_description'] == 'Homo sapiens collagen type I alpha 2 chain (COL1A2), mRNA' + self.assertCountEqual(results['NM_000089.3:c.1035_1035+2del']['alt_genomic_loci'], []) assert 
results['NM_000089.3:c.1035_1035+2del']['gene_symbol'] == 'COL1A2' assert results['NM_000089.3:c.1035_1035+2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000080.2(LRG_2p1):p.(Val345del)', 'slr': 'NP_000080.2:p.(V345del)'} assert results['NM_000089.3:c.1035_1035+2del']['submitted_variant'] == 'NM_000089.3:c.1033_1035delGTT' @@ -1574,8 +1507,7 @@ def test_variant49(self): assert 'NM_000089.3:c.1035_1035+2del' in list(results.keys()) assert results['NM_000089.3:c.1035_1035+2del']['hgvs_lrg_transcript_variant'] == 'LRG_2t1:c.1035_1035+2del' assert results['NM_000089.3:c.1035_1035+2del']['refseqgene_context_intronic_sequence'] == 'NG_007405.1(NM_000089.3):c.1035_1035+2del' - assert results['NM_000089.3:c.1035_1035+2del']['alt_genomic_loci'] == [] - assert results['NM_000089.3:c.1035_1035+2del']['transcript_description'] == 'Homo sapiens collagen type I alpha 2 chain (COL1A2), mRNA' + self.assertCountEqual(results['NM_000089.3:c.1035_1035+2del']['alt_genomic_loci'], []) assert results['NM_000089.3:c.1035_1035+2del']['gene_symbol'] == 'COL1A2' assert results['NM_000089.3:c.1035_1035+2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000080.2(LRG_2p1):p.(Val345del)', 'slr': 'NP_000080.2:p.(V345del)'} assert results['NM_000089.3:c.1035_1035+2del']['submitted_variant'] == 'NM_000089.3:c.1035_1035+2delTGT' @@ -1599,8 +1531,7 @@ def test_variant50(self): assert 'NM_000088.3:c.2024_2028+1del' in list(results.keys()) assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.2024_2028+1del' assert results['NM_000088.3:c.2024_2028+1del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.2024_2028+1del' - assert results['NM_000088.3:c.2024_2028+1del']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.2024_2028+1del']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.2024_2028+1del']['alt_genomic_loci'], []) assert 
results['NM_000088.3:c.2024_2028+1del']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.2024_2028+1del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Ala675_Arg676del)', 'slr': 'NP_000079.2:p.(A675_R676del)'} assert results['NM_000088.3:c.2024_2028+1del']['submitted_variant'] == 'NM_000088.3:c.2023_2028delGCAAGA' @@ -1624,8 +1555,7 @@ def test_variant51(self): assert 'NM_000089.3:c.938del' in list(results.keys()) assert results['NM_000089.3:c.938del']['hgvs_lrg_transcript_variant'] == 'LRG_2t1:c.938del' assert results['NM_000089.3:c.938del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000089.3:c.938del']['alt_genomic_loci'] == [] - assert results['NM_000089.3:c.938del']['transcript_description'] == 'Homo sapiens collagen type I alpha 2 chain (COL1A2), mRNA' + self.assertCountEqual(results['NM_000089.3:c.938del']['alt_genomic_loci'], []) assert results['NM_000089.3:c.938del']['gene_symbol'] == 'COL1A2' assert results['NM_000089.3:c.938del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000080.2(LRG_2p1):p.(Gly313AlafsTer86)', 'slr': 'NP_000080.2:p.(G313Afs*86)'} assert results['NM_000089.3:c.938del']['submitted_variant'] == 'NM_000089.3:c.938-1delG' @@ -1649,8 +1579,7 @@ def test_variant52(self): assert 'NM_000088.3:c.589G=' in list(results.keys()) assert results['NM_000088.3:c.589G=']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G=' assert results['NM_000088.3:c.589G=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.589G=']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.589G=']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.589G=']['alt_genomic_loci'], []) assert results['NM_000088.3:c.589G=']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.589G=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197=)', 'slr': 
'NP_000079.2:p.(G197=)'} assert results['NM_000088.3:c.589G=']['submitted_variant'] == 'NM_000088.3:c.589G=' @@ -1674,8 +1603,7 @@ def test_variant53(self): assert 'NM_000088.3:c.642A=' in list(results.keys()) assert results['NM_000088.3:c.642A=']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642A=' assert results['NM_000088.3:c.642A=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.642A=']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.642A=']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.642A=']['alt_genomic_loci'], []) assert results['NM_000088.3:c.642A=']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.642A=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Ser214=)', 'slr': 'NP_000079.2:p.(S214=)'} assert results['NM_000088.3:c.642A=']['submitted_variant'] == 'NM_000088.3:c.642A=' @@ -1699,8 +1627,7 @@ def test_variant54(self): assert 'NM_000088.3:c.642+1_642+2delinsG' in list(results.keys()) assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642+2del' assert results['NM_000088.3:c.642+1_642+2delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.642+2del' - assert results['NM_000088.3:c.642+1_642+2delinsG']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.642+1_642+2delinsG']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.642+1_642+2delinsG']['alt_genomic_loci'], []) assert results['NM_000088.3:c.642+1_642+2delinsG']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.642+1_642+2delinsG']['submitted_variant'] == 'NM_000088.3:c.642+1GG>G' @@ -1724,8 +1651,7 @@ def 
test_variant55(self): assert 'NM_000088.3:c.589-2_589-1delinsG' in list(results.keys()) assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-2del' assert results['NM_000088.3:c.589-2_589-1delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-2del' - assert results['NM_000088.3:c.589-2_589-1delinsG']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.589-2_589-1delinsG']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.589-2_589-1delinsG']['alt_genomic_loci'], []) assert results['NM_000088.3:c.589-2_589-1delinsG']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.589-2_589-1delinsG']['submitted_variant'] == 'NM_000088.3:c.589-2GG>G' @@ -1749,8 +1675,7 @@ def test_variant56(self): assert 'NM_000088.3:c.589-5_589-4insTTTT' in list(results.keys()) assert results['NM_000088.3:c.589-5_589-4insTTTT']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-5_589-4insTTTT' assert results['NM_000088.3:c.589-5_589-4insTTTT']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-5_589-4insTTTT' - assert results['NM_000088.3:c.589-5_589-4insTTTT']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.589-5_589-4insTTTT']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.589-5_589-4insTTTT']['alt_genomic_loci'], []) assert results['NM_000088.3:c.589-5_589-4insTTTT']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.589-5_589-4insTTTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.589-5_589-4insTTTT']['submitted_variant'] == 
'NM_000088.3:c.589-6_589-5insTTTT' @@ -1774,8 +1699,7 @@ def test_variant57(self): assert 'NM_000088.3:c.642+4_642+5insAAAA' in list(results.keys()) assert results['NM_000088.3:c.642+4_642+5insAAAA']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642+4_642+5insAAAA' assert results['NM_000088.3:c.642+4_642+5insAAAA']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.642+4_642+5insAAAA' - assert results['NM_000088.3:c.642+4_642+5insAAAA']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.642+4_642+5insAAAA']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.642+4_642+5insAAAA']['alt_genomic_loci'], []) assert results['NM_000088.3:c.642+4_642+5insAAAA']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.642+4_642+5insAAAA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.642+4_642+5insAAAA']['submitted_variant'] == 'NM_000088.3:c.642+3_642+4insAAAA' @@ -1799,8 +1723,7 @@ def test_variant58(self): assert 'NM_000088.3:c.589-4_589-3insTT' in list(results.keys()) assert results['NM_000088.3:c.589-4_589-3insTT']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-4_589-3insTT' assert results['NM_000088.3:c.589-4_589-3insTT']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-4_589-3insTT' - assert results['NM_000088.3:c.589-4_589-3insTT']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.589-4_589-3insTT']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.589-4_589-3insTT']['alt_genomic_loci'], []) assert results['NM_000088.3:c.589-4_589-3insTT']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.589-4_589-3insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert 
results['NM_000088.3:c.589-4_589-3insTT']['submitted_variant'] == 'NM_000088.3:c.589-4_589-3insTT' @@ -1824,8 +1747,7 @@ def test_variant59(self): assert 'NM_000088.3:c.589-7del' in list(results.keys()) assert results['NM_000088.3:c.589-7del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-7del' assert results['NM_000088.3:c.589-7del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-7del' - assert results['NM_000088.3:c.589-7del']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.589-7del']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.589-7del']['alt_genomic_loci'], []) assert results['NM_000088.3:c.589-7del']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.589-7del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.589-7del']['submitted_variant'] == 'NM_000088.3:c.589-8del' @@ -1848,8 +1770,7 @@ def test_variant60(self): assert 'NM_000527.4:c.-187_-185del' in list(results.keys()) assert results['NM_000527.4:c.-187_-185del']['hgvs_lrg_transcript_variant'] == 'LRG_274t1:c.-187_-185del' assert results['NM_000527.4:c.-187_-185del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000527.4:c.-187_-185del']['alt_genomic_loci'] == [] - assert results['NM_000527.4:c.-187_-185del']['transcript_description'] == 'Homo sapiens low density lipoprotein receptor (LDLR), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000527.4:c.-187_-185del']['alt_genomic_loci'], []) assert results['NM_000527.4:c.-187_-185del']['gene_symbol'] == 'LDLR' assert results['NM_000527.4:c.-187_-185del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000518.1(LRG_274p1):p.?', 'slr': 'NP_000518.1:p.?'} assert results['NM_000527.4:c.-187_-185del']['submitted_variant'] == 'NM_000527.4:c.-187_-185delCTC' @@ -1874,8 +1795,7 @@ def test_variant61(self): 
assert 'NM_206933.2:c.6317C>G' in list(results.keys()) assert results['NM_206933.2:c.6317C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_206933.2:c.6317C>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_206933.2:c.6317C>G']['alt_genomic_loci'] == [] - assert results['NM_206933.2:c.6317C>G']['transcript_description'] == 'Homo sapiens usherin (USH2A), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_206933.2:c.6317C>G']['alt_genomic_loci'], []) assert results['NM_206933.2:c.6317C>G']['gene_symbol'] == 'USH2A' assert results['NM_206933.2:c.6317C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_996816.2:p.(Thr2106Arg)', 'slr': 'NP_996816.2:p.(T2106R)'} assert results['NM_206933.2:c.6317C>G']['submitted_variant'] == 'NM_206933.2:c.6317C>G' @@ -1898,8 +1818,7 @@ def test_variant62(self): assert 'NM_000059.3:c.7397C=' in list(results.keys()) assert results['NM_000059.3:c.7397C=']['hgvs_lrg_transcript_variant'] == 'LRG_293t1:c.7397C=' assert results['NM_000059.3:c.7397C=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000059.3:c.7397C=']['alt_genomic_loci'] == [] - assert results['NM_000059.3:c.7397C=']['transcript_description'] == 'Homo sapiens BRCA2, DNA repair associated (BRCA2), mRNA' + self.assertCountEqual(results['NM_000059.3:c.7397C=']['alt_genomic_loci'], []) assert results['NM_000059.3:c.7397C=']['gene_symbol'] == 'BRCA2' assert results['NM_000059.3:c.7397C=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000050.2(LRG_293p1):p.(Ala2466=)', 'slr': 'NP_000050.2:p.(A2466=)'} assert results['NM_000059.3:c.7397C=']['submitted_variant'] == 'NC_000013.10:g.32929387T>C' @@ -1923,8 +1842,7 @@ def test_variant63(self): assert 'NM_015102.3:c.2818-2T>A' in list(results.keys()) assert results['NM_015102.3:c.2818-2T>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_015102.3:c.2818-2T>A']['refseqgene_context_intronic_sequence'] == 'NG_011724.2(NM_015102.3):c.2818-2A=' - assert 
results['NM_015102.3:c.2818-2T>A']['alt_genomic_loci'] == [] - assert results['NM_015102.3:c.2818-2T>A']['transcript_description'] == 'Homo sapiens nephronophthisis 4 (NPHP4), mRNA' + self.assertCountEqual(results['NM_015102.3:c.2818-2T>A']['alt_genomic_loci'], []) assert results['NM_015102.3:c.2818-2T>A']['gene_symbol'] == 'NPHP4' assert results['NM_015102.3:c.2818-2T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055917.1:p.?', 'slr': 'NP_055917.1:p.?'} assert results['NM_015102.3:c.2818-2T>A']['submitted_variant'] == 'NM_015102.3:c.2818-2T>A' @@ -1949,8 +1867,7 @@ def test_variant64(self): assert 'NM_001042544.1:c.3233_3235=' in list(results.keys()) assert results['NM_001042544.1:c.3233_3235=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001042544.1:c.3233_3235=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001042544.1:c.3233_3235=']['alt_genomic_loci'] == [] - assert results['NM_001042544.1:c.3233_3235=']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001042544.1:c.3233_3235=']['alt_genomic_loci'], []) assert results['NM_001042544.1:c.3233_3235=']['gene_symbol'] == 'LTBP4' assert results['NM_001042544.1:c.3233_3235=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036009.1:p.(Gln1078=)', 'slr': 'NP_001036009.1:p.(Q1078=)'} assert results['NM_001042544.1:c.3233_3235=']['submitted_variant'] == '19-41123094-G-GG' @@ -1967,8 +1884,7 @@ def test_variant64(self): assert 'NM_001042545.1:c.3032_3034=' in list(results.keys()) assert results['NM_001042545.1:c.3032_3034=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001042545.1:c.3032_3034=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001042545.1:c.3032_3034=']['alt_genomic_loci'] == [] - assert results['NM_001042545.1:c.3032_3034=']['transcript_description'] == 'Homo sapiens latent transforming growth factor 
beta binding protein 4 (LTBP4), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001042545.1:c.3032_3034=']['alt_genomic_loci'], []) assert results['NM_001042545.1:c.3032_3034=']['gene_symbol'] == 'LTBP4' assert results['NM_001042545.1:c.3032_3034=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036010.1:p.(Gln1011=)', 'slr': 'NP_001036010.1:p.(Q1011=)'} assert results['NM_001042545.1:c.3032_3034=']['submitted_variant'] == '19-41123094-G-GG' @@ -1985,8 +1901,7 @@ def test_variant64(self): assert 'NM_003573.2:c.3122_3124=' in list(results.keys()) assert results['NM_003573.2:c.3122_3124=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003573.2:c.3122_3124=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003573.2:c.3122_3124=']['alt_genomic_loci'] == [] - assert results['NM_003573.2:c.3122_3124=']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_003573.2:c.3122_3124=']['alt_genomic_loci'], []) assert results['NM_003573.2:c.3122_3124=']['gene_symbol'] == 'LTBP4' assert results['NM_003573.2:c.3122_3124=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003564.2:p.(Gln1041=)', 'slr': 'NP_003564.2:p.(Q1041=)'} assert results['NM_003573.2:c.3122_3124=']['submitted_variant'] == '19-41123094-G-GG' @@ -2009,8 +1924,7 @@ def test_variant65(self): assert 'NM_014249.2:c.946_949=' in list(results.keys()) assert results['NM_014249.2:c.946_949=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014249.2:c.946_949=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_014249.2:c.946_949=']['alt_genomic_loci'] == [] - assert results['NM_014249.2:c.946_949=']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_014249.2:c.946_949=']['alt_genomic_loci'], []) assert 
results['NM_014249.2:c.946_949=']['gene_symbol'] == 'NR2E3' assert results['NM_014249.2:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Asp316=)', 'slr': 'NP_055064.1:p.(D316=)'} assert results['NM_014249.2:c.946_949=']['submitted_variant'] == '15-72105928-AC-A' @@ -2027,8 +1941,7 @@ def test_variant65(self): assert 'NM_016346.3:c.946_949=' in list(results.keys()) assert results['NM_016346.3:c.946_949=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_016346.3:c.946_949=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_016346.3:c.946_949=']['alt_genomic_loci'] == [] - assert results['NM_016346.3:c.946_949=']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_016346.3:c.946_949=']['alt_genomic_loci'], []) assert results['NM_016346.3:c.946_949=']['gene_symbol'] == 'NR2E3' assert results['NM_016346.3:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Asp316=)', 'slr': 'NP_057430.1:p.(D316=)'} assert results['NM_016346.3:c.946_949=']['submitted_variant'] == '15-72105928-AC-A' @@ -2046,8 +1959,7 @@ def test_variant65(self): assert 'NM_014249.3:c.946_949=' in list(results.keys()) assert results['NM_014249.3:c.946_949=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014249.3:c.946_949=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_014249.3:c.946_949=']['alt_genomic_loci'] == [] - assert results['NM_014249.3:c.946_949=']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_014249.3:c.946_949=']['alt_genomic_loci'], []) assert results['NM_014249.3:c.946_949=']['gene_symbol'] == 'NR2E3' assert results['NM_014249.3:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Asp316=)', 'slr': 'NP_055064.1:p.(D316=)'} assert 
results['NM_014249.3:c.946_949=']['submitted_variant'] == '15-72105928-AC-A' @@ -2064,8 +1976,7 @@ def test_variant65(self): assert 'NM_016346.2:c.946_949=' in list(results.keys()) assert results['NM_016346.2:c.946_949=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_016346.2:c.946_949=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_016346.2:c.946_949=']['alt_genomic_loci'] == [] - assert results['NM_016346.2:c.946_949=']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_016346.2:c.946_949=']['alt_genomic_loci'], []) assert results['NM_016346.2:c.946_949=']['gene_symbol'] == 'NR2E3' assert results['NM_016346.2:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Asp316=)', 'slr': 'NP_057430.1:p.(D316=)'} assert results['NM_016346.2:c.946_949=']['submitted_variant'] == '15-72105928-AC-A' @@ -2089,8 +2000,7 @@ def test_variant66(self): assert 'NM_032790.3:c.126_128=' in list(results.keys()) assert results['NM_032790.3:c.126_128=']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.126_128=' assert results['NM_032790.3:c.126_128=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.126_128=']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302883_302888del', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'CCCGCCA', 'pos': '302871', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302883_302888del', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'CCCGCCA', 'pos': '302871', 'alt': 'C'}}}] - assert results['NM_032790.3:c.126_128=']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' + self.assertCountEqual(results['NM_032790.3:c.126_128=']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302883_302888del', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'CCCGCCA', 
'pos': '302871', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302883_302888del', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'CCCGCCA', 'pos': '302871', 'alt': 'C'}}}]) assert results['NM_032790.3:c.126_128=']['gene_symbol'] == 'ORAI1' assert results['NM_032790.3:c.126_128=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Ala42=)', 'slr': 'NP_116179.2:p.(A42=)'} assert results['NM_032790.3:c.126_128=']['submitted_variant'] == '12-122064773-CCCGCCA-C' @@ -2113,8 +2023,7 @@ def test_variant67(self): assert 'NM_032790.3:c.132_137dup' in list(results.keys()) assert results['NM_032790.3:c.132_137dup']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.132_137dup' assert results['NM_032790.3:c.132_137dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.132_137dup']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302868_302887=', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'GGCCCCGCCACCGCCACCGC', 'pos': '302868', 'alt': 'GGCCCCGCCACCGCCACCGC'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302868_302887=', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'GGCCCCGCCACCGCCACCGC', 'pos': '302868', 'alt': 'GGCCCCGCCACCGCCACCGC'}}}] - assert results['NM_032790.3:c.132_137dup']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' + self.assertCountEqual(results['NM_032790.3:c.132_137dup']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302868_302887=', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'GGCCCCGCCACCGCCACCGC', 'pos': '302868', 'alt': 'GGCCCCGCCACCGCCACCGC'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302868_302887=', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'GGCCCCGCCACCGCCACCGC', 'pos': '302868', 'alt': 'GGCCCCGCCACCGCCACCGC'}}}]) assert results['NM_032790.3:c.132_137dup']['gene_symbol'] == 'ORAI1' assert 
results['NM_032790.3:c.132_137dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro46_Pro47dup)', 'slr': 'NP_116179.2:p.(P46_P47dup)'} assert results['NM_032790.3:c.132_137dup']['submitted_variant'] == '12-122064774-CCGCCA-CCGCCA' @@ -2139,8 +2048,7 @@ def test_variant68(self): assert 'NM_032790.3:c.132_135delinsGCCGT' in list(results.keys()) assert results['NM_032790.3:c.132_135delinsGCCGT']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.132_135delinsGCCGT' assert results['NM_032790.3:c.132_135delinsGCCGT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.132_135delinsGCCGT']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302883_302886delinsGCCGT', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'ACCG', 'pos': '302883', 'alt': 'GCCGT'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302883_302886delinsGCCGT', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'ACCG', 'pos': '302883', 'alt': 'GCCGT'}}}] - assert results['NM_032790.3:c.132_135delinsGCCGT']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' + self.assertCountEqual(results['NM_032790.3:c.132_135delinsGCCGT']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302883_302886delinsGCCGT', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'ACCG', 'pos': '302883', 'alt': 'GCCGT'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302883_302886delinsGCCGT', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'ACCG', 'pos': '302883', 'alt': 'GCCGT'}}}]) assert results['NM_032790.3:c.132_135delinsGCCGT']['gene_symbol'] == 'ORAI1' assert results['NM_032790.3:c.132_135delinsGCCGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro46SerfsTer42)', 'slr': 'NP_116179.2:p.(P46Sfs*42)'} assert results['NM_032790.3:c.132_135delinsGCCGT']['submitted_variant'] == '12-122064773-CCCGCCACCGCCACCGC-CCCGCCACCGCCGCCGTC' @@ -2164,8 +2072,7 @@ def 
test_variant69(self): assert 'NM_032790.3:c.129_130insACACCG' in list(results.keys()) assert results['NM_032790.3:c.129_130insACACCG']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.129_130insACACCG' assert results['NM_032790.3:c.129_130insACACCG']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.129_130insACACCG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302875C>A', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'C', 'pos': '302875', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302875C>A', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'C', 'pos': '302875', 'alt': 'A'}}}] - assert results['NM_032790.3:c.129_130insACACCG']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' + self.assertCountEqual(results['NM_032790.3:c.129_130insACACCG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302875C>A', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'C', 'pos': '302875', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302875C>A', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'C', 'pos': '302875', 'alt': 'A'}}}]) assert results['NM_032790.3:c.129_130insACACCG']['gene_symbol'] == 'ORAI1' assert results['NM_032790.3:c.129_130insACACCG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro43_Pro44insThrPro)', 'slr': 'NP_116179.2:p.(P43_P44insTP)'} assert results['NM_032790.3:c.129_130insACACCG']['submitted_variant'] == 'NC_000012.11:g.122064777C>A' @@ -2189,8 +2096,7 @@ def test_variant70(self): assert 'NM_032790.3:c.128_129insCCACC' in list(results.keys()) assert results['NM_032790.3:c.128_129insCCACC']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.128_129insCCACC' assert results['NM_032790.3:c.128_129insCCACC']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.128_129insCCACC']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 
'NW_004504303.2:g.302874del', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'CG', 'pos': '302873', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302874del', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'CG', 'pos': '302873', 'alt': 'C'}}}] - assert results['NM_032790.3:c.128_129insCCACC']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' + self.assertCountEqual(results['NM_032790.3:c.128_129insCCACC']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302874del', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'CG', 'pos': '302873', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302874del', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'CG', 'pos': '302873', 'alt': 'C'}}}]) assert results['NM_032790.3:c.128_129insCCACC']['gene_symbol'] == 'ORAI1' assert results['NM_032790.3:c.128_129insCCACC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro44HisfsTer22)', 'slr': 'NP_116179.2:p.(P44Hfs*22)'} assert results['NM_032790.3:c.128_129insCCACC']['submitted_variant'] == 'NC_000012.11:g.122064776delG' @@ -2214,8 +2120,7 @@ def test_variant71(self): assert 'NM_032790.3:c.129_130insGCCACCG' in list(results.keys()) assert results['NM_032790.3:c.129_130insGCCACCG']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.129_130insGCCACCG' assert results['NM_032790.3:c.129_130insGCCACCG']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.129_130insGCCACCG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302874dup', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'C', 'pos': '302873', 'alt': 'CG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302874dup', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'C', 'pos': '302873', 'alt': 'CG'}}}] - assert results['NM_032790.3:c.129_130insGCCACCG']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' 
+ self.assertCountEqual(results['NM_032790.3:c.129_130insGCCACCG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302874dup', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'C', 'pos': '302873', 'alt': 'CG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302874dup', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'C', 'pos': '302873', 'alt': 'CG'}}}]) assert results['NM_032790.3:c.129_130insGCCACCG']['gene_symbol'] == 'ORAI1' assert results['NM_032790.3:c.129_130insGCCACCG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro44AlafsTer46)', 'slr': 'NP_116179.2:p.(P44Afs*46)'} assert results['NM_032790.3:c.129_130insGCCACCG']['submitted_variant'] == 'NC_000012.11:g.122064776dupG' @@ -2239,8 +2144,7 @@ def test_variant72(self): assert 'NM_032790.3:c.129_130insTTTCCACCG' in list(results.keys()) assert results['NM_032790.3:c.129_130insTTTCCACCG']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.129_130insTTTCCACCG' assert results['NM_032790.3:c.129_130insTTTCCACCG']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.129_130insTTTCCACCG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302874_302875insTTT', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'G', 'pos': '302874', 'alt': 'GTTT'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302874_302875insTTT', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'G', 'pos': '302874', 'alt': 'GTTT'}}}] - assert results['NM_032790.3:c.129_130insTTTCCACCG']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' + self.assertCountEqual(results['NM_032790.3:c.129_130insTTTCCACCG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302874_302875insTTT', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'G', 'pos': '302874', 'alt': 'GTTT'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302874_302875insTTT', 'vcf': {'chr': 'NW_004504303.2', 'ref': 
'G', 'pos': '302874', 'alt': 'GTTT'}}}]) assert results['NM_032790.3:c.129_130insTTTCCACCG']['gene_symbol'] == 'ORAI1' assert results['NM_032790.3:c.129_130insTTTCCACCG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro43_Pro44insPheProPro)', 'slr': 'NP_116179.2:p.(P43_P44insFPP)'} assert results['NM_032790.3:c.129_130insTTTCCACCG']['submitted_variant'] == 'NC_000012.11:g.122064776_122064777insTTT' @@ -2264,8 +2168,7 @@ def test_variant73(self): assert 'NM_032790.3:c.125_126delinsGCCA' in list(results.keys()) assert results['NM_032790.3:c.125_126delinsGCCA']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.125_126delinsGCCA' assert results['NM_032790.3:c.125_126delinsGCCA']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.125_126delinsGCCA']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302870_302873del', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'GCCCC', 'pos': '302869', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302870_302873del', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'GCCCC', 'pos': '302869', 'alt': 'G'}}}] - assert results['NM_032790.3:c.125_126delinsGCCA']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' + self.assertCountEqual(results['NM_032790.3:c.125_126delinsGCCA']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302870_302873del', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'GCCCC', 'pos': '302869', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302870_302873del', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'GCCCC', 'pos': '302869', 'alt': 'G'}}}]) assert results['NM_032790.3:c.125_126delinsGCCA']['gene_symbol'] == 'ORAI1' assert results['NM_032790.3:c.125_126delinsGCCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Ala42GlyfsTer23)', 'slr': 'NP_116179.2:p.(A42Gfs*23)'} assert 
results['NM_032790.3:c.125_126delinsGCCA']['submitted_variant'] == 'NC_000012.11:g.122064772_122064775del' @@ -2289,8 +2192,7 @@ def test_variant74(self): assert 'NM_032790.3:c.128_129insCCCCGCCACC' in list(results.keys()) assert results['NM_032790.3:c.128_129insCCCCGCCACC']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.128_129insCCCCGCCACC' assert results['NM_032790.3:c.128_129insCCCCGCCACC']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.128_129insCCCCGCCACC']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302870_302873dup', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'G', 'pos': '302869', 'alt': 'GCCCC'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302870_302873dup', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'G', 'pos': '302869', 'alt': 'GCCCC'}}}] - assert results['NM_032790.3:c.128_129insCCCCGCCACC']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' + self.assertCountEqual(results['NM_032790.3:c.128_129insCCCCGCCACC']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302870_302873dup', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'G', 'pos': '302869', 'alt': 'GCCCC'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302870_302873dup', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'G', 'pos': '302869', 'alt': 'GCCCC'}}}]) assert results['NM_032790.3:c.128_129insCCCCGCCACC']['gene_symbol'] == 'ORAI1' assert results['NM_032790.3:c.128_129insCCCCGCCACC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro45AlafsTer46)', 'slr': 'NP_116179.2:p.(P45Afs*46)'} assert results['NM_032790.3:c.128_129insCCCCGCCACC']['submitted_variant'] == 'NC_000012.11:g.122064772_122064775dup' @@ -2313,8 +2215,7 @@ def test_variant75(self): assert 'NM_032790.3:c.126_127insTTTTCCGCCA' in list(results.keys()) assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['hgvs_lrg_transcript_variant'] == 
'LRG_93t1:c.126_127insTTTTCCGCCA' assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302872insTTTT', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'C', 'pos': '302871', 'alt': 'CTTTT'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302872insTTTT', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'C', 'pos': '302871', 'alt': 'CTTTT'}}}] - assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' + self.assertCountEqual(results['NM_032790.3:c.126_127insTTTTCCGCCA']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302872insTTTT', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'C', 'pos': '302871', 'alt': 'CTTTT'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302872insTTTT', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'C', 'pos': '302871', 'alt': 'CTTTT'}}}]) assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['gene_symbol'] == 'ORAI1' assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro43PhefsTer48)', 'slr': 'NP_116179.2:p.(P43Ffs*48)'} assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['submitted_variant'] == 'NC_000012.11:g.122064773_122064774insTTTT' @@ -2338,8 +2239,7 @@ def test_variant76(self): assert 'NM_032790.3:c.126C>A' in list(results.keys()) assert results['NM_032790.3:c.126C>A']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.126C>A' assert results['NM_032790.3:c.126C>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.126C>A']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876del', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'GCCCCGC', 'pos': '302869', 'alt': 'G'}}}, 
{'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876del', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'GCCCCGC', 'pos': '302869', 'alt': 'G'}}}] - assert results['NM_032790.3:c.126C>A']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' + self.assertCountEqual(results['NM_032790.3:c.126C>A']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876del', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'GCCCCGC', 'pos': '302869', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876del', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'GCCCCGC', 'pos': '302869', 'alt': 'G'}}}]) assert results['NM_032790.3:c.126C>A']['gene_symbol'] == 'ORAI1' assert results['NM_032790.3:c.126C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Ala42=)', 'slr': 'NP_116179.2:p.(A42=)'} assert results['NM_032790.3:c.126C>A']['submitted_variant'] == 'NC_000012.11:g.122064772_122064777del' @@ -2364,8 +2264,7 @@ def test_variant77(self): assert 'NM_032790.3:c.131_132insCCCGCCACCGCC' in list(results.keys()) assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.131_132insCCCGCCACCGCC' assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876dup', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'G', 'pos': '302869', 'alt': 'GCCCCGC'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876dup', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'G', 'pos': '302869', 'alt': 'GCCCCGC'}}}] - assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' + 
self.assertCountEqual(results['NM_032790.3:c.131_132insCCCGCCACCGCC']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876dup', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'G', 'pos': '302869', 'alt': 'GCCCCGC'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876dup', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'G', 'pos': '302869', 'alt': 'GCCCCGC'}}}]) assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['gene_symbol'] == 'ORAI1' assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro44_Pro47dup)', 'slr': 'NP_116179.2:p.(P44_P47dup)'} assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['submitted_variant'] == 'NC_000012.11:g.122064772_122064777dup' @@ -2388,8 +2287,7 @@ def test_variant78(self): assert 'NM_032790.3:c.135_136insACCGCCACCG' in list(results.keys()) assert results['NM_032790.3:c.135_136insACCGCCACCG']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.135_136insACCGCCACCG' assert results['NM_032790.3:c.135_136insACCGCCACCG']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.135_136insACCGCCACCG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302877_302880dup', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'C', 'pos': '302876', 'alt': 'CACCG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302877_302880dup', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'C', 'pos': '302876', 'alt': 'CACCG'}}}] - assert results['NM_032790.3:c.135_136insACCGCCACCG']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' + self.assertCountEqual(results['NM_032790.3:c.135_136insACCGCCACCG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302877_302880dup', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'C', 'pos': '302876', 'alt': 'CACCG'}}}, {'hg19': {'hgvs_genomic_description': 
'NW_004504303.2:g.302877_302880dup', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'C', 'pos': '302876', 'alt': 'CACCG'}}}]) assert results['NM_032790.3:c.135_136insACCGCCACCG']['gene_symbol'] == 'ORAI1' assert results['NM_032790.3:c.135_136insACCGCCACCG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro46ThrfsTer45)', 'slr': 'NP_116179.2:p.(P46Tfs*45)'} assert results['NM_032790.3:c.135_136insACCGCCACCG']['submitted_variant'] == 'NC_000012.11:g.122064779_122064782dup' @@ -2414,8 +2312,7 @@ def test_variant79(self): assert 'NM_032790.3:c.126_127insA' in list(results.keys()) assert results['NM_032790.3:c.126_127insA']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.126_127insA' assert results['NM_032790.3:c.126_127insA']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.126_127insA']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302872_302876del', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'GGCCCC', 'pos': '302868', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302872_302876del', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'GGCCCC', 'pos': '302868', 'alt': 'G'}}}] - assert results['NM_032790.3:c.126_127insA']['transcript_description'] == 'Homo sapiens ORAI calcium release-activated calcium modulator 1 (ORAI1), mRNA' + self.assertCountEqual(results['NM_032790.3:c.126_127insA']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302872_302876del', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'GGCCCC', 'pos': '302868', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302872_302876del', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'GGCCCC', 'pos': '302868', 'alt': 'G'}}}]) assert results['NM_032790.3:c.126_127insA']['gene_symbol'] == 'ORAI1' assert results['NM_032790.3:c.126_127insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro43ThrfsTer45)', 'slr': 'NP_116179.2:p.(P43Tfs*45)'} assert 
results['NM_032790.3:c.126_127insA']['submitted_variant'] == 'NC_000012.11:g.122064772_122064782del' @@ -2438,8 +2335,7 @@ def test_variant80(self): assert 'NM_021088.3:c.471_473dup' in list(results.keys()) assert results['NM_021088.3:c.471_473dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_021088.3:c.471_473dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_021088.3:c.471_473dup']['alt_genomic_loci'] == [] - assert results['NM_021088.3:c.471_473dup']['transcript_description'] == 'Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_021088.3:c.471_473dup']['alt_genomic_loci'], []) assert results['NM_021088.3:c.471_473dup']['gene_symbol'] == 'ZNF2' assert results['NM_021088.3:c.471_473dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_066574.2:p.(Arg159dup)', 'slr': 'NP_066574.2:p.(R159dup)'} assert results['NM_021088.3:c.471_473dup']['submitted_variant'] == 'NC_000002.11:g.95847041_95847043GCG=' @@ -2456,8 +2352,7 @@ def test_variant80(self): assert 'NM_001291605.1:c.510_512dup' in list(results.keys()) assert results['NM_001291605.1:c.510_512dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001291605.1:c.510_512dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001291605.1:c.510_512dup']['alt_genomic_loci'] == [] - assert results['NM_001291605.1:c.510_512dup']['transcript_description'] == 'Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001291605.1:c.510_512dup']['alt_genomic_loci'], []) assert results['NM_001291605.1:c.510_512dup']['gene_symbol'] == 'ZNF2' assert results['NM_001291605.1:c.510_512dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278534.1:p.(Arg172dup)', 'slr': 'NP_001278534.1:p.(R172dup)'} assert results['NM_001291605.1:c.510_512dup']['submitted_variant'] == 'NC_000002.11:g.95847041_95847043GCG=' @@ -2474,8 +2369,7 @@ def test_variant80(self): 
assert 'NM_001017396.2:c.345_347dup' in list(results.keys()) assert results['NM_001017396.2:c.345_347dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001017396.2:c.345_347dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001017396.2:c.345_347dup']['alt_genomic_loci'] == [] - assert results['NM_001017396.2:c.345_347dup']['transcript_description'] == 'Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001017396.2:c.345_347dup']['alt_genomic_loci'], []) assert results['NM_001017396.2:c.345_347dup']['gene_symbol'] == 'ZNF2' assert results['NM_001017396.2:c.345_347dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001017396.1:p.(Arg117dup)', 'slr': 'NP_001017396.1:p.(R117dup)'} assert results['NM_001017396.2:c.345_347dup']['submitted_variant'] == 'NC_000002.11:g.95847041_95847043GCG=' @@ -2492,8 +2386,7 @@ def test_variant80(self): assert 'NM_001282398.1:c.357_359dup' in list(results.keys()) assert results['NM_001282398.1:c.357_359dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001282398.1:c.357_359dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001282398.1:c.357_359dup']['alt_genomic_loci'] == [] - assert results['NM_001282398.1:c.357_359dup']['transcript_description'] == 'Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001282398.1:c.357_359dup']['alt_genomic_loci'], []) assert results['NM_001282398.1:c.357_359dup']['gene_symbol'] == 'ZNF2' assert results['NM_001282398.1:c.357_359dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269327.1:p.(Arg121dup)', 'slr': 'NP_001269327.1:p.(R121dup)'} assert results['NM_001282398.1:c.357_359dup']['submitted_variant'] == 'NC_000002.11:g.95847041_95847043GCG=' @@ -2511,8 +2404,7 @@ def test_variant80(self): assert 'NM_001291604.1:c.231_233dup' in list(results.keys()) assert 
results['NM_001291604.1:c.231_233dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001291604.1:c.231_233dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001291604.1:c.231_233dup']['alt_genomic_loci'] == [] - assert results['NM_001291604.1:c.231_233dup']['transcript_description'] == 'Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001291604.1:c.231_233dup']['alt_genomic_loci'], []) assert results['NM_001291604.1:c.231_233dup']['gene_symbol'] == 'ZNF2' assert results['NM_001291604.1:c.231_233dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278533.1:p.(Arg79dup)', 'slr': 'NP_001278533.1:p.(R79dup)'} assert results['NM_001291604.1:c.231_233dup']['submitted_variant'] == 'NC_000002.11:g.95847041_95847043GCG=' @@ -2529,8 +2421,7 @@ def test_variant80(self): assert 'NM_021088.2:c.471_473dup' in list(results.keys()) assert results['NM_021088.2:c.471_473dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_021088.2:c.471_473dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_021088.2:c.471_473dup']['alt_genomic_loci'] == [] - assert results['NM_021088.2:c.471_473dup']['transcript_description'] == 'Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_021088.2:c.471_473dup']['alt_genomic_loci'], []) assert results['NM_021088.2:c.471_473dup']['gene_symbol'] == 'ZNF2' assert results['NM_021088.2:c.471_473dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_066574.2:p.(Arg159dup)', 'slr': 'NP_066574.2:p.(R159dup)'} assert results['NM_021088.2:c.471_473dup']['submitted_variant'] == 'NC_000002.11:g.95847041_95847043GCG=' @@ -2547,8 +2438,7 @@ def test_variant80(self): assert 'NM_001017396.1:c.345_347dup' in list(results.keys()) assert results['NM_001017396.1:c.345_347dup']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001017396.1:c.345_347dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001017396.1:c.345_347dup']['alt_genomic_loci'] == [] - assert results['NM_001017396.1:c.345_347dup']['transcript_description'] == 'Homo sapiens zinc finger protein 2 (ZNF2), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001017396.1:c.345_347dup']['alt_genomic_loci'], []) assert results['NM_001017396.1:c.345_347dup']['gene_symbol'] == 'ZNF2' assert results['NM_001017396.1:c.345_347dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001017396.1:p.(Arg117dup)', 'slr': 'NP_001017396.1:p.(R117dup)'} assert results['NM_001017396.1:c.345_347dup']['submitted_variant'] == 'NC_000002.11:g.95847041_95847043GCG=' @@ -2571,8 +2461,7 @@ def test_variant81(self): assert 'NM_001083585.1:c.*344_*368dup' in list(results.keys()) assert results['NM_001083585.1:c.*344_*368dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001083585.1:c.*344_*368dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001083585.1:c.*344_*368dup']['alt_genomic_loci'] == [] - assert results['NM_001083585.1:c.*344_*368dup']['transcript_description'] == 'Homo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001083585.1:c.*344_*368dup']['alt_genomic_loci'], []) assert results['NM_001083585.1:c.*344_*368dup']['gene_symbol'] == 'RABEP1' assert results['NM_001083585.1:c.*344_*368dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001077054.1:p.?', 'slr': 'NP_001077054.1:p.?'} assert results['NM_001083585.1:c.*344_*368dup']['submitted_variant'] == 'NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=' @@ -2589,8 +2478,7 @@ def test_variant81(self): assert 'NM_004703.5:c.*344_*368dup' in list(results.keys()) assert results['NM_004703.5:c.*344_*368dup']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_004703.5:c.*344_*368dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004703.5:c.*344_*368dup']['alt_genomic_loci'] == [] - assert results['NM_004703.5:c.*344_*368dup']['transcript_description'] == 'Homo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_004703.5:c.*344_*368dup']['alt_genomic_loci'], []) assert results['NM_004703.5:c.*344_*368dup']['gene_symbol'] == 'RABEP1' assert results['NM_004703.5:c.*344_*368dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004694.2:p.?', 'slr': 'NP_004694.2:p.?'} assert results['NM_004703.5:c.*344_*368dup']['submitted_variant'] == 'NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=' @@ -2607,8 +2495,7 @@ def test_variant81(self): assert 'NM_004703.4:c.*344_*368dup' in list(results.keys()) assert results['NM_004703.4:c.*344_*368dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_004703.4:c.*344_*368dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004703.4:c.*344_*368dup']['alt_genomic_loci'] == [] - assert results['NM_004703.4:c.*344_*368dup']['transcript_description'] == 'Homo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_004703.4:c.*344_*368dup']['alt_genomic_loci'], []) assert results['NM_004703.4:c.*344_*368dup']['gene_symbol'] == 'RABEP1' assert results['NM_004703.4:c.*344_*368dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004694.2:p.?', 'slr': 'NP_004694.2:p.?'} assert results['NM_004703.4:c.*344_*368dup']['submitted_variant'] == 'NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=' @@ -2626,8 +2513,7 @@ def test_variant81(self): assert 'NM_001291581.1:c.*344_*368dup' in list(results.keys()) assert results['NM_001291581.1:c.*344_*368dup']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001291581.1:c.*344_*368dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001291581.1:c.*344_*368dup']['alt_genomic_loci'] == [] - assert results['NM_001291581.1:c.*344_*368dup']['transcript_description'] == 'Homo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001291581.1:c.*344_*368dup']['alt_genomic_loci'], []) assert results['NM_001291581.1:c.*344_*368dup']['gene_symbol'] == 'RABEP1' assert results['NM_001291581.1:c.*344_*368dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278510.1:p.?', 'slr': 'NP_001278510.1:p.?'} assert results['NM_001291581.1:c.*344_*368dup']['submitted_variant'] == 'NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=' @@ -2644,8 +2530,7 @@ def test_variant81(self): assert 'NM_001083585.2:c.*344_*368dup' in list(results.keys()) assert results['NM_001083585.2:c.*344_*368dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001083585.2:c.*344_*368dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001083585.2:c.*344_*368dup']['alt_genomic_loci'] == [] - assert results['NM_001083585.2:c.*344_*368dup']['transcript_description'] == 'Homo sapiens rabaptin, RAB GTPase binding effector protein 1 (RABEP1), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001083585.2:c.*344_*368dup']['alt_genomic_loci'], []) assert results['NM_001083585.2:c.*344_*368dup']['gene_symbol'] == 'RABEP1' assert results['NM_001083585.2:c.*344_*368dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001077054.1:p.?', 'slr': 'NP_001077054.1:p.?'} assert results['NM_001083585.2:c.*344_*368dup']['submitted_variant'] == 'NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=' @@ -2668,8 +2553,7 @@ def test_variant82(self): assert 'NM_001080423.3:c.1020del' in list(results.keys()) assert results['NM_001080423.3:c.1020del']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001080423.3:c.1020del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001080423.3:c.1020del']['alt_genomic_loci'] == [] - assert results['NM_001080423.3:c.1020del']['transcript_description'] == 'Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA' + self.assertCountEqual(results['NM_001080423.3:c.1020del']['alt_genomic_loci'], []) assert results['NM_001080423.3:c.1020del']['gene_symbol'] == 'GRIP2' assert results['NM_001080423.3:c.1020del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.3:p.(Ser341GlnfsTer4)', 'slr': 'NP_001073892.3:p.(S341Qfs*4)'} assert results['NM_001080423.3:c.1020del']['submitted_variant'] == 'NC_000003.11:g.14561629_14561630GC=' @@ -2687,8 +2571,7 @@ def test_variant82(self): assert 'NM_001080423.2:c.1311del' in list(results.keys()) assert results['NM_001080423.2:c.1311del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001080423.2:c.1311del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001080423.2:c.1311del']['alt_genomic_loci'] == [] - assert results['NM_001080423.2:c.1311del']['transcript_description'] == 'Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA' + self.assertCountEqual(results['NM_001080423.2:c.1311del']['alt_genomic_loci'], []) assert results['NM_001080423.2:c.1311del']['gene_symbol'] == 'GRIP2' assert results['NM_001080423.2:c.1311del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.2:p.(Ser438GlnfsTer4)', 'slr': 'NP_001073892.2:p.(S438Qfs*4)'} assert results['NM_001080423.2:c.1311del']['submitted_variant'] == 'NC_000003.11:g.14561629_14561630GC=' @@ -2711,8 +2594,7 @@ def test_variant83(self): assert 'NM_001080423.3:c.1016_1020=' in list(results.keys()) assert results['NM_001080423.3:c.1016_1020=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001080423.3:c.1016_1020=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001080423.3:c.1016_1020=']['alt_genomic_loci'] == 
[] - assert results['NM_001080423.3:c.1016_1020=']['transcript_description'] == 'Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA' + self.assertCountEqual(results['NM_001080423.3:c.1016_1020=']['alt_genomic_loci'], []) assert results['NM_001080423.3:c.1016_1020=']['gene_symbol'] == 'GRIP2' assert results['NM_001080423.3:c.1016_1020=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.3:p.(Arg339=)', 'slr': 'NP_001073892.3:p.(R339=)'} assert results['NM_001080423.3:c.1016_1020=']['submitted_variant'] == 'NC_000003.11:g.14561629_14561630insG' @@ -2730,8 +2612,7 @@ def test_variant83(self): assert 'NM_001080423.2:c.1307_1311=' in list(results.keys()) assert results['NM_001080423.2:c.1307_1311=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001080423.2:c.1307_1311=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001080423.2:c.1307_1311=']['alt_genomic_loci'] == [] - assert results['NM_001080423.2:c.1307_1311=']['transcript_description'] == 'Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA' + self.assertCountEqual(results['NM_001080423.2:c.1307_1311=']['alt_genomic_loci'], []) assert results['NM_001080423.2:c.1307_1311=']['gene_symbol'] == 'GRIP2' assert results['NM_001080423.2:c.1307_1311=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.2:p.(Arg436=)', 'slr': 'NP_001073892.2:p.(R436=)'} assert results['NM_001080423.2:c.1307_1311=']['submitted_variant'] == 'NC_000003.11:g.14561629_14561630insG' @@ -2755,8 +2636,7 @@ def test_variant84(self): assert 'NM_018717.5:c.1515_1526del' in list(results.keys()) assert results['NM_018717.5:c.1515_1526del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_018717.5:c.1515_1526del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_018717.5:c.1515_1526del']['alt_genomic_loci'] == [] - assert results['NM_018717.5:c.1515_1526del']['transcript_description'] == 'Homo sapiens mastermind like transcriptional 
coactivator 3 (MAML3), mRNA' + self.assertCountEqual(results['NM_018717.5:c.1515_1526del']['alt_genomic_loci'], []) assert results['NM_018717.5:c.1515_1526del']['gene_symbol'] == 'MAML3' assert results['NM_018717.5:c.1515_1526del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.3:p.(Gln507_Gln510del)', 'slr': 'NP_061187.3:p.(Q507_Q510del)'} assert results['NM_018717.5:c.1515_1526del']['submitted_variant'] == 'NC_000004.11:g.140811111_140811122del' @@ -2773,8 +2653,7 @@ def test_variant84(self): assert 'NM_018717.4:c.1465_1469=' in list(results.keys()) assert results['NM_018717.4:c.1465_1469=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_018717.4:c.1465_1469=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_018717.4:c.1465_1469=']['alt_genomic_loci'] == [] - assert results['NM_018717.4:c.1465_1469=']['transcript_description'] == 'Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA' + self.assertCountEqual(results['NM_018717.4:c.1465_1469=']['alt_genomic_loci'], []) assert results['NM_018717.4:c.1465_1469=']['gene_symbol'] == 'MAML3' assert results['NM_018717.4:c.1465_1469=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.2:p.(Gln489=)', 'slr': 'NP_061187.2:p.(Q489=)'} assert results['NM_018717.4:c.1465_1469=']['submitted_variant'] == 'NC_000004.11:g.140811111_140811122del' @@ -2797,8 +2676,7 @@ def test_variant85(self): assert 'NM_018717.5:c.1468_1479=' in list(results.keys()) assert results['NM_018717.5:c.1468_1479=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_018717.5:c.1468_1479=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_018717.5:c.1468_1479=']['alt_genomic_loci'] == [] - assert results['NM_018717.5:c.1468_1479=']['transcript_description'] == 'Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA' + self.assertCountEqual(results['NM_018717.5:c.1468_1479=']['alt_genomic_loci'], []) assert 
results['NM_018717.5:c.1468_1479=']['gene_symbol'] == 'MAML3' assert results['NM_018717.5:c.1468_1479=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.3:p.(Gln490=)', 'slr': 'NP_061187.3:p.(Q490=)'} assert results['NM_018717.5:c.1468_1479=']['submitted_variant'] == 'NC_000004.11:g.140811111_140811122CTGCTGCTGCTG=' @@ -2816,8 +2694,7 @@ def test_variant85(self): assert 'NM_018717.4:c.1503_1514dup' in list(results.keys()) assert results['NM_018717.4:c.1503_1514dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_018717.4:c.1503_1514dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_018717.4:c.1503_1514dup']['alt_genomic_loci'] == [] - assert results['NM_018717.4:c.1503_1514dup']['transcript_description'] == 'Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA' + self.assertCountEqual(results['NM_018717.4:c.1503_1514dup']['alt_genomic_loci'], []) assert results['NM_018717.4:c.1503_1514dup']['gene_symbol'] == 'MAML3' assert results['NM_018717.4:c.1503_1514dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.2:p.(Gln503_Gln506dup)', 'slr': 'NP_061187.2:p.(Q503_Q506dup)'} assert results['NM_018717.4:c.1503_1514dup']['submitted_variant'] == 'NC_000004.11:g.140811111_140811122CTGCTGCTGCTG=' @@ -2841,8 +2718,7 @@ def test_variant86(self): assert 'NM_018717.5:c.1521_1526del' in list(results.keys()) assert results['NM_018717.5:c.1521_1526del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_018717.5:c.1521_1526del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_018717.5:c.1521_1526del']['alt_genomic_loci'] == [] - assert results['NM_018717.5:c.1521_1526del']['transcript_description'] == 'Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA' + self.assertCountEqual(results['NM_018717.5:c.1521_1526del']['alt_genomic_loci'], []) assert results['NM_018717.5:c.1521_1526del']['gene_symbol'] == 'MAML3' assert 
results['NM_018717.5:c.1521_1526del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.3:p.(Gln509_Gln510del)', 'slr': 'NP_061187.3:p.(Q509_Q510del)'} assert results['NM_018717.5:c.1521_1526del']['submitted_variant'] == 'NC_000004.11:g.140811117_140811122del' @@ -2859,8 +2735,7 @@ def test_variant86(self): assert 'NM_018717.4:c.1509_1514dup' in list(results.keys()) assert results['NM_018717.4:c.1509_1514dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_018717.4:c.1509_1514dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_018717.4:c.1509_1514dup']['alt_genomic_loci'] == [] - assert results['NM_018717.4:c.1509_1514dup']['transcript_description'] == 'Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA' + self.assertCountEqual(results['NM_018717.4:c.1509_1514dup']['alt_genomic_loci'], []) assert results['NM_018717.4:c.1509_1514dup']['gene_symbol'] == 'MAML3' assert results['NM_018717.4:c.1509_1514dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.2:p.(Gln505_Gln506dup)', 'slr': 'NP_061187.2:p.(Q505_Q506dup)'} assert results['NM_018717.4:c.1509_1514dup']['submitted_variant'] == 'NC_000004.11:g.140811117_140811122del' @@ -2883,8 +2758,7 @@ def test_variant87(self): assert 'NM_018717.5:c.1473_1479del' in list(results.keys()) assert results['NM_018717.5:c.1473_1479del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_018717.5:c.1473_1479del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_018717.5:c.1473_1479del']['alt_genomic_loci'] == [] - assert results['NM_018717.5:c.1473_1479del']['transcript_description'] == 'Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA' + self.assertCountEqual(results['NM_018717.5:c.1473_1479del']['alt_genomic_loci'], []) assert results['NM_018717.5:c.1473_1479del']['gene_symbol'] == 'MAML3' assert results['NM_018717.5:c.1473_1479del']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_061187.3:p.(Gln491HisfsTer29)', 'slr': 'NP_061187.3:p.(Q491Hfs*29)'} assert results['NM_018717.5:c.1473_1479del']['submitted_variant'] == 'NC_000004.11:g.140811111_140811117del' @@ -2902,8 +2776,7 @@ def test_variant87(self): assert 'NM_018717.4:c.1468_1472dup' in list(results.keys()) assert results['NM_018717.4:c.1468_1472dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_018717.4:c.1468_1472dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_018717.4:c.1468_1472dup']['alt_genomic_loci'] == [] - assert results['NM_018717.4:c.1468_1472dup']['transcript_description'] == 'Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA' + self.assertCountEqual(results['NM_018717.4:c.1468_1472dup']['alt_genomic_loci'], []) assert results['NM_018717.4:c.1468_1472dup']['gene_symbol'] == 'MAML3' assert results['NM_018717.4:c.1468_1472dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.2:p.(Gln491HisfsTer29)', 'slr': 'NP_061187.2:p.(Q491Hfs*29)'} assert results['NM_018717.4:c.1468_1472dup']['submitted_variant'] == 'NC_000004.11:g.140811111_140811117del' @@ -2927,8 +2800,7 @@ def test_variant88(self): assert 'NM_018717.4:c.1472_1473insTCAGCAGCAGCA' in list(results.keys()) assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['alt_genomic_loci'] == [] - assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['transcript_description'] == 'Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA' + self.assertCountEqual(results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['alt_genomic_loci'], []) assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['gene_symbol'] == 'MAML3' assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_061187.2:p.(Gln490_Gln491insHisGlnGlnGln)', 'slr': 'NP_061187.2:p.(Q490_Q491insHQQQ)'} assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['submitted_variant'] == 'NC_000004.11:g.140811117C>A' @@ -2945,8 +2817,7 @@ def test_variant88(self): assert 'NM_018717.5:c.1473G>T' in list(results.keys()) assert results['NM_018717.5:c.1473G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_018717.5:c.1473G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_018717.5:c.1473G>T']['alt_genomic_loci'] == [] - assert results['NM_018717.5:c.1473G>T']['transcript_description'] == 'Homo sapiens mastermind like transcriptional coactivator 3 (MAML3), mRNA' + self.assertCountEqual(results['NM_018717.5:c.1473G>T']['alt_genomic_loci'], []) assert results['NM_018717.5:c.1473G>T']['gene_symbol'] == 'MAML3' assert results['NM_018717.5:c.1473G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.3:p.(Gln491His)', 'slr': 'NP_061187.3:p.(Q491H)'} assert results['NM_018717.5:c.1473G>T']['submitted_variant'] == 'NC_000004.11:g.140811117C>A' @@ -2969,8 +2840,7 @@ def test_variant89(self): assert 'NM_015120.4:c.1573_1579=' in list(results.keys()) assert results['NM_015120.4:c.1573_1579=']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.1573_1579=' assert results['NM_015120.4:c.1573_1579=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_015120.4:c.1573_1579=']['alt_genomic_loci'] == [] - assert results['NM_015120.4:c.1573_1579=']['transcript_description'] == 'Homo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA' + self.assertCountEqual(results['NM_015120.4:c.1573_1579=']['alt_genomic_loci'], []) assert results['NM_015120.4:c.1573_1579=']['gene_symbol'] == 'ALMS1' assert results['NM_015120.4:c.1573_1579=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Ser525=)', 'slr': 'NP_055935.4:p.(S525=)'} assert results['NM_015120.4:c.1573_1579=']['submitted_variant'] == 
'NC_000002.11:g.73675227_73675228insCTC' @@ -2993,22 +2863,21 @@ def test_variant90(self): assert results['flag'] == 'gene_variant' assert 'NM_020469.2:c.260_262=' in list(results.keys()) - assert results['NM_020469.2:c.260_262=']['hgvs_lrg_transcript_variant'] == 'LRG_792t1:c.260_262=' + assert results['NM_020469.2:c.260_262=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_020469.2:c.260_262=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.260_262=']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}] - assert results['NM_020469.2:c.260_262=']['transcript_description'] == 'Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA' + self.assertCountEqual(results['NM_020469.2:c.260_262=']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}]) assert 
results['NM_020469.2:c.260_262=']['gene_symbol'] == 'ABO' assert results['NM_020469.2:c.260_262=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Val87=)', 'slr': 'NP_065202.2:p.(V87=)'} assert results['NM_020469.2:c.260_262=']['submitted_variant'] == '9-136132908-T-TC' assert results['NM_020469.2:c.260_262=']['genome_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.260_262=']['hgvs_lrg_variant'] == 'LRG_792:g.20145_20147=' + assert results['NM_020469.2:c.260_262=']['hgvs_lrg_variant'] == '' assert results['NM_020469.2:c.260_262=']['hgvs_transcript_variant'] == 'NM_020469.2:c.260_262=' assert results['NM_020469.2:c.260_262=']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20145_20147=' assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '136132908', 'alt': 'TC'}} assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '133257521', 'alt': 'TC'}} assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '136132908', 'alt': 'TC'}} assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '133257521', 'alt': 'TC'}} - assert results['NM_020469.2:c.260_262=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_792.xml'} + assert 
results['NM_020469.2:c.260_262=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2'} def test_variant91(self): @@ -3018,22 +2887,21 @@ def test_variant91(self): assert results['flag'] == 'gene_variant' assert 'NM_020469.2:c.259del' in list(results.keys()) - assert results['NM_020469.2:c.259del']['hgvs_lrg_transcript_variant'] == 'LRG_792t1:c.259del' + assert results['NM_020469.2:c.259del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_020469.2:c.259del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.259del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83618del', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83618del', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83618del', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83618del', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}] - assert results['NM_020469.2:c.259del']['transcript_description'] == 'Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA' + self.assertCountEqual(results['NM_020469.2:c.259del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83618del', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83618del', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83618del', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'AC', 'pos': 
'83616', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83618del', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}]) assert results['NM_020469.2:c.259del']['gene_symbol'] == 'ABO' assert results['NM_020469.2:c.259del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Val87Ter)', 'slr': 'NP_065202.2:p.(V87*)'} assert results['NM_020469.2:c.259del']['submitted_variant'] == '9-136132908-TAC-TCA' assert results['NM_020469.2:c.259del']['genome_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.259del']['hgvs_lrg_variant'] == 'LRG_792:g.20144del' + assert results['NM_020469.2:c.259del']['hgvs_lrg_variant'] == '' assert results['NM_020469.2:c.259del']['hgvs_transcript_variant'] == 'NM_020469.2:c.259del' assert results['NM_020469.2:c.259del']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20144del' assert results['NM_020469.2:c.259del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910delinsCA', 'vcf': {'chr': 'chr9', 'ref': 'AC', 'pos': '136132909', 'alt': 'CA'}} assert results['NM_020469.2:c.259del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523delinsCA', 'vcf': {'chr': 'chr9', 'ref': 'AC', 'pos': '133257522', 'alt': 'CA'}} assert results['NM_020469.2:c.259del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910delinsCA', 'vcf': {'chr': '9', 'ref': 'AC', 'pos': '136132909', 'alt': 'CA'}} assert results['NM_020469.2:c.259del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523delinsCA', 'vcf': {'chr': '9', 'ref': 'AC', 'pos': '133257522', 'alt': 'CA'}} - assert results['NM_020469.2:c.259del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_792.xml'} + assert results['NM_020469.2:c.259del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2'} def test_variant92(self): @@ -3042,22 +2910,21 @@ def test_variant92(self): print(results) assert 'NM_020469.2:c.261del' in list(results.keys()) - assert results['NM_020469.2:c.261del']['hgvs_lrg_transcript_variant'] == 'LRG_792t1:c.261del' + assert results['NM_020469.2:c.261del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_020469.2:c.261del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.261del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83615del', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83615del', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615del', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615del', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}] - assert results['NM_020469.2:c.261del']['transcript_description'] == 'Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA' + self.assertCountEqual(results['NM_020469.2:c.261del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83615del', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83615del', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'grch38': 
{'hgvs_genomic_description': 'NW_009646201.1:g.83615del', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615del', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}]) assert results['NM_020469.2:c.261del']['gene_symbol'] == 'ABO' assert results['NM_020469.2:c.261del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Thr88ProfsTer31)', 'slr': 'NP_065202.2:p.(T88Pfs*31)'} assert results['NM_020469.2:c.261del']['submitted_variant'] == '9-136132908-TA-TA' assert results['NM_020469.2:c.261del']['genome_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.261del']['hgvs_lrg_variant'] == 'LRG_792:g.20146del' + assert results['NM_020469.2:c.261del']['hgvs_lrg_variant'] == '' assert results['NM_020469.2:c.261del']['hgvs_transcript_variant'] == 'NM_020469.2:c.261del' assert results['NM_020469.2:c.261del']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20146del' assert results['NM_020469.2:c.261del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909=', 'vcf': {'chr': 'chr9', 'ref': 'TA', 'pos': '136132908', 'alt': 'TA'}} assert results['NM_020469.2:c.261del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522=', 'vcf': {'chr': 'chr9', 'ref': 'TA', 'pos': '133257521', 'alt': 'TA'}} assert results['NM_020469.2:c.261del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909=', 'vcf': {'chr': '9', 'ref': 'TA', 'pos': '136132908', 'alt': 'TA'}} assert results['NM_020469.2:c.261del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522=', 'vcf': {'chr': '9', 'ref': 'TA', 'pos': '133257521', 'alt': 'TA'}} - assert results['NM_020469.2:c.261del']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_792.xml'} + assert results['NM_020469.2:c.261del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2'} assert results['flag'] == 'gene_variant' @@ -3068,22 +2935,21 @@ def test_variant93(self): assert results['flag'] == 'gene_variant' assert 'NM_020469.2:c.259del' in list(results.keys()) - assert results['NM_020469.2:c.259del']['hgvs_lrg_transcript_variant'] == 'LRG_792t1:c.259del' + assert results['NM_020469.2:c.259del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_020469.2:c.259del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.259del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83618del', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83618del', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83618del', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83618del', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}] - assert results['NM_020469.2:c.259del']['transcript_description'] == 'Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA' + self.assertCountEqual(results['NM_020469.2:c.259del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83618del', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'AC', 'pos': '83616', 'alt': 
'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83618del', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83618del', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83618del', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}]) assert results['NM_020469.2:c.259del']['gene_symbol'] == 'ABO' assert results['NM_020469.2:c.259del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Val87Ter)', 'slr': 'NP_065202.2:p.(V87*)'} assert results['NM_020469.2:c.259del']['submitted_variant'] == 'NM_020469.2:c.258delG' assert results['NM_020469.2:c.259del']['genome_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.259del']['hgvs_lrg_variant'] == 'LRG_792:g.20144del' + assert results['NM_020469.2:c.259del']['hgvs_lrg_variant'] == '' assert results['NM_020469.2:c.259del']['hgvs_transcript_variant'] == 'NM_020469.2:c.259del' assert results['NM_020469.2:c.259del']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20144del' assert results['NM_020469.2:c.259del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910delinsCA', 'vcf': {'chr': 'chr9', 'ref': 'AC', 'pos': '136132909', 'alt': 'CA'}} assert results['NM_020469.2:c.259del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523delinsCA', 'vcf': {'chr': 'chr9', 'ref': 'AC', 'pos': '133257522', 'alt': 'CA'}} assert results['NM_020469.2:c.259del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910delinsCA', 'vcf': {'chr': '9', 'ref': 'AC', 'pos': '136132909', 'alt': 'CA'}} assert results['NM_020469.2:c.259del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523delinsCA', 'vcf': {'chr': '9', 
'ref': 'AC', 'pos': '133257522', 'alt': 'CA'}} - assert results['NM_020469.2:c.259del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_792.xml'} + assert results['NM_020469.2:c.259del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2'} def test_variant94(self): @@ -3093,22 +2959,21 @@ def test_variant94(self): assert results['flag'] == 'gene_variant' assert 'NM_020469.2:c.260_262=' in list(results.keys()) - assert results['NM_020469.2:c.260_262=']['hgvs_lrg_transcript_variant'] == 'LRG_792t1:c.260_262=' + assert results['NM_020469.2:c.260_262=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_020469.2:c.260_262=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.260_262=']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}] - assert results['NM_020469.2:c.260_262=']['transcript_description'] == 'Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA' + 
self.assertCountEqual(results['NM_020469.2:c.260_262=']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}]) assert results['NM_020469.2:c.260_262=']['gene_symbol'] == 'ABO' assert results['NM_020469.2:c.260_262=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Val87=)', 'slr': 'NP_065202.2:p.(V87=)'} assert results['NM_020469.2:c.260_262=']['submitted_variant'] == 'NM_020469.2:c.260_262TGA=' assert results['NM_020469.2:c.260_262=']['genome_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.260_262=']['hgvs_lrg_variant'] == 'LRG_792:g.20145_20147=' + assert results['NM_020469.2:c.260_262=']['hgvs_lrg_variant'] == '' assert results['NM_020469.2:c.260_262=']['hgvs_transcript_variant'] == 'NM_020469.2:c.260_262=' assert results['NM_020469.2:c.260_262=']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20145_20147=' assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '136132908', 'alt': 'TC'}} assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '133257521', 'alt': 'TC'}} assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000009.11:g.136132908_136132909insC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '136132908', 'alt': 'TC'}} assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '133257521', 'alt': 'TC'}} - assert results['NM_020469.2:c.260_262=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_792.xml'} + assert results['NM_020469.2:c.260_262=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2'} def test_variant95(self): @@ -3117,22 +2982,21 @@ def test_variant95(self): print(results) assert 'NM_020469.2:c.261del' in list(results.keys()) - assert results['NM_020469.2:c.261del']['hgvs_lrg_transcript_variant'] == 'LRG_792t1:c.261del' + assert results['NM_020469.2:c.261del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_020469.2:c.261del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.261del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83615del', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83615del', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615del', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615del', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}] - assert 
results['NM_020469.2:c.261del']['transcript_description'] == 'Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA' + self.assertCountEqual(results['NM_020469.2:c.261del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83615del', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83615del', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615del', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615del', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}]) assert results['NM_020469.2:c.261del']['gene_symbol'] == 'ABO' assert results['NM_020469.2:c.261del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Thr88ProfsTer31)', 'slr': 'NP_065202.2:p.(T88Pfs*31)'} assert results['NM_020469.2:c.261del']['submitted_variant'] == 'NM_020469.2:c.261delG' assert results['NM_020469.2:c.261del']['genome_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.261del']['hgvs_lrg_variant'] == 'LRG_792:g.20146del' + assert results['NM_020469.2:c.261del']['hgvs_lrg_variant'] == '' assert results['NM_020469.2:c.261del']['hgvs_transcript_variant'] == 'NM_020469.2:c.261del' assert results['NM_020469.2:c.261del']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20146del' assert results['NM_020469.2:c.261del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909=', 'vcf': {'chr': 'chr9', 'ref': 'TA', 'pos': '136132908', 'alt': 'TA'}} assert results['NM_020469.2:c.261del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522=', 'vcf': {'chr': 'chr9', 'ref': 'TA', 'pos': '133257521', 'alt': 'TA'}} assert 
results['NM_020469.2:c.261del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909=', 'vcf': {'chr': '9', 'ref': 'TA', 'pos': '136132908', 'alt': 'TA'}} assert results['NM_020469.2:c.261del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522=', 'vcf': {'chr': '9', 'ref': 'TA', 'pos': '133257521', 'alt': 'TA'}} - assert results['NM_020469.2:c.261del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_792.xml'} + assert results['NM_020469.2:c.261del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2'} assert results['flag'] == 'gene_variant' @@ -3143,22 +3007,21 @@ def test_variant96(self): assert results['flag'] == 'gene_variant' assert 'NM_020469.2:c.261dup' in list(results.keys()) - assert results['NM_020469.2:c.261dup']['hgvs_lrg_transcript_variant'] == 'LRG_792t1:c.261dup' + assert results['NM_020469.2:c.261dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_020469.2:c.261dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.261dup']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83615dup', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'T', 'pos': '83614', 'alt': 'TC'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83615dup', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'T', 'pos': '83614', 'alt': 'TC'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615dup', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'T', 'pos': '83614', 'alt': 'TC'}}}, {'hg38': 
{'hgvs_genomic_description': 'NW_009646201.1:g.83615dup', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'T', 'pos': '83614', 'alt': 'TC'}}}] - assert results['NM_020469.2:c.261dup']['transcript_description'] == 'Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA' + self.assertCountEqual(results['NM_020469.2:c.261dup']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83615dup', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'T', 'pos': '83614', 'alt': 'TC'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83615dup', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'T', 'pos': '83614', 'alt': 'TC'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615dup', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'T', 'pos': '83614', 'alt': 'TC'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615dup', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'T', 'pos': '83614', 'alt': 'TC'}}}]) assert results['NM_020469.2:c.261dup']['gene_symbol'] == 'ABO' assert results['NM_020469.2:c.261dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Thr88AspfsTer107)', 'slr': 'NP_065202.2:p.(T88Dfs*107)'} assert results['NM_020469.2:c.261dup']['submitted_variant'] == 'NM_020469.2:c.261dupG' assert results['NM_020469.2:c.261dup']['genome_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.261dup']['hgvs_lrg_variant'] == 'LRG_792:g.20146dup' + assert results['NM_020469.2:c.261dup']['hgvs_lrg_variant'] == '' assert results['NM_020469.2:c.261dup']['hgvs_transcript_variant'] == 'NM_020469.2:c.261dup' assert results['NM_020469.2:c.261dup']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20146dup' assert results['NM_020469.2:c.261dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insCC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '136132908', 'alt': 'TCC'}} assert results['NM_020469.2:c.261dup']['primary_assembly_loci']['hg38'] 
== {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insCC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '133257521', 'alt': 'TCC'}} assert results['NM_020469.2:c.261dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insCC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '136132908', 'alt': 'TCC'}} assert results['NM_020469.2:c.261dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insCC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '133257521', 'alt': 'TCC'}} - assert results['NM_020469.2:c.261dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_792.xml'} + assert results['NM_020469.2:c.261dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2'} def test_variant97(self): @@ -3168,22 +3031,21 @@ def test_variant97(self): assert results['flag'] == 'gene_variant' assert 'NM_020469.2:c.261_262insTT' in list(results.keys()) - assert results['NM_020469.2:c.261_262insTT']['hgvs_lrg_transcript_variant'] == 'LRG_792t1:c.261_262insTT' + assert results['NM_020469.2:c.261_262insTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_020469.2:c.261_262insTT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.261_262insTT']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83615insAA', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'T', 'pos': '83614', 'alt': 'TAA'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83615insAA', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'T', 'pos': 
'83614', 'alt': 'TAA'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83615insAA', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'T', 'pos': '83614', 'alt': 'TAA'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83615insAA', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'T', 'pos': '83614', 'alt': 'TAA'}}}] - assert results['NM_020469.2:c.261_262insTT']['transcript_description'] == 'Homo sapiens ABO, alpha 1-3-N-acetylgalactosaminyltransferase and alpha 1-3-galactosyltransferase (ABO), mRNA' + self.assertCountEqual(results['NM_020469.2:c.261_262insTT']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83615insAA', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'T', 'pos': '83614', 'alt': 'TAA'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83615insAA', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'T', 'pos': '83614', 'alt': 'TAA'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83615insAA', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'T', 'pos': '83614', 'alt': 'TAA'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83615insAA', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'T', 'pos': '83614', 'alt': 'TAA'}}}]) assert results['NM_020469.2:c.261_262insTT']['gene_symbol'] == 'ABO' assert results['NM_020469.2:c.261_262insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Thr88LeufsTer32)', 'slr': 'NP_065202.2:p.(T88Lfs*32)'} assert results['NM_020469.2:c.261_262insTT']['submitted_variant'] == 'NM_020469.2:c.261_262insTT' assert results['NM_020469.2:c.261_262insTT']['genome_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.261_262insTT']['hgvs_lrg_variant'] == 'LRG_792:g.20146_20147insTT' + assert results['NM_020469.2:c.261_262insTT']['hgvs_lrg_variant'] == '' assert results['NM_020469.2:c.261_262insTT']['hgvs_transcript_variant'] == 'NM_020469.2:c.261_262insTT' assert results['NM_020469.2:c.261_262insTT']['hgvs_refseqgene_variant'] == 
'NG_006669.1:g.20146_20147insTT' assert results['NM_020469.2:c.261_262insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910insACA', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '136132908', 'alt': 'TAAC'}} assert results['NM_020469.2:c.261_262insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523insACA', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '133257521', 'alt': 'TAAC'}} assert results['NM_020469.2:c.261_262insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910insACA', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '136132908', 'alt': 'TAAC'}} assert results['NM_020469.2:c.261_262insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523insACA', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '133257521', 'alt': 'TAAC'}} - assert results['NM_020469.2:c.261_262insTT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_792.xml'} + assert results['NM_020469.2:c.261_262insTT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2'} def test_variant98(self): @@ -3195,8 +3057,7 @@ def test_variant98(self): assert 'NM_007121.5:c.515A>T' in list(results.keys()) assert results['NM_007121.5:c.515A>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007121.5:c.515A>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_007121.5:c.515A>T']['alt_genomic_loci'] == [] - assert results['NM_007121.5:c.515A>T']['transcript_description'] == 'Homo sapiens 
nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_007121.5:c.515A>T']['alt_genomic_loci'], []) assert results['NM_007121.5:c.515A>T']['gene_symbol'] == 'NR1H2' assert results['NM_007121.5:c.515A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172Ile)', 'slr': 'NP_009052.3:p.(K172I)'} assert results['NM_007121.5:c.515A>T']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564insTAC' @@ -3213,8 +3074,7 @@ def test_variant98(self): assert 'NM_001256647.1:c.224A>T' in list(results.keys()) assert results['NM_001256647.1:c.224A>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001256647.1:c.224A>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001256647.1:c.224A>T']['alt_genomic_loci'] == [] - assert results['NM_001256647.1:c.224A>T']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001256647.1:c.224A>T']['alt_genomic_loci'], []) assert results['NM_001256647.1:c.224A>T']['gene_symbol'] == 'NR1H2' assert results['NM_001256647.1:c.224A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75Ile)', 'slr': 'NP_001243576.1:p.(K75I)'} assert results['NM_001256647.1:c.224A>T']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564insTAC' @@ -3238,8 +3098,7 @@ def test_variant99(self): assert 'NM_007121.5:c.515_516del' in list(results.keys()) assert results['NM_007121.5:c.515_516del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007121.5:c.515_516del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_007121.5:c.515_516del']['alt_genomic_loci'] == [] - assert results['NM_007121.5:c.515_516del']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA' + 
self.assertCountEqual(results['NM_007121.5:c.515_516del']['alt_genomic_loci'], []) assert results['NM_007121.5:c.515_516del']['gene_symbol'] == 'NR1H2' assert results['NM_007121.5:c.515_516del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172ThrfsTer34)', 'slr': 'NP_009052.3:p.(K172Tfs*34)'} assert results['NM_007121.5:c.515_516del']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564insC' @@ -3256,8 +3115,7 @@ def test_variant99(self): assert 'NM_001256647.1:c.224_225del' in list(results.keys()) assert results['NM_001256647.1:c.224_225del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001256647.1:c.224_225del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001256647.1:c.224_225del']['alt_genomic_loci'] == [] - assert results['NM_001256647.1:c.224_225del']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001256647.1:c.224_225del']['alt_genomic_loci'], []) assert results['NM_001256647.1:c.224_225del']['gene_symbol'] == 'NR1H2' assert results['NM_001256647.1:c.224_225del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75ThrfsTer34)', 'slr': 'NP_001243576.1:p.(K75Tfs*34)'} assert results['NM_001256647.1:c.224_225del']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564insC' @@ -3281,8 +3139,7 @@ def test_variant100(self): assert 'NM_007121.5:c.515_516insT' in list(results.keys()) assert results['NM_007121.5:c.515_516insT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007121.5:c.515_516insT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_007121.5:c.515_516insT']['alt_genomic_loci'] == [] - assert results['NM_007121.5:c.515_516insT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA' + 
self.assertCountEqual(results['NM_007121.5:c.515_516insT']['alt_genomic_loci'], []) assert results['NM_007121.5:c.515_516insT']['gene_symbol'] == 'NR1H2' assert results['NM_007121.5:c.515_516insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172AsnfsTer35)', 'slr': 'NP_009052.3:p.(K172Nfs*35)'} assert results['NM_007121.5:c.515_516insT']['submitted_variant'] == 'NC_000019.10:g.50378564_50378565insTACA' @@ -3299,8 +3156,7 @@ def test_variant100(self): assert 'NM_001256647.1:c.224_225insT' in list(results.keys()) assert results['NM_001256647.1:c.224_225insT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001256647.1:c.224_225insT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001256647.1:c.224_225insT']['alt_genomic_loci'] == [] - assert results['NM_001256647.1:c.224_225insT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001256647.1:c.224_225insT']['alt_genomic_loci'], []) assert results['NM_001256647.1:c.224_225insT']['gene_symbol'] == 'NR1H2' assert results['NM_001256647.1:c.224_225insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75AsnfsTer35)', 'slr': 'NP_001243576.1:p.(K75Nfs*35)'} assert results['NM_001256647.1:c.224_225insT']['submitted_variant'] == 'NC_000019.10:g.50378564_50378565insTACA' @@ -3323,8 +3179,7 @@ def test_variant101(self): assert 'NM_007121.5:c.514_520=' in list(results.keys()) assert results['NM_007121.5:c.514_520=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007121.5:c.514_520=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_007121.5:c.514_520=']['alt_genomic_loci'] == [] - assert results['NM_007121.5:c.514_520=']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA' + 
self.assertCountEqual(results['NM_007121.5:c.514_520=']['alt_genomic_loci'], []) assert results['NM_007121.5:c.514_520=']['gene_symbol'] == 'NR1H2' assert results['NM_007121.5:c.514_520=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172=)', 'slr': 'NP_009052.3:p.(K172=)'} assert results['NM_007121.5:c.514_520=']['submitted_variant'] == 'NC_000019.10:g.50378565_50378567dup' @@ -3341,8 +3196,7 @@ def test_variant101(self): assert 'NM_001256647.1:c.223_229=' in list(results.keys()) assert results['NM_001256647.1:c.223_229=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001256647.1:c.223_229=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001256647.1:c.223_229=']['alt_genomic_loci'] == [] - assert results['NM_001256647.1:c.223_229=']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001256647.1:c.223_229=']['alt_genomic_loci'], []) assert results['NM_001256647.1:c.223_229=']['gene_symbol'] == 'NR1H2' assert results['NM_001256647.1:c.223_229=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75=)', 'slr': 'NP_001243576.1:p.(K75=)'} assert results['NM_001256647.1:c.223_229=']['submitted_variant'] == 'NC_000019.10:g.50378565_50378567dup' @@ -3367,8 +3221,7 @@ def test_variant102(self): assert 'NM_007121.5:c.519_521del' in list(results.keys()) assert results['NM_007121.5:c.519_521del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007121.5:c.519_521del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_007121.5:c.519_521del']['alt_genomic_loci'] == [] - assert results['NM_007121.5:c.519_521del']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_007121.5:c.519_521del']['alt_genomic_loci'], []) assert 
results['NM_007121.5:c.519_521del']['gene_symbol'] == 'NR1H2' assert results['NM_007121.5:c.519_521del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Gln176del)', 'slr': 'NP_009052.3:p.(Q176del)'} assert results['NM_007121.5:c.519_521del']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564=' @@ -3385,8 +3238,7 @@ def test_variant102(self): assert 'NM_001256647.1:c.228_230del' in list(results.keys()) assert results['NM_001256647.1:c.228_230del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001256647.1:c.228_230del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001256647.1:c.228_230del']['alt_genomic_loci'] == [] - assert results['NM_001256647.1:c.228_230del']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001256647.1:c.228_230del']['alt_genomic_loci'], []) assert results['NM_001256647.1:c.228_230del']['gene_symbol'] == 'NR1H2' assert results['NM_001256647.1:c.228_230del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Gln79del)', 'slr': 'NP_001243576.1:p.(Q79del)'} assert results['NM_001256647.1:c.228_230del']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564=' @@ -3409,8 +3261,7 @@ def test_variant103(self): assert 'NM_001256647.1:c.224_226delinsTCGG' in list(results.keys()) assert results['NM_001256647.1:c.224_226delinsTCGG']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001256647.1:c.224_226delinsTCGG']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001256647.1:c.224_226delinsTCGG']['alt_genomic_loci'] == [] - assert results['NM_001256647.1:c.224_226delinsTCGG']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001256647.1:c.224_226delinsTCGG']['alt_genomic_loci'], []) assert 
results['NM_001256647.1:c.224_226delinsTCGG']['gene_symbol'] == 'NR1H2' assert results['NM_001256647.1:c.224_226delinsTCGG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75IlefsTer35)', 'slr': 'NP_001243576.1:p.(K75Ifs*35)'} assert results['NM_001256647.1:c.224_226delinsTCGG']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564insTCGG' @@ -3428,8 +3279,7 @@ def test_variant103(self): assert 'NM_007121.5:c.515_517delinsTCGG' in list(results.keys()) assert results['NM_007121.5:c.515_517delinsTCGG']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007121.5:c.515_517delinsTCGG']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_007121.5:c.515_517delinsTCGG']['alt_genomic_loci'] == [] - assert results['NM_007121.5:c.515_517delinsTCGG']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_007121.5:c.515_517delinsTCGG']['alt_genomic_loci'], []) assert results['NM_007121.5:c.515_517delinsTCGG']['gene_symbol'] == 'NR1H2' assert results['NM_007121.5:c.515_517delinsTCGG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172IlefsTer35)', 'slr': 'NP_009052.3:p.(K172Ifs*35)'} assert results['NM_007121.5:c.515_517delinsTCGG']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564insTCGG' @@ -3453,8 +3303,7 @@ def test_variant104(self): assert 'NM_007121.5:c.514_515inv' in list(results.keys()) assert results['NM_007121.5:c.514_515inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007121.5:c.514_515inv']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_007121.5:c.514_515inv']['alt_genomic_loci'] == [] - assert results['NM_007121.5:c.514_515inv']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_007121.5:c.514_515inv']['alt_genomic_loci'], []) assert 
results['NM_007121.5:c.514_515inv']['gene_symbol'] == 'NR1H2' assert results['NM_007121.5:c.514_515inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172Leu)', 'slr': 'NP_009052.3:p.(K172L)'} assert results['NM_007121.5:c.514_515inv']['submitted_variant'] == 'NC_000019.10:g.50378563delinsTTAC' @@ -3471,8 +3320,7 @@ def test_variant104(self): assert 'NM_001256647.1:c.223_224inv' in list(results.keys()) assert results['NM_001256647.1:c.223_224inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001256647.1:c.223_224inv']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001256647.1:c.223_224inv']['alt_genomic_loci'] == [] - assert results['NM_001256647.1:c.223_224inv']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001256647.1:c.223_224inv']['alt_genomic_loci'], []) assert results['NM_001256647.1:c.223_224inv']['gene_symbol'] == 'NR1H2' assert results['NM_001256647.1:c.223_224inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75Leu)', 'slr': 'NP_001243576.1:p.(K75L)'} assert results['NM_001256647.1:c.223_224inv']['submitted_variant'] == 'NC_000019.10:g.50378563delinsTTAC' @@ -3495,8 +3343,7 @@ def test_variant105(self): assert 'NM_007121.5:c.514_515insT' in list(results.keys()) assert results['NM_007121.5:c.514_515insT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007121.5:c.514_515insT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_007121.5:c.514_515insT']['alt_genomic_loci'] == [] - assert results['NM_007121.5:c.514_515insT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_007121.5:c.514_515insT']['alt_genomic_loci'], []) assert results['NM_007121.5:c.514_515insT']['gene_symbol'] == 'NR1H2' assert 
results['NM_007121.5:c.514_515insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172IlefsTer35)', 'slr': 'NP_009052.3:p.(K172Ifs*35)'} assert results['NM_007121.5:c.514_515insT']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564insTAAC' @@ -3513,8 +3360,7 @@ def test_variant105(self): assert 'NM_001256647.1:c.223_224insT' in list(results.keys()) assert results['NM_001256647.1:c.223_224insT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001256647.1:c.223_224insT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001256647.1:c.223_224insT']['alt_genomic_loci'] == [] - assert results['NM_001256647.1:c.223_224insT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001256647.1:c.223_224insT']['alt_genomic_loci'], []) assert results['NM_001256647.1:c.223_224insT']['gene_symbol'] == 'NR1H2' assert results['NM_001256647.1:c.223_224insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75IlefsTer35)', 'slr': 'NP_001243576.1:p.(K75Ifs*35)'} assert results['NM_001256647.1:c.223_224insT']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564insTAAC' @@ -3539,8 +3385,7 @@ def test_variant106(self): assert 'NM_001256647.1:c.222_228del' in list(results.keys()) assert results['NM_001256647.1:c.222_228del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001256647.1:c.222_228del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001256647.1:c.222_228del']['alt_genomic_loci'] == [] - assert results['NM_001256647.1:c.222_228del']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001256647.1:c.222_228del']['alt_genomic_loci'], []) assert results['NM_001256647.1:c.222_228del']['gene_symbol'] == 'NR1H2' assert 
results['NM_001256647.1:c.222_228del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75SerfsTer47)', 'slr': 'NP_001243576.1:p.(K75Sfs*47)'} assert results['NM_001256647.1:c.222_228del']['submitted_variant'] == 'NC_000019.10:g.50378562_50378565del' @@ -3557,8 +3402,7 @@ def test_variant106(self): assert 'NM_007121.5:c.513_519del' in list(results.keys()) assert results['NM_007121.5:c.513_519del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007121.5:c.513_519del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_007121.5:c.513_519del']['alt_genomic_loci'] == [] - assert results['NM_007121.5:c.513_519del']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_007121.5:c.513_519del']['alt_genomic_loci'], []) assert results['NM_007121.5:c.513_519del']['gene_symbol'] == 'NR1H2' assert results['NM_007121.5:c.513_519del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172SerfsTer47)', 'slr': 'NP_009052.3:p.(K172Sfs*47)'} assert results['NM_007121.5:c.513_519del']['submitted_variant'] == 'NC_000019.10:g.50378562_50378565del' @@ -3581,8 +3425,7 @@ def test_variant107(self): assert 'NM_001256647.1:c.222_228delinsTC' in list(results.keys()) assert results['NM_001256647.1:c.222_228delinsTC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001256647.1:c.222_228delinsTC']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001256647.1:c.222_228delinsTC']['alt_genomic_loci'] == [] - assert results['NM_001256647.1:c.222_228delinsTC']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001256647.1:c.222_228delinsTC']['alt_genomic_loci'], []) assert results['NM_001256647.1:c.222_228delinsTC']['gene_symbol'] == 'NR1H2' assert 
results['NM_001256647.1:c.222_228delinsTC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75ProfsTer33)', 'slr': 'NP_001243576.1:p.(K75Pfs*33)'} assert results['NM_001256647.1:c.222_228delinsTC']['submitted_variant'] == 'NC_000019.10:g.50378562_50378565delinsTC' @@ -3600,8 +3443,7 @@ def test_variant107(self): assert 'NM_007121.5:c.513_519delinsTC' in list(results.keys()) assert results['NM_007121.5:c.513_519delinsTC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007121.5:c.513_519delinsTC']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_007121.5:c.513_519delinsTC']['alt_genomic_loci'] == [] - assert results['NM_007121.5:c.513_519delinsTC']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 1 group H member 2 (NR1H2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_007121.5:c.513_519delinsTC']['alt_genomic_loci'], []) assert results['NM_007121.5:c.513_519delinsTC']['gene_symbol'] == 'NR1H2' assert results['NM_007121.5:c.513_519delinsTC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172ProfsTer33)', 'slr': 'NP_009052.3:p.(K172Pfs*33)'} assert results['NM_007121.5:c.513_519delinsTC']['submitted_variant'] == 'NC_000019.10:g.50378562_50378565delinsTC' @@ -3625,8 +3467,7 @@ def test_variant108(self): assert 'NM_198455.2:c.1115_1116insT' in list(results.keys()) assert results['NM_198455.2:c.1115_1116insT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_198455.2:c.1115_1116insT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_198455.2:c.1115_1116insT']['alt_genomic_loci'] == [] - assert results['NM_198455.2:c.1115_1116insT']['transcript_description'] == 'Homo sapiens SCO-spondin (SSPO), mRNA' + self.assertCountEqual(results['NM_198455.2:c.1115_1116insT']['alt_genomic_loci'], []) assert results['NM_198455.2:c.1115_1116insT']['gene_symbol'] == 'SSPO' assert results['NM_198455.2:c.1115_1116insT']['hgvs_predicted_protein_consequence'] 
== {'tlr': 'NP_940857.2:p.(Leu374ProfsTer16)', 'slr': 'NP_940857.2:p.(L374Pfs*16)'} assert results['NM_198455.2:c.1115_1116insT']['submitted_variant'] == 'NC_000007.14:g.149779575_149779577delinsT' @@ -3650,8 +3491,7 @@ def test_variant109(self): assert 'NM_198455.2:c.1116_1118=' in list(results.keys()) assert results['NM_198455.2:c.1116_1118=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_198455.2:c.1116_1118=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_198455.2:c.1116_1118=']['alt_genomic_loci'] == [] - assert results['NM_198455.2:c.1116_1118=']['transcript_description'] == 'Homo sapiens SCO-spondin (SSPO), mRNA' + self.assertCountEqual(results['NM_198455.2:c.1116_1118=']['alt_genomic_loci'], []) assert results['NM_198455.2:c.1116_1118=']['gene_symbol'] == 'SSPO' assert results['NM_198455.2:c.1116_1118=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_940857.2:p.(Asp372=)', 'slr': 'NP_940857.2:p.(D372=)'} assert results['NM_198455.2:c.1116_1118=']['submitted_variant'] == 'NC_000007.14:g.149779575_149779577=' @@ -3675,8 +3515,7 @@ def test_variant110(self): assert 'NM_198455.2:c.1116_1118=' in list(results.keys()) assert results['NM_198455.2:c.1116_1118=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_198455.2:c.1116_1118=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_198455.2:c.1116_1118=']['alt_genomic_loci'] == [] - assert results['NM_198455.2:c.1116_1118=']['transcript_description'] == 'Homo sapiens SCO-spondin (SSPO), mRNA' + self.assertCountEqual(results['NM_198455.2:c.1116_1118=']['alt_genomic_loci'], []) assert results['NM_198455.2:c.1116_1118=']['gene_symbol'] == 'SSPO' assert results['NM_198455.2:c.1116_1118=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_940857.2:p.(Asp372=)', 'slr': 'NP_940857.2:p.(D372=)'} assert results['NM_198455.2:c.1116_1118=']['submitted_variant'] == 'NC_000007.14:g.149779576_149779578del' @@ -3700,8 +3539,7 @@ def test_variant111(self): assert 
'NM_198455.2:c.1115_1116dup' in list(results.keys()) assert results['NM_198455.2:c.1115_1116dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_198455.2:c.1115_1116dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_198455.2:c.1115_1116dup']['alt_genomic_loci'] == [] - assert results['NM_198455.2:c.1115_1116dup']['transcript_description'] == 'Homo sapiens SCO-spondin (SSPO), mRNA' + self.assertCountEqual(results['NM_198455.2:c.1115_1116dup']['alt_genomic_loci'], []) assert results['NM_198455.2:c.1115_1116dup']['gene_symbol'] == 'SSPO' assert results['NM_198455.2:c.1115_1116dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_940857.2:p.(Pro373ThrfsTer6)', 'slr': 'NP_940857.2:p.(P373Tfs*6)'} assert results['NM_198455.2:c.1115_1116dup']['submitted_variant'] == 'NC_000007.14:g.149779577del' @@ -3724,8 +3562,7 @@ def test_variant112(self): assert 'NM_198455.2:c.1114_1117del' in list(results.keys()) assert results['NM_198455.2:c.1114_1117del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_198455.2:c.1114_1117del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_198455.2:c.1114_1117del']['alt_genomic_loci'] == [] - assert results['NM_198455.2:c.1114_1117del']['transcript_description'] == 'Homo sapiens SCO-spondin (SSPO), mRNA' + self.assertCountEqual(results['NM_198455.2:c.1114_1117del']['alt_genomic_loci'], []) assert results['NM_198455.2:c.1114_1117del']['gene_symbol'] == 'SSPO' assert results['NM_198455.2:c.1114_1117del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_940857.2:p.(Asp372ProfsTer5)', 'slr': 'NP_940857.2:p.(D372Pfs*5)'} assert results['NM_198455.2:c.1114_1117del']['submitted_variant'] == 'NC_000007.14:g.149779573_149779579del' @@ -3749,8 +3586,7 @@ def test_variant113(self): assert 'NM_198455.2:c.1114_1117delinsCA' in list(results.keys()) assert results['NM_198455.2:c.1114_1117delinsCA']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_198455.2:c.1114_1117delinsCA']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_198455.2:c.1114_1117delinsCA']['alt_genomic_loci'] == [] - assert results['NM_198455.2:c.1114_1117delinsCA']['transcript_description'] == 'Homo sapiens SCO-spondin (SSPO), mRNA' + self.assertCountEqual(results['NM_198455.2:c.1114_1117delinsCA']['alt_genomic_loci'], []) assert results['NM_198455.2:c.1114_1117delinsCA']['gene_symbol'] == 'SSPO' assert results['NM_198455.2:c.1114_1117delinsCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_940857.2:p.(Asp372HisfsTer17)', 'slr': 'NP_940857.2:p.(D372Hfs*17)'} assert results['NM_198455.2:c.1114_1117delinsCA']['submitted_variant'] == 'NC_000007.14:g.149779573_149779579delinsCA' @@ -3774,8 +3610,7 @@ def test_variant114(self): assert 'NM_000088.3:c.590_591inv' in list(results.keys()) assert results['NM_000088.3:c.590_591inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.590_591inv' assert results['NM_000088.3:c.590_591inv']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.590_591inv']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.590_591inv']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.590_591inv']['alt_genomic_loci'], []) assert results['NM_000088.3:c.590_591inv']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.590_591inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Asp)', 'slr': 'NP_000079.2:p.(G197D)'} assert results['NM_000088.3:c.590_591inv']['submitted_variant'] == 'NM_000088.3:c.590_591inv' @@ -3799,8 +3634,7 @@ def test_variant115(self): assert 'NM_024989.3:c.1778_1779inv' in list(results.keys()) assert results['NM_024989.3:c.1778_1779inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_024989.3:c.1778_1779inv']['refseqgene_context_intronic_sequence'] == '' - assert 
results['NM_024989.3:c.1778_1779inv']['alt_genomic_loci'] == [] - assert results['NM_024989.3:c.1778_1779inv']['transcript_description'] == 'Homo sapiens post-GPI attachment to proteins 1 (PGAP1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_024989.3:c.1778_1779inv']['alt_genomic_loci'], []) assert results['NM_024989.3:c.1778_1779inv']['gene_symbol'] == 'PGAP1' assert results['NM_024989.3:c.1778_1779inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_079265.2:p.(Phe593Ter)', 'slr': 'NP_079265.2:p.(F593*)'} assert results['NM_024989.3:c.1778_1779inv']['submitted_variant'] == 'NM_024989.3:c.1778_1779inv' @@ -3825,8 +3659,7 @@ def test_variant116(self): assert 'NM_032815.3:c.555_556inv' in list(results.keys()) assert results['NM_032815.3:c.555_556inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_032815.3:c.555_556inv']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_032815.3:c.555_556inv']['alt_genomic_loci'] == [] - assert results['NM_032815.3:c.555_556inv']['transcript_description'] == 'Homo sapiens nuclear factor of activated T cells 2 interacting protein (NFATC2IP), mRNA' + self.assertCountEqual(results['NM_032815.3:c.555_556inv']['alt_genomic_loci'], []) assert results['NM_032815.3:c.555_556inv']['gene_symbol'] == 'NFATC2IP' assert results['NM_032815.3:c.555_556inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116204.3:p.(Glu185_Glu186delinsAspTer)', 'slr': 'NP_116204.3:p.(E185_E186delinsD*)'} assert results['NM_032815.3:c.555_556inv']['submitted_variant'] == 'NM_032815.3:c.555_556inv' @@ -3848,8 +3681,7 @@ def test_variant117(self): assert 'NM_006138.4:c.3_4inv' in list(results.keys()) assert results['NM_006138.4:c.3_4inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_006138.4:c.3_4inv']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_006138.4:c.3_4inv']['alt_genomic_loci'] == [] - assert results['NM_006138.4:c.3_4inv']['transcript_description'] == 'Homo sapiens 
membrane spanning 4-domains A3 (MS4A3), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_006138.4:c.3_4inv']['alt_genomic_loci'], []) assert results['NM_006138.4:c.3_4inv']['gene_symbol'] == 'MS4A3' assert results['NM_006138.4:c.3_4inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006129.4:p.(Met1?)', 'slr': 'NP_006129.4:p.(M1?)'} assert results['NM_006138.4:c.3_4inv']['submitted_variant'] == 'NM_006138.4:c.3_4inv' @@ -3873,8 +3705,7 @@ def test_variant118(self): assert 'NM_000038.5:c.3927_3928inv' in list(results.keys()) assert results['NM_000038.5:c.3927_3928inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000038.5:c.3927_3928inv']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000038.5:c.3927_3928inv']['alt_genomic_loci'] == [] - assert results['NM_000038.5:c.3927_3928inv']['transcript_description'] == 'Homo sapiens APC, WNT signaling pathway regulator (APC), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_000038.5:c.3927_3928inv']['alt_genomic_loci'], []) assert results['NM_000038.5:c.3927_3928inv']['gene_symbol'] == 'APC' assert results['NM_000038.5:c.3927_3928inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000029.2(LRG_130p1):p.(Glu1309_Lys1310delinsAspTer)', 'slr': 'NP_000029.2:p.(E1309_K1310delinsD*)'} assert results['NM_000038.5:c.3927_3928inv']['submitted_variant'] == 'NM_000038.5:c.3927_3928delAAinsTT' @@ -3898,8 +3729,7 @@ def test_variant119(self): assert 'NM_001034853.1:c.2847_2848inv' in list(results.keys()) assert results['NM_001034853.1:c.2847_2848inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001034853.1:c.2847_2848inv']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001034853.1:c.2847_2848inv']['alt_genomic_loci'] == [] - assert results['NM_001034853.1:c.2847_2848inv']['transcript_description'] == 'Homo sapiens retinitis pigmentosa GTPase regulator (RPGR), transcript variant C, mRNA' + 
self.assertCountEqual(results['NM_001034853.1:c.2847_2848inv']['alt_genomic_loci'], []) assert results['NM_001034853.1:c.2847_2848inv']['gene_symbol'] == 'RPGR' assert results['NM_001034853.1:c.2847_2848inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001030025.1:p.(Glu949_Glu950delinsAspTer)', 'slr': 'NP_001030025.1:p.(E949_E950delinsD*)'} assert results['NM_001034853.1:c.2847_2848inv']['submitted_variant'] == 'NM_001034853.1:c.2847_2848delAGinsCT' @@ -3922,8 +3752,7 @@ def test_variant120(self): assert 'NM_000088.3:c.4394_4395inv' in list(results.keys()) assert results['NM_000088.3:c.4394_4395inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.4394_4395inv' assert results['NM_000088.3:c.4394_4395inv']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.4394_4395inv']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.4394_4395inv']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.4394_4395inv']['alt_genomic_loci'], []) assert results['NM_000088.3:c.4394_4395inv']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.4394_4395inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Ter1465PheextTer27)', 'slr': 'NP_000079.2:p.(*1465Fext*27)'} assert results['NM_000088.3:c.4394_4395inv']['submitted_variant'] == 'NM_000088.3:c.4392_*2inv' @@ -3948,8 +3777,7 @@ def test_variant121(self): assert 'NM_000088.3:c.4392_*5inv' in list(results.keys()) assert results['NM_000088.3:c.4392_*5inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.4393_*4inv' assert results['NM_000088.3:c.4392_*5inv']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.4392_*5inv']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.4392_*5inv']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.4392_*5inv']['alt_genomic_loci'], []) 
assert results['NM_000088.3:c.4392_*5inv']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.4392_*5inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.4392_*5inv']['submitted_variant'] == 'NM_000088.3:c.4392_*5inv' @@ -3973,8 +3801,7 @@ def test_variant122(self): assert 'NM_000088.3:c.4390_*7inv' in list(results.keys()) assert results['NM_000088.3:c.4390_*7inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.4390_*7inv' assert results['NM_000088.3:c.4390_*7inv']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.4390_*7inv']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.4390_*7inv']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.4390_*7inv']['alt_genomic_loci'], []) assert results['NM_000088.3:c.4390_*7inv']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.4390_*7inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.4390_*7inv']['submitted_variant'] == 'NM_000088.3:c.4390_*7inv' @@ -3997,8 +3824,7 @@ def test_variant123(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_005732.3:c.2923-5insT' @@ -4022,8 +3848,7 @@ def test_variant124(self): assert 
'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_198283.1(EYS):c.*743120C>T' @@ -4047,8 +3872,7 @@ def test_variant125(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_133379.4(TTN):c.*265+26591C>T' @@ -4073,8 +3897,7 @@ def test_variant126(self): assert 'NM_000088.3:c.589-2_589-1delinsG' in list(results.keys()) assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-2del' assert results['NM_000088.3:c.589-2_589-1delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-2del' - assert results['NM_000088.3:c.589-2_589-1delinsG']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.589-2_589-1delinsG']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain 
(COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.589-2_589-1delinsG']['alt_genomic_loci'], []) assert results['NM_000088.3:c.589-2_589-1delinsG']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.589-2_589-1delinsG']['submitted_variant'] == 'NM_000088.3:c.589-2_589-1AG>G' @@ -4098,8 +3921,7 @@ def test_variant127(self): assert 'NM_000088.3:c.642+1_642+2delinsG' in list(results.keys()) assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642+2del' assert results['NM_000088.3:c.642+1_642+2delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.642+2del' - assert results['NM_000088.3:c.642+1_642+2delinsG']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.642+1_642+2delinsG']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.642+1_642+2delinsG']['alt_genomic_loci'], []) assert results['NM_000088.3:c.642+1_642+2delinsG']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.642+1_642+2delinsG']['submitted_variant'] == 'NM_000088.3:c.642+1_642+2delGTinsG' @@ -4122,8 +3944,7 @@ def test_variant128(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] 
== '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_004415.3:c.1-1insA' @@ -4148,8 +3969,7 @@ def test_variant129(self): assert 'NM_004415.3:c.-1_1insA' in list(results.keys()) assert results['NM_004415.3:c.-1_1insA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_004415.3:c.-1_1insA']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004415.3:c.-1_1insA']['alt_genomic_loci'] == [] - assert results['NM_004415.3:c.-1_1insA']['transcript_description'] == 'Homo sapiens desmoplakin (DSP), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_004415.3:c.-1_1insA']['alt_genomic_loci'], []) assert results['NM_004415.3:c.-1_1insA']['gene_symbol'] == 'DSP' assert results['NM_004415.3:c.-1_1insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004406.2(LRG_423p1):p.(Met1?)', 'slr': 'NP_004406.2:p.(M1?)'} assert results['NM_004415.3:c.-1_1insA']['submitted_variant'] == 'NM_004415.3:c.-1_1insA' @@ -4172,8 +3992,7 @@ def test_variant130(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_000273.2:c.1-5028_253del' @@ -4198,8 +4017,7 @@ def test_variant131(self): assert 'NM_002929.2:c.1006C>T' in list(results.keys()) assert results['NM_002929.2:c.1006C>T']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_002929.2:c.1006C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_002929.2:c.1006C>T']['alt_genomic_loci'] == [] - assert results['NM_002929.2:c.1006C>T']['transcript_description'] == 'Homo sapiens G protein-coupled receptor kinase 1 (GRK1), mRNA' + self.assertCountEqual(results['NM_002929.2:c.1006C>T']['alt_genomic_loci'], []) assert results['NM_002929.2:c.1006C>T']['gene_symbol'] == 'GRK1' assert results['NM_002929.2:c.1006C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002920.1:p.(Leu336Phe)', 'slr': 'NP_002920.1:p.(L336F)'} assert results['NM_002929.2:c.1006C>T']['submitted_variant'] == 'NM_002929.2:c.1006C>T' @@ -4223,8 +4041,7 @@ def test_variant132(self): assert 'NR_125367.1:n.167+18165G>A' in list(results.keys()) assert results['NR_125367.1:n.167+18165G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NR_125367.1:n.167+18165G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_125367.1:n.167+18165G>A']['alt_genomic_loci'] == [] - assert results['NR_125367.1:n.167+18165G>A']['transcript_description'] == 'Homo sapiens myosin heavy chain gene cluster antisense RNA (MYHAS), long non-coding RNA' + self.assertCountEqual(results['NR_125367.1:n.167+18165G>A']['alt_genomic_loci'], []) assert results['NR_125367.1:n.167+18165G>A']['gene_symbol'] == 'MYHAS' assert results['NR_125367.1:n.167+18165G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_125367.1:n.167+18165G>A']['submitted_variant'] == 'NR_125367.1:n.167+18165G>A' @@ -4247,8 +4064,7 @@ def test_variant133(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + 
self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_006005.3:c.3071_3073delinsTTA' @@ -4272,8 +4088,7 @@ def test_variant134(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_000089.3:n.1504_1506del' @@ -4297,8 +4112,7 @@ def test_variant135(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == 'Homo sapiens mitochondrion, complete genome' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NC_012920.1:m.1011C>T' @@ -4323,8 +4137,7 @@ def test_variant136(self): assert 'NM_014611.1:c.9879T>C' in list(results.keys()) assert 
results['NM_014611.1:c.9879T>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014611.1:c.9879T>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_014611.1:c.9879T>C']['alt_genomic_loci'] == [] - assert results['NM_014611.1:c.9879T>C']['transcript_description'] == 'Homo sapiens MDN1, midasin homolog (yeast) (MDN1), mRNA' + self.assertCountEqual(results['NM_014611.1:c.9879T>C']['alt_genomic_loci'], []) assert results['NM_014611.1:c.9879T>C']['gene_symbol'] == 'MDN1' assert results['NM_014611.1:c.9879T>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055426.1:p.(Val3293=)', 'slr': 'NP_055426.1:p.(V3293=)'} assert results['NM_014611.1:c.9879T>C']['submitted_variant'] == 'NC_000006.11:g.90403795G=' @@ -4341,8 +4154,7 @@ def test_variant136(self): assert 'NM_014611.2:c.9879C=' in list(results.keys()) assert results['NM_014611.2:c.9879C=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014611.2:c.9879C=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_014611.2:c.9879C=']['alt_genomic_loci'] == [] - assert results['NM_014611.2:c.9879C=']['transcript_description'] == 'Homo sapiens midasin AAA ATPase 1 (MDN1), mRNA' + self.assertCountEqual(results['NM_014611.2:c.9879C=']['alt_genomic_loci'], []) assert results['NM_014611.2:c.9879C=']['gene_symbol'] == 'MDN1' assert results['NM_014611.2:c.9879C=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055426.1:p.(Val3293=)', 'slr': 'NP_055426.1:p.(V3293=)'} assert results['NM_014611.2:c.9879C=']['submitted_variant'] == 'NC_000006.11:g.90403795G=' @@ -4365,8 +4177,7 @@ def test_variant137(self): assert 'NM_000130.4:c.1602del' in list(results.keys()) assert results['NM_000130.4:c.1602del']['hgvs_lrg_transcript_variant'] == 'LRG_553t1:c.1601del' assert results['NM_000130.4:c.1602del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000130.4:c.1602del']['alt_genomic_loci'] == [] - assert results['NM_000130.4:c.1602del']['transcript_description'] 
== 'Homo sapiens coagulation factor V (F5), mRNA' + self.assertCountEqual(results['NM_000130.4:c.1602del']['alt_genomic_loci'], []) assert results['NM_000130.4:c.1602del']['gene_symbol'] == 'F5' assert results['NM_000130.4:c.1602del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000121.2(LRG_553p1):p.(Arg534GlnfsTer40)', 'slr': 'NP_000121.2:p.(R534Qfs*40)'} assert results['NM_000130.4:c.1602del']['submitted_variant'] == '1-169519049-T-.' @@ -4384,8 +4195,7 @@ def test_variant137(self): assert 'NM_000130.4:c.1601G>A' in list(results.keys()) assert results['NM_000130.4:c.1601G>A']['hgvs_lrg_transcript_variant'] == 'LRG_553t1:c.1601G>A' assert results['NM_000130.4:c.1601G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000130.4:c.1601G>A']['alt_genomic_loci'] == [] - assert results['NM_000130.4:c.1601G>A']['transcript_description'] == 'Homo sapiens coagulation factor V (F5), mRNA' + self.assertCountEqual(results['NM_000130.4:c.1601G>A']['alt_genomic_loci'], []) assert results['NM_000130.4:c.1601G>A']['gene_symbol'] == 'F5' assert results['NM_000130.4:c.1601G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000121.2(LRG_553p1):p.(Arg534Gln)', 'slr': 'NP_000121.2:p.(R534Q)'} assert results['NM_000130.4:c.1601G>A']['submitted_variant'] == '1-169519049-T-.' 
@@ -4408,8 +4218,7 @@ def test_variant138(self): assert 'NM_001204317.1:c.856-9155_856-9154=' in list(results.keys()) assert results['NM_001204317.1:c.856-9155_856-9154=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001204317.1:c.856-9155_856-9154=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001204317.1:c.856-9155_856-9154=']['alt_genomic_loci'] == [] - assert results['NM_001204317.1:c.856-9155_856-9154=']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001204317.1:c.856-9155_856-9154=']['alt_genomic_loci'], []) assert results['NM_001204317.1:c.856-9155_856-9154=']['gene_symbol'] == 'PRLR' assert results['NM_001204317.1:c.856-9155_856-9154=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191246.1:p.?', 'slr': 'NP_001191246.1:p.?'} assert results['NM_001204317.1:c.856-9155_856-9154=']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' @@ -4426,8 +4235,7 @@ def test_variant138(self): assert 'NM_001204316.1:c.1009+7383_1009+7384=' in list(results.keys()) assert results['NM_001204316.1:c.1009+7383_1009+7384=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001204316.1:c.1009+7383_1009+7384=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001204316.1:c.1009+7383_1009+7384=']['alt_genomic_loci'] == [] - assert results['NM_001204316.1:c.1009+7383_1009+7384=']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001204316.1:c.1009+7383_1009+7384=']['alt_genomic_loci'], []) assert results['NM_001204316.1:c.1009+7383_1009+7384=']['gene_symbol'] == 'PRLR' assert results['NM_001204316.1:c.1009+7383_1009+7384=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191245.1:p.?', 'slr': 'NP_001191245.1:p.?'} assert results['NM_001204316.1:c.1009+7383_1009+7384=']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' @@ 
-4444,8 +4252,7 @@ def test_variant138(self): assert 'NM_001204314.2:c.*6528del' in list(results.keys()) assert results['NM_001204314.2:c.*6528del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001204314.2:c.*6528del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001204314.2:c.*6528del']['alt_genomic_loci'] == [] - assert results['NM_001204314.2:c.*6528del']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA' + self.assertCountEqual(results['NM_001204314.2:c.*6528del']['alt_genomic_loci'], []) assert results['NM_001204314.2:c.*6528del']['gene_symbol'] == 'PRLR' assert results['NM_001204314.2:c.*6528del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191243.1:p.?', 'slr': 'NP_001191243.1:p.?'} assert results['NM_001204314.2:c.*6528del']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' @@ -4462,8 +4269,7 @@ def test_variant138(self): assert 'NM_001204318.1:c.686-9155_686-9154=' in list(results.keys()) assert results['NM_001204318.1:c.686-9155_686-9154=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001204318.1:c.686-9155_686-9154=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001204318.1:c.686-9155_686-9154=']['alt_genomic_loci'] == [] - assert results['NM_001204318.1:c.686-9155_686-9154=']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001204318.1:c.686-9155_686-9154=']['alt_genomic_loci'], []) assert results['NM_001204318.1:c.686-9155_686-9154=']['gene_symbol'] == 'PRLR' assert results['NM_001204318.1:c.686-9155_686-9154=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191247.1:p.?', 'slr': 'NP_001191247.1:p.?'} assert results['NM_001204318.1:c.686-9155_686-9154=']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' @@ -4480,8 +4286,7 @@ def test_variant138(self): assert 'NR_037910.1:n.828-9155_828-9154=' in list(results.keys()) 
assert results['NR_037910.1:n.828-9155_828-9154=']['hgvs_lrg_transcript_variant'] == '' assert results['NR_037910.1:n.828-9155_828-9154=']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_037910.1:n.828-9155_828-9154=']['alt_genomic_loci'] == [] - assert results['NR_037910.1:n.828-9155_828-9154=']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 7, non-coding RNA' + self.assertCountEqual(results['NR_037910.1:n.828-9155_828-9154=']['alt_genomic_loci'], []) assert results['NR_037910.1:n.828-9155_828-9154=']['gene_symbol'] == 'PRLR' assert results['NR_037910.1:n.828-9155_828-9154=']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_037910.1:n.828-9155_828-9154=']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' @@ -4499,8 +4304,7 @@ def test_variant138(self): assert 'NM_000949.5:c.*6523_*6524=' in list(results.keys()) assert results['NM_000949.5:c.*6523_*6524=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000949.5:c.*6523_*6524=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000949.5:c.*6523_*6524=']['alt_genomic_loci'] == [] - assert results['NM_000949.5:c.*6523_*6524=']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000949.5:c.*6523_*6524=']['alt_genomic_loci'], []) assert results['NM_000949.5:c.*6523_*6524=']['gene_symbol'] == 'PRLR' assert results['NM_000949.5:c.*6523_*6524=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000940.1:p.?', 'slr': 'NP_000940.1:p.?'} assert results['NM_000949.5:c.*6523_*6524=']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' @@ -4517,8 +4321,7 @@ def test_variant138(self): assert 'NM_001204314.1:c.*6523_*6524=' in list(results.keys()) assert results['NM_001204314.1:c.*6523_*6524=']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001204314.1:c.*6523_*6524=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001204314.1:c.*6523_*6524=']['alt_genomic_loci'] == [] - assert results['NM_001204314.1:c.*6523_*6524=']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA' + self.assertCountEqual(results['NM_001204314.1:c.*6523_*6524=']['alt_genomic_loci'], []) assert results['NM_001204314.1:c.*6523_*6524=']['gene_symbol'] == 'PRLR' assert results['NM_001204314.1:c.*6523_*6524=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191243.1:p.?', 'slr': 'NP_001191243.1:p.?'} assert results['NM_001204314.1:c.*6523_*6524=']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' @@ -4535,8 +4338,7 @@ def test_variant138(self): assert 'NM_000949.6:c.*6528del' in list(results.keys()) assert results['NM_000949.6:c.*6528del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000949.6:c.*6528del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000949.6:c.*6528del']['alt_genomic_loci'] == [] - assert results['NM_000949.6:c.*6528del']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000949.6:c.*6528del']['alt_genomic_loci'], []) assert results['NM_000949.6:c.*6528del']['gene_symbol'] == 'PRLR' assert results['NM_000949.6:c.*6528del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000940.1:p.?', 'slr': 'NP_000940.1:p.?'} assert results['NM_000949.6:c.*6528del']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' @@ -4559,8 +4361,7 @@ def test_variant139(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == 
'' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_000251.1:c.1296_1348del' @@ -4585,8 +4386,7 @@ def test_variant140(self): assert 'NM_000088.3:c.2024_2028+1del' in list(results.keys()) assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.2024_2028+1del' assert results['NM_000088.3:c.2024_2028+1del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.2024_2028+1del' - assert results['NM_000088.3:c.2024_2028+1del']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.2024_2028+1del']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.2024_2028+1del']['alt_genomic_loci'], []) assert results['NM_000088.3:c.2024_2028+1del']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.2024_2028+1del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Ala675_Arg676del)', 'slr': 'NP_000079.2:p.(A675_R676del)'} assert results['NM_000088.3:c.2024_2028+1del']['submitted_variant'] == 'NM_000088.3:c.2023_2028del' @@ -4610,8 +4410,7 @@ def test_variant141(self): assert 'NM_000088.3:c.2024_2028+1del' in list(results.keys()) assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.2024_2028+1del' assert results['NM_000088.3:c.2024_2028+1del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.2024_2028+1del' - assert results['NM_000088.3:c.2024_2028+1del']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.2024_2028+1del']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.2024_2028+1del']['alt_genomic_loci'], 
[]) assert results['NM_000088.3:c.2024_2028+1del']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.2024_2028+1del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.2024_2028+1del']['submitted_variant'] == 'NM_000088.3:c.2024_2028+1del' @@ -4634,8 +4433,7 @@ def test_variant142(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'ENST00000450616.1:n.31+1G>C' @@ -4659,8 +4457,7 @@ def test_variant143(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'ENST00000491747:c.5071A>T' @@ -4685,8 +4482,7 @@ def test_variant144(self): assert 'NM_000088.3:c.589G>T' in list(results.keys()) assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 
'LRG_1t1:c.589G>T' assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.589G>T']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.589G>T']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.589G>T']['alt_genomic_loci'], []) assert results['NM_000088.3:c.589G>T']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.589G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)'} assert results['NM_000088.3:c.589G>T']['submitted_variant'] == 'NM_000088.3:c.589G>T' @@ -4710,8 +4506,7 @@ def test_variant145(self): assert 'NM_000088.3:c.589G>T' in list(results.keys()) assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.589G>T']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.589G>T']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.589G>T']['alt_genomic_loci'], []) assert results['NM_000088.3:c.589G>T']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.589G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)'} assert results['NM_000088.3:c.589G>T']['submitted_variant'] == 'NG_007400.1:g.8638G>T' @@ -4735,8 +4530,7 @@ def test_variant146(self): assert 'NM_000088.3:c.589G>T' in list(results.keys()) assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.589G>T']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.589G>T']['transcript_description'] == 'Homo sapiens 
collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.589G>T']['alt_genomic_loci'], []) assert results['NM_000088.3:c.589G>T']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.589G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)'} assert results['NM_000088.3:c.589G>T']['submitted_variant'] == 'LRG_1:g.8638G>T' @@ -4760,8 +4554,7 @@ def test_variant147(self): assert 'NM_000088.3:c.589G>T' in list(results.keys()) assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.589G>T']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.589G>T']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.589G>T']['alt_genomic_loci'], []) assert results['NM_000088.3:c.589G>T']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.589G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)'} assert results['NM_000088.3:c.589G>T']['submitted_variant'] == 'LRG_1t1:c.589G>T' @@ -4784,8 +4577,7 @@ def test_variant148(self): assert 'NM_002474.2:c.3034_3035inv' in list(results.keys()) assert results['NM_002474.2:c.3034_3035inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_002474.2:c.3034_3035inv']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_002474.2:c.3034_3035inv']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}] - assert 
results['NM_002474.2:c.3034_3035inv']['transcript_description'] == 'Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1A, mRNA' + self.assertCountEqual(results['NM_002474.2:c.3034_3035inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}]) assert results['NM_002474.2:c.3034_3035inv']['gene_symbol'] == 'MYH11' assert results['NM_002474.2:c.3034_3035inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002465.1:p.(Thr1012Val)', 'slr': 'NP_002465.1:p.(T1012V)'} assert results['NM_002474.2:c.3034_3035inv']['submitted_variant'] == 'chr16:g.15832508_15832509delinsAC' @@ -4802,8 +4594,7 @@ def test_variant148(self): assert 'NM_022844.2:c.3034_3035inv' in list(results.keys()) assert results['NM_022844.2:c.3034_3035inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_022844.2:c.3034_3035inv']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_022844.2:c.3034_3035inv']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}] - assert results['NM_022844.2:c.3034_3035inv']['transcript_description'] == 'Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2A, mRNA' + self.assertCountEqual(results['NM_022844.2:c.3034_3035inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 
'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}]) assert results['NM_022844.2:c.3034_3035inv']['gene_symbol'] == 'MYH11' assert results['NM_022844.2:c.3034_3035inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_074035.1:p.(Thr1012Val)', 'slr': 'NP_074035.1:p.(T1012V)'} assert results['NM_022844.2:c.3034_3035inv']['submitted_variant'] == 'chr16:g.15832508_15832509delinsAC' @@ -4820,8 +4611,7 @@ def test_variant148(self): assert 'NM_001040114.1:c.3055_3056inv' in list(results.keys()) assert results['NM_001040114.1:c.3055_3056inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001040114.1:c.3055_3056inv']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001040114.1:c.3055_3056inv']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}] - assert results['NM_001040114.1:c.3055_3056inv']['transcript_description'] == 'Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM1B, mRNA' + self.assertCountEqual(results['NM_001040114.1:c.3055_3056inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}]) assert results['NM_001040114.1:c.3055_3056inv']['gene_symbol'] == 'MYH11' assert results['NM_001040114.1:c.3055_3056inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035203.1:p.(Thr1019Val)', 'slr': 'NP_001035203.1:p.(T1019V)'} assert 
results['NM_001040114.1:c.3055_3056inv']['submitted_variant'] == 'chr16:g.15832508_15832509delinsAC' @@ -4839,8 +4629,7 @@ def test_variant148(self): assert 'NM_001040113.1:c.3055_3056inv' in list(results.keys()) assert results['NM_001040113.1:c.3055_3056inv']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001040113.1:c.3055_3056inv']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001040113.1:c.3055_3056inv']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}] - assert results['NM_001040113.1:c.3055_3056inv']['transcript_description'] == 'Homo sapiens myosin heavy chain 11 (MYH11), transcript variant SM2B, mRNA' + self.assertCountEqual(results['NM_001040113.1:c.3055_3056inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}]) assert results['NM_001040113.1:c.3055_3056inv']['gene_symbol'] == 'MYH11' assert results['NM_001040113.1:c.3055_3056inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035202.1:p.(Thr1019Val)', 'slr': 'NP_001035202.1:p.(T1019V)'} assert results['NM_001040113.1:c.3055_3056inv']['submitted_variant'] == 'chr16:g.15832508_15832509delinsAC' @@ -4863,8 +4652,7 @@ def test_variant149(self): assert 'NM_001162427.1:c.210+1615dup' in list(results.keys()) assert results['NM_001162427.1:c.210+1615dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001162427.1:c.210+1615dup']['refseqgene_context_intronic_sequence'] == '' - assert 
results['NM_001162427.1:c.210+1615dup']['alt_genomic_loci'] == [] - assert results['NM_001162427.1:c.210+1615dup']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001162427.1:c.210+1615dup']['alt_genomic_loci'], []) assert results['NM_001162427.1:c.210+1615dup']['gene_symbol'] == 'TSC1' assert results['NM_001162427.1:c.210+1615dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001155899.1:p.?', 'slr': 'NP_001155899.1:p.?'} assert results['NM_001162427.1:c.210+1615dup']['submitted_variant'] == 'NG_012386.1:g.24048dupG' @@ -4881,8 +4669,7 @@ def test_variant149(self): assert 'NM_001162426.1:c.363+1dup' in list(results.keys()) assert results['NM_001162426.1:c.363+1dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001162426.1:c.363+1dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001162426.1:c.363+1dup']['alt_genomic_loci'] == [] - assert results['NM_001162426.1:c.363+1dup']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001162426.1:c.363+1dup']['alt_genomic_loci'], []) assert results['NM_001162426.1:c.363+1dup']['gene_symbol'] == 'TSC1' assert results['NM_001162426.1:c.363+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001155898.1:p.?', 'slr': 'NP_001155898.1:p.?'} assert results['NM_001162426.1:c.363+1dup']['submitted_variant'] == 'NG_012386.1:g.24048dupG' @@ -4900,8 +4687,7 @@ def test_variant149(self): assert 'NM_001362177.1:c.-1+1dup' in list(results.keys()) assert results['NM_001362177.1:c.-1+1dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001362177.1:c.-1+1dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001362177.1:c.-1+1dup']['alt_genomic_loci'] == [] - assert results['NM_001362177.1:c.-1+1dup']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 5, 
mRNA' + self.assertCountEqual(results['NM_001362177.1:c.-1+1dup']['alt_genomic_loci'], []) assert results['NM_001362177.1:c.-1+1dup']['gene_symbol'] == 'TSC1' assert results['NM_001362177.1:c.-1+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001349106.1:p.?', 'slr': 'NP_001349106.1:p.?'} assert results['NM_001362177.1:c.-1+1dup']['submitted_variant'] == 'NG_012386.1:g.24048dupG' @@ -4918,8 +4704,7 @@ def test_variant149(self): assert 'NM_000368.4:c.363+1dup' in list(results.keys()) assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_transcript_variant'] == 'LRG_486t1:c.363+1dup' assert results['NM_000368.4:c.363+1dup']['refseqgene_context_intronic_sequence'] == 'NG_012386.1(NM_000368.4):c.363+1dup' - assert results['NM_000368.4:c.363+1dup']['alt_genomic_loci'] == [] - assert results['NM_000368.4:c.363+1dup']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000368.4:c.363+1dup']['alt_genomic_loci'], []) assert results['NM_000368.4:c.363+1dup']['gene_symbol'] == 'TSC1' assert results['NM_000368.4:c.363+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000359.1(LRG_486p1):p.?', 'slr': 'NP_000359.1:p.?'} assert results['NM_000368.4:c.363+1dup']['submitted_variant'] == 'NG_012386.1:g.24048dupG' @@ -4943,8 +4728,7 @@ def test_variant150(self): assert 'NM_033517.1:c.1307_1309del' in list(results.keys()) assert results['NM_033517.1:c.1307_1309del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_033517.1:c.1307_1309del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_033517.1:c.1307_1309del']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NW_015148969.1:g.33721_33723del', 'vcf': {'chr': 'HG1311_PATCH', 'ref': 'CCGA', 'pos': '33720', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NW_015148969.1:g.33721_33723del', 'vcf': {'chr': 'NW_015148969.1', 'ref': 'CCGA', 'pos': '33720', 'alt': 'C'}}}] - assert 
results['NM_033517.1:c.1307_1309del']['transcript_description'] == 'Homo sapiens SH3 and multiple ankyrin repeat domains 3 (SHANK3), mRNA' + self.assertCountEqual(results['NM_033517.1:c.1307_1309del']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NW_015148969.1:g.33721_33723del', 'vcf': {'chr': 'HG1311_PATCH', 'ref': 'CCGA', 'pos': '33720', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NW_015148969.1:g.33721_33723del', 'vcf': {'chr': 'NW_015148969.1', 'ref': 'CCGA', 'pos': '33720', 'alt': 'C'}}}]) assert results['NM_033517.1:c.1307_1309del']['gene_symbol'] == 'SHANK3' assert results['NM_033517.1:c.1307_1309del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_277052.1:p.(Pro436_Ser437delinsArg)', 'slr': 'NP_277052.1:p.(P436_S437delinsR)'} assert results['NM_033517.1:c.1307_1309del']['submitted_variant'] == 'NM_033517.1:c.1307_1309delCGA' @@ -4967,8 +4751,7 @@ def test_variant151(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'HG1311_PATCH-33720-CCGA-C' @@ -4992,8 +4775,7 @@ def test_variant152(self): assert 'NM_015120.4:c.1573_1579=' in list(results.keys()) assert results['NM_015120.4:c.1573_1579=']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.1573_1579=' assert results['NM_015120.4:c.1573_1579=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_015120.4:c.1573_1579=']['alt_genomic_loci'] == [] - assert 
results['NM_015120.4:c.1573_1579=']['transcript_description'] == 'Homo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA' + self.assertCountEqual(results['NM_015120.4:c.1573_1579=']['alt_genomic_loci'], []) assert results['NM_015120.4:c.1573_1579=']['gene_symbol'] == 'ALMS1' assert results['NM_015120.4:c.1573_1579=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Ser525=)', 'slr': 'NP_055935.4:p.(S525=)'} assert results['NM_015120.4:c.1573_1579=']['submitted_variant'] == '2-73675227-TCTC-TCTCCTC' @@ -5017,8 +4799,7 @@ def test_variant153(self): assert 'NM_015120.4:c.1577_1579del' in list(results.keys()) assert results['NM_015120.4:c.1577_1579del']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.1577_1579del' assert results['NM_015120.4:c.1577_1579del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_015120.4:c.1577_1579del']['alt_genomic_loci'] == [] - assert results['NM_015120.4:c.1577_1579del']['transcript_description'] == 'Homo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA' + self.assertCountEqual(results['NM_015120.4:c.1577_1579del']['alt_genomic_loci'], []) assert results['NM_015120.4:c.1577_1579del']['gene_symbol'] == 'ALMS1' assert results['NM_015120.4:c.1577_1579del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Pro526del)', 'slr': 'NP_055935.4:p.(P526del)'} assert results['NM_015120.4:c.1577_1579del']['submitted_variant'] == '2-73675227-TC-TC' @@ -5042,8 +4823,7 @@ def test_variant154(self): assert 'NM_001080423.3:c.1016_1020=' in list(results.keys()) assert results['NM_001080423.3:c.1016_1020=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001080423.3:c.1016_1020=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001080423.3:c.1016_1020=']['alt_genomic_loci'] == [] - assert results['NM_001080423.3:c.1016_1020=']['transcript_description'] == 'Homo sapiens glutamate receptor interacting protein 2 
(GRIP2), mRNA' + self.assertCountEqual(results['NM_001080423.3:c.1016_1020=']['alt_genomic_loci'], []) assert results['NM_001080423.3:c.1016_1020=']['gene_symbol'] == 'GRIP2' assert results['NM_001080423.3:c.1016_1020=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.3:p.(Arg339=)', 'slr': 'NP_001073892.3:p.(R339=)'} assert results['NM_001080423.3:c.1016_1020=']['submitted_variant'] == '3-14561627-AG-AGG' @@ -5061,8 +4841,7 @@ def test_variant154(self): assert 'NM_001080423.2:c.1307_1311=' in list(results.keys()) assert results['NM_001080423.2:c.1307_1311=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001080423.2:c.1307_1311=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001080423.2:c.1307_1311=']['alt_genomic_loci'] == [] - assert results['NM_001080423.2:c.1307_1311=']['transcript_description'] == 'Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA' + self.assertCountEqual(results['NM_001080423.2:c.1307_1311=']['alt_genomic_loci'], []) assert results['NM_001080423.2:c.1307_1311=']['gene_symbol'] == 'GRIP2' assert results['NM_001080423.2:c.1307_1311=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.2:p.(Arg436=)', 'slr': 'NP_001073892.2:p.(R436=)'} assert results['NM_001080423.2:c.1307_1311=']['submitted_variant'] == '3-14561627-AG-AGG' @@ -5085,8 +4864,7 @@ def test_variant155(self): assert 'NM_001080423.3:c.1020del' in list(results.keys()) assert results['NM_001080423.3:c.1020del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001080423.3:c.1020del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001080423.3:c.1020del']['alt_genomic_loci'] == [] - assert results['NM_001080423.3:c.1020del']['transcript_description'] == 'Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA' + self.assertCountEqual(results['NM_001080423.3:c.1020del']['alt_genomic_loci'], []) assert results['NM_001080423.3:c.1020del']['gene_symbol'] == 'GRIP2' assert 
results['NM_001080423.3:c.1020del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.3:p.(Ser341GlnfsTer4)', 'slr': 'NP_001073892.3:p.(S341Qfs*4)'} assert results['NM_001080423.3:c.1020del']['submitted_variant'] == '3-14561630-CC-CC' @@ -5104,8 +4882,7 @@ def test_variant155(self): assert 'NM_001080423.2:c.1311del' in list(results.keys()) assert results['NM_001080423.2:c.1311del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001080423.2:c.1311del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001080423.2:c.1311del']['alt_genomic_loci'] == [] - assert results['NM_001080423.2:c.1311del']['transcript_description'] == 'Homo sapiens glutamate receptor interacting protein 2 (GRIP2), mRNA' + self.assertCountEqual(results['NM_001080423.2:c.1311del']['alt_genomic_loci'], []) assert results['NM_001080423.2:c.1311del']['gene_symbol'] == 'GRIP2' assert results['NM_001080423.2:c.1311del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.2:p.(Ser438GlnfsTer4)', 'slr': 'NP_001073892.2:p.(S438Qfs*4)'} assert results['NM_001080423.2:c.1311del']['submitted_variant'] == '3-14561630-CC-CC' @@ -5129,8 +4906,7 @@ def test_variant156(self): assert 'NM_014611.1:c.9879T>C' in list(results.keys()) assert results['NM_014611.1:c.9879T>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014611.1:c.9879T>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_014611.1:c.9879T>C']['alt_genomic_loci'] == [] - assert results['NM_014611.1:c.9879T>C']['transcript_description'] == 'Homo sapiens MDN1, midasin homolog (yeast) (MDN1), mRNA' + self.assertCountEqual(results['NM_014611.1:c.9879T>C']['alt_genomic_loci'], []) assert results['NM_014611.1:c.9879T>C']['gene_symbol'] == 'MDN1' assert results['NM_014611.1:c.9879T>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055426.1:p.(Val3293=)', 'slr': 'NP_055426.1:p.(V3293=)'} assert results['NM_014611.1:c.9879T>C']['submitted_variant'] == '6-90403795-G-G' @@ 
-5147,8 +4923,7 @@ def test_variant156(self): assert 'NM_014611.2:c.9879C=' in list(results.keys()) assert results['NM_014611.2:c.9879C=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014611.2:c.9879C=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_014611.2:c.9879C=']['alt_genomic_loci'] == [] - assert results['NM_014611.2:c.9879C=']['transcript_description'] == 'Homo sapiens midasin AAA ATPase 1 (MDN1), mRNA' + self.assertCountEqual(results['NM_014611.2:c.9879C=']['alt_genomic_loci'], []) assert results['NM_014611.2:c.9879C=']['gene_symbol'] == 'MDN1' assert results['NM_014611.2:c.9879C=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055426.1:p.(Val3293=)', 'slr': 'NP_055426.1:p.(V3293=)'} assert results['NM_014611.2:c.9879C=']['submitted_variant'] == '6-90403795-G-G' @@ -5171,8 +4946,7 @@ def test_variant157(self): assert 'NM_014611.2:c.9879C>T' in list(results.keys()) assert results['NM_014611.2:c.9879C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014611.2:c.9879C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_014611.2:c.9879C>T']['alt_genomic_loci'] == [] - assert results['NM_014611.2:c.9879C>T']['transcript_description'] == 'Homo sapiens midasin AAA ATPase 1 (MDN1), mRNA' + self.assertCountEqual(results['NM_014611.2:c.9879C>T']['alt_genomic_loci'], []) assert results['NM_014611.2:c.9879C>T']['gene_symbol'] == 'MDN1' assert results['NM_014611.2:c.9879C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055426.1:p.(Val3293=)', 'slr': 'NP_055426.1:p.(V3293=)'} assert results['NM_014611.2:c.9879C>T']['submitted_variant'] == '6-90403795-G-A' @@ -5190,8 +4964,7 @@ def test_variant157(self): assert 'NM_014611.1:c.9879T=' in list(results.keys()) assert results['NM_014611.1:c.9879T=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014611.1:c.9879T=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_014611.1:c.9879T=']['alt_genomic_loci'] == [] - assert 
results['NM_014611.1:c.9879T=']['transcript_description'] == 'Homo sapiens MDN1, midasin homolog (yeast) (MDN1), mRNA' + self.assertCountEqual(results['NM_014611.1:c.9879T=']['alt_genomic_loci'], []) assert results['NM_014611.1:c.9879T=']['gene_symbol'] == 'MDN1' assert results['NM_014611.1:c.9879T=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055426.1:p.(Val3293=)', 'slr': 'NP_055426.1:p.(V3293=)'} assert results['NM_014611.1:c.9879T=']['submitted_variant'] == '6-90403795-G-A' @@ -5215,8 +4988,7 @@ def test_variant158(self): assert 'NM_032470.3:c.4del' in list(results.keys()) assert results['NM_032470.3:c.4del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_032470.3:c.4del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_032470.3:c.4del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'chr6_GL000251v2_alt', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'chr6_GL000252v2_alt', 'ref': 'CG', 'pos': '3286624', 
'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'chr6_GL000254v2_alt', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167248.1:g.3274047del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'ref': 'CG', 'pos': '3274046', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167248.1:g.3274047del', 'vcf': {'chr': 'chr6_qbl_hap6', 'ref': 'CG', 'pos': '3274046', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167248.2:g.3268451del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'ref': 'CG', 'pos': '3268450', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167248.2:g.3268451del', 'vcf': {'chr': 'chr6_GL000255v2_alt', 'ref': 'CG', 'pos': '3268450', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3345701del', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'CG', 'pos': '3345700', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3345701del', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': 'CG', 'pos': '3345700', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3346403del', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'CG', 'pos': '3346402', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3346403del', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': 'CG', 'pos': '3346402', 'alt': 'C'}}}] - assert results['NM_032470.3:c.4del']['transcript_description'] == 'Homo sapiens tenascin XB (TNXB), transcript variant XB-S, mRNA' + 
self.assertCountEqual(results['NM_032470.3:c.4del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'chr6_GL000251v2_alt', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'chr6_GL000252v2_alt', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'chr6_GL000254v2_alt', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167248.1:g.3274047del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'ref': 'CG', 'pos': 
'3274046', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167248.1:g.3274047del', 'vcf': {'chr': 'chr6_qbl_hap6', 'ref': 'CG', 'pos': '3274046', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167248.2:g.3268451del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'ref': 'CG', 'pos': '3268450', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167248.2:g.3268451del', 'vcf': {'chr': 'chr6_GL000255v2_alt', 'ref': 'CG', 'pos': '3268450', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3345701del', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'CG', 'pos': '3345700', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3345701del', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': 'CG', 'pos': '3345700', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3346403del', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'CG', 'pos': '3346402', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3346403del', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': 'CG', 'pos': '3346402', 'alt': 'C'}}}]) assert results['NM_032470.3:c.4del']['gene_symbol'] == 'TNXB' assert results['NM_032470.3:c.4del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_115859.2:p.(Arg2AlafsTer91)', 'slr': 'NP_115859.2:p.(R2Afs*91)'} assert results['NM_032470.3:c.4del']['submitted_variant'] == '6-32012992-CG-C' @@ -5233,8 +5005,7 @@ def test_variant158(self): assert 'NM_001365276.1:c.10717del' in list(results.keys()) assert results['NM_001365276.1:c.10717del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001365276.1:c.10717del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001365276.1:c.10717del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'ref': 'CG', 'pos': '3483643', 
'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}] - assert results['NM_001365276.1:c.10717del']['transcript_description'] == 'Homo sapiens tenascin XB (TNXB), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001365276.1:c.10717del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}]) assert results['NM_001365276.1:c.10717del']['gene_symbol'] == 'TNXB' assert results['NM_001365276.1:c.10717del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001352205.1:p.(Arg3573AlafsTer91)', 'slr': 'NP_001352205.1:p.(R3573Afs*91)'} assert 
results['NM_001365276.1:c.10717del']['submitted_variant'] == '6-32012992-CG-C' @@ -5251,8 +5022,7 @@ def test_variant158(self): assert 'NM_019105.7:c.10711del' in list(results.keys()) assert results['NM_019105.7:c.10711del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_019105.7:c.10711del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_019105.7:c.10711del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}] - assert results['NM_019105.7:c.10711del']['transcript_description'] == 'Homo sapiens tenascin XB (TNXB), transcript variant XB, mRNA' + self.assertCountEqual(results['NM_019105.7:c.10711del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'hg19': 
{'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}]) assert results['NM_019105.7:c.10711del']['gene_symbol'] == 'TNXB' assert results['NM_019105.7:c.10711del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061978.6:p.(Arg3571AlafsTer91)', 'slr': 'NP_061978.6:p.(R3571Afs*91)'} assert results['NM_019105.7:c.10711del']['submitted_variant'] == '6-32012992-CG-C' @@ -5269,8 +5039,7 @@ def test_variant158(self): assert 'NM_019105.6:c.10711del' in list(results.keys()) assert results['NM_019105.6:c.10711del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_019105.6:c.10711del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_019105.6:c.10711del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'chr6_GL000251v2_alt', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'grch38': 
{'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'chr6_GL000252v2_alt', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'chr6_GL000254v2_alt', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167248.1:g.3271861del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'ref': 'AG', 'pos': '3271858', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167248.1:g.3271861del', 'vcf': {'chr': 'chr6_qbl_hap6', 'ref': 'AG', 'pos': '3271858', 'alt': 'A'}}}] - assert results['NM_019105.6:c.10711del']['transcript_description'] == 'Homo sapiens tenascin XB (TNXB), transcript variant XB, mRNA' + self.assertCountEqual(results['NM_019105.6:c.10711del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'chr6_GL000251v2_alt', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, 
{'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'chr6_GL000252v2_alt', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'chr6_GL000254v2_alt', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167248.1:g.3271861del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'ref': 'AG', 'pos': '3271858', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167248.1:g.3271861del', 'vcf': {'chr': 'chr6_qbl_hap6', 'ref': 'AG', 'pos': '3271858', 'alt': 'A'}}}]) assert results['NM_019105.6:c.10711del']['gene_symbol'] == 'TNXB' assert results['NM_019105.6:c.10711del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061978.6:p.(Arg3571AlafsTer91)', 'slr': 'NP_061978.6:p.(R3571Afs*91)'} assert results['NM_019105.6:c.10711del']['submitted_variant'] == '6-32012992-CG-C' @@ -5294,8 +5063,7 @@ def test_variant159(self): assert 'NM_000088.3:c.589G>T' in list(results.keys()) assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 
'LRG_1t1:c.589G>T' assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.589G>T']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.589G>T']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.589G>T']['alt_genomic_loci'], []) assert results['NM_000088.3:c.589G>T']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.589G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)'} assert results['NM_000088.3:c.589G>T']['submitted_variant'] == '17-48275363-C-A' @@ -5319,8 +5087,7 @@ def test_variant160(self): assert 'NM_000088.3:c.589-1G>T' in list(results.keys()) assert results['NM_000088.3:c.589-1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-1G>T' assert results['NM_000088.3:c.589-1G>T']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-1G>T' - assert results['NM_000088.3:c.589-1G>T']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.589-1G>T']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.589-1G>T']['alt_genomic_loci'], []) assert results['NM_000088.3:c.589-1G>T']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.589-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.589-1G>T']['submitted_variant'] == '17-48275364-C-A' @@ -5343,8 +5110,7 @@ def test_variant161(self): assert 'NM_000088.3:c.591_593inv' in list(results.keys()) assert results['NM_000088.3:c.591_593inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.591_593inv' assert results['NM_000088.3:c.591_593inv']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.591_593inv']['alt_genomic_loci'] == [] - assert 
results['NM_000088.3:c.591_593inv']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.591_593inv']['alt_genomic_loci'], []) assert results['NM_000088.3:c.591_593inv']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.591_593inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Pro198Asp)', 'slr': 'NP_000079.2:p.(P198D)'} assert results['NM_000088.3:c.591_593inv']['submitted_variant'] == '17-48275359-GGA-TCC' @@ -5369,8 +5135,7 @@ def test_variant162(self): assert 'NM_000089.3:c.1035_1035+2del' in list(results.keys()) assert results['NM_000089.3:c.1035_1035+2del']['hgvs_lrg_transcript_variant'] == 'LRG_2t1:c.1035_1035+2del' assert results['NM_000089.3:c.1035_1035+2del']['refseqgene_context_intronic_sequence'] == 'NG_007405.1(NM_000089.3):c.1035_1035+2del' - assert results['NM_000089.3:c.1035_1035+2del']['alt_genomic_loci'] == [] - assert results['NM_000089.3:c.1035_1035+2del']['transcript_description'] == 'Homo sapiens collagen type I alpha 2 chain (COL1A2), mRNA' + self.assertCountEqual(results['NM_000089.3:c.1035_1035+2del']['alt_genomic_loci'], []) assert results['NM_000089.3:c.1035_1035+2del']['gene_symbol'] == 'COL1A2' assert results['NM_000089.3:c.1035_1035+2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000080.2(LRG_2p1):p.(Val345del)', 'slr': 'NP_000080.2:p.(V345del)'} assert results['NM_000089.3:c.1035_1035+2del']['submitted_variant'] == '7-94039128-CTTG-C' @@ -5393,8 +5158,7 @@ def test_variant163(self): assert 'NM_001162427.1:c.210+1615dup' in list(results.keys()) assert results['NM_001162427.1:c.210+1615dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001162427.1:c.210+1615dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001162427.1:c.210+1615dup']['alt_genomic_loci'] == [] - assert results['NM_001162427.1:c.210+1615dup']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 
(TSC1), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001162427.1:c.210+1615dup']['alt_genomic_loci'], []) assert results['NM_001162427.1:c.210+1615dup']['gene_symbol'] == 'TSC1' assert results['NM_001162427.1:c.210+1615dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001155899.1:p.?', 'slr': 'NP_001155899.1:p.?'} assert results['NM_001162427.1:c.210+1615dup']['submitted_variant'] == '9-135800972-AC-ACC' @@ -5411,8 +5175,7 @@ def test_variant163(self): assert 'NM_001162426.1:c.363+1dup' in list(results.keys()) assert results['NM_001162426.1:c.363+1dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001162426.1:c.363+1dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001162426.1:c.363+1dup']['alt_genomic_loci'] == [] - assert results['NM_001162426.1:c.363+1dup']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001162426.1:c.363+1dup']['alt_genomic_loci'], []) assert results['NM_001162426.1:c.363+1dup']['gene_symbol'] == 'TSC1' assert results['NM_001162426.1:c.363+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001155898.1:p.(Met122AspfsTer4)', 'slr': 'NP_001155898.1:p.(M122Dfs*4)'} assert results['NM_001162426.1:c.363+1dup']['submitted_variant'] == '9-135800972-AC-ACC' @@ -5430,8 +5193,7 @@ def test_variant163(self): assert 'NM_001362177.1:c.-1+1dup' in list(results.keys()) assert results['NM_001362177.1:c.-1+1dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001362177.1:c.-1+1dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001362177.1:c.-1+1dup']['alt_genomic_loci'] == [] - assert results['NM_001362177.1:c.-1+1dup']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001362177.1:c.-1+1dup']['alt_genomic_loci'], []) assert results['NM_001362177.1:c.-1+1dup']['gene_symbol'] == 'TSC1' 
assert results['NM_001362177.1:c.-1+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001349106.1:p.?', 'slr': 'NP_001349106.1:p.?'} assert results['NM_001362177.1:c.-1+1dup']['submitted_variant'] == '9-135800972-AC-ACC' @@ -5448,8 +5210,7 @@ def test_variant163(self): assert 'NM_000368.4:c.363+1dup' in list(results.keys()) assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_transcript_variant'] == 'LRG_486t1:c.363+1dup' assert results['NM_000368.4:c.363+1dup']['refseqgene_context_intronic_sequence'] == 'NG_012386.1(NM_000368.4):c.363+1dup' - assert results['NM_000368.4:c.363+1dup']['alt_genomic_loci'] == [] - assert results['NM_000368.4:c.363+1dup']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000368.4:c.363+1dup']['alt_genomic_loci'], []) assert results['NM_000368.4:c.363+1dup']['gene_symbol'] == 'TSC1' assert results['NM_000368.4:c.363+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000359.1(LRG_486p1):p.(Met122AspfsTer4)', 'slr': 'NP_000359.1:p.(M122Dfs*4)'} assert results['NM_000368.4:c.363+1dup']['submitted_variant'] == '9-135800972-AC-ACC' @@ -5473,8 +5234,7 @@ def test_variant164(self): assert 'NM_001243246.1:c.2073G>A' in list(results.keys()) assert results['NM_001243246.1:c.2073G>A']['hgvs_lrg_transcript_variant'] == 'LRG_5t3:c.2073G>A' assert results['NM_001243246.1:c.2073G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001243246.1:c.2073G>A']['alt_genomic_loci'] == [] - assert results['NM_001243246.1:c.2073G>A']['transcript_description'] == 'Homo sapiens prolyl 3-hydroxylase 1 (P3H1), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001243246.1:c.2073G>A']['alt_genomic_loci'], []) assert results['NM_001243246.1:c.2073G>A']['gene_symbol'] == 'P3H1' assert results['NM_001243246.1:c.2073G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001230175.1:p.(Ala691=)', 'slr': 'NP_001230175.1:p.(A691=)'} 
assert results['NM_001243246.1:c.2073G>A']['submitted_variant'] == '1-43212925-C-T' @@ -5491,8 +5251,7 @@ def test_variant164(self): assert 'NM_001146289.1:c.2073G>A' in list(results.keys()) assert results['NM_001146289.1:c.2073G>A']['hgvs_lrg_transcript_variant'] == 'LRG_5t2:c.2073G>A' assert results['NM_001146289.1:c.2073G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001146289.1:c.2073G>A']['alt_genomic_loci'] == [] - assert results['NM_001146289.1:c.2073G>A']['transcript_description'] == 'Homo sapiens prolyl 3-hydroxylase 1 (P3H1), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001146289.1:c.2073G>A']['alt_genomic_loci'], []) assert results['NM_001146289.1:c.2073G>A']['gene_symbol'] == 'P3H1' assert results['NM_001146289.1:c.2073G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001139761.1:p.(Ala691=)', 'slr': 'NP_001139761.1:p.(A691=)'} assert results['NM_001146289.1:c.2073G>A']['submitted_variant'] == '1-43212925-C-T' @@ -5509,8 +5268,7 @@ def test_variant164(self): assert 'NM_022356.3:c.2055+18G>A' in list(results.keys()) assert results['NM_022356.3:c.2055+18G>A']['hgvs_lrg_transcript_variant'] == 'LRG_5t1:c.2055+18G>A' assert results['NM_022356.3:c.2055+18G>A']['refseqgene_context_intronic_sequence'] == 'NG_008123.1(NM_022356.3):c.2055+18G>A' - assert results['NM_022356.3:c.2055+18G>A']['alt_genomic_loci'] == [] - assert results['NM_022356.3:c.2055+18G>A']['transcript_description'] == 'Homo sapiens prolyl 3-hydroxylase 1 (P3H1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_022356.3:c.2055+18G>A']['alt_genomic_loci'], []) assert results['NM_022356.3:c.2055+18G>A']['gene_symbol'] == 'P3H1' assert results['NM_022356.3:c.2055+18G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_071751.3(LRG_5p1):p.?', 'slr': 'NP_071751.3:p.?'} assert results['NM_022356.3:c.2055+18G>A']['submitted_variant'] == '1-43212925-C-T' @@ -5533,8 +5291,7 @@ def test_variant165(self): assert 
'NM_001194958.2:c.20C>A' in list(results.keys()) assert results['NM_001194958.2:c.20C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001194958.2:c.20C>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001194958.2:c.20C>A']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003315950.2:g.355171C>A', 'vcf': {'chr': 'HG987_PATCH', 'ref': 'C', 'pos': '355171', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315950.2:g.355171C>A', 'vcf': {'chr': 'NW_003315950.2', 'ref': 'C', 'pos': '355171', 'alt': 'A'}}}] - assert results['NM_001194958.2:c.20C>A']['transcript_description'] == 'Homo sapiens potassium voltage-gated channel subfamily J member 18 (KCNJ18), mRNA' + self.assertCountEqual(results['NM_001194958.2:c.20C>A']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315950.2:g.355171C>A', 'vcf': {'chr': 'HG987_PATCH', 'ref': 'C', 'pos': '355171', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315950.2:g.355171C>A', 'vcf': {'chr': 'NW_003315950.2', 'ref': 'C', 'pos': '355171', 'alt': 'A'}}}]) assert results['NM_001194958.2:c.20C>A']['gene_symbol'] == 'KCNJ18' assert results['NM_001194958.2:c.20C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001181887.2:p.(Ala7Asp)', 'slr': 'NP_001181887.2:p.(A7D)'} assert results['NM_001194958.2:c.20C>A']['submitted_variant'] == 'HG987_PATCH-355171-C-A' @@ -5558,8 +5315,7 @@ def test_variant166(self): assert 'NM_000022.3:c.534A>G' in list(results.keys()) assert results['NM_000022.3:c.534A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000022.3:c.534A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000022.3:c.534A>G']['alt_genomic_loci'] == [] - assert results['NM_000022.3:c.534A>G']['transcript_description'] == 'Homo sapiens adenosine deaminase (ADA), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000022.3:c.534A>G']['alt_genomic_loci'], []) assert 
results['NM_000022.3:c.534A>G']['gene_symbol'] == 'ADA' assert results['NM_000022.3:c.534A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000013.2(LRG_16p1):p.(Val178=)', 'slr': 'NP_000013.2:p.(V178=)'} assert results['NM_000022.3:c.534A>G']['submitted_variant'] == '20-43252915-T-C' @@ -5576,8 +5332,7 @@ def test_variant166(self): assert 'NM_001322051.1:c.534A>G' in list(results.keys()) assert results['NM_001322051.1:c.534A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322051.1:c.534A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001322051.1:c.534A>G']['alt_genomic_loci'] == [] - assert results['NM_001322051.1:c.534A>G']['transcript_description'] == 'Homo sapiens adenosine deaminase (ADA), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001322051.1:c.534A>G']['alt_genomic_loci'], []) assert results['NM_001322051.1:c.534A>G']['gene_symbol'] == 'ADA' assert results['NM_001322051.1:c.534A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308980.1:p.(Val178=)', 'slr': 'NP_001308980.1:p.(V178=)'} assert results['NM_001322051.1:c.534A>G']['submitted_variant'] == '20-43252915-T-C' @@ -5594,8 +5349,7 @@ def test_variant166(self): assert 'NM_000022.2:c.534A>G' in list(results.keys()) assert results['NM_000022.2:c.534A>G']['hgvs_lrg_transcript_variant'] == 'LRG_16t1:c.534A>G' assert results['NM_000022.2:c.534A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000022.2:c.534A>G']['alt_genomic_loci'] == [] - assert results['NM_000022.2:c.534A>G']['transcript_description'] == 'Homo sapiens adenosine deaminase (ADA), mRNA' + self.assertCountEqual(results['NM_000022.2:c.534A>G']['alt_genomic_loci'], []) assert results['NM_000022.2:c.534A>G']['gene_symbol'] == 'ADA' assert results['NM_000022.2:c.534A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000013.2(LRG_16p1):p.(Val178=)', 'slr': 'NP_000013.2:p.(V178=)'} assert results['NM_000022.2:c.534A>G']['submitted_variant'] == 
'20-43252915-T-C' @@ -5613,8 +5367,7 @@ def test_variant166(self): assert 'NM_001322050.1:c.129A>G' in list(results.keys()) assert results['NM_001322050.1:c.129A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322050.1:c.129A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001322050.1:c.129A>G']['alt_genomic_loci'] == [] - assert results['NM_001322050.1:c.129A>G']['transcript_description'] == 'Homo sapiens adenosine deaminase (ADA), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001322050.1:c.129A>G']['alt_genomic_loci'], []) assert results['NM_001322050.1:c.129A>G']['gene_symbol'] == 'ADA' assert results['NM_001322050.1:c.129A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308979.1:p.(Val43=)', 'slr': 'NP_001308979.1:p.(V43=)'} assert results['NM_001322050.1:c.129A>G']['submitted_variant'] == '20-43252915-T-C' @@ -5631,8 +5384,7 @@ def test_variant166(self): assert 'NR_136160.1:n.685A>G' in list(results.keys()) assert results['NR_136160.1:n.685A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NR_136160.1:n.685A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_136160.1:n.685A>G']['alt_genomic_loci'] == [] - assert results['NR_136160.1:n.685A>G']['transcript_description'] == 'Homo sapiens adenosine deaminase (ADA), transcript variant 4, non-coding RNA' + self.assertCountEqual(results['NR_136160.1:n.685A>G']['alt_genomic_loci'], []) assert results['NR_136160.1:n.685A>G']['gene_symbol'] == 'ADA' assert results['NR_136160.1:n.685A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_136160.1:n.685A>G']['submitted_variant'] == '20-43252915-T-C' @@ -5656,8 +5408,7 @@ def test_variant167(self): assert 'NM_206933.2:c.6317C>G' in list(results.keys()) assert results['NM_206933.2:c.6317C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_206933.2:c.6317C>G']['refseqgene_context_intronic_sequence'] == '' - 
assert results['NM_206933.2:c.6317C>G']['alt_genomic_loci'] == [] - assert results['NM_206933.2:c.6317C>G']['transcript_description'] == 'Homo sapiens usherin (USH2A), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_206933.2:c.6317C>G']['alt_genomic_loci'], []) assert results['NM_206933.2:c.6317C>G']['gene_symbol'] == 'USH2A' assert results['NM_206933.2:c.6317C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_996816.2:p.(Thr2106Arg)', 'slr': 'NP_996816.2:p.(T2106R)'} assert results['NM_206933.2:c.6317C>G']['submitted_variant'] == '1-216219781-A-C' @@ -5680,8 +5431,7 @@ def test_variant168(self): assert 'NM_005896.3:c.394C>G' in list(results.keys()) assert results['NM_005896.3:c.394C>G']['hgvs_lrg_transcript_variant'] == 'LRG_610t1:c.394C>G' assert results['NM_005896.3:c.394C>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_005896.3:c.394C>G']['alt_genomic_loci'] == [] - assert results['NM_005896.3:c.394C>G']['transcript_description'] == 'Homo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_005896.3:c.394C>G']['alt_genomic_loci'], []) assert results['NM_005896.3:c.394C>G']['gene_symbol'] == 'IDH1' assert results['NM_005896.3:c.394C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005887.2(LRG_610p1):p.(Arg132Gly)', 'slr': 'NP_005887.2:p.(R132G)'} assert results['NM_005896.3:c.394C>G']['submitted_variant'] == '2-209113113-G-A,C,T' @@ -5698,8 +5448,7 @@ def test_variant168(self): assert 'NM_001282387.1:c.394C>G' in list(results.keys()) assert results['NM_001282387.1:c.394C>G']['hgvs_lrg_transcript_variant'] == 'LRG_610t2:c.394C>G' assert results['NM_001282387.1:c.394C>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001282387.1:c.394C>G']['alt_genomic_loci'] == [] - assert results['NM_001282387.1:c.394C>G']['transcript_description'] == 'Homo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript 
variant 3, mRNA' + self.assertCountEqual(results['NM_001282387.1:c.394C>G']['alt_genomic_loci'], []) assert results['NM_001282387.1:c.394C>G']['gene_symbol'] == 'IDH1' assert results['NM_001282387.1:c.394C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269316.1:p.(Arg132Gly)', 'slr': 'NP_001269316.1:p.(R132G)'} assert results['NM_001282387.1:c.394C>G']['submitted_variant'] == '2-209113113-G-A,C,T' @@ -5716,8 +5465,7 @@ def test_variant168(self): assert 'NM_001282387.1:c.394C>A' in list(results.keys()) assert results['NM_001282387.1:c.394C>A']['hgvs_lrg_transcript_variant'] == 'LRG_610t2:c.394C>A' assert results['NM_001282387.1:c.394C>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001282387.1:c.394C>A']['alt_genomic_loci'] == [] - assert results['NM_001282387.1:c.394C>A']['transcript_description'] == 'Homo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001282387.1:c.394C>A']['alt_genomic_loci'], []) assert results['NM_001282387.1:c.394C>A']['gene_symbol'] == 'IDH1' assert results['NM_001282387.1:c.394C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269316.1:p.(Arg132Ser)', 'slr': 'NP_001269316.1:p.(R132S)'} assert results['NM_001282387.1:c.394C>A']['submitted_variant'] == '2-209113113-G-A,C,T' @@ -5734,8 +5482,7 @@ def test_variant168(self): assert 'NM_005896.3:c.394C>A' in list(results.keys()) assert results['NM_005896.3:c.394C>A']['hgvs_lrg_transcript_variant'] == 'LRG_610t1:c.394C>A' assert results['NM_005896.3:c.394C>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_005896.3:c.394C>A']['alt_genomic_loci'] == [] - assert results['NM_005896.3:c.394C>A']['transcript_description'] == 'Homo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_005896.3:c.394C>A']['alt_genomic_loci'], []) assert results['NM_005896.3:c.394C>A']['gene_symbol'] == 
'IDH1' assert results['NM_005896.3:c.394C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005887.2(LRG_610p1):p.(Arg132Ser)', 'slr': 'NP_005887.2:p.(R132S)'} assert results['NM_005896.3:c.394C>A']['submitted_variant'] == '2-209113113-G-A,C,T' @@ -5752,8 +5499,7 @@ def test_variant168(self): assert 'NM_001282386.1:c.394C>T' in list(results.keys()) assert results['NM_001282386.1:c.394C>T']['hgvs_lrg_transcript_variant'] == 'LRG_610t3:c.394C>T' assert results['NM_001282386.1:c.394C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001282386.1:c.394C>T']['alt_genomic_loci'] == [] - assert results['NM_001282386.1:c.394C>T']['transcript_description'] == 'Homo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001282386.1:c.394C>T']['alt_genomic_loci'], []) assert results['NM_001282386.1:c.394C>T']['gene_symbol'] == 'IDH1' assert results['NM_001282386.1:c.394C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269315.1:p.(Arg132Cys)', 'slr': 'NP_001269315.1:p.(R132C)'} assert results['NM_001282386.1:c.394C>T']['submitted_variant'] == '2-209113113-G-A,C,T' @@ -5770,8 +5516,7 @@ def test_variant168(self): assert 'NM_005896.2:c.394C>A' in list(results.keys()) assert results['NM_005896.2:c.394C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_005896.2:c.394C>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_005896.2:c.394C>A']['alt_genomic_loci'] == [] - assert results['NM_005896.2:c.394C>A']['transcript_description'] == 'Homo sapiens isocitrate dehydrogenase 1 (NADP+), soluble (IDH1), mRNA' + self.assertCountEqual(results['NM_005896.2:c.394C>A']['alt_genomic_loci'], []) assert results['NM_005896.2:c.394C>A']['gene_symbol'] == 'IDH1' assert results['NM_005896.2:c.394C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005887.2(LRG_610p1):p.(Arg132Ser)', 'slr': 'NP_005887.2:p.(R132S)'} assert 
results['NM_005896.2:c.394C>A']['submitted_variant'] == '2-209113113-G-A,C,T' @@ -5788,8 +5533,7 @@ def test_variant168(self): assert 'NM_005896.2:c.394C>G' in list(results.keys()) assert results['NM_005896.2:c.394C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_005896.2:c.394C>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_005896.2:c.394C>G']['alt_genomic_loci'] == [] - assert results['NM_005896.2:c.394C>G']['transcript_description'] == 'Homo sapiens isocitrate dehydrogenase 1 (NADP+), soluble (IDH1), mRNA' + self.assertCountEqual(results['NM_005896.2:c.394C>G']['alt_genomic_loci'], []) assert results['NM_005896.2:c.394C>G']['gene_symbol'] == 'IDH1' assert results['NM_005896.2:c.394C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005887.2(LRG_610p1):p.(Arg132Gly)', 'slr': 'NP_005887.2:p.(R132G)'} assert results['NM_005896.2:c.394C>G']['submitted_variant'] == '2-209113113-G-A,C,T' @@ -5807,8 +5551,7 @@ def test_variant168(self): assert 'NM_005896.3:c.394C>T' in list(results.keys()) assert results['NM_005896.3:c.394C>T']['hgvs_lrg_transcript_variant'] == 'LRG_610t1:c.394C>T' assert results['NM_005896.3:c.394C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_005896.3:c.394C>T']['alt_genomic_loci'] == [] - assert results['NM_005896.3:c.394C>T']['transcript_description'] == 'Homo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_005896.3:c.394C>T']['alt_genomic_loci'], []) assert results['NM_005896.3:c.394C>T']['gene_symbol'] == 'IDH1' assert results['NM_005896.3:c.394C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005887.2(LRG_610p1):p.(Arg132Cys)', 'slr': 'NP_005887.2:p.(R132C)'} assert results['NM_005896.3:c.394C>T']['submitted_variant'] == '2-209113113-G-A,C,T' @@ -5825,8 +5568,7 @@ def test_variant168(self): assert 'NM_001282387.1:c.394C>T' in list(results.keys()) assert 
results['NM_001282387.1:c.394C>T']['hgvs_lrg_transcript_variant'] == 'LRG_610t2:c.394C>T' assert results['NM_001282387.1:c.394C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001282387.1:c.394C>T']['alt_genomic_loci'] == [] - assert results['NM_001282387.1:c.394C>T']['transcript_description'] == 'Homo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001282387.1:c.394C>T']['alt_genomic_loci'], []) assert results['NM_001282387.1:c.394C>T']['gene_symbol'] == 'IDH1' assert results['NM_001282387.1:c.394C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269316.1:p.(Arg132Cys)', 'slr': 'NP_001269316.1:p.(R132C)'} assert results['NM_001282387.1:c.394C>T']['submitted_variant'] == '2-209113113-G-A,C,T' @@ -5843,8 +5585,7 @@ def test_variant168(self): assert 'NM_001282386.1:c.394C>G' in list(results.keys()) assert results['NM_001282386.1:c.394C>G']['hgvs_lrg_transcript_variant'] == 'LRG_610t3:c.394C>G' assert results['NM_001282386.1:c.394C>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001282386.1:c.394C>G']['alt_genomic_loci'] == [] - assert results['NM_001282386.1:c.394C>G']['transcript_description'] == 'Homo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001282386.1:c.394C>G']['alt_genomic_loci'], []) assert results['NM_001282386.1:c.394C>G']['gene_symbol'] == 'IDH1' assert results['NM_001282386.1:c.394C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269315.1:p.(Arg132Gly)', 'slr': 'NP_001269315.1:p.(R132G)'} assert results['NM_001282386.1:c.394C>G']['submitted_variant'] == '2-209113113-G-A,C,T' @@ -5861,8 +5602,7 @@ def test_variant168(self): assert 'NM_005896.2:c.394C>T' in list(results.keys()) assert results['NM_005896.2:c.394C>T']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_005896.2:c.394C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_005896.2:c.394C>T']['alt_genomic_loci'] == [] - assert results['NM_005896.2:c.394C>T']['transcript_description'] == 'Homo sapiens isocitrate dehydrogenase 1 (NADP+), soluble (IDH1), mRNA' + self.assertCountEqual(results['NM_005896.2:c.394C>T']['alt_genomic_loci'], []) assert results['NM_005896.2:c.394C>T']['gene_symbol'] == 'IDH1' assert results['NM_005896.2:c.394C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005887.2(LRG_610p1):p.(Arg132Cys)', 'slr': 'NP_005887.2:p.(R132C)'} assert results['NM_005896.2:c.394C>T']['submitted_variant'] == '2-209113113-G-A,C,T' @@ -5879,8 +5619,7 @@ def test_variant168(self): assert 'NM_001282386.1:c.394C>A' in list(results.keys()) assert results['NM_001282386.1:c.394C>A']['hgvs_lrg_transcript_variant'] == 'LRG_610t3:c.394C>A' assert results['NM_001282386.1:c.394C>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001282386.1:c.394C>A']['alt_genomic_loci'] == [] - assert results['NM_001282386.1:c.394C>A']['transcript_description'] == 'Homo sapiens isocitrate dehydrogenase (NADP(+)) 1, cytosolic (IDH1), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001282386.1:c.394C>A']['alt_genomic_loci'], []) assert results['NM_001282386.1:c.394C>A']['gene_symbol'] == 'IDH1' assert results['NM_001282386.1:c.394C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269315.1:p.(Arg132Ser)', 'slr': 'NP_001269315.1:p.(R132S)'} assert results['NM_001282386.1:c.394C>A']['submitted_variant'] == '2-209113113-G-A,C,T' @@ -5903,8 +5642,7 @@ def test_variant169(self): assert 'NM_001204314.1:c.*6525_*6526=' in list(results.keys()) assert results['NM_001204314.1:c.*6525_*6526=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001204314.1:c.*6525_*6526=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001204314.1:c.*6525_*6526=']['alt_genomic_loci'] == [] - assert 
results['NM_001204314.1:c.*6525_*6526=']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA' + self.assertCountEqual(results['NM_001204314.1:c.*6525_*6526=']['alt_genomic_loci'], []) assert results['NM_001204314.1:c.*6525_*6526=']['gene_symbol'] == 'PRLR' assert results['NM_001204314.1:c.*6525_*6526=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191243.1:p.?', 'slr': 'NP_001191243.1:p.?'} assert results['NM_001204314.1:c.*6525_*6526=']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' @@ -5921,8 +5659,7 @@ def test_variant169(self): assert 'NM_001204314.2:c.*6528del' in list(results.keys()) assert results['NM_001204314.2:c.*6528del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001204314.2:c.*6528del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001204314.2:c.*6528del']['alt_genomic_loci'] == [] - assert results['NM_001204314.2:c.*6528del']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 6, mRNA' + self.assertCountEqual(results['NM_001204314.2:c.*6528del']['alt_genomic_loci'], []) assert results['NM_001204314.2:c.*6528del']['gene_symbol'] == 'PRLR' assert results['NM_001204314.2:c.*6528del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191243.1:p.?', 'slr': 'NP_001191243.1:p.?'} assert results['NM_001204314.2:c.*6528del']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' @@ -5939,8 +5676,7 @@ def test_variant169(self): assert 'NM_001204317.1:c.856-9153_856-9152=' in list(results.keys()) assert results['NM_001204317.1:c.856-9153_856-9152=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001204317.1:c.856-9153_856-9152=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001204317.1:c.856-9153_856-9152=']['alt_genomic_loci'] == [] - assert results['NM_001204317.1:c.856-9153_856-9152=']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 4, 
mRNA' + self.assertCountEqual(results['NM_001204317.1:c.856-9153_856-9152=']['alt_genomic_loci'], []) assert results['NM_001204317.1:c.856-9153_856-9152=']['gene_symbol'] == 'PRLR' assert results['NM_001204317.1:c.856-9153_856-9152=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191246.1:p.?', 'slr': 'NP_001191246.1:p.?'} assert results['NM_001204317.1:c.856-9153_856-9152=']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' @@ -5957,8 +5693,7 @@ def test_variant169(self): assert 'NM_001204316.1:c.1009+7385_1009+7386=' in list(results.keys()) assert results['NM_001204316.1:c.1009+7385_1009+7386=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001204316.1:c.1009+7385_1009+7386=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001204316.1:c.1009+7385_1009+7386=']['alt_genomic_loci'] == [] - assert results['NM_001204316.1:c.1009+7385_1009+7386=']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001204316.1:c.1009+7385_1009+7386=']['alt_genomic_loci'], []) assert results['NM_001204316.1:c.1009+7385_1009+7386=']['gene_symbol'] == 'PRLR' assert results['NM_001204316.1:c.1009+7385_1009+7386=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191245.1:p.?', 'slr': 'NP_001191245.1:p.?'} assert results['NM_001204316.1:c.1009+7385_1009+7386=']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' @@ -5976,8 +5711,7 @@ def test_variant169(self): assert 'NR_037910.1:n.828-9153_828-9152=' in list(results.keys()) assert results['NR_037910.1:n.828-9153_828-9152=']['hgvs_lrg_transcript_variant'] == '' assert results['NR_037910.1:n.828-9153_828-9152=']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_037910.1:n.828-9153_828-9152=']['alt_genomic_loci'] == [] - assert results['NR_037910.1:n.828-9153_828-9152=']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 7, non-coding RNA' + 
self.assertCountEqual(results['NR_037910.1:n.828-9153_828-9152=']['alt_genomic_loci'], []) assert results['NR_037910.1:n.828-9153_828-9152=']['gene_symbol'] == 'PRLR' assert results['NR_037910.1:n.828-9153_828-9152=']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_037910.1:n.828-9153_828-9152=']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' @@ -5994,8 +5728,7 @@ def test_variant169(self): assert 'NM_001204318.1:c.686-9153_686-9152=' in list(results.keys()) assert results['NM_001204318.1:c.686-9153_686-9152=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001204318.1:c.686-9153_686-9152=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001204318.1:c.686-9153_686-9152=']['alt_genomic_loci'] == [] - assert results['NM_001204318.1:c.686-9153_686-9152=']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001204318.1:c.686-9153_686-9152=']['alt_genomic_loci'], []) assert results['NM_001204318.1:c.686-9153_686-9152=']['gene_symbol'] == 'PRLR' assert results['NM_001204318.1:c.686-9153_686-9152=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191247.1:p.?', 'slr': 'NP_001191247.1:p.?'} assert results['NM_001204318.1:c.686-9153_686-9152=']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' @@ -6012,8 +5745,7 @@ def test_variant169(self): assert 'NM_000949.5:c.*6525_*6526=' in list(results.keys()) assert results['NM_000949.5:c.*6525_*6526=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000949.5:c.*6525_*6526=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000949.5:c.*6525_*6526=']['alt_genomic_loci'] == [] - assert results['NM_000949.5:c.*6525_*6526=']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000949.5:c.*6525_*6526=']['alt_genomic_loci'], []) 
assert results['NM_000949.5:c.*6525_*6526=']['gene_symbol'] == 'PRLR' assert results['NM_000949.5:c.*6525_*6526=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000940.1:p.?', 'slr': 'NP_000940.1:p.?'} assert results['NM_000949.5:c.*6525_*6526=']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' @@ -6030,8 +5762,7 @@ def test_variant169(self): assert 'NM_000949.6:c.*6528del' in list(results.keys()) assert results['NM_000949.6:c.*6528del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000949.6:c.*6528del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000949.6:c.*6528del']['alt_genomic_loci'] == [] - assert results['NM_000949.6:c.*6528del']['transcript_description'] == 'Homo sapiens prolactin receptor (PRLR), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000949.6:c.*6528del']['alt_genomic_loci'], []) assert results['NM_000949.6:c.*6528del']['gene_symbol'] == 'PRLR' assert results['NM_000949.6:c.*6528del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000940.1:p.?', 'slr': 'NP_000940.1:p.?'} assert results['NM_000949.6:c.*6528del']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' @@ -6055,8 +5786,7 @@ def test_variant170(self): assert 'NM_015120.4:c.1580_1581insCCT' in list(results.keys()) assert results['NM_015120.4:c.1580_1581insCCT']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.1580_1581insCCT' assert results['NM_015120.4:c.1580_1581insCCT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_015120.4:c.1580_1581insCCT']['alt_genomic_loci'] == [] - assert results['NM_015120.4:c.1580_1581insCCT']['transcript_description'] == 'Homo sapiens ALMS1, centrosome and basal body associated protein (ALMS1), mRNA' + self.assertCountEqual(results['NM_015120.4:c.1580_1581insCCT']['alt_genomic_loci'], []) assert results['NM_015120.4:c.1580_1581insCCT']['gene_symbol'] == 'ALMS1' assert results['NM_015120.4:c.1580_1581insCCT']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_055935.4(LRG_741p1):p.(Leu527dup)', 'slr': 'NP_055935.4:p.(L527dup)'} assert results['NM_015120.4:c.1580_1581insCCT']['submitted_variant'] == 'NC_000002.11:g.73675227_73675229delTCTinsTCTCTC' @@ -6080,8 +5810,7 @@ def test_variant171(self): assert 'NM_000828.4:c.-2dup' in list(results.keys()) assert results['NM_000828.4:c.-2dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000828.4:c.-2dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000828.4:c.-2dup']['alt_genomic_loci'] == [] - assert results['NM_000828.4:c.-2dup']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_000828.4:c.-2dup']['alt_genomic_loci'], []) assert results['NM_000828.4:c.-2dup']['gene_symbol'] == 'GRIA3' assert results['NM_000828.4:c.-2dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} assert results['NM_000828.4:c.-2dup']['submitted_variant'] == 'NM_000828.4:c.-2dupG' @@ -6104,8 +5833,7 @@ def test_variant172(self): assert 'NM_007325.4:c.-2dup' in list(results.keys()) assert results['NM_007325.4:c.-2dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007325.4:c.-2dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_007325.4:c.-2dup']['alt_genomic_loci'] == [] - assert results['NM_007325.4:c.-2dup']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_007325.4:c.-2dup']['alt_genomic_loci'], []) assert results['NM_007325.4:c.-2dup']['gene_symbol'] == 'GRIA3' assert results['NM_007325.4:c.-2dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_015564.4:p.?', 'slr': 'NP_015564.4:p.?'} assert results['NM_007325.4:c.-2dup']['submitted_variant'] == 'X-122318386-A-AGG' @@ -6123,8 +5851,7 @@ def test_variant172(self): assert 'NM_001256743.1:c.-2dup' in 
list(results.keys()) assert results['NM_001256743.1:c.-2dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001256743.1:c.-2dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001256743.1:c.-2dup']['alt_genomic_loci'] == [] - assert results['NM_001256743.1:c.-2dup']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001256743.1:c.-2dup']['alt_genomic_loci'], []) assert results['NM_001256743.1:c.-2dup']['gene_symbol'] == 'GRIA3' assert results['NM_001256743.1:c.-2dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243672.1:p.?', 'slr': 'NP_001243672.1:p.?'} assert results['NM_001256743.1:c.-2dup']['submitted_variant'] == 'X-122318386-A-AGG' @@ -6141,8 +5868,7 @@ def test_variant172(self): assert 'NM_000828.4:c.-2dup' in list(results.keys()) assert results['NM_000828.4:c.-2dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000828.4:c.-2dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000828.4:c.-2dup']['alt_genomic_loci'] == [] - assert results['NM_000828.4:c.-2dup']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_000828.4:c.-2dup']['alt_genomic_loci'], []) assert results['NM_000828.4:c.-2dup']['gene_symbol'] == 'GRIA3' assert results['NM_000828.4:c.-2dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} assert results['NM_000828.4:c.-2dup']['submitted_variant'] == 'X-122318386-A-AGG' @@ -6166,8 +5892,7 @@ def test_variant173(self): assert 'NM_000828.4:c.-2G>T' in list(results.keys()) assert results['NM_000828.4:c.-2G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000828.4:c.-2G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000828.4:c.-2G>T']['alt_genomic_loci'] == [] - assert 
results['NM_000828.4:c.-2G>T']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_000828.4:c.-2G>T']['alt_genomic_loci'], []) assert results['NM_000828.4:c.-2G>T']['gene_symbol'] == 'GRIA3' assert results['NM_000828.4:c.-2G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} assert results['NM_000828.4:c.-2G>T']['submitted_variant'] == 'NM_000828.4:c.-2G>T' @@ -6190,8 +5915,7 @@ def test_variant174(self): assert 'NM_000828.4:c.-2G=' in list(results.keys()) assert results['NM_000828.4:c.-2G=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000828.4:c.-2G=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000828.4:c.-2G=']['alt_genomic_loci'] == [] - assert results['NM_000828.4:c.-2G=']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_000828.4:c.-2G=']['alt_genomic_loci'], []) assert results['NM_000828.4:c.-2G=']['gene_symbol'] == 'GRIA3' assert results['NM_000828.4:c.-2G=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} assert results['NM_000828.4:c.-2G=']['submitted_variant'] == 'NM_000828.4:c.-2G=' @@ -6216,8 +5940,7 @@ def test_variant175(self): assert 'NM_000828.4:c.-2G>T' in list(results.keys()) assert results['NM_000828.4:c.-2G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000828.4:c.-2G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000828.4:c.-2G>T']['alt_genomic_loci'] == [] - assert results['NM_000828.4:c.-2G>T']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_000828.4:c.-2G>T']['alt_genomic_loci'], []) assert results['NM_000828.4:c.-2G>T']['gene_symbol'] 
== 'GRIA3' assert results['NM_000828.4:c.-2G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} assert results['NM_000828.4:c.-2G>T']['submitted_variant'] == 'X-122318386-A-AT' @@ -6234,8 +5957,7 @@ def test_variant175(self): assert 'NM_001256743.1:c.-2G>T' in list(results.keys()) assert results['NM_001256743.1:c.-2G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001256743.1:c.-2G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001256743.1:c.-2G>T']['alt_genomic_loci'] == [] - assert results['NM_001256743.1:c.-2G>T']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001256743.1:c.-2G>T']['alt_genomic_loci'], []) assert results['NM_001256743.1:c.-2G>T']['gene_symbol'] == 'GRIA3' assert results['NM_001256743.1:c.-2G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243672.1:p.?', 'slr': 'NP_001243672.1:p.?'} assert results['NM_001256743.1:c.-2G>T']['submitted_variant'] == 'X-122318386-A-AT' @@ -6252,8 +5974,7 @@ def test_variant175(self): assert 'NM_007325.4:c.-2G>T' in list(results.keys()) assert results['NM_007325.4:c.-2G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007325.4:c.-2G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_007325.4:c.-2G>T']['alt_genomic_loci'] == [] - assert results['NM_007325.4:c.-2G>T']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_007325.4:c.-2G>T']['alt_genomic_loci'], []) assert results['NM_007325.4:c.-2G>T']['gene_symbol'] == 'GRIA3' assert results['NM_007325.4:c.-2G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_015564.4:p.?', 'slr': 'NP_015564.4:p.?'} assert results['NM_007325.4:c.-2G>T']['submitted_variant'] == 'X-122318386-A-AT' @@ -6277,8 +5998,7 @@ def 
test_variant176(self): assert 'NM_000828.4:c.-2_-1insT' in list(results.keys()) assert results['NM_000828.4:c.-2_-1insT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000828.4:c.-2_-1insT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000828.4:c.-2_-1insT']['alt_genomic_loci'] == [] - assert results['NM_000828.4:c.-2_-1insT']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_000828.4:c.-2_-1insT']['alt_genomic_loci'], []) assert results['NM_000828.4:c.-2_-1insT']['gene_symbol'] == 'GRIA3' assert results['NM_000828.4:c.-2_-1insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} assert results['NM_000828.4:c.-2_-1insT']['submitted_variant'] == 'NM_000828.4:c.-2_-1insT' @@ -6301,8 +6021,7 @@ def test_variant177(self): assert 'NM_000828.4:c.-3_-2insT' in list(results.keys()) assert results['NM_000828.4:c.-3_-2insT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000828.4:c.-3_-2insT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000828.4:c.-3_-2insT']['alt_genomic_loci'] == [] - assert results['NM_000828.4:c.-3_-2insT']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_000828.4:c.-3_-2insT']['alt_genomic_loci'], []) assert results['NM_000828.4:c.-3_-2insT']['gene_symbol'] == 'GRIA3' assert results['NM_000828.4:c.-3_-2insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} assert results['NM_000828.4:c.-3_-2insT']['submitted_variant'] == 'NM_000828.4:c.-3_-2insT' @@ -6327,8 +6046,7 @@ def test_variant178(self): assert 'NM_000828.4:c.-2delinsTT' in list(results.keys()) assert results['NM_000828.4:c.-2delinsTT']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_000828.4:c.-2delinsTT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000828.4:c.-2delinsTT']['alt_genomic_loci'] == [] - assert results['NM_000828.4:c.-2delinsTT']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_000828.4:c.-2delinsTT']['alt_genomic_loci'], []) assert results['NM_000828.4:c.-2delinsTT']['gene_symbol'] == 'GRIA3' assert results['NM_000828.4:c.-2delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} assert results['NM_000828.4:c.-2delinsTT']['submitted_variant'] == 'NM_000828.4:c.-2delGinsTT' @@ -6352,8 +6070,7 @@ def test_variant179(self): assert 'NM_000828.4:c.-2_-1delinsTT' in list(results.keys()) assert results['NM_000828.4:c.-2_-1delinsTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000828.4:c.-2_-1delinsTT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000828.4:c.-2_-1delinsTT']['alt_genomic_loci'] == [] - assert results['NM_000828.4:c.-2_-1delinsTT']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_000828.4:c.-2_-1delinsTT']['alt_genomic_loci'], []) assert results['NM_000828.4:c.-2_-1delinsTT']['gene_symbol'] == 'GRIA3' assert results['NM_000828.4:c.-2_-1delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} assert results['NM_000828.4:c.-2_-1delinsTT']['submitted_variant'] == 'NM_000828.4:c.-2_-1delGCinsTT' @@ -6376,8 +6093,7 @@ def test_variant180(self): assert 'NM_000828.4:c.-3_-2delinsTT' in list(results.keys()) assert results['NM_000828.4:c.-3_-2delinsTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000828.4:c.-3_-2delinsTT']['refseqgene_context_intronic_sequence'] == '' - assert 
results['NM_000828.4:c.-3_-2delinsTT']['alt_genomic_loci'] == [] - assert results['NM_000828.4:c.-3_-2delinsTT']['transcript_description'] == 'Homo sapiens glutamate ionotropic receptor AMPA type subunit 3 (GRIA3), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_000828.4:c.-3_-2delinsTT']['alt_genomic_loci'], []) assert results['NM_000828.4:c.-3_-2delinsTT']['gene_symbol'] == 'GRIA3' assert results['NM_000828.4:c.-3_-2delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} assert results['NM_000828.4:c.-3_-2delinsTT']['submitted_variant'] == 'NM_000828.4:c.-3_-2delAGinsTT' @@ -6401,8 +6117,7 @@ def test_variant181(self): assert 'NM_014249.3:c.951dup' in list(results.keys()) assert results['NM_014249.3:c.951dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014249.3:c.951dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_014249.3:c.951dup']['alt_genomic_loci'] == [] - assert results['NM_014249.3:c.951dup']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_014249.3:c.951dup']['alt_genomic_loci'], []) assert results['NM_014249.3:c.951dup']['gene_symbol'] == 'NR2E3' assert results['NM_014249.3:c.951dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Thr318HisfsTer23)', 'slr': 'NP_055064.1:p.(T318Hfs*23)'} assert results['NM_014249.3:c.951dup']['submitted_variant'] == '15-72105929-C-C' @@ -6419,8 +6134,7 @@ def test_variant181(self): assert 'NM_014249.2:c.951dup' in list(results.keys()) assert results['NM_014249.2:c.951dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014249.2:c.951dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_014249.2:c.951dup']['alt_genomic_loci'] == [] - assert results['NM_014249.2:c.951dup']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2, group E, member 3 
(NR2E3), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_014249.2:c.951dup']['alt_genomic_loci'], []) assert results['NM_014249.2:c.951dup']['gene_symbol'] == 'NR2E3' assert results['NM_014249.2:c.951dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Thr318HisfsTer23)', 'slr': 'NP_055064.1:p.(T318Hfs*23)'} assert results['NM_014249.2:c.951dup']['submitted_variant'] == '15-72105929-C-C' @@ -6438,8 +6152,7 @@ def test_variant181(self): assert 'NM_016346.3:c.951dup' in list(results.keys()) assert results['NM_016346.3:c.951dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_016346.3:c.951dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_016346.3:c.951dup']['alt_genomic_loci'] == [] - assert results['NM_016346.3:c.951dup']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_016346.3:c.951dup']['alt_genomic_loci'], []) assert results['NM_016346.3:c.951dup']['gene_symbol'] == 'NR2E3' assert results['NM_016346.3:c.951dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Thr318HisfsTer23)', 'slr': 'NP_057430.1:p.(T318Hfs*23)'} assert results['NM_016346.3:c.951dup']['submitted_variant'] == '15-72105929-C-C' @@ -6456,8 +6169,7 @@ def test_variant181(self): assert 'NM_016346.2:c.951dup' in list(results.keys()) assert results['NM_016346.2:c.951dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_016346.2:c.951dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_016346.2:c.951dup']['alt_genomic_loci'] == [] - assert results['NM_016346.2:c.951dup']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_016346.2:c.951dup']['alt_genomic_loci'], []) assert results['NM_016346.2:c.951dup']['gene_symbol'] == 'NR2E3' assert 
results['NM_016346.2:c.951dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Thr318HisfsTer23)', 'slr': 'NP_057430.1:p.(T318Hfs*23)'} assert results['NM_016346.2:c.951dup']['submitted_variant'] == '15-72105929-C-C' @@ -6480,8 +6192,7 @@ def test_variant182(self): assert 'NM_014249.2:c.947_948insTT' in list(results.keys()) assert results['NM_014249.2:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014249.2:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_014249.2:c.947_948insTT']['alt_genomic_loci'] == [] - assert results['NM_014249.2:c.947_948insTT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_014249.2:c.947_948insTT']['alt_genomic_loci'], []) assert results['NM_014249.2:c.947_948insTT']['gene_symbol'] == 'NR2E3' assert results['NM_014249.2:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Pro317SerfsTer8)', 'slr': 'NP_055064.1:p.(P317Sfs*8)'} assert results['NM_014249.2:c.947_948insTT']['submitted_variant'] == '15-72105928-AC-ATT' @@ -6498,8 +6209,7 @@ def test_variant182(self): assert 'NM_016346.3:c.947_948insTT' in list(results.keys()) assert results['NM_016346.3:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_016346.3:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_016346.3:c.947_948insTT']['alt_genomic_loci'] == [] - assert results['NM_016346.3:c.947_948insTT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_016346.3:c.947_948insTT']['alt_genomic_loci'], []) assert results['NM_016346.3:c.947_948insTT']['gene_symbol'] == 'NR2E3' assert results['NM_016346.3:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Pro317SerfsTer8)', 'slr': 
'NP_057430.1:p.(P317Sfs*8)'} assert results['NM_016346.3:c.947_948insTT']['submitted_variant'] == '15-72105928-AC-ATT' @@ -6517,8 +6227,7 @@ def test_variant182(self): assert 'NM_016346.2:c.947_948insTT' in list(results.keys()) assert results['NM_016346.2:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_016346.2:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_016346.2:c.947_948insTT']['alt_genomic_loci'] == [] - assert results['NM_016346.2:c.947_948insTT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_016346.2:c.947_948insTT']['alt_genomic_loci'], []) assert results['NM_016346.2:c.947_948insTT']['gene_symbol'] == 'NR2E3' assert results['NM_016346.2:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Pro317SerfsTer8)', 'slr': 'NP_057430.1:p.(P317Sfs*8)'} assert results['NM_016346.2:c.947_948insTT']['submitted_variant'] == '15-72105928-AC-ATT' @@ -6535,8 +6244,7 @@ def test_variant182(self): assert 'NM_014249.3:c.947_948insTT' in list(results.keys()) assert results['NM_014249.3:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014249.3:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_014249.3:c.947_948insTT']['alt_genomic_loci'] == [] - assert results['NM_014249.3:c.947_948insTT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_014249.3:c.947_948insTT']['alt_genomic_loci'], []) assert results['NM_014249.3:c.947_948insTT']['gene_symbol'] == 'NR2E3' assert results['NM_014249.3:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Pro317SerfsTer8)', 'slr': 'NP_055064.1:p.(P317Sfs*8)'} assert results['NM_014249.3:c.947_948insTT']['submitted_variant'] == 
'15-72105928-AC-ATT' @@ -6559,8 +6267,7 @@ def test_variant183(self): assert 'NM_014249.2:c.947_948insTT' in list(results.keys()) assert results['NM_014249.2:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014249.2:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_014249.2:c.947_948insTT']['alt_genomic_loci'] == [] - assert results['NM_014249.2:c.947_948insTT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_014249.2:c.947_948insTT']['alt_genomic_loci'], []) assert results['NM_014249.2:c.947_948insTT']['gene_symbol'] == 'NR2E3' assert results['NM_014249.2:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Pro317SerfsTer8)', 'slr': 'NP_055064.1:p.(P317Sfs*8)'} assert results['NM_014249.2:c.947_948insTT']['submitted_variant'] == '15-72105928-ACC-ATT' @@ -6577,8 +6284,7 @@ def test_variant183(self): assert 'NM_016346.3:c.947_948insTT' in list(results.keys()) assert results['NM_016346.3:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_016346.3:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_016346.3:c.947_948insTT']['alt_genomic_loci'] == [] - assert results['NM_016346.3:c.947_948insTT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_016346.3:c.947_948insTT']['alt_genomic_loci'], []) assert results['NM_016346.3:c.947_948insTT']['gene_symbol'] == 'NR2E3' assert results['NM_016346.3:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Pro317SerfsTer8)', 'slr': 'NP_057430.1:p.(P317Sfs*8)'} assert results['NM_016346.3:c.947_948insTT']['submitted_variant'] == '15-72105928-ACC-ATT' @@ -6596,8 +6302,7 @@ def test_variant183(self): assert 'NM_016346.2:c.947_948insTT' in 
list(results.keys()) assert results['NM_016346.2:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_016346.2:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_016346.2:c.947_948insTT']['alt_genomic_loci'] == [] - assert results['NM_016346.2:c.947_948insTT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_016346.2:c.947_948insTT']['alt_genomic_loci'], []) assert results['NM_016346.2:c.947_948insTT']['gene_symbol'] == 'NR2E3' assert results['NM_016346.2:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Pro317SerfsTer8)', 'slr': 'NP_057430.1:p.(P317Sfs*8)'} assert results['NM_016346.2:c.947_948insTT']['submitted_variant'] == '15-72105928-ACC-ATT' @@ -6614,8 +6319,7 @@ def test_variant183(self): assert 'NM_014249.3:c.947_948insTT' in list(results.keys()) assert results['NM_014249.3:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014249.3:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_014249.3:c.947_948insTT']['alt_genomic_loci'] == [] - assert results['NM_014249.3:c.947_948insTT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_014249.3:c.947_948insTT']['alt_genomic_loci'], []) assert results['NM_014249.3:c.947_948insTT']['gene_symbol'] == 'NR2E3' assert results['NM_014249.3:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Pro317SerfsTer8)', 'slr': 'NP_055064.1:p.(P317Sfs*8)'} assert results['NM_014249.3:c.947_948insTT']['submitted_variant'] == '15-72105928-ACC-ATT' @@ -6638,8 +6342,7 @@ def test_variant184(self): assert 'NM_014249.3:c.947delinsTT' in list(results.keys()) assert results['NM_014249.3:c.947delinsTT']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_014249.3:c.947delinsTT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_014249.3:c.947delinsTT']['alt_genomic_loci'] == [] - assert results['NM_014249.3:c.947delinsTT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_014249.3:c.947delinsTT']['alt_genomic_loci'], []) assert results['NM_014249.3:c.947delinsTT']['gene_symbol'] == 'NR2E3' assert results['NM_014249.3:c.947delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Asp316ValfsTer25)', 'slr': 'NP_055064.1:p.(D316Vfs*25)'} assert results['NM_014249.3:c.947delinsTT']['submitted_variant'] == '15-72105927-GACC-GTT' @@ -6656,8 +6359,7 @@ def test_variant184(self): assert 'NM_016346.2:c.947delinsTT' in list(results.keys()) assert results['NM_016346.2:c.947delinsTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_016346.2:c.947delinsTT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_016346.2:c.947delinsTT']['alt_genomic_loci'] == [] - assert results['NM_016346.2:c.947delinsTT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_016346.2:c.947delinsTT']['alt_genomic_loci'], []) assert results['NM_016346.2:c.947delinsTT']['gene_symbol'] == 'NR2E3' assert results['NM_016346.2:c.947delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Asp316ValfsTer25)', 'slr': 'NP_057430.1:p.(D316Vfs*25)'} assert results['NM_016346.2:c.947delinsTT']['submitted_variant'] == '15-72105927-GACC-GTT' @@ -6674,8 +6376,7 @@ def test_variant184(self): assert 'NM_014249.2:c.947delinsTT' in list(results.keys()) assert results['NM_014249.2:c.947delinsTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014249.2:c.947delinsTT']['refseqgene_context_intronic_sequence'] == '' - assert 
results['NM_014249.2:c.947delinsTT']['alt_genomic_loci'] == [] - assert results['NM_014249.2:c.947delinsTT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_014249.2:c.947delinsTT']['alt_genomic_loci'], []) assert results['NM_014249.2:c.947delinsTT']['gene_symbol'] == 'NR2E3' assert results['NM_014249.2:c.947delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Asp316ValfsTer25)', 'slr': 'NP_055064.1:p.(D316Vfs*25)'} assert results['NM_014249.2:c.947delinsTT']['submitted_variant'] == '15-72105927-GACC-GTT' @@ -6693,8 +6394,7 @@ def test_variant184(self): assert 'NM_016346.3:c.947delinsTT' in list(results.keys()) assert results['NM_016346.3:c.947delinsTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_016346.3:c.947delinsTT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_016346.3:c.947delinsTT']['alt_genomic_loci'] == [] - assert results['NM_016346.3:c.947delinsTT']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_016346.3:c.947delinsTT']['alt_genomic_loci'], []) assert results['NM_016346.3:c.947delinsTT']['gene_symbol'] == 'NR2E3' assert results['NM_016346.3:c.947delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Asp316ValfsTer25)', 'slr': 'NP_057430.1:p.(D316Vfs*25)'} assert results['NM_016346.3:c.947delinsTT']['submitted_variant'] == '15-72105927-GACC-GTT' @@ -6718,8 +6418,7 @@ def test_variant185(self): assert 'NM_001042544.1:c.3233_3235=' in list(results.keys()) assert results['NM_001042544.1:c.3233_3235=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001042544.1:c.3233_3235=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001042544.1:c.3233_3235=']['alt_genomic_loci'] == [] - assert 
results['NM_001042544.1:c.3233_3235=']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001042544.1:c.3233_3235=']['alt_genomic_loci'], []) assert results['NM_001042544.1:c.3233_3235=']['gene_symbol'] == 'LTBP4' assert results['NM_001042544.1:c.3233_3235=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036009.1:p.(Gln1078=)', 'slr': 'NP_001036009.1:p.(Q1078=)'} assert results['NM_001042544.1:c.3233_3235=']['submitted_variant'] == '19-41123093-A-AG' @@ -6736,8 +6435,7 @@ def test_variant185(self): assert 'NM_001042545.1:c.3032_3034=' in list(results.keys()) assert results['NM_001042545.1:c.3032_3034=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001042545.1:c.3032_3034=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001042545.1:c.3032_3034=']['alt_genomic_loci'] == [] - assert results['NM_001042545.1:c.3032_3034=']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001042545.1:c.3032_3034=']['alt_genomic_loci'], []) assert results['NM_001042545.1:c.3032_3034=']['gene_symbol'] == 'LTBP4' assert results['NM_001042545.1:c.3032_3034=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036010.1:p.(Gln1011=)', 'slr': 'NP_001036010.1:p.(Q1011=)'} assert results['NM_001042545.1:c.3032_3034=']['submitted_variant'] == '19-41123093-A-AG' @@ -6754,8 +6452,7 @@ def test_variant185(self): assert 'NM_003573.2:c.3122_3124=' in list(results.keys()) assert results['NM_003573.2:c.3122_3124=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003573.2:c.3122_3124=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003573.2:c.3122_3124=']['alt_genomic_loci'] == [] - assert results['NM_003573.2:c.3122_3124=']['transcript_description'] == 'Homo sapiens latent transforming 
growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_003573.2:c.3122_3124=']['alt_genomic_loci'], []) assert results['NM_003573.2:c.3122_3124=']['gene_symbol'] == 'LTBP4' assert results['NM_003573.2:c.3122_3124=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003564.2:p.(Gln1041=)', 'slr': 'NP_003564.2:p.(Q1041=)'} assert results['NM_003573.2:c.3122_3124=']['submitted_variant'] == '19-41123093-A-AG' @@ -6778,8 +6475,7 @@ def test_variant186(self): assert 'NM_003573.2:c.3123G>T' in list(results.keys()) assert results['NM_003573.2:c.3123G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003573.2:c.3123G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003573.2:c.3123G>T']['alt_genomic_loci'] == [] - assert results['NM_003573.2:c.3123G>T']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_003573.2:c.3123G>T']['alt_genomic_loci'], []) assert results['NM_003573.2:c.3123G>T']['gene_symbol'] == 'LTBP4' assert results['NM_003573.2:c.3123G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003564.2:p.(Gln1041His)', 'slr': 'NP_003564.2:p.(Q1041H)'} assert results['NM_003573.2:c.3123G>T']['submitted_variant'] == '19-41123093-A-AT' @@ -6797,8 +6493,7 @@ def test_variant186(self): assert 'NM_001042545.1:c.3033G>T' in list(results.keys()) assert results['NM_001042545.1:c.3033G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001042545.1:c.3033G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001042545.1:c.3033G>T']['alt_genomic_loci'] == [] - assert results['NM_001042545.1:c.3033G>T']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001042545.1:c.3033G>T']['alt_genomic_loci'], []) assert 
results['NM_001042545.1:c.3033G>T']['gene_symbol'] == 'LTBP4' assert results['NM_001042545.1:c.3033G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036010.1:p.(Gln1011His)', 'slr': 'NP_001036010.1:p.(Q1011H)'} assert results['NM_001042545.1:c.3033G>T']['submitted_variant'] == '19-41123093-A-AT' @@ -6815,8 +6510,7 @@ def test_variant186(self): assert 'NM_001042544.1:c.3234G>T' in list(results.keys()) assert results['NM_001042544.1:c.3234G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001042544.1:c.3234G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001042544.1:c.3234G>T']['alt_genomic_loci'] == [] - assert results['NM_001042544.1:c.3234G>T']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001042544.1:c.3234G>T']['alt_genomic_loci'], []) assert results['NM_001042544.1:c.3234G>T']['gene_symbol'] == 'LTBP4' assert results['NM_001042544.1:c.3234G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036009.1:p.(Gln1078His)', 'slr': 'NP_001036009.1:p.(Q1078H)'} assert results['NM_001042544.1:c.3234G>T']['submitted_variant'] == '19-41123093-A-AT' @@ -6839,8 +6533,7 @@ def test_variant187(self): assert 'NM_001042544.1:c.3235_3236del' in list(results.keys()) assert results['NM_001042544.1:c.3235_3236del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001042544.1:c.3235_3236del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001042544.1:c.3235_3236del']['alt_genomic_loci'] == [] - assert results['NM_001042544.1:c.3235_3236del']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001042544.1:c.3235_3236del']['alt_genomic_loci'], []) assert results['NM_001042544.1:c.3235_3236del']['gene_symbol'] == 'LTBP4' assert 
results['NM_001042544.1:c.3235_3236del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036009.1:p.(Gly1079LeufsTer17)', 'slr': 'NP_001036009.1:p.(G1079Lfs*17)'} assert results['NM_001042544.1:c.3235_3236del']['submitted_variant'] == '19-41123093-AG-A' @@ -6858,8 +6551,7 @@ def test_variant187(self): assert 'NM_001042545.1:c.3034_3035del' in list(results.keys()) assert results['NM_001042545.1:c.3034_3035del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001042545.1:c.3034_3035del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001042545.1:c.3034_3035del']['alt_genomic_loci'] == [] - assert results['NM_001042545.1:c.3034_3035del']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001042545.1:c.3034_3035del']['alt_genomic_loci'], []) assert results['NM_001042545.1:c.3034_3035del']['gene_symbol'] == 'LTBP4' assert results['NM_001042545.1:c.3034_3035del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036010.1:p.(Gly1012LeufsTer17)', 'slr': 'NP_001036010.1:p.(G1012Lfs*17)'} assert results['NM_001042545.1:c.3034_3035del']['submitted_variant'] == '19-41123093-AG-A' @@ -6876,8 +6568,7 @@ def test_variant187(self): assert 'NM_003573.2:c.3124_3125del' in list(results.keys()) assert results['NM_003573.2:c.3124_3125del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003573.2:c.3124_3125del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003573.2:c.3124_3125del']['alt_genomic_loci'] == [] - assert results['NM_003573.2:c.3124_3125del']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_003573.2:c.3124_3125del']['alt_genomic_loci'], []) assert results['NM_003573.2:c.3124_3125del']['gene_symbol'] == 'LTBP4' assert 
results['NM_003573.2:c.3124_3125del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003564.2:p.(Gly1042LeufsTer17)', 'slr': 'NP_003564.2:p.(G1042Lfs*17)'} assert results['NM_003573.2:c.3124_3125del']['submitted_variant'] == '19-41123093-AG-A' @@ -6900,8 +6591,7 @@ def test_variant188(self): assert 'NM_001042545.1:c.3035del' in list(results.keys()) assert results['NM_001042545.1:c.3035del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001042545.1:c.3035del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001042545.1:c.3035del']['alt_genomic_loci'] == [] - assert results['NM_001042545.1:c.3035del']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001042545.1:c.3035del']['alt_genomic_loci'], []) assert results['NM_001042545.1:c.3035del']['gene_symbol'] == 'LTBP4' assert results['NM_001042545.1:c.3035del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036010.1:p.(Gly1012ValfsTer14)', 'slr': 'NP_001036010.1:p.(G1012Vfs*14)'} assert results['NM_001042545.1:c.3035del']['submitted_variant'] == '19-41123093-AG-AG' @@ -6919,8 +6609,7 @@ def test_variant188(self): assert 'NM_001042544.1:c.3236del' in list(results.keys()) assert results['NM_001042544.1:c.3236del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001042544.1:c.3236del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001042544.1:c.3236del']['alt_genomic_loci'] == [] - assert results['NM_001042544.1:c.3236del']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001042544.1:c.3236del']['alt_genomic_loci'], []) assert results['NM_001042544.1:c.3236del']['gene_symbol'] == 'LTBP4' assert results['NM_001042544.1:c.3236del']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_001036009.1:p.(Gly1079ValfsTer14)', 'slr': 'NP_001036009.1:p.(G1079Vfs*14)'} assert results['NM_001042544.1:c.3236del']['submitted_variant'] == '19-41123093-AG-AG' @@ -6937,8 +6626,7 @@ def test_variant188(self): assert 'NM_003573.2:c.3125del' in list(results.keys()) assert results['NM_003573.2:c.3125del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003573.2:c.3125del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003573.2:c.3125del']['alt_genomic_loci'] == [] - assert results['NM_003573.2:c.3125del']['transcript_description'] == 'Homo sapiens latent transforming growth factor beta binding protein 4 (LTBP4), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_003573.2:c.3125del']['alt_genomic_loci'], []) assert results['NM_003573.2:c.3125del']['gene_symbol'] == 'LTBP4' assert results['NM_003573.2:c.3125del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003564.2:p.(Gly1042ValfsTer14)', 'slr': 'NP_003564.2:p.(G1042Vfs*14)'} assert results['NM_003573.2:c.3125del']['submitted_variant'] == '19-41123093-AG-AG' @@ -6962,8 +6650,7 @@ def test_variant189(self): assert 'NM_012309.4:c.913-5058G>A' in list(results.keys()) assert results['NM_012309.4:c.913-5058G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_012309.4:c.913-5058G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_012309.4:c.913-5058G>A']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'C', 'pos': '574546', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'NW_004070871.1', 'ref': 'C', 'pos': '574546', 'alt': 'T'}}}] - assert results['NM_012309.4:c.913-5058G>A']['transcript_description'] == 'Homo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_012309.4:c.913-5058G>A']['alt_genomic_loci'], [{'grch37': 
{'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'C', 'pos': '574546', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'NW_004070871.1', 'ref': 'C', 'pos': '574546', 'alt': 'T'}}}]) assert results['NM_012309.4:c.913-5058G>A']['gene_symbol'] == 'SHANK2' assert results['NM_012309.4:c.913-5058G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_036441.2:p.?', 'slr': 'NP_036441.2:p.?'} assert results['NM_012309.4:c.913-5058G>A']['submitted_variant'] == 'NM_012309.4:c.913-5058G>A' @@ -6987,8 +6674,7 @@ def test_variant190(self): assert 'NM_004006.2:c.2376G>C' in list(results.keys()) assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.2376G>C' assert results['NM_004006.2:c.2376G>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.2376G>C']['alt_genomic_loci'] == [] - assert results['NM_004006.2:c.2376G>C']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + self.assertCountEqual(results['NM_004006.2:c.2376G>C']['alt_genomic_loci'], []) assert results['NM_004006.2:c.2376G>C']['gene_symbol'] == 'DMD' assert results['NM_004006.2:c.2376G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Val792=)', 'slr': 'NP_003997.1:p.(V792=)'} assert results['NM_004006.2:c.2376G>C']['submitted_variant'] == 'LRG_199t1:c.2376[G>C];[G>C]' @@ -7011,8 +6697,7 @@ def test_variant191(self): assert 'NM_004006.2:c.3103del' in list(results.keys()) assert results['NM_004006.2:c.3103del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.3103del' assert results['NM_004006.2:c.3103del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.3103del']['alt_genomic_loci'] == [] - assert results['NM_004006.2:c.3103del']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + 
self.assertCountEqual(results['NM_004006.2:c.3103del']['alt_genomic_loci'], []) assert results['NM_004006.2:c.3103del']['gene_symbol'] == 'DMD' assert results['NM_004006.2:c.3103del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Gln1035SerfsTer9)', 'slr': 'NP_003997.1:p.(Q1035Sfs*9)'} assert results['NM_004006.2:c.3103del']['submitted_variant'] == 'LRG_199t1:c.[2376G>C];[3103del]' @@ -7030,8 +6715,7 @@ def test_variant191(self): assert 'NM_004006.2:c.2376G>C' in list(results.keys()) assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.2376G>C' assert results['NM_004006.2:c.2376G>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.2376G>C']['alt_genomic_loci'] == [] - assert results['NM_004006.2:c.2376G>C']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + self.assertCountEqual(results['NM_004006.2:c.2376G>C']['alt_genomic_loci'], []) assert results['NM_004006.2:c.2376G>C']['gene_symbol'] == 'DMD' assert results['NM_004006.2:c.2376G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Val792=)', 'slr': 'NP_003997.1:p.(V792=)'} assert results['NM_004006.2:c.2376G>C']['submitted_variant'] == 'LRG_199t1:c.[2376G>C];[3103del]' @@ -7052,29 +6736,11 @@ def test_variant192(self): print(results) assert results['flag'] == 'gene_variant' - assert 'NM_004006.2:c.4358_4372delinsG' in list(results.keys()) - assert results['NM_004006.2:c.4358_4372delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.4358_4372delinsG' - assert results['NM_004006.2:c.4358_4372delinsG']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.4358_4372delinsG']['alt_genomic_loci'] == [] - assert results['NM_004006.2:c.4358_4372delinsG']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' - assert results['NM_004006.2:c.4358_4372delinsG']['gene_symbol'] == 'DMD' - assert 
results['NM_004006.2:c.4358_4372delinsG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Asp1453GlyfsTer11)', 'slr': 'NP_003997.1:p.(D1453Gfs*11)'} - assert results['NM_004006.2:c.4358_4372delinsG']['submitted_variant'] == 'LRG_199t1:c.[4358_4359del;4361_4372del]' - assert results['NM_004006.2:c.4358_4372delinsG']['genome_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.4358_4372delinsG']['hgvs_lrg_variant'] == 'LRG_199:g.954949_954963delinsG' - assert results['NM_004006.2:c.4358_4372delinsG']['hgvs_transcript_variant'] == 'NM_004006.2:c.4358_4372delinsG' - assert results['NM_004006.2:c.4358_4372delinsG']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.954949_954963delinsG' - assert results['NM_004006.2:c.4358_4372delinsG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32407764_32407778delinsC', 'vcf': {'chr': 'chrX', 'ref': 'ACTTCATGGAGACAT', 'pos': '32407764', 'alt': 'C'}} - assert results['NM_004006.2:c.4358_4372delinsG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32389647_32389661delinsC', 'vcf': {'chr': 'chrX', 'ref': 'ACTTCATGGAGACAT', 'pos': '32389647', 'alt': 'C'}} - assert results['NM_004006.2:c.4358_4372delinsG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32407764_32407778delinsC', 'vcf': {'chr': 'X', 'ref': 'ACTTCATGGAGACAT', 'pos': '32407764', 'alt': 'C'}} - assert results['NM_004006.2:c.4358_4372delinsG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32389647_32389661delinsC', 'vcf': {'chr': 'X', 'ref': 'ACTTCATGGAGACAT', 'pos': '32389647', 'alt': 'C'}} - assert results['NM_004006.2:c.4358_4372delinsG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 
'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} assert 'NM_004006.2:c.4358_4359del' in list(results.keys()) assert results['NM_004006.2:c.4358_4359del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.4358_4359del' assert results['NM_004006.2:c.4358_4359del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.4358_4359del']['alt_genomic_loci'] == [] - assert results['NM_004006.2:c.4358_4359del']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + self.assertCountEqual(results['NM_004006.2:c.4358_4359del']['alt_genomic_loci'], []) assert results['NM_004006.2:c.4358_4359del']['gene_symbol'] == 'DMD' assert results['NM_004006.2:c.4358_4359del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Asp1453GlyfsTer15)', 'slr': 'NP_003997.1:p.(D1453Gfs*15)'} assert results['NM_004006.2:c.4358_4359del']['submitted_variant'] == 'LRG_199t1:c.[4358_4359del;4361_4372del]' @@ -7091,8 +6757,7 @@ def test_variant192(self): assert 'NM_004006.2:c.4362_4373del' in list(results.keys()) assert results['NM_004006.2:c.4362_4373del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.4362_4373del' assert results['NM_004006.2:c.4362_4373del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.4362_4373del']['alt_genomic_loci'] == [] - assert results['NM_004006.2:c.4362_4373del']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + self.assertCountEqual(results['NM_004006.2:c.4362_4373del']['alt_genomic_loci'], []) assert results['NM_004006.2:c.4362_4373del']['gene_symbol'] == 'DMD' assert results['NM_004006.2:c.4362_4373del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Ser1455_Phe1458del)', 'slr': 'NP_003997.1:p.(S1455_F1458del)'} assert results['NM_004006.2:c.4362_4373del']['submitted_variant'] == 'LRG_199t1:c.[4358_4359del;4361_4372del]' @@ -7115,8 +6780,7 @@ def test_variant193(self): 
assert 'NM_004006.2:c.3103del' in list(results.keys()) assert results['NM_004006.2:c.3103del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.3103del' assert results['NM_004006.2:c.3103del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.3103del']['alt_genomic_loci'] == [] - assert results['NM_004006.2:c.3103del']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + self.assertCountEqual(results['NM_004006.2:c.3103del']['alt_genomic_loci'], []) assert results['NM_004006.2:c.3103del']['gene_symbol'] == 'DMD' assert results['NM_004006.2:c.3103del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Gln1035SerfsTer9)', 'slr': 'NP_003997.1:p.(Q1035Sfs*9)'} assert results['NM_004006.2:c.3103del']['submitted_variant'] == 'LRG_199t1:c.2376G>C(;)3103del' @@ -7134,8 +6798,7 @@ def test_variant193(self): assert 'NM_004006.2:c.2376G>C' in list(results.keys()) assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.2376G>C' assert results['NM_004006.2:c.2376G>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.2376G>C']['alt_genomic_loci'] == [] - assert results['NM_004006.2:c.2376G>C']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + self.assertCountEqual(results['NM_004006.2:c.2376G>C']['alt_genomic_loci'], []) assert results['NM_004006.2:c.2376G>C']['gene_symbol'] == 'DMD' assert results['NM_004006.2:c.2376G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Val792=)', 'slr': 'NP_003997.1:p.(V792=)'} assert results['NM_004006.2:c.2376G>C']['submitted_variant'] == 'LRG_199t1:c.2376G>C(;)3103del' @@ -7159,8 +6822,7 @@ def test_variant194(self): assert 'NM_004006.2:c.2376G>C' in list(results.keys()) assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.2376G>C' assert 
results['NM_004006.2:c.2376G>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.2376G>C']['alt_genomic_loci'] == [] - assert results['NM_004006.2:c.2376G>C']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + self.assertCountEqual(results['NM_004006.2:c.2376G>C']['alt_genomic_loci'], []) assert results['NM_004006.2:c.2376G>C']['gene_symbol'] == 'DMD' assert results['NM_004006.2:c.2376G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Val792=)', 'slr': 'NP_003997.1:p.(V792=)'} assert results['NM_004006.2:c.2376G>C']['submitted_variant'] == 'LRG_199t1:c.2376[G>C];[(G>C)]' @@ -7184,8 +6846,7 @@ def test_variant195(self): assert 'NM_004006.2:c.2376G>C' in list(results.keys()) assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.2376G>C' assert results['NM_004006.2:c.2376G>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.2376G>C']['alt_genomic_loci'] == [] - assert results['NM_004006.2:c.2376G>C']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + self.assertCountEqual(results['NM_004006.2:c.2376G>C']['alt_genomic_loci'], []) assert results['NM_004006.2:c.2376G>C']['gene_symbol'] == 'DMD' assert results['NM_004006.2:c.2376G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Val792=)', 'slr': 'NP_003997.1:p.(V792=)'} assert results['NM_004006.2:c.2376G>C']['submitted_variant'] == 'LRG_199t1:c.[2376G>C];[?]' @@ -7208,8 +6869,7 @@ def test_variant196(self): assert 'NM_004006.2:c.476T=' in list(results.keys()) assert results['NM_004006.2:c.476T=']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.476T=' assert results['NM_004006.2:c.476T=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.476T=']['alt_genomic_loci'] == [] - assert results['NM_004006.2:c.476T=']['transcript_description'] == 'Homo sapiens 
dystrophin (DMD), transcript variant Dp427m, mRNA' + self.assertCountEqual(results['NM_004006.2:c.476T=']['alt_genomic_loci'], []) assert results['NM_004006.2:c.476T=']['gene_symbol'] == 'DMD' assert results['NM_004006.2:c.476T=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Phe159=)', 'slr': 'NP_003997.1:p.(F159=)'} assert results['NM_004006.2:c.476T=']['submitted_variant'] == 'LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C' @@ -7223,30 +6883,11 @@ def test_variant196(self): assert results['NM_004006.2:c.476T=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522A=', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '32816522', 'alt': 'A'}} assert results['NM_004006.2:c.476T=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} - assert 'NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT' in list(results.keys()) - assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.296_358-3delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGC' - assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['refseqgene_context_intronic_sequence'] == '' - assert 
results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['alt_genomic_loci'] == [] - assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' - assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['gene_symbol'] == 'DMD' - assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Ile99Ser)', 'slr': 'NP_003997.1:p.(I99S)'} - assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['submitted_variant'] == 'LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C' - assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['genome_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['hgvs_lrg_variant'] == 
'LRG_199:g.521254_527967delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGC' - assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['hgvs_transcript_variant'] == 'NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT' - assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.521254_527967delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGC' - assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639_32841473delinsAAGTTGATTACATTAACCTGTGGATAATTACGAGTTGATTGTCGGACCCAGCTCAGGAGAATCTTTTCACTGTTGGTTTGTTGCAATCCAGCCATGATATTTTTCATTACATTTTTGACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC', 'vcf': {'chr': 'chrX', 'ref': 
'ACATGTGGAAATAAATTTTCATAAGAAAATGCATTCCTTGAGCAAGAACCATGCAAACTTAAATATGAATGTCCTTGATCTTCAGTGATAAATAGAAATTTTAGGGCCAATTAGTAATGAGACATAATAGATTCTACCAGAAGTTAAGTCTATTCTCAAAGGCTAGGAGTCTATTCTGATTCATTGGTATCTATGCCATACCATTTACTGAATGTGTTCACTATTACTCCTGGATTCTGCCATATTAGACCCAACATTTACTGTTTATAGAATTGCTCACCTTCTAGCTTTCAATGAACAAGCTAGAGAAGACAGTGTATAAAACATTAGACTTATTGTTAACCAAGGACTTAAGATATAAAAGCGCTTTTTGGATAGGAGGAATACTATAGATAATTGTTTTTTAAAGTATATGTTCAAATTTTGCCTTTGCTATTAATTGTGGTAATTGGACAAGTTGCTTAACCTCTTCTAGGCCTCATTTCTGTGTATCTTACATGCAAGGTTAGTGTAAGATCAAAGTACAAAATATGGTCAGATAAAACCCCTGATTTCATACCATCATTTAACGCAAGCTATATTTATTGGCTGAACTGAAAAGAAGACAGAAATGTTTGCCTACCTAAAATAAAGAAAAAAAACAACAAAGGAAGGAAATATTTATCCCTAAGAGGTGTACATATGTAGCCTTGGTTCTATTTTCATTACTTGGTTTCCATGTATTTGATTGTCTCATAGGCTTCGTGCATGTGCAGATGTTATTCAACAGATTATCAAGTTTAATTCTTCTGAGTCATCTGATGCAGAAATCCATTCTAAGCATTTTTTCTAAGTTTCATGAATGCAGTGACGCTTCATAGAAAAAACTGTCTACATCTTTTATTAAAGAATTATTATTTATTAAAGACTGCTTTTTAAATATAAGCCCCTTATGAGATGCTGGAAGAAGACATCTATGTAAAAGTGATCAGTCTGTACCACTAAGCACTTTCCTTCTGGGAAGGTTAAGGTTCATATACTATCAACATTTGATCAGATGATGAATATTTATTGTGCATTCTAAATATGACAATTTTATTTTTTCATCCATATAGAAGTTAACAAATGCCTTTGGGGTAAATGCATATTTTAACATCTCCAATGTTAGTCATCTTTTTATTAAAAAGTAAATGCAAATTTTAGGATATACTACTAAATCTCATTTTAACATACTTTTAAAAGATTTGAGATGATGCTTTGTAAACACAGTAACGATTCGGAGATATGAAATCTTTGCATGTATCTGTCACAATCCCTTGATAACTAAGCCTATTATTTAGGAGTGGATTATCCAAGTTTTCCTAATTCCTTCTTGACCTACTTATTTAGTCAGTTACTCAGTGAATACAGGTCAAATAATTACGTAATTTGTCTCATATTCTAAAATTATAAATTGCTCAACCTTTATGTACAGTGACTGCAGGAGGTGTATTTTGTGTCCTGTGCTAAATTACGGTTGGAATCAGCAGATAGTGGCTTAATGGTGGTAATGGATTTGCAATAATCAAGCAATCCTCCAATGATGAATGTGATTCATTTGTGAAATGCTTGGCCAGTCTGCCCTCTATGTGCATAACGTTAAAGGACAACAAAGTATTCAATCCAAAACTCCTTTATAGCATCATTATGTCACTAATTCAGCTTATCAGCCTCAGCCTCGCTGTTATTGCTTCTACTTAGGTGCAAATGTTGCTCAGACAAAAAAAAATAATAAAGGGCAATGTATGATAATCATGTCTCCTTTTCCTACATAAAAGGCAGTTATCAAAAAATCATTGAGCTGGTCCCCAAAGGTGAGCCTGTAAACAGAAAACTGTCCCGTCCTCAGGTCTCACTGTCTCTAAAGGAGGCTCAATTTAAACAGATGTATTTGTGTGTGTGCGCGCATACAACTATAAATCAATTTTTAAAAGGAAAAAAGTTGAAATGTCTTTGAGCAGTGATTTTTAAAAATGACTAG
GAGTTAACTGGGCCAAGAAGGCATGATGGACATGGAGGTGCACCACCAAGATTGACCTTCAAGGAAGGGCTTGTTGCTCCAGCTGTGGTCAGGCAGCTTCTAGTTGTTAGTTCTCTCAGGTACCACCTGAGTTGCAGAGTCCTGCCAGCCAATGTCACACCCTCTCTAGGGCAACCCACTACTATTGACTGATGAAGGGGCAGAGTATAAACACTCAGCCATTTTAACCCCATTTGAGACAACTCTGAGGGGTCATCCTAGCTGCAAGTTGTCTATGGGGTTTGAATAAGCGTGTCATAGGGTCTACGTCACAGCTCTAGTTCTTCCTCTTTCCACTCCTGTCACCTCTGCCCTTCTACAGGTGTTTTTTTTTTTTTTTTTTTTTCCAGGGCATGGCTTAATAAACATGCTGCCTGCCAAACTCAACTCCGAAGTCTACTCCCCAGAACCCAGACTGTGATTAAAAGAAGTTGCAGCAGAGGAATAGAAATGAAGAGGGTAGGAATAATCTTATATGGTAAGGATAGTTTCTGTGAAGCTCTGCAGAAGAGCAGAGCAAGAACTGTCTTGCACAGAAGTGACTTGGTAGGCAGGAATTTACAGGGATGGCTTTCAGCAGTGCGAACGTCATCATTCTCCTGTAATTGCAAATTGTATCAATAATATGAACATTCTAAAGTGAATTAGCGTTGCCATTAAGTACATCCCTTGAATTGTCCAGTAAAAGTATGGACCTGACTTCTGATAGCTCACCACTGTTAGCAAGATTTGTTATTACAACCAGCTGGTTGCCAGAGCCAAGTTTAGCACTCCGAAAAAAAAAGAATTGCACTTAAGAAGGAAGAAAAAGATGCCAAACACATGTTATATGCTTTGCAATTTTGTTCAAACCTGCCTATGTTACAAAATATGCGTTACTGTTTTAGGCTAGCAATTTTATTTGAGATGGACAGTAGAAAAACAGTAATAGGAAAACAGAGTTTGCATTCCTGAGTTTTCACAAGTTCAATGAGAAGTCCTCCATGGCACTATTAATAACATAGTATGTTGCATTAGAAAAAGGGGCCAACATGGTTCTATCTGTAAATGCCGCTCGTTATCTGATAGATCTAGAATGTACATTAAGTTGACCAAAATGTCCTTTAATCTTTGCTGAAATTCAAGGGCTTCTACCATTTAATAGGAAGAAAATATGTAATGTTGGTGTAAAAAAAAAAAAAAAAAAGAAAAACACATTTTTAGATTCACTGGATTATGATGAACACTGAGCAAGATAATTTTTCAAAGTGTGCTTAATAGTTTTAGAACAGGGCAGTGTGATATAAGAGAGATGAGATAAGTAAGCAGAGGCTGGGAAGAGTCTACAGTCTTGCAGTCTCAAGGATATGGTGATTCAAGGGCATAAATGAAAAACAACAGGCTATAAGAGAGGTCTCAGACAAAGAAAGATTTAAAAAGCATTCCTTCTGATATTCCTCATTGCAAAAGCGCCTCCACATGCTAATAATGAAAAATGGGCTGGGTGCGGTGGCTCACGCCTGTAATCCAAACACTTTGGGAGGCCGAGACGGGCGGATCACAAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCATCTCTACTGAACAAAATACAAAAAATTAGCCGGGCGTGGTGGCGGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAAGAGAATGGCATGAACCCAGGAGGCGGAGCTTGCAGTGAGCAGAGATCGCGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCTGTCTCAAAAATAAAAAATAAAAAAATAATAATAAATGAACAACTTGCAAGCAAGTGAAGAAATTATCAGACATAATAGTCTTGAAAGCTTATCTTGGACTAGTTCAGATGGAGTTGGAAAGGGCTTCAGGATATTTTTAAAAATGGTAAGATCATATAAGAAGTAGTATAAACAATAAATACAGTTCAGAGTGCCTTTTGGAGCT
CTGTGCATGCTTGAGTTCTTCTGCTGGCTTTGTCAAAACTTACATCTGTCTTACGCAAAGCGGGATCTGCAAACTGGATTCACTCTAGGAATTATTTGTTACTATGCCATAAGATAAAGTCAACAATTCAGAGTGCATCAAGAAACTTATGTAGCGATTTTACAGAGTACCATTTATGTCTAATGTATTTAATAATAAAGGAGGAAGCATGGGTGTTGGGCATCTTATTTTTGTAACGCTTTGGTTTTATTCTATCTTACATACAATGGATTAGGGGAAAAGTGTTCCTTCCTCAAGATATTTTGAGAAGTACTGAGCAACATATGAAAAGCAGTTTGGGGAGAGATGCGGTATGTTGCTTGCTGGTTCTATTTGACACCATCACTATATGGAACGGGCTGAAAATCGGCCAACTTGGGCTCACTTAAGGCTCCTATGAGCTATTCTTTGTTGCCAGCACATATTAATTCCCGCTCTTTCTCTTCCCCTCTCCCCGCTTACTGTTGTGAAGTAGCATTAAGCCTGTTCAGAGAATTTGGAATAAAAATATATGGGGGCCAATTAGGAGAGCAACATGGCTGCTGAATTTAATAGGTACTCTTTGTGTCTACTCACTATTTGACTCTGAGCAAGAGCAATCTACAATTCAAGTTAAAATTCAAGCCGGGCGCAGTGGCTCATGCCTGCAATCCCAGCACTTTGGGAGGCCGAGGCAGGAGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAAAAATTAGCCGGGCGTGGTGGGGGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAATCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCAAGACTCTGTCTCAAAATAAAAATAAAAAAAAATAAAAATAAAAAATAAAAAAATTCAAGTTTACCGAATGAAGCCATGATATCTGCTACAATAGGGCTGGATCTGAACTATTACCCTCATTGCAAGAAGCCAGACACAAAAGAACATGTATTATATTGATATAAAATATCCAAAAAGGCAAATACATATGGACAGAAATAGATTGACAATTGCCTAGGGCTGGGGTGGGAGTGAAAAATGACTGTAAATGGAGAAGAAACTTCTTTTTAAGGTGGTGGAAAAGTGCCAAAATTAGATCGCTATGATGGCTATATAACTCTGTAAATATCCTGCAAATTTTAATGAACGTGGTGGGGGGGAGTTATTCATCCCTCAAAATAGTTTGAGAGGCTACCAAGAACCAGAACTAAACCAGATTGATGCAAATTGAATTTTACACTTAGAATGAGTACATTGATGGTCTAACAATTACATTTCAAAAACGCTGATTTATAAAAAAGTAATCCCGACCAGTAAAAACAGCAGTCATTTCTCAGCCTCCAAATTTTTTAAATGAGTGGAAAACTTTAAGCAATCAATACAAGTCATATGATACACCTCAAATAAAAGTGGTATACTACACTGAGTGCCTTGCAGATATTCTCCCACTTAATCGTGATAAGAAATACATACACATTTATTTTTATTTTTACTCTTTTGATAGTGAAGGGACCATGGCCCAGAACAGGGGTGGCAATATACCAAATGCCACTAAGATGGTAACCGTGGAAGTTTAAATTCAAGTTCATGTTGGTATATATATATACACATACATATTTATACATATGTGTATAACGAGTTACTATTACGTATATATATACAATGAGTGGAAAATATTAGTGGATATAAGTTCATGTATGATTATGTATACATATTATGATTATACTATTATGTATATGTATACAATGAGTGGATAATATTAGTGGATATAAGTTCATGTTGGTCTTATATATACATATATAATGTGTGGATATGAGTGTGTATATATATACATATATAATGTGTGGATATGAGTGTGTGTATATAC
ACACATGCATATTGTATATATGTGTGTATATATAGACACATTATATATATACGCATATATACTACACACACACACACACGGGTGTGTGTCTGTATCTTTTCCACAAATCCTTCAACCCATTTTGCAGAGGTCAAATAGACAGTCGGAAGACCCTATGCTCAGGTGACTTAAAAATAATTTCCAAATCACATTATGGAGTTTGTATGTATTACACACATTTATTGATAGAGATACCCATATTCTACTAATCTTTTATTGGCAATAATTTATGTTAAGAATACCCAAGACTGAGAAAGCCTCATTCCTTTGGTAGTGATTAAAATAAAACATACTAAATTAACTTATAGACAAGTTATAGAACATACATTTGTGAAAAAAATTACTCACCTATGATTGGGACTTTGTATTTTTACCTTATACTTACTCAATGAAATAAAATTTTGAAAAATATTCCTGTAAATGTACCAGAACCTATTTTATACCGTGATGATCCTTAACATTTCAGACGACATGGTAGTGTCAATTTAAAAAGCAGCACTATGGAGCAGGGTTTGTTATTGTTAGAAATACACATTTGTTTCACACGTCAAGGGTAAAAATTAAAAAACAAGATTAATGTTACCCAAAAGGAAACCATTCATCAGGATTCTTACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGA', 'pos': '32834760', 'alt': 'GCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC'}} - assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522_32823356delinsAAGTTGATTACATTAACCTGTGGATAATTACGAGTTGATTGTCGGACCCAGCTCAGGAGAATCTTTTCACTGTTGGTTTGTTGCAATCCAGCCATGATATTTTTCATTACATTTTTGACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC', 'vcf': {'chr': 'chrX', 'ref': 
'ACATGTGGAAATAAATTTTCATAAGAAAATGCATTCCTTGAGCAAGAACCATGCAAACTTAAATATGAATGTCCTTGATCTTCAGTGATAAATAGAAATTTTAGGGCCAATTAGTAATGAGACATAATAGATTCTACCAGAAGTTAAGTCTATTCTCAAAGGCTAGGAGTCTATTCTGATTCATTGGTATCTATGCCATACCATTTACTGAATGTGTTCACTATTACTCCTGGATTCTGCCATATTAGACCCAACATTTACTGTTTATAGAATTGCTCACCTTCTAGCTTTCAATGAACAAGCTAGAGAAGACAGTGTATAAAACATTAGACTTATTGTTAACCAAGGACTTAAGATATAAAAGCGCTTTTTGGATAGGAGGAATACTATAGATAATTGTTTTTTAAAGTATATGTTCAAATTTTGCCTTTGCTATTAATTGTGGTAATTGGACAAGTTGCTTAACCTCTTCTAGGCCTCATTTCTGTGTATCTTACATGCAAGGTTAGTGTAAGATCAAAGTACAAAATATGGTCAGATAAAACCCCTGATTTCATACCATCATTTAACGCAAGCTATATTTATTGGCTGAACTGAAAAGAAGACAGAAATGTTTGCCTACCTAAAATAAAGAAAAAAAACAACAAAGGAAGGAAATATTTATCCCTAAGAGGTGTACATATGTAGCCTTGGTTCTATTTTCATTACTTGGTTTCCATGTATTTGATTGTCTCATAGGCTTCGTGCATGTGCAGATGTTATTCAACAGATTATCAAGTTTAATTCTTCTGAGTCATCTGATGCAGAAATCCATTCTAAGCATTTTTTCTAAGTTTCATGAATGCAGTGACGCTTCATAGAAAAAACTGTCTACATCTTTTATTAAAGAATTATTATTTATTAAAGACTGCTTTTTAAATATAAGCCCCTTATGAGATGCTGGAAGAAGACATCTATGTAAAAGTGATCAGTCTGTACCACTAAGCACTTTCCTTCTGGGAAGGTTAAGGTTCATATACTATCAACATTTGATCAGATGATGAATATTTATTGTGCATTCTAAATATGACAATTTTATTTTTTCATCCATATAGAAGTTAACAAATGCCTTTGGGGTAAATGCATATTTTAACATCTCCAATGTTAGTCATCTTTTTATTAAAAAGTAAATGCAAATTTTAGGATATACTACTAAATCTCATTTTAACATACTTTTAAAAGATTTGAGATGATGCTTTGTAAACACAGTAACGATTCGGAGATATGAAATCTTTGCATGTATCTGTCACAATCCCTTGATAACTAAGCCTATTATTTAGGAGTGGATTATCCAAGTTTTCCTAATTCCTTCTTGACCTACTTATTTAGTCAGTTACTCAGTGAATACAGGTCAAATAATTACGTAATTTGTCTCATATTCTAAAATTATAAATTGCTCAACCTTTATGTACAGTGACTGCAGGAGGTGTATTTTGTGTCCTGTGCTAAATTACGGTTGGAATCAGCAGATAGTGGCTTAATGGTGGTAATGGATTTGCAATAATCAAGCAATCCTCCAATGATGAATGTGATTCATTTGTGAAATGCTTGGCCAGTCTGCCCTCTATGTGCATAACGTTAAAGGACAACAAAGTATTCAATCCAAAACTCCTTTATAGCATCATTATGTCACTAATTCAGCTTATCAGCCTCAGCCTCGCTGTTATTGCTTCTACTTAGGTGCAAATGTTGCTCAGACAAAAAAAAATAATAAAGGGCAATGTATGATAATCATGTCTCCTTTTCCTACATAAAAGGCAGTTATCAAAAAATCATTGAGCTGGTCCCCAAAGGTGAGCCTGTAAACAGAAAACTGTCCCGTCCTCAGGTCTCACTGTCTCTAAAGGAGGCTCAATTTAAACAGATGTATTTGTGTGTGTGCGCGCATACAACTATAAATCAATTTTTAAAAGGAAAAAAGTTGAAATGTCTTTGAGCAGTGATTTTTAAAAATGACTAG
GAGTTAACTGGGCCAAGAAGGCATGATGGACATGGAGGTGCACCACCAAGATTGACCTTCAAGGAAGGGCTTGTTGCTCCAGCTGTGGTCAGGCAGCTTCTAGTTGTTAGTTCTCTCAGGTACCACCTGAGTTGCAGAGTCCTGCCAGCCAATGTCACACCCTCTCTAGGGCAACCCACTACTATTGACTGATGAAGGGGCAGAGTATAAACACTCAGCCATTTTAACCCCATTTGAGACAACTCTGAGGGGTCATCCTAGCTGCAAGTTGTCTATGGGGTTTGAATAAGCGTGTCATAGGGTCTACGTCACAGCTCTAGTTCTTCCTCTTTCCACTCCTGTCACCTCTGCCCTTCTACAGGTGTTTTTTTTTTTTTTTTTTTTTCCAGGGCATGGCTTAATAAACATGCTGCCTGCCAAACTCAACTCCGAAGTCTACTCCCCAGAACCCAGACTGTGATTAAAAGAAGTTGCAGCAGAGGAATAGAAATGAAGAGGGTAGGAATAATCTTATATGGTAAGGATAGTTTCTGTGAAGCTCTGCAGAAGAGCAGAGCAAGAACTGTCTTGCACAGAAGTGACTTGGTAGGCAGGAATTTACAGGGATGGCTTTCAGCAGTGCGAACGTCATCATTCTCCTGTAATTGCAAATTGTATCAATAATATGAACATTCTAAAGTGAATTAGCGTTGCCATTAAGTACATCCCTTGAATTGTCCAGTAAAAGTATGGACCTGACTTCTGATAGCTCACCACTGTTAGCAAGATTTGTTATTACAACCAGCTGGTTGCCAGAGCCAAGTTTAGCACTCCGAAAAAAAAAGAATTGCACTTAAGAAGGAAGAAAAAGATGCCAAACACATGTTATATGCTTTGCAATTTTGTTCAAACCTGCCTATGTTACAAAATATGCGTTACTGTTTTAGGCTAGCAATTTTATTTGAGATGGACAGTAGAAAAACAGTAATAGGAAAACAGAGTTTGCATTCCTGAGTTTTCACAAGTTCAATGAGAAGTCCTCCATGGCACTATTAATAACATAGTATGTTGCATTAGAAAAAGGGGCCAACATGGTTCTATCTGTAAATGCCGCTCGTTATCTGATAGATCTAGAATGTACATTAAGTTGACCAAAATGTCCTTTAATCTTTGCTGAAATTCAAGGGCTTCTACCATTTAATAGGAAGAAAATATGTAATGTTGGTGTAAAAAAAAAAAAAAAAAAGAAAAACACATTTTTAGATTCACTGGATTATGATGAACACTGAGCAAGATAATTTTTCAAAGTGTGCTTAATAGTTTTAGAACAGGGCAGTGTGATATAAGAGAGATGAGATAAGTAAGCAGAGGCTGGGAAGAGTCTACAGTCTTGCAGTCTCAAGGATATGGTGATTCAAGGGCATAAATGAAAAACAACAGGCTATAAGAGAGGTCTCAGACAAAGAAAGATTTAAAAAGCATTCCTTCTGATATTCCTCATTGCAAAAGCGCCTCCACATGCTAATAATGAAAAATGGGCTGGGTGCGGTGGCTCACGCCTGTAATCCAAACACTTTGGGAGGCCGAGACGGGCGGATCACAAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCATCTCTACTGAACAAAATACAAAAAATTAGCCGGGCGTGGTGGCGGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAAGAGAATGGCATGAACCCAGGAGGCGGAGCTTGCAGTGAGCAGAGATCGCGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCTGTCTCAAAAATAAAAAATAAAAAAATAATAATAAATGAACAACTTGCAAGCAAGTGAAGAAATTATCAGACATAATAGTCTTGAAAGCTTATCTTGGACTAGTTCAGATGGAGTTGGAAAGGGCTTCAGGATATTTTTAAAAATGGTAAGATCATATAAGAAGTAGTATAAACAATAAATACAGTTCAGAGTGCCTTTTGGAGCT
CTGTGCATGCTTGAGTTCTTCTGCTGGCTTTGTCAAAACTTACATCTGTCTTACGCAAAGCGGGATCTGCAAACTGGATTCACTCTAGGAATTATTTGTTACTATGCCATAAGATAAAGTCAACAATTCAGAGTGCATCAAGAAACTTATGTAGCGATTTTACAGAGTACCATTTATGTCTAATGTATTTAATAATAAAGGAGGAAGCATGGGTGTTGGGCATCTTATTTTTGTAACGCTTTGGTTTTATTCTATCTTACATACAATGGATTAGGGGAAAAGTGTTCCTTCCTCAAGATATTTTGAGAAGTACTGAGCAACATATGAAAAGCAGTTTGGGGAGAGATGCGGTATGTTGCTTGCTGGTTCTATTTGACACCATCACTATATGGAACGGGCTGAAAATCGGCCAACTTGGGCTCACTTAAGGCTCCTATGAGCTATTCTTTGTTGCCAGCACATATTAATTCCCGCTCTTTCTCTTCCCCTCTCCCCGCTTACTGTTGTGAAGTAGCATTAAGCCTGTTCAGAGAATTTGGAATAAAAATATATGGGGGCCAATTAGGAGAGCAACATGGCTGCTGAATTTAATAGGTACTCTTTGTGTCTACTCACTATTTGACTCTGAGCAAGAGCAATCTACAATTCAAGTTAAAATTCAAGCCGGGCGCAGTGGCTCATGCCTGCAATCCCAGCACTTTGGGAGGCCGAGGCAGGAGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAAAAATTAGCCGGGCGTGGTGGGGGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAATCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCAAGACTCTGTCTCAAAATAAAAATAAAAAAAAATAAAAATAAAAAATAAAAAAATTCAAGTTTACCGAATGAAGCCATGATATCTGCTACAATAGGGCTGGATCTGAACTATTACCCTCATTGCAAGAAGCCAGACACAAAAGAACATGTATTATATTGATATAAAATATCCAAAAAGGCAAATACATATGGACAGAAATAGATTGACAATTGCCTAGGGCTGGGGTGGGAGTGAAAAATGACTGTAAATGGAGAAGAAACTTCTTTTTAAGGTGGTGGAAAAGTGCCAAAATTAGATCGCTATGATGGCTATATAACTCTGTAAATATCCTGCAAATTTTAATGAACGTGGTGGGGGGGAGTTATTCATCCCTCAAAATAGTTTGAGAGGCTACCAAGAACCAGAACTAAACCAGATTGATGCAAATTGAATTTTACACTTAGAATGAGTACATTGATGGTCTAACAATTACATTTCAAAAACGCTGATTTATAAAAAAGTAATCCCGACCAGTAAAAACAGCAGTCATTTCTCAGCCTCCAAATTTTTTAAATGAGTGGAAAACTTTAAGCAATCAATACAAGTCATATGATACACCTCAAATAAAAGTGGTATACTACACTGAGTGCCTTGCAGATATTCTCCCACTTAATCGTGATAAGAAATACATACACATTTATTTTTATTTTTACTCTTTTGATAGTGAAGGGACCATGGCCCAGAACAGGGGTGGCAATATACCAAATGCCACTAAGATGGTAACCGTGGAAGTTTAAATTCAAGTTCATGTTGGTATATATATATACACATACATATTTATACATATGTGTATAACGAGTTACTATTACGTATATATATACAATGAGTGGAAAATATTAGTGGATATAAGTTCATGTATGATTATGTATACATATTATGATTATACTATTATGTATATGTATACAATGAGTGGATAATATTAGTGGATATAAGTTCATGTTGGTCTTATATATACATATATAATGTGTGGATATGAGTGTGTATATATATACATATATAATGTGTGGATATGAGTGTGTGTATATAC
ACACATGCATATTGTATATATGTGTGTATATATAGACACATTATATATATACGCATATATACTACACACACACACACACGGGTGTGTGTCTGTATCTTTTCCACAAATCCTTCAACCCATTTTGCAGAGGTCAAATAGACAGTCGGAAGACCCTATGCTCAGGTGACTTAAAAATAATTTCCAAATCACATTATGGAGTTTGTATGTATTACACACATTTATTGATAGAGATACCCATATTCTACTAATCTTTTATTGGCAATAATTTATGTTAAGAATACCCAAGACTGAGAAAGCCTCATTCCTTTGGTAGTGATTAAAATAAAACATACTAAATTAACTTATAGACAAGTTATAGAACATACATTTGTGAAAAAAATTACTCACCTATGATTGGGACTTTGTATTTTTACCTTATACTTACTCAATGAAATAAAATTTTGAAAAATATTCCTGTAAATGTACCAGAACCTATTTTATACCGTGATGATCCTTAACATTTCAGACGACATGGTAGTGTCAATTTAAAAAGCAGCACTATGGAGCAGGGTTTGTTATTGTTAGAAATACACATTTGTTTCACACGTCAAGGGTAAAAATTAAAAAACAAGATTAATGTTACCCAAAAGGAAACCATTCATCAGGATTCTTACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGA', 'pos': '32816643', 'alt': 'GCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC'}} - assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639_32841473delinsAAGTTGATTACATTAACCTGTGGATAATTACGAGTTGATTGTCGGACCCAGCTCAGGAGAATCTTTTCACTGTTGGTTTGTTGCAATCCAGCCATGATATTTTTCATTACATTTTTGACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC', 'vcf': {'chr': 'X', 'ref': 
'ACATGTGGAAATAAATTTTCATAAGAAAATGCATTCCTTGAGCAAGAACCATGCAAACTTAAATATGAATGTCCTTGATCTTCAGTGATAAATAGAAATTTTAGGGCCAATTAGTAATGAGACATAATAGATTCTACCAGAAGTTAAGTCTATTCTCAAAGGCTAGGAGTCTATTCTGATTCATTGGTATCTATGCCATACCATTTACTGAATGTGTTCACTATTACTCCTGGATTCTGCCATATTAGACCCAACATTTACTGTTTATAGAATTGCTCACCTTCTAGCTTTCAATGAACAAGCTAGAGAAGACAGTGTATAAAACATTAGACTTATTGTTAACCAAGGACTTAAGATATAAAAGCGCTTTTTGGATAGGAGGAATACTATAGATAATTGTTTTTTAAAGTATATGTTCAAATTTTGCCTTTGCTATTAATTGTGGTAATTGGACAAGTTGCTTAACCTCTTCTAGGCCTCATTTCTGTGTATCTTACATGCAAGGTTAGTGTAAGATCAAAGTACAAAATATGGTCAGATAAAACCCCTGATTTCATACCATCATTTAACGCAAGCTATATTTATTGGCTGAACTGAAAAGAAGACAGAAATGTTTGCCTACCTAAAATAAAGAAAAAAAACAACAAAGGAAGGAAATATTTATCCCTAAGAGGTGTACATATGTAGCCTTGGTTCTATTTTCATTACTTGGTTTCCATGTATTTGATTGTCTCATAGGCTTCGTGCATGTGCAGATGTTATTCAACAGATTATCAAGTTTAATTCTTCTGAGTCATCTGATGCAGAAATCCATTCTAAGCATTTTTTCTAAGTTTCATGAATGCAGTGACGCTTCATAGAAAAAACTGTCTACATCTTTTATTAAAGAATTATTATTTATTAAAGACTGCTTTTTAAATATAAGCCCCTTATGAGATGCTGGAAGAAGACATCTATGTAAAAGTGATCAGTCTGTACCACTAAGCACTTTCCTTCTGGGAAGGTTAAGGTTCATATACTATCAACATTTGATCAGATGATGAATATTTATTGTGCATTCTAAATATGACAATTTTATTTTTTCATCCATATAGAAGTTAACAAATGCCTTTGGGGTAAATGCATATTTTAACATCTCCAATGTTAGTCATCTTTTTATTAAAAAGTAAATGCAAATTTTAGGATATACTACTAAATCTCATTTTAACATACTTTTAAAAGATTTGAGATGATGCTTTGTAAACACAGTAACGATTCGGAGATATGAAATCTTTGCATGTATCTGTCACAATCCCTTGATAACTAAGCCTATTATTTAGGAGTGGATTATCCAAGTTTTCCTAATTCCTTCTTGACCTACTTATTTAGTCAGTTACTCAGTGAATACAGGTCAAATAATTACGTAATTTGTCTCATATTCTAAAATTATAAATTGCTCAACCTTTATGTACAGTGACTGCAGGAGGTGTATTTTGTGTCCTGTGCTAAATTACGGTTGGAATCAGCAGATAGTGGCTTAATGGTGGTAATGGATTTGCAATAATCAAGCAATCCTCCAATGATGAATGTGATTCATTTGTGAAATGCTTGGCCAGTCTGCCCTCTATGTGCATAACGTTAAAGGACAACAAAGTATTCAATCCAAAACTCCTTTATAGCATCATTATGTCACTAATTCAGCTTATCAGCCTCAGCCTCGCTGTTATTGCTTCTACTTAGGTGCAAATGTTGCTCAGACAAAAAAAAATAATAAAGGGCAATGTATGATAATCATGTCTCCTTTTCCTACATAAAAGGCAGTTATCAAAAAATCATTGAGCTGGTCCCCAAAGGTGAGCCTGTAAACAGAAAACTGTCCCGTCCTCAGGTCTCACTGTCTCTAAAGGAGGCTCAATTTAAACAGATGTATTTGTGTGTGTGCGCGCATACAACTATAAATCAATTTTTAAAAGGAAAAAAGTTGAAATGTCTTTGAGCAGTGATTTTTAAAAATGACTAG
GAGTTAACTGGGCCAAGAAGGCATGATGGACATGGAGGTGCACCACCAAGATTGACCTTCAAGGAAGGGCTTGTTGCTCCAGCTGTGGTCAGGCAGCTTCTAGTTGTTAGTTCTCTCAGGTACCACCTGAGTTGCAGAGTCCTGCCAGCCAATGTCACACCCTCTCTAGGGCAACCCACTACTATTGACTGATGAAGGGGCAGAGTATAAACACTCAGCCATTTTAACCCCATTTGAGACAACTCTGAGGGGTCATCCTAGCTGCAAGTTGTCTATGGGGTTTGAATAAGCGTGTCATAGGGTCTACGTCACAGCTCTAGTTCTTCCTCTTTCCACTCCTGTCACCTCTGCCCTTCTACAGGTGTTTTTTTTTTTTTTTTTTTTTCCAGGGCATGGCTTAATAAACATGCTGCCTGCCAAACTCAACTCCGAAGTCTACTCCCCAGAACCCAGACTGTGATTAAAAGAAGTTGCAGCAGAGGAATAGAAATGAAGAGGGTAGGAATAATCTTATATGGTAAGGATAGTTTCTGTGAAGCTCTGCAGAAGAGCAGAGCAAGAACTGTCTTGCACAGAAGTGACTTGGTAGGCAGGAATTTACAGGGATGGCTTTCAGCAGTGCGAACGTCATCATTCTCCTGTAATTGCAAATTGTATCAATAATATGAACATTCTAAAGTGAATTAGCGTTGCCATTAAGTACATCCCTTGAATTGTCCAGTAAAAGTATGGACCTGACTTCTGATAGCTCACCACTGTTAGCAAGATTTGTTATTACAACCAGCTGGTTGCCAGAGCCAAGTTTAGCACTCCGAAAAAAAAAGAATTGCACTTAAGAAGGAAGAAAAAGATGCCAAACACATGTTATATGCTTTGCAATTTTGTTCAAACCTGCCTATGTTACAAAATATGCGTTACTGTTTTAGGCTAGCAATTTTATTTGAGATGGACAGTAGAAAAACAGTAATAGGAAAACAGAGTTTGCATTCCTGAGTTTTCACAAGTTCAATGAGAAGTCCTCCATGGCACTATTAATAACATAGTATGTTGCATTAGAAAAAGGGGCCAACATGGTTCTATCTGTAAATGCCGCTCGTTATCTGATAGATCTAGAATGTACATTAAGTTGACCAAAATGTCCTTTAATCTTTGCTGAAATTCAAGGGCTTCTACCATTTAATAGGAAGAAAATATGTAATGTTGGTGTAAAAAAAAAAAAAAAAAAGAAAAACACATTTTTAGATTCACTGGATTATGATGAACACTGAGCAAGATAATTTTTCAAAGTGTGCTTAATAGTTTTAGAACAGGGCAGTGTGATATAAGAGAGATGAGATAAGTAAGCAGAGGCTGGGAAGAGTCTACAGTCTTGCAGTCTCAAGGATATGGTGATTCAAGGGCATAAATGAAAAACAACAGGCTATAAGAGAGGTCTCAGACAAAGAAAGATTTAAAAAGCATTCCTTCTGATATTCCTCATTGCAAAAGCGCCTCCACATGCTAATAATGAAAAATGGGCTGGGTGCGGTGGCTCACGCCTGTAATCCAAACACTTTGGGAGGCCGAGACGGGCGGATCACAAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCATCTCTACTGAACAAAATACAAAAAATTAGCCGGGCGTGGTGGCGGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAAGAGAATGGCATGAACCCAGGAGGCGGAGCTTGCAGTGAGCAGAGATCGCGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCTGTCTCAAAAATAAAAAATAAAAAAATAATAATAAATGAACAACTTGCAAGCAAGTGAAGAAATTATCAGACATAATAGTCTTGAAAGCTTATCTTGGACTAGTTCAGATGGAGTTGGAAAGGGCTTCAGGATATTTTTAAAAATGGTAAGATCATATAAGAAGTAGTATAAACAATAAATACAGTTCAGAGTGCCTTTTGGAGCT
CTGTGCATGCTTGAGTTCTTCTGCTGGCTTTGTCAAAACTTACATCTGTCTTACGCAAAGCGGGATCTGCAAACTGGATTCACTCTAGGAATTATTTGTTACTATGCCATAAGATAAAGTCAACAATTCAGAGTGCATCAAGAAACTTATGTAGCGATTTTACAGAGTACCATTTATGTCTAATGTATTTAATAATAAAGGAGGAAGCATGGGTGTTGGGCATCTTATTTTTGTAACGCTTTGGTTTTATTCTATCTTACATACAATGGATTAGGGGAAAAGTGTTCCTTCCTCAAGATATTTTGAGAAGTACTGAGCAACATATGAAAAGCAGTTTGGGGAGAGATGCGGTATGTTGCTTGCTGGTTCTATTTGACACCATCACTATATGGAACGGGCTGAAAATCGGCCAACTTGGGCTCACTTAAGGCTCCTATGAGCTATTCTTTGTTGCCAGCACATATTAATTCCCGCTCTTTCTCTTCCCCTCTCCCCGCTTACTGTTGTGAAGTAGCATTAAGCCTGTTCAGAGAATTTGGAATAAAAATATATGGGGGCCAATTAGGAGAGCAACATGGCTGCTGAATTTAATAGGTACTCTTTGTGTCTACTCACTATTTGACTCTGAGCAAGAGCAATCTACAATTCAAGTTAAAATTCAAGCCGGGCGCAGTGGCTCATGCCTGCAATCCCAGCACTTTGGGAGGCCGAGGCAGGAGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAAAAATTAGCCGGGCGTGGTGGGGGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAATCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCAAGACTCTGTCTCAAAATAAAAATAAAAAAAAATAAAAATAAAAAATAAAAAAATTCAAGTTTACCGAATGAAGCCATGATATCTGCTACAATAGGGCTGGATCTGAACTATTACCCTCATTGCAAGAAGCCAGACACAAAAGAACATGTATTATATTGATATAAAATATCCAAAAAGGCAAATACATATGGACAGAAATAGATTGACAATTGCCTAGGGCTGGGGTGGGAGTGAAAAATGACTGTAAATGGAGAAGAAACTTCTTTTTAAGGTGGTGGAAAAGTGCCAAAATTAGATCGCTATGATGGCTATATAACTCTGTAAATATCCTGCAAATTTTAATGAACGTGGTGGGGGGGAGTTATTCATCCCTCAAAATAGTTTGAGAGGCTACCAAGAACCAGAACTAAACCAGATTGATGCAAATTGAATTTTACACTTAGAATGAGTACATTGATGGTCTAACAATTACATTTCAAAAACGCTGATTTATAAAAAAGTAATCCCGACCAGTAAAAACAGCAGTCATTTCTCAGCCTCCAAATTTTTTAAATGAGTGGAAAACTTTAAGCAATCAATACAAGTCATATGATACACCTCAAATAAAAGTGGTATACTACACTGAGTGCCTTGCAGATATTCTCCCACTTAATCGTGATAAGAAATACATACACATTTATTTTTATTTTTACTCTTTTGATAGTGAAGGGACCATGGCCCAGAACAGGGGTGGCAATATACCAAATGCCACTAAGATGGTAACCGTGGAAGTTTAAATTCAAGTTCATGTTGGTATATATATATACACATACATATTTATACATATGTGTATAACGAGTTACTATTACGTATATATATACAATGAGTGGAAAATATTAGTGGATATAAGTTCATGTATGATTATGTATACATATTATGATTATACTATTATGTATATGTATACAATGAGTGGATAATATTAGTGGATATAAGTTCATGTTGGTCTTATATATACATATATAATGTGTGGATATGAGTGTGTATATATATACATATATAATGTGTGGATATGAGTGTGTGTATATAC
ACACATGCATATTGTATATATGTGTGTATATATAGACACATTATATATATACGCATATATACTACACACACACACACACGGGTGTGTGTCTGTATCTTTTCCACAAATCCTTCAACCCATTTTGCAGAGGTCAAATAGACAGTCGGAAGACCCTATGCTCAGGTGACTTAAAAATAATTTCCAAATCACATTATGGAGTTTGTATGTATTACACACATTTATTGATAGAGATACCCATATTCTACTAATCTTTTATTGGCAATAATTTATGTTAAGAATACCCAAGACTGAGAAAGCCTCATTCCTTTGGTAGTGATTAAAATAAAACATACTAAATTAACTTATAGACAAGTTATAGAACATACATTTGTGAAAAAAATTACTCACCTATGATTGGGACTTTGTATTTTTACCTTATACTTACTCAATGAAATAAAATTTTGAAAAATATTCCTGTAAATGTACCAGAACCTATTTTATACCGTGATGATCCTTAACATTTCAGACGACATGGTAGTGTCAATTTAAAAAGCAGCACTATGGAGCAGGGTTTGTTATTGTTAGAAATACACATTTGTTTCACACGTCAAGGGTAAAAATTAAAAAACAAGATTAATGTTACCCAAAAGGAAACCATTCATCAGGATTCTTACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGA', 'pos': '32834760', 'alt': 'GCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC'}} - assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522_32823356delinsAAGTTGATTACATTAACCTGTGGATAATTACGAGTTGATTGTCGGACCCAGCTCAGGAGAATCTTTTCACTGTTGGTTTGTTGCAATCCAGCCATGATATTTTTCATTACATTTTTGACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC', 'vcf': {'chr': 'X', 'ref': 
'ACATGTGGAAATAAATTTTCATAAGAAAATGCATTCCTTGAGCAAGAACCATGCAAACTTAAATATGAATGTCCTTGATCTTCAGTGATAAATAGAAATTTTAGGGCCAATTAGTAATGAGACATAATAGATTCTACCAGAAGTTAAGTCTATTCTCAAAGGCTAGGAGTCTATTCTGATTCATTGGTATCTATGCCATACCATTTACTGAATGTGTTCACTATTACTCCTGGATTCTGCCATATTAGACCCAACATTTACTGTTTATAGAATTGCTCACCTTCTAGCTTTCAATGAACAAGCTAGAGAAGACAGTGTATAAAACATTAGACTTATTGTTAACCAAGGACTTAAGATATAAAAGCGCTTTTTGGATAGGAGGAATACTATAGATAATTGTTTTTTAAAGTATATGTTCAAATTTTGCCTTTGCTATTAATTGTGGTAATTGGACAAGTTGCTTAACCTCTTCTAGGCCTCATTTCTGTGTATCTTACATGCAAGGTTAGTGTAAGATCAAAGTACAAAATATGGTCAGATAAAACCCCTGATTTCATACCATCATTTAACGCAAGCTATATTTATTGGCTGAACTGAAAAGAAGACAGAAATGTTTGCCTACCTAAAATAAAGAAAAAAAACAACAAAGGAAGGAAATATTTATCCCTAAGAGGTGTACATATGTAGCCTTGGTTCTATTTTCATTACTTGGTTTCCATGTATTTGATTGTCTCATAGGCTTCGTGCATGTGCAGATGTTATTCAACAGATTATCAAGTTTAATTCTTCTGAGTCATCTGATGCAGAAATCCATTCTAAGCATTTTTTCTAAGTTTCATGAATGCAGTGACGCTTCATAGAAAAAACTGTCTACATCTTTTATTAAAGAATTATTATTTATTAAAGACTGCTTTTTAAATATAAGCCCCTTATGAGATGCTGGAAGAAGACATCTATGTAAAAGTGATCAGTCTGTACCACTAAGCACTTTCCTTCTGGGAAGGTTAAGGTTCATATACTATCAACATTTGATCAGATGATGAATATTTATTGTGCATTCTAAATATGACAATTTTATTTTTTCATCCATATAGAAGTTAACAAATGCCTTTGGGGTAAATGCATATTTTAACATCTCCAATGTTAGTCATCTTTTTATTAAAAAGTAAATGCAAATTTTAGGATATACTACTAAATCTCATTTTAACATACTTTTAAAAGATTTGAGATGATGCTTTGTAAACACAGTAACGATTCGGAGATATGAAATCTTTGCATGTATCTGTCACAATCCCTTGATAACTAAGCCTATTATTTAGGAGTGGATTATCCAAGTTTTCCTAATTCCTTCTTGACCTACTTATTTAGTCAGTTACTCAGTGAATACAGGTCAAATAATTACGTAATTTGTCTCATATTCTAAAATTATAAATTGCTCAACCTTTATGTACAGTGACTGCAGGAGGTGTATTTTGTGTCCTGTGCTAAATTACGGTTGGAATCAGCAGATAGTGGCTTAATGGTGGTAATGGATTTGCAATAATCAAGCAATCCTCCAATGATGAATGTGATTCATTTGTGAAATGCTTGGCCAGTCTGCCCTCTATGTGCATAACGTTAAAGGACAACAAAGTATTCAATCCAAAACTCCTTTATAGCATCATTATGTCACTAATTCAGCTTATCAGCCTCAGCCTCGCTGTTATTGCTTCTACTTAGGTGCAAATGTTGCTCAGACAAAAAAAAATAATAAAGGGCAATGTATGATAATCATGTCTCCTTTTCCTACATAAAAGGCAGTTATCAAAAAATCATTGAGCTGGTCCCCAAAGGTGAGCCTGTAAACAGAAAACTGTCCCGTCCTCAGGTCTCACTGTCTCTAAAGGAGGCTCAATTTAAACAGATGTATTTGTGTGTGTGCGCGCATACAACTATAAATCAATTTTTAAAAGGAAAAAAGTTGAAATGTCTTTGAGCAGTGATTTTTAAAAATGACTAG
GAGTTAACTGGGCCAAGAAGGCATGATGGACATGGAGGTGCACCACCAAGATTGACCTTCAAGGAAGGGCTTGTTGCTCCAGCTGTGGTCAGGCAGCTTCTAGTTGTTAGTTCTCTCAGGTACCACCTGAGTTGCAGAGTCCTGCCAGCCAATGTCACACCCTCTCTAGGGCAACCCACTACTATTGACTGATGAAGGGGCAGAGTATAAACACTCAGCCATTTTAACCCCATTTGAGACAACTCTGAGGGGTCATCCTAGCTGCAAGTTGTCTATGGGGTTTGAATAAGCGTGTCATAGGGTCTACGTCACAGCTCTAGTTCTTCCTCTTTCCACTCCTGTCACCTCTGCCCTTCTACAGGTGTTTTTTTTTTTTTTTTTTTTTCCAGGGCATGGCTTAATAAACATGCTGCCTGCCAAACTCAACTCCGAAGTCTACTCCCCAGAACCCAGACTGTGATTAAAAGAAGTTGCAGCAGAGGAATAGAAATGAAGAGGGTAGGAATAATCTTATATGGTAAGGATAGTTTCTGTGAAGCTCTGCAGAAGAGCAGAGCAAGAACTGTCTTGCACAGAAGTGACTTGGTAGGCAGGAATTTACAGGGATGGCTTTCAGCAGTGCGAACGTCATCATTCTCCTGTAATTGCAAATTGTATCAATAATATGAACATTCTAAAGTGAATTAGCGTTGCCATTAAGTACATCCCTTGAATTGTCCAGTAAAAGTATGGACCTGACTTCTGATAGCTCACCACTGTTAGCAAGATTTGTTATTACAACCAGCTGGTTGCCAGAGCCAAGTTTAGCACTCCGAAAAAAAAAGAATTGCACTTAAGAAGGAAGAAAAAGATGCCAAACACATGTTATATGCTTTGCAATTTTGTTCAAACCTGCCTATGTTACAAAATATGCGTTACTGTTTTAGGCTAGCAATTTTATTTGAGATGGACAGTAGAAAAACAGTAATAGGAAAACAGAGTTTGCATTCCTGAGTTTTCACAAGTTCAATGAGAAGTCCTCCATGGCACTATTAATAACATAGTATGTTGCATTAGAAAAAGGGGCCAACATGGTTCTATCTGTAAATGCCGCTCGTTATCTGATAGATCTAGAATGTACATTAAGTTGACCAAAATGTCCTTTAATCTTTGCTGAAATTCAAGGGCTTCTACCATTTAATAGGAAGAAAATATGTAATGTTGGTGTAAAAAAAAAAAAAAAAAAGAAAAACACATTTTTAGATTCACTGGATTATGATGAACACTGAGCAAGATAATTTTTCAAAGTGTGCTTAATAGTTTTAGAACAGGGCAGTGTGATATAAGAGAGATGAGATAAGTAAGCAGAGGCTGGGAAGAGTCTACAGTCTTGCAGTCTCAAGGATATGGTGATTCAAGGGCATAAATGAAAAACAACAGGCTATAAGAGAGGTCTCAGACAAAGAAAGATTTAAAAAGCATTCCTTCTGATATTCCTCATTGCAAAAGCGCCTCCACATGCTAATAATGAAAAATGGGCTGGGTGCGGTGGCTCACGCCTGTAATCCAAACACTTTGGGAGGCCGAGACGGGCGGATCACAAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCATCTCTACTGAACAAAATACAAAAAATTAGCCGGGCGTGGTGGCGGGCGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAAGAGAATGGCATGAACCCAGGAGGCGGAGCTTGCAGTGAGCAGAGATCGCGCCACTGCACTCCAGCCTGGGTGACAGAGCGAGACTCTGTCTCAAAAATAAAAAATAAAAAAATAATAATAAATGAACAACTTGCAAGCAAGTGAAGAAATTATCAGACATAATAGTCTTGAAAGCTTATCTTGGACTAGTTCAGATGGAGTTGGAAAGGGCTTCAGGATATTTTTAAAAATGGTAAGATCATATAAGAAGTAGTATAAACAATAAATACAGTTCAGAGTGCCTTTTGGAGCT
CTGTGCATGCTTGAGTTCTTCTGCTGGCTTTGTCAAAACTTACATCTGTCTTACGCAAAGCGGGATCTGCAAACTGGATTCACTCTAGGAATTATTTGTTACTATGCCATAAGATAAAGTCAACAATTCAGAGTGCATCAAGAAACTTATGTAGCGATTTTACAGAGTACCATTTATGTCTAATGTATTTAATAATAAAGGAGGAAGCATGGGTGTTGGGCATCTTATTTTTGTAACGCTTTGGTTTTATTCTATCTTACATACAATGGATTAGGGGAAAAGTGTTCCTTCCTCAAGATATTTTGAGAAGTACTGAGCAACATATGAAAAGCAGTTTGGGGAGAGATGCGGTATGTTGCTTGCTGGTTCTATTTGACACCATCACTATATGGAACGGGCTGAAAATCGGCCAACTTGGGCTCACTTAAGGCTCCTATGAGCTATTCTTTGTTGCCAGCACATATTAATTCCCGCTCTTTCTCTTCCCCTCTCCCCGCTTACTGTTGTGAAGTAGCATTAAGCCTGTTCAGAGAATTTGGAATAAAAATATATGGGGGCCAATTAGGAGAGCAACATGGCTGCTGAATTTAATAGGTACTCTTTGTGTCTACTCACTATTTGACTCTGAGCAAGAGCAATCTACAATTCAAGTTAAAATTCAAGCCGGGCGCAGTGGCTCATGCCTGCAATCCCAGCACTTTGGGAGGCCGAGGCAGGAGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCGTCTCTACTAAAAATACAAAAAAAAATTAGCCGGGCGTGGTGGGGGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAATCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCAAGACTCTGTCTCAAAATAAAAATAAAAAAAAATAAAAATAAAAAATAAAAAAATTCAAGTTTACCGAATGAAGCCATGATATCTGCTACAATAGGGCTGGATCTGAACTATTACCCTCATTGCAAGAAGCCAGACACAAAAGAACATGTATTATATTGATATAAAATATCCAAAAAGGCAAATACATATGGACAGAAATAGATTGACAATTGCCTAGGGCTGGGGTGGGAGTGAAAAATGACTGTAAATGGAGAAGAAACTTCTTTTTAAGGTGGTGGAAAAGTGCCAAAATTAGATCGCTATGATGGCTATATAACTCTGTAAATATCCTGCAAATTTTAATGAACGTGGTGGGGGGGAGTTATTCATCCCTCAAAATAGTTTGAGAGGCTACCAAGAACCAGAACTAAACCAGATTGATGCAAATTGAATTTTACACTTAGAATGAGTACATTGATGGTCTAACAATTACATTTCAAAAACGCTGATTTATAAAAAAGTAATCCCGACCAGTAAAAACAGCAGTCATTTCTCAGCCTCCAAATTTTTTAAATGAGTGGAAAACTTTAAGCAATCAATACAAGTCATATGATACACCTCAAATAAAAGTGGTATACTACACTGAGTGCCTTGCAGATATTCTCCCACTTAATCGTGATAAGAAATACATACACATTTATTTTTATTTTTACTCTTTTGATAGTGAAGGGACCATGGCCCAGAACAGGGGTGGCAATATACCAAATGCCACTAAGATGGTAACCGTGGAAGTTTAAATTCAAGTTCATGTTGGTATATATATATACACATACATATTTATACATATGTGTATAACGAGTTACTATTACGTATATATATACAATGAGTGGAAAATATTAGTGGATATAAGTTCATGTATGATTATGTATACATATTATGATTATACTATTATGTATATGTATACAATGAGTGGATAATATTAGTGGATATAAGTTCATGTTGGTCTTATATATACATATATAATGTGTGGATATGAGTGTGTATATATATACATATATAATGTGTGGATATGAGTGTGTGTATATAC
ACACATGCATATTGTATATATGTGTGTATATATAGACACATTATATATATACGCATATATACTACACACACACACACACGGGTGTGTGTCTGTATCTTTTCCACAAATCCTTCAACCCATTTTGCAGAGGTCAAATAGACAGTCGGAAGACCCTATGCTCAGGTGACTTAAAAATAATTTCCAAATCACATTATGGAGTTTGTATGTATTACACACATTTATTGATAGAGATACCCATATTCTACTAATCTTTTATTGGCAATAATTTATGTTAAGAATACCCAAGACTGAGAAAGCCTCATTCCTTTGGTAGTGATTAAAATAAAACATACTAAATTAACTTATAGACAAGTTATAGAACATACATTTGTGAAAAAAATTACTCACCTATGATTGGGACTTTGTATTTTTACCTTATACTTACTCAATGAAATAAAATTTTGAAAAATATTCCTGTAAATGTACCAGAACCTATTTTATACCGTGATGATCCTTAACATTTCAGACGACATGGTAGTGTCAATTTAAAAAGCAGCACTATGGAGCAGGGTTTGTTATTGTTAGAAATACACATTTGTTTCACACGTCAAGGGTAAAAATTAAAAAACAAGATTAATGTTACCCAAAAGGAAACCATTCATCAGGATTCTTACCTGCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGA', 'pos': '32816643', 'alt': 'GCCAGTGGAGGATTATATTCCAAATCAAACCAAGAGTCAGTTTATGATTTCCATCTACGC'}} - assert results['NM_004006.2:c.296_476delinsGCGTAGATGGAAATCATAAACTGACTCTTGGTTTGATTTGGAATATAATCCTCCACTGGCAGGTCAAAAATGTAATGAAAAATATCATGGCTGGATTGCAACAAACCAACAGTGAAAAGATTCTCCTGAGCTGGGTCCGACAATCAACTCGTAATTATCCACAGGTTAATGTAATCAACTT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} - assert results['flag'] == 'gene_variant' assert 'NM_004006.2:c.296T>G' in list(results.keys()) assert results['NM_004006.2:c.296T>G']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.296T>G' assert results['NM_004006.2:c.296T>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.296T>G']['alt_genomic_loci'] == [] - assert results['NM_004006.2:c.296T>G']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + self.assertCountEqual(results['NM_004006.2:c.296T>G']['alt_genomic_loci'], []) assert results['NM_004006.2:c.296T>G']['gene_symbol'] == 'DMD' assert 
results['NM_004006.2:c.296T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Ile99Ser)', 'slr': 'NP_003997.1:p.(I99S)'} assert results['NM_004006.2:c.296T>G']['submitted_variant'] == 'LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C' @@ -7263,8 +6904,7 @@ def test_variant196(self): assert 'NM_004006.2:c.1083A>C' in list(results.keys()) assert results['NM_004006.2:c.1083A>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.1083A>C' assert results['NM_004006.2:c.1083A>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.1083A>C']['alt_genomic_loci'] == [] - assert results['NM_004006.2:c.1083A>C']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + self.assertCountEqual(results['NM_004006.2:c.1083A>C']['alt_genomic_loci'], []) assert results['NM_004006.2:c.1083A>C']['gene_symbol'] == 'DMD' assert results['NM_004006.2:c.1083A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Thr361=)', 'slr': 'NP_003997.1:p.(T361=)'} assert results['NM_004006.2:c.1083A>C']['submitted_variant'] == 'LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C' @@ -7287,8 +6927,7 @@ def test_variant197(self): assert 'NM_004006.2:c.1408del' in list(results.keys()) assert results['NM_004006.2:c.1408del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.1408del' assert results['NM_004006.2:c.1408del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.1408del']['alt_genomic_loci'] == [] - assert results['NM_004006.2:c.1408del']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + self.assertCountEqual(results['NM_004006.2:c.1408del']['alt_genomic_loci'], []) assert results['NM_004006.2:c.1408del']['gene_symbol'] == 'DMD' assert results['NM_004006.2:c.1408del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Arg470GlufsTer17)', 'slr': 'NP_003997.1:p.(R470Efs*17)'} assert 
results['NM_004006.2:c.1408del']['submitted_variant'] == 'LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del' @@ -7306,8 +6945,7 @@ def test_variant197(self): assert 'NM_004006.2:c.296T>G' in list(results.keys()) assert results['NM_004006.2:c.296T>G']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.296T>G' assert results['NM_004006.2:c.296T>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.296T>G']['alt_genomic_loci'] == [] - assert results['NM_004006.2:c.296T>G']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + self.assertCountEqual(results['NM_004006.2:c.296T>G']['alt_genomic_loci'], []) assert results['NM_004006.2:c.296T>G']['gene_symbol'] == 'DMD' assert results['NM_004006.2:c.296T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Ile99Ser)', 'slr': 'NP_003997.1:p.(I99S)'} assert results['NM_004006.2:c.296T>G']['submitted_variant'] == 'LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del' @@ -7324,8 +6962,7 @@ def test_variant197(self): assert 'NM_004006.2:c.476T>C' in list(results.keys()) assert results['NM_004006.2:c.476T>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.476T>C' assert results['NM_004006.2:c.476T>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.476T>C']['alt_genomic_loci'] == [] - assert results['NM_004006.2:c.476T>C']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + self.assertCountEqual(results['NM_004006.2:c.476T>C']['alt_genomic_loci'], []) assert results['NM_004006.2:c.476T>C']['gene_symbol'] == 'DMD' assert results['NM_004006.2:c.476T>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Phe159Ser)', 'slr': 'NP_003997.1:p.(F159S)'} assert results['NM_004006.2:c.476T>C']['submitted_variant'] == 'LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del' @@ -7342,8 +6979,7 @@ def test_variant197(self): assert 'NM_004006.2:c.1083A>C' in 
list(results.keys()) assert results['NM_004006.2:c.1083A>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.1083A>C' assert results['NM_004006.2:c.1083A>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.1083A>C']['alt_genomic_loci'] == [] - assert results['NM_004006.2:c.1083A>C']['transcript_description'] == 'Homo sapiens dystrophin (DMD), transcript variant Dp427m, mRNA' + self.assertCountEqual(results['NM_004006.2:c.1083A>C']['alt_genomic_loci'], []) assert results['NM_004006.2:c.1083A>C']['gene_symbol'] == 'DMD' assert results['NM_004006.2:c.1083A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Thr361=)', 'slr': 'NP_003997.1:p.(T361=)'} assert results['NM_004006.2:c.1083A>C']['submitted_variant'] == 'LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del' @@ -7366,8 +7002,7 @@ def test_variant198(self): assert 'validation_warning_1' in list(results.keys()) assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - assert results['validation_warning_1']['alt_genomic_loci'] == [] - assert results['validation_warning_1']['transcript_description'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert results['validation_warning_1']['gene_symbol'] == '' assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'LRG_199t1:c.[976-20T>A;976-17_976-1dup]' @@ -7391,8 +7026,7 @@ def test_variant199(self): assert 'NM_015102.3:c.2818-2T>A' in list(results.keys()) assert results['NM_015102.3:c.2818-2T>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_015102.3:c.2818-2T>A']['refseqgene_context_intronic_sequence'] == 'NG_011724.2(NM_015102.3):c.2818-2A=' - assert results['NM_015102.3:c.2818-2T>A']['alt_genomic_loci'] == [] - assert 
results['NM_015102.3:c.2818-2T>A']['transcript_description'] == 'Homo sapiens nephronophthisis 4 (NPHP4), mRNA' + self.assertCountEqual(results['NM_015102.3:c.2818-2T>A']['alt_genomic_loci'], []) assert results['NM_015102.3:c.2818-2T>A']['gene_symbol'] == 'NPHP4' assert results['NM_015102.3:c.2818-2T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055917.1:p.?', 'slr': 'NP_055917.1:p.?'} assert results['NM_015102.3:c.2818-2T>A']['submitted_variant'] == '1-5935162-A-T' @@ -7409,8 +7043,7 @@ def test_variant199(self): assert 'NM_001291593.1:c.1279-2T>A' in list(results.keys()) assert results['NM_001291593.1:c.1279-2T>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001291593.1:c.1279-2T>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001291593.1:c.1279-2T>A']['alt_genomic_loci'] == [] - assert results['NM_001291593.1:c.1279-2T>A']['transcript_description'] == 'Homo sapiens nephrocystin 4 (NPHP4), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001291593.1:c.1279-2T>A']['alt_genomic_loci'], []) assert results['NM_001291593.1:c.1279-2T>A']['gene_symbol'] == 'NPHP4' assert results['NM_001291593.1:c.1279-2T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278522.1:p.?', 'slr': 'NP_001278522.1:p.?'} assert results['NM_001291593.1:c.1279-2T>A']['submitted_variant'] == '1-5935162-A-T' @@ -7427,8 +7060,7 @@ def test_variant199(self): assert 'NM_015102.4:c.2818-2T>A' in list(results.keys()) assert results['NM_015102.4:c.2818-2T>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_015102.4:c.2818-2T>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_015102.4:c.2818-2T>A']['alt_genomic_loci'] == [] - assert results['NM_015102.4:c.2818-2T>A']['transcript_description'] == 'Homo sapiens nephrocystin 4 (NPHP4), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_015102.4:c.2818-2T>A']['alt_genomic_loci'], []) assert results['NM_015102.4:c.2818-2T>A']['gene_symbol'] == 
'NPHP4' assert results['NM_015102.4:c.2818-2T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055917.1:p.?', 'slr': 'NP_055917.1:p.?'} assert results['NM_015102.4:c.2818-2T>A']['submitted_variant'] == '1-5935162-A-T' @@ -7445,8 +7077,7 @@ def test_variant199(self): assert 'NM_001291594.1:c.1282-2T>A' in list(results.keys()) assert results['NM_001291594.1:c.1282-2T>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001291594.1:c.1282-2T>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001291594.1:c.1282-2T>A']['alt_genomic_loci'] == [] - assert results['NM_001291594.1:c.1282-2T>A']['transcript_description'] == 'Homo sapiens nephrocystin 4 (NPHP4), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001291594.1:c.1282-2T>A']['alt_genomic_loci'], []) assert results['NM_001291594.1:c.1282-2T>A']['gene_symbol'] == 'NPHP4' assert results['NM_001291594.1:c.1282-2T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278523.1:p.?', 'slr': 'NP_001278523.1:p.?'} assert results['NM_001291594.1:c.1282-2T>A']['submitted_variant'] == '1-5935162-A-T' @@ -7464,8 +7095,7 @@ def test_variant199(self): assert 'NR_111987.1:n.3633-2T>A' in list(results.keys()) assert results['NR_111987.1:n.3633-2T>A']['hgvs_lrg_transcript_variant'] == '' assert results['NR_111987.1:n.3633-2T>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_111987.1:n.3633-2T>A']['alt_genomic_loci'] == [] - assert results['NR_111987.1:n.3633-2T>A']['transcript_description'] == 'Homo sapiens nephrocystin 4 (NPHP4), transcript variant 4, non-coding RNA' + self.assertCountEqual(results['NR_111987.1:n.3633-2T>A']['alt_genomic_loci'], []) assert results['NR_111987.1:n.3633-2T>A']['gene_symbol'] == 'NPHP4' assert results['NR_111987.1:n.3633-2T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_111987.1:n.3633-2T>A']['submitted_variant'] == '1-5935162-A-T' @@ -7489,8 +7119,7 @@ def 
test_variant200(self): assert 'NM_001127660.1:c.1676C>T' in list(results.keys()) assert results['NM_001127660.1:c.1676C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001127660.1:c.1676C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001127660.1:c.1676C>T']['alt_genomic_loci'] == [] - assert results['NM_001127660.1:c.1676C>T']['transcript_description'] == 'Homo sapiens mitofusin 2 (MFN2), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001127660.1:c.1676C>T']['alt_genomic_loci'], []) assert results['NM_001127660.1:c.1676C>T']['gene_symbol'] == 'MFN2' assert results['NM_001127660.1:c.1676C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001121132.1:p.(Pro559Leu)', 'slr': 'NP_001121132.1:p.(P559L)'} assert results['NM_001127660.1:c.1676C>T']['submitted_variant'] == '1-12065948-C-T' @@ -7507,8 +7136,7 @@ def test_variant200(self): assert 'NM_014874.3:c.1676C>T' in list(results.keys()) assert results['NM_014874.3:c.1676C>T']['hgvs_lrg_transcript_variant'] == 'LRG_255t1:c.1676C>T' assert results['NM_014874.3:c.1676C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_014874.3:c.1676C>T']['alt_genomic_loci'] == [] - assert results['NM_014874.3:c.1676C>T']['transcript_description'] == 'Homo sapiens mitofusin 2 (MFN2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_014874.3:c.1676C>T']['alt_genomic_loci'], []) assert results['NM_014874.3:c.1676C>T']['gene_symbol'] == 'MFN2' assert results['NM_014874.3:c.1676C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055689.1(LRG_255p1):p.(Pro559Leu)', 'slr': 'NP_055689.1:p.(P559L)'} assert results['NM_014874.3:c.1676C>T']['submitted_variant'] == '1-12065948-C-T' @@ -7531,8 +7159,7 @@ def test_variant201(self): assert 'NM_001290129.1:c.1829+5_1829+8del' in list(results.keys()) assert results['NM_001290129.1:c.1829+5_1829+8del']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001290129.1:c.1829+5_1829+8del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001290129.1:c.1829+5_1829+8del']['alt_genomic_loci'] == [] - assert results['NM_001290129.1:c.1829+5_1829+8del']['transcript_description'] == 'Homo sapiens protein O-linked mannose N-acetylglucosaminyltransferase 1 (beta 1,2-) (POMGNT1), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001290129.1:c.1829+5_1829+8del']['alt_genomic_loci'], []) assert results['NM_001290129.1:c.1829+5_1829+8del']['gene_symbol'] == 'POMGNT1' assert results['NM_001290129.1:c.1829+5_1829+8del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001277058.1:p.?', 'slr': 'NP_001277058.1:p.?'} assert results['NM_001290129.1:c.1829+5_1829+8del']['submitted_variant'] == '1-46655125-CTCAC-C' @@ -7549,8 +7176,7 @@ def test_variant201(self): assert 'NM_001290130.1:c.1466+5_1466+8del' in list(results.keys()) assert results['NM_001290130.1:c.1466+5_1466+8del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001290130.1:c.1466+5_1466+8del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001290130.1:c.1466+5_1466+8del']['alt_genomic_loci'] == [] - assert results['NM_001290130.1:c.1466+5_1466+8del']['transcript_description'] == 'Homo sapiens protein O-linked mannose N-acetylglucosaminyltransferase 1 (beta 1,2-) (POMGNT1), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001290130.1:c.1466+5_1466+8del']['alt_genomic_loci'], []) assert results['NM_001290130.1:c.1466+5_1466+8del']['gene_symbol'] == 'POMGNT1' assert results['NM_001290130.1:c.1466+5_1466+8del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001277059.1:p.?', 'slr': 'NP_001277059.1:p.?'} assert results['NM_001290130.1:c.1466+5_1466+8del']['submitted_variant'] == '1-46655125-CTCAC-C' @@ -7567,8 +7193,7 @@ def test_variant201(self): assert 'NM_017739.3:c.1895+5_1895+8del' in list(results.keys()) assert 
results['NM_017739.3:c.1895+5_1895+8del']['hgvs_lrg_transcript_variant'] == 'LRG_701t2:c.1895+5_1895+8del' assert results['NM_017739.3:c.1895+5_1895+8del']['refseqgene_context_intronic_sequence'] == 'NG_009205.2(NM_017739.3):c.1895+5_1895+8del' - assert results['NM_017739.3:c.1895+5_1895+8del']['alt_genomic_loci'] == [] - assert results['NM_017739.3:c.1895+5_1895+8del']['transcript_description'] == 'Homo sapiens protein O-linked mannose N-acetylglucosaminyltransferase 1 (beta 1,2-) (POMGNT1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_017739.3:c.1895+5_1895+8del']['alt_genomic_loci'], []) assert results['NM_017739.3:c.1895+5_1895+8del']['gene_symbol'] == 'POMGNT1' assert results['NM_017739.3:c.1895+5_1895+8del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_060209.3(LRG_701p2):p.?', 'slr': 'NP_060209.3:p.?'} assert results['NM_017739.3:c.1895+5_1895+8del']['submitted_variant'] == '1-46655125-CTCAC-C' @@ -7585,8 +7210,7 @@ def test_variant201(self): assert 'NM_001243766.1:c.1869+31_1869+34del' in list(results.keys()) assert results['NM_001243766.1:c.1869+31_1869+34del']['hgvs_lrg_transcript_variant'] == 'LRG_701t1:c.1869+31_1869+34del' assert results['NM_001243766.1:c.1869+31_1869+34del']['refseqgene_context_intronic_sequence'] == 'NG_009205.2(NM_001243766.1):c.1869+31_1869+34del' - assert results['NM_001243766.1:c.1869+31_1869+34del']['alt_genomic_loci'] == [] - assert results['NM_001243766.1:c.1869+31_1869+34del']['transcript_description'] == 'Homo sapiens protein O-linked mannose N-acetylglucosaminyltransferase 1 (beta 1,2-) (POMGNT1), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001243766.1:c.1869+31_1869+34del']['alt_genomic_loci'], []) assert results['NM_001243766.1:c.1869+31_1869+34del']['gene_symbol'] == 'POMGNT1' assert results['NM_001243766.1:c.1869+31_1869+34del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001230695.1:p.?', 'slr': 'NP_001230695.1:p.?'} assert 
results['NM_001243766.1:c.1869+31_1869+34del']['submitted_variant'] == '1-46655125-CTCAC-C' @@ -7610,8 +7234,7 @@ def test_variant202(self): assert 'NM_000329.2:c.106_114del' in list(results.keys()) assert results['NM_000329.2:c.106_114del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000329.2:c.106_114del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000329.2:c.106_114del']['alt_genomic_loci'] == [] - assert results['NM_000329.2:c.106_114del']['transcript_description'] == 'Homo sapiens RPE65, retinoid isomerohydrolase (RPE65), mRNA' + self.assertCountEqual(results['NM_000329.2:c.106_114del']['alt_genomic_loci'], []) assert results['NM_000329.2:c.106_114del']['gene_symbol'] == 'RPE65' assert results['NM_000329.2:c.106_114del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000320.1:p.(Leu36_Leu38del)', 'slr': 'NP_000320.1:p.(L36_L38del)'} assert results['NM_000329.2:c.106_114del']['submitted_variant'] == '1-68912523-TGAGCCAGAG-T' @@ -7635,8 +7258,7 @@ def test_variant203(self): assert 'NM_000329.2:c.109_114del' in list(results.keys()) assert results['NM_000329.2:c.109_114del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000329.2:c.109_114del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000329.2:c.109_114del']['alt_genomic_loci'] == [] - assert results['NM_000329.2:c.109_114del']['transcript_description'] == 'Homo sapiens RPE65, retinoid isomerohydrolase (RPE65), mRNA' + self.assertCountEqual(results['NM_000329.2:c.109_114del']['alt_genomic_loci'], []) assert results['NM_000329.2:c.109_114del']['gene_symbol'] == 'RPE65' assert results['NM_000329.2:c.109_114del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000320.1:p.(Trp37_Leu38del)', 'slr': 'NP_000320.1:p.(W37_L38del)'} assert results['NM_000329.2:c.109_114del']['submitted_variant'] == '1-68912526-GCCAGAG-G' @@ -7661,8 +7283,7 @@ def test_variant204(self): assert 'NM_001408.2:c.*919G>T' in list(results.keys()) assert 
results['NM_001408.2:c.*919G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001408.2:c.*919G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001408.2:c.*919G>T']['alt_genomic_loci'] == [] - assert results['NM_001408.2:c.*919G>T']['transcript_description'] == 'Homo sapiens cadherin EGF LAG seven-pass G-type receptor 2 (CELSR2), mRNA' + self.assertCountEqual(results['NM_001408.2:c.*919G>T']['alt_genomic_loci'], []) assert results['NM_001408.2:c.*919G>T']['gene_symbol'] == 'CELSR2' assert results['NM_001408.2:c.*919G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001399.1:p.?', 'slr': 'NP_001399.1:p.?'} assert results['NM_001408.2:c.*919G>T']['submitted_variant'] == '1-109817590-G-T' @@ -7686,8 +7307,7 @@ def test_variant205(self): assert 'NM_006468.6:c.1070+35_1070+38del' in list(results.keys()) assert results['NM_006468.6:c.1070+35_1070+38del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_006468.6:c.1070+35_1070+38del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_006468.6:c.1070+35_1070+38del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'HG1287_PATCH', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'NW_003871055.3', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}] - assert results['NM_006468.6:c.1070+35_1070+38del']['transcript_description'] == 'Homo sapiens polymerase (RNA) III (DNA directed) polypeptide C (62kD) (POLR3C), mRNA' + self.assertCountEqual(results['NM_006468.6:c.1070+35_1070+38del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'HG1287_PATCH', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'NW_003871055.3', 'ref': 'ATACT', 'pos': '2653042', 'alt': 
'A'}}}]) assert results['NM_006468.6:c.1070+35_1070+38del']['gene_symbol'] == 'POLR3C' assert results['NM_006468.6:c.1070+35_1070+38del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006459.3:p.?', 'slr': 'NP_006459.3:p.?'} assert results['NM_006468.6:c.1070+35_1070+38del']['submitted_variant'] == '1-145597475-GAAGT-G' @@ -7704,8 +7324,7 @@ def test_variant205(self): assert 'NM_001303456.1:c.1109+35_1109+38del' in list(results.keys()) assert results['NM_001303456.1:c.1109+35_1109+38del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001303456.1:c.1109+35_1109+38del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001303456.1:c.1109+35_1109+38del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'HG1287_PATCH', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'NW_003871055.3', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}] - assert results['NM_001303456.1:c.1109+35_1109+38del']['transcript_description'] == 'Homo sapiens RNA polymerase III subunit C (POLR3C), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001303456.1:c.1109+35_1109+38del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'HG1287_PATCH', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'NW_003871055.3', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}]) assert results['NM_001303456.1:c.1109+35_1109+38del']['gene_symbol'] == 'POLR3C' assert results['NM_001303456.1:c.1109+35_1109+38del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001290385.1:p.?', 'slr': 'NP_001290385.1:p.?'} assert results['NM_001303456.1:c.1109+35_1109+38del']['submitted_variant'] == '1-145597475-GAAGT-G' @@ -7722,8 +7341,7 @@ def test_variant205(self): 
assert 'NM_006468.7:c.1070+35_1070+38del' in list(results.keys()) assert results['NM_006468.7:c.1070+35_1070+38del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_006468.7:c.1070+35_1070+38del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_006468.7:c.1070+35_1070+38del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'HG1287_PATCH', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'NW_003871055.3', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}] - assert results['NM_006468.7:c.1070+35_1070+38del']['transcript_description'] == 'Homo sapiens RNA polymerase III subunit C (POLR3C), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_006468.7:c.1070+35_1070+38del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'HG1287_PATCH', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'NW_003871055.3', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}]) assert results['NM_006468.7:c.1070+35_1070+38del']['gene_symbol'] == 'POLR3C' assert results['NM_006468.7:c.1070+35_1070+38del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006459.3:p.?', 'slr': 'NP_006459.3:p.?'} assert results['NM_006468.7:c.1070+35_1070+38del']['submitted_variant'] == '1-145597475-GAAGT-G' @@ -7747,8 +7365,7 @@ def test_variant206(self): assert 'NM_020699.2:c.562_563del' in list(results.keys()) assert results['NM_020699.2:c.562_563del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_020699.2:c.562_563del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_020699.2:c.562_563del']['alt_genomic_loci'] == [] - assert results['NM_020699.2:c.562_563del']['transcript_description'] == 'Homo sapiens GATA zinc finger domain 
containing 2B (GATAD2B), mRNA' + self.assertCountEqual(results['NM_020699.2:c.562_563del']['alt_genomic_loci'], []) assert results['NM_020699.2:c.562_563del']['gene_symbol'] == 'GATAD2B' assert results['NM_020699.2:c.562_563del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065750.1:p.(Gln188GlufsTer36)', 'slr': 'NP_065750.1:p.(Q188Efs*36)'} assert results['NM_020699.2:c.562_563del']['submitted_variant'] == '1-153791300-CTG-C' @@ -7765,8 +7382,7 @@ def test_variant206(self): assert 'NM_020699.3:c.562_563del' in list(results.keys()) assert results['NM_020699.3:c.562_563del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_020699.3:c.562_563del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_020699.3:c.562_563del']['alt_genomic_loci'] == [] - assert results['NM_020699.3:c.562_563del']['transcript_description'] == 'Homo sapiens GATA zinc finger domain containing 2B (GATAD2B), mRNA' + self.assertCountEqual(results['NM_020699.3:c.562_563del']['alt_genomic_loci'], []) assert results['NM_020699.3:c.562_563del']['gene_symbol'] == 'GATAD2B' assert results['NM_020699.3:c.562_563del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065750.1:p.(Gln188GlufsTer36)', 'slr': 'NP_065750.1:p.(Q188Efs*36)'} assert results['NM_020699.3:c.562_563del']['submitted_variant'] == '1-153791300-CTG-C' @@ -7789,8 +7405,7 @@ def test_variant207(self): assert 'NM_005572.3:c.711_734delinsCCCC' in list(results.keys()) assert results['NM_005572.3:c.711_734delinsCCCC']['hgvs_lrg_transcript_variant'] == 'LRG_254t1:c.711_734delinsCCCC' assert results['NM_005572.3:c.711_734delinsCCCC']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_005572.3:c.711_734delinsCCCC']['alt_genomic_loci'] == [] - assert results['NM_005572.3:c.711_734delinsCCCC']['transcript_description'] == 'Homo sapiens lamin A/C (LMNA), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_005572.3:c.711_734delinsCCCC']['alt_genomic_loci'], []) assert 
results['NM_005572.3:c.711_734delinsCCCC']['gene_symbol'] == 'LMNA' assert results['NM_005572.3:c.711_734delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005563.1(LRG_254p1):p.(Glu238ProfsTer9)', 'slr': 'NP_005563.1:p.(E238Pfs*9)'} assert results['NM_005572.3:c.711_734delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' @@ -7807,8 +7422,7 @@ def test_variant207(self): assert 'NM_001257374.1:c.375_398delinsCCCC' in list(results.keys()) assert results['NM_001257374.1:c.375_398delinsCCCC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001257374.1:c.375_398delinsCCCC']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001257374.1:c.375_398delinsCCCC']['alt_genomic_loci'] == [] - assert results['NM_001257374.1:c.375_398delinsCCCC']['transcript_description'] == 'Homo sapiens lamin A/C (LMNA), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001257374.1:c.375_398delinsCCCC']['alt_genomic_loci'], []) assert results['NM_001257374.1:c.375_398delinsCCCC']['gene_symbol'] == 'LMNA' assert results['NM_001257374.1:c.375_398delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001244303.1:p.(Glu126ProfsTer9)', 'slr': 'NP_001244303.1:p.(E126Pfs*9)'} assert results['NM_001257374.1:c.375_398delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' @@ -7825,8 +7439,7 @@ def test_variant207(self): assert 'NM_001257374.2:c.375_398delinsCCCC' in list(results.keys()) assert results['NM_001257374.2:c.375_398delinsCCCC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001257374.2:c.375_398delinsCCCC']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001257374.2:c.375_398delinsCCCC']['alt_genomic_loci'] == [] - assert results['NM_001257374.2:c.375_398delinsCCCC']['transcript_description'] == 'Homo sapiens lamin A/C (LMNA), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001257374.2:c.375_398delinsCCCC']['alt_genomic_loci'], 
[]) assert results['NM_001257374.2:c.375_398delinsCCCC']['gene_symbol'] == 'LMNA' assert results['NM_001257374.2:c.375_398delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001244303.1:p.(Glu126ProfsTer9)', 'slr': 'NP_001244303.1:p.(E126Pfs*9)'} assert results['NM_001257374.2:c.375_398delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' @@ -7843,8 +7456,7 @@ def test_variant207(self): assert 'NM_001282624.1:c.468_491delinsCCCC' in list(results.keys()) assert results['NM_001282624.1:c.468_491delinsCCCC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001282624.1:c.468_491delinsCCCC']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001282624.1:c.468_491delinsCCCC']['alt_genomic_loci'] == [] - assert results['NM_001282624.1:c.468_491delinsCCCC']['transcript_description'] == 'Homo sapiens lamin A/C (LMNA), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001282624.1:c.468_491delinsCCCC']['alt_genomic_loci'], []) assert results['NM_001282624.1:c.468_491delinsCCCC']['gene_symbol'] == 'LMNA' assert results['NM_001282624.1:c.468_491delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269553.1:p.(Glu157ProfsTer9)', 'slr': 'NP_001269553.1:p.(E157Pfs*9)'} assert results['NM_001282624.1:c.468_491delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' @@ -7862,8 +7474,7 @@ def test_variant207(self): assert 'NM_170708.3:c.711_734delinsCCCC' in list(results.keys()) assert results['NM_170708.3:c.711_734delinsCCCC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_170708.3:c.711_734delinsCCCC']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_170708.3:c.711_734delinsCCCC']['alt_genomic_loci'] == [] - assert results['NM_170708.3:c.711_734delinsCCCC']['transcript_description'] == 'Homo sapiens lamin A/C (LMNA), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_170708.3:c.711_734delinsCCCC']['alt_genomic_loci'], []) 
assert results['NM_170708.3:c.711_734delinsCCCC']['gene_symbol'] == 'LMNA' assert results['NM_170708.3:c.711_734delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_733822.1(LRG_254p3):p.(Glu238ProfsTer9)', 'slr': 'NP_733822.1:p.(E238Pfs*9)'} assert results['NM_170708.3:c.711_734delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' @@ -7880,8 +7491,7 @@ def test_variant207(self): assert 'NM_170707.3:c.711_734delinsCCCC' in list(results.keys()) assert results['NM_170707.3:c.711_734delinsCCCC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_170707.3:c.711_734delinsCCCC']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_170707.3:c.711_734delinsCCCC']['alt_genomic_loci'] == [] - assert results['NM_170707.3:c.711_734delinsCCCC']['transcript_description'] == 'Homo sapiens lamin A/C (LMNA), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_170707.3:c.711_734delinsCCCC']['alt_genomic_loci'], []) assert results['NM_170707.3:c.711_734delinsCCCC']['gene_symbol'] == 'LMNA' assert results['NM_170707.3:c.711_734delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_733821.1(LRG_254p2):p.(Glu238ProfsTer9)', 'slr': 'NP_733821.1:p.(E238Pfs*9)'} assert results['NM_170707.3:c.711_734delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' @@ -7898,8 +7508,7 @@ def test_variant207(self): assert 'NM_001282626.1:c.711_734delinsCCCC' in list(results.keys()) assert results['NM_001282626.1:c.711_734delinsCCCC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001282626.1:c.711_734delinsCCCC']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001282626.1:c.711_734delinsCCCC']['alt_genomic_loci'] == [] - assert results['NM_001282626.1:c.711_734delinsCCCC']['transcript_description'] == 'Homo sapiens lamin A/C (LMNA), transcript variant 7, mRNA' + self.assertCountEqual(results['NM_001282626.1:c.711_734delinsCCCC']['alt_genomic_loci'], []) assert 
results['NM_001282626.1:c.711_734delinsCCCC']['gene_symbol'] == 'LMNA' assert results['NM_001282626.1:c.711_734delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269555.1:p.(Glu238ProfsTer9)', 'slr': 'NP_001269555.1:p.(E238Pfs*9)'} assert results['NM_001282626.1:c.711_734delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' @@ -7916,8 +7525,7 @@ def test_variant207(self): assert 'NM_001282625.1:c.711_734delinsCCCC' in list(results.keys()) assert results['NM_001282625.1:c.711_734delinsCCCC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001282625.1:c.711_734delinsCCCC']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001282625.1:c.711_734delinsCCCC']['alt_genomic_loci'] == [] - assert results['NM_001282625.1:c.711_734delinsCCCC']['transcript_description'] == 'Homo sapiens lamin A/C (LMNA), transcript variant 6, mRNA' + self.assertCountEqual(results['NM_001282625.1:c.711_734delinsCCCC']['alt_genomic_loci'], []) assert results['NM_001282625.1:c.711_734delinsCCCC']['gene_symbol'] == 'LMNA' assert results['NM_001282625.1:c.711_734delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269554.1:p.(Glu238ProfsTer9)', 'slr': 'NP_001269554.1:p.(E238Pfs*9)'} assert results['NM_001282625.1:c.711_734delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' @@ -7940,8 +7548,7 @@ def test_variant208(self): assert 'NM_170707.3:c.1961dup' in list(results.keys()) assert results['NM_170707.3:c.1961dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_170707.3:c.1961dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_170707.3:c.1961dup']['alt_genomic_loci'] == [] - assert results['NM_170707.3:c.1961dup']['transcript_description'] == 'Homo sapiens lamin A/C (LMNA), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_170707.3:c.1961dup']['alt_genomic_loci'], []) assert results['NM_170707.3:c.1961dup']['gene_symbol'] == 'LMNA' assert 
results['NM_170707.3:c.1961dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_733821.1(LRG_254p2):p.(Thr655AsnfsTer49)', 'slr': 'NP_733821.1:p.(T655Nfs*49)'} assert results['NM_170707.3:c.1961dup']['submitted_variant'] == '1-156108541-G-GG' @@ -7958,8 +7565,7 @@ def test_variant208(self): assert 'NM_001282626.1:c.1818+143dup' in list(results.keys()) assert results['NM_001282626.1:c.1818+143dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001282626.1:c.1818+143dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001282626.1:c.1818+143dup']['alt_genomic_loci'] == [] - assert results['NM_001282626.1:c.1818+143dup']['transcript_description'] == 'Homo sapiens lamin A/C (LMNA), transcript variant 7, mRNA' + self.assertCountEqual(results['NM_001282626.1:c.1818+143dup']['alt_genomic_loci'], []) assert results['NM_001282626.1:c.1818+143dup']['gene_symbol'] == 'LMNA' assert results['NM_001282626.1:c.1818+143dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269555.1:p.?', 'slr': 'NP_001269555.1:p.?'} assert results['NM_001282626.1:c.1818+143dup']['submitted_variant'] == '1-156108541-G-GG' @@ -7977,8 +7583,7 @@ def test_variant208(self): assert 'NM_001257374.2:c.1625dup' in list(results.keys()) assert results['NM_001257374.2:c.1625dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001257374.2:c.1625dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001257374.2:c.1625dup']['alt_genomic_loci'] == [] - assert results['NM_001257374.2:c.1625dup']['transcript_description'] == 'Homo sapiens lamin A/C (LMNA), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001257374.2:c.1625dup']['alt_genomic_loci'], []) assert results['NM_001257374.2:c.1625dup']['gene_symbol'] == 'LMNA' assert results['NM_001257374.2:c.1625dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001244303.1:p.(Thr543AsnfsTer90)', 'slr': 'NP_001244303.1:p.(T543Nfs*90)'} assert 
results['NM_001257374.2:c.1625dup']['submitted_variant'] == '1-156108541-G-GG' @@ -7995,8 +7600,7 @@ def test_variant208(self): assert 'NM_170708.3:c.1871dup' in list(results.keys()) assert results['NM_170708.3:c.1871dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_170708.3:c.1871dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_170708.3:c.1871dup']['alt_genomic_loci'] == [] - assert results['NM_170708.3:c.1871dup']['transcript_description'] == 'Homo sapiens lamin A/C (LMNA), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_170708.3:c.1871dup']['alt_genomic_loci'], []) assert results['NM_170708.3:c.1871dup']['gene_symbol'] == 'LMNA' assert results['NM_170708.3:c.1871dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_733822.1(LRG_254p3):p.(Thr625AsnfsTer49)', 'slr': 'NP_733822.1:p.(T625Nfs*49)'} assert results['NM_170708.3:c.1871dup']['submitted_variant'] == '1-156108541-G-GG' @@ -8013,8 +7617,7 @@ def test_variant208(self): assert 'NM_001257374.1:c.1625dup' in list(results.keys()) assert results['NM_001257374.1:c.1625dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001257374.1:c.1625dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001257374.1:c.1625dup']['alt_genomic_loci'] == [] - assert results['NM_001257374.1:c.1625dup']['transcript_description'] == 'Homo sapiens lamin A/C (LMNA), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001257374.1:c.1625dup']['alt_genomic_loci'], []) assert results['NM_001257374.1:c.1625dup']['gene_symbol'] == 'LMNA' assert results['NM_001257374.1:c.1625dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001244303.1:p.(Thr543AsnfsTer90)', 'slr': 'NP_001244303.1:p.(T543Nfs*90)'} assert results['NM_001257374.1:c.1625dup']['submitted_variant'] == '1-156108541-G-GG' @@ -8038,8 +7641,7 @@ def test_variant209(self): assert 'NM_001315491.1:c.1A>T' in list(results.keys()) assert 
results['NM_001315491.1:c.1A>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001315491.1:c.1A>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001315491.1:c.1A>T']['alt_genomic_loci'] == [] - assert results['NM_001315491.1:c.1A>T']['transcript_description'] == 'Homo sapiens myelin protein zero (MPZ), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001315491.1:c.1A>T']['alt_genomic_loci'], []) assert results['NM_001315491.1:c.1A>T']['gene_symbol'] == 'MPZ' assert results['NM_001315491.1:c.1A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001302420.1:p.(Met1?)', 'slr': 'NP_001302420.1:p.(M1?)'} assert results['NM_001315491.1:c.1A>T']['submitted_variant'] == '1-161279695-T-A' @@ -8056,8 +7658,7 @@ def test_variant209(self): assert 'NM_000530.7:c.1A>T' in list(results.keys()) assert results['NM_000530.7:c.1A>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000530.7:c.1A>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000530.7:c.1A>T']['alt_genomic_loci'] == [] - assert results['NM_000530.7:c.1A>T']['transcript_description'] == 'Homo sapiens myelin protein zero (MPZ), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000530.7:c.1A>T']['alt_genomic_loci'], []) assert results['NM_000530.7:c.1A>T']['gene_symbol'] == 'MPZ' assert results['NM_000530.7:c.1A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000521.2(LRG_256p1):p.(Met1?)', 'slr': 'NP_000521.2:p.(M1?)'} assert results['NM_000530.7:c.1A>T']['submitted_variant'] == '1-161279695-T-A' @@ -8074,8 +7675,7 @@ def test_variant209(self): assert 'NM_000530.6:c.1A>T' in list(results.keys()) assert results['NM_000530.6:c.1A>T']['hgvs_lrg_transcript_variant'] == 'LRG_256t1:c.1A>T' assert results['NM_000530.6:c.1A>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000530.6:c.1A>T']['alt_genomic_loci'] == [] - assert results['NM_000530.6:c.1A>T']['transcript_description'] == 'Homo sapiens 
myelin protein zero (MPZ), mRNA' + self.assertCountEqual(results['NM_000530.6:c.1A>T']['alt_genomic_loci'], []) assert results['NM_000530.6:c.1A>T']['gene_symbol'] == 'MPZ' assert results['NM_000530.6:c.1A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000521.2(LRG_256p1):p.(Met1?)', 'slr': 'NP_000521.2:p.(M1?)'} assert results['NM_000530.6:c.1A>T']['submitted_variant'] == '1-161279695-T-A' @@ -8099,8 +7699,7 @@ def test_variant210(self): assert 'NM_000130.4:c.1601G>A' in list(results.keys()) assert results['NM_000130.4:c.1601G>A']['hgvs_lrg_transcript_variant'] == 'LRG_553t1:c.1601G>A' assert results['NM_000130.4:c.1601G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000130.4:c.1601G>A']['alt_genomic_loci'] == [] - assert results['NM_000130.4:c.1601G>A']['transcript_description'] == 'Homo sapiens coagulation factor V (F5), mRNA' + self.assertCountEqual(results['NM_000130.4:c.1601G>A']['alt_genomic_loci'], []) assert results['NM_000130.4:c.1601G>A']['gene_symbol'] == 'F5' assert results['NM_000130.4:c.1601G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000121.2(LRG_553p1):p.(Arg534Gln)', 'slr': 'NP_000121.2:p.(R534Q)'} assert results['NM_000130.4:c.1601G>A']['submitted_variant'] == '1-169519049-T-T' @@ -8123,8 +7722,7 @@ def test_variant211(self): assert 'NM_003240.4:c.774C>T' in list(results.keys()) assert results['NM_003240.4:c.774C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003240.4:c.774C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003240.4:c.774C>T']['alt_genomic_loci'] == [] - assert results['NM_003240.4:c.774C>T']['transcript_description'] == 'Homo sapiens left-right determination factor 2 (LEFTY2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003240.4:c.774C>T']['alt_genomic_loci'], []) assert results['NM_003240.4:c.774C>T']['gene_symbol'] == 'LEFTY2' assert results['NM_003240.4:c.774C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_003231.2:p.(Thr258=)', 'slr': 'NP_003231.2:p.(T258=)'} assert results['NM_003240.4:c.774C>T']['submitted_variant'] == '1-226125468-G-A' @@ -8141,8 +7739,7 @@ def test_variant211(self): assert 'NM_003240.3:c.774C>T' in list(results.keys()) assert results['NM_003240.3:c.774C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003240.3:c.774C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003240.3:c.774C>T']['alt_genomic_loci'] == [] - assert results['NM_003240.3:c.774C>T']['transcript_description'] == 'Homo sapiens left-right determination factor 2 (LEFTY2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003240.3:c.774C>T']['alt_genomic_loci'], []) assert results['NM_003240.3:c.774C>T']['gene_symbol'] == 'LEFTY2' assert results['NM_003240.3:c.774C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003231.2:p.(Thr258=)', 'slr': 'NP_003231.2:p.(T258=)'} assert results['NM_003240.3:c.774C>T']['submitted_variant'] == '1-226125468-G-A' @@ -8159,8 +7756,7 @@ def test_variant211(self): assert 'NM_001172425.1:c.672C>T' in list(results.keys()) assert results['NM_001172425.1:c.672C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001172425.1:c.672C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001172425.1:c.672C>T']['alt_genomic_loci'] == [] - assert results['NM_001172425.1:c.672C>T']['transcript_description'] == 'Homo sapiens left-right determination factor 2 (LEFTY2), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001172425.1:c.672C>T']['alt_genomic_loci'], []) assert results['NM_001172425.1:c.672C>T']['gene_symbol'] == 'LEFTY2' assert results['NM_001172425.1:c.672C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001165896.1:p.(Thr224=)', 'slr': 'NP_001165896.1:p.(T224=)'} assert results['NM_001172425.1:c.672C>T']['submitted_variant'] == '1-226125468-G-A' @@ -8177,8 +7773,7 @@ def test_variant211(self): assert 'NM_001172425.2:c.672C>T' in 
list(results.keys()) assert results['NM_001172425.2:c.672C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001172425.2:c.672C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001172425.2:c.672C>T']['alt_genomic_loci'] == [] - assert results['NM_001172425.2:c.672C>T']['transcript_description'] == 'Homo sapiens left-right determination factor 2 (LEFTY2), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001172425.2:c.672C>T']['alt_genomic_loci'], []) assert results['NM_001172425.2:c.672C>T']['gene_symbol'] == 'LEFTY2' assert results['NM_001172425.2:c.672C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001165896.1:p.(Thr224=)', 'slr': 'NP_001165896.1:p.(T224=)'} assert results['NM_001172425.2:c.672C>T']['submitted_variant'] == '1-226125468-G-A' @@ -8203,8 +7798,7 @@ def test_variant212(self): assert 'NM_001126049.1:c.-794_-792del' in list(results.keys()) assert results['NM_001126049.1:c.-794_-792del']['hgvs_lrg_transcript_variant'] == 'LRG_1087t1:c.-794_-792del' assert results['NM_001126049.1:c.-794_-792del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001126049.1:c.-794_-792del']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NW_013171807.1:g.79106_79108del', 'vcf': {'chr': 'HG2334_PATCH', 'ref': 'CGCA', 'pos': '79102', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NW_013171807.1:g.79106_79108del', 'vcf': {'chr': 'NW_013171807.1', 'ref': 'CGCA', 'pos': '79102', 'alt': 'C'}}}] - assert results['NM_001126049.1:c.-794_-792del']['transcript_description'] == 'Homo sapiens killin, p53 regulated DNA replication inhibitor (KLLN), mRNA' + self.assertCountEqual(results['NM_001126049.1:c.-794_-792del']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NW_013171807.1:g.79106_79108del', 'vcf': {'chr': 'HG2334_PATCH', 'ref': 'CGCA', 'pos': '79102', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NW_013171807.1:g.79106_79108del', 'vcf': {'chr': 
'NW_013171807.1', 'ref': 'CGCA', 'pos': '79102', 'alt': 'C'}}}]) assert results['NM_001126049.1:c.-794_-792del']['gene_symbol'] == 'KLLN' assert results['NM_001126049.1:c.-794_-792del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119521.1:p.?', 'slr': 'NP_001119521.1:p.?'} assert results['NM_001126049.1:c.-794_-792del']['submitted_variant'] == '10-89623035-CGCA-C' @@ -8227,8 +7821,7 @@ def test_variant213(self): assert 'NR_037946.1:n.3896G>T' in list(results.keys()) assert results['NR_037946.1:n.3896G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NR_037946.1:n.3896G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_037946.1:n.3896G>T']['alt_genomic_loci'] == [] - assert results['NR_037946.1:n.3896G>T']['transcript_description'] == 'Homo sapiens HNRNPUL2-BSCL2 readthrough (NMD candidate) (HNRNPUL2-BSCL2), long non-coding RNA' + self.assertCountEqual(results['NR_037946.1:n.3896G>T']['alt_genomic_loci'], []) assert results['NR_037946.1:n.3896G>T']['gene_symbol'] == 'HNRNPUL2-BSCL2' assert results['NR_037946.1:n.3896G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_037946.1:n.3896G>T']['submitted_variant'] == '11-62457852-C-A' @@ -8245,8 +7838,7 @@ def test_variant213(self): assert 'NM_032667.6:c.1184G>T' in list(results.keys()) assert results['NM_032667.6:c.1184G>T']['hgvs_lrg_transcript_variant'] == 'LRG_235t2:c.1184G>T' assert results['NM_032667.6:c.1184G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_032667.6:c.1184G>T']['alt_genomic_loci'] == [] - assert results['NM_032667.6:c.1184G>T']['transcript_description'] == 'Homo sapiens BSCL2, seipin lipid droplet biogenesis associated (BSCL2), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_032667.6:c.1184G>T']['alt_genomic_loci'], []) assert results['NM_032667.6:c.1184G>T']['gene_symbol'] == 'BSCL2' assert 
results['NM_032667.6:c.1184G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116056.3(LRG_235p2):p.(Cys395Phe)', 'slr': 'NP_116056.3:p.(C395F)'} assert results['NM_032667.6:c.1184G>T']['submitted_variant'] == '11-62457852-C-A' @@ -8263,8 +7855,7 @@ def test_variant213(self): assert 'NR_037949.1:n.1984G>T' in list(results.keys()) assert results['NR_037949.1:n.1984G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NR_037949.1:n.1984G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_037949.1:n.1984G>T']['alt_genomic_loci'] == [] - assert results['NR_037949.1:n.1984G>T']['transcript_description'] == 'Homo sapiens BSCL2, seipin lipid droplet biogenesis associated (BSCL2), transcript variant 5, non-coding RNA' + self.assertCountEqual(results['NR_037949.1:n.1984G>T']['alt_genomic_loci'], []) assert results['NR_037949.1:n.1984G>T']['gene_symbol'] == 'BSCL2' assert results['NR_037949.1:n.1984G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_037949.1:n.1984G>T']['submitted_variant'] == '11-62457852-C-A' @@ -8281,8 +7872,7 @@ def test_variant213(self): assert 'NR_037948.1:n.1978G>T' in list(results.keys()) assert results['NR_037948.1:n.1978G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NR_037948.1:n.1978G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_037948.1:n.1978G>T']['alt_genomic_loci'] == [] - assert results['NR_037948.1:n.1978G>T']['transcript_description'] == 'Homo sapiens BSCL2, seipin lipid droplet biogenesis associated (BSCL2), transcript variant 4, non-coding RNA' + self.assertCountEqual(results['NR_037948.1:n.1978G>T']['alt_genomic_loci'], []) assert results['NR_037948.1:n.1978G>T']['gene_symbol'] == 'BSCL2' assert results['NR_037948.1:n.1978G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_037948.1:n.1978G>T']['submitted_variant'] == '11-62457852-C-A' @@ 
-8299,8 +7889,7 @@ def test_variant213(self): assert 'NM_001122955.3:c.1376G>T' in list(results.keys()) assert results['NM_001122955.3:c.1376G>T']['hgvs_lrg_transcript_variant'] == 'LRG_235t1:c.1376G>T' assert results['NM_001122955.3:c.1376G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001122955.3:c.1376G>T']['alt_genomic_loci'] == [] - assert results['NM_001122955.3:c.1376G>T']['transcript_description'] == 'Homo sapiens BSCL2, seipin lipid droplet biogenesis associated (BSCL2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001122955.3:c.1376G>T']['alt_genomic_loci'], []) assert results['NM_001122955.3:c.1376G>T']['gene_symbol'] == 'BSCL2' assert results['NM_001122955.3:c.1376G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001116427.1:p.(Cys459Phe)', 'slr': 'NP_001116427.1:p.(C459F)'} assert results['NM_001122955.3:c.1376G>T']['submitted_variant'] == '11-62457852-C-A' @@ -8318,8 +7907,7 @@ def test_variant213(self): assert 'NM_001130702.2:c.*178G>T' in list(results.keys()) assert results['NM_001130702.2:c.*178G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001130702.2:c.*178G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001130702.2:c.*178G>T']['alt_genomic_loci'] == [] - assert results['NM_001130702.2:c.*178G>T']['transcript_description'] == 'Homo sapiens BSCL2, seipin lipid droplet biogenesis associated (BSCL2), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001130702.2:c.*178G>T']['alt_genomic_loci'], []) assert results['NM_001130702.2:c.*178G>T']['gene_symbol'] == 'BSCL2' assert results['NM_001130702.2:c.*178G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124174.2:p.?', 'slr': 'NP_001124174.2:p.?'} assert results['NM_001130702.2:c.*178G>T']['submitted_variant'] == '11-62457852-C-A' @@ -8342,8 +7930,7 @@ def test_variant214(self): assert 'NM_001351834.1:c.5761_5762insT' in list(results.keys()) assert 
results['NM_001351834.1:c.5761_5762insT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001351834.1:c.5761_5762insT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001351834.1:c.5761_5762insT']['alt_genomic_loci'] == [] - assert results['NM_001351834.1:c.5761_5762insT']['transcript_description'] == 'Homo sapiens ATM serine/threonine kinase (ATM), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001351834.1:c.5761_5762insT']['alt_genomic_loci'], []) assert results['NM_001351834.1:c.5761_5762insT']['gene_symbol'] == 'ATM' assert results['NM_001351834.1:c.5761_5762insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001338763.1:p.(Arg1921MetfsTer9)', 'slr': 'NP_001338763.1:p.(R1921Mfs*9)'} assert results['NM_001351834.1:c.5761_5762insT']['submitted_variant'] == '11-108178710-A-AT' @@ -8361,8 +7948,7 @@ def test_variant214(self): assert 'NM_000051.3:c.5761_5762insT' in list(results.keys()) assert results['NM_000051.3:c.5761_5762insT']['hgvs_lrg_transcript_variant'] == 'LRG_135t1:c.5761_5762insT' assert results['NM_000051.3:c.5761_5762insT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000051.3:c.5761_5762insT']['alt_genomic_loci'] == [] - assert results['NM_000051.3:c.5761_5762insT']['transcript_description'] == 'Homo sapiens ATM serine/threonine kinase (ATM), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_000051.3:c.5761_5762insT']['alt_genomic_loci'], []) assert results['NM_000051.3:c.5761_5762insT']['gene_symbol'] == 'ATM' assert results['NM_000051.3:c.5761_5762insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000042.3(LRG_135p1):p.(Arg1921MetfsTer9)', 'slr': 'NP_000042.3:p.(R1921Mfs*9)'} assert results['NM_000051.3:c.5761_5762insT']['submitted_variant'] == '11-108178710-A-AT' @@ -8385,8 +7971,7 @@ def test_variant215(self): assert 'NM_001352419.1:c.-108-7C>T' in list(results.keys()) assert results['NM_001352419.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' 
assert results['NM_001352419.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352419.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] - assert results['NM_001352419.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 15, mRNA' + self.assertCountEqual(results['NM_001352419.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) assert results['NM_001352419.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352419.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339348.1:p.?', 'slr': 'NP_001339348.1:p.?'} assert results['NM_001352419.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' @@ -8403,8 +7988,7 @@ def test_variant215(self): assert 'NM_001352412.1:c.-108-7C>T' in list(results.keys()) assert results['NM_001352412.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352412.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352412.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] - assert 
results['NM_001352412.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 8, mRNA' + self.assertCountEqual(results['NM_001352412.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) assert results['NM_001352412.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352412.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339341.1:p.?', 'slr': 'NP_001339341.1:p.?'} assert results['NM_001352412.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' @@ -8421,8 +8005,7 @@ def test_variant215(self): assert 'NM_001077692.1:c.-108-7C>T' in list(results.keys()) assert results['NM_001077692.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001077692.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001077692.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] - assert results['NM_001077692.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001077692.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': 
'117249', 'alt': 'A'}}}]) assert results['NM_001077692.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001077692.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001071160.1:p.?', 'slr': 'NP_001071160.1:p.?'} assert results['NM_001077692.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' @@ -8439,8 +8022,7 @@ def test_variant215(self): assert 'NM_001352418.1:c.406-7C>T' in list(results.keys()) assert results['NM_001352418.1:c.406-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352418.1:c.406-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352418.1:c.406-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] - assert results['NM_001352418.1:c.406-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 14, mRNA' + self.assertCountEqual(results['NM_001352418.1:c.406-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) assert results['NM_001352418.1:c.406-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352418.1:c.406-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339347.1:p.?', 'slr': 'NP_001339347.1:p.?'} assert results['NM_001352418.1:c.406-7C>T']['submitted_variant'] == '11-111735981-G-A' @@ -8457,8 +8039,7 @@ def test_variant215(self): assert 'NM_001352423.1:c.-108-7C>T' in list(results.keys()) assert results['NM_001352423.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] 
== '' assert results['NM_001352423.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352423.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] - assert results['NM_001352423.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 19, mRNA' + self.assertCountEqual(results['NM_001352423.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) assert results['NM_001352423.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352423.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339352.1:p.?', 'slr': 'NP_001339352.1:p.?'} assert results['NM_001352423.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' @@ -8475,8 +8056,7 @@ def test_variant215(self): assert 'NM_001352415.1:c.-108-7C>T' in list(results.keys()) assert results['NM_001352415.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352415.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352415.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] - assert 
results['NM_001352415.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 11, mRNA' + self.assertCountEqual(results['NM_001352415.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) assert results['NM_001352415.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352415.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339344.1:p.?', 'slr': 'NP_001339344.1:p.?'} assert results['NM_001352415.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' @@ -8493,8 +8073,7 @@ def test_variant215(self): assert 'NM_001352421.1:c.-108-7C>T' in list(results.keys()) assert results['NM_001352421.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352421.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352421.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] - assert results['NM_001352421.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 17, mRNA' + self.assertCountEqual(results['NM_001352421.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 
'pos': '117249', 'alt': 'A'}}}]) assert results['NM_001352421.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352421.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339350.1:p.?', 'slr': 'NP_001339350.1:p.?'} assert results['NM_001352421.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' @@ -8511,8 +8090,7 @@ def test_variant215(self): assert 'NM_001352411.1:c.-108-7C>T' in list(results.keys()) assert results['NM_001352411.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352411.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352411.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] - assert results['NM_001352411.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 7, mRNA' + self.assertCountEqual(results['NM_001352411.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) assert results['NM_001352411.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352411.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339340.1:p.?', 'slr': 'NP_001339340.1:p.?'} assert results['NM_001352411.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' @@ -8529,8 +8107,7 @@ def test_variant215(self): assert 'NR_147984.1:n.782-7C>T' in list(results.keys()) assert 
results['NR_147984.1:n.782-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NR_147984.1:n.782-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_147984.1:n.782-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] - assert results['NR_147984.1:n.782-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 20, non-coding RNA' + self.assertCountEqual(results['NR_147984.1:n.782-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) assert results['NR_147984.1:n.782-7C>T']['gene_symbol'] == 'ALG9' assert results['NR_147984.1:n.782-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_147984.1:n.782-7C>T']['submitted_variant'] == '11-111735981-G-A' @@ -8547,8 +8124,7 @@ def test_variant215(self): assert 'NM_001077691.1:c.-108-7C>T' in list(results.keys()) assert results['NM_001077691.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001077691.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001077691.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] - 
assert results['NM_001077691.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001077691.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) assert results['NM_001077691.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001077691.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001071159.1:p.?', 'slr': 'NP_001071159.1:p.?'} assert results['NM_001077691.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' @@ -8565,8 +8141,7 @@ def test_variant215(self): assert 'NM_001352410.1:c.-108-7C>T' in list(results.keys()) assert results['NM_001352410.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352410.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352410.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] - assert results['NM_001352410.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 6, mRNA' + self.assertCountEqual(results['NM_001352410.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 
'pos': '117249', 'alt': 'A'}}}]) assert results['NM_001352410.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352410.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339339.1:p.?', 'slr': 'NP_001339339.1:p.?'} assert results['NM_001352410.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' @@ -8583,8 +8158,7 @@ def test_variant215(self): assert 'NM_001077690.1:c.406-7C>T' in list(results.keys()) assert results['NM_001077690.1:c.406-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001077690.1:c.406-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001077690.1:c.406-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] - assert results['NM_001077690.1:c.406-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001077690.1:c.406-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) assert results['NM_001077690.1:c.406-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001077690.1:c.406-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001071158.1:p.?', 'slr': 'NP_001071158.1:p.?'} assert results['NM_001077690.1:c.406-7C>T']['submitted_variant'] == '11-111735981-G-A' @@ -8602,8 +8176,7 @@ def test_variant215(self): assert 'NM_001352422.1:c.-326-7C>T' in list(results.keys()) assert 
results['NM_001352422.1:c.-326-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352422.1:c.-326-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352422.1:c.-326-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] - assert results['NM_001352422.1:c.-326-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 18, mRNA' + self.assertCountEqual(results['NM_001352422.1:c.-326-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) assert results['NM_001352422.1:c.-326-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352422.1:c.-326-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339351.1:p.?', 'slr': 'NP_001339351.1:p.?'} assert results['NM_001352422.1:c.-326-7C>T']['submitted_variant'] == '11-111735981-G-A' @@ -8620,8 +8193,7 @@ def test_variant215(self): assert 'NM_001352416.1:c.-108-7C>T' in list(results.keys()) assert results['NM_001352416.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352416.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352416.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 
'pos': '117249', 'alt': 'A'}}}] - assert results['NM_001352416.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 12, mRNA' + self.assertCountEqual(results['NM_001352416.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) assert results['NM_001352416.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352416.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339345.1:p.?', 'slr': 'NP_001339345.1:p.?'} assert results['NM_001352416.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' @@ -8638,8 +8210,7 @@ def test_variant215(self): assert 'NM_001352420.1:c.-108-7C>T' in list(results.keys()) assert results['NM_001352420.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352420.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352420.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] - assert results['NM_001352420.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 16, mRNA' + self.assertCountEqual(results['NM_001352420.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': 
{'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) assert results['NM_001352420.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352420.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339349.1:p.?', 'slr': 'NP_001339349.1:p.?'} assert results['NM_001352420.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' @@ -8656,8 +8227,7 @@ def test_variant215(self): assert 'NM_024740.2:c.406-7C>T' in list(results.keys()) assert results['NM_024740.2:c.406-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_024740.2:c.406-7C>T']['refseqgene_context_intronic_sequence'] == 'NG_009210.1(NM_024740.2):c.406-7C>T' - assert results['NM_024740.2:c.406-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] - assert results['NM_024740.2:c.406-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_024740.2:c.406-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) assert results['NM_024740.2:c.406-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_024740.2:c.406-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_079016.2:p.?', 'slr': 'NP_079016.2:p.?'} assert results['NM_024740.2:c.406-7C>T']['submitted_variant'] == '11-111735981-G-A' @@ -8674,8 +8244,7 @@ def test_variant215(self): assert 'NM_001352414.1:c.-108-7C>T' in list(results.keys()) assert 
results['NM_001352414.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352414.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352414.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] - assert results['NM_001352414.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 10, mRNA' + self.assertCountEqual(results['NM_001352414.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) assert results['NM_001352414.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352414.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339343.1:p.?', 'slr': 'NP_001339343.1:p.?'} assert results['NM_001352414.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' @@ -8692,8 +8261,7 @@ def test_variant215(self): assert 'NM_001352417.1:c.406-7C>T' in list(results.keys()) assert results['NM_001352417.1:c.406-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352417.1:c.406-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352417.1:c.406-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': 
'117249', 'alt': 'A'}}}] - assert results['NM_001352417.1:c.406-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 13, mRNA' + self.assertCountEqual(results['NM_001352417.1:c.406-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) assert results['NM_001352417.1:c.406-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352417.1:c.406-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339346.1:p.?', 'slr': 'NP_001339346.1:p.?'} assert results['NM_001352417.1:c.406-7C>T']['submitted_variant'] == '11-111735981-G-A' @@ -8710,8 +8278,7 @@ def test_variant215(self): assert 'NM_001352409.1:c.-108-7C>T' in list(results.keys()) assert results['NM_001352409.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352409.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352409.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] - assert results['NM_001352409.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001352409.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 
'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) assert results['NM_001352409.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352409.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339338.1:p.?', 'slr': 'NP_001339338.1:p.?'} assert results['NM_001352409.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' @@ -8728,8 +8295,7 @@ def test_variant215(self): assert 'NM_001352413.1:c.-108-7C>T' in list(results.keys()) assert results['NM_001352413.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001352413.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001352413.1:c.-108-7C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}] - assert results['NM_001352413.1:c.-108-7C>T']['transcript_description'] == 'Homo sapiens ALG9, alpha-1,2-mannosyltransferase (ALG9), transcript variant 9, mRNA' + self.assertCountEqual(results['NM_001352413.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) assert results['NM_001352413.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' assert results['NM_001352413.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339342.1:p.?', 'slr': 'NP_001339342.1:p.?'} assert results['NM_001352413.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' @@ -8753,8 +8319,7 @@ def test_variant216(self): assert 'NR_037918.2:n.1184+11736G>T' in list(results.keys()) assert 
results['NR_037918.2:n.1184+11736G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NR_037918.2:n.1184+11736G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_037918.2:n.1184+11736G>T']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187658.1:g.69187C>A', 'vcf': {'chr': 'HSCHR12_3_CTG2', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187658.1:g.69187C>A', 'vcf': {'chr': 'chr12_KI270904v1_alt', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'grch37': {'hgvs_genomic_description': 'NW_003571047.1:g.69187C>A', 'vcf': {'chr': 'HG1133_PATCH', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003571047.1:g.69187C>A', 'vcf': {'chr': 'NW_003571047.1', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'grch37': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'HSCHR12_2_CTG2', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'NW_003571050.1', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'HSCHR12_2_CTG2', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'chr12_GL877876v1_alt', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}] - assert results['NR_037918.2:n.1184+11736G>T']['transcript_description'] == 'Homo sapiens PRH1-PRR4 readthrough (PRH1-PRR4), long non-coding RNA' + self.assertCountEqual(results['NR_037918.2:n.1184+11736G>T']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187658.1:g.69187C>A', 'vcf': {'chr': 'HSCHR12_3_CTG2', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187658.1:g.69187C>A', 'vcf': {'chr': 'chr12_KI270904v1_alt', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'grch37': {'hgvs_genomic_description': 'NW_003571047.1:g.69187C>A', 'vcf': {'chr': 
'HG1133_PATCH', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003571047.1:g.69187C>A', 'vcf': {'chr': 'NW_003571047.1', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'grch37': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'HSCHR12_2_CTG2', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'NW_003571050.1', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'HSCHR12_2_CTG2', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'chr12_GL877876v1_alt', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}]) assert results['NR_037918.2:n.1184+11736G>T']['gene_symbol'] == 'PRH1-PRR4' assert results['NR_037918.2:n.1184+11736G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_037918.2:n.1184+11736G>T']['submitted_variant'] == '12-11023080-C-A' @@ -8777,8 +8342,7 @@ def test_variant217(self): assert 'NM_020297.3:c.2199-1302del' in list(results.keys()) assert results['NM_020297.3:c.2199-1302del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_020297.3:c.2199-1302del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_020297.3:c.2199-1302del']['alt_genomic_loci'] == [] - assert results['NM_020297.3:c.2199-1302del']['transcript_description'] == 'Homo sapiens ATP binding cassette subfamily C member 9 (ABCC9), transcript variant SUR2B, mRNA' + self.assertCountEqual(results['NM_020297.3:c.2199-1302del']['alt_genomic_loci'], []) assert results['NM_020297.3:c.2199-1302del']['gene_symbol'] == 'ABCC9' assert results['NM_020297.3:c.2199-1302del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_064693.2(LRG_377p1):p.?', 'slr': 'NP_064693.2:p.?'} assert results['NM_020297.3:c.2199-1302del']['submitted_variant'] == 
'12-22018712-TC-T' @@ -8795,8 +8359,7 @@ def test_variant217(self): assert 'NM_005691.3:c.2199-1302del' in list(results.keys()) assert results['NM_005691.3:c.2199-1302del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_005691.3:c.2199-1302del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_005691.3:c.2199-1302del']['alt_genomic_loci'] == [] - assert results['NM_005691.3:c.2199-1302del']['transcript_description'] == 'Homo sapiens ATP binding cassette subfamily C member 9 (ABCC9), transcript variant SUR2A, mRNA' + self.assertCountEqual(results['NM_005691.3:c.2199-1302del']['alt_genomic_loci'], []) assert results['NM_005691.3:c.2199-1302del']['gene_symbol'] == 'ABCC9' assert results['NM_005691.3:c.2199-1302del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005682.2(LRG_377p2):p.?', 'slr': 'NP_005682.2:p.?'} assert results['NM_005691.3:c.2199-1302del']['submitted_variant'] == '12-22018712-TC-T' @@ -8813,8 +8376,7 @@ def test_variant217(self): assert 'NM_020297.2:c.2199-1302del' in list(results.keys()) assert results['NM_020297.2:c.2199-1302del']['hgvs_lrg_transcript_variant'] == 'LRG_377t1:c.2199-1302del' assert results['NM_020297.2:c.2199-1302del']['refseqgene_context_intronic_sequence'] == 'NG_012819.1(NM_020297.2):c.2199-1302del' - assert results['NM_020297.2:c.2199-1302del']['alt_genomic_loci'] == [] - assert results['NM_020297.2:c.2199-1302del']['transcript_description'] == 'Homo sapiens ATP-binding cassette, sub-family C (CFTR/MRP), member 9 (ABCC9), transcript variant SUR2B, mRNA' + self.assertCountEqual(results['NM_020297.2:c.2199-1302del']['alt_genomic_loci'], []) assert results['NM_020297.2:c.2199-1302del']['gene_symbol'] == 'ABCC9' assert results['NM_020297.2:c.2199-1302del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_064693.2(LRG_377p1):p.?', 'slr': 'NP_064693.2:p.?'} assert results['NM_020297.2:c.2199-1302del']['submitted_variant'] == '12-22018712-TC-T' @@ -8832,8 +8394,7 @@ def test_variant217(self): assert 
'NM_005691.2:c.2199-1302del' in list(results.keys()) assert results['NM_005691.2:c.2199-1302del']['hgvs_lrg_transcript_variant'] == 'LRG_377t2:c.2199-1302del' assert results['NM_005691.2:c.2199-1302del']['refseqgene_context_intronic_sequence'] == 'NG_012819.1(NM_005691.2):c.2199-1302del' - assert results['NM_005691.2:c.2199-1302del']['alt_genomic_loci'] == [] - assert results['NM_005691.2:c.2199-1302del']['transcript_description'] == 'Homo sapiens ATP-binding cassette, sub-family C (CFTR/MRP), member 9 (ABCC9), transcript variant SUR2A, mRNA' + self.assertCountEqual(results['NM_005691.2:c.2199-1302del']['alt_genomic_loci'], []) assert results['NM_005691.2:c.2199-1302del']['gene_symbol'] == 'ABCC9' assert results['NM_005691.2:c.2199-1302del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005682.2(LRG_377p2):p.?', 'slr': 'NP_005682.2:p.?'} assert results['NM_005691.2:c.2199-1302del']['submitted_variant'] == '12-22018712-TC-T' @@ -8857,8 +8418,7 @@ def test_variant218(self): assert 'NM_000424.3:c.556-2A>G' in list(results.keys()) assert results['NM_000424.3:c.556-2A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000424.3:c.556-2A>G']['refseqgene_context_intronic_sequence'] == 'NG_008297.1(NM_000424.3):c.556-2A>G' - assert results['NM_000424.3:c.556-2A>G']['alt_genomic_loci'] == [] - assert results['NM_000424.3:c.556-2A>G']['transcript_description'] == 'Homo sapiens keratin 5 (KRT5), mRNA' + self.assertCountEqual(results['NM_000424.3:c.556-2A>G']['alt_genomic_loci'], []) assert results['NM_000424.3:c.556-2A>G']['gene_symbol'] == 'KRT5' assert results['NM_000424.3:c.556-2A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000415.2:p.?', 'slr': 'NP_000415.2:p.?'} assert results['NM_000424.3:c.556-2A>G']['submitted_variant'] == '12-52912946-T-C' @@ -8881,8 +8441,7 @@ def test_variant219(self): assert 'NM_001354304.1:c.1200del' in list(results.keys()) assert results['NM_001354304.1:c.1200del']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001354304.1:c.1200del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001354304.1:c.1200del']['alt_genomic_loci'] == [] - assert results['NM_001354304.1:c.1200del']['transcript_description'] == 'Homo sapiens phenylalanine hydroxylase (PAH), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001354304.1:c.1200del']['alt_genomic_loci'], []) assert results['NM_001354304.1:c.1200del']['gene_symbol'] == 'PAH' assert results['NM_001354304.1:c.1200del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001341233.1:p.(Asn401ThrfsTer51)', 'slr': 'NP_001341233.1:p.(N401Tfs*51)'} assert results['NM_001354304.1:c.1200del']['submitted_variant'] == '12-103234292-TC-T' @@ -8899,8 +8458,7 @@ def test_variant219(self): assert 'NM_000277.2:c.1200del' in list(results.keys()) assert results['NM_000277.2:c.1200del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000277.2:c.1200del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000277.2:c.1200del']['alt_genomic_loci'] == [] - assert results['NM_000277.2:c.1200del']['transcript_description'] == 'Homo sapiens phenylalanine hydroxylase (PAH), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000277.2:c.1200del']['alt_genomic_loci'], []) assert results['NM_000277.2:c.1200del']['gene_symbol'] == 'PAH' assert results['NM_000277.2:c.1200del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000268.1:p.(Asn401ThrfsTer51)', 'slr': 'NP_000268.1:p.(N401Tfs*51)'} assert results['NM_000277.2:c.1200del']['submitted_variant'] == '12-103234292-TC-T' @@ -8918,8 +8476,7 @@ def test_variant219(self): assert 'NM_000277.1:c.1200del' in list(results.keys()) assert results['NM_000277.1:c.1200del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000277.1:c.1200del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000277.1:c.1200del']['alt_genomic_loci'] == [] - assert results['NM_000277.1:c.1200del']['transcript_description'] == 'Homo 
sapiens phenylalanine hydroxylase (PAH), mRNA' + self.assertCountEqual(results['NM_000277.1:c.1200del']['alt_genomic_loci'], []) assert results['NM_000277.1:c.1200del']['gene_symbol'] == 'PAH' assert results['NM_000277.1:c.1200del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000268.1:p.(Asn401ThrfsTer51)', 'slr': 'NP_000268.1:p.(N401Tfs*51)'} assert results['NM_000277.1:c.1200del']['submitted_variant'] == '12-103234292-TC-T' @@ -8942,8 +8499,7 @@ def test_variant220(self): assert 'NM_001354304.1:c.-95-121A>G' in list(results.keys()) assert results['NM_001354304.1:c.-95-121A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001354304.1:c.-95-121A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001354304.1:c.-95-121A>G']['alt_genomic_loci'] == [] - assert results['NM_001354304.1:c.-95-121A>G']['transcript_description'] == 'Homo sapiens phenylalanine hydroxylase (PAH), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001354304.1:c.-95-121A>G']['alt_genomic_loci'], []) assert results['NM_001354304.1:c.-95-121A>G']['gene_symbol'] == 'PAH' assert results['NM_001354304.1:c.-95-121A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001341233.1:p.?', 'slr': 'NP_001341233.1:p.?'} assert results['NM_001354304.1:c.-95-121A>G']['submitted_variant'] == '12-103311124-T-C' @@ -8961,8 +8517,7 @@ def test_variant220(self): assert 'NM_000277.2:c.-216A>G' in list(results.keys()) assert results['NM_000277.2:c.-216A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000277.2:c.-216A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000277.2:c.-216A>G']['alt_genomic_loci'] == [] - assert results['NM_000277.2:c.-216A>G']['transcript_description'] == 'Homo sapiens phenylalanine hydroxylase (PAH), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000277.2:c.-216A>G']['alt_genomic_loci'], []) assert results['NM_000277.2:c.-216A>G']['gene_symbol'] == 'PAH' assert 
results['NM_000277.2:c.-216A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000268.1:p.?', 'slr': 'NP_000268.1:p.?'} assert results['NM_000277.2:c.-216A>G']['submitted_variant'] == '12-103311124-T-C' @@ -8979,8 +8534,7 @@ def test_variant220(self): assert 'NM_000277.1:c.-215A>G' in list(results.keys()) assert results['NM_000277.1:c.-215A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000277.1:c.-215A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000277.1:c.-215A>G']['alt_genomic_loci'] == [] - assert results['NM_000277.1:c.-215A>G']['transcript_description'] == 'Homo sapiens phenylalanine hydroxylase (PAH), mRNA' + self.assertCountEqual(results['NM_000277.1:c.-215A>G']['alt_genomic_loci'], []) assert results['NM_000277.1:c.-215A>G']['gene_symbol'] == 'PAH' assert results['NM_000277.1:c.-215A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000268.1:p.?', 'slr': 'NP_000268.1:p.?'} assert results['NM_000277.1:c.-215A>G']['submitted_variant'] == '12-103311124-T-C' @@ -9003,8 +8557,7 @@ def test_variant221(self): assert 'NM_001319681.1:c.-366-1G>A' in list(results.keys()) assert results['NM_001319681.1:c.-366-1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001319681.1:c.-366-1G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001319681.1:c.-366-1G>A']['alt_genomic_loci'] == [] - assert results['NM_001319681.1:c.-366-1G>A']['transcript_description'] == 'Homo sapiens tectonic family member 1 (TCTN1), transcript variant 7, mRNA' + self.assertCountEqual(results['NM_001319681.1:c.-366-1G>A']['alt_genomic_loci'], []) assert results['NM_001319681.1:c.-366-1G>A']['gene_symbol'] == 'TCTN1' assert results['NM_001319681.1:c.-366-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001306610.1:p.?', 'slr': 'NP_001306610.1:p.?'} assert results['NM_001319681.1:c.-366-1G>A']['submitted_variant'] == '12-111064166-G-A' @@ -9021,8 +8574,7 @@ def test_variant221(self): assert 
'NM_001319680.1:c.342-1G>A' in list(results.keys()) assert results['NM_001319680.1:c.342-1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001319680.1:c.342-1G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001319680.1:c.342-1G>A']['alt_genomic_loci'] == [] - assert results['NM_001319680.1:c.342-1G>A']['transcript_description'] == 'Homo sapiens tectonic family member 1 (TCTN1), transcript variant 6, mRNA' + self.assertCountEqual(results['NM_001319680.1:c.342-1G>A']['alt_genomic_loci'], []) assert results['NM_001319680.1:c.342-1G>A']['gene_symbol'] == 'TCTN1' assert results['NM_001319680.1:c.342-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001306609.1:p.?', 'slr': 'NP_001306609.1:p.?'} assert results['NM_001319680.1:c.342-1G>A']['submitted_variant'] == '12-111064166-G-A' @@ -9039,8 +8591,7 @@ def test_variant221(self): assert 'NM_001082538.2:c.342-1G>A' in list(results.keys()) assert results['NM_001082538.2:c.342-1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001082538.2:c.342-1G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001082538.2:c.342-1G>A']['alt_genomic_loci'] == [] - assert results['NM_001082538.2:c.342-1G>A']['transcript_description'] == 'Homo sapiens tectonic family member 1 (TCTN1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001082538.2:c.342-1G>A']['alt_genomic_loci'], []) assert results['NM_001082538.2:c.342-1G>A']['gene_symbol'] == 'TCTN1' assert results['NM_001082538.2:c.342-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001076007.1:p.?', 'slr': 'NP_001076007.1:p.?'} assert results['NM_001082538.2:c.342-1G>A']['submitted_variant'] == '12-111064166-G-A' @@ -9057,8 +8608,7 @@ def test_variant221(self): assert 'NM_001173976.1:c.162-1G>A' in list(results.keys()) assert results['NM_001173976.1:c.162-1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001173976.1:c.162-1G>A']['refseqgene_context_intronic_sequence'] == 
'' - assert results['NM_001173976.1:c.162-1G>A']['alt_genomic_loci'] == [] - assert results['NM_001173976.1:c.162-1G>A']['transcript_description'] == 'Homo sapiens tectonic family member 1 (TCTN1), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001173976.1:c.162-1G>A']['alt_genomic_loci'], []) assert results['NM_001173976.1:c.162-1G>A']['gene_symbol'] == 'TCTN1' assert results['NM_001173976.1:c.162-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167447.1:p.?', 'slr': 'NP_001167447.1:p.?'} assert results['NM_001173976.1:c.162-1G>A']['submitted_variant'] == '12-111064166-G-A' @@ -9076,8 +8626,7 @@ def test_variant221(self): assert 'NM_001082537.2:c.342-1G>A' in list(results.keys()) assert results['NM_001082537.2:c.342-1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001082537.2:c.342-1G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001082537.2:c.342-1G>A']['alt_genomic_loci'] == [] - assert results['NM_001082537.2:c.342-1G>A']['transcript_description'] == 'Homo sapiens tectonic family member 1 (TCTN1), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001082537.2:c.342-1G>A']['alt_genomic_loci'], []) assert results['NM_001082537.2:c.342-1G>A']['gene_symbol'] == 'TCTN1' assert results['NM_001082537.2:c.342-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001076006.1:p.?', 'slr': 'NP_001076006.1:p.?'} assert results['NM_001082537.2:c.342-1G>A']['submitted_variant'] == '12-111064166-G-A' @@ -9094,8 +8643,7 @@ def test_variant221(self): assert 'NR_135088.1:n.559-1G>A' in list(results.keys()) assert results['NR_135088.1:n.559-1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NR_135088.1:n.559-1G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_135088.1:n.559-1G>A']['alt_genomic_loci'] == [] - assert results['NR_135088.1:n.559-1G>A']['transcript_description'] == 'Homo sapiens tectonic family member 1 (TCTN1), transcript variant 9, non-coding RNA' 
+ self.assertCountEqual(results['NR_135088.1:n.559-1G>A']['alt_genomic_loci'], []) assert results['NR_135088.1:n.559-1G>A']['gene_symbol'] == 'TCTN1' assert results['NR_135088.1:n.559-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_135088.1:n.559-1G>A']['submitted_variant'] == '12-111064166-G-A' @@ -9112,8 +8660,7 @@ def test_variant221(self): assert 'NM_024549.5:c.342-1G>A' in list(results.keys()) assert results['NM_024549.5:c.342-1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_024549.5:c.342-1G>A']['refseqgene_context_intronic_sequence'] == 'NG_030381.1(NM_024549.5):c.342-1G>A' - assert results['NM_024549.5:c.342-1G>A']['alt_genomic_loci'] == [] - assert results['NM_024549.5:c.342-1G>A']['transcript_description'] == 'Homo sapiens tectonic family member 1 (TCTN1), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_024549.5:c.342-1G>A']['alt_genomic_loci'], []) assert results['NM_024549.5:c.342-1G>A']['gene_symbol'] == 'TCTN1' assert results['NM_024549.5:c.342-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_078825.2:p.?', 'slr': 'NP_078825.2:p.?'} assert results['NM_024549.5:c.342-1G>A']['submitted_variant'] == '12-111064166-G-A' @@ -9130,8 +8677,7 @@ def test_variant221(self): assert 'NM_001173975.2:c.174-1G>A' in list(results.keys()) assert results['NM_001173975.2:c.174-1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001173975.2:c.174-1G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001173975.2:c.174-1G>A']['alt_genomic_loci'] == [] - assert results['NM_001173975.2:c.174-1G>A']['transcript_description'] == 'Homo sapiens tectonic family member 1 (TCTN1), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001173975.2:c.174-1G>A']['alt_genomic_loci'], []) assert results['NM_001173975.2:c.174-1G>A']['gene_symbol'] == 'TCTN1' assert results['NM_001173975.2:c.174-1G>A']['hgvs_predicted_protein_consequence'] 
== {'tlr': 'NP_001167446.1:p.?', 'slr': 'NP_001167446.1:p.?'} assert results['NM_001173975.2:c.174-1G>A']['submitted_variant'] == '12-111064166-G-A' @@ -9148,8 +8694,7 @@ def test_variant221(self): assert 'NM_001173975.1:c.174-1G>A' in list(results.keys()) assert results['NM_001173975.1:c.174-1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001173975.1:c.174-1G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001173975.1:c.174-1G>A']['alt_genomic_loci'] == [] - assert results['NM_001173975.1:c.174-1G>A']['transcript_description'] == 'Homo sapiens tectonic family member 1 (TCTN1), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001173975.1:c.174-1G>A']['alt_genomic_loci'], []) assert results['NM_001173975.1:c.174-1G>A']['gene_symbol'] == 'TCTN1' assert results['NM_001173975.1:c.174-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167446.1:p.?', 'slr': 'NP_001167446.1:p.?'} assert results['NM_001173975.1:c.174-1G>A']['submitted_variant'] == '12-111064166-G-A' @@ -9166,8 +8711,7 @@ def test_variant221(self): assert 'NM_001319682.1:c.174-1G>A' in list(results.keys()) assert results['NM_001319682.1:c.174-1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001319682.1:c.174-1G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001319682.1:c.174-1G>A']['alt_genomic_loci'] == [] - assert results['NM_001319682.1:c.174-1G>A']['transcript_description'] == 'Homo sapiens tectonic family member 1 (TCTN1), transcript variant 8, mRNA' + self.assertCountEqual(results['NM_001319682.1:c.174-1G>A']['alt_genomic_loci'], []) assert results['NM_001319682.1:c.174-1G>A']['gene_symbol'] == 'TCTN1' assert results['NM_001319682.1:c.174-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001306611.1:p.?', 'slr': 'NP_001306611.1:p.?'} assert results['NM_001319682.1:c.174-1G>A']['submitted_variant'] == '12-111064166-G-A' @@ -9190,8 +8734,7 @@ def test_variant222(self): assert 
'NM_001194995.1:c.210del' in list(results.keys()) assert results['NM_001194995.1:c.210del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001194995.1:c.210del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001194995.1:c.210del']['alt_genomic_loci'] == [] - assert results['NM_001194995.1:c.210del']['transcript_description'] == 'Homo sapiens chromosome 12 open reading frame 65 (C12orf65), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001194995.1:c.210del']['alt_genomic_loci'], []) assert results['NM_001194995.1:c.210del']['gene_symbol'] == 'C12orf65' assert results['NM_001194995.1:c.210del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001181924.1:p.(Gly72AlafsTer13)', 'slr': 'NP_001181924.1:p.(G72Afs*13)'} assert results['NM_001194995.1:c.210del']['submitted_variant'] == '12-123738430-CA-C' @@ -9209,8 +8752,7 @@ def test_variant222(self): assert 'NM_152269.4:c.210del' in list(results.keys()) assert results['NM_152269.4:c.210del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_152269.4:c.210del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_152269.4:c.210del']['alt_genomic_loci'] == [] - assert results['NM_152269.4:c.210del']['transcript_description'] == 'Homo sapiens chromosome 12 open reading frame 65 (C12orf65), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_152269.4:c.210del']['alt_genomic_loci'], []) assert results['NM_152269.4:c.210del']['gene_symbol'] == 'C12orf65' assert results['NM_152269.4:c.210del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_689482.1:p.(Gly72AlafsTer13)', 'slr': 'NP_689482.1:p.(G72Afs*13)'} assert results['NM_152269.4:c.210del']['submitted_variant'] == '12-123738430-CA-C' @@ -9227,8 +8769,7 @@ def test_variant222(self): assert 'NM_001143905.2:c.210del' in list(results.keys()) assert results['NM_001143905.2:c.210del']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001143905.2:c.210del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001143905.2:c.210del']['alt_genomic_loci'] == [] - assert results['NM_001143905.2:c.210del']['transcript_description'] == 'Homo sapiens chromosome 12 open reading frame 65 (C12orf65), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001143905.2:c.210del']['alt_genomic_loci'], []) assert results['NM_001143905.2:c.210del']['gene_symbol'] == 'C12orf65' assert results['NM_001143905.2:c.210del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001137377.1:p.(Gly72AlafsTer13)', 'slr': 'NP_001137377.1:p.(G72Afs*13)'} assert results['NM_001143905.2:c.210del']['submitted_variant'] == '12-123738430-CA-C' @@ -9252,8 +8793,7 @@ def test_variant223(self): assert 'NM_194318.3:c.71-5del' in list(results.keys()) assert results['NM_194318.3:c.71-5del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_194318.3:c.71-5del']['refseqgene_context_intronic_sequence'] == 'NG_011732.1(NM_194318.3):c.71-5del' - assert results['NM_194318.3:c.71-5del']['alt_genomic_loci'] == [] - assert results['NM_194318.3:c.71-5del']['transcript_description'] == 'Homo sapiens beta 3-glucosyltransferase (B3GLCT), mRNA' + self.assertCountEqual(results['NM_194318.3:c.71-5del']['alt_genomic_loci'], []) assert results['NM_194318.3:c.71-5del']['gene_symbol'] == 'B3GLCT' assert results['NM_194318.3:c.71-5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_919299.3:p.?', 'slr': 'NP_919299.3:p.?'} assert results['NM_194318.3:c.71-5del']['submitted_variant'] == '13-31789169-CT-C' @@ -9276,8 +8816,7 @@ def test_variant224(self): assert 'NR_144368.1:n.214-3552C>T' in list(results.keys()) assert results['NR_144368.1:n.214-3552C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NR_144368.1:n.214-3552C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_144368.1:n.214-3552C>T']['alt_genomic_loci'] == [] - assert 
results['NR_144368.1:n.214-3552C>T']['transcript_description'] == 'Homo sapiens uncharacterized LOC105370526 (LOC105370526), long non-coding RNA' + self.assertCountEqual(results['NR_144368.1:n.214-3552C>T']['alt_genomic_loci'], []) assert results['NR_144368.1:n.214-3552C>T']['gene_symbol'] == 'LOC105370526' assert results['NR_144368.1:n.214-3552C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_144368.1:n.214-3552C>T']['submitted_variant'] == '14-62187287-G-A' @@ -9294,8 +8833,7 @@ def test_variant224(self): assert 'NM_181054.2:c.223G>A' in list(results.keys()) assert results['NM_181054.2:c.223G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_181054.2:c.223G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_181054.2:c.223G>A']['alt_genomic_loci'] == [] - assert results['NM_181054.2:c.223G>A']['transcript_description'] == 'Homo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_181054.2:c.223G>A']['alt_genomic_loci'], []) assert results['NM_181054.2:c.223G>A']['gene_symbol'] == 'HIF1A' assert results['NM_181054.2:c.223G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_851397.1:p.(Ala75Thr)', 'slr': 'NP_851397.1:p.(A75T)'} assert results['NM_181054.2:c.223G>A']['submitted_variant'] == '14-62187287-G-A' @@ -9313,8 +8851,7 @@ def test_variant224(self): assert 'NM_001243084.1:c.295G>A' in list(results.keys()) assert results['NM_001243084.1:c.295G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001243084.1:c.295G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001243084.1:c.295G>A']['alt_genomic_loci'] == [] - assert results['NM_001243084.1:c.295G>A']['transcript_description'] == 'Homo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001243084.1:c.295G>A']['alt_genomic_loci'], []) assert 
results['NM_001243084.1:c.295G>A']['gene_symbol'] == 'HIF1A' assert results['NM_001243084.1:c.295G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001230013.1:p.(Ala99Thr)', 'slr': 'NP_001230013.1:p.(A99T)'} assert results['NM_001243084.1:c.295G>A']['submitted_variant'] == '14-62187287-G-A' @@ -9331,8 +8868,7 @@ def test_variant224(self): assert 'NM_001530.3:c.223G>A' in list(results.keys()) assert results['NM_001530.3:c.223G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001530.3:c.223G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001530.3:c.223G>A']['alt_genomic_loci'] == [] - assert results['NM_001530.3:c.223G>A']['transcript_description'] == 'Homo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001530.3:c.223G>A']['alt_genomic_loci'], []) assert results['NM_001530.3:c.223G>A']['gene_symbol'] == 'HIF1A' assert results['NM_001530.3:c.223G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001521.1:p.(Ala75Thr)', 'slr': 'NP_001521.1:p.(A75T)'} assert results['NM_001530.3:c.223G>A']['submitted_variant'] == '14-62187287-G-A' @@ -9355,8 +8891,7 @@ def test_variant225(self): assert 'NR_144368.1:n.214-4497_214-4496delinsTC' in list(results.keys()) assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['hgvs_lrg_transcript_variant'] == '' assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['alt_genomic_loci'] == [] - assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['transcript_description'] == 'Homo sapiens uncharacterized LOC105370526 (LOC105370526), long non-coding RNA' + self.assertCountEqual(results['NR_144368.1:n.214-4497_214-4496delinsTC']['alt_genomic_loci'], []) assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['gene_symbol'] == 'LOC105370526' assert 
results['NR_144368.1:n.214-4497_214-4496delinsTC']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['submitted_variant'] == '14-62188231-TT-GA' @@ -9373,8 +8908,7 @@ def test_variant225(self): assert 'NM_001530.3:c.231_232delinsGA' in list(results.keys()) assert results['NM_001530.3:c.231_232delinsGA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001530.3:c.231_232delinsGA']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001530.3:c.231_232delinsGA']['alt_genomic_loci'] == [] - assert results['NM_001530.3:c.231_232delinsGA']['transcript_description'] == 'Homo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001530.3:c.231_232delinsGA']['alt_genomic_loci'], []) assert results['NM_001530.3:c.231_232delinsGA']['gene_symbol'] == 'HIF1A' assert results['NM_001530.3:c.231_232delinsGA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001521.1:p.(Asp77_Leu78delinsGluMet)', 'slr': 'NP_001521.1:p.(D77_L78delinsEM)'} assert results['NM_001530.3:c.231_232delinsGA']['submitted_variant'] == '14-62188231-TT-GA' @@ -9392,8 +8926,7 @@ def test_variant225(self): assert 'NM_001243084.1:c.303_304delinsGA' in list(results.keys()) assert results['NM_001243084.1:c.303_304delinsGA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001243084.1:c.303_304delinsGA']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001243084.1:c.303_304delinsGA']['alt_genomic_loci'] == [] - assert results['NM_001243084.1:c.303_304delinsGA']['transcript_description'] == 'Homo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001243084.1:c.303_304delinsGA']['alt_genomic_loci'], []) assert results['NM_001243084.1:c.303_304delinsGA']['gene_symbol'] == 'HIF1A' assert 
results['NM_001243084.1:c.303_304delinsGA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001230013.1:p.(Asp101_Leu102delinsGluMet)', 'slr': 'NP_001230013.1:p.(D101_L102delinsEM)'} assert results['NM_001243084.1:c.303_304delinsGA']['submitted_variant'] == '14-62188231-TT-GA' @@ -9410,8 +8943,7 @@ def test_variant225(self): assert 'NM_181054.2:c.231_232delinsGA' in list(results.keys()) assert results['NM_181054.2:c.231_232delinsGA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_181054.2:c.231_232delinsGA']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_181054.2:c.231_232delinsGA']['alt_genomic_loci'] == [] - assert results['NM_181054.2:c.231_232delinsGA']['transcript_description'] == 'Homo sapiens hypoxia inducible factor 1 subunit alpha (HIF1A), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_181054.2:c.231_232delinsGA']['alt_genomic_loci'], []) assert results['NM_181054.2:c.231_232delinsGA']['gene_symbol'] == 'HIF1A' assert results['NM_181054.2:c.231_232delinsGA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_851397.1:p.(Asp77_Leu78delinsGluMet)', 'slr': 'NP_851397.1:p.(D77_L78delinsEM)'} assert results['NM_181054.2:c.231_232delinsGA']['submitted_variant'] == '14-62188231-TT-GA' @@ -9434,8 +8966,7 @@ def test_variant226(self): assert 'NM_139318.3:c.2366G>T' in list(results.keys()) assert results['NM_139318.3:c.2366G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_139318.3:c.2366G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_139318.3:c.2366G>T']['alt_genomic_loci'] == [] - assert results['NM_139318.3:c.2366G>T']['transcript_description'] == 'Homo sapiens potassium voltage-gated channel, subfamily H (eag-related), member 5 (KCNH5), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_139318.3:c.2366G>T']['alt_genomic_loci'], []) assert results['NM_139318.3:c.2366G>T']['gene_symbol'] == 'KCNH5' assert 
results['NM_139318.3:c.2366G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_647479.2:p.(Gly789Val)', 'slr': 'NP_647479.2:p.(G789V)'} assert results['NM_139318.3:c.2366G>T']['submitted_variant'] == '14-63174827-C-A' @@ -9452,8 +8983,7 @@ def test_variant226(self): assert 'NM_172375.1:c.*333G>T' in list(results.keys()) assert results['NM_172375.1:c.*333G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_172375.1:c.*333G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_172375.1:c.*333G>T']['alt_genomic_loci'] == [] - assert results['NM_172375.1:c.*333G>T']['transcript_description'] == 'Homo sapiens potassium voltage-gated channel, subfamily H (eag-related), member 5 (KCNH5), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_172375.1:c.*333G>T']['alt_genomic_loci'], []) assert results['NM_172375.1:c.*333G>T']['gene_symbol'] == 'KCNH5' assert results['NM_172375.1:c.*333G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_758963.1:p.?', 'slr': 'NP_758963.1:p.?'} assert results['NM_172375.1:c.*333G>T']['submitted_variant'] == '14-63174827-C-A' @@ -9470,8 +9000,7 @@ def test_variant226(self): assert 'NM_172375.2:c.*333G>T' in list(results.keys()) assert results['NM_172375.2:c.*333G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_172375.2:c.*333G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_172375.2:c.*333G>T']['alt_genomic_loci'] == [] - assert results['NM_172375.2:c.*333G>T']['transcript_description'] == 'Homo sapiens potassium voltage-gated channel subfamily H member 5 (KCNH5), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_172375.2:c.*333G>T']['alt_genomic_loci'], []) assert results['NM_172375.2:c.*333G>T']['gene_symbol'] == 'KCNH5' assert results['NM_172375.2:c.*333G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_758963.1:p.?', 'slr': 'NP_758963.1:p.?'} assert results['NM_172375.2:c.*333G>T']['submitted_variant'] == '14-63174827-C-A' @@ -9489,8 
+9018,7 @@ def test_variant226(self): assert 'NM_139318.4:c.2366G>T' in list(results.keys()) assert results['NM_139318.4:c.2366G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_139318.4:c.2366G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_139318.4:c.2366G>T']['alt_genomic_loci'] == [] - assert results['NM_139318.4:c.2366G>T']['transcript_description'] == 'Homo sapiens potassium voltage-gated channel subfamily H member 5 (KCNH5), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_139318.4:c.2366G>T']['alt_genomic_loci'], []) assert results['NM_139318.4:c.2366G>T']['gene_symbol'] == 'KCNH5' assert results['NM_139318.4:c.2366G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_647479.2:p.(Gly789Val)', 'slr': 'NP_647479.2:p.(G789V)'} assert results['NM_139318.4:c.2366G>T']['submitted_variant'] == '14-63174827-C-A' @@ -9513,8 +9041,7 @@ def test_variant227(self): assert 'NM_000070.2:c.550del' in list(results.keys()) assert results['NM_000070.2:c.550del']['hgvs_lrg_transcript_variant'] == 'LRG_849t1:c.550del' assert results['NM_000070.2:c.550del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000070.2:c.550del']['alt_genomic_loci'] == [] - assert results['NM_000070.2:c.550del']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000070.2:c.550del']['alt_genomic_loci'], []) assert results['NM_000070.2:c.550del']['gene_symbol'] == 'CAPN3' assert results['NM_000070.2:c.550del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000061.1(LRG_849p1):p.(Thr184ArgfsTer36)', 'slr': 'NP_000061.1:p.(T184Rfs*36)'} assert results['NM_000070.2:c.550del']['submitted_variant'] == '15-42680000-CA-C' @@ -9532,8 +9059,7 @@ def test_variant227(self): assert 'NM_024344.1:c.550del' in list(results.keys()) assert results['NM_024344.1:c.550del']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_024344.1:c.550del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_024344.1:c.550del']['alt_genomic_loci'] == [] - assert results['NM_024344.1:c.550del']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_024344.1:c.550del']['alt_genomic_loci'], []) assert results['NM_024344.1:c.550del']['gene_symbol'] == 'CAPN3' assert results['NM_024344.1:c.550del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_077320.1:p.(Thr184ArgfsTer36)', 'slr': 'NP_077320.1:p.(T184Rfs*36)'} assert results['NM_024344.1:c.550del']['submitted_variant'] == '15-42680000-CA-C' @@ -9550,8 +9076,7 @@ def test_variant227(self): assert 'NM_173087.1:c.550del' in list(results.keys()) assert results['NM_173087.1:c.550del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_173087.1:c.550del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_173087.1:c.550del']['alt_genomic_loci'] == [] - assert results['NM_173087.1:c.550del']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_173087.1:c.550del']['alt_genomic_loci'], []) assert results['NM_173087.1:c.550del']['gene_symbol'] == 'CAPN3' assert results['NM_173087.1:c.550del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775110.1:p.(Thr184ArgfsTer36)', 'slr': 'NP_775110.1:p.(T184Rfs*36)'} assert results['NM_173087.1:c.550del']['submitted_variant'] == '15-42680000-CA-C' @@ -9574,8 +9099,7 @@ def test_variant228(self): assert 'NM_024344.1:c.550dup' in list(results.keys()) assert results['NM_024344.1:c.550dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_024344.1:c.550dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_024344.1:c.550dup']['alt_genomic_loci'] == [] - assert results['NM_024344.1:c.550dup']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 2, mRNA' + 
self.assertCountEqual(results['NM_024344.1:c.550dup']['alt_genomic_loci'], []) assert results['NM_024344.1:c.550dup']['gene_symbol'] == 'CAPN3' assert results['NM_024344.1:c.550dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_077320.1:p.(Thr184AsnfsTer16)', 'slr': 'NP_077320.1:p.(T184Nfs*16)'} assert results['NM_024344.1:c.550dup']['submitted_variant'] == '15-42680000-CA-CAA' @@ -9592,8 +9116,7 @@ def test_variant228(self): assert 'NM_173087.1:c.550dup' in list(results.keys()) assert results['NM_173087.1:c.550dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_173087.1:c.550dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_173087.1:c.550dup']['alt_genomic_loci'] == [] - assert results['NM_173087.1:c.550dup']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_173087.1:c.550dup']['alt_genomic_loci'], []) assert results['NM_173087.1:c.550dup']['gene_symbol'] == 'CAPN3' assert results['NM_173087.1:c.550dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775110.1:p.(Thr184AsnfsTer16)', 'slr': 'NP_775110.1:p.(T184Nfs*16)'} assert results['NM_173087.1:c.550dup']['submitted_variant'] == '15-42680000-CA-CAA' @@ -9611,8 +9134,7 @@ def test_variant228(self): assert 'NM_000070.2:c.550dup' in list(results.keys()) assert results['NM_000070.2:c.550dup']['hgvs_lrg_transcript_variant'] == 'LRG_849t1:c.550dup' assert results['NM_000070.2:c.550dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000070.2:c.550dup']['alt_genomic_loci'] == [] - assert results['NM_000070.2:c.550dup']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000070.2:c.550dup']['alt_genomic_loci'], []) assert results['NM_000070.2:c.550dup']['gene_symbol'] == 'CAPN3' assert results['NM_000070.2:c.550dup']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_000061.1(LRG_849p1):p.(Thr184AsnfsTer16)', 'slr': 'NP_000061.1:p.(T184Nfs*16)'} assert results['NM_000070.2:c.550dup']['submitted_variant'] == '15-42680000-CA-CAA' @@ -9635,8 +9157,7 @@ def test_variant229(self): assert 'NM_173088.1:c.825_826insTCA' in list(results.keys()) assert results['NM_173088.1:c.825_826insTCA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_173088.1:c.825_826insTCA']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_173088.1:c.825_826insTCA']['alt_genomic_loci'] == [] - assert results['NM_173088.1:c.825_826insTCA']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_173088.1:c.825_826insTCA']['alt_genomic_loci'], []) assert results['NM_173088.1:c.825_826insTCA']['gene_symbol'] == 'CAPN3' assert results['NM_173088.1:c.825_826insTCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775111.1:p.(Val275_Arg276insSer)', 'slr': 'NP_775111.1:p.(V275_R276insS)'} assert results['NM_173088.1:c.825_826insTCA']['submitted_variant'] == '15-42703179-T-TTCA' @@ -9653,8 +9174,7 @@ def test_variant229(self): assert 'NM_173090.1:c.366_367insTCA' in list(results.keys()) assert results['NM_173090.1:c.366_367insTCA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_173090.1:c.366_367insTCA']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_173090.1:c.366_367insTCA']['alt_genomic_loci'] == [] - assert results['NM_173090.1:c.366_367insTCA']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 6, mRNA' + self.assertCountEqual(results['NM_173090.1:c.366_367insTCA']['alt_genomic_loci'], []) assert results['NM_173090.1:c.366_367insTCA']['gene_symbol'] == 'CAPN3' assert results['NM_173090.1:c.366_367insTCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775113.1:p.(Val122_Arg123insSer)', 'slr': 'NP_775113.1:p.(V122_R123insS)'} assert results['NM_173090.1:c.366_367insTCA']['submitted_variant'] == 
'15-42703179-T-TTCA' @@ -9671,8 +9191,7 @@ def test_variant229(self): assert 'NM_173089.1:c.366_367insTCA' in list(results.keys()) assert results['NM_173089.1:c.366_367insTCA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_173089.1:c.366_367insTCA']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_173089.1:c.366_367insTCA']['alt_genomic_loci'] == [] - assert results['NM_173089.1:c.366_367insTCA']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_173089.1:c.366_367insTCA']['alt_genomic_loci'], []) assert results['NM_173089.1:c.366_367insTCA']['gene_symbol'] == 'CAPN3' assert results['NM_173089.1:c.366_367insTCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775112.1:p.(Val122_Arg123insSer)', 'slr': 'NP_775112.1:p.(V122_R123insS)'} assert results['NM_173089.1:c.366_367insTCA']['submitted_variant'] == '15-42703179-T-TTCA' @@ -9689,8 +9208,7 @@ def test_variant229(self): assert 'NM_173087.1:c.2085_2086insTCA' in list(results.keys()) assert results['NM_173087.1:c.2085_2086insTCA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_173087.1:c.2085_2086insTCA']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_173087.1:c.2085_2086insTCA']['alt_genomic_loci'] == [] - assert results['NM_173087.1:c.2085_2086insTCA']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_173087.1:c.2085_2086insTCA']['alt_genomic_loci'], []) assert results['NM_173087.1:c.2085_2086insTCA']['gene_symbol'] == 'CAPN3' assert results['NM_173087.1:c.2085_2086insTCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775110.1:p.(Val695_Arg696insSer)', 'slr': 'NP_775110.1:p.(V695_R696insS)'} assert results['NM_173087.1:c.2085_2086insTCA']['submitted_variant'] == '15-42703179-T-TTCA' @@ -9708,8 +9226,7 @@ def test_variant229(self): assert 'NM_000070.2:c.2361_2362insTCA' in list(results.keys()) 
assert results['NM_000070.2:c.2361_2362insTCA']['hgvs_lrg_transcript_variant'] == 'LRG_849t1:c.2361_2362insTCA' assert results['NM_000070.2:c.2361_2362insTCA']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000070.2:c.2361_2362insTCA']['alt_genomic_loci'] == [] - assert results['NM_000070.2:c.2361_2362insTCA']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000070.2:c.2361_2362insTCA']['alt_genomic_loci'], []) assert results['NM_000070.2:c.2361_2362insTCA']['gene_symbol'] == 'CAPN3' assert results['NM_000070.2:c.2361_2362insTCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000061.1(LRG_849p1):p.(Val787_Arg788insSer)', 'slr': 'NP_000061.1:p.(V787_R788insS)'} assert results['NM_000070.2:c.2361_2362insTCA']['submitted_variant'] == '15-42703179-T-TTCA' @@ -9726,8 +9243,7 @@ def test_variant229(self): assert 'NM_024344.1:c.2343_2344insTCA' in list(results.keys()) assert results['NM_024344.1:c.2343_2344insTCA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_024344.1:c.2343_2344insTCA']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_024344.1:c.2343_2344insTCA']['alt_genomic_loci'] == [] - assert results['NM_024344.1:c.2343_2344insTCA']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_024344.1:c.2343_2344insTCA']['alt_genomic_loci'], []) assert results['NM_024344.1:c.2343_2344insTCA']['gene_symbol'] == 'CAPN3' assert results['NM_024344.1:c.2343_2344insTCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_077320.1:p.(Val781_Arg782insSer)', 'slr': 'NP_077320.1:p.(V781_R782insS)'} assert results['NM_024344.1:c.2343_2344insTCA']['submitted_variant'] == '15-42703179-T-TTCA' @@ -9750,8 +9266,7 @@ def test_variant230(self): assert 'NM_024344.1:c.2344_2345delinsTCATCT' in list(results.keys()) assert 
results['NM_024344.1:c.2344_2345delinsTCATCT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_024344.1:c.2344_2345delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_024344.1:c.2344_2345delinsTCATCT']['alt_genomic_loci'] == [] - assert results['NM_024344.1:c.2344_2345delinsTCATCT']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_024344.1:c.2344_2345delinsTCATCT']['alt_genomic_loci'], []) assert results['NM_024344.1:c.2344_2345delinsTCATCT']['gene_symbol'] == 'CAPN3' assert results['NM_024344.1:c.2344_2345delinsTCATCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_077320.1:p.(Arg782SerfsTer14)', 'slr': 'NP_077320.1:p.(R782Sfs*14)'} assert results['NM_024344.1:c.2344_2345delinsTCATCT']['submitted_variant'] == '15-42703179-TAG-TTCATCT' @@ -9768,8 +9283,7 @@ def test_variant230(self): assert 'NM_173090.1:c.367_368delinsTCATCT' in list(results.keys()) assert results['NM_173090.1:c.367_368delinsTCATCT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_173090.1:c.367_368delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_173090.1:c.367_368delinsTCATCT']['alt_genomic_loci'] == [] - assert results['NM_173090.1:c.367_368delinsTCATCT']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 6, mRNA' + self.assertCountEqual(results['NM_173090.1:c.367_368delinsTCATCT']['alt_genomic_loci'], []) assert results['NM_173090.1:c.367_368delinsTCATCT']['gene_symbol'] == 'CAPN3' assert results['NM_173090.1:c.367_368delinsTCATCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775113.1:p.(Arg123SerfsTer14)', 'slr': 'NP_775113.1:p.(R123Sfs*14)'} assert results['NM_173090.1:c.367_368delinsTCATCT']['submitted_variant'] == '15-42703179-TAG-TTCATCT' @@ -9787,8 +9301,7 @@ def test_variant230(self): assert 'NM_000070.2:c.2362_2363delinsTCATCT' in list(results.keys()) assert 
results['NM_000070.2:c.2362_2363delinsTCATCT']['hgvs_lrg_transcript_variant'] == 'LRG_849t1:c.2362_2363delinsTCATCT' assert results['NM_000070.2:c.2362_2363delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000070.2:c.2362_2363delinsTCATCT']['alt_genomic_loci'] == [] - assert results['NM_000070.2:c.2362_2363delinsTCATCT']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000070.2:c.2362_2363delinsTCATCT']['alt_genomic_loci'], []) assert results['NM_000070.2:c.2362_2363delinsTCATCT']['gene_symbol'] == 'CAPN3' assert results['NM_000070.2:c.2362_2363delinsTCATCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000061.1(LRG_849p1):p.(Arg788SerfsTer14)', 'slr': 'NP_000061.1:p.(R788Sfs*14)'} assert results['NM_000070.2:c.2362_2363delinsTCATCT']['submitted_variant'] == '15-42703179-TAG-TTCATCT' @@ -9805,8 +9318,7 @@ def test_variant230(self): assert 'NM_173088.1:c.826_827delinsTCATCT' in list(results.keys()) assert results['NM_173088.1:c.826_827delinsTCATCT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_173088.1:c.826_827delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_173088.1:c.826_827delinsTCATCT']['alt_genomic_loci'] == [] - assert results['NM_173088.1:c.826_827delinsTCATCT']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_173088.1:c.826_827delinsTCATCT']['alt_genomic_loci'], []) assert results['NM_173088.1:c.826_827delinsTCATCT']['gene_symbol'] == 'CAPN3' assert results['NM_173088.1:c.826_827delinsTCATCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775111.1:p.(Arg276SerfsTer14)', 'slr': 'NP_775111.1:p.(R276Sfs*14)'} assert results['NM_173088.1:c.826_827delinsTCATCT']['submitted_variant'] == '15-42703179-TAG-TTCATCT' @@ -9823,8 +9335,7 @@ def test_variant230(self): assert 'NM_173089.1:c.367_368delinsTCATCT' in 
list(results.keys()) assert results['NM_173089.1:c.367_368delinsTCATCT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_173089.1:c.367_368delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_173089.1:c.367_368delinsTCATCT']['alt_genomic_loci'] == [] - assert results['NM_173089.1:c.367_368delinsTCATCT']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_173089.1:c.367_368delinsTCATCT']['alt_genomic_loci'], []) assert results['NM_173089.1:c.367_368delinsTCATCT']['gene_symbol'] == 'CAPN3' assert results['NM_173089.1:c.367_368delinsTCATCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775112.1:p.(Arg123SerfsTer14)', 'slr': 'NP_775112.1:p.(R123Sfs*14)'} assert results['NM_173089.1:c.367_368delinsTCATCT']['submitted_variant'] == '15-42703179-TAG-TTCATCT' @@ -9841,8 +9352,7 @@ def test_variant230(self): assert 'NM_173087.1:c.2086_2087delinsTCATCT' in list(results.keys()) assert results['NM_173087.1:c.2086_2087delinsTCATCT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_173087.1:c.2086_2087delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_173087.1:c.2086_2087delinsTCATCT']['alt_genomic_loci'] == [] - assert results['NM_173087.1:c.2086_2087delinsTCATCT']['transcript_description'] == 'Homo sapiens calpain 3 (CAPN3), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_173087.1:c.2086_2087delinsTCATCT']['alt_genomic_loci'], []) assert results['NM_173087.1:c.2086_2087delinsTCATCT']['gene_symbol'] == 'CAPN3' assert results['NM_173087.1:c.2086_2087delinsTCATCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775110.1:p.(Arg696SerfsTer14)', 'slr': 'NP_775110.1:p.(R696Sfs*14)'} assert results['NM_173087.1:c.2086_2087delinsTCATCT']['submitted_variant'] == '15-42703179-TAG-TTCATCT' @@ -9865,8 +9375,7 @@ def test_variant231(self): assert 'NM_000138.4:c.2927G>A' in list(results.keys()) assert 
results['NM_000138.4:c.2927G>A']['hgvs_lrg_transcript_variant'] == 'LRG_778t1:c.2927G>A' assert results['NM_000138.4:c.2927G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000138.4:c.2927G>A']['alt_genomic_loci'] == [] - assert results['NM_000138.4:c.2927G>A']['transcript_description'] == 'Homo sapiens fibrillin 1 (FBN1), mRNA' + self.assertCountEqual(results['NM_000138.4:c.2927G>A']['alt_genomic_loci'], []) assert results['NM_000138.4:c.2927G>A']['gene_symbol'] == 'FBN1' assert results['NM_000138.4:c.2927G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000129.3(LRG_778p1):p.(Arg976His)', 'slr': 'NP_000129.3:p.(R976H)'} assert results['NM_000138.4:c.2927G>A']['submitted_variant'] == '15-48782203-C-T' @@ -9890,8 +9399,7 @@ def test_variant232(self): assert 'NM_014249.2:c.946_949=' in list(results.keys()) assert results['NM_014249.2:c.946_949=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014249.2:c.946_949=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_014249.2:c.946_949=']['alt_genomic_loci'] == [] - assert results['NM_014249.2:c.946_949=']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_014249.2:c.946_949=']['alt_genomic_loci'], []) assert results['NM_014249.2:c.946_949=']['gene_symbol'] == 'NR2E3' assert results['NM_014249.2:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Asp316=)', 'slr': 'NP_055064.1:p.(D316=)'} assert results['NM_014249.2:c.946_949=']['submitted_variant'] == '15-72105929-CC-C' @@ -9908,8 +9416,7 @@ def test_variant232(self): assert 'NM_016346.3:c.946_949=' in list(results.keys()) assert results['NM_016346.3:c.946_949=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_016346.3:c.946_949=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_016346.3:c.946_949=']['alt_genomic_loci'] == [] - assert 
results['NM_016346.3:c.946_949=']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_016346.3:c.946_949=']['alt_genomic_loci'], []) assert results['NM_016346.3:c.946_949=']['gene_symbol'] == 'NR2E3' assert results['NM_016346.3:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Asp316=)', 'slr': 'NP_057430.1:p.(D316=)'} assert results['NM_016346.3:c.946_949=']['submitted_variant'] == '15-72105929-CC-C' @@ -9927,8 +9434,7 @@ def test_variant232(self): assert 'NM_014249.3:c.946_949=' in list(results.keys()) assert results['NM_014249.3:c.946_949=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014249.3:c.946_949=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_014249.3:c.946_949=']['alt_genomic_loci'] == [] - assert results['NM_014249.3:c.946_949=']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2 group E member 3 (NR2E3), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_014249.3:c.946_949=']['alt_genomic_loci'], []) assert results['NM_014249.3:c.946_949=']['gene_symbol'] == 'NR2E3' assert results['NM_014249.3:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Asp316=)', 'slr': 'NP_055064.1:p.(D316=)'} assert results['NM_014249.3:c.946_949=']['submitted_variant'] == '15-72105929-CC-C' @@ -9945,8 +9451,7 @@ def test_variant232(self): assert 'NM_016346.2:c.946_949=' in list(results.keys()) assert results['NM_016346.2:c.946_949=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_016346.2:c.946_949=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_016346.2:c.946_949=']['alt_genomic_loci'] == [] - assert results['NM_016346.2:c.946_949=']['transcript_description'] == 'Homo sapiens nuclear receptor subfamily 2, group E, member 3 (NR2E3), transcript variant 1, mRNA' + 
self.assertCountEqual(results['NM_016346.2:c.946_949=']['alt_genomic_loci'], []) assert results['NM_016346.2:c.946_949=']['gene_symbol'] == 'NR2E3' assert results['NM_016346.2:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Asp316=)', 'slr': 'NP_057430.1:p.(D316=)'} assert results['NM_016346.2:c.946_949=']['submitted_variant'] == '15-72105929-CC-C' @@ -9969,8 +9474,7 @@ def test_variant233(self): assert 'NM_002693.2:c.752C>T' in list(results.keys()) assert results['NM_002693.2:c.752C>T']['hgvs_lrg_transcript_variant'] == 'LRG_765t1:c.752C>T' assert results['NM_002693.2:c.752C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_002693.2:c.752C>T']['alt_genomic_loci'] == [] - assert results['NM_002693.2:c.752C>T']['transcript_description'] == 'Homo sapiens DNA polymerase gamma, catalytic subunit (POLG), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_002693.2:c.752C>T']['alt_genomic_loci'], []) assert results['NM_002693.2:c.752C>T']['gene_symbol'] == 'POLG' assert results['NM_002693.2:c.752C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002684.1(LRG_765p1):p.(Thr251Ile)', 'slr': 'NP_002684.1:p.(T251I)'} assert results['NM_002693.2:c.752C>T']['submitted_variant'] == '15-89873415-G-A' @@ -9988,8 +9492,7 @@ def test_variant233(self): assert 'NM_001126131.1:c.752C>T' in list(results.keys()) assert results['NM_001126131.1:c.752C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001126131.1:c.752C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001126131.1:c.752C>T']['alt_genomic_loci'] == [] - assert results['NM_001126131.1:c.752C>T']['transcript_description'] == 'Homo sapiens DNA polymerase gamma, catalytic subunit (POLG), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001126131.1:c.752C>T']['alt_genomic_loci'], []) assert results['NM_001126131.1:c.752C>T']['gene_symbol'] == 'POLG' assert 
results['NM_001126131.1:c.752C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119603.1:p.(Thr251Ile)', 'slr': 'NP_001119603.1:p.(T251I)'} assert results['NM_001126131.1:c.752C>T']['submitted_variant'] == '15-89873415-G-A' @@ -10012,8 +9515,7 @@ def test_variant234(self): assert 'NM_000548.3:c.277C>T' in list(results.keys()) assert results['NM_000548.3:c.277C>T']['hgvs_lrg_transcript_variant'] == 'LRG_487t1:c.277C>T' assert results['NM_000548.3:c.277C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000548.3:c.277C>T']['alt_genomic_loci'] == [] - assert results['NM_000548.3:c.277C>T']['transcript_description'] == 'Homo sapiens tuberous sclerosis 2 (TSC2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000548.3:c.277C>T']['alt_genomic_loci'], []) assert results['NM_000548.3:c.277C>T']['gene_symbol'] == 'TSC2' assert results['NM_000548.3:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000539.2(LRG_487p1):p.(Arg93Trp)', 'slr': 'NP_000539.2:p.(R93W)'} assert results['NM_000548.3:c.277C>T']['submitted_variant'] == '16-2103394-C-T' @@ -10030,8 +9532,7 @@ def test_variant234(self): assert 'NM_001318832.1:c.310C>T' in list(results.keys()) assert results['NM_001318832.1:c.310C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001318832.1:c.310C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001318832.1:c.310C>T']['alt_genomic_loci'] == [] - assert results['NM_001318832.1:c.310C>T']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 9, mRNA' + self.assertCountEqual(results['NM_001318832.1:c.310C>T']['alt_genomic_loci'], []) assert results['NM_001318832.1:c.310C>T']['gene_symbol'] == 'TSC2' assert results['NM_001318832.1:c.310C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305761.1:p.(Arg104Trp)', 'slr': 'NP_001305761.1:p.(R104W)'} assert results['NM_001318832.1:c.310C>T']['submitted_variant'] == '16-2103394-C-T' @@ 
-10048,8 +9549,7 @@ def test_variant234(self): assert 'NM_001318829.1:c.130C>T' in list(results.keys()) assert results['NM_001318829.1:c.130C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001318829.1:c.130C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001318829.1:c.130C>T']['alt_genomic_loci'] == [] - assert results['NM_001318829.1:c.130C>T']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 7, mRNA' + self.assertCountEqual(results['NM_001318829.1:c.130C>T']['alt_genomic_loci'], []) assert results['NM_001318829.1:c.130C>T']['gene_symbol'] == 'TSC2' assert results['NM_001318829.1:c.130C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305758.1:p.(Arg44Trp)', 'slr': 'NP_001305758.1:p.(R44W)'} assert results['NM_001318829.1:c.130C>T']['submitted_variant'] == '16-2103394-C-T' @@ -10066,8 +9566,7 @@ def test_variant234(self): assert 'NM_001077183.2:c.277C>T' in list(results.keys()) assert results['NM_001077183.2:c.277C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001077183.2:c.277C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001077183.2:c.277C>T']['alt_genomic_loci'] == [] - assert results['NM_001077183.2:c.277C>T']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001077183.2:c.277C>T']['alt_genomic_loci'], []) assert results['NM_001077183.2:c.277C>T']['gene_symbol'] == 'TSC2' assert results['NM_001077183.2:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001070651.1:p.(Arg93Trp)', 'slr': 'NP_001070651.1:p.(R93W)'} assert results['NM_001077183.2:c.277C>T']['submitted_variant'] == '16-2103394-C-T' @@ -10084,8 +9583,7 @@ def test_variant234(self): assert 'NM_001114382.1:c.277C>T' in list(results.keys()) assert results['NM_001114382.1:c.277C>T']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001114382.1:c.277C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001114382.1:c.277C>T']['alt_genomic_loci'] == [] - assert results['NM_001114382.1:c.277C>T']['transcript_description'] == 'Homo sapiens tuberous sclerosis 2 (TSC2), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001114382.1:c.277C>T']['alt_genomic_loci'], []) assert results['NM_001114382.1:c.277C>T']['gene_symbol'] == 'TSC2' assert results['NM_001114382.1:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001107854.1:p.(Arg93Trp)', 'slr': 'NP_001107854.1:p.(R93W)'} assert results['NM_001114382.1:c.277C>T']['submitted_variant'] == '16-2103394-C-T' @@ -10102,8 +9600,7 @@ def test_variant234(self): assert 'NM_001077183.1:c.277C>T' in list(results.keys()) assert results['NM_001077183.1:c.277C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001077183.1:c.277C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001077183.1:c.277C>T']['alt_genomic_loci'] == [] - assert results['NM_001077183.1:c.277C>T']['transcript_description'] == 'Homo sapiens tuberous sclerosis 2 (TSC2), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001077183.1:c.277C>T']['alt_genomic_loci'], []) assert results['NM_001077183.1:c.277C>T']['gene_symbol'] == 'TSC2' assert results['NM_001077183.1:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001070651.1:p.(Arg93Trp)', 'slr': 'NP_001070651.1:p.(R93W)'} assert results['NM_001077183.1:c.277C>T']['submitted_variant'] == '16-2103394-C-T' @@ -10120,8 +9617,7 @@ def test_variant234(self): assert 'NM_001318827.1:c.226-903C>T' in list(results.keys()) assert results['NM_001318827.1:c.226-903C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001318827.1:c.226-903C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001318827.1:c.226-903C>T']['alt_genomic_loci'] == [] - assert results['NM_001318827.1:c.226-903C>T']['transcript_description'] == 
'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 6, mRNA' + self.assertCountEqual(results['NM_001318827.1:c.226-903C>T']['alt_genomic_loci'], []) assert results['NM_001318827.1:c.226-903C>T']['gene_symbol'] == 'TSC2' assert results['NM_001318827.1:c.226-903C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305756.1:p.?', 'slr': 'NP_001305756.1:p.?'} assert results['NM_001318827.1:c.226-903C>T']['submitted_variant'] == '16-2103394-C-T' @@ -10139,8 +9635,7 @@ def test_variant234(self): assert 'NM_001114382.2:c.277C>T' in list(results.keys()) assert results['NM_001114382.2:c.277C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001114382.2:c.277C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001114382.2:c.277C>T']['alt_genomic_loci'] == [] - assert results['NM_001114382.2:c.277C>T']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001114382.2:c.277C>T']['alt_genomic_loci'], []) assert results['NM_001114382.2:c.277C>T']['gene_symbol'] == 'TSC2' assert results['NM_001114382.2:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001107854.1:p.(Arg93Trp)', 'slr': 'NP_001107854.1:p.(R93W)'} assert results['NM_001114382.2:c.277C>T']['submitted_variant'] == '16-2103394-C-T' @@ -10157,8 +9652,7 @@ def test_variant234(self): assert 'NM_001363528.1:c.277C>T' in list(results.keys()) assert results['NM_001363528.1:c.277C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363528.1:c.277C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001363528.1:c.277C>T']['alt_genomic_loci'] == [] - assert results['NM_001363528.1:c.277C>T']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 10, mRNA' + self.assertCountEqual(results['NM_001363528.1:c.277C>T']['alt_genomic_loci'], []) assert results['NM_001363528.1:c.277C>T']['gene_symbol'] == 'TSC2' assert 
results['NM_001363528.1:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350457.1:p.(Arg93Trp)', 'slr': 'NP_001350457.1:p.(R93W)'} assert results['NM_001363528.1:c.277C>T']['submitted_variant'] == '16-2103394-C-T' @@ -10175,8 +9669,7 @@ def test_variant234(self): assert 'NM_021055.2:c.277C>T' in list(results.keys()) assert results['NM_021055.2:c.277C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_021055.2:c.277C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_021055.2:c.277C>T']['alt_genomic_loci'] == [] - assert results['NM_021055.2:c.277C>T']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_021055.2:c.277C>T']['alt_genomic_loci'], []) assert results['NM_021055.2:c.277C>T']['gene_symbol'] == 'TSC2' assert results['NM_021055.2:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_066399.2:p.(Arg93Trp)', 'slr': 'NP_066399.2:p.(R93W)'} assert results['NM_021055.2:c.277C>T']['submitted_variant'] == '16-2103394-C-T' @@ -10193,8 +9686,7 @@ def test_variant234(self): assert 'NM_000548.4:c.277C>T' in list(results.keys()) assert results['NM_000548.4:c.277C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000548.4:c.277C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000548.4:c.277C>T']['alt_genomic_loci'] == [] - assert results['NM_000548.4:c.277C>T']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000548.4:c.277C>T']['alt_genomic_loci'], []) assert results['NM_000548.4:c.277C>T']['gene_symbol'] == 'TSC2' assert results['NM_000548.4:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000539.2(LRG_487p1):p.(Arg93Trp)', 'slr': 'NP_000539.2:p.(R93W)'} assert results['NM_000548.4:c.277C>T']['submitted_variant'] == '16-2103394-C-T' @@ -10211,8 +9703,7 @@ def test_variant234(self): assert 
'NM_001318831.1:c.-1-2803C>T' in list(results.keys()) assert results['NM_001318831.1:c.-1-2803C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001318831.1:c.-1-2803C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001318831.1:c.-1-2803C>T']['alt_genomic_loci'] == [] - assert results['NM_001318831.1:c.-1-2803C>T']['transcript_description'] == 'Homo sapiens TSC complex subunit 2 (TSC2), transcript variant 8, mRNA' + self.assertCountEqual(results['NM_001318831.1:c.-1-2803C>T']['alt_genomic_loci'], []) assert results['NM_001318831.1:c.-1-2803C>T']['gene_symbol'] == 'TSC2' assert results['NM_001318831.1:c.-1-2803C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305760.1:p.?', 'slr': 'NP_001305760.1:p.?'} assert results['NM_001318831.1:c.-1-2803C>T']['submitted_variant'] == '16-2103394-C-T' @@ -10236,8 +9727,7 @@ def test_variant235(self): assert 'NM_001079846.1:c.5634G>C' in list(results.keys()) assert results['NM_001079846.1:c.5634G>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001079846.1:c.5634G>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001079846.1:c.5634G>C']['alt_genomic_loci'] == [] - assert results['NM_001079846.1:c.5634G>C']['transcript_description'] == 'Homo sapiens CREB binding protein (CREBBP), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001079846.1:c.5634G>C']['alt_genomic_loci'], []) assert results['NM_001079846.1:c.5634G>C']['gene_symbol'] == 'CREBBP' assert results['NM_001079846.1:c.5634G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073315.1:p.(Met1878Ile)', 'slr': 'NP_001073315.1:p.(M1878I)'} assert results['NM_001079846.1:c.5634G>C']['submitted_variant'] == '16-3779300-C-G' @@ -10254,8 +9744,7 @@ def test_variant235(self): assert 'NM_004380.2:c.5748G>C' in list(results.keys()) assert results['NM_004380.2:c.5748G>C']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_004380.2:c.5748G>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004380.2:c.5748G>C']['alt_genomic_loci'] == [] - assert results['NM_004380.2:c.5748G>C']['transcript_description'] == 'Homo sapiens CREB binding protein (CREBBP), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_004380.2:c.5748G>C']['alt_genomic_loci'], []) assert results['NM_004380.2:c.5748G>C']['gene_symbol'] == 'CREBBP' assert results['NM_004380.2:c.5748G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004371.2:p.(Met1916Ile)', 'slr': 'NP_004371.2:p.(M1916I)'} assert results['NM_004380.2:c.5748G>C']['submitted_variant'] == '16-3779300-C-G' @@ -10278,8 +9767,7 @@ def test_variant236(self): assert 'NM_001330504.1:c.493C>G' in list(results.keys()) assert results['NM_001330504.1:c.493C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330504.1:c.493C>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330504.1:c.493C>G']['alt_genomic_loci'] == [] - assert results['NM_001330504.1:c.493C>G']['transcript_description'] == 'Homo sapiens ALG1, chitobiosyldiphosphodolichol beta-mannosyltransferase (ALG1), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001330504.1:c.493C>G']['alt_genomic_loci'], []) assert results['NM_001330504.1:c.493C>G']['gene_symbol'] == 'ALG1' assert results['NM_001330504.1:c.493C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317433.1:p.(Arg165Gly)', 'slr': 'NP_001317433.1:p.(R165G)'} assert results['NM_001330504.1:c.493C>G']['submitted_variant'] == '16-5128843-C-G' @@ -10297,8 +9785,7 @@ def test_variant236(self): assert 'NM_019109.4:c.826C>G' in list(results.keys()) assert results['NM_019109.4:c.826C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_019109.4:c.826C>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_019109.4:c.826C>G']['alt_genomic_loci'] == [] - assert results['NM_019109.4:c.826C>G']['transcript_description'] == 'Homo 
sapiens ALG1, chitobiosyldiphosphodolichol beta-mannosyltransferase (ALG1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_019109.4:c.826C>G']['alt_genomic_loci'], []) assert results['NM_019109.4:c.826C>G']['gene_symbol'] == 'ALG1' assert results['NM_019109.4:c.826C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061982.3:p.(Arg276Gly)', 'slr': 'NP_061982.3:p.(R276G)'} assert results['NM_019109.4:c.826C>G']['submitted_variant'] == '16-5128843-C-G' @@ -10321,8 +9808,7 @@ def test_variant237(self): assert 'NM_024306.4:c.95G>A' in list(results.keys()) assert results['NM_024306.4:c.95G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_024306.4:c.95G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_024306.4:c.95G>A']['alt_genomic_loci'] == [] - assert results['NM_024306.4:c.95G>A']['transcript_description'] == 'Homo sapiens fatty acid 2-hydroxylase (FA2H), mRNA' + self.assertCountEqual(results['NM_024306.4:c.95G>A']['alt_genomic_loci'], []) assert results['NM_024306.4:c.95G>A']['gene_symbol'] == 'FA2H' assert results['NM_024306.4:c.95G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_077282.3:p.(Arg32His)', 'slr': 'NP_077282.3:p.(R32H)'} assert results['NM_024306.4:c.95G>A']['submitted_variant'] == '16-74808559-C-T' @@ -10346,8 +9832,7 @@ def test_variant238(self): assert 'NM_003119.3:c.-22C>A' in list(results.keys()) assert results['NM_003119.3:c.-22C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.-22C>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003119.3:c.-22C>A']['alt_genomic_loci'] == [] - assert results['NM_003119.3:c.-22C>A']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.3:c.-22C>A']['alt_genomic_loci'], []) assert results['NM_003119.3:c.-22C>A']['gene_symbol'] == 'SPG7' assert 
results['NM_003119.3:c.-22C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} assert results['NM_003119.3:c.-22C>A']['submitted_variant'] == '16-89574804-C-A' @@ -10365,8 +9850,7 @@ def test_variant238(self): assert 'NM_199367.2:c.-22C>A' in list(results.keys()) assert results['NM_199367.2:c.-22C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.2:c.-22C>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_199367.2:c.-22C>A']['alt_genomic_loci'] == [] - assert results['NM_199367.2:c.-22C>A']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_199367.2:c.-22C>A']['alt_genomic_loci'], []) assert results['NM_199367.2:c.-22C>A']['gene_symbol'] == 'SPG7' assert results['NM_199367.2:c.-22C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} assert results['NM_199367.2:c.-22C>A']['submitted_variant'] == '16-89574804-C-A' @@ -10383,8 +9867,7 @@ def test_variant238(self): assert 'NM_001363850.1:c.-22C>A' in list(results.keys()) assert results['NM_001363850.1:c.-22C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.-22C>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001363850.1:c.-22C>A']['alt_genomic_loci'] == [] - assert results['NM_001363850.1:c.-22C>A']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001363850.1:c.-22C>A']['alt_genomic_loci'], []) assert results['NM_001363850.1:c.-22C>A']['gene_symbol'] == 'SPG7' assert results['NM_001363850.1:c.-22C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.?', 'slr': 'NP_001350779.1:p.?'} assert results['NM_001363850.1:c.-22C>A']['submitted_variant'] == '16-89574804-C-A' @@ -10407,8 +9890,7 @@ def 
test_variant239(self): assert 'NM_003119.2:c.1A>C' in list(results.keys()) assert results['NM_003119.2:c.1A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.1A>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003119.2:c.1A>C']['alt_genomic_loci'] == [] - assert results['NM_003119.2:c.1A>C']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.2:c.1A>C']['alt_genomic_loci'], []) assert results['NM_003119.2:c.1A>C']['gene_symbol'] == 'SPG7' assert results['NM_003119.2:c.1A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Met1?)', 'slr': 'NP_003110.1:p.(M1?)'} assert results['NM_003119.2:c.1A>C']['submitted_variant'] == '16-89574826-A-C' @@ -10425,8 +9907,7 @@ def test_variant239(self): assert 'NM_199367.1:c.1A>C' in list(results.keys()) assert results['NM_199367.1:c.1A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.1:c.1A>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_199367.1:c.1A>C']['alt_genomic_loci'] == [] - assert results['NM_199367.1:c.1A>C']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_199367.1:c.1A>C']['alt_genomic_loci'], []) assert results['NM_199367.1:c.1A>C']['gene_symbol'] == 'SPG7' assert results['NM_199367.1:c.1A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Met1?)', 'slr': 'NP_955399.1:p.(M1?)'} assert results['NM_199367.1:c.1A>C']['submitted_variant'] == '16-89574826-A-C' @@ -10443,8 +9924,7 @@ def test_variant239(self): assert 'NM_001363850.1:c.1A>C' in list(results.keys()) assert results['NM_001363850.1:c.1A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.1A>C']['refseqgene_context_intronic_sequence'] == '' - assert 
results['NM_001363850.1:c.1A>C']['alt_genomic_loci'] == [] - assert results['NM_001363850.1:c.1A>C']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001363850.1:c.1A>C']['alt_genomic_loci'], []) assert results['NM_001363850.1:c.1A>C']['gene_symbol'] == 'SPG7' assert results['NM_001363850.1:c.1A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Met1?)', 'slr': 'NP_001350779.1:p.(M1?)'} assert results['NM_001363850.1:c.1A>C']['submitted_variant'] == '16-89574826-A-C' @@ -10461,8 +9941,7 @@ def test_variant239(self): assert 'NM_199367.2:c.1A>C' in list(results.keys()) assert results['NM_199367.2:c.1A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.2:c.1A>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_199367.2:c.1A>C']['alt_genomic_loci'] == [] - assert results['NM_199367.2:c.1A>C']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_199367.2:c.1A>C']['alt_genomic_loci'], []) assert results['NM_199367.2:c.1A>C']['gene_symbol'] == 'SPG7' assert results['NM_199367.2:c.1A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Met1?)', 'slr': 'NP_955399.1:p.(M1?)'} assert results['NM_199367.2:c.1A>C']['submitted_variant'] == '16-89574826-A-C' @@ -10480,8 +9959,7 @@ def test_variant239(self): assert 'NM_003119.3:c.1A>C' in list(results.keys()) assert results['NM_003119.3:c.1A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.1A>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003119.3:c.1A>C']['alt_genomic_loci'] == [] - assert results['NM_003119.3:c.1A>C']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + 
self.assertCountEqual(results['NM_003119.3:c.1A>C']['alt_genomic_loci'], []) assert results['NM_003119.3:c.1A>C']['gene_symbol'] == 'SPG7' assert results['NM_003119.3:c.1A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Met1?)', 'slr': 'NP_003110.1:p.(M1?)'} assert results['NM_003119.3:c.1A>C']['submitted_variant'] == '16-89574826-A-C' @@ -10504,8 +9982,7 @@ def test_variant240(self): assert 'NM_001363850.1:c.90dup' in list(results.keys()) assert results['NM_001363850.1:c.90dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.90dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001363850.1:c.90dup']['alt_genomic_loci'] == [] - assert results['NM_001363850.1:c.90dup']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001363850.1:c.90dup']['alt_genomic_loci'], []) assert results['NM_001363850.1:c.90dup']['gene_symbol'] == 'SPG7' assert results['NM_001363850.1:c.90dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Pro31SerfsTer43)', 'slr': 'NP_001350779.1:p.(P31Sfs*43)'} assert results['NM_001363850.1:c.90dup']['submitted_variant'] == '16-89574914-G-GT' @@ -10522,8 +9999,7 @@ def test_variant240(self): assert 'NM_199367.1:c.90dup' in list(results.keys()) assert results['NM_199367.1:c.90dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.1:c.90dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_199367.1:c.90dup']['alt_genomic_loci'] == [] - assert results['NM_199367.1:c.90dup']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_199367.1:c.90dup']['alt_genomic_loci'], []) assert results['NM_199367.1:c.90dup']['gene_symbol'] == 'SPG7' assert results['NM_199367.1:c.90dup']['hgvs_predicted_protein_consequence'] == 
{'tlr': 'NP_955399.1:p.(Pro31SerfsTer43)', 'slr': 'NP_955399.1:p.(P31Sfs*43)'} assert results['NM_199367.1:c.90dup']['submitted_variant'] == '16-89574914-G-GT' @@ -10540,8 +10016,7 @@ def test_variant240(self): assert 'NM_003119.2:c.90dup' in list(results.keys()) assert results['NM_003119.2:c.90dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.90dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003119.2:c.90dup']['alt_genomic_loci'] == [] - assert results['NM_003119.2:c.90dup']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.2:c.90dup']['alt_genomic_loci'], []) assert results['NM_003119.2:c.90dup']['gene_symbol'] == 'SPG7' assert results['NM_003119.2:c.90dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Pro31SerfsTer43)', 'slr': 'NP_003110.1:p.(P31Sfs*43)'} assert results['NM_003119.2:c.90dup']['submitted_variant'] == '16-89574914-G-GT' @@ -10558,8 +10033,7 @@ def test_variant240(self): assert 'NM_199367.2:c.90dup' in list(results.keys()) assert results['NM_199367.2:c.90dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.2:c.90dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_199367.2:c.90dup']['alt_genomic_loci'] == [] - assert results['NM_199367.2:c.90dup']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_199367.2:c.90dup']['alt_genomic_loci'], []) assert results['NM_199367.2:c.90dup']['gene_symbol'] == 'SPG7' assert results['NM_199367.2:c.90dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Pro31SerfsTer43)', 'slr': 'NP_955399.1:p.(P31Sfs*43)'} assert results['NM_199367.2:c.90dup']['submitted_variant'] == '16-89574914-G-GT' @@ -10577,8 +10051,7 @@ def test_variant240(self): assert 
'NM_003119.3:c.90dup' in list(results.keys()) assert results['NM_003119.3:c.90dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.90dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003119.3:c.90dup']['alt_genomic_loci'] == [] - assert results['NM_003119.3:c.90dup']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.3:c.90dup']['alt_genomic_loci'], []) assert results['NM_003119.3:c.90dup']['gene_symbol'] == 'SPG7' assert results['NM_003119.3:c.90dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Pro31SerfsTer43)', 'slr': 'NP_003110.1:p.(P31Sfs*43)'} assert results['NM_003119.3:c.90dup']['submitted_variant'] == '16-89574914-G-GT' @@ -10601,8 +10074,7 @@ def test_variant241(self): assert 'NM_199367.2:c.89_91dup' in list(results.keys()) assert results['NM_199367.2:c.89_91dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.2:c.89_91dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_199367.2:c.89_91dup']['alt_genomic_loci'] == [] - assert results['NM_199367.2:c.89_91dup']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_199367.2:c.89_91dup']['alt_genomic_loci'], []) assert results['NM_199367.2:c.89_91dup']['gene_symbol'] == 'SPG7' assert results['NM_199367.2:c.89_91dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Ser30_Pro31insArg)', 'slr': 'NP_955399.1:p.(S30_P31insR)'} assert results['NM_199367.2:c.89_91dup']['submitted_variant'] == '16-89574916-C-CGTC' @@ -10619,8 +10091,7 @@ def test_variant241(self): assert 'NM_003119.3:c.89_91dup' in list(results.keys()) assert results['NM_003119.3:c.89_91dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.89_91dup']['refseqgene_context_intronic_sequence'] == '' 
- assert results['NM_003119.3:c.89_91dup']['alt_genomic_loci'] == [] - assert results['NM_003119.3:c.89_91dup']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.3:c.89_91dup']['alt_genomic_loci'], []) assert results['NM_003119.3:c.89_91dup']['gene_symbol'] == 'SPG7' assert results['NM_003119.3:c.89_91dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Ser30_Pro31insArg)', 'slr': 'NP_003110.1:p.(S30_P31insR)'} assert results['NM_003119.3:c.89_91dup']['submitted_variant'] == '16-89574916-C-CGTC' @@ -10637,8 +10108,7 @@ def test_variant241(self): assert 'NM_001363850.1:c.89_91dup' in list(results.keys()) assert results['NM_001363850.1:c.89_91dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.89_91dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001363850.1:c.89_91dup']['alt_genomic_loci'] == [] - assert results['NM_001363850.1:c.89_91dup']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001363850.1:c.89_91dup']['alt_genomic_loci'], []) assert results['NM_001363850.1:c.89_91dup']['gene_symbol'] == 'SPG7' assert results['NM_001363850.1:c.89_91dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Ser30_Pro31insArg)', 'slr': 'NP_001350779.1:p.(S30_P31insR)'} assert results['NM_001363850.1:c.89_91dup']['submitted_variant'] == '16-89574916-C-CGTC' @@ -10656,8 +10126,7 @@ def test_variant241(self): assert 'NM_199367.1:c.89_91dup' in list(results.keys()) assert results['NM_199367.1:c.89_91dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.1:c.89_91dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_199367.1:c.89_91dup']['alt_genomic_loci'] == [] - assert results['NM_199367.1:c.89_91dup']['transcript_description'] == 'Homo 
sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_199367.1:c.89_91dup']['alt_genomic_loci'], []) assert results['NM_199367.1:c.89_91dup']['gene_symbol'] == 'SPG7' assert results['NM_199367.1:c.89_91dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Ser30_Pro31insArg)', 'slr': 'NP_955399.1:p.(S30_P31insR)'} assert results['NM_199367.1:c.89_91dup']['submitted_variant'] == '16-89574916-C-CGTC' @@ -10674,8 +10143,7 @@ def test_variant241(self): assert 'NM_003119.2:c.89_91dup' in list(results.keys()) assert results['NM_003119.2:c.89_91dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.89_91dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003119.2:c.89_91dup']['alt_genomic_loci'] == [] - assert results['NM_003119.2:c.89_91dup']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.2:c.89_91dup']['alt_genomic_loci'], []) assert results['NM_003119.2:c.89_91dup']['gene_symbol'] == 'SPG7' assert results['NM_003119.2:c.89_91dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Ser30_Pro31insArg)', 'slr': 'NP_003110.1:p.(S30_P31insR)'} assert results['NM_003119.2:c.89_91dup']['submitted_variant'] == '16-89574916-C-CGTC' @@ -10698,8 +10166,7 @@ def test_variant242(self): assert 'NM_199367.2:c.183+1G>A' in list(results.keys()) assert results['NM_199367.2:c.183+1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.2:c.183+1G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_199367.2:c.183+1G>A']['alt_genomic_loci'] == [] - assert results['NM_199367.2:c.183+1G>A']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA' + 
self.assertCountEqual(results['NM_199367.2:c.183+1G>A']['alt_genomic_loci'], []) assert results['NM_199367.2:c.183+1G>A']['gene_symbol'] == 'SPG7' assert results['NM_199367.2:c.183+1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} assert results['NM_199367.2:c.183+1G>A']['submitted_variant'] == '16-89575009-G-A' @@ -10716,8 +10183,7 @@ def test_variant242(self): assert 'NM_003119.2:c.183+1G>A' in list(results.keys()) assert results['NM_003119.2:c.183+1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.183+1G>A']['refseqgene_context_intronic_sequence'] == 'NG_008082.1(NM_003119.2):c.183+1G>A' - assert results['NM_003119.2:c.183+1G>A']['alt_genomic_loci'] == [] - assert results['NM_003119.2:c.183+1G>A']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.2:c.183+1G>A']['alt_genomic_loci'], []) assert results['NM_003119.2:c.183+1G>A']['gene_symbol'] == 'SPG7' assert results['NM_003119.2:c.183+1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} assert results['NM_003119.2:c.183+1G>A']['submitted_variant'] == '16-89575009-G-A' @@ -10735,8 +10201,7 @@ def test_variant242(self): assert 'NM_199367.1:c.183+1G>A' in list(results.keys()) assert results['NM_199367.1:c.183+1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.1:c.183+1G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_199367.1:c.183+1G>A']['alt_genomic_loci'] == [] - assert results['NM_199367.1:c.183+1G>A']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_199367.1:c.183+1G>A']['alt_genomic_loci'], []) assert results['NM_199367.1:c.183+1G>A']['gene_symbol'] == 'SPG7' assert 
results['NM_199367.1:c.183+1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} assert results['NM_199367.1:c.183+1G>A']['submitted_variant'] == '16-89575009-G-A' @@ -10753,8 +10218,7 @@ def test_variant242(self): assert 'NM_001363850.1:c.183+1G>A' in list(results.keys()) assert results['NM_001363850.1:c.183+1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.183+1G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001363850.1:c.183+1G>A']['alt_genomic_loci'] == [] - assert results['NM_001363850.1:c.183+1G>A']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001363850.1:c.183+1G>A']['alt_genomic_loci'], []) assert results['NM_001363850.1:c.183+1G>A']['gene_symbol'] == 'SPG7' assert results['NM_001363850.1:c.183+1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.?', 'slr': 'NP_001350779.1:p.?'} assert results['NM_001363850.1:c.183+1G>A']['submitted_variant'] == '16-89575009-G-A' @@ -10771,8 +10235,7 @@ def test_variant242(self): assert 'NM_003119.3:c.183+1G>A' in list(results.keys()) assert results['NM_003119.3:c.183+1G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.183+1G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003119.3:c.183+1G>A']['alt_genomic_loci'] == [] - assert results['NM_003119.3:c.183+1G>A']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.3:c.183+1G>A']['alt_genomic_loci'], []) assert results['NM_003119.3:c.183+1G>A']['gene_symbol'] == 'SPG7' assert results['NM_003119.3:c.183+1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} assert results['NM_003119.3:c.183+1G>A']['submitted_variant'] == '16-89575009-G-A' @@ 
-10795,8 +10258,7 @@ def test_variant243(self): assert 'NM_199367.1:c.183+32_183+33insA' in list(results.keys()) assert results['NM_199367.1:c.183+32_183+33insA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.1:c.183+32_183+33insA']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_199367.1:c.183+32_183+33insA']['alt_genomic_loci'] == [] - assert results['NM_199367.1:c.183+32_183+33insA']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_199367.1:c.183+32_183+33insA']['alt_genomic_loci'], []) assert results['NM_199367.1:c.183+32_183+33insA']['gene_symbol'] == 'SPG7' assert results['NM_199367.1:c.183+32_183+33insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} assert results['NM_199367.1:c.183+32_183+33insA']['submitted_variant'] == '16-89575040-C-A,CA' @@ -10813,8 +10275,7 @@ def test_variant243(self): assert 'NM_001363850.1:c.183+32C>A' in list(results.keys()) assert results['NM_001363850.1:c.183+32C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.183+32C>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001363850.1:c.183+32C>A']['alt_genomic_loci'] == [] - assert results['NM_001363850.1:c.183+32C>A']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001363850.1:c.183+32C>A']['alt_genomic_loci'], []) assert results['NM_001363850.1:c.183+32C>A']['gene_symbol'] == 'SPG7' assert results['NM_001363850.1:c.183+32C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.?', 'slr': 'NP_001350779.1:p.?'} assert results['NM_001363850.1:c.183+32C>A']['submitted_variant'] == '16-89575040-C-A,CA' @@ -10831,8 +10292,7 @@ def test_variant243(self): assert 'NM_001363850.1:c.183+32_183+33insA' in 
list(results.keys()) assert results['NM_001363850.1:c.183+32_183+33insA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.183+32_183+33insA']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001363850.1:c.183+32_183+33insA']['alt_genomic_loci'] == [] - assert results['NM_001363850.1:c.183+32_183+33insA']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001363850.1:c.183+32_183+33insA']['alt_genomic_loci'], []) assert results['NM_001363850.1:c.183+32_183+33insA']['gene_symbol'] == 'SPG7' assert results['NM_001363850.1:c.183+32_183+33insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.?', 'slr': 'NP_001350779.1:p.?'} assert results['NM_001363850.1:c.183+32_183+33insA']['submitted_variant'] == '16-89575040-C-A,CA' @@ -10849,8 +10309,7 @@ def test_variant243(self): assert 'NM_199367.2:c.183+32C>A' in list(results.keys()) assert results['NM_199367.2:c.183+32C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.2:c.183+32C>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_199367.2:c.183+32C>A']['alt_genomic_loci'] == [] - assert results['NM_199367.2:c.183+32C>A']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_199367.2:c.183+32C>A']['alt_genomic_loci'], []) assert results['NM_199367.2:c.183+32C>A']['gene_symbol'] == 'SPG7' assert results['NM_199367.2:c.183+32C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} assert results['NM_199367.2:c.183+32C>A']['submitted_variant'] == '16-89575040-C-A,CA' @@ -10867,8 +10326,7 @@ def test_variant243(self): assert 'NM_003119.3:c.183+32_183+33insA' in list(results.keys()) assert results['NM_003119.3:c.183+32_183+33insA']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_003119.3:c.183+32_183+33insA']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003119.3:c.183+32_183+33insA']['alt_genomic_loci'] == [] - assert results['NM_003119.3:c.183+32_183+33insA']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.3:c.183+32_183+33insA']['alt_genomic_loci'], []) assert results['NM_003119.3:c.183+32_183+33insA']['gene_symbol'] == 'SPG7' assert results['NM_003119.3:c.183+32_183+33insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} assert results['NM_003119.3:c.183+32_183+33insA']['submitted_variant'] == '16-89575040-C-A,CA' @@ -10886,8 +10344,7 @@ def test_variant243(self): assert 'NM_003119.2:c.183+32_183+33insA' in list(results.keys()) assert results['NM_003119.2:c.183+32_183+33insA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.183+32_183+33insA']['refseqgene_context_intronic_sequence'] == 'NG_008082.1(NM_003119.2):c.183+32_183+33insA' - assert results['NM_003119.2:c.183+32_183+33insA']['alt_genomic_loci'] == [] - assert results['NM_003119.2:c.183+32_183+33insA']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.2:c.183+32_183+33insA']['alt_genomic_loci'], []) assert results['NM_003119.2:c.183+32_183+33insA']['gene_symbol'] == 'SPG7' assert results['NM_003119.2:c.183+32_183+33insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} assert results['NM_003119.2:c.183+32_183+33insA']['submitted_variant'] == '16-89575040-C-A,CA' @@ -10904,8 +10361,7 @@ def test_variant243(self): assert 'NM_199367.1:c.183+32C>A' in list(results.keys()) assert results['NM_199367.1:c.183+32C>A']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_199367.1:c.183+32C>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_199367.1:c.183+32C>A']['alt_genomic_loci'] == [] - assert results['NM_199367.1:c.183+32C>A']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_199367.1:c.183+32C>A']['alt_genomic_loci'], []) assert results['NM_199367.1:c.183+32C>A']['gene_symbol'] == 'SPG7' assert results['NM_199367.1:c.183+32C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} assert results['NM_199367.1:c.183+32C>A']['submitted_variant'] == '16-89575040-C-A,CA' @@ -10922,8 +10378,7 @@ def test_variant243(self): assert 'NM_003119.3:c.183+32C>A' in list(results.keys()) assert results['NM_003119.3:c.183+32C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.183+32C>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003119.3:c.183+32C>A']['alt_genomic_loci'] == [] - assert results['NM_003119.3:c.183+32C>A']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.3:c.183+32C>A']['alt_genomic_loci'], []) assert results['NM_003119.3:c.183+32C>A']['gene_symbol'] == 'SPG7' assert results['NM_003119.3:c.183+32C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} assert results['NM_003119.3:c.183+32C>A']['submitted_variant'] == '16-89575040-C-A,CA' @@ -10940,8 +10395,7 @@ def test_variant243(self): assert 'NM_199367.2:c.183+32_183+33insA' in list(results.keys()) assert results['NM_199367.2:c.183+32_183+33insA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.2:c.183+32_183+33insA']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_199367.2:c.183+32_183+33insA']['alt_genomic_loci'] == [] - assert 
results['NM_199367.2:c.183+32_183+33insA']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_199367.2:c.183+32_183+33insA']['alt_genomic_loci'], []) assert results['NM_199367.2:c.183+32_183+33insA']['gene_symbol'] == 'SPG7' assert results['NM_199367.2:c.183+32_183+33insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} assert results['NM_199367.2:c.183+32_183+33insA']['submitted_variant'] == '16-89575040-C-A,CA' @@ -10958,8 +10412,7 @@ def test_variant243(self): assert 'NM_003119.2:c.183+32C>A' in list(results.keys()) assert results['NM_003119.2:c.183+32C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.183+32C>A']['refseqgene_context_intronic_sequence'] == 'NG_008082.1(NM_003119.2):c.183+32C>A' - assert results['NM_003119.2:c.183+32C>A']['alt_genomic_loci'] == [] - assert results['NM_003119.2:c.183+32C>A']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.2:c.183+32C>A']['alt_genomic_loci'], []) assert results['NM_003119.2:c.183+32C>A']['gene_symbol'] == 'SPG7' assert results['NM_003119.2:c.183+32C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} assert results['NM_003119.2:c.183+32C>A']['submitted_variant'] == '16-89575040-C-A,CA' @@ -10982,8 +10435,7 @@ def test_variant244(self): assert 'NM_199367.2:c.184-2A>C' in list(results.keys()) assert results['NM_199367.2:c.184-2A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.2:c.184-2A>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_199367.2:c.184-2A>C']['alt_genomic_loci'] == [] - assert results['NM_199367.2:c.184-2A>C']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), 
transcript variant 2, mRNA' + self.assertCountEqual(results['NM_199367.2:c.184-2A>C']['alt_genomic_loci'], []) assert results['NM_199367.2:c.184-2A>C']['gene_symbol'] == 'SPG7' assert results['NM_199367.2:c.184-2A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} assert results['NM_199367.2:c.184-2A>C']['submitted_variant'] == '16-89576896-A-C' @@ -11000,8 +10452,7 @@ def test_variant244(self): assert 'NM_003119.2:c.184-2A>C' in list(results.keys()) assert results['NM_003119.2:c.184-2A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.184-2A>C']['refseqgene_context_intronic_sequence'] == 'NG_008082.1(NM_003119.2):c.184-2A>C' - assert results['NM_003119.2:c.184-2A>C']['alt_genomic_loci'] == [] - assert results['NM_003119.2:c.184-2A>C']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.2:c.184-2A>C']['alt_genomic_loci'], []) assert results['NM_003119.2:c.184-2A>C']['gene_symbol'] == 'SPG7' assert results['NM_003119.2:c.184-2A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} assert results['NM_003119.2:c.184-2A>C']['submitted_variant'] == '16-89576896-A-C' @@ -11018,8 +10469,7 @@ def test_variant244(self): assert 'NM_003119.3:c.184-2A>C' in list(results.keys()) assert results['NM_003119.3:c.184-2A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.184-2A>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003119.3:c.184-2A>C']['alt_genomic_loci'] == [] - assert results['NM_003119.3:c.184-2A>C']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.3:c.184-2A>C']['alt_genomic_loci'], []) assert results['NM_003119.3:c.184-2A>C']['gene_symbol'] == 'SPG7' assert 
results['NM_003119.3:c.184-2A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} assert results['NM_003119.3:c.184-2A>C']['submitted_variant'] == '16-89576896-A-C' @@ -11036,8 +10486,7 @@ def test_variant244(self): assert 'NM_001363850.1:c.184-2A>C' in list(results.keys()) assert results['NM_001363850.1:c.184-2A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.184-2A>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001363850.1:c.184-2A>C']['alt_genomic_loci'] == [] - assert results['NM_001363850.1:c.184-2A>C']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001363850.1:c.184-2A>C']['alt_genomic_loci'], []) assert results['NM_001363850.1:c.184-2A>C']['gene_symbol'] == 'SPG7' assert results['NM_001363850.1:c.184-2A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.?', 'slr': 'NP_001350779.1:p.?'} assert results['NM_001363850.1:c.184-2A>C']['submitted_variant'] == '16-89576896-A-C' @@ -11055,8 +10504,7 @@ def test_variant244(self): assert 'NM_199367.1:c.184-2A>C' in list(results.keys()) assert results['NM_199367.1:c.184-2A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.1:c.184-2A>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_199367.1:c.184-2A>C']['alt_genomic_loci'] == [] - assert results['NM_199367.1:c.184-2A>C']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_199367.1:c.184-2A>C']['alt_genomic_loci'], []) assert results['NM_199367.1:c.184-2A>C']['gene_symbol'] == 'SPG7' assert results['NM_199367.1:c.184-2A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} assert results['NM_199367.1:c.184-2A>C']['submitted_variant'] == 
'16-89576896-A-C' @@ -11079,8 +10527,7 @@ def test_variant245(self): assert 'NM_003119.3:c.216dup' in list(results.keys()) assert results['NM_003119.3:c.216dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.216dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003119.3:c.216dup']['alt_genomic_loci'] == [] - assert results['NM_003119.3:c.216dup']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.3:c.216dup']['alt_genomic_loci'], []) assert results['NM_003119.3:c.216dup']['gene_symbol'] == 'SPG7' assert results['NM_003119.3:c.216dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Glu73Ter)', 'slr': 'NP_003110.1:p.(E73*)'} assert results['NM_003119.3:c.216dup']['submitted_variant'] == '16-89576930-T-TA,TT' @@ -11097,8 +10544,7 @@ def test_variant245(self): assert 'NM_003119.2:c.216_217insA' in list(results.keys()) assert results['NM_003119.2:c.216_217insA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.216_217insA']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003119.2:c.216_217insA']['alt_genomic_loci'] == [] - assert results['NM_003119.2:c.216_217insA']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.2:c.216_217insA']['alt_genomic_loci'], []) assert results['NM_003119.2:c.216_217insA']['gene_symbol'] == 'SPG7' assert results['NM_003119.2:c.216_217insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Glu73ArgfsTer30)', 'slr': 'NP_003110.1:p.(E73Rfs*30)'} assert results['NM_003119.2:c.216_217insA']['submitted_variant'] == '16-89576930-T-TA,TT' @@ -11115,8 +10561,7 @@ def test_variant245(self): assert 'NM_199367.2:c.216dup' in list(results.keys()) assert 
results['NM_199367.2:c.216dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.2:c.216dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_199367.2:c.216dup']['alt_genomic_loci'] == [] - assert results['NM_199367.2:c.216dup']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_199367.2:c.216dup']['alt_genomic_loci'], []) assert results['NM_199367.2:c.216dup']['gene_symbol'] == 'SPG7' assert results['NM_199367.2:c.216dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Glu73Ter)', 'slr': 'NP_955399.1:p.(E73*)'} assert results['NM_199367.2:c.216dup']['submitted_variant'] == '16-89576930-T-TA,TT' @@ -11133,8 +10578,7 @@ def test_variant245(self): assert 'NM_199367.2:c.216_217insA' in list(results.keys()) assert results['NM_199367.2:c.216_217insA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.2:c.216_217insA']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_199367.2:c.216_217insA']['alt_genomic_loci'] == [] - assert results['NM_199367.2:c.216_217insA']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_199367.2:c.216_217insA']['alt_genomic_loci'], []) assert results['NM_199367.2:c.216_217insA']['gene_symbol'] == 'SPG7' assert results['NM_199367.2:c.216_217insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Glu73ArgfsTer30)', 'slr': 'NP_955399.1:p.(E73Rfs*30)'} assert results['NM_199367.2:c.216_217insA']['submitted_variant'] == '16-89576930-T-TA,TT' @@ -11151,8 +10595,7 @@ def test_variant245(self): assert 'NM_001363850.1:c.216dup' in list(results.keys()) assert results['NM_001363850.1:c.216dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.216dup']['refseqgene_context_intronic_sequence'] == '' - assert 
results['NM_001363850.1:c.216dup']['alt_genomic_loci'] == [] - assert results['NM_001363850.1:c.216dup']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001363850.1:c.216dup']['alt_genomic_loci'], []) assert results['NM_001363850.1:c.216dup']['gene_symbol'] == 'SPG7' assert results['NM_001363850.1:c.216dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Glu73Ter)', 'slr': 'NP_001350779.1:p.(E73*)'} assert results['NM_001363850.1:c.216dup']['submitted_variant'] == '16-89576930-T-TA,TT' @@ -11170,8 +10613,7 @@ def test_variant245(self): assert 'NM_001363850.1:c.216_217insA' in list(results.keys()) assert results['NM_001363850.1:c.216_217insA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.216_217insA']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001363850.1:c.216_217insA']['alt_genomic_loci'] == [] - assert results['NM_001363850.1:c.216_217insA']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001363850.1:c.216_217insA']['alt_genomic_loci'], []) assert results['NM_001363850.1:c.216_217insA']['gene_symbol'] == 'SPG7' assert results['NM_001363850.1:c.216_217insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Glu73ArgfsTer30)', 'slr': 'NP_001350779.1:p.(E73Rfs*30)'} assert results['NM_001363850.1:c.216_217insA']['submitted_variant'] == '16-89576930-T-TA,TT' @@ -11188,8 +10630,7 @@ def test_variant245(self): assert 'NM_199367.1:c.216_217insA' in list(results.keys()) assert results['NM_199367.1:c.216_217insA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.1:c.216_217insA']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_199367.1:c.216_217insA']['alt_genomic_loci'] == [] - assert 
results['NM_199367.1:c.216_217insA']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_199367.1:c.216_217insA']['alt_genomic_loci'], []) assert results['NM_199367.1:c.216_217insA']['gene_symbol'] == 'SPG7' assert results['NM_199367.1:c.216_217insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Glu73ArgfsTer30)', 'slr': 'NP_955399.1:p.(E73Rfs*30)'} assert results['NM_199367.1:c.216_217insA']['submitted_variant'] == '16-89576930-T-TA,TT' @@ -11206,8 +10647,7 @@ def test_variant245(self): assert 'NM_199367.1:c.216dup' in list(results.keys()) assert results['NM_199367.1:c.216dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.1:c.216dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_199367.1:c.216dup']['alt_genomic_loci'] == [] - assert results['NM_199367.1:c.216dup']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_199367.1:c.216dup']['alt_genomic_loci'], []) assert results['NM_199367.1:c.216dup']['gene_symbol'] == 'SPG7' assert results['NM_199367.1:c.216dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Glu73Ter)', 'slr': 'NP_955399.1:p.(E73*)'} assert results['NM_199367.1:c.216dup']['submitted_variant'] == '16-89576930-T-TA,TT' @@ -11224,8 +10664,7 @@ def test_variant245(self): assert 'NM_003119.3:c.216_217insA' in list(results.keys()) assert results['NM_003119.3:c.216_217insA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.216_217insA']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003119.3:c.216_217insA']['alt_genomic_loci'] == [] - assert results['NM_003119.3:c.216_217insA']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 
1, mRNA' + self.assertCountEqual(results['NM_003119.3:c.216_217insA']['alt_genomic_loci'], []) assert results['NM_003119.3:c.216_217insA']['gene_symbol'] == 'SPG7' assert results['NM_003119.3:c.216_217insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Glu73ArgfsTer30)', 'slr': 'NP_003110.1:p.(E73Rfs*30)'} assert results['NM_003119.3:c.216_217insA']['submitted_variant'] == '16-89576930-T-TA,TT' @@ -11242,8 +10681,7 @@ def test_variant245(self): assert 'NM_003119.2:c.216dup' in list(results.keys()) assert results['NM_003119.2:c.216dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.216dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003119.2:c.216dup']['alt_genomic_loci'] == [] - assert results['NM_003119.2:c.216dup']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.2:c.216dup']['alt_genomic_loci'], []) assert results['NM_003119.2:c.216dup']['gene_symbol'] == 'SPG7' assert results['NM_003119.2:c.216dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Glu73Ter)', 'slr': 'NP_003110.1:p.(E73*)'} assert results['NM_003119.2:c.216dup']['submitted_variant'] == '16-89576930-T-TA,TT' @@ -11266,8 +10704,7 @@ def test_variant246(self): assert 'NM_199367.1:c.216_217dup' in list(results.keys()) assert results['NM_199367.1:c.216_217dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.1:c.216_217dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_199367.1:c.216_217dup']['alt_genomic_loci'] == [] - assert results['NM_199367.1:c.216_217dup']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_199367.1:c.216_217dup']['alt_genomic_loci'], []) assert results['NM_199367.1:c.216_217dup']['gene_symbol'] == 
'SPG7' assert results['NM_199367.1:c.216_217dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Glu73ValfsTer9)', 'slr': 'NP_955399.1:p.(E73Vfs*9)'} assert results['NM_199367.1:c.216_217dup']['submitted_variant'] == '16-89576931-G-GTG' @@ -11284,8 +10721,7 @@ def test_variant246(self): assert 'NM_003119.3:c.216_217dup' in list(results.keys()) assert results['NM_003119.3:c.216_217dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.216_217dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003119.3:c.216_217dup']['alt_genomic_loci'] == [] - assert results['NM_003119.3:c.216_217dup']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.3:c.216_217dup']['alt_genomic_loci'], []) assert results['NM_003119.3:c.216_217dup']['gene_symbol'] == 'SPG7' assert results['NM_003119.3:c.216_217dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Glu73ValfsTer9)', 'slr': 'NP_003110.1:p.(E73Vfs*9)'} assert results['NM_003119.3:c.216_217dup']['submitted_variant'] == '16-89576931-G-GTG' @@ -11302,8 +10738,7 @@ def test_variant246(self): assert 'NM_199367.2:c.216_217dup' in list(results.keys()) assert results['NM_199367.2:c.216_217dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.2:c.216_217dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_199367.2:c.216_217dup']['alt_genomic_loci'] == [] - assert results['NM_199367.2:c.216_217dup']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_199367.2:c.216_217dup']['alt_genomic_loci'], []) assert results['NM_199367.2:c.216_217dup']['gene_symbol'] == 'SPG7' assert results['NM_199367.2:c.216_217dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Glu73ValfsTer9)', 'slr': 
'NP_955399.1:p.(E73Vfs*9)'} assert results['NM_199367.2:c.216_217dup']['submitted_variant'] == '16-89576931-G-GTG' @@ -11320,8 +10755,7 @@ def test_variant246(self): assert 'NM_003119.2:c.216_217dup' in list(results.keys()) assert results['NM_003119.2:c.216_217dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.216_217dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003119.2:c.216_217dup']['alt_genomic_loci'] == [] - assert results['NM_003119.2:c.216_217dup']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.2:c.216_217dup']['alt_genomic_loci'], []) assert results['NM_003119.2:c.216_217dup']['gene_symbol'] == 'SPG7' assert results['NM_003119.2:c.216_217dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Glu73ValfsTer9)', 'slr': 'NP_003110.1:p.(E73Vfs*9)'} assert results['NM_003119.2:c.216_217dup']['submitted_variant'] == '16-89576931-G-GTG' @@ -11339,8 +10773,7 @@ def test_variant246(self): assert 'NM_001363850.1:c.216_217dup' in list(results.keys()) assert results['NM_001363850.1:c.216_217dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.216_217dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001363850.1:c.216_217dup']['alt_genomic_loci'] == [] - assert results['NM_001363850.1:c.216_217dup']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001363850.1:c.216_217dup']['alt_genomic_loci'], []) assert results['NM_001363850.1:c.216_217dup']['gene_symbol'] == 'SPG7' assert results['NM_001363850.1:c.216_217dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Glu73ValfsTer9)', 'slr': 'NP_001350779.1:p.(E73Vfs*9)'} assert results['NM_001363850.1:c.216_217dup']['submitted_variant'] == 
'16-89576931-G-GTG' @@ -11363,8 +10796,7 @@ def test_variant247(self): assert 'NM_199367.1:c.1046_1071del' in list(results.keys()) assert results['NM_199367.1:c.1046_1071del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.1:c.1046_1071del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_199367.1:c.1046_1071del']['alt_genomic_loci'] == [] - assert results['NM_199367.1:c.1046_1071del']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_199367.1:c.1046_1071del']['alt_genomic_loci'], []) assert results['NM_199367.1:c.1046_1071del']['gene_symbol'] == 'SPG7' assert results['NM_199367.1:c.1046_1071del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Gly349AlafsTer38)', 'slr': 'NP_955399.1:p.(G349Afs*38)'} assert results['NM_199367.1:c.1046_1071del']['submitted_variant'] == '16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C' @@ -11381,8 +10813,7 @@ def test_variant247(self): assert 'NM_001363850.1:c.1046_1071del' in list(results.keys()) assert results['NM_001363850.1:c.1046_1071del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.1046_1071del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001363850.1:c.1046_1071del']['alt_genomic_loci'] == [] - assert results['NM_001363850.1:c.1046_1071del']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001363850.1:c.1046_1071del']['alt_genomic_loci'], []) assert results['NM_001363850.1:c.1046_1071del']['gene_symbol'] == 'SPG7' assert results['NM_001363850.1:c.1046_1071del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Gly349AlafsTer38)', 'slr': 'NP_001350779.1:p.(G349Afs*38)'} assert results['NM_001363850.1:c.1046_1071del']['submitted_variant'] == 
'16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C' @@ -11399,8 +10830,7 @@ def test_variant247(self): assert 'NM_199367.2:c.1046_1071del' in list(results.keys()) assert results['NM_199367.2:c.1046_1071del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_199367.2:c.1046_1071del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_199367.2:c.1046_1071del']['alt_genomic_loci'] == [] - assert results['NM_199367.2:c.1046_1071del']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_199367.2:c.1046_1071del']['alt_genomic_loci'], []) assert results['NM_199367.2:c.1046_1071del']['gene_symbol'] == 'SPG7' assert results['NM_199367.2:c.1046_1071del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Gly349AlafsTer38)', 'slr': 'NP_955399.1:p.(G349Afs*38)'} assert results['NM_199367.2:c.1046_1071del']['submitted_variant'] == '16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C' @@ -11418,8 +10848,7 @@ def test_variant247(self): assert 'NM_003119.2:c.1046_1071del' in list(results.keys()) assert results['NM_003119.2:c.1046_1071del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.1046_1071del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003119.2:c.1046_1071del']['alt_genomic_loci'] == [] - assert results['NM_003119.2:c.1046_1071del']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.2:c.1046_1071del']['alt_genomic_loci'], []) assert results['NM_003119.2:c.1046_1071del']['gene_symbol'] == 'SPG7' assert results['NM_003119.2:c.1046_1071del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Gly349AlafsTer38)', 'slr': 'NP_003110.1:p.(G349Afs*38)'} assert results['NM_003119.2:c.1046_1071del']['submitted_variant'] == '16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C' @@ 
-11436,8 +10865,7 @@ def test_variant247(self): assert 'NM_003119.3:c.1046_1071del' in list(results.keys()) assert results['NM_003119.3:c.1046_1071del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.1046_1071del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003119.3:c.1046_1071del']['alt_genomic_loci'] == [] - assert results['NM_003119.3:c.1046_1071del']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.3:c.1046_1071del']['alt_genomic_loci'], []) assert results['NM_003119.3:c.1046_1071del']['gene_symbol'] == 'SPG7' assert results['NM_003119.3:c.1046_1071del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Gly349AlafsTer38)', 'slr': 'NP_003110.1:p.(G349Afs*38)'} assert results['NM_003119.3:c.1046_1071del']['submitted_variant'] == '16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C' @@ -11460,8 +10888,7 @@ def test_variant248(self): assert 'NM_001363850.1:c.1450-1_1457delinsT' in list(results.keys()) assert results['NM_001363850.1:c.1450-1_1457delinsT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.1450-1_1457delinsT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001363850.1:c.1450-1_1457delinsT']['alt_genomic_loci'] == [] - assert results['NM_001363850.1:c.1450-1_1457delinsT']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001363850.1:c.1450-1_1457delinsT']['alt_genomic_loci'], []) assert results['NM_001363850.1:c.1450-1_1457delinsT']['gene_symbol'] == 'SPG7' assert results['NM_001363850.1:c.1450-1_1457delinsT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.?', 'slr': 'NP_001350779.1:p.?'} assert results['NM_001363850.1:c.1450-1_1457delinsT']['submitted_variant'] == '16-89613064-AGGAGAGGCG-AT' @@ -11479,8 +10906,7 @@ def 
test_variant248(self): assert 'NM_003119.2:c.1450-1_1457delinsT' in list(results.keys()) assert results['NM_003119.2:c.1450-1_1457delinsT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.1450-1_1457delinsT']['refseqgene_context_intronic_sequence'] == 'NG_008082.1(NM_003119.2):c.1450-1_1457delinsT' - assert results['NM_003119.2:c.1450-1_1457delinsT']['alt_genomic_loci'] == [] - assert results['NM_003119.2:c.1450-1_1457delinsT']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.2:c.1450-1_1457delinsT']['alt_genomic_loci'], []) assert results['NM_003119.2:c.1450-1_1457delinsT']['gene_symbol'] == 'SPG7' assert results['NM_003119.2:c.1450-1_1457delinsT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} assert results['NM_003119.2:c.1450-1_1457delinsT']['submitted_variant'] == '16-89613064-AGGAGAGGCG-AT' @@ -11497,8 +10923,7 @@ def test_variant248(self): assert 'NM_003119.3:c.1450-1_1457delinsT' in list(results.keys()) assert results['NM_003119.3:c.1450-1_1457delinsT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.1450-1_1457delinsT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003119.3:c.1450-1_1457delinsT']['alt_genomic_loci'] == [] - assert results['NM_003119.3:c.1450-1_1457delinsT']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.3:c.1450-1_1457delinsT']['alt_genomic_loci'], []) assert results['NM_003119.3:c.1450-1_1457delinsT']['gene_symbol'] == 'SPG7' assert results['NM_003119.3:c.1450-1_1457delinsT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} assert results['NM_003119.3:c.1450-1_1457delinsT']['submitted_variant'] == '16-89613064-AGGAGAGGCG-AT' @@ -11521,8 
+10946,7 @@ def test_variant249(self): assert 'NM_003119.2:c.1454_1462delinsT' in list(results.keys()) assert results['NM_003119.2:c.1454_1462delinsT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.1454_1462delinsT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003119.2:c.1454_1462delinsT']['alt_genomic_loci'] == [] - assert results['NM_003119.2:c.1454_1462delinsT']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.2:c.1454_1462delinsT']['alt_genomic_loci'], []) assert results['NM_003119.2:c.1454_1462delinsT']['gene_symbol'] == 'SPG7' assert results['NM_003119.2:c.1454_1462delinsT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Arg485IlefsTer3)', 'slr': 'NP_003110.1:p.(R485Ifs*3)'} assert results['NM_003119.2:c.1454_1462delinsT']['submitted_variant'] == '16-89613069-AGGCGGGAGA-AT' @@ -11540,8 +10964,7 @@ def test_variant249(self): assert 'NM_001363850.1:c.1454_1462delinsT' in list(results.keys()) assert results['NM_001363850.1:c.1454_1462delinsT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.1454_1462delinsT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001363850.1:c.1454_1462delinsT']['alt_genomic_loci'] == [] - assert results['NM_001363850.1:c.1454_1462delinsT']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001363850.1:c.1454_1462delinsT']['alt_genomic_loci'], []) assert results['NM_001363850.1:c.1454_1462delinsT']['gene_symbol'] == 'SPG7' assert results['NM_001363850.1:c.1454_1462delinsT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Arg485IlefsTer3)', 'slr': 'NP_001350779.1:p.(R485Ifs*3)'} assert results['NM_001363850.1:c.1454_1462delinsT']['submitted_variant'] == 
'16-89613069-AGGCGGGAGA-AT' @@ -11558,8 +10981,7 @@ def test_variant249(self): assert 'NM_003119.3:c.1454_1462delinsT' in list(results.keys()) assert results['NM_003119.3:c.1454_1462delinsT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.1454_1462delinsT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003119.3:c.1454_1462delinsT']['alt_genomic_loci'] == [] - assert results['NM_003119.3:c.1454_1462delinsT']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.3:c.1454_1462delinsT']['alt_genomic_loci'], []) assert results['NM_003119.3:c.1454_1462delinsT']['gene_symbol'] == 'SPG7' assert results['NM_003119.3:c.1454_1462delinsT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Arg485IlefsTer3)', 'slr': 'NP_003110.1:p.(R485Ifs*3)'} assert results['NM_003119.3:c.1454_1462delinsT']['submitted_variant'] == '16-89613069-AGGCGGGAGA-AT' @@ -11582,8 +11004,7 @@ def test_variant250(self): assert 'NM_001363850.1:c.1529C>T' in list(results.keys()) assert results['NM_001363850.1:c.1529C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363850.1:c.1529C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001363850.1:c.1529C>T']['alt_genomic_loci'] == [] - assert results['NM_001363850.1:c.1529C>T']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001363850.1:c.1529C>T']['alt_genomic_loci'], []) assert results['NM_001363850.1:c.1529C>T']['gene_symbol'] == 'SPG7' assert results['NM_001363850.1:c.1529C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Ala510Val)', 'slr': 'NP_001350779.1:p.(A510V)'} assert results['NM_001363850.1:c.1529C>T']['submitted_variant'] == '16-89613145-C-T' @@ -11600,8 +11021,7 @@ def test_variant250(self): assert 
'NM_003119.3:c.1529C>T' in list(results.keys()) assert results['NM_003119.3:c.1529C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.3:c.1529C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003119.3:c.1529C>T']['alt_genomic_loci'] == [] - assert results['NM_003119.3:c.1529C>T']['transcript_description'] == 'Homo sapiens SPG7, paraplegin matrix AAA peptidase subunit (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.3:c.1529C>T']['alt_genomic_loci'], []) assert results['NM_003119.3:c.1529C>T']['gene_symbol'] == 'SPG7' assert results['NM_003119.3:c.1529C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Ala510Val)', 'slr': 'NP_003110.1:p.(A510V)'} assert results['NM_003119.3:c.1529C>T']['submitted_variant'] == '16-89613145-C-T' @@ -11619,8 +11039,7 @@ def test_variant250(self): assert 'NM_003119.2:c.1529C>T' in list(results.keys()) assert results['NM_003119.2:c.1529C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003119.2:c.1529C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003119.2:c.1529C>T']['alt_genomic_loci'] == [] - assert results['NM_003119.2:c.1529C>T']['transcript_description'] == 'Homo sapiens spastic paraplegia 7 (pure and complicated autosomal recessive) (SPG7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003119.2:c.1529C>T']['alt_genomic_loci'], []) assert results['NM_003119.2:c.1529C>T']['gene_symbol'] == 'SPG7' assert results['NM_003119.2:c.1529C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Ala510Val)', 'slr': 'NP_003110.1:p.(A510V)'} assert results['NM_003119.2:c.1529C>T']['submitted_variant'] == '16-89613145-C-T' @@ -11643,8 +11062,7 @@ def test_variant251(self): assert 'NM_001276695.1:c.535_537del' in list(results.keys()) assert results['NM_001276695.1:c.535_537del']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001276695.1:c.535_537del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001276695.1:c.535_537del']['alt_genomic_loci'] == [] - assert results['NM_001276695.1:c.535_537del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001276695.1:c.535_537del']['alt_genomic_loci'], []) assert results['NM_001276695.1:c.535_537del']['gene_symbol'] == 'TP53' assert results['NM_001276695.1:c.535_537del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263624.1:p.(Val179del)', 'slr': 'NP_001263624.1:p.(V179del)'} assert results['NM_001276695.1:c.535_537del']['submitted_variant'] == '17-7578194-GCAC-G' @@ -11661,8 +11079,7 @@ def test_variant251(self): assert 'NM_001126113.2:c.652_654del' in list(results.keys()) assert results['NM_001126113.2:c.652_654del']['hgvs_lrg_transcript_variant'] == 'LRG_321t4:c.652_654del' assert results['NM_001126113.2:c.652_654del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001126113.2:c.652_654del']['alt_genomic_loci'] == [] - assert results['NM_001126113.2:c.652_654del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001126113.2:c.652_654del']['alt_genomic_loci'], []) assert results['NM_001126113.2:c.652_654del']['gene_symbol'] == 'TP53' assert results['NM_001126113.2:c.652_654del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119585.1:p.(Val218del)', 'slr': 'NP_001119585.1:p.(V218del)'} assert results['NM_001126113.2:c.652_654del']['submitted_variant'] == '17-7578194-GCAC-G' @@ -11679,8 +11096,7 @@ def test_variant251(self): assert 'NM_001126118.1:c.535_537del' in list(results.keys()) assert results['NM_001126118.1:c.535_537del']['hgvs_lrg_transcript_variant'] == 'LRG_321t8:c.535_537del' assert results['NM_001126118.1:c.535_537del']['refseqgene_context_intronic_sequence'] == '' - assert 
results['NM_001126118.1:c.535_537del']['alt_genomic_loci'] == [] - assert results['NM_001126118.1:c.535_537del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 8, mRNA' + self.assertCountEqual(results['NM_001126118.1:c.535_537del']['alt_genomic_loci'], []) assert results['NM_001126118.1:c.535_537del']['gene_symbol'] == 'TP53' assert results['NM_001126118.1:c.535_537del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119590.1:p.(Val179del)', 'slr': 'NP_001119590.1:p.(V179del)'} assert results['NM_001126118.1:c.535_537del']['submitted_variant'] == '17-7578194-GCAC-G' @@ -11697,8 +11113,7 @@ def test_variant251(self): assert 'NM_001126116.1:c.256_258del' in list(results.keys()) assert results['NM_001126116.1:c.256_258del']['hgvs_lrg_transcript_variant'] == 'LRG_321t6:c.256_258del' assert results['NM_001126116.1:c.256_258del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001126116.1:c.256_258del']['alt_genomic_loci'] == [] - assert results['NM_001126116.1:c.256_258del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 6, mRNA' + self.assertCountEqual(results['NM_001126116.1:c.256_258del']['alt_genomic_loci'], []) assert results['NM_001126116.1:c.256_258del']['gene_symbol'] == 'TP53' assert results['NM_001126116.1:c.256_258del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119588.1:p.(Val86del)', 'slr': 'NP_001119588.1:p.(V86del)'} assert results['NM_001126116.1:c.256_258del']['submitted_variant'] == '17-7578194-GCAC-G' @@ -11715,8 +11130,7 @@ def test_variant251(self): assert 'NM_001126117.1:c.256_258del' in list(results.keys()) assert results['NM_001126117.1:c.256_258del']['hgvs_lrg_transcript_variant'] == 'LRG_321t7:c.256_258del' assert results['NM_001126117.1:c.256_258del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001126117.1:c.256_258del']['alt_genomic_loci'] == [] - assert 
results['NM_001126117.1:c.256_258del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 7, mRNA' + self.assertCountEqual(results['NM_001126117.1:c.256_258del']['alt_genomic_loci'], []) assert results['NM_001126117.1:c.256_258del']['gene_symbol'] == 'TP53' assert results['NM_001126117.1:c.256_258del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119589.1:p.(Val86del)', 'slr': 'NP_001119589.1:p.(V86del)'} assert results['NM_001126117.1:c.256_258del']['submitted_variant'] == '17-7578194-GCAC-G' @@ -11733,8 +11147,7 @@ def test_variant251(self): assert 'NM_001276761.1:c.535_537del' in list(results.keys()) assert results['NM_001276761.1:c.535_537del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001276761.1:c.535_537del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001276761.1:c.535_537del']['alt_genomic_loci'] == [] - assert results['NM_001276761.1:c.535_537del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001276761.1:c.535_537del']['alt_genomic_loci'], []) assert results['NM_001276761.1:c.535_537del']['gene_symbol'] == 'TP53' assert results['NM_001276761.1:c.535_537del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263690.1:p.(Val179del)', 'slr': 'NP_001263690.1:p.(V179del)'} assert results['NM_001276761.1:c.535_537del']['submitted_variant'] == '17-7578194-GCAC-G' @@ -11751,8 +11164,7 @@ def test_variant251(self): assert 'NM_001126112.2:c.652_654del' in list(results.keys()) assert results['NM_001126112.2:c.652_654del']['hgvs_lrg_transcript_variant'] == 'LRG_321t2:c.652_654del' assert results['NM_001126112.2:c.652_654del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001126112.2:c.652_654del']['alt_genomic_loci'] == [] - assert results['NM_001126112.2:c.652_654del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 2, mRNA' + 
self.assertCountEqual(results['NM_001126112.2:c.652_654del']['alt_genomic_loci'], []) assert results['NM_001126112.2:c.652_654del']['gene_symbol'] == 'TP53' assert results['NM_001126112.2:c.652_654del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119584.1:p.(Val218del)', 'slr': 'NP_001119584.1:p.(V218del)'} assert results['NM_001126112.2:c.652_654del']['submitted_variant'] == '17-7578194-GCAC-G' @@ -11770,8 +11182,7 @@ def test_variant251(self): assert 'NM_001276697.1:c.175_177del' in list(results.keys()) assert results['NM_001276697.1:c.175_177del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001276697.1:c.175_177del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001276697.1:c.175_177del']['alt_genomic_loci'] == [] - assert results['NM_001276697.1:c.175_177del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001276697.1:c.175_177del']['alt_genomic_loci'], []) assert results['NM_001276697.1:c.175_177del']['gene_symbol'] == 'TP53' assert results['NM_001276697.1:c.175_177del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263626.1:p.(Val59del)', 'slr': 'NP_001263626.1:p.(V59del)'} assert results['NM_001276697.1:c.175_177del']['submitted_variant'] == '17-7578194-GCAC-G' @@ -11788,8 +11199,7 @@ def test_variant251(self): assert 'NM_001276696.1:c.535_537del' in list(results.keys()) assert results['NM_001276696.1:c.535_537del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001276696.1:c.535_537del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001276696.1:c.535_537del']['alt_genomic_loci'] == [] - assert results['NM_001276696.1:c.535_537del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001276696.1:c.535_537del']['alt_genomic_loci'], []) assert results['NM_001276696.1:c.535_537del']['gene_symbol'] == 'TP53' assert 
results['NM_001276696.1:c.535_537del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263625.1:p.(Val179del)', 'slr': 'NP_001263625.1:p.(V179del)'} assert results['NM_001276696.1:c.535_537del']['submitted_variant'] == '17-7578194-GCAC-G' @@ -11806,8 +11216,7 @@ def test_variant251(self): assert 'NM_001276698.1:c.175_177del' in list(results.keys()) assert results['NM_001276698.1:c.175_177del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001276698.1:c.175_177del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001276698.1:c.175_177del']['alt_genomic_loci'] == [] - assert results['NM_001276698.1:c.175_177del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 6, mRNA' + self.assertCountEqual(results['NM_001276698.1:c.175_177del']['alt_genomic_loci'], []) assert results['NM_001276698.1:c.175_177del']['gene_symbol'] == 'TP53' assert results['NM_001276698.1:c.175_177del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263627.1:p.(Val59del)', 'slr': 'NP_001263627.1:p.(V59del)'} assert results['NM_001276698.1:c.175_177del']['submitted_variant'] == '17-7578194-GCAC-G' @@ -11824,8 +11233,7 @@ def test_variant251(self): assert 'NM_001126115.1:c.256_258del' in list(results.keys()) assert results['NM_001126115.1:c.256_258del']['hgvs_lrg_transcript_variant'] == 'LRG_321t5:c.256_258del' assert results['NM_001126115.1:c.256_258del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001126115.1:c.256_258del']['alt_genomic_loci'] == [] - assert results['NM_001126115.1:c.256_258del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001126115.1:c.256_258del']['alt_genomic_loci'], []) assert results['NM_001126115.1:c.256_258del']['gene_symbol'] == 'TP53' assert results['NM_001126115.1:c.256_258del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119587.1:p.(Val86del)', 'slr': 
'NP_001119587.1:p.(V86del)'} assert results['NM_001126115.1:c.256_258del']['submitted_variant'] == '17-7578194-GCAC-G' @@ -11842,8 +11250,7 @@ def test_variant251(self): assert 'NM_001126114.2:c.652_654del' in list(results.keys()) assert results['NM_001126114.2:c.652_654del']['hgvs_lrg_transcript_variant'] == 'LRG_321t3:c.652_654del' assert results['NM_001126114.2:c.652_654del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001126114.2:c.652_654del']['alt_genomic_loci'] == [] - assert results['NM_001126114.2:c.652_654del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001126114.2:c.652_654del']['alt_genomic_loci'], []) assert results['NM_001126114.2:c.652_654del']['gene_symbol'] == 'TP53' assert results['NM_001126114.2:c.652_654del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119586.1:p.(Val218del)', 'slr': 'NP_001119586.1:p.(V218del)'} assert results['NM_001126114.2:c.652_654del']['submitted_variant'] == '17-7578194-GCAC-G' @@ -11860,8 +11267,7 @@ def test_variant251(self): assert 'NM_001276699.1:c.175_177del' in list(results.keys()) assert results['NM_001276699.1:c.175_177del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001276699.1:c.175_177del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001276699.1:c.175_177del']['alt_genomic_loci'] == [] - assert results['NM_001276699.1:c.175_177del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 7, mRNA' + self.assertCountEqual(results['NM_001276699.1:c.175_177del']['alt_genomic_loci'], []) assert results['NM_001276699.1:c.175_177del']['gene_symbol'] == 'TP53' assert results['NM_001276699.1:c.175_177del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263628.1:p.(Val59del)', 'slr': 'NP_001263628.1:p.(V59del)'} assert results['NM_001276699.1:c.175_177del']['submitted_variant'] == '17-7578194-GCAC-G' @@ -11878,8 +11284,7 
@@ def test_variant251(self): assert 'NM_001276760.1:c.535_537del' in list(results.keys()) assert results['NM_001276760.1:c.535_537del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001276760.1:c.535_537del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001276760.1:c.535_537del']['alt_genomic_loci'] == [] - assert results['NM_001276760.1:c.535_537del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001276760.1:c.535_537del']['alt_genomic_loci'], []) assert results['NM_001276760.1:c.535_537del']['gene_symbol'] == 'TP53' assert results['NM_001276760.1:c.535_537del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263689.1:p.(Val179del)', 'slr': 'NP_001263689.1:p.(V179del)'} assert results['NM_001276760.1:c.535_537del']['submitted_variant'] == '17-7578194-GCAC-G' @@ -11896,8 +11301,7 @@ def test_variant251(self): assert 'NM_000546.5:c.652_654del' in list(results.keys()) assert results['NM_000546.5:c.652_654del']['hgvs_lrg_transcript_variant'] == 'LRG_321t1:c.652_654del' assert results['NM_000546.5:c.652_654del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000546.5:c.652_654del']['alt_genomic_loci'] == [] - assert results['NM_000546.5:c.652_654del']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000546.5:c.652_654del']['alt_genomic_loci'], []) assert results['NM_000546.5:c.652_654del']['gene_symbol'] == 'TP53' assert results['NM_000546.5:c.652_654del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000537.3(LRG_321p1):p.(Val218del)', 'slr': 'NP_000537.3:p.(V218del)'} assert results['NM_000546.5:c.652_654del']['submitted_variant'] == '17-7578194-GCAC-G' @@ -11920,8 +11324,7 @@ def test_variant252(self): assert 'NM_001276760.1:c.289dup' in list(results.keys()) assert results['NM_001276760.1:c.289dup']['hgvs_lrg_transcript_variant'] 
== '' assert results['NM_001276760.1:c.289dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001276760.1:c.289dup']['alt_genomic_loci'] == [] - assert results['NM_001276760.1:c.289dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001276760.1:c.289dup']['alt_genomic_loci'], []) assert results['NM_001276760.1:c.289dup']['gene_symbol'] == 'TP53' assert results['NM_001276760.1:c.289dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263689.1:p.(Gln97ProfsTer13)', 'slr': 'NP_001263689.1:p.(Q97Pfs*13)'} assert results['NM_001276760.1:c.289dup']['submitted_variant'] == '17-7578523-T-TG' @@ -11938,8 +11341,7 @@ def test_variant252(self): assert 'NM_001126118.1:c.289dup' in list(results.keys()) assert results['NM_001126118.1:c.289dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t8:c.289dup' assert results['NM_001126118.1:c.289dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001126118.1:c.289dup']['alt_genomic_loci'] == [] - assert results['NM_001126118.1:c.289dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 8, mRNA' + self.assertCountEqual(results['NM_001126118.1:c.289dup']['alt_genomic_loci'], []) assert results['NM_001126118.1:c.289dup']['gene_symbol'] == 'TP53' assert results['NM_001126118.1:c.289dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119590.1:p.(Gln97ProfsTer13)', 'slr': 'NP_001119590.1:p.(Q97Pfs*13)'} assert results['NM_001126118.1:c.289dup']['submitted_variant'] == '17-7578523-T-TG' @@ -11956,8 +11358,7 @@ def test_variant252(self): assert 'NM_001276695.1:c.289dup' in list(results.keys()) assert results['NM_001276695.1:c.289dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001276695.1:c.289dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001276695.1:c.289dup']['alt_genomic_loci'] == [] - assert 
results['NM_001276695.1:c.289dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001276695.1:c.289dup']['alt_genomic_loci'], []) assert results['NM_001276695.1:c.289dup']['gene_symbol'] == 'TP53' assert results['NM_001276695.1:c.289dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263624.1:p.(Gln97ProfsTer13)', 'slr': 'NP_001263624.1:p.(Q97Pfs*13)'} assert results['NM_001276695.1:c.289dup']['submitted_variant'] == '17-7578523-T-TG' @@ -11974,8 +11375,7 @@ def test_variant252(self): assert 'NM_001276699.1:c.-72dup' in list(results.keys()) assert results['NM_001276699.1:c.-72dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001276699.1:c.-72dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001276699.1:c.-72dup']['alt_genomic_loci'] == [] - assert results['NM_001276699.1:c.-72dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 7, mRNA' + self.assertCountEqual(results['NM_001276699.1:c.-72dup']['alt_genomic_loci'], []) assert results['NM_001276699.1:c.-72dup']['gene_symbol'] == 'TP53' assert results['NM_001276699.1:c.-72dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263628.1:p.?', 'slr': 'NP_001263628.1:p.?'} assert results['NM_001276699.1:c.-72dup']['submitted_variant'] == '17-7578523-T-TG' @@ -11992,8 +11392,7 @@ def test_variant252(self): assert 'NM_001126115.1:c.10dup' in list(results.keys()) assert results['NM_001126115.1:c.10dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t5:c.10dup' assert results['NM_001126115.1:c.10dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001126115.1:c.10dup']['alt_genomic_loci'] == [] - assert results['NM_001126115.1:c.10dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001126115.1:c.10dup']['alt_genomic_loci'], []) assert 
results['NM_001126115.1:c.10dup']['gene_symbol'] == 'TP53' assert results['NM_001126115.1:c.10dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119587.1:p.(Gln4ProfsTer13)', 'slr': 'NP_001119587.1:p.(Q4Pfs*13)'} assert results['NM_001126115.1:c.10dup']['submitted_variant'] == '17-7578523-T-TG' @@ -12010,8 +11409,7 @@ def test_variant252(self): assert 'NM_001276697.1:c.-72dup' in list(results.keys()) assert results['NM_001276697.1:c.-72dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001276697.1:c.-72dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001276697.1:c.-72dup']['alt_genomic_loci'] == [] - assert results['NM_001276697.1:c.-72dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001276697.1:c.-72dup']['alt_genomic_loci'], []) assert results['NM_001276697.1:c.-72dup']['gene_symbol'] == 'TP53' assert results['NM_001276697.1:c.-72dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263626.1:p.?', 'slr': 'NP_001263626.1:p.?'} assert results['NM_001276697.1:c.-72dup']['submitted_variant'] == '17-7578523-T-TG' @@ -12028,8 +11426,7 @@ def test_variant252(self): assert 'NM_001126117.1:c.10dup' in list(results.keys()) assert results['NM_001126117.1:c.10dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t7:c.10dup' assert results['NM_001126117.1:c.10dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001126117.1:c.10dup']['alt_genomic_loci'] == [] - assert results['NM_001126117.1:c.10dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 7, mRNA' + self.assertCountEqual(results['NM_001126117.1:c.10dup']['alt_genomic_loci'], []) assert results['NM_001126117.1:c.10dup']['gene_symbol'] == 'TP53' assert results['NM_001126117.1:c.10dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119589.1:p.(Gln4ProfsTer13)', 'slr': 'NP_001119589.1:p.(Q4Pfs*13)'} assert 
results['NM_001126117.1:c.10dup']['submitted_variant'] == '17-7578523-T-TG' @@ -12046,8 +11443,7 @@ def test_variant252(self): assert 'NM_000546.5:c.406dup' in list(results.keys()) assert results['NM_000546.5:c.406dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t1:c.406dup' assert results['NM_000546.5:c.406dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000546.5:c.406dup']['alt_genomic_loci'] == [] - assert results['NM_000546.5:c.406dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000546.5:c.406dup']['alt_genomic_loci'], []) assert results['NM_000546.5:c.406dup']['gene_symbol'] == 'TP53' assert results['NM_000546.5:c.406dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000537.3(LRG_321p1):p.(Gln136ProfsTer13)', 'slr': 'NP_000537.3:p.(Q136Pfs*13)'} assert results['NM_000546.5:c.406dup']['submitted_variant'] == '17-7578523-T-TG' @@ -12065,8 +11461,7 @@ def test_variant252(self): assert 'NM_001276696.1:c.289dup' in list(results.keys()) assert results['NM_001276696.1:c.289dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001276696.1:c.289dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001276696.1:c.289dup']['alt_genomic_loci'] == [] - assert results['NM_001276696.1:c.289dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001276696.1:c.289dup']['alt_genomic_loci'], []) assert results['NM_001276696.1:c.289dup']['gene_symbol'] == 'TP53' assert results['NM_001276696.1:c.289dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263625.1:p.(Gln97ProfsTer13)', 'slr': 'NP_001263625.1:p.(Q97Pfs*13)'} assert results['NM_001276696.1:c.289dup']['submitted_variant'] == '17-7578523-T-TG' @@ -12083,8 +11478,7 @@ def test_variant252(self): assert 'NM_001276698.1:c.-72dup' in list(results.keys()) assert 
results['NM_001276698.1:c.-72dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001276698.1:c.-72dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001276698.1:c.-72dup']['alt_genomic_loci'] == [] - assert results['NM_001276698.1:c.-72dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 6, mRNA' + self.assertCountEqual(results['NM_001276698.1:c.-72dup']['alt_genomic_loci'], []) assert results['NM_001276698.1:c.-72dup']['gene_symbol'] == 'TP53' assert results['NM_001276698.1:c.-72dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263627.1:p.?', 'slr': 'NP_001263627.1:p.?'} assert results['NM_001276698.1:c.-72dup']['submitted_variant'] == '17-7578523-T-TG' @@ -12101,8 +11495,7 @@ def test_variant252(self): assert 'NM_001276761.1:c.289dup' in list(results.keys()) assert results['NM_001276761.1:c.289dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001276761.1:c.289dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001276761.1:c.289dup']['alt_genomic_loci'] == [] - assert results['NM_001276761.1:c.289dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001276761.1:c.289dup']['alt_genomic_loci'], []) assert results['NM_001276761.1:c.289dup']['gene_symbol'] == 'TP53' assert results['NM_001276761.1:c.289dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263690.1:p.(Gln97ProfsTer13)', 'slr': 'NP_001263690.1:p.(Q97Pfs*13)'} assert results['NM_001276761.1:c.289dup']['submitted_variant'] == '17-7578523-T-TG' @@ -12119,8 +11512,7 @@ def test_variant252(self): assert 'NM_001126113.2:c.406dup' in list(results.keys()) assert results['NM_001126113.2:c.406dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t4:c.406dup' assert results['NM_001126113.2:c.406dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001126113.2:c.406dup']['alt_genomic_loci'] == [] 
- assert results['NM_001126113.2:c.406dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001126113.2:c.406dup']['alt_genomic_loci'], []) assert results['NM_001126113.2:c.406dup']['gene_symbol'] == 'TP53' assert results['NM_001126113.2:c.406dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119585.1:p.(Gln136ProfsTer13)', 'slr': 'NP_001119585.1:p.(Q136Pfs*13)'} assert results['NM_001126113.2:c.406dup']['submitted_variant'] == '17-7578523-T-TG' @@ -12137,8 +11529,7 @@ def test_variant252(self): assert 'NM_001126116.1:c.10dup' in list(results.keys()) assert results['NM_001126116.1:c.10dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t6:c.10dup' assert results['NM_001126116.1:c.10dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001126116.1:c.10dup']['alt_genomic_loci'] == [] - assert results['NM_001126116.1:c.10dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 6, mRNA' + self.assertCountEqual(results['NM_001126116.1:c.10dup']['alt_genomic_loci'], []) assert results['NM_001126116.1:c.10dup']['gene_symbol'] == 'TP53' assert results['NM_001126116.1:c.10dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119588.1:p.(Gln4ProfsTer13)', 'slr': 'NP_001119588.1:p.(Q4Pfs*13)'} assert results['NM_001126116.1:c.10dup']['submitted_variant'] == '17-7578523-T-TG' @@ -12155,8 +11546,7 @@ def test_variant252(self): assert 'NM_001126112.2:c.406dup' in list(results.keys()) assert results['NM_001126112.2:c.406dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t2:c.406dup' assert results['NM_001126112.2:c.406dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001126112.2:c.406dup']['alt_genomic_loci'] == [] - assert results['NM_001126112.2:c.406dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 2, mRNA' + 
self.assertCountEqual(results['NM_001126112.2:c.406dup']['alt_genomic_loci'], []) assert results['NM_001126112.2:c.406dup']['gene_symbol'] == 'TP53' assert results['NM_001126112.2:c.406dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119584.1:p.(Gln136ProfsTer13)', 'slr': 'NP_001119584.1:p.(Q136Pfs*13)'} assert results['NM_001126112.2:c.406dup']['submitted_variant'] == '17-7578523-T-TG' @@ -12173,8 +11563,7 @@ def test_variant252(self): assert 'NM_001126114.2:c.406dup' in list(results.keys()) assert results['NM_001126114.2:c.406dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t3:c.406dup' assert results['NM_001126114.2:c.406dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001126114.2:c.406dup']['alt_genomic_loci'] == [] - assert results['NM_001126114.2:c.406dup']['transcript_description'] == 'Homo sapiens tumor protein p53 (TP53), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001126114.2:c.406dup']['alt_genomic_loci'], []) assert results['NM_001126114.2:c.406dup']['gene_symbol'] == 'TP53' assert results['NM_001126114.2:c.406dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119586.1:p.(Gln136ProfsTer13)', 'slr': 'NP_001119586.1:p.(Q136Pfs*13)'} assert results['NM_001126114.2:c.406dup']['submitted_variant'] == '17-7578523-T-TG' @@ -12197,8 +11586,7 @@ def test_variant253(self): assert 'NM_144997.6:c.1300+2T>G' in list(results.keys()) assert results['NM_144997.6:c.1300+2T>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_144997.6:c.1300+2T>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_144997.6:c.1300+2T>G']['alt_genomic_loci'] == [] - assert results['NM_144997.6:c.1300+2T>G']['transcript_description'] == 'Homo sapiens folliculin (FLCN), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_144997.6:c.1300+2T>G']['alt_genomic_loci'], []) assert results['NM_144997.6:c.1300+2T>G']['gene_symbol'] == 'FLCN' assert 
results['NM_144997.6:c.1300+2T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_659434.2(LRG_325p1):p.?', 'slr': 'NP_659434.2:p.?'} assert results['NM_144997.6:c.1300+2T>G']['submitted_variant'] == '17-17119692-A-C' @@ -12215,8 +11603,7 @@ def test_variant253(self): assert 'NM_001353230.1:c.1300+2T>G' in list(results.keys()) assert results['NM_001353230.1:c.1300+2T>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353230.1:c.1300+2T>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001353230.1:c.1300+2T>G']['alt_genomic_loci'] == [] - assert results['NM_001353230.1:c.1300+2T>G']['transcript_description'] == 'Homo sapiens folliculin (FLCN), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001353230.1:c.1300+2T>G']['alt_genomic_loci'], []) assert results['NM_001353230.1:c.1300+2T>G']['gene_symbol'] == 'FLCN' assert results['NM_001353230.1:c.1300+2T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340159.1:p.?', 'slr': 'NP_001340159.1:p.?'} assert results['NM_001353230.1:c.1300+2T>G']['submitted_variant'] == '17-17119692-A-C' @@ -12233,8 +11620,7 @@ def test_variant253(self): assert 'NM_001353229.1:c.1354+2T>G' in list(results.keys()) assert results['NM_001353229.1:c.1354+2T>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353229.1:c.1354+2T>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001353229.1:c.1354+2T>G']['alt_genomic_loci'] == [] - assert results['NM_001353229.1:c.1354+2T>G']['transcript_description'] == 'Homo sapiens folliculin (FLCN), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001353229.1:c.1354+2T>G']['alt_genomic_loci'], []) assert results['NM_001353229.1:c.1354+2T>G']['gene_symbol'] == 'FLCN' assert results['NM_001353229.1:c.1354+2T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340158.1:p.?', 'slr': 'NP_001340158.1:p.?'} assert results['NM_001353229.1:c.1354+2T>G']['submitted_variant'] == '17-17119692-A-C' @@ 
-12252,8 +11638,7 @@ def test_variant253(self): assert 'NM_144997.5:c.1300+2T>G' in list(results.keys()) assert results['NM_144997.5:c.1300+2T>G']['hgvs_lrg_transcript_variant'] == 'LRG_325t1:c.1300+2T>G' assert results['NM_144997.5:c.1300+2T>G']['refseqgene_context_intronic_sequence'] == 'NG_008001.2(NM_144997.5):c.1300+2T>G' - assert results['NM_144997.5:c.1300+2T>G']['alt_genomic_loci'] == [] - assert results['NM_144997.5:c.1300+2T>G']['transcript_description'] == 'Homo sapiens folliculin (FLCN), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_144997.5:c.1300+2T>G']['alt_genomic_loci'], []) assert results['NM_144997.5:c.1300+2T>G']['gene_symbol'] == 'FLCN' assert results['NM_144997.5:c.1300+2T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_659434.2(LRG_325p1):p.?', 'slr': 'NP_659434.2:p.?'} assert results['NM_144997.5:c.1300+2T>G']['submitted_variant'] == '17-17119692-A-C' @@ -12270,8 +11655,7 @@ def test_variant253(self): assert 'NM_001353231.1:c.1300+2T>G' in list(results.keys()) assert results['NM_001353231.1:c.1300+2T>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353231.1:c.1300+2T>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001353231.1:c.1300+2T>G']['alt_genomic_loci'] == [] - assert results['NM_001353231.1:c.1300+2T>G']['transcript_description'] == 'Homo sapiens folliculin (FLCN), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001353231.1:c.1300+2T>G']['alt_genomic_loci'], []) assert results['NM_001353231.1:c.1300+2T>G']['gene_symbol'] == 'FLCN' assert results['NM_001353231.1:c.1300+2T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340160.1:p.?', 'slr': 'NP_001340160.1:p.?'} assert results['NM_001353231.1:c.1300+2T>G']['submitted_variant'] == '17-17119692-A-C' @@ -12294,8 +11678,7 @@ def test_variant254(self): assert 'NM_007294.3:c.*103_*106del' in list(results.keys()) assert results['NM_007294.3:c.*103_*106del']['hgvs_lrg_transcript_variant'] == 
'LRG_292t1:c.*103_*106del' assert results['NM_007294.3:c.*103_*106del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_007294.3:c.*103_*106del']['alt_genomic_loci'] == [] - assert results['NM_007294.3:c.*103_*106del']['transcript_description'] == 'Homo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_007294.3:c.*103_*106del']['alt_genomic_loci'], []) assert results['NM_007294.3:c.*103_*106del']['gene_symbol'] == 'BRCA1' assert results['NM_007294.3:c.*103_*106del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009225.1(LRG_292p1):p.?', 'slr': 'NP_009225.1:p.?'} assert results['NM_007294.3:c.*103_*106del']['submitted_variant'] == '17-41197588-GGACA-G' @@ -12312,8 +11695,7 @@ def test_variant254(self): assert 'NM_007297.3:c.*103_*106del' in list(results.keys()) assert results['NM_007297.3:c.*103_*106del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007297.3:c.*103_*106del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_007297.3:c.*103_*106del']['alt_genomic_loci'] == [] - assert results['NM_007297.3:c.*103_*106del']['transcript_description'] == 'Homo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_007297.3:c.*103_*106del']['alt_genomic_loci'], []) assert results['NM_007297.3:c.*103_*106del']['gene_symbol'] == 'BRCA1' assert results['NM_007297.3:c.*103_*106del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009228.2:p.?', 'slr': 'NP_009228.2:p.?'} assert results['NM_007297.3:c.*103_*106del']['submitted_variant'] == '17-41197588-GGACA-G' @@ -12330,8 +11712,7 @@ def test_variant254(self): assert 'NR_027676.1:n.5831_5834del' in list(results.keys()) assert results['NR_027676.1:n.5831_5834del']['hgvs_lrg_transcript_variant'] == '' assert results['NR_027676.1:n.5831_5834del']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_027676.1:n.5831_5834del']['alt_genomic_loci'] 
== [] - assert results['NR_027676.1:n.5831_5834del']['transcript_description'] == 'Homo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 6, non-coding RNA' + self.assertCountEqual(results['NR_027676.1:n.5831_5834del']['alt_genomic_loci'], []) assert results['NR_027676.1:n.5831_5834del']['gene_symbol'] == 'BRCA1' assert results['NR_027676.1:n.5831_5834del']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_027676.1:n.5831_5834del']['submitted_variant'] == '17-41197588-GGACA-G' @@ -12348,8 +11729,7 @@ def test_variant254(self): assert 'NM_007300.3:c.*103_*106del' in list(results.keys()) assert results['NM_007300.3:c.*103_*106del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007300.3:c.*103_*106del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_007300.3:c.*103_*106del']['alt_genomic_loci'] == [] - assert results['NM_007300.3:c.*103_*106del']['transcript_description'] == 'Homo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_007300.3:c.*103_*106del']['alt_genomic_loci'], []) assert results['NM_007300.3:c.*103_*106del']['gene_symbol'] == 'BRCA1' assert results['NM_007300.3:c.*103_*106del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009231.2:p.?', 'slr': 'NP_009231.2:p.?'} assert results['NM_007300.3:c.*103_*106del']['submitted_variant'] == '17-41197588-GGACA-G' @@ -12367,8 +11747,7 @@ def test_variant254(self): assert 'NM_007299.3:c.*209_*212del' in list(results.keys()) assert results['NM_007299.3:c.*209_*212del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007299.3:c.*209_*212del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_007299.3:c.*209_*212del']['alt_genomic_loci'] == [] - assert results['NM_007299.3:c.*209_*212del']['transcript_description'] == 'Homo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 5, mRNA' + 
self.assertCountEqual(results['NM_007299.3:c.*209_*212del']['alt_genomic_loci'], []) assert results['NM_007299.3:c.*209_*212del']['gene_symbol'] == 'BRCA1' assert results['NM_007299.3:c.*209_*212del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009230.2:p.?', 'slr': 'NP_009230.2:p.?'} assert results['NM_007299.3:c.*209_*212del']['submitted_variant'] == '17-41197588-GGACA-G' @@ -12385,8 +11764,7 @@ def test_variant254(self): assert 'NM_007298.3:c.*103_*106del' in list(results.keys()) assert results['NM_007298.3:c.*103_*106del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007298.3:c.*103_*106del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_007298.3:c.*103_*106del']['alt_genomic_loci'] == [] - assert results['NM_007298.3:c.*103_*106del']['transcript_description'] == 'Homo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_007298.3:c.*103_*106del']['alt_genomic_loci'], []) assert results['NM_007298.3:c.*103_*106del']['gene_symbol'] == 'BRCA1' assert results['NM_007298.3:c.*103_*106del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009229.2:p.?', 'slr': 'NP_009229.2:p.?'} assert results['NM_007298.3:c.*103_*106del']['submitted_variant'] == '17-41197588-GGACA-G' @@ -12409,8 +11787,7 @@ def test_variant255(self): assert 'NM_007299.3:c.301+1G>C' in list(results.keys()) assert results['NM_007299.3:c.301+1G>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007299.3:c.301+1G>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_007299.3:c.301+1G>C']['alt_genomic_loci'] == [] - assert results['NM_007299.3:c.301+1G>C']['transcript_description'] == 'Homo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_007299.3:c.301+1G>C']['alt_genomic_loci'], []) assert results['NM_007299.3:c.301+1G>C']['gene_symbol'] == 'BRCA1' assert 
results['NM_007299.3:c.301+1G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009230.2:p.?', 'slr': 'NP_009230.2:p.?'} assert results['NM_007299.3:c.301+1G>C']['submitted_variant'] == '17-41256884-C-G' @@ -12427,8 +11804,7 @@ def test_variant255(self): assert 'NR_027676.1:n.440+1G>C' in list(results.keys()) assert results['NR_027676.1:n.440+1G>C']['hgvs_lrg_transcript_variant'] == '' assert results['NR_027676.1:n.440+1G>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_027676.1:n.440+1G>C']['alt_genomic_loci'] == [] - assert results['NR_027676.1:n.440+1G>C']['transcript_description'] == 'Homo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 6, non-coding RNA' + self.assertCountEqual(results['NR_027676.1:n.440+1G>C']['alt_genomic_loci'], []) assert results['NR_027676.1:n.440+1G>C']['gene_symbol'] == 'BRCA1' assert results['NR_027676.1:n.440+1G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_027676.1:n.440+1G>C']['submitted_variant'] == '17-41256884-C-G' @@ -12445,8 +11821,7 @@ def test_variant255(self): assert 'NM_007300.3:c.301+1G>C' in list(results.keys()) assert results['NM_007300.3:c.301+1G>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007300.3:c.301+1G>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_007300.3:c.301+1G>C']['alt_genomic_loci'] == [] - assert results['NM_007300.3:c.301+1G>C']['transcript_description'] == 'Homo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_007300.3:c.301+1G>C']['alt_genomic_loci'], []) assert results['NM_007300.3:c.301+1G>C']['gene_symbol'] == 'BRCA1' assert results['NM_007300.3:c.301+1G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009231.2:p.?', 'slr': 'NP_009231.2:p.?'} assert results['NM_007300.3:c.301+1G>C']['submitted_variant'] == '17-41256884-C-G' @@ -12463,8 +11838,7 @@ def test_variant255(self): assert 
'NM_007298.3:c.301+1G>C' in list(results.keys()) assert results['NM_007298.3:c.301+1G>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007298.3:c.301+1G>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_007298.3:c.301+1G>C']['alt_genomic_loci'] == [] - assert results['NM_007298.3:c.301+1G>C']['transcript_description'] == 'Homo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_007298.3:c.301+1G>C']['alt_genomic_loci'], []) assert results['NM_007298.3:c.301+1G>C']['gene_symbol'] == 'BRCA1' assert results['NM_007298.3:c.301+1G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009229.2:p.?', 'slr': 'NP_009229.2:p.?'} assert results['NM_007298.3:c.301+1G>C']['submitted_variant'] == '17-41256884-C-G' @@ -12481,8 +11855,7 @@ def test_variant255(self): assert 'NM_007297.3:c.160+1G>C' in list(results.keys()) assert results['NM_007297.3:c.160+1G>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007297.3:c.160+1G>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_007297.3:c.160+1G>C']['alt_genomic_loci'] == [] - assert results['NM_007297.3:c.160+1G>C']['transcript_description'] == 'Homo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_007297.3:c.160+1G>C']['alt_genomic_loci'], []) assert results['NM_007297.3:c.160+1G>C']['gene_symbol'] == 'BRCA1' assert results['NM_007297.3:c.160+1G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009228.2:p.?', 'slr': 'NP_009228.2:p.?'} assert results['NM_007297.3:c.160+1G>C']['submitted_variant'] == '17-41256884-C-G' @@ -12500,8 +11873,7 @@ def test_variant255(self): assert 'NM_007294.3:c.301+1G>C' in list(results.keys()) assert results['NM_007294.3:c.301+1G>C']['hgvs_lrg_transcript_variant'] == 'LRG_292t1:c.301+1G>C' assert results['NM_007294.3:c.301+1G>C']['refseqgene_context_intronic_sequence'] == 'NG_005905.2(NM_007294.3):c.301+1G>C' - assert 
results['NM_007294.3:c.301+1G>C']['alt_genomic_loci'] == [] - assert results['NM_007294.3:c.301+1G>C']['transcript_description'] == 'Homo sapiens BRCA1, DNA repair associated (BRCA1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_007294.3:c.301+1G>C']['alt_genomic_loci'], []) assert results['NM_007294.3:c.301+1G>C']['gene_symbol'] == 'BRCA1' assert results['NM_007294.3:c.301+1G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009225.1(LRG_292p1):p.?', 'slr': 'NP_009225.1:p.?'} assert results['NM_007294.3:c.301+1G>C']['submitted_variant'] == '17-41256884-C-G' @@ -12524,8 +11896,7 @@ def test_variant256(self): assert 'NM_001363846.1:c.490G>T' in list(results.keys()) assert results['NM_001363846.1:c.490G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363846.1:c.490G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001363846.1:c.490G>T']['alt_genomic_loci'] == [] - assert results['NM_001363846.1:c.490G>T']['transcript_description'] == 'Homo sapiens glial fibrillary acidic protein (GFAP), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001363846.1:c.490G>T']['alt_genomic_loci'], []) assert results['NM_001363846.1:c.490G>T']['gene_symbol'] == 'GFAP' assert results['NM_001363846.1:c.490G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350775.1:p.(Glu164Ter)', 'slr': 'NP_001350775.1:p.(E164*)'} assert results['NM_001363846.1:c.490G>T']['submitted_variant'] == '17-42991428-C-A' @@ -12542,8 +11913,7 @@ def test_variant256(self): assert 'NM_001131019.2:c.490G>T' in list(results.keys()) assert results['NM_001131019.2:c.490G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001131019.2:c.490G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001131019.2:c.490G>T']['alt_genomic_loci'] == [] - assert results['NM_001131019.2:c.490G>T']['transcript_description'] == 'Homo sapiens glial fibrillary acidic protein (GFAP), transcript variant 2, mRNA' + 
self.assertCountEqual(results['NM_001131019.2:c.490G>T']['alt_genomic_loci'], []) assert results['NM_001131019.2:c.490G>T']['gene_symbol'] == 'GFAP' assert results['NM_001131019.2:c.490G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124491.1:p.(Glu164Ter)', 'slr': 'NP_001124491.1:p.(E164*)'} assert results['NM_001131019.2:c.490G>T']['submitted_variant'] == '17-42991428-C-A' @@ -12561,8 +11931,7 @@ def test_variant256(self): assert 'NM_001242376.1:c.490G>T' in list(results.keys()) assert results['NM_001242376.1:c.490G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001242376.1:c.490G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001242376.1:c.490G>T']['alt_genomic_loci'] == [] - assert results['NM_001242376.1:c.490G>T']['transcript_description'] == 'Homo sapiens glial fibrillary acidic protein (GFAP), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001242376.1:c.490G>T']['alt_genomic_loci'], []) assert results['NM_001242376.1:c.490G>T']['gene_symbol'] == 'GFAP' assert results['NM_001242376.1:c.490G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001229305.1:p.(Glu164Ter)', 'slr': 'NP_001229305.1:p.(E164*)'} assert results['NM_001242376.1:c.490G>T']['submitted_variant'] == '17-42991428-C-A' @@ -12579,8 +11948,7 @@ def test_variant256(self): assert 'NM_002055.4:c.490G>T' in list(results.keys()) assert results['NM_002055.4:c.490G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_002055.4:c.490G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_002055.4:c.490G>T']['alt_genomic_loci'] == [] - assert results['NM_002055.4:c.490G>T']['transcript_description'] == 'Homo sapiens glial fibrillary acidic protein (GFAP), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_002055.4:c.490G>T']['alt_genomic_loci'], []) assert results['NM_002055.4:c.490G>T']['gene_symbol'] == 'GFAP' assert results['NM_002055.4:c.490G>T']['hgvs_predicted_protein_consequence'] == 
{'tlr': 'NP_002046.1:p.(Glu164Ter)', 'slr': 'NP_002046.1:p.(E164*)'} assert results['NM_002055.4:c.490G>T']['submitted_variant'] == '17-42991428-C-A' @@ -12603,8 +11971,7 @@ def test_variant257(self): assert 'NR_135553.1:n.1022A>T' in list(results.keys()) assert results['NR_135553.1:n.1022A>T']['hgvs_lrg_transcript_variant'] == '' assert results['NR_135553.1:n.1022A>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_135553.1:n.1022A>T']['alt_genomic_loci'] == [] - assert results['NR_135553.1:n.1022A>T']['transcript_description'] == 'Homo sapiens sarcoglycan alpha (SGCA), transcript variant 3, non-coding RNA' + self.assertCountEqual(results['NR_135553.1:n.1022A>T']['alt_genomic_loci'], []) assert results['NR_135553.1:n.1022A>T']['gene_symbol'] == 'SGCA' assert results['NR_135553.1:n.1022A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_135553.1:n.1022A>T']['submitted_variant'] == '17-48252809-A-T' @@ -12621,8 +11988,7 @@ def test_variant257(self): assert 'NM_001135697.1:c.*11A>T' in list(results.keys()) assert results['NM_001135697.1:c.*11A>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001135697.1:c.*11A>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001135697.1:c.*11A>T']['alt_genomic_loci'] == [] - assert results['NM_001135697.1:c.*11A>T']['transcript_description'] == 'Homo sapiens sarcoglycan alpha (SGCA), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001135697.1:c.*11A>T']['alt_genomic_loci'], []) assert results['NM_001135697.1:c.*11A>T']['gene_symbol'] == 'SGCA' assert results['NM_001135697.1:c.*11A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129169.1:p.?', 'slr': 'NP_001129169.1:p.?'} assert results['NM_001135697.1:c.*11A>T']['submitted_variant'] == '17-48252809-A-T' @@ -12640,8 +12006,7 @@ def test_variant257(self): assert 'NM_000023.2:c.*11A>T' in list(results.keys()) assert 
results['NM_000023.2:c.*11A>T']['hgvs_lrg_transcript_variant'] == 'LRG_203t1:c.*11A>T' assert results['NM_000023.2:c.*11A>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000023.2:c.*11A>T']['alt_genomic_loci'] == [] - assert results['NM_000023.2:c.*11A>T']['transcript_description'] == 'Homo sapiens sarcoglycan alpha (SGCA), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000023.2:c.*11A>T']['alt_genomic_loci'], []) assert results['NM_000023.2:c.*11A>T']['gene_symbol'] == 'SGCA' assert results['NM_000023.2:c.*11A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000014.1(LRG_203p1):p.?', 'slr': 'NP_000014.1:p.?'} assert results['NM_000023.2:c.*11A>T']['submitted_variant'] == '17-48252809-A-T' @@ -12658,8 +12023,7 @@ def test_variant257(self): assert 'NM_001135697.2:c.*11A>T' in list(results.keys()) assert results['NM_001135697.2:c.*11A>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001135697.2:c.*11A>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001135697.2:c.*11A>T']['alt_genomic_loci'] == [] - assert results['NM_001135697.2:c.*11A>T']['transcript_description'] == 'Homo sapiens sarcoglycan alpha (SGCA), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001135697.2:c.*11A>T']['alt_genomic_loci'], []) assert results['NM_001135697.2:c.*11A>T']['gene_symbol'] == 'SGCA' assert results['NM_001135697.2:c.*11A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129169.1:p.?', 'slr': 'NP_001129169.1:p.?'} assert results['NM_001135697.2:c.*11A>T']['submitted_variant'] == '17-48252809-A-T' @@ -12676,8 +12040,7 @@ def test_variant257(self): assert 'NM_000023.3:c.*11A>T' in list(results.keys()) assert results['NM_000023.3:c.*11A>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000023.3:c.*11A>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000023.3:c.*11A>T']['alt_genomic_loci'] == [] - assert 
results['NM_000023.3:c.*11A>T']['transcript_description'] == 'Homo sapiens sarcoglycan alpha (SGCA), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000023.3:c.*11A>T']['alt_genomic_loci'], []) assert results['NM_000023.3:c.*11A>T']['gene_symbol'] == 'SGCA' assert results['NM_000023.3:c.*11A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000014.1(LRG_203p1):p.?', 'slr': 'NP_000014.1:p.?'} assert results['NM_000023.3:c.*11A>T']['submitted_variant'] == '17-48252809-A-T' @@ -12701,8 +12064,7 @@ def test_variant258(self): assert 'NM_000334.4:c.3720+9_3720+10dup' in list(results.keys()) assert results['NM_000334.4:c.3720+9_3720+10dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000334.4:c.3720+9_3720+10dup']['refseqgene_context_intronic_sequence'] == 'NG_011699.1(NM_000334.4):c.3720+9_3720+10dup' - assert results['NM_000334.4:c.3720+9_3720+10dup']['alt_genomic_loci'] == [] - assert results['NM_000334.4:c.3720+9_3720+10dup']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 4 (SCN4A), mRNA' + self.assertCountEqual(results['NM_000334.4:c.3720+9_3720+10dup']['alt_genomic_loci'], []) assert results['NM_000334.4:c.3720+9_3720+10dup']['gene_symbol'] == 'SCN4A' assert results['NM_000334.4:c.3720+9_3720+10dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000325.4:p.?', 'slr': 'NP_000325.4:p.?'} assert results['NM_000334.4:c.3720+9_3720+10dup']['submitted_variant'] == '17-62022709-G-GTC' @@ -12726,8 +12088,7 @@ def test_variant259(self): assert 'NM_000334.4:c.3720+8_3720+9insA' in list(results.keys()) assert results['NM_000334.4:c.3720+8_3720+9insA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000334.4:c.3720+8_3720+9insA']['refseqgene_context_intronic_sequence'] == 'NG_011699.1(NM_000334.4):c.3720+8_3720+9insA' - assert results['NM_000334.4:c.3720+8_3720+9insA']['alt_genomic_loci'] == [] - assert results['NM_000334.4:c.3720+8_3720+9insA']['transcript_description'] == 'Homo sapiens 
sodium voltage-gated channel alpha subunit 4 (SCN4A), mRNA' + self.assertCountEqual(results['NM_000334.4:c.3720+8_3720+9insA']['alt_genomic_loci'], []) assert results['NM_000334.4:c.3720+8_3720+9insA']['gene_symbol'] == 'SCN4A' assert results['NM_000334.4:c.3720+8_3720+9insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000325.4:p.?', 'slr': 'NP_000325.4:p.?'} assert results['NM_000334.4:c.3720+8_3720+9insA']['submitted_variant'] == '17-62022711-C-CT' @@ -12751,8 +12112,7 @@ def test_variant260(self): assert 'NM_000334.4:c.3442-8_3442-7insGC' in list(results.keys()) assert results['NM_000334.4:c.3442-8_3442-7insGC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000334.4:c.3442-8_3442-7insGC']['refseqgene_context_intronic_sequence'] == 'NG_011699.1(NM_000334.4):c.3442-8_3442-7insGC' - assert results['NM_000334.4:c.3442-8_3442-7insGC']['alt_genomic_loci'] == [] - assert results['NM_000334.4:c.3442-8_3442-7insGC']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 4 (SCN4A), mRNA' + self.assertCountEqual(results['NM_000334.4:c.3442-8_3442-7insGC']['alt_genomic_loci'], []) assert results['NM_000334.4:c.3442-8_3442-7insGC']['gene_symbol'] == 'SCN4A' assert results['NM_000334.4:c.3442-8_3442-7insGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000325.4:p.?', 'slr': 'NP_000325.4:p.?'} assert results['NM_000334.4:c.3442-8_3442-7insGC']['submitted_variant'] == '17-62023005-G-GGC' @@ -12776,8 +12136,7 @@ def test_variant261(self): assert 'NM_000334.4:c.3442-8G>T' in list(results.keys()) assert results['NM_000334.4:c.3442-8G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000334.4:c.3442-8G>T']['refseqgene_context_intronic_sequence'] == 'NG_011699.1(NM_000334.4):c.3442-8G>T' - assert results['NM_000334.4:c.3442-8G>T']['alt_genomic_loci'] == [] - assert results['NM_000334.4:c.3442-8G>T']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 4 (SCN4A), mRNA' + 
self.assertCountEqual(results['NM_000334.4:c.3442-8G>T']['alt_genomic_loci'], []) assert results['NM_000334.4:c.3442-8G>T']['gene_symbol'] == 'SCN4A' assert results['NM_000334.4:c.3442-8G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000325.4:p.?', 'slr': 'NP_000325.4:p.?'} assert results['NM_000334.4:c.3442-8G>T']['submitted_variant'] == '17-62023006-C-A' @@ -12801,8 +12160,7 @@ def test_variant262(self): assert 'NM_000334.4:c.2111C>T' in list(results.keys()) assert results['NM_000334.4:c.2111C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000334.4:c.2111C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000334.4:c.2111C>T']['alt_genomic_loci'] == [] - assert results['NM_000334.4:c.2111C>T']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 4 (SCN4A), mRNA' + self.assertCountEqual(results['NM_000334.4:c.2111C>T']['alt_genomic_loci'], []) assert results['NM_000334.4:c.2111C>T']['gene_symbol'] == 'SCN4A' assert results['NM_000334.4:c.2111C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000325.4:p.(Thr704Met)', 'slr': 'NP_000325.4:p.(T704M)'} assert results['NM_000334.4:c.2111C>T']['submitted_variant'] == '17-62034787-G-A' @@ -12825,8 +12183,7 @@ def test_variant263(self): assert 'NM_001351443.1:c.-16+941_-16+946del' in list(results.keys()) assert results['NM_001351443.1:c.-16+941_-16+946del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001351443.1:c.-16+941_-16+946del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001351443.1:c.-16+941_-16+946del']['alt_genomic_loci'] == [] - assert results['NM_001351443.1:c.-16+941_-16+946del']['transcript_description'] == 'Homo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 6, mRNA' + self.assertCountEqual(results['NM_001351443.1:c.-16+941_-16+946del']['alt_genomic_loci'], []) assert results['NM_001351443.1:c.-16+941_-16+946del']['gene_symbol'] == 'KCTD1' assert 
results['NM_001351443.1:c.-16+941_-16+946del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001338372.1:p.?', 'slr': 'NP_001338372.1:p.?'} assert results['NM_001351443.1:c.-16+941_-16+946del']['submitted_variant'] == '18-24128261-GTCCTCC-G' @@ -12843,8 +12200,7 @@ def test_variant263(self): assert 'NM_001258222.1:c.10-47053_10-47048del' in list(results.keys()) assert results['NM_001258222.1:c.10-47053_10-47048del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001258222.1:c.10-47053_10-47048del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001258222.1:c.10-47053_10-47048del']['alt_genomic_loci'] == [] - assert results['NM_001258222.1:c.10-47053_10-47048del']['transcript_description'] == 'Homo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001258222.1:c.10-47053_10-47048del']['alt_genomic_loci'], []) assert results['NM_001258222.1:c.10-47053_10-47048del']['gene_symbol'] == 'KCTD1' assert results['NM_001258222.1:c.10-47053_10-47048del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001245151.1:p.?', 'slr': 'NP_001245151.1:p.?'} assert results['NM_001258222.1:c.10-47053_10-47048del']['submitted_variant'] == '18-24128261-GTCCTCC-G' @@ -12861,8 +12217,7 @@ def test_variant263(self): assert 'NM_001258221.1:c.-16+1426_-16+1431del' in list(results.keys()) assert results['NM_001258221.1:c.-16+1426_-16+1431del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001258221.1:c.-16+1426_-16+1431del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001258221.1:c.-16+1426_-16+1431del']['alt_genomic_loci'] == [] - assert results['NM_001258221.1:c.-16+1426_-16+1431del']['transcript_description'] == 'Homo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001258221.1:c.-16+1426_-16+1431del']['alt_genomic_loci'], []) assert 
results['NM_001258221.1:c.-16+1426_-16+1431del']['gene_symbol'] == 'KCTD1' assert results['NM_001258221.1:c.-16+1426_-16+1431del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001245150.1:p.?', 'slr': 'NP_001245150.1:p.?'} assert results['NM_001258221.1:c.-16+1426_-16+1431del']['submitted_variant'] == '18-24128261-GTCCTCC-G' @@ -12879,8 +12234,7 @@ def test_variant263(self): assert 'NM_001258222.2:c.10-47053_10-47048del' in list(results.keys()) assert results['NM_001258222.2:c.10-47053_10-47048del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001258222.2:c.10-47053_10-47048del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001258222.2:c.10-47053_10-47048del']['alt_genomic_loci'] == [] - assert results['NM_001258222.2:c.10-47053_10-47048del']['transcript_description'] == 'Homo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001258222.2:c.10-47053_10-47048del']['alt_genomic_loci'], []) assert results['NM_001258222.2:c.10-47053_10-47048del']['gene_symbol'] == 'KCTD1' assert results['NM_001258222.2:c.10-47053_10-47048del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001245151.1:p.?', 'slr': 'NP_001245151.1:p.?'} assert results['NM_001258222.2:c.10-47053_10-47048del']['submitted_variant'] == '18-24128261-GTCCTCC-G' @@ -12898,8 +12252,7 @@ def test_variant263(self): assert 'NM_001136205.2:c.-16+588_-16+593del' in list(results.keys()) assert results['NM_001136205.2:c.-16+588_-16+593del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001136205.2:c.-16+588_-16+593del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001136205.2:c.-16+588_-16+593del']['alt_genomic_loci'] == [] - assert results['NM_001136205.2:c.-16+588_-16+593del']['transcript_description'] == 'Homo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 1, mRNA' + 
self.assertCountEqual(results['NM_001136205.2:c.-16+588_-16+593del']['alt_genomic_loci'], []) assert results['NM_001136205.2:c.-16+588_-16+593del']['gene_symbol'] == 'KCTD1' assert results['NM_001136205.2:c.-16+588_-16+593del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129677.1:p.?', 'slr': 'NP_001129677.1:p.?'} assert results['NM_001136205.2:c.-16+588_-16+593del']['submitted_variant'] == '18-24128261-GTCCTCC-G' @@ -12916,8 +12269,7 @@ def test_variant263(self): assert 'NM_198991.3:c.-15-47053_-15-47048del' in list(results.keys()) assert results['NM_198991.3:c.-15-47053_-15-47048del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_198991.3:c.-15-47053_-15-47048del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_198991.3:c.-15-47053_-15-47048del']['alt_genomic_loci'] == [] - assert results['NM_198991.3:c.-15-47053_-15-47048del']['transcript_description'] == 'Homo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_198991.3:c.-15-47053_-15-47048del']['alt_genomic_loci'], []) assert results['NM_198991.3:c.-15-47053_-15-47048del']['gene_symbol'] == 'KCTD1' assert results['NM_198991.3:c.-15-47053_-15-47048del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_945342.1:p.?', 'slr': 'NP_945342.1:p.?'} assert results['NM_198991.3:c.-15-47053_-15-47048del']['submitted_variant'] == '18-24128261-GTCCTCC-G' @@ -12934,8 +12286,7 @@ def test_variant263(self): assert 'NM_001142730.2:c.234_239del' in list(results.keys()) assert results['NM_001142730.2:c.234_239del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001142730.2:c.234_239del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001142730.2:c.234_239del']['alt_genomic_loci'] == [] - assert results['NM_001142730.2:c.234_239del']['transcript_description'] == 'Homo sapiens potassium channel tetramerization domain containing 1 (KCTD1), transcript variant 3, mRNA' + 
self.assertCountEqual(results['NM_001142730.2:c.234_239del']['alt_genomic_loci'], []) assert results['NM_001142730.2:c.234_239del']['gene_symbol'] == 'KCTD1' assert results['NM_001142730.2:c.234_239del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001136202.1:p.(Glu78_Glu79del)', 'slr': 'NP_001136202.1:p.(E78_E79del)'} assert results['NM_001142730.2:c.234_239del']['submitted_variant'] == '18-24128261-GTCCTCC-G' @@ -12959,8 +12310,7 @@ def test_variant264(self): assert 'NM_000435.2:c.2992C>T' in list(results.keys()) assert results['NM_000435.2:c.2992C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000435.2:c.2992C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000435.2:c.2992C>T']['alt_genomic_loci'] == [] - assert results['NM_000435.2:c.2992C>T']['transcript_description'] == 'Homo sapiens notch 3 (NOTCH3), mRNA' + self.assertCountEqual(results['NM_000435.2:c.2992C>T']['alt_genomic_loci'], []) assert results['NM_000435.2:c.2992C>T']['gene_symbol'] == 'NOTCH3' assert results['NM_000435.2:c.2992C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000426.2:p.(Gln998Ter)', 'slr': 'NP_000426.2:p.(Q998*)'} assert results['NM_000435.2:c.2992C>T']['submitted_variant'] == '19-15291774-G-A' @@ -12984,8 +12334,7 @@ def test_variant265(self): assert 'Intergenic_Variant_1' in list(results.keys()) assert results['Intergenic_Variant_1']['hgvs_lrg_transcript_variant'] == '' assert results['Intergenic_Variant_1']['refseqgene_context_intronic_sequence'] == '' - assert results['Intergenic_Variant_1']['alt_genomic_loci'] == [] - assert results['Intergenic_Variant_1']['transcript_description'] == '' + self.assertCountEqual(results['Intergenic_Variant_1']['alt_genomic_loci'], []) assert results['Intergenic_Variant_1']['gene_symbol'] == '' assert results['Intergenic_Variant_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['Intergenic_Variant_1']['submitted_variant'] == '19-15311794-A-G' @@ -13009,8 
+12358,7 @@ def test_variant266(self): assert 'NM_000540.2:c.14818G>A' in list(results.keys()) assert results['NM_000540.2:c.14818G>A']['hgvs_lrg_transcript_variant'] == 'LRG_766t1:c.14818G>A' assert results['NM_000540.2:c.14818G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000540.2:c.14818G>A']['alt_genomic_loci'] == [] - assert results['NM_000540.2:c.14818G>A']['transcript_description'] == 'Homo sapiens ryanodine receptor 1 (RYR1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000540.2:c.14818G>A']['alt_genomic_loci'], []) assert results['NM_000540.2:c.14818G>A']['gene_symbol'] == 'RYR1' assert results['NM_000540.2:c.14818G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000531.2(LRG_766p1):p.(Ala4940Thr)', 'slr': 'NP_000531.2:p.(A4940T)'} assert results['NM_000540.2:c.14818G>A']['submitted_variant'] == '19-39076592-G-A' @@ -13027,8 +12375,7 @@ def test_variant266(self): assert 'NM_001042723.1:c.14803G>A' in list(results.keys()) assert results['NM_001042723.1:c.14803G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001042723.1:c.14803G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001042723.1:c.14803G>A']['alt_genomic_loci'] == [] - assert results['NM_001042723.1:c.14803G>A']['transcript_description'] == 'Homo sapiens ryanodine receptor 1 (RYR1), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001042723.1:c.14803G>A']['alt_genomic_loci'], []) assert results['NM_001042723.1:c.14803G>A']['gene_symbol'] == 'RYR1' assert results['NM_001042723.1:c.14803G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036188.1:p.(Ala4935Thr)', 'slr': 'NP_001036188.1:p.(A4935T)'} assert results['NM_001042723.1:c.14803G>A']['submitted_variant'] == '19-39076592-G-A' @@ -13051,8 +12398,7 @@ def test_variant267(self): assert 'NM_001330086.1:c.4245A>G' in list(results.keys()) assert results['NM_001330086.1:c.4245A>G']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001330086.1:c.4245A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330086.1:c.4245A>G']['alt_genomic_loci'] == [] - assert results['NM_001330086.1:c.4245A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha9, mRNA' + self.assertCountEqual(results['NM_001330086.1:c.4245A>G']['alt_genomic_loci'], []) assert results['NM_001330086.1:c.4245A>G']['gene_symbol'] == 'NRXN1' assert results['NM_001330086.1:c.4245A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317015.1:p.(Pro1415=)', 'slr': 'NP_001317015.1:p.(P1415=)'} assert results['NM_001330086.1:c.4245A>G']['submitted_variant'] == '2-50149352-T-C' @@ -13069,8 +12415,7 @@ def test_variant267(self): assert 'NM_001330083.1:c.4089A>G' in list(results.keys()) assert results['NM_001330083.1:c.4089A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330083.1:c.4089A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330083.1:c.4089A>G']['alt_genomic_loci'] == [] - assert results['NM_001330083.1:c.4089A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha6, mRNA' + self.assertCountEqual(results['NM_001330083.1:c.4089A>G']['alt_genomic_loci'], []) assert results['NM_001330083.1:c.4089A>G']['gene_symbol'] == 'NRXN1' assert results['NM_001330083.1:c.4089A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317012.1:p.(Pro1363=)', 'slr': 'NP_001317012.1:p.(P1363=)'} assert results['NM_001330083.1:c.4089A>G']['submitted_variant'] == '2-50149352-T-C' @@ -13087,8 +12432,7 @@ def test_variant267(self): assert 'NM_001330095.1:c.4113A>G' in list(results.keys()) assert results['NM_001330095.1:c.4113A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330095.1:c.4113A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330095.1:c.4113A>G']['alt_genomic_loci'] == [] - assert results['NM_001330095.1:c.4113A>G']['transcript_description'] 
== 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha14, mRNA' + self.assertCountEqual(results['NM_001330095.1:c.4113A>G']['alt_genomic_loci'], []) assert results['NM_001330095.1:c.4113A>G']['gene_symbol'] == 'NRXN1' assert results['NM_001330095.1:c.4113A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317024.1:p.(Pro1371=)', 'slr': 'NP_001317024.1:p.(P1371=)'} assert results['NM_001330095.1:c.4113A>G']['submitted_variant'] == '2-50149352-T-C' @@ -13105,8 +12449,7 @@ def test_variant267(self): assert 'NM_138735.2:c.1059A>G' in list(results.keys()) assert results['NM_138735.2:c.1059A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_138735.2:c.1059A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_138735.2:c.1059A>G']['alt_genomic_loci'] == [] - assert results['NM_138735.2:c.1059A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant beta, mRNA' + self.assertCountEqual(results['NM_138735.2:c.1059A>G']['alt_genomic_loci'], []) assert results['NM_138735.2:c.1059A>G']['gene_symbol'] == 'NRXN1' assert results['NM_138735.2:c.1059A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_620072.1:p.(Pro353=)', 'slr': 'NP_620072.1:p.(P353=)'} assert results['NM_138735.2:c.1059A>G']['submitted_variant'] == '2-50149352-T-C' @@ -13123,8 +12466,7 @@ def test_variant267(self): assert 'NM_001330078.1:c.4254A>G' in list(results.keys()) assert results['NM_001330078.1:c.4254A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330078.1:c.4254A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330078.1:c.4254A>G']['alt_genomic_loci'] == [] - assert results['NM_001330078.1:c.4254A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha4, mRNA' + self.assertCountEqual(results['NM_001330078.1:c.4254A>G']['alt_genomic_loci'], []) assert results['NM_001330078.1:c.4254A>G']['gene_symbol'] == 'NRXN1' assert 
results['NM_001330078.1:c.4254A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317007.1:p.(Pro1418=)', 'slr': 'NP_001317007.1:p.(P1418=)'} assert results['NM_001330078.1:c.4254A>G']['submitted_variant'] == '2-50149352-T-C' @@ -13141,8 +12483,7 @@ def test_variant267(self): assert 'NM_001330094.1:c.4233A>G' in list(results.keys()) assert results['NM_001330094.1:c.4233A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330094.1:c.4233A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330094.1:c.4233A>G']['alt_genomic_loci'] == [] - assert results['NM_001330094.1:c.4233A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha13, mRNA' + self.assertCountEqual(results['NM_001330094.1:c.4233A>G']['alt_genomic_loci'], []) assert results['NM_001330094.1:c.4233A>G']['gene_symbol'] == 'NRXN1' assert results['NM_001330094.1:c.4233A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317023.1:p.(Pro1411=)', 'slr': 'NP_001317023.1:p.(P1411=)'} assert results['NM_001330094.1:c.4233A>G']['submitted_variant'] == '2-50149352-T-C' @@ -13159,8 +12500,7 @@ def test_variant267(self): assert 'NM_001320157.3:c.150A>G' in list(results.keys()) assert results['NM_001320157.3:c.150A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001320157.3:c.150A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001320157.3:c.150A>G']['alt_genomic_loci'] == [] - assert results['NM_001320157.3:c.150A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant gamma2, mRNA' + self.assertCountEqual(results['NM_001320157.3:c.150A>G']['alt_genomic_loci'], []) assert results['NM_001320157.3:c.150A>G']['gene_symbol'] == 'NRXN1' assert results['NM_001320157.3:c.150A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001307086.1:p.(Pro50=)', 'slr': 'NP_001307086.1:p.(P50=)'} assert results['NM_001320157.3:c.150A>G']['submitted_variant'] == '2-50149352-T-C' 
@@ -13177,8 +12517,7 @@ def test_variant267(self): assert 'NM_001330088.1:c.4074A>G' in list(results.keys()) assert results['NM_001330088.1:c.4074A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330088.1:c.4074A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330088.1:c.4074A>G']['alt_genomic_loci'] == [] - assert results['NM_001330088.1:c.4074A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha11, mRNA' + self.assertCountEqual(results['NM_001330088.1:c.4074A>G']['alt_genomic_loci'], []) assert results['NM_001330088.1:c.4074A>G']['gene_symbol'] == 'NRXN1' assert results['NM_001330088.1:c.4074A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317017.1:p.(Pro1358=)', 'slr': 'NP_001317017.1:p.(P1358=)'} assert results['NM_001330088.1:c.4074A>G']['submitted_variant'] == '2-50149352-T-C' @@ -13195,8 +12534,7 @@ def test_variant267(self): assert 'NM_001330092.1:c.1149A>G' in list(results.keys()) assert results['NM_001330092.1:c.1149A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330092.1:c.1149A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330092.1:c.1149A>G']['alt_genomic_loci'] == [] - assert results['NM_001330092.1:c.1149A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant beta3, mRNA' + self.assertCountEqual(results['NM_001330092.1:c.1149A>G']['alt_genomic_loci'], []) assert results['NM_001330092.1:c.1149A>G']['gene_symbol'] == 'NRXN1' assert results['NM_001330092.1:c.1149A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317021.1:p.(Pro383=)', 'slr': 'NP_001317021.1:p.(P383=)'} assert results['NM_001330092.1:c.1149A>G']['submitted_variant'] == '2-50149352-T-C' @@ -13213,8 +12551,7 @@ def test_variant267(self): assert 'NM_138735.4:c.1059A>G' in list(results.keys()) assert results['NM_138735.4:c.1059A>G']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_138735.4:c.1059A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_138735.4:c.1059A>G']['alt_genomic_loci'] == [] - assert results['NM_138735.4:c.1059A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant beta1, mRNA' + self.assertCountEqual(results['NM_138735.4:c.1059A>G']['alt_genomic_loci'], []) assert results['NM_138735.4:c.1059A>G']['gene_symbol'] == 'NRXN1' assert results['NM_138735.4:c.1059A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_620072.1:p.(Pro353=)', 'slr': 'NP_620072.1:p.(P353=)'} assert results['NM_138735.4:c.1059A>G']['submitted_variant'] == '2-50149352-T-C' @@ -13231,8 +12568,7 @@ def test_variant267(self): assert 'NM_001330096.1:c.4044A>G' in list(results.keys()) assert results['NM_001330096.1:c.4044A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330096.1:c.4044A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330096.1:c.4044A>G']['alt_genomic_loci'] == [] - assert results['NM_001330096.1:c.4044A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha15, mRNA' + self.assertCountEqual(results['NM_001330096.1:c.4044A>G']['alt_genomic_loci'], []) assert results['NM_001330096.1:c.4044A>G']['gene_symbol'] == 'NRXN1' assert results['NM_001330096.1:c.4044A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317025.1:p.(Pro1348=)', 'slr': 'NP_001317025.1:p.(P1348=)'} assert results['NM_001330096.1:c.4044A>G']['submitted_variant'] == '2-50149352-T-C' @@ -13249,8 +12585,7 @@ def test_variant267(self): assert 'NM_001135659.2:c.4374A>G' in list(results.keys()) assert results['NM_001135659.2:c.4374A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001135659.2:c.4374A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001135659.2:c.4374A>G']['alt_genomic_loci'] == [] - assert results['NM_001135659.2:c.4374A>G']['transcript_description'] == 'Homo sapiens neurexin 1 
(NRXN1), transcript variant alpha2, mRNA' + self.assertCountEqual(results['NM_001135659.2:c.4374A>G']['alt_genomic_loci'], []) assert results['NM_001135659.2:c.4374A>G']['gene_symbol'] == 'NRXN1' assert results['NM_001135659.2:c.4374A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129131.1:p.(Pro1458=)', 'slr': 'NP_001129131.1:p.(P1458=)'} assert results['NM_001135659.2:c.4374A>G']['submitted_variant'] == '2-50149352-T-C' @@ -13267,8 +12602,7 @@ def test_variant267(self): assert 'NM_001330085.1:c.4227A>G' in list(results.keys()) assert results['NM_001330085.1:c.4227A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330085.1:c.4227A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330085.1:c.4227A>G']['alt_genomic_loci'] == [] - assert results['NM_001330085.1:c.4227A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha8, mRNA' + self.assertCountEqual(results['NM_001330085.1:c.4227A>G']['alt_genomic_loci'], []) assert results['NM_001330085.1:c.4227A>G']['gene_symbol'] == 'NRXN1' assert results['NM_001330085.1:c.4227A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317014.1:p.(Pro1409=)', 'slr': 'NP_001317014.1:p.(P1409=)'} assert results['NM_001330085.1:c.4227A>G']['submitted_variant'] == '2-50149352-T-C' @@ -13285,8 +12619,7 @@ def test_variant267(self): assert 'NM_001320156.1:c.159A>G' in list(results.keys()) assert results['NM_001320156.1:c.159A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001320156.1:c.159A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001320156.1:c.159A>G']['alt_genomic_loci'] == [] - assert results['NM_001320156.1:c.159A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant gamma1, mRNA' + self.assertCountEqual(results['NM_001320156.1:c.159A>G']['alt_genomic_loci'], []) assert results['NM_001320156.1:c.159A>G']['gene_symbol'] == 'NRXN1' assert 
results['NM_001320156.1:c.159A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001307085.1:p.(Pro53=)', 'slr': 'NP_001307085.1:p.(P53=)'} assert results['NM_001320156.1:c.159A>G']['submitted_variant'] == '2-50149352-T-C' @@ -13303,8 +12636,7 @@ def test_variant267(self): assert 'NM_001330077.1:c.4230A>G' in list(results.keys()) assert results['NM_001330077.1:c.4230A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330077.1:c.4230A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330077.1:c.4230A>G']['alt_genomic_loci'] == [] - assert results['NM_001330077.1:c.4230A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha3, mRNA' + self.assertCountEqual(results['NM_001330077.1:c.4230A>G']['alt_genomic_loci'], []) assert results['NM_001330077.1:c.4230A>G']['gene_symbol'] == 'NRXN1' assert results['NM_001330077.1:c.4230A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317006.1:p.(Pro1410=)', 'slr': 'NP_001317006.1:p.(P1410=)'} assert results['NM_001330077.1:c.4230A>G']['submitted_variant'] == '2-50149352-T-C' @@ -13321,8 +12653,7 @@ def test_variant267(self): assert 'NM_001330093.1:c.4251A>G' in list(results.keys()) assert results['NM_001330093.1:c.4251A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330093.1:c.4251A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330093.1:c.4251A>G']['alt_genomic_loci'] == [] - assert results['NM_001330093.1:c.4251A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha12, mRNA' + self.assertCountEqual(results['NM_001330093.1:c.4251A>G']['alt_genomic_loci'], []) assert results['NM_001330093.1:c.4251A>G']['gene_symbol'] == 'NRXN1' assert results['NM_001330093.1:c.4251A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317022.1:p.(Pro1417=)', 'slr': 'NP_001317022.1:p.(P1417=)'} assert results['NM_001330093.1:c.4251A>G']['submitted_variant'] == 
'2-50149352-T-C' @@ -13339,8 +12670,7 @@ def test_variant267(self): assert 'NM_001135659.1:c.4374A>G' in list(results.keys()) assert results['NM_001135659.1:c.4374A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001135659.1:c.4374A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001135659.1:c.4374A>G']['alt_genomic_loci'] == [] - assert results['NM_001135659.1:c.4374A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha2, mRNA' + self.assertCountEqual(results['NM_001135659.1:c.4374A>G']['alt_genomic_loci'], []) assert results['NM_001135659.1:c.4374A>G']['gene_symbol'] == 'NRXN1' assert results['NM_001135659.1:c.4374A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129131.1:p.(Pro1458=)', 'slr': 'NP_001129131.1:p.(P1458=)'} assert results['NM_001135659.1:c.4374A>G']['submitted_variant'] == '2-50149352-T-C' @@ -13357,8 +12687,7 @@ def test_variant267(self): assert 'NM_001320157.1:c.150A>G' in list(results.keys()) assert results['NM_001320157.1:c.150A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001320157.1:c.150A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001320157.1:c.150A>G']['alt_genomic_loci'] == [] - assert results['NM_001320157.1:c.150A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant gamma2, mRNA' + self.assertCountEqual(results['NM_001320157.1:c.150A>G']['alt_genomic_loci'], []) assert results['NM_001320157.1:c.150A>G']['gene_symbol'] == 'NRXN1' assert results['NM_001320157.1:c.150A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001307086.1:p.(Pro50=)', 'slr': 'NP_001307086.1:p.(P50=)'} assert results['NM_001320157.1:c.150A>G']['submitted_variant'] == '2-50149352-T-C' @@ -13375,8 +12704,7 @@ def test_variant267(self): assert 'NM_001330084.1:c.4188A>G' in list(results.keys()) assert results['NM_001330084.1:c.4188A>G']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001330084.1:c.4188A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330084.1:c.4188A>G']['alt_genomic_loci'] == [] - assert results['NM_001330084.1:c.4188A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha7, mRNA' + self.assertCountEqual(results['NM_001330084.1:c.4188A>G']['alt_genomic_loci'], []) assert results['NM_001330084.1:c.4188A>G']['gene_symbol'] == 'NRXN1' assert results['NM_001330084.1:c.4188A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317013.1:p.(Pro1396=)', 'slr': 'NP_001317013.1:p.(P1396=)'} assert results['NM_001330084.1:c.4188A>G']['submitted_variant'] == '2-50149352-T-C' @@ -13393,8 +12721,7 @@ def test_variant267(self): assert 'NM_004801.4:c.4164A>G' in list(results.keys()) assert results['NM_004801.4:c.4164A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_004801.4:c.4164A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004801.4:c.4164A>G']['alt_genomic_loci'] == [] - assert results['NM_004801.4:c.4164A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha1, mRNA' + self.assertCountEqual(results['NM_004801.4:c.4164A>G']['alt_genomic_loci'], []) assert results['NM_004801.4:c.4164A>G']['gene_symbol'] == 'NRXN1' assert results['NM_004801.4:c.4164A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004792.1:p.(Pro1388=)', 'slr': 'NP_004792.1:p.(P1388=)'} assert results['NM_004801.4:c.4164A>G']['submitted_variant'] == '2-50149352-T-C' @@ -13411,8 +12738,7 @@ def test_variant267(self): assert 'NM_001330082.1:c.4221A>G' in list(results.keys()) assert results['NM_001330082.1:c.4221A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330082.1:c.4221A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330082.1:c.4221A>G']['alt_genomic_loci'] == [] - assert results['NM_001330082.1:c.4221A>G']['transcript_description'] == 'Homo sapiens neurexin 1 
(NRXN1), transcript variant alpha5, mRNA' + self.assertCountEqual(results['NM_001330082.1:c.4221A>G']['alt_genomic_loci'], []) assert results['NM_001330082.1:c.4221A>G']['gene_symbol'] == 'NRXN1' assert results['NM_001330082.1:c.4221A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317011.1:p.(Pro1407=)', 'slr': 'NP_001317011.1:p.(P1407=)'} assert results['NM_001330082.1:c.4221A>G']['submitted_variant'] == '2-50149352-T-C' @@ -13430,8 +12756,7 @@ def test_variant267(self): assert 'NM_001330091.1:c.1140A>G' in list(results.keys()) assert results['NM_001330091.1:c.1140A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330091.1:c.1140A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330091.1:c.1140A>G']['alt_genomic_loci'] == [] - assert results['NM_001330091.1:c.1140A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant beta2, mRNA' + self.assertCountEqual(results['NM_001330091.1:c.1140A>G']['alt_genomic_loci'], []) assert results['NM_001330091.1:c.1140A>G']['gene_symbol'] == 'NRXN1' assert results['NM_001330091.1:c.1140A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317020.1:p.(Pro380=)', 'slr': 'NP_001317020.1:p.(P380=)'} assert results['NM_001330091.1:c.1140A>G']['submitted_variant'] == '2-50149352-T-C' @@ -13448,8 +12773,7 @@ def test_variant267(self): assert 'NM_001320156.3:c.159A>G' in list(results.keys()) assert results['NM_001320156.3:c.159A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001320156.3:c.159A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001320156.3:c.159A>G']['alt_genomic_loci'] == [] - assert results['NM_001320156.3:c.159A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant gamma1, mRNA' + self.assertCountEqual(results['NM_001320156.3:c.159A>G']['alt_genomic_loci'], []) assert results['NM_001320156.3:c.159A>G']['gene_symbol'] == 'NRXN1' assert 
results['NM_001320156.3:c.159A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001307085.1:p.(Pro53=)', 'slr': 'NP_001307085.1:p.(P53=)'} assert results['NM_001320156.3:c.159A>G']['submitted_variant'] == '2-50149352-T-C' @@ -13466,8 +12790,7 @@ def test_variant267(self): assert 'NM_001330087.1:c.4053A>G' in list(results.keys()) assert results['NM_001330087.1:c.4053A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330087.1:c.4053A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330087.1:c.4053A>G']['alt_genomic_loci'] == [] - assert results['NM_001330087.1:c.4053A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha10, mRNA' + self.assertCountEqual(results['NM_001330087.1:c.4053A>G']['alt_genomic_loci'], []) assert results['NM_001330087.1:c.4053A>G']['gene_symbol'] == 'NRXN1' assert results['NM_001330087.1:c.4053A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317016.1:p.(Pro1351=)', 'slr': 'NP_001317016.1:p.(P1351=)'} assert results['NM_001330087.1:c.4053A>G']['submitted_variant'] == '2-50149352-T-C' @@ -13484,8 +12807,7 @@ def test_variant267(self): assert 'NM_001330097.1:c.1050A>G' in list(results.keys()) assert results['NM_001330097.1:c.1050A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330097.1:c.1050A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330097.1:c.1050A>G']['alt_genomic_loci'] == [] - assert results['NM_001330097.1:c.1050A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant beta4, mRNA' + self.assertCountEqual(results['NM_001330097.1:c.1050A>G']['alt_genomic_loci'], []) assert results['NM_001330097.1:c.1050A>G']['gene_symbol'] == 'NRXN1' assert results['NM_001330097.1:c.1050A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317026.1:p.(Pro350=)', 'slr': 'NP_001317026.1:p.(P350=)'} assert results['NM_001330097.1:c.1050A>G']['submitted_variant'] == 
'2-50149352-T-C' @@ -13502,8 +12824,7 @@ def test_variant267(self): assert 'NM_004801.5:c.4164A>G' in list(results.keys()) assert results['NM_004801.5:c.4164A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_004801.5:c.4164A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004801.5:c.4164A>G']['alt_genomic_loci'] == [] - assert results['NM_004801.5:c.4164A>G']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha1, mRNA' + self.assertCountEqual(results['NM_004801.5:c.4164A>G']['alt_genomic_loci'], []) assert results['NM_004801.5:c.4164A>G']['gene_symbol'] == 'NRXN1' assert results['NM_004801.5:c.4164A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004792.1:p.(Pro1388=)', 'slr': 'NP_004792.1:p.(P1388=)'} assert results['NM_004801.5:c.4164A>G']['submitted_variant'] == '2-50149352-T-C' @@ -13526,8 +12847,7 @@ def test_variant268(self): assert 'NM_001330096.1:c.1201C>T' in list(results.keys()) assert results['NM_001330096.1:c.1201C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330096.1:c.1201C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330096.1:c.1201C>T']['alt_genomic_loci'] == [] - assert results['NM_001330096.1:c.1201C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha15, mRNA' + self.assertCountEqual(results['NM_001330096.1:c.1201C>T']['alt_genomic_loci'], []) assert results['NM_001330096.1:c.1201C>T']['gene_symbol'] == 'NRXN1' assert results['NM_001330096.1:c.1201C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317025.1:p.(Pro401Ser)', 'slr': 'NP_001317025.1:p.(P401S)'} assert results['NM_001330096.1:c.1201C>T']['submitted_variant'] == '2-50847195-G-A' @@ -13544,8 +12864,7 @@ def test_variant268(self): assert 'NM_001330084.1:c.1246C>T' in list(results.keys()) assert results['NM_001330084.1:c.1246C>T']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001330084.1:c.1246C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330084.1:c.1246C>T']['alt_genomic_loci'] == [] - assert results['NM_001330084.1:c.1246C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha7, mRNA' + self.assertCountEqual(results['NM_001330084.1:c.1246C>T']['alt_genomic_loci'], []) assert results['NM_001330084.1:c.1246C>T']['gene_symbol'] == 'NRXN1' assert results['NM_001330084.1:c.1246C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317013.1:p.(Pro416Ser)', 'slr': 'NP_001317013.1:p.(P416S)'} assert results['NM_001330084.1:c.1246C>T']['submitted_variant'] == '2-50847195-G-A' @@ -13562,8 +12881,7 @@ def test_variant268(self): assert 'NM_001330077.1:c.1261C>T' in list(results.keys()) assert results['NM_001330077.1:c.1261C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330077.1:c.1261C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330077.1:c.1261C>T']['alt_genomic_loci'] == [] - assert results['NM_001330077.1:c.1261C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha3, mRNA' + self.assertCountEqual(results['NM_001330077.1:c.1261C>T']['alt_genomic_loci'], []) assert results['NM_001330077.1:c.1261C>T']['gene_symbol'] == 'NRXN1' assert results['NM_001330077.1:c.1261C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317006.1:p.(Pro421Ser)', 'slr': 'NP_001317006.1:p.(P421S)'} assert results['NM_001330077.1:c.1261C>T']['submitted_variant'] == '2-50847195-G-A' @@ -13580,8 +12898,7 @@ def test_variant268(self): assert 'NM_001330086.1:c.1285C>T' in list(results.keys()) assert results['NM_001330086.1:c.1285C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330086.1:c.1285C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330086.1:c.1285C>T']['alt_genomic_loci'] == [] - assert results['NM_001330086.1:c.1285C>T']['transcript_description'] 
== 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha9, mRNA' + self.assertCountEqual(results['NM_001330086.1:c.1285C>T']['alt_genomic_loci'], []) assert results['NM_001330086.1:c.1285C>T']['gene_symbol'] == 'NRXN1' assert results['NM_001330086.1:c.1285C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317015.1:p.(Pro429Ser)', 'slr': 'NP_001317015.1:p.(P429S)'} assert results['NM_001330086.1:c.1285C>T']['submitted_variant'] == '2-50847195-G-A' @@ -13598,8 +12915,7 @@ def test_variant268(self): assert 'NM_001330088.1:c.1231C>T' in list(results.keys()) assert results['NM_001330088.1:c.1231C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330088.1:c.1231C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330088.1:c.1231C>T']['alt_genomic_loci'] == [] - assert results['NM_001330088.1:c.1231C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha11, mRNA' + self.assertCountEqual(results['NM_001330088.1:c.1231C>T']['alt_genomic_loci'], []) assert results['NM_001330088.1:c.1231C>T']['gene_symbol'] == 'NRXN1' assert results['NM_001330088.1:c.1231C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317017.1:p.(Pro411Ser)', 'slr': 'NP_001317017.1:p.(P411S)'} assert results['NM_001330088.1:c.1231C>T']['submitted_variant'] == '2-50847195-G-A' @@ -13616,8 +12932,7 @@ def test_variant268(self): assert 'NM_001330093.1:c.1282C>T' in list(results.keys()) assert results['NM_001330093.1:c.1282C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330093.1:c.1282C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330093.1:c.1282C>T']['alt_genomic_loci'] == [] - assert results['NM_001330093.1:c.1282C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha12, mRNA' + self.assertCountEqual(results['NM_001330093.1:c.1282C>T']['alt_genomic_loci'], []) assert results['NM_001330093.1:c.1282C>T']['gene_symbol'] == 'NRXN1' 
assert results['NM_001330093.1:c.1282C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317022.1:p.(Pro428Ser)', 'slr': 'NP_001317022.1:p.(P428S)'} assert results['NM_001330093.1:c.1282C>T']['submitted_variant'] == '2-50847195-G-A' @@ -13634,8 +12949,7 @@ def test_variant268(self): assert 'NM_001330087.1:c.1201C>T' in list(results.keys()) assert results['NM_001330087.1:c.1201C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330087.1:c.1201C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330087.1:c.1201C>T']['alt_genomic_loci'] == [] - assert results['NM_001330087.1:c.1201C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha10, mRNA' + self.assertCountEqual(results['NM_001330087.1:c.1201C>T']['alt_genomic_loci'], []) assert results['NM_001330087.1:c.1201C>T']['gene_symbol'] == 'NRXN1' assert results['NM_001330087.1:c.1201C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317016.1:p.(Pro401Ser)', 'slr': 'NP_001317016.1:p.(P401S)'} assert results['NM_001330087.1:c.1201C>T']['submitted_variant'] == '2-50847195-G-A' @@ -13652,8 +12966,7 @@ def test_variant268(self): assert 'NM_001330082.1:c.1261C>T' in list(results.keys()) assert results['NM_001330082.1:c.1261C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330082.1:c.1261C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330082.1:c.1261C>T']['alt_genomic_loci'] == [] - assert results['NM_001330082.1:c.1261C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha5, mRNA' + self.assertCountEqual(results['NM_001330082.1:c.1261C>T']['alt_genomic_loci'], []) assert results['NM_001330082.1:c.1261C>T']['gene_symbol'] == 'NRXN1' assert results['NM_001330082.1:c.1261C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317011.1:p.(Pro421Ser)', 'slr': 'NP_001317011.1:p.(P421S)'} assert results['NM_001330082.1:c.1261C>T']['submitted_variant'] 
== '2-50847195-G-A' @@ -13670,8 +12983,7 @@ def test_variant268(self): assert 'NM_001330078.1:c.1285C>T' in list(results.keys()) assert results['NM_001330078.1:c.1285C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330078.1:c.1285C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330078.1:c.1285C>T']['alt_genomic_loci'] == [] - assert results['NM_001330078.1:c.1285C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha4, mRNA' + self.assertCountEqual(results['NM_001330078.1:c.1285C>T']['alt_genomic_loci'], []) assert results['NM_001330078.1:c.1285C>T']['gene_symbol'] == 'NRXN1' assert results['NM_001330078.1:c.1285C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317007.1:p.(Pro429Ser)', 'slr': 'NP_001317007.1:p.(P429S)'} assert results['NM_001330078.1:c.1285C>T']['submitted_variant'] == '2-50847195-G-A' @@ -13688,8 +13000,7 @@ def test_variant268(self): assert 'NM_001330094.1:c.1273C>T' in list(results.keys()) assert results['NM_001330094.1:c.1273C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330094.1:c.1273C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330094.1:c.1273C>T']['alt_genomic_loci'] == [] - assert results['NM_001330094.1:c.1273C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha13, mRNA' + self.assertCountEqual(results['NM_001330094.1:c.1273C>T']['alt_genomic_loci'], []) assert results['NM_001330094.1:c.1273C>T']['gene_symbol'] == 'NRXN1' assert results['NM_001330094.1:c.1273C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317023.1:p.(Pro425Ser)', 'slr': 'NP_001317023.1:p.(P425S)'} assert results['NM_001330094.1:c.1273C>T']['submitted_variant'] == '2-50847195-G-A' @@ -13707,8 +13018,7 @@ def test_variant268(self): assert 'NM_001135659.2:c.1405C>T' in list(results.keys()) assert results['NM_001135659.2:c.1405C>T']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001135659.2:c.1405C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001135659.2:c.1405C>T']['alt_genomic_loci'] == [] - assert results['NM_001135659.2:c.1405C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha2, mRNA' + self.assertCountEqual(results['NM_001135659.2:c.1405C>T']['alt_genomic_loci'], []) assert results['NM_001135659.2:c.1405C>T']['gene_symbol'] == 'NRXN1' assert results['NM_001135659.2:c.1405C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129131.1:p.(Pro469Ser)', 'slr': 'NP_001129131.1:p.(P469S)'} assert results['NM_001135659.2:c.1405C>T']['submitted_variant'] == '2-50847195-G-A' @@ -13725,8 +13035,7 @@ def test_variant268(self): assert 'NM_001330083.1:c.1246C>T' in list(results.keys()) assert results['NM_001330083.1:c.1246C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330083.1:c.1246C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330083.1:c.1246C>T']['alt_genomic_loci'] == [] - assert results['NM_001330083.1:c.1246C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha6, mRNA' + self.assertCountEqual(results['NM_001330083.1:c.1246C>T']['alt_genomic_loci'], []) assert results['NM_001330083.1:c.1246C>T']['gene_symbol'] == 'NRXN1' assert results['NM_001330083.1:c.1246C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317012.1:p.(Pro416Ser)', 'slr': 'NP_001317012.1:p.(P416S)'} assert results['NM_001330083.1:c.1246C>T']['submitted_variant'] == '2-50847195-G-A' @@ -13743,8 +13052,7 @@ def test_variant268(self): assert 'NM_004801.5:c.1285C>T' in list(results.keys()) assert results['NM_004801.5:c.1285C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_004801.5:c.1285C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004801.5:c.1285C>T']['alt_genomic_loci'] == [] - assert results['NM_004801.5:c.1285C>T']['transcript_description'] == 'Homo sapiens 
neurexin 1 (NRXN1), transcript variant alpha1, mRNA' + self.assertCountEqual(results['NM_004801.5:c.1285C>T']['alt_genomic_loci'], []) assert results['NM_004801.5:c.1285C>T']['gene_symbol'] == 'NRXN1' assert results['NM_004801.5:c.1285C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004792.1:p.(Pro429Ser)', 'slr': 'NP_004792.1:p.(P429S)'} assert results['NM_004801.5:c.1285C>T']['submitted_variant'] == '2-50847195-G-A' @@ -13761,8 +13069,7 @@ def test_variant268(self): assert 'NM_001330085.1:c.1285C>T' in list(results.keys()) assert results['NM_001330085.1:c.1285C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330085.1:c.1285C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330085.1:c.1285C>T']['alt_genomic_loci'] == [] - assert results['NM_001330085.1:c.1285C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha8, mRNA' + self.assertCountEqual(results['NM_001330085.1:c.1285C>T']['alt_genomic_loci'], []) assert results['NM_001330085.1:c.1285C>T']['gene_symbol'] == 'NRXN1' assert results['NM_001330085.1:c.1285C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317014.1:p.(Pro429Ser)', 'slr': 'NP_001317014.1:p.(P429S)'} assert results['NM_001330085.1:c.1285C>T']['submitted_variant'] == '2-50847195-G-A' @@ -13779,8 +13086,7 @@ def test_variant268(self): assert 'NM_001330095.1:c.1261C>T' in list(results.keys()) assert results['NM_001330095.1:c.1261C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330095.1:c.1261C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330095.1:c.1261C>T']['alt_genomic_loci'] == [] - assert results['NM_001330095.1:c.1261C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha14, mRNA' + self.assertCountEqual(results['NM_001330095.1:c.1261C>T']['alt_genomic_loci'], []) assert results['NM_001330095.1:c.1261C>T']['gene_symbol'] == 'NRXN1' assert 
results['NM_001330095.1:c.1261C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317024.1:p.(Pro421Ser)', 'slr': 'NP_001317024.1:p.(P421S)'} assert results['NM_001330095.1:c.1261C>T']['submitted_variant'] == '2-50847195-G-A' @@ -13797,8 +13103,7 @@ def test_variant268(self): assert 'NM_004801.4:c.1285C>T' in list(results.keys()) assert results['NM_004801.4:c.1285C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_004801.4:c.1285C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004801.4:c.1285C>T']['alt_genomic_loci'] == [] - assert results['NM_004801.4:c.1285C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha1, mRNA' + self.assertCountEqual(results['NM_004801.4:c.1285C>T']['alt_genomic_loci'], []) assert results['NM_004801.4:c.1285C>T']['gene_symbol'] == 'NRXN1' assert results['NM_004801.4:c.1285C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004792.1:p.(Pro429Ser)', 'slr': 'NP_004792.1:p.(P429S)'} assert results['NM_004801.4:c.1285C>T']['submitted_variant'] == '2-50847195-G-A' @@ -13815,8 +13120,7 @@ def test_variant268(self): assert 'NM_001135659.1:c.1405C>T' in list(results.keys()) assert results['NM_001135659.1:c.1405C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001135659.1:c.1405C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001135659.1:c.1405C>T']['alt_genomic_loci'] == [] - assert results['NM_001135659.1:c.1405C>T']['transcript_description'] == 'Homo sapiens neurexin 1 (NRXN1), transcript variant alpha2, mRNA' + self.assertCountEqual(results['NM_001135659.1:c.1405C>T']['alt_genomic_loci'], []) assert results['NM_001135659.1:c.1405C>T']['gene_symbol'] == 'NRXN1' assert results['NM_001135659.1:c.1405C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129131.1:p.(Pro469Ser)', 'slr': 'NP_001129131.1:p.(P469S)'} assert results['NM_001135659.1:c.1405C>T']['submitted_variant'] == '2-50847195-G-A' @@ -13839,8 +13143,7 
@@ def test_variant269(self): assert 'NM_001130976.1:c.3582C>G' in list(results.keys()) assert results['NM_001130976.1:c.3582C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001130976.1:c.3582C>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001130976.1:c.3582C>G']['alt_genomic_loci'] == [] - assert results['NM_001130976.1:c.3582C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 9, mRNA' + self.assertCountEqual(results['NM_001130976.1:c.3582C>G']['alt_genomic_loci'], []) assert results['NM_001130976.1:c.3582C>G']['gene_symbol'] == 'DYSF' assert results['NM_001130976.1:c.3582C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124448.1:p.(Ile1194Met)', 'slr': 'NP_001124448.1:p.(I1194M)'} assert results['NM_001130976.1:c.3582C>G']['submitted_variant'] == '2-71825797-C-G' @@ -13857,8 +13160,7 @@ def test_variant269(self): assert 'NM_001130981.1:c.3675C>G' in list(results.keys()) assert results['NM_001130981.1:c.3675C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001130981.1:c.3675C>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001130981.1:c.3675C>G']['alt_genomic_loci'] == [] - assert results['NM_001130981.1:c.3675C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 14, mRNA' + self.assertCountEqual(results['NM_001130981.1:c.3675C>G']['alt_genomic_loci'], []) assert results['NM_001130981.1:c.3675C>G']['gene_symbol'] == 'DYSF' assert results['NM_001130981.1:c.3675C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124453.1:p.(Ile1225Met)', 'slr': 'NP_001124453.1:p.(I1225M)'} assert results['NM_001130981.1:c.3675C>G']['submitted_variant'] == '2-71825797-C-G' @@ -13875,8 +13177,7 @@ def test_variant269(self): assert 'NM_001130979.1:c.3717C>G' in list(results.keys()) assert results['NM_001130979.1:c.3717C>G']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001130979.1:c.3717C>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001130979.1:c.3717C>G']['alt_genomic_loci'] == [] - assert results['NM_001130979.1:c.3717C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 12, mRNA' + self.assertCountEqual(results['NM_001130979.1:c.3717C>G']['alt_genomic_loci'], []) assert results['NM_001130979.1:c.3717C>G']['gene_symbol'] == 'DYSF' assert results['NM_001130979.1:c.3717C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124451.1:p.(Ile1239Met)', 'slr': 'NP_001124451.1:p.(I1239M)'} assert results['NM_001130979.1:c.3717C>G']['submitted_variant'] == '2-71825797-C-G' @@ -13893,8 +13194,7 @@ def test_variant269(self): assert 'NM_001130985.1:c.3678C>G' in list(results.keys()) assert results['NM_001130985.1:c.3678C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001130985.1:c.3678C>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001130985.1:c.3678C>G']['alt_genomic_loci'] == [] - assert results['NM_001130985.1:c.3678C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001130985.1:c.3678C>G']['alt_genomic_loci'], []) assert results['NM_001130985.1:c.3678C>G']['gene_symbol'] == 'DYSF' assert results['NM_001130985.1:c.3678C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124457.1:p.(Ile1226Met)', 'slr': 'NP_001124457.1:p.(I1226M)'} assert results['NM_001130985.1:c.3678C>G']['submitted_variant'] == '2-71825797-C-G' @@ -13911,8 +13211,7 @@ def test_variant269(self): assert 'NM_001130987.1:c.3678C>G' in list(results.keys()) assert results['NM_001130987.1:c.3678C>G']['hgvs_lrg_transcript_variant'] == 'LRG_845t2:c.3678C>G' assert results['NM_001130987.1:c.3678C>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001130987.1:c.3678C>G']['alt_genomic_loci'] == [] - assert 
results['NM_001130987.1:c.3678C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001130987.1:c.3678C>G']['alt_genomic_loci'], []) assert results['NM_001130987.1:c.3678C>G']['gene_symbol'] == 'DYSF' assert results['NM_001130987.1:c.3678C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124459.1:p.(Ile1226Met)', 'slr': 'NP_001124459.1:p.(I1226M)'} assert results['NM_001130987.1:c.3678C>G']['submitted_variant'] == '2-71825797-C-G' @@ -13929,8 +13228,7 @@ def test_variant269(self): assert 'NM_001130983.1:c.3627C>G' in list(results.keys()) assert results['NM_001130983.1:c.3627C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001130983.1:c.3627C>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001130983.1:c.3627C>G']['alt_genomic_loci'] == [] - assert results['NM_001130983.1:c.3627C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 6, mRNA' + self.assertCountEqual(results['NM_001130983.1:c.3627C>G']['alt_genomic_loci'], []) assert results['NM_001130983.1:c.3627C>G']['gene_symbol'] == 'DYSF' assert results['NM_001130983.1:c.3627C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124455.1:p.(Ile1209Met)', 'slr': 'NP_001124455.1:p.(I1209M)'} assert results['NM_001130983.1:c.3627C>G']['submitted_variant'] == '2-71825797-C-G' @@ -13948,8 +13246,7 @@ def test_variant269(self): assert 'NM_001130980.1:c.3675C>G' in list(results.keys()) assert results['NM_001130980.1:c.3675C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001130980.1:c.3675C>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001130980.1:c.3675C>G']['alt_genomic_loci'] == [] - assert results['NM_001130980.1:c.3675C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 13, mRNA' + self.assertCountEqual(results['NM_001130980.1:c.3675C>G']['alt_genomic_loci'], []) assert 
results['NM_001130980.1:c.3675C>G']['gene_symbol'] == 'DYSF' assert results['NM_001130980.1:c.3675C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124452.1:p.(Ile1225Met)', 'slr': 'NP_001124452.1:p.(I1225M)'} assert results['NM_001130980.1:c.3675C>G']['submitted_variant'] == '2-71825797-C-G' @@ -13966,8 +13263,7 @@ def test_variant269(self): assert 'NM_003494.3:c.3624C>G' in list(results.keys()) assert results['NM_003494.3:c.3624C>G']['hgvs_lrg_transcript_variant'] == 'LRG_845t1:c.3624C>G' assert results['NM_003494.3:c.3624C>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003494.3:c.3624C>G']['alt_genomic_loci'] == [] - assert results['NM_003494.3:c.3624C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 8, mRNA' + self.assertCountEqual(results['NM_003494.3:c.3624C>G']['alt_genomic_loci'], []) assert results['NM_003494.3:c.3624C>G']['gene_symbol'] == 'DYSF' assert results['NM_003494.3:c.3624C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003485.1(LRG_845p1):p.(Ile1208Met)', 'slr': 'NP_003485.1:p.(I1208M)'} assert results['NM_003494.3:c.3624C>G']['submitted_variant'] == '2-71825797-C-G' @@ -13984,8 +13280,7 @@ def test_variant269(self): assert 'NM_001130984.1:c.3585C>G' in list(results.keys()) assert results['NM_001130984.1:c.3585C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001130984.1:c.3585C>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001130984.1:c.3585C>G']['alt_genomic_loci'] == [] - assert results['NM_001130984.1:c.3585C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001130984.1:c.3585C>G']['alt_genomic_loci'], []) assert results['NM_001130984.1:c.3585C>G']['gene_symbol'] == 'DYSF' assert results['NM_001130984.1:c.3585C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124456.1:p.(Ile1195Met)', 'slr': 'NP_001124456.1:p.(I1195M)'} assert 
results['NM_001130984.1:c.3585C>G']['submitted_variant'] == '2-71825797-C-G' @@ -14002,8 +13297,7 @@ def test_variant269(self): assert 'NM_001130977.1:c.3582C>G' in list(results.keys()) assert results['NM_001130977.1:c.3582C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001130977.1:c.3582C>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001130977.1:c.3582C>G']['alt_genomic_loci'] == [] - assert results['NM_001130977.1:c.3582C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 10, mRNA' + self.assertCountEqual(results['NM_001130977.1:c.3582C>G']['alt_genomic_loci'], []) assert results['NM_001130977.1:c.3582C>G']['gene_symbol'] == 'DYSF' assert results['NM_001130977.1:c.3582C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124449.1:p.(Ile1194Met)', 'slr': 'NP_001124449.1:p.(I1194M)'} assert results['NM_001130977.1:c.3582C>G']['submitted_variant'] == '2-71825797-C-G' @@ -14020,8 +13314,7 @@ def test_variant269(self): assert 'NM_001130455.1:c.3627C>G' in list(results.keys()) assert results['NM_001130455.1:c.3627C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001130455.1:c.3627C>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001130455.1:c.3627C>G']['alt_genomic_loci'] == [] - assert results['NM_001130455.1:c.3627C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001130455.1:c.3627C>G']['alt_genomic_loci'], []) assert results['NM_001130455.1:c.3627C>G']['gene_symbol'] == 'DYSF' assert results['NM_001130455.1:c.3627C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001123927.1:p.(Ile1209Met)', 'slr': 'NP_001123927.1:p.(I1209M)'} assert results['NM_001130455.1:c.3627C>G']['submitted_variant'] == '2-71825797-C-G' @@ -14038,8 +13331,7 @@ def test_variant269(self): assert 'NM_001130978.1:c.3624C>G' in list(results.keys()) assert 
results['NM_001130978.1:c.3624C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001130978.1:c.3624C>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001130978.1:c.3624C>G']['alt_genomic_loci'] == [] - assert results['NM_001130978.1:c.3624C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 11, mRNA' + self.assertCountEqual(results['NM_001130978.1:c.3624C>G']['alt_genomic_loci'], []) assert results['NM_001130978.1:c.3624C>G']['gene_symbol'] == 'DYSF' assert results['NM_001130978.1:c.3624C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124450.1:p.(Ile1208Met)', 'slr': 'NP_001124450.1:p.(I1208M)'} assert results['NM_001130978.1:c.3624C>G']['submitted_variant'] == '2-71825797-C-G' @@ -14056,8 +13348,7 @@ def test_variant269(self): assert 'NM_001130982.1:c.3720C>G' in list(results.keys()) assert results['NM_001130982.1:c.3720C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001130982.1:c.3720C>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001130982.1:c.3720C>G']['alt_genomic_loci'] == [] - assert results['NM_001130982.1:c.3720C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 7, mRNA' + self.assertCountEqual(results['NM_001130982.1:c.3720C>G']['alt_genomic_loci'], []) assert results['NM_001130982.1:c.3720C>G']['gene_symbol'] == 'DYSF' assert results['NM_001130982.1:c.3720C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124454.1:p.(Ile1240Met)', 'slr': 'NP_001124454.1:p.(I1240M)'} assert results['NM_001130982.1:c.3720C>G']['submitted_variant'] == '2-71825797-C-G' @@ -14074,8 +13365,7 @@ def test_variant269(self): assert 'NM_001130986.1:c.3585C>G' in list(results.keys()) assert results['NM_001130986.1:c.3585C>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001130986.1:c.3585C>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001130986.1:c.3585C>G']['alt_genomic_loci'] == [] - 
assert results['NM_001130986.1:c.3585C>G']['transcript_description'] == 'Homo sapiens dysferlin (DYSF), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001130986.1:c.3585C>G']['alt_genomic_loci'], []) assert results['NM_001130986.1:c.3585C>G']['gene_symbol'] == 'DYSF' assert results['NM_001130986.1:c.3585C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124458.1:p.(Ile1195Met)', 'slr': 'NP_001124458.1:p.(I1195M)'} assert results['NM_001130986.1:c.3585C>G']['submitted_variant'] == '2-71825797-C-G' @@ -14098,8 +13388,7 @@ def test_variant270(self): assert 'NM_021007.2:c.1718G>C' in list(results.keys()) assert results['NM_021007.2:c.1718G>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_021007.2:c.1718G>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_021007.2:c.1718G>C']['alt_genomic_loci'] == [] - assert results['NM_021007.2:c.1718G>C']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_021007.2:c.1718G>C']['alt_genomic_loci'], []) assert results['NM_021007.2:c.1718G>C']['gene_symbol'] == 'SCN2A' assert results['NM_021007.2:c.1718G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_066287.2:p.(Ser573Thr)', 'slr': 'NP_066287.2:p.(S573T)'} assert results['NM_021007.2:c.1718G>C']['submitted_variant'] == '2-166179712-G-C' @@ -14117,8 +13406,7 @@ def test_variant270(self): assert 'NM_001040143.1:c.1718G>C' in list(results.keys()) assert results['NM_001040143.1:c.1718G>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001040143.1:c.1718G>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001040143.1:c.1718G>C']['alt_genomic_loci'] == [] - assert results['NM_001040143.1:c.1718G>C']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 3, mRNA' + 
self.assertCountEqual(results['NM_001040143.1:c.1718G>C']['alt_genomic_loci'], []) assert results['NM_001040143.1:c.1718G>C']['gene_symbol'] == 'SCN2A' assert results['NM_001040143.1:c.1718G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035233.1:p.(Ser573Thr)', 'slr': 'NP_001035233.1:p.(S573T)'} assert results['NM_001040143.1:c.1718G>C']['submitted_variant'] == '2-166179712-G-C' @@ -14135,8 +13423,7 @@ def test_variant270(self): assert 'NM_001040142.1:c.1718G>C' in list(results.keys()) assert results['NM_001040142.1:c.1718G>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001040142.1:c.1718G>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001040142.1:c.1718G>C']['alt_genomic_loci'] == [] - assert results['NM_001040142.1:c.1718G>C']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001040142.1:c.1718G>C']['alt_genomic_loci'], []) assert results['NM_001040142.1:c.1718G>C']['gene_symbol'] == 'SCN2A' assert results['NM_001040142.1:c.1718G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035232.1:p.(Ser573Thr)', 'slr': 'NP_001035232.1:p.(S573T)'} assert results['NM_001040142.1:c.1718G>C']['submitted_variant'] == '2-166179712-G-C' @@ -14159,8 +13446,7 @@ def test_variant271(self): assert 'NM_021007.2:c.2026A>G' in list(results.keys()) assert results['NM_021007.2:c.2026A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_021007.2:c.2026A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_021007.2:c.2026A>G']['alt_genomic_loci'] == [] - assert results['NM_021007.2:c.2026A>G']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_021007.2:c.2026A>G']['alt_genomic_loci'], []) assert results['NM_021007.2:c.2026A>G']['gene_symbol'] == 'SCN2A' assert 
results['NM_021007.2:c.2026A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_066287.2:p.(Thr676Ala)', 'slr': 'NP_066287.2:p.(T676A)'} assert results['NM_021007.2:c.2026A>G']['submitted_variant'] == '2-166183371-A-G' @@ -14178,8 +13464,7 @@ def test_variant271(self): assert 'NM_001040143.1:c.2026A>G' in list(results.keys()) assert results['NM_001040143.1:c.2026A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001040143.1:c.2026A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001040143.1:c.2026A>G']['alt_genomic_loci'] == [] - assert results['NM_001040143.1:c.2026A>G']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001040143.1:c.2026A>G']['alt_genomic_loci'], []) assert results['NM_001040143.1:c.2026A>G']['gene_symbol'] == 'SCN2A' assert results['NM_001040143.1:c.2026A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035233.1:p.(Thr676Ala)', 'slr': 'NP_001035233.1:p.(T676A)'} assert results['NM_001040143.1:c.2026A>G']['submitted_variant'] == '2-166183371-A-G' @@ -14196,8 +13481,7 @@ def test_variant271(self): assert 'NM_001040142.1:c.2026A>G' in list(results.keys()) assert results['NM_001040142.1:c.2026A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001040142.1:c.2026A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001040142.1:c.2026A>G']['alt_genomic_loci'] == [] - assert results['NM_001040142.1:c.2026A>G']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 2 (SCN2A), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001040142.1:c.2026A>G']['alt_genomic_loci'], []) assert results['NM_001040142.1:c.2026A>G']['gene_symbol'] == 'SCN2A' assert results['NM_001040142.1:c.2026A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035232.1:p.(Thr676Ala)', 'slr': 'NP_001035232.1:p.(T676A)'} assert 
results['NM_001040142.1:c.2026A>G']['submitted_variant'] == '2-166183371-A-G' @@ -14220,8 +13504,7 @@ def test_variant272(self): assert 'NM_001353951.1:c.233_242delinsGT' in list(results.keys()) assert results['NM_001353951.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353951.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001353951.1:c.233_242delinsGT']['alt_genomic_loci'] == [] - assert results['NM_001353951.1:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 8, mRNA' + self.assertCountEqual(results['NM_001353951.1:c.233_242delinsGT']['alt_genomic_loci'], []) assert results['NM_001353951.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' assert results['NM_001353951.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340880.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340880.1:p.(E78Gfs*7)'} assert results['NM_001353951.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' @@ -14238,8 +13521,7 @@ def test_variant272(self): assert 'NM_001353958.1:c.233_242delinsGT' in list(results.keys()) assert results['NM_001353958.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353958.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001353958.1:c.233_242delinsGT']['alt_genomic_loci'] == [] - assert results['NM_001353958.1:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 13, mRNA' + self.assertCountEqual(results['NM_001353958.1:c.233_242delinsGT']['alt_genomic_loci'], []) assert results['NM_001353958.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' assert results['NM_001353958.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340887.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340887.1:p.(E78Gfs*7)'} assert 
results['NM_001353958.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' @@ -14256,8 +13538,7 @@ def test_variant272(self): assert 'NM_001202435.1:c.233_242delinsGT' in list(results.keys()) assert results['NM_001202435.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001202435.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001202435.1:c.233_242delinsGT']['alt_genomic_loci'] == [] - assert results['NM_001202435.1:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001202435.1:c.233_242delinsGT']['alt_genomic_loci'], []) assert results['NM_001202435.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' assert results['NM_001202435.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001189364.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001189364.1:p.(E78Gfs*7)'} assert results['NM_001202435.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' @@ -14274,8 +13555,7 @@ def test_variant272(self): assert 'NR_148667.1:n.638_647delinsGT' in list(results.keys()) assert results['NR_148667.1:n.638_647delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NR_148667.1:n.638_647delinsGT']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_148667.1:n.638_647delinsGT']['alt_genomic_loci'] == [] - assert results['NR_148667.1:n.638_647delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 16, non-coding RNA' + self.assertCountEqual(results['NR_148667.1:n.638_647delinsGT']['alt_genomic_loci'], []) assert results['NR_148667.1:n.638_647delinsGT']['gene_symbol'] == 'SCN1A' assert results['NR_148667.1:n.638_647delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert 
results['NR_148667.1:n.638_647delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' @@ -14292,8 +13572,7 @@ def test_variant272(self): assert 'NM_001165964.1:c.233_242delinsGT' in list(results.keys()) assert results['NM_001165964.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001165964.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001165964.1:c.233_242delinsGT']['alt_genomic_loci'] == [] - assert results['NM_001165964.1:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001165964.1:c.233_242delinsGT']['alt_genomic_loci'], []) assert results['NM_001165964.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' assert results['NM_001165964.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159436.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159436.1:p.(E78Gfs*7)'} assert results['NM_001165964.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' @@ -14310,8 +13589,7 @@ def test_variant272(self): assert 'NM_001202435.2:c.233_242delinsGT' in list(results.keys()) assert results['NM_001202435.2:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001202435.2:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001202435.2:c.233_242delinsGT']['alt_genomic_loci'] == [] - assert results['NM_001202435.2:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001202435.2:c.233_242delinsGT']['alt_genomic_loci'], []) assert results['NM_001202435.2:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' assert results['NM_001202435.2:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001189364.1:p.(Glu78GlyfsTer7)', 'slr': 
'NP_001189364.1:p.(E78Gfs*7)'} assert results['NM_001202435.2:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' @@ -14328,8 +13606,7 @@ def test_variant272(self): assert 'NM_006920.5:c.233_242delinsGT' in list(results.keys()) assert results['NM_006920.5:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_006920.5:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_006920.5:c.233_242delinsGT']['alt_genomic_loci'] == [] - assert results['NM_006920.5:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_006920.5:c.233_242delinsGT']['alt_genomic_loci'], []) assert results['NM_006920.5:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' assert results['NM_006920.5:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_008851.3(LRG_8p1):p.(Glu78GlyfsTer7)', 'slr': 'NP_008851.3:p.(E78Gfs*7)'} assert results['NM_006920.5:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' @@ -14346,8 +13623,7 @@ def test_variant272(self): assert 'NM_001165963.1:c.233_242delinsGT' in list(results.keys()) assert results['NM_001165963.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001165963.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001165963.1:c.233_242delinsGT']['alt_genomic_loci'] == [] - assert results['NM_001165963.1:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001165963.1:c.233_242delinsGT']['alt_genomic_loci'], []) assert results['NM_001165963.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' assert results['NM_001165963.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159435.1:p.(Glu78GlyfsTer7)', 'slr': 
'NP_001159435.1:p.(E78Gfs*7)'} assert results['NM_001165963.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' @@ -14364,8 +13640,7 @@ def test_variant272(self): assert 'NM_001353955.1:c.233_242delinsGT' in list(results.keys()) assert results['NM_001353955.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353955.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001353955.1:c.233_242delinsGT']['alt_genomic_loci'] == [] - assert results['NM_001353955.1:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 11, mRNA' + self.assertCountEqual(results['NM_001353955.1:c.233_242delinsGT']['alt_genomic_loci'], []) assert results['NM_001353955.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' assert results['NM_001353955.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340884.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340884.1:p.(E78Gfs*7)'} assert results['NM_001353955.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' @@ -14382,8 +13657,7 @@ def test_variant272(self): assert 'NM_001353961.1:c.-2193_-2184delinsGT' in list(results.keys()) assert results['NM_001353961.1:c.-2193_-2184delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353961.1:c.-2193_-2184delinsGT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001353961.1:c.-2193_-2184delinsGT']['alt_genomic_loci'] == [] - assert results['NM_001353961.1:c.-2193_-2184delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 15, mRNA' + self.assertCountEqual(results['NM_001353961.1:c.-2193_-2184delinsGT']['alt_genomic_loci'], []) assert results['NM_001353961.1:c.-2193_-2184delinsGT']['gene_symbol'] == 'SCN1A' assert results['NM_001353961.1:c.-2193_-2184delinsGT']['hgvs_predicted_protein_consequence'] == 
{'tlr': 'NP_001340890.1:p.?', 'slr': 'NP_001340890.1:p.?'} assert results['NM_001353961.1:c.-2193_-2184delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' @@ -14400,8 +13674,7 @@ def test_variant272(self): assert 'NM_001165963.2:c.233_242delinsGT' in list(results.keys()) assert results['NM_001165963.2:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001165963.2:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001165963.2:c.233_242delinsGT']['alt_genomic_loci'] == [] - assert results['NM_001165963.2:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001165963.2:c.233_242delinsGT']['alt_genomic_loci'], []) assert results['NM_001165963.2:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' assert results['NM_001165963.2:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159435.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159435.1:p.(E78Gfs*7)'} assert results['NM_001165963.2:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' @@ -14418,8 +13691,7 @@ def test_variant272(self): assert 'NM_001353950.1:c.233_242delinsGT' in list(results.keys()) assert results['NM_001353950.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353950.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001353950.1:c.233_242delinsGT']['alt_genomic_loci'] == [] - assert results['NM_001353950.1:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 7, mRNA' + self.assertCountEqual(results['NM_001353950.1:c.233_242delinsGT']['alt_genomic_loci'], []) assert results['NM_001353950.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' assert results['NM_001353950.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == 
{'tlr': 'NP_001340879.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340879.1:p.(E78Gfs*7)'} assert results['NM_001353950.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' @@ -14437,8 +13709,7 @@ def test_variant272(self): assert 'NM_001353948.1:c.233_242delinsGT' in list(results.keys()) assert results['NM_001353948.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353948.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001353948.1:c.233_242delinsGT']['alt_genomic_loci'] == [] - assert results['NM_001353948.1:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001353948.1:c.233_242delinsGT']['alt_genomic_loci'], []) assert results['NM_001353948.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' assert results['NM_001353948.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340877.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340877.1:p.(E78Gfs*7)'} assert results['NM_001353948.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' @@ -14455,8 +13726,7 @@ def test_variant272(self): assert 'NM_001353949.1:c.233_242delinsGT' in list(results.keys()) assert results['NM_001353949.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353949.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001353949.1:c.233_242delinsGT']['alt_genomic_loci'] == [] - assert results['NM_001353949.1:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 6, mRNA' + self.assertCountEqual(results['NM_001353949.1:c.233_242delinsGT']['alt_genomic_loci'], []) assert results['NM_001353949.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' assert 
results['NM_001353949.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340878.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340878.1:p.(E78Gfs*7)'} assert results['NM_001353949.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' @@ -14473,8 +13743,7 @@ def test_variant272(self): assert 'NM_001353957.1:c.233_242delinsGT' in list(results.keys()) assert results['NM_001353957.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353957.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001353957.1:c.233_242delinsGT']['alt_genomic_loci'] == [] - assert results['NM_001353957.1:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 12, mRNA' + self.assertCountEqual(results['NM_001353957.1:c.233_242delinsGT']['alt_genomic_loci'], []) assert results['NM_001353957.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' assert results['NM_001353957.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340886.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340886.1:p.(E78Gfs*7)'} assert results['NM_001353957.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' @@ -14491,8 +13760,7 @@ def test_variant272(self): assert 'NM_001353952.1:c.233_242delinsGT' in list(results.keys()) assert results['NM_001353952.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353952.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001353952.1:c.233_242delinsGT']['alt_genomic_loci'] == [] - assert results['NM_001353952.1:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 9, mRNA' + self.assertCountEqual(results['NM_001353952.1:c.233_242delinsGT']['alt_genomic_loci'], []) assert 
results['NM_001353952.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' assert results['NM_001353952.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340881.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340881.1:p.(E78Gfs*7)'} assert results['NM_001353952.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' @@ -14509,8 +13777,7 @@ def test_variant272(self): assert 'NM_001353954.1:c.233_242delinsGT' in list(results.keys()) assert results['NM_001353954.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353954.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001353954.1:c.233_242delinsGT']['alt_genomic_loci'] == [] - assert results['NM_001353954.1:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 10, mRNA' + self.assertCountEqual(results['NM_001353954.1:c.233_242delinsGT']['alt_genomic_loci'], []) assert results['NM_001353954.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' assert results['NM_001353954.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340883.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340883.1:p.(E78Gfs*7)'} assert results['NM_001353954.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' @@ -14527,8 +13794,7 @@ def test_variant272(self): assert 'NM_006920.4:c.233_242delinsGT' in list(results.keys()) assert results['NM_006920.4:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == 'LRG_8t1:c.233_242delinsGT' assert results['NM_006920.4:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_006920.4:c.233_242delinsGT']['alt_genomic_loci'] == [] - assert results['NM_006920.4:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 2, mRNA' + 
self.assertCountEqual(results['NM_006920.4:c.233_242delinsGT']['alt_genomic_loci'], []) assert results['NM_006920.4:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' assert results['NM_006920.4:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_008851.3(LRG_8p1):p.(Glu78GlyfsTer7)', 'slr': 'NP_008851.3:p.(E78Gfs*7)'} assert results['NM_006920.4:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' @@ -14545,8 +13811,7 @@ def test_variant272(self): assert 'NM_001353960.1:c.233_242delinsGT' in list(results.keys()) assert results['NM_001353960.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353960.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001353960.1:c.233_242delinsGT']['alt_genomic_loci'] == [] - assert results['NM_001353960.1:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 14, mRNA' + self.assertCountEqual(results['NM_001353960.1:c.233_242delinsGT']['alt_genomic_loci'], []) assert results['NM_001353960.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' assert results['NM_001353960.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340889.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340889.1:p.(E78Gfs*7)'} assert results['NM_001353960.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' @@ -14563,8 +13828,7 @@ def test_variant272(self): assert 'NM_001165964.2:c.233_242delinsGT' in list(results.keys()) assert results['NM_001165964.2:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001165964.2:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001165964.2:c.233_242delinsGT']['alt_genomic_loci'] == [] - assert results['NM_001165964.2:c.233_242delinsGT']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 
3, mRNA' + self.assertCountEqual(results['NM_001165964.2:c.233_242delinsGT']['alt_genomic_loci'], []) assert results['NM_001165964.2:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' assert results['NM_001165964.2:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159436.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159436.1:p.(E78Gfs*7)'} assert results['NM_001165964.2:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' @@ -14587,8 +13851,7 @@ def test_variant273(self): assert 'NR_148667.1:n.638_645del' in list(results.keys()) assert results['NR_148667.1:n.638_645del']['hgvs_lrg_transcript_variant'] == '' assert results['NR_148667.1:n.638_645del']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_148667.1:n.638_645del']['alt_genomic_loci'] == [] - assert results['NR_148667.1:n.638_645del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 16, non-coding RNA' + self.assertCountEqual(results['NR_148667.1:n.638_645del']['alt_genomic_loci'], []) assert results['NR_148667.1:n.638_645del']['gene_symbol'] == 'SCN1A' assert results['NR_148667.1:n.638_645del']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_148667.1:n.638_645del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' @@ -14605,8 +13868,7 @@ def test_variant273(self): assert 'NM_001165964.2:c.233_240del' in list(results.keys()) assert results['NM_001165964.2:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001165964.2:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001165964.2:c.233_240del']['alt_genomic_loci'] == [] - assert results['NM_001165964.2:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001165964.2:c.233_240del']['alt_genomic_loci'], []) assert 
results['NM_001165964.2:c.233_240del']['gene_symbol'] == 'SCN1A' assert results['NM_001165964.2:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159436.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159436.1:p.(E78Gfs*7)'} assert results['NM_001165964.2:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' @@ -14623,8 +13885,7 @@ def test_variant273(self): assert 'NM_001353951.1:c.233_240del' in list(results.keys()) assert results['NM_001353951.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353951.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001353951.1:c.233_240del']['alt_genomic_loci'] == [] - assert results['NM_001353951.1:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 8, mRNA' + self.assertCountEqual(results['NM_001353951.1:c.233_240del']['alt_genomic_loci'], []) assert results['NM_001353951.1:c.233_240del']['gene_symbol'] == 'SCN1A' assert results['NM_001353951.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340880.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340880.1:p.(E78Gfs*7)'} assert results['NM_001353951.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' @@ -14641,8 +13902,7 @@ def test_variant273(self): assert 'NM_001353954.1:c.233_240del' in list(results.keys()) assert results['NM_001353954.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353954.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001353954.1:c.233_240del']['alt_genomic_loci'] == [] - assert results['NM_001353954.1:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 10, mRNA' + self.assertCountEqual(results['NM_001353954.1:c.233_240del']['alt_genomic_loci'], []) assert results['NM_001353954.1:c.233_240del']['gene_symbol'] == 'SCN1A' assert 
results['NM_001353954.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340883.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340883.1:p.(E78Gfs*7)'} assert results['NM_001353954.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' @@ -14659,8 +13919,7 @@ def test_variant273(self): assert 'NM_001353961.1:c.-2193_-2186del' in list(results.keys()) assert results['NM_001353961.1:c.-2193_-2186del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353961.1:c.-2193_-2186del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001353961.1:c.-2193_-2186del']['alt_genomic_loci'] == [] - assert results['NM_001353961.1:c.-2193_-2186del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 15, mRNA' + self.assertCountEqual(results['NM_001353961.1:c.-2193_-2186del']['alt_genomic_loci'], []) assert results['NM_001353961.1:c.-2193_-2186del']['gene_symbol'] == 'SCN1A' assert results['NM_001353961.1:c.-2193_-2186del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340890.1:p.?', 'slr': 'NP_001340890.1:p.?'} assert results['NM_001353961.1:c.-2193_-2186del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' @@ -14677,8 +13936,7 @@ def test_variant273(self): assert 'NM_001353948.1:c.233_240del' in list(results.keys()) assert results['NM_001353948.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353948.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001353948.1:c.233_240del']['alt_genomic_loci'] == [] - assert results['NM_001353948.1:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001353948.1:c.233_240del']['alt_genomic_loci'], []) assert results['NM_001353948.1:c.233_240del']['gene_symbol'] == 'SCN1A' assert 
results['NM_001353948.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340877.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340877.1:p.(E78Gfs*7)'} assert results['NM_001353948.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' @@ -14695,8 +13953,7 @@ def test_variant273(self): assert 'NM_001353960.1:c.233_240del' in list(results.keys()) assert results['NM_001353960.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353960.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001353960.1:c.233_240del']['alt_genomic_loci'] == [] - assert results['NM_001353960.1:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 14, mRNA' + self.assertCountEqual(results['NM_001353960.1:c.233_240del']['alt_genomic_loci'], []) assert results['NM_001353960.1:c.233_240del']['gene_symbol'] == 'SCN1A' assert results['NM_001353960.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340889.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340889.1:p.(E78Gfs*7)'} assert results['NM_001353960.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' @@ -14713,8 +13970,7 @@ def test_variant273(self): assert 'NM_001202435.1:c.233_240del' in list(results.keys()) assert results['NM_001202435.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001202435.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001202435.1:c.233_240del']['alt_genomic_loci'] == [] - assert results['NM_001202435.1:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001202435.1:c.233_240del']['alt_genomic_loci'], []) assert results['NM_001202435.1:c.233_240del']['gene_symbol'] == 'SCN1A' assert results['NM_001202435.1:c.233_240del']['hgvs_predicted_protein_consequence'] 
== {'tlr': 'NP_001189364.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001189364.1:p.(E78Gfs*7)'} assert results['NM_001202435.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' @@ -14731,8 +13987,7 @@ def test_variant273(self): assert 'NM_001202435.2:c.233_240del' in list(results.keys()) assert results['NM_001202435.2:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001202435.2:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001202435.2:c.233_240del']['alt_genomic_loci'] == [] - assert results['NM_001202435.2:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001202435.2:c.233_240del']['alt_genomic_loci'], []) assert results['NM_001202435.2:c.233_240del']['gene_symbol'] == 'SCN1A' assert results['NM_001202435.2:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001189364.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001189364.1:p.(E78Gfs*7)'} assert results['NM_001202435.2:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' @@ -14749,8 +14004,7 @@ def test_variant273(self): assert 'NM_006920.5:c.233_240del' in list(results.keys()) assert results['NM_006920.5:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_006920.5:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_006920.5:c.233_240del']['alt_genomic_loci'] == [] - assert results['NM_006920.5:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_006920.5:c.233_240del']['alt_genomic_loci'], []) assert results['NM_006920.5:c.233_240del']['gene_symbol'] == 'SCN1A' assert results['NM_006920.5:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_008851.3(LRG_8p1):p.(Glu78GlyfsTer7)', 'slr': 'NP_008851.3:p.(E78Gfs*7)'} assert 
results['NM_006920.5:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' @@ -14767,8 +14021,7 @@ def test_variant273(self): assert 'NM_001353955.1:c.233_240del' in list(results.keys()) assert results['NM_001353955.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353955.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001353955.1:c.233_240del']['alt_genomic_loci'] == [] - assert results['NM_001353955.1:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 11, mRNA' + self.assertCountEqual(results['NM_001353955.1:c.233_240del']['alt_genomic_loci'], []) assert results['NM_001353955.1:c.233_240del']['gene_symbol'] == 'SCN1A' assert results['NM_001353955.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340884.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340884.1:p.(E78Gfs*7)'} assert results['NM_001353955.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' @@ -14785,8 +14038,7 @@ def test_variant273(self): assert 'NM_001353952.1:c.233_240del' in list(results.keys()) assert results['NM_001353952.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353952.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001353952.1:c.233_240del']['alt_genomic_loci'] == [] - assert results['NM_001353952.1:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 9, mRNA' + self.assertCountEqual(results['NM_001353952.1:c.233_240del']['alt_genomic_loci'], []) assert results['NM_001353952.1:c.233_240del']['gene_symbol'] == 'SCN1A' assert results['NM_001353952.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340881.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340881.1:p.(E78Gfs*7)'} assert results['NM_001353952.1:c.233_240del']['submitted_variant'] == 
'2-166929891-CCAGGTCCT-C' @@ -14803,8 +14055,7 @@ def test_variant273(self): assert 'NM_001353957.1:c.233_240del' in list(results.keys()) assert results['NM_001353957.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353957.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001353957.1:c.233_240del']['alt_genomic_loci'] == [] - assert results['NM_001353957.1:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 12, mRNA' + self.assertCountEqual(results['NM_001353957.1:c.233_240del']['alt_genomic_loci'], []) assert results['NM_001353957.1:c.233_240del']['gene_symbol'] == 'SCN1A' assert results['NM_001353957.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340886.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340886.1:p.(E78Gfs*7)'} assert results['NM_001353957.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' @@ -14822,8 +14073,7 @@ def test_variant273(self): assert 'NM_006920.4:c.233_240del' in list(results.keys()) assert results['NM_006920.4:c.233_240del']['hgvs_lrg_transcript_variant'] == 'LRG_8t1:c.233_240del' assert results['NM_006920.4:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_006920.4:c.233_240del']['alt_genomic_loci'] == [] - assert results['NM_006920.4:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_006920.4:c.233_240del']['alt_genomic_loci'], []) assert results['NM_006920.4:c.233_240del']['gene_symbol'] == 'SCN1A' assert results['NM_006920.4:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_008851.3(LRG_8p1):p.(Glu78GlyfsTer7)', 'slr': 'NP_008851.3:p.(E78Gfs*7)'} assert results['NM_006920.4:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' @@ -14840,8 +14090,7 @@ def test_variant273(self): 
assert 'NM_001353950.1:c.233_240del' in list(results.keys()) assert results['NM_001353950.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353950.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001353950.1:c.233_240del']['alt_genomic_loci'] == [] - assert results['NM_001353950.1:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 7, mRNA' + self.assertCountEqual(results['NM_001353950.1:c.233_240del']['alt_genomic_loci'], []) assert results['NM_001353950.1:c.233_240del']['gene_symbol'] == 'SCN1A' assert results['NM_001353950.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340879.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340879.1:p.(E78Gfs*7)'} assert results['NM_001353950.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' @@ -14858,8 +14107,7 @@ def test_variant273(self): assert 'NM_001165963.2:c.233_240del' in list(results.keys()) assert results['NM_001165963.2:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001165963.2:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001165963.2:c.233_240del']['alt_genomic_loci'] == [] - assert results['NM_001165963.2:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001165963.2:c.233_240del']['alt_genomic_loci'], []) assert results['NM_001165963.2:c.233_240del']['gene_symbol'] == 'SCN1A' assert results['NM_001165963.2:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159435.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159435.1:p.(E78Gfs*7)'} assert results['NM_001165963.2:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' @@ -14876,8 +14124,7 @@ def test_variant273(self): assert 'NM_001165963.1:c.233_240del' in list(results.keys()) assert 
results['NM_001165963.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001165963.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001165963.1:c.233_240del']['alt_genomic_loci'] == [] - assert results['NM_001165963.1:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001165963.1:c.233_240del']['alt_genomic_loci'], []) assert results['NM_001165963.1:c.233_240del']['gene_symbol'] == 'SCN1A' assert results['NM_001165963.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159435.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159435.1:p.(E78Gfs*7)'} assert results['NM_001165963.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' @@ -14894,8 +14141,7 @@ def test_variant273(self): assert 'NM_001165964.1:c.233_240del' in list(results.keys()) assert results['NM_001165964.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001165964.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001165964.1:c.233_240del']['alt_genomic_loci'] == [] - assert results['NM_001165964.1:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001165964.1:c.233_240del']['alt_genomic_loci'], []) assert results['NM_001165964.1:c.233_240del']['gene_symbol'] == 'SCN1A' assert results['NM_001165964.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159436.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159436.1:p.(E78Gfs*7)'} assert results['NM_001165964.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' @@ -14912,8 +14158,7 @@ def test_variant273(self): assert 'NM_001353958.1:c.233_240del' in list(results.keys()) assert results['NM_001353958.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' 
assert results['NM_001353958.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001353958.1:c.233_240del']['alt_genomic_loci'] == [] - assert results['NM_001353958.1:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 13, mRNA' + self.assertCountEqual(results['NM_001353958.1:c.233_240del']['alt_genomic_loci'], []) assert results['NM_001353958.1:c.233_240del']['gene_symbol'] == 'SCN1A' assert results['NM_001353958.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340887.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340887.1:p.(E78Gfs*7)'} assert results['NM_001353958.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' @@ -14930,8 +14175,7 @@ def test_variant273(self): assert 'NM_001353949.1:c.233_240del' in list(results.keys()) assert results['NM_001353949.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001353949.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001353949.1:c.233_240del']['alt_genomic_loci'] == [] - assert results['NM_001353949.1:c.233_240del']['transcript_description'] == 'Homo sapiens sodium voltage-gated channel alpha subunit 1 (SCN1A), transcript variant 6, mRNA' + self.assertCountEqual(results['NM_001353949.1:c.233_240del']['alt_genomic_loci'], []) assert results['NM_001353949.1:c.233_240del']['gene_symbol'] == 'SCN1A' assert results['NM_001353949.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340878.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340878.1:p.(E78Gfs*7)'} assert results['NM_001353949.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' @@ -14954,8 +14198,7 @@ def test_variant274(self): assert 'NM_001256850.1:c.102051C>A' in list(results.keys()) assert results['NM_001256850.1:c.102051C>A']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001256850.1:c.102051C>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001256850.1:c.102051C>A']['alt_genomic_loci'] == [] - assert results['NM_001256850.1:c.102051C>A']['transcript_description'] == 'Homo sapiens titin (TTN), transcript variant N2BA, mRNA' + self.assertCountEqual(results['NM_001256850.1:c.102051C>A']['alt_genomic_loci'], []) assert results['NM_001256850.1:c.102051C>A']['gene_symbol'] == 'TTN' assert results['NM_001256850.1:c.102051C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243779.1:p.(Ser34017Arg)', 'slr': 'NP_001243779.1:p.(S34017R)'} assert results['NM_001256850.1:c.102051C>A']['submitted_variant'] == '2-179393504-G-T' @@ -14972,8 +14215,7 @@ def test_variant274(self): assert 'NR_038271.1:n.446+5141G>T' in list(results.keys()) assert results['NR_038271.1:n.446+5141G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NR_038271.1:n.446+5141G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_038271.1:n.446+5141G>T']['alt_genomic_loci'] == [] - assert results['NR_038271.1:n.446+5141G>T']['transcript_description'] == 'Homo sapiens TTN antisense RNA 1 (TTN-AS1), transcript variant 2, long non-coding RNA' + self.assertCountEqual(results['NR_038271.1:n.446+5141G>T']['alt_genomic_loci'], []) assert results['NR_038271.1:n.446+5141G>T']['gene_symbol'] == 'TTN-AS1' assert results['NR_038271.1:n.446+5141G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_038271.1:n.446+5141G>T']['submitted_variant'] == '2-179393504-G-T' @@ -14990,8 +14232,7 @@ def test_variant274(self): assert 'NM_133432.3:c.80154C>A' in list(results.keys()) assert results['NM_133432.3:c.80154C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_133432.3:c.80154C>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_133432.3:c.80154C>A']['alt_genomic_loci'] == [] - assert 
results['NM_133432.3:c.80154C>A']['transcript_description'] == 'Homo sapiens titin (TTN), transcript variant novex-1, mRNA' + self.assertCountEqual(results['NM_133432.3:c.80154C>A']['alt_genomic_loci'], []) assert results['NM_133432.3:c.80154C>A']['gene_symbol'] == 'TTN' assert results['NM_133432.3:c.80154C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_597676.3:p.(Ser26718Arg)', 'slr': 'NP_597676.3:p.(S26718R)'} assert results['NM_133432.3:c.80154C>A']['submitted_variant'] == '2-179393504-G-T' @@ -15008,8 +14249,7 @@ def test_variant274(self): assert 'NM_001267550.1:c.106974C>A' in list(results.keys()) assert results['NM_001267550.1:c.106974C>A']['hgvs_lrg_transcript_variant'] == 'LRG_391t1:c.106974C>A' assert results['NM_001267550.1:c.106974C>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001267550.1:c.106974C>A']['alt_genomic_loci'] == [] - assert results['NM_001267550.1:c.106974C>A']['transcript_description'] == 'Homo sapiens titin (TTN), transcript variant IC, mRNA' + self.assertCountEqual(results['NM_001267550.1:c.106974C>A']['alt_genomic_loci'], []) assert results['NM_001267550.1:c.106974C>A']['gene_symbol'] == 'TTN' assert results['NM_001267550.1:c.106974C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001254479.1:p.(Ser35658Arg)', 'slr': 'NP_001254479.1:p.(S35658R)'} assert results['NM_001267550.1:c.106974C>A']['submitted_variant'] == '2-179393504-G-T' @@ -15026,8 +14266,7 @@ def test_variant274(self): assert 'NR_038272.1:n.219+5141G>T' in list(results.keys()) assert results['NR_038272.1:n.219+5141G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NR_038272.1:n.219+5141G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_038272.1:n.219+5141G>T']['alt_genomic_loci'] == [] - assert results['NR_038272.1:n.219+5141G>T']['transcript_description'] == 'Homo sapiens TTN antisense RNA 1 (TTN-AS1), transcript variant 1, long non-coding RNA' + 
self.assertCountEqual(results['NR_038272.1:n.219+5141G>T']['alt_genomic_loci'], []) assert results['NR_038272.1:n.219+5141G>T']['gene_symbol'] == 'TTN-AS1' assert results['NR_038272.1:n.219+5141G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_038272.1:n.219+5141G>T']['submitted_variant'] == '2-179393504-G-T' @@ -15044,8 +14283,7 @@ def test_variant274(self): assert 'NM_133437.4:c.80355C>A' in list(results.keys()) assert results['NM_133437.4:c.80355C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_133437.4:c.80355C>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_133437.4:c.80355C>A']['alt_genomic_loci'] == [] - assert results['NM_133437.4:c.80355C>A']['transcript_description'] == 'Homo sapiens titin (TTN), transcript variant novex-2, mRNA' + self.assertCountEqual(results['NM_133437.4:c.80355C>A']['alt_genomic_loci'], []) assert results['NM_133437.4:c.80355C>A']['gene_symbol'] == 'TTN' assert results['NM_133437.4:c.80355C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_597681.4:p.(Ser26785Arg)', 'slr': 'NP_597681.4:p.(S26785R)'} assert results['NM_133437.4:c.80355C>A']['submitted_variant'] == '2-179393504-G-T' @@ -15063,8 +14301,7 @@ def test_variant274(self): assert 'NM_133378.4:c.99270C>A' in list(results.keys()) assert results['NM_133378.4:c.99270C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_133378.4:c.99270C>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_133378.4:c.99270C>A']['alt_genomic_loci'] == [] - assert results['NM_133378.4:c.99270C>A']['transcript_description'] == 'Homo sapiens titin (TTN), transcript variant N2-A, mRNA' + self.assertCountEqual(results['NM_133378.4:c.99270C>A']['alt_genomic_loci'], []) assert results['NM_133378.4:c.99270C>A']['gene_symbol'] == 'TTN' assert results['NM_133378.4:c.99270C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_596869.4:p.(Ser33090Arg)', 'slr': 
'NP_596869.4:p.(S33090R)'} assert results['NM_133378.4:c.99270C>A']['submitted_variant'] == '2-179393504-G-T' @@ -15081,8 +14318,7 @@ def test_variant274(self): assert 'NM_001267550.2:c.106974C>A' in list(results.keys()) assert results['NM_001267550.2:c.106974C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001267550.2:c.106974C>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001267550.2:c.106974C>A']['alt_genomic_loci'] == [] - assert results['NM_001267550.2:c.106974C>A']['transcript_description'] == 'Homo sapiens titin (TTN), transcript variant IC, mRNA' + self.assertCountEqual(results['NM_001267550.2:c.106974C>A']['alt_genomic_loci'], []) assert results['NM_001267550.2:c.106974C>A']['gene_symbol'] == 'TTN' assert results['NM_001267550.2:c.106974C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001254479.2:p.(Ser35658Arg)', 'slr': 'NP_001254479.2:p.(S35658R)'} assert results['NM_001267550.2:c.106974C>A']['submitted_variant'] == '2-179393504-G-T' @@ -15099,8 +14335,7 @@ def test_variant274(self): assert 'NM_133437.3:c.80355C>A' in list(results.keys()) assert results['NM_133437.3:c.80355C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_133437.3:c.80355C>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_133437.3:c.80355C>A']['alt_genomic_loci'] == [] - assert results['NM_133437.3:c.80355C>A']['transcript_description'] == 'Homo sapiens titin (TTN), transcript variant novex-2, mRNA' + self.assertCountEqual(results['NM_133437.3:c.80355C>A']['alt_genomic_loci'], []) assert results['NM_133437.3:c.80355C>A']['gene_symbol'] == 'TTN' assert results['NM_133437.3:c.80355C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_597681.3:p.(Ser26785Arg)', 'slr': 'NP_597681.3:p.(S26785R)'} assert results['NM_133437.3:c.80355C>A']['submitted_variant'] == '2-179393504-G-T' @@ -15117,8 +14352,7 @@ def test_variant274(self): assert 'NM_003319.4:c.79779C>A' in list(results.keys()) assert 
results['NM_003319.4:c.79779C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003319.4:c.79779C>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003319.4:c.79779C>A']['alt_genomic_loci'] == [] - assert results['NM_003319.4:c.79779C>A']['transcript_description'] == 'Homo sapiens titin (TTN), transcript variant N2-B, mRNA' + self.assertCountEqual(results['NM_003319.4:c.79779C>A']['alt_genomic_loci'], []) assert results['NM_003319.4:c.79779C>A']['gene_symbol'] == 'TTN' assert results['NM_003319.4:c.79779C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003310.4:p.(Ser26593Arg)', 'slr': 'NP_003310.4:p.(S26593R)'} assert results['NM_003319.4:c.79779C>A']['submitted_variant'] == '2-179393504-G-T' @@ -15142,8 +14376,7 @@ def test_variant275(self): assert 'NM_194250.1:c.3324_3347del' in list(results.keys()) assert results['NM_194250.1:c.3324_3347del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_194250.1:c.3324_3347del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_194250.1:c.3324_3347del']['alt_genomic_loci'] == [] - assert results['NM_194250.1:c.3324_3347del']['transcript_description'] == 'Homo sapiens zinc finger protein 804A (ZNF804A), mRNA' + self.assertCountEqual(results['NM_194250.1:c.3324_3347del']['alt_genomic_loci'], []) assert results['NM_194250.1:c.3324_3347del']['gene_symbol'] == 'ZNF804A' assert results['NM_194250.1:c.3324_3347del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_919226.1:p.(Ala1112_Ala1119del)', 'slr': 'NP_919226.1:p.(A1112_A1119del)'} assert results['NM_194250.1:c.3324_3347del']['submitted_variant'] == '2-185803444-TGCAGCTGCTGCAGCTGCAGCTGCA-T' @@ -15167,8 +14400,7 @@ def test_variant276(self): assert 'NM_002491.2:c.208G>T' in list(results.keys()) assert results['NM_002491.2:c.208G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_002491.2:c.208G>T']['refseqgene_context_intronic_sequence'] == '' - assert 
results['NM_002491.2:c.208G>T']['alt_genomic_loci'] == [] - assert results['NM_002491.2:c.208G>T']['transcript_description'] == 'Homo sapiens NADH:ubiquinone oxidoreductase subunit B3 (NDUFB3), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_002491.2:c.208G>T']['alt_genomic_loci'], []) assert results['NM_002491.2:c.208G>T']['gene_symbol'] == 'NDUFB3' assert results['NM_002491.2:c.208G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002482.1:p.(Gly70Ter)', 'slr': 'NP_002482.1:p.(G70*)'} assert results['NM_002491.2:c.208G>T']['submitted_variant'] == '2-201950249-G-T' @@ -15185,8 +14417,7 @@ def test_variant276(self): assert 'NM_001257102.1:c.208G>T' in list(results.keys()) assert results['NM_001257102.1:c.208G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001257102.1:c.208G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001257102.1:c.208G>T']['alt_genomic_loci'] == [] - assert results['NM_001257102.1:c.208G>T']['transcript_description'] == 'Homo sapiens NADH:ubiquinone oxidoreductase subunit B3 (NDUFB3), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001257102.1:c.208G>T']['alt_genomic_loci'], []) assert results['NM_001257102.1:c.208G>T']['gene_symbol'] == 'NDUFB3' assert results['NM_001257102.1:c.208G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001244031.1:p.(Gly70Ter)', 'slr': 'NP_001244031.1:p.(G70*)'} assert results['NM_001257102.1:c.208G>T']['submitted_variant'] == '2-201950249-G-T' @@ -15209,8 +14440,7 @@ def test_variant277(self): assert 'NM_004369.3:c.6282+1G>T' in list(results.keys()) assert results['NM_004369.3:c.6282+1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_473t1:c.6282+1G>T' assert results['NM_004369.3:c.6282+1G>T']['refseqgene_context_intronic_sequence'] == 'NG_008676.1(NM_004369.3):c.6282+1G>T' - assert results['NM_004369.3:c.6282+1G>T']['alt_genomic_loci'] == [] - assert results['NM_004369.3:c.6282+1G>T']['transcript_description'] == 'Homo sapiens collagen 
type VI alpha 3 chain (COL6A3), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_004369.3:c.6282+1G>T']['alt_genomic_loci'], []) assert results['NM_004369.3:c.6282+1G>T']['gene_symbol'] == 'COL6A3' assert results['NM_004369.3:c.6282+1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004360.2(LRG_473p1):p.?', 'slr': 'NP_004360.2:p.?'} assert results['NM_004369.3:c.6282+1G>T']['submitted_variant'] == '2-238268730-C-A' @@ -15228,8 +14458,7 @@ def test_variant277(self): assert 'NM_057166.4:c.4461+1G>T' in list(results.keys()) assert results['NM_057166.4:c.4461+1G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_057166.4:c.4461+1G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_057166.4:c.4461+1G>T']['alt_genomic_loci'] == [] - assert results['NM_057166.4:c.4461+1G>T']['transcript_description'] == 'Homo sapiens collagen type VI alpha 3 chain (COL6A3), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_057166.4:c.4461+1G>T']['alt_genomic_loci'], []) assert results['NM_057166.4:c.4461+1G>T']['gene_symbol'] == 'COL6A3' assert results['NM_057166.4:c.4461+1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_476507.3:p.?', 'slr': 'NP_476507.3:p.?'} assert results['NM_057166.4:c.4461+1G>T']['submitted_variant'] == '2-238268730-C-A' @@ -15246,8 +14475,7 @@ def test_variant277(self): assert 'NM_057167.3:c.5664+1G>T' in list(results.keys()) assert results['NM_057167.3:c.5664+1G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_057167.3:c.5664+1G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_057167.3:c.5664+1G>T']['alt_genomic_loci'] == [] - assert results['NM_057167.3:c.5664+1G>T']['transcript_description'] == 'Homo sapiens collagen type VI alpha 3 chain (COL6A3), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_057167.3:c.5664+1G>T']['alt_genomic_loci'], []) assert results['NM_057167.3:c.5664+1G>T']['gene_symbol'] == 'COL6A3' assert 
results['NM_057167.3:c.5664+1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_476508.2:p.?', 'slr': 'NP_476508.2:p.?'} assert results['NM_057167.3:c.5664+1G>T']['submitted_variant'] == '2-238268730-C-A' @@ -15270,8 +14498,7 @@ def test_variant278(self): assert 'NM_080860.2:c.727+5G>A' in list(results.keys()) assert results['NM_080860.2:c.727+5G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_080860.2:c.727+5G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_080860.2:c.727+5G>A']['alt_genomic_loci'] == [] - assert results['NM_080860.2:c.727+5G>A']['transcript_description'] == 'Homo sapiens radial spoke head 1 homolog (Chlamydomonas) (RSPH1), mRNA' + self.assertCountEqual(results['NM_080860.2:c.727+5G>A']['alt_genomic_loci'], []) assert results['NM_080860.2:c.727+5G>A']['gene_symbol'] == 'RSPH1' assert results['NM_080860.2:c.727+5G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_543136.1:p.?', 'slr': 'NP_543136.1:p.?'} assert results['NM_080860.2:c.727+5G>A']['submitted_variant'] == '21-43897396-C-T' @@ -15289,8 +14516,7 @@ def test_variant278(self): assert 'NM_080860.3:c.727+5G>A' in list(results.keys()) assert results['NM_080860.3:c.727+5G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_080860.3:c.727+5G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_080860.3:c.727+5G>A']['alt_genomic_loci'] == [] - assert results['NM_080860.3:c.727+5G>A']['transcript_description'] == 'Homo sapiens radial spoke head component 1 (RSPH1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_080860.3:c.727+5G>A']['alt_genomic_loci'], []) assert results['NM_080860.3:c.727+5G>A']['gene_symbol'] == 'RSPH1' assert results['NM_080860.3:c.727+5G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_543136.1:p.?', 'slr': 'NP_543136.1:p.?'} assert results['NM_080860.3:c.727+5G>A']['submitted_variant'] == '21-43897396-C-T' @@ -15307,8 +14533,7 @@ def test_variant278(self): assert 
'NM_001286506.1:c.613+5G>A' in list(results.keys()) assert results['NM_001286506.1:c.613+5G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001286506.1:c.613+5G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001286506.1:c.613+5G>A']['alt_genomic_loci'] == [] - assert results['NM_001286506.1:c.613+5G>A']['transcript_description'] == 'Homo sapiens radial spoke head component 1 (RSPH1), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001286506.1:c.613+5G>A']['alt_genomic_loci'], []) assert results['NM_001286506.1:c.613+5G>A']['gene_symbol'] == 'RSPH1' assert results['NM_001286506.1:c.613+5G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001273435.1:p.?', 'slr': 'NP_001273435.1:p.?'} assert results['NM_001286506.1:c.613+5G>A']['submitted_variant'] == '21-43897396-C-T' @@ -15331,8 +14556,7 @@ def test_variant279(self): assert 'NM_000268.3:c.924_925insCGACGC' in list(results.keys()) assert results['NM_000268.3:c.924_925insCGACGC']['hgvs_lrg_transcript_variant'] == 'LRG_511t1:c.924_925insCGACGC' assert results['NM_000268.3:c.924_925insCGACGC']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000268.3:c.924_925insCGACGC']['alt_genomic_loci'] == [] - assert results['NM_000268.3:c.924_925insCGACGC']['transcript_description'] == 'Homo sapiens neurofibromin 2 (NF2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000268.3:c.924_925insCGACGC']['alt_genomic_loci'], []) assert results['NM_000268.3:c.924_925insCGACGC']['gene_symbol'] == 'NF2' assert results['NM_000268.3:c.924_925insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000259.1(LRG_511p1):p.(Arg310_Arg311dup)', 'slr': 'NP_000259.1:p.(R310_R311dup)'} assert results['NM_000268.3:c.924_925insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' @@ -15349,8 +14573,7 @@ def test_variant279(self): assert 'NM_181828.2:c.798_799insCGACGC' in list(results.keys()) assert 
results['NM_181828.2:c.798_799insCGACGC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_181828.2:c.798_799insCGACGC']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_181828.2:c.798_799insCGACGC']['alt_genomic_loci'] == [] - assert results['NM_181828.2:c.798_799insCGACGC']['transcript_description'] == 'Homo sapiens neurofibromin 2 (NF2), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_181828.2:c.798_799insCGACGC']['alt_genomic_loci'], []) assert results['NM_181828.2:c.798_799insCGACGC']['gene_symbol'] == 'NF2' assert results['NM_181828.2:c.798_799insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861966.1:p.(Arg268_Arg269dup)', 'slr': 'NP_861966.1:p.(R268_R269dup)'} assert results['NM_181828.2:c.798_799insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' @@ -15367,8 +14590,7 @@ def test_variant279(self): assert 'NM_181830.2:c.675_676insCGACGC' in list(results.keys()) assert results['NM_181830.2:c.675_676insCGACGC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_181830.2:c.675_676insCGACGC']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_181830.2:c.675_676insCGACGC']['alt_genomic_loci'] == [] - assert results['NM_181830.2:c.675_676insCGACGC']['transcript_description'] == 'Homo sapiens neurofibromin 2 (NF2), transcript variant 7, mRNA' + self.assertCountEqual(results['NM_181830.2:c.675_676insCGACGC']['alt_genomic_loci'], []) assert results['NM_181830.2:c.675_676insCGACGC']['gene_symbol'] == 'NF2' assert results['NM_181830.2:c.675_676insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861968.1:p.(Arg227_Arg228dup)', 'slr': 'NP_861968.1:p.(R227_R228dup)'} assert results['NM_181830.2:c.675_676insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' @@ -15385,8 +14607,7 @@ def test_variant279(self): assert 'NM_181825.2:c.924_925insCGACGC' in list(results.keys()) assert results['NM_181825.2:c.924_925insCGACGC']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_181825.2:c.924_925insCGACGC']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_181825.2:c.924_925insCGACGC']['alt_genomic_loci'] == [] - assert results['NM_181825.2:c.924_925insCGACGC']['transcript_description'] == 'Homo sapiens neurofibromin 2 (NF2), transcript variant 12, mRNA' + self.assertCountEqual(results['NM_181825.2:c.924_925insCGACGC']['alt_genomic_loci'], []) assert results['NM_181825.2:c.924_925insCGACGC']['gene_symbol'] == 'NF2' assert results['NM_181825.2:c.924_925insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861546.1:p.(Arg310_Arg311dup)', 'slr': 'NP_861546.1:p.(R310_R311dup)'} assert results['NM_181825.2:c.924_925insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' @@ -15403,8 +14624,7 @@ def test_variant279(self): assert 'NM_181832.2:c.924_925insCGACGC' in list(results.keys()) assert results['NM_181832.2:c.924_925insCGACGC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_181832.2:c.924_925insCGACGC']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_181832.2:c.924_925insCGACGC']['alt_genomic_loci'] == [] - assert results['NM_181832.2:c.924_925insCGACGC']['transcript_description'] == 'Homo sapiens neurofibromin 2 (NF2), transcript variant 8, mRNA' + self.assertCountEqual(results['NM_181832.2:c.924_925insCGACGC']['alt_genomic_loci'], []) assert results['NM_181832.2:c.924_925insCGACGC']['gene_symbol'] == 'NF2' assert results['NM_181832.2:c.924_925insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861970.1:p.(Arg310_Arg311dup)', 'slr': 'NP_861970.1:p.(R310_R311dup)'} assert results['NM_181832.2:c.924_925insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' @@ -15421,8 +14641,7 @@ def test_variant279(self): assert 'NM_181833.2:c.447+26086_447+26087insCGACGC' in list(results.keys()) assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_181833.2:c.447+26086_447+26087insCGACGC']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['alt_genomic_loci'] == [] - assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['transcript_description'] == 'Homo sapiens neurofibromin 2 (NF2), transcript variant 9, mRNA' + self.assertCountEqual(results['NM_181833.2:c.447+26086_447+26087insCGACGC']['alt_genomic_loci'], []) assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['gene_symbol'] == 'NF2' assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861971.1:p.?', 'slr': 'NP_861971.1:p.?'} assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' @@ -15439,8 +14658,7 @@ def test_variant279(self): assert 'NM_016418.5:c.924_925insCGACGC' in list(results.keys()) assert results['NM_016418.5:c.924_925insCGACGC']['hgvs_lrg_transcript_variant'] == 'LRG_511t2:c.924_925insCGACGC' assert results['NM_016418.5:c.924_925insCGACGC']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_016418.5:c.924_925insCGACGC']['alt_genomic_loci'] == [] - assert results['NM_016418.5:c.924_925insCGACGC']['transcript_description'] == 'Homo sapiens neurofibromin 2 (NF2), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_016418.5:c.924_925insCGACGC']['alt_genomic_loci'], []) assert results['NM_016418.5:c.924_925insCGACGC']['gene_symbol'] == 'NF2' assert results['NM_016418.5:c.924_925insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057502.2(LRG_511p2):p.(Arg310_Arg311dup)', 'slr': 'NP_057502.2:p.(R310_R311dup)'} assert results['NM_016418.5:c.924_925insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' @@ -15457,8 +14675,7 @@ def test_variant279(self): assert 'NM_181829.2:c.801_802insCGACGC' in list(results.keys()) assert results['NM_181829.2:c.801_802insCGACGC']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_181829.2:c.801_802insCGACGC']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_181829.2:c.801_802insCGACGC']['alt_genomic_loci'] == [] - assert results['NM_181829.2:c.801_802insCGACGC']['transcript_description'] == 'Homo sapiens neurofibromin 2 (NF2), transcript variant 6, mRNA' + self.assertCountEqual(results['NM_181829.2:c.801_802insCGACGC']['alt_genomic_loci'], []) assert results['NM_181829.2:c.801_802insCGACGC']['gene_symbol'] == 'NF2' assert results['NM_181829.2:c.801_802insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861967.1:p.(Arg269_Arg270dup)', 'slr': 'NP_861967.1:p.(R269_R270dup)'} assert results['NM_181829.2:c.801_802insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' @@ -15476,8 +14693,7 @@ def test_variant279(self): assert 'NR_156186.1:n.1483_1484insCGACGC' in list(results.keys()) assert results['NR_156186.1:n.1483_1484insCGACGC']['hgvs_lrg_transcript_variant'] == '' assert results['NR_156186.1:n.1483_1484insCGACGC']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_156186.1:n.1483_1484insCGACGC']['alt_genomic_loci'] == [] - assert results['NR_156186.1:n.1483_1484insCGACGC']['transcript_description'] == 'Homo sapiens neurofibromin 2 (NF2), transcript variant 14, non-coding RNA' + self.assertCountEqual(results['NR_156186.1:n.1483_1484insCGACGC']['alt_genomic_loci'], []) assert results['NR_156186.1:n.1483_1484insCGACGC']['gene_symbol'] == 'NF2' assert results['NR_156186.1:n.1483_1484insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_156186.1:n.1483_1484insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' @@ -15494,8 +14710,7 @@ def test_variant279(self): assert 'NM_181831.2:c.675_676insCGACGC' in list(results.keys()) assert results['NM_181831.2:c.675_676insCGACGC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_181831.2:c.675_676insCGACGC']['refseqgene_context_intronic_sequence'] == '' - assert 
results['NM_181831.2:c.675_676insCGACGC']['alt_genomic_loci'] == [] - assert results['NM_181831.2:c.675_676insCGACGC']['transcript_description'] == 'Homo sapiens neurofibromin 2 (NF2), transcript variant 13, mRNA' + self.assertCountEqual(results['NM_181831.2:c.675_676insCGACGC']['alt_genomic_loci'], []) assert results['NM_181831.2:c.675_676insCGACGC']['gene_symbol'] == 'NF2' assert results['NM_181831.2:c.675_676insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861969.1:p.(Arg227_Arg228dup)', 'slr': 'NP_861969.1:p.(R227_R228dup)'} assert results['NM_181831.2:c.675_676insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' @@ -15518,8 +14733,7 @@ def test_variant280(self): assert 'NM_198156.2:c.341-3280_341-3271del' in list(results.keys()) assert results['NM_198156.2:c.341-3280_341-3271del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_198156.2:c.341-3280_341-3271del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_198156.2:c.341-3280_341-3271del']['alt_genomic_loci'] == [] - assert results['NM_198156.2:c.341-3280_341-3271del']['transcript_description'] == 'Homo sapiens von Hippel-Lindau tumor suppressor (VHL), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_198156.2:c.341-3280_341-3271del']['alt_genomic_loci'], []) assert results['NM_198156.2:c.341-3280_341-3271del']['gene_symbol'] == 'VHL' assert results['NM_198156.2:c.341-3280_341-3271del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_937799.1:p.?', 'slr': 'NP_937799.1:p.?'} assert results['NM_198156.2:c.341-3280_341-3271del']['submitted_variant'] == '3-10188187-TGTCCCGATAG-T' @@ -15537,8 +14751,7 @@ def test_variant280(self): assert 'NM_001354723.1:c.*18-3280_*18-3271del' in list(results.keys()) assert results['NM_001354723.1:c.*18-3280_*18-3271del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001354723.1:c.*18-3280_*18-3271del']['refseqgene_context_intronic_sequence'] == '' - assert 
results['NM_001354723.1:c.*18-3280_*18-3271del']['alt_genomic_loci'] == [] - assert results['NM_001354723.1:c.*18-3280_*18-3271del']['transcript_description'] == 'Homo sapiens von Hippel-Lindau tumor suppressor (VHL), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001354723.1:c.*18-3280_*18-3271del']['alt_genomic_loci'], []) assert results['NM_001354723.1:c.*18-3280_*18-3271del']['gene_symbol'] == 'VHL' assert results['NM_001354723.1:c.*18-3280_*18-3271del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001341652.1:p.?', 'slr': 'NP_001341652.1:p.?'} assert results['NM_001354723.1:c.*18-3280_*18-3271del']['submitted_variant'] == '3-10188187-TGTCCCGATAG-T' @@ -15555,8 +14768,7 @@ def test_variant280(self): assert 'NM_000551.3:c.341-7_343del' in list(results.keys()) assert results['NM_000551.3:c.341-7_343del']['hgvs_lrg_transcript_variant'] == 'LRG_322t1:c.341-7_343del' assert results['NM_000551.3:c.341-7_343del']['refseqgene_context_intronic_sequence'] == 'NG_008212.3(NM_000551.3):c.341-7_343del' - assert results['NM_000551.3:c.341-7_343del']['alt_genomic_loci'] == [] - assert results['NM_000551.3:c.341-7_343del']['transcript_description'] == 'Homo sapiens von Hippel-Lindau tumor suppressor (VHL), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000551.3:c.341-7_343del']['alt_genomic_loci'], []) assert results['NM_000551.3:c.341-7_343del']['gene_symbol'] == 'VHL' assert results['NM_000551.3:c.341-7_343del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000542.1(LRG_322p1):p.?', 'slr': 'NP_000542.1:p.?'} assert results['NM_000551.3:c.341-7_343del']['submitted_variant'] == '3-10188187-TGTCCCGATAG-T' @@ -15579,8 +14791,7 @@ def test_variant281(self): assert 'NM_001005505.2:c.3408A>C' in list(results.keys()) assert results['NM_001005505.2:c.3408A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001005505.2:c.3408A>C']['refseqgene_context_intronic_sequence'] == '' - assert 
results['NM_001005505.2:c.3408A>C']['alt_genomic_loci'] == [] - assert results['NM_001005505.2:c.3408A>C']['transcript_description'] == 'Homo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001005505.2:c.3408A>C']['alt_genomic_loci'], []) assert results['NM_001005505.2:c.3408A>C']['gene_symbol'] == 'CACNA2D2' assert results['NM_001005505.2:c.3408A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001005505.1:p.(Gln1136His)', 'slr': 'NP_001005505.1:p.(Q1136H)'} assert results['NM_001005505.2:c.3408A>C']['submitted_variant'] == '3-50402127-T-G' @@ -15597,8 +14808,7 @@ def test_variant281(self): assert 'NM_006030.2:c.3402A>C' in list(results.keys()) assert results['NM_006030.2:c.3402A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_006030.2:c.3402A>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_006030.2:c.3402A>C']['alt_genomic_loci'] == [] - assert results['NM_006030.2:c.3402A>C']['transcript_description'] == 'Homo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_006030.2:c.3402A>C']['alt_genomic_loci'], []) assert results['NM_006030.2:c.3402A>C']['gene_symbol'] == 'CACNA2D2' assert results['NM_006030.2:c.3402A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006021.2:p.(Gln1134His)', 'slr': 'NP_006021.2:p.(Q1134H)'} assert results['NM_006030.2:c.3402A>C']['submitted_variant'] == '3-50402127-T-G' @@ -15615,8 +14825,7 @@ def test_variant281(self): assert 'NM_001174051.1:c.3423A>C' in list(results.keys()) assert results['NM_001174051.1:c.3423A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001174051.1:c.3423A>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001174051.1:c.3423A>C']['alt_genomic_loci'] == [] - assert results['NM_001174051.1:c.3423A>C']['transcript_description'] == 'Homo sapiens 
calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001174051.1:c.3423A>C']['alt_genomic_loci'], []) assert results['NM_001174051.1:c.3423A>C']['gene_symbol'] == 'CACNA2D2' assert results['NM_001174051.1:c.3423A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167522.1:p.(Gln1141His)', 'slr': 'NP_001167522.1:p.(Q1141H)'} assert results['NM_001174051.1:c.3423A>C']['submitted_variant'] == '3-50402127-T-G' @@ -15633,8 +14842,7 @@ def test_variant281(self): assert 'NM_001174051.2:c.3423A>C' in list(results.keys()) assert results['NM_001174051.2:c.3423A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001174051.2:c.3423A>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001174051.2:c.3423A>C']['alt_genomic_loci'] == [] - assert results['NM_001174051.2:c.3423A>C']['transcript_description'] == 'Homo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001174051.2:c.3423A>C']['alt_genomic_loci'], []) assert results['NM_001174051.2:c.3423A>C']['gene_symbol'] == 'CACNA2D2' assert results['NM_001174051.2:c.3423A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167522.1:p.(Gln1141His)', 'slr': 'NP_001167522.1:p.(Q1141H)'} assert results['NM_001174051.2:c.3423A>C']['submitted_variant'] == '3-50402127-T-G' @@ -15651,8 +14859,7 @@ def test_variant281(self): assert 'NM_006030.3:c.3402A>C' in list(results.keys()) assert results['NM_006030.3:c.3402A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_006030.3:c.3402A>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_006030.3:c.3402A>C']['alt_genomic_loci'] == [] - assert results['NM_006030.3:c.3402A>C']['transcript_description'] == 'Homo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 2, mRNA' + 
self.assertCountEqual(results['NM_006030.3:c.3402A>C']['alt_genomic_loci'], []) assert results['NM_006030.3:c.3402A>C']['gene_symbol'] == 'CACNA2D2' assert results['NM_006030.3:c.3402A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006021.2:p.(Gln1134His)', 'slr': 'NP_006021.2:p.(Q1134H)'} assert results['NM_006030.3:c.3402A>C']['submitted_variant'] == '3-50402127-T-G' @@ -15669,8 +14876,7 @@ def test_variant281(self): assert 'NM_001291101.1:c.3201A>C' in list(results.keys()) assert results['NM_001291101.1:c.3201A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001291101.1:c.3201A>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001291101.1:c.3201A>C']['alt_genomic_loci'] == [] - assert results['NM_001291101.1:c.3201A>C']['transcript_description'] == 'Homo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001291101.1:c.3201A>C']['alt_genomic_loci'], []) assert results['NM_001291101.1:c.3201A>C']['gene_symbol'] == 'CACNA2D2' assert results['NM_001291101.1:c.3201A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278030.1:p.(Gln1067His)', 'slr': 'NP_001278030.1:p.(Q1067H)'} assert results['NM_001291101.1:c.3201A>C']['submitted_variant'] == '3-50402127-T-G' @@ -15688,8 +14894,7 @@ def test_variant281(self): assert 'NR_111912.1:n.443-1601T>G' in list(results.keys()) assert results['NR_111912.1:n.443-1601T>G']['hgvs_lrg_transcript_variant'] == '' assert results['NR_111912.1:n.443-1601T>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_111912.1:n.443-1601T>G']['alt_genomic_loci'] == [] - assert results['NR_111912.1:n.443-1601T>G']['transcript_description'] == 'Homo sapiens cytochrome b561 family member D2 (CYB561D2), transcript variant 3, non-coding RNA' + self.assertCountEqual(results['NR_111912.1:n.443-1601T>G']['alt_genomic_loci'], []) assert results['NR_111912.1:n.443-1601T>G']['gene_symbol'] == 
'CYB561D2' assert results['NR_111912.1:n.443-1601T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_111912.1:n.443-1601T>G']['submitted_variant'] == '3-50402127-T-G' @@ -15706,8 +14911,7 @@ def test_variant281(self): assert 'NM_001005505.1:c.3408A>C' in list(results.keys()) assert results['NM_001005505.1:c.3408A>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001005505.1:c.3408A>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001005505.1:c.3408A>C']['alt_genomic_loci'] == [] - assert results['NM_001005505.1:c.3408A>C']['transcript_description'] == 'Homo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001005505.1:c.3408A>C']['alt_genomic_loci'], []) assert results['NM_001005505.1:c.3408A>C']['gene_symbol'] == 'CACNA2D2' assert results['NM_001005505.1:c.3408A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001005505.1:p.(Gln1136His)', 'slr': 'NP_001005505.1:p.(Q1136H)'} assert results['NM_001005505.1:c.3408A>C']['submitted_variant'] == '3-50402127-T-G' @@ -15730,8 +14934,7 @@ def test_variant282(self): assert 'NR_111913.1:n.126G>A' in list(results.keys()) assert results['NR_111913.1:n.126G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NR_111913.1:n.126G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_111913.1:n.126G>A']['alt_genomic_loci'] == [] - assert results['NR_111913.1:n.126G>A']['transcript_description'] == 'Homo sapiens cytochrome b561 family member D2 (CYB561D2), transcript variant 4, non-coding RNA' + self.assertCountEqual(results['NR_111913.1:n.126G>A']['alt_genomic_loci'], []) assert results['NR_111913.1:n.126G>A']['gene_symbol'] == 'CYB561D2' assert results['NR_111913.1:n.126G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert 
results['NR_111913.1:n.126G>A']['submitted_variant'] == '3-50402890-G-A' @@ -15748,8 +14951,7 @@ def test_variant282(self): assert 'NR_111912.1:n.443-838G>A' in list(results.keys()) assert results['NR_111912.1:n.443-838G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NR_111912.1:n.443-838G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_111912.1:n.443-838G>A']['alt_genomic_loci'] == [] - assert results['NR_111912.1:n.443-838G>A']['transcript_description'] == 'Homo sapiens cytochrome b561 family member D2 (CYB561D2), transcript variant 3, non-coding RNA' + self.assertCountEqual(results['NR_111912.1:n.443-838G>A']['alt_genomic_loci'], []) assert results['NR_111912.1:n.443-838G>A']['gene_symbol'] == 'CYB561D2' assert results['NR_111912.1:n.443-838G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_111912.1:n.443-838G>A']['submitted_variant'] == '3-50402890-G-A' @@ -15766,8 +14968,7 @@ def test_variant282(self): assert 'NM_001291101.1:c.2788C>T' in list(results.keys()) assert results['NM_001291101.1:c.2788C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001291101.1:c.2788C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001291101.1:c.2788C>T']['alt_genomic_loci'] == [] - assert results['NM_001291101.1:c.2788C>T']['transcript_description'] == 'Homo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001291101.1:c.2788C>T']['alt_genomic_loci'], []) assert results['NM_001291101.1:c.2788C>T']['gene_symbol'] == 'CACNA2D2' assert results['NM_001291101.1:c.2788C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278030.1:p.(Pro930Ser)', 'slr': 'NP_001278030.1:p.(P930S)'} assert results['NM_001291101.1:c.2788C>T']['submitted_variant'] == '3-50402890-G-A' @@ -15784,8 +14985,7 @@ def test_variant282(self): assert 'NM_006030.2:c.2995C>T' in 
list(results.keys()) assert results['NM_006030.2:c.2995C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_006030.2:c.2995C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_006030.2:c.2995C>T']['alt_genomic_loci'] == [] - assert results['NM_006030.2:c.2995C>T']['transcript_description'] == 'Homo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_006030.2:c.2995C>T']['alt_genomic_loci'], []) assert results['NM_006030.2:c.2995C>T']['gene_symbol'] == 'CACNA2D2' assert results['NM_006030.2:c.2995C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006021.2:p.(Pro999Ser)', 'slr': 'NP_006021.2:p.(P999S)'} assert results['NM_006030.2:c.2995C>T']['submitted_variant'] == '3-50402890-G-A' @@ -15802,8 +15002,7 @@ def test_variant282(self): assert 'NR_111914.1:n.126G>A' in list(results.keys()) assert results['NR_111914.1:n.126G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NR_111914.1:n.126G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_111914.1:n.126G>A']['alt_genomic_loci'] == [] - assert results['NR_111914.1:n.126G>A']['transcript_description'] == 'Homo sapiens cytochrome b561 family member D2 (CYB561D2), transcript variant 5, non-coding RNA' + self.assertCountEqual(results['NR_111914.1:n.126G>A']['alt_genomic_loci'], []) assert results['NR_111914.1:n.126G>A']['gene_symbol'] == 'CYB561D2' assert results['NR_111914.1:n.126G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_111914.1:n.126G>A']['submitted_variant'] == '3-50402890-G-A' @@ -15820,8 +15019,7 @@ def test_variant282(self): assert 'NM_001005505.2:c.2995C>T' in list(results.keys()) assert results['NM_001005505.2:c.2995C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001005505.2:c.2995C>T']['refseqgene_context_intronic_sequence'] == '' - assert 
results['NM_001005505.2:c.2995C>T']['alt_genomic_loci'] == [] - assert results['NM_001005505.2:c.2995C>T']['transcript_description'] == 'Homo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001005505.2:c.2995C>T']['alt_genomic_loci'], []) assert results['NM_001005505.2:c.2995C>T']['gene_symbol'] == 'CACNA2D2' assert results['NM_001005505.2:c.2995C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001005505.1:p.(Pro999Ser)', 'slr': 'NP_001005505.1:p.(P999S)'} assert results['NM_001005505.2:c.2995C>T']['submitted_variant'] == '3-50402890-G-A' @@ -15839,8 +15037,7 @@ def test_variant282(self): assert 'NM_001174051.1:c.3016C>T' in list(results.keys()) assert results['NM_001174051.1:c.3016C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001174051.1:c.3016C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001174051.1:c.3016C>T']['alt_genomic_loci'] == [] - assert results['NM_001174051.1:c.3016C>T']['transcript_description'] == 'Homo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001174051.1:c.3016C>T']['alt_genomic_loci'], []) assert results['NM_001174051.1:c.3016C>T']['gene_symbol'] == 'CACNA2D2' assert results['NM_001174051.1:c.3016C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167522.1:p.(Pro1006Ser)', 'slr': 'NP_001167522.1:p.(P1006S)'} assert results['NM_001174051.1:c.3016C>T']['submitted_variant'] == '3-50402890-G-A' @@ -15857,8 +15054,7 @@ def test_variant282(self): assert 'NM_001174051.2:c.3016C>T' in list(results.keys()) assert results['NM_001174051.2:c.3016C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001174051.2:c.3016C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001174051.2:c.3016C>T']['alt_genomic_loci'] == [] - assert 
results['NM_001174051.2:c.3016C>T']['transcript_description'] == 'Homo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001174051.2:c.3016C>T']['alt_genomic_loci'], []) assert results['NM_001174051.2:c.3016C>T']['gene_symbol'] == 'CACNA2D2' assert results['NM_001174051.2:c.3016C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167522.1:p.(Pro1006Ser)', 'slr': 'NP_001167522.1:p.(P1006S)'} assert results['NM_001174051.2:c.3016C>T']['submitted_variant'] == '3-50402890-G-A' @@ -15875,8 +15071,7 @@ def test_variant282(self): assert 'NM_006030.3:c.2995C>T' in list(results.keys()) assert results['NM_006030.3:c.2995C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_006030.3:c.2995C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_006030.3:c.2995C>T']['alt_genomic_loci'] == [] - assert results['NM_006030.3:c.2995C>T']['transcript_description'] == 'Homo sapiens calcium voltage-gated channel auxiliary subunit alpha2delta 2 (CACNA2D2), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_006030.3:c.2995C>T']['alt_genomic_loci'], []) assert results['NM_006030.3:c.2995C>T']['gene_symbol'] == 'CACNA2D2' assert results['NM_006030.3:c.2995C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006021.2:p.(Pro999Ser)', 'slr': 'NP_006021.2:p.(P999S)'} assert results['NM_006030.3:c.2995C>T']['submitted_variant'] == '3-50402890-G-A' @@ -15893,8 +15088,7 @@ def test_variant282(self): assert 'NM_001005505.1:c.2995C>T' in list(results.keys()) assert results['NM_001005505.1:c.2995C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001005505.1:c.2995C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001005505.1:c.2995C>T']['alt_genomic_loci'] == [] - assert results['NM_001005505.1:c.2995C>T']['transcript_description'] == 'Homo sapiens calcium channel, voltage-dependent, alpha 2/delta subunit 2 (CACNA2D2), 
transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001005505.1:c.2995C>T']['alt_genomic_loci'], []) assert results['NM_001005505.1:c.2995C>T']['gene_symbol'] == 'CACNA2D2' assert results['NM_001005505.1:c.2995C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001005505.1:p.(Pro999Ser)', 'slr': 'NP_001005505.1:p.(P999S)'} assert results['NM_001005505.1:c.2995C>T']['submitted_variant'] == '3-50402890-G-A' @@ -15917,8 +15111,7 @@ def test_variant283(self): assert 'NM_007159.4:c.1135+565del' in list(results.keys()) assert results['NM_007159.4:c.1135+565del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007159.4:c.1135+565del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_007159.4:c.1135+565del']['alt_genomic_loci'] == [] - assert results['NM_007159.4:c.1135+565del']['transcript_description'] == 'Homo sapiens sarcolemma associated protein (SLMAP), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_007159.4:c.1135+565del']['alt_genomic_loci'], []) assert results['NM_007159.4:c.1135+565del']['gene_symbol'] == 'SLMAP' assert results['NM_007159.4:c.1135+565del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009090.2:p.?', 'slr': 'NP_009090.2:p.?'} assert results['NM_007159.4:c.1135+565del']['submitted_variant'] == '3-57851007-AG-A' @@ -15935,8 +15128,7 @@ def test_variant283(self): assert 'NM_001304420.2:c.1186+424del' in list(results.keys()) assert results['NM_001304420.2:c.1186+424del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001304420.2:c.1186+424del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001304420.2:c.1186+424del']['alt_genomic_loci'] == [] - assert results['NM_001304420.2:c.1186+424del']['transcript_description'] == 'Homo sapiens sarcolemma associated protein (SLMAP), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001304420.2:c.1186+424del']['alt_genomic_loci'], []) assert results['NM_001304420.2:c.1186+424del']['gene_symbol'] == 
'SLMAP' assert results['NM_001304420.2:c.1186+424del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001291349.1:p.?', 'slr': 'NP_001291349.1:p.?'} assert results['NM_001304420.2:c.1186+424del']['submitted_variant'] == '3-57851007-AG-A' @@ -15953,8 +15145,7 @@ def test_variant283(self): assert 'NM_001304421.2:c.1135+565del' in list(results.keys()) assert results['NM_001304421.2:c.1135+565del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001304421.2:c.1135+565del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001304421.2:c.1135+565del']['alt_genomic_loci'] == [] - assert results['NM_001304421.2:c.1135+565del']['transcript_description'] == 'Homo sapiens sarcolemma associated protein (SLMAP), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001304421.2:c.1135+565del']['alt_genomic_loci'], []) assert results['NM_001304421.2:c.1135+565del']['gene_symbol'] == 'SLMAP' assert results['NM_001304421.2:c.1135+565del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001291350.1:p.?', 'slr': 'NP_001291350.1:p.?'} assert results['NM_001304421.2:c.1135+565del']['submitted_variant'] == '3-57851007-AG-A' @@ -15972,8 +15163,7 @@ def test_variant283(self): assert 'NM_007159.2:c.1135+565del' in list(results.keys()) assert results['NM_007159.2:c.1135+565del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_007159.2:c.1135+565del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_007159.2:c.1135+565del']['alt_genomic_loci'] == [] - assert results['NM_007159.2:c.1135+565del']['transcript_description'] == 'Homo sapiens sarcolemma associated protein (SLMAP), mRNA' + self.assertCountEqual(results['NM_007159.2:c.1135+565del']['alt_genomic_loci'], []) assert results['NM_007159.2:c.1135+565del']['gene_symbol'] == 'SLMAP' assert results['NM_007159.2:c.1135+565del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009090.2:p.?', 'slr': 'NP_009090.2:p.?'} assert 
results['NM_007159.2:c.1135+565del']['submitted_variant'] == '3-57851007-AG-A' @@ -15990,8 +15180,7 @@ def test_variant283(self): assert 'obsolete_record_3' in list(results.keys()) assert results['obsolete_record_3']['hgvs_lrg_transcript_variant'] == '' assert results['obsolete_record_3']['refseqgene_context_intronic_sequence'] == '' - assert results['obsolete_record_3']['alt_genomic_loci'] == [] - assert results['obsolete_record_3']['transcript_description'] == '' + self.assertCountEqual(results['obsolete_record_3']['alt_genomic_loci'], []) assert results['obsolete_record_3']['gene_symbol'] == '' assert results['obsolete_record_3']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['obsolete_record_3']['submitted_variant'] == '3-57851007-AG-A' @@ -16008,8 +15197,7 @@ def test_variant283(self): assert 'obsolete_record_2' in list(results.keys()) assert results['obsolete_record_2']['hgvs_lrg_transcript_variant'] == '' assert results['obsolete_record_2']['refseqgene_context_intronic_sequence'] == '' - assert results['obsolete_record_2']['alt_genomic_loci'] == [] - assert results['obsolete_record_2']['transcript_description'] == '' + self.assertCountEqual(results['obsolete_record_2']['alt_genomic_loci'], []) assert results['obsolete_record_2']['gene_symbol'] == '' assert results['obsolete_record_2']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['obsolete_record_2']['submitted_variant'] == '3-57851007-AG-A' @@ -16026,8 +15214,7 @@ def test_variant283(self): assert 'obsolete_record_1' in list(results.keys()) assert results['obsolete_record_1']['hgvs_lrg_transcript_variant'] == '' assert results['obsolete_record_1']['refseqgene_context_intronic_sequence'] == '' - assert results['obsolete_record_1']['alt_genomic_loci'] == [] - assert results['obsolete_record_1']['transcript_description'] == '' + self.assertCountEqual(results['obsolete_record_1']['alt_genomic_loci'], []) assert 
results['obsolete_record_1']['gene_symbol'] == '' assert results['obsolete_record_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['obsolete_record_1']['submitted_variant'] == '3-57851007-AG-A' @@ -16050,8 +15237,7 @@ def test_variant284(self): assert 'NM_001178065.1:c.3061C=' in list(results.keys()) assert results['NM_001178065.1:c.3061C=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001178065.1:c.3061C=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001178065.1:c.3061C=']['alt_genomic_loci'] == [] - assert results['NM_001178065.1:c.3061C=']['transcript_description'] == 'Homo sapiens calcium sensing receptor (CASR), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001178065.1:c.3061C=']['alt_genomic_loci'], []) assert results['NM_001178065.1:c.3061C=']['gene_symbol'] == 'CASR' assert results['NM_001178065.1:c.3061C=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001171536.1:p.(Gln1021=)', 'slr': 'NP_001171536.1:p.(Q1021=)'} assert results['NM_001178065.1:c.3061C=']['submitted_variant'] == '3-122003832-G-C' @@ -16069,8 +15255,7 @@ def test_variant284(self): assert 'NM_000388.3:c.3031C=' in list(results.keys()) assert results['NM_000388.3:c.3031C=']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000388.3:c.3031C=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000388.3:c.3031C=']['alt_genomic_loci'] == [] - assert results['NM_000388.3:c.3031C=']['transcript_description'] == 'Homo sapiens calcium sensing receptor (CASR), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_000388.3:c.3031C=']['alt_genomic_loci'], []) assert results['NM_000388.3:c.3031C=']['gene_symbol'] == 'CASR' assert results['NM_000388.3:c.3031C=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000379.2:p.(Gln1011=)', 'slr': 'NP_000379.2:p.(Q1011=)'} assert results['NM_000388.3:c.3031C=']['submitted_variant'] == '3-122003832-G-C' @@ -16093,8 +15278,7 @@ def 
test_variant285(self): assert 'NM_001349798.1:c.45_46insCCT' in list(results.keys()) assert results['NM_001349798.1:c.45_46insCCT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001349798.1:c.45_46insCCT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001349798.1:c.45_46insCCT']['alt_genomic_loci'] == [] - assert results['NM_001349798.1:c.45_46insCCT']['transcript_description'] == 'Homo sapiens F-box and WD repeat domain containing 7 (FBXW7), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001349798.1:c.45_46insCCT']['alt_genomic_loci'], []) assert results['NM_001349798.1:c.45_46insCCT']['gene_symbol'] == 'FBXW7' assert results['NM_001349798.1:c.45_46insCCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_361014.1:p.(Thr15_Gly16insPro)', 'slr': 'NP_361014.1:p.(T15_G16insP)'} assert results['NM_001349798.1:c.45_46insCCT']['submitted_variant'] == '4-153332910-C-CAGG' @@ -16111,8 +15295,7 @@ def test_variant285(self): assert 'NM_033632.3:c.45_46insCCT' in list(results.keys()) assert results['NM_033632.3:c.45_46insCCT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_033632.3:c.45_46insCCT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_033632.3:c.45_46insCCT']['alt_genomic_loci'] == [] - assert results['NM_033632.3:c.45_46insCCT']['transcript_description'] == 'Homo sapiens F-box and WD repeat domain containing 7 (FBXW7), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_033632.3:c.45_46insCCT']['alt_genomic_loci'], []) assert results['NM_033632.3:c.45_46insCCT']['gene_symbol'] == 'FBXW7' assert results['NM_033632.3:c.45_46insCCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_361014.1:p.(Thr15_Gly16insPro)', 'slr': 'NP_361014.1:p.(T15_G16insP)'} assert results['NM_033632.3:c.45_46insCCT']['submitted_variant'] == '4-153332910-C-CAGG' @@ -16129,8 +15312,7 @@ def test_variant285(self): assert 'NM_001257069.1:c.45_46insCCT' in list(results.keys()) assert 
results['NM_001257069.1:c.45_46insCCT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001257069.1:c.45_46insCCT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001257069.1:c.45_46insCCT']['alt_genomic_loci'] == [] - assert results['NM_001257069.1:c.45_46insCCT']['transcript_description'] == 'Homo sapiens F-box and WD repeat domain containing 7 (FBXW7), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001257069.1:c.45_46insCCT']['alt_genomic_loci'], []) assert results['NM_001257069.1:c.45_46insCCT']['gene_symbol'] == 'FBXW7' assert results['NM_001257069.1:c.45_46insCCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243998.1:p.(Thr15_Gly16insPro)', 'slr': 'NP_001243998.1:p.(T15_G16insP)'} assert results['NM_001257069.1:c.45_46insCCT']['submitted_variant'] == '4-153332910-C-CAGG' @@ -16148,8 +15330,7 @@ def test_variant285(self): assert 'NM_001349798.2:c.45_46insCCT' in list(results.keys()) assert results['NM_001349798.2:c.45_46insCCT']['hgvs_lrg_transcript_variant'] == 'LRG_1141t1:c.45_46insCCT' assert results['NM_001349798.2:c.45_46insCCT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001349798.2:c.45_46insCCT']['alt_genomic_loci'] == [] - assert results['NM_001349798.2:c.45_46insCCT']['transcript_description'] == 'Homo sapiens F-box and WD repeat domain containing 7 (FBXW7), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001349798.2:c.45_46insCCT']['alt_genomic_loci'], []) assert results['NM_001349798.2:c.45_46insCCT']['gene_symbol'] == 'FBXW7' assert results['NM_001349798.2:c.45_46insCCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001336727.1:p.(Thr15_Gly16insPro)', 'slr': 'NP_001336727.1:p.(T15_G16insP)'} assert results['NM_001349798.2:c.45_46insCCT']['submitted_variant'] == '4-153332910-C-CAGG' @@ -16173,8 +15354,7 @@ def test_variant286(self): assert 'Intergenic_Variant_1' in list(results.keys()) assert 
results['Intergenic_Variant_1']['hgvs_lrg_transcript_variant'] == '' assert results['Intergenic_Variant_1']['refseqgene_context_intronic_sequence'] == '' - assert results['Intergenic_Variant_1']['alt_genomic_loci'] == [] - assert results['Intergenic_Variant_1']['transcript_description'] == '' + self.assertCountEqual(results['Intergenic_Variant_1']['alt_genomic_loci'], []) assert results['Intergenic_Variant_1']['gene_symbol'] == '' assert results['Intergenic_Variant_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['Intergenic_Variant_1']['submitted_variant'] == '5-1295183-G-A' @@ -16197,8 +15377,7 @@ def test_variant287(self): assert 'NM_003664.4:c.2409_2411del' in list(results.keys()) assert results['NM_003664.4:c.2409_2411del']['hgvs_lrg_transcript_variant'] == 'LRG_170t1:c.2409_2411del' assert results['NM_003664.4:c.2409_2411del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003664.4:c.2409_2411del']['alt_genomic_loci'] == [] - assert results['NM_003664.4:c.2409_2411del']['transcript_description'] == 'Homo sapiens adaptor related protein complex 3 subunit beta 1 (AP3B1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003664.4:c.2409_2411del']['alt_genomic_loci'], []) assert results['NM_003664.4:c.2409_2411del']['gene_symbol'] == 'AP3B1' assert results['NM_003664.4:c.2409_2411del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003655.3(LRG_170p1):p.(Lys804del)', 'slr': 'NP_003655.3:p.(K804del)'} assert results['NM_003664.4:c.2409_2411del']['submitted_variant'] == '5-77396835-TTTC-T' @@ -16216,8 +15395,7 @@ def test_variant287(self): assert 'NM_003664.3:c.2409_2411del' in list(results.keys()) assert results['NM_003664.3:c.2409_2411del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003664.3:c.2409_2411del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003664.3:c.2409_2411del']['alt_genomic_loci'] == [] - assert 
results['NM_003664.3:c.2409_2411del']['transcript_description'] == 'Homo sapiens adaptor-related protein complex 3, beta 1 subunit (AP3B1), mRNA' + self.assertCountEqual(results['NM_003664.3:c.2409_2411del']['alt_genomic_loci'], []) assert results['NM_003664.3:c.2409_2411del']['gene_symbol'] == 'AP3B1' assert results['NM_003664.3:c.2409_2411del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003655.3(LRG_170p1):p.(Lys804del)', 'slr': 'NP_003655.3:p.(K804del)'} assert results['NM_003664.3:c.2409_2411del']['submitted_variant'] == '5-77396835-TTTC-T' @@ -16234,8 +15412,7 @@ def test_variant287(self): assert 'NM_001271769.1:c.2262_2264del' in list(results.keys()) assert results['NM_001271769.1:c.2262_2264del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001271769.1:c.2262_2264del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001271769.1:c.2262_2264del']['alt_genomic_loci'] == [] - assert results['NM_001271769.1:c.2262_2264del']['transcript_description'] == 'Homo sapiens adaptor related protein complex 3 subunit beta 1 (AP3B1), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001271769.1:c.2262_2264del']['alt_genomic_loci'], []) assert results['NM_001271769.1:c.2262_2264del']['gene_symbol'] == 'AP3B1' assert results['NM_001271769.1:c.2262_2264del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001258698.1:p.(Lys755del)', 'slr': 'NP_001258698.1:p.(K755del)'} assert results['NM_001271769.1:c.2262_2264del']['submitted_variant'] == '5-77396835-TTTC-T' @@ -16258,8 +15435,7 @@ def test_variant288(self): assert 'NM_000414.3:c.302+3_302+6del' in list(results.keys()) assert results['NM_000414.3:c.302+3_302+6del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000414.3:c.302+3_302+6del']['refseqgene_context_intronic_sequence'] == 'NG_008182.1(NM_000414.3):c.302+3_302+6del' - assert results['NM_000414.3:c.302+3_302+6del']['alt_genomic_loci'] == [] - assert 
results['NM_000414.3:c.302+3_302+6del']['transcript_description'] == 'Homo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_000414.3:c.302+3_302+6del']['alt_genomic_loci'], []) assert results['NM_000414.3:c.302+3_302+6del']['gene_symbol'] == 'HSD17B4' assert results['NM_000414.3:c.302+3_302+6del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000405.1:p.?', 'slr': 'NP_000405.1:p.?'} assert results['NM_000414.3:c.302+3_302+6del']['submitted_variant'] == '5-118811422-GGTGA-G' @@ -16276,8 +15452,7 @@ def test_variant288(self): assert 'NM_001292028.1:c.-110+3_-110+6del' in list(results.keys()) assert results['NM_001292028.1:c.-110+3_-110+6del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001292028.1:c.-110+3_-110+6del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001292028.1:c.-110+3_-110+6del']['alt_genomic_loci'] == [] - assert results['NM_001292028.1:c.-110+3_-110+6del']['transcript_description'] == 'Homo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001292028.1:c.-110+3_-110+6del']['alt_genomic_loci'], []) assert results['NM_001292028.1:c.-110+3_-110+6del']['gene_symbol'] == 'HSD17B4' assert results['NM_001292028.1:c.-110+3_-110+6del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278957.1:p.?', 'slr': 'NP_001278957.1:p.?'} assert results['NM_001292028.1:c.-110+3_-110+6del']['submitted_variant'] == '5-118811422-GGTGA-G' @@ -16294,8 +15469,7 @@ def test_variant288(self): assert 'NM_001199291.2:c.377+3_377+6del' in list(results.keys()) assert results['NM_001199291.2:c.377+3_377+6del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001199291.2:c.377+3_377+6del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001199291.2:c.377+3_377+6del']['alt_genomic_loci'] == [] - assert results['NM_001199291.2:c.377+3_377+6del']['transcript_description'] 
== 'Homo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001199291.2:c.377+3_377+6del']['alt_genomic_loci'], []) assert results['NM_001199291.2:c.377+3_377+6del']['gene_symbol'] == 'HSD17B4' assert results['NM_001199291.2:c.377+3_377+6del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001186220.1:p.?', 'slr': 'NP_001186220.1:p.?'} assert results['NM_001199291.2:c.377+3_377+6del']['submitted_variant'] == '5-118811422-GGTGA-G' @@ -16313,8 +15487,7 @@ def test_variant288(self): assert 'NM_001292027.1:c.230+3_230+6del' in list(results.keys()) assert results['NM_001292027.1:c.230+3_230+6del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001292027.1:c.230+3_230+6del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001292027.1:c.230+3_230+6del']['alt_genomic_loci'] == [] - assert results['NM_001292027.1:c.230+3_230+6del']['transcript_description'] == 'Homo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001292027.1:c.230+3_230+6del']['alt_genomic_loci'], []) assert results['NM_001292027.1:c.230+3_230+6del']['gene_symbol'] == 'HSD17B4' assert results['NM_001292027.1:c.230+3_230+6del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278956.1:p.?', 'slr': 'NP_001278956.1:p.?'} assert results['NM_001292027.1:c.230+3_230+6del']['submitted_variant'] == '5-118811422-GGTGA-G' @@ -16331,8 +15504,7 @@ def test_variant288(self): assert 'NM_001199291.1:c.377+3_377+6del' in list(results.keys()) assert results['NM_001199291.1:c.377+3_377+6del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001199291.1:c.377+3_377+6del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001199291.1:c.377+3_377+6del']['alt_genomic_loci'] == [] - assert results['NM_001199291.1:c.377+3_377+6del']['transcript_description'] == 'Homo sapiens hydroxysteroid (17-beta) dehydrogenase 4 
(HSD17B4), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001199291.1:c.377+3_377+6del']['alt_genomic_loci'], []) assert results['NM_001199291.1:c.377+3_377+6del']['gene_symbol'] == 'HSD17B4' assert results['NM_001199291.1:c.377+3_377+6del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001186220.1:p.?', 'slr': 'NP_001186220.1:p.?'} assert results['NM_001199291.1:c.377+3_377+6del']['submitted_variant'] == '5-118811422-GGTGA-G' @@ -16349,8 +15521,7 @@ def test_variant288(self): assert 'NM_001199292.1:c.248+3_248+6del' in list(results.keys()) assert results['NM_001199292.1:c.248+3_248+6del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001199292.1:c.248+3_248+6del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001199292.1:c.248+3_248+6del']['alt_genomic_loci'] == [] - assert results['NM_001199292.1:c.248+3_248+6del']['transcript_description'] == 'Homo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001199292.1:c.248+3_248+6del']['alt_genomic_loci'], []) assert results['NM_001199292.1:c.248+3_248+6del']['gene_symbol'] == 'HSD17B4' assert results['NM_001199292.1:c.248+3_248+6del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001186221.1:p.?', 'slr': 'NP_001186221.1:p.?'} assert results['NM_001199292.1:c.248+3_248+6del']['submitted_variant'] == '5-118811422-GGTGA-G' @@ -16373,8 +15544,7 @@ def test_variant289(self): assert 'NM_001292028.1:c.-110+1_-110+5del' in list(results.keys()) assert results['NM_001292028.1:c.-110+1_-110+5del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001292028.1:c.-110+1_-110+5del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001292028.1:c.-110+1_-110+5del']['alt_genomic_loci'] == [] - assert results['NM_001292028.1:c.-110+1_-110+5del']['transcript_description'] == 'Homo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 5, mRNA' + 
self.assertCountEqual(results['NM_001292028.1:c.-110+1_-110+5del']['alt_genomic_loci'], []) assert results['NM_001292028.1:c.-110+1_-110+5del']['gene_symbol'] == 'HSD17B4' assert results['NM_001292028.1:c.-110+1_-110+5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278957.1:p.?', 'slr': 'NP_001278957.1:p.?'} assert results['NM_001292028.1:c.-110+1_-110+5del']['submitted_variant'] == '5-118811422-GGTGAG-G' @@ -16391,8 +15561,7 @@ def test_variant289(self): assert 'NM_000414.3:c.302+1_302+5del' in list(results.keys()) assert results['NM_000414.3:c.302+1_302+5del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000414.3:c.302+1_302+5del']['refseqgene_context_intronic_sequence'] == 'NG_008182.1(NM_000414.3):c.302+1_302+5del' - assert results['NM_000414.3:c.302+1_302+5del']['alt_genomic_loci'] == [] - assert results['NM_000414.3:c.302+1_302+5del']['transcript_description'] == 'Homo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_000414.3:c.302+1_302+5del']['alt_genomic_loci'], []) assert results['NM_000414.3:c.302+1_302+5del']['gene_symbol'] == 'HSD17B4' assert results['NM_000414.3:c.302+1_302+5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000405.1:p.?', 'slr': 'NP_000405.1:p.?'} assert results['NM_000414.3:c.302+1_302+5del']['submitted_variant'] == '5-118811422-GGTGAG-G' @@ -16409,8 +15578,7 @@ def test_variant289(self): assert 'NM_001199291.2:c.377+1_377+5del' in list(results.keys()) assert results['NM_001199291.2:c.377+1_377+5del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001199291.2:c.377+1_377+5del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001199291.2:c.377+1_377+5del']['alt_genomic_loci'] == [] - assert results['NM_001199291.2:c.377+1_377+5del']['transcript_description'] == 'Homo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 1, mRNA' + 
self.assertCountEqual(results['NM_001199291.2:c.377+1_377+5del']['alt_genomic_loci'], []) assert results['NM_001199291.2:c.377+1_377+5del']['gene_symbol'] == 'HSD17B4' assert results['NM_001199291.2:c.377+1_377+5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001186220.1:p.?', 'slr': 'NP_001186220.1:p.?'} assert results['NM_001199291.2:c.377+1_377+5del']['submitted_variant'] == '5-118811422-GGTGAG-G' @@ -16427,8 +15595,7 @@ def test_variant289(self): assert 'NM_001199292.1:c.248+1_248+5del' in list(results.keys()) assert results['NM_001199292.1:c.248+1_248+5del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001199292.1:c.248+1_248+5del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001199292.1:c.248+1_248+5del']['alt_genomic_loci'] == [] - assert results['NM_001199292.1:c.248+1_248+5del']['transcript_description'] == 'Homo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001199292.1:c.248+1_248+5del']['alt_genomic_loci'], []) assert results['NM_001199292.1:c.248+1_248+5del']['gene_symbol'] == 'HSD17B4' assert results['NM_001199292.1:c.248+1_248+5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001186221.1:p.?', 'slr': 'NP_001186221.1:p.?'} assert results['NM_001199292.1:c.248+1_248+5del']['submitted_variant'] == '5-118811422-GGTGAG-G' @@ -16446,8 +15613,7 @@ def test_variant289(self): assert 'NM_001199291.1:c.377+1_377+5del' in list(results.keys()) assert results['NM_001199291.1:c.377+1_377+5del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001199291.1:c.377+1_377+5del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001199291.1:c.377+1_377+5del']['alt_genomic_loci'] == [] - assert results['NM_001199291.1:c.377+1_377+5del']['transcript_description'] == 'Homo sapiens hydroxysteroid (17-beta) dehydrogenase 4 (HSD17B4), transcript variant 1, mRNA' + 
self.assertCountEqual(results['NM_001199291.1:c.377+1_377+5del']['alt_genomic_loci'], []) assert results['NM_001199291.1:c.377+1_377+5del']['gene_symbol'] == 'HSD17B4' assert results['NM_001199291.1:c.377+1_377+5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001186220.1:p.?', 'slr': 'NP_001186220.1:p.?'} assert results['NM_001199291.1:c.377+1_377+5del']['submitted_variant'] == '5-118811422-GGTGAG-G' @@ -16464,8 +15630,7 @@ def test_variant289(self): assert 'NM_001292027.1:c.230+1_230+5del' in list(results.keys()) assert results['NM_001292027.1:c.230+1_230+5del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001292027.1:c.230+1_230+5del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001292027.1:c.230+1_230+5del']['alt_genomic_loci'] == [] - assert results['NM_001292027.1:c.230+1_230+5del']['transcript_description'] == 'Homo sapiens hydroxysteroid 17-beta dehydrogenase 4 (HSD17B4), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001292027.1:c.230+1_230+5del']['alt_genomic_loci'], []) assert results['NM_001292027.1:c.230+1_230+5del']['gene_symbol'] == 'HSD17B4' assert results['NM_001292027.1:c.230+1_230+5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278956.1:p.?', 'slr': 'NP_001278956.1:p.?'} assert results['NM_001292027.1:c.230+1_230+5del']['submitted_variant'] == '5-118811422-GGTGAG-G' @@ -16489,8 +15654,7 @@ def test_variant290(self): assert 'NR_110997.1:n.21del' in list(results.keys()) assert results['NR_110997.1:n.21del']['hgvs_lrg_transcript_variant'] == '' assert results['NR_110997.1:n.21del']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_110997.1:n.21del']['alt_genomic_loci'] == [] - assert results['NR_110997.1:n.21del']['transcript_description'] == 'Homo sapiens MIR3936 host gene (MIR3936HG), long non-coding RNA' + self.assertCountEqual(results['NR_110997.1:n.21del']['alt_genomic_loci'], []) assert results['NR_110997.1:n.21del']['gene_symbol'] == 'MIR3936HG' 
assert results['NR_110997.1:n.21del']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_110997.1:n.21del']['submitted_variant'] == '5-131705587-CG-C' @@ -16507,8 +15671,7 @@ def test_variant290(self): assert 'NM_003060.3:c.-75del' in list(results.keys()) assert results['NM_003060.3:c.-75del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003060.3:c.-75del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003060.3:c.-75del']['alt_genomic_loci'] == [] - assert results['NM_003060.3:c.-75del']['transcript_description'] == 'Homo sapiens solute carrier family 22 member 5 (SLC22A5), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_003060.3:c.-75del']['alt_genomic_loci'], []) assert results['NM_003060.3:c.-75del']['gene_symbol'] == 'SLC22A5' assert results['NM_003060.3:c.-75del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003051.1:p.?', 'slr': 'NP_003051.1:p.?'} assert results['NM_003060.3:c.-75del']['submitted_variant'] == '5-131705587-CG-C' @@ -16525,8 +15688,7 @@ def test_variant290(self): assert 'NM_001308122.1:c.-75del' in list(results.keys()) assert results['NM_001308122.1:c.-75del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001308122.1:c.-75del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001308122.1:c.-75del']['alt_genomic_loci'] == [] - assert results['NM_001308122.1:c.-75del']['transcript_description'] == 'Homo sapiens solute carrier family 22 member 5 (SLC22A5), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001308122.1:c.-75del']['alt_genomic_loci'], []) assert results['NM_001308122.1:c.-75del']['gene_symbol'] == 'SLC22A5' assert results['NM_001308122.1:c.-75del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001295051.1:p.?', 'slr': 'NP_001295051.1:p.?'} assert results['NM_001308122.1:c.-75del']['submitted_variant'] == '5-131705587-CG-C' @@ -16550,8 +15712,7 @@ def 
test_variant291(self): assert 'NM_024577.3:c.2813A>G' in list(results.keys()) assert results['NM_024577.3:c.2813A>G']['hgvs_lrg_transcript_variant'] == 'LRG_269t1:c.2813A>G' assert results['NM_024577.3:c.2813A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_024577.3:c.2813A>G']['alt_genomic_loci'] == [] - assert results['NM_024577.3:c.2813A>G']['transcript_description'] == 'Homo sapiens SH3 domain and tetratricopeptide repeats 2 (SH3TC2), mRNA' + self.assertCountEqual(results['NM_024577.3:c.2813A>G']['alt_genomic_loci'], []) assert results['NM_024577.3:c.2813A>G']['gene_symbol'] == 'SH3TC2' assert results['NM_024577.3:c.2813A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_078853.2(LRG_269p1):p.(His938Arg)', 'slr': 'NP_078853.2:p.(H938R)'} assert results['NM_024577.3:c.2813A>G']['submitted_variant'] == '5-148406482-T-C' @@ -16574,8 +15735,7 @@ def test_variant292(self): assert 'NM_014845.5:c.123_124insCAG' in list(results.keys()) assert results['NM_014845.5:c.123_124insCAG']['hgvs_lrg_transcript_variant'] == 'LRG_241t1:c.123_124insCAG' assert results['NM_014845.5:c.123_124insCAG']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_014845.5:c.123_124insCAG']['alt_genomic_loci'] == [] - assert results['NM_014845.5:c.123_124insCAG']['transcript_description'] == 'Homo sapiens FIG4 phosphoinositide 5-phosphatase (FIG4), mRNA' + self.assertCountEqual(results['NM_014845.5:c.123_124insCAG']['alt_genomic_loci'], []) assert results['NM_014845.5:c.123_124insCAG']['gene_symbol'] == 'FIG4' assert results['NM_014845.5:c.123_124insCAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055660.1(LRG_241p1):p.(Ile41_Asp42insGln)', 'slr': 'NP_055660.1:p.(I41_D42insQ)'} assert results['NM_014845.5:c.123_124insCAG']['submitted_variant'] == '6-110036337-T-TCAG' @@ -16599,8 +15759,7 @@ def test_variant293(self): assert 'NM_014845.5:c.124_126del' in list(results.keys()) assert 
results['NM_014845.5:c.124_126del']['hgvs_lrg_transcript_variant'] == 'LRG_241t1:c.124_126del' assert results['NM_014845.5:c.124_126del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_014845.5:c.124_126del']['alt_genomic_loci'] == [] - assert results['NM_014845.5:c.124_126del']['transcript_description'] == 'Homo sapiens FIG4 phosphoinositide 5-phosphatase (FIG4), mRNA' + self.assertCountEqual(results['NM_014845.5:c.124_126del']['alt_genomic_loci'], []) assert results['NM_014845.5:c.124_126del']['gene_symbol'] == 'FIG4' assert results['NM_014845.5:c.124_126del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055660.1(LRG_241p1):p.(Asp42del)', 'slr': 'NP_055660.1:p.(D42del)'} assert results['NM_014845.5:c.124_126del']['submitted_variant'] == '6-110036337-TGAT-T' @@ -16625,8 +15784,7 @@ def test_variant294(self): assert 'NM_182961.3:c.14018G>T' in list(results.keys()) assert results['NM_182961.3:c.14018G>T']['hgvs_lrg_transcript_variant'] == 'LRG_427t1:c.14018G>T' assert results['NM_182961.3:c.14018G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_182961.3:c.14018G>T']['alt_genomic_loci'] == [] - assert results['NM_182961.3:c.14018G>T']['transcript_description'] == 'Homo sapiens spectrin repeat containing nuclear envelope protein 1 (SYNE1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_182961.3:c.14018G>T']['alt_genomic_loci'], []) assert results['NM_182961.3:c.14018G>T']['gene_symbol'] == 'SYNE1' assert results['NM_182961.3:c.14018G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_892006.3(LRG_427p1):p.(Arg4673Leu)', 'slr': 'NP_892006.3:p.(R4673L)'} assert results['NM_182961.3:c.14018G>T']['submitted_variant'] == '6-152651802-C-A' @@ -16643,8 +15801,7 @@ def test_variant294(self): assert 'NM_033071.3:c.13805G>T' in list(results.keys()) assert results['NM_033071.3:c.13805G>T']['hgvs_lrg_transcript_variant'] == 'LRG_427t2:c.13805G>T' assert 
results['NM_033071.3:c.13805G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_033071.3:c.13805G>T']['alt_genomic_loci'] == [] - assert results['NM_033071.3:c.13805G>T']['transcript_description'] == 'Homo sapiens spectrin repeat containing nuclear envelope protein 1 (SYNE1), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_033071.3:c.13805G>T']['alt_genomic_loci'], []) assert results['NM_033071.3:c.13805G>T']['gene_symbol'] == 'SYNE1' assert results['NM_033071.3:c.13805G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_149062.1(LRG_427p2):p.(Arg4602Leu)', 'slr': 'NP_149062.1:p.(R4602L)'} assert results['NM_033071.3:c.13805G>T']['submitted_variant'] == '6-152651802-C-A' @@ -16668,8 +15825,7 @@ def test_variant295(self): assert 'NM_033071.3:c.5950G>C' in list(results.keys()) assert results['NM_033071.3:c.5950G>C']['hgvs_lrg_transcript_variant'] == 'LRG_427t2:c.5950G>C' assert results['NM_033071.3:c.5950G>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_033071.3:c.5950G>C']['alt_genomic_loci'] == [] - assert results['NM_033071.3:c.5950G>C']['transcript_description'] == 'Homo sapiens spectrin repeat containing nuclear envelope protein 1 (SYNE1), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_033071.3:c.5950G>C']['alt_genomic_loci'], []) assert results['NM_033071.3:c.5950G>C']['gene_symbol'] == 'SYNE1' assert results['NM_033071.3:c.5950G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_149062.1(LRG_427p2):p.(Ala1984Pro)', 'slr': 'NP_149062.1:p.(A1984P)'} assert results['NM_033071.3:c.5950G>C']['submitted_variant'] == '6-152737643-C-G' @@ -16686,8 +15842,7 @@ def test_variant295(self): assert 'NM_182961.3:c.5929G>C' in list(results.keys()) assert results['NM_182961.3:c.5929G>C']['hgvs_lrg_transcript_variant'] == 'LRG_427t1:c.5929G>C' assert results['NM_182961.3:c.5929G>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_182961.3:c.5929G>C']['alt_genomic_loci'] 
== [] - assert results['NM_182961.3:c.5929G>C']['transcript_description'] == 'Homo sapiens spectrin repeat containing nuclear envelope protein 1 (SYNE1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_182961.3:c.5929G>C']['alt_genomic_loci'], []) assert results['NM_182961.3:c.5929G>C']['gene_symbol'] == 'SYNE1' assert results['NM_182961.3:c.5929G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_892006.3(LRG_427p1):p.(Ala1977Pro)', 'slr': 'NP_892006.3:p.(A1977P)'} assert results['NM_182961.3:c.5929G>C']['submitted_variant'] == '6-152737643-C-G' @@ -16710,8 +15865,7 @@ def test_variant296(self): assert 'NM_001322012.1:c.688A>G' in list(results.keys()) assert results['NM_001322012.1:c.688A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322012.1:c.688A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001322012.1:c.688A>G']['alt_genomic_loci'] == [] - assert results['NM_001322012.1:c.688A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 11, mRNA' + self.assertCountEqual(results['NM_001322012.1:c.688A>G']['alt_genomic_loci'], []) assert results['NM_001322012.1:c.688A>G']['gene_symbol'] == 'PMS2' assert results['NM_001322012.1:c.688A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308941.1:p.(Lys230Glu)', 'slr': 'NP_001308941.1:p.(K230E)'} assert results['NM_001322012.1:c.688A>G']['submitted_variant'] == '7-6026775-T-C' @@ -16728,8 +15882,7 @@ def test_variant296(self): assert 'NM_001322010.1:c.1060A>G' in list(results.keys()) assert results['NM_001322010.1:c.1060A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322010.1:c.1060A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001322010.1:c.1060A>G']['alt_genomic_loci'] == [] - assert results['NM_001322010.1:c.1060A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 9, 
mRNA' + self.assertCountEqual(results['NM_001322010.1:c.1060A>G']['alt_genomic_loci'], []) assert results['NM_001322010.1:c.1060A>G']['gene_symbol'] == 'PMS2' assert results['NM_001322010.1:c.1060A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308939.1:p.(Lys354Glu)', 'slr': 'NP_001308939.1:p.(K354E)'} assert results['NM_001322010.1:c.1060A>G']['submitted_variant'] == '7-6026775-T-C' @@ -16746,8 +15899,7 @@ def test_variant296(self): assert 'NM_001322015.1:c.1312A>G' in list(results.keys()) assert results['NM_001322015.1:c.1312A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322015.1:c.1312A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001322015.1:c.1312A>G']['alt_genomic_loci'] == [] - assert results['NM_001322015.1:c.1312A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 14, mRNA' + self.assertCountEqual(results['NM_001322015.1:c.1312A>G']['alt_genomic_loci'], []) assert results['NM_001322015.1:c.1312A>G']['gene_symbol'] == 'PMS2' assert results['NM_001322015.1:c.1312A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308944.1:p.(Lys438Glu)', 'slr': 'NP_001308944.1:p.(K438E)'} assert results['NM_001322015.1:c.1312A>G']['submitted_variant'] == '7-6026775-T-C' @@ -16764,8 +15916,7 @@ def test_variant296(self): assert 'NM_001322003.1:c.1216A>G' in list(results.keys()) assert results['NM_001322003.1:c.1216A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322003.1:c.1216A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001322003.1:c.1216A>G']['alt_genomic_loci'] == [] - assert results['NM_001322003.1:c.1216A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001322003.1:c.1216A>G']['alt_genomic_loci'], []) assert results['NM_001322003.1:c.1216A>G']['gene_symbol'] == 'PMS2' 
assert results['NM_001322003.1:c.1216A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308932.1:p.(Lys406Glu)', 'slr': 'NP_001308932.1:p.(K406E)'} assert results['NM_001322003.1:c.1216A>G']['submitted_variant'] == '7-6026775-T-C' @@ -16782,8 +15933,7 @@ def test_variant296(self): assert 'NM_001322014.1:c.1621A>G' in list(results.keys()) assert results['NM_001322014.1:c.1621A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322014.1:c.1621A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001322014.1:c.1621A>G']['alt_genomic_loci'] == [] - assert results['NM_001322014.1:c.1621A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 13, mRNA' + self.assertCountEqual(results['NM_001322014.1:c.1621A>G']['alt_genomic_loci'], []) assert results['NM_001322014.1:c.1621A>G']['gene_symbol'] == 'PMS2' assert results['NM_001322014.1:c.1621A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308943.1:p.(Lys541Glu)', 'slr': 'NP_001308943.1:p.(K541E)'} assert results['NM_001322014.1:c.1621A>G']['submitted_variant'] == '7-6026775-T-C' @@ -16800,8 +15950,7 @@ def test_variant296(self): assert 'NM_001322004.1:c.1216A>G' in list(results.keys()) assert results['NM_001322004.1:c.1216A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322004.1:c.1216A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001322004.1:c.1216A>G']['alt_genomic_loci'] == [] - assert results['NM_001322004.1:c.1216A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001322004.1:c.1216A>G']['alt_genomic_loci'], []) assert results['NM_001322004.1:c.1216A>G']['gene_symbol'] == 'PMS2' assert results['NM_001322004.1:c.1216A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308933.1:p.(Lys406Glu)', 'slr': 'NP_001308933.1:p.(K406E)'} 
assert results['NM_001322004.1:c.1216A>G']['submitted_variant'] == '7-6026775-T-C' @@ -16818,8 +15967,7 @@ def test_variant296(self): assert 'NM_001322008.1:c.1303A>G' in list(results.keys()) assert results['NM_001322008.1:c.1303A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322008.1:c.1303A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001322008.1:c.1303A>G']['alt_genomic_loci'] == [] - assert results['NM_001322008.1:c.1303A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 7, mRNA' + self.assertCountEqual(results['NM_001322008.1:c.1303A>G']['alt_genomic_loci'], []) assert results['NM_001322008.1:c.1303A>G']['gene_symbol'] == 'PMS2' assert results['NM_001322008.1:c.1303A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308937.1:p.(Lys435Glu)', 'slr': 'NP_001308937.1:p.(K435E)'} assert results['NM_001322008.1:c.1303A>G']['submitted_variant'] == '7-6026775-T-C' @@ -16836,8 +15984,7 @@ def test_variant296(self): assert 'NM_001322006.1:c.1465A>G' in list(results.keys()) assert results['NM_001322006.1:c.1465A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322006.1:c.1465A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001322006.1:c.1465A>G']['alt_genomic_loci'] == [] - assert results['NM_001322006.1:c.1465A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001322006.1:c.1465A>G']['alt_genomic_loci'], []) assert results['NM_001322006.1:c.1465A>G']['gene_symbol'] == 'PMS2' assert results['NM_001322006.1:c.1465A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308935.1:p.(Lys489Glu)', 'slr': 'NP_001308935.1:p.(K489E)'} assert results['NM_001322006.1:c.1465A>G']['submitted_variant'] == '7-6026775-T-C' @@ -16854,8 +16001,7 @@ def test_variant296(self): assert 
'NM_001322013.1:c.1048A>G' in list(results.keys()) assert results['NM_001322013.1:c.1048A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322013.1:c.1048A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001322013.1:c.1048A>G']['alt_genomic_loci'] == [] - assert results['NM_001322013.1:c.1048A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 12, mRNA' + self.assertCountEqual(results['NM_001322013.1:c.1048A>G']['alt_genomic_loci'], []) assert results['NM_001322013.1:c.1048A>G']['gene_symbol'] == 'PMS2' assert results['NM_001322013.1:c.1048A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308942.1:p.(Lys350Glu)', 'slr': 'NP_001308942.1:p.(K350E)'} assert results['NM_001322013.1:c.1048A>G']['submitted_variant'] == '7-6026775-T-C' @@ -16872,8 +16018,7 @@ def test_variant296(self): assert 'NM_001322009.1:c.1216A>G' in list(results.keys()) assert results['NM_001322009.1:c.1216A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322009.1:c.1216A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001322009.1:c.1216A>G']['alt_genomic_loci'] == [] - assert results['NM_001322009.1:c.1216A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 8, mRNA' + self.assertCountEqual(results['NM_001322009.1:c.1216A>G']['alt_genomic_loci'], []) assert results['NM_001322009.1:c.1216A>G']['gene_symbol'] == 'PMS2' assert results['NM_001322009.1:c.1216A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308938.1:p.(Lys406Glu)', 'slr': 'NP_001308938.1:p.(K406E)'} assert results['NM_001322009.1:c.1216A>G']['submitted_variant'] == '7-6026775-T-C' @@ -16890,8 +16035,7 @@ def test_variant296(self): assert 'NR_003085.2:n.1703G=' in list(results.keys()) assert results['NR_003085.2:n.1703G=']['hgvs_lrg_transcript_variant'] == '' assert 
results['NR_003085.2:n.1703G=']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_003085.2:n.1703G=']['alt_genomic_loci'] == [] - assert results['NR_003085.2:n.1703G=']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 2, non-coding RNA' + self.assertCountEqual(results['NR_003085.2:n.1703G=']['alt_genomic_loci'], []) assert results['NR_003085.2:n.1703G=']['gene_symbol'] == 'PMS2' assert results['NR_003085.2:n.1703G=']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_003085.2:n.1703G=']['submitted_variant'] == '7-6026775-T-C' @@ -16909,8 +16053,7 @@ def test_variant296(self): assert 'NM_001322005.1:c.1216A>G' in list(results.keys()) assert results['NM_001322005.1:c.1216A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322005.1:c.1216A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001322005.1:c.1216A>G']['alt_genomic_loci'] == [] - assert results['NM_001322005.1:c.1216A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001322005.1:c.1216A>G']['alt_genomic_loci'], []) assert results['NM_001322005.1:c.1216A>G']['gene_symbol'] == 'PMS2' assert results['NM_001322005.1:c.1216A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308934.1:p.(Lys406Glu)', 'slr': 'NP_001308934.1:p.(K406E)'} assert results['NM_001322005.1:c.1216A>G']['submitted_variant'] == '7-6026775-T-C' @@ -16927,8 +16070,7 @@ def test_variant296(self): assert 'NM_001322007.1:c.1303A>G' in list(results.keys()) assert results['NM_001322007.1:c.1303A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322007.1:c.1303A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001322007.1:c.1303A>G']['alt_genomic_loci'] == [] - assert 
results['NM_001322007.1:c.1303A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 6, mRNA' + self.assertCountEqual(results['NM_001322007.1:c.1303A>G']['alt_genomic_loci'], []) assert results['NM_001322007.1:c.1303A>G']['gene_symbol'] == 'PMS2' assert results['NM_001322007.1:c.1303A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308936.1:p.(Lys435Glu)', 'slr': 'NP_001308936.1:p.(K435E)'} assert results['NM_001322007.1:c.1303A>G']['submitted_variant'] == '7-6026775-T-C' @@ -16945,8 +16087,7 @@ def test_variant296(self): assert 'NM_000535.5:c.1621G=' in list(results.keys()) assert results['NM_000535.5:c.1621G=']['hgvs_lrg_transcript_variant'] == 'LRG_161t1:c.1621G=' assert results['NM_000535.5:c.1621G=']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000535.5:c.1621G=']['alt_genomic_loci'] == [] - assert results['NM_000535.5:c.1621G=']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000535.5:c.1621G=']['alt_genomic_loci'], []) assert results['NM_000535.5:c.1621G=']['gene_symbol'] == 'PMS2' assert results['NM_000535.5:c.1621G=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000526.1(LRG_161p1):p.(Glu541=)', 'slr': 'NP_000526.1:p.(E541=)'} assert results['NM_000535.5:c.1621G=']['submitted_variant'] == '7-6026775-T-C' @@ -16963,8 +16104,7 @@ def test_variant296(self): assert 'NR_136154.1:n.1708A>G' in list(results.keys()) assert results['NR_136154.1:n.1708A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NR_136154.1:n.1708A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_136154.1:n.1708A>G']['alt_genomic_loci'] == [] - assert results['NR_136154.1:n.1708A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 15, non-coding RNA' + 
self.assertCountEqual(results['NR_136154.1:n.1708A>G']['alt_genomic_loci'], []) assert results['NR_136154.1:n.1708A>G']['gene_symbol'] == 'PMS2' assert results['NR_136154.1:n.1708A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_136154.1:n.1708A>G']['submitted_variant'] == '7-6026775-T-C' @@ -16981,8 +16121,7 @@ def test_variant296(self): assert 'NM_001322011.1:c.688A>G' in list(results.keys()) assert results['NM_001322011.1:c.688A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001322011.1:c.688A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001322011.1:c.688A>G']['alt_genomic_loci'] == [] - assert results['NM_001322011.1:c.688A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 10, mRNA' + self.assertCountEqual(results['NM_001322011.1:c.688A>G']['alt_genomic_loci'], []) assert results['NM_001322011.1:c.688A>G']['gene_symbol'] == 'PMS2' assert results['NM_001322011.1:c.688A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308940.1:p.(Lys230Glu)', 'slr': 'NP_001308940.1:p.(K230E)'} assert results['NM_001322011.1:c.688A>G']['submitted_variant'] == '7-6026775-T-C' @@ -16999,8 +16138,7 @@ def test_variant296(self): assert 'NM_000535.6:c.1621A>G' in list(results.keys()) assert results['NM_000535.6:c.1621A>G']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000535.6:c.1621A>G']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000535.6:c.1621A>G']['alt_genomic_loci'] == [] - assert results['NM_000535.6:c.1621A>G']['transcript_description'] == 'Homo sapiens PMS1 homolog 2, mismatch repair system component (PMS2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000535.6:c.1621A>G']['alt_genomic_loci'], []) assert results['NM_000535.6:c.1621A>G']['gene_symbol'] == 'PMS2' assert results['NM_000535.6:c.1621A>G']['hgvs_predicted_protein_consequence'] 
== {'tlr': 'NP_000526.2:p.(Lys541Glu)', 'slr': 'NP_000526.2:p.(K541E)'} assert results['NM_000535.6:c.1621A>G']['submitted_variant'] == '7-6026775-T-C' @@ -17023,8 +16161,7 @@ def test_variant297(self): assert 'NM_001346900.1:c.2077_2091del' in list(results.keys()) assert results['NM_001346900.1:c.2077_2091del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001346900.1:c.2077_2091del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001346900.1:c.2077_2091del']['alt_genomic_loci'] == [] - assert results['NM_001346900.1:c.2077_2091del']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant 8, mRNA' + self.assertCountEqual(results['NM_001346900.1:c.2077_2091del']['alt_genomic_loci'], []) assert results['NM_001346900.1:c.2077_2091del']['gene_symbol'] == 'EGFR' assert results['NM_001346900.1:c.2077_2091del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333829.1:p.(Glu693_Ala697del)', 'slr': 'NP_001333829.1:p.(E693_A697del)'} assert results['NM_001346900.1:c.2077_2091del']['submitted_variant'] == '7-55242465-GGAATTAAGAGAAGCA-G' @@ -17041,8 +16178,7 @@ def test_variant297(self): assert 'NM_001346898.1:c.2236_2250del' in list(results.keys()) assert results['NM_001346898.1:c.2236_2250del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001346898.1:c.2236_2250del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001346898.1:c.2236_2250del']['alt_genomic_loci'] == [] - assert results['NM_001346898.1:c.2236_2250del']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant 6, mRNA' + self.assertCountEqual(results['NM_001346898.1:c.2236_2250del']['alt_genomic_loci'], []) assert results['NM_001346898.1:c.2236_2250del']['gene_symbol'] == 'EGFR' assert results['NM_001346898.1:c.2236_2250del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333827.1:p.(Glu746_Ala750del)', 'slr': 'NP_001333827.1:p.(E746_A750del)'} 
assert results['NM_001346898.1:c.2236_2250del']['submitted_variant'] == '7-55242465-GGAATTAAGAGAAGCA-G' @@ -17059,8 +16195,7 @@ def test_variant297(self): assert 'NM_001346941.1:c.1435_1449del' in list(results.keys()) assert results['NM_001346941.1:c.1435_1449del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001346941.1:c.1435_1449del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001346941.1:c.1435_1449del']['alt_genomic_loci'] == [] - assert results['NM_001346941.1:c.1435_1449del']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant EGFRvIII, mRNA' + self.assertCountEqual(results['NM_001346941.1:c.1435_1449del']['alt_genomic_loci'], []) assert results['NM_001346941.1:c.1435_1449del']['gene_symbol'] == 'EGFR' assert results['NM_001346941.1:c.1435_1449del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333870.1:p.(Glu479_Ala483del)', 'slr': 'NP_001333870.1:p.(E479_A483del)'} assert results['NM_001346941.1:c.1435_1449del']['submitted_variant'] == '7-55242465-GGAATTAAGAGAAGCA-G' @@ -17078,8 +16213,7 @@ def test_variant297(self): assert 'NM_001346899.1:c.2101_2115del' in list(results.keys()) assert results['NM_001346899.1:c.2101_2115del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001346899.1:c.2101_2115del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001346899.1:c.2101_2115del']['alt_genomic_loci'] == [] - assert results['NM_001346899.1:c.2101_2115del']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant 7, mRNA' + self.assertCountEqual(results['NM_001346899.1:c.2101_2115del']['alt_genomic_loci'], []) assert results['NM_001346899.1:c.2101_2115del']['gene_symbol'] == 'EGFR' assert results['NM_001346899.1:c.2101_2115del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333828.1:p.(Glu701_Ala705del)', 'slr': 'NP_001333828.1:p.(E701_A705del)'} assert 
results['NM_001346899.1:c.2101_2115del']['submitted_variant'] == '7-55242465-GGAATTAAGAGAAGCA-G' @@ -17096,8 +16230,7 @@ def test_variant297(self): assert 'NM_001346897.1:c.2101_2115del' in list(results.keys()) assert results['NM_001346897.1:c.2101_2115del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001346897.1:c.2101_2115del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001346897.1:c.2101_2115del']['alt_genomic_loci'] == [] - assert results['NM_001346897.1:c.2101_2115del']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001346897.1:c.2101_2115del']['alt_genomic_loci'], []) assert results['NM_001346897.1:c.2101_2115del']['gene_symbol'] == 'EGFR' assert results['NM_001346897.1:c.2101_2115del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333826.1:p.(Glu701_Ala705del)', 'slr': 'NP_001333826.1:p.(E701_A705del)'} assert results['NM_001346897.1:c.2101_2115del']['submitted_variant'] == '7-55242465-GGAATTAAGAGAAGCA-G' @@ -17114,8 +16247,7 @@ def test_variant297(self): assert 'NM_005228.3:c.2236_2250del' in list(results.keys()) assert results['NM_005228.3:c.2236_2250del']['hgvs_lrg_transcript_variant'] == 'LRG_304t1:c.2236_2250del' assert results['NM_005228.3:c.2236_2250del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_005228.3:c.2236_2250del']['alt_genomic_loci'] == [] - assert results['NM_005228.3:c.2236_2250del']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_005228.3:c.2236_2250del']['alt_genomic_loci'], []) assert results['NM_005228.3:c.2236_2250del']['gene_symbol'] == 'EGFR' assert results['NM_005228.3:c.2236_2250del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005219.2(LRG_304p1):p.(Glu746_Ala750del)', 'slr': 'NP_005219.2:p.(E746_A750del)'} assert 
results['NM_005228.3:c.2236_2250del']['submitted_variant'] == '7-55242465-GGAATTAAGAGAAGCA-G' @@ -17132,8 +16264,7 @@ def test_variant297(self): assert 'NM_005228.4:c.2236_2250del' in list(results.keys()) assert results['NM_005228.4:c.2236_2250del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_005228.4:c.2236_2250del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_005228.4:c.2236_2250del']['alt_genomic_loci'] == [] - assert results['NM_005228.4:c.2236_2250del']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_005228.4:c.2236_2250del']['alt_genomic_loci'], []) assert results['NM_005228.4:c.2236_2250del']['gene_symbol'] == 'EGFR' assert results['NM_005228.4:c.2236_2250del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005219.2(LRG_304p1):p.(Glu746_Ala750del)', 'slr': 'NP_005219.2:p.(E746_A750del)'} assert results['NM_005228.4:c.2236_2250del']['submitted_variant'] == '7-55242465-GGAATTAAGAGAAGCA-G' @@ -17156,8 +16287,7 @@ def test_variant298(self): assert 'NM_005228.3:c.2284-5_2290dup' in list(results.keys()) assert results['NM_005228.3:c.2284-5_2290dup']['hgvs_lrg_transcript_variant'] == 'LRG_304t1:c.2284-5_2290dup' assert results['NM_005228.3:c.2284-5_2290dup']['refseqgene_context_intronic_sequence'] == 'NG_007726.3(NM_005228.3):c.2284-5_2290dup' - assert results['NM_005228.3:c.2284-5_2290dup']['alt_genomic_loci'] == [] - assert results['NM_005228.3:c.2284-5_2290dup']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_005228.3:c.2284-5_2290dup']['alt_genomic_loci'], []) assert results['NM_005228.3:c.2284-5_2290dup']['gene_symbol'] == 'EGFR' assert results['NM_005228.3:c.2284-5_2290dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005219.2(LRG_304p1):p.?', 'slr': 'NP_005219.2:p.?'} assert 
results['NM_005228.3:c.2284-5_2290dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' @@ -17174,8 +16304,7 @@ def test_variant298(self): assert 'NM_001346899.1:c.2149-5_2155dup' in list(results.keys()) assert results['NM_001346899.1:c.2149-5_2155dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001346899.1:c.2149-5_2155dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001346899.1:c.2149-5_2155dup']['alt_genomic_loci'] == [] - assert results['NM_001346899.1:c.2149-5_2155dup']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant 7, mRNA' + self.assertCountEqual(results['NM_001346899.1:c.2149-5_2155dup']['alt_genomic_loci'], []) assert results['NM_001346899.1:c.2149-5_2155dup']['gene_symbol'] == 'EGFR' assert results['NM_001346899.1:c.2149-5_2155dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333828.1:p.?', 'slr': 'NP_001333828.1:p.?'} assert results['NM_001346899.1:c.2149-5_2155dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' @@ -17192,8 +16321,7 @@ def test_variant298(self): assert 'NM_005228.4:c.2284-5_2290dup' in list(results.keys()) assert results['NM_005228.4:c.2284-5_2290dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_005228.4:c.2284-5_2290dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_005228.4:c.2284-5_2290dup']['alt_genomic_loci'] == [] - assert results['NM_005228.4:c.2284-5_2290dup']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_005228.4:c.2284-5_2290dup']['alt_genomic_loci'], []) assert results['NM_005228.4:c.2284-5_2290dup']['gene_symbol'] == 'EGFR' assert results['NM_005228.4:c.2284-5_2290dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005219.2(LRG_304p1):p.?', 'slr': 'NP_005219.2:p.?'} assert results['NM_005228.4:c.2284-5_2290dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' @@ 
-17210,8 +16338,7 @@ def test_variant298(self): assert 'NM_001346898.1:c.2284-5_2290dup' in list(results.keys()) assert results['NM_001346898.1:c.2284-5_2290dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001346898.1:c.2284-5_2290dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001346898.1:c.2284-5_2290dup']['alt_genomic_loci'] == [] - assert results['NM_001346898.1:c.2284-5_2290dup']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant 6, mRNA' + self.assertCountEqual(results['NM_001346898.1:c.2284-5_2290dup']['alt_genomic_loci'], []) assert results['NM_001346898.1:c.2284-5_2290dup']['gene_symbol'] == 'EGFR' assert results['NM_001346898.1:c.2284-5_2290dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333827.1:p.?', 'slr': 'NP_001333827.1:p.?'} assert results['NM_001346898.1:c.2284-5_2290dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' @@ -17228,8 +16355,7 @@ def test_variant298(self): assert 'NM_001346941.1:c.1483-5_1489dup' in list(results.keys()) assert results['NM_001346941.1:c.1483-5_1489dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001346941.1:c.1483-5_1489dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001346941.1:c.1483-5_1489dup']['alt_genomic_loci'] == [] - assert results['NM_001346941.1:c.1483-5_1489dup']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant EGFRvIII, mRNA' + self.assertCountEqual(results['NM_001346941.1:c.1483-5_1489dup']['alt_genomic_loci'], []) assert results['NM_001346941.1:c.1483-5_1489dup']['gene_symbol'] == 'EGFR' assert results['NM_001346941.1:c.1483-5_1489dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333870.1:p.?', 'slr': 'NP_001333870.1:p.?'} assert results['NM_001346941.1:c.1483-5_1489dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' @@ -17247,8 +16373,7 @@ def test_variant298(self): assert 
'NM_001346900.1:c.2125-5_2131dup' in list(results.keys()) assert results['NM_001346900.1:c.2125-5_2131dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001346900.1:c.2125-5_2131dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001346900.1:c.2125-5_2131dup']['alt_genomic_loci'] == [] - assert results['NM_001346900.1:c.2125-5_2131dup']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant 8, mRNA' + self.assertCountEqual(results['NM_001346900.1:c.2125-5_2131dup']['alt_genomic_loci'], []) assert results['NM_001346900.1:c.2125-5_2131dup']['gene_symbol'] == 'EGFR' assert results['NM_001346900.1:c.2125-5_2131dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333829.1:p.?', 'slr': 'NP_001333829.1:p.?'} assert results['NM_001346900.1:c.2125-5_2131dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' @@ -17265,8 +16390,7 @@ def test_variant298(self): assert 'NR_047551.1:n.1272_1283dup' in list(results.keys()) assert results['NR_047551.1:n.1272_1283dup']['hgvs_lrg_transcript_variant'] == '' assert results['NR_047551.1:n.1272_1283dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_047551.1:n.1272_1283dup']['alt_genomic_loci'] == [] - assert results['NR_047551.1:n.1272_1283dup']['transcript_description'] == 'Homo sapiens EGFR antisense RNA 1 (EGFR-AS1), long non-coding RNA' + self.assertCountEqual(results['NR_047551.1:n.1272_1283dup']['alt_genomic_loci'], []) assert results['NR_047551.1:n.1272_1283dup']['gene_symbol'] == 'EGFR-AS1' assert results['NR_047551.1:n.1272_1283dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_047551.1:n.1272_1283dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' @@ -17283,8 +16407,7 @@ def test_variant298(self): assert 'NM_001346897.1:c.2149-5_2155dup' in list(results.keys()) assert results['NM_001346897.1:c.2149-5_2155dup']['hgvs_lrg_transcript_variant'] == 
'' assert results['NM_001346897.1:c.2149-5_2155dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001346897.1:c.2149-5_2155dup']['alt_genomic_loci'] == [] - assert results['NM_001346897.1:c.2149-5_2155dup']['transcript_description'] == 'Homo sapiens epidermal growth factor receptor (EGFR), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001346897.1:c.2149-5_2155dup']['alt_genomic_loci'], []) assert results['NM_001346897.1:c.2149-5_2155dup']['gene_symbol'] == 'EGFR' assert results['NM_001346897.1:c.2149-5_2155dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333826.1:p.?', 'slr': 'NP_001333826.1:p.?'} assert results['NM_001346897.1:c.2149-5_2155dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' @@ -17307,8 +16430,7 @@ def test_variant299(self): assert 'NM_001540.4:c.82C>A' in list(results.keys()) assert results['NM_001540.4:c.82C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001540.4:c.82C>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001540.4:c.82C>A']['alt_genomic_loci'] == [] - assert results['NM_001540.4:c.82C>A']['transcript_description'] == 'Homo sapiens heat shock protein family B (small) member 1 (HSPB1), mRNA' + self.assertCountEqual(results['NM_001540.4:c.82C>A']['alt_genomic_loci'], []) assert results['NM_001540.4:c.82C>A']['gene_symbol'] == 'HSPB1' assert results['NM_001540.4:c.82C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001531.1(LRG_248p1):p.(Leu28Ile)', 'slr': 'NP_001531.1:p.(L28I)'} assert results['NM_001540.4:c.82C>A']['submitted_variant'] == '7-75932111-C-A' @@ -17326,8 +16448,7 @@ def test_variant299(self): assert 'NM_001540.3:c.82C>A' in list(results.keys()) assert results['NM_001540.3:c.82C>A']['hgvs_lrg_transcript_variant'] == 'LRG_248t1:c.82C>A' assert results['NM_001540.3:c.82C>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001540.3:c.82C>A']['alt_genomic_loci'] == [] - assert 
results['NM_001540.3:c.82C>A']['transcript_description'] == 'Homo sapiens heat shock protein family B (small) member 1 (HSPB1), mRNA' + self.assertCountEqual(results['NM_001540.3:c.82C>A']['alt_genomic_loci'], []) assert results['NM_001540.3:c.82C>A']['gene_symbol'] == 'HSPB1' assert results['NM_001540.3:c.82C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001531.1(LRG_248p1):p.(Leu28Ile)', 'slr': 'NP_001531.1:p.(L28I)'} assert results['NM_001540.3:c.82C>A']['submitted_variant'] == '7-75932111-C-A' @@ -17351,8 +16472,7 @@ def test_variant300(self): assert 'NM_005751.4:c.4004_4006dup' in list(results.keys()) assert results['NM_005751.4:c.4004_4006dup']['hgvs_lrg_transcript_variant'] == 'LRG_331t1:c.4004_4006dup' assert results['NM_005751.4:c.4004_4006dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_005751.4:c.4004_4006dup']['alt_genomic_loci'] == [] - assert results['NM_005751.4:c.4004_4006dup']['transcript_description'] == 'Homo sapiens A-kinase anchoring protein 9 (AKAP9), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_005751.4:c.4004_4006dup']['alt_genomic_loci'], []) assert results['NM_005751.4:c.4004_4006dup']['gene_symbol'] == 'AKAP9' assert results['NM_005751.4:c.4004_4006dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005742.4(LRG_331p1):p.(Lys1335_Leu1336insGln)', 'slr': 'NP_005742.4:p.(K1335_L1336insQ)'} assert results['NM_005751.4:c.4004_4006dup']['submitted_variant'] == '7-91652178-A-AAAC' @@ -17369,8 +16489,7 @@ def test_variant300(self): assert 'NM_147185.2:c.4004_4006dup' in list(results.keys()) assert results['NM_147185.2:c.4004_4006dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_147185.2:c.4004_4006dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_147185.2:c.4004_4006dup']['alt_genomic_loci'] == [] - assert results['NM_147185.2:c.4004_4006dup']['transcript_description'] == 'Homo sapiens A-kinase anchoring protein 9 (AKAP9), transcript variant 3, mRNA' 
+ self.assertCountEqual(results['NM_147185.2:c.4004_4006dup']['alt_genomic_loci'], []) assert results['NM_147185.2:c.4004_4006dup']['gene_symbol'] == 'AKAP9' assert results['NM_147185.2:c.4004_4006dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_671714.1:p.(Lys1335_Leu1336insGln)', 'slr': 'NP_671714.1:p.(K1335_L1336insQ)'} assert results['NM_147185.2:c.4004_4006dup']['submitted_variant'] == '7-91652178-A-AAAC' @@ -17394,8 +16513,7 @@ def test_variant301(self): assert 'NR_149084.1:n.221+1140_221+1142del' in list(results.keys()) assert results['NR_149084.1:n.221+1140_221+1142del']['hgvs_lrg_transcript_variant'] == '' assert results['NR_149084.1:n.221+1140_221+1142del']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_149084.1:n.221+1140_221+1142del']['alt_genomic_loci'] == [] - assert results['NR_149084.1:n.221+1140_221+1142del']['transcript_description'] == 'Homo sapiens CFTR antisense RNA 1 (CFTR-AS1), long non-coding RNA' + self.assertCountEqual(results['NR_149084.1:n.221+1140_221+1142del']['alt_genomic_loci'], []) assert results['NR_149084.1:n.221+1140_221+1142del']['gene_symbol'] == 'CFTR-AS1' assert results['NR_149084.1:n.221+1140_221+1142del']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_149084.1:n.221+1140_221+1142del']['submitted_variant'] == '7-117199644-ATCT-A' @@ -17412,8 +16530,7 @@ def test_variant301(self): assert 'NM_000492.3:c.1521_1523del' in list(results.keys()) assert results['NM_000492.3:c.1521_1523del']['hgvs_lrg_transcript_variant'] == 'LRG_663t1:c.1521_1523del' assert results['NM_000492.3:c.1521_1523del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000492.3:c.1521_1523del']['alt_genomic_loci'] == [] - assert results['NM_000492.3:c.1521_1523del']['transcript_description'] == 'Homo sapiens cystic fibrosis transmembrane conductance regulator (CFTR), mRNA' + 
self.assertCountEqual(results['NM_000492.3:c.1521_1523del']['alt_genomic_loci'], []) assert results['NM_000492.3:c.1521_1523del']['gene_symbol'] == 'CFTR' assert results['NM_000492.3:c.1521_1523del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000483.3(LRG_663p1):p.(Phe508del)', 'slr': 'NP_000483.3:p.(F508del)'} assert results['NM_000492.3:c.1521_1523del']['submitted_variant'] == '7-117199644-ATCT-A' @@ -17436,8 +16553,7 @@ def test_variant302(self): assert 'NR_148928.1:n.2896_2897delinsAG' in list(results.keys()) assert results['NR_148928.1:n.2896_2897delinsAG']['hgvs_lrg_transcript_variant'] == '' assert results['NR_148928.1:n.2896_2897delinsAG']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_148928.1:n.2896_2897delinsAG']['alt_genomic_loci'] == [] - assert results['NR_148928.1:n.2896_2897delinsAG']['transcript_description'] == 'Homo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 3, non-coding RNA' + self.assertCountEqual(results['NR_148928.1:n.2896_2897delinsAG']['alt_genomic_loci'], []) assert results['NR_148928.1:n.2896_2897delinsAG']['gene_symbol'] == 'BRAF' assert results['NR_148928.1:n.2896_2897delinsAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_148928.1:n.2896_2897delinsAG']['submitted_variant'] == '7-140453136-AC-CT' @@ -17454,8 +16570,7 @@ def test_variant302(self): assert 'NM_004333.4:c.1798_1799delinsAG' in list(results.keys()) assert results['NM_004333.4:c.1798_1799delinsAG']['hgvs_lrg_transcript_variant'] == 'LRG_299t1:c.1798_1799delinsAG' assert results['NM_004333.4:c.1798_1799delinsAG']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004333.4:c.1798_1799delinsAG']['alt_genomic_loci'] == [] - assert results['NM_004333.4:c.1798_1799delinsAG']['transcript_description'] == 'Homo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), mRNA' + 
self.assertCountEqual(results['NM_004333.4:c.1798_1799delinsAG']['alt_genomic_loci'], []) assert results['NM_004333.4:c.1798_1799delinsAG']['gene_symbol'] == 'BRAF' assert results['NM_004333.4:c.1798_1799delinsAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004324.2(LRG_299p1):p.(Val600Arg)', 'slr': 'NP_004324.2:p.(V600R)'} assert results['NM_004333.4:c.1798_1799delinsAG']['submitted_variant'] == '7-140453136-AC-CT' @@ -17472,8 +16587,7 @@ def test_variant302(self): assert 'NM_004333.5:c.1798_1799delinsAG' in list(results.keys()) assert results['NM_004333.5:c.1798_1799delinsAG']['hgvs_lrg_transcript_variant'] == '' assert results['NM_004333.5:c.1798_1799delinsAG']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004333.5:c.1798_1799delinsAG']['alt_genomic_loci'] == [] - assert results['NM_004333.5:c.1798_1799delinsAG']['transcript_description'] == 'Homo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_004333.5:c.1798_1799delinsAG']['alt_genomic_loci'], []) assert results['NM_004333.5:c.1798_1799delinsAG']['gene_symbol'] == 'BRAF' assert results['NM_004333.5:c.1798_1799delinsAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004324.2(LRG_299p1):p.(Val600Arg)', 'slr': 'NP_004324.2:p.(V600R)'} assert results['NM_004333.5:c.1798_1799delinsAG']['submitted_variant'] == '7-140453136-AC-CT' @@ -17491,8 +16605,7 @@ def test_variant302(self): assert 'NM_001354609.1:c.1798_1799delinsAG' in list(results.keys()) assert results['NM_001354609.1:c.1798_1799delinsAG']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001354609.1:c.1798_1799delinsAG']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001354609.1:c.1798_1799delinsAG']['alt_genomic_loci'] == [] - assert results['NM_001354609.1:c.1798_1799delinsAG']['transcript_description'] == 'Homo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 2, mRNA' + 
self.assertCountEqual(results['NM_001354609.1:c.1798_1799delinsAG']['alt_genomic_loci'], []) assert results['NM_001354609.1:c.1798_1799delinsAG']['gene_symbol'] == 'BRAF' assert results['NM_001354609.1:c.1798_1799delinsAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001341538.1:p.(Val600Arg)', 'slr': 'NP_001341538.1:p.(V600R)'} assert results['NM_001354609.1:c.1798_1799delinsAG']['submitted_variant'] == '7-140453136-AC-CT' @@ -17515,8 +16628,7 @@ def test_variant303(self): assert 'NM_001354609.1:c.1799T>A' in list(results.keys()) assert results['NM_001354609.1:c.1799T>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001354609.1:c.1799T>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001354609.1:c.1799T>A']['alt_genomic_loci'] == [] - assert results['NM_001354609.1:c.1799T>A']['transcript_description'] == 'Homo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001354609.1:c.1799T>A']['alt_genomic_loci'], []) assert results['NM_001354609.1:c.1799T>A']['gene_symbol'] == 'BRAF' assert results['NM_001354609.1:c.1799T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001341538.1:p.(Val600Glu)', 'slr': 'NP_001341538.1:p.(V600E)'} assert results['NM_001354609.1:c.1799T>A']['submitted_variant'] == '7-140453136-A-T' @@ -17533,8 +16645,7 @@ def test_variant303(self): assert 'NR_148928.1:n.2897T>A' in list(results.keys()) assert results['NR_148928.1:n.2897T>A']['hgvs_lrg_transcript_variant'] == '' assert results['NR_148928.1:n.2897T>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_148928.1:n.2897T>A']['alt_genomic_loci'] == [] - assert results['NR_148928.1:n.2897T>A']['transcript_description'] == 'Homo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 3, non-coding RNA' + self.assertCountEqual(results['NR_148928.1:n.2897T>A']['alt_genomic_loci'], []) assert 
results['NR_148928.1:n.2897T>A']['gene_symbol'] == 'BRAF' assert results['NR_148928.1:n.2897T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_148928.1:n.2897T>A']['submitted_variant'] == '7-140453136-A-T' @@ -17551,8 +16662,7 @@ def test_variant303(self): assert 'NM_004333.5:c.1799T>A' in list(results.keys()) assert results['NM_004333.5:c.1799T>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_004333.5:c.1799T>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004333.5:c.1799T>A']['alt_genomic_loci'] == [] - assert results['NM_004333.5:c.1799T>A']['transcript_description'] == 'Homo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_004333.5:c.1799T>A']['alt_genomic_loci'], []) assert results['NM_004333.5:c.1799T>A']['gene_symbol'] == 'BRAF' assert results['NM_004333.5:c.1799T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004324.2(LRG_299p1):p.(Val600Glu)', 'slr': 'NP_004324.2:p.(V600E)'} assert results['NM_004333.5:c.1799T>A']['submitted_variant'] == '7-140453136-A-T' @@ -17570,8 +16680,7 @@ def test_variant303(self): assert 'NM_004333.4:c.1799T>A' in list(results.keys()) assert results['NM_004333.4:c.1799T>A']['hgvs_lrg_transcript_variant'] == 'LRG_299t1:c.1799T>A' assert results['NM_004333.4:c.1799T>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004333.4:c.1799T>A']['alt_genomic_loci'] == [] - assert results['NM_004333.4:c.1799T>A']['transcript_description'] == 'Homo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), mRNA' + self.assertCountEqual(results['NM_004333.4:c.1799T>A']['alt_genomic_loci'], []) assert results['NM_004333.4:c.1799T>A']['gene_symbol'] == 'BRAF' assert results['NM_004333.4:c.1799T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004324.2(LRG_299p1):p.(Val600Glu)', 'slr': 'NP_004324.2:p.(V600E)'} assert 
results['NM_004333.4:c.1799T>A']['submitted_variant'] == '7-140453136-A-T' @@ -17594,8 +16703,7 @@ def test_variant304(self): assert 'NR_148928.1:n.2896G>A' in list(results.keys()) assert results['NR_148928.1:n.2896G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NR_148928.1:n.2896G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_148928.1:n.2896G>A']['alt_genomic_loci'] == [] - assert results['NR_148928.1:n.2896G>A']['transcript_description'] == 'Homo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 3, non-coding RNA' + self.assertCountEqual(results['NR_148928.1:n.2896G>A']['alt_genomic_loci'], []) assert results['NR_148928.1:n.2896G>A']['gene_symbol'] == 'BRAF' assert results['NR_148928.1:n.2896G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_148928.1:n.2896G>A']['submitted_variant'] == '7-140453137-C-T' @@ -17612,8 +16720,7 @@ def test_variant304(self): assert 'NM_004333.5:c.1798G>A' in list(results.keys()) assert results['NM_004333.5:c.1798G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_004333.5:c.1798G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004333.5:c.1798G>A']['alt_genomic_loci'] == [] - assert results['NM_004333.5:c.1798G>A']['transcript_description'] == 'Homo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_004333.5:c.1798G>A']['alt_genomic_loci'], []) assert results['NM_004333.5:c.1798G>A']['gene_symbol'] == 'BRAF' assert results['NM_004333.5:c.1798G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004324.2(LRG_299p1):p.(Val600Met)', 'slr': 'NP_004324.2:p.(V600M)'} assert results['NM_004333.5:c.1798G>A']['submitted_variant'] == '7-140453137-C-T' @@ -17630,8 +16737,7 @@ def test_variant304(self): assert 'NM_004333.4:c.1798G>A' in list(results.keys()) assert 
results['NM_004333.4:c.1798G>A']['hgvs_lrg_transcript_variant'] == 'LRG_299t1:c.1798G>A' assert results['NM_004333.4:c.1798G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004333.4:c.1798G>A']['alt_genomic_loci'] == [] - assert results['NM_004333.4:c.1798G>A']['transcript_description'] == 'Homo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), mRNA' + self.assertCountEqual(results['NM_004333.4:c.1798G>A']['alt_genomic_loci'], []) assert results['NM_004333.4:c.1798G>A']['gene_symbol'] == 'BRAF' assert results['NM_004333.4:c.1798G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004324.2(LRG_299p1):p.(Val600Met)', 'slr': 'NP_004324.2:p.(V600M)'} assert results['NM_004333.4:c.1798G>A']['submitted_variant'] == '7-140453137-C-T' @@ -17648,8 +16754,7 @@ def test_variant304(self): assert 'NM_001354609.1:c.1798G>A' in list(results.keys()) assert results['NM_001354609.1:c.1798G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001354609.1:c.1798G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001354609.1:c.1798G>A']['alt_genomic_loci'] == [] - assert results['NM_001354609.1:c.1798G>A']['transcript_description'] == 'Homo sapiens B-Raf proto-oncogene, serine/threonine kinase (BRAF), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001354609.1:c.1798G>A']['alt_genomic_loci'], []) assert results['NM_001354609.1:c.1798G>A']['gene_symbol'] == 'BRAF' assert results['NM_001354609.1:c.1798G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001341538.1:p.(Val600Met)', 'slr': 'NP_001341538.1:p.(V600M)'} assert results['NM_001354609.1:c.1798G>A']['submitted_variant'] == '7-140453137-C-T' @@ -17674,8 +16779,7 @@ def test_variant305(self): assert 'NM_000083.2:c.180+3A>T' in list(results.keys()) assert results['NM_000083.2:c.180+3A>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000083.2:c.180+3A>T']['refseqgene_context_intronic_sequence'] == 
'NG_009815.1(NM_000083.2):c.180+3A>T' - assert results['NM_000083.2:c.180+3A>T']['alt_genomic_loci'] == [] - assert results['NM_000083.2:c.180+3A>T']['transcript_description'] == 'Homo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000083.2:c.180+3A>T']['alt_genomic_loci'], []) assert results['NM_000083.2:c.180+3A>T']['gene_symbol'] == 'CLCN1' assert results['NM_000083.2:c.180+3A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000074.2:p.?', 'slr': 'NP_000074.2:p.?'} assert results['NM_000083.2:c.180+3A>T']['submitted_variant'] == '7-143013488-A-T' @@ -17692,8 +16796,7 @@ def test_variant305(self): assert 'NR_046453.1:n.267+3A>T' in list(results.keys()) assert results['NR_046453.1:n.267+3A>T']['hgvs_lrg_transcript_variant'] == '' assert results['NR_046453.1:n.267+3A>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_046453.1:n.267+3A>T']['alt_genomic_loci'] == [] - assert results['NR_046453.1:n.267+3A>T']['transcript_description'] == 'Homo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 2, non-coding RNA' + self.assertCountEqual(results['NR_046453.1:n.267+3A>T']['alt_genomic_loci'], []) assert results['NR_046453.1:n.267+3A>T']['gene_symbol'] == 'CLCN1' assert results['NR_046453.1:n.267+3A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_046453.1:n.267+3A>T']['submitted_variant'] == '7-143013488-A-T' @@ -17716,8 +16819,7 @@ def test_variant306(self): assert 'NR_046453.1:n.776G>A' in list(results.keys()) assert results['NR_046453.1:n.776G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NR_046453.1:n.776G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_046453.1:n.776G>A']['alt_genomic_loci'] == [] - assert results['NR_046453.1:n.776G>A']['transcript_description'] == 'Homo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 2, non-coding 
RNA' + self.assertCountEqual(results['NR_046453.1:n.776G>A']['alt_genomic_loci'], []) assert results['NR_046453.1:n.776G>A']['gene_symbol'] == 'CLCN1' assert results['NR_046453.1:n.776G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_046453.1:n.776G>A']['submitted_variant'] == '7-143018934-G-A' @@ -17735,8 +16837,7 @@ def test_variant306(self): assert 'NM_000083.2:c.689G>A' in list(results.keys()) assert results['NM_000083.2:c.689G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000083.2:c.689G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000083.2:c.689G>A']['alt_genomic_loci'] == [] - assert results['NM_000083.2:c.689G>A']['transcript_description'] == 'Homo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000083.2:c.689G>A']['alt_genomic_loci'], []) assert results['NM_000083.2:c.689G>A']['gene_symbol'] == 'CLCN1' assert results['NM_000083.2:c.689G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000074.2:p.(Gly230Glu)', 'slr': 'NP_000074.2:p.(G230E)'} assert results['NM_000083.2:c.689G>A']['submitted_variant'] == '7-143018934-G-A' @@ -17760,8 +16861,7 @@ def test_variant307(self): assert 'NR_046453.1:n.2620C>T' in list(results.keys()) assert results['NR_046453.1:n.2620C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NR_046453.1:n.2620C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_046453.1:n.2620C>T']['alt_genomic_loci'] == [] - assert results['NR_046453.1:n.2620C>T']['transcript_description'] == 'Homo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 2, non-coding RNA' + self.assertCountEqual(results['NR_046453.1:n.2620C>T']['alt_genomic_loci'], []) assert results['NR_046453.1:n.2620C>T']['gene_symbol'] == 'CLCN1' assert results['NR_046453.1:n.2620C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 
'Non-coding :n.'} assert results['NR_046453.1:n.2620C>T']['submitted_variant'] == '7-143048771-C-T' @@ -17778,8 +16878,7 @@ def test_variant307(self): assert 'NM_000083.2:c.2680C>T' in list(results.keys()) assert results['NM_000083.2:c.2680C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000083.2:c.2680C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000083.2:c.2680C>T']['alt_genomic_loci'] == [] - assert results['NM_000083.2:c.2680C>T']['transcript_description'] == 'Homo sapiens chloride voltage-gated channel 1 (CLCN1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000083.2:c.2680C>T']['alt_genomic_loci'], []) assert results['NM_000083.2:c.2680C>T']['gene_symbol'] == 'CLCN1' assert results['NM_000083.2:c.2680C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000074.2:p.(Arg894Ter)', 'slr': 'NP_000074.2:p.(R894*)'} assert results['NM_000083.2:c.2680C>T']['submitted_variant'] == '7-143048771-C-T' @@ -17802,8 +16901,7 @@ def test_variant308(self): assert 'NM_014629.3:c.2399C>T' in list(results.keys()) assert results['NM_014629.3:c.2399C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_014629.3:c.2399C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_014629.3:c.2399C>T']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'HSCHR8_8_CTG1', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'chr8_KI270821v1_alt', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}] - assert results['NM_014629.3:c.2399C>T']['transcript_description'] == 'Homo sapiens Rho guanine nucleotide exchange factor 10 (ARHGEF10), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_014629.3:c.2399C>T']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'HSCHR8_8_CTG1', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}, {'hg38': 
{'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'chr8_KI270821v1_alt', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}]) assert results['NM_014629.3:c.2399C>T']['gene_symbol'] == 'ARHGEF10' assert results['NM_014629.3:c.2399C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055444.2(LRG_234p1):p.(Pro800Leu)', 'slr': 'NP_055444.2:p.(P800L)'} assert results['NM_014629.3:c.2399C>T']['submitted_variant'] == '8-1871951-C-T' @@ -17820,8 +16918,7 @@ def test_variant308(self): assert 'NM_014629.2:c.2399C>T' in list(results.keys()) assert results['NM_014629.2:c.2399C>T']['hgvs_lrg_transcript_variant'] == 'LRG_234t1:c.2399C>T' assert results['NM_014629.2:c.2399C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_014629.2:c.2399C>T']['alt_genomic_loci'] == [] - assert results['NM_014629.2:c.2399C>T']['transcript_description'] == 'Homo sapiens Rho guanine nucleotide exchange factor (GEF) 10 (ARHGEF10), mRNA' + self.assertCountEqual(results['NM_014629.2:c.2399C>T']['alt_genomic_loci'], []) assert results['NM_014629.2:c.2399C>T']['gene_symbol'] == 'ARHGEF10' assert results['NM_014629.2:c.2399C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055444.2(LRG_234p1):p.(Pro800Leu)', 'slr': 'NP_055444.2:p.(P800L)'} assert results['NM_014629.2:c.2399C>T']['submitted_variant'] == '8-1871951-C-T' @@ -17838,8 +16935,7 @@ def test_variant308(self): assert 'NM_001308153.1:c.2471C>T' in list(results.keys()) assert results['NM_001308153.1:c.2471C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001308153.1:c.2471C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001308153.1:c.2471C>T']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'HSCHR8_8_CTG1', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'chr8_KI270821v1_alt', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}] - assert 
results['NM_001308153.1:c.2471C>T']['transcript_description'] == 'Homo sapiens Rho guanine nucleotide exchange factor 10 (ARHGEF10), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001308153.1:c.2471C>T']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'HSCHR8_8_CTG1', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'chr8_KI270821v1_alt', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}]) assert results['NM_001308153.1:c.2471C>T']['gene_symbol'] == 'ARHGEF10' assert results['NM_001308153.1:c.2471C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001295082.1:p.(Pro824Leu)', 'slr': 'NP_001295082.1:p.(P824L)'} assert results['NM_001308153.1:c.2471C>T']['submitted_variant'] == '8-1871951-C-T' @@ -17857,8 +16953,7 @@ def test_variant308(self): assert 'NM_001308152.1:c.2285C>T' in list(results.keys()) assert results['NM_001308152.1:c.2285C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001308152.1:c.2285C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001308152.1:c.2285C>T']['alt_genomic_loci'] == [{'grch38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'HSCHR8_8_CTG1', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'chr8_KI270821v1_alt', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}] - assert results['NM_001308152.1:c.2285C>T']['transcript_description'] == 'Homo sapiens Rho guanine nucleotide exchange factor 10 (ARHGEF10), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001308152.1:c.2285C>T']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'HSCHR8_8_CTG1', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'chr8_KI270821v1_alt', 'ref': 'C', 
'pos': '107161', 'alt': 'T'}}}]) assert results['NM_001308152.1:c.2285C>T']['gene_symbol'] == 'ARHGEF10' assert results['NM_001308152.1:c.2285C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001295081.1:p.(Pro762Leu)', 'slr': 'NP_001295081.1:p.(P762L)'} assert results['NM_001308152.1:c.2285C>T']['submitted_variant'] == '8-1871951-C-T' @@ -17881,8 +16976,7 @@ def test_variant309(self): assert 'NM_001261407.1:c.5504dup' in list(results.keys()) assert results['NM_001261407.1:c.5504dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001261407.1:c.5504dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001261407.1:c.5504dup']['alt_genomic_loci'] == [] - assert results['NM_001261407.1:c.5504dup']['transcript_description'] == 'Homo sapiens multiple PDZ domain crumbs cell polarity complex component (MPDZ), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001261407.1:c.5504dup']['alt_genomic_loci'], []) assert results['NM_001261407.1:c.5504dup']['gene_symbol'] == 'MPDZ' assert results['NM_001261407.1:c.5504dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001248336.1:p.(Thr1836AsnfsTer15)', 'slr': 'NP_001248336.1:p.(T1836Nfs*15)'} assert results['NM_001261407.1:c.5504dup']['submitted_variant'] == '9-13112056-T-TG' @@ -17899,8 +16993,7 @@ def test_variant309(self): assert 'NM_001330637.1:c.5690dup' in list(results.keys()) assert results['NM_001330637.1:c.5690dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001330637.1:c.5690dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001330637.1:c.5690dup']['alt_genomic_loci'] == [] - assert results['NM_001330637.1:c.5690dup']['transcript_description'] == 'Homo sapiens multiple PDZ domain crumbs cell polarity complex component (MPDZ), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001330637.1:c.5690dup']['alt_genomic_loci'], []) assert results['NM_001330637.1:c.5690dup']['gene_symbol'] == 'MPDZ' assert 
results['NM_001330637.1:c.5690dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317566.1:p.(Thr1898AsnfsTer15)', 'slr': 'NP_001317566.1:p.(T1898Nfs*15)'} assert results['NM_001330637.1:c.5690dup']['submitted_variant'] == '9-13112056-T-TG' @@ -17917,8 +17010,7 @@ def test_variant309(self): assert 'NM_001261406.1:c.5591dup' in list(results.keys()) assert results['NM_001261406.1:c.5591dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001261406.1:c.5591dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001261406.1:c.5591dup']['alt_genomic_loci'] == [] - assert results['NM_001261406.1:c.5591dup']['transcript_description'] == 'Homo sapiens multiple PDZ domain crumbs cell polarity complex component (MPDZ), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001261406.1:c.5591dup']['alt_genomic_loci'], []) assert results['NM_001261406.1:c.5591dup']['gene_symbol'] == 'MPDZ' assert results['NM_001261406.1:c.5591dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001248335.1:p.(Thr1865AsnfsTer15)', 'slr': 'NP_001248335.1:p.(T1865Nfs*15)'} assert results['NM_001261406.1:c.5591dup']['submitted_variant'] == '9-13112056-T-TG' @@ -17936,8 +17028,7 @@ def test_variant309(self): assert 'NM_003829.4:c.5603dup' in list(results.keys()) assert results['NM_003829.4:c.5603dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_003829.4:c.5603dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_003829.4:c.5603dup']['alt_genomic_loci'] == [] - assert results['NM_003829.4:c.5603dup']['transcript_description'] == 'Homo sapiens multiple PDZ domain crumbs cell polarity complex component (MPDZ), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_003829.4:c.5603dup']['alt_genomic_loci'], []) assert results['NM_003829.4:c.5603dup']['gene_symbol'] == 'MPDZ' assert results['NM_003829.4:c.5603dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003820.2:p.(Thr1869AsnfsTer15)', 'slr': 
'NP_003820.2:p.(T1869Nfs*15)'} assert results['NM_003829.4:c.5603dup']['submitted_variant'] == '9-13112056-T-TG' @@ -17960,8 +17051,7 @@ def test_variant310(self): assert 'NM_058197.4:c.*74-1G>T' in list(results.keys()) assert results['NM_058197.4:c.*74-1G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_058197.4:c.*74-1G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_058197.4:c.*74-1G>T']['alt_genomic_loci'] == [] - assert results['NM_058197.4:c.*74-1G>T']['transcript_description'] == 'Homo sapiens cyclin dependent kinase inhibitor 2A (CDKN2A), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_058197.4:c.*74-1G>T']['alt_genomic_loci'], []) assert results['NM_058197.4:c.*74-1G>T']['gene_symbol'] == 'CDKN2A' assert results['NM_058197.4:c.*74-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_478104.2:p.?', 'slr': 'NP_478104.2:p.?'} assert results['NM_058197.4:c.*74-1G>T']['submitted_variant'] == '9-21971208-C-A' @@ -17978,8 +17068,7 @@ def test_variant310(self): assert 'NM_000077.4:c.151-1G>T' in list(results.keys()) assert results['NM_000077.4:c.151-1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_11t1:c.151-1G>T' assert results['NM_000077.4:c.151-1G>T']['refseqgene_context_intronic_sequence'] == 'NG_007485.1(NM_000077.4):c.151-1G>T' - assert results['NM_000077.4:c.151-1G>T']['alt_genomic_loci'] == [] - assert results['NM_000077.4:c.151-1G>T']['transcript_description'] == 'Homo sapiens cyclin dependent kinase inhibitor 2A (CDKN2A), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000077.4:c.151-1G>T']['alt_genomic_loci'], []) assert results['NM_000077.4:c.151-1G>T']['gene_symbol'] == 'CDKN2A' assert results['NM_000077.4:c.151-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000068.1(LRG_11p1):p.?', 'slr': 'NP_000068.1:p.?'} assert results['NM_000077.4:c.151-1G>T']['submitted_variant'] == '9-21971208-C-A' @@ -17996,8 +17085,7 @@ def test_variant310(self): assert 
'NM_001363763.1:c.-3-1G>T' in list(results.keys()) assert results['NM_001363763.1:c.-3-1G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363763.1:c.-3-1G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001363763.1:c.-3-1G>T']['alt_genomic_loci'] == [] - assert results['NM_001363763.1:c.-3-1G>T']['transcript_description'] == 'Homo sapiens cyclin dependent kinase inhibitor 2A (CDKN2A), transcript variant 6, mRNA' + self.assertCountEqual(results['NM_001363763.1:c.-3-1G>T']['alt_genomic_loci'], []) assert results['NM_001363763.1:c.-3-1G>T']['gene_symbol'] == 'CDKN2A' assert results['NM_001363763.1:c.-3-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350692.1:p.?', 'slr': 'NP_001350692.1:p.?'} assert results['NM_001363763.1:c.-3-1G>T']['submitted_variant'] == '9-21971208-C-A' @@ -18014,8 +17102,7 @@ def test_variant310(self): assert 'NM_001195132.1:c.151-1G>T' in list(results.keys()) assert results['NM_001195132.1:c.151-1G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001195132.1:c.151-1G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001195132.1:c.151-1G>T']['alt_genomic_loci'] == [] - assert results['NM_001195132.1:c.151-1G>T']['transcript_description'] == 'Homo sapiens cyclin dependent kinase inhibitor 2A (CDKN2A), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001195132.1:c.151-1G>T']['alt_genomic_loci'], []) assert results['NM_001195132.1:c.151-1G>T']['gene_symbol'] == 'CDKN2A' assert results['NM_001195132.1:c.151-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001182061.1:p.?', 'slr': 'NP_001182061.1:p.?'} assert results['NM_001195132.1:c.151-1G>T']['submitted_variant'] == '9-21971208-C-A' @@ -18032,8 +17119,7 @@ def test_variant310(self): assert 'NM_058195.3:c.194-1G>T' in list(results.keys()) assert results['NM_058195.3:c.194-1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_11t2:c.194-1G>T' assert 
results['NM_058195.3:c.194-1G>T']['refseqgene_context_intronic_sequence'] == 'NG_007485.1(NM_058195.3):c.194-1G>T' - assert results['NM_058195.3:c.194-1G>T']['alt_genomic_loci'] == [] - assert results['NM_058195.3:c.194-1G>T']['transcript_description'] == 'Homo sapiens cyclin dependent kinase inhibitor 2A (CDKN2A), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_058195.3:c.194-1G>T']['alt_genomic_loci'], []) assert results['NM_058195.3:c.194-1G>T']['gene_symbol'] == 'CDKN2A' assert results['NM_058195.3:c.194-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_478102.2(LRG_11p2):p.?', 'slr': 'NP_478102.2:p.?'} assert results['NM_058195.3:c.194-1G>T']['submitted_variant'] == '9-21971208-C-A' @@ -18057,8 +17143,7 @@ def test_variant311(self): assert 'NM_001301227.1:c.773-3dup' in list(results.keys()) assert results['NM_001301227.1:c.773-3dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001301227.1:c.773-3dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001301227.1:c.773-3dup']['alt_genomic_loci'] == [] - assert results['NM_001301227.1:c.773-3dup']['transcript_description'] == 'Homo sapiens tropomyosin 2 (TPM2), transcript variant Tpm2.4, mRNA' + self.assertCountEqual(results['NM_001301227.1:c.773-3dup']['alt_genomic_loci'], []) assert results['NM_001301227.1:c.773-3dup']['gene_symbol'] == 'TPM2' assert results['NM_001301227.1:c.773-3dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001288156.1:p.?', 'slr': 'NP_001288156.1:p.?'} assert results['NM_001301227.1:c.773-3dup']['submitted_variant'] == '9-35683240-T-TG' @@ -18075,8 +17160,7 @@ def test_variant311(self): assert 'NM_001301226.1:c.772+1002dup' in list(results.keys()) assert results['NM_001301226.1:c.772+1002dup']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001301226.1:c.772+1002dup']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001301226.1:c.772+1002dup']['alt_genomic_loci'] == [] - assert 
results['NM_001301226.1:c.772+1002dup']['transcript_description'] == 'Homo sapiens tropomyosin 2 (TPM2), transcript variant Tpm2.3, mRNA' + self.assertCountEqual(results['NM_001301226.1:c.772+1002dup']['alt_genomic_loci'], []) assert results['NM_001301226.1:c.772+1002dup']['gene_symbol'] == 'TPM2' assert results['NM_001301226.1:c.772+1002dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001288155.1:p.?', 'slr': 'NP_001288155.1:p.?'} assert results['NM_001301226.1:c.772+1002dup']['submitted_variant'] == '9-35683240-T-TG' @@ -18093,8 +17177,7 @@ def test_variant311(self): assert 'NM_213674.1:c.772+1002dup' in list(results.keys()) assert results['NM_213674.1:c.772+1002dup']['hgvs_lrg_transcript_variant'] == 'LRG_680t1:c.772+1002dup' assert results['NM_213674.1:c.772+1002dup']['refseqgene_context_intronic_sequence'] == 'NG_011620.1(NM_213674.1):c.772+1002dup' - assert results['NM_213674.1:c.772+1002dup']['alt_genomic_loci'] == [] - assert results['NM_213674.1:c.772+1002dup']['transcript_description'] == 'Homo sapiens tropomyosin 2 (TPM2), transcript variant Tpm2.1, mRNA' + self.assertCountEqual(results['NM_213674.1:c.772+1002dup']['alt_genomic_loci'], []) assert results['NM_213674.1:c.772+1002dup']['gene_symbol'] == 'TPM2' assert results['NM_213674.1:c.772+1002dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_998839.1(LRG_680p1):p.?', 'slr': 'NP_998839.1:p.?'} assert results['NM_213674.1:c.772+1002dup']['submitted_variant'] == '9-35683240-T-TG' @@ -18111,8 +17194,7 @@ def test_variant311(self): assert 'NM_003289.3:c.773-3dup' in list(results.keys()) assert results['NM_003289.3:c.773-3dup']['hgvs_lrg_transcript_variant'] == 'LRG_680t2:c.773-3dup' assert results['NM_003289.3:c.773-3dup']['refseqgene_context_intronic_sequence'] == 'NG_011620.1(NM_003289.3):c.773-3dup' - assert results['NM_003289.3:c.773-3dup']['alt_genomic_loci'] == [] - assert results['NM_003289.3:c.773-3dup']['transcript_description'] == 'Homo sapiens tropomyosin 2 (TPM2), transcript 
variant Tpm2.2, mRNA' + self.assertCountEqual(results['NM_003289.3:c.773-3dup']['alt_genomic_loci'], []) assert results['NM_003289.3:c.773-3dup']['gene_symbol'] == 'TPM2' assert results['NM_003289.3:c.773-3dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003280.2(LRG_680p2):p.?', 'slr': 'NP_003280.2:p.?'} assert results['NM_003289.3:c.773-3dup']['submitted_variant'] == '9-35683240-T-TG' @@ -18136,8 +17218,7 @@ def test_variant312(self): assert 'NM_000368.4:c.733C>T' in list(results.keys()) assert results['NM_000368.4:c.733C>T']['hgvs_lrg_transcript_variant'] == 'LRG_486t1:c.733C>T' assert results['NM_000368.4:c.733C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000368.4:c.733C>T']['alt_genomic_loci'] == [] - assert results['NM_000368.4:c.733C>T']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000368.4:c.733C>T']['alt_genomic_loci'], []) assert results['NM_000368.4:c.733C>T']['gene_symbol'] == 'TSC1' assert results['NM_000368.4:c.733C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000359.1(LRG_486p1):p.(Arg245Ter)', 'slr': 'NP_000359.1:p.(R245*)'} assert results['NM_000368.4:c.733C>T']['submitted_variant'] == '9-135796754-G-A' @@ -18154,8 +17235,7 @@ def test_variant312(self): assert 'NM_001162426.1:c.733C>T' in list(results.keys()) assert results['NM_001162426.1:c.733C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001162426.1:c.733C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001162426.1:c.733C>T']['alt_genomic_loci'] == [] - assert results['NM_001162426.1:c.733C>T']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001162426.1:c.733C>T']['alt_genomic_loci'], []) assert results['NM_001162426.1:c.733C>T']['gene_symbol'] == 'TSC1' assert results['NM_001162426.1:c.733C>T']['hgvs_predicted_protein_consequence'] 
== {'tlr': 'NP_001155898.1:p.(Arg245Ter)', 'slr': 'NP_001155898.1:p.(R245*)'} assert results['NM_001162426.1:c.733C>T']['submitted_variant'] == '9-135796754-G-A' @@ -18173,8 +17253,7 @@ def test_variant312(self): assert 'NM_001362177.1:c.370C>T' in list(results.keys()) assert results['NM_001362177.1:c.370C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001362177.1:c.370C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001362177.1:c.370C>T']['alt_genomic_loci'] == [] - assert results['NM_001362177.1:c.370C>T']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 5, mRNA' + self.assertCountEqual(results['NM_001362177.1:c.370C>T']['alt_genomic_loci'], []) assert results['NM_001362177.1:c.370C>T']['gene_symbol'] == 'TSC1' assert results['NM_001362177.1:c.370C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001349106.1:p.(Arg124Ter)', 'slr': 'NP_001349106.1:p.(R124*)'} assert results['NM_001362177.1:c.370C>T']['submitted_variant'] == '9-135796754-G-A' @@ -18191,8 +17270,7 @@ def test_variant312(self): assert 'NM_001162427.1:c.580C>T' in list(results.keys()) assert results['NM_001162427.1:c.580C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001162427.1:c.580C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001162427.1:c.580C>T']['alt_genomic_loci'] == [] - assert results['NM_001162427.1:c.580C>T']['transcript_description'] == 'Homo sapiens TSC complex subunit 1 (TSC1), transcript variant 4, mRNA' + self.assertCountEqual(results['NM_001162427.1:c.580C>T']['alt_genomic_loci'], []) assert results['NM_001162427.1:c.580C>T']['gene_symbol'] == 'TSC1' assert results['NM_001162427.1:c.580C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001155899.1:p.(Arg194Ter)', 'slr': 'NP_001155899.1:p.(R194*)'} assert results['NM_001162427.1:c.580C>T']['submitted_variant'] == '9-135796754-G-A' @@ -18216,20 +17294,19 @@ def test_variant313(self): assert 
'NM_005247.2:c.616del' in list(results.keys()) assert results['NM_005247.2:c.616del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_005247.2:c.616del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_005247.2:c.616del']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003571046.1:g.10392del', 'vcf': {'chr': 'HG536_PATCH', 'ref': 'AC', 'pos': '10391', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003571046.1:g.10392del', 'vcf': {'chr': 'NW_003571046.1', 'ref': 'AC', 'pos': '10391', 'alt': 'A'}}}] - assert results['NM_005247.2:c.616del']['transcript_description'] == 'Homo sapiens fibroblast growth factor 3 (FGF3), mRNA' + self.assertCountEqual(results['NM_005247.2:c.616del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003571046.1:g.10392del', 'vcf': {'chr': 'HG536_PATCH', 'ref': 'AC', 'pos': '10391', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003571046.1:g.10392del', 'vcf': {'chr': 'NW_003571046.1', 'ref': 'AC', 'pos': '10391', 'alt': 'A'}}}]) assert results['NM_005247.2:c.616del']['gene_symbol'] == 'FGF3' - assert results['NM_005247.2:c.616del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005238.1:p.(Val206SerfsTer117)', 'slr': 'NP_005238.1:p.(V206Sfs*117)'} + assert results['NM_005247.2:c.616del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005238.1(LRG_1303p1):p.(Val206SerfsTer117)', 'slr': 'NP_005238.1:p.(V206Sfs*117)'} assert results['NM_005247.2:c.616del']['submitted_variant'] == 'HG536_PATCH-10391-AC-A' assert results['NM_005247.2:c.616del']['genome_context_intronic_sequence'] == '' - assert results['NM_005247.2:c.616del']['hgvs_lrg_variant'] == '' + assert results['NM_005247.2:c.616del']['hgvs_lrg_variant'] == 'LRG_1303:g.14016del' assert results['NM_005247.2:c.616del']['hgvs_transcript_variant'] == 'NM_005247.2:c.616del' assert results['NM_005247.2:c.616del']['hgvs_refseqgene_variant'] == 'NG_009016.1:g.14016del' assert 
results['NM_005247.2:c.616del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.69625177del', 'vcf': {'chr': 'chr11', 'ref': 'AC', 'pos': '69625176', 'alt': 'A'}} assert results['NM_005247.2:c.616del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.69810409del', 'vcf': {'chr': 'chr11', 'ref': 'AC', 'pos': '69810408', 'alt': 'A'}} assert results['NM_005247.2:c.616del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.69625177del', 'vcf': {'chr': '11', 'ref': 'AC', 'pos': '69625176', 'alt': 'A'}} assert results['NM_005247.2:c.616del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.69810409del', 'vcf': {'chr': '11', 'ref': 'AC', 'pos': '69810408', 'alt': 'A'}} - assert results['NM_005247.2:c.616del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009016.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005238.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005247.2'} + assert results['NM_005247.2:c.616del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009016.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005238.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005247.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_1303.xml'} def test_variant314(self): @@ -18240,8 +17317,7 @@ def test_variant314(self): assert 'NR_110766.1:n.833+969C>T' in list(results.keys()) assert results['NR_110766.1:n.833+969C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NR_110766.1:n.833+969C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NR_110766.1:n.833+969C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 
'vcf': {'chr': 'NW_004070871.1', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}] - assert results['NR_110766.1:n.833+969C>T']['transcript_description'] == 'Homo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 3, non-coding RNA' + self.assertCountEqual(results['NR_110766.1:n.833+969C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'NW_004070871.1', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}]) assert results['NR_110766.1:n.833+969C>T']['gene_symbol'] == 'SHANK2' assert results['NR_110766.1:n.833+969C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_110766.1:n.833+969C>T']['submitted_variant'] == 'HG865_PATCH-33547-G-A' @@ -18258,8 +17334,7 @@ def test_variant314(self): assert 'NM_012309.4:c.2566C>T' in list(results.keys()) assert results['NM_012309.4:c.2566C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_012309.4:c.2566C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_012309.4:c.2566C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'NW_004070871.1', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}] - assert results['NM_012309.4:c.2566C>T']['transcript_description'] == 'Homo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_012309.4:c.2566C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': 
{'chr': 'NW_004070871.1', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}]) assert results['NM_012309.4:c.2566C>T']['gene_symbol'] == 'SHANK2' assert results['NM_012309.4:c.2566C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_036441.2:p.(Leu856=)', 'slr': 'NP_036441.2:p.(L856=)'} assert results['NM_012309.4:c.2566C>T']['submitted_variant'] == 'HG865_PATCH-33547-G-A' @@ -18276,8 +17351,7 @@ def test_variant314(self): assert 'NM_133266.4:c.802C>T' in list(results.keys()) assert results['NM_133266.4:c.802C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_133266.4:c.802C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_133266.4:c.802C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'NW_004070871.1', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}] - assert results['NM_133266.4:c.802C>T']['transcript_description'] == 'Homo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_133266.4:c.802C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'NW_004070871.1', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}]) assert results['NM_133266.4:c.802C>T']['gene_symbol'] == 'SHANK2' assert results['NM_133266.4:c.802C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_573573.2:p.(Leu268=)', 'slr': 'NP_573573.2:p.(L268=)'} assert results['NM_133266.4:c.802C>T']['submitted_variant'] == 'HG865_PATCH-33547-G-A' @@ -18295,8 +17369,7 @@ def test_variant314(self): assert 'NM_133266.3:c.802C>T' in list(results.keys()) assert results['NM_133266.3:c.802C>T']['hgvs_lrg_transcript_variant'] == '' 
assert results['NM_133266.3:c.802C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_133266.3:c.802C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'NW_004070871.1', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}] - assert results['NM_133266.3:c.802C>T']['transcript_description'] == 'Homo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_133266.3:c.802C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'NW_004070871.1', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}]) assert results['NM_133266.3:c.802C>T']['gene_symbol'] == 'SHANK2' assert results['NM_133266.3:c.802C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_573573.2:p.(Leu268=)', 'slr': 'NP_573573.2:p.(L268=)'} assert results['NM_133266.3:c.802C>T']['submitted_variant'] == 'HG865_PATCH-33547-G-A' @@ -18320,8 +17393,7 @@ def test_variant315(self): assert 'NM_012309.4:c.960C>A' in list(results.keys()) assert results['NM_012309.4:c.960C>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_012309.4:c.960C>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_012309.4:c.960C>A']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.569441G>T', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'G', 'pos': '569441', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.569441G>T', 'vcf': {'chr': 'NW_004070871.1', 'ref': 'G', 'pos': '569441', 'alt': 'T'}}}] - assert results['NM_012309.4:c.960C>A']['transcript_description'] == 'Homo sapiens SH3 and multiple ankyrin repeat 
domains 2 (SHANK2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_012309.4:c.960C>A']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.569441G>T', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'G', 'pos': '569441', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.569441G>T', 'vcf': {'chr': 'NW_004070871.1', 'ref': 'G', 'pos': '569441', 'alt': 'T'}}}]) assert results['NM_012309.4:c.960C>A']['gene_symbol'] == 'SHANK2' assert results['NM_012309.4:c.960C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_036441.2:p.(Tyr320Ter)', 'slr': 'NP_036441.2:p.(Y320*)'} assert results['NM_012309.4:c.960C>A']['submitted_variant'] == 'HG865_PATCH-569441-G-T' @@ -18345,8 +17417,7 @@ def test_variant316(self): assert 'NM_012309.4:c.913-5058G>A' in list(results.keys()) assert results['NM_012309.4:c.913-5058G>A']['hgvs_lrg_transcript_variant'] == '' assert results['NM_012309.4:c.913-5058G>A']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_012309.4:c.913-5058G>A']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'C', 'pos': '574546', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'NW_004070871.1', 'ref': 'C', 'pos': '574546', 'alt': 'T'}}}] - assert results['NM_012309.4:c.913-5058G>A']['transcript_description'] == 'Homo sapiens SH3 and multiple ankyrin repeat domains 2 (SHANK2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_012309.4:c.913-5058G>A']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'C', 'pos': '574546', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'NW_004070871.1', 'ref': 'C', 'pos': '574546', 'alt': 'T'}}}]) assert results['NM_012309.4:c.913-5058G>A']['gene_symbol'] == 'SHANK2' assert 
results['NM_012309.4:c.913-5058G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_036441.2:p.?', 'slr': 'NP_036441.2:p.?'} assert results['NM_012309.4:c.913-5058G>A']['submitted_variant'] == 'HG865_PATCH-574546-C-T' @@ -18370,8 +17441,7 @@ def test_variant317(self): assert 'NM_020699.2:c.802_803insTT' in list(results.keys()) assert results['NM_020699.2:c.802_803insTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_020699.2:c.802_803insTT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_020699.2:c.802_803insTT']['alt_genomic_loci'] == [] - assert results['NM_020699.2:c.802_803insTT']['transcript_description'] == 'Homo sapiens GATA zinc finger domain containing 2B (GATAD2B), mRNA' + self.assertCountEqual(results['NM_020699.2:c.802_803insTT']['alt_genomic_loci'], []) assert results['NM_020699.2:c.802_803insTT']['gene_symbol'] == 'GATAD2B' assert results['NM_020699.2:c.802_803insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065750.1:p.(Pro268LeufsTer26)', 'slr': 'NP_065750.1:p.(P268Lfs*26)'} assert results['NM_020699.2:c.802_803insTT']['submitted_variant'] == 'HSCHR1_1_CTG31-133178-TAG-T' @@ -18395,8 +17465,7 @@ def test_variant318(self): assert 'NM_021983.4:c.490G>C' in list(results.keys()) assert results['NM_021983.4:c.490G>C']['hgvs_lrg_transcript_variant'] == '' assert results['NM_021983.4:c.490G>C']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_021983.4:c.490G>C']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3848158', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'chr6_mann_hap4', 'ref': 'T', 'pos': '3848158', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3842538', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 
'vcf': {'chr': 'chr6_GL000253v2_alt', 'ref': 'T', 'pos': '3842538', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'C', 'pos': '3884432', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'C', 'pos': '3884432', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3852542', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': 'C', 'pos': '3852542', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3853244', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': 'C', 'pos': '3853244', 'alt': 'G'}}}] - assert results['NM_021983.4:c.490G>C']['transcript_description'] == 'Homo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA' + self.assertCountEqual(results['NM_021983.4:c.490G>C']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3848158', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'chr6_mann_hap4', 'ref': 'T', 'pos': '3848158', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3842538', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'ref': 'T', 'pos': '3842538', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'C', 'pos': '3884432', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 
'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'C', 'pos': '3884432', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3852542', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': 'C', 'pos': '3852542', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3853244', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': 'C', 'pos': '3853244', 'alt': 'G'}}}]) assert results['NM_021983.4:c.490G>C']['gene_symbol'] == 'HLA-DRB4' assert results['NM_021983.4:c.490G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068818.4:p.(Gly164Arg)', 'slr': 'NP_068818.4:p.(G164R)'} assert results['NM_021983.4:c.490G>C']['submitted_variant'] == 'HSCHR6_MHC_MANN_CTG1-3848158-T-G' @@ -18420,8 +17489,7 @@ def test_variant319(self): assert 'NM_021983.4:c.346G>T' in list(results.keys()) assert results['NM_021983.4:c.346G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_021983.4:c.346G>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_021983.4:c.346G>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3851043C>A', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'C', 'pos': '3851043', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167246.1:g.3851043C>A', 'vcf': {'chr': 'chr6_mann_hap4', 'ref': 'C', 'pos': '3851043', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167246.2:g.3845423C>A', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'C', 'pos': '3845423', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3845423C>A', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'ref': 'C', 'pos': '3845423', 'alt': 'A'}}}, {'grch37': {'hgvs_genomic_description': 
'NT_167247.1:g.3887313C>A', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'C', 'pos': '3887313', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3887313C>A', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'C', 'pos': '3887313', 'alt': 'A'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3855423C>A', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3855423', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3855423C>A', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': 'C', 'pos': '3855423', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3856125C>A', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3856125', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3856125C>A', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': 'C', 'pos': '3856125', 'alt': 'A'}}}] - assert results['NM_021983.4:c.346G>T']['transcript_description'] == 'Homo sapiens major histocompatibility complex, class II, DR beta 4 (HLA-DRB4), mRNA' + self.assertCountEqual(results['NM_021983.4:c.346G>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3851043C>A', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'C', 'pos': '3851043', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167246.1:g.3851043C>A', 'vcf': {'chr': 'chr6_mann_hap4', 'ref': 'C', 'pos': '3851043', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167246.2:g.3845423C>A', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'C', 'pos': '3845423', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3845423C>A', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'ref': 'C', 'pos': '3845423', 'alt': 'A'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3887313C>A', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'C', 'pos': '3887313', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3887313C>A', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'C', 'pos': '3887313', 'alt': 'A'}}}, {'grch37': 
{'hgvs_genomic_description': 'NT_167249.1:g.3855423C>A', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3855423', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3855423C>A', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': 'C', 'pos': '3855423', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3856125C>A', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3856125', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3856125C>A', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': 'C', 'pos': '3856125', 'alt': 'A'}}}]) assert results['NM_021983.4:c.346G>T']['gene_symbol'] == 'HLA-DRB4' assert results['NM_021983.4:c.346G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068818.4:p.(Glu116Ter)', 'slr': 'NP_068818.4:p.(E116*)'} assert results['NM_021983.4:c.346G>T']['submitted_variant'] == 'HSCHR6_MHC_MANN_CTG1-3851043-C-A' @@ -18445,8 +17513,7 @@ def test_variant320(self): assert 'NM_001097642.2:c.-16-441C>T' in list(results.keys()) assert results['NM_001097642.2:c.-16-441C>T']['hgvs_lrg_transcript_variant'] == 'LRG_245t1:c.-16-441C>T' assert results['NM_001097642.2:c.-16-441C>T']['refseqgene_context_intronic_sequence'] == 'NG_008357.1(NM_001097642.2):c.-16-441C>T' - assert results['NM_001097642.2:c.-16-441C>T']['alt_genomic_loci'] == [] - assert results['NM_001097642.2:c.-16-441C>T']['transcript_description'] == 'Homo sapiens gap junction protein beta 1 (GJB1), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_001097642.2:c.-16-441C>T']['alt_genomic_loci'], []) assert results['NM_001097642.2:c.-16-441C>T']['gene_symbol'] == 'GJB1' assert results['NM_001097642.2:c.-16-441C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001091111.1:p.?', 'slr': 'NP_001091111.1:p.?'} assert results['NM_001097642.2:c.-16-441C>T']['submitted_variant'] == 'X-70443101-C-T' @@ -18463,8 +17530,7 @@ def test_variant320(self): assert 'NM_000166.5:c.-101C>T' in list(results.keys()) assert 
results['NM_000166.5:c.-101C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000166.5:c.-101C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000166.5:c.-101C>T']['alt_genomic_loci'] == [] - assert results['NM_000166.5:c.-101C>T']['transcript_description'] == 'Homo sapiens gap junction protein beta 1 (GJB1), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_000166.5:c.-101C>T']['alt_genomic_loci'], []) assert results['NM_000166.5:c.-101C>T']['gene_symbol'] == 'GJB1' assert results['NM_000166.5:c.-101C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000157.1:p.?', 'slr': 'NP_000157.1:p.?'} assert results['NM_000166.5:c.-101C>T']['submitted_variant'] == 'X-70443101-C-T' @@ -18487,8 +17553,7 @@ def test_variant321(self): assert 'NM_033380.2:c.2130_2135del' in list(results.keys()) assert results['NM_033380.2:c.2130_2135del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_033380.2:c.2130_2135del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_033380.2:c.2130_2135del']['alt_genomic_loci'] == [] - assert results['NM_033380.2:c.2130_2135del']['transcript_description'] == 'Homo sapiens collagen type IV alpha 5 chain (COL4A5), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_033380.2:c.2130_2135del']['alt_genomic_loci'], []) assert results['NM_033380.2:c.2130_2135del']['gene_symbol'] == 'COL4A5' assert results['NM_033380.2:c.2130_2135del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_203699.1(LRG_232p2):p.(Pro711_Pro712del)', 'slr': 'NP_203699.1:p.(P711_P712del)'} assert results['NM_033380.2:c.2130_2135del']['submitted_variant'] == 'X-107845202-GACCACC-GACC,G' @@ -18505,8 +17570,7 @@ def test_variant321(self): assert 'NM_000495.4:c.2130_2135del' in list(results.keys()) assert results['NM_000495.4:c.2130_2135del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000495.4:c.2130_2135del']['refseqgene_context_intronic_sequence'] == '' - assert 
results['NM_000495.4:c.2130_2135del']['alt_genomic_loci'] == [] - assert results['NM_000495.4:c.2130_2135del']['transcript_description'] == 'Homo sapiens collagen type IV alpha 5 chain (COL4A5), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000495.4:c.2130_2135del']['alt_genomic_loci'], []) assert results['NM_000495.4:c.2130_2135del']['gene_symbol'] == 'COL4A5' assert results['NM_000495.4:c.2130_2135del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000486.1(LRG_232p1):p.(Pro711_Pro712del)', 'slr': 'NP_000486.1:p.(P711_P712del)'} assert results['NM_000495.4:c.2130_2135del']['submitted_variant'] == 'X-107845202-GACCACC-GACC,G' @@ -18524,8 +17588,7 @@ def test_variant321(self): assert 'NM_000495.4:c.2133_2135del' in list(results.keys()) assert results['NM_000495.4:c.2133_2135del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000495.4:c.2133_2135del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000495.4:c.2133_2135del']['alt_genomic_loci'] == [] - assert results['NM_000495.4:c.2133_2135del']['transcript_description'] == 'Homo sapiens collagen type IV alpha 5 chain (COL4A5), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000495.4:c.2133_2135del']['alt_genomic_loci'], []) assert results['NM_000495.4:c.2133_2135del']['gene_symbol'] == 'COL4A5' assert results['NM_000495.4:c.2133_2135del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000486.1(LRG_232p1):p.(Pro712del)', 'slr': 'NP_000486.1:p.(P712del)'} assert results['NM_000495.4:c.2133_2135del']['submitted_variant'] == 'X-107845202-GACCACC-GACC,G' @@ -18542,8 +17605,7 @@ def test_variant321(self): assert 'NM_033380.2:c.2133_2135del' in list(results.keys()) assert results['NM_033380.2:c.2133_2135del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_033380.2:c.2133_2135del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_033380.2:c.2133_2135del']['alt_genomic_loci'] == [] - assert 
results['NM_033380.2:c.2133_2135del']['transcript_description'] == 'Homo sapiens collagen type IV alpha 5 chain (COL4A5), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_033380.2:c.2133_2135del']['alt_genomic_loci'], []) assert results['NM_033380.2:c.2133_2135del']['gene_symbol'] == 'COL4A5' assert results['NM_033380.2:c.2133_2135del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_203699.1(LRG_232p2):p.(Pro712del)', 'slr': 'NP_203699.1:p.(P712del)'} assert results['NM_033380.2:c.2133_2135del']['submitted_variant'] == 'X-107845202-GACCACC-GACC,G' @@ -18566,8 +17628,7 @@ def test_variant322(self): assert 'NM_004992.3:c.502C>T' in list(results.keys()) assert results['NM_004992.3:c.502C>T']['hgvs_lrg_transcript_variant'] == 'LRG_764t2:c.502C>T' assert results['NM_004992.3:c.502C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_004992.3:c.502C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'HG1497_PATCH', 'ref': 'G', 'pos': '1465305', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'NW_003871103.3', 'ref': 'G', 'pos': '1465305', 'alt': 'A'}}}] - assert results['NM_004992.3:c.502C>T']['transcript_description'] == 'Homo sapiens methyl-CpG binding protein 2 (MECP2), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_004992.3:c.502C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'HG1497_PATCH', 'ref': 'G', 'pos': '1465305', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'NW_003871103.3', 'ref': 'G', 'pos': '1465305', 'alt': 'A'}}}]) assert results['NM_004992.3:c.502C>T']['gene_symbol'] == 'MECP2' assert results['NM_004992.3:c.502C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004983.1(LRG_764p2):p.(Arg168Ter)', 'slr': 'NP_004983.1:p.(R168*)'} assert 
results['NM_004992.3:c.502C>T']['submitted_variant'] == 'X-153296777-G-A' @@ -18585,8 +17646,7 @@ def test_variant322(self): assert 'NM_001316337.1:c.223C>T' in list(results.keys()) assert results['NM_001316337.1:c.223C>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001316337.1:c.223C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001316337.1:c.223C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'HG1497_PATCH', 'ref': 'G', 'pos': '1465305', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'NW_003871103.3', 'ref': 'G', 'pos': '1465305', 'alt': 'A'}}}] - assert results['NM_001316337.1:c.223C>T']['transcript_description'] == 'Homo sapiens methyl-CpG binding protein 2 (MECP2), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001316337.1:c.223C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'HG1497_PATCH', 'ref': 'G', 'pos': '1465305', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'NW_003871103.3', 'ref': 'G', 'pos': '1465305', 'alt': 'A'}}}]) assert results['NM_001316337.1:c.223C>T']['gene_symbol'] == 'MECP2' assert results['NM_001316337.1:c.223C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001303266.1:p.(Arg75Ter)', 'slr': 'NP_001303266.1:p.(R75*)'} assert results['NM_001316337.1:c.223C>T']['submitted_variant'] == 'X-153296777-G-A' @@ -18603,8 +17663,7 @@ def test_variant322(self): assert 'NM_001110792.1:c.538C>T' in list(results.keys()) assert results['NM_001110792.1:c.538C>T']['hgvs_lrg_transcript_variant'] == 'LRG_764t1:c.538C>T' assert results['NM_001110792.1:c.538C>T']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001110792.1:c.538C>T']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 
'HG1497_PATCH', 'ref': 'G', 'pos': '1465305', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'NW_003871103.3', 'ref': 'G', 'pos': '1465305', 'alt': 'A'}}}] - assert results['NM_001110792.1:c.538C>T']['transcript_description'] == 'Homo sapiens methyl-CpG binding protein 2 (MECP2), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001110792.1:c.538C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'HG1497_PATCH', 'ref': 'G', 'pos': '1465305', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'NW_003871103.3', 'ref': 'G', 'pos': '1465305', 'alt': 'A'}}}]) assert results['NM_001110792.1:c.538C>T']['gene_symbol'] == 'MECP2' assert results['NM_001110792.1:c.538C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001104262.1:p.(Arg180Ter)', 'slr': 'NP_001104262.1:p.(R180*)'} assert results['NM_001110792.1:c.538C>T']['submitted_variant'] == 'X-153296777-G-A' @@ -18628,8 +17687,7 @@ def test_variant323(self): assert 'NM_198180.2:c.408_410del' in list(results.keys()) assert results['NM_198180.2:c.408_410del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_198180.2:c.408_410del']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_198180.2:c.408_410del']['alt_genomic_loci'] == [] - assert results['NM_198180.2:c.408_410del']['transcript_description'] == 'Homo sapiens pyroglutamylated RFamide peptide (QRFP), mRNA' + self.assertCountEqual(results['NM_198180.2:c.408_410del']['alt_genomic_loci'], []) assert results['NM_198180.2:c.408_410del']['gene_symbol'] == 'QRFP' assert results['NM_198180.2:c.408_410del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_937823.1:p.?', 'slr': 'NP_937823.1:p.?'} assert results['NM_198180.2:c.408_410del']['submitted_variant'] == 'NM_198180.2:c.408_410delGTG' @@ -18652,8 +17710,7 @@ def test_variant324(self): assert 
'NM_080877.2:c.1733_1735delinsTTT' in list(results.keys()) assert results['NM_080877.2:c.1733_1735delinsTTT']['hgvs_lrg_transcript_variant'] == '' assert results['NM_080877.2:c.1733_1735delinsTTT']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_080877.2:c.1733_1735delinsTTT']['alt_genomic_loci'] == [] - assert results['NM_080877.2:c.1733_1735delinsTTT']['transcript_description'] == 'Homo sapiens solute carrier family 34 member 3 (SLC34A3), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_080877.2:c.1733_1735delinsTTT']['alt_genomic_loci'], []) assert results['NM_080877.2:c.1733_1735delinsTTT']['gene_symbol'] == 'SLC34A3' assert results['NM_080877.2:c.1733_1735delinsTTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_543153.1:p.(Pro578_Lys579delinsLeuTer)', 'slr': 'NP_543153.1:p.(P578_K579delinsL*)'} assert results['NM_080877.2:c.1733_1735delinsTTT']['submitted_variant'] == 'NM_080877.2:c.1733_1735delinsTTT' @@ -18677,8 +17734,7 @@ def test_variant325(self): assert 'NM_080877.2:c.1735_1737delinsTGA' in list(results.keys()) assert results['NM_080877.2:c.1735_1737delinsTGA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_080877.2:c.1735_1737delinsTGA']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_080877.2:c.1735_1737delinsTGA']['alt_genomic_loci'] == [] - assert results['NM_080877.2:c.1735_1737delinsTGA']['transcript_description'] == 'Homo sapiens solute carrier family 34 member 3 (SLC34A3), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_080877.2:c.1735_1737delinsTGA']['alt_genomic_loci'], []) assert results['NM_080877.2:c.1735_1737delinsTGA']['gene_symbol'] == 'SLC34A3' assert results['NM_080877.2:c.1735_1737delinsTGA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_543153.1:p.(Lys579Ter)', 'slr': 'NP_543153.1:p.(K579*)'} assert results['NM_080877.2:c.1735_1737delinsTGA']['submitted_variant'] == 'NM_080877.2:c.1735_1737delinsTGA' @@ -18703,8 +17759,7 @@ def 
test_variant326(self): assert 'NM_080877.2:c.1735_1737delinsTAATTGTTC' in list(results.keys()) assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['alt_genomic_loci'] == [] - assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['transcript_description'] == 'Homo sapiens solute carrier family 34 member 3 (SLC34A3), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['alt_genomic_loci'], []) assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['gene_symbol'] == 'SLC34A3' assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_543153.1:p.(Lys579Ter)', 'slr': 'NP_543153.1:p.(K579*)'} assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['submitted_variant'] == 'NM_080877.2:c.1735_1737delinsTAATTGTTC' @@ -18728,8 +17783,7 @@ def test_variant327(self): assert 'NM_080877.2:c.1737delinsATTGTTC' in list(results.keys()) assert results['NM_080877.2:c.1737delinsATTGTTC']['hgvs_lrg_transcript_variant'] == '' assert results['NM_080877.2:c.1737delinsATTGTTC']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_080877.2:c.1737delinsATTGTTC']['alt_genomic_loci'] == [] - assert results['NM_080877.2:c.1737delinsATTGTTC']['transcript_description'] == 'Homo sapiens solute carrier family 34 member 3 (SLC34A3), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_080877.2:c.1737delinsATTGTTC']['alt_genomic_loci'], []) assert results['NM_080877.2:c.1737delinsATTGTTC']['gene_symbol'] == 'SLC34A3' assert results['NM_080877.2:c.1737delinsATTGTTC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_543153.1:p.(Lys579_Ala580insLeuPhe)', 'slr': 'NP_543153.1:p.(K579_A580insLF)'} assert results['NM_080877.2:c.1737delinsATTGTTC']['submitted_variant'] == 
'NM_080877.2:c.1737delinsATTGTTC' @@ -18753,8 +17807,7 @@ def test_variant328(self): assert 'NM_000088.3:c.4392_*2delinsAGAG' in list(results.keys()) assert results['NM_000088.3:c.4392_*2delinsAGAG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.4392_*2delinsAGAG' assert results['NM_000088.3:c.4392_*2delinsAGAG']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.4392_*2delinsAGAG']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.4392_*2delinsAGAG']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.4392_*2delinsAGAG']['alt_genomic_loci'], []) assert results['NM_000088.3:c.4392_*2delinsAGAG']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.4392_*2delinsAGAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Ter1465GluextTer84)', 'slr': 'NP_000079.2:p.(*1465Eext*84)'} assert results['NM_000088.3:c.4392_*2delinsAGAG']['submitted_variant'] == 'NM_000088.3:c.4392_*2delinsAGAG' @@ -18778,8 +17831,7 @@ def test_variant329(self): assert 'NM_000088.3:c.589_591delinsAGAAGC' in list(results.keys()) assert results['NM_000088.3:c.589_591delinsAGAAGC']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589_591delinsAGAAGC' assert results['NM_000088.3:c.589_591delinsAGAAGC']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.589_591delinsAGAAGC']['alt_genomic_loci'] == [] - assert results['NM_000088.3:c.589_591delinsAGAAGC']['transcript_description'] == 'Homo sapiens collagen type I alpha 1 chain (COL1A1), mRNA' + self.assertCountEqual(results['NM_000088.3:c.589_591delinsAGAAGC']['alt_genomic_loci'], []) assert results['NM_000088.3:c.589_591delinsAGAAGC']['gene_symbol'] == 'COL1A1' assert results['NM_000088.3:c.589_591delinsAGAAGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197delinsArgSer)', 'slr': 'NP_000079.2:p.(G197delinsRS)'} assert 
results['NM_000088.3:c.589_591delinsAGAAGC']['submitted_variant'] == 'NM_000088.3:c.589_591delinsAGAAGC' @@ -18802,8 +17854,7 @@ def test_variant330(self): assert 'NM_000885.5:c.*2536delinsAGAAAAATCA' in list(results.keys()) assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['hgvs_lrg_transcript_variant'] == '' assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['alt_genomic_loci'] == [] - assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['transcript_description'] == 'Homo sapiens integrin subunit alpha 4 (ITGA4), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000885.5:c.*2536delinsAGAAAAATCA']['alt_genomic_loci'], []) assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['gene_symbol'] == 'ITGA4' assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000876.3:p.?', 'slr': 'NP_000876.3:p.?'} assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['submitted_variant'] == 'NM_000885.5:c.*2536delinsAGAAAAATCA' @@ -18827,8 +17878,7 @@ def test_variant331(self): assert 'NM_002693.2:c.-186_-185delinsCC' in list(results.keys()) assert results['NM_002693.2:c.-186_-185delinsCC']['hgvs_lrg_transcript_variant'] == 'LRG_765t1:c.-186_-185delinsCC' assert results['NM_002693.2:c.-186_-185delinsCC']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_002693.2:c.-186_-185delinsCC']['alt_genomic_loci'] == [] - assert results['NM_002693.2:c.-186_-185delinsCC']['transcript_description'] == 'Homo sapiens DNA polymerase gamma, catalytic subunit (POLG), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_002693.2:c.-186_-185delinsCC']['alt_genomic_loci'], []) assert results['NM_002693.2:c.-186_-185delinsCC']['gene_symbol'] == 'POLG' assert results['NM_002693.2:c.-186_-185delinsCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002684.1(LRG_765p1):p.?', 'slr': 'NP_002684.1:p.?'} 
assert results['NM_002693.2:c.-186_-185delinsCC']['submitted_variant'] == 'NM_002693.2:c.-186_-185delinsCC' @@ -18853,8 +17903,7 @@ def test_variant332(self): assert 'NM_001287344.1:c.690_690+1insCTACATAG' in list(results.keys()) assert results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001287344.1:c.690_690+1insCTACATAG']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001287344.1:c.690_690+1insCTACATAG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}] - assert results['NM_001287344.1:c.690_690+1insCTACATAG']['transcript_description'] == 'Homo sapiens Bruton tyrosine kinase (BTK), transcript variant 3, mRNA' + self.assertCountEqual(results['NM_001287344.1:c.690_690+1insCTACATAG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}]) assert results['NM_001287344.1:c.690_690+1insCTACATAG']['gene_symbol'] == 'BTK' assert results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001274273.1:p.?', 'slr': 'NP_001274273.1:p.?'} assert results['NM_001287344.1:c.690_690+1insCTACATAG']['submitted_variant'] == 'NG_009616.1:g.29052_29053insCTACATAG' @@ -18871,8 +17920,7 @@ def test_variant332(self): assert 'NM_001287345.1:c.588_588+1insCTACATAG' in list(results.keys()) assert results['NM_001287345.1:c.588_588+1insCTACATAG']['hgvs_lrg_transcript_variant'] == '' assert 
results['NM_001287345.1:c.588_588+1insCTACATAG']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_001287345.1:c.588_588+1insCTACATAG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}] - assert results['NM_001287345.1:c.588_588+1insCTACATAG']['transcript_description'] == 'Homo sapiens Bruton tyrosine kinase (BTK), transcript variant 2, mRNA' + self.assertCountEqual(results['NM_001287345.1:c.588_588+1insCTACATAG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}]) assert results['NM_001287345.1:c.588_588+1insCTACATAG']['gene_symbol'] == 'BTK' assert results['NM_001287345.1:c.588_588+1insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001274274.1:p.?', 'slr': 'NP_001274274.1:p.?'} assert results['NM_001287345.1:c.588_588+1insCTACATAG']['submitted_variant'] == 'NG_009616.1:g.29052_29053insCTACATAG' @@ -18889,8 +17937,7 @@ def test_variant332(self): assert 'NM_000061.2:c.588_588+1insCTACATAG' in list(results.keys()) assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_lrg_transcript_variant'] == 'LRG_128t1:c.588_588+1insCTACATAG' assert results['NM_000061.2:c.588_588+1insCTACATAG']['refseqgene_context_intronic_sequence'] == 'NG_009616.1(NM_000061.2):c.588_588+1insCTACATAG' - assert results['NM_000061.2:c.588_588+1insCTACATAG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 
'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}] - assert results['NM_000061.2:c.588_588+1insCTACATAG']['transcript_description'] == 'Homo sapiens Bruton tyrosine kinase (BTK), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000061.2:c.588_588+1insCTACATAG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}]) assert results['NM_000061.2:c.588_588+1insCTACATAG']['gene_symbol'] == 'BTK' assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000052.1(LRG_128p1):p.?', 'slr': 'NP_000052.1:p.?'} assert results['NM_000061.2:c.588_588+1insCTACATAG']['submitted_variant'] == 'NG_009616.1:g.29052_29053insCTACATAG' @@ -18914,8 +17961,7 @@ def test_variant333(self): assert 'NM_000061.2:c.588_588+1insCTACATAG' in list(results.keys()) assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_lrg_transcript_variant'] == 'LRG_128t1:c.588_588+1insCTACATAG' assert results['NM_000061.2:c.588_588+1insCTACATAG']['refseqgene_context_intronic_sequence'] == 'NG_009616.1(NM_000061.2):c.588_588+1insCTACATAG' - assert results['NM_000061.2:c.588_588+1insCTACATAG']['alt_genomic_loci'] == [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}] - assert 
results['NM_000061.2:c.588_588+1insCTACATAG']['transcript_description'] == 'Homo sapiens Bruton tyrosine kinase (BTK), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000061.2:c.588_588+1insCTACATAG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}]) assert results['NM_000061.2:c.588_588+1insCTACATAG']['gene_symbol'] == 'BTK' assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000052.1(LRG_128p1):p.?', 'slr': 'NP_000052.1:p.?'} assert results['NM_000061.2:c.588_588+1insCTACATAG']['submitted_variant'] == 'NM_000061.2:c.588_588+1insCTACATAG' @@ -18939,8 +17985,7 @@ def test_variant334(self): assert 'NM_000061.2:c.588_589insCTACATAG' in list(results.keys()) assert results['NM_000061.2:c.588_589insCTACATAG']['hgvs_lrg_transcript_variant'] == 'LRG_128t1:c.588_589insCTACATAG' assert results['NM_000061.2:c.588_589insCTACATAG']['refseqgene_context_intronic_sequence'] == '' - assert results['NM_000061.2:c.588_589insCTACATAG']['alt_genomic_loci'] == [] - assert results['NM_000061.2:c.588_589insCTACATAG']['transcript_description'] == 'Homo sapiens Bruton tyrosine kinase (BTK), transcript variant 1, mRNA' + self.assertCountEqual(results['NM_000061.2:c.588_589insCTACATAG']['alt_genomic_loci'], []) assert results['NM_000061.2:c.588_589insCTACATAG']['gene_symbol'] == 'BTK' assert results['NM_000061.2:c.588_589insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000052.1(LRG_128p1):p.(Ile197LeufsTer5)', 'slr': 'NP_000052.1:p.(I197Lfs*5)'} assert results['NM_000061.2:c.588_589insCTACATAG']['submitted_variant'] == 'NM_000061.2:c.588_589insCTACATAG' From 288046d9f469f7fec4fd00b7eecf39f0c1da6f12 Mon Sep 
17 00:00:00 2001 From: TeriForey Date: Mon, 25 Feb 2019 14:15:43 +0000 Subject: [PATCH 035/223] Created environment.yml file and set requirements file to point at hgvs fork --- REQUIREMENTS.txt | 9 ++++----- environment.yml | 15 +++++++++++++++ 2 files changed, 19 insertions(+), 5 deletions(-) create mode 100644 environment.yml diff --git a/REQUIREMENTS.txt b/REQUIREMENTS.txt index 40b3dad3..71e33cb6 100644 --- a/REQUIREMENTS.txt +++ b/REQUIREMENTS.txt @@ -1,6 +1,5 @@ -hgvs==1.1.3 -biocommons.seqrepo>=0.3.5, -httplib2>=0.9.0, -configparser>=3.5.0, -pyliftover>=0.3, +git+https://github.com/openvar/vv_hgvs@master#egg=hgvs +httplib2>=0.9.0 +configparser>=3.5.0 +pyliftover>=0.3 biotools>=0.3.0 diff --git a/environment.yml b/environment.yml new file mode 100644 index 00000000..479c89d9 --- /dev/null +++ b/environment.yml @@ -0,0 +1,15 @@ +name: vvenv +channels: + - conda-forge + - bioconda +dependencies: + - protobuf=3.5.1 + - mysql-connector-python + - pysam + - pyliftover>=0.3 + - numpy + - httplib2>=0.9.0 + - configparser>=3.5.0 + - pip: + - git+https://github.com/openvar/vv_hgvs@master#egg=hgvs + - biotools>=0.3.0 \ No newline at end of file From cc1829cb70fed9ffb5eb1dd98f37b700a603864d Mon Sep 17 00:00:00 2001 From: TeriForey Date: Tue, 5 Mar 2019 10:17:54 +0000 Subject: [PATCH 036/223] Added new configuration steps and updated the default config file --- VariantValidator/__init__.py | 2 + VariantValidator/modules/vvMixinInit.py | 59 +++++++------------ .../default.ini | 18 +++--- 3 files changed, 32 insertions(+), 47 deletions(-) rename VariantValidator/modules/defaultConfig.ini => configuration/default.ini (86%) diff --git a/VariantValidator/__init__.py b/VariantValidator/__init__.py index c2536527..16b70fa6 100644 --- a/VariantValidator/__init__.py +++ b/VariantValidator/__init__.py @@ -1,3 +1,5 @@ +from . 
import configure + from .variantValidator import * __all__=["Validator","Validation"] diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index 183d010c..bbdbc662 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -21,6 +21,7 @@ from .vvDatabase import vvDatabase from .vvLogging import logger from . import vvFunctions as fn +from VariantValidator.configure import CONFIG_DIR @@ -57,22 +58,11 @@ def __init__(self): 'uta_schema': str(hdp.data_version()), #self.uta_schema 'seqrepo_db': HGVS_SEQREPO_DIR.split('/')[-1] #self.seqrepoVersion ''' - # First load from the configuration file, if it exists. - configName="config.ini" - homePath=os.path.expanduser("~") - configPath=os.path.join(homePath,".config","VariantValidator") - if not os.path.isdir(configPath): - os.makedirs(configPath) - # Now configpath points to the config file itself. - configPath=os.path.join(configPath,configName) - # Does the file exist? - if not os.path.exists(configPath): - self.createConfig(configPath) # Load the configuration file. - config=RawConfigParser(allow_no_value=True) - with open(configPath) as file: - config.read_file(file) + config = ConfigParser() + config.read(CONFIG_DIR) + # The custom vvLogging module will set itself up using the VALDIATOR_DEBUG environment variable. 
levelString = config["logging"]['level'] consoleString = config["logging"]['console'] @@ -88,15 +78,20 @@ def __init__(self): os.environ["VALIDATOR_DEBUG"] = logString # Handle databases - self.entrezID=config["EntrezID"]["entrezID"] - if config["seqrepo"]["location"]!=None: - self.seqrepoVersion=config["seqrepo"]["version"] - self.seqrepoPath=config["seqrepo"]["location"]+self.seqrepoVersion - os.environ['HGVS_SEQREPO_DIR']=self.seqrepoPath - else: - raise ValueError("The seqrepo location has not been set in ~/.config/VariantValidator/config.ini") - os.environ['UTA_DB_URL']=config["uta"]["location"]+config["uta"]["version"] - self.utaPath=config["uta"]["location"]+config["uta"]["version"] + self.entrezID = config["EntrezID"]["entrezID"] + self.seqrepoVersion = config["seqrepo"]["version"] + self.seqrepoPath = os.path.join(config["seqrepo"]["location"], self.seqrepoVersion) + os.environ['HGVS_SEQREPO_DIR'] = self.seqrepoPath + + os.environ['UTA_DB_URL']= "postgresql://%s:%s@%s/%s/%s" % ( + config["postgres"]["user"], + config["postgres"]["password"], + config['postgres']['host'], + config['postgres']['database'], + config['postgres']['version'] + ) + self.utaPath = os.environ.get('UTA_DB_URL') + self.dbConfig = { 'user': config["mysql"]["user"], 'password':config["mysql"]["password"], @@ -118,7 +113,7 @@ def __init__(self): # Set up other configuration variables self.liftoverPath=config["liftover"]["location"] - if not self.liftoverPath==None: + if not self.liftoverPath == 'PATH/TO/LIFTOVER': os.environ['PYLIFTOVER_DIR']=self.liftoverPath self.entrezID=config["EntrezID"]['entrezid'] @@ -190,21 +185,7 @@ def myConfig(self): 'uta_schema': self.utaSchema, 'seqrepo_db': self.seqrepoPath } - def createConfig(self,outPath): - ''' - # This function reads from the default configuration file stored in the same folder as this module, - # and transfers it to outPath. - # Outpath should include a filename. 
- ''' - lines=[] - inPath=os.path.join(os.path.dirname(os.path.realpath(__file__)),"defaultConfig.ini") -# print(os.path.join(inPath,"defaultConfig.ini")) - with open(inPath) as file: - for l in file: - lines.append(l) - with open(outPath, "w") as file: - for l in lines: - file.write(l) + def protein(self,variant, evm, hpUnused): # Set regular expressions for if statements pat_c = re.compile("\:c\.") # Pattern looks for :c. Note (gene) has been removed diff --git a/VariantValidator/modules/defaultConfig.ini b/configuration/default.ini similarity index 86% rename from VariantValidator/modules/defaultConfig.ini rename to configuration/default.ini index c8ae0d47..485e9acb 100644 --- a/VariantValidator/modules/defaultConfig.ini +++ b/configuration/default.ini @@ -1,20 +1,22 @@ [variantValidator] version = 0.9 -release_date = tbc [mysql] -host = 127.0.0.1 +host = localhost database = validator -user = vvadmin -password = var1ant +user = USERNAME +password = PASSWORD [seqrepo] version = 2018-08-21 -location +location = /PATH/TO/SEQREPO -[uta] +[postgres] +host = localhost +database = uta version = uta_20180821 -location = postgresql://uta_admin:uta_admin@127.0.0.1/uta/ +user = USERNAME +password = PASSWORD [logging] #Levels control verbosity and can be set to "critical" "error" "warning" "info" or "debug". 
@@ -30,7 +32,7 @@ trace = false entrezid = admin@variantvalidator.org [liftover] -location +location = /PATH/TO/LIFTOVER # From 8a16da927525fbec9baee36998925de6470ad489 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Tue, 5 Mar 2019 10:19:12 +0000 Subject: [PATCH 037/223] Added configuration file --- VariantValidator/configure.py | 45 +++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 VariantValidator/configure.py diff --git a/VariantValidator/configure.py b/VariantValidator/configure.py new file mode 100644 index 00000000..65c99f4e --- /dev/null +++ b/VariantValidator/configure.py @@ -0,0 +1,45 @@ +import os +import shutil +import configparser + +CONFIG_DIR = os.path.join(os.path.expanduser('~'), '.variantvalidator') + + +def read_configuration(): + print("Going to read configuration here") + config = configparser.ConfigParser() + config.read(CONFIG_DIR) + + if config['mysql']['user'] == 'USERNAME' or config['mysql']['password'] == 'PASSWORD': + print("MySQL username and password have not been updated from default.") + exit_with_message() + + if config['postgres']['user'] == 'USERNAME' or config['postgres']['password'] == 'PASSWORD': + print("PostgreSQL username and password have not been updated from default.") + exit_with_message() + + if config['seqrepo']['location'] == 'PATH/TO/SEQREPO': + print("Seqrepo directory location has not been updated from default.") + exit_with_message() + + +def exit_with_message(): + print("Please edit your configuration file %s" % CONFIG_DIR) + print() + raise SystemExit + + +if os.path.exists(CONFIG_DIR): + print("Configuration already set up for this user") + read_configuration() +else: + print("*-----------------------------*") + print("| Welcome to VariantValidator |") + print("*-----------------------------*") + shutil.copyfile(os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(__file__))), 'configuration', 'default.ini'), CONFIG_DIR) + print("A configuration file has been copied 
into your home directory (%s)." % CONFIG_DIR) + print("Please edit this file with your database connection settings prior to continuing.") + print("Items that must be changed are highlighted in capitals.") + print() + raise SystemExit + From c062aef9be9051580bc3d3f033ec44443228e269 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 13 Mar 2019 09:10:27 +0000 Subject: [PATCH 038/223] Removed print statements from configure --- VariantValidator/configure.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/VariantValidator/configure.py b/VariantValidator/configure.py index 65c99f4e..717de82b 100644 --- a/VariantValidator/configure.py +++ b/VariantValidator/configure.py @@ -6,7 +6,6 @@ def read_configuration(): - print("Going to read configuration here") config = configparser.ConfigParser() config.read(CONFIG_DIR) @@ -30,7 +29,6 @@ def exit_with_message(): if os.path.exists(CONFIG_DIR): - print("Configuration already set up for this user") read_configuration() else: print("*-----------------------------*") From 1aec8bf0beb7bcccd568950c2b9407ffc5b47b87 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 13 Mar 2019 09:11:27 +0000 Subject: [PATCH 039/223] Tidyied up the configuration reading in the MixinInit --- VariantValidator/modules/vvMixinInit.py | 128 ++++++++++++------------ 1 file changed, 66 insertions(+), 62 deletions(-) diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index bbdbc662..f953b91f 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -1,5 +1,5 @@ import os -from configparser import ConfigParser,RawConfigParser +from configparser import ConfigParser import hgvs import hgvs.parser import hgvs.dataproviders.uta @@ -17,16 +17,14 @@ import re import copy -#import io from .vvDatabase import vvDatabase from .vvLogging import logger from . 
import vvFunctions as fn from VariantValidator.configure import CONFIG_DIR - -class Mixin(): - ''' +class Mixin: + """ # This object contains configuration options for the validator, but it inherits the mixin # class in vvCore that contains the enormous validator function. @@ -44,7 +42,7 @@ class Mixin(): val=Validator() val.validate("some kind of gene situation","The genome version","the transcripts to use") - ''' + """ def __init__(self): ''' Renaming of variables : @@ -66,14 +64,14 @@ def __init__(self): # The custom vvLogging module will set itself up using the VALDIATOR_DEBUG environment variable. levelString = config["logging"]['level'] consoleString = config["logging"]['console'] - if consoleString.lower()=="true": - consoleString="console" + if consoleString.lower() == "true": + consoleString = "console" fileString = config["logging"]['file'] - if fileString.lower()=="true": - fileString="file" + if fileString.lower() == "true": + fileString = "file" traceString = config["logging"]['trace'] - if traceString.lower()=="true": - traceString="trace" + if traceString.lower() == "true": + traceString = "trace" logString = levelString+" "+consoleString+" "+fileString+" "+traceString os.environ["VALIDATOR_DEBUG"] = logString @@ -83,7 +81,7 @@ def __init__(self): self.seqrepoPath = os.path.join(config["seqrepo"]["location"], self.seqrepoVersion) os.environ['HGVS_SEQREPO_DIR'] = self.seqrepoPath - os.environ['UTA_DB_URL']= "postgresql://%s:%s@%s/%s/%s" % ( + os.environ['UTA_DB_URL'] = "postgresql://%s:%s@%s/%s/%s" % ( config["postgres"]["user"], config["postgres"]["password"], config['postgres']['host'], @@ -93,92 +91,97 @@ def __init__(self): self.utaPath = os.environ.get('UTA_DB_URL') self.dbConfig = { - 'user': config["mysql"]["user"], - 'password':config["mysql"]["password"], - 'host': config["mysql"]["host"], - 'database':config["mysql"]["database"], + 'user': config["mysql"]["user"], + 'password': config["mysql"]["password"], + 'host': config["mysql"]["host"], 
+ 'database': config["mysql"]["database"], 'raise_on_warnings': True } #Create database access objects - self.db=vvDatabase(self,self.dbConfig) + self.db = vvDatabase(self, self.dbConfig) # Set up versions __version__ = config["variantValidator"]['version'] - self.version=__version__ - if re.match('^\d+\.\d+\.\d+$', __version__) is not None: - self.releasedVersion=True + self.version = __version__ + if re.match(r'^\d+\.\d+\.\d+$', __version__) is not None: + self.releasedVersion = True _is_released_version = True else: - self.releasedVersion=False - self.hgvsVersion=hgvs.__version__ + self.releasedVersion = False + self.hgvsVersion = hgvs.__version__ # Set up other configuration variables - self.liftoverPath=config["liftover"]["location"] + self.liftoverPath = config["liftover"]["location"] if not self.liftoverPath == 'PATH/TO/LIFTOVER': - os.environ['PYLIFTOVER_DIR']=self.liftoverPath - self.entrezID=config["EntrezID"]['entrezid'] + os.environ['PYLIFTOVER_DIR'] = self.liftoverPath + self.entrezID = config["EntrezID"]['entrezid'] # Set up HGVS # Configure hgvs package global settings hgvs.global_config.uta.pool_max = 25 hgvs.global_config.formatting.max_ref_length = 1000000 + # Create HGVS objects self.hdp = hgvs.dataproviders.uta.connect(pooling=True) - self.hp = hgvs.parser.Parser() #Parser - self.vr = hgvs.validator.Validator(self.hdp) # Validator - self.vm = hgvs.variantmapper.VariantMapper(self.hdp) # Variant mapper + self.hp = hgvs.parser.Parser() # Parser + self.vr = hgvs.validator.Validator(self.hdp) # Validator + self.vm = hgvs.variantmapper.VariantMapper(self.hdp) # Variant mapper + # Create a lose vm instance self.lose_vm = hgvs.variantmapper.VariantMapper(self.hdp, - replace_reference=True, - prevalidation_level=None - ) - self.nr_vm = hgvs.variantmapper.VariantMapper(self.hdp, replace_reference=False) #No reverse variant mapper - self.sf = hgvs.dataproviders.seqfetcher.SeqFetcher() # Seqfetcher + replace_reference=True, + prevalidation_level=None + ) + 
+ self.nr_vm = hgvs.variantmapper.VariantMapper(self.hdp, replace_reference=False) # No reverse variant mapper + self.sf = hgvs.dataproviders.seqfetcher.SeqFetcher() # Seqfetcher + # Set standard genome builds self.genome_builds = ['GRCh37', 'hg19', 'GRCh38'] self.utaSchema = str(self.hdp.data_version()) # Create normalizer self.reverse_hn = hgvs.normalizer.Normalizer(self.hdp, - cross_boundaries=False, - shuffle_direction=5, - alt_aln_method='splign' - ) + cross_boundaries=False, + shuffle_direction=5, + alt_aln_method='splign' + ) # Create normalizer self.merge_normalizer = hgvs.normalizer.Normalizer(self.hdp, - cross_boundaries=False, - shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, - alt_aln_method='splign', - validate=False - ) + cross_boundaries=False, + shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, + alt_aln_method='splign', + validate=False + ) self.reverse_merge_normalizer = hgvs.normalizer.Normalizer(self.hdp, - cross_boundaries=False, - shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, - alt_aln_method='splign', - validate=False - ) + cross_boundaries=False, + shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, + alt_aln_method='splign', + validate=False + ) #create no_norm_evm self.no_norm_evm_38 = hgvs.assemblymapper.AssemblyMapper(self.hdp, - assembly_name='GRCh38', - alt_aln_method='splign', - normalize=False, - replace_reference=True - ) + assembly_name='GRCh38', + alt_aln_method='splign', + normalize=False, + replace_reference=True + ) self.no_norm_evm_37 = hgvs.assemblymapper.AssemblyMapper(self.hdp, - assembly_name='GRCh37', - alt_aln_method='splign', - normalize=False, - replace_reference=True - ) + assembly_name='GRCh37', + alt_aln_method='splign', + normalize=False, + replace_reference=True + ) def __del__(self): del self.db + def myConfig(self): - ''' - #Returns configuration: - #version, hgvs version, uta schema, seqrepo db. 
- ''' + """ + Returns configuration: + version, hgvs version, uta schema, seqrepo db. + """ return { 'variantvalidator_version': self.version, 'variantvalidator_hgvs_version': self.hgvsVersion, @@ -214,6 +217,7 @@ def protein(self,variant, evm, hpUnused): var_p.ac = 'Non-coding transcript' var_p.posedit = '' return var_p + def myc_to_p(self,hgvs_transcript, evm, re_to_p): # Create dictionary to store the information hgvs_transcript_to_hgvs_protein = {'error': '', 'hgvs_protein': '', 'ref_residues': ''} From 1c090ebdf3ecb6985aed28b5c95c83804cd3e2c9 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 13 Mar 2019 10:35:20 +0000 Subject: [PATCH 040/223] Created executable to run update and python3 version of update functions - now in one file --- VariantValidator/update_vv_db.py | 376 +++++++++++++++++++++++++++++++ bin/update_db.py | 7 + 2 files changed, 383 insertions(+) create mode 100644 VariantValidator/update_vv_db.py create mode 100644 bin/update_db.py diff --git a/VariantValidator/update_vv_db.py b/VariantValidator/update_vv_db.py new file mode 100644 index 00000000..c4cf0ba8 --- /dev/null +++ b/VariantValidator/update_vv_db.py @@ -0,0 +1,376 @@ +# -*- coding: utf-8 -*- + +import re +import os +import urllib.request, urllib.error, urllib.parse +import copy +# import variantanalyser +# import variantanalyser.dbControls +# import variantanalyser.dbControls.data as db_data +from .modules import vvDatabase +from . import variantValidator +from configparser import ConfigParser +from . 
import configure + + +def update(): + + config = ConfigParser() + config.read(configure.CONFIG_DIR) + + dbConfig = { + 'user': config["mysql"]["user"], + 'password': config["mysql"]["password"], + 'host': config["mysql"]["host"], + 'database': config["mysql"]["database"], + 'raise_on_warnings': True + } + # Create database access objects + db = vvDatabase.vvDatabase(variantValidator.Validator(), dbConfig) + + print(dbConfig) + print(db) + + update_refseq(db) + update_lrg(db) + + +def update_refseq(dbcnx): + print('Updating RefSeqGene no Missmatch MySQL data') + # Set os path + # Set up os paths data and log folders + ROOT = os.path.dirname(os.path.abspath(__file__)) + + # Download data from RefSeqGene + # Download data + rsg = urllib.request.Request('http://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/gene_RefSeqGene') + response = urllib.request.urlopen(rsg) + rsg_file = response.read() + rsg_data_line = rsg_file.split('\n') + rsg_data = [] + for data in rsg_data_line: + rsg_data.append(data) + + # Download data + grch37 = urllib.request.Request( + 'http://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/GCF_000001405.25_refseqgene_alignments.gff3') + response = urllib.request.urlopen(grch37) + grch37_file = response.read() + grch37_data_line = grch37_file.split('\n') + grch37_align_data = [] + for data in grch37_data_line: + grch37_align_data.append(data) + + # Download data + grch38 = urllib.request.Request( + 'http://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/GCF_000001405.28_refseqgene_alignments.gff3') + response = urllib.request.urlopen(grch38) + grch38_file = response.read() + grch38_data_line = grch38_file.split('\n') + grch38_align_data = [] + for data in grch38_data_line: + grch38_align_data.append(data) + + # Open Lists + # rsg_data = open(os.path.join(ROOT, 'gene_RefSeqGene'), 'r') + rsg_id_info = [] + # grch37_align_data = open(os.path.join(ROOT, 'GCF_000001405.25_refseqgene_alignments.gff3'), 'r') + grch37_align = [] + # grch38_align_data = 
open(os.path.join(ROOT, 'GCF_000001405.28_refseqgene_alignments.gff3'), 'r') + grch38_align = [] + + # Place the required data from each file into a dictionary + hash = re.compile('#') + for line in rsg_data: + if hash.search(line): + pass + else: + line = line.strip() + info = line.split() + if len(info) == 0: + pass + else: + dict = {'symbol': info[2], 'rsg_id': info[3], 'gene_id': info[1]} + rsg_id_info.append(dict) + + # Create dictionary to store RefSeqGene and gene symbol data NOTE RefSeqGene ID stored without version number! + rsg_to_symbol = {} + # Collect the data + for ent in rsg_id_info: + rsg_id = copy.deepcopy(ent['rsg_id']) + rsg_id = rsg_id.split('.')[0] + rsg_to_symbol[rsg_id] = {'symbol': ent['symbol'], 'gene_id': ent['gene_id']} + + # Count total number of NG to NC mappings + total_rsg_to_nc = 0 + total_rsg_to_nc_rejected = 0 + for line in grch37_align_data: + # Count NG_ to NC_ and remove the entries we don't care about! + if re.search('NC_', line) and re.search('NG_', line): + total_rsg_to_nc = total_rsg_to_nc + 1 + else: + continue + if hash.search(line): + pass + elif not re.search('gap_count=0', line): + if re.search('NC_', line) and re.search('NG_', line): + total_rsg_to_nc_rejected = total_rsg_to_nc_rejected + 1 + # print line + pass + else: + line = line.strip() + info = line.split('\t') + if len(info) != 9: + pass + else: + metrics = info[8].split(';') + id_ori = metrics[1].replace('Target=', '') + id_ori_list = id_ori.split() + dict = {'rsg_id': id_ori_list[0], 'chr_id': info[0], 'rsg_start': info[3], 'rsg_end': info[4], + 'ori': id_ori_list[3]} + grch37_align.append(dict) + + for line in grch38_align_data: + if re.search('NC_', line) and re.search('NG_', line): + total_rsg_to_nc = total_rsg_to_nc + 1 + else: + continue + if hash.search(line): + pass + elif not re.search('gap_count=0', line): + if re.search('NC_', line) and re.search('NG_', line): + total_rsg_to_nc_rejected = total_rsg_to_nc_rejected + 1 + # print line + pass + else: + 
line = line.strip() + info = line.split('\t') + if len(info) != 9: + pass + else: + metrics = info[8].split(';') + id_ori = metrics[1].replace('Target=', '') + id_ori_list = id_ori.split() + dict = {'rsg_id': id_ori_list[0], 'chr_id': info[0], 'rsg_start': info[3], 'rsg_end': info[4], + 'ori': id_ori_list[3]} + grch38_align.append(dict) + + # Create a data array containing the database + db = [] + # map line + for line in grch37_align: + ml = [] + link = line['rsg_id'] + ml.append(link) + ml.append(line['chr_id']) + ml.append('GRCh37') + ml.append(line['rsg_start']) + ml.append(line['rsg_end']) + ml.append(line['ori']) + # Add the additional data from rsg_id_info + for data in rsg_id_info: + if link == data['rsg_id']: + ml.append(data['symbol']) + ml.append(data['gene_id']) + else: + continue + # Create the entry and append to db + db.append(ml) + + for line in grch38_align: + ml = [] + link = line['rsg_id'] + ml.append(link) + ml.append(line['chr_id']) + ml.append('GRCh38') + ml.append(line['rsg_start']) + ml.append(line['rsg_end']) + ml.append(line['ori']) + # Add the additional data from rsg_id_info + for data in rsg_id_info: + if link == data['rsg_id']: + ml.append(data['symbol']) + ml.append(data['gene_id']) + else: + continue + # Create the entry and append to db + db.append(ml) + + missing = [] + + # Identify lines with missing data e.g. gene symbols + for line in db: + try: + line[6] + except IndexError: + try: + identifier = copy.deepcopy(line[0]) + identifier = identifier.split('.')[0] + line.append(rsg_to_symbol[identifier]['symbol']) + line.append(rsg_to_symbol[identifier]['gene_id']) + except KeyError: + print(("Can't identify gene symbol for %s" % line[0])) + missing.append(line[0]) + + # Open a text file to be used as a simple database and write the database + # rsg_db = open(os.path.join(ROOT, 'rsg_chr_db.txt'), 'w') + + to_mysql = [] + for line in db: + if line[0] in missing: + continue + # Only gap-less RefSeqGenes will have passed. 
The rest will be alternatively curated + write = [] + # Take the mapping data + write = copy.deepcopy(line[0:6]) + # add RSG ranges + write.append('1') + end_rsg = int(line[4]) - int(line[3]) + 1 + end_rsg = str(end_rsg) + write.append(end_rsg) + # Create block data chr then rsg + chr_block = str(line[3]) + '-' + str(line[4]) + write.append(chr_block) + rsg_block = str(write[6]) + '-' + str(write[7]) + write.append(rsg_block) + # Add gene ID and Gene symbol(s) + write.append(line[7]) + write.append(line[6]) + # write_me = '\t'.join(write) + # rsg_db.write(write_me + '\n') + del write[6] + to_mysql.append(write) + + # Set up code to write to database + for line in to_mysql: + current_symbol = db_data.get_gene_symbol_from_refSeqGeneID(line[0]) + if line[10] == current_symbol: + pass + else: + if current_symbol != 'none': + line[10] = current_symbol + else: + pass + db_data.update_refSeqGene_loci(line) + + # Close database + # rsg_db.close() + + print(('Total NG_ to NC_ alignments = ' + str(total_rsg_to_nc))) + print(('Gapps within NG_ to NC_ alignments = ' + str(total_rsg_to_nc_rejected))) + + print('complete') + return + +def update_lrg(): + print('Updating LRG lookup tables') + lr2rs_download = urllib.request.Request('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_transcripts_xrefs.txt') + # Open and read + lr2rs_data = urllib.request.urlopen(lr2rs_download) + lr2rs = lr2rs_data.read() + # List the data + lr2rs = lr2rs.strip() + lr2rs = lr2rs.split('\n') + + # Download + lrg_status_download = urllib.request.Request('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_GRCh38.txt') + # Open and read + lrg_status_data = urllib.request.urlopen(lrg_status_download) + lrg_status = lrg_status_data.read() + # List the data + lrg_status = lrg_status.strip() + lrg_status = lrg_status.split('\n') + + # Download + rs2lr_download = urllib.request.Request('http://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/RefSeqGene/LRG_RefSeqGene') + # Open and read + rs2lr_data = 
urllib.request.urlopen(rs2lr_download) + rs2lr = rs2lr_data.read() + # List the data + rs2lr = rs2lr.strip() + rs2lr = rs2lr.split('\n') + + # Download LRG transcript (_t) to LRG Protein (__p) data file + lr_t2p_downloaded = urllib.request.Request('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_proteins_RefSeq.txt') + # Open and read + lr_t2p_data = urllib.request.urlopen(lr_t2p_downloaded) + lr_t2p = lr_t2p_data.read() + # List the data + lr_t2p = lr_t2p.strip() + lr_t2p = lr_t2p.split('\n') + + # Dictionary the status by LRG_ID + lrg_status_dict = {} + # Compile dictionary + for line in lrg_status: + if re.search('^#', line): + continue + else: + list = line.split() + lrgID = list[0] + stat = list[2] + lrg_status_dict[lrgID] = stat + + # Required lookup tables + # LRG_ID GeneSymbol RefSeqGeneID status + # LRG_ID RefSeqTranscriptID + # LRG_T2LRG_P + + print('Update LRG and LRG_transcript lookup tables') + # Populate lists lrg_rs_lookup (LRG to RefSeqGene) and lrg_t2nm_ (LRG Transcript to RefSeq Transcript) + for line in lr2rs: + if re.search('^#', line): + continue + else: + list = line.split() + # Assign objects + lrg_id = list[0] + symbol = list[1] + rsgid = list[2] + lrg_tx = str(list[0]) + str(list[3]) + rstid = list[4] + status = lrg_status_dict[lrg_id] + # pass data to relevant lists + # lrg_rs_lookup + lrg_rs_lookup = [lrg_id, symbol, rsgid, status] + + # update LRG to RefSeqGene database + data.update_lrg_rs_lookup(lrg_rs_lookup) + + # lrg_t2nm_ + lrgtx_to_rstID = [lrg_tx, rstid] + # update database + data.update_lrgt_rst(lrgtx_to_rstID) + + print('Update LRG protein lookup table') + # Populate LRG protein RefSeqProtein lokup table + for line in lr_t2p: + if re.search('^#', line): + continue + else: + list = line.split() + # Assign objects + lrg_p = list[0] + rs_p = list[1] + # update LRG to RefSeqGene database + data.update_lrg_p_rs_p_lookup(lrg_p, rs_p) + + print('LRG lookup tables updated') + return + +# +# Copyright (C) 2018 Peter Causey-Freeman, 
University of Leicester +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# diff --git a/bin/update_db.py b/bin/update_db.py new file mode 100644 index 00000000..8ac345c0 --- /dev/null +++ b/bin/update_db.py @@ -0,0 +1,7 @@ +from VariantValidator import update_vv_db +import argparse + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + + update_vv_db.update() From 9a301e5084d01e049480c9209eba5efa691653f2 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 13 Mar 2019 13:11:02 +0000 Subject: [PATCH 041/223] Cleaned up the update functions --- VariantValidator/update_vv_db.py | 282 +++++++++++++------------------ 1 file changed, 115 insertions(+), 167 deletions(-) diff --git a/VariantValidator/update_vv_db.py b/VariantValidator/update_vv_db.py index c4cf0ba8..38f63dbf 100644 --- a/VariantValidator/update_vv_db.py +++ b/VariantValidator/update_vv_db.py @@ -1,15 +1,11 @@ # -*- coding: utf-8 -*- -import re import os import urllib.request, urllib.error, urllib.parse import copy -# import variantanalyser -# import variantanalyser.dbControls -# import variantanalyser.dbControls.data as db_data +from configparser import ConfigParser from .modules import vvDatabase from . import variantValidator -from configparser import ConfigParser from . 
import configure @@ -28,9 +24,6 @@ def update(): # Create database access objects db = vvDatabase.vvDatabase(variantValidator.Validator(), dbConfig) - print(dbConfig) - print(db) - update_refseq(db) update_lrg(db) @@ -46,30 +39,30 @@ def update_refseq(dbcnx): rsg = urllib.request.Request('http://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/gene_RefSeqGene') response = urllib.request.urlopen(rsg) rsg_file = response.read() - rsg_data_line = rsg_file.split('\n') + rsg_data_line = rsg_file.split(b'\n') rsg_data = [] for data in rsg_data_line: - rsg_data.append(data) + rsg_data.append(data.decode()) # Download data grch37 = urllib.request.Request( 'http://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/GCF_000001405.25_refseqgene_alignments.gff3') response = urllib.request.urlopen(grch37) grch37_file = response.read() - grch37_data_line = grch37_file.split('\n') + grch37_data_line = grch37_file.split(b'\n') grch37_align_data = [] for data in grch37_data_line: - grch37_align_data.append(data) + grch37_align_data.append(data.decode()) # Download data grch38 = urllib.request.Request( 'http://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/GCF_000001405.28_refseqgene_alignments.gff3') response = urllib.request.urlopen(grch38) grch38_file = response.read() - grch38_data_line = grch38_file.split('\n') + grch38_data_line = grch38_file.split(b'\n') grch38_align_data = [] for data in grch38_data_line: - grch38_align_data.append(data) + grch38_align_data.append(data.decode()) # Open Lists # rsg_data = open(os.path.join(ROOT, 'gene_RefSeqGene'), 'r') @@ -80,120 +73,53 @@ def update_refseq(dbcnx): grch38_align = [] # Place the required data from each file into a dictionary - hash = re.compile('#') + for line in rsg_data: - if hash.search(line): - pass - else: - line = line.strip() - info = line.split() - if len(info) == 0: - pass - else: - dict = {'symbol': info[2], 'rsg_id': info[3], 'gene_id': info[1]} - rsg_id_info.append(dict) + if line.startswith('#'): + continue + info = 
line.strip().split() + if len(info) > 0: + entry = {'symbol': info[2], 'rsg_id': info[3], 'gene_id': info[1]} + rsg_id_info.append(entry) # Create dictionary to store RefSeqGene and gene symbol data NOTE RefSeqGene ID stored without version number! rsg_to_symbol = {} # Collect the data for ent in rsg_id_info: - rsg_id = copy.deepcopy(ent['rsg_id']) - rsg_id = rsg_id.split('.')[0] + rsg_id = ent['rsg_id'].split('.')[0] rsg_to_symbol[rsg_id] = {'symbol': ent['symbol'], 'gene_id': ent['gene_id']} # Count total number of NG to NC mappings total_rsg_to_nc = 0 total_rsg_to_nc_rejected = 0 for line in grch37_align_data: - # Count NG_ to NC_ and remove the entries we don't care about! - if re.search('NC_', line) and re.search('NG_', line): + ng_nc = count_ng_nc(line) + if ng_nc is not None: total_rsg_to_nc = total_rsg_to_nc + 1 - else: - continue - if hash.search(line): - pass - elif not re.search('gap_count=0', line): - if re.search('NC_', line) and re.search('NG_', line): + if ng_nc == 'rejected': total_rsg_to_nc_rejected = total_rsg_to_nc_rejected + 1 - # print line - pass - else: - line = line.strip() - info = line.split('\t') - if len(info) != 9: - pass - else: - metrics = info[8].split(';') - id_ori = metrics[1].replace('Target=', '') - id_ori_list = id_ori.split() - dict = {'rsg_id': id_ori_list[0], 'chr_id': info[0], 'rsg_start': info[3], 'rsg_end': info[4], - 'ori': id_ori_list[3]} - grch37_align.append(dict) + elif ng_nc != 'failed': + grch37_align.append(ng_nc) for line in grch38_align_data: - if re.search('NC_', line) and re.search('NG_', line): + ng_nc = count_ng_nc(line) + if ng_nc is not None: total_rsg_to_nc = total_rsg_to_nc + 1 - else: - continue - if hash.search(line): - pass - elif not re.search('gap_count=0', line): - if re.search('NC_', line) and re.search('NG_', line): + if ng_nc == 'rejected': total_rsg_to_nc_rejected = total_rsg_to_nc_rejected + 1 - # print line - pass - else: - line = line.strip() - info = line.split('\t') - if len(info) != 9: - 
pass - else: - metrics = info[8].split(';') - id_ori = metrics[1].replace('Target=', '') - id_ori_list = id_ori.split() - dict = {'rsg_id': id_ori_list[0], 'chr_id': info[0], 'rsg_start': info[3], 'rsg_end': info[4], - 'ori': id_ori_list[3]} - grch38_align.append(dict) + elif ng_nc != 'failed': + grch38_align.append(ng_nc) + # Create a data array containing the database db = [] # map line for line in grch37_align: - ml = [] - link = line['rsg_id'] - ml.append(link) - ml.append(line['chr_id']) - ml.append('GRCh37') - ml.append(line['rsg_start']) - ml.append(line['rsg_end']) - ml.append(line['ori']) - # Add the additional data from rsg_id_info - for data in rsg_id_info: - if link == data['rsg_id']: - ml.append(data['symbol']) - ml.append(data['gene_id']) - else: - continue - # Create the entry and append to db + ml = map_line(line, 'GRCh37', rsg_id_info) db.append(ml) for line in grch38_align: - ml = [] - link = line['rsg_id'] - ml.append(link) - ml.append(line['chr_id']) - ml.append('GRCh38') - ml.append(line['rsg_start']) - ml.append(line['rsg_end']) - ml.append(line['ori']) - # Add the additional data from rsg_id_info - for data in rsg_id_info: - if link == data['rsg_id']: - ml.append(data['symbol']) - ml.append(data['gene_id']) - else: - continue - # Create the entry and append to db + ml = map_line(line, 'GRCh38', rsg_id_info) db.append(ml) missing = [] @@ -209,7 +135,7 @@ def update_refseq(dbcnx): line.append(rsg_to_symbol[identifier]['symbol']) line.append(rsg_to_symbol[identifier]['gene_id']) except KeyError: - print(("Can't identify gene symbol for %s" % line[0])) + print("Can't identify gene symbol for %s" % line[0]) missing.append(line[0]) # Open a text file to be used as a simple database and write the database @@ -220,7 +146,6 @@ def update_refseq(dbcnx): if line[0] in missing: continue # Only gap-less RefSeqGenes will have passed. 
The rest will be alternatively curated - write = [] # Take the mapping data write = copy.deepcopy(line[0:6]) # add RSG ranges @@ -236,40 +161,32 @@ def update_refseq(dbcnx): # Add gene ID and Gene symbol(s) write.append(line[7]) write.append(line[6]) - # write_me = '\t'.join(write) - # rsg_db.write(write_me + '\n') + del write[6] to_mysql.append(write) # Set up code to write to database for line in to_mysql: - current_symbol = db_data.get_gene_symbol_from_refSeqGeneID(line[0]) - if line[10] == current_symbol: - pass - else: + current_symbol = dbcnx.get_gene_symbol_from_refSeqGeneID(line[0]) + if line[10] != current_symbol: if current_symbol != 'none': line[10] = current_symbol - else: - pass - db_data.update_refSeqGene_loci(line) + dbcnx.update_refSeqGene_loci(line) - # Close database - # rsg_db.close() + print('Total NG_ to NC_ alignments = ' + str(total_rsg_to_nc)) + print('Gaps within NG_ to NC_ alignments = ' + str(total_rsg_to_nc_rejected)) - print(('Total NG_ to NC_ alignments = ' + str(total_rsg_to_nc))) - print(('Gapps within NG_ to NC_ alignments = ' + str(total_rsg_to_nc_rejected))) - - print('complete') return -def update_lrg(): +def update_lrg(dbcnx): print('Updating LRG lookup tables') + lr2rs_download = urllib.request.Request('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_transcripts_xrefs.txt') # Open and read lr2rs_data = urllib.request.urlopen(lr2rs_download) lr2rs = lr2rs_data.read() # List the data - lr2rs = lr2rs.strip() + lr2rs = lr2rs.strip().decode() lr2rs = lr2rs.split('\n') # Download @@ -278,38 +195,26 @@ def update_lrg(): lrg_status_data = urllib.request.urlopen(lrg_status_download) lrg_status = lrg_status_data.read() # List the data - lrg_status = lrg_status.strip() + lrg_status = lrg_status.strip().decode() lrg_status = lrg_status.split('\n') - # Download - rs2lr_download = urllib.request.Request('http://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/RefSeqGene/LRG_RefSeqGene') - # Open and read - rs2lr_data = 
urllib.request.urlopen(rs2lr_download) - rs2lr = rs2lr_data.read() - # List the data - rs2lr = rs2lr.strip() - rs2lr = rs2lr.split('\n') - # Download LRG transcript (_t) to LRG Protein (__p) data file lr_t2p_downloaded = urllib.request.Request('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_proteins_RefSeq.txt') # Open and read lr_t2p_data = urllib.request.urlopen(lr_t2p_downloaded) lr_t2p = lr_t2p_data.read() # List the data - lr_t2p = lr_t2p.strip() + lr_t2p = lr_t2p.strip().decode() lr_t2p = lr_t2p.split('\n') # Dictionary the status by LRG_ID lrg_status_dict = {} # Compile dictionary for line in lrg_status: - if re.search('^#', line): + if line.startswith('#'): continue - else: - list = line.split() - lrgID = list[0] - stat = list[2] - lrg_status_dict[lrgID] = stat + data = line.split() + lrg_status_dict[data[0]] = data[2] # Required lookup tables # LRG_ID GeneSymbol RefSeqGeneID status @@ -319,45 +224,88 @@ def update_lrg(): print('Update LRG and LRG_transcript lookup tables') # Populate lists lrg_rs_lookup (LRG to RefSeqGene) and lrg_t2nm_ (LRG Transcript to RefSeq Transcript) for line in lr2rs: - if re.search('^#', line): + if line.startswith('#'): continue - else: - list = line.split() - # Assign objects - lrg_id = list[0] - symbol = list[1] - rsgid = list[2] - lrg_tx = str(list[0]) + str(list[3]) - rstid = list[4] - status = lrg_status_dict[lrg_id] - # pass data to relevant lists - # lrg_rs_lookup - lrg_rs_lookup = [lrg_id, symbol, rsgid, status] - - # update LRG to RefSeqGene database - data.update_lrg_rs_lookup(lrg_rs_lookup) - - # lrg_t2nm_ - lrgtx_to_rstID = [lrg_tx, rstid] - # update database - data.update_lrgt_rst(lrgtx_to_rstID) + data = line.split() + # Assign objects + lrg_id = data[0] + symbol = data[1] + rsgid = data[2] + lrg_tx = str(data[0]) + str(data[3]) + rstid = data[4] + status = lrg_status_dict[lrg_id] + # pass data to relevant lists + # lrg_rs_lookup + lrg_rs_lookup = [lrg_id, symbol, rsgid, status] + + # update LRG to RefSeqGene 
database + dbcnx.update_lrg_rs_lookup(lrg_rs_lookup) + + # lrg_t2nm_ + lrgtx_to_rstID = [lrg_tx, rstid] + # update database + dbcnx.update_lrgt_rst(lrgtx_to_rstID) print('Update LRG protein lookup table') # Populate LRG protein RefSeqProtein lokup table for line in lr_t2p: - if re.search('^#', line): + if line.startswith('#'): continue - else: - list = line.split() - # Assign objects - lrg_p = list[0] - rs_p = list[1] - # update LRG to RefSeqGene database - data.update_lrg_p_rs_p_lookup(lrg_p, rs_p) + data = line.split() + # Assign objects + lrg_p = data[0] + rs_p = data[1] + # update LRG to RefSeqGene database + dbcnx.update_lrg_p_rs_p_lookup(lrg_p, rs_p) print('LRG lookup tables updated') return + +def count_ng_nc(line): + # Count NG_ to NC_ and remove the entries we don't care about! + if 'NC_' in line and 'NG_' in line: + # print(line) + pass + else: + return None + if '#' in line: + return 'failed' + + if 'gap_count=0' not in line: + return 'rejected' + + else: + line = line.strip() + info = line.split('\t') + if len(info) != 9: + return 'failed' + + metrics = info[8].split(';') + id_ori = metrics[1].replace('Target=', '') + id_ori_list = id_ori.split() + entry = {'rsg_id': id_ori_list[0], 'chr_id': info[0], 'rsg_start': info[3], 'rsg_end': info[4], + 'ori': id_ori_list[3]} + return entry + + +def map_line(line, genome, rsg_id_info): + ml = [] + link = line['rsg_id'] + ml.append(link) + ml.append(line['chr_id']) + ml.append(genome) + ml.append(line['rsg_start']) + ml.append(line['rsg_end']) + ml.append(line['ori']) + # Add the additional data from rsg_id_info + for data in rsg_id_info: + if link == data['rsg_id']: + ml.append(data['symbol']) + ml.append(data['gene_id']) + # Create the entry and append to db + return ml + # # Copyright (C) 2018 Peter Causey-Freeman, University of Leicester # From 9d35242a3e3a757b5484b53ed2b405c7a105a08c Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 13 Mar 2019 14:24:26 +0000 Subject: [PATCH 042/223] Pulled validator 
object out of database connection object --- VariantValidator/modules/vvDBInit.py | 22 +++++++++------------- VariantValidator/modules/vvDatabase.py | 12 ++++++------ VariantValidator/modules/vvMixinCore.py | 8 ++++---- VariantValidator/modules/vvMixinInit.py | 2 +- VariantValidator/update_vv_db.py | 3 +-- 5 files changed, 21 insertions(+), 26 deletions(-) diff --git a/VariantValidator/modules/vvDBInit.py b/VariantValidator/modules/vvDBInit.py index e6731db1..3cbbcdf3 100644 --- a/VariantValidator/modules/vvDBInit.py +++ b/VariantValidator/modules/vvDBInit.py @@ -6,26 +6,22 @@ class Mixin(): ''' A mixin containing the database initialisation routines. ''' - def __init__(self,val,dbConfig): + def __init__(self, dbConfig): self.conn = None # self.cursor will be none UNLESS you're wrapping a function in @handleCursor, which automatically opens and # closes connections for you. - self.cursor=None - self.dbConfig=dbConfig + self.cursor = None + self.dbConfig = dbConfig # Construct database URL #'mysqlx://vvadmin:var1ant@127.0.0.1/validator' - self.path="mysqlx://"+dbConfig["user"]+":"+dbConfig["password"]+"@"+dbConfig["host"]+"/"+dbConfig["database"] - os.environ["VALIDATOR_DB_URL"]=self.path - self.val=val - self.pool=mysql.connector.pooling.MySQLConnectionPool(pool_size=10, **self.dbConfig) - self.conn=self.pool.get_connection() + self.path = "mysqlx://"+dbConfig["user"]+":"+dbConfig["password"]+"@"+dbConfig["host"]+"/"+dbConfig["database"] + os.environ["VALIDATOR_DB_URL"] = self.path + self.pool = mysql.connector.pooling.MySQLConnectionPool(pool_size=10, **self.dbConfig) + self.conn = self.pool.get_connection() def __del__(self): if self.conn: self.conn.close() - self.conn=None + self.conn = None if self.pool: - self.pool.close() - self.pool=None - if self.val: - self.val=None + self.pool = None diff --git a/VariantValidator/modules/vvDatabase.py b/VariantValidator/modules/vvDatabase.py index 031d4727..f5b9b650 100644 --- a/VariantValidator/modules/vvDatabase.py +++ 
b/VariantValidator/modules/vvDatabase.py @@ -35,13 +35,13 @@ def query_with_fetchone(self,entry, table): logger.debug("No data returned from query "+str(query)) return row # From data - def data_add(self, accession): + def data_add(self, accession, validator): ''' # Add accurate transcript descriptions to the database :param accession: :return: ''' - self.update_transcript_info_record(accession, self.val.hdp) + self.update_transcript_info_record(accession, validator) entry = self.in_entries(accession, 'transcript_info') return entry @@ -71,7 +71,7 @@ def in_entries(self,entry, table): data['updated'] = row[6] data['expiry'] = row[7] return data - def update_transcript_info_record(self,accession, hdp): + def update_transcript_info_record(self,accession, validator): ''' # Search Entrez for corresponding record for the RefSeq ID ''' @@ -84,7 +84,7 @@ def update_transcript_info_record(self,accession, hdp): hgnc_symbol = previous_entry['hgnc_symbol'] uta_symbol = previous_entry['uta_symbol'] try: - record = self.val.entrez_efetch(db="nucleotide", id=accession, rettype="gb", retmode="text") + record = validator.entrez_efetch(db="nucleotide", id=accession, rettype="gb", retmode="text") version = record.id description = record.description variant = '0' @@ -100,11 +100,11 @@ def update_transcript_info_record(self,accession, hdp): # Get information from UTA try: - uta_info = hdp.get_tx_identity_info(version) + uta_info = validator.hdp.get_tx_identity_info(version) except: version_ac_ver = version.split('.') version = version_ac_ver[0] + '.' 
+ str(int(version_ac_ver[1]) - 1) - uta_info = hdp.get_tx_identity_info(version) + uta_info = validator.hdp.get_tx_identity_info(version) uta_symbol = str(uta_info[6]) diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 8f08872e..1666a52f 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -1947,7 +1947,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if entry['expiry'] == 'true': dbaction = 'update' try: - entry = self.db.data_add(accession=accession) + entry = self.db.data_add(accession=accession, validator=self) except hgvs.exceptions.HGVSError as e: error = 'Transcript %s is not currently supported' % (accession) validation['warnings'] = validation['warnings'] + ': ' + str(error) @@ -1965,7 +1965,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr elif 'none' in entry: dbaction = 'insert' try: - entry = self.db.data_add(accession=accession) + entry = self.db.data_add(accession=accession, validator=self) except Exception as e: logger.warning(str(e)) error = 'Unable to assign transcript identity records to ' + accession + ', potentially an obsolete record :' @@ -2008,7 +2008,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # If the current entry is too old if entry['expiry'] == 'true': dbaction = 'update' - entry = self.db.data_add(accession=accession) + entry = self.db.data_add(accession=accession, validator=self) hgnc_gene_info = entry['description'] else: hgnc_gene_info = entry['description'] @@ -2016,7 +2016,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr elif 'none' in entry: dbaction = 'insert' try: - entry = self.db.data_add(accession=accession) + entry = self.db.data_add(accession=accession, validator=self) except Exception as e: logger.warning(str(e)) error = 'Unable to assign transcript identity records to ' + 
accession + ', potentially an obsolete record :' diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index f953b91f..43a43e19 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -98,7 +98,7 @@ def __init__(self): 'raise_on_warnings': True } #Create database access objects - self.db = vvDatabase(self, self.dbConfig) + self.db = vvDatabase(self.dbConfig) # Set up versions __version__ = config["variantValidator"]['version'] self.version = __version__ diff --git a/VariantValidator/update_vv_db.py b/VariantValidator/update_vv_db.py index 38f63dbf..0c24f8c1 100644 --- a/VariantValidator/update_vv_db.py +++ b/VariantValidator/update_vv_db.py @@ -5,7 +5,6 @@ import copy from configparser import ConfigParser from .modules import vvDatabase -from . import variantValidator from . import configure @@ -22,7 +21,7 @@ def update(): 'raise_on_warnings': True } # Create database access objects - db = vvDatabase.vvDatabase(variantValidator.Validator(), dbConfig) + db = vvDatabase.vvDatabase(dbConfig) update_refseq(db) update_lrg(db) From 20d9e65f0f1f0ba6c247d8c80662fa5c6c40fbee Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 13 Mar 2019 14:24:48 +0000 Subject: [PATCH 043/223] Cleaned up the init --- VariantValidator/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/VariantValidator/__init__.py b/VariantValidator/__init__.py index 16b70fa6..c0924aa4 100644 --- a/VariantValidator/__init__.py +++ b/VariantValidator/__init__.py @@ -1,5 +1,5 @@ from . 
import configure -from .variantValidator import * +from .variantValidator import Validator -__all__=["Validator","Validation"] +__all__ = ["Validator"] From 5ce0de056c66ba32882a03f44458b16e24b2ec87 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 13 Mar 2019 14:25:18 +0000 Subject: [PATCH 044/223] Cleaned up section in updatte_vv_db --- VariantValidator/update_vv_db.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/VariantValidator/update_vv_db.py b/VariantValidator/update_vv_db.py index 0c24f8c1..7a2ff4af 100644 --- a/VariantValidator/update_vv_db.py +++ b/VariantValidator/update_vv_db.py @@ -148,20 +148,18 @@ def update_refseq(dbcnx): # Take the mapping data write = copy.deepcopy(line[0:6]) # add RSG ranges - write.append('1') end_rsg = int(line[4]) - int(line[3]) + 1 end_rsg = str(end_rsg) write.append(end_rsg) # Create block data chr then rsg chr_block = str(line[3]) + '-' + str(line[4]) write.append(chr_block) - rsg_block = str(write[6]) + '-' + str(write[7]) + rsg_block = '1-' + str(write[6]) write.append(rsg_block) # Add gene ID and Gene symbol(s) write.append(line[7]) write.append(line[6]) - del write[6] to_mysql.append(write) # Set up code to write to database From 43af6a8be13606a238660e7bdc00eb126ea01dd0 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 15 Mar 2019 11:20:27 +0000 Subject: [PATCH 045/223] Moved transcriptSet check to top of function so that it returns an Exception immediately if value is invalid --- VariantValidator/modules/vvMixinCore.py | 29 ++++++++++--------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 1666a52f..e9a0129e 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -55,17 +55,27 @@ class Mixin(vvMixinConverters.Mixin): - def validate(self, batch_variant, selected_assembly, select_transcripts, transcriptSet="refseq"): + def validate(self, 
batch_variant, selected_assembly, select_transcripts, transcriptSet = "refseq"): ''' This is the main validator function. :param batch_variant: A string containing the variant to be validated :param selected_assembly: The version of the genome assembly to use. :param select_transcripts: Can be an array of different transcripts, or 'all' Selecting multiple transcripts will lead to a multiple variant outputs. - :param transcriptSet: + :param transcriptSet: 'refseq' or 'ensembl'. Currently only 'refseq' is supported :return: ''' logger.info(batch_variant + ' : ' + selected_assembly) + + if transcriptSet == "refseq": + alt_aln_method = 'splign' + elif transcriptSet == "ensembl": + alt_aln_method = 'genebuild' + logger.warning("Ensembl is currently not supported") + raise Exception("Ensembl is currently not supported") + else: + raise Exception("The transcriptSet variable '%s' is invalid, it must be 'refseq' or 'ensembl'" % transcriptSet) + # Take start time start_time = time.time() @@ -133,21 +143,6 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr for validation in batch_list: # Start timing logger.traceStart(validation) - # Re-set cautions and automaps - - if transcriptSet == "refseq": - alt_aln_method = 'splign' - elif transcriptSet == "ensembl": - alt_aln_method = 'genebuild' - logger.warning("Ensembl is currently not supported") - validation['warnings'] += ': ' + "Ensembl is currently not supported" - continue - else: - logger.warning( - "The transcript set variable " + transcriptSet + " is invalid, it needs to be 'refseq' or 'ensembl'") - validation[ - 'warnings'] += ': ' + "The transcript set variable " + transcriptSet + " is invalid, it needs to be 'refseq' or 'ensembl'" - continue # Create Normalizers hn = hgvs.normalizer.Normalizer(self.hdp, From f560e20079ecd7ba957c2b7e59419f62d1688a07 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 15 Mar 2019 14:29:03 +0000 Subject: [PATCH 046/223] Added new variant obj and used it to 
simplify unicode character detection --- VariantValidator/modules/variant.py | 44 ++++++ VariantValidator/modules/vvMixinCore.py | 200 +++++++++--------------- 2 files changed, 122 insertions(+), 122 deletions(-) create mode 100644 VariantValidator/modules/variant.py diff --git a/VariantValidator/modules/variant.py b/VariantValidator/modules/variant.py new file mode 100644 index 00000000..0b5f839c --- /dev/null +++ b/VariantValidator/modules/variant.py @@ -0,0 +1,44 @@ +import re +import string + + +class Variant(object): + """ + This Variant object will contain the original input, the processed variant description and any other data that's + relevant to what kind of variant it is. + """ + + def __init__(self, original): + self.original = original + self.quibble = original + self.hgvs_formatted = original + + def is_ascii(self): + """ + Instead of the previous test for unicode rich text characters. + Now going to test that all characters are within the ascii alphabet + """ + try: + self.quibble.encode('ascii') + return True + except UnicodeEncodeError or UnicodeDecodeError: + # Will catch errors raised by python 2 and python 3 + return False + + def get_non_ascii(self): + """ + Will return non ascii character positions within variant description + :return: + """ + chars = [] + positions = [] + + for i, c in enumerate(self.quibble): + try: + c.encode('ascii') + except UnicodeEncodeError or UnicodeDecodeError: + chars.append(c) + positions.append(i+1) + + return chars, positions + diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index e9a0129e..5441e212 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -52,6 +52,7 @@ from . import vvChromosomes from . import vvMixinConverters from .vvFunctions import VariantValidatorError +from . 
import variant class Mixin(vvMixinConverters.Mixin): @@ -156,72 +157,27 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr alt_aln_method=alt_aln_method ) - # Blank cautions - caution = '' - automap = '' - # This will be used to order the final output if str(validation['order']) == 'false': ordering = ordering + 1 validation['order'] = ordering - else: - pass + + my_variant = variant.Variant(validation['id']) + # Bug catcher try: # Note, ID is not touched. It is always the input variant description. Quibble will be altered but id will not if type = g. input = validation['quibble'] logger.trace("Commenced validation of " + str(input), validation) - # Test for rich text unicode characters - try: - unicode_test = "{}".format(input) - except UnicodeDecodeError as e: - # Format the trapped character into unicode for styled printing - my_unicode = e[1] - my_unicode = my_unicode.decode('utf-8') - - # Test for rich text unicode characters - try: - str(my_unicode) - except UnicodeEncodeError as e: - # Format the trapped character into unicode for styled printing - unicoded_it = e[1] - unicoded_it_list = unicoded_it.split() - found_error="" - found_at=None - for try_me in unicoded_it_list: - try: - str(try_me) - except UnicodeEncodeError as e: - found_unicode = try_me - found_error = str(e) - found_at = found_unicode.encode('raw_unicode_escape') - break - # Extract character from the error - chars = re.findall(r"u'\\\\\w+'", found_error) - character = chars[0] - search_term = character.replace("u'", '') - search_term = search_term.replace("'", '') - found_at_decoded = found_at.decode('raw_unicode_escape') - found_at = found_at_decoded.encode('raw_unicode_escape') - string_char = str(character) - # Create a human readable U+ representation - human_code = re.sub(r"u'\\\\\w", 'U+', string_char) - human_code = human_code.replace("'", "") - format_human = "{}".format(human_code) - format_human = format_human.upper() - found_at = 
re.sub(search_term, '<' + format_human + '>', found_at) - slasher = re.compile("\\\\") - found_at = re.sub(slasher, '', found_at) - validation['id'] = found_at - error = 'Submitted variant description contains an invalid character which is represented by Unicode character ' + format_human + ' at position ' + found_at + ': Please remove this character and re-submit: A useful search function for Unicode characters can be found at https://unicode-search.net/' - validation['warnings'] = validation['warnings'] + ': ' + error - logger.warning(error) - continue - else: - pass - else: - pass + if not my_variant.is_ascii(): + chars, positions = my_variant.get_non_ascii() + error = 'Submitted variant description contains an invalid character(s) %s at position(s) %s: '\ + 'Please remove this character and re-submit: A useful search function for ' \ + 'Unicode characters can be found at https://unicode-search.net/' % (chars, positions) + validation['warnings'] = validation['warnings'] + ': ' + error + logger.warning(error) + continue # Remove whitespace ws = copy.copy(input) @@ -902,7 +858,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr logger.warning(error) continue else: - variant = formatted['variant'] + formatted_variant = formatted['variant'] input = formatted['variant'] stash_input = formatted['variant'] format_type = formatted['type'] @@ -913,7 +869,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr is rarely seen wrt genomic sequencing data and needs to be re-evaluated """ conversion = re.compile('con') - if conversion.search(variant): + if conversion.search(formatted_variant): validation['warnings'] = validation['warnings'] + ': ' + 'Gene conversions currently unsupported' logger.warning('Gene conversions currently unsupported') continue @@ -922,16 +878,16 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = 'false' # Change RNA bases to upper case but 
nothing else if format_type == ":r.": - variant = variant.upper() - variant = variant.replace(':R.', ':r.') + formatted_variant = formatted_variant.upper() + formatted_variant = formatted_variant.replace(':R.', ':r.') # lowercase the supported variant types - variant = variant.replace('DEL', 'del') - variant = variant.replace('INS', 'ins') - variant = variant.replace('INV', 'inv') - variant = variant.replace('DUP', 'dup') + formatted_variant = formatted_variant.replace('DEL', 'del') + formatted_variant = formatted_variant.replace('INS', 'ins') + formatted_variant = formatted_variant.replace('INV', 'inv') + formatted_variant = formatted_variant.replace('DUP', 'dup') try: - input_parses = self.hp.parse_hgvs_variant(variant) + input_parses = self.hp.parse_hgvs_variant(formatted_variant) except hgvs.exceptions.HGVSError as e: error = str(e) if error == 'false': @@ -945,7 +901,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if hasattr(input_parses.posedit.edit, 'ref'): if input_parses.posedit.edit.ref is not None: input_parses.posedit.edit.ref = input_parses.posedit.edit.ref.upper() - variant = str(input_parses) + formatted_variant = str(input_parses) input = str(input_parses) pass else: @@ -965,7 +921,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr line[5]) == 'True' and str(line[6]) == 'True': input_parses.ac = (line[1]) input = str(input_parses) - variant = input + formatted_variant = input break if re.match('^ENST', str(input_parses)): error = 'Unable to map ' + str(input_parses.ac) + ' to an equivalent RefSeq transcript' @@ -974,8 +930,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue else: validation['warnings'] = validation['warnings'] + ': ' + str( - trap_ens_in) + ' automapped to equivalent RefSeq transcript ' + variant - logger.warning(str(trap_ens_in) + ' automapped to equivalent RefSeq transcript ' + variant) + trap_ens_in) + ' automapped 
to equivalent RefSeq transcript ' + formatted_variant + logger.warning(str(trap_ens_in) + ' automapped to equivalent RefSeq transcript ' + formatted_variant) logger.trace("HVGS acceptance test passed", validation) # Check whether supported genome build is requested for non g. descriptions historic_assembly = 'false' @@ -1016,7 +972,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr ) else: - error = 'Mapping of ' + variant + ' to genome assembly ' + primary_assembly + ' is not supported' + error = 'Mapping of ' + formatted_variant + ' to genome assembly ' + primary_assembly + ' is not supported' validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue @@ -1091,7 +1047,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr refseqgene_reference = self.db.get_RefSeqGeneID_from_lrgID(lrg_reference) if refseqgene_reference != 'none': input_parses.ac = refseqgene_reference - variant = str(input_parses) + formatted_variant = str(input_parses) input = str(input_parses) stash_input = input if caution == '': @@ -1108,7 +1064,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr lrg_reference) if refseqtranscript_reference != 'none': input_parses.ac = refseqtranscript_reference - variant = str(input_parses) + formatted_variant = str(input_parses) input = str(input_parses) stash_input = input if caution == '': @@ -1125,7 +1081,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr Evolving list of common mistakes, see sections below """ # NM_ .g - if (re.search(r'^NM_', variant) or re.search(r'^NR_', variant)) and re.search(r':g.', variant): + if (re.search(r'^NM_', formatted_variant) or re.search(r'^NR_', formatted_variant)) and re.search(r':g.', formatted_variant): suggestion = input.replace(':g.', ':c.') error = 'Transcript reference sequence input as genomic (g.) reference sequence. 
Did you mean ' + suggestion + '?' validation['warnings'] = validation['warnings'] + ': ' + error @@ -1147,8 +1103,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue # NM_ NC_ NG_ NR_ p. - if (re.search(r'^NM_', variant) or re.search(r'^NR_', variant) or re.search(r'^NC_', variant) or re.search( - r'^NG_', variant)) and re.search(r':p.', variant): + if (re.search(r'^NM_', formatted_variant) or re.search(r'^NR_', formatted_variant) or re.search(r'^NC_', formatted_variant) or re.search( + r'^NG_', formatted_variant)) and re.search(r':p.', formatted_variant): issue_link = 'http://varnomen.hgvs.org/recommendations/protein/' error = 'Using a nucleotide reference sequence (NM_ NR_ NG_ NC_) to specify protein-level (p.) variation is not HGVS compliant. Please select an appropriate protein reference sequence (NP_)' validation['warnings'] = validation['warnings'] + ': ' + error @@ -1156,8 +1112,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue # NG_ c or NC_c.. - if (re.search(r'^NG_', variant) or re.search(r'^NC_', variant)) and re.search(r':c.', variant): - suggestion = ': For additional assistance, submit ' + str(variant) + ' to VariantValidator' + if (re.search(r'^NG_', formatted_variant) or re.search(r'^NC_', formatted_variant)) and re.search(r':c.', formatted_variant): + suggestion = ': For additional assistance, submit ' + str(formatted_variant) + ' to VariantValidator' error = 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. 
NG_(NM_):c.PositionVariation' + suggestion validation['warnings'] = validation['warnings'] + ': ' + error logger.warning(error) @@ -1230,7 +1186,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue else: input_parses.posedit.edit.ref = '' - variant = str(input_parses) + formatted_variant = str(input_parses) else: if re.search('bounds', error) or re.search('intronic variant', error): try: @@ -1567,7 +1523,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue else: input_parses.posedit.edit.ref = '' - variant = str(input_parses) + formatted_variant = str(input_parses) elif re.search('base must be >=1 for datum = SEQ_START or CDS_END', error): error = 'The given coordinate is outside the bounds of the reference sequence.' @@ -1814,7 +1770,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = 'false' # Try to validate the variant try: - hgvs_object = self.hp.parse_hgvs_variant(variant) + hgvs_object = self.hp.parse_hgvs_variant(formatted_variant) except hgvs.exceptions.HGVSError as e: error = str(e) try: @@ -1831,7 +1787,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if alt_aln_method != 'genebuild': # Gene description - requires GenBank search to get all the required info, i.e. 
transcript variant ID # accession number - hgvs_object = self.hp.parse_hgvs_variant(variant) + hgvs_object = self.hp.parse_hgvs_variant(formatted_variant) accession = hgvs_object.ac # Look for the accession in our database # Connect to database and send request @@ -1871,7 +1827,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr logger.warning(str(error)) continue input = str(hgvs_c) - variant = str(hgvs_c) + formatted_variant = str(hgvs_c) # COLLECT gene symbol, name and ACCESSION INFORMATION # Gene symbol @@ -1881,7 +1837,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr """ if (format_type != ':g.'): error = 'false' - hgvs_vt = self.hp.parse_hgvs_variant(variant) + hgvs_vt = self.hp.parse_hgvs_variant(formatted_variant) try: tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) except hgvs.exceptions.HGVSError as e: @@ -1919,7 +1875,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if alt_aln_method != 'genebuild': # Gene description - requires GenBank search to get all the required info, i.e. 
transcript variant ID # accession number - hgvs_object = self.hp.parse_hgvs_variant(variant) + hgvs_object = self.hp.parse_hgvs_variant(formatted_variant) accession = hgvs_object.ac # Look for the accession in our database # Connect to database and send request @@ -1981,7 +1937,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Ensembl databases else: # accession number - hgvs_object = self.hp.parse_hgvs_variant(variant) + hgvs_object = self.hp.parse_hgvs_variant(formatted_variant) accession = hgvs_object.ac # Look for the accession in our database # Connect to database and send request @@ -2036,7 +1992,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr """ if (format_type == ':g.'): - g_query = self.hp.parse_hgvs_variant(variant) + g_query = self.hp.parse_hgvs_variant(formatted_variant) # Genomic coordinates can be validated immediately error = 'false' @@ -2108,9 +2064,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Check for NG_ rsg = re.compile(r'^NG_') - if rsg.search(variant): + if rsg.search(formatted_variant): # parse - hgvs_refseqgene = self.hp.parse_hgvs_variant(variant) + hgvs_refseqgene = self.hp.parse_hgvs_variant(formatted_variant) # Convert to chromosomal position refseqgene_data = self.rsg_to_chr(hgvs_refseqgene, primary_assembly, hn, self.vr) # There should only ever be one description returned @@ -2122,7 +2078,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # re_submit # Tag the line so that it is not written out validation['warnings'] = validation[ - 'warnings'] + ': ' + variant + ' automapped to genome position ' + str( + 'warnings'] + ': ' + formatted_variant + ' automapped to genome position ' + str( input) query = {'quibble': input, 'id': validation['id'], 'warnings': validation['warnings'], 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', @@ -2131,7 +2087,7 @@ def validate(self, 
batch_variant, selected_assembly, select_transcripts, transcr coding = 'intergenic' batch_list.append(query) else: - error = 'Mapping unavailable for RefSeqGene ' + variant + ' using alignment method = ' + alt_aln_method + error = 'Mapping unavailable for RefSeqGene ' + formatted_variant + ' using alignment method = ' + alt_aln_method validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue @@ -3141,7 +3097,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Flag for validation valid = 'false' # Collect information for genomic level validation - obj = self.hp.parse_hgvs_variant(variant) + obj = self.hp.parse_hgvs_variant(formatted_variant) tx_ac = obj.ac @@ -3223,7 +3179,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if alt_aln_method != 'genebuild': error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g reason = "An error has occurred" - excep = "%s -- %s -- %s\n" % (time.ctime(), reason, variant) + excep = "%s -- %s -- %s\n" % (time.ctime(), reason, formatted_variant) validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue @@ -3231,7 +3187,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g reason = "An error has occurred" - excep = "%s -- %s -- %s\n" % (time.ctime(), reason, variant) + excep = "%s -- %s -- %s\n" % (time.ctime(), reason, formatted_variant) validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue @@ -3241,22 +3197,22 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if ( obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset == 0 and obj.posedit.pos.end.offset != 0) 
or ( obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset != 0 and obj.posedit.pos.end.offset == 0): - variant = str(obj) + formatted_variant = str(obj) else: # Normalize was I believe to replace ref. Mapping does this anyway # to_g = hn.normalize(to_g) - variant = str(self.myevm_g_to_t(evm, to_g, tx_ac)) + formatted_variant = str(self.myevm_g_to_t(evm, to_g, tx_ac)) tx_ac = '' elif geno.search(input): - if plus.search(variant) or minus.search(variant): - to_g = self.genomic(variant, no_norm_evm, primary_assembly,hn) + if plus.search(formatted_variant) or minus.search(formatted_variant): + to_g = self.genomic(formatted_variant, no_norm_evm, primary_assembly,hn) es = re.compile(r'error') if es.search(str(to_g)): if alt_aln_method != 'genebuild': error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g reason = "An error has occurred" - excep = "%s -- %s -- %s\n" % (time.ctime(), reason, variant) + excep = "%s -- %s -- %s\n" % (time.ctime(), reason, formatted_variant) validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue @@ -3264,7 +3220,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g reason = "An error has occurred" - excep = "%s -- %s -- %s\n" % (time.ctime(), reason, variant) + excep = "%s -- %s -- %s\n" % (time.ctime(), reason, formatted_variant) validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue @@ -3273,11 +3229,11 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if ( obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset == 0 and obj.posedit.pos.end.offset != 0) or ( obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset != 0 and obj.posedit.pos.end.offset == 0): - 
variant = str(obj) + formatted_variant = str(obj) else: # Normalize was I believe to replace ref. Mapping does this anyway # to_g = hn.normalize(to_g) - variant = str(self.myevm_g_to_t(evm, to_g, tx_ac)) + formatted_variant = str(self.myevm_g_to_t(evm, to_g, tx_ac)) tx_ac = '' else: @@ -3290,19 +3246,19 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if re.match('Unsupported normalization of variants spanning the exon-intron boundary', error): h_variant = obj - variant = variant + formatted_variant = formatted_variant caution = 'This coding sequence variant description spans at least one intron' automap = 'Use of the corresponding genomic sequence variant descriptions may be invalid. Please refer to https://www35.lamp.le.ac.uk/recommendations/' validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str( automap) logger.warning(str(caution) + ": " + str(automap)) else: - variant = str(h_variant) + formatted_variant = str(h_variant) tx_ac = '' # Create a crosser (exon boundary crossed) variant crossed_variant = str(evm._maybe_normalize(obj)) - if variant == crossed_variant: + if formatted_variant == crossed_variant: cross_variant = 'false' else: hgvs_crossed_variant = evm._maybe_normalize(obj) @@ -3315,11 +3271,11 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if boundary == 'false': cross_variant = 'false' - error = self.validateHGVS(variant) + error = self.validateHGVS(formatted_variant) if error == 'false': valid = 'true' else: - excep = "%s -- %s -- %s\n" % (time.ctime(), error, variant) + excep = "%s -- %s -- %s\n" % (time.ctime(), error, formatted_variant) validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue @@ -3356,14 +3312,14 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if cck == 'true': dl = re.compile('del') # This should only ever hit coding and RNA variants - if 
dl.search(variant): + if dl.search(formatted_variant): # RNA if pat_r.search(trapped_input): - coding = self.coding(variant, self.hp) + coding = self.coding(formatted_variant, self.hp) trans_acc = coding.ac # c to Genome coordinates - Map the variant to the genome - pre_var = self.genomic(variant, no_norm_evm, primary_assembly,hn) + pre_var = self.genomic(formatted_variant, no_norm_evm, primary_assembly,hn) # genome back to C coordinates post_var = self.myevm_g_to_t(evm, pre_var, trans_acc) @@ -3459,10 +3415,10 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Coding else: - coding = self.coding(variant, self.hp) + coding = self.coding(formatted_variant, self.hp) trans_acc = coding.ac # c to Genome coordinates - Map the variant to the genome - pre_var = self.hp.parse_hgvs_variant(variant) + pre_var = self.hp.parse_hgvs_variant(formatted_variant) try: pre_var = self.myevm_t_to_g(pre_var, no_norm_evm, primary_assembly, hn) @@ -3573,10 +3529,10 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: if pat_r.search(trapped_input): - coding = self.coding(variant, self.hp) + coding = self.coding(formatted_variant, self.hp) trans_acc = coding.ac # c to Genome coordinates - Map the variant to the genome - pre_var = self.genomic(variant, no_norm_evm, primary_assembly,hn) + pre_var = self.genomic(formatted_variant, no_norm_evm, primary_assembly,hn) # genome back to C coordinates post_var = self.myevm_g_to_t(evm, pre_var, trans_acc) @@ -3639,10 +3595,10 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr batch_list.append(query) else: - coding = self.coding(variant, self.hp) + coding = self.coding(formatted_variant, self.hp) trans_acc = coding.ac # c to Genome coordinates - Map the variant to the genome - pre_var = self.genomic(variant, no_norm_evm, primary_assembly,hn) + pre_var = self.genomic(formatted_variant, no_norm_evm, primary_assembly,hn) # genome back to C 
coordinates post_var = self.myevm_g_to_t(evm, pre_var, trans_acc) @@ -3717,7 +3673,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if alt_aln_method != 'genebuild': error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g reason = "An error has occurred" - excep = "%s -- %s -- %s\n" % (time.ctime(), reason, variant) + excep = "%s -- %s -- %s\n" % (time.ctime(), reason, formatted_variant) validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue @@ -3725,7 +3681,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g reason = "An error has occurred" - excep = "%s -- %s -- %s\n" % (time.ctime(), reason, variant) + excep = "%s -- %s -- %s\n" % (time.ctime(), reason, formatted_variant) validation['warnings'] = validation['warnings'] + ': ' + str(error) logger.warning(str(error)) continue @@ -3812,7 +3768,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr pass else: - query = self.hp.parse_hgvs_variant(variant) + query = self.hp.parse_hgvs_variant(formatted_variant) test = self.hp.parse_hgvs_variant(input) if query.posedit.pos != test.posedit.pos: caution = 'The variant description ' + input + ' requires alteration to comply with HGVS variant nomenclature:' @@ -3867,7 +3823,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # VALIDATION of intronic variants pre_valid = self.hp.parse_hgvs_variant(input) - post_valid = self.hp.parse_hgvs_variant(variant) + post_valid = self.hp.parse_hgvs_variant(formatted_variant) if valid == 'false': error = 'false' genomic_validation = str( @@ -3894,7 +3850,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr valid = 'true' else: - 
excep = "%s -- %s -- %s\n" % (time.ctime(), error, variant) + excep = "%s -- %s -- %s\n" % (time.ctime(), error, formatted_variant) validation['warnings'] = validation['warnings'] + ': ' + str(error) continue @@ -3919,7 +3875,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr ############################## # Coding sequence - BASED ON NORMALIZED VARIANT IF EXONIC - hgvs_coding = self.coding(variant, self.hp) + hgvs_coding = self.coding(formatted_variant, self.hp) boundary = re.compile('exon-intron boundary') spanning = re.compile('exon/intron') @@ -6120,7 +6076,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: hgvs_coding = copy.deepcopy(hgvs_refreshed_variant) coding = fn.valstr(hgvs_coding) - variant = coding + formatted_variant = coding # OBTAIN THE RefSeqGene coordinates # Attempt 1 = UTA From 6c77ad830048cf703909495f51d921a1c4aa5407 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 15 Mar 2019 14:48:07 +0000 Subject: [PATCH 047/223] More tidying, including the whitespace removal becoming a function in the variant object --- VariantValidator/modules/variant.py | 7 +++++++ VariantValidator/modules/vvMixinCore.py | 23 +++++++++-------------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/VariantValidator/modules/variant.py b/VariantValidator/modules/variant.py index 0b5f839c..6755a070 100644 --- a/VariantValidator/modules/variant.py +++ b/VariantValidator/modules/variant.py @@ -42,3 +42,10 @@ def get_non_ascii(self): return chars, positions + def remove_whitespace(self): + """ + Will remove all whitespace from quibble + :return: + """ + self.quibble = ''.join(self.quibble.split()) + diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 5441e212..fd4af46a 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -180,13 +180,12 @@ def validate(self, batch_variant, 
selected_assembly, select_transcripts, transcr continue # Remove whitespace - ws = copy.copy(input) - input = input.strip() - input = ''.join(input.split()) - if input != ws: - caution = 'Whitespace removed from variant description ' + str(ws) + my_variant.remove_whitespace() + if my_variant.quibble != my_variant.original: + caution = 'Whitespace removed from variant description %s' % my_variant.original validation['warnings'] = validation['warnings'] + ': ' + caution logger.info(caution) + stash_input = copy.copy(input) # Set the primary_assembly if validation['primary_assembly'] == 'false': @@ -202,21 +201,17 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr selected_assembly = selected_assembly.replace('H', 'h') primary_assembly = selected_assembly # Catch invalid genome build - valid_build = False - for genome_build in self.genome_builds: - if primary_assembly == genome_build: - valid_build = True - if valid_build is False: + if primary_assembly in self.genome_builds: + validation['primary_assembly'] = primary_assembly + else: primary_assembly = 'GRCh38' - validation['warnings'] = validation[ - 'warnings'] + ': Invalid genome build has been specified. Automap has selected the default build (GRCh38)' + validation['warnings'] = validation['warnings'] + ': Invalid genome build has been specified. Automap has selected the default build (GRCh38)' logger.warning( 'Invalid genome build has been specified. 
Automap has selected the default build ' + primary_assembly) - else: - validation['primary_assembly'] = primary_assembly else: primary_assembly = validation['primary_assembly'] logger.trace("Completed string formatting", validation) + # Set variables that batch will not use but are required crossing = 'false' boundary = 'false' From 61afd55ab3ea69a9ef079bd736923988892ec065 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 15 Mar 2019 15:27:18 +0000 Subject: [PATCH 048/223] Set batch_list to be a part of the validator obj: --- VariantValidator/modules/vvMixinCore.py | 41 ++++++++++++------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index fd4af46a..5ef53280 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -113,20 +113,17 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr batch_queries = batch_variant.split('|') # Turn each variant into a dictionary. 
The dictionary will be compiled during validation - batch_list = [] + self.batch_list = [] for queries in batch_queries: queries = queries.strip() query = {'quibble': queries, 'id': queries, 'warnings': '', 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', 'write': 'true', 'primary_assembly': 'false', 'order': 'false'} - batch_list.append(query) + self.batch_list.append(query) # Create List to carry batch data batch_out = [] - # Ensure batch_list is pulled into the function so that it can be appended to - batch_list = batch_list - # Enter the validation loop ########################### # Allow order by input @@ -140,8 +137,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr flag : gene """ set_output_type_flag = 'warning' - logger.debug("Batch list length " + str(len(batch_list))) - for validation in batch_list: + logger.debug("Batch list length " + str(len(self.batch_list))) + for validation in self.batch_list: # Start timing logger.traceStart(validation) @@ -256,8 +253,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr queryB = {'quibble': input_B, 'id': validation['id'], 'warnings': validation['warnings'], 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} - batch_list.append(queryA) - batch_list.append(queryB) + self.batch_list.append(queryA) + self.batch_list.append(queryB) continue elif re.search(r'[-:]\d+[-:][-:][GATC]+', input) or re.search(r'[-:]\d+[-:][.][-:][GATC]+', input): input = input.replace(':', '-') @@ -456,7 +453,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr 'warnings': validation['warnings'], 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} - 
batch_list.append(query) + self.batch_list.append(query) logger.resub('HGVS variant nomenclature does not allow the use of a gene symbol (' + \ query_a_symbol + ') in place of a valid reference sequence') else: @@ -512,7 +509,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr 'order': ordering} logger.resub( 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. NG_(NM_):c.PositionVariation. Resubmitting corrected version.') - batch_list.append(query) + self.batch_list.append(query) else: validation['warnings'] = validation[ 'warnings'] + ': ' + 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation. Re-submit ' + input + ' but also specify transcripts from the following: ' + 'select_transcripts=' + select_from_these_transcripts @@ -632,7 +629,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} - batch_list.append(query) + self.batch_list.append(query) logger.resub( 'Multiple ALT sequences detected. 
Auto-submitting all possible combinations.') continue @@ -801,7 +798,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} coding = 'intergenic' - batch_list.append(query) + self.batch_list.append(query) validation['write'] = 'false' continue except fn.alleleVariantError as e: @@ -2080,7 +2077,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr 'genomic_g': '', 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} coding = 'intergenic' - batch_list.append(query) + self.batch_list.append(query) else: error = 'Mapping unavailable for RefSeqGene ' + formatted_variant + ' using alignment method = ' + alt_aln_method validation['warnings'] = validation['warnings'] + ': ' + str(error) @@ -3081,7 +3078,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr 'warnings': validation['warnings'], 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} - batch_list.append(query) + self.batch_list.append(query) logger.warning("Continue reached when mapping transcript types to variants") # Call next description continue @@ -3406,7 +3403,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} - batch_list.append(query) + self.batch_list.append(query) # Coding else: @@ -3520,7 +3517,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} - 
batch_list.append(query) + self.batch_list.append(query) else: if pat_r.search(trapped_input): @@ -3587,7 +3584,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} - batch_list.append(query) + self.batch_list.append(query) else: coding = self.coding(formatted_variant, self.hp) @@ -3649,7 +3646,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} - batch_list.append(query) + self.batch_list.append(query) # If cck not true @@ -3757,7 +3754,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} - batch_list.append(query) + self.batch_list.append(query) elif pat_g.search(input): pass @@ -3814,7 +3811,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} - batch_list.append(query) + self.batch_list.append(query) # VALIDATION of intronic variants pre_valid = self.hp.parse_hgvs_variant(input) @@ -6340,7 +6337,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr logger.trace("End of for loop") # order the rows # from operator import itemgetter - by_order = sorted(batch_list, key=itemgetter('order')) + by_order = sorted(self.batch_list, key=itemgetter('order')) for valid in by_order: if 'write' in list(valid.keys()): From 
2bd5327469a1ea4d76085c84d7fdfc1cb8ab42ef Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 15 Mar 2019 16:15:33 +0000 Subject: [PATCH 049/223] Updated tests to reflect changes in validator database field lengths --- test/test_inputs.py | 56 ++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/test/test_inputs.py b/test/test_inputs.py index 42bfdbc0..cbe28d8a 100644 --- a/test/test_inputs.py +++ b/test/test_inputs.py @@ -5236,7 +5236,7 @@ def test_variant164(self): assert results['NM_001243246.1:c.2073G>A']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001243246.1:c.2073G>A']['alt_genomic_loci'], []) assert results['NM_001243246.1:c.2073G>A']['gene_symbol'] == 'P3H1' - assert results['NM_001243246.1:c.2073G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001230175.1:p.(Ala691=)', 'slr': 'NP_001230175.1:p.(A691=)'} + assert results['NM_001243246.1:c.2073G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001230175.1(LRG_5p3):p.(Ala691=)', 'slr': 'NP_001230175.1:p.(A691=)'} assert results['NM_001243246.1:c.2073G>A']['submitted_variant'] == '1-43212925-C-T' assert results['NM_001243246.1:c.2073G>A']['genome_context_intronic_sequence'] == '' assert results['NM_001243246.1:c.2073G>A']['hgvs_lrg_variant'] == '' @@ -5253,7 +5253,7 @@ def test_variant164(self): assert results['NM_001146289.1:c.2073G>A']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001146289.1:c.2073G>A']['alt_genomic_loci'], []) assert results['NM_001146289.1:c.2073G>A']['gene_symbol'] == 'P3H1' - assert results['NM_001146289.1:c.2073G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001139761.1:p.(Ala691=)', 'slr': 'NP_001139761.1:p.(A691=)'} + assert results['NM_001146289.1:c.2073G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001139761.1(LRG_5p2):p.(Ala691=)', 'slr': 'NP_001139761.1:p.(A691=)'} assert 
results['NM_001146289.1:c.2073G>A']['submitted_variant'] == '1-43212925-C-T' assert results['NM_001146289.1:c.2073G>A']['genome_context_intronic_sequence'] == '' assert results['NM_001146289.1:c.2073G>A']['hgvs_lrg_variant'] == 'LRG_5:g.24831G>A' @@ -5450,7 +5450,7 @@ def test_variant168(self): assert results['NM_001282387.1:c.394C>G']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001282387.1:c.394C>G']['alt_genomic_loci'], []) assert results['NM_001282387.1:c.394C>G']['gene_symbol'] == 'IDH1' - assert results['NM_001282387.1:c.394C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269316.1:p.(Arg132Gly)', 'slr': 'NP_001269316.1:p.(R132G)'} + assert results['NM_001282387.1:c.394C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269316.1(LRG_610p2):p.(Arg132Gly)', 'slr': 'NP_001269316.1:p.(R132G)'} assert results['NM_001282387.1:c.394C>G']['submitted_variant'] == '2-209113113-G-A,C,T' assert results['NM_001282387.1:c.394C>G']['genome_context_intronic_sequence'] == '' assert results['NM_001282387.1:c.394C>G']['hgvs_lrg_variant'] == '' @@ -5467,7 +5467,7 @@ def test_variant168(self): assert results['NM_001282387.1:c.394C>A']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001282387.1:c.394C>A']['alt_genomic_loci'], []) assert results['NM_001282387.1:c.394C>A']['gene_symbol'] == 'IDH1' - assert results['NM_001282387.1:c.394C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269316.1:p.(Arg132Ser)', 'slr': 'NP_001269316.1:p.(R132S)'} + assert results['NM_001282387.1:c.394C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269316.1(LRG_610p2):p.(Arg132Ser)', 'slr': 'NP_001269316.1:p.(R132S)'} assert results['NM_001282387.1:c.394C>A']['submitted_variant'] == '2-209113113-G-A,C,T' assert results['NM_001282387.1:c.394C>A']['genome_context_intronic_sequence'] == '' assert results['NM_001282387.1:c.394C>A']['hgvs_lrg_variant'] == '' @@ -5501,7 +5501,7 @@ def 
test_variant168(self): assert results['NM_001282386.1:c.394C>T']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001282386.1:c.394C>T']['alt_genomic_loci'], []) assert results['NM_001282386.1:c.394C>T']['gene_symbol'] == 'IDH1' - assert results['NM_001282386.1:c.394C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269315.1:p.(Arg132Cys)', 'slr': 'NP_001269315.1:p.(R132C)'} + assert results['NM_001282386.1:c.394C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269315.1(LRG_610p3):p.(Arg132Cys)', 'slr': 'NP_001269315.1:p.(R132C)'} assert results['NM_001282386.1:c.394C>T']['submitted_variant'] == '2-209113113-G-A,C,T' assert results['NM_001282386.1:c.394C>T']['genome_context_intronic_sequence'] == '' assert results['NM_001282386.1:c.394C>T']['hgvs_lrg_variant'] == '' @@ -5570,7 +5570,7 @@ def test_variant168(self): assert results['NM_001282387.1:c.394C>T']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001282387.1:c.394C>T']['alt_genomic_loci'], []) assert results['NM_001282387.1:c.394C>T']['gene_symbol'] == 'IDH1' - assert results['NM_001282387.1:c.394C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269316.1:p.(Arg132Cys)', 'slr': 'NP_001269316.1:p.(R132C)'} + assert results['NM_001282387.1:c.394C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269316.1(LRG_610p2):p.(Arg132Cys)', 'slr': 'NP_001269316.1:p.(R132C)'} assert results['NM_001282387.1:c.394C>T']['submitted_variant'] == '2-209113113-G-A,C,T' assert results['NM_001282387.1:c.394C>T']['genome_context_intronic_sequence'] == '' assert results['NM_001282387.1:c.394C>T']['hgvs_lrg_variant'] == '' @@ -5587,7 +5587,7 @@ def test_variant168(self): assert results['NM_001282386.1:c.394C>G']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001282386.1:c.394C>G']['alt_genomic_loci'], []) assert results['NM_001282386.1:c.394C>G']['gene_symbol'] == 'IDH1' - assert 
results['NM_001282386.1:c.394C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269315.1:p.(Arg132Gly)', 'slr': 'NP_001269315.1:p.(R132G)'} + assert results['NM_001282386.1:c.394C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269315.1(LRG_610p3):p.(Arg132Gly)', 'slr': 'NP_001269315.1:p.(R132G)'} assert results['NM_001282386.1:c.394C>G']['submitted_variant'] == '2-209113113-G-A,C,T' assert results['NM_001282386.1:c.394C>G']['genome_context_intronic_sequence'] == '' assert results['NM_001282386.1:c.394C>G']['hgvs_lrg_variant'] == '' @@ -5621,7 +5621,7 @@ def test_variant168(self): assert results['NM_001282386.1:c.394C>A']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001282386.1:c.394C>A']['alt_genomic_loci'], []) assert results['NM_001282386.1:c.394C>A']['gene_symbol'] == 'IDH1' - assert results['NM_001282386.1:c.394C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269315.1:p.(Arg132Ser)', 'slr': 'NP_001269315.1:p.(R132S)'} + assert results['NM_001282386.1:c.394C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269315.1(LRG_610p3):p.(Arg132Ser)', 'slr': 'NP_001269315.1:p.(R132S)'} assert results['NM_001282386.1:c.394C>A']['submitted_variant'] == '2-209113113-G-A,C,T' assert results['NM_001282386.1:c.394C>A']['genome_context_intronic_sequence'] == '' assert results['NM_001282386.1:c.394C>A']['hgvs_lrg_variant'] == '' @@ -7212,7 +7212,7 @@ def test_variant201(self): assert results['NM_001243766.1:c.1869+31_1869+34del']['refseqgene_context_intronic_sequence'] == 'NG_009205.2(NM_001243766.1):c.1869+31_1869+34del' self.assertCountEqual(results['NM_001243766.1:c.1869+31_1869+34del']['alt_genomic_loci'], []) assert results['NM_001243766.1:c.1869+31_1869+34del']['gene_symbol'] == 'POMGNT1' - assert results['NM_001243766.1:c.1869+31_1869+34del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001230695.1:p.?', 'slr': 'NP_001230695.1:p.?'} + assert 
results['NM_001243766.1:c.1869+31_1869+34del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001230695.1(LRG_701p1):p.?', 'slr': 'NP_001230695.1:p.?'} assert results['NM_001243766.1:c.1869+31_1869+34del']['submitted_variant'] == '1-46655125-CTCAC-C' assert results['NM_001243766.1:c.1869+31_1869+34del']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_001243766.1):c.1869+31_1869+34del' assert results['NM_001243766.1:c.1869+31_1869+34del']['hgvs_lrg_variant'] == 'LRG_701:g.35853_35856del' @@ -7800,7 +7800,7 @@ def test_variant212(self): assert results['NM_001126049.1:c.-794_-792del']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001126049.1:c.-794_-792del']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NW_013171807.1:g.79106_79108del', 'vcf': {'chr': 'HG2334_PATCH', 'ref': 'CGCA', 'pos': '79102', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NW_013171807.1:g.79106_79108del', 'vcf': {'chr': 'NW_013171807.1', 'ref': 'CGCA', 'pos': '79102', 'alt': 'C'}}}]) assert results['NM_001126049.1:c.-794_-792del']['gene_symbol'] == 'KLLN' - assert results['NM_001126049.1:c.-794_-792del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119521.1:p.?', 'slr': 'NP_001119521.1:p.?'} + assert results['NM_001126049.1:c.-794_-792del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119521.1(LRG_1087p1):p.?', 'slr': 'NP_001119521.1:p.?'} assert results['NM_001126049.1:c.-794_-792del']['submitted_variant'] == '10-89623035-CGCA-C' assert results['NM_001126049.1:c.-794_-792del']['genome_context_intronic_sequence'] == '' assert results['NM_001126049.1:c.-794_-792del']['hgvs_lrg_variant'] == 'LRG_1087:g.5157_5159del' @@ -7891,7 +7891,7 @@ def test_variant213(self): assert results['NM_001122955.3:c.1376G>T']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001122955.3:c.1376G>T']['alt_genomic_loci'], []) assert results['NM_001122955.3:c.1376G>T']['gene_symbol'] == 'BSCL2' - 
assert results['NM_001122955.3:c.1376G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001116427.1:p.(Cys459Phe)', 'slr': 'NP_001116427.1:p.(C459F)'} + assert results['NM_001122955.3:c.1376G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001116427.1(LRG_235p1):p.(Cys459Phe)', 'slr': 'NP_001116427.1:p.(C459F)'} assert results['NM_001122955.3:c.1376G>T']['submitted_variant'] == '11-62457852-C-A' assert results['NM_001122955.3:c.1376G>T']['genome_context_intronic_sequence'] == '' assert results['NM_001122955.3:c.1376G>T']['hgvs_lrg_variant'] == 'LRG_235:g.24195G>T' @@ -11081,7 +11081,7 @@ def test_variant251(self): assert results['NM_001126113.2:c.652_654del']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001126113.2:c.652_654del']['alt_genomic_loci'], []) assert results['NM_001126113.2:c.652_654del']['gene_symbol'] == 'TP53' - assert results['NM_001126113.2:c.652_654del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119585.1:p.(Val218del)', 'slr': 'NP_001119585.1:p.(V218del)'} + assert results['NM_001126113.2:c.652_654del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119585.1(LRG_321p4):p.(Val218del)', 'slr': 'NP_001119585.1:p.(V218del)'} assert results['NM_001126113.2:c.652_654del']['submitted_variant'] == '17-7578194-GCAC-G' assert results['NM_001126113.2:c.652_654del']['genome_context_intronic_sequence'] == '' assert results['NM_001126113.2:c.652_654del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' @@ -11098,7 +11098,7 @@ def test_variant251(self): assert results['NM_001126118.1:c.535_537del']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001126118.1:c.535_537del']['alt_genomic_loci'], []) assert results['NM_001126118.1:c.535_537del']['gene_symbol'] == 'TP53' - assert results['NM_001126118.1:c.535_537del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119590.1:p.(Val179del)', 'slr': 'NP_001119590.1:p.(V179del)'} + assert 
results['NM_001126118.1:c.535_537del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119590.1(LRG_321p8):p.(Val179del)', 'slr': 'NP_001119590.1:p.(V179del)'} assert results['NM_001126118.1:c.535_537del']['submitted_variant'] == '17-7578194-GCAC-G' assert results['NM_001126118.1:c.535_537del']['genome_context_intronic_sequence'] == '' assert results['NM_001126118.1:c.535_537del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' @@ -11115,7 +11115,7 @@ def test_variant251(self): assert results['NM_001126116.1:c.256_258del']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001126116.1:c.256_258del']['alt_genomic_loci'], []) assert results['NM_001126116.1:c.256_258del']['gene_symbol'] == 'TP53' - assert results['NM_001126116.1:c.256_258del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119588.1:p.(Val86del)', 'slr': 'NP_001119588.1:p.(V86del)'} + assert results['NM_001126116.1:c.256_258del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119588.1(LRG_321p6):p.(Val86del)', 'slr': 'NP_001119588.1:p.(V86del)'} assert results['NM_001126116.1:c.256_258del']['submitted_variant'] == '17-7578194-GCAC-G' assert results['NM_001126116.1:c.256_258del']['genome_context_intronic_sequence'] == '' assert results['NM_001126116.1:c.256_258del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' @@ -11132,7 +11132,7 @@ def test_variant251(self): assert results['NM_001126117.1:c.256_258del']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001126117.1:c.256_258del']['alt_genomic_loci'], []) assert results['NM_001126117.1:c.256_258del']['gene_symbol'] == 'TP53' - assert results['NM_001126117.1:c.256_258del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119589.1:p.(Val86del)', 'slr': 'NP_001119589.1:p.(V86del)'} + assert results['NM_001126117.1:c.256_258del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119589.1(LRG_321p7):p.(Val86del)', 'slr': 'NP_001119589.1:p.(V86del)'} 
assert results['NM_001126117.1:c.256_258del']['submitted_variant'] == '17-7578194-GCAC-G' assert results['NM_001126117.1:c.256_258del']['genome_context_intronic_sequence'] == '' assert results['NM_001126117.1:c.256_258del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' @@ -11235,7 +11235,7 @@ def test_variant251(self): assert results['NM_001126115.1:c.256_258del']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001126115.1:c.256_258del']['alt_genomic_loci'], []) assert results['NM_001126115.1:c.256_258del']['gene_symbol'] == 'TP53' - assert results['NM_001126115.1:c.256_258del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119587.1:p.(Val86del)', 'slr': 'NP_001119587.1:p.(V86del)'} + assert results['NM_001126115.1:c.256_258del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119587.1(LRG_321p5):p.(Val86del)', 'slr': 'NP_001119587.1:p.(V86del)'} assert results['NM_001126115.1:c.256_258del']['submitted_variant'] == '17-7578194-GCAC-G' assert results['NM_001126115.1:c.256_258del']['genome_context_intronic_sequence'] == '' assert results['NM_001126115.1:c.256_258del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' @@ -11252,7 +11252,7 @@ def test_variant251(self): assert results['NM_001126114.2:c.652_654del']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001126114.2:c.652_654del']['alt_genomic_loci'], []) assert results['NM_001126114.2:c.652_654del']['gene_symbol'] == 'TP53' - assert results['NM_001126114.2:c.652_654del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119586.1:p.(Val218del)', 'slr': 'NP_001119586.1:p.(V218del)'} + assert results['NM_001126114.2:c.652_654del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119586.1(LRG_321p3):p.(Val218del)', 'slr': 'NP_001119586.1:p.(V218del)'} assert results['NM_001126114.2:c.652_654del']['submitted_variant'] == '17-7578194-GCAC-G' assert 
results['NM_001126114.2:c.652_654del']['genome_context_intronic_sequence'] == '' assert results['NM_001126114.2:c.652_654del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' @@ -11343,7 +11343,7 @@ def test_variant252(self): assert results['NM_001126118.1:c.289dup']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001126118.1:c.289dup']['alt_genomic_loci'], []) assert results['NM_001126118.1:c.289dup']['gene_symbol'] == 'TP53' - assert results['NM_001126118.1:c.289dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119590.1:p.(Gln97ProfsTer13)', 'slr': 'NP_001119590.1:p.(Q97Pfs*13)'} + assert results['NM_001126118.1:c.289dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119590.1(LRG_321p8):p.(Gln97ProfsTer13)', 'slr': 'NP_001119590.1:p.(Q97Pfs*13)'} assert results['NM_001126118.1:c.289dup']['submitted_variant'] == '17-7578523-T-TG' assert results['NM_001126118.1:c.289dup']['genome_context_intronic_sequence'] == '' assert results['NM_001126118.1:c.289dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' @@ -11394,7 +11394,7 @@ def test_variant252(self): assert results['NM_001126115.1:c.10dup']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001126115.1:c.10dup']['alt_genomic_loci'], []) assert results['NM_001126115.1:c.10dup']['gene_symbol'] == 'TP53' - assert results['NM_001126115.1:c.10dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119587.1:p.(Gln4ProfsTer13)', 'slr': 'NP_001119587.1:p.(Q4Pfs*13)'} + assert results['NM_001126115.1:c.10dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119587.1(LRG_321p5):p.(Gln4ProfsTer13)', 'slr': 'NP_001119587.1:p.(Q4Pfs*13)'} assert results['NM_001126115.1:c.10dup']['submitted_variant'] == '17-7578523-T-TG' assert results['NM_001126115.1:c.10dup']['genome_context_intronic_sequence'] == '' assert results['NM_001126115.1:c.10dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' @@ -11428,7 +11428,7 @@ def 
test_variant252(self): assert results['NM_001126117.1:c.10dup']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001126117.1:c.10dup']['alt_genomic_loci'], []) assert results['NM_001126117.1:c.10dup']['gene_symbol'] == 'TP53' - assert results['NM_001126117.1:c.10dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119589.1:p.(Gln4ProfsTer13)', 'slr': 'NP_001119589.1:p.(Q4Pfs*13)'} + assert results['NM_001126117.1:c.10dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119589.1(LRG_321p7):p.(Gln4ProfsTer13)', 'slr': 'NP_001119589.1:p.(Q4Pfs*13)'} assert results['NM_001126117.1:c.10dup']['submitted_variant'] == '17-7578523-T-TG' assert results['NM_001126117.1:c.10dup']['genome_context_intronic_sequence'] == '' assert results['NM_001126117.1:c.10dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' @@ -11514,7 +11514,7 @@ def test_variant252(self): assert results['NM_001126113.2:c.406dup']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001126113.2:c.406dup']['alt_genomic_loci'], []) assert results['NM_001126113.2:c.406dup']['gene_symbol'] == 'TP53' - assert results['NM_001126113.2:c.406dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119585.1:p.(Gln136ProfsTer13)', 'slr': 'NP_001119585.1:p.(Q136Pfs*13)'} + assert results['NM_001126113.2:c.406dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119585.1(LRG_321p4):p.(Gln136ProfsTer13)', 'slr': 'NP_001119585.1:p.(Q136Pfs*13)'} assert results['NM_001126113.2:c.406dup']['submitted_variant'] == '17-7578523-T-TG' assert results['NM_001126113.2:c.406dup']['genome_context_intronic_sequence'] == '' assert results['NM_001126113.2:c.406dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' @@ -11531,7 +11531,7 @@ def test_variant252(self): assert results['NM_001126116.1:c.10dup']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001126116.1:c.10dup']['alt_genomic_loci'], []) assert 
results['NM_001126116.1:c.10dup']['gene_symbol'] == 'TP53' - assert results['NM_001126116.1:c.10dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119588.1:p.(Gln4ProfsTer13)', 'slr': 'NP_001119588.1:p.(Q4Pfs*13)'} + assert results['NM_001126116.1:c.10dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119588.1(LRG_321p6):p.(Gln4ProfsTer13)', 'slr': 'NP_001119588.1:p.(Q4Pfs*13)'} assert results['NM_001126116.1:c.10dup']['submitted_variant'] == '17-7578523-T-TG' assert results['NM_001126116.1:c.10dup']['genome_context_intronic_sequence'] == '' assert results['NM_001126116.1:c.10dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' @@ -11565,7 +11565,7 @@ def test_variant252(self): assert results['NM_001126114.2:c.406dup']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001126114.2:c.406dup']['alt_genomic_loci'], []) assert results['NM_001126114.2:c.406dup']['gene_symbol'] == 'TP53' - assert results['NM_001126114.2:c.406dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119586.1:p.(Gln136ProfsTer13)', 'slr': 'NP_001119586.1:p.(Q136Pfs*13)'} + assert results['NM_001126114.2:c.406dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119586.1(LRG_321p3):p.(Gln136ProfsTer13)', 'slr': 'NP_001119586.1:p.(Q136Pfs*13)'} assert results['NM_001126114.2:c.406dup']['submitted_variant'] == '17-7578523-T-TG' assert results['NM_001126114.2:c.406dup']['genome_context_intronic_sequence'] == '' assert results['NM_001126114.2:c.406dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' @@ -13213,7 +13213,7 @@ def test_variant269(self): assert results['NM_001130987.1:c.3678C>G']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001130987.1:c.3678C>G']['alt_genomic_loci'], []) assert results['NM_001130987.1:c.3678C>G']['gene_symbol'] == 'DYSF' - assert results['NM_001130987.1:c.3678C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124459.1:p.(Ile1226Met)', 'slr': 
'NP_001124459.1:p.(I1226M)'} + assert results['NM_001130987.1:c.3678C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124459.1(LRG_845p2):p.(Ile1226Met)', 'slr': 'NP_001124459.1:p.(I1226M)'} assert results['NM_001130987.1:c.3678C>G']['submitted_variant'] == '2-71825797-C-G' assert results['NM_001130987.1:c.3678C>G']['genome_context_intronic_sequence'] == '' assert results['NM_001130987.1:c.3678C>G']['hgvs_lrg_variant'] == '' @@ -14251,7 +14251,7 @@ def test_variant274(self): assert results['NM_001267550.1:c.106974C>A']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001267550.1:c.106974C>A']['alt_genomic_loci'], []) assert results['NM_001267550.1:c.106974C>A']['gene_symbol'] == 'TTN' - assert results['NM_001267550.1:c.106974C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001254479.1:p.(Ser35658Arg)', 'slr': 'NP_001254479.1:p.(S35658R)'} + assert results['NM_001267550.1:c.106974C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001254479.1(LRG_391p1):p.(Ser35658Arg)', 'slr': 'NP_001254479.1:p.(S35658R)'} assert results['NM_001267550.1:c.106974C>A']['submitted_variant'] == '2-179393504-G-T' assert results['NM_001267550.1:c.106974C>A']['genome_context_intronic_sequence'] == '' assert results['NM_001267550.1:c.106974C>A']['hgvs_lrg_variant'] == 'LRG_391:g.307026C>A' @@ -15332,7 +15332,7 @@ def test_variant285(self): assert results['NM_001349798.2:c.45_46insCCT']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001349798.2:c.45_46insCCT']['alt_genomic_loci'], []) assert results['NM_001349798.2:c.45_46insCCT']['gene_symbol'] == 'FBXW7' - assert results['NM_001349798.2:c.45_46insCCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001336727.1:p.(Thr15_Gly16insPro)', 'slr': 'NP_001336727.1:p.(T15_G16insP)'} + assert results['NM_001349798.2:c.45_46insCCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001336727.1(LRG_1141p1):p.(Thr15_Gly16insPro)', 'slr': 
'NP_001336727.1:p.(T15_G16insP)'} assert results['NM_001349798.2:c.45_46insCCT']['submitted_variant'] == '4-153332910-C-CAGG' assert results['NM_001349798.2:c.45_46insCCT']['genome_context_intronic_sequence'] == '' assert results['NM_001349798.2:c.45_46insCCT']['hgvs_lrg_variant'] == '' @@ -17515,7 +17515,7 @@ def test_variant320(self): assert results['NM_001097642.2:c.-16-441C>T']['refseqgene_context_intronic_sequence'] == 'NG_008357.1(NM_001097642.2):c.-16-441C>T' self.assertCountEqual(results['NM_001097642.2:c.-16-441C>T']['alt_genomic_loci'], []) assert results['NM_001097642.2:c.-16-441C>T']['gene_symbol'] == 'GJB1' - assert results['NM_001097642.2:c.-16-441C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001091111.1:p.?', 'slr': 'NP_001091111.1:p.?'} + assert results['NM_001097642.2:c.-16-441C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001091111.1(LRG_245p1):p.?', 'slr': 'NP_001091111.1:p.?'} assert results['NM_001097642.2:c.-16-441C>T']['submitted_variant'] == 'X-70443101-C-T' assert results['NM_001097642.2:c.-16-441C>T']['genome_context_intronic_sequence'] == 'NC_000023.10(NM_001097642.2):c.-16-441C>T' assert results['NM_001097642.2:c.-16-441C>T']['hgvs_lrg_variant'] == 'LRG_245:g.13040C>T' @@ -17665,7 +17665,7 @@ def test_variant322(self): assert results['NM_001110792.1:c.538C>T']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001110792.1:c.538C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'HG1497_PATCH', 'ref': 'G', 'pos': '1465305', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'NW_003871103.3', 'ref': 'G', 'pos': '1465305', 'alt': 'A'}}}]) assert results['NM_001110792.1:c.538C>T']['gene_symbol'] == 'MECP2' - assert results['NM_001110792.1:c.538C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001104262.1:p.(Arg180Ter)', 'slr': 'NP_001104262.1:p.(R180*)'} + assert 
results['NM_001110792.1:c.538C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001104262.1(LRG_764p1):p.(Arg180Ter)', 'slr': 'NP_001104262.1:p.(R180*)'} assert results['NM_001110792.1:c.538C>T']['submitted_variant'] == 'X-153296777-G-A' assert results['NM_001110792.1:c.538C>T']['genome_context_intronic_sequence'] == '' assert results['NM_001110792.1:c.538C>T']['hgvs_lrg_variant'] == 'LRG_764:g.110802C>T' From 049b7d6bf80fd7497cbd19b8d5c2302c87d0bdb4 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 18 Mar 2019 11:38:20 +0000 Subject: [PATCH 050/223] Replaced validation dictionary with variant object --- VariantValidator/modules/variant.py | 23 +- VariantValidator/modules/vvLogging.py | 28 +- VariantValidator/modules/vvMixinCore.py | 3680 +++++++++++------------ 3 files changed, 1840 insertions(+), 1891 deletions(-) diff --git a/VariantValidator/modules/variant.py b/VariantValidator/modules/variant.py index 6755a070..fec081b4 100644 --- a/VariantValidator/modules/variant.py +++ b/VariantValidator/modules/variant.py @@ -8,11 +8,30 @@ class Variant(object): relevant to what kind of variant it is. """ - def __init__(self, original): + def __init__(self, original, quibble=None, warnings='', write=True, primary_assembly=False, order=False): self.original = original - self.quibble = original + if quibble is None: + self.quibble = original + else: + self.quibble = quibble self.hgvs_formatted = original + self.warnings = warnings + self.description = '' + self.coding = '' + self.coding_g = '' + self.genomic_r = '' + self.genomic_g = '' + self.protein = '' + self.write = write + self.primary_assembly = primary_assembly + self.order = order + + self.test_stash_tx_left = None + self.test_stash_tx_right = None + + self.timing = {} + def is_ascii(self): """ Instead of the previous test for unicode rich text characters. 
diff --git a/VariantValidator/modules/vvLogging.py b/VariantValidator/modules/vvLogging.py index 1728e809..b7a3d7c3 100644 --- a/VariantValidator/modules/vvLogging.py +++ b/VariantValidator/modules/vvLogging.py @@ -91,8 +91,8 @@ def critical(s): logger.loggingSetup() logger.logger.critical("CRIT : "+s) @staticmethod - def trace(s,v=None): - #v should be a dictionary with a 'timing' key. + def trace(s, v=None): + #v should be a variant object with a 'timing' attribute. #global VALIDATOR_DEBUG #print(VALIDATOR_DEBUG) #if "trace" in VALIDATOR_DEBUG: @@ -101,9 +101,9 @@ def trace(s,v=None): logger.logger.debug("TRACE: "+s) else: logger.logger.debug("TRACE: "+s) - v['timing']['traceLabels'].append(s) - v['timing']['traceTimes'].append(str((datetime.datetime.now()-v['timing']['checkDT']).microseconds//1000)) - v['timing']['checkDT']=datetime.datetime.now() + v.timing['traceLabels'].append(s) + v.timing['traceTimes'].append(str((datetime.datetime.now() - v.timing['checkDT']).microseconds//1000)) + v.timing['checkDT'] = datetime.datetime.now() @staticmethod def resub(s): #Resubmit one or multiple variants @@ -121,21 +121,21 @@ def traceStart(v): # global VALIDATOR_DEBUG # if "trace" in VALIDATOR_DEBUG: if True: - v['timing']={} - v['timing']['traceLabels']=[] - v['timing']['traceTimes']=[] - v['timing']['startDT']=datetime.datetime.now() - v['timing']['checkDT']=datetime.datetime.now() + v.timing = {} + v.timing['traceLabels'] = [] + v.timing['traceTimes'] = [] + v.timing['startDT'] = datetime.datetime.now() + v.timing['checkDT'] = datetime.datetime.now() @staticmethod def traceEnd(v): logger.loggingSetup() #global VALIDATOR_DEBUG #if "trace" in VALIDATOR_DEBUG: if True: - v['timing']['traceLabels'].append("complete") - v['timing']['traceTimes'].append((datetime.datetime.now()-v['timing']['startDT']).microseconds//1000) - del v['timing']['startDT'] - del v['timing']['checkDT'] + v.timing['traceLabels'].append("complete") + 
v.timing['traceTimes'].append((datetime.datetime.now() - v.timing['startDT']).microseconds//1000) + del v.timing['startDT'] + del v.timing['checkDT'] #Test #logger.debug("Message D") diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 5ef53280..b157d294 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -116,9 +116,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr self.batch_list = [] for queries in batch_queries: queries = queries.strip() - query = {'quibble': queries, 'id': queries, 'warnings': '', 'description': '', 'coding': '', 'coding_g': '', - 'genomic_r': '', 'genomic_g': '', 'protein': '', 'write': 'true', 'primary_assembly': 'false', - 'order': 'false'} + query = variant.Variant(queries) self.batch_list.append(query) # Create List to carry batch data @@ -138,9 +136,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr """ set_output_type_flag = 'warning' logger.debug("Batch list length " + str(len(self.batch_list))) - for validation in self.batch_list: + for my_variant in self.batch_list: # Start timing - logger.traceStart(validation) + logger.traceStart(my_variant) # Create Normalizers hn = hgvs.normalizer.Normalizer(self.hdp, @@ -155,24 +153,22 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr ) # This will be used to order the final output - if str(validation['order']) == 'false': + if not my_variant.order: ordering = ordering + 1 - validation['order'] = ordering - - my_variant = variant.Variant(validation['id']) + my_variant.order = ordering # Bug catcher try: # Note, ID is not touched. It is always the input variant description. Quibble will be altered but id will not if type = g. 
- input = validation['quibble'] - logger.trace("Commenced validation of " + str(input), validation) + input = my_variant.quibble + logger.trace("Commenced validation of " + str(my_variant.quibble), my_variant) if not my_variant.is_ascii(): chars, positions = my_variant.get_non_ascii() error = 'Submitted variant description contains an invalid character(s) %s at position(s) %s: '\ 'Please remove this character and re-submit: A useful search function for ' \ 'Unicode characters can be found at https://unicode-search.net/' % (chars, positions) - validation['warnings'] = validation['warnings'] + ': ' + error + my_variant.warnings += ': ' + error logger.warning(error) continue @@ -180,12 +176,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr my_variant.remove_whitespace() if my_variant.quibble != my_variant.original: caution = 'Whitespace removed from variant description %s' % my_variant.original - validation['warnings'] = validation['warnings'] + ': ' + caution + my_variant.warnings += ': ' + caution logger.info(caution) stash_input = copy.copy(input) # Set the primary_assembly - if validation['primary_assembly'] == 'false': + if not my_variant.primary_assembly: if selected_assembly == 'hg19': primary_assembly = 'GRCh37' elif selected_assembly == 'hg38': @@ -199,15 +195,15 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr primary_assembly = selected_assembly # Catch invalid genome build if primary_assembly in self.genome_builds: - validation['primary_assembly'] = primary_assembly + my_variant.primary_assembly = primary_assembly else: primary_assembly = 'GRCh38' - validation['warnings'] = validation['warnings'] + ': Invalid genome build has been specified. Automap has selected the default build (GRCh38)' + my_variant.warnings += ': Invalid genome build has been specified. Automap has selected the default build (GRCh38)' logger.warning( 'Invalid genome build has been specified. 
Automap has selected the default build ' + primary_assembly) else: - primary_assembly = validation['primary_assembly'] - logger.trace("Completed string formatting", validation) + primary_assembly = my_variant.primary_assembly + logger.trace("Completed string formatting", my_variant) # Set variables that batch will not use but are required crossing = 'false' @@ -238,21 +234,16 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr input = '-'.join(in_list[1:]) pre_input = copy.deepcopy(input) vcf_elements = pre_input.split('-') - validation[ - 'warnings'] = 'Not stating ALT bases is ambiguous because VCF specification 4.0 would treat ' + pre_input + ' as a deletion whereas VCF specification 4.1 onwards would treat ' + pre_input + ' as ALT = REF' - validation['warnings'] = validation['warnings'] + ': VariantValidator has output both alternatives' + my_variant.warnings = 'Not stating ALT bases is ambiguous because VCF specification 4.0 would treat ' + pre_input + ' as a deletion whereas VCF specification 4.1 onwards would treat ' + pre_input + ' as ALT = REF' + my_variant.warnings += ': VariantValidator has output both alternatives' logger.resub('Not stating ALT bases is ambiguous because VCF specification 4.0 would treat ' + pre_input + ' as a deletion whereas VCF specification 4.1 onwards would treat ' + pre_input + ' as ALT = REF. 
Validator will output both alternatives.') - validation['write'] = 'false' + my_variant.write = False input_A = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], 'del') input_B = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[2]) - queryA = {'quibble': input_A, 'id': validation['id'], 'warnings': validation['warnings'], - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', - 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} - queryB = {'quibble': input_B, 'id': validation['id'], 'warnings': validation['warnings'], - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', - 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} + queryA = variant.Variant(my_variant.original, quibble=input_A, warnings=my_variant.warnings, primary_assembly=primary_assembly, order=ordering) + queryB = variant.Variant(my_variant.original, quibble=input_B, warnings=my_variant.warnings, primary_assembly=primary_assembly, order=ordering) self.batch_list.append(queryA) self.batch_list.append(queryB) continue @@ -271,7 +262,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr vcf_elements = pre_input.split('-') input = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[3]) stash_input = input - logger.trace("Completed VCF-HVGS step 1", validation) + logger.trace("Completed VCF-HVGS step 1", my_variant) # API type non-HGVS # e.g. 
Chr16:2099572TC>T """ @@ -307,8 +298,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Use selected assembly accession = vvChromosomes.to_accession(chr_num, selected_assembly) if accession is None: - validation['warnings'] = validation[ - 'warnings'] + ': ' + chr_num + \ + my_variant.warnings += ': ' + chr_num + \ ' is not part of genome build ' + selected_assembly logger.warning(chr_num + ' is not part of genome build ' + selected_assembly) continue @@ -347,18 +337,18 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr input = str(accession) + ref_type + str(positionAndEdit) stash_input = input except: - fn.exceptPass(validation) + fn.exceptPass(my_variant) # Descriptions lacking the colon : if re.search(r'[gcnmrp]\.', input) and not re.search(r':[gcnmrp]\.', input): error = 'Unable to identify a colon (:) in the variant description %s. A colon is required in HGVS variant descriptions to separate the reference accession from the reference type i.e. :. e.g. :c.' 
% ( input) - validation['warnings'] = validation['warnings'] + ': ' + error + my_variant.warnings += ': ' + error logger.warning(error) continue # Ambiguous chr reference - logger.trace("Completed VCF-HVGS step 2", validation) + logger.trace("Completed VCF-HVGS step 2", my_variant) """ VCF2HGVS conversion step 3 is similar to step 2 but handles formats like Chr16:g.2099572TC>T which are provided by Alamut and other @@ -397,7 +387,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr chr_num = chr_num.replace('CHR', '') # Use selected assembly accession = vvChromosomes.to_accession(chr_num, selected_assembly) if accession is None: - validation['warnings'] = validation['warnings'] + ': ' + chr_num + \ + my_variant.warnings += ': ' + chr_num + \ ' is not part of genome build ' + selected_assembly continue input = str(accession) + ':' + str(positionAndEdit) @@ -413,7 +403,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr logger.debug(er) # GENE_SYMBOL:c. n. types - logger.trace("Completed VCF-HGVS step 3", validation) + logger.trace("Completed VCF-HGVS step 3", my_variant) """ Searches for gene symbols that have been used as reference sequence identifiers. 
Provides a sufficiently repremanding warning, but also provides @@ -443,22 +433,17 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue select_from_these_transcripts = '|'.join(list(select_from_these_transcripts.keys())) if select_transcripts != 'all': - validation['write'] = 'false' + my_variant.write = False for transcript in list(select_transcripts_dict_plus_version.keys()): - validation[ - 'warnings'] = 'HGVS variant nomenclature does not allow the use of a gene symbol (' + \ + my_variant.warnings = 'HGVS variant nomenclature does not allow the use of a gene symbol (' + \ query_a_symbol + ') in place of a valid reference sequence' refreshed_description = transcript + ':' + tx_edit - query = {'quibble': refreshed_description, 'id': validation['id'], - 'warnings': validation['warnings'], 'description': '', 'coding': '', - 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', - 'write': 'true', 'primary_assembly': primary_assembly, 'order': ordering} + query = variant.Variant(my_variant.original, quibble=refreshed_description, warnings=my_variant.warnings, primary_assembly=primary_assembly, order=ordering) self.batch_list.append(query) logger.resub('HGVS variant nomenclature does not allow the use of a gene symbol (' + \ query_a_symbol + ') in place of a valid reference sequence') else: - validation['warnings'] = validation['warnings'] + \ - ': ' + 'HGVS variant nomenclature does not allow the use of a gene symbol (' + \ + my_variant.warnings += ': ' + 'HGVS variant nomenclature does not allow the use of a gene symbol (' + \ query_a_symbol + ') in place of a valid reference sequence: Re-submit ' + input + \ ' and specify transcripts from the following: ' + 'select_transcripts=' + select_from_these_transcripts logger.warning('HGVS variant nomenclature does not allow the use of a gene symbol (' + \ @@ -469,7 +454,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr pass except: 
fn.exceptPass() - logger.trace("Gene symbol reference catching complete", validation) + logger.trace("Gene symbol reference catching complete", my_variant) # NG_:c. or NC_:c. """ @@ -497,22 +482,17 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue select_from_these_transcripts = '|'.join(list(select_from_these_transcripts.keys())) if select_transcripts != 'all': - validation['write'] = 'false' + my_variant.write = False for transcript in list(select_transcripts_dict_plus_version.keys()): - validation[ - 'warnings'] = 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. NG_(NM_):c.PositionVariation' + my_variant.warnings = 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. NG_(NM_):c.PositionVariation' refreshed_description = refSeqGeneID + '(' + transcript + ')' + ':' + tx_edit - query = {'quibble': refreshed_description, 'id': validation['id'], - 'warnings': validation['warnings'], 'description': '', 'coding': '', - 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', - 'write': 'true', 'primary_assembly': primary_assembly, - 'order': ordering} + query = variant.Variant(my_variant.original, quibble=refreshed_description, warnings=my_variant.warnings, primary_assembly=primary_assembly, order=ordering) + logger.resub( 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. NG_(NM_):c.PositionVariation. Resubmitting corrected version.') self.batch_list.append(query) else: - validation['warnings'] = validation[ - 'warnings'] + ': ' + 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation. 
Re-submit ' + input + ' but also specify transcripts from the following: ' + 'select_transcripts=' + select_from_these_transcripts + my_variant.warnings += ': ' + 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation. Re-submit ' + input + ' but also specify transcripts from the following: ' + 'select_transcripts=' + select_from_these_transcripts logger.warning( + 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation. Re-submit ' + str( @@ -520,14 +500,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr select_from_these_transcripts)) continue else: - validation['warnings'] = validation[ - 'warnings'] + ': ' + 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation' + my_variant.warnings += ': ' + 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation' logger.warning( 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation') continue elif re.match('^NC_', input): - validation['warnings'] = validation[ - 'warnings'] + ': ' + 'A transcript reference sequence has not been provided e.g. NC_(NM_):c.PositionVariation. Unable to predict available transripts because chromosomal position is not specified' + my_variant.warnings += ': ' + 'A transcript reference sequence has not been provided e.g. NC_(NM_):c.PositionVariation. Unable to predict available transripts because chromosomal position is not specified' logger.warning( 'A transcript reference sequence has not been provided e.g. NC_(NM_):c.PositionVariation. 
Unable to predict available transripts because chromosomal position is not specified') continue @@ -536,7 +514,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr except: fn.exceptPass() - logger.trace("Chromosomal/RefSeqGene reference catching complete", validation) + logger.trace("Chromosomal/RefSeqGene reference catching complete", my_variant) # Find not_sub type in input e.g. GGGG>G """ VCF2HGVS conversion step 4 has two purposes @@ -620,15 +598,11 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr alt_list = alts.split(',') # Assemble and re-submit for alt in alt_list: - validation[ - 'warnings'] = 'Multiple ALT sequences detected: auto-submitting all possible combinations' - validation['write'] = 'false' + my_variant.warnings = 'Multiple ALT sequences detected: auto-submitting all possible combinations' + my_variant.write = False refreshed_description = header + '>' + alt - query = {'quibble': refreshed_description, 'id': validation['id'], - 'warnings': validation['warnings'], 'description': '', 'coding': '', - 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', - 'write': 'true', 'primary_assembly': primary_assembly, - 'order': ordering} + query = variant.Variant(my_variant.original, quibble=refreshed_description, warnings=my_variant.warnings, primary_assembly=primary_assembly, order=ordering) + self.batch_list.append(query) logger.resub( 'Multiple ALT sequences detected. 
Auto-submitting all possible combinations.') @@ -636,7 +610,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: error = str(e) issue_link = '' - validation['warnings'] = validation['warnings'] + ': ' + error + my_variant.warnings += ': ' + error logger.warning(str(e)) continue @@ -650,13 +624,13 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr not_delins = not_delins else: issue_link = '' - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(e)) continue # Create warning caution = 'Variant description ' + input + ' is not HGVS compliant' automap = input + ' automapped to ' + not_delins - validation['warnings'] = validation['warnings'] + ': ' + automap + my_variant.warnings += ': ' + automap # Change input to normalized variant input = not_delins else: @@ -665,7 +639,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr fn.exceptPass() else: pass - logger.trace("Completed VCF-HVGS step 4", validation) + logger.trace("Completed VCF-HVGS step 4", my_variant) # Tackle edit1234 type """ @@ -701,7 +675,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if re.search(r'ins$', failed): issue_link = 'http://varnomen.hgvs.org/recommendations/DNA/variant/insertion/' error = error + ' please refer to ' + issue_link - validation['warnings'] = validation['warnings'] + error + my_variant.warnings += error logger.warning(str(error) + " " + str(e)) continue @@ -709,11 +683,11 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr failed = str(hgvs_failed) hgvs_failed = self.hp.parse_hgvs_variant(failed) automap = 'Non HGVS compliant variant description ' + input + ' automapped to ' + failed - validation['warnings'] = validation['warnings'] + ': ' + automap + my_variant.warnings += ': ' + automap logger.warning(automap) input = failed - 
logger.trace("Ins/Del reference catching complete", validation) + logger.trace("Ins/Del reference catching complete", my_variant) # Tackle compound variant descriptions NG or NC (NM_) i.e. correctly input NG/NC_(NM_):c. """ Fully HGVS compliant intronic variant descriptions take the format e.g @@ -735,7 +709,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr transy = transy.group(1) transy = transy.replace(')', '') input = transy - logger.trace("HVGS typesetting complete", validation) + logger.trace("HVGS typesetting complete", my_variant) # Extract variants from HGVS allele descriptions # http://varnomen.hgvs.org/recommendations/DNA/variant/alleles/ """ @@ -769,7 +743,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr caution = lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation else: caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation - validation['warnings'] = validation['warnings'] + ': ' + str(caution) + my_variant.warnings += ': ' + str(caution) logger.warning(str(caution)) elif re.match(r'^LRG_\d+t\d+:c.', input) or re.match(r'^LRG_\d+t\d+:n.', input) or re.match( r'^LRG_\d+t\d+:p.', input) or re.match(r'^LRG_\d+t\d+:g.', input): @@ -782,39 +756,33 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr caution = lrg_reference + ':' + variation + ' automapped to ' + refseqtranscript_reference + ':' + variation else: caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + refseqtranscript_reference + ':' + variation - validation['warnings'] = validation['warnings'] + ': ' + str(caution) + my_variant.warnings += ': ' + str(caution) logger.warning(str(caution)) else: pass try: # Submit to allele extraction function alleles = self.hgvs_alleles(input,hn) - validation['warnings'] = validation[ - 'warnings'] + ': ' + 'Automap has 
extracted possible variant descriptions' + my_variant.warnings += ': ' + 'Automap has extracted possible variant descriptions' logger.resub('Automap has extracted possible variant descriptions, resubmitting') for allele in alleles: - query = {'quibble': allele, 'id': validation['id'], 'warnings': validation['warnings'], - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', - 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, - 'order': ordering} + query = variant.Variant(my_variant.original, quibble=allele, warnings=my_variant.warnings, write=True, primary_assembly=my_variant.primary_assembly, order=ordering) coding = 'intergenic' self.batch_list.append(query) - validation['write'] = 'false' + my_variant.write = False continue except fn.alleleVariantError as e: if re.search("Cannot validate sequence of an intronic variant", str(e)): - validation['warnings'] = validation[ - 'warnings'] + ': ' + 'Intronic positions not supported for HGVS Allele descriptions' + my_variant.warnings += ': ' + 'Intronic positions not supported for HGVS Allele descriptions' logger.warning('Intronic positions not supported for HGVS Allele descriptions') continue elif re.search("No transcript definition for ",str(e)): - validation['warnings'] = validation[ - 'warnings'] + ': ' + str(e) + my_variant.warnings += ': ' + str(e) logger.warning(str(e)) continue else: raise VariantValidatorError(str(e)) - logger.trace("HVGS String allele parsing pass 1 complete", validation) + logger.trace("HVGS String allele parsing pass 1 complete", my_variant) # INITIAL USER INPUT FORMATTING """ Removes whitespace from the ends of the string @@ -845,8 +813,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = 'Variant description ' + input + ' lacks the . character between and in the expected pattern :.' 
else: error = 'Variant description ' + input + ' is not in an accepted format' - validation['warnings'] = validation[ - 'warnings'] + ': ' + error + my_variant.warnings += ': ' + error logger.warning(error) continue else: @@ -854,7 +821,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr input = formatted['variant'] stash_input = formatted['variant'] format_type = formatted['type'] - logger.trace("Variant input formatted, proceeding to validate.", validation) + logger.trace("Variant input formatted, proceeding to validate.", my_variant) # Conversions """ Conversions are not currently supported. The HGVS format for conversions @@ -862,7 +829,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr """ conversion = re.compile('con') if conversion.search(formatted_variant): - validation['warnings'] = validation['warnings'] + ': ' + 'Gene conversions currently unsupported' + my_variant.warnings += ': ' + 'Gene conversions currently unsupported' logger.warning('Gene conversions currently unsupported') continue @@ -897,7 +864,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr input = str(input_parses) pass else: - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(error) continue @@ -917,14 +884,13 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr break if re.match('^ENST', str(input_parses)): error = 'Unable to map ' + str(input_parses.ac) + ' to an equivalent RefSeq transcript' - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue else: - validation['warnings'] = validation['warnings'] + ': ' + str( - trap_ens_in) + ' automapped to equivalent RefSeq transcript ' + formatted_variant + my_variant.warnings += ': ' + str(trap_ens_in) + ' automapped to equivalent RefSeq 
transcript ' + formatted_variant logger.warning(str(trap_ens_in) + ' automapped to equivalent RefSeq transcript ' + formatted_variant) - logger.trace("HVGS acceptance test passed", validation) + logger.trace("HVGS acceptance test passed", my_variant) # Check whether supported genome build is requested for non g. descriptions historic_assembly = 'false' mapable_assemblies = { @@ -965,7 +931,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: error = 'Mapping of ' + formatted_variant + ' to genome assembly ' + primary_assembly + ' is not supported' - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue # Catch interval end > interval start @@ -988,14 +954,14 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: if to_n.posedit.pos.end.base < to_n.posedit.pos.start.base: error = 'Interval end position < interval start position ' - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue elif input_parses.posedit.pos.end.base < input_parses.posedit.pos.start.base: error = 'Interval end position ' + str( input_parses.posedit.pos.end.base) + ' < interval start position ' + str( input_parses.posedit.pos.start.base) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue else: @@ -1014,9 +980,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr pass if ref_type.search(str(input_parses)): error = 'RefSeq variant accession numbers MUST include a version number' - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) continue - logger.trace("HVGS interval/version mapping complete", validation) + logger.trace("HVGS interval/version mapping 
complete", my_variant) # handle LRG inputs """ @@ -1046,7 +1012,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr caution = lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation else: caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation - validation['warnings'] = validation['warnings'] + ': ' + str(caution) + my_variant.warnings += ': ' + str(caution) logger.warning(str(caution)) elif re.match(r'^LRG_\d+t\d+:c.', str(input_parses)) or re.match(r'^LRG_\d+t\d+:n.', str(input_parses)) or re.match( @@ -1063,11 +1029,11 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr caution = lrg_reference + ':' + variation + ' automapped to ' + refseqtranscript_reference + ':' + variation else: caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + refseqtranscript_reference + ':' + variation - validation['warnings'] = validation['warnings'] + ': ' + str(caution) + my_variant.warnings += ': ' + str(caution) logger.warning(str(caution)) else: pass - logger.trace("LRG check for conversion to refseq completed", validation) + logger.trace("LRG check for conversion to refseq completed", my_variant) # Additional Incorrectly input variant capture training """ Evolving list of common mistakes, see sections below @@ -1076,21 +1042,21 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if (re.search(r'^NM_', formatted_variant) or re.search(r'^NR_', formatted_variant)) and re.search(r':g.', formatted_variant): suggestion = input.replace(':g.', ':c.') error = 'Transcript reference sequence input as genomic (g.) reference sequence. Did you mean ' + suggestion + '?' - validation['warnings'] = validation['warnings'] + ': ' + error + my_variant.warnings += ': ' + error logger.warning(error) continue # NR_ c. 
if re.search(r'^NR_', input) and re.search(r':c.', input): suggestion = input.replace(':c.', ':n.') error = 'Non-coding transcript reference sequence input as coding (c.) reference sequence. Did you mean ' + suggestion + '?' - validation['warnings'] = validation['warnings'] + ': ' + error + my_variant.warnings += ': ' + error logger.warning(error) continue # NM_ n. if re.search(r'^NM_', input) and re.search(r':n.', input): suggestion = input.replace(':n.', ':c.') error = 'Coding transcript reference sequence input as non-coding transcript (n.) reference sequence. Did you mean ' + suggestion + '?' - validation['warnings'] = validation['warnings'] + ': ' + error + my_variant.warnings += ': ' + error logger.warning(error) continue @@ -1099,7 +1065,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr r'^NG_', formatted_variant)) and re.search(r':p.', formatted_variant): issue_link = 'http://varnomen.hgvs.org/recommendations/protein/' error = 'Using a nucleotide reference sequence (NM_ NR_ NG_ NC_) to specify protein-level (p.) variation is not HGVS compliant. Please select an appropriate protein reference sequence (NP_)' - validation['warnings'] = validation['warnings'] + ': ' + error + my_variant.warnings += ': ' + error logger.warning(error) continue @@ -1107,11 +1073,11 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if (re.search(r'^NG_', formatted_variant) or re.search(r'^NC_', formatted_variant)) and re.search(r':c.', formatted_variant): suggestion = ': For additional assistance, submit ' + str(formatted_variant) + ' to VariantValidator' error = 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. 
NG_(NM_):c.PositionVariation' + suggestion - validation['warnings'] = validation['warnings'] + ': ' + error + my_variant.warnings += ': ' + error logger.warning(error) continue - logger.trace("Passed 'common mistakes' catcher", validation) + logger.trace("Passed 'common mistakes' catcher", my_variant) # Primary validation of the input """ An evolving set of variant structure and content searches which identify @@ -1127,19 +1093,19 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr pass else: error = 'Invalid reference sequence identifier (' + input_parses.ac + ')' - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(error) continue try: self.vr.validate(input_parses) except hgvs.exceptions.HGVSError as e: error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(error) continue except Exception as e: error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(error) continue # Additional test @@ -1147,7 +1113,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hn.normalize(input_parses) except hgvs.exceptions.HGVSError as e: error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(error) continue else: @@ -1167,13 +1133,13 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr to_n = evm.c_to_n(input_parses) except hgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(error) continue actual_ref = to_n.posedit.edit.ref if called_ref != actual_ref: error = 'Variant reference (' + called_ref + ') does not agree with 
reference sequence (' + actual_ref + ')' - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(error) continue else: @@ -1234,7 +1200,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr report_gen) except Exception as e: fn.exceptPass() - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue else: @@ -1246,7 +1212,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr input_parses = evm.c_to_n(input_parses) except hgvs.exceptions.HGVSError as e: error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(e)) continue @@ -1256,7 +1222,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr genomic_position = self.myevm_t_to_g(input_parses, no_norm_evm, primary_assembly, hn) error = error + fn.valstr(genomic_position) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue else: @@ -1318,8 +1284,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr fn.exceptPass() else: pass - validation['warnings'] = validation['warnings'] + ': ' + str( - error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue @@ -1336,8 +1301,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr acs = '; '.join(gens) error = 'Cannot map ' + fn.valstr( input_parses) + ' to a genomic position. 
' + input_parses.ac + ' can only be partially aligned to genomic reference sequences ' + acs - validation['warnings'] = validation['warnings'] + ': ' + str( - error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue @@ -1356,11 +1320,11 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' + fn.valstr( report_gen) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue elif re.search('insertion length must be 1', error): - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue elif re.search('base start position must be <= end position', error): @@ -1371,7 +1335,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr correction.posedit.pos.end = st error = error + ': Did you mean ' + str(correction) + '?' 
# error = 'Interval start position ' + str(input_parses.posedit.pos.start) + ' > interval end position ' + str(input_parses.posedit.pos.end) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue @@ -1390,7 +1354,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' else: error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue except ValueError as e: @@ -1399,7 +1363,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = 'Interval start position ' + str( input_parses.posedit.pos.start) + ' > interval end position ' + str( input_parses.posedit.pos.end) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue except hgvs.exceptions.HGVSInvalidVariantError as e: @@ -1414,12 +1378,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = 'Interval start position ' + str( input_parses.posedit.pos.start) + ' > interval end position ' + str( input_parses.posedit.pos.end) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue else: error = str(e) - 
validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue @@ -1427,7 +1391,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr evm.g_to_t(output, input_parses.ac) except hgvs.exceptions.HGVSError as e: error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue @@ -1435,7 +1399,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr self.vr.validate(output) except hgvs.exceptions.HGVSError as e: error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue @@ -1453,16 +1417,16 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if re.search('Length implied by coordinates', error): # Applies to del and inv # NOTE, there has been no normalization at all so this error is valid here - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) # Will apply to > del and inv if re.search('does not agree with reference sequence', error): - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) # ensures x_y for insertions if re.search('insertion length must be 1', error): - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) # Boundary issue if re.search('Variant coordinate is out of the bound of CDS region', error): - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) """ # This catches errors in introns if re.search('base start position must be <= end position', error): @@ -1475,20 +1439,20 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, 
transcr error = 'Interval start position ' + str( input_parses.posedit.pos.start) + ' > interval end position ' + str( input_parses.posedit.pos.end) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue except hgvs.exceptions.HGVSDataNotAvailableError as e: error = e - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue except hgvs.exceptions.HGVSError as e: error = str(e) if re.search('bounds', error): error = error + ' (' + input_parses.ac + ')' - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue fn.exceptPass() @@ -1510,7 +1474,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr actual_ref = to_n.posedit.edit.ref if called_ref != actual_ref: error = 'Variant reference (' + called_ref + ') does not agree with reference sequence (' + actual_ref + ')' - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue else: @@ -1540,11 +1504,11 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr report_gen) except Exception as e: fn.exceptPass() - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue else: - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue @@ -1553,7 +1517,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr genomic_position = self.myevm_t_to_g(input_parses, no_norm_evm, primary_assembly, hn) error = error + fn.valstr(genomic_position) - validation['warnings'] = validation['warnings'] + ': ' + 
str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue else: @@ -1574,11 +1538,11 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' + fn.valstr( report_gen) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue elif re.search('insertion length must be 1', error): - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue elif re.search('base start position must be <= end position', error): @@ -1589,7 +1553,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr correction.posedit.pos.end = st error = error + ': Did you mean ' + str(correction) + '?' # error = 'Interval start position ' + str(input_parses.posedit.pos.start) + ' > interval end position ' + str(input_parses.posedit.pos.end) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue elif re.search('Cannot validate sequence of an intronic variant', error): @@ -1604,7 +1568,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr primary_assembly, hn) error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + fn.valstr( report_gen) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue else: @@ -1625,7 +1589,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' else: error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue except ValueError as e: @@ -1634,7 +1598,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = 'Interval start position ' + str( input_parses.posedit.pos.start) + ' > interval end position ' + str( input_parses.posedit.pos.end) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue except hgvs.exceptions.HGVSInvalidVariantError as e: @@ -1649,14 +1613,14 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = 'Interval start position ' + str( input_parses.posedit.pos.start) + ' > interval end position ' + str( input_parses.posedit.pos.end) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue try: self.vr.validate(output) except hgvs.exceptions.HGVSError as e: error = str(e) - validation['warnings'] = 
validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue @@ -1673,19 +1637,19 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if re.search('Length implied by coordinates', error): # Applies to del and inv # NOTE, there has been no normalization at all so this error is valid here - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) continue # Will apply to > del and inv if re.search('does not agree with reference sequence', error): - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) continue # ensures x_y for insertions if re.search('insertion length must be 1', error): - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) continue # Boundary issue if re.search('Variant coordinate is out of the bound of CDS region', error): - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) continue """ # This catches errors in introns @@ -1700,26 +1664,26 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr input_parses.posedit.pos.start) + ' > interval end position ' + str( input_parses.posedit.pos.end) logger.warning(str(error)) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) continue - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue except hgvs.exceptions.HGVSDataNotAvailableError as e: error = e - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue except hgvs.exceptions.HGVSError as e: error = str(e) if re.search('bounds', error): error = error + ' (' + 
input_parses.ac + ')' - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue else: pass - logger.trace("Variant structure and contents searches passed", validation) + logger.trace("Variant structure and contents searches passed", my_variant) # Mitochondrial variants """ Reformat m. into the new HGVS standard which is now m again! @@ -1735,12 +1699,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr self.vr.validate(hgvs_mito) except hgvs.exceptions.HGVSError as e: error = caution + ': ' + str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue except KeyError as e: error = caution + ': Currently unable to validate ' + hgvs_mito.ac + ' sequence variation' - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue else: @@ -1748,8 +1712,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr rel_var = self.relevant_transcripts(hgvs_mito, evm, alt_aln_method, reverse_normalizer) hgvs_genomic = copy.deepcopy(hgvs_mito) if len(rel_var) == 0: - validation['genomic_g'] = fn.valstr(hgvs_mito) - validation['description'] = 'Homo sapiens mitochondrion, complete genome' + my_variant.genomic_g = fn.valstr(hgvs_mito) + my_variant.description = 'Homo sapiens mitochondrion, complete genome' logger.info('Homo sapiens mitochondrion, complete genome') continue # Currently we are not expecting this path to be activated because not m. 
transcripts seem to be NM_ @@ -1770,7 +1734,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr except hgvs.exceptions.HGVSError as e: error = str(e) if error != 'false': - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue else: @@ -1796,8 +1760,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = str( hgvs_object) + ' is HGVS compliant and contains a valid reference amino acid description' reason = 'Protein level variant descriptions are not fully supported due to redundancy in the genetic code' - validation['warnings'] = validation['warnings'] + ': ' + str(reason) + ': ' + str(error) - validation['protein'] = str(hgvs_object) + my_variant.warnings += ': ' + str(reason) + ': ' + str(error) + my_variant.protein = str(hgvs_object) logger.warning(str(reason) + ": " + str(error)) continue @@ -1815,7 +1779,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_c = self.va_func.hgvs_r_to_c(hgvs_input) except hgvs.exceptions.HGVSDataNotAvailableError as e: error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue input = str(hgvs_c) @@ -1823,7 +1787,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # COLLECT gene symbol, name and ACCESSION INFORMATION # Gene symbol - logger.trace("Handled mitochondrial variants", validation) + logger.trace("Handled mitochondrial variants", my_variant) """ Identifies the transcript reference sequence name and HGNC gene symbol """ @@ -1840,7 +1804,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr issue_link = "https://bitbucket.org/biocommons/uta/issues?status=new&status=open" reason = "VariantValidator cannot recover information for transcript " + str( 
hgvs_vt.ac) + ' beacuse it is not available in the Universal Transcript Archive' - validation['warnings'] = validation['warnings'] + ': ' + str(reason) + my_variant.warnings += ': ' + str(reason) logger.warning(str(reason) + ": " + str(error)) continue else: @@ -1878,7 +1842,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if 'error' in entry: # Open a hgvs exception log file in append mode error = entry['description'] - validation['warnings'] = validation['warnings'] + ': ' + str( + my_variant.warnings += ': ' + str( error) + ': A Database error occurred, please contact admin' logger.warning(str(error) + ": A Database error occurred, please contact admin") continue @@ -1893,12 +1857,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr entry = self.db.data_add(accession=accession, validator=self) except hgvs.exceptions.HGVSError as e: error = 'Transcript %s is not currently supported' % (accession) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue except Exception as e: error = 'Unable to assign transcript identity records to ' + accession + ', potentially an obsolete record :' - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue hgnc_gene_info = entry['description'] @@ -1912,7 +1876,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr except Exception as e: logger.warning(str(e)) error = 'Unable to assign transcript identity records to ' + accession + ', potentially an obsolete record :' - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue hgnc_gene_info = entry['description'] @@ -1921,7 +1885,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, 
transcr else: # Open a hgvs exception log file in append mode error = 'Unknown error type' - validation['warnings'] = validation['warnings'] + ': ' + str( + my_variant.warnings += ': ' + str( error) + ': A Database error occurred, please contact admin' logger.warning(str(error)) continue @@ -1940,7 +1904,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if 'error' in entry: # Open a hgvs exception log file in append mode error = entry['description'] - validation['warnings'] = validation['warnings'] + ': ' + str( + my_variant.warnings += ': ' + str( error) + ': A Database error occurred, please contact admin' logger.warning(str(error)) continue @@ -1963,7 +1927,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr except Exception as e: logger.warning(str(e)) error = 'Unable to assign transcript identity records to ' + accession + ', potentially an obsolete record :' - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue hgnc_gene_info = entry['description'] @@ -1972,7 +1936,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: # Open a hgvs exception log file in append mode error = 'Unknown error type' - validation['warnings'] = validation['warnings'] + ': ' + str( + my_variant.warnings += ': ' + str( error) + ': A Database error occurred, please contact admin' logger.warning(str(error)) continue @@ -1996,7 +1960,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = 'Reference sequence ' + hgvs_genomic.ac + ' is either not supported or does not exist' if error != 'false': reason = 'Invalid variant description' - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue else: @@ -2007,7 +1971,7 @@ def validate(self, batch_variant, 
selected_assembly, select_transcripts, transcr # Perform test if g_query.posedit.pos != g_test.posedit.pos: - # validation['warnings'] = validation['warnings'] + ': ' + 'Input variant description normalized to ' + str(g_test) + # my_variant.warnings += ': ' + 'Input variant description normalized to ' + str(g_test) hgvs_genomic = g_test else: hgvs_genomic = g_query @@ -2069,18 +2033,15 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr input = refseqgene_data['hgvs_genomic'] # re_submit # Tag the line so that it is not written out - validation['warnings'] = validation[ - 'warnings'] + ': ' + formatted_variant + ' automapped to genome position ' + str( + my_variant.warnings += ': ' + formatted_variant + ' automapped to genome position ' + str( input) - query = {'quibble': input, 'id': validation['id'], 'warnings': validation['warnings'], - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', - 'genomic_g': '', 'protein': '', 'write': 'true', - 'primary_assembly': primary_assembly, 'order': ordering} + query = variant.Variant(my_variant.original, quibble=input, warnings=my_variant.warnings, primary_assembly=my_variant.primary_assembly, order=ordering) + coding = 'intergenic' self.batch_list.append(query) else: error = 'Mapping unavailable for RefSeqGene ' + formatted_variant + ' using alignment method = ' + alt_aln_method - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue @@ -2092,7 +2053,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr self.vr.validate(hgvs_genomic) except hgvs.exceptions.HGVSError as e: error = str(e) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue else: @@ -2110,20 +2071,20 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # set output 
type flag set_output_type_flag = 'intergenic' # set genomic and where available RefSeqGene outputs - validation['warnings'] = validation['warnings'] + ': ' + str(error) - validation['genomic_g'] = fn.valstr(hgvs_genomic) - validation['genomic_r'] = str(rsg_data.split('(')[0]) + my_variant.warnings += ': ' + str(error) + my_variant.genomic_g = fn.valstr(hgvs_genomic) + my_variant.genomic_r = str(rsg_data.split('(')[0]) logger.warning(str(error)) continue else: error = 'Please ensure the requested chromosome version relates to a supported genome build. Supported genome builds are: GRCh37, GRCh38, hg19 and hg38' - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue else: # Tag the line so that it is not written out - validation['write'] = 'false' + my_variant.write = False """ Gap aware projection from g. to c. @@ -3074,10 +3035,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Set the values and append to batch_list for c_description in rel_var: - query = {'quibble': str(c_description), 'id': validation['id'], - 'warnings': validation['warnings'], 'description': '', 'coding': '', - 'coding_g': '', 'genomic_r': '', 'genomic_g': '', 'protein': '', 'write': 'true', - 'primary_assembly': primary_assembly, 'order': ordering} + query = variant.Variant(my_variant.original, quibble=str(c_description), warnings=my_variant.warnings, primary_assembly=my_variant.primary_assembly, order=ordering) self.batch_list.append(query) logger.warning("Continue reached when mapping transcript types to variants") # Call next description @@ -3100,7 +3058,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # If not get rid of it! 
else: # By marking it as Do Not Write and continuing through the validation loop - validation['write'] = 'false' + my_variant.write = False continue else: pass @@ -3125,7 +3083,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: error = str(e) error = error + ': Consequently the input variant description cannot be fully validated and is not supported: Use the Gene to Transcripts function to determine whether an updated transcript reference sequence is available' - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue try: @@ -3136,7 +3094,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' else: error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue except TypeError as e: @@ -3148,7 +3106,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' else: error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative 
version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue @@ -3172,7 +3130,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g reason = "An error has occurred" excep = "%s -- %s -- %s\n" % (time.ctime(), reason, formatted_variant) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue @@ -3180,7 +3138,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g reason = "An error has occurred" excep = "%s -- %s -- %s\n" % (time.ctime(), reason, formatted_variant) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue @@ -3205,7 +3163,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g reason = "An error has occurred" excep = "%s -- %s -- %s\n" % (time.ctime(), reason, formatted_variant) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue @@ -3213,7 +3171,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g reason = "An error has 
occurred" excep = "%s -- %s -- %s\n" % (time.ctime(), reason, formatted_variant) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue else: @@ -3241,7 +3199,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr formatted_variant = formatted_variant caution = 'This coding sequence variant description spans at least one intron' automap = 'Use of the corresponding genomic sequence variant descriptions may be invalid. Please refer to https://www35.lamp.le.ac.uk/recommendations/' - validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str( + my_variant.warnings += ': ' + str(caution) + ': ' + str( automap) logger.warning(str(caution) + ": " + str(automap)) else: @@ -3268,7 +3226,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr valid = 'true' else: excep = "%s -- %s -- %s\n" % (time.ctime(), error, formatted_variant) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue @@ -3327,7 +3285,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr query.type = 'r' post_var = str(query) automap = trapped_input + ' automapped to ' + str(post_var) - validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str( + my_variant.warnings += ': ' + str(caution) + ': ' + str( automap) relevant = "Select the automapped transcript and click Submit to analyse" rel_var = [] @@ -3351,7 +3309,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr data = self.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) if data['error'] != 'false': error = data['error'] - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue else: @@ -3382,7 +3340,7 
@@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr path="/search/prev_symbol/" + tx_id_info[6]) if data['error'] != 'false': error = data['error'] - validation['warnings'] = validation['warnings'] + ': ' + str( + my_variant.warnings += ': ' + str( error) logger.warning(str(error)) continue @@ -3397,12 +3355,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr rel_var.append(accessions) # Kill current line and append for re-submission # Tag the line so that it is not written out - validation['write'] = 'false' + my_variant.write = False # Set the values and append to batch_list - query = {'quibble': fn.valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', - 'genomic_g': '', 'protein': '', 'write': 'true', - 'primary_assembly': primary_assembly, 'order': ordering} + query = variant.Variant(my_variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=my_variant.primary_assembly, order=ordering) self.batch_list.append(query) # Coding @@ -3420,7 +3375,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr reason = 'Input coordinates may be invalid' if error == 'expected from_start_i <= from_end_i': error = 'Automap is unable to correct the input exon/intron boundary coordinates, please check your variant description' - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) continue else: fn.exceptPass() @@ -3430,7 +3385,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr try: post_var = self.myevm_g_to_t(evm, pre_var, trans_acc) except hgvs.exceptions.HGVSError as error: - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue query = post_var @@ -3440,8 +3395,7 @@ def validate(self, batch_variant, 
selected_assembly, select_transcripts, transcr automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' # automapping of variant completed automap = trapped_input + ' automapped to ' + str(post_var) - validation['warnings'] = str(validation['warnings']) + str(caution) + ': ' + str( - automap) + my_variant.warnings += str(caution) + ': ' + str(automap) relevant = "Select the automapped transcript and click Submit to analyse" rel_var = [] rel_var.append(post_var) @@ -3463,7 +3417,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr data = self.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) if data['error'] != 'false': error = data['error'] - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue @@ -3495,7 +3449,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr path="/search/prev_symbol/" + tx_id_info[6]) if data['error'] != 'false': error = data['error'] - validation['warnings'] = validation['warnings'] + ': ' + str( + my_variant.warnings += ': ' + str( error) logger.warning(str(error)) continue @@ -3511,12 +3465,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr rel_var.append(accessions) # Kill current line and append for re-submission # Tag the line so that it is not written out - validation['write'] = 'false' + my_variant.write = False # Set the values and append to batch_list - query = {'quibble': fn.valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', - 'genomic_g': '', 'protein': '', 'write': 'true', - 'primary_assembly': primary_assembly, 'order': ordering} + query = variant.Variant(my_variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=my_variant.primary_assembly, order=ordering) 
self.batch_list.append(query) else: @@ -3540,7 +3491,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr query.type = 'r' post_var = str(query) automap = input + ' automapped to ' + post_var - validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str( + my_variant.warnings += ': ' + str(caution) + ': ' + str( automap) relevant = "Select the automapped transcript and click Submit to analyse" rel_var = [] @@ -3563,7 +3514,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr data = self.va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) if data['error'] != 'false': error = data['error'] - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue @@ -3578,12 +3529,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr rel_var.append(accessions) # Kill current line and append for re-submission # Tag the line so that it is not written out - validation['write'] = 'false' + my_variant.write = False # Set the values and append to batch_list - query = {'quibble': fn.valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', - 'genomic_g': '', 'protein': '', 'write': 'true', - 'primary_assembly': primary_assembly, 'order': ordering} + query = variant.Variant(my_variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=my_variant.primary_assembly, order=ordering) self.batch_list.append(query) else: @@ -3601,7 +3549,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' # automapping of variant completed automap = str(trapped_input) + ' automapped to ' + str(post_var) - validation['warnings'] = validation['warnings'] + ': ' 
+ str(caution) + ': ' + str( + my_variant.warnings += ': ' + str(caution) + ': ' + str( automap) relevant = "Select the automapped transcript and click Submit to analyse" rel_var = [] @@ -3625,7 +3573,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if data['error'] != 'false': reason = 'Cannot currently display the required information:' error = data['error'] - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue @@ -3640,12 +3588,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr rel_var.append(accessions) # Kill current line and append for re-submission # Tag the line so that it is not written out - validation['write'] = 'false' + my_variant.write = False # Set the values and append to batch_list - query = {'quibble': fn.valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', - 'genomic_g': '', 'protein': '', 'write': 'true', - 'primary_assembly': primary_assembly, 'order': ordering} + query = variant.Variant(my_variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=my_variant.primary_assembly, order=ordering) self.batch_list.append(query) @@ -3666,7 +3611,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g reason = "An error has occurred" excep = "%s -- %s -- %s\n" % (time.ctime(), reason, formatted_variant) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue @@ -3674,7 +3619,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = "If the following error message does not address the issue and the problem persists 
please contact admin: " + to_g reason = "An error has occurred" excep = "%s -- %s -- %s\n" % (time.ctime(), reason, formatted_variant) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue @@ -3711,7 +3656,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr automap = 'Automap has corrected the variant description' # automapping of variant completed automap = trapped_input + ' automapped to ' + output - validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str(automap) + my_variant.warnings += ': ' + str(caution) + ': ' + str(automap) relevant = "Select the automapped transcript and click Submit to analyse" rel_var = [] rel_var.append(output) @@ -3733,7 +3678,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr data = self.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) if data['error'] != 'false': error = data['error'] - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue @@ -3748,12 +3693,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr rel_var.append(accessions) # Kill current line and append for re-submission # Tag the line so that it is not written out - validation['write'] = 'false' + my_variant.write = False # Set the values and append to batch_list - query = {'quibble': fn.valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', - 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, - 'order': ordering} + query = variant.Variant(my_variant.original, quibble=fn.valstr(hgsv_vt), warnings=automap, primary_assembly=my_variant.primary_assembly, order=ordering) self.batch_list.append(query) elif pat_g.search(input): @@ -3767,7 +3709,7 @@ 
def validate(self, batch_variant, selected_assembly, select_transcripts, transcr automap = 'Automap has corrected the variant description' # automapping of variant completed automap = str(test) + ' automapped to ' + str(query) - validation['warnings'] = validation['warnings'] + ': ' + str(caution) + ': ' + str(automap) + my_variant.warnings += ': ' + str(caution) + ': ' + str(automap) relevant = "Select the automapped transcript and click Submit to analyse" rel_var = [] rel_var.append(query) @@ -3790,7 +3732,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if data['error'] != 'false': reason = 'Cannot currently display the required information:' error = data['error'] - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue @@ -3805,12 +3747,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr rel_var.append(accessions) # Kill current line and append for re-submission # Tag the line so that it is not written out - validation['write'] = 'false' + my_variant.write = False # Set the values and append to batch_list - query = {'quibble': fn.valstr(hgvs_vt), 'id': validation['id'], 'warnings': automap, - 'description': '', 'coding': '', 'coding_g': '', 'genomic_r': '', 'genomic_g': '', - 'protein': '', 'write': 'true', 'primary_assembly': primary_assembly, - 'order': ordering} + query = variant.Variant(my_variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=my_variant.primary_assembly, order=ordering) self.batch_list.append(query) # VALIDATION of intronic variants @@ -3829,7 +3768,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr caution = fn.valstr(pre_valid) + ' automapped to ' + fn.valstr(post_valid) else: pass - validation['warnings'] = validation['warnings'] + ': ' + str(caution) + my_variant.warnings += ': ' + str(caution) 
logger.warning(str(caution)) else: pass @@ -3843,7 +3782,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: excep = "%s -- %s -- %s\n" % (time.ctime(), error, formatted_variant) - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) continue if valid == 'true': @@ -3857,7 +3796,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr caution = fn.valstr(pre_valid) + ' automapped to ' + fn.valstr(post_valid) else: pass - validation['warnings'] = validation['warnings'] + ': ' + str(caution) + my_variant.warnings += ': ' + str(caution) else: pass else: @@ -3894,7 +3833,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_coding = evm._maybe_normalize(hgvs_coding) gap_compensation = False except hgvs.exceptions.HGVSError as error: - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) logger.warning(str(error)) continue else: @@ -5219,7 +5158,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr auto_info = '\n'.join(info_out) auto_info = auto_info + '\nCaution should be used when reporting the displayed variant descriptions: If you are unsure, please contact admin' auto_info = str(auto_info.replace('\n', ': ')) - validation['warnings'] = validation['warnings'] + ': ' + str(auto_info) + my_variant.warnings += ': ' + str(auto_info) logger.warning(str(auto_info)) # Normailse hgvs_genomic try: @@ -6113,7 +6052,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr protein = str(hgvs_protein) else: error = protein_dict['error'] - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': hgvs_protein = protein_dict['hgvs_protein'] protein = 
str(hgvs_protein) @@ -6164,7 +6103,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr automap = fn.valstr(saved_hgvs_coding) + ' normalized to ' + fn.valstr(hgvs_seek_var) hgvs_coding = hgvs_seek_var coding = fn.valstr(hgvs_coding) - validation['warnings'] = validation['warnings'] + ': ' + automap + my_variant.warnings += ': ' + automap rng = hn.normalize(query_genomic) except NotImplementedError: pass @@ -6182,7 +6121,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = protein_dict['error'] if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': hgvs_protein = protein_dict['hgvs_protein'] - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) # Replace protein description in vars table protein = str(hgvs_protein) except NotImplementedError: @@ -6223,7 +6162,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr automap = fn.valstr(saved_hgvs_coding) + ' normalized to ' + fn.valstr(hgvs_seek_var) hgvs_coding = hgvs_seek_var coding = fn.valstr(hgvs_coding) - validation['warnings'] = validation['warnings'] + ': ' + automap + my_variant.warnings += ': ' + automap except NotImplementedError: fn.exceptPass() else: @@ -6255,7 +6194,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = protein_dict['error'] if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': hgvs_protein = protein_dict['hgvs_protein'] - validation['warnings'] = validation['warnings'] + ': ' + str(error) + my_variant.warnings += ': ' + str(error) # Replace protein description in vars table protein = str(hgvs_protein) except Exception: @@ -6297,31 +6236,30 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: pass updated_transcript_variant = hgvs_updated - validation['warnings'] = validation[ - 
'warnings'] + ': ' + 'A more recent version of the selected reference sequence ' + hgvs_coding.ac + ' is available (' + updated_transcript_variant.ac + ')' + ': ' + str( + my_variant.warnings += ': ' + 'A more recent version of the selected reference sequence ' + hgvs_coding.ac + ' is available (' + updated_transcript_variant.ac + ')' + ': ' + str( updated_transcript_variant) + ' MUST be fully validated prior to use in reports: select_variants=' + fn.valstr( updated_transcript_variant) # Set the data set_output_type_flag = 'gene' - validation['description'] = hgnc_gene_info - validation['coding'] = str(hgvs_coding) - validation['genomic_r'] = str(hgvs_refseq) - validation['genomic_g'] = str(hgvs_genomic) - validation['protein'] = str(hgvs_protein) - validation['primary_assembly'] = primary_assembly + my_variant.description = hgnc_gene_info + my_variant.coding = str(hgvs_coding) + my_variant.genomic_r = str(hgvs_refseq) + my_variant.genomic_g = str(hgvs_genomic) + my_variant.protein = str(hgvs_protein) + my_variant.primary_assembly = primary_assembly if gap_compensation is True: - validation['test_stash_tx_left'] = test_stash_tx_left - validation['test_stash_tx_right'] = test_stash_tx_right + my_variant.test_stash_tx_left = test_stash_tx_left + my_variant.test_stash_tx_right = test_stash_tx_right # finish timing - logger.traceEnd(validation) + logger.traceEnd(my_variant) # Report errors to User and VV admin except KeyboardInterrupt: raise except: set_output_type_flag = 'error' error = 'Validation error' - validation['warnings'] = str(error) + my_variant.warnings = str(error) exc_type, exc_value, last_traceback = sys.exc_info() te = traceback.format_exc() tbk = [str(exc_type), str(exc_value), str(te)] @@ -6337,701 +6275,677 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr logger.trace("End of for loop") # order the rows # from operator import itemgetter - by_order = sorted(self.batch_list, key=itemgetter('order')) + by_order = 
sorted(self.batch_list, key=lambda x: x.order) for valid in by_order: - if 'write' in list(valid.keys()): - if valid['write'] == 'true': - # Blank VCF - # chr = '' - # pos = '' - # ref = '' - # alt = '' - - # Fromulate a json type response - dict_out = {} - - # Set gap compensation bool - gap_compensation = True - - # warngins - warnings = valid['warnings'] - warnings = re.sub('del[GATC][GATC][GATC][GATC]+', 'del', warnings) - warnings = re.sub('^: ', '', warnings) - warnings = re.sub('::', ':', warnings) - - # Submitted variant - submitted = valid['id'] - - # Genomic sequence variation - genomic_variant = valid['genomic_g'] - - # genomic accession - if genomic_variant != '': - hgvs_genomic_variant = self.hp.parse_hgvs_variant(genomic_variant) - genomic_variant = fn.valstr(hgvs_genomic_variant) - genomic_accession = hgvs_genomic_variant.ac - else: - genomic_accession = '' - - # RefSeqGene variation - refseqgene_variant = valid['genomic_r'] - refseqgene_variant = refseqgene_variant.strip() - if re.search('RefSeqGene', refseqgene_variant) or refseqgene_variant == '': - warnings = warnings + ': ' + refseqgene_variant - refseqgene_variant = '' + if valid.write: + # Blank VCF + # chr = '' + # pos = '' + # ref = '' + # alt = '' + + # Fromulate a json type response + dict_out = {} + + # Set gap compensation bool + gap_compensation = True + + # warngins + warnings = valid.warnings + warnings = re.sub('del[GATC][GATC][GATC][GATC]+', 'del', warnings) + warnings = re.sub('^: ', '', warnings) + warnings = re.sub('::', ':', warnings) + + # Submitted variant + submitted = valid.original + + # Genomic sequence variation + genomic_variant = valid.genomic_g + + # genomic accession + if genomic_variant != '': + hgvs_genomic_variant = self.hp.parse_hgvs_variant(genomic_variant) + genomic_variant = fn.valstr(hgvs_genomic_variant) + genomic_accession = hgvs_genomic_variant.ac + else: + genomic_accession = '' + + # RefSeqGene variation + refseqgene_variant = valid.genomic_r + 
refseqgene_variant = refseqgene_variant.strip() + if re.search('RefSeqGene', refseqgene_variant) or refseqgene_variant == '': + warnings = warnings + ': ' + refseqgene_variant + refseqgene_variant = '' + lrg_variant = '' + hgvs_refseqgene_variant = 'false' + else: + hgvs_refseqgene_variant = self.hp.parse_hgvs_variant(refseqgene_variant) + rsg_ac = self.db.get_lrgID_from_RefSeqGeneID(str(hgvs_refseqgene_variant.ac)) + if rsg_ac[0] == 'none': lrg_variant = '' - hgvs_refseqgene_variant = 'false' else: - hgvs_refseqgene_variant = self.hp.parse_hgvs_variant(refseqgene_variant) - rsg_ac = self.db.get_lrgID_from_RefSeqGeneID(str(hgvs_refseqgene_variant.ac)) - if rsg_ac[0] == 'none': - lrg_variant = '' - else: - hgvs_lrg = copy.deepcopy(hgvs_refseqgene_variant) - hgvs_lrg.ac = rsg_ac[0] - lrg_variant = fn.valstr(hgvs_lrg) - if rsg_ac[1] == 'public': - pass - else: - warnings = warnings + ': The current status of ' + str( - hgvs_lrg.ac) + ' is pending therefore changes may be made to the LRG reference sequence' - - # Transcript sequence variation - tx_variant = valid['coding'] - if tx_variant != '': - if '(' in tx_variant and ')' in tx_variant: - tx_variant = tx_variant.split('(')[1] - tx_variant = tx_variant.replace(')', '') - - # transcript accession - hgvs_tx_variant = self.hp.parse_hgvs_variant(tx_variant) - tx_variant = fn.valstr(hgvs_tx_variant) - hgvs_transcript_variant = self.hp.parse_hgvs_variant(tx_variant) - transcript_accession = hgvs_transcript_variant.ac - - # Handle LRG - lrg_status = 'public' - lrg_transcript = self.db.get_lrgTranscriptID_from_RefSeqTranscriptID(transcript_accession) - if lrg_transcript == 'none': - lrg_transcript_variant = '' + hgvs_lrg = copy.deepcopy(hgvs_refseqgene_variant) + hgvs_lrg.ac = rsg_ac[0] + lrg_variant = fn.valstr(hgvs_lrg) + if rsg_ac[1] == 'public': + pass else: - # Note - LRG availability is dependant on UTA containing the data. 
In some - # instances we will be able to display the LRG_tx without being able to - # display the LRG gene data - - # if not re.search('RefSeqGene', refseqgene_variant) or refseqgene_variant != '': - # if hgvs_refseqgene_variant != 'RefSeqGene record not available' and hgvs_refseqgene_variant != 'false': - try: - hgvs_lrg_t = self.vm.g_to_t(hgvs_refseqgene_variant, transcript_accession) - hgvs_lrg_t.ac = lrg_transcript - lrg_transcript_variant = fn.valstr(hgvs_lrg_t) - except: - if hgvs_transcript_variant.posedit.pos.start.offset == 0 and hgvs_transcript_variant.posedit.pos.end.offset == 0: - hgvs_lrg_t = copy.copy(hgvs_transcript_variant) - hgvs_lrg_t.ac = lrg_transcript - lrg_transcript_variant = fn.valstr(hgvs_lrg_t) - else: - lrg_transcript_variant = '' - else: - transcript_accession = '' + warnings = warnings + ': The current status of ' + str( + hgvs_lrg.ac) + ' is pending therefore changes may be made to the LRG reference sequence' + + # Transcript sequence variation + tx_variant = valid.coding + if tx_variant != '': + if '(' in tx_variant and ')' in tx_variant: + tx_variant = tx_variant.split('(')[1] + tx_variant = tx_variant.replace(')', '') + + # transcript accession + hgvs_tx_variant = self.hp.parse_hgvs_variant(tx_variant) + tx_variant = fn.valstr(hgvs_tx_variant) + hgvs_transcript_variant = self.hp.parse_hgvs_variant(tx_variant) + transcript_accession = hgvs_transcript_variant.ac + + # Handle LRG + lrg_status = 'public' + lrg_transcript = self.db.get_lrgTranscriptID_from_RefSeqTranscriptID(transcript_accession) + if lrg_transcript == 'none': lrg_transcript_variant = '' + else: + # Note - LRG availability is dependant on UTA containing the data. 
In some + # instances we will be able to display the LRG_tx without being able to + # display the LRG gene data - # Look for intronic variants - if transcript_accession != '' and genomic_accession != '': - # Remove del bases - str_transcript = fn.valstr(hgvs_transcript_variant) - hgvs_transcript_variant = self.hp.parse_hgvs_variant(str_transcript) + # if not re.search('RefSeqGene', refseqgene_variant) or refseqgene_variant != '': + # if hgvs_refseqgene_variant != 'RefSeqGene record not available' and hgvs_refseqgene_variant != 'false': try: - self.vr.validate(hgvs_transcript_variant) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('intronic variant', error): - genome_context_transcript_variant = genomic_accession + '(' + transcript_accession + '):c.' + str( - hgvs_transcript_variant.posedit) - if refseqgene_variant != '': - hgvs_refseqgene_variant = self.hp.parse_hgvs_variant(refseqgene_variant) - refseqgene_accession = hgvs_refseqgene_variant.ac - hgvs_coding_from_refseqgene = self.vm.g_to_t(hgvs_refseqgene_variant, - hgvs_transcript_variant.ac) - hgvs_coding_from_refseqgene = fn.valstr(hgvs_coding_from_refseqgene) - hgvs_coding_from_refseqgene = self.hp.parse_hgvs_variant(hgvs_coding_from_refseqgene) - RefSeqGene_context_transcript_variant = refseqgene_accession + '(' + transcript_accession + '):c.' 
+ str( - hgvs_coding_from_refseqgene.posedit.pos) + str( - hgvs_coding_from_refseqgene.posedit.edit) - else: - RefSeqGene_context_transcript_variant = '' + hgvs_lrg_t = self.vm.g_to_t(hgvs_refseqgene_variant, transcript_accession) + hgvs_lrg_t.ac = lrg_transcript + lrg_transcript_variant = fn.valstr(hgvs_lrg_t) + except: + if hgvs_transcript_variant.posedit.pos.start.offset == 0 and hgvs_transcript_variant.posedit.pos.end.offset == 0: + hgvs_lrg_t = copy.copy(hgvs_transcript_variant) + hgvs_lrg_t.ac = lrg_transcript + lrg_transcript_variant = fn.valstr(hgvs_lrg_t) + else: + lrg_transcript_variant = '' + else: + transcript_accession = '' + lrg_transcript_variant = '' + + # Look for intronic variants + if transcript_accession != '' and genomic_accession != '': + # Remove del bases + str_transcript = fn.valstr(hgvs_transcript_variant) + hgvs_transcript_variant = self.hp.parse_hgvs_variant(str_transcript) + try: + self.vr.validate(hgvs_transcript_variant) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('intronic variant', error): + genome_context_transcript_variant = genomic_accession + '(' + transcript_accession + '):c.' + str( + hgvs_transcript_variant.posedit) + if refseqgene_variant != '': + hgvs_refseqgene_variant = self.hp.parse_hgvs_variant(refseqgene_variant) + refseqgene_accession = hgvs_refseqgene_variant.ac + hgvs_coding_from_refseqgene = self.vm.g_to_t(hgvs_refseqgene_variant, + hgvs_transcript_variant.ac) + hgvs_coding_from_refseqgene = fn.valstr(hgvs_coding_from_refseqgene) + hgvs_coding_from_refseqgene = self.hp.parse_hgvs_variant(hgvs_coding_from_refseqgene) + RefSeqGene_context_transcript_variant = refseqgene_accession + '(' + transcript_accession + '):c.' 
+ str( + hgvs_coding_from_refseqgene.posedit.pos) + str( + hgvs_coding_from_refseqgene.posedit.edit) else: - genome_context_transcript_variant = '' # transcript_variant RefSeqGene_context_transcript_variant = '' else: genome_context_transcript_variant = '' # transcript_variant RefSeqGene_context_transcript_variant = '' else: - genome_context_transcript_variant = '' + genome_context_transcript_variant = '' # transcript_variant RefSeqGene_context_transcript_variant = '' - - # Protein description - predicted_protein_variant = valid['protein'] - if re.match('NP_', predicted_protein_variant): - rs_p, pred_prot_posedit = predicted_protein_variant.split(':') - lrg_p = self.db.get_lrgProteinID_from_RefSeqProteinID(rs_p) - if re.match('LRG', lrg_p): - predicted_protein_variant = rs_p + '(' + lrg_p + '):' + pred_prot_posedit - - # Gene - if transcript_accession != '': - try: - gene_symbol = self.db.get_gene_symbol_from_transcriptID(transcript_accession) - except: - gene_symbol = 'Unable to verify gene symbol for ' + str(transcript_accession) - else: - gene_symbol = '' - - # Transcript description - transcript_description = valid['description'] - - # Stashed variants - if 'test_stash_tx_left' not in validation: - pass - else: - test_stash_tx_left = validation['test_stash_tx_left'] - if 'test_stash_tx_right' not in validation: - pass + else: + genome_context_transcript_variant = '' + RefSeqGene_context_transcript_variant = '' + + # Protein description + predicted_protein_variant = valid.protein + if re.match('NP_', predicted_protein_variant): + rs_p, pred_prot_posedit = predicted_protein_variant.split(':') + lrg_p = self.db.get_lrgProteinID_from_RefSeqProteinID(rs_p) + if re.match('LRG', lrg_p): + predicted_protein_variant = rs_p + '(' + lrg_p + '):' + pred_prot_posedit + + # Gene + if transcript_accession != '': + try: + gene_symbol = self.db.get_gene_symbol_from_transcriptID(transcript_accession) + except: + gene_symbol = 'Unable to verify gene symbol for ' + 
str(transcript_accession) + else: + gene_symbol = '' + + # Transcript description + transcript_description = valid.description + + # Stashed variants + if valid.test_stash_tx_left: + test_stash_tx_left = valid.test_stash_tx_left + if valid.test_stash_tx_right: + test_stash_tx_right = valid.test_stash_tx_right + + # Multiple genomic variants + # multi_gen_vars = [] + if tx_variant != '': + hgvs_coding = self.hp.parse_hgvs_variant(str(tx_variant)) + # Gap gene black list + try: + gene_symbol = self.db.get_gene_symbol_from_transcriptID(hgvs_coding.ac) + except Exception: + fn.exceptPass() else: - test_stash_tx_right = validation['test_stash_tx_right'] + # If the gene symbol is not in the list, the value False will be returned + gap_compensation = vvChromosomes.gap_black_list(gene_symbol) - # Multiple genomic variants - # multi_gen_vars = [] - if tx_variant != '': - hgvs_coding = self.hp.parse_hgvs_variant(str(tx_variant)) - # Gap gene black list - try: - gene_symbol = self.db.get_gene_symbol_from_transcriptID(hgvs_coding.ac) - except Exception: - fn.exceptPass() + # Look for variants spanning introns + try: + hgvs_coding = hn.normalize(hgvs_coding) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.search('boundary', str(error)) or re.search('spanning', str(error)): + gap_compensation = False else: - # If the gene symbol is not in the list, the value False will be returned - gap_compensation = vvChromosomes.gap_black_list(gene_symbol) + pass + except hgvs.exceptions.HGVSError: + fn.exceptPass() - # Look for variants spanning introns + # Warn gap code status + logger.warning("gap_compensation_3 = " + str(gap_compensation)) + multi_g = [] + multi_list = [] + mapping_options = self.hdp.get_tx_mapping_options(hgvs_coding.ac) + for alt_chr in mapping_options: + if (re.match('NC_', alt_chr[1]) or re.match('NT_', alt_chr[1]) or re.match('NW_', + alt_chr[1])) and \ + alt_chr[2] == alt_aln_method: + multi_list.append(alt_chr[1]) + + for 
alt_chr in multi_list: try: - hgvs_coding = hn.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.search('boundary', str(error)) or re.search('spanning', str(error)): - gap_compensation = False - else: - pass - except hgvs.exceptions.HGVSError: - fn.exceptPass() - - # Warn gap code status - logger.warning("gap_compensation_3 = " + str(gap_compensation)) - multi_g = [] - multi_list = [] - mapping_options = self.hdp.get_tx_mapping_options(hgvs_coding.ac) - for alt_chr in mapping_options: - if (re.match('NC_', alt_chr[1]) or re.match('NT_', alt_chr[1]) or re.match('NW_', - alt_chr[1])) and \ - alt_chr[2] == alt_aln_method: - multi_list.append(alt_chr[1]) - - for alt_chr in multi_list: - try: - # Re set ori - ori = self.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=alt_chr, - alt_aln_method=alt_aln_method) - orientation = int(ori[0]['alt_strand']) - hgvs_alt_genomic = self.myvm_t_to_g(hgvs_coding, alt_chr, no_norm_evm, hn) - # Set hgvs_genomic accordingly - hgvs_genomic = copy.deepcopy(hgvs_alt_genomic) - - # genomic_possibilities - # 1. take the simple 3 pr normalized hgvs_genomic - # 2. Lock in hgvs_genomic at its most 5 prime position wrt genome - hgvs_genomic_possibilities = [] - - # Loop out gap code under these circumstances! 
- if gap_compensation is True: - logger.warning('g_to_t gap code 3 active') - rn_hgvs_genomic = reverse_normalizer.normalize(hgvs_alt_genomic) - hgvs_genomic_possibilities.append(rn_hgvs_genomic) - if orientation != -1: - try: - chromosome_normalized_hgvs_coding = reverse_normalizer.normalize( - hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - chromosome_normalized_hgvs_coding = hgvs_coding - else: - try: - chromosome_normalized_hgvs_coding = hn.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - chromosome_normalized_hgvs_coding = hgvs_coding + # Re set ori + ori = self.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=alt_chr, + alt_aln_method=alt_aln_method) + orientation = int(ori[0]['alt_strand']) + hgvs_alt_genomic = self.myvm_t_to_g(hgvs_coding, alt_chr, no_norm_evm, hn) + # Set hgvs_genomic accordingly + hgvs_genomic = copy.deepcopy(hgvs_alt_genomic) + + # genomic_possibilities + # 1. take the simple 3 pr normalized hgvs_genomic + # 2. Lock in hgvs_genomic at its most 5 prime position wrt genome + hgvs_genomic_possibilities = [] + + # Loop out gap code under these circumstances! 
+ if gap_compensation is True: + logger.warning('g_to_t gap code 3 active') + rn_hgvs_genomic = reverse_normalizer.normalize(hgvs_alt_genomic) + hgvs_genomic_possibilities.append(rn_hgvs_genomic) + if orientation != -1: + try: + chromosome_normalized_hgvs_coding = reverse_normalizer.normalize( + hgvs_coding) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + chromosome_normalized_hgvs_coding = hgvs_coding + else: + try: + chromosome_normalized_hgvs_coding = hn.normalize(hgvs_coding) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + chromosome_normalized_hgvs_coding = hgvs_coding - most_3pr_hgvs_genomic = self.myvm_t_to_g(chromosome_normalized_hgvs_coding, - alt_chr, - no_norm_evm, hn) - hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) + most_3pr_hgvs_genomic = self.myvm_t_to_g(chromosome_normalized_hgvs_coding, + alt_chr, + no_norm_evm, hn) + hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) - # First to the right - hgvs_stash = copy.deepcopy(hgvs_coding) + # First to the right + hgvs_stash = copy.deepcopy(hgvs_coding) + try: + hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) + except: + fn.exceptPass() + try: + stash_ac = hgvs_stash.ac + stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, self.sf) + stash_pos = int(stash_dict['pos']) + stash_ref = stash_dict['ref'] + stash_alt = stash_dict['alt'] + # Generate an end position + stash_end = str(stash_pos + len(stash_ref) - 1) + # make a not real deletion insertion + stash_hgvs_not_delins = self.hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' 
+ str( + stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) try: - hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) + stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) except: fn.exceptPass() - try: - stash_ac = hgvs_stash.ac - stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, self.sf) - stash_pos = int(stash_dict['pos']) - stash_ref = stash_dict['ref'] - stash_alt = stash_dict['alt'] - # Generate an end position - stash_end = str(stash_pos + len(stash_ref) - 1) - # make a not real deletion insertion - stash_hgvs_not_delins = self.hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' + str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + # Store a tx copy for later use + test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) + stash_genomic = self.myvm_t_to_g(test_stash_tx_right, hgvs_alt_genomic.ac, + no_norm_evm, hn) + # Stash the outputs if required + # test variants = NC_000006.11:g.90403795G= (causes double identity) + # NC_000002.11:g.73675227_73675228insCTC (? 
incorrect assumed insertion position) + # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': + # pass + if len(test_stash_tx_right.posedit.edit.ref) == (( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + stash_tx_right = test_stash_tx_right + if hasattr(test_stash_tx_right.posedit.edit, + 'alt') and test_stash_tx_right.posedit.edit.alt is not None: + alt = test_stash_tx_right.posedit.edit.alt + else: + alt = '' + if hasattr(stash_genomic.posedit.edit, + 'alt') and stash_genomic.posedit.edit.alt is not None: + g_alt = stash_genomic.posedit.edit.alt + else: + g_alt = '' + if (len(alt) - ( + test_stash_tx_right.posedit.pos.end.base - test_stash_tx_right.posedit.pos.start.base) + 1) != ( + len(g_alt) - ( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + hgvs_genomic_possibilities.append(stash_genomic) + else: + hgvs_genomic_possibilities.append('') + elif test_stash_tx_right.posedit.edit.type == 'identity': + reform_ident = str(test_stash_tx_right).split(':')[0] + reform_ident = reform_ident + ':c.' + str(test_stash_tx_right.posedit.pos) + 'del' + str( + test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) + hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) try: - stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) - except: - fn.exceptPass() - # Store a tx copy for later use - test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) - stash_genomic = self.myvm_t_to_g(test_stash_tx_right, hgvs_alt_genomic.ac, - no_norm_evm, hn) - # Stash the outputs if required - # test variants = NC_000006.11:g.90403795G= (causes double identity) - # NC_000002.11:g.73675227_73675228insCTC (? 
incorrect assumed insertion position) - # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) - # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': - # pass - if len(test_stash_tx_right.posedit.edit.ref) == (( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - stash_tx_right = test_stash_tx_right - if hasattr(test_stash_tx_right.posedit.edit, - 'alt') and test_stash_tx_right.posedit.edit.alt is not None: - alt = test_stash_tx_right.posedit.edit.alt - else: - alt = '' - if hasattr(stash_genomic.posedit.edit, - 'alt') and stash_genomic.posedit.edit.alt is not None: - g_alt = stash_genomic.posedit.edit.alt - else: - g_alt = '' - if (len(alt) - ( - test_stash_tx_right.posedit.pos.end.base - test_stash_tx_right.posedit.pos.start.base) + 1) != ( - len(g_alt) - ( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - hgvs_genomic_possibilities.append(stash_genomic) - else: - hgvs_genomic_possibilities.append('') - elif test_stash_tx_right.posedit.edit.type == 'identity': - reform_ident = str(test_stash_tx_right).split(':')[0] - reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_right.posedit.pos) + 'del' + str( - test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) - hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) - try: - hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error): - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append('') - else: + hn.normalize(hgvs_reform_ident) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error): stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append(stash_genomic) - else: - try: - hn.normalize(test_stash_tx_right) - except hgvs.exceptions.HGVSUnsupportedOperationError: hgvs_genomic_possibilities.append('') - else: - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: - fn.exceptPass() - except ValueError: - fn.exceptPass() + else: + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append(stash_genomic) + else: + try: + hn.normalize(test_stash_tx_right) + except hgvs.exceptions.HGVSUnsupportedOperationError: + hgvs_genomic_possibilities.append('') + else: + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append(stash_genomic) + except hgvs.exceptions.HGVSError as e: + fn.exceptPass() + except ValueError: + fn.exceptPass() - # Then to the left - hgvs_stash = copy.deepcopy(hgvs_coding) + # Then to the left + hgvs_stash = copy.deepcopy(hgvs_coding) + try: + hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) + except: + fn.exceptPass() + try: + stash_ac = hgvs_stash.ac + stash_dict = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, primary_assembly, + reverse_normalizer, self.sf) + stash_pos = int(stash_dict['pos']) + stash_ref = stash_dict['ref'] + stash_alt = stash_dict['alt'] + # Generate an end position + stash_end = str(stash_pos + len(stash_ref) - 
1) + # make a not real deletion insertion + stash_hgvs_not_delins = self.hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' + str( + stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) try: - hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) + stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) except: fn.exceptPass() - try: - stash_ac = hgvs_stash.ac - stash_dict = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, primary_assembly, - reverse_normalizer, self.sf) - stash_pos = int(stash_dict['pos']) - stash_ref = stash_dict['ref'] - stash_alt = stash_dict['alt'] - # Generate an end position - stash_end = str(stash_pos + len(stash_ref) - 1) - # make a not real deletion insertion - stash_hgvs_not_delins = self.hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' + str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + # Store a tx copy for later use + test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) + stash_genomic = self.myvm_t_to_g(test_stash_tx_left, hgvs_alt_genomic.ac, + no_norm_evm, hn) + # Stash the outputs if required + # test variants = NC_000006.11:g.90403795G= (causes double identity) + # NC_000002.11:g.73675227_73675228insCTC + # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': + # pass + if len(test_stash_tx_left.posedit.edit.ref) == (( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): + stash_tx_left = test_stash_tx_left + if hasattr(test_stash_tx_left.posedit.edit, + 'alt') and test_stash_tx_left.posedit.edit.alt is not None: + alt = test_stash_tx_left.posedit.edit.alt + else: + alt = '' + if hasattr(stash_genomic.posedit.edit, + 'alt') and stash_genomic.posedit.edit.alt is not None: + g_alt = stash_genomic.posedit.edit.alt + else: + g_alt = '' + if (len(alt) - ( 
+ test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( + len(g_alt) - ( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + hgvs_genomic_possibilities.append(stash_genomic) + else: + hgvs_genomic_possibilities.append('') + elif test_stash_tx_left.posedit.edit.type == 'identity': + reform_ident = str(test_stash_tx_left).split(':')[0] + reform_ident = reform_ident + ':c.' + str(test_stash_tx_left.posedit.pos) + 'del' + str( + test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) + hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) try: - stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) - except: - fn.exceptPass() - # Store a tx copy for later use - test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) - stash_genomic = self.myvm_t_to_g(test_stash_tx_left, hgvs_alt_genomic.ac, - no_norm_evm, hn) - # Stash the outputs if required - # test variants = NC_000006.11:g.90403795G= (causes double identity) - # NC_000002.11:g.73675227_73675228insCTC - # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) - # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': - # pass - if len(test_stash_tx_left.posedit.edit.ref) == (( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): - stash_tx_left = test_stash_tx_left - if hasattr(test_stash_tx_left.posedit.edit, - 'alt') and test_stash_tx_left.posedit.edit.alt is not None: - alt = test_stash_tx_left.posedit.edit.alt - else: - alt = '' - if hasattr(stash_genomic.posedit.edit, - 'alt') and stash_genomic.posedit.edit.alt is not None: - g_alt = stash_genomic.posedit.edit.alt - else: - g_alt = '' - if (len(alt) - ( - test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( - len(g_alt) - ( - 
stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - hgvs_genomic_possibilities.append(stash_genomic) - else: - hgvs_genomic_possibilities.append('') - elif test_stash_tx_left.posedit.edit.type == 'identity': - reform_ident = str(test_stash_tx_left).split(':')[0] - reform_ident = reform_ident + ':c.' + str(test_stash_tx_left.posedit.pos) + 'del' + str( - test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) - hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) - try: - hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error): - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append('') - else: + hn.normalize(hgvs_reform_ident) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error): stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append(stash_genomic) - else: - try: - hn.normalize(test_stash_tx_left) - except hgvs.exceptions.HGVSUnsupportedOperationError: hgvs_genomic_possibilities.append('') - else: - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: - fn.exceptPass() - except ValueError: - fn.exceptPass() + else: + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append(stash_genomic) + else: + try: + hn.normalize(test_stash_tx_left) + except hgvs.exceptions.HGVSUnsupportedOperationError: + hgvs_genomic_possibilities.append('') + else: + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append(stash_genomic) + except hgvs.exceptions.HGVSError as e: + fn.exceptPass() + except ValueError: + fn.exceptPass() - # direct mapping from reverse_normalized transcript insertions in the delins format - try: - if hgvs_coding.posedit.edit.type == 'ins': - most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) - 
most_3pr_hgvs_transcript_variant = reverse_normalizer.normalize(hgvs_coding) - try: - n_3pr = self.vm.c_to_n(most_3pr_hgvs_transcript_variant) - n_5pr = self.vm.c_to_n(most_5pr_hgvs_transcript_variant) - except: - n_3pr = most_3pr_hgvs_transcript_variant - n_5pr = most_5pr_hgvs_transcript_variant - # Make into a delins by adding the ref bases to the variant ref and alt - pr3_ref = self.sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, - n_3pr.posedit.pos.end.base) - pr5_ref = self.sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, - n_5pr.posedit.pos.end.base) - most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref - most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref - most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[ - 0] + most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ - pr3_ref[1] - most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[ - 0] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ - pr5_ref[1] - # Map to the genome - genomic_from_most_3pr_hgvs_transcript_variant = self.vm.t_to_g( - most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) - genomic_from_most_5pr_hgvs_transcript_variant = self.vm.t_to_g( - most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) - - # Normalize - If the variant spans a gap it should then form a static genomic variant - try: + # direct mapping from reverse_normalized transcript insertions in the delins format + try: + if hgvs_coding.posedit.edit.type == 'ins': + most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) + most_3pr_hgvs_transcript_variant = reverse_normalizer.normalize(hgvs_coding) + try: + n_3pr = self.vm.c_to_n(most_3pr_hgvs_transcript_variant) + n_5pr = self.vm.c_to_n(most_5pr_hgvs_transcript_variant) + except: + n_3pr = most_3pr_hgvs_transcript_variant + n_5pr = most_5pr_hgvs_transcript_variant + # Make into a delins by adding the ref bases to the variant ref and alt + pr3_ref = self.sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 
1, + n_3pr.posedit.pos.end.base) + pr5_ref = self.sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, + n_5pr.posedit.pos.end.base) + most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref + most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[ + 0] + most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ + pr3_ref[1] + most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[ + 0] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ + pr5_ref[1] + # Map to the genome + genomic_from_most_3pr_hgvs_transcript_variant = self.vm.t_to_g( + most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) + genomic_from_most_5pr_hgvs_transcript_variant = self.vm.t_to_g( + most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) + + # Normalize - If the variant spans a gap it should then form a static genomic variant + try: + genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( + genomic_from_most_3pr_hgvs_transcript_variant) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if error == 'base start position must be <= end position': + start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base + end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( genomic_from_most_3pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if error == 'base start position must be <= end position': - start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base - end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = 
start - genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_3pr_hgvs_transcript_variant) - try: + try: + genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( + genomic_from_most_5pr_hgvs_transcript_variant) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if error == 'base start position must be <= end position': + start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base + end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( genomic_from_most_5pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if error == 'base start position must be <= end position': - start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base - end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start - genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_5pr_hgvs_transcript_variant) - try: - if genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' 
+ str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_3pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( - genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) + try: + if genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' + str( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( + genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) - try: - if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' 
+ str( - most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref - most_3pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( - most_3pr_hgvs_transcript_variant_delins_from_dup) + try: + if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' + str( + most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( + most_3pr_hgvs_transcript_variant_delins_from_dup) - try: - if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' 
+ str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_5pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) + try: + if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' + str( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( + genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) - try: - if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' 
+ str( - most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref - most_5pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( - most_5pr_hgvs_transcript_variant_delins_from_dup) - - if len( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( - most_3pr_hgvs_transcript_variant.posedit.edit.alt): - hgvs_genomic_possibilities.append( - genomic_from_most_3pr_hgvs_transcript_variant) - if len( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( - most_5pr_hgvs_transcript_variant.posedit.edit.alt): - hgvs_genomic_possibilities.append( - genomic_from_most_5pr_hgvs_transcript_variant) + try: + if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' 
+ str( + most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( + most_5pr_hgvs_transcript_variant_delins_from_dup) + + if len( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( + most_3pr_hgvs_transcript_variant.posedit.edit.alt): + hgvs_genomic_possibilities.append( + genomic_from_most_3pr_hgvs_transcript_variant) + if len( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( + most_5pr_hgvs_transcript_variant.posedit.edit.alt): + hgvs_genomic_possibilities.append( + genomic_from_most_5pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - pass - fn.exceptPass() + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + pass + fn.exceptPass() - # Set variables for problem specific warnings - gapped_alignment_warning = '' - corrective_action_taken = '' - gapped_transcripts = '' - auto_info = '' + # Set variables for problem specific warnings + gapped_alignment_warning = '' + corrective_action_taken = '' + gapped_transcripts = '' + auto_info = '' - # Mark as not disparity detected - disparity_deletion_in = ['false', 'false'] - # Loop through to see if a gap can be located - possibility_counter = 0 - for possibility in hgvs_genomic_possibilities: - possibility_counter = possibility_counter + 1 - # 
Loop out stash possibilities which will not spot gaps so are empty - if possibility == '': - continue + # Mark as not disparity detected + disparity_deletion_in = ['false', 'false'] + # Loop through to see if a gap can be located + possibility_counter = 0 + for possibility in hgvs_genomic_possibilities: + possibility_counter = possibility_counter + 1 + # Loop out stash possibilities which will not spot gaps so are empty + if possibility == '': + continue + + # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps + hgvs_genomic_variant = possibility + stored_hgvs_genomic_variant = copy.deepcopy(hgvs_genomic_variant) - # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps - hgvs_genomic_variant = possibility - stored_hgvs_genomic_variant = copy.deepcopy(hgvs_genomic_variant) + # Reverse normalize hgvs_genomic_variant: NOTE will replace ref + try: + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( + hgvs_genomic_variant) + except hgvs.exceptions.HGVSError as e: + # Strange error caused by gap in genomic + error = str(e) + if re.search('base start position must be <= end position', error): + if hgvs_genomic.posedit.edit.type == 'delins': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( + hgvs_genomic) + if hgvs_genomic.posedit.edit.type == 'del': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, 
start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( + hgvs_genomic) + if re.search('insertion length must be 1', error): + if hgvs_genomic.posedit.edit.type == 'ins': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + ref_bases = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) + lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start, end) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( + hgvs_genomic) + + hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + # Store a copy for later use + stored_hgvs_genomic_5pr = copy.deepcopy(hgvs_genomic_5pr) + + # Make VCF + vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, + reverse_normalizer, self.sf) + chr = vcf_dict['chr'] + pos = vcf_dict['pos'] + ref = vcf_dict['ref'] + alt = vcf_dict['alt'] + + # Look for exonic gaps within transcript or chromosome + no_normalized_c = 'false' # Mark true to not produce an additional normalization of c. + + # Generate an end position + end = str(int(pos) + len(ref) - 1) + pos = str(pos) + + # Store a not real deletion insertion to test for gapping + stored_hgvs_not_delins = self.hp.parse_hgvs_variant(str( + hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' 
+ pos + '_' + end + 'del' + ref + 'ins' + alt) + v = [chr, pos, ref, alt] + + # Save a copy of current hgvs_coding + try: + saved_hgvs_coding = no_norm_evm.g_to_t(stored_hgvs_not_delins, + hgvs_coding.ac) + except Exception as e: + if str( + e) == 'start or end or both are beyond the bounds of transcript record': + saved_hgvs_coding = hgvs_coding + continue - # Reverse normalize hgvs_genomic_variant: NOTE will replace ref + # Detect intronic variation using normalization + intronic_variant = 'false' + # Look for normalized variant options that do not match hgvs_coding + if orientation == -1: + # position genomic at its most 5 prime position try: - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( - hgvs_genomic_variant) - except hgvs.exceptions.HGVSError as e: - # Strange error caused by gap in genomic - error = str(e) - if re.search('base start position must be <= end position', error): - if hgvs_genomic.posedit.edit.type == 'delins': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( - hgvs_genomic) - if hgvs_genomic.posedit.edit.type == 'del': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( - hgvs_genomic) - if 
re.search('insertion length must be 1', error): - if hgvs_genomic.posedit.edit.type == 'ins': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) - lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start, end) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( - hgvs_genomic) - - hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - # Store a copy for later use - stored_hgvs_genomic_5pr = copy.deepcopy(hgvs_genomic_5pr) - - # Make VCF - vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, - reverse_normalizer, self.sf) - chr = vcf_dict['chr'] - pos = vcf_dict['pos'] - ref = vcf_dict['ref'] - alt = vcf_dict['alt'] - - # Look for exonic gaps within transcript or chromosome - no_normalized_c = 'false' # Mark true to not produce an additional normalization of c. - - # Generate an end position - end = str(int(pos) + len(ref) - 1) - pos = str(pos) - - # Store a not real deletion insertion to test for gapping - stored_hgvs_not_delins = self.hp.parse_hgvs_variant(str( - hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' 
+ pos + '_' + end + 'del' + ref + 'ins' + alt) - v = [chr, pos, ref, alt] - - # Save a copy of current hgvs_coding + query_genomic = reverse_normalizer.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript ant test for movement try: - saved_hgvs_coding = no_norm_evm.g_to_t(stored_hgvs_not_delins, - hgvs_coding.ac) - except Exception as e: - if str( - e) == 'start or end or both are beyond the bounds of transcript record': - saved_hgvs_coding = hgvs_coding - continue - - # Detect intronic variation using normalization - intronic_variant = 'false' - # Look for normalized variant options that do not match hgvs_coding - if orientation == -1: - # position genomic at its most 5 prime position - try: - query_genomic = reverse_normalizer.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript ant test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if ( - hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding - - elif orientation != -1: - # position genomic at its most 3 prime position - try: - query_genomic = hn.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding + hgvs_seek_var = evm.g_to_t(query_genomic, hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + 
hgvs_seek_var = saved_hgvs_coding + else: seek_var = fn.valstr(hgvs_seek_var) seek_ac = str(hgvs_seek_var.ac) - if ( - hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding + if ( + hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding + elif orientation != -1: + # position genomic at its most 3 prime position try: - intron_test = hn.normalize(hgvs_seek_var) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - intronic_variant = 'hard_fail' - else: - # Double check to see whether the variant is actually intronic? 
- for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' + query_genomic = hn.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript and test for movement + try: + hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if ( + hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding - if intronic_variant != 'hard_fail': - if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', - str( - hgvs_seek_var.posedit.pos)) or re.search( - r'\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str( - hgvs_seek_var.posedit.pos)): + try: + intron_test = hn.normalize(hgvs_seek_var) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + intronic_variant = 'hard_fail' + else: # Double check to see whether the variant is actually intronic? 
for exon in ori: genomic_start = int(exon['alt_start_i']) @@ -7044,1022 +6958,1038 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: intronic_variant = 'true' - if intronic_variant != 'true': - # Flag RefSeqGene for ammendment - # amend_RefSeqGene = 'false' - # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths - if stored_hgvs_not_delins != '': - # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) - # This test will only occur in dup of single base, insertion or substitution - if not re.search('_', str(hgvs_not_delins.posedit.pos)): - if re.search('dup', - hgvs_genomic_5pr.posedit.edit.type) or re.search( - 'ins', hgvs_genomic_5pr.posedit.edit.type): - # For gap in chr, map to t. - but becaouse we have pushed to 5 prime by norm, add 1 to end pos - plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) - plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 - plussed_hgvs_not_delins.posedit.edit.ref = '' - transcript_variant = no_norm_evm.g_to_t(plussed_hgvs_not_delins, - str( - saved_hgvs_coding.ac)) - if (( - transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( - hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, - end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[ - 1:] - elif re.search('ins', str( - hgvs_genomic_5pr.posedit.edit)) and re.search('del', - str( - 
hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', str( - hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', - str( - hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, - end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[ - 1:] - else: - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, - end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[ - 1:] - elif re.search('ins', str( - hgvs_genomic_5pr.posedit.edit)) and re.search('del', - str( - hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', str( - hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', - str( - hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, - end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[ - 1:] - else: - pass + if intronic_variant != 'hard_fail': + if re.search(r'\d+\+', 
str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', + str( + hgvs_seek_var.posedit.pos)) or re.search( + r'\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str( + hgvs_seek_var.posedit.pos)): + # Double check to see whether the variant is actually intronic? + for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break else: - pass - tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - saved_hgvs_coding.ac) - # Create normalized version of tx_hgvs_not_delins - rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) - # Check for +1 base and adjust - if re.search(r'\+', - str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - r'\+', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - fn.exceptPass() + intronic_variant = 'true' - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, - 
no_norm_evm, hn) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str( - saved_hgvs_coding.ac)) - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, hn) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str( - saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: - # pass - - # Check for -ve base and adjust - elif re.search(r'\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - r'\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - fn.exceptPass() - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base back to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # Delete the ref - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # Add the additional base to the ALT - start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) - rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - 
test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, hn) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str( - saved_hgvs_coding.ac)) - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + if intronic_variant != 'true': + # Flag RefSeqGene for ammendment + # amend_RefSeqGene = 'false' + # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths + if stored_hgvs_not_delins != '': + # Refresh hgvs_not_delins from stored_hgvs_not_delins + hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) + # This test will only occur in dup of single base, insertion or substitution + if not re.search('_', str(hgvs_not_delins.posedit.pos)): + if re.search('dup', + hgvs_genomic_5pr.posedit.edit.type) or re.search( + 'ins', hgvs_genomic_5pr.posedit.edit.type): + # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos + plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) + plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 + plussed_hgvs_not_delins.posedit.edit.ref = '' + transcript_variant = no_norm_evm.g_to_t(plussed_hgvs_not_delins, + str( + saved_hgvs_coding.ac)) + if (( + transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( + hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, + end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[ + 1:] + elif re.search('ins', str( + hgvs_genomic_5pr.posedit.edit)) and re.search('del', + str( + hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', str( + hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', + str( + hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, + end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[ + 1:] else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, hn) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - 
str( - saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, + end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[ + 1:] + elif re.search('ins', str( + hgvs_genomic_5pr.posedit.edit)) and re.search('del', + str( + hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', str( + hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', + str( + hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, + end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[ + 1:] else: + pass + else: + pass + tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + saved_hgvs_coding.ac) + # Create normalized version of tx_hgvs_not_delins + rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) + # Check for +1 base and adjust + if re.search(r'\+', + str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( + r'\+', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + 
try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: fn.exceptPass() - # Logic - if len(hgvs_not_delins.posedit.edit.ref) < len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( - hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['chromosome', gap_length] - elif len(hgvs_not_delins.posedit.edit.ref) > len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( - rn_tx_hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) else: - re_capture_tx_variant = [] - for possibility in hgvs_genomic_possibilities: - if possibility == '': + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, + no_norm_evm, hn) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str( + saved_hgvs_coding.ac)) + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, + no_norm_evm, hn) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str( + saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # else: + 
# pass + + # Check for -ve base and adjust + elif re.search(r'\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( + r'\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: + fn.exceptPass() + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base back to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # Delete the ref + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # Add the additional base to the ALT + start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 + end = rn_tx_hgvs_not_delins.posedit.pos.end.base + ref_bases = self.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, + no_norm_evm, hn) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str( + saved_hgvs_coding.ac)) + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = 
self.myvm_t_to_g(test_tx_var, alt_chr, + no_norm_evm, hn) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str( + saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + else: + fn.exceptPass() + + # Logic + if len(hgvs_not_delins.posedit.edit.ref) < len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( + hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['chromosome', gap_length] + elif len(hgvs_not_delins.posedit.edit.ref) > len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( + rn_tx_hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['transcript', gap_length] + else: + re_capture_tx_variant = [] + for possibility in hgvs_genomic_possibilities: + if possibility == '': + continue + hgvs_t_possibility = self.vm.g_to_t(possibility, hgvs_coding.ac) + if hgvs_t_possibility.posedit.edit.type == 'ins': + try: + hgvs_t_possibility = self.vm.c_to_n(hgvs_t_possibility) + except: continue - hgvs_t_possibility = self.vm.g_to_t(possibility, hgvs_coding.ac) - if hgvs_t_possibility.posedit.edit.type == 'ins': - try: - hgvs_t_possibility = self.vm.c_to_n(hgvs_t_possibility) - except: - continue - if hgvs_t_possibility.posedit.pos.start.offset != 0 or hgvs_t_possibility.posedit.pos.end.offset != 0: - continue - ins_ref = self.sf.fetch_seq(hgvs_t_possibility.ac, - hgvs_t_possibility.posedit.pos.start.base - 1, - hgvs_t_possibility.posedit.pos.start.base + 1) - try: - hgvs_t_possibility = self.vm.n_to_c(hgvs_t_possibility) - except: - continue - hgvs_t_possibility.posedit.edit.ref = ins_ref - hgvs_t_possibility.posedit.edit.alt = ins_ref[ - 0] + hgvs_t_possibility.posedit.edit.alt + \ - ins_ref[1] - if possibility.posedit.edit.type == 'ins': - ins_ref = self.sf.fetch_seq(possibility.ac, - possibility.posedit.pos.start.base - 1, - possibility.posedit.pos.end.base) - possibility.posedit.edit.ref = ins_ref - 
possibility.posedit.edit.alt = ins_ref[ - 0] + possibility.posedit.edit.alt + \ - ins_ref[1] - if len(hgvs_t_possibility.posedit.edit.ref) < len( - possibility.posedit.edit.ref): - gap_length = len(possibility.posedit.edit.ref) - len( - hgvs_t_possibility.posedit.edit.ref) - re_capture_tx_variant = ['transcript', gap_length, - hgvs_t_possibility] - hgvs_not_delins = possibility - hgvs_genomic_5pr = possibility - break - - if re_capture_tx_variant != []: + if hgvs_t_possibility.posedit.pos.start.offset != 0 or hgvs_t_possibility.posedit.pos.end.offset != 0: + continue + ins_ref = self.sf.fetch_seq(hgvs_t_possibility.ac, + hgvs_t_possibility.posedit.pos.start.base - 1, + hgvs_t_possibility.posedit.pos.start.base + 1) try: - tx_hgvs_not_delins = self.vm.c_to_n(re_capture_tx_variant[2]) + hgvs_t_possibility = self.vm.n_to_c(hgvs_t_possibility) except: - tx_hgvs_not_delins = re_capture_tx_variant[2] - disparity_deletion_in = re_capture_tx_variant[0:-1] - else: - pass - - # Final sanity checks - try: - self.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) - except Exception as e: - if str( - e) == 'start or end or both are beyond the bounds of transcript record': - continue - try: - hn.normalize(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) + continue + hgvs_t_possibility.posedit.edit.ref = ins_ref + hgvs_t_possibility.posedit.edit.alt = ins_ref[ + 0] + hgvs_t_possibility.posedit.edit.alt + \ + ins_ref[1] + if possibility.posedit.edit.type == 'ins': + ins_ref = self.sf.fetch_seq(possibility.ac, + possibility.posedit.pos.start.base - 1, + possibility.posedit.pos.end.base) + possibility.posedit.edit.ref = ins_ref + possibility.posedit.edit.alt = ins_ref[ + 0] + possibility.posedit.edit.alt + \ + ins_ref[1] + if len(hgvs_t_possibility.posedit.edit.ref) < len( + possibility.posedit.edit.ref): + gap_length = len(possibility.posedit.edit.ref) - len( + hgvs_t_possibility.posedit.edit.ref) + re_capture_tx_variant = 
['transcript', gap_length, + hgvs_t_possibility] + hgvs_not_delins = possibility + hgvs_genomic_5pr = possibility + break - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - continue - elif re.match('Normalization of intronic variants is not supported', - error): - # We know that this cannot be because of an intronic variant, so must be aligned to tx gap - disparity_deletion_in = ['transcript', 'Requires Analysis'] - - # Recreate hgvs_genomic - if disparity_deletion_in[0] == 'transcript': - hgvs_genomic = hgvs_not_delins - - # Find oddly placed gaps where the tx variant is encompassed in the gap - if disparity_deletion_in[0] == 'false' and ( - possibility_counter == 3 or possibility_counter == 4): - rg = reverse_normalizer.normalize(hgvs_not_delins) - rtx = self.vm.g_to_t(rg, tx_hgvs_not_delins.ac) - fg = hn.normalize(hgvs_not_delins) - ftx = self.vm.g_to_t(fg, tx_hgvs_not_delins.ac) - if ( - rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( - ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): - exons = self.hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, alt_aln_method) - exonic = False - for ex_test in exons: - if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ - 7]) and ftx.posedit.pos.end.base in range(ex_test[6], - ex_test[7]): - exonic = True - if exonic is True: - hgvs_not_delins = fg - hgvs_genomic = fg - hgvs_genomic_5pr = fg - try: - tx_hgvs_not_delins = self.vm.c_to_n(ftx) - except Exception: - tx_hgvs_not_delins = ftx - disparity_deletion_in = ['transcript', 'Requires Analysis'] + if re_capture_tx_variant != []: + try: + tx_hgvs_not_delins = self.vm.c_to_n(re_capture_tx_variant[2]) + except: + tx_hgvs_not_delins = re_capture_tx_variant[2] + disparity_deletion_in = 
re_capture_tx_variant[0:-1] + else: + pass - # Pre-processing of tx_hgvs_not_delins - try: - if tx_hgvs_not_delins.posedit.edit.alt is None: - tx_hgvs_not_delins.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( - tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = self.hp.parse_hgvs_variant( - tx_hgvs_not_delins_delins_from_dup) - - if disparity_deletion_in[0] == 'transcript': - # amend_RefSeqGene = 'true' - # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search(r'\+', - str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search(r'\+', - str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.end))): - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) + # Final sanity checks + try: + self.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + except Exception as e: + if str( + e) == 'start or end or both are beyond the bounds of transcript record': + continue + try: + hn.normalize(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) - # Copy the current variant - tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + continue + elif re.match('Normalization of intronic variants is not supported', + error): + # We know that this cannot be because of an intronic variant, so 
must be aligned to tx gap + disparity_deletion_in = ['transcript', 'Requires Analysis'] + + # Recreate hgvs_genomic + if disparity_deletion_in[0] == 'transcript': + hgvs_genomic = hgvs_not_delins + + # Find oddly placed gaps where the tx variant is encompassed in the gap + if disparity_deletion_in[0] == 'false' and ( + possibility_counter == 3 or possibility_counter == 4): + rg = reverse_normalizer.normalize(hgvs_not_delins) + rtx = self.vm.g_to_t(rg, tx_hgvs_not_delins.ac) + fg = hn.normalize(hgvs_not_delins) + ftx = self.vm.g_to_t(fg, tx_hgvs_not_delins.ac) + if ( + rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( + ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): + exons = self.hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, alt_aln_method) + exonic = False + for ex_test in exons: + if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ + 7]) and ftx.posedit.pos.end.base in range(ex_test[6], + ex_test[7]): + exonic = True + if exonic is True: + hgvs_not_delins = fg + hgvs_genomic = fg + hgvs_genomic_5pr = fg try: - if tx_gap_fill_variant.posedit.edit.alt is None: - tx_gap_fill_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( - tx_gap_fill_variant.posedit.pos.start) + '_' + str( - tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = self.hp.parse_hgvs_variant( - tx_gap_fill_variant_delins_from_dup) + tx_hgvs_not_delins = self.vm.c_to_n(ftx) + except Exception: + tx_hgvs_not_delins = ftx + disparity_deletion_in = ['transcript', 'Requires Analysis'] - # Identify which half of the NOT-intron the start position of the variant is in - if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - tx_gap_fill_variant.posedit.pos.start.offset = int( - '0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.offset = int( - '0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.offset = int( - '0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - tx_gap_fill_variant.posedit.pos.end.offset = int( - '0') # int('-1') + # Pre-processing of tx_hgvs_not_delins + try: + if tx_hgvs_not_delins.posedit.edit.alt is None: + tx_hgvs_not_delins.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' 
+ str( + tx_hgvs_not_delins.posedit.pos.start) + '_' + str( + tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins = self.hp.parse_hgvs_variant( + tx_hgvs_not_delins_delins_from_dup) + + if disparity_deletion_in[0] == 'transcript': + # amend_RefSeqGene = 'true' + # ANY VARIANT WHOLLY WITHIN THE GAP + if (re.search(r'\+', + str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search(r'\+', + str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.end))): + gapped_transcripts = gapped_transcripts + ' ' + str( + tx_hgvs_not_delins.ac) + + # Copy the current variant + tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) + try: + if tx_gap_fill_variant.posedit.edit.alt is None: tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( + tx_gap_fill_variant.posedit.pos.start) + '_' + str( + tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant = self.hp.parse_hgvs_variant( + tx_gap_fill_variant_delins_from_dup) + + # Identify which half of the NOT-intron the start position of the variant is in + if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 + tx_gap_fill_variant.posedit.pos.start.offset = int( + '0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.offset = int( + '0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.offset = int( + '0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 + tx_gap_fill_variant.posedit.pos.end.offset = int( + '0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' - try: - tx_gap_fill_variant = self.vm.n_to_c(tx_gap_fill_variant) - except: - fn.exceptPass() - genomic_gap_fill_variant = self.vm.t_to_g(tx_gap_fill_variant, - reverse_normalized_hgvs_genomic.ac) - genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref + try: + tx_gap_fill_variant = self.vm.n_to_c(tx_gap_fill_variant) + except: + fn.exceptPass() + genomic_gap_fill_variant = self.vm.t_to_g(tx_gap_fill_variant, + reverse_normalized_hgvs_genomic.ac) + genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - try: - c_tx_hgvs_not_delins = self.vm.n_to_c(tx_hgvs_not_delins) - except Exception: - c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = self.vm.t_to_g(c_tx_hgvs_not_delins, - hgvs_genomic_5pr.ac) 
+ try: + c_tx_hgvs_not_delins = self.vm.n_to_c(tx_hgvs_not_delins) + except Exception: + c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) + genomic_gap_fill_variant_alt = self.vm.t_to_g(c_tx_hgvs_not_delins, + hgvs_genomic_5pr.ac) - # Ensure an ALT exists - try: - if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( - genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = self.hp.parse_hgvs_variant( - genomic_gap_fill_variant_delins_from_dup) - genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' 
+ str( - genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = self.hp.parse_hgvs_variant( - genomic_gap_fill_variant_alt_delins_from_dup) - - # Correct insertion alts - if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = self.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - genomic_gap_fill_variant_alt.posedit.pos.end.base) - genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - append_ref[1] - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - alternate_bases = list( - genomic_gap_fill_variant_alt.posedit.edit.alt) + # Ensure an ALT exists + try: + if genomic_gap_fill_variant_alt.posedit.edit.alt is None: + genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( + genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant = self.hp.parse_hgvs_variant( + genomic_gap_fill_variant_delins_from_dup) + genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' 
+ str( + genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt = self.hp.parse_hgvs_variant( + genomic_gap_fill_variant_alt_delins_from_dup) + + # Correct insertion alts + if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': + append_ref = self.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, + genomic_gap_fill_variant_alt.posedit.pos.end.base) + genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ + 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ + append_ref[1] + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) + if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: + alternate_bases = list( + genomic_gap_fill_variant_alt.posedit.edit.alt) + else: + # Deletions with no ins + pre_alternate_bases = list( + genomic_gap_fill_variant_alt.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = genomic_gap_fill_variant.posedit.pos.start.base + alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base + ref_base_dict = {} + for base in reference_bases: + ref_base_dict[ref_start] = str(base) + ref_start = ref_start + 1 + + alt_base_dict = {} + + # NEED TO SEARCH FOR RANGE = and replace with interval_range + # Need to search for int and replace with integer + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for integer in range( + genomic_gap_fill_variant_alt.posedit.pos.start.base, + genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): + if integer == alt_start: + 
alt_base_dict[integer] = str(''.join(alternate_bases)) else: - # Deletions with no ins - pre_alternate_bases = list( - genomic_gap_fill_variant_alt.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = genomic_gap_fill_variant.posedit.pos.start.base - alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base - ref_base_dict = {} - for base in reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = ref_start + 1 - - alt_base_dict = {} - - # NEED TO SEARCH FOR RANGE = and replace with interval_range - # Need to search for int and replace with integer - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for integer in range( - genomic_gap_fill_variant_alt.posedit.pos.start.base, - genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): - if integer == alt_start: - alt_base_dict[integer] = str(''.join(alternate_bases)) - else: - alt_base_dict[integer] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for integer in range( - genomic_gap_fill_variant.posedit.pos.start.base, - genomic_gap_fill_variant.posedit.pos.end.base + 1, - 1): - if integer in list(alt_base_dict.keys()): - alternate_sequence_bases.append(alt_base_dict[integer]) + alt_base_dict[integer] = 'X' + + # Generate the alt sequence + alternate_sequence_bases = [] + for integer in range( + genomic_gap_fill_variant.posedit.pos.start.base, + genomic_gap_fill_variant.posedit.pos.end.base + 1, + 1): + if integer in list(alt_base_dict.keys()): + alternate_sequence_bases.append(alt_base_dict[integer]) + else: + alternate_sequence_bases.append(ref_base_dict[integer]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Add the new alt to the gap fill variant and generate transcript variant + genomic_gap_fill_variant.posedit.edit.alt = 
alternate_sequence + hgvs_refreshed_variant = self.vm.g_to_t(genomic_gap_fill_variant, + tx_gap_fill_variant.ac) + + # Set warning + gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) + disparity_deletion_in[1] = [gap_size] + auto_info = auto_info + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + else: + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + auto_info = auto_info + '%s' % (gap_position) + + else: + if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: + # In this instance, we have identified a transcript gap but the n. version of + # the transcript variant but do not have a position which actually hits the gap, + # so the variant likely spans the gap, and is not picked up by an offset. 
+ try: + c1 = self.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + g1 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g3 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) + ng2 = hn.normalize(g2) + g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( + len(g3.posedit.edit.ref) - 1) + try: + c2 = self.vm.g_to_t(g3, c1.ac) + if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: + pass else: - alternate_sequence_bases.append(ref_base_dict[integer]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Add the new alt to the gap fill variant and generate transcript variant - genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = self.vm.g_to_t(genomic_gap_fill_variant, - tx_gap_fill_variant.ac) - - # Set warning - gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - disparity_deletion_in[1] = [gap_size] + tx_hgvs_not_delins = c2 + try: + tx_hgvs_not_delins = self.vm.c_to_n(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSError: + fn.exceptPass() + except hgvs.exceptions.HGVSInvalidVariantError: + fn.exceptPass() + + if re.search(r'\+', str( + tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\+', + str( + tx_hgvs_not_delins.posedit.pos.end)): auto_info = auto_info + str( stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( tx_hgvs_not_delins.ac) non_valid_caution = 'true' - + try: + c2 = self.vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = self.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) if re.match('NM_', str(for_location_c)): for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - else: gps = for_location_c.posedit.pos.start.base gpe = for_location_c.posedit.pos.start.base + 1 gap_position = ' between positions c.' 
+ str(gps) + '_' + str( gpe) + '\n' + # Warn update auto_info = auto_info + '%s' % (gap_position) - - else: - if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # In this instance, we have identified a transcript gap but the n. version of - # the transcript variant but do not have a position which actually hits the gap, - # so the variant likely spans the gap, and is not picked up by an offset. - try: - c1 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - g1 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) - g3 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) + elif re.search(r'\+', str( + tx_hgvs_not_delins.posedit.pos.end)) and not re.search(r'\+', + str( + tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = self.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - ng2 = hn.normalize(g2) - g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - len(g3.posedit.edit.ref) - 1) - try: - c2 = self.vm.g_to_t(g3, c1.ac) - if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - pass - else: - tx_hgvs_not_delins = c2 - try: - tx_hgvs_not_delins = 
self.vm.c_to_n(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSError: - fn.exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError: - fn.exceptPass() - - if re.search(r'\+', str( - tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\+', - str( - tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' 
+ str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\+', str( - tx_hgvs_not_delins.posedit.pos.end)) and not re.search(r'\+', - str( - tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - gpe = for_location_c.posedit.pos.end.base + 1 - gap_position = ' between positions c.' 
+ str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', str( - tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\-', - str( - tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - gap_position = ' between positions c.' 
+ str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', str( - tx_hgvs_not_delins.posedit.pos.end)) and not re.search(r'\-', - str( - tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - 1 - gpe = for_location_c.posedit.pos.end.base - gap_position = ' between positions c.' 
+ str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = self.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base + gpe = for_location_c.posedit.pos.end.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\-', str( + tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\-', + str( + tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = self.vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = self.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\-', str( + tx_hgvs_not_delins.posedit.pos.end)) and not re.search(r'\-', + str( + tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = self.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref else: - auto_info = auto_info + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) + '\n' - hgvs_refreshed_variant = tx_hgvs_not_delins - - # GAP IN THE CHROMOSOME - elif disparity_deletion_in[0] == 'chromosome': - # amend_RefSeqGene = 'true' - if possibility_counter == 3: - hgvs_refreshed_variant = stash_tx_right - elif possibility_counter == 4: - hgvs_refreshed_variant = stash_tx_left + g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = self.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base - 1 + gpe = 
for_location_c.posedit.pos.end.base + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) else: - hgvs_refreshed_variant = chromosome_normalized_hgvs_coding - # Warn - auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( - hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' transcript base(s) that fail to align to chromosome ' + str( - hgvs_genomic.ac) + '\n' + auto_info = auto_info + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + '\n' + hgvs_refreshed_variant = tx_hgvs_not_delins + + # GAP IN THE CHROMOSOME + elif disparity_deletion_in[0] == 'chromosome': + # amend_RefSeqGene = 'true' + if possibility_counter == 3: + hgvs_refreshed_variant = stash_tx_right + elif possibility_counter == 4: + hgvs_refreshed_variant = stash_tx_left else: - # Keep the same by re-setting rel_var - hgvs_refreshed_variant = hgvs_coding - # amend_RefSeqGene = 'false' + hgvs_refreshed_variant = chromosome_normalized_hgvs_coding + # Warn + auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' 
+ str( + hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' transcript base(s) that fail to align to chromosome ' + str( + hgvs_genomic.ac) + '\n' + else: + # Keep the same by re-setting rel_var + hgvs_refreshed_variant = hgvs_coding + # amend_RefSeqGene = 'false' + + # Edit the output + if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', + str( + hgvs_refreshed_variant.type)): + hgvs_refreshed_variant = no_norm_evm.n_to_c(hgvs_refreshed_variant) + else: + pass - # Edit the output - if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', - str( - hgvs_refreshed_variant.type)): - hgvs_refreshed_variant = no_norm_evm.n_to_c(hgvs_refreshed_variant) + try: + hn.normalize(hgvs_refreshed_variant) + except Exception as e: + error = str(e) + # Ensure the final variant is not intronic nor does it cross exon boundaries + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + hgvs_refreshed_variant = saved_hgvs_coding else: - pass - - try: - hn.normalize(hgvs_refreshed_variant) - except Exception as e: - error = str(e) - # Ensure the final variant is not intronic nor does it cross exon boundaries - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - hgvs_refreshed_variant = saved_hgvs_coding - else: - continue + continue - # Quick check to make sure the coding variant has not changed - try: - to_test = hn.normalize(hgvs_refreshed_variant) - except: - to_test = hgvs_refreshed_variant - if str(to_test.posedit.edit) != str(hgvs_coding.posedit.edit): - # Try the next available genomic option - if hgvs_coding.posedit.edit.type == 'identity' and to_test.posedit.edit.type == 'identity': - hgvs_coding = to_test - else: - continue + # Quick check to make sure 
the coding variant has not changed + try: + to_test = hn.normalize(hgvs_refreshed_variant) + except: + to_test = hgvs_refreshed_variant + if str(to_test.posedit.edit) != str(hgvs_coding.posedit.edit): + # Try the next available genomic option + if hgvs_coding.posedit.edit.type == 'identity' and to_test.posedit.edit.type == 'identity': + hgvs_coding = to_test + else: + continue - # Update hgvs_genomic - hgvs_alt_genomic = self.myvm_t_to_g(hgvs_refreshed_variant, alt_chr, - no_norm_evm, hn) - if hgvs_alt_genomic.posedit.edit.type == 'identity': - re_c = self.vm.g_to_t(hgvs_alt_genomic, hgvs_refreshed_variant.ac) + # Update hgvs_genomic + hgvs_alt_genomic = self.myvm_t_to_g(hgvs_refreshed_variant, alt_chr, + no_norm_evm, hn) + if hgvs_alt_genomic.posedit.edit.type == 'identity': + re_c = self.vm.g_to_t(hgvs_alt_genomic, hgvs_refreshed_variant.ac) + if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): + shuffle_left_g = copy.copy(hgvs_alt_genomic) + shuffle_left_g.posedit.edit.ref = '' + shuffle_left_g.posedit.edit.alt = '' + shuffle_left_g.posedit.pos.start.base = shuffle_left_g.posedit.pos.start.base - 1 + shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 + shuffle_left_g = reverse_normalizer.normalize(shuffle_left_g) + re_c = self.vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): - shuffle_left_g = copy.copy(hgvs_alt_genomic) - shuffle_left_g.posedit.edit.ref = '' - shuffle_left_g.posedit.edit.alt = '' - shuffle_left_g.posedit.pos.start.base = shuffle_left_g.posedit.pos.start.base - 1 - shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 - shuffle_left_g = reverse_normalizer.normalize(shuffle_left_g) - re_c = self.vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) - if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): - hgvs_alt_genomic = shuffle_left_g - - # If it is intronic, these vairables will not have been 
set - else: - # amend_RefSeqGene = 'false' - no_normalized_c = 'false' + hgvs_alt_genomic = shuffle_left_g - # Break if gap has been detected - if disparity_deletion_in[0] != 'false': - break + # If it is intronic, these vairables will not have been set + else: + # amend_RefSeqGene = 'false' + no_normalized_c = 'false' - # Normailse hgvs_genomic - try: - hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) - except hgvs.exceptions.HGVSError as e: - # Strange error caused by gap in genomic - error = str(e) - if re.search('base start position must be <= end position', error) and \ - disparity_deletion_in[0] == 'chromosome': - if hgvs_alt_genomic.posedit.edit.type == 'delins': - start = hgvs_alt_genomic.posedit.pos.start.base - end = hgvs_alt_genomic.posedit.pos.end.base - lhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) - rhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) - hgvs_alt_genomic.posedit.edit.ref = lhb + rhb - hgvs_alt_genomic.posedit.edit.alt = lhb + hgvs_alt_genomic.posedit.edit.alt + rhb - hgvs_alt_genomic.posedit.pos.start.base = end - hgvs_alt_genomic.posedit.pos.end.base = start - hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) - if hgvs_alt_genomic.posedit.edit.type == 'del': - start = hgvs_alt_genomic.posedit.pos.start.base - end = hgvs_alt_genomic.posedit.pos.end.base - lhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) - rhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) - hgvs_alt_genomic.posedit.edit.ref = lhb + rhb - hgvs_alt_genomic.posedit.edit.alt = lhb + rhb - hgvs_alt_genomic.posedit.pos.start.base = end - hgvs_alt_genomic.posedit.pos.end.base = start - hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) - - # Refresh the :g. variant - multi_g.append(hgvs_alt_genomic) - else: - multi_g.append(hgvs_alt_genomic) - corrective_action_taken = 'false' - - # In this instance, the gap code has generally found an incomplete-alignment rather than a - # truly gapped alignment. 
- except KeyError: - warnings = warnings + ': Suspected incomplete alignment between transcript %s and ' \ - 'genomic reference sequence %s' % (hgvs_coding.ac, - alt_chr) - continue - except hgvs.exceptions.HGVSError as e: - exc_type, exc_value, last_traceback = sys.exc_info() - te = traceback.format_exc() - error = str(te) - logger.error(str(exc_type) + " " + str(exc_value)) - logger.debug(error) - continue + # Break if gap has been detected + if disparity_deletion_in[0] != 'false': + break - if multi_g != []: - print((multi_g, type(multi_g))) + # Normailse hgvs_genomic + try: + hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) + except hgvs.exceptions.HGVSError as e: + # Strange error caused by gap in genomic + error = str(e) + if re.search('base start position must be <= end position', error) and \ + disparity_deletion_in[0] == 'chromosome': + if hgvs_alt_genomic.posedit.edit.type == 'delins': + start = hgvs_alt_genomic.posedit.pos.start.base + end = hgvs_alt_genomic.posedit.pos.end.base + lhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) + rhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) + hgvs_alt_genomic.posedit.edit.ref = lhb + rhb + hgvs_alt_genomic.posedit.edit.alt = lhb + hgvs_alt_genomic.posedit.edit.alt + rhb + hgvs_alt_genomic.posedit.pos.start.base = end + hgvs_alt_genomic.posedit.pos.end.base = start + hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) + if hgvs_alt_genomic.posedit.edit.type == 'del': + start = hgvs_alt_genomic.posedit.pos.start.base + end = hgvs_alt_genomic.posedit.pos.end.base + lhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) + rhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) + hgvs_alt_genomic.posedit.edit.ref = lhb + rhb + hgvs_alt_genomic.posedit.edit.alt = lhb + rhb + hgvs_alt_genomic.posedit.pos.start.base = end + hgvs_alt_genomic.posedit.pos.end.base = start + hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) + + # Refresh the :g. 
variant + multi_g.append(hgvs_alt_genomic) + else: + multi_g.append(hgvs_alt_genomic) + corrective_action_taken = 'false' - multi_gen_vars = multi_g # '|'.join(multi_g) - else: - multi_gen_vars = [] - else: - # HGVS genomic in the absence of a transcript variant - if genomic_variant != '': - multi_gen_vars = [hgvs_genomic_variant] - else: - multi_gen_vars = [] + # In this instance, the gap code has generally found an incomplete-alignment rather than a + # truly gapped alignment. + except KeyError: + warnings = warnings + ': Suspected incomplete alignment between transcript %s and ' \ + 'genomic reference sequence %s' % (hgvs_coding.ac, + alt_chr) + continue + except hgvs.exceptions.HGVSError as e: + exc_type, exc_value, last_traceback = sys.exc_info() + te = traceback.format_exc() + error = str(te) + logger.error(str(exc_type) + " " + str(exc_value)) + logger.debug(error) + continue - # Dictionaries of genomic loci - alt_genomic_dicts = [] - primary_genomic_dicts = {} + if multi_g != []: + print((multi_g, type(multi_g))) - if len(multi_gen_vars) != 0: - for alt_gen_var in multi_gen_vars: - for build in self.genome_builds: - test = vvChromosomes.supported_for_mapping(alt_gen_var.ac, build) - if test == 'true': - try: - vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, build, reverse_normalizer, self.sf) - except hgvs.exceptions.HGVSInvalidVariantError as e: - continue - # Identify primary assembly positions - if re.match('NC_', alt_gen_var.ac): - if re.match('GRC', build): - primary_genomic_dicts[build.lower()] = { - 'hgvs_genomic_description': fn.valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['grc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } + multi_gen_vars = multi_g # '|'.join(multi_g) + else: + multi_gen_vars = [] + else: + # HGVS genomic in the absence of a transcript variant + if genomic_variant != '': + multi_gen_vars = [hgvs_genomic_variant] + else: + multi_gen_vars = [] - else: - 
primary_genomic_dicts[build.lower()] = { - 'hgvs_genomic_description': fn.valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['ucsc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } - if build == 'GRCh38': - vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, 'hg38', reverse_normalizer, - self.sf) - primary_genomic_dicts['hg38'] = { - 'hgvs_genomic_description': fn.valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['ucsc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } + # Dictionaries of genomic loci + alt_genomic_dicts = [] + primary_genomic_dicts = {} - continue + if len(multi_gen_vars) != 0: + for alt_gen_var in multi_gen_vars: + for build in self.genome_builds: + test = vvChromosomes.supported_for_mapping(alt_gen_var.ac, build) + if test == 'true': + try: + vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, build, reverse_normalizer, self.sf) + except hgvs.exceptions.HGVSInvalidVariantError as e: + continue + # Identify primary assembly positions + if re.match('NC_', alt_gen_var.ac): + if re.match('GRC', build): + primary_genomic_dicts[build.lower()] = { + 'hgvs_genomic_description': fn.valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['grc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } else: - if re.match('GRC', build): - dict = {build.lower(): {'hgvs_genomic_description': fn.valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['grc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } + primary_genomic_dicts[build.lower()] = { + 'hgvs_genomic_description': fn.valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['ucsc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] } - else: - dict = {build.lower(): {'hgvs_genomic_description': fn.valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['ucsc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } + } + if build == 
'GRCh38': + vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, 'hg38', reverse_normalizer, + self.sf) + primary_genomic_dicts['hg38'] = { + 'hgvs_genomic_description': fn.valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['ucsc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] } + } + + continue + + else: + if re.match('GRC', build): + dict = {build.lower(): {'hgvs_genomic_description': fn.valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['grc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } + } + else: + dict = {build.lower(): {'hgvs_genomic_description': fn.valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['ucsc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } + } + # Append + alt_genomic_dicts.append(dict) + + if build == 'GRCh38': + vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, 'hg38', reverse_normalizer, + self.sf) + dict = {'hg38': {'hgvs_genomic_description': fn.valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['ucsc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } + } # Append alt_genomic_dicts.append(dict) + continue + else: + # May need to account for ALT NC_ + pass - if build == 'GRCh38': - vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, 'hg38', reverse_normalizer, - self.sf) - dict = {'hg38': {'hgvs_genomic_description': fn.valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['ucsc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } - } - # Append - alt_genomic_dicts.append(dict) - continue - else: - # May need to account for ALT NC_ - pass + # Warn not directly mapped to specified genome build + if genomic_accession != '': + caution = '' + if primary_assembly.lower() not in list(primary_genomic_dicts.keys()): + warnings = warnings + ': ' + str( + hgvs_coding) + ' cannot be mapped directly to genome build ' + primary_assembly + ': See alternative genomic loci or alternative 
genome builds for aligned genomic positions' + + warn_list = warnings.split(': ') + warnings_out = [] + for warning in warn_list: + warning.strip() + warning = warning.replace("'", "") + if warning == '': + continue + warnings_out.append(warning) + # Remove duplicate elements but maintain the order + seen = {} + no_rep_list = [seen.setdefault(x, x) for x in warnings_out if x not in seen] + warnings_out = no_rep_list + + # Ensure Variants have had the refs removed. + # if not hasattr(posedit, refseqgene_variant): + if refseqgene_variant != '': + try: + refseqgene_variant = fn.valstr(hgvs_refseqgene_variant) + except: + fn.exceptPass() - # Warn not directly mapped to specified genome build - if genomic_accession != '': - caution = '' - if primary_assembly.lower() not in list(primary_genomic_dicts.keys()): - warnings = warnings + ': ' + str( - hgvs_coding) + ' cannot be mapped directly to genome build ' + primary_assembly + ': See alternative genomic loci or alternative genome builds for aligned genomic positions' - - warn_list = warnings.split(': ') - warnings_out = [] - for warning in warn_list: - warning.strip() - warning = warning.replace("'", "") - if warning == '': - continue - warnings_out.append(warning) - # Remove duplicate elements but maintain the order - seen = {} - no_rep_list = [seen.setdefault(x, x) for x in warnings_out if x not in seen] - warnings_out = no_rep_list - - # Ensure Variants have had the refs removed. - # if not hasattr(posedit, refseqgene_variant): - if refseqgene_variant != '': + # Add single letter AA code to protein descriptions + predicted_protein_variant_dict = {"tlr": str(predicted_protein_variant), "slr": ''} + if predicted_protein_variant != '': + if not 'Non-coding :n.' 
in predicted_protein_variant: try: - refseqgene_variant = fn.valstr(hgvs_refseqgene_variant) - except: + format_p = predicted_protein_variant + format_p = re.sub(r'\(LRG_.+?\)', '', format_p) + re_parse_protein = self.hp.parse_hgvs_variant(format_p) + re_parse_protein_singleAA = fn.single_letter_protein(re_parse_protein) + predicted_protein_variant_dict["slr"] = str(re_parse_protein_singleAA) + except hgvs.exceptions.HGVSParseError: fn.exceptPass() + else: + predicted_protein_variant_dict["slr"] = str(predicted_protein_variant) + + # Populate the dictionary + dict_out['submitted_variant'] = submitted + dict_out['gene_symbol'] = gene_symbol + dict_out['transcript_description'] = transcript_description + dict_out['hgvs_transcript_variant'] = tx_variant + dict_out['genome_context_intronic_sequence'] = genome_context_transcript_variant + dict_out['refseqgene_context_intronic_sequence'] = RefSeqGene_context_transcript_variant + dict_out['hgvs_refseqgene_variant'] = refseqgene_variant + dict_out['hgvs_predicted_protein_consequence'] = predicted_protein_variant_dict + dict_out['validation_warnings'] = warnings_out + dict_out['hgvs_lrg_transcript_variant'] = lrg_transcript_variant + dict_out['hgvs_lrg_variant'] = lrg_variant + dict_out['alt_genomic_loci'] = alt_genomic_dicts + dict_out['primary_assembly_loci'] = primary_genomic_dicts + dict_out['reference_sequence_records'] = '' + + # Add links to reference_sequence_records + ref_records = self.db.get_urls(dict_out) + if ref_records != {}: + dict_out['reference_sequence_records'] = ref_records + + # Append to a list for return + batch_out.append(dict_out) - # Add single letter AA code to protein descriptions - predicted_protein_variant_dict = {"tlr": str(predicted_protein_variant), "slr": ''} - if predicted_protein_variant != '': - if not 'Non-coding :n.' 
in predicted_protein_variant: - try: - format_p = predicted_protein_variant - format_p = re.sub(r'\(LRG_.+?\)', '', format_p) - re_parse_protein = self.hp.parse_hgvs_variant(format_p) - re_parse_protein_singleAA = fn.single_letter_protein(re_parse_protein) - predicted_protein_variant_dict["slr"] = str(re_parse_protein_singleAA) - except hgvs.exceptions.HGVSParseError: - fn.exceptPass() - else: - predicted_protein_variant_dict["slr"] = str(predicted_protein_variant) - - # Populate the dictionary - dict_out['submitted_variant'] = submitted - dict_out['gene_symbol'] = gene_symbol - dict_out['transcript_description'] = transcript_description - dict_out['hgvs_transcript_variant'] = tx_variant - dict_out['genome_context_intronic_sequence'] = genome_context_transcript_variant - dict_out['refseqgene_context_intronic_sequence'] = RefSeqGene_context_transcript_variant - dict_out['hgvs_refseqgene_variant'] = refseqgene_variant - dict_out['hgvs_predicted_protein_consequence'] = predicted_protein_variant_dict - dict_out['validation_warnings'] = warnings_out - dict_out['hgvs_lrg_transcript_variant'] = lrg_transcript_variant - dict_out['hgvs_lrg_variant'] = lrg_variant - dict_out['alt_genomic_loci'] = alt_genomic_dicts - dict_out['primary_assembly_loci'] = primary_genomic_dicts - dict_out['reference_sequence_records'] = '' - - # Add links to reference_sequence_records - ref_records = self.db.get_urls(dict_out) - if ref_records != {}: - dict_out['reference_sequence_records'] = ref_records - - # Append to a list for return - batch_out.append(dict_out) - else: - continue - else: - continue """ Structure the output into dictionaries rather than a list with descriptive keys From b0faa28dd3c1554d77828defe34e8c20c6b00a64 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 18 Mar 2019 16:56:19 +0000 Subject: [PATCH 051/223] Started moving format converters into a seperate file --- VariantValidator/modules/format_converters.py | 522 ++++++++++++++++++ 
VariantValidator/modules/vvChromosomes.py | 1 + VariantValidator/modules/vvMixinCore.py | 520 ++--------------- 3 files changed, 554 insertions(+), 489 deletions(-) create mode 100644 VariantValidator/modules/format_converters.py diff --git a/VariantValidator/modules/format_converters.py b/VariantValidator/modules/format_converters.py new file mode 100644 index 00000000..73c7c0fc --- /dev/null +++ b/VariantValidator/modules/format_converters.py @@ -0,0 +1,522 @@ +import re +import hgvs +from .vvLogging import logger +from .variant import Variant +from . import vvChromosomes +from . import vvFunctions as fn + + +def vcf2hgvs_stage1(variant, validator): + """ + VCF2HGVS stage 1. converts chr-pos-ref-alt into chr:posRef>Alt + The output format is a common mistake caused by inaccurate conversion of + VCF variants into HGVS - hence the need for conversion step 2 + """ + skipvar = False + + if re.search(r'[-:]\d+[-:][GATC]+[-:][GATC]+', variant.quibble): + variant.quibble = variant.quibble.replace(':', '-') + # Extract primary_assembly if provided + if re.match(r'GRCh3\d+-', variant.quibble) or re.match(r'hg\d+-', variant.quibble): + in_list = variant.quibble.split('-') + validator.selected_assembly = in_list[0] + variant.quibble = '-'.join(in_list[1:]) + pre_input = variant.quibble + vcf_elements = pre_input.split('-') + variant.quibble = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[3]) + elif re.search(r'[-:]\d+[-:][GATC]+[-:]', variant.quibble): + variant.quibble = variant.quibble.replace(':', '-') + # Extract primary_assembly if provided + if re.match(r'GRCh3\d+-', variant.quibble) or re.match(r'hg\d+-', variant.quibble): + in_list = variant.quibble.split('-') + validator.selected_assembly = in_list[0] + variant.quibble = '-'.join(in_list[1:]) + pre_input = variant.quibble + vcf_elements = pre_input.split('-') + variant.warnings = 'Not stating ALT bases is ambiguous because VCF specification 4.0 would treat ' + \ + pre_input + ' as 
a deletion whereas VCF specification 4.1 onwards would treat ' + \ + pre_input + ' as ALT = REF' + variant.warnings += ': VariantValidator has output both alternatives' + logger.resub('Not stating ALT bases is ambiguous because VCF specification 4.0 would treat ' + + pre_input + ' as a deletion whereas VCF specification 4.1 onwards would treat ' + pre_input + + ' as ALT = REF. Validator will output both alternatives.') + variant.write = False + input_A = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], 'del') + input_B = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[2]) + queryA = Variant(variant.original, quibble=input_A, warnings=variant.warnings, + primary_assembly=variant.primary_assembly, order=variant.order) + queryB = Variant(variant.original, quibble=input_B, warnings=variant.warnings, + primary_assembly=variant.primary_assembly, order=variant.order) + validator.batch_list.append(queryA) + validator.batch_list.append(queryB) + skipvar = True + elif re.search(r'[-:]\d+[-:][-:][GATC]+', variant.quibble) or \ + re.search(r'[-:]\d+[-:][.][-:][GATC]+', variant.quibble): + variant.quibble = variant.quibble.replace(':', '-') + if re.search('-.-', variant.quibble): + variant.quibble = variant.quibble.replace('-.-', '-ins-') + if re.search('--', variant.quibble): + variant.quibble = variant.quibble.replace('--', '-ins-') + # Extract primary_assembly if provided + if re.match(r'GRCh3\d+-', variant.quibble) or re.match(r'hg\d+-', variant.quibble): + in_list = variant.quibble.split('-') + selected_assembly = in_list[0] + variant.quibble = '-'.join(in_list[1:]) + pre_input = variant.quibble + vcf_elements = pre_input.split('-') + variant.quibble = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[3]) + + logger.trace("Completed VCF-HVGS step 1", variant) + + return skipvar + + +def vcf2hgvs_stage2(variant, validator): + """ + VCF2HGVS conversion step 2 identifies the correct chromosomal 
reference + sequence based upon the non compliant identifier e.g. :2099572TC>T. + The data is currently stored in variantanalyser.supported_chromosome_builds. + Anticipated future builds will be transferred to MySQL which can be more + easily updated and maintained. + LRGs and LRG_ts also need to be assigned the correct reference sequence identifier. + The LRG ID data ia stored in the VariantValidator MySQL database. + The reference sequence type is also assigned. + """ + skipvar = False + if re.search(r'\w+\:', variant.quibble) and not re.search(r'\w+\:[gcnmrp]\.', variant.quibble): + if re.search(r'\w+\:[gcnmrp]', variant.quibble) and not re.search(r'\w+\:[gcnmrp]\.', variant.quibble): + # Missing dot + pass + else: + try: + if 'GRCh37' in variant.quibble or 'hg19' in variant.quibble: + variant.primary_assembly = 'GRCh37' + elif 'GRCh38' in variant.quibble or 'hg38' in variant.quibble: + variant.primary_assembly = 'GRCh38' + input_list = variant.quibble.split(':') + pos_ref_alt = str(input_list[1]) + positionAndEdit = input_list[1] + if not re.match(r'N[CGTWMRP]_', variant.quibble) and not re.match(r'LRG_', variant.quibble): + chr_num = str(input_list[0]) + chr_num = chr_num.upper().strip() + if re.match('CHR', chr_num): + chr_num = chr_num.replace('CHR', '') + # Use selected assembly + accession = vvChromosomes.to_accession(chr_num, validator.selected_assembly) + if accession is None: + variant.warnings += ': ' + chr_num + \ + ' is not part of genome build ' + validator.selected_assembly + logger.warning(chr_num + ' is not part of genome build ' + validator.selected_assembly) + skipvar = True + else: + accession = input_list[0] + if '>' in variant.quibble: + if 'del' in variant.quibble: + pos = re.match(r'\d+', pos_ref_alt) + position = pos.group(0) + old_ref, old_alt = pos_ref_alt.split('>') + old_ref = old_ref.replace(position, '') + position = int(position) - 1 + required_base = validator.sf.fetch_seq(accession, start_i=position - 1, end_i=position) + ref = 
required_base + old_ref + alt = required_base + positionAndEdit = str(position) + ref + '>' + alt + elif 'ins' in variant.quibble: + pos = re.match(r'\d+', pos_ref_alt) + position = pos.group(0) + old_ref, old_alt = pos_ref_alt.split('>') + # old_ref = old_ref.replace(position, '') + position = int(position) - 1 + required_base = validator.sf.fetch_seq(accession, start_i=position - 1, end_i=position) + ref = required_base + alt = required_base + old_alt + positionAndEdit = str(position) + ref + '>' + alt + # Assign reference sequence type + ref_type = validator.db.ref_type_assign(accession) + if re.match('LRG_', accession): + if ref_type == ':g.': + accession = validator.db.get_RefSeqGeneID_from_lrgID(accession) + else: + accession = validator.db.get_RefSeqTranscriptID_from_lrgTranscriptID(accession) + else: + accession = accession + variant.quibble = str(accession) + ref_type + str(positionAndEdit) + + except: + fn.exceptPass(variant) + + # Descriptions lacking the colon : + if re.search(r'[gcnmrp]\.', variant.quibble) and not re.search(r':[gcnmrp]\.', variant.quibble): + error = 'Unable to identify a colon (:) in the variant description %s. A colon is required in HGVS variant ' \ + 'descriptions to separate the reference accession from the reference type i.e. :. ' \ + 'e.g. :c.' 
% (variant.quibble) + variant.warnings += ': ' + error + logger.warning(error) + skipvar = True + + # Ambiguous chr reference + logger.trace("Completed VCF-HVGS step 2", variant) + + return skipvar + + +def vcf2hgvs_stage3(variant, validator): + """ + VCF2HGVS conversion step 3 is similar to step 2 but handles + formats like Chr16:g.2099572TC>T which are provided by Alamut and other + software + """ + skipvar = False + if re.search(r'\w+:[gcnmrp]\.', variant.quibble) and not re.match(r'N[CGTWMRP]_', variant.quibble): + # Take out lowercase Accession characters + lower_cased_list = variant.quibble.split(':') + if re.search('LRG', lower_cased_list[0], re.IGNORECASE): + lower_case_accession = lower_cased_list[0] + lower_case_accession = lower_case_accession.replace('l', 'L') + lower_case_accession = lower_case_accession.replace('r', 'R') + lower_case_accession = lower_case_accession.replace('g', 'G') + else: + lower_case_accession = lower_cased_list[0] + lower_case_accession = lower_case_accession.upper() + variant.quibble = ''.join(lower_cased_list[1:]) + variant.quibble = lower_case_accession + ':' + variant.quibble + if 'LRG_' not in variant.quibble and 'ENS' not in variant.quibble and not re.match('N[MRPC]_', variant.quibble): + try: + if 'GRCh37' in variant.quibble or 'hg19' in variant.quibble: + variant.primary_assembly = 'GRCh37' + elif 'GRCh38' in variant.quibble or 'hg38' in variant.quibble: + variant.primary_assembly = 'GRCh38' + input_list = variant.quibble.split(':') + query_a_symbol = input_list[0] + is_it_a_gene = validator.db.get_hgnc_symbol(query_a_symbol) + if is_it_a_gene == 'none': + positionAndEdit = input_list[1] + chr_num = str(input_list[0]) + chr_num = chr_num.upper().strip() + if re.match('CHR', chr_num): + chr_num = chr_num.replace('CHR', '') # Use selected assembly + accession = vvChromosomes.to_accession(chr_num, validator.selected_assembly) + if accession is None: + variant.warnings += ': ' + chr_num + \ + ' is not part of genome build ' + 
validator.selected_assembly + skipvar = True + variant.quibble = str(accession) + ':' + str(positionAndEdit) + except Exception: + fn.exceptPass(variant) + + logger.trace("Completed VCF-HGVS step 3", variant) + return skipvar + + +def gene_symbol_catch(variant, validator, select_transcripts_dict_plus_version): + """ + Searches for gene symbols that have been used as reference sequence + identifiers. Provides a sufficiently repremanding warning, but also provides + correctly formatted variant descriptions with appropriate transcript + reference sequence identifiers i.e. NM_ .... + Note: the output from the function must be validated because VV has no way + of knowing which the users intended reference sequence was, and the exon + boundaries etc of the alternative transcript variants may not be equivalent + """ + skipvar = False + if re.search(r'\w+\:[cn]\.', variant.quibble): + try: + pre_input = variant.quibble.split(':') + query_a_symbol = pre_input[0] + tx_edit = pre_input[1] + is_it_a_gene = validator.db.get_hgnc_symbol(query_a_symbol) + if is_it_a_gene != 'none': + uta_symbol = validator.db.get_uta_symbol(is_it_a_gene) + available_transcripts = validator.hdp.get_tx_for_gene(uta_symbol) + select_from_these_transcripts = [] + for tx in available_transcripts: + if 'NM_' in tx[3] or 'NR_' in tx[3]: + if tx[3] not in select_from_these_transcripts: + select_from_these_transcripts.append(tx[3]) + select_from_these_transcripts = '|'.join(select_from_these_transcripts) + if validator.select_transcripts != 'all': + variant.write = False + for transcript in list(select_transcripts_dict_plus_version.keys()): + variant.warnings = 'HGVS variant nomenclature does not allow the use of a gene symbol (' + \ + query_a_symbol + ') in place of a valid reference sequence' + refreshed_description = transcript + ':' + tx_edit + query = Variant(variant.original, quibble=refreshed_description, + warnings=variant.warnings, primary_assembly=variant.primary_assembly, + order=variant.order) 
+ validator.batch_list.append(query) + logger.resub('HGVS variant nomenclature does not allow the use of a gene symbol (' + \ + query_a_symbol + ') in place of a valid reference sequence') + else: + variant.warnings += ': ' + 'HGVS variant nomenclature does not allow the use of a gene symbol ('\ + + query_a_symbol + ') in place of a valid reference sequence: Re-submit ' + \ + variant.quibble + ' and specify transcripts from the following: ' + \ + 'select_transcripts=' + select_from_these_transcripts + logger.warning('HGVS variant nomenclature does not allow the use of a gene symbol (' + \ + query_a_symbol + ') in place of a valid reference sequence: Re-submit ' + + variant.quibble + ' and specify transcripts from the following: ' + + 'select_transcripts=' + select_from_these_transcripts) + skipvar = True + except: + fn.exceptPass() + logger.trace("Gene symbol reference catching complete", variant) + return skipvar + + +def refseq_catch(variant, validator, select_transcripts_dict_plus_version): + """ + Similar to the GENE_SYMBOL:c. n. types function, but spots RefSeqGene or + Chromosomal reference sequence identifiers used in the context of c. 
variant + descriptions + """ + skipvar = False + if re.search(r'\w+\:[cn]', variant.quibble): + try: + if variant.quibble.startswith('NG_'): + refSeqGeneID = variant.quibble.split(':')[0] + tx_edit = variant.quibble.split(':')[1] + gene_symbol = validator.db.get_gene_symbol_from_refSeqGeneID(refSeqGeneID) + if gene_symbol != 'none': + uta_symbol = validator.db.get_uta_symbol(gene_symbol) + available_transcripts = validator.hdp.get_tx_for_gene(uta_symbol) + select_from_these_transcripts = [] + for tx in available_transcripts: + if 'NM_' in tx[3] or 'NR_' in tx[3]: + if tx[3] not in select_from_these_transcripts: + select_from_these_transcripts.append(tx[3]) + select_from_these_transcripts = '|'.join(select_from_these_transcripts) + if validator.select_transcripts != 'all': + variant.write = False + for transcript in list(select_transcripts_dict_plus_version.keys()): + variant.warnings = 'NG_:c.PositionVariation descriptions should not be used unless a ' \ + 'transcript reference sequence has also been provided e.g. ' \ + 'NG_(NM_):c.PositionVariation' + refreshed_description = refSeqGeneID + '(' + transcript + ')' + ':' + tx_edit + query = Variant(variant.original, quibble=refreshed_description, + warnings=variant.warnings, primary_assembly=variant.primary_assembly, + order=variant.order) + + logger.resub('NG_:c.PositionVariation descriptions should not be used unless a transcript ' + 'reference sequence has also been provided e.g. NG_(NM_):c.PositionVariation. ' + 'Resubmitting corrected version.') + validator.batch_list.append(query) + else: + variant.warnings += ': ' + 'A transcript reference sequence has not been provided e.g. ' \ + 'NG_(NM_):c.PositionVariation. Re-submit ' + variant.quibble + \ + ' but also specify transcripts from the following: ' + 'select_transcripts='\ + + select_from_these_transcripts + logger.warning('A transcript reference sequence has not been provided e.g. ' + 'NG_(NM_):c.PositionVariation. 
Re-submit ' + variant.quibble + ' but also ' + 'specify transcripts from the following: select_transcripts=' + + select_from_these_transcripts) + skipvar = True + else: + variant.warnings += ': ' + 'A transcript reference sequence has not been provided e.g. ' \ + 'NG_(NM_):c.PositionVariation' + logger.warning( + 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation') + skipvar = True + elif variant.quibble.startswith('NC_'): + variant.warnings += ': ' + 'A transcript reference sequence has not been provided e.g. ' \ + 'NC_(NM_):c.PositionVariation. Unable to predict available transcripts ' \ + 'because chromosomal position is not specified' + logger.warning( + 'A transcript reference sequence has not been provided e.g. NC_(NM_):c.PositionVariation. ' + 'Unable to predict available transcripts because chromosomal position is not specified') + skipvar = True + except: + fn.exceptPass() + + logger.trace("Chromosomal/RefSeqGene reference catching complete", variant) + + return skipvar + + +def vcf2hgvs_stage4(variant, validator, hn): + """ + VCF2HGVS conversion step 4 has two purposes + 1. VCF is frequently inappropriately converted into HGVS like descriptions + such as GGGG>G which is actually a delins, del or ins. The function assigns + the correct edit type + 2. 
Detects and extracts multiple ALT sequences into HGVS descriptions and + automatically submits them for validation + """ + skipvar = False + not_sub = variant.quibble + not_sub_find = re.compile(r"([GATCgatc]+)>([GATCgatc]+)") + if not_sub_find.search(not_sub): + try: + # If the length of either side of the substitution delimer (>) is >1 + matches = not_sub_find.search(not_sub) + if len(matches.group(1)) > 1 or len(matches.group(2)) > 1 or re.search( + r"([GATCgatc]+)>([GATCgatc]+),([GATCgatc]+)", variant.quibble): + # Search for and remove range + interval_range = re.compile(r"([0-9]+)_([0-9]+)") + if interval_range.search(not_sub): + m = not_sub_find.search(not_sub) + start = m.group(1) + delete = m.group(2) + beginning_string, middle_string = not_sub.split(':') + middle_string = middle_string.split('_')[0] + end_string = start + '>' + delete + not_sub = beginning_string + ':' + middle_string + end_string + # Split description + split_colon = not_sub.split(':') + ref_ac = split_colon[0] + remainder = split_colon[1] + split_dot = remainder.split('.') + ref_type = split_dot[0] + remainder = split_dot[1] + posedit = remainder + split_greater = remainder.split('>') + insert = split_greater[1] + remainder = split_greater[0] + # Split remainder using matches + r = re.compile(r"([0-9]+)([GATCgatc]+)") + try: + m = r.search(remainder) + start = m.group(1) + delete = m.group(2) + starts = posedit.split(delete)[0] + re_try = ref_ac + ':' + ref_type + '.' + starts + 'del' + delete[0] + 'ins' + insert + hgvs_re_try = validator.hp.parse_hgvs_variant(re_try) + hgvs_re_try.posedit.edit.ref = delete + start_pos = str(hgvs_re_try.posedit.pos.start) + if re.search(r'\-', start_pos): + base, offset = start_pos.split('-') + new_offset = 0 - int(offset) + (len(delete)) + end_pos = int(base) + hgvs_re_try.posedit.pos.end.base = int(end_pos) + hgvs_re_try.posedit.pos.end.offset = int(new_offset) - 1 + not_delins = ref_ac + ':' + ref_type + '.' 
+ start_pos + '_' + str( + hgvs_re_try.posedit.pos.end) + 'del' + delete + 'ins' + insert + elif re.search(r'\+', start_pos): + base, offset = start_pos.split('+') + end_pos = int(base) + (len(delete) - int(offset) - 1) + new_offset = 0 + int(offset) + (len(delete) - 1) + hgvs_re_try.posedit.pos.end.base = int(end_pos) + hgvs_re_try.posedit.pos.end.offset = int(new_offset) + not_delins = ref_ac + ':' + ref_type + '.' + start_pos + '_' + str( + hgvs_re_try.posedit.pos.end) + 'del' + delete + 'ins' + insert + else: + end_pos = int(start_pos) + (len(delete) - 1) + not_delins = ref_ac + ':' + ref_type + '.' + start_pos + '_' + str( + end_pos) + 'del' + delete + 'ins' + insert + except: + fn.exceptPass() + not_delins = not_sub + # Parse into hgvs object + try: + hgvs_not_delins = validator.hp.parse_hgvs_variant(not_delins) + except hgvs.exceptions.HGVSError as e: + # Sort out multiple ALTS from VCF inputs + if re.search(r"([GATCgatc]+)>([GATCgatc]+),([GATCgatc]+)", not_delins): + header, alts = not_delins.split('>') + # Split up the alts into a list + alt_list = alts.split(',') + # Assemble and re-submit + for alt in alt_list: + variant.warnings = 'Multiple ALT sequences detected: auto-submitting all possible combinations' + variant.write = False + refreshed_description = header + '>' + alt + query = Variant(variant.original, quibble=refreshed_description, + warnings=variant.warnings, primary_assembly=variant.primary_assembly, + order=variant.order) + + validator.batch_list.append(query) + logger.resub( + 'Multiple ALT sequences detected. 
Auto-submitting all possible combinations.') + skipvar = True + else: + error = str(e) + variant.warnings += ': ' + error + logger.warning(str(e)) + skipvar = True + + try: + not_delins = str(hn.normalize(hgvs_not_delins)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('Normalization of intronic variants is not supported', error): + not_delins = not_delins + else: + issue_link = '' + variant.warnings += ': ' + str(error) + logger.warning(str(e)) + skipvar = True + # Create warning + caution = 'Variant description ' + variant.quibble + ' is not HGVS compliant' + automap = variant.quibble + ' automapped to ' + not_delins + variant.warnings += ': ' + automap + # Change input to normalized variant + variant.quibble = not_delins + except: + fn.exceptPass() + logger.trace("Completed VCF-HVGS step 4", variant) + return skipvar + + +def indel_catching(variant, validator): + """ + Warns that descriptions such as c.ins12 or g.del69 are not HGVS compliant + Strips the trailing numbers and tries to parse the description into an + hgvs object. 
+ If parses, provides a warning including links to the VarNomen web page, but + continues validation + If not, an error message is generated and the loop continues + """ + edit_pass = re.compile(r'_\d+$') + edit_fail = re.compile(r'\d+$') + if edit_fail.search(variant.quibble): + if not edit_pass.search(variant.quibble): + failed = variant.quibble + # Catch the trailing digits + digits = re.search(r"(\d+$)", failed) + digits = digits.group(1) + remove = str(digits) + 'end_anchor' + failed = failed + 'end_anchor' + failed = failed.replace(remove, '') + + # Remove them so that the string SHOULD parse + try: + hgvs_failed = validator.hp.parse_hgvs_variant(failed) + except hgvs.exceptions.HGVSError as e: + error = 'The syntax of the input variant description is invalid ' + if failed.endswith('ins'): + issue_link = 'http://varnomen.hgvs.org/recommendations/DNA/variant/insertion/' + error = error + ' please refer to ' + issue_link + variant.warnings += error + logger.warning(str(error) + " " + str(e)) + return True + + hgvs_failed.posedit.edit = str(hgvs_failed.posedit.edit).replace(digits, '') + failed = str(hgvs_failed) + automap = 'Non HGVS compliant variant description ' + variant.quibble + ' automapped to ' + failed + variant.warnings += ': ' + automap + logger.warning(automap) + variant.quibble = failed + + logger.trace("Ins/Del reference catching complete", variant) + return False + + +def intronic_converter(variant): + """ + Fully HGVS compliant intronic variant descriptions take the format e.g + NG_007400.1(NM_000088.3):c.589-1G>T. However, hgvs cannot parse and map + these variant strings. + This function: + Removes the g. 
reference sequence + NG_007400.1(NM_000088.3):c.589-1G>T ---> (NM_000088.3):c.589-1G>T + Removes the parintheses + (NM_000088.3):c.589-1G>T ---> NM_000088.3:c.589-1G>T + hgvs can now parse the string into an hgvs variant object and manipulate it + """ + compounder = re.compile(r'\(NM_') + if compounder.search(variant.quibble): + # Find pattern e.g. +0000 and assign to a variable + transy = re.search(r"(NM_.+)", variant.quibble) + transy = transy.group(1) + transy = transy.replace(')', '') + variant.quibble = transy + logger.trace("HVGS typesetting complete", variant) + diff --git a/VariantValidator/modules/vvChromosomes.py b/VariantValidator/modules/vvChromosomes.py index d92b0c54..78b7ee49 100644 --- a/VariantValidator/modules/vvChromosomes.py +++ b/VariantValidator/modules/vvChromosomes.py @@ -1532,6 +1532,7 @@ def to_accession(chr_num, primary_assembly): } # Convert call line to rs line chr_num = chr_num.upper() + chr_accession = None if 'CHR' in chr_num[:3]: chr_num = chr_num[3:] if primary_assembly == 'GRCh37': diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index b157d294..cdf7ae1d 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -53,6 +53,7 @@ from . import vvMixinConverters from .vvFunctions import VariantValidatorError from . import variant +from . 
import format_converters class Mixin(vvMixinConverters.Mixin): @@ -85,15 +86,18 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # sf = hgvs.dataproviders.seqfetcher.SeqFetcher() primary_assembly=None + self.selected_assembly = selected_assembly + self.select_transcripts = select_transcripts + try: # Validation ############ # Create a dictionary of transcript ID : '' + select_transcripts_dict = {} + select_transcripts_dict_plus_version = {} if select_transcripts != 'all': select_transcripts_list = select_transcripts.split('|') - select_transcripts_dict = {} - select_transcripts_dict_plus_version = {} for id in select_transcripts_list: id = id.strip() if re.match('LRG', id): @@ -159,7 +163,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Bug catcher try: - # Note, ID is not touched. It is always the input variant description. Quibble will be altered but id will not if type = g. + # Note, ID is not touched. It is always the input variant description. + # Quibble will be altered but id will not if type = g. input = my_variant.quibble logger.trace("Commenced validation of " + str(my_variant.quibble), my_variant) @@ -210,506 +215,43 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr boundary = 'false' # VCF type 1 - """ - VCF2HGVS stage 1. 
converts chr-pos-ref-alt into chr:posRef>Alt - The output format is a common mistake caused by inaccurate conversion of - VCF variants into HGVS - hence the need for conversion step 2 - """ - if re.search(r'[-:]\d+[-:][GATC]+[-:][GATC]+', input): - input = input.replace(':', '-') - # Extract primary_assembly if provided - if re.match(r'GRCh3\d+-', input) or re.match(r'hg\d+-', input): - in_list = input.split('-') - selected_assembly = in_list[0] - input = '-'.join(in_list[1:]) - pre_input = copy.deepcopy(input) - vcf_elements = pre_input.split('-') - input = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[3]) - elif re.search(r'[-:]\d+[-:][GATC]+[-:]', input): - input = input.replace(':', '-') - # Extract primary_assembly if provided - if re.match(r'GRCh3\d+-', input) or re.match(r'hg\d+-', input): - in_list = input.split('-') - selected_assembly = in_list[0] - input = '-'.join(in_list[1:]) - pre_input = copy.deepcopy(input) - vcf_elements = pre_input.split('-') - my_variant.warnings = 'Not stating ALT bases is ambiguous because VCF specification 4.0 would treat ' + pre_input + ' as a deletion whereas VCF specification 4.1 onwards would treat ' + pre_input + ' as ALT = REF' - my_variant.warnings += ': VariantValidator has output both alternatives' - logger.resub('Not stating ALT bases is ambiguous because VCF specification 4.0 would treat ' + - pre_input + ' as a deletion whereas VCF specification 4.1 onwards would treat ' + pre_input + - ' as ALT = REF. 
Validator will output both alternatives.') - my_variant.write = False - input_A = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], 'del') - input_B = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[2]) - queryA = variant.Variant(my_variant.original, quibble=input_A, warnings=my_variant.warnings, primary_assembly=primary_assembly, order=ordering) - queryB = variant.Variant(my_variant.original, quibble=input_B, warnings=my_variant.warnings, primary_assembly=primary_assembly, order=ordering) - self.batch_list.append(queryA) - self.batch_list.append(queryB) + toskip = format_converters.vcf2hgvs_stage1(my_variant, self) + if toskip: continue - elif re.search(r'[-:]\d+[-:][-:][GATC]+', input) or re.search(r'[-:]\d+[-:][.][-:][GATC]+', input): - input = input.replace(':', '-') - if re.search('-.-', input): - input = input.replace('-.-', '-ins-') - if re.search('--', input): - input = input.replace('--', '-ins-') - # Extract primary_assembly if provided - if re.match(r'GRCh3\d+-', input) or re.match(r'hg\d+-', input): - in_list = input.split('-') - selected_assembly = in_list[0] - input = '-'.join(in_list[1:]) - pre_input = copy.deepcopy(input) - vcf_elements = pre_input.split('-') - input = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[3]) - stash_input = input - logger.trace("Completed VCF-HVGS step 1", my_variant) + # API type non-HGVS # e.g. Chr16:2099572TC>T - """ - VCF2HGVS conversion step 2 identifies the correct chromosomal reference - sequence based upon the non compliant identifier e.g. :2099572TC>T. - The data is currently stored in variantanalyser.supported_chromosome_builds. - Anticipated future builds will be transferred to MySQL which can be more - easily updated and maintained. - LRGs and LRG_ts also need to be assigned the correct reference sequence identifier. - The LRG ID data ia stored in the VariantValidator MySQL database. 
- The reference sequence type is also assigned. - """ - if re.search(r'\w+\:', input) and not re.search(r'\w+\:[gcnmrp]\.', input): - if re.search(r'\w+\:[gcnmrp]', input) and not re.search(r'\w+\:[gcnmrp]\.', input): - # Missing dot - pass - else: - try: - if re.search('GRCh37', input) or re.search('hg19', input): - primary_assembly = 'GRCh37' - elif re.search('GRCh38', input) or re.search('hg38', input): - primary_assembly = 'GRCh38' - pre_input = copy.deepcopy(input) - input_list = input.split(':') - pos_ref_alt = str(input_list[1]) - positionAndEdit = input_list[1] - if not re.match(r'N[CGTWMRP]_', input) and not re.match(r'LRG_', input): - chr_num = str(input_list[0]) - chr_num = chr_num.upper() - chr_num = chr_num.strip() - if re.match('CHR', chr_num): - chr_num = chr_num.replace('CHR', '') - # Use selected assembly - accession = vvChromosomes.to_accession(chr_num, selected_assembly) - if accession is None: - my_variant.warnings += ': ' + chr_num + \ - ' is not part of genome build ' + selected_assembly - logger.warning(chr_num + ' is not part of genome build ' + selected_assembly) - continue - else: - accession = input_list[0] - if re.search('>', pre_input): - if re.search('del', pre_input): - pos = re.match(r'\d+', pos_ref_alt) - position = pos.group(0) - old_ref, old_alt = pos_ref_alt.split('>') - old_ref = old_ref.replace(position, '') - position = int(position) - 1 - required_base = self.sf.fetch_seq(accession, start_i=position - 1, end_i=position) - ref = required_base + old_ref - alt = required_base - positionAndEdit = str(position) + ref + '>' + alt - elif re.search('ins', pre_input): - pos = re.match(r'\d+', pos_ref_alt) - position = pos.group(0) - old_ref, old_alt = pos_ref_alt.split('>') - # old_ref = old_ref.replace(position, '') - position = int(position) - 1 - required_base = self.sf.fetch_seq(accession, start_i=position - 1, end_i=position) - ref = required_base - alt = required_base + old_alt - positionAndEdit = str(position) + ref + '>' + alt 
- # Assign reference sequence type - ref_type = self.db.ref_type_assign(accession) - if re.match('LRG_', accession): - if ref_type == ':g.': - accession = self.db.get_RefSeqGeneID_from_lrgID(accession) - else: - accession = self.db.get_RefSeqTranscriptID_from_lrgTranscriptID(accession) - else: - accession = accession - input = str(accession) + ref_type + str(positionAndEdit) - stash_input = input - except: - fn.exceptPass(my_variant) - - # Descriptions lacking the colon : - if re.search(r'[gcnmrp]\.', input) and not re.search(r':[gcnmrp]\.', input): - error = 'Unable to identify a colon (:) in the variant description %s. A colon is required in HGVS variant descriptions to separate the reference accession from the reference type i.e. :. e.g. :c.' % ( - input) - my_variant.warnings += ': ' + error - logger.warning(error) + toskip = format_converters.vcf2hgvs_stage2(my_variant, self) + if toskip: continue - # Ambiguous chr reference - logger.trace("Completed VCF-HVGS step 2", my_variant) - """ - VCF2HGVS conversion step 3 is similar to step 2 but handles - formats like Chr16:g.2099572TC>T which are provided by Alamut and other - software - """ - if re.search(r'\w+:[gcnmrp]\.', input) and not re.match(r'N[CGTWMRP]_', input): - # Take out lowercase Accession characters - lower_cased_list = input.split(':') - if re.search('LRG', lower_cased_list[0], re.IGNORECASE): - lower_case_accession = lower_cased_list[0] - lower_case_accession = lower_case_accession.replace('l', 'L') - lower_case_accession = lower_case_accession.replace('r', 'R') - lower_case_accession = lower_case_accession.replace('g', 'G') - else: - lower_case_accession = lower_cased_list[0] - lower_case_accession = lower_case_accession.upper() - input = ''.join(lower_cased_list[1:]) - input = lower_case_accession + ':' + input - if not re.match('LRG_', input) and not re.match('ENS', input) and not re.match('N[MRPC]_', input): - try: - if re.search('GRCh37', input) or re.search('hg19', input): - primary_assembly 
= 'GRCh37' - elif re.search('GRCh38', input) or re.search('hg38', input): - primary_assembly = 'GRCh38' - pre_input = copy.deepcopy(input) - input_list = input.split(':') - query_a_symbol = input_list[0] - is_it_a_gene = self.db.get_hgnc_symbol(query_a_symbol) - if is_it_a_gene == 'none': - pos_ref_alt = str(input_list[1]) - positionAndEdit = input_list[1] - chr_num = str(input_list[0]) - chr_num = chr_num.upper() - chr_num = chr_num.strip() - if re.match('CHR', chr_num): - chr_num = chr_num.replace('CHR', '') # Use selected assembly - accession = vvChromosomes.to_accession(chr_num, selected_assembly) - if accession is None: - my_variant.warnings += ': ' + chr_num + \ - ' is not part of genome build ' + selected_assembly - continue - input = str(accession) + ':' + str(positionAndEdit) - stash_input = input - else: - pass - except Exception as e: - exc_type, exc_value, last_traceback = sys.exc_info() - te = traceback.format_exc() - tbk = [str(exc_type), str(exc_value), str(te)] - er = str('\n'.join(tbk)) - logger.warning(str(exc_type) + " " + str(exc_value)) - logger.debug(er) + toskip = format_converters.vcf2hgvs_stage3(my_variant, self) + if toskip: + continue - # GENE_SYMBOL:c. n. types - logger.trace("Completed VCF-HGVS step 3", my_variant) - """ - Searches for gene symbols that have been used as reference sequence - identifiers. Provides a sufficiently repremanding warning, but also provides - correctly formatted variant descriptions with appropriate transcript - reference sequence identifiers i.e. NM_ .... 
- Note: the output from the function must be validated because VV has no way - of knowing which the users intended reference sequence was, and the exon - boundaries etc of the alternative transcript variants may not be equivalent - """ - if re.search(r'\w+\:[cn]\.', input): - try: - pre_input = copy.deepcopy(input) - query_a_symbol = pre_input.split(':')[0] - tx_edit = pre_input.split(':')[1] - is_it_a_gene = self.db.get_hgnc_symbol(query_a_symbol) - if is_it_a_gene != 'none': - uta_symbol = self.db.get_uta_symbol(is_it_a_gene) - available_transcripts = self.hdp.get_tx_for_gene(uta_symbol) - select_from_these_transcripts = {} - for tx in available_transcripts: - if re.match('NM_', tx[3]) or re.match('NR_', tx[3]): - if tx[3] not in list(select_from_these_transcripts.keys()): - select_from_these_transcripts[tx[3]] = '' - else: - continue - else: - continue - select_from_these_transcripts = '|'.join(list(select_from_these_transcripts.keys())) - if select_transcripts != 'all': - my_variant.write = False - for transcript in list(select_transcripts_dict_plus_version.keys()): - my_variant.warnings = 'HGVS variant nomenclature does not allow the use of a gene symbol (' + \ - query_a_symbol + ') in place of a valid reference sequence' - refreshed_description = transcript + ':' + tx_edit - query = variant.Variant(my_variant.original, quibble=refreshed_description, warnings=my_variant.warnings, primary_assembly=primary_assembly, order=ordering) - self.batch_list.append(query) - logger.resub('HGVS variant nomenclature does not allow the use of a gene symbol (' + \ - query_a_symbol + ') in place of a valid reference sequence') - else: - my_variant.warnings += ': ' + 'HGVS variant nomenclature does not allow the use of a gene symbol (' + \ - query_a_symbol + ') in place of a valid reference sequence: Re-submit ' + input + \ - ' and specify transcripts from the following: ' + 'select_transcripts=' + select_from_these_transcripts - logger.warning('HGVS variant nomenclature does 
not allow the use of a gene symbol (' + \ - query_a_symbol + ') in place of a valid reference sequence: Re-submit ' + input + \ - ' and specify transcripts from the following: ' + 'select_transcripts=' + select_from_these_transcripts) - continue - else: - pass - except: - fn.exceptPass() - logger.trace("Gene symbol reference catching complete", my_variant) + toskip = format_converters.gene_symbol_catch(my_variant, self, select_transcripts_dict_plus_version) + if toskip: + continue # NG_:c. or NC_:c. - """ - Similar to the GENE_SYMBOL:c. n. types function, but spots RefSeqGene or - Chromosomal reference sequence identifiers used in the context of c. variant - descriptions - """ - if re.search(r'\w+\:[cn]', input): - try: - if re.match(r'^NG_', input): - refSeqGeneID = input.split(':')[0] - tx_edit = input.split(':')[1] - gene_symbol = self.db.get_gene_symbol_from_refSeqGeneID(refSeqGeneID) - if gene_symbol != 'none': - uta_symbol = self.db.get_uta_symbol(gene_symbol) - available_transcripts = self.hdp.get_tx_for_gene(uta_symbol) - select_from_these_transcripts = {} - for tx in available_transcripts: - if re.match('NM_', tx[3]) or re.match('NR_', tx[3]): - if tx[3] not in list(select_from_these_transcripts.keys()): - select_from_these_transcripts[tx[3]] = '' - else: - continue - else: - continue - select_from_these_transcripts = '|'.join(list(select_from_these_transcripts.keys())) - if select_transcripts != 'all': - my_variant.write = False - for transcript in list(select_transcripts_dict_plus_version.keys()): - my_variant.warnings = 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. 
NG_(NM_):c.PositionVariation' - refreshed_description = refSeqGeneID + '(' + transcript + ')' + ':' + tx_edit - query = variant.Variant(my_variant.original, quibble=refreshed_description, warnings=my_variant.warnings, primary_assembly=primary_assembly, order=ordering) - - logger.resub( - 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. NG_(NM_):c.PositionVariation. Resubmitting corrected version.') - self.batch_list.append(query) - else: - my_variant.warnings += ': ' + 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation. Re-submit ' + input + ' but also specify transcripts from the following: ' + 'select_transcripts=' + select_from_these_transcripts - logger.warning( - + 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation. Re-submit ' + - str( - input) + ' but also specify transcripts from the following: ' + 'select_transcripts=' + str( - select_from_these_transcripts)) - continue - else: - my_variant.warnings += ': ' + 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation' - logger.warning( - 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation') - continue - elif re.match('^NC_', input): - my_variant.warnings += ': ' + 'A transcript reference sequence has not been provided e.g. NC_(NM_):c.PositionVariation. Unable to predict available transripts because chromosomal position is not specified' - logger.warning( - 'A transcript reference sequence has not been provided e.g. NC_(NM_):c.PositionVariation. 
Unable to predict available transripts because chromosomal position is not specified') - continue - else: - pass - except: - fn.exceptPass() + toskip = format_converters.refseq_catch(my_variant, self, select_transcripts_dict_plus_version) + if toskip: + continue - logger.trace("Chromosomal/RefSeqGene reference catching complete", my_variant) # Find not_sub type in input e.g. GGGG>G - """ - VCF2HGVS conversion step 4 has two purposes - 1. VCF is frequently inappropriately converted into HGVS like descriptions - such as GGGG>G which is actually a delins, del or ins. The function assigns - the correct edit type - 2. Detects and extracts multiple ALT sequences into HGVS descriptions and - automatically submits them for validation - """ - not_sub = copy.deepcopy(input) - not_sub_find = re.compile(r"([GATCgatc]+)>([GATCgatc]+)") - if not_sub_find.search(not_sub): - try: - # If the length of either side of the substitution delimer (>) is >1 - matches = not_sub_find.search(not_sub) - if len(matches.group(1)) > 1 or len(matches.group(2)) > 1 or re.search( - r"([GATCgatc]+)>([GATCgatc]+),([GATCgatc]+)", input): - # Search for and remove range - interval_range = re.compile(r"([0-9]+)_([0-9]+)") - if interval_range.search(not_sub): - m = not_sub_find.search(not_sub) - start = m.group(1) - delete = m.group(2) - beginning_string, middle_string = not_sub.split(':') - middle_string = middle_string.split('_')[0] - end_string = start + '>' + delete - not_sub = beginning_string + ':' + middle_string + end_string - # Split description - split_colon = not_sub.split(':') - ref_ac = split_colon[0] - remainder = split_colon[1] - split_dot = remainder.split('.') - ref_type = split_dot[0] - remainder = split_dot[1] - posedit = remainder - split_greater = remainder.split('>') - insert = split_greater[1] - remainder = split_greater[0] - # Split remainder using matches - r = re.compile(r"([0-9]+)([GATCgatc]+)") - try: - m = r.search(remainder) - start = m.group(1) - delete = m.group(2) - 
starts = posedit.split(delete)[0] - re_try = ref_ac + ':' + ref_type + '.' + starts + 'del' + delete[0] + 'ins' + insert - hgvs_re_try = self.hp.parse_hgvs_variant(re_try) - hgvs_re_try.posedit.edit.ref = delete - start_pos = str(hgvs_re_try.posedit.pos.start) - if re.search(r'\-', start_pos): - base, offset = start_pos.split('-') - new_offset = 0 - int(offset) + (len(delete)) - end_pos = int(base) - hgvs_re_try.posedit.pos.end.base = int(end_pos) - hgvs_re_try.posedit.pos.end.offset = int(new_offset) - 1 - not_delins = ref_ac + ':' + ref_type + '.' + start_pos + '_' + str( - hgvs_re_try.posedit.pos.end) + 'del' + delete + 'ins' + insert - elif re.search(r'\+', start_pos): - base, offset = start_pos.split('+') - end_pos = int(base) + (len(delete) - int(offset) - 1) - new_offset = 0 + int(offset) + (len(delete) - 1) - hgvs_re_try.posedit.pos.end.base = int(end_pos) - hgvs_re_try.posedit.pos.end.offset = int(new_offset) - not_delins = ref_ac + ':' + ref_type + '.' + start_pos + '_' + str( - hgvs_re_try.posedit.pos.end) + 'del' + delete + 'ins' + insert - else: - end_pos = int(start_pos) + (len(delete) - 1) - not_delins = ref_ac + ':' + ref_type + '.' 
+ start_pos + '_' + str( - end_pos) + 'del' + delete + 'ins' + insert - except: - fn.exceptPass() - not_delins = not_sub - # Parse into hgvs object - try: - hgvs_not_delins = self.hp.parse_hgvs_variant(not_delins) - except hgvs.exceptions.HGVSError as e: - # Sort out multiple ALTS from VCF inputs - if re.search(r"([GATCgatc]+)>([GATCgatc]+),([GATCgatc]+)", not_delins): - header, alts = not_delins.split('>') - # Split up the alts into a list - alt_list = alts.split(',') - # Assemble and re-submit - for alt in alt_list: - my_variant.warnings = 'Multiple ALT sequences detected: auto-submitting all possible combinations' - my_variant.write = False - refreshed_description = header + '>' + alt - query = variant.Variant(my_variant.original, quibble=refreshed_description, warnings=my_variant.warnings, primary_assembly=primary_assembly, order=ordering) - - self.batch_list.append(query) - logger.resub( - 'Multiple ALT sequences detected. Auto-submitting all possible combinations.') - continue - else: - error = str(e) - issue_link = '' - my_variant.warnings += ': ' + error - logger.warning(str(e)) - continue - - # Re-Stash the input as an HGVS - stash_input = copy.copy(hgvs_not_delins) - try: - not_delins = str(hn.normalize(hgvs_not_delins)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('Normalization of intronic variants is not supported', error): - not_delins = not_delins - else: - issue_link = '' - my_variant.warnings += ': ' + str(error) - logger.warning(str(e)) - continue - # Create warning - caution = 'Variant description ' + input + ' is not HGVS compliant' - automap = input + ' automapped to ' + not_delins - my_variant.warnings += ': ' + automap - # Change input to normalized variant - input = not_delins - else: - pass - except: - fn.exceptPass() - else: - pass - logger.trace("Completed VCF-HVGS step 4", my_variant) + toskip = format_converters.vcf2hgvs_stage4(my_variant, self, hn) + if toskip: + continue - # Tackle edit1234 type - """ - 
Warns that descriptions such as c.ins12 or g.del69 are not HGVS compliant - Strips the trailing numbers and tries to parse the description into an - hgvs object. - If parses, provides a warning including links to the VarNomen web page, but - continues validation - If not, an error message is generated and the loop continues - """ - edit_pass = re.compile(r'_\d+$') - edit_fail = re.compile(r'\d+$') - if edit_fail.search(input): - if edit_pass.search(input): - pass - else: - error = 'false' - issue_link = 'false' - failed = copy.deepcopy(input) - # Catch the trailing digits - digits = re.search(r"(\d+$)", failed) - digits = digits.group(1) - remove = str(digits) + 'end_anchor' - failed = failed + 'end_anchor' - failed = failed.replace(remove, '') - - # Remove them so that the string SHOULD parse - try: - hgvs_failed = self.hp.parse_hgvs_variant(failed) - except hgvs.exceptions.HGVSError as e: - error = str(e) - error = 'The syntax of the input variant description is invalid ' - if re.search(r'ins$', failed): - issue_link = 'http://varnomen.hgvs.org/recommendations/DNA/variant/insertion/' - error = error + ' please refer to ' + issue_link - my_variant.warnings += error - logger.warning(str(error) + " " + str(e)) - continue + input = my_variant.quibble - hgvs_failed.posedit.edit = str(hgvs_failed.posedit.edit).replace(digits, '') - failed = str(hgvs_failed) - hgvs_failed = self.hp.parse_hgvs_variant(failed) - automap = 'Non HGVS compliant variant description ' + input + ' automapped to ' + failed - my_variant.warnings += ': ' + automap - logger.warning(automap) - input = failed + toskip = format_converters.indel_catching(my_variant, self) + if toskip: + continue - logger.trace("Ins/Del reference catching complete", my_variant) # Tackle compound variant descriptions NG or NC (NM_) i.e. correctly input NG/NC_(NM_):c. - """ - Fully HGVS compliant intronic variant descriptions take the format e.g - NG_007400.1(NM_000088.3):c.589-1G>T. 
However, hgvs cannot parse and map - these variant strings. - This function: - Removes the g. reference sequence - NG_007400.1(NM_000088.3):c.589-1G>T ---> (NM_000088.3):c.589-1G>T - Removes the parintheses - (NM_000088.3):c.589-1G>T ---> NM_000088.3:c.589-1G>T - hgvs can now parse the string into an hgvs variant object and manipulate it - """ - caution = '' - compounder = re.compile(r'\(NM_') - compounder_b = re.compile(r'\(ENST') - if compounder.search(input): - # Find pattern e.g. +0000 and assign to a variable - transy = re.search(r"(NM_.+)", input) - transy = transy.group(1) - transy = transy.replace(')', '') - input = transy - logger.trace("HVGS typesetting complete", my_variant) + format_converters.intronic_converter(my_variant) + # Extract variants from HGVS allele descriptions # http://varnomen.hgvs.org/recommendations/DNA/variant/alleles/ """ From ff08c031d8c332379bab468b81efc3005800067c Mon Sep 17 00:00:00 2001 From: TeriForey Date: Tue, 19 Mar 2019 15:43:57 +0000 Subject: [PATCH 052/223] Moved more format converters into seperate file and cleaned up the code running in validator method --- VariantValidator/modules/format_converters.py | 109 ++++++ VariantValidator/modules/variant.py | 79 ++++- VariantValidator/modules/vvMixinCore.py | 319 ++++++------------ 3 files changed, 284 insertions(+), 223 deletions(-) diff --git a/VariantValidator/modules/format_converters.py b/VariantValidator/modules/format_converters.py index 73c7c0fc..31c59edb 100644 --- a/VariantValidator/modules/format_converters.py +++ b/VariantValidator/modules/format_converters.py @@ -520,3 +520,112 @@ def intronic_converter(variant): variant.quibble = transy logger.trace("HVGS typesetting complete", variant) + +def allele_parser(variant, validation, hn): + """ + HGVS allele string parsing function Occurance #1 + Takes a single HGVS allele description and separates each allele into a + list of HGVS variants. The variants are then automatically submitted for + validation. 
+ Note: In this context, it is inappropriate to validate descriptions + containing intronic variant descriptions. In such instances, allele + descriptions should be re-submitted by the user at the gene or genome level + """ + caution = '' + if (re.search(r':[gcnr].\[', variant.quibble) and re.search(r'\;', variant.quibble)) or ( + re.search(r':[gcrn].\d+\[', variant.quibble) and re.search(r'\;', variant.quibble)) or (re.search(r'\(\;\)', variant.quibble)): + # handle LRG inputs + if re.match(r'^LRG', variant.quibble): + if re.match(r'^LRG\d+', variant.quibble): + string, remainder = variant.quibble.split(':') + reference = string.replace('LRG', 'LRG_') + variant.quibble = reference + ':' + remainder + caution = string + ' updated to ' + reference + if not re.match(r'^LRG_\d+', variant.quibble): + pass + elif re.match(r'^LRG_\d+:g.', variant.quibble) or re.match(r'^LRG_\d+:p.', variant.quibble) or re.match(r'^LRG_\d+:c.', + variant.quibble) or re.match( + r'^LRG_\d+:n.', variant.quibble): + lrg_reference, variation = variant.quibble.split(':') + refseqgene_reference = validation.db.get_RefSeqGeneID_from_lrgID(lrg_reference) + if refseqgene_reference != 'none': + variant.quibble = refseqgene_reference + ':' + variation + if caution == '': + caution = lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation + else: + caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation + variant.warnings += ': ' + str(caution) + logger.warning(str(caution)) + elif re.match(r'^LRG_\d+t\d+:c.', variant.quibble) or re.match(r'^LRG_\d+t\d+:n.', variant.quibble) or re.match( + r'^LRG_\d+t\d+:p.', variant.quibble) or re.match(r'^LRG_\d+t\d+:g.', variant.quibble): + lrg_reference, variation = variant.quibble.split(':') + refseqtranscript_reference = validation.db.get_RefSeqTranscriptID_from_lrgTranscriptID( + lrg_reference) + if refseqtranscript_reference != 'none': + variant.quibble = 
refseqtranscript_reference + ':' + variation + if caution == '': + caution = lrg_reference + ':' + variation + ' automapped to ' + refseqtranscript_reference + ':' + variation + else: + caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + refseqtranscript_reference + ':' + variation + variant.warnings += ': ' + str(caution) + logger.warning(str(caution)) + else: + pass + try: + # Submit to allele extraction function + alleles = validation.hgvs_alleles(variant.quibble, hn) + variant.warnings += ': ' + 'Automap has extracted possible variant descriptions' + logger.resub('Automap has extracted possible variant descriptions, resubmitting') + for allele in alleles: + query = Variant(variant.original, quibble=allele, warnings=variant.warnings, write=True, + primary_assembly=variant.primary_assembly, order=variant.order) + validation.batch_list.append(query) + variant.write = False + return True + except fn.alleleVariantError as e: + if re.search("Cannot validate sequence of an intronic variant", str(e)): + variant.warnings += ': ' + 'Intronic positions not supported for HGVS Allele descriptions' + logger.warning('Intronic positions not supported for HGVS Allele descriptions') + return True + elif re.search("No transcript definition for ", str(e)): + variant.warnings += ': ' + str(e) + logger.warning(str(e)) + return True + else: + raise fn.VariantValidatorError(str(e)) + logger.trace("HVGS String allele parsing pass 1 complete", variant) + return False + + +def lrg_to_refseq(variant, validator): + """ + LRG and LRG_t reference sequence identifiers need to be replaced with + equivalent RefSeq identifiers. 
The lookup data is stored in the + VariantValidator MySQL database + """ + caution = '' + if variant.refsource == 'LRG': + if re.match(r'^LRG\d+', variant.hgvs_formatted.ac): + reference = variant.hgvs_formatted.ac.replace('LRG', 'LRG_') + caution = variant.hgvs_formatted.ac + ' updated to ' + reference + ': ' + variant.hgvs_formatted.ac = reference + variant.set_quibble(str(variant.hgvs_formatted)) + + if re.match(r'^LRG_\d+t\d+:', variant.quibble): + lrg_reference, variation = variant.quibble.split(':') + refseqtrans_reference = validator.db.get_RefSeqTranscriptID_from_lrgTranscriptID(lrg_reference) + if refseqtrans_reference != 'none': + variant.hgvs_formatted.ac = refseqtrans_reference + variant.set_quibble(str(variant.hgvs_formatted)) + caution += lrg_reference + ':' + variation + ' automapped to ' + refseqtrans_reference + ':' + variation + variant.warnings += ': ' + caution + logger.warning(caution) + elif re.match(r'^LRG_\d+:', variant.quibble): + lrg_reference, variation = variant.quibble.split(':') + refseqgene_reference = validator.db.get_RefSeqGeneID_from_lrgID(lrg_reference) + if refseqgene_reference != 'none': + variant.hgvs_formatted.ac = refseqgene_reference + variant.set_quibble(str(variant.hgvs_formatted)) + caution += lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation + variant.warnings += ': ' + caution + logger.warning(caution) \ No newline at end of file diff --git a/VariantValidator/modules/variant.py b/VariantValidator/modules/variant.py index fec081b4..41d5ebf7 100644 --- a/VariantValidator/modules/variant.py +++ b/VariantValidator/modules/variant.py @@ -1,5 +1,5 @@ import re -import string +from . 
import vvFunctions as fn class Variant(object): @@ -14,7 +14,7 @@ def __init__(self, original, quibble=None, warnings='', write=True, primary_asse self.quibble = original else: self.quibble = quibble - self.hgvs_formatted = original + self.hgvs_formatted = None self.warnings = warnings self.description = '' @@ -32,6 +32,9 @@ def __init__(self, original, quibble=None, warnings='', write=True, primary_asse self.timing = {} + self.refsource = None + self.reftype = None + def is_ascii(self): """ Instead of the previous test for unicode rich text characters. @@ -68,3 +71,75 @@ def remove_whitespace(self): """ self.quibble = ''.join(self.quibble.split()) + def format_quibble(self): + """ + Removes whitespace from the ends of the string + Removes anything in brackets + Identifies variant type (p. c. etc) + Accepts c, g, n, r currently. And now P also 15.07.15 + """ + # Set regular expressions for if statements + pat_gene = re.compile(r'\(.+?\)') # Pattern looks for (....) + + if pat_gene.search(self.quibble): + self.quibble = pat_gene.sub('', self.quibble) + + try: + self.set_refsource() + except fn.VariantValidatorError: + return True + + try: + self.set_reftype() + except fn.VariantValidatorError: + return True + + return False + + def set_reftype(self): + """ + Method will set the reftype based on the quibble + :return: + """ + pat_est = re.compile(r'\d\:\d') + + if ':g.' in self.quibble: + self.reftype = ':g.' + elif ':r.' in self.quibble: + self.reftype = ':r.' + elif ':n.' in self.quibble: + self.reftype = ':n.' + elif ':c.' in self.quibble: + self.reftype = ':c.' + elif ':p.' in self.quibble: + self.reftype = ':p.' + elif ':m.' in self.quibble: + self.reftype = ':m.' 
+ elif pat_est.search(self.quibble): + self.reftype = 'est' + else: + raise fn.VariantValidatorError("Unable to identity reference type from %s" % self.quibble) + + def set_refsource(self): + """ + Method will set the refsource based on the quibble + :return: + """ + if self.quibble.startswith('LRG'): + self.refsource = 'LRG' + elif self.quibble.startswith('ENS'): + self.refsource = 'ENS' + elif self.quibble.startswith('N'): + self.refsource = 'RefSeq' + else: + raise fn.VariantValidatorError("Unable to identify reference source from %s" % self.quibble) + + def set_quibble(self, newval): + """ + Method will set the quibble and reset the refsource and reftype + :param newval: + :return: + """ + self.quibble = newval + self.set_refsource() + self.set_reftype() diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index cdf7ae1d..d8e555d8 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -202,10 +202,11 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if primary_assembly in self.genome_builds: my_variant.primary_assembly = primary_assembly else: + my_variant.primary_assembly = 'GRCh38' primary_assembly = 'GRCh38' my_variant.warnings += ': Invalid genome build has been specified. Automap has selected the default build (GRCh38)' logger.warning( - 'Invalid genome build has been specified. Automap has selected the default build ' + primary_assembly) + 'Invalid genome build has been specified. 
Automap has selected the default build ' + my_variant.primary_assembly) else: primary_assembly = my_variant.primary_assembly logger.trace("Completed string formatting", my_variant) @@ -243,8 +244,6 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if toskip: continue - input = my_variant.quibble - toskip = format_converters.indel_catching(my_variant, self) if toskip: continue @@ -254,86 +253,31 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Extract variants from HGVS allele descriptions # http://varnomen.hgvs.org/recommendations/DNA/variant/alleles/ - """ - HGVS allele string parsing function Occurance #1 - Takes a single HGVS allele description and separates each allele into a - list of HGVS variants. The variants are then automatically submitted for - validation. - Note: In this context, it is inappropriate to validate descriptions - containing intronic variant descriptions. In such instances, allele - descriptions should be re-submitted by the user at the gene or genome level - """ - if (re.search(r':[gcnr].\[', input) and re.search(r'\;', input)) or ( - re.search(r':[gcrn].\d+\[', input) and re.search(r'\;', input)) or (re.search(r'\(\;\)', input)): - # handle LRG inputs - if re.match(r'^LRG', input): - if re.match(r'^LRG\d+', input): - string, remainder = input.split(':') - reference = string.replace('LRG', 'LRG_') - input = reference + ':' + remainder - caution = string + ' updated to ' + reference - if not re.match(r'^LRG_\d+', input): - pass - elif re.match(r'^LRG_\d+:g.', input) or re.match(r'^LRG_\d+:p.', input) or re.match(r'^LRG_\d+:c.', - input) or re.match( - r'^LRG_\d+:n.', input): - lrg_reference, variation = input.split(':') - refseqgene_reference = self.db.get_RefSeqGeneID_from_lrgID(lrg_reference) - if refseqgene_reference != 'none': - input = refseqgene_reference + ':' + variation - if caution == '': - caution = lrg_reference + ':' + variation + ' automapped to ' + 
refseqgene_reference + ':' + variation - else: - caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation - my_variant.warnings += ': ' + str(caution) - logger.warning(str(caution)) - elif re.match(r'^LRG_\d+t\d+:c.', input) or re.match(r'^LRG_\d+t\d+:n.', input) or re.match( - r'^LRG_\d+t\d+:p.', input) or re.match(r'^LRG_\d+t\d+:g.', input): - lrg_reference, variation = input.split(':') - refseqtranscript_reference = self.db.get_RefSeqTranscriptID_from_lrgTranscriptID( - lrg_reference) - if refseqtranscript_reference != 'none': - input = refseqtranscript_reference + ':' + variation - if caution == '': - caution = lrg_reference + ':' + variation + ' automapped to ' + refseqtranscript_reference + ':' + variation - else: - caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + refseqtranscript_reference + ':' + variation - my_variant.warnings += ': ' + str(caution) - logger.warning(str(caution)) - else: - pass - try: - # Submit to allele extraction function - alleles = self.hgvs_alleles(input,hn) - my_variant.warnings += ': ' + 'Automap has extracted possible variant descriptions' - logger.resub('Automap has extracted possible variant descriptions, resubmitting') - for allele in alleles: - query = variant.Variant(my_variant.original, quibble=allele, warnings=my_variant.warnings, write=True, primary_assembly=my_variant.primary_assembly, order=ordering) - coding = 'intergenic' - self.batch_list.append(query) - my_variant.write = False - continue - except fn.alleleVariantError as e: - if re.search("Cannot validate sequence of an intronic variant", str(e)): - my_variant.warnings += ': ' + 'Intronic positions not supported for HGVS Allele descriptions' - logger.warning('Intronic positions not supported for HGVS Allele descriptions') - continue - elif re.search("No transcript definition for ",str(e)): - my_variant.warnings += ': ' + str(e) - logger.warning(str(e)) - continue - else: - 
raise VariantValidatorError(str(e)) - logger.trace("HVGS String allele parsing pass 1 complete", my_variant) + toskip = format_converters.allele_parser(my_variant, self, hn) + if toskip: + continue + + input = my_variant.quibble + + print("Original: %s" % my_variant.original) + print("Quibble: %s" % my_variant.quibble) + + caution = '' # INITIAL USER INPUT FORMATTING - """ - Removes whitespace from the ends of the string - Removes anything in brackets - Identifies variant type - Returns a dictionary containing the formatted input string and the variant type - Accepts c, g, n, r currently - """ - formatted = fn.user_input(input) + invalid = my_variant.format_quibble() + if invalid: + if re.search(r'\w+\:[gcnmrp]', my_variant.quibble) and not re.search(r'\w+\:[gcnmrp]\.', my_variant.quibble): + error = 'Variant description ' + my_variant.quibble + ' lacks the . character between and in the expected pattern :.' + else: + error = 'Variant description ' + my_variant.quibble + ' is not in an accepted format' + my_variant.warnings += ': ' + error + logger.warning(error) + continue + + formatted_variant = my_variant.quibble + input = my_variant.quibble + stash_input = my_variant.quibble + format_type = my_variant.reftype # Validator specific variables, note, not all will be necessary for batch, but keep to ensure that batch works # vars = [] @@ -349,28 +293,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # cr_available = 'false' # rcmds_tab = 'false' - # Check the initial validity of the input - if formatted == 'invalid': - if re.search(r'\w+\:[gcnmrp]', input) and not re.search(r'\w+\:[gcnmrp]\.', input): - error = 'Variant description ' + input + ' lacks the . character between and in the expected pattern :.' 
- else: - error = 'Variant description ' + input + ' is not in an accepted format' - my_variant.warnings += ': ' + error - logger.warning(error) - continue - else: - formatted_variant = formatted['variant'] - input = formatted['variant'] - stash_input = formatted['variant'] - format_type = formatted['type'] logger.trace("Variant input formatted, proceeding to validate.", my_variant) + # Conversions - """ - Conversions are not currently supported. The HGVS format for conversions - is rarely seen wrt genomic sequencing data and needs to be re-evaluated - """ - conversion = re.compile('con') - if conversion.search(formatted_variant): + # Conversions are not currently supported. The HGVS format for conversions + # is rarely seen wrt genomic sequencing data and needs to be re-evaluated + if 'con' in my_variant.quibble: my_variant.warnings += ': ' + 'Gene conversions currently unsupported' logger.warning('Gene conversions currently unsupported') continue @@ -389,59 +317,60 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr try: input_parses = self.hp.parse_hgvs_variant(formatted_variant) + print(input_parses, input_parses.ac, type(input_parses.ac)) + my_variant.hgvs_formatted = input_parses except hgvs.exceptions.HGVSError as e: - error = str(e) - if error == 'false': - if 'LRG' in input_parses.ac: - input_parses.ac.replace('T', 't') - else: - input_parses.ac = input_parses.ac.upper() - if hasattr(input_parses.posedit.edit, 'alt'): - if input_parses.posedit.edit.alt is not None: - input_parses.posedit.edit.alt = input_parses.posedit.edit.alt.upper() - if hasattr(input_parses.posedit.edit, 'ref'): - if input_parses.posedit.edit.ref is not None: - input_parses.posedit.edit.ref = input_parses.posedit.edit.ref.upper() - formatted_variant = str(input_parses) - input = str(input_parses) - pass - else: - my_variant.warnings += ': ' + str(error) + my_variant.warnings += ': ' + str(e) logger.warning(error) continue - """ - ENST support needs to be 
re-evaluated, but is very low priority - ENST not supported by ACMG and is under review by HGVS - """ - if re.match('^ENST', str(input_parses)): - trap_ens_in = str(input_parses) - sim_tx = self.hdp.get_similar_transcripts(input_parses.ac) + if 'LRG' in my_variant.hgvs_formatted.ac: + my_variant.hgvs_formatted.ac.replace('T', 't') + else: + my_variant.hgvs_formatted.ac = my_variant.hgvs_formatted.ac.upper() + if hasattr(my_variant.hgvs_formatted.posedit.edit, 'alt'): + if my_variant.hgvs_formatted.posedit.edit.alt is not None: + my_variant.hgvs_formatted.posedit.edit.alt = my_variant.hgvs_formatted.posedit.edit.alt.upper() + if hasattr(my_variant.hgvs_formatted.posedit.edit, 'ref'): + if my_variant.hgvs_formatted.posedit.edit.ref is not None: + my_variant.hgvs_formatted.posedit.edit.ref = my_variant.hgvs_formatted.posedit.edit.ref.upper() + formatted_variant = str(my_variant.hgvs_formatted) + input = str(my_variant.hgvs_formatted) + + my_variant.set_quibble(str(my_variant.hgvs_formatted)) + + # ENST support needs to be re-evaluated, but is very low priority + # ENST not supported by ACMG and is under review by HGVS + if my_variant.refsource == 'ENS': + trap_ens_in = str(my_variant.hgvs_formatted) + sim_tx = self.hdp.get_similar_transcripts(my_variant.hgvs_formatted.ac) for line in sim_tx: - if str(line[2]) == 'True' and str(line[3]) == 'True' and str(line[4]) == 'True' and str( - line[5]) == 'True' and str(line[6]) == 'True': - input_parses.ac = (line[1]) - input = str(input_parses) - formatted_variant = input + print(line) + if line[2] and line[3] and line[4] and line[5] and line[6]: + print("RESET") + my_variant.hgvs_formatted.ac = line[1] + my_variant.set_quibble(str(my_variant.hgvs_formatted)) + formatted_variant = my_variant.quibble break - if re.match('^ENST', str(input_parses)): - error = 'Unable to map ' + str(input_parses.ac) + ' to an equivalent RefSeq transcript' - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) + if 
my_variant.refsource == 'ENS': + error = 'Unable to map ' + my_variant.hgvs_formatted.ac + ' to an equivalent RefSeq transcript' + my_variant.warnings += ': ' + error + logger.warning(error) continue else: - my_variant.warnings += ': ' + str(trap_ens_in) + ' automapped to equivalent RefSeq transcript ' + formatted_variant - logger.warning(str(trap_ens_in) + ' automapped to equivalent RefSeq transcript ' + formatted_variant) + my_variant.warnings += ': ' + str(trap_ens_in) + ' automapped to equivalent RefSeq transcript ' + my_variant.quibble + logger.warning(str(trap_ens_in) + ' automapped to equivalent RefSeq transcript ' + my_variant.quibble) logger.trace("HVGS acceptance test passed", my_variant) + # Check whether supported genome build is requested for non g. descriptions historic_assembly = 'false' mapable_assemblies = { - 'GRCh37': 'true', - 'GRCh38': 'true', - 'NCBI36': 'false' + 'GRCh37': True, + 'GRCh38': True, + 'NCBI36': False } is_mapable = mapable_assemblies.get(primary_assembly) - if is_mapable == 'true': + if is_mapable: # These objects cannot be moved outside of the main function because they gather data from the # iuser input e.g. alignment method and genome build @@ -473,19 +402,18 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: error = 'Mapping of ' + formatted_variant + ' to genome assembly ' + primary_assembly + ' is not supported' - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) + my_variant.warnings += ': ' + error + logger.warning(error) continue + # Catch interval end > interval start - """ - hgvs did/does not handle 3' UTR position ordering well. This function - ensures that end pos is not > start pos wrt 3' UTRs. - Also identifies some variants which span into the downstream sequence - i.e. out of bounds - """ + # hgvs did/does not handle 3' UTR position ordering well. This function + # ensures that end pos is not > start pos wrt 3' UTRs. 
+ # Also identifies some variants which span into the downstream sequence + # i.e. out of bounds astr = re.compile(r'\*') - if astr.search(str(input_parses.posedit)): - input_parses_copy = copy.deepcopy(input_parses) + if '*' in str(my_variant.hgvs_formatted.posedit): + input_parses_copy = copy.deepcopy(my_variant.hgvs_formatted) input_parses_copy.type = "c" # Map to n. position # Create easy variant mapper (over variant mapper) and splign locked evm @@ -496,86 +424,36 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: if to_n.posedit.pos.end.base < to_n.posedit.pos.start.base: error = 'Interval end position < interval start position ' - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) + my_variant.warnings += ': ' + error + logger.warning(error) continue - elif input_parses.posedit.pos.end.base < input_parses.posedit.pos.start.base: - error = 'Interval end position ' + str( - input_parses.posedit.pos.end.base) + ' < interval start position ' + str( - input_parses.posedit.pos.start.base) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) + elif my_variant.hgvs_formatted.posedit.pos.end.base < my_variant.hgvs_formatted.posedit.pos.start.base: + error = 'Interval end position ' + str(my_variant.hgvs_formatted.posedit.pos.end.base) + \ + ' < interval start position ' + str(my_variant.hgvs_formatted.posedit.pos.start.base) + my_variant.warnings += ': ' + error + logger.warning(error) continue - else: - pass # Catch missing version number in refseq ref_type = re.compile(r"^N\w\w\d") is_version = re.compile(r"\d\.\d") en_type = re.compile(r'^ENS') lrg_type = re.compile(r'LRG') - if (ref_type.search(str(input_parses)) and is_version.search(str(input_parses))) or ( - en_type.search(str(input_parses))): - pass - else: - if lrg_type.search(str(input_parses)): - pass - if ref_type.search(str(input_parses)): - error = 'RefSeq variant accession numbers MUST include a version number' - 
my_variant.warnings += ': ' + str(error) - continue + if my_variant.refsource == 'RefSeq' and not is_version.search(str(my_variant.hgvs_formatted)): + error = 'RefSeq variant accession numbers MUST include a version number' + my_variant.warnings += ': ' + str(error) + continue logger.trace("HVGS interval/version mapping complete", my_variant) # handle LRG inputs - """ - LRG and LRG_t reference sequence identifiers need to be replaced with - equivalent RefSeq identifiers. The lookup data is stored in the - VariantValidator MySQL database - """ - if re.match(r'^LRG', str(input_parses)): - if re.match(r'^LRG\d+', str(input_parses.ac)): - string = str(input_parses.ac) - reference = string.replace('LRG', 'LRG_') - input_parses.ac = reference - caution = string + ' updated to ' + reference - if not re.match(r'^LRG_\d+', str(input_parses)): - pass - elif re.match(r'^LRG_\d+:g.', str(input_parses)) or re.match(r'^LRG_\d+:p.', - str(input_parses)) or re.match( - r'^LRG_\d+:c.', str(input_parses)) or re.match(r'^LRG_\d+:n.', str(input_parses)): - lrg_reference, variation = str(input_parses).split(':') - refseqgene_reference = self.db.get_RefSeqGeneID_from_lrgID(lrg_reference) - if refseqgene_reference != 'none': - input_parses.ac = refseqgene_reference - formatted_variant = str(input_parses) - input = str(input_parses) - stash_input = input - if caution == '': - caution = lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation - else: - caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation - my_variant.warnings += ': ' + str(caution) - logger.warning(str(caution)) - elif re.match(r'^LRG_\d+t\d+:c.', str(input_parses)) or re.match(r'^LRG_\d+t\d+:n.', - str(input_parses)) or re.match( - r'^LRG_\d+t\d+:p.', str(input_parses)) or re.match(r'^LRG_\d+t\d+:g.', str(input_parses)): - lrg_reference, variation = str(input_parses).split(':') - refseqtranscript_reference = 
self.db.get_RefSeqTranscriptID_from_lrgTranscriptID( - lrg_reference) - if refseqtranscript_reference != 'none': - input_parses.ac = refseqtranscript_reference - formatted_variant = str(input_parses) - input = str(input_parses) - stash_input = input - if caution == '': - caution = lrg_reference + ':' + variation + ' automapped to ' + refseqtranscript_reference + ':' + variation - else: - caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + refseqtranscript_reference + ':' + variation - my_variant.warnings += ': ' + str(caution) - logger.warning(str(caution)) - else: - pass - logger.trace("LRG check for conversion to refseq completed", my_variant) + + if my_variant.refsource == 'LRG': + format_converters.lrg_to_refseq(my_variant, self) + formatted_variant = my_variant.quibble + input = str(my_variant.hgvs_formatted) + stash_input = input + logger.trace("LRG check for conversion to refseq completed", my_variant) + # Additional Incorrectly input variant capture training """ Evolving list of common mistakes, see sections below @@ -7365,7 +7243,6 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue if multi_g != []: - print((multi_g, type(multi_g))) multi_gen_vars = multi_g # '|'.join(multi_g) else: From 4b1df3ce3801da0bdeab4715b6f9d85d51371444 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 20 Mar 2019 16:59:18 +0000 Subject: [PATCH 053/223] Moved normalizers into Variant obj but kept old copies during move over --- VariantValidator/modules/format_converters.py | 8 +- VariantValidator/modules/variant.py | 7 ++ VariantValidator/modules/vvMixinCore.py | 81 ++++++++----------- 3 files changed, 46 insertions(+), 50 deletions(-) diff --git a/VariantValidator/modules/format_converters.py b/VariantValidator/modules/format_converters.py index 31c59edb..f5062857 100644 --- a/VariantValidator/modules/format_converters.py +++ b/VariantValidator/modules/format_converters.py @@ -331,7 +331,7 @@ def 
refseq_catch(variant, validator, select_transcripts_dict_plus_version): return skipvar -def vcf2hgvs_stage4(variant, validator, hn): +def vcf2hgvs_stage4(variant, validator): """ VCF2HGVS conversion step 4 has two purposes 1. VCF is frequently inappropriately converted into HGVS like descriptions @@ -433,7 +433,7 @@ def vcf2hgvs_stage4(variant, validator, hn): skipvar = True try: - not_delins = str(hn.normalize(hgvs_not_delins)) + not_delins = str(variant.hn.normalize(hgvs_not_delins)) except hgvs.exceptions.HGVSError as e: error = str(e) if re.search('Normalization of intronic variants is not supported', error): @@ -521,7 +521,7 @@ def intronic_converter(variant): logger.trace("HVGS typesetting complete", variant) -def allele_parser(variant, validation, hn): +def allele_parser(variant, validation): """ HGVS allele string parsing function Occurance #1 Takes a single HGVS allele description and separates each allele into a @@ -573,7 +573,7 @@ def allele_parser(variant, validation, hn): pass try: # Submit to allele extraction function - alleles = validation.hgvs_alleles(variant.quibble, hn) + alleles = validation.hgvs_alleles(variant.quibble, variant.hn) variant.warnings += ': ' + 'Automap has extracted possible variant descriptions' logger.resub('Automap has extracted possible variant descriptions, resubmitting') for allele in alleles: diff --git a/VariantValidator/modules/variant.py b/VariantValidator/modules/variant.py index 41d5ebf7..a499733b 100644 --- a/VariantValidator/modules/variant.py +++ b/VariantValidator/modules/variant.py @@ -35,6 +35,13 @@ def __init__(self, original, quibble=None, warnings='', write=True, primary_asse self.refsource = None self.reftype = None + self.hn = None + self.reverse_normalizer = None + self.evm = None + self.no_norm_evm = None + self.min_evm = None + self.lose_vm = None + def is_ascii(self): """ Instead of the previous test for unicode rich text characters. 
diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index d8e555d8..5d6be33f 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -54,6 +54,7 @@ from .vvFunctions import VariantValidatorError from . import variant from . import format_converters +from . import use_checking class Mixin(vvMixinConverters.Mixin): @@ -145,17 +146,26 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr logger.traceStart(my_variant) # Create Normalizers + my_variant.hn = hgvs.normalizer.Normalizer(self.hdp, + cross_boundaries=False, + shuffle_direction=3, + alt_aln_method=alt_aln_method + ) hn = hgvs.normalizer.Normalizer(self.hdp, cross_boundaries=False, shuffle_direction=3, alt_aln_method=alt_aln_method ) + my_variant.reverse_normalizer = hgvs.normalizer.Normalizer(self.hdp, + cross_boundaries=False, + shuffle_direction=5, + alt_aln_method=alt_aln_method + ) reverse_normalizer = hgvs.normalizer.Normalizer(self.hdp, cross_boundaries=False, shuffle_direction=5, alt_aln_method=alt_aln_method ) - # This will be used to order the final output if not my_variant.order: ordering = ordering + 1 @@ -240,7 +250,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue # Find not_sub type in input e.g. 
GGGG>G - toskip = format_converters.vcf2hgvs_stage4(my_variant, self, hn) + toskip = format_converters.vcf2hgvs_stage4(my_variant, self) if toskip: continue @@ -253,7 +263,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Extract variants from HGVS allele descriptions # http://varnomen.hgvs.org/recommendations/DNA/variant/alleles/ - toskip = format_converters.allele_parser(my_variant, self, hn) + toskip = format_converters.allele_parser(my_variant, self) if toskip: continue @@ -377,6 +387,13 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # They initiate quickly, so no need to move them unnecessarily # Create easy variant mapper (over variant mapper) and splign locked evm + my_variant.evm = hgvs.assemblymapper.AssemblyMapper(self.hdp, + assembly_name=primary_assembly, + alt_aln_method=alt_aln_method, + normalize=True, + replace_reference=True + ) + evm = hgvs.assemblymapper.AssemblyMapper(self.hdp, assembly_name=primary_assembly, alt_aln_method=alt_aln_method, @@ -385,6 +402,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr ) # Setup a reverse normalize instance and non-normalize evm + my_variant.no_norm_evm = hgvs.assemblymapper.AssemblyMapper(self.hdp, + assembly_name=primary_assembly, + alt_aln_method=alt_aln_method, + normalize=False, + replace_reference=True + ) no_norm_evm = hgvs.assemblymapper.AssemblyMapper(self.hdp, assembly_name=primary_assembly, alt_aln_method=alt_aln_method, @@ -393,7 +416,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr ) # Create a specific minimal evm with no normalizer and no replace_reference - min_evm = hgvs.assemblymapper.AssemblyMapper(self.hdp, + my_variant.min_evm = hgvs.assemblymapper.AssemblyMapper(self.hdp, assembly_name=primary_assembly, alt_aln_method=alt_aln_method, normalize=False, @@ -455,50 +478,16 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, 
transcr logger.trace("LRG check for conversion to refseq completed", my_variant) # Additional Incorrectly input variant capture training - """ - Evolving list of common mistakes, see sections below - """ - # NM_ .g - if (re.search(r'^NM_', formatted_variant) or re.search(r'^NR_', formatted_variant)) and re.search(r':g.', formatted_variant): - suggestion = input.replace(':g.', ':c.') - error = 'Transcript reference sequence input as genomic (g.) reference sequence. Did you mean ' + suggestion + '?' - my_variant.warnings += ': ' + error - logger.warning(error) - continue - # NR_ c. - if re.search(r'^NR_', input) and re.search(r':c.', input): - suggestion = input.replace(':c.', ':n.') - error = 'Non-coding transcript reference sequence input as coding (c.) reference sequence. Did you mean ' + suggestion + '?' - my_variant.warnings += ': ' + error - logger.warning(error) - continue - # NM_ n. - if re.search(r'^NM_', input) and re.search(r':n.', input): - suggestion = input.replace(':n.', ':c.') - error = 'Coding transcript reference sequence input as non-coding transcript (n.) reference sequence. Did you mean ' + suggestion + '?' - my_variant.warnings += ': ' + error - logger.warning(error) - continue - - # NM_ NC_ NG_ NR_ p. - if (re.search(r'^NM_', formatted_variant) or re.search(r'^NR_', formatted_variant) or re.search(r'^NC_', formatted_variant) or re.search( - r'^NG_', formatted_variant)) and re.search(r':p.', formatted_variant): - issue_link = 'http://varnomen.hgvs.org/recommendations/protein/' - error = 'Using a nucleotide reference sequence (NM_ NR_ NG_ NC_) to specify protein-level (p.) variation is not HGVS compliant. Please select an appropriate protein reference sequence (NP_)' - my_variant.warnings += ': ' + error - logger.warning(error) - continue - - # NG_ c or NC_c.. 
- if (re.search(r'^NG_', formatted_variant) or re.search(r'^NC_', formatted_variant)) and re.search(r':c.', formatted_variant): - suggestion = ': For additional assistance, submit ' + str(formatted_variant) + ' to VariantValidator' - error = 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has also been provided e.g. NG_(NM_):c.PositionVariation' + suggestion - my_variant.warnings += ': ' + error - logger.warning(error) - continue + if my_variant.refsource == 'RefSeq': + toskip = use_checking.refseq_common_mistakes(my_variant) + if toskip: + continue + logger.trace("Passed 'common mistakes' catcher", my_variant) - logger.trace("Passed 'common mistakes' catcher", my_variant) # Primary validation of the input + toskip = use_checking.structure_checks(my_variant, self) + print(toskip, my_variant.hgvs_formatted, my_variant.quibble) + """ An evolving set of variant structure and content searches which identify and warn users about inappropriate use of HGVS From 976375e14ee223d7349e5dfe2b323f4e69d56c48 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 20 Mar 2019 17:00:40 +0000 Subject: [PATCH 054/223] Created new file with usage checks --- VariantValidator/modules/use_checking.py | 666 +++++++++++++++++++++++ 1 file changed, 666 insertions(+) create mode 100644 VariantValidator/modules/use_checking.py diff --git a/VariantValidator/modules/use_checking.py b/VariantValidator/modules/use_checking.py new file mode 100644 index 00000000..2de6e543 --- /dev/null +++ b/VariantValidator/modules/use_checking.py @@ -0,0 +1,666 @@ +import re +import hgvs +from . 
import vvFunctions as fn +from .vvLogging import logger +import copy + + +def refseq_common_mistakes(variant): + """ + Evolving list of common mistakes, see sections below + """ + # NM_ .g + if (variant.quibble.startswith('NM_') or variant.quibble.startswith('NR_')) and variant.reftype == ':g.': + suggestion = variant.quibble.replace(':g.', ':c.') + error = 'Transcript reference sequence input as genomic (g.) reference sequence. ' \ + 'Did you mean ' + suggestion + '?' + variant.warnings += ': ' + error + logger.warning(error) + return True + # NR_ c. + if variant.quibble.startswith('NR_') and variant.reftype == ':c.': + suggestion = variant.quibble.replace(':c.', ':n.') + error = 'Non-coding transcript reference sequence input as coding (c.) reference sequence. ' \ + 'Did you mean ' + suggestion + '?' + variant.warnings += ': ' + error + logger.warning(error) + return True + # NM_ n. + if variant.quibble.startswith('NM_') and variant.reftype == ':n.': + suggestion = variant.quibble.replace(':n.', ':c.') + error = 'Coding transcript reference sequence input as non-coding transcript (n.) reference sequence. ' \ + 'Did you mean ' + suggestion + '?' + variant.warnings += ': ' + error + logger.warning(error) + return True + + # NM_ NC_ NG_ NR_ p. + if (variant.quibble.startswith('NM_') or variant.quibble.startswith('NR_') or variant.quibble.startswith('NC_') or + variant.quibble.startswith('NG_')) and variant.reftype == ':p.': + issue_link = 'http://varnomen.hgvs.org/recommendations/protein/' + error = 'Using a nucleotide reference sequence (NM_ NR_ NG_ NC_) to specify protein-level (p.) variation is ' \ + 'not HGVS compliant. Please select an appropriate protein reference sequence (NP_)' + variant.warnings += ': ' + error + logger.warning(error) + return True + + # NG_ c or NC_c.. 
+ if (variant.quibble.startswith('NG_') or variant.quibble.startswith('NC_')) and variant.reftype == ':c.': + suggestion = ': For additional assistance, submit ' + str(variant.quibble) + ' to VariantValidator' + error = 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has ' \ + 'also been provided e.g. NG_(NM_):c.PositionVariation' + suggestion + variant.warnings += ': ' + error + logger.warning(error) + return True + + return False + + +def structure_checks(variant, validator): + """ + An evolving set of variant structure and content searches which identify + and warn users about inappropriate use of HGVS + Primarily, this code filters out variants that cannot realistically be + auto corrected and will cause the downstream functions to return errors + """ + input_parses = validator.hp.parse_hgvs_variant(variant.quibble) + if input_parses.type == 'g': + check = structure_checks_g(variant, validator) + if check: + return True + + elif input_parses.type == 'c': + check = structure_checks_c(variant, validator) + if check: + return True + + elif input_parses.type == 'n': + check = structure_checks_n(variant, validator) + if check: + return True + else: + pass + + +def structure_checks_g(variant, validator): + """ + Structure checks for when reftype is genomic + """ + if not variant.quibble.startswith('NC_') and not variant.quibble.startswith('NG_') \ + and not variant.quibble.startswith('NT_') and not variant.quibble.startswith('NW_'): + error = 'Invalid reference sequence identifier (' + variant.hgvs_formatted.ac + ')' + variant.warnings += ': ' + str(error) + logger.warning(error) + return True + + try: + validator.vr.validate(variant.hgvs_formatted) + except Exception as e: + error = str(e) + variant.warnings += ': ' + str(error) + logger.warning(error) + return True + + # Additional test + try: + variant.hn.normalize(variant.hgvs_formatted) + except hgvs.exceptions.HGVSError as e: + error = str(e) + variant.warnings += 
': ' + str(error) + logger.warning(error) + return True + + return False + + +def structure_checks_c(variant, validator): + """ + structure checks for when reftype is coding + :param variant: + :param validator: + :param hn: + :return: + """ + + if '*' in str(variant.hgvs_formatted) or 'c.-' in str(variant.hgvs_formatted): + # Catch variation in UTRs + # These should be in the sequence so can be directly validated. Need to pass to n. + try: + validator.vr.validate(variant.hgvs_formatted) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if 'datums is ill-defined' in error: + called_ref = variant.hgvs_formatted.posedit.edit.ref + try: + to_n = variant.evm.c_to_n(variant.hgvs_formatted) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + variant.warnings += ': ' + error + logger.warning(error) + return True + actual_ref = to_n.posedit.edit.ref + if called_ref != actual_ref: + error = 'Variant reference (' + called_ref + ') does not agree with reference sequence ' \ + '(' + actual_ref + ')' + variant.warnings += ': ' + error + logger.warning(error) + return True + else: + variant.hgvs_formatted.posedit.edit.ref = '' + else: + if 'bounds' in error or 'intronic variant' in error: + try: + variant.hn.normalize(variant.hgvs_formatted) + except hgvs.exceptions.HGVSError: + fn.exceptPass() + + if 'bounds' in error: + try: + identity_info = validator.hdp.get_tx_identity_info(variant.hgvs_formatted.ac) + ref_start = identity_info[3] + ref_end = identity_info[4] + if '-' in str(variant.hgvs_formatted.posedit.pos.start) and variant.hgvs_formatted.posedit.pos.start.offset == 0: + # upstream positions + boundary = -ref_start + remainder = variant.hgvs_formatted.posedit.pos.start.base - boundary + variant.hgvs_formatted.posedit.pos.start.base = boundary + variant.hgvs_formatted.posedit.pos.start.offset = remainder + if '-' in str(variant.hgvs_formatted.posedit.pos.end) and variant.hgvs_formatted.posedit.pos.end.offset == 0: + boundary = -ref_start + 
remainder = variant.hgvs_formatted.posedit.pos.end.base - boundary + variant.hgvs_formatted.posedit.pos.end.base = boundary + variant.hgvs_formatted.posedit.pos.end.offset = remainder + if '*' in str(variant.hgvs_formatted.posedit.pos.start) and variant.hgvs_formatted.posedit.pos.start.offset == 0: + # downstream positions + tot_end_pos = str(variant.hgvs_formatted.posedit.pos.start).replace('*', '') + ts_seq = validator.sf.fetch_seq(variant.hgvs_formatted.ac) + boundary = len(ts_seq) - ref_end + variant.hgvs_formatted.posedit.pos.start.base = boundary + offset = int(tot_end_pos) - boundary + variant.hgvs_formatted.posedit.pos.start.offset = offset + if '*' in str(variant.hgvs_formatted.posedit.pos.end) and variant.hgvs_formatted.posedit.pos.end.offset == 0: + tot_end_pos = str(variant.hgvs_formatted.posedit.pos.end).replace('*', '') + ts_seq = validator.sf.fetch_seq(variant.hgvs_formatted.ac) + boundary = len(ts_seq) - ref_end + variant.hgvs_formatted.posedit.pos.end.base = boundary + offset = int(tot_end_pos) - boundary + variant.hgvs_formatted.posedit.pos.end.offset = offset + + # Create a lose vm instance + variant.lose_vm = hgvs.variantmapper.VariantMapper(validator.hdp, + replace_reference=True, + prevalidation_level=None + ) + + report_gen = validator.myevm_t_to_g(variant.hgvs_formatted, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + error = 'Using a transcript reference sequence to specify a variant position that lies ' \ + 'outside of the reference sequence is not HGVS-compliant: ' \ + 'Instead use ' + fn.valstr(report_gen) + except Exception: + fn.exceptPass() + variant.warnings += ': ' + error + logger.warning(error) + return True + + try: + variant.hgvs_formatted = variant.evm.c_to_n(variant.hgvs_formatted) + except hgvs.exceptions.HGVSError as e: + error = str(e) + variant.warnings += ': ' + error + logger.warning(e) + return True + + if 'n.1-' in str(variant.hgvs_formatted): + input_parses = variant.evm.n_to_c(variant.hgvs_formatted) + 
error = 'Using a transcript reference sequence to specify a variant position that lies outside of the ' \ + 'reference sequence is not HGVS-compliant. Instead use ' + genomic_position = validator.myevm_t_to_g(input_parses, variant.no_norm_evm, variant.primary_assembly, + variant.hn) + error = error + fn.valstr(genomic_position) + variant.warnings += ': ' + error + logger.warning(error) + return True + + # Re-map input_parses back to c. variant + variant.hgvs_formatted = variant.evm.n_to_c(variant.hgvs_formatted) + + # Intronic positions in UTRs + if re.search(r'\d\-\d', str(variant.hgvs_formatted)) or re.search(r'\d\+\d', str(variant.hgvs_formatted)): + # Can we go c-g-c + try: + to_genome = validator.myevm_t_to_g(variant.hgvs_formatted, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + to_tx = variant.evm.g_to_t(to_genome, variant.hgvs_formatted.ac) + except hgvs.exceptions.HGVSInvalidIntervalError as e: + error = str(e) + if 'bounds' in error: + try: + identity_info = validator.hdp.get_tx_identity_info(variant.hgvs_formatted.ac) + ref_start = identity_info[3] + ref_end = identity_info[4] + if '-' in str(variant.hgvs_formatted.posedit.pos.start): + # upstream positions + boundary = -ref_start + remainder = variant.hgvs_formatted.posedit.pos.start.base - boundary + variant.hgvs_formatted.posedit.pos.start.base = boundary + variant.hgvs_formatted.posedit.pos.start.offset = remainder + if '-' in str(variant.hgvs_formatted.posedit.pos.end): + boundary = -ref_start + remainder = variant.hgvs_formatted.posedit.pos.end.base - boundary + variant.hgvs_formatted.posedit.pos.end.base = boundary + variant.hgvs_formatted.posedit.pos.end.offset = remainder + if '*' in str(variant.hgvs_formatted.posedit.pos.start): + # downstream positions + tot_end_pos = str(variant.hgvs_formatted.posedit.pos.start).replace('*', '') + ts_seq = validator.sf.fetch_seq(variant.hgvs_formatted.ac) + boundary = len(ts_seq) - ref_end + variant.hgvs_formatted.posedit.pos.start.base = 
boundary + te1, te2 = tot_end_pos.split('+') + tot_end_pos = int(te1) + int(te2) + offset = tot_end_pos - boundary + variant.hgvs_formatted.posedit.pos.start.offset = offset + if '*' in str(variant.hgvs_formatted.posedit.pos.end): + tot_end_pos = str(variant.hgvs_formatted.posedit.pos.end).replace('*', '') + ts_seq = validator.sf.fetch_seq(variant.hgvs_formatted.ac) + boundary = len(ts_seq) - ref_end + variant.hgvs_formatted.posedit.pos.end.base = boundary + te1, te2 = tot_end_pos.split('+') + tot_end_pos = int(te1) + int(te2) + offset = tot_end_pos - boundary + variant.hgvs_formatted.posedit.pos.end.offset = offset + + report_gen = validator.myevm_t_to_g(variant.hgvs_formatted, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + error = 'Using a transcript reference sequence to specify a variant position that lies ' \ + 'outside of the reference sequence is not HGVS-compliant. Instead use '\ + + fn.valstr(report_gen) + except Exception: + fn.exceptPass() + variant.warnings += ': ' + error + logger.warning(error) + return True + + except hgvs.exceptions.HGVSDataNotAvailableError as e: + error = str(e) + if 'Alignment is incomplete' in error: + e_list = error.split('~') + gens = [] + for el in e_list: + el_l = el.split('/') + if el_l[-1] == '': + continue + gens.append(el_l[-1]) + acs = '; '.join(gens) + error = 'Cannot map ' + fn.valstr(variant.hgvs_formatted) + ' to a genomic position. 
'\ + + variant.hgvs_formatted.ac + ' can only be partially aligned to genomic reference ' \ + 'sequences ' + acs + variant.warnings += ': ' + error + logger.warning(error) + return True + + elif re.search(r'\d-', str(variant.hgvs_formatted)) or re.search(r'\d\+', str(variant.hgvs_formatted)): + # Quick look at syntax validation + try: + validator.vr.validate(variant.hgvs_formatted) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if 'bounds' in error: + try: + report_gen = validator.myevm_t_to_g(variant.hgvs_formatted, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + except hgvs.exceptions.HGVSError: + fn.exceptPass() + else: + error = 'Using a transcript reference sequence to specify a variant position that lies outside of '\ + 'the reference sequence is not HGVS-compliant. Instead use ' + fn.valstr(report_gen) + variant.warnings += ': ' + error + logger.warning(error) + return True + elif 'insertion length must be 1' in error: + variant.warnings += ': ' + error + logger.warning(error) + return True + elif 'base start position must be <= end position' in error: + correction = copy.deepcopy(variant.hgvs_formatted) + st = variant.hgvs_formatted.posedit.pos.start + ed = variant.hgvs_formatted.posedit.pos.end + correction.posedit.pos.start = ed + correction.posedit.pos.end = st + error = error + ': Did you mean ' + str(correction) + '?' 
+ variant.warnings += ': ' + error + logger.warning(error) + return True + + # Create a specific minimal evm with no normalizer and no replace_reference + # Have to use this method due to potential multi chromosome error, note normalizes but does not replace sequence + try: + output = validator.noreplace_myevm_t_to_g(variant.hgvs_formatted, variant.evm, validator.hdp, + variant.primary_assembly, validator.vm, variant.hn, validator.hp, + validator.sf, variant.no_norm_evm) + except hgvs.exceptions.HGVSDataNotAvailableError: + tx_ac = variant.hgvs_formatted.ac + try: + gene_symbol = validator.db.get_gene_symbol_from_transcriptID(tx_ac) + except: + error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, ' \ + 'please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to ' \ + 'https://variantvalidator.org/ref_finder/, or select an alternative genome build' + else: + error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, ' \ + 'please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' \ + + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative ' \ + 'genome build' + variant.warnings += ': ' + error + logger.warning(error) + return True + except ValueError as e: + error = str(e) + if '> end' in error: + error = 'Interval start position ' + str(variant.hgvs_formatted.posedit.pos.start) + ' > interval end '\ + 'position ' + str(variant.hgvs_formatted.posedit.pos.end) + variant.warnings += ': ' + error + logger.warning(error) + return True + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if 'base start position must be <= end position' in error: + # correction = copy.deepcopy(variant.hgvs_formatted) + # st = variant.hgvs_formatted.posedit.pos.start + # ed = variant.hgvs_formatted.posedit.pos.end + # correction.posedit.pos.start = ed + # correction.posedit.pos.end = st + # error = 
error + ': Did you mean ' + str(correction) + '?' + error = 'Interval start position ' + str(variant.hgvs_formatted.posedit.pos.start) + ' > interval end' \ + ' position ' + str(variant.hgvs_formatted.posedit.pos.end) + variant.warnings += ': ' + error + logger.warning(error) + return True + else: + variant.warnings += ': ' + error + logger.warning(error) + return True + + try: + variant.evm.g_to_t(output, variant.hgvs_formatted.ac) + except hgvs.exceptions.HGVSError as e: + error = str(e) + variant.warnings += ': ' + error + logger.warning(error) + return True + + try: + validator.vr.validate(output) + except hgvs.exceptions.HGVSError as e: + error = str(e) + variant.warnings += ': ' + error + logger.warning(error) + return True + + else: + # All other variation + try: + validator.vr.validate(variant.hgvs_formatted) + except hgvs.exceptions.HGVSUnsupportedOperationError: + fn.exceptPass() + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + # This catches errors in introns + if 'base start position must be <= end position' in error: + # correction = variant.hgvs_formatted + # st = variant.hgvs_formatted.posedit.pos.start + # ed = variant.hgvs_formatted.posedit.pos.end + # correction.posedit.pos.start = ed + # correction.posedit.pos.end = st + # error = error + ': Did you mean ' + str(correction) + '?' 
+ error = 'Interval start position ' + str(variant.hgvs_formatted.posedit.pos.start) + ' > interval end '\ + 'position ' + str(variant.hgvs_formatted.posedit.pos.end) + variant.warnings += ': ' + error + logger.warning(error) + return True + + except hgvs.exceptions.HGVSDataNotAvailableError as e: + error = str(e) + variant.warnings += ': ' + error + logger.warning(error) + return True + except hgvs.exceptions.HGVSError as e: + error = str(e) + if 'bounds' in error: + error += ' (' + variant.hgvs_formatted.ac + ')' + variant.warnings += ': ' + error + logger.warning(error) + return True + return False + + +def structure_checks_n(variant, validator): + """ + structure checks for reftype nucleotide + :param variant: + :param validator: + :return: + """ + if '+' in str(variant.hgvs_formatted) or '-' in str(variant.hgvs_formatted): + # Catch variation in UTRs + # These should be in the sequence so can be directly validated. Need to pass to n. + try: + validator.vr.validate(variant.hgvs_formatted) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('intronic variant', error): + pass + elif re.search('datums is ill-defined', error): + called_ref = variant.hgvs_formatted.posedit.edit.ref + to_n = variant.evm.c_to_n(variant.hgvs_formatted) + actual_ref = to_n.posedit.edit.ref + if called_ref != actual_ref: + error = 'Variant reference (' + called_ref + ') does not agree with reference sequence (' + actual_ref + ')' + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + else: + variant.hgvs_formatted.posedit.edit.ref = '' + formatted_variant = str(variant.hgvs_formatted) + + elif re.search('base must be >=1 for datum = SEQ_START or CDS_END', error): + error = 'The given coordinate is outside the bounds of the reference sequence.' 
+ + try: + if re.match('-', str(variant.hgvs_formatted.posedit.pos.start)): + # upstream positions + boundary = 1 + remainder = int(str(variant.hgvs_formatted.posedit.pos.start)) - boundary + remainder = remainder + 1 + variant.hgvs_formatted.posedit.pos.start.base = boundary + variant.hgvs_formatted.posedit.pos.start.offset = remainder + if re.match('-', str(variant.hgvs_formatted.posedit.pos.end)): + boundary = 1 + remainder = int(str(variant.hgvs_formatted.posedit.pos.end)) - boundary + remainder = remainder + 1 + variant.hgvs_formatted.posedit.pos.end.base = boundary + variant.hgvs_formatted.posedit.pos.end.offset = remainder + report_gen = validator.myevm_t_to_g(variant.hgvs_formatted, variant.no_norm_evm, variant.primary_assembly, + variant.hn) + error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' + fn.valstr( + report_gen) + except Exception as e: + fn.exceptPass() + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + else: + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + + if re.search(r'n.1-', str(variant.hgvs_formatted)): + error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + genomic_position = validator.myevm_t_to_g(variant.hgvs_formatted, variant.no_norm_evm, variant.primary_assembly, + variant.hn) + error = error + fn.valstr(genomic_position) + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + else: + pass + + if re.search(r'\d\-', str(variant.hgvs_formatted)) or re.search(r'\d\+', str(variant.hgvs_formatted)): + # Quick look at syntax validation + try: + validator.vr.validate(variant.hgvs_formatted) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if re.search('bounds', error): + try: + report_gen = validator.myevm_t_to_g(variant.hgvs_formatted, variant.no_norm_evm, variant.primary_assembly, + variant.hn) + except hgvs.exceptions.HGVSError as e: + fn.exceptPass() + else: + error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' + fn.valstr( + report_gen) + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + elif re.search('insertion length must be 1', error): + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + elif re.search('base start position must be <= end position', error): + correction = copy.deepcopy(variant.hgvs_formatted) + st = variant.hgvs_formatted.posedit.pos.start + ed = variant.hgvs_formatted.posedit.pos.end + correction.posedit.pos.start = ed + correction.posedit.pos.end = st + error = error + ': Did you mean ' + str(correction) + '?' 
+ # error = 'Interval start position ' + str(input_parses.posedit.pos.start) + ' > interval end position ' + str(input_parses.posedit.pos.end) + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + elif re.search('Cannot validate sequence of an intronic variant', error): + try: + test_g = validator.myevm_t_to_g(variant.hgvs_formatted, variant.no_norm_evm, variant.primary_assembly, + variant.hn) + back_to_n = variant.evm.g_to_t(test_g, variant.hgvs_formatted.ac) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('bounds', error): + report_gen = validator.myevm_t_to_g(variant.hgvs_formatted, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' + fn.valstr( + report_gen) + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + else: + fn.exceptPass() + + # Create a specific minimal evm with no normalizer and no replace_reference + # Have to use this method due to potential multi chromosome error, note, normalizes but does not replace sequence + try: + output = validator.noreplace_myevm_t_to_g(variant.hgvs_formatted, variant.evm, validator.hdp, variant.primary_assembly, validator.vm, variant.hn, + validator.hp, validator.sf, variant.no_norm_evm) + except hgvs.exceptions.HGVSDataNotAvailableError as e: + tx_ac = variant.hgvs_formatted.ac + try: + gene_symbol = validator.db.get_gene_symbol_from_transcriptID(tx_ac) + except: + gene_symbol = 'none' + if gene_symbol == 'none': + error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' + else: + error = 'Required information for ' + tx_ac + ' is missing from the Universal 
Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + except ValueError as e: + error = str(e) + if re.search('> end', error): + error = 'Interval start position ' + str( + variant.hgvs_formatted.posedit.pos.start) + ' > interval end position ' + str( + variant.hgvs_formatted.posedit.pos.end) + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if re.search('base start position must be <= end position', error): + correction = copy.deepcopy(variant.hgvs_formatted) + st = variant.hgvs_formatted.posedit.pos.start + ed = variant.hgvs_formatted.posedit.pos.end + correction.posedit.pos.start = ed + correction.posedit.pos.end = st + error = error + ': Did you mean ' + str(correction) + '?' 
+ error = 'Interval start position ' + str( + variant.hgvs_formatted.posedit.pos.start) + ' > interval end position ' + str( + variant.hgvs_formatted.posedit.pos.end) + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + try: + validator.vr.validate(output) + except hgvs.exceptions.HGVSError as e: + error = str(e) + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + + else: + # All other variation + try: + validator.vr.validate(variant.hgvs_formatted) + except hgvs.exceptions.HGVSUnsupportedOperationError: + + fn.exceptPass() + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + """ + if re.search('Length implied by coordinates', error): + # Applies to del and inv + # NOTE, there has been no normalization at all so this error is valid here + my_variant.warnings += ': ' + str(error) + continue + # Will apply to > del and inv + if re.search('does not agree with reference sequence', error): + my_variant.warnings += ': ' + str(error) + continue + # ensures x_y for insertions + if re.search('insertion length must be 1', error): + my_variant.warnings += ': ' + str(error) + continue + # Boundary issue + if re.search('Variant coordinate is out of the bound of CDS region', error): + my_variant.warnings += ': ' + str(error) + continue + """ + # This catches errors in introns + if re.search('base start position must be <= end position', error): + correction = copy.deepcopy(variant.hgvs_formatted) + st = variant.hgvs_formatted.posedit.pos.start + ed = variant.hgvs_formatted.posedit.pos.end + correction.posedit.pos.start = ed + correction.posedit.pos.end = st + error = error + ': Did you mean ' + str(correction) + '?' 
+ error = 'Interval start position ' + str( + variant.hgvs_formatted.posedit.pos.start) + ' > interval end position ' + str( + variant.hgvs_formatted.posedit.pos.end) + logger.warning(str(error)) + variant.warnings += ': ' + str(error) + return True + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + except hgvs.exceptions.HGVSDataNotAvailableError as e: + error = e + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('bounds', error): + error = error + ' (' + variant.hgvs_formatted.ac + ')' + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + return False \ No newline at end of file From 5c0ebd3abe860afb33a45455095523ca1b5c2d23 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 8 Apr 2019 11:51:19 +0100 Subject: [PATCH 055/223] Cleaned use_checking n variants --- VariantValidator/modules/use_checking.py | 159 +++++++++++------------ 1 file changed, 76 insertions(+), 83 deletions(-) diff --git a/VariantValidator/modules/use_checking.py b/VariantValidator/modules/use_checking.py index 2de6e543..2f3f6d93 100644 --- a/VariantValidator/modules/use_checking.py +++ b/VariantValidator/modules/use_checking.py @@ -448,9 +448,9 @@ def structure_checks_n(variant, validator): validator.vr.validate(variant.hgvs_formatted) except hgvs.exceptions.HGVSError as e: error = str(e) - if re.search('intronic variant', error): + if 'intronic variant' in error: pass - elif re.search('datums is ill-defined', error): + elif 'datums is ill-defined' in error: called_ref = variant.hgvs_formatted.posedit.edit.ref to_n = variant.evm.c_to_n(variant.hgvs_formatted) actual_ref = to_n.posedit.edit.ref @@ -463,20 +463,20 @@ def structure_checks_n(variant, validator): variant.hgvs_formatted.posedit.edit.ref = '' formatted_variant = str(variant.hgvs_formatted) - elif re.search('base must be >=1 for datum = SEQ_START or CDS_END', error): 
+ elif 'base must be >=1 for datum = SEQ_START or CDS_END' in error: error = 'The given coordinate is outside the bounds of the reference sequence.' try: - if re.match('-', str(variant.hgvs_formatted.posedit.pos.start)): + if '-' in str(variant.hgvs_formatted.posedit.pos.start): # upstream positions boundary = 1 - remainder = int(str(variant.hgvs_formatted.posedit.pos.start)) - boundary + remainder = variant.hgvs_formatted.posedit.pos.start.base - boundary remainder = remainder + 1 variant.hgvs_formatted.posedit.pos.start.base = boundary variant.hgvs_formatted.posedit.pos.start.offset = remainder - if re.match('-', str(variant.hgvs_formatted.posedit.pos.end)): + if '-' in str(variant.hgvs_formatted.posedit.pos.end): boundary = 1 - remainder = int(str(variant.hgvs_formatted.posedit.pos.end)) - boundary + remainder = variant.hgvs_formatted.posedit.pos.end.base - boundary remainder = remainder + 1 variant.hgvs_formatted.posedit.pos.end.base = boundary variant.hgvs_formatted.posedit.pos.end.offset = remainder @@ -484,34 +484,32 @@ def structure_checks_n(variant, validator): variant.hn) error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' + fn.valstr( report_gen) - except Exception as e: + except Exception: fn.exceptPass() - variant.warnings += ': ' + str(error) - logger.warning(str(error)) + variant.warnings += ': ' + error + logger.warning(error) return True else: - variant.warnings += ': ' + str(error) - logger.warning(str(error)) + variant.warnings += ': ' + error + logger.warning(error) return True - if re.search(r'n.1-', str(variant.hgvs_formatted)): + if 'n.1-' in str(variant.hgvs_formatted): error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' genomic_position = validator.myevm_t_to_g(variant.hgvs_formatted, variant.no_norm_evm, variant.primary_assembly, variant.hn) error = error + fn.valstr(genomic_position) - variant.warnings += ': ' + str(error) - logger.warning(str(error)) + variant.warnings += ': ' + error + logger.warning(error) return True - else: - pass - if re.search(r'\d\-', str(variant.hgvs_formatted)) or re.search(r'\d\+', str(variant.hgvs_formatted)): + if re.search(r'\d-', str(variant.hgvs_formatted)) or re.search(r'\d\+', str(variant.hgvs_formatted)): # Quick look at syntax validation try: validator.vr.validate(variant.hgvs_formatted) except hgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) - if re.search('bounds', error): + if 'bounds' in error: try: report_gen = validator.myevm_t_to_g(variant.hgvs_formatted, variant.no_norm_evm, variant.primary_assembly, variant.hn) @@ -520,14 +518,14 @@ def structure_checks_n(variant, validator): else: error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' + fn.valstr( report_gen) - variant.warnings += ': ' + str(error) - logger.warning(str(error)) + variant.warnings += ': ' + error + logger.warning(error) return True - elif re.search('insertion length must be 1', error): - variant.warnings += ': ' + str(error) - logger.warning(str(error)) + elif 'insertion length must be 1' in error: + variant.warnings += ': ' + error + logger.warning(error) return True - elif re.search('base start position must be <= end position', error): + elif 'base start position must be <= end position' in error: correction = copy.deepcopy(variant.hgvs_formatted) st = variant.hgvs_formatted.posedit.pos.start ed = variant.hgvs_formatted.posedit.pos.end @@ -535,26 +533,24 @@ def structure_checks_n(variant, validator): correction.posedit.pos.end = st error = error + ': Did you mean ' + str(correction) + '?' 
# error = 'Interval start position ' + str(input_parses.posedit.pos.start) + ' > interval end position ' + str(input_parses.posedit.pos.end) - variant.warnings += ': ' + str(error) - logger.warning(str(error)) + variant.warnings += ': ' + error + logger.warning(error) return True - elif re.search('Cannot validate sequence of an intronic variant', error): + elif 'Cannot validate sequence of an intronic variant' in error: try: test_g = validator.myevm_t_to_g(variant.hgvs_formatted, variant.no_norm_evm, variant.primary_assembly, variant.hn) back_to_n = variant.evm.g_to_t(test_g, variant.hgvs_formatted.ac) except hgvs.exceptions.HGVSError as e: error = str(e) - if re.search('bounds', error): + if 'bounds' in error: report_gen = validator.myevm_t_to_g(variant.hgvs_formatted, variant.no_norm_evm, variant.primary_assembly, variant.hn) error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' + fn.valstr( report_gen) - variant.warnings += ': ' + str(error) - logger.warning(str(error)) + variant.warnings += ': ' + error + logger.warning(error) return True - else: - fn.exceptPass() # Create a specific minimal evm with no normalizer and no replace_reference # Have to use this method due to potential multi chromosome error, note, normalizes but does not replace sequence @@ -566,26 +562,26 @@ def structure_checks_n(variant, validator): try: gene_symbol = validator.db.get_gene_symbol_from_transcriptID(tx_ac) except: - gene_symbol = 'none' - if gene_symbol == 'none': + gene_symbol = None + if gene_symbol is None: error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' else: error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please 
select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - variant.warnings += ': ' + str(error) - logger.warning(str(error)) + variant.warnings += ': ' + error + logger.warning(error) return True except ValueError as e: error = str(e) - if re.search('> end', error): + if '> end' in error: error = 'Interval start position ' + str( variant.hgvs_formatted.posedit.pos.start) + ' > interval end position ' + str( variant.hgvs_formatted.posedit.pos.end) - variant.warnings += ': ' + str(error) - logger.warning(str(error)) + variant.warnings += ': ' + error + logger.warning(error) return True except hgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) - if re.search('base start position must be <= end position', error): + if 'base start position must be <= end position' in error: correction = copy.deepcopy(variant.hgvs_formatted) st = variant.hgvs_formatted.posedit.pos.start ed = variant.hgvs_formatted.posedit.pos.end @@ -595,15 +591,15 @@ def structure_checks_n(variant, validator): error = 'Interval start position ' + str( variant.hgvs_formatted.posedit.pos.start) + ' > interval end position ' + str( variant.hgvs_formatted.posedit.pos.end) - variant.warnings += ': ' + str(error) - logger.warning(str(error)) + variant.warnings += ': ' + error + logger.warning(error) return True try: validator.vr.validate(output) except hgvs.exceptions.HGVSError as e: error = str(e) - variant.warnings += ': ' + str(error) - logger.warning(str(error)) + variant.warnings += ': ' + error + logger.warning(error) return True else: @@ -611,56 +607,53 @@ def structure_checks_n(variant, validator): try: validator.vr.validate(variant.hgvs_formatted) except hgvs.exceptions.HGVSUnsupportedOperationError: - fn.exceptPass() except hgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) - """ - if re.search('Length implied by coordinates', error): - # Applies to del 
and inv - # NOTE, there has been no normalization at all so this error is valid here - my_variant.warnings += ': ' + str(error) - continue - # Will apply to > del and inv - if re.search('does not agree with reference sequence', error): - my_variant.warnings += ': ' + str(error) - continue - # ensures x_y for insertions - if re.search('insertion length must be 1', error): - my_variant.warnings += ': ' + str(error) - continue - # Boundary issue - if re.search('Variant coordinate is out of the bound of CDS region', error): - my_variant.warnings += ': ' + str(error) - continue - """ + # if re.search('Length implied by coordinates', error): + # # Applies to del and inv + # # NOTE, there has been no normalization at all so this error is valid here + # my_variant.warnings += ': ' + str(error) + # continue + # # Will apply to > del and inv + # if re.search('does not agree with reference sequence', error): + # my_variant.warnings += ': ' + str(error) + # continue + # # ensures x_y for insertions + # if re.search('insertion length must be 1', error): + # my_variant.warnings += ': ' + str(error) + # continue + # # Boundary issue + # if re.search('Variant coordinate is out of the bound of CDS region', error): + # my_variant.warnings += ': ' + str(error) + # continue # This catches errors in introns - if re.search('base start position must be <= end position', error): - correction = copy.deepcopy(variant.hgvs_formatted) - st = variant.hgvs_formatted.posedit.pos.start - ed = variant.hgvs_formatted.posedit.pos.end - correction.posedit.pos.start = ed - correction.posedit.pos.end = st - error = error + ': Did you mean ' + str(correction) + '?' 
+ if 'base start position must be <= end position' in error: + # correction = copy.deepcopy(variant.hgvs_formatted) + # st = variant.hgvs_formatted.posedit.pos.start + # ed = variant.hgvs_formatted.posedit.pos.end + # correction.posedit.pos.start = ed + # correction.posedit.pos.end = st + # error = error + ': Did you mean ' + str(correction) + '?' error = 'Interval start position ' + str( variant.hgvs_formatted.posedit.pos.start) + ' > interval end position ' + str( variant.hgvs_formatted.posedit.pos.end) - logger.warning(str(error)) - variant.warnings += ': ' + str(error) + logger.warning(error) + variant.warnings += ': ' + error return True - variant.warnings += ': ' + str(error) - logger.warning(str(error)) + variant.warnings += ': ' + error + logger.warning(error) return True except hgvs.exceptions.HGVSDataNotAvailableError as e: - error = e - variant.warnings += ': ' + str(error) - logger.warning(str(error)) + error = str(e) + variant.warnings += ': ' + error + logger.warning(error) return True except hgvs.exceptions.HGVSError as e: error = str(e) - if re.search('bounds', error): + if 'bounds' in error: error = error + ' (' + variant.hgvs_formatted.ac + ')' - variant.warnings += ': ' + str(error) - logger.warning(str(error)) + variant.warnings += ': ' + error + logger.warning(error) return True - return False \ No newline at end of file + return False From fdb837978faee1c2b839e46b4648bb97f7f8a0a0 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Tue, 9 Apr 2019 16:33:50 +0100 Subject: [PATCH 056/223] Added new files to hold mappers, gap mapping code and transcript annotating --- VariantValidator/modules/collect_info.py | 152 ++ VariantValidator/modules/format_converters.py | 102 +- VariantValidator/modules/gapped_mapping.py | 949 ++++++++ VariantValidator/modules/mappers.py | 174 ++ VariantValidator/modules/variant.py | 5 +- VariantValidator/modules/vvMixinCore.py | 1996 +---------------- 6 files changed, 1416 insertions(+), 1962 deletions(-) create mode 100644 
VariantValidator/modules/collect_info.py create mode 100644 VariantValidator/modules/gapped_mapping.py create mode 100644 VariantValidator/modules/mappers.py diff --git a/VariantValidator/modules/collect_info.py b/VariantValidator/modules/collect_info.py new file mode 100644 index 00000000..2376f079 --- /dev/null +++ b/VariantValidator/modules/collect_info.py @@ -0,0 +1,152 @@ +import re +import hgvs +from .vvLogging import logger +from . import vvHGVS +from .variant import Variant +from . import vvChromosomes +from . import vvFunctions as fn +from . import gapped_mapping + + +def collect_transcript_info(variant, validator): + """Collect transcript information for the variant""" + + if variant.reftype == ':g.': + toskip = from_genomic(variant, validator) + else: + toskip = from_non_genomic(variant, validator) + + return toskip + + +def get_transcript_info(variant, validator): + """Collect transcript information from a non-genomic variant""" + + hgvs_vt = validator.hp.parse_hgvs_variant(str(variant.hgvs_formatted)) + try: + tx_id_info = validator.hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = 'Please inform UTA admin of the following error: ' + str(e) + reason = "VariantValidator cannot recover information for transcript " + str( + hgvs_vt.ac) + ' because it is not available in the Universal Transcript Archive' + variant.warnings += ': ' + str(reason) + logger.warning(str(reason) + ": " + str(error)) + return True + else: + # Get hgnc Gene name from command + hgnc = tx_id_info[6] + + # ACCESS THE GENE INFORMATION RECORDS ON THE UTA DATABASE + # Refseq accession + tx_for_gene = validator.tx_for_gene(hgnc, validator.hdp) + refseq_ac = validator.ng_extract(tx_for_gene) + + # Get accurate transcript descriptions from the relevant databases + # RefSeq databases + if validator.alt_aln_method != 'genebuild': + # Gene description - requires GenBank search to get all the required info, i.e. 
transcript variant ID + # accession number + hgvs_object = validator.hp.parse_hgvs_variant(str(variant.hgvs_formatted)) + accession = hgvs_object.ac + # Look for the accession in our database + # Connect to database and send request + entry = validator.db.in_entries(accession, 'transcript_info') + + # Analyse the returned data and take the necessary actions + # If the error key exists + if 'error' in entry: + # Open a hgvs exception log file in append mode + error = entry['description'] + variant.warnings += ': ' + str( + error) + ': A Database error occurred, please contact admin' + logger.warning(str(error) + ": A Database error occurred, please contact admin") + return True + + # If the accession key is found + elif 'accession' in entry: + # If the current entry is too old + if entry['expiry'] == 'true': + try: + entry = validator.db.data_add(accession=accession, validator=validator) + except hgvs.exceptions.HGVSError: + error = 'Transcript %s is not currently supported' % (accession) + variant.warnings += ': ' + error + logger.warning(error) + return True + except Exception: + error = 'Unable to assign transcript identity records to ' + accession + ', potentially an obsolete record :' + variant.warnings += ': ' + error + logger.warning(error) + return True + variant.description = entry['description'] + else: + variant.description = entry['description'] + # If the none key is found add the description to the database + elif 'none' in entry: + try: + entry = validator.db.data_add(accession=accession, validator=validator) + except Exception as e: + logger.warning(str(e)) + error = 'Unable to assign transcript identity records to ' + accession + ', potentially an obsolete record :' + variant.warnings += ': ' + error + logger.warning(error) + return True + variant.description = entry['description'] + + # If no correct keys are found + else: + # Open a hgvs exception log file in append mode + error = 'Unknown error type' + variant.warnings += ': ' + error + ': A 
Database error occurred, please contact admin' + logger.warning(error) + return True + + # Ensembl databases + else: + # accession number + hgvs_object = validator.hp.parse_hgvs_variant(str(variant.hgvs_formatted)) + accession = hgvs_object.ac + # Look for the accession in our database + # Connect to database and send request + entry = validator.db.in_entries(accession, 'transcript_info') + + # Analyse the returned data and take the necessary actions + # If the error key exists + if 'error' in entry: + # Open a hgvs exception log file in append mode + error = entry['description'] + variant.warnings += ': ' + str( + error) + ': A Database error occurred, please contact admin' + logger.warning(str(error)) + return True + + # If the accession key is found + elif 'accession' in entry: + # If the current entry is too old + if entry['expiry'] == 'true': + entry = validator.db.data_add(accession=accession, validator=validator) + variant.description = entry['description'] + else: + variant.description = entry['description'] + # If the none key is found add the description to the database + elif 'none' in entry: + try: + entry = validator.db.data_add(accession=accession, validator=validator) + except Exception as e: + logger.warning(str(e)) + error = 'Unable to assign transcript identity records to ' + accession + ', potentially an obsolete record :' + variant.warnings += ': ' + error + logger.warning(error) + return True + variant.description = entry['description'] + + # If no correct keys are found + else: + # Open a hgvs exception log file in append mode + error = 'Unknown error type' + variant.warnings += ': ' + error + ': A Database error occurred, please contact admin' + logger.warning(error) + return True + return False + + diff --git a/VariantValidator/modules/format_converters.py b/VariantValidator/modules/format_converters.py index f5062857..06d11cf6 100644 --- a/VariantValidator/modules/format_converters.py +++ b/VariantValidator/modules/format_converters.py @@ 
-1,5 +1,6 @@ import re import hgvs +import copy from .vvLogging import logger from .variant import Variant from . import vvChromosomes @@ -628,4 +629,103 @@ def lrg_to_refseq(variant, validator): variant.set_quibble(str(variant.hgvs_formatted)) caution += lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation variant.warnings += ': ' + caution - logger.warning(caution) \ No newline at end of file + logger.warning(caution) + + +def mitochondrial(variant, validator): + """Will check if variant is mitochondrial and if so it will reformat the type to 'm' and save a value to the variant + hgvs_genomic attribute""" + + if variant.reftype == ':m.' or variant.hgvs_formatted.ac == 'NC_012920.1' or \ + variant.hgvs_formatted.ac == 'NC_001807.4': + + hgvs_mito = copy.deepcopy(variant.hgvs_formatted) + if hgvs_mito.type == 'g' and (hgvs_mito.ac == 'NC_012920.1' or hgvs_mito.ac == 'NC_001807.4'): + hgvs_mito.type = 'm' + try: + validator.vr.validate(hgvs_mito) + except hgvs.exceptions.HGVSError as e: + error = str(e) + variant.warnings += ': ' + error + logger.warning(error) + return True + except KeyError as e: + error = 'Currently unable to validate ' + hgvs_mito.ac + ' sequence variation' + variant.warnings += ': ' + error + logger.warning(error) + return True + else: + # Any transcripts? 
+ rel_var = validator.relevant_transcripts(hgvs_mito, variant.evm, validator.alt_aln_method, + variant.reverse_normalizer) + variant.hgvs_genomic = hgvs_mito + if len(rel_var) == 0: + variant.genomic_g = fn.valstr(hgvs_mito) + variant.description = 'Homo sapiens mitochondrion, complete genome' + logger.info('Homo sapiens mitochondrion, complete genome') + return True + return False + + +def proteins(variant, validator): + """Handle protein sequences""" + if variant.reftype == ':p.': + error = None + # Try to validate the variant + try: + hgvs_object = validator.hp.parse_hgvs_variant(variant.hgvs_formatted) + except hgvs.exceptions.HGVSError as e: + error = str(e) + try: + validator.vr.validate(hgvs_object) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error: + variant.warnings += ': ' + error + logger.warning(error) + return True + else: + # Get accurate descriptions from the relevant databases + # RefSeq databases + if validator.alt_aln_method != 'genebuild': + # Gene description - requires GenBank search to get all the required info, i.e. transcript variant ID + # accession number + accession = hgvs_object.ac + # Look for the accession in our database + # Connect to database and send request + # record = validator.entrez_efetch(db="nuccore", id=accession, rettype="gb", retmode="text") + + try: + validator.vr.validate(hgvs_object) + except hgvs.exceptions.HGVSError as e: + error = str(e) + else: + error = str( + hgvs_object) + ' is HGVS compliant and contains a valid reference amino acid description' + reason = 'Protein level variant descriptions are not fully supported due to redundancy in the genetic code' + variant.warnings += ': ' + reason + ': ' + error + variant.protein = str(hgvs_object) + logger.warning(reason + ": " + error) + return True + return False + + +def rna(variant, validator): + """ + convert r, into c. 
+ """ + if variant.reftype == ':r.': + hgvs_input = validator.hp.parse_hgvs_variant(str(variant.hgvs_formatted)) # Traps the hgvs variant of r. for further use + # Change to coding variant + variant.reftype = ':c.' + # Change input to reflect! + try: + hgvs_c = validator.va_func.hgvs_r_to_c(hgvs_input) + except hgvs.exceptions.HGVSDataNotAvailableError as e: + error = str(e) + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + variant.hgvs_formatted = hgvs_c + + return False diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py new file mode 100644 index 00000000..71c00752 --- /dev/null +++ b/VariantValidator/modules/gapped_mapping.py @@ -0,0 +1,949 @@ +import hgvs +import re +import copy +from . import vvHGVS +from . import vvFunctions as fn +from .vvLogging import logger + + +def gapped_g_to_c(variant, validator, rel_var): + """ + Gap aware projection from g. to c. + """ + + # Set variables for problem specific warnings + gapped_alignment_warning = '' + corrective_action_taken = '' + gapped_transcripts = '' + auto_info = '' + disparity_deletion_in = [] + + # Create a pseudo VCF so that normalization can be applied and a delins can be generated + hgvs_genomic_variant = variant.hgvs_genomic + # Reverse normalize hgvs_genomic_variant: NOTE will replace ref + reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic_variant) + hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + + # VCF + vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, variant.primary_assembly, + variant.reverse_normalizer, validator.sf) + chr = vcf_dict['chr'] + pos = vcf_dict['pos'] + ref = vcf_dict['ref'] + alt = vcf_dict['alt'] + + # Generate an end position + end = str(int(pos) + len(ref) - 1) + pos = str(pos) + + # take a look at the input genomic variant for potential base salvage + stash_ac = vcf_dict['chr'] + stash_pos = int(vcf_dict['pos']) + stash_ref = 
vcf_dict['ref'] + stash_alt = vcf_dict['alt'] + stash_end = end + stash_input = str(variant.stashed) + # Re-Analyse genomic positions + if 'NG_' in str(variant.hgvs_formatted): + c = validator.hp.parse_hgvs_variant(rel_var[0]) + if hasattr(c.posedit.edit, 'ref') and c.posedit.edit.ref is not None: + c.posedit.edit.ref = c.posedit.edit.ref.upper() + if hasattr(c.posedit.edit, 'alt') and c.posedit.edit.alt is not None: + c.posedit.edit.alt = c.posedit.edit.alt.upper() + stash_input = validator.myevm_t_to_g(c, variant.no_norm_evm, variant.primary_assembly, variant.hn) + if re.match('NC_', str(stash_input)) or re.match('NT_', str(stash_input)) or re.match('NW_', + str( + stash_input)): + try: + hgvs_stash = validator.hp.parse_hgvs_variant(stash_input) + except: + hgvs_stash = stash_input + if hasattr(hgvs_stash.posedit.edit, 'ref') and hgvs_stash.posedit.edit.ref is not None: + hgvs_stash.posedit.edit.ref = hgvs_stash.posedit.edit.ref.upper() + if hasattr(hgvs_stash.posedit.edit, 'alt') and hgvs_stash.posedit.edit.alt is not None: + hgvs_stash.posedit.edit.alt = hgvs_stash.posedit.edit.alt.upper() + + stash_ac = hgvs_stash.ac + # MAKE A NO NORM HGVS2VCF + stash_dict = vvHGVS.pos_lock_hgvs2vcf(hgvs_stash, variant.primary_assembly, variant.reverse_normalizer, validator.sf) + stash_ac = hgvs_stash.ac + stash_pos = int(stash_dict['pos']) + stash_ref = stash_dict['ref'] + stash_alt = stash_dict['alt'] + # Generate an end position + stash_end = str(stash_pos + len(stash_ref) - 1) + + # Store a not real deletion insertion + stored_hgvs_not_delins = validator.hp.parse_hgvs_variant(str( + hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) + stash_hgvs_not_delins = validator.hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_genomic_5pr.type + '.' 
+ str( + stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + + # Set non-valid caution to false + non_valid_caution = 'false' + + # make an empty rel_var + nw_rel_var = [] + + # loop through rel_var and amend where required + for var in rel_var: + # Store the current hgvs:c. description + saved_hgvs_coding = validator.hp.parse_hgvs_variant(var) + + # Remove un-selected transcripts + if validator.select_transcripts != 'all': + tx_ac = saved_hgvs_coding.ac + # If it's in the selected tx dict, keep it + if tx_ac.split('.')[0] in list(validator.select_transcripts_dict.keys()): + pass + # If not get rid of it! + else: + continue + + # Get orientation of the gene wrt genome and a list of exons mapped to the genome + ori = validator.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=hgvs_genomic_5pr.ac, + alt_aln_method=validator.alt_aln_method) + orientation = int(ori[0]['alt_strand']) + intronic_variant = 'false' + + if orientation == -1: + # position genomic at its most 5 prime position + try: + query_genomic = variant.reverse_normalizer.normalize(variant.hgvs_genomic) + except: + query_genomic = variant.hgvs_genomic + # Map to the transcript ant test for movement + try: + hgvs_seek_var = variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): + pass + else: + hgvs_seek_var = saved_hgvs_coding + + elif orientation != -1: + # position genomic at its most 3 prime position + try: + query_genomic = variant.hn.normalize(variant.hgvs_genomic) + except: + 
query_genomic = variant.hgvs_genomic + # Map to the transcript and test for movement + try: + hgvs_seek_var = variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): + pass + else: + hgvs_seek_var = saved_hgvs_coding + + try: + intron_test = variant.hn.normalize(hgvs_seek_var) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + intronic_variant = 'hard_fail' + else: + # Double check to see whether the variant is actually intronic? 
+ for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' + + if intronic_variant != 'hard_fail': + if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', + str(hgvs_seek_var.posedit.pos)): + # Double check to see whether the variant is actually intronic? + for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' + + if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str(hgvs_seek_var.posedit.pos)): + # Double check to see whether the variant is actually intronic? + for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' + + # If exonic, process + if intronic_variant != 'true': + # map form reverse normalized g. 
to c. + hgvs_from_5n_g = variant.no_norm_evm.g_to_t(hgvs_genomic_5pr, saved_hgvs_coding.ac) + + # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths + disparity_deletion_in = ['false', 'false'] + if stored_hgvs_not_delins != '': + # Refresh hgvs_not_delins from stored_hgvs_not_delins + hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) + # This test will only occur in dup of single base, insertion or substitution + if not re.search('_', str(hgvs_not_delins.posedit.pos)): + if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', + hgvs_genomic_5pr.posedit.edit.type): + # For gap in chr, map to t. - but becaouse we have pushed to 5 prime by norm, add 1 to end pos + plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) + plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 + plussed_hgvs_not_delins.posedit.edit.ref = '' + transcript_variant = variant.no_norm_evm.g_to_t(plussed_hgvs_not_delins, + str(saved_hgvs_coding.ac)) + if (( + transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( + hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', + str(hgvs_genomic_5pr.posedit.edit)) and 
not re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + else: + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', + str(hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + else: + pass + else: + pass + + try: + tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSInvalidIntervalError: + tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_genomic_5pr, saved_hgvs_coding.ac) + except 
hgvs.exceptions.HGVSError: + if str(e) == 'start or end or both are beyond the bounds of transcript record': + tx_hgvs_not_delins = saved_hgvs_coding + + # Create normalized version of tx_hgvs_not_delins + rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) + # Check for +ve base and adjust + if (re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) or re.search( + r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end))): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: + fn.exceptPass() + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = 
variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # else: + # pass + + # Check for -ve base and adjust + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: + fn.exceptPass() + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base back to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # Delete the ref + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # Add the additional base to the ALT + start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 + end = rn_tx_hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move 
tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + else: + pass + + # Logic + if len(hgvs_not_delins.posedit.edit.ref) < len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( + hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['chromosome', gap_length] + elif len(hgvs_not_delins.posedit.edit.ref) > len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( + rn_tx_hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['transcript', gap_length] + else: + hgvs_stash_t = validator.vm.g_to_t(stash_hgvs_not_delins, saved_hgvs_coding.ac) + if len(stash_hgvs_not_delins.posedit.edit.ref) > len( + hgvs_stash_t.posedit.edit.ref): + try: + variant.hn.normalize(hgvs_stash_t) + except: + fn.exceptPass() + else: + gap_length = len(stash_hgvs_not_delins.posedit.edit.ref) - len( + hgvs_stash_t.posedit.edit.ref) + disparity_deletion_in = ['transcript', gap_length] + try: + tx_hgvs_not_delins = validator.vm.c_to_n(hgvs_stash_t) + except: + tx_hgvs_not_delins = hgvs_stash_t + hgvs_not_delins = stash_hgvs_not_delins + elif hgvs_stash_t.posedit.pos.start.offset != 0 or hgvs_stash_t.posedit.pos.end.offset != 0: + disparity_deletion_in = ['transcript', 'Requires Analysis'] + try: + tx_hgvs_not_delins = 
validator.vm.c_to_n(hgvs_stash_t) + except: + tx_hgvs_not_delins = hgvs_stash_t + hgvs_not_delins = stash_hgvs_not_delins + hgvs_genomic_5pr = stash_hgvs_not_delins + else: + pass + + # Final sanity checks + try: + validator.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + except Exception as e: + if str(e) == 'start or end or both are beyond the bounds of transcript record': + hgvs_not_delins = saved_hgvs_coding + disparity_deletion_in = ['false', 'false'] + logger.warning(str(e)) + try: + variant.hn.normalize(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + hgvs_not_delins = saved_hgvs_coding + disparity_deletion_in = ['false', 'false'] + elif re.match('Normalization of intronic variants is not supported', error): + # We know that this cannot be because of an intronic variant, so must be aligned to tx gap + disparity_deletion_in = ['transcript', 'Requires Analysis'] + logger.warning(error) + # Pre-processing of tx_hgvs_not_delins + try: + if tx_hgvs_not_delins.posedit.edit.alt is None: + tx_hgvs_not_delins.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' 
+ str( + tx_hgvs_not_delins.posedit.pos.start) + '_' + str( + tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins = validator.hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) + + # GAP IN THE TRANSCRIPT DISPARITY DETECTED + if disparity_deletion_in[0] == 'transcript': + gap_position = '' + gapped_alignment_warning = str( + hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + variant.primary_assembly + + # ANY VARIANT WHOLLY WITHIN THE GAP + if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', + str( + tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', + str( + tx_hgvs_not_delins.posedit.pos.end))): + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + # Copy the current variant + tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) + try: + if tx_gap_fill_variant.posedit.edit.alt is None: + tx_gap_fill_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( + tx_gap_fill_variant.posedit.pos.start) + '_' + str( + tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant = validator.hp.parse_hgvs_variant( + tx_gap_fill_variant_delins_from_dup) + + # Identify which half of the NOT-intron the start position of the variant is in + if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + + try: + tx_gap_fill_variant = validator.vm.n_to_c(tx_gap_fill_variant) + except: + fn.exceptPass() + genomic_gap_fill_variant = validator.vm.t_to_g(tx_gap_fill_variant, + reverse_normalized_hgvs_genomic.ac) + genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref + + try: + c_tx_hgvs_not_delins = validator.vm.n_to_c(tx_hgvs_not_delins) + except Exception: + c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) + genomic_gap_fill_variant_alt = validator.vm.t_to_g(c_tx_hgvs_not_delins, + hgvs_genomic_5pr.ac) + + # Ensure an ALT exists + try: + if genomic_gap_fill_variant_alt.posedit.edit.alt is None: + genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_gap_fill_variant_delins_from_dup 
= genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( + genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant = validator.hp.parse_hgvs_variant( + genomic_gap_fill_variant_delins_from_dup) + genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( + genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt = validator.hp.parse_hgvs_variant( + genomic_gap_fill_variant_alt_delins_from_dup) + + # Correct insertion alts + if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': + append_ref = validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, + genomic_gap_fill_variant_alt.posedit.pos.end.base) + genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ + 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ + append_ref[1] + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) + if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: + alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) + else: + # Deletions with no ins + pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = genomic_gap_fill_variant.posedit.pos.start.base + alt_start = 
genomic_gap_fill_variant_alt.posedit.pos.start.base + ref_base_dict = {} + for base in reference_bases: + ref_base_dict[ref_start] = str(base) + ref_start = ref_start + 1 + + alt_base_dict = {} + + # NEED TO SEARCH FOR RANGE = and replace with interval_range + # Need to search for int and replace with integer + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, + genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): + if integer == alt_start: + alt_base_dict[integer] = str(''.join(alternate_bases)) + else: + alt_base_dict[integer] = 'X' + + # Generate the alt sequence + alternate_sequence_bases = [] + for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, + genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): + if integer in list(alt_base_dict.keys()): + alternate_sequence_bases.append(alt_base_dict[integer]) + else: + alternate_sequence_bases.append(ref_base_dict[integer]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Add the new alt to the gap fill variant and generate transcript variant + genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence + hgvs_refreshed_variant = validator.vm.g_to_t(genomic_gap_fill_variant, + tx_gap_fill_variant.ac) + + # Set warning + gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) + disparity_deletion_in[1] = [gap_size] + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + else: + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + auto_info = auto_info + '%s' % (gap_position) + else: + if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: + # In this instance, we have identified a transcript gap but the n. version of + # the transcript variant but do not have a position which actually hits the gap, + # so the variant likely spans the gap, and is not picked up by an offset. 
+ try: + c1 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + g1 = validator.nr_vm.t_to_g(c1, variant.hgvs_genomic.ac) + g3 = validator.nr_vm.t_to_g(c1, variant.hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, variant.hgvs_genomic.ac) + ng2 = variant.hn.normalize(g2) + g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( + len(g3.posedit.edit.ref) - 1) + try: + c2 = validator.vm.g_to_t(g3, c1.ac) + if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: + pass + else: + tx_hgvs_not_delins = c2 + try: + tx_hgvs_not_delins = validator.vm.c_to_n(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSError: + fn.exceptPass() + except hgvs.exceptions.HGVSInvalidVariantError: + fn.exceptPass() + + if re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = validator.vm.t_to_g(c1, variant.hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c2, variant.hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = validator.vm.t_to_g(c2, variant.hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, variant.hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = 
g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = validator.vm.t_to_g(c1, variant.hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c2, variant.hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = validator.vm.t_to_g(c2, variant.hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, variant.hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = 
alternate + c3 = validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base + gpe = for_location_c.posedit.pos.end.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\-', + str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = validator.vm.t_to_g(c1, variant.hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c2, variant.hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = validator.vm.t_to_g(c2, variant.hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, variant.hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if 
re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + g2 = validator.vm.t_to_g(c2, variant.hgvs_genomic.ac) + c2 = validator.vm.g_to_t(g2, c2.ac) + reference = c1.posedit.edit.ref + c2.posedit.edit.ref[1:] + alternate = c1.posedit.edit.alt + c2.posedit.edit.ref[1:] + c3 = copy.deepcopy(c1) + c3.posedit.pos.end = c2.posedit.pos.end + c3.posedit.edit.ref = '' # reference + c3.posedit.edit.alt = alternate + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base - 1 + gpe = for_location_c.posedit.pos.end.base + gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + else: + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + '\n' + hgvs_refreshed_variant = tx_hgvs_not_delins + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + + # GAP IN THE CHROMOSOME + elif disparity_deletion_in[0] == 'chromosome': + # Set warning variables + gap_position = '' + gapped_alignment_warning = str( + hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + variant.primary_assembly + hgvs_refreshed_variant = tx_hgvs_not_delins + # Warn + auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( + hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(disparity_deletion_in[ + 1]) + ' transcript base(s) that fail to align to chromosome ' + str( + variant.hgvs_genomic.ac) + '\n' + gapped_transcripts = gapped_transcripts + str(hgvs_refreshed_variant.ac) + ' ' + else: + # Try the push + hgvs_stash = copy.deepcopy(stash_hgvs_not_delins) + stash_ac = hgvs_stash.ac + # Make a hard left and hard right not delins g. + stash_dict_right = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, variant.primary_assembly, variant.hn, validator.sf) + stash_pos_right = int(stash_dict_right['pos']) + stash_ref_right = stash_dict_right['ref'] + stash_alt_right = stash_dict_right['alt'] + stash_end_right = str(stash_pos_right + len(stash_ref_right) - 1) + stash_hgvs_not_delins_right = validator.hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' 
+ str( + stash_pos_right) + '_' + stash_end_right + 'del' + stash_ref_right + 'ins' + stash_alt_right) + stash_dict_left = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, variant.primary_assembly, + variant.reverse_normalizer, validator.sf) + stash_pos_left = int(stash_dict_left['pos']) + stash_ref_left = stash_dict_left['ref'] + stash_alt_left = stash_dict_left['alt'] + stash_end_left = str(stash_pos_left + len(stash_ref_left) - 1) + stash_hgvs_not_delins_left = validator.hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' + str( + stash_pos_left) + '_' + stash_end_left + 'del' + stash_ref_left + 'ins' + stash_alt_left) + # Map in-situ to the transcript left and right + try: + tx_hard_right = validator.vm.g_to_t(stash_hgvs_not_delins_right, saved_hgvs_coding.ac) + except Exception as e: + tx_hard_right = saved_hgvs_coding + else: + normalize_stash_right = variant.hn.normalize(stash_hgvs_not_delins_right) + if str(normalize_stash_right.posedit) == str(stash_hgvs_not_delins.posedit): + tx_hard_right = saved_hgvs_coding + try: + tx_hard_left = validator.vm.g_to_t(stash_hgvs_not_delins_left, saved_hgvs_coding.ac) + except Exception as e: + tx_hard_left = saved_hgvs_coding + else: + normalize_stash_left = variant.hn.normalize(stash_hgvs_not_delins_left) + if str(normalize_stash_left.posedit) == str(stash_hgvs_not_delins.posedit): + tx_hard_left = saved_hgvs_coding + # The Logic - Currently limited to genome gaps + if len(stash_hgvs_not_delins_right.posedit.edit.ref) < len( + tx_hard_right.posedit.edit.ref): + tx_hard_right = variant.hn.normalize(tx_hard_right) + gap_position = '' + gapped_alignment_warning = str( + hgvs_genomic_5pr) + ' may be an artefact of aligning the transcripts listed below with genome build ' + variant.primary_assembly + hgvs_refreshed_variant = tx_hard_right + gapped_transcripts = gapped_transcripts + str(tx_hard_right.ac) + ' ' + elif len(stash_hgvs_not_delins_left.posedit.edit.ref) < len( + tx_hard_left.posedit.edit.ref): + tx_hard_left = 
variant.hn.normalize(tx_hard_left) + gap_position = '' + gapped_alignment_warning = str( + hgvs_genomic_5pr) + ' may be an artefact of aligning the transcripts listed below with genome build ' + variant.primary_assembly + hgvs_refreshed_variant = tx_hard_left + gapped_transcripts = gapped_transcripts + str(tx_hard_left.ac) + ' ' + else: + # Keep the same by re-setting rel_var + hgvs_refreshed_variant = saved_hgvs_coding + + # Edit the output + if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( + hgvs_refreshed_variant.type)): + hgvs_refreshed_variant = variant.evm.n_to_c(hgvs_refreshed_variant) + else: + pass + try: + hgvs_refreshed_variant = variant.hn.normalize(hgvs_refreshed_variant) + if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ + hgvs_refreshed_variant.posedit.edit.ref[-1] == \ + hgvs_refreshed_variant.posedit.edit.alt[-1]: + hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ + 0:-1] + hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ + 0:-1] + hgvs_refreshed_variant.posedit.pos.end.base = hgvs_refreshed_variant.posedit.pos.end.base - 1 + hgvs_refreshed_variant = variant.hn.normalize(hgvs_refreshed_variant) + elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ + hgvs_refreshed_variant.posedit.edit.ref[0] == \ + hgvs_refreshed_variant.posedit.edit.alt[0]: + hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ + 1:] + hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ + 1:] + hgvs_refreshed_variant.posedit.pos.start.base = hgvs_refreshed_variant.posedit.pos.start.base + 1 + hgvs_refreshed_variant = variant.hn.normalize(hgvs_refreshed_variant) + except Exception as e: + error = str(e) + # Ensure the final variant is not intronic nor does it cross exon boundaries + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of 
variants spanning the exon-intron boundary', + error): + hgvs_refreshed_variant = saved_hgvs_coding + else: + pass + fn.exceptPass() + # Send to empty nw_rel_var + nw_rel_var.append(hgvs_refreshed_variant) + + # Otherwise these variants need to be set + else: + corrective_action_taken = '' + gapped_alignment_warning = '' + # Send to empty nw_rel_var + nw_rel_var.append(saved_hgvs_coding) + + data = { + 'gapped_alignment_warning': gapped_alignment_warning, + 'corrective_action_taken': corrective_action_taken, + 'auto_info': auto_info, + 'disparity_deletion_in': disparity_deletion_in, + 'gapped_transcripts': gapped_transcripts + } + return data, nw_rel_var diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py new file mode 100644 index 00000000..23069588 --- /dev/null +++ b/VariantValidator/modules/mappers.py @@ -0,0 +1,174 @@ +import hgvs +from .vvLogging import logger +from . import vvHGVS +from .variant import Variant +from . import vvChromosomes +from . import vvFunctions as fn +from . 
import gapped_mapping + + +def gene_to_transcripts(variant, validator): + g_query = validator.hp.parse_hgvs_variant(str(variant.hgvs_formatted)) + + # Genomic coordinates can be validated immediately + error = 'false' + try: + validator.vr.validate(g_query) + except hgvs.exceptions.HGVSError as e: + error = str(e) + except KeyError: + error = 'Reference sequence ' + variant.hgvs_genomic.ac + ' is either not supported or does not exist' + if error != 'false': + reason = 'Invalid variant description' + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + + # Set test to see if Norm alters the coords + g_test = variant.hn.normalize(g_query) + + # Perform test + if g_query.posedit.pos != g_test.posedit.pos: + # my_variant.warnings += ': ' + 'Input variant description normalized to ' + str(g_test) + variant.hgvs_genomic = g_test + else: + variant.hgvs_genomic = g_query + + # Collect rel_var + # rel_var is a keyworded list of relevant transcripts with associated coding variants + """ + Initial simple projection from the provided g. 
position all overlapping + transcripts + """ + rel_var = validator.relevant_transcripts(variant.hgvs_genomic, variant.evm, validator.alt_aln_method, variant.reverse_normalizer) + + # Double check rel_vars have not been missed when mapping from a RefSeqGene + if len(rel_var) != 0 and 'NG_' in variant.hgvs_genomic.ac: + for var in rel_var: + hgvs_coding_variant = validator.hp.parse_hgvs_variant(var) + try: + variant.hgvs_genomic = validator.myevm_t_to_g(hgvs_coding_variant, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + except hgvs.exceptions.HGVSError as e: + try_rel_var = [] + else: + try_rel_var = validator.relevant_transcripts(variant.hgvs_genomic, variant.evm, validator.alt_aln_method, + variant.reverse_normalizer) + if len(try_rel_var) > len(rel_var): + rel_var = try_rel_var + break + else: + continue + + # Tripple check this assumption by querying the gene position database + if len(rel_var) == 0: + vcf_dict = vvHGVS.hgvs2vcf(variant.hgvs_genomic, variant.primary_assembly, variant.reverse_normalizer, validator.sf) + not_di = str(variant.hgvs_genomic.ac) + ':g.' 
+ str(vcf_dict['pos']) + '_' + str( + int(vcf_dict['pos']) + (len(vcf_dict['ref']) - 1)) + 'del' + vcf_dict['ref'] + 'ins' + \ + vcf_dict['alt'] + hgvs_not_di = validator.hp.parse_hgvs_variant(not_di) + rel_var = validator.relevant_transcripts(hgvs_not_di, variant.evm, validator.alt_aln_method, + variant.reverse_normalizer) + + # list return statements + """ + If mapping to transcripts has been unsuccessful, provide relevant details + """ + if len(rel_var) == 0: + + # Check for NG_ + if str(variant.hgvs_formatted).startswith('NG_'): + # parse + hgvs_refseqgene = validator.hp.parse_hgvs_variant(str(variant.hgvs_formatted)) + # Convert to chromosomal position + refseqgene_data = validator.rsg_to_chr(hgvs_refseqgene, variant.primary_assembly, variant.hn, validator.vr) + # There should only ever be one description returned + refseqgene_data = refseqgene_data[0] + + # Extract data + if refseqgene_data['valid'] == 'true': + input = refseqgene_data['hgvs_genomic'] + # re_submit + # Tag the line so that it is not written out + variant.warnings += ': ' + str(variant.hgvs_formatted) + ' automapped to genome position ' + str(input) + query = Variant(variant.original, quibble=input, warnings=variant.warnings, + primary_assembly=variant.primary_assembly, order=variant.order) + + coding = 'intergenic' + validator.batch_list.append(query) + else: + error = 'Mapping unavailable for RefSeqGene ' + str(variant.hgvs_formatted) + ' using alignment method = ' + validator.alt_aln_method + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + + # Chromosome build is not supported or intergenic??? 
+ else: + sfm = vvChromosomes.supported_for_mapping(variant.hgvs_genomic.ac, variant.primary_assembly) + if sfm == 'true': + try: + validator.vr.validate(variant.hgvs_genomic) + except hgvs.exceptions.HGVSError as e: + error = str(e) + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + else: + # Map to RefSeqGene if available + refseqgene_data = validator.chr_to_rsg(variant.hgvs_genomic, variant.hn, validator.vr) + rsg_data = '' + # Example {'gene': 'NTHL1', 'hgvs_refseqgene': 'NG_008412.1:g.3455_3464delCAAACACACA', 'valid': 'true'} + for data in refseqgene_data: + if data['valid'] == 'true': + data['hgvs_refseqgene'] = validator.hp.parse_hgvs_variant(data['hgvs_refseqgene']) + data['hgvs_refseqgene'] = fn.valstr(data['hgvs_refseqgene']) + rsg_data = rsg_data + data['hgvs_refseqgene'] + ' (' + data['gene'] + '), ' + + error = 'No transcripts found that fully overlap the described variation in the genomic sequence' + # set output type flag + variant.output_type_flag = 'intergenic' + # set genomic and where available RefSeqGene outputs + variant.warnings += ': ' + str(error) + variant.genomic_g = fn.valstr(variant.hgvs_genomic) + variant.genomic_r = str(rsg_data.split('(')[0]) + logger.warning(str(error)) + return True + else: + error = 'Please ensure the requested chromosome version relates to a supported genome build. Supported genome builds are: GRCh37, GRCh38, hg19 and hg38' + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + + else: + # Tag the line so that it is not written out + variant.write = False + + data, nw_rel_var = gapped_mapping.gapped_g_to_c(variant, validator, rel_var) + + # Warn the user that the g. 
description is not valid + if data['gapped_alignment_warning'] != '': + if data['disparity_deletion_in'][0] == 'transcript': + corrective_action_taken = 'Automap has deleted ' + str( + data['disparity_deletion_in'][1]) + ' bp from chromosomal reference sequence ' + str( + variant.hgvs_genomic.ac) + ' to ensure perfect alignment with transcript reference sequence(s)' + data['gapped_transcripts'] + if data['disparity_deletion_in'][0] == 'chromosome': + corrective_action_taken = 'Automap has added ' + str( + data['disparity_deletion_in'][1]) + ' bp to chromosomal reference sequence ' + str( + variant.hgvs_genomic.ac) + ' to ensure perfect alignment with transcript reference sequence(s) ' + data['gapped_transcripts'] + + # Add additional data to the front of automap + if data['auto_info'] != '': + automap = data['auto_info'] + '\n' + 'false' + + rel_var = nw_rel_var + + # Set the values and append to batch_list + for c_description in rel_var: + query = Variant(variant.original, quibble=str(c_description), warnings=variant.warnings, + primary_assembly=variant.primary_assembly, order=variant.order) + validator.batch_list.append(query) + logger.warning("Continue reached when mapping transcript types to variants") + # Call next description + return True + return False + diff --git a/VariantValidator/modules/variant.py b/VariantValidator/modules/variant.py index a499733b..9062b519 100644 --- a/VariantValidator/modules/variant.py +++ b/VariantValidator/modules/variant.py @@ -15,9 +15,11 @@ def __init__(self, original, quibble=None, warnings='', write=True, primary_asse else: self.quibble = quibble self.hgvs_formatted = None + self.hgvs_genomic = None + self.stashed = None self.warnings = warnings - self.description = '' + self.description = '' # hgnc_gene_info variable self.coding = '' self.coding_g = '' self.genomic_r = '' @@ -26,6 +28,7 @@ def __init__(self, original, quibble=None, warnings='', write=True, primary_asse self.write = write self.primary_assembly = 
primary_assembly self.order = order + self.output_type_flag = 'warning' self.test_stash_tx_left = None self.test_stash_tx_right = None diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 5d6be33f..c8ebca9b 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -55,6 +55,8 @@ from . import variant from . import format_converters from . import use_checking +from . import collect_info +from . import mappers class Mixin(vvMixinConverters.Mixin): @@ -89,6 +91,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr self.selected_assembly = selected_assembly self.select_transcripts = select_transcripts + self.alt_aln_method = alt_aln_method try: # Validation @@ -139,7 +142,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr flag : intragenic flag : gene """ - set_output_type_flag = 'warning' + logger.debug("Batch list length " + str(len(self.batch_list))) for my_variant in self.batch_list: # Start timing @@ -287,6 +290,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr formatted_variant = my_variant.quibble input = my_variant.quibble stash_input = my_variant.quibble + my_variant.stashed = stash_input format_type = my_variant.reftype # Validator specific variables, note, not all will be necessary for batch, but keep to ensure that batch works @@ -487,1968 +491,40 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Primary validation of the input toskip = use_checking.structure_checks(my_variant, self) print(toskip, my_variant.hgvs_formatted, my_variant.quibble) + if toskip: + continue + logger.trace("Variant structure and contents searches passed", my_variant) - """ - An evolving set of variant structure and content searches which identify - and warn users about inappropriate use of HGVS - Primarily, this code filters out variants that cannot 
realistically be - auto corrected and will cause the downstream functions to return errors - """ - input_parses = self.hp.parse_hgvs_variant(input) - if input_parses.type == 'g': - if re.match('^NC_', input_parses.ac) or re.match('^NG_', input_parses.ac) or re.match('^NT_', - input_parses.ac) or re.match( - '^NW_', input_parses.ac): - pass - else: - error = 'Invalid reference sequence identifier (' + input_parses.ac + ')' - my_variant.warnings += ': ' + str(error) - logger.warning(error) - continue - try: - self.vr.validate(input_parses) - except hgvs.exceptions.HGVSError as e: - error = str(e) - my_variant.warnings += ': ' + str(error) - logger.warning(error) - continue - except Exception as e: - error = str(e) - my_variant.warnings += ': ' + str(error) - logger.warning(error) - continue - # Additional test - try: - hn.normalize(input_parses) - except hgvs.exceptions.HGVSError as e: - error = str(e) - my_variant.warnings += ': ' + str(error) - logger.warning(error) - continue - else: - fn.exceptPass() - - elif input_parses.type == 'c': - if re.search(r'\*', str(input_parses)) or re.search(r'c.\-', str(input_parses)): - # Catch variation in UTRs - # These should be in the sequence so can be directly validated. Need to pass to n. 
- try: - self.vr.validate(input_parses) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search(r'datums is ill-defined', error): - called_ref = input_parses.posedit.edit.ref - try: - to_n = evm.c_to_n(input_parses) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - my_variant.warnings += ': ' + str(error) - logger.warning(error) - continue - actual_ref = to_n.posedit.edit.ref - if called_ref != actual_ref: - error = 'Variant reference (' + called_ref + ') does not agree with reference sequence (' + actual_ref + ')' - my_variant.warnings += ': ' + str(error) - logger.warning(error) - continue - else: - input_parses.posedit.edit.ref = '' - formatted_variant = str(input_parses) - else: - if re.search('bounds', error) or re.search('intronic variant', error): - try: - hn.normalize(input_parses) - except hgvs.exceptions.HGVSError as e: - fn.exceptPass() - if re.search('bounds', str(error)): - try: - identity_info = self.hdp.get_tx_identity_info(input_parses.ac) - ref_start = identity_info[3] - ref_end = identity_info[4] - if re.match(r'-', str( - input_parses.posedit.pos.start)) and input_parses.posedit.pos.start.offset == 0: - # upstream positions - boundary = int('-' + str(ref_start)) - remainder = int(str(input_parses.posedit.pos.start)) - boundary - input_parses.posedit.pos.start.base = boundary - input_parses.posedit.pos.start.offset = remainder - if re.match(r'-', str( - input_parses.posedit.pos.end)) and input_parses.posedit.pos.end.offset == 0: - boundary = int('-' + str(ref_start)) - remainder = int(str(input_parses.posedit.pos.end)) - boundary - input_parses.posedit.pos.end.base = boundary - input_parses.posedit.pos.end.offset = remainder - if re.match(r'\*', str( - input_parses.posedit.pos.start)) and input_parses.posedit.pos.start.offset == 0: - # downstream positions - tot_end_pos = str(input_parses.posedit.pos.start).replace('*', '') - ts_seq = self.sf.fetch_seq(input_parses.ac) - boundary = len(ts_seq) - ref_end - 
input_parses.posedit.pos.start.base = boundary - offset = int(tot_end_pos) - int(boundary) - input_parses.posedit.pos.start.offset = offset - if re.match(r'\*', str( - input_parses.posedit.pos.end)) and input_parses.posedit.pos.end.offset == 0: - tot_end_pos = str(input_parses.posedit.pos.end).replace('*', '') - ts_seq = self.sf.fetch_seq(input_parses.ac) - boundary = len(ts_seq) - ref_end - input_parses.posedit.pos.end.base = boundary - offset = int(tot_end_pos) - int(boundary) - input_parses.posedit.pos.end.offset = offset - - # Create a lose vm instance - lose_vm = hgvs.variantmapper.VariantMapper(self.hdp, - replace_reference=True, - prevalidation_level=None - ) - - - report_gen = self.myevm_t_to_g(input_parses, no_norm_evm, - primary_assembly, hn) - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant: Instead use ' + fn.valstr( - report_gen) - except Exception as e: - fn.exceptPass() - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - else: - pass - else: - pass - - try: - input_parses = evm.c_to_n(input_parses) - except hgvs.exceptions.HGVSError as e: - error = str(e) - my_variant.warnings += ': ' + str(error) - logger.warning(str(e)) - continue - - if re.search(r'n.1-', str(input_parses)): - input_parses = evm.n_to_c(input_parses) - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' - genomic_position = self.myevm_t_to_g(input_parses, no_norm_evm, primary_assembly, - hn) - error = error + fn.valstr(genomic_position) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - else: - pass - - # Re-map input_parses back to c. 
variant - input_parses = evm.n_to_c(input_parses) - - # Intronic positions in UTRs - if re.search(r'\d\-\d', str(input_parses)) or re.search(r'\d\+\d', str(input_parses)): - # Can we go c-g-c - try: - to_genome = self.myevm_t_to_g(input_parses, no_norm_evm, primary_assembly, - hn) - to_tx = evm.g_to_t(to_genome, input_parses.ac) - except hgvs.exceptions.HGVSInvalidIntervalError as e: - error = str(e) - if re.search('bounds', error): - try: - identity_info = self.hdp.get_tx_identity_info(input_parses.ac) - ref_start = identity_info[3] - ref_end = identity_info[4] - if re.match(r'-', str(input_parses.posedit.pos.start)): - # upstream positions - boundary = int('-' + str(ref_start)) - remainder = int(str(input_parses.posedit.pos.start)) - boundary - input_parses.posedit.pos.start.base = boundary - input_parses.posedit.pos.start.offset = remainder - if re.match(r'-', str(input_parses.posedit.pos.end)): - boundary = int('-' + str(ref_start)) - remainder = int(str(input_parses.posedit.pos.end)) - boundary - input_parses.posedit.pos.end.base = boundary - input_parses.posedit.pos.end.offset = remainder - if re.match(r'\*', str(input_parses.posedit.pos.start)): - # downstream positions - tot_end_pos = str(input_parses.posedit.pos.start).replace('*', '') - ts_seq = self.sf.fetch_seq(input_parses.ac) - boundary = len(ts_seq) - ref_end - input_parses.posedit.pos.start.base = boundary - te1, te2 = tot_end_pos.split('+') - tot_end_pos = int(te1) + int(te2) - offset = int(tot_end_pos) - int(boundary) - input_parses.posedit.pos.start.offset = offset - if re.match(r'\*', str(input_parses.posedit.pos.end)): - tot_end_pos = str(input_parses.posedit.pos.end).replace('*', '') - ts_seq = self.sf.fetch_seq(input_parses.ac) - boundary = len(ts_seq) - ref_end - input_parses.posedit.pos.end.base = boundary - te1, te2 = tot_end_pos.split('+') - tot_end_pos = int(te1) + int(te2) - offset = int(tot_end_pos) - int(boundary) - input_parses.posedit.pos.end.offset = offset - - report_gen = 
self.myevm_t_to_g(input_parses, no_norm_evm, - primary_assembly, hn) - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' + fn.valstr( - report_gen) - except Exception as e: - fn.exceptPass() - else: - pass - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - - except hgvs.exceptions.HGVSDataNotAvailableError as e: - error = str(e) - if 'Alignment is incomplete' in error: - e_list = error.split('~') - gens = [] - for el in e_list: - el_l = el.split('/') - if el_l[-1] == '': - continue - gens.append(el_l[-1]) - acs = '; '.join(gens) - error = 'Cannot map ' + fn.valstr( - input_parses) + ' to a genomic position. ' + input_parses.ac + ' can only be partially aligned to genomic reference sequences ' + acs - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - - elif re.search(r'\d\-', str(input_parses)) or re.search(r'\d\+', str(input_parses)): - # Quick look at syntax validation - try: - self.vr.validate(input_parses) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if re.search('bounds', error): - try: - report_gen = self.myevm_t_to_g(input_parses, no_norm_evm, primary_assembly, - hn) - except hgvs.exceptions.HGVSError as e: - fn.exceptPass() - else: - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + fn.valstr( - report_gen) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - elif re.search('insertion length must be 1', error): - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - elif re.search('base start position must be <= end position', error): - correction = copy.deepcopy(input_parses) - st = input_parses.posedit.pos.start - ed = input_parses.posedit.pos.end - correction.posedit.pos.start = ed - correction.posedit.pos.end = st - error = error + ': Did you mean ' + str(correction) + '?' - # error = 'Interval start position ' + str(input_parses.posedit.pos.start) + ' > interval end position ' + str(input_parses.posedit.pos.end) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - - # Create a specific minimal evm with no normalizer and no replace_reference - # Have to use this method due to potential multi chromosome error, note, normalizes but does not replace sequence - try: - output = self.noreplace_myevm_t_to_g(input_parses, evm, self.hdp, primary_assembly, self.vm, hn, - self.hp, self.sf, no_norm_evm) - except hgvs.exceptions.HGVSDataNotAvailableError as e: - tx_ac = input_parses.ac - try: - gene_symbol = self.db.get_gene_symbol_from_transcriptID(tx_ac) - except: - gene_symbol = 'none' - if gene_symbol == 'none': - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - else: - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - 
continue - except ValueError as e: - error = str(e) - if re.search('> end', error): - error = 'Interval start position ' + str( - input_parses.posedit.pos.start) + ' > interval end position ' + str( - input_parses.posedit.pos.end) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if re.search('base start position must be <= end position', error): - correction = copy.deepcopy(input_parses) - st = input_parses.posedit.pos.start - ed = input_parses.posedit.pos.end - correction.posedit.pos.start = ed - correction.posedit.pos.end = st - error = error + ': Did you mean ' + str(correction) + '?' - error = 'Interval start position ' + str( - input_parses.posedit.pos.start) + ' > interval end position ' + str( - input_parses.posedit.pos.end) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - else: - error = str(e) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - - try: - evm.g_to_t(output, input_parses.ac) - except hgvs.exceptions.HGVSError as e: - error = str(e) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - - try: - self.vr.validate(output) - except hgvs.exceptions.HGVSError as e: - error = str(e) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - - else: - # All other variation - try: - self.vr.validate(input_parses) - except hgvs.exceptions.HGVSUnsupportedOperationError: - fn.exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - """ - #Phil: Honestly not sure what the purpose of any of these is, we act the same regardless of what - #kind of error it is. 
- if re.search('Length implied by coordinates', error): - # Applies to del and inv - # NOTE, there has been no normalization at all so this error is valid here - my_variant.warnings += ': ' + str(error) - # Will apply to > del and inv - if re.search('does not agree with reference sequence', error): - my_variant.warnings += ': ' + str(error) - # ensures x_y for insertions - if re.search('insertion length must be 1', error): - my_variant.warnings += ': ' + str(error) - # Boundary issue - if re.search('Variant coordinate is out of the bound of CDS region', error): - my_variant.warnings += ': ' + str(error) - """ - # This catches errors in introns - if re.search('base start position must be <= end position', error): - correction = copy.deepcopy(input_parses) - st = input_parses.posedit.pos.start - ed = input_parses.posedit.pos.end - correction.posedit.pos.start = ed - correction.posedit.pos.end = st - error = error + ': Did you mean ' + str(correction) + '?' - error = 'Interval start position ' + str( - input_parses.posedit.pos.start) + ' > interval end position ' + str( - input_parses.posedit.pos.end) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - - except hgvs.exceptions.HGVSDataNotAvailableError as e: - error = e - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('bounds', error): - error = error + ' (' + input_parses.ac + ')' - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - fn.exceptPass() + # Mitochondrial variants + toskip = format_converters.mitochondrial(my_variant, self) + if toskip: + continue + toskip = format_converters.proteins(my_variant, self) + if toskip: + continue - elif input_parses.type == 'n': - if re.search(r'\+', str(input_parses)) or re.search(r'\-', str(input_parses)): - # Catch variation in UTRs - # These should be in the sequence so can be directly validated. 
Need to pass to n. - try: - self.vr.validate(input_parses) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('intronic variant', error): - pass - elif re.search('datums is ill-defined', error): - called_ref = input_parses.posedit.edit.ref - to_n = evm.c_to_n(input_parses) - actual_ref = to_n.posedit.edit.ref - if called_ref != actual_ref: - error = 'Variant reference (' + called_ref + ') does not agree with reference sequence (' + actual_ref + ')' - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - else: - input_parses.posedit.edit.ref = '' - formatted_variant = str(input_parses) + trapped_input = str(my_variant.hgvs_formatted) + toskip = format_converters.rna(my_variant, self) + if toskip: + continue - elif re.search('base must be >=1 for datum = SEQ_START or CDS_END', error): - error = 'The given coordinate is outside the bounds of the reference sequence.' + # COLLECT gene symbol, name and ACCESSION INFORMATION + # Gene symbol + if my_variant.reftype != ':g.': + toskip = collect_info.get_transcript_info(my_variant, self) + print(toskip, my_variant.hgvs_formatted, my_variant.hgvs_genomic) + if toskip: + continue - try: - if re.match('-', str(input_parses.posedit.pos.start)): - # upstream positions - boundary = 1 - remainder = int(str(input_parses.posedit.pos.start)) - boundary - remainder = remainder + 1 - input_parses.posedit.pos.start.base = boundary - input_parses.posedit.pos.start.offset = remainder - if re.match('-', str(input_parses.posedit.pos.end)): - boundary = 1 - remainder = int(str(input_parses.posedit.pos.end)) - boundary - remainder = remainder + 1 - input_parses.posedit.pos.end.base = boundary - input_parses.posedit.pos.end.offset = remainder - report_gen = self.myevm_t_to_g(input_parses, no_norm_evm, primary_assembly, - hn) - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + fn.valstr( - report_gen) - except Exception as e: - fn.exceptPass() - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - else: - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue + # Now start mapping from genome to transcripts - if re.search(r'n.1-', str(input_parses)): - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' - genomic_position = self.myevm_t_to_g(input_parses, no_norm_evm, primary_assembly, - hn) - error = error + fn.valstr(genomic_position) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) + if my_variant.reftype == ':g.': + toskip = mappers.gene_to_transcripts(my_variant, self) + print(toskip, my_variant.hgvs_formatted, my_variant.hgvs_genomic) + if toskip: continue - else: - pass - - if re.search(r'\d\-', str(input_parses)) or re.search(r'\d\+', str(input_parses)): - # Quick look at syntax validation - try: - self.vr.validate(input_parses) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if re.search('bounds', error): - try: - report_gen = self.myevm_t_to_g(input_parses, no_norm_evm, primary_assembly, - hn) - except hgvs.exceptions.HGVSError as e: - fn.exceptPass() - else: - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + fn.valstr( - report_gen) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - elif re.search('insertion length must be 1', error): - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - elif re.search('base start position must be <= end position', error): - correction = copy.deepcopy(input_parses) - st = input_parses.posedit.pos.start - ed = input_parses.posedit.pos.end - correction.posedit.pos.start = ed - correction.posedit.pos.end = st - error = error + ': Did you mean ' + str(correction) + '?' - # error = 'Interval start position ' + str(input_parses.posedit.pos.start) + ' > interval end position ' + str(input_parses.posedit.pos.end) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - elif re.search('Cannot validate sequence of an intronic variant', error): - try: - test_g = self.myevm_t_to_g(input_parses, no_norm_evm, primary_assembly, - hn) - back_to_n = evm.g_to_t(test_g, input_parses.ac) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('bounds', error): - report_gen = self.myevm_t_to_g(input_parses, no_norm_evm, - primary_assembly, hn) - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + fn.valstr( - report_gen) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - else: - fn.exceptPass() - - # Create a specific minimal evm with no normalizer and no replace_reference - # Have to use this method due to potential multi chromosome error, note, normalizes but does not replace sequence - try: - output = self.noreplace_myevm_t_to_g(input_parses, evm, self.hdp, primary_assembly, self.vm, hn, - self.hp, self.sf, no_norm_evm) - except hgvs.exceptions.HGVSDataNotAvailableError as e: - tx_ac = input_parses.ac - try: - gene_symbol = self.db.get_gene_symbol_from_transcriptID(tx_ac) - except: - gene_symbol = 'none' - if gene_symbol == 'none': - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - else: - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - except ValueError as e: - error = str(e) - if re.search('> end', error): - error = 'Interval start position ' + str( - input_parses.posedit.pos.start) + ' > interval end position ' + str( - input_parses.posedit.pos.end) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if re.search('base start position must be <= end position', error): - correction = copy.deepcopy(input_parses) - st = input_parses.posedit.pos.start - ed = input_parses.posedit.pos.end - correction.posedit.pos.start = ed - correction.posedit.pos.end = st - error = error + 
': Did you mean ' + str(correction) + '?' - error = 'Interval start position ' + str( - input_parses.posedit.pos.start) + ' > interval end position ' + str( - input_parses.posedit.pos.end) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - try: - self.vr.validate(output) - except hgvs.exceptions.HGVSError as e: - error = str(e) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - - else: - # All other variation - try: - self.vr.validate(input_parses) - except hgvs.exceptions.HGVSUnsupportedOperationError: - fn.exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - """ - if re.search('Length implied by coordinates', error): - # Applies to del and inv - # NOTE, there has been no normalization at all so this error is valid here - my_variant.warnings += ': ' + str(error) - continue - # Will apply to > del and inv - if re.search('does not agree with reference sequence', error): - my_variant.warnings += ': ' + str(error) - continue - # ensures x_y for insertions - if re.search('insertion length must be 1', error): - my_variant.warnings += ': ' + str(error) - continue - # Boundary issue - if re.search('Variant coordinate is out of the bound of CDS region', error): - my_variant.warnings += ': ' + str(error) - continue - """ - # This catches errors in introns - if re.search('base start position must be <= end position', error): - correction = copy.deepcopy(input_parses) - st = input_parses.posedit.pos.start - ed = input_parses.posedit.pos.end - correction.posedit.pos.start = ed - correction.posedit.pos.end = st - error = error + ': Did you mean ' + str(correction) + '?' 
- error = 'Interval start position ' + str( - input_parses.posedit.pos.start) + ' > interval end position ' + str( - input_parses.posedit.pos.end) - logger.warning(str(error)) - my_variant.warnings += ': ' + str(error) - continue - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - except hgvs.exceptions.HGVSDataNotAvailableError as e: - error = e - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('bounds', error): - error = error + ' (' + input_parses.ac + ')' - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - else: - pass - logger.trace("Variant structure and contents searches passed", my_variant) - # Mitochondrial variants - """ - Reformat m. into the new HGVS standard which is now m again! - """ - if format_type == ':m.' or re.match('NC_012920.1', str(input_parses.ac)) or re.match('NC_001807.4', - str(input_parses.ac)): - hgvs_mito = copy.deepcopy(input_parses) - if (re.match('NC_012920.1', str(hgvs_mito.ac)) and hgvs_mito.type == 'g') or ( - re.match('NC_001807.4', str(hgvs_mito.ac)) and hgvs_mito.type == 'g'): - hgvs_mito.type = 'm' - caution = '' - try: - self.vr.validate(hgvs_mito) - except hgvs.exceptions.HGVSError as e: - error = caution + ': ' + str(e) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - except KeyError as e: - error = caution + ': Currently unable to validate ' + hgvs_mito.ac + ' sequence variation' - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - else: - # Any transcripts? 
- rel_var = self.relevant_transcripts(hgvs_mito, evm, alt_aln_method, reverse_normalizer) - hgvs_genomic = copy.deepcopy(hgvs_mito) - if len(rel_var) == 0: - my_variant.genomic_g = fn.valstr(hgvs_mito) - my_variant.description = 'Homo sapiens mitochondrion, complete genome' - logger.info('Homo sapiens mitochondrion, complete genome') - continue - # Currently we are not expecting this path to be activated because not m. transcripts seem to be NM_ - # This route may throw up errors in the future - else: - pass - - # handle :p. - if format_type == ':p.': - error = 'false' - # Try to validate the variant - try: - hgvs_object = self.hp.parse_hgvs_variant(formatted_variant) - except hgvs.exceptions.HGVSError as e: - error = str(e) - try: - self.vr.validate(hgvs_object) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - else: - # Get accurate descriptions from the relevant databases - # RefSeq databases - if alt_aln_method != 'genebuild': - # Gene description - requires GenBank search to get all the required info, i.e. 
transcript variant ID - # accession number - hgvs_object = self.hp.parse_hgvs_variant(formatted_variant) - accession = hgvs_object.ac - # Look for the accession in our database - # Connect to database and send request - record = self.entrez_efetch(db="nuccore", id=accession, rettype="gb", retmode="text") - try: - description = record.description - except: - description = 'Unable to recover the description of ' + accession + ' from Entrez' - try: - self.vr.validate(hgvs_object) - except hgvs.exceptions.HGVSError as e: - error = str(e) - else: - error = str( - hgvs_object) + ' is HGVS compliant and contains a valid reference amino acid description' - reason = 'Protein level variant descriptions are not fully supported due to redundancy in the genetic code' - my_variant.warnings += ': ' + str(reason) + ': ' + str(error) - my_variant.protein = str(hgvs_object) - logger.warning(str(reason) + ": " + str(error)) - continue - - # handle :r. - """ - convert r, into c. - """ - trapped_input = input - if format_type == ':r.': - hgvs_input = self.hp.parse_hgvs_variant(input) # Traps the hgvs variant of r. for further use - # Change to coding variant - format_type = ':c.' - # Change input to reflect! 
- try: - hgvs_c = self.va_func.hgvs_r_to_c(hgvs_input) - except hgvs.exceptions.HGVSDataNotAvailableError as e: - error = str(e) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - input = str(hgvs_c) - formatted_variant = str(hgvs_c) - - # COLLECT gene symbol, name and ACCESSION INFORMATION - # Gene symbol - logger.trace("Handled mitochondrial variants", my_variant) - """ - Identifies the transcript reference sequence name and HGNC gene symbol - """ - if (format_type != ':g.'): - error = 'false' - hgvs_vt = self.hp.parse_hgvs_variant(formatted_variant) - try: - tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - logger.warning(error) - if error != 'false': - error = 'Please inform UTA admin of the following error: ' + str(error) - issue_link = "https://bitbucket.org/biocommons/uta/issues?status=new&status=open" - reason = "VariantValidator cannot recover information for transcript " + str( - hgvs_vt.ac) + ' beacuse it is not available in the Universal Transcript Archive' - my_variant.warnings += ': ' + str(reason) - logger.warning(str(reason) + ": " + str(error)) - continue - else: - # Get hgnc Gene name from command - hgnc = tx_id_info[6] - issue_link = 'false' - - # ACCESS THE GENE INFORMATION RECORDS ON THE UTA DATABASE - # Refseq accession - tx_for_gene = self.tx_for_gene(hgnc, self.hdp) - refseq_ac = self.ng_extract(tx_for_gene) - - # Additional gene info - gene_info = self.hdp.get_gene_info(hgnc) - # Chromosomal location - try: - maploc = gene_info[1] - except: - maploc = '' - chr_loc = ("Chromosome location: " + maploc) - - # Get accurate transcript descriptions from the relevant databases - # RefSeq databases - if alt_aln_method != 'genebuild': - # Gene description - requires GenBank search to get all the required info, i.e. 
transcript variant ID - # accession number - hgvs_object = self.hp.parse_hgvs_variant(formatted_variant) - accession = hgvs_object.ac - # Look for the accession in our database - # Connect to database and send request - entry = self.db.in_entries(accession, 'transcript_info') - - # Analyse the returned data and take the necessary actions - # If the error key exists - if 'error' in entry: - # Open a hgvs exception log file in append mode - error = entry['description'] - my_variant.warnings += ': ' + str( - error) + ': A Database error occurred, please contact admin' - logger.warning(str(error) + ": A Database error occurred, please contact admin") - continue - - # If the accession key is found - elif 'accession' in entry: - description = entry['description'] - # If the current entry is too old - if entry['expiry'] == 'true': - dbaction = 'update' - try: - entry = self.db.data_add(accession=accession, validator=self) - except hgvs.exceptions.HGVSError as e: - error = 'Transcript %s is not currently supported' % (accession) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - except Exception as e: - error = 'Unable to assign transcript identity records to ' + accession + ', potentially an obsolete record :' - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - hgnc_gene_info = entry['description'] - else: - hgnc_gene_info = entry['description'] - # If the none key is found add the description to the database - elif 'none' in entry: - dbaction = 'insert' - try: - entry = self.db.data_add(accession=accession, validator=self) - except Exception as e: - logger.warning(str(e)) - error = 'Unable to assign transcript identity records to ' + accession + ', potentially an obsolete record :' - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - hgnc_gene_info = entry['description'] - - # If no correct keys are found - else: - # Open a hgvs exception log file in append mode - error = 
'Unknown error type' - my_variant.warnings += ': ' + str( - error) + ': A Database error occurred, please contact admin' - logger.warning(str(error)) - continue - - # Ensembl databases - else: - # accession number - hgvs_object = self.hp.parse_hgvs_variant(formatted_variant) - accession = hgvs_object.ac - # Look for the accession in our database - # Connect to database and send request - entry = self.db.in_entries(accession, 'transcript_info') - - # Analyse the returned data and take the necessary actions - # If the error key exists - if 'error' in entry: - # Open a hgvs exception log file in append mode - error = entry['description'] - my_variant.warnings += ': ' + str( - error) + ': A Database error occurred, please contact admin' - logger.warning(str(error)) - continue - - # If the accession key is found - elif 'accession' in entry: - description = entry['description'] - # If the current entry is too old - if entry['expiry'] == 'true': - dbaction = 'update' - entry = self.db.data_add(accession=accession, validator=self) - hgnc_gene_info = entry['description'] - else: - hgnc_gene_info = entry['description'] - # If the none key is found add the description to the database - elif 'none' in entry: - dbaction = 'insert' - try: - entry = self.db.data_add(accession=accession, validator=self) - except Exception as e: - logger.warning(str(e)) - error = 'Unable to assign transcript identity records to ' + accession + ', potentially an obsolete record :' - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - hgnc_gene_info = entry['description'] - - # If no correct keys are found - else: - # Open a hgvs exception log file in append mode - error = 'Unknown error type' - my_variant.warnings += ': ' + str( - error) + ': A Database error occurred, please contact admin' - logger.warning(str(error)) - continue - - # Genomic type variants will need to be mapped to transcripts - """ - The following section is used to project genomic variants 
accurately onto - all relevant transcripts - """ - - if (format_type == ':g.'): - g_query = self.hp.parse_hgvs_variant(formatted_variant) - - # Genomic coordinates can be validated immediately - error = 'false' - try: - self.vr.validate(g_query) - except hgvs.exceptions.HGVSError as e: - error = str(e) - except KeyError: - error = 'Reference sequence ' + hgvs_genomic.ac + ' is either not supported or does not exist' - if error != 'false': - reason = 'Invalid variant description' - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - else: - pass - - # Set test to see if Norm alters the coords - g_test = hn.normalize(g_query) - - # Perform test - if g_query.posedit.pos != g_test.posedit.pos: - # my_variant.warnings += ': ' + 'Input variant description normalized to ' + str(g_test) - hgvs_genomic = g_test - else: - hgvs_genomic = g_query - - # Collect rel_var - # rel_var is a keyworded list of relevant transcripts with associated coding variants - """ - Initial simple projection from the provided g. position all overlapping - transcripts - """ - rel_var = self.relevant_transcripts(hgvs_genomic, evm, alt_aln_method, reverse_normalizer) - - # Double check rel_vars have not been missed when mapping from a RefSeqGene - if len(rel_var) != 0 and re.match('NG_', str(hgvs_genomic.ac)): - for var in rel_var: - hgvs_coding_variant = self.hp.parse_hgvs_variant(var) - try: - hgvs_genomic = self.myevm_t_to_g(hgvs_coding_variant, no_norm_evm, - primary_assembly, hn) - except hgvs.exceptions.HGVSError as e: - try_rel_var = [] - else: - try_rel_var = self.relevant_transcripts(hgvs_genomic, evm, alt_aln_method, - reverse_normalizer) - if len(try_rel_var) > len(rel_var): - rel_var = try_rel_var - break - else: - continue - - # Tripple check this assumption by querying the gene position database - if len(rel_var) == 0: - vcf_dict = vvHGVS.hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, self.sf) - not_di = str(hgvs_genomic.ac) + ':g.' 
+ str(vcf_dict['pos']) + '_' + str( - int(vcf_dict['pos']) + (len(vcf_dict['ref']) - 1)) + 'del' + vcf_dict['ref'] + 'ins' + \ - vcf_dict['alt'] - hgvs_not_di = self.hp.parse_hgvs_variant(not_di) - rel_var = self.relevant_transcripts(hgvs_not_di, evm, alt_aln_method, - reverse_normalizer) - - # list return statements - """ - If mapping to transcripts has been unsuccessful, provide relevant details - """ - if len(rel_var) == 0: - - # Check for NG_ - rsg = re.compile(r'^NG_') - if rsg.search(formatted_variant): - # parse - hgvs_refseqgene = self.hp.parse_hgvs_variant(formatted_variant) - # Convert to chromosomal position - refseqgene_data = self.rsg_to_chr(hgvs_refseqgene, primary_assembly, hn, self.vr) - # There should only ever be one description returned - refseqgene_data = refseqgene_data[0] - - # Extract data - if refseqgene_data['valid'] == 'true': - input = refseqgene_data['hgvs_genomic'] - # re_submit - # Tag the line so that it is not written out - my_variant.warnings += ': ' + formatted_variant + ' automapped to genome position ' + str( - input) - query = variant.Variant(my_variant.original, quibble=input, warnings=my_variant.warnings, primary_assembly=my_variant.primary_assembly, order=ordering) - - coding = 'intergenic' - self.batch_list.append(query) - else: - error = 'Mapping unavailable for RefSeqGene ' + formatted_variant + ' using alignment method = ' + alt_aln_method - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - - # Chromosome build is not supported or intergenic??? 
- else: - sfm = vvChromosomes.supported_for_mapping(hgvs_genomic.ac, primary_assembly) - if sfm == 'true': - try: - self.vr.validate(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: - error = str(e) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - else: - # Map to RefSeqGene if available - refseqgene_data = self.chr_to_rsg(hgvs_genomic, hn, self.vr) - rsg_data = '' - # Example {'gene': 'NTHL1', 'hgvs_refseqgene': 'NG_008412.1:g.3455_3464delCAAACACACA', 'valid': 'true'} - for data in refseqgene_data: - if data['valid'] == 'true': - data['hgvs_refseqgene'] = self.hp.parse_hgvs_variant(data['hgvs_refseqgene']) - data['hgvs_refseqgene'] = fn.valstr(data['hgvs_refseqgene']) - rsg_data = rsg_data + data['hgvs_refseqgene'] + ' (' + data['gene'] + '), ' - - error = 'No transcripts found that fully overlap the described variation in the genomic sequence' - # set output type flag - set_output_type_flag = 'intergenic' - # set genomic and where available RefSeqGene outputs - my_variant.warnings += ': ' + str(error) - my_variant.genomic_g = fn.valstr(hgvs_genomic) - my_variant.genomic_r = str(rsg_data.split('(')[0]) - logger.warning(str(error)) - continue - else: - error = 'Please ensure the requested chromosome version relates to a supported genome build. Supported genome builds are: GRCh37, GRCh38, hg19 and hg38' - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - - else: - # Tag the line so that it is not written out - my_variant.write = False - - """ - Gap aware projection from g. to c. 
- """ - - # Set variables for problem specific warnings - gapped_alignment_warning = '' - corrective_action_taken = '' - gapped_transcripts = '' - auto_info = '' - - # Create a pseudo VCF so that normalization can be applied and a delins can be generated - hgvs_genomic_variant = hgvs_genomic - # Reverse normalize hgvs_genomic_variant: NOTE will replace ref - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic_variant) - hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - - # VCF - vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, - reverse_normalizer, self.sf) - chr = vcf_dict['chr'] - pos = vcf_dict['pos'] - ref = vcf_dict['ref'] - alt = vcf_dict['alt'] - - # Generate an end position - end = str(int(pos) + len(ref) - 1) - pos = str(pos) - - # take a look at the input genomic variant for potential base salvage - stash_ac = vcf_dict['chr'] - stash_pos = int(vcf_dict['pos']) - stash_ref = vcf_dict['ref'] - stash_alt = vcf_dict['alt'] - stash_end = end - # Re-Analyse genomic positions - if re.match('NG_', str(stash_input)): - c = self.hp.parse_hgvs_variant(rel_var[0]) - if hasattr(c.posedit.edit, 'ref') and c.posedit.edit.ref is not None: - c.posedit.edit.ref = c.posedit.edit.ref.upper() - if hasattr(c.posedit.edit, 'alt') and c.posedit.edit.alt is not None: - c.posedit.edit.alt = c.posedit.edit.alt.upper() - stash_input = self.myevm_t_to_g(c, no_norm_evm, primary_assembly, hn) - if re.match('NC_', str(stash_input)) or re.match('NT_', str(stash_input)) or re.match('NW_', - str( - stash_input)): - try: - hgvs_stash = self.hp.parse_hgvs_variant(stash_input) - except: - hgvs_stash = stash_input - if hasattr(hgvs_stash.posedit.edit, 'ref') and hgvs_stash.posedit.edit.ref is not None: - hgvs_stash.posedit.edit.ref = hgvs_stash.posedit.edit.ref.upper() - if hasattr(hgvs_stash.posedit.edit, 'alt') and hgvs_stash.posedit.edit.alt is not None: - hgvs_stash.posedit.edit.alt = hgvs_stash.posedit.edit.alt.upper() 
- - stash_ac = hgvs_stash.ac - # MAKE A NO NORM HGVS2VCF - stash_dict = vvHGVS.pos_lock_hgvs2vcf(hgvs_stash, primary_assembly, reverse_normalizer, self.sf) - stash_ac = hgvs_stash.ac - stash_pos = int(stash_dict['pos']) - stash_ref = stash_dict['ref'] - stash_alt = stash_dict['alt'] - # Generate an end position - stash_end = str(stash_pos + len(stash_ref) - 1) - - # Store a not real deletion insertion - stored_hgvs_not_delins = self.hp.parse_hgvs_variant(str( - hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) - stash_hgvs_not_delins = self.hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_genomic_5pr.type + '.' + str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) - - # Set non-valid caution to false - non_valid_caution = 'false' - - # make an empty rel_var - nw_rel_var = [] - - # loop through rel_var and amend where required - for var in rel_var: - # Store the current hgvs:c. description - saved_hgvs_coding = self.hp.parse_hgvs_variant(var) - - # Remove un-selected transcripts - if select_transcripts != 'all': - tx_ac = saved_hgvs_coding.ac - # If it's in the selected tx dict, keep it - if tx_ac.split('.')[0] in list(select_transcripts_dict.keys()): - pass - # If not get rid of it! 
- else: - continue - - # Get orientation of the gene wrt genome and a list of exons mapped to the genome - ori = self.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=hgvs_genomic_5pr.ac, - alt_aln_method=alt_aln_method) - orientation = int(ori[0]['alt_strand']) - intronic_variant = 'false' - - if orientation == -1: - # position genomic at its most 5 prime position - try: - query_genomic = reverse_normalizer.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript ant test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): - pass - else: - hgvs_seek_var = saved_hgvs_coding - - elif orientation != -1: - # position genomic at its most 3 prime position - try: - query_genomic = hn.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): - pass - else: - hgvs_seek_var = 
saved_hgvs_coding - - try: - intron_test = hn.normalize(hgvs_seek_var) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - intronic_variant = 'hard_fail' - else: - # Double check to see whether the variant is actually intronic? - for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' - - if intronic_variant != 'hard_fail': - if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', - str(hgvs_seek_var.posedit.pos)): - # Double check to see whether the variant is actually intronic? 
- for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' - - if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str(hgvs_seek_var.posedit.pos)): - # Double check to see whether the variant is actually intronic? - for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' - - # If exonic, process - if intronic_variant != 'true': - # map form reverse normalized g. to c. - hgvs_from_5n_g = no_norm_evm.g_to_t(hgvs_genomic_5pr, saved_hgvs_coding.ac) - - # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths - disparity_deletion_in = ['false', 'false'] - if stored_hgvs_not_delins != '': - # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) - # This test will only occur in dup of single base, insertion or substitution - if not re.search('_', str(hgvs_not_delins.posedit.pos)): - if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', - hgvs_genomic_5pr.posedit.edit.type): - # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos - plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) - plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 - plussed_hgvs_not_delins.posedit.edit.ref = '' - transcript_variant = no_norm_evm.g_to_t(plussed_hgvs_not_delins, - str(saved_hgvs_coding.ac)) - if (( - transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( - hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - else: - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = 
hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - else: - pass - else: - pass - - try: - tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSInvalidIntervalError: - tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_genomic_5pr, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - tx_hgvs_not_delins = saved_hgvs_coding - - # Create normalized version of tx_hgvs_not_delins - rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) - # Check for +ve base and adjust - if (re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) or re.search( - r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end))): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - 
rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - fn.exceptPass() - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.myevm_t_to_g(test_tx_var, no_norm_evm, - primary_assembly, hn) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.myevm_t_to_g(test_tx_var, no_norm_evm, - primary_assembly, hn) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: - # pass - - # Check for -ve base and adjust - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - 
rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - fn.exceptPass() - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base back to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # Delete the ref - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # Add the additional base to the ALT - start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) - rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.myevm_t_to_g(test_tx_var, no_norm_evm, - primary_assembly, hn) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.myevm_t_to_g(test_tx_var, no_norm_evm, - primary_assembly, hn) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - else: - pass - - # Logic - if len(hgvs_not_delins.posedit.edit.ref) < len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( - 
hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['chromosome', gap_length] - elif len(hgvs_not_delins.posedit.edit.ref) > len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( - rn_tx_hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] - else: - hgvs_stash_t = self.vm.g_to_t(stash_hgvs_not_delins, saved_hgvs_coding.ac) - if len(stash_hgvs_not_delins.posedit.edit.ref) > len( - hgvs_stash_t.posedit.edit.ref): - try: - hn.normalize(hgvs_stash_t) - except: - fn.exceptPass() - else: - gap_length = len(stash_hgvs_not_delins.posedit.edit.ref) - len( - hgvs_stash_t.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] - try: - tx_hgvs_not_delins = self.vm.c_to_n(hgvs_stash_t) - except: - tx_hgvs_not_delins = hgvs_stash_t - hgvs_not_delins = stash_hgvs_not_delins - elif hgvs_stash_t.posedit.pos.start.offset != 0 or hgvs_stash_t.posedit.pos.end.offset != 0: - disparity_deletion_in = ['transcript', 'Requires Analysis'] - try: - tx_hgvs_not_delins = self.vm.c_to_n(hgvs_stash_t) - except: - tx_hgvs_not_delins = hgvs_stash_t - hgvs_not_delins = stash_hgvs_not_delins - hgvs_genomic_5pr = stash_hgvs_not_delins - else: - pass - - # Final sanity checks - try: - self.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) - except Exception as e: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - hgvs_not_delins = saved_hgvs_coding - disparity_deletion_in = ['false', 'false'] - logger.warning(str(e)) - try: - hn.normalize(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - hgvs_not_delins = saved_hgvs_coding - 
disparity_deletion_in = ['false', 'false'] - elif re.match('Normalization of intronic variants is not supported', error): - # We know that this cannot be because of an intronic variant, so must be aligned to tx gap - disparity_deletion_in = ['transcript', 'Requires Analysis'] - logger.warning(error) - # Pre-processing of tx_hgvs_not_delins - try: - if tx_hgvs_not_delins.posedit.edit.alt is None: - tx_hgvs_not_delins.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( - tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = self.hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) - - # GAP IN THE TRANSCRIPT DISPARITY DETECTED - if disparity_deletion_in[0] == 'transcript': - gap_position = '' - gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + primary_assembly - - # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', - str( - tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', - str( - tx_hgvs_not_delins.posedit.pos.end))): - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - # Copy the current variant - tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) - try: - if tx_gap_fill_variant.posedit.edit.alt is None: - tx_gap_fill_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + 
'.' + str( - tx_gap_fill_variant.posedit.pos.start) + '_' + str( - tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = self.hp.parse_hgvs_variant( - tx_gap_fill_variant_delins_from_dup) - - # Identify which half of the NOT-intron the start position of the variant is in - if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - - try: - tx_gap_fill_variant = self.vm.n_to_c(tx_gap_fill_variant) - except: - fn.exceptPass() - genomic_gap_fill_variant = self.vm.t_to_g(tx_gap_fill_variant, - reverse_normalized_hgvs_genomic.ac) - genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - - try: - c_tx_hgvs_not_delins = self.vm.n_to_c(tx_hgvs_not_delins) - except Exception: - c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = self.vm.t_to_g(c_tx_hgvs_not_delins, - hgvs_genomic_5pr.ac) - - # Ensure an ALT exists - try: - if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_gap_fill_variant_delins_from_dup = 
genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( - genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = self.hp.parse_hgvs_variant( - genomic_gap_fill_variant_delins_from_dup) - genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( - genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = self.hp.parse_hgvs_variant( - genomic_gap_fill_variant_alt_delins_from_dup) - - # Correct insertion alts - if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = self.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - genomic_gap_fill_variant_alt.posedit.pos.end.base) - genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - append_ref[1] - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) - else: - # Deletions with no ins - pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = genomic_gap_fill_variant.posedit.pos.start.base - alt_start = 
genomic_gap_fill_variant_alt.posedit.pos.start.base - ref_base_dict = {} - for base in reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = ref_start + 1 - - alt_base_dict = {} - - # NEED TO SEARCH FOR RANGE = and replace with interval_range - # Need to search for int and replace with integer - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, - genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): - if integer == alt_start: - alt_base_dict[integer] = str(''.join(alternate_bases)) - else: - alt_base_dict[integer] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, - genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): - if integer in list(alt_base_dict.keys()): - alternate_sequence_bases.append(alt_base_dict[integer]) - else: - alternate_sequence_bases.append(ref_base_dict[integer]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Add the new alt to the gap fill variant and generate transcript variant - genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = self.vm.g_to_t(genomic_gap_fill_variant, - tx_gap_fill_variant.ac) - - # Set warning - gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - disparity_deletion_in[1] = [gap_size] - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - else: - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - auto_info = auto_info + '%s' % (gap_position) - else: - if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # In this instance, we have identified a transcript gap but the n. version of - # the transcript variant but do not have a position which actually hits the gap, - # so the variant likely spans the gap, and is not picked up by an offset. 
- try: - c1 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - g1 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) - g3 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - ng2 = hn.normalize(g2) - g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - len(g3.posedit.edit.ref) - 1) - try: - c2 = self.vm.g_to_t(g3, c1.ac) - if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - pass - else: - tx_hgvs_not_delins = c2 - try: - tx_hgvs_not_delins = self.vm.c_to_n(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSError: - fn.exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError: - fn.exceptPass() - - if re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.vm.g_to_t(g3, c1.ac) - 
hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - 
for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - gpe = for_location_c.posedit.pos.end.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', - str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - c2 = self.vm.g_to_t(g2, c2.ac) - reference = c1.posedit.edit.ref + c2.posedit.edit.ref[1:] - alternate = c1.posedit.edit.alt + c2.posedit.edit.ref[1:] - c3 = copy.deepcopy(c1) - c3.posedit.pos.end = c2.posedit.pos.end - c3.posedit.edit.ref = '' # reference - c3.posedit.edit.alt = alternate - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - 1 - gpe = for_location_c.posedit.pos.end.base - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - else: - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) + '\n' - hgvs_refreshed_variant = tx_hgvs_not_delins - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - - # GAP IN THE CHROMOSOME - elif disparity_deletion_in[0] == 'chromosome': - # Set warning variables - gap_position = '' - gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + primary_assembly - hgvs_refreshed_variant = tx_hgvs_not_delins - # Warn - auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( - hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(disparity_deletion_in[ - 1]) + ' transcript base(s) that fail to align to chromosome ' + str( - hgvs_genomic.ac) + '\n' - gapped_transcripts = gapped_transcripts + str(hgvs_refreshed_variant.ac) + ' ' - else: - # Try the push - hgvs_stash = copy.deepcopy(stash_hgvs_not_delins) - stash_ac = hgvs_stash.ac - # Make a hard left and hard right not delins g. - stash_dict_right = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, self.sf) - stash_pos_right = int(stash_dict_right['pos']) - stash_ref_right = stash_dict_right['ref'] - stash_alt_right = stash_dict_right['alt'] - stash_end_right = str(stash_pos_right + len(stash_ref_right) - 1) - stash_hgvs_not_delins_right = self.hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' 
+ str( - stash_pos_right) + '_' + stash_end_right + 'del' + stash_ref_right + 'ins' + stash_alt_right) - stash_dict_left = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, primary_assembly, - reverse_normalizer, self.sf) - stash_pos_left = int(stash_dict_left['pos']) - stash_ref_left = stash_dict_left['ref'] - stash_alt_left = stash_dict_left['alt'] - stash_end_left = str(stash_pos_left + len(stash_ref_left) - 1) - stash_hgvs_not_delins_left = self.hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' + str( - stash_pos_left) + '_' + stash_end_left + 'del' + stash_ref_left + 'ins' + stash_alt_left) - # Map in-situ to the transcript left and right - try: - tx_hard_right = self.vm.g_to_t(stash_hgvs_not_delins_right, saved_hgvs_coding.ac) - except Exception as e: - tx_hard_right = saved_hgvs_coding - else: - normalize_stash_right = hn.normalize(stash_hgvs_not_delins_right) - if str(normalize_stash_right.posedit) == str(stash_hgvs_not_delins.posedit): - tx_hard_right = saved_hgvs_coding - try: - tx_hard_left = self.vm.g_to_t(stash_hgvs_not_delins_left, saved_hgvs_coding.ac) - except Exception as e: - tx_hard_left = saved_hgvs_coding - else: - normalize_stash_left = hn.normalize(stash_hgvs_not_delins_left) - if str(normalize_stash_left.posedit) == str(stash_hgvs_not_delins.posedit): - tx_hard_left = saved_hgvs_coding - # The Logic - Currently limited to genome gaps - if len(stash_hgvs_not_delins_right.posedit.edit.ref) < len( - tx_hard_right.posedit.edit.ref): - tx_hard_right = hn.normalize(tx_hard_right) - gap_position = '' - gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' may be an artefact of aligning the transcripts listed below with genome build ' + primary_assembly - hgvs_refreshed_variant = tx_hard_right - gapped_transcripts = gapped_transcripts + str(tx_hard_right.ac) + ' ' - elif len(stash_hgvs_not_delins_left.posedit.edit.ref) < len( - tx_hard_left.posedit.edit.ref): - tx_hard_left = hn.normalize(tx_hard_left) - gap_position = '' - 
gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' may be an artefact of aligning the transcripts listed below with genome build ' + primary_assembly - hgvs_refreshed_variant = tx_hard_left - gapped_transcripts = gapped_transcripts + str(tx_hard_left.ac) + ' ' - else: - # Keep the same by re-setting rel_var - hgvs_refreshed_variant = saved_hgvs_coding - - # Edit the output - if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( - hgvs_refreshed_variant.type)): - hgvs_refreshed_variant = evm.n_to_c(hgvs_refreshed_variant) - else: - pass - try: - hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) - if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - hgvs_refreshed_variant.posedit.edit.ref[-1] == \ - hgvs_refreshed_variant.posedit.edit.alt[-1]: - hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - 0:-1] - hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - 0:-1] - hgvs_refreshed_variant.posedit.pos.end.base = hgvs_refreshed_variant.posedit.pos.end.base - 1 - hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) - elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - hgvs_refreshed_variant.posedit.edit.ref[0] == \ - hgvs_refreshed_variant.posedit.edit.alt[0]: - hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - 1:] - hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - 1:] - hgvs_refreshed_variant.posedit.pos.start.base = hgvs_refreshed_variant.posedit.pos.start.base + 1 - hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) - except Exception as e: - error = str(e) - # Ensure the final variant is not intronic nor does it cross exon boundaries - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - hgvs_refreshed_variant = 
saved_hgvs_coding - else: - pass - fn.exceptPass() - # Send to empty nw_rel_var - nw_rel_var.append(hgvs_refreshed_variant) - - # Otherwise these variants need to be set - else: - corrective_action_taken = '' - gapped_alignment_warning = '' - # Send to empty nw_rel_var - nw_rel_var.append(saved_hgvs_coding) - - # Warn the user that the g. description is not valid - if gapped_alignment_warning != '': - if disparity_deletion_in[0] == 'transcript': - corrective_action_taken = 'Automap has deleted ' + str( - disparity_deletion_in[1]) + ' bp from chromosomal reference sequence ' + str( - hgvs_genomic.ac) + ' to ensure perfect alignment with transcript reference sequence(s)' + gapped_transcripts - if disparity_deletion_in[0] == 'chromosome': - corrective_action_taken = 'Automap has added ' + str( - disparity_deletion_in[1]) + ' bp to chromosomal reference sequence ' + str( - hgvs_genomic.ac) + ' to ensure perfect alignment with transcript reference sequence(s) ' + gapped_transcripts - - # Add additional data to the front of automap - if auto_info != '': - automap = auto_info + '\n' + automap - - rel_var = copy.deepcopy(nw_rel_var) - - # Set the values and append to batch_list - for c_description in rel_var: - query = variant.Variant(my_variant.original, quibble=str(c_description), warnings=my_variant.warnings, primary_assembly=my_variant.primary_assembly, order=ordering) - self.batch_list.append(query) - logger.warning("Continue reached when mapping transcript types to variants") - # Call next description - continue # TYPE = :c. if format_type == ':c.' 
or format_type == ':n.': @@ -5650,7 +3726,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr updated_transcript_variant) # Set the data - set_output_type_flag = 'gene' + my_variant.output_type_flag = 'gene' my_variant.description = hgnc_gene_info my_variant.coding = str(hgvs_coding) my_variant.genomic_r = str(hgvs_refseq) @@ -5666,7 +3742,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr except KeyboardInterrupt: raise except: - set_output_type_flag = 'error' + my_variant.output_type_flag = 'error' error = 'Validation error' my_variant.warnings = str(error) exc_type, exc_value, last_traceback = sys.exc_info() @@ -7409,7 +5485,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # For gene outputs, i.e. those that hit transcripts # dotter = '' - if set_output_type_flag == 'gene': + if my_variant.output_type_flag == 'gene': validation_output['flag'] = 'gene_variant' validation_error_counter = 0 validation_obsolete_counter = 0 @@ -7439,7 +5515,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Should only ever be 1 output as an error or a warning of the following types # Gene symbol as reference sequence # Gene as transcript reference sequence - if set_output_type_flag == 'warning': + if my_variant.output_type_flag == 'warning': validation_output['flag'] = 'warning' validation_error_counter = 0 validation_warning_counter = 0 @@ -7456,7 +5532,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Intergenic variants validation_intergenic_counter = 0 - if set_output_type_flag == 'intergenic': + if my_variant.output_type_flag == 'intergenic': validation_output['flag'] = 'intergenic' for valid_v in batch_out: validation_intergenic_counter = validation_intergenic_counter + 1 From e9d16b92d8ff23261c44abd4ccea72322ee7e370 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 10 Apr 2019 12:28:33 +0100 
Subject: [PATCH 057/223] Started moving c_to_g mapper over to the mappers file --- VariantValidator/modules/mappers.py | 3235 +++++++++++++++++++++++ VariantValidator/modules/variant.py | 1 + VariantValidator/modules/vvMixinCore.py | 12 + 3 files changed, 3248 insertions(+) diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index 23069588..26507dbb 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -1,4 +1,8 @@ import hgvs +import re +import copy +import time +import sys from .vvLogging import logger from . import vvHGVS from .variant import Variant @@ -172,3 +176,3234 @@ def gene_to_transcripts(variant, validator): return True return False + +def transcripts_to_gene(variant, validator): + """This seems to use the quibble and not the HGVS formatted variant format.""" + + # Flag for validation + valid = 'false' + boundary = 'false' + warning = '' + caution = '' + # Collect information for genomic level validation + obj = validator.hp.parse_hgvs_variant(str(variant.quibble)) + + tx_ac = obj.ac + + input = str(variant.quibble) + formatted_variant = str(variant.quibble) + + # Do we keep it? + if validator.select_transcripts != 'all': + if tx_ac in list(validator.select_transcripts_dict_plus_version.keys()): + pass + # If not get rid of it! + else: + # By marking it as Do Not Write and continuing through the validation loop + variant.write = False + return True + else: + pass + + print(variant.hgvs_formatted) + print(variant.quibble) + # Set a cross_variant object + cross_variant = 'false' + # Se rec_var to '' so it can be updated later + rec_var = '' + + # First task is to get the genomic equivalent, and print useful error messages if it can't be found. 
+ try: + to_g = validator.myevm_t_to_g(obj, variant.no_norm_evm, variant.primary_assembly, variant.hn) + print('Genomic:', to_g) + genomic_ac = to_g.ac + except hgvs.exceptions.HGVSDataNotAvailableError as e: + if ('~' in str(e) and 'Alignment is incomplete' in str(e)) or "No relevant genomic mapping options" in str(e): + # Unable to map the input variant onto a genomic position + if '~' in str(e) and 'Alignment is incomplete' in str(e): + error_list = str(e).split('~')[:-1] + combos = [ + 'Full alignment data between the specified transcript reference sequence and all GRCh37 and GRCh38 ' + 'genomic reference sequences (including alternate chromosome assemblies, patches and RefSeqGenes) ' + 'are not available: Consequently the input variant description cannot be fully validated and is ' + 'not supported: Use the Gene to Transcripts function to determine whether an updated transcript ' + 'reference sequence is available'] + # Partial alignment data is available for the following genomic reference sequences: '] + error = '; '.join(combos) + error = error.replace(': ;', ': ') + else: + error = str(e) + error = error + ': Consequently the input variant description cannot be fully validated and is not ' \ + 'supported: Use the Gene to Transcripts function to determine whether an updated ' \ + 'transcript reference sequence is available' + variant.warnings += ': ' + error + logger.warning(error) + return True + try: + gene_symbol = validator.db.get_gene_symbol_from_transcriptID(tx_ac) + except: + gene_symbol = None + if gene_symbol is None: + error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, ' \ + 'please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to ' \ + 'https://variantvalidator.org/ref_finder/, or select an alternative genome build' + else: + error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, ' \ + 'please select an alternative version of ' + 
tx_ac + ' by submitting ' + tx_ac + ' or ' + \ + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' + + variant.warnings += ': ' + error + logger.warning(error) + return True + except TypeError: + try: + gene_symbol = validator.db.get_gene_symbol_from_transcriptID(tx_ac) + except: + gene_symbol = 'none' + if gene_symbol == 'none': + error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, ' \ + 'please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to ' \ + 'https://variantvalidator.org/ref_finder/, or select an alternative genome build' + else: + error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, ' \ + 'please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + \ + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' + variant.warnings += ': ' + error + logger.warning(error) + return True + + # Get orientation of the gene wrt genome and a list of exons mapped to the genome + ori = validator.tx_exons(tx_ac=tx_ac, alt_ac=genomic_ac, alt_aln_method=validator.alt_aln_method) + #print('exons:', ori) + orientation = int(ori[0]['alt_strand']) + intronic_variant = 'false' + + # Collect variant sequence information via normalisation (normalizer) or if intronic via mapping + # INTRONIC OFFSETS - Required for Exon table + # Variable to collect offset to exon boundary + ex_offset = 0 + plus = re.compile(r"\d\+\d") # finds digit + digit + minus = re.compile(r"\d\-\d") # finds digit - digit + + geno = re.compile(r':g.') + if plus.search(input) or minus.search(input): + if 'error' in str(to_g): + if validator.alt_aln_method != 'genebuild': + error = "If the following error message does not address the issue and the problem persists please " \ + "contact admin: " + str(to_g) + variant.warnings += ': ' + error + logger.warning(error) + return 
True + + else: + error = "If the following error message does not address the issue and the problem persists please " \ + "contact admin: " + str(to_g) + variant.warnings += ': ' + error + logger.warning(error) + return True + + else: + # Insertions at exon boundaries are miss-handled by vm.g_to_t + if (obj.posedit.edit.type == 'ins' and + obj.posedit.pos.start.offset == 0 and + obj.posedit.pos.end.offset != 0) or (obj.posedit.edit.type == 'ins' and + obj.posedit.pos.start.offset != 0 and + obj.posedit.pos.end.offset == 0): + formatted_variant = str(obj) + else: + # Normalize was I believe to replace ref. Mapping does this anyway + # to_g = variant.hn.normalize(to_g) + formatted_variant = str(validator.myevm_g_to_t(variant.evm, to_g, tx_ac)) + tx_ac = '' + + elif ':g.' in input: + if plus.search(formatted_variant) or minus.search(formatted_variant): + to_g = validator.genomic(formatted_variant, variant.no_norm_evm, variant.primary_assembly,variant.hn) + es = re.compile(r'error') + if es.search(str(to_g)): + if validator.alt_aln_method != 'genebuild': + error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g + reason = "An error has occurred" + excep = "%s -- %s -- %s\n" % (time.ctime(), reason, formatted_variant) + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + + else: + error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g + reason = "An error has occurred" + excep = "%s -- %s -- %s\n" % (time.ctime(), reason, formatted_variant) + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + else: + # Insertions at exon boundaries are miss-handled by vm.g_to_t + if ( + obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset == 0 and obj.posedit.pos.end.offset != 0) or ( + obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset != 0 and 
obj.posedit.pos.end.offset == 0): + formatted_variant = str(obj) + else: + # Normalize was I believe to replace ref. Mapping does this anyway + # to_g = hn.normalize(to_g) + formatted_variant = str(validator.myevm_g_to_t(variant.evm, to_g, tx_ac)) + tx_ac = '' + + else: + # Normalize the variant + error = 'false' + try: + h_variant = variant.hn.normalize(obj) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Unsupported normalization of variants spanning the exon-intron boundary', + error): + h_variant = obj + formatted_variant = formatted_variant + caution = 'This coding sequence variant description spans at least one intron' + automap = 'Use of the corresponding genomic sequence variant descriptions may be invalid. Please refer to https://www35.lamp.le.ac.uk/recommendations/' + variant.warnings += ': ' + str(caution) + ': ' + str( + automap) + logger.warning(str(caution) + ": " + str(automap)) + else: + formatted_variant = str(h_variant) + + tx_ac = '' + # Create a crosser (exon boundary crossed) variant + crossed_variant = str(variant.evm._maybe_normalize(obj)) + if formatted_variant == crossed_variant: + cross_variant = 'false' + else: + hgvs_crossed_variant = variant.evm._maybe_normalize(obj) + cross_variant = [ + "Coding sequence allowing for exon boundary crossing (default = no crossing)", + crossed_variant, hgvs_crossed_variant.ac] + cr_available = 'true' + + # control of cross_variant + if boundary == 'false': + cross_variant = 'false' + + error = validator.validateHGVS(formatted_variant) + if error == 'false': + valid = 'true' + else: + excep = "%s -- %s -- %s\n" % (time.ctime(), error, formatted_variant) + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + + # Tackle the plus intronic offset + cck = 'false' + if (plus.search(input)): + # Regular expression catches the start of the interval only based on .00+00 pattern + inv_start = re.compile(r"\.\d+\+\d") + if 
(inv_start.search(input)): + # Find pattern e.g. +0000 and assign to a variable + off_value = re.search(r"(\+\d+)", input) + off_value = off_value.group(1) + # Integerise the value and assign to ex_offset + ex_offset = int(off_value) + cck = 'true' + if (minus.search(input)): + # Regular expression catches the start of the interval only based on .00-00 pattern + inv_start = re.compile(r"\.\d+\-\d") + if (inv_start.search(input)): + # Find pattern e.g. -0000 and assign to a variable + off_value = re.search(r"(\-\d+)", input) + off_value = off_value.group(1) + # Integerise the value and assign to ex_offset + ex_offset = int(off_value) + cck = 'true' + + # COORDINATE CHECKER + # hgvs will handle incorrect coordinates so need to automap errors + # Make sure any input intronic coordinates are correct + # Get the desired transcript + pat_r = re.compile(':r.') + pat_g = re.compile(':g.') + if cck == 'true': + dl = re.compile('del') + # This should only ever hit coding and RNA variants + if dl.search(formatted_variant): + # RNA + if pat_r.search(variant.trapped): + + coding = validator.coding(formatted_variant, validator.hp) + trans_acc = coding.ac + # c to Genome coordinates - Map the variant to the genome + pre_var = validator.genomic(formatted_variant, variant.no_norm_evm, variant.primary_assembly,variant.hn) + # genome back to C coordinates + post_var = validator.myevm_g_to_t(variant.evm, pre_var, trans_acc) + + test = validator.hp.parse_hgvs_variant(input) + if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: + caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' + automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' + # automapping of variant completed + # Change to rna variant + # THERE IS NO SUCH THING AS QUERY. 
THIS WOULDN'T HAVE WORKED AND ISN'T RUN IN ANY TESTS + query = variant + posedit = query.posedit + posedit = posedit.lower() + query.posedit = posedit + query.type = 'r' + post_var = str(query) + automap = variant.trapped + ' automapped to ' + str(post_var) + variant.warnings += ': ' + str(caution) + ': ' + str( + automap) + relevant = "Select the automapped transcript and click Submit to analyse" + rel_var = [] + rel_var.append(post_var) + # Add gene symbols to the link + cp_rel = copy.copy(rel_var) + del rel_var[:] + for accessions in cp_rel: + error = 'false' + hgvs_vt = validator.hp.parse_hgvs_variant(str(accessions)) + try: + tx_id_info = validator.hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error != 'false': + accessions = ['', str(hgvs_vt)] + rel_var.append(accessions) + + else: + # Get hgnc Gene name from command + data = validator.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) + if data['error'] != 'false': + error = data['error'] + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + continue + else: + # Set the hgnc name correctly + # If the name is correct no record will be found + if int(data['record']['response']['numFound']) == 0: + current = tx_id_info[6] + else: + current = data['record']['response']['docs'][0]['symbol'] + accessions = [str(current), str(hgvs_vt)] + rel_var.append(accessions) + # Add gene symbols to the link + cp_rel = copy.copy(rel_var) + del rel_var[:] + for accessions in cp_rel: + error = 'false' + hgvs_vt = validator.hp.parse_hgvs_variant(str(accessions[1])) + try: + tx_id_info = validator.hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error != 'false': + accessions = ['', str(hgvs_vt)] + rel_var.append(accessions) + else: + # Get hgnc Gene name from command + data = validator.hgnc_rest( + path="/search/prev_symbol/" + tx_id_info[6]) + if data['error'] != 'false': + error = data['error'] 
+ variant.warnings += ': ' + str( + error) + logger.warning(str(error)) + continue + else: + # Set the hgnc name correctly + # If the name is correct no record will be found + if int(data['record']['response']['numFound']) == 0: + current = tx_id_info[6] + else: + current = data['record']['response']['docs'][0]['symbol'] + accessions = [str(current), str(hgvs_vt)] + rel_var.append(accessions) + # Kill current line and append for re-submission + # Tag the line so that it is not written out + variant.write = False + # Set the values and append to batch_list + query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=variant.primary_assembly, order=variant.order) + validator.batch_list.append(query) + + # Coding + else: + coding = validator.coding(formatted_variant, validator.hp) + trans_acc = coding.ac + # c to Genome coordinates - Map the variant to the genome + pre_var = validator.hp.parse_hgvs_variant(formatted_variant) + try: + pre_var = validator.myevm_t_to_g(pre_var, variant.no_norm_evm, variant.primary_assembly, + variant.hn) + except: + e = sys.exc_info()[1] + error = str(e) + reason = 'Input coordinates may be invalid' + if error == 'expected from_start_i <= from_end_i': + error = 'Automap is unable to correct the input exon/intron boundary coordinates, please check your variant description' + variant.warnings += ': ' + str(error) + return True + else: + fn.exceptPass() + else: + fn.exceptPass() + # genome back to C coordinates + try: + post_var = validator.myevm_g_to_t(variant.evm, pre_var, trans_acc) + except hgvs.exceptions.HGVSError as error: + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + query = post_var + test = validator.hp.parse_hgvs_variant(input) + if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: + caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected 
transcript:' + automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' + # automapping of variant completed + automap = variant.trapped + ' automapped to ' + str(post_var) + variant.warnings += str(caution) + ': ' + str(automap) + relevant = "Select the automapped transcript and click Submit to analyse" + rel_var = [] + rel_var.append(post_var) + # Add gene symbols to the link + cp_rel = copy.copy(rel_var) + del rel_var[:] + for accessions in cp_rel: + error = 'false' + hgvs_vt = validator.hp.parse_hgvs_variant(str(accessions)) + try: + tx_id_info = validator.hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error != 'false': + accessions = ['', str(hgvs_vt)] + rel_var.append(accessions) + else: + # Get hgnc Gene name from command + data = validator.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) + if data['error'] != 'false': + error = data['error'] + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + continue + + else: + # Set the hgnc name correctly + # If the name is correct no record will be found + if int(data['record']['response']['numFound']) == 0: + current = tx_id_info[6] + else: + current = data['record']['response']['docs'][0]['symbol'] + accessions = [str(current), str(hgvs_vt)] + rel_var.append(accessions) + # Add gene symbols to the link + cp_rel = copy.copy(rel_var) + del rel_var[:] + for accessions in cp_rel: + error = 'false' + hgvs_vt = validator.parse_hgvs_variant(str(accessions[1])) + try: + tx_id_info = validator.hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error != 'false': + accessions = ['', str(hgvs_vt)] + rel_var.append(accessions) + else: + # Get hgnc Gene name from command + data = validator.hgnc_rest( + path="/search/prev_symbol/" + tx_id_info[6]) + if data['error'] != 'false': + error = data['error'] + variant.warnings += ': ' + str( + 
error) + logger.warning(str(error)) + continue + + else: + # Set the hgnc name correctly + # If the name is correct no record will be found + if int(data['record']['response']['numFound']) == 0: + current = tx_id_info[6] + else: + current = data['record']['response']['docs'][0]['symbol'] + accessions = [str(current), str(hgvs_vt)] + rel_var.append(accessions) + # Kill current line and append for re-submission + # Tag the line so that it is not written out + variant.write = False + # Set the values and append to batch_list + query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=variant.primary_assembly, order=variant.order) + validator.batch_list.append(query) + + else: + if pat_r.search(variant.trapped): + coding = validator.coding(formatted_variant, validator.hp) + trans_acc = coding.ac + # c to Genome coordinates - Map the variant to the genome + pre_var = validator.genomic(formatted_variant, variant.no_norm_evm, variant.primary_assembly,variant.hn) + # genome back to C coordinates + post_var = validator.myevm_g_to_t(variant.evm, pre_var, trans_acc) + + test = validator.hp.parse_hgvs_variant(input) + if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: + caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' + automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' + # automapping of variant completed + # Change to rna variant + # THERE IS NO SUCH THING AS QUERY. 
THIS WOULDN'T HAVE WORKED AND ISN'T RUN IN ANY TESTS + query = variant + posedit = query.posedit + posedit = posedit.lower() + query.posedit = posedit + query.type = 'r' + post_var = str(query) + automap = input + ' automapped to ' + post_var + variant.warnings += ': ' + str(caution) + ': ' + str( + automap) + relevant = "Select the automapped transcript and click Submit to analyse" + rel_var = [] + rel_var.append(post_var) + # Add gene symbols to the link + cp_rel = copy.copy(rel_var) + del rel_var[:] + for accessions in cp_rel: + error = 'false' + hgvs_vt = validator.hp.parse_hgvs_variant(str(accessions)) + try: + tx_id_info = validator.hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error != 'false': + accessions = ['', str(hgvs_vt)] + rel_var.append(accessions) + else: + # Get hgnc Gene name from command + data = validator.va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) + if data['error'] != 'false': + error = data['error'] + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + continue + + else: + # Set the hgnc name correctly + # If the name is correct no record will be found + if int(data['record']['response']['numFound']) == 0: + current = tx_id_info[6] + else: + current = data['record']['response']['docs'][0]['symbol'] + accessions = [str(current), str(hgvs_vt)] + rel_var.append(accessions) + # Kill current line and append for re-submission + # Tag the line so that it is not written out + variant.write = False + # Set the values and append to batch_list + query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=variant.primary_assembly, order=variant.order) + validator.batch_list.append(query) + + else: + coding = validator.coding(formatted_variant, validator.hp) + trans_acc = coding.ac + # c to Genome coordinates - Map the variant to the genome + pre_var = validator.genomic(formatted_variant, variant.no_norm_evm, 
variant.primary_assembly,variant.hn) + + # genome back to C coordinates + post_var = validator.myevm_g_to_t(variant.evm, pre_var, trans_acc) + + test = validator.hp.parse_hgvs_variant(input) + if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: + caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' + automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' + # automapping of variant completed + automap = str(variant.trapped) + ' automapped to ' + str(post_var) + variant.warnings += ': ' + str(caution) + ': ' + str( + automap) + relevant = "Select the automapped transcript and click Submit to analyse" + rel_var = [] + rel_var.append(post_var) + # Add gene symbols to the link + cp_rel = copy.copy(rel_var) + del rel_var[:] + for accessions in cp_rel: + error = 'false' + hgvs_vt = validator.hp.parse_hgvs_variant(str(accessions)) + try: + tx_id_info = validator.hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error != 'false': + accessions = ['', str(hgvs_vt)] + rel_var.append(accessions) + else: + # Get hgnc Gene name from command + data = validator.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) + if data['error'] != 'false': + reason = 'Cannot currently display the required information:' + error = data['error'] + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + continue + + else: + # Set the hgnc name correctly + # If the name is correct no record will be found + if int(data['record']['response']['numFound']) == 0: + current = tx_id_info[6] + else: + current = data['record']['response']['docs'][0]['symbol'] + accessions = [str(current), str(hgvs_vt)] + rel_var.append(accessions) + # Kill current line and append for re-submission + # Tag the line so that it is not written out + variant.write = False 
+ # Set the values and append to batch_list + query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=variant.primary_assembly, order=variant.order) + validator.batch_list.append(query) + + + # If cck not true + elif pat_r.search(variant.trapped): + # set input hgvs object + hgvs_rna_input = validator.hp.parse_hgvs_variant( + variant.trapped) # Traps the hgvs variant of r. for further use + inp = str(validator.hgvs_r_to_c(hgvs_rna_input)) + # Regex + plus = re.compile(r"\d\+\d") # finds digit + digit + minus = re.compile(r"\d\-\d") # finds digit - digit + if plus.search(input) or minus.search(input): + to_g = validator.genomic(inp, variant.no_norm_evm, variant.primary_assembly,variant.hn) + es = re.compile('error') + if es.search(str(to_g)): + if validator.alt_aln_method != 'genebuild': + error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g + reason = "An error has occurred" + excep = "%s -- %s -- %s\n" % (time.ctime(), reason, formatted_variant) + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + + else: + error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g + reason = "An error has occurred" + excep = "%s -- %s -- %s\n" % (time.ctime(), reason, formatted_variant) + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + + else: + # Set variants pre and post genomic norm + hgvs_inp = validator.myevm_g_to_t(variant.evm, to_g, tx_ac=obj.ac) + to_g = variant.hn.normalize(to_g) + hgvs_otp = validator.myevm_g_to_t(variant.evm, to_g, tx_ac=obj.ac) + tx_ac = '' + else: + # Set variants pre and post RNA norm + hgvs_inp = validator.hp.parse_hgvs_variant(inp) + try: + hgvs_otp = variant.hn.normalize(hgvs_inp) + except hgvs.exceptions.HGVSError as e: + hgvs_otp = hgvs_inp + tx_ac = '' + + # Set remaining variables + redit = 
str(hgvs_otp.posedit.edit) + redit = redit.lower() + hgvs_otp.posedit.edit = redit + otp = str(hgvs_otp) + query = str(hgvs_otp.posedit.pos) + test = str(hgvs_inp.posedit.pos) + query = query.replace('T', 'U') + query = query.replace('ENSU', 'ENST') + test = test.replace('T', 'U') + test = test.replace('ENSU', 'ENST') + output = otp.replace(':c.', ':r.') + # Apply coordinates test + if query != test: + caution = 'The variant description ' + input + ' requires alteration to comply with HGVS variant nomenclature:' + automap = 'Automap has corrected the variant description' + # automapping of variant completed + automap = variant.trapped + ' automapped to ' + output + variant.warnings += ': ' + str(caution) + ': ' + str(automap) + relevant = "Select the automapped transcript and click Submit to analyse" + rel_var = [] + rel_var.append(output) + # Add gene symbols to the link + cp_rel = copy.copy(rel_var) + del rel_var[:] + for accessions in cp_rel: + error = 'false' + hgvs_vt = validator.hp.parse_hgvs_variant(str(accessions)) + try: + tx_id_info = validator.hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error != 'false': + accessions = ['', str(hgvs_vt)] + rel_var.append(accessions) + else: + # Get hgnc Gene name from command + data = validator.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) + if data['error'] != 'false': + error = data['error'] + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + continue + + else: + # Set the hgnc name correctly + # If the name is correct no record will be found + if int(data['record']['response']['numFound']) == 0: + current = tx_id_info[6] + else: + current = data['record']['response']['docs'][0]['symbol'] + accessions = [str(current), str(hgvs_vt)] + rel_var.append(accessions) + # Kill current line and append for re-submission + # Tag the line so that it is not written out + variant.write = False + # Set the values and append to batch_list + query 
= Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=variant.primary_assembly, order=variant.order) + validator.batch_list.append(query) + + elif pat_g.search(input): + pass + + else: + query = validator.hp.parse_hgvs_variant(formatted_variant) + test = validator.hp.parse_hgvs_variant(input) + if query.posedit.pos != test.posedit.pos: + caution = 'The variant description ' + input + ' requires alteration to comply with HGVS variant nomenclature:' + automap = 'Automap has corrected the variant description' + # automapping of variant completed + automap = str(test) + ' automapped to ' + str(query) + variant.warnings += ': ' + str(caution) + ': ' + str(automap) + relevant = "Select the automapped transcript and click Submit to analyse" + rel_var = [] + rel_var.append(query) + # Add gene symbols to the link + cp_rel = copy.copy(rel_var) + del rel_var[:] + for accessions in cp_rel: + error = 'false' + hgvs_vt = validator.hp.parse_hgvs_variant(str(accessions)) + try: + tx_id_info = validator.hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if error != 'false': + accessions = ['', str(hgvs_vt)] + rel_var.append(accessions) + else: + # Get hgnc Gene name from command + data = validator.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) + if data['error'] != 'false': + reason = 'Cannot currently display the required information:' + error = data['error'] + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + continue + + else: + # Set the hgnc name correctly + # If the name is correct no record will be found + if int(data['record']['response']['numFound']) == 0: + current = tx_id_info[6] + else: + current = data['record']['response']['docs'][0]['symbol'] + accessions = [str(current), str(hgvs_vt)] + rel_var.append(accessions) + # Kill current line and append for re-submission + # Tag the line so that it is not written out + variant.write = False + # Set the values 
and append to batch_list + query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=variant.primary_assembly, order=variant.order) + validator.batch_list.append(query) + + # VALIDATION of intronic variants + pre_valid = validator.hp.parse_hgvs_variant(input) + post_valid = validator.hp.parse_hgvs_variant(formatted_variant) + if valid == 'false': + error = 'false' + genomic_validation = str( + validator.genomic(input, variant.no_norm_evm, variant.primary_assembly,variant.hn) ) + del_end = re.compile(r'\ddel$') + delins = re.compile(r'delins') + inv = re.compile(r'inv') + if fn.valstr(pre_valid) != fn.valstr(post_valid): + if variant.reftype != ':g.': + if caution == '': + caution = fn.valstr(pre_valid) + ' automapped to ' + fn.valstr(post_valid) + else: + pass + variant.warnings += ': ' + str(caution) + logger.warning(str(caution)) + else: + pass + else: + pass + + # Apply validation to intronic variant descriptions (should be valid but make sure) + error = validator.validateHGVS(genomic_validation) + if error == 'false': + valid = 'true' + else: + + excep = "%s -- %s -- %s\n" % (time.ctime(), error, formatted_variant) + variant.warnings += ': ' + str(error) + return True + + if valid == 'true': + var_tab = 'true' + cores = "HGVS-compliant variant descriptions" + warning + + # v0.1a1 edit + if fn.valstr(pre_valid) != fn.valstr(post_valid): + if variant.reftype == ':g.': + if caution == '': + caution = fn.valstr(pre_valid) + ' automapped to ' + fn.valstr(post_valid) + else: + pass + variant.warnings += ': ' + str(caution) + else: + pass + else: + pass + + # COLLECT VARIANT DESCRIPTIONS + ############################## + + # Coding sequence - BASED ON NORMALIZED VARIANT IF EXONIC + hgvs_coding = validator.coding(formatted_variant, validator.hp) + boundary = re.compile('exon-intron boundary') + spanning = re.compile('exon/intron') + + try: + hgvs_coding = variant.hn.normalize(hgvs_coding) + except hgvs.exceptions.HGVSError as e: 
+ error = str(e) + + # Gap compensating code status + gap_compensation = True + + # Gap gene black list + try: + gene_symbol = validator.db.get_gene_symbol_from_transcriptID(hgvs_coding.ac) + except Exception: + fn.exceptPass() + else: + # If the gene symbol is not in the list, the value False will be returned + gap_compensation = vvChromosomes.gap_black_list(gene_symbol) + + # Intron spanning variants + if re.search('boundary', str(error)) or re.search('spanning', str(error)): + try: + hgvs_coding = variant.evm._maybe_normalize(hgvs_coding) + gap_compensation = False + except hgvs.exceptions.HGVSError as error: + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + else: + pass + + # Warn status + logger.warning("gap_compensation_1 = " + str(gap_compensation)) + coding = fn.valstr(hgvs_coding) + + # RNA sequence + hgvs_rna = copy.deepcopy(hgvs_coding) + hgvs_rna = validator.hgvs_c_to_r(hgvs_rna) + rna = str(hgvs_rna) + + # Genomic sequence + hgvs_genomic = validator.myevm_t_to_g(hgvs_coding, variant.no_norm_evm, variant.primary_assembly, variant.hn) + final_hgvs_genomic = hgvs_genomic + + # genomic_possibilities + # 1. take the simple 3 pr normalized hgvs_genomic + # 2. Lock in hgvs_genomic at its most 5 prime position wrt genome + hgvs_genomic_possibilities = [] + + # Loop out gap finding code under these circumstances! 
+ if gap_compensation is True: + logger.warning('g_to_t gap code 1 active') + rn_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) + hgvs_genomic_possibilities.append(rn_hgvs_genomic) + if orientation != -1: + try: + chromosome_normalized_hgvs_coding = variant.reverse_normalizer.normalize(hgvs_coding) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + chromosome_normalized_hgvs_coding = hgvs_coding + else: + try: + chromosome_normalized_hgvs_coding = variant.hn.normalize(hgvs_coding) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + chromosome_normalized_hgvs_coding = hgvs_coding + + most_3pr_hgvs_genomic = validator.myvm_t_to_g(chromosome_normalized_hgvs_coding, hgvs_genomic.ac, + variant.no_norm_evm, variant.hn) + hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) + + # Push from side to side to try pick up odd placements + # MAKE A NO NORM HGVS2VCF + # First to the right + hgvs_stash = copy.deepcopy(hgvs_coding) + try: + hgvs_stash = variant.no_norm_evm.c_to_n(hgvs_stash) + except: + fn.exceptPass() + try: + stash_ac = hgvs_stash.ac + stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, variant.primary_assembly, variant.hn, validator.sf) + stash_pos = int(stash_dict['pos']) + stash_ref = stash_dict['ref'] + stash_alt = stash_dict['alt'] + # Generate an end position + stash_end = str(stash_pos + len(stash_ref) - 1) + # make a not real deletion insertion + stash_hgvs_not_delins = validator.hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' 
+ str( + stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + try: + stash_hgvs_not_delins = variant.no_norm_evm.n_to_c(stash_hgvs_not_delins) + except: + fn.exceptPass() + # Store a tx copy for later use + test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) + # stash_genomic = vm.t_to_g(test_stash_tx_right, hgvs_genomic.ac) + stash_genomic = validator.myvm_t_to_g(test_stash_tx_right, hgvs_genomic.ac, variant.no_norm_evm, variant.hn) + # Stash the outputs if required + # test variants = NC_000006.11:g.90403795G= (causes double identity) + # NC_000002.11:g.73675227_73675228insCTC (? incorrect assumed insertion position) + # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': + # pass + if len(test_stash_tx_right.posedit.edit.ref) == (( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + stash_tx_right = test_stash_tx_right + if hasattr(test_stash_tx_right.posedit.edit, + 'alt') and test_stash_tx_right.posedit.edit.alt is not None: + alt = test_stash_tx_right.posedit.edit.alt + else: + alt = '' + if hasattr(stash_genomic.posedit.edit, + 'alt') and stash_genomic.posedit.edit.alt is not None: + g_alt = stash_genomic.posedit.edit.alt + else: + g_alt = '' + if (len(alt) - ( + test_stash_tx_right.posedit.pos.end.base - test_stash_tx_right.posedit.pos.start.base) + 1) != ( + len(g_alt) - ( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + hgvs_genomic_possibilities.append(stash_genomic) + else: + hgvs_genomic_possibilities.append('') + elif test_stash_tx_right.posedit.edit.type == 'identity': + reform_ident = str(test_stash_tx_right).split(':')[0] + reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_right.posedit.pos) + 'del' + str( + test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) + hgvs_reform_ident = validator.hp.parse_hgvs_variant(reform_ident) + try: + variant.hn.normalize(hgvs_reform_ident) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error): + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append('') + else: + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append(stash_genomic) + else: + try: + variant.hn.normalize(test_stash_tx_right) + except hgvs.exceptions.HGVSUnsupportedOperationError: + hgvs_genomic_possibilities.append('') + else: + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append(stash_genomic) + except hgvs.exceptions.HGVSError as e: + test_stash_tx_right = copy.deepcopy(hgvs_coding) + fn.exceptPass() + # Intronic positions not supported. Will cause a Value Error + except ValueError: + test_stash_tx_right = copy.deepcopy(hgvs_coding) + fn.exceptPass() + + # Then to the left + hgvs_stash = copy.deepcopy(hgvs_coding) + try: + hgvs_stash = variant.no_norm_evm.c_to_n(hgvs_stash) + except: + fn.exceptPass() + try: + stash_ac = hgvs_stash.ac + stash_dict = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, variant.primary_assembly, variant.reverse_normalizer, + validator.sf) + stash_pos = int(stash_dict['pos']) + stash_ref = stash_dict['ref'] + stash_alt = stash_dict['alt'] + # Generate an end position + stash_end = str(stash_pos + len(stash_ref) - 1) + # make a not real deletion insertion + stash_hgvs_not_delins = validator.hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' 
+ str( + stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + try: + stash_hgvs_not_delins = variant.no_norm_evm.n_to_c(stash_hgvs_not_delins) + except: + fn.exceptPass() + # Store a tx copy for later use + test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) + # stash_genomic = vm.t_to_g(test_stash_tx_left, hgvs_genomic.ac) + stash_genomic = validator.myvm_t_to_g(test_stash_tx_left, hgvs_genomic.ac, variant.no_norm_evm, variant.hn) + # Stash the outputs if required + # test variants = NC_000006.11:g.90403795G= (causes double identity) + # NC_000002.11:g.73675227_73675228insCTC + # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': + # pass + if len(test_stash_tx_left.posedit.edit.ref) == (( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): + stash_tx_left = test_stash_tx_left + if hasattr(test_stash_tx_left.posedit.edit, + 'alt') and test_stash_tx_left.posedit.edit.alt is not None: + alt = test_stash_tx_left.posedit.edit.alt + else: + alt = '' + if hasattr(stash_genomic.posedit.edit, + 'alt') and stash_genomic.posedit.edit.alt is not None: + g_alt = stash_genomic.posedit.edit.alt + else: + g_alt = '' + + if (len(alt) - ( + test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( + len(g_alt) - ( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + hgvs_genomic_possibilities.append(stash_genomic) + else: + hgvs_genomic_possibilities.append('') + elif test_stash_tx_left.posedit.edit.type == 'identity': + reform_ident = str(test_stash_tx_left).split(':')[0] + reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_left.posedit.pos) + 'del' + str( + test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) + hgvs_reform_ident = validator.hp.parse_hgvs_variant(reform_ident) + try: + variant.hn.normalize(hgvs_reform_ident) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error): + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append('') + else: + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append(stash_genomic) + else: + try: + variant.hn.normalize(test_stash_tx_left) + except hgvs.exceptions.HGVSUnsupportedOperationError: + hgvs_genomic_possibilities.append('') + else: + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append(stash_genomic) + except hgvs.exceptions.HGVSError as e: + test_stash_tx_left = copy.deepcopy(hgvs_coding) + fn.exceptPass() + except ValueError: + test_stash_tx_left = copy.deepcopy(hgvs_coding) + fn.exceptPass() + + # direct mapping from reverse_normalized transcript insertions in the delins format + try: + if hgvs_coding.posedit.edit.type == 'ins': + most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) + most_3pr_hgvs_transcript_variant = variant.reverse_normalizer.normalize(hgvs_coding) + try: + n_3pr = validator.vm.c_to_n(most_3pr_hgvs_transcript_variant) + n_5pr = validator.vm.c_to_n(most_5pr_hgvs_transcript_variant) + except: + n_3pr = most_3pr_hgvs_transcript_variant + n_5pr = most_5pr_hgvs_transcript_variant + # Make into a delins by adding the ref bases to the variant ref and alt + pr3_ref = validator.sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, + n_3pr.posedit.pos.end.base) + pr5_ref = validator.sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, + n_5pr.posedit.pos.end.base) + most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref + 
most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[ + 0] + most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ + pr3_ref[1] + most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[ + 0] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ + pr5_ref[1] + # Map to the genome + genomic_from_most_3pr_hgvs_transcript_variant = validator.vm.t_to_g( + most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) + genomic_from_most_5pr_hgvs_transcript_variant = validator.vm.t_to_g( + most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) + # Normalize - If the variant spans a gap it should then form a static genomic variant + try: + genomic_from_most_3pr_hgvs_transcript_variant = variant.hn.normalize( + genomic_from_most_3pr_hgvs_transcript_variant) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if error == 'base start position must be <= end position': + start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base + end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start + genomic_from_most_3pr_hgvs_transcript_variant = variant.hn.normalize( + genomic_from_most_3pr_hgvs_transcript_variant) + try: + genomic_from_most_5pr_hgvs_transcript_variant = variant.hn.normalize( + genomic_from_most_5pr_hgvs_transcript_variant) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if error == 'base start position must be <= end position': + start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base + end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start + genomic_from_most_5pr_hgvs_transcript_variant = variant.hn.normalize( + 
genomic_from_most_5pr_hgvs_transcript_variant) + try: + if genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' + str( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( + genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) + + try: + if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' 
+ str( + most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( + most_3pr_hgvs_transcript_variant_delins_from_dup) + + try: + if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' + str( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( + genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) + + try: + if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' 
+ str( + most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( + most_5pr_hgvs_transcript_variant_delins_from_dup) + + if len(genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( + most_3pr_hgvs_transcript_variant.posedit.edit.alt): + hgvs_genomic_possibilities.append(genomic_from_most_3pr_hgvs_transcript_variant) + if len(genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( + most_5pr_hgvs_transcript_variant.posedit.edit.alt): + hgvs_genomic_possibilities.append(genomic_from_most_5pr_hgvs_transcript_variant) + + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + pass + + logger.info('\nGENOMIC POSSIBILITIES') + for possibility in hgvs_genomic_possibilities: + if possibility == '': + logger.info('X') + else: + logger.info(fn.valstr(possibility)) + + logger.info('\n') + + # Set variables for problem specific warnings + gapped_alignment_warning = '' + corrective_action_taken = '' + gapped_transcripts = '' + auto_info = '' + + # Mark as not disparity detected + disparity_deletion_in = ['false', 'false'] + + # Loop through to see if a gap can be located + # Set the variables required for corrective normalization + possibility_counter = 0 + suppress_c_normalization = 'false' # Applies to boundary crossing normalization + + # Copy a version of hgvs_genomic_possibilities + for possibility in hgvs_genomic_possibilities: + possibility_counter = possibility_counter + 1 + + # Loop out stash possibilities which will not spot gaps so 
are empty + if possibility == '': + continue + + # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps + hgvs_genomic_variant = copy.deepcopy(possibility) + stored_hgvs_genomic_variant = copy.deepcopy(hgvs_genomic_variant) + + # Reverse normalize hgvs_genomic_variant: NOTE will replace ref + try: + reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic_variant) + except hgvs.exceptions.HGVSError as e: + # Strange error caused by gap in genomic + error = str(e) + if re.search('base start position must be <= end position', error): + if hgvs_genomic.posedit.edit.type == 'delins': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) + if hgvs_genomic.posedit.edit.type == 'del': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) + if re.search('insertion length must be 1', error): + if hgvs_genomic.posedit.edit.type == 'ins': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) + lhb = 
validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start, end) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) + + hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + # Store a copy for later use + stored_hgvs_genomic_5pr = copy.deepcopy(hgvs_genomic_5pr) + + # Create VCF + vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, variant.primary_assembly, + variant.reverse_normalizer, validator.sf) + chr = vcf_dict['chr'] + pos = vcf_dict['pos'] + ref = vcf_dict['ref'] + alt = vcf_dict['alt'] + + # Look for exonic gaps within transcript or chromosome + no_normalized_c = 'false' # Mark true to not produce an additional normalization of c. + + # Generate an end position + end = str(int(pos) + len(ref) - 1) + pos = str(pos) + + # Store a not real deletion insertion to test for gapping + stored_hgvs_not_delins = validator.hp.parse_hgvs_variant(str( + hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' 
+ pos + '_' + end + 'del' + ref + 'ins' + alt) + v = [chr, pos, ref, alt] + + # Detect intronic variation using normalization + intronic_variant = 'false' + + # Save a copy of current hgvs_coding + try: + saved_hgvs_coding = variant.no_norm_evm.g_to_t(stored_hgvs_not_delins, hgvs_coding.ac) + except hgvs.exceptions.HGVSInvalidIntervalError as e: + if str(e) == 'start or end or both are beyond the bounds of transcript record': + saved_hgvs_coding = hgvs_coding + intronic_variant = 'true' + continue + else: + saved_hgvs_coding = variant.no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, + hgvs_coding.ac) + + # Look for normalized variant options that do not match hgvs_coding + if orientation == -1: + # position genomic at its most 5 prime position + try: + query_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript and test for movement + try: + hgvs_seek_var = variant.evm.g_to_t(query_genomic, hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if ( + hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding + + elif orientation != -1: + # position genomic at its most 3 prime position + try: + query_genomic = variant.hn.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript ant test for movement + try: + hgvs_seek_var = variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = fn.valstr(hgvs_seek_var) + seek_ac 
= str(hgvs_seek_var.ac) + if ( + hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding + + try: + intron_test = variant.hn.normalize(hgvs_seek_var) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + intronic_variant = 'hard_fail' + else: + # Double check to see whether the variant is actually intronic? + for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' + + if intronic_variant != 'hard_fail': + if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', + str(hgvs_seek_var.posedit.pos)): + # Double check to see whether the variant is actually intronic? 
+ for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' + + if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str(hgvs_seek_var.posedit.pos)): + # Double check to see whether the variant is actually intronic? + for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' + + if intronic_variant != 'true': + # Flag RefSeqGene for ammendment + # amend_RefSeqGene = 'false' + # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths + if stored_hgvs_not_delins != '': + # Refresh hgvs_not_delins from stored_hgvs_not_delins + hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) + # This test will only occur in dup of single base, insertion or substitution + if not re.search('_', str(hgvs_not_delins.posedit.pos)): + if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', + hgvs_genomic_5pr.posedit.edit.type): + # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos + plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) + plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 + plussed_hgvs_not_delins.posedit.edit.ref = '' + transcript_variant = variant.no_norm_evm.g_to_t(plussed_hgvs_not_delins, + str(saved_hgvs_coding.ac)) + if (( + transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( + hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + elif re.search('ins', + str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', + str( + hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + else: + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = 
hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + elif re.search('ins', + str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', + str( + hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + else: + pass + else: + pass + try: + tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSInvalidIntervalError: + tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, + saved_hgvs_coding.ac) + # Create normalized version of tx_hgvs_not_delins + rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) + + # Check for +1 base and adjust + if re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( + r'\+', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: + pass + + elif re.search(r'\+', 
str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + + rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + + # tx_hgvs_not_delins = rn_tx_hgvs_not_delins + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # else: + # pass + + # Check for -ve base and adjust + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( + r'\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: + pass + elif 
re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # Delete the ref + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # Add the additional base to the ALT + start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 + end = rn_tx_hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + else: + pass + + # Logic + if len(hgvs_not_delins.posedit.edit.ref) < len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( + hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['chromosome', gap_length] + elif 
len(hgvs_not_delins.posedit.edit.ref) > len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( + rn_tx_hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['transcript', gap_length] + else: + re_capture_tx_variant = [] + for internal_possibility in hgvs_genomic_possibilities: + if internal_possibility == '': + continue + + hgvs_t_possibility = validator.vm.g_to_t(internal_possibility, hgvs_coding.ac) + if hgvs_t_possibility.posedit.edit.type == 'ins': + try: + hgvs_t_possibility = validator.vm.c_to_n(hgvs_t_possibility) + except: + fn.exceptPass() + ins_ref = validator.sf.fetch_seq(hgvs_t_possibility.ac, + hgvs_t_possibility.posedit.pos.start.base - 1, + hgvs_t_possibility.posedit.pos.start.base + 1) + try: + hgvs_t_possibility = validator.vm.n_to_c(hgvs_t_possibility) + except: + fn.exceptPass() + hgvs_t_possibility.posedit.edit.ref = ins_ref + hgvs_t_possibility.posedit.edit.alt = ins_ref[ + 0] + hgvs_t_possibility.posedit.edit.alt + \ + ins_ref[1] + if internal_possibility.posedit.edit.type == 'ins': + ins_ref = validator.sf.fetch_seq(internal_possibility.ac, + internal_possibility.posedit.pos.start.base - 1, + internal_possibility.posedit.pos.end.base) + internal_possibility.posedit.edit.ref = ins_ref + internal_possibility.posedit.edit.alt = ins_ref[ + 0] + internal_possibility.posedit.edit.alt + \ + ins_ref[1] + + if len(hgvs_t_possibility.posedit.edit.ref) < len( + internal_possibility.posedit.edit.ref): + gap_length = len(internal_possibility.posedit.edit.ref) - len( + hgvs_t_possibility.posedit.edit.ref) + re_capture_tx_variant = ['transcript', gap_length, + hgvs_t_possibility] + hgvs_not_delins = internal_possibility + hgvs_genomic_5pr = internal_possibility + break + + if re_capture_tx_variant != []: + try: + tx_hgvs_not_delins = validator.vm.c_to_n(re_capture_tx_variant[2]) + except: + tx_hgvs_not_delins = re_capture_tx_variant[2] + disparity_deletion_in = re_capture_tx_variant[0:-1] + else: + 
pass + + # 'At hgvs_genomic' + # Final sanity checks + try: + validator.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + except Exception as e: + if str(e) == 'start or end or both are beyond the bounds of transcript record': + continue + try: + variant.hn.normalize(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + continue + elif re.match('Normalization of intronic variants is not supported', error): + # We know that this cannot be because of an intronic variant, so must be aligned to tx gap + disparity_deletion_in = ['transcript', 'Requires Analysis'] + + # amend_RefSeqGene = 'false' + # Recreate hgvs_genomic + if disparity_deletion_in[0] == 'transcript': + hgvs_genomic = hgvs_not_delins + + # Find oddly placed gaps where the tx variant is encompassed in the gap + if disparity_deletion_in[0] == 'false' and ( + possibility_counter == 3 or possibility_counter == 4): + rg = variant.reverse_normalizer.normalize(hgvs_not_delins) + rtx = validator.vm.g_to_t(rg, tx_hgvs_not_delins.ac) + fg = variant.hn.normalize(hgvs_not_delins) + ftx = validator.vm.g_to_t(fg, tx_hgvs_not_delins.ac) + if (rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( + ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): + exons = validator.hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, validator.alt_aln_method) + exonic = False + for ex_test in exons: + if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ + 7]) and ftx.posedit.pos.end.base in range(ex_test[6], ex_test[7]): + exonic = True + if exonic is True: + hgvs_not_delins = fg + hgvs_genomic = fg + hgvs_genomic_5pr = fg + try: + tx_hgvs_not_delins = 
validator.vm.c_to_n(ftx) + except Exception: + tx_hgvs_not_delins = ftx + disparity_deletion_in = ['transcript', 'Requires Analysis'] + + # Pre-processing of tx_hgvs_not_delins + try: + if tx_hgvs_not_delins.posedit.edit.alt is None: + tx_hgvs_not_delins.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( + tx_hgvs_not_delins.posedit.pos.start) + '_' + str( + tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins = validator.hp.parse_hgvs_variant( + tx_hgvs_not_delins_delins_from_dup) + + # GAP IN THE TRANSCRIPT DISPARITY DETECTED + if disparity_deletion_in[0] == 'transcript': + # Suppress intron boundary crossing due to non-intron intron based c. seq annotations + suppress_c_normalization = 'true' + # amend_RefSeqGene = 'true' + # ANY VARIANT WHOLLY WITHIN THE GAP + if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( + r'\-', + str( + tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( + r'\-', + str( + tx_hgvs_not_delins.posedit.pos.end))): + + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + + # Copy the current variant + tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) + try: + if tx_gap_fill_variant.posedit.edit.alt is None: + tx_gap_fill_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( + tx_gap_fill_variant.posedit.pos.start) + '_' + str( + tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant = validator.hp.parse_hgvs_variant( + tx_gap_fill_variant_delins_from_dup) + + # Identify which half of the NOT-intron the start position of the variant is in + if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + + try: + tx_gap_fill_variant = validator.vm.n_to_c(tx_gap_fill_variant) + except: + fn.exceptPass() + genomic_gap_fill_variant = validator.vm.t_to_g(tx_gap_fill_variant, + reverse_normalized_hgvs_genomic.ac) + genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref + + try: + c_tx_hgvs_not_delins = validator.vm.n_to_c(tx_hgvs_not_delins) + except Exception: + c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) + genomic_gap_fill_variant_alt = validator.vm.t_to_g(c_tx_hgvs_not_delins, + hgvs_genomic_5pr.ac) + + # Ensure an ALT exists + try: + if genomic_gap_fill_variant_alt.posedit.edit.alt is None: + genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_gap_fill_variant_delins_from_dup 
= genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( + genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant = validator.hp.parse_hgvs_variant( + genomic_gap_fill_variant_delins_from_dup) + genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( + genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt = validator.hp.parse_hgvs_variant( + genomic_gap_fill_variant_alt_delins_from_dup) + + # Correct insertion alts + if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': + append_ref = validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, + genomic_gap_fill_variant_alt.posedit.pos.end.base) + genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ + 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ + append_ref[1] + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) + if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: + alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) + else: + # Deletions with no ins + pre_alternate_bases = list( + genomic_gap_fill_variant_alt.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = genomic_gap_fill_variant.posedit.pos.start.base + alt_start = 
genomic_gap_fill_variant_alt.posedit.pos.start.base + ref_base_dict = {} + for base in reference_bases: + ref_base_dict[ref_start] = str(base) + ref_start = ref_start + 1 + + alt_base_dict = {} + + # NEED TO SEARCH FOR RANGE = and replace with interval_range + # Need to search for int and replace with integer + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, + genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, + 1): + if integer == alt_start: + alt_base_dict[integer] = str(''.join(alternate_bases)) + else: + alt_base_dict[integer] = 'X' + + # Generate the alt sequence + alternate_sequence_bases = [] + for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, + genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): + if integer in list(alt_base_dict.keys()): + alternate_sequence_bases.append(alt_base_dict[integer]) + else: + alternate_sequence_bases.append(ref_base_dict[integer]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Add the new alt to the gap fill variant and generate transcript variant + genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence + hgvs_refreshed_variant = validator.vm.g_to_t(genomic_gap_fill_variant, + tx_gap_fill_variant.ac) + + # Set warning + gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) + disparity_deletion_in[1] = [gap_size] + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + else: + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + auto_info = auto_info + '%s' % (gap_position) + + else: + if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: + # In this instance, we have identified a transcript gap but the n. version of + # the transcript variant but do not have a position which actually hits the gap, + # so the variant likely spans the gap, and is not picked up by an offset. 
+ try: + c1 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + g1 = validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g3 = validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + ng2 = variant.hn.normalize(g2) + g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( + len(g3.posedit.edit.ref) - 1) + try: + c2 = validator.vm.g_to_t(g3, c1.ac) + if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: + pass + else: + tx_hgvs_not_delins = c2 + try: + tx_hgvs_not_delins = validator.vm.c_to_n(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSError: + fn.exceptPass() + except hgvs.exceptions.HGVSInvalidVariantError: + fn.exceptPass() + + if re.search(r'\+', + str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + 
g3.posedit.edit.alt = alternate + c3 = validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\+', + str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment 
position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base + gpe = for_location_c.posedit.pos.end.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\-', + str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = 
for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\-', + str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base - 1 + gpe = for_location_c.posedit.pos.end.base + gap_position = ' between 
positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + else: + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + '\n' + tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.start.base + len( + tx_hgvs_not_delins.posedit.edit.ref) - 1 + hgvs_refreshed_variant = tx_hgvs_not_delins + + # GAP IN THE CHROMOSOME + elif disparity_deletion_in[0] == 'chromosome': + suppress_c_normalization = 'true' + # amend_RefSeqGene = 'true' + if possibility_counter == 3: + hgvs_refreshed_variant = stash_tx_right + elif possibility_counter == 4: + hgvs_refreshed_variant = stash_tx_left + else: + hgvs_refreshed_variant = chromosome_normalized_hgvs_coding + # Warn + auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( + hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' transcript base(s) that fail to align to chromosome ' + str( + hgvs_genomic.ac) + '\n' + else: + # Keep the same by re-setting rel_var + hgvs_refreshed_variant = hgvs_coding + # amend_RefSeqGene = 'false' + + # Edit the output + if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( + hgvs_refreshed_variant.type)): + hgvs_refreshed_variant = variant.no_norm_evm.n_to_c(hgvs_refreshed_variant) + else: + pass + + try: + variant.hn.normalize(hgvs_refreshed_variant) + except Exception as e: + error = str(e) + + # Ensure the final variant is not intronic nor does it cross exon boundaries + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + hgvs_refreshed_variant = saved_hgvs_coding + else: + logger.warning(error) + continue + + # Quick check to 
make sure the coding variant has not changed + try: + to_test = variant.hn.normalize(hgvs_refreshed_variant) + except: + to_test = hgvs_refreshed_variant + if str(to_test.posedit.edit) != str(hgvs_coding.posedit.edit): + # Try the next available genomic option + if hgvs_coding.posedit.edit.type == 'identity' and to_test.posedit.edit.type == 'identity': + hgvs_coding = to_test + else: + continue + # Update hgvs_genomic + hgvs_genomic = validator.myvm_t_to_g(hgvs_refreshed_variant, hgvs_genomic.ac, + variant.no_norm_evm,variant.hn) + if hgvs_genomic.posedit.edit.type == 'identity': + re_c = validator.vm.g_to_t(hgvs_genomic, hgvs_refreshed_variant.ac) + if (variant.hn.normalize(re_c)) != (variant.hn.normalize(hgvs_refreshed_variant)): + shuffle_left_g = copy.copy(hgvs_genomic) + shuffle_left_g.posedit.edit.ref = '' + shuffle_left_g.posedit.edit.alt = '' + shuffle_left_g.posedit.pos.start.base = shuffle_left_g.posedit.pos.start.base - 1 + shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 + shuffle_left_g = variant.reverse_normalizer.normalize(shuffle_left_g) + re_c = validator.vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) + if (variant.hn.normalize(re_c)) != (variant.hn.normalize(hgvs_refreshed_variant)): + hgvs_genomic = shuffle_left_g + + # If it is intronic, these vairables will not have been set + else: + # amend_RefSeqGene = 'false' + no_normalized_c = 'false' + + # Break if gap has been detected + if disparity_deletion_in[0] != 'false': + break + + # Warn user about gapping + if auto_info != '': + info_lines = auto_info.split('\n') + info_keys = {} + for information in info_lines: + info_keys[information] = '' + info_out = [] + info_out.append( + 'The displayed variants may be artefacts of aligning ' + hgvs_coding.ac + ' with genome build ' + variant.primary_assembly) + for ky in list(info_keys.keys()): + info_out.append(ky) + auto_info = '\n'.join(info_out) + auto_info = auto_info + '\nCaution should be used when reporting the 
displayed variant descriptions: If you are unsure, please contact admin' + auto_info = str(auto_info.replace('\n', ': ')) + variant.warnings += ': ' + str(auto_info) + logger.warning(str(auto_info)) + # Normailse hgvs_genomic + try: + hgvs_genomic = variant.hn.normalize(hgvs_genomic) + except hgvs.exceptions.HGVSError as e: + # Strange error caused by gap in genomic + error = str(e) + + if re.search('base start position must be <= end position', error) and \ + disparity_deletion_in[0] == 'chromosome': + if hgvs_genomic.posedit.edit.type == 'delins': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + hgvs_genomic = variant.hn.normalize(hgvs_genomic) + if hgvs_genomic.posedit.edit.type == 'del': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + hgvs_genomic = variant.hn.normalize(hgvs_genomic) + genomic = fn.valstr(hgvs_genomic) + + else: + stored_hgvs_genomic_variant = hgvs_genomic + suppress_c_normalization = 'false' + gapped_alignment_warning = '' + auto_info = '' + genomic = fn.valstr(hgvs_genomic) + + # Create pseudo VCF based on amended hgvs_genomic + hgvs_genomic_variant = hgvs_genomic + # Reverse normalize hgvs_genomic_variant: NOTE will replace ref + reverse_normalized_hgvs_genomic = 
variant.reverse_normalizer.normalize(hgvs_genomic_variant) + + hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + + # Create vcf + vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, variant.primary_assembly, + variant.reverse_normalizer, validator.sf) + chr = vcf_dict['chr'] + pos = vcf_dict['pos'] + ref = vcf_dict['ref'] + alt = vcf_dict['alt'] + + # Create a VCF call + vcf_component_list = [str(chr), str(pos), str(ref), (alt)] + vcf_genomic = '-'.join(vcf_component_list) + + # DO NOT DELETE + # Generate an end position + end = str(int(pos) + len(ref) - 1) + pos = str(pos) + + # DO NOT DELETE + stored_hgvs_not_delins = validator.hp.parse_hgvs_variant(str( + hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) + + # Apply gap code to re-format hgvs_coding + # Store the current hgvs:c. description + saved_hgvs_coding = copy.deepcopy(hgvs_coding) + + # Get orientation of the gene wrt genome and a list of exons mapped to the genome + ori = validator.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=hgvs_genomic_5pr.ac, + alt_aln_method=validator.alt_aln_method) + orientation = int(ori[0]['alt_strand']) + + # Look for normalized variant options that do not match hgvs_coding + hgvs_genomic = copy.deepcopy(hgvs_genomic_variant) + if orientation == -1: + # position genomic at its most 5 prime position + try: + query_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript ant test for movement + try: + hgvs_seek_var = variant.evm.g_to_t(query_genomic, hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + 
hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding + + elif orientation != -1: + # position genomic at its most 3 prime position + try: + query_genomic = variant.hn.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript ant test for movement + try: + hgvs_seek_var = variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): + pass + else: + hgvs_seek_var = saved_hgvs_coding + + # Loop out gap finding code under these circumstances! + logger.warning("gap_compensation_2 = " + str(gap_compensation)) + if gap_compensation is True: + logger.warning('g_to_t gap code 2 active') + # is it in an exon? + is_it_in_an_exon = 'no' + for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + # Take from stored copy + # hgvs_genomic_5pr = copy.deepcopy(stored_hgvs_genomic_5pr) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + is_it_in_an_exon = 'yes' + if is_it_in_an_exon == 'yes': + # map form reverse normalized g. to c. 
+ hgvs_from_5n_g = variant.no_norm_evm.g_to_t(hgvs_genomic_5pr, saved_hgvs_coding.ac) + + # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths + disparity_deletion_in = ['false', 'false'] + if stored_hgvs_not_delins != '': + # Refresh hgvs_not_delins from stored_hgvs_not_delins + hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) + # This test will only occur in dup of single base, insertion or substitution + if not re.search('_', str(hgvs_not_delins.posedit.pos)): + if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', + hgvs_genomic_5pr.posedit.edit.type): + # For gap in chr, map to t. - but becaouse we have pushed to 5 prime by norm, add 1 to end pos + plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) + plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 + plussed_hgvs_not_delins.posedit.edit.ref = '' + transcript_variant = variant.no_norm_evm.g_to_t(plussed_hgvs_not_delins, + str(saved_hgvs_coding.ac)) + if (( + transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( + hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', + str(hgvs_genomic_5pr.posedit.edit)) and not 
re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + else: + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', + str(hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + else: + pass + else: + pass + + hard_fail = 'false' + try: + tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) + except Exception as e: + if str(e) == 'start or end or both are beyond the bounds of transcript record': + tx_hgvs_not_delins = hgvs_coding + 
hard_fail = 'true' + + # Create normalized version of tx_hgvs_not_delins + rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) + # Check for +ve base and adjust + if re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\+', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: + fn.exceptPass() + + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + 
str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # else: + # pass + + # Check for -ve base and adjust + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: + fn.exceptPass() + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base back to next available non-offset base + # rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base - 1 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # Delete the ref + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # Add the additional base to the ALT + start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 + end = rn_tx_hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 + 
rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + else: + pass + + # Logic + if len(hgvs_not_delins.posedit.edit.ref) < len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( + hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['chromosome', gap_length] + elif len(hgvs_not_delins.posedit.edit.ref) > len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( + rn_tx_hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['transcript', gap_length] + else: + re_capture_tx_variant = [] + for internal_possibility in hgvs_genomic_possibilities: + + if internal_possibility == '': + continue + + hgvs_t_possibility = validator.vm.g_to_t(internal_possibility, hgvs_coding.ac) + if hgvs_t_possibility.posedit.edit.type == 'ins': + try: + hgvs_t_possibility = validator.vm.c_to_n(hgvs_t_possibility) + except: + fn.exceptPass() + ins_ref = validator.sf.fetch_seq(hgvs_t_possibility.ac, + hgvs_t_possibility.posedit.pos.start.base - 1, + hgvs_t_possibility.posedit.pos.start.base + 1) + try: + hgvs_t_possibility = validator.vm.n_to_c(hgvs_t_possibility) + except: + fn.exceptPass() + hgvs_t_possibility.posedit.edit.ref = ins_ref + hgvs_t_possibility.posedit.edit.alt = ins_ref[ + 0] + hgvs_t_possibility.posedit.edit.alt + \ + ins_ref[1] + if internal_possibility.posedit.edit.type == 'ins': + ins_ref = validator.sf.fetch_seq(internal_possibility.ac, + internal_possibility.posedit.pos.start.base - 1, + 
internal_possibility.posedit.pos.end.base) + internal_possibility.posedit.edit.ref = ins_ref + internal_possibility.posedit.edit.alt = ins_ref[ + 0] + internal_possibility.posedit.edit.alt + \ + ins_ref[1] + + if len(hgvs_t_possibility.posedit.edit.ref) < len( + internal_possibility.posedit.edit.ref): + gap_length = len(internal_possibility.posedit.edit.ref) - len( + hgvs_t_possibility.posedit.edit.ref) + re_capture_tx_variant = ['transcript', gap_length, hgvs_t_possibility] + hgvs_not_delins = internal_possibility + hgvs_genomic_5pr = internal_possibility + break + + if re_capture_tx_variant != []: + try: + tx_hgvs_not_delins = validator.vm.c_to_n(re_capture_tx_variant[2]) + except: + tx_hgvs_not_delins = re_capture_tx_variant[2] + disparity_deletion_in = re_capture_tx_variant[0:-1] + else: + pass + + # Final sanity checks + try: + validator.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + except Exception as e: + if str(e) == 'start or end or both are beyond the bounds of transcript record': + logger.warning(str(e)) + return True + try: + variant.hn.normalize(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + logger.warning(error) + return True + elif re.match('Normalization of intronic variants is not supported', error): + # We know that this cannot be because of an intronic variant, so must be aligned to tx gap + disparity_deletion_in = ['transcript', 'Requires Analysis'] + + if hard_fail == 'true': + disparity_deletion_in = ['false', 'false'] + + # Recreate hgvs_genomic + if disparity_deletion_in[0] == 'transcript': + hgvs_genomic = hgvs_not_delins + + # Pre-processing of tx_hgvs_not_delins + try: + if 
tx_hgvs_not_delins.posedit.edit.alt is None: + tx_hgvs_not_delins.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( + tx_hgvs_not_delins.posedit.pos.start) + '_' + str( + tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins = validator.hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) + + + # GAP IN THE TRANSCRIPT DISPARITY DETECTED + if disparity_deletion_in[0] == 'transcript': + gap_position = '' + gapped_alignment_warning = str( + hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + variant.primary_assembly + + # ANY VARIANT WHOLLY WITHIN THE GAP + if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', + str( + tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', + str( + tx_hgvs_not_delins.posedit.pos.end))): + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + + # Copy the current variant + tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) + try: + if tx_gap_fill_variant.posedit.edit.alt is None: + tx_gap_fill_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( + tx_gap_fill_variant.posedit.pos.start) + '_' + str( + tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant = validator.hp.parse_hgvs_variant( + tx_gap_fill_variant_delins_from_dup) + + # Identify which half of the NOT-intron the start position of the variant is in + if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + + try: + tx_gap_fill_variant = validator.vm.n_to_c(tx_gap_fill_variant) + except: + fn.exceptPass() + genomic_gap_fill_variant = validator.vm.t_to_g(tx_gap_fill_variant, + reverse_normalized_hgvs_genomic.ac) + genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref + + try: + c_tx_hgvs_not_delins = validator.vm.n_to_c(tx_hgvs_not_delins) + except Exception: + c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) + genomic_gap_fill_variant_alt = validator.vm.t_to_g(c_tx_hgvs_not_delins, + hgvs_genomic_5pr.ac) + + # Ensure an ALT exists + try: + if genomic_gap_fill_variant_alt.posedit.edit.alt is None: + genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_gap_fill_variant_delins_from_dup 
= genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( + genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant = validator.hp.parse_hgvs_variant( + genomic_gap_fill_variant_delins_from_dup) + genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( + genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt = validator.hp.parse_hgvs_variant( + genomic_gap_fill_variant_alt_delins_from_dup) + + # Correct insertion alts + if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': + append_ref = validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, + genomic_gap_fill_variant_alt.posedit.pos.end.base) + genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ + 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ + append_ref[1] + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) + if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: + alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) + else: + # Deletions with no ins + pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = genomic_gap_fill_variant.posedit.pos.start.base + alt_start = 
genomic_gap_fill_variant_alt.posedit.pos.start.base + ref_base_dict = {} + for base in reference_bases: + ref_base_dict[ref_start] = str(base) + ref_start = ref_start + 1 + + alt_base_dict = {} + + # NEED TO SEARCH FOR RANGE = and replace with interval_range + # Need to search for int and replace with integer + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, + genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): + if integer == alt_start: + alt_base_dict[integer] = str(''.join(alternate_bases)) + else: + alt_base_dict[integer] = 'X' + + # Generate the alt sequence + alternate_sequence_bases = [] + for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, + genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): + if integer in list(alt_base_dict.keys()): + alternate_sequence_bases.append(alt_base_dict[integer]) + else: + alternate_sequence_bases.append(ref_base_dict[integer]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Add the new alt to the gap fill variant and generate transcript variant + genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence + hgvs_refreshed_variant = validator.vm.g_to_t(genomic_gap_fill_variant, + tx_gap_fill_variant.ac) + + # Set warning + gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) + disparity_deletion_in[1] = [gap_size] + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + else: + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + auto_info = auto_info + '%s' % (gap_position) + + else: + if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: + # In this instance, we have identified a transcript gap but the n. version of + # the transcript variant but do not have a position which actually hits the gap, + # so the variant likely spans the gap, and is not picked up by an offset. 
+ try: + c1 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + g1 = validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g3 = validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + ng2 = variant.hn.normalize(g2) + g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( + len(g3.posedit.edit.ref) - 1) + try: + c2 = validator.vm.g_to_t(g3, c1.ac) + if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: + pass + else: + tx_hgvs_not_delins = c2 + try: + tx_hgvs_not_delins = validator.vm.c_to_n(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSError: + fn.exceptPass() + except hgvs.exceptions.HGVSInvalidVariantError: + fn.exceptPass() + + if re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + 
g3.posedit.edit.alt = alternate + c3 = validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position 
+ for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base + gpe = for_location_c.posedit.pos.end.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\-', + str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = 
for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base - 1 + gpe = for_location_c.posedit.pos.end.base + gap_position = ' between positions 
c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + else: + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + '\n' + hgvs_refreshed_variant = tx_hgvs_not_delins + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + + # GAP IN THE CHROMOSOME + + elif disparity_deletion_in[0] == 'chromosome': + # Set warning variables + gap_position = '' + gapped_alignment_warning = str( + hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + variant.primary_assembly + hgvs_refreshed_variant = tx_hgvs_not_delins + # Warn + auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( + hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(disparity_deletion_in[ + 1]) + ' transcript base(s) that fail to align to chromosome ' + str( + hgvs_genomic.ac) + '\n' + gapped_transcripts = gapped_transcripts + str(hgvs_refreshed_variant.ac) + ' ' + else: + # Keep the same by re-setting rel_var + hgvs_refreshed_variant = saved_hgvs_coding + + # Edit the output + if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( + hgvs_refreshed_variant.type)): + hgvs_refreshed_variant = variant.evm.n_to_c(hgvs_refreshed_variant) + else: + pass + try: + hgvs_refreshed_variant = variant.hn.normalize(hgvs_refreshed_variant) + if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ + hgvs_refreshed_variant.posedit.edit.ref[-1] == \ + hgvs_refreshed_variant.posedit.edit.alt[-1]: + hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ + 0:-1] + hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ + 0:-1] + hgvs_refreshed_variant.posedit.pos.end.base = 
hgvs_refreshed_variant.posedit.pos.end.base - 1 + hgvs_refreshed_variant = variant.hn.normalize(hgvs_refreshed_variant) + elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ + hgvs_refreshed_variant.posedit.edit.ref[0] == \ + hgvs_refreshed_variant.posedit.edit.alt[0]: + hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ + 1:] + hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ + 1:] + hgvs_refreshed_variant.posedit.pos.start.base = hgvs_refreshed_variant.posedit.pos.start.base + 1 + hgvs_refreshed_variant = variant.hn.normalize(hgvs_refreshed_variant) + except Exception as e: + error = str(e) + # Ensure the final variant is not intronic nor does it cross exon boundaries + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + hgvs_refreshed_variant = saved_hgvs_coding + else: + pass + + # Sort out equality to equality c. 
events where the code will add 2 additional bases + if hgvs_coding.posedit.edit.type == 'identity' and hgvs_refreshed_variant.posedit.edit.type == 'identity': # and len(hgvs_refreshed_variant.posedit.edit.ref) == (len(hgvs_coding.posedit.edit.ref) + 2): + pass + else: + hgvs_coding = copy.deepcopy(hgvs_refreshed_variant) + coding = fn.valstr(hgvs_coding) + formatted_variant = coding + + # OBTAIN THE RefSeqGene coordinates + # Attempt 1 = UTA + sequences_for_tx = validator.hdp.get_tx_mapping_options(hgvs_coding.ac) + recovered_rsg = [] + + for sequence in sequences_for_tx: + if re.match('^NG_', sequence[1]): + recovered_rsg.append(sequence[1]) + recovered_rsg.sort() + recovered_rsg.reverse() + + if len(recovered_rsg) > 0 and 'NG_' in recovered_rsg[0]: + refseqgene_ac = recovered_rsg[0] + else: + refseqgene_ac = '' + + # Given the difficulties with mapping to and from RefSeqGenes, we now solely rely on UTA + if refseqgene_ac != '': + hgvs_refseq = validator.vm.t_to_g(hgvs_coding, refseqgene_ac) + # Normalize the RefSeqGene Variant to the correct position + try: + hgvs_refseq = variant.hn.normalize(hgvs_refseq) + except Exception as e: + # if re.search('insertion length must be 1', error): + hgvs_refseq = 'RefSeqGene record not available' + refseq = 'RefSeqGene record not available' + hgvs_refseq_ac = 'RefSeqGene record not available' + pass + else: + refseq = fn.valstr(hgvs_refseq) + hgvs_refseq_ac = hgvs_refseq.ac + else: + hgvs_refseq = 'RefSeqGene record not available' + refseq = 'RefSeqGene record not available' + hgvs_refseq_ac = 'RefSeqGene record not available' + + # Predicted effect on protein + protein_dict = validator.myc_to_p(hgvs_coding, variant.evm, re_to_p=False) + if protein_dict['error'] == '': + hgvs_protein = protein_dict['hgvs_protein'] + protein = str(hgvs_protein) + else: + error = protein_dict['error'] + variant.warnings += ': ' + str(error) + if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': + 
hgvs_protein = protein_dict['hgvs_protein'] + protein = str(hgvs_protein) + else: + logger.error(error) + return True + + # Gene orientation wrt genome + ori = validator.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=hgvs_genomic.ac, + alt_aln_method=validator.alt_aln_method) + ori = int(ori[0]['alt_strand']) + + # Look for normalized variant options that do not match hgvs_coding + # boundary crossing normalization + # Re-Save the required variants + hgvs_seek_var = copy.deepcopy(hgvs_coding) + saved_hgvs_coding = copy.deepcopy(hgvs_coding) + + if ori == -1: + # position genomic at its most 5 prime position + try: + query_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript and test for movement + try: + hgvs_seek_var = variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if saved_hgvs_coding.posedit.edit.type != hgvs_seek_var.posedit.edit.type: + rec_var = 'false' + hgvs_seek_var = saved_hgvs_coding + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + elif suppress_c_normalization == 'true': + rec_var = 'false' + hgvs_seek_var = saved_hgvs_coding + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + elif (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + try: + automap = fn.valstr(saved_hgvs_coding) + ' normalized to ' + fn.valstr(hgvs_seek_var) + hgvs_coding = hgvs_seek_var + coding = fn.valstr(hgvs_coding) + variant.warnings += ': ' + automap + rng = 
variant.hn.normalize(query_genomic) + except NotImplementedError: + pass + try: + c_for_p = validator.vm.g_to_t(rng, hgvs_coding.ac) + except hgvs.exceptions.HGVSInvalidIntervalError as e: + c_for_p = seek_var + try: + # Predicted effect on protein + protein_dict = validator.myc_to_p(c_for_p, variant.evm, re_to_p=False) + if protein_dict['error'] == '': + hgvs_protein = protein_dict['hgvs_protein'] + protein = str(hgvs_protein) + else: + error = protein_dict['error'] + if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': + hgvs_protein = protein_dict['hgvs_protein'] + variant.warnings += ': ' + str(error) + # Replace protein description in vars table + protein = str(hgvs_protein) + except NotImplementedError: + fn.exceptPass() + else: + # Double check protein position by normalize genomic, and normalize back to c. for normalize or not to normalize issue + coding = fn.valstr(hgvs_coding) + + elif ori != -1: + # position genomic at its most 3 prime position + try: + query_genomic = variant.hn.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript and test for movement + try: + hgvs_seek_var = variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if saved_hgvs_coding.posedit.edit.type != hgvs_seek_var.posedit.edit.type: + rec_var = 'false' + hgvs_seek_var = saved_hgvs_coding + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + elif suppress_c_normalization == 'true': + rec_var = 'false' + hgvs_seek_var = saved_hgvs_coding + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + elif (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + 
hgvs_seek_var.posedit.pos.end.offset) > ( + saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + try: + automap = fn.valstr(saved_hgvs_coding) + ' normalized to ' + fn.valstr(hgvs_seek_var) + hgvs_coding = hgvs_seek_var + coding = fn.valstr(hgvs_coding) + variant.warnings += ': ' + automap + except NotImplementedError: + fn.exceptPass() + else: + # Double check protein position by reverse_norm genomic, and normalize back to c. for normalize or not to normalize issue + coding = fn.valstr(hgvs_coding) + rng = variant.reverse_normalizer.normalize(query_genomic) + try: + # Diagram where - = intron and E = Exon + + # 3 prime + # ---------EEEEEEEEEEEEEEEEE----------- + # < + # Result, normalize of new variant will baulk at intronic + # 5 prime + # < + # Result, normalize of new variant will be happy + c_for_p = validator.vm.g_to_t(rng, hgvs_coding.ac) + try: + variant.hn.normalize(c_for_p) + except hgvs.exceptions.HGVSError as e: + fn.exceptPass() + else: + # hgvs_protein = va_func.protein(str(c_for_p), variant.evm, hp) + protein_dict = validator.myc_to_p(c_for_p, variant.evm, re_to_p=False) + if protein_dict['error'] == '': + hgvs_protein = protein_dict['hgvs_protein'] + protein = str(hgvs_protein) + else: + error = protein_dict['error'] + if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': + hgvs_protein = protein_dict['hgvs_protein'] + variant.warnings += ': ' + str(error) + # Replace protein description in vars table + protein = str(hgvs_protein) + except Exception: + fn.exceptPass() + + # Check for up-to-date transcript version + updated_transcript_variant = 'None' + tx_id_info = validator.hdp.get_tx_identity_info(hgvs_coding.ac) + uta_gene_symbol = tx_id_info[6] + tx_for_gene = validator.hdp.get_tx_for_gene(uta_gene_symbol) + ac_root, ac_version = hgvs_coding.ac.split('.') + version_tracking = '0' + update = '' + for accession in tx_for_gene: + try: + if re.match(ac_root, 
accession[3]): + query_version = accession[3].split('.')[1] + if int(query_version) > int(ac_version) and int(query_version) > int( + version_tracking): + version_tracking = query_version + update = accession[3] + except ValueError: + fn.exceptPass() + + if update != '': + hgvs_updated = copy.deepcopy(hgvs_coding) + hgvs_updated.ac = update + try: + validator.vr.validate(hgvs_updated) + # Updated reference sequence + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('does not agree with reference sequence', str(error)): + match = re.findall(r'\(([GATC]+)\)', error) + new_ref = match[1] + hgvs_updated.posedit.edit.ref = new_ref + validator.vr.validate(hgvs_updated) + updated_transcript_variant = hgvs_updated + else: + pass + updated_transcript_variant = hgvs_updated + variant.warnings += ': ' + 'A more recent version of the selected reference sequence ' + hgvs_coding.ac + ' is available (' + updated_transcript_variant.ac + ')' + ': ' + str( + updated_transcript_variant) + ' MUST be fully validated prior to use in reports: select_variants=' + fn.valstr( + updated_transcript_variant) + + return False \ No newline at end of file diff --git a/VariantValidator/modules/variant.py b/VariantValidator/modules/variant.py index 9062b519..e48d671a 100644 --- a/VariantValidator/modules/variant.py +++ b/VariantValidator/modules/variant.py @@ -17,6 +17,7 @@ def __init__(self, original, quibble=None, warnings='', write=True, primary_asse self.hgvs_formatted = None self.hgvs_genomic = None self.stashed = None + self.trapped = None self.warnings = warnings self.description = '' # hgnc_gene_info variable diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index c8ebca9b..0d0e5581 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -505,6 +505,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue trapped_input = 
str(my_variant.hgvs_formatted) + my_variant.trapped = trapped_input toskip = format_converters.rna(my_variant, self) if toskip: continue @@ -528,6 +529,17 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # TYPE = :c. if format_type == ':c.' or format_type == ':n.': + print('hgvs_formatted:', my_variant.hgvs_formatted) + print('input:', input) + print('trapped:', my_variant.trapped) + print('quibble:', my_variant.quibble) + print('formatted_variant', formatted_variant) + #print(my_variant.hgvs_formatted, my_variant.trapped, input) + toskip = mappers.transcripts_to_gene(my_variant, self) + print(toskip, my_variant.hgvs_formatted) + if toskip: + print("CARRYING ON") + continue # Flag for validation valid = 'false' From 251ea14f10fe4a5d17a6ccbd736c54d350566bb2 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 10 Apr 2019 13:15:51 +0100 Subject: [PATCH 058/223] Fixed bug with use_checking using variant.hgvs_formatted when it should be using a more temporary variable --- VariantValidator/modules/use_checking.py | 248 ++++++++++++----------- VariantValidator/modules/variant.py | 1 + 2 files changed, 126 insertions(+), 123 deletions(-) diff --git a/VariantValidator/modules/use_checking.py b/VariantValidator/modules/use_checking.py index 2f3f6d93..324ee224 100644 --- a/VariantValidator/modules/use_checking.py +++ b/VariantValidator/modules/use_checking.py @@ -64,6 +64,7 @@ def structure_checks(variant, validator): auto corrected and will cause the downstream functions to return errors """ input_parses = validator.hp.parse_hgvs_variant(variant.quibble) + variant.input_parses = input_parses if input_parses.type == 'g': check = structure_checks_g(variant, validator) if check: @@ -88,13 +89,13 @@ def structure_checks_g(variant, validator): """ if not variant.quibble.startswith('NC_') and not variant.quibble.startswith('NG_') \ and not variant.quibble.startswith('NT_') and not variant.quibble.startswith('NW_'): - error = 'Invalid reference 
sequence identifier (' + variant.hgvs_formatted.ac + ')' + error = 'Invalid reference sequence identifier (' + variant.input_parses.ac + ')' variant.warnings += ': ' + str(error) logger.warning(error) return True try: - validator.vr.validate(variant.hgvs_formatted) + validator.vr.validate(variant.input_parses) except Exception as e: error = str(e) variant.warnings += ': ' + str(error) @@ -103,7 +104,7 @@ def structure_checks_g(variant, validator): # Additional test try: - variant.hn.normalize(variant.hgvs_formatted) + variant.hn.normalize(variant.input_parses) except hgvs.exceptions.HGVSError as e: error = str(e) variant.warnings += ': ' + str(error) @@ -122,17 +123,17 @@ def structure_checks_c(variant, validator): :return: """ - if '*' in str(variant.hgvs_formatted) or 'c.-' in str(variant.hgvs_formatted): + if '*' in str(variant.input_parses) or 'c.-' in str(variant.input_parses): # Catch variation in UTRs # These should be in the sequence so can be directly validated. Need to pass to n. 
try: - validator.vr.validate(variant.hgvs_formatted) + validator.vr.validate(variant.input_parses) except hgvs.exceptions.HGVSError as e: error = str(e) if 'datums is ill-defined' in error: - called_ref = variant.hgvs_formatted.posedit.edit.ref + called_ref = variant.input_parses.posedit.edit.ref try: - to_n = variant.evm.c_to_n(variant.hgvs_formatted) + to_n = variant.evm.c_to_n(variant.input_parses) except hgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) variant.warnings += ': ' + error @@ -146,45 +147,46 @@ def structure_checks_c(variant, validator): logger.warning(error) return True else: - variant.hgvs_formatted.posedit.edit.ref = '' + variant.input_parses.posedit.edit.ref = '' + variant.hgvs_formatted = variant.input_parses else: if 'bounds' in error or 'intronic variant' in error: try: - variant.hn.normalize(variant.hgvs_formatted) + variant.hn.normalize(variant.input_parses) except hgvs.exceptions.HGVSError: fn.exceptPass() if 'bounds' in error: try: - identity_info = validator.hdp.get_tx_identity_info(variant.hgvs_formatted.ac) + identity_info = validator.hdp.get_tx_identity_info(variant.input_parses.ac) ref_start = identity_info[3] ref_end = identity_info[4] - if '-' in str(variant.hgvs_formatted.posedit.pos.start) and variant.hgvs_formatted.posedit.pos.start.offset == 0: + if '-' in str(variant.input_parses.posedit.pos.start) and variant.input_parses.posedit.pos.start.offset == 0: # upstream positions boundary = -ref_start - remainder = variant.hgvs_formatted.posedit.pos.start.base - boundary - variant.hgvs_formatted.posedit.pos.start.base = boundary - variant.hgvs_formatted.posedit.pos.start.offset = remainder - if '-' in str(variant.hgvs_formatted.posedit.pos.end) and variant.hgvs_formatted.posedit.pos.end.offset == 0: + remainder = variant.input_parses.posedit.pos.start.base - boundary + variant.input_parses.posedit.pos.start.base = boundary + variant.input_parses.posedit.pos.start.offset = remainder + if '-' in 
str(variant.input_parses.posedit.pos.end) and variant.input_parses.posedit.pos.end.offset == 0: boundary = -ref_start - remainder = variant.hgvs_formatted.posedit.pos.end.base - boundary - variant.hgvs_formatted.posedit.pos.end.base = boundary - variant.hgvs_formatted.posedit.pos.end.offset = remainder - if '*' in str(variant.hgvs_formatted.posedit.pos.start) and variant.hgvs_formatted.posedit.pos.start.offset == 0: + remainder = variant.input_parses.posedit.pos.end.base - boundary + variant.input_parses.posedit.pos.end.base = boundary + variant.input_parses.posedit.pos.end.offset = remainder + if '*' in str(variant.input_parses.posedit.pos.start) and variant.input_parses.posedit.pos.start.offset == 0: # downstream positions - tot_end_pos = str(variant.hgvs_formatted.posedit.pos.start).replace('*', '') - ts_seq = validator.sf.fetch_seq(variant.hgvs_formatted.ac) + tot_end_pos = str(variant.input_parses.posedit.pos.start).replace('*', '') + ts_seq = validator.sf.fetch_seq(variant.input_parses.ac) boundary = len(ts_seq) - ref_end - variant.hgvs_formatted.posedit.pos.start.base = boundary + variant.input_parses.posedit.pos.start.base = boundary offset = int(tot_end_pos) - boundary - variant.hgvs_formatted.posedit.pos.start.offset = offset - if '*' in str(variant.hgvs_formatted.posedit.pos.end) and variant.hgvs_formatted.posedit.pos.end.offset == 0: - tot_end_pos = str(variant.hgvs_formatted.posedit.pos.end).replace('*', '') - ts_seq = validator.sf.fetch_seq(variant.hgvs_formatted.ac) + variant.input_parses.posedit.pos.start.offset = offset + if '*' in str(variant.input_parses.posedit.pos.end) and variant.input_parses.posedit.pos.end.offset == 0: + tot_end_pos = str(variant.input_parses.posedit.pos.end).replace('*', '') + ts_seq = validator.sf.fetch_seq(variant.input_parses.ac) boundary = len(ts_seq) - ref_end - variant.hgvs_formatted.posedit.pos.end.base = boundary + variant.input_parses.posedit.pos.end.base = boundary offset = int(tot_end_pos) - boundary - 
variant.hgvs_formatted.posedit.pos.end.offset = offset + variant.input_parses.posedit.pos.end.offset = offset # Create a lose vm instance variant.lose_vm = hgvs.variantmapper.VariantMapper(validator.hdp, @@ -192,7 +194,7 @@ def structure_checks_c(variant, validator): prevalidation_level=None ) - report_gen = validator.myevm_t_to_g(variant.hgvs_formatted, variant.no_norm_evm, + report_gen = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, variant.primary_assembly, variant.hn) error = 'Using a transcript reference sequence to specify a variant position that lies ' \ 'outside of the reference sequence is not HGVS-compliant: ' \ @@ -204,15 +206,15 @@ def structure_checks_c(variant, validator): return True try: - variant.hgvs_formatted = variant.evm.c_to_n(variant.hgvs_formatted) + variant.input_parses = variant.evm.c_to_n(variant.input_parses) except hgvs.exceptions.HGVSError as e: error = str(e) variant.warnings += ': ' + error logger.warning(e) return True - if 'n.1-' in str(variant.hgvs_formatted): - input_parses = variant.evm.n_to_c(variant.hgvs_formatted) + if 'n.1-' in str(variant.input_parses): + input_parses = variant.evm.n_to_c(variant.input_parses) error = 'Using a transcript reference sequence to specify a variant position that lies outside of the ' \ 'reference sequence is not HGVS-compliant. Instead use ' genomic_position = validator.myevm_t_to_g(input_parses, variant.no_norm_evm, variant.primary_assembly, @@ -223,54 +225,54 @@ def structure_checks_c(variant, validator): return True # Re-map input_parses back to c. 
variant - variant.hgvs_formatted = variant.evm.n_to_c(variant.hgvs_formatted) + variant.input_parses = variant.evm.n_to_c(variant.input_parses) # Intronic positions in UTRs - if re.search(r'\d\-\d', str(variant.hgvs_formatted)) or re.search(r'\d\+\d', str(variant.hgvs_formatted)): + if re.search(r'\d\-\d', str(variant.input_parses)) or re.search(r'\d\+\d', str(variant.input_parses)): # Can we go c-g-c try: - to_genome = validator.myevm_t_to_g(variant.hgvs_formatted, variant.no_norm_evm, + to_genome = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, variant.primary_assembly, variant.hn) - to_tx = variant.evm.g_to_t(to_genome, variant.hgvs_formatted.ac) + to_tx = variant.evm.g_to_t(to_genome, variant.input_parses.ac) except hgvs.exceptions.HGVSInvalidIntervalError as e: error = str(e) if 'bounds' in error: try: - identity_info = validator.hdp.get_tx_identity_info(variant.hgvs_formatted.ac) + identity_info = validator.hdp.get_tx_identity_info(variant.input_parses.ac) ref_start = identity_info[3] ref_end = identity_info[4] - if '-' in str(variant.hgvs_formatted.posedit.pos.start): + if '-' in str(variant.input_parses.posedit.pos.start): # upstream positions boundary = -ref_start - remainder = variant.hgvs_formatted.posedit.pos.start.base - boundary - variant.hgvs_formatted.posedit.pos.start.base = boundary - variant.hgvs_formatted.posedit.pos.start.offset = remainder - if '-' in str(variant.hgvs_formatted.posedit.pos.end): + remainder = variant.input_parses.posedit.pos.start.base - boundary + variant.input_parses.posedit.pos.start.base = boundary + variant.input_parses.posedit.pos.start.offset = remainder + if '-' in str(variant.input_parses.posedit.pos.end): boundary = -ref_start - remainder = variant.hgvs_formatted.posedit.pos.end.base - boundary - variant.hgvs_formatted.posedit.pos.end.base = boundary - variant.hgvs_formatted.posedit.pos.end.offset = remainder - if '*' in str(variant.hgvs_formatted.posedit.pos.start): + remainder = 
variant.input_parses.posedit.pos.end.base - boundary + variant.input_parses.posedit.pos.end.base = boundary + variant.input_parses.posedit.pos.end.offset = remainder + if '*' in str(variant.input_parses.posedit.pos.start): # downstream positions - tot_end_pos = str(variant.hgvs_formatted.posedit.pos.start).replace('*', '') - ts_seq = validator.sf.fetch_seq(variant.hgvs_formatted.ac) + tot_end_pos = str(variant.input_parses.posedit.pos.start).replace('*', '') + ts_seq = validator.sf.fetch_seq(variant.input_parses.ac) boundary = len(ts_seq) - ref_end - variant.hgvs_formatted.posedit.pos.start.base = boundary + variant.input_parses.posedit.pos.start.base = boundary te1, te2 = tot_end_pos.split('+') tot_end_pos = int(te1) + int(te2) offset = tot_end_pos - boundary - variant.hgvs_formatted.posedit.pos.start.offset = offset - if '*' in str(variant.hgvs_formatted.posedit.pos.end): - tot_end_pos = str(variant.hgvs_formatted.posedit.pos.end).replace('*', '') - ts_seq = validator.sf.fetch_seq(variant.hgvs_formatted.ac) + variant.input_parses.posedit.pos.start.offset = offset + if '*' in str(variant.input_parses.posedit.pos.end): + tot_end_pos = str(variant.input_parses.posedit.pos.end).replace('*', '') + ts_seq = validator.sf.fetch_seq(variant.input_parses.ac) boundary = len(ts_seq) - ref_end - variant.hgvs_formatted.posedit.pos.end.base = boundary + variant.input_parses.posedit.pos.end.base = boundary te1, te2 = tot_end_pos.split('+') tot_end_pos = int(te1) + int(te2) offset = tot_end_pos - boundary - variant.hgvs_formatted.posedit.pos.end.offset = offset + variant.input_parses.posedit.pos.end.offset = offset - report_gen = validator.myevm_t_to_g(variant.hgvs_formatted, variant.no_norm_evm, + report_gen = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, variant.primary_assembly, variant.hn) error = 'Using a transcript reference sequence to specify a variant position that lies ' \ 'outside of the reference sequence is not HGVS-compliant. 
Instead use '\ @@ -292,22 +294,22 @@ def structure_checks_c(variant, validator): continue gens.append(el_l[-1]) acs = '; '.join(gens) - error = 'Cannot map ' + fn.valstr(variant.hgvs_formatted) + ' to a genomic position. '\ - + variant.hgvs_formatted.ac + ' can only be partially aligned to genomic reference ' \ + error = 'Cannot map ' + fn.valstr(variant.input_parses) + ' to a genomic position. '\ + + variant.input_parses.ac + ' can only be partially aligned to genomic reference ' \ 'sequences ' + acs variant.warnings += ': ' + error logger.warning(error) return True - elif re.search(r'\d-', str(variant.hgvs_formatted)) or re.search(r'\d\+', str(variant.hgvs_formatted)): + elif re.search(r'\d-', str(variant.input_parses)) or re.search(r'\d\+', str(variant.input_parses)): # Quick look at syntax validation try: - validator.vr.validate(variant.hgvs_formatted) + validator.vr.validate(variant.input_parses) except hgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) if 'bounds' in error: try: - report_gen = validator.myevm_t_to_g(variant.hgvs_formatted, variant.no_norm_evm, + report_gen = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, variant.primary_assembly, variant.hn) except hgvs.exceptions.HGVSError: fn.exceptPass() @@ -322,9 +324,9 @@ def structure_checks_c(variant, validator): logger.warning(error) return True elif 'base start position must be <= end position' in error: - correction = copy.deepcopy(variant.hgvs_formatted) - st = variant.hgvs_formatted.posedit.pos.start - ed = variant.hgvs_formatted.posedit.pos.end + correction = copy.deepcopy(variant.input_parses) + st = variant.input_parses.posedit.pos.start + ed = variant.input_parses.posedit.pos.end correction.posedit.pos.start = ed correction.posedit.pos.end = st error = error + ': Did you mean ' + str(correction) + '?' 
@@ -335,11 +337,11 @@ def structure_checks_c(variant, validator): # Create a specific minimal evm with no normalizer and no replace_reference # Have to use this method due to potential multi chromosome error, note normalizes but does not replace sequence try: - output = validator.noreplace_myevm_t_to_g(variant.hgvs_formatted, variant.evm, validator.hdp, + output = validator.noreplace_myevm_t_to_g(variant.input_parses, variant.evm, validator.hdp, variant.primary_assembly, validator.vm, variant.hn, validator.hp, validator.sf, variant.no_norm_evm) except hgvs.exceptions.HGVSDataNotAvailableError: - tx_ac = variant.hgvs_formatted.ac + tx_ac = variant.input_parses.ac try: gene_symbol = validator.db.get_gene_symbol_from_transcriptID(tx_ac) except: @@ -357,22 +359,22 @@ def structure_checks_c(variant, validator): except ValueError as e: error = str(e) if '> end' in error: - error = 'Interval start position ' + str(variant.hgvs_formatted.posedit.pos.start) + ' > interval end '\ - 'position ' + str(variant.hgvs_formatted.posedit.pos.end) + error = 'Interval start position ' + str(variant.input_parses.posedit.pos.start) + ' > interval end '\ + 'position ' + str(variant.input_parses.posedit.pos.end) variant.warnings += ': ' + error logger.warning(error) return True except hgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) if 'base start position must be <= end position' in error: - # correction = copy.deepcopy(variant.hgvs_formatted) - # st = variant.hgvs_formatted.posedit.pos.start - # ed = variant.hgvs_formatted.posedit.pos.end + # correction = copy.deepcopy(variant.input_parses) + # st = variant.input_parses.posedit.pos.start + # ed = variant.input_parses.posedit.pos.end # correction.posedit.pos.start = ed # correction.posedit.pos.end = st # error = error + ': Did you mean ' + str(correction) + '?' 
- error = 'Interval start position ' + str(variant.hgvs_formatted.posedit.pos.start) + ' > interval end' \ - ' position ' + str(variant.hgvs_formatted.posedit.pos.end) + error = 'Interval start position ' + str(variant.input_parses.posedit.pos.start) + ' > interval end' \ + ' position ' + str(variant.input_parses.posedit.pos.end) variant.warnings += ': ' + error logger.warning(error) return True @@ -382,7 +384,7 @@ def structure_checks_c(variant, validator): return True try: - variant.evm.g_to_t(output, variant.hgvs_formatted.ac) + variant.evm.g_to_t(output, variant.input_parses.ac) except hgvs.exceptions.HGVSError as e: error = str(e) variant.warnings += ': ' + error @@ -400,21 +402,21 @@ def structure_checks_c(variant, validator): else: # All other variation try: - validator.vr.validate(variant.hgvs_formatted) + validator.vr.validate(variant.input_parses) except hgvs.exceptions.HGVSUnsupportedOperationError: fn.exceptPass() except hgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) # This catches errors in introns if 'base start position must be <= end position' in error: - # correction = variant.hgvs_formatted - # st = variant.hgvs_formatted.posedit.pos.start - # ed = variant.hgvs_formatted.posedit.pos.end + # correction = variant.input_parses + # st = variant.input_parses.posedit.pos.start + # ed = variant.input_parses.posedit.pos.end # correction.posedit.pos.start = ed # correction.posedit.pos.end = st # error = error + ': Did you mean ' + str(correction) + '?' 
- error = 'Interval start position ' + str(variant.hgvs_formatted.posedit.pos.start) + ' > interval end '\ - 'position ' + str(variant.hgvs_formatted.posedit.pos.end) + error = 'Interval start position ' + str(variant.input_parses.posedit.pos.start) + ' > interval end '\ + 'position ' + str(variant.input_parses.posedit.pos.end) variant.warnings += ': ' + error logger.warning(error) return True @@ -427,7 +429,7 @@ def structure_checks_c(variant, validator): except hgvs.exceptions.HGVSError as e: error = str(e) if 'bounds' in error: - error += ' (' + variant.hgvs_formatted.ac + ')' + error += ' (' + variant.input_parses.ac + ')' variant.warnings += ': ' + error logger.warning(error) return True @@ -441,18 +443,18 @@ def structure_checks_n(variant, validator): :param validator: :return: """ - if '+' in str(variant.hgvs_formatted) or '-' in str(variant.hgvs_formatted): + if '+' in str(variant.input_parses) or '-' in str(variant.input_parses): # Catch variation in UTRs # These should be in the sequence so can be directly validated. Need to pass to n. 
try: - validator.vr.validate(variant.hgvs_formatted) + validator.vr.validate(variant.input_parses) except hgvs.exceptions.HGVSError as e: error = str(e) if 'intronic variant' in error: pass elif 'datums is ill-defined' in error: - called_ref = variant.hgvs_formatted.posedit.edit.ref - to_n = variant.evm.c_to_n(variant.hgvs_formatted) + called_ref = variant.input_parses.posedit.edit.ref + to_n = variant.evm.c_to_n(variant.input_parses) actual_ref = to_n.posedit.edit.ref if called_ref != actual_ref: error = 'Variant reference (' + called_ref + ') does not agree with reference sequence (' + actual_ref + ')' @@ -460,27 +462,27 @@ def structure_checks_n(variant, validator): logger.warning(str(error)) return True else: - variant.hgvs_formatted.posedit.edit.ref = '' - formatted_variant = str(variant.hgvs_formatted) + variant.input_parses.posedit.edit.ref = '' + variant.hgvs_formatted = variant.input_parses elif 'base must be >=1 for datum = SEQ_START or CDS_END' in error: error = 'The given coordinate is outside the bounds of the reference sequence.' 
try: - if '-' in str(variant.hgvs_formatted.posedit.pos.start): + if '-' in str(variant.input_parses.posedit.pos.start): # upstream positions boundary = 1 - remainder = variant.hgvs_formatted.posedit.pos.start.base - boundary + remainder = variant.input_parses.posedit.pos.start.base - boundary remainder = remainder + 1 - variant.hgvs_formatted.posedit.pos.start.base = boundary - variant.hgvs_formatted.posedit.pos.start.offset = remainder - if '-' in str(variant.hgvs_formatted.posedit.pos.end): + variant.input_parses.posedit.pos.start.base = boundary + variant.input_parses.posedit.pos.start.offset = remainder + if '-' in str(variant.input_parses.posedit.pos.end): boundary = 1 - remainder = variant.hgvs_formatted.posedit.pos.end.base - boundary + remainder = variant.input_parses.posedit.pos.end.base - boundary remainder = remainder + 1 - variant.hgvs_formatted.posedit.pos.end.base = boundary - variant.hgvs_formatted.posedit.pos.end.offset = remainder - report_gen = validator.myevm_t_to_g(variant.hgvs_formatted, variant.no_norm_evm, variant.primary_assembly, + variant.input_parses.posedit.pos.end.base = boundary + variant.input_parses.posedit.pos.end.offset = remainder + report_gen = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, variant.primary_assembly, variant.hn) error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' + fn.valstr( report_gen) @@ -494,24 +496,24 @@ def structure_checks_n(variant, validator): logger.warning(error) return True - if 'n.1-' in str(variant.hgvs_formatted): + if 'n.1-' in str(variant.input_parses): error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' - genomic_position = validator.myevm_t_to_g(variant.hgvs_formatted, variant.no_norm_evm, variant.primary_assembly, + genomic_position = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, variant.primary_assembly, variant.hn) error = error + fn.valstr(genomic_position) variant.warnings += ': ' + error logger.warning(error) return True - if re.search(r'\d-', str(variant.hgvs_formatted)) or re.search(r'\d\+', str(variant.hgvs_formatted)): + if re.search(r'\d-', str(variant.input_parses)) or re.search(r'\d\+', str(variant.input_parses)): # Quick look at syntax validation try: - validator.vr.validate(variant.hgvs_formatted) + validator.vr.validate(variant.input_parses) except hgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) if 'bounds' in error: try: - report_gen = validator.myevm_t_to_g(variant.hgvs_formatted, variant.no_norm_evm, variant.primary_assembly, + report_gen = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, variant.primary_assembly, variant.hn) except hgvs.exceptions.HGVSError as e: fn.exceptPass() @@ -526,9 +528,9 @@ def structure_checks_n(variant, validator): logger.warning(error) return True elif 'base start position must be <= end position' in error: - correction = copy.deepcopy(variant.hgvs_formatted) - st = variant.hgvs_formatted.posedit.pos.start - ed = variant.hgvs_formatted.posedit.pos.end + correction = copy.deepcopy(variant.input_parses) + st = variant.input_parses.posedit.pos.start + ed = variant.input_parses.posedit.pos.end correction.posedit.pos.start = ed correction.posedit.pos.end = st error = error + ': Did you mean ' + str(correction) + '?' 
@@ -538,13 +540,13 @@ def structure_checks_n(variant, validator): return True elif 'Cannot validate sequence of an intronic variant' in error: try: - test_g = validator.myevm_t_to_g(variant.hgvs_formatted, variant.no_norm_evm, variant.primary_assembly, + test_g = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, variant.primary_assembly, variant.hn) - back_to_n = variant.evm.g_to_t(test_g, variant.hgvs_formatted.ac) + back_to_n = variant.evm.g_to_t(test_g, variant.input_parses.ac) except hgvs.exceptions.HGVSError as e: error = str(e) if 'bounds' in error: - report_gen = validator.myevm_t_to_g(variant.hgvs_formatted, variant.no_norm_evm, + report_gen = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, variant.primary_assembly, variant.hn) error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' + fn.valstr( report_gen) @@ -555,10 +557,10 @@ def structure_checks_n(variant, validator): # Create a specific minimal evm with no normalizer and no replace_reference # Have to use this method due to potential multi chromosome error, note, normalizes but does not replace sequence try: - output = validator.noreplace_myevm_t_to_g(variant.hgvs_formatted, variant.evm, validator.hdp, variant.primary_assembly, validator.vm, variant.hn, + output = validator.noreplace_myevm_t_to_g(variant.input_parses, variant.evm, validator.hdp, variant.primary_assembly, validator.vm, variant.hn, validator.hp, validator.sf, variant.no_norm_evm) except hgvs.exceptions.HGVSDataNotAvailableError as e: - tx_ac = variant.hgvs_formatted.ac + tx_ac = variant.input_parses.ac try: gene_symbol = validator.db.get_gene_symbol_from_transcriptID(tx_ac) except: @@ -574,23 +576,23 @@ def structure_checks_n(variant, validator): error = str(e) if '> end' in error: error = 'Interval start position ' + str( - variant.hgvs_formatted.posedit.pos.start) + ' > interval end position ' + str( - 
variant.hgvs_formatted.posedit.pos.end) + variant.input_parses.posedit.pos.start) + ' > interval end position ' + str( + variant.input_parses.posedit.pos.end) variant.warnings += ': ' + error logger.warning(error) return True except hgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) if 'base start position must be <= end position' in error: - correction = copy.deepcopy(variant.hgvs_formatted) - st = variant.hgvs_formatted.posedit.pos.start - ed = variant.hgvs_formatted.posedit.pos.end + correction = copy.deepcopy(variant.input_parses) + st = variant.input_parses.posedit.pos.start + ed = variant.input_parses.posedit.pos.end correction.posedit.pos.start = ed correction.posedit.pos.end = st error = error + ': Did you mean ' + str(correction) + '?' error = 'Interval start position ' + str( - variant.hgvs_formatted.posedit.pos.start) + ' > interval end position ' + str( - variant.hgvs_formatted.posedit.pos.end) + variant.input_parses.posedit.pos.start) + ' > interval end position ' + str( + variant.input_parses.posedit.pos.end) variant.warnings += ': ' + error logger.warning(error) return True @@ -605,7 +607,7 @@ def structure_checks_n(variant, validator): else: # All other variation try: - validator.vr.validate(variant.hgvs_formatted) + validator.vr.validate(variant.input_parses) except hgvs.exceptions.HGVSUnsupportedOperationError: fn.exceptPass() except hgvs.exceptions.HGVSInvalidVariantError as e: @@ -629,15 +631,15 @@ def structure_checks_n(variant, validator): # continue # This catches errors in introns if 'base start position must be <= end position' in error: - # correction = copy.deepcopy(variant.hgvs_formatted) - # st = variant.hgvs_formatted.posedit.pos.start - # ed = variant.hgvs_formatted.posedit.pos.end + # correction = copy.deepcopy(variant.input_parses) + # st = variant.input_parses.posedit.pos.start + # ed = variant.input_parses.posedit.pos.end # correction.posedit.pos.start = ed # correction.posedit.pos.end = st # error = error + ': Did you 
mean ' + str(correction) + '?' error = 'Interval start position ' + str( - variant.hgvs_formatted.posedit.pos.start) + ' > interval end position ' + str( - variant.hgvs_formatted.posedit.pos.end) + variant.input_parses.posedit.pos.start) + ' > interval end position ' + str( + variant.input_parses.posedit.pos.end) logger.warning(error) variant.warnings += ': ' + error return True @@ -652,7 +654,7 @@ def structure_checks_n(variant, validator): except hgvs.exceptions.HGVSError as e: error = str(e) if 'bounds' in error: - error = error + ' (' + variant.hgvs_formatted.ac + ')' + error = error + ' (' + variant.input_parses.ac + ')' variant.warnings += ': ' + error logger.warning(error) return True diff --git a/VariantValidator/modules/variant.py b/VariantValidator/modules/variant.py index e48d671a..6142713b 100644 --- a/VariantValidator/modules/variant.py +++ b/VariantValidator/modules/variant.py @@ -18,6 +18,7 @@ def __init__(self, original, quibble=None, warnings='', write=True, primary_asse self.hgvs_genomic = None self.stashed = None self.trapped = None + self.input_parses = None self.warnings = warnings self.description = '' # hgnc_gene_info variable From f6fab5b921dfc3bcfff7815ae3f3cf146b3109ff Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 10 Apr 2019 15:41:22 +0100 Subject: [PATCH 059/223] intermediate stage of cleaning and identifying problematic areas --- VariantValidator/modules/mappers.py | 181 +++++++++++++----------- VariantValidator/modules/vvMixinCore.py | 14 +- 2 files changed, 113 insertions(+), 82 deletions(-) diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index 26507dbb..1e150a85 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -181,17 +181,17 @@ def transcripts_to_gene(variant, validator): """This seems to use the quibble and not the HGVS formatted variant format.""" # Flag for validation - valid = 'false' + valid = False boundary = 'false' warning = '' caution 
= '' # Collect information for genomic level validation - obj = validator.hp.parse_hgvs_variant(str(variant.quibble)) + obj = validator.hp.parse_hgvs_variant(str(variant.hgvs_formatted)) tx_ac = obj.ac input = str(variant.quibble) - formatted_variant = str(variant.quibble) + formatted_variant = str(variant.hgvs_formatted) # Do we keep it? if validator.select_transcripts != 'all': @@ -318,29 +318,28 @@ def transcripts_to_gene(variant, validator): elif ':g.' in input: if plus.search(formatted_variant) or minus.search(formatted_variant): - to_g = validator.genomic(formatted_variant, variant.no_norm_evm, variant.primary_assembly,variant.hn) - es = re.compile(r'error') - if es.search(str(to_g)): + to_g = validator.genomic(formatted_variant, variant.no_norm_evm, variant.primary_assembly, variant.hn) + if 'error' in str(to_g): if validator.alt_aln_method != 'genebuild': - error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g - reason = "An error has occurred" - excep = "%s -- %s -- %s\n" % (time.ctime(), reason, formatted_variant) - variant.warnings += ': ' + str(error) - logger.warning(str(error)) + error = "If the following error message does not address the issue and the problem persists " \ + "please contact admin: " + str(to_g) + variant.warnings += ': ' + error + logger.warning(error) return True else: - error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g - reason = "An error has occurred" - excep = "%s -- %s -- %s\n" % (time.ctime(), reason, formatted_variant) - variant.warnings += ': ' + str(error) - logger.warning(str(error)) + error = "If the following error message does not address the issue and the problem persists " \ + "please contact admin: " + str(to_g) + variant.warnings += ': ' + error + logger.warning(error) return True else: # Insertions at exon boundaries are miss-handled by vm.g_to_t - if ( - 
obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset == 0 and obj.posedit.pos.end.offset != 0) or ( - obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset != 0 and obj.posedit.pos.end.offset == 0): + if (obj.posedit.edit.type == 'ins' and + obj.posedit.pos.start.offset == 0 and + obj.posedit.pos.end.offset != 0) or (obj.posedit.edit.type == 'ins' and + obj.posedit.pos.start.offset != 0 and + obj.posedit.pos.end.offset == 0): formatted_variant = str(obj) else: # Normalize was I believe to replace ref. Mapping does this anyway @@ -355,65 +354,65 @@ def transcripts_to_gene(variant, validator): h_variant = variant.hn.normalize(obj) except hgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) - if re.match('Unsupported normalization of variants spanning the exon-intron boundary', - error): + if 'Unsupported normalization of variants spanning the exon-intron boundary' in error: h_variant = obj formatted_variant = formatted_variant caution = 'This coding sequence variant description spans at least one intron' - automap = 'Use of the corresponding genomic sequence variant descriptions may be invalid. Please refer to https://www35.lamp.le.ac.uk/recommendations/' - variant.warnings += ': ' + str(caution) + ': ' + str( - automap) - logger.warning(str(caution) + ": " + str(automap)) + automap = 'Use of the corresponding genomic sequence variant descriptions may be invalid. 
' \ + 'Please refer to https://www35.lamp.le.ac.uk/recommendations/' + variant.warnings += ': ' + caution + ': ' + automap + logger.warning(caution + ": " + automap) else: formatted_variant = str(h_variant) - tx_ac = '' - # Create a crosser (exon boundary crossed) variant - crossed_variant = str(variant.evm._maybe_normalize(obj)) - if formatted_variant == crossed_variant: - cross_variant = 'false' + # tx_ac = '' + # # Create a crosser (exon boundary crossed) variant + # crossed_variant = str(variant.evm._maybe_normalize(obj)) + # if formatted_variant == crossed_variant: + # cross_variant = 'false' + # else: + # hgvs_crossed_variant = variant.evm._maybe_normalize(obj) + # cross_variant = [ + # "Coding sequence allowing for exon boundary crossing (default = no crossing)", + # crossed_variant, hgvs_crossed_variant.ac] + # cr_available = 'true' + # + # # control of cross_variant + # if boundary == 'false': + # cross_variant = 'false' + + # Moved this forwards and removed the previous section as it doesn't seem to be used anywhere + + error = validator.validateHGVS(formatted_variant) + if error == 'false': + valid = True else: - hgvs_crossed_variant = variant.evm._maybe_normalize(obj) - cross_variant = [ - "Coding sequence allowing for exon boundary crossing (default = no crossing)", - crossed_variant, hgvs_crossed_variant.ac] - cr_available = 'true' - - # control of cross_variant - if boundary == 'false': - cross_variant = 'false' - - error = validator.validateHGVS(formatted_variant) - if error == 'false': - valid = 'true' - else: - excep = "%s -- %s -- %s\n" % (time.ctime(), error, formatted_variant) - variant.warnings += ': ' + str(error) - logger.warning(str(error)) - return True + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True # Tackle the plus intronic offset - cck = 'false' - if (plus.search(input)): + cck = False + if plus.search(input): # Regular expression catches the start of the interval only based on .00+00 pattern 
inv_start = re.compile(r"\.\d+\+\d") - if (inv_start.search(input)): + if inv_start.search(input): # Find pattern e.g. +0000 and assign to a variable off_value = re.search(r"(\+\d+)", input) off_value = off_value.group(1) # Integerise the value and assign to ex_offset - ex_offset = int(off_value) - cck = 'true' - if (minus.search(input)): + #ex_offset = int(off_value) + cck = True + if minus.search(input): # Regular expression catches the start of the interval only based on .00-00 pattern inv_start = re.compile(r"\.\d+\-\d") - if (inv_start.search(input)): + if inv_start.search(input): # Find pattern e.g. -0000 and assign to a variable off_value = re.search(r"(\-\d+)", input) off_value = off_value.group(1) # Integerise the value and assign to ex_offset - ex_offset = int(off_value) - cck = 'true' + #ex_offset = int(off_value) + cck = True # COORDINATE CHECKER # hgvs will handle incorrect coordinates so need to automap errors @@ -421,13 +420,13 @@ def transcripts_to_gene(variant, validator): # Get the desired transcript pat_r = re.compile(':r.') pat_g = re.compile(':g.') - if cck == 'true': + if cck: dl = re.compile('del') # This should only ever hit coding and RNA variants - if dl.search(formatted_variant): - # RNA - if pat_r.search(variant.trapped): - + if 'del' in formatted_variant: + # RNA - looking at trapped variant which was saved before RNA converted to cDNA + #TODO: rename variant.trapped to variant.pre_RNA_conversion or something similar so it makes sense. + if ':r.' 
in variant.trapped: coding = validator.coding(formatted_variant, validator.hp) trans_acc = coding.ac # c to Genome coordinates - Map the variant to the genome @@ -436,13 +435,16 @@ def transcripts_to_gene(variant, validator): post_var = validator.myevm_g_to_t(variant.evm, pre_var, trans_acc) test = validator.hp.parse_hgvs_variant(input) - if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: - caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' - automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' + if post_var.posedit.pos.start.base != test.posedit.pos.start.base or \ + post_var.posedit.pos.end.base != test.posedit.pos.end.base: + caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected ' \ + 'transcript:' + automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the ' \ + 'selected transcript' # automapping of variant completed # Change to rna variant # THERE IS NO SUCH THING AS QUERY. THIS WOULDN'T HAVE WORKED AND ISN'T RUN IN ANY TESTS - query = variant + query = variant # Deliberately won't work so I can fix this once I have an appropriate test. 
posedit = query.posedit posedit = posedit.lower() query.posedit = posedit @@ -521,7 +523,8 @@ def transcripts_to_gene(variant, validator): # Tag the line so that it is not written out variant.write = False # Set the values and append to batch_list - query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=variant.primary_assembly, order=variant.order) + query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, + primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) # Coding @@ -533,18 +536,15 @@ def transcripts_to_gene(variant, validator): try: pre_var = validator.myevm_t_to_g(pre_var, variant.no_norm_evm, variant.primary_assembly, variant.hn) - except: - e = sys.exc_info()[1] + except Exception as e: error = str(e) - reason = 'Input coordinates may be invalid' if error == 'expected from_start_i <= from_end_i': - error = 'Automap is unable to correct the input exon/intron boundary coordinates, please check your variant description' - variant.warnings += ': ' + str(error) + error = 'Automap is unable to correct the input exon/intron boundary coordinates, ' \ + 'please check your variant description' + variant.warnings += ': ' + error return True else: fn.exceptPass() - else: - fn.exceptPass() # genome back to C coordinates try: post_var = validator.myevm_g_to_t(variant.evm, pre_var, trans_acc) @@ -554,9 +554,13 @@ def transcripts_to_gene(variant, validator): return True query = post_var test = validator.hp.parse_hgvs_variant(input) - if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: - caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' - automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' + + if post_var.posedit.pos.start.base != test.posedit.pos.start.base or \ + 
post_var.posedit.pos.end.base != test.posedit.pos.end.base: + caution = 'The entered coordinates do not agree with the intron/exon boundaries for the ' \ + 'selected transcript:' + automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the ' \ + 'selected transcript' # automapping of variant completed automap = variant.trapped + ' automapped to ' + str(post_var) variant.warnings += str(caution) + ': ' + str(automap) @@ -634,7 +638,7 @@ def transcripts_to_gene(variant, validator): query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) - else: + else: # del not in formatted_variant if pat_r.search(variant.trapped): coding = validator.coding(formatted_variant, validator.hp) trans_acc = coding.ac @@ -879,12 +883,22 @@ def transcripts_to_gene(variant, validator): relevant = "Select the automapped transcript and click Submit to analyse" rel_var = [] rel_var.append(query) + print(rel_var) # Add gene symbols to the link cp_rel = copy.copy(rel_var) + print(cp_rel) del rel_var[:] + print(rel_var) + + # TODO: This whole loop is very strange and I don't know what it's supposed to be doing. + # It's also repeated in four other places although only this one is run in the tests. + # Test variant 197 goes into this loop, but I don't think it's ever going to loop more than once. + # Would perhaps be worth testing in a seperate little bit of code or perhaps email Pete? 
for accessions in cp_rel: + print(accessions) error = 'false' hgvs_vt = validator.hp.parse_hgvs_variant(str(accessions)) + print(hgvs_vt) try: tx_id_info = validator.hdp.get_tx_identity_info(str(hgvs_vt.ac)) except hgvs.exceptions.HGVSError as e: @@ -911,8 +925,13 @@ def transcripts_to_gene(variant, validator): current = data['record']['response']['docs'][0]['symbol'] accessions = [str(current), str(hgvs_vt)] rel_var.append(accessions) + print(rel_var) # Kill current line and append for re-submission # Tag the line so that it is not written out + print('out of loop') + print(hgvs_vt) + print(rel_var) + raise SystemExit variant.write = False # Set the values and append to batch_list query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=variant.primary_assembly, order=variant.order) @@ -921,7 +940,7 @@ def transcripts_to_gene(variant, validator): # VALIDATION of intronic variants pre_valid = validator.hp.parse_hgvs_variant(input) post_valid = validator.hp.parse_hgvs_variant(formatted_variant) - if valid == 'false': + if not valid: error = 'false' genomic_validation = str( validator.genomic(input, variant.no_norm_evm, variant.primary_assembly,variant.hn) ) @@ -944,14 +963,14 @@ def transcripts_to_gene(variant, validator): # Apply validation to intronic variant descriptions (should be valid but make sure) error = validator.validateHGVS(genomic_validation) if error == 'false': - valid = 'true' + valid = True else: excep = "%s -- %s -- %s\n" % (time.ctime(), error, formatted_variant) variant.warnings += ': ' + str(error) return True - if valid == 'true': + if valid: var_tab = 'true' cores = "HGVS-compliant variant descriptions" + warning diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 0d0e5581..80a6f4c9 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -351,6 +351,8 @@ def validate(self, batch_variant, selected_assembly, 
select_transcripts, transcr formatted_variant = str(my_variant.hgvs_formatted) input = str(my_variant.hgvs_formatted) + assert formatted_variant == str(my_variant.hgvs_formatted) + my_variant.set_quibble(str(my_variant.hgvs_formatted)) # ENST support needs to be re-evaluated, but is very low priority @@ -461,6 +463,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr logger.warning(error) continue + assert formatted_variant == str(my_variant.hgvs_formatted) + # Catch missing version number in refseq ref_type = re.compile(r"^N\w\w\d") is_version = re.compile(r"\d\.\d") @@ -488,6 +492,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue logger.trace("Passed 'common mistakes' catcher", my_variant) + assert formatted_variant == str(my_variant.hgvs_formatted) + # Primary validation of the input toskip = use_checking.structure_checks(my_variant, self) print(toskip, my_variant.hgvs_formatted, my_variant.quibble) @@ -495,6 +501,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue logger.trace("Variant structure and contents searches passed", my_variant) + assert formatted_variant == str(my_variant.hgvs_formatted) + # Mitochondrial variants toskip = format_converters.mitochondrial(my_variant, self) if toskip: @@ -510,14 +518,17 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if toskip: continue + assert formatted_variant == str(my_variant.hgvs_formatted) + # COLLECT gene symbol, name and ACCESSION INFORMATION # Gene symbol if my_variant.reftype != ':g.': toskip = collect_info.get_transcript_info(my_variant, self) print(toskip, my_variant.hgvs_formatted, my_variant.hgvs_genomic) if toskip: - continue + continue + assert formatted_variant == str(my_variant.hgvs_formatted) # Now start mapping from genome to transcripts if my_variant.reftype == ':g.': @@ -526,6 +537,7 @@ def validate(self, batch_variant, selected_assembly, 
select_transcripts, transcr if toskip: continue + assert formatted_variant == str(my_variant.hgvs_formatted) # TYPE = :c. if format_type == ':c.' or format_type == ':n.': From 977c69989b4cd04d154b9adc2b8cefe3dd62d4d1 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 17 Apr 2019 15:36:20 +0100 Subject: [PATCH 060/223] Removed strange rel_var loops from the transcript to genome mapper --- VariantValidator/modules/mappers.py | 299 +++------------------------- 1 file changed, 25 insertions(+), 274 deletions(-) diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index 1e150a85..927c6c02 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -454,75 +454,13 @@ def transcripts_to_gene(variant, validator): variant.warnings += ': ' + str(caution) + ': ' + str( automap) relevant = "Select the automapped transcript and click Submit to analyse" - rel_var = [] - rel_var.append(post_var) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = validator.hp.parse_hgvs_variant(str(accessions)) - try: - tx_id_info = validator.hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = validator.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - error = data['error'] - variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - 
del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = validator.hp.parse_hgvs_variant(str(accessions[1])) - try: - tx_id_info = validator.hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = validator.hgnc_rest( - path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - error = data['error'] - variant.warnings += ': ' + str( - error) - logger.warning(str(error)) - continue - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) # Kill current line and append for re-submission # Tag the line so that it is not written out variant.write = False # Set the values and append to batch_list + hgvs_vt = validator.hp.parse_hgvs_variant(str(post_var)) + assert str(hgvs_vt) == str(post_var) query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) @@ -565,76 +503,13 @@ def transcripts_to_gene(variant, validator): automap = variant.trapped + ' automapped to ' + str(post_var) variant.warnings += str(caution) + ': ' + str(automap) relevant = "Select the automapped transcript and click Submit to analyse" - rel_var = [] - rel_var.append(post_var) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = validator.hp.parse_hgvs_variant(str(accessions)) - try: - tx_id_info = validator.hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - 
accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = validator.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - error = data['error'] - variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = validator.parse_hgvs_variant(str(accessions[1])) - try: - tx_id_info = validator.hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = validator.hgnc_rest( - path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - error = data['error'] - variant.warnings += ': ' + str( - error) - logger.warning(str(error)) - continue - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) # Kill current line and append for re-submission # Tag the line so that it is not written out variant.write = False # Set the values and append to batch_list + hgvs_vt = validator.hp.parse_hgvs_variant(str(post_var)) + assert str(hgvs_vt) == str(post_var) query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=variant.primary_assembly, 
order=variant.order) validator.batch_list.append(query) @@ -664,43 +539,13 @@ def transcripts_to_gene(variant, validator): variant.warnings += ': ' + str(caution) + ': ' + str( automap) relevant = "Select the automapped transcript and click Submit to analyse" - rel_var = [] - rel_var.append(post_var) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = validator.hp.parse_hgvs_variant(str(accessions)) - try: - tx_id_info = validator.hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = validator.va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - error = data['error'] - variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) # Kill current line and append for re-submission # Tag the line so that it is not written out variant.write = False # Set the values and append to batch_list + hgvs_vt = validator.hp.parse_hgvs_variant(str(post_var)) + assert str(hgvs_vt) == str(post_var) query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) @@ -722,44 +567,13 @@ def transcripts_to_gene(variant, validator): variant.warnings += ': ' + str(caution) + ': ' + str( automap) relevant = "Select the automapped transcript and click Submit to analyse" - rel_var = [] - rel_var.append(post_var) - # Add gene symbols to the link - 
cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = validator.hp.parse_hgvs_variant(str(accessions)) - try: - tx_id_info = validator.hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = validator.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - reason = 'Cannot currently display the required information:' - error = data['error'] - variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) # Kill current line and append for re-submission # Tag the line so that it is not written out variant.write = False # Set the values and append to batch_list + hgvs_vt = validator.hp.parse_hgvs_variant(str(post_var)) + assert str(hgvs_vt) == str(post_var) query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) @@ -828,43 +642,13 @@ def transcripts_to_gene(variant, validator): automap = variant.trapped + ' automapped to ' + output variant.warnings += ': ' + str(caution) + ': ' + str(automap) relevant = "Select the automapped transcript and click Submit to analyse" - rel_var = [] - rel_var.append(output) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = validator.hp.parse_hgvs_variant(str(accessions)) - try: - tx_id_info = validator.hdp.get_tx_identity_info(str(hgvs_vt.ac)) - 
except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = validator.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - error = data['error'] - variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) # Kill current line and append for re-submission # Tag the line so that it is not written out variant.write = False # Set the values and append to batch_list + hgvs_vt = validator.hp.parse_hgvs_variant(str(query)) + assert str(hgvs_vt) == str(query) query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) @@ -873,7 +657,9 @@ def transcripts_to_gene(variant, validator): else: query = validator.hp.parse_hgvs_variant(formatted_variant) + print('Query:', query) test = validator.hp.parse_hgvs_variant(input) + print('Test:', test) if query.posedit.pos != test.posedit.pos: caution = 'The variant description ' + input + ' requires alteration to comply with HGVS variant nomenclature:' automap = 'Automap has corrected the variant description' @@ -881,65 +667,21 @@ def transcripts_to_gene(variant, validator): automap = str(test) + ' automapped to ' + str(query) variant.warnings += ': ' + str(caution) + ': ' + str(automap) relevant = "Select the automapped transcript and click Submit to analyse" - rel_var = [] - rel_var.append(query) - print(rel_var) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - print(cp_rel) - del rel_var[:] - print(rel_var) - 
- # TODO: This whole loop is very strange and I don't know what it's supposed to be doing. - # It's also repeated in four other places although only this one is run in the tests. - # Test variant 197 goes into this loop, but I don't think it's ever going to loop more than once. - # Would perhaps be worth testing in a seperate little bit of code or perhaps email Pete? - for accessions in cp_rel: - print(accessions) - error = 'false' - hgvs_vt = validator.hp.parse_hgvs_variant(str(accessions)) - print(hgvs_vt) - try: - tx_id_info = validator.hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = validator.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - reason = 'Cannot currently display the required information:' - error = data['error'] - variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - print(rel_var) # Kill current line and append for re-submission # Tag the line so that it is not written out - print('out of loop') - print(hgvs_vt) - print(rel_var) - raise SystemExit variant.write = False # Set the values and append to batch_list + hgvs_vt = validator.hp.parse_hgvs_variant(str(query)) + assert str(hgvs_vt) == str(query) query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) # VALIDATION of intronic variants pre_valid = validator.hp.parse_hgvs_variant(input) post_valid = 
validator.hp.parse_hgvs_variant(formatted_variant) + + # valid is false if the input contains a \d+\d, \d-\d or :g. if not valid: error = 'false' genomic_validation = str( @@ -3425,4 +3167,13 @@ def transcripts_to_gene(variant, validator): updated_transcript_variant) + ' MUST be fully validated prior to use in reports: select_variants=' + fn.valstr( updated_transcript_variant) + variant.coding = str(hgvs_coding) + variant.genomic_r = str(hgvs_refseq) + variant.genomic_g = str(hgvs_genomic) + variant.protein = str(hgvs_protein) + + if gap_compensation is True: + variant.test_stash_tx_left = test_stash_tx_left + variant.test_stash_tx_right = test_stash_tx_right + return False \ No newline at end of file From d2860152ebc4c6ec27153035c858e269be8b06cb Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 17 Apr 2019 16:15:55 +0100 Subject: [PATCH 061/223] Removed indendation and cleaned up t_to_g mapper more --- VariantValidator/modules/mappers.py | 3559 +++++++++++++-------------- 1 file changed, 1754 insertions(+), 1805 deletions(-) diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index 927c6c02..107ad52a 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -397,21 +397,11 @@ def transcripts_to_gene(variant, validator): # Regular expression catches the start of the interval only based on .00+00 pattern inv_start = re.compile(r"\.\d+\+\d") if inv_start.search(input): - # Find pattern e.g. +0000 and assign to a variable - off_value = re.search(r"(\+\d+)", input) - off_value = off_value.group(1) - # Integerise the value and assign to ex_offset - #ex_offset = int(off_value) cck = True if minus.search(input): # Regular expression catches the start of the interval only based on .00-00 pattern inv_start = re.compile(r"\.\d+\-\d") if inv_start.search(input): - # Find pattern e.g. 
-0000 and assign to a variable - off_value = re.search(r"(\-\d+)", input) - off_value = off_value.group(1) - # Integerise the value and assign to ex_offset - #ex_offset = int(off_value) cck = True # COORDINATE CHECKER @@ -421,7 +411,6 @@ def transcripts_to_gene(variant, validator): pat_r = re.compile(':r.') pat_g = re.compile(':g.') if cck: - dl = re.compile('del') # This should only ever hit coding and RNA variants if 'del' in formatted_variant: # RNA - looking at trapped variant which was saved before RNA converted to cDNA @@ -443,6 +432,7 @@ def transcripts_to_gene(variant, validator): 'selected transcript' # automapping of variant completed # Change to rna variant + # TODO: Need to look this section over. Doesn't make any sense. # THERE IS NO SUCH THING AS QUERY. THIS WOULDN'T HAVE WORKED AND ISN'T RUN IN ANY TESTS query = variant # Deliberately won't work so I can fix this once I have an appropriate test. posedit = query.posedit @@ -451,9 +441,7 @@ def transcripts_to_gene(variant, validator): query.type = 'r' post_var = str(query) automap = variant.trapped + ' automapped to ' + str(post_var) - variant.warnings += ': ' + str(caution) + ': ' + str( - automap) - relevant = "Select the automapped transcript and click Submit to analyse" + variant.warnings += ': ' + str(caution) + ': ' + str(automap) # Kill current line and append for re-submission # Tag the line so that it is not written out @@ -528,6 +516,7 @@ def transcripts_to_gene(variant, validator): automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' # automapping of variant completed # Change to rna variant + # TODO: As before this section needs fixing # THERE IS NO SUCH THING AS QUERY. 
THIS WOULDN'T HAVE WORKED AND ISN'T RUN IN ANY TESTS query = variant posedit = query.posedit @@ -577,42 +566,26 @@ def transcripts_to_gene(variant, validator): query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) - # If cck not true elif pat_r.search(variant.trapped): # set input hgvs object - hgvs_rna_input = validator.hp.parse_hgvs_variant( - variant.trapped) # Traps the hgvs variant of r. for further use + hgvs_rna_input = validator.hp.parse_hgvs_variant(variant.trapped) # Traps the hgvs variant of r. for further use inp = str(validator.hgvs_r_to_c(hgvs_rna_input)) # Regex - plus = re.compile(r"\d\+\d") # finds digit + digit - minus = re.compile(r"\d\-\d") # finds digit - digit if plus.search(input) or minus.search(input): - to_g = validator.genomic(inp, variant.no_norm_evm, variant.primary_assembly,variant.hn) - es = re.compile('error') - if es.search(str(to_g)): - if validator.alt_aln_method != 'genebuild': - error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g - reason = "An error has occurred" - excep = "%s -- %s -- %s\n" % (time.ctime(), reason, formatted_variant) - variant.warnings += ': ' + str(error) - logger.warning(str(error)) - return True - - else: - error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g - reason = "An error has occurred" - excep = "%s -- %s -- %s\n" % (time.ctime(), reason, formatted_variant) - variant.warnings += ': ' + str(error) - logger.warning(str(error)) - return True + to_g = validator.genomic(inp, variant.no_norm_evm, variant.primary_assembly, variant.hn) + if 'error' in str(to_g): + error = "If the following error message does not address the issue and the problem persists " \ + "please contact admin: " + to_g + variant.warnings += ': ' + error + 
logger.warning(error) + return True else: # Set variants pre and post genomic norm hgvs_inp = validator.myevm_g_to_t(variant.evm, to_g, tx_ac=obj.ac) to_g = variant.hn.normalize(to_g) hgvs_otp = validator.myevm_g_to_t(variant.evm, to_g, tx_ac=obj.ac) - tx_ac = '' else: # Set variants pre and post RNA norm hgvs_inp = validator.hp.parse_hgvs_variant(inp) @@ -620,12 +593,9 @@ def transcripts_to_gene(variant, validator): hgvs_otp = variant.hn.normalize(hgvs_inp) except hgvs.exceptions.HGVSError as e: hgvs_otp = hgvs_inp - tx_ac = '' # Set remaining variables - redit = str(hgvs_otp.posedit.edit) - redit = redit.lower() - hgvs_otp.posedit.edit = redit + hgvs_otp.posedit.edit = str(hgvs_otp.posedit.edit).lower() otp = str(hgvs_otp) query = str(hgvs_otp.posedit.pos) test = str(hgvs_inp.posedit.pos) @@ -636,12 +606,11 @@ def transcripts_to_gene(variant, validator): output = otp.replace(':c.', ':r.') # Apply coordinates test if query != test: - caution = 'The variant description ' + input + ' requires alteration to comply with HGVS variant nomenclature:' - automap = 'Automap has corrected the variant description' + caution = 'The variant description ' + input + ' requires alteration to comply with HGVS variant ' \ + 'nomenclature:' # automapping of variant completed automap = variant.trapped + ' automapped to ' + output - variant.warnings += ': ' + str(caution) + ': ' + str(automap) - relevant = "Select the automapped transcript and click Submit to analyse" + variant.warnings += ': ' + caution + ': ' + automap # Kill current line and append for re-submission # Tag the line so that it is not written out @@ -657,16 +626,13 @@ def transcripts_to_gene(variant, validator): else: query = validator.hp.parse_hgvs_variant(formatted_variant) - print('Query:', query) test = validator.hp.parse_hgvs_variant(input) - print('Test:', test) if query.posedit.pos != test.posedit.pos: - caution = 'The variant description ' + input + ' requires alteration to comply with HGVS variant 
nomenclature:' - automap = 'Automap has corrected the variant description' + caution = 'The variant description ' + input + ' requires alteration to comply with HGVS variant ' \ + 'nomenclature:' # automapping of variant completed automap = str(test) + ' automapped to ' + str(query) - variant.warnings += ': ' + str(caution) + ': ' + str(automap) - relevant = "Select the automapped transcript and click Submit to analyse" + variant.warnings += ': ' + caution + ': ' + automap # Kill current line and append for re-submission # Tag the line so that it is not written out @@ -683,599 +649,565 @@ def transcripts_to_gene(variant, validator): # valid is false if the input contains a \d+\d, \d-\d or :g. if not valid: - error = 'false' - genomic_validation = str( - validator.genomic(input, variant.no_norm_evm, variant.primary_assembly,variant.hn) ) - del_end = re.compile(r'\ddel$') - delins = re.compile(r'delins') - inv = re.compile(r'inv') + genomic_validation = str(validator.genomic(input, variant.no_norm_evm, variant.primary_assembly, variant.hn)) if fn.valstr(pre_valid) != fn.valstr(post_valid): if variant.reftype != ':g.': if caution == '': caution = fn.valstr(pre_valid) + ' automapped to ' + fn.valstr(post_valid) - else: - pass - variant.warnings += ': ' + str(caution) - logger.warning(str(caution)) - else: - pass - else: - pass + variant.warnings += ': ' + caution + logger.warning(caution) # Apply validation to intronic variant descriptions (should be valid but make sure) error = validator.validateHGVS(genomic_validation) if error == 'false': valid = True else: - - excep = "%s -- %s -- %s\n" % (time.ctime(), error, formatted_variant) - variant.warnings += ': ' + str(error) + variant.warnings += ': ' + error return True - if valid: - var_tab = 'true' - cores = "HGVS-compliant variant descriptions" + warning + assert valid is True + # If valid is False we won't reach this part, so I can remove the if condition - # v0.1a1 edit - if fn.valstr(pre_valid) != 
fn.valstr(post_valid): - if variant.reftype == ':g.': - if caution == '': - caution = fn.valstr(pre_valid) + ' automapped to ' + fn.valstr(post_valid) - else: - pass - variant.warnings += ': ' + str(caution) - else: - pass - else: - pass + var_tab = 'true' + cores = "HGVS-compliant variant descriptions" + warning - # COLLECT VARIANT DESCRIPTIONS - ############################## + # v0.1a1 edit + if fn.valstr(pre_valid) != fn.valstr(post_valid): + if variant.reftype == ':g.': + if caution == '': + caution = fn.valstr(pre_valid) + ' automapped to ' + fn.valstr(post_valid) + variant.warnings += ': ' + str(caution) - # Coding sequence - BASED ON NORMALIZED VARIANT IF EXONIC - hgvs_coding = validator.coding(formatted_variant, validator.hp) - boundary = re.compile('exon-intron boundary') - spanning = re.compile('exon/intron') + # COLLECT VARIANT DESCRIPTIONS + ############################## - try: - hgvs_coding = variant.hn.normalize(hgvs_coding) - except hgvs.exceptions.HGVSError as e: - error = str(e) + # Coding sequence - BASED ON NORMALIZED VARIANT IF EXONIC + hgvs_coding = validator.coding(formatted_variant, validator.hp) + boundary = re.compile('exon-intron boundary') + spanning = re.compile('exon/intron') - # Gap compensating code status - gap_compensation = True + try: + hgvs_coding = variant.hn.normalize(hgvs_coding) + except hgvs.exceptions.HGVSError as e: + error = str(e) + + # Gap compensating code status + gap_compensation = True - # Gap gene black list + # Gap gene black list + try: + gene_symbol = validator.db.get_gene_symbol_from_transcriptID(hgvs_coding.ac) + except Exception: + fn.exceptPass() + else: + # If the gene symbol is not in the list, the value False will be returned + gap_compensation = vvChromosomes.gap_black_list(gene_symbol) + + # Intron spanning variants + if re.search('boundary', str(error)) or re.search('spanning', str(error)): try: - gene_symbol = validator.db.get_gene_symbol_from_transcriptID(hgvs_coding.ac) - except Exception: - 
fn.exceptPass() - else: - # If the gene symbol is not in the list, the value False will be returned - gap_compensation = vvChromosomes.gap_black_list(gene_symbol) + hgvs_coding = variant.evm._maybe_normalize(hgvs_coding) + gap_compensation = False + except hgvs.exceptions.HGVSError as error: + variant.warnings += ': ' + str(error) + logger.warning(str(error)) + return True + else: + pass + + # Warn status + logger.warning("gap_compensation_1 = " + str(gap_compensation)) + coding = fn.valstr(hgvs_coding) + + # RNA sequence + hgvs_rna = copy.deepcopy(hgvs_coding) + hgvs_rna = validator.hgvs_c_to_r(hgvs_rna) + rna = str(hgvs_rna) - # Intron spanning variants - if re.search('boundary', str(error)) or re.search('spanning', str(error)): + # Genomic sequence + hgvs_genomic = validator.myevm_t_to_g(hgvs_coding, variant.no_norm_evm, variant.primary_assembly, variant.hn) + final_hgvs_genomic = hgvs_genomic + + # genomic_possibilities + # 1. take the simple 3 pr normalized hgvs_genomic + # 2. Lock in hgvs_genomic at its most 5 prime position wrt genome + hgvs_genomic_possibilities = [] + + # Loop out gap finding code under these circumstances! 
+ if gap_compensation is True: + logger.warning('g_to_t gap code 1 active') + rn_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) + hgvs_genomic_possibilities.append(rn_hgvs_genomic) + if orientation != -1: try: - hgvs_coding = variant.evm._maybe_normalize(hgvs_coding) - gap_compensation = False - except hgvs.exceptions.HGVSError as error: - variant.warnings += ': ' + str(error) - logger.warning(str(error)) - return True + chromosome_normalized_hgvs_coding = variant.reverse_normalizer.normalize(hgvs_coding) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + chromosome_normalized_hgvs_coding = hgvs_coding else: - pass - - # Warn status - logger.warning("gap_compensation_1 = " + str(gap_compensation)) - coding = fn.valstr(hgvs_coding) - - # RNA sequence - hgvs_rna = copy.deepcopy(hgvs_coding) - hgvs_rna = validator.hgvs_c_to_r(hgvs_rna) - rna = str(hgvs_rna) - - # Genomic sequence - hgvs_genomic = validator.myevm_t_to_g(hgvs_coding, variant.no_norm_evm, variant.primary_assembly, variant.hn) - final_hgvs_genomic = hgvs_genomic - - # genomic_possibilities - # 1. take the simple 3 pr normalized hgvs_genomic - # 2. Lock in hgvs_genomic at its most 5 prime position wrt genome - hgvs_genomic_possibilities = [] - - # Loop out gap finding code under these circumstances! 
- if gap_compensation is True: - logger.warning('g_to_t gap code 1 active') - rn_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) - hgvs_genomic_possibilities.append(rn_hgvs_genomic) - if orientation != -1: - try: - chromosome_normalized_hgvs_coding = variant.reverse_normalizer.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - chromosome_normalized_hgvs_coding = hgvs_coding - else: - try: - chromosome_normalized_hgvs_coding = variant.hn.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - chromosome_normalized_hgvs_coding = hgvs_coding + try: + chromosome_normalized_hgvs_coding = variant.hn.normalize(hgvs_coding) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + chromosome_normalized_hgvs_coding = hgvs_coding - most_3pr_hgvs_genomic = validator.myvm_t_to_g(chromosome_normalized_hgvs_coding, hgvs_genomic.ac, - variant.no_norm_evm, variant.hn) - hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) + most_3pr_hgvs_genomic = validator.myvm_t_to_g(chromosome_normalized_hgvs_coding, hgvs_genomic.ac, + variant.no_norm_evm, variant.hn) + hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) - # Push from side to side to try pick up odd placements - # MAKE A NO NORM HGVS2VCF - # First to the right - hgvs_stash = copy.deepcopy(hgvs_coding) + # Push from side to side to try pick up odd placements + # MAKE A NO NORM HGVS2VCF + # First to the right + hgvs_stash = copy.deepcopy(hgvs_coding) + try: + hgvs_stash = variant.no_norm_evm.c_to_n(hgvs_stash) + except: + fn.exceptPass() + try: + stash_ac = hgvs_stash.ac + stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, variant.primary_assembly, variant.hn, validator.sf) + stash_pos = int(stash_dict['pos']) + stash_ref = stash_dict['ref'] + stash_alt = stash_dict['alt'] + # Generate an end position + stash_end = str(stash_pos + len(stash_ref) - 1) + # make a not real deletion 
insertion + stash_hgvs_not_delins = validator.hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' + str( + stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) try: - hgvs_stash = variant.no_norm_evm.c_to_n(hgvs_stash) + stash_hgvs_not_delins = variant.no_norm_evm.n_to_c(stash_hgvs_not_delins) except: fn.exceptPass() - try: - stash_ac = hgvs_stash.ac - stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, variant.primary_assembly, variant.hn, validator.sf) - stash_pos = int(stash_dict['pos']) - stash_ref = stash_dict['ref'] - stash_alt = stash_dict['alt'] - # Generate an end position - stash_end = str(stash_pos + len(stash_ref) - 1) - # make a not real deletion insertion - stash_hgvs_not_delins = validator.hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' + str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + # Store a tx copy for later use + test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) + # stash_genomic = vm.t_to_g(test_stash_tx_right, hgvs_genomic.ac) + stash_genomic = validator.myvm_t_to_g(test_stash_tx_right, hgvs_genomic.ac, variant.no_norm_evm, variant.hn) + # Stash the outputs if required + # test variants = NC_000006.11:g.90403795G= (causes double identity) + # NC_000002.11:g.73675227_73675228insCTC (? 
incorrect assumed insertion position) + # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': + # pass + if len(test_stash_tx_right.posedit.edit.ref) == (( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + stash_tx_right = test_stash_tx_right + if hasattr(test_stash_tx_right.posedit.edit, + 'alt') and test_stash_tx_right.posedit.edit.alt is not None: + alt = test_stash_tx_right.posedit.edit.alt + else: + alt = '' + if hasattr(stash_genomic.posedit.edit, + 'alt') and stash_genomic.posedit.edit.alt is not None: + g_alt = stash_genomic.posedit.edit.alt + else: + g_alt = '' + if (len(alt) - ( + test_stash_tx_right.posedit.pos.end.base - test_stash_tx_right.posedit.pos.start.base) + 1) != ( + len(g_alt) - ( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + hgvs_genomic_possibilities.append(stash_genomic) + else: + hgvs_genomic_possibilities.append('') + elif test_stash_tx_right.posedit.edit.type == 'identity': + reform_ident = str(test_stash_tx_right).split(':')[0] + reform_ident = reform_ident + ':c.' + str(test_stash_tx_right.posedit.pos) + 'del' + str( + test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) + hgvs_reform_ident = validator.hp.parse_hgvs_variant(reform_ident) try: - stash_hgvs_not_delins = variant.no_norm_evm.n_to_c(stash_hgvs_not_delins) - except: - fn.exceptPass() - # Store a tx copy for later use - test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) - # stash_genomic = vm.t_to_g(test_stash_tx_right, hgvs_genomic.ac) - stash_genomic = validator.myvm_t_to_g(test_stash_tx_right, hgvs_genomic.ac, variant.no_norm_evm, variant.hn) - # Stash the outputs if required - # test variants = NC_000006.11:g.90403795G= (causes double identity) - # NC_000002.11:g.73675227_73675228insCTC (? 
incorrect assumed insertion position) - # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) - # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': - # pass - if len(test_stash_tx_right.posedit.edit.ref) == (( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - stash_tx_right = test_stash_tx_right - if hasattr(test_stash_tx_right.posedit.edit, - 'alt') and test_stash_tx_right.posedit.edit.alt is not None: - alt = test_stash_tx_right.posedit.edit.alt - else: - alt = '' - if hasattr(stash_genomic.posedit.edit, - 'alt') and stash_genomic.posedit.edit.alt is not None: - g_alt = stash_genomic.posedit.edit.alt - else: - g_alt = '' - if (len(alt) - ( - test_stash_tx_right.posedit.pos.end.base - test_stash_tx_right.posedit.pos.start.base) + 1) != ( - len(g_alt) - ( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - hgvs_genomic_possibilities.append(stash_genomic) - else: - hgvs_genomic_possibilities.append('') - elif test_stash_tx_right.posedit.edit.type == 'identity': - reform_ident = str(test_stash_tx_right).split(':')[0] - reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_right.posedit.pos) + 'del' + str( - test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) - hgvs_reform_ident = validator.hp.parse_hgvs_variant(reform_ident) - try: - variant.hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error): - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append('') - else: + variant.hn.normalize(hgvs_reform_ident) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error): stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append(stash_genomic) - else: - try: - variant.hn.normalize(test_stash_tx_right) - except hgvs.exceptions.HGVSUnsupportedOperationError: hgvs_genomic_possibilities.append('') - else: - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: - test_stash_tx_right = copy.deepcopy(hgvs_coding) - fn.exceptPass() - # Intronic positions not supported. Will cause a Value Error - except ValueError: - test_stash_tx_right = copy.deepcopy(hgvs_coding) - fn.exceptPass() + else: + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append(stash_genomic) + else: + try: + variant.hn.normalize(test_stash_tx_right) + except hgvs.exceptions.HGVSUnsupportedOperationError: + hgvs_genomic_possibilities.append('') + else: + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append(stash_genomic) + except hgvs.exceptions.HGVSError as e: + test_stash_tx_right = copy.deepcopy(hgvs_coding) + fn.exceptPass() + # Intronic positions not supported. 
Will cause a Value Error + except ValueError: + test_stash_tx_right = copy.deepcopy(hgvs_coding) + fn.exceptPass() - # Then to the left - hgvs_stash = copy.deepcopy(hgvs_coding) + # Then to the left + hgvs_stash = copy.deepcopy(hgvs_coding) + try: + hgvs_stash = variant.no_norm_evm.c_to_n(hgvs_stash) + except: + fn.exceptPass() + try: + stash_ac = hgvs_stash.ac + stash_dict = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, variant.primary_assembly, variant.reverse_normalizer, + validator.sf) + stash_pos = int(stash_dict['pos']) + stash_ref = stash_dict['ref'] + stash_alt = stash_dict['alt'] + # Generate an end position + stash_end = str(stash_pos + len(stash_ref) - 1) + # make a not real deletion insertion + stash_hgvs_not_delins = validator.hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' + str( + stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) try: - hgvs_stash = variant.no_norm_evm.c_to_n(hgvs_stash) + stash_hgvs_not_delins = variant.no_norm_evm.n_to_c(stash_hgvs_not_delins) except: fn.exceptPass() - try: - stash_ac = hgvs_stash.ac - stash_dict = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, variant.primary_assembly, variant.reverse_normalizer, - validator.sf) - stash_pos = int(stash_dict['pos']) - stash_ref = stash_dict['ref'] - stash_alt = stash_dict['alt'] - # Generate an end position - stash_end = str(stash_pos + len(stash_ref) - 1) - # make a not real deletion insertion - stash_hgvs_not_delins = validator.hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' 
+ str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) - try: - stash_hgvs_not_delins = variant.no_norm_evm.n_to_c(stash_hgvs_not_delins) - except: - fn.exceptPass() - # Store a tx copy for later use - test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) - # stash_genomic = vm.t_to_g(test_stash_tx_left, hgvs_genomic.ac) - stash_genomic = validator.myvm_t_to_g(test_stash_tx_left, hgvs_genomic.ac, variant.no_norm_evm, variant.hn) - # Stash the outputs if required - # test variants = NC_000006.11:g.90403795G= (causes double identity) - # NC_000002.11:g.73675227_73675228insCTC - # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) - # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': - # pass - if len(test_stash_tx_left.posedit.edit.ref) == (( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): - stash_tx_left = test_stash_tx_left - if hasattr(test_stash_tx_left.posedit.edit, - 'alt') and test_stash_tx_left.posedit.edit.alt is not None: - alt = test_stash_tx_left.posedit.edit.alt - else: - alt = '' - if hasattr(stash_genomic.posedit.edit, - 'alt') and stash_genomic.posedit.edit.alt is not None: - g_alt = stash_genomic.posedit.edit.alt - else: - g_alt = '' + # Store a tx copy for later use + test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) + # stash_genomic = vm.t_to_g(test_stash_tx_left, hgvs_genomic.ac) + stash_genomic = validator.myvm_t_to_g(test_stash_tx_left, hgvs_genomic.ac, variant.no_norm_evm, variant.hn) + # Stash the outputs if required + # test variants = NC_000006.11:g.90403795G= (causes double identity) + # NC_000002.11:g.73675227_73675228insCTC + # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': + # pass + 
if len(test_stash_tx_left.posedit.edit.ref) == (( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): + stash_tx_left = test_stash_tx_left + if hasattr(test_stash_tx_left.posedit.edit, + 'alt') and test_stash_tx_left.posedit.edit.alt is not None: + alt = test_stash_tx_left.posedit.edit.alt + else: + alt = '' + if hasattr(stash_genomic.posedit.edit, + 'alt') and stash_genomic.posedit.edit.alt is not None: + g_alt = stash_genomic.posedit.edit.alt + else: + g_alt = '' - if (len(alt) - ( - test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( - len(g_alt) - ( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - hgvs_genomic_possibilities.append(stash_genomic) - else: - hgvs_genomic_possibilities.append('') - elif test_stash_tx_left.posedit.edit.type == 'identity': - reform_ident = str(test_stash_tx_left).split(':')[0] - reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_left.posedit.pos) + 'del' + str( - test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) - hgvs_reform_ident = validator.hp.parse_hgvs_variant(reform_ident) - try: - variant.hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error): - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append('') - else: - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append(stash_genomic) + if (len(alt) - ( + test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( + len(g_alt) - ( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + hgvs_genomic_possibilities.append(stash_genomic) else: - try: - variant.hn.normalize(test_stash_tx_left) - except hgvs.exceptions.HGVSUnsupportedOperationError: - hgvs_genomic_possibilities.append('') - else: + hgvs_genomic_possibilities.append('') + elif test_stash_tx_left.posedit.edit.type == 'identity': + reform_ident = str(test_stash_tx_left).split(':')[0] + reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_left.posedit.pos) + 'del' + str( + test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) + hgvs_reform_ident = validator.hp.parse_hgvs_variant(reform_ident) + try: + variant.hn.normalize(hgvs_reform_ident) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error): stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: - test_stash_tx_left = copy.deepcopy(hgvs_coding) - fn.exceptPass() - except ValueError: - test_stash_tx_left = copy.deepcopy(hgvs_coding) - fn.exceptPass() + hgvs_genomic_possibilities.append('') + else: + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append(stash_genomic) + else: + try: + variant.hn.normalize(test_stash_tx_left) + except hgvs.exceptions.HGVSUnsupportedOperationError: + hgvs_genomic_possibilities.append('') + else: + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append(stash_genomic) + except hgvs.exceptions.HGVSError as e: + test_stash_tx_left = copy.deepcopy(hgvs_coding) + fn.exceptPass() + except ValueError: + test_stash_tx_left = copy.deepcopy(hgvs_coding) + fn.exceptPass() - # direct mapping from reverse_normalized transcript insertions in the delins format - try: - if hgvs_coding.posedit.edit.type == 'ins': - most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) - most_3pr_hgvs_transcript_variant = variant.reverse_normalizer.normalize(hgvs_coding) - try: - n_3pr = validator.vm.c_to_n(most_3pr_hgvs_transcript_variant) - n_5pr = validator.vm.c_to_n(most_5pr_hgvs_transcript_variant) - except: - n_3pr = most_3pr_hgvs_transcript_variant - n_5pr = most_5pr_hgvs_transcript_variant - # Make into a delins by adding the ref bases to the variant ref and alt - pr3_ref = validator.sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, - n_3pr.posedit.pos.end.base) - pr5_ref = 
validator.sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, - n_5pr.posedit.pos.end.base) - most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref - most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref - most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[ - 0] + most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ - pr3_ref[1] - most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[ - 0] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ - pr5_ref[1] - # Map to the genome - genomic_from_most_3pr_hgvs_transcript_variant = validator.vm.t_to_g( - most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) - genomic_from_most_5pr_hgvs_transcript_variant = validator.vm.t_to_g( - most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) - # Normalize - If the variant spans a gap it should then form a static genomic variant - try: + # direct mapping from reverse_normalized transcript insertions in the delins format + try: + if hgvs_coding.posedit.edit.type == 'ins': + most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) + most_3pr_hgvs_transcript_variant = variant.reverse_normalizer.normalize(hgvs_coding) + try: + n_3pr = validator.vm.c_to_n(most_3pr_hgvs_transcript_variant) + n_5pr = validator.vm.c_to_n(most_5pr_hgvs_transcript_variant) + except: + n_3pr = most_3pr_hgvs_transcript_variant + n_5pr = most_5pr_hgvs_transcript_variant + # Make into a delins by adding the ref bases to the variant ref and alt + pr3_ref = validator.sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, + n_3pr.posedit.pos.end.base) + pr5_ref = validator.sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, + n_5pr.posedit.pos.end.base) + most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref + most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[ + 0] + most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ + pr3_ref[1] + 
most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[ + 0] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ + pr5_ref[1] + # Map to the genome + genomic_from_most_3pr_hgvs_transcript_variant = validator.vm.t_to_g( + most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) + genomic_from_most_5pr_hgvs_transcript_variant = validator.vm.t_to_g( + most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) + # Normalize - If the variant spans a gap it should then form a static genomic variant + try: + genomic_from_most_3pr_hgvs_transcript_variant = variant.hn.normalize( + genomic_from_most_3pr_hgvs_transcript_variant) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if error == 'base start position must be <= end position': + start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base + end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start genomic_from_most_3pr_hgvs_transcript_variant = variant.hn.normalize( genomic_from_most_3pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if error == 'base start position must be <= end position': - start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base - end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start - genomic_from_most_3pr_hgvs_transcript_variant = variant.hn.normalize( - genomic_from_most_3pr_hgvs_transcript_variant) - try: + try: + genomic_from_most_5pr_hgvs_transcript_variant = variant.hn.normalize( + genomic_from_most_5pr_hgvs_transcript_variant) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if error == 'base start position must be <= end 
position': + start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base + end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start genomic_from_most_5pr_hgvs_transcript_variant = variant.hn.normalize( genomic_from_most_5pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if error == 'base start position must be <= end position': - start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base - end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start - genomic_from_most_5pr_hgvs_transcript_variant = variant.hn.normalize( - genomic_from_most_5pr_hgvs_transcript_variant) - try: - if genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' 
+ str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_3pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( - genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) + try: + if genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' + str( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( + genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) - try: - if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' 
+ str( - most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref - most_3pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( - most_3pr_hgvs_transcript_variant_delins_from_dup) + try: + if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' + str( + most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( + most_3pr_hgvs_transcript_variant_delins_from_dup) - try: - if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' 
+ str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_5pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) + try: + if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' + str( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( + genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) - try: - if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' 
+ str( - most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref - most_5pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( - most_5pr_hgvs_transcript_variant_delins_from_dup) - - if len(genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( - most_3pr_hgvs_transcript_variant.posedit.edit.alt): - hgvs_genomic_possibilities.append(genomic_from_most_3pr_hgvs_transcript_variant) - if len(genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( - most_5pr_hgvs_transcript_variant.posedit.edit.alt): - hgvs_genomic_possibilities.append(genomic_from_most_5pr_hgvs_transcript_variant) + try: + if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' 
+ str( + most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( + most_5pr_hgvs_transcript_variant_delins_from_dup) + + if len(genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( + most_3pr_hgvs_transcript_variant.posedit.edit.alt): + hgvs_genomic_possibilities.append(genomic_from_most_3pr_hgvs_transcript_variant) + if len(genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( + most_5pr_hgvs_transcript_variant.posedit.edit.alt): + hgvs_genomic_possibilities.append(genomic_from_most_5pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + pass + + logger.info('\nGENOMIC POSSIBILITIES') + for possibility in hgvs_genomic_possibilities: + if possibility == '': + logger.info('X') + else: + logger.info(fn.valstr(possibility)) + + logger.info('\n') + + # Set variables for problem specific warnings + gapped_alignment_warning = '' + corrective_action_taken = '' + gapped_transcripts = '' + auto_info = '' + + # Mark as not disparity detected + disparity_deletion_in = ['false', 'false'] + + # Loop through to see if a gap can be located + # Set the variables required for corrective normalization + possibility_counter = 0 + suppress_c_normalization = 'false' # Applies to boundary crossing normalization + + # Copy a version of hgvs_genomic_possibilities + for possibility in hgvs_genomic_possibilities: + possibility_counter = possibility_counter + 1 + + 
# Loop out stash possibilities which will not spot gaps so are empty + if possibility == '': + continue + + # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps + hgvs_genomic_variant = copy.deepcopy(possibility) + stored_hgvs_genomic_variant = copy.deepcopy(hgvs_genomic_variant) + + # Reverse normalize hgvs_genomic_variant: NOTE will replace ref + try: + reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic_variant) + except hgvs.exceptions.HGVSError as e: + # Strange error caused by gap in genomic error = str(e) - if re.match('Normalization of intronic variants is not supported', error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - pass + if re.search('base start position must be <= end position', error): + if hgvs_genomic.posedit.edit.type == 'delins': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) + if hgvs_genomic.posedit.edit.type == 'del': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) + if re.search('insertion length must be 1', error): + 
if hgvs_genomic.posedit.edit.type == 'ins': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) + lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start, end) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) - logger.info('\nGENOMIC POSSIBILITIES') - for possibility in hgvs_genomic_possibilities: - if possibility == '': - logger.info('X') - else: - logger.info(fn.valstr(possibility)) + hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + # Store a copy for later use + stored_hgvs_genomic_5pr = copy.deepcopy(hgvs_genomic_5pr) - logger.info('\n') + # Create VCF + vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, variant.primary_assembly, + variant.reverse_normalizer, validator.sf) + chr = vcf_dict['chr'] + pos = vcf_dict['pos'] + ref = vcf_dict['ref'] + alt = vcf_dict['alt'] - # Set variables for problem specific warnings - gapped_alignment_warning = '' - corrective_action_taken = '' - gapped_transcripts = '' - auto_info = '' + # Look for exonic gaps within transcript or chromosome + no_normalized_c = 'false' # Mark true to not produce an additional normalization of c. 
- # Mark as not disparity detected - disparity_deletion_in = ['false', 'false'] + # Generate an end position + end = str(int(pos) + len(ref) - 1) + pos = str(pos) - # Loop through to see if a gap can be located - # Set the variables required for corrective normalization - possibility_counter = 0 - suppress_c_normalization = 'false' # Applies to boundary crossing normalization + # Store a not real deletion insertion to test for gapping + stored_hgvs_not_delins = validator.hp.parse_hgvs_variant(str( + hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) + v = [chr, pos, ref, alt] - # Copy a version of hgvs_genomic_possibilities - for possibility in hgvs_genomic_possibilities: - possibility_counter = possibility_counter + 1 + # Detect intronic variation using normalization + intronic_variant = 'false' - # Loop out stash possibilities which will not spot gaps so are empty - if possibility == '': + # Save a copy of current hgvs_coding + try: + saved_hgvs_coding = variant.no_norm_evm.g_to_t(stored_hgvs_not_delins, hgvs_coding.ac) + except hgvs.exceptions.HGVSInvalidIntervalError as e: + if str(e) == 'start or end or both are beyond the bounds of transcript record': + saved_hgvs_coding = hgvs_coding + intronic_variant = 'true' continue + else: + saved_hgvs_coding = variant.no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, + hgvs_coding.ac) - # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps - hgvs_genomic_variant = copy.deepcopy(possibility) - stored_hgvs_genomic_variant = copy.deepcopy(hgvs_genomic_variant) - - # Reverse normalize hgvs_genomic_variant: NOTE will replace ref + # Look for normalized variant options that do not match hgvs_coding + if orientation == -1: + # position genomic at its most 5 prime position try: - reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic_variant) + query_genomic = 
variant.reverse_normalizer.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript and test for movement + try: + hgvs_seek_var = variant.evm.g_to_t(query_genomic, hgvs_coding.ac) except hgvs.exceptions.HGVSError as e: - # Strange error caused by gap in genomic - error = str(e) - if re.search('base start position must be <= end position', error): - if hgvs_genomic.posedit.edit.type == 'delins': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) - if hgvs_genomic.posedit.edit.type == 'del': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) - if re.search('insertion length must be 1', error): - if hgvs_genomic.posedit.edit.type == 'ins': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) - lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start, end) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + 
hgvs_genomic.posedit.edit.alt + rhb - reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) - - hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - # Store a copy for later use - stored_hgvs_genomic_5pr = copy.deepcopy(hgvs_genomic_5pr) - - # Create VCF - vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, variant.primary_assembly, - variant.reverse_normalizer, validator.sf) - chr = vcf_dict['chr'] - pos = vcf_dict['pos'] - ref = vcf_dict['ref'] - alt = vcf_dict['alt'] - - # Look for exonic gaps within transcript or chromosome - no_normalized_c = 'false' # Mark true to not produce an additional normalization of c. - - # Generate an end position - end = str(int(pos) + len(ref) - 1) - pos = str(pos) - - # Store a not real deletion insertion to test for gapping - stored_hgvs_not_delins = validator.hp.parse_hgvs_variant(str( - hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) - v = [chr, pos, ref, alt] - - # Detect intronic variation using normalization - intronic_variant = 'false' - - # Save a copy of current hgvs_coding + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if ( + hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding + + elif orientation != -1: + # position genomic at its most 3 prime position try: - saved_hgvs_coding = variant.no_norm_evm.g_to_t(stored_hgvs_not_delins, hgvs_coding.ac) - except hgvs.exceptions.HGVSInvalidIntervalError as e: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - saved_hgvs_coding = 
hgvs_coding - intronic_variant = 'true' - continue - else: - saved_hgvs_coding = variant.no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, - hgvs_coding.ac) + query_genomic = variant.hn.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript ant test for movement + try: + hgvs_seek_var = variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if ( + hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding - # Look for normalized variant options that do not match hgvs_coding - if orientation == -1: - # position genomic at its most 5 prime position - try: - query_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = variant.evm.g_to_t(query_genomic, hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if ( - hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding - - elif orientation != -1: - # position genomic at its most 3 prime position - try: - query_genomic = 
variant.hn.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript ant test for movement - try: - hgvs_seek_var = variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if ( - hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass + try: + intron_test = variant.hn.normalize(hgvs_seek_var) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + intronic_variant = 'hard_fail' else: - hgvs_seek_var = saved_hgvs_coding - - try: - intron_test = variant.hn.normalize(hgvs_seek_var) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - intronic_variant = 'hard_fail' - else: - # Double check to see whether the variant is actually intronic? 
- for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' - - if intronic_variant != 'hard_fail': - if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', - str(hgvs_seek_var.posedit.pos)): # Double check to see whether the variant is actually intronic? for exon in ori: genomic_start = int(exon['alt_start_i']) @@ -1288,9 +1220,11 @@ def transcripts_to_gene(variant, validator): else: intronic_variant = 'true' + if intronic_variant != 'hard_fail': if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str(hgvs_seek_var.posedit.pos)): + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', + str(hgvs_seek_var.posedit.pos)): # Double check to see whether the variant is actually intronic? 
for exon in ori: genomic_start = int(exon['alt_start_i']) @@ -1303,932 +1237,25 @@ def transcripts_to_gene(variant, validator): else: intronic_variant = 'true' - if intronic_variant != 'true': - # Flag RefSeqGene for ammendment - # amend_RefSeqGene = 'false' - # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths - if stored_hgvs_not_delins != '': - # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) - # This test will only occur in dup of single base, insertion or substitution - if not re.search('_', str(hgvs_not_delins.posedit.pos)): - if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', - hgvs_genomic_5pr.posedit.edit.type): - # For gap in chr, map to t. - but becaouse we have pushed to 5 prime by norm, add 1 to end pos - plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) - plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 - plussed_hgvs_not_delins.posedit.edit.ref = '' - transcript_variant = variant.no_norm_evm.g_to_t(plussed_hgvs_not_delins, - str(saved_hgvs_coding.ac)) - if (( - transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( - hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - 
hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str( - hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - else: - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str( - hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - else: - pass - else: - pass - try: - tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - saved_hgvs_coding.ac) - except 
hgvs.exceptions.HGVSInvalidIntervalError: - tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, - saved_hgvs_coding.ac) - # Create normalized version of tx_hgvs_not_delins - rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) - - # Check for +1 base and adjust - if re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - r'\+', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - pass - - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, - variant.primary_assembly, variant.hn) - - rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - - # tx_hgvs_not_delins = rn_tx_hgvs_not_delins - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = 
validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, - variant.primary_assembly, variant.hn) - rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: - # pass - - # Check for -ve base and adjust - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - r'\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - pass - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # Delete the ref - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # Add the additional base to the ALT - start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) - rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, - variant.primary_assembly, variant.hn) - rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - 
rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, - variant.primary_assembly, variant.hn) - rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - else: - pass - - # Logic - if len(hgvs_not_delins.posedit.edit.ref) < len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( - hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['chromosome', gap_length] - elif len(hgvs_not_delins.posedit.edit.ref) > len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( - rn_tx_hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] - else: - re_capture_tx_variant = [] - for internal_possibility in hgvs_genomic_possibilities: - if internal_possibility == '': - continue - - hgvs_t_possibility = validator.vm.g_to_t(internal_possibility, hgvs_coding.ac) - if hgvs_t_possibility.posedit.edit.type == 'ins': - try: - hgvs_t_possibility = validator.vm.c_to_n(hgvs_t_possibility) - except: - fn.exceptPass() - ins_ref = validator.sf.fetch_seq(hgvs_t_possibility.ac, - hgvs_t_possibility.posedit.pos.start.base - 1, - hgvs_t_possibility.posedit.pos.start.base + 1) - try: - hgvs_t_possibility = validator.vm.n_to_c(hgvs_t_possibility) - except: - fn.exceptPass() - hgvs_t_possibility.posedit.edit.ref = ins_ref - hgvs_t_possibility.posedit.edit.alt = ins_ref[ - 0] + hgvs_t_possibility.posedit.edit.alt + \ - ins_ref[1] - if internal_possibility.posedit.edit.type == 'ins': - ins_ref = validator.sf.fetch_seq(internal_possibility.ac, - internal_possibility.posedit.pos.start.base - 1, - 
internal_possibility.posedit.pos.end.base) - internal_possibility.posedit.edit.ref = ins_ref - internal_possibility.posedit.edit.alt = ins_ref[ - 0] + internal_possibility.posedit.edit.alt + \ - ins_ref[1] - - if len(hgvs_t_possibility.posedit.edit.ref) < len( - internal_possibility.posedit.edit.ref): - gap_length = len(internal_possibility.posedit.edit.ref) - len( - hgvs_t_possibility.posedit.edit.ref) - re_capture_tx_variant = ['transcript', gap_length, - hgvs_t_possibility] - hgvs_not_delins = internal_possibility - hgvs_genomic_5pr = internal_possibility - break - - if re_capture_tx_variant != []: - try: - tx_hgvs_not_delins = validator.vm.c_to_n(re_capture_tx_variant[2]) - except: - tx_hgvs_not_delins = re_capture_tx_variant[2] - disparity_deletion_in = re_capture_tx_variant[0:-1] - else: - pass - - # 'At hgvs_genomic' - # Final sanity checks - try: - validator.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) - except Exception as e: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - continue - try: - variant.hn.normalize(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - continue - elif re.match('Normalization of intronic variants is not supported', error): - # We know that this cannot be because of an intronic variant, so must be aligned to tx gap - disparity_deletion_in = ['transcript', 'Requires Analysis'] - - # amend_RefSeqGene = 'false' - # Recreate hgvs_genomic - if disparity_deletion_in[0] == 'transcript': - hgvs_genomic = hgvs_not_delins - - # Find oddly placed gaps where the tx variant is encompassed in the gap - if disparity_deletion_in[0] == 'false' and ( - possibility_counter == 3 or 
possibility_counter == 4): - rg = variant.reverse_normalizer.normalize(hgvs_not_delins) - rtx = validator.vm.g_to_t(rg, tx_hgvs_not_delins.ac) - fg = variant.hn.normalize(hgvs_not_delins) - ftx = validator.vm.g_to_t(fg, tx_hgvs_not_delins.ac) - if (rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( - ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): - exons = validator.hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, validator.alt_aln_method) - exonic = False - for ex_test in exons: - if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ - 7]) and ftx.posedit.pos.end.base in range(ex_test[6], ex_test[7]): - exonic = True - if exonic is True: - hgvs_not_delins = fg - hgvs_genomic = fg - hgvs_genomic_5pr = fg - try: - tx_hgvs_not_delins = validator.vm.c_to_n(ftx) - except Exception: - tx_hgvs_not_delins = ftx - disparity_deletion_in = ['transcript', 'Requires Analysis'] - - # Pre-processing of tx_hgvs_not_delins - try: - if tx_hgvs_not_delins.posedit.edit.alt is None: - tx_hgvs_not_delins.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( - tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = validator.hp.parse_hgvs_variant( - tx_hgvs_not_delins_delins_from_dup) - - # GAP IN THE TRANSCRIPT DISPARITY DETECTED - if disparity_deletion_in[0] == 'transcript': - # Suppress intron boundary crossing due to non-intron intron based c. 
seq annotations - suppress_c_normalization = 'true' - # amend_RefSeqGene = 'true' - # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( - r'\-', - str( - tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( - r'\-', - str( - tx_hgvs_not_delins.posedit.pos.end))): - - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - - # Copy the current variant - tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) - try: - if tx_gap_fill_variant.posedit.edit.alt is None: - tx_gap_fill_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' + str( - tx_gap_fill_variant.posedit.pos.start) + '_' + str( - tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = validator.hp.parse_hgvs_variant( - tx_gap_fill_variant_delins_from_dup) - - # Identify which half of the NOT-intron the start position of the variant is in - if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - 
tx_gap_fill_variant.posedit.edit.ref = '' - - try: - tx_gap_fill_variant = validator.vm.n_to_c(tx_gap_fill_variant) - except: - fn.exceptPass() - genomic_gap_fill_variant = validator.vm.t_to_g(tx_gap_fill_variant, - reverse_normalized_hgvs_genomic.ac) - genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - - try: - c_tx_hgvs_not_delins = validator.vm.n_to_c(tx_hgvs_not_delins) - except Exception: - c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = validator.vm.t_to_g(c_tx_hgvs_not_delins, - hgvs_genomic_5pr.ac) - - # Ensure an ALT exists - try: - if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( - genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = validator.hp.parse_hgvs_variant( - genomic_gap_fill_variant_delins_from_dup) - genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' 
+ str( - genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = validator.hp.parse_hgvs_variant( - genomic_gap_fill_variant_alt_delins_from_dup) - - # Correct insertion alts - if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - genomic_gap_fill_variant_alt.posedit.pos.end.base) - genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - append_ref[1] - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) - else: - # Deletions with no ins - pre_alternate_bases = list( - genomic_gap_fill_variant_alt.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = genomic_gap_fill_variant.posedit.pos.start.base - alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base - ref_base_dict = {} - for base in reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = ref_start + 1 - - alt_base_dict = {} - - # NEED TO SEARCH FOR RANGE = and replace with interval_range - # Need to search for int and replace with integer - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, - genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, - 1): - if integer == alt_start: - 
alt_base_dict[integer] = str(''.join(alternate_bases)) - else: - alt_base_dict[integer] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, - genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): - if integer in list(alt_base_dict.keys()): - alternate_sequence_bases.append(alt_base_dict[integer]) - else: - alternate_sequence_bases.append(ref_base_dict[integer]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Add the new alt to the gap fill variant and generate transcript variant - genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = validator.vm.g_to_t(genomic_gap_fill_variant, - tx_gap_fill_variant.ac) - - # Set warning - gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - disparity_deletion_in[1] = [gap_size] - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - else: - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - auto_info = auto_info + '%s' % (gap_position) - - else: - if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # In this instance, we have identified a transcript gap but the n. 
version of - # the transcript variant but do not have a position which actually hits the gap, - # so the variant likely spans the gap, and is not picked up by an offset. - try: - c1 = validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - g1 = validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) - g3 = validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - ng2 = variant.hn.normalize(g2) - g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - len(g3.posedit.edit.ref) - 1) - try: - c2 = validator.vm.g_to_t(g3, c1.ac) - if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - pass - else: - tx_hgvs_not_delins = c2 - try: - tx_hgvs_not_delins = validator.vm.c_to_n(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSError: - fn.exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError: - fn.exceptPass() - - if re.search(r'\+', - str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\+', - str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - gpe = for_location_c.posedit.pos.end.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', - str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', - str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - 1 - gpe = for_location_c.posedit.pos.end.base - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - else: - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) + '\n' - tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.start.base + len( - tx_hgvs_not_delins.posedit.edit.ref) - 1 - hgvs_refreshed_variant = tx_hgvs_not_delins - - # GAP IN THE CHROMOSOME - elif disparity_deletion_in[0] == 'chromosome': - suppress_c_normalization = 'true' - # amend_RefSeqGene = 'true' - if possibility_counter == 3: - hgvs_refreshed_variant = stash_tx_right - elif possibility_counter == 4: - hgvs_refreshed_variant = stash_tx_left - else: - hgvs_refreshed_variant = chromosome_normalized_hgvs_coding - # Warn - auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( - hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' transcript base(s) that fail to align to chromosome ' + str( - hgvs_genomic.ac) + '\n' - else: - # Keep the same by re-setting rel_var - hgvs_refreshed_variant = hgvs_coding - # amend_RefSeqGene = 'false' - - # Edit the output - if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( - hgvs_refreshed_variant.type)): - hgvs_refreshed_variant = variant.no_norm_evm.n_to_c(hgvs_refreshed_variant) + if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str(hgvs_seek_var.posedit.pos)): + # Double check to see whether the variant is actually intronic? 
+ for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break else: - pass - - try: - variant.hn.normalize(hgvs_refreshed_variant) - except Exception as e: - error = str(e) - - # Ensure the final variant is not intronic nor does it cross exon boundaries - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - hgvs_refreshed_variant = saved_hgvs_coding - else: - logger.warning(error) - continue - - # Quick check to make sure the coding variant has not changed - try: - to_test = variant.hn.normalize(hgvs_refreshed_variant) - except: - to_test = hgvs_refreshed_variant - if str(to_test.posedit.edit) != str(hgvs_coding.posedit.edit): - # Try the next available genomic option - if hgvs_coding.posedit.edit.type == 'identity' and to_test.posedit.edit.type == 'identity': - hgvs_coding = to_test - else: - continue - # Update hgvs_genomic - hgvs_genomic = validator.myvm_t_to_g(hgvs_refreshed_variant, hgvs_genomic.ac, - variant.no_norm_evm,variant.hn) - if hgvs_genomic.posedit.edit.type == 'identity': - re_c = validator.vm.g_to_t(hgvs_genomic, hgvs_refreshed_variant.ac) - if (variant.hn.normalize(re_c)) != (variant.hn.normalize(hgvs_refreshed_variant)): - shuffle_left_g = copy.copy(hgvs_genomic) - shuffle_left_g.posedit.edit.ref = '' - shuffle_left_g.posedit.edit.alt = '' - shuffle_left_g.posedit.pos.start.base = shuffle_left_g.posedit.pos.start.base - 1 - shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 - shuffle_left_g = variant.reverse_normalizer.normalize(shuffle_left_g) - re_c = 
validator.vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) - if (variant.hn.normalize(re_c)) != (variant.hn.normalize(hgvs_refreshed_variant)): - hgvs_genomic = shuffle_left_g - - # If it is intronic, these vairables will not have been set - else: - # amend_RefSeqGene = 'false' - no_normalized_c = 'false' - - # Break if gap has been detected - if disparity_deletion_in[0] != 'false': - break - - # Warn user about gapping - if auto_info != '': - info_lines = auto_info.split('\n') - info_keys = {} - for information in info_lines: - info_keys[information] = '' - info_out = [] - info_out.append( - 'The displayed variants may be artefacts of aligning ' + hgvs_coding.ac + ' with genome build ' + variant.primary_assembly) - for ky in list(info_keys.keys()): - info_out.append(ky) - auto_info = '\n'.join(info_out) - auto_info = auto_info + '\nCaution should be used when reporting the displayed variant descriptions: If you are unsure, please contact admin' - auto_info = str(auto_info.replace('\n', ': ')) - variant.warnings += ': ' + str(auto_info) - logger.warning(str(auto_info)) - # Normailse hgvs_genomic - try: - hgvs_genomic = variant.hn.normalize(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: - # Strange error caused by gap in genomic - error = str(e) - - if re.search('base start position must be <= end position', error) and \ - disparity_deletion_in[0] == 'chromosome': - if hgvs_genomic.posedit.edit.type == 'delins': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - hgvs_genomic = variant.hn.normalize(hgvs_genomic) - if hgvs_genomic.posedit.edit.type == 'del': - start = 
hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - hgvs_genomic = variant.hn.normalize(hgvs_genomic) - genomic = fn.valstr(hgvs_genomic) - - else: - stored_hgvs_genomic_variant = hgvs_genomic - suppress_c_normalization = 'false' - gapped_alignment_warning = '' - auto_info = '' - genomic = fn.valstr(hgvs_genomic) - - # Create pseudo VCF based on amended hgvs_genomic - hgvs_genomic_variant = hgvs_genomic - # Reverse normalize hgvs_genomic_variant: NOTE will replace ref - reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic_variant) - - hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - - # Create vcf - vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, variant.primary_assembly, - variant.reverse_normalizer, validator.sf) - chr = vcf_dict['chr'] - pos = vcf_dict['pos'] - ref = vcf_dict['ref'] - alt = vcf_dict['alt'] - - # Create a VCF call - vcf_component_list = [str(chr), str(pos), str(ref), (alt)] - vcf_genomic = '-'.join(vcf_component_list) - - # DO NOT DELETE - # Generate an end position - end = str(int(pos) + len(ref) - 1) - pos = str(pos) - - # DO NOT DELETE - stored_hgvs_not_delins = validator.hp.parse_hgvs_variant(str( - hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) - - # Apply gap code to re-format hgvs_coding - # Store the current hgvs:c. 
description - saved_hgvs_coding = copy.deepcopy(hgvs_coding) - - # Get orientation of the gene wrt genome and a list of exons mapped to the genome - ori = validator.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=hgvs_genomic_5pr.ac, - alt_aln_method=validator.alt_aln_method) - orientation = int(ori[0]['alt_strand']) - - # Look for normalized variant options that do not match hgvs_coding - hgvs_genomic = copy.deepcopy(hgvs_genomic_variant) - if orientation == -1: - # position genomic at its most 5 prime position - try: - query_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript ant test for movement - try: - hgvs_seek_var = variant.evm.g_to_t(query_genomic, hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding - - elif orientation != -1: - # position genomic at its most 3 prime position - try: - query_genomic = variant.hn.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript ant test for movement - try: - hgvs_seek_var = variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + 
hgvs_seek_var.posedit.pos.end.offset) > ( - saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): - pass - else: - hgvs_seek_var = saved_hgvs_coding - - # Loop out gap finding code under these circumstances! - logger.warning("gap_compensation_2 = " + str(gap_compensation)) - if gap_compensation is True: - logger.warning('g_to_t gap code 2 active') - # is it in an exon? - is_it_in_an_exon = 'no' - for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - # Take from stored copy - # hgvs_genomic_5pr = copy.deepcopy(stored_hgvs_genomic_5pr) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - is_it_in_an_exon = 'yes' - if is_it_in_an_exon == 'yes': - # map form reverse normalized g. to c. - hgvs_from_5n_g = variant.no_norm_evm.g_to_t(hgvs_genomic_5pr, saved_hgvs_coding.ac) + intronic_variant = 'true' + if intronic_variant != 'true': + # Flag RefSeqGene for ammendment + # amend_RefSeqGene = 'false' # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths - disparity_deletion_in = ['false', 'false'] if stored_hgvs_not_delins != '': # Refresh hgvs_not_delins from stored_hgvs_not_delins hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) @@ -2254,11 +1281,13 @@ def transcripts_to_gene(variant, validator): hgvs_not_delins.posedit.edit.alt = ref_bases[ :1] + hgvs_not_delins.posedit.edit.alt[ 1:] + ref_bases[1:] - elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): + elif re.search('ins', + str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 elif 
re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and not re.search( + str( + hgvs_genomic_5pr.posedit.edit)) and not re.search( 'del', str(hgvs_genomic_5pr.posedit.edit)): hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 @@ -2278,11 +1307,13 @@ def transcripts_to_gene(variant, validator): hgvs_not_delins.posedit.edit.alt = ref_bases[ :1] + hgvs_not_delins.posedit.edit.alt[ 1:] + ref_bases[1:] - elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): + elif re.search('ins', + str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and not re.search( + str( + hgvs_genomic_5pr.posedit.edit)) and not re.search( 'del', str(hgvs_genomic_5pr.posedit.edit)): hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 @@ -2296,21 +1327,20 @@ def transcripts_to_gene(variant, validator): pass else: pass - - hard_fail = 'false' try: - tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) - except Exception as e: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - tx_hgvs_not_delins = hgvs_coding - hard_fail = 'true' - + tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSInvalidIntervalError: + tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, + saved_hgvs_coding.ac) # Create normalized version of tx_hgvs_not_delins rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) - # Check for +ve base and adjust - if re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\+', - str( - 
rn_tx_hgvs_not_delins.posedit.pos.start)): + + # Check for +1 base and adjust + if re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( + r'\+', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): # Remove offsetting to span the gap rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 @@ -2319,7 +1349,7 @@ def transcripts_to_gene(variant, validator): try: rn_tx_hgvs_not_delins.posedit.edit.alt = '' except: - fn.exceptPass() + pass elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): # move tx end base to next available non-offset base @@ -2333,12 +1363,15 @@ def transcripts_to_gene(variant, validator): # re-make genomic and tx hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, variant.primary_assembly, variant.hn) + rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) + + # tx_hgvs_not_delins = rn_tx_hgvs_not_delins elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - rn_tx_hgvs_not_delins.posedit.edit.ref = '' # move tx start base to previous available non-offset base rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' if re.match('NM_', str(rn_tx_hgvs_not_delins)): test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) else: @@ -2349,13 +1382,14 @@ def transcripts_to_gene(variant, validator): rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: - # pass + # else: + # pass # Check for -ve base and adjust - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( + r'\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): # Remove offsetting to span the gap 
rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 @@ -2364,10 +1398,8 @@ def transcripts_to_gene(variant, validator): try: rn_tx_hgvs_not_delins.posedit.edit.alt = '' except: - fn.exceptPass() + pass elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base back to next available non-offset base - # rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base - 1 rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 # Delete the ref rn_tx_hgvs_not_delins.posedit.edit.ref = '' @@ -2417,7 +1449,6 @@ def transcripts_to_gene(variant, validator): else: re_capture_tx_variant = [] for internal_possibility in hgvs_genomic_possibilities: - if internal_possibility == '': continue @@ -2451,7 +1482,8 @@ def transcripts_to_gene(variant, validator): internal_possibility.posedit.edit.ref): gap_length = len(internal_possibility.posedit.edit.ref) - len( hgvs_t_possibility.posedit.edit.ref) - re_capture_tx_variant = ['transcript', gap_length, hgvs_t_possibility] + re_capture_tx_variant = ['transcript', gap_length, + hgvs_t_possibility] hgvs_not_delins = internal_possibility hgvs_genomic_5pr = internal_possibility break @@ -2465,13 +1497,13 @@ def transcripts_to_gene(variant, validator): else: pass + # 'At hgvs_genomic' # Final sanity checks try: validator.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) except Exception as e: if str(e) == 'start or end or both are beyond the bounds of transcript record': - logger.warning(str(e)) - return True + continue try: variant.hn.normalize(tx_hgvs_not_delins) except hgvs.exceptions.HGVSUnsupportedOperationError as e: @@ -2483,19 +1515,41 @@ def transcripts_to_gene(variant, validator): if re.match( 'Unsupported normalization of variants spanning the exon-intron boundary', error): - logger.warning(error) - return True + continue elif re.match('Normalization of intronic variants is not supported', error): # We know that this cannot be because of an 
intronic variant, so must be aligned to tx gap disparity_deletion_in = ['transcript', 'Requires Analysis'] - if hard_fail == 'true': - disparity_deletion_in = ['false', 'false'] - + # amend_RefSeqGene = 'false' # Recreate hgvs_genomic if disparity_deletion_in[0] == 'transcript': hgvs_genomic = hgvs_not_delins + # Find oddly placed gaps where the tx variant is encompassed in the gap + if disparity_deletion_in[0] == 'false' and ( + possibility_counter == 3 or possibility_counter == 4): + rg = variant.reverse_normalizer.normalize(hgvs_not_delins) + rtx = validator.vm.g_to_t(rg, tx_hgvs_not_delins.ac) + fg = variant.hn.normalize(hgvs_not_delins) + ftx = validator.vm.g_to_t(fg, tx_hgvs_not_delins.ac) + if (rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( + ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): + exons = validator.hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, validator.alt_aln_method) + exonic = False + for ex_test in exons: + if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ + 7]) and ftx.posedit.pos.end.base in range(ex_test[6], ex_test[7]): + exonic = True + if exonic is True: + hgvs_not_delins = fg + hgvs_genomic = fg + hgvs_genomic_5pr = fg + try: + tx_hgvs_not_delins = validator.vm.c_to_n(ftx) + except Exception: + tx_hgvs_not_delins = ftx + disparity_deletion_in = ['transcript', 'Requires Analysis'] + # Pre-processing of tx_hgvs_not_delins try: if tx_hgvs_not_delins.posedit.edit.alt is None: @@ -2505,22 +1559,24 @@ def transcripts_to_gene(variant, validator): tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' 
+ str( tx_hgvs_not_delins.posedit.pos.start) + '_' + str( tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = validator.hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) - + tx_hgvs_not_delins = validator.hp.parse_hgvs_variant( + tx_hgvs_not_delins_delins_from_dup) # GAP IN THE TRANSCRIPT DISPARITY DETECTED if disparity_deletion_in[0] == 'transcript': - gap_position = '' - gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + variant.primary_assembly - + # Suppress intron boundary crossing due to non-intron intron based c. seq annotations + suppress_c_normalization = 'true' + # amend_RefSeqGene = 'true' # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', - str( - tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', - str( - tx_hgvs_not_delins.posedit.pos.end))): + if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( + r'\-', + str( + tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( + r'\-', + str( + tx_hgvs_not_delins.posedit.pos.end))): + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) # Copy the current variant @@ -2536,7 +1592,7 @@ def transcripts_to_gene(variant, validator): tx_gap_fill_variant = validator.hp.parse_hgvs_variant( tx_gap_fill_variant_delins_from_dup) - # Identify which half of the NOT-intron the start position of the variant is in + # Identify which half of the NOT-intron the start position of the variant is in if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): tx_gap_fill_variant.posedit.pos.start.base = 
tx_gap_fill_variant.posedit.pos.start.base - 1 tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') @@ -2597,7 +1653,8 @@ def transcripts_to_gene(variant, validator): alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) else: # Deletions with no ins - pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) + pre_alternate_bases = list( + genomic_gap_fill_variant_alt.posedit.edit.ref) alternate_bases = [] for base in pre_alternate_bases: alternate_bases.append('X') @@ -2618,7 +1675,8 @@ def transcripts_to_gene(variant, validator): # Note, all variants will be forced into the format delete insert # Deleted bases in the ALT will be substituted for X for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, - genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): + genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, + 1): if integer == alt_start: alt_base_dict[integer] = str(''.join(alternate_bases)) else: @@ -2689,8 +1747,9 @@ def transcripts_to_gene(variant, validator): except hgvs.exceptions.HGVSInvalidVariantError: fn.exceptPass() - if re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): + if re.search(r'\+', + str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( disparity_deletion_in[ @@ -2729,17 +1788,20 @@ def transcripts_to_gene(variant, validator): for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) gps = for_location_c.posedit.pos.start.base gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + gap_position = ' between positions c.' 
+ str(gps) + '_' + str( + gpe) + '\n' # Warn update auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): + elif re.search(r'\+', + str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): auto_info = auto_info + 'Genome position ' + str( stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( disparity_deletion_in[1]) + '-bp gap in transcript ' + str( tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str( + tx_hgvs_not_delins.ac) non_valid_caution = 'true' try: c1 = validator.vm.n_to_c(tx_hgvs_not_delins) @@ -2773,7 +1835,8 @@ def transcripts_to_gene(variant, validator): for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) gps = for_location_c.posedit.pos.end.base gpe = for_location_c.posedit.pos.end.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' # Warn update auto_info = auto_info + '%s' % (gap_position) elif re.search(r'\-', @@ -2817,17 +1880,20 @@ def transcripts_to_gene(variant, validator): for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) gps = for_location_c.posedit.pos.start.base - 1 gpe = for_location_c.posedit.pos.start.base - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + gap_position = ' between positions c.' 
+ str(gps) + '_' + str( + gpe) + '\n' # Warn update auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): + elif re.search(r'\-', + str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): auto_info = auto_info + 'Genome position ' + str( stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( disparity_deletion_in[1]) + '-bp gap in transcript ' + str( tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str( + tx_hgvs_not_delins.ac) non_valid_caution = 'true' try: c1 = validator.vm.n_to_c(tx_hgvs_not_delins) @@ -2861,7 +1927,8 @@ def transcripts_to_gene(variant, validator): for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) gps = for_location_c.posedit.pos.end.base - 1 gpe = for_location_c.posedit.pos.end.base - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + gap_position = ' between positions c.' 
+ str(gps) + '_' + str( + gpe) + '\n' # Warn update auto_info = auto_info + '%s' % (gap_position) else: @@ -2870,176 +1937,1131 @@ def transcripts_to_gene(variant, validator): disparity_deletion_in[ 1]) + ' genomic base(s) that fail to align to transcript ' + str( tx_hgvs_not_delins.ac) + '\n' + tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.start.base + len( + tx_hgvs_not_delins.posedit.edit.ref) - 1 hgvs_refreshed_variant = tx_hgvs_not_delins - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) # GAP IN THE CHROMOSOME - elif disparity_deletion_in[0] == 'chromosome': - # Set warning variables - gap_position = '' - gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + variant.primary_assembly - hgvs_refreshed_variant = tx_hgvs_not_delins + suppress_c_normalization = 'true' + # amend_RefSeqGene = 'true' + if possibility_counter == 3: + hgvs_refreshed_variant = stash_tx_right + elif possibility_counter == 4: + hgvs_refreshed_variant = stash_tx_left + else: + hgvs_refreshed_variant = chromosome_normalized_hgvs_coding # Warn auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' 
+ str( - hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(disparity_deletion_in[ - 1]) + ' transcript base(s) that fail to align to chromosome ' + str( + hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' transcript base(s) that fail to align to chromosome ' + str( hgvs_genomic.ac) + '\n' - gapped_transcripts = gapped_transcripts + str(hgvs_refreshed_variant.ac) + ' ' else: # Keep the same by re-setting rel_var - hgvs_refreshed_variant = saved_hgvs_coding + hgvs_refreshed_variant = hgvs_coding + # amend_RefSeqGene = 'false' # Edit the output if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( hgvs_refreshed_variant.type)): - hgvs_refreshed_variant = variant.evm.n_to_c(hgvs_refreshed_variant) + hgvs_refreshed_variant = variant.no_norm_evm.n_to_c(hgvs_refreshed_variant) else: pass + try: - hgvs_refreshed_variant = variant.hn.normalize(hgvs_refreshed_variant) - if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - hgvs_refreshed_variant.posedit.edit.ref[-1] == \ - hgvs_refreshed_variant.posedit.edit.alt[-1]: - hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - 0:-1] - hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - 0:-1] - hgvs_refreshed_variant.posedit.pos.end.base = hgvs_refreshed_variant.posedit.pos.end.base - 1 - hgvs_refreshed_variant = variant.hn.normalize(hgvs_refreshed_variant) - elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - hgvs_refreshed_variant.posedit.edit.ref[0] == \ - hgvs_refreshed_variant.posedit.edit.alt[0]: - hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - 1:] - hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - 1:] - hgvs_refreshed_variant.posedit.pos.start.base = hgvs_refreshed_variant.posedit.pos.start.base + 1 - hgvs_refreshed_variant = variant.hn.normalize(hgvs_refreshed_variant) + 
variant.hn.normalize(hgvs_refreshed_variant) except Exception as e: error = str(e) + # Ensure the final variant is not intronic nor does it cross exon boundaries if re.match('Normalization of intronic variants is not supported', error) or re.match( 'Unsupported normalization of variants spanning the exon-intron boundary', error): hgvs_refreshed_variant = saved_hgvs_coding + else: + logger.warning(error) + continue + + # Quick check to make sure the coding variant has not changed + try: + to_test = variant.hn.normalize(hgvs_refreshed_variant) + except: + to_test = hgvs_refreshed_variant + if str(to_test.posedit.edit) != str(hgvs_coding.posedit.edit): + # Try the next available genomic option + if hgvs_coding.posedit.edit.type == 'identity' and to_test.posedit.edit.type == 'identity': + hgvs_coding = to_test + else: + continue + # Update hgvs_genomic + hgvs_genomic = validator.myvm_t_to_g(hgvs_refreshed_variant, hgvs_genomic.ac, + variant.no_norm_evm,variant.hn) + if hgvs_genomic.posedit.edit.type == 'identity': + re_c = validator.vm.g_to_t(hgvs_genomic, hgvs_refreshed_variant.ac) + if (variant.hn.normalize(re_c)) != (variant.hn.normalize(hgvs_refreshed_variant)): + shuffle_left_g = copy.copy(hgvs_genomic) + shuffle_left_g.posedit.edit.ref = '' + shuffle_left_g.posedit.edit.alt = '' + shuffle_left_g.posedit.pos.start.base = shuffle_left_g.posedit.pos.start.base - 1 + shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 + shuffle_left_g = variant.reverse_normalizer.normalize(shuffle_left_g) + re_c = validator.vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) + if (variant.hn.normalize(re_c)) != (variant.hn.normalize(hgvs_refreshed_variant)): + hgvs_genomic = shuffle_left_g + + # If it is intronic, these vairables will not have been set + else: + # amend_RefSeqGene = 'false' + no_normalized_c = 'false' + + # Break if gap has been detected + if disparity_deletion_in[0] != 'false': + break + + # Warn user about gapping + if auto_info != '': + 
info_lines = auto_info.split('\n') + info_keys = {} + for information in info_lines: + info_keys[information] = '' + info_out = [] + info_out.append( + 'The displayed variants may be artefacts of aligning ' + hgvs_coding.ac + ' with genome build ' + variant.primary_assembly) + for ky in list(info_keys.keys()): + info_out.append(ky) + auto_info = '\n'.join(info_out) + auto_info = auto_info + '\nCaution should be used when reporting the displayed variant descriptions: If you are unsure, please contact admin' + auto_info = str(auto_info.replace('\n', ': ')) + variant.warnings += ': ' + str(auto_info) + logger.warning(str(auto_info)) + # Normailse hgvs_genomic + try: + hgvs_genomic = variant.hn.normalize(hgvs_genomic) + except hgvs.exceptions.HGVSError as e: + # Strange error caused by gap in genomic + error = str(e) + + if re.search('base start position must be <= end position', error) and \ + disparity_deletion_in[0] == 'chromosome': + if hgvs_genomic.posedit.edit.type == 'delins': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + hgvs_genomic = variant.hn.normalize(hgvs_genomic) + if hgvs_genomic.posedit.edit.type == 'del': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + hgvs_genomic = variant.hn.normalize(hgvs_genomic) + genomic = 
fn.valstr(hgvs_genomic) + + else: + stored_hgvs_genomic_variant = hgvs_genomic + suppress_c_normalization = 'false' + gapped_alignment_warning = '' + auto_info = '' + genomic = fn.valstr(hgvs_genomic) + + # Create pseudo VCF based on amended hgvs_genomic + hgvs_genomic_variant = hgvs_genomic + # Reverse normalize hgvs_genomic_variant: NOTE will replace ref + reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic_variant) + + hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + + # Create vcf + vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, variant.primary_assembly, + variant.reverse_normalizer, validator.sf) + chr = vcf_dict['chr'] + pos = vcf_dict['pos'] + ref = vcf_dict['ref'] + alt = vcf_dict['alt'] + + # Create a VCF call + vcf_component_list = [str(chr), str(pos), str(ref), (alt)] + vcf_genomic = '-'.join(vcf_component_list) + + # DO NOT DELETE + # Generate an end position + end = str(int(pos) + len(ref) - 1) + pos = str(pos) + + # DO NOT DELETE + stored_hgvs_not_delins = validator.hp.parse_hgvs_variant(str( + hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) + + # Apply gap code to re-format hgvs_coding + # Store the current hgvs:c. 
description + saved_hgvs_coding = copy.deepcopy(hgvs_coding) + + # Get orientation of the gene wrt genome and a list of exons mapped to the genome + ori = validator.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=hgvs_genomic_5pr.ac, + alt_aln_method=validator.alt_aln_method) + orientation = int(ori[0]['alt_strand']) + + # Look for normalized variant options that do not match hgvs_coding + hgvs_genomic = copy.deepcopy(hgvs_genomic_variant) + if orientation == -1: + # position genomic at its most 5 prime position + try: + query_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript ant test for movement + try: + hgvs_seek_var = variant.evm.g_to_t(query_genomic, hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding + + elif orientation != -1: + # position genomic at its most 3 prime position + try: + query_genomic = variant.hn.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript ant test for movement + try: + hgvs_seek_var = variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + 
hgvs_seek_var.posedit.pos.end.offset) > ( + saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): + pass + else: + hgvs_seek_var = saved_hgvs_coding + + # Loop out gap finding code under these circumstances! + logger.warning("gap_compensation_2 = " + str(gap_compensation)) + if gap_compensation is True: + logger.warning('g_to_t gap code 2 active') + # is it in an exon? + is_it_in_an_exon = 'no' + for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + # Take from stored copy + # hgvs_genomic_5pr = copy.deepcopy(stored_hgvs_genomic_5pr) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + is_it_in_an_exon = 'yes' + if is_it_in_an_exon == 'yes': + # map form reverse normalized g. to c. + hgvs_from_5n_g = variant.no_norm_evm.g_to_t(hgvs_genomic_5pr, saved_hgvs_coding.ac) + + # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths + disparity_deletion_in = ['false', 'false'] + if stored_hgvs_not_delins != '': + # Refresh hgvs_not_delins from stored_hgvs_not_delins + hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) + # This test will only occur in dup of single base, insertion or substitution + if not re.search('_', str(hgvs_not_delins.posedit.pos)): + if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', + hgvs_genomic_5pr.posedit.edit.type): + # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos + plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) + plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 + plussed_hgvs_not_delins.posedit.edit.ref = '' + transcript_variant = variant.no_norm_evm.g_to_t(plussed_hgvs_not_delins, + str(saved_hgvs_coding.ac)) + if (( + transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( + hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', + str(hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + else: + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = 
hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', + str(hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] else: pass + else: + pass + + hard_fail = 'false' + try: + tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) + except Exception as e: + if str(e) == 'start or end or both are beyond the bounds of transcript record': + tx_hgvs_not_delins = hgvs_coding + hard_fail = 'true' + + # Create normalized version of tx_hgvs_not_delins + rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) + # Check for +ve base and adjust + if re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\+', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: + fn.exceptPass() - # Sort out equality 
to equality c. events where the code will add 2 additional bases - if hgvs_coding.posedit.edit.type == 'identity' and hgvs_refreshed_variant.posedit.edit.type == 'identity': # and len(hgvs_refreshed_variant.posedit.edit.ref) == (len(hgvs_coding.posedit.edit.ref) + 2): + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # else: + # pass + + # Check for -ve base and adjust + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + 
rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: + fn.exceptPass() + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base back to next available non-offset base + # rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base - 1 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # Delete the ref + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # Add the additional base to the ALT + start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 + end = rn_tx_hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + rn_tx_hgvs_not_delins = 
variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + else: pass + + # Logic + if len(hgvs_not_delins.posedit.edit.ref) < len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( + hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['chromosome', gap_length] + elif len(hgvs_not_delins.posedit.edit.ref) > len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( + rn_tx_hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['transcript', gap_length] else: - hgvs_coding = copy.deepcopy(hgvs_refreshed_variant) - coding = fn.valstr(hgvs_coding) - formatted_variant = coding + re_capture_tx_variant = [] + for internal_possibility in hgvs_genomic_possibilities: - # OBTAIN THE RefSeqGene coordinates - # Attempt 1 = UTA - sequences_for_tx = validator.hdp.get_tx_mapping_options(hgvs_coding.ac) - recovered_rsg = [] + if internal_possibility == '': + continue - for sequence in sequences_for_tx: - if re.match('^NG_', sequence[1]): - recovered_rsg.append(sequence[1]) - recovered_rsg.sort() - recovered_rsg.reverse() + hgvs_t_possibility = validator.vm.g_to_t(internal_possibility, hgvs_coding.ac) + if hgvs_t_possibility.posedit.edit.type == 'ins': + try: + hgvs_t_possibility = validator.vm.c_to_n(hgvs_t_possibility) + except: + fn.exceptPass() + ins_ref = validator.sf.fetch_seq(hgvs_t_possibility.ac, + hgvs_t_possibility.posedit.pos.start.base - 1, + hgvs_t_possibility.posedit.pos.start.base + 1) + try: + hgvs_t_possibility = validator.vm.n_to_c(hgvs_t_possibility) + except: + fn.exceptPass() + hgvs_t_possibility.posedit.edit.ref = ins_ref + hgvs_t_possibility.posedit.edit.alt = ins_ref[ + 0] + hgvs_t_possibility.posedit.edit.alt + \ + ins_ref[1] + if internal_possibility.posedit.edit.type == 'ins': + ins_ref = validator.sf.fetch_seq(internal_possibility.ac, + 
internal_possibility.posedit.pos.start.base - 1, + internal_possibility.posedit.pos.end.base) + internal_possibility.posedit.edit.ref = ins_ref + internal_possibility.posedit.edit.alt = ins_ref[ + 0] + internal_possibility.posedit.edit.alt + \ + ins_ref[1] + + if len(hgvs_t_possibility.posedit.edit.ref) < len( + internal_possibility.posedit.edit.ref): + gap_length = len(internal_possibility.posedit.edit.ref) - len( + hgvs_t_possibility.posedit.edit.ref) + re_capture_tx_variant = ['transcript', gap_length, hgvs_t_possibility] + hgvs_not_delins = internal_possibility + hgvs_genomic_5pr = internal_possibility + break - if len(recovered_rsg) > 0 and 'NG_' in recovered_rsg[0]: - refseqgene_ac = recovered_rsg[0] - else: - refseqgene_ac = '' + if re_capture_tx_variant != []: + try: + tx_hgvs_not_delins = validator.vm.c_to_n(re_capture_tx_variant[2]) + except: + tx_hgvs_not_delins = re_capture_tx_variant[2] + disparity_deletion_in = re_capture_tx_variant[0:-1] + else: + pass + + # Final sanity checks + try: + validator.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + except Exception as e: + if str(e) == 'start or end or both are beyond the bounds of transcript record': + logger.warning(str(e)) + return True + try: + variant.hn.normalize(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + logger.warning(error) + return True + elif re.match('Normalization of intronic variants is not supported', error): + # We know that this cannot be because of an intronic variant, so must be aligned to tx gap + disparity_deletion_in = ['transcript', 'Requires Analysis'] + + if hard_fail == 'true': + disparity_deletion_in = ['false', 'false'] + + # Recreate 
hgvs_genomic + if disparity_deletion_in[0] == 'transcript': + hgvs_genomic = hgvs_not_delins + + # Pre-processing of tx_hgvs_not_delins + try: + if tx_hgvs_not_delins.posedit.edit.alt is None: + tx_hgvs_not_delins.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( + tx_hgvs_not_delins.posedit.pos.start) + '_' + str( + tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins = validator.hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) + + + # GAP IN THE TRANSCRIPT DISPARITY DETECTED + if disparity_deletion_in[0] == 'transcript': + gap_position = '' + gapped_alignment_warning = str( + hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + variant.primary_assembly + + # ANY VARIANT WHOLLY WITHIN THE GAP + if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', + str( + tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', + str( + tx_hgvs_not_delins.posedit.pos.end))): + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + + # Copy the current variant + tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) + try: + if tx_gap_fill_variant.posedit.edit.alt is None: + tx_gap_fill_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( + tx_gap_fill_variant.posedit.pos.start) + '_' + str( + tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant = validator.hp.parse_hgvs_variant( + tx_gap_fill_variant_delins_from_dup) + + # Identify which half of the NOT-intron the start position of the variant is in + if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + + try: + tx_gap_fill_variant = validator.vm.n_to_c(tx_gap_fill_variant) + except: + fn.exceptPass() + genomic_gap_fill_variant = validator.vm.t_to_g(tx_gap_fill_variant, + reverse_normalized_hgvs_genomic.ac) + genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref + + try: + c_tx_hgvs_not_delins = validator.vm.n_to_c(tx_hgvs_not_delins) + except Exception: + c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) + genomic_gap_fill_variant_alt = validator.vm.t_to_g(c_tx_hgvs_not_delins, + hgvs_genomic_5pr.ac) + + # Ensure an ALT exists + try: + if genomic_gap_fill_variant_alt.posedit.edit.alt is None: + genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_gap_fill_variant_delins_from_dup 
= genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( + genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant = validator.hp.parse_hgvs_variant( + genomic_gap_fill_variant_delins_from_dup) + genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( + genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt = validator.hp.parse_hgvs_variant( + genomic_gap_fill_variant_alt_delins_from_dup) + + # Correct insertion alts + if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': + append_ref = validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, + genomic_gap_fill_variant_alt.posedit.pos.end.base) + genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ + 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ + append_ref[1] + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) + if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: + alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) + else: + # Deletions with no ins + pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = genomic_gap_fill_variant.posedit.pos.start.base + alt_start = 
genomic_gap_fill_variant_alt.posedit.pos.start.base + ref_base_dict = {} + for base in reference_bases: + ref_base_dict[ref_start] = str(base) + ref_start = ref_start + 1 + + alt_base_dict = {} + + # NEED TO SEARCH FOR RANGE = and replace with interval_range + # Need to search for int and replace with integer + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, + genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): + if integer == alt_start: + alt_base_dict[integer] = str(''.join(alternate_bases)) + else: + alt_base_dict[integer] = 'X' + + # Generate the alt sequence + alternate_sequence_bases = [] + for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, + genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): + if integer in list(alt_base_dict.keys()): + alternate_sequence_bases.append(alt_base_dict[integer]) + else: + alternate_sequence_bases.append(ref_base_dict[integer]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Add the new alt to the gap fill variant and generate transcript variant + genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence + hgvs_refreshed_variant = validator.vm.g_to_t(genomic_gap_fill_variant, + tx_gap_fill_variant.ac) + + # Set warning + gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) + disparity_deletion_in[1] = [gap_size] + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + else: + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + auto_info = auto_info + '%s' % (gap_position) + + else: + if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: + # In this instance, we have identified a transcript gap but the n. version of + # the transcript variant but do not have a position which actually hits the gap, + # so the variant likely spans the gap, and is not picked up by an offset. 
+ try: + c1 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + g1 = validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g3 = validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + ng2 = variant.hn.normalize(g2) + g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( + len(g3.posedit.edit.ref) - 1) + try: + c2 = validator.vm.g_to_t(g3, c1.ac) + if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: + pass + else: + tx_hgvs_not_delins = c2 + try: + tx_hgvs_not_delins = validator.vm.c_to_n(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSError: + fn.exceptPass() + except hgvs.exceptions.HGVSInvalidVariantError: + fn.exceptPass() + + if re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + 
g3.posedit.edit.alt = alternate + c3 = validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position 
+ for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base + gpe = for_location_c.posedit.pos.end.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\-', + str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = 
for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base - 1 + gpe = for_location_c.posedit.pos.end.base + gap_position = ' between positions 
c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + else: + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + '\n' + hgvs_refreshed_variant = tx_hgvs_not_delins + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + + # GAP IN THE CHROMOSOME + + elif disparity_deletion_in[0] == 'chromosome': + # Set warning variables + gap_position = '' + gapped_alignment_warning = str( + hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + variant.primary_assembly + hgvs_refreshed_variant = tx_hgvs_not_delins + # Warn + auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( + hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(disparity_deletion_in[ + 1]) + ' transcript base(s) that fail to align to chromosome ' + str( + hgvs_genomic.ac) + '\n' + gapped_transcripts = gapped_transcripts + str(hgvs_refreshed_variant.ac) + ' ' + else: + # Keep the same by re-setting rel_var + hgvs_refreshed_variant = saved_hgvs_coding - # Given the difficulties with mapping to and from RefSeqGenes, we now solely rely on UTA - if refseqgene_ac != '': - hgvs_refseq = validator.vm.t_to_g(hgvs_coding, refseqgene_ac) - # Normalize the RefSeqGene Variant to the correct position + # Edit the output + if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( + hgvs_refreshed_variant.type)): + hgvs_refreshed_variant = variant.evm.n_to_c(hgvs_refreshed_variant) + else: + pass try: - hgvs_refseq = variant.hn.normalize(hgvs_refseq) + hgvs_refreshed_variant = variant.hn.normalize(hgvs_refreshed_variant) + if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ + hgvs_refreshed_variant.posedit.edit.ref[-1] 
== \ + hgvs_refreshed_variant.posedit.edit.alt[-1]: + hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ + 0:-1] + hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ + 0:-1] + hgvs_refreshed_variant.posedit.pos.end.base = hgvs_refreshed_variant.posedit.pos.end.base - 1 + hgvs_refreshed_variant = variant.hn.normalize(hgvs_refreshed_variant) + elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ + hgvs_refreshed_variant.posedit.edit.ref[0] == \ + hgvs_refreshed_variant.posedit.edit.alt[0]: + hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ + 1:] + hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ + 1:] + hgvs_refreshed_variant.posedit.pos.start.base = hgvs_refreshed_variant.posedit.pos.start.base + 1 + hgvs_refreshed_variant = variant.hn.normalize(hgvs_refreshed_variant) except Exception as e: - # if re.search('insertion length must be 1', error): - hgvs_refseq = 'RefSeqGene record not available' - refseq = 'RefSeqGene record not available' - hgvs_refseq_ac = 'RefSeqGene record not available' + error = str(e) + # Ensure the final variant is not intronic nor does it cross exon boundaries + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + hgvs_refreshed_variant = saved_hgvs_coding + else: + pass + + # Sort out equality to equality c. 
events where the code will add 2 additional bases + if hgvs_coding.posedit.edit.type == 'identity' and hgvs_refreshed_variant.posedit.edit.type == 'identity': # and len(hgvs_refreshed_variant.posedit.edit.ref) == (len(hgvs_coding.posedit.edit.ref) + 2): pass else: - refseq = fn.valstr(hgvs_refseq) - hgvs_refseq_ac = hgvs_refseq.ac - else: + hgvs_coding = copy.deepcopy(hgvs_refreshed_variant) + coding = fn.valstr(hgvs_coding) + formatted_variant = coding + + # OBTAIN THE RefSeqGene coordinates + # Attempt 1 = UTA + sequences_for_tx = validator.hdp.get_tx_mapping_options(hgvs_coding.ac) + recovered_rsg = [] + + for sequence in sequences_for_tx: + if re.match('^NG_', sequence[1]): + recovered_rsg.append(sequence[1]) + recovered_rsg.sort() + recovered_rsg.reverse() + + if len(recovered_rsg) > 0 and 'NG_' in recovered_rsg[0]: + refseqgene_ac = recovered_rsg[0] + else: + refseqgene_ac = '' + + # Given the difficulties with mapping to and from RefSeqGenes, we now solely rely on UTA + if refseqgene_ac != '': + hgvs_refseq = validator.vm.t_to_g(hgvs_coding, refseqgene_ac) + # Normalize the RefSeqGene Variant to the correct position + try: + hgvs_refseq = variant.hn.normalize(hgvs_refseq) + except Exception as e: + # if re.search('insertion length must be 1', error): hgvs_refseq = 'RefSeqGene record not available' refseq = 'RefSeqGene record not available' hgvs_refseq_ac = 'RefSeqGene record not available' - - # Predicted effect on protein - protein_dict = validator.myc_to_p(hgvs_coding, variant.evm, re_to_p=False) - if protein_dict['error'] == '': + pass + else: + refseq = fn.valstr(hgvs_refseq) + hgvs_refseq_ac = hgvs_refseq.ac + else: + hgvs_refseq = 'RefSeqGene record not available' + refseq = 'RefSeqGene record not available' + hgvs_refseq_ac = 'RefSeqGene record not available' + + # Predicted effect on protein + protein_dict = validator.myc_to_p(hgvs_coding, variant.evm, re_to_p=False) + if protein_dict['error'] == '': + hgvs_protein = protein_dict['hgvs_protein'] + 
protein = str(hgvs_protein) + else: + error = protein_dict['error'] + variant.warnings += ': ' + str(error) + if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': hgvs_protein = protein_dict['hgvs_protein'] protein = str(hgvs_protein) else: - error = protein_dict['error'] - variant.warnings += ': ' + str(error) - if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': - hgvs_protein = protein_dict['hgvs_protein'] - protein = str(hgvs_protein) - else: - logger.error(error) - return True + logger.error(error) + return True - # Gene orientation wrt genome - ori = validator.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=hgvs_genomic.ac, - alt_aln_method=validator.alt_aln_method) - ori = int(ori[0]['alt_strand']) + # Gene orientation wrt genome + ori = validator.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=hgvs_genomic.ac, + alt_aln_method=validator.alt_aln_method) + ori = int(ori[0]['alt_strand']) - # Look for normalized variant options that do not match hgvs_coding - # boundary crossing normalization - # Re-Save the required variants - hgvs_seek_var = copy.deepcopy(hgvs_coding) - saved_hgvs_coding = copy.deepcopy(hgvs_coding) + # Look for normalized variant options that do not match hgvs_coding + # boundary crossing normalization + # Re-Save the required variants + hgvs_seek_var = copy.deepcopy(hgvs_coding) + saved_hgvs_coding = copy.deepcopy(hgvs_coding) - if ori == -1: - # position genomic at its most 5 prime position + if ori == -1: + # position genomic at its most 5 prime position + try: + query_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript and test for movement + try: + hgvs_seek_var = variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if 
saved_hgvs_coding.posedit.edit.type != hgvs_seek_var.posedit.edit.type: + rec_var = 'false' + hgvs_seek_var = saved_hgvs_coding + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + elif suppress_c_normalization == 'true': + rec_var = 'false' + hgvs_seek_var = saved_hgvs_coding + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + elif (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': try: - query_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript and test for movement + automap = fn.valstr(saved_hgvs_coding) + ' normalized to ' + fn.valstr(hgvs_seek_var) + hgvs_coding = hgvs_seek_var + coding = fn.valstr(hgvs_coding) + variant.warnings += ': ' + automap + rng = variant.hn.normalize(query_genomic) + except NotImplementedError: + pass try: - hgvs_seek_var = variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if saved_hgvs_coding.posedit.edit.type != hgvs_seek_var.posedit.edit.type: - rec_var = 'false' - hgvs_seek_var = saved_hgvs_coding - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - elif suppress_c_normalization == 'true': - rec_var = 'false' - hgvs_seek_var = saved_hgvs_coding - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - elif (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( - 
hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - try: - automap = fn.valstr(saved_hgvs_coding) + ' normalized to ' + fn.valstr(hgvs_seek_var) - hgvs_coding = hgvs_seek_var - coding = fn.valstr(hgvs_coding) - variant.warnings += ': ' + automap - rng = variant.hn.normalize(query_genomic) - except NotImplementedError: - pass - try: - c_for_p = validator.vm.g_to_t(rng, hgvs_coding.ac) - except hgvs.exceptions.HGVSInvalidIntervalError as e: - c_for_p = seek_var + c_for_p = validator.vm.g_to_t(rng, hgvs_coding.ac) + except hgvs.exceptions.HGVSInvalidIntervalError as e: + c_for_p = seek_var + try: + # Predicted effect on protein + protein_dict = validator.myc_to_p(c_for_p, variant.evm, re_to_p=False) + if protein_dict['error'] == '': + hgvs_protein = protein_dict['hgvs_protein'] + protein = str(hgvs_protein) + else: + error = protein_dict['error'] + if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': + hgvs_protein = protein_dict['hgvs_protein'] + variant.warnings += ': ' + str(error) + # Replace protein description in vars table + protein = str(hgvs_protein) + except NotImplementedError: + fn.exceptPass() + else: + # Double check protein position by normalize genomic, and normalize back to c. 
for normalize or not to normalize issue + coding = fn.valstr(hgvs_coding) + + elif ori != -1: + # position genomic at its most 3 prime position + try: + query_genomic = variant.hn.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript and test for movement + try: + hgvs_seek_var = variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if saved_hgvs_coding.posedit.edit.type != hgvs_seek_var.posedit.edit.type: + rec_var = 'false' + hgvs_seek_var = saved_hgvs_coding + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + elif suppress_c_normalization == 'true': + rec_var = 'false' + hgvs_seek_var = saved_hgvs_coding + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + elif (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + try: + automap = fn.valstr(saved_hgvs_coding) + ' normalized to ' + fn.valstr(hgvs_seek_var) + hgvs_coding = hgvs_seek_var + coding = fn.valstr(hgvs_coding) + variant.warnings += ': ' + automap + except NotImplementedError: + fn.exceptPass() + else: + # Double check protein position by reverse_norm genomic, and normalize back to c. 
for normalize or not to normalize issue + coding = fn.valstr(hgvs_coding) + rng = variant.reverse_normalizer.normalize(query_genomic) + try: + # Diagram where - = intron and E = Exon + + # 3 prime + # ---------EEEEEEEEEEEEEEEEE----------- + # < + # Result, normalize of new variant will baulk at intronic + # 5 prime + # < + # Result, normalize of new variant will be happy + c_for_p = validator.vm.g_to_t(rng, hgvs_coding.ac) try: - # Predicted effect on protein + variant.hn.normalize(c_for_p) + except hgvs.exceptions.HGVSError as e: + fn.exceptPass() + else: + # hgvs_protein = va_func.protein(str(c_for_p), variant.evm, hp) protein_dict = validator.myc_to_p(c_for_p, variant.evm, re_to_p=False) if protein_dict['error'] == '': hgvs_protein = protein_dict['hgvs_protein'] @@ -3051,121 +3073,48 @@ def transcripts_to_gene(variant, validator): variant.warnings += ': ' + str(error) # Replace protein description in vars table protein = str(hgvs_protein) - except NotImplementedError: - fn.exceptPass() - else: - # Double check protein position by normalize genomic, and normalize back to c. 
for normalize or not to normalize issue - coding = fn.valstr(hgvs_coding) - - elif ori != -1: - # position genomic at its most 3 prime position - try: - query_genomic = variant.hn.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if saved_hgvs_coding.posedit.edit.type != hgvs_seek_var.posedit.edit.type: - rec_var = 'false' - hgvs_seek_var = saved_hgvs_coding - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - elif suppress_c_normalization == 'true': - rec_var = 'false' - hgvs_seek_var = saved_hgvs_coding - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - elif (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - try: - automap = fn.valstr(saved_hgvs_coding) + ' normalized to ' + fn.valstr(hgvs_seek_var) - hgvs_coding = hgvs_seek_var - coding = fn.valstr(hgvs_coding) - variant.warnings += ': ' + automap - except NotImplementedError: - fn.exceptPass() - else: - # Double check protein position by reverse_norm genomic, and normalize back to c. 
for normalize or not to normalize issue - coding = fn.valstr(hgvs_coding) - rng = variant.reverse_normalizer.normalize(query_genomic) - try: - # Diagram where - = intron and E = Exon - - # 3 prime - # ---------EEEEEEEEEEEEEEEEE----------- - # < - # Result, normalize of new variant will baulk at intronic - # 5 prime - # < - # Result, normalize of new variant will be happy - c_for_p = validator.vm.g_to_t(rng, hgvs_coding.ac) - try: - variant.hn.normalize(c_for_p) - except hgvs.exceptions.HGVSError as e: - fn.exceptPass() - else: - # hgvs_protein = va_func.protein(str(c_for_p), variant.evm, hp) - protein_dict = validator.myc_to_p(c_for_p, variant.evm, re_to_p=False) - if protein_dict['error'] == '': - hgvs_protein = protein_dict['hgvs_protein'] - protein = str(hgvs_protein) - else: - error = protein_dict['error'] - if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': - hgvs_protein = protein_dict['hgvs_protein'] - variant.warnings += ': ' + str(error) - # Replace protein description in vars table - protein = str(hgvs_protein) - except Exception: - fn.exceptPass() - - # Check for up-to-date transcript version - updated_transcript_variant = 'None' - tx_id_info = validator.hdp.get_tx_identity_info(hgvs_coding.ac) - uta_gene_symbol = tx_id_info[6] - tx_for_gene = validator.hdp.get_tx_for_gene(uta_gene_symbol) - ac_root, ac_version = hgvs_coding.ac.split('.') - version_tracking = '0' - update = '' - for accession in tx_for_gene: - try: - if re.match(ac_root, accession[3]): - query_version = accession[3].split('.')[1] - if int(query_version) > int(ac_version) and int(query_version) > int( - version_tracking): - version_tracking = query_version - update = accession[3] - except ValueError: + except Exception: fn.exceptPass() - if update != '': - hgvs_updated = copy.deepcopy(hgvs_coding) - hgvs_updated.ac = update - try: + # Check for up-to-date transcript version + updated_transcript_variant = 'None' + tx_id_info = 
validator.hdp.get_tx_identity_info(hgvs_coding.ac) + uta_gene_symbol = tx_id_info[6] + tx_for_gene = validator.hdp.get_tx_for_gene(uta_gene_symbol) + ac_root, ac_version = hgvs_coding.ac.split('.') + version_tracking = '0' + update = '' + for accession in tx_for_gene: + try: + if re.match(ac_root, accession[3]): + query_version = accession[3].split('.')[1] + if int(query_version) > int(ac_version) and int(query_version) > int( + version_tracking): + version_tracking = query_version + update = accession[3] + except ValueError: + fn.exceptPass() + + if update != '': + hgvs_updated = copy.deepcopy(hgvs_coding) + hgvs_updated.ac = update + try: + validator.vr.validate(hgvs_updated) + # Updated reference sequence + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('does not agree with reference sequence', str(error)): + match = re.findall(r'\(([GATC]+)\)', error) + new_ref = match[1] + hgvs_updated.posedit.edit.ref = new_ref validator.vr.validate(hgvs_updated) - # Updated reference sequence - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('does not agree with reference sequence', str(error)): - match = re.findall(r'\(([GATC]+)\)', error) - new_ref = match[1] - hgvs_updated.posedit.edit.ref = new_ref - validator.vr.validate(hgvs_updated) - updated_transcript_variant = hgvs_updated - else: - pass - updated_transcript_variant = hgvs_updated - variant.warnings += ': ' + 'A more recent version of the selected reference sequence ' + hgvs_coding.ac + ' is available (' + updated_transcript_variant.ac + ')' + ': ' + str( - updated_transcript_variant) + ' MUST be fully validated prior to use in reports: select_variants=' + fn.valstr( - updated_transcript_variant) + updated_transcript_variant = hgvs_updated + else: + pass + updated_transcript_variant = hgvs_updated + variant.warnings += ': ' + 'A more recent version of the selected reference sequence ' + hgvs_coding.ac + ' is available (' + updated_transcript_variant.ac + ')' + ': ' + 
str( + updated_transcript_variant) + ' MUST be fully validated prior to use in reports: select_variants=' + fn.valstr( + updated_transcript_variant) variant.coding = str(hgvs_coding) variant.genomic_r = str(hgvs_refseq) From 278ed9b78007df24b168a1c26d711e4658d88ee9 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 24 Apr 2019 10:15:04 +0100 Subject: [PATCH 062/223] Moved first gap compensation mapping to new file --- VariantValidator/modules/gapped_mapping.py | 1342 ++++++++++++++++++++ VariantValidator/modules/mappers.py | 1342 +------------------- 2 files changed, 1348 insertions(+), 1336 deletions(-) diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index 71c00752..db82d0bb 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -947,3 +947,1345 @@ def gapped_g_to_c(variant, validator, rel_var): 'gapped_transcripts': gapped_transcripts } return data, nw_rel_var + + +def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): + orientation = int(ori[0]['alt_strand']) + hgvs_genomic_possibilities = [] + hgvs_genomic = validator.myevm_t_to_g(hgvs_coding, variant.no_norm_evm, variant.primary_assembly, variant.hn) + + logger.warning('g_to_t gap code 1 active') + rn_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) + hgvs_genomic_possibilities.append(rn_hgvs_genomic) + if orientation != -1: + try: + chromosome_normalized_hgvs_coding = variant.reverse_normalizer.normalize(hgvs_coding) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + chromosome_normalized_hgvs_coding = hgvs_coding + else: + try: + chromosome_normalized_hgvs_coding = variant.hn.normalize(hgvs_coding) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + chromosome_normalized_hgvs_coding = hgvs_coding + + most_3pr_hgvs_genomic = validator.myvm_t_to_g(chromosome_normalized_hgvs_coding, hgvs_genomic.ac, + 
variant.no_norm_evm, variant.hn) + hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) + + # Push from side to side to try pick up odd placements + # MAKE A NO NORM HGVS2VCF + # First to the right + hgvs_stash = copy.deepcopy(hgvs_coding) + try: + hgvs_stash = variant.no_norm_evm.c_to_n(hgvs_stash) + except: + fn.exceptPass() + try: + stash_ac = hgvs_stash.ac + stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, variant.primary_assembly, variant.hn, validator.sf) + stash_pos = int(stash_dict['pos']) + stash_ref = stash_dict['ref'] + stash_alt = stash_dict['alt'] + # Generate an end position + stash_end = str(stash_pos + len(stash_ref) - 1) + # make a not real deletion insertion + stash_hgvs_not_delins = validator.hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' + str( + stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + try: + stash_hgvs_not_delins = variant.no_norm_evm.n_to_c(stash_hgvs_not_delins) + except: + fn.exceptPass() + # Store a tx copy for later use + test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) + # stash_genomic = vm.t_to_g(test_stash_tx_right, hgvs_genomic.ac) + stash_genomic = validator.myvm_t_to_g(test_stash_tx_right, hgvs_genomic.ac, variant.no_norm_evm, variant.hn) + # Stash the outputs if required + # test variants = NC_000006.11:g.90403795G= (causes double identity) + # NC_000002.11:g.73675227_73675228insCTC (? 
incorrect assumed insertion position) + # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': + # pass + if len(test_stash_tx_right.posedit.edit.ref) == ( + (stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + stash_tx_right = test_stash_tx_right + if hasattr(test_stash_tx_right.posedit.edit, + 'alt') and test_stash_tx_right.posedit.edit.alt is not None: + alt = test_stash_tx_right.posedit.edit.alt + else: + alt = '' + if hasattr(stash_genomic.posedit.edit, + 'alt') and stash_genomic.posedit.edit.alt is not None: + g_alt = stash_genomic.posedit.edit.alt + else: + g_alt = '' + if (len(alt) - ( + test_stash_tx_right.posedit.pos.end.base - test_stash_tx_right.posedit.pos.start.base) + 1) != ( + len(g_alt) - ( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + hgvs_genomic_possibilities.append(stash_genomic) + else: + hgvs_genomic_possibilities.append('') + elif test_stash_tx_right.posedit.edit.type == 'identity': + reform_ident = str(test_stash_tx_right).split(':')[0] + reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_right.posedit.pos) + 'del' + str( + test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) + hgvs_reform_ident = validator.hp.parse_hgvs_variant(reform_ident) + try: + variant.hn.normalize(hgvs_reform_ident) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error): + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append('') + else: + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append(stash_genomic) + else: + try: + variant.hn.normalize(test_stash_tx_right) + except hgvs.exceptions.HGVSUnsupportedOperationError: + hgvs_genomic_possibilities.append('') + else: + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append(stash_genomic) + except hgvs.exceptions.HGVSError as e: + test_stash_tx_right = copy.deepcopy(hgvs_coding) + fn.exceptPass() + # Intronic positions not supported. Will cause a Value Error + except ValueError: + test_stash_tx_right = copy.deepcopy(hgvs_coding) + fn.exceptPass() + + # Then to the left + hgvs_stash = copy.deepcopy(hgvs_coding) + try: + hgvs_stash = variant.no_norm_evm.c_to_n(hgvs_stash) + except: + fn.exceptPass() + try: + stash_ac = hgvs_stash.ac + stash_dict = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, variant.primary_assembly, variant.reverse_normalizer, + validator.sf) + stash_pos = int(stash_dict['pos']) + stash_ref = stash_dict['ref'] + stash_alt = stash_dict['alt'] + # Generate an end position + stash_end = str(stash_pos + len(stash_ref) - 1) + # make a not real deletion insertion + stash_hgvs_not_delins = validator.hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' 
+ str( + stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + try: + stash_hgvs_not_delins = variant.no_norm_evm.n_to_c(stash_hgvs_not_delins) + except: + fn.exceptPass() + # Store a tx copy for later use + test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) + # stash_genomic = vm.t_to_g(test_stash_tx_left, hgvs_genomic.ac) + stash_genomic = validator.myvm_t_to_g(test_stash_tx_left, hgvs_genomic.ac, variant.no_norm_evm, variant.hn) + # Stash the outputs if required + # test variants = NC_000006.11:g.90403795G= (causes double identity) + # NC_000002.11:g.73675227_73675228insCTC + # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': + # pass + if len(test_stash_tx_left.posedit.edit.ref) == (( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): + stash_tx_left = test_stash_tx_left + if hasattr(test_stash_tx_left.posedit.edit, + 'alt') and test_stash_tx_left.posedit.edit.alt is not None: + alt = test_stash_tx_left.posedit.edit.alt + else: + alt = '' + if hasattr(stash_genomic.posedit.edit, + 'alt') and stash_genomic.posedit.edit.alt is not None: + g_alt = stash_genomic.posedit.edit.alt + else: + g_alt = '' + + if (len(alt) - ( + test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( + len(g_alt) - ( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + hgvs_genomic_possibilities.append(stash_genomic) + else: + hgvs_genomic_possibilities.append('') + elif test_stash_tx_left.posedit.edit.type == 'identity': + reform_ident = str(test_stash_tx_left).split(':')[0] + reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_left.posedit.pos) + 'del' + str( + test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) + hgvs_reform_ident = validator.hp.parse_hgvs_variant(reform_ident) + try: + variant.hn.normalize(hgvs_reform_ident) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error): + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append('') + else: + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append(stash_genomic) + else: + try: + variant.hn.normalize(test_stash_tx_left) + except hgvs.exceptions.HGVSUnsupportedOperationError: + hgvs_genomic_possibilities.append('') + else: + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append(stash_genomic) + except hgvs.exceptions.HGVSError as e: + test_stash_tx_left = copy.deepcopy(hgvs_coding) + fn.exceptPass() + except ValueError: + test_stash_tx_left = copy.deepcopy(hgvs_coding) + fn.exceptPass() + + # direct mapping from reverse_normalized transcript insertions in the delins format + try: + if hgvs_coding.posedit.edit.type == 'ins': + most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) + most_3pr_hgvs_transcript_variant = variant.reverse_normalizer.normalize(hgvs_coding) + try: + n_3pr = validator.vm.c_to_n(most_3pr_hgvs_transcript_variant) + n_5pr = validator.vm.c_to_n(most_5pr_hgvs_transcript_variant) + except: + n_3pr = most_3pr_hgvs_transcript_variant + n_5pr = most_5pr_hgvs_transcript_variant + # Make into a delins by adding the ref bases to the variant ref and alt + pr3_ref = validator.sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, + n_3pr.posedit.pos.end.base) + pr5_ref = validator.sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, + n_5pr.posedit.pos.end.base) + most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref + 
most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[ + 0] + most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ + pr3_ref[1] + most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[ + 0] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ + pr5_ref[1] + # Map to the genome + genomic_from_most_3pr_hgvs_transcript_variant = validator.vm.t_to_g( + most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) + genomic_from_most_5pr_hgvs_transcript_variant = validator.vm.t_to_g( + most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) + # Normalize - If the variant spans a gap it should then form a static genomic variant + try: + genomic_from_most_3pr_hgvs_transcript_variant = variant.hn.normalize( + genomic_from_most_3pr_hgvs_transcript_variant) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if error == 'base start position must be <= end position': + start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base + end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start + genomic_from_most_3pr_hgvs_transcript_variant = variant.hn.normalize( + genomic_from_most_3pr_hgvs_transcript_variant) + try: + genomic_from_most_5pr_hgvs_transcript_variant = variant.hn.normalize( + genomic_from_most_5pr_hgvs_transcript_variant) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if error == 'base start position must be <= end position': + start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base + end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start + genomic_from_most_5pr_hgvs_transcript_variant = variant.hn.normalize( + 
genomic_from_most_5pr_hgvs_transcript_variant) + try: + if genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' + str( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( + genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) + + try: + if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' 
+ str( + most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( + most_3pr_hgvs_transcript_variant_delins_from_dup) + + try: + if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' + str( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( + genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) + + try: + if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' 
+ str( + most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( + most_5pr_hgvs_transcript_variant_delins_from_dup) + + if len(genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( + most_3pr_hgvs_transcript_variant.posedit.edit.alt): + hgvs_genomic_possibilities.append(genomic_from_most_3pr_hgvs_transcript_variant) + if len(genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( + most_5pr_hgvs_transcript_variant.posedit.edit.alt): + hgvs_genomic_possibilities.append(genomic_from_most_5pr_hgvs_transcript_variant) + + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + pass + + logger.info('\nGENOMIC POSSIBILITIES') + for possibility in hgvs_genomic_possibilities: + if possibility == '': + logger.info('X') + else: + logger.info(fn.valstr(possibility)) + + logger.info('\n') + + # Set variables for problem specific warnings + gapped_alignment_warning = '' + corrective_action_taken = '' + gapped_transcripts = '' + auto_info = '' + + # Mark as not disparity detected + disparity_deletion_in = ['false', 'false'] + + # Loop through to see if a gap can be located + # Set the variables required for corrective normalization + possibility_counter = 0 + suppress_c_normalization = 'false' # Applies to boundary crossing normalization + + # Copy a version of hgvs_genomic_possibilities + for possibility in hgvs_genomic_possibilities: + possibility_counter = possibility_counter + 1 + + # Loop out stash possibilities which will not spot gaps so 
are empty + if possibility == '': + continue + + # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps + hgvs_genomic_variant = copy.deepcopy(possibility) + stored_hgvs_genomic_variant = copy.deepcopy(hgvs_genomic_variant) + + # Reverse normalize hgvs_genomic_variant: NOTE will replace ref + try: + reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic_variant) + except hgvs.exceptions.HGVSError as e: + # Strange error caused by gap in genomic + error = str(e) + if re.search('base start position must be <= end position', error): + if hgvs_genomic.posedit.edit.type == 'delins': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) + if hgvs_genomic.posedit.edit.type == 'del': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) + if re.search('insertion length must be 1', error): + if hgvs_genomic.posedit.edit.type == 'ins': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) + lhb = 
validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start, end) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) + + hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + # Store a copy for later use + stored_hgvs_genomic_5pr = copy.deepcopy(hgvs_genomic_5pr) + + # Create VCF + vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, variant.primary_assembly, + variant.reverse_normalizer, validator.sf) + chr = vcf_dict['chr'] + pos = vcf_dict['pos'] + ref = vcf_dict['ref'] + alt = vcf_dict['alt'] + + # Look for exonic gaps within transcript or chromosome + no_normalized_c = 'false' # Mark true to not produce an additional normalization of c. + + # Generate an end position + end = str(int(pos) + len(ref) - 1) + pos = str(pos) + + # Store a not real deletion insertion to test for gapping + stored_hgvs_not_delins = validator.hp.parse_hgvs_variant(str( + hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' 
+ pos + '_' + end + 'del' + ref + 'ins' + alt) + v = [chr, pos, ref, alt] + + # Detect intronic variation using normalization + intronic_variant = 'false' + + # Save a copy of current hgvs_coding + try: + saved_hgvs_coding = variant.no_norm_evm.g_to_t(stored_hgvs_not_delins, hgvs_coding.ac) + except hgvs.exceptions.HGVSInvalidIntervalError as e: + if str(e) == 'start or end or both are beyond the bounds of transcript record': + saved_hgvs_coding = hgvs_coding + intronic_variant = 'true' + continue + else: + saved_hgvs_coding = variant.no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, + hgvs_coding.ac) + + # Look for normalized variant options that do not match hgvs_coding + if orientation == -1: + # position genomic at its most 5 prime position + try: + query_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript and test for movement + try: + hgvs_seek_var = variant.evm.g_to_t(query_genomic, hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if ( + hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding + + elif orientation != -1: + # position genomic at its most 3 prime position + try: + query_genomic = variant.hn.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript ant test for movement + try: + hgvs_seek_var = variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = fn.valstr(hgvs_seek_var) + seek_ac 
= str(hgvs_seek_var.ac) + if ( + hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding + + try: + intron_test = variant.hn.normalize(hgvs_seek_var) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + intronic_variant = 'hard_fail' + else: + # Double check to see whether the variant is actually intronic? + for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' + + if intronic_variant != 'hard_fail': + if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', + str(hgvs_seek_var.posedit.pos)): + # Double check to see whether the variant is actually intronic? 
+ for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' + + if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str(hgvs_seek_var.posedit.pos)): + # Double check to see whether the variant is actually intronic? + for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' + + if intronic_variant != 'true': + # Flag RefSeqGene for ammendment + # amend_RefSeqGene = 'false' + # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths + if stored_hgvs_not_delins != '': + # Refresh hgvs_not_delins from stored_hgvs_not_delins + hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) + # This test will only occur in dup of single base, insertion or substitution + if not re.search('_', str(hgvs_not_delins.posedit.pos)): + if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', + hgvs_genomic_5pr.posedit.edit.type): + # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos + plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) + plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 + plussed_hgvs_not_delins.posedit.edit.ref = '' + transcript_variant = variant.no_norm_evm.g_to_t(plussed_hgvs_not_delins, + str(saved_hgvs_coding.ac)) + if (( + transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( + hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + elif re.search('ins', + str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', + str( + hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + else: + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = 
hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + elif re.search('ins', + str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', + str( + hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + else: + pass + else: + pass + try: + tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSInvalidIntervalError: + tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, + saved_hgvs_coding.ac) + # Create normalized version of tx_hgvs_not_delins + rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) + + # Check for +1 base and adjust + if re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( + r'\+', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: + pass + + elif re.search(r'\+', 
str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + + rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + + # tx_hgvs_not_delins = rn_tx_hgvs_not_delins + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # else: + # pass + + # Check for -ve base and adjust + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( + r'\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: + pass + elif 
re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # Delete the ref + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # Add the additional base to the ALT + start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 + end = rn_tx_hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + else: + pass + + # Logic + if len(hgvs_not_delins.posedit.edit.ref) < len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( + hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['chromosome', gap_length] + elif 
len(hgvs_not_delins.posedit.edit.ref) > len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( + rn_tx_hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['transcript', gap_length] + else: + re_capture_tx_variant = [] + for internal_possibility in hgvs_genomic_possibilities: + if internal_possibility == '': + continue + + hgvs_t_possibility = validator.vm.g_to_t(internal_possibility, hgvs_coding.ac) + if hgvs_t_possibility.posedit.edit.type == 'ins': + try: + hgvs_t_possibility = validator.vm.c_to_n(hgvs_t_possibility) + except: + fn.exceptPass() + ins_ref = validator.sf.fetch_seq(hgvs_t_possibility.ac, + hgvs_t_possibility.posedit.pos.start.base - 1, + hgvs_t_possibility.posedit.pos.start.base + 1) + try: + hgvs_t_possibility = validator.vm.n_to_c(hgvs_t_possibility) + except: + fn.exceptPass() + hgvs_t_possibility.posedit.edit.ref = ins_ref + hgvs_t_possibility.posedit.edit.alt = ins_ref[ + 0] + hgvs_t_possibility.posedit.edit.alt + \ + ins_ref[1] + if internal_possibility.posedit.edit.type == 'ins': + ins_ref = validator.sf.fetch_seq(internal_possibility.ac, + internal_possibility.posedit.pos.start.base - 1, + internal_possibility.posedit.pos.end.base) + internal_possibility.posedit.edit.ref = ins_ref + internal_possibility.posedit.edit.alt = ins_ref[ + 0] + internal_possibility.posedit.edit.alt + \ + ins_ref[1] + + if len(hgvs_t_possibility.posedit.edit.ref) < len( + internal_possibility.posedit.edit.ref): + gap_length = len(internal_possibility.posedit.edit.ref) - len( + hgvs_t_possibility.posedit.edit.ref) + re_capture_tx_variant = ['transcript', gap_length, + hgvs_t_possibility] + hgvs_not_delins = internal_possibility + hgvs_genomic_5pr = internal_possibility + break + + if re_capture_tx_variant != []: + try: + tx_hgvs_not_delins = validator.vm.c_to_n(re_capture_tx_variant[2]) + except: + tx_hgvs_not_delins = re_capture_tx_variant[2] + disparity_deletion_in = re_capture_tx_variant[0:-1] + else: + 
pass + + # 'At hgvs_genomic' + # Final sanity checks + try: + validator.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + except Exception as e: + if str(e) == 'start or end or both are beyond the bounds of transcript record': + continue + try: + variant.hn.normalize(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + continue + elif re.match('Normalization of intronic variants is not supported', error): + # We know that this cannot be because of an intronic variant, so must be aligned to tx gap + disparity_deletion_in = ['transcript', 'Requires Analysis'] + + # amend_RefSeqGene = 'false' + # Recreate hgvs_genomic + if disparity_deletion_in[0] == 'transcript': + hgvs_genomic = hgvs_not_delins + + # Find oddly placed gaps where the tx variant is encompassed in the gap + if disparity_deletion_in[0] == 'false' and ( + possibility_counter == 3 or possibility_counter == 4): + rg = variant.reverse_normalizer.normalize(hgvs_not_delins) + rtx = validator.vm.g_to_t(rg, tx_hgvs_not_delins.ac) + fg = variant.hn.normalize(hgvs_not_delins) + ftx = validator.vm.g_to_t(fg, tx_hgvs_not_delins.ac) + if (rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( + ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): + exons = validator.hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, validator.alt_aln_method) + exonic = False + for ex_test in exons: + if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ + 7]) and ftx.posedit.pos.end.base in range(ex_test[6], ex_test[7]): + exonic = True + if exonic is True: + hgvs_not_delins = fg + hgvs_genomic = fg + hgvs_genomic_5pr = fg + try: + tx_hgvs_not_delins = 
validator.vm.c_to_n(ftx) + except Exception: + tx_hgvs_not_delins = ftx + disparity_deletion_in = ['transcript', 'Requires Analysis'] + + # Pre-processing of tx_hgvs_not_delins + try: + if tx_hgvs_not_delins.posedit.edit.alt is None: + tx_hgvs_not_delins.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( + tx_hgvs_not_delins.posedit.pos.start) + '_' + str( + tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins = validator.hp.parse_hgvs_variant( + tx_hgvs_not_delins_delins_from_dup) + + # GAP IN THE TRANSCRIPT DISPARITY DETECTED + if disparity_deletion_in[0] == 'transcript': + # Suppress intron boundary crossing due to non-intron intron based c. seq annotations + suppress_c_normalization = 'true' + # amend_RefSeqGene = 'true' + # ANY VARIANT WHOLLY WITHIN THE GAP + if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( + r'\-', + str( + tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( + r'\-', + str( + tx_hgvs_not_delins.posedit.pos.end))): + + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + + # Copy the current variant + tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) + try: + if tx_gap_fill_variant.posedit.edit.alt is None: + tx_gap_fill_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( + tx_gap_fill_variant.posedit.pos.start) + '_' + str( + tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant = validator.hp.parse_hgvs_variant( + tx_gap_fill_variant_delins_from_dup) + + # Identify which half of the NOT-intron the start position of the variant is in + if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + + try: + tx_gap_fill_variant = validator.vm.n_to_c(tx_gap_fill_variant) + except: + fn.exceptPass() + genomic_gap_fill_variant = validator.vm.t_to_g(tx_gap_fill_variant, + reverse_normalized_hgvs_genomic.ac) + genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref + + try: + c_tx_hgvs_not_delins = validator.vm.n_to_c(tx_hgvs_not_delins) + except Exception: + c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) + genomic_gap_fill_variant_alt = validator.vm.t_to_g(c_tx_hgvs_not_delins, + hgvs_genomic_5pr.ac) + + # Ensure an ALT exists + try: + if genomic_gap_fill_variant_alt.posedit.edit.alt is None: + genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_gap_fill_variant_delins_from_dup 
= genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( + genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant = validator.hp.parse_hgvs_variant( + genomic_gap_fill_variant_delins_from_dup) + genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( + genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt = validator.hp.parse_hgvs_variant( + genomic_gap_fill_variant_alt_delins_from_dup) + + # Correct insertion alts + if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': + append_ref = validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, + genomic_gap_fill_variant_alt.posedit.pos.end.base) + genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ + 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ + append_ref[1] + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) + if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: + alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) + else: + # Deletions with no ins + pre_alternate_bases = list( + genomic_gap_fill_variant_alt.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = genomic_gap_fill_variant.posedit.pos.start.base + alt_start = 
genomic_gap_fill_variant_alt.posedit.pos.start.base + ref_base_dict = {} + for base in reference_bases: + ref_base_dict[ref_start] = str(base) + ref_start = ref_start + 1 + + alt_base_dict = {} + + # NEED TO SEARCH FOR RANGE = and replace with interval_range + # Need to search for int and replace with integer + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, + genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, + 1): + if integer == alt_start: + alt_base_dict[integer] = str(''.join(alternate_bases)) + else: + alt_base_dict[integer] = 'X' + + # Generate the alt sequence + alternate_sequence_bases = [] + for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, + genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): + if integer in list(alt_base_dict.keys()): + alternate_sequence_bases.append(alt_base_dict[integer]) + else: + alternate_sequence_bases.append(ref_base_dict[integer]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Add the new alt to the gap fill variant and generate transcript variant + genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence + hgvs_refreshed_variant = validator.vm.g_to_t(genomic_gap_fill_variant, + tx_gap_fill_variant.ac) + + # Set warning + gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) + disparity_deletion_in[1] = [gap_size] + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + else: + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + auto_info = auto_info + '%s' % (gap_position) + + else: + if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: + # In this instance, we have identified a transcript gap but the n. version of + # the transcript variant but do not have a position which actually hits the gap, + # so the variant likely spans the gap, and is not picked up by an offset. 
+ try: + c1 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + g1 = validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g3 = validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + ng2 = variant.hn.normalize(g2) + g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( + len(g3.posedit.edit.ref) - 1) + try: + c2 = validator.vm.g_to_t(g3, c1.ac) + if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: + pass + else: + tx_hgvs_not_delins = c2 + try: + tx_hgvs_not_delins = validator.vm.c_to_n(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSError: + fn.exceptPass() + except hgvs.exceptions.HGVSInvalidVariantError: + fn.exceptPass() + + if re.search(r'\+', + str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + 
g3.posedit.edit.alt = alternate + c3 = validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\+', + str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment 
position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base + gpe = for_location_c.posedit.pos.end.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\-', + str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = 
for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\-', + str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base - 1 + gpe = for_location_c.posedit.pos.end.base + gap_position = ' between 
positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + else: + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + '\n' + tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.start.base + len( + tx_hgvs_not_delins.posedit.edit.ref) - 1 + hgvs_refreshed_variant = tx_hgvs_not_delins + + # GAP IN THE CHROMOSOME + elif disparity_deletion_in[0] == 'chromosome': + suppress_c_normalization = 'true' + # amend_RefSeqGene = 'true' + if possibility_counter == 3: + hgvs_refreshed_variant = stash_tx_right + elif possibility_counter == 4: + hgvs_refreshed_variant = stash_tx_left + else: + hgvs_refreshed_variant = chromosome_normalized_hgvs_coding + # Warn + auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( + hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' transcript base(s) that fail to align to chromosome ' + str( + hgvs_genomic.ac) + '\n' + else: + # Keep the same by re-setting rel_var + hgvs_refreshed_variant = hgvs_coding + # amend_RefSeqGene = 'false' + + # Edit the output + if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( + hgvs_refreshed_variant.type)): + hgvs_refreshed_variant = variant.no_norm_evm.n_to_c(hgvs_refreshed_variant) + else: + pass + + try: + variant.hn.normalize(hgvs_refreshed_variant) + except Exception as e: + error = str(e) + + # Ensure the final variant is not intronic nor does it cross exon boundaries + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + hgvs_refreshed_variant = saved_hgvs_coding + else: + logger.warning(error) + continue + + # Quick check to 
make sure the coding variant has not changed + try: + to_test = variant.hn.normalize(hgvs_refreshed_variant) + except: + to_test = hgvs_refreshed_variant + if str(to_test.posedit.edit) != str(hgvs_coding.posedit.edit): + # Try the next available genomic option + if hgvs_coding.posedit.edit.type == 'identity' and to_test.posedit.edit.type == 'identity': + hgvs_coding = to_test + else: + continue + # Update hgvs_genomic + hgvs_genomic = validator.myvm_t_to_g(hgvs_refreshed_variant, hgvs_genomic.ac, + variant.no_norm_evm, variant.hn) + if hgvs_genomic.posedit.edit.type == 'identity': + re_c = validator.vm.g_to_t(hgvs_genomic, hgvs_refreshed_variant.ac) + if (variant.hn.normalize(re_c)) != (variant.hn.normalize(hgvs_refreshed_variant)): + shuffle_left_g = copy.copy(hgvs_genomic) + shuffle_left_g.posedit.edit.ref = '' + shuffle_left_g.posedit.edit.alt = '' + shuffle_left_g.posedit.pos.start.base = shuffle_left_g.posedit.pos.start.base - 1 + shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 + shuffle_left_g = variant.reverse_normalizer.normalize(shuffle_left_g) + re_c = validator.vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) + if (variant.hn.normalize(re_c)) != (variant.hn.normalize(hgvs_refreshed_variant)): + hgvs_genomic = shuffle_left_g + + # If it is intronic, these vairables will not have been set + else: + # amend_RefSeqGene = 'false' + no_normalized_c = 'false' + + # Break if gap has been detected + if disparity_deletion_in[0] != 'false': + break + + # Warn user about gapping + if auto_info != '': + info_lines = auto_info.split('\n') + info_keys = {} + for information in info_lines: + info_keys[information] = '' + info_out = [] + info_out.append( + 'The displayed variants may be artefacts of aligning ' + hgvs_coding.ac + ' with genome build ' + variant.primary_assembly) + for ky in list(info_keys.keys()): + info_out.append(ky) + auto_info = '\n'.join(info_out) + auto_info = auto_info + '\nCaution should be used when reporting the 
displayed variant descriptions: If you are unsure, please contact admin' + auto_info = str(auto_info.replace('\n', ': ')) + variant.warnings += ': ' + str(auto_info) + logger.warning(str(auto_info)) + # Normalise hgvs_genomic + try: + hgvs_genomic = variant.hn.normalize(hgvs_genomic) + except hgvs.exceptions.HGVSError as e: + # Strange error caused by gap in genomic + error = str(e) + + if re.search('base start position must be <= end position', error) and \ + disparity_deletion_in[0] == 'chromosome': + if hgvs_genomic.posedit.edit.type == 'delins': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + hgvs_genomic = variant.hn.normalize(hgvs_genomic) + if hgvs_genomic.posedit.edit.type == 'del': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + hgvs_genomic = variant.hn.normalize(hgvs_genomic) + genomic = fn.valstr(hgvs_genomic) + + print('in gapped_mapping', hgvs_coding) + + return hgvs_genomic, gapped_transcripts, auto_info, suppress_c_normalization, hgvs_coding diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index 107ad52a..1e3a5cc8 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -185,6 +185,7 @@ def transcripts_to_gene(variant, validator): boundary =
'false' warning = '' caution = '' + error = '' # Collect information for genomic level validation obj = validator.hp.parse_hgvs_variant(str(variant.hgvs_formatted)) @@ -704,7 +705,7 @@ def transcripts_to_gene(variant, validator): gap_compensation = vvChromosomes.gap_black_list(gene_symbol) # Intron spanning variants - if re.search('boundary', str(error)) or re.search('spanning', str(error)): + if 'boundary' in str(error) or 'spanning' in str(error): try: hgvs_coding = variant.evm._maybe_normalize(hgvs_coding) gap_compensation = False @@ -712,8 +713,6 @@ def transcripts_to_gene(variant, validator): variant.warnings += ': ' + str(error) logger.warning(str(error)) return True - else: - pass # Warn status logger.warning("gap_compensation_1 = " + str(gap_compensation)) @@ -735,1337 +734,8 @@ def transcripts_to_gene(variant, validator): # Loop out gap finding code under these circumstances! if gap_compensation is True: - logger.warning('g_to_t gap code 1 active') - rn_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) - hgvs_genomic_possibilities.append(rn_hgvs_genomic) - if orientation != -1: - try: - chromosome_normalized_hgvs_coding = variant.reverse_normalizer.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - chromosome_normalized_hgvs_coding = hgvs_coding - else: - try: - chromosome_normalized_hgvs_coding = variant.hn.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - chromosome_normalized_hgvs_coding = hgvs_coding - - most_3pr_hgvs_genomic = validator.myvm_t_to_g(chromosome_normalized_hgvs_coding, hgvs_genomic.ac, - variant.no_norm_evm, variant.hn) - hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) - - # Push from side to side to try pick up odd placements - # MAKE A NO NORM HGVS2VCF - # First to the right - hgvs_stash = copy.deepcopy(hgvs_coding) - try: - hgvs_stash = variant.no_norm_evm.c_to_n(hgvs_stash) - except: - fn.exceptPass() - 
try: - stash_ac = hgvs_stash.ac - stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, variant.primary_assembly, variant.hn, validator.sf) - stash_pos = int(stash_dict['pos']) - stash_ref = stash_dict['ref'] - stash_alt = stash_dict['alt'] - # Generate an end position - stash_end = str(stash_pos + len(stash_ref) - 1) - # make a not real deletion insertion - stash_hgvs_not_delins = validator.hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' + str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) - try: - stash_hgvs_not_delins = variant.no_norm_evm.n_to_c(stash_hgvs_not_delins) - except: - fn.exceptPass() - # Store a tx copy for later use - test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) - # stash_genomic = vm.t_to_g(test_stash_tx_right, hgvs_genomic.ac) - stash_genomic = validator.myvm_t_to_g(test_stash_tx_right, hgvs_genomic.ac, variant.no_norm_evm, variant.hn) - # Stash the outputs if required - # test variants = NC_000006.11:g.90403795G= (causes double identity) - # NC_000002.11:g.73675227_73675228insCTC (? 
incorrect assumed insertion position) - # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) - # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': - # pass - if len(test_stash_tx_right.posedit.edit.ref) == (( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - stash_tx_right = test_stash_tx_right - if hasattr(test_stash_tx_right.posedit.edit, - 'alt') and test_stash_tx_right.posedit.edit.alt is not None: - alt = test_stash_tx_right.posedit.edit.alt - else: - alt = '' - if hasattr(stash_genomic.posedit.edit, - 'alt') and stash_genomic.posedit.edit.alt is not None: - g_alt = stash_genomic.posedit.edit.alt - else: - g_alt = '' - if (len(alt) - ( - test_stash_tx_right.posedit.pos.end.base - test_stash_tx_right.posedit.pos.start.base) + 1) != ( - len(g_alt) - ( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - hgvs_genomic_possibilities.append(stash_genomic) - else: - hgvs_genomic_possibilities.append('') - elif test_stash_tx_right.posedit.edit.type == 'identity': - reform_ident = str(test_stash_tx_right).split(':')[0] - reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_right.posedit.pos) + 'del' + str( - test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) - hgvs_reform_ident = validator.hp.parse_hgvs_variant(reform_ident) - try: - variant.hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error): - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append('') - else: - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append(stash_genomic) - else: - try: - variant.hn.normalize(test_stash_tx_right) - except hgvs.exceptions.HGVSUnsupportedOperationError: - hgvs_genomic_possibilities.append('') - else: - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: - test_stash_tx_right = copy.deepcopy(hgvs_coding) - fn.exceptPass() - # Intronic positions not supported. Will cause a Value Error - except ValueError: - test_stash_tx_right = copy.deepcopy(hgvs_coding) - fn.exceptPass() - - # Then to the left - hgvs_stash = copy.deepcopy(hgvs_coding) - try: - hgvs_stash = variant.no_norm_evm.c_to_n(hgvs_stash) - except: - fn.exceptPass() - try: - stash_ac = hgvs_stash.ac - stash_dict = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, variant.primary_assembly, variant.reverse_normalizer, - validator.sf) - stash_pos = int(stash_dict['pos']) - stash_ref = stash_dict['ref'] - stash_alt = stash_dict['alt'] - # Generate an end position - stash_end = str(stash_pos + len(stash_ref) - 1) - # make a not real deletion insertion - stash_hgvs_not_delins = validator.hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' 
+ str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) - try: - stash_hgvs_not_delins = variant.no_norm_evm.n_to_c(stash_hgvs_not_delins) - except: - fn.exceptPass() - # Store a tx copy for later use - test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) - # stash_genomic = vm.t_to_g(test_stash_tx_left, hgvs_genomic.ac) - stash_genomic = validator.myvm_t_to_g(test_stash_tx_left, hgvs_genomic.ac, variant.no_norm_evm, variant.hn) - # Stash the outputs if required - # test variants = NC_000006.11:g.90403795G= (causes double identity) - # NC_000002.11:g.73675227_73675228insCTC - # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) - # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': - # pass - if len(test_stash_tx_left.posedit.edit.ref) == (( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): - stash_tx_left = test_stash_tx_left - if hasattr(test_stash_tx_left.posedit.edit, - 'alt') and test_stash_tx_left.posedit.edit.alt is not None: - alt = test_stash_tx_left.posedit.edit.alt - else: - alt = '' - if hasattr(stash_genomic.posedit.edit, - 'alt') and stash_genomic.posedit.edit.alt is not None: - g_alt = stash_genomic.posedit.edit.alt - else: - g_alt = '' - - if (len(alt) - ( - test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( - len(g_alt) - ( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - hgvs_genomic_possibilities.append(stash_genomic) - else: - hgvs_genomic_possibilities.append('') - elif test_stash_tx_left.posedit.edit.type == 'identity': - reform_ident = str(test_stash_tx_left).split(':')[0] - reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_left.posedit.pos) + 'del' + str( - test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) - hgvs_reform_ident = validator.hp.parse_hgvs_variant(reform_ident) - try: - variant.hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error): - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append('') - else: - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append(stash_genomic) - else: - try: - variant.hn.normalize(test_stash_tx_left) - except hgvs.exceptions.HGVSUnsupportedOperationError: - hgvs_genomic_possibilities.append('') - else: - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: - test_stash_tx_left = copy.deepcopy(hgvs_coding) - fn.exceptPass() - except ValueError: - test_stash_tx_left = copy.deepcopy(hgvs_coding) - fn.exceptPass() - - # direct mapping from reverse_normalized transcript insertions in the delins format - try: - if hgvs_coding.posedit.edit.type == 'ins': - most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) - most_3pr_hgvs_transcript_variant = variant.reverse_normalizer.normalize(hgvs_coding) - try: - n_3pr = validator.vm.c_to_n(most_3pr_hgvs_transcript_variant) - n_5pr = validator.vm.c_to_n(most_5pr_hgvs_transcript_variant) - except: - n_3pr = most_3pr_hgvs_transcript_variant - n_5pr = most_5pr_hgvs_transcript_variant - # Make into a delins by adding the ref bases to the variant ref and alt - pr3_ref = validator.sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, - n_3pr.posedit.pos.end.base) - pr5_ref = validator.sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, - n_5pr.posedit.pos.end.base) - most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref - most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref - 
most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[ - 0] + most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ - pr3_ref[1] - most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[ - 0] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ - pr5_ref[1] - # Map to the genome - genomic_from_most_3pr_hgvs_transcript_variant = validator.vm.t_to_g( - most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) - genomic_from_most_5pr_hgvs_transcript_variant = validator.vm.t_to_g( - most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) - # Normalize - If the variant spans a gap it should then form a static genomic variant - try: - genomic_from_most_3pr_hgvs_transcript_variant = variant.hn.normalize( - genomic_from_most_3pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if error == 'base start position must be <= end position': - start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base - end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start - genomic_from_most_3pr_hgvs_transcript_variant = variant.hn.normalize( - genomic_from_most_3pr_hgvs_transcript_variant) - try: - genomic_from_most_5pr_hgvs_transcript_variant = variant.hn.normalize( - genomic_from_most_5pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if error == 'base start position must be <= end position': - start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base - end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start - genomic_from_most_5pr_hgvs_transcript_variant = variant.hn.normalize( - 
genomic_from_most_5pr_hgvs_transcript_variant) - try: - if genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' + str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_3pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( - genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) - - try: - if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' 
+ str( - most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref - most_3pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( - most_3pr_hgvs_transcript_variant_delins_from_dup) - - try: - if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' + str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_5pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) - - try: - if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' 
+ str( - most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref - most_5pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( - most_5pr_hgvs_transcript_variant_delins_from_dup) - - if len(genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( - most_3pr_hgvs_transcript_variant.posedit.edit.alt): - hgvs_genomic_possibilities.append(genomic_from_most_3pr_hgvs_transcript_variant) - if len(genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( - most_5pr_hgvs_transcript_variant.posedit.edit.alt): - hgvs_genomic_possibilities.append(genomic_from_most_5pr_hgvs_transcript_variant) - - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - pass - - logger.info('\nGENOMIC POSSIBILITIES') - for possibility in hgvs_genomic_possibilities: - if possibility == '': - logger.info('X') - else: - logger.info(fn.valstr(possibility)) - - logger.info('\n') - - # Set variables for problem specific warnings - gapped_alignment_warning = '' - corrective_action_taken = '' - gapped_transcripts = '' - auto_info = '' - - # Mark as not disparity detected - disparity_deletion_in = ['false', 'false'] - - # Loop through to see if a gap can be located - # Set the variables required for corrective normalization - possibility_counter = 0 - suppress_c_normalization = 'false' # Applies to boundary crossing normalization - - # Copy a version of hgvs_genomic_possibilities - for possibility in hgvs_genomic_possibilities: - possibility_counter = possibility_counter + 1 - - # Loop out stash possibilities which will not spot gaps so 
are empty - if possibility == '': - continue - - # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps - hgvs_genomic_variant = copy.deepcopy(possibility) - stored_hgvs_genomic_variant = copy.deepcopy(hgvs_genomic_variant) - - # Reverse normalize hgvs_genomic_variant: NOTE will replace ref - try: - reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic_variant) - except hgvs.exceptions.HGVSError as e: - # Strange error caused by gap in genomic - error = str(e) - if re.search('base start position must be <= end position', error): - if hgvs_genomic.posedit.edit.type == 'delins': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) - if hgvs_genomic.posedit.edit.type == 'del': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) - if re.search('insertion length must be 1', error): - if hgvs_genomic.posedit.edit.type == 'ins': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) - lhb = 
validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start, end) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) - - hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - # Store a copy for later use - stored_hgvs_genomic_5pr = copy.deepcopy(hgvs_genomic_5pr) - - # Create VCF - vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, variant.primary_assembly, - variant.reverse_normalizer, validator.sf) - chr = vcf_dict['chr'] - pos = vcf_dict['pos'] - ref = vcf_dict['ref'] - alt = vcf_dict['alt'] - - # Look for exonic gaps within transcript or chromosome - no_normalized_c = 'false' # Mark true to not produce an additional normalization of c. - - # Generate an end position - end = str(int(pos) + len(ref) - 1) - pos = str(pos) - - # Store a not real deletion insertion to test for gapping - stored_hgvs_not_delins = validator.hp.parse_hgvs_variant(str( - hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' 
+ pos + '_' + end + 'del' + ref + 'ins' + alt) - v = [chr, pos, ref, alt] - - # Detect intronic variation using normalization - intronic_variant = 'false' - - # Save a copy of current hgvs_coding - try: - saved_hgvs_coding = variant.no_norm_evm.g_to_t(stored_hgvs_not_delins, hgvs_coding.ac) - except hgvs.exceptions.HGVSInvalidIntervalError as e: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - saved_hgvs_coding = hgvs_coding - intronic_variant = 'true' - continue - else: - saved_hgvs_coding = variant.no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, - hgvs_coding.ac) - - # Look for normalized variant options that do not match hgvs_coding - if orientation == -1: - # position genomic at its most 5 prime position - try: - query_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = variant.evm.g_to_t(query_genomic, hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if ( - hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding - - elif orientation != -1: - # position genomic at its most 3 prime position - try: - query_genomic = variant.hn.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript ant test for movement - try: - hgvs_seek_var = variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac 
= str(hgvs_seek_var.ac) - if ( - hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding - - try: - intron_test = variant.hn.normalize(hgvs_seek_var) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - intronic_variant = 'hard_fail' - else: - # Double check to see whether the variant is actually intronic? - for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' - - if intronic_variant != 'hard_fail': - if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', - str(hgvs_seek_var.posedit.pos)): - # Double check to see whether the variant is actually intronic? 
- for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' - - if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str(hgvs_seek_var.posedit.pos)): - # Double check to see whether the variant is actually intronic? - for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' - - if intronic_variant != 'true': - # Flag RefSeqGene for ammendment - # amend_RefSeqGene = 'false' - # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths - if stored_hgvs_not_delins != '': - # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) - # This test will only occur in dup of single base, insertion or substitution - if not re.search('_', str(hgvs_not_delins.posedit.pos)): - if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', - hgvs_genomic_5pr.posedit.edit.type): - # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos - plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) - plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 - plussed_hgvs_not_delins.posedit.edit.ref = '' - transcript_variant = variant.no_norm_evm.g_to_t(plussed_hgvs_not_delins, - str(saved_hgvs_coding.ac)) - if (( - transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( - hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str( - hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - else: - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = 
hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str( - hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - else: - pass - else: - pass - try: - tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSInvalidIntervalError: - tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, - saved_hgvs_coding.ac) - # Create normalized version of tx_hgvs_not_delins - rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) - - # Check for +1 base and adjust - if re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - r'\+', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - pass - - elif re.search(r'\+', 
str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, - variant.primary_assembly, variant.hn) - - rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - - # tx_hgvs_not_delins = rn_tx_hgvs_not_delins - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, - variant.primary_assembly, variant.hn) - rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: - # pass - - # Check for -ve base and adjust - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - r'\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - pass - elif 
re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # Delete the ref - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # Add the additional base to the ALT - start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) - rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, - variant.primary_assembly, variant.hn) - rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, - variant.primary_assembly, variant.hn) - rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - else: - pass - - # Logic - if len(hgvs_not_delins.posedit.edit.ref) < len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( - hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['chromosome', gap_length] - elif 
len(hgvs_not_delins.posedit.edit.ref) > len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( - rn_tx_hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] - else: - re_capture_tx_variant = [] - for internal_possibility in hgvs_genomic_possibilities: - if internal_possibility == '': - continue - - hgvs_t_possibility = validator.vm.g_to_t(internal_possibility, hgvs_coding.ac) - if hgvs_t_possibility.posedit.edit.type == 'ins': - try: - hgvs_t_possibility = validator.vm.c_to_n(hgvs_t_possibility) - except: - fn.exceptPass() - ins_ref = validator.sf.fetch_seq(hgvs_t_possibility.ac, - hgvs_t_possibility.posedit.pos.start.base - 1, - hgvs_t_possibility.posedit.pos.start.base + 1) - try: - hgvs_t_possibility = validator.vm.n_to_c(hgvs_t_possibility) - except: - fn.exceptPass() - hgvs_t_possibility.posedit.edit.ref = ins_ref - hgvs_t_possibility.posedit.edit.alt = ins_ref[ - 0] + hgvs_t_possibility.posedit.edit.alt + \ - ins_ref[1] - if internal_possibility.posedit.edit.type == 'ins': - ins_ref = validator.sf.fetch_seq(internal_possibility.ac, - internal_possibility.posedit.pos.start.base - 1, - internal_possibility.posedit.pos.end.base) - internal_possibility.posedit.edit.ref = ins_ref - internal_possibility.posedit.edit.alt = ins_ref[ - 0] + internal_possibility.posedit.edit.alt + \ - ins_ref[1] - - if len(hgvs_t_possibility.posedit.edit.ref) < len( - internal_possibility.posedit.edit.ref): - gap_length = len(internal_possibility.posedit.edit.ref) - len( - hgvs_t_possibility.posedit.edit.ref) - re_capture_tx_variant = ['transcript', gap_length, - hgvs_t_possibility] - hgvs_not_delins = internal_possibility - hgvs_genomic_5pr = internal_possibility - break - - if re_capture_tx_variant != []: - try: - tx_hgvs_not_delins = validator.vm.c_to_n(re_capture_tx_variant[2]) - except: - tx_hgvs_not_delins = re_capture_tx_variant[2] - disparity_deletion_in = re_capture_tx_variant[0:-1] - else: - 
pass - - # 'At hgvs_genomic' - # Final sanity checks - try: - validator.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) - except Exception as e: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - continue - try: - variant.hn.normalize(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - continue - elif re.match('Normalization of intronic variants is not supported', error): - # We know that this cannot be because of an intronic variant, so must be aligned to tx gap - disparity_deletion_in = ['transcript', 'Requires Analysis'] - - # amend_RefSeqGene = 'false' - # Recreate hgvs_genomic - if disparity_deletion_in[0] == 'transcript': - hgvs_genomic = hgvs_not_delins - - # Find oddly placed gaps where the tx variant is encompassed in the gap - if disparity_deletion_in[0] == 'false' and ( - possibility_counter == 3 or possibility_counter == 4): - rg = variant.reverse_normalizer.normalize(hgvs_not_delins) - rtx = validator.vm.g_to_t(rg, tx_hgvs_not_delins.ac) - fg = variant.hn.normalize(hgvs_not_delins) - ftx = validator.vm.g_to_t(fg, tx_hgvs_not_delins.ac) - if (rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( - ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): - exons = validator.hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, validator.alt_aln_method) - exonic = False - for ex_test in exons: - if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ - 7]) and ftx.posedit.pos.end.base in range(ex_test[6], ex_test[7]): - exonic = True - if exonic is True: - hgvs_not_delins = fg - hgvs_genomic = fg - hgvs_genomic_5pr = fg - try: - tx_hgvs_not_delins = 
validator.vm.c_to_n(ftx) - except Exception: - tx_hgvs_not_delins = ftx - disparity_deletion_in = ['transcript', 'Requires Analysis'] - - # Pre-processing of tx_hgvs_not_delins - try: - if tx_hgvs_not_delins.posedit.edit.alt is None: - tx_hgvs_not_delins.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( - tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = validator.hp.parse_hgvs_variant( - tx_hgvs_not_delins_delins_from_dup) - - # GAP IN THE TRANSCRIPT DISPARITY DETECTED - if disparity_deletion_in[0] == 'transcript': - # Suppress intron boundary crossing due to non-intron intron based c. seq annotations - suppress_c_normalization = 'true' - # amend_RefSeqGene = 'true' - # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( - r'\-', - str( - tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( - r'\-', - str( - tx_hgvs_not_delins.posedit.pos.end))): - - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - - # Copy the current variant - tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) - try: - if tx_gap_fill_variant.posedit.edit.alt is None: - tx_gap_fill_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( - tx_gap_fill_variant.posedit.pos.start) + '_' + str( - tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = validator.hp.parse_hgvs_variant( - tx_gap_fill_variant_delins_from_dup) - - # Identify which half of the NOT-intron the start position of the variant is in - if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - - try: - tx_gap_fill_variant = validator.vm.n_to_c(tx_gap_fill_variant) - except: - fn.exceptPass() - genomic_gap_fill_variant = validator.vm.t_to_g(tx_gap_fill_variant, - reverse_normalized_hgvs_genomic.ac) - genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - - try: - c_tx_hgvs_not_delins = validator.vm.n_to_c(tx_hgvs_not_delins) - except Exception: - c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = validator.vm.t_to_g(c_tx_hgvs_not_delins, - hgvs_genomic_5pr.ac) - - # Ensure an ALT exists - try: - if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_gap_fill_variant_delins_from_dup 
= genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( - genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = validator.hp.parse_hgvs_variant( - genomic_gap_fill_variant_delins_from_dup) - genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( - genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = validator.hp.parse_hgvs_variant( - genomic_gap_fill_variant_alt_delins_from_dup) - - # Correct insertion alts - if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - genomic_gap_fill_variant_alt.posedit.pos.end.base) - genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - append_ref[1] - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) - else: - # Deletions with no ins - pre_alternate_bases = list( - genomic_gap_fill_variant_alt.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = genomic_gap_fill_variant.posedit.pos.start.base - alt_start = 
genomic_gap_fill_variant_alt.posedit.pos.start.base - ref_base_dict = {} - for base in reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = ref_start + 1 - - alt_base_dict = {} - - # NEED TO SEARCH FOR RANGE = and replace with interval_range - # Need to search for int and replace with integer - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, - genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, - 1): - if integer == alt_start: - alt_base_dict[integer] = str(''.join(alternate_bases)) - else: - alt_base_dict[integer] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, - genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): - if integer in list(alt_base_dict.keys()): - alternate_sequence_bases.append(alt_base_dict[integer]) - else: - alternate_sequence_bases.append(ref_base_dict[integer]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Add the new alt to the gap fill variant and generate transcript variant - genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = validator.vm.g_to_t(genomic_gap_fill_variant, - tx_gap_fill_variant.ac) - - # Set warning - gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - disparity_deletion_in[1] = [gap_size] - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - else: - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - auto_info = auto_info + '%s' % (gap_position) - - else: - if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # In this instance, we have identified a transcript gap but the n. version of - # the transcript variant but do not have a position which actually hits the gap, - # so the variant likely spans the gap, and is not picked up by an offset. 
- try: - c1 = validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - g1 = validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) - g3 = validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - ng2 = variant.hn.normalize(g2) - g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - len(g3.posedit.edit.ref) - 1) - try: - c2 = validator.vm.g_to_t(g3, c1.ac) - if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - pass - else: - tx_hgvs_not_delins = c2 - try: - tx_hgvs_not_delins = validator.vm.c_to_n(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSError: - fn.exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError: - fn.exceptPass() - - if re.search(r'\+', - str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - 
g3.posedit.edit.alt = alternate - c3 = validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\+', - str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment 
position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - gpe = for_location_c.posedit.pos.end.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', - str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = 
for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', - str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - 1 - gpe = for_location_c.posedit.pos.end.base - gap_position = ' between 
positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - else: - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) + '\n' - tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.start.base + len( - tx_hgvs_not_delins.posedit.edit.ref) - 1 - hgvs_refreshed_variant = tx_hgvs_not_delins - - # GAP IN THE CHROMOSOME - elif disparity_deletion_in[0] == 'chromosome': - suppress_c_normalization = 'true' - # amend_RefSeqGene = 'true' - if possibility_counter == 3: - hgvs_refreshed_variant = stash_tx_right - elif possibility_counter == 4: - hgvs_refreshed_variant = stash_tx_left - else: - hgvs_refreshed_variant = chromosome_normalized_hgvs_coding - # Warn - auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( - hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' transcript base(s) that fail to align to chromosome ' + str( - hgvs_genomic.ac) + '\n' - else: - # Keep the same by re-setting rel_var - hgvs_refreshed_variant = hgvs_coding - # amend_RefSeqGene = 'false' - - # Edit the output - if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( - hgvs_refreshed_variant.type)): - hgvs_refreshed_variant = variant.no_norm_evm.n_to_c(hgvs_refreshed_variant) - else: - pass - - try: - variant.hn.normalize(hgvs_refreshed_variant) - except Exception as e: - error = str(e) - - # Ensure the final variant is not intronic nor does it cross exon boundaries - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - hgvs_refreshed_variant = saved_hgvs_coding - else: - logger.warning(error) - continue - - # Quick check to 
make sure the coding variant has not changed - try: - to_test = variant.hn.normalize(hgvs_refreshed_variant) - except: - to_test = hgvs_refreshed_variant - if str(to_test.posedit.edit) != str(hgvs_coding.posedit.edit): - # Try the next available genomic option - if hgvs_coding.posedit.edit.type == 'identity' and to_test.posedit.edit.type == 'identity': - hgvs_coding = to_test - else: - continue - # Update hgvs_genomic - hgvs_genomic = validator.myvm_t_to_g(hgvs_refreshed_variant, hgvs_genomic.ac, - variant.no_norm_evm,variant.hn) - if hgvs_genomic.posedit.edit.type == 'identity': - re_c = validator.vm.g_to_t(hgvs_genomic, hgvs_refreshed_variant.ac) - if (variant.hn.normalize(re_c)) != (variant.hn.normalize(hgvs_refreshed_variant)): - shuffle_left_g = copy.copy(hgvs_genomic) - shuffle_left_g.posedit.edit.ref = '' - shuffle_left_g.posedit.edit.alt = '' - shuffle_left_g.posedit.pos.start.base = shuffle_left_g.posedit.pos.start.base - 1 - shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 - shuffle_left_g = variant.reverse_normalizer.normalize(shuffle_left_g) - re_c = validator.vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) - if (variant.hn.normalize(re_c)) != (variant.hn.normalize(hgvs_refreshed_variant)): - hgvs_genomic = shuffle_left_g - - # If it is intronic, these vairables will not have been set - else: - # amend_RefSeqGene = 'false' - no_normalized_c = 'false' - - # Break if gap has been detected - if disparity_deletion_in[0] != 'false': - break - - # Warn user about gapping - if auto_info != '': - info_lines = auto_info.split('\n') - info_keys = {} - for information in info_lines: - info_keys[information] = '' - info_out = [] - info_out.append( - 'The displayed variants may be artefacts of aligning ' + hgvs_coding.ac + ' with genome build ' + variant.primary_assembly) - for ky in list(info_keys.keys()): - info_out.append(ky) - auto_info = '\n'.join(info_out) - auto_info = auto_info + '\nCaution should be used when reporting the 
displayed variant descriptions: If you are unsure, please contact admin' - auto_info = str(auto_info.replace('\n', ': ')) - variant.warnings += ': ' + str(auto_info) - logger.warning(str(auto_info)) - # Normailse hgvs_genomic - try: - hgvs_genomic = variant.hn.normalize(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: - # Strange error caused by gap in genomic - error = str(e) - - if re.search('base start position must be <= end position', error) and \ - disparity_deletion_in[0] == 'chromosome': - if hgvs_genomic.posedit.edit.type == 'delins': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - hgvs_genomic = variant.hn.normalize(hgvs_genomic) - if hgvs_genomic.posedit.edit.type == 'del': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - hgvs_genomic = variant.hn.normalize(hgvs_genomic) - genomic = fn.valstr(hgvs_genomic) + hgvs_genomic, gapped_transcripts, auto_info, suppress_c_normalization, hgvs_coding = gapped_mapping.g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var) else: stored_hgvs_genomic_variant = hgvs_genomic @@ -3121,8 +1791,8 @@ def transcripts_to_gene(variant, validator): variant.genomic_g = str(hgvs_genomic) variant.protein = str(hgvs_protein) - if gap_compensation is True: - variant.test_stash_tx_left = 
test_stash_tx_left - variant.test_stash_tx_right = test_stash_tx_right + # if gap_compensation is True: + # variant.test_stash_tx_left = test_stash_tx_left + # variant.test_stash_tx_right = test_stash_tx_right return False \ No newline at end of file From 22c13da583d716a7fbc6114c9fac53f081367d3d Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 24 Apr 2019 10:41:58 +0100 Subject: [PATCH 063/223] Moved last gap mapping section into the new file --- VariantValidator/modules/gapped_mapping.py | 731 +++++ VariantValidator/modules/mappers.py | 728 +---- VariantValidator/modules/vvMixinCore.py | 3217 +------------------- 3 files changed, 747 insertions(+), 3929 deletions(-) diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index db82d0bb..09834a1c 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -2289,3 +2289,734 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): print('in gapped_mapping', hgvs_coding) return hgvs_genomic, gapped_transcripts, auto_info, suppress_c_normalization, hgvs_coding + + +def g_to_t_gapped_mapping_stage2(validator, variant, ori, hgvs_coding, hgvs_genomic_5pr, saved_hgvs_coding, stored_hgvs_not_delins, gapped_transcripts, hgvs_genomic_possibilities, auto_info, reverse_normalized_hgvs_genomic, hgvs_genomic): + logger.warning('g_to_t gap code 2 active') + + orientation = int(ori[0]['alt_strand']) + + # is it in an exon? 
+ is_it_in_an_exon = 'no' + for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + # Take from stored copy + # hgvs_genomic_5pr = copy.deepcopy(stored_hgvs_genomic_5pr) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + is_it_in_an_exon = 'yes' + if is_it_in_an_exon == 'yes': + # map form reverse normalized g. to c. + hgvs_from_5n_g = variant.no_norm_evm.g_to_t(hgvs_genomic_5pr, saved_hgvs_coding.ac) + + # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths + disparity_deletion_in = ['false', 'false'] + if stored_hgvs_not_delins != '': + # Refresh hgvs_not_delins from stored_hgvs_not_delins + hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) + # This test will only occur in dup of single base, insertion or substitution + if not re.search('_', str(hgvs_not_delins.posedit.pos)): + if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', + hgvs_genomic_5pr.posedit.edit.type): + # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos + plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) + plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 + plussed_hgvs_not_delins.posedit.edit.ref = '' + transcript_variant = variant.no_norm_evm.g_to_t(plussed_hgvs_not_delins, + str(saved_hgvs_coding.ac)) + if (( + transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( + hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', + str(hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + else: + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = 
hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', + str(hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + else: + pass + else: + pass + + hard_fail = 'false' + try: + tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) + except Exception as e: + if str(e) == 'start or end or both are beyond the bounds of transcript record': + tx_hgvs_not_delins = hgvs_coding + hard_fail = 'true' + + # Create normalized version of tx_hgvs_not_delins + rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) + # Check for +ve base and adjust + if re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\+', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: + fn.exceptPass() + + elif 
re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # else: + # pass + + # Check for -ve base and adjust + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: + fn.exceptPass() + elif re.search(r'\-', 
str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base back to next available non-offset base + # rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base - 1 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # Delete the ref + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # Add the additional base to the ALT + start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 + end = rn_tx_hgvs_not_delins.posedit.pos.end.base + ref_bases = validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, + variant.primary_assembly, variant.hn) + rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + else: + pass + + # Logic + if len(hgvs_not_delins.posedit.edit.ref) < len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = 
len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( + hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['chromosome', gap_length] + elif len(hgvs_not_delins.posedit.edit.ref) > len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( + rn_tx_hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['transcript', gap_length] + else: + re_capture_tx_variant = [] + for internal_possibility in hgvs_genomic_possibilities: + + if internal_possibility == '': + continue + + hgvs_t_possibility = validator.vm.g_to_t(internal_possibility, hgvs_coding.ac) + if hgvs_t_possibility.posedit.edit.type == 'ins': + try: + hgvs_t_possibility = validator.vm.c_to_n(hgvs_t_possibility) + except: + fn.exceptPass() + ins_ref = validator.sf.fetch_seq(hgvs_t_possibility.ac, + hgvs_t_possibility.posedit.pos.start.base - 1, + hgvs_t_possibility.posedit.pos.start.base + 1) + try: + hgvs_t_possibility = validator.vm.n_to_c(hgvs_t_possibility) + except: + fn.exceptPass() + hgvs_t_possibility.posedit.edit.ref = ins_ref + hgvs_t_possibility.posedit.edit.alt = ins_ref[ + 0] + hgvs_t_possibility.posedit.edit.alt + \ + ins_ref[1] + if internal_possibility.posedit.edit.type == 'ins': + ins_ref = validator.sf.fetch_seq(internal_possibility.ac, + internal_possibility.posedit.pos.start.base - 1, + internal_possibility.posedit.pos.end.base) + internal_possibility.posedit.edit.ref = ins_ref + internal_possibility.posedit.edit.alt = ins_ref[ + 0] + internal_possibility.posedit.edit.alt + \ + ins_ref[1] + + if len(hgvs_t_possibility.posedit.edit.ref) < len( + internal_possibility.posedit.edit.ref): + gap_length = len(internal_possibility.posedit.edit.ref) - len( + hgvs_t_possibility.posedit.edit.ref) + re_capture_tx_variant = ['transcript', gap_length, hgvs_t_possibility] + hgvs_not_delins = internal_possibility + hgvs_genomic_5pr = internal_possibility + break + + if re_capture_tx_variant != []: + try: + tx_hgvs_not_delins = 
validator.vm.c_to_n(re_capture_tx_variant[2]) + except: + tx_hgvs_not_delins = re_capture_tx_variant[2] + disparity_deletion_in = re_capture_tx_variant[0:-1] + else: + pass + + # Final sanity checks + try: + validator.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + except Exception as e: + if str(e) == 'start or end or both are beyond the bounds of transcript record': + logger.warning(str(e)) + return True + try: + variant.hn.normalize(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + logger.warning(error) + return True + elif re.match('Normalization of intronic variants is not supported', error): + # We know that this cannot be because of an intronic variant, so must be aligned to tx gap + disparity_deletion_in = ['transcript', 'Requires Analysis'] + + if hard_fail == 'true': + disparity_deletion_in = ['false', 'false'] + + # Recreate hgvs_genomic + if disparity_deletion_in[0] == 'transcript': + hgvs_genomic = hgvs_not_delins + + # Pre-processing of tx_hgvs_not_delins + try: + if tx_hgvs_not_delins.posedit.edit.alt is None: + tx_hgvs_not_delins.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' 
+ str( + tx_hgvs_not_delins.posedit.pos.start) + '_' + str( + tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins = validator.hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) + + # GAP IN THE TRANSCRIPT DISPARITY DETECTED + if disparity_deletion_in[0] == 'transcript': + gap_position = '' + gapped_alignment_warning = str( + hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + variant.primary_assembly + + # ANY VARIANT WHOLLY WITHIN THE GAP + if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', + str( + tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', + str( + tx_hgvs_not_delins.posedit.pos.end))): + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + + # Copy the current variant + tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) + try: + if tx_gap_fill_variant.posedit.edit.alt is None: + tx_gap_fill_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( + tx_gap_fill_variant.posedit.pos.start) + '_' + str( + tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant = validator.hp.parse_hgvs_variant( + tx_gap_fill_variant_delins_from_dup) + + # Identify which half of the NOT-intron the start position of the variant is in + if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + + try: + tx_gap_fill_variant = validator.vm.n_to_c(tx_gap_fill_variant) + except: + fn.exceptPass() + genomic_gap_fill_variant = validator.vm.t_to_g(tx_gap_fill_variant, + reverse_normalized_hgvs_genomic.ac) + genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref + + try: + c_tx_hgvs_not_delins = validator.vm.n_to_c(tx_hgvs_not_delins) + except Exception: + c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) + genomic_gap_fill_variant_alt = validator.vm.t_to_g(c_tx_hgvs_not_delins, + hgvs_genomic_5pr.ac) + + # Ensure an ALT exists + try: + if genomic_gap_fill_variant_alt.posedit.edit.alt is None: + genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_gap_fill_variant_delins_from_dup 
= genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( + genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant = validator.hp.parse_hgvs_variant( + genomic_gap_fill_variant_delins_from_dup) + genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( + genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt = validator.hp.parse_hgvs_variant( + genomic_gap_fill_variant_alt_delins_from_dup) + + # Correct insertion alts + if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': + append_ref = validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, + genomic_gap_fill_variant_alt.posedit.pos.end.base) + genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ + 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ + append_ref[1] + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) + if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: + alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) + else: + # Deletions with no ins + pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = genomic_gap_fill_variant.posedit.pos.start.base + alt_start = 
genomic_gap_fill_variant_alt.posedit.pos.start.base + ref_base_dict = {} + for base in reference_bases: + ref_base_dict[ref_start] = str(base) + ref_start = ref_start + 1 + + alt_base_dict = {} + + # NEED TO SEARCH FOR RANGE = and replace with interval_range + # Need to search for int and replace with integer + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, + genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): + if integer == alt_start: + alt_base_dict[integer] = str(''.join(alternate_bases)) + else: + alt_base_dict[integer] = 'X' + + # Generate the alt sequence + alternate_sequence_bases = [] + for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, + genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): + if integer in list(alt_base_dict.keys()): + alternate_sequence_bases.append(alt_base_dict[integer]) + else: + alternate_sequence_bases.append(ref_base_dict[integer]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Add the new alt to the gap fill variant and generate transcript variant + genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence + hgvs_refreshed_variant = validator.vm.g_to_t(genomic_gap_fill_variant, + tx_gap_fill_variant.ac) + + # Set warning + gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) + disparity_deletion_in[1] = [gap_size] + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + else: + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + auto_info = auto_info + '%s' % (gap_position) + + else: + if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: + # In this instance, we have identified a transcript gap but the n. version of + # the transcript variant but do not have a position which actually hits the gap, + # so the variant likely spans the gap, and is not picked up by an offset. 
+ try: + c1 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + g1 = validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g3 = validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + ng2 = variant.hn.normalize(g2) + g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( + len(g3.posedit.edit.ref) - 1) + try: + c2 = validator.vm.g_to_t(g3, c1.ac) + if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: + pass + else: + tx_hgvs_not_delins = c2 + try: + tx_hgvs_not_delins = validator.vm.c_to_n(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSError: + fn.exceptPass() + except hgvs.exceptions.HGVSInvalidVariantError: + fn.exceptPass() + + if re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + 
g3.posedit.edit.alt = alternate + c3 = validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position 
+ for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base + gpe = for_location_c.posedit.pos.end.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\-', + str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = 
for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base - 1 + gpe = for_location_c.posedit.pos.end.base + gap_position = ' between positions 
c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + else: + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + '\n' + hgvs_refreshed_variant = tx_hgvs_not_delins + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + + # GAP IN THE CHROMOSOME + + elif disparity_deletion_in[0] == 'chromosome': + # Set warning variables + gap_position = '' + gapped_alignment_warning = str( + hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + variant.primary_assembly + hgvs_refreshed_variant = tx_hgvs_not_delins + # Warn + auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( + hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(disparity_deletion_in[ + 1]) + ' transcript base(s) that fail to align to chromosome ' + str( + hgvs_genomic.ac) + '\n' + gapped_transcripts = gapped_transcripts + str(hgvs_refreshed_variant.ac) + ' ' + else: + # Keep the same by re-setting rel_var + hgvs_refreshed_variant = saved_hgvs_coding + + # Edit the output + if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( + hgvs_refreshed_variant.type)): + hgvs_refreshed_variant = variant.evm.n_to_c(hgvs_refreshed_variant) + else: + pass + try: + hgvs_refreshed_variant = variant.hn.normalize(hgvs_refreshed_variant) + if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ + hgvs_refreshed_variant.posedit.edit.ref[-1] == \ + hgvs_refreshed_variant.posedit.edit.alt[-1]: + hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ + 0:-1] + hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ + 0:-1] + hgvs_refreshed_variant.posedit.pos.end.base = 
hgvs_refreshed_variant.posedit.pos.end.base - 1 + hgvs_refreshed_variant = variant.hn.normalize(hgvs_refreshed_variant) + elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ + hgvs_refreshed_variant.posedit.edit.ref[0] == \ + hgvs_refreshed_variant.posedit.edit.alt[0]: + hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ + 1:] + hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ + 1:] + hgvs_refreshed_variant.posedit.pos.start.base = hgvs_refreshed_variant.posedit.pos.start.base + 1 + hgvs_refreshed_variant = variant.hn.normalize(hgvs_refreshed_variant) + except Exception as e: + error = str(e) + # Ensure the final variant is not intronic nor does it cross exon boundaries + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + hgvs_refreshed_variant = saved_hgvs_coding + else: + pass + + # Sort out equality to equality c. events where the code will add 2 additional bases + if hgvs_coding.posedit.edit.type == 'identity' and hgvs_refreshed_variant.posedit.edit.type == 'identity': # and len(hgvs_refreshed_variant.posedit.edit.ref) == (len(hgvs_coding.posedit.edit.ref) + 2): + pass + else: + hgvs_coding = copy.deepcopy(hgvs_refreshed_variant) + coding = fn.valstr(hgvs_coding) + formatted_variant = coding + + return hgvs_coding diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index 1e3a5cc8..3d903971 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -830,730 +830,10 @@ def transcripts_to_gene(variant, validator): # Loop out gap finding code under these circumstances! logger.warning("gap_compensation_2 = " + str(gap_compensation)) if gap_compensation is True: - logger.warning('g_to_t gap code 2 active') - # is it in an exon? 
- is_it_in_an_exon = 'no' - for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - # Take from stored copy - # hgvs_genomic_5pr = copy.deepcopy(stored_hgvs_genomic_5pr) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - is_it_in_an_exon = 'yes' - if is_it_in_an_exon == 'yes': - # map form reverse normalized g. to c. - hgvs_from_5n_g = variant.no_norm_evm.g_to_t(hgvs_genomic_5pr, saved_hgvs_coding.ac) - - # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths - disparity_deletion_in = ['false', 'false'] - if stored_hgvs_not_delins != '': - # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) - # This test will only occur in dup of single base, insertion or substitution - if not re.search('_', str(hgvs_not_delins.posedit.pos)): - if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', - hgvs_genomic_5pr.posedit.edit.type): - # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos - plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) - plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 - plussed_hgvs_not_delins.posedit.edit.ref = '' - transcript_variant = variant.no_norm_evm.g_to_t(plussed_hgvs_not_delins, - str(saved_hgvs_coding.ac)) - if (( - transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( - hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - else: - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = 
hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - else: - pass - else: - pass - - hard_fail = 'false' - try: - tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) - except Exception as e: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - tx_hgvs_not_delins = hgvs_coding - hard_fail = 'true' - - # Create normalized version of tx_hgvs_not_delins - rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) - # Check for +ve base and adjust - if re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\+', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - fn.exceptPass() - - elif 
re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, - variant.primary_assembly, variant.hn) - rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, - variant.primary_assembly, variant.hn) - rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: - # pass - - # Check for -ve base and adjust - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - fn.exceptPass() - elif re.search(r'\-', 
str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base back to next available non-offset base - # rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base - 1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # Delete the ref - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # Add the additional base to the ALT - start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) - rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, - variant.primary_assembly, variant.hn) - rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, - variant.primary_assembly, variant.hn) - rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - else: - pass - - # Logic - if len(hgvs_not_delins.posedit.edit.ref) < len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = 
len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( - hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['chromosome', gap_length] - elif len(hgvs_not_delins.posedit.edit.ref) > len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( - rn_tx_hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] - else: - re_capture_tx_variant = [] - for internal_possibility in hgvs_genomic_possibilities: - - if internal_possibility == '': - continue - - hgvs_t_possibility = validator.vm.g_to_t(internal_possibility, hgvs_coding.ac) - if hgvs_t_possibility.posedit.edit.type == 'ins': - try: - hgvs_t_possibility = validator.vm.c_to_n(hgvs_t_possibility) - except: - fn.exceptPass() - ins_ref = validator.sf.fetch_seq(hgvs_t_possibility.ac, - hgvs_t_possibility.posedit.pos.start.base - 1, - hgvs_t_possibility.posedit.pos.start.base + 1) - try: - hgvs_t_possibility = validator.vm.n_to_c(hgvs_t_possibility) - except: - fn.exceptPass() - hgvs_t_possibility.posedit.edit.ref = ins_ref - hgvs_t_possibility.posedit.edit.alt = ins_ref[ - 0] + hgvs_t_possibility.posedit.edit.alt + \ - ins_ref[1] - if internal_possibility.posedit.edit.type == 'ins': - ins_ref = validator.sf.fetch_seq(internal_possibility.ac, - internal_possibility.posedit.pos.start.base - 1, - internal_possibility.posedit.pos.end.base) - internal_possibility.posedit.edit.ref = ins_ref - internal_possibility.posedit.edit.alt = ins_ref[ - 0] + internal_possibility.posedit.edit.alt + \ - ins_ref[1] - - if len(hgvs_t_possibility.posedit.edit.ref) < len( - internal_possibility.posedit.edit.ref): - gap_length = len(internal_possibility.posedit.edit.ref) - len( - hgvs_t_possibility.posedit.edit.ref) - re_capture_tx_variant = ['transcript', gap_length, hgvs_t_possibility] - hgvs_not_delins = internal_possibility - hgvs_genomic_5pr = internal_possibility - break - - if re_capture_tx_variant != []: - try: - tx_hgvs_not_delins = 
validator.vm.c_to_n(re_capture_tx_variant[2]) - except: - tx_hgvs_not_delins = re_capture_tx_variant[2] - disparity_deletion_in = re_capture_tx_variant[0:-1] - else: - pass - - # Final sanity checks - try: - validator.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) - except Exception as e: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - logger.warning(str(e)) - return True - try: - variant.hn.normalize(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - logger.warning(error) - return True - elif re.match('Normalization of intronic variants is not supported', error): - # We know that this cannot be because of an intronic variant, so must be aligned to tx gap - disparity_deletion_in = ['transcript', 'Requires Analysis'] - - if hard_fail == 'true': - disparity_deletion_in = ['false', 'false'] - - # Recreate hgvs_genomic - if disparity_deletion_in[0] == 'transcript': - hgvs_genomic = hgvs_not_delins - - # Pre-processing of tx_hgvs_not_delins - try: - if tx_hgvs_not_delins.posedit.edit.alt is None: - tx_hgvs_not_delins.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' 
+ str( - tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = validator.hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) - - - # GAP IN THE TRANSCRIPT DISPARITY DETECTED - if disparity_deletion_in[0] == 'transcript': - gap_position = '' - gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + variant.primary_assembly - - # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', - str( - tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', - str( - tx_hgvs_not_delins.posedit.pos.end))): - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - - # Copy the current variant - tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) - try: - if tx_gap_fill_variant.posedit.edit.alt is None: - tx_gap_fill_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( - tx_gap_fill_variant.posedit.pos.start) + '_' + str( - tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = validator.hp.parse_hgvs_variant( - tx_gap_fill_variant_delins_from_dup) - - # Identify which half of the NOT-intron the start position of the variant is in - if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - - try: - tx_gap_fill_variant = validator.vm.n_to_c(tx_gap_fill_variant) - except: - fn.exceptPass() - genomic_gap_fill_variant = validator.vm.t_to_g(tx_gap_fill_variant, - reverse_normalized_hgvs_genomic.ac) - genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - - try: - c_tx_hgvs_not_delins = validator.vm.n_to_c(tx_hgvs_not_delins) - except Exception: - c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = validator.vm.t_to_g(c_tx_hgvs_not_delins, - hgvs_genomic_5pr.ac) - - # Ensure an ALT exists - try: - if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_gap_fill_variant_delins_from_dup 
= genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( - genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = validator.hp.parse_hgvs_variant( - genomic_gap_fill_variant_delins_from_dup) - genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( - genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = validator.hp.parse_hgvs_variant( - genomic_gap_fill_variant_alt_delins_from_dup) - - # Correct insertion alts - if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - genomic_gap_fill_variant_alt.posedit.pos.end.base) - genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - append_ref[1] - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) - else: - # Deletions with no ins - pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = genomic_gap_fill_variant.posedit.pos.start.base - alt_start = 
genomic_gap_fill_variant_alt.posedit.pos.start.base - ref_base_dict = {} - for base in reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = ref_start + 1 - - alt_base_dict = {} - - # NEED TO SEARCH FOR RANGE = and replace with interval_range - # Need to search for int and replace with integer - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, - genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): - if integer == alt_start: - alt_base_dict[integer] = str(''.join(alternate_bases)) - else: - alt_base_dict[integer] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, - genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): - if integer in list(alt_base_dict.keys()): - alternate_sequence_bases.append(alt_base_dict[integer]) - else: - alternate_sequence_bases.append(ref_base_dict[integer]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Add the new alt to the gap fill variant and generate transcript variant - genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = validator.vm.g_to_t(genomic_gap_fill_variant, - tx_gap_fill_variant.ac) - - # Set warning - gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - disparity_deletion_in[1] = [gap_size] - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - else: - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - auto_info = auto_info + '%s' % (gap_position) - - else: - if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # In this instance, we have identified a transcript gap but the n. version of - # the transcript variant but do not have a position which actually hits the gap, - # so the variant likely spans the gap, and is not picked up by an offset. 
- try: - c1 = validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - g1 = validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) - g3 = validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - ng2 = variant.hn.normalize(g2) - g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - len(g3.posedit.edit.ref) - 1) - try: - c2 = validator.vm.g_to_t(g3, c1.ac) - if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - pass - else: - tx_hgvs_not_delins = c2 - try: - tx_hgvs_not_delins = validator.vm.c_to_n(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSError: - fn.exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError: - fn.exceptPass() - - if re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - 
g3.posedit.edit.alt = alternate - c3 = validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position 
- for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - gpe = for_location_c.posedit.pos.end.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', - str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = 
for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - 1 - gpe = for_location_c.posedit.pos.end.base - gap_position = ' between positions 
c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - else: - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) + '\n' - hgvs_refreshed_variant = tx_hgvs_not_delins - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - - # GAP IN THE CHROMOSOME - - elif disparity_deletion_in[0] == 'chromosome': - # Set warning variables - gap_position = '' - gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + variant.primary_assembly - hgvs_refreshed_variant = tx_hgvs_not_delins - # Warn - auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( - hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(disparity_deletion_in[ - 1]) + ' transcript base(s) that fail to align to chromosome ' + str( - hgvs_genomic.ac) + '\n' - gapped_transcripts = gapped_transcripts + str(hgvs_refreshed_variant.ac) + ' ' - else: - # Keep the same by re-setting rel_var - hgvs_refreshed_variant = saved_hgvs_coding - - # Edit the output - if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( - hgvs_refreshed_variant.type)): - hgvs_refreshed_variant = variant.evm.n_to_c(hgvs_refreshed_variant) - else: - pass - try: - hgvs_refreshed_variant = variant.hn.normalize(hgvs_refreshed_variant) - if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - hgvs_refreshed_variant.posedit.edit.ref[-1] == \ - hgvs_refreshed_variant.posedit.edit.alt[-1]: - hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - 0:-1] - hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - 0:-1] - hgvs_refreshed_variant.posedit.pos.end.base = 
hgvs_refreshed_variant.posedit.pos.end.base - 1 - hgvs_refreshed_variant = variant.hn.normalize(hgvs_refreshed_variant) - elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - hgvs_refreshed_variant.posedit.edit.ref[0] == \ - hgvs_refreshed_variant.posedit.edit.alt[0]: - hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - 1:] - hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - 1:] - hgvs_refreshed_variant.posedit.pos.start.base = hgvs_refreshed_variant.posedit.pos.start.base + 1 - hgvs_refreshed_variant = variant.hn.normalize(hgvs_refreshed_variant) - except Exception as e: - error = str(e) - # Ensure the final variant is not intronic nor does it cross exon boundaries - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - hgvs_refreshed_variant = saved_hgvs_coding - else: - pass - - # Sort out equality to equality c. 
events where the code will add 2 additional bases - if hgvs_coding.posedit.edit.type == 'identity' and hgvs_refreshed_variant.posedit.edit.type == 'identity': # and len(hgvs_refreshed_variant.posedit.edit.ref) == (len(hgvs_coding.posedit.edit.ref) + 2): - pass - else: - hgvs_coding = copy.deepcopy(hgvs_refreshed_variant) - coding = fn.valstr(hgvs_coding) - formatted_variant = coding + hgvs_coding = gapped_mapping.g_to_t_gapped_mapping_stage2( + validator, variant, ori, hgvs_coding, hgvs_genomic_5pr, saved_hgvs_coding, stored_hgvs_not_delins, + gapped_transcripts, hgvs_genomic_possibilities, auto_info, reverse_normalized_hgvs_genomic, hgvs_genomic + ) # OBTAIN THE RefSeqGene coordinates # Attempt 1 = UTA diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 80a6f4c9..0682046a 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -553,3213 +553,20 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr print("CARRYING ON") continue - # Flag for validation - valid = 'false' - # Collect information for genomic level validation - obj = self.hp.parse_hgvs_variant(formatted_variant) - - tx_ac = obj.ac - - # Do we keep it? - if select_transcripts != 'all': - if tx_ac in list(select_transcripts_dict_plus_version.keys()): - pass - # If not get rid of it! - else: - # By marking it as Do Not Write and continuing through the validation loop - my_variant.write = False - continue - else: - pass - - # Set a cross_variant object - cross_variant = 'false' - # Se rec_var to '' so it can be updated later + # TODO: Need to check this as it's only being using outside of this loop as well as inside! 
rec_var = '' - try: - to_g = self.myevm_t_to_g(obj, no_norm_evm, primary_assembly, hn) - genomic_ac = to_g.ac - except hgvs.exceptions.HGVSDataNotAvailableError as e: - if (re.search('~', str(e)) and re.search('Alignment is incomplete', str(e))) or re.match( - "No relevant genomic mapping options available", str(e)): - reason = 'Unable to map the input variant onto a genomic position' - if (re.search('~', str(e)) and re.search('Alignment is incomplete', str(e))): - error_list = str(e).split('~')[:-1] - combos = [ - 'Full alignment data between the specified transcript reference sequence and all GRCh37 and GRCh38 genomic reference sequences (including alternate chromosome assemblies, patches and RefSeqGenes) are not available: Consequently the input variant description cannot be fully validated and is not supported: Use the Gene to Transcripts function to determine whether an updated transcript reference sequence is available'] # Partial alignment data is available for the following genomic reference sequences: '] - error = '; '.join(combos) - error = error.replace(': ;', ': ') - else: - error = str(e) - error = error + ': Consequently the input variant description cannot be fully validated and is not supported: Use the Gene to Transcripts function to determine whether an updated transcript reference sequence is available' - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - try: - gene_symbol = self.db.get_gene_symbol_from_transcriptID(tx_ac) - except: - gene_symbol = 'none' - if gene_symbol == 'none': - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - else: - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by 
submitting ' + tx_ac + ' or ' + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - except TypeError as e: - try: - gene_symbol = self.db.get_gene_symbol_from_transcriptID(tx_ac) - except: - gene_symbol = 'none' - if gene_symbol == 'none': - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - else: - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - - # Get orientation of the gene wrt genome and a list of exons mapped to the genome - ori = self.tx_exons(tx_ac=tx_ac, alt_ac=genomic_ac, alt_aln_method=alt_aln_method) - orientation = int(ori[0]['alt_strand']) - intronic_variant = 'false' - - # Collect variant sequence information via normalisation (normalizer) or if intronic via mapping - # INTRONIC OFFSETS - Required for Exon table - # Variable to collect offset to exon boundary - ex_offset = 0 - plus = re.compile(r"\d\+\d") # finds digit + digit - minus = re.compile(r"\d\-\d") # finds digit - digit - - geno = re.compile(r':g.') - if plus.search(input) or minus.search(input): - es = re.compile(r'error') - if es.search(str(to_g)): - if alt_aln_method != 'genebuild': - error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g - reason = "An error has occurred" - excep = "%s -- %s -- %s\n" % (time.ctime(), reason, formatted_variant) - 
my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - - else: - error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g - reason = "An error has occurred" - excep = "%s -- %s -- %s\n" % (time.ctime(), reason, formatted_variant) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - - else: - # Insertions at exon boundaries are miss-handled by vm.g_to_t - if ( - obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset == 0 and obj.posedit.pos.end.offset != 0) or ( - obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset != 0 and obj.posedit.pos.end.offset == 0): - formatted_variant = str(obj) - else: - # Normalize was I believe to replace ref. Mapping does this anyway - # to_g = hn.normalize(to_g) - formatted_variant = str(self.myevm_g_to_t(evm, to_g, tx_ac)) - tx_ac = '' - - elif geno.search(input): - if plus.search(formatted_variant) or minus.search(formatted_variant): - to_g = self.genomic(formatted_variant, no_norm_evm, primary_assembly,hn) - es = re.compile(r'error') - if es.search(str(to_g)): - if alt_aln_method != 'genebuild': - error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g - reason = "An error has occurred" - excep = "%s -- %s -- %s\n" % (time.ctime(), reason, formatted_variant) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - - else: - error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g - reason = "An error has occurred" - excep = "%s -- %s -- %s\n" % (time.ctime(), reason, formatted_variant) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - else: - # Insertions at exon boundaries are miss-handled by vm.g_to_t - if ( - obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset == 0 and 
obj.posedit.pos.end.offset != 0) or ( - obj.posedit.edit.type == 'ins' and obj.posedit.pos.start.offset != 0 and obj.posedit.pos.end.offset == 0): - formatted_variant = str(obj) - else: - # Normalize was I believe to replace ref. Mapping does this anyway - # to_g = hn.normalize(to_g) - formatted_variant = str(self.myevm_g_to_t(evm, to_g, tx_ac)) - tx_ac = '' - - else: - # Normalize the variant - error = 'false' - try: - h_variant = hn.normalize(obj) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Unsupported normalization of variants spanning the exon-intron boundary', - error): - h_variant = obj - formatted_variant = formatted_variant - caution = 'This coding sequence variant description spans at least one intron' - automap = 'Use of the corresponding genomic sequence variant descriptions may be invalid. Please refer to https://www35.lamp.le.ac.uk/recommendations/' - my_variant.warnings += ': ' + str(caution) + ': ' + str( - automap) - logger.warning(str(caution) + ": " + str(automap)) - else: - formatted_variant = str(h_variant) - - tx_ac = '' - # Create a crosser (exon boundary crossed) variant - crossed_variant = str(evm._maybe_normalize(obj)) - if formatted_variant == crossed_variant: - cross_variant = 'false' - else: - hgvs_crossed_variant = evm._maybe_normalize(obj) - cross_variant = [ - "Coding sequence allowing for exon boundary crossing (default = no crossing)", - crossed_variant, hgvs_crossed_variant.ac] - cr_available = 'true' - - # control of cross_variant - if boundary == 'false': - cross_variant = 'false' - - error = self.validateHGVS(formatted_variant) - if error == 'false': - valid = 'true' - else: - excep = "%s -- %s -- %s\n" % (time.ctime(), error, formatted_variant) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - - # Tackle the plus intronic offset - cck = 'false' - if (plus.search(input)): - # Regular expression catches the start of the interval only based on 
.00+00 pattern - inv_start = re.compile(r"\.\d+\+\d") - if (inv_start.search(input)): - # Find pattern e.g. +0000 and assign to a variable - off_value = re.search(r"(\+\d+)", input) - off_value = off_value.group(1) - # Integerise the value and assign to ex_offset - ex_offset = int(off_value) - cck = 'true' - if (minus.search(input)): - # Regular expression catches the start of the interval only based on .00-00 pattern - inv_start = re.compile(r"\.\d+\-\d") - if (inv_start.search(input)): - # Find pattern e.g. -0000 and assign to a variable - off_value = re.search(r"(\-\d+)", input) - off_value = off_value.group(1) - # Integerise the value and assign to ex_offset - ex_offset = int(off_value) - cck = 'true' - - # COORDINATE CHECKER - # hgvs will handle incorrect coordinates so need to automap errors - # Make sure any input intronic coordinates are correct - # Get the desired transcript - pat_r = re.compile(':r.') - pat_g = re.compile(':g.') - if cck == 'true': - dl = re.compile('del') - # This should only ever hit coding and RNA variants - if dl.search(formatted_variant): - # RNA - if pat_r.search(trapped_input): - - coding = self.coding(formatted_variant, self.hp) - trans_acc = coding.ac - # c to Genome coordinates - Map the variant to the genome - pre_var = self.genomic(formatted_variant, no_norm_evm, primary_assembly,hn) - # genome back to C coordinates - post_var = self.myevm_g_to_t(evm, pre_var, trans_acc) - - test = self.hp.parse_hgvs_variant(input) - if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: - caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' - automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' - # automapping of variant completed - # Change to rna variant - posedit = query.posedit - posedit = posedit.lower() - query.posedit = posedit - query.type = 'r' 
- post_var = str(query) - automap = trapped_input + ' automapped to ' + str(post_var) - my_variant.warnings += ': ' + str(caution) + ': ' + str( - automap) - relevant = "Select the automapped transcript and click Submit to analyse" - rel_var = [] - rel_var.append(post_var) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = self.hp.parse_hgvs_variant(str(accessions)) - try: - tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - - else: - # Get hgnc Gene name from command - data = self.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - error = data['error'] - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = self.hp.parse_hgvs_variant(str(accessions[1])) - try: - tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = self.hgnc_rest( - path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - error = data['error'] - my_variant.warnings += ': ' + str( - error) - logger.warning(str(error)) - continue - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if 
int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - # Kill current line and append for re-submission - # Tag the line so that it is not written out - my_variant.write = False - # Set the values and append to batch_list - query = variant.Variant(my_variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=my_variant.primary_assembly, order=ordering) - self.batch_list.append(query) - - # Coding - else: - coding = self.coding(formatted_variant, self.hp) - trans_acc = coding.ac - # c to Genome coordinates - Map the variant to the genome - pre_var = self.hp.parse_hgvs_variant(formatted_variant) - try: - pre_var = self.myevm_t_to_g(pre_var, no_norm_evm, primary_assembly, - hn) - except: - e = sys.exc_info()[1] - error = str(e) - reason = 'Input coordinates may be invalid' - if error == 'expected from_start_i <= from_end_i': - error = 'Automap is unable to correct the input exon/intron boundary coordinates, please check your variant description' - my_variant.warnings += ': ' + str(error) - continue - else: - fn.exceptPass() - else: - fn.exceptPass() - # genome back to C coordinates - try: - post_var = self.myevm_g_to_t(evm, pre_var, trans_acc) - except hgvs.exceptions.HGVSError as error: - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - query = post_var - test = self.hp.parse_hgvs_variant(input) - if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: - caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' - automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' - # automapping of variant completed - automap = trapped_input + ' automapped to ' + str(post_var) - 
my_variant.warnings += str(caution) + ': ' + str(automap) - relevant = "Select the automapped transcript and click Submit to analyse" - rel_var = [] - rel_var.append(post_var) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = self.hp.parse_hgvs_variant(str(accessions)) - try: - tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = self.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - error = data['error'] - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = self.parse_hgvs_variant(str(accessions[1])) - try: - tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = self.hgnc_rest( - path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - error = data['error'] - my_variant.warnings += ': ' + str( - error) - logger.warning(str(error)) - continue - - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = 
data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - # Kill current line and append for re-submission - # Tag the line so that it is not written out - my_variant.write = False - # Set the values and append to batch_list - query = variant.Variant(my_variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=my_variant.primary_assembly, order=ordering) - self.batch_list.append(query) - - else: - if pat_r.search(trapped_input): - coding = self.coding(formatted_variant, self.hp) - trans_acc = coding.ac - # c to Genome coordinates - Map the variant to the genome - pre_var = self.genomic(formatted_variant, no_norm_evm, primary_assembly,hn) - # genome back to C coordinates - post_var = self.myevm_g_to_t(evm, pre_var, trans_acc) - - test = self.hp.parse_hgvs_variant(input) - if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: - caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' - automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' - # automapping of variant completed - # Change to rna variant - posedit = query.posedit - posedit = posedit.lower() - query.posedit = posedit - query.type = 'r' - post_var = str(query) - automap = input + ' automapped to ' + post_var - my_variant.warnings += ': ' + str(caution) + ': ' + str( - automap) - relevant = "Select the automapped transcript and click Submit to analyse" - rel_var = [] - rel_var.append(post_var) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = self.hp.parse_hgvs_variant(str(accessions)) - try: - tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = 
['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = self.va_func.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - error = data['error'] - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - # Kill current line and append for re-submission - # Tag the line so that it is not written out - my_variant.write = False - # Set the values and append to batch_list - query = variant.Variant(my_variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=my_variant.primary_assembly, order=ordering) - self.batch_list.append(query) - - else: - coding = self.coding(formatted_variant, self.hp) - trans_acc = coding.ac - # c to Genome coordinates - Map the variant to the genome - pre_var = self.genomic(formatted_variant, no_norm_evm, primary_assembly,hn) - - # genome back to C coordinates - post_var = self.myevm_g_to_t(evm, pre_var, trans_acc) - - test = self.hp.parse_hgvs_variant(input) - if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: - caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' - automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' - # automapping of variant completed - automap = str(trapped_input) + ' automapped to ' + str(post_var) - my_variant.warnings += ': ' + str(caution) + ': ' + str( - automap) - relevant = "Select the automapped transcript and click Submit to analyse" - rel_var = [] - rel_var.append(post_var) - # Add 
gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = self.hp.parse_hgvs_variant(str(accessions)) - try: - tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = self.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - reason = 'Cannot currently display the required information:' - error = data['error'] - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - # Kill current line and append for re-submission - # Tag the line so that it is not written out - my_variant.write = False - # Set the values and append to batch_list - query = variant.Variant(my_variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=my_variant.primary_assembly, order=ordering) - self.batch_list.append(query) - - - # If cck not true - elif pat_r.search(trapped_input): - # set input hgvs object - hgvs_rna_input = self.hp.parse_hgvs_variant( - trapped_input) # Traps the hgvs variant of r. 
for further use - inp = str(self.hgvs_r_to_c(hgvs_rna_input)) - # Regex - plus = re.compile(r"\d\+\d") # finds digit + digit - minus = re.compile(r"\d\-\d") # finds digit - digit - if plus.search(input) or minus.search(input): - to_g = self.genomic(inp, no_norm_evm, primary_assembly,hn) - es = re.compile('error') - if es.search(str(to_g)): - if alt_aln_method != 'genebuild': - error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g - reason = "An error has occurred" - excep = "%s -- %s -- %s\n" % (time.ctime(), reason, formatted_variant) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - - else: - error = "If the following error message does not address the issue and the problem persists please contact admin: " + to_g - reason = "An error has occurred" - excep = "%s -- %s -- %s\n" % (time.ctime(), reason, formatted_variant) - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - - else: - # Set variants pre and post genomic norm - hgvs_inp = self.myevm_g_to_t(evm, to_g, tx_ac=obj.ac) - to_g = hn.normalize(to_g) - hgvs_otp = self.myevm_g_to_t(evm, to_g, tx_ac=obj.ac) - tx_ac = '' - else: - # Set variants pre and post RNA norm - hgvs_inp = self.hp.parse_hgvs_variant(inp) - try: - hgvs_otp = hn.normalize(hgvs_inp) - except hgvs.exceptions.HGVSError as e: - hgvs_otp = hgvs_inp - tx_ac = '' - - # Set remaining variables - redit = str(hgvs_otp.posedit.edit) - redit = redit.lower() - hgvs_otp.posedit.edit = redit - otp = str(hgvs_otp) - query = str(hgvs_otp.posedit.pos) - test = str(hgvs_inp.posedit.pos) - query = query.replace('T', 'U') - query = query.replace('ENSU', 'ENST') - test = test.replace('T', 'U') - test = test.replace('ENSU', 'ENST') - output = otp.replace(':c.', ':r.') - # Apply coordinates test - if query != test: - caution = 'The variant description ' + input + ' requires alteration to comply with HGVS variant nomenclature:' - 
automap = 'Automap has corrected the variant description' - # automapping of variant completed - automap = trapped_input + ' automapped to ' + output - my_variant.warnings += ': ' + str(caution) + ': ' + str(automap) - relevant = "Select the automapped transcript and click Submit to analyse" - rel_var = [] - rel_var.append(output) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = self.hp.parse_hgvs_variant(str(accessions)) - try: - tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = self.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - error = data['error'] - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - # Kill current line and append for re-submission - # Tag the line so that it is not written out - my_variant.write = False - # Set the values and append to batch_list - query = variant.Variant(my_variant.original, quibble=fn.valstr(hgsv_vt), warnings=automap, primary_assembly=my_variant.primary_assembly, order=ordering) - self.batch_list.append(query) - - elif pat_g.search(input): - pass - - else: - query = self.hp.parse_hgvs_variant(formatted_variant) - test = self.hp.parse_hgvs_variant(input) - if query.posedit.pos != test.posedit.pos: - caution = 'The variant description ' + input + ' requires alteration to comply with HGVS variant nomenclature:' - automap = 'Automap has corrected the 
variant description' - # automapping of variant completed - automap = str(test) + ' automapped to ' + str(query) - my_variant.warnings += ': ' + str(caution) + ': ' + str(automap) - relevant = "Select the automapped transcript and click Submit to analyse" - rel_var = [] - rel_var.append(query) - # Add gene symbols to the link - cp_rel = copy.copy(rel_var) - del rel_var[:] - for accessions in cp_rel: - error = 'false' - hgvs_vt = self.hp.parse_hgvs_variant(str(accessions)) - try: - tx_id_info = self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if error != 'false': - accessions = ['', str(hgvs_vt)] - rel_var.append(accessions) - else: - # Get hgnc Gene name from command - data = self.hgnc_rest(path="/search/prev_symbol/" + tx_id_info[6]) - if data['error'] != 'false': - reason = 'Cannot currently display the required information:' - error = data['error'] - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - - else: - # Set the hgnc name correctly - # If the name is correct no record will be found - if int(data['record']['response']['numFound']) == 0: - current = tx_id_info[6] - else: - current = data['record']['response']['docs'][0]['symbol'] - accessions = [str(current), str(hgvs_vt)] - rel_var.append(accessions) - # Kill current line and append for re-submission - # Tag the line so that it is not written out - my_variant.write = False - # Set the values and append to batch_list - query = variant.Variant(my_variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=my_variant.primary_assembly, order=ordering) - self.batch_list.append(query) - - # VALIDATION of intronic variants - pre_valid = self.hp.parse_hgvs_variant(input) - post_valid = self.hp.parse_hgvs_variant(formatted_variant) - if valid == 'false': - error = 'false' - genomic_validation = str( - self.genomic(input, no_norm_evm, primary_assembly,hn) ) - del_end = re.compile(r'\ddel$') - delins = 
re.compile(r'delins') - inv = re.compile(r'inv') - if fn.valstr(pre_valid) != fn.valstr(post_valid): - if format_type != ':g.': - if caution == '': - caution = fn.valstr(pre_valid) + ' automapped to ' + fn.valstr(post_valid) - else: - pass - my_variant.warnings += ': ' + str(caution) - logger.warning(str(caution)) - else: - pass - else: - pass - - # Apply validation to intronic variant descriptions (should be valid but make sure) - error = self.validateHGVS(genomic_validation) - if error == 'false': - valid = 'true' - else: - - excep = "%s -- %s -- %s\n" % (time.ctime(), error, formatted_variant) - my_variant.warnings += ': ' + str(error) - continue - - if valid == 'true': - var_tab = 'true' - cores = "HGVS-compliant variant descriptions" + warning - - # v0.1a1 edit - if fn.valstr(pre_valid) != fn.valstr(post_valid): - if format_type == ':g.': - if caution == '': - caution = fn.valstr(pre_valid) + ' automapped to ' + fn.valstr(post_valid) - else: - pass - my_variant.warnings += ': ' + str(caution) - else: - pass - else: - pass - - # COLLECT VARIANT DESCRIPTIONS - ############################## - - # Coding sequence - BASED ON NORMALIZED VARIANT IF EXONIC - hgvs_coding = self.coding(formatted_variant, self.hp) - boundary = re.compile('exon-intron boundary') - spanning = re.compile('exon/intron') - - try: - hgvs_coding = hn.normalize(hgvs_coding) - except hgvs.exceptions.HGVSError as e: - error = str(e) - - # Gap compensating code status - gap_compensation = True - - # Gap gene black list - try: - gene_symbol = self.db.get_gene_symbol_from_transcriptID(hgvs_coding.ac) - except Exception: - fn.exceptPass() - else: - # If the gene symbol is not in the list, the value False will be returned - gap_compensation = vvChromosomes.gap_black_list(gene_symbol) - - # Intron spanning variants - if re.search('boundary', str(error)) or re.search('spanning', str(error)): - try: - hgvs_coding = evm._maybe_normalize(hgvs_coding) - gap_compensation = False - except 
hgvs.exceptions.HGVSError as error: - my_variant.warnings += ': ' + str(error) - logger.warning(str(error)) - continue - else: - pass - - # Warn status - logger.warning("gap_compensation_1 = " + str(gap_compensation)) - coding = fn.valstr(hgvs_coding) - - # RNA sequence - hgvs_rna = copy.deepcopy(hgvs_coding) - hgvs_rna = self.hgvs_c_to_r(hgvs_rna) - rna = str(hgvs_rna) - - # Genomic sequence - hgvs_genomic = self.myevm_t_to_g(hgvs_coding, no_norm_evm, primary_assembly, hn) - final_hgvs_genomic = hgvs_genomic - - # genomic_possibilities - # 1. take the simple 3 pr normalized hgvs_genomic - # 2. Lock in hgvs_genomic at its most 5 prime position wrt genome - hgvs_genomic_possibilities = [] - - # Loop out gap finding code under these circumstances! - if gap_compensation is True: - logger.warning('g_to_t gap code 1 active') - rn_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic) - hgvs_genomic_possibilities.append(rn_hgvs_genomic) - if orientation != -1: - try: - chromosome_normalized_hgvs_coding = reverse_normalizer.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - chromosome_normalized_hgvs_coding = hgvs_coding - else: - try: - chromosome_normalized_hgvs_coding = hn.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - chromosome_normalized_hgvs_coding = hgvs_coding - - most_3pr_hgvs_genomic = self.myvm_t_to_g(chromosome_normalized_hgvs_coding, hgvs_genomic.ac, - no_norm_evm, hn) - hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) - - # Push from side to side to try pick up odd placements - # MAKE A NO NORM HGVS2VCF - # First to the right - hgvs_stash = copy.deepcopy(hgvs_coding) - try: - hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) - except: - fn.exceptPass() - try: - stash_ac = hgvs_stash.ac - stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, self.sf) - stash_pos = int(stash_dict['pos']) - stash_ref = stash_dict['ref'] - 
stash_alt = stash_dict['alt'] - # Generate an end position - stash_end = str(stash_pos + len(stash_ref) - 1) - # make a not real deletion insertion - stash_hgvs_not_delins = self.hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' + str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) - try: - stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) - except: - fn.exceptPass() - # Store a tx copy for later use - test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) - # stash_genomic = vm.t_to_g(test_stash_tx_right, hgvs_genomic.ac) - stash_genomic = self.myvm_t_to_g(test_stash_tx_right, hgvs_genomic.ac, no_norm_evm, hn) - # Stash the outputs if required - # test variants = NC_000006.11:g.90403795G= (causes double identity) - # NC_000002.11:g.73675227_73675228insCTC (? incorrect assumed insertion position) - # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) - # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': - # pass - if len(test_stash_tx_right.posedit.edit.ref) == (( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - stash_tx_right = test_stash_tx_right - if hasattr(test_stash_tx_right.posedit.edit, - 'alt') and test_stash_tx_right.posedit.edit.alt is not None: - alt = test_stash_tx_right.posedit.edit.alt - else: - alt = '' - if hasattr(stash_genomic.posedit.edit, - 'alt') and stash_genomic.posedit.edit.alt is not None: - g_alt = stash_genomic.posedit.edit.alt - else: - g_alt = '' - if (len(alt) - ( - test_stash_tx_right.posedit.pos.end.base - test_stash_tx_right.posedit.pos.start.base) + 1) != ( - len(g_alt) - ( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - hgvs_genomic_possibilities.append(stash_genomic) - else: - hgvs_genomic_possibilities.append('') - elif test_stash_tx_right.posedit.edit.type == 'identity': - reform_ident = 
str(test_stash_tx_right).split(':')[0] - reform_ident = reform_ident + ':c.' + str(test_stash_tx_right.posedit.pos) + 'del' + str( - test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) - hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) - try: - hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error): - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append('') - else: - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append(stash_genomic) - else: - try: - hn.normalize(test_stash_tx_right) - except hgvs.exceptions.HGVSUnsupportedOperationError: - hgvs_genomic_possibilities.append('') - else: - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: - test_stash_tx_right = copy.deepcopy(hgvs_coding) - fn.exceptPass() - # Intronic positions not supported. Will cause a Value Error - except ValueError: - test_stash_tx_right = copy.deepcopy(hgvs_coding) - fn.exceptPass() - - # Then to the left - hgvs_stash = copy.deepcopy(hgvs_coding) - try: - hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) - except: - fn.exceptPass() - try: - stash_ac = hgvs_stash.ac - stash_dict = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, primary_assembly, reverse_normalizer, - self.sf) - stash_pos = int(stash_dict['pos']) - stash_ref = stash_dict['ref'] - stash_alt = stash_dict['alt'] - # Generate an end position - stash_end = str(stash_pos + len(stash_ref) - 1) - # make a not real deletion insertion - stash_hgvs_not_delins = self.hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' 
+ str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) - try: - stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) - except: - fn.exceptPass() - # Store a tx copy for later use - test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) - # stash_genomic = vm.t_to_g(test_stash_tx_left, hgvs_genomic.ac) - stash_genomic = self.myvm_t_to_g(test_stash_tx_left, hgvs_genomic.ac, no_norm_evm, hn) - # Stash the outputs if required - # test variants = NC_000006.11:g.90403795G= (causes double identity) - # NC_000002.11:g.73675227_73675228insCTC - # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) - # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': - # pass - if len(test_stash_tx_left.posedit.edit.ref) == (( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): - stash_tx_left = test_stash_tx_left - if hasattr(test_stash_tx_left.posedit.edit, - 'alt') and test_stash_tx_left.posedit.edit.alt is not None: - alt = test_stash_tx_left.posedit.edit.alt - else: - alt = '' - if hasattr(stash_genomic.posedit.edit, - 'alt') and stash_genomic.posedit.edit.alt is not None: - g_alt = stash_genomic.posedit.edit.alt - else: - g_alt = '' - - if (len(alt) - ( - test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( - len(g_alt) - ( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - hgvs_genomic_possibilities.append(stash_genomic) - else: - hgvs_genomic_possibilities.append('') - elif test_stash_tx_left.posedit.edit.type == 'identity': - reform_ident = str(test_stash_tx_left).split(':')[0] - reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_left.posedit.pos) + 'del' + str( - test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) - hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) - try: - hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error): - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append('') - else: - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append(stash_genomic) - else: - try: - hn.normalize(test_stash_tx_left) - except hgvs.exceptions.HGVSUnsupportedOperationError: - hgvs_genomic_possibilities.append('') - else: - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: - test_stash_tx_left = copy.deepcopy(hgvs_coding) - fn.exceptPass() - except ValueError: - test_stash_tx_left = copy.deepcopy(hgvs_coding) - fn.exceptPass() - - # direct mapping from reverse_normalized transcript insertions in the delins format - try: - if hgvs_coding.posedit.edit.type == 'ins': - most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) - most_3pr_hgvs_transcript_variant = reverse_normalizer.normalize(hgvs_coding) - try: - n_3pr = self.vm.c_to_n(most_3pr_hgvs_transcript_variant) - n_5pr = self.vm.c_to_n(most_5pr_hgvs_transcript_variant) - except: - n_3pr = most_3pr_hgvs_transcript_variant - n_5pr = most_5pr_hgvs_transcript_variant - # Make into a delins by adding the ref bases to the variant ref and alt - pr3_ref = self.sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, - n_3pr.posedit.pos.end.base) - pr5_ref = self.sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, - n_5pr.posedit.pos.end.base) - most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref - most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref - most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[ - 0] + 
most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ - pr3_ref[1] - most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[ - 0] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ - pr5_ref[1] - # Map to the genome - genomic_from_most_3pr_hgvs_transcript_variant = self.vm.t_to_g( - most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) - genomic_from_most_5pr_hgvs_transcript_variant = self.vm.t_to_g( - most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) - # Normalize - If the variant spans a gap it should then form a static genomic variant - try: - genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_3pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if error == 'base start position must be <= end position': - start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base - end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start - genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_3pr_hgvs_transcript_variant) - try: - genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_5pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if error == 'base start position must be <= end position': - start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base - end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start - genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_5pr_hgvs_transcript_variant) - try: - if genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - 
genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' + str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_3pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( - genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) - - try: - if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' 
+ str( - most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref - most_3pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( - most_3pr_hgvs_transcript_variant_delins_from_dup) - - try: - if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' + str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_5pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) - - try: - if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' 
+ str( - most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref - most_5pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( - most_5pr_hgvs_transcript_variant_delins_from_dup) - - if len(genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( - most_3pr_hgvs_transcript_variant.posedit.edit.alt): - hgvs_genomic_possibilities.append(genomic_from_most_3pr_hgvs_transcript_variant) - if len(genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( - most_5pr_hgvs_transcript_variant.posedit.edit.alt): - hgvs_genomic_possibilities.append(genomic_from_most_5pr_hgvs_transcript_variant) - - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - pass - - logger.info('\nGENOMIC POSSIBILITIES') - for possibility in hgvs_genomic_possibilities: - if possibility == '': - logger.info('X') - else: - logger.info(fn.valstr(possibility)) - - logger.info('\n') - - # Set variables for problem specific warnings - gapped_alignment_warning = '' - corrective_action_taken = '' - gapped_transcripts = '' - auto_info = '' - - # Mark as not disparity detected - disparity_deletion_in = ['false', 'false'] - - # Loop through to see if a gap can be located - # Set the variables required for corrective normalization - possibility_counter = 0 - suppress_c_normalization = 'false' # Applies to boundary crossing normalization - - # Copy a version of hgvs_genomic_possibilities - for possibility in hgvs_genomic_possibilities: - possibility_counter = possibility_counter + 1 - - # Loop out stash possibilities which will not spot gaps so are 
empty - if possibility == '': - continue - - # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps - hgvs_genomic_variant = copy.deepcopy(possibility) - stored_hgvs_genomic_variant = copy.deepcopy(hgvs_genomic_variant) - - # Reverse normalize hgvs_genomic_variant: NOTE will replace ref - try: - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic_variant) - except hgvs.exceptions.HGVSError as e: - # Strange error caused by gap in genomic - error = str(e) - if re.search('base start position must be <= end position', error): - if hgvs_genomic.posedit.edit.type == 'delins': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic) - if hgvs_genomic.posedit.edit.type == 'del': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic) - if re.search('insertion length must be 1', error): - if hgvs_genomic.posedit.edit.type == 'ins': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) - lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - rhb = 
self.sf.fetch_seq(str(hgvs_genomic.ac), start, end) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic) - - hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - # Store a copy for later use - stored_hgvs_genomic_5pr = copy.deepcopy(hgvs_genomic_5pr) - - # Create VCF - vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, - reverse_normalizer, self.sf) - chr = vcf_dict['chr'] - pos = vcf_dict['pos'] - ref = vcf_dict['ref'] - alt = vcf_dict['alt'] - - # Look for exonic gaps within transcript or chromosome - no_normalized_c = 'false' # Mark true to not produce an additional normalization of c. - - # Generate an end position - end = str(int(pos) + len(ref) - 1) - pos = str(pos) - - # Store a not real deletion insertion to test for gapping - stored_hgvs_not_delins = self.hp.parse_hgvs_variant(str( - hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' 
+ pos + '_' + end + 'del' + ref + 'ins' + alt) - v = [chr, pos, ref, alt] - - # Detect intronic variation using normalization - intronic_variant = 'false' - - # Save a copy of current hgvs_coding - try: - saved_hgvs_coding = no_norm_evm.g_to_t(stored_hgvs_not_delins, hgvs_coding.ac) - except hgvs.exceptions.HGVSInvalidIntervalError as e: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - saved_hgvs_coding = hgvs_coding - intronic_variant = 'true' - continue - else: - saved_hgvs_coding = no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, - hgvs_coding.ac) - - # Look for normalized variant options that do not match hgvs_coding - if orientation == -1: - # position genomic at its most 5 prime position - try: - query_genomic = reverse_normalizer.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if ( - hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding - - elif orientation != -1: - # position genomic at its most 3 prime position - try: - query_genomic = hn.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript ant test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if ( - 
hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding - - try: - intron_test = hn.normalize(hgvs_seek_var) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - intronic_variant = 'hard_fail' - else: - # Double check to see whether the variant is actually intronic? - for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' - - if intronic_variant != 'hard_fail': - if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', - str(hgvs_seek_var.posedit.pos)): - # Double check to see whether the variant is actually intronic? 
- for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' - - if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str(hgvs_seek_var.posedit.pos)): - # Double check to see whether the variant is actually intronic? - for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' - - if intronic_variant != 'true': - # Flag RefSeqGene for ammendment - # amend_RefSeqGene = 'false' - # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths - if stored_hgvs_not_delins != '': - # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) - # This test will only occur in dup of single base, insertion or substitution - if not re.search('_', str(hgvs_not_delins.posedit.pos)): - if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', - hgvs_genomic_5pr.posedit.edit.type): - # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos - plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) - plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 - plussed_hgvs_not_delins.posedit.edit.ref = '' - transcript_variant = no_norm_evm.g_to_t(plussed_hgvs_not_delins, - str(saved_hgvs_coding.ac)) - if (( - transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( - hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str( - hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - else: - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = 
hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str( - hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - else: - pass - else: - pass - try: - tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSInvalidIntervalError: - tx_hgvs_not_delins = no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, - saved_hgvs_coding.ac) - # Create normalized version of tx_hgvs_not_delins - rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) - - # Check for +1 base and adjust - if re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - r'\+', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - pass - - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # 
move tx end base to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.myevm_t_to_g(test_tx_var, no_norm_evm, - primary_assembly, hn) - - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - - # tx_hgvs_not_delins = rn_tx_hgvs_not_delins - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.myevm_t_to_g(test_tx_var, no_norm_evm, - primary_assembly, hn) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: - # pass - - # Check for -ve base and adjust - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - r'\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - pass - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # Delete the ref - 
rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # Add the additional base to the ALT - start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) - rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.myevm_t_to_g(test_tx_var, no_norm_evm, - primary_assembly, hn) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.myevm_t_to_g(test_tx_var, no_norm_evm, - primary_assembly, hn) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - else: - pass - - # Logic - if len(hgvs_not_delins.posedit.edit.ref) < len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( - hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['chromosome', gap_length] - elif len(hgvs_not_delins.posedit.edit.ref) > len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( - rn_tx_hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] - else: - 
re_capture_tx_variant = [] - for internal_possibility in hgvs_genomic_possibilities: - if internal_possibility == '': - continue - - hgvs_t_possibility = self.vm.g_to_t(internal_possibility, hgvs_coding.ac) - if hgvs_t_possibility.posedit.edit.type == 'ins': - try: - hgvs_t_possibility = self.vm.c_to_n(hgvs_t_possibility) - except: - fn.exceptPass() - ins_ref = self.sf.fetch_seq(hgvs_t_possibility.ac, - hgvs_t_possibility.posedit.pos.start.base - 1, - hgvs_t_possibility.posedit.pos.start.base + 1) - try: - hgvs_t_possibility = self.vm.n_to_c(hgvs_t_possibility) - except: - fn.exceptPass() - hgvs_t_possibility.posedit.edit.ref = ins_ref - hgvs_t_possibility.posedit.edit.alt = ins_ref[ - 0] + hgvs_t_possibility.posedit.edit.alt + \ - ins_ref[1] - if internal_possibility.posedit.edit.type == 'ins': - ins_ref = self.sf.fetch_seq(internal_possibility.ac, - internal_possibility.posedit.pos.start.base - 1, - internal_possibility.posedit.pos.end.base) - internal_possibility.posedit.edit.ref = ins_ref - internal_possibility.posedit.edit.alt = ins_ref[ - 0] + internal_possibility.posedit.edit.alt + \ - ins_ref[1] - - if len(hgvs_t_possibility.posedit.edit.ref) < len( - internal_possibility.posedit.edit.ref): - gap_length = len(internal_possibility.posedit.edit.ref) - len( - hgvs_t_possibility.posedit.edit.ref) - re_capture_tx_variant = ['transcript', gap_length, - hgvs_t_possibility] - hgvs_not_delins = internal_possibility - hgvs_genomic_5pr = internal_possibility - break - - if re_capture_tx_variant != []: - try: - tx_hgvs_not_delins = self.vm.c_to_n(re_capture_tx_variant[2]) - except: - tx_hgvs_not_delins = re_capture_tx_variant[2] - disparity_deletion_in = re_capture_tx_variant[0:-1] - else: - pass - - # 'At hgvs_genomic' - # Final sanity checks - try: - self.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) - except Exception as e: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - continue - try: - hn.normalize(tx_hgvs_not_delins) - 
except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - continue - elif re.match('Normalization of intronic variants is not supported', error): - # We know that this cannot be because of an intronic variant, so must be aligned to tx gap - disparity_deletion_in = ['transcript', 'Requires Analysis'] - - # amend_RefSeqGene = 'false' - # Recreate hgvs_genomic - if disparity_deletion_in[0] == 'transcript': - hgvs_genomic = hgvs_not_delins - - # Find oddly placed gaps where the tx variant is encompassed in the gap - if disparity_deletion_in[0] == 'false' and ( - possibility_counter == 3 or possibility_counter == 4): - rg = reverse_normalizer.normalize(hgvs_not_delins) - rtx = self.vm.g_to_t(rg, tx_hgvs_not_delins.ac) - fg = hn.normalize(hgvs_not_delins) - ftx = self.vm.g_to_t(fg, tx_hgvs_not_delins.ac) - if (rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( - ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): - exons = self.hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, alt_aln_method) - exonic = False - for ex_test in exons: - if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ - 7]) and ftx.posedit.pos.end.base in range(ex_test[6], ex_test[7]): - exonic = True - if exonic is True: - hgvs_not_delins = fg - hgvs_genomic = fg - hgvs_genomic_5pr = fg - try: - tx_hgvs_not_delins = self.vm.c_to_n(ftx) - except Exception: - tx_hgvs_not_delins = ftx - disparity_deletion_in = ['transcript', 'Requires Analysis'] - - # Pre-processing of tx_hgvs_not_delins - try: - if tx_hgvs_not_delins.posedit.edit.alt is None: - tx_hgvs_not_delins.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": 
- tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( - tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = self.hp.parse_hgvs_variant( - tx_hgvs_not_delins_delins_from_dup) - - # GAP IN THE TRANSCRIPT DISPARITY DETECTED - if disparity_deletion_in[0] == 'transcript': - # Suppress intron boundary crossing due to non-intron intron based c. seq annotations - suppress_c_normalization = 'true' - # amend_RefSeqGene = 'true' - # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( - r'\-', - str( - tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( - r'\-', - str( - tx_hgvs_not_delins.posedit.pos.end))): - - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - - # Copy the current variant - tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) - try: - if tx_gap_fill_variant.posedit.edit.alt is None: - tx_gap_fill_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( - tx_gap_fill_variant.posedit.pos.start) + '_' + str( - tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = self.hp.parse_hgvs_variant( - tx_gap_fill_variant_delins_from_dup) - - # Identify which half of the NOT-intron the start position of the variant is in - if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - - try: - tx_gap_fill_variant = self.vm.n_to_c(tx_gap_fill_variant) - except: - fn.exceptPass() - genomic_gap_fill_variant = self.vm.t_to_g(tx_gap_fill_variant, - reverse_normalized_hgvs_genomic.ac) - genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - - try: - c_tx_hgvs_not_delins = self.vm.n_to_c(tx_hgvs_not_delins) - except Exception: - c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = self.vm.t_to_g(c_tx_hgvs_not_delins, - hgvs_genomic_5pr.ac) - - # Ensure an ALT exists - try: - if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_gap_fill_variant_delins_from_dup = 
genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( - genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = self.hp.parse_hgvs_variant( - genomic_gap_fill_variant_delins_from_dup) - genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( - genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = self.hp.parse_hgvs_variant( - genomic_gap_fill_variant_alt_delins_from_dup) - - # Correct insertion alts - if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = self.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - genomic_gap_fill_variant_alt.posedit.pos.end.base) - genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - append_ref[1] - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) - else: - # Deletions with no ins - pre_alternate_bases = list( - genomic_gap_fill_variant_alt.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = genomic_gap_fill_variant.posedit.pos.start.base - alt_start = 
genomic_gap_fill_variant_alt.posedit.pos.start.base - ref_base_dict = {} - for base in reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = ref_start + 1 - - alt_base_dict = {} - - # NEED TO SEARCH FOR RANGE = and replace with interval_range - # Need to search for int and replace with integer - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, - genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, - 1): - if integer == alt_start: - alt_base_dict[integer] = str(''.join(alternate_bases)) - else: - alt_base_dict[integer] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, - genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): - if integer in list(alt_base_dict.keys()): - alternate_sequence_bases.append(alt_base_dict[integer]) - else: - alternate_sequence_bases.append(ref_base_dict[integer]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Add the new alt to the gap fill variant and generate transcript variant - genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = self.vm.g_to_t(genomic_gap_fill_variant, - tx_gap_fill_variant.ac) - - # Set warning - gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - disparity_deletion_in[1] = [gap_size] - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - else: - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - auto_info = auto_info + '%s' % (gap_position) - - else: - if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # In this instance, we have identified a transcript gap but the n. version of - # the transcript variant but do not have a position which actually hits the gap, - # so the variant likely spans the gap, and is not picked up by an offset. 
- try: - c1 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - g1 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) - g3 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - ng2 = hn.normalize(g2) - g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - len(g3.posedit.edit.ref) - 1) - try: - c2 = self.vm.g_to_t(g3, c1.ac) - if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - pass - else: - tx_hgvs_not_delins = c2 - try: - tx_hgvs_not_delins = self.vm.c_to_n(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSError: - fn.exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError: - fn.exceptPass() - - if re.search(r'\+', - str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.vm.g_to_t(g3, c1.ac) 
- hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\+', - str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): 
- for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - gpe = for_location_c.posedit.pos.end.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', - str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - gap_position = ' between positions c.' 
+ str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', - str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - 1 - gpe = for_location_c.posedit.pos.end.base - gap_position = ' between positions c.' 
+ str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - else: - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) + '\n' - tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.start.base + len( - tx_hgvs_not_delins.posedit.edit.ref) - 1 - hgvs_refreshed_variant = tx_hgvs_not_delins - - # GAP IN THE CHROMOSOME - elif disparity_deletion_in[0] == 'chromosome': - suppress_c_normalization = 'true' - # amend_RefSeqGene = 'true' - if possibility_counter == 3: - hgvs_refreshed_variant = stash_tx_right - elif possibility_counter == 4: - hgvs_refreshed_variant = stash_tx_left - else: - hgvs_refreshed_variant = chromosome_normalized_hgvs_coding - # Warn - auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( - hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' transcript base(s) that fail to align to chromosome ' + str( - hgvs_genomic.ac) + '\n' - else: - # Keep the same by re-setting rel_var - hgvs_refreshed_variant = hgvs_coding - # amend_RefSeqGene = 'false' - - # Edit the output - if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( - hgvs_refreshed_variant.type)): - hgvs_refreshed_variant = no_norm_evm.n_to_c(hgvs_refreshed_variant) - else: - pass - - try: - hn.normalize(hgvs_refreshed_variant) - except Exception as e: - error = str(e) - - # Ensure the final variant is not intronic nor does it cross exon boundaries - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - hgvs_refreshed_variant = saved_hgvs_coding - else: - logger.warning(error) - continue - - # Quick check to make sure the coding variant 
has not changed - try: - to_test = hn.normalize(hgvs_refreshed_variant) - except: - to_test = hgvs_refreshed_variant - if str(to_test.posedit.edit) != str(hgvs_coding.posedit.edit): - # Try the next available genomic option - if hgvs_coding.posedit.edit.type == 'identity' and to_test.posedit.edit.type == 'identity': - hgvs_coding = to_test - else: - continue - # Update hgvs_genomic - hgvs_genomic = self.myvm_t_to_g(hgvs_refreshed_variant, hgvs_genomic.ac, - no_norm_evm,hn) - if hgvs_genomic.posedit.edit.type == 'identity': - re_c = self.vm.g_to_t(hgvs_genomic, hgvs_refreshed_variant.ac) - if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): - shuffle_left_g = copy.copy(hgvs_genomic) - shuffle_left_g.posedit.edit.ref = '' - shuffle_left_g.posedit.edit.alt = '' - shuffle_left_g.posedit.pos.start.base = shuffle_left_g.posedit.pos.start.base - 1 - shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 - shuffle_left_g = reverse_normalizer.normalize(shuffle_left_g) - re_c = self.vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) - if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): - hgvs_genomic = shuffle_left_g - - # If it is intronic, these vairables will not have been set - else: - # amend_RefSeqGene = 'false' - no_normalized_c = 'false' - - # Break if gap has been detected - if disparity_deletion_in[0] != 'false': - break - - # Warn user about gapping - if auto_info != '': - info_lines = auto_info.split('\n') - info_keys = {} - for information in info_lines: - info_keys[information] = '' - info_out = [] - info_out.append( - 'The displayed variants may be artefacts of aligning ' + hgvs_coding.ac + ' with genome build ' + primary_assembly) - for ky in list(info_keys.keys()): - info_out.append(ky) - auto_info = '\n'.join(info_out) - auto_info = auto_info + '\nCaution should be used when reporting the displayed variant descriptions: If you are unsure, please contact admin' - auto_info = str(auto_info.replace('\n', 
': ')) - my_variant.warnings += ': ' + str(auto_info) - logger.warning(str(auto_info)) - # Normailse hgvs_genomic - try: - hgvs_genomic = hn.normalize(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: - # Strange error caused by gap in genomic - error = str(e) - - if re.search('base start position must be <= end position', error) and \ - disparity_deletion_in[0] == 'chromosome': - if hgvs_genomic.posedit.edit.type == 'delins': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - hgvs_genomic = hn.normalize(hgvs_genomic) - if hgvs_genomic.posedit.edit.type == 'del': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - hgvs_genomic = hn.normalize(hgvs_genomic) - genomic = fn.valstr(hgvs_genomic) - - else: - stored_hgvs_genomic_variant = hgvs_genomic - suppress_c_normalization = 'false' - gapped_alignment_warning = '' - auto_info = '' - genomic = fn.valstr(hgvs_genomic) - - # Create pseudo VCF based on amended hgvs_genomic - hgvs_genomic_variant = hgvs_genomic - # Reverse normalize hgvs_genomic_variant: NOTE will replace ref - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic_variant) - - hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - - # Create vcf - vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, 
primary_assembly, - reverse_normalizer, self.sf) - chr = vcf_dict['chr'] - pos = vcf_dict['pos'] - ref = vcf_dict['ref'] - alt = vcf_dict['alt'] - - # Create a VCF call - vcf_component_list = [str(chr), str(pos), str(ref), (alt)] - vcf_genomic = '-'.join(vcf_component_list) - - # DO NOT DELETE - # Generate an end position - end = str(int(pos) + len(ref) - 1) - pos = str(pos) - - # DO NOT DELETE - stored_hgvs_not_delins = self.hp.parse_hgvs_variant(str( - hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) - - # Apply gap code to re-format hgvs_coding - # Store the current hgvs:c. description - saved_hgvs_coding = copy.deepcopy(hgvs_coding) - - # Get orientation of the gene wrt genome and a list of exons mapped to the genome - ori = self.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=hgvs_genomic_5pr.ac, - alt_aln_method=alt_aln_method) - orientation = int(ori[0]['alt_strand']) - - # Look for normalized variant options that do not match hgvs_coding - hgvs_genomic = copy.deepcopy(hgvs_genomic_variant) - if orientation == -1: - # position genomic at its most 5 prime position - try: - query_genomic = reverse_normalizer.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript ant test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding - - elif orientation != -1: - # position genomic at its most 3 prime position - 
try: - query_genomic = hn.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript ant test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): - pass - else: - hgvs_seek_var = saved_hgvs_coding - - # Loop out gap finding code under these circumstances! - logger.warning("gap_compensation_2 = " + str(gap_compensation)) - if gap_compensation is True: - logger.warning('g_to_t gap code 2 active') - # is it in an exon? - is_it_in_an_exon = 'no' - for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - # Take from stored copy - # hgvs_genomic_5pr = copy.deepcopy(stored_hgvs_genomic_5pr) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - is_it_in_an_exon = 'yes' - if is_it_in_an_exon == 'yes': - # map form reverse normalized g. to c. 
- hgvs_from_5n_g = no_norm_evm.g_to_t(hgvs_genomic_5pr, saved_hgvs_coding.ac) - - # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths - disparity_deletion_in = ['false', 'false'] - if stored_hgvs_not_delins != '': - # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) - # This test will only occur in dup of single base, insertion or substitution - if not re.search('_', str(hgvs_not_delins.posedit.pos)): - if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', - hgvs_genomic_5pr.posedit.edit.type): - # For gap in chr, map to t. - but becaouse we have pushed to 5 prime by norm, add 1 to end pos - plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) - plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 - plussed_hgvs_not_delins.posedit.edit.ref = '' - transcript_variant = no_norm_evm.g_to_t(plussed_hgvs_not_delins, - str(saved_hgvs_coding.ac)) - if (( - transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( - hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', 
str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - else: - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - else: - pass - else: - pass - - hard_fail = 'false' - try: - tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) - except Exception as e: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - tx_hgvs_not_delins = hgvs_coding - hard_fail = 'true' - - # Create normalized 
version of tx_hgvs_not_delins - rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) - # Check for +ve base and adjust - if re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\+', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - fn.exceptPass() - - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.myevm_t_to_g(test_tx_var, no_norm_evm, - primary_assembly, hn) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.myevm_t_to_g(test_tx_var, no_norm_evm, - primary_assembly, hn) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: - # pass - - # Check for -ve base and adjust 
- elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - fn.exceptPass() - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base back to next available non-offset base - # rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base - 1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # Delete the ref - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # Add the additional base to the ALT - start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) - rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.myevm_t_to_g(test_tx_var, no_norm_evm, - primary_assembly, hn) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - 
# re-make genomic and tx - hgvs_not_delins = self.myevm_t_to_g(test_tx_var, no_norm_evm, - primary_assembly, hn) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - else: - pass - - # Logic - if len(hgvs_not_delins.posedit.edit.ref) < len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( - hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['chromosome', gap_length] - elif len(hgvs_not_delins.posedit.edit.ref) > len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( - rn_tx_hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] - else: - re_capture_tx_variant = [] - for internal_possibility in hgvs_genomic_possibilities: - - if internal_possibility == '': - continue - - hgvs_t_possibility = self.vm.g_to_t(internal_possibility, hgvs_coding.ac) - if hgvs_t_possibility.posedit.edit.type == 'ins': - try: - hgvs_t_possibility = self.vm.c_to_n(hgvs_t_possibility) - except: - fn.exceptPass() - ins_ref = self.sf.fetch_seq(hgvs_t_possibility.ac, - hgvs_t_possibility.posedit.pos.start.base - 1, - hgvs_t_possibility.posedit.pos.start.base + 1) - try: - hgvs_t_possibility = self.vm.n_to_c(hgvs_t_possibility) - except: - fn.exceptPass() - hgvs_t_possibility.posedit.edit.ref = ins_ref - hgvs_t_possibility.posedit.edit.alt = ins_ref[ - 0] + hgvs_t_possibility.posedit.edit.alt + \ - ins_ref[1] - if internal_possibility.posedit.edit.type == 'ins': - ins_ref = self.sf.fetch_seq(internal_possibility.ac, - internal_possibility.posedit.pos.start.base - 1, - internal_possibility.posedit.pos.end.base) - internal_possibility.posedit.edit.ref = ins_ref - internal_possibility.posedit.edit.alt = ins_ref[ - 0] + internal_possibility.posedit.edit.alt + \ - ins_ref[1] - - if len(hgvs_t_possibility.posedit.edit.ref) < len( - 
internal_possibility.posedit.edit.ref): - gap_length = len(internal_possibility.posedit.edit.ref) - len( - hgvs_t_possibility.posedit.edit.ref) - re_capture_tx_variant = ['transcript', gap_length, hgvs_t_possibility] - hgvs_not_delins = internal_possibility - hgvs_genomic_5pr = internal_possibility - break - - if re_capture_tx_variant != []: - try: - tx_hgvs_not_delins = self.vm.c_to_n(re_capture_tx_variant[2]) - except: - tx_hgvs_not_delins = re_capture_tx_variant[2] - disparity_deletion_in = re_capture_tx_variant[0:-1] - else: - pass - - # Final sanity checks - try: - self.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) - except Exception as e: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - logger.warning(str(e)) - continue - try: - hn.normalize(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - logger.warning(error) - continue - elif re.match('Normalization of intronic variants is not supported', error): - # We know that this cannot be because of an intronic variant, so must be aligned to tx gap - disparity_deletion_in = ['transcript', 'Requires Analysis'] - - if hard_fail == 'true': - disparity_deletion_in = ['false', 'false'] - - # Recreate hgvs_genomic - if disparity_deletion_in[0] == 'transcript': - hgvs_genomic = hgvs_not_delins - - # Pre-processing of tx_hgvs_not_delins - try: - if tx_hgvs_not_delins.posedit.edit.alt is None: - tx_hgvs_not_delins.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' 
+ str( - tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = self.hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) - - - # GAP IN THE TRANSCRIPT DISPARITY DETECTED - if disparity_deletion_in[0] == 'transcript': - gap_position = '' - gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + primary_assembly - - # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', - str( - tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', - str( - tx_hgvs_not_delins.posedit.pos.end))): - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - - # Copy the current variant - tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) - try: - if tx_gap_fill_variant.posedit.edit.alt is None: - tx_gap_fill_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( - tx_gap_fill_variant.posedit.pos.start) + '_' + str( - tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = self.hp.parse_hgvs_variant( - tx_gap_fill_variant_delins_from_dup) - - # Identify which half of the NOT-intron the start position of the variant is in - if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - - try: - tx_gap_fill_variant = self.vm.n_to_c(tx_gap_fill_variant) - except: - fn.exceptPass() - genomic_gap_fill_variant = self.vm.t_to_g(tx_gap_fill_variant, - reverse_normalized_hgvs_genomic.ac) - genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - - try: - c_tx_hgvs_not_delins = self.vm.n_to_c(tx_hgvs_not_delins) - except Exception: - c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = self.vm.t_to_g(c_tx_hgvs_not_delins, - hgvs_genomic_5pr.ac) - - # Ensure an ALT exists - try: - if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_gap_fill_variant_delins_from_dup = 
genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( - genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = self.hp.parse_hgvs_variant( - genomic_gap_fill_variant_delins_from_dup) - genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( - genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = self.hp.parse_hgvs_variant( - genomic_gap_fill_variant_alt_delins_from_dup) - - # Correct insertion alts - if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = self.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - genomic_gap_fill_variant_alt.posedit.pos.end.base) - genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - append_ref[1] - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) - else: - # Deletions with no ins - pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = genomic_gap_fill_variant.posedit.pos.start.base - alt_start = 
genomic_gap_fill_variant_alt.posedit.pos.start.base - ref_base_dict = {} - for base in reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = ref_start + 1 - - alt_base_dict = {} - - # NEED TO SEARCH FOR RANGE = and replace with interval_range - # Need to search for int and replace with integer - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, - genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): - if integer == alt_start: - alt_base_dict[integer] = str(''.join(alternate_bases)) - else: - alt_base_dict[integer] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, - genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): - if integer in list(alt_base_dict.keys()): - alternate_sequence_bases.append(alt_base_dict[integer]) - else: - alternate_sequence_bases.append(ref_base_dict[integer]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Add the new alt to the gap fill variant and generate transcript variant - genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = self.vm.g_to_t(genomic_gap_fill_variant, - tx_gap_fill_variant.ac) - - # Set warning - gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - disparity_deletion_in[1] = [gap_size] - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - else: - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - auto_info = auto_info + '%s' % (gap_position) - - else: - if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # In this instance, we have identified a transcript gap but the n. version of - # the transcript variant but do not have a position which actually hits the gap, - # so the variant likely spans the gap, and is not picked up by an offset. 
- try: - c1 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - g1 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) - g3 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - ng2 = hn.normalize(g2) - g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - len(g3.posedit.edit.ref) - 1) - try: - c2 = self.vm.g_to_t(g3, c1.ac) - if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - pass - else: - tx_hgvs_not_delins = c2 - try: - tx_hgvs_not_delins = self.vm.c_to_n(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSError: - fn.exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError: - fn.exceptPass() - - if re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.vm.g_to_t(g3, c1.ac) - 
hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - 
for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - gpe = for_location_c.posedit.pos.end.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', - str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - 1 - gpe = for_location_c.posedit.pos.end.base - gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - else: - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) + '\n' - hgvs_refreshed_variant = tx_hgvs_not_delins - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - - # GAP IN THE CHROMOSOME - - elif disparity_deletion_in[0] == 'chromosome': - # Set warning variables - gap_position = '' - gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + primary_assembly - hgvs_refreshed_variant = tx_hgvs_not_delins - # Warn - auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( - hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(disparity_deletion_in[ - 1]) + ' transcript base(s) that fail to align to chromosome ' + str( - hgvs_genomic.ac) + '\n' - gapped_transcripts = gapped_transcripts + str(hgvs_refreshed_variant.ac) + ' ' - else: - # Keep the same by re-setting rel_var - hgvs_refreshed_variant = saved_hgvs_coding - - # Edit the output - if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( - hgvs_refreshed_variant.type)): - hgvs_refreshed_variant = evm.n_to_c(hgvs_refreshed_variant) - else: - pass - try: - hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) - if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - hgvs_refreshed_variant.posedit.edit.ref[-1] == \ - hgvs_refreshed_variant.posedit.edit.alt[-1]: - hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - 0:-1] - hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - 0:-1] - hgvs_refreshed_variant.posedit.pos.end.base = 
hgvs_refreshed_variant.posedit.pos.end.base - 1 - hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) - elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - hgvs_refreshed_variant.posedit.edit.ref[0] == \ - hgvs_refreshed_variant.posedit.edit.alt[0]: - hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - 1:] - hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - 1:] - hgvs_refreshed_variant.posedit.pos.start.base = hgvs_refreshed_variant.posedit.pos.start.base + 1 - hgvs_refreshed_variant = hn.normalize(hgvs_refreshed_variant) - except Exception as e: - error = str(e) - # Ensure the final variant is not intronic nor does it cross exon boundaries - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - hgvs_refreshed_variant = saved_hgvs_coding - else: - pass - - # Sort out equality to equality c. 
events where the code will add 2 additional bases - if hgvs_coding.posedit.edit.type == 'identity' and hgvs_refreshed_variant.posedit.edit.type == 'identity': # and len(hgvs_refreshed_variant.posedit.edit.ref) == (len(hgvs_coding.posedit.edit.ref) + 2): - pass - else: - hgvs_coding = copy.deepcopy(hgvs_refreshed_variant) - coding = fn.valstr(hgvs_coding) - formatted_variant = coding - - # OBTAIN THE RefSeqGene coordinates - # Attempt 1 = UTA - sequences_for_tx = self.hdp.get_tx_mapping_options(hgvs_coding.ac) - recovered_rsg = [] - - for sequence in sequences_for_tx: - if re.match('^NG_', sequence[1]): - recovered_rsg.append(sequence[1]) - recovered_rsg.sort() - recovered_rsg.reverse() - - if len(recovered_rsg) > 0 and 'NG_' in recovered_rsg[0]: - refseqgene_ac = recovered_rsg[0] - else: - refseqgene_ac = '' - - # Given the difficulties with mapping to and from RefSeqGenes, we now solely rely on UTA - if refseqgene_ac != '': - hgvs_refseq = self.vm.t_to_g(hgvs_coding, refseqgene_ac) - # Normalize the RefSeqGene Variant to the correct position - try: - hgvs_refseq = hn.normalize(hgvs_refseq) - except Exception as e: - # if re.search('insertion length must be 1', error): - hgvs_refseq = 'RefSeqGene record not available' - refseq = 'RefSeqGene record not available' - hgvs_refseq_ac = 'RefSeqGene record not available' - pass - else: - refseq = fn.valstr(hgvs_refseq) - hgvs_refseq_ac = hgvs_refseq.ac - else: - hgvs_refseq = 'RefSeqGene record not available' - refseq = 'RefSeqGene record not available' - hgvs_refseq_ac = 'RefSeqGene record not available' - - # Predicted effect on protein - protein_dict = self.myc_to_p(hgvs_coding, evm, re_to_p=False) - if protein_dict['error'] == '': - hgvs_protein = protein_dict['hgvs_protein'] - protein = str(hgvs_protein) - else: - error = protein_dict['error'] - my_variant.warnings += ': ' + str(error) - if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': - hgvs_protein = 
protein_dict['hgvs_protein'] - protein = str(hgvs_protein) - else: - logger.error(error) - continue - - # Gene orientation wrt genome - ori = self.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=hgvs_genomic.ac, - alt_aln_method=alt_aln_method) - ori = int(ori[0]['alt_strand']) - - # Look for normalized variant options that do not match hgvs_coding - # boundary crossing normalization - # Re-Save the required variants - hgvs_seek_var = copy.deepcopy(hgvs_coding) - saved_hgvs_coding = copy.deepcopy(hgvs_coding) - - if ori == -1: - # position genomic at its most 5 prime position - try: - query_genomic = reverse_normalizer.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if saved_hgvs_coding.posedit.edit.type != hgvs_seek_var.posedit.edit.type: - rec_var = 'false' - hgvs_seek_var = saved_hgvs_coding - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - elif suppress_c_normalization == 'true': - rec_var = 'false' - hgvs_seek_var = saved_hgvs_coding - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - elif (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - try: - automap = fn.valstr(saved_hgvs_coding) + ' normalized to ' + fn.valstr(hgvs_seek_var) - hgvs_coding = hgvs_seek_var - coding = fn.valstr(hgvs_coding) - my_variant.warnings += ': ' + automap - rng = hn.normalize(query_genomic) - except NotImplementedError: - pass - try: - 
c_for_p = self.vm.g_to_t(rng, hgvs_coding.ac) - except hgvs.exceptions.HGVSInvalidIntervalError as e: - c_for_p = seek_var - try: - # Predicted effect on protein - protein_dict = self.myc_to_p(c_for_p, evm, re_to_p=False) - if protein_dict['error'] == '': - hgvs_protein = protein_dict['hgvs_protein'] - protein = str(hgvs_protein) - else: - error = protein_dict['error'] - if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': - hgvs_protein = protein_dict['hgvs_protein'] - my_variant.warnings += ': ' + str(error) - # Replace protein description in vars table - protein = str(hgvs_protein) - except NotImplementedError: - fn.exceptPass() - else: - # Double check protein position by normalize genomic, and normalize back to c. for normalize or not to normalize issue - coding = fn.valstr(hgvs_coding) - - elif ori != -1: - # position genomic at its most 3 prime position - try: - query_genomic = hn.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if saved_hgvs_coding.posedit.edit.type != hgvs_seek_var.posedit.edit.type: - rec_var = 'false' - hgvs_seek_var = saved_hgvs_coding - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - elif suppress_c_normalization == 'true': - rec_var = 'false' - hgvs_seek_var = saved_hgvs_coding - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - elif (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - saved_hgvs_coding.posedit.pos.end.base + 
saved_hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - try: - automap = fn.valstr(saved_hgvs_coding) + ' normalized to ' + fn.valstr(hgvs_seek_var) - hgvs_coding = hgvs_seek_var - coding = fn.valstr(hgvs_coding) - my_variant.warnings += ': ' + automap - except NotImplementedError: - fn.exceptPass() - else: - # Double check protein position by reverse_norm genomic, and normalize back to c. for normalize or not to normalize issue - coding = fn.valstr(hgvs_coding) - rng = reverse_normalizer.normalize(query_genomic) - try: - # Diagram where - = intron and E = Exon - - # 3 prime - # ---------EEEEEEEEEEEEEEEEE----------- - # < - # Result, normalize of new variant will baulk at intronic - # 5 prime - # < - # Result, normalize of new variant will be happy - c_for_p = self.vm.g_to_t(rng, hgvs_coding.ac) - try: - hn.normalize(c_for_p) - except hgvs.exceptions.HGVSError as e: - fn.exceptPass() - else: - # hgvs_protein = va_func.protein(str(c_for_p), evm, hp) - protein_dict = self.myc_to_p(c_for_p, evm, re_to_p=False) - if protein_dict['error'] == '': - hgvs_protein = protein_dict['hgvs_protein'] - protein = str(hgvs_protein) - else: - error = protein_dict['error'] - if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': - hgvs_protein = protein_dict['hgvs_protein'] - my_variant.warnings += ': ' + str(error) - # Replace protein description in vars table - protein = str(hgvs_protein) - except Exception: - fn.exceptPass() - - # Check for up-to-date transcript version - updated_transcript_variant = 'None' - tx_id_info = self.hdp.get_tx_identity_info(hgvs_coding.ac) - uta_gene_symbol = tx_id_info[6] - tx_for_gene = self.hdp.get_tx_for_gene(uta_gene_symbol) - ac_root, ac_version = hgvs_coding.ac.split('.') - version_tracking = '0' - update = '' - for accession in tx_for_gene: - try: - if re.match(ac_root, accession[3]): - query_version = accession[3].split('.')[1] - if int(query_version) > int(ac_version) and int(query_version) > int( 
- version_tracking): - version_tracking = query_version - update = accession[3] - except ValueError: - fn.exceptPass() - - if update != '': - hgvs_updated = copy.deepcopy(hgvs_coding) - hgvs_updated.ac = update - try: - self.vr.validate(hgvs_updated) - # Updated reference sequence - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('does not agree with reference sequence', str(error)): - match = re.findall(r'\(([GATC]+)\)', error) - new_ref = match[1] - hgvs_updated.posedit.edit.ref = new_ref - self.vr.validate(hgvs_updated) - updated_transcript_variant = hgvs_updated - else: - pass - updated_transcript_variant = hgvs_updated - my_variant.warnings += ': ' + 'A more recent version of the selected reference sequence ' + hgvs_coding.ac + ' is available (' + updated_transcript_variant.ac + ')' + ': ' + str( - updated_transcript_variant) + ' MUST be fully validated prior to use in reports: select_variants=' + fn.valstr( - updated_transcript_variant) # Set the data my_variant.output_type_flag = 'gene' my_variant.description = hgnc_gene_info - my_variant.coding = str(hgvs_coding) - my_variant.genomic_r = str(hgvs_refseq) - my_variant.genomic_g = str(hgvs_genomic) - my_variant.protein = str(hgvs_protein) + # my_variant.coding = str(hgvs_coding) + # my_variant.genomic_r = str(hgvs_refseq) + # my_variant.genomic_g = str(hgvs_genomic) + # my_variant.protein = str(hgvs_protein) my_variant.primary_assembly = primary_assembly - if gap_compensation is True: - my_variant.test_stash_tx_left = test_stash_tx_left - my_variant.test_stash_tx_right = test_stash_tx_right + # if gap_compensation is True: + # my_variant.test_stash_tx_left = test_stash_tx_left + # my_variant.test_stash_tx_right = test_stash_tx_right # finish timing logger.traceEnd(my_variant) # Report errors to User and VV admin @@ -3938,10 +745,10 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr transcript_description = valid.description # Stashed variants - if 
valid.test_stash_tx_left: - test_stash_tx_left = valid.test_stash_tx_left - if valid.test_stash_tx_right: - test_stash_tx_right = valid.test_stash_tx_right + # if valid.test_stash_tx_left: + # test_stash_tx_left = valid.test_stash_tx_left + # if valid.test_stash_tx_right: + # test_stash_tx_right = valid.test_stash_tx_right # Multiple genomic variants # multi_gen_vars = [] From f0cdd42c65c2bbbe365acedfa79aad5a139e2d26 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 24 Apr 2019 11:13:42 +0100 Subject: [PATCH 064/223] Refined function parameters --- VariantValidator/modules/gapped_mapping.py | 26 ++++++++++- VariantValidator/modules/mappers.py | 52 +++++++++++----------- 2 files changed, 51 insertions(+), 27 deletions(-) diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index 09834a1c..15a4c15d 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -2288,13 +2288,34 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): print('in gapped_mapping', hgvs_coding) - return hgvs_genomic, gapped_transcripts, auto_info, suppress_c_normalization, hgvs_coding + return hgvs_genomic, gapped_transcripts, auto_info, suppress_c_normalization, hgvs_coding, hgvs_genomic_possibilities -def g_to_t_gapped_mapping_stage2(validator, variant, ori, hgvs_coding, hgvs_genomic_5pr, saved_hgvs_coding, stored_hgvs_not_delins, gapped_transcripts, hgvs_genomic_possibilities, auto_info, reverse_normalized_hgvs_genomic, hgvs_genomic): +def g_to_t_gapped_mapping_stage2(validator, variant, ori, hgvs_coding, hgvs_genomic, gapped_transcripts, hgvs_genomic_possibilities, auto_info): logger.warning('g_to_t gap code 2 active') + hgvs_genomic_variant = hgvs_genomic + reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic_variant) + hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + vcf_dict = 
vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, variant.primary_assembly, + variant.reverse_normalizer, validator.sf) + chr = vcf_dict['chr'] + pos = vcf_dict['pos'] + ref = vcf_dict['ref'] + alt = vcf_dict['alt'] + + # Create a VCF call + vcf_component_list = [str(chr), str(pos), str(ref), (alt)] + vcf_genomic = '-'.join(vcf_component_list) + + # DO NOT DELETE + # Generate an end position + end = str(int(pos) + len(ref) - 1) + pos = str(pos) + stored_hgvs_not_delins = validator.hp.parse_hgvs_variant(str( + hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) orientation = int(ori[0]['alt_strand']) + saved_hgvs_coding = copy.deepcopy(hgvs_coding) # is it in an exon? is_it_in_an_exon = 'no' @@ -2500,6 +2521,7 @@ def g_to_t_gapped_mapping_stage2(validator, variant, ori, hgvs_coding, hgvs_geno disparity_deletion_in = ['transcript', gap_length] else: re_capture_tx_variant = [] + # TODO: Need to check if hgvs_genomic_possibilities is ever not empty! for internal_possibility in hgvs_genomic_possibilities: if internal_possibility == '': diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index 3d903971..fcf48d85 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -186,6 +186,7 @@ def transcripts_to_gene(variant, validator): warning = '' caution = '' error = '' + gapped_transcripts = '' # Collect information for genomic level validation obj = validator.hp.parse_hgvs_variant(str(variant.hgvs_formatted)) @@ -734,8 +735,9 @@ def transcripts_to_gene(variant, validator): # Loop out gap finding code under these circumstances! 
if gap_compensation is True: - - hgvs_genomic, gapped_transcripts, auto_info, suppress_c_normalization, hgvs_coding = gapped_mapping.g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var) + print(hgvs_genomic_possibilities) + assert hgvs_genomic_possibilities == [] + hgvs_genomic, gapped_transcripts, auto_info, suppress_c_normalization, hgvs_coding, hgvs_genomic_possibilities = gapped_mapping.g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var) else: stored_hgvs_genomic_variant = hgvs_genomic @@ -745,44 +747,44 @@ def transcripts_to_gene(variant, validator): genomic = fn.valstr(hgvs_genomic) # Create pseudo VCF based on amended hgvs_genomic - hgvs_genomic_variant = hgvs_genomic + # hgvs_genomic_variant = hgvs_genomic # Reverse normalize hgvs_genomic_variant: NOTE will replace ref - reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic_variant) - - hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) + # hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + # # Create vcf - vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, variant.primary_assembly, - variant.reverse_normalizer, validator.sf) - chr = vcf_dict['chr'] - pos = vcf_dict['pos'] - ref = vcf_dict['ref'] - alt = vcf_dict['alt'] + # vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, variant.primary_assembly, + # variant.reverse_normalizer, validator.sf) + # chr = vcf_dict['chr'] + # pos = vcf_dict['pos'] + # ref = vcf_dict['ref'] + # alt = vcf_dict['alt'] # Create a VCF call - vcf_component_list = [str(chr), str(pos), str(ref), (alt)] - vcf_genomic = '-'.join(vcf_component_list) - - # DO NOT DELETE - # Generate an end position - end = str(int(pos) + len(ref) - 1) - pos = str(pos) + # vcf_component_list = [str(chr), str(pos), str(ref), (alt)] + # vcf_genomic = '-'.join(vcf_component_list) + # + # # DO NOT DELETE + # # 
Generate an end position + # end = str(int(pos) + len(ref) - 1) + # pos = str(pos) # DO NOT DELETE - stored_hgvs_not_delins = validator.hp.parse_hgvs_variant(str( - hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) + #stored_hgvs_not_delins = validator.hp.parse_hgvs_variant(str( + # hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) # Apply gap code to re-format hgvs_coding # Store the current hgvs:c. description saved_hgvs_coding = copy.deepcopy(hgvs_coding) # Get orientation of the gene wrt genome and a list of exons mapped to the genome - ori = validator.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=hgvs_genomic_5pr.ac, + ori = validator.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=reverse_normalized_hgvs_genomic.ac, alt_aln_method=validator.alt_aln_method) orientation = int(ori[0]['alt_strand']) # Look for normalized variant options that do not match hgvs_coding - hgvs_genomic = copy.deepcopy(hgvs_genomic_variant) + # hgvs_genomic = copy.deepcopy(hgvs_genomic_variant) if orientation == -1: # position genomic at its most 5 prime position try: @@ -831,8 +833,8 @@ def transcripts_to_gene(variant, validator): logger.warning("gap_compensation_2 = " + str(gap_compensation)) if gap_compensation is True: hgvs_coding = gapped_mapping.g_to_t_gapped_mapping_stage2( - validator, variant, ori, hgvs_coding, hgvs_genomic_5pr, saved_hgvs_coding, stored_hgvs_not_delins, - gapped_transcripts, hgvs_genomic_possibilities, auto_info, reverse_normalized_hgvs_genomic, hgvs_genomic + validator, variant, ori, hgvs_coding, hgvs_genomic, gapped_transcripts, hgvs_genomic_possibilities, + auto_info ) # OBTAIN THE RefSeqGene coordinates From bd056b66a015aaada8e9ea9f9234a8af463ddcf6 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 24 Apr 2019 12:02:59 +0100 Subject: [PATCH 065/223] Removed unused variables and tidied up mapper function --- VariantValidator/modules/mappers.py | 213 
++++------------------------ 1 file changed, 31 insertions(+), 182 deletions(-) diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index fcf48d85..1460daac 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -276,18 +276,10 @@ def transcripts_to_gene(variant, validator): # Get orientation of the gene wrt genome and a list of exons mapped to the genome ori = validator.tx_exons(tx_ac=tx_ac, alt_ac=genomic_ac, alt_aln_method=validator.alt_aln_method) - #print('exons:', ori) - orientation = int(ori[0]['alt_strand']) - intronic_variant = 'false' - - # Collect variant sequence information via normalisation (normalizer) or if intronic via mapping - # INTRONIC OFFSETS - Required for Exon table - # Variable to collect offset to exon boundary - ex_offset = 0 + plus = re.compile(r"\d\+\d") # finds digit + digit minus = re.compile(r"\d\-\d") # finds digit - digit - geno = re.compile(r':g.') if plus.search(input) or minus.search(input): if 'error' in str(to_g): if validator.alt_aln_method != 'genebuild': @@ -410,8 +402,7 @@ def transcripts_to_gene(variant, validator): # hgvs will handle incorrect coordinates so need to automap errors # Make sure any input intronic coordinates are correct # Get the desired transcript - pat_r = re.compile(':r.') - pat_g = re.compile(':g.') + if cck: # This should only ever hit coding and RNA variants if 'del' in formatted_variant: @@ -492,7 +483,6 @@ def transcripts_to_gene(variant, validator): # automapping of variant completed automap = variant.trapped + ' automapped to ' + str(post_var) variant.warnings += str(caution) + ': ' + str(automap) - relevant = "Select the automapped transcript and click Submit to analyse" # Kill current line and append for re-submission # Tag the line so that it is not written out @@ -504,7 +494,7 @@ def transcripts_to_gene(variant, validator): validator.batch_list.append(query) else: # del not in formatted_variant - if 
pat_r.search(variant.trapped): + if ':r.' in variant.trapped: coding = validator.coding(formatted_variant, validator.hp) trans_acc = coding.ac # c to Genome coordinates - Map the variant to the genome @@ -529,7 +519,6 @@ def transcripts_to_gene(variant, validator): automap = input + ' automapped to ' + post_var variant.warnings += ': ' + str(caution) + ': ' + str( automap) - relevant = "Select the automapped transcript and click Submit to analyse" # Kill current line and append for re-submission # Tag the line so that it is not written out @@ -557,7 +546,6 @@ def transcripts_to_gene(variant, validator): automap = str(variant.trapped) + ' automapped to ' + str(post_var) variant.warnings += ': ' + str(caution) + ': ' + str( automap) - relevant = "Select the automapped transcript and click Submit to analyse" # Kill current line and append for re-submission # Tag the line so that it is not written out @@ -569,7 +557,7 @@ def transcripts_to_gene(variant, validator): validator.batch_list.append(query) # If cck not true - elif pat_r.search(variant.trapped): + elif ':r.' in variant.trapped: # set input hgvs object hgvs_rna_input = validator.hp.parse_hgvs_variant(variant.trapped) # Traps the hgvs variant of r. for further use inp = str(validator.hgvs_r_to_c(hgvs_rna_input)) @@ -623,7 +611,7 @@ def transcripts_to_gene(variant, validator): query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) - elif pat_g.search(input): + elif ':g.' 
in input: pass else: @@ -670,9 +658,6 @@ def transcripts_to_gene(variant, validator): assert valid is True # If valid is False we won't reach this part, so I can remove the if condition - var_tab = 'true' - cores = "HGVS-compliant variant descriptions" + warning - # v0.1a1 edit if fn.valstr(pre_valid) != fn.valstr(post_valid): if variant.reftype == ':g.': @@ -685,8 +670,6 @@ def transcripts_to_gene(variant, validator): # Coding sequence - BASED ON NORMALIZED VARIANT IF EXONIC hgvs_coding = validator.coding(formatted_variant, validator.hp) - boundary = re.compile('exon-intron boundary') - spanning = re.compile('exon/intron') try: hgvs_coding = variant.hn.normalize(hgvs_coding) @@ -717,16 +700,9 @@ def transcripts_to_gene(variant, validator): # Warn status logger.warning("gap_compensation_1 = " + str(gap_compensation)) - coding = fn.valstr(hgvs_coding) - - # RNA sequence - hgvs_rna = copy.deepcopy(hgvs_coding) - hgvs_rna = validator.hgvs_c_to_r(hgvs_rna) - rna = str(hgvs_rna) # Genomic sequence hgvs_genomic = validator.myevm_t_to_g(hgvs_coding, variant.no_norm_evm, variant.primary_assembly, variant.hn) - final_hgvs_genomic = hgvs_genomic # genomic_possibilities # 1. take the simple 3 pr normalized hgvs_genomic @@ -735,99 +711,21 @@ def transcripts_to_gene(variant, validator): # Loop out gap finding code under these circumstances! 
if gap_compensation is True: - print(hgvs_genomic_possibilities) - assert hgvs_genomic_possibilities == [] - hgvs_genomic, gapped_transcripts, auto_info, suppress_c_normalization, hgvs_coding, hgvs_genomic_possibilities = gapped_mapping.g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var) + hgvs_genomic, gapped_transcripts, auto_info, suppress_c_normalization, hgvs_coding, \ + hgvs_genomic_possibilities = gapped_mapping.g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var) else: - stored_hgvs_genomic_variant = hgvs_genomic suppress_c_normalization = 'false' - gapped_alignment_warning = '' auto_info = '' - genomic = fn.valstr(hgvs_genomic) # Create pseudo VCF based on amended hgvs_genomic # hgvs_genomic_variant = hgvs_genomic # Reverse normalize hgvs_genomic_variant: NOTE will replace ref reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) - # hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - # - # Create vcf - # vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, variant.primary_assembly, - # variant.reverse_normalizer, validator.sf) - # chr = vcf_dict['chr'] - # pos = vcf_dict['pos'] - # ref = vcf_dict['ref'] - # alt = vcf_dict['alt'] - - # Create a VCF call - # vcf_component_list = [str(chr), str(pos), str(ref), (alt)] - # vcf_genomic = '-'.join(vcf_component_list) - # - # # DO NOT DELETE - # # Generate an end position - # end = str(int(pos) + len(ref) - 1) - # pos = str(pos) - - # DO NOT DELETE - #stored_hgvs_not_delins = validator.hp.parse_hgvs_variant(str( - # hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) - - # Apply gap code to re-format hgvs_coding - # Store the current hgvs:c. 
description - saved_hgvs_coding = copy.deepcopy(hgvs_coding) - # Get orientation of the gene wrt genome and a list of exons mapped to the genome - ori = validator.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=reverse_normalized_hgvs_genomic.ac, + ori = validator.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=reverse_normalized_hgvs_genomic.ac, alt_aln_method=validator.alt_aln_method) - orientation = int(ori[0]['alt_strand']) - - # Look for normalized variant options that do not match hgvs_coding - # hgvs_genomic = copy.deepcopy(hgvs_genomic_variant) - if orientation == -1: - # position genomic at its most 5 prime position - try: - query_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript ant test for movement - try: - hgvs_seek_var = variant.evm.g_to_t(query_genomic, hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding - - elif orientation != -1: - # position genomic at its most 3 prime position - try: - query_genomic = variant.hn.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript ant test for movement - try: - hgvs_seek_var = variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - 
saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): - pass - else: - hgvs_seek_var = saved_hgvs_coding # Loop out gap finding code under these circumstances! logger.warning("gap_compensation_2 = " + str(gap_compensation)) @@ -862,28 +760,18 @@ def transcripts_to_gene(variant, validator): except Exception as e: # if re.search('insertion length must be 1', error): hgvs_refseq = 'RefSeqGene record not available' - refseq = 'RefSeqGene record not available' - hgvs_refseq_ac = 'RefSeqGene record not available' - pass - else: - refseq = fn.valstr(hgvs_refseq) - hgvs_refseq_ac = hgvs_refseq.ac else: hgvs_refseq = 'RefSeqGene record not available' - refseq = 'RefSeqGene record not available' - hgvs_refseq_ac = 'RefSeqGene record not available' # Predicted effect on protein protein_dict = validator.myc_to_p(hgvs_coding, variant.evm, re_to_p=False) if protein_dict['error'] == '': hgvs_protein = protein_dict['hgvs_protein'] - protein = str(hgvs_protein) else: error = protein_dict['error'] variant.warnings += ': ' + str(error) if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': hgvs_protein = protein_dict['hgvs_protein'] - protein = str(hgvs_protein) else: logger.error(error) return True @@ -895,10 +783,6 @@ def transcripts_to_gene(variant, validator): # Look for normalized variant options that do not match hgvs_coding # boundary crossing normalization - # Re-Save the required variants - hgvs_seek_var = copy.deepcopy(hgvs_coding) - saved_hgvs_coding = copy.deepcopy(hgvs_coding) - if ori == -1: # position genomic at its most 5 prime position try: @@ -907,58 +791,42 @@ def transcripts_to_gene(variant, validator): query_genomic = hgvs_genomic # Map to the transcript and test for movement try: - hgvs_seek_var = 
variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + hgvs_seek_var = variant.evm.g_to_t(query_genomic, hgvs_coding.ac) except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if saved_hgvs_coding.posedit.edit.type != hgvs_seek_var.posedit.edit.type: - rec_var = 'false' - hgvs_seek_var = saved_hgvs_coding - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) + hgvs_seek_var = hgvs_coding + + if hgvs_coding.posedit.edit.type != hgvs_seek_var.posedit.edit.type: + pass elif suppress_c_normalization == 'true': - rec_var = 'false' - hgvs_seek_var = saved_hgvs_coding - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) + pass elif (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': try: - automap = fn.valstr(saved_hgvs_coding) + ' normalized to ' + fn.valstr(hgvs_seek_var) + automap = fn.valstr(hgvs_coding) + ' normalized to ' + fn.valstr(hgvs_seek_var) hgvs_coding = hgvs_seek_var - coding = fn.valstr(hgvs_coding) variant.warnings += ': ' + automap rng = variant.hn.normalize(query_genomic) except NotImplementedError: - pass + fn.exceptPass() try: c_for_p = validator.vm.g_to_t(rng, hgvs_coding.ac) except hgvs.exceptions.HGVSInvalidIntervalError as e: - c_for_p = seek_var + c_for_p = fn.valstr(hgvs_seek_var) try: # Predicted effect on protein protein_dict = validator.myc_to_p(c_for_p, variant.evm, re_to_p=False) if protein_dict['error'] == '': 
hgvs_protein = protein_dict['hgvs_protein'] - protein = str(hgvs_protein) else: error = protein_dict['error'] if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': hgvs_protein = protein_dict['hgvs_protein'] variant.warnings += ': ' + str(error) - # Replace protein description in vars table - protein = str(hgvs_protein) except NotImplementedError: fn.exceptPass() - else: - # Double check protein position by normalize genomic, and normalize back to c. for normalize or not to normalize issue - coding = fn.valstr(hgvs_coding) - - elif ori != -1: + else: # position genomic at its most 3 prime position try: query_genomic = variant.hn.normalize(hgvs_genomic) @@ -966,36 +834,26 @@ def transcripts_to_gene(variant, validator): query_genomic = hgvs_genomic # Map to the transcript and test for movement try: - hgvs_seek_var = variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + hgvs_seek_var = variant.evm.g_to_t(query_genomic, hgvs_coding.ac) except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if saved_hgvs_coding.posedit.edit.type != hgvs_seek_var.posedit.edit.type: - rec_var = 'false' - hgvs_seek_var = saved_hgvs_coding - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) + hgvs_seek_var = hgvs_coding + + if hgvs_coding.posedit.edit.type != hgvs_seek_var.posedit.edit.type: + pass elif suppress_c_normalization == 'true': - rec_var = 'false' - hgvs_seek_var = saved_hgvs_coding - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) + pass elif (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - 
saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': try: - automap = fn.valstr(saved_hgvs_coding) + ' normalized to ' + fn.valstr(hgvs_seek_var) + automap = fn.valstr(hgvs_coding) + ' normalized to ' + fn.valstr(hgvs_seek_var) hgvs_coding = hgvs_seek_var - coding = fn.valstr(hgvs_coding) variant.warnings += ': ' + automap except NotImplementedError: fn.exceptPass() else: # Double check protein position by reverse_norm genomic, and normalize back to c. for normalize or not to normalize issue - coding = fn.valstr(hgvs_coding) rng = variant.reverse_normalizer.normalize(query_genomic) try: # Diagram where - = intron and E = Exon @@ -1017,19 +875,16 @@ def transcripts_to_gene(variant, validator): protein_dict = validator.myc_to_p(c_for_p, variant.evm, re_to_p=False) if protein_dict['error'] == '': hgvs_protein = protein_dict['hgvs_protein'] - protein = str(hgvs_protein) else: error = protein_dict['error'] if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': hgvs_protein = protein_dict['hgvs_protein'] variant.warnings += ': ' + str(error) # Replace protein description in vars table - protein = str(hgvs_protein) except Exception: fn.exceptPass() # Check for up-to-date transcript version - updated_transcript_variant = 'None' tx_id_info = validator.hdp.get_tx_identity_info(hgvs_coding.ac) uta_gene_symbol = tx_id_info[6] tx_for_gene = validator.hdp.get_tx_for_gene(uta_gene_symbol) @@ -1060,9 +915,7 @@ def transcripts_to_gene(variant, validator): new_ref = match[1] hgvs_updated.posedit.edit.ref = new_ref validator.vr.validate(hgvs_updated) - updated_transcript_variant = hgvs_updated - else: - pass + updated_transcript_variant = hgvs_updated variant.warnings += ': ' + 'A more recent version of the selected reference sequence ' + hgvs_coding.ac + ' is available (' + 
updated_transcript_variant.ac + ')' + ': ' + str( updated_transcript_variant) + ' MUST be fully validated prior to use in reports: select_variants=' + fn.valstr( @@ -1073,8 +926,4 @@ def transcripts_to_gene(variant, validator): variant.genomic_g = str(hgvs_genomic) variant.protein = str(hgvs_protein) - # if gap_compensation is True: - # variant.test_stash_tx_left = test_stash_tx_left - # variant.test_stash_tx_right = test_stash_tx_right - - return False \ No newline at end of file + return False From 13bed4592ba56f8b805a0d02fe56d8bb7985da79 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 25 Apr 2019 10:59:56 +0100 Subject: [PATCH 066/223] Turned gapped_mapping functions into an object --- VariantValidator/modules/gapped_mapping.py | 4720 ++++++++++---------- VariantValidator/modules/mappers.py | 13 +- 2 files changed, 2288 insertions(+), 2445 deletions(-) diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index 15a4c15d..d3c23768 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -1,168 +1,194 @@ -import hgvs -import re import copy -from . import vvHGVS +import re + +import hgvs + from . import vvFunctions as fn +from . import vvHGVS from .vvLogging import logger -def gapped_g_to_c(variant, validator, rel_var): - """ - Gap aware projection from g. to c. 
- """ - - # Set variables for problem specific warnings - gapped_alignment_warning = '' - corrective_action_taken = '' - gapped_transcripts = '' - auto_info = '' - disparity_deletion_in = [] - - # Create a pseudo VCF so that normalization can be applied and a delins can be generated - hgvs_genomic_variant = variant.hgvs_genomic - # Reverse normalize hgvs_genomic_variant: NOTE will replace ref - reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic_variant) - hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - - # VCF - vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, variant.primary_assembly, - variant.reverse_normalizer, validator.sf) - chr = vcf_dict['chr'] - pos = vcf_dict['pos'] - ref = vcf_dict['ref'] - alt = vcf_dict['alt'] - - # Generate an end position - end = str(int(pos) + len(ref) - 1) - pos = str(pos) - - # take a look at the input genomic variant for potential base salvage - stash_ac = vcf_dict['chr'] - stash_pos = int(vcf_dict['pos']) - stash_ref = vcf_dict['ref'] - stash_alt = vcf_dict['alt'] - stash_end = end - stash_input = str(variant.stashed) - # Re-Analyse genomic positions - if 'NG_' in str(variant.hgvs_formatted): - c = validator.hp.parse_hgvs_variant(rel_var[0]) - if hasattr(c.posedit.edit, 'ref') and c.posedit.edit.ref is not None: - c.posedit.edit.ref = c.posedit.edit.ref.upper() - if hasattr(c.posedit.edit, 'alt') and c.posedit.edit.alt is not None: - c.posedit.edit.alt = c.posedit.edit.alt.upper() - stash_input = validator.myevm_t_to_g(c, variant.no_norm_evm, variant.primary_assembly, variant.hn) - if re.match('NC_', str(stash_input)) or re.match('NT_', str(stash_input)) or re.match('NW_', - str( - stash_input)): - try: - hgvs_stash = validator.hp.parse_hgvs_variant(stash_input) - except: - hgvs_stash = stash_input - if hasattr(hgvs_stash.posedit.edit, 'ref') and hgvs_stash.posedit.edit.ref is not None: - hgvs_stash.posedit.edit.ref = hgvs_stash.posedit.edit.ref.upper() - if 
hasattr(hgvs_stash.posedit.edit, 'alt') and hgvs_stash.posedit.edit.alt is not None: - hgvs_stash.posedit.edit.alt = hgvs_stash.posedit.edit.alt.upper() +class GapMapper(object): - stash_ac = hgvs_stash.ac - # MAKE A NO NORM HGVS2VCF - stash_dict = vvHGVS.pos_lock_hgvs2vcf(hgvs_stash, variant.primary_assembly, variant.reverse_normalizer, validator.sf) - stash_ac = hgvs_stash.ac - stash_pos = int(stash_dict['pos']) - stash_ref = stash_dict['ref'] - stash_alt = stash_dict['alt'] - # Generate an end position - stash_end = str(stash_pos + len(stash_ref) - 1) - - # Store a not real deletion insertion - stored_hgvs_not_delins = validator.hp.parse_hgvs_variant(str( - hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) - stash_hgvs_not_delins = validator.hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_genomic_5pr.type + '.' + str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) - - # Set non-valid caution to false - non_valid_caution = 'false' - - # make an empty rel_var - nw_rel_var = [] - - # loop through rel_var and amend where required - for var in rel_var: - # Store the current hgvs:c. description - saved_hgvs_coding = validator.hp.parse_hgvs_variant(var) - - # Remove un-selected transcripts - if validator.select_transcripts != 'all': - tx_ac = saved_hgvs_coding.ac - # If it's in the selected tx dict, keep it - if tx_ac.split('.')[0] in list(validator.select_transcripts_dict.keys()): - pass - # If not get rid of it! - else: - continue + def __init__(self, variant, validator): + self.variant = variant + self.validator = validator - # Get orientation of the gene wrt genome and a list of exons mapped to the genome - ori = validator.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=hgvs_genomic_5pr.ac, - alt_aln_method=validator.alt_aln_method) - orientation = int(ori[0]['alt_strand']) - intronic_variant = 'false' + def gapped_g_to_c(self, rel_var): + """ + Gap aware projection from g. to c. 
+ """ - if orientation == -1: - # position genomic at its most 5 prime position + # Set variables for problem specific warnings + gapped_alignment_warning = '' + corrective_action_taken = '' + gapped_transcripts = '' + auto_info = '' + disparity_deletion_in = [] + + # Create a pseudo VCF so that normalization can be applied and a delins can be generated + hgvs_genomic_variant = self.variant.hgvs_genomic + # Reverse normalize hgvs_genomic_variant: NOTE will replace ref + reverse_normalized_hgvs_genomic = self.variant.reverse_normalizer.normalize(hgvs_genomic_variant) + hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + + # VCF + vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, self.variant.primary_assembly, + self.variant.reverse_normalizer, self.validator.sf) + chr = vcf_dict['chr'] + pos = vcf_dict['pos'] + ref = vcf_dict['ref'] + alt = vcf_dict['alt'] + + # Generate an end position + end = str(int(pos) + len(ref) - 1) + pos = str(pos) + + # take a look at the input genomic variant for potential base salvage + stash_ac = vcf_dict['chr'] + stash_pos = int(vcf_dict['pos']) + stash_ref = vcf_dict['ref'] + stash_alt = vcf_dict['alt'] + stash_end = end + stash_input = str(self.variant.stashed) + # Re-Analyse genomic positions + if 'NG_' in str(self.variant.hgvs_formatted): + c = self.validator.hp.parse_hgvs_variant(rel_var[0]) + if hasattr(c.posedit.edit, 'ref') and c.posedit.edit.ref is not None: + c.posedit.edit.ref = c.posedit.edit.ref.upper() + if hasattr(c.posedit.edit, 'alt') and c.posedit.edit.alt is not None: + c.posedit.edit.alt = c.posedit.edit.alt.upper() + stash_input = self.validator.myevm_t_to_g(c, self.variant.no_norm_evm, self.variant.primary_assembly, self.variant.hn) + if re.match('NC_', str(stash_input)) or re.match('NT_', str(stash_input)) or re.match('NW_', + str( + stash_input)): try: - query_genomic = variant.reverse_normalizer.normalize(variant.hgvs_genomic) + hgvs_stash = 
self.validator.hp.parse_hgvs_variant(stash_input) except: - query_genomic = variant.hgvs_genomic - # Map to the transcript ant test for movement + hgvs_stash = stash_input + if hasattr(hgvs_stash.posedit.edit, 'ref') and hgvs_stash.posedit.edit.ref is not None: + hgvs_stash.posedit.edit.ref = hgvs_stash.posedit.edit.ref.upper() + if hasattr(hgvs_stash.posedit.edit, 'alt') and hgvs_stash.posedit.edit.alt is not None: + hgvs_stash.posedit.edit.alt = hgvs_stash.posedit.edit.alt.upper() + + stash_ac = hgvs_stash.ac + # MAKE A NO NORM HGVS2VCF + stash_dict = vvHGVS.pos_lock_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, self.variant.reverse_normalizer, + self.validator.sf) + stash_ac = hgvs_stash.ac + stash_pos = int(stash_dict['pos']) + stash_ref = stash_dict['ref'] + stash_alt = stash_dict['alt'] + # Generate an end position + stash_end = str(stash_pos + len(stash_ref) - 1) + + # Store a not real deletion insertion + stored_hgvs_not_delins = self.validator.hp.parse_hgvs_variant(str( + hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) + stash_hgvs_not_delins = self.validator.hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_genomic_5pr.type + '.' + str( + stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + + # Set non-valid caution to false + non_valid_caution = 'false' + + # make an empty rel_var + nw_rel_var = [] + + # loop through rel_var and amend where required + for var in rel_var: + # Store the current hgvs:c. description + saved_hgvs_coding = self.validator.hp.parse_hgvs_variant(var) + + # Remove un-selected transcripts + if self.validator.select_transcripts != 'all': + tx_ac = saved_hgvs_coding.ac + # If it's in the selected tx dict, keep it + if tx_ac.split('.')[0] in list(self.validator.select_transcripts_dict.keys()): + pass + # If not get rid of it! 
+ else: + continue + + # Get orientation of the gene wrt genome and a list of exons mapped to the genome + ori = self.validator.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=hgvs_genomic_5pr.ac, + alt_aln_method=self.validator.alt_aln_method) + orientation = int(ori[0]['alt_strand']) + intronic_variant = 'false' + + if orientation == -1: + # position genomic at its most 5 prime position + try: + query_genomic = self.variant.reverse_normalizer.normalize(self.variant.hgvs_genomic) + except: + query_genomic = self.variant.hgvs_genomic + # Map to the transcript ant test for movement + try: + hgvs_seek_var = self.variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): + pass + else: + hgvs_seek_var = saved_hgvs_coding + + elif orientation != -1: + # position genomic at its most 3 prime position + try: + query_genomic = self.variant.hn.normalize(self.variant.hgvs_genomic) + except: + query_genomic = self.variant.hgvs_genomic + # Map to the transcript and test for movement try: - hgvs_seek_var = variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + hgvs_seek_var = self.variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) except hgvs.exceptions.HGVSError as e: hgvs_seek_var = saved_hgvs_coding else: seek_var = fn.valstr(hgvs_seek_var) seek_ac = str(hgvs_seek_var.ac) - if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( - 
hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): - pass - else: - hgvs_seek_var = saved_hgvs_coding + if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): + pass + else: + hgvs_seek_var = saved_hgvs_coding - elif orientation != -1: - # position genomic at its most 3 prime position try: - query_genomic = variant.hn.normalize(variant.hgvs_genomic) - except: - query_genomic = variant.hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): - pass - else: - hgvs_seek_var = saved_hgvs_coding - - try: - intron_test = variant.hn.normalize(hgvs_seek_var) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( + intron_test = self.variant.hn.normalize(hgvs_seek_var) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of 
intronic variants is not supported', error) or re.match( 'Unsupported normalization of variants spanning the exon-intron boundary', error): - intronic_variant = 'hard_fail' - else: + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + intronic_variant = 'hard_fail' + else: + # Double check to see whether the variant is actually intronic? + for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' + + if intronic_variant != 'hard_fail': + if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', + str(hgvs_seek_var.posedit.pos)): # Double check to see whether the variant is actually intronic? for exon in ori: genomic_start = int(exon['alt_start_i']) @@ -175,11 +201,9 @@ def gapped_g_to_c(variant, validator, rel_var): else: intronic_variant = 'true' - if intronic_variant != 'hard_fail': if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', - str(hgvs_seek_var.posedit.pos)): + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str(hgvs_seek_var.posedit.pos)): # Double check to see whether the variant is actually intronic? 
for exon in ori: genomic_start = int(exon['alt_start_i']) @@ -192,1383 +216,1964 @@ def gapped_g_to_c(variant, validator, rel_var): else: intronic_variant = 'true' - if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str(hgvs_seek_var.posedit.pos)): - # Double check to see whether the variant is actually intronic? - for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' + # If exonic, process + if intronic_variant != 'true': + # map form reverse normalized g. to c. + hgvs_from_5n_g = self.variant.no_norm_evm.g_to_t(hgvs_genomic_5pr, saved_hgvs_coding.ac) - # If exonic, process - if intronic_variant != 'true': - # map form reverse normalized g. to c. 
- hgvs_from_5n_g = variant.no_norm_evm.g_to_t(hgvs_genomic_5pr, saved_hgvs_coding.ac) + # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths + disparity_deletion_in = ['false', 'false'] + if stored_hgvs_not_delins != '': + # Refresh hgvs_not_delins from stored_hgvs_not_delins + hgvs_not_delins = self.dup_ins_5prime_shift(stored_hgvs_not_delins, hgvs_genomic_5pr, + saved_hgvs_coding) - # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths - disparity_deletion_in = ['false', 'false'] - if stored_hgvs_not_delins != '': - # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) - # This test will only occur in dup of single base, insertion or substitution - if not re.search('_', str(hgvs_not_delins.posedit.pos)): - if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', - hgvs_genomic_5pr.posedit.edit.type): - # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos - plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) - plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 - plussed_hgvs_not_delins.posedit.edit.ref = '' - transcript_variant = variant.no_norm_evm.g_to_t(plussed_hgvs_not_delins, - str(saved_hgvs_coding.ac)) - if (( - transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( - hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] + try: + tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSInvalidIntervalError: + tx_hgvs_not_delins = 
self.variant.no_norm_evm.g_to_n(hgvs_genomic_5pr, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError: + if str(e) == 'start or end or both are beyond the bounds of transcript record': + tx_hgvs_not_delins = saved_hgvs_coding + + # Create normalized version of tx_hgvs_not_delins + rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) + # Check for +ve base and adjust + if (re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) or re.search( + r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end))): + rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) + + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, + self.variant.primary_assembly, self.variant.hn) + rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, 
self.variant.no_norm_evm, + self.variant.primary_assembly, self.variant.hn) + rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # else: + # pass + + # Check for -ve base and adjust + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) + + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base back to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # Delete the ref + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # Add the additional base to the ALT + start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 + end = rn_tx_hgvs_not_delins.posedit.pos.end.base + ref_bases = self.validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) else: - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', 
str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, + self.variant.primary_assembly, self.variant.hn) + rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, + self.variant.primary_assembly, self.variant.hn) + rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 else: pass - else: - pass + # Logic + if len(hgvs_not_delins.posedit.edit.ref) < len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( + hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['chromosome', gap_length] + elif len(hgvs_not_delins.posedit.edit.ref) > len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = 
len(hgvs_not_delins.posedit.edit.ref) - len( + rn_tx_hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['transcript', gap_length] + else: + hgvs_stash_t = self.validator.vm.g_to_t(stash_hgvs_not_delins, saved_hgvs_coding.ac) + if len(stash_hgvs_not_delins.posedit.edit.ref) > len( + hgvs_stash_t.posedit.edit.ref): + try: + self.variant.hn.normalize(hgvs_stash_t) + except: + fn.exceptPass() + else: + gap_length = len(stash_hgvs_not_delins.posedit.edit.ref) - len( + hgvs_stash_t.posedit.edit.ref) + disparity_deletion_in = ['transcript', gap_length] + try: + tx_hgvs_not_delins = self.validator.vm.c_to_n(hgvs_stash_t) + except: + tx_hgvs_not_delins = hgvs_stash_t + hgvs_not_delins = stash_hgvs_not_delins + elif hgvs_stash_t.posedit.pos.start.offset != 0 or hgvs_stash_t.posedit.pos.end.offset != 0: + disparity_deletion_in = ['transcript', 'Requires Analysis'] + try: + tx_hgvs_not_delins = self.validator.vm.c_to_n(hgvs_stash_t) + except: + tx_hgvs_not_delins = hgvs_stash_t + hgvs_not_delins = stash_hgvs_not_delins + hgvs_genomic_5pr = stash_hgvs_not_delins + else: + pass + + # Final sanity checks try: - tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSInvalidIntervalError: - tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_genomic_5pr, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError: + self.validator.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + except Exception as e: if str(e) == 'start or end or both are beyond the bounds of transcript record': - tx_hgvs_not_delins = saved_hgvs_coding - - # Create normalized version of tx_hgvs_not_delins - rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) - # Check for +ve base and adjust - if (re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) or re.search( - r'\-', 
str(rn_tx_hgvs_not_delins.posedit.pos.end))): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - fn.exceptPass() - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, - variant.primary_assembly, variant.hn) - rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, - variant.primary_assembly, variant.hn) - rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: - # pass + hgvs_not_delins = saved_hgvs_coding + disparity_deletion_in = ['false', 'false'] + logger.warning(str(e)) + try: + self.variant.hn.normalize(tx_hgvs_not_delins) + 
except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + hgvs_not_delins = saved_hgvs_coding + disparity_deletion_in = ['false', 'false'] + elif re.match('Normalization of intronic variants is not supported', error): + # We know that this cannot be because of an intronic variant, so must be aligned to tx gap + disparity_deletion_in = ['transcript', 'Requires Analysis'] + logger.warning(error) + # Pre-processing of tx_hgvs_not_delins + try: + if tx_hgvs_not_delins.posedit.edit.alt is None: + tx_hgvs_not_delins.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' 
+ str( + tx_hgvs_not_delins.posedit.pos.start) + '_' + str( + tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins = self.validator.hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) - # Check for -ve base and adjust - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - fn.exceptPass() - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base back to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # Delete the ref - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # Add the additional base to the ALT - start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) - rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, - variant.primary_assembly, variant.hn) - rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - 
rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, - variant.primary_assembly, variant.hn) - rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - else: - pass + # GAP IN THE TRANSCRIPT DISPARITY DETECTED + if disparity_deletion_in[0] == 'transcript': + gap_position = '' + gapped_alignment_warning = str( + hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly + + # ANY VARIANT WHOLLY WITHIN THE GAP + if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', + str( + tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', + str( + tx_hgvs_not_delins.posedit.pos.end))): + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + # Copy the current variant + tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) + try: + if tx_gap_fill_variant.posedit.edit.alt is None: + tx_gap_fill_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( + tx_gap_fill_variant.posedit.pos.start) + '_' + str( + tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant = self.validator.hp.parse_hgvs_variant( + tx_gap_fill_variant_delins_from_dup) + + # Identify which half of the NOT-intron the start position of the variant is in + if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' - # Logic - if len(hgvs_not_delins.posedit.edit.ref) < len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( - hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['chromosome', gap_length] - elif len(hgvs_not_delins.posedit.edit.ref) > len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( - rn_tx_hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] - else: - hgvs_stash_t = validator.vm.g_to_t(stash_hgvs_not_delins, saved_hgvs_coding.ac) - if len(stash_hgvs_not_delins.posedit.edit.ref) > len( - hgvs_stash_t.posedit.edit.ref): try: - variant.hn.normalize(hgvs_stash_t) + tx_gap_fill_variant = self.validator.vm.n_to_c(tx_gap_fill_variant) except: 
fn.exceptPass() + genomic_gap_fill_variant = self.validator.vm.t_to_g(tx_gap_fill_variant, + reverse_normalized_hgvs_genomic.ac) + genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref + + try: + c_tx_hgvs_not_delins = self.validator.vm.n_to_c(tx_hgvs_not_delins) + except Exception: + c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) + genomic_gap_fill_variant_alt = self.validator.vm.t_to_g(c_tx_hgvs_not_delins, + hgvs_genomic_5pr.ac) + + # Ensure an ALT exists + try: + if genomic_gap_fill_variant_alt.posedit.edit.alt is None: + genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( + genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant = self.validator.hp.parse_hgvs_variant( + genomic_gap_fill_variant_delins_from_dup) + genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' 
+ str( + genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt = self.validator.hp.parse_hgvs_variant( + genomic_gap_fill_variant_alt_delins_from_dup) + + # Correct insertion alts + if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': + append_ref = self.validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, + genomic_gap_fill_variant_alt.posedit.pos.end.base) + genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ + 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ + append_ref[1] + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) + if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: + alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) + else: + # Deletions with no ins + pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = genomic_gap_fill_variant.posedit.pos.start.base + alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base + ref_base_dict = {} + for base in reference_bases: + ref_base_dict[ref_start] = str(base) + ref_start = ref_start + 1 + + alt_base_dict = {} + + # NEED TO SEARCH FOR RANGE = and replace with interval_range + # Need to search for int and replace with integer + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, + genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): + if integer == alt_start: 
+ alt_base_dict[integer] = str(''.join(alternate_bases)) + else: + alt_base_dict[integer] = 'X' + + # Generate the alt sequence + alternate_sequence_bases = [] + for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, + genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): + if integer in list(alt_base_dict.keys()): + alternate_sequence_bases.append(alt_base_dict[integer]) + else: + alternate_sequence_bases.append(ref_base_dict[integer]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Add the new alt to the gap fill variant and generate transcript variant + genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence + hgvs_refreshed_variant = self.validator.vm.g_to_t(genomic_gap_fill_variant, + tx_gap_fill_variant.ac) + + # Set warning + gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) + disparity_deletion_in[1] = [gap_size] + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base else: - gap_length = len(stash_hgvs_not_delins.posedit.edit.ref) - len( - hgvs_stash_t.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' + auto_info = auto_info + '%s' % (gap_position) + else: + if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: + # In this instance, we have identified a transcript gap but the n. version of + # the transcript variant but do not have a position which actually hits the gap, + # so the variant likely spans the gap, and is not picked up by an offset. try: - tx_hgvs_not_delins = validator.vm.c_to_n(hgvs_stash_t) + c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) except: - tx_hgvs_not_delins = hgvs_stash_t - hgvs_not_delins = stash_hgvs_not_delins - elif hgvs_stash_t.posedit.pos.start.offset != 0 or hgvs_stash_t.posedit.pos.end.offset != 0: - disparity_deletion_in = ['transcript', 'Requires Analysis'] - try: - tx_hgvs_not_delins = validator.vm.c_to_n(hgvs_stash_t) - except: - tx_hgvs_not_delins = hgvs_stash_t - hgvs_not_delins = stash_hgvs_not_delins - hgvs_genomic_5pr = stash_hgvs_not_delins + c1 = tx_hgvs_not_delins + g1 = self.validator.nr_vm.t_to_g(c1, self.variant.hgvs_genomic.ac) + g3 = self.validator.nr_vm.t_to_g(c1, self.variant.hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c1, self.variant.hgvs_genomic.ac) + ng2 = self.variant.hn.normalize(g2) + g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( + len(g3.posedit.edit.ref) - 1) + try: + c2 = self.validator.vm.g_to_t(g3, c1.ac) + if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: + pass + else: + tx_hgvs_not_delins = c2 + try: + tx_hgvs_not_delins = self.validator.vm.c_to_n(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSError: + fn.exceptPass() + except hgvs.exceptions.HGVSInvalidVariantError: + fn.exceptPass() + + if re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = self.validator.vm.t_to_g(c1, self.variant.hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c2, self.variant.hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = self.validator.vm.t_to_g(c2, self.variant.hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c1, self.variant.hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = self.validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = self.validator.vm.t_to_g(c1, self.variant.hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c2, self.variant.hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = self.validator.vm.t_to_g(c2, self.variant.hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c1, self.variant.hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = self.validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base + gpe = for_location_c.posedit.pos.end.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\-', + str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = self.validator.vm.t_to_g(c1, self.variant.hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c2, self.variant.hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = self.validator.vm.t_to_g(c2, self.variant.hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c1, self.variant.hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = self.validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + g2 = self.validator.vm.t_to_g(c2, self.variant.hgvs_genomic.ac) + c2 = self.validator.vm.g_to_t(g2, c2.ac) + reference = c1.posedit.edit.ref + c2.posedit.edit.ref[1:] + alternate = c1.posedit.edit.alt + c2.posedit.edit.ref[1:] + c3 = copy.deepcopy(c1) + c3.posedit.pos.end = c2.posedit.pos.end + c3.posedit.edit.ref = '' # reference + c3.posedit.edit.alt = alternate + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base - 1 + gpe = for_location_c.posedit.pos.end.base + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + else: + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + '\n' + hgvs_refreshed_variant = tx_hgvs_not_delins + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + + # GAP IN THE CHROMOSOME + elif disparity_deletion_in[0] == 'chromosome': + # Set warning variables + gap_position = '' + gapped_alignment_warning = str( + hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly + hgvs_refreshed_variant = tx_hgvs_not_delins + # Warn + auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( + hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(disparity_deletion_in[ + 1]) + ' transcript base(s) that fail to align to chromosome ' + str( + self.variant.hgvs_genomic.ac) + '\n' + gapped_transcripts = gapped_transcripts + str(hgvs_refreshed_variant.ac) + ' ' + else: + # Try the push + hgvs_stash = copy.deepcopy(stash_hgvs_not_delins) + stash_ac = hgvs_stash.ac + # Make a hard left and hard right not delins g. + stash_dict_right = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, self.variant.hn, + self.validator.sf) + stash_pos_right = int(stash_dict_right['pos']) + stash_ref_right = stash_dict_right['ref'] + stash_alt_right = stash_dict_right['alt'] + stash_end_right = str(stash_pos_right + len(stash_ref_right) - 1) + stash_hgvs_not_delins_right = self.validator.hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' 
+ str( + stash_pos_right) + '_' + stash_end_right + 'del' + stash_ref_right + 'ins' + stash_alt_right) + stash_dict_left = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, + self.variant.reverse_normalizer, self.validator.sf) + stash_pos_left = int(stash_dict_left['pos']) + stash_ref_left = stash_dict_left['ref'] + stash_alt_left = stash_dict_left['alt'] + stash_end_left = str(stash_pos_left + len(stash_ref_left) - 1) + stash_hgvs_not_delins_left = self.validator.hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' + str( + stash_pos_left) + '_' + stash_end_left + 'del' + stash_ref_left + 'ins' + stash_alt_left) + # Map in-situ to the transcript left and right + try: + tx_hard_right = self.validator.vm.g_to_t(stash_hgvs_not_delins_right, saved_hgvs_coding.ac) + except Exception as e: + tx_hard_right = saved_hgvs_coding else: - pass + normalize_stash_right = self.variant.hn.normalize(stash_hgvs_not_delins_right) + if str(normalize_stash_right.posedit) == str(stash_hgvs_not_delins.posedit): + tx_hard_right = saved_hgvs_coding + try: + tx_hard_left = self.validator.vm.g_to_t(stash_hgvs_not_delins_left, saved_hgvs_coding.ac) + except Exception as e: + tx_hard_left = saved_hgvs_coding + else: + normalize_stash_left = self.variant.hn.normalize(stash_hgvs_not_delins_left) + if str(normalize_stash_left.posedit) == str(stash_hgvs_not_delins.posedit): + tx_hard_left = saved_hgvs_coding + # The Logic - Currently limited to genome gaps + if len(stash_hgvs_not_delins_right.posedit.edit.ref) < len( + tx_hard_right.posedit.edit.ref): + tx_hard_right = self.variant.hn.normalize(tx_hard_right) + gap_position = '' + gapped_alignment_warning = str( + hgvs_genomic_5pr) + ' may be an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly + hgvs_refreshed_variant = tx_hard_right + gapped_transcripts = gapped_transcripts + str(tx_hard_right.ac) + ' ' + elif len(stash_hgvs_not_delins_left.posedit.edit.ref) < 
len( + tx_hard_left.posedit.edit.ref): + tx_hard_left = self.variant.hn.normalize(tx_hard_left) + gap_position = '' + gapped_alignment_warning = str( + hgvs_genomic_5pr) + ' may be an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly + hgvs_refreshed_variant = tx_hard_left + gapped_transcripts = gapped_transcripts + str(tx_hard_left.ac) + ' ' + else: + # Keep the same by re-setting rel_var + hgvs_refreshed_variant = saved_hgvs_coding - # Final sanity checks - try: - validator.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) - except Exception as e: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - hgvs_not_delins = saved_hgvs_coding - disparity_deletion_in = ['false', 'false'] - logger.warning(str(e)) - try: - variant.hn.normalize(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - hgvs_not_delins = saved_hgvs_coding - disparity_deletion_in = ['false', 'false'] - elif re.match('Normalization of intronic variants is not supported', error): - # We know that this cannot be because of an intronic variant, so must be aligned to tx gap - disparity_deletion_in = ['transcript', 'Requires Analysis'] - logger.warning(error) - # Pre-processing of tx_hgvs_not_delins - try: - if tx_hgvs_not_delins.posedit.edit.alt is None: - tx_hgvs_not_delins.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' 
+ str( - tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = validator.hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) - - # GAP IN THE TRANSCRIPT DISPARITY DETECTED - if disparity_deletion_in[0] == 'transcript': - gap_position = '' - gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + variant.primary_assembly - - # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', - str( - tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', - str( - tx_hgvs_not_delins.posedit.pos.end))): - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - # Copy the current variant - tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) - try: - if tx_gap_fill_variant.posedit.edit.alt is None: - tx_gap_fill_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( - tx_gap_fill_variant.posedit.pos.start) + '_' + str( - tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = validator.hp.parse_hgvs_variant( - tx_gap_fill_variant_delins_from_dup) - - # Identify which half of the NOT-intron the start position of the variant is in - if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - - try: - tx_gap_fill_variant = validator.vm.n_to_c(tx_gap_fill_variant) - except: - fn.exceptPass() - genomic_gap_fill_variant = validator.vm.t_to_g(tx_gap_fill_variant, - reverse_normalized_hgvs_genomic.ac) - genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - - try: - c_tx_hgvs_not_delins = validator.vm.n_to_c(tx_hgvs_not_delins) - except Exception: - c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = validator.vm.t_to_g(c_tx_hgvs_not_delins, - hgvs_genomic_5pr.ac) - - # Ensure an ALT exists - try: - if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_gap_fill_variant_delins_from_dup 
= genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( - genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = validator.hp.parse_hgvs_variant( - genomic_gap_fill_variant_delins_from_dup) - genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( - genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = validator.hp.parse_hgvs_variant( - genomic_gap_fill_variant_alt_delins_from_dup) - - # Correct insertion alts - if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - genomic_gap_fill_variant_alt.posedit.pos.end.base) - genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - append_ref[1] - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) + # Edit the output + if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( + hgvs_refreshed_variant.type)): + hgvs_refreshed_variant = self.variant.evm.n_to_c(hgvs_refreshed_variant) + else: + pass + try: + hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) + if 
hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ + hgvs_refreshed_variant.posedit.edit.ref[-1] == \ + hgvs_refreshed_variant.posedit.edit.alt[-1]: + hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ + 0:-1] + hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ + 0:-1] + hgvs_refreshed_variant.posedit.pos.end.base = hgvs_refreshed_variant.posedit.pos.end.base - 1 + hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) + elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ + hgvs_refreshed_variant.posedit.edit.ref[0] == \ + hgvs_refreshed_variant.posedit.edit.alt[0]: + hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ + 1:] + hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ + 1:] + hgvs_refreshed_variant.posedit.pos.start.base = hgvs_refreshed_variant.posedit.pos.start.base + 1 + hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) + except Exception as e: + error = str(e) + # Ensure the final variant is not intronic nor does it cross exon boundaries + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + hgvs_refreshed_variant = saved_hgvs_coding else: - # Deletions with no ins - pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = genomic_gap_fill_variant.posedit.pos.start.base - alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base - ref_base_dict = {} - for base in reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = ref_start + 1 - - alt_base_dict = {} - - # NEED TO SEARCH FOR RANGE = and replace with interval_range - # Need to search for int and replace with integer + 
pass + fn.exceptPass() + # Send to empty nw_rel_var + nw_rel_var.append(hgvs_refreshed_variant) - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, - genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): - if integer == alt_start: - alt_base_dict[integer] = str(''.join(alternate_bases)) - else: - alt_base_dict[integer] = 'X' + # Otherwise these variants need to be set + else: + corrective_action_taken = '' + gapped_alignment_warning = '' + # Send to empty nw_rel_var + nw_rel_var.append(saved_hgvs_coding) + + data = { + 'gapped_alignment_warning': gapped_alignment_warning, + 'corrective_action_taken': corrective_action_taken, + 'auto_info': auto_info, + 'disparity_deletion_in': disparity_deletion_in, + 'gapped_transcripts': gapped_transcripts + } + return data, nw_rel_var + + + def g_to_t_compensation(self, ori, hgvs_coding, rec_var): + orientation = int(ori[0]['alt_strand']) + hgvs_genomic_possibilities = [] + hgvs_genomic = self.validator.myevm_t_to_g(hgvs_coding, self.variant.no_norm_evm, self.variant.primary_assembly, self.variant.hn) - # Generate the alt sequence - alternate_sequence_bases = [] - for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, - genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): - if integer in list(alt_base_dict.keys()): - alternate_sequence_bases.append(alt_base_dict[integer]) - else: - alternate_sequence_bases.append(ref_base_dict[integer]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') + logger.warning('g_to_t gap code 1 active') + rn_hgvs_genomic = self.variant.reverse_normalizer.normalize(hgvs_genomic) + hgvs_genomic_possibilities.append(rn_hgvs_genomic) + if orientation != -1: + try: + chromosome_normalized_hgvs_coding = self.variant.reverse_normalizer.normalize(hgvs_coding) + except 
hgvs.exceptions.HGVSUnsupportedOperationError as e: + chromosome_normalized_hgvs_coding = hgvs_coding + else: + try: + chromosome_normalized_hgvs_coding = self.variant.hn.normalize(hgvs_coding) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + chromosome_normalized_hgvs_coding = hgvs_coding - # Add the new alt to the gap fill variant and generate transcript variant - genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = validator.vm.g_to_t(genomic_gap_fill_variant, - tx_gap_fill_variant.ac) + most_3pr_hgvs_genomic = self.validator.myvm_t_to_g(chromosome_normalized_hgvs_coding, hgvs_genomic.ac, + self.variant.no_norm_evm, self.variant.hn) + hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) - # Set warning - gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - disparity_deletion_in[1] = [gap_size] - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' + # Push from side to side to try pick up odd placements + # MAKE A NO NORM HGVS2VCF + # First to the right + hgvs_stash = copy.deepcopy(hgvs_coding) + try: + hgvs_stash = self.variant.no_norm_evm.c_to_n(hgvs_stash) + except: + fn.exceptPass() + try: + stash_ac = hgvs_stash.ac + stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, self.variant.hn, self.validator.sf) + stash_pos = int(stash_dict['pos']) + stash_ref = stash_dict['ref'] + stash_alt = stash_dict['alt'] + # Generate an end position + stash_end = str(stash_pos + len(stash_ref) - 1) + # make a not real deletion insertion + stash_hgvs_not_delins = self.validator.hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' 
+ str( + stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + try: + stash_hgvs_not_delins = self.variant.no_norm_evm.n_to_c(stash_hgvs_not_delins) + except: + fn.exceptPass() - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - else: - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - auto_info = auto_info + '%s' % (gap_position) + test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) + # stash_genomic = vm.t_to_g(test_stash_tx_right, hgvs_genomic.ac) + stash_genomic = self.validator.myvm_t_to_g(test_stash_tx_right, hgvs_genomic.ac, self.variant.no_norm_evm, self.variant.hn) + # Stash the outputs if required + # test variants = NC_000006.11:g.90403795G= (causes double identity) + # NC_000002.11:g.73675227_73675228insCTC (? incorrect assumed insertion position) + # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': + # pass + if len(test_stash_tx_right.posedit.edit.ref) == ( + (stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + stash_tx_right = test_stash_tx_right + if hasattr(test_stash_tx_right.posedit.edit, + 'alt') and test_stash_tx_right.posedit.edit.alt is not None: + alt = test_stash_tx_right.posedit.edit.alt else: - if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # In this instance, we have identified a transcript gap but the n. 
version of - # the transcript variant but do not have a position which actually hits the gap, - # so the variant likely spans the gap, and is not picked up by an offset. - try: - c1 = validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - g1 = validator.nr_vm.t_to_g(c1, variant.hgvs_genomic.ac) - g3 = validator.nr_vm.t_to_g(c1, variant.hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, variant.hgvs_genomic.ac) - ng2 = variant.hn.normalize(g2) - g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - len(g3.posedit.edit.ref) - 1) - try: - c2 = validator.vm.g_to_t(g3, c1.ac) - if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - pass - else: - tx_hgvs_not_delins = c2 - try: - tx_hgvs_not_delins = validator.vm.c_to_n(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSError: - fn.exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError: - fn.exceptPass() - - if re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = validator.vm.t_to_g(c1, variant.hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c2, variant.hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = validator.vm.t_to_g(c2, variant.hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, variant.hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = validator.vm.t_to_g(c1, variant.hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c2, variant.hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = validator.vm.t_to_g(c2, variant.hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, variant.hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - gpe = for_location_c.posedit.pos.end.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', - str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = validator.vm.t_to_g(c1, variant.hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c2, variant.hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = validator.vm.t_to_g(c2, variant.hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, variant.hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - g2 = validator.vm.t_to_g(c2, variant.hgvs_genomic.ac) - c2 = validator.vm.g_to_t(g2, c2.ac) - reference = c1.posedit.edit.ref + c2.posedit.edit.ref[1:] - alternate = c1.posedit.edit.alt + c2.posedit.edit.ref[1:] - c3 = copy.deepcopy(c1) - c3.posedit.pos.end = c2.posedit.pos.end - c3.posedit.edit.ref = '' # reference - c3.posedit.edit.alt = alternate - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - 1 - gpe = for_location_c.posedit.pos.end.base - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - else: - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) + '\n' - hgvs_refreshed_variant = tx_hgvs_not_delins - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - - # GAP IN THE CHROMOSOME - elif disparity_deletion_in[0] == 'chromosome': - # Set warning variables - gap_position = '' - gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + variant.primary_assembly - hgvs_refreshed_variant = tx_hgvs_not_delins - # Warn - auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( - hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(disparity_deletion_in[ - 1]) + ' transcript base(s) that fail to align to chromosome ' + str( - variant.hgvs_genomic.ac) + '\n' - gapped_transcripts = gapped_transcripts + str(hgvs_refreshed_variant.ac) + ' ' - else: - # Try the push - hgvs_stash = copy.deepcopy(stash_hgvs_not_delins) - stash_ac = hgvs_stash.ac - # Make a hard left and hard right not delins g. - stash_dict_right = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, variant.primary_assembly, variant.hn, validator.sf) - stash_pos_right = int(stash_dict_right['pos']) - stash_ref_right = stash_dict_right['ref'] - stash_alt_right = stash_dict_right['alt'] - stash_end_right = str(stash_pos_right + len(stash_ref_right) - 1) - stash_hgvs_not_delins_right = validator.hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' 
+ str( - stash_pos_right) + '_' + stash_end_right + 'del' + stash_ref_right + 'ins' + stash_alt_right) - stash_dict_left = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, variant.primary_assembly, - variant.reverse_normalizer, validator.sf) - stash_pos_left = int(stash_dict_left['pos']) - stash_ref_left = stash_dict_left['ref'] - stash_alt_left = stash_dict_left['alt'] - stash_end_left = str(stash_pos_left + len(stash_ref_left) - 1) - stash_hgvs_not_delins_left = validator.hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' + str( - stash_pos_left) + '_' + stash_end_left + 'del' + stash_ref_left + 'ins' + stash_alt_left) - # Map in-situ to the transcript left and right - try: - tx_hard_right = validator.vm.g_to_t(stash_hgvs_not_delins_right, saved_hgvs_coding.ac) - except Exception as e: - tx_hard_right = saved_hgvs_coding + alt = '' + if hasattr(stash_genomic.posedit.edit, + 'alt') and stash_genomic.posedit.edit.alt is not None: + g_alt = stash_genomic.posedit.edit.alt else: - normalize_stash_right = variant.hn.normalize(stash_hgvs_not_delins_right) - if str(normalize_stash_right.posedit) == str(stash_hgvs_not_delins.posedit): - tx_hard_right = saved_hgvs_coding - try: - tx_hard_left = validator.vm.g_to_t(stash_hgvs_not_delins_left, saved_hgvs_coding.ac) - except Exception as e: - tx_hard_left = saved_hgvs_coding + g_alt = '' + if (len(alt) - ( + test_stash_tx_right.posedit.pos.end.base - test_stash_tx_right.posedit.pos.start.base) + 1) != ( + len(g_alt) - ( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + hgvs_genomic_possibilities.append(stash_genomic) else: - normalize_stash_left = variant.hn.normalize(stash_hgvs_not_delins_left) - if str(normalize_stash_left.posedit) == str(stash_hgvs_not_delins.posedit): - tx_hard_left = saved_hgvs_coding - # The Logic - Currently limited to genome gaps - if len(stash_hgvs_not_delins_right.posedit.edit.ref) < len( - tx_hard_right.posedit.edit.ref): - tx_hard_right = 
variant.hn.normalize(tx_hard_right) - gap_position = '' - gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' may be an artefact of aligning the transcripts listed below with genome build ' + variant.primary_assembly - hgvs_refreshed_variant = tx_hard_right - gapped_transcripts = gapped_transcripts + str(tx_hard_right.ac) + ' ' - elif len(stash_hgvs_not_delins_left.posedit.edit.ref) < len( - tx_hard_left.posedit.edit.ref): - tx_hard_left = variant.hn.normalize(tx_hard_left) - gap_position = '' - gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' may be an artefact of aligning the transcripts listed below with genome build ' + variant.primary_assembly - hgvs_refreshed_variant = tx_hard_left - gapped_transcripts = gapped_transcripts + str(tx_hard_left.ac) + ' ' + hgvs_genomic_possibilities.append('') + elif test_stash_tx_right.posedit.edit.type == 'identity': + reform_ident = str(test_stash_tx_right).split(':')[0] + reform_ident = reform_ident + ':c.' + str(test_stash_tx_right.posedit.pos) + 'del' + str( + test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) + hgvs_reform_ident = self.validator.hp.parse_hgvs_variant(reform_ident) + try: + self.variant.hn.normalize(hgvs_reform_ident) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error): + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append('') else: - # Keep the same by re-setting rel_var - hgvs_refreshed_variant = saved_hgvs_coding - - # Edit the output - if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( - hgvs_refreshed_variant.type)): - hgvs_refreshed_variant = variant.evm.n_to_c(hgvs_refreshed_variant) + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append(stash_genomic) else: - pass - try: - hgvs_refreshed_variant = variant.hn.normalize(hgvs_refreshed_variant) - if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - 
hgvs_refreshed_variant.posedit.edit.ref[-1] == \ - hgvs_refreshed_variant.posedit.edit.alt[-1]: - hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - 0:-1] - hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - 0:-1] - hgvs_refreshed_variant.posedit.pos.end.base = hgvs_refreshed_variant.posedit.pos.end.base - 1 - hgvs_refreshed_variant = variant.hn.normalize(hgvs_refreshed_variant) - elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - hgvs_refreshed_variant.posedit.edit.ref[0] == \ - hgvs_refreshed_variant.posedit.edit.alt[0]: - hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - 1:] - hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - 1:] - hgvs_refreshed_variant.posedit.pos.start.base = hgvs_refreshed_variant.posedit.pos.start.base + 1 - hgvs_refreshed_variant = variant.hn.normalize(hgvs_refreshed_variant) - except Exception as e: - error = str(e) - # Ensure the final variant is not intronic nor does it cross exon boundaries - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - hgvs_refreshed_variant = saved_hgvs_coding + try: + self.variant.hn.normalize(test_stash_tx_right) + except hgvs.exceptions.HGVSUnsupportedOperationError: + hgvs_genomic_possibilities.append('') else: - pass - fn.exceptPass() - # Send to empty nw_rel_var - nw_rel_var.append(hgvs_refreshed_variant) + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append(stash_genomic) + except hgvs.exceptions.HGVSError as e: + fn.exceptPass() + # Intronic positions not supported. 
Will cause a Value Error + except ValueError: + fn.exceptPass() - # Otherwise these variants need to be set - else: - corrective_action_taken = '' - gapped_alignment_warning = '' - # Send to empty nw_rel_var - nw_rel_var.append(saved_hgvs_coding) - - data = { - 'gapped_alignment_warning': gapped_alignment_warning, - 'corrective_action_taken': corrective_action_taken, - 'auto_info': auto_info, - 'disparity_deletion_in': disparity_deletion_in, - 'gapped_transcripts': gapped_transcripts - } - return data, nw_rel_var - - -def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): - orientation = int(ori[0]['alt_strand']) - hgvs_genomic_possibilities = [] - hgvs_genomic = validator.myevm_t_to_g(hgvs_coding, variant.no_norm_evm, variant.primary_assembly, variant.hn) - - logger.warning('g_to_t gap code 1 active') - rn_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) - hgvs_genomic_possibilities.append(rn_hgvs_genomic) - if orientation != -1: - try: - chromosome_normalized_hgvs_coding = variant.reverse_normalizer.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - chromosome_normalized_hgvs_coding = hgvs_coding - else: - try: - chromosome_normalized_hgvs_coding = variant.hn.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - chromosome_normalized_hgvs_coding = hgvs_coding - - most_3pr_hgvs_genomic = validator.myvm_t_to_g(chromosome_normalized_hgvs_coding, hgvs_genomic.ac, - variant.no_norm_evm, variant.hn) - hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) - - # Push from side to side to try pick up odd placements - # MAKE A NO NORM HGVS2VCF - # First to the right - hgvs_stash = copy.deepcopy(hgvs_coding) - try: - hgvs_stash = variant.no_norm_evm.c_to_n(hgvs_stash) - except: - fn.exceptPass() - try: - stash_ac = hgvs_stash.ac - stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, variant.primary_assembly, variant.hn, validator.sf) - 
stash_pos = int(stash_dict['pos']) - stash_ref = stash_dict['ref'] - stash_alt = stash_dict['alt'] - # Generate an end position - stash_end = str(stash_pos + len(stash_ref) - 1) - # make a not real deletion insertion - stash_hgvs_not_delins = validator.hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' + str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + # Then to the left + hgvs_stash = copy.deepcopy(hgvs_coding) try: - stash_hgvs_not_delins = variant.no_norm_evm.n_to_c(stash_hgvs_not_delins) + hgvs_stash = self.variant.no_norm_evm.c_to_n(hgvs_stash) except: fn.exceptPass() - # Store a tx copy for later use - test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) - # stash_genomic = vm.t_to_g(test_stash_tx_right, hgvs_genomic.ac) - stash_genomic = validator.myvm_t_to_g(test_stash_tx_right, hgvs_genomic.ac, variant.no_norm_evm, variant.hn) - # Stash the outputs if required - # test variants = NC_000006.11:g.90403795G= (causes double identity) - # NC_000002.11:g.73675227_73675228insCTC (? 
incorrect assumed insertion position) - # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) - # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': - # pass - if len(test_stash_tx_right.posedit.edit.ref) == ( - (stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - stash_tx_right = test_stash_tx_right - if hasattr(test_stash_tx_right.posedit.edit, - 'alt') and test_stash_tx_right.posedit.edit.alt is not None: - alt = test_stash_tx_right.posedit.edit.alt - else: - alt = '' - if hasattr(stash_genomic.posedit.edit, - 'alt') and stash_genomic.posedit.edit.alt is not None: - g_alt = stash_genomic.posedit.edit.alt - else: - g_alt = '' - if (len(alt) - ( - test_stash_tx_right.posedit.pos.end.base - test_stash_tx_right.posedit.pos.start.base) + 1) != ( - len(g_alt) - ( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - hgvs_genomic_possibilities.append(stash_genomic) - else: - hgvs_genomic_possibilities.append('') - elif test_stash_tx_right.posedit.edit.type == 'identity': - reform_ident = str(test_stash_tx_right).split(':')[0] - reform_ident = reform_ident + ':c.' + str(test_stash_tx_right.posedit.pos) + 'del' + str( - test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) - hgvs_reform_ident = validator.hp.parse_hgvs_variant(reform_ident) + try: + stash_ac = hgvs_stash.ac + stash_dict = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, self.variant.reverse_normalizer, + self.validator.sf) + stash_pos = int(stash_dict['pos']) + stash_ref = stash_dict['ref'] + stash_alt = stash_dict['alt'] + # Generate an end position + stash_end = str(stash_pos + len(stash_ref) - 1) + # make a not real deletion insertion + stash_hgvs_not_delins = self.validator.hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' 
+ str( + stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) try: - variant.hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error): - stash_tx_right = test_stash_tx_right + stash_hgvs_not_delins = self.variant.no_norm_evm.n_to_c(stash_hgvs_not_delins) + except: + fn.exceptPass() + # Store a tx copy for later use + test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) + # stash_genomic = vm.t_to_g(test_stash_tx_left, hgvs_genomic.ac) + stash_genomic = self.validator.myvm_t_to_g(test_stash_tx_left, hgvs_genomic.ac, self.variant.no_norm_evm, self.variant.hn) + # Stash the outputs if required + # test variants = NC_000006.11:g.90403795G= (causes double identity) + # NC_000002.11:g.73675227_73675228insCTC + # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': + # pass + if len(test_stash_tx_left.posedit.edit.ref) == (( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): + stash_tx_left = test_stash_tx_left + if hasattr(test_stash_tx_left.posedit.edit, + 'alt') and test_stash_tx_left.posedit.edit.alt is not None: + alt = test_stash_tx_left.posedit.edit.alt + else: + alt = '' + if hasattr(stash_genomic.posedit.edit, + 'alt') and stash_genomic.posedit.edit.alt is not None: + g_alt = stash_genomic.posedit.edit.alt + else: + g_alt = '' + + if (len(alt) - ( + test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( + len(g_alt) - ( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + hgvs_genomic_possibilities.append(stash_genomic) + else: hgvs_genomic_possibilities.append('') + elif test_stash_tx_left.posedit.edit.type == 'identity': + reform_ident = 
str(test_stash_tx_left).split(':')[0] + reform_ident = reform_ident + ':c.' + str(test_stash_tx_left.posedit.pos) + 'del' + str( + test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) + hgvs_reform_ident = self.validator.hp.parse_hgvs_variant(reform_ident) + try: + self.variant.hn.normalize(hgvs_reform_ident) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error): + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append('') + else: + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append(stash_genomic) else: - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append(stash_genomic) - else: - try: - variant.hn.normalize(test_stash_tx_right) - except hgvs.exceptions.HGVSUnsupportedOperationError: - hgvs_genomic_possibilities.append('') - else: - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: - test_stash_tx_right = copy.deepcopy(hgvs_coding) - fn.exceptPass() - # Intronic positions not supported. Will cause a Value Error - except ValueError: - test_stash_tx_right = copy.deepcopy(hgvs_coding) - fn.exceptPass() - - # Then to the left - hgvs_stash = copy.deepcopy(hgvs_coding) - try: - hgvs_stash = variant.no_norm_evm.c_to_n(hgvs_stash) - except: - fn.exceptPass() - try: - stash_ac = hgvs_stash.ac - stash_dict = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, variant.primary_assembly, variant.reverse_normalizer, - validator.sf) - stash_pos = int(stash_dict['pos']) - stash_ref = stash_dict['ref'] - stash_alt = stash_dict['alt'] - # Generate an end position - stash_end = str(stash_pos + len(stash_ref) - 1) - # make a not real deletion insertion - stash_hgvs_not_delins = validator.hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' 
+ str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) - try: - stash_hgvs_not_delins = variant.no_norm_evm.n_to_c(stash_hgvs_not_delins) - except: + try: + self.variant.hn.normalize(test_stash_tx_left) + except hgvs.exceptions.HGVSUnsupportedOperationError: + hgvs_genomic_possibilities.append('') + else: + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append(stash_genomic) + except hgvs.exceptions.HGVSError as e: + fn.exceptPass() + except ValueError: fn.exceptPass() - # Store a tx copy for later use - test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) - # stash_genomic = vm.t_to_g(test_stash_tx_left, hgvs_genomic.ac) - stash_genomic = validator.myvm_t_to_g(test_stash_tx_left, hgvs_genomic.ac, variant.no_norm_evm, variant.hn) - # Stash the outputs if required - # test variants = NC_000006.11:g.90403795G= (causes double identity) - # NC_000002.11:g.73675227_73675228insCTC - # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) - # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': - # pass - if len(test_stash_tx_left.posedit.edit.ref) == (( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): - stash_tx_left = test_stash_tx_left - if hasattr(test_stash_tx_left.posedit.edit, - 'alt') and test_stash_tx_left.posedit.edit.alt is not None: - alt = test_stash_tx_left.posedit.edit.alt - else: - alt = '' - if hasattr(stash_genomic.posedit.edit, - 'alt') and stash_genomic.posedit.edit.alt is not None: - g_alt = stash_genomic.posedit.edit.alt - else: - g_alt = '' - if (len(alt) - ( - test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( - len(g_alt) - ( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - hgvs_genomic_possibilities.append(stash_genomic) - else: - 
hgvs_genomic_possibilities.append('') - elif test_stash_tx_left.posedit.edit.type == 'identity': - reform_ident = str(test_stash_tx_left).split(':')[0] - reform_ident = reform_ident + ':c.' + str(test_stash_tx_left.posedit.pos) + 'del' + str( - test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) - hgvs_reform_ident = validator.hp.parse_hgvs_variant(reform_ident) - try: - variant.hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error): - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append('') - else: - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append(stash_genomic) - else: - try: - variant.hn.normalize(test_stash_tx_left) - except hgvs.exceptions.HGVSUnsupportedOperationError: - hgvs_genomic_possibilities.append('') - else: - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: - test_stash_tx_left = copy.deepcopy(hgvs_coding) - fn.exceptPass() - except ValueError: - test_stash_tx_left = copy.deepcopy(hgvs_coding) - fn.exceptPass() - - # direct mapping from reverse_normalized transcript insertions in the delins format - try: - if hgvs_coding.posedit.edit.type == 'ins': - most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) - most_3pr_hgvs_transcript_variant = variant.reverse_normalizer.normalize(hgvs_coding) - try: - n_3pr = validator.vm.c_to_n(most_3pr_hgvs_transcript_variant) - n_5pr = validator.vm.c_to_n(most_5pr_hgvs_transcript_variant) - except: - n_3pr = most_3pr_hgvs_transcript_variant - n_5pr = most_5pr_hgvs_transcript_variant - # Make into a delins by adding the ref bases to the variant ref and alt - pr3_ref = validator.sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, - n_3pr.posedit.pos.end.base) - pr5_ref = validator.sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, - 
n_5pr.posedit.pos.end.base) - most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref - most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref - most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[ - 0] + most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ - pr3_ref[1] - most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[ - 0] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ - pr5_ref[1] - # Map to the genome - genomic_from_most_3pr_hgvs_transcript_variant = validator.vm.t_to_g( - most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) - genomic_from_most_5pr_hgvs_transcript_variant = validator.vm.t_to_g( - most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) - # Normalize - If the variant spans a gap it should then form a static genomic variant - try: - genomic_from_most_3pr_hgvs_transcript_variant = variant.hn.normalize( - genomic_from_most_3pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if error == 'base start position must be <= end position': - start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base - end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start - genomic_from_most_3pr_hgvs_transcript_variant = variant.hn.normalize( + # direct mapping from reverse_normalized transcript insertions in the delins format + try: + if hgvs_coding.posedit.edit.type == 'ins': + most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) + most_3pr_hgvs_transcript_variant = self.variant.reverse_normalizer.normalize(hgvs_coding) + try: + n_3pr = self.validator.vm.c_to_n(most_3pr_hgvs_transcript_variant) + n_5pr = self.validator.vm.c_to_n(most_5pr_hgvs_transcript_variant) + except: + n_3pr = most_3pr_hgvs_transcript_variant + n_5pr = most_5pr_hgvs_transcript_variant + # Make into a delins by adding the 
ref bases to the variant ref and alt + pr3_ref = self.validator.sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, + n_3pr.posedit.pos.end.base) + pr5_ref = self.validator.sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, + n_5pr.posedit.pos.end.base) + most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref + most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[0] + \ + most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ + pr3_ref[1] + most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[0] + \ + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ + pr5_ref[1] + # Map to the genome + genomic_from_most_3pr_hgvs_transcript_variant = self.validator.vm.t_to_g( + most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) + genomic_from_most_5pr_hgvs_transcript_variant = self.validator.vm.t_to_g( + most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) + # Normalize - If the variant spans a gap it should then form a static genomic variant + try: + genomic_from_most_3pr_hgvs_transcript_variant = self.variant.hn.normalize( genomic_from_most_3pr_hgvs_transcript_variant) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if error == 'base start position must be <= end position': + start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base + end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start + genomic_from_most_3pr_hgvs_transcript_variant = self.variant.hn.normalize( + genomic_from_most_3pr_hgvs_transcript_variant) + try: + genomic_from_most_5pr_hgvs_transcript_variant = self.variant.hn.normalize( + genomic_from_most_5pr_hgvs_transcript_variant) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if error == 'base start position must be <= end 
position': + start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base + end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start + genomic_from_most_5pr_hgvs_transcript_variant = self.variant.hn.normalize( + genomic_from_most_5pr_hgvs_transcript_variant) + try: + if genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' + str( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant = self.validator.hp.parse_hgvs_variant( + genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) + + try: + if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' 
+ str( + most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant = self.validator.hp.parse_hgvs_variant( + most_3pr_hgvs_transcript_variant_delins_from_dup) + + try: + if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' + str( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant = self.validator.hp.parse_hgvs_variant( + genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) + + try: + if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' 
+ str( + most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant = self.validator.hp.parse_hgvs_variant( + most_5pr_hgvs_transcript_variant_delins_from_dup) + + if len(genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( + most_3pr_hgvs_transcript_variant.posedit.edit.alt): + hgvs_genomic_possibilities.append(genomic_from_most_3pr_hgvs_transcript_variant) + if len(genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( + most_5pr_hgvs_transcript_variant.posedit.edit.alt): + hgvs_genomic_possibilities.append(genomic_from_most_5pr_hgvs_transcript_variant) + + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + fn.exceptPass() + + logger.info('\nGENOMIC POSSIBILITIES') + for possibility in hgvs_genomic_possibilities: + if possibility == '': + logger.info('X') + else: + logger.info(fn.valstr(possibility)) + + logger.info('\n') + + # Set variables for problem specific warnings + gapped_transcripts = '' + auto_info = '' + + # Mark as not disparity detected + disparity_deletion_in = ['false', 'false'] + + # Loop through to see if a gap can be located + # Set the variables required for corrective normalization + possibility_counter = 0 + suppress_c_normalization = 'false' # Applies to boundary crossing normalization + + # Copy a version of hgvs_genomic_possibilities + for possibility in hgvs_genomic_possibilities: + possibility_counter = possibility_counter + 1 + + # Loop out stash possibilities which will not spot gaps so are empty + if possibility == '': + continue + + # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps + hgvs_genomic_variant = copy.deepcopy(possibility) + + # Reverse normalize hgvs_genomic_variant: 
NOTE will replace ref try: - genomic_from_most_5pr_hgvs_transcript_variant = variant.hn.normalize( - genomic_from_most_5pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: + reverse_normalized_hgvs_genomic = self.variant.reverse_normalizer.normalize(hgvs_genomic_variant) + except hgvs.exceptions.HGVSError as e: + # Strange error caused by gap in genomic error = str(e) - if error == 'base start position must be <= end position': - start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base - end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start - genomic_from_most_5pr_hgvs_transcript_variant = variant.hn.normalize( - genomic_from_most_5pr_hgvs_transcript_variant) + if 'base start position must be <= end position' in error: + if hgvs_genomic.posedit.edit.type == 'delins': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = self.validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = self.validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + reverse_normalized_hgvs_genomic = self.variant.reverse_normalizer.normalize(hgvs_genomic) + if hgvs_genomic.posedit.edit.type == 'del': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = self.validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = self.validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + 
reverse_normalized_hgvs_genomic = self.variant.reverse_normalizer.normalize(hgvs_genomic) + if 'insertion length must be 1' in error: + if hgvs_genomic.posedit.edit.type == 'ins': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = self.validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + rhb = self.validator.sf.fetch_seq(str(hgvs_genomic.ac), start, end) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + reverse_normalized_hgvs_genomic = self.variant.reverse_normalizer.normalize(hgvs_genomic) + + hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + + # Create VCF + vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, self.variant.primary_assembly, + self.variant.reverse_normalizer, self.validator.sf) + chr = vcf_dict['chr'] + pos = vcf_dict['pos'] + ref = vcf_dict['ref'] + alt = vcf_dict['alt'] + + # Generate an end position + end = str(int(pos) + len(ref) - 1) + pos = str(pos) + + # Store a not real deletion insertion to test for gapping + stored_hgvs_not_delins = self.validator.hp.parse_hgvs_variant(str( + hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) + + # Detect intronic variation using normalization + intronic_variant = 'false' + + # Save a copy of current hgvs_coding try: - if genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' 
+ str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_3pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( - genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) + saved_hgvs_coding = self.variant.no_norm_evm.g_to_t(stored_hgvs_not_delins, hgvs_coding.ac) + except hgvs.exceptions.HGVSInvalidIntervalError as e: + if str(e) == 'start or end or both are beyond the bounds of transcript record': + continue + else: + saved_hgvs_coding = self.variant.no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, + hgvs_coding.ac) - try: - if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' 
+ str( - most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref - most_3pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( - most_3pr_hgvs_transcript_variant_delins_from_dup) + # Look for normalized variant options that do not match hgvs_coding + if orientation == -1: + # position genomic at its most 5 prime position + try: + query_genomic = self.variant.reverse_normalizer.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript and test for movement + try: + hgvs_seek_var = self.variant.evm.g_to_t(query_genomic, hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + if ( + hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding - try: - if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' 
+ str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_5pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) + elif orientation != -1: + # position genomic at its most 3 prime position + try: + query_genomic = self.variant.hn.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript ant test for movement + try: + hgvs_seek_var = self.variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + if ( + hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding try: - if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' 
+ str( - most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref - most_5pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( - most_5pr_hgvs_transcript_variant_delins_from_dup) - - if len(genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( - most_3pr_hgvs_transcript_variant.posedit.edit.alt): - hgvs_genomic_possibilities.append(genomic_from_most_3pr_hgvs_transcript_variant) - if len(genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( - most_5pr_hgvs_transcript_variant.posedit.edit.alt): - hgvs_genomic_possibilities.append(genomic_from_most_5pr_hgvs_transcript_variant) - - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - pass + intron_test = self.variant.hn.normalize(hgvs_seek_var) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if 'Normalization of intronic variants is not supported' in error or \ + 'Unsupported normalization of variants spanning the exon-intron boundary' in error: + if 'Unsupported normalization of variants spanning the exon-intron boundary' in error: + intronic_variant = 'hard_fail' + else: + # Double check to see whether the variant is actually intronic? 
+ for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' - logger.info('\nGENOMIC POSSIBILITIES') - for possibility in hgvs_genomic_possibilities: - if possibility == '': - logger.info('X') - else: - logger.info(fn.valstr(possibility)) + if intronic_variant != 'hard_fail': + if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', + str(hgvs_seek_var.posedit.pos)): + # Double check to see whether the variant is actually intronic? + for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' + + if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str(hgvs_seek_var.posedit.pos)): + # Double check to see whether the variant is actually intronic? 
+ for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' + + if intronic_variant != 'true': + # Flag RefSeqGene for ammendment + # amend_RefSeqGene = 'false' + # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths + if stored_hgvs_not_delins != '': + # Refresh hgvs_not_delins from stored_hgvs_not_delins + hgvs_not_delins = self.dup_ins_5prime_shift(stored_hgvs_not_delins, hgvs_genomic_5pr, + saved_hgvs_coding) + + try: + tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSInvalidIntervalError: + tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, + saved_hgvs_coding.ac) + # Create normalized version of tx_hgvs_not_delins + rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) + + # Check for +1 base and adjust + if re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( + r'\+', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) + + + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + 
hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, + self.variant.primary_assembly, self.variant.hn) + + rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + + # tx_hgvs_not_delins = rn_tx_hgvs_not_delins + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, + self.variant.primary_assembly, self.variant.hn) + rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # else: + # pass + + # Check for -ve base and adjust + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( + r'\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) + + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # Delete the ref + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # Add the additional base to the ALT + start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 + end = rn_tx_hgvs_not_delins.posedit.pos.end.base + ref_bases = self.validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + 
hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, + self.variant.primary_assembly, self.variant.hn) + rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, + self.variant.primary_assembly, self.variant.hn) + rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + else: + pass + + # Logic + if len(hgvs_not_delins.posedit.edit.ref) < len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( + hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['chromosome', gap_length] + elif len(hgvs_not_delins.posedit.edit.ref) > len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( + rn_tx_hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['transcript', gap_length] + else: + re_capture_tx_variant = [] + for internal_possibility in hgvs_genomic_possibilities: + if internal_possibility == '': + continue - logger.info('\n') + hgvs_t_possibility = self.validator.vm.g_to_t(internal_possibility, hgvs_coding.ac) + if hgvs_t_possibility.posedit.edit.type == 'ins': + try: + hgvs_t_possibility = self.validator.vm.c_to_n(hgvs_t_possibility) + except: + fn.exceptPass() + ins_ref = 
self.validator.sf.fetch_seq(hgvs_t_possibility.ac, + hgvs_t_possibility.posedit.pos.start.base - 1, + hgvs_t_possibility.posedit.pos.start.base + 1) + try: + hgvs_t_possibility = self.validator.vm.n_to_c(hgvs_t_possibility) + except: + fn.exceptPass() + hgvs_t_possibility.posedit.edit.ref = ins_ref + hgvs_t_possibility.posedit.edit.alt = ins_ref[ + 0] + hgvs_t_possibility.posedit.edit.alt + \ + ins_ref[1] + if internal_possibility.posedit.edit.type == 'ins': + ins_ref = self.validator.sf.fetch_seq(internal_possibility.ac, + internal_possibility.posedit.pos.start.base - 1, + internal_possibility.posedit.pos.end.base) + internal_possibility.posedit.edit.ref = ins_ref + internal_possibility.posedit.edit.alt = ins_ref[ + 0] + internal_possibility.posedit.edit.alt + \ + ins_ref[1] + + if len(hgvs_t_possibility.posedit.edit.ref) < len( + internal_possibility.posedit.edit.ref): + gap_length = len(internal_possibility.posedit.edit.ref) - len( + hgvs_t_possibility.posedit.edit.ref) + re_capture_tx_variant = ['transcript', gap_length, + hgvs_t_possibility] + hgvs_not_delins = internal_possibility + hgvs_genomic_5pr = internal_possibility + break + + if re_capture_tx_variant != []: + try: + tx_hgvs_not_delins = self.validator.vm.c_to_n(re_capture_tx_variant[2]) + except: + tx_hgvs_not_delins = re_capture_tx_variant[2] + disparity_deletion_in = re_capture_tx_variant[0:-1] + else: + pass + + # 'At hgvs_genomic' + # Final sanity checks + try: + self.validator.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + except Exception as e: + if str(e) == 'start or end or both are beyond the bounds of transcript record': + continue + try: + self.variant.hn.normalize(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of 
variants spanning the exon-intron boundary', + error): + continue + elif re.match('Normalization of intronic variants is not supported', error): + # We know that this cannot be because of an intronic variant, so must be aligned to tx gap + disparity_deletion_in = ['transcript', 'Requires Analysis'] + + # amend_RefSeqGene = 'false' + # Recreate hgvs_genomic + if disparity_deletion_in[0] == 'transcript': + hgvs_genomic = hgvs_not_delins + + # Find oddly placed gaps where the tx variant is encompassed in the gap + if disparity_deletion_in[0] == 'false' and ( + possibility_counter == 3 or possibility_counter == 4): + rg = self.variant.reverse_normalizer.normalize(hgvs_not_delins) + rtx = self.validator.vm.g_to_t(rg, tx_hgvs_not_delins.ac) + fg = self.variant.hn.normalize(hgvs_not_delins) + ftx = self.validator.vm.g_to_t(fg, tx_hgvs_not_delins.ac) + if (rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( + ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): + exons = self.validator.hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, self.validator.alt_aln_method) + exonic = False + for ex_test in exons: + if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ + 7]) and ftx.posedit.pos.end.base in range(ex_test[6], ex_test[7]): + exonic = True + if exonic is True: + hgvs_not_delins = fg + hgvs_genomic = fg + hgvs_genomic_5pr = fg + try: + tx_hgvs_not_delins = self.validator.vm.c_to_n(ftx) + except Exception: + tx_hgvs_not_delins = ftx + disparity_deletion_in = ['transcript', 'Requires Analysis'] - # Set variables for problem specific warnings - gapped_alignment_warning = '' - corrective_action_taken = '' - gapped_transcripts = '' - auto_info = '' + # Pre-processing of tx_hgvs_not_delins + try: + if tx_hgvs_not_delins.posedit.edit.alt is None: + tx_hgvs_not_delins.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' 
+ tx_hgvs_not_delins.type + '.' + str( + tx_hgvs_not_delins.posedit.pos.start) + '_' + str( + tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins = self.validator.hp.parse_hgvs_variant( + tx_hgvs_not_delins_delins_from_dup) + + # GAP IN THE TRANSCRIPT DISPARITY DETECTED + if disparity_deletion_in[0] == 'transcript': + # Suppress intron boundary crossing due to non-intron intron based c. seq annotations + suppress_c_normalization = 'true' + # amend_RefSeqGene = 'true' + # ANY VARIANT WHOLLY WITHIN THE GAP + if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( + r'\-', + str( + tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( + r'\-', + str( + tx_hgvs_not_delins.posedit.pos.end))): - # Mark as not disparity detected - disparity_deletion_in = ['false', 'false'] + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - # Loop through to see if a gap can be located - # Set the variables required for corrective normalization - possibility_counter = 0 - suppress_c_normalization = 'false' # Applies to boundary crossing normalization + # Copy the current variant + tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) + try: + if tx_gap_fill_variant.posedit.edit.alt is None: + tx_gap_fill_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( + tx_gap_fill_variant.posedit.pos.start) + '_' + str( + tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant = self.validator.hp.parse_hgvs_variant( + tx_gap_fill_variant_delins_from_dup) + + # Identify which half of the NOT-intron the start position of the variant is in + if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' - # Copy a version of hgvs_genomic_possibilities - for possibility in hgvs_genomic_possibilities: - possibility_counter = possibility_counter + 1 + try: + tx_gap_fill_variant = self.validator.vm.n_to_c(tx_gap_fill_variant) + except: + fn.exceptPass() + genomic_gap_fill_variant = self.validator.vm.t_to_g(tx_gap_fill_variant, + reverse_normalized_hgvs_genomic.ac) + genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - # Loop out stash possibilities which will not spot gaps so are empty - if possibility == '': - continue + try: + c_tx_hgvs_not_delins = self.validator.vm.n_to_c(tx_hgvs_not_delins) + except Exception: + c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) + genomic_gap_fill_variant_alt = self.validator.vm.t_to_g(c_tx_hgvs_not_delins, + 
hgvs_genomic_5pr.ac) - # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps - hgvs_genomic_variant = copy.deepcopy(possibility) - stored_hgvs_genomic_variant = copy.deepcopy(hgvs_genomic_variant) + # Ensure an ALT exists + try: + if genomic_gap_fill_variant_alt.posedit.edit.alt is None: + genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( + genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant = self.validator.hp.parse_hgvs_variant( + genomic_gap_fill_variant_delins_from_dup) + genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' 
+ str( + genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt = self.validator.hp.parse_hgvs_variant( + genomic_gap_fill_variant_alt_delins_from_dup) + + # Correct insertion alts + if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': + append_ref = self.validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, + genomic_gap_fill_variant_alt.posedit.pos.end.base) + genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ + 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ + append_ref[1] + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) + if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: + alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) + else: + # Deletions with no ins + pre_alternate_bases = list( + genomic_gap_fill_variant_alt.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = genomic_gap_fill_variant.posedit.pos.start.base + alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base + ref_base_dict = {} + for base in reference_bases: + ref_base_dict[ref_start] = str(base) + ref_start = ref_start + 1 + + alt_base_dict = {} + + # NEED TO SEARCH FOR RANGE = and replace with interval_range + # Need to search for int and replace with integer + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, + genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, + 1): + if integer == 
alt_start: + alt_base_dict[integer] = str(''.join(alternate_bases)) + else: + alt_base_dict[integer] = 'X' + + # Generate the alt sequence + alternate_sequence_bases = [] + for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, + genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): + if integer in list(alt_base_dict.keys()): + alternate_sequence_bases.append(alt_base_dict[integer]) + else: + alternate_sequence_bases.append(ref_base_dict[integer]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Add the new alt to the gap fill variant and generate transcript variant + genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence + hgvs_refreshed_variant = self.validator.vm.g_to_t(genomic_gap_fill_variant, + tx_gap_fill_variant.ac) + + # Set warning + gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) + disparity_deletion_in[1] = [gap_size] + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' - # Reverse normalize hgvs_genomic_variant: NOTE will replace ref + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + else: + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' + auto_info = auto_info + '%s' % (gap_position) + + else: + if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: + # In this instance, we have identified a transcript gap but the n. version of + # the transcript variant but do not have a position which actually hits the gap, + # so the variant likely spans the gap, and is not picked up by an offset. + try: + c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + g1 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g3 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + ng2 = self.variant.hn.normalize(g2) + g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( + len(g3.posedit.edit.ref) - 1) + try: + c2 = self.validator.vm.g_to_t(g3, c1.ac) + if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: + pass + else: + tx_hgvs_not_delins = c2 + try: + tx_hgvs_not_delins = self.validator.vm.c_to_n(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSError: + fn.exceptPass() + except hgvs.exceptions.HGVSInvalidVariantError: + fn.exceptPass() + + if re.search(r'\+', + str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = self.validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\+', + str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = self.validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base + gpe = for_location_c.posedit.pos.end.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\-', + str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = self.validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\-', + str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = self.validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base - 1 + gpe = for_location_c.posedit.pos.end.base + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + else: + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + '\n' + tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.start.base + len( + tx_hgvs_not_delins.posedit.edit.ref) - 1 + hgvs_refreshed_variant = tx_hgvs_not_delins + + # GAP IN THE CHROMOSOME + elif disparity_deletion_in[0] == 'chromosome': + suppress_c_normalization = 'true' + # amend_RefSeqGene = 'true' + if possibility_counter == 3: + hgvs_refreshed_variant = stash_tx_right + elif possibility_counter == 4: + hgvs_refreshed_variant = stash_tx_left + else: + hgvs_refreshed_variant = chromosome_normalized_hgvs_coding + # Warn + auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( + hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' transcript base(s) that fail to align to chromosome ' + str( + hgvs_genomic.ac) + '\n' + else: + # Keep the same by re-setting rel_var + hgvs_refreshed_variant = hgvs_coding + # amend_RefSeqGene = 'false' + + # Edit the output + if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( + hgvs_refreshed_variant.type)): + hgvs_refreshed_variant = self.variant.no_norm_evm.n_to_c(hgvs_refreshed_variant) + else: + pass + + try: + self.variant.hn.normalize(hgvs_refreshed_variant) + except Exception as e: + error = str(e) + + # Ensure the final variant is not intronic nor does it cross exon boundaries + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + hgvs_refreshed_variant = saved_hgvs_coding + else: + logger.warning(error) + continue + + # Quick check to make sure the coding variant has not changed + try: + to_test = self.variant.hn.normalize(hgvs_refreshed_variant) + except: + to_test = hgvs_refreshed_variant + if 
str(to_test.posedit.edit) != str(hgvs_coding.posedit.edit): + # Try the next available genomic option + if hgvs_coding.posedit.edit.type == 'identity' and to_test.posedit.edit.type == 'identity': + hgvs_coding = to_test + else: + continue + # Update hgvs_genomic + hgvs_genomic = self.validator.myvm_t_to_g(hgvs_refreshed_variant, hgvs_genomic.ac, + self.variant.no_norm_evm, self.variant.hn) + if hgvs_genomic.posedit.edit.type == 'identity': + re_c = self.validator.vm.g_to_t(hgvs_genomic, hgvs_refreshed_variant.ac) + if (self.variant.hn.normalize(re_c)) != (self.variant.hn.normalize(hgvs_refreshed_variant)): + shuffle_left_g = copy.copy(hgvs_genomic) + shuffle_left_g.posedit.edit.ref = '' + shuffle_left_g.posedit.edit.alt = '' + shuffle_left_g.posedit.pos.start.base = shuffle_left_g.posedit.pos.start.base - 1 + shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 + shuffle_left_g = self.variant.reverse_normalizer.normalize(shuffle_left_g) + re_c = self.validator.vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) + if (self.variant.hn.normalize(re_c)) != (self.variant.hn.normalize(hgvs_refreshed_variant)): + hgvs_genomic = shuffle_left_g + + # If it is intronic, these vairables will not have been set + else: + # amend_RefSeqGene = 'false' + no_normalized_c = 'false' + + # Break if gap has been detected + if disparity_deletion_in[0] != 'false': + break + + # Warn user about gapping + if auto_info != '': + info_lines = auto_info.split('\n') + info_keys = {} + for information in info_lines: + info_keys[information] = '' + info_out = [] + info_out.append( + 'The displayed variants may be artefacts of aligning ' + hgvs_coding.ac + ' with genome build ' + self.variant.primary_assembly) + for ky in list(info_keys.keys()): + info_out.append(ky) + auto_info = '\n'.join(info_out) + auto_info = auto_info + '\nCaution should be used when reporting the displayed variant descriptions: If you are unsure, please contact admin' + auto_info = 
str(auto_info.replace('\n', ': ')) + self.variant.warnings += ': ' + str(auto_info) + logger.warning(str(auto_info)) + # Normailse hgvs_genomic try: - reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic_variant) + hgvs_genomic = self.variant.hn.normalize(hgvs_genomic) except hgvs.exceptions.HGVSError as e: # Strange error caused by gap in genomic error = str(e) - if re.search('base start position must be <= end position', error): + + if re.search('base start position must be <= end position', error) and \ + disparity_deletion_in[0] == 'chromosome': if hgvs_genomic.posedit.edit.type == 'delins': start = hgvs_genomic.posedit.pos.start.base end = hgvs_genomic.posedit.pos.end.base - lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + lhb = self.validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = self.validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) hgvs_genomic.posedit.edit.ref = lhb + rhb hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb hgvs_genomic.posedit.pos.start.base = end hgvs_genomic.posedit.pos.end.base = start - reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) + hgvs_genomic = self.variant.hn.normalize(hgvs_genomic) if hgvs_genomic.posedit.edit.type == 'del': start = hgvs_genomic.posedit.pos.start.base end = hgvs_genomic.posedit.pos.end.base - lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + lhb = self.validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = self.validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) hgvs_genomic.posedit.edit.ref = lhb + rhb hgvs_genomic.posedit.edit.alt = lhb + rhb hgvs_genomic.posedit.pos.start.base = end hgvs_genomic.posedit.pos.end.base = start - reverse_normalized_hgvs_genomic = 
variant.reverse_normalizer.normalize(hgvs_genomic) - if re.search('insertion length must be 1', error): - if hgvs_genomic.posedit.edit.type == 'ins': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) - lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start, end) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) + hgvs_genomic = self.variant.hn.normalize(hgvs_genomic) + genomic = fn.valstr(hgvs_genomic) - hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - # Store a copy for later use - stored_hgvs_genomic_5pr = copy.deepcopy(hgvs_genomic_5pr) + print('in gapped_mapping', hgvs_coding) + + return hgvs_genomic, gapped_transcripts, auto_info, suppress_c_normalization, hgvs_coding, hgvs_genomic_possibilities + + def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic, gapped_transcripts, + hgvs_genomic_possibilities, auto_info): + logger.warning('g_to_t gap code 2 active') - # Create VCF - vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, variant.primary_assembly, - variant.reverse_normalizer, validator.sf) + hgvs_genomic_variant = hgvs_genomic + reverse_normalized_hgvs_genomic = self.variant.reverse_normalizer.normalize(hgvs_genomic_variant) + hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, self.variant.primary_assembly, + self.variant.reverse_normalizer, self.validator.sf) chr = vcf_dict['chr'] pos = vcf_dict['pos'] ref = vcf_dict['ref'] alt = vcf_dict['alt'] - # Look for exonic gaps within transcript or chromosome - no_normalized_c = 'false' # Mark true to not produce an additional normalization of c. 
+ # Create a VCF call + vcf_component_list = [str(chr), str(pos), str(ref), (alt)] + vcf_genomic = '-'.join(vcf_component_list) + # DO NOT DELETE # Generate an end position end = str(int(pos) + len(ref) - 1) pos = str(pos) - - # Store a not real deletion insertion to test for gapping - stored_hgvs_not_delins = validator.hp.parse_hgvs_variant(str( + stored_hgvs_not_delins = self.validator.hp.parse_hgvs_variant(str( hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) - v = [chr, pos, ref, alt] - - # Detect intronic variation using normalization - intronic_variant = 'false' - - # Save a copy of current hgvs_coding - try: - saved_hgvs_coding = variant.no_norm_evm.g_to_t(stored_hgvs_not_delins, hgvs_coding.ac) - except hgvs.exceptions.HGVSInvalidIntervalError as e: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - saved_hgvs_coding = hgvs_coding - intronic_variant = 'true' - continue - else: - saved_hgvs_coding = variant.no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, - hgvs_coding.ac) - - # Look for normalized variant options that do not match hgvs_coding - if orientation == -1: - # position genomic at its most 5 prime position - try: - query_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = variant.evm.g_to_t(query_genomic, hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if ( - hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - 
hgvs_seek_var = saved_hgvs_coding - - elif orientation != -1: - # position genomic at its most 3 prime position - try: - query_genomic = variant.hn.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript ant test for movement - try: - hgvs_seek_var = variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) + orientation = int(ori[0]['alt_strand']) + saved_hgvs_coding = copy.deepcopy(hgvs_coding) + + # is it in an exon? + is_it_in_an_exon = 'no' + for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + # Take from stored copy + # hgvs_genomic_5pr = copy.deepcopy(stored_hgvs_genomic_5pr) if ( - hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding - - try: - intron_test = variant.hn.normalize(hgvs_seek_var) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - intronic_variant = 'hard_fail' - else: - # Double check to see whether the variant is actually intronic? 
- for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' - - if intronic_variant != 'hard_fail': - if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', - str(hgvs_seek_var.posedit.pos)): - # Double check to see whether the variant is actually intronic? - for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' - - if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str(hgvs_seek_var.posedit.pos)): - # Double check to see whether the variant is actually intronic? 
- for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + is_it_in_an_exon = 'yes' + if is_it_in_an_exon == 'yes': + # map form reverse normalized g. to c. + hgvs_from_5n_g = self.variant.no_norm_evm.g_to_t(hgvs_genomic_5pr, saved_hgvs_coding.ac) - if intronic_variant != 'true': - # Flag RefSeqGene for ammendment - # amend_RefSeqGene = 'false' # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths + disparity_deletion_in = ['false', 'false'] if stored_hgvs_not_delins != '': # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) - # This test will only occur in dup of single base, insertion or substitution - if not re.search('_', str(hgvs_not_delins.posedit.pos)): - if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', - hgvs_genomic_5pr.posedit.edit.type): - # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos - plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) - plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 - plussed_hgvs_not_delins.posedit.edit.ref = '' - transcript_variant = variant.no_norm_evm.g_to_t(plussed_hgvs_not_delins, - str(saved_hgvs_coding.ac)) - if (( - transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( - hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str( - hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - else: - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = 
hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str( - hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - else: - pass - else: - pass + hgvs_not_delins = self.dup_ins_5prime_shift(stored_hgvs_not_delins, hgvs_genomic_5pr, saved_hgvs_coding) + + hard_fail = 'false' try: - tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSInvalidIntervalError: - tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, - saved_hgvs_coding.ac) + tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) + except Exception as e: + if str(e) == 'start or end or both are beyond the bounds of transcript record': + tx_hgvs_not_delins = hgvs_coding + hard_fail = 'true' + # Create normalized version of tx_hgvs_not_delins rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) - - # Check for +1 base and adjust - if re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - r'\+', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove 
offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - pass + # Check for +ve base and adjust + if re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\+', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): # move tx end base to next available non-offset base @@ -1576,65 +2181,56 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 rn_tx_hgvs_not_delins.posedit.edit.ref = '' if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, - variant.primary_assembly, variant.hn) - - rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, + self.variant.primary_assembly, self.variant.hn) + rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) - - # tx_hgvs_not_delins = rn_tx_hgvs_not_delins elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + rn_tx_hgvs_not_delins.posedit.edit.ref = '' # move tx start base to previous available non-offset base rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = 
variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, - variant.primary_assembly, variant.hn) - rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, + self.variant.primary_assembly, self.variant.hn) + rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: - # pass + # else: + # pass # Check for -ve base and adjust - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - r'\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - pass + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base back to next available non-offset base + # rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base - 1 rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 # Delete the ref rn_tx_hgvs_not_delins.posedit.edit.ref = '' # Add the additional base to the ALT start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), 
start, end) + ref_bases = self.validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, - variant.primary_assembly, variant.hn) - rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, + self.variant.primary_assembly, self.variant.hn) + rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): # move tx start base to previous available non-offset base @@ -1642,13 +2238,13 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 rn_tx_hgvs_not_delins.posedit.edit.ref = '' if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) else: test_tx_var = rn_tx_hgvs_not_delins # re-make genomic and tx - hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, - variant.primary_assembly, variant.hn) - rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, + self.variant.primary_assembly, self.variant.hn) + rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 else: @@ -1668,20 +2264,21 @@ def 
g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): else: re_capture_tx_variant = [] for internal_possibility in hgvs_genomic_possibilities: + if internal_possibility == '': continue - hgvs_t_possibility = validator.vm.g_to_t(internal_possibility, hgvs_coding.ac) + hgvs_t_possibility = self.validator.vm.g_to_t(internal_possibility, hgvs_coding.ac) if hgvs_t_possibility.posedit.edit.type == 'ins': try: - hgvs_t_possibility = validator.vm.c_to_n(hgvs_t_possibility) + hgvs_t_possibility = self.validator.vm.c_to_n(hgvs_t_possibility) except: fn.exceptPass() - ins_ref = validator.sf.fetch_seq(hgvs_t_possibility.ac, + ins_ref = self.validator.sf.fetch_seq(hgvs_t_possibility.ac, hgvs_t_possibility.posedit.pos.start.base - 1, hgvs_t_possibility.posedit.pos.start.base + 1) try: - hgvs_t_possibility = validator.vm.n_to_c(hgvs_t_possibility) + hgvs_t_possibility = self.validator.vm.n_to_c(hgvs_t_possibility) except: fn.exceptPass() hgvs_t_possibility.posedit.edit.ref = ins_ref @@ -1689,7 +2286,7 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): 0] + hgvs_t_possibility.posedit.edit.alt + \ ins_ref[1] if internal_possibility.posedit.edit.type == 'ins': - ins_ref = validator.sf.fetch_seq(internal_possibility.ac, + ins_ref = self.validator.sf.fetch_seq(internal_possibility.ac, internal_possibility.posedit.pos.start.base - 1, internal_possibility.posedit.pos.end.base) internal_possibility.posedit.edit.ref = ins_ref @@ -1701,30 +2298,29 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): internal_possibility.posedit.edit.ref): gap_length = len(internal_possibility.posedit.edit.ref) - len( hgvs_t_possibility.posedit.edit.ref) - re_capture_tx_variant = ['transcript', gap_length, - hgvs_t_possibility] + re_capture_tx_variant = ['transcript', gap_length, hgvs_t_possibility] hgvs_not_delins = internal_possibility hgvs_genomic_5pr = internal_possibility break if re_capture_tx_variant != []: try: - tx_hgvs_not_delins = 
validator.vm.c_to_n(re_capture_tx_variant[2]) + tx_hgvs_not_delins = self.validator.vm.c_to_n(re_capture_tx_variant[2]) except: tx_hgvs_not_delins = re_capture_tx_variant[2] disparity_deletion_in = re_capture_tx_variant[0:-1] else: pass - # 'At hgvs_genomic' # Final sanity checks try: - validator.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + self.validator.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) except Exception as e: if str(e) == 'start or end or both are beyond the bounds of transcript record': - continue + logger.warning(str(e)) + return True try: - variant.hn.normalize(tx_hgvs_not_delins) + self.variant.hn.normalize(tx_hgvs_not_delins) except hgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) if re.match('Normalization of intronic variants is not supported', @@ -1734,41 +2330,19 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): if re.match( 'Unsupported normalization of variants spanning the exon-intron boundary', error): - continue + logger.warning(error) + return True elif re.match('Normalization of intronic variants is not supported', error): # We know that this cannot be because of an intronic variant, so must be aligned to tx gap disparity_deletion_in = ['transcript', 'Requires Analysis'] - # amend_RefSeqGene = 'false' + if hard_fail == 'true': + disparity_deletion_in = ['false', 'false'] + # Recreate hgvs_genomic if disparity_deletion_in[0] == 'transcript': hgvs_genomic = hgvs_not_delins - # Find oddly placed gaps where the tx variant is encompassed in the gap - if disparity_deletion_in[0] == 'false' and ( - possibility_counter == 3 or possibility_counter == 4): - rg = variant.reverse_normalizer.normalize(hgvs_not_delins) - rtx = validator.vm.g_to_t(rg, tx_hgvs_not_delins.ac) - fg = variant.hn.normalize(hgvs_not_delins) - ftx = validator.vm.g_to_t(fg, tx_hgvs_not_delins.ac) - if (rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( - ftx.posedit.pos.start.offset != 0 and 
ftx.posedit.pos.end.offset != 0): - exons = validator.hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, validator.alt_aln_method) - exonic = False - for ex_test in exons: - if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ - 7]) and ftx.posedit.pos.end.base in range(ex_test[6], ex_test[7]): - exonic = True - if exonic is True: - hgvs_not_delins = fg - hgvs_genomic = fg - hgvs_genomic_5pr = fg - try: - tx_hgvs_not_delins = validator.vm.c_to_n(ftx) - except Exception: - tx_hgvs_not_delins = ftx - disparity_deletion_in = ['transcript', 'Requires Analysis'] - # Pre-processing of tx_hgvs_not_delins try: if tx_hgvs_not_delins.posedit.edit.alt is None: @@ -1778,24 +2352,21 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( tx_hgvs_not_delins.posedit.pos.start) + '_' + str( tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = validator.hp.parse_hgvs_variant( - tx_hgvs_not_delins_delins_from_dup) + tx_hgvs_not_delins = self.validator.hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) # GAP IN THE TRANSCRIPT DISPARITY DETECTED if disparity_deletion_in[0] == 'transcript': - # Suppress intron boundary crossing due to non-intron intron based c. 
seq annotations - suppress_c_normalization = 'true' - # amend_RefSeqGene = 'true' - # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( - r'\-', - str( - tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( - r'\-', - str( - tx_hgvs_not_delins.posedit.pos.end))): + gap_position = '' + gapped_alignment_warning = str( + hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly + # ANY VARIANT WHOLLY WITHIN THE GAP + if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', + str( + tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', + str( + tx_hgvs_not_delins.posedit.pos.end))): gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) # Copy the current variant @@ -1808,10 +2379,10 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( tx_gap_fill_variant.posedit.pos.start) + '_' + str( tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = validator.hp.parse_hgvs_variant( + tx_gap_fill_variant = self.validator.hp.parse_hgvs_variant( tx_gap_fill_variant_delins_from_dup) - # Identify which half of the NOT-intron the start position of the variant is in + # Identify which half of the NOT-intron the start position of the variant is in if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') @@ -1826,18 +2397,18 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): tx_gap_fill_variant.posedit.edit.ref = '' try: - tx_gap_fill_variant = validator.vm.n_to_c(tx_gap_fill_variant) + tx_gap_fill_variant = self.validator.vm.n_to_c(tx_gap_fill_variant) except: fn.exceptPass() - genomic_gap_fill_variant = validator.vm.t_to_g(tx_gap_fill_variant, + genomic_gap_fill_variant = self.validator.vm.t_to_g(tx_gap_fill_variant, reverse_normalized_hgvs_genomic.ac) genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref try: - c_tx_hgvs_not_delins = validator.vm.n_to_c(tx_hgvs_not_delins) + c_tx_hgvs_not_delins = self.validator.vm.n_to_c(tx_hgvs_not_delins) except Exception: c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = validator.vm.t_to_g(c_tx_hgvs_not_delins, + genomic_gap_fill_variant_alt = self.validator.vm.t_to_g(c_tx_hgvs_not_delins, hgvs_genomic_5pr.ac) # Ensure an ALT exists @@ -1849,17 +2420,17 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' 
+ str( genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = validator.hp.parse_hgvs_variant( + genomic_gap_fill_variant = self.validator.hp.parse_hgvs_variant( genomic_gap_fill_variant_delins_from_dup) genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = validator.hp.parse_hgvs_variant( + genomic_gap_fill_variant_alt = self.validator.hp.parse_hgvs_variant( genomic_gap_fill_variant_alt_delins_from_dup) # Correct insertion alts if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + append_ref = self.validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, genomic_gap_fill_variant_alt.posedit.pos.end.base) genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ @@ -1872,8 +2443,7 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) else: # Deletions with no ins - pre_alternate_bases = list( - genomic_gap_fill_variant_alt.posedit.edit.ref) + pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) alternate_bases = [] for base in pre_alternate_bases: alternate_bases.append('X') @@ -1894,8 +2464,7 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): # Note, all variants will be forced into the 
format delete insert # Deleted bases in the ALT will be substituted for X for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, - genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, - 1): + genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): if integer == alt_start: alt_base_dict[integer] = str(''.join(alternate_bases)) else: @@ -1914,7 +2483,7 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): # Add the new alt to the gap fill variant and generate transcript variant genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = validator.vm.g_to_t(genomic_gap_fill_variant, + hgvs_refreshed_variant = self.validator.vm.g_to_t(genomic_gap_fill_variant, tx_gap_fill_variant.ac) # Set warning @@ -1928,7 +2497,7 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): gps = for_location_c.posedit.pos.start.base - 1 gpe = for_location_c.posedit.pos.start.base @@ -1944,31 +2513,30 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): # the transcript variant but do not have a position which actually hits the gap, # so the variant likely spans the gap, and is not picked up by an offset. 
try: - c1 = validator.vm.n_to_c(tx_hgvs_not_delins) + c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) except: c1 = tx_hgvs_not_delins - g1 = validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) - g3 = validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - ng2 = variant.hn.normalize(g2) + g1 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g3 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + ng2 = self.variant.hn.normalize(g2) g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( len(g3.posedit.edit.ref) - 1) try: - c2 = validator.vm.g_to_t(g3, c1.ac) + c2 = self.validator.vm.g_to_t(g3, c1.ac) if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: pass else: tx_hgvs_not_delins = c2 try: - tx_hgvs_not_delins = validator.vm.c_to_n(tx_hgvs_not_delins) + tx_hgvs_not_delins = self.validator.vm.c_to_n(tx_hgvs_not_delins) except hgvs.exceptions.HGVSError: fn.exceptPass() except hgvs.exceptions.HGVSInvalidVariantError: fn.exceptPass() - if re.search(r'\+', - str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): + if re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( disparity_deletion_in[ @@ -1976,7 +2544,7 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): tx_hgvs_not_delins.ac) non_valid_caution = 'true' try: - c2 = validator.vm.n_to_c(tx_hgvs_not_delins) + c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) except: c2 = tx_hgvs_not_delins c1 = copy.deepcopy(c2) @@ -1986,12 +2554,12 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): c1.posedit.edit.ref = '' c1.posedit.edit.alt = '' if orientation != -1: - g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) g1.posedit.edit.alt = g1.posedit.edit.ref else: - g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) g2.posedit.edit.alt = g2.posedit.edit.ref reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] @@ -1999,31 +2567,28 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): g3.posedit.pos.end.base = g2.posedit.pos.end.base g3.posedit.edit.ref = reference g3.posedit.edit.alt = alternate - c3 = validator.vm.g_to_t(g3, c1.ac) + c3 = self.validator.vm.g_to_t(g3, c1.ac) hgvs_refreshed_variant = c3 # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) gps = for_location_c.posedit.pos.start.base gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' + gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' # Warn update auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\+', - str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): + elif re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): auto_info = auto_info + 'Genome position ' + str( stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( disparity_deletion_in[1]) + '-bp gap in transcript ' + str( tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) non_valid_caution = 'true' try: - c1 = validator.vm.n_to_c(tx_hgvs_not_delins) + c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) except: c1 = tx_hgvs_not_delins c2 = copy.deepcopy(c1) @@ -2033,12 +2598,12 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): c2.posedit.edit.ref = '' c2.posedit.edit.alt = '' if orientation != -1: - g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) g2.posedit.edit.alt = g2.posedit.edit.ref else: - g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) g1.posedit.edit.alt = g1.posedit.edit.ref reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] @@ -2046,16 +2611,15 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): g3.posedit.pos.end.base = g2.posedit.pos.end.base g3.posedit.edit.ref = reference g3.posedit.edit.alt = alternate - c3 = validator.vm.g_to_t(g3, c1.ac) + c3 = 
self.validator.vm.g_to_t(g3, c1.ac) hgvs_refreshed_variant = c3 # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) gps = for_location_c.posedit.pos.end.base gpe = for_location_c.posedit.pos.end.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' # Warn update auto_info = auto_info + '%s' % (gap_position) elif re.search(r'\-', @@ -2068,7 +2632,7 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): tx_hgvs_not_delins.ac) non_valid_caution = 'true' try: - c2 = validator.vm.n_to_c(tx_hgvs_not_delins) + c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) except: c2 = tx_hgvs_not_delins c1 = copy.deepcopy(c2) @@ -2078,12 +2642,12 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): c1.posedit.edit.ref = '' c1.posedit.edit.alt = '' if orientation != -1: - g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) g1.posedit.edit.alt = g1.posedit.edit.ref else: - g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) g2.posedit.edit.alt = g2.posedit.edit.ref reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] @@ -2091,31 +2655,28 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): g3.posedit.pos.end.base = g2.posedit.pos.end.base g3.posedit.edit.ref = reference g3.posedit.edit.alt = alternate - c3 = validator.vm.g_to_t(g3, c1.ac) + c3 = 
self.validator.vm.g_to_t(g3, c1.ac) hgvs_refreshed_variant = c3 # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) gps = for_location_c.posedit.pos.start.base - 1 gpe = for_location_c.posedit.pos.start.base - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' # Warn update auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', - str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): + elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): auto_info = auto_info + 'Genome position ' + str( stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( disparity_deletion_in[1]) + '-bp gap in transcript ' + str( tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) non_valid_caution = 'true' try: - c1 = validator.vm.n_to_c(tx_hgvs_not_delins) + c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) except: c1 = tx_hgvs_not_delins c2 = copy.deepcopy(c1) @@ -2125,12 +2686,12 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): c2.posedit.edit.ref = '' c2.posedit.edit.alt = '' if orientation != -1: - g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) g2.posedit.edit.alt = g2.posedit.edit.ref else: - g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, 
hgvs_genomic.ac) + g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) g1.posedit.edit.alt = g1.posedit.edit.ref reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] @@ -2138,16 +2699,15 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): g3.posedit.pos.end.base = g2.posedit.pos.end.base g3.posedit.edit.ref = reference g3.posedit.edit.alt = alternate - c3 = validator.vm.g_to_t(g3, c1.ac) + c3 = self.validator.vm.g_to_t(g3, c1.ac) hgvs_refreshed_variant = c3 # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) gps = for_location_c.posedit.pos.end.base - 1 gpe = for_location_c.posedit.pos.end.base - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' + gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' # Warn update auto_info = auto_info + '%s' % (gap_position) else: @@ -2156,889 +2716,167 @@ def g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var): disparity_deletion_in[ 1]) + ' genomic base(s) that fail to align to transcript ' + str( tx_hgvs_not_delins.ac) + '\n' - tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.start.base + len( - tx_hgvs_not_delins.posedit.edit.ref) - 1 hgvs_refreshed_variant = tx_hgvs_not_delins + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) # GAP IN THE CHROMOSOME + elif disparity_deletion_in[0] == 'chromosome': - suppress_c_normalization = 'true' - # amend_RefSeqGene = 'true' - if possibility_counter == 3: - hgvs_refreshed_variant = stash_tx_right - elif possibility_counter == 4: - hgvs_refreshed_variant = stash_tx_left - else: - hgvs_refreshed_variant = chromosome_normalized_hgvs_coding + # Set warning variables + gap_position = '' + gapped_alignment_warning = str( + hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly + hgvs_refreshed_variant = tx_hgvs_not_delins # Warn auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' 
+ str( - hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' transcript base(s) that fail to align to chromosome ' + str( + hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(disparity_deletion_in[ + 1]) + ' transcript base(s) that fail to align to chromosome ' + str( hgvs_genomic.ac) + '\n' + gapped_transcripts = gapped_transcripts + str(hgvs_refreshed_variant.ac) + ' ' else: # Keep the same by re-setting rel_var - hgvs_refreshed_variant = hgvs_coding - # amend_RefSeqGene = 'false' + hgvs_refreshed_variant = saved_hgvs_coding # Edit the output if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( hgvs_refreshed_variant.type)): - hgvs_refreshed_variant = variant.no_norm_evm.n_to_c(hgvs_refreshed_variant) + hgvs_refreshed_variant = self.variant.evm.n_to_c(hgvs_refreshed_variant) else: pass - try: - variant.hn.normalize(hgvs_refreshed_variant) + hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) + if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ + hgvs_refreshed_variant.posedit.edit.ref[-1] == \ + hgvs_refreshed_variant.posedit.edit.alt[-1]: + hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ + 0:-1] + hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ + 0:-1] + hgvs_refreshed_variant.posedit.pos.end.base = hgvs_refreshed_variant.posedit.pos.end.base - 1 + hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) + elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ + hgvs_refreshed_variant.posedit.edit.ref[0] == \ + hgvs_refreshed_variant.posedit.edit.alt[0]: + hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ + 1:] + hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ + 1:] + hgvs_refreshed_variant.posedit.pos.start.base = hgvs_refreshed_variant.posedit.pos.start.base + 1 + hgvs_refreshed_variant = 
self.variant.hn.normalize(hgvs_refreshed_variant) except Exception as e: error = str(e) - # Ensure the final variant is not intronic nor does it cross exon boundaries if re.match('Normalization of intronic variants is not supported', error) or re.match( 'Unsupported normalization of variants spanning the exon-intron boundary', error): hgvs_refreshed_variant = saved_hgvs_coding - else: - logger.warning(error) - continue - - # Quick check to make sure the coding variant has not changed - try: - to_test = variant.hn.normalize(hgvs_refreshed_variant) - except: - to_test = hgvs_refreshed_variant - if str(to_test.posedit.edit) != str(hgvs_coding.posedit.edit): - # Try the next available genomic option - if hgvs_coding.posedit.edit.type == 'identity' and to_test.posedit.edit.type == 'identity': - hgvs_coding = to_test - else: - continue - # Update hgvs_genomic - hgvs_genomic = validator.myvm_t_to_g(hgvs_refreshed_variant, hgvs_genomic.ac, - variant.no_norm_evm, variant.hn) - if hgvs_genomic.posedit.edit.type == 'identity': - re_c = validator.vm.g_to_t(hgvs_genomic, hgvs_refreshed_variant.ac) - if (variant.hn.normalize(re_c)) != (variant.hn.normalize(hgvs_refreshed_variant)): - shuffle_left_g = copy.copy(hgvs_genomic) - shuffle_left_g.posedit.edit.ref = '' - shuffle_left_g.posedit.edit.alt = '' - shuffle_left_g.posedit.pos.start.base = shuffle_left_g.posedit.pos.start.base - 1 - shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 - shuffle_left_g = variant.reverse_normalizer.normalize(shuffle_left_g) - re_c = validator.vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) - if (variant.hn.normalize(re_c)) != (variant.hn.normalize(hgvs_refreshed_variant)): - hgvs_genomic = shuffle_left_g - - # If it is intronic, these vairables will not have been set - else: - # amend_RefSeqGene = 'false' - no_normalized_c = 'false' - - # Break if gap has been detected - if disparity_deletion_in[0] != 'false': - break - - # Warn user about gapping - if auto_info != 
'': - info_lines = auto_info.split('\n') - info_keys = {} - for information in info_lines: - info_keys[information] = '' - info_out = [] - info_out.append( - 'The displayed variants may be artefacts of aligning ' + hgvs_coding.ac + ' with genome build ' + variant.primary_assembly) - for ky in list(info_keys.keys()): - info_out.append(ky) - auto_info = '\n'.join(info_out) - auto_info = auto_info + '\nCaution should be used when reporting the displayed variant descriptions: If you are unsure, please contact admin' - auto_info = str(auto_info.replace('\n', ': ')) - variant.warnings += ': ' + str(auto_info) - logger.warning(str(auto_info)) - # Normailse hgvs_genomic - try: - hgvs_genomic = variant.hn.normalize(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: - # Strange error caused by gap in genomic - error = str(e) - - if re.search('base start position must be <= end position', error) and \ - disparity_deletion_in[0] == 'chromosome': - if hgvs_genomic.posedit.edit.type == 'delins': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - hgvs_genomic = variant.hn.normalize(hgvs_genomic) - if hgvs_genomic.posedit.edit.type == 'del': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - hgvs_genomic = variant.hn.normalize(hgvs_genomic) - genomic = 
fn.valstr(hgvs_genomic) - - print('in gapped_mapping', hgvs_coding) - - return hgvs_genomic, gapped_transcripts, auto_info, suppress_c_normalization, hgvs_coding, hgvs_genomic_possibilities - - -def g_to_t_gapped_mapping_stage2(validator, variant, ori, hgvs_coding, hgvs_genomic, gapped_transcripts, hgvs_genomic_possibilities, auto_info): - logger.warning('g_to_t gap code 2 active') - - hgvs_genomic_variant = hgvs_genomic - reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_genomic_variant) - hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, variant.primary_assembly, - variant.reverse_normalizer, validator.sf) - chr = vcf_dict['chr'] - pos = vcf_dict['pos'] - ref = vcf_dict['ref'] - alt = vcf_dict['alt'] - - # Create a VCF call - vcf_component_list = [str(chr), str(pos), str(ref), (alt)] - vcf_genomic = '-'.join(vcf_component_list) - - # DO NOT DELETE - # Generate an end position - end = str(int(pos) + len(ref) - 1) - pos = str(pos) - stored_hgvs_not_delins = validator.hp.parse_hgvs_variant(str( - hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) - orientation = int(ori[0]['alt_strand']) - saved_hgvs_coding = copy.deepcopy(hgvs_coding) - - # is it in an exon? - is_it_in_an_exon = 'no' - for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - # Take from stored copy - # hgvs_genomic_5pr = copy.deepcopy(stored_hgvs_genomic_5pr) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - is_it_in_an_exon = 'yes' - if is_it_in_an_exon == 'yes': - # map form reverse normalized g. to c. 
- hgvs_from_5n_g = variant.no_norm_evm.g_to_t(hgvs_genomic_5pr, saved_hgvs_coding.ac) - - # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths - disparity_deletion_in = ['false', 'false'] - if stored_hgvs_not_delins != '': - # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) - # This test will only occur in dup of single base, insertion or substitution - if not re.search('_', str(hgvs_not_delins.posedit.pos)): - if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', - hgvs_genomic_5pr.posedit.edit.type): - # For gap in chr, map to t. - but becaouse we have pushed to 5 prime by norm, add 1 to end pos - plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) - plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 - plussed_hgvs_not_delins.posedit.edit.ref = '' - transcript_variant = variant.no_norm_evm.g_to_t(plussed_hgvs_not_delins, - str(saved_hgvs_coding.ac)) - if (( - transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( - hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and not 
re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - else: - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] else: pass - else: - pass - - hard_fail = 'false' - try: - tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) - except Exception as e: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - tx_hgvs_not_delins = hgvs_coding - 
hard_fail = 'true' - - # Create normalized version of tx_hgvs_not_delins - rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) - # Check for +ve base and adjust - if re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\+', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - fn.exceptPass() - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, - variant.primary_assembly, variant.hn) - rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, - variant.primary_assembly, variant.hn) - rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - 
str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: - # pass - - # Check for -ve base and adjust - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - fn.exceptPass() - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base back to next available non-offset base - # rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base - 1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # Delete the ref - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # Add the additional base to the ALT - start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) - rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, - variant.primary_assembly, variant.hn) - rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - 
rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = validator.myevm_t_to_g(test_tx_var, variant.no_norm_evm, - variant.primary_assembly, variant.hn) - rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - else: + # Sort out equality to equality c. events where the code will add 2 additional bases + if hgvs_coding.posedit.edit.type == 'identity' and hgvs_refreshed_variant.posedit.edit.type == 'identity': # and len(hgvs_refreshed_variant.posedit.edit.ref) == (len(hgvs_coding.posedit.edit.ref) + 2): pass - - # Logic - if len(hgvs_not_delins.posedit.edit.ref) < len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( - hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['chromosome', gap_length] - elif len(hgvs_not_delins.posedit.edit.ref) > len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( - rn_tx_hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] else: - re_capture_tx_variant = [] - # TODO: Need to check if hgvs_genomic_possibilities is ever not empty! 
- for internal_possibility in hgvs_genomic_possibilities: - - if internal_possibility == '': - continue - - hgvs_t_possibility = validator.vm.g_to_t(internal_possibility, hgvs_coding.ac) - if hgvs_t_possibility.posedit.edit.type == 'ins': - try: - hgvs_t_possibility = validator.vm.c_to_n(hgvs_t_possibility) - except: - fn.exceptPass() - ins_ref = validator.sf.fetch_seq(hgvs_t_possibility.ac, - hgvs_t_possibility.posedit.pos.start.base - 1, - hgvs_t_possibility.posedit.pos.start.base + 1) - try: - hgvs_t_possibility = validator.vm.n_to_c(hgvs_t_possibility) - except: - fn.exceptPass() - hgvs_t_possibility.posedit.edit.ref = ins_ref - hgvs_t_possibility.posedit.edit.alt = ins_ref[ - 0] + hgvs_t_possibility.posedit.edit.alt + \ - ins_ref[1] - if internal_possibility.posedit.edit.type == 'ins': - ins_ref = validator.sf.fetch_seq(internal_possibility.ac, - internal_possibility.posedit.pos.start.base - 1, - internal_possibility.posedit.pos.end.base) - internal_possibility.posedit.edit.ref = ins_ref - internal_possibility.posedit.edit.alt = ins_ref[ - 0] + internal_possibility.posedit.edit.alt + \ - ins_ref[1] - - if len(hgvs_t_possibility.posedit.edit.ref) < len( - internal_possibility.posedit.edit.ref): - gap_length = len(internal_possibility.posedit.edit.ref) - len( - hgvs_t_possibility.posedit.edit.ref) - re_capture_tx_variant = ['transcript', gap_length, hgvs_t_possibility] - hgvs_not_delins = internal_possibility - hgvs_genomic_5pr = internal_possibility - break - - if re_capture_tx_variant != []: - try: - tx_hgvs_not_delins = validator.vm.c_to_n(re_capture_tx_variant[2]) - except: - tx_hgvs_not_delins = re_capture_tx_variant[2] - disparity_deletion_in = re_capture_tx_variant[0:-1] - else: - pass - - # Final sanity checks - try: - validator.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) - except Exception as e: - if str(e) == 'start or end or both are beyond the bounds of transcript record': - logger.warning(str(e)) - return True - try: - 
variant.hn.normalize(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - logger.warning(error) - return True - elif re.match('Normalization of intronic variants is not supported', error): - # We know that this cannot be because of an intronic variant, so must be aligned to tx gap - disparity_deletion_in = ['transcript', 'Requires Analysis'] - - if hard_fail == 'true': - disparity_deletion_in = ['false', 'false'] - - # Recreate hgvs_genomic - if disparity_deletion_in[0] == 'transcript': - hgvs_genomic = hgvs_not_delins - - # Pre-processing of tx_hgvs_not_delins - try: - if tx_hgvs_not_delins.posedit.edit.alt is None: - tx_hgvs_not_delins.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' 
+ str( - tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = validator.hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) - - # GAP IN THE TRANSCRIPT DISPARITY DETECTED - if disparity_deletion_in[0] == 'transcript': - gap_position = '' - gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + variant.primary_assembly - - # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', - str( - tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', - str( - tx_hgvs_not_delins.posedit.pos.end))): - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - - # Copy the current variant - tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) - try: - if tx_gap_fill_variant.posedit.edit.alt is None: - tx_gap_fill_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( - tx_gap_fill_variant.posedit.pos.start) + '_' + str( - tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = validator.hp.parse_hgvs_variant( - tx_gap_fill_variant_delins_from_dup) - - # Identify which half of the NOT-intron the start position of the variant is in - if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - - try: - tx_gap_fill_variant = validator.vm.n_to_c(tx_gap_fill_variant) - except: - fn.exceptPass() - genomic_gap_fill_variant = validator.vm.t_to_g(tx_gap_fill_variant, - reverse_normalized_hgvs_genomic.ac) - genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - - try: - c_tx_hgvs_not_delins = validator.vm.n_to_c(tx_hgvs_not_delins) - except Exception: - c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = validator.vm.t_to_g(c_tx_hgvs_not_delins, - hgvs_genomic_5pr.ac) - - # Ensure an ALT exists - try: - if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_gap_fill_variant_delins_from_dup 
= genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( - genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = validator.hp.parse_hgvs_variant( - genomic_gap_fill_variant_delins_from_dup) - genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( - genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = validator.hp.parse_hgvs_variant( - genomic_gap_fill_variant_alt_delins_from_dup) - - # Correct insertion alts - if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - genomic_gap_fill_variant_alt.posedit.pos.end.base) - genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - append_ref[1] - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) - else: - # Deletions with no ins - pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = genomic_gap_fill_variant.posedit.pos.start.base - alt_start = 
genomic_gap_fill_variant_alt.posedit.pos.start.base - ref_base_dict = {} - for base in reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = ref_start + 1 - - alt_base_dict = {} - - # NEED TO SEARCH FOR RANGE = and replace with interval_range - # Need to search for int and replace with integer - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, - genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): - if integer == alt_start: - alt_base_dict[integer] = str(''.join(alternate_bases)) - else: - alt_base_dict[integer] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, - genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): - if integer in list(alt_base_dict.keys()): - alternate_sequence_bases.append(alt_base_dict[integer]) - else: - alternate_sequence_bases.append(ref_base_dict[integer]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Add the new alt to the gap fill variant and generate transcript variant - genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = validator.vm.g_to_t(genomic_gap_fill_variant, - tx_gap_fill_variant.ac) - - # Set warning - gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - disparity_deletion_in[1] = [gap_size] - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base + hgvs_coding = copy.deepcopy(hgvs_refreshed_variant) + coding = fn.valstr(hgvs_coding) + formatted_variant = coding + + return hgvs_coding + + def dup_ins_5prime_shift(self, stored_hgvs_not_delins, hgvs_genomic_5pr, saved_hgvs_coding): + hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) + # This test will only occur in dup of single base, insertion or substitution + if not re.search('_', str(hgvs_not_delins.posedit.pos)): + if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', + hgvs_genomic_5pr.posedit.edit.type): + # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos + plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) + plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 + plussed_hgvs_not_delins.posedit.edit.ref = '' + transcript_variant = self.variant.no_norm_evm.g_to_t(plussed_hgvs_not_delins, + str(saved_hgvs_coding.ac)) + if (( + transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( + hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = self.validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', + str(hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = self.validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] else: - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' - auto_info = auto_info + '%s' % (gap_position) - + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = self.validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', + str(hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = self.validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] else: - if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # In this instance, we have identified a transcript gap but the n. version of - # the transcript variant but do not have a position which actually hits the gap, - # so the variant likely spans the gap, and is not picked up by an offset. 
- try: - c1 = validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - g1 = validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) - g3 = validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - ng2 = variant.hn.normalize(g2) - g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - len(g3.posedit.edit.ref) - 1) - try: - c2 = validator.vm.g_to_t(g3, c1.ac) - if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - pass - else: - tx_hgvs_not_delins = c2 - try: - tx_hgvs_not_delins = validator.vm.c_to_n(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSError: - fn.exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError: - fn.exceptPass() - - if re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - 
g3.posedit.edit.alt = alternate - c3 = validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position 
- for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - gpe = for_location_c.posedit.pos.end.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', - str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = 
for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - 1 - gpe = for_location_c.posedit.pos.end.base - gap_position = ' between positions 
c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - else: - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) + '\n' - hgvs_refreshed_variant = tx_hgvs_not_delins - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - - # GAP IN THE CHROMOSOME - - elif disparity_deletion_in[0] == 'chromosome': - # Set warning variables - gap_position = '' - gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + variant.primary_assembly - hgvs_refreshed_variant = tx_hgvs_not_delins - # Warn - auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( - hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(disparity_deletion_in[ - 1]) + ' transcript base(s) that fail to align to chromosome ' + str( - hgvs_genomic.ac) + '\n' - gapped_transcripts = gapped_transcripts + str(hgvs_refreshed_variant.ac) + ' ' - else: - # Keep the same by re-setting rel_var - hgvs_refreshed_variant = saved_hgvs_coding - - # Edit the output - if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( - hgvs_refreshed_variant.type)): - hgvs_refreshed_variant = variant.evm.n_to_c(hgvs_refreshed_variant) + pass else: pass - try: - hgvs_refreshed_variant = variant.hn.normalize(hgvs_refreshed_variant) - if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - hgvs_refreshed_variant.posedit.edit.ref[-1] == \ - hgvs_refreshed_variant.posedit.edit.alt[-1]: - hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - 0:-1] - hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - 0:-1] - hgvs_refreshed_variant.posedit.pos.end.base 
= hgvs_refreshed_variant.posedit.pos.end.base - 1 - hgvs_refreshed_variant = variant.hn.normalize(hgvs_refreshed_variant) - elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - hgvs_refreshed_variant.posedit.edit.ref[0] == \ - hgvs_refreshed_variant.posedit.edit.alt[0]: - hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - 1:] - hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - 1:] - hgvs_refreshed_variant.posedit.pos.start.base = hgvs_refreshed_variant.posedit.pos.start.base + 1 - hgvs_refreshed_variant = variant.hn.normalize(hgvs_refreshed_variant) - except Exception as e: - error = str(e) - # Ensure the final variant is not intronic nor does it cross exon boundaries - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - hgvs_refreshed_variant = saved_hgvs_coding - else: - pass - # Sort out equality to equality c. 
events where the code will add 2 additional bases - if hgvs_coding.posedit.edit.type == 'identity' and hgvs_refreshed_variant.posedit.edit.type == 'identity': # and len(hgvs_refreshed_variant.posedit.edit.ref) == (len(hgvs_coding.posedit.edit.ref) + 2): + return hgvs_not_delins + + def remove_offsetting_to_span_gap(self, rn_tx_hgvs_not_delins): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: pass - else: - hgvs_coding = copy.deepcopy(hgvs_refreshed_variant) - coding = fn.valstr(hgvs_coding) - formatted_variant = coding - return hgvs_coding + return rn_tx_hgvs_not_delins + + # def move_tx_end_base_to_next_non_offset(validator, variant, rn_tx_hgvs_not_delins): + # # move tx end base back to next available non-offset base + # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # # Delete the ref + # rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # # Add the additional base to the ALT + # start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 + # end = rn_tx_hgvs_not_delins.posedit.pos.end.base + # ref_bases = self.validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + # rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases + # if re.match('NM_', str(rn_tx_hgvs_not_delins)): + # test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + # else: + # test_tx_var = rn_tx_hgvs_not_delins \ No newline at end of file diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index 1460daac..e9d88ba0 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -147,7 +147,9 @@ def gene_to_transcripts(variant, validator): # Tag the line so that it is not written out 
variant.write = False - data, nw_rel_var = gapped_mapping.gapped_g_to_c(variant, validator, rel_var) + gap_mapper = gapped_mapping.GapMapper(variant, validator) + + data, nw_rel_var = gap_mapper.gapped_g_to_c(rel_var) # Warn the user that the g. description is not valid if data['gapped_alignment_warning'] != '': @@ -709,10 +711,13 @@ def transcripts_to_gene(variant, validator): # 2. Lock in hgvs_genomic at its most 5 prime position wrt genome hgvs_genomic_possibilities = [] + # Create gap_mapper object instance + gap_mapper = gapped_mapping.GapMapper(variant, validator) + # Loop out gap finding code under these circumstances! if gap_compensation is True: hgvs_genomic, gapped_transcripts, auto_info, suppress_c_normalization, hgvs_coding, \ - hgvs_genomic_possibilities = gapped_mapping.g_to_t_compensation(variant, validator, ori, hgvs_coding, rec_var) + hgvs_genomic_possibilities = gap_mapper.g_to_t_compensation(ori, hgvs_coding, rec_var) else: suppress_c_normalization = 'false' @@ -730,8 +735,8 @@ def transcripts_to_gene(variant, validator): # Loop out gap finding code under these circumstances! logger.warning("gap_compensation_2 = " + str(gap_compensation)) if gap_compensation is True: - hgvs_coding = gapped_mapping.g_to_t_gapped_mapping_stage2( - validator, variant, ori, hgvs_coding, hgvs_genomic, gapped_transcripts, hgvs_genomic_possibilities, + hgvs_coding = gap_mapper.g_to_t_gapped_mapping_stage2( + ori, hgvs_coding, hgvs_genomic, gapped_transcripts, hgvs_genomic_possibilities, auto_info ) From b78f7c1b14b64b46185ea5062e0690cc6c11e320 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 25 Apr 2019 15:27:21 +0100 Subject: [PATCH 067/223] Pulled transcript disparity into new method as three almost identical copies existed. 
--- VariantValidator/modules/gapped_mapping.py | 2985 ++++++++++++-------- 1 file changed, 1736 insertions(+), 1249 deletions(-) diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index d3c23768..74c38413 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -247,35 +247,37 @@ def gapped_g_to_c(self, rel_var): rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, - self.variant.primary_assembly, self.variant.hn) - rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, tx_hgvs_not_delins, back=False) + # # move tx end base to next available non-offset base + # rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 + # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # if re.match('NM_', str(rn_tx_hgvs_not_delins)): + # test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + # else: + # test_tx_var = rn_tx_hgvs_not_delins + # # re-make genomic and tx + # hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, + # self.variant.primary_assembly, self.variant.hn) + # 
rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + # str(saved_hgvs_coding.ac)) elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, - self.variant.primary_assembly, self.variant.hn) - rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: - # pass + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding) + # # move tx start base to previous available non-offset base + # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # if re.match('NM_', str(rn_tx_hgvs_not_delins)): + # test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + # else: + # test_tx_var = rn_tx_hgvs_not_delins + # # re-make genomic and tx + # hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, + # self.variant.primary_assembly, self.variant.hn) + # rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + # str(saved_hgvs_coding.ac)) + # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # # else: + # # pass # Check for -ve base and adjust elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\-', @@ -284,39 +286,41 @@ def gapped_g_to_c(self, rel_var): rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # 
move tx end base back to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # Delete the ref - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # Add the additional base to the ALT - start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = self.validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) - rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, - self.variant.primary_assembly, self.variant.hn) - rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, tx_hgvs_not_delins) + # # move tx end base back to next available non-offset base + # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # # Delete the ref + # rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # # Add the additional base to the ALT + # start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 + # end = rn_tx_hgvs_not_delins.posedit.pos.end.base + # ref_bases = self.validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + # rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases + # if re.match('NM_', str(rn_tx_hgvs_not_delins)): + # test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + # else: + # test_tx_var = rn_tx_hgvs_not_delins + # # re-make genomic and tx + # hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, + # self.variant.primary_assembly, self.variant.hn) + # rn_tx_hgvs_not_delins = 
self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + # str(saved_hgvs_coding.ac)) elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, - self.variant.primary_assembly, self.variant.hn) - rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, with_base_subtract=True) + # # move tx start base to previous available non-offset base + # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 + # rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # if re.match('NM_', str(rn_tx_hgvs_not_delins)): + # test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + # else: + # test_tx_var = rn_tx_hgvs_not_delins + # # re-make genomic and tx + # hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, + # self.variant.primary_assembly, self.variant.hn) + # rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + # str(saved_hgvs_coding.ac)) + # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 else: pass @@ -402,354 +406,357 @@ def gapped_g_to_c(self, rel_var): hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the 
transcripts listed below with genome build ' + self.variant.primary_assembly # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', - str( - tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', - str( - tx_hgvs_not_delins.posedit.pos.end))): - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - # Copy the current variant - tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) - try: - if tx_gap_fill_variant.posedit.edit.alt is None: - tx_gap_fill_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' + str( - tx_gap_fill_variant.posedit.pos.start) + '_' + str( - tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = self.validator.hp.parse_hgvs_variant( - tx_gap_fill_variant_delins_from_dup) - - # Identify which half of the NOT-intron the start position of the variant is in - if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - 
tx_gap_fill_variant.posedit.edit.ref = '' - - try: - tx_gap_fill_variant = self.validator.vm.n_to_c(tx_gap_fill_variant) - except: - fn.exceptPass() - genomic_gap_fill_variant = self.validator.vm.t_to_g(tx_gap_fill_variant, - reverse_normalized_hgvs_genomic.ac) - genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - try: - c_tx_hgvs_not_delins = self.validator.vm.n_to_c(tx_hgvs_not_delins) - except Exception: - c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = self.validator.vm.t_to_g(c_tx_hgvs_not_delins, - hgvs_genomic_5pr.ac) - - # Ensure an ALT exists - try: - if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( - genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = self.validator.hp.parse_hgvs_variant( - genomic_gap_fill_variant_delins_from_dup) - genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' 
+ str( - genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = self.validator.hp.parse_hgvs_variant( - genomic_gap_fill_variant_alt_delins_from_dup) - - # Correct insertion alts - if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = self.validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - genomic_gap_fill_variant_alt.posedit.pos.end.base) - genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - append_ref[1] - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) - else: - # Deletions with no ins - pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = genomic_gap_fill_variant.posedit.pos.start.base - alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base - ref_base_dict = {} - for base in reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = ref_start + 1 - - alt_base_dict = {} - - # NEED TO SEARCH FOR RANGE = and replace with interval_range - # Need to search for int and replace with integer - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, - genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): - if integer == alt_start: 
- alt_base_dict[integer] = str(''.join(alternate_bases)) - else: - alt_base_dict[integer] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, - genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): - if integer in list(alt_base_dict.keys()): - alternate_sequence_bases.append(alt_base_dict[integer]) - else: - alternate_sequence_bases.append(ref_base_dict[integer]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Add the new alt to the gap fill variant and generate transcript variant - genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = self.validator.vm.g_to_t(genomic_gap_fill_variant, - tx_gap_fill_variant.ac) - - # Set warning - gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - disparity_deletion_in[1] = [gap_size] - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - else: - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - auto_info = auto_info + '%s' % (gap_position) - else: - if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # In this instance, we have identified a transcript gap but the n. 
version of - # the transcript variant but do not have a position which actually hits the gap, - # so the variant likely spans the gap, and is not picked up by an offset. - try: - c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - g1 = self.validator.nr_vm.t_to_g(c1, self.variant.hgvs_genomic.ac) - g3 = self.validator.nr_vm.t_to_g(c1, self.variant.hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c1, self.variant.hgvs_genomic.ac) - ng2 = self.variant.hn.normalize(g2) - g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - len(g3.posedit.edit.ref) - 1) - try: - c2 = self.validator.vm.g_to_t(g3, c1.ac) - if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - pass - else: - tx_hgvs_not_delins = c2 - try: - tx_hgvs_not_delins = self.validator.vm.c_to_n(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSError: - fn.exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError: - fn.exceptPass() - - if re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = self.validator.vm.t_to_g(c1, self.variant.hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c2, self.variant.hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = self.validator.vm.t_to_g(c2, self.variant.hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c1, self.variant.hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = self.validator.vm.t_to_g(c1, self.variant.hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c2, self.variant.hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = self.validator.vm.t_to_g(c2, self.variant.hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c1, self.variant.hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - gpe = for_location_c.posedit.pos.end.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', - str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = self.validator.vm.t_to_g(c1, self.variant.hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c2, self.variant.hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = self.validator.vm.t_to_g(c2, self.variant.hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c1, self.variant.hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - g2 = self.validator.vm.t_to_g(c2, self.variant.hgvs_genomic.ac) - c2 = self.validator.vm.g_to_t(g2, c2.ac) - reference = c1.posedit.edit.ref + c2.posedit.edit.ref[1:] - alternate = c1.posedit.edit.alt + c2.posedit.edit.ref[1:] - c3 = copy.deepcopy(c1) - c3.posedit.pos.end = c2.posedit.pos.end - c3.posedit.edit.ref = '' # reference - c3.posedit.edit.alt = alternate - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - 1 - gpe = for_location_c.posedit.pos.end.base - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - else: - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) + '\n' - hgvs_refreshed_variant = tx_hgvs_not_delins - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + hgvs_refreshed_variant, gapped_transcripts, auto_info = self.transcript_disparity(tx_hgvs_not_delins, gapped_transcripts, reverse_normalized_hgvs_genomic, hgvs_genomic_5pr, disparity_deletion_in, auto_info, stored_hgvs_not_delins, self.variant.hgvs_genomic, orientation, 1) + + # if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.start))) and (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end))): + # gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + # + # # Copy the current variant + # tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) + # try: + # if tx_gap_fill_variant.posedit.edit.alt is None: + # tx_gap_fill_variant.posedit.edit.alt = '' + # except Exception as e: + # if str(e) == "'Dup' object has no attribute 'alt'": + # tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( + # tx_gap_fill_variant.posedit.pos.start) + '_' + str( + # tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref + # tx_gap_fill_variant = self.validator.hp.parse_hgvs_variant( + # tx_gap_fill_variant_delins_from_dup) + # + # # Identify which half of the NOT-intron the start position of the variant is in + # if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): + # tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 + # tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + # tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + # tx_gap_fill_variant.posedit.edit.alt = '' + # tx_gap_fill_variant.posedit.edit.ref = '' + # elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): + # tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + # tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 + # tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + # tx_gap_fill_variant.posedit.edit.alt = '' + # tx_gap_fill_variant.posedit.edit.ref = '' + # + # try: + # tx_gap_fill_variant = self.validator.vm.n_to_c(tx_gap_fill_variant) + # except: + # fn.exceptPass() + # genomic_gap_fill_variant = self.validator.vm.t_to_g(tx_gap_fill_variant, + # reverse_normalized_hgvs_genomic.ac) + # genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref + # + # try: + # c_tx_hgvs_not_delins = self.validator.vm.n_to_c(tx_hgvs_not_delins) + # except Exception: + # c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) + # genomic_gap_fill_variant_alt = self.validator.vm.t_to_g(c_tx_hgvs_not_delins, + # hgvs_genomic_5pr.ac) + # + # # Ensure an ALT exists + # try: + # if genomic_gap_fill_variant_alt.posedit.edit.alt is None: + # genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' + # except Exception 
as e: + # if str(e) == "'Dup' object has no attribute 'alt'": + # genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( + # genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( + # genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref + # genomic_gap_fill_variant = self.validator.hp.parse_hgvs_variant( + # genomic_gap_fill_variant_delins_from_dup) + # genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( + # genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( + # genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref + # genomic_gap_fill_variant_alt = self.validator.hp.parse_hgvs_variant( + # genomic_gap_fill_variant_alt_delins_from_dup) + # + # # Correct insertion alts + # if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': + # append_ref = self.validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + # genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, + # genomic_gap_fill_variant_alt.posedit.pos.end.base) + # genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ + # 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ + # append_ref[1] + # + # # Split the reference and replacing alt sequence into a dictionary + # reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) + # if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: + # alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) + # else: + # # Deletions with no ins + # pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) + # alternate_bases = [] + # for base in 
pre_alternate_bases: + # alternate_bases.append('X') + # + # # Create the dictionaries + # ref_start = genomic_gap_fill_variant.posedit.pos.start.base + # alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base + # ref_base_dict = {} + # for base in reference_bases: + # ref_base_dict[ref_start] = str(base) + # ref_start = ref_start + 1 + # + # alt_base_dict = {} + # + # # NEED TO SEARCH FOR RANGE = and replace with interval_range + # # Need to search for int and replace with integer + # + # # Note, all variants will be forced into the format delete insert + # # Deleted bases in the ALT will be substituted for X + # for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, + # genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): + # if integer == alt_start: + # alt_base_dict[integer] = str(''.join(alternate_bases)) + # else: + # alt_base_dict[integer] = 'X' + # + # # Generate the alt sequence + # alternate_sequence_bases = [] + # for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, + # genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): + # if integer in list(alt_base_dict.keys()): + # alternate_sequence_bases.append(alt_base_dict[integer]) + # else: + # alternate_sequence_bases.append(ref_base_dict[integer]) + # alternate_sequence = ''.join(alternate_sequence_bases) + # alternate_sequence = alternate_sequence.replace('X', '') + # + # # Add the new alt to the gap fill variant and generate transcript variant + # genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence + # hgvs_refreshed_variant = self.validator.vm.g_to_t(genomic_gap_fill_variant, + # tx_gap_fill_variant.ac) + # + # # Set warning + # gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) + # disparity_deletion_in[1] = [gap_size] + # auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( + # tx_hgvs_not_delins.ac) + # non_valid_caution = 'true' + # + # # Alignment position + # for_location_c = copy.deepcopy(hgvs_refreshed_variant) + # if re.match('NM_', str(for_location_c)): + # for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + # if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): + # gps = for_location_c.posedit.pos.start.base - 1 + # gpe = for_location_c.posedit.pos.start.base + # else: + # gps = for_location_c.posedit.pos.start.base + # gpe = for_location_c.posedit.pos.start.base + 1 + # gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # auto_info = auto_info + '%s' % (gap_position) + # + # else: + # if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: + # # In this instance, we have identified a transcript gap but the n. version of + # # the transcript variant but do not have a position which actually hits the gap, + # # so the variant likely spans the gap, and is not picked up by an offset. 
+ # try: + # c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + # except: + # c1 = tx_hgvs_not_delins + # g1 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) + # g3 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) + # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # ng2 = self.variant.hn.normalize(g2) + # g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( + # len(g3.posedit.edit.ref) - 1) + # try: + # c2 = self.validator.vm.g_to_t(g3, c1.ac) + # if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: + # pass + # else: + # tx_hgvs_not_delins = c2 + # try: + # tx_hgvs_not_delins = self.validator.vm.c_to_n(tx_hgvs_not_delins) + # except hgvs.exceptions.HGVSError: + # fn.exceptPass() + # except hgvs.exceptions.HGVSInvalidVariantError: + # fn.exceptPass() + # + # if re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + # r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): + # auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + # disparity_deletion_in[ + # 1]) + ' genomic base(s) that fail to align to transcript ' + str( + # tx_hgvs_not_delins.ac) + # non_valid_caution = 'true' + # hgvs_refreshed_variant = self.c2_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) + # # try: + # # c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + # # except: + # # c2 = tx_hgvs_not_delins + # # c1 = copy.deepcopy(c2) + # # c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + # # c1.posedit.pos.start.offset = 0 + # # c1.posedit.pos.end = c2.posedit.pos.start + # # c1.posedit.edit.ref = '' + # # c1.posedit.edit.alt = '' + # # if orientation != -1: + # # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # # g1.posedit.edit.alt = g1.posedit.edit.ref + # # else: + # # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # # g2.posedit.edit.alt = g2.posedit.edit.ref + # # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + # # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + # # g3 = copy.deepcopy(g1) + # # g3.posedit.pos.end.base = g2.posedit.pos.end.base + # # g3.posedit.edit.ref = reference + # # g3.posedit.edit.alt = alternate + # # c3 = self.validator.vm.g_to_t(g3, c1.ac) + # # hgvs_refreshed_variant = c3 + # # Alignment position + # for_location_c = copy.deepcopy(hgvs_refreshed_variant) + # if re.match('NM_', str(for_location_c)): + # for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + # gps = for_location_c.posedit.pos.start.base + # gpe = for_location_c.posedit.pos.start.base + 1 + # gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' + # # Warn update + # auto_info = auto_info + '%s' % (gap_position) + # elif re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + # r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): + # auto_info = auto_info + 'Genome position ' + str( + # stored_hgvs_not_delins.ac) + ':g.' + str( + # stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + # disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + # tx_hgvs_not_delins.ac) + # gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + # non_valid_caution = 'true' + # hgvs_refreshed_variant = self.c1_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) + # # try: + # # c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + # # except: + # # c1 = tx_hgvs_not_delins + # # c2 = copy.deepcopy(c1) + # # c2.posedit.pos.start = c1.posedit.pos.end + # # c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + # # c2.posedit.pos.end.offset = 0 + # # c2.posedit.edit.ref = '' + # # c2.posedit.edit.alt = '' + # # if orientation != -1: + # # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # # g2.posedit.edit.alt = g2.posedit.edit.ref + # # else: + # # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # # g1.posedit.edit.alt = g1.posedit.edit.ref + # # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + # # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + # # g3 = copy.deepcopy(g1) + # # g3.posedit.pos.end.base = g2.posedit.pos.end.base + # # g3.posedit.edit.ref = reference + # # g3.posedit.edit.alt = alternate + # # c3 = self.validator.vm.g_to_t(g3, c1.ac) + # # hgvs_refreshed_variant = c3 + # # Alignment position + # for_location_c = copy.deepcopy(hgvs_refreshed_variant) + # if re.match('NM_', str(for_location_c)): + # for_location_c = 
self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + # gps = for_location_c.posedit.pos.end.base + # gpe = for_location_c.posedit.pos.end.base + 1 + # gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # # Warn update + # auto_info = auto_info + '%s' % (gap_position) + # elif re.search(r'\-', + # str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + # r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): + # auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + # disparity_deletion_in[ + # 1]) + ' genomic base(s) that fail to align to transcript ' + str( + # tx_hgvs_not_delins.ac) + # non_valid_caution = 'true' + # hgvs_refreshed_variant = self.c2_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) + # # try: + # # c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + # # except: + # # c2 = tx_hgvs_not_delins + # # c1 = copy.deepcopy(c2) + # # c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + # # c1.posedit.pos.start.offset = 0 + # # c1.posedit.pos.end = c2.posedit.pos.start + # # c1.posedit.edit.ref = '' + # # c1.posedit.edit.alt = '' + # # if orientation != -1: + # # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # # g1.posedit.edit.alt = g1.posedit.edit.ref + # # else: + # # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # # g2.posedit.edit.alt = g2.posedit.edit.ref + # # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + # # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + # # g3 = copy.deepcopy(g1) + # # g3.posedit.pos.end.base = g2.posedit.pos.end.base + # # g3.posedit.edit.ref = reference + # # g3.posedit.edit.alt = alternate + # # c3 = self.validator.vm.g_to_t(g3, c1.ac) + # # hgvs_refreshed_variant = c3 + # # Alignment position + # for_location_c = 
copy.deepcopy(hgvs_refreshed_variant) + # if re.match('NM_', str(for_location_c)): + # for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + # gps = for_location_c.posedit.pos.start.base - 1 + # gpe = for_location_c.posedit.pos.start.base + # gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # # Warn update + # auto_info = auto_info + '%s' % (gap_position) + # elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + # r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): + # auto_info = auto_info + 'Genome position ' + str( + # stored_hgvs_not_delins.ac) + ':g.' + str( + # stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + # disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + # tx_hgvs_not_delins.ac) + # gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + # non_valid_caution = 'true' + # try: + # c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + # except: + # c1 = tx_hgvs_not_delins + # c2 = copy.deepcopy(c1) + # c2.posedit.pos.start = c1.posedit.pos.end + # c2.posedit.pos.end.base = c1.posedit.pos.end.base + # c2.posedit.pos.end.offset = 0 + # c2.posedit.edit.ref = '' + # c2.posedit.edit.alt = '' + # g2 = self.validator.vm.t_to_g(c2, self.variant.hgvs_genomic.ac) + # c2 = self.validator.vm.g_to_t(g2, c2.ac) + # reference = c1.posedit.edit.ref + c2.posedit.edit.ref[1:] + # alternate = c1.posedit.edit.alt + c2.posedit.edit.ref[1:] + # c3 = copy.deepcopy(c1) + # c3.posedit.pos.end = c2.posedit.pos.end + # c3.posedit.edit.ref = '' # reference + # c3.posedit.edit.alt = alternate + # hgvs_refreshed_variant = c3 + # # Alignment position + # for_location_c = copy.deepcopy(hgvs_refreshed_variant) + # if re.match('NM_', str(for_location_c)): + # for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + # gps = for_location_c.posedit.pos.end.base - 1 + # gpe = for_location_c.posedit.pos.end.base + # gap_position = ' between 
positions c.' + str(gps) + '_' + str(gpe) + '\n' + # # Warn update + # auto_info = auto_info + '%s' % (gap_position) + # else: + # auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + # stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( + # disparity_deletion_in[ + # 1]) + ' genomic base(s) that fail to align to transcript ' + str( + # tx_hgvs_not_delins.ac) + '\n' + # hgvs_refreshed_variant = tx_hgvs_not_delins + # gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) # GAP IN THE CHROMOSOME elif disparity_deletion_in[0] == 'chromosome': @@ -1399,37 +1406,39 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, - self.variant.primary_assembly, self.variant.hn) - - rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, tx_hgvs_not_delins, back=False) + # # move tx end base to next available non-offset base + # rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 + # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # if re.match('NM_', str(rn_tx_hgvs_not_delins)): + # test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + # else: + # test_tx_var = 
rn_tx_hgvs_not_delins + # # re-make genomic and tx + # hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, + # self.variant.primary_assembly, self.variant.hn) + # + # rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + # str(saved_hgvs_coding.ac)) # tx_hgvs_not_delins = rn_tx_hgvs_not_delins elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, - self.variant.primary_assembly, self.variant.hn) - rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding) + # # move tx start base to previous available non-offset base + # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # if re.match('NM_', str(rn_tx_hgvs_not_delins)): + # test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + # else: + # test_tx_var = rn_tx_hgvs_not_delins + # # re-make genomic and tx + # hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, + # self.variant.primary_assembly, self.variant.hn) + # rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + # str(saved_hgvs_coding.ac)) + # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # # else: # pass # Check for -ve base and adjust @@ -1458,20 +1467,21 @@ def g_to_t_compensation(self, ori, 
hgvs_coding, rec_var): rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, str(saved_hgvs_coding.ac)) elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, - self.variant.primary_assembly, self.variant.hn) - rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, with_base_subtract=True) + # # move tx start base to previous available non-offset base + # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 + # rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # if re.match('NM_', str(rn_tx_hgvs_not_delins)): + # test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + # else: + # test_tx_var = rn_tx_hgvs_not_delins + # # re-make genomic and tx + # hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, + # self.variant.primary_assembly, self.variant.hn) + # rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + # str(saved_hgvs_coding.ac)) + # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 else: pass @@ -1608,378 +1618,366 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): suppress_c_normalization = 'true' # 
amend_RefSeqGene = 'true' # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( - r'\-', - str( - tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( - r'\-', - str( - tx_hgvs_not_delins.posedit.pos.end))): - - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - - # Copy the current variant - tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) - try: - if tx_gap_fill_variant.posedit.edit.alt is None: - tx_gap_fill_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' + str( - tx_gap_fill_variant.posedit.pos.start) + '_' + str( - tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = self.validator.hp.parse_hgvs_variant( - tx_gap_fill_variant_delins_from_dup) - - # Identify which half of the NOT-intron the start position of the variant is in - if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - - try: - 
tx_gap_fill_variant = self.validator.vm.n_to_c(tx_gap_fill_variant) - except: - fn.exceptPass() - genomic_gap_fill_variant = self.validator.vm.t_to_g(tx_gap_fill_variant, - reverse_normalized_hgvs_genomic.ac) - genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - - try: - c_tx_hgvs_not_delins = self.validator.vm.n_to_c(tx_hgvs_not_delins) - except Exception: - c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = self.validator.vm.t_to_g(c_tx_hgvs_not_delins, - hgvs_genomic_5pr.ac) - - # Ensure an ALT exists - try: - if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( - genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = self.validator.hp.parse_hgvs_variant( - genomic_gap_fill_variant_delins_from_dup) - genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' 
+ str( - genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = self.validator.hp.parse_hgvs_variant( - genomic_gap_fill_variant_alt_delins_from_dup) - - # Correct insertion alts - if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = self.validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - genomic_gap_fill_variant_alt.posedit.pos.end.base) - genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - append_ref[1] - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) - else: - # Deletions with no ins - pre_alternate_bases = list( - genomic_gap_fill_variant_alt.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = genomic_gap_fill_variant.posedit.pos.start.base - alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base - ref_base_dict = {} - for base in reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = ref_start + 1 - - alt_base_dict = {} - - # NEED TO SEARCH FOR RANGE = and replace with interval_range - # Need to search for int and replace with integer - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, - genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, - 1): - if integer == 
alt_start: - alt_base_dict[integer] = str(''.join(alternate_bases)) - else: - alt_base_dict[integer] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, - genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): - if integer in list(alt_base_dict.keys()): - alternate_sequence_bases.append(alt_base_dict[integer]) - else: - alternate_sequence_bases.append(ref_base_dict[integer]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Add the new alt to the gap fill variant and generate transcript variant - genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = self.validator.vm.g_to_t(genomic_gap_fill_variant, - tx_gap_fill_variant.ac) - - # Set warning - gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - disparity_deletion_in[1] = [gap_size] - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - else: - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - auto_info = auto_info + '%s' % (gap_position) - else: - if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # In this instance, we have identified a transcript gap but the n. 
version of - # the transcript variant but do not have a position which actually hits the gap, - # so the variant likely spans the gap, and is not picked up by an offset. - try: - c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - g1 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) - g3 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - ng2 = self.variant.hn.normalize(g2) - g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - len(g3.posedit.edit.ref) - 1) - try: - c2 = self.validator.vm.g_to_t(g3, c1.ac) - if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - pass - else: - tx_hgvs_not_delins = c2 - try: - tx_hgvs_not_delins = self.validator.vm.c_to_n(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSError: - fn.exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError: - fn.exceptPass() - - if re.search(r'\+', - str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\+', - str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - gpe = for_location_c.posedit.pos.end.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', - str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', - str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - 1 - gpe = for_location_c.posedit.pos.end.base - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - else: - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) + '\n' - tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.start.base + len( - tx_hgvs_not_delins.posedit.edit.ref) - 1 - hgvs_refreshed_variant = tx_hgvs_not_delins + hgvs_refreshed_variant, gapped_transcripts, auto_info = self.transcript_disparity(tx_hgvs_not_delins, gapped_transcripts, reverse_normalized_hgvs_genomic, hgvs_genomic_5pr, disparity_deletion_in, auto_info, stored_hgvs_not_delins, hgvs_genomic, orientation, 2) + + # if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.start))) and (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end))): + # gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + # + # # Copy the current variant + # tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) + # try: + # if tx_gap_fill_variant.posedit.edit.alt is None: + # tx_gap_fill_variant.posedit.edit.alt = '' + # except Exception as e: + # if str(e) == "'Dup' object has no attribute 'alt'": + # tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( + # tx_gap_fill_variant.posedit.pos.start) + '_' + str( + # tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref + # tx_gap_fill_variant = self.validator.hp.parse_hgvs_variant( + # tx_gap_fill_variant_delins_from_dup) + # + # # Identify which half of the NOT-intron the start position of the variant is in + # if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): + # tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 + # tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + # tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + # tx_gap_fill_variant.posedit.edit.alt = '' + # tx_gap_fill_variant.posedit.edit.ref = '' + # elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): + # tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + # tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 + # tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + # tx_gap_fill_variant.posedit.edit.alt = '' + # tx_gap_fill_variant.posedit.edit.ref = '' + # + # try: + # tx_gap_fill_variant = self.validator.vm.n_to_c(tx_gap_fill_variant) + # except: + # fn.exceptPass() + # genomic_gap_fill_variant = self.validator.vm.t_to_g(tx_gap_fill_variant, + # reverse_normalized_hgvs_genomic.ac) + # genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref + # + # try: + # c_tx_hgvs_not_delins = self.validator.vm.n_to_c(tx_hgvs_not_delins) + # except Exception: + # c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) + # genomic_gap_fill_variant_alt = self.validator.vm.t_to_g(c_tx_hgvs_not_delins, + # hgvs_genomic_5pr.ac) + # + # # Ensure an ALT exists + # try: + # if genomic_gap_fill_variant_alt.posedit.edit.alt is None: + # genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' + # except Exception 
as e: + # if str(e) == "'Dup' object has no attribute 'alt'": + # genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( + # genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( + # genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref + # genomic_gap_fill_variant = self.validator.hp.parse_hgvs_variant( + # genomic_gap_fill_variant_delins_from_dup) + # genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( + # genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( + # genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref + # genomic_gap_fill_variant_alt = self.validator.hp.parse_hgvs_variant( + # genomic_gap_fill_variant_alt_delins_from_dup) + # + # # Correct insertion alts + # if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': + # append_ref = self.validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + # genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, + # genomic_gap_fill_variant_alt.posedit.pos.end.base) + # genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ + # 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ + # append_ref[1] + # + # # Split the reference and replacing alt sequence into a dictionary + # reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) + # if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: + # alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) + # else: + # # Deletions with no ins + # pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) + # alternate_bases = [] + # for base in 
pre_alternate_bases: + # alternate_bases.append('X') + # + # # Create the dictionaries + # ref_start = genomic_gap_fill_variant.posedit.pos.start.base + # alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base + # ref_base_dict = {} + # for base in reference_bases: + # ref_base_dict[ref_start] = str(base) + # ref_start = ref_start + 1 + # + # alt_base_dict = {} + # + # # NEED TO SEARCH FOR RANGE = and replace with interval_range + # # Need to search for int and replace with integer + # + # # Note, all variants will be forced into the format delete insert + # # Deleted bases in the ALT will be substituted for X + # for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, + # genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): + # if integer == alt_start: + # alt_base_dict[integer] = str(''.join(alternate_bases)) + # else: + # alt_base_dict[integer] = 'X' + # + # # Generate the alt sequence + # alternate_sequence_bases = [] + # for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, + # genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): + # if integer in list(alt_base_dict.keys()): + # alternate_sequence_bases.append(alt_base_dict[integer]) + # else: + # alternate_sequence_bases.append(ref_base_dict[integer]) + # alternate_sequence = ''.join(alternate_sequence_bases) + # alternate_sequence = alternate_sequence.replace('X', '') + # + # # Add the new alt to the gap fill variant and generate transcript variant + # genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence + # hgvs_refreshed_variant = self.validator.vm.g_to_t(genomic_gap_fill_variant, + # tx_gap_fill_variant.ac) + # + # # Set warning + # gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) + # disparity_deletion_in[1] = [gap_size] + # auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( + # tx_hgvs_not_delins.ac) + # non_valid_caution = 'true' + # + # # Alignment position + # for_location_c = copy.deepcopy(hgvs_refreshed_variant) + # if re.match('NM_', str(for_location_c)): + # for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + # if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): + # gps = for_location_c.posedit.pos.start.base - 1 + # gpe = for_location_c.posedit.pos.start.base + # else: + # gps = for_location_c.posedit.pos.start.base + # gpe = for_location_c.posedit.pos.start.base + 1 + # gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # auto_info = auto_info + '%s' % (gap_position) + # + # else: + # if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: + # # In this instance, we have identified a transcript gap but the n. version of + # # the transcript variant but do not have a position which actually hits the gap, + # # so the variant likely spans the gap, and is not picked up by an offset. 
+ # try: + # c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + # except: + # c1 = tx_hgvs_not_delins + # g1 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) + # g3 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) + # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # ng2 = self.variant.hn.normalize(g2) + # g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( + # len(g3.posedit.edit.ref) - 1) + # try: + # c2 = self.validator.vm.g_to_t(g3, c1.ac) + # if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: + # pass + # else: + # tx_hgvs_not_delins = c2 + # try: + # tx_hgvs_not_delins = self.validator.vm.c_to_n(tx_hgvs_not_delins) + # except hgvs.exceptions.HGVSError: + # fn.exceptPass() + # except hgvs.exceptions.HGVSInvalidVariantError: + # fn.exceptPass() + # + # if re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + # r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): + # auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + # disparity_deletion_in[ + # 1]) + ' genomic base(s) that fail to align to transcript ' + str( + # tx_hgvs_not_delins.ac) + # non_valid_caution = 'true' + # hgvs_refreshed_variant = self.c2_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) + # # try: + # # c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + # # except: + # # c2 = tx_hgvs_not_delins + # # c1 = copy.deepcopy(c2) + # # c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + # # c1.posedit.pos.start.offset = 0 + # # c1.posedit.pos.end = c2.posedit.pos.start + # # c1.posedit.edit.ref = '' + # # c1.posedit.edit.alt = '' + # # if orientation != -1: + # # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # # g1.posedit.edit.alt = g1.posedit.edit.ref + # # else: + # # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # # g2.posedit.edit.alt = g2.posedit.edit.ref + # # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + # # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + # # g3 = copy.deepcopy(g1) + # # g3.posedit.pos.end.base = g2.posedit.pos.end.base + # # g3.posedit.edit.ref = reference + # # g3.posedit.edit.alt = alternate + # # c3 = self.validator.vm.g_to_t(g3, c1.ac) + # # hgvs_refreshed_variant = c3 + # # Alignment position + # for_location_c = copy.deepcopy(hgvs_refreshed_variant) + # if re.match('NM_', str(for_location_c)): + # for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + # gps = for_location_c.posedit.pos.start.base + # gpe = for_location_c.posedit.pos.start.base + 1 + # gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' + # # Warn update + # auto_info = auto_info + '%s' % (gap_position) + # elif re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + # r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): + # auto_info = auto_info + 'Genome position ' + str( + # stored_hgvs_not_delins.ac) + ':g.' + str( + # stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + # disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + # tx_hgvs_not_delins.ac) + # gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + # non_valid_caution = 'true' + # hgvs_refreshed_variant = self.c1_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) + # # try: + # # c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + # # except: + # # c1 = tx_hgvs_not_delins + # # c2 = copy.deepcopy(c1) + # # c2.posedit.pos.start = c1.posedit.pos.end + # # c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + # # c2.posedit.pos.end.offset = 0 + # # c2.posedit.edit.ref = '' + # # c2.posedit.edit.alt = '' + # # if orientation != -1: + # # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # # g2.posedit.edit.alt = g2.posedit.edit.ref + # # else: + # # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # # g1.posedit.edit.alt = g1.posedit.edit.ref + # # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + # # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + # # g3 = copy.deepcopy(g1) + # # g3.posedit.pos.end.base = g2.posedit.pos.end.base + # # g3.posedit.edit.ref = reference + # # g3.posedit.edit.alt = alternate + # # c3 = self.validator.vm.g_to_t(g3, c1.ac) + # # hgvs_refreshed_variant = c3 + # # Alignment position + # for_location_c = copy.deepcopy(hgvs_refreshed_variant) + # if re.match('NM_', str(for_location_c)): + # for_location_c = 
self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + # gps = for_location_c.posedit.pos.end.base + # gpe = for_location_c.posedit.pos.end.base + 1 + # gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # # Warn update + # auto_info = auto_info + '%s' % (gap_position) + # elif re.search(r'\-', + # str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + # r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): + # auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + # disparity_deletion_in[ + # 1]) + ' genomic base(s) that fail to align to transcript ' + str( + # tx_hgvs_not_delins.ac) + # non_valid_caution = 'true' + # hgvs_refreshed_variant = self.c2_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) + # # try: + # # c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + # # except: + # # c2 = tx_hgvs_not_delins + # # c1 = copy.deepcopy(c2) + # # c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + # # c1.posedit.pos.start.offset = 0 + # # c1.posedit.pos.end = c2.posedit.pos.start + # # c1.posedit.edit.ref = '' + # # c1.posedit.edit.alt = '' + # # if orientation != -1: + # # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # # g1.posedit.edit.alt = g1.posedit.edit.ref + # # else: + # # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # # g2.posedit.edit.alt = g2.posedit.edit.ref + # # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + # # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + # # g3 = copy.deepcopy(g1) + # # g3.posedit.pos.end.base = g2.posedit.pos.end.base + # # g3.posedit.edit.ref = reference + # # g3.posedit.edit.alt = alternate + # # c3 = self.validator.vm.g_to_t(g3, c1.ac) + # # hgvs_refreshed_variant = c3 + # # Alignment position + # for_location_c = 
copy.deepcopy(hgvs_refreshed_variant) + # if re.match('NM_', str(for_location_c)): + # for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + # gps = for_location_c.posedit.pos.start.base - 1 + # gpe = for_location_c.posedit.pos.start.base + # gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # # Warn update + # auto_info = auto_info + '%s' % (gap_position) + # elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + # r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): + # auto_info = auto_info + 'Genome position ' + str( + # stored_hgvs_not_delins.ac) + ':g.' + str( + # stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + # disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + # tx_hgvs_not_delins.ac) + # gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + # non_valid_caution = 'true' + # hgvs_refreshed_variant = self.c1_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) + # # try: + # # c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + # # except: + # # c1 = tx_hgvs_not_delins + # # c2 = copy.deepcopy(c1) + # # c2.posedit.pos.start = c1.posedit.pos.end + # # c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + # # c2.posedit.pos.end.offset = 0 + # # c2.posedit.edit.ref = '' + # # c2.posedit.edit.alt = '' + # # if orientation != -1: + # # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # # g2.posedit.edit.alt = g2.posedit.edit.ref + # # else: + # # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # # g1.posedit.edit.alt = g1.posedit.edit.ref + # # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + # # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + # # g3 = copy.deepcopy(g1) + # # g3.posedit.pos.end.base = g2.posedit.pos.end.base + # # g3.posedit.edit.ref = reference + # # 
g3.posedit.edit.alt = alternate + # # c3 = self.validator.vm.g_to_t(g3, c1.ac) + # # hgvs_refreshed_variant = c3 + # # Alignment position + # for_location_c = copy.deepcopy(hgvs_refreshed_variant) + # if re.match('NM_', str(for_location_c)): + # for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + # gps = for_location_c.posedit.pos.end.base - 1 + # gpe = for_location_c.posedit.pos.end.base + # gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # # Warn update + # auto_info = auto_info + '%s' % (gap_position) + # else: + # auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + # stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( + # disparity_deletion_in[ + # 1]) + ' genomic base(s) that fail to align to transcript ' + str( + # tx_hgvs_not_delins.ac) + '\n' + # tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.start.base + len( + # tx_hgvs_not_delins.posedit.edit.ref) - 1 + # hgvs_refreshed_variant = tx_hgvs_not_delins # GAP IN THE CHROMOSOME elif disparity_deletion_in[0] == 'chromosome': @@ -2176,33 +2174,35 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic, gapped_tr rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, - self.variant.primary_assembly, self.variant.hn) - rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - 
str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, tx_hgvs_not_delins, back=False) + # # move tx end base to next available non-offset base + # rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 + # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # if re.match('NM_', str(rn_tx_hgvs_not_delins)): + # test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + # else: + # test_tx_var = rn_tx_hgvs_not_delins + # # re-make genomic and tx + # hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, + # self.variant.primary_assembly, self.variant.hn) + # rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + # str(saved_hgvs_coding.ac)) elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, - self.variant.primary_assembly, self.variant.hn) - rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding) + # rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # # move tx start base to previous available non-offset base + # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # if re.match('NM_', str(rn_tx_hgvs_not_delins)): + # test_tx_var = 
self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + # else: + # test_tx_var = rn_tx_hgvs_not_delins + # # re-make genomic and tx + # hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, + # self.variant.primary_assembly, self.variant.hn) + # rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + # str(saved_hgvs_coding.ac)) + # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 # else: # pass @@ -2213,40 +2213,43 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic, gapped_tr rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base back to next available non-offset base - # rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base - 1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # Delete the ref - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # Add the additional base to the ALT - start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = self.validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) - rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, - self.variant.primary_assembly, self.variant.hn) - rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, tx_hgvs_not_delins) + # # move tx end base back to next available non-offset base + # # rn_tx_hgvs_not_delins.posedit.pos.end.base = 
tx_hgvs_not_delins.posedit.pos.end.base - 1 + # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # # Delete the ref + # rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # # Add the additional base to the ALT + # start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 + # end = rn_tx_hgvs_not_delins.posedit.pos.end.base + # ref_bases = self.validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + # rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases + # if re.match('NM_', str(rn_tx_hgvs_not_delins)): + # test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + # else: + # test_tx_var = rn_tx_hgvs_not_delins + # # re-make genomic and tx + # hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, + # self.variant.primary_assembly, self.variant.hn) + # rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + # str(saved_hgvs_coding.ac)) elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, - self.variant.primary_assembly, self.variant.hn) - rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, + with_base_subtract=True) + # # move tx start base to previous available non-offset base + # 
rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 + # rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # if re.match('NM_', str(rn_tx_hgvs_not_delins)): + # test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + # else: + # test_tx_var = rn_tx_hgvs_not_delins + # # re-make genomic and tx + # hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, + # self.variant.primary_assembly, self.variant.hn) + # rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + # str(saved_hgvs_coding.ac)) + # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 else: pass @@ -2361,363 +2364,365 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic, gapped_tr hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', - str( - tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', - str( - tx_hgvs_not_delins.posedit.pos.end))): - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - - # Copy the current variant - tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) - try: - if tx_gap_fill_variant.posedit.edit.alt is None: - tx_gap_fill_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( - tx_gap_fill_variant.posedit.pos.start) + '_' + str( - tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = self.validator.hp.parse_hgvs_variant( - tx_gap_fill_variant_delins_from_dup) - - # Identify which half of the NOT-intron the start position of the variant is in - if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - try: - tx_gap_fill_variant = self.validator.vm.n_to_c(tx_gap_fill_variant) - except: - fn.exceptPass() - genomic_gap_fill_variant = self.validator.vm.t_to_g(tx_gap_fill_variant, - reverse_normalized_hgvs_genomic.ac) - genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - - try: - c_tx_hgvs_not_delins = self.validator.vm.n_to_c(tx_hgvs_not_delins) - except Exception: - c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = self.validator.vm.t_to_g(c_tx_hgvs_not_delins, - hgvs_genomic_5pr.ac) - - # Ensure an ALT exists - try: - if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - 
genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( - genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = self.validator.hp.parse_hgvs_variant( - genomic_gap_fill_variant_delins_from_dup) - genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( - genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = self.validator.hp.parse_hgvs_variant( - genomic_gap_fill_variant_alt_delins_from_dup) - - # Correct insertion alts - if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = self.validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - genomic_gap_fill_variant_alt.posedit.pos.end.base) - genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - append_ref[1] - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) - else: - # Deletions with no ins - pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = 
genomic_gap_fill_variant.posedit.pos.start.base - alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base - ref_base_dict = {} - for base in reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = ref_start + 1 - - alt_base_dict = {} - - # NEED TO SEARCH FOR RANGE = and replace with interval_range - # Need to search for int and replace with integer - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, - genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): - if integer == alt_start: - alt_base_dict[integer] = str(''.join(alternate_bases)) - else: - alt_base_dict[integer] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, - genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): - if integer in list(alt_base_dict.keys()): - alternate_sequence_bases.append(alt_base_dict[integer]) - else: - alternate_sequence_bases.append(ref_base_dict[integer]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Add the new alt to the gap fill variant and generate transcript variant - genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = self.validator.vm.g_to_t(genomic_gap_fill_variant, - tx_gap_fill_variant.ac) - - # Set warning - gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - disparity_deletion_in[1] = [gap_size] - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - else: - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - auto_info = auto_info + '%s' % (gap_position) - - else: - if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # In this instance, we have identified a transcript gap but the n. version of - # the transcript variant but do not have a position which actually hits the gap, - # so the variant likely spans the gap, and is not picked up by an offset. 
- try: - c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - g1 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) - g3 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - ng2 = self.variant.hn.normalize(g2) - g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - len(g3.posedit.edit.ref) - 1) - try: - c2 = self.validator.vm.g_to_t(g3, c1.ac) - if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - pass - else: - tx_hgvs_not_delins = c2 - try: - tx_hgvs_not_delins = self.validator.vm.c_to_n(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSError: - fn.exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError: - fn.exceptPass() - - if re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = 
g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = 
alternate - c3 = self.validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - gpe = for_location_c.posedit.pos.end.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', - str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = 
copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - 
for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - 1 - gpe = for_location_c.posedit.pos.end.base - gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - else: - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) + '\n' - hgvs_refreshed_variant = tx_hgvs_not_delins - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + hgvs_refreshed_variant, gapped_transcripts, auto_info = self.transcript_disparity(tx_hgvs_not_delins, gapped_transcripts, reverse_normalized_hgvs_genomic, hgvs_genomic_5pr, disparity_deletion_in, auto_info, stored_hgvs_not_delins, hgvs_genomic, orientation, 3) + + # if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.start))) and (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end))): + # gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + # + # # Copy the current variant + # tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) + # try: + # if tx_gap_fill_variant.posedit.edit.alt is None: + # tx_gap_fill_variant.posedit.edit.alt = '' + # except Exception as e: + # if str(e) == "'Dup' object has no attribute 'alt'": + # tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( + # tx_gap_fill_variant.posedit.pos.start) + '_' + str( + # tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref + # tx_gap_fill_variant = self.validator.hp.parse_hgvs_variant( + # tx_gap_fill_variant_delins_from_dup) + # + # # Identify which half of the NOT-intron the start position of the variant is in + # if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): + # tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 + # tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + # tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + # tx_gap_fill_variant.posedit.edit.alt = '' + # tx_gap_fill_variant.posedit.edit.ref = '' + # elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): + # tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + # tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 + # tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + # tx_gap_fill_variant.posedit.edit.alt = '' + # tx_gap_fill_variant.posedit.edit.ref = '' + # + # try: + # tx_gap_fill_variant = self.validator.vm.n_to_c(tx_gap_fill_variant) + # except: + # fn.exceptPass() + # genomic_gap_fill_variant = self.validator.vm.t_to_g(tx_gap_fill_variant, + # reverse_normalized_hgvs_genomic.ac) + # genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref + # + # try: + # c_tx_hgvs_not_delins = self.validator.vm.n_to_c(tx_hgvs_not_delins) + # except Exception: + # c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) + # genomic_gap_fill_variant_alt = self.validator.vm.t_to_g(c_tx_hgvs_not_delins, + # hgvs_genomic_5pr.ac) + # + # # Ensure an ALT exists + # try: + # if genomic_gap_fill_variant_alt.posedit.edit.alt is None: + # genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' + # except Exception 
as e: + # if str(e) == "'Dup' object has no attribute 'alt'": + # genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( + # genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( + # genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref + # genomic_gap_fill_variant = self.validator.hp.parse_hgvs_variant( + # genomic_gap_fill_variant_delins_from_dup) + # genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( + # genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( + # genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref + # genomic_gap_fill_variant_alt = self.validator.hp.parse_hgvs_variant( + # genomic_gap_fill_variant_alt_delins_from_dup) + # + # # Correct insertion alts + # if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': + # append_ref = self.validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + # genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, + # genomic_gap_fill_variant_alt.posedit.pos.end.base) + # genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ + # 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ + # append_ref[1] + # + # # Split the reference and replacing alt sequence into a dictionary + # reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) + # if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: + # alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) + # else: + # # Deletions with no ins + # pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) + # alternate_bases = [] + # for base in 
pre_alternate_bases: + # alternate_bases.append('X') + # + # # Create the dictionaries + # ref_start = genomic_gap_fill_variant.posedit.pos.start.base + # alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base + # ref_base_dict = {} + # for base in reference_bases: + # ref_base_dict[ref_start] = str(base) + # ref_start = ref_start + 1 + # + # alt_base_dict = {} + # + # # NEED TO SEARCH FOR RANGE = and replace with interval_range + # # Need to search for int and replace with integer + # + # # Note, all variants will be forced into the format delete insert + # # Deleted bases in the ALT will be substituted for X + # for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, + # genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): + # if integer == alt_start: + # alt_base_dict[integer] = str(''.join(alternate_bases)) + # else: + # alt_base_dict[integer] = 'X' + # + # # Generate the alt sequence + # alternate_sequence_bases = [] + # for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, + # genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): + # if integer in list(alt_base_dict.keys()): + # alternate_sequence_bases.append(alt_base_dict[integer]) + # else: + # alternate_sequence_bases.append(ref_base_dict[integer]) + # alternate_sequence = ''.join(alternate_sequence_bases) + # alternate_sequence = alternate_sequence.replace('X', '') + # + # # Add the new alt to the gap fill variant and generate transcript variant + # genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence + # hgvs_refreshed_variant = self.validator.vm.g_to_t(genomic_gap_fill_variant, + # tx_gap_fill_variant.ac) + # + # # Set warning + # gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) + # disparity_deletion_in[1] = [gap_size] + # auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( + # tx_hgvs_not_delins.ac) + # non_valid_caution = 'true' + # + # # Alignment position + # for_location_c = copy.deepcopy(hgvs_refreshed_variant) + # if re.match('NM_', str(for_location_c)): + # for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + # if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): + # gps = for_location_c.posedit.pos.start.base - 1 + # gpe = for_location_c.posedit.pos.start.base + # else: + # gps = for_location_c.posedit.pos.start.base + # gpe = for_location_c.posedit.pos.start.base + 1 + # gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # auto_info = auto_info + '%s' % (gap_position) + # + # else: + # if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: + # # In this instance, we have identified a transcript gap but the n. version of + # # the transcript variant but do not have a position which actually hits the gap, + # # so the variant likely spans the gap, and is not picked up by an offset. 
+ # try: + # c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + # except: + # c1 = tx_hgvs_not_delins + # g1 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) + # g3 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) + # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # ng2 = self.variant.hn.normalize(g2) + # g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( + # len(g3.posedit.edit.ref) - 1) + # try: + # c2 = self.validator.vm.g_to_t(g3, c1.ac) + # if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: + # pass + # else: + # tx_hgvs_not_delins = c2 + # try: + # tx_hgvs_not_delins = self.validator.vm.c_to_n(tx_hgvs_not_delins) + # except hgvs.exceptions.HGVSError: + # fn.exceptPass() + # except hgvs.exceptions.HGVSInvalidVariantError: + # fn.exceptPass() + # + # if re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + # r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): + # auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + # disparity_deletion_in[ + # 1]) + ' genomic base(s) that fail to align to transcript ' + str( + # tx_hgvs_not_delins.ac) + # non_valid_caution = 'true' + # hgvs_refreshed_variant = self.c2_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) + # # try: + # # c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + # # except: + # # c2 = tx_hgvs_not_delins + # # c1 = copy.deepcopy(c2) + # # c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + # # c1.posedit.pos.start.offset = 0 + # # c1.posedit.pos.end = c2.posedit.pos.start + # # c1.posedit.edit.ref = '' + # # c1.posedit.edit.alt = '' + # # if orientation != -1: + # # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # # g1.posedit.edit.alt = g1.posedit.edit.ref + # # else: + # # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # # g2.posedit.edit.alt = g2.posedit.edit.ref + # # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + # # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + # # g3 = copy.deepcopy(g1) + # # g3.posedit.pos.end.base = g2.posedit.pos.end.base + # # g3.posedit.edit.ref = reference + # # g3.posedit.edit.alt = alternate + # # c3 = self.validator.vm.g_to_t(g3, c1.ac) + # # hgvs_refreshed_variant = c3 + # # Alignment position + # for_location_c = copy.deepcopy(hgvs_refreshed_variant) + # if re.match('NM_', str(for_location_c)): + # for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + # gps = for_location_c.posedit.pos.start.base + # gpe = for_location_c.posedit.pos.start.base + 1 + # gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' + # # Warn update + # auto_info = auto_info + '%s' % (gap_position) + # elif re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + # r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): + # auto_info = auto_info + 'Genome position ' + str( + # stored_hgvs_not_delins.ac) + ':g.' + str( + # stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + # disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + # tx_hgvs_not_delins.ac) + # gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + # non_valid_caution = 'true' + # hgvs_refreshed_variant = self.c1_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) + # # try: + # # c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + # # except: + # # c1 = tx_hgvs_not_delins + # # c2 = copy.deepcopy(c1) + # # c2.posedit.pos.start = c1.posedit.pos.end + # # c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + # # c2.posedit.pos.end.offset = 0 + # # c2.posedit.edit.ref = '' + # # c2.posedit.edit.alt = '' + # # if orientation != -1: + # # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # # g2.posedit.edit.alt = g2.posedit.edit.ref + # # else: + # # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # # g1.posedit.edit.alt = g1.posedit.edit.ref + # # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + # # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + # # g3 = copy.deepcopy(g1) + # # g3.posedit.pos.end.base = g2.posedit.pos.end.base + # # g3.posedit.edit.ref = reference + # # g3.posedit.edit.alt = alternate + # # c3 = self.validator.vm.g_to_t(g3, c1.ac) + # # hgvs_refreshed_variant = c3 + # # Alignment position + # for_location_c = copy.deepcopy(hgvs_refreshed_variant) + # if re.match('NM_', str(for_location_c)): + # for_location_c = 
self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + # gps = for_location_c.posedit.pos.end.base + # gpe = for_location_c.posedit.pos.end.base + 1 + # gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # # Warn update + # auto_info = auto_info + '%s' % (gap_position) + # elif re.search(r'\-', + # str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + # r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): + # auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + # disparity_deletion_in[ + # 1]) + ' genomic base(s) that fail to align to transcript ' + str( + # tx_hgvs_not_delins.ac) + # non_valid_caution = 'true' + # hgvs_refreshed_variant = self.c2_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) + # # try: + # # c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + # # except: + # # c2 = tx_hgvs_not_delins + # # c1 = copy.deepcopy(c2) + # # c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + # # c1.posedit.pos.start.offset = 0 + # # c1.posedit.pos.end = c2.posedit.pos.start + # # c1.posedit.edit.ref = '' + # # c1.posedit.edit.alt = '' + # # if orientation != -1: + # # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # # g1.posedit.edit.alt = g1.posedit.edit.ref + # # else: + # # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # # g2.posedit.edit.alt = g2.posedit.edit.ref + # # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + # # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + # # g3 = copy.deepcopy(g1) + # # g3.posedit.pos.end.base = g2.posedit.pos.end.base + # # g3.posedit.edit.ref = reference + # # g3.posedit.edit.alt = alternate + # # c3 = self.validator.vm.g_to_t(g3, c1.ac) + # # hgvs_refreshed_variant = c3 + # # Alignment position + # for_location_c = 
copy.deepcopy(hgvs_refreshed_variant) + # if re.match('NM_', str(for_location_c)): + # for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + # gps = for_location_c.posedit.pos.start.base - 1 + # gpe = for_location_c.posedit.pos.start.base + # gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # # Warn update + # auto_info = auto_info + '%s' % (gap_position) + # elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + # r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): + # auto_info = auto_info + 'Genome position ' + str( + # stored_hgvs_not_delins.ac) + ':g.' + str( + # stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + # disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + # tx_hgvs_not_delins.ac) + # gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + # non_valid_caution = 'true' + # hgvs_refreshed_variant = self.c1_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) + # # try: + # # c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + # # except: + # # c1 = tx_hgvs_not_delins + # # c2 = copy.deepcopy(c1) + # # c2.posedit.pos.start = c1.posedit.pos.end + # # c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + # # c2.posedit.pos.end.offset = 0 + # # c2.posedit.edit.ref = '' + # # c2.posedit.edit.alt = '' + # # if orientation != -1: + # # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # # g2.posedit.edit.alt = g2.posedit.edit.ref + # # else: + # # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # # g1.posedit.edit.alt = g1.posedit.edit.ref + # # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + # # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + # # g3 = copy.deepcopy(g1) + # # g3.posedit.pos.end.base = g2.posedit.pos.end.base + # # g3.posedit.edit.ref = reference + # # 
g3.posedit.edit.alt = alternate + # # c3 = self.validator.vm.g_to_t(g3, c1.ac) + # # hgvs_refreshed_variant = c3 + # # Alignment position + # for_location_c = copy.deepcopy(hgvs_refreshed_variant) + # if re.match('NM_', str(for_location_c)): + # for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + # gps = for_location_c.posedit.pos.end.base - 1 + # gpe = for_location_c.posedit.pos.end.base + # gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # # Warn update + # auto_info = auto_info + '%s' % (gap_position) + # else: + # auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + # stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( + # disparity_deletion_in[ + # 1]) + ' genomic base(s) that fail to align to transcript ' + str( + # tx_hgvs_not_delins.ac) + '\n' + # hgvs_refreshed_variant = tx_hgvs_not_delins + # gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) # GAP IN THE CHROMOSOME @@ -2866,17 +2871,499 @@ def remove_offsetting_to_span_gap(self, rn_tx_hgvs_not_delins): return rn_tx_hgvs_not_delins - # def move_tx_end_base_to_next_non_offset(validator, variant, rn_tx_hgvs_not_delins): - # # move tx end base back to next available non-offset base - # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # # Delete the ref - # rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # # Add the additional base to the ALT - # start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - # end = rn_tx_hgvs_not_delins.posedit.pos.end.base - # ref_bases = self.validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) - # rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - # if re.match('NM_', str(rn_tx_hgvs_not_delins)): - # test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - # else: - # test_tx_var = rn_tx_hgvs_not_delins \ No newline at end of file + def move_tx_end_base_to_next_nonoffset(self, rn_tx_hgvs_not_delins, saved_hgvs_coding, 
tx_hgvs_not_delins, back=True): + # move tx end base back to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if back: + # Add the additional base to the ALT + start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 + end = rn_tx_hgvs_not_delins.posedit.pos.end.base + ref_bases = self.validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases + else: + # move tx end base to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, + self.variant.primary_assembly, self.variant.hn) + rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + str(saved_hgvs_coding.ac)) + return rn_tx_hgvs_not_delins, hgvs_not_delins + + def move_tx_start_base_to_previous_nonoffset(self, rn_tx_hgvs_not_delins, saved_hgvs_coding, with_base_subtract=False): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + if with_base_subtract: + rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, + self.variant.primary_assembly, self.variant.hn) + rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + 
str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + + return rn_tx_hgvs_not_delins, hgvs_not_delins + + def c2_pos_edit(self, tx_hgvs_not_delins, hgvs_genomic, orientation): + try: + c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = self.validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + + return hgvs_refreshed_variant + + def c1_pos_edit(self, tx_hgvs_not_delins, hgvs_genomic, orientation): + try: + c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + 
alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = self.validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + + return hgvs_refreshed_variant + + def transcript_disparity(self, tx_hgvs_not_delins, gapped_transcripts, reverse_normalized_hgvs_genomic, hgvs_genomic_5pr, disparity_deletion_in, auto_info, stored_hgvs_not_delins, hgvs_genomic, orientation, running_option): + if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', str( + tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', str( + tx_hgvs_not_delins.posedit.pos.end))): + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + + # Copy the current variant + tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) + try: + if tx_gap_fill_variant.posedit.edit.alt is None: + tx_gap_fill_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( + tx_gap_fill_variant.posedit.pos.start) + '_' + str( + tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant = self.validator.hp.parse_hgvs_variant( + tx_gap_fill_variant_delins_from_dup) + + # Identify which half of the NOT-intron the start position of the variant is in + if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 + tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + + try: + tx_gap_fill_variant = self.validator.vm.n_to_c(tx_gap_fill_variant) + except: + fn.exceptPass() + genomic_gap_fill_variant = self.validator.vm.t_to_g(tx_gap_fill_variant, + reverse_normalized_hgvs_genomic.ac) + genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref + + try: + c_tx_hgvs_not_delins = self.validator.vm.n_to_c(tx_hgvs_not_delins) + except Exception: + c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) + genomic_gap_fill_variant_alt = self.validator.vm.t_to_g(c_tx_hgvs_not_delins, + hgvs_genomic_5pr.ac) + + # Ensure an ALT exists + try: + if genomic_gap_fill_variant_alt.posedit.edit.alt is None: + genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + 
genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( + genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant = self.validator.hp.parse_hgvs_variant( + genomic_gap_fill_variant_delins_from_dup) + genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( + genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt = self.validator.hp.parse_hgvs_variant( + genomic_gap_fill_variant_alt_delins_from_dup) + + # Correct insertion alts + if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': + append_ref = self.validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, + genomic_gap_fill_variant_alt.posedit.pos.end.base) + genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ + 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ + append_ref[1] + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) + if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: + alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) + else: + # Deletions with no ins + pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = 
genomic_gap_fill_variant.posedit.pos.start.base + alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base + ref_base_dict = {} + for base in reference_bases: + ref_base_dict[ref_start] = str(base) + ref_start = ref_start + 1 + + alt_base_dict = {} + + # NEED TO SEARCH FOR RANGE = and replace with interval_range + # Need to search for int and replace with integer + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, + genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): + if integer == alt_start: + alt_base_dict[integer] = str(''.join(alternate_bases)) + else: + alt_base_dict[integer] = 'X' + + # Generate the alt sequence + alternate_sequence_bases = [] + for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, + genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): + if integer in list(alt_base_dict.keys()): + alternate_sequence_bases.append(alt_base_dict[integer]) + else: + alternate_sequence_bases.append(ref_base_dict[integer]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Add the new alt to the gap fill variant and generate transcript variant + genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence + hgvs_refreshed_variant = self.validator.vm.g_to_t(genomic_gap_fill_variant, + tx_gap_fill_variant.ac) + + # Set warning + gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) + disparity_deletion_in[1] = [gap_size] + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + else: + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + auto_info = auto_info + '%s' % (gap_position) + + else: + if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: + # In this instance, we have identified a transcript gap but the n. version of + # the transcript variant but do not have a position which actually hits the gap, + # so the variant likely spans the gap, and is not picked up by an offset. 
+ try: + c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + g1 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g3 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + ng2 = self.variant.hn.normalize(g2) + g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( + len(g3.posedit.edit.ref) - 1) + try: + c2 = self.validator.vm.g_to_t(g3, c1.ac) + if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: + pass + else: + tx_hgvs_not_delins = c2 + try: + tx_hgvs_not_delins = self.validator.vm.c_to_n(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSError: + fn.exceptPass() + except hgvs.exceptions.HGVSInvalidVariantError: + fn.exceptPass() + + if re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + hgvs_refreshed_variant = self.c2_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) + # try: + # c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + # except: + # c2 = tx_hgvs_not_delins + # c1 = copy.deepcopy(c2) + # c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + # c1.posedit.pos.start.offset = 0 + # c1.posedit.pos.end = c2.posedit.pos.start + # c1.posedit.edit.ref = '' + # c1.posedit.edit.alt = '' + # if orientation != -1: + # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # g1.posedit.edit.alt = g1.posedit.edit.ref + # else: + # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # g2.posedit.edit.alt = g2.posedit.edit.ref + # reference = g1.posedit.edit.ref + 
g2.posedit.edit.ref[1:] + # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + # g3 = copy.deepcopy(g1) + # g3.posedit.pos.end.base = g2.posedit.pos.end.base + # g3.posedit.edit.ref = reference + # g3.posedit.edit.alt = alternate + # c3 = self.validator.vm.g_to_t(g3, c1.ac) + # hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + hgvs_refreshed_variant = self.c1_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) + # try: + # c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + # except: + # c1 = tx_hgvs_not_delins + # c2 = copy.deepcopy(c1) + # c2.posedit.pos.start = c1.posedit.pos.end + # c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + # c2.posedit.pos.end.offset = 0 + # c2.posedit.edit.ref = '' + # c2.posedit.edit.alt = '' + # if orientation != -1: + # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # g2.posedit.edit.alt = g2.posedit.edit.ref + # else: + # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # g1.posedit.edit.alt = 
g1.posedit.edit.ref + # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + # g3 = copy.deepcopy(g1) + # g3.posedit.pos.end.base = g2.posedit.pos.end.base + # g3.posedit.edit.ref = reference + # g3.posedit.edit.alt = alternate + # c3 = self.validator.vm.g_to_t(g3, c1.ac) + # hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base + gpe = for_location_c.posedit.pos.end.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\-', + str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + hgvs_refreshed_variant = self.c2_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) + # try: + # c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + # except: + # c2 = tx_hgvs_not_delins + # c1 = copy.deepcopy(c2) + # c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + # c1.posedit.pos.start.offset = 0 + # c1.posedit.pos.end = c2.posedit.pos.start + # c1.posedit.edit.ref = '' + # c1.posedit.edit.alt = '' + # if orientation != -1: + # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # g1.posedit.edit.alt = g1.posedit.edit.ref + # else: + # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # g2.posedit.edit.alt = g2.posedit.edit.ref + # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + # g3 = copy.deepcopy(g1) + # g3.posedit.pos.end.base = g2.posedit.pos.end.base + # g3.posedit.edit.ref = reference + # g3.posedit.edit.alt = alternate + # c3 = self.validator.vm.g_to_t(g3, c1.ac) + # hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + + ## Have variation in first copy here! + if running_option == 1: + try: + c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + g2 = self.validator.vm.t_to_g(c2, self.variant.hgvs_genomic.ac) + c2 = self.validator.vm.g_to_t(g2, c2.ac) + reference = c1.posedit.edit.ref + c2.posedit.edit.ref[1:] + alternate = c1.posedit.edit.alt + c2.posedit.edit.ref[1:] + c3 = copy.deepcopy(c1) + c3.posedit.pos.end = c2.posedit.pos.end + c3.posedit.edit.ref = '' # reference + c3.posedit.edit.alt = alternate + hgvs_refreshed_variant = c3 + else: + hgvs_refreshed_variant = self.c1_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) + # try: + # c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + # except: + # c1 = tx_hgvs_not_delins + # c2 = copy.deepcopy(c1) + # c2.posedit.pos.start = c1.posedit.pos.end + # c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + # c2.posedit.pos.end.offset = 0 + # c2.posedit.edit.ref = '' + # c2.posedit.edit.alt = '' + # if orientation != -1: + # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # g2.posedit.edit.alt = g2.posedit.edit.ref + # else: + # g1 = 
self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + # g1.posedit.edit.alt = g1.posedit.edit.ref + # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + # g3 = copy.deepcopy(g1) + # g3.posedit.pos.end.base = g2.posedit.pos.end.base + # g3.posedit.edit.ref = reference + # g3.posedit.edit.alt = alternate + # c3 = self.validator.vm.g_to_t(g3, c1.ac) + # hgvs_refreshed_variant = c3 + + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base - 1 + gpe = for_location_c.posedit.pos.end.base + gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + else: + auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + '\n' + + ## Have variation in second copy here! 
+ if running_option == 2: + tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.start.base + len( + tx_hgvs_not_delins.posedit.edit.ref) - 1 + hgvs_refreshed_variant = tx_hgvs_not_delins + else: + hgvs_refreshed_variant = tx_hgvs_not_delins + gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + + return hgvs_refreshed_variant, gapped_transcripts, auto_info From 51bb6d7c2a60efdd90be769a0305a48bdfa664f1 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 25 Apr 2019 15:51:44 +0100 Subject: [PATCH 068/223] Pulled one more section out into a seperate method --- VariantValidator/modules/gapped_mapping.py | 182 +++++++++++++-------- 1 file changed, 111 insertions(+), 71 deletions(-) diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index 74c38413..03859415 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -833,42 +833,43 @@ def gapped_g_to_c(self, rel_var): hgvs_refreshed_variant = saved_hgvs_coding # Edit the output - if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( - hgvs_refreshed_variant.type)): - hgvs_refreshed_variant = self.variant.evm.n_to_c(hgvs_refreshed_variant) - else: - pass - try: - hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) - if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - hgvs_refreshed_variant.posedit.edit.ref[-1] == \ - hgvs_refreshed_variant.posedit.edit.alt[-1]: - hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - 0:-1] - hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - 0:-1] - hgvs_refreshed_variant.posedit.pos.end.base = hgvs_refreshed_variant.posedit.pos.end.base - 1 - hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) - elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - hgvs_refreshed_variant.posedit.edit.ref[0] 
== \ - hgvs_refreshed_variant.posedit.edit.alt[0]: - hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - 1:] - hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - 1:] - hgvs_refreshed_variant.posedit.pos.start.base = hgvs_refreshed_variant.posedit.pos.start.base + 1 - hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) - except Exception as e: - error = str(e) - # Ensure the final variant is not intronic nor does it cross exon boundaries - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - hgvs_refreshed_variant = saved_hgvs_coding - else: - pass - fn.exceptPass() + hgvs_refreshed_variant = self.edit_output(hgvs_refreshed_variant, saved_hgvs_coding) + # if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( + # hgvs_refreshed_variant.type)): + # hgvs_refreshed_variant = self.variant.evm.n_to_c(hgvs_refreshed_variant) + # else: + # pass + # try: + # hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) + # if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ + # hgvs_refreshed_variant.posedit.edit.ref[-1] == \ + # hgvs_refreshed_variant.posedit.edit.alt[-1]: + # hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ + # 0:-1] + # hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ + # 0:-1] + # hgvs_refreshed_variant.posedit.pos.end.base = hgvs_refreshed_variant.posedit.pos.end.base - 1 + # hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) + # elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ + # hgvs_refreshed_variant.posedit.edit.ref[0] == \ + # hgvs_refreshed_variant.posedit.edit.alt[0]: + # hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ + # 1:] + # 
hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ + # 1:] + # hgvs_refreshed_variant.posedit.pos.start.base = hgvs_refreshed_variant.posedit.pos.start.base + 1 + # hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) + # except Exception as e: + # error = str(e) + # # Ensure the final variant is not intronic nor does it cross exon boundaries + # if re.match('Normalization of intronic variants is not supported', + # error) or re.match( + # 'Unsupported normalization of variants spanning the exon-intron boundary', + # error): + # hgvs_refreshed_variant = saved_hgvs_coding + # else: + # pass + # fn.exceptPass() # Send to empty nw_rel_var nw_rel_var.append(hgvs_refreshed_variant) @@ -2743,41 +2744,42 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic, gapped_tr hgvs_refreshed_variant = saved_hgvs_coding # Edit the output - if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( - hgvs_refreshed_variant.type)): - hgvs_refreshed_variant = self.variant.evm.n_to_c(hgvs_refreshed_variant) - else: - pass - try: - hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) - if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - hgvs_refreshed_variant.posedit.edit.ref[-1] == \ - hgvs_refreshed_variant.posedit.edit.alt[-1]: - hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - 0:-1] - hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - 0:-1] - hgvs_refreshed_variant.posedit.pos.end.base = hgvs_refreshed_variant.posedit.pos.end.base - 1 - hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) - elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - hgvs_refreshed_variant.posedit.edit.ref[0] == \ - hgvs_refreshed_variant.posedit.edit.alt[0]: - hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - 1:] - 
hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - 1:] - hgvs_refreshed_variant.posedit.pos.start.base = hgvs_refreshed_variant.posedit.pos.start.base + 1 - hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) - except Exception as e: - error = str(e) - # Ensure the final variant is not intronic nor does it cross exon boundaries - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - hgvs_refreshed_variant = saved_hgvs_coding - else: - pass + hgvs_refreshed_variant = self.edit_output(hgvs_refreshed_variant, saved_hgvs_coding) + # if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( + # hgvs_refreshed_variant.type)): + # hgvs_refreshed_variant = self.variant.evm.n_to_c(hgvs_refreshed_variant) + # else: + # pass + # try: + # hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) + # if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ + # hgvs_refreshed_variant.posedit.edit.ref[-1] == \ + # hgvs_refreshed_variant.posedit.edit.alt[-1]: + # hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ + # 0:-1] + # hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ + # 0:-1] + # hgvs_refreshed_variant.posedit.pos.end.base = hgvs_refreshed_variant.posedit.pos.end.base - 1 + # hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) + # elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ + # hgvs_refreshed_variant.posedit.edit.ref[0] == \ + # hgvs_refreshed_variant.posedit.edit.alt[0]: + # hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ + # 1:] + # hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ + # 1:] + # hgvs_refreshed_variant.posedit.pos.start.base = 
hgvs_refreshed_variant.posedit.pos.start.base + 1 + # hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) + # except Exception as e: + # error = str(e) + # # Ensure the final variant is not intronic nor does it cross exon boundaries + # if re.match('Normalization of intronic variants is not supported', + # error) or re.match( + # 'Unsupported normalization of variants spanning the exon-intron boundary', + # error): + # hgvs_refreshed_variant = saved_hgvs_coding + # else: + # pass # Sort out equality to equality c. events where the code will add 2 additional bases if hgvs_coding.posedit.edit.type == 'identity' and hgvs_refreshed_variant.posedit.edit.type == 'identity': # and len(hgvs_refreshed_variant.posedit.edit.ref) == (len(hgvs_coding.posedit.edit.ref) + 2): @@ -3367,3 +3369,41 @@ def transcript_disparity(self, tx_hgvs_not_delins, gapped_transcripts, reverse_n gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) return hgvs_refreshed_variant, gapped_transcripts, auto_info + + def edit_output(self, hgvs_refreshed_variant, saved_hgvs_coding): + if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( + hgvs_refreshed_variant.type)): + hgvs_refreshed_variant = self.variant.evm.n_to_c(hgvs_refreshed_variant) + else: + pass + try: + hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) + if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ + hgvs_refreshed_variant.posedit.edit.ref[-1] == \ + hgvs_refreshed_variant.posedit.edit.alt[-1]: + hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ + 0:-1] + hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ + 0:-1] + hgvs_refreshed_variant.posedit.pos.end.base = hgvs_refreshed_variant.posedit.pos.end.base - 1 + hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) + elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ + 
hgvs_refreshed_variant.posedit.edit.ref[0] == \ + hgvs_refreshed_variant.posedit.edit.alt[0]: + hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ + 1:] + hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ + 1:] + hgvs_refreshed_variant.posedit.pos.start.base = hgvs_refreshed_variant.posedit.pos.start.base + 1 + hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) + except Exception as e: + error = str(e) + # Ensure the final variant is not intronic nor does it cross exon boundaries + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + hgvs_refreshed_variant = saved_hgvs_coding + else: + pass + return hgvs_refreshed_variant From 8134a7fcc2766b78a75fa3a03d2c1fa3f5e41deb Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 25 Apr 2019 16:05:05 +0100 Subject: [PATCH 069/223] Deleted unused code --- VariantValidator/modules/gapped_mapping.py | 1439 +------------------- 1 file changed, 14 insertions(+), 1425 deletions(-) diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index 03859415..3acc50e9 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -248,81 +248,20 @@ def gapped_g_to_c(self, rel_var): elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, tx_hgvs_not_delins, back=False) - # # move tx end base to next available non-offset base - # rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 - # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # if re.match('NM_', str(rn_tx_hgvs_not_delins)): - # test_tx_var = 
self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - # else: - # test_tx_var = rn_tx_hgvs_not_delins - # # re-make genomic and tx - # hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, - # self.variant.primary_assembly, self.variant.hn) - # rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - # str(saved_hgvs_coding.ac)) + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding) - # # move tx start base to previous available non-offset base - # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # if re.match('NM_', str(rn_tx_hgvs_not_delins)): - # test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - # else: - # test_tx_var = rn_tx_hgvs_not_delins - # # re-make genomic and tx - # hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, - # self.variant.primary_assembly, self.variant.hn) - # rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - # str(saved_hgvs_coding.ac)) - # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # # else: - # # pass # Check for -ve base and adjust - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, tx_hgvs_not_delins) - # # move tx end base back to next available non-offset base - # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # 
# Delete the ref - # rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # # Add the additional base to the ALT - # start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - # end = rn_tx_hgvs_not_delins.posedit.pos.end.base - # ref_bases = self.validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) - # rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - # if re.match('NM_', str(rn_tx_hgvs_not_delins)): - # test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - # else: - # test_tx_var = rn_tx_hgvs_not_delins - # # re-make genomic and tx - # hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, - # self.variant.primary_assembly, self.variant.hn) - # rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - # str(saved_hgvs_coding.ac)) + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, with_base_subtract=True) - # # move tx start base to previous available non-offset base - # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - # rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # if re.match('NM_', str(rn_tx_hgvs_not_delins)): - # test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - # else: - # test_tx_var = rn_tx_hgvs_not_delins - # # re-make genomic and tx - # hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, - # self.variant.primary_assembly, self.variant.hn) - # rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - # str(saved_hgvs_coding.ac)) - # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - else: - pass + # Logic if len(hgvs_not_delins.posedit.edit.ref) < len( @@ -406,357 +345,8 @@ def gapped_g_to_c(self, rel_var): hgvs_genomic_5pr) + 
' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly # ANY VARIANT WHOLLY WITHIN THE GAP - hgvs_refreshed_variant, gapped_transcripts, auto_info = self.transcript_disparity(tx_hgvs_not_delins, gapped_transcripts, reverse_normalized_hgvs_genomic, hgvs_genomic_5pr, disparity_deletion_in, auto_info, stored_hgvs_not_delins, self.variant.hgvs_genomic, orientation, 1) - # if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.start))) and (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end))): - # gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - # - # # Copy the current variant - # tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) - # try: - # if tx_gap_fill_variant.posedit.edit.alt is None: - # tx_gap_fill_variant.posedit.edit.alt = '' - # except Exception as e: - # if str(e) == "'Dup' object has no attribute 'alt'": - # tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( - # tx_gap_fill_variant.posedit.pos.start) + '_' + str( - # tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - # tx_gap_fill_variant = self.validator.hp.parse_hgvs_variant( - # tx_gap_fill_variant_delins_from_dup) - # - # # Identify which half of the NOT-intron the start position of the variant is in - # if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): - # tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - # tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - # tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - # tx_gap_fill_variant.posedit.edit.alt = '' - # tx_gap_fill_variant.posedit.edit.ref = '' - # elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): - # tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - # tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - # tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - # tx_gap_fill_variant.posedit.edit.alt = '' - # tx_gap_fill_variant.posedit.edit.ref = '' - # - # try: - # tx_gap_fill_variant = self.validator.vm.n_to_c(tx_gap_fill_variant) - # except: - # fn.exceptPass() - # genomic_gap_fill_variant = self.validator.vm.t_to_g(tx_gap_fill_variant, - # reverse_normalized_hgvs_genomic.ac) - # genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - # - # try: - # c_tx_hgvs_not_delins = self.validator.vm.n_to_c(tx_hgvs_not_delins) - # except Exception: - # c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - # genomic_gap_fill_variant_alt = self.validator.vm.t_to_g(c_tx_hgvs_not_delins, - # hgvs_genomic_5pr.ac) - # - # # Ensure an ALT exists - # try: - # if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - # genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - # except Exception 
as e: - # if str(e) == "'Dup' object has no attribute 'alt'": - # genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( - # genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - # genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - # genomic_gap_fill_variant = self.validator.hp.parse_hgvs_variant( - # genomic_gap_fill_variant_delins_from_dup) - # genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( - # genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - # genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - # genomic_gap_fill_variant_alt = self.validator.hp.parse_hgvs_variant( - # genomic_gap_fill_variant_alt_delins_from_dup) - # - # # Correct insertion alts - # if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - # append_ref = self.validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - # genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - # genomic_gap_fill_variant_alt.posedit.pos.end.base) - # genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - # 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - # append_ref[1] - # - # # Split the reference and replacing alt sequence into a dictionary - # reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - # if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - # alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) - # else: - # # Deletions with no ins - # pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) - # alternate_bases = [] - # for base in 
pre_alternate_bases: - # alternate_bases.append('X') - # - # # Create the dictionaries - # ref_start = genomic_gap_fill_variant.posedit.pos.start.base - # alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base - # ref_base_dict = {} - # for base in reference_bases: - # ref_base_dict[ref_start] = str(base) - # ref_start = ref_start + 1 - # - # alt_base_dict = {} - # - # # NEED TO SEARCH FOR RANGE = and replace with interval_range - # # Need to search for int and replace with integer - # - # # Note, all variants will be forced into the format delete insert - # # Deleted bases in the ALT will be substituted for X - # for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, - # genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): - # if integer == alt_start: - # alt_base_dict[integer] = str(''.join(alternate_bases)) - # else: - # alt_base_dict[integer] = 'X' - # - # # Generate the alt sequence - # alternate_sequence_bases = [] - # for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, - # genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): - # if integer in list(alt_base_dict.keys()): - # alternate_sequence_bases.append(alt_base_dict[integer]) - # else: - # alternate_sequence_bases.append(ref_base_dict[integer]) - # alternate_sequence = ''.join(alternate_sequence_bases) - # alternate_sequence = alternate_sequence.replace('X', '') - # - # # Add the new alt to the gap fill variant and generate transcript variant - # genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - # hgvs_refreshed_variant = self.validator.vm.g_to_t(genomic_gap_fill_variant, - # tx_gap_fill_variant.ac) - # - # # Set warning - # gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - # disparity_deletion_in[1] = [gap_size] - # auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - # tx_hgvs_not_delins.ac) - # non_valid_caution = 'true' - # - # # Alignment position - # for_location_c = copy.deepcopy(hgvs_refreshed_variant) - # if re.match('NM_', str(for_location_c)): - # for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - # if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): - # gps = for_location_c.posedit.pos.start.base - 1 - # gpe = for_location_c.posedit.pos.start.base - # else: - # gps = for_location_c.posedit.pos.start.base - # gpe = for_location_c.posedit.pos.start.base + 1 - # gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # auto_info = auto_info + '%s' % (gap_position) - # - # else: - # if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # # In this instance, we have identified a transcript gap but the n. version of - # # the transcript variant but do not have a position which actually hits the gap, - # # so the variant likely spans the gap, and is not picked up by an offset. 
- # try: - # c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - # except: - # c1 = tx_hgvs_not_delins - # g1 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) - # g3 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) - # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # ng2 = self.variant.hn.normalize(g2) - # g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - # len(g3.posedit.edit.ref) - 1) - # try: - # c2 = self.validator.vm.g_to_t(g3, c1.ac) - # if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - # pass - # else: - # tx_hgvs_not_delins = c2 - # try: - # tx_hgvs_not_delins = self.validator.vm.c_to_n(tx_hgvs_not_delins) - # except hgvs.exceptions.HGVSError: - # fn.exceptPass() - # except hgvs.exceptions.HGVSInvalidVariantError: - # fn.exceptPass() - # - # if re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - # r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): - # auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - # disparity_deletion_in[ - # 1]) + ' genomic base(s) that fail to align to transcript ' + str( - # tx_hgvs_not_delins.ac) - # non_valid_caution = 'true' - # hgvs_refreshed_variant = self.c2_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) - # # try: - # # c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - # # except: - # # c2 = tx_hgvs_not_delins - # # c1 = copy.deepcopy(c2) - # # c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - # # c1.posedit.pos.start.offset = 0 - # # c1.posedit.pos.end = c2.posedit.pos.start - # # c1.posedit.edit.ref = '' - # # c1.posedit.edit.alt = '' - # # if orientation != -1: - # # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # # g1.posedit.edit.alt = g1.posedit.edit.ref - # # else: - # # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # # g2.posedit.edit.alt = g2.posedit.edit.ref - # # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - # # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - # # g3 = copy.deepcopy(g1) - # # g3.posedit.pos.end.base = g2.posedit.pos.end.base - # # g3.posedit.edit.ref = reference - # # g3.posedit.edit.alt = alternate - # # c3 = self.validator.vm.g_to_t(g3, c1.ac) - # # hgvs_refreshed_variant = c3 - # # Alignment position - # for_location_c = copy.deepcopy(hgvs_refreshed_variant) - # if re.match('NM_', str(for_location_c)): - # for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - # gps = for_location_c.posedit.pos.start.base - # gpe = for_location_c.posedit.pos.start.base + 1 - # gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' - # # Warn update - # auto_info = auto_info + '%s' % (gap_position) - # elif re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - # r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): - # auto_info = auto_info + 'Genome position ' + str( - # stored_hgvs_not_delins.ac) + ':g.' + str( - # stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - # disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - # tx_hgvs_not_delins.ac) - # gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - # non_valid_caution = 'true' - # hgvs_refreshed_variant = self.c1_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) - # # try: - # # c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - # # except: - # # c1 = tx_hgvs_not_delins - # # c2 = copy.deepcopy(c1) - # # c2.posedit.pos.start = c1.posedit.pos.end - # # c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - # # c2.posedit.pos.end.offset = 0 - # # c2.posedit.edit.ref = '' - # # c2.posedit.edit.alt = '' - # # if orientation != -1: - # # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # # g2.posedit.edit.alt = g2.posedit.edit.ref - # # else: - # # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # # g1.posedit.edit.alt = g1.posedit.edit.ref - # # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - # # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - # # g3 = copy.deepcopy(g1) - # # g3.posedit.pos.end.base = g2.posedit.pos.end.base - # # g3.posedit.edit.ref = reference - # # g3.posedit.edit.alt = alternate - # # c3 = self.validator.vm.g_to_t(g3, c1.ac) - # # hgvs_refreshed_variant = c3 - # # Alignment position - # for_location_c = copy.deepcopy(hgvs_refreshed_variant) - # if re.match('NM_', str(for_location_c)): - # for_location_c = 
self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - # gps = for_location_c.posedit.pos.end.base - # gpe = for_location_c.posedit.pos.end.base + 1 - # gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # # Warn update - # auto_info = auto_info + '%s' % (gap_position) - # elif re.search(r'\-', - # str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - # r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): - # auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - # disparity_deletion_in[ - # 1]) + ' genomic base(s) that fail to align to transcript ' + str( - # tx_hgvs_not_delins.ac) - # non_valid_caution = 'true' - # hgvs_refreshed_variant = self.c2_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) - # # try: - # # c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - # # except: - # # c2 = tx_hgvs_not_delins - # # c1 = copy.deepcopy(c2) - # # c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - # # c1.posedit.pos.start.offset = 0 - # # c1.posedit.pos.end = c2.posedit.pos.start - # # c1.posedit.edit.ref = '' - # # c1.posedit.edit.alt = '' - # # if orientation != -1: - # # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # # g1.posedit.edit.alt = g1.posedit.edit.ref - # # else: - # # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # # g2.posedit.edit.alt = g2.posedit.edit.ref - # # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - # # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - # # g3 = copy.deepcopy(g1) - # # g3.posedit.pos.end.base = g2.posedit.pos.end.base - # # g3.posedit.edit.ref = reference - # # g3.posedit.edit.alt = alternate - # # c3 = self.validator.vm.g_to_t(g3, c1.ac) - # # hgvs_refreshed_variant = c3 - # # Alignment position - # for_location_c = 
copy.deepcopy(hgvs_refreshed_variant) - # if re.match('NM_', str(for_location_c)): - # for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - # gps = for_location_c.posedit.pos.start.base - 1 - # gpe = for_location_c.posedit.pos.start.base - # gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # # Warn update - # auto_info = auto_info + '%s' % (gap_position) - # elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - # r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): - # auto_info = auto_info + 'Genome position ' + str( - # stored_hgvs_not_delins.ac) + ':g.' + str( - # stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - # disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - # tx_hgvs_not_delins.ac) - # gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - # non_valid_caution = 'true' - # try: - # c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - # except: - # c1 = tx_hgvs_not_delins - # c2 = copy.deepcopy(c1) - # c2.posedit.pos.start = c1.posedit.pos.end - # c2.posedit.pos.end.base = c1.posedit.pos.end.base - # c2.posedit.pos.end.offset = 0 - # c2.posedit.edit.ref = '' - # c2.posedit.edit.alt = '' - # g2 = self.validator.vm.t_to_g(c2, self.variant.hgvs_genomic.ac) - # c2 = self.validator.vm.g_to_t(g2, c2.ac) - # reference = c1.posedit.edit.ref + c2.posedit.edit.ref[1:] - # alternate = c1.posedit.edit.alt + c2.posedit.edit.ref[1:] - # c3 = copy.deepcopy(c1) - # c3.posedit.pos.end = c2.posedit.pos.end - # c3.posedit.edit.ref = '' # reference - # c3.posedit.edit.alt = alternate - # hgvs_refreshed_variant = c3 - # # Alignment position - # for_location_c = copy.deepcopy(hgvs_refreshed_variant) - # if re.match('NM_', str(for_location_c)): - # for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - # gps = for_location_c.posedit.pos.end.base - 1 - # gpe = for_location_c.posedit.pos.end.base - # gap_position = ' between 
positions c.' + str(gps) + '_' + str(gpe) + '\n' - # # Warn update - # auto_info = auto_info + '%s' % (gap_position) - # else: - # auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - # stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - # disparity_deletion_in[ - # 1]) + ' genomic base(s) that fail to align to transcript ' + str( - # tx_hgvs_not_delins.ac) + '\n' - # hgvs_refreshed_variant = tx_hgvs_not_delins - # gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) # GAP IN THE CHROMOSOME elif disparity_deletion_in[0] == 'chromosome': @@ -834,42 +424,7 @@ def gapped_g_to_c(self, rel_var): # Edit the output hgvs_refreshed_variant = self.edit_output(hgvs_refreshed_variant, saved_hgvs_coding) - # if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( - # hgvs_refreshed_variant.type)): - # hgvs_refreshed_variant = self.variant.evm.n_to_c(hgvs_refreshed_variant) - # else: - # pass - # try: - # hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) - # if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - # hgvs_refreshed_variant.posedit.edit.ref[-1] == \ - # hgvs_refreshed_variant.posedit.edit.alt[-1]: - # hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - # 0:-1] - # hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - # 0:-1] - # hgvs_refreshed_variant.posedit.pos.end.base = hgvs_refreshed_variant.posedit.pos.end.base - 1 - # hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) - # elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - # hgvs_refreshed_variant.posedit.edit.ref[0] == \ - # hgvs_refreshed_variant.posedit.edit.alt[0]: - # hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - # 1:] - # hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - # 1:] - # hgvs_refreshed_variant.posedit.pos.start.base 
= hgvs_refreshed_variant.posedit.pos.start.base + 1 - # hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) - # except Exception as e: - # error = str(e) - # # Ensure the final variant is not intronic nor does it cross exon boundaries - # if re.match('Normalization of intronic variants is not supported', - # error) or re.match( - # 'Unsupported normalization of variants spanning the exon-intron boundary', - # error): - # hgvs_refreshed_variant = saved_hgvs_coding - # else: - # pass - # fn.exceptPass() + # Send to empty nw_rel_var nw_rel_var.append(hgvs_refreshed_variant) @@ -889,7 +444,6 @@ def gapped_g_to_c(self, rel_var): } return data, nw_rel_var - def g_to_t_compensation(self, ori, hgvs_coding, rec_var): orientation = int(ori[0]['alt_strand']) hgvs_genomic_possibilities = [] @@ -1408,48 +962,16 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, tx_hgvs_not_delins, back=False) - # # move tx end base to next available non-offset base - # rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 - # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # if re.match('NM_', str(rn_tx_hgvs_not_delins)): - # test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - # else: - # test_tx_var = rn_tx_hgvs_not_delins - # # re-make genomic and tx - # hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, - # self.variant.primary_assembly, self.variant.hn) - # - # rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - # str(saved_hgvs_coding.ac)) - - # tx_hgvs_not_delins = rn_tx_hgvs_not_delins + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): rn_tx_hgvs_not_delins, hgvs_not_delins = 
self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding) - # # move tx start base to previous available non-offset base - # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # if re.match('NM_', str(rn_tx_hgvs_not_delins)): - # test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - # else: - # test_tx_var = rn_tx_hgvs_not_delins - # # re-make genomic and tx - # hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, - # self.variant.primary_assembly, self.variant.hn) - # rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - # str(saved_hgvs_coding.ac)) - # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # # else: - # pass # Check for -ve base and adjust - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - r'\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + ## TODO: check this if should be move_tx_end_base_to_next rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 # Delete the ref rn_tx_hgvs_not_delins.posedit.edit.ref = '' @@ -1469,22 +991,6 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): str(saved_hgvs_coding.ac)) elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, with_base_subtract=True) - # # move tx start base to previous available non-offset base - # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - # 
rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # if re.match('NM_', str(rn_tx_hgvs_not_delins)): - # test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - # else: - # test_tx_var = rn_tx_hgvs_not_delins - # # re-make genomic and tx - # hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, - # self.variant.primary_assembly, self.variant.hn) - # rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - # str(saved_hgvs_coding.ac)) - # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - else: - pass # Logic if len(hgvs_not_delins.posedit.edit.ref) < len( @@ -1619,367 +1125,8 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): suppress_c_normalization = 'true' # amend_RefSeqGene = 'true' # ANY VARIANT WHOLLY WITHIN THE GAP - hgvs_refreshed_variant, gapped_transcripts, auto_info = self.transcript_disparity(tx_hgvs_not_delins, gapped_transcripts, reverse_normalized_hgvs_genomic, hgvs_genomic_5pr, disparity_deletion_in, auto_info, stored_hgvs_not_delins, hgvs_genomic, orientation, 2) - # if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.start))) and (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end))): - # gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - # - # # Copy the current variant - # tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) - # try: - # if tx_gap_fill_variant.posedit.edit.alt is None: - # tx_gap_fill_variant.posedit.edit.alt = '' - # except Exception as e: - # if str(e) == "'Dup' object has no attribute 'alt'": - # tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( - # tx_gap_fill_variant.posedit.pos.start) + '_' + str( - # tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - # tx_gap_fill_variant = self.validator.hp.parse_hgvs_variant( - # tx_gap_fill_variant_delins_from_dup) - # - # # Identify which half of the NOT-intron the start position of the variant is in - # if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): - # tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - # tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - # tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - # tx_gap_fill_variant.posedit.edit.alt = '' - # tx_gap_fill_variant.posedit.edit.ref = '' - # elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): - # tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - # tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - # tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - # tx_gap_fill_variant.posedit.edit.alt = '' - # tx_gap_fill_variant.posedit.edit.ref = '' - # - # try: - # tx_gap_fill_variant = self.validator.vm.n_to_c(tx_gap_fill_variant) - # except: - # fn.exceptPass() - # genomic_gap_fill_variant = self.validator.vm.t_to_g(tx_gap_fill_variant, - # reverse_normalized_hgvs_genomic.ac) - # genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - # - # try: - # c_tx_hgvs_not_delins = self.validator.vm.n_to_c(tx_hgvs_not_delins) - # except Exception: - # c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - # genomic_gap_fill_variant_alt = self.validator.vm.t_to_g(c_tx_hgvs_not_delins, - # hgvs_genomic_5pr.ac) - # - # # Ensure an ALT exists - # try: - # if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - # genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - # except Exception 
as e: - # if str(e) == "'Dup' object has no attribute 'alt'": - # genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( - # genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - # genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - # genomic_gap_fill_variant = self.validator.hp.parse_hgvs_variant( - # genomic_gap_fill_variant_delins_from_dup) - # genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( - # genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - # genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - # genomic_gap_fill_variant_alt = self.validator.hp.parse_hgvs_variant( - # genomic_gap_fill_variant_alt_delins_from_dup) - # - # # Correct insertion alts - # if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - # append_ref = self.validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - # genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - # genomic_gap_fill_variant_alt.posedit.pos.end.base) - # genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - # 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - # append_ref[1] - # - # # Split the reference and replacing alt sequence into a dictionary - # reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - # if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - # alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) - # else: - # # Deletions with no ins - # pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) - # alternate_bases = [] - # for base in 
pre_alternate_bases: - # alternate_bases.append('X') - # - # # Create the dictionaries - # ref_start = genomic_gap_fill_variant.posedit.pos.start.base - # alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base - # ref_base_dict = {} - # for base in reference_bases: - # ref_base_dict[ref_start] = str(base) - # ref_start = ref_start + 1 - # - # alt_base_dict = {} - # - # # NEED TO SEARCH FOR RANGE = and replace with interval_range - # # Need to search for int and replace with integer - # - # # Note, all variants will be forced into the format delete insert - # # Deleted bases in the ALT will be substituted for X - # for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, - # genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): - # if integer == alt_start: - # alt_base_dict[integer] = str(''.join(alternate_bases)) - # else: - # alt_base_dict[integer] = 'X' - # - # # Generate the alt sequence - # alternate_sequence_bases = [] - # for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, - # genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): - # if integer in list(alt_base_dict.keys()): - # alternate_sequence_bases.append(alt_base_dict[integer]) - # else: - # alternate_sequence_bases.append(ref_base_dict[integer]) - # alternate_sequence = ''.join(alternate_sequence_bases) - # alternate_sequence = alternate_sequence.replace('X', '') - # - # # Add the new alt to the gap fill variant and generate transcript variant - # genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - # hgvs_refreshed_variant = self.validator.vm.g_to_t(genomic_gap_fill_variant, - # tx_gap_fill_variant.ac) - # - # # Set warning - # gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - # disparity_deletion_in[1] = [gap_size] - # auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - # tx_hgvs_not_delins.ac) - # non_valid_caution = 'true' - # - # # Alignment position - # for_location_c = copy.deepcopy(hgvs_refreshed_variant) - # if re.match('NM_', str(for_location_c)): - # for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - # if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): - # gps = for_location_c.posedit.pos.start.base - 1 - # gpe = for_location_c.posedit.pos.start.base - # else: - # gps = for_location_c.posedit.pos.start.base - # gpe = for_location_c.posedit.pos.start.base + 1 - # gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # auto_info = auto_info + '%s' % (gap_position) - # - # else: - # if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # # In this instance, we have identified a transcript gap but the n. version of - # # the transcript variant but do not have a position which actually hits the gap, - # # so the variant likely spans the gap, and is not picked up by an offset. 
- # try: - # c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - # except: - # c1 = tx_hgvs_not_delins - # g1 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) - # g3 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) - # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # ng2 = self.variant.hn.normalize(g2) - # g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - # len(g3.posedit.edit.ref) - 1) - # try: - # c2 = self.validator.vm.g_to_t(g3, c1.ac) - # if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - # pass - # else: - # tx_hgvs_not_delins = c2 - # try: - # tx_hgvs_not_delins = self.validator.vm.c_to_n(tx_hgvs_not_delins) - # except hgvs.exceptions.HGVSError: - # fn.exceptPass() - # except hgvs.exceptions.HGVSInvalidVariantError: - # fn.exceptPass() - # - # if re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - # r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): - # auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - # disparity_deletion_in[ - # 1]) + ' genomic base(s) that fail to align to transcript ' + str( - # tx_hgvs_not_delins.ac) - # non_valid_caution = 'true' - # hgvs_refreshed_variant = self.c2_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) - # # try: - # # c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - # # except: - # # c2 = tx_hgvs_not_delins - # # c1 = copy.deepcopy(c2) - # # c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - # # c1.posedit.pos.start.offset = 0 - # # c1.posedit.pos.end = c2.posedit.pos.start - # # c1.posedit.edit.ref = '' - # # c1.posedit.edit.alt = '' - # # if orientation != -1: - # # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # # g1.posedit.edit.alt = g1.posedit.edit.ref - # # else: - # # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # # g2.posedit.edit.alt = g2.posedit.edit.ref - # # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - # # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - # # g3 = copy.deepcopy(g1) - # # g3.posedit.pos.end.base = g2.posedit.pos.end.base - # # g3.posedit.edit.ref = reference - # # g3.posedit.edit.alt = alternate - # # c3 = self.validator.vm.g_to_t(g3, c1.ac) - # # hgvs_refreshed_variant = c3 - # # Alignment position - # for_location_c = copy.deepcopy(hgvs_refreshed_variant) - # if re.match('NM_', str(for_location_c)): - # for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - # gps = for_location_c.posedit.pos.start.base - # gpe = for_location_c.posedit.pos.start.base + 1 - # gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' - # # Warn update - # auto_info = auto_info + '%s' % (gap_position) - # elif re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - # r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): - # auto_info = auto_info + 'Genome position ' + str( - # stored_hgvs_not_delins.ac) + ':g.' + str( - # stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - # disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - # tx_hgvs_not_delins.ac) - # gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - # non_valid_caution = 'true' - # hgvs_refreshed_variant = self.c1_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) - # # try: - # # c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - # # except: - # # c1 = tx_hgvs_not_delins - # # c2 = copy.deepcopy(c1) - # # c2.posedit.pos.start = c1.posedit.pos.end - # # c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - # # c2.posedit.pos.end.offset = 0 - # # c2.posedit.edit.ref = '' - # # c2.posedit.edit.alt = '' - # # if orientation != -1: - # # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # # g2.posedit.edit.alt = g2.posedit.edit.ref - # # else: - # # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # # g1.posedit.edit.alt = g1.posedit.edit.ref - # # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - # # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - # # g3 = copy.deepcopy(g1) - # # g3.posedit.pos.end.base = g2.posedit.pos.end.base - # # g3.posedit.edit.ref = reference - # # g3.posedit.edit.alt = alternate - # # c3 = self.validator.vm.g_to_t(g3, c1.ac) - # # hgvs_refreshed_variant = c3 - # # Alignment position - # for_location_c = copy.deepcopy(hgvs_refreshed_variant) - # if re.match('NM_', str(for_location_c)): - # for_location_c = 
self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - # gps = for_location_c.posedit.pos.end.base - # gpe = for_location_c.posedit.pos.end.base + 1 - # gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # # Warn update - # auto_info = auto_info + '%s' % (gap_position) - # elif re.search(r'\-', - # str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - # r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): - # auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - # disparity_deletion_in[ - # 1]) + ' genomic base(s) that fail to align to transcript ' + str( - # tx_hgvs_not_delins.ac) - # non_valid_caution = 'true' - # hgvs_refreshed_variant = self.c2_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) - # # try: - # # c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - # # except: - # # c2 = tx_hgvs_not_delins - # # c1 = copy.deepcopy(c2) - # # c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - # # c1.posedit.pos.start.offset = 0 - # # c1.posedit.pos.end = c2.posedit.pos.start - # # c1.posedit.edit.ref = '' - # # c1.posedit.edit.alt = '' - # # if orientation != -1: - # # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # # g1.posedit.edit.alt = g1.posedit.edit.ref - # # else: - # # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # # g2.posedit.edit.alt = g2.posedit.edit.ref - # # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - # # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - # # g3 = copy.deepcopy(g1) - # # g3.posedit.pos.end.base = g2.posedit.pos.end.base - # # g3.posedit.edit.ref = reference - # # g3.posedit.edit.alt = alternate - # # c3 = self.validator.vm.g_to_t(g3, c1.ac) - # # hgvs_refreshed_variant = c3 - # # Alignment position - # for_location_c = 
copy.deepcopy(hgvs_refreshed_variant) - # if re.match('NM_', str(for_location_c)): - # for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - # gps = for_location_c.posedit.pos.start.base - 1 - # gpe = for_location_c.posedit.pos.start.base - # gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # # Warn update - # auto_info = auto_info + '%s' % (gap_position) - # elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - # r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): - # auto_info = auto_info + 'Genome position ' + str( - # stored_hgvs_not_delins.ac) + ':g.' + str( - # stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - # disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - # tx_hgvs_not_delins.ac) - # gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - # non_valid_caution = 'true' - # hgvs_refreshed_variant = self.c1_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) - # # try: - # # c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - # # except: - # # c1 = tx_hgvs_not_delins - # # c2 = copy.deepcopy(c1) - # # c2.posedit.pos.start = c1.posedit.pos.end - # # c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - # # c2.posedit.pos.end.offset = 0 - # # c2.posedit.edit.ref = '' - # # c2.posedit.edit.alt = '' - # # if orientation != -1: - # # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # # g2.posedit.edit.alt = g2.posedit.edit.ref - # # else: - # # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # # g1.posedit.edit.alt = g1.posedit.edit.ref - # # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - # # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - # # g3 = copy.deepcopy(g1) - # # g3.posedit.pos.end.base = g2.posedit.pos.end.base - # # g3.posedit.edit.ref = reference - # # 
g3.posedit.edit.alt = alternate - # # c3 = self.validator.vm.g_to_t(g3, c1.ac) - # # hgvs_refreshed_variant = c3 - # # Alignment position - # for_location_c = copy.deepcopy(hgvs_refreshed_variant) - # if re.match('NM_', str(for_location_c)): - # for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - # gps = for_location_c.posedit.pos.end.base - 1 - # gpe = for_location_c.posedit.pos.end.base - # gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # # Warn update - # auto_info = auto_info + '%s' % (gap_position) - # else: - # auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - # stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - # disparity_deletion_in[ - # 1]) + ' genomic base(s) that fail to align to transcript ' + str( - # tx_hgvs_not_delins.ac) + '\n' - # tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.start.base + len( - # tx_hgvs_not_delins.posedit.edit.ref) - 1 - # hgvs_refreshed_variant = tx_hgvs_not_delins - # GAP IN THE CHROMOSOME elif disparity_deletion_in[0] == 'chromosome': suppress_c_normalization = 'true' @@ -2176,83 +1323,20 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic, gapped_tr elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, tx_hgvs_not_delins, back=False) - # # move tx end base to next available non-offset base - # rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 - # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # if re.match('NM_', str(rn_tx_hgvs_not_delins)): - # test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - # else: - # test_tx_var = rn_tx_hgvs_not_delins - # # re-make genomic and tx - # hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, 
- # self.variant.primary_assembly, self.variant.hn) - # rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - # str(saved_hgvs_coding.ac)) + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding) - # rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # # move tx start base to previous available non-offset base - # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # if re.match('NM_', str(rn_tx_hgvs_not_delins)): - # test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - # else: - # test_tx_var = rn_tx_hgvs_not_delins - # # re-make genomic and tx - # hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, - # self.variant.primary_assembly, self.variant.hn) - # rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - # str(saved_hgvs_coding.ac)) - # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: - # pass # Check for -ve base and adjust - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, tx_hgvs_not_delins) - # # move tx end base back to next available non-offset base - # # rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base - 1 - # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # # Delete the ref - # rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # # Add the additional base to the ALT - # start = 
rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - # end = rn_tx_hgvs_not_delins.posedit.pos.end.base - # ref_bases = self.validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) - # rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - # if re.match('NM_', str(rn_tx_hgvs_not_delins)): - # test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - # else: - # test_tx_var = rn_tx_hgvs_not_delins - # # re-make genomic and tx - # hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, - # self.variant.primary_assembly, self.variant.hn) - # rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - # str(saved_hgvs_coding.ac)) + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, with_base_subtract=True) - # # move tx start base to previous available non-offset base - # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - # rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # if re.match('NM_', str(rn_tx_hgvs_not_delins)): - # test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - # else: - # test_tx_var = rn_tx_hgvs_not_delins - # # re-make genomic and tx - # hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, - # self.variant.primary_assembly, self.variant.hn) - # rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - # str(saved_hgvs_coding.ac)) - # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - else: - pass # Logic if len(hgvs_not_delins.posedit.edit.ref) < len( @@ -2365,368 +1449,9 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic, gapped_tr hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of 
aligning the transcripts listed below with genome build ' + self.variant.primary_assembly # ANY VARIANT WHOLLY WITHIN THE GAP - hgvs_refreshed_variant, gapped_transcripts, auto_info = self.transcript_disparity(tx_hgvs_not_delins, gapped_transcripts, reverse_normalized_hgvs_genomic, hgvs_genomic_5pr, disparity_deletion_in, auto_info, stored_hgvs_not_delins, hgvs_genomic, orientation, 3) - # if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.start))) and (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end))): - # gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - # - # # Copy the current variant - # tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) - # try: - # if tx_gap_fill_variant.posedit.edit.alt is None: - # tx_gap_fill_variant.posedit.edit.alt = '' - # except Exception as e: - # if str(e) == "'Dup' object has no attribute 'alt'": - # tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( - # tx_gap_fill_variant.posedit.pos.start) + '_' + str( - # tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - # tx_gap_fill_variant = self.validator.hp.parse_hgvs_variant( - # tx_gap_fill_variant_delins_from_dup) - # - # # Identify which half of the NOT-intron the start position of the variant is in - # if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): - # tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - # tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - # tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - # tx_gap_fill_variant.posedit.edit.alt = '' - # tx_gap_fill_variant.posedit.edit.ref = '' - # elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): - # tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') - # tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - # tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') - # tx_gap_fill_variant.posedit.edit.alt = '' - # tx_gap_fill_variant.posedit.edit.ref = '' - # - # try: - # tx_gap_fill_variant = self.validator.vm.n_to_c(tx_gap_fill_variant) - # except: - # fn.exceptPass() - # genomic_gap_fill_variant = self.validator.vm.t_to_g(tx_gap_fill_variant, - # reverse_normalized_hgvs_genomic.ac) - # genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - # - # try: - # c_tx_hgvs_not_delins = self.validator.vm.n_to_c(tx_hgvs_not_delins) - # except Exception: - # c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - # genomic_gap_fill_variant_alt = self.validator.vm.t_to_g(c_tx_hgvs_not_delins, - # hgvs_genomic_5pr.ac) - # - # # Ensure an ALT exists - # try: - # if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - # genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - # except Exception 
as e: - # if str(e) == "'Dup' object has no attribute 'alt'": - # genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( - # genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - # genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - # genomic_gap_fill_variant = self.validator.hp.parse_hgvs_variant( - # genomic_gap_fill_variant_delins_from_dup) - # genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( - # genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - # genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - # genomic_gap_fill_variant_alt = self.validator.hp.parse_hgvs_variant( - # genomic_gap_fill_variant_alt_delins_from_dup) - # - # # Correct insertion alts - # if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - # append_ref = self.validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - # genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - # genomic_gap_fill_variant_alt.posedit.pos.end.base) - # genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - # 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - # append_ref[1] - # - # # Split the reference and replacing alt sequence into a dictionary - # reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - # if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - # alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.alt) - # else: - # # Deletions with no ins - # pre_alternate_bases = list(genomic_gap_fill_variant_alt.posedit.edit.ref) - # alternate_bases = [] - # for base in 
pre_alternate_bases: - # alternate_bases.append('X') - # - # # Create the dictionaries - # ref_start = genomic_gap_fill_variant.posedit.pos.start.base - # alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base - # ref_base_dict = {} - # for base in reference_bases: - # ref_base_dict[ref_start] = str(base) - # ref_start = ref_start + 1 - # - # alt_base_dict = {} - # - # # NEED TO SEARCH FOR RANGE = and replace with interval_range - # # Need to search for int and replace with integer - # - # # Note, all variants will be forced into the format delete insert - # # Deleted bases in the ALT will be substituted for X - # for integer in range(genomic_gap_fill_variant_alt.posedit.pos.start.base, - # genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): - # if integer == alt_start: - # alt_base_dict[integer] = str(''.join(alternate_bases)) - # else: - # alt_base_dict[integer] = 'X' - # - # # Generate the alt sequence - # alternate_sequence_bases = [] - # for integer in range(genomic_gap_fill_variant.posedit.pos.start.base, - # genomic_gap_fill_variant.posedit.pos.end.base + 1, 1): - # if integer in list(alt_base_dict.keys()): - # alternate_sequence_bases.append(alt_base_dict[integer]) - # else: - # alternate_sequence_bases.append(ref_base_dict[integer]) - # alternate_sequence = ''.join(alternate_sequence_bases) - # alternate_sequence = alternate_sequence.replace('X', '') - # - # # Add the new alt to the gap fill variant and generate transcript variant - # genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - # hgvs_refreshed_variant = self.validator.vm.g_to_t(genomic_gap_fill_variant, - # tx_gap_fill_variant.ac) - # - # # Set warning - # gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - # disparity_deletion_in[1] = [gap_size] - # auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - # tx_hgvs_not_delins.ac) - # non_valid_caution = 'true' - # - # # Alignment position - # for_location_c = copy.deepcopy(hgvs_refreshed_variant) - # if re.match('NM_', str(for_location_c)): - # for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - # if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): - # gps = for_location_c.posedit.pos.start.base - 1 - # gpe = for_location_c.posedit.pos.start.base - # else: - # gps = for_location_c.posedit.pos.start.base - # gpe = for_location_c.posedit.pos.start.base + 1 - # gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # auto_info = auto_info + '%s' % (gap_position) - # - # else: - # if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # # In this instance, we have identified a transcript gap but the n. version of - # # the transcript variant but do not have a position which actually hits the gap, - # # so the variant likely spans the gap, and is not picked up by an offset. 
- # try: - # c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - # except: - # c1 = tx_hgvs_not_delins - # g1 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) - # g3 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) - # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # ng2 = self.variant.hn.normalize(g2) - # g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - # len(g3.posedit.edit.ref) - 1) - # try: - # c2 = self.validator.vm.g_to_t(g3, c1.ac) - # if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - # pass - # else: - # tx_hgvs_not_delins = c2 - # try: - # tx_hgvs_not_delins = self.validator.vm.c_to_n(tx_hgvs_not_delins) - # except hgvs.exceptions.HGVSError: - # fn.exceptPass() - # except hgvs.exceptions.HGVSInvalidVariantError: - # fn.exceptPass() - # - # if re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - # r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): - # auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - # disparity_deletion_in[ - # 1]) + ' genomic base(s) that fail to align to transcript ' + str( - # tx_hgvs_not_delins.ac) - # non_valid_caution = 'true' - # hgvs_refreshed_variant = self.c2_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) - # # try: - # # c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - # # except: - # # c2 = tx_hgvs_not_delins - # # c1 = copy.deepcopy(c2) - # # c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - # # c1.posedit.pos.start.offset = 0 - # # c1.posedit.pos.end = c2.posedit.pos.start - # # c1.posedit.edit.ref = '' - # # c1.posedit.edit.alt = '' - # # if orientation != -1: - # # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # # g1.posedit.edit.alt = g1.posedit.edit.ref - # # else: - # # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # # g2.posedit.edit.alt = g2.posedit.edit.ref - # # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - # # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - # # g3 = copy.deepcopy(g1) - # # g3.posedit.pos.end.base = g2.posedit.pos.end.base - # # g3.posedit.edit.ref = reference - # # g3.posedit.edit.alt = alternate - # # c3 = self.validator.vm.g_to_t(g3, c1.ac) - # # hgvs_refreshed_variant = c3 - # # Alignment position - # for_location_c = copy.deepcopy(hgvs_refreshed_variant) - # if re.match('NM_', str(for_location_c)): - # for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - # gps = for_location_c.posedit.pos.start.base - # gpe = for_location_c.posedit.pos.start.base + 1 - # gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' - # # Warn update - # auto_info = auto_info + '%s' % (gap_position) - # elif re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - # r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): - # auto_info = auto_info + 'Genome position ' + str( - # stored_hgvs_not_delins.ac) + ':g.' + str( - # stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - # disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - # tx_hgvs_not_delins.ac) - # gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - # non_valid_caution = 'true' - # hgvs_refreshed_variant = self.c1_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) - # # try: - # # c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - # # except: - # # c1 = tx_hgvs_not_delins - # # c2 = copy.deepcopy(c1) - # # c2.posedit.pos.start = c1.posedit.pos.end - # # c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - # # c2.posedit.pos.end.offset = 0 - # # c2.posedit.edit.ref = '' - # # c2.posedit.edit.alt = '' - # # if orientation != -1: - # # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # # g2.posedit.edit.alt = g2.posedit.edit.ref - # # else: - # # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # # g1.posedit.edit.alt = g1.posedit.edit.ref - # # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - # # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - # # g3 = copy.deepcopy(g1) - # # g3.posedit.pos.end.base = g2.posedit.pos.end.base - # # g3.posedit.edit.ref = reference - # # g3.posedit.edit.alt = alternate - # # c3 = self.validator.vm.g_to_t(g3, c1.ac) - # # hgvs_refreshed_variant = c3 - # # Alignment position - # for_location_c = copy.deepcopy(hgvs_refreshed_variant) - # if re.match('NM_', str(for_location_c)): - # for_location_c = 
self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - # gps = for_location_c.posedit.pos.end.base - # gpe = for_location_c.posedit.pos.end.base + 1 - # gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # # Warn update - # auto_info = auto_info + '%s' % (gap_position) - # elif re.search(r'\-', - # str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - # r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): - # auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - # disparity_deletion_in[ - # 1]) + ' genomic base(s) that fail to align to transcript ' + str( - # tx_hgvs_not_delins.ac) - # non_valid_caution = 'true' - # hgvs_refreshed_variant = self.c2_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) - # # try: - # # c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - # # except: - # # c2 = tx_hgvs_not_delins - # # c1 = copy.deepcopy(c2) - # # c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - # # c1.posedit.pos.start.offset = 0 - # # c1.posedit.pos.end = c2.posedit.pos.start - # # c1.posedit.edit.ref = '' - # # c1.posedit.edit.alt = '' - # # if orientation != -1: - # # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # # g1.posedit.edit.alt = g1.posedit.edit.ref - # # else: - # # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # # g2.posedit.edit.alt = g2.posedit.edit.ref - # # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - # # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - # # g3 = copy.deepcopy(g1) - # # g3.posedit.pos.end.base = g2.posedit.pos.end.base - # # g3.posedit.edit.ref = reference - # # g3.posedit.edit.alt = alternate - # # c3 = self.validator.vm.g_to_t(g3, c1.ac) - # # hgvs_refreshed_variant = c3 - # # Alignment position - # for_location_c = 
copy.deepcopy(hgvs_refreshed_variant) - # if re.match('NM_', str(for_location_c)): - # for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - # gps = for_location_c.posedit.pos.start.base - 1 - # gpe = for_location_c.posedit.pos.start.base - # gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # # Warn update - # auto_info = auto_info + '%s' % (gap_position) - # elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - # r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): - # auto_info = auto_info + 'Genome position ' + str( - # stored_hgvs_not_delins.ac) + ':g.' + str( - # stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - # disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - # tx_hgvs_not_delins.ac) - # gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - # non_valid_caution = 'true' - # hgvs_refreshed_variant = self.c1_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) - # # try: - # # c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - # # except: - # # c1 = tx_hgvs_not_delins - # # c2 = copy.deepcopy(c1) - # # c2.posedit.pos.start = c1.posedit.pos.end - # # c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - # # c2.posedit.pos.end.offset = 0 - # # c2.posedit.edit.ref = '' - # # c2.posedit.edit.alt = '' - # # if orientation != -1: - # # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # # g2.posedit.edit.alt = g2.posedit.edit.ref - # # else: - # # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # # g1.posedit.edit.alt = g1.posedit.edit.ref - # # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - # # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - # # g3 = copy.deepcopy(g1) - # # g3.posedit.pos.end.base = g2.posedit.pos.end.base - # # g3.posedit.edit.ref = reference - # # 
g3.posedit.edit.alt = alternate - # # c3 = self.validator.vm.g_to_t(g3, c1.ac) - # # hgvs_refreshed_variant = c3 - # # Alignment position - # for_location_c = copy.deepcopy(hgvs_refreshed_variant) - # if re.match('NM_', str(for_location_c)): - # for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - # gps = for_location_c.posedit.pos.end.base - 1 - # gpe = for_location_c.posedit.pos.end.base - # gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' - # # Warn update - # auto_info = auto_info + '%s' % (gap_position) - # else: - # auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( - # stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - # disparity_deletion_in[ - # 1]) + ' genomic base(s) that fail to align to transcript ' + str( - # tx_hgvs_not_delins.ac) + '\n' - # hgvs_refreshed_variant = tx_hgvs_not_delins - # gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - # GAP IN THE CHROMOSOME - elif disparity_deletion_in[0] == 'chromosome': # Set warning variables gap_position = '' @@ -2745,41 +1470,6 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic, gapped_tr # Edit the output hgvs_refreshed_variant = self.edit_output(hgvs_refreshed_variant, saved_hgvs_coding) - # if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( - # hgvs_refreshed_variant.type)): - # hgvs_refreshed_variant = self.variant.evm.n_to_c(hgvs_refreshed_variant) - # else: - # pass - # try: - # hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) - # if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - # hgvs_refreshed_variant.posedit.edit.ref[-1] == \ - # hgvs_refreshed_variant.posedit.edit.alt[-1]: - # hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - # 0:-1] - # hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - # 0:-1] - # hgvs_refreshed_variant.posedit.pos.end.base = 
hgvs_refreshed_variant.posedit.pos.end.base - 1 - # hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) - # elif hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ - # hgvs_refreshed_variant.posedit.edit.ref[0] == \ - # hgvs_refreshed_variant.posedit.edit.alt[0]: - # hgvs_refreshed_variant.posedit.edit.ref = hgvs_refreshed_variant.posedit.edit.ref[ - # 1:] - # hgvs_refreshed_variant.posedit.edit.alt = hgvs_refreshed_variant.posedit.edit.alt[ - # 1:] - # hgvs_refreshed_variant.posedit.pos.start.base = hgvs_refreshed_variant.posedit.pos.start.base + 1 - # hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) - # except Exception as e: - # error = str(e) - # # Ensure the final variant is not intronic nor does it cross exon boundaries - # if re.match('Normalization of intronic variants is not supported', - # error) or re.match( - # 'Unsupported normalization of variants spanning the exon-intron boundary', - # error): - # hgvs_refreshed_variant = saved_hgvs_coding - # else: - # pass # Sort out equality to equality c. 
events where the code will add 2 additional bases if hgvs_coding.posedit.edit.type == 'identity' and hgvs_refreshed_variant.posedit.edit.type == 'identity': # and len(hgvs_refreshed_variant.posedit.edit.ref) == (len(hgvs_coding.posedit.edit.ref) + 2): @@ -3158,32 +1848,7 @@ def transcript_disparity(self, tx_hgvs_not_delins, gapped_transcripts, reverse_n tx_hgvs_not_delins.ac) non_valid_caution = 'true' hgvs_refreshed_variant = self.c2_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) - # try: - # c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - # except: - # c2 = tx_hgvs_not_delins - # c1 = copy.deepcopy(c2) - # c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - # c1.posedit.pos.start.offset = 0 - # c1.posedit.pos.end = c2.posedit.pos.start - # c1.posedit.edit.ref = '' - # c1.posedit.edit.alt = '' - # if orientation != -1: - # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # g1.posedit.edit.alt = g1.posedit.edit.ref - # else: - # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # g2.posedit.edit.alt = g2.posedit.edit.ref - # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - # g3 = copy.deepcopy(g1) - # g3.posedit.pos.end.base = g2.posedit.pos.end.base - # g3.posedit.edit.ref = reference - # g3.posedit.edit.alt = alternate - # c3 = self.validator.vm.g_to_t(g3, c1.ac) - # hgvs_refreshed_variant = c3 + # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) if re.match('NM_', str(for_location_c)): @@ -3203,32 +1868,7 @@ def transcript_disparity(self, tx_hgvs_not_delins, gapped_transcripts, reverse_n gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) non_valid_caution = 'true' hgvs_refreshed_variant = self.c1_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) - # try: - # c1 = 
self.validator.vm.n_to_c(tx_hgvs_not_delins) - # except: - # c1 = tx_hgvs_not_delins - # c2 = copy.deepcopy(c1) - # c2.posedit.pos.start = c1.posedit.pos.end - # c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - # c2.posedit.pos.end.offset = 0 - # c2.posedit.edit.ref = '' - # c2.posedit.edit.alt = '' - # if orientation != -1: - # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # g2.posedit.edit.alt = g2.posedit.edit.ref - # else: - # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # g1.posedit.edit.alt = g1.posedit.edit.ref - # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - # g3 = copy.deepcopy(g1) - # g3.posedit.pos.end.base = g2.posedit.pos.end.base - # g3.posedit.edit.ref = reference - # g3.posedit.edit.alt = alternate - # c3 = self.validator.vm.g_to_t(g3, c1.ac) - # hgvs_refreshed_variant = c3 + # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) if re.match('NM_', str(for_location_c)): @@ -3248,32 +1888,7 @@ def transcript_disparity(self, tx_hgvs_not_delins, gapped_transcripts, reverse_n tx_hgvs_not_delins.ac) non_valid_caution = 'true' hgvs_refreshed_variant = self.c2_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) - # try: - # c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - # except: - # c2 = tx_hgvs_not_delins - # c1 = copy.deepcopy(c2) - # c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - # c1.posedit.pos.start.offset = 0 - # c1.posedit.pos.end = c2.posedit.pos.start - # c1.posedit.edit.ref = '' - # c1.posedit.edit.alt = '' - # if orientation != -1: - # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # g1.posedit.edit.alt = g1.posedit.edit.ref - # else: - # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # g2 = self.validator.vm.t_to_g(c1, 
hgvs_genomic.ac) - # g2.posedit.edit.alt = g2.posedit.edit.ref - # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - # g3 = copy.deepcopy(g1) - # g3.posedit.pos.end.base = g2.posedit.pos.end.base - # g3.posedit.edit.ref = reference - # g3.posedit.edit.alt = alternate - # c3 = self.validator.vm.g_to_t(g3, c1.ac) - # hgvs_refreshed_variant = c3 + # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) if re.match('NM_', str(for_location_c)): @@ -3316,32 +1931,6 @@ def transcript_disparity(self, tx_hgvs_not_delins, gapped_transcripts, reverse_n hgvs_refreshed_variant = c3 else: hgvs_refreshed_variant = self.c1_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) - # try: - # c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - # except: - # c1 = tx_hgvs_not_delins - # c2 = copy.deepcopy(c1) - # c2.posedit.pos.start = c1.posedit.pos.end - # c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - # c2.posedit.pos.end.offset = 0 - # c2.posedit.edit.ref = '' - # c2.posedit.edit.alt = '' - # if orientation != -1: - # g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # g2.posedit.edit.alt = g2.posedit.edit.ref - # else: - # g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - # g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - # g1.posedit.edit.alt = g1.posedit.edit.ref - # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - # g3 = copy.deepcopy(g1) - # g3.posedit.pos.end.base = g2.posedit.pos.end.base - # g3.posedit.edit.ref = reference - # g3.posedit.edit.alt = alternate - # c3 = self.validator.vm.g_to_t(g3, c1.ac) - # hgvs_refreshed_variant = c3 # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) From 8ba975b016e81093c77d329fb450ce20f818aebc Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 25 Apr 2019 16:27:15 +0100 
Subject: [PATCH 070/223] Added logic_check method but not implementing it yet --- VariantValidator/modules/gapped_mapping.py | 67 +++++++++++++++++++++- 1 file changed, 64 insertions(+), 3 deletions(-) diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index 3acc50e9..ddaa525c 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -1039,8 +1039,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): internal_possibility.posedit.edit.ref): gap_length = len(internal_possibility.posedit.edit.ref) - len( hgvs_t_possibility.posedit.edit.ref) - re_capture_tx_variant = ['transcript', gap_length, - hgvs_t_possibility] + re_capture_tx_variant = ['transcript', gap_length, hgvs_t_possibility] hgvs_not_delins = internal_possibility hgvs_genomic_5pr = internal_possibility break @@ -1352,7 +1351,6 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic, gapped_tr else: re_capture_tx_variant = [] for internal_possibility in hgvs_genomic_possibilities: - if internal_possibility == '': continue @@ -1996,3 +1994,66 @@ def edit_output(self, hgvs_refreshed_variant, saved_hgvs_coding): else: pass return hgvs_refreshed_variant + + def logic_check(self, hgvs_not_delins, rn_tx_hgvs_not_delins, hgvs_genomic_possibilities, hgvs_coding): + # Logic + if len(hgvs_not_delins.posedit.edit.ref) < len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( + hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['chromosome', gap_length] + elif len(hgvs_not_delins.posedit.edit.ref) > len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( + rn_tx_hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['transcript', gap_length] + else: + re_capture_tx_variant = [] + for internal_possibility in hgvs_genomic_possibilities: + if internal_possibility == '': + 
continue + + hgvs_t_possibility = self.validator.vm.g_to_t(internal_possibility, hgvs_coding.ac) + if hgvs_t_possibility.posedit.edit.type == 'ins': + try: + hgvs_t_possibility = self.validator.vm.c_to_n(hgvs_t_possibility) + except: + fn.exceptPass() + ins_ref = self.validator.sf.fetch_seq(hgvs_t_possibility.ac, + hgvs_t_possibility.posedit.pos.start.base - 1, + hgvs_t_possibility.posedit.pos.start.base + 1) + try: + hgvs_t_possibility = self.validator.vm.n_to_c(hgvs_t_possibility) + except: + fn.exceptPass() + hgvs_t_possibility.posedit.edit.ref = ins_ref + hgvs_t_possibility.posedit.edit.alt = ins_ref[ + 0] + hgvs_t_possibility.posedit.edit.alt + \ + ins_ref[1] + if internal_possibility.posedit.edit.type == 'ins': + ins_ref = self.validator.sf.fetch_seq(internal_possibility.ac, + internal_possibility.posedit.pos.start.base - 1, + internal_possibility.posedit.pos.end.base) + internal_possibility.posedit.edit.ref = ins_ref + internal_possibility.posedit.edit.alt = ins_ref[ + 0] + internal_possibility.posedit.edit.alt + \ + ins_ref[1] + + if len(hgvs_t_possibility.posedit.edit.ref) < len( + internal_possibility.posedit.edit.ref): + gap_length = len(internal_possibility.posedit.edit.ref) - len( + hgvs_t_possibility.posedit.edit.ref) + re_capture_tx_variant = ['transcript', gap_length, hgvs_t_possibility] + hgvs_not_delins = internal_possibility + hgvs_genomic_5pr = internal_possibility + break + + if re_capture_tx_variant != []: + try: + tx_hgvs_not_delins = self.validator.vm.c_to_n(re_capture_tx_variant[2]) + except: + tx_hgvs_not_delins = re_capture_tx_variant[2] + disparity_deletion_in = re_capture_tx_variant[0:-1] + else: + pass + return disparity_deletion_in, tx_hgvs_not_delins, hgvs_not_delins, hgvs_genomic_5pr From d1c845a94ee98b9e91a3a2b8c025de81d3abd86f Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 25 Apr 2019 17:00:52 +0100 Subject: [PATCH 071/223] Moved some variables into attributes so don't have to be passed around within the gap mapping obj --- 
VariantValidator/modules/gapped_mapping.py | 261 +++++++++++---------- VariantValidator/modules/mappers.py | 3 +- 2 files changed, 134 insertions(+), 130 deletions(-) diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index ddaa525c..841a06ba 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -14,6 +14,12 @@ def __init__(self, variant, validator): self.variant = variant self.validator = validator + self.gapped_transcripts = '' + self.auto_info = '' + self.orientation = None + self.hgvs_genomic_possibilities = [] + self.disparity_deletion_in = [] + def gapped_g_to_c(self, rel_var): """ Gap aware projection from g. to c. @@ -22,9 +28,9 @@ def gapped_g_to_c(self, rel_var): # Set variables for problem specific warnings gapped_alignment_warning = '' corrective_action_taken = '' - gapped_transcripts = '' - auto_info = '' - disparity_deletion_in = [] + self.gapped_transcripts = '' + self.auto_info = '' + self.disparity_deletion_in = [] # Create a pseudo VCF so that normalization can be applied and a delins can be generated hgvs_genomic_variant = self.variant.hgvs_genomic @@ -113,10 +119,10 @@ def gapped_g_to_c(self, rel_var): # Get orientation of the gene wrt genome and a list of exons mapped to the genome ori = self.validator.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=hgvs_genomic_5pr.ac, alt_aln_method=self.validator.alt_aln_method) - orientation = int(ori[0]['alt_strand']) + self.orientation = int(ori[0]['alt_strand']) intronic_variant = 'false' - if orientation == -1: + if self.orientation == -1: # position genomic at its most 5 prime position try: query_genomic = self.variant.reverse_normalizer.normalize(self.variant.hgvs_genomic) @@ -138,7 +144,7 @@ def gapped_g_to_c(self, rel_var): else: hgvs_seek_var = saved_hgvs_coding - elif orientation != -1: + elif self.orientation != -1: # position genomic at its most 3 prime position try: query_genomic = 
self.variant.hn.normalize(self.variant.hgvs_genomic) @@ -222,7 +228,7 @@ def gapped_g_to_c(self, rel_var): hgvs_from_5n_g = self.variant.no_norm_evm.g_to_t(hgvs_genomic_5pr, saved_hgvs_coding.ac) # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths - disparity_deletion_in = ['false', 'false'] + self.disparity_deletion_in = ['false', 'false'] if stored_hgvs_not_delins != '': # Refresh hgvs_not_delins from stored_hgvs_not_delins hgvs_not_delins = self.dup_ins_5prime_shift(stored_hgvs_not_delins, hgvs_genomic_5pr, @@ -268,12 +274,12 @@ def gapped_g_to_c(self, rel_var): rn_tx_hgvs_not_delins.posedit.edit.ref): gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['chromosome', gap_length] + self.disparity_deletion_in = ['chromosome', gap_length] elif len(hgvs_not_delins.posedit.edit.ref) > len( rn_tx_hgvs_not_delins.posedit.edit.ref): gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( rn_tx_hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] + self.disparity_deletion_in = ['transcript', gap_length] else: hgvs_stash_t = self.validator.vm.g_to_t(stash_hgvs_not_delins, saved_hgvs_coding.ac) if len(stash_hgvs_not_delins.posedit.edit.ref) > len( @@ -285,14 +291,14 @@ def gapped_g_to_c(self, rel_var): else: gap_length = len(stash_hgvs_not_delins.posedit.edit.ref) - len( hgvs_stash_t.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] + self.disparity_deletion_in = ['transcript', gap_length] try: tx_hgvs_not_delins = self.validator.vm.c_to_n(hgvs_stash_t) except: tx_hgvs_not_delins = hgvs_stash_t hgvs_not_delins = stash_hgvs_not_delins elif hgvs_stash_t.posedit.pos.start.offset != 0 or hgvs_stash_t.posedit.pos.end.offset != 0: - disparity_deletion_in = ['transcript', 'Requires Analysis'] + self.disparity_deletion_in = ['transcript', 'Requires Analysis'] try: tx_hgvs_not_delins = 
self.validator.vm.c_to_n(hgvs_stash_t) except: @@ -308,7 +314,7 @@ def gapped_g_to_c(self, rel_var): except Exception as e: if str(e) == 'start or end or both are beyond the bounds of transcript record': hgvs_not_delins = saved_hgvs_coding - disparity_deletion_in = ['false', 'false'] + self.disparity_deletion_in = ['false', 'false'] logger.warning(str(e)) try: self.variant.hn.normalize(tx_hgvs_not_delins) @@ -322,10 +328,10 @@ def gapped_g_to_c(self, rel_var): 'Unsupported normalization of variants spanning the exon-intron boundary', error): hgvs_not_delins = saved_hgvs_coding - disparity_deletion_in = ['false', 'false'] + self.disparity_deletion_in = ['false', 'false'] elif re.match('Normalization of intronic variants is not supported', error): # We know that this cannot be because of an intronic variant, so must be aligned to tx gap - disparity_deletion_in = ['transcript', 'Requires Analysis'] + self.disparity_deletion_in = ['transcript', 'Requires Analysis'] logger.warning(error) # Pre-processing of tx_hgvs_not_delins try: @@ -339,28 +345,28 @@ def gapped_g_to_c(self, rel_var): tx_hgvs_not_delins = self.validator.hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) # GAP IN THE TRANSCRIPT DISPARITY DETECTED - if disparity_deletion_in[0] == 'transcript': + if self.disparity_deletion_in[0] == 'transcript': gap_position = '' gapped_alignment_warning = str( hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly # ANY VARIANT WHOLLY WITHIN THE GAP - hgvs_refreshed_variant, gapped_transcripts, auto_info = self.transcript_disparity(tx_hgvs_not_delins, gapped_transcripts, reverse_normalized_hgvs_genomic, hgvs_genomic_5pr, disparity_deletion_in, auto_info, stored_hgvs_not_delins, self.variant.hgvs_genomic, orientation, 1) + hgvs_refreshed_variant = self.transcript_disparity(tx_hgvs_not_delins, reverse_normalized_hgvs_genomic, hgvs_genomic_5pr, 
stored_hgvs_not_delins, self.variant.hgvs_genomic, 1) # GAP IN THE CHROMOSOME - elif disparity_deletion_in[0] == 'chromosome': + elif self.disparity_deletion_in[0] == 'chromosome': # Set warning variables gap_position = '' gapped_alignment_warning = str( hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly hgvs_refreshed_variant = tx_hgvs_not_delins # Warn - auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( - hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(disparity_deletion_in[ + self.auto_info = self.auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( + hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(self.disparity_deletion_in[ 1]) + ' transcript base(s) that fail to align to chromosome ' + str( self.variant.hgvs_genomic.ac) + '\n' - gapped_transcripts = gapped_transcripts + str(hgvs_refreshed_variant.ac) + ' ' + self.gapped_transcripts = self.gapped_transcripts + str(hgvs_refreshed_variant.ac) + ' ' else: # Try the push hgvs_stash = copy.deepcopy(stash_hgvs_not_delins) @@ -409,7 +415,7 @@ def gapped_g_to_c(self, rel_var): gapped_alignment_warning = str( hgvs_genomic_5pr) + ' may be an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly hgvs_refreshed_variant = tx_hard_right - gapped_transcripts = gapped_transcripts + str(tx_hard_right.ac) + ' ' + self.gapped_transcripts = self.gapped_transcripts + str(tx_hard_right.ac) + ' ' elif len(stash_hgvs_not_delins_left.posedit.edit.ref) < len( tx_hard_left.posedit.edit.ref): tx_hard_left = self.variant.hn.normalize(tx_hard_left) @@ -417,7 +423,7 @@ def gapped_g_to_c(self, rel_var): gapped_alignment_warning = str( hgvs_genomic_5pr) + ' may be an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly hgvs_refreshed_variant = tx_hard_left - gapped_transcripts = 
gapped_transcripts + str(tx_hard_left.ac) + ' ' + self.gapped_transcripts = self.gapped_transcripts + str(tx_hard_left.ac) + ' ' else: # Keep the same by re-setting rel_var hgvs_refreshed_variant = saved_hgvs_coding @@ -438,21 +444,21 @@ def gapped_g_to_c(self, rel_var): data = { 'gapped_alignment_warning': gapped_alignment_warning, 'corrective_action_taken': corrective_action_taken, - 'auto_info': auto_info, - 'disparity_deletion_in': disparity_deletion_in, - 'gapped_transcripts': gapped_transcripts + 'auto_info': self.auto_info, + 'disparity_deletion_in': self.disparity_deletion_in, + 'gapped_transcripts': self.gapped_transcripts } return data, nw_rel_var def g_to_t_compensation(self, ori, hgvs_coding, rec_var): - orientation = int(ori[0]['alt_strand']) - hgvs_genomic_possibilities = [] + self.orientation = int(ori[0]['alt_strand']) + self.hgvs_genomic_possibilities = [] hgvs_genomic = self.validator.myevm_t_to_g(hgvs_coding, self.variant.no_norm_evm, self.variant.primary_assembly, self.variant.hn) logger.warning('g_to_t gap code 1 active') rn_hgvs_genomic = self.variant.reverse_normalizer.normalize(hgvs_genomic) - hgvs_genomic_possibilities.append(rn_hgvs_genomic) - if orientation != -1: + self.hgvs_genomic_possibilities.append(rn_hgvs_genomic) + if self.orientation != -1: try: chromosome_normalized_hgvs_coding = self.variant.reverse_normalizer.normalize(hgvs_coding) except hgvs.exceptions.HGVSUnsupportedOperationError as e: @@ -465,7 +471,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): most_3pr_hgvs_genomic = self.validator.myvm_t_to_g(chromosome_normalized_hgvs_coding, hgvs_genomic.ac, self.variant.no_norm_evm, self.variant.hn) - hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) + self.hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) # Push from side to side to try pick up odd placements # MAKE A NO NORM HGVS2VCF @@ -518,9 +524,9 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): test_stash_tx_right.posedit.pos.end.base 
- test_stash_tx_right.posedit.pos.start.base) + 1) != ( len(g_alt) - ( stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - hgvs_genomic_possibilities.append(stash_genomic) + self.hgvs_genomic_possibilities.append(stash_genomic) else: - hgvs_genomic_possibilities.append('') + self.hgvs_genomic_possibilities.append('') elif test_stash_tx_right.posedit.edit.type == 'identity': reform_ident = str(test_stash_tx_right).split(':')[0] reform_ident = reform_ident + ':c.' + str(test_stash_tx_right.posedit.pos) + 'del' + str( @@ -532,18 +538,18 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): error = str(e) if re.search('spanning the exon-intron boundary', error): stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append('') + self.hgvs_genomic_possibilities.append('') else: stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append(stash_genomic) + self.hgvs_genomic_possibilities.append(stash_genomic) else: try: self.variant.hn.normalize(test_stash_tx_right) except hgvs.exceptions.HGVSUnsupportedOperationError: - hgvs_genomic_possibilities.append('') + self.hgvs_genomic_possibilities.append('') else: stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append(stash_genomic) + self.hgvs_genomic_possibilities.append(stash_genomic) except hgvs.exceptions.HGVSError as e: fn.exceptPass() # Intronic positions not supported. 
Will cause a Value Error @@ -601,9 +607,9 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( len(g_alt) - ( stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - hgvs_genomic_possibilities.append(stash_genomic) + self.hgvs_genomic_possibilities.append(stash_genomic) else: - hgvs_genomic_possibilities.append('') + self.hgvs_genomic_possibilities.append('') elif test_stash_tx_left.posedit.edit.type == 'identity': reform_ident = str(test_stash_tx_left).split(':')[0] reform_ident = reform_ident + ':c.' + str(test_stash_tx_left.posedit.pos) + 'del' + str( @@ -615,18 +621,18 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): error = str(e) if re.search('spanning the exon-intron boundary', error): stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append('') + self.hgvs_genomic_possibilities.append('') else: stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append(stash_genomic) + self.hgvs_genomic_possibilities.append(stash_genomic) else: try: self.variant.hn.normalize(test_stash_tx_left) except hgvs.exceptions.HGVSUnsupportedOperationError: - hgvs_genomic_possibilities.append('') + self.hgvs_genomic_possibilities.append('') else: stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append(stash_genomic) + self.hgvs_genomic_possibilities.append(stash_genomic) except hgvs.exceptions.HGVSError as e: fn.exceptPass() except ValueError: @@ -732,16 +738,16 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): if len(genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( most_3pr_hgvs_transcript_variant.posedit.edit.alt): - hgvs_genomic_possibilities.append(genomic_from_most_3pr_hgvs_transcript_variant) + self.hgvs_genomic_possibilities.append(genomic_from_most_3pr_hgvs_transcript_variant) if len(genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( 
most_5pr_hgvs_transcript_variant.posedit.edit.alt): - hgvs_genomic_possibilities.append(genomic_from_most_5pr_hgvs_transcript_variant) + self.hgvs_genomic_possibilities.append(genomic_from_most_5pr_hgvs_transcript_variant) except hgvs.exceptions.HGVSUnsupportedOperationError as e: fn.exceptPass() logger.info('\nGENOMIC POSSIBILITIES') - for possibility in hgvs_genomic_possibilities: + for possibility in self.hgvs_genomic_possibilities: if possibility == '': logger.info('X') else: @@ -750,11 +756,11 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): logger.info('\n') # Set variables for problem specific warnings - gapped_transcripts = '' - auto_info = '' + self.gapped_transcripts = '' + self.auto_info = '' # Mark as not disparity detected - disparity_deletion_in = ['false', 'false'] + self.disparity_deletion_in = ['false', 'false'] # Loop through to see if a gap can be located # Set the variables required for corrective normalization @@ -762,7 +768,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): suppress_c_normalization = 'false' # Applies to boundary crossing normalization # Copy a version of hgvs_genomic_possibilities - for possibility in hgvs_genomic_possibilities: + for possibility in self.hgvs_genomic_possibilities: possibility_counter = possibility_counter + 1 # Loop out stash possibilities which will not spot gaps so are empty @@ -841,7 +847,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): hgvs_coding.ac) # Look for normalized variant options that do not match hgvs_coding - if orientation == -1: + if self.orientation == -1: # position genomic at its most 5 prime position try: query_genomic = self.variant.reverse_normalizer.normalize(hgvs_genomic) @@ -861,7 +867,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): else: hgvs_seek_var = saved_hgvs_coding - elif orientation != -1: + elif self.orientation != -1: # position genomic at its most 3 prime position try: query_genomic = 
self.variant.hn.normalize(hgvs_genomic) @@ -997,15 +1003,15 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): rn_tx_hgvs_not_delins.posedit.edit.ref): gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['chromosome', gap_length] + self.disparity_deletion_in = ['chromosome', gap_length] elif len(hgvs_not_delins.posedit.edit.ref) > len( rn_tx_hgvs_not_delins.posedit.edit.ref): gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( rn_tx_hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] + self.disparity_deletion_in = ['transcript', gap_length] else: re_capture_tx_variant = [] - for internal_possibility in hgvs_genomic_possibilities: + for internal_possibility in self.hgvs_genomic_possibilities: if internal_possibility == '': continue @@ -1049,7 +1055,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): tx_hgvs_not_delins = self.validator.vm.c_to_n(re_capture_tx_variant[2]) except: tx_hgvs_not_delins = re_capture_tx_variant[2] - disparity_deletion_in = re_capture_tx_variant[0:-1] + self.disparity_deletion_in = re_capture_tx_variant[0:-1] else: pass @@ -1074,15 +1080,15 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): continue elif re.match('Normalization of intronic variants is not supported', error): # We know that this cannot be because of an intronic variant, so must be aligned to tx gap - disparity_deletion_in = ['transcript', 'Requires Analysis'] + self.disparity_deletion_in = ['transcript', 'Requires Analysis'] # amend_RefSeqGene = 'false' # Recreate hgvs_genomic - if disparity_deletion_in[0] == 'transcript': + if self.disparity_deletion_in[0] == 'transcript': hgvs_genomic = hgvs_not_delins # Find oddly placed gaps where the tx variant is encompassed in the gap - if disparity_deletion_in[0] == 'false' and ( + if self.disparity_deletion_in[0] == 'false' and ( possibility_counter == 3 or possibility_counter == 4): rg = 
self.variant.reverse_normalizer.normalize(hgvs_not_delins) rtx = self.validator.vm.g_to_t(rg, tx_hgvs_not_delins.ac) @@ -1104,7 +1110,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): tx_hgvs_not_delins = self.validator.vm.c_to_n(ftx) except Exception: tx_hgvs_not_delins = ftx - disparity_deletion_in = ['transcript', 'Requires Analysis'] + self.disparity_deletion_in = ['transcript', 'Requires Analysis'] # Pre-processing of tx_hgvs_not_delins try: @@ -1119,15 +1125,15 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): tx_hgvs_not_delins_delins_from_dup) # GAP IN THE TRANSCRIPT DISPARITY DETECTED - if disparity_deletion_in[0] == 'transcript': + if self.disparity_deletion_in[0] == 'transcript': # Suppress intron boundary crossing due to non-intron intron based c. seq annotations suppress_c_normalization = 'true' # amend_RefSeqGene = 'true' # ANY VARIANT WHOLLY WITHIN THE GAP - hgvs_refreshed_variant, gapped_transcripts, auto_info = self.transcript_disparity(tx_hgvs_not_delins, gapped_transcripts, reverse_normalized_hgvs_genomic, hgvs_genomic_5pr, disparity_deletion_in, auto_info, stored_hgvs_not_delins, hgvs_genomic, orientation, 2) + hgvs_refreshed_variant = self.transcript_disparity(tx_hgvs_not_delins, reverse_normalized_hgvs_genomic, hgvs_genomic_5pr, stored_hgvs_not_delins, hgvs_genomic, 2) # GAP IN THE CHROMOSOME - elif disparity_deletion_in[0] == 'chromosome': + elif self.disparity_deletion_in[0] == 'chromosome': suppress_c_normalization = 'true' # amend_RefSeqGene = 'true' if possibility_counter == 3: @@ -1137,9 +1143,9 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): else: hgvs_refreshed_variant = chromosome_normalized_hgvs_coding # Warn - auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( + self.auto_info = self.auto_info + str(hgvs_refreshed_variant.ac) + ':c.' 
+ str( hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ + self.disparity_deletion_in[ 1]) + ' transcript base(s) that fail to align to chromosome ' + str( hgvs_genomic.ac) + '\n' else: @@ -1202,12 +1208,12 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): no_normalized_c = 'false' # Break if gap has been detected - if disparity_deletion_in[0] != 'false': + if self.disparity_deletion_in[0] != 'false': break # Warn user about gapping - if auto_info != '': - info_lines = auto_info.split('\n') + if self.auto_info != '': + info_lines = self.auto_info.split('\n') info_keys = {} for information in info_lines: info_keys[information] = '' @@ -1216,11 +1222,11 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): 'The displayed variants may be artefacts of aligning ' + hgvs_coding.ac + ' with genome build ' + self.variant.primary_assembly) for ky in list(info_keys.keys()): info_out.append(ky) - auto_info = '\n'.join(info_out) - auto_info = auto_info + '\nCaution should be used when reporting the displayed variant descriptions: If you are unsure, please contact admin' - auto_info = str(auto_info.replace('\n', ': ')) - self.variant.warnings += ': ' + str(auto_info) - logger.warning(str(auto_info)) + self.auto_info = '\n'.join(info_out) + self.auto_info = self.auto_info + '\nCaution should be used when reporting the displayed variant descriptions: If you are unsure, please contact admin' + self.auto_info = str(self.auto_info.replace('\n', ': ')) + self.variant.warnings += ': ' + str(self.auto_info) + logger.warning(str(self.auto_info)) # Normailse hgvs_genomic try: hgvs_genomic = self.variant.hn.normalize(hgvs_genomic) @@ -1229,7 +1235,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): error = str(e) if re.search('base start position must be <= end position', error) and \ - disparity_deletion_in[0] == 'chromosome': + self.disparity_deletion_in[0] == 'chromosome': if hgvs_genomic.posedit.edit.type == 'delins': start 
= hgvs_genomic.posedit.pos.start.base end = hgvs_genomic.posedit.pos.end.base @@ -1254,10 +1260,9 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): print('in gapped_mapping', hgvs_coding) - return hgvs_genomic, gapped_transcripts, auto_info, suppress_c_normalization, hgvs_coding, hgvs_genomic_possibilities + return hgvs_genomic, self.gapped_transcripts, self.auto_info, suppress_c_normalization, hgvs_coding, self.hgvs_genomic_possibilities - def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic, gapped_transcripts, - hgvs_genomic_possibilities, auto_info): + def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic): logger.warning('g_to_t gap code 2 active') hgvs_genomic_variant = hgvs_genomic @@ -1280,7 +1285,7 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic, gapped_tr pos = str(pos) stored_hgvs_not_delins = self.validator.hp.parse_hgvs_variant(str( hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) - orientation = int(ori[0]['alt_strand']) + self.orientation = int(ori[0]['alt_strand']) saved_hgvs_coding = copy.deepcopy(hgvs_coding) # is it in an exon? 
@@ -1299,7 +1304,7 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic, gapped_tr hgvs_from_5n_g = self.variant.no_norm_evm.g_to_t(hgvs_genomic_5pr, saved_hgvs_coding.ac) # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths - disparity_deletion_in = ['false', 'false'] + self.disparity_deletion_in = ['false', 'false'] if stored_hgvs_not_delins != '': # Refresh hgvs_not_delins from stored_hgvs_not_delins hgvs_not_delins = self.dup_ins_5prime_shift(stored_hgvs_not_delins, hgvs_genomic_5pr, saved_hgvs_coding) @@ -1342,15 +1347,15 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic, gapped_tr rn_tx_hgvs_not_delins.posedit.edit.ref): gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['chromosome', gap_length] + self.disparity_deletion_in = ['chromosome', gap_length] elif len(hgvs_not_delins.posedit.edit.ref) > len( rn_tx_hgvs_not_delins.posedit.edit.ref): gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( rn_tx_hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] + self.disparity_deletion_in = ['transcript', gap_length] else: re_capture_tx_variant = [] - for internal_possibility in hgvs_genomic_possibilities: + for internal_possibility in self.hgvs_genomic_possibilities: if internal_possibility == '': continue @@ -1394,7 +1399,7 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic, gapped_tr tx_hgvs_not_delins = self.validator.vm.c_to_n(re_capture_tx_variant[2]) except: tx_hgvs_not_delins = re_capture_tx_variant[2] - disparity_deletion_in = re_capture_tx_variant[0:-1] + self.disparity_deletion_in = re_capture_tx_variant[0:-1] else: pass @@ -1420,13 +1425,13 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic, gapped_tr return True elif re.match('Normalization of intronic variants is not supported', error): # We 
know that this cannot be because of an intronic variant, so must be aligned to tx gap - disparity_deletion_in = ['transcript', 'Requires Analysis'] + self.disparity_deletion_in = ['transcript', 'Requires Analysis'] if hard_fail == 'true': - disparity_deletion_in = ['false', 'false'] + self.disparity_deletion_in = ['false', 'false'] # Recreate hgvs_genomic - if disparity_deletion_in[0] == 'transcript': + if self.disparity_deletion_in[0] == 'transcript': hgvs_genomic = hgvs_not_delins # Pre-processing of tx_hgvs_not_delins @@ -1441,27 +1446,27 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic, gapped_tr tx_hgvs_not_delins = self.validator.hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) # GAP IN THE TRANSCRIPT DISPARITY DETECTED - if disparity_deletion_in[0] == 'transcript': + if self.disparity_deletion_in[0] == 'transcript': gap_position = '' gapped_alignment_warning = str( hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly # ANY VARIANT WHOLLY WITHIN THE GAP - hgvs_refreshed_variant, gapped_transcripts, auto_info = self.transcript_disparity(tx_hgvs_not_delins, gapped_transcripts, reverse_normalized_hgvs_genomic, hgvs_genomic_5pr, disparity_deletion_in, auto_info, stored_hgvs_not_delins, hgvs_genomic, orientation, 3) + hgvs_refreshed_variant = self.transcript_disparity(tx_hgvs_not_delins, reverse_normalized_hgvs_genomic, hgvs_genomic_5pr, stored_hgvs_not_delins, hgvs_genomic, 3) # GAP IN THE CHROMOSOME - elif disparity_deletion_in[0] == 'chromosome': + elif self.disparity_deletion_in[0] == 'chromosome': # Set warning variables gap_position = '' gapped_alignment_warning = str( hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly hgvs_refreshed_variant = tx_hgvs_not_delins # Warn - auto_info = 
auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( - hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(disparity_deletion_in[ + self.auto_info = self.auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( + hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(self.disparity_deletion_in[ 1]) + ' transcript base(s) that fail to align to chromosome ' + str( hgvs_genomic.ac) + '\n' - gapped_transcripts = gapped_transcripts + str(hgvs_refreshed_variant.ac) + ' ' + self.gapped_transcripts = self.gapped_transcripts + str(hgvs_refreshed_variant.ac) + ' ' else: # Keep the same by re-setting rel_var hgvs_refreshed_variant = saved_hgvs_coding @@ -1604,7 +1609,7 @@ def move_tx_start_base_to_previous_nonoffset(self, rn_tx_hgvs_not_delins, saved_ return rn_tx_hgvs_not_delins, hgvs_not_delins - def c2_pos_edit(self, tx_hgvs_not_delins, hgvs_genomic, orientation): + def c2_pos_edit(self, tx_hgvs_not_delins, hgvs_genomic): try: c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) except: @@ -1615,7 +1620,7 @@ def c2_pos_edit(self, tx_hgvs_not_delins, hgvs_genomic, orientation): c1.posedit.pos.end = c2.posedit.pos.start c1.posedit.edit.ref = '' c1.posedit.edit.alt = '' - if orientation != -1: + if self.orientation != -1: g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) g1.posedit.edit.alt = g1.posedit.edit.ref @@ -1634,7 +1639,7 @@ def c2_pos_edit(self, tx_hgvs_not_delins, hgvs_genomic, orientation): return hgvs_refreshed_variant - def c1_pos_edit(self, tx_hgvs_not_delins, hgvs_genomic, orientation): + def c1_pos_edit(self, tx_hgvs_not_delins, hgvs_genomic): try: c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) except: @@ -1645,7 +1650,7 @@ def c1_pos_edit(self, tx_hgvs_not_delins, hgvs_genomic, orientation): c2.posedit.pos.end.offset = 0 c2.posedit.edit.ref = '' c2.posedit.edit.alt = '' - if orientation != -1: + if self.orientation != -1: g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) g2 = 
self.validator.vm.t_to_g(c2, hgvs_genomic.ac) g2.posedit.edit.alt = g2.posedit.edit.ref @@ -1664,12 +1669,12 @@ def c1_pos_edit(self, tx_hgvs_not_delins, hgvs_genomic, orientation): return hgvs_refreshed_variant - def transcript_disparity(self, tx_hgvs_not_delins, gapped_transcripts, reverse_normalized_hgvs_genomic, hgvs_genomic_5pr, disparity_deletion_in, auto_info, stored_hgvs_not_delins, hgvs_genomic, orientation, running_option): + def transcript_disparity(self, tx_hgvs_not_delins, reverse_normalized_hgvs_genomic, hgvs_genomic_5pr, stored_hgvs_not_delins, hgvs_genomic, running_option): if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', str( tx_hgvs_not_delins.posedit.pos.start))) and ( re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', str( tx_hgvs_not_delins.posedit.pos.end))): - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + self.gapped_transcripts = self.gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) # Copy the current variant tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) @@ -1790,8 +1795,8 @@ def transcript_disparity(self, tx_hgvs_not_delins, gapped_transcripts, reverse_n # Set warning gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - disparity_deletion_in[1] = [gap_size] - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + self.disparity_deletion_in[1] = [gap_size] + self.auto_info = self.auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( tx_hgvs_not_delins.ac) non_valid_caution = 'true' @@ -1807,7 +1812,7 @@ def transcript_disparity(self, tx_hgvs_not_delins, gapped_transcripts, reverse_n gps = for_location_c.posedit.pos.start.base gpe = for_location_c.posedit.pos.start.base + 1 gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' - auto_info = auto_info + '%s' % (gap_position) + self.auto_info = self.auto_info + '%s' % (gap_position) else: if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: @@ -1839,13 +1844,13 @@ def transcript_disparity(self, tx_hgvs_not_delins, gapped_transcripts, reverse_n if re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + self.auto_info = self.auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ + self.disparity_deletion_in[ 1]) + ' genomic base(s) that fail to align to transcript ' + str( tx_hgvs_not_delins.ac) non_valid_caution = 'true' - hgvs_refreshed_variant = self.c2_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) + hgvs_refreshed_variant = self.c2_pos_edit(tx_hgvs_not_delins, hgvs_genomic) # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) @@ -1855,17 +1860,17 @@ def transcript_disparity(self, tx_hgvs_not_delins, gapped_transcripts, reverse_n gpe = for_location_c.posedit.pos.start.base + 1 gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' # Warn update - auto_info = auto_info + '%s' % (gap_position) + self.auto_info = self.auto_info + '%s' % (gap_position) elif re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( + self.auto_info = self.auto_info + 'Genome position ' + str( stored_hgvs_not_delins.ac) + ':g.' 
+ str( stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + self.disparity_deletion_in[1]) + '-bp gap in transcript ' + str( tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + self.gapped_transcripts = self.gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) non_valid_caution = 'true' - hgvs_refreshed_variant = self.c1_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) + hgvs_refreshed_variant = self.c1_pos_edit(tx_hgvs_not_delins, hgvs_genomic) # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) @@ -1875,17 +1880,17 @@ def transcript_disparity(self, tx_hgvs_not_delins, gapped_transcripts, reverse_n gpe = for_location_c.posedit.pos.end.base + 1 gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' # Warn update - auto_info = auto_info + '%s' % (gap_position) + self.auto_info = self.auto_info + '%s' % (gap_position) elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + self.auto_info = self.auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ + self.disparity_deletion_in[ 1]) + ' genomic base(s) that fail to align to transcript ' + str( tx_hgvs_not_delins.ac) non_valid_caution = 'true' - hgvs_refreshed_variant = self.c2_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) + hgvs_refreshed_variant = self.c2_pos_edit(tx_hgvs_not_delins, hgvs_genomic) # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) @@ -1895,15 +1900,15 @@ def transcript_disparity(self, tx_hgvs_not_delins, gapped_transcripts, reverse_n gpe = for_location_c.posedit.pos.start.base gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' # Warn update - auto_info = auto_info + '%s' % (gap_position) + self.auto_info = self.auto_info + '%s' % (gap_position) elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( + self.auto_info = self.auto_info + 'Genome position ' + str( stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + self.disparity_deletion_in[1]) + '-bp gap in transcript ' + str( tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + self.gapped_transcripts = self.gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) non_valid_caution = 'true' ## Have variation in first copy here! @@ -1928,7 +1933,7 @@ def transcript_disparity(self, tx_hgvs_not_delins, gapped_transcripts, reverse_n c3.posedit.edit.alt = alternate hgvs_refreshed_variant = c3 else: - hgvs_refreshed_variant = self.c1_pos_edit(tx_hgvs_not_delins, hgvs_genomic, orientation) + hgvs_refreshed_variant = self.c1_pos_edit(tx_hgvs_not_delins, hgvs_genomic) # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) @@ -1938,11 +1943,11 @@ def transcript_disparity(self, tx_hgvs_not_delins, gapped_transcripts, reverse_n gpe = for_location_c.posedit.pos.end.base gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' # Warn update - auto_info = auto_info + '%s' % (gap_position) + self.auto_info = self.auto_info + '%s' % (gap_position) else: - auto_info = auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( + self.auto_info = self.auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ + self.disparity_deletion_in[ 1]) + ' genomic base(s) that fail to align to transcript ' + str( tx_hgvs_not_delins.ac) + '\n' @@ -1953,9 +1958,9 @@ def transcript_disparity(self, tx_hgvs_not_delins, gapped_transcripts, reverse_n hgvs_refreshed_variant = tx_hgvs_not_delins else: hgvs_refreshed_variant = tx_hgvs_not_delins - gapped_transcripts = gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + self.gapped_transcripts = self.gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) - return hgvs_refreshed_variant, gapped_transcripts, auto_info + return hgvs_refreshed_variant def edit_output(self, hgvs_refreshed_variant, saved_hgvs_coding): if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( @@ -1995,21 +2000,21 @@ def edit_output(self, hgvs_refreshed_variant, saved_hgvs_coding): pass return hgvs_refreshed_variant - def logic_check(self, hgvs_not_delins, rn_tx_hgvs_not_delins, hgvs_genomic_possibilities, hgvs_coding): + def logic_check(self, hgvs_not_delins, rn_tx_hgvs_not_delins, hgvs_coding): # Logic if len(hgvs_not_delins.posedit.edit.ref) < len( rn_tx_hgvs_not_delins.posedit.edit.ref): gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['chromosome', gap_length] + self.disparity_deletion_in = ['chromosome', gap_length] elif len(hgvs_not_delins.posedit.edit.ref) > len( rn_tx_hgvs_not_delins.posedit.edit.ref): gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( rn_tx_hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] + self.disparity_deletion_in = ['transcript', gap_length] else: re_capture_tx_variant = [] - for internal_possibility in hgvs_genomic_possibilities: + for internal_possibility in self.hgvs_genomic_possibilities: if internal_possibility == '': continue @@ -2053,7 +2058,7 @@ def logic_check(self, hgvs_not_delins, 
rn_tx_hgvs_not_delins, hgvs_genomic_possi tx_hgvs_not_delins = self.validator.vm.c_to_n(re_capture_tx_variant[2]) except: tx_hgvs_not_delins = re_capture_tx_variant[2] - disparity_deletion_in = re_capture_tx_variant[0:-1] + self.disparity_deletion_in = re_capture_tx_variant[0:-1] else: pass - return disparity_deletion_in, tx_hgvs_not_delins, hgvs_not_delins, hgvs_genomic_5pr + return tx_hgvs_not_delins, hgvs_not_delins, hgvs_genomic_5pr diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index e9d88ba0..30671a10 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -736,8 +736,7 @@ def transcripts_to_gene(variant, validator): logger.warning("gap_compensation_2 = " + str(gap_compensation)) if gap_compensation is True: hgvs_coding = gap_mapper.g_to_t_gapped_mapping_stage2( - ori, hgvs_coding, hgvs_genomic, gapped_transcripts, hgvs_genomic_possibilities, - auto_info + ori, hgvs_coding, hgvs_genomic ) # OBTAIN THE RefSeqGene coordinates From f11c3580f3265081fcb0c7e5aa45d1b1bdc5ef45 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 26 Apr 2019 08:54:05 +0100 Subject: [PATCH 072/223] Improved inputs and outputs of gap_mapping methods --- VariantValidator/modules/gapped_mapping.py | 2 +- VariantValidator/modules/mappers.py | 16 ++++------------ 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index 841a06ba..6492ee49 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -1260,7 +1260,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): print('in gapped_mapping', hgvs_coding) - return hgvs_genomic, self.gapped_transcripts, self.auto_info, suppress_c_normalization, hgvs_coding, self.hgvs_genomic_possibilities + return hgvs_genomic, suppress_c_normalization, hgvs_coding def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, 
hgvs_genomic): logger.warning('g_to_t gap code 2 active') diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index 30671a10..a1c465bd 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -188,7 +188,6 @@ def transcripts_to_gene(variant, validator): warning = '' caution = '' error = '' - gapped_transcripts = '' # Collect information for genomic level validation obj = validator.hp.parse_hgvs_variant(str(variant.hgvs_formatted)) @@ -706,22 +705,16 @@ def transcripts_to_gene(variant, validator): # Genomic sequence hgvs_genomic = validator.myevm_t_to_g(hgvs_coding, variant.no_norm_evm, variant.primary_assembly, variant.hn) - # genomic_possibilities - # 1. take the simple 3 pr normalized hgvs_genomic - # 2. Lock in hgvs_genomic at its most 5 prime position wrt genome - hgvs_genomic_possibilities = [] - # Create gap_mapper object instance gap_mapper = gapped_mapping.GapMapper(variant, validator) + # --- GAP MAPPING 1 --- # Loop out gap finding code under these circumstances! if gap_compensation is True: - hgvs_genomic, gapped_transcripts, auto_info, suppress_c_normalization, hgvs_coding, \ - hgvs_genomic_possibilities = gap_mapper.g_to_t_compensation(ori, hgvs_coding, rec_var) + hgvs_genomic, suppress_c_normalization, hgvs_coding = gap_mapper.g_to_t_compensation(ori, hgvs_coding, rec_var) else: suppress_c_normalization = 'false' - auto_info = '' # Create pseudo VCF based on amended hgvs_genomic # hgvs_genomic_variant = hgvs_genomic @@ -732,12 +725,11 @@ def transcripts_to_gene(variant, validator): ori = validator.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=reverse_normalized_hgvs_genomic.ac, alt_aln_method=validator.alt_aln_method) + # --- GAP MAPPING 2 --- # Loop out gap finding code under these circumstances! 
logger.warning("gap_compensation_2 = " + str(gap_compensation)) if gap_compensation is True: - hgvs_coding = gap_mapper.g_to_t_gapped_mapping_stage2( - ori, hgvs_coding, hgvs_genomic - ) + hgvs_coding = gap_mapper.g_to_t_gapped_mapping_stage2(ori, hgvs_coding, hgvs_genomic) # OBTAIN THE RefSeqGene coordinates # Attempt 1 = UTA From 4c297d3791d461bf8d25a0193b834c85f3d9f846 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 26 Apr 2019 09:55:03 +0100 Subject: [PATCH 073/223] Moved a couple more variables into attributes and incorporated the check_logic method --- VariantValidator/modules/gapped_mapping.py | 451 ++++++++------------- 1 file changed, 169 insertions(+), 282 deletions(-) diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index 6492ee49..68525710 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -20,6 +20,9 @@ def __init__(self, variant, validator): self.hgvs_genomic_possibilities = [] self.disparity_deletion_in = [] + self.hgvs_genomic_5pr = None + self.tx_hgvs_not_delins = None + def gapped_g_to_c(self, rel_var): """ Gap aware projection from g. to c. @@ -36,7 +39,7 @@ def gapped_g_to_c(self, rel_var): hgvs_genomic_variant = self.variant.hgvs_genomic # Reverse normalize hgvs_genomic_variant: NOTE will replace ref reverse_normalized_hgvs_genomic = self.variant.reverse_normalizer.normalize(hgvs_genomic_variant) - hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + self.hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) # VCF vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, self.variant.primary_assembly, @@ -90,9 +93,9 @@ def gapped_g_to_c(self, rel_var): # Store a not real deletion insertion stored_hgvs_not_delins = self.validator.hp.parse_hgvs_variant(str( - hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' 
+ pos + '_' + end + 'del' + ref + 'ins' + alt) + self.hgvs_genomic_5pr.ac) + ':' + self.hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) stash_hgvs_not_delins = self.validator.hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_genomic_5pr.type + '.' + str( + stash_ac + ':' + self.hgvs_genomic_5pr.type + '.' + str( stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) # Set non-valid caution to false @@ -117,7 +120,7 @@ def gapped_g_to_c(self, rel_var): continue # Get orientation of the gene wrt genome and a list of exons mapped to the genome - ori = self.validator.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=hgvs_genomic_5pr.ac, + ori = self.validator.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=self.hgvs_genomic_5pr.ac, alt_aln_method=self.validator.alt_aln_method) self.orientation = int(ori[0]['alt_strand']) intronic_variant = 'false' @@ -183,8 +186,8 @@ def gapped_g_to_c(self, rel_var): genomic_start = int(exon['alt_start_i']) genomic_end = int(exon['alt_end_i']) if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + self.hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + self.hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): intronic_variant = 'false' break else: @@ -200,8 +203,8 @@ def gapped_g_to_c(self, rel_var): genomic_start = int(exon['alt_start_i']) genomic_end = int(exon['alt_end_i']) if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + self.hgvs_genomic_5pr.posedit.pos.start.base > genomic_start 
and self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + self.hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): intronic_variant = 'false' break else: @@ -215,8 +218,8 @@ def gapped_g_to_c(self, rel_var): genomic_start = int(exon['alt_start_i']) genomic_end = int(exon['alt_end_i']) if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + self.hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + self.hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): intronic_variant = 'false' break else: @@ -225,25 +228,25 @@ def gapped_g_to_c(self, rel_var): # If exonic, process if intronic_variant != 'true': # map form reverse normalized g. to c. 
- hgvs_from_5n_g = self.variant.no_norm_evm.g_to_t(hgvs_genomic_5pr, saved_hgvs_coding.ac) + hgvs_from_5n_g = self.variant.no_norm_evm.g_to_t(self.hgvs_genomic_5pr, saved_hgvs_coding.ac) # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths self.disparity_deletion_in = ['false', 'false'] if stored_hgvs_not_delins != '': # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = self.dup_ins_5prime_shift(stored_hgvs_not_delins, hgvs_genomic_5pr, + hgvs_not_delins = self.dup_ins_5prime_shift(stored_hgvs_not_delins, self.hgvs_genomic_5pr, saved_hgvs_coding) try: - tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) + self.tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) except hgvs.exceptions.HGVSInvalidIntervalError: - tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_genomic_5pr, saved_hgvs_coding.ac) + self.tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(self.hgvs_genomic_5pr, saved_hgvs_coding.ac) except hgvs.exceptions.HGVSError: if str(e) == 'start or end or both are beyond the bounds of transcript record': - tx_hgvs_not_delins = saved_hgvs_coding + self.tx_hgvs_not_delins = saved_hgvs_coding # Create normalized version of tx_hgvs_not_delins - rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) + rn_tx_hgvs_not_delins = copy.deepcopy(self.tx_hgvs_not_delins) # Check for +ve base and adjust if (re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', str( @@ -253,7 +256,7 @@ def gapped_g_to_c(self, rel_var): rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, tx_hgvs_not_delins, back=False) + rn_tx_hgvs_not_delins, hgvs_not_delins = 
self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, back=False) elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding) @@ -263,7 +266,7 @@ def gapped_g_to_c(self, rel_var): rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, tx_hgvs_not_delins) + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding) elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, with_base_subtract=True) @@ -293,31 +296,31 @@ def gapped_g_to_c(self, rel_var): hgvs_stash_t.posedit.edit.ref) self.disparity_deletion_in = ['transcript', gap_length] try: - tx_hgvs_not_delins = self.validator.vm.c_to_n(hgvs_stash_t) + self.tx_hgvs_not_delins = self.validator.vm.c_to_n(hgvs_stash_t) except: - tx_hgvs_not_delins = hgvs_stash_t + self.tx_hgvs_not_delins = hgvs_stash_t hgvs_not_delins = stash_hgvs_not_delins elif hgvs_stash_t.posedit.pos.start.offset != 0 or hgvs_stash_t.posedit.pos.end.offset != 0: self.disparity_deletion_in = ['transcript', 'Requires Analysis'] try: - tx_hgvs_not_delins = self.validator.vm.c_to_n(hgvs_stash_t) + self.tx_hgvs_not_delins = self.validator.vm.c_to_n(hgvs_stash_t) except: - tx_hgvs_not_delins = hgvs_stash_t + self.tx_hgvs_not_delins = hgvs_stash_t hgvs_not_delins = stash_hgvs_not_delins - hgvs_genomic_5pr = stash_hgvs_not_delins + self.hgvs_genomic_5pr = stash_hgvs_not_delins else: pass # Final sanity checks try: - self.validator.vm.g_to_t(hgvs_not_delins, 
tx_hgvs_not_delins.ac) + self.validator.vm.g_to_t(hgvs_not_delins, self.tx_hgvs_not_delins.ac) except Exception as e: if str(e) == 'start or end or both are beyond the bounds of transcript record': hgvs_not_delins = saved_hgvs_coding self.disparity_deletion_in = ['false', 'false'] logger.warning(str(e)) try: - self.variant.hn.normalize(tx_hgvs_not_delins) + self.variant.hn.normalize(self.tx_hgvs_not_delins) except hgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) if re.match('Normalization of intronic variants is not supported', @@ -333,25 +336,25 @@ def gapped_g_to_c(self, rel_var): # We know that this cannot be because of an intronic variant, so must be aligned to tx gap self.disparity_deletion_in = ['transcript', 'Requires Analysis'] logger.warning(error) - # Pre-processing of tx_hgvs_not_delins + # Pre-processing of self.tx_hgvs_not_delins try: - if tx_hgvs_not_delins.posedit.edit.alt is None: - tx_hgvs_not_delins.posedit.edit.alt = '' + if self.tx_hgvs_not_delins.posedit.edit.alt is None: + self.tx_hgvs_not_delins.posedit.edit.alt = '' except Exception as e: if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( - tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = self.validator.hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) + tx_hgvs_not_delins_delins_from_dup = self.tx_hgvs_not_delins.ac + ':' + self.tx_hgvs_not_delins.type + '.' 
+ str( + self.tx_hgvs_not_delins.posedit.pos.start) + '_' + str( + self.tx_hgvs_not_delins.posedit.pos.end) + 'del' + self.tx_hgvs_not_delins.posedit.edit.ref + 'ins' + self.tx_hgvs_not_delins.posedit.edit.ref + self.tx_hgvs_not_delins.posedit.edit.ref + self.tx_hgvs_not_delins = self.validator.hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) # GAP IN THE TRANSCRIPT DISPARITY DETECTED if self.disparity_deletion_in[0] == 'transcript': gap_position = '' gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly + self.hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly # ANY VARIANT WHOLLY WITHIN THE GAP - hgvs_refreshed_variant = self.transcript_disparity(tx_hgvs_not_delins, reverse_normalized_hgvs_genomic, hgvs_genomic_5pr, stored_hgvs_not_delins, self.variant.hgvs_genomic, 1) + hgvs_refreshed_variant = self.transcript_disparity(reverse_normalized_hgvs_genomic, stored_hgvs_not_delins, self.variant.hgvs_genomic, 1) # GAP IN THE CHROMOSOME @@ -359,8 +362,8 @@ def gapped_g_to_c(self, rel_var): # Set warning variables gap_position = '' gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly - hgvs_refreshed_variant = tx_hgvs_not_delins + self.hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly + hgvs_refreshed_variant = self.tx_hgvs_not_delins # Warn self.auto_info = self.auto_info + str(hgvs_refreshed_variant.ac) + ':c.' 
+ str( hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(self.disparity_deletion_in[ @@ -413,7 +416,7 @@ def gapped_g_to_c(self, rel_var): tx_hard_right = self.variant.hn.normalize(tx_hard_right) gap_position = '' gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' may be an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly + self.hgvs_genomic_5pr) + ' may be an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly hgvs_refreshed_variant = tx_hard_right self.gapped_transcripts = self.gapped_transcripts + str(tx_hard_right.ac) + ' ' elif len(stash_hgvs_not_delins_left.posedit.edit.ref) < len( @@ -421,7 +424,7 @@ def gapped_g_to_c(self, rel_var): tx_hard_left = self.variant.hn.normalize(tx_hard_left) gap_position = '' gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' may be an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly + self.hgvs_genomic_5pr) + ' may be an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly hgvs_refreshed_variant = tx_hard_left self.gapped_transcripts = self.gapped_transcripts + str(tx_hard_left.ac) + ' ' else: @@ -815,7 +818,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb reverse_normalized_hgvs_genomic = self.variant.reverse_normalizer.normalize(hgvs_genomic) - hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + self.hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) # Create VCF vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, self.variant.primary_assembly, @@ -831,7 +834,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): # Store a not real deletion insertion to test for gapping stored_hgvs_not_delins = self.validator.hp.parse_hgvs_variant(str( - hgvs_genomic_5pr.ac) + ':' + 
hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) + self.hgvs_genomic_5pr.ac) + ':' + self.hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) # Detect intronic variation using normalization intronic_variant = 'false' @@ -901,8 +904,8 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): genomic_start = int(exon['alt_start_i']) genomic_end = int(exon['alt_end_i']) if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + self.hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + self.hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): intronic_variant = 'false' break else: @@ -918,8 +921,8 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): genomic_start = int(exon['alt_start_i']) genomic_end = int(exon['alt_end_i']) if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + self.hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + self.hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): intronic_variant = 'false' break else: @@ -933,8 +936,8 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): genomic_start = int(exon['alt_start_i']) genomic_end = int(exon['alt_end_i']) if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > 
genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + self.hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + self.hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): intronic_variant = 'false' break else: @@ -946,17 +949,17 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths if stored_hgvs_not_delins != '': # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = self.dup_ins_5prime_shift(stored_hgvs_not_delins, hgvs_genomic_5pr, + hgvs_not_delins = self.dup_ins_5prime_shift(stored_hgvs_not_delins, self.hgvs_genomic_5pr, saved_hgvs_coding) try: - tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + self.tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) except hgvs.exceptions.HGVSInvalidIntervalError: - tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, + self.tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, saved_hgvs_coding.ac) # Create normalized version of tx_hgvs_not_delins - rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) + rn_tx_hgvs_not_delins = copy.deepcopy(self.tx_hgvs_not_delins) # Check for +1 base and adjust if re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( @@ -967,7 +970,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, tx_hgvs_not_delins, back=False) + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, 
back=False) elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding) @@ -984,7 +987,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): # Add the additional base to the ALT start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = self.validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + ref_bases = self.validator.sf.fetch_seq(str(self.tx_hgvs_not_delins.ac), start, end) rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases if re.match('NM_', str(rn_tx_hgvs_not_delins)): test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) @@ -999,75 +1002,17 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, with_base_subtract=True) # Logic - if len(hgvs_not_delins.posedit.edit.ref) < len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( - hgvs_not_delins.posedit.edit.ref) - self.disparity_deletion_in = ['chromosome', gap_length] - elif len(hgvs_not_delins.posedit.edit.ref) > len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( - rn_tx_hgvs_not_delins.posedit.edit.ref) - self.disparity_deletion_in = ['transcript', gap_length] - else: - re_capture_tx_variant = [] - for internal_possibility in self.hgvs_genomic_possibilities: - if internal_possibility == '': - continue - - hgvs_t_possibility = self.validator.vm.g_to_t(internal_possibility, hgvs_coding.ac) - if hgvs_t_possibility.posedit.edit.type == 'ins': - try: - hgvs_t_possibility = self.validator.vm.c_to_n(hgvs_t_possibility) - except: - fn.exceptPass() - ins_ref = 
self.validator.sf.fetch_seq(hgvs_t_possibility.ac, - hgvs_t_possibility.posedit.pos.start.base - 1, - hgvs_t_possibility.posedit.pos.start.base + 1) - try: - hgvs_t_possibility = self.validator.vm.n_to_c(hgvs_t_possibility) - except: - fn.exceptPass() - hgvs_t_possibility.posedit.edit.ref = ins_ref - hgvs_t_possibility.posedit.edit.alt = ins_ref[ - 0] + hgvs_t_possibility.posedit.edit.alt + \ - ins_ref[1] - if internal_possibility.posedit.edit.type == 'ins': - ins_ref = self.validator.sf.fetch_seq(internal_possibility.ac, - internal_possibility.posedit.pos.start.base - 1, - internal_possibility.posedit.pos.end.base) - internal_possibility.posedit.edit.ref = ins_ref - internal_possibility.posedit.edit.alt = ins_ref[ - 0] + internal_possibility.posedit.edit.alt + \ - ins_ref[1] - - if len(hgvs_t_possibility.posedit.edit.ref) < len( - internal_possibility.posedit.edit.ref): - gap_length = len(internal_possibility.posedit.edit.ref) - len( - hgvs_t_possibility.posedit.edit.ref) - re_capture_tx_variant = ['transcript', gap_length, hgvs_t_possibility] - hgvs_not_delins = internal_possibility - hgvs_genomic_5pr = internal_possibility - break - - if re_capture_tx_variant != []: - try: - tx_hgvs_not_delins = self.validator.vm.c_to_n(re_capture_tx_variant[2]) - except: - tx_hgvs_not_delins = re_capture_tx_variant[2] - self.disparity_deletion_in = re_capture_tx_variant[0:-1] - else: - pass + hgvs_not_delins = self.logic_check(hgvs_not_delins, rn_tx_hgvs_not_delins, hgvs_coding) # 'At hgvs_genomic' # Final sanity checks try: - self.validator.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + self.validator.vm.g_to_t(hgvs_not_delins, self.tx_hgvs_not_delins.ac) except Exception as e: if str(e) == 'start or end or both are beyond the bounds of transcript record': continue try: - self.variant.hn.normalize(tx_hgvs_not_delins) + self.variant.hn.normalize(self.tx_hgvs_not_delins) except hgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) if re.match('Normalization of 
intronic variants is not supported', @@ -1091,9 +1036,9 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): if self.disparity_deletion_in[0] == 'false' and ( possibility_counter == 3 or possibility_counter == 4): rg = self.variant.reverse_normalizer.normalize(hgvs_not_delins) - rtx = self.validator.vm.g_to_t(rg, tx_hgvs_not_delins.ac) + rtx = self.validator.vm.g_to_t(rg, self.tx_hgvs_not_delins.ac) fg = self.variant.hn.normalize(hgvs_not_delins) - ftx = self.validator.vm.g_to_t(fg, tx_hgvs_not_delins.ac) + ftx = self.validator.vm.g_to_t(fg, self.tx_hgvs_not_delins.ac) if (rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): exons = self.validator.hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, self.validator.alt_aln_method) @@ -1105,23 +1050,23 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): if exonic is True: hgvs_not_delins = fg hgvs_genomic = fg - hgvs_genomic_5pr = fg + self.hgvs_genomic_5pr = fg try: - tx_hgvs_not_delins = self.validator.vm.c_to_n(ftx) + self.tx_hgvs_not_delins = self.validator.vm.c_to_n(ftx) except Exception: - tx_hgvs_not_delins = ftx + self.tx_hgvs_not_delins = ftx self.disparity_deletion_in = ['transcript', 'Requires Analysis'] - # Pre-processing of tx_hgvs_not_delins + # Pre-processing of self.tx_hgvs_not_delins try: - if tx_hgvs_not_delins.posedit.edit.alt is None: - tx_hgvs_not_delins.posedit.edit.alt = '' + if self.tx_hgvs_not_delins.posedit.edit.alt is None: + self.tx_hgvs_not_delins.posedit.edit.alt = '' except Exception as e: if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' 
+ str( - tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = self.validator.hp.parse_hgvs_variant( + tx_hgvs_not_delins_delins_from_dup = self.tx_hgvs_not_delins.ac + ':' + self.tx_hgvs_not_delins.type + '.' + str( + self.tx_hgvs_not_delins.posedit.pos.start) + '_' + str( + self.tx_hgvs_not_delins.posedit.pos.end) + 'del' + self.tx_hgvs_not_delins.posedit.edit.ref + 'ins' + self.tx_hgvs_not_delins.posedit.edit.ref + self.tx_hgvs_not_delins.posedit.edit.ref + self.tx_hgvs_not_delins = self.validator.hp.parse_hgvs_variant( tx_hgvs_not_delins_delins_from_dup) # GAP IN THE TRANSCRIPT DISPARITY DETECTED @@ -1130,7 +1075,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): suppress_c_normalization = 'true' # amend_RefSeqGene = 'true' # ANY VARIANT WHOLLY WITHIN THE GAP - hgvs_refreshed_variant = self.transcript_disparity(tx_hgvs_not_delins, reverse_normalized_hgvs_genomic, hgvs_genomic_5pr, stored_hgvs_not_delins, hgvs_genomic, 2) + hgvs_refreshed_variant = self.transcript_disparity(reverse_normalized_hgvs_genomic, stored_hgvs_not_delins, hgvs_genomic, 2) # GAP IN THE CHROMOSOME elif self.disparity_deletion_in[0] == 'chromosome': @@ -1267,7 +1212,7 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic): hgvs_genomic_variant = hgvs_genomic reverse_normalized_hgvs_genomic = self.variant.reverse_normalizer.normalize(hgvs_genomic_variant) - hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + self.hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, self.variant.primary_assembly, self.variant.reverse_normalizer, self.validator.sf) chr = vcf_dict['chr'] @@ -1284,7 +1229,7 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic): end = str(int(pos) + len(ref) 
- 1) pos = str(pos) stored_hgvs_not_delins = self.validator.hp.parse_hgvs_variant(str( - hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) + self.hgvs_genomic_5pr.ac) + ':' + self.hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) self.orientation = int(ori[0]['alt_strand']) saved_hgvs_coding = copy.deepcopy(hgvs_coding) @@ -1296,29 +1241,29 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic): # Take from stored copy # hgvs_genomic_5pr = copy.deepcopy(stored_hgvs_genomic_5pr) if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + self.hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + self.hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): is_it_in_an_exon = 'yes' if is_it_in_an_exon == 'yes': # map form reverse normalized g. to c. 
- hgvs_from_5n_g = self.variant.no_norm_evm.g_to_t(hgvs_genomic_5pr, saved_hgvs_coding.ac) + hgvs_from_5n_g = self.variant.no_norm_evm.g_to_t(self.hgvs_genomic_5pr, saved_hgvs_coding.ac) # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths self.disparity_deletion_in = ['false', 'false'] if stored_hgvs_not_delins != '': # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = self.dup_ins_5prime_shift(stored_hgvs_not_delins, hgvs_genomic_5pr, saved_hgvs_coding) + hgvs_not_delins = self.dup_ins_5prime_shift(stored_hgvs_not_delins, self.hgvs_genomic_5pr, saved_hgvs_coding) hard_fail = 'false' try: - tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) + self.tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) except Exception as e: if str(e) == 'start or end or both are beyond the bounds of transcript record': - tx_hgvs_not_delins = hgvs_coding + self.tx_hgvs_not_delins = hgvs_coding hard_fail = 'true' - # Create normalized version of tx_hgvs_not_delins - rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) + # Create normalized version of self.tx_hgvs_not_delins + rn_tx_hgvs_not_delins = copy.deepcopy(self.tx_hgvs_not_delins) # Check for +ve base and adjust if re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\+', str( @@ -1326,7 +1271,7 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic): rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, tx_hgvs_not_delins, back=False) + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, back=False) elif re.search(r'\+', 
str(rn_tx_hgvs_not_delins.posedit.pos.start)): rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding) @@ -1336,82 +1281,24 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic): rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, tx_hgvs_not_delins) + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding) elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, with_base_subtract=True) # Logic - if len(hgvs_not_delins.posedit.edit.ref) < len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( - hgvs_not_delins.posedit.edit.ref) - self.disparity_deletion_in = ['chromosome', gap_length] - elif len(hgvs_not_delins.posedit.edit.ref) > len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( - rn_tx_hgvs_not_delins.posedit.edit.ref) - self.disparity_deletion_in = ['transcript', gap_length] - else: - re_capture_tx_variant = [] - for internal_possibility in self.hgvs_genomic_possibilities: - if internal_possibility == '': - continue - - hgvs_t_possibility = self.validator.vm.g_to_t(internal_possibility, hgvs_coding.ac) - if hgvs_t_possibility.posedit.edit.type == 'ins': - try: - hgvs_t_possibility = self.validator.vm.c_to_n(hgvs_t_possibility) - except: - fn.exceptPass() - ins_ref = self.validator.sf.fetch_seq(hgvs_t_possibility.ac, - hgvs_t_possibility.posedit.pos.start.base - 1, - hgvs_t_possibility.posedit.pos.start.base + 1) - try: - 
hgvs_t_possibility = self.validator.vm.n_to_c(hgvs_t_possibility) - except: - fn.exceptPass() - hgvs_t_possibility.posedit.edit.ref = ins_ref - hgvs_t_possibility.posedit.edit.alt = ins_ref[ - 0] + hgvs_t_possibility.posedit.edit.alt + \ - ins_ref[1] - if internal_possibility.posedit.edit.type == 'ins': - ins_ref = self.validator.sf.fetch_seq(internal_possibility.ac, - internal_possibility.posedit.pos.start.base - 1, - internal_possibility.posedit.pos.end.base) - internal_possibility.posedit.edit.ref = ins_ref - internal_possibility.posedit.edit.alt = ins_ref[ - 0] + internal_possibility.posedit.edit.alt + \ - ins_ref[1] - - if len(hgvs_t_possibility.posedit.edit.ref) < len( - internal_possibility.posedit.edit.ref): - gap_length = len(internal_possibility.posedit.edit.ref) - len( - hgvs_t_possibility.posedit.edit.ref) - re_capture_tx_variant = ['transcript', gap_length, hgvs_t_possibility] - hgvs_not_delins = internal_possibility - hgvs_genomic_5pr = internal_possibility - break - - if re_capture_tx_variant != []: - try: - tx_hgvs_not_delins = self.validator.vm.c_to_n(re_capture_tx_variant[2]) - except: - tx_hgvs_not_delins = re_capture_tx_variant[2] - self.disparity_deletion_in = re_capture_tx_variant[0:-1] - else: - pass + hgvs_not_delins = self.logic_check(hgvs_not_delins, rn_tx_hgvs_not_delins, hgvs_coding) # Final sanity checks try: - self.validator.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + self.validator.vm.g_to_t(hgvs_not_delins, self.tx_hgvs_not_delins.ac) except Exception as e: if str(e) == 'start or end or both are beyond the bounds of transcript record': logger.warning(str(e)) return True try: - self.variant.hn.normalize(tx_hgvs_not_delins) + self.variant.hn.normalize(self.tx_hgvs_not_delins) except hgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) if re.match('Normalization of intronic variants is not supported', @@ -1436,31 +1323,31 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic): # Pre-processing of 
tx_hgvs_not_delins try: - if tx_hgvs_not_delins.posedit.edit.alt is None: - tx_hgvs_not_delins.posedit.edit.alt = '' + if self.tx_hgvs_not_delins.posedit.edit.alt is None: + self.tx_hgvs_not_delins.posedit.edit.alt = '' except Exception as e: if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( - tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = self.validator.hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) + tx_hgvs_not_delins_delins_from_dup = self.tx_hgvs_not_delins.ac + ':' + self.tx_hgvs_not_delins.type + '.' + str( + self.tx_hgvs_not_delins.posedit.pos.start) + '_' + str( + self.tx_hgvs_not_delins.posedit.pos.end) + 'del' + self.tx_hgvs_not_delins.posedit.edit.ref + 'ins' + self.tx_hgvs_not_delins.posedit.edit.ref + self.tx_hgvs_not_delins.posedit.edit.ref + self.tx_hgvs_not_delins = self.validator.hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) # GAP IN THE TRANSCRIPT DISPARITY DETECTED if self.disparity_deletion_in[0] == 'transcript': gap_position = '' gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly + self.hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly # ANY VARIANT WHOLLY WITHIN THE GAP - hgvs_refreshed_variant = self.transcript_disparity(tx_hgvs_not_delins, reverse_normalized_hgvs_genomic, hgvs_genomic_5pr, stored_hgvs_not_delins, hgvs_genomic, 3) + hgvs_refreshed_variant = self.transcript_disparity(reverse_normalized_hgvs_genomic, stored_hgvs_not_delins, 
hgvs_genomic, 3) # GAP IN THE CHROMOSOME elif self.disparity_deletion_in[0] == 'chromosome': # Set warning variables gap_position = '' gapped_alignment_warning = str( - hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly - hgvs_refreshed_variant = tx_hgvs_not_delins + self.hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly + hgvs_refreshed_variant = self.tx_hgvs_not_delins # Warn self.auto_info = self.auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(self.disparity_deletion_in[ @@ -1488,8 +1375,8 @@ def dup_ins_5prime_shift(self, stored_hgvs_not_delins, hgvs_genomic_5pr, saved_h hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) # This test will only occur in dup of single base, insertion or substitution if not re.search('_', str(hgvs_not_delins.posedit.pos)): - if re.search('dup', hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', - hgvs_genomic_5pr.posedit.edit.type): + if re.search('dup', self.hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', + self.hgvs_genomic_5pr.posedit.edit.type): # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 @@ -1498,8 +1385,8 @@ def dup_ins_5prime_shift(self, stored_hgvs_not_delins, hgvs_genomic_5pr, saved_h str(saved_hgvs_coding.ac)) if (( transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( - hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + self.hgvs_genomic_5pr.posedit.pos.end.base - self.hgvs_genomic_5pr.posedit.pos.start.base)): + if re.search('dup', str(self.hgvs_genomic_5pr.posedit.edit)): hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 end = hgvs_not_delins.posedit.pos.end.base @@ -1508,12 +1395,12 @@ def dup_ins_5prime_shift(self, stored_hgvs_not_delins, hgvs_genomic_5pr, saved_h hgvs_not_delins.posedit.edit.alt = ref_bases[ :1] + hgvs_not_delins.posedit.edit.alt[ 1:] + ref_bases[1:] - elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): + elif re.search('ins', str(self.hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(self.hgvs_genomic_5pr.posedit.edit)): hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): + str(self.hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', str(self.hgvs_genomic_5pr.posedit.edit)): hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 end = hgvs_not_delins.posedit.pos.end.base @@ -1523,7 +1410,7 @@ def dup_ins_5prime_shift(self, stored_hgvs_not_delins, hgvs_genomic_5pr, saved_h :1] + 
hgvs_not_delins.posedit.edit.alt[ 1:] + ref_bases[1:] else: - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + if re.search('dup', str(self.hgvs_genomic_5pr.posedit.edit)): hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 end = hgvs_not_delins.posedit.pos.end.base @@ -1532,12 +1419,12 @@ def dup_ins_5prime_shift(self, stored_hgvs_not_delins, hgvs_genomic_5pr, saved_h hgvs_not_delins.posedit.edit.alt = ref_bases[ :1] + hgvs_not_delins.posedit.edit.alt[ 1:] + ref_bases[1:] - elif re.search('ins', str(hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): + elif re.search('ins', str(self.hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(self.hgvs_genomic_5pr.posedit.edit)): hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 elif re.search('ins', - str(hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(hgvs_genomic_5pr.posedit.edit)): + str(self.hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', str(self.hgvs_genomic_5pr.posedit.edit)): hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 end = hgvs_not_delins.posedit.pos.end.base @@ -1566,7 +1453,7 @@ def remove_offsetting_to_span_gap(self, rn_tx_hgvs_not_delins): return rn_tx_hgvs_not_delins - def move_tx_end_base_to_next_nonoffset(self, rn_tx_hgvs_not_delins, saved_hgvs_coding, tx_hgvs_not_delins, back=True): + def move_tx_end_base_to_next_nonoffset(self, rn_tx_hgvs_not_delins, saved_hgvs_coding, back=True): # move tx end base back to next available non-offset base rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 rn_tx_hgvs_not_delins.posedit.edit.ref = '' @@ -1574,11 +1461,11 @@ def move_tx_end_base_to_next_nonoffset(self, rn_tx_hgvs_not_delins, saved_hgvs_c # Add the additional base to the ALT start = rn_tx_hgvs_not_delins.posedit.pos.end.base 
- 1 end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = self.validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + ref_bases = self.validator.sf.fetch_seq(str(self.tx_hgvs_not_delins.ac), start, end) rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases else: # move tx end base to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.pos.end.base = self.tx_hgvs_not_delins.posedit.pos.end.base + 1 if re.match('NM_', str(rn_tx_hgvs_not_delins)): test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) else: @@ -1609,11 +1496,11 @@ def move_tx_start_base_to_previous_nonoffset(self, rn_tx_hgvs_not_delins, saved_ return rn_tx_hgvs_not_delins, hgvs_not_delins - def c2_pos_edit(self, tx_hgvs_not_delins, hgvs_genomic): + def c2_pos_edit(self, hgvs_genomic): try: - c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + c2 = self.validator.vm.n_to_c(self.tx_hgvs_not_delins) except: - c2 = tx_hgvs_not_delins + c2 = self.tx_hgvs_not_delins c1 = copy.deepcopy(c2) c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 c1.posedit.pos.start.offset = 0 @@ -1639,11 +1526,11 @@ def c2_pos_edit(self, tx_hgvs_not_delins, hgvs_genomic): return hgvs_refreshed_variant - def c1_pos_edit(self, tx_hgvs_not_delins, hgvs_genomic): + def c1_pos_edit(self, hgvs_genomic): try: - c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + c1 = self.validator.vm.n_to_c(self.tx_hgvs_not_delins) except: - c1 = tx_hgvs_not_delins + c1 = self.tx_hgvs_not_delins c2 = copy.deepcopy(c1) c2.posedit.pos.start = c1.posedit.pos.end c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 @@ -1669,15 +1556,15 @@ def c1_pos_edit(self, tx_hgvs_not_delins, hgvs_genomic): return hgvs_refreshed_variant - def transcript_disparity(self, tx_hgvs_not_delins, reverse_normalized_hgvs_genomic, hgvs_genomic_5pr, stored_hgvs_not_delins, hgvs_genomic, 
running_option): - if (re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', str( - tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', str( - tx_hgvs_not_delins.posedit.pos.end))): - self.gapped_transcripts = self.gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + def transcript_disparity(self, reverse_normalized_hgvs_genomic, stored_hgvs_not_delins, hgvs_genomic, running_option): + if (re.search(r'\+', str(self.tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', str( + self.tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search(r'\+', str(self.tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', str( + self.tx_hgvs_not_delins.posedit.pos.end))): + self.gapped_transcripts = self.gapped_transcripts + ' ' + str(self.tx_hgvs_not_delins.ac) # Copy the current variant - tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) + tx_gap_fill_variant = copy.deepcopy(self.tx_hgvs_not_delins) try: if tx_gap_fill_variant.posedit.edit.alt is None: tx_gap_fill_variant.posedit.edit.alt = '' @@ -1712,11 +1599,11 @@ def transcript_disparity(self, tx_hgvs_not_delins, reverse_normalized_hgvs_genom genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref try: - c_tx_hgvs_not_delins = self.validator.vm.n_to_c(tx_hgvs_not_delins) + c_tx_hgvs_not_delins = self.validator.vm.n_to_c(self.tx_hgvs_not_delins) except Exception: - c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) + c_tx_hgvs_not_delins = copy.copy(self.tx_hgvs_not_delins) genomic_gap_fill_variant_alt = self.validator.vm.t_to_g(c_tx_hgvs_not_delins, - hgvs_genomic_5pr.ac) + self.hgvs_genomic_5pr.ac) # Ensure an ALT exists try: @@ -1798,13 +1685,13 @@ def transcript_disparity(self, tx_hgvs_not_delins, reverse_normalized_hgvs_genom self.disparity_deletion_in[1] = [gap_size] self.auto_info = self.auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) + self.tx_hgvs_not_delins.ac) non_valid_caution = 'true' # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) if re.match('NM_', str(for_location_c)): - for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + for_location_c = self.variant.no_norm_evm.n_to_c(self.tx_hgvs_not_delins) if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): gps = for_location_c.posedit.pos.start.base - 1 gpe = for_location_c.posedit.pos.start.base @@ -1815,14 +1702,14 @@ def transcript_disparity(self, tx_hgvs_not_delins, reverse_normalized_hgvs_genom self.auto_info = self.auto_info + '%s' % (gap_position) else: - if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: + if self.tx_hgvs_not_delins.posedit.pos.start.offset == 0 and self.tx_hgvs_not_delins.posedit.pos.end.offset == 0: # In this instance, we have identified a transcript gap but the n. version of # the transcript variant but do not have a position which actually hits the gap, # so the variant likely spans the gap, and is not picked up by an offset. 
try: - c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + c1 = self.validator.vm.n_to_c(self.tx_hgvs_not_delins) except: - c1 = tx_hgvs_not_delins + c1 = self.tx_hgvs_not_delins g1 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) g3 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) @@ -1834,89 +1721,89 @@ def transcript_disparity(self, tx_hgvs_not_delins, reverse_normalized_hgvs_genom if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: pass else: - tx_hgvs_not_delins = c2 + self.tx_hgvs_not_delins = c2 try: - tx_hgvs_not_delins = self.validator.vm.c_to_n(tx_hgvs_not_delins) + self.tx_hgvs_not_delins = self.validator.vm.c_to_n(self.tx_hgvs_not_delins) except hgvs.exceptions.HGVSError: fn.exceptPass() except hgvs.exceptions.HGVSInvalidVariantError: fn.exceptPass() - if re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.end)): + if re.search(r'\+', str(self.tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\+', str(self.tx_hgvs_not_delins.posedit.pos.end)): self.auto_info = self.auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( self.disparity_deletion_in[ 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) + self.tx_hgvs_not_delins.ac) non_valid_caution = 'true' - hgvs_refreshed_variant = self.c2_pos_edit(tx_hgvs_not_delins, hgvs_genomic) + hgvs_refreshed_variant = self.c2_pos_edit(hgvs_genomic) # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) if re.match('NM_', str(for_location_c)): - for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + for_location_c = self.variant.no_norm_evm.n_to_c(self.tx_hgvs_not_delins) gps = for_location_c.posedit.pos.start.base gpe = for_location_c.posedit.pos.start.base + 1 gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' # Warn update self.auto_info = self.auto_info + '%s' % (gap_position) - elif re.search(r'\+', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\+', str(tx_hgvs_not_delins.posedit.pos.start)): + elif re.search(r'\+', str(self.tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\+', str(self.tx_hgvs_not_delins.posedit.pos.start)): self.auto_info = self.auto_info + 'Genome position ' + str( stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( self.disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - self.gapped_transcripts = self.gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + self.tx_hgvs_not_delins.ac) + self.gapped_transcripts = self.gapped_transcripts + ' ' + str(self.tx_hgvs_not_delins.ac) non_valid_caution = 'true' - hgvs_refreshed_variant = self.c1_pos_edit(tx_hgvs_not_delins, hgvs_genomic) + hgvs_refreshed_variant = self.c1_pos_edit(hgvs_genomic) # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) if re.match('NM_', str(for_location_c)): - for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + for_location_c = self.variant.no_norm_evm.n_to_c(self.tx_hgvs_not_delins) gps = for_location_c.posedit.pos.end.base gpe = for_location_c.posedit.pos.end.base + 1 gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' # Warn update self.auto_info = self.auto_info + '%s' % (gap_position) elif re.search(r'\-', - str(tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.end)): + str(self.tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\-', str(self.tx_hgvs_not_delins.posedit.pos.end)): self.auto_info = self.auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( self.disparity_deletion_in[ 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) + self.tx_hgvs_not_delins.ac) non_valid_caution = 'true' - hgvs_refreshed_variant = self.c2_pos_edit(tx_hgvs_not_delins, hgvs_genomic) + hgvs_refreshed_variant = self.c2_pos_edit(hgvs_genomic) # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) if re.match('NM_', str(for_location_c)): - for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + for_location_c = self.variant.no_norm_evm.n_to_c(self.tx_hgvs_not_delins) gps = for_location_c.posedit.pos.start.base - 1 gpe = for_location_c.posedit.pos.start.base gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' # Warn update self.auto_info = self.auto_info + '%s' % (gap_position) - elif re.search(r'\-', str(tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.start)): + elif re.search(r'\-', str(self.tx_hgvs_not_delins.posedit.pos.end)) and not re.search( + r'\-', str(self.tx_hgvs_not_delins.posedit.pos.start)): self.auto_info = self.auto_info + 'Genome position ' + str( stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( self.disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - self.gapped_transcripts = self.gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + self.tx_hgvs_not_delins.ac) + self.gapped_transcripts = self.gapped_transcripts + ' ' + str(self.tx_hgvs_not_delins.ac) non_valid_caution = 'true' ## Have variation in first copy here! 
if running_option == 1: try: - c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + c1 = self.validator.vm.n_to_c(self.tx_hgvs_not_delins) except: - c1 = tx_hgvs_not_delins + c1 = self.tx_hgvs_not_delins c2 = copy.deepcopy(c1) c2.posedit.pos.start = c1.posedit.pos.end c2.posedit.pos.end.base = c1.posedit.pos.end.base @@ -1933,12 +1820,12 @@ def transcript_disparity(self, tx_hgvs_not_delins, reverse_normalized_hgvs_genom c3.posedit.edit.alt = alternate hgvs_refreshed_variant = c3 else: - hgvs_refreshed_variant = self.c1_pos_edit(tx_hgvs_not_delins, hgvs_genomic) + hgvs_refreshed_variant = self.c1_pos_edit(hgvs_genomic) # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) if re.match('NM_', str(for_location_c)): - for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + for_location_c = self.variant.no_norm_evm.n_to_c(self.tx_hgvs_not_delins) gps = for_location_c.posedit.pos.end.base - 1 gpe = for_location_c.posedit.pos.end.base gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' @@ -1949,16 +1836,16 @@ def transcript_disparity(self, tx_hgvs_not_delins, reverse_normalized_hgvs_genom stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( self.disparity_deletion_in[ 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) + '\n' + self.tx_hgvs_not_delins.ac) + '\n' ## Have variation in second copy here! 
if running_option == 2: - tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.start.base + len( - tx_hgvs_not_delins.posedit.edit.ref) - 1 - hgvs_refreshed_variant = tx_hgvs_not_delins + self.tx_hgvs_not_delins.posedit.pos.end.base = self.tx_hgvs_not_delins.posedit.pos.start.base + len( + self.tx_hgvs_not_delins.posedit.edit.ref) - 1 + hgvs_refreshed_variant = self.tx_hgvs_not_delins else: - hgvs_refreshed_variant = tx_hgvs_not_delins - self.gapped_transcripts = self.gapped_transcripts + ' ' + str(tx_hgvs_not_delins.ac) + hgvs_refreshed_variant = self.tx_hgvs_not_delins + self.gapped_transcripts = self.gapped_transcripts + ' ' + str(self.tx_hgvs_not_delins.ac) return hgvs_refreshed_variant @@ -2050,15 +1937,15 @@ def logic_check(self, hgvs_not_delins, rn_tx_hgvs_not_delins, hgvs_coding): hgvs_t_possibility.posedit.edit.ref) re_capture_tx_variant = ['transcript', gap_length, hgvs_t_possibility] hgvs_not_delins = internal_possibility - hgvs_genomic_5pr = internal_possibility + self.hgvs_genomic_5pr = internal_possibility break if re_capture_tx_variant != []: try: - tx_hgvs_not_delins = self.validator.vm.c_to_n(re_capture_tx_variant[2]) + self.tx_hgvs_not_delins = self.validator.vm.c_to_n(re_capture_tx_variant[2]) except: - tx_hgvs_not_delins = re_capture_tx_variant[2] + self.tx_hgvs_not_delins = re_capture_tx_variant[2] self.disparity_deletion_in = re_capture_tx_variant[0:-1] else: pass - return tx_hgvs_not_delins, hgvs_not_delins, hgvs_genomic_5pr + return hgvs_not_delins From a0356aed29e17c43d90837c8323b48bbed3be5b7 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 26 Apr 2019 10:40:38 +0100 Subject: [PATCH 074/223] Created method for the hgvs_seek section that's repeated --- VariantValidator/modules/gapped_mapping.py | 121 +++++++------------- VariantValidator/modules/mappers.py | 122 ++++++++------------- 2 files changed, 86 insertions(+), 157 deletions(-) diff --git a/VariantValidator/modules/gapped_mapping.py 
b/VariantValidator/modules/gapped_mapping.py index 68525710..9023dd22 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -125,49 +125,15 @@ def gapped_g_to_c(self, rel_var): self.orientation = int(ori[0]['alt_strand']) intronic_variant = 'false' - if self.orientation == -1: - # position genomic at its most 5 prime position - try: - query_genomic = self.variant.reverse_normalizer.normalize(self.variant.hgvs_genomic) - except: - query_genomic = self.variant.hgvs_genomic - # Map to the transcript ant test for movement - try: - hgvs_seek_var = self.variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): - pass - else: - hgvs_seek_var = saved_hgvs_coding + hgvs_seek_var = self.get_hgvs_seek_var(self.variant.hgvs_genomic, saved_hgvs_coding) - elif self.orientation != -1: - # position genomic at its most 3 prime position - try: - query_genomic = self.variant.hn.normalize(self.variant.hgvs_genomic) - except: - query_genomic = self.variant.hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = self.variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding + if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( 
+ saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): + pass else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - saved_hgvs_coding.posedit.pos.start.base + saved_hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - saved_hgvs_coding.posedit.pos.end.base + saved_hgvs_coding.posedit.pos.end.offset): - pass - else: - hgvs_seek_var = saved_hgvs_coding + hgvs_seek_var = saved_hgvs_coding try: intron_test = self.variant.hn.normalize(hgvs_seek_var) @@ -850,45 +816,16 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): hgvs_coding.ac) # Look for normalized variant options that do not match hgvs_coding - if self.orientation == -1: - # position genomic at its most 5 prime position - try: - query_genomic = self.variant.reverse_normalizer.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = self.variant.evm.g_to_t(query_genomic, hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - if ( - hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding + hgvs_seek_var = self.get_hgvs_seek_var(hgvs_genomic, hgvs_coding) - elif self.orientation != -1: - # position genomic at its most 3 prime position - try: - query_genomic = self.variant.hn.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript ant test for movement - try: - hgvs_seek_var = 
self.variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - if ( - hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding + if ( + hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding try: intron_test = self.variant.hn.normalize(hgvs_seek_var) @@ -1949,3 +1886,29 @@ def logic_check(self, hgvs_not_delins, rn_tx_hgvs_not_delins, hgvs_coding): else: pass return hgvs_not_delins + + def get_hgvs_seek_var(self, hgvs_genomic, hgvs_coding, ori=None, with_query_genomic=False): + if not ori: + ori = self.orientation + + if ori == -1: + try: + query_genomic = self.variant.reverse_normalizer.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + else: + # position genomic at its most 3 prime position + try: + query_genomic = self.variant.hn.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript and test for movement + try: + hgvs_seek_var = self.variant.evm.g_to_t(query_genomic, hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = hgvs_coding + + if with_query_genomic: + return hgvs_seek_var, query_genomic + + return hgvs_seek_var diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index a1c465bd..2def29a4 100644 --- 
a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -779,33 +779,25 @@ def transcripts_to_gene(variant, validator): # Look for normalized variant options that do not match hgvs_coding # boundary crossing normalization - if ori == -1: - # position genomic at its most 5 prime position - try: - query_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = variant.evm.g_to_t(query_genomic, hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = hgvs_coding + hgvs_seek_var, query_genomic = gap_mapper.get_hgvs_seek_var(hgvs_genomic, hgvs_coding, + ori=ori, with_query_genomic=True) - if hgvs_coding.posedit.edit.type != hgvs_seek_var.posedit.edit.type: - pass - elif suppress_c_normalization == 'true': - pass - elif (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - try: - automap = fn.valstr(hgvs_coding) + ' normalized to ' + fn.valstr(hgvs_seek_var) - hgvs_coding = hgvs_seek_var - variant.warnings += ': ' + automap - rng = variant.hn.normalize(query_genomic) - except NotImplementedError: - fn.exceptPass() + if hgvs_coding.posedit.edit.type != hgvs_seek_var.posedit.edit.type: + pass + elif suppress_c_normalization == 'true': + pass + elif (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + try: + automap = 
fn.valstr(hgvs_coding) + ' normalized to ' + fn.valstr(hgvs_seek_var) + hgvs_coding = hgvs_seek_var + variant.warnings += ': ' + automap + except NotImplementedError: + fn.exceptPass() + if ori == -1: + rng = variant.hn.normalize(query_genomic) try: c_for_p = validator.vm.g_to_t(rng, hgvs_coding.ac) except hgvs.exceptions.HGVSInvalidIntervalError as e: @@ -822,63 +814,37 @@ def transcripts_to_gene(variant, validator): variant.warnings += ': ' + str(error) except NotImplementedError: fn.exceptPass() - else: - # position genomic at its most 3 prime position + elif ori == 1: + # Double check protein position by reverse_norm genomic, and normalize back to c. for normalize or not to normalize issue + rng = variant.reverse_normalizer.normalize(query_genomic) try: - query_genomic = variant.hn.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = variant.evm.g_to_t(query_genomic, hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = hgvs_coding - - if hgvs_coding.posedit.edit.type != hgvs_seek_var.posedit.edit.type: - pass - elif suppress_c_normalization == 'true': - pass - elif (hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + # Diagram where - = intron and E = Exon + + # 3 prime + # ---------EEEEEEEEEEEEEEEEE----------- + # < + # Result, normalize of new variant will baulk at intronic + # 5 prime + # < + # Result, normalize of new variant will be happy + c_for_p = validator.vm.g_to_t(rng, hgvs_coding.ac) try: - automap = fn.valstr(hgvs_coding) + ' normalized to ' + fn.valstr(hgvs_seek_var) - hgvs_coding = hgvs_seek_var - variant.warnings += ': ' + automap - except 
NotImplementedError: + variant.hn.normalize(c_for_p) + except hgvs.exceptions.HGVSError as e: fn.exceptPass() - else: - # Double check protein position by reverse_norm genomic, and normalize back to c. for normalize or not to normalize issue - rng = variant.reverse_normalizer.normalize(query_genomic) - try: - # Diagram where - = intron and E = Exon - - # 3 prime - # ---------EEEEEEEEEEEEEEEEE----------- - # < - # Result, normalize of new variant will baulk at intronic - # 5 prime - # < - # Result, normalize of new variant will be happy - c_for_p = validator.vm.g_to_t(rng, hgvs_coding.ac) - try: - variant.hn.normalize(c_for_p) - except hgvs.exceptions.HGVSError as e: - fn.exceptPass() + else: + # hgvs_protein = va_func.protein(str(c_for_p), variant.evm, hp) + protein_dict = validator.myc_to_p(c_for_p, variant.evm, re_to_p=False) + if protein_dict['error'] == '': + hgvs_protein = protein_dict['hgvs_protein'] else: - # hgvs_protein = va_func.protein(str(c_for_p), variant.evm, hp) - protein_dict = validator.myc_to_p(c_for_p, variant.evm, re_to_p=False) - if protein_dict['error'] == '': + error = protein_dict['error'] + if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': hgvs_protein = protein_dict['hgvs_protein'] - else: - error = protein_dict['error'] - if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': - hgvs_protein = protein_dict['hgvs_protein'] - variant.warnings += ': ' + str(error) - # Replace protein description in vars table - except Exception: - fn.exceptPass() + variant.warnings += ': ' + str(error) + # Replace protein description in vars table + except Exception: + fn.exceptPass() # Check for up-to-date transcript version tx_id_info = validator.hdp.get_tx_identity_info(hgvs_coding.ac) From fcdfbe9b0c414a0c1fe6ae522bd1bbaac7d52304 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 26 Apr 2019 10:47:16 +0100 Subject: [PATCH 075/223] Completed todo in gap_mapper --- 
VariantValidator/modules/gapped_mapping.py | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index 9023dd22..7f06cf99 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -917,24 +917,8 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - ## TODO: check this if should be move_tx_end_base_to_next - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # Delete the ref - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # Add the additional base to the ALT - start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = self.validator.sf.fetch_seq(str(self.tx_hgvs_not_delins.ac), start, end) - rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.validator.myevm_t_to_g(test_tx_var, self.variant.no_norm_evm, - self.variant.primary_assembly, self.variant.hn) - rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - str(saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding) + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, with_base_subtract=True) From a1a3f691257c1e5a8b18130dec8b7a66250d22dc Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 26 Apr 2019 10:55:30 +0100 Subject: 
[PATCH 076/223] Changed indentation in core validator --- VariantValidator/modules/vvMixinCore.py | 3202 +++++++++++------------ 1 file changed, 1601 insertions(+), 1601 deletions(-) diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 0682046a..b9f6cd97 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -584,703 +584,684 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr logger.debug(er) #debug raise - continue # Outside the for loop ###################### logger.trace("End of for loop") # order the rows - # from operator import itemgetter by_order = sorted(self.batch_list, key=lambda x: x.order) for valid in by_order: - if valid.write: - # Blank VCF - # chr = '' - # pos = '' - # ref = '' - # alt = '' - - # Fromulate a json type response - dict_out = {} - - # Set gap compensation bool - gap_compensation = True - - # warngins - warnings = valid.warnings - warnings = re.sub('del[GATC][GATC][GATC][GATC]+', 'del', warnings) - warnings = re.sub('^: ', '', warnings) - warnings = re.sub('::', ':', warnings) - - # Submitted variant - submitted = valid.original - - # Genomic sequence variation - genomic_variant = valid.genomic_g + if not valid.write: + continue - # genomic accession - if genomic_variant != '': - hgvs_genomic_variant = self.hp.parse_hgvs_variant(genomic_variant) - genomic_variant = fn.valstr(hgvs_genomic_variant) - genomic_accession = hgvs_genomic_variant.ac - else: - genomic_accession = '' - - # RefSeqGene variation - refseqgene_variant = valid.genomic_r - refseqgene_variant = refseqgene_variant.strip() - if re.search('RefSeqGene', refseqgene_variant) or refseqgene_variant == '': - warnings = warnings + ': ' + refseqgene_variant - refseqgene_variant = '' + # Blank VCF + # chr = '' + # pos = '' + # ref = '' + # alt = '' + + # Fromulate a json type response + dict_out = {} + + # Set gap compensation bool + gap_compensation = 
True + + # warngins + warnings = valid.warnings + warnings = re.sub('del[GATC][GATC][GATC][GATC]+', 'del', warnings) + warnings = re.sub('^: ', '', warnings) + warnings = re.sub('::', ':', warnings) + + # Submitted variant + submitted = valid.original + + # Genomic sequence variation + genomic_variant = valid.genomic_g + + # genomic accession + if genomic_variant != '': + hgvs_genomic_variant = self.hp.parse_hgvs_variant(genomic_variant) + genomic_variant = fn.valstr(hgvs_genomic_variant) + genomic_accession = hgvs_genomic_variant.ac + else: + genomic_accession = '' + + # RefSeqGene variation + refseqgene_variant = valid.genomic_r + refseqgene_variant = refseqgene_variant.strip() + if re.search('RefSeqGene', refseqgene_variant) or refseqgene_variant == '': + warnings = warnings + ': ' + refseqgene_variant + refseqgene_variant = '' + lrg_variant = '' + hgvs_refseqgene_variant = 'false' + else: + hgvs_refseqgene_variant = self.hp.parse_hgvs_variant(refseqgene_variant) + rsg_ac = self.db.get_lrgID_from_RefSeqGeneID(str(hgvs_refseqgene_variant.ac)) + if rsg_ac[0] == 'none': lrg_variant = '' - hgvs_refseqgene_variant = 'false' else: - hgvs_refseqgene_variant = self.hp.parse_hgvs_variant(refseqgene_variant) - rsg_ac = self.db.get_lrgID_from_RefSeqGeneID(str(hgvs_refseqgene_variant.ac)) - if rsg_ac[0] == 'none': - lrg_variant = '' + hgvs_lrg = copy.deepcopy(hgvs_refseqgene_variant) + hgvs_lrg.ac = rsg_ac[0] + lrg_variant = fn.valstr(hgvs_lrg) + if rsg_ac[1] == 'public': + pass else: - hgvs_lrg = copy.deepcopy(hgvs_refseqgene_variant) - hgvs_lrg.ac = rsg_ac[0] - lrg_variant = fn.valstr(hgvs_lrg) - if rsg_ac[1] == 'public': - pass - else: - warnings = warnings + ': The current status of ' + str( - hgvs_lrg.ac) + ' is pending therefore changes may be made to the LRG reference sequence' - - # Transcript sequence variation - tx_variant = valid.coding - if tx_variant != '': - if '(' in tx_variant and ')' in tx_variant: - tx_variant = tx_variant.split('(')[1] - tx_variant = 
tx_variant.replace(')', '') - - # transcript accession - hgvs_tx_variant = self.hp.parse_hgvs_variant(tx_variant) - tx_variant = fn.valstr(hgvs_tx_variant) - hgvs_transcript_variant = self.hp.parse_hgvs_variant(tx_variant) - transcript_accession = hgvs_transcript_variant.ac - - # Handle LRG - lrg_status = 'public' - lrg_transcript = self.db.get_lrgTranscriptID_from_RefSeqTranscriptID(transcript_accession) - if lrg_transcript == 'none': - lrg_transcript_variant = '' - else: - # Note - LRG availability is dependant on UTA containing the data. In some - # instances we will be able to display the LRG_tx without being able to - # display the LRG gene data - - # if not re.search('RefSeqGene', refseqgene_variant) or refseqgene_variant != '': - # if hgvs_refseqgene_variant != 'RefSeqGene record not available' and hgvs_refseqgene_variant != 'false': - try: - hgvs_lrg_t = self.vm.g_to_t(hgvs_refseqgene_variant, transcript_accession) - hgvs_lrg_t.ac = lrg_transcript - lrg_transcript_variant = fn.valstr(hgvs_lrg_t) - except: - if hgvs_transcript_variant.posedit.pos.start.offset == 0 and hgvs_transcript_variant.posedit.pos.end.offset == 0: - hgvs_lrg_t = copy.copy(hgvs_transcript_variant) - hgvs_lrg_t.ac = lrg_transcript - lrg_transcript_variant = fn.valstr(hgvs_lrg_t) - else: - lrg_transcript_variant = '' - else: - transcript_accession = '' + warnings = warnings + ': The current status of ' + str( + hgvs_lrg.ac) + ' is pending therefore changes may be made to the LRG reference sequence' + + # Transcript sequence variation + tx_variant = valid.coding + if tx_variant != '': + if '(' in tx_variant and ')' in tx_variant: + tx_variant = tx_variant.split('(')[1] + tx_variant = tx_variant.replace(')', '') + + # transcript accession + hgvs_tx_variant = self.hp.parse_hgvs_variant(tx_variant) + tx_variant = fn.valstr(hgvs_tx_variant) + hgvs_transcript_variant = self.hp.parse_hgvs_variant(tx_variant) + transcript_accession = hgvs_transcript_variant.ac + + # Handle LRG + lrg_status = 
'public' + lrg_transcript = self.db.get_lrgTranscriptID_from_RefSeqTranscriptID(transcript_accession) + if lrg_transcript == 'none': lrg_transcript_variant = '' + else: + # Note - LRG availability is dependant on UTA containing the data. In some + # instances we will be able to display the LRG_tx without being able to + # display the LRG gene data - # Look for intronic variants - if transcript_accession != '' and genomic_accession != '': - # Remove del bases - str_transcript = fn.valstr(hgvs_transcript_variant) - hgvs_transcript_variant = self.hp.parse_hgvs_variant(str_transcript) + # if not re.search('RefSeqGene', refseqgene_variant) or refseqgene_variant != '': + # if hgvs_refseqgene_variant != 'RefSeqGene record not available' and hgvs_refseqgene_variant != 'false': try: - self.vr.validate(hgvs_transcript_variant) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('intronic variant', error): - genome_context_transcript_variant = genomic_accession + '(' + transcript_accession + '):c.' + str( - hgvs_transcript_variant.posedit) - if refseqgene_variant != '': - hgvs_refseqgene_variant = self.hp.parse_hgvs_variant(refseqgene_variant) - refseqgene_accession = hgvs_refseqgene_variant.ac - hgvs_coding_from_refseqgene = self.vm.g_to_t(hgvs_refseqgene_variant, - hgvs_transcript_variant.ac) - hgvs_coding_from_refseqgene = fn.valstr(hgvs_coding_from_refseqgene) - hgvs_coding_from_refseqgene = self.hp.parse_hgvs_variant(hgvs_coding_from_refseqgene) - RefSeqGene_context_transcript_variant = refseqgene_accession + '(' + transcript_accession + '):c.' 
+ str( - hgvs_coding_from_refseqgene.posedit.pos) + str( - hgvs_coding_from_refseqgene.posedit.edit) - else: - RefSeqGene_context_transcript_variant = '' + hgvs_lrg_t = self.vm.g_to_t(hgvs_refseqgene_variant, transcript_accession) + hgvs_lrg_t.ac = lrg_transcript + lrg_transcript_variant = fn.valstr(hgvs_lrg_t) + except: + if hgvs_transcript_variant.posedit.pos.start.offset == 0 and hgvs_transcript_variant.posedit.pos.end.offset == 0: + hgvs_lrg_t = copy.copy(hgvs_transcript_variant) + hgvs_lrg_t.ac = lrg_transcript + lrg_transcript_variant = fn.valstr(hgvs_lrg_t) + else: + lrg_transcript_variant = '' + else: + transcript_accession = '' + lrg_transcript_variant = '' + + # Look for intronic variants + if transcript_accession != '' and genomic_accession != '': + # Remove del bases + str_transcript = fn.valstr(hgvs_transcript_variant) + hgvs_transcript_variant = self.hp.parse_hgvs_variant(str_transcript) + try: + self.vr.validate(hgvs_transcript_variant) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('intronic variant', error): + genome_context_transcript_variant = genomic_accession + '(' + transcript_accession + '):c.' + str( + hgvs_transcript_variant.posedit) + if refseqgene_variant != '': + hgvs_refseqgene_variant = self.hp.parse_hgvs_variant(refseqgene_variant) + refseqgene_accession = hgvs_refseqgene_variant.ac + hgvs_coding_from_refseqgene = self.vm.g_to_t(hgvs_refseqgene_variant, + hgvs_transcript_variant.ac) + hgvs_coding_from_refseqgene = fn.valstr(hgvs_coding_from_refseqgene) + hgvs_coding_from_refseqgene = self.hp.parse_hgvs_variant(hgvs_coding_from_refseqgene) + RefSeqGene_context_transcript_variant = refseqgene_accession + '(' + transcript_accession + '):c.' 
+ str( + hgvs_coding_from_refseqgene.posedit.pos) + str( + hgvs_coding_from_refseqgene.posedit.edit) else: - genome_context_transcript_variant = '' # transcript_variant RefSeqGene_context_transcript_variant = '' else: genome_context_transcript_variant = '' # transcript_variant RefSeqGene_context_transcript_variant = '' else: - genome_context_transcript_variant = '' + genome_context_transcript_variant = '' # transcript_variant RefSeqGene_context_transcript_variant = '' - - # Protein description - predicted_protein_variant = valid.protein - if re.match('NP_', predicted_protein_variant): - rs_p, pred_prot_posedit = predicted_protein_variant.split(':') - lrg_p = self.db.get_lrgProteinID_from_RefSeqProteinID(rs_p) - if re.match('LRG', lrg_p): - predicted_protein_variant = rs_p + '(' + lrg_p + '):' + pred_prot_posedit - - # Gene - if transcript_accession != '': - try: - gene_symbol = self.db.get_gene_symbol_from_transcriptID(transcript_accession) - except: - gene_symbol = 'Unable to verify gene symbol for ' + str(transcript_accession) + else: + genome_context_transcript_variant = '' + RefSeqGene_context_transcript_variant = '' + + # Protein description + predicted_protein_variant = valid.protein + if re.match('NP_', predicted_protein_variant): + rs_p, pred_prot_posedit = predicted_protein_variant.split(':') + lrg_p = self.db.get_lrgProteinID_from_RefSeqProteinID(rs_p) + if re.match('LRG', lrg_p): + predicted_protein_variant = rs_p + '(' + lrg_p + '):' + pred_prot_posedit + + # Gene + if transcript_accession != '': + try: + gene_symbol = self.db.get_gene_symbol_from_transcriptID(transcript_accession) + except: + gene_symbol = 'Unable to verify gene symbol for ' + str(transcript_accession) + else: + gene_symbol = '' + + # Transcript description + transcript_description = valid.description + + # Stashed variants + # if valid.test_stash_tx_left: + # test_stash_tx_left = valid.test_stash_tx_left + # if valid.test_stash_tx_right: + # test_stash_tx_right = 
valid.test_stash_tx_right + + # Multiple genomic variants + # multi_gen_vars = [] + if tx_variant != '': + hgvs_coding = self.hp.parse_hgvs_variant(str(tx_variant)) + # Gap gene black list + try: + gene_symbol = self.db.get_gene_symbol_from_transcriptID(hgvs_coding.ac) + except Exception: + fn.exceptPass() else: - gene_symbol = '' - - # Transcript description - transcript_description = valid.description - - # Stashed variants - # if valid.test_stash_tx_left: - # test_stash_tx_left = valid.test_stash_tx_left - # if valid.test_stash_tx_right: - # test_stash_tx_right = valid.test_stash_tx_right - - # Multiple genomic variants - # multi_gen_vars = [] - if tx_variant != '': - hgvs_coding = self.hp.parse_hgvs_variant(str(tx_variant)) - # Gap gene black list - try: - gene_symbol = self.db.get_gene_symbol_from_transcriptID(hgvs_coding.ac) - except Exception: - fn.exceptPass() - else: - # If the gene symbol is not in the list, the value False will be returned - gap_compensation = vvChromosomes.gap_black_list(gene_symbol) + # If the gene symbol is not in the list, the value False will be returned + gap_compensation = vvChromosomes.gap_black_list(gene_symbol) - # Look for variants spanning introns + # Look for variants spanning introns + try: + hgvs_coding = hn.normalize(hgvs_coding) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.search('boundary', str(error)) or re.search('spanning', str(error)): + gap_compensation = False + else: + pass + except hgvs.exceptions.HGVSError: + fn.exceptPass() + + # Warn gap code status + logger.warning("gap_compensation_3 = " + str(gap_compensation)) + multi_g = [] + multi_list = [] + mapping_options = self.hdp.get_tx_mapping_options(hgvs_coding.ac) + for alt_chr in mapping_options: + if (re.match('NC_', alt_chr[1]) or re.match('NT_', alt_chr[1]) or re.match('NW_', + alt_chr[1])) and \ + alt_chr[2] == alt_aln_method: + multi_list.append(alt_chr[1]) + + for alt_chr in multi_list: try: - hgvs_coding = 
hn.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.search('boundary', str(error)) or re.search('spanning', str(error)): - gap_compensation = False - else: - pass - except hgvs.exceptions.HGVSError: - fn.exceptPass() - - # Warn gap code status - logger.warning("gap_compensation_3 = " + str(gap_compensation)) - multi_g = [] - multi_list = [] - mapping_options = self.hdp.get_tx_mapping_options(hgvs_coding.ac) - for alt_chr in mapping_options: - if (re.match('NC_', alt_chr[1]) or re.match('NT_', alt_chr[1]) or re.match('NW_', - alt_chr[1])) and \ - alt_chr[2] == alt_aln_method: - multi_list.append(alt_chr[1]) - - for alt_chr in multi_list: - try: - # Re set ori - ori = self.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=alt_chr, - alt_aln_method=alt_aln_method) - orientation = int(ori[0]['alt_strand']) - hgvs_alt_genomic = self.myvm_t_to_g(hgvs_coding, alt_chr, no_norm_evm, hn) - # Set hgvs_genomic accordingly - hgvs_genomic = copy.deepcopy(hgvs_alt_genomic) - - # genomic_possibilities - # 1. take the simple 3 pr normalized hgvs_genomic - # 2. Lock in hgvs_genomic at its most 5 prime position wrt genome - hgvs_genomic_possibilities = [] - - # Loop out gap code under these circumstances! 
- if gap_compensation is True: - logger.warning('g_to_t gap code 3 active') - rn_hgvs_genomic = reverse_normalizer.normalize(hgvs_alt_genomic) - hgvs_genomic_possibilities.append(rn_hgvs_genomic) - if orientation != -1: - try: - chromosome_normalized_hgvs_coding = reverse_normalizer.normalize( - hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - chromosome_normalized_hgvs_coding = hgvs_coding - else: - try: - chromosome_normalized_hgvs_coding = hn.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - chromosome_normalized_hgvs_coding = hgvs_coding - - most_3pr_hgvs_genomic = self.myvm_t_to_g(chromosome_normalized_hgvs_coding, - alt_chr, - no_norm_evm, hn) - hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) + # Re set ori + ori = self.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=alt_chr, + alt_aln_method=alt_aln_method) + orientation = int(ori[0]['alt_strand']) + hgvs_alt_genomic = self.myvm_t_to_g(hgvs_coding, alt_chr, no_norm_evm, hn) + # Set hgvs_genomic accordingly + hgvs_genomic = copy.deepcopy(hgvs_alt_genomic) + + # genomic_possibilities + # 1. take the simple 3 pr normalized hgvs_genomic + # 2. Lock in hgvs_genomic at its most 5 prime position wrt genome + hgvs_genomic_possibilities = [] + + # Loop out gap code under these circumstances! 
+ if gap_compensation is True: + logger.warning('g_to_t gap code 3 active') + rn_hgvs_genomic = reverse_normalizer.normalize(hgvs_alt_genomic) + hgvs_genomic_possibilities.append(rn_hgvs_genomic) + if orientation != -1: + try: + chromosome_normalized_hgvs_coding = reverse_normalizer.normalize( + hgvs_coding) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + chromosome_normalized_hgvs_coding = hgvs_coding + else: + try: + chromosome_normalized_hgvs_coding = hn.normalize(hgvs_coding) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + chromosome_normalized_hgvs_coding = hgvs_coding - # First to the right - hgvs_stash = copy.deepcopy(hgvs_coding) + most_3pr_hgvs_genomic = self.myvm_t_to_g(chromosome_normalized_hgvs_coding, + alt_chr, + no_norm_evm, hn) + hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) + + # First to the right + hgvs_stash = copy.deepcopy(hgvs_coding) + try: + hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) + except: + fn.exceptPass() + try: + stash_ac = hgvs_stash.ac + stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, self.sf) + stash_pos = int(stash_dict['pos']) + stash_ref = stash_dict['ref'] + stash_alt = stash_dict['alt'] + # Generate an end position + stash_end = str(stash_pos + len(stash_ref) - 1) + # make a not real deletion insertion + stash_hgvs_not_delins = self.hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' 
+ str( + stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) try: - hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) + stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) except: fn.exceptPass() - try: - stash_ac = hgvs_stash.ac - stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, self.sf) - stash_pos = int(stash_dict['pos']) - stash_ref = stash_dict['ref'] - stash_alt = stash_dict['alt'] - # Generate an end position - stash_end = str(stash_pos + len(stash_ref) - 1) - # make a not real deletion insertion - stash_hgvs_not_delins = self.hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' + str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + # Store a tx copy for later use + test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) + stash_genomic = self.myvm_t_to_g(test_stash_tx_right, hgvs_alt_genomic.ac, + no_norm_evm, hn) + # Stash the outputs if required + # test variants = NC_000006.11:g.90403795G= (causes double identity) + # NC_000002.11:g.73675227_73675228insCTC (? 
incorrect assumed insertion position) + # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': + # pass + if len(test_stash_tx_right.posedit.edit.ref) == (( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + stash_tx_right = test_stash_tx_right + if hasattr(test_stash_tx_right.posedit.edit, + 'alt') and test_stash_tx_right.posedit.edit.alt is not None: + alt = test_stash_tx_right.posedit.edit.alt + else: + alt = '' + if hasattr(stash_genomic.posedit.edit, + 'alt') and stash_genomic.posedit.edit.alt is not None: + g_alt = stash_genomic.posedit.edit.alt + else: + g_alt = '' + if (len(alt) - ( + test_stash_tx_right.posedit.pos.end.base - test_stash_tx_right.posedit.pos.start.base) + 1) != ( + len(g_alt) - ( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + hgvs_genomic_possibilities.append(stash_genomic) + else: + hgvs_genomic_possibilities.append('') + elif test_stash_tx_right.posedit.edit.type == 'identity': + reform_ident = str(test_stash_tx_right).split(':')[0] + reform_ident = reform_ident + ':c.' + str(test_stash_tx_right.posedit.pos) + 'del' + str( + test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) + hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) try: - stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) - except: - fn.exceptPass() - # Store a tx copy for later use - test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) - stash_genomic = self.myvm_t_to_g(test_stash_tx_right, hgvs_alt_genomic.ac, - no_norm_evm, hn) - # Stash the outputs if required - # test variants = NC_000006.11:g.90403795G= (causes double identity) - # NC_000002.11:g.73675227_73675228insCTC (? 
incorrect assumed insertion position) - # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) - # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': - # pass - if len(test_stash_tx_right.posedit.edit.ref) == (( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - stash_tx_right = test_stash_tx_right - if hasattr(test_stash_tx_right.posedit.edit, - 'alt') and test_stash_tx_right.posedit.edit.alt is not None: - alt = test_stash_tx_right.posedit.edit.alt - else: - alt = '' - if hasattr(stash_genomic.posedit.edit, - 'alt') and stash_genomic.posedit.edit.alt is not None: - g_alt = stash_genomic.posedit.edit.alt - else: - g_alt = '' - if (len(alt) - ( - test_stash_tx_right.posedit.pos.end.base - test_stash_tx_right.posedit.pos.start.base) + 1) != ( - len(g_alt) - ( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - hgvs_genomic_possibilities.append(stash_genomic) - else: - hgvs_genomic_possibilities.append('') - elif test_stash_tx_right.posedit.edit.type == 'identity': - reform_ident = str(test_stash_tx_right).split(':')[0] - reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_right.posedit.pos) + 'del' + str( - test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) - hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) - try: - hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error): - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append('') - else: + hn.normalize(hgvs_reform_ident) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error): stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append(stash_genomic) - else: - try: - hn.normalize(test_stash_tx_right) - except hgvs.exceptions.HGVSUnsupportedOperationError: hgvs_genomic_possibilities.append('') - else: - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: - fn.exceptPass() - except ValueError: - fn.exceptPass() - - # Then to the left - hgvs_stash = copy.deepcopy(hgvs_coding) + else: + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append(stash_genomic) + else: + try: + hn.normalize(test_stash_tx_right) + except hgvs.exceptions.HGVSUnsupportedOperationError: + hgvs_genomic_possibilities.append('') + else: + stash_tx_right = test_stash_tx_right + hgvs_genomic_possibilities.append(stash_genomic) + except hgvs.exceptions.HGVSError as e: + fn.exceptPass() + except ValueError: + fn.exceptPass() + + # Then to the left + hgvs_stash = copy.deepcopy(hgvs_coding) + try: + hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) + except: + fn.exceptPass() + try: + stash_ac = hgvs_stash.ac + stash_dict = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, primary_assembly, + reverse_normalizer, self.sf) + stash_pos = int(stash_dict['pos']) + stash_ref = stash_dict['ref'] + stash_alt = stash_dict['alt'] + # Generate an end position + stash_end = str(stash_pos + 
len(stash_ref) - 1) + # make a not real deletion insertion + stash_hgvs_not_delins = self.hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' + str( + stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) try: - hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) + stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) except: fn.exceptPass() - try: - stash_ac = hgvs_stash.ac - stash_dict = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, primary_assembly, - reverse_normalizer, self.sf) - stash_pos = int(stash_dict['pos']) - stash_ref = stash_dict['ref'] - stash_alt = stash_dict['alt'] - # Generate an end position - stash_end = str(stash_pos + len(stash_ref) - 1) - # make a not real deletion insertion - stash_hgvs_not_delins = self.hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' + str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + # Store a tx copy for later use + test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) + stash_genomic = self.myvm_t_to_g(test_stash_tx_left, hgvs_alt_genomic.ac, + no_norm_evm, hn) + # Stash the outputs if required + # test variants = NC_000006.11:g.90403795G= (causes double identity) + # NC_000002.11:g.73675227_73675228insCTC + # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': + # pass + if len(test_stash_tx_left.posedit.edit.ref) == (( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): + stash_tx_left = test_stash_tx_left + if hasattr(test_stash_tx_left.posedit.edit, + 'alt') and test_stash_tx_left.posedit.edit.alt is not None: + alt = test_stash_tx_left.posedit.edit.alt + else: + alt = '' + if hasattr(stash_genomic.posedit.edit, + 'alt') and stash_genomic.posedit.edit.alt is not None: + g_alt = stash_genomic.posedit.edit.alt + else: + g_alt = '' + 
if (len(alt) - ( + test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( + len(g_alt) - ( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + hgvs_genomic_possibilities.append(stash_genomic) + else: + hgvs_genomic_possibilities.append('') + elif test_stash_tx_left.posedit.edit.type == 'identity': + reform_ident = str(test_stash_tx_left).split(':')[0] + reform_ident = reform_ident + ':c.' + str(test_stash_tx_left.posedit.pos) + 'del' + str( + test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) + hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) try: - stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) - except: - fn.exceptPass() - # Store a tx copy for later use - test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) - stash_genomic = self.myvm_t_to_g(test_stash_tx_left, hgvs_alt_genomic.ac, - no_norm_evm, hn) - # Stash the outputs if required - # test variants = NC_000006.11:g.90403795G= (causes double identity) - # NC_000002.11:g.73675227_73675228insCTC - # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) - # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': - # pass - if len(test_stash_tx_left.posedit.edit.ref) == (( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): - stash_tx_left = test_stash_tx_left - if hasattr(test_stash_tx_left.posedit.edit, - 'alt') and test_stash_tx_left.posedit.edit.alt is not None: - alt = test_stash_tx_left.posedit.edit.alt - else: - alt = '' - if hasattr(stash_genomic.posedit.edit, - 'alt') and stash_genomic.posedit.edit.alt is not None: - g_alt = stash_genomic.posedit.edit.alt - else: - g_alt = '' - if (len(alt) - ( - test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( - len(g_alt) - ( 
- stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - hgvs_genomic_possibilities.append(stash_genomic) - else: - hgvs_genomic_possibilities.append('') - elif test_stash_tx_left.posedit.edit.type == 'identity': - reform_ident = str(test_stash_tx_left).split(':')[0] - reform_ident = reform_ident + ':c.' + str(test_stash_tx_left.posedit.pos) + 'del' + str( - test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) - hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) - try: - hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error): - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append('') - else: + hn.normalize(hgvs_reform_ident) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error): stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append(stash_genomic) - else: - try: - hn.normalize(test_stash_tx_left) - except hgvs.exceptions.HGVSUnsupportedOperationError: hgvs_genomic_possibilities.append('') - else: - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: - fn.exceptPass() - except ValueError: - fn.exceptPass() - - # direct mapping from reverse_normalized transcript insertions in the delins format - try: - if hgvs_coding.posedit.edit.type == 'ins': - most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) - most_3pr_hgvs_transcript_variant = reverse_normalizer.normalize(hgvs_coding) - try: - n_3pr = self.vm.c_to_n(most_3pr_hgvs_transcript_variant) - n_5pr = self.vm.c_to_n(most_5pr_hgvs_transcript_variant) - except: - n_3pr = most_3pr_hgvs_transcript_variant - n_5pr = most_5pr_hgvs_transcript_variant - # Make into a delins by adding the ref bases to the variant ref and alt - pr3_ref = self.sf.fetch_seq(hgvs_coding.ac, 
n_3pr.posedit.pos.start.base - 1, - n_3pr.posedit.pos.end.base) - pr5_ref = self.sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, - n_5pr.posedit.pos.end.base) - most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref - most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref - most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[ - 0] + most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ - pr3_ref[1] - most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[ - 0] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ - pr5_ref[1] - # Map to the genome - genomic_from_most_3pr_hgvs_transcript_variant = self.vm.t_to_g( - most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) - genomic_from_most_5pr_hgvs_transcript_variant = self.vm.t_to_g( - most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) - - # Normalize - If the variant spans a gap it should then form a static genomic variant - try: + else: + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append(stash_genomic) + else: + try: + hn.normalize(test_stash_tx_left) + except hgvs.exceptions.HGVSUnsupportedOperationError: + hgvs_genomic_possibilities.append('') + else: + stash_tx_left = test_stash_tx_left + hgvs_genomic_possibilities.append(stash_genomic) + except hgvs.exceptions.HGVSError as e: + fn.exceptPass() + except ValueError: + fn.exceptPass() + + # direct mapping from reverse_normalized transcript insertions in the delins format + try: + if hgvs_coding.posedit.edit.type == 'ins': + most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) + most_3pr_hgvs_transcript_variant = reverse_normalizer.normalize(hgvs_coding) + try: + n_3pr = self.vm.c_to_n(most_3pr_hgvs_transcript_variant) + n_5pr = self.vm.c_to_n(most_5pr_hgvs_transcript_variant) + except: + n_3pr = most_3pr_hgvs_transcript_variant + n_5pr = most_5pr_hgvs_transcript_variant + # Make into a delins by adding the ref bases to the variant ref and alt + pr3_ref = self.sf.fetch_seq(hgvs_coding.ac, 
n_3pr.posedit.pos.start.base - 1, + n_3pr.posedit.pos.end.base) + pr5_ref = self.sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, + n_5pr.posedit.pos.end.base) + most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref + most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[ + 0] + most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ + pr3_ref[1] + most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[ + 0] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ + pr5_ref[1] + # Map to the genome + genomic_from_most_3pr_hgvs_transcript_variant = self.vm.t_to_g( + most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) + genomic_from_most_5pr_hgvs_transcript_variant = self.vm.t_to_g( + most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) + + # Normalize - If the variant spans a gap it should then form a static genomic variant + try: + genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( + genomic_from_most_3pr_hgvs_transcript_variant) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if error == 'base start position must be <= end position': + start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base + end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( genomic_from_most_3pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if error == 'base start position must be <= end position': - start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base - end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end - 
genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start - genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_3pr_hgvs_transcript_variant) - try: + try: + genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( + genomic_from_most_5pr_hgvs_transcript_variant) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if error == 'base start position must be <= end position': + start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base + end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( genomic_from_most_5pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if error == 'base start position must be <= end position': - start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base - end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start - genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_5pr_hgvs_transcript_variant) - - try: - if genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' 
+ str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_3pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( - genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) - - try: - if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' + str( - most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref - most_3pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( - most_3pr_hgvs_transcript_variant_delins_from_dup) - - try: - if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' 
+ str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_5pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) - - try: - if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' + str( - most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref - most_5pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( - most_5pr_hgvs_transcript_variant_delins_from_dup) - - if len( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( - most_3pr_hgvs_transcript_variant.posedit.edit.alt): - hgvs_genomic_possibilities.append( - genomic_from_most_3pr_hgvs_transcript_variant) - if len( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( - most_5pr_hgvs_transcript_variant.posedit.edit.alt): - hgvs_genomic_possibilities.append( - genomic_from_most_5pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - 
pass - fn.exceptPass() - - # Set variables for problem specific warnings - gapped_alignment_warning = '' - corrective_action_taken = '' - gapped_transcripts = '' - auto_info = '' - - # Mark as not disparity detected - disparity_deletion_in = ['false', 'false'] - # Loop through to see if a gap can be located - possibility_counter = 0 - for possibility in hgvs_genomic_possibilities: - possibility_counter = possibility_counter + 1 - # Loop out stash possibilities which will not spot gaps so are empty - if possibility == '': - continue + try: + if genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' + str( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( + genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) - # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps - hgvs_genomic_variant = possibility - stored_hgvs_genomic_variant = copy.deepcopy(hgvs_genomic_variant) + try: + if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + 
most_3pr_hgvs_transcript_variant.type + '.' + str( + most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( + most_3pr_hgvs_transcript_variant_delins_from_dup) - # Reverse normalize hgvs_genomic_variant: NOTE will replace ref try: - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( - hgvs_genomic_variant) - except hgvs.exceptions.HGVSError as e: - # Strange error caused by gap in genomic - error = str(e) - if re.search('base start position must be <= end position', error): - if hgvs_genomic.posedit.edit.type == 'delins': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( - hgvs_genomic) - if hgvs_genomic.posedit.edit.type == 'del': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( - hgvs_genomic) - if re.search('insertion length must be 1', error): - if hgvs_genomic.posedit.edit.type == 'ins': - start = 
hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) - lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start, end) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( - hgvs_genomic) - - hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - # Store a copy for later use - stored_hgvs_genomic_5pr = copy.deepcopy(hgvs_genomic_5pr) - - # Make VCF - vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, - reverse_normalizer, self.sf) - chr = vcf_dict['chr'] - pos = vcf_dict['pos'] - ref = vcf_dict['ref'] - alt = vcf_dict['alt'] - - # Look for exonic gaps within transcript or chromosome - no_normalized_c = 'false' # Mark true to not produce an additional normalization of c. - - # Generate an end position - end = str(int(pos) + len(ref) - 1) - pos = str(pos) - - # Store a not real deletion insertion to test for gapping - stored_hgvs_not_delins = self.hp.parse_hgvs_variant(str( - hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) - v = [chr, pos, ref, alt] - - # Save a copy of current hgvs_coding + if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' 
+ str( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( + genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) + try: - saved_hgvs_coding = no_norm_evm.g_to_t(stored_hgvs_not_delins, - hgvs_coding.ac) + if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' except Exception as e: - if str( - e) == 'start or end or both are beyond the bounds of transcript record': - saved_hgvs_coding = hgvs_coding - continue + if str(e) == "'Dup' object has no attribute 'alt'": + most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' 
+ str( + most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( + most_5pr_hgvs_transcript_variant_delins_from_dup) + + if len( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( + most_3pr_hgvs_transcript_variant.posedit.edit.alt): + hgvs_genomic_possibilities.append( + genomic_from_most_3pr_hgvs_transcript_variant) + if len( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( + most_5pr_hgvs_transcript_variant.posedit.edit.alt): + hgvs_genomic_possibilities.append( + genomic_from_most_5pr_hgvs_transcript_variant) + + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + pass + fn.exceptPass() + + # Set variables for problem specific warnings + gapped_alignment_warning = '' + corrective_action_taken = '' + gapped_transcripts = '' + auto_info = '' + + # Mark as not disparity detected + disparity_deletion_in = ['false', 'false'] + # Loop through to see if a gap can be located + possibility_counter = 0 + for possibility in hgvs_genomic_possibilities: + possibility_counter = possibility_counter + 1 + # Loop out stash possibilities which will not spot gaps so are empty + if possibility == '': + continue - # Detect intronic variation using normalization - intronic_variant = 'false' - # Look for normalized variant options that do not match hgvs_coding - if orientation == -1: - # position genomic at its most 5 prime position - try: - query_genomic = reverse_normalizer.normalize(hgvs_genomic) - except: - query_genomic = 
hgvs_genomic - # Map to the transcript ant test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if ( - hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding - - elif orientation != -1: - # position genomic at its most 3 prime position - try: - query_genomic = hn.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding + # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps + hgvs_genomic_variant = possibility + stored_hgvs_genomic_variant = copy.deepcopy(hgvs_genomic_variant) + + # Reverse normalize hgvs_genomic_variant: NOTE will replace ref + try: + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( + hgvs_genomic_variant) + except hgvs.exceptions.HGVSError as e: + # Strange error caused by gap in genomic + error = str(e) + if re.search('base start position must be <= end position', error): + if hgvs_genomic.posedit.edit.type == 'delins': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + 
hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( + hgvs_genomic) + if hgvs_genomic.posedit.edit.type == 'del': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( + hgvs_genomic) + if re.search('insertion length must be 1', error): + if hgvs_genomic.posedit.edit.type == 'ins': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + ref_bases = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) + lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start, end) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( + hgvs_genomic) + + hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + # Store a copy for later use + stored_hgvs_genomic_5pr = copy.deepcopy(hgvs_genomic_5pr) + + # Make VCF + vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, + reverse_normalizer, self.sf) + chr = vcf_dict['chr'] + pos = vcf_dict['pos'] + ref = vcf_dict['ref'] + alt = vcf_dict['alt'] + + # Look for exonic gaps within transcript or chromosome + no_normalized_c = 'false' # Mark true to not produce an additional normalization of c. 
+ + # Generate an end position + end = str(int(pos) + len(ref) - 1) + pos = str(pos) + + # Store a not real deletion insertion to test for gapping + stored_hgvs_not_delins = self.hp.parse_hgvs_variant(str( + hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) + v = [chr, pos, ref, alt] + + # Save a copy of current hgvs_coding + try: + saved_hgvs_coding = no_norm_evm.g_to_t(stored_hgvs_not_delins, + hgvs_coding.ac) + except Exception as e: + if str( + e) == 'start or end or both are beyond the bounds of transcript record': + saved_hgvs_coding = hgvs_coding + continue + + # Detect intronic variation using normalization + intronic_variant = 'false' + # Look for normalized variant options that do not match hgvs_coding + if orientation == -1: + # position genomic at its most 5 prime position + try: + query_genomic = reverse_normalizer.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript ant test for movement + try: + hgvs_seek_var = evm.g_to_t(query_genomic, hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: seek_var = fn.valstr(hgvs_seek_var) seek_ac = str(hgvs_seek_var.ac) - if ( - hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding + if ( + hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: 
+ hgvs_seek_var = saved_hgvs_coding + elif orientation != -1: + # position genomic at its most 3 prime position try: - intron_test = hn.normalize(hgvs_seek_var) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - intronic_variant = 'hard_fail' - else: - # Double check to see whether the variant is actually intronic? - for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' + query_genomic = hn.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript and test for movement + try: + hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if ( + hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding - if intronic_variant != 'hard_fail': - if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', - str( - hgvs_seek_var.posedit.pos)) or re.search( - r'\*\d+\+', str( - 
hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str( - hgvs_seek_var.posedit.pos)): + try: + intron_test = hn.normalize(hgvs_seek_var) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + intronic_variant = 'hard_fail' + else: # Double check to see whether the variant is actually intronic? for exon in ori: genomic_start = int(exon['alt_start_i']) @@ -1293,1017 +1274,1036 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: intronic_variant = 'true' - if intronic_variant != 'true': - # Flag RefSeqGene for ammendment - # amend_RefSeqGene = 'false' - # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths - if stored_hgvs_not_delins != '': - # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) - # This test will only occur in dup of single base, insertion or substitution - if not re.search('_', str(hgvs_not_delins.posedit.pos)): - if re.search('dup', - hgvs_genomic_5pr.posedit.edit.type) or re.search( - 'ins', hgvs_genomic_5pr.posedit.edit.type): - # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos - plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) - plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 - plussed_hgvs_not_delins.posedit.edit.ref = '' - transcript_variant = no_norm_evm.g_to_t(plussed_hgvs_not_delins, - str( - saved_hgvs_coding.ac)) - if (( - transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( - hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, - end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[ - 1:] - elif re.search('ins', str( - hgvs_genomic_5pr.posedit.edit)) and re.search('del', - str( - hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', str( - hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', - str( - hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, - end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[ - 1:] - else: - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = 
hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, - end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[ - 1:] - elif re.search('ins', str( - hgvs_genomic_5pr.posedit.edit)) and re.search('del', - str( - hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', str( - hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', - str( - hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, - end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[ - 1:] + if intronic_variant != 'hard_fail': + if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', + str( + hgvs_seek_var.posedit.pos)) or re.search( + r'\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str( + hgvs_seek_var.posedit.pos)): + # Double check to see whether the variant is actually intronic? 
+ for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' + + if intronic_variant != 'true': + # Flag RefSeqGene for ammendment + # amend_RefSeqGene = 'false' + # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths + if stored_hgvs_not_delins != '': + # Refresh hgvs_not_delins from stored_hgvs_not_delins + hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) + # This test will only occur in dup of single base, insertion or substitution + if not re.search('_', str(hgvs_not_delins.posedit.pos)): + if re.search('dup', + hgvs_genomic_5pr.posedit.edit.type) or re.search( + 'ins', hgvs_genomic_5pr.posedit.edit.type): + # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos + plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) + plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 + plussed_hgvs_not_delins.posedit.edit.ref = '' + transcript_variant = no_norm_evm.g_to_t(plussed_hgvs_not_delins, + str( + saved_hgvs_coding.ac)) + if (( + transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( + hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, + end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[ + 1:] + elif re.search('ins', str( + hgvs_genomic_5pr.posedit.edit)) and re.search('del', + str( + hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', str( + hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', + str( + hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, + end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[ + 1:] else: - pass + if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + 
end = hgvs_not_delins.posedit.pos.end.base + ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, + end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[ + 1:] + elif re.search('ins', str( + hgvs_genomic_5pr.posedit.edit)) and re.search('del', + str( + hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', str( + hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', + str( + hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, + end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[ + 1:] else: pass - tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - saved_hgvs_coding.ac) - # Create normalized version of tx_hgvs_not_delins - rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) - # Check for +1 base and adjust - if re.search(r'\+', - str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - r'\+', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - fn.exceptPass() + else: + pass + tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + saved_hgvs_coding.ac) + # Create normalized version of tx_hgvs_not_delins + rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) + # Check 
for +1 base and adjust + if re.search(r'\+', + str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( + r'\+', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: + fn.exceptPass() - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, hn) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str( - saved_hgvs_coding.ac)) - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, hn) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str( - saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: - # pass - - # Check for -ve base and adjust - elif re.search(r'\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - r'\-', - str( - 
rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - fn.exceptPass() - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base back to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # Delete the ref - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # Add the additional base to the ALT - start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) - rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, hn) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str( - saved_hgvs_coding.ac)) - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, hn) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str( - saved_hgvs_coding.ac)) 
- rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, + no_norm_evm, hn) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str( + saved_hgvs_coding.ac)) + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, + no_norm_evm, hn) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str( + saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # else: + # pass + + # Check for -ve base and adjust + elif re.search(r'\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( + r'\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: fn.exceptPass() - - # Logic - if len(hgvs_not_delins.posedit.edit.ref) < len( - 
rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( - hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['chromosome', gap_length] - elif len(hgvs_not_delins.posedit.edit.ref) > len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( - rn_tx_hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base back to next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # Delete the ref + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # Add the additional base to the ALT + start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 + end = rn_tx_hgvs_not_delins.posedit.pos.end.base + ref_bases = self.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) else: - re_capture_tx_variant = [] - for possibility in hgvs_genomic_possibilities: - if possibility == '': - continue - hgvs_t_possibility = self.vm.g_to_t(possibility, hgvs_coding.ac) - if hgvs_t_possibility.posedit.edit.type == 'ins': - try: - hgvs_t_possibility = self.vm.c_to_n(hgvs_t_possibility) - except: - continue - if hgvs_t_possibility.posedit.pos.start.offset != 0 or hgvs_t_possibility.posedit.pos.end.offset != 0: - continue - ins_ref = self.sf.fetch_seq(hgvs_t_possibility.ac, - hgvs_t_possibility.posedit.pos.start.base - 1, - hgvs_t_possibility.posedit.pos.start.base + 1) - try: - hgvs_t_possibility = self.vm.n_to_c(hgvs_t_possibility) - except: - continue - hgvs_t_possibility.posedit.edit.ref = ins_ref - hgvs_t_possibility.posedit.edit.alt = ins_ref[ - 0] + hgvs_t_possibility.posedit.edit.alt + \ - ins_ref[1] - if possibility.posedit.edit.type == 'ins': - 
ins_ref = self.sf.fetch_seq(possibility.ac, - possibility.posedit.pos.start.base - 1, - possibility.posedit.pos.end.base) - possibility.posedit.edit.ref = ins_ref - possibility.posedit.edit.alt = ins_ref[ - 0] + possibility.posedit.edit.alt + \ - ins_ref[1] - if len(hgvs_t_possibility.posedit.edit.ref) < len( - possibility.posedit.edit.ref): - gap_length = len(possibility.posedit.edit.ref) - len( - hgvs_t_possibility.posedit.edit.ref) - re_capture_tx_variant = ['transcript', gap_length, - hgvs_t_possibility] - hgvs_not_delins = possibility - hgvs_genomic_5pr = possibility - break - - if re_capture_tx_variant != []: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, + no_norm_evm, hn) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str( + saved_hgvs_coding.ac)) + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, + no_norm_evm, hn) + rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + str( + saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + else: + fn.exceptPass() + + # Logic + if len(hgvs_not_delins.posedit.edit.ref) < len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( + hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['chromosome', gap_length] + elif len(hgvs_not_delins.posedit.edit.ref) > len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = 
len(hgvs_not_delins.posedit.edit.ref) - len( + rn_tx_hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['transcript', gap_length] + else: + re_capture_tx_variant = [] + for possibility in hgvs_genomic_possibilities: + if possibility == '': + continue + hgvs_t_possibility = self.vm.g_to_t(possibility, hgvs_coding.ac) + if hgvs_t_possibility.posedit.edit.type == 'ins': try: - tx_hgvs_not_delins = self.vm.c_to_n(re_capture_tx_variant[2]) + hgvs_t_possibility = self.vm.c_to_n(hgvs_t_possibility) except: - tx_hgvs_not_delins = re_capture_tx_variant[2] - disparity_deletion_in = re_capture_tx_variant[0:-1] - else: - pass - - # Final sanity checks - try: - self.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) - except Exception as e: - if str( - e) == 'start or end or both are beyond the bounds of transcript record': - continue - try: - hn.normalize(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - continue - elif re.match('Normalization of intronic variants is not supported', - error): - # We know that this cannot be because of an intronic variant, so must be aligned to tx gap - disparity_deletion_in = ['transcript', 'Requires Analysis'] - - # Recreate hgvs_genomic - if disparity_deletion_in[0] == 'transcript': - hgvs_genomic = hgvs_not_delins - - # Find oddly placed gaps where the tx variant is encompassed in the gap - if disparity_deletion_in[0] == 'false' and ( - possibility_counter == 3 or possibility_counter == 4): - rg = reverse_normalizer.normalize(hgvs_not_delins) - rtx = self.vm.g_to_t(rg, tx_hgvs_not_delins.ac) - fg = hn.normalize(hgvs_not_delins) - ftx = self.vm.g_to_t(fg, tx_hgvs_not_delins.ac) - if ( - 
rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( - ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): - exons = self.hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, alt_aln_method) - exonic = False - for ex_test in exons: - if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ - 7]) and ftx.posedit.pos.end.base in range(ex_test[6], - ex_test[7]): - exonic = True - if exonic is True: - hgvs_not_delins = fg - hgvs_genomic = fg - hgvs_genomic_5pr = fg + continue + if hgvs_t_possibility.posedit.pos.start.offset != 0 or hgvs_t_possibility.posedit.pos.end.offset != 0: + continue + ins_ref = self.sf.fetch_seq(hgvs_t_possibility.ac, + hgvs_t_possibility.posedit.pos.start.base - 1, + hgvs_t_possibility.posedit.pos.start.base + 1) try: - tx_hgvs_not_delins = self.vm.c_to_n(ftx) - except Exception: - tx_hgvs_not_delins = ftx - disparity_deletion_in = ['transcript', 'Requires Analysis'] - - # Pre-processing of tx_hgvs_not_delins - try: - if tx_hgvs_not_delins.posedit.edit.alt is None: - tx_hgvs_not_delins.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' 
+ str( - tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = self.hp.parse_hgvs_variant( - tx_hgvs_not_delins_delins_from_dup) - - if disparity_deletion_in[0] == 'transcript': - # amend_RefSeqGene = 'true' - # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search(r'\+', - str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search(r'\+', - str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.end))): - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) - - # Copy the current variant - tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) - try: - if tx_gap_fill_variant.posedit.edit.alt is None: - tx_gap_fill_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( - tx_gap_fill_variant.posedit.pos.start) + '_' + str( - tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = self.hp.parse_hgvs_variant( - tx_gap_fill_variant_delins_from_dup) - - # Identify which half of the NOT-intron the start position of the variant is in - if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - tx_gap_fill_variant.posedit.pos.start.offset = int( - '0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.offset = int( - '0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.offset = int( - '0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - tx_gap_fill_variant.posedit.pos.end.offset = int( - '0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' + hgvs_t_possibility = self.vm.n_to_c(hgvs_t_possibility) + except: + continue + hgvs_t_possibility.posedit.edit.ref = ins_ref + hgvs_t_possibility.posedit.edit.alt = ins_ref[ + 0] + hgvs_t_possibility.posedit.edit.alt + \ + ins_ref[1] + if possibility.posedit.edit.type == 'ins': + ins_ref = self.sf.fetch_seq(possibility.ac, + possibility.posedit.pos.start.base - 1, + possibility.posedit.pos.end.base) + possibility.posedit.edit.ref = ins_ref + possibility.posedit.edit.alt = ins_ref[ + 0] + possibility.posedit.edit.alt + \ + ins_ref[1] + if len(hgvs_t_possibility.posedit.edit.ref) < len( + possibility.posedit.edit.ref): + gap_length = len(possibility.posedit.edit.ref) - len( + hgvs_t_possibility.posedit.edit.ref) + re_capture_tx_variant = ['transcript', gap_length, + hgvs_t_possibility] + 
hgvs_not_delins = possibility + hgvs_genomic_5pr = possibility + break + if re_capture_tx_variant != []: try: - tx_gap_fill_variant = self.vm.n_to_c(tx_gap_fill_variant) + tx_hgvs_not_delins = self.vm.c_to_n(re_capture_tx_variant[2]) except: - fn.exceptPass() - genomic_gap_fill_variant = self.vm.t_to_g(tx_gap_fill_variant, - reverse_normalized_hgvs_genomic.ac) - genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref + tx_hgvs_not_delins = re_capture_tx_variant[2] + disparity_deletion_in = re_capture_tx_variant[0:-1] + else: + pass + + # Final sanity checks + try: + self.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + except Exception as e: + if str( + e) == 'start or end or both are beyond the bounds of transcript record': + continue + try: + hn.normalize(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + continue + elif re.match('Normalization of intronic variants is not supported', + error): + # We know that this cannot be because of an intronic variant, so must be aligned to tx gap + disparity_deletion_in = ['transcript', 'Requires Analysis'] + + # Recreate hgvs_genomic + if disparity_deletion_in[0] == 'transcript': + hgvs_genomic = hgvs_not_delins + + # Find oddly placed gaps where the tx variant is encompassed in the gap + if disparity_deletion_in[0] == 'false' and ( + possibility_counter == 3 or possibility_counter == 4): + rg = reverse_normalizer.normalize(hgvs_not_delins) + rtx = self.vm.g_to_t(rg, tx_hgvs_not_delins.ac) + fg = hn.normalize(hgvs_not_delins) + ftx = self.vm.g_to_t(fg, tx_hgvs_not_delins.ac) + if ( + rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( + 
ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): + exons = self.hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, alt_aln_method) + exonic = False + for ex_test in exons: + if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ + 7]) and ftx.posedit.pos.end.base in range(ex_test[6], + ex_test[7]): + exonic = True + if exonic is True: + hgvs_not_delins = fg + hgvs_genomic = fg + hgvs_genomic_5pr = fg try: - c_tx_hgvs_not_delins = self.vm.n_to_c(tx_hgvs_not_delins) + tx_hgvs_not_delins = self.vm.c_to_n(ftx) except Exception: - c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = self.vm.t_to_g(c_tx_hgvs_not_delins, - hgvs_genomic_5pr.ac) + tx_hgvs_not_delins = ftx + disparity_deletion_in = ['transcript', 'Requires Analysis'] - # Ensure an ALT exists - try: - if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( - genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = self.hp.parse_hgvs_variant( - genomic_gap_fill_variant_delins_from_dup) - genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' 
+ str( - genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = self.hp.parse_hgvs_variant( - genomic_gap_fill_variant_alt_delins_from_dup) - - # Correct insertion alts - if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = self.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - genomic_gap_fill_variant_alt.posedit.pos.end.base) - genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - append_ref[1] - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - alternate_bases = list( - genomic_gap_fill_variant_alt.posedit.edit.alt) + # Pre-processing of tx_hgvs_not_delins + try: + if tx_hgvs_not_delins.posedit.edit.alt is None: + tx_hgvs_not_delins.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' 
+ str( + tx_hgvs_not_delins.posedit.pos.start) + '_' + str( + tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins = self.hp.parse_hgvs_variant( + tx_hgvs_not_delins_delins_from_dup) + + if disparity_deletion_in[0] == 'transcript': + # amend_RefSeqGene = 'true' + # ANY VARIANT WHOLLY WITHIN THE GAP + if (re.search(r'\+', + str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search(r'\+', + str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.end))): + gapped_transcripts = gapped_transcripts + ' ' + str( + tx_hgvs_not_delins.ac) + + # Copy the current variant + tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) + try: + if tx_gap_fill_variant.posedit.edit.alt is None: + tx_gap_fill_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( + tx_gap_fill_variant.posedit.pos.start) + '_' + str( + tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant = self.hp.parse_hgvs_variant( + tx_gap_fill_variant_delins_from_dup) + + # Identify which half of the NOT-intron the start position of the variant is in + if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 + tx_gap_fill_variant.posedit.pos.start.offset = int( + '0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.offset = int( + '0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.offset = int( + '0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 + tx_gap_fill_variant.posedit.pos.end.offset = int( + '0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + + try: + tx_gap_fill_variant = self.vm.n_to_c(tx_gap_fill_variant) + except: + fn.exceptPass() + genomic_gap_fill_variant = self.vm.t_to_g(tx_gap_fill_variant, + reverse_normalized_hgvs_genomic.ac) + genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref + + try: + c_tx_hgvs_not_delins = self.vm.n_to_c(tx_hgvs_not_delins) + except Exception: + c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) + genomic_gap_fill_variant_alt = self.vm.t_to_g(c_tx_hgvs_not_delins, + hgvs_genomic_5pr.ac) + + # Ensure an ALT exists + try: + if genomic_gap_fill_variant_alt.posedit.edit.alt is None: + genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_gap_fill_variant_delins_from_dup = 
genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( + genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant = self.hp.parse_hgvs_variant( + genomic_gap_fill_variant_delins_from_dup) + genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( + genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt = self.hp.parse_hgvs_variant( + genomic_gap_fill_variant_alt_delins_from_dup) + + # Correct insertion alts + if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': + append_ref = self.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, + genomic_gap_fill_variant_alt.posedit.pos.end.base) + genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ + 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ + append_ref[1] + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) + if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: + alternate_bases = list( + genomic_gap_fill_variant_alt.posedit.edit.alt) + else: + # Deletions with no ins + pre_alternate_bases = list( + genomic_gap_fill_variant_alt.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = genomic_gap_fill_variant.posedit.pos.start.base + alt_start = 
genomic_gap_fill_variant_alt.posedit.pos.start.base + ref_base_dict = {} + for base in reference_bases: + ref_base_dict[ref_start] = str(base) + ref_start = ref_start + 1 + + alt_base_dict = {} + + # NEED TO SEARCH FOR RANGE = and replace with interval_range + # Need to search for int and replace with integer + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for integer in range( + genomic_gap_fill_variant_alt.posedit.pos.start.base, + genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): + if integer == alt_start: + alt_base_dict[integer] = str(''.join(alternate_bases)) else: - # Deletions with no ins - pre_alternate_bases = list( - genomic_gap_fill_variant_alt.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = genomic_gap_fill_variant.posedit.pos.start.base - alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base - ref_base_dict = {} - for base in reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = ref_start + 1 - - alt_base_dict = {} - - # NEED TO SEARCH FOR RANGE = and replace with interval_range - # Need to search for int and replace with integer - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for integer in range( - genomic_gap_fill_variant_alt.posedit.pos.start.base, - genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): - if integer == alt_start: - alt_base_dict[integer] = str(''.join(alternate_bases)) - else: - alt_base_dict[integer] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for integer in range( - genomic_gap_fill_variant.posedit.pos.start.base, - genomic_gap_fill_variant.posedit.pos.end.base + 1, - 1): - if integer in list(alt_base_dict.keys()): - alternate_sequence_bases.append(alt_base_dict[integer]) + alt_base_dict[integer] = 'X' + + 
# Generate the alt sequence + alternate_sequence_bases = [] + for integer in range( + genomic_gap_fill_variant.posedit.pos.start.base, + genomic_gap_fill_variant.posedit.pos.end.base + 1, + 1): + if integer in list(alt_base_dict.keys()): + alternate_sequence_bases.append(alt_base_dict[integer]) + else: + alternate_sequence_bases.append(ref_base_dict[integer]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Add the new alt to the gap fill variant and generate transcript variant + genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence + hgvs_refreshed_variant = self.vm.g_to_t(genomic_gap_fill_variant, + tx_gap_fill_variant.ac) + + # Set warning + gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) + disparity_deletion_in[1] = [gap_size] + auto_info = auto_info + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + else: + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + auto_info = auto_info + '%s' % (gap_position) + + else: + if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: + # In this instance, we have identified a transcript gap but the n. 
version of + # the transcript variant but do not have a position which actually hits the gap, + # so the variant likely spans the gap, and is not picked up by an offset. + try: + c1 = self.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + g1 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g3 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) + ng2 = hn.normalize(g2) + g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( + len(g3.posedit.edit.ref) - 1) + try: + c2 = self.vm.g_to_t(g3, c1.ac) + if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: + pass else: - alternate_sequence_bases.append(ref_base_dict[integer]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Add the new alt to the gap fill variant and generate transcript variant - genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = self.vm.g_to_t(genomic_gap_fill_variant, - tx_gap_fill_variant.ac) - - # Set warning - gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - disparity_deletion_in[1] = [gap_size] + tx_hgvs_not_delins = c2 + try: + tx_hgvs_not_delins = self.vm.c_to_n(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSError: + fn.exceptPass() + except hgvs.exceptions.HGVSInvalidVariantError: + fn.exceptPass() + + if re.search(r'\+', str( + tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\+', + str( + tx_hgvs_not_delins.posedit.pos.end)): auto_info = auto_info + str( stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( tx_hgvs_not_delins.ac) non_valid_caution = 'true' - + try: + c2 = self.vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = self.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) if re.match('NM_', str(for_location_c)): for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - else: gps = for_location_c.posedit.pos.start.base gpe = for_location_c.posedit.pos.start.base + 1 gap_position = ' between positions c.' 
+ str(gps) + '_' + str( gpe) + '\n' + # Warn update auto_info = auto_info + '%s' % (gap_position) - - else: - if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # In this instance, we have identified a transcript gap but the n. version of - # the transcript variant but do not have a position which actually hits the gap, - # so the variant likely spans the gap, and is not picked up by an offset. - try: - c1 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - g1 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) - g3 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) + elif re.search(r'\+', str( + tx_hgvs_not_delins.posedit.pos.end)) and not re.search(r'\+', + str( + tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = self.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - ng2 = hn.normalize(g2) - g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - len(g3.posedit.edit.ref) - 1) - try: - c2 = self.vm.g_to_t(g3, c1.ac) - if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - pass - else: - tx_hgvs_not_delins = c2 - try: - tx_hgvs_not_delins = 
self.vm.c_to_n(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSError: - fn.exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError: - fn.exceptPass() - - if re.search(r'\+', str( - tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\+', - str( - tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' 
+ str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\+', str( - tx_hgvs_not_delins.posedit.pos.end)) and not re.search(r'\+', - str( - tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - gpe = for_location_c.posedit.pos.end.base + 1 - gap_position = ' between positions c.' 
+ str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', str( - tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\-', - str( - tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - gap_position = ' between positions c.' 
+ str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', str( - tx_hgvs_not_delins.posedit.pos.end)) and not re.search(r'\-', - str( - tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - 1 - gpe = for_location_c.posedit.pos.end.base - gap_position = ' between positions c.' 
+ str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = self.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base + gpe = for_location_c.posedit.pos.end.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\-', str( + tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\-', + str( + tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = self.vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref else: - auto_info = auto_info + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) + '\n' - hgvs_refreshed_variant = tx_hgvs_not_delins - - # GAP IN THE CHROMOSOME - elif disparity_deletion_in[0] == 'chromosome': - # amend_RefSeqGene = 'true' - if possibility_counter == 3: - hgvs_refreshed_variant = stash_tx_right - elif possibility_counter == 4: - hgvs_refreshed_variant = stash_tx_left + g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = self.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\-', str( + tx_hgvs_not_delins.posedit.pos.end)) and not re.search(r'\-', + str( + tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = self.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = self.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base - 1 + gpe = for_location_c.posedit.pos.end.base + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) else: - hgvs_refreshed_variant = chromosome_normalized_hgvs_coding - # Warn - auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' 
+ str( - hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' transcript base(s) that fail to align to chromosome ' + str( - hgvs_genomic.ac) + '\n' - else: - # Keep the same by re-setting rel_var - hgvs_refreshed_variant = hgvs_coding - # amend_RefSeqGene = 'false' - - # Edit the output - if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', - str( - hgvs_refreshed_variant.type)): - hgvs_refreshed_variant = no_norm_evm.n_to_c(hgvs_refreshed_variant) + auto_info = auto_info + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + '\n' + hgvs_refreshed_variant = tx_hgvs_not_delins + + # GAP IN THE CHROMOSOME + elif disparity_deletion_in[0] == 'chromosome': + # amend_RefSeqGene = 'true' + if possibility_counter == 3: + hgvs_refreshed_variant = stash_tx_right + elif possibility_counter == 4: + hgvs_refreshed_variant = stash_tx_left else: - pass + hgvs_refreshed_variant = chromosome_normalized_hgvs_coding + # Warn + auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' 
+ str( + hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' transcript base(s) that fail to align to chromosome ' + str( + hgvs_genomic.ac) + '\n' + else: + # Keep the same by re-setting rel_var + hgvs_refreshed_variant = hgvs_coding + # amend_RefSeqGene = 'false' + + # Edit the output + if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', + str( + hgvs_refreshed_variant.type)): + hgvs_refreshed_variant = no_norm_evm.n_to_c(hgvs_refreshed_variant) + else: + pass - try: - hn.normalize(hgvs_refreshed_variant) - except Exception as e: - error = str(e) - # Ensure the final variant is not intronic nor does it cross exon boundaries - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - hgvs_refreshed_variant = saved_hgvs_coding - else: - continue + try: + hn.normalize(hgvs_refreshed_variant) + except Exception as e: + error = str(e) + # Ensure the final variant is not intronic nor does it cross exon boundaries + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + hgvs_refreshed_variant = saved_hgvs_coding + else: + continue - # Quick check to make sure the coding variant has not changed - try: - to_test = hn.normalize(hgvs_refreshed_variant) - except: - to_test = hgvs_refreshed_variant - if str(to_test.posedit.edit) != str(hgvs_coding.posedit.edit): - # Try the next available genomic option - if hgvs_coding.posedit.edit.type == 'identity' and to_test.posedit.edit.type == 'identity': - hgvs_coding = to_test - else: - continue + # Quick check to make sure the coding variant has not changed + try: + to_test = hn.normalize(hgvs_refreshed_variant) + except: + to_test = hgvs_refreshed_variant + if str(to_test.posedit.edit) != str(hgvs_coding.posedit.edit): + # Try the 
next available genomic option + if hgvs_coding.posedit.edit.type == 'identity' and to_test.posedit.edit.type == 'identity': + hgvs_coding = to_test + else: + continue - # Update hgvs_genomic - hgvs_alt_genomic = self.myvm_t_to_g(hgvs_refreshed_variant, alt_chr, - no_norm_evm, hn) - if hgvs_alt_genomic.posedit.edit.type == 'identity': - re_c = self.vm.g_to_t(hgvs_alt_genomic, hgvs_refreshed_variant.ac) + # Update hgvs_genomic + hgvs_alt_genomic = self.myvm_t_to_g(hgvs_refreshed_variant, alt_chr, + no_norm_evm, hn) + if hgvs_alt_genomic.posedit.edit.type == 'identity': + re_c = self.vm.g_to_t(hgvs_alt_genomic, hgvs_refreshed_variant.ac) + if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): + shuffle_left_g = copy.copy(hgvs_alt_genomic) + shuffle_left_g.posedit.edit.ref = '' + shuffle_left_g.posedit.edit.alt = '' + shuffle_left_g.posedit.pos.start.base = shuffle_left_g.posedit.pos.start.base - 1 + shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 + shuffle_left_g = reverse_normalizer.normalize(shuffle_left_g) + re_c = self.vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): - shuffle_left_g = copy.copy(hgvs_alt_genomic) - shuffle_left_g.posedit.edit.ref = '' - shuffle_left_g.posedit.edit.alt = '' - shuffle_left_g.posedit.pos.start.base = shuffle_left_g.posedit.pos.start.base - 1 - shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 - shuffle_left_g = reverse_normalizer.normalize(shuffle_left_g) - re_c = self.vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) - if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): - hgvs_alt_genomic = shuffle_left_g - - # If it is intronic, these vairables will not have been set - else: - # amend_RefSeqGene = 'false' - no_normalized_c = 'false' - - # Break if gap has been detected - if disparity_deletion_in[0] != 'false': - break - - # Normailse hgvs_genomic - try: - hgvs_alt_genomic = 
hn.normalize(hgvs_alt_genomic) - except hgvs.exceptions.HGVSError as e: - # Strange error caused by gap in genomic - error = str(e) - if re.search('base start position must be <= end position', error) and \ - disparity_deletion_in[0] == 'chromosome': - if hgvs_alt_genomic.posedit.edit.type == 'delins': - start = hgvs_alt_genomic.posedit.pos.start.base - end = hgvs_alt_genomic.posedit.pos.end.base - lhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) - rhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) - hgvs_alt_genomic.posedit.edit.ref = lhb + rhb - hgvs_alt_genomic.posedit.edit.alt = lhb + hgvs_alt_genomic.posedit.edit.alt + rhb - hgvs_alt_genomic.posedit.pos.start.base = end - hgvs_alt_genomic.posedit.pos.end.base = start - hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) - if hgvs_alt_genomic.posedit.edit.type == 'del': - start = hgvs_alt_genomic.posedit.pos.start.base - end = hgvs_alt_genomic.posedit.pos.end.base - lhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) - rhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) - hgvs_alt_genomic.posedit.edit.ref = lhb + rhb - hgvs_alt_genomic.posedit.edit.alt = lhb + rhb - hgvs_alt_genomic.posedit.pos.start.base = end - hgvs_alt_genomic.posedit.pos.end.base = start - hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) - - # Refresh the :g. variant - multi_g.append(hgvs_alt_genomic) - else: - multi_g.append(hgvs_alt_genomic) - corrective_action_taken = 'false' + hgvs_alt_genomic = shuffle_left_g - # In this instance, the gap code has generally found an incomplete-alignment rather than a - # truly gapped alignment. 
- except KeyError: - warnings = warnings + ': Suspected incomplete alignment between transcript %s and ' \ - 'genomic reference sequence %s' % (hgvs_coding.ac, - alt_chr) - continue - except hgvs.exceptions.HGVSError as e: - exc_type, exc_value, last_traceback = sys.exc_info() - te = traceback.format_exc() - error = str(te) - logger.error(str(exc_type) + " " + str(exc_value)) - logger.debug(error) - continue + # If it is intronic, these vairables will not have been set + else: + # amend_RefSeqGene = 'false' + no_normalized_c = 'false' + + # Break if gap has been detected + if disparity_deletion_in[0] != 'false': + break + + # Normailse hgvs_genomic + try: + hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) + except hgvs.exceptions.HGVSError as e: + # Strange error caused by gap in genomic + error = str(e) + if re.search('base start position must be <= end position', error) and \ + disparity_deletion_in[0] == 'chromosome': + if hgvs_alt_genomic.posedit.edit.type == 'delins': + start = hgvs_alt_genomic.posedit.pos.start.base + end = hgvs_alt_genomic.posedit.pos.end.base + lhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) + rhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) + hgvs_alt_genomic.posedit.edit.ref = lhb + rhb + hgvs_alt_genomic.posedit.edit.alt = lhb + hgvs_alt_genomic.posedit.edit.alt + rhb + hgvs_alt_genomic.posedit.pos.start.base = end + hgvs_alt_genomic.posedit.pos.end.base = start + hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) + if hgvs_alt_genomic.posedit.edit.type == 'del': + start = hgvs_alt_genomic.posedit.pos.start.base + end = hgvs_alt_genomic.posedit.pos.end.base + lhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) + rhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) + hgvs_alt_genomic.posedit.edit.ref = lhb + rhb + hgvs_alt_genomic.posedit.edit.alt = lhb + rhb + hgvs_alt_genomic.posedit.pos.start.base = end + hgvs_alt_genomic.posedit.pos.end.base = start + hgvs_alt_genomic = 
hn.normalize(hgvs_alt_genomic) + + # Refresh the :g. variant + multi_g.append(hgvs_alt_genomic) + else: + multi_g.append(hgvs_alt_genomic) + corrective_action_taken = 'false' + + # In this instance, the gap code has generally found an incomplete-alignment rather than a + # truly gapped alignment. + except KeyError: + warnings = warnings + ': Suspected incomplete alignment between transcript %s and ' \ + 'genomic reference sequence %s' % (hgvs_coding.ac, + alt_chr) + continue + except hgvs.exceptions.HGVSError as e: + exc_type, exc_value, last_traceback = sys.exc_info() + te = traceback.format_exc() + error = str(te) + logger.error(str(exc_type) + " " + str(exc_value)) + logger.debug(error) + continue - if multi_g != []: + if multi_g != []: - multi_gen_vars = multi_g # '|'.join(multi_g) - else: - multi_gen_vars = [] + multi_gen_vars = multi_g # '|'.join(multi_g) else: - # HGVS genomic in the absence of a transcript variant - if genomic_variant != '': - multi_gen_vars = [hgvs_genomic_variant] - else: - multi_gen_vars = [] - - # Dictionaries of genomic loci - alt_genomic_dicts = [] - primary_genomic_dicts = {} - - if len(multi_gen_vars) != 0: - for alt_gen_var in multi_gen_vars: - for build in self.genome_builds: - test = vvChromosomes.supported_for_mapping(alt_gen_var.ac, build) - if test == 'true': - try: - vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, build, reverse_normalizer, self.sf) - except hgvs.exceptions.HGVSInvalidVariantError as e: - continue - # Identify primary assembly positions - if re.match('NC_', alt_gen_var.ac): - if re.match('GRC', build): - primary_genomic_dicts[build.lower()] = { - 'hgvs_genomic_description': fn.valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['grc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } - - else: - primary_genomic_dicts[build.lower()] = { - 'hgvs_genomic_description': fn.valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['ucsc_chr'], - 'pos': vcf_dict['pos'], - 'ref': 
vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } - if build == 'GRCh38': - vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, 'hg38', reverse_normalizer, - self.sf) - primary_genomic_dicts['hg38'] = { - 'hgvs_genomic_description': fn.valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['ucsc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } - - continue + multi_gen_vars = [] + else: + # HGVS genomic in the absence of a transcript variant + if genomic_variant != '': + multi_gen_vars = [hgvs_genomic_variant] + else: + multi_gen_vars = [] + + # Dictionaries of genomic loci + alt_genomic_dicts = [] + primary_genomic_dicts = {} + + if len(multi_gen_vars) != 0: + for alt_gen_var in multi_gen_vars: + for build in self.genome_builds: + test = vvChromosomes.supported_for_mapping(alt_gen_var.ac, build) + if test == 'true': + try: + vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, build, reverse_normalizer, self.sf) + except hgvs.exceptions.HGVSInvalidVariantError as e: + continue + # Identify primary assembly positions + if re.match('NC_', alt_gen_var.ac): + if re.match('GRC', build): + primary_genomic_dicts[build.lower()] = { + 'hgvs_genomic_description': fn.valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['grc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } else: - if re.match('GRC', build): - dict = {build.lower(): {'hgvs_genomic_description': fn.valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['grc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } + primary_genomic_dicts[build.lower()] = { + 'hgvs_genomic_description': fn.valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['ucsc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] } - else: - dict = {build.lower(): {'hgvs_genomic_description': fn.valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['ucsc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } 
- } + } + if build == 'GRCh38': + vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, 'hg38', reverse_normalizer, + self.sf) + primary_genomic_dicts['hg38'] = { + 'hgvs_genomic_description': fn.valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['ucsc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] } + } + + continue + + else: + if re.match('GRC', build): + dict = {build.lower(): {'hgvs_genomic_description': fn.valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['grc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } + } + else: + dict = {build.lower(): {'hgvs_genomic_description': fn.valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['ucsc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } + } + # Append + alt_genomic_dicts.append(dict) + + if build == 'GRCh38': + vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, 'hg38', reverse_normalizer, + self.sf) + dict = {'hg38': {'hgvs_genomic_description': fn.valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['ucsc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } + } # Append alt_genomic_dicts.append(dict) + continue + else: + # May need to account for ALT NC_ + pass - if build == 'GRCh38': - vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, 'hg38', reverse_normalizer, - self.sf) - dict = {'hg38': {'hgvs_genomic_description': fn.valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['ucsc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } - } - # Append - alt_genomic_dicts.append(dict) - continue - else: - # May need to account for ALT NC_ - pass - - # Warn not directly mapped to specified genome build - if genomic_accession != '': - caution = '' - if primary_assembly.lower() not in list(primary_genomic_dicts.keys()): - warnings = warnings + ': ' + str( - hgvs_coding) + ' cannot be mapped directly to genome build ' + primary_assembly + ': See alternative 
genomic loci or alternative genome builds for aligned genomic positions' - - warn_list = warnings.split(': ') - warnings_out = [] - for warning in warn_list: - warning.strip() - warning = warning.replace("'", "") - if warning == '': - continue - warnings_out.append(warning) - # Remove duplicate elements but maintain the order - seen = {} - no_rep_list = [seen.setdefault(x, x) for x in warnings_out if x not in seen] - warnings_out = no_rep_list - - # Ensure Variants have had the refs removed. - # if not hasattr(posedit, refseqgene_variant): - if refseqgene_variant != '': + # Warn not directly mapped to specified genome build + if genomic_accession != '': + caution = '' + if primary_assembly.lower() not in list(primary_genomic_dicts.keys()): + warnings = warnings + ': ' + str( + hgvs_coding) + ' cannot be mapped directly to genome build ' + primary_assembly + ': See alternative genomic loci or alternative genome builds for aligned genomic positions' + + warn_list = warnings.split(': ') + warnings_out = [] + for warning in warn_list: + warning.strip() + warning = warning.replace("'", "") + if warning == '': + continue + warnings_out.append(warning) + # Remove duplicate elements but maintain the order + seen = {} + no_rep_list = [seen.setdefault(x, x) for x in warnings_out if x not in seen] + warnings_out = no_rep_list + + # Ensure Variants have had the refs removed. + # if not hasattr(posedit, refseqgene_variant): + if refseqgene_variant != '': + try: + refseqgene_variant = fn.valstr(hgvs_refseqgene_variant) + except: + fn.exceptPass() + + # Add single letter AA code to protein descriptions + predicted_protein_variant_dict = {"tlr": str(predicted_protein_variant), "slr": ''} + if predicted_protein_variant != '': + if not 'Non-coding :n.' 
in predicted_protein_variant: try: - refseqgene_variant = fn.valstr(hgvs_refseqgene_variant) - except: + format_p = predicted_protein_variant + format_p = re.sub(r'\(LRG_.+?\)', '', format_p) + re_parse_protein = self.hp.parse_hgvs_variant(format_p) + re_parse_protein_singleAA = fn.single_letter_protein(re_parse_protein) + predicted_protein_variant_dict["slr"] = str(re_parse_protein_singleAA) + except hgvs.exceptions.HGVSParseError: fn.exceptPass() - - # Add single letter AA code to protein descriptions - predicted_protein_variant_dict = {"tlr": str(predicted_protein_variant), "slr": ''} - if predicted_protein_variant != '': - if not 'Non-coding :n.' in predicted_protein_variant: - try: - format_p = predicted_protein_variant - format_p = re.sub(r'\(LRG_.+?\)', '', format_p) - re_parse_protein = self.hp.parse_hgvs_variant(format_p) - re_parse_protein_singleAA = fn.single_letter_protein(re_parse_protein) - predicted_protein_variant_dict["slr"] = str(re_parse_protein_singleAA) - except hgvs.exceptions.HGVSParseError: - fn.exceptPass() - else: - predicted_protein_variant_dict["slr"] = str(predicted_protein_variant) - - # Populate the dictionary - dict_out['submitted_variant'] = submitted - dict_out['gene_symbol'] = gene_symbol - dict_out['transcript_description'] = transcript_description - dict_out['hgvs_transcript_variant'] = tx_variant - dict_out['genome_context_intronic_sequence'] = genome_context_transcript_variant - dict_out['refseqgene_context_intronic_sequence'] = RefSeqGene_context_transcript_variant - dict_out['hgvs_refseqgene_variant'] = refseqgene_variant - dict_out['hgvs_predicted_protein_consequence'] = predicted_protein_variant_dict - dict_out['validation_warnings'] = warnings_out - dict_out['hgvs_lrg_transcript_variant'] = lrg_transcript_variant - dict_out['hgvs_lrg_variant'] = lrg_variant - dict_out['alt_genomic_loci'] = alt_genomic_dicts - dict_out['primary_assembly_loci'] = primary_genomic_dicts - dict_out['reference_sequence_records'] = '' - - # Add 
links to reference_sequence_records - ref_records = self.db.get_urls(dict_out) - if ref_records != {}: - dict_out['reference_sequence_records'] = ref_records - - # Append to a list for return - batch_out.append(dict_out) + else: + predicted_protein_variant_dict["slr"] = str(predicted_protein_variant) + + # Populate the dictionary + dict_out['submitted_variant'] = submitted + dict_out['gene_symbol'] = gene_symbol + dict_out['transcript_description'] = transcript_description + dict_out['hgvs_transcript_variant'] = tx_variant + dict_out['genome_context_intronic_sequence'] = genome_context_transcript_variant + dict_out['refseqgene_context_intronic_sequence'] = RefSeqGene_context_transcript_variant + dict_out['hgvs_refseqgene_variant'] = refseqgene_variant + dict_out['hgvs_predicted_protein_consequence'] = predicted_protein_variant_dict + dict_out['validation_warnings'] = warnings_out + dict_out['hgvs_lrg_transcript_variant'] = lrg_transcript_variant + dict_out['hgvs_lrg_variant'] = lrg_variant + dict_out['alt_genomic_loci'] = alt_genomic_dicts + dict_out['primary_assembly_loci'] = primary_genomic_dicts + dict_out['reference_sequence_records'] = '' + + # Add links to reference_sequence_records + ref_records = self.db.get_urls(dict_out) + if ref_records != {}: + dict_out['reference_sequence_records'] = ref_records + + # Append to a list for return + batch_out.append(dict_out) """ From cb29c578e1c8b662834fd53057f1fb7762c39744 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 26 Apr 2019 11:15:09 +0100 Subject: [PATCH 077/223] Changed valid variable to variant, also made some minor cosmetic edits --- VariantValidator/modules/vvMixinCore.py | 30 ++++++++++++------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index b9f6cd97..9aac63db 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -52,7 +52,7 @@ from . 
import vvChromosomes from . import vvMixinConverters from .vvFunctions import VariantValidatorError -from . import variant +from .variant import Variant from . import format_converters from . import use_checking from . import collect_info @@ -124,7 +124,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr self.batch_list = [] for queries in batch_queries: queries = queries.strip() - query = variant.Variant(queries) + query = Variant(queries) self.batch_list.append(query) # Create List to carry batch data @@ -591,8 +591,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # order the rows by_order = sorted(self.batch_list, key=lambda x: x.order) - for valid in by_order: - if not valid.write: + for variant in by_order: + if not variant.write: continue # Blank VCF @@ -608,16 +608,16 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr gap_compensation = True # warngins - warnings = valid.warnings + warnings = variant.warnings warnings = re.sub('del[GATC][GATC][GATC][GATC]+', 'del', warnings) warnings = re.sub('^: ', '', warnings) warnings = re.sub('::', ':', warnings) # Submitted variant - submitted = valid.original + submitted = variant.original # Genomic sequence variation - genomic_variant = valid.genomic_g + genomic_variant = variant.genomic_g # genomic accession if genomic_variant != '': @@ -628,9 +628,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr genomic_accession = '' # RefSeqGene variation - refseqgene_variant = valid.genomic_r + refseqgene_variant = variant.genomic_r refseqgene_variant = refseqgene_variant.strip() - if re.search('RefSeqGene', refseqgene_variant) or refseqgene_variant == '': + if 'RefSeqGene' in refseqgene_variant or refseqgene_variant == '': warnings = warnings + ': ' + refseqgene_variant refseqgene_variant = '' lrg_variant = '' @@ -651,7 +651,7 @@ def validate(self, batch_variant, selected_assembly, 
select_transcripts, transcr hgvs_lrg.ac) + ' is pending therefore changes may be made to the LRG reference sequence' # Transcript sequence variation - tx_variant = valid.coding + tx_variant = variant.coding if tx_variant != '': if '(' in tx_variant and ')' in tx_variant: tx_variant = tx_variant.split('(')[1] @@ -699,7 +699,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr self.vr.validate(hgvs_transcript_variant) except hgvs.exceptions.HGVSError as e: error = str(e) - if re.search('intronic variant', error): + if 'intronic variant' in error: genome_context_transcript_variant = genomic_accession + '(' + transcript_accession + '):c.' + str( hgvs_transcript_variant.posedit) if refseqgene_variant != '': @@ -725,11 +725,11 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr RefSeqGene_context_transcript_variant = '' # Protein description - predicted_protein_variant = valid.protein - if re.match('NP_', predicted_protein_variant): + predicted_protein_variant = variant.protein + if 'NP_' in predicted_protein_variant: rs_p, pred_prot_posedit = predicted_protein_variant.split(':') lrg_p = self.db.get_lrgProteinID_from_RefSeqProteinID(rs_p) - if re.match('LRG', lrg_p): + if 'LRG' in lrg_p: predicted_protein_variant = rs_p + '(' + lrg_p + '):' + pred_prot_posedit # Gene @@ -742,7 +742,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr gene_symbol = '' # Transcript description - transcript_description = valid.description + transcript_description = variant.description # Stashed variants # if valid.test_stash_tx_left: From 3f9f44ce8631dfaece0af49481191b0ad0e9e5b1 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 26 Apr 2019 14:31:45 +0100 Subject: [PATCH 078/223] Moved third gap compensation section into GapMapper --- VariantValidator/modules/gapped_mapping.py | 1315 +++++++++ VariantValidator/modules/mappers.py | 1402 ++++++++++ VariantValidator/modules/vvMixinCore.py | 2779 
++++++++++---------- 3 files changed, 4108 insertions(+), 1388 deletions(-) diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index 7f06cf99..9ec0ad04 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -1292,6 +1292,1321 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic): return hgvs_coding + def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, alt_chr, rec_var): + + orientation = int(ori[0]['alt_strand']) + hgvs_genomic = copy.deepcopy(hgvs_alt_genomic) + + logger.warning('g_to_t gap code 3 active') + rn_hgvs_genomic = self.variant.reverse_normalizer.normalize(hgvs_alt_genomic) + self.hgvs_genomic_possibilities.append(rn_hgvs_genomic) + if orientation != -1: + try: + chromosome_normalized_hgvs_coding = self.variant.reverse_normalizer.normalize( + hgvs_coding) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + chromosome_normalized_hgvs_coding = hgvs_coding + else: + try: + chromosome_normalized_hgvs_coding = self.variant.hn.normalize(hgvs_coding) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + chromosome_normalized_hgvs_coding = hgvs_coding + + most_3pr_hgvs_genomic = self.validator.myvm_t_to_g(chromosome_normalized_hgvs_coding, + alt_chr, + self.variant.no_norm_evm, self.variant.hn) + self.hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) + + # First to the right + hgvs_stash = copy.deepcopy(hgvs_coding) + try: + hgvs_stash = self.variant.no_norm_evm.c_to_n(hgvs_stash) + except: + fn.exceptPass() + try: + stash_ac = hgvs_stash.ac + stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, self.variant.hn, self.validator.sf) + stash_pos = int(stash_dict['pos']) + stash_ref = stash_dict['ref'] + stash_alt = stash_dict['alt'] + # Generate an end position + stash_end = str(stash_pos + len(stash_ref) - 1) + # make a not real deletion 
insertion + stash_hgvs_not_delins = self.validator.hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' + str( + stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + try: + stash_hgvs_not_delins = self.variant.no_norm_evm.n_to_c(stash_hgvs_not_delins) + except: + fn.exceptPass() + # Store a tx copy for later use + test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) + stash_genomic = self.validator.myvm_t_to_g(test_stash_tx_right, hgvs_alt_genomic.ac, + self.variant.no_norm_evm, self.variant.hn) + # Stash the outputs if required + # test variants = NC_000006.11:g.90403795G= (causes double identity) + # NC_000002.11:g.73675227_73675228insCTC (? incorrect assumed insertion position) + # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': + # pass + if len(test_stash_tx_right.posedit.edit.ref) == (( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + stash_tx_right = test_stash_tx_right + if hasattr(test_stash_tx_right.posedit.edit, + 'alt') and test_stash_tx_right.posedit.edit.alt is not None: + alt = test_stash_tx_right.posedit.edit.alt + else: + alt = '' + if hasattr(stash_genomic.posedit.edit, + 'alt') and stash_genomic.posedit.edit.alt is not None: + g_alt = stash_genomic.posedit.edit.alt + else: + g_alt = '' + if (len(alt) - ( + test_stash_tx_right.posedit.pos.end.base - test_stash_tx_right.posedit.pos.start.base) + 1) != ( + len(g_alt) - ( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + self.hgvs_genomic_possibilities.append(stash_genomic) + else: + self.hgvs_genomic_possibilities.append('') + elif test_stash_tx_right.posedit.edit.type == 'identity': + reform_ident = str(test_stash_tx_right).split(':')[0] + reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_right.posedit.pos) + 'del' + str( + test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) + hgvs_reform_ident = self.validator.hp.parse_hgvs_variant(reform_ident) + try: + self.variant.hn.normalize(hgvs_reform_ident) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error): + stash_tx_right = test_stash_tx_right + self.hgvs_genomic_possibilities.append('') + else: + stash_tx_right = test_stash_tx_right + self.hgvs_genomic_possibilities.append(stash_genomic) + else: + try: + self.variant.hn.normalize(test_stash_tx_right) + except hgvs.exceptions.HGVSUnsupportedOperationError: + self.hgvs_genomic_possibilities.append('') + else: + stash_tx_right = test_stash_tx_right + self.hgvs_genomic_possibilities.append(stash_genomic) + except hgvs.exceptions.HGVSError as e: + fn.exceptPass() + except ValueError: + fn.exceptPass() + + # Then to the left + hgvs_stash = copy.deepcopy(hgvs_coding) + try: + hgvs_stash = self.variant.no_norm_evm.c_to_n(hgvs_stash) + except: + fn.exceptPass() + try: + stash_ac = hgvs_stash.ac + stash_dict = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, + self.variant.reverse_normalizer, self.validator.sf) + stash_pos = int(stash_dict['pos']) + stash_ref = stash_dict['ref'] + stash_alt = stash_dict['alt'] + # Generate an end position + stash_end = str(stash_pos + len(stash_ref) - 1) + # make a not real deletion insertion + stash_hgvs_not_delins = self.validator.hp.parse_hgvs_variant( + stash_ac + ':' + hgvs_stash.type + '.' 
+ str( + stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + try: + stash_hgvs_not_delins = self.variant.no_norm_evm.n_to_c(stash_hgvs_not_delins) + except: + fn.exceptPass() + # Store a tx copy for later use + test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) + stash_genomic = self.validator.myvm_t_to_g(test_stash_tx_left, hgvs_alt_genomic.ac, + self.variant.no_norm_evm, self.variant.hn) + # Stash the outputs if required + # test variants = NC_000006.11:g.90403795G= (causes double identity) + # NC_000002.11:g.73675227_73675228insCTC + # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': + # pass + if len(test_stash_tx_left.posedit.edit.ref) == (( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): + stash_tx_left = test_stash_tx_left + if hasattr(test_stash_tx_left.posedit.edit, + 'alt') and test_stash_tx_left.posedit.edit.alt is not None: + alt = test_stash_tx_left.posedit.edit.alt + else: + alt = '' + if hasattr(stash_genomic.posedit.edit, + 'alt') and stash_genomic.posedit.edit.alt is not None: + g_alt = stash_genomic.posedit.edit.alt + else: + g_alt = '' + if (len(alt) - ( + test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( + len(g_alt) - ( + stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + self.hgvs_genomic_possibilities.append(stash_genomic) + else: + self.hgvs_genomic_possibilities.append('') + elif test_stash_tx_left.posedit.edit.type == 'identity': + reform_ident = str(test_stash_tx_left).split(':')[0] + reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_left.posedit.pos) + 'del' + str( + test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) + hgvs_reform_ident = self.validator.hp.parse_hgvs_variant(reform_ident) + try: + self.variant.hn.normalize(hgvs_reform_ident) + except hgvs.exceptions.HGVSError as e: + error = str(e) + if re.search('spanning the exon-intron boundary', error): + stash_tx_left = test_stash_tx_left + self.hgvs_genomic_possibilities.append('') + else: + stash_tx_left = test_stash_tx_left + self.hgvs_genomic_possibilities.append(stash_genomic) + else: + try: + self.variant.hn.normalize(test_stash_tx_left) + except hgvs.exceptions.HGVSUnsupportedOperationError: + self.hgvs_genomic_possibilities.append('') + else: + stash_tx_left = test_stash_tx_left + self.hgvs_genomic_possibilities.append(stash_genomic) + except hgvs.exceptions.HGVSError as e: + fn.exceptPass() + except ValueError: + fn.exceptPass() + + # direct mapping from reverse_normalized transcript insertions in the delins format + try: + if hgvs_coding.posedit.edit.type == 'ins': + most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) + most_3pr_hgvs_transcript_variant = self.variant.reverse_normalizer.normalize(hgvs_coding) + try: + n_3pr = self.validator.vm.c_to_n(most_3pr_hgvs_transcript_variant) + n_5pr = self.validator.vm.c_to_n(most_5pr_hgvs_transcript_variant) + except: + n_3pr = most_3pr_hgvs_transcript_variant + n_5pr = most_5pr_hgvs_transcript_variant + # Make into a delins by adding the ref bases to the self.variant ref and alt + pr3_ref = self.validator.sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, + n_3pr.posedit.pos.end.base) + pr5_ref = self.validator.sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, + n_5pr.posedit.pos.end.base) + most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref + most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[ + 0] + 
most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ + pr3_ref[1] + most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[ + 0] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ + pr5_ref[1] + # Map to the genome + genomic_from_most_3pr_hgvs_transcript_variant = self.validator.vm.t_to_g( + most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) + genomic_from_most_5pr_hgvs_transcript_variant = self.validator.vm.t_to_g( + most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) + + # Normalize - If the self.variant spans a gap it should then form a static genomic self.variant + try: + genomic_from_most_3pr_hgvs_transcript_variant = self.variant.hn.normalize( + genomic_from_most_3pr_hgvs_transcript_variant) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if error == 'base start position must be <= end position': + start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base + end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start + genomic_from_most_3pr_hgvs_transcript_variant = self.variant.hn.normalize( + genomic_from_most_3pr_hgvs_transcript_variant) + try: + genomic_from_most_5pr_hgvs_transcript_variant = self.variant.hn.normalize( + genomic_from_most_5pr_hgvs_transcript_variant) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if error == 'base start position must be <= end position': + start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base + end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start + genomic_from_most_5pr_hgvs_transcript_variant = self.variant.hn.normalize( + genomic_from_most_5pr_hgvs_transcript_variant) + + try: + if 
genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' + str( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant = self.validator.hp.parse_hgvs_variant( + genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) + + try: + if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' 
+ str( + most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant = self.validator.hp.parse_hgvs_variant( + most_3pr_hgvs_transcript_variant_delins_from_dup) + + try: + if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' + str( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant = self.validator.hp.parse_hgvs_variant( + genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) + + try: + if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' 
+ str( + most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant = self.validator.hp.parse_hgvs_variant( + most_5pr_hgvs_transcript_variant_delins_from_dup) + + if len( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( + most_3pr_hgvs_transcript_variant.posedit.edit.alt): + self.hgvs_genomic_possibilities.append( + genomic_from_most_3pr_hgvs_transcript_variant) + if len( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( + most_5pr_hgvs_transcript_variant.posedit.edit.alt): + self.hgvs_genomic_possibilities.append( + genomic_from_most_5pr_hgvs_transcript_variant) + + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + pass + fn.exceptPass() + + # Set variables for problem specific warnings + gapped_alignment_warning = '' + corrective_action_taken = '' + gapped_transcripts = '' + auto_info = '' + + # Mark as not disparity detected + disparity_deletion_in = ['false', 'false'] + # Loop through to see if a gap can be located + possibility_counter = 0 + for possibility in self.hgvs_genomic_possibilities: + possibility_counter = possibility_counter + 1 + # Loop out stash possibilities which will not spot gaps so are empty + if possibility == '': + continue + + # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps + hgvs_genomic_variant = possibility + stored_hgvs_genomic_variant = copy.deepcopy(hgvs_genomic_variant) + + # Reverse normalize hgvs_genomic_variant: NOTE will replace ref + try: + 
reverse_normalized_hgvs_genomic = self.variant.reverse_normalizer.normalize( + hgvs_genomic_variant) + except hgvs.exceptions.HGVSError as e: + # Strange error caused by gap in genomic + error = str(e) + if re.search('base start position must be <= end position', error): + if hgvs_genomic.posedit.edit.type == 'delins': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = self.validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = self.validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + reverse_normalized_hgvs_genomic = self.variant.reverse_normalizer.normalize( + hgvs_genomic) + if hgvs_genomic.posedit.edit.type == 'del': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + lhb = self.validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + rhb = self.validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + rhb + hgvs_genomic.posedit.pos.start.base = end + hgvs_genomic.posedit.pos.end.base = start + reverse_normalized_hgvs_genomic = self.variant.reverse_normalizer.normalize( + hgvs_genomic) + if re.search('insertion length must be 1', error): + if hgvs_genomic.posedit.edit.type == 'ins': + start = hgvs_genomic.posedit.pos.start.base + end = hgvs_genomic.posedit.pos.end.base + ref_bases = self.validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) + lhb = self.validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + rhb = self.validator.sf.fetch_seq(str(hgvs_genomic.ac), start, end) + hgvs_genomic.posedit.edit.ref = lhb + rhb + hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + reverse_normalized_hgvs_genomic = 
self.variant.reverse_normalizer.normalize( + hgvs_genomic) + + self.hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + # Store a copy for later use + stored_hgvs_genomic_5pr = copy.deepcopy(self.hgvs_genomic_5pr) + + # Make VCF + vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, self.variant.primary_assembly, + self.variant.reverse_normalizer, self.validator.sf) + chr = vcf_dict['chr'] + pos = vcf_dict['pos'] + ref = vcf_dict['ref'] + alt = vcf_dict['alt'] + + # Look for exonic gaps within transcript or chromosome + no_normalized_c = 'false' # Mark true to not produce an additional normalization of c. + + # Generate an end position + end = str(int(pos) + len(ref) - 1) + pos = str(pos) + + # Store a not real deletion insertion to test for gapping + stored_hgvs_not_delins = self.validator.hp.parse_hgvs_variant(str( + self.hgvs_genomic_5pr.ac) + ':' + self.hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) + v = [chr, pos, ref, alt] + + # Save a copy of current hgvs_coding + try: + saved_hgvs_coding = self.variant.no_norm_evm.g_to_t(stored_hgvs_not_delins, + hgvs_coding.ac) + except Exception as e: + if str( + e) == 'start or end or both are beyond the bounds of transcript record': + saved_hgvs_coding = hgvs_coding + continue + + # Detect intronic variation using normalization + intronic_variant = 'false' + # Look for normalized variant options that do not match hgvs_coding + if orientation == -1: + # position genomic at its most 5 prime position + try: + query_genomic = self.variant.reverse_normalizer.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript ant test for movement + try: + hgvs_seek_var = self.variant.evm.g_to_t(query_genomic, hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + else: + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if ( + hgvs_seek_var.posedit.pos.start.base + 
hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding + + elif orientation != -1: + # position genomic at its most 3 prime position + try: + query_genomic = self.variant.hn.normalize(hgvs_genomic) + except: + query_genomic = hgvs_genomic + # Map to the transcript and test for movement + try: + hgvs_seek_var = self.variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + hgvs_seek_var = saved_hgvs_coding + seek_var = fn.valstr(hgvs_seek_var) + seek_ac = str(hgvs_seek_var.ac) + if ( + hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding + + try: + intron_test = self.variant.hn.normalize(hgvs_seek_var) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + intronic_variant = 'hard_fail' + else: + # Double check to see whether the variant is actually intronic? 
+ for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + self.hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + self.hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' + + if intronic_variant != 'hard_fail': + if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', + str( + hgvs_seek_var.posedit.pos)) or re.search( + r'\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str( + hgvs_seek_var.posedit.pos)): + # Double check to see whether the variant is actually intronic? + for exon in ori: + genomic_start = int(exon['alt_start_i']) + genomic_end = int(exon['alt_end_i']) + if ( + self.hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + self.hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + intronic_variant = 'false' + break + else: + intronic_variant = 'true' + + if intronic_variant != 'true': + # Flag RefSeqGene for ammendment + # amend_RefSeqGene = 'false' + # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths + if stored_hgvs_not_delins != '': + # Refresh hgvs_not_delins from stored_hgvs_not_delins + hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) + # This test will only occur in dup of single base, insertion or substitution + if not re.search('_', str(hgvs_not_delins.posedit.pos)): + if re.search('dup', self.hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', + self.hgvs_genomic_5pr.posedit.edit.type): + # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos + plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) + plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 + plussed_hgvs_not_delins.posedit.edit.ref = '' + transcript_variant = self.variant.no_norm_evm.g_to_t(plussed_hgvs_not_delins, + str(saved_hgvs_coding.ac)) + if (( + transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( + self.hgvs_genomic_5pr.posedit.pos.end.base - self.hgvs_genomic_5pr.posedit.pos.start.base)): + if re.search('dup', str(self.hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = self.validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[1:] + elif re.search('ins', str(self.hgvs_genomic_5pr.posedit.edit)) and re.search( + 'del', str(self.hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', str( + self.hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', + str( + self.hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = self.validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, + end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[ + 1:] + else: + if re.search('dup', str(self.hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + 
start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = self.validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, + end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[ + 1:] + elif re.search('ins', str( + self.hgvs_genomic_5pr.posedit.edit)) and re.search('del', + str( + self.hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + elif re.search('ins', str( + self.hgvs_genomic_5pr.posedit.edit)) and not re.search( + 'del', + str( + self.hgvs_genomic_5pr.posedit.edit)): + hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + start = hgvs_not_delins.posedit.pos.start.base - 1 + end = hgvs_not_delins.posedit.pos.end.base + ref_bases = self.validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, + end) + hgvs_not_delins.posedit.edit.ref = ref_bases + hgvs_not_delins.posedit.edit.alt = ref_bases[ + :1] + hgvs_not_delins.posedit.edit.alt[ + 1:] + ref_bases[ + 1:] + else: + pass + else: + pass + tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + saved_hgvs_coding.ac) + # Create normalized version of tx_hgvs_not_delins + rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) + # Check for +1 base and adjust + if re.search(r'\+', + str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( + r'\+', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: + fn.exceptPass() + + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base to 
next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = self.validator.myvm_t_to_g(test_tx_var, alt_chr, + self.variant.no_norm_evm, self.variant.hn) + rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + str( + saved_hgvs_coding.ac)) + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = self.validator.myvm_t_to_g(test_tx_var, alt_chr, + self.variant.no_norm_evm, self.variant.hn) + rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + str( + saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # else: + # pass + + # Check for -ve base and adjust + elif re.search(r'\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( + r'\-', + str( + rn_tx_hgvs_not_delins.posedit.pos.start)): + # Remove offsetting to span the gap + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + try: + rn_tx_hgvs_not_delins.posedit.edit.alt = '' + except: + fn.exceptPass() + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # move tx end base back to 
next available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # Delete the ref + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # Add the additional base to the ALT + start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 + end = rn_tx_hgvs_not_delins.posedit.pos.end.base + ref_bases = self.validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = self.validator.myvm_t_to_g(test_tx_var, alt_chr, + self.variant.no_norm_evm, self.variant.hn) + rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + str( + saved_hgvs_coding.ac)) + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # move tx start base to previous available non-offset base + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 + rn_tx_hgvs_not_delins.posedit.edit.ref = '' + if re.match('NM_', str(rn_tx_hgvs_not_delins)): + test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + else: + test_tx_var = rn_tx_hgvs_not_delins + # re-make genomic and tx + hgvs_not_delins = self.validator.myvm_t_to_g(test_tx_var, alt_chr, + self.variant.no_norm_evm, self.variant.hn) + rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + str( + saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + else: + fn.exceptPass() + + # Logic + if len(hgvs_not_delins.posedit.edit.ref) < len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( + hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['chromosome', gap_length] + elif 
len(hgvs_not_delins.posedit.edit.ref) > len( + rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( + rn_tx_hgvs_not_delins.posedit.edit.ref) + disparity_deletion_in = ['transcript', gap_length] + else: + re_capture_tx_variant = [] + for possibility in self.hgvs_genomic_possibilities: + if possibility == '': + continue + hgvs_t_possibility = self.validator.vm.g_to_t(possibility, hgvs_coding.ac) + if hgvs_t_possibility.posedit.edit.type == 'ins': + try: + hgvs_t_possibility = self.validator.vm.c_to_n(hgvs_t_possibility) + except: + continue + if hgvs_t_possibility.posedit.pos.start.offset != 0 or hgvs_t_possibility.posedit.pos.end.offset != 0: + continue + ins_ref = self.validator.sf.fetch_seq(hgvs_t_possibility.ac, + hgvs_t_possibility.posedit.pos.start.base - 1, + hgvs_t_possibility.posedit.pos.start.base + 1) + try: + hgvs_t_possibility = self.validator.vm.n_to_c(hgvs_t_possibility) + except: + continue + hgvs_t_possibility.posedit.edit.ref = ins_ref + hgvs_t_possibility.posedit.edit.alt = ins_ref[ + 0] + hgvs_t_possibility.posedit.edit.alt + \ + ins_ref[1] + if possibility.posedit.edit.type == 'ins': + ins_ref = self.validator.sf.fetch_seq(possibility.ac, + possibility.posedit.pos.start.base - 1, + possibility.posedit.pos.end.base) + possibility.posedit.edit.ref = ins_ref + possibility.posedit.edit.alt = ins_ref[ + 0] + possibility.posedit.edit.alt + \ + ins_ref[1] + if len(hgvs_t_possibility.posedit.edit.ref) < len( + possibility.posedit.edit.ref): + gap_length = len(possibility.posedit.edit.ref) - len( + hgvs_t_possibility.posedit.edit.ref) + re_capture_tx_variant = ['transcript', gap_length, + hgvs_t_possibility] + hgvs_not_delins = possibility + self.hgvs_genomic_5pr = possibility + break + + if re_capture_tx_variant != []: + try: + tx_hgvs_not_delins = self.validator.vm.c_to_n(re_capture_tx_variant[2]) + except: + tx_hgvs_not_delins = re_capture_tx_variant[2] + disparity_deletion_in = 
re_capture_tx_variant[0:-1] + else: + pass + + # Final sanity checks + try: + self.validator.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + except Exception as e: + if str( + e) == 'start or end or both are beyond the bounds of transcript record': + continue + try: + self.variant.hn.normalize(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + if re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + continue + elif re.match('Normalization of intronic variants is not supported', + error): + # We know that this cannot be because of an intronic variant, so must be aligned to tx gap + disparity_deletion_in = ['transcript', 'Requires Analysis'] + + # Recreate hgvs_genomic + if disparity_deletion_in[0] == 'transcript': + hgvs_genomic = hgvs_not_delins + + # Find oddly placed gaps where the tx variant is encompassed in the gap + if disparity_deletion_in[0] == 'false' and ( + possibility_counter == 3 or possibility_counter == 4): + rg = self.variant.reverse_normalizer.normalize(hgvs_not_delins) + rtx = self.validator.vm.g_to_t(rg, tx_hgvs_not_delins.ac) + fg = self.variant.hn.normalize(hgvs_not_delins) + ftx = self.validator.vm.g_to_t(fg, tx_hgvs_not_delins.ac) + if ( + rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( + ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): + exons = self.validator.hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, self.validator.alt_aln_method) + exonic = False + for ex_test in exons: + if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ + 7]) and ftx.posedit.pos.end.base in range(ex_test[6], + ex_test[7]): + exonic = True + if exonic is True: + hgvs_not_delins = fg + hgvs_genomic = fg + self.hgvs_genomic_5pr = fg + 
try: + tx_hgvs_not_delins = self.validator.vm.c_to_n(ftx) + except Exception: + tx_hgvs_not_delins = ftx + disparity_deletion_in = ['transcript', 'Requires Analysis'] + + # Pre-processing of tx_hgvs_not_delins + try: + if tx_hgvs_not_delins.posedit.edit.alt is None: + tx_hgvs_not_delins.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( + tx_hgvs_not_delins.posedit.pos.start) + '_' + str( + tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins = self.validator.hp.parse_hgvs_variant( + tx_hgvs_not_delins_delins_from_dup) + + if disparity_deletion_in[0] == 'transcript': + # amend_RefSeqGene = 'true' + # ANY VARIANT WHOLLY WITHIN THE GAP + if (re.search(r'\+', + str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.start))) and ( + re.search(r'\+', + str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( + r'\-', str(tx_hgvs_not_delins.posedit.pos.end))): + gapped_transcripts = gapped_transcripts + ' ' + str( + tx_hgvs_not_delins.ac) + + # Copy the current variant + tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) + try: + if tx_gap_fill_variant.posedit.edit.alt is None: + tx_gap_fill_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( + tx_gap_fill_variant.posedit.pos.start) + '_' + str( + tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant = self.validator.hp.parse_hgvs_variant( + tx_gap_fill_variant_delins_from_dup) + + # Identify which half of the NOT-intron the start position of the variant is in + if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 + tx_gap_fill_variant.posedit.pos.start.offset = int( + '0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.offset = int( + '0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): + tx_gap_fill_variant.posedit.pos.start.offset = int( + '0') # int('+1') + tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 + tx_gap_fill_variant.posedit.pos.end.offset = int( + '0') # int('-1') + tx_gap_fill_variant.posedit.edit.alt = '' + tx_gap_fill_variant.posedit.edit.ref = '' + + try: + tx_gap_fill_variant = self.validator.vm.n_to_c(tx_gap_fill_variant) + except: + fn.exceptPass() + genomic_gap_fill_variant = self.validator.vm.t_to_g(tx_gap_fill_variant, + reverse_normalized_hgvs_genomic.ac) + genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref + + try: + c_tx_hgvs_not_delins = self.validator.vm.n_to_c(tx_hgvs_not_delins) + except Exception: + c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) + genomic_gap_fill_variant_alt = self.validator.vm.t_to_g(c_tx_hgvs_not_delins, + self.hgvs_genomic_5pr.ac) + + # Ensure an ALT exists + try: + if genomic_gap_fill_variant_alt.posedit.edit.alt is None: + genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": 
+ genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( + genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant = self.validator.hp.parse_hgvs_variant( + genomic_gap_fill_variant_delins_from_dup) + genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( + genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( + genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt = self.validator.hp.parse_hgvs_variant( + genomic_gap_fill_variant_alt_delins_from_dup) + + # Correct insertion alts + if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': + append_ref = self.validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, + genomic_gap_fill_variant_alt.posedit.pos.end.base) + genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ + 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ + append_ref[1] + + # Split the reference and replacing alt sequence into a dictionary + reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) + if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: + alternate_bases = list( + genomic_gap_fill_variant_alt.posedit.edit.alt) + else: + # Deletions with no ins + pre_alternate_bases = list( + genomic_gap_fill_variant_alt.posedit.edit.ref) + alternate_bases = [] + for base in pre_alternate_bases: + alternate_bases.append('X') + + # Create the dictionaries + ref_start = 
genomic_gap_fill_variant.posedit.pos.start.base + alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base + ref_base_dict = {} + for base in reference_bases: + ref_base_dict[ref_start] = str(base) + ref_start = ref_start + 1 + + alt_base_dict = {} + + # NEED TO SEARCH FOR RANGE = and replace with interval_range + # Need to search for int and replace with integer + + # Note, all variants will be forced into the format delete insert + # Deleted bases in the ALT will be substituted for X + for integer in range( + genomic_gap_fill_variant_alt.posedit.pos.start.base, + genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): + if integer == alt_start: + alt_base_dict[integer] = str(''.join(alternate_bases)) + else: + alt_base_dict[integer] = 'X' + + # Generate the alt sequence + alternate_sequence_bases = [] + for integer in range( + genomic_gap_fill_variant.posedit.pos.start.base, + genomic_gap_fill_variant.posedit.pos.end.base + 1, + 1): + if integer in list(alt_base_dict.keys()): + alternate_sequence_bases.append(alt_base_dict[integer]) + else: + alternate_sequence_bases.append(ref_base_dict[integer]) + alternate_sequence = ''.join(alternate_sequence_bases) + alternate_sequence = alternate_sequence.replace('X', '') + + # Add the new alt to the gap fill variant and generate transcript variant + genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence + hgvs_refreshed_variant = self.validator.vm.g_to_t(genomic_gap_fill_variant, + tx_gap_fill_variant.ac) + + # Set warning + gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) + disparity_deletion_in[1] = [gap_size] + auto_info = auto_info + str( + stored_hgvs_not_delins.ac) + ':g.' 
+ str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + else: + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + auto_info = auto_info + '%s' % (gap_position) + + else: + if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: + # In this instance, we have identified a transcript gap but the n. version of + # the transcript variant but do not have a position which actually hits the gap, + # so the variant likely spans the gap, and is not picked up by an offset. 
+ try: + c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + g1 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g3 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + ng2 = self.variant.hn.normalize(g2) + g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( + len(g3.posedit.edit.ref) - 1) + try: + c2 = self.validator.vm.g_to_t(g3, c1.ac) + if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: + pass + else: + tx_hgvs_not_delins = c2 + try: + tx_hgvs_not_delins = self.validator.vm.c_to_n(tx_hgvs_not_delins) + except hgvs.exceptions.HGVSError: + fn.exceptPass() + except hgvs.exceptions.HGVSInvalidVariantError: + fn.exceptPass() + + if re.search(r'\+', str( + tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\+', + str( + tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + 
g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = self.validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\+', str( + tx_hgvs_not_delins.posedit.pos.end)) and not re.search(r'\+', + str( + tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = 
reference + g3.posedit.edit.alt = alternate + c3 = self.validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base + gpe = for_location_c.posedit.pos.end.base + 1 + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\-', str( + tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + r'\-', + str( + tx_hgvs_not_delins.posedit.pos.end)): + auto_info = auto_info + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c2 = tx_hgvs_not_delins + c1 = copy.deepcopy(c2) + c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + c1.posedit.pos.start.offset = 0 + c1.posedit.pos.end = c2.posedit.pos.start + c1.posedit.edit.ref = '' + c1.posedit.edit.alt = '' + if orientation != -1: + g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + else: + g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = self.validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + 
for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', str(for_location_c)): + for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.start.base - 1 + gpe = for_location_c.posedit.pos.start.base + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + elif re.search(r'\-', str( + tx_hgvs_not_delins.posedit.pos.end)) and not re.search(r'\-', + str( + tx_hgvs_not_delins.posedit.pos.start)): + auto_info = auto_info + 'Genome position ' + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + tx_hgvs_not_delins.ac) + gapped_transcripts = gapped_transcripts + ' ' + str( + tx_hgvs_not_delins.ac) + non_valid_caution = 'true' + try: + c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) + except: + c1 = tx_hgvs_not_delins + c2 = copy.deepcopy(c1) + c2.posedit.pos.start = c1.posedit.pos.end + c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + c2.posedit.pos.end.offset = 0 + c2.posedit.edit.ref = '' + c2.posedit.edit.alt = '' + if orientation != -1: + g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2.posedit.edit.alt = g2.posedit.edit.ref + else: + g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) + g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) + g1.posedit.edit.alt = g1.posedit.edit.ref + reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + g3 = copy.deepcopy(g1) + g3.posedit.pos.end.base = g2.posedit.pos.end.base + g3.posedit.edit.ref = reference + g3.posedit.edit.alt = alternate + c3 = self.validator.vm.g_to_t(g3, c1.ac) + hgvs_refreshed_variant = c3 + # Alignment position + for_location_c = copy.deepcopy(hgvs_refreshed_variant) + if re.match('NM_', 
str(for_location_c)): + for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + gps = for_location_c.posedit.pos.end.base - 1 + gpe = for_location_c.posedit.pos.end.base + gap_position = ' between positions c.' + str(gps) + '_' + str( + gpe) + '\n' + # Warn update + auto_info = auto_info + '%s' % (gap_position) + else: + auto_info = auto_info + str( + stored_hgvs_not_delins.ac) + ':g.' + str( + stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' genomic base(s) that fail to align to transcript ' + str( + tx_hgvs_not_delins.ac) + '\n' + hgvs_refreshed_variant = tx_hgvs_not_delins + + # GAP IN THE CHROMOSOME + elif disparity_deletion_in[0] == 'chromosome': + # amend_RefSeqGene = 'true' + if possibility_counter == 3: + hgvs_refreshed_variant = stash_tx_right + elif possibility_counter == 4: + hgvs_refreshed_variant = stash_tx_left + else: + hgvs_refreshed_variant = chromosome_normalized_hgvs_coding + # Warn + auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' 
+ str( + hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( + disparity_deletion_in[ + 1]) + ' transcript base(s) that fail to align to chromosome ' + str( + hgvs_genomic.ac) + '\n' + else: + # Keep the same by re-setting rel_var + hgvs_refreshed_variant = hgvs_coding + # amend_RefSeqGene = 'false' + + # Edit the output + if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', + str( + hgvs_refreshed_variant.type)): + hgvs_refreshed_variant = self.variant.no_norm_evm.n_to_c(hgvs_refreshed_variant) + else: + pass + + try: + self.variant.hn.normalize(hgvs_refreshed_variant) + except Exception as e: + error = str(e) + # Ensure the final variant is not intronic nor does it cross exon boundaries + if re.match('Normalization of intronic variants is not supported', + error) or re.match( + 'Unsupported normalization of variants spanning the exon-intron boundary', + error): + hgvs_refreshed_variant = saved_hgvs_coding + else: + continue + + # Quick check to make sure the coding variant has not changed + try: + to_test = self.variant.hn.normalize(hgvs_refreshed_variant) + except: + to_test = hgvs_refreshed_variant + if str(to_test.posedit.edit) != str(hgvs_coding.posedit.edit): + # Try the next available genomic option + if hgvs_coding.posedit.edit.type == 'identity' and to_test.posedit.edit.type == 'identity': + hgvs_coding = to_test + else: + continue + + # Update hgvs_genomic + hgvs_alt_genomic = self.validator.myvm_t_to_g(hgvs_refreshed_variant, alt_chr, + self.variant.no_norm_evm, self.variant.hn) + if hgvs_alt_genomic.posedit.edit.type == 'identity': + re_c = self.validator.vm.g_to_t(hgvs_alt_genomic, hgvs_refreshed_variant.ac) + if (self.variant.hn.normalize(re_c)) != (self.variant.hn.normalize(hgvs_refreshed_variant)): + shuffle_left_g = copy.copy(hgvs_alt_genomic) + shuffle_left_g.posedit.edit.ref = '' + shuffle_left_g.posedit.edit.alt = '' + shuffle_left_g.posedit.pos.start.base = shuffle_left_g.posedit.pos.start.base - 1 + 
shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 + shuffle_left_g = self.variant.reverse_normalizer.normalize(shuffle_left_g) + re_c = self.validator.vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) + if (self.variant.hn.normalize(re_c)) != (self.variant.hn.normalize(hgvs_refreshed_variant)): + hgvs_alt_genomic = shuffle_left_g + + # If it is intronic, these vairables will not have been set + else: + # amend_RefSeqGene = 'false' + no_normalized_c = 'false' + + # Break if gap has been detected + if disparity_deletion_in[0] != 'false': + break + + # Normailse hgvs_genomic + try: + hgvs_alt_genomic = self.variant.hn.normalize(hgvs_alt_genomic) + except hgvs.exceptions.HGVSError as e: + # Strange error caused by gap in genomic + error = str(e) + if re.search('base start position must be <= end position', error) and \ + disparity_deletion_in[0] == 'chromosome': + if hgvs_alt_genomic.posedit.edit.type == 'delins': + start = hgvs_alt_genomic.posedit.pos.start.base + end = hgvs_alt_genomic.posedit.pos.end.base + lhb = self.validator.sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) + rhb = self.validator.sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) + hgvs_alt_genomic.posedit.edit.ref = lhb + rhb + hgvs_alt_genomic.posedit.edit.alt = lhb + hgvs_alt_genomic.posedit.edit.alt + rhb + hgvs_alt_genomic.posedit.pos.start.base = end + hgvs_alt_genomic.posedit.pos.end.base = start + hgvs_alt_genomic = self.variant.hn.normalize(hgvs_alt_genomic) + if hgvs_alt_genomic.posedit.edit.type == 'del': + start = hgvs_alt_genomic.posedit.pos.start.base + end = hgvs_alt_genomic.posedit.pos.end.base + lhb = self.validator.sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) + rhb = self.validator.sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) + hgvs_alt_genomic.posedit.edit.ref = lhb + rhb + hgvs_alt_genomic.posedit.edit.alt = lhb + rhb + hgvs_alt_genomic.posedit.pos.start.base = end + hgvs_alt_genomic.posedit.pos.end.base = start + 
hgvs_alt_genomic = self.variant.hn.normalize(hgvs_alt_genomic) + + return hgvs_alt_genomic, hgvs_coding + def dup_ins_5prime_shift(self, stored_hgvs_not_delins, hgvs_genomic_5pr, saved_hgvs_coding): hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) # This test will only occur in dup of single base, insertion or substitution diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index 2def29a4..a55cfba0 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -889,3 +889,1405 @@ def transcripts_to_gene(variant, validator): variant.protein = str(hgvs_protein) return False + + +def final_tx_to_multiple_genomic(variant, validator, tx_variant, rec_var): + + warnings = '' + + # Multiple genomic variants + # multi_gen_vars = [] + hgvs_coding = validator.hp.parse_hgvs_variant(str(tx_variant)) + # Gap gene black list + try: + gene_symbol = validator.db.get_gene_symbol_from_transcriptID(hgvs_coding.ac) + except Exception: + fn.exceptPass() + else: + # If the gene symbol is not in the list, the value False will be returned + gap_compensation = vvChromosomes.gap_black_list(gene_symbol) + + # Look for variants spanning introns + try: + hgvs_coding = variant.hn.normalize(hgvs_coding) + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + error = str(e) + if re.search('boundary', str(error)) or re.search('spanning', str(error)): + gap_compensation = False + else: + pass + except hgvs.exceptions.HGVSError: + fn.exceptPass() + + # Warn gap code status + logger.warning("gap_compensation_3 = " + str(gap_compensation)) + multi_g = [] + multi_list = [] + mapping_options = validator.hdp.get_tx_mapping_options(hgvs_coding.ac) + for alt_chr in mapping_options: + if (re.match('NC_', alt_chr[1]) or re.match('NT_', alt_chr[1]) or re.match('NW_', + alt_chr[1])) and \ + alt_chr[2] == validator.alt_aln_method: + multi_list.append(alt_chr[1]) + + for alt_chr in multi_list: + try: + # Re set ori + ori = 
validator.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=alt_chr, + alt_aln_method=validator.alt_aln_method) + orientation = int(ori[0]['alt_strand']) + hgvs_alt_genomic = validator.myvm_t_to_g(hgvs_coding, alt_chr, variant.no_norm_evm, variant.hn) + # Set hgvs_genomic accordingly + hgvs_genomic = copy.deepcopy(hgvs_alt_genomic) + + # genomic_possibilities + # 1. take the simple 3 pr normalized hgvs_genomic + # 2. Lock in hgvs_genomic at its most 5 prime position wrt genome + hgvs_genomic_possibilities = [] + + gap_mapper = gapped_mapping.GapMapper(variant, validator) + + # Loop out gap code under these circumstances! + if gap_compensation is True: + + hgvs_alt_genomic, hgvs_coding = gap_mapper.g_to_t_gap_compensation_version3(hgvs_alt_genomic, hgvs_coding, ori, alt_chr, rec_var) + + # logger.warning('g_to_t gap code 3 active') + # rn_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_alt_genomic) + # hgvs_genomic_possibilities.append(rn_hgvs_genomic) + # if orientation != -1: + # try: + # chromosome_normalized_hgvs_coding = variant.reverse_normalizer.normalize( + # hgvs_coding) + # except hgvs.exceptions.HGVSUnsupportedOperationError as e: + # chromosome_normalized_hgvs_coding = hgvs_coding + # else: + # try: + # chromosome_normalized_hgvs_coding = variant.hn.normalize(hgvs_coding) + # except hgvs.exceptions.HGVSUnsupportedOperationError as e: + # error = str(e) + # chromosome_normalized_hgvs_coding = hgvs_coding + # + # most_3pr_hgvs_genomic = validator.myvm_t_to_g(chromosome_normalized_hgvs_coding, + # alt_chr, + # variant.no_norm_evm, variant.hn) + # hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) + # + # # First to the right + # hgvs_stash = copy.deepcopy(hgvs_coding) + # try: + # hgvs_stash = variant.no_norm_evm.c_to_n(hgvs_stash) + # except: + # fn.exceptPass() + # try: + # stash_ac = hgvs_stash.ac + # stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, variant.primary_assembly, variant.hn, validator.sf) + # stash_pos = int(stash_dict['pos']) + # 
stash_ref = stash_dict['ref'] + # stash_alt = stash_dict['alt'] + # # Generate an end position + # stash_end = str(stash_pos + len(stash_ref) - 1) + # # make a not real deletion insertion + # stash_hgvs_not_delins = validator.hp.parse_hgvs_variant( + # stash_ac + ':' + hgvs_stash.type + '.' + str( + # stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + # try: + # stash_hgvs_not_delins = variant.no_norm_evm.n_to_c(stash_hgvs_not_delins) + # except: + # fn.exceptPass() + # # Store a tx copy for later use + # test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) + # stash_genomic = validator.myvm_t_to_g(test_stash_tx_right, hgvs_alt_genomic.ac, + # variant.no_norm_evm, variant.hn) + # # Stash the outputs if required + # # test variants = NC_000006.11:g.90403795G= (causes double identity) + # # NC_000002.11:g.73675227_73675228insCTC (? incorrect assumed insertion position) + # # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': + # # pass + # if len(test_stash_tx_right.posedit.edit.ref) == (( + # stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + # stash_tx_right = test_stash_tx_right + # if hasattr(test_stash_tx_right.posedit.edit, + # 'alt') and test_stash_tx_right.posedit.edit.alt is not None: + # alt = test_stash_tx_right.posedit.edit.alt + # else: + # alt = '' + # if hasattr(stash_genomic.posedit.edit, + # 'alt') and stash_genomic.posedit.edit.alt is not None: + # g_alt = stash_genomic.posedit.edit.alt + # else: + # g_alt = '' + # if (len(alt) - ( + # test_stash_tx_right.posedit.pos.end.base - test_stash_tx_right.posedit.pos.start.base) + 1) != ( + # len(g_alt) - ( + # stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + # hgvs_genomic_possibilities.append(stash_genomic) + # else: + # hgvs_genomic_possibilities.append('') + # 
elif test_stash_tx_right.posedit.edit.type == 'identity': + # reform_ident = str(test_stash_tx_right).split(':')[0] + # reform_ident = reform_ident + ':c.' + str(test_stash_tx_right.posedit.pos) + 'del' + str( + # test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) + # hgvs_reform_ident = validator.hp.parse_hgvs_variant(reform_ident) + # try: + # variant.hn.normalize(hgvs_reform_ident) + # except hgvs.exceptions.HGVSError as e: + # error = str(e) + # if re.search('spanning the exon-intron boundary', error): + # stash_tx_right = test_stash_tx_right + # hgvs_genomic_possibilities.append('') + # else: + # stash_tx_right = test_stash_tx_right + # hgvs_genomic_possibilities.append(stash_genomic) + # else: + # try: + # variant.hn.normalize(test_stash_tx_right) + # except hgvs.exceptions.HGVSUnsupportedOperationError: + # hgvs_genomic_possibilities.append('') + # else: + # stash_tx_right = test_stash_tx_right + # hgvs_genomic_possibilities.append(stash_genomic) + # except hgvs.exceptions.HGVSError as e: + # fn.exceptPass() + # except ValueError: + # fn.exceptPass() + # + # # Then to the left + # hgvs_stash = copy.deepcopy(hgvs_coding) + # try: + # hgvs_stash = variant.no_norm_evm.c_to_n(hgvs_stash) + # except: + # fn.exceptPass() + # try: + # stash_ac = hgvs_stash.ac + # stash_dict = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, variant.primary_assembly, + # variant.reverse_normalizer, validator.sf) + # stash_pos = int(stash_dict['pos']) + # stash_ref = stash_dict['ref'] + # stash_alt = stash_dict['alt'] + # # Generate an end position + # stash_end = str(stash_pos + len(stash_ref) - 1) + # # make a not real deletion insertion + # stash_hgvs_not_delins = validator.hp.parse_hgvs_variant( + # stash_ac + ':' + hgvs_stash.type + '.' 
+ str( + # stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + # try: + # stash_hgvs_not_delins = variant.no_norm_evm.n_to_c(stash_hgvs_not_delins) + # except: + # fn.exceptPass() + # # Store a tx copy for later use + # test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) + # stash_genomic = validator.myvm_t_to_g(test_stash_tx_left, hgvs_alt_genomic.ac, + # variant.no_norm_evm, variant.hn) + # # Stash the outputs if required + # # test variants = NC_000006.11:g.90403795G= (causes double identity) + # # NC_000002.11:g.73675227_73675228insCTC + # # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': + # # pass + # if len(test_stash_tx_left.posedit.edit.ref) == (( + # stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): + # stash_tx_left = test_stash_tx_left + # if hasattr(test_stash_tx_left.posedit.edit, + # 'alt') and test_stash_tx_left.posedit.edit.alt is not None: + # alt = test_stash_tx_left.posedit.edit.alt + # else: + # alt = '' + # if hasattr(stash_genomic.posedit.edit, + # 'alt') and stash_genomic.posedit.edit.alt is not None: + # g_alt = stash_genomic.posedit.edit.alt + # else: + # g_alt = '' + # if (len(alt) - ( + # test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( + # len(g_alt) - ( + # stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + # hgvs_genomic_possibilities.append(stash_genomic) + # else: + # hgvs_genomic_possibilities.append('') + # elif test_stash_tx_left.posedit.edit.type == 'identity': + # reform_ident = str(test_stash_tx_left).split(':')[0] + # reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_left.posedit.pos) + 'del' + str( + # test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) + # hgvs_reform_ident = validator.hp.parse_hgvs_variant(reform_ident) + # try: + # variant.hn.normalize(hgvs_reform_ident) + # except hgvs.exceptions.HGVSError as e: + # error = str(e) + # if re.search('spanning the exon-intron boundary', error): + # stash_tx_left = test_stash_tx_left + # hgvs_genomic_possibilities.append('') + # else: + # stash_tx_left = test_stash_tx_left + # hgvs_genomic_possibilities.append(stash_genomic) + # else: + # try: + # variant.hn.normalize(test_stash_tx_left) + # except hgvs.exceptions.HGVSUnsupportedOperationError: + # hgvs_genomic_possibilities.append('') + # else: + # stash_tx_left = test_stash_tx_left + # hgvs_genomic_possibilities.append(stash_genomic) + # except hgvs.exceptions.HGVSError as e: + # fn.exceptPass() + # except ValueError: + # fn.exceptPass() + # + # # direct mapping from reverse_normalized transcript insertions in the delins format + # try: + # if hgvs_coding.posedit.edit.type == 'ins': + # most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) + # most_3pr_hgvs_transcript_variant = variant.reverse_normalizer.normalize(hgvs_coding) + # try: + # n_3pr = validator.vm.c_to_n(most_3pr_hgvs_transcript_variant) + # n_5pr = validator.vm.c_to_n(most_5pr_hgvs_transcript_variant) + # except: + # n_3pr = most_3pr_hgvs_transcript_variant + # n_5pr = most_5pr_hgvs_transcript_variant + # # Make into a delins by adding the ref bases to the variant ref and alt + # pr3_ref = validator.sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, + # n_3pr.posedit.pos.end.base) + # pr5_ref = validator.sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, + # n_5pr.posedit.pos.end.base) + # most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref + # most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref + # most_3pr_hgvs_transcript_variant.posedit.edit.alt = 
pr3_ref[ + # 0] + most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ + # pr3_ref[1] + # most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[ + # 0] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ + # pr5_ref[1] + # # Map to the genome + # genomic_from_most_3pr_hgvs_transcript_variant = validator.vm.t_to_g( + # most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) + # genomic_from_most_5pr_hgvs_transcript_variant = validator.vm.t_to_g( + # most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) + # + # # Normalize - If the variant spans a gap it should then form a static genomic variant + # try: + # genomic_from_most_3pr_hgvs_transcript_variant = variant.hn.normalize( + # genomic_from_most_3pr_hgvs_transcript_variant) + # except hgvs.exceptions.HGVSInvalidVariantError as e: + # error = str(e) + # if error == 'base start position must be <= end position': + # start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base + # end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base + # genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end + # genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start + # genomic_from_most_3pr_hgvs_transcript_variant = variant.hn.normalize( + # genomic_from_most_3pr_hgvs_transcript_variant) + # try: + # genomic_from_most_5pr_hgvs_transcript_variant = variant.hn.normalize( + # genomic_from_most_5pr_hgvs_transcript_variant) + # except hgvs.exceptions.HGVSInvalidVariantError as e: + # error = str(e) + # if error == 'base start position must be <= end position': + # start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base + # end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base + # genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end + # genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start + # genomic_from_most_5pr_hgvs_transcript_variant = variant.hn.normalize( + # 
genomic_from_most_5pr_hgvs_transcript_variant) + # + # try: + # if genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + # genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + # except Exception as e: + # if str(e) == "'Dup' object has no attribute 'alt'": + # genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' + str( + # genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + # genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + # genomic_from_most_3pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( + # genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) + # + # try: + # if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + # most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + # except Exception as e: + # if str(e) == "'Dup' object has no attribute 'alt'": + # most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' 
+ str( + # most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + # most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref + # most_3pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( + # most_3pr_hgvs_transcript_variant_delins_from_dup) + # + # try: + # if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + # genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + # except Exception as e: + # if str(e) == "'Dup' object has no attribute 'alt'": + # genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' + str( + # genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + # genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + # genomic_from_most_5pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( + # genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) + # + # try: + # if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + # most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + # except Exception as e: + # if str(e) == "'Dup' object has no attribute 'alt'": + # most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' 
+ str( + # most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + # most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref + # most_5pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( + # most_5pr_hgvs_transcript_variant_delins_from_dup) + # + # if len( + # genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( + # most_3pr_hgvs_transcript_variant.posedit.edit.alt): + # hgvs_genomic_possibilities.append( + # genomic_from_most_3pr_hgvs_transcript_variant) + # if len( + # genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( + # most_5pr_hgvs_transcript_variant.posedit.edit.alt): + # hgvs_genomic_possibilities.append( + # genomic_from_most_5pr_hgvs_transcript_variant) + # + # except hgvs.exceptions.HGVSUnsupportedOperationError as e: + # error = str(e) + # if re.match('Normalization of intronic variants is not supported', + # error) or re.match( + # 'Unsupported normalization of variants spanning the exon-intron boundary', + # error): + # pass + # fn.exceptPass() + # + # # Set variables for problem specific warnings + # gapped_alignment_warning = '' + # corrective_action_taken = '' + # gapped_transcripts = '' + # auto_info = '' + # + # # Mark as not disparity detected + # disparity_deletion_in = ['false', 'false'] + # # Loop through to see if a gap can be located + # possibility_counter = 0 + # for possibility in hgvs_genomic_possibilities: + # possibility_counter = possibility_counter + 1 + # # Loop out stash possibilities which will not spot gaps so are empty + # if possibility == '': + # continue + # + # # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps + # hgvs_genomic_variant = possibility + # stored_hgvs_genomic_variant = copy.deepcopy(hgvs_genomic_variant) + # + # # Reverse normalize 
hgvs_genomic_variant: NOTE will replace ref + # try: + # reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize( + # hgvs_genomic_variant) + # except hgvs.exceptions.HGVSError as e: + # # Strange error caused by gap in genomic + # error = str(e) + # if re.search('base start position must be <= end position', error): + # if hgvs_genomic.posedit.edit.type == 'delins': + # start = hgvs_genomic.posedit.pos.start.base + # end = hgvs_genomic.posedit.pos.end.base + # lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + # rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + # hgvs_genomic.posedit.edit.ref = lhb + rhb + # hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + # hgvs_genomic.posedit.pos.start.base = end + # hgvs_genomic.posedit.pos.end.base = start + # reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize( + # hgvs_genomic) + # if hgvs_genomic.posedit.edit.type == 'del': + # start = hgvs_genomic.posedit.pos.start.base + # end = hgvs_genomic.posedit.pos.end.base + # lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + # rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + # hgvs_genomic.posedit.edit.ref = lhb + rhb + # hgvs_genomic.posedit.edit.alt = lhb + rhb + # hgvs_genomic.posedit.pos.start.base = end + # hgvs_genomic.posedit.pos.end.base = start + # reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize( + # hgvs_genomic) + # if re.search('insertion length must be 1', error): + # if hgvs_genomic.posedit.edit.type == 'ins': + # start = hgvs_genomic.posedit.pos.start.base + # end = hgvs_genomic.posedit.pos.end.base + # ref_bases = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) + # lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + # rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start, end) + # hgvs_genomic.posedit.edit.ref = lhb + rhb + # hgvs_genomic.posedit.edit.alt = lhb + 
hgvs_genomic.posedit.edit.alt + rhb + # reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize( + # hgvs_genomic) + # + # hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + # # Store a copy for later use + # stored_hgvs_genomic_5pr = copy.deepcopy(hgvs_genomic_5pr) + # + # # Make VCF + # vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, variant.primary_assembly, + # variant.reverse_normalizer, validator.sf) + # chr = vcf_dict['chr'] + # pos = vcf_dict['pos'] + # ref = vcf_dict['ref'] + # alt = vcf_dict['alt'] + # + # # Look for exonic gaps within transcript or chromosome + # no_normalized_c = 'false' # Mark true to not produce an additional normalization of c. + # + # # Generate an end position + # end = str(int(pos) + len(ref) - 1) + # pos = str(pos) + # + # # Store a not real deletion insertion to test for gapping + # stored_hgvs_not_delins = validator.hp.parse_hgvs_variant(str( + # hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) + # v = [chr, pos, ref, alt] + # + # # Save a copy of current hgvs_coding + # try: + # saved_hgvs_coding = variant.no_norm_evm.g_to_t(stored_hgvs_not_delins, + # hgvs_coding.ac) + # except Exception as e: + # if str( + # e) == 'start or end or both are beyond the bounds of transcript record': + # saved_hgvs_coding = hgvs_coding + # continue + # + # # Detect intronic variation using normalization + # intronic_variant = 'false' + # # Look for normalized variant options that do not match hgvs_coding + # if orientation == -1: + # # position genomic at its most 5 prime position + # try: + # query_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) + # except: + # query_genomic = hgvs_genomic + # # Map to the transcript ant test for movement + # try: + # hgvs_seek_var = variant.evm.g_to_t(query_genomic, hgvs_coding.ac) + # except hgvs.exceptions.HGVSError as e: + # hgvs_seek_var = saved_hgvs_coding + # else: + # seek_var = 
fn.valstr(hgvs_seek_var) + # seek_ac = str(hgvs_seek_var.ac) + # if ( + # hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + # hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + # hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + # hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + # pass + # else: + # hgvs_seek_var = saved_hgvs_coding + # + # elif orientation != -1: + # # position genomic at its most 3 prime position + # try: + # query_genomic = variant.hn.normalize(hgvs_genomic) + # except: + # query_genomic = hgvs_genomic + # # Map to the transcript and test for movement + # try: + # hgvs_seek_var = variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + # except hgvs.exceptions.HGVSError as e: + # hgvs_seek_var = saved_hgvs_coding + # seek_var = fn.valstr(hgvs_seek_var) + # seek_ac = str(hgvs_seek_var.ac) + # if ( + # hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + # hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + # hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + # hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + # pass + # else: + # hgvs_seek_var = saved_hgvs_coding + # + # try: + # intron_test = variant.hn.normalize(hgvs_seek_var) + # except hgvs.exceptions.HGVSUnsupportedOperationError as e: + # error = str(e) + # if re.match('Normalization of intronic variants is not supported', + # error) or re.match( + # 'Unsupported normalization of variants spanning the exon-intron boundary', + # error): + # if re.match( + # 'Unsupported normalization of variants spanning the exon-intron boundary', + # error): + # intronic_variant = 'hard_fail' + # else: + # # Double check to see whether the variant is actually intronic? 
+ # for exon in ori: + # genomic_start = int(exon['alt_start_i']) + # genomic_end = int(exon['alt_end_i']) + # if ( + # hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + # hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + # intronic_variant = 'false' + # break + # else: + # intronic_variant = 'true' + # + # if intronic_variant != 'hard_fail': + # if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', + # str( + # hgvs_seek_var.posedit.pos)) or re.search( + # r'\*\d+\+', str( + # hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str( + # hgvs_seek_var.posedit.pos)): + # # Double check to see whether the variant is actually intronic? + # for exon in ori: + # genomic_start = int(exon['alt_start_i']) + # genomic_end = int(exon['alt_end_i']) + # if ( + # hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + # hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + # intronic_variant = 'false' + # break + # else: + # intronic_variant = 'true' + # + # if intronic_variant != 'true': + # # Flag RefSeqGene for ammendment + # # amend_RefSeqGene = 'false' + # # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths + # if stored_hgvs_not_delins != '': + # # Refresh hgvs_not_delins from stored_hgvs_not_delins + # hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) + # # This test will only occur in dup of single base, insertion or substitution + # if not re.search('_', str(hgvs_not_delins.posedit.pos)): + # if re.search('dup', + # hgvs_genomic_5pr.posedit.edit.type) or re.search( + # 'ins', hgvs_genomic_5pr.posedit.edit.type): + # # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos + # plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) + # plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 + # plussed_hgvs_not_delins.posedit.edit.ref = '' + # transcript_variant = variant.no_norm_evm.g_to_t(plussed_hgvs_not_delins, + # str( + # saved_hgvs_coding.ac)) + # if (( + # transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( + # hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): + # if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + # hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + # start = hgvs_not_delins.posedit.pos.start.base - 1 + # end = hgvs_not_delins.posedit.pos.end.base + # ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, + # end) + # hgvs_not_delins.posedit.edit.ref = ref_bases + # hgvs_not_delins.posedit.edit.alt = ref_bases[ + # :1] + hgvs_not_delins.posedit.edit.alt[ + # 1:] + ref_bases[ + # 1:] + # elif re.search('ins', str( + # hgvs_genomic_5pr.posedit.edit)) and re.search('del', + # str( + # hgvs_genomic_5pr.posedit.edit)): + # hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + # elif re.search('ins', str( + # hgvs_genomic_5pr.posedit.edit)) and not re.search( + # 'del', + # str( + # hgvs_genomic_5pr.posedit.edit)): + # hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + # start = hgvs_not_delins.posedit.pos.start.base - 1 + # end = hgvs_not_delins.posedit.pos.end.base + # ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, + # end) + # hgvs_not_delins.posedit.edit.ref = ref_bases + # hgvs_not_delins.posedit.edit.alt = ref_bases[ + # :1] + hgvs_not_delins.posedit.edit.alt[ + # 1:] + ref_bases[ + # 1:] + # else: + # if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + # hgvs_not_delins.posedit.pos.end.base = 
hgvs_not_delins.posedit.pos.start.base + 1 + # start = hgvs_not_delins.posedit.pos.start.base - 1 + # end = hgvs_not_delins.posedit.pos.end.base + # ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, + # end) + # hgvs_not_delins.posedit.edit.ref = ref_bases + # hgvs_not_delins.posedit.edit.alt = ref_bases[ + # :1] + hgvs_not_delins.posedit.edit.alt[ + # 1:] + ref_bases[ + # 1:] + # elif re.search('ins', str( + # hgvs_genomic_5pr.posedit.edit)) and re.search('del', + # str( + # hgvs_genomic_5pr.posedit.edit)): + # hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + # elif re.search('ins', str( + # hgvs_genomic_5pr.posedit.edit)) and not re.search( + # 'del', + # str( + # hgvs_genomic_5pr.posedit.edit)): + # hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + # start = hgvs_not_delins.posedit.pos.start.base - 1 + # end = hgvs_not_delins.posedit.pos.end.base + # ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, + # end) + # hgvs_not_delins.posedit.edit.ref = ref_bases + # hgvs_not_delins.posedit.edit.alt = ref_bases[ + # :1] + hgvs_not_delins.posedit.edit.alt[ + # 1:] + ref_bases[ + # 1:] + # else: + # pass + # else: + # pass + # tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + # saved_hgvs_coding.ac) + # # Create normalized version of tx_hgvs_not_delins + # rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) + # # Check for +1 base and adjust + # if re.search(r'\+', + # str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( + # r'\+', + # str( + # rn_tx_hgvs_not_delins.posedit.pos.start)): + # # Remove offsetting to span the gap + # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + # rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # try: + # rn_tx_hgvs_not_delins.posedit.edit.alt = '' + # except: 
+ # fn.exceptPass() + # + # elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # # move tx end base to next available non-offset base + # rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 + # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # if re.match('NM_', str(rn_tx_hgvs_not_delins)): + # test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + # else: + # test_tx_var = rn_tx_hgvs_not_delins + # # re-make genomic and tx + # hgvs_not_delins = validator.myvm_t_to_g(test_tx_var, alt_chr, + # variant.no_norm_evm, variant.hn) + # rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + # str( + # saved_hgvs_coding.ac)) + # elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # # move tx start base to previous available non-offset base + # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # if re.match('NM_', str(rn_tx_hgvs_not_delins)): + # test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + # else: + # test_tx_var = rn_tx_hgvs_not_delins + # # re-make genomic and tx + # hgvs_not_delins = validator.myvm_t_to_g(test_tx_var, alt_chr, + # variant.no_norm_evm, variant.hn) + # rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + # str( + # saved_hgvs_coding.ac)) + # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # # else: + # # pass + # + # # Check for -ve base and adjust + # elif re.search(r'\-', + # str( + # rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( + # r'\-', + # str( + # rn_tx_hgvs_not_delins.posedit.pos.start)): + # # Remove offsetting to span the gap + # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + # rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # try: + # 
rn_tx_hgvs_not_delins.posedit.edit.alt = '' + # except: + # fn.exceptPass() + # elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # # move tx end base back to next available non-offset base + # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # # Delete the ref + # rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # # Add the additional base to the ALT + # start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 + # end = rn_tx_hgvs_not_delins.posedit.pos.end.base + # ref_bases = validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + # rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases + # if re.match('NM_', str(rn_tx_hgvs_not_delins)): + # test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + # else: + # test_tx_var = rn_tx_hgvs_not_delins + # # re-make genomic and tx + # hgvs_not_delins = validator.myvm_t_to_g(test_tx_var, alt_chr, + # variant.no_norm_evm, variant.hn) + # rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + # str( + # saved_hgvs_coding.ac)) + # elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # # move tx start base to previous available non-offset base + # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 + # rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # if re.match('NM_', str(rn_tx_hgvs_not_delins)): + # test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + # else: + # test_tx_var = rn_tx_hgvs_not_delins + # # re-make genomic and tx + # hgvs_not_delins = validator.myvm_t_to_g(test_tx_var, alt_chr, + # variant.no_norm_evm, variant.hn) + # rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, + # str( + # saved_hgvs_coding.ac)) + # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # else: + # fn.exceptPass() + # + # # Logic + # if len(hgvs_not_delins.posedit.edit.ref) < len( + # 
rn_tx_hgvs_not_delins.posedit.edit.ref): + # gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( + # hgvs_not_delins.posedit.edit.ref) + # disparity_deletion_in = ['chromosome', gap_length] + # elif len(hgvs_not_delins.posedit.edit.ref) > len( + # rn_tx_hgvs_not_delins.posedit.edit.ref): + # gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( + # rn_tx_hgvs_not_delins.posedit.edit.ref) + # disparity_deletion_in = ['transcript', gap_length] + # else: + # re_capture_tx_variant = [] + # for possibility in hgvs_genomic_possibilities: + # if possibility == '': + # continue + # hgvs_t_possibility = validator.vm.g_to_t(possibility, hgvs_coding.ac) + # if hgvs_t_possibility.posedit.edit.type == 'ins': + # try: + # hgvs_t_possibility = validator.vm.c_to_n(hgvs_t_possibility) + # except: + # continue + # if hgvs_t_possibility.posedit.pos.start.offset != 0 or hgvs_t_possibility.posedit.pos.end.offset != 0: + # continue + # ins_ref = validator.sf.fetch_seq(hgvs_t_possibility.ac, + # hgvs_t_possibility.posedit.pos.start.base - 1, + # hgvs_t_possibility.posedit.pos.start.base + 1) + # try: + # hgvs_t_possibility = validator.vm.n_to_c(hgvs_t_possibility) + # except: + # continue + # hgvs_t_possibility.posedit.edit.ref = ins_ref + # hgvs_t_possibility.posedit.edit.alt = ins_ref[ + # 0] + hgvs_t_possibility.posedit.edit.alt + \ + # ins_ref[1] + # if possibility.posedit.edit.type == 'ins': + # ins_ref = validator.sf.fetch_seq(possibility.ac, + # possibility.posedit.pos.start.base - 1, + # possibility.posedit.pos.end.base) + # possibility.posedit.edit.ref = ins_ref + # possibility.posedit.edit.alt = ins_ref[ + # 0] + possibility.posedit.edit.alt + \ + # ins_ref[1] + # if len(hgvs_t_possibility.posedit.edit.ref) < len( + # possibility.posedit.edit.ref): + # gap_length = len(possibility.posedit.edit.ref) - len( + # hgvs_t_possibility.posedit.edit.ref) + # re_capture_tx_variant = ['transcript', gap_length, + # hgvs_t_possibility] + # hgvs_not_delins = possibility + # 
hgvs_genomic_5pr = possibility + # break + # + # if re_capture_tx_variant != []: + # try: + # tx_hgvs_not_delins = validator.vm.c_to_n(re_capture_tx_variant[2]) + # except: + # tx_hgvs_not_delins = re_capture_tx_variant[2] + # disparity_deletion_in = re_capture_tx_variant[0:-1] + # else: + # pass + # + # # Final sanity checks + # try: + # validator.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + # except Exception as e: + # if str( + # e) == 'start or end or both are beyond the bounds of transcript record': + # continue + # try: + # variant.hn.normalize(tx_hgvs_not_delins) + # except hgvs.exceptions.HGVSUnsupportedOperationError as e: + # error = str(e) + # + # if re.match('Normalization of intronic variants is not supported', + # error) or re.match( + # 'Unsupported normalization of variants spanning the exon-intron boundary', + # error): + # if re.match( + # 'Unsupported normalization of variants spanning the exon-intron boundary', + # error): + # continue + # elif re.match('Normalization of intronic variants is not supported', + # error): + # # We know that this cannot be because of an intronic variant, so must be aligned to tx gap + # disparity_deletion_in = ['transcript', 'Requires Analysis'] + # + # # Recreate hgvs_genomic + # if disparity_deletion_in[0] == 'transcript': + # hgvs_genomic = hgvs_not_delins + # + # # Find oddly placed gaps where the tx variant is encompassed in the gap + # if disparity_deletion_in[0] == 'false' and ( + # possibility_counter == 3 or possibility_counter == 4): + # rg = variant.reverse_normalizer.normalize(hgvs_not_delins) + # rtx = validator.vm.g_to_t(rg, tx_hgvs_not_delins.ac) + # fg = variant.hn.normalize(hgvs_not_delins) + # ftx = validator.vm.g_to_t(fg, tx_hgvs_not_delins.ac) + # if ( + # rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( + # ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): + # exons = validator.hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, 
validator.alt_aln_method) + # exonic = False + # for ex_test in exons: + # if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ + # 7]) and ftx.posedit.pos.end.base in range(ex_test[6], + # ex_test[7]): + # exonic = True + # if exonic is True: + # hgvs_not_delins = fg + # hgvs_genomic = fg + # hgvs_genomic_5pr = fg + # try: + # tx_hgvs_not_delins = validator.vm.c_to_n(ftx) + # except Exception: + # tx_hgvs_not_delins = ftx + # disparity_deletion_in = ['transcript', 'Requires Analysis'] + # + # # Pre-processing of tx_hgvs_not_delins + # try: + # if tx_hgvs_not_delins.posedit.edit.alt is None: + # tx_hgvs_not_delins.posedit.edit.alt = '' + # except Exception as e: + # if str(e) == "'Dup' object has no attribute 'alt'": + # tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( + # tx_hgvs_not_delins.posedit.pos.start) + '_' + str( + # tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref + # tx_hgvs_not_delins = validator.hp.parse_hgvs_variant( + # tx_hgvs_not_delins_delins_from_dup) + # + # if disparity_deletion_in[0] == 'transcript': + # # amend_RefSeqGene = 'true' + # # ANY VARIANT WHOLLY WITHIN THE GAP + # if (re.search(r'\+', + # str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( + # r'\-', str(tx_hgvs_not_delins.posedit.pos.start))) and ( + # re.search(r'\+', + # str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( + # r'\-', str(tx_hgvs_not_delins.posedit.pos.end))): + # gapped_transcripts = gapped_transcripts + ' ' + str( + # tx_hgvs_not_delins.ac) + # + # # Copy the current variant + # tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) + # try: + # if tx_gap_fill_variant.posedit.edit.alt is None: + # tx_gap_fill_variant.posedit.edit.alt = '' + # except Exception as e: + # if str(e) == "'Dup' object has no attribute 'alt'": + # tx_gap_fill_variant_delins_from_dup = 
tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' + str( + # tx_gap_fill_variant.posedit.pos.start) + '_' + str( + # tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref + # tx_gap_fill_variant = validator.hp.parse_hgvs_variant( + # tx_gap_fill_variant_delins_from_dup) + # + # # Identify which half of the NOT-intron the start position of the variant is in + # if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): + # tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 + # tx_gap_fill_variant.posedit.pos.start.offset = int( + # '0') # int('+1') + # tx_gap_fill_variant.posedit.pos.end.offset = int( + # '0') # int('-1') + # tx_gap_fill_variant.posedit.edit.alt = '' + # tx_gap_fill_variant.posedit.edit.ref = '' + # elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): + # tx_gap_fill_variant.posedit.pos.start.offset = int( + # '0') # int('+1') + # tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 + # tx_gap_fill_variant.posedit.pos.end.offset = int( + # '0') # int('-1') + # tx_gap_fill_variant.posedit.edit.alt = '' + # tx_gap_fill_variant.posedit.edit.ref = '' + # + # try: + # tx_gap_fill_variant = validator.vm.n_to_c(tx_gap_fill_variant) + # except: + # fn.exceptPass() + # genomic_gap_fill_variant = validator.vm.t_to_g(tx_gap_fill_variant, + # reverse_normalized_hgvs_genomic.ac) + # genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref + # + # try: + # c_tx_hgvs_not_delins = validator.vm.n_to_c(tx_hgvs_not_delins) + # except Exception: + # c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) + # genomic_gap_fill_variant_alt = validator.vm.t_to_g(c_tx_hgvs_not_delins, + # hgvs_genomic_5pr.ac) + # + # # Ensure an ALT exists + # try: + # if genomic_gap_fill_variant_alt.posedit.edit.alt is None: + # 
genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' + # except Exception as e: + # if str(e) == "'Dup' object has no attribute 'alt'": + # genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( + # genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( + # genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref + # genomic_gap_fill_variant = validator.hp.parse_hgvs_variant( + # genomic_gap_fill_variant_delins_from_dup) + # genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( + # genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( + # genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref + # genomic_gap_fill_variant_alt = validator.hp.parse_hgvs_variant( + # genomic_gap_fill_variant_alt_delins_from_dup) + # + # # Correct insertion alts + # if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': + # append_ref = validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + # genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, + # genomic_gap_fill_variant_alt.posedit.pos.end.base) + # genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ + # 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ + # append_ref[1] + # + # # Split the reference and replacing alt sequence into a dictionary + # reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) + # if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: + # alternate_bases = list( + # genomic_gap_fill_variant_alt.posedit.edit.alt) + # else: + # # Deletions with no ins + # pre_alternate_bases = list( + # 
genomic_gap_fill_variant_alt.posedit.edit.ref) + # alternate_bases = [] + # for base in pre_alternate_bases: + # alternate_bases.append('X') + # + # # Create the dictionaries + # ref_start = genomic_gap_fill_variant.posedit.pos.start.base + # alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base + # ref_base_dict = {} + # for base in reference_bases: + # ref_base_dict[ref_start] = str(base) + # ref_start = ref_start + 1 + # + # alt_base_dict = {} + # + # # NEED TO SEARCH FOR RANGE = and replace with interval_range + # # Need to search for int and replace with integer + # + # # Note, all variants will be forced into the format delete insert + # # Deleted bases in the ALT will be substituted for X + # for integer in range( + # genomic_gap_fill_variant_alt.posedit.pos.start.base, + # genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): + # if integer == alt_start: + # alt_base_dict[integer] = str(''.join(alternate_bases)) + # else: + # alt_base_dict[integer] = 'X' + # + # # Generate the alt sequence + # alternate_sequence_bases = [] + # for integer in range( + # genomic_gap_fill_variant.posedit.pos.start.base, + # genomic_gap_fill_variant.posedit.pos.end.base + 1, + # 1): + # if integer in list(alt_base_dict.keys()): + # alternate_sequence_bases.append(alt_base_dict[integer]) + # else: + # alternate_sequence_bases.append(ref_base_dict[integer]) + # alternate_sequence = ''.join(alternate_sequence_bases) + # alternate_sequence = alternate_sequence.replace('X', '') + # + # # Add the new alt to the gap fill variant and generate transcript variant + # genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence + # hgvs_refreshed_variant = validator.vm.g_to_t(genomic_gap_fill_variant, + # tx_gap_fill_variant.ac) + # + # # Set warning + # gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) + # disparity_deletion_in[1] = [gap_size] + # auto_info = auto_info + str( + # stored_hgvs_not_delins.ac) + ':g.' 
+ str( + # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( + # tx_hgvs_not_delins.ac) + # non_valid_caution = 'true' + # + # # Alignment position + # for_location_c = copy.deepcopy(hgvs_refreshed_variant) + # if re.match('NM_', str(for_location_c)): + # for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + # if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): + # gps = for_location_c.posedit.pos.start.base - 1 + # gpe = for_location_c.posedit.pos.start.base + # else: + # gps = for_location_c.posedit.pos.start.base + # gpe = for_location_c.posedit.pos.start.base + 1 + # gap_position = ' between positions c.' + str(gps) + '_' + str( + # gpe) + '\n' + # auto_info = auto_info + '%s' % (gap_position) + # + # else: + # if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: + # # In this instance, we have identified a transcript gap but the n. version of + # # the transcript variant but do not have a position which actually hits the gap, + # # so the variant likely spans the gap, and is not picked up by an offset. 
+ # try: + # c1 = validator.vm.n_to_c(tx_hgvs_not_delins) + # except: + # c1 = tx_hgvs_not_delins + # g1 = validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) + # g3 = validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) + # g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + # ng2 = variant.hn.normalize(g2) + # g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( + # len(g3.posedit.edit.ref) - 1) + # try: + # c2 = validator.vm.g_to_t(g3, c1.ac) + # if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: + # pass + # else: + # tx_hgvs_not_delins = c2 + # try: + # tx_hgvs_not_delins = validator.vm.c_to_n(tx_hgvs_not_delins) + # except hgvs.exceptions.HGVSError: + # fn.exceptPass() + # except hgvs.exceptions.HGVSInvalidVariantError: + # fn.exceptPass() + # + # if re.search(r'\+', str( + # tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + # r'\+', + # str( + # tx_hgvs_not_delins.posedit.pos.end)): + # auto_info = auto_info + str( + # stored_hgvs_not_delins.ac) + ':g.' + str( + # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + # disparity_deletion_in[ + # 1]) + ' genomic base(s) that fail to align to transcript ' + str( + # tx_hgvs_not_delins.ac) + # non_valid_caution = 'true' + # try: + # c2 = validator.vm.n_to_c(tx_hgvs_not_delins) + # except: + # c2 = tx_hgvs_not_delins + # c1 = copy.deepcopy(c2) + # c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + # c1.posedit.pos.start.offset = 0 + # c1.posedit.pos.end = c2.posedit.pos.start + # c1.posedit.edit.ref = '' + # c1.posedit.edit.alt = '' + # if orientation != -1: + # g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + # g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + # g1.posedit.edit.alt = g1.posedit.edit.ref + # else: + # g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + # g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + # g2.posedit.edit.alt = g2.posedit.edit.ref + # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + # alternate = g1.posedit.edit.alt + 
g2.posedit.edit.alt[1:] + # g3 = copy.deepcopy(g1) + # g3.posedit.pos.end.base = g2.posedit.pos.end.base + # g3.posedit.edit.ref = reference + # g3.posedit.edit.alt = alternate + # c3 = validator.vm.g_to_t(g3, c1.ac) + # hgvs_refreshed_variant = c3 + # # Alignment position + # for_location_c = copy.deepcopy(hgvs_refreshed_variant) + # if re.match('NM_', str(for_location_c)): + # for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + # gps = for_location_c.posedit.pos.start.base + # gpe = for_location_c.posedit.pos.start.base + 1 + # gap_position = ' between positions c.' + str(gps) + '_' + str( + # gpe) + '\n' + # # Warn update + # auto_info = auto_info + '%s' % (gap_position) + # elif re.search(r'\+', str( + # tx_hgvs_not_delins.posedit.pos.end)) and not re.search(r'\+', + # str( + # tx_hgvs_not_delins.posedit.pos.start)): + # auto_info = auto_info + 'Genome position ' + str( + # stored_hgvs_not_delins.ac) + ':g.' + str( + # stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + # disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + # tx_hgvs_not_delins.ac) + # gapped_transcripts = gapped_transcripts + ' ' + str( + # tx_hgvs_not_delins.ac) + # non_valid_caution = 'true' + # try: + # c1 = validator.vm.n_to_c(tx_hgvs_not_delins) + # except: + # c1 = tx_hgvs_not_delins + # c2 = copy.deepcopy(c1) + # c2.posedit.pos.start = c1.posedit.pos.end + # c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + # c2.posedit.pos.end.offset = 0 + # c2.posedit.edit.ref = '' + # c2.posedit.edit.alt = '' + # if orientation != -1: + # g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + # g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + # g2.posedit.edit.alt = g2.posedit.edit.ref + # else: + # g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + # g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + # g1.posedit.edit.alt = g1.posedit.edit.ref + # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + # alternate = g1.posedit.edit.alt + 
g2.posedit.edit.alt[1:] + # g3 = copy.deepcopy(g1) + # g3.posedit.pos.end.base = g2.posedit.pos.end.base + # g3.posedit.edit.ref = reference + # g3.posedit.edit.alt = alternate + # c3 = validator.vm.g_to_t(g3, c1.ac) + # hgvs_refreshed_variant = c3 + # # Alignment position + # for_location_c = copy.deepcopy(hgvs_refreshed_variant) + # if re.match('NM_', str(for_location_c)): + # for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + # gps = for_location_c.posedit.pos.end.base + # gpe = for_location_c.posedit.pos.end.base + 1 + # gap_position = ' between positions c.' + str(gps) + '_' + str( + # gpe) + '\n' + # # Warn update + # auto_info = auto_info + '%s' % (gap_position) + # elif re.search(r'\-', str( + # tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + # r'\-', + # str( + # tx_hgvs_not_delins.posedit.pos.end)): + # auto_info = auto_info + str( + # stored_hgvs_not_delins.ac) + ':g.' + str( + # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + # disparity_deletion_in[ + # 1]) + ' genomic base(s) that fail to align to transcript ' + str( + # tx_hgvs_not_delins.ac) + # non_valid_caution = 'true' + # try: + # c2 = validator.vm.n_to_c(tx_hgvs_not_delins) + # except: + # c2 = tx_hgvs_not_delins + # c1 = copy.deepcopy(c2) + # c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + # c1.posedit.pos.start.offset = 0 + # c1.posedit.pos.end = c2.posedit.pos.start + # c1.posedit.edit.ref = '' + # c1.posedit.edit.alt = '' + # if orientation != -1: + # g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + # g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + # g1.posedit.edit.alt = g1.posedit.edit.ref + # else: + # g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + # g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + # g2.posedit.edit.alt = g2.posedit.edit.ref + # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + # g3 = copy.deepcopy(g1) + # g3.posedit.pos.end.base = 
g2.posedit.pos.end.base + # g3.posedit.edit.ref = reference + # g3.posedit.edit.alt = alternate + # c3 = validator.vm.g_to_t(g3, c1.ac) + # hgvs_refreshed_variant = c3 + # # Alignment position + # for_location_c = copy.deepcopy(hgvs_refreshed_variant) + # if re.match('NM_', str(for_location_c)): + # for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + # gps = for_location_c.posedit.pos.start.base - 1 + # gpe = for_location_c.posedit.pos.start.base + # gap_position = ' between positions c.' + str(gps) + '_' + str( + # gpe) + '\n' + # # Warn update + # auto_info = auto_info + '%s' % (gap_position) + # elif re.search(r'\-', str( + # tx_hgvs_not_delins.posedit.pos.end)) and not re.search(r'\-', + # str( + # tx_hgvs_not_delins.posedit.pos.start)): + # auto_info = auto_info + 'Genome position ' + str( + # stored_hgvs_not_delins.ac) + ':g.' + str( + # stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + # disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + # tx_hgvs_not_delins.ac) + # gapped_transcripts = gapped_transcripts + ' ' + str( + # tx_hgvs_not_delins.ac) + # non_valid_caution = 'true' + # try: + # c1 = validator.vm.n_to_c(tx_hgvs_not_delins) + # except: + # c1 = tx_hgvs_not_delins + # c2 = copy.deepcopy(c1) + # c2.posedit.pos.start = c1.posedit.pos.end + # c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + # c2.posedit.pos.end.offset = 0 + # c2.posedit.edit.ref = '' + # c2.posedit.edit.alt = '' + # if orientation != -1: + # g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + # g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + # g2.posedit.edit.alt = g2.posedit.edit.ref + # else: + # g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) + # g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) + # g1.posedit.edit.alt = g1.posedit.edit.ref + # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + # g3 = copy.deepcopy(g1) + # g3.posedit.pos.end.base = 
g2.posedit.pos.end.base + # g3.posedit.edit.ref = reference + # g3.posedit.edit.alt = alternate + # c3 = validator.vm.g_to_t(g3, c1.ac) + # hgvs_refreshed_variant = c3 + # # Alignment position + # for_location_c = copy.deepcopy(hgvs_refreshed_variant) + # if re.match('NM_', str(for_location_c)): + # for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) + # gps = for_location_c.posedit.pos.end.base - 1 + # gpe = for_location_c.posedit.pos.end.base + # gap_position = ' between positions c.' + str(gps) + '_' + str( + # gpe) + '\n' + # # Warn update + # auto_info = auto_info + '%s' % (gap_position) + # else: + # auto_info = auto_info + str( + # stored_hgvs_not_delins.ac) + ':g.' + str( + # stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( + # disparity_deletion_in[ + # 1]) + ' genomic base(s) that fail to align to transcript ' + str( + # tx_hgvs_not_delins.ac) + '\n' + # hgvs_refreshed_variant = tx_hgvs_not_delins + # + # # GAP IN THE CHROMOSOME + # elif disparity_deletion_in[0] == 'chromosome': + # # amend_RefSeqGene = 'true' + # if possibility_counter == 3: + # hgvs_refreshed_variant = stash_tx_right + # elif possibility_counter == 4: + # hgvs_refreshed_variant = stash_tx_left + # else: + # hgvs_refreshed_variant = chromosome_normalized_hgvs_coding + # # Warn + # auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' 
+ str( + # hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( + # disparity_deletion_in[ + # 1]) + ' transcript base(s) that fail to align to chromosome ' + str( + # hgvs_genomic.ac) + '\n' + # else: + # # Keep the same by re-setting rel_var + # hgvs_refreshed_variant = hgvs_coding + # # amend_RefSeqGene = 'false' + # + # # Edit the output + # if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', + # str( + # hgvs_refreshed_variant.type)): + # hgvs_refreshed_variant = variant.no_norm_evm.n_to_c(hgvs_refreshed_variant) + # else: + # pass + # + # try: + # variant.hn.normalize(hgvs_refreshed_variant) + # except Exception as e: + # error = str(e) + # # Ensure the final variant is not intronic nor does it cross exon boundaries + # if re.match('Normalization of intronic variants is not supported', + # error) or re.match( + # 'Unsupported normalization of variants spanning the exon-intron boundary', + # error): + # hgvs_refreshed_variant = saved_hgvs_coding + # else: + # continue + # + # # Quick check to make sure the coding variant has not changed + # try: + # to_test = variant.hn.normalize(hgvs_refreshed_variant) + # except: + # to_test = hgvs_refreshed_variant + # if str(to_test.posedit.edit) != str(hgvs_coding.posedit.edit): + # # Try the next available genomic option + # if hgvs_coding.posedit.edit.type == 'identity' and to_test.posedit.edit.type == 'identity': + # hgvs_coding = to_test + # else: + # continue + # + # # Update hgvs_genomic + # hgvs_alt_genomic = validator.myvm_t_to_g(hgvs_refreshed_variant, alt_chr, + # variant.no_norm_evm, variant.hn) + # if hgvs_alt_genomic.posedit.edit.type == 'identity': + # re_c = validator.vm.g_to_t(hgvs_alt_genomic, hgvs_refreshed_variant.ac) + # if (variant.hn.normalize(re_c)) != (variant.hn.normalize(hgvs_refreshed_variant)): + # shuffle_left_g = copy.copy(hgvs_alt_genomic) + # shuffle_left_g.posedit.edit.ref = '' + # shuffle_left_g.posedit.edit.alt = '' + # shuffle_left_g.posedit.pos.start.base = 
shuffle_left_g.posedit.pos.start.base - 1 + # shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 + # shuffle_left_g = variant.reverse_normalizer.normalize(shuffle_left_g) + # re_c = validator.vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) + # if (variant.hn.normalize(re_c)) != (variant.hn.normalize(hgvs_refreshed_variant)): + # hgvs_alt_genomic = shuffle_left_g + # + # # If it is intronic, these vairables will not have been set + # else: + # # amend_RefSeqGene = 'false' + # no_normalized_c = 'false' + # + # # Break if gap has been detected + # if disparity_deletion_in[0] != 'false': + # break + # + # # Normailse hgvs_genomic + # try: + # hgvs_alt_genomic = variant.hn.normalize(hgvs_alt_genomic) + # except hgvs.exceptions.HGVSError as e: + # # Strange error caused by gap in genomic + # error = str(e) + # if re.search('base start position must be <= end position', error) and \ + # disparity_deletion_in[0] == 'chromosome': + # if hgvs_alt_genomic.posedit.edit.type == 'delins': + # start = hgvs_alt_genomic.posedit.pos.start.base + # end = hgvs_alt_genomic.posedit.pos.end.base + # lhb = validator.sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) + # rhb = validator.sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) + # hgvs_alt_genomic.posedit.edit.ref = lhb + rhb + # hgvs_alt_genomic.posedit.edit.alt = lhb + hgvs_alt_genomic.posedit.edit.alt + rhb + # hgvs_alt_genomic.posedit.pos.start.base = end + # hgvs_alt_genomic.posedit.pos.end.base = start + # hgvs_alt_genomic = variant.hn.normalize(hgvs_alt_genomic) + # if hgvs_alt_genomic.posedit.edit.type == 'del': + # start = hgvs_alt_genomic.posedit.pos.start.base + # end = hgvs_alt_genomic.posedit.pos.end.base + # lhb = validator.sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) + # rhb = validator.sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) + # hgvs_alt_genomic.posedit.edit.ref = lhb + rhb + # hgvs_alt_genomic.posedit.edit.alt = lhb + rhb + # 
hgvs_alt_genomic.posedit.pos.start.base = end + # hgvs_alt_genomic.posedit.pos.end.base = start + # hgvs_alt_genomic = variant.hn.normalize(hgvs_alt_genomic) + + # Refresh the :g. variant + multi_g.append(hgvs_alt_genomic) + else: + multi_g.append(hgvs_alt_genomic) + corrective_action_taken = 'false' + + # In this instance, the gap code has generally found an incomplete-alignment rather than a + # truly gapped alignment. + except KeyError: + warnings = warnings + ': Suspected incomplete alignment between transcript %s and ' \ + 'genomic reference sequence %s' % (hgvs_coding.ac, + alt_chr) + continue + except hgvs.exceptions.HGVSError as e: + logger.error(str(e)) + logger.debug(str(e)) + continue + + if multi_g != []: + + multi_gen_vars = multi_g # '|'.join(multi_g) + else: + multi_gen_vars = [] + + return multi_gen_vars, hgvs_coding diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 9aac63db..036d16aa 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -753,1396 +753,1399 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Multiple genomic variants # multi_gen_vars = [] if tx_variant != '': - hgvs_coding = self.hp.parse_hgvs_variant(str(tx_variant)) - # Gap gene black list - try: - gene_symbol = self.db.get_gene_symbol_from_transcriptID(hgvs_coding.ac) - except Exception: - fn.exceptPass() - else: - # If the gene symbol is not in the list, the value False will be returned - gap_compensation = vvChromosomes.gap_black_list(gene_symbol) - # Look for variants spanning introns - try: - hgvs_coding = hn.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.search('boundary', str(error)) or re.search('spanning', str(error)): - gap_compensation = False - else: - pass - except hgvs.exceptions.HGVSError: - fn.exceptPass() + multi_gen_vars, hgvs_coding = 
mappers.final_tx_to_multiple_genomic(variant, self, tx_variant, rec_var) - # Warn gap code status - logger.warning("gap_compensation_3 = " + str(gap_compensation)) - multi_g = [] - multi_list = [] - mapping_options = self.hdp.get_tx_mapping_options(hgvs_coding.ac) - for alt_chr in mapping_options: - if (re.match('NC_', alt_chr[1]) or re.match('NT_', alt_chr[1]) or re.match('NW_', - alt_chr[1])) and \ - alt_chr[2] == alt_aln_method: - multi_list.append(alt_chr[1]) - - for alt_chr in multi_list: - try: - # Re set ori - ori = self.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=alt_chr, - alt_aln_method=alt_aln_method) - orientation = int(ori[0]['alt_strand']) - hgvs_alt_genomic = self.myvm_t_to_g(hgvs_coding, alt_chr, no_norm_evm, hn) - # Set hgvs_genomic accordingly - hgvs_genomic = copy.deepcopy(hgvs_alt_genomic) - - # genomic_possibilities - # 1. take the simple 3 pr normalized hgvs_genomic - # 2. Lock in hgvs_genomic at its most 5 prime position wrt genome - hgvs_genomic_possibilities = [] - - # Loop out gap code under these circumstances! 
- if gap_compensation is True: - logger.warning('g_to_t gap code 3 active') - rn_hgvs_genomic = reverse_normalizer.normalize(hgvs_alt_genomic) - hgvs_genomic_possibilities.append(rn_hgvs_genomic) - if orientation != -1: - try: - chromosome_normalized_hgvs_coding = reverse_normalizer.normalize( - hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - chromosome_normalized_hgvs_coding = hgvs_coding - else: - try: - chromosome_normalized_hgvs_coding = hn.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - chromosome_normalized_hgvs_coding = hgvs_coding - - most_3pr_hgvs_genomic = self.myvm_t_to_g(chromosome_normalized_hgvs_coding, - alt_chr, - no_norm_evm, hn) - hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) - - # First to the right - hgvs_stash = copy.deepcopy(hgvs_coding) - try: - hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) - except: - fn.exceptPass() - try: - stash_ac = hgvs_stash.ac - stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, self.sf) - stash_pos = int(stash_dict['pos']) - stash_ref = stash_dict['ref'] - stash_alt = stash_dict['alt'] - # Generate an end position - stash_end = str(stash_pos + len(stash_ref) - 1) - # make a not real deletion insertion - stash_hgvs_not_delins = self.hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' + str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) - try: - stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) - except: - fn.exceptPass() - # Store a tx copy for later use - test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) - stash_genomic = self.myvm_t_to_g(test_stash_tx_right, hgvs_alt_genomic.ac, - no_norm_evm, hn) - # Stash the outputs if required - # test variants = NC_000006.11:g.90403795G= (causes double identity) - # NC_000002.11:g.73675227_73675228insCTC (? 
incorrect assumed insertion position) - # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) - # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': - # pass - if len(test_stash_tx_right.posedit.edit.ref) == (( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - stash_tx_right = test_stash_tx_right - if hasattr(test_stash_tx_right.posedit.edit, - 'alt') and test_stash_tx_right.posedit.edit.alt is not None: - alt = test_stash_tx_right.posedit.edit.alt - else: - alt = '' - if hasattr(stash_genomic.posedit.edit, - 'alt') and stash_genomic.posedit.edit.alt is not None: - g_alt = stash_genomic.posedit.edit.alt - else: - g_alt = '' - if (len(alt) - ( - test_stash_tx_right.posedit.pos.end.base - test_stash_tx_right.posedit.pos.start.base) + 1) != ( - len(g_alt) - ( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - hgvs_genomic_possibilities.append(stash_genomic) - else: - hgvs_genomic_possibilities.append('') - elif test_stash_tx_right.posedit.edit.type == 'identity': - reform_ident = str(test_stash_tx_right).split(':')[0] - reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_right.posedit.pos) + 'del' + str( - test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) - hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) - try: - hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error): - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append('') - else: - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append(stash_genomic) - else: - try: - hn.normalize(test_stash_tx_right) - except hgvs.exceptions.HGVSUnsupportedOperationError: - hgvs_genomic_possibilities.append('') - else: - stash_tx_right = test_stash_tx_right - hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: - fn.exceptPass() - except ValueError: - fn.exceptPass() - - # Then to the left - hgvs_stash = copy.deepcopy(hgvs_coding) - try: - hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) - except: - fn.exceptPass() - try: - stash_ac = hgvs_stash.ac - stash_dict = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, primary_assembly, - reverse_normalizer, self.sf) - stash_pos = int(stash_dict['pos']) - stash_ref = stash_dict['ref'] - stash_alt = stash_dict['alt'] - # Generate an end position - stash_end = str(stash_pos + len(stash_ref) - 1) - # make a not real deletion insertion - stash_hgvs_not_delins = self.hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' 
+ str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) - try: - stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) - except: - fn.exceptPass() - # Store a tx copy for later use - test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) - stash_genomic = self.myvm_t_to_g(test_stash_tx_left, hgvs_alt_genomic.ac, - no_norm_evm, hn) - # Stash the outputs if required - # test variants = NC_000006.11:g.90403795G= (causes double identity) - # NC_000002.11:g.73675227_73675228insCTC - # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) - # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': - # pass - if len(test_stash_tx_left.posedit.edit.ref) == (( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): - stash_tx_left = test_stash_tx_left - if hasattr(test_stash_tx_left.posedit.edit, - 'alt') and test_stash_tx_left.posedit.edit.alt is not None: - alt = test_stash_tx_left.posedit.edit.alt - else: - alt = '' - if hasattr(stash_genomic.posedit.edit, - 'alt') and stash_genomic.posedit.edit.alt is not None: - g_alt = stash_genomic.posedit.edit.alt - else: - g_alt = '' - if (len(alt) - ( - test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( - len(g_alt) - ( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - hgvs_genomic_possibilities.append(stash_genomic) - else: - hgvs_genomic_possibilities.append('') - elif test_stash_tx_left.posedit.edit.type == 'identity': - reform_ident = str(test_stash_tx_left).split(':')[0] - reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_left.posedit.pos) + 'del' + str( - test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) - hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) - try: - hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: - error = str(e) - if re.search('spanning the exon-intron boundary', error): - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append('') - else: - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append(stash_genomic) - else: - try: - hn.normalize(test_stash_tx_left) - except hgvs.exceptions.HGVSUnsupportedOperationError: - hgvs_genomic_possibilities.append('') - else: - stash_tx_left = test_stash_tx_left - hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: - fn.exceptPass() - except ValueError: - fn.exceptPass() - - # direct mapping from reverse_normalized transcript insertions in the delins format - try: - if hgvs_coding.posedit.edit.type == 'ins': - most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) - most_3pr_hgvs_transcript_variant = reverse_normalizer.normalize(hgvs_coding) - try: - n_3pr = self.vm.c_to_n(most_3pr_hgvs_transcript_variant) - n_5pr = self.vm.c_to_n(most_5pr_hgvs_transcript_variant) - except: - n_3pr = most_3pr_hgvs_transcript_variant - n_5pr = most_5pr_hgvs_transcript_variant - # Make into a delins by adding the ref bases to the variant ref and alt - pr3_ref = self.sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, - n_3pr.posedit.pos.end.base) - pr5_ref = self.sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, - n_5pr.posedit.pos.end.base) - most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref - most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref - most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[ - 0] + most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ - pr3_ref[1] - most_5pr_hgvs_transcript_variant.posedit.edit.alt = 
pr5_ref[ - 0] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ - pr5_ref[1] - # Map to the genome - genomic_from_most_3pr_hgvs_transcript_variant = self.vm.t_to_g( - most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) - genomic_from_most_5pr_hgvs_transcript_variant = self.vm.t_to_g( - most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) - - # Normalize - If the variant spans a gap it should then form a static genomic variant - try: - genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_3pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if error == 'base start position must be <= end position': - start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base - end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start - genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_3pr_hgvs_transcript_variant) - try: - genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_5pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if error == 'base start position must be <= end position': - start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base - end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start - genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( - genomic_from_most_5pr_hgvs_transcript_variant) - - try: - if genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no 
attribute 'alt'": - genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' + str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_3pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( - genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) - - try: - if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' + str( - most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref - most_3pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( - most_3pr_hgvs_transcript_variant_delins_from_dup) - - try: - if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' 
+ str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_5pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) - - try: - if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' + str( - most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref - most_5pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( - most_5pr_hgvs_transcript_variant_delins_from_dup) - - if len( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( - most_3pr_hgvs_transcript_variant.posedit.edit.alt): - hgvs_genomic_possibilities.append( - genomic_from_most_3pr_hgvs_transcript_variant) - if len( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( - most_5pr_hgvs_transcript_variant.posedit.edit.alt): - hgvs_genomic_possibilities.append( - genomic_from_most_5pr_hgvs_transcript_variant) - - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - 
pass - fn.exceptPass() - - # Set variables for problem specific warnings - gapped_alignment_warning = '' - corrective_action_taken = '' - gapped_transcripts = '' - auto_info = '' - - # Mark as not disparity detected - disparity_deletion_in = ['false', 'false'] - # Loop through to see if a gap can be located - possibility_counter = 0 - for possibility in hgvs_genomic_possibilities: - possibility_counter = possibility_counter + 1 - # Loop out stash possibilities which will not spot gaps so are empty - if possibility == '': - continue - - # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps - hgvs_genomic_variant = possibility - stored_hgvs_genomic_variant = copy.deepcopy(hgvs_genomic_variant) - - # Reverse normalize hgvs_genomic_variant: NOTE will replace ref - try: - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( - hgvs_genomic_variant) - except hgvs.exceptions.HGVSError as e: - # Strange error caused by gap in genomic - error = str(e) - if re.search('base start position must be <= end position', error): - if hgvs_genomic.posedit.edit.type == 'delins': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( - hgvs_genomic) - if hgvs_genomic.posedit.edit.type == 'del': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + rhb - 
hgvs_genomic.posedit.pos.start.base = end - hgvs_genomic.posedit.pos.end.base = start - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( - hgvs_genomic) - if re.search('insertion length must be 1', error): - if hgvs_genomic.posedit.edit.type == 'ins': - start = hgvs_genomic.posedit.pos.start.base - end = hgvs_genomic.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) - lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start, end) - hgvs_genomic.posedit.edit.ref = lhb + rhb - hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( - hgvs_genomic) - - hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - # Store a copy for later use - stored_hgvs_genomic_5pr = copy.deepcopy(hgvs_genomic_5pr) - - # Make VCF - vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, - reverse_normalizer, self.sf) - chr = vcf_dict['chr'] - pos = vcf_dict['pos'] - ref = vcf_dict['ref'] - alt = vcf_dict['alt'] - - # Look for exonic gaps within transcript or chromosome - no_normalized_c = 'false' # Mark true to not produce an additional normalization of c. - - # Generate an end position - end = str(int(pos) + len(ref) - 1) - pos = str(pos) - - # Store a not real deletion insertion to test for gapping - stored_hgvs_not_delins = self.hp.parse_hgvs_variant(str( - hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' 
+ pos + '_' + end + 'del' + ref + 'ins' + alt) - v = [chr, pos, ref, alt] - - # Save a copy of current hgvs_coding - try: - saved_hgvs_coding = no_norm_evm.g_to_t(stored_hgvs_not_delins, - hgvs_coding.ac) - except Exception as e: - if str( - e) == 'start or end or both are beyond the bounds of transcript record': - saved_hgvs_coding = hgvs_coding - continue - - # Detect intronic variation using normalization - intronic_variant = 'false' - # Look for normalized variant options that do not match hgvs_coding - if orientation == -1: - # position genomic at its most 5 prime position - try: - query_genomic = reverse_normalizer.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript ant test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if ( - hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding - - elif orientation != -1: - # position genomic at its most 3 prime position - try: - query_genomic = hn.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if ( - hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - 
hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding - - try: - intron_test = hn.normalize(hgvs_seek_var) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - intronic_variant = 'hard_fail' - else: - # Double check to see whether the variant is actually intronic? - for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' - - if intronic_variant != 'hard_fail': - if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', - str( - hgvs_seek_var.posedit.pos)) or re.search( - r'\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str( - hgvs_seek_var.posedit.pos)): - # Double check to see whether the variant is actually intronic? 
- for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' - - if intronic_variant != 'true': - # Flag RefSeqGene for ammendment - # amend_RefSeqGene = 'false' - # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths - if stored_hgvs_not_delins != '': - # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) - # This test will only occur in dup of single base, insertion or substitution - if not re.search('_', str(hgvs_not_delins.posedit.pos)): - if re.search('dup', - hgvs_genomic_5pr.posedit.edit.type) or re.search( - 'ins', hgvs_genomic_5pr.posedit.edit.type): - # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos - plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) - plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 - plussed_hgvs_not_delins.posedit.edit.ref = '' - transcript_variant = no_norm_evm.g_to_t(plussed_hgvs_not_delins, - str( - saved_hgvs_coding.ac)) - if (( - transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( - hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, - end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[ - 1:] - elif re.search('ins', str( - hgvs_genomic_5pr.posedit.edit)) and re.search('del', - str( - hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', str( - hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', - str( - hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, - end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[ - 1:] - else: - if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = 
hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, - end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[ - 1:] - elif re.search('ins', str( - hgvs_genomic_5pr.posedit.edit)) and re.search('del', - str( - hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', str( - hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', - str( - hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, - end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[ - 1:] - else: - pass - else: - pass - tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - saved_hgvs_coding.ac) - # Create normalized version of tx_hgvs_not_delins - rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) - # Check for +1 base and adjust - if re.search(r'\+', - str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - r'\+', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - fn.exceptPass() - - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.base = 
tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, hn) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str( - saved_hgvs_coding.ac)) - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, hn) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str( - saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: - # pass - - # Check for -ve base and adjust - elif re.search(r'\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - r'\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - fn.exceptPass() - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base back to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # Delete the ref - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # Add the additional base to the ALT - start = 
rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = self.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) - rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, hn) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str( - saved_hgvs_coding.ac)) - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, - no_norm_evm, hn) - rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - str( - saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - else: - fn.exceptPass() - - # Logic - if len(hgvs_not_delins.posedit.edit.ref) < len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( - hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['chromosome', gap_length] - elif len(hgvs_not_delins.posedit.edit.ref) > len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( - rn_tx_hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] - else: - re_capture_tx_variant = [] - for possibility in hgvs_genomic_possibilities: - if possibility == '': - 
continue - hgvs_t_possibility = self.vm.g_to_t(possibility, hgvs_coding.ac) - if hgvs_t_possibility.posedit.edit.type == 'ins': - try: - hgvs_t_possibility = self.vm.c_to_n(hgvs_t_possibility) - except: - continue - if hgvs_t_possibility.posedit.pos.start.offset != 0 or hgvs_t_possibility.posedit.pos.end.offset != 0: - continue - ins_ref = self.sf.fetch_seq(hgvs_t_possibility.ac, - hgvs_t_possibility.posedit.pos.start.base - 1, - hgvs_t_possibility.posedit.pos.start.base + 1) - try: - hgvs_t_possibility = self.vm.n_to_c(hgvs_t_possibility) - except: - continue - hgvs_t_possibility.posedit.edit.ref = ins_ref - hgvs_t_possibility.posedit.edit.alt = ins_ref[ - 0] + hgvs_t_possibility.posedit.edit.alt + \ - ins_ref[1] - if possibility.posedit.edit.type == 'ins': - ins_ref = self.sf.fetch_seq(possibility.ac, - possibility.posedit.pos.start.base - 1, - possibility.posedit.pos.end.base) - possibility.posedit.edit.ref = ins_ref - possibility.posedit.edit.alt = ins_ref[ - 0] + possibility.posedit.edit.alt + \ - ins_ref[1] - if len(hgvs_t_possibility.posedit.edit.ref) < len( - possibility.posedit.edit.ref): - gap_length = len(possibility.posedit.edit.ref) - len( - hgvs_t_possibility.posedit.edit.ref) - re_capture_tx_variant = ['transcript', gap_length, - hgvs_t_possibility] - hgvs_not_delins = possibility - hgvs_genomic_5pr = possibility - break - - if re_capture_tx_variant != []: - try: - tx_hgvs_not_delins = self.vm.c_to_n(re_capture_tx_variant[2]) - except: - tx_hgvs_not_delins = re_capture_tx_variant[2] - disparity_deletion_in = re_capture_tx_variant[0:-1] - else: - pass - - # Final sanity checks - try: - self.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) - except Exception as e: - if str( - e) == 'start or end or both are beyond the bounds of transcript record': - continue - try: - hn.normalize(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - - if re.match('Normalization of intronic variants is not supported', - 
error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - continue - elif re.match('Normalization of intronic variants is not supported', - error): - # We know that this cannot be because of an intronic variant, so must be aligned to tx gap - disparity_deletion_in = ['transcript', 'Requires Analysis'] - - # Recreate hgvs_genomic - if disparity_deletion_in[0] == 'transcript': - hgvs_genomic = hgvs_not_delins - - # Find oddly placed gaps where the tx variant is encompassed in the gap - if disparity_deletion_in[0] == 'false' and ( - possibility_counter == 3 or possibility_counter == 4): - rg = reverse_normalizer.normalize(hgvs_not_delins) - rtx = self.vm.g_to_t(rg, tx_hgvs_not_delins.ac) - fg = hn.normalize(hgvs_not_delins) - ftx = self.vm.g_to_t(fg, tx_hgvs_not_delins.ac) - if ( - rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( - ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): - exons = self.hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, alt_aln_method) - exonic = False - for ex_test in exons: - if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ - 7]) and ftx.posedit.pos.end.base in range(ex_test[6], - ex_test[7]): - exonic = True - if exonic is True: - hgvs_not_delins = fg - hgvs_genomic = fg - hgvs_genomic_5pr = fg - try: - tx_hgvs_not_delins = self.vm.c_to_n(ftx) - except Exception: - tx_hgvs_not_delins = ftx - disparity_deletion_in = ['transcript', 'Requires Analysis'] - - # Pre-processing of tx_hgvs_not_delins - try: - if tx_hgvs_not_delins.posedit.edit.alt is None: - tx_hgvs_not_delins.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' 
+ str( - tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = self.hp.parse_hgvs_variant( - tx_hgvs_not_delins_delins_from_dup) - - if disparity_deletion_in[0] == 'transcript': - # amend_RefSeqGene = 'true' - # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search(r'\+', - str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search(r'\+', - str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.end))): - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) - - # Copy the current variant - tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) - try: - if tx_gap_fill_variant.posedit.edit.alt is None: - tx_gap_fill_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( - tx_gap_fill_variant.posedit.pos.start) + '_' + str( - tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = self.hp.parse_hgvs_variant( - tx_gap_fill_variant_delins_from_dup) - - # Identify which half of the NOT-intron the start position of the variant is in - if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - tx_gap_fill_variant.posedit.pos.start.offset = int( - '0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.offset = int( - '0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.offset = int( - '0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - tx_gap_fill_variant.posedit.pos.end.offset = int( - '0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - - try: - tx_gap_fill_variant = self.vm.n_to_c(tx_gap_fill_variant) - except: - fn.exceptPass() - genomic_gap_fill_variant = self.vm.t_to_g(tx_gap_fill_variant, - reverse_normalized_hgvs_genomic.ac) - genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - - try: - c_tx_hgvs_not_delins = self.vm.n_to_c(tx_hgvs_not_delins) - except Exception: - c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = self.vm.t_to_g(c_tx_hgvs_not_delins, - hgvs_genomic_5pr.ac) - - # Ensure an ALT exists - try: - if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_gap_fill_variant_delins_from_dup = 
genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( - genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = self.hp.parse_hgvs_variant( - genomic_gap_fill_variant_delins_from_dup) - genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( - genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = self.hp.parse_hgvs_variant( - genomic_gap_fill_variant_alt_delins_from_dup) - - # Correct insertion alts - if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = self.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - genomic_gap_fill_variant_alt.posedit.pos.end.base) - genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - append_ref[1] - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - alternate_bases = list( - genomic_gap_fill_variant_alt.posedit.edit.alt) - else: - # Deletions with no ins - pre_alternate_bases = list( - genomic_gap_fill_variant_alt.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = genomic_gap_fill_variant.posedit.pos.start.base - alt_start = 
genomic_gap_fill_variant_alt.posedit.pos.start.base - ref_base_dict = {} - for base in reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = ref_start + 1 - - alt_base_dict = {} - - # NEED TO SEARCH FOR RANGE = and replace with interval_range - # Need to search for int and replace with integer - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for integer in range( - genomic_gap_fill_variant_alt.posedit.pos.start.base, - genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): - if integer == alt_start: - alt_base_dict[integer] = str(''.join(alternate_bases)) - else: - alt_base_dict[integer] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for integer in range( - genomic_gap_fill_variant.posedit.pos.start.base, - genomic_gap_fill_variant.posedit.pos.end.base + 1, - 1): - if integer in list(alt_base_dict.keys()): - alternate_sequence_bases.append(alt_base_dict[integer]) - else: - alternate_sequence_bases.append(ref_base_dict[integer]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Add the new alt to the gap fill variant and generate transcript variant - genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = self.vm.g_to_t(genomic_gap_fill_variant, - tx_gap_fill_variant.ac) - - # Set warning - gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - disparity_deletion_in[1] = [gap_size] - auto_info = auto_info + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - else: - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - auto_info = auto_info + '%s' % (gap_position) - - else: - if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # In this instance, we have identified a transcript gap but the n. version of - # the transcript variant but do not have a position which actually hits the gap, - # so the variant likely spans the gap, and is not picked up by an offset. 
- try: - c1 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - g1 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) - g3 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - ng2 = hn.normalize(g2) - g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - len(g3.posedit.edit.ref) - 1) - try: - c2 = self.vm.g_to_t(g3, c1.ac) - if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - pass - else: - tx_hgvs_not_delins = c2 - try: - tx_hgvs_not_delins = self.vm.c_to_n(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSError: - fn.exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError: - fn.exceptPass() - - if re.search(r'\+', str( - tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\+', - str( - tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = 
self.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\+', str( - tx_hgvs_not_delins.posedit.pos.end)) and not re.search(r'\+', - str( - tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if 
re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - gpe = for_location_c.posedit.pos.end.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', str( - tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\-', - str( - tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - gap_position = ' between 
positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', str( - tx_hgvs_not_delins.posedit.pos.end)) and not re.search(r'\-', - str( - tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = self.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - 1 - gpe = for_location_c.posedit.pos.end.base - gap_position = ' between positions c.' 
+ str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - else: - auto_info = auto_info + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) + '\n' - hgvs_refreshed_variant = tx_hgvs_not_delins - - # GAP IN THE CHROMOSOME - elif disparity_deletion_in[0] == 'chromosome': - # amend_RefSeqGene = 'true' - if possibility_counter == 3: - hgvs_refreshed_variant = stash_tx_right - elif possibility_counter == 4: - hgvs_refreshed_variant = stash_tx_left - else: - hgvs_refreshed_variant = chromosome_normalized_hgvs_coding - # Warn - auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( - hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' transcript base(s) that fail to align to chromosome ' + str( - hgvs_genomic.ac) + '\n' - else: - # Keep the same by re-setting rel_var - hgvs_refreshed_variant = hgvs_coding - # amend_RefSeqGene = 'false' - - # Edit the output - if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', - str( - hgvs_refreshed_variant.type)): - hgvs_refreshed_variant = no_norm_evm.n_to_c(hgvs_refreshed_variant) - else: - pass - - try: - hn.normalize(hgvs_refreshed_variant) - except Exception as e: - error = str(e) - # Ensure the final variant is not intronic nor does it cross exon boundaries - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - hgvs_refreshed_variant = saved_hgvs_coding - else: - continue - - # Quick check to make sure the coding variant has not changed - try: - to_test = hn.normalize(hgvs_refreshed_variant) - except: - to_test = hgvs_refreshed_variant - if str(to_test.posedit.edit) != str(hgvs_coding.posedit.edit): - # Try the 
next available genomic option - if hgvs_coding.posedit.edit.type == 'identity' and to_test.posedit.edit.type == 'identity': - hgvs_coding = to_test - else: - continue - - # Update hgvs_genomic - hgvs_alt_genomic = self.myvm_t_to_g(hgvs_refreshed_variant, alt_chr, - no_norm_evm, hn) - if hgvs_alt_genomic.posedit.edit.type == 'identity': - re_c = self.vm.g_to_t(hgvs_alt_genomic, hgvs_refreshed_variant.ac) - if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): - shuffle_left_g = copy.copy(hgvs_alt_genomic) - shuffle_left_g.posedit.edit.ref = '' - shuffle_left_g.posedit.edit.alt = '' - shuffle_left_g.posedit.pos.start.base = shuffle_left_g.posedit.pos.start.base - 1 - shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 - shuffle_left_g = reverse_normalizer.normalize(shuffle_left_g) - re_c = self.vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) - if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): - hgvs_alt_genomic = shuffle_left_g - - # If it is intronic, these vairables will not have been set - else: - # amend_RefSeqGene = 'false' - no_normalized_c = 'false' - - # Break if gap has been detected - if disparity_deletion_in[0] != 'false': - break - - # Normailse hgvs_genomic - try: - hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) - except hgvs.exceptions.HGVSError as e: - # Strange error caused by gap in genomic - error = str(e) - if re.search('base start position must be <= end position', error) and \ - disparity_deletion_in[0] == 'chromosome': - if hgvs_alt_genomic.posedit.edit.type == 'delins': - start = hgvs_alt_genomic.posedit.pos.start.base - end = hgvs_alt_genomic.posedit.pos.end.base - lhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) - rhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) - hgvs_alt_genomic.posedit.edit.ref = lhb + rhb - hgvs_alt_genomic.posedit.edit.alt = lhb + hgvs_alt_genomic.posedit.edit.alt + rhb - hgvs_alt_genomic.posedit.pos.start.base = end - 
hgvs_alt_genomic.posedit.pos.end.base = start - hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) - if hgvs_alt_genomic.posedit.edit.type == 'del': - start = hgvs_alt_genomic.posedit.pos.start.base - end = hgvs_alt_genomic.posedit.pos.end.base - lhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) - rhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) - hgvs_alt_genomic.posedit.edit.ref = lhb + rhb - hgvs_alt_genomic.posedit.edit.alt = lhb + rhb - hgvs_alt_genomic.posedit.pos.start.base = end - hgvs_alt_genomic.posedit.pos.end.base = start - hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) - - # Refresh the :g. variant - multi_g.append(hgvs_alt_genomic) - else: - multi_g.append(hgvs_alt_genomic) - corrective_action_taken = 'false' - - # In this instance, the gap code has generally found an incomplete-alignment rather than a - # truly gapped alignment. - except KeyError: - warnings = warnings + ': Suspected incomplete alignment between transcript %s and ' \ - 'genomic reference sequence %s' % (hgvs_coding.ac, - alt_chr) - continue - except hgvs.exceptions.HGVSError as e: - exc_type, exc_value, last_traceback = sys.exc_info() - te = traceback.format_exc() - error = str(te) - logger.error(str(exc_type) + " " + str(exc_value)) - logger.debug(error) - continue - - if multi_g != []: - - multi_gen_vars = multi_g # '|'.join(multi_g) - else: - multi_gen_vars = [] + # hgvs_coding = self.hp.parse_hgvs_variant(str(tx_variant)) + # # Gap gene black list + # try: + # gene_symbol = self.db.get_gene_symbol_from_transcriptID(hgvs_coding.ac) + # except Exception: + # fn.exceptPass() + # else: + # # If the gene symbol is not in the list, the value False will be returned + # gap_compensation = vvChromosomes.gap_black_list(gene_symbol) + # + # # Look for variants spanning introns + # try: + # hgvs_coding = hn.normalize(hgvs_coding) + # except hgvs.exceptions.HGVSUnsupportedOperationError as e: + # error = str(e) + # if re.search('boundary', str(error)) or 
re.search('spanning', str(error)): + # gap_compensation = False + # else: + # pass + # except hgvs.exceptions.HGVSError: + # fn.exceptPass() + # + # # Warn gap code status + # logger.warning("gap_compensation_3 = " + str(gap_compensation)) + # multi_g = [] + # multi_list = [] + # mapping_options = self.hdp.get_tx_mapping_options(hgvs_coding.ac) + # for alt_chr in mapping_options: + # if (re.match('NC_', alt_chr[1]) or re.match('NT_', alt_chr[1]) or re.match('NW_', + # alt_chr[1])) and \ + # alt_chr[2] == alt_aln_method: + # multi_list.append(alt_chr[1]) + # + # for alt_chr in multi_list: + # try: + # # Re set ori + # ori = self.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=alt_chr, + # alt_aln_method=alt_aln_method) + # orientation = int(ori[0]['alt_strand']) + # hgvs_alt_genomic = self.myvm_t_to_g(hgvs_coding, alt_chr, no_norm_evm, hn) + # # Set hgvs_genomic accordingly + # hgvs_genomic = copy.deepcopy(hgvs_alt_genomic) + # + # # genomic_possibilities + # # 1. take the simple 3 pr normalized hgvs_genomic + # # 2. Lock in hgvs_genomic at its most 5 prime position wrt genome + # hgvs_genomic_possibilities = [] + # + # # Loop out gap code under these circumstances! 
+ # if gap_compensation is True: + # logger.warning('g_to_t gap code 3 active') + # rn_hgvs_genomic = reverse_normalizer.normalize(hgvs_alt_genomic) + # hgvs_genomic_possibilities.append(rn_hgvs_genomic) + # if orientation != -1: + # try: + # chromosome_normalized_hgvs_coding = reverse_normalizer.normalize( + # hgvs_coding) + # except hgvs.exceptions.HGVSUnsupportedOperationError as e: + # chromosome_normalized_hgvs_coding = hgvs_coding + # else: + # try: + # chromosome_normalized_hgvs_coding = hn.normalize(hgvs_coding) + # except hgvs.exceptions.HGVSUnsupportedOperationError as e: + # error = str(e) + # chromosome_normalized_hgvs_coding = hgvs_coding + # + # most_3pr_hgvs_genomic = self.myvm_t_to_g(chromosome_normalized_hgvs_coding, + # alt_chr, + # no_norm_evm, hn) + # hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) + # + # # First to the right + # hgvs_stash = copy.deepcopy(hgvs_coding) + # try: + # hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) + # except: + # fn.exceptPass() + # try: + # stash_ac = hgvs_stash.ac + # stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, self.sf) + # stash_pos = int(stash_dict['pos']) + # stash_ref = stash_dict['ref'] + # stash_alt = stash_dict['alt'] + # # Generate an end position + # stash_end = str(stash_pos + len(stash_ref) - 1) + # # make a not real deletion insertion + # stash_hgvs_not_delins = self.hp.parse_hgvs_variant( + # stash_ac + ':' + hgvs_stash.type + '.' 
+ str( + # stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + # try: + # stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) + # except: + # fn.exceptPass() + # # Store a tx copy for later use + # test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) + # stash_genomic = self.myvm_t_to_g(test_stash_tx_right, hgvs_alt_genomic.ac, + # no_norm_evm, hn) + # # Stash the outputs if required + # # test variants = NC_000006.11:g.90403795G= (causes double identity) + # # NC_000002.11:g.73675227_73675228insCTC (? incorrect assumed insertion position) + # # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': + # # pass + # if len(test_stash_tx_right.posedit.edit.ref) == (( + # stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + # stash_tx_right = test_stash_tx_right + # if hasattr(test_stash_tx_right.posedit.edit, + # 'alt') and test_stash_tx_right.posedit.edit.alt is not None: + # alt = test_stash_tx_right.posedit.edit.alt + # else: + # alt = '' + # if hasattr(stash_genomic.posedit.edit, + # 'alt') and stash_genomic.posedit.edit.alt is not None: + # g_alt = stash_genomic.posedit.edit.alt + # else: + # g_alt = '' + # if (len(alt) - ( + # test_stash_tx_right.posedit.pos.end.base - test_stash_tx_right.posedit.pos.start.base) + 1) != ( + # len(g_alt) - ( + # stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + # hgvs_genomic_possibilities.append(stash_genomic) + # else: + # hgvs_genomic_possibilities.append('') + # elif test_stash_tx_right.posedit.edit.type == 'identity': + # reform_ident = str(test_stash_tx_right).split(':')[0] + # reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_right.posedit.pos) + 'del' + str( + # test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) + # hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) + # try: + # hn.normalize(hgvs_reform_ident) + # except hgvs.exceptions.HGVSError as e: + # error = str(e) + # if re.search('spanning the exon-intron boundary', error): + # stash_tx_right = test_stash_tx_right + # hgvs_genomic_possibilities.append('') + # else: + # stash_tx_right = test_stash_tx_right + # hgvs_genomic_possibilities.append(stash_genomic) + # else: + # try: + # hn.normalize(test_stash_tx_right) + # except hgvs.exceptions.HGVSUnsupportedOperationError: + # hgvs_genomic_possibilities.append('') + # else: + # stash_tx_right = test_stash_tx_right + # hgvs_genomic_possibilities.append(stash_genomic) + # except hgvs.exceptions.HGVSError as e: + # fn.exceptPass() + # except ValueError: + # fn.exceptPass() + # + # # Then to the left + # hgvs_stash = copy.deepcopy(hgvs_coding) + # try: + # hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) + # except: + # fn.exceptPass() + # try: + # stash_ac = hgvs_stash.ac + # stash_dict = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, primary_assembly, + # reverse_normalizer, self.sf) + # stash_pos = int(stash_dict['pos']) + # stash_ref = stash_dict['ref'] + # stash_alt = stash_dict['alt'] + # # Generate an end position + # stash_end = str(stash_pos + len(stash_ref) - 1) + # # make a not real deletion insertion + # stash_hgvs_not_delins = self.hp.parse_hgvs_variant( + # stash_ac + ':' + hgvs_stash.type + '.' 
+ str( + # stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) + # try: + # stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) + # except: + # fn.exceptPass() + # # Store a tx copy for later use + # test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) + # stash_genomic = self.myvm_t_to_g(test_stash_tx_left, hgvs_alt_genomic.ac, + # no_norm_evm, hn) + # # Stash the outputs if required + # # test variants = NC_000006.11:g.90403795G= (causes double identity) + # # NC_000002.11:g.73675227_73675228insCTC + # # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': + # # pass + # if len(test_stash_tx_left.posedit.edit.ref) == (( + # stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): + # stash_tx_left = test_stash_tx_left + # if hasattr(test_stash_tx_left.posedit.edit, + # 'alt') and test_stash_tx_left.posedit.edit.alt is not None: + # alt = test_stash_tx_left.posedit.edit.alt + # else: + # alt = '' + # if hasattr(stash_genomic.posedit.edit, + # 'alt') and stash_genomic.posedit.edit.alt is not None: + # g_alt = stash_genomic.posedit.edit.alt + # else: + # g_alt = '' + # if (len(alt) - ( + # test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( + # len(g_alt) - ( + # stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + # hgvs_genomic_possibilities.append(stash_genomic) + # else: + # hgvs_genomic_possibilities.append('') + # elif test_stash_tx_left.posedit.edit.type == 'identity': + # reform_ident = str(test_stash_tx_left).split(':')[0] + # reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_left.posedit.pos) + 'del' + str( + # test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) + # hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) + # try: + # hn.normalize(hgvs_reform_ident) + # except hgvs.exceptions.HGVSError as e: + # error = str(e) + # if re.search('spanning the exon-intron boundary', error): + # stash_tx_left = test_stash_tx_left + # hgvs_genomic_possibilities.append('') + # else: + # stash_tx_left = test_stash_tx_left + # hgvs_genomic_possibilities.append(stash_genomic) + # else: + # try: + # hn.normalize(test_stash_tx_left) + # except hgvs.exceptions.HGVSUnsupportedOperationError: + # hgvs_genomic_possibilities.append('') + # else: + # stash_tx_left = test_stash_tx_left + # hgvs_genomic_possibilities.append(stash_genomic) + # except hgvs.exceptions.HGVSError as e: + # fn.exceptPass() + # except ValueError: + # fn.exceptPass() + # + # # direct mapping from reverse_normalized transcript insertions in the delins format + # try: + # if hgvs_coding.posedit.edit.type == 'ins': + # most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) + # most_3pr_hgvs_transcript_variant = reverse_normalizer.normalize(hgvs_coding) + # try: + # n_3pr = self.vm.c_to_n(most_3pr_hgvs_transcript_variant) + # n_5pr = self.vm.c_to_n(most_5pr_hgvs_transcript_variant) + # except: + # n_3pr = most_3pr_hgvs_transcript_variant + # n_5pr = most_5pr_hgvs_transcript_variant + # # Make into a delins by adding the ref bases to the variant ref and alt + # pr3_ref = self.sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, + # n_3pr.posedit.pos.end.base) + # pr5_ref = self.sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, + # n_5pr.posedit.pos.end.base) + # most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref + # most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref + # most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[ + # 0] + 
most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ + # pr3_ref[1] + # most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[ + # 0] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ + # pr5_ref[1] + # # Map to the genome + # genomic_from_most_3pr_hgvs_transcript_variant = self.vm.t_to_g( + # most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) + # genomic_from_most_5pr_hgvs_transcript_variant = self.vm.t_to_g( + # most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) + # + # # Normalize - If the variant spans a gap it should then form a static genomic variant + # try: + # genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( + # genomic_from_most_3pr_hgvs_transcript_variant) + # except hgvs.exceptions.HGVSInvalidVariantError as e: + # error = str(e) + # if error == 'base start position must be <= end position': + # start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base + # end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base + # genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end + # genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start + # genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( + # genomic_from_most_3pr_hgvs_transcript_variant) + # try: + # genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( + # genomic_from_most_5pr_hgvs_transcript_variant) + # except hgvs.exceptions.HGVSInvalidVariantError as e: + # error = str(e) + # if error == 'base start position must be <= end position': + # start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base + # end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base + # genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end + # genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start + # genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( + # genomic_from_most_5pr_hgvs_transcript_variant) + # + # try: + # if 
genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + # genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + # except Exception as e: + # if str(e) == "'Dup' object has no attribute 'alt'": + # genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' + str( + # genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + # genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + # genomic_from_most_3pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( + # genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) + # + # try: + # if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + # most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + # except Exception as e: + # if str(e) == "'Dup' object has no attribute 'alt'": + # most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' 
+ str( + # most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + # most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref + # most_3pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( + # most_3pr_hgvs_transcript_variant_delins_from_dup) + # + # try: + # if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + # genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + # except Exception as e: + # if str(e) == "'Dup' object has no attribute 'alt'": + # genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' + str( + # genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + # genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + # genomic_from_most_5pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( + # genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) + # + # try: + # if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + # most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + # except Exception as e: + # if str(e) == "'Dup' object has no attribute 'alt'": + # most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' 
+ str( + # most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + # most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref + # most_5pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( + # most_5pr_hgvs_transcript_variant_delins_from_dup) + # + # if len( + # genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( + # most_3pr_hgvs_transcript_variant.posedit.edit.alt): + # hgvs_genomic_possibilities.append( + # genomic_from_most_3pr_hgvs_transcript_variant) + # if len( + # genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( + # most_5pr_hgvs_transcript_variant.posedit.edit.alt): + # hgvs_genomic_possibilities.append( + # genomic_from_most_5pr_hgvs_transcript_variant) + # + # except hgvs.exceptions.HGVSUnsupportedOperationError as e: + # error = str(e) + # if re.match('Normalization of intronic variants is not supported', + # error) or re.match( + # 'Unsupported normalization of variants spanning the exon-intron boundary', + # error): + # pass + # fn.exceptPass() + # + # # Set variables for problem specific warnings + # gapped_alignment_warning = '' + # corrective_action_taken = '' + # gapped_transcripts = '' + # auto_info = '' + # + # # Mark as not disparity detected + # disparity_deletion_in = ['false', 'false'] + # # Loop through to see if a gap can be located + # possibility_counter = 0 + # for possibility in hgvs_genomic_possibilities: + # possibility_counter = possibility_counter + 1 + # # Loop out stash possibilities which will not spot gaps so are empty + # if possibility == '': + # continue + # + # # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps + # hgvs_genomic_variant = possibility + # stored_hgvs_genomic_variant = copy.deepcopy(hgvs_genomic_variant) + # + # # Reverse normalize 
hgvs_genomic_variant: NOTE will replace ref + # try: + # reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( + # hgvs_genomic_variant) + # except hgvs.exceptions.HGVSError as e: + # # Strange error caused by gap in genomic + # error = str(e) + # if re.search('base start position must be <= end position', error): + # if hgvs_genomic.posedit.edit.type == 'delins': + # start = hgvs_genomic.posedit.pos.start.base + # end = hgvs_genomic.posedit.pos.end.base + # lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + # rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + # hgvs_genomic.posedit.edit.ref = lhb + rhb + # hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + # hgvs_genomic.posedit.pos.start.base = end + # hgvs_genomic.posedit.pos.end.base = start + # reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( + # hgvs_genomic) + # if hgvs_genomic.posedit.edit.type == 'del': + # start = hgvs_genomic.posedit.pos.start.base + # end = hgvs_genomic.posedit.pos.end.base + # lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) + # rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + # hgvs_genomic.posedit.edit.ref = lhb + rhb + # hgvs_genomic.posedit.edit.alt = lhb + rhb + # hgvs_genomic.posedit.pos.start.base = end + # hgvs_genomic.posedit.pos.end.base = start + # reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( + # hgvs_genomic) + # if re.search('insertion length must be 1', error): + # if hgvs_genomic.posedit.edit.type == 'ins': + # start = hgvs_genomic.posedit.pos.start.base + # end = hgvs_genomic.posedit.pos.end.base + # ref_bases = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) + # lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) + # rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start, end) + # hgvs_genomic.posedit.edit.ref = lhb + rhb + # hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb + # 
reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( + # hgvs_genomic) + # + # hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + # # Store a copy for later use + # stored_hgvs_genomic_5pr = copy.deepcopy(hgvs_genomic_5pr) + # + # # Make VCF + # vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, + # reverse_normalizer, self.sf) + # chr = vcf_dict['chr'] + # pos = vcf_dict['pos'] + # ref = vcf_dict['ref'] + # alt = vcf_dict['alt'] + # + # # Look for exonic gaps within transcript or chromosome + # no_normalized_c = 'false' # Mark true to not produce an additional normalization of c. + # + # # Generate an end position + # end = str(int(pos) + len(ref) - 1) + # pos = str(pos) + # + # # Store a not real deletion insertion to test for gapping + # stored_hgvs_not_delins = self.hp.parse_hgvs_variant(str( + # hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) + # v = [chr, pos, ref, alt] + # + # # Save a copy of current hgvs_coding + # try: + # saved_hgvs_coding = no_norm_evm.g_to_t(stored_hgvs_not_delins, + # hgvs_coding.ac) + # except Exception as e: + # if str( + # e) == 'start or end or both are beyond the bounds of transcript record': + # saved_hgvs_coding = hgvs_coding + # continue + # + # # Detect intronic variation using normalization + # intronic_variant = 'false' + # # Look for normalized variant options that do not match hgvs_coding + # if orientation == -1: + # # position genomic at its most 5 prime position + # try: + # query_genomic = reverse_normalizer.normalize(hgvs_genomic) + # except: + # query_genomic = hgvs_genomic + # # Map to the transcript ant test for movement + # try: + # hgvs_seek_var = evm.g_to_t(query_genomic, hgvs_coding.ac) + # except hgvs.exceptions.HGVSError as e: + # hgvs_seek_var = saved_hgvs_coding + # else: + # seek_var = fn.valstr(hgvs_seek_var) + # seek_ac = str(hgvs_seek_var.ac) + # if ( + # hgvs_seek_var.posedit.pos.start.base + 
hgvs_seek_var.posedit.pos.start.offset) > ( + # hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + # hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + # hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + # pass + # else: + # hgvs_seek_var = saved_hgvs_coding + # + # elif orientation != -1: + # # position genomic at its most 3 prime position + # try: + # query_genomic = hn.normalize(hgvs_genomic) + # except: + # query_genomic = hgvs_genomic + # # Map to the transcript and test for movement + # try: + # hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) + # except hgvs.exceptions.HGVSError as e: + # hgvs_seek_var = saved_hgvs_coding + # seek_var = fn.valstr(hgvs_seek_var) + # seek_ac = str(hgvs_seek_var.ac) + # if ( + # hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + # hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + # hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + # hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + # pass + # else: + # hgvs_seek_var = saved_hgvs_coding + # + # try: + # intron_test = hn.normalize(hgvs_seek_var) + # except hgvs.exceptions.HGVSUnsupportedOperationError as e: + # error = str(e) + # if re.match('Normalization of intronic variants is not supported', + # error) or re.match( + # 'Unsupported normalization of variants spanning the exon-intron boundary', + # error): + # if re.match( + # 'Unsupported normalization of variants spanning the exon-intron boundary', + # error): + # intronic_variant = 'hard_fail' + # else: + # # Double check to see whether the variant is actually intronic? 
+ # for exon in ori: + # genomic_start = int(exon['alt_start_i']) + # genomic_end = int(exon['alt_end_i']) + # if ( + # hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + # hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + # intronic_variant = 'false' + # break + # else: + # intronic_variant = 'true' + # + # if intronic_variant != 'hard_fail': + # if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', + # str( + # hgvs_seek_var.posedit.pos)) or re.search( + # r'\*\d+\+', str( + # hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str( + # hgvs_seek_var.posedit.pos)): + # # Double check to see whether the variant is actually intronic? + # for exon in ori: + # genomic_start = int(exon['alt_start_i']) + # genomic_end = int(exon['alt_end_i']) + # if ( + # hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + # hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + # intronic_variant = 'false' + # break + # else: + # intronic_variant = 'true' + # + # if intronic_variant != 'true': + # # Flag RefSeqGene for ammendment + # # amend_RefSeqGene = 'false' + # # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths + # if stored_hgvs_not_delins != '': + # # Refresh hgvs_not_delins from stored_hgvs_not_delins + # hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) + # # This test will only occur in dup of single base, insertion or substitution + # if not re.search('_', str(hgvs_not_delins.posedit.pos)): + # if re.search('dup', + # hgvs_genomic_5pr.posedit.edit.type) or re.search( + # 'ins', hgvs_genomic_5pr.posedit.edit.type): + # # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos + # plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) + # plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 + # plussed_hgvs_not_delins.posedit.edit.ref = '' + # transcript_variant = no_norm_evm.g_to_t(plussed_hgvs_not_delins, + # str( + # saved_hgvs_coding.ac)) + # if (( + # transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( + # hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): + # if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + # hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + # start = hgvs_not_delins.posedit.pos.start.base - 1 + # end = hgvs_not_delins.posedit.pos.end.base + # ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, + # end) + # hgvs_not_delins.posedit.edit.ref = ref_bases + # hgvs_not_delins.posedit.edit.alt = ref_bases[ + # :1] + hgvs_not_delins.posedit.edit.alt[ + # 1:] + ref_bases[ + # 1:] + # elif re.search('ins', str( + # hgvs_genomic_5pr.posedit.edit)) and re.search('del', + # str( + # hgvs_genomic_5pr.posedit.edit)): + # hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + # elif re.search('ins', str( + # hgvs_genomic_5pr.posedit.edit)) and not re.search( + # 'del', + # str( + # hgvs_genomic_5pr.posedit.edit)): + # hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + # start = hgvs_not_delins.posedit.pos.start.base - 1 + # end = hgvs_not_delins.posedit.pos.end.base + # ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, + # end) + # hgvs_not_delins.posedit.edit.ref = ref_bases + # hgvs_not_delins.posedit.edit.alt = ref_bases[ + # :1] + hgvs_not_delins.posedit.edit.alt[ + # 1:] + ref_bases[ + # 1:] + # else: + # if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): + # hgvs_not_delins.posedit.pos.end.base = 
hgvs_not_delins.posedit.pos.start.base + 1 + # start = hgvs_not_delins.posedit.pos.start.base - 1 + # end = hgvs_not_delins.posedit.pos.end.base + # ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, + # end) + # hgvs_not_delins.posedit.edit.ref = ref_bases + # hgvs_not_delins.posedit.edit.alt = ref_bases[ + # :1] + hgvs_not_delins.posedit.edit.alt[ + # 1:] + ref_bases[ + # 1:] + # elif re.search('ins', str( + # hgvs_genomic_5pr.posedit.edit)) and re.search('del', + # str( + # hgvs_genomic_5pr.posedit.edit)): + # hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + # elif re.search('ins', str( + # hgvs_genomic_5pr.posedit.edit)) and not re.search( + # 'del', + # str( + # hgvs_genomic_5pr.posedit.edit)): + # hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 + # start = hgvs_not_delins.posedit.pos.start.base - 1 + # end = hgvs_not_delins.posedit.pos.end.base + # ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, + # end) + # hgvs_not_delins.posedit.edit.ref = ref_bases + # hgvs_not_delins.posedit.edit.alt = ref_bases[ + # :1] + hgvs_not_delins.posedit.edit.alt[ + # 1:] + ref_bases[ + # 1:] + # else: + # pass + # else: + # pass + # tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + # saved_hgvs_coding.ac) + # # Create normalized version of tx_hgvs_not_delins + # rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) + # # Check for +1 base and adjust + # if re.search(r'\+', + # str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( + # r'\+', + # str( + # rn_tx_hgvs_not_delins.posedit.pos.start)): + # # Remove offsetting to span the gap + # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + # rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # try: + # rn_tx_hgvs_not_delins.posedit.edit.alt = '' + # except: + # 
fn.exceptPass() + # + # elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # # move tx end base to next available non-offset base + # rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 + # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # if re.match('NM_', str(rn_tx_hgvs_not_delins)): + # test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + # else: + # test_tx_var = rn_tx_hgvs_not_delins + # # re-make genomic and tx + # hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, + # no_norm_evm, hn) + # rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + # str( + # saved_hgvs_coding.ac)) + # elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # # move tx start base to previous available non-offset base + # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # if re.match('NM_', str(rn_tx_hgvs_not_delins)): + # test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + # else: + # test_tx_var = rn_tx_hgvs_not_delins + # # re-make genomic and tx + # hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, + # no_norm_evm, hn) + # rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + # str( + # saved_hgvs_coding.ac)) + # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # # else: + # # pass + # + # # Check for -ve base and adjust + # elif re.search(r'\-', + # str( + # rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( + # r'\-', + # str( + # rn_tx_hgvs_not_delins.posedit.pos.start)): + # # Remove offsetting to span the gap + # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 + # rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # try: + # rn_tx_hgvs_not_delins.posedit.edit.alt = '' + # except: + # fn.exceptPass() + # elif 
re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + # # move tx end base back to next available non-offset base + # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 + # # Delete the ref + # rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # # Add the additional base to the ALT + # start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 + # end = rn_tx_hgvs_not_delins.posedit.pos.end.base + # ref_bases = self.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) + # rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases + # if re.match('NM_', str(rn_tx_hgvs_not_delins)): + # test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + # else: + # test_tx_var = rn_tx_hgvs_not_delins + # # re-make genomic and tx + # hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, + # no_norm_evm, hn) + # rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + # str( + # saved_hgvs_coding.ac)) + # elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + # # move tx start base to previous available non-offset base + # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 + # rn_tx_hgvs_not_delins.posedit.edit.ref = '' + # if re.match('NM_', str(rn_tx_hgvs_not_delins)): + # test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) + # else: + # test_tx_var = rn_tx_hgvs_not_delins + # # re-make genomic and tx + # hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, + # no_norm_evm, hn) + # rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, + # str( + # saved_hgvs_coding.ac)) + # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 + # else: + # fn.exceptPass() + # + # # Logic + # if len(hgvs_not_delins.posedit.edit.ref) < len( + # rn_tx_hgvs_not_delins.posedit.edit.ref): + # gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( + # hgvs_not_delins.posedit.edit.ref) + # disparity_deletion_in = 
['chromosome', gap_length] + # elif len(hgvs_not_delins.posedit.edit.ref) > len( + # rn_tx_hgvs_not_delins.posedit.edit.ref): + # gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( + # rn_tx_hgvs_not_delins.posedit.edit.ref) + # disparity_deletion_in = ['transcript', gap_length] + # else: + # re_capture_tx_variant = [] + # for possibility in hgvs_genomic_possibilities: + # if possibility == '': + # continue + # hgvs_t_possibility = self.vm.g_to_t(possibility, hgvs_coding.ac) + # if hgvs_t_possibility.posedit.edit.type == 'ins': + # try: + # hgvs_t_possibility = self.vm.c_to_n(hgvs_t_possibility) + # except: + # continue + # if hgvs_t_possibility.posedit.pos.start.offset != 0 or hgvs_t_possibility.posedit.pos.end.offset != 0: + # continue + # ins_ref = self.sf.fetch_seq(hgvs_t_possibility.ac, + # hgvs_t_possibility.posedit.pos.start.base - 1, + # hgvs_t_possibility.posedit.pos.start.base + 1) + # try: + # hgvs_t_possibility = self.vm.n_to_c(hgvs_t_possibility) + # except: + # continue + # hgvs_t_possibility.posedit.edit.ref = ins_ref + # hgvs_t_possibility.posedit.edit.alt = ins_ref[ + # 0] + hgvs_t_possibility.posedit.edit.alt + \ + # ins_ref[1] + # if possibility.posedit.edit.type == 'ins': + # ins_ref = self.sf.fetch_seq(possibility.ac, + # possibility.posedit.pos.start.base - 1, + # possibility.posedit.pos.end.base) + # possibility.posedit.edit.ref = ins_ref + # possibility.posedit.edit.alt = ins_ref[ + # 0] + possibility.posedit.edit.alt + \ + # ins_ref[1] + # if len(hgvs_t_possibility.posedit.edit.ref) < len( + # possibility.posedit.edit.ref): + # gap_length = len(possibility.posedit.edit.ref) - len( + # hgvs_t_possibility.posedit.edit.ref) + # re_capture_tx_variant = ['transcript', gap_length, + # hgvs_t_possibility] + # hgvs_not_delins = possibility + # hgvs_genomic_5pr = possibility + # break + # + # if re_capture_tx_variant != []: + # try: + # tx_hgvs_not_delins = self.vm.c_to_n(re_capture_tx_variant[2]) + # except: + # tx_hgvs_not_delins = 
re_capture_tx_variant[2] + # disparity_deletion_in = re_capture_tx_variant[0:-1] + # else: + # pass + # + # # Final sanity checks + # try: + # self.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + # except Exception as e: + # if str( + # e) == 'start or end or both are beyond the bounds of transcript record': + # continue + # try: + # hn.normalize(tx_hgvs_not_delins) + # except hgvs.exceptions.HGVSUnsupportedOperationError as e: + # error = str(e) + # + # if re.match('Normalization of intronic variants is not supported', + # error) or re.match( + # 'Unsupported normalization of variants spanning the exon-intron boundary', + # error): + # if re.match( + # 'Unsupported normalization of variants spanning the exon-intron boundary', + # error): + # continue + # elif re.match('Normalization of intronic variants is not supported', + # error): + # # We know that this cannot be because of an intronic variant, so must be aligned to tx gap + # disparity_deletion_in = ['transcript', 'Requires Analysis'] + # + # # Recreate hgvs_genomic + # if disparity_deletion_in[0] == 'transcript': + # hgvs_genomic = hgvs_not_delins + # + # # Find oddly placed gaps where the tx variant is encompassed in the gap + # if disparity_deletion_in[0] == 'false' and ( + # possibility_counter == 3 or possibility_counter == 4): + # rg = reverse_normalizer.normalize(hgvs_not_delins) + # rtx = self.vm.g_to_t(rg, tx_hgvs_not_delins.ac) + # fg = hn.normalize(hgvs_not_delins) + # ftx = self.vm.g_to_t(fg, tx_hgvs_not_delins.ac) + # if ( + # rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( + # ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): + # exons = self.hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, alt_aln_method) + # exonic = False + # for ex_test in exons: + # if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ + # 7]) and ftx.posedit.pos.end.base in range(ex_test[6], + # ex_test[7]): + # exonic = True + # if exonic is True: + # 
hgvs_not_delins = fg + # hgvs_genomic = fg + # hgvs_genomic_5pr = fg + # try: + # tx_hgvs_not_delins = self.vm.c_to_n(ftx) + # except Exception: + # tx_hgvs_not_delins = ftx + # disparity_deletion_in = ['transcript', 'Requires Analysis'] + # + # # Pre-processing of tx_hgvs_not_delins + # try: + # if tx_hgvs_not_delins.posedit.edit.alt is None: + # tx_hgvs_not_delins.posedit.edit.alt = '' + # except Exception as e: + # if str(e) == "'Dup' object has no attribute 'alt'": + # tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( + # tx_hgvs_not_delins.posedit.pos.start) + '_' + str( + # tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref + # tx_hgvs_not_delins = self.hp.parse_hgvs_variant( + # tx_hgvs_not_delins_delins_from_dup) + # + # if disparity_deletion_in[0] == 'transcript': + # # amend_RefSeqGene = 'true' + # # ANY VARIANT WHOLLY WITHIN THE GAP + # if (re.search(r'\+', + # str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( + # r'\-', str(tx_hgvs_not_delins.posedit.pos.start))) and ( + # re.search(r'\+', + # str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( + # r'\-', str(tx_hgvs_not_delins.posedit.pos.end))): + # gapped_transcripts = gapped_transcripts + ' ' + str( + # tx_hgvs_not_delins.ac) + # + # # Copy the current variant + # tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) + # try: + # if tx_gap_fill_variant.posedit.edit.alt is None: + # tx_gap_fill_variant.posedit.edit.alt = '' + # except Exception as e: + # if str(e) == "'Dup' object has no attribute 'alt'": + # tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( + # tx_gap_fill_variant.posedit.pos.start) + '_' + str( + # tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref + # tx_gap_fill_variant = self.hp.parse_hgvs_variant( + # tx_gap_fill_variant_delins_from_dup) + # + # # Identify which half of the NOT-intron the start position of the variant is in + # if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): + # tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 + # tx_gap_fill_variant.posedit.pos.start.offset = int( + # '0') # int('+1') + # tx_gap_fill_variant.posedit.pos.end.offset = int( + # '0') # int('-1') + # tx_gap_fill_variant.posedit.edit.alt = '' + # tx_gap_fill_variant.posedit.edit.ref = '' + # elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): + # tx_gap_fill_variant.posedit.pos.start.offset = int( + # '0') # int('+1') + # tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 + # tx_gap_fill_variant.posedit.pos.end.offset = int( + # '0') # int('-1') + # tx_gap_fill_variant.posedit.edit.alt = '' + # tx_gap_fill_variant.posedit.edit.ref = '' + # + # try: + # tx_gap_fill_variant = self.vm.n_to_c(tx_gap_fill_variant) + # except: + # fn.exceptPass() + # genomic_gap_fill_variant = self.vm.t_to_g(tx_gap_fill_variant, + # reverse_normalized_hgvs_genomic.ac) + # genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref + # + # try: + # c_tx_hgvs_not_delins = self.vm.n_to_c(tx_hgvs_not_delins) + # except Exception: + # c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) + # genomic_gap_fill_variant_alt = self.vm.t_to_g(c_tx_hgvs_not_delins, + # hgvs_genomic_5pr.ac) + # + # # Ensure an ALT exists + # try: + # if genomic_gap_fill_variant_alt.posedit.edit.alt is None: + # genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' + # except Exception as e: + # if str(e) == "'Dup' 
object has no attribute 'alt'": + # genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( + # genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( + # genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref + # genomic_gap_fill_variant = self.hp.parse_hgvs_variant( + # genomic_gap_fill_variant_delins_from_dup) + # genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( + # genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( + # genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref + # genomic_gap_fill_variant_alt = self.hp.parse_hgvs_variant( + # genomic_gap_fill_variant_alt_delins_from_dup) + # + # # Correct insertion alts + # if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': + # append_ref = self.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, + # genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, + # genomic_gap_fill_variant_alt.posedit.pos.end.base) + # genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ + # 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ + # append_ref[1] + # + # # Split the reference and replacing alt sequence into a dictionary + # reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) + # if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: + # alternate_bases = list( + # genomic_gap_fill_variant_alt.posedit.edit.alt) + # else: + # # Deletions with no ins + # pre_alternate_bases = list( + # genomic_gap_fill_variant_alt.posedit.edit.ref) + # alternate_bases = [] + # for base in pre_alternate_bases: + # alternate_bases.append('X') + # + # 
# Create the dictionaries + # ref_start = genomic_gap_fill_variant.posedit.pos.start.base + # alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base + # ref_base_dict = {} + # for base in reference_bases: + # ref_base_dict[ref_start] = str(base) + # ref_start = ref_start + 1 + # + # alt_base_dict = {} + # + # # NEED TO SEARCH FOR RANGE = and replace with interval_range + # # Need to search for int and replace with integer + # + # # Note, all variants will be forced into the format delete insert + # # Deleted bases in the ALT will be substituted for X + # for integer in range( + # genomic_gap_fill_variant_alt.posedit.pos.start.base, + # genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): + # if integer == alt_start: + # alt_base_dict[integer] = str(''.join(alternate_bases)) + # else: + # alt_base_dict[integer] = 'X' + # + # # Generate the alt sequence + # alternate_sequence_bases = [] + # for integer in range( + # genomic_gap_fill_variant.posedit.pos.start.base, + # genomic_gap_fill_variant.posedit.pos.end.base + 1, + # 1): + # if integer in list(alt_base_dict.keys()): + # alternate_sequence_bases.append(alt_base_dict[integer]) + # else: + # alternate_sequence_bases.append(ref_base_dict[integer]) + # alternate_sequence = ''.join(alternate_sequence_bases) + # alternate_sequence = alternate_sequence.replace('X', '') + # + # # Add the new alt to the gap fill variant and generate transcript variant + # genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence + # hgvs_refreshed_variant = self.vm.g_to_t(genomic_gap_fill_variant, + # tx_gap_fill_variant.ac) + # + # # Set warning + # gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) + # disparity_deletion_in[1] = [gap_size] + # auto_info = auto_info + str( + # stored_hgvs_not_delins.ac) + ':g.' 
+ str( + # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( + # tx_hgvs_not_delins.ac) + # non_valid_caution = 'true' + # + # # Alignment position + # for_location_c = copy.deepcopy(hgvs_refreshed_variant) + # if re.match('NM_', str(for_location_c)): + # for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + # if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): + # gps = for_location_c.posedit.pos.start.base - 1 + # gpe = for_location_c.posedit.pos.start.base + # else: + # gps = for_location_c.posedit.pos.start.base + # gpe = for_location_c.posedit.pos.start.base + 1 + # gap_position = ' between positions c.' + str(gps) + '_' + str( + # gpe) + '\n' + # auto_info = auto_info + '%s' % (gap_position) + # + # else: + # if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: + # # In this instance, we have identified a transcript gap but the n. version of + # # the transcript variant but do not have a position which actually hits the gap, + # # so the variant likely spans the gap, and is not picked up by an offset. 
+ # try: + # c1 = self.vm.n_to_c(tx_hgvs_not_delins) + # except: + # c1 = tx_hgvs_not_delins + # g1 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) + # g3 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) + # g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) + # ng2 = hn.normalize(g2) + # g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( + # len(g3.posedit.edit.ref) - 1) + # try: + # c2 = self.vm.g_to_t(g3, c1.ac) + # if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: + # pass + # else: + # tx_hgvs_not_delins = c2 + # try: + # tx_hgvs_not_delins = self.vm.c_to_n(tx_hgvs_not_delins) + # except hgvs.exceptions.HGVSError: + # fn.exceptPass() + # except hgvs.exceptions.HGVSInvalidVariantError: + # fn.exceptPass() + # + # if re.search(r'\+', str( + # tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + # r'\+', + # str( + # tx_hgvs_not_delins.posedit.pos.end)): + # auto_info = auto_info + str( + # stored_hgvs_not_delins.ac) + ':g.' + str( + # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + # disparity_deletion_in[ + # 1]) + ' genomic base(s) that fail to align to transcript ' + str( + # tx_hgvs_not_delins.ac) + # non_valid_caution = 'true' + # try: + # c2 = self.vm.n_to_c(tx_hgvs_not_delins) + # except: + # c2 = tx_hgvs_not_delins + # c1 = copy.deepcopy(c2) + # c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + # c1.posedit.pos.start.offset = 0 + # c1.posedit.pos.end = c2.posedit.pos.start + # c1.posedit.edit.ref = '' + # c1.posedit.edit.alt = '' + # if orientation != -1: + # g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + # g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) + # g1.posedit.edit.alt = g1.posedit.edit.ref + # else: + # g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) + # g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) + # g2.posedit.edit.alt = g2.posedit.edit.ref + # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + # g3 = copy.deepcopy(g1) + # g3.posedit.pos.end.base = 
g2.posedit.pos.end.base + # g3.posedit.edit.ref = reference + # g3.posedit.edit.alt = alternate + # c3 = self.vm.g_to_t(g3, c1.ac) + # hgvs_refreshed_variant = c3 + # # Alignment position + # for_location_c = copy.deepcopy(hgvs_refreshed_variant) + # if re.match('NM_', str(for_location_c)): + # for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + # gps = for_location_c.posedit.pos.start.base + # gpe = for_location_c.posedit.pos.start.base + 1 + # gap_position = ' between positions c.' + str(gps) + '_' + str( + # gpe) + '\n' + # # Warn update + # auto_info = auto_info + '%s' % (gap_position) + # elif re.search(r'\+', str( + # tx_hgvs_not_delins.posedit.pos.end)) and not re.search(r'\+', + # str( + # tx_hgvs_not_delins.posedit.pos.start)): + # auto_info = auto_info + 'Genome position ' + str( + # stored_hgvs_not_delins.ac) + ':g.' + str( + # stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + # disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + # tx_hgvs_not_delins.ac) + # gapped_transcripts = gapped_transcripts + ' ' + str( + # tx_hgvs_not_delins.ac) + # non_valid_caution = 'true' + # try: + # c1 = self.vm.n_to_c(tx_hgvs_not_delins) + # except: + # c1 = tx_hgvs_not_delins + # c2 = copy.deepcopy(c1) + # c2.posedit.pos.start = c1.posedit.pos.end + # c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + # c2.posedit.pos.end.offset = 0 + # c2.posedit.edit.ref = '' + # c2.posedit.edit.alt = '' + # if orientation != -1: + # g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + # g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) + # g2.posedit.edit.alt = g2.posedit.edit.ref + # else: + # g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) + # g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) + # g1.posedit.edit.alt = g1.posedit.edit.ref + # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + # g3 = copy.deepcopy(g1) + # g3.posedit.pos.end.base = g2.posedit.pos.end.base + # g3.posedit.edit.ref = 
reference + # g3.posedit.edit.alt = alternate + # c3 = self.vm.g_to_t(g3, c1.ac) + # hgvs_refreshed_variant = c3 + # # Alignment position + # for_location_c = copy.deepcopy(hgvs_refreshed_variant) + # if re.match('NM_', str(for_location_c)): + # for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + # gps = for_location_c.posedit.pos.end.base + # gpe = for_location_c.posedit.pos.end.base + 1 + # gap_position = ' between positions c.' + str(gps) + '_' + str( + # gpe) + '\n' + # # Warn update + # auto_info = auto_info + '%s' % (gap_position) + # elif re.search(r'\-', str( + # tx_hgvs_not_delins.posedit.pos.start)) and not re.search( + # r'\-', + # str( + # tx_hgvs_not_delins.posedit.pos.end)): + # auto_info = auto_info + str( + # stored_hgvs_not_delins.ac) + ':g.' + str( + # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( + # disparity_deletion_in[ + # 1]) + ' genomic base(s) that fail to align to transcript ' + str( + # tx_hgvs_not_delins.ac) + # non_valid_caution = 'true' + # try: + # c2 = self.vm.n_to_c(tx_hgvs_not_delins) + # except: + # c2 = tx_hgvs_not_delins + # c1 = copy.deepcopy(c2) + # c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 + # c1.posedit.pos.start.offset = 0 + # c1.posedit.pos.end = c2.posedit.pos.start + # c1.posedit.edit.ref = '' + # c1.posedit.edit.alt = '' + # if orientation != -1: + # g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + # g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) + # g1.posedit.edit.alt = g1.posedit.edit.ref + # else: + # g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) + # g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) + # g2.posedit.edit.alt = g2.posedit.edit.ref + # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + # g3 = copy.deepcopy(g1) + # g3.posedit.pos.end.base = g2.posedit.pos.end.base + # g3.posedit.edit.ref = reference + # g3.posedit.edit.alt = alternate + # c3 = self.vm.g_to_t(g3, c1.ac) + # hgvs_refreshed_variant = c3 + # # 
Alignment position + # for_location_c = copy.deepcopy(hgvs_refreshed_variant) + # if re.match('NM_', str(for_location_c)): + # for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + # gps = for_location_c.posedit.pos.start.base - 1 + # gpe = for_location_c.posedit.pos.start.base + # gap_position = ' between positions c.' + str(gps) + '_' + str( + # gpe) + '\n' + # # Warn update + # auto_info = auto_info + '%s' % (gap_position) + # elif re.search(r'\-', str( + # tx_hgvs_not_delins.posedit.pos.end)) and not re.search(r'\-', + # str( + # tx_hgvs_not_delins.posedit.pos.start)): + # auto_info = auto_info + 'Genome position ' + str( + # stored_hgvs_not_delins.ac) + ':g.' + str( + # stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( + # disparity_deletion_in[1]) + '-bp gap in transcript ' + str( + # tx_hgvs_not_delins.ac) + # gapped_transcripts = gapped_transcripts + ' ' + str( + # tx_hgvs_not_delins.ac) + # non_valid_caution = 'true' + # try: + # c1 = self.vm.n_to_c(tx_hgvs_not_delins) + # except: + # c1 = tx_hgvs_not_delins + # c2 = copy.deepcopy(c1) + # c2.posedit.pos.start = c1.posedit.pos.end + # c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 + # c2.posedit.pos.end.offset = 0 + # c2.posedit.edit.ref = '' + # c2.posedit.edit.alt = '' + # if orientation != -1: + # g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) + # g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) + # g2.posedit.edit.alt = g2.posedit.edit.ref + # else: + # g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) + # g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) + # g1.posedit.edit.alt = g1.posedit.edit.ref + # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] + # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] + # g3 = copy.deepcopy(g1) + # g3.posedit.pos.end.base = g2.posedit.pos.end.base + # g3.posedit.edit.ref = reference + # g3.posedit.edit.alt = alternate + # c3 = self.vm.g_to_t(g3, c1.ac) + # hgvs_refreshed_variant = c3 + # # Alignment position + # for_location_c = 
copy.deepcopy(hgvs_refreshed_variant) + # if re.match('NM_', str(for_location_c)): + # for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) + # gps = for_location_c.posedit.pos.end.base - 1 + # gpe = for_location_c.posedit.pos.end.base + # gap_position = ' between positions c.' + str(gps) + '_' + str( + # gpe) + '\n' + # # Warn update + # auto_info = auto_info + '%s' % (gap_position) + # else: + # auto_info = auto_info + str( + # stored_hgvs_not_delins.ac) + ':g.' + str( + # stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( + # disparity_deletion_in[ + # 1]) + ' genomic base(s) that fail to align to transcript ' + str( + # tx_hgvs_not_delins.ac) + '\n' + # hgvs_refreshed_variant = tx_hgvs_not_delins + # + # # GAP IN THE CHROMOSOME + # elif disparity_deletion_in[0] == 'chromosome': + # # amend_RefSeqGene = 'true' + # if possibility_counter == 3: + # hgvs_refreshed_variant = stash_tx_right + # elif possibility_counter == 4: + # hgvs_refreshed_variant = stash_tx_left + # else: + # hgvs_refreshed_variant = chromosome_normalized_hgvs_coding + # # Warn + # auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' 
+ str( + # hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( + # disparity_deletion_in[ + # 1]) + ' transcript base(s) that fail to align to chromosome ' + str( + # hgvs_genomic.ac) + '\n' + # else: + # # Keep the same by re-setting rel_var + # hgvs_refreshed_variant = hgvs_coding + # # amend_RefSeqGene = 'false' + # + # # Edit the output + # if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', + # str( + # hgvs_refreshed_variant.type)): + # hgvs_refreshed_variant = no_norm_evm.n_to_c(hgvs_refreshed_variant) + # else: + # pass + # + # try: + # hn.normalize(hgvs_refreshed_variant) + # except Exception as e: + # error = str(e) + # # Ensure the final variant is not intronic nor does it cross exon boundaries + # if re.match('Normalization of intronic variants is not supported', + # error) or re.match( + # 'Unsupported normalization of variants spanning the exon-intron boundary', + # error): + # hgvs_refreshed_variant = saved_hgvs_coding + # else: + # continue + # + # # Quick check to make sure the coding variant has not changed + # try: + # to_test = hn.normalize(hgvs_refreshed_variant) + # except: + # to_test = hgvs_refreshed_variant + # if str(to_test.posedit.edit) != str(hgvs_coding.posedit.edit): + # # Try the next available genomic option + # if hgvs_coding.posedit.edit.type == 'identity' and to_test.posedit.edit.type == 'identity': + # hgvs_coding = to_test + # else: + # continue + # + # # Update hgvs_genomic + # hgvs_alt_genomic = self.myvm_t_to_g(hgvs_refreshed_variant, alt_chr, + # no_norm_evm, hn) + # if hgvs_alt_genomic.posedit.edit.type == 'identity': + # re_c = self.vm.g_to_t(hgvs_alt_genomic, hgvs_refreshed_variant.ac) + # if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): + # shuffle_left_g = copy.copy(hgvs_alt_genomic) + # shuffle_left_g.posedit.edit.ref = '' + # shuffle_left_g.posedit.edit.alt = '' + # shuffle_left_g.posedit.pos.start.base = shuffle_left_g.posedit.pos.start.base - 1 + # 
shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 + # shuffle_left_g = reverse_normalizer.normalize(shuffle_left_g) + # re_c = self.vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) + # if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): + # hgvs_alt_genomic = shuffle_left_g + # + # # If it is intronic, these vairables will not have been set + # else: + # # amend_RefSeqGene = 'false' + # no_normalized_c = 'false' + # + # # Break if gap has been detected + # if disparity_deletion_in[0] != 'false': + # break + # + # # Normailse hgvs_genomic + # try: + # hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) + # except hgvs.exceptions.HGVSError as e: + # # Strange error caused by gap in genomic + # error = str(e) + # if re.search('base start position must be <= end position', error) and \ + # disparity_deletion_in[0] == 'chromosome': + # if hgvs_alt_genomic.posedit.edit.type == 'delins': + # start = hgvs_alt_genomic.posedit.pos.start.base + # end = hgvs_alt_genomic.posedit.pos.end.base + # lhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) + # rhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) + # hgvs_alt_genomic.posedit.edit.ref = lhb + rhb + # hgvs_alt_genomic.posedit.edit.alt = lhb + hgvs_alt_genomic.posedit.edit.alt + rhb + # hgvs_alt_genomic.posedit.pos.start.base = end + # hgvs_alt_genomic.posedit.pos.end.base = start + # hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) + # if hgvs_alt_genomic.posedit.edit.type == 'del': + # start = hgvs_alt_genomic.posedit.pos.start.base + # end = hgvs_alt_genomic.posedit.pos.end.base + # lhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) + # rhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) + # hgvs_alt_genomic.posedit.edit.ref = lhb + rhb + # hgvs_alt_genomic.posedit.edit.alt = lhb + rhb + # hgvs_alt_genomic.posedit.pos.start.base = end + # hgvs_alt_genomic.posedit.pos.end.base = start + # hgvs_alt_genomic = 
hn.normalize(hgvs_alt_genomic) + # + # # Refresh the :g. variant + # multi_g.append(hgvs_alt_genomic) + # else: + # multi_g.append(hgvs_alt_genomic) + # corrective_action_taken = 'false' + # + # # In this instance, the gap code has generally found an incomplete-alignment rather than a + # # truly gapped alignment. + # except KeyError: + # warnings = warnings + ': Suspected incomplete alignment between transcript %s and ' \ + # 'genomic reference sequence %s' % (hgvs_coding.ac, + # alt_chr) + # continue + # except hgvs.exceptions.HGVSError as e: + # exc_type, exc_value, last_traceback = sys.exc_info() + # te = traceback.format_exc() + # error = str(te) + # logger.error(str(exc_type) + " " + str(exc_value)) + # logger.debug(error) + # continue + # + # if multi_g != []: + # + # multi_gen_vars = multi_g # '|'.join(multi_g) + # else: + # multi_gen_vars = [] else: # HGVS genomic in the absence of a transcript variant if genomic_variant != '': From a7b9c55ee07385e23243023a6b4d9ffe78ca1e04 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 26 Apr 2019 16:06:45 +0100 Subject: [PATCH 079/223] Fixed third section so it uses existing methods --- VariantValidator/modules/gapped_mapping.py | 1082 +++------------ VariantValidator/modules/mappers.py | 1314 ------------------ VariantValidator/modules/vvMixinCore.py | 1391 -------------------- 3 files changed, 175 insertions(+), 3612 deletions(-) diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index 9ec0ad04..af9c7d53 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -608,112 +608,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): fn.exceptPass() # direct mapping from reverse_normalized transcript insertions in the delins format - try: - if hgvs_coding.posedit.edit.type == 'ins': - most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) - most_3pr_hgvs_transcript_variant = 
self.variant.reverse_normalizer.normalize(hgvs_coding) - try: - n_3pr = self.validator.vm.c_to_n(most_3pr_hgvs_transcript_variant) - n_5pr = self.validator.vm.c_to_n(most_5pr_hgvs_transcript_variant) - except: - n_3pr = most_3pr_hgvs_transcript_variant - n_5pr = most_5pr_hgvs_transcript_variant - # Make into a delins by adding the ref bases to the variant ref and alt - pr3_ref = self.validator.sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, - n_3pr.posedit.pos.end.base) - pr5_ref = self.validator.sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, - n_5pr.posedit.pos.end.base) - most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref - most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref - most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[0] + \ - most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ - pr3_ref[1] - most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[0] + \ - most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ - pr5_ref[1] - # Map to the genome - genomic_from_most_3pr_hgvs_transcript_variant = self.validator.vm.t_to_g( - most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) - genomic_from_most_5pr_hgvs_transcript_variant = self.validator.vm.t_to_g( - most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) - # Normalize - If the variant spans a gap it should then form a static genomic variant - try: - genomic_from_most_3pr_hgvs_transcript_variant = self.variant.hn.normalize( - genomic_from_most_3pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if error == 'base start position must be <= end position': - start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base - end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start - genomic_from_most_3pr_hgvs_transcript_variant 
= self.variant.hn.normalize( - genomic_from_most_3pr_hgvs_transcript_variant) - try: - genomic_from_most_5pr_hgvs_transcript_variant = self.variant.hn.normalize( - genomic_from_most_5pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if error == 'base start position must be <= end position': - start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base - end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start - genomic_from_most_5pr_hgvs_transcript_variant = self.variant.hn.normalize( - genomic_from_most_5pr_hgvs_transcript_variant) - try: - if genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' 
+ str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_3pr_hgvs_transcript_variant = self.validator.hp.parse_hgvs_variant( - genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) - - try: - if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' + str( - most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref - most_3pr_hgvs_transcript_variant = self.validator.hp.parse_hgvs_variant( - most_3pr_hgvs_transcript_variant_delins_from_dup) - - try: - if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' 
+ str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_5pr_hgvs_transcript_variant = self.validator.hp.parse_hgvs_variant( - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) - - try: - if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' + str( - most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref - most_5pr_hgvs_transcript_variant = self.validator.hp.parse_hgvs_variant( - most_5pr_hgvs_transcript_variant_delins_from_dup) - - if len(genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( - most_3pr_hgvs_transcript_variant.posedit.edit.alt): - self.hgvs_genomic_possibilities.append(genomic_from_most_3pr_hgvs_transcript_variant) - if len(genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( - most_5pr_hgvs_transcript_variant.posedit.edit.alt): - self.hgvs_genomic_possibilities.append(genomic_from_most_5pr_hgvs_transcript_variant) - - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - fn.exceptPass() + self.rev_norm_ins(hgvs_coding, hgvs_genomic) logger.info('\nGENOMIC POSSIBILITIES') for possibility in self.hgvs_genomic_possibilities: @@ -1294,13 +1189,13 @@ 
def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic): def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, alt_chr, rec_var): - orientation = int(ori[0]['alt_strand']) + self.orientation = int(ori[0]['alt_strand']) hgvs_genomic = copy.deepcopy(hgvs_alt_genomic) logger.warning('g_to_t gap code 3 active') rn_hgvs_genomic = self.variant.reverse_normalizer.normalize(hgvs_alt_genomic) self.hgvs_genomic_possibilities.append(rn_hgvs_genomic) - if orientation != -1: + if self.orientation != -1: try: chromosome_normalized_hgvs_coding = self.variant.reverse_normalizer.normalize( hgvs_coding) @@ -1480,133 +1375,16 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a fn.exceptPass() # direct mapping from reverse_normalized transcript insertions in the delins format - try: - if hgvs_coding.posedit.edit.type == 'ins': - most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) - most_3pr_hgvs_transcript_variant = self.variant.reverse_normalizer.normalize(hgvs_coding) - try: - n_3pr = self.validator.vm.c_to_n(most_3pr_hgvs_transcript_variant) - n_5pr = self.validator.vm.c_to_n(most_5pr_hgvs_transcript_variant) - except: - n_3pr = most_3pr_hgvs_transcript_variant - n_5pr = most_5pr_hgvs_transcript_variant - # Make into a delins by adding the ref bases to the self.variant ref and alt - pr3_ref = self.validator.sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, - n_3pr.posedit.pos.end.base) - pr5_ref = self.validator.sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, - n_5pr.posedit.pos.end.base) - most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref - most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref - most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[ - 0] + most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ - pr3_ref[1] - most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[ - 0] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ - 
pr5_ref[1] - # Map to the genome - genomic_from_most_3pr_hgvs_transcript_variant = self.validator.vm.t_to_g( - most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) - genomic_from_most_5pr_hgvs_transcript_variant = self.validator.vm.t_to_g( - most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) - - # Normalize - If the self.variant spans a gap it should then form a static genomic self.variant - try: - genomic_from_most_3pr_hgvs_transcript_variant = self.variant.hn.normalize( - genomic_from_most_3pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if error == 'base start position must be <= end position': - start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base - end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start - genomic_from_most_3pr_hgvs_transcript_variant = self.variant.hn.normalize( - genomic_from_most_3pr_hgvs_transcript_variant) - try: - genomic_from_most_5pr_hgvs_transcript_variant = self.variant.hn.normalize( - genomic_from_most_5pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: - error = str(e) - if error == 'base start position must be <= end position': - start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base - end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start - genomic_from_most_5pr_hgvs_transcript_variant = self.variant.hn.normalize( - genomic_from_most_5pr_hgvs_transcript_variant) - - try: - if genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' 
object has no attribute 'alt'": - genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' + str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_3pr_hgvs_transcript_variant = self.validator.hp.parse_hgvs_variant( - genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) - - try: - if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' + str( - most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref - most_3pr_hgvs_transcript_variant = self.validator.hp.parse_hgvs_variant( - most_3pr_hgvs_transcript_variant_delins_from_dup) - - try: - if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' 
+ str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref - genomic_from_most_5pr_hgvs_transcript_variant = self.validator.hp.parse_hgvs_variant( - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) - - try: - if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' + str( - most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref - most_5pr_hgvs_transcript_variant = self.validator.hp.parse_hgvs_variant( - most_5pr_hgvs_transcript_variant_delins_from_dup) - - if len( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( - most_3pr_hgvs_transcript_variant.posedit.edit.alt): - self.hgvs_genomic_possibilities.append( - genomic_from_most_3pr_hgvs_transcript_variant) - if len( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( - most_5pr_hgvs_transcript_variant.posedit.edit.alt): - self.hgvs_genomic_possibilities.append( - genomic_from_most_5pr_hgvs_transcript_variant) - - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the 
exon-intron boundary', - error): - pass - fn.exceptPass() + self.rev_norm_ins(hgvs_coding, hgvs_genomic) # Set variables for problem specific warnings gapped_alignment_warning = '' corrective_action_taken = '' - gapped_transcripts = '' - auto_info = '' + self.gapped_transcripts = '' + self.auto_info = '' # Mark as not disparity detected - disparity_deletion_in = ['false', 'false'] + self.disparity_deletion_in = ['false', 'false'] # Loop through to see if a gap can be located possibility_counter = 0 for possibility in self.hgvs_genomic_possibilities: @@ -1698,50 +1476,15 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a # Detect intronic variation using normalization intronic_variant = 'false' # Look for normalized variant options that do not match hgvs_coding - if orientation == -1: - # position genomic at its most 5 prime position - try: - query_genomic = self.variant.reverse_normalizer.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript ant test for movement - try: - hgvs_seek_var = self.variant.evm.g_to_t(query_genomic, hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - else: - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if ( - hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding - - elif orientation != -1: - # position genomic at its most 3 prime position - try: - query_genomic = self.variant.hn.normalize(hgvs_genomic) - except: - query_genomic = hgvs_genomic - # Map to the transcript and test for movement - try: - hgvs_seek_var = self.variant.evm.g_to_t(query_genomic, 
saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: - hgvs_seek_var = saved_hgvs_coding - seek_var = fn.valstr(hgvs_seek_var) - seek_ac = str(hgvs_seek_var.ac) - if ( - hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - pass - else: - hgvs_seek_var = saved_hgvs_coding + hgvs_seek_var = self.get_hgvs_seek_var(hgvs_genomic, hgvs_coding) + if ( + hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( + hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( + hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( + hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': + pass + else: + hgvs_seek_var = saved_hgvs_coding try: intron_test = self.variant.hn.normalize(hgvs_seek_var) @@ -1793,135 +1536,25 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths if stored_hgvs_not_delins != '': # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) - # This test will only occur in dup of single base, insertion or substitution - if not re.search('_', str(hgvs_not_delins.posedit.pos)): - if re.search('dup', self.hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', - self.hgvs_genomic_5pr.posedit.edit.type): - # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos - plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) - plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 - plussed_hgvs_not_delins.posedit.edit.ref = '' - transcript_variant = self.variant.no_norm_evm.g_to_t(plussed_hgvs_not_delins, - str(saved_hgvs_coding.ac)) - if (( - transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( - self.hgvs_genomic_5pr.posedit.pos.end.base - self.hgvs_genomic_5pr.posedit.pos.start.base)): - if re.search('dup', str(self.hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = self.validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[1:] - elif re.search('ins', str(self.hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(self.hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', str( - self.hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', - str( - self.hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = self.validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, - end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[ - 1:] - else: - if re.search('dup', str(self.hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - 
start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = self.validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, - end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[ - 1:] - elif re.search('ins', str( - self.hgvs_genomic_5pr.posedit.edit)) and re.search('del', - str( - self.hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', str( - self.hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', - str( - self.hgvs_genomic_5pr.posedit.edit)): - hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - start = hgvs_not_delins.posedit.pos.start.base - 1 - end = hgvs_not_delins.posedit.pos.end.base - ref_bases = self.validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, - end) - hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ - 1:] + ref_bases[ - 1:] - else: - pass - else: - pass - tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, + hgvs_not_delins = self.dup_ins_5prime_shift(stored_hgvs_not_delins, self.hgvs_genomic_5pr, saved_hgvs_coding) + + self.tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) # Create normalized version of tx_hgvs_not_delins - rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) + rn_tx_hgvs_not_delins = copy.deepcopy(self.tx_hgvs_not_delins) # Check for +1 base and adjust if re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( r'\+', str( rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = 
rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - fn.exceptPass() + rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.validator.myvm_t_to_g(test_tx_var, alt_chr, - self.variant.no_norm_evm, self.variant.hn) - rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - str( - saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, back=False) + elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.validator.myvm_t_to_g(test_tx_var, alt_chr, - self.variant.no_norm_evm, self.variant.hn) - rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - str( - saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: - # pass + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding) # Check for -ve base and 
adjust elif re.search(r'\-', @@ -1930,125 +1563,27 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a r'\-', str( rn_tx_hgvs_not_delins.posedit.pos.start)): - # Remove offsetting to span the gap - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - try: - rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - fn.exceptPass() + rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # move tx end base back to next available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # Delete the ref - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # Add the additional base to the ALT - start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - end = rn_tx_hgvs_not_delins.posedit.pos.end.base - ref_bases = self.validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) - rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.validator.myvm_t_to_g(test_tx_var, alt_chr, - self.variant.no_norm_evm, self.variant.hn) - rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - str( - saved_hgvs_coding.ac)) + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding) + elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # move tx start base to previous available non-offset base - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - rn_tx_hgvs_not_delins.posedit.pos.start.base = 
rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): - test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - else: - test_tx_var = rn_tx_hgvs_not_delins - # re-make genomic and tx - hgvs_not_delins = self.validator.myvm_t_to_g(test_tx_var, alt_chr, - self.variant.no_norm_evm, self.variant.hn) - rn_tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - str( - saved_hgvs_coding.ac)) - rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - else: - fn.exceptPass() + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, with_base_subtract=True) # Logic - if len(hgvs_not_delins.posedit.edit.ref) < len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( - hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['chromosome', gap_length] - elif len(hgvs_not_delins.posedit.edit.ref) > len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( - rn_tx_hgvs_not_delins.posedit.edit.ref) - disparity_deletion_in = ['transcript', gap_length] - else: - re_capture_tx_variant = [] - for possibility in self.hgvs_genomic_possibilities: - if possibility == '': - continue - hgvs_t_possibility = self.validator.vm.g_to_t(possibility, hgvs_coding.ac) - if hgvs_t_possibility.posedit.edit.type == 'ins': - try: - hgvs_t_possibility = self.validator.vm.c_to_n(hgvs_t_possibility) - except: - continue - if hgvs_t_possibility.posedit.pos.start.offset != 0 or hgvs_t_possibility.posedit.pos.end.offset != 0: - continue - ins_ref = self.validator.sf.fetch_seq(hgvs_t_possibility.ac, - hgvs_t_possibility.posedit.pos.start.base - 1, - hgvs_t_possibility.posedit.pos.start.base + 1) - try: - hgvs_t_possibility = self.validator.vm.n_to_c(hgvs_t_possibility) - except: - continue - 
hgvs_t_possibility.posedit.edit.ref = ins_ref - hgvs_t_possibility.posedit.edit.alt = ins_ref[ - 0] + hgvs_t_possibility.posedit.edit.alt + \ - ins_ref[1] - if possibility.posedit.edit.type == 'ins': - ins_ref = self.validator.sf.fetch_seq(possibility.ac, - possibility.posedit.pos.start.base - 1, - possibility.posedit.pos.end.base) - possibility.posedit.edit.ref = ins_ref - possibility.posedit.edit.alt = ins_ref[ - 0] + possibility.posedit.edit.alt + \ - ins_ref[1] - if len(hgvs_t_possibility.posedit.edit.ref) < len( - possibility.posedit.edit.ref): - gap_length = len(possibility.posedit.edit.ref) - len( - hgvs_t_possibility.posedit.edit.ref) - re_capture_tx_variant = ['transcript', gap_length, - hgvs_t_possibility] - hgvs_not_delins = possibility - self.hgvs_genomic_5pr = possibility - break - if re_capture_tx_variant != []: - try: - tx_hgvs_not_delins = self.validator.vm.c_to_n(re_capture_tx_variant[2]) - except: - tx_hgvs_not_delins = re_capture_tx_variant[2] - disparity_deletion_in = re_capture_tx_variant[0:-1] - else: - pass + hgvs_not_delins = self.logic_check(hgvs_not_delins, rn_tx_hgvs_not_delins, hgvs_coding, do_continue=True, offset_check=True) # Final sanity checks try: - self.validator.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) + self.validator.vm.g_to_t(hgvs_not_delins, self.tx_hgvs_not_delins.ac) except Exception as e: if str( e) == 'start or end or both are beyond the bounds of transcript record': continue try: - self.variant.hn.normalize(tx_hgvs_not_delins) + self.variant.hn.normalize(self.tx_hgvs_not_delins) except hgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) @@ -2063,19 +1598,19 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a elif re.match('Normalization of intronic variants is not supported', error): # We know that this cannot be because of an intronic variant, so must be aligned to tx gap - disparity_deletion_in = ['transcript', 'Requires Analysis'] + self.disparity_deletion_in = 
['transcript', 'Requires Analysis'] # Recreate hgvs_genomic - if disparity_deletion_in[0] == 'transcript': + if self.disparity_deletion_in[0] == 'transcript': hgvs_genomic = hgvs_not_delins # Find oddly placed gaps where the tx variant is encompassed in the gap - if disparity_deletion_in[0] == 'false' and ( + if self.disparity_deletion_in[0] == 'false' and ( possibility_counter == 3 or possibility_counter == 4): rg = self.variant.reverse_normalizer.normalize(hgvs_not_delins) - rtx = self.validator.vm.g_to_t(rg, tx_hgvs_not_delins.ac) + rtx = self.validator.vm.g_to_t(rg, self.tx_hgvs_not_delins.ac) fg = self.variant.hn.normalize(hgvs_not_delins) - ftx = self.validator.vm.g_to_t(fg, tx_hgvs_not_delins.ac) + ftx = self.validator.vm.g_to_t(fg, self.tx_hgvs_not_delins.ac) if ( rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): @@ -2091,415 +1626,30 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a hgvs_genomic = fg self.hgvs_genomic_5pr = fg try: - tx_hgvs_not_delins = self.validator.vm.c_to_n(ftx) + self.tx_hgvs_not_delins = self.validator.vm.c_to_n(ftx) except Exception: - tx_hgvs_not_delins = ftx - disparity_deletion_in = ['transcript', 'Requires Analysis'] + self.tx_hgvs_not_delins = ftx + self.disparity_deletion_in = ['transcript', 'Requires Analysis'] # Pre-processing of tx_hgvs_not_delins try: - if tx_hgvs_not_delins.posedit.edit.alt is None: - tx_hgvs_not_delins.posedit.edit.alt = '' + if self.tx_hgvs_not_delins.posedit.edit.alt is None: + self.tx_hgvs_not_delins.posedit.edit.alt = '' except Exception as e: if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' 
+ str( - tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - tx_hgvs_not_delins = self.validator.hp.parse_hgvs_variant( + tx_hgvs_not_delins_delins_from_dup = self.tx_hgvs_not_delins.ac + ':' + self.tx_hgvs_not_delins.type + '.' + str( + self.tx_hgvs_not_delins.posedit.pos.start) + '_' + str( + self.tx_hgvs_not_delins.posedit.pos.end) + 'del' + self.tx_hgvs_not_delins.posedit.edit.ref + 'ins' + self.tx_hgvs_not_delins.posedit.edit.ref + self.tx_hgvs_not_delins.posedit.edit.ref + self.tx_hgvs_not_delins = self.validator.hp.parse_hgvs_variant( tx_hgvs_not_delins_delins_from_dup) - if disparity_deletion_in[0] == 'transcript': + if self.disparity_deletion_in[0] == 'transcript': # amend_RefSeqGene = 'true' # ANY VARIANT WHOLLY WITHIN THE GAP - if (re.search(r'\+', - str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search(r'\+', - str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( - r'\-', str(tx_hgvs_not_delins.posedit.pos.end))): - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) - - # Copy the current variant - tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) - try: - if tx_gap_fill_variant.posedit.edit.alt is None: - tx_gap_fill_variant.posedit.edit.alt = '' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( - tx_gap_fill_variant.posedit.pos.start) + '_' + str( - tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - tx_gap_fill_variant = self.validator.hp.parse_hgvs_variant( - tx_gap_fill_variant_delins_from_dup) - - # Identify which half of the NOT-intron the start position of the variant is in - if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - tx_gap_fill_variant.posedit.pos.start.offset = int( - '0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.offset = int( - '0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): - tx_gap_fill_variant.posedit.pos.start.offset = int( - '0') # int('+1') - tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - tx_gap_fill_variant.posedit.pos.end.offset = int( - '0') # int('-1') - tx_gap_fill_variant.posedit.edit.alt = '' - tx_gap_fill_variant.posedit.edit.ref = '' - - try: - tx_gap_fill_variant = self.validator.vm.n_to_c(tx_gap_fill_variant) - except: - fn.exceptPass() - genomic_gap_fill_variant = self.validator.vm.t_to_g(tx_gap_fill_variant, - reverse_normalized_hgvs_genomic.ac) - genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - - try: - c_tx_hgvs_not_delins = self.validator.vm.n_to_c(tx_hgvs_not_delins) - except Exception: - c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = self.validator.vm.t_to_g(c_tx_hgvs_not_delins, - self.hgvs_genomic_5pr.ac) - - # Ensure an ALT exists - try: - if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": 
- genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( - genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - genomic_gap_fill_variant = self.validator.hp.parse_hgvs_variant( - genomic_gap_fill_variant_delins_from_dup) - genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( - genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - genomic_gap_fill_variant_alt = self.validator.hp.parse_hgvs_variant( - genomic_gap_fill_variant_alt_delins_from_dup) - - # Correct insertion alts - if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - append_ref = self.validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - genomic_gap_fill_variant_alt.posedit.pos.end.base) - genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - append_ref[1] - - # Split the reference and replacing alt sequence into a dictionary - reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - alternate_bases = list( - genomic_gap_fill_variant_alt.posedit.edit.alt) - else: - # Deletions with no ins - pre_alternate_bases = list( - genomic_gap_fill_variant_alt.posedit.edit.ref) - alternate_bases = [] - for base in pre_alternate_bases: - alternate_bases.append('X') - - # Create the dictionaries - ref_start = 
genomic_gap_fill_variant.posedit.pos.start.base - alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base - ref_base_dict = {} - for base in reference_bases: - ref_base_dict[ref_start] = str(base) - ref_start = ref_start + 1 - - alt_base_dict = {} - - # NEED TO SEARCH FOR RANGE = and replace with interval_range - # Need to search for int and replace with integer - - # Note, all variants will be forced into the format delete insert - # Deleted bases in the ALT will be substituted for X - for integer in range( - genomic_gap_fill_variant_alt.posedit.pos.start.base, - genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): - if integer == alt_start: - alt_base_dict[integer] = str(''.join(alternate_bases)) - else: - alt_base_dict[integer] = 'X' - - # Generate the alt sequence - alternate_sequence_bases = [] - for integer in range( - genomic_gap_fill_variant.posedit.pos.start.base, - genomic_gap_fill_variant.posedit.pos.end.base + 1, - 1): - if integer in list(alt_base_dict.keys()): - alternate_sequence_bases.append(alt_base_dict[integer]) - else: - alternate_sequence_bases.append(ref_base_dict[integer]) - alternate_sequence = ''.join(alternate_sequence_bases) - alternate_sequence = alternate_sequence.replace('X', '') - - # Add the new alt to the gap fill variant and generate transcript variant - genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = self.validator.vm.g_to_t(genomic_gap_fill_variant, - tx_gap_fill_variant.ac) - - # Set warning - gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - disparity_deletion_in[1] = [gap_size] - auto_info = auto_info + str( - stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - else: - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - auto_info = auto_info + '%s' % (gap_position) - - else: - if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # In this instance, we have identified a transcript gap but the n. version of - # the transcript variant but do not have a position which actually hits the gap, - # so the variant likely spans the gap, and is not picked up by an offset. 
- try: - c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - g1 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) - g3 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - ng2 = self.variant.hn.normalize(g2) - g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - len(g3.posedit.edit.ref) - 1) - try: - c2 = self.validator.vm.g_to_t(g3, c1.ac) - if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - pass - else: - tx_hgvs_not_delins = c2 - try: - tx_hgvs_not_delins = self.validator.vm.c_to_n(tx_hgvs_not_delins) - except hgvs.exceptions.HGVSError: - fn.exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError: - fn.exceptPass() - - if re.search(r'\+', str( - tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\+', - str( - tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - 
g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\+', str( - tx_hgvs_not_delins.posedit.pos.end)) and not re.search(r'\+', - str( - tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = 
reference - g3.posedit.edit.alt = alternate - c3 = self.validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - gpe = for_location_c.posedit.pos.end.base + 1 - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', str( - tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\-', - str( - tx_hgvs_not_delins.posedit.pos.end)): - auto_info = auto_info + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c2 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c2 = tx_hgvs_not_delins - c1 = copy.deepcopy(c2) - c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - c1.posedit.pos.start.offset = 0 - c1.posedit.pos.end = c2.posedit.pos.start - c1.posedit.edit.ref = '' - c1.posedit.edit.alt = '' - if orientation != -1: - g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - else: - g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - 
for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): - for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - 1 - gpe = for_location_c.posedit.pos.start.base - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - elif re.search(r'\-', str( - tx_hgvs_not_delins.posedit.pos.end)) and not re.search(r'\-', - str( - tx_hgvs_not_delins.posedit.pos.start)): - auto_info = auto_info + 'Genome position ' + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - tx_hgvs_not_delins.ac) - gapped_transcripts = gapped_transcripts + ' ' + str( - tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - try: - c1 = self.validator.vm.n_to_c(tx_hgvs_not_delins) - except: - c1 = tx_hgvs_not_delins - c2 = copy.deepcopy(c1) - c2.posedit.pos.start = c1.posedit.pos.end - c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - c2.posedit.pos.end.offset = 0 - c2.posedit.edit.ref = '' - c2.posedit.edit.alt = '' - if orientation != -1: - g1 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2.posedit.edit.alt = g2.posedit.edit.ref - else: - g1 = self.validator.vm.t_to_g(c2, hgvs_genomic.ac) - g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) - g1.posedit.edit.alt = g1.posedit.edit.ref - reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - g3 = copy.deepcopy(g1) - g3.posedit.pos.end.base = g2.posedit.pos.end.base - g3.posedit.edit.ref = reference - g3.posedit.edit.alt = alternate - c3 = self.validator.vm.g_to_t(g3, c1.ac) - hgvs_refreshed_variant = c3 - # Alignment position - for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', 
str(for_location_c)): - for_location_c = self.variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.end.base - 1 - gpe = for_location_c.posedit.pos.end.base - gap_position = ' between positions c.' + str(gps) + '_' + str( - gpe) + '\n' - # Warn update - auto_info = auto_info + '%s' % (gap_position) - else: - auto_info = auto_info + str( - stored_hgvs_not_delins.ac) + ':g.' + str( - stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ - 1]) + ' genomic base(s) that fail to align to transcript ' + str( - tx_hgvs_not_delins.ac) + '\n' - hgvs_refreshed_variant = tx_hgvs_not_delins + hgvs_refreshed_variant = self.transcript_disparity(reverse_normalized_hgvs_genomic, stored_hgvs_not_delins, hgvs_genomic, 4) # GAP IN THE CHROMOSOME - elif disparity_deletion_in[0] == 'chromosome': + elif self.disparity_deletion_in[0] == 'chromosome': # amend_RefSeqGene = 'true' if possibility_counter == 3: hgvs_refreshed_variant = stash_tx_right @@ -2508,9 +1658,9 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a else: hgvs_refreshed_variant = chromosome_normalized_hgvs_coding # Warn - auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' + str( + self.auto_info = self.auto_info + str(hgvs_refreshed_variant.ac) + ':c.' 
+ str( hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( - disparity_deletion_in[ + self.disparity_deletion_in[ 1]) + ' transcript base(s) that fail to align to chromosome ' + str( hgvs_genomic.ac) + '\n' else: @@ -2573,7 +1723,7 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a no_normalized_c = 'false' # Break if gap has been detected - if disparity_deletion_in[0] != 'false': + if self.disparity_deletion_in[0] != 'false': break # Normailse hgvs_genomic @@ -2583,7 +1733,7 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a # Strange error caused by gap in genomic error = str(e) if re.search('base start position must be <= end position', error) and \ - disparity_deletion_in[0] == 'chromosome': + self.disparity_deletion_in[0] == 'chromosome': if hgvs_alt_genomic.posedit.edit.type == 'delins': start = hgvs_alt_genomic.posedit.pos.start.base end = hgvs_alt_genomic.posedit.pos.end.base @@ -3078,11 +2228,11 @@ def transcript_disparity(self, reverse_normalized_hgvs_genomic, stored_hgvs_not_ if running_option == 2: self.tx_hgvs_not_delins.posedit.pos.end.base = self.tx_hgvs_not_delins.posedit.pos.start.base + len( self.tx_hgvs_not_delins.posedit.edit.ref) - 1 - hgvs_refreshed_variant = self.tx_hgvs_not_delins - else: - hgvs_refreshed_variant = self.tx_hgvs_not_delins + elif running_option != 4: self.gapped_transcripts = self.gapped_transcripts + ' ' + str(self.tx_hgvs_not_delins.ac) + hgvs_refreshed_variant = self.tx_hgvs_not_delins + return hgvs_refreshed_variant def edit_output(self, hgvs_refreshed_variant, saved_hgvs_coding): @@ -3123,7 +2273,7 @@ def edit_output(self, hgvs_refreshed_variant, saved_hgvs_coding): pass return hgvs_refreshed_variant - def logic_check(self, hgvs_not_delins, rn_tx_hgvs_not_delins, hgvs_coding): + def logic_check(self, hgvs_not_delins, rn_tx_hgvs_not_delins, hgvs_coding, do_continue=False, offset_check=False): # Logic if len(hgvs_not_delins.posedit.edit.ref) < len( 
rn_tx_hgvs_not_delins.posedit.edit.ref): @@ -3146,13 +2296,20 @@ def logic_check(self, hgvs_not_delins, rn_tx_hgvs_not_delins, hgvs_coding): try: hgvs_t_possibility = self.validator.vm.c_to_n(hgvs_t_possibility) except: + if do_continue: + continue fn.exceptPass() + if offset_check: + if hgvs_t_possibility.posedit.pos.start.offset != 0 or hgvs_t_possibility.posedit.pos.end.offset != 0: + continue ins_ref = self.validator.sf.fetch_seq(hgvs_t_possibility.ac, hgvs_t_possibility.posedit.pos.start.base - 1, hgvs_t_possibility.posedit.pos.start.base + 1) try: hgvs_t_possibility = self.validator.vm.n_to_c(hgvs_t_possibility) except: + if do_continue: + continue fn.exceptPass() hgvs_t_possibility.posedit.edit.ref = ins_ref hgvs_t_possibility.posedit.edit.alt = ins_ref[ @@ -3211,3 +2368,114 @@ def get_hgvs_seek_var(self, hgvs_genomic, hgvs_coding, ori=None, with_query_geno return hgvs_seek_var, query_genomic return hgvs_seek_var + + def rev_norm_ins(self, hgvs_coding, hgvs_genomic): + # direct mapping from reverse_normalized transcript insertions in the delins format + try: + if hgvs_coding.posedit.edit.type == 'ins': + most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) + most_3pr_hgvs_transcript_variant = self.variant.reverse_normalizer.normalize(hgvs_coding) + try: + n_3pr = self.validator.vm.c_to_n(most_3pr_hgvs_transcript_variant) + n_5pr = self.validator.vm.c_to_n(most_5pr_hgvs_transcript_variant) + except: + n_3pr = most_3pr_hgvs_transcript_variant + n_5pr = most_5pr_hgvs_transcript_variant + # Make into a delins by adding the ref bases to the variant ref and alt + pr3_ref = self.validator.sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, + n_3pr.posedit.pos.end.base) + pr5_ref = self.validator.sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, + n_5pr.posedit.pos.end.base) + most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref + 
most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[0] + \ + most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ + pr3_ref[1] + most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[0] + \ + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ + pr5_ref[1] + # Map to the genome + genomic_from_most_3pr_hgvs_transcript_variant = self.validator.vm.t_to_g( + most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) + genomic_from_most_5pr_hgvs_transcript_variant = self.validator.vm.t_to_g( + most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) + + # Normalize - If the variant spans a gap it should then form a static genomic variant + try: + genomic_from_most_3pr_hgvs_transcript_variant = self.variant.hn.normalize( + genomic_from_most_3pr_hgvs_transcript_variant) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if error == 'base start position must be <= end position': + start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base + end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start + genomic_from_most_3pr_hgvs_transcript_variant = self.variant.hn.normalize( + genomic_from_most_3pr_hgvs_transcript_variant) + try: + genomic_from_most_5pr_hgvs_transcript_variant = self.variant.hn.normalize( + genomic_from_most_5pr_hgvs_transcript_variant) + except hgvs.exceptions.HGVSInvalidVariantError as e: + error = str(e) + if error == 'base start position must be <= end position': + start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base + end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start + genomic_from_most_5pr_hgvs_transcript_variant = self.variant.hn.normalize( + 
genomic_from_most_5pr_hgvs_transcript_variant) + + try: + if genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' + str( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant = self.validator.hp.parse_hgvs_variant( + genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) + + try: + if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: + most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' 
+ str( + most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant = self.validator.hp.parse_hgvs_variant( + most_3pr_hgvs_transcript_variant_delins_from_dup) + + try: + if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' + str( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant = self.validator.hp.parse_hgvs_variant( + genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) + + try: + if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: + most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' + except Exception as e: + if str(e) == "'Dup' object has no attribute 'alt'": + most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' 
+ str( + most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( + most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant = self.validator.hp.parse_hgvs_variant( + most_5pr_hgvs_transcript_variant_delins_from_dup) + + if len(genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( + most_3pr_hgvs_transcript_variant.posedit.edit.alt): + self.hgvs_genomic_possibilities.append(genomic_from_most_3pr_hgvs_transcript_variant) + if len(genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( + most_5pr_hgvs_transcript_variant.posedit.edit.alt): + self.hgvs_genomic_possibilities.append(genomic_from_most_5pr_hgvs_transcript_variant) + + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + fn.exceptPass() diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index a55cfba0..009820d1 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -952,1320 +952,6 @@ def final_tx_to_multiple_genomic(variant, validator, tx_variant, rec_var): hgvs_alt_genomic, hgvs_coding = gap_mapper.g_to_t_gap_compensation_version3(hgvs_alt_genomic, hgvs_coding, ori, alt_chr, rec_var) - # logger.warning('g_to_t gap code 3 active') - # rn_hgvs_genomic = variant.reverse_normalizer.normalize(hgvs_alt_genomic) - # hgvs_genomic_possibilities.append(rn_hgvs_genomic) - # if orientation != -1: - # try: - # chromosome_normalized_hgvs_coding = variant.reverse_normalizer.normalize( - # hgvs_coding) - # except hgvs.exceptions.HGVSUnsupportedOperationError as e: - # chromosome_normalized_hgvs_coding = hgvs_coding - # else: - # try: - # chromosome_normalized_hgvs_coding = variant.hn.normalize(hgvs_coding) - # except hgvs.exceptions.HGVSUnsupportedOperationError as e: - # error = str(e) - # 
chromosome_normalized_hgvs_coding = hgvs_coding - # - # most_3pr_hgvs_genomic = validator.myvm_t_to_g(chromosome_normalized_hgvs_coding, - # alt_chr, - # variant.no_norm_evm, variant.hn) - # hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) - # - # # First to the right - # hgvs_stash = copy.deepcopy(hgvs_coding) - # try: - # hgvs_stash = variant.no_norm_evm.c_to_n(hgvs_stash) - # except: - # fn.exceptPass() - # try: - # stash_ac = hgvs_stash.ac - # stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, variant.primary_assembly, variant.hn, validator.sf) - # stash_pos = int(stash_dict['pos']) - # stash_ref = stash_dict['ref'] - # stash_alt = stash_dict['alt'] - # # Generate an end position - # stash_end = str(stash_pos + len(stash_ref) - 1) - # # make a not real deletion insertion - # stash_hgvs_not_delins = validator.hp.parse_hgvs_variant( - # stash_ac + ':' + hgvs_stash.type + '.' + str( - # stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) - # try: - # stash_hgvs_not_delins = variant.no_norm_evm.n_to_c(stash_hgvs_not_delins) - # except: - # fn.exceptPass() - # # Store a tx copy for later use - # test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) - # stash_genomic = validator.myvm_t_to_g(test_stash_tx_right, hgvs_alt_genomic.ac, - # variant.no_norm_evm, variant.hn) - # # Stash the outputs if required - # # test variants = NC_000006.11:g.90403795G= (causes double identity) - # # NC_000002.11:g.73675227_73675228insCTC (? 
incorrect assumed insertion position) - # # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) - # # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': - # # pass - # if len(test_stash_tx_right.posedit.edit.ref) == (( - # stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - # stash_tx_right = test_stash_tx_right - # if hasattr(test_stash_tx_right.posedit.edit, - # 'alt') and test_stash_tx_right.posedit.edit.alt is not None: - # alt = test_stash_tx_right.posedit.edit.alt - # else: - # alt = '' - # if hasattr(stash_genomic.posedit.edit, - # 'alt') and stash_genomic.posedit.edit.alt is not None: - # g_alt = stash_genomic.posedit.edit.alt - # else: - # g_alt = '' - # if (len(alt) - ( - # test_stash_tx_right.posedit.pos.end.base - test_stash_tx_right.posedit.pos.start.base) + 1) != ( - # len(g_alt) - ( - # stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - # hgvs_genomic_possibilities.append(stash_genomic) - # else: - # hgvs_genomic_possibilities.append('') - # elif test_stash_tx_right.posedit.edit.type == 'identity': - # reform_ident = str(test_stash_tx_right).split(':')[0] - # reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_right.posedit.pos) + 'del' + str( - # test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) - # hgvs_reform_ident = validator.hp.parse_hgvs_variant(reform_ident) - # try: - # variant.hn.normalize(hgvs_reform_ident) - # except hgvs.exceptions.HGVSError as e: - # error = str(e) - # if re.search('spanning the exon-intron boundary', error): - # stash_tx_right = test_stash_tx_right - # hgvs_genomic_possibilities.append('') - # else: - # stash_tx_right = test_stash_tx_right - # hgvs_genomic_possibilities.append(stash_genomic) - # else: - # try: - # variant.hn.normalize(test_stash_tx_right) - # except hgvs.exceptions.HGVSUnsupportedOperationError: - # hgvs_genomic_possibilities.append('') - # else: - # stash_tx_right = test_stash_tx_right - # hgvs_genomic_possibilities.append(stash_genomic) - # except hgvs.exceptions.HGVSError as e: - # fn.exceptPass() - # except ValueError: - # fn.exceptPass() - # - # # Then to the left - # hgvs_stash = copy.deepcopy(hgvs_coding) - # try: - # hgvs_stash = variant.no_norm_evm.c_to_n(hgvs_stash) - # except: - # fn.exceptPass() - # try: - # stash_ac = hgvs_stash.ac - # stash_dict = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, variant.primary_assembly, - # variant.reverse_normalizer, validator.sf) - # stash_pos = int(stash_dict['pos']) - # stash_ref = stash_dict['ref'] - # stash_alt = stash_dict['alt'] - # # Generate an end position - # stash_end = str(stash_pos + len(stash_ref) - 1) - # # make a not real deletion insertion - # stash_hgvs_not_delins = validator.hp.parse_hgvs_variant( - # stash_ac + ':' + hgvs_stash.type + '.' 
+ str( - # stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) - # try: - # stash_hgvs_not_delins = variant.no_norm_evm.n_to_c(stash_hgvs_not_delins) - # except: - # fn.exceptPass() - # # Store a tx copy for later use - # test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) - # stash_genomic = validator.myvm_t_to_g(test_stash_tx_left, hgvs_alt_genomic.ac, - # variant.no_norm_evm, variant.hn) - # # Stash the outputs if required - # # test variants = NC_000006.11:g.90403795G= (causes double identity) - # # NC_000002.11:g.73675227_73675228insCTC - # # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) - # # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': - # # pass - # if len(test_stash_tx_left.posedit.edit.ref) == (( - # stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): - # stash_tx_left = test_stash_tx_left - # if hasattr(test_stash_tx_left.posedit.edit, - # 'alt') and test_stash_tx_left.posedit.edit.alt is not None: - # alt = test_stash_tx_left.posedit.edit.alt - # else: - # alt = '' - # if hasattr(stash_genomic.posedit.edit, - # 'alt') and stash_genomic.posedit.edit.alt is not None: - # g_alt = stash_genomic.posedit.edit.alt - # else: - # g_alt = '' - # if (len(alt) - ( - # test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( - # len(g_alt) - ( - # stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - # hgvs_genomic_possibilities.append(stash_genomic) - # else: - # hgvs_genomic_possibilities.append('') - # elif test_stash_tx_left.posedit.edit.type == 'identity': - # reform_ident = str(test_stash_tx_left).split(':')[0] - # reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_left.posedit.pos) + 'del' + str( - # test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) - # hgvs_reform_ident = validator.hp.parse_hgvs_variant(reform_ident) - # try: - # variant.hn.normalize(hgvs_reform_ident) - # except hgvs.exceptions.HGVSError as e: - # error = str(e) - # if re.search('spanning the exon-intron boundary', error): - # stash_tx_left = test_stash_tx_left - # hgvs_genomic_possibilities.append('') - # else: - # stash_tx_left = test_stash_tx_left - # hgvs_genomic_possibilities.append(stash_genomic) - # else: - # try: - # variant.hn.normalize(test_stash_tx_left) - # except hgvs.exceptions.HGVSUnsupportedOperationError: - # hgvs_genomic_possibilities.append('') - # else: - # stash_tx_left = test_stash_tx_left - # hgvs_genomic_possibilities.append(stash_genomic) - # except hgvs.exceptions.HGVSError as e: - # fn.exceptPass() - # except ValueError: - # fn.exceptPass() - # - # # direct mapping from reverse_normalized transcript insertions in the delins format - # try: - # if hgvs_coding.posedit.edit.type == 'ins': - # most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) - # most_3pr_hgvs_transcript_variant = variant.reverse_normalizer.normalize(hgvs_coding) - # try: - # n_3pr = validator.vm.c_to_n(most_3pr_hgvs_transcript_variant) - # n_5pr = validator.vm.c_to_n(most_5pr_hgvs_transcript_variant) - # except: - # n_3pr = most_3pr_hgvs_transcript_variant - # n_5pr = most_5pr_hgvs_transcript_variant - # # Make into a delins by adding the ref bases to the variant ref and alt - # pr3_ref = validator.sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, - # n_3pr.posedit.pos.end.base) - # pr5_ref = validator.sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, - # n_5pr.posedit.pos.end.base) - # most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref - # most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref - # most_3pr_hgvs_transcript_variant.posedit.edit.alt = 
pr3_ref[ - # 0] + most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ - # pr3_ref[1] - # most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[ - # 0] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ - # pr5_ref[1] - # # Map to the genome - # genomic_from_most_3pr_hgvs_transcript_variant = validator.vm.t_to_g( - # most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) - # genomic_from_most_5pr_hgvs_transcript_variant = validator.vm.t_to_g( - # most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) - # - # # Normalize - If the variant spans a gap it should then form a static genomic variant - # try: - # genomic_from_most_3pr_hgvs_transcript_variant = variant.hn.normalize( - # genomic_from_most_3pr_hgvs_transcript_variant) - # except hgvs.exceptions.HGVSInvalidVariantError as e: - # error = str(e) - # if error == 'base start position must be <= end position': - # start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base - # end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base - # genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end - # genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start - # genomic_from_most_3pr_hgvs_transcript_variant = variant.hn.normalize( - # genomic_from_most_3pr_hgvs_transcript_variant) - # try: - # genomic_from_most_5pr_hgvs_transcript_variant = variant.hn.normalize( - # genomic_from_most_5pr_hgvs_transcript_variant) - # except hgvs.exceptions.HGVSInvalidVariantError as e: - # error = str(e) - # if error == 'base start position must be <= end position': - # start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base - # end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base - # genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end - # genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start - # genomic_from_most_5pr_hgvs_transcript_variant = variant.hn.normalize( - # 
genomic_from_most_5pr_hgvs_transcript_variant) - # - # try: - # if genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - # genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - # except Exception as e: - # if str(e) == "'Dup' object has no attribute 'alt'": - # genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' + str( - # genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - # genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref - # genomic_from_most_3pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( - # genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) - # - # try: - # if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - # most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - # except Exception as e: - # if str(e) == "'Dup' object has no attribute 'alt'": - # most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' 
+ str( - # most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - # most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref - # most_3pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( - # most_3pr_hgvs_transcript_variant_delins_from_dup) - # - # try: - # if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - # genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - # except Exception as e: - # if str(e) == "'Dup' object has no attribute 'alt'": - # genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' + str( - # genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - # genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref - # genomic_from_most_5pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( - # genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) - # - # try: - # if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - # most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - # except Exception as e: - # if str(e) == "'Dup' object has no attribute 'alt'": - # most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' 
+ str( - # most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - # most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref - # most_5pr_hgvs_transcript_variant = validator.hp.parse_hgvs_variant( - # most_5pr_hgvs_transcript_variant_delins_from_dup) - # - # if len( - # genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( - # most_3pr_hgvs_transcript_variant.posedit.edit.alt): - # hgvs_genomic_possibilities.append( - # genomic_from_most_3pr_hgvs_transcript_variant) - # if len( - # genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( - # most_5pr_hgvs_transcript_variant.posedit.edit.alt): - # hgvs_genomic_possibilities.append( - # genomic_from_most_5pr_hgvs_transcript_variant) - # - # except hgvs.exceptions.HGVSUnsupportedOperationError as e: - # error = str(e) - # if re.match('Normalization of intronic variants is not supported', - # error) or re.match( - # 'Unsupported normalization of variants spanning the exon-intron boundary', - # error): - # pass - # fn.exceptPass() - # - # # Set variables for problem specific warnings - # gapped_alignment_warning = '' - # corrective_action_taken = '' - # gapped_transcripts = '' - # auto_info = '' - # - # # Mark as not disparity detected - # disparity_deletion_in = ['false', 'false'] - # # Loop through to see if a gap can be located - # possibility_counter = 0 - # for possibility in hgvs_genomic_possibilities: - # possibility_counter = possibility_counter + 1 - # # Loop out stash possibilities which will not spot gaps so are empty - # if possibility == '': - # continue - # - # # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps - # hgvs_genomic_variant = possibility - # stored_hgvs_genomic_variant = copy.deepcopy(hgvs_genomic_variant) - # - # # Reverse normalize 
hgvs_genomic_variant: NOTE will replace ref - # try: - # reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize( - # hgvs_genomic_variant) - # except hgvs.exceptions.HGVSError as e: - # # Strange error caused by gap in genomic - # error = str(e) - # if re.search('base start position must be <= end position', error): - # if hgvs_genomic.posedit.edit.type == 'delins': - # start = hgvs_genomic.posedit.pos.start.base - # end = hgvs_genomic.posedit.pos.end.base - # lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - # rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - # hgvs_genomic.posedit.edit.ref = lhb + rhb - # hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - # hgvs_genomic.posedit.pos.start.base = end - # hgvs_genomic.posedit.pos.end.base = start - # reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize( - # hgvs_genomic) - # if hgvs_genomic.posedit.edit.type == 'del': - # start = hgvs_genomic.posedit.pos.start.base - # end = hgvs_genomic.posedit.pos.end.base - # lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - # rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - # hgvs_genomic.posedit.edit.ref = lhb + rhb - # hgvs_genomic.posedit.edit.alt = lhb + rhb - # hgvs_genomic.posedit.pos.start.base = end - # hgvs_genomic.posedit.pos.end.base = start - # reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize( - # hgvs_genomic) - # if re.search('insertion length must be 1', error): - # if hgvs_genomic.posedit.edit.type == 'ins': - # start = hgvs_genomic.posedit.pos.start.base - # end = hgvs_genomic.posedit.pos.end.base - # ref_bases = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) - # lhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - # rhb = validator.sf.fetch_seq(str(hgvs_genomic.ac), start, end) - # hgvs_genomic.posedit.edit.ref = lhb + rhb - # hgvs_genomic.posedit.edit.alt = lhb + 
hgvs_genomic.posedit.edit.alt + rhb - # reverse_normalized_hgvs_genomic = variant.reverse_normalizer.normalize( - # hgvs_genomic) - # - # hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - # # Store a copy for later use - # stored_hgvs_genomic_5pr = copy.deepcopy(hgvs_genomic_5pr) - # - # # Make VCF - # vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, variant.primary_assembly, - # variant.reverse_normalizer, validator.sf) - # chr = vcf_dict['chr'] - # pos = vcf_dict['pos'] - # ref = vcf_dict['ref'] - # alt = vcf_dict['alt'] - # - # # Look for exonic gaps within transcript or chromosome - # no_normalized_c = 'false' # Mark true to not produce an additional normalization of c. - # - # # Generate an end position - # end = str(int(pos) + len(ref) - 1) - # pos = str(pos) - # - # # Store a not real deletion insertion to test for gapping - # stored_hgvs_not_delins = validator.hp.parse_hgvs_variant(str( - # hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) - # v = [chr, pos, ref, alt] - # - # # Save a copy of current hgvs_coding - # try: - # saved_hgvs_coding = variant.no_norm_evm.g_to_t(stored_hgvs_not_delins, - # hgvs_coding.ac) - # except Exception as e: - # if str( - # e) == 'start or end or both are beyond the bounds of transcript record': - # saved_hgvs_coding = hgvs_coding - # continue - # - # # Detect intronic variation using normalization - # intronic_variant = 'false' - # # Look for normalized variant options that do not match hgvs_coding - # if orientation == -1: - # # position genomic at its most 5 prime position - # try: - # query_genomic = variant.reverse_normalizer.normalize(hgvs_genomic) - # except: - # query_genomic = hgvs_genomic - # # Map to the transcript ant test for movement - # try: - # hgvs_seek_var = variant.evm.g_to_t(query_genomic, hgvs_coding.ac) - # except hgvs.exceptions.HGVSError as e: - # hgvs_seek_var = saved_hgvs_coding - # else: - # seek_var = 
fn.valstr(hgvs_seek_var) - # seek_ac = str(hgvs_seek_var.ac) - # if ( - # hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - # hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - # hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - # hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - # pass - # else: - # hgvs_seek_var = saved_hgvs_coding - # - # elif orientation != -1: - # # position genomic at its most 3 prime position - # try: - # query_genomic = variant.hn.normalize(hgvs_genomic) - # except: - # query_genomic = hgvs_genomic - # # Map to the transcript and test for movement - # try: - # hgvs_seek_var = variant.evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - # except hgvs.exceptions.HGVSError as e: - # hgvs_seek_var = saved_hgvs_coding - # seek_var = fn.valstr(hgvs_seek_var) - # seek_ac = str(hgvs_seek_var.ac) - # if ( - # hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - # hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - # hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - # hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - # pass - # else: - # hgvs_seek_var = saved_hgvs_coding - # - # try: - # intron_test = variant.hn.normalize(hgvs_seek_var) - # except hgvs.exceptions.HGVSUnsupportedOperationError as e: - # error = str(e) - # if re.match('Normalization of intronic variants is not supported', - # error) or re.match( - # 'Unsupported normalization of variants spanning the exon-intron boundary', - # error): - # if re.match( - # 'Unsupported normalization of variants spanning the exon-intron boundary', - # error): - # intronic_variant = 'hard_fail' - # else: - # # Double check to see whether the variant is actually intronic? 
- # for exon in ori: - # genomic_start = int(exon['alt_start_i']) - # genomic_end = int(exon['alt_end_i']) - # if ( - # hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - # hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - # intronic_variant = 'false' - # break - # else: - # intronic_variant = 'true' - # - # if intronic_variant != 'hard_fail': - # if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', - # str( - # hgvs_seek_var.posedit.pos)) or re.search( - # r'\*\d+\+', str( - # hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str( - # hgvs_seek_var.posedit.pos)): - # # Double check to see whether the variant is actually intronic? - # for exon in ori: - # genomic_start = int(exon['alt_start_i']) - # genomic_end = int(exon['alt_end_i']) - # if ( - # hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - # hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - # intronic_variant = 'false' - # break - # else: - # intronic_variant = 'true' - # - # if intronic_variant != 'true': - # # Flag RefSeqGene for ammendment - # # amend_RefSeqGene = 'false' - # # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths - # if stored_hgvs_not_delins != '': - # # Refresh hgvs_not_delins from stored_hgvs_not_delins - # hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) - # # This test will only occur in dup of single base, insertion or substitution - # if not re.search('_', str(hgvs_not_delins.posedit.pos)): - # if re.search('dup', - # hgvs_genomic_5pr.posedit.edit.type) or re.search( - # 'ins', hgvs_genomic_5pr.posedit.edit.type): - # # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos - # plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) - # plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 - # plussed_hgvs_not_delins.posedit.edit.ref = '' - # transcript_variant = variant.no_norm_evm.g_to_t(plussed_hgvs_not_delins, - # str( - # saved_hgvs_coding.ac)) - # if (( - # transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( - # hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): - # if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - # hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - # start = hgvs_not_delins.posedit.pos.start.base - 1 - # end = hgvs_not_delins.posedit.pos.end.base - # ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, - # end) - # hgvs_not_delins.posedit.edit.ref = ref_bases - # hgvs_not_delins.posedit.edit.alt = ref_bases[ - # :1] + hgvs_not_delins.posedit.edit.alt[ - # 1:] + ref_bases[ - # 1:] - # elif re.search('ins', str( - # hgvs_genomic_5pr.posedit.edit)) and re.search('del', - # str( - # hgvs_genomic_5pr.posedit.edit)): - # hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - # elif re.search('ins', str( - # hgvs_genomic_5pr.posedit.edit)) and not re.search( - # 'del', - # str( - # hgvs_genomic_5pr.posedit.edit)): - # hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - # start = hgvs_not_delins.posedit.pos.start.base - 1 - # end = hgvs_not_delins.posedit.pos.end.base - # ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, - # end) - # hgvs_not_delins.posedit.edit.ref = ref_bases - # hgvs_not_delins.posedit.edit.alt = ref_bases[ - # :1] + hgvs_not_delins.posedit.edit.alt[ - # 1:] + ref_bases[ - # 1:] - # else: - # if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - # hgvs_not_delins.posedit.pos.end.base = 
hgvs_not_delins.posedit.pos.start.base + 1 - # start = hgvs_not_delins.posedit.pos.start.base - 1 - # end = hgvs_not_delins.posedit.pos.end.base - # ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, - # end) - # hgvs_not_delins.posedit.edit.ref = ref_bases - # hgvs_not_delins.posedit.edit.alt = ref_bases[ - # :1] + hgvs_not_delins.posedit.edit.alt[ - # 1:] + ref_bases[ - # 1:] - # elif re.search('ins', str( - # hgvs_genomic_5pr.posedit.edit)) and re.search('del', - # str( - # hgvs_genomic_5pr.posedit.edit)): - # hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - # elif re.search('ins', str( - # hgvs_genomic_5pr.posedit.edit)) and not re.search( - # 'del', - # str( - # hgvs_genomic_5pr.posedit.edit)): - # hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - # start = hgvs_not_delins.posedit.pos.start.base - 1 - # end = hgvs_not_delins.posedit.pos.end.base - # ref_bases = validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, - # end) - # hgvs_not_delins.posedit.edit.ref = ref_bases - # hgvs_not_delins.posedit.edit.alt = ref_bases[ - # :1] + hgvs_not_delins.posedit.edit.alt[ - # 1:] + ref_bases[ - # 1:] - # else: - # pass - # else: - # pass - # tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - # saved_hgvs_coding.ac) - # # Create normalized version of tx_hgvs_not_delins - # rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) - # # Check for +1 base and adjust - # if re.search(r'\+', - # str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - # r'\+', - # str( - # rn_tx_hgvs_not_delins.posedit.pos.start)): - # # Remove offsetting to span the gap - # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - # rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # try: - # rn_tx_hgvs_not_delins.posedit.edit.alt = '' - # except: 
- # fn.exceptPass() - # - # elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # # move tx end base to next available non-offset base - # rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 - # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # if re.match('NM_', str(rn_tx_hgvs_not_delins)): - # test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - # else: - # test_tx_var = rn_tx_hgvs_not_delins - # # re-make genomic and tx - # hgvs_not_delins = validator.myvm_t_to_g(test_tx_var, alt_chr, - # variant.no_norm_evm, variant.hn) - # rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - # str( - # saved_hgvs_coding.ac)) - # elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # # move tx start base to previous available non-offset base - # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # if re.match('NM_', str(rn_tx_hgvs_not_delins)): - # test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - # else: - # test_tx_var = rn_tx_hgvs_not_delins - # # re-make genomic and tx - # hgvs_not_delins = validator.myvm_t_to_g(test_tx_var, alt_chr, - # variant.no_norm_evm, variant.hn) - # rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - # str( - # saved_hgvs_coding.ac)) - # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # # else: - # # pass - # - # # Check for -ve base and adjust - # elif re.search(r'\-', - # str( - # rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - # r'\-', - # str( - # rn_tx_hgvs_not_delins.posedit.pos.start)): - # # Remove offsetting to span the gap - # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - # rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # try: - # 
rn_tx_hgvs_not_delins.posedit.edit.alt = '' - # except: - # fn.exceptPass() - # elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # # move tx end base back to next available non-offset base - # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # # Delete the ref - # rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # # Add the additional base to the ALT - # start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - # end = rn_tx_hgvs_not_delins.posedit.pos.end.base - # ref_bases = validator.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) - # rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - # if re.match('NM_', str(rn_tx_hgvs_not_delins)): - # test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - # else: - # test_tx_var = rn_tx_hgvs_not_delins - # # re-make genomic and tx - # hgvs_not_delins = validator.myvm_t_to_g(test_tx_var, alt_chr, - # variant.no_norm_evm, variant.hn) - # rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - # str( - # saved_hgvs_coding.ac)) - # elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # # move tx start base to previous available non-offset base - # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - # rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # if re.match('NM_', str(rn_tx_hgvs_not_delins)): - # test_tx_var = variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - # else: - # test_tx_var = rn_tx_hgvs_not_delins - # # re-make genomic and tx - # hgvs_not_delins = validator.myvm_t_to_g(test_tx_var, alt_chr, - # variant.no_norm_evm, variant.hn) - # rn_tx_hgvs_not_delins = variant.no_norm_evm.g_to_n(hgvs_not_delins, - # str( - # saved_hgvs_coding.ac)) - # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: - # fn.exceptPass() - # - # # Logic - # if len(hgvs_not_delins.posedit.edit.ref) < len( - # 
rn_tx_hgvs_not_delins.posedit.edit.ref): - # gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( - # hgvs_not_delins.posedit.edit.ref) - # disparity_deletion_in = ['chromosome', gap_length] - # elif len(hgvs_not_delins.posedit.edit.ref) > len( - # rn_tx_hgvs_not_delins.posedit.edit.ref): - # gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( - # rn_tx_hgvs_not_delins.posedit.edit.ref) - # disparity_deletion_in = ['transcript', gap_length] - # else: - # re_capture_tx_variant = [] - # for possibility in hgvs_genomic_possibilities: - # if possibility == '': - # continue - # hgvs_t_possibility = validator.vm.g_to_t(possibility, hgvs_coding.ac) - # if hgvs_t_possibility.posedit.edit.type == 'ins': - # try: - # hgvs_t_possibility = validator.vm.c_to_n(hgvs_t_possibility) - # except: - # continue - # if hgvs_t_possibility.posedit.pos.start.offset != 0 or hgvs_t_possibility.posedit.pos.end.offset != 0: - # continue - # ins_ref = validator.sf.fetch_seq(hgvs_t_possibility.ac, - # hgvs_t_possibility.posedit.pos.start.base - 1, - # hgvs_t_possibility.posedit.pos.start.base + 1) - # try: - # hgvs_t_possibility = validator.vm.n_to_c(hgvs_t_possibility) - # except: - # continue - # hgvs_t_possibility.posedit.edit.ref = ins_ref - # hgvs_t_possibility.posedit.edit.alt = ins_ref[ - # 0] + hgvs_t_possibility.posedit.edit.alt + \ - # ins_ref[1] - # if possibility.posedit.edit.type == 'ins': - # ins_ref = validator.sf.fetch_seq(possibility.ac, - # possibility.posedit.pos.start.base - 1, - # possibility.posedit.pos.end.base) - # possibility.posedit.edit.ref = ins_ref - # possibility.posedit.edit.alt = ins_ref[ - # 0] + possibility.posedit.edit.alt + \ - # ins_ref[1] - # if len(hgvs_t_possibility.posedit.edit.ref) < len( - # possibility.posedit.edit.ref): - # gap_length = len(possibility.posedit.edit.ref) - len( - # hgvs_t_possibility.posedit.edit.ref) - # re_capture_tx_variant = ['transcript', gap_length, - # hgvs_t_possibility] - # hgvs_not_delins = possibility - # 
hgvs_genomic_5pr = possibility - # break - # - # if re_capture_tx_variant != []: - # try: - # tx_hgvs_not_delins = validator.vm.c_to_n(re_capture_tx_variant[2]) - # except: - # tx_hgvs_not_delins = re_capture_tx_variant[2] - # disparity_deletion_in = re_capture_tx_variant[0:-1] - # else: - # pass - # - # # Final sanity checks - # try: - # validator.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) - # except Exception as e: - # if str( - # e) == 'start or end or both are beyond the bounds of transcript record': - # continue - # try: - # variant.hn.normalize(tx_hgvs_not_delins) - # except hgvs.exceptions.HGVSUnsupportedOperationError as e: - # error = str(e) - # - # if re.match('Normalization of intronic variants is not supported', - # error) or re.match( - # 'Unsupported normalization of variants spanning the exon-intron boundary', - # error): - # if re.match( - # 'Unsupported normalization of variants spanning the exon-intron boundary', - # error): - # continue - # elif re.match('Normalization of intronic variants is not supported', - # error): - # # We know that this cannot be because of an intronic variant, so must be aligned to tx gap - # disparity_deletion_in = ['transcript', 'Requires Analysis'] - # - # # Recreate hgvs_genomic - # if disparity_deletion_in[0] == 'transcript': - # hgvs_genomic = hgvs_not_delins - # - # # Find oddly placed gaps where the tx variant is encompassed in the gap - # if disparity_deletion_in[0] == 'false' and ( - # possibility_counter == 3 or possibility_counter == 4): - # rg = variant.reverse_normalizer.normalize(hgvs_not_delins) - # rtx = validator.vm.g_to_t(rg, tx_hgvs_not_delins.ac) - # fg = variant.hn.normalize(hgvs_not_delins) - # ftx = validator.vm.g_to_t(fg, tx_hgvs_not_delins.ac) - # if ( - # rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( - # ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): - # exons = validator.hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, 
validator.alt_aln_method) - # exonic = False - # for ex_test in exons: - # if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ - # 7]) and ftx.posedit.pos.end.base in range(ex_test[6], - # ex_test[7]): - # exonic = True - # if exonic is True: - # hgvs_not_delins = fg - # hgvs_genomic = fg - # hgvs_genomic_5pr = fg - # try: - # tx_hgvs_not_delins = validator.vm.c_to_n(ftx) - # except Exception: - # tx_hgvs_not_delins = ftx - # disparity_deletion_in = ['transcript', 'Requires Analysis'] - # - # # Pre-processing of tx_hgvs_not_delins - # try: - # if tx_hgvs_not_delins.posedit.edit.alt is None: - # tx_hgvs_not_delins.posedit.edit.alt = '' - # except Exception as e: - # if str(e) == "'Dup' object has no attribute 'alt'": - # tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( - # tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - # tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - # tx_hgvs_not_delins = validator.hp.parse_hgvs_variant( - # tx_hgvs_not_delins_delins_from_dup) - # - # if disparity_deletion_in[0] == 'transcript': - # # amend_RefSeqGene = 'true' - # # ANY VARIANT WHOLLY WITHIN THE GAP - # if (re.search(r'\+', - # str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( - # r'\-', str(tx_hgvs_not_delins.posedit.pos.start))) and ( - # re.search(r'\+', - # str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( - # r'\-', str(tx_hgvs_not_delins.posedit.pos.end))): - # gapped_transcripts = gapped_transcripts + ' ' + str( - # tx_hgvs_not_delins.ac) - # - # # Copy the current variant - # tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) - # try: - # if tx_gap_fill_variant.posedit.edit.alt is None: - # tx_gap_fill_variant.posedit.edit.alt = '' - # except Exception as e: - # if str(e) == "'Dup' object has no attribute 'alt'": - # tx_gap_fill_variant_delins_from_dup = 
tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' + str( - # tx_gap_fill_variant.posedit.pos.start) + '_' + str( - # tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - # tx_gap_fill_variant = validator.hp.parse_hgvs_variant( - # tx_gap_fill_variant_delins_from_dup) - # - # # Identify which half of the NOT-intron the start position of the variant is in - # if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): - # tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - # tx_gap_fill_variant.posedit.pos.start.offset = int( - # '0') # int('+1') - # tx_gap_fill_variant.posedit.pos.end.offset = int( - # '0') # int('-1') - # tx_gap_fill_variant.posedit.edit.alt = '' - # tx_gap_fill_variant.posedit.edit.ref = '' - # elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): - # tx_gap_fill_variant.posedit.pos.start.offset = int( - # '0') # int('+1') - # tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - # tx_gap_fill_variant.posedit.pos.end.offset = int( - # '0') # int('-1') - # tx_gap_fill_variant.posedit.edit.alt = '' - # tx_gap_fill_variant.posedit.edit.ref = '' - # - # try: - # tx_gap_fill_variant = validator.vm.n_to_c(tx_gap_fill_variant) - # except: - # fn.exceptPass() - # genomic_gap_fill_variant = validator.vm.t_to_g(tx_gap_fill_variant, - # reverse_normalized_hgvs_genomic.ac) - # genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - # - # try: - # c_tx_hgvs_not_delins = validator.vm.n_to_c(tx_hgvs_not_delins) - # except Exception: - # c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - # genomic_gap_fill_variant_alt = validator.vm.t_to_g(c_tx_hgvs_not_delins, - # hgvs_genomic_5pr.ac) - # - # # Ensure an ALT exists - # try: - # if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - # 
genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - # except Exception as e: - # if str(e) == "'Dup' object has no attribute 'alt'": - # genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( - # genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - # genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - # genomic_gap_fill_variant = validator.hp.parse_hgvs_variant( - # genomic_gap_fill_variant_delins_from_dup) - # genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( - # genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - # genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - # genomic_gap_fill_variant_alt = validator.hp.parse_hgvs_variant( - # genomic_gap_fill_variant_alt_delins_from_dup) - # - # # Correct insertion alts - # if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - # append_ref = validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - # genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - # genomic_gap_fill_variant_alt.posedit.pos.end.base) - # genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - # 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - # append_ref[1] - # - # # Split the reference and replacing alt sequence into a dictionary - # reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - # if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - # alternate_bases = list( - # genomic_gap_fill_variant_alt.posedit.edit.alt) - # else: - # # Deletions with no ins - # pre_alternate_bases = list( - # 
genomic_gap_fill_variant_alt.posedit.edit.ref) - # alternate_bases = [] - # for base in pre_alternate_bases: - # alternate_bases.append('X') - # - # # Create the dictionaries - # ref_start = genomic_gap_fill_variant.posedit.pos.start.base - # alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base - # ref_base_dict = {} - # for base in reference_bases: - # ref_base_dict[ref_start] = str(base) - # ref_start = ref_start + 1 - # - # alt_base_dict = {} - # - # # NEED TO SEARCH FOR RANGE = and replace with interval_range - # # Need to search for int and replace with integer - # - # # Note, all variants will be forced into the format delete insert - # # Deleted bases in the ALT will be substituted for X - # for integer in range( - # genomic_gap_fill_variant_alt.posedit.pos.start.base, - # genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): - # if integer == alt_start: - # alt_base_dict[integer] = str(''.join(alternate_bases)) - # else: - # alt_base_dict[integer] = 'X' - # - # # Generate the alt sequence - # alternate_sequence_bases = [] - # for integer in range( - # genomic_gap_fill_variant.posedit.pos.start.base, - # genomic_gap_fill_variant.posedit.pos.end.base + 1, - # 1): - # if integer in list(alt_base_dict.keys()): - # alternate_sequence_bases.append(alt_base_dict[integer]) - # else: - # alternate_sequence_bases.append(ref_base_dict[integer]) - # alternate_sequence = ''.join(alternate_sequence_bases) - # alternate_sequence = alternate_sequence.replace('X', '') - # - # # Add the new alt to the gap fill variant and generate transcript variant - # genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - # hgvs_refreshed_variant = validator.vm.g_to_t(genomic_gap_fill_variant, - # tx_gap_fill_variant.ac) - # - # # Set warning - # gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - # disparity_deletion_in[1] = [gap_size] - # auto_info = auto_info + str( - # stored_hgvs_not_delins.ac) + ':g.' 
+ str( - # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - # tx_hgvs_not_delins.ac) - # non_valid_caution = 'true' - # - # # Alignment position - # for_location_c = copy.deepcopy(hgvs_refreshed_variant) - # if re.match('NM_', str(for_location_c)): - # for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - # if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): - # gps = for_location_c.posedit.pos.start.base - 1 - # gpe = for_location_c.posedit.pos.start.base - # else: - # gps = for_location_c.posedit.pos.start.base - # gpe = for_location_c.posedit.pos.start.base + 1 - # gap_position = ' between positions c.' + str(gps) + '_' + str( - # gpe) + '\n' - # auto_info = auto_info + '%s' % (gap_position) - # - # else: - # if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # # In this instance, we have identified a transcript gap but the n. version of - # # the transcript variant but do not have a position which actually hits the gap, - # # so the variant likely spans the gap, and is not picked up by an offset. 
- # try: - # c1 = validator.vm.n_to_c(tx_hgvs_not_delins) - # except: - # c1 = tx_hgvs_not_delins - # g1 = validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) - # g3 = validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) - # g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - # ng2 = variant.hn.normalize(g2) - # g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - # len(g3.posedit.edit.ref) - 1) - # try: - # c2 = validator.vm.g_to_t(g3, c1.ac) - # if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - # pass - # else: - # tx_hgvs_not_delins = c2 - # try: - # tx_hgvs_not_delins = validator.vm.c_to_n(tx_hgvs_not_delins) - # except hgvs.exceptions.HGVSError: - # fn.exceptPass() - # except hgvs.exceptions.HGVSInvalidVariantError: - # fn.exceptPass() - # - # if re.search(r'\+', str( - # tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - # r'\+', - # str( - # tx_hgvs_not_delins.posedit.pos.end)): - # auto_info = auto_info + str( - # stored_hgvs_not_delins.ac) + ':g.' + str( - # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - # disparity_deletion_in[ - # 1]) + ' genomic base(s) that fail to align to transcript ' + str( - # tx_hgvs_not_delins.ac) - # non_valid_caution = 'true' - # try: - # c2 = validator.vm.n_to_c(tx_hgvs_not_delins) - # except: - # c2 = tx_hgvs_not_delins - # c1 = copy.deepcopy(c2) - # c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - # c1.posedit.pos.start.offset = 0 - # c1.posedit.pos.end = c2.posedit.pos.start - # c1.posedit.edit.ref = '' - # c1.posedit.edit.alt = '' - # if orientation != -1: - # g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - # g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - # g1.posedit.edit.alt = g1.posedit.edit.ref - # else: - # g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - # g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - # g2.posedit.edit.alt = g2.posedit.edit.ref - # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - # alternate = g1.posedit.edit.alt + 
g2.posedit.edit.alt[1:] - # g3 = copy.deepcopy(g1) - # g3.posedit.pos.end.base = g2.posedit.pos.end.base - # g3.posedit.edit.ref = reference - # g3.posedit.edit.alt = alternate - # c3 = validator.vm.g_to_t(g3, c1.ac) - # hgvs_refreshed_variant = c3 - # # Alignment position - # for_location_c = copy.deepcopy(hgvs_refreshed_variant) - # if re.match('NM_', str(for_location_c)): - # for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - # gps = for_location_c.posedit.pos.start.base - # gpe = for_location_c.posedit.pos.start.base + 1 - # gap_position = ' between positions c.' + str(gps) + '_' + str( - # gpe) + '\n' - # # Warn update - # auto_info = auto_info + '%s' % (gap_position) - # elif re.search(r'\+', str( - # tx_hgvs_not_delins.posedit.pos.end)) and not re.search(r'\+', - # str( - # tx_hgvs_not_delins.posedit.pos.start)): - # auto_info = auto_info + 'Genome position ' + str( - # stored_hgvs_not_delins.ac) + ':g.' + str( - # stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - # disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - # tx_hgvs_not_delins.ac) - # gapped_transcripts = gapped_transcripts + ' ' + str( - # tx_hgvs_not_delins.ac) - # non_valid_caution = 'true' - # try: - # c1 = validator.vm.n_to_c(tx_hgvs_not_delins) - # except: - # c1 = tx_hgvs_not_delins - # c2 = copy.deepcopy(c1) - # c2.posedit.pos.start = c1.posedit.pos.end - # c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - # c2.posedit.pos.end.offset = 0 - # c2.posedit.edit.ref = '' - # c2.posedit.edit.alt = '' - # if orientation != -1: - # g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - # g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - # g2.posedit.edit.alt = g2.posedit.edit.ref - # else: - # g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - # g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - # g1.posedit.edit.alt = g1.posedit.edit.ref - # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - # alternate = g1.posedit.edit.alt + 
g2.posedit.edit.alt[1:] - # g3 = copy.deepcopy(g1) - # g3.posedit.pos.end.base = g2.posedit.pos.end.base - # g3.posedit.edit.ref = reference - # g3.posedit.edit.alt = alternate - # c3 = validator.vm.g_to_t(g3, c1.ac) - # hgvs_refreshed_variant = c3 - # # Alignment position - # for_location_c = copy.deepcopy(hgvs_refreshed_variant) - # if re.match('NM_', str(for_location_c)): - # for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - # gps = for_location_c.posedit.pos.end.base - # gpe = for_location_c.posedit.pos.end.base + 1 - # gap_position = ' between positions c.' + str(gps) + '_' + str( - # gpe) + '\n' - # # Warn update - # auto_info = auto_info + '%s' % (gap_position) - # elif re.search(r'\-', str( - # tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - # r'\-', - # str( - # tx_hgvs_not_delins.posedit.pos.end)): - # auto_info = auto_info + str( - # stored_hgvs_not_delins.ac) + ':g.' + str( - # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - # disparity_deletion_in[ - # 1]) + ' genomic base(s) that fail to align to transcript ' + str( - # tx_hgvs_not_delins.ac) - # non_valid_caution = 'true' - # try: - # c2 = validator.vm.n_to_c(tx_hgvs_not_delins) - # except: - # c2 = tx_hgvs_not_delins - # c1 = copy.deepcopy(c2) - # c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - # c1.posedit.pos.start.offset = 0 - # c1.posedit.pos.end = c2.posedit.pos.start - # c1.posedit.edit.ref = '' - # c1.posedit.edit.alt = '' - # if orientation != -1: - # g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - # g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - # g1.posedit.edit.alt = g1.posedit.edit.ref - # else: - # g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - # g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - # g2.posedit.edit.alt = g2.posedit.edit.ref - # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - # g3 = copy.deepcopy(g1) - # g3.posedit.pos.end.base = 
g2.posedit.pos.end.base - # g3.posedit.edit.ref = reference - # g3.posedit.edit.alt = alternate - # c3 = validator.vm.g_to_t(g3, c1.ac) - # hgvs_refreshed_variant = c3 - # # Alignment position - # for_location_c = copy.deepcopy(hgvs_refreshed_variant) - # if re.match('NM_', str(for_location_c)): - # for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - # gps = for_location_c.posedit.pos.start.base - 1 - # gpe = for_location_c.posedit.pos.start.base - # gap_position = ' between positions c.' + str(gps) + '_' + str( - # gpe) + '\n' - # # Warn update - # auto_info = auto_info + '%s' % (gap_position) - # elif re.search(r'\-', str( - # tx_hgvs_not_delins.posedit.pos.end)) and not re.search(r'\-', - # str( - # tx_hgvs_not_delins.posedit.pos.start)): - # auto_info = auto_info + 'Genome position ' + str( - # stored_hgvs_not_delins.ac) + ':g.' + str( - # stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - # disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - # tx_hgvs_not_delins.ac) - # gapped_transcripts = gapped_transcripts + ' ' + str( - # tx_hgvs_not_delins.ac) - # non_valid_caution = 'true' - # try: - # c1 = validator.vm.n_to_c(tx_hgvs_not_delins) - # except: - # c1 = tx_hgvs_not_delins - # c2 = copy.deepcopy(c1) - # c2.posedit.pos.start = c1.posedit.pos.end - # c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - # c2.posedit.pos.end.offset = 0 - # c2.posedit.edit.ref = '' - # c2.posedit.edit.alt = '' - # if orientation != -1: - # g1 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - # g2 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - # g2.posedit.edit.alt = g2.posedit.edit.ref - # else: - # g1 = validator.vm.t_to_g(c2, hgvs_genomic.ac) - # g2 = validator.vm.t_to_g(c1, hgvs_genomic.ac) - # g1.posedit.edit.alt = g1.posedit.edit.ref - # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - # g3 = copy.deepcopy(g1) - # g3.posedit.pos.end.base = 
g2.posedit.pos.end.base - # g3.posedit.edit.ref = reference - # g3.posedit.edit.alt = alternate - # c3 = validator.vm.g_to_t(g3, c1.ac) - # hgvs_refreshed_variant = c3 - # # Alignment position - # for_location_c = copy.deepcopy(hgvs_refreshed_variant) - # if re.match('NM_', str(for_location_c)): - # for_location_c = variant.no_norm_evm.n_to_c(tx_hgvs_not_delins) - # gps = for_location_c.posedit.pos.end.base - 1 - # gpe = for_location_c.posedit.pos.end.base - # gap_position = ' between positions c.' + str(gps) + '_' + str( - # gpe) + '\n' - # # Warn update - # auto_info = auto_info + '%s' % (gap_position) - # else: - # auto_info = auto_info + str( - # stored_hgvs_not_delins.ac) + ':g.' + str( - # stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - # disparity_deletion_in[ - # 1]) + ' genomic base(s) that fail to align to transcript ' + str( - # tx_hgvs_not_delins.ac) + '\n' - # hgvs_refreshed_variant = tx_hgvs_not_delins - # - # # GAP IN THE CHROMOSOME - # elif disparity_deletion_in[0] == 'chromosome': - # # amend_RefSeqGene = 'true' - # if possibility_counter == 3: - # hgvs_refreshed_variant = stash_tx_right - # elif possibility_counter == 4: - # hgvs_refreshed_variant = stash_tx_left - # else: - # hgvs_refreshed_variant = chromosome_normalized_hgvs_coding - # # Warn - # auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' 
+ str( - # hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( - # disparity_deletion_in[ - # 1]) + ' transcript base(s) that fail to align to chromosome ' + str( - # hgvs_genomic.ac) + '\n' - # else: - # # Keep the same by re-setting rel_var - # hgvs_refreshed_variant = hgvs_coding - # # amend_RefSeqGene = 'false' - # - # # Edit the output - # if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', - # str( - # hgvs_refreshed_variant.type)): - # hgvs_refreshed_variant = variant.no_norm_evm.n_to_c(hgvs_refreshed_variant) - # else: - # pass - # - # try: - # variant.hn.normalize(hgvs_refreshed_variant) - # except Exception as e: - # error = str(e) - # # Ensure the final variant is not intronic nor does it cross exon boundaries - # if re.match('Normalization of intronic variants is not supported', - # error) or re.match( - # 'Unsupported normalization of variants spanning the exon-intron boundary', - # error): - # hgvs_refreshed_variant = saved_hgvs_coding - # else: - # continue - # - # # Quick check to make sure the coding variant has not changed - # try: - # to_test = variant.hn.normalize(hgvs_refreshed_variant) - # except: - # to_test = hgvs_refreshed_variant - # if str(to_test.posedit.edit) != str(hgvs_coding.posedit.edit): - # # Try the next available genomic option - # if hgvs_coding.posedit.edit.type == 'identity' and to_test.posedit.edit.type == 'identity': - # hgvs_coding = to_test - # else: - # continue - # - # # Update hgvs_genomic - # hgvs_alt_genomic = validator.myvm_t_to_g(hgvs_refreshed_variant, alt_chr, - # variant.no_norm_evm, variant.hn) - # if hgvs_alt_genomic.posedit.edit.type == 'identity': - # re_c = validator.vm.g_to_t(hgvs_alt_genomic, hgvs_refreshed_variant.ac) - # if (variant.hn.normalize(re_c)) != (variant.hn.normalize(hgvs_refreshed_variant)): - # shuffle_left_g = copy.copy(hgvs_alt_genomic) - # shuffle_left_g.posedit.edit.ref = '' - # shuffle_left_g.posedit.edit.alt = '' - # shuffle_left_g.posedit.pos.start.base = 
shuffle_left_g.posedit.pos.start.base - 1 - # shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 - # shuffle_left_g = variant.reverse_normalizer.normalize(shuffle_left_g) - # re_c = validator.vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) - # if (variant.hn.normalize(re_c)) != (variant.hn.normalize(hgvs_refreshed_variant)): - # hgvs_alt_genomic = shuffle_left_g - # - # # If it is intronic, these vairables will not have been set - # else: - # # amend_RefSeqGene = 'false' - # no_normalized_c = 'false' - # - # # Break if gap has been detected - # if disparity_deletion_in[0] != 'false': - # break - # - # # Normailse hgvs_genomic - # try: - # hgvs_alt_genomic = variant.hn.normalize(hgvs_alt_genomic) - # except hgvs.exceptions.HGVSError as e: - # # Strange error caused by gap in genomic - # error = str(e) - # if re.search('base start position must be <= end position', error) and \ - # disparity_deletion_in[0] == 'chromosome': - # if hgvs_alt_genomic.posedit.edit.type == 'delins': - # start = hgvs_alt_genomic.posedit.pos.start.base - # end = hgvs_alt_genomic.posedit.pos.end.base - # lhb = validator.sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) - # rhb = validator.sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) - # hgvs_alt_genomic.posedit.edit.ref = lhb + rhb - # hgvs_alt_genomic.posedit.edit.alt = lhb + hgvs_alt_genomic.posedit.edit.alt + rhb - # hgvs_alt_genomic.posedit.pos.start.base = end - # hgvs_alt_genomic.posedit.pos.end.base = start - # hgvs_alt_genomic = variant.hn.normalize(hgvs_alt_genomic) - # if hgvs_alt_genomic.posedit.edit.type == 'del': - # start = hgvs_alt_genomic.posedit.pos.start.base - # end = hgvs_alt_genomic.posedit.pos.end.base - # lhb = validator.sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) - # rhb = validator.sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) - # hgvs_alt_genomic.posedit.edit.ref = lhb + rhb - # hgvs_alt_genomic.posedit.edit.alt = lhb + rhb - # 
hgvs_alt_genomic.posedit.pos.start.base = end - # hgvs_alt_genomic.posedit.pos.end.base = start - # hgvs_alt_genomic = variant.hn.normalize(hgvs_alt_genomic) - # Refresh the :g. variant multi_g.append(hgvs_alt_genomic) else: diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 036d16aa..a9685a70 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -753,1399 +753,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Multiple genomic variants # multi_gen_vars = [] if tx_variant != '': - multi_gen_vars, hgvs_coding = mappers.final_tx_to_multiple_genomic(variant, self, tx_variant, rec_var) - # hgvs_coding = self.hp.parse_hgvs_variant(str(tx_variant)) - # # Gap gene black list - # try: - # gene_symbol = self.db.get_gene_symbol_from_transcriptID(hgvs_coding.ac) - # except Exception: - # fn.exceptPass() - # else: - # # If the gene symbol is not in the list, the value False will be returned - # gap_compensation = vvChromosomes.gap_black_list(gene_symbol) - # - # # Look for variants spanning introns - # try: - # hgvs_coding = hn.normalize(hgvs_coding) - # except hgvs.exceptions.HGVSUnsupportedOperationError as e: - # error = str(e) - # if re.search('boundary', str(error)) or re.search('spanning', str(error)): - # gap_compensation = False - # else: - # pass - # except hgvs.exceptions.HGVSError: - # fn.exceptPass() - # - # # Warn gap code status - # logger.warning("gap_compensation_3 = " + str(gap_compensation)) - # multi_g = [] - # multi_list = [] - # mapping_options = self.hdp.get_tx_mapping_options(hgvs_coding.ac) - # for alt_chr in mapping_options: - # if (re.match('NC_', alt_chr[1]) or re.match('NT_', alt_chr[1]) or re.match('NW_', - # alt_chr[1])) and \ - # alt_chr[2] == alt_aln_method: - # multi_list.append(alt_chr[1]) - # - # for alt_chr in multi_list: - # try: - # # Re set ori - # ori = self.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=alt_chr, 
- # alt_aln_method=alt_aln_method) - # orientation = int(ori[0]['alt_strand']) - # hgvs_alt_genomic = self.myvm_t_to_g(hgvs_coding, alt_chr, no_norm_evm, hn) - # # Set hgvs_genomic accordingly - # hgvs_genomic = copy.deepcopy(hgvs_alt_genomic) - # - # # genomic_possibilities - # # 1. take the simple 3 pr normalized hgvs_genomic - # # 2. Lock in hgvs_genomic at its most 5 prime position wrt genome - # hgvs_genomic_possibilities = [] - # - # # Loop out gap code under these circumstances! - # if gap_compensation is True: - # logger.warning('g_to_t gap code 3 active') - # rn_hgvs_genomic = reverse_normalizer.normalize(hgvs_alt_genomic) - # hgvs_genomic_possibilities.append(rn_hgvs_genomic) - # if orientation != -1: - # try: - # chromosome_normalized_hgvs_coding = reverse_normalizer.normalize( - # hgvs_coding) - # except hgvs.exceptions.HGVSUnsupportedOperationError as e: - # chromosome_normalized_hgvs_coding = hgvs_coding - # else: - # try: - # chromosome_normalized_hgvs_coding = hn.normalize(hgvs_coding) - # except hgvs.exceptions.HGVSUnsupportedOperationError as e: - # error = str(e) - # chromosome_normalized_hgvs_coding = hgvs_coding - # - # most_3pr_hgvs_genomic = self.myvm_t_to_g(chromosome_normalized_hgvs_coding, - # alt_chr, - # no_norm_evm, hn) - # hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) - # - # # First to the right - # hgvs_stash = copy.deepcopy(hgvs_coding) - # try: - # hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) - # except: - # fn.exceptPass() - # try: - # stash_ac = hgvs_stash.ac - # stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, primary_assembly, hn, self.sf) - # stash_pos = int(stash_dict['pos']) - # stash_ref = stash_dict['ref'] - # stash_alt = stash_dict['alt'] - # # Generate an end position - # stash_end = str(stash_pos + len(stash_ref) - 1) - # # make a not real deletion insertion - # stash_hgvs_not_delins = self.hp.parse_hgvs_variant( - # stash_ac + ':' + hgvs_stash.type + '.' 
+ str( - # stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) - # try: - # stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) - # except: - # fn.exceptPass() - # # Store a tx copy for later use - # test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) - # stash_genomic = self.myvm_t_to_g(test_stash_tx_right, hgvs_alt_genomic.ac, - # no_norm_evm, hn) - # # Stash the outputs if required - # # test variants = NC_000006.11:g.90403795G= (causes double identity) - # # NC_000002.11:g.73675227_73675228insCTC (? incorrect assumed insertion position) - # # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) - # # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': - # # pass - # if len(test_stash_tx_right.posedit.edit.ref) == (( - # stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - # stash_tx_right = test_stash_tx_right - # if hasattr(test_stash_tx_right.posedit.edit, - # 'alt') and test_stash_tx_right.posedit.edit.alt is not None: - # alt = test_stash_tx_right.posedit.edit.alt - # else: - # alt = '' - # if hasattr(stash_genomic.posedit.edit, - # 'alt') and stash_genomic.posedit.edit.alt is not None: - # g_alt = stash_genomic.posedit.edit.alt - # else: - # g_alt = '' - # if (len(alt) - ( - # test_stash_tx_right.posedit.pos.end.base - test_stash_tx_right.posedit.pos.start.base) + 1) != ( - # len(g_alt) - ( - # stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - # hgvs_genomic_possibilities.append(stash_genomic) - # else: - # hgvs_genomic_possibilities.append('') - # elif test_stash_tx_right.posedit.edit.type == 'identity': - # reform_ident = str(test_stash_tx_right).split(':')[0] - # reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_right.posedit.pos) + 'del' + str( - # test_stash_tx_right.posedit.edit.ref) # + 'ins' + str(test_stash_tx_right.posedit.edit.alt) - # hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) - # try: - # hn.normalize(hgvs_reform_ident) - # except hgvs.exceptions.HGVSError as e: - # error = str(e) - # if re.search('spanning the exon-intron boundary', error): - # stash_tx_right = test_stash_tx_right - # hgvs_genomic_possibilities.append('') - # else: - # stash_tx_right = test_stash_tx_right - # hgvs_genomic_possibilities.append(stash_genomic) - # else: - # try: - # hn.normalize(test_stash_tx_right) - # except hgvs.exceptions.HGVSUnsupportedOperationError: - # hgvs_genomic_possibilities.append('') - # else: - # stash_tx_right = test_stash_tx_right - # hgvs_genomic_possibilities.append(stash_genomic) - # except hgvs.exceptions.HGVSError as e: - # fn.exceptPass() - # except ValueError: - # fn.exceptPass() - # - # # Then to the left - # hgvs_stash = copy.deepcopy(hgvs_coding) - # try: - # hgvs_stash = no_norm_evm.c_to_n(hgvs_stash) - # except: - # fn.exceptPass() - # try: - # stash_ac = hgvs_stash.ac - # stash_dict = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, primary_assembly, - # reverse_normalizer, self.sf) - # stash_pos = int(stash_dict['pos']) - # stash_ref = stash_dict['ref'] - # stash_alt = stash_dict['alt'] - # # Generate an end position - # stash_end = str(stash_pos + len(stash_ref) - 1) - # # make a not real deletion insertion - # stash_hgvs_not_delins = self.hp.parse_hgvs_variant( - # stash_ac + ':' + hgvs_stash.type + '.' 
+ str( - # stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) - # try: - # stash_hgvs_not_delins = no_norm_evm.n_to_c(stash_hgvs_not_delins) - # except: - # fn.exceptPass() - # # Store a tx copy for later use - # test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) - # stash_genomic = self.myvm_t_to_g(test_stash_tx_left, hgvs_alt_genomic.ac, - # no_norm_evm, hn) - # # Stash the outputs if required - # # test variants = NC_000006.11:g.90403795G= (causes double identity) - # # NC_000002.11:g.73675227_73675228insCTC - # # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) - # # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': - # # pass - # if len(test_stash_tx_left.posedit.edit.ref) == (( - # stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): - # stash_tx_left = test_stash_tx_left - # if hasattr(test_stash_tx_left.posedit.edit, - # 'alt') and test_stash_tx_left.posedit.edit.alt is not None: - # alt = test_stash_tx_left.posedit.edit.alt - # else: - # alt = '' - # if hasattr(stash_genomic.posedit.edit, - # 'alt') and stash_genomic.posedit.edit.alt is not None: - # g_alt = stash_genomic.posedit.edit.alt - # else: - # g_alt = '' - # if (len(alt) - ( - # test_stash_tx_left.posedit.pos.end.base - test_stash_tx_left.posedit.pos.start.base) + 1) != ( - # len(g_alt) - ( - # stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): - # hgvs_genomic_possibilities.append(stash_genomic) - # else: - # hgvs_genomic_possibilities.append('') - # elif test_stash_tx_left.posedit.edit.type == 'identity': - # reform_ident = str(test_stash_tx_left).split(':')[0] - # reform_ident = reform_ident + ':c.' 
+ str(test_stash_tx_left.posedit.pos) + 'del' + str( - # test_stash_tx_left.posedit.edit.ref) # + 'ins' + str(test_stash_tx_left.posedit.edit.alt) - # hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) - # try: - # hn.normalize(hgvs_reform_ident) - # except hgvs.exceptions.HGVSError as e: - # error = str(e) - # if re.search('spanning the exon-intron boundary', error): - # stash_tx_left = test_stash_tx_left - # hgvs_genomic_possibilities.append('') - # else: - # stash_tx_left = test_stash_tx_left - # hgvs_genomic_possibilities.append(stash_genomic) - # else: - # try: - # hn.normalize(test_stash_tx_left) - # except hgvs.exceptions.HGVSUnsupportedOperationError: - # hgvs_genomic_possibilities.append('') - # else: - # stash_tx_left = test_stash_tx_left - # hgvs_genomic_possibilities.append(stash_genomic) - # except hgvs.exceptions.HGVSError as e: - # fn.exceptPass() - # except ValueError: - # fn.exceptPass() - # - # # direct mapping from reverse_normalized transcript insertions in the delins format - # try: - # if hgvs_coding.posedit.edit.type == 'ins': - # most_5pr_hgvs_transcript_variant = copy.deepcopy(hgvs_coding) - # most_3pr_hgvs_transcript_variant = reverse_normalizer.normalize(hgvs_coding) - # try: - # n_3pr = self.vm.c_to_n(most_3pr_hgvs_transcript_variant) - # n_5pr = self.vm.c_to_n(most_5pr_hgvs_transcript_variant) - # except: - # n_3pr = most_3pr_hgvs_transcript_variant - # n_5pr = most_5pr_hgvs_transcript_variant - # # Make into a delins by adding the ref bases to the variant ref and alt - # pr3_ref = self.sf.fetch_seq(hgvs_coding.ac, n_3pr.posedit.pos.start.base - 1, - # n_3pr.posedit.pos.end.base) - # pr5_ref = self.sf.fetch_seq(hgvs_coding.ac, n_5pr.posedit.pos.start.base - 1, - # n_5pr.posedit.pos.end.base) - # most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref - # most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref - # most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[ - # 0] + 
most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ - # pr3_ref[1] - # most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[ - # 0] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ - # pr5_ref[1] - # # Map to the genome - # genomic_from_most_3pr_hgvs_transcript_variant = self.vm.t_to_g( - # most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) - # genomic_from_most_5pr_hgvs_transcript_variant = self.vm.t_to_g( - # most_5pr_hgvs_transcript_variant, hgvs_genomic.ac) - # - # # Normalize - If the variant spans a gap it should then form a static genomic variant - # try: - # genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( - # genomic_from_most_3pr_hgvs_transcript_variant) - # except hgvs.exceptions.HGVSInvalidVariantError as e: - # error = str(e) - # if error == 'base start position must be <= end position': - # start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base - # end = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base - # genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base = end - # genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base = start - # genomic_from_most_3pr_hgvs_transcript_variant = hn.normalize( - # genomic_from_most_3pr_hgvs_transcript_variant) - # try: - # genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( - # genomic_from_most_5pr_hgvs_transcript_variant) - # except hgvs.exceptions.HGVSInvalidVariantError as e: - # error = str(e) - # if error == 'base start position must be <= end position': - # start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base - # end = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base - # genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base = end - # genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base = start - # genomic_from_most_5pr_hgvs_transcript_variant = hn.normalize( - # genomic_from_most_5pr_hgvs_transcript_variant) - # - # try: - # if 
genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - # genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - # except Exception as e: - # if str(e) == "'Dup' object has no attribute 'alt'": - # genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' + str( - # genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - # genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref - # genomic_from_most_3pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( - # genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) - # - # try: - # if most_3pr_hgvs_transcript_variant.posedit.edit.alt is None: - # most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' - # except Exception as e: - # if str(e) == "'Dup' object has no attribute 'alt'": - # most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' 
+ str( - # most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - # most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref - # most_3pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( - # most_3pr_hgvs_transcript_variant_delins_from_dup) - # - # try: - # if genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - # genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - # except Exception as e: - # if str(e) == "'Dup' object has no attribute 'alt'": - # genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' + str( - # genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - # genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref - # genomic_from_most_5pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( - # genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) - # - # try: - # if most_5pr_hgvs_transcript_variant.posedit.edit.alt is None: - # most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' - # except Exception as e: - # if str(e) == "'Dup' object has no attribute 'alt'": - # most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' 
+ str( - # most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - # most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref - # most_5pr_hgvs_transcript_variant = self.hp.parse_hgvs_variant( - # most_5pr_hgvs_transcript_variant_delins_from_dup) - # - # if len( - # genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt) < len( - # most_3pr_hgvs_transcript_variant.posedit.edit.alt): - # hgvs_genomic_possibilities.append( - # genomic_from_most_3pr_hgvs_transcript_variant) - # if len( - # genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt) < len( - # most_5pr_hgvs_transcript_variant.posedit.edit.alt): - # hgvs_genomic_possibilities.append( - # genomic_from_most_5pr_hgvs_transcript_variant) - # - # except hgvs.exceptions.HGVSUnsupportedOperationError as e: - # error = str(e) - # if re.match('Normalization of intronic variants is not supported', - # error) or re.match( - # 'Unsupported normalization of variants spanning the exon-intron boundary', - # error): - # pass - # fn.exceptPass() - # - # # Set variables for problem specific warnings - # gapped_alignment_warning = '' - # corrective_action_taken = '' - # gapped_transcripts = '' - # auto_info = '' - # - # # Mark as not disparity detected - # disparity_deletion_in = ['false', 'false'] - # # Loop through to see if a gap can be located - # possibility_counter = 0 - # for possibility in hgvs_genomic_possibilities: - # possibility_counter = possibility_counter + 1 - # # Loop out stash possibilities which will not spot gaps so are empty - # if possibility == '': - # continue - # - # # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps - # hgvs_genomic_variant = possibility - # stored_hgvs_genomic_variant = copy.deepcopy(hgvs_genomic_variant) - # - # # Reverse normalize 
hgvs_genomic_variant: NOTE will replace ref - # try: - # reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( - # hgvs_genomic_variant) - # except hgvs.exceptions.HGVSError as e: - # # Strange error caused by gap in genomic - # error = str(e) - # if re.search('base start position must be <= end position', error): - # if hgvs_genomic.posedit.edit.type == 'delins': - # start = hgvs_genomic.posedit.pos.start.base - # end = hgvs_genomic.posedit.pos.end.base - # lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - # rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - # hgvs_genomic.posedit.edit.ref = lhb + rhb - # hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - # hgvs_genomic.posedit.pos.start.base = end - # hgvs_genomic.posedit.pos.end.base = start - # reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( - # hgvs_genomic) - # if hgvs_genomic.posedit.edit.type == 'del': - # start = hgvs_genomic.posedit.pos.start.base - # end = hgvs_genomic.posedit.pos.end.base - # lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), end - 1, end) - # rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - # hgvs_genomic.posedit.edit.ref = lhb + rhb - # hgvs_genomic.posedit.edit.alt = lhb + rhb - # hgvs_genomic.posedit.pos.start.base = end - # hgvs_genomic.posedit.pos.end.base = start - # reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( - # hgvs_genomic) - # if re.search('insertion length must be 1', error): - # if hgvs_genomic.posedit.edit.type == 'ins': - # start = hgvs_genomic.posedit.pos.start.base - # end = hgvs_genomic.posedit.pos.end.base - # ref_bases = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, end) - # lhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start - 1, start) - # rhb = self.sf.fetch_seq(str(hgvs_genomic.ac), start, end) - # hgvs_genomic.posedit.edit.ref = lhb + rhb - # hgvs_genomic.posedit.edit.alt = lhb + hgvs_genomic.posedit.edit.alt + rhb - # 
reverse_normalized_hgvs_genomic = reverse_normalizer.normalize( - # hgvs_genomic) - # - # hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - # # Store a copy for later use - # stored_hgvs_genomic_5pr = copy.deepcopy(hgvs_genomic_5pr) - # - # # Make VCF - # vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, primary_assembly, - # reverse_normalizer, self.sf) - # chr = vcf_dict['chr'] - # pos = vcf_dict['pos'] - # ref = vcf_dict['ref'] - # alt = vcf_dict['alt'] - # - # # Look for exonic gaps within transcript or chromosome - # no_normalized_c = 'false' # Mark true to not produce an additional normalization of c. - # - # # Generate an end position - # end = str(int(pos) + len(ref) - 1) - # pos = str(pos) - # - # # Store a not real deletion insertion to test for gapping - # stored_hgvs_not_delins = self.hp.parse_hgvs_variant(str( - # hgvs_genomic_5pr.ac) + ':' + hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) - # v = [chr, pos, ref, alt] - # - # # Save a copy of current hgvs_coding - # try: - # saved_hgvs_coding = no_norm_evm.g_to_t(stored_hgvs_not_delins, - # hgvs_coding.ac) - # except Exception as e: - # if str( - # e) == 'start or end or both are beyond the bounds of transcript record': - # saved_hgvs_coding = hgvs_coding - # continue - # - # # Detect intronic variation using normalization - # intronic_variant = 'false' - # # Look for normalized variant options that do not match hgvs_coding - # if orientation == -1: - # # position genomic at its most 5 prime position - # try: - # query_genomic = reverse_normalizer.normalize(hgvs_genomic) - # except: - # query_genomic = hgvs_genomic - # # Map to the transcript ant test for movement - # try: - # hgvs_seek_var = evm.g_to_t(query_genomic, hgvs_coding.ac) - # except hgvs.exceptions.HGVSError as e: - # hgvs_seek_var = saved_hgvs_coding - # else: - # seek_var = fn.valstr(hgvs_seek_var) - # seek_ac = str(hgvs_seek_var.ac) - # if ( - # hgvs_seek_var.posedit.pos.start.base + 
hgvs_seek_var.posedit.pos.start.offset) > ( - # hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - # hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - # hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - # pass - # else: - # hgvs_seek_var = saved_hgvs_coding - # - # elif orientation != -1: - # # position genomic at its most 3 prime position - # try: - # query_genomic = hn.normalize(hgvs_genomic) - # except: - # query_genomic = hgvs_genomic - # # Map to the transcript and test for movement - # try: - # hgvs_seek_var = evm.g_to_t(query_genomic, saved_hgvs_coding.ac) - # except hgvs.exceptions.HGVSError as e: - # hgvs_seek_var = saved_hgvs_coding - # seek_var = fn.valstr(hgvs_seek_var) - # seek_ac = str(hgvs_seek_var.ac) - # if ( - # hgvs_seek_var.posedit.pos.start.base + hgvs_seek_var.posedit.pos.start.offset) > ( - # hgvs_coding.posedit.pos.start.base + hgvs_coding.posedit.pos.start.offset) and ( - # hgvs_seek_var.posedit.pos.end.base + hgvs_seek_var.posedit.pos.end.offset) > ( - # hgvs_coding.posedit.pos.end.base + hgvs_coding.posedit.pos.end.offset) and rec_var != 'false': - # pass - # else: - # hgvs_seek_var = saved_hgvs_coding - # - # try: - # intron_test = hn.normalize(hgvs_seek_var) - # except hgvs.exceptions.HGVSUnsupportedOperationError as e: - # error = str(e) - # if re.match('Normalization of intronic variants is not supported', - # error) or re.match( - # 'Unsupported normalization of variants spanning the exon-intron boundary', - # error): - # if re.match( - # 'Unsupported normalization of variants spanning the exon-intron boundary', - # error): - # intronic_variant = 'hard_fail' - # else: - # # Double check to see whether the variant is actually intronic? 
- # for exon in ori: - # genomic_start = int(exon['alt_start_i']) - # genomic_end = int(exon['alt_end_i']) - # if ( - # hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - # hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - # intronic_variant = 'false' - # break - # else: - # intronic_variant = 'true' - # - # if intronic_variant != 'hard_fail': - # if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', - # str( - # hgvs_seek_var.posedit.pos)) or re.search( - # r'\*\d+\+', str( - # hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str( - # hgvs_seek_var.posedit.pos)): - # # Double check to see whether the variant is actually intronic? - # for exon in ori: - # genomic_start = int(exon['alt_start_i']) - # genomic_end = int(exon['alt_end_i']) - # if ( - # hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - # hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - # intronic_variant = 'false' - # break - # else: - # intronic_variant = 'true' - # - # if intronic_variant != 'true': - # # Flag RefSeqGene for ammendment - # # amend_RefSeqGene = 'false' - # # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths - # if stored_hgvs_not_delins != '': - # # Refresh hgvs_not_delins from stored_hgvs_not_delins - # hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) - # # This test will only occur in dup of single base, insertion or substitution - # if not re.search('_', str(hgvs_not_delins.posedit.pos)): - # if re.search('dup', - # hgvs_genomic_5pr.posedit.edit.type) or re.search( - # 'ins', hgvs_genomic_5pr.posedit.edit.type): - # # For gap in chr, map to t. 
- but becaouse we have pushed to 5 prime by norm, add 1 to end pos - # plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) - # plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 - # plussed_hgvs_not_delins.posedit.edit.ref = '' - # transcript_variant = no_norm_evm.g_to_t(plussed_hgvs_not_delins, - # str( - # saved_hgvs_coding.ac)) - # if (( - # transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( - # hgvs_genomic_5pr.posedit.pos.end.base - hgvs_genomic_5pr.posedit.pos.start.base)): - # if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - # hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - # start = hgvs_not_delins.posedit.pos.start.base - 1 - # end = hgvs_not_delins.posedit.pos.end.base - # ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, - # end) - # hgvs_not_delins.posedit.edit.ref = ref_bases - # hgvs_not_delins.posedit.edit.alt = ref_bases[ - # :1] + hgvs_not_delins.posedit.edit.alt[ - # 1:] + ref_bases[ - # 1:] - # elif re.search('ins', str( - # hgvs_genomic_5pr.posedit.edit)) and re.search('del', - # str( - # hgvs_genomic_5pr.posedit.edit)): - # hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - # elif re.search('ins', str( - # hgvs_genomic_5pr.posedit.edit)) and not re.search( - # 'del', - # str( - # hgvs_genomic_5pr.posedit.edit)): - # hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - # start = hgvs_not_delins.posedit.pos.start.base - 1 - # end = hgvs_not_delins.posedit.pos.end.base - # ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, - # end) - # hgvs_not_delins.posedit.edit.ref = ref_bases - # hgvs_not_delins.posedit.edit.alt = ref_bases[ - # :1] + hgvs_not_delins.posedit.edit.alt[ - # 1:] + ref_bases[ - # 1:] - # else: - # if re.search('dup', str(hgvs_genomic_5pr.posedit.edit)): - # hgvs_not_delins.posedit.pos.end.base = 
hgvs_not_delins.posedit.pos.start.base + 1 - # start = hgvs_not_delins.posedit.pos.start.base - 1 - # end = hgvs_not_delins.posedit.pos.end.base - # ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, - # end) - # hgvs_not_delins.posedit.edit.ref = ref_bases - # hgvs_not_delins.posedit.edit.alt = ref_bases[ - # :1] + hgvs_not_delins.posedit.edit.alt[ - # 1:] + ref_bases[ - # 1:] - # elif re.search('ins', str( - # hgvs_genomic_5pr.posedit.edit)) and re.search('del', - # str( - # hgvs_genomic_5pr.posedit.edit)): - # hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - # elif re.search('ins', str( - # hgvs_genomic_5pr.posedit.edit)) and not re.search( - # 'del', - # str( - # hgvs_genomic_5pr.posedit.edit)): - # hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - # start = hgvs_not_delins.posedit.pos.start.base - 1 - # end = hgvs_not_delins.posedit.pos.end.base - # ref_bases = self.sf.fetch_seq(str(hgvs_not_delins.ac), start, - # end) - # hgvs_not_delins.posedit.edit.ref = ref_bases - # hgvs_not_delins.posedit.edit.alt = ref_bases[ - # :1] + hgvs_not_delins.posedit.edit.alt[ - # 1:] + ref_bases[ - # 1:] - # else: - # pass - # else: - # pass - # tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - # saved_hgvs_coding.ac) - # # Create normalized version of tx_hgvs_not_delins - # rn_tx_hgvs_not_delins = copy.deepcopy(tx_hgvs_not_delins) - # # Check for +1 base and adjust - # if re.search(r'\+', - # str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - # r'\+', - # str( - # rn_tx_hgvs_not_delins.posedit.pos.start)): - # # Remove offsetting to span the gap - # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - # rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # try: - # rn_tx_hgvs_not_delins.posedit.edit.alt = '' - # except: - # 
fn.exceptPass() - # - # elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # # move tx end base to next available non-offset base - # rn_tx_hgvs_not_delins.posedit.pos.end.base = tx_hgvs_not_delins.posedit.pos.end.base + 1 - # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # if re.match('NM_', str(rn_tx_hgvs_not_delins)): - # test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - # else: - # test_tx_var = rn_tx_hgvs_not_delins - # # re-make genomic and tx - # hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, - # no_norm_evm, hn) - # rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - # str( - # saved_hgvs_coding.ac)) - # elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # # move tx start base to previous available non-offset base - # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # if re.match('NM_', str(rn_tx_hgvs_not_delins)): - # test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - # else: - # test_tx_var = rn_tx_hgvs_not_delins - # # re-make genomic and tx - # hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, - # no_norm_evm, hn) - # rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - # str( - # saved_hgvs_coding.ac)) - # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # # else: - # # pass - # - # # Check for -ve base and adjust - # elif re.search(r'\-', - # str( - # rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - # r'\-', - # str( - # rn_tx_hgvs_not_delins.posedit.pos.start)): - # # Remove offsetting to span the gap - # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # rn_tx_hgvs_not_delins.posedit.pos.end.base = rn_tx_hgvs_not_delins.posedit.pos.end.base + 1 - # rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # try: - # rn_tx_hgvs_not_delins.posedit.edit.alt = '' - # except: - # fn.exceptPass() - # elif 
re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - # # move tx end base back to next available non-offset base - # rn_tx_hgvs_not_delins.posedit.pos.end.offset = 0 - # # Delete the ref - # rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # # Add the additional base to the ALT - # start = rn_tx_hgvs_not_delins.posedit.pos.end.base - 1 - # end = rn_tx_hgvs_not_delins.posedit.pos.end.base - # ref_bases = self.sf.fetch_seq(str(tx_hgvs_not_delins.ac), start, end) - # rn_tx_hgvs_not_delins.posedit.edit.alt = rn_tx_hgvs_not_delins.posedit.edit.alt + ref_bases - # if re.match('NM_', str(rn_tx_hgvs_not_delins)): - # test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - # else: - # test_tx_var = rn_tx_hgvs_not_delins - # # re-make genomic and tx - # hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, - # no_norm_evm, hn) - # rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - # str( - # saved_hgvs_coding.ac)) - # elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - # # move tx start base to previous available non-offset base - # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 - # rn_tx_hgvs_not_delins.posedit.edit.ref = '' - # if re.match('NM_', str(rn_tx_hgvs_not_delins)): - # test_tx_var = no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) - # else: - # test_tx_var = rn_tx_hgvs_not_delins - # # re-make genomic and tx - # hgvs_not_delins = self.myvm_t_to_g(test_tx_var, alt_chr, - # no_norm_evm, hn) - # rn_tx_hgvs_not_delins = no_norm_evm.g_to_n(hgvs_not_delins, - # str( - # saved_hgvs_coding.ac)) - # rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 - # else: - # fn.exceptPass() - # - # # Logic - # if len(hgvs_not_delins.posedit.edit.ref) < len( - # rn_tx_hgvs_not_delins.posedit.edit.ref): - # gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( - # hgvs_not_delins.posedit.edit.ref) - # disparity_deletion_in = 
['chromosome', gap_length] - # elif len(hgvs_not_delins.posedit.edit.ref) > len( - # rn_tx_hgvs_not_delins.posedit.edit.ref): - # gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( - # rn_tx_hgvs_not_delins.posedit.edit.ref) - # disparity_deletion_in = ['transcript', gap_length] - # else: - # re_capture_tx_variant = [] - # for possibility in hgvs_genomic_possibilities: - # if possibility == '': - # continue - # hgvs_t_possibility = self.vm.g_to_t(possibility, hgvs_coding.ac) - # if hgvs_t_possibility.posedit.edit.type == 'ins': - # try: - # hgvs_t_possibility = self.vm.c_to_n(hgvs_t_possibility) - # except: - # continue - # if hgvs_t_possibility.posedit.pos.start.offset != 0 or hgvs_t_possibility.posedit.pos.end.offset != 0: - # continue - # ins_ref = self.sf.fetch_seq(hgvs_t_possibility.ac, - # hgvs_t_possibility.posedit.pos.start.base - 1, - # hgvs_t_possibility.posedit.pos.start.base + 1) - # try: - # hgvs_t_possibility = self.vm.n_to_c(hgvs_t_possibility) - # except: - # continue - # hgvs_t_possibility.posedit.edit.ref = ins_ref - # hgvs_t_possibility.posedit.edit.alt = ins_ref[ - # 0] + hgvs_t_possibility.posedit.edit.alt + \ - # ins_ref[1] - # if possibility.posedit.edit.type == 'ins': - # ins_ref = self.sf.fetch_seq(possibility.ac, - # possibility.posedit.pos.start.base - 1, - # possibility.posedit.pos.end.base) - # possibility.posedit.edit.ref = ins_ref - # possibility.posedit.edit.alt = ins_ref[ - # 0] + possibility.posedit.edit.alt + \ - # ins_ref[1] - # if len(hgvs_t_possibility.posedit.edit.ref) < len( - # possibility.posedit.edit.ref): - # gap_length = len(possibility.posedit.edit.ref) - len( - # hgvs_t_possibility.posedit.edit.ref) - # re_capture_tx_variant = ['transcript', gap_length, - # hgvs_t_possibility] - # hgvs_not_delins = possibility - # hgvs_genomic_5pr = possibility - # break - # - # if re_capture_tx_variant != []: - # try: - # tx_hgvs_not_delins = self.vm.c_to_n(re_capture_tx_variant[2]) - # except: - # tx_hgvs_not_delins = 
re_capture_tx_variant[2] - # disparity_deletion_in = re_capture_tx_variant[0:-1] - # else: - # pass - # - # # Final sanity checks - # try: - # self.vm.g_to_t(hgvs_not_delins, tx_hgvs_not_delins.ac) - # except Exception as e: - # if str( - # e) == 'start or end or both are beyond the bounds of transcript record': - # continue - # try: - # hn.normalize(tx_hgvs_not_delins) - # except hgvs.exceptions.HGVSUnsupportedOperationError as e: - # error = str(e) - # - # if re.match('Normalization of intronic variants is not supported', - # error) or re.match( - # 'Unsupported normalization of variants spanning the exon-intron boundary', - # error): - # if re.match( - # 'Unsupported normalization of variants spanning the exon-intron boundary', - # error): - # continue - # elif re.match('Normalization of intronic variants is not supported', - # error): - # # We know that this cannot be because of an intronic variant, so must be aligned to tx gap - # disparity_deletion_in = ['transcript', 'Requires Analysis'] - # - # # Recreate hgvs_genomic - # if disparity_deletion_in[0] == 'transcript': - # hgvs_genomic = hgvs_not_delins - # - # # Find oddly placed gaps where the tx variant is encompassed in the gap - # if disparity_deletion_in[0] == 'false' and ( - # possibility_counter == 3 or possibility_counter == 4): - # rg = reverse_normalizer.normalize(hgvs_not_delins) - # rtx = self.vm.g_to_t(rg, tx_hgvs_not_delins.ac) - # fg = hn.normalize(hgvs_not_delins) - # ftx = self.vm.g_to_t(fg, tx_hgvs_not_delins.ac) - # if ( - # rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( - # ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): - # exons = self.hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, alt_aln_method) - # exonic = False - # for ex_test in exons: - # if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ - # 7]) and ftx.posedit.pos.end.base in range(ex_test[6], - # ex_test[7]): - # exonic = True - # if exonic is True: - # 
hgvs_not_delins = fg - # hgvs_genomic = fg - # hgvs_genomic_5pr = fg - # try: - # tx_hgvs_not_delins = self.vm.c_to_n(ftx) - # except Exception: - # tx_hgvs_not_delins = ftx - # disparity_deletion_in = ['transcript', 'Requires Analysis'] - # - # # Pre-processing of tx_hgvs_not_delins - # try: - # if tx_hgvs_not_delins.posedit.edit.alt is None: - # tx_hgvs_not_delins.posedit.edit.alt = '' - # except Exception as e: - # if str(e) == "'Dup' object has no attribute 'alt'": - # tx_hgvs_not_delins_delins_from_dup = tx_hgvs_not_delins.ac + ':' + tx_hgvs_not_delins.type + '.' + str( - # tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - # tx_hgvs_not_delins.posedit.pos.end) + 'del' + tx_hgvs_not_delins.posedit.edit.ref + 'ins' + tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins.posedit.edit.ref - # tx_hgvs_not_delins = self.hp.parse_hgvs_variant( - # tx_hgvs_not_delins_delins_from_dup) - # - # if disparity_deletion_in[0] == 'transcript': - # # amend_RefSeqGene = 'true' - # # ANY VARIANT WHOLLY WITHIN THE GAP - # if (re.search(r'\+', - # str(tx_hgvs_not_delins.posedit.pos.start)) or re.search( - # r'\-', str(tx_hgvs_not_delins.posedit.pos.start))) and ( - # re.search(r'\+', - # str(tx_hgvs_not_delins.posedit.pos.end)) or re.search( - # r'\-', str(tx_hgvs_not_delins.posedit.pos.end))): - # gapped_transcripts = gapped_transcripts + ' ' + str( - # tx_hgvs_not_delins.ac) - # - # # Copy the current variant - # tx_gap_fill_variant = copy.deepcopy(tx_hgvs_not_delins) - # try: - # if tx_gap_fill_variant.posedit.edit.alt is None: - # tx_gap_fill_variant.posedit.edit.alt = '' - # except Exception as e: - # if str(e) == "'Dup' object has no attribute 'alt'": - # tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' 
+ str( - # tx_gap_fill_variant.posedit.pos.start) + '_' + str( - # tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref - # tx_gap_fill_variant = self.hp.parse_hgvs_variant( - # tx_gap_fill_variant_delins_from_dup) - # - # # Identify which half of the NOT-intron the start position of the variant is in - # if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): - # tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 - # tx_gap_fill_variant.posedit.pos.start.offset = int( - # '0') # int('+1') - # tx_gap_fill_variant.posedit.pos.end.offset = int( - # '0') # int('-1') - # tx_gap_fill_variant.posedit.edit.alt = '' - # tx_gap_fill_variant.posedit.edit.ref = '' - # elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): - # tx_gap_fill_variant.posedit.pos.start.offset = int( - # '0') # int('+1') - # tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 - # tx_gap_fill_variant.posedit.pos.end.offset = int( - # '0') # int('-1') - # tx_gap_fill_variant.posedit.edit.alt = '' - # tx_gap_fill_variant.posedit.edit.ref = '' - # - # try: - # tx_gap_fill_variant = self.vm.n_to_c(tx_gap_fill_variant) - # except: - # fn.exceptPass() - # genomic_gap_fill_variant = self.vm.t_to_g(tx_gap_fill_variant, - # reverse_normalized_hgvs_genomic.ac) - # genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref - # - # try: - # c_tx_hgvs_not_delins = self.vm.n_to_c(tx_hgvs_not_delins) - # except Exception: - # c_tx_hgvs_not_delins = copy.copy(tx_hgvs_not_delins) - # genomic_gap_fill_variant_alt = self.vm.t_to_g(c_tx_hgvs_not_delins, - # hgvs_genomic_5pr.ac) - # - # # Ensure an ALT exists - # try: - # if genomic_gap_fill_variant_alt.posedit.edit.alt is None: - # genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' - # except Exception as e: - # if str(e) == "'Dup' 
object has no attribute 'alt'": - # genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( - # genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - # genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref - # genomic_gap_fill_variant = self.hp.parse_hgvs_variant( - # genomic_gap_fill_variant_delins_from_dup) - # genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( - # genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - # genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref - # genomic_gap_fill_variant_alt = self.hp.parse_hgvs_variant( - # genomic_gap_fill_variant_alt_delins_from_dup) - # - # # Correct insertion alts - # if genomic_gap_fill_variant_alt.posedit.edit.type == 'ins': - # append_ref = self.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, - # genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, - # genomic_gap_fill_variant_alt.posedit.pos.end.base) - # genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - # 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - # append_ref[1] - # - # # Split the reference and replacing alt sequence into a dictionary - # reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) - # if genomic_gap_fill_variant_alt.posedit.edit.alt is not None: - # alternate_bases = list( - # genomic_gap_fill_variant_alt.posedit.edit.alt) - # else: - # # Deletions with no ins - # pre_alternate_bases = list( - # genomic_gap_fill_variant_alt.posedit.edit.ref) - # alternate_bases = [] - # for base in pre_alternate_bases: - # alternate_bases.append('X') - # - # 
# Create the dictionaries - # ref_start = genomic_gap_fill_variant.posedit.pos.start.base - # alt_start = genomic_gap_fill_variant_alt.posedit.pos.start.base - # ref_base_dict = {} - # for base in reference_bases: - # ref_base_dict[ref_start] = str(base) - # ref_start = ref_start + 1 - # - # alt_base_dict = {} - # - # # NEED TO SEARCH FOR RANGE = and replace with interval_range - # # Need to search for int and replace with integer - # - # # Note, all variants will be forced into the format delete insert - # # Deleted bases in the ALT will be substituted for X - # for integer in range( - # genomic_gap_fill_variant_alt.posedit.pos.start.base, - # genomic_gap_fill_variant_alt.posedit.pos.end.base + 1, 1): - # if integer == alt_start: - # alt_base_dict[integer] = str(''.join(alternate_bases)) - # else: - # alt_base_dict[integer] = 'X' - # - # # Generate the alt sequence - # alternate_sequence_bases = [] - # for integer in range( - # genomic_gap_fill_variant.posedit.pos.start.base, - # genomic_gap_fill_variant.posedit.pos.end.base + 1, - # 1): - # if integer in list(alt_base_dict.keys()): - # alternate_sequence_bases.append(alt_base_dict[integer]) - # else: - # alternate_sequence_bases.append(ref_base_dict[integer]) - # alternate_sequence = ''.join(alternate_sequence_bases) - # alternate_sequence = alternate_sequence.replace('X', '') - # - # # Add the new alt to the gap fill variant and generate transcript variant - # genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - # hgvs_refreshed_variant = self.vm.g_to_t(genomic_gap_fill_variant, - # tx_gap_fill_variant.ac) - # - # # Set warning - # gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) - # disparity_deletion_in[1] = [gap_size] - # auto_info = auto_info + str( - # stored_hgvs_not_delins.ac) + ':g.' 
+ str( - # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - # tx_hgvs_not_delins.ac) - # non_valid_caution = 'true' - # - # # Alignment position - # for_location_c = copy.deepcopy(hgvs_refreshed_variant) - # if re.match('NM_', str(for_location_c)): - # for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - # if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): - # gps = for_location_c.posedit.pos.start.base - 1 - # gpe = for_location_c.posedit.pos.start.base - # else: - # gps = for_location_c.posedit.pos.start.base - # gpe = for_location_c.posedit.pos.start.base + 1 - # gap_position = ' between positions c.' + str(gps) + '_' + str( - # gpe) + '\n' - # auto_info = auto_info + '%s' % (gap_position) - # - # else: - # if tx_hgvs_not_delins.posedit.pos.start.offset == 0 and tx_hgvs_not_delins.posedit.pos.end.offset == 0: - # # In this instance, we have identified a transcript gap but the n. version of - # # the transcript variant but do not have a position which actually hits the gap, - # # so the variant likely spans the gap, and is not picked up by an offset. 
- # try: - # c1 = self.vm.n_to_c(tx_hgvs_not_delins) - # except: - # c1 = tx_hgvs_not_delins - # g1 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) - # g3 = self.nr_vm.t_to_g(c1, hgvs_genomic.ac) - # g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - # ng2 = hn.normalize(g2) - # g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - # len(g3.posedit.edit.ref) - 1) - # try: - # c2 = self.vm.g_to_t(g3, c1.ac) - # if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: - # pass - # else: - # tx_hgvs_not_delins = c2 - # try: - # tx_hgvs_not_delins = self.vm.c_to_n(tx_hgvs_not_delins) - # except hgvs.exceptions.HGVSError: - # fn.exceptPass() - # except hgvs.exceptions.HGVSInvalidVariantError: - # fn.exceptPass() - # - # if re.search(r'\+', str( - # tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - # r'\+', - # str( - # tx_hgvs_not_delins.posedit.pos.end)): - # auto_info = auto_info + str( - # stored_hgvs_not_delins.ac) + ':g.' + str( - # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - # disparity_deletion_in[ - # 1]) + ' genomic base(s) that fail to align to transcript ' + str( - # tx_hgvs_not_delins.ac) - # non_valid_caution = 'true' - # try: - # c2 = self.vm.n_to_c(tx_hgvs_not_delins) - # except: - # c2 = tx_hgvs_not_delins - # c1 = copy.deepcopy(c2) - # c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - # c1.posedit.pos.start.offset = 0 - # c1.posedit.pos.end = c2.posedit.pos.start - # c1.posedit.edit.ref = '' - # c1.posedit.edit.alt = '' - # if orientation != -1: - # g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - # g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - # g1.posedit.edit.alt = g1.posedit.edit.ref - # else: - # g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - # g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - # g2.posedit.edit.alt = g2.posedit.edit.ref - # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - # g3 = copy.deepcopy(g1) - # g3.posedit.pos.end.base = 
g2.posedit.pos.end.base - # g3.posedit.edit.ref = reference - # g3.posedit.edit.alt = alternate - # c3 = self.vm.g_to_t(g3, c1.ac) - # hgvs_refreshed_variant = c3 - # # Alignment position - # for_location_c = copy.deepcopy(hgvs_refreshed_variant) - # if re.match('NM_', str(for_location_c)): - # for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - # gps = for_location_c.posedit.pos.start.base - # gpe = for_location_c.posedit.pos.start.base + 1 - # gap_position = ' between positions c.' + str(gps) + '_' + str( - # gpe) + '\n' - # # Warn update - # auto_info = auto_info + '%s' % (gap_position) - # elif re.search(r'\+', str( - # tx_hgvs_not_delins.posedit.pos.end)) and not re.search(r'\+', - # str( - # tx_hgvs_not_delins.posedit.pos.start)): - # auto_info = auto_info + 'Genome position ' + str( - # stored_hgvs_not_delins.ac) + ':g.' + str( - # stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - # disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - # tx_hgvs_not_delins.ac) - # gapped_transcripts = gapped_transcripts + ' ' + str( - # tx_hgvs_not_delins.ac) - # non_valid_caution = 'true' - # try: - # c1 = self.vm.n_to_c(tx_hgvs_not_delins) - # except: - # c1 = tx_hgvs_not_delins - # c2 = copy.deepcopy(c1) - # c2.posedit.pos.start = c1.posedit.pos.end - # c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - # c2.posedit.pos.end.offset = 0 - # c2.posedit.edit.ref = '' - # c2.posedit.edit.alt = '' - # if orientation != -1: - # g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - # g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - # g2.posedit.edit.alt = g2.posedit.edit.ref - # else: - # g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - # g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - # g1.posedit.edit.alt = g1.posedit.edit.ref - # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - # g3 = copy.deepcopy(g1) - # g3.posedit.pos.end.base = g2.posedit.pos.end.base - # g3.posedit.edit.ref = 
reference - # g3.posedit.edit.alt = alternate - # c3 = self.vm.g_to_t(g3, c1.ac) - # hgvs_refreshed_variant = c3 - # # Alignment position - # for_location_c = copy.deepcopy(hgvs_refreshed_variant) - # if re.match('NM_', str(for_location_c)): - # for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - # gps = for_location_c.posedit.pos.end.base - # gpe = for_location_c.posedit.pos.end.base + 1 - # gap_position = ' between positions c.' + str(gps) + '_' + str( - # gpe) + '\n' - # # Warn update - # auto_info = auto_info + '%s' % (gap_position) - # elif re.search(r'\-', str( - # tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - # r'\-', - # str( - # tx_hgvs_not_delins.posedit.pos.end)): - # auto_info = auto_info + str( - # stored_hgvs_not_delins.ac) + ':g.' + str( - # stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( - # disparity_deletion_in[ - # 1]) + ' genomic base(s) that fail to align to transcript ' + str( - # tx_hgvs_not_delins.ac) - # non_valid_caution = 'true' - # try: - # c2 = self.vm.n_to_c(tx_hgvs_not_delins) - # except: - # c2 = tx_hgvs_not_delins - # c1 = copy.deepcopy(c2) - # c1.posedit.pos.start.base = c2.posedit.pos.start.base - 1 - # c1.posedit.pos.start.offset = 0 - # c1.posedit.pos.end = c2.posedit.pos.start - # c1.posedit.edit.ref = '' - # c1.posedit.edit.alt = '' - # if orientation != -1: - # g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - # g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - # g1.posedit.edit.alt = g1.posedit.edit.ref - # else: - # g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - # g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - # g2.posedit.edit.alt = g2.posedit.edit.ref - # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - # g3 = copy.deepcopy(g1) - # g3.posedit.pos.end.base = g2.posedit.pos.end.base - # g3.posedit.edit.ref = reference - # g3.posedit.edit.alt = alternate - # c3 = self.vm.g_to_t(g3, c1.ac) - # hgvs_refreshed_variant = c3 - # # 
Alignment position - # for_location_c = copy.deepcopy(hgvs_refreshed_variant) - # if re.match('NM_', str(for_location_c)): - # for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - # gps = for_location_c.posedit.pos.start.base - 1 - # gpe = for_location_c.posedit.pos.start.base - # gap_position = ' between positions c.' + str(gps) + '_' + str( - # gpe) + '\n' - # # Warn update - # auto_info = auto_info + '%s' % (gap_position) - # elif re.search(r'\-', str( - # tx_hgvs_not_delins.posedit.pos.end)) and not re.search(r'\-', - # str( - # tx_hgvs_not_delins.posedit.pos.start)): - # auto_info = auto_info + 'Genome position ' + str( - # stored_hgvs_not_delins.ac) + ':g.' + str( - # stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( - # disparity_deletion_in[1]) + '-bp gap in transcript ' + str( - # tx_hgvs_not_delins.ac) - # gapped_transcripts = gapped_transcripts + ' ' + str( - # tx_hgvs_not_delins.ac) - # non_valid_caution = 'true' - # try: - # c1 = self.vm.n_to_c(tx_hgvs_not_delins) - # except: - # c1 = tx_hgvs_not_delins - # c2 = copy.deepcopy(c1) - # c2.posedit.pos.start = c1.posedit.pos.end - # c2.posedit.pos.end.base = c1.posedit.pos.end.base + 1 - # c2.posedit.pos.end.offset = 0 - # c2.posedit.edit.ref = '' - # c2.posedit.edit.alt = '' - # if orientation != -1: - # g1 = self.vm.t_to_g(c1, hgvs_genomic.ac) - # g2 = self.vm.t_to_g(c2, hgvs_genomic.ac) - # g2.posedit.edit.alt = g2.posedit.edit.ref - # else: - # g1 = self.vm.t_to_g(c2, hgvs_genomic.ac) - # g2 = self.vm.t_to_g(c1, hgvs_genomic.ac) - # g1.posedit.edit.alt = g1.posedit.edit.ref - # reference = g1.posedit.edit.ref + g2.posedit.edit.ref[1:] - # alternate = g1.posedit.edit.alt + g2.posedit.edit.alt[1:] - # g3 = copy.deepcopy(g1) - # g3.posedit.pos.end.base = g2.posedit.pos.end.base - # g3.posedit.edit.ref = reference - # g3.posedit.edit.alt = alternate - # c3 = self.vm.g_to_t(g3, c1.ac) - # hgvs_refreshed_variant = c3 - # # Alignment position - # for_location_c = 
copy.deepcopy(hgvs_refreshed_variant) - # if re.match('NM_', str(for_location_c)): - # for_location_c = no_norm_evm.n_to_c(tx_hgvs_not_delins) - # gps = for_location_c.posedit.pos.end.base - 1 - # gpe = for_location_c.posedit.pos.end.base - # gap_position = ' between positions c.' + str(gps) + '_' + str( - # gpe) + '\n' - # # Warn update - # auto_info = auto_info + '%s' % (gap_position) - # else: - # auto_info = auto_info + str( - # stored_hgvs_not_delins.ac) + ':g.' + str( - # stored_hgvs_not_delins.posedit.pos) + ' contains ' + str( - # disparity_deletion_in[ - # 1]) + ' genomic base(s) that fail to align to transcript ' + str( - # tx_hgvs_not_delins.ac) + '\n' - # hgvs_refreshed_variant = tx_hgvs_not_delins - # - # # GAP IN THE CHROMOSOME - # elif disparity_deletion_in[0] == 'chromosome': - # # amend_RefSeqGene = 'true' - # if possibility_counter == 3: - # hgvs_refreshed_variant = stash_tx_right - # elif possibility_counter == 4: - # hgvs_refreshed_variant = stash_tx_left - # else: - # hgvs_refreshed_variant = chromosome_normalized_hgvs_coding - # # Warn - # auto_info = auto_info + str(hgvs_refreshed_variant.ac) + ':c.' 
+ str( - # hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( - # disparity_deletion_in[ - # 1]) + ' transcript base(s) that fail to align to chromosome ' + str( - # hgvs_genomic.ac) + '\n' - # else: - # # Keep the same by re-setting rel_var - # hgvs_refreshed_variant = hgvs_coding - # # amend_RefSeqGene = 'false' - # - # # Edit the output - # if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', - # str( - # hgvs_refreshed_variant.type)): - # hgvs_refreshed_variant = no_norm_evm.n_to_c(hgvs_refreshed_variant) - # else: - # pass - # - # try: - # hn.normalize(hgvs_refreshed_variant) - # except Exception as e: - # error = str(e) - # # Ensure the final variant is not intronic nor does it cross exon boundaries - # if re.match('Normalization of intronic variants is not supported', - # error) or re.match( - # 'Unsupported normalization of variants spanning the exon-intron boundary', - # error): - # hgvs_refreshed_variant = saved_hgvs_coding - # else: - # continue - # - # # Quick check to make sure the coding variant has not changed - # try: - # to_test = hn.normalize(hgvs_refreshed_variant) - # except: - # to_test = hgvs_refreshed_variant - # if str(to_test.posedit.edit) != str(hgvs_coding.posedit.edit): - # # Try the next available genomic option - # if hgvs_coding.posedit.edit.type == 'identity' and to_test.posedit.edit.type == 'identity': - # hgvs_coding = to_test - # else: - # continue - # - # # Update hgvs_genomic - # hgvs_alt_genomic = self.myvm_t_to_g(hgvs_refreshed_variant, alt_chr, - # no_norm_evm, hn) - # if hgvs_alt_genomic.posedit.edit.type == 'identity': - # re_c = self.vm.g_to_t(hgvs_alt_genomic, hgvs_refreshed_variant.ac) - # if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): - # shuffle_left_g = copy.copy(hgvs_alt_genomic) - # shuffle_left_g.posedit.edit.ref = '' - # shuffle_left_g.posedit.edit.alt = '' - # shuffle_left_g.posedit.pos.start.base = shuffle_left_g.posedit.pos.start.base - 1 - # 
shuffle_left_g.posedit.pos.end.base = shuffle_left_g.posedit.pos.end.base - 1 - # shuffle_left_g = reverse_normalizer.normalize(shuffle_left_g) - # re_c = self.vm.g_to_t(shuffle_left_g, hgvs_refreshed_variant.ac) - # if (hn.normalize(re_c)) != (hn.normalize(hgvs_refreshed_variant)): - # hgvs_alt_genomic = shuffle_left_g - # - # # If it is intronic, these vairables will not have been set - # else: - # # amend_RefSeqGene = 'false' - # no_normalized_c = 'false' - # - # # Break if gap has been detected - # if disparity_deletion_in[0] != 'false': - # break - # - # # Normailse hgvs_genomic - # try: - # hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) - # except hgvs.exceptions.HGVSError as e: - # # Strange error caused by gap in genomic - # error = str(e) - # if re.search('base start position must be <= end position', error) and \ - # disparity_deletion_in[0] == 'chromosome': - # if hgvs_alt_genomic.posedit.edit.type == 'delins': - # start = hgvs_alt_genomic.posedit.pos.start.base - # end = hgvs_alt_genomic.posedit.pos.end.base - # lhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) - # rhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) - # hgvs_alt_genomic.posedit.edit.ref = lhb + rhb - # hgvs_alt_genomic.posedit.edit.alt = lhb + hgvs_alt_genomic.posedit.edit.alt + rhb - # hgvs_alt_genomic.posedit.pos.start.base = end - # hgvs_alt_genomic.posedit.pos.end.base = start - # hgvs_alt_genomic = hn.normalize(hgvs_alt_genomic) - # if hgvs_alt_genomic.posedit.edit.type == 'del': - # start = hgvs_alt_genomic.posedit.pos.start.base - # end = hgvs_alt_genomic.posedit.pos.end.base - # lhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), end - 1, end) - # rhb = self.sf.fetch_seq(str(hgvs_alt_genomic.ac), start - 1, start) - # hgvs_alt_genomic.posedit.edit.ref = lhb + rhb - # hgvs_alt_genomic.posedit.edit.alt = lhb + rhb - # hgvs_alt_genomic.posedit.pos.start.base = end - # hgvs_alt_genomic.posedit.pos.end.base = start - # hgvs_alt_genomic = 
hn.normalize(hgvs_alt_genomic) - # - # # Refresh the :g. variant - # multi_g.append(hgvs_alt_genomic) - # else: - # multi_g.append(hgvs_alt_genomic) - # corrective_action_taken = 'false' - # - # # In this instance, the gap code has generally found an incomplete-alignment rather than a - # # truly gapped alignment. - # except KeyError: - # warnings = warnings + ': Suspected incomplete alignment between transcript %s and ' \ - # 'genomic reference sequence %s' % (hgvs_coding.ac, - # alt_chr) - # continue - # except hgvs.exceptions.HGVSError as e: - # exc_type, exc_value, last_traceback = sys.exc_info() - # te = traceback.format_exc() - # error = str(te) - # logger.error(str(exc_type) + " " + str(exc_value)) - # logger.debug(error) - # continue - # - # if multi_g != []: - # - # multi_gen_vars = multi_g # '|'.join(multi_g) - # else: - # multi_gen_vars = [] else: # HGVS genomic in the absence of a transcript variant if genomic_variant != '': From ca515dfd251427710f2b82347058fdb6d9fe9a97 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 26 Apr 2019 16:57:57 +0100 Subject: [PATCH 080/223] Manually cleaned up first gap_mapping section --- VariantValidator/modules/gapped_mapping.py | 201 +++++++++------------ 1 file changed, 87 insertions(+), 114 deletions(-) diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index af9c7d53..ecd6cefc 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -44,7 +44,6 @@ def gapped_g_to_c(self, rel_var): # VCF vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, self.variant.primary_assembly, self.variant.reverse_normalizer, self.validator.sf) - chr = vcf_dict['chr'] pos = vcf_dict['pos'] ref = vcf_dict['ref'] alt = vcf_dict['alt'] @@ -67,10 +66,9 @@ def gapped_g_to_c(self, rel_var): c.posedit.edit.ref = c.posedit.edit.ref.upper() if hasattr(c.posedit.edit, 'alt') and c.posedit.edit.alt is not None: c.posedit.edit.alt = 
c.posedit.edit.alt.upper() - stash_input = self.validator.myevm_t_to_g(c, self.variant.no_norm_evm, self.variant.primary_assembly, self.variant.hn) - if re.match('NC_', str(stash_input)) or re.match('NT_', str(stash_input)) or re.match('NW_', - str( - stash_input)): + stash_input = self.validator.myevm_t_to_g(c, self.variant.no_norm_evm, + self.variant.primary_assembly, self.variant.hn) + if 'NC_' in str(stash_input) or 'NT_' in str(stash_input) or 'NW_' in str(stash_input): try: hgvs_stash = self.validator.hp.parse_hgvs_variant(stash_input) except: @@ -80,10 +78,9 @@ def gapped_g_to_c(self, rel_var): if hasattr(hgvs_stash.posedit.edit, 'alt') and hgvs_stash.posedit.edit.alt is not None: hgvs_stash.posedit.edit.alt = hgvs_stash.posedit.edit.alt.upper() - stash_ac = hgvs_stash.ac # MAKE A NO NORM HGVS2VCF - stash_dict = vvHGVS.pos_lock_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, self.variant.reverse_normalizer, - self.validator.sf) + stash_dict = vvHGVS.pos_lock_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, + self.variant.reverse_normalizer, self.validator.sf) stash_ac = hgvs_stash.ac stash_pos = int(stash_dict['pos']) stash_ref = stash_dict['ref'] @@ -92,14 +89,11 @@ def gapped_g_to_c(self, rel_var): stash_end = str(stash_pos + len(stash_ref) - 1) # Store a not real deletion insertion - stored_hgvs_not_delins = self.validator.hp.parse_hgvs_variant(str( - self.hgvs_genomic_5pr.ac) + ':' + self.hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) - stash_hgvs_not_delins = self.validator.hp.parse_hgvs_variant( - stash_ac + ':' + self.hgvs_genomic_5pr.type + '.' + str( - stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) - - # Set non-valid caution to false - non_valid_caution = 'false' + stored_hgvs_not_delins = self.validator.hp.parse_hgvs_variant(self.hgvs_genomic_5pr.ac + ':' + + self.hgvs_genomic_5pr.type + '.' 
+ pos + '_' + end + 'del' + ref + 'ins' + alt) + assert stored_hgvs_not_delins != '' + stash_hgvs_not_delins = self.validator.hp.parse_hgvs_variant(stash_ac + ':' + + self.hgvs_genomic_5pr.type + '.' + str(stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) # make an empty rel_var nw_rel_var = [] @@ -121,7 +115,7 @@ def gapped_g_to_c(self, rel_var): # Get orientation of the gene wrt genome and a list of exons mapped to the genome ori = self.validator.tx_exons(tx_ac=saved_hgvs_coding.ac, alt_ac=self.hgvs_genomic_5pr.ac, - alt_aln_method=self.validator.alt_aln_method) + alt_aln_method=self.validator.alt_aln_method) self.orientation = int(ori[0]['alt_strand']) intronic_variant = 'false' @@ -136,56 +130,49 @@ def gapped_g_to_c(self, rel_var): hgvs_seek_var = saved_hgvs_coding try: - intron_test = self.variant.hn.normalize(hgvs_seek_var) + self.variant.hn.normalize(hgvs_seek_var) except hgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) - if re.match('Normalization of intronic variants is not supported', error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): + if 'Normalization of intronic variants is not supported' in error or \ + 'Unsupported normalization of variants spanning the exon-intron boundary' in error: + if 'Unsupported normalization of variants spanning the exon-intron boundary' in error: intronic_variant = 'hard_fail' else: # Double check to see whether the variant is actually intronic? 
for exon in ori: genomic_start = int(exon['alt_start_i']) genomic_end = int(exon['alt_end_i']) - if ( - self.hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - self.hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + if genomic_start < self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end and \ + genomic_start < self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end: intronic_variant = 'false' break else: intronic_variant = 'true' - if intronic_variant != 'hard_fail': - if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', - str(hgvs_seek_var.posedit.pos)): - # Double check to see whether the variant is actually intronic? - for exon in ori: - genomic_start = int(exon['alt_start_i']) - genomic_end = int(exon['alt_end_i']) - if ( - self.hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - self.hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): - intronic_variant = 'false' - break - else: - intronic_variant = 'true' - - if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( + # if intronic_variant != 'hard_fail': + # if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( + # hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( + # hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str(hgvs_seek_var.posedit.pos)): + # # Double check to see whether the variant is actually intronic? 
+ # for exon in ori: + # genomic_start = int(exon['alt_start_i']) + # genomic_end = int(exon['alt_end_i']) + # if (genomic_start < self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + # genomic_start < self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + # intronic_variant = 'false' + # break + # else: + # intronic_variant = 'true' + + if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+-', str( hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str(hgvs_seek_var.posedit.pos)): + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+-', str(hgvs_seek_var.posedit.pos)): # Double check to see whether the variant is actually intronic? for exon in ori: genomic_start = int(exon['alt_start_i']) genomic_end = int(exon['alt_end_i']) - if ( - self.hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - self.hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + if (genomic_start < self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + genomic_start < self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): intronic_variant = 'false' break else: @@ -193,66 +180,62 @@ def gapped_g_to_c(self, rel_var): # If exonic, process if intronic_variant != 'true': - # map form reverse normalized g. to c. 
- hgvs_from_5n_g = self.variant.no_norm_evm.g_to_t(self.hgvs_genomic_5pr, saved_hgvs_coding.ac) - # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths + # Attempt to find gaps in reference sequence by catching disparity in genome length and + # overlapping transcript lengths self.disparity_deletion_in = ['false', 'false'] if stored_hgvs_not_delins != '': # Refresh hgvs_not_delins from stored_hgvs_not_delins hgvs_not_delins = self.dup_ins_5prime_shift(stored_hgvs_not_delins, self.hgvs_genomic_5pr, - saved_hgvs_coding) + saved_hgvs_coding) try: self.tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) except hgvs.exceptions.HGVSInvalidIntervalError: self.tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(self.hgvs_genomic_5pr, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError: + except hgvs.exceptions.HGVSError as e: if str(e) == 'start or end or both are beyond the bounds of transcript record': self.tx_hgvs_not_delins = saved_hgvs_coding # Create normalized version of tx_hgvs_not_delins rn_tx_hgvs_not_delins = copy.deepcopy(self.tx_hgvs_not_delins) # Check for +ve base and adjust - if (re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) or re.search( - r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end))): + if ('+' in str(rn_tx_hgvs_not_delins.posedit.pos.start) or '-' in str(rn_tx_hgvs_not_delins.posedit.pos.start)) and ( + '+' in str(rn_tx_hgvs_not_delins.posedit.pos.end) or '-' in str(rn_tx_hgvs_not_delins.posedit.pos.end)): rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, 
back=False) + elif '+' in str(rn_tx_hgvs_not_delins.posedit.pos.end): + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset( + rn_tx_hgvs_not_delins, saved_hgvs_coding, back=False) - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding) + elif '+' in str(rn_tx_hgvs_not_delins.posedit.pos.start): + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset( + rn_tx_hgvs_not_delins, saved_hgvs_coding) # Check for -ve base and adjust - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + elif '-' in str(rn_tx_hgvs_not_delins.posedit.pos.end) and '-' in str(rn_tx_hgvs_not_delins.posedit.pos.start): rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding) - - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, with_base_subtract=True) + elif '-' in str(rn_tx_hgvs_not_delins.posedit.pos.end): + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset( + rn_tx_hgvs_not_delins, saved_hgvs_coding) + elif '-' in str(rn_tx_hgvs_not_delins.posedit.pos.start): + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset( + rn_tx_hgvs_not_delins, saved_hgvs_coding, with_base_subtract=True) # Logic - if len(hgvs_not_delins.posedit.edit.ref) < len( - rn_tx_hgvs_not_delins.posedit.edit.ref): + if len(hgvs_not_delins.posedit.edit.ref) < len(rn_tx_hgvs_not_delins.posedit.edit.ref): gap_length 
= len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( hgvs_not_delins.posedit.edit.ref) self.disparity_deletion_in = ['chromosome', gap_length] - elif len(hgvs_not_delins.posedit.edit.ref) > len( - rn_tx_hgvs_not_delins.posedit.edit.ref): + elif len(hgvs_not_delins.posedit.edit.ref) > len(rn_tx_hgvs_not_delins.posedit.edit.ref): gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( rn_tx_hgvs_not_delins.posedit.edit.ref) self.disparity_deletion_in = ['transcript', gap_length] else: hgvs_stash_t = self.validator.vm.g_to_t(stash_hgvs_not_delins, saved_hgvs_coding.ac) - if len(stash_hgvs_not_delins.posedit.edit.ref) > len( - hgvs_stash_t.posedit.edit.ref): + if len(stash_hgvs_not_delins.posedit.edit.ref) > len(hgvs_stash_t.posedit.edit.ref): try: self.variant.hn.normalize(hgvs_stash_t) except: @@ -274,8 +257,6 @@ def gapped_g_to_c(self, rel_var): self.tx_hgvs_not_delins = hgvs_stash_t hgvs_not_delins = stash_hgvs_not_delins self.hgvs_genomic_5pr = stash_hgvs_not_delins - else: - pass # Final sanity checks try: @@ -289,16 +270,12 @@ def gapped_g_to_c(self, rel_var): self.variant.hn.normalize(self.tx_hgvs_not_delins) except hgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): + if 'Normalization of intronic variants is not supported' in error or \ + 'Unsupported normalization of variants spanning the exon-intron boundary' in error: + if 'Unsupported normalization of variants spanning the exon-intron boundary' in error: hgvs_not_delins = saved_hgvs_coding self.disparity_deletion_in = ['false', 'false'] - elif re.match('Normalization of intronic variants is not supported', error): + elif 'Normalization of intronic variants is not supported' in error: # We know that this cannot be 
because of an intronic variant, so must be aligned to tx gap self.disparity_deletion_in = ['transcript', 'Requires Analysis'] logger.warning(error) @@ -315,41 +292,41 @@ def gapped_g_to_c(self, rel_var): # GAP IN THE TRANSCRIPT DISPARITY DETECTED if self.disparity_deletion_in[0] == 'transcript': - gap_position = '' - gapped_alignment_warning = str( - self.hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly - # ANY VARIANT WHOLLY WITHIN THE GAP - hgvs_refreshed_variant = self.transcript_disparity(reverse_normalized_hgvs_genomic, stored_hgvs_not_delins, self.variant.hgvs_genomic, 1) + gapped_alignment_warning = str(self.hgvs_genomic_5pr) + ' does not represent a true variant ' \ + 'because it is an artefact of aligning the transcripts listed below with genome build ' + \ + self.variant.primary_assembly + # ANY VARIANT WHOLLY WITHIN THE GAP + hgvs_refreshed_variant = self.transcript_disparity(reverse_normalized_hgvs_genomic, + stored_hgvs_not_delins, self.variant.hgvs_genomic, 1) # GAP IN THE CHROMOSOME elif self.disparity_deletion_in[0] == 'chromosome': # Set warning variables - gap_position = '' - gapped_alignment_warning = str( - self.hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly + gapped_alignment_warning = str(self.hgvs_genomic_5pr) + ' does not represent a true variant ' \ + 'because it is an artefact of aligning the transcripts listed below with genome build ' + \ + self.variant.primary_assembly hgvs_refreshed_variant = self.tx_hgvs_not_delins # Warn - self.auto_info = self.auto_info + str(hgvs_refreshed_variant.ac) + ':c.' 
+ str( - hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(self.disparity_deletion_in[ - 1]) + ' transcript base(s) that fail to align to chromosome ' + str( - self.variant.hgvs_genomic.ac) + '\n' + self.auto_info += str(hgvs_refreshed_variant.ac) + ':c.' + str(hgvs_refreshed_variant.posedit.pos)+\ + ' contains ' + str(self.disparity_deletion_in[1]) + ' transcript base(s) that fail to align ' \ + 'to chromosome ' + str(self.variant.hgvs_genomic.ac) + '\n' self.gapped_transcripts = self.gapped_transcripts + str(hgvs_refreshed_variant.ac) + ' ' else: # Try the push hgvs_stash = copy.deepcopy(stash_hgvs_not_delins) stash_ac = hgvs_stash.ac # Make a hard left and hard right not delins g. - stash_dict_right = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, self.variant.hn, - self.validator.sf) + stash_dict_right = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, + self.variant.hn, self.validator.sf) stash_pos_right = int(stash_dict_right['pos']) stash_ref_right = stash_dict_right['ref'] stash_alt_right = stash_dict_right['alt'] stash_end_right = str(stash_pos_right + len(stash_ref_right) - 1) - stash_hgvs_not_delins_right = self.validator.hp.parse_hgvs_variant( - stash_ac + ':' + hgvs_stash.type + '.' + str( - stash_pos_right) + '_' + stash_end_right + 'del' + stash_ref_right + 'ins' + stash_alt_right) + stash_hgvs_not_delins_right = self.validator.hp.parse_hgvs_variant(stash_ac + ':' + + hgvs_stash.type + '.' 
+ str(stash_pos_right) + '_' + stash_end_right + 'del' + + stash_ref_right + 'ins' + stash_alt_right) stash_dict_left = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, self.variant.reverse_normalizer, self.validator.sf) stash_pos_left = int(stash_dict_left['pos']) @@ -362,7 +339,7 @@ def gapped_g_to_c(self, rel_var): # Map in-situ to the transcript left and right try: tx_hard_right = self.validator.vm.g_to_t(stash_hgvs_not_delins_right, saved_hgvs_coding.ac) - except Exception as e: + except Exception: tx_hard_right = saved_hgvs_coding else: normalize_stash_right = self.variant.hn.normalize(stash_hgvs_not_delins_right) @@ -370,27 +347,23 @@ def gapped_g_to_c(self, rel_var): tx_hard_right = saved_hgvs_coding try: tx_hard_left = self.validator.vm.g_to_t(stash_hgvs_not_delins_left, saved_hgvs_coding.ac) - except Exception as e: + except Exception: tx_hard_left = saved_hgvs_coding else: normalize_stash_left = self.variant.hn.normalize(stash_hgvs_not_delins_left) if str(normalize_stash_left.posedit) == str(stash_hgvs_not_delins.posedit): tx_hard_left = saved_hgvs_coding # The Logic - Currently limited to genome gaps - if len(stash_hgvs_not_delins_right.posedit.edit.ref) < len( - tx_hard_right.posedit.edit.ref): + if len(stash_hgvs_not_delins_right.posedit.edit.ref) < len(tx_hard_right.posedit.edit.ref): tx_hard_right = self.variant.hn.normalize(tx_hard_right) - gap_position = '' - gapped_alignment_warning = str( - self.hgvs_genomic_5pr) + ' may be an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly + gapped_alignment_warning = str(self.hgvs_genomic_5pr) + ' may be an artefact of aligning the ' \ + 'transcripts listed below with genome build ' + self.variant.primary_assembly hgvs_refreshed_variant = tx_hard_right self.gapped_transcripts = self.gapped_transcripts + str(tx_hard_right.ac) + ' ' - elif len(stash_hgvs_not_delins_left.posedit.edit.ref) < len( - tx_hard_left.posedit.edit.ref): + elif 
len(stash_hgvs_not_delins_left.posedit.edit.ref) < len(tx_hard_left.posedit.edit.ref): tx_hard_left = self.variant.hn.normalize(tx_hard_left) - gap_position = '' - gapped_alignment_warning = str( - self.hgvs_genomic_5pr) + ' may be an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly + gapped_alignment_warning = str(self.hgvs_genomic_5pr) + ' may be an artefact of aligning the ' \ + 'transcripts listed below with genome build ' + self.variant.primary_assembly hgvs_refreshed_variant = tx_hard_left self.gapped_transcripts = self.gapped_transcripts + str(tx_hard_left.ac) + ' ' else: From ea027ec36a370528c8e4273acb3b3132b135ca5a Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 29 Apr 2019 17:02:02 +0100 Subject: [PATCH 081/223] More clean up --- VariantValidator/modules/gapped_mapping.py | 121 ++++++++------------- 1 file changed, 48 insertions(+), 73 deletions(-) diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index ecd6cefc..cf61f3fa 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -400,16 +400,14 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): logger.warning('g_to_t gap code 1 active') rn_hgvs_genomic = self.variant.reverse_normalizer.normalize(hgvs_genomic) self.hgvs_genomic_possibilities.append(rn_hgvs_genomic) - if self.orientation != -1: - try: + + try: + if self.orientation != -1: chromosome_normalized_hgvs_coding = self.variant.reverse_normalizer.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - chromosome_normalized_hgvs_coding = hgvs_coding - else: - try: + else: chromosome_normalized_hgvs_coding = self.variant.hn.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - chromosome_normalized_hgvs_coding = hgvs_coding + except hgvs.exceptions.HGVSUnsupportedOperationError: + chromosome_normalized_hgvs_coding = 
hgvs_coding most_3pr_hgvs_genomic = self.validator.myvm_t_to_g(chromosome_normalized_hgvs_coding, hgvs_genomic.ac, self.variant.no_norm_evm, self.variant.hn) @@ -657,7 +655,6 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): # Create VCF vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, self.variant.primary_assembly, self.variant.reverse_normalizer, self.validator.sf) - chr = vcf_dict['chr'] pos = vcf_dict['pos'] ref = vcf_dict['ref'] alt = vcf_dict['alt'] @@ -667,8 +664,8 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): pos = str(pos) # Store a not real deletion insertion to test for gapping - stored_hgvs_not_delins = self.validator.hp.parse_hgvs_variant(str( - self.hgvs_genomic_5pr.ac) + ':' + self.hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) + stored_hgvs_not_delins = self.validator.hp.parse_hgvs_variant(str(self.hgvs_genomic_5pr.ac) + ':' + + self.hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) # Detect intronic variation using normalization intronic_variant = 'false' @@ -696,7 +693,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): hgvs_seek_var = saved_hgvs_coding try: - intron_test = self.variant.hn.normalize(hgvs_seek_var) + self.variant.hn.normalize(hgvs_seek_var) except hgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) if 'Normalization of intronic variants is not supported' in error or \ @@ -708,9 +705,8 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): for exon in ori: genomic_start = int(exon['alt_start_i']) genomic_end = int(exon['alt_end_i']) - if ( - self.hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - self.hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + if (genomic_start < self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + genomic_start < 
self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): intronic_variant = 'false' break else: @@ -719,15 +715,13 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): if intronic_variant != 'hard_fail': if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', - str(hgvs_seek_var.posedit.pos)): + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str(hgvs_seek_var.posedit.pos)): # Double check to see whether the variant is actually intronic? for exon in ori: genomic_start = int(exon['alt_start_i']) genomic_end = int(exon['alt_end_i']) - if ( - self.hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - self.hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + if (genomic_start < self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + genomic_start < self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): intronic_variant = 'false' break else: @@ -735,14 +729,13 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str(hgvs_seek_var.posedit.pos)): + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str(hgvs_seek_var.posedit.pos)): # Double check to see whether the variant is actually intronic? 
for exon in ori: genomic_start = int(exon['alt_start_i']) genomic_end = int(exon['alt_end_i']) - if ( - self.hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - self.hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + if (genomic_start < self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + genomic_start < self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): intronic_variant = 'false' break else: @@ -755,39 +748,36 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): if stored_hgvs_not_delins != '': # Refresh hgvs_not_delins from stored_hgvs_not_delins hgvs_not_delins = self.dup_ins_5prime_shift(stored_hgvs_not_delins, self.hgvs_genomic_5pr, - saved_hgvs_coding) + saved_hgvs_coding) try: self.tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - saved_hgvs_coding.ac) + saved_hgvs_coding.ac) except hgvs.exceptions.HGVSInvalidIntervalError: self.tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, - saved_hgvs_coding.ac) + saved_hgvs_coding.ac) # Create normalized version of tx_hgvs_not_delins rn_tx_hgvs_not_delins = copy.deepcopy(self.tx_hgvs_not_delins) # Check for +1 base and adjust - if re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - r'\+', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): + if '+' in str(rn_tx_hgvs_not_delins.posedit.pos.end) and '+' in str( + rn_tx_hgvs_not_delins.posedit.pos.start): rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) - - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + elif '+' in str(rn_tx_hgvs_not_delins.posedit.pos.end): rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, back=False) - elif re.search(r'\+', 
str(rn_tx_hgvs_not_delins.posedit.pos.start)): + elif '+' in str(rn_tx_hgvs_not_delins.posedit.pos.start): rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding) # Check for -ve base and adjust - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + elif '-' in str(rn_tx_hgvs_not_delins.posedit.pos.end) and '-' in str(rn_tx_hgvs_not_delins.posedit.pos.start): rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): + elif '-' in str(rn_tx_hgvs_not_delins.posedit.pos.end): rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding) - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + elif '-' in str(rn_tx_hgvs_not_delins.posedit.pos.start): rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, with_base_subtract=True) # Logic @@ -804,15 +794,11 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): self.variant.hn.normalize(self.tx_hgvs_not_delins) except hgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): + if 'Normalization of intronic variants is not supported' in error or \ + 'Unsupported normalization of variants spanning the exon-intron boundary' in error: + if 'Unsupported normalization of variants spanning the exon-intron boundary' in error: continue - elif re.match('Normalization of intronic variants is not supported', error): + elif 'Normalization of intronic 
variants is not supported' in error: # We know that this cannot be because of an intronic variant, so must be aligned to tx gap self.disparity_deletion_in = ['transcript', 'Requires Analysis'] @@ -822,8 +808,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): hgvs_genomic = hgvs_not_delins # Find oddly placed gaps where the tx variant is encompassed in the gap - if self.disparity_deletion_in[0] == 'false' and ( - possibility_counter == 3 or possibility_counter == 4): + if self.disparity_deletion_in[0] == 'false' and (possibility_counter == 3 or possibility_counter == 4): rg = self.variant.reverse_normalizer.normalize(hgvs_not_delins) rtx = self.validator.vm.g_to_t(rg, self.tx_hgvs_not_delins.ac) fg = self.variant.hn.normalize(hgvs_not_delins) @@ -834,7 +819,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): exonic = False for ex_test in exons: if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ - 7]) and ftx.posedit.pos.end.base in range(ex_test[6], ex_test[7]): + 7]) and ftx.posedit.pos.end.base in range(ex_test[6], ex_test[7]): exonic = True if exonic is True: hgvs_not_delins = fg @@ -864,7 +849,8 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): suppress_c_normalization = 'true' # amend_RefSeqGene = 'true' # ANY VARIANT WHOLLY WITHIN THE GAP - hgvs_refreshed_variant = self.transcript_disparity(reverse_normalized_hgvs_genomic, stored_hgvs_not_delins, hgvs_genomic, 2) + hgvs_refreshed_variant = self.transcript_disparity(reverse_normalized_hgvs_genomic, + stored_hgvs_not_delins, hgvs_genomic, 2) # GAP IN THE CHROMOSOME elif self.disparity_deletion_in[0] == 'chromosome': @@ -878,8 +864,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): hgvs_refreshed_variant = chromosome_normalized_hgvs_coding # Warn self.auto_info = self.auto_info + str(hgvs_refreshed_variant.ac) + ':c.' 
+ str( - hgvs_refreshed_variant.posedit.pos) + ' contains ' + str( - self.disparity_deletion_in[ + hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(self.disparity_deletion_in[ 1]) + ' transcript base(s) that fail to align to chromosome ' + str( hgvs_genomic.ac) + '\n' else: @@ -888,11 +873,8 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): # amend_RefSeqGene = 'false' # Edit the output - if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( - hgvs_refreshed_variant.type)): + if 'NM_' in str(hgvs_refreshed_variant.ac) and not 'c' in str(hgvs_refreshed_variant.type): hgvs_refreshed_variant = self.variant.no_norm_evm.n_to_c(hgvs_refreshed_variant) - else: - pass try: self.variant.hn.normalize(hgvs_refreshed_variant) @@ -900,10 +882,8 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): error = str(e) # Ensure the final variant is not intronic nor does it cross exon boundaries - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): + if 'Normalization of intronic variants is not supported' in error or \ + 'Unsupported normalization of variants spanning the exon-intron boundary' in error: hgvs_refreshed_variant = saved_hgvs_coding else: logger.warning(error) @@ -922,7 +902,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): continue # Update hgvs_genomic hgvs_genomic = self.validator.myvm_t_to_g(hgvs_refreshed_variant, hgvs_genomic.ac, - self.variant.no_norm_evm, self.variant.hn) + self.variant.no_norm_evm, self.variant.hn) if hgvs_genomic.posedit.edit.type == 'identity': re_c = self.validator.vm.g_to_t(hgvs_genomic, hgvs_refreshed_variant.ac) if (self.variant.hn.normalize(re_c)) != (self.variant.hn.normalize(hgvs_refreshed_variant)): @@ -936,11 +916,6 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): if (self.variant.hn.normalize(re_c)) != 
(self.variant.hn.normalize(hgvs_refreshed_variant)): hgvs_genomic = shuffle_left_g - # If it is intronic, these vairables will not have been set - else: - # amend_RefSeqGene = 'false' - no_normalized_c = 'false' - # Break if gap has been detected if self.disparity_deletion_in[0] != 'false': break @@ -952,15 +927,16 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): for information in info_lines: info_keys[information] = '' info_out = [] - info_out.append( - 'The displayed variants may be artefacts of aligning ' + hgvs_coding.ac + ' with genome build ' + self.variant.primary_assembly) + info_out.append('The displayed variants may be artefacts of aligning ' + hgvs_coding.ac + ' with genome ' + 'build ' + self.variant.primary_assembly) for ky in list(info_keys.keys()): info_out.append(ky) self.auto_info = '\n'.join(info_out) - self.auto_info = self.auto_info + '\nCaution should be used when reporting the displayed variant descriptions: If you are unsure, please contact admin' - self.auto_info = str(self.auto_info.replace('\n', ': ')) - self.variant.warnings += ': ' + str(self.auto_info) - logger.warning(str(self.auto_info)) + self.auto_info = self.auto_info + '\nCaution should be used when reporting the displayed variant ' \ + 'descriptions: If you are unsure, please contact admin' + self.auto_info = self.auto_info.replace('\n', ': ') + self.variant.warnings += ': ' + self.auto_info + logger.warning(self.auto_info) # Normailse hgvs_genomic try: hgvs_genomic = self.variant.hn.normalize(hgvs_genomic) @@ -968,8 +944,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): # Strange error caused by gap in genomic error = str(e) - if re.search('base start position must be <= end position', error) and \ - self.disparity_deletion_in[0] == 'chromosome': + if 'base start position must be <= end position' in error and self.disparity_deletion_in[0] == 'chromosome': if hgvs_genomic.posedit.edit.type == 'delins': start = hgvs_genomic.posedit.pos.start.base end = 
hgvs_genomic.posedit.pos.end.base From 3d220bbe4a6f4709d1853f695c6730dad7e04a71 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 13 May 2019 10:43:54 +0100 Subject: [PATCH 082/223] Removed rec_var from final mapper function --- VariantValidator/modules/mappers.py | 3 ++- VariantValidator/modules/vvMixinCore.py | 5 +---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index 009820d1..e2b22397 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -891,9 +891,10 @@ def transcripts_to_gene(variant, validator): return False -def final_tx_to_multiple_genomic(variant, validator, tx_variant, rec_var): +def final_tx_to_multiple_genomic(variant, validator, tx_variant): warnings = '' + rec_var = '' # Multiple genomic variants # multi_gen_vars = [] diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index a9685a70..0a72b7a4 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -553,9 +553,6 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr print("CARRYING ON") continue - # TODO: Need to check this as it's only being using outside of this loop as well as inside! 
- rec_var = '' - # Set the data my_variant.output_type_flag = 'gene' my_variant.description = hgnc_gene_info @@ -753,7 +750,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Multiple genomic variants # multi_gen_vars = [] if tx_variant != '': - multi_gen_vars, hgvs_coding = mappers.final_tx_to_multiple_genomic(variant, self, tx_variant, rec_var) + multi_gen_vars, hgvs_coding = mappers.final_tx_to_multiple_genomic(variant, self, tx_variant) else: # HGVS genomic in the absence of a transcript variant From a3222a52d132752cb2171ba763c32aaa607280a7 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 13 May 2019 17:02:18 +0100 Subject: [PATCH 083/223] Cleaned up code in second loop --- VariantValidator/modules/vvMixinCore.py | 155 ++++++++++++------------ 1 file changed, 75 insertions(+), 80 deletions(-) diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 0a72b7a4..a618688e 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -763,96 +763,91 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr alt_genomic_dicts = [] primary_genomic_dicts = {} - if len(multi_gen_vars) != 0: - for alt_gen_var in multi_gen_vars: - for build in self.genome_builds: - test = vvChromosomes.supported_for_mapping(alt_gen_var.ac, build) - if test == 'true': - try: - vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, build, reverse_normalizer, self.sf) - except hgvs.exceptions.HGVSInvalidVariantError as e: - continue - # Identify primary assembly positions - if re.match('NC_', alt_gen_var.ac): - if re.match('GRC', build): - primary_genomic_dicts[build.lower()] = { - 'hgvs_genomic_description': fn.valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['grc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } - - else: - primary_genomic_dicts[build.lower()] = { - 'hgvs_genomic_description': 
fn.valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['ucsc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } - if build == 'GRCh38': - vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, 'hg38', reverse_normalizer, - self.sf) - primary_genomic_dicts['hg38'] = { - 'hgvs_genomic_description': fn.valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['ucsc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } - - continue + for alt_gen_var in multi_gen_vars: + for build in self.genome_builds: + test = vvChromosomes.supported_for_mapping(alt_gen_var.ac, build) + if test == 'true': + try: + vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, build, variant.reverse_normalizer, + self.sf) + except hgvs.exceptions.HGVSInvalidVariantError: + continue + # Identify primary assembly positions + if 'NC_' in alt_gen_var.ac: + if 'GRC' in build: + primary_genomic_dicts[build.lower()] = { + 'hgvs_genomic_description': fn.valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['grc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } else: - if re.match('GRC', build): - dict = {build.lower(): {'hgvs_genomic_description': fn.valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['grc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } - } - else: - dict = {build.lower(): {'hgvs_genomic_description': fn.valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['ucsc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } + primary_genomic_dicts[build.lower()] = { + 'hgvs_genomic_description': fn.valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['ucsc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] } - # Append - alt_genomic_dicts.append(dict) - - if build == 'GRCh38': - vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, 'hg38', reverse_normalizer, - self.sf) - dict = {'hg38': {'hgvs_genomic_description': 
fn.valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['ucsc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } + } + if build == 'GRCh38': + vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, 'hg38', variant.reverse_normalizer, + self.sf) + primary_genomic_dicts['hg38'] = { + 'hgvs_genomic_description': fn.valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['ucsc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] } - # Append - alt_genomic_dicts.append(dict) - continue + } + else: - # May need to account for ALT NC_ - pass + if 'GRC' in build: + alt_dict = {build.lower(): {'hgvs_genomic_description': fn.valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['grc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } + } + else: + alt_dict = {build.lower(): {'hgvs_genomic_description': fn.valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['ucsc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } + } + # Append + alt_genomic_dicts.append(alt_dict) + + if build == 'GRCh38': + vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, 'hg38', variant.reverse_normalizer, + self.sf) + alt_dict = {'hg38': {'hgvs_genomic_description': fn.valstr(alt_gen_var), + 'vcf': {'chr': vcf_dict['ucsc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } + } + # Append + alt_genomic_dicts.append(alt_dict) # Warn not directly mapped to specified genome build if genomic_accession != '': caution = '' if primary_assembly.lower() not in list(primary_genomic_dicts.keys()): warnings = warnings + ': ' + str( - hgvs_coding) + ' cannot be mapped directly to genome build ' + primary_assembly + ': See alternative genomic loci or alternative genome builds for aligned genomic positions' + hgvs_coding) + ' cannot be mapped directly to genome build ' + primary_assembly + \ + ': See alternative genomic loci or alternative genome builds for aligned 
genomic positions' warn_list = warnings.split(': ') warnings_out = [] @@ -878,7 +873,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Add single letter AA code to protein descriptions predicted_protein_variant_dict = {"tlr": str(predicted_protein_variant), "slr": ''} if predicted_protein_variant != '': - if not 'Non-coding :n.' in predicted_protein_variant: + if 'Non-coding :n.' not in predicted_protein_variant: try: format_p = predicted_protein_variant format_p = re.sub(r'\(LRG_.+?\)', '', format_p) From 9b5d650fade36dc152622f8d6c5e3d928f291bda Mon Sep 17 00:00:00 2001 From: TeriForey Date: Tue, 14 May 2019 14:33:09 +0100 Subject: [PATCH 084/223] Created new ValOutput object and set the validate function to now return that object. Object can then be used to output results in different formats --- VariantValidator/modules/valoutput.py | 156 ++++++ VariantValidator/modules/variant.py | 44 ++ VariantValidator/modules/vvMixinCore.py | 212 ++------ test/test_inputs.py | 668 ++++++++++++------------ 4 files changed, 573 insertions(+), 507 deletions(-) create mode 100644 VariantValidator/modules/valoutput.py diff --git a/VariantValidator/modules/valoutput.py b/VariantValidator/modules/valoutput.py new file mode 100644 index 00000000..3f8380ac --- /dev/null +++ b/VariantValidator/modules/valoutput.py @@ -0,0 +1,156 @@ +import os +from .vvLiftover import liftover as lift_over +from .vvLogging import logger + + +class ValOutput(object): + """This object will hold the all final, validated outputs and provide methods to return this into a number of formats, + with or without extra data""" + + def __init__(self, outputlist, validator): + self.output_list = outputlist + self.validator = validator + + def format_as_dict(self, with_meta=True): + validation_output = {'flag': None} + + validation_error_counter = 0 + validation_obsolete_counter = 0 + validation_warning_counter = 0 + validation_intergenic_counter = 0 + + if len(self.output_list) 
== 0: + validation_output['flag'] = 'empty_result' + + for variant in self.output_list: + # For gene outputs, i.e. those that hit transcripts + if variant.output_type_flag == 'gene': + validation_output['flag'] = 'gene_variant' + if variant.validation_warnings == ['Validation error']: + validation_error_counter = validation_error_counter + 1 + identification_key = 'Validation_Error_%s' % validation_error_counter + else: + if variant.is_obsolete(): + validation_obsolete_counter += 1 + identification_key = 'obsolete_record_%s' % validation_obsolete_counter + else: + identification_key = '%s' % variant.hgvs_transcript_variant + + # if identification_key not in validation_output.keys(): + validation_output[identification_key] = variant.output_dict() + # else: + # dotter = dotter + ' ' + # validation_output[identification_key + dotter] = valid_v + + # For warning only outputs + # Should only ever be 1 output as an error or a warning of the following types + # Gene symbol as reference sequence + # Gene as transcript reference sequence + if variant.output_type_flag == 'warning': + validation_output['flag'] = 'warning' + if variant.validation_warnings == ['Validation error']: + validation_error_counter = validation_error_counter + 1 + identification_key = 'validation_error_%s' % validation_error_counter + elif variant.is_obsolete(): + validation_obsolete_counter += 1 + identification_key = 'obsolete_record_%s' % validation_obsolete_counter + else: + validation_warning_counter = validation_warning_counter + 1 + identification_key = 'validation_warning_%s' % validation_warning_counter + validation_output[identification_key] = variant.output_dict() + + # Intergenic variants + if variant.output_type_flag == 'intergenic': + validation_output['flag'] = 'intergenic' + validation_intergenic_counter = validation_intergenic_counter + 1 + identification_key = 'Intergenic_Variant_%s' % validation_intergenic_counter + + # Attempt to liftover between genome builds + # Note: pyliftover 
uses the UCSC liftOver tool. + # https://pypi.org/project/pyliftover/ + genomic_position_info = variant.primary_assembly_loci + for g_p_key in list(genomic_position_info.keys()): + + # Identify the current build and hgvs_genomic descripsion + if 'hg' in g_p_key: + # incoming_vcf = genomic_position_info[g_p_key]['vcf'] + # set builds + if g_p_key == 'hg38': + build_to = 'hg19' + build_from = 'hg38' + if g_p_key == 'hg19': + build_to = 'hg38' + build_from = 'hg19' + elif 'grc' in g_p_key: + # incoming_vcf = genomic_position_info[g_p_key]['vcf'] + # set builds + if g_p_key == 'grch38': + build_to = 'GRCh37' + build_from = 'GRCh38' + if g_p_key == 'grch37': + build_to = 'GRCh38' + build_from = 'GRCh37' + + # Liftover + lifted_response = lift_over(genomic_position_info[g_p_key]['hgvs_genomic_description'], build_from, + build_to, variant.hn, self.validator.vm, self.validator.vr, + self.validator.hdp, self.validator.hp, variant.reverse_normalizer, + self.validator.sf, variant.evm) + + # Sort the respomse into primary assembly and ALT + primary_assembly_loci = {} + alt_genomic_loci = [] + for build_key, accession_dict in list(lifted_response.items()): + try: + accession_key = list(accession_dict.keys())[0] + if 'NC_' in accession_dict[accession_key]['hgvs_genomic_description']: + primary_assembly_loci[build_key.lower()] = accession_dict[accession_key] + else: + alt_genomic_loci.append({build_key.lower(): accession_dict[accession_key]}) + + # KeyError if the dicts are empty + except KeyError: + continue + + # Add the dictionaries from lifted response to the output + if primary_assembly_loci != {}: + variant.primary_assembly_loci = primary_assembly_loci + if alt_genomic_loci: + variant.alt_genomic_loci = alt_genomic_loci + + # Finalise the output dictionary + validation_output[identification_key] = variant.output_dict() + + if with_meta: + validation_output["metadata"] = self.add_meta() + + # return batch_out + return validation_output + + def add_meta(self): + """ + 
Returns dictionary of metadata + :return: + """ + metadata = {} + + if os.environ.get("ADD_LOGS") == "True": + logs = [] + logString = logger.getString() + for l in logger.getString().split("\n"): + logs.append(l) + metadata["logs"] = logs + + # metadata["variant"] = batch_variant # original input string to validate function + # metadata["assembly"] = selected_assembly + # metadata["transcripts"] = select_transcripts + # metadata['seqrepo_directory'] = self.seqrepoPath + # metadata['uta_url'] = self.utaPath + # metadata['py_liftover_directory'] = self.liftoverPath + # metadata['variantvalidator_data_url'] = self.db.path + # metadata['entrez_id'] = self.entrezID + metadata['variantvalidator_version'] = self.validator.version + metadata['variantvalidator_hgvs_version'] = self.validator.hgvsVersion + metadata['uta_schema'] = self.validator.utaSchema + metadata['seqrepo_db'] = self.validator.seqrepoVersion + return metadata diff --git a/VariantValidator/modules/variant.py b/VariantValidator/modules/variant.py index 6142713b..a73c5439 100644 --- a/VariantValidator/modules/variant.py +++ b/VariantValidator/modules/variant.py @@ -47,6 +47,20 @@ def __init__(self, original, quibble=None, warnings='', write=True, primary_asse self.min_evm = None self.lose_vm = None + self.gene_symbol = None + self.hgvs_transcript_variant = None + self.genome_context_intronic_sequence = None + self.refseqgene_context_intronic_sequence = None + self.hgvs_refseqgene_variant = None + self.hgvs_predicted_protein_consequence = None + self.validation_warnings = None + self.hgvs_lrg_transcript_variant = None + self.hgvs_lrg_variant = None + self.alt_genomic_loci = None + self.primary_assembly_loci = None + self.reference_sequence_records = None + self.validated = False + def is_ascii(self): """ Instead of the previous test for unicode rich text characters. 
@@ -155,3 +169,33 @@ def set_quibble(self, newval): self.quibble = newval self.set_refsource() self.set_reftype() + + def output_dict(self): + """ + Method will return the output values as a dictionary + :return: dict + """ + dict_out = { + 'submitted_variant': self.original, + 'gene_symbol': self.gene_symbol, + 'transcript_description': self.description, + 'hgvs_transcript_variant': self.hgvs_transcript_variant, + 'genome_context_intronic_sequence': self.genome_context_intronic_sequence, + 'refseqgene_context_intronic_sequence': self.refseqgene_context_intronic_sequence, + 'hgvs_refseqgene_variant': self.hgvs_refseqgene_variant, + 'hgvs_predicted_protein_consequence': self.hgvs_predicted_protein_consequence, + 'validation_warnings': self.validation_warnings, + 'hgvs_lrg_transcript_variant': self.hgvs_lrg_transcript_variant, + 'hgvs_lrg_variant': self.hgvs_lrg_variant, + 'alt_genomic_loci': self.alt_genomic_loci, + 'primary_assembly_loci': self.primary_assembly_loci, + 'reference_sequence_records': self.reference_sequence_records, + } + return dict_out + + def is_obsolete(self): + """ + Checks whether the keyword 'obsolete' appears within the validation warnings + :return: + """ + return any('obsolete' in warning for warning in self.validation_warnings) diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index a618688e..7a0c7af0 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -57,6 +57,7 @@ from . import use_checking from . import collect_info from . import mappers +from . 
import valoutput class Mixin(vvMixinConverters.Mixin): @@ -856,11 +857,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr warning = warning.replace("'", "") if warning == '': continue - warnings_out.append(warning) - # Remove duplicate elements but maintain the order - seen = {} - no_rep_list = [seen.setdefault(x, x) for x in warnings_out if x not in seen] - warnings_out = no_rep_list + if warning not in warnings_out: + # Remove duplicate elements but maintain the order + warnings_out.append(warning) # Ensure Variants have had the refs removed. # if not hasattr(posedit, refseqgene_variant): @@ -885,179 +884,47 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: predicted_protein_variant_dict["slr"] = str(predicted_protein_variant) - # Populate the dictionary - dict_out['submitted_variant'] = submitted - dict_out['gene_symbol'] = gene_symbol - dict_out['transcript_description'] = transcript_description - dict_out['hgvs_transcript_variant'] = tx_variant - dict_out['genome_context_intronic_sequence'] = genome_context_transcript_variant - dict_out['refseqgene_context_intronic_sequence'] = RefSeqGene_context_transcript_variant - dict_out['hgvs_refseqgene_variant'] = refseqgene_variant - dict_out['hgvs_predicted_protein_consequence'] = predicted_protein_variant_dict - dict_out['validation_warnings'] = warnings_out - dict_out['hgvs_lrg_transcript_variant'] = lrg_transcript_variant - dict_out['hgvs_lrg_variant'] = lrg_variant - dict_out['alt_genomic_loci'] = alt_genomic_dicts - dict_out['primary_assembly_loci'] = primary_genomic_dicts - dict_out['reference_sequence_records'] = '' + # # Populate the dictionary + # dict_out['submitted_variant'] = submitted + # dict_out['gene_symbol'] = gene_symbol + # dict_out['transcript_description'] = transcript_description + # dict_out['hgvs_transcript_variant'] = tx_variant + # dict_out['genome_context_intronic_sequence'] = genome_context_transcript_variant + # 
dict_out['refseqgene_context_intronic_sequence'] = RefSeqGene_context_transcript_variant + # dict_out['hgvs_refseqgene_variant'] = refseqgene_variant + # dict_out['hgvs_predicted_protein_consequence'] = predicted_protein_variant_dict + # dict_out['validation_warnings'] = warnings_out + # dict_out['hgvs_lrg_transcript_variant'] = lrg_transcript_variant + # dict_out['hgvs_lrg_variant'] = lrg_variant + # dict_out['alt_genomic_loci'] = alt_genomic_dicts + # dict_out['primary_assembly_loci'] = primary_genomic_dicts + # dict_out['reference_sequence_records'] = '' + + variant.gene_symbol = gene_symbol + variant.hgvs_transcript_variant = tx_variant + variant.genome_context_intronic_sequence = genome_context_transcript_variant + variant.refseqgene_context_intronic_sequence = RefSeqGene_context_transcript_variant + variant.hgvs_refseqgene_variant = refseqgene_variant + variant.hgvs_predicted_protein_consequence = predicted_protein_variant_dict + variant.validation_warnings = warnings_out + variant.hgvs_lrg_transcript_variant = lrg_transcript_variant + variant.hgvs_lrg_variant = lrg_variant + variant.alt_genomic_loci = alt_genomic_dicts + variant.primary_assembly_loci = primary_genomic_dicts + variant.reference_sequence_records = '' + variant.validated = True # Add links to reference_sequence_records - ref_records = self.db.get_urls(dict_out) + ref_records = self.db.get_urls(variant.output_dict()) if ref_records != {}: - dict_out['reference_sequence_records'] = ref_records + variant.reference_sequence_records = ref_records # Append to a list for return - batch_out.append(dict_out) + batch_out.append(variant) - - """ - Structure the output into dictionaries rather than a list with descriptive keys - and a validation type flag - """ - logger.trace("Populating output dictionary") - # Create output dictionary - validation_output = {'flag': None} - - # For gene outputs, i.e. 
those that hit transcripts - # dotter = '' - if my_variant.output_type_flag == 'gene': - validation_output['flag'] = 'gene_variant' - validation_error_counter = 0 - validation_obsolete_counter = 0 - for valid_v in batch_out: - if valid_v['validation_warnings'] == ['Validation error']: - validation_error_counter = validation_error_counter + 1 - identification_key = 'Validation_Error_%s' % (str(validation_error_counter)) - else: - obs_obs = False - for ob_rec in valid_v['validation_warnings']: - if 'obsolete' in ob_rec: - validation_obsolete_counter = validation_obsolete_counter + 1 - obs_obs = True - break - if obs_obs is True: - identification_key = 'obsolete_record_%s' % (str(validation_obsolete_counter)) - else: - identification_key = '%s' % (str(valid_v['hgvs_transcript_variant'])) - - # if identification_key not in validation_output.keys(): - validation_output[identification_key] = valid_v - # else: - # dotter = dotter + ' ' - # validation_output[identification_key + dotter] = valid_v - - # For warning only outputs - # Should only ever be 1 output as an error or a warning of the following types - # Gene symbol as reference sequence - # Gene as transcript reference sequence - if my_variant.output_type_flag == 'warning': - validation_output['flag'] = 'warning' - validation_error_counter = 0 - validation_warning_counter = 0 - if len(batch_out) == 0: - validation_output['flag'] = 'empty_result' - for valid_v in batch_out: - if valid_v['validation_warnings'] == ['Validation error']: - validation_error_counter = validation_error_counter + 1 - identification_key = 'validation_error_%s' % (str(validation_error_counter)) - else: - validation_warning_counter = validation_warning_counter + 1 - identification_key = 'validation_warning_%s' % (str(validation_warning_counter)) - validation_output[identification_key] = valid_v - - # Intergenic variants - validation_intergenic_counter = 0 - if my_variant.output_type_flag == 'intergenic': - validation_output['flag'] = 
'intergenic' - for valid_v in batch_out: - validation_intergenic_counter = validation_intergenic_counter + 1 - identification_key = 'Intergenic_Variant_%s' % (str(validation_intergenic_counter)) - - # Attempt to liftover between genome builds - # Note: pyliftover uses the UCSC liftOver tool. - # https://pypi.org/project/pyliftover/ - genomic_position_info = valid_v['primary_assembly_loci'] - for g_p_key in list(genomic_position_info.keys()): - - # Identify the current build and hgvs_genomic descripsion - if re.match('hg', g_p_key): - # incoming_vcf = genomic_position_info[g_p_key]['vcf'] - # set builds - if g_p_key == 'hg38': - build_to = 'hg19' - build_from = 'hg38' - if g_p_key == 'hg19': - build_to = 'hg38' - build_from = 'hg19' - elif re.match('grc', g_p_key): - # incoming_vcf = genomic_position_info[g_p_key]['vcf'] - # set builds - if g_p_key == 'grch38': - build_to = 'GRCh37' - build_from = 'GRCh38' - if g_p_key == 'grch37': - build_to = 'GRCh38' - build_from = 'GRCh37' - - # Liftover - lifted_response = lift_over(genomic_position_info[g_p_key]['hgvs_genomic_description'], build_from, build_to, hn, self.vm, self.vr, self.hdp, self.hp, reverse_normalizer, self.sf, evm) - - # Sort the respomse into primary assembly and ALT - primary_assembly_loci = {} - alt_genomic_loci = [] - for build_key, accession_dict in list(lifted_response.items()): - try: - accession_key = list(accession_dict.keys())[0] - if re.match('NC_', accession_dict[accession_key]['hgvs_genomic_description']): - primary_assembly_loci[build_key.lower()] = accession_dict[accession_key] - else: - alt_genomic_loci.append({build_key.lower(): accession_dict[accession_key]}) - - # KeyError if the dicts are empty - except KeyError: - continue - - # Add the dictionaries from lifted response to the output - if primary_assembly_loci != {}: - valid_v['primary_assembly_loci'] = primary_assembly_loci - if alt_genomic_loci != []: - valid_v['alt_genomic_loci'] = alt_genomic_loci - - # Finalise the output 
dictionary - validation_output[identification_key] = valid_v - - # Add error strings to validation output - # ''' - metadata = {} - logger.info("Variant successfully validated") - logs = [] - logString = logger.getString() - for l in logger.getString().split("\n"): - logs.append(l) - - if os.environ.get("ADD_LOGS")=="True": - metadata["logs"] = logs - metadata["variant"] = batch_variant - #metadata["assembly"] = selected_assembly - #metadata["transcripts"] = select_transcripts - #metadata['seqrepo_directory'] = self.seqrepoPath - #metadata['uta_url'] = self.utaPath - #metadata['py_liftover_directory'] = self.liftoverPath - #metadata['variantvalidator_data_url'] = self.db.path - #metadata['entrez_id'] = self.entrezID - metadata['variantvalidator_version'] = self.version - metadata['variantvalidator_hgvs_version'] = self.hgvsVersion - metadata['uta_schema'] = self.utaSchema - metadata['seqrepo_db'] = self.seqrepoVersion - validation_output["metadata"] = metadata - # ''' - # Measure time elapsed - time_now = time.time() - elapsed_time = time_now - start_time - logger.debug('validation time = ' + str(elapsed_time)) - - # return batch_out - return validation_output + print('Creating Output object') + output = valoutput.ValOutput(batch_out, self) + return output # Bug catcher except KeyboardInterrupt: @@ -1074,4 +941,3 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # return logger.critical(str(exc_type) + " " + str(exc_value)) raise - logger.debug(str(er)) diff --git a/test/test_inputs.py b/test/test_inputs.py index cbe28d8a..4f0d519a 100644 --- a/test/test_inputs.py +++ b/test/test_inputs.py @@ -9,7 +9,7 @@ def setup_class(cls): def test_variant1(self): variant = 'NM_015120.4:c.35T>C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_015120.4:c.35T>C' in list(results.keys()) @@ -33,7 +33,7 @@ def test_variant1(self): def 
test_variant2(self): variant = 'NM_015120.4:c.39G>C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -57,7 +57,7 @@ def test_variant2(self): def test_variant3(self): variant = 'NM_015120.4:c.34C>T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -81,7 +81,7 @@ def test_variant3(self): def test_variant4(self): variant = 'NC_000002.11:g.73613030C>T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -105,7 +105,7 @@ def test_variant4(self): def test_variant5(self): variant = 'NC_000023.10:g.33229673A>T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -146,7 +146,7 @@ def test_variant5(self): def test_variant6(self): variant = 'NM_001145026.1:c.715A>G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -170,7 +170,7 @@ def test_variant6(self): def test_variant7(self): variant = 'NC_000016.9:g.2099572TC>T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_000548.4:c.138+821del' in list(results.keys()) @@ -381,7 +381,7 @@ def test_variant7(self): def test_variant8(self): variant = 'NM_000088.3:c.589GG>CT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -405,7 
+405,7 @@ def test_variant8(self): def test_variant9(self): variant = 'NM_000094.3:c.6751-2_6751-3del' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -429,7 +429,7 @@ def test_variant9(self): def test_variant10(self): variant = 'COL5A1:c.5071A>T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -453,7 +453,7 @@ def test_variant10(self): def test_variant11(self): variant = 'NG_007400.1:c.5071A>T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -477,7 +477,7 @@ def test_variant11(self): def test_variant12(self): variant = 'chr16:15832508_15832509delinsAC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_002474.2:c.3034_3035inv' in list(results.keys()) @@ -552,7 +552,7 @@ def test_variant12(self): def test_variant13(self): variant = 'NM_000088.3:c.589-1GG>G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -576,7 +576,7 @@ def test_variant13(self): def test_variant14(self): variant = 'NM_000088.3:c.642+1GT>G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -600,7 +600,7 @@ def test_variant14(self): def test_variant15(self): variant = 'NM_000088.3:c.589-2AG>G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 
'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -624,7 +624,7 @@ def test_variant15(self): def test_variant16(self): variant = 'NC_000017.10:g.48279242G>T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'intergenic' @@ -648,7 +648,7 @@ def test_variant16(self): def test_variant17(self): variant = 'NM_000500.7:c.-107-19C>T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -672,7 +672,7 @@ def test_variant17(self): def test_variant18(self): variant = 'NM_000518.4:c.-130C>T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -696,7 +696,7 @@ def test_variant18(self): def test_variant19(self): variant = 'NM_000518.4:c.-50-80C>T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -720,7 +720,7 @@ def test_variant19(self): def test_variant20(self): variant = 'NM_000518.4:c.316_*342delinsCTACTT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -744,7 +744,7 @@ def test_variant20(self): def test_variant21(self): variant = 'NM_000518.4:c.316_*100del' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -768,7 +768,7 @@ def test_variant21(self): def test_variant22(self): variant = 
'NM_000518.4:c.*2000C>T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -792,7 +792,7 @@ def test_variant22(self): def test_variant23(self): variant = 'NM_000518.4:c.*132+1868C>T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -816,7 +816,7 @@ def test_variant23(self): def test_variant24(self): variant = 'NM_000518.4:c.-130_*2000=' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -840,7 +840,7 @@ def test_variant24(self): def test_variant25(self): variant = 'NM_000518.4:c.-50-80_*132+1868=' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -864,7 +864,7 @@ def test_variant25(self): def test_variant26(self): variant = 'NR_138595.1:n.-810C>T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -888,7 +888,7 @@ def test_variant26(self): def test_variant27(self): variant = 'NR_138595.1:n.1-810C>T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -912,7 +912,7 @@ def test_variant27(self): def test_variant28(self): variant = 'NR_138595.1:n.1071+1A=' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) 
assert 'validation_warning_1' in list(results.keys()) @@ -935,7 +935,7 @@ def test_variant28(self): def test_variant29(self): variant = 'NR_138595.1:n.-810_1071+1=' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -959,7 +959,7 @@ def test_variant29(self): def test_variant30(self): variant = 'NC_000017.10:g.48261457_48261463TTATGTT=' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -983,7 +983,7 @@ def test_variant30(self): def test_variant31(self): variant = 'NC_000017.10:g.48275363C>A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -1007,7 +1007,7 @@ def test_variant31(self): def test_variant32(self): variant = 'NM_000088.3:c.589-1G>T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -1031,7 +1031,7 @@ def test_variant32(self): def test_variant33(self): variant = 'NM_000088.3:c.591_593inv' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_000088.3:c.591_593inv' in list(results.keys()) @@ -1055,7 +1055,7 @@ def test_variant33(self): def test_variant34(self): variant = '11-5248232-T-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -1096,7 +1096,7 @@ def test_variant34(self): def test_variant35(self): variant = 'NG_007400.1(NM_000088.3):c.589-1G>T' - results = 
self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -1120,7 +1120,7 @@ def test_variant35(self): def test_variant36(self): variant = '1:150550916G>A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -1178,7 +1178,7 @@ def test_variant36(self): def test_variant37(self): variant = '1-150550916-G-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -1236,7 +1236,7 @@ def test_variant37(self): def test_variant38(self): variant = 'NG_008123.1(LEPRE1_v003):c.2055+18G>A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -1260,7 +1260,7 @@ def test_variant38(self): def test_variant39(self): variant = 'NG_008123.1:c.2055+18G>A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -1284,7 +1284,7 @@ def test_variant39(self): def test_variant40(self): variant = 'NG_008123.1(NM_022356.3):c.2055+18G>A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -1308,7 +1308,7 @@ def test_variant40(self): def test_variant41(self): variant = 'NM_021983.4:c.490G>C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -1332,7 +1332,7 @@ def 
test_variant41(self): def test_variant42(self): variant = 'NM_032470.3:c.4del' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -1356,7 +1356,7 @@ def test_variant42(self): def test_variant43(self): variant = 'NM_001194958.2:c.20C>A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001194958.2:c.20C>A' in list(results.keys()) @@ -1380,7 +1380,7 @@ def test_variant43(self): def test_variant44(self): variant = 'NM_000022.2:c.534A>G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -1404,7 +1404,7 @@ def test_variant44(self): def test_variant45(self): variant = 'HSCHR6_MHC_SSTO_CTG1-3852542-C-G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -1428,7 +1428,7 @@ def test_variant45(self): def test_variant46(self): variant = 'NM_000368.4:c.363+1dupG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -1452,7 +1452,7 @@ def test_variant46(self): def test_variant47(self): variant = 'NM_000368.4:c.363dupG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -1476,7 +1476,7 @@ def test_variant47(self): def test_variant48(self): variant = 'NM_000089.3:c.1033_1035delGTT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() 
print(results) assert results['flag'] == 'gene_variant' @@ -1500,7 +1500,7 @@ def test_variant48(self): def test_variant49(self): variant = 'NM_000089.3:c.1035_1035+2delTGT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -1524,7 +1524,7 @@ def test_variant49(self): def test_variant50(self): variant = 'NM_000088.3:c.2023_2028delGCAAGA' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -1548,7 +1548,7 @@ def test_variant50(self): def test_variant51(self): variant = 'NM_000089.3:c.938-1delG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -1572,7 +1572,7 @@ def test_variant51(self): def test_variant52(self): variant = 'NM_000088.3:c.589G=' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -1596,7 +1596,7 @@ def test_variant52(self): def test_variant53(self): variant = 'NM_000088.3:c.642A=' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -1620,7 +1620,7 @@ def test_variant53(self): def test_variant54(self): variant = 'NM_000088.3:c.642+1GG>G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -1644,7 +1644,7 @@ def test_variant54(self): def test_variant55(self): variant = 'NM_000088.3:c.589-2GG>G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = 
self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -1668,7 +1668,7 @@ def test_variant55(self): def test_variant56(self): variant = 'NM_000088.3:c.589-6_589-5insTTTT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -1692,7 +1692,7 @@ def test_variant56(self): def test_variant57(self): variant = 'NM_000088.3:c.642+3_642+4insAAAA' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -1716,7 +1716,7 @@ def test_variant57(self): def test_variant58(self): variant = 'NM_000088.3:c.589-4_589-3insTT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -1740,7 +1740,7 @@ def test_variant58(self): def test_variant59(self): variant = 'NM_000088.3:c.589-8del' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -1764,7 +1764,7 @@ def test_variant59(self): def test_variant60(self): variant = 'NM_000527.4:c.-187_-185delCTC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_000527.4:c.-187_-185del' in list(results.keys()) @@ -1788,7 +1788,7 @@ def test_variant60(self): def test_variant61(self): variant = 'NM_206933.2:c.6317C>G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -1812,7 +1812,7 @@ def test_variant61(self): def test_variant62(self): 
variant = 'NC_000013.10:g.32929387T>C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_000059.3:c.7397C=' in list(results.keys()) @@ -1836,7 +1836,7 @@ def test_variant62(self): def test_variant63(self): variant = 'NM_015102.3:c.2818-2T>A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_015102.3:c.2818-2T>A' in list(results.keys()) @@ -1860,7 +1860,7 @@ def test_variant63(self): def test_variant64(self): variant = '19-41123094-G-GG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -1918,7 +1918,7 @@ def test_variant64(self): def test_variant65(self): variant = '15-72105928-AC-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_014249.2:c.946_949=' in list(results.keys()) @@ -1993,7 +1993,7 @@ def test_variant65(self): def test_variant66(self): variant = '12-122064773-CCCGCCA-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -2017,7 +2017,7 @@ def test_variant66(self): def test_variant67(self): variant = '12-122064774-CCGCCA-CCGCCA' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_032790.3:c.132_137dup' in list(results.keys()) @@ -2041,7 +2041,7 @@ def test_variant67(self): def test_variant68(self): variant = '12-122064773-CCCGCCACCGCCACCGC-CCCGCCACCGCCGCCGTC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() 
print(results) assert results['flag'] == 'gene_variant' @@ -2065,7 +2065,7 @@ def test_variant68(self): def test_variant69(self): variant = 'NC_000012.11:g.122064777C>A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -2089,7 +2089,7 @@ def test_variant69(self): def test_variant70(self): variant = 'NC_000012.11:g.122064776delG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -2113,7 +2113,7 @@ def test_variant70(self): def test_variant71(self): variant = 'NC_000012.11:g.122064776dupG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -2137,7 +2137,7 @@ def test_variant71(self): def test_variant72(self): variant = 'NC_000012.11:g.122064776_122064777insTTT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -2161,7 +2161,7 @@ def test_variant72(self): def test_variant73(self): variant = 'NC_000012.11:g.122064772_122064775del' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -2185,7 +2185,7 @@ def test_variant73(self): def test_variant74(self): variant = 'NC_000012.11:g.122064772_122064775dup' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -2209,7 +2209,7 @@ def test_variant74(self): def test_variant75(self): variant = 
'NC_000012.11:g.122064773_122064774insTTTT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_032790.3:c.126_127insTTTTCCGCCA' in list(results.keys()) @@ -2233,7 +2233,7 @@ def test_variant75(self): def test_variant76(self): variant = 'NC_000012.11:g.122064772_122064777del' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_032790.3:c.126C>A' in list(results.keys()) @@ -2257,7 +2257,7 @@ def test_variant76(self): def test_variant77(self): variant = 'NC_000012.11:g.122064772_122064777dup' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -2281,7 +2281,7 @@ def test_variant77(self): def test_variant78(self): variant = 'NC_000012.11:g.122064779_122064782dup' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_032790.3:c.135_136insACCGCCACCG' in list(results.keys()) @@ -2305,7 +2305,7 @@ def test_variant78(self): def test_variant79(self): variant = 'NC_000012.11:g.122064772_122064782del' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -2329,7 +2329,7 @@ def test_variant79(self): def test_variant80(self): variant = 'NC_000002.11:g.95847041_95847043GCG=' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_021088.3:c.471_473dup' in list(results.keys()) @@ -2455,7 +2455,7 @@ def test_variant80(self): def test_variant81(self): variant = 'NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=' - results 
= self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001083585.1:c.*344_*368dup' in list(results.keys()) @@ -2547,7 +2547,7 @@ def test_variant81(self): def test_variant82(self): variant = 'NC_000003.11:g.14561629_14561630GC=' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001080423.3:c.1020del' in list(results.keys()) @@ -2588,7 +2588,7 @@ def test_variant82(self): def test_variant83(self): variant = 'NC_000003.11:g.14561629_14561630insG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001080423.3:c.1016_1020=' in list(results.keys()) @@ -2629,7 +2629,7 @@ def test_variant83(self): def test_variant84(self): variant = 'NC_000004.11:g.140811111_140811122del' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -2670,7 +2670,7 @@ def test_variant84(self): def test_variant85(self): variant = 'NC_000004.11:g.140811111_140811122CTGCTGCTGCTG=' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_018717.5:c.1468_1479=' in list(results.keys()) @@ -2711,7 +2711,7 @@ def test_variant85(self): def test_variant86(self): variant = 'NC_000004.11:g.140811117_140811122del' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -2752,7 +2752,7 @@ def test_variant86(self): def test_variant87(self): variant = 'NC_000004.11:g.140811111_140811117del' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = 
self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_018717.5:c.1473_1479del' in list(results.keys()) @@ -2793,7 +2793,7 @@ def test_variant87(self): def test_variant88(self): variant = 'NC_000004.11:g.140811117C>A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -2834,7 +2834,7 @@ def test_variant88(self): def test_variant89(self): variant = 'NC_000002.11:g.73675227_73675228insCTC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_015120.4:c.1573_1579=' in list(results.keys()) @@ -2858,7 +2858,7 @@ def test_variant89(self): def test_variant90(self): variant = '9-136132908-T-TC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -2882,7 +2882,7 @@ def test_variant90(self): def test_variant91(self): variant = '9-136132908-TAC-TCA' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -2906,7 +2906,7 @@ def test_variant91(self): def test_variant92(self): variant = '9-136132908-TA-TA' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_020469.2:c.261del' in list(results.keys()) @@ -2930,7 +2930,7 @@ def test_variant92(self): def test_variant93(self): variant = 'NM_020469.2:c.258delG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -2954,7 +2954,7 @@ def test_variant93(self): def test_variant94(self): 
variant = 'NM_020469.2:c.260_262TGA=' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -2978,7 +2978,7 @@ def test_variant94(self): def test_variant95(self): variant = 'NM_020469.2:c.261delG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_020469.2:c.261del' in list(results.keys()) @@ -3002,7 +3002,7 @@ def test_variant95(self): def test_variant96(self): variant = 'NM_020469.2:c.261dupG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -3026,7 +3026,7 @@ def test_variant96(self): def test_variant97(self): variant = 'NM_020469.2:c.261_262insTT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -3050,7 +3050,7 @@ def test_variant97(self): def test_variant98(self): variant = 'NC_000019.10:g.50378563_50378564insTAC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -3091,7 +3091,7 @@ def test_variant98(self): def test_variant99(self): variant = 'NC_000019.10:g.50378563_50378564insC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -3132,7 +3132,7 @@ def test_variant99(self): def test_variant100(self): variant = 'NC_000019.10:g.50378564_50378565insTACA' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) 
assert results['flag'] == 'gene_variant' @@ -3173,7 +3173,7 @@ def test_variant100(self): def test_variant101(self): variant = 'NC_000019.10:g.50378565_50378567dup' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_007121.5:c.514_520=' in list(results.keys()) @@ -3214,7 +3214,7 @@ def test_variant101(self): def test_variant102(self): variant = 'NC_000019.10:g.50378563_50378564=' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -3255,7 +3255,7 @@ def test_variant102(self): def test_variant103(self): variant = 'NC_000019.10:g.50378563_50378564insTCGG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001256647.1:c.224_226delinsTCGG' in list(results.keys()) @@ -3296,7 +3296,7 @@ def test_variant103(self): def test_variant104(self): variant = 'NC_000019.10:g.50378563delinsTTAC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -3337,7 +3337,7 @@ def test_variant104(self): def test_variant105(self): variant = 'NC_000019.10:g.50378563_50378564insTAAC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_007121.5:c.514_515insT' in list(results.keys()) @@ -3378,7 +3378,7 @@ def test_variant105(self): def test_variant106(self): variant = 'NC_000019.10:g.50378562_50378565del' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -3419,7 +3419,7 @@ def test_variant106(self): def 
test_variant107(self): variant = 'NC_000019.10:g.50378562_50378565delinsTC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001256647.1:c.222_228delinsTC' in list(results.keys()) @@ -3460,7 +3460,7 @@ def test_variant107(self): def test_variant108(self): variant = 'NC_000007.14:g.149779575_149779577delinsT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -3484,7 +3484,7 @@ def test_variant108(self): def test_variant109(self): variant = 'NC_000007.14:g.149779575_149779577=' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -3508,7 +3508,7 @@ def test_variant109(self): def test_variant110(self): variant = 'NC_000007.14:g.149779576_149779578del' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -3532,7 +3532,7 @@ def test_variant110(self): def test_variant111(self): variant = 'NC_000007.14:g.149779577del' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -3556,7 +3556,7 @@ def test_variant111(self): def test_variant112(self): variant = 'NC_000007.14:g.149779573_149779579del' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_198455.2:c.1114_1117del' in list(results.keys()) @@ -3580,7 +3580,7 @@ def test_variant112(self): def test_variant113(self): variant = 'NC_000007.14:g.149779573_149779579delinsCA' - results = 
self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_198455.2:c.1114_1117delinsCA' in list(results.keys()) @@ -3604,7 +3604,7 @@ def test_variant113(self): def test_variant114(self): variant = 'NM_000088.3:c.590_591inv' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_000088.3:c.590_591inv' in list(results.keys()) @@ -3628,7 +3628,7 @@ def test_variant114(self): def test_variant115(self): variant = 'NM_024989.3:c.1778_1779inv' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_024989.3:c.1778_1779inv' in list(results.keys()) @@ -3652,7 +3652,7 @@ def test_variant115(self): def test_variant116(self): variant = 'NM_032815.3:c.555_556inv' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -3675,7 +3675,7 @@ def test_variant116(self): def test_variant117(self): variant = 'NM_006138.4:c.3_4inv' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_006138.4:c.3_4inv' in list(results.keys()) @@ -3698,7 +3698,7 @@ def test_variant117(self): def test_variant118(self): variant = 'NM_000038.5:c.3927_3928delAAinsTT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -3722,7 +3722,7 @@ def test_variant118(self): def test_variant119(self): variant = 'NM_001034853.1:c.2847_2848delAGinsCT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) 
assert results['flag'] == 'gene_variant' @@ -3746,7 +3746,7 @@ def test_variant119(self): def test_variant120(self): variant = 'NM_000088.3:c.4392_*2inv' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_000088.3:c.4394_4395inv' in list(results.keys()) @@ -3770,7 +3770,7 @@ def test_variant120(self): def test_variant121(self): variant = 'NM_000088.3:c.4392_*5inv' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -3794,7 +3794,7 @@ def test_variant121(self): def test_variant122(self): variant = 'NM_000088.3:c.4390_*7inv' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -3818,7 +3818,7 @@ def test_variant122(self): def test_variant123(self): variant = 'NM_005732.3:c.2923-5insT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -3842,7 +3842,7 @@ def test_variant123(self): def test_variant124(self): variant = 'NM_198283.1(EYS):c.*743120C>T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -3866,7 +3866,7 @@ def test_variant124(self): def test_variant125(self): variant = 'NM_133379.4(TTN):c.*265+26591C>T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -3890,7 +3890,7 @@ def test_variant125(self): def test_variant126(self): variant = 
'NM_000088.3:c.589-2_589-1AG>G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -3914,7 +3914,7 @@ def test_variant126(self): def test_variant127(self): variant = 'NM_000088.3:c.642+1_642+2delGTinsG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -3938,7 +3938,7 @@ def test_variant127(self): def test_variant128(self): variant = 'NM_004415.3:c.1-1insA' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -3962,7 +3962,7 @@ def test_variant128(self): def test_variant129(self): variant = 'NM_004415.3:c.-1_1insA' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -3986,7 +3986,7 @@ def test_variant129(self): def test_variant130(self): variant = 'NM_000273.2:c.1-5028_253del' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -4010,7 +4010,7 @@ def test_variant130(self): def test_variant131(self): variant = 'NM_002929.2:c.1006C>T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -4034,7 +4034,7 @@ def test_variant131(self): def test_variant132(self): variant = 'NR_125367.1:n.167+18165G>A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 
results['flag'] == 'gene_variant' @@ -4058,7 +4058,7 @@ def test_variant132(self): def test_variant133(self): variant = 'NM_006005.3:c.3071_3073delinsTTA' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -4082,7 +4082,7 @@ def test_variant133(self): def test_variant134(self): variant = 'NM_000089.3:n.1504_1506del' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -4106,7 +4106,7 @@ def test_variant134(self): def test_variant135(self): variant = 'NC_012920.1:m.1011C>T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -4130,7 +4130,7 @@ def test_variant135(self): def test_variant136(self): variant = 'NC_000006.11:g.90403795G=' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -4171,7 +4171,7 @@ def test_variant136(self): def test_variant137(self): variant = '1-169519049-T-.' 
- results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_000130.4:c.1602del' in list(results.keys()) @@ -4212,7 +4212,7 @@ def test_variant137(self): def test_variant138(self): variant = 'NC_000005.9:g.35058667_35058668AG=' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001204317.1:c.856-9155_856-9154=' in list(results.keys()) @@ -4355,7 +4355,7 @@ def test_variant138(self): def test_variant139(self): variant = 'NM_000251.1:c.1296_1348del' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -4379,7 +4379,7 @@ def test_variant139(self): def test_variant140(self): variant = 'NM_000088.3:c.2023_2028del' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -4403,7 +4403,7 @@ def test_variant140(self): def test_variant141(self): variant = 'NM_000088.3:c.2024_2028+1del' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -4427,7 +4427,7 @@ def test_variant141(self): def test_variant142(self): variant = 'ENST00000450616.1:n.31+1G>C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -4451,7 +4451,7 @@ def test_variant142(self): def test_variant143(self): variant = 'ENST00000491747:c.5071A>T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() 
print(results) assert 'validation_warning_1' in list(results.keys()) @@ -4475,7 +4475,7 @@ def test_variant143(self): def test_variant144(self): variant = 'NM_000088.3:c.589G>T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -4499,7 +4499,7 @@ def test_variant144(self): def test_variant145(self): variant = 'NG_007400.1:g.8638G>T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -4523,7 +4523,7 @@ def test_variant145(self): def test_variant146(self): variant = 'LRG_1:g.8638G>T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -4547,7 +4547,7 @@ def test_variant146(self): def test_variant147(self): variant = 'LRG_1t1:c.589G>T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -4571,7 +4571,7 @@ def test_variant147(self): def test_variant148(self): variant = 'chr16:g.15832508_15832509delinsAC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_002474.2:c.3034_3035inv' in list(results.keys()) @@ -4646,7 +4646,7 @@ def test_variant148(self): def test_variant149(self): variant = 'NG_012386.1:g.24048dupG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001162427.1:c.210+1615dup' in list(results.keys()) @@ -4721,7 +4721,7 @@ def test_variant149(self): def test_variant150(self): variant = 'NM_033517.1:c.1307_1309delCGA' - results = 
self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -4745,7 +4745,7 @@ def test_variant150(self): def test_variant151(self): variant = 'HG1311_PATCH-33720-CCGA-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -4769,7 +4769,7 @@ def test_variant151(self): def test_variant152(self): variant = '2-73675227-TCTC-TCTCCTC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_015120.4:c.1573_1579=' in list(results.keys()) @@ -4793,7 +4793,7 @@ def test_variant152(self): def test_variant153(self): variant = '2-73675227-TC-TC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_015120.4:c.1577_1579del' in list(results.keys()) @@ -4817,7 +4817,7 @@ def test_variant153(self): def test_variant154(self): variant = '3-14561627-AG-AGG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001080423.3:c.1016_1020=' in list(results.keys()) @@ -4858,7 +4858,7 @@ def test_variant154(self): def test_variant155(self): variant = '3-14561630-CC-CC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001080423.3:c.1020del' in list(results.keys()) @@ -4899,7 +4899,7 @@ def test_variant155(self): def test_variant156(self): variant = '6-90403795-G-G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' 
@@ -4940,7 +4940,7 @@ def test_variant156(self): def test_variant157(self): variant = '6-90403795-G-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_014611.2:c.9879C>T' in list(results.keys()) @@ -4981,7 +4981,7 @@ def test_variant157(self): def test_variant158(self): variant = '6-32012992-CG-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -5056,7 +5056,7 @@ def test_variant158(self): def test_variant159(self): variant = '17-48275363-C-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -5080,7 +5080,7 @@ def test_variant159(self): def test_variant160(self): variant = '17-48275364-C-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -5104,7 +5104,7 @@ def test_variant160(self): def test_variant161(self): variant = '17-48275359-GGA-TCC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_000088.3:c.591_593inv' in list(results.keys()) @@ -5128,7 +5128,7 @@ def test_variant161(self): def test_variant162(self): variant = '7-94039128-CTTG-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -5152,7 +5152,7 @@ def test_variant162(self): def test_variant163(self): variant = '9-135800972-AC-ACC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() 
print(results) assert 'NM_001162427.1:c.210+1615dup' in list(results.keys()) @@ -5227,7 +5227,7 @@ def test_variant163(self): def test_variant164(self): variant = '1-43212925-C-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -5285,7 +5285,7 @@ def test_variant164(self): def test_variant165(self): variant = 'HG987_PATCH-355171-C-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001194958.2:c.20C>A' in list(results.keys()) @@ -5309,7 +5309,7 @@ def test_variant165(self): def test_variant166(self): variant = '20-43252915-T-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_000022.3:c.534A>G' in list(results.keys()) @@ -5401,7 +5401,7 @@ def test_variant166(self): def test_variant167(self): variant = '1-216219781-A-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -5425,7 +5425,7 @@ def test_variant167(self): def test_variant168(self): variant = '2-209113113-G-A,C,T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_005896.3:c.394C>G' in list(results.keys()) @@ -5636,7 +5636,7 @@ def test_variant168(self): def test_variant169(self): variant = 'NC_000005.9:g.35058665_35058666CA=' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001204314.1:c.*6525_*6526=' in list(results.keys()) @@ -5779,7 +5779,7 @@ def test_variant169(self): def test_variant170(self): variant = 
'NC_000002.11:g.73675227_73675229delTCTinsTCTCTC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -5803,7 +5803,7 @@ def test_variant170(self): def test_variant171(self): variant = 'NM_000828.4:c.-2dupG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -5827,7 +5827,7 @@ def test_variant171(self): def test_variant172(self): variant = 'X-122318386-A-AGG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_007325.4:c.-2dup' in list(results.keys()) @@ -5885,7 +5885,7 @@ def test_variant172(self): def test_variant173(self): variant = 'NM_000828.4:c.-2G>T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -5909,7 +5909,7 @@ def test_variant173(self): def test_variant174(self): variant = 'NM_000828.4:c.-2G=' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_000828.4:c.-2G=' in list(results.keys()) @@ -5933,7 +5933,7 @@ def test_variant174(self): def test_variant175(self): variant = 'X-122318386-A-AT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -5991,7 +5991,7 @@ def test_variant175(self): def test_variant176(self): variant = 'NM_000828.4:c.-2_-1insT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 
'gene_variant' @@ -6015,7 +6015,7 @@ def test_variant176(self): def test_variant177(self): variant = 'NM_000828.4:c.-3_-2insT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_000828.4:c.-3_-2insT' in list(results.keys()) @@ -6039,7 +6039,7 @@ def test_variant177(self): def test_variant178(self): variant = 'NM_000828.4:c.-2delGinsTT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -6063,7 +6063,7 @@ def test_variant178(self): def test_variant179(self): variant = 'NM_000828.4:c.-2_-1delGCinsTT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -6087,7 +6087,7 @@ def test_variant179(self): def test_variant180(self): variant = 'NM_000828.4:c.-3_-2delAGinsTT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_000828.4:c.-3_-2delinsTT' in list(results.keys()) @@ -6111,7 +6111,7 @@ def test_variant180(self): def test_variant181(self): variant = '15-72105929-C-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_014249.3:c.951dup' in list(results.keys()) @@ -6186,7 +6186,7 @@ def test_variant181(self): def test_variant182(self): variant = '15-72105928-AC-ATT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_014249.2:c.947_948insTT' in list(results.keys()) @@ -6261,7 +6261,7 @@ def test_variant182(self): def test_variant183(self): variant = '15-72105928-ACC-ATT' - results = self.vv.validate(variant, 
'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_014249.2:c.947_948insTT' in list(results.keys()) @@ -6336,7 +6336,7 @@ def test_variant183(self): def test_variant184(self): variant = '15-72105927-GACC-GTT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_014249.3:c.947delinsTT' in list(results.keys()) @@ -6411,7 +6411,7 @@ def test_variant184(self): def test_variant185(self): variant = '19-41123093-A-AG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -6469,7 +6469,7 @@ def test_variant185(self): def test_variant186(self): variant = '19-41123093-A-AT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_003573.2:c.3123G>T' in list(results.keys()) @@ -6527,7 +6527,7 @@ def test_variant186(self): def test_variant187(self): variant = '19-41123093-AG-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001042544.1:c.3235_3236del' in list(results.keys()) @@ -6585,7 +6585,7 @@ def test_variant187(self): def test_variant188(self): variant = '19-41123093-AG-AG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001042545.1:c.3035del' in list(results.keys()) @@ -6643,7 +6643,7 @@ def test_variant188(self): def test_variant189(self): variant = 'NM_012309.4:c.913-5058G>A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -6667,7 +6667,7 @@ 
def test_variant189(self): def test_variant190(self): variant = 'LRG_199t1:c.2376[G>C];[G>C]' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -6691,7 +6691,7 @@ def test_variant190(self): def test_variant191(self): variant = 'LRG_199t1:c.[2376G>C];[3103del]' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_004006.2:c.3103del' in list(results.keys()) @@ -6732,7 +6732,7 @@ def test_variant191(self): def test_variant192(self): variant = 'LRG_199t1:c.[4358_4359del;4361_4372del]' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -6774,7 +6774,7 @@ def test_variant192(self): def test_variant193(self): variant = 'LRG_199t1:c.2376G>C(;)3103del' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_004006.2:c.3103del' in list(results.keys()) @@ -6815,7 +6815,7 @@ def test_variant193(self): def test_variant194(self): variant = 'LRG_199t1:c.2376[G>C];[(G>C)]' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -6839,7 +6839,7 @@ def test_variant194(self): def test_variant195(self): variant = 'LRG_199t1:c.[2376G>C];[?]' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -6863,7 +6863,7 @@ def test_variant195(self): def test_variant196(self): variant = 'LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C' - results = self.vv.validate(variant, 'GRCh37', 
'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_004006.2:c.476T=' in list(results.keys()) @@ -6921,7 +6921,7 @@ def test_variant196(self): def test_variant197(self): variant = 'LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_004006.2:c.1408del' in list(results.keys()) @@ -6996,7 +6996,7 @@ def test_variant197(self): def test_variant198(self): variant = 'LRG_199t1:c.[976-20T>A;976-17_976-1dup]' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'validation_warning_1' in list(results.keys()) @@ -7020,7 +7020,7 @@ def test_variant198(self): def test_variant199(self): variant = '1-5935162-A-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_015102.3:c.2818-2T>A' in list(results.keys()) @@ -7112,7 +7112,7 @@ def test_variant199(self): def test_variant200(self): variant = '1-12065948-C-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -7153,7 +7153,7 @@ def test_variant200(self): def test_variant201(self): variant = '1-46655125-CTCAC-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001290129.1:c.1829+5_1829+8del' in list(results.keys()) @@ -7228,7 +7228,7 @@ def test_variant201(self): def test_variant202(self): variant = '1-68912523-TGAGCCAGAG-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_000329.2:c.106_114del' in 
list(results.keys()) @@ -7252,7 +7252,7 @@ def test_variant202(self): def test_variant203(self): variant = '1-68912526-GCCAGAG-G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_000329.2:c.109_114del' in list(results.keys()) @@ -7276,7 +7276,7 @@ def test_variant203(self): def test_variant204(self): variant = '1-109817590-G-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -7300,7 +7300,7 @@ def test_variant204(self): def test_variant205(self): variant = '1-145597475-GAAGT-G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -7358,7 +7358,7 @@ def test_variant205(self): def test_variant206(self): variant = '1-153791300-CTG-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -7399,7 +7399,7 @@ def test_variant206(self): def test_variant207(self): variant = '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_005572.3:c.711_734delinsCCCC' in list(results.keys()) @@ -7542,7 +7542,7 @@ def test_variant207(self): def test_variant208(self): variant = '1-156108541-G-GG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_170707.3:c.1961dup' in list(results.keys()) @@ -7634,7 +7634,7 @@ def test_variant208(self): def test_variant209(self): variant = '1-161279695-T-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + 
results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -7692,7 +7692,7 @@ def test_variant209(self): def test_variant210(self): variant = '1-169519049-T-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -7716,7 +7716,7 @@ def test_variant210(self): def test_variant211(self): variant = '1-226125468-G-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_003240.4:c.774C>T' in list(results.keys()) @@ -7791,7 +7791,7 @@ def test_variant211(self): def test_variant212(self): variant = '10-89623035-CGCA-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -7815,7 +7815,7 @@ def test_variant212(self): def test_variant213(self): variant = '11-62457852-C-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NR_037946.1:n.3896G>T' in list(results.keys()) @@ -7924,7 +7924,7 @@ def test_variant213(self): def test_variant214(self): variant = '11-108178710-A-AT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001351834.1:c.5761_5762insT' in list(results.keys()) @@ -7965,7 +7965,7 @@ def test_variant214(self): def test_variant215(self): variant = '11-111735981-G-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001352419.1:c.-108-7C>T' in list(results.keys()) @@ -8312,7 +8312,7 @@ def test_variant215(self): def test_variant216(self): 
variant = '12-11023080-C-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -8336,7 +8336,7 @@ def test_variant216(self): def test_variant217(self): variant = '12-22018712-TC-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_020297.3:c.2199-1302del' in list(results.keys()) @@ -8411,7 +8411,7 @@ def test_variant217(self): def test_variant218(self): variant = '12-52912946-T-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -8435,7 +8435,7 @@ def test_variant218(self): def test_variant219(self): variant = '12-103234292-TC-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001354304.1:c.1200del' in list(results.keys()) @@ -8493,7 +8493,7 @@ def test_variant219(self): def test_variant220(self): variant = '12-103311124-T-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001354304.1:c.-95-121A>G' in list(results.keys()) @@ -8551,7 +8551,7 @@ def test_variant220(self): def test_variant221(self): variant = '12-111064166-G-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001319681.1:c.-366-1G>A' in list(results.keys()) @@ -8728,7 +8728,7 @@ def test_variant221(self): def test_variant222(self): variant = '12-123738430-CA-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 
'NM_001194995.1:c.210del' in list(results.keys()) @@ -8786,7 +8786,7 @@ def test_variant222(self): def test_variant223(self): variant = '13-31789169-CT-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -8810,7 +8810,7 @@ def test_variant223(self): def test_variant224(self): variant = '14-62187287-G-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NR_144368.1:n.214-3552C>T' in list(results.keys()) @@ -8885,7 +8885,7 @@ def test_variant224(self): def test_variant225(self): variant = '14-62188231-TT-GA' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NR_144368.1:n.214-4497_214-4496delinsTC' in list(results.keys()) @@ -8960,7 +8960,7 @@ def test_variant225(self): def test_variant226(self): variant = '14-63174827-C-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_139318.3:c.2366G>T' in list(results.keys()) @@ -9035,7 +9035,7 @@ def test_variant226(self): def test_variant227(self): variant = '15-42680000-CA-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_000070.2:c.550del' in list(results.keys()) @@ -9093,7 +9093,7 @@ def test_variant227(self): def test_variant228(self): variant = '15-42680000-CA-CAA' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_024344.1:c.550dup' in list(results.keys()) @@ -9151,7 +9151,7 @@ def test_variant228(self): def test_variant229(self): variant = '15-42703179-T-TTCA' - results = 
self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_173088.1:c.825_826insTCA' in list(results.keys()) @@ -9260,7 +9260,7 @@ def test_variant229(self): def test_variant230(self): variant = '15-42703179-TAG-TTCATCT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_024344.1:c.2344_2345delinsTCATCT' in list(results.keys()) @@ -9369,7 +9369,7 @@ def test_variant230(self): def test_variant231(self): variant = '15-48782203-C-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_000138.4:c.2927G>A' in list(results.keys()) @@ -9393,7 +9393,7 @@ def test_variant231(self): def test_variant232(self): variant = '15-72105929-CC-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_014249.2:c.946_949=' in list(results.keys()) @@ -9468,7 +9468,7 @@ def test_variant232(self): def test_variant233(self): variant = '15-89873415-G-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_002693.2:c.752C>T' in list(results.keys()) @@ -9509,7 +9509,7 @@ def test_variant233(self): def test_variant234(self): variant = '16-2103394-C-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_000548.3:c.277C>T' in list(results.keys()) @@ -9720,7 +9720,7 @@ def test_variant234(self): def test_variant235(self): variant = '16-3779300-C-G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 
'gene_variant' @@ -9761,7 +9761,7 @@ def test_variant235(self): def test_variant236(self): variant = '16-5128843-C-G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001330504.1:c.493C>G' in list(results.keys()) @@ -9802,7 +9802,7 @@ def test_variant236(self): def test_variant237(self): variant = '16-74808559-C-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_024306.4:c.95G>A' in list(results.keys()) @@ -9826,7 +9826,7 @@ def test_variant237(self): def test_variant238(self): variant = '16-89574804-C-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_003119.3:c.-22C>A' in list(results.keys()) @@ -9884,7 +9884,7 @@ def test_variant238(self): def test_variant239(self): variant = '16-89574826-A-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_003119.2:c.1A>C' in list(results.keys()) @@ -9976,7 +9976,7 @@ def test_variant239(self): def test_variant240(self): variant = '16-89574914-G-GT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001363850.1:c.90dup' in list(results.keys()) @@ -10068,7 +10068,7 @@ def test_variant240(self): def test_variant241(self): variant = '16-89574916-C-CGTC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_199367.2:c.89_91dup' in list(results.keys()) @@ -10160,7 +10160,7 @@ def test_variant241(self): def test_variant242(self): variant = '16-89575009-G-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = 
self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_199367.2:c.183+1G>A' in list(results.keys()) @@ -10252,7 +10252,7 @@ def test_variant242(self): def test_variant243(self): variant = '16-89575040-C-A,CA' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_199367.1:c.183+32_183+33insA' in list(results.keys()) @@ -10429,7 +10429,7 @@ def test_variant243(self): def test_variant244(self): variant = '16-89576896-A-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_199367.2:c.184-2A>C' in list(results.keys()) @@ -10521,7 +10521,7 @@ def test_variant244(self): def test_variant245(self): variant = '16-89576930-T-TA,TT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_003119.3:c.216dup' in list(results.keys()) @@ -10698,7 +10698,7 @@ def test_variant245(self): def test_variant246(self): variant = '16-89576931-G-GTG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_199367.1:c.216_217dup' in list(results.keys()) @@ -10790,7 +10790,7 @@ def test_variant246(self): def test_variant247(self): variant = '16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_199367.1:c.1046_1071del' in list(results.keys()) @@ -10882,7 +10882,7 @@ def test_variant247(self): def test_variant248(self): variant = '16-89613064-AGGAGAGGCG-AT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 
'NM_001363850.1:c.1450-1_1457delinsT' in list(results.keys()) @@ -10940,7 +10940,7 @@ def test_variant248(self): def test_variant249(self): variant = '16-89613069-AGGCGGGAGA-AT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_003119.2:c.1454_1462delinsT' in list(results.keys()) @@ -10998,7 +10998,7 @@ def test_variant249(self): def test_variant250(self): variant = '16-89613145-C-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001363850.1:c.1529C>T' in list(results.keys()) @@ -11056,7 +11056,7 @@ def test_variant250(self): def test_variant251(self): variant = '17-7578194-GCAC-G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001276695.1:c.535_537del' in list(results.keys()) @@ -11318,7 +11318,7 @@ def test_variant251(self): def test_variant252(self): variant = '17-7578523-T-TG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001276760.1:c.289dup' in list(results.keys()) @@ -11580,7 +11580,7 @@ def test_variant252(self): def test_variant253(self): variant = '17-17119692-A-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_144997.6:c.1300+2T>G' in list(results.keys()) @@ -11672,7 +11672,7 @@ def test_variant253(self): def test_variant254(self): variant = '17-41197588-GGACA-G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_007294.3:c.*103_*106del' in list(results.keys()) @@ -11781,7 +11781,7 @@ def test_variant254(self): def 
test_variant255(self): variant = '17-41256884-C-G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_007299.3:c.301+1G>C' in list(results.keys()) @@ -11890,7 +11890,7 @@ def test_variant255(self): def test_variant256(self): variant = '17-42991428-C-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001363846.1:c.490G>T' in list(results.keys()) @@ -11965,7 +11965,7 @@ def test_variant256(self): def test_variant257(self): variant = '17-48252809-A-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NR_135553.1:n.1022A>T' in list(results.keys()) @@ -12057,7 +12057,7 @@ def test_variant257(self): def test_variant258(self): variant = '17-62022709-G-GTC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -12081,7 +12081,7 @@ def test_variant258(self): def test_variant259(self): variant = '17-62022711-C-CT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -12105,7 +12105,7 @@ def test_variant259(self): def test_variant260(self): variant = '17-62023005-G-GGC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -12129,7 +12129,7 @@ def test_variant260(self): def test_variant261(self): variant = '17-62023006-C-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] 
== 'gene_variant' @@ -12153,7 +12153,7 @@ def test_variant261(self): def test_variant262(self): variant = '17-62034787-G-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -12177,7 +12177,7 @@ def test_variant262(self): def test_variant263(self): variant = '18-24128261-GTCCTCC-G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001351443.1:c.-16+941_-16+946del' in list(results.keys()) @@ -12303,7 +12303,7 @@ def test_variant263(self): def test_variant264(self): variant = '19-15291774-G-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -12327,7 +12327,7 @@ def test_variant264(self): def test_variant265(self): variant = '19-15311794-A-G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'intergenic' @@ -12351,7 +12351,7 @@ def test_variant265(self): def test_variant266(self): variant = '19-39076592-G-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -12392,7 +12392,7 @@ def test_variant266(self): def test_variant267(self): variant = '2-50149352-T-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001330086.1:c.4245A>G' in list(results.keys()) @@ -12841,7 +12841,7 @@ def test_variant267(self): def test_variant268(self): variant = '2-50847195-G-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 
'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001330096.1:c.1201C>T' in list(results.keys()) @@ -13137,7 +13137,7 @@ def test_variant268(self): def test_variant269(self): variant = '2-71825797-C-G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001130976.1:c.3582C>G' in list(results.keys()) @@ -13382,7 +13382,7 @@ def test_variant269(self): def test_variant270(self): variant = '2-166179712-G-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_021007.2:c.1718G>C' in list(results.keys()) @@ -13440,7 +13440,7 @@ def test_variant270(self): def test_variant271(self): variant = '2-166183371-A-G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_021007.2:c.2026A>G' in list(results.keys()) @@ -13498,7 +13498,7 @@ def test_variant271(self): def test_variant272(self): variant = '2-166929889-GTCCAGGTCCT-GAC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001353951.1:c.233_242delinsGT' in list(results.keys()) @@ -13845,7 +13845,7 @@ def test_variant272(self): def test_variant273(self): variant = '2-166929891-CCAGGTCCT-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NR_148667.1:n.638_645del' in list(results.keys()) @@ -14192,7 +14192,7 @@ def test_variant273(self): def test_variant274(self): variant = '2-179393504-G-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001256850.1:c.102051C>A' in list(results.keys()) @@ -14369,7 +14369,7 @@ def 
test_variant274(self): def test_variant275(self): variant = '2-185803444-TGCAGCTGCTGCAGCTGCAGCTGCA-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -14393,7 +14393,7 @@ def test_variant275(self): def test_variant276(self): variant = '2-201950249-G-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -14434,7 +14434,7 @@ def test_variant276(self): def test_variant277(self): variant = '2-238268730-C-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_004369.3:c.6282+1G>T' in list(results.keys()) @@ -14492,7 +14492,7 @@ def test_variant277(self): def test_variant278(self): variant = '21-43897396-C-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_080860.2:c.727+5G>A' in list(results.keys()) @@ -14550,7 +14550,7 @@ def test_variant278(self): def test_variant279(self): variant = '22-30064360-G-GCGACGC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_000268.3:c.924_925insCGACGC' in list(results.keys()) @@ -14727,7 +14727,7 @@ def test_variant279(self): def test_variant280(self): variant = '3-10188187-TGTCCCGATAG-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_198156.2:c.341-3280_341-3271del' in list(results.keys()) @@ -14785,7 +14785,7 @@ def test_variant280(self): def test_variant281(self): variant = '3-50402127-T-G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = 
self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001005505.2:c.3408A>C' in list(results.keys()) @@ -14928,7 +14928,7 @@ def test_variant281(self): def test_variant282(self): variant = '3-50402890-G-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NR_111913.1:n.126G>A' in list(results.keys()) @@ -15105,7 +15105,7 @@ def test_variant282(self): def test_variant283(self): variant = '3-57851007-AG-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_007159.4:c.1135+565del' in list(results.keys()) @@ -15231,7 +15231,7 @@ def test_variant283(self): def test_variant284(self): variant = '3-122003832-G-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001178065.1:c.3061C=' in list(results.keys()) @@ -15272,7 +15272,7 @@ def test_variant284(self): def test_variant285(self): variant = '4-153332910-C-CAGG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001349798.1:c.45_46insCCT' in list(results.keys()) @@ -15347,7 +15347,7 @@ def test_variant285(self): def test_variant286(self): variant = '5-1295183-G-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'intergenic' @@ -15371,7 +15371,7 @@ def test_variant286(self): def test_variant287(self): variant = '5-77396835-TTTC-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_003664.4:c.2409_2411del' in list(results.keys()) @@ -15429,7 +15429,7 @@ def 
test_variant287(self): def test_variant288(self): variant = '5-118811422-GGTGA-G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_000414.3:c.302+3_302+6del' in list(results.keys()) @@ -15538,7 +15538,7 @@ def test_variant288(self): def test_variant289(self): variant = '5-118811422-GGTGAG-G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001292028.1:c.-110+1_-110+5del' in list(results.keys()) @@ -15647,7 +15647,7 @@ def test_variant289(self): def test_variant290(self): variant = '5-131705587-CG-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -15705,7 +15705,7 @@ def test_variant290(self): def test_variant291(self): variant = '5-148406482-T-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -15729,7 +15729,7 @@ def test_variant291(self): def test_variant292(self): variant = '6-110036337-T-TCAG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_014845.5:c.123_124insCAG' in list(results.keys()) @@ -15753,7 +15753,7 @@ def test_variant292(self): def test_variant293(self): variant = '6-110036337-TGAT-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_014845.5:c.124_126del' in list(results.keys()) @@ -15777,7 +15777,7 @@ def test_variant293(self): def test_variant294(self): variant = '6-152651802-C-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = 
self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -15818,7 +15818,7 @@ def test_variant294(self): def test_variant295(self): variant = '6-152737643-C-G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -15859,7 +15859,7 @@ def test_variant295(self): def test_variant296(self): variant = '7-6026775-T-C' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001322012.1:c.688A>G' in list(results.keys()) @@ -16155,7 +16155,7 @@ def test_variant296(self): def test_variant297(self): variant = '7-55242465-GGAATTAAGAGAAGCA-G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001346900.1:c.2077_2091del' in list(results.keys()) @@ -16281,7 +16281,7 @@ def test_variant297(self): def test_variant298(self): variant = '7-55248992-T-TTCCAGGAAGCCT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_005228.3:c.2284-5_2290dup' in list(results.keys()) @@ -16424,7 +16424,7 @@ def test_variant298(self): def test_variant299(self): variant = '7-75932111-C-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001540.4:c.82C>A' in list(results.keys()) @@ -16465,7 +16465,7 @@ def test_variant299(self): def test_variant300(self): variant = '7-91652178-A-AAAC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -16506,7 +16506,7 @@ def test_variant300(self): def 
test_variant301(self): variant = '7-117199644-ATCT-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -16547,7 +16547,7 @@ def test_variant301(self): def test_variant302(self): variant = '7-140453136-AC-CT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NR_148928.1:n.2896_2897delinsAG' in list(results.keys()) @@ -16622,7 +16622,7 @@ def test_variant302(self): def test_variant303(self): variant = '7-140453136-A-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001354609.1:c.1799T>A' in list(results.keys()) @@ -16697,7 +16697,7 @@ def test_variant303(self): def test_variant304(self): variant = '7-140453137-C-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NR_148928.1:n.2896G>A' in list(results.keys()) @@ -16772,7 +16772,7 @@ def test_variant304(self): def test_variant305(self): variant = '7-143013488-A-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -16813,7 +16813,7 @@ def test_variant305(self): def test_variant306(self): variant = '7-143018934-G-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NR_046453.1:n.776G>A' in list(results.keys()) @@ -16854,7 +16854,7 @@ def test_variant306(self): def test_variant307(self): variant = '7-143048771-C-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) 
assert results['flag'] == 'gene_variant' @@ -16895,7 +16895,7 @@ def test_variant307(self): def test_variant308(self): variant = '8-1871951-C-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_014629.3:c.2399C>T' in list(results.keys()) @@ -16970,7 +16970,7 @@ def test_variant308(self): def test_variant309(self): variant = '9-13112056-T-TG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001261407.1:c.5504dup' in list(results.keys()) @@ -17045,7 +17045,7 @@ def test_variant309(self): def test_variant310(self): variant = '9-21971208-C-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_058197.4:c.*74-1G>T' in list(results.keys()) @@ -17137,7 +17137,7 @@ def test_variant310(self): def test_variant311(self): variant = '9-35683240-T-TG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_001301227.1:c.773-3dup' in list(results.keys()) @@ -17212,7 +17212,7 @@ def test_variant311(self): def test_variant312(self): variant = '9-135796754-G-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_000368.4:c.733C>T' in list(results.keys()) @@ -17287,7 +17287,7 @@ def test_variant312(self): def test_variant313(self): variant = 'HG536_PATCH-10391-AC-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -17311,7 +17311,7 @@ def test_variant313(self): def test_variant314(self): variant = 'HG865_PATCH-33547-G-A' - results = 
self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NR_110766.1:n.833+969C>T' in list(results.keys()) @@ -17386,7 +17386,7 @@ def test_variant314(self): def test_variant315(self): variant = 'HG865_PATCH-569441-G-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -17410,7 +17410,7 @@ def test_variant315(self): def test_variant316(self): variant = 'HG865_PATCH-574546-C-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -17434,7 +17434,7 @@ def test_variant316(self): def test_variant317(self): variant = 'HSCHR1_1_CTG31-133178-TAG-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -17458,7 +17458,7 @@ def test_variant317(self): def test_variant318(self): variant = 'HSCHR6_MHC_MANN_CTG1-3848158-T-G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -17482,7 +17482,7 @@ def test_variant318(self): def test_variant319(self): variant = 'HSCHR6_MHC_MANN_CTG1-3851043-C-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -17506,7 +17506,7 @@ def test_variant319(self): def test_variant320(self): variant = 'X-70443101-C-T' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -17547,7 
+17547,7 @@ def test_variant320(self): def test_variant321(self): variant = 'X-107845202-GACCACC-GACC,G' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_033380.2:c.2130_2135del' in list(results.keys()) @@ -17622,7 +17622,7 @@ def test_variant321(self): def test_variant322(self): variant = 'X-153296777-G-A' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_004992.3:c.502C>T' in list(results.keys()) @@ -17680,7 +17680,7 @@ def test_variant322(self): def test_variant323(self): variant = 'NM_198180.2:c.408_410delGTG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -17704,7 +17704,7 @@ def test_variant323(self): def test_variant324(self): variant = 'NM_080877.2:c.1733_1735delinsTTT' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_080877.2:c.1733_1735delinsTTT' in list(results.keys()) @@ -17728,7 +17728,7 @@ def test_variant324(self): def test_variant325(self): variant = 'NM_080877.2:c.1735_1737delinsTGA' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_080877.2:c.1735_1737delinsTGA' in list(results.keys()) @@ -17752,7 +17752,7 @@ def test_variant325(self): def test_variant326(self): variant = 'NM_080877.2:c.1735_1737delinsTAATTGTTC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -17776,7 +17776,7 @@ def test_variant326(self): def test_variant327(self): variant = 
'NM_080877.2:c.1737delinsATTGTTC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -17800,7 +17800,7 @@ def test_variant327(self): def test_variant328(self): variant = 'NM_000088.3:c.4392_*2delinsAGAG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -17824,7 +17824,7 @@ def test_variant328(self): def test_variant329(self): variant = 'NM_000088.3:c.589_591delinsAGAAGC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -17848,7 +17848,7 @@ def test_variant329(self): def test_variant330(self): variant = 'NM_000885.5:c.*2536delinsAGAAAAATCA' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_000885.5:c.*2536delinsAGAAAAATCA' in list(results.keys()) @@ -17872,7 +17872,7 @@ def test_variant330(self): def test_variant331(self): variant = 'NM_002693.2:c.-186_-185delinsCC' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert 'NM_002693.2:c.-186_-185delinsCC' in list(results.keys()) @@ -17896,7 +17896,7 @@ def test_variant331(self): def test_variant332(self): variant = 'NG_009616.1:g.29052_29053insCTACATAG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -17954,7 +17954,7 @@ def test_variant332(self): def test_variant333(self): variant = 'NM_000061.2:c.588_588+1insCTACATAG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = 
self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' @@ -17978,7 +17978,7 @@ def test_variant333(self): def test_variant334(self): variant = 'NM_000061.2:c.588_589insCTACATAG' - results = self.vv.validate(variant, 'GRCh37', 'all') + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) assert results['flag'] == 'gene_variant' From c298305e95cf28c32e1a725ed51beef306dcbe43 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 15 May 2019 09:08:48 +0100 Subject: [PATCH 085/223] Done some final cleaningup and removal of print statements in validate function --- VariantValidator/modules/mappers.py | 60 ++-- VariantValidator/modules/valoutput.py | 7 +- VariantValidator/modules/variant.py | 1 + VariantValidator/modules/vvMixinCore.py | 425 +++++++----------------- VariantValidator/modules/vvMixinInit.py | 5 + 5 files changed, 150 insertions(+), 348 deletions(-) diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index e2b22397..0568fe0c 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -1,8 +1,7 @@ import hgvs import re import copy -import time -import sys +import hgvs.exceptions from .vvLogging import logger from . 
import vvHGVS from .variant import Variant @@ -279,7 +278,7 @@ def transcripts_to_gene(variant, validator): ori = validator.tx_exons(tx_ac=tx_ac, alt_ac=genomic_ac, alt_aln_method=validator.alt_aln_method) plus = re.compile(r"\d\+\d") # finds digit + digit - minus = re.compile(r"\d\-\d") # finds digit - digit + minus = re.compile(r"\d-\d") # finds digit - digit if plus.search(input) or minus.search(input): if 'error' in str(to_g): @@ -395,7 +394,7 @@ def transcripts_to_gene(variant, validator): cck = True if minus.search(input): # Regular expression catches the start of the interval only based on .00-00 pattern - inv_start = re.compile(r"\.\d+\-\d") + inv_start = re.compile(r"\.\d+-\d") if inv_start.search(input): cck = True @@ -895,13 +894,14 @@ def final_tx_to_multiple_genomic(variant, validator, tx_variant): warnings = '' rec_var = '' + gap_compensation = True # Multiple genomic variants # multi_gen_vars = [] - hgvs_coding = validator.hp.parse_hgvs_variant(str(tx_variant)) + variant.hgvs_coding = validator.hp.parse_hgvs_variant(str(tx_variant)) # Gap gene black list try: - gene_symbol = validator.db.get_gene_symbol_from_transcriptID(hgvs_coding.ac) + gene_symbol = validator.db.get_gene_symbol_from_transcriptID(variant.hgvs_coding.ac) except Exception: fn.exceptPass() else: @@ -910,13 +910,12 @@ def final_tx_to_multiple_genomic(variant, validator, tx_variant): # Look for variants spanning introns try: - hgvs_coding = variant.hn.normalize(hgvs_coding) + hgvs_coding = variant.hn.normalize(variant.hgvs_coding) except hgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) - if re.search('boundary', str(error)) or re.search('spanning', str(error)): + if 'boundary' in error or 'spanning' in error: gap_compensation = False - else: - pass + except hgvs.exceptions.HGVSError: fn.exceptPass() @@ -924,57 +923,40 @@ def final_tx_to_multiple_genomic(variant, validator, tx_variant): logger.warning("gap_compensation_3 = " + str(gap_compensation)) multi_g = [] 
multi_list = [] - mapping_options = validator.hdp.get_tx_mapping_options(hgvs_coding.ac) + mapping_options = validator.hdp.get_tx_mapping_options(variant.hgvs_coding.ac) for alt_chr in mapping_options: - if (re.match('NC_', alt_chr[1]) or re.match('NT_', alt_chr[1]) or re.match('NW_', - alt_chr[1])) and \ + if ('NC_' in alt_chr[1] or 'NT_' in alt_chr[1] or 'NW_' in alt_chr[1]) and \ alt_chr[2] == validator.alt_aln_method: multi_list.append(alt_chr[1]) for alt_chr in multi_list: try: # Re set ori - ori = validator.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=alt_chr, - alt_aln_method=validator.alt_aln_method) - orientation = int(ori[0]['alt_strand']) - hgvs_alt_genomic = validator.myvm_t_to_g(hgvs_coding, alt_chr, variant.no_norm_evm, variant.hn) - # Set hgvs_genomic accordingly - hgvs_genomic = copy.deepcopy(hgvs_alt_genomic) - - # genomic_possibilities - # 1. take the simple 3 pr normalized hgvs_genomic - # 2. Lock in hgvs_genomic at its most 5 prime position wrt genome - hgvs_genomic_possibilities = [] + ori = validator.tx_exons(tx_ac=variant.hgvs_coding.ac, alt_ac=alt_chr, + alt_aln_method=validator.alt_aln_method) + + hgvs_alt_genomic = validator.myvm_t_to_g(variant.hgvs_coding, alt_chr, variant.no_norm_evm, variant.hn) gap_mapper = gapped_mapping.GapMapper(variant, validator) # Loop out gap code under these circumstances! - if gap_compensation is True: - - hgvs_alt_genomic, hgvs_coding = gap_mapper.g_to_t_gap_compensation_version3(hgvs_alt_genomic, hgvs_coding, ori, alt_chr, rec_var) + if gap_compensation: + hgvs_alt_genomic, hgvs_coding = gap_mapper.g_to_t_gap_compensation_version3( + hgvs_alt_genomic, variant.hgvs_coding, ori, alt_chr, rec_var) + variant.hgvs_coding = hgvs_coding # Refresh the :g. variant multi_g.append(hgvs_alt_genomic) else: multi_g.append(hgvs_alt_genomic) - corrective_action_taken = 'false' # In this instance, the gap code has generally found an incomplete-alignment rather than a # truly gapped alignment. 
except KeyError: warnings = warnings + ': Suspected incomplete alignment between transcript %s and ' \ - 'genomic reference sequence %s' % (hgvs_coding.ac, - alt_chr) - continue + 'genomic reference sequence %s' % (variant.hgvs_coding.ac, alt_chr) except hgvs.exceptions.HGVSError as e: logger.error(str(e)) logger.debug(str(e)) - continue - - if multi_g != []: - - multi_gen_vars = multi_g # '|'.join(multi_g) - else: - multi_gen_vars = [] - return multi_gen_vars, hgvs_coding + return multi_g diff --git a/VariantValidator/modules/valoutput.py b/VariantValidator/modules/valoutput.py index 3f8380ac..0ecad8ee 100644 --- a/VariantValidator/modules/valoutput.py +++ b/VariantValidator/modules/valoutput.py @@ -4,8 +4,8 @@ class ValOutput(object): - """This object will hold the all final, validated outputs and provide methods to return this into a number of formats, - with or without extra data""" + """This object will hold the all final, validated outputs (Variant objects) and provide methods to return this + into a number of formats, with or without meta data""" def __init__(self, outputlist, validator): self.output_list = outputlist @@ -70,6 +70,8 @@ def format_as_dict(self, with_meta=True): # https://pypi.org/project/pyliftover/ genomic_position_info = variant.primary_assembly_loci for g_p_key in list(genomic_position_info.keys()): + build_to = '' + build_from = '' # Identify the current build and hgvs_genomic descripsion if 'hg' in g_p_key: @@ -136,7 +138,6 @@ def add_meta(self): if os.environ.get("ADD_LOGS") == "True": logs = [] - logString = logger.getString() for l in logger.getString().split("\n"): logs.append(l) metadata["logs"] = logs diff --git a/VariantValidator/modules/variant.py b/VariantValidator/modules/variant.py index a73c5439..647d4db0 100644 --- a/VariantValidator/modules/variant.py +++ b/VariantValidator/modules/variant.py @@ -16,6 +16,7 @@ def __init__(self, original, quibble=None, warnings='', write=True, primary_asse self.quibble = quibble 
self.hgvs_formatted = None self.hgvs_genomic = None + self.hgvs_coding = None self.stashed = None self.trapped = None self.input_parses = None diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 7a0c7af0..ceed5de9 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -1,57 +1,16 @@ -''' -This module contains the main function for variant validator. It's added to the Validator object in the vvObjects file. -''' - import hgvs -import hgvs.parser -import hgvs.dataproviders.uta -import hgvs.dataproviders.seqfetcher -import hgvs.assemblymapper -import hgvs.variantmapper -import hgvs.sequencevariant -import hgvs.validator import hgvs.exceptions -import hgvs.location -import hgvs.posedit -import hgvs.edit import hgvs.normalizer - -# IMPORT PYTHON MODULES import re -import time -#import datetime import copy -import os import sys -from operator import itemgetter -#from pyliftover import LiftOver import traceback -#from configparser import ConfigParser - -#from Bio.Seq import Seq - -# Import variantanalyser and peripheral VV modules -#import ref_seq_type -#import external -#import output_formatter -#import variantanalyser +from hgvs.assemblymapper import AssemblyMapper from .vvLogging import logger -import hgvs from . import vvHGVS -#from variantanalyser import functions as va_func -#from variantanalyser import dbControls as va_dbCrl -#from variantanalyser import hgvs2vcf as vvHGVS -#from variantanalyser import batch as va_btch -#from variantanalyser import g_to_g as va_g2g -#from variantanalyser import supported_chromosome_builds as va_scb -#from variantanalyser.liftover import liftover as lift_over -from .vvLiftover import liftover as lift_over #??? - from . import vvFunctions as fn -from . import vvDatabase from . import vvChromosomes from . import vvMixinConverters -from .vvFunctions import VariantValidatorError from .variant import Variant from . 
import format_converters from . import use_checking @@ -61,38 +20,37 @@ class Mixin(vvMixinConverters.Mixin): - def validate(self, batch_variant, selected_assembly, select_transcripts, transcriptSet = "refseq"): - ''' + """ + This module contains the main function for variant validator. + It's added to the Validator object in the vvObjects file. + """ + + def validate(self, batch_variant, selected_assembly, select_transcripts, transcript_set="refseq"): + """ This is the main validator function. :param batch_variant: A string containing the variant to be validated :param selected_assembly: The version of the genome assembly to use. :param select_transcripts: Can be an array of different transcripts, or 'all' Selecting multiple transcripts will lead to a multiple variant outputs. - :param transcriptSet: 'refseq' or 'ensembl'. Currently only 'refseq' is supported + :param transcript_set: 'refseq' or 'ensembl'. Currently only 'refseq' is supported :return: - ''' + """ logger.info(batch_variant + ' : ' + selected_assembly) - if transcriptSet == "refseq": - alt_aln_method = 'splign' - elif transcriptSet == "ensembl": - alt_aln_method = 'genebuild' + if transcript_set == "refseq": + self.alt_aln_method = 'splign' + elif transcript_set == "ensembl": + self.alt_aln_method = 'genebuild' logger.warning("Ensembl is currently not supported") raise Exception("Ensembl is currently not supported") else: - raise Exception("The transcriptSet variable '%s' is invalid, it must be 'refseq' or 'ensembl'" % transcriptSet) + raise Exception("The transcriptSet variable '%s' is invalid, it must be 'refseq' or 'ensembl'" % + transcript_set) - # Take start time - start_time = time.time() - - # Set pre defined variables - # SeqFetcher - # sf = hgvs.dataproviders.seqfetcher.SeqFetcher() - primary_assembly=None + primary_assembly = None self.selected_assembly = selected_assembly self.select_transcripts = select_transcripts - self.alt_aln_method = alt_aln_method try: # Validation @@ -103,20 
+61,15 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr select_transcripts_dict_plus_version = {} if select_transcripts != 'all': select_transcripts_list = select_transcripts.split('|') - for id in select_transcripts_list: - id = id.strip() - if re.match('LRG', id): - id = self.db.get_RefSeqTranscriptID_from_lrgTranscriptID(id) - if id == 'none': + for trans_id in select_transcripts_list: + trans_id = trans_id.strip() + if 'LRG' in trans_id: + trans_id = self.db.get_RefSeqTranscriptID_from_lrgTranscriptID(trans_id) + if trans_id == 'none': continue - select_transcripts_dict_plus_version[id] = '' - id = id.split('.')[0] - select_transcripts_dict[id] = '' - # Set up gene list dictionary - input_genes = {} - - # Remove genes if transcripts selected - # if select_transcripts != 'all': + select_transcripts_dict_plus_version[trans_id] = '' + trans_id = trans_id.split('.')[0] + select_transcripts_dict[trans_id] = '' # split the batch queries into a list batch_queries = batch_variant.split('|') @@ -128,7 +81,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr query = Variant(queries) self.batch_list.append(query) - # Create List to carry batch data + # Create List to carry batch data output batch_out = [] # Enter the validation loop @@ -151,25 +104,15 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Create Normalizers my_variant.hn = hgvs.normalizer.Normalizer(self.hdp, - cross_boundaries=False, - shuffle_direction=3, - alt_aln_method=alt_aln_method - ) - hn = hgvs.normalizer.Normalizer(self.hdp, - cross_boundaries=False, - shuffle_direction=3, - alt_aln_method=alt_aln_method - ) + cross_boundaries=False, + shuffle_direction=3, + alt_aln_method=self.alt_aln_method + ) my_variant.reverse_normalizer = hgvs.normalizer.Normalizer(self.hdp, - cross_boundaries=False, - shuffle_direction=5, - alt_aln_method=alt_aln_method - ) - reverse_normalizer = 
hgvs.normalizer.Normalizer(self.hdp, - cross_boundaries=False, - shuffle_direction=5, - alt_aln_method=alt_aln_method - ) + cross_boundaries=False, + shuffle_direction=5, + alt_aln_method=self.alt_aln_method + ) # This will be used to order the final output if not my_variant.order: ordering = ordering + 1 @@ -179,7 +122,6 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr try: # Note, ID is not touched. It is always the input variant description. # Quibble will be altered but id will not if type = g. - input = my_variant.quibble logger.trace("Commenced validation of " + str(my_variant.quibble), my_variant) if not my_variant.is_ascii(): @@ -198,7 +140,6 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr my_variant.warnings += ': ' + caution logger.info(caution) - stash_input = copy.copy(input) # Set the primary_assembly if not my_variant.primary_assembly: if selected_assembly == 'hg19': @@ -218,17 +159,15 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: my_variant.primary_assembly = 'GRCh38' primary_assembly = 'GRCh38' - my_variant.warnings += ': Invalid genome build has been specified. Automap has selected the default build (GRCh38)' + my_variant.warnings += ': Invalid genome build has been specified. ' \ + 'Automap has selected the default build (GRCh38)' logger.warning( - 'Invalid genome build has been specified. Automap has selected the default build ' + my_variant.primary_assembly) + 'Invalid genome build has been specified. 
Automap has selected the ' + 'default build ' + my_variant.primary_assembly) else: primary_assembly = my_variant.primary_assembly logger.trace("Completed string formatting", my_variant) - # Set variables that batch will not use but are required - crossing = 'false' - boundary = 'false' - # VCF type 1 toskip = format_converters.vcf2hgvs_stage1(my_variant, self) if toskip: @@ -271,17 +210,13 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if toskip: continue - input = my_variant.quibble - - print("Original: %s" % my_variant.original) - print("Quibble: %s" % my_variant.quibble) - - caution = '' # INITIAL USER INPUT FORMATTING invalid = my_variant.format_quibble() if invalid: - if re.search(r'\w+\:[gcnmrp]', my_variant.quibble) and not re.search(r'\w+\:[gcnmrp]\.', my_variant.quibble): - error = 'Variant description ' + my_variant.quibble + ' lacks the . character between and in the expected pattern :.' + if re.search(r'\w+:[gcnmrp]', my_variant.quibble) and not \ + re.search(r'\w+:[gcnmrp]\.', my_variant.quibble): + error = 'Variant description ' + my_variant.quibble + ' lacks the . character between ' \ + ' and in the expected pattern :.' 
else: error = 'Variant description ' + my_variant.quibble + ' is not in an accepted format' my_variant.warnings += ': ' + error @@ -289,24 +224,11 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue formatted_variant = my_variant.quibble - input = my_variant.quibble stash_input = my_variant.quibble my_variant.stashed = stash_input format_type = my_variant.reftype - # Validator specific variables, note, not all will be necessary for batch, but keep to ensure that batch works - # vars = [] - # refseq_gene = '' - # relevant = '' - warning = '' - automap = 'false' - # vmapped = 'false' - # coords = 'false' - # ensembl_gene = 'false' hgnc_gene_info = 'false' - # issue_link = 'false' - # cr_available = 'false' - # rcmds_tab = 'false' logger.trace("Variant input formatted, proceeding to validate.", my_variant) @@ -318,8 +240,6 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr logger.warning('Gene conversions currently unsupported') continue - # Primary check that hgvs will accept the variant - error = 'false' # Change RNA bases to upper case but nothing else if format_type == ":r.": formatted_variant = formatted_variant.upper() @@ -332,11 +252,10 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr try: input_parses = self.hp.parse_hgvs_variant(formatted_variant) - print(input_parses, input_parses.ac, type(input_parses.ac)) my_variant.hgvs_formatted = input_parses except hgvs.exceptions.HGVSError as e: my_variant.warnings += ': ' + str(e) - logger.warning(error) + logger.warning(str(e)) continue if 'LRG' in my_variant.hgvs_formatted.ac: @@ -345,14 +264,13 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr my_variant.hgvs_formatted.ac = my_variant.hgvs_formatted.ac.upper() if hasattr(my_variant.hgvs_formatted.posedit.edit, 'alt'): if my_variant.hgvs_formatted.posedit.edit.alt is not None: - my_variant.hgvs_formatted.posedit.edit.alt = 
my_variant.hgvs_formatted.posedit.edit.alt.upper() + my_variant.hgvs_formatted.posedit.edit.alt = \ + my_variant.hgvs_formatted.posedit.edit.alt.upper() if hasattr(my_variant.hgvs_formatted.posedit.edit, 'ref'): if my_variant.hgvs_formatted.posedit.edit.ref is not None: - my_variant.hgvs_formatted.posedit.edit.ref = my_variant.hgvs_formatted.posedit.edit.ref.upper() + my_variant.hgvs_formatted.posedit.edit.ref = \ + my_variant.hgvs_formatted.posedit.edit.ref.upper() formatted_variant = str(my_variant.hgvs_formatted) - input = str(my_variant.hgvs_formatted) - - assert formatted_variant == str(my_variant.hgvs_formatted) my_variant.set_quibble(str(my_variant.hgvs_formatted)) @@ -362,25 +280,25 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr trap_ens_in = str(my_variant.hgvs_formatted) sim_tx = self.hdp.get_similar_transcripts(my_variant.hgvs_formatted.ac) for line in sim_tx: - print(line) if line[2] and line[3] and line[4] and line[5] and line[6]: - print("RESET") my_variant.hgvs_formatted.ac = line[1] my_variant.set_quibble(str(my_variant.hgvs_formatted)) formatted_variant = my_variant.quibble break if my_variant.refsource == 'ENS': - error = 'Unable to map ' + my_variant.hgvs_formatted.ac + ' to an equivalent RefSeq transcript' + error = 'Unable to map ' + my_variant.hgvs_formatted.ac + \ + ' to an equivalent RefSeq transcript' my_variant.warnings += ': ' + error logger.warning(error) continue else: - my_variant.warnings += ': ' + str(trap_ens_in) + ' automapped to equivalent RefSeq transcript ' + my_variant.quibble - logger.warning(str(trap_ens_in) + ' automapped to equivalent RefSeq transcript ' + my_variant.quibble) + my_variant.warnings += ': ' + str(trap_ens_in) + ' automapped to equivalent ' \ + 'RefSeq transcript ' + my_variant.quibble + logger.warning(str(trap_ens_in) + ' automapped to equivalent RefSeq ' + 'transcript ' + my_variant.quibble) logger.trace("HVGS acceptance test passed", my_variant) # Check whether supported 
genome build is requested for non g. descriptions - historic_assembly = 'false' mapable_assemblies = { 'GRCh37': True, 'GRCh38': True, @@ -394,44 +312,32 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # They initiate quickly, so no need to move them unnecessarily # Create easy variant mapper (over variant mapper) and splign locked evm - my_variant.evm = hgvs.assemblymapper.AssemblyMapper(self.hdp, - assembly_name=primary_assembly, - alt_aln_method=alt_aln_method, - normalize=True, - replace_reference=True - ) - - evm = hgvs.assemblymapper.AssemblyMapper(self.hdp, - assembly_name=primary_assembly, - alt_aln_method=alt_aln_method, - normalize=True, - replace_reference=True - ) + my_variant.evm = AssemblyMapper(self.hdp, + assembly_name=primary_assembly, + alt_aln_method=self.alt_aln_method, + normalize=True, + replace_reference=True + ) # Setup a reverse normalize instance and non-normalize evm - my_variant.no_norm_evm = hgvs.assemblymapper.AssemblyMapper(self.hdp, - assembly_name=primary_assembly, - alt_aln_method=alt_aln_method, - normalize=False, - replace_reference=True - ) - no_norm_evm = hgvs.assemblymapper.AssemblyMapper(self.hdp, - assembly_name=primary_assembly, - alt_aln_method=alt_aln_method, - normalize=False, - replace_reference=True - ) + my_variant.no_norm_evm = AssemblyMapper(self.hdp, + assembly_name=primary_assembly, + alt_aln_method=self.alt_aln_method, + normalize=False, + replace_reference=True + ) # Create a specific minimal evm with no normalizer and no replace_reference - my_variant.min_evm = hgvs.assemblymapper.AssemblyMapper(self.hdp, - assembly_name=primary_assembly, - alt_aln_method=alt_aln_method, - normalize=False, - replace_reference=False - ) + my_variant.min_evm = AssemblyMapper(self.hdp, + assembly_name=primary_assembly, + alt_aln_method=self.alt_aln_method, + normalize=False, + replace_reference=False + ) else: - error = 'Mapping of ' + formatted_variant + ' to genome assembly ' + 
primary_assembly + ' is not supported' + error = 'Mapping of ' + formatted_variant + ' to genome assembly ' + \ + primary_assembly + ' is not supported' my_variant.warnings += ': ' + error logger.warning(error) continue @@ -441,15 +347,14 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # ensures that end pos is not > start pos wrt 3' UTRs. # Also identifies some variants which span into the downstream sequence # i.e. out of bounds - astr = re.compile(r'\*') if '*' in str(my_variant.hgvs_formatted.posedit): input_parses_copy = copy.deepcopy(my_variant.hgvs_formatted) input_parses_copy.type = "c" # Map to n. position # Create easy variant mapper (over variant mapper) and splign locked evm try: - to_n = evm.c_to_n(input_parses_copy) - except hgvs.exceptions.HGVSError as e: + to_n = my_variant.evm.c_to_n(input_parses_copy) + except hgvs.exceptions.HGVSError: fn.exceptPass() else: if to_n.posedit.pos.end.base < to_n.posedit.pos.start.base: @@ -464,13 +369,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr logger.warning(error) continue - assert formatted_variant == str(my_variant.hgvs_formatted) - # Catch missing version number in refseq - ref_type = re.compile(r"^N\w\w\d") is_version = re.compile(r"\d\.\d") - en_type = re.compile(r'^ENS') - lrg_type = re.compile(r'LRG') if my_variant.refsource == 'RefSeq' and not is_version.search(str(my_variant.hgvs_formatted)): error = 'RefSeq variant accession numbers MUST include a version number' my_variant.warnings += ': ' + str(error) @@ -481,9 +381,6 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if my_variant.refsource == 'LRG': format_converters.lrg_to_refseq(my_variant, self) - formatted_variant = my_variant.quibble - input = str(my_variant.hgvs_formatted) - stash_input = input logger.trace("LRG check for conversion to refseq completed", my_variant) # Additional Incorrectly input variant capture training @@ -493,17 
+390,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue logger.trace("Passed 'common mistakes' catcher", my_variant) - assert formatted_variant == str(my_variant.hgvs_formatted) - # Primary validation of the input toskip = use_checking.structure_checks(my_variant, self) - print(toskip, my_variant.hgvs_formatted, my_variant.quibble) if toskip: continue logger.trace("Variant structure and contents searches passed", my_variant) - assert formatted_variant == str(my_variant.hgvs_formatted) - # Mitochondrial variants toskip = format_converters.mitochondrial(my_variant, self) if toskip: @@ -519,58 +411,33 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if toskip: continue - assert formatted_variant == str(my_variant.hgvs_formatted) - # COLLECT gene symbol, name and ACCESSION INFORMATION # Gene symbol if my_variant.reftype != ':g.': toskip = collect_info.get_transcript_info(my_variant, self) - print(toskip, my_variant.hgvs_formatted, my_variant.hgvs_genomic) if toskip: continue - assert formatted_variant == str(my_variant.hgvs_formatted) # Now start mapping from genome to transcripts - if my_variant.reftype == ':g.': toskip = mappers.gene_to_transcripts(my_variant, self) - print(toskip, my_variant.hgvs_formatted, my_variant.hgvs_genomic) if toskip: continue - assert formatted_variant == str(my_variant.hgvs_formatted) - # TYPE = :c. - if format_type == ':c.' 
or format_type == ':n.': - print('hgvs_formatted:', my_variant.hgvs_formatted) - print('input:', input) - print('trapped:', my_variant.trapped) - print('quibble:', my_variant.quibble) - print('formatted_variant', formatted_variant) - #print(my_variant.hgvs_formatted, my_variant.trapped, input) toskip = mappers.transcripts_to_gene(my_variant, self) - print(toskip, my_variant.hgvs_formatted) if toskip: - print("CARRYING ON") continue # Set the data my_variant.output_type_flag = 'gene' my_variant.description = hgnc_gene_info - # my_variant.coding = str(hgvs_coding) - # my_variant.genomic_r = str(hgvs_refseq) - # my_variant.genomic_g = str(hgvs_genomic) - # my_variant.protein = str(hgvs_protein) my_variant.primary_assembly = primary_assembly - # if gap_compensation is True: - # my_variant.test_stash_tx_left = test_stash_tx_left - # my_variant.test_stash_tx_right = test_stash_tx_right - # finish timing logger.traceEnd(my_variant) # Report errors to User and VV admin except KeyboardInterrupt: raise - except: + except Exception: my_variant.output_type_flag = 'error' error = 'Validation error' my_variant.warnings = str(error) @@ -580,7 +447,6 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr er = str('\n'.join(tbk)) logger.error(str(exc_type) + " " + str(exc_value)) logger.debug(er) - #debug raise # Outside the for loop @@ -593,29 +459,15 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if not variant.write: continue - # Blank VCF - # chr = '' - # pos = '' - # ref = '' - # alt = '' - - # Fromulate a json type response - dict_out = {} - - # Set gap compensation bool - gap_compensation = True - # warngins warnings = variant.warnings warnings = re.sub('del[GATC][GATC][GATC][GATC]+', 'del', warnings) warnings = re.sub('^: ', '', warnings) warnings = re.sub('::', ':', warnings) - # Submitted variant - submitted = variant.original - # Genomic sequence variation genomic_variant = variant.genomic_g + 
hgvs_genomic_variant = genomic_variant # genomic accession if genomic_variant != '': @@ -642,14 +494,13 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_lrg = copy.deepcopy(hgvs_refseqgene_variant) hgvs_lrg.ac = rsg_ac[0] lrg_variant = fn.valstr(hgvs_lrg) - if rsg_ac[1] == 'public': - pass - else: + if rsg_ac[1] != 'public': warnings = warnings + ': The current status of ' + str( hgvs_lrg.ac) + ' is pending therefore changes may be made to the LRG reference sequence' # Transcript sequence variation tx_variant = variant.coding + hgvs_transcript_variant = tx_variant if tx_variant != '': if '(' in tx_variant and ')' in tx_variant: tx_variant = tx_variant.split('(')[1] @@ -662,7 +513,6 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr transcript_accession = hgvs_transcript_variant.ac # Handle LRG - lrg_status = 'public' lrg_transcript = self.db.get_lrgTranscriptID_from_RefSeqTranscriptID(transcript_accession) if lrg_transcript == 'none': lrg_transcript_variant = '' @@ -671,14 +521,13 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # instances we will be able to display the LRG_tx without being able to # display the LRG gene data - # if not re.search('RefSeqGene', refseqgene_variant) or refseqgene_variant != '': - # if hgvs_refseqgene_variant != 'RefSeqGene record not available' and hgvs_refseqgene_variant != 'false': try: hgvs_lrg_t = self.vm.g_to_t(hgvs_refseqgene_variant, transcript_accession) hgvs_lrg_t.ac = lrg_transcript lrg_transcript_variant = fn.valstr(hgvs_lrg_t) - except: - if hgvs_transcript_variant.posedit.pos.start.offset == 0 and hgvs_transcript_variant.posedit.pos.end.offset == 0: + except Exception: + if hgvs_transcript_variant.posedit.pos.start.offset == 0 and \ + hgvs_transcript_variant.posedit.pos.end.offset == 0: hgvs_lrg_t = copy.copy(hgvs_transcript_variant) hgvs_lrg_t.ac = lrg_transcript lrg_transcript_variant = fn.valstr(hgvs_lrg_t) @@ 
-698,29 +547,29 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr except hgvs.exceptions.HGVSError as e: error = str(e) if 'intronic variant' in error: - genome_context_transcript_variant = genomic_accession + '(' + transcript_accession + '):c.' + str( - hgvs_transcript_variant.posedit) + genome_context_transcript_variant = genomic_accession + '(' + transcript_accession +\ + '):c.' + str(hgvs_transcript_variant.posedit) if refseqgene_variant != '': hgvs_refseqgene_variant = self.hp.parse_hgvs_variant(refseqgene_variant) refseqgene_accession = hgvs_refseqgene_variant.ac hgvs_coding_from_refseqgene = self.vm.g_to_t(hgvs_refseqgene_variant, - hgvs_transcript_variant.ac) + hgvs_transcript_variant.ac) hgvs_coding_from_refseqgene = fn.valstr(hgvs_coding_from_refseqgene) hgvs_coding_from_refseqgene = self.hp.parse_hgvs_variant(hgvs_coding_from_refseqgene) - RefSeqGene_context_transcript_variant = refseqgene_accession + '(' + transcript_accession + '):c.' + str( - hgvs_coding_from_refseqgene.posedit.pos) + str( - hgvs_coding_from_refseqgene.posedit.edit) + refseqgene_context_transcript_variant = refseqgene_accession + '(' + \ + transcript_accession + '):c.' 
+ str(hgvs_coding_from_refseqgene.posedit.pos) + str( + hgvs_coding_from_refseqgene.posedit.edit) else: - RefSeqGene_context_transcript_variant = '' + refseqgene_context_transcript_variant = '' else: genome_context_transcript_variant = '' # transcript_variant - RefSeqGene_context_transcript_variant = '' + refseqgene_context_transcript_variant = '' else: genome_context_transcript_variant = '' # transcript_variant - RefSeqGene_context_transcript_variant = '' + refseqgene_context_transcript_variant = '' else: genome_context_transcript_variant = '' - RefSeqGene_context_transcript_variant = '' + refseqgene_context_transcript_variant = '' # Protein description predicted_protein_variant = variant.protein @@ -739,19 +588,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: gene_symbol = '' - # Transcript description - transcript_description = variant.description - - # Stashed variants - # if valid.test_stash_tx_left: - # test_stash_tx_left = valid.test_stash_tx_left - # if valid.test_stash_tx_right: - # test_stash_tx_right = valid.test_stash_tx_right - - # Multiple genomic variants - # multi_gen_vars = [] if tx_variant != '': - multi_gen_vars, hgvs_coding = mappers.final_tx_to_multiple_genomic(variant, self, tx_variant) + multi_gen_vars = mappers.final_tx_to_multiple_genomic(variant, self, tx_variant) else: # HGVS genomic in the absence of a transcript variant @@ -809,22 +647,22 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: if 'GRC' in build: alt_dict = {build.lower(): {'hgvs_genomic_description': fn.valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['grc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } - } + 'vcf': {'chr': vcf_dict['grc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } + } else: alt_dict = {build.lower(): {'hgvs_genomic_description': fn.valstr(alt_gen_var), - 'vcf': {'chr': 
vcf_dict['ucsc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } - } + 'vcf': {'chr': vcf_dict['ucsc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } + } # Append alt_genomic_dicts.append(alt_dict) @@ -832,22 +670,21 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, 'hg38', variant.reverse_normalizer, self.sf) alt_dict = {'hg38': {'hgvs_genomic_description': fn.valstr(alt_gen_var), - 'vcf': {'chr': vcf_dict['ucsc_chr'], - 'pos': vcf_dict['pos'], - 'ref': vcf_dict['ref'], - 'alt': vcf_dict['alt'] - } - } - } + 'vcf': {'chr': vcf_dict['ucsc_chr'], + 'pos': vcf_dict['pos'], + 'ref': vcf_dict['ref'], + 'alt': vcf_dict['alt'] + } + } + } # Append alt_genomic_dicts.append(alt_dict) # Warn not directly mapped to specified genome build if genomic_accession != '': - caution = '' if primary_assembly.lower() not in list(primary_genomic_dicts.keys()): warnings = warnings + ': ' + str( - hgvs_coding) + ' cannot be mapped directly to genome build ' + primary_assembly + \ + variant.hgvs_coding) + ' cannot be mapped directly to genome build ' + primary_assembly + \ ': See alternative genomic loci or alternative genome builds for aligned genomic positions' warn_list = warnings.split(': ') @@ -877,33 +714,17 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr format_p = predicted_protein_variant format_p = re.sub(r'\(LRG_.+?\)', '', format_p) re_parse_protein = self.hp.parse_hgvs_variant(format_p) - re_parse_protein_singleAA = fn.single_letter_protein(re_parse_protein) - predicted_protein_variant_dict["slr"] = str(re_parse_protein_singleAA) + re_parse_protein_single_aa = fn.single_letter_protein(re_parse_protein) + predicted_protein_variant_dict["slr"] = str(re_parse_protein_single_aa) except hgvs.exceptions.HGVSParseError: fn.exceptPass() else: predicted_protein_variant_dict["slr"] = 
str(predicted_protein_variant) - # # Populate the dictionary - # dict_out['submitted_variant'] = submitted - # dict_out['gene_symbol'] = gene_symbol - # dict_out['transcript_description'] = transcript_description - # dict_out['hgvs_transcript_variant'] = tx_variant - # dict_out['genome_context_intronic_sequence'] = genome_context_transcript_variant - # dict_out['refseqgene_context_intronic_sequence'] = RefSeqGene_context_transcript_variant - # dict_out['hgvs_refseqgene_variant'] = refseqgene_variant - # dict_out['hgvs_predicted_protein_consequence'] = predicted_protein_variant_dict - # dict_out['validation_warnings'] = warnings_out - # dict_out['hgvs_lrg_transcript_variant'] = lrg_transcript_variant - # dict_out['hgvs_lrg_variant'] = lrg_variant - # dict_out['alt_genomic_loci'] = alt_genomic_dicts - # dict_out['primary_assembly_loci'] = primary_genomic_dicts - # dict_out['reference_sequence_records'] = '' - variant.gene_symbol = gene_symbol variant.hgvs_transcript_variant = tx_variant variant.genome_context_intronic_sequence = genome_context_transcript_variant - variant.refseqgene_context_intronic_sequence = RefSeqGene_context_transcript_variant + variant.refseqgene_context_intronic_sequence = refseqgene_context_transcript_variant variant.hgvs_refseqgene_variant = refseqgene_variant variant.hgvs_predicted_protein_consequence = predicted_protein_variant_dict variant.validation_warnings = warnings_out @@ -922,22 +743,14 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Append to a list for return batch_out.append(variant) - print('Creating Output object') output = valoutput.ValOutput(batch_out, self) return output # Bug catcher except KeyboardInterrupt: raise - except BaseException as e: + except BaseException: # Debug mode exc_type, exc_value, last_traceback = sys.exc_info() - te = traceback.format_exc() - # tr = ''.join(traceback.format_stack()) - tbk = [str(exc_type), str(exc_value), str(te)] - er = '\n'.join(tbk) - # raise 
VariantValidatorError('Validation error') - # Return - # return logger.critical(str(exc_type) + " " + str(exc_value)) raise diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index 43a43e19..51205ed1 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -173,6 +173,11 @@ def __init__(self): normalize=False, replace_reference=True ) + # Created during validate method + self.selected_assembly = None + self.select_transcripts = None + self.alt_aln_method = None + self.batch_list = [] def __del__(self): del self.db From 474c1739f3944b6a4fb6995a5f826d87e5c7b190 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 15 May 2019 10:35:55 +0100 Subject: [PATCH 086/223] Cleaned up format_converters and created new function to run the initial series of conversions --- VariantValidator/modules/format_converters.py | 159 ++++++++++++------ VariantValidator/modules/vvMixinCore.py | 41 +---- 2 files changed, 105 insertions(+), 95 deletions(-) diff --git a/VariantValidator/modules/format_converters.py b/VariantValidator/modules/format_converters.py index 06d11cf6..1bc3d97c 100644 --- a/VariantValidator/modules/format_converters.py +++ b/VariantValidator/modules/format_converters.py @@ -1,5 +1,5 @@ import re -import hgvs +import hgvs.exceptions import copy from .vvLogging import logger from .variant import Variant @@ -7,6 +7,52 @@ from . import vvFunctions as fn +def initial_format_conversions(variant, validator, select_transcripts_dict_plus_version): + # VCF type 1 + toskip = vcf2hgvs_stage1(variant, validator) + if toskip: + return True + + # API type non-HGVS + # e.g. Chr16:2099572TC>T + toskip = vcf2hgvs_stage2(variant, validator) + if toskip: + return True + + toskip = vcf2hgvs_stage3(variant, validator) + if toskip: + return True + + toskip = gene_symbol_catch(variant, validator, select_transcripts_dict_plus_version) + if toskip: + return True + + # NG_:c. or NC_:c. 
+ toskip = refseq_catch(variant, validator, select_transcripts_dict_plus_version) + if toskip: + return True + + # Find not_sub type in input e.g. GGGG>G + toskip = vcf2hgvs_stage4(variant, validator) + if toskip: + return True + + toskip = indel_catching(variant, validator) + if toskip: + return True + + # Tackle compound variant descriptions NG or NC (NM_) i.e. correctly input NG/NC_(NM_):c. + intronic_converter(variant) + + # Extract variants from HGVS allele descriptions + # http://varnomen.hgvs.org/recommendations/DNA/variant/alleles/ + toskip = allele_parser(variant, validator) + if toskip: + return True + + return False + + def vcf2hgvs_stage1(variant, validator): """ VCF2HGVS stage 1. converts chr-pos-ref-alt into chr:posRef>Alt @@ -42,14 +88,14 @@ def vcf2hgvs_stage1(variant, validator): pre_input + ' as a deletion whereas VCF specification 4.1 onwards would treat ' + pre_input + ' as ALT = REF. Validator will output both alternatives.') variant.write = False - input_A = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], 'del') - input_B = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[2]) - queryA = Variant(variant.original, quibble=input_A, warnings=variant.warnings, - primary_assembly=variant.primary_assembly, order=variant.order) - queryB = Variant(variant.original, quibble=input_B, warnings=variant.warnings, - primary_assembly=variant.primary_assembly, order=variant.order) - validator.batch_list.append(queryA) - validator.batch_list.append(queryB) + input_a = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], 'del') + input_b = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[2]) + query_a = Variant(variant.original, quibble=input_a, warnings=variant.warnings, + primary_assembly=variant.primary_assembly, order=variant.order) + query_b = Variant(variant.original, quibble=input_b, warnings=variant.warnings, + primary_assembly=variant.primary_assembly, 
order=variant.order) + validator.batch_list.append(query_a) + validator.batch_list.append(query_b) skipvar = True elif re.search(r'[-:]\d+[-:][-:][GATC]+', variant.quibble) or \ re.search(r'[-:]\d+[-:][.][-:][GATC]+', variant.quibble): @@ -61,7 +107,6 @@ def vcf2hgvs_stage1(variant, validator): # Extract primary_assembly if provided if re.match(r'GRCh3\d+-', variant.quibble) or re.match(r'hg\d+-', variant.quibble): in_list = variant.quibble.split('-') - selected_assembly = in_list[0] variant.quibble = '-'.join(in_list[1:]) pre_input = variant.quibble vcf_elements = pre_input.split('-') @@ -84,8 +129,8 @@ def vcf2hgvs_stage2(variant, validator): The reference sequence type is also assigned. """ skipvar = False - if re.search(r'\w+\:', variant.quibble) and not re.search(r'\w+\:[gcnmrp]\.', variant.quibble): - if re.search(r'\w+\:[gcnmrp]', variant.quibble) and not re.search(r'\w+\:[gcnmrp]\.', variant.quibble): + if re.search(r'\w+:', variant.quibble) and not re.search(r'\w+:[gcnmrp]\.', variant.quibble): + if re.search(r'\w+:[gcnmrp]', variant.quibble) and not re.search(r'\w+:[gcnmrp]\.', variant.quibble): # Missing dot pass else: @@ -96,7 +141,7 @@ def vcf2hgvs_stage2(variant, validator): variant.primary_assembly = 'GRCh38' input_list = variant.quibble.split(':') pos_ref_alt = str(input_list[1]) - positionAndEdit = input_list[1] + position_and_edit = input_list[1] if not re.match(r'N[CGTWMRP]_', variant.quibble) and not re.match(r'LRG_', variant.quibble): chr_num = str(input_list[0]) chr_num = chr_num.upper().strip() @@ -121,7 +166,7 @@ def vcf2hgvs_stage2(variant, validator): required_base = validator.sf.fetch_seq(accession, start_i=position - 1, end_i=position) ref = required_base + old_ref alt = required_base - positionAndEdit = str(position) + ref + '>' + alt + position_and_edit = str(position) + ref + '>' + alt elif 'ins' in variant.quibble: pos = re.match(r'\d+', pos_ref_alt) position = pos.group(0) @@ -131,7 +176,7 @@ def vcf2hgvs_stage2(variant, validator): 
required_base = validator.sf.fetch_seq(accession, start_i=position - 1, end_i=position) ref = required_base alt = required_base + old_alt - positionAndEdit = str(position) + ref + '>' + alt + position_and_edit = str(position) + ref + '>' + alt # Assign reference sequence type ref_type = validator.db.ref_type_assign(accession) if re.match('LRG_', accession): @@ -141,7 +186,7 @@ def vcf2hgvs_stage2(variant, validator): accession = validator.db.get_RefSeqTranscriptID_from_lrgTranscriptID(accession) else: accession = accession - variant.quibble = str(accession) + ref_type + str(positionAndEdit) + variant.quibble = str(accession) + ref_type + str(position_and_edit) except: fn.exceptPass(variant) @@ -150,7 +195,7 @@ def vcf2hgvs_stage2(variant, validator): if re.search(r'[gcnmrp]\.', variant.quibble) and not re.search(r':[gcnmrp]\.', variant.quibble): error = 'Unable to identify a colon (:) in the variant description %s. A colon is required in HGVS variant ' \ 'descriptions to separate the reference accession from the reference type i.e. :. ' \ - 'e.g. :c.' % (variant.quibble) + 'e.g. :c.' 
% variant.quibble variant.warnings += ': ' + error logger.warning(error) skipvar = True @@ -191,7 +236,7 @@ def vcf2hgvs_stage3(variant, validator): query_a_symbol = input_list[0] is_it_a_gene = validator.db.get_hgnc_symbol(query_a_symbol) if is_it_a_gene == 'none': - positionAndEdit = input_list[1] + position_and_edit = input_list[1] chr_num = str(input_list[0]) chr_num = chr_num.upper().strip() if re.match('CHR', chr_num): @@ -201,7 +246,7 @@ def vcf2hgvs_stage3(variant, validator): variant.warnings += ': ' + chr_num + \ ' is not part of genome build ' + validator.selected_assembly skipvar = True - variant.quibble = str(accession) + ':' + str(positionAndEdit) + variant.quibble = str(accession) + ':' + str(position_and_edit) except Exception: fn.exceptPass(variant) @@ -220,7 +265,7 @@ def gene_symbol_catch(variant, validator, select_transcripts_dict_plus_version): boundaries etc of the alternative transcript variants may not be equivalent """ skipvar = False - if re.search(r'\w+\:[cn]\.', variant.quibble): + if re.search(r'\w+:[cn]\.', variant.quibble): try: pre_input = variant.quibble.split(':') query_a_symbol = pre_input[0] @@ -245,14 +290,14 @@ def gene_symbol_catch(variant, validator, select_transcripts_dict_plus_version): warnings=variant.warnings, primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) - logger.resub('HGVS variant nomenclature does not allow the use of a gene symbol (' + \ + logger.resub('HGVS variant nomenclature does not allow the use of a gene symbol (' + query_a_symbol + ') in place of a valid reference sequence') else: variant.warnings += ': ' + 'HGVS variant nomenclature does not allow the use of a gene symbol ('\ + query_a_symbol + ') in place of a valid reference sequence: Re-submit ' + \ variant.quibble + ' and specify transcripts from the following: ' + \ 'select_transcripts=' + select_from_these_transcripts - logger.warning('HGVS variant nomenclature does not allow the use of a gene 
symbol (' + \ + logger.warning('HGVS variant nomenclature does not allow the use of a gene symbol (' + query_a_symbol + ') in place of a valid reference sequence: Re-submit ' + variant.quibble + ' and specify transcripts from the following: ' + 'select_transcripts=' + select_from_these_transcripts) @@ -270,12 +315,12 @@ def refseq_catch(variant, validator, select_transcripts_dict_plus_version): descriptions """ skipvar = False - if re.search(r'\w+\:[cn]', variant.quibble): + if re.search(r'\w+:[cn]', variant.quibble): try: if variant.quibble.startswith('NG_'): - refSeqGeneID = variant.quibble.split(':')[0] + ref_seq_gene_id = variant.quibble.split(':')[0] tx_edit = variant.quibble.split(':')[1] - gene_symbol = validator.db.get_gene_symbol_from_refSeqGeneID(refSeqGeneID) + gene_symbol = validator.db.get_gene_symbol_from_refSeqGeneID(ref_seq_gene_id) if gene_symbol != 'none': uta_symbol = validator.db.get_uta_symbol(gene_symbol) available_transcripts = validator.hdp.get_tx_for_gene(uta_symbol) @@ -291,7 +336,7 @@ def refseq_catch(variant, validator, select_transcripts_dict_plus_version): variant.warnings = 'NG_:c.PositionVariation descriptions should not be used unless a ' \ 'transcript reference sequence has also been provided e.g. ' \ 'NG_(NM_):c.PositionVariation' - refreshed_description = refSeqGeneID + '(' + transcript + ')' + ':' + tx_edit + refreshed_description = ref_seq_gene_id + '(' + transcript + ')' + ':' + tx_edit query = Variant(variant.original, quibble=refreshed_description, warnings=variant.warnings, primary_assembly=variant.primary_assembly, order=variant.order) @@ -303,8 +348,8 @@ def refseq_catch(variant, validator, select_transcripts_dict_plus_version): else: variant.warnings += ': ' + 'A transcript reference sequence has not been provided e.g. ' \ 'NG_(NM_):c.PositionVariation. 
Re-submit ' + variant.quibble + \ - ' but also specify transcripts from the following: ' + 'select_transcripts='\ - + select_from_these_transcripts + ' but also specify transcripts from the following: ' + \ + 'select_transcripts=' + select_from_these_transcripts logger.warning('A transcript reference sequence has not been provided e.g. ' 'NG_(NM_):c.PositionVariation. Re-submit ' + variant.quibble + ' but also ' 'specify transcripts from the following: select_transcripts=' + @@ -363,8 +408,8 @@ def vcf2hgvs_stage4(variant, validator): # Split description split_colon = not_sub.split(':') ref_ac = split_colon[0] - remainder = split_colon[1] - split_dot = remainder.split('.') + remainder1 = split_colon[1] + split_dot = remainder1.split('.') ref_type = split_dot[0] remainder = split_dot[1] posedit = remainder @@ -375,14 +420,13 @@ def vcf2hgvs_stage4(variant, validator): r = re.compile(r"([0-9]+)([GATCgatc]+)") try: m = r.search(remainder) - start = m.group(1) delete = m.group(2) starts = posedit.split(delete)[0] re_try = ref_ac + ':' + ref_type + '.' + starts + 'del' + delete[0] + 'ins' + insert hgvs_re_try = validator.hp.parse_hgvs_variant(re_try) hgvs_re_try.posedit.edit.ref = delete start_pos = str(hgvs_re_try.posedit.pos.start) - if re.search(r'\-', start_pos): + if '-' in start_pos: base, offset = start_pos.split('-') new_offset = 0 - int(offset) + (len(delete)) end_pos = int(base) @@ -390,7 +434,7 @@ def vcf2hgvs_stage4(variant, validator): hgvs_re_try.posedit.pos.end.offset = int(new_offset) - 1 not_delins = ref_ac + ':' + ref_type + '.' 
+ start_pos + '_' + str( hgvs_re_try.posedit.pos.end) + 'del' + delete + 'ins' + insert - elif re.search(r'\+', start_pos): + elif '+' in start_pos: base, offset = start_pos.split('+') end_pos = int(base) + (len(delete) - int(offset) - 1) new_offset = 0 + int(offset) + (len(delete) - 1) @@ -406,6 +450,7 @@ def vcf2hgvs_stage4(variant, validator): fn.exceptPass() not_delins = not_sub # Parse into hgvs object + hgvs_not_delins = None try: hgvs_not_delins = validator.hp.parse_hgvs_variant(not_delins) except hgvs.exceptions.HGVSError as e: @@ -416,7 +461,8 @@ def vcf2hgvs_stage4(variant, validator): alt_list = alts.split(',') # Assemble and re-submit for alt in alt_list: - variant.warnings = 'Multiple ALT sequences detected: auto-submitting all possible combinations' + variant.warnings = 'Multiple ALT sequences detected: ' \ + 'auto-submitting all possible combinations' variant.write = False refreshed_description = header + '>' + alt query = Variant(variant.original, quibble=refreshed_description, @@ -437,15 +483,13 @@ def vcf2hgvs_stage4(variant, validator): not_delins = str(variant.hn.normalize(hgvs_not_delins)) except hgvs.exceptions.HGVSError as e: error = str(e) - if re.search('Normalization of intronic variants is not supported', error): + if 'Normalization of intronic variants is not supported' in error: not_delins = not_delins else: - issue_link = '' variant.warnings += ': ' + str(error) logger.warning(str(e)) skipvar = True # Create warning - caution = 'Variant description ' + variant.quibble + ' is not HGVS compliant' automap = variant.quibble + ' automapped to ' + not_delins variant.warnings += ': ' + automap # Change input to normalized variant @@ -533,8 +577,8 @@ def allele_parser(variant, validation): descriptions should be re-submitted by the user at the gene or genome level """ caution = '' - if (re.search(r':[gcnr].\[', variant.quibble) and re.search(r'\;', variant.quibble)) or ( - re.search(r':[gcrn].\d+\[', variant.quibble) and re.search(r'\;', 
variant.quibble)) or (re.search(r'\(\;\)', variant.quibble)): + if (re.search(r':[gcnr].\[', variant.quibble) and ';' in variant.quibble) or ( + re.search(r':[gcrn].\d+\[', variant.quibble) and ';' in variant.quibble) or ('(;)' in variant.quibble): # handle LRG inputs if re.match(r'^LRG', variant.quibble): if re.match(r'^LRG\d+', variant.quibble): @@ -544,30 +588,33 @@ def allele_parser(variant, validation): caution = string + ' updated to ' + reference if not re.match(r'^LRG_\d+', variant.quibble): pass - elif re.match(r'^LRG_\d+:g.', variant.quibble) or re.match(r'^LRG_\d+:p.', variant.quibble) or re.match(r'^LRG_\d+:c.', - variant.quibble) or re.match( - r'^LRG_\d+:n.', variant.quibble): + elif re.match(r'^LRG_\d+:g.', variant.quibble) or re.match(r'^LRG_\d+:p.', variant.quibble) \ + or re.match(r'^LRG_\d+:c.', variant.quibble) or re.match(r'^LRG_\d+:n.', variant.quibble): lrg_reference, variation = variant.quibble.split(':') refseqgene_reference = validation.db.get_RefSeqGeneID_from_lrgID(lrg_reference) if refseqgene_reference != 'none': variant.quibble = refseqgene_reference + ':' + variation if caution == '': - caution = lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation + caution = lrg_reference + ':' + variation + ' automapped to ' + \ + refseqgene_reference + ':' + variation else: - caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation + caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + \ + refseqgene_reference + ':' + variation variant.warnings += ': ' + str(caution) logger.warning(str(caution)) - elif re.match(r'^LRG_\d+t\d+:c.', variant.quibble) or re.match(r'^LRG_\d+t\d+:n.', variant.quibble) or re.match( - r'^LRG_\d+t\d+:p.', variant.quibble) or re.match(r'^LRG_\d+t\d+:g.', variant.quibble): + elif re.match(r'^LRG_\d+t\d+:c.', variant.quibble) or re.match(r'^LRG_\d+t\d+:n.', variant.quibble) or \ + 
re.match(r'^LRG_\d+t\d+:p.', variant.quibble) or re.match(r'^LRG_\d+t\d+:g.', variant.quibble): lrg_reference, variation = variant.quibble.split(':') refseqtranscript_reference = validation.db.get_RefSeqTranscriptID_from_lrgTranscriptID( lrg_reference) if refseqtranscript_reference != 'none': variant.quibble = refseqtranscript_reference + ':' + variation if caution == '': - caution = lrg_reference + ':' + variation + ' automapped to ' + refseqtranscript_reference + ':' + variation + caution = lrg_reference + ':' + variation + ' automapped to ' + \ + refseqtranscript_reference + ':' + variation else: - caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + refseqtranscript_reference + ':' + variation + caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + \ + refseqtranscript_reference + ':' + variation variant.warnings += ': ' + str(caution) logger.warning(str(caution)) else: @@ -579,16 +626,16 @@ def allele_parser(variant, validation): logger.resub('Automap has extracted possible variant descriptions, resubmitting') for allele in alleles: query = Variant(variant.original, quibble=allele, warnings=variant.warnings, write=True, - primary_assembly=variant.primary_assembly, order=variant.order) + primary_assembly=variant.primary_assembly, order=variant.order) validation.batch_list.append(query) variant.write = False return True except fn.alleleVariantError as e: - if re.search("Cannot validate sequence of an intronic variant", str(e)): + if "Cannot validate sequence of an intronic variant" in str(e): variant.warnings += ': ' + 'Intronic positions not supported for HGVS Allele descriptions' logger.warning('Intronic positions not supported for HGVS Allele descriptions') return True - elif re.search("No transcript definition for ", str(e)): + elif "No transcript definition for " in str(e): variant.warnings += ': ' + str(e) logger.warning(str(e)) return True @@ -649,7 +696,7 @@ def mitochondrial(variant, validator): 
variant.warnings += ': ' + error logger.warning(error) return True - except KeyError as e: + except KeyError: error = 'Currently unable to validate ' + hgvs_mito.ac + ' sequence variation' variant.warnings += ': ' + error logger.warning(error) @@ -671,6 +718,7 @@ def proteins(variant, validator): """Handle protein sequences""" if variant.reftype == ':p.': error = None + hgvs_object = None # Try to validate the variant try: hgvs_object = validator.hp.parse_hgvs_variant(variant.hgvs_formatted) @@ -689,8 +737,6 @@ def proteins(variant, validator): # RefSeq databases if validator.alt_aln_method != 'genebuild': # Gene description - requires GenBank search to get all the required info, i.e. transcript variant ID - # accession number - accession = hgvs_object.ac # Look for the accession in our database # Connect to database and send request # record = validator.entrez_efetch(db="nuccore", id=accession, rettype="gb", retmode="text") @@ -702,7 +748,8 @@ def proteins(variant, validator): else: error = str( hgvs_object) + ' is HGVS compliant and contains a valid reference amino acid description' - reason = 'Protein level variant descriptions are not fully supported due to redundancy in the genetic code' + reason = 'Protein level variant descriptions are not fully supported due to redundancy' \ + ' in the genetic code' variant.warnings += ': ' + reason + ': ' + error variant.protein = str(hgvs_object) logger.warning(reason + ": " + error) @@ -715,7 +762,7 @@ def rna(variant, validator): convert r, into c. """ if variant.reftype == ':r.': - hgvs_input = validator.hp.parse_hgvs_variant(str(variant.hgvs_formatted)) # Traps the hgvs variant of r. for further use + hgvs_input = validator.hp.parse_hgvs_variant(str(variant.hgvs_formatted)) # Change to coding variant variant.reftype = ':c.' # Change input to reflect! 
diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index ceed5de9..b0fd1000 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -168,45 +168,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr primary_assembly = my_variant.primary_assembly logger.trace("Completed string formatting", my_variant) - # VCF type 1 - toskip = format_converters.vcf2hgvs_stage1(my_variant, self) - if toskip: - continue - - # API type non-HGVS - # e.g. Chr16:2099572TC>T - toskip = format_converters.vcf2hgvs_stage2(my_variant, self) - if toskip: - continue - - toskip = format_converters.vcf2hgvs_stage3(my_variant, self) - if toskip: - continue - - toskip = format_converters.gene_symbol_catch(my_variant, self, select_transcripts_dict_plus_version) - if toskip: - continue - - # NG_:c. or NC_:c. - toskip = format_converters.refseq_catch(my_variant, self, select_transcripts_dict_plus_version) - if toskip: - continue - - # Find not_sub type in input e.g. GGGG>G - toskip = format_converters.vcf2hgvs_stage4(my_variant, self) - if toskip: - continue - - toskip = format_converters.indel_catching(my_variant, self) - if toskip: - continue - - # Tackle compound variant descriptions NG or NC (NM_) i.e. correctly input NG/NC_(NM_):c. 
- format_converters.intronic_converter(my_variant) - - # Extract variants from HGVS allele descriptions - # http://varnomen.hgvs.org/recommendations/DNA/variant/alleles/ - toskip = format_converters.allele_parser(my_variant, self) + toskip = format_converters.initial_format_conversions(my_variant, self, + select_transcripts_dict_plus_version) if toskip: continue From 51b6eaa2044477a5845bfed80badcece9b729cbd Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 15 May 2019 11:00:51 +0100 Subject: [PATCH 087/223] Renamed trapped attribute to pre_RNA_conversion --- VariantValidator/modules/mappers.py | 17 ++++++++--------- VariantValidator/modules/variant.py | 2 +- VariantValidator/modules/vvMixinCore.py | 2 +- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index 0568fe0c..f179d4a8 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -407,8 +407,7 @@ def transcripts_to_gene(variant, validator): # This should only ever hit coding and RNA variants if 'del' in formatted_variant: # RNA - looking at trapped variant which was saved before RNA converted to cDNA - #TODO: rename variant.trapped to variant.pre_RNA_conversion or something similar so it makes sense. - if ':r.' in variant.trapped: + if ':r.' 
in variant.pre_RNA_conversion: coding = validator.coding(formatted_variant, validator.hp) trans_acc = coding.ac # c to Genome coordinates - Map the variant to the genome @@ -433,7 +432,7 @@ def transcripts_to_gene(variant, validator): query.posedit = posedit query.type = 'r' post_var = str(query) - automap = variant.trapped + ' automapped to ' + str(post_var) + automap = variant.pre_RNA_conversion + ' automapped to ' + str(post_var) variant.warnings += ': ' + str(caution) + ': ' + str(automap) # Kill current line and append for re-submission @@ -481,7 +480,7 @@ def transcripts_to_gene(variant, validator): automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the ' \ 'selected transcript' # automapping of variant completed - automap = variant.trapped + ' automapped to ' + str(post_var) + automap = variant.pre_RNA_conversion + ' automapped to ' + str(post_var) variant.warnings += str(caution) + ': ' + str(automap) # Kill current line and append for re-submission @@ -494,7 +493,7 @@ def transcripts_to_gene(variant, validator): validator.batch_list.append(query) else: # del not in formatted_variant - if ':r.' in variant.trapped: + if ':r.' 
in variant.pre_RNA_conversion: coding = validator.coding(formatted_variant, validator.hp) trans_acc = coding.ac # c to Genome coordinates - Map the variant to the genome @@ -543,7 +542,7 @@ def transcripts_to_gene(variant, validator): caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' # automapping of variant completed - automap = str(variant.trapped) + ' automapped to ' + str(post_var) + automap = str(variant.pre_RNA_conversion) + ' automapped to ' + str(post_var) variant.warnings += ': ' + str(caution) + ': ' + str( automap) @@ -557,9 +556,9 @@ def transcripts_to_gene(variant, validator): validator.batch_list.append(query) # If cck not true - elif ':r.' in variant.trapped: + elif ':r.' in variant.pre_RNA_conversion: # set input hgvs object - hgvs_rna_input = validator.hp.parse_hgvs_variant(variant.trapped) # Traps the hgvs variant of r. for further use + hgvs_rna_input = validator.hp.parse_hgvs_variant(variant.pre_RNA_conversion) # Traps the hgvs variant of r. 
for further use inp = str(validator.hgvs_r_to_c(hgvs_rna_input)) # Regex if plus.search(input) or minus.search(input): @@ -599,7 +598,7 @@ def transcripts_to_gene(variant, validator): caution = 'The variant description ' + input + ' requires alteration to comply with HGVS variant ' \ 'nomenclature:' # automapping of variant completed - automap = variant.trapped + ' automapped to ' + output + automap = variant.pre_RNA_conversion + ' automapped to ' + output variant.warnings += ': ' + caution + ': ' + automap # Kill current line and append for re-submission diff --git a/VariantValidator/modules/variant.py b/VariantValidator/modules/variant.py index 647d4db0..416f6ef4 100644 --- a/VariantValidator/modules/variant.py +++ b/VariantValidator/modules/variant.py @@ -18,7 +18,7 @@ def __init__(self, original, quibble=None, warnings='', write=True, primary_asse self.hgvs_genomic = None self.hgvs_coding = None self.stashed = None - self.trapped = None + self.pre_RNA_conversion = None self.input_parses = None self.warnings = warnings diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index b0fd1000..798ffff1 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -369,7 +369,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue trapped_input = str(my_variant.hgvs_formatted) - my_variant.trapped = trapped_input + my_variant.pre_RNA_conversion = trapped_input toskip = format_converters.rna(my_variant, self) if toskip: continue From 9e634cd9f05ef925cfc1a2602b0e77d44a5d8d4e Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 15 May 2019 11:38:27 +0100 Subject: [PATCH 088/223] Cleaned up mappers.py --- VariantValidator/modules/mappers.py | 239 ++++++++++++---------------- 1 file changed, 104 insertions(+), 135 deletions(-) diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index f179d4a8..764cd1dd 100644 --- 
a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -22,7 +22,6 @@ def gene_to_transcripts(variant, validator): except KeyError: error = 'Reference sequence ' + variant.hgvs_genomic.ac + ' is either not supported or does not exist' if error != 'false': - reason = 'Invalid variant description' variant.warnings += ': ' + str(error) logger.warning(str(error)) return True @@ -38,12 +37,13 @@ def gene_to_transcripts(variant, validator): variant.hgvs_genomic = g_query # Collect rel_var - # rel_var is a keyworded list of relevant transcripts with associated coding variants + # rel_var is a key-worded list of relevant transcripts with associated coding variants """ Initial simple projection from the provided g. position all overlapping transcripts """ - rel_var = validator.relevant_transcripts(variant.hgvs_genomic, variant.evm, validator.alt_aln_method, variant.reverse_normalizer) + rel_var = validator.relevant_transcripts(variant.hgvs_genomic, variant.evm, validator.alt_aln_method, + variant.reverse_normalizer) # Double check rel_vars have not been missed when mapping from a RefSeqGene if len(rel_var) != 0 and 'NG_' in variant.hgvs_genomic.ac: @@ -51,12 +51,12 @@ def gene_to_transcripts(variant, validator): hgvs_coding_variant = validator.hp.parse_hgvs_variant(var) try: variant.hgvs_genomic = validator.myevm_t_to_g(hgvs_coding_variant, variant.no_norm_evm, - variant.primary_assembly, variant.hn) - except hgvs.exceptions.HGVSError as e: + variant.primary_assembly, variant.hn) + except hgvs.exceptions.HGVSError: try_rel_var = [] else: - try_rel_var = validator.relevant_transcripts(variant.hgvs_genomic, variant.evm, validator.alt_aln_method, - variant.reverse_normalizer) + try_rel_var = validator.relevant_transcripts(variant.hgvs_genomic, variant.evm, + validator.alt_aln_method, variant.reverse_normalizer) if len(try_rel_var) > len(rel_var): rel_var = try_rel_var break @@ -65,13 +65,14 @@ def gene_to_transcripts(variant, validator): # Tripple 
check this assumption by querying the gene position database if len(rel_var) == 0: - vcf_dict = vvHGVS.hgvs2vcf(variant.hgvs_genomic, variant.primary_assembly, variant.reverse_normalizer, validator.sf) + vcf_dict = vvHGVS.hgvs2vcf(variant.hgvs_genomic, variant.primary_assembly, variant.reverse_normalizer, + validator.sf) not_di = str(variant.hgvs_genomic.ac) + ':g.' + str(vcf_dict['pos']) + '_' + str( int(vcf_dict['pos']) + (len(vcf_dict['ref']) - 1)) + 'del' + vcf_dict['ref'] + 'ins' + \ - vcf_dict['alt'] + vcf_dict['alt'] hgvs_not_di = validator.hp.parse_hgvs_variant(not_di) rel_var = validator.relevant_transcripts(hgvs_not_di, variant.evm, validator.alt_aln_method, - variant.reverse_normalizer) + variant.reverse_normalizer) # list return statements """ @@ -90,17 +91,18 @@ def gene_to_transcripts(variant, validator): # Extract data if refseqgene_data['valid'] == 'true': - input = refseqgene_data['hgvs_genomic'] + genomic_input = refseqgene_data['hgvs_genomic'] # re_submit # Tag the line so that it is not written out - variant.warnings += ': ' + str(variant.hgvs_formatted) + ' automapped to genome position ' + str(input) - query = Variant(variant.original, quibble=input, warnings=variant.warnings, - primary_assembly=variant.primary_assembly, order=variant.order) + variant.warnings += ': ' + str(variant.hgvs_formatted) + ' automapped to genome position ' + \ + str(genomic_input) + query = Variant(variant.original, quibble=genomic_input, warnings=variant.warnings, + primary_assembly=variant.primary_assembly, order=variant.order) - coding = 'intergenic' validator.batch_list.append(query) else: - error = 'Mapping unavailable for RefSeqGene ' + str(variant.hgvs_formatted) + ' using alignment method = ' + validator.alt_aln_method + error = 'Mapping unavailable for RefSeqGene ' + str(variant.hgvs_formatted) + \ + ' using alignment method = ' + validator.alt_aln_method variant.warnings += ': ' + str(error) logger.warning(str(error)) return True @@ -120,7 +122,8 @@ def 
gene_to_transcripts(variant, validator): # Map to RefSeqGene if available refseqgene_data = validator.chr_to_rsg(variant.hgvs_genomic, variant.hn, validator.vr) rsg_data = '' - # Example {'gene': 'NTHL1', 'hgvs_refseqgene': 'NG_008412.1:g.3455_3464delCAAACACACA', 'valid': 'true'} + # Example {'gene': 'NTHL1', 'hgvs_refseqgene': 'NG_008412.1:g.3455_3464delCAAACACACA', + # 'valid': 'true'} for data in refseqgene_data: if data['valid'] == 'true': data['hgvs_refseqgene'] = validator.hp.parse_hgvs_variant(data['hgvs_refseqgene']) @@ -137,7 +140,8 @@ def gene_to_transcripts(variant, validator): logger.warning(str(error)) return True else: - error = 'Please ensure the requested chromosome version relates to a supported genome build. Supported genome builds are: GRCh37, GRCh38, hg19 and hg38' + error = 'Please ensure the requested chromosome version relates to a supported genome build. ' \ + 'Supported genome builds are: GRCh37, GRCh38, hg19 and hg38' variant.warnings += ': ' + str(error) logger.warning(str(error)) return True @@ -150,27 +154,29 @@ def gene_to_transcripts(variant, validator): data, nw_rel_var = gap_mapper.gapped_g_to_c(rel_var) - # Warn the user that the g. 
description is not valid - if data['gapped_alignment_warning'] != '': - if data['disparity_deletion_in'][0] == 'transcript': - corrective_action_taken = 'Automap has deleted ' + str( - data['disparity_deletion_in'][1]) + ' bp from chromosomal reference sequence ' + str( - variant.hgvs_genomic.ac) + ' to ensure perfect alignment with transcript reference sequence(s)' + data['gapped_transcripts'] - if data['disparity_deletion_in'][0] == 'chromosome': - corrective_action_taken = 'Automap has added ' + str( - data['disparity_deletion_in'][1]) + ' bp to chromosomal reference sequence ' + str( - variant.hgvs_genomic.ac) + ' to ensure perfect alignment with transcript reference sequence(s) ' + data['gapped_transcripts'] - - # Add additional data to the front of automap - if data['auto_info'] != '': - automap = data['auto_info'] + '\n' + 'false' + # # Warn the user that the g. description is not valid + # if data['gapped_alignment_warning'] != '': + # if data['disparity_deletion_in'][0] == 'transcript': + # corrective_action_taken = 'Automap has deleted ' + str( + # data['disparity_deletion_in'][1]) + ' bp from chromosomal reference sequence ' + str( + # variant.hgvs_genomic.ac) + ' to ensure perfect alignment with transcript reference sequence(s)'\ + # + data['gapped_transcripts'] + # if data['disparity_deletion_in'][0] == 'chromosome': + # corrective_action_taken = 'Automap has added ' + str( + # data['disparity_deletion_in'][1]) + ' bp to chromosomal reference sequence ' + str( + # variant.hgvs_genomic.ac) + ' to ensure perfect alignment with transcript reference sequence(s) '\ + # + data['gapped_transcripts'] + # + # # Add additional data to the front of automap + # if data['auto_info'] != '': + # automap = data['auto_info'] + '\n' + 'false' rel_var = nw_rel_var # Set the values and append to batch_list for c_description in rel_var: query = Variant(variant.original, quibble=str(c_description), warnings=variant.warnings, - primary_assembly=variant.primary_assembly, 
order=variant.order) + primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) logger.warning("Continue reached when mapping transcript types to variants") # Call next description @@ -183,8 +189,6 @@ def transcripts_to_gene(variant, validator): # Flag for validation valid = False - boundary = 'false' - warning = '' caution = '' error = '' # Collect information for genomic level validation @@ -192,47 +196,32 @@ def transcripts_to_gene(variant, validator): tx_ac = obj.ac - input = str(variant.quibble) + quibble_input = str(variant.quibble) formatted_variant = str(variant.hgvs_formatted) # Do we keep it? if validator.select_transcripts != 'all': - if tx_ac in list(validator.select_transcripts_dict_plus_version.keys()): - pass - # If not get rid of it! - else: + if tx_ac not in list(validator.select_transcripts_dict_plus_version.keys()): # By marking it as Do Not Write and continuing through the validation loop variant.write = False return True - else: - pass - print(variant.hgvs_formatted) - print(variant.quibble) - # Set a cross_variant object - cross_variant = 'false' # Se rec_var to '' so it can be updated later rec_var = '' # First task is to get the genomic equivalent, and print useful error messages if it can't be found. 
try: to_g = validator.myevm_t_to_g(obj, variant.no_norm_evm, variant.primary_assembly, variant.hn) - print('Genomic:', to_g) genomic_ac = to_g.ac except hgvs.exceptions.HGVSDataNotAvailableError as e: if ('~' in str(e) and 'Alignment is incomplete' in str(e)) or "No relevant genomic mapping options" in str(e): # Unable to map the input variant onto a genomic position if '~' in str(e) and 'Alignment is incomplete' in str(e): - error_list = str(e).split('~')[:-1] - combos = [ - 'Full alignment data between the specified transcript reference sequence and all GRCh37 and GRCh38 ' - 'genomic reference sequences (including alternate chromosome assemblies, patches and RefSeqGenes) ' - 'are not available: Consequently the input variant description cannot be fully validated and is ' - 'not supported: Use the Gene to Transcripts function to determine whether an updated transcript ' - 'reference sequence is available'] - # Partial alignment data is available for the following genomic reference sequences: '] - error = '; '.join(combos) - error = error.replace(': ;', ': ') + error = 'Full alignment data between the specified transcript reference sequence and all GRCh37 ' \ + 'and GRCh38 genomic reference sequences (including alternate chromosome assemblies, ' \ + 'patches and RefSeqGenes) are not available: Consequently the input variant description ' \ + 'cannot be fully validated and is not supported: Use the Gene to Transcripts function to ' \ + 'determine whether an updated transcript reference sequence is available' else: error = str(e) error = error + ': Consequently the input variant description cannot be fully validated and is not ' \ @@ -280,7 +269,7 @@ def transcripts_to_gene(variant, validator): plus = re.compile(r"\d\+\d") # finds digit + digit minus = re.compile(r"\d-\d") # finds digit - digit - if plus.search(input) or minus.search(input): + if plus.search(quibble_input) or minus.search(quibble_input): if 'error' in str(to_g): if validator.alt_aln_method != 
'genebuild': error = "If the following error message does not address the issue and the problem persists please " \ @@ -308,9 +297,8 @@ def transcripts_to_gene(variant, validator): # Normalize was I believe to replace ref. Mapping does this anyway # to_g = variant.hn.normalize(to_g) formatted_variant = str(validator.myevm_g_to_t(variant.evm, to_g, tx_ac)) - tx_ac = '' - elif ':g.' in input: + elif ':g.' in quibble_input: if plus.search(formatted_variant) or minus.search(formatted_variant): to_g = validator.genomic(formatted_variant, variant.no_norm_evm, variant.primary_assembly, variant.hn) if 'error' in str(to_g): @@ -339,17 +327,14 @@ def transcripts_to_gene(variant, validator): # Normalize was I believe to replace ref. Mapping does this anyway # to_g = hn.normalize(to_g) formatted_variant = str(validator.myevm_g_to_t(variant.evm, to_g, tx_ac)) - tx_ac = '' else: # Normalize the variant - error = 'false' try: h_variant = variant.hn.normalize(obj) except hgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) if 'Unsupported normalization of variants spanning the exon-intron boundary' in error: - h_variant = obj formatted_variant = formatted_variant caution = 'This coding sequence variant description spans at least one intron' automap = 'Use of the corresponding genomic sequence variant descriptions may be invalid. 
' \ @@ -359,24 +344,6 @@ def transcripts_to_gene(variant, validator): else: formatted_variant = str(h_variant) - # tx_ac = '' - # # Create a crosser (exon boundary crossed) variant - # crossed_variant = str(variant.evm._maybe_normalize(obj)) - # if formatted_variant == crossed_variant: - # cross_variant = 'false' - # else: - # hgvs_crossed_variant = variant.evm._maybe_normalize(obj) - # cross_variant = [ - # "Coding sequence allowing for exon boundary crossing (default = no crossing)", - # crossed_variant, hgvs_crossed_variant.ac] - # cr_available = 'true' - # - # # control of cross_variant - # if boundary == 'false': - # cross_variant = 'false' - - # Moved this forwards and removed the previous section as it doesn't seem to be used anywhere - error = validator.validateHGVS(formatted_variant) if error == 'false': valid = True @@ -387,15 +354,15 @@ def transcripts_to_gene(variant, validator): # Tackle the plus intronic offset cck = False - if plus.search(input): + if plus.search(quibble_input): # Regular expression catches the start of the interval only based on .00+00 pattern inv_start = re.compile(r"\.\d+\+\d") - if inv_start.search(input): + if inv_start.search(quibble_input): cck = True - if minus.search(input): + if minus.search(quibble_input): # Regular expression catches the start of the interval only based on .00-00 pattern inv_start = re.compile(r"\.\d+-\d") - if inv_start.search(input): + if inv_start.search(quibble_input): cck = True # COORDINATE CHECKER @@ -411,11 +378,12 @@ def transcripts_to_gene(variant, validator): coding = validator.coding(formatted_variant, validator.hp) trans_acc = coding.ac # c to Genome coordinates - Map the variant to the genome - pre_var = validator.genomic(formatted_variant, variant.no_norm_evm, variant.primary_assembly,variant.hn) + pre_var = validator.genomic(formatted_variant, variant.no_norm_evm, variant.primary_assembly, + variant.hn) # genome back to C coordinates post_var = validator.myevm_g_to_t(variant.evm, pre_var, 
trans_acc) - test = validator.hp.parse_hgvs_variant(input) + test = validator.hp.parse_hgvs_variant(quibble_input) if post_var.posedit.pos.start.base != test.posedit.pos.start.base or \ post_var.posedit.pos.end.base != test.posedit.pos.end.base: caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected ' \ @@ -453,7 +421,7 @@ def transcripts_to_gene(variant, validator): pre_var = validator.hp.parse_hgvs_variant(formatted_variant) try: pre_var = validator.myevm_t_to_g(pre_var, variant.no_norm_evm, variant.primary_assembly, - variant.hn) + variant.hn) except Exception as e: error = str(e) if error == 'expected from_start_i <= from_end_i': @@ -470,15 +438,12 @@ def transcripts_to_gene(variant, validator): variant.warnings += ': ' + str(error) logger.warning(str(error)) return True - query = post_var - test = validator.hp.parse_hgvs_variant(input) + test = validator.hp.parse_hgvs_variant(quibble_input) if post_var.posedit.pos.start.base != test.posedit.pos.start.base or \ post_var.posedit.pos.end.base != test.posedit.pos.end.base: caution = 'The entered coordinates do not agree with the intron/exon boundaries for the ' \ 'selected transcript:' - automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the ' \ - 'selected transcript' # automapping of variant completed automap = variant.pre_RNA_conversion + ' automapped to ' + str(post_var) variant.warnings += str(caution) + ': ' + str(automap) @@ -489,7 +454,8 @@ def transcripts_to_gene(variant, validator): # Set the values and append to batch_list hgvs_vt = validator.hp.parse_hgvs_variant(str(post_var)) assert str(hgvs_vt) == str(post_var) - query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=variant.primary_assembly, order=variant.order) + query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, + primary_assembly=variant.primary_assembly, order=variant.order) 
validator.batch_list.append(query) else: # del not in formatted_variant @@ -497,11 +463,12 @@ def transcripts_to_gene(variant, validator): coding = validator.coding(formatted_variant, validator.hp) trans_acc = coding.ac # c to Genome coordinates - Map the variant to the genome - pre_var = validator.genomic(formatted_variant, variant.no_norm_evm, variant.primary_assembly,variant.hn) + pre_var = validator.genomic(formatted_variant, variant.no_norm_evm, variant.primary_assembly, + variant.hn) # genome back to C coordinates post_var = validator.myevm_g_to_t(variant.evm, pre_var, trans_acc) - test = validator.hp.parse_hgvs_variant(input) + test = validator.hp.parse_hgvs_variant(quibble_input) if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' @@ -515,7 +482,7 @@ def transcripts_to_gene(variant, validator): query.posedit = posedit query.type = 'r' post_var = str(query) - automap = input + ' automapped to ' + post_var + automap = quibble_input + ' automapped to ' + post_var variant.warnings += ': ' + str(caution) + ': ' + str( automap) @@ -525,22 +492,25 @@ def transcripts_to_gene(variant, validator): # Set the values and append to batch_list hgvs_vt = validator.hp.parse_hgvs_variant(str(post_var)) assert str(hgvs_vt) == str(post_var) - query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=variant.primary_assembly, order=variant.order) + query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, + primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) else: coding = validator.coding(formatted_variant, validator.hp) trans_acc = coding.ac # c to Genome coordinates - Map 
the variant to the genome - pre_var = validator.genomic(formatted_variant, variant.no_norm_evm, variant.primary_assembly,variant.hn) + pre_var = validator.genomic(formatted_variant, variant.no_norm_evm, variant.primary_assembly, + variant.hn) # genome back to C coordinates post_var = validator.myevm_g_to_t(variant.evm, pre_var, trans_acc) - test = validator.hp.parse_hgvs_variant(input) - if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: - caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' - automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' + test = validator.hp.parse_hgvs_variant(quibble_input) + if post_var.posedit.pos.start.base != test.posedit.pos.start.base or \ + post_var.posedit.pos.end.base != test.posedit.pos.end.base: + caution = 'The entered coordinates do not agree with the intron/exon boundaries for the ' \ + 'selected transcript:' # automapping of variant completed automap = str(variant.pre_RNA_conversion) + ' automapped to ' + str(post_var) variant.warnings += ': ' + str(caution) + ': ' + str( @@ -552,16 +522,18 @@ def transcripts_to_gene(variant, validator): # Set the values and append to batch_list hgvs_vt = validator.hp.parse_hgvs_variant(str(post_var)) assert str(hgvs_vt) == str(post_var) - query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=variant.primary_assembly, order=variant.order) + query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, + primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) # If cck not true elif ':r.' in variant.pre_RNA_conversion: # set input hgvs object - hgvs_rna_input = validator.hp.parse_hgvs_variant(variant.pre_RNA_conversion) # Traps the hgvs variant of r. for further use + # Traps the hgvs variant of r. 
for further use + hgvs_rna_input = validator.hp.parse_hgvs_variant(variant.pre_RNA_conversion) inp = str(validator.hgvs_r_to_c(hgvs_rna_input)) # Regex - if plus.search(input) or minus.search(input): + if plus.search(quibble_input) or minus.search(quibble_input): to_g = validator.genomic(inp, variant.no_norm_evm, variant.primary_assembly, variant.hn) if 'error' in str(to_g): error = "If the following error message does not address the issue and the problem persists " \ @@ -580,7 +552,7 @@ def transcripts_to_gene(variant, validator): hgvs_inp = validator.hp.parse_hgvs_variant(inp) try: hgvs_otp = variant.hn.normalize(hgvs_inp) - except hgvs.exceptions.HGVSError as e: + except hgvs.exceptions.HGVSError: hgvs_otp = hgvs_inp # Set remaining variables @@ -595,7 +567,7 @@ def transcripts_to_gene(variant, validator): output = otp.replace(':c.', ':r.') # Apply coordinates test if query != test: - caution = 'The variant description ' + input + ' requires alteration to comply with HGVS variant ' \ + caution = 'The variant description ' + quibble_input + ' requires alteration to comply with HGVS variant ' \ 'nomenclature:' # automapping of variant completed automap = variant.pre_RNA_conversion + ' automapped to ' + output @@ -607,17 +579,15 @@ def transcripts_to_gene(variant, validator): # Set the values and append to batch_list hgvs_vt = validator.hp.parse_hgvs_variant(str(query)) assert str(hgvs_vt) == str(query) - query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=variant.primary_assembly, order=variant.order) + query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, + primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) - elif ':g.' in input: - pass - - else: + elif ':g.' 
not in quibble_input: query = validator.hp.parse_hgvs_variant(formatted_variant) - test = validator.hp.parse_hgvs_variant(input) + test = validator.hp.parse_hgvs_variant(quibble_input) if query.posedit.pos != test.posedit.pos: - caution = 'The variant description ' + input + ' requires alteration to comply with HGVS variant ' \ + caution = 'The variant description ' + quibble_input + ' requires alteration to comply with HGVS variant ' \ 'nomenclature:' # automapping of variant completed automap = str(test) + ' automapped to ' + str(query) @@ -629,16 +599,18 @@ def transcripts_to_gene(variant, validator): # Set the values and append to batch_list hgvs_vt = validator.hp.parse_hgvs_variant(str(query)) assert str(hgvs_vt) == str(query) - query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, primary_assembly=variant.primary_assembly, order=variant.order) + query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, + primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) # VALIDATION of intronic variants - pre_valid = validator.hp.parse_hgvs_variant(input) + pre_valid = validator.hp.parse_hgvs_variant(quibble_input) post_valid = validator.hp.parse_hgvs_variant(formatted_variant) # valid is false if the input contains a \d+\d, \d-\d or :g. 
if not valid: - genomic_validation = str(validator.genomic(input, variant.no_norm_evm, variant.primary_assembly, variant.hn)) + genomic_validation = str(validator.genomic(quibble_input, variant.no_norm_evm, variant.primary_assembly, + variant.hn)) if fn.valstr(pre_valid) != fn.valstr(post_valid): if variant.reftype != ':g.': if caution == '': @@ -648,15 +620,10 @@ def transcripts_to_gene(variant, validator): # Apply validation to intronic variant descriptions (should be valid but make sure) error = validator.validateHGVS(genomic_validation) - if error == 'false': - valid = True - else: + if error != 'false': variant.warnings += ': ' + error return True - assert valid is True - # If valid is False we won't reach this part, so I can remove the if condition - # v0.1a1 edit if fn.valstr(pre_valid) != fn.valstr(post_valid): if variant.reftype == ':g.': @@ -721,7 +688,7 @@ def transcripts_to_gene(variant, validator): # Get orientation of the gene wrt genome and a list of exons mapped to the genome ori = validator.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=reverse_normalized_hgvs_genomic.ac, - alt_aln_method=validator.alt_aln_method) + alt_aln_method=validator.alt_aln_method) # --- GAP MAPPING 2 --- # Loop out gap finding code under these circumstances! 
@@ -735,7 +702,7 @@ def transcripts_to_gene(variant, validator): recovered_rsg = [] for sequence in sequences_for_tx: - if re.match('^NG_', sequence[1]): + if sequence[1].startswith('NG_'): recovered_rsg.append(sequence[1]) recovered_rsg.sort() recovered_rsg.reverse() @@ -751,7 +718,7 @@ def transcripts_to_gene(variant, validator): # Normalize the RefSeqGene Variant to the correct position try: hgvs_refseq = variant.hn.normalize(hgvs_refseq) - except Exception as e: + except Exception: # if re.search('insertion length must be 1', error): hgvs_refseq = 'RefSeqGene record not available' else: @@ -772,7 +739,7 @@ def transcripts_to_gene(variant, validator): # Gene orientation wrt genome ori = validator.tx_exons(tx_ac=hgvs_coding.ac, alt_ac=hgvs_genomic.ac, - alt_aln_method=validator.alt_aln_method) + alt_aln_method=validator.alt_aln_method) ori = int(ori[0]['alt_strand']) # Look for normalized variant options that do not match hgvs_coding @@ -798,7 +765,7 @@ def transcripts_to_gene(variant, validator): rng = variant.hn.normalize(query_genomic) try: c_for_p = validator.vm.g_to_t(rng, hgvs_coding.ac) - except hgvs.exceptions.HGVSInvalidIntervalError as e: + except hgvs.exceptions.HGVSInvalidIntervalError: c_for_p = fn.valstr(hgvs_seek_var) try: # Predicted effect on protein @@ -813,7 +780,8 @@ def transcripts_to_gene(variant, validator): except NotImplementedError: fn.exceptPass() elif ori == 1: - # Double check protein position by reverse_norm genomic, and normalize back to c. for normalize or not to normalize issue + # Double check protein position by reverse_norm genomic, and normalize back to c. 
+ # for normalize or not to normalize issue rng = variant.reverse_normalizer.normalize(query_genomic) try: # Diagram where - = intron and E = Exon @@ -828,7 +796,7 @@ def transcripts_to_gene(variant, validator): c_for_p = validator.vm.g_to_t(rng, hgvs_coding.ac) try: variant.hn.normalize(c_for_p) - except hgvs.exceptions.HGVSError as e: + except hgvs.exceptions.HGVSError: fn.exceptPass() else: # hgvs_protein = va_func.protein(str(c_for_p), variant.evm, hp) @@ -870,16 +838,17 @@ def transcripts_to_gene(variant, validator): # Updated reference sequence except hgvs.exceptions.HGVSError as e: error = str(e) - if re.search('does not agree with reference sequence', str(error)): + if 'does not agree with reference sequence' in error: match = re.findall(r'\(([GATC]+)\)', error) new_ref = match[1] hgvs_updated.posedit.edit.ref = new_ref validator.vr.validate(hgvs_updated) updated_transcript_variant = hgvs_updated - variant.warnings += ': ' + 'A more recent version of the selected reference sequence ' + hgvs_coding.ac + ' is available (' + updated_transcript_variant.ac + ')' + ': ' + str( - updated_transcript_variant) + ' MUST be fully validated prior to use in reports: select_variants=' + fn.valstr( - updated_transcript_variant) + variant.warnings += ': ' + 'A more recent version of the selected reference sequence ' + hgvs_coding.ac + \ + ' is available (' + updated_transcript_variant.ac + ')' + ': ' + \ + str(updated_transcript_variant) + ' MUST be fully validated prior to use in reports: ' \ + 'select_variants=' + fn.valstr(updated_transcript_variant) variant.coding = str(hgvs_coding) variant.genomic_r = str(hgvs_refseq) @@ -909,7 +878,7 @@ def final_tx_to_multiple_genomic(variant, validator, tx_variant): # Look for variants spanning introns try: - hgvs_coding = variant.hn.normalize(variant.hgvs_coding) + variant.hn.normalize(variant.hgvs_coding) except hgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) if 'boundary' in error or 'spanning' in error: From 
3ba630833abb176a0330c65c5b9459d389742fd6 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 15 May 2019 11:52:09 +0100 Subject: [PATCH 089/223] Moved get_transcript_info and deleted unused file --- VariantValidator/modules/collect_info.py | 152 ----------------------- VariantValidator/modules/vvMixinCore.py | 128 ++++++++++++++++++- 2 files changed, 126 insertions(+), 154 deletions(-) delete mode 100644 VariantValidator/modules/collect_info.py diff --git a/VariantValidator/modules/collect_info.py b/VariantValidator/modules/collect_info.py deleted file mode 100644 index 2376f079..00000000 --- a/VariantValidator/modules/collect_info.py +++ /dev/null @@ -1,152 +0,0 @@ -import re -import hgvs -from .vvLogging import logger -from . import vvHGVS -from .variant import Variant -from . import vvChromosomes -from . import vvFunctions as fn -from . import gapped_mapping - - -def collect_transcript_info(variant, validator): - """Collect transcript information for the variant""" - - if variant.reftype == ':g.': - toskip = from_genomic(variant, validator) - else: - toskip = from_non_genomic(variant, validator) - - return toskip - - -def get_transcript_info(variant, validator): - """Collect transcript information from a non-genomic variant""" - - hgvs_vt = validator.hp.parse_hgvs_variant(str(variant.hgvs_formatted)) - try: - tx_id_info = validator.hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: - error = 'Please inform UTA admin of the following error: ' + str(e) - reason = "VariantValidator cannot recover information for transcript " + str( - hgvs_vt.ac) + ' because it is not available in the Universal Transcript Archive' - variant.warnings += ': ' + str(reason) - logger.warning(str(reason) + ": " + str(error)) - return True - else: - # Get hgnc Gene name from command - hgnc = tx_id_info[6] - - # ACCESS THE GENE INFORMATION RECORDS ON THE UTA DATABASE - # Refseq accession - tx_for_gene = validator.tx_for_gene(hgnc, validator.hdp) - refseq_ac 
= validator.ng_extract(tx_for_gene) - - # Get accurate transcript descriptions from the relevant databases - # RefSeq databases - if validator.alt_aln_method != 'genebuild': - # Gene description - requires GenBank search to get all the required info, i.e. transcript variant ID - # accession number - hgvs_object = validator.hp.parse_hgvs_variant(str(variant.hgvs_formatted)) - accession = hgvs_object.ac - # Look for the accession in our database - # Connect to database and send request - entry = validator.db.in_entries(accession, 'transcript_info') - - # Analyse the returned data and take the necessary actions - # If the error key exists - if 'error' in entry: - # Open a hgvs exception log file in append mode - error = entry['description'] - variant.warnings += ': ' + str( - error) + ': A Database error occurred, please contact admin' - logger.warning(str(error) + ": A Database error occurred, please contact admin") - return True - - # If the accession key is found - elif 'accession' in entry: - # If the current entry is too old - if entry['expiry'] == 'true': - try: - entry = validator.db.data_add(accession=accession, validator=validator) - except hgvs.exceptions.HGVSError: - error = 'Transcript %s is not currently supported' % (accession) - variant.warnings += ': ' + error - logger.warning(error) - return True - except Exception: - error = 'Unable to assign transcript identity records to ' + accession + ', potentially an obsolete record :' - variant.warnings += ': ' + error - logger.warning(error) - return True - variant.description = entry['description'] - else: - variant.description = entry['description'] - # If the none key is found add the description to the database - elif 'none' in entry: - try: - entry = validator.db.data_add(accession=accession, validator=validator) - except Exception as e: - logger.warning(str(e)) - error = 'Unable to assign transcript identity records to ' + accession + ', potentially an obsolete record :' - variant.warnings += ': ' + 
error - logger.warning(error) - return True - variant.description = entry['description'] - - # If no correct keys are found - else: - # Open a hgvs exception log file in append mode - error = 'Unknown error type' - variant.warnings += ': ' + error + ': A Database error occurred, please contact admin' - logger.warning(error) - return True - - # Ensembl databases - else: - # accession number - hgvs_object = validator.hp.parse_hgvs_variant(str(variant.hgvs_formatted)) - accession = hgvs_object.ac - # Look for the accession in our database - # Connect to database and send request - entry = validator.db.in_entries(accession, 'transcript_info') - - # Analyse the returned data and take the necessary actions - # If the error key exists - if 'error' in entry: - # Open a hgvs exception log file in append mode - error = entry['description'] - variant.warnings += ': ' + str( - error) + ': A Database error occurred, please contact admin' - logger.warning(str(error)) - return True - - # If the accession key is found - elif 'accession' in entry: - # If the current entry is too old - if entry['expiry'] == 'true': - entry = validator.db.data_add(accession=accession, validator=validator) - variant.description = entry['description'] - else: - variant.description = entry['description'] - # If the none key is found add the description to the database - elif 'none' in entry: - try: - entry = validator.db.data_add(accession=accession, validator=validator) - except Exception as e: - logger.warning(str(e)) - error = 'Unable to assign transcript identity records to ' + accession + ', potentially an obsolete record :' - variant.warnings += ': ' + error - logger.warning(error) - return True - variant.description = entry['description'] - - # If no correct keys are found - else: - # Open a hgvs exception log file in append mode - error = 'Unknown error type' - variant.warnings += ': ' + error + ': A Database error occurred, please contact admin' - logger.warning(error) - return True - return 
False - - diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 798ffff1..bfd21aa9 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -14,7 +14,6 @@ from .variant import Variant from . import format_converters from . import use_checking -from . import collect_info from . import mappers from . import valoutput @@ -377,7 +376,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # COLLECT gene symbol, name and ACCESSION INFORMATION # Gene symbol if my_variant.reftype != ':g.': - toskip = collect_info.get_transcript_info(my_variant, self) + toskip = self.get_transcript_info(my_variant) if toskip: continue @@ -717,3 +716,128 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr exc_type, exc_value, last_traceback = sys.exc_info() logger.critical(str(exc_type) + " " + str(exc_value)) raise + + def get_transcript_info(self, variant): + """Collect transcript information from a non-genomic variant""" + + hgvs_vt = self.hp.parse_hgvs_variant(str(variant.hgvs_formatted)) + try: + self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) + except hgvs.exceptions.HGVSError as e: + error = 'Please inform UTA admin of the following error: ' + str(e) + reason = "VariantValidator cannot recover information for transcript " + str( + hgvs_vt.ac) + ' because it is not available in the Universal Transcript Archive' + variant.warnings += ': ' + str(reason) + logger.warning(str(reason) + ": " + str(error)) + return True + + # Get accurate transcript descriptions from the relevant databases + # RefSeq databases + if self.alt_aln_method != 'genebuild': + # Gene description - requires GenBank search to get all the required info, i.e. 
transcript variant ID + # accession number + hgvs_object = self.hp.parse_hgvs_variant(str(variant.hgvs_formatted)) + accession = hgvs_object.ac + # Look for the accession in our database + # Connect to database and send request + entry = self.db.in_entries(accession, 'transcript_info') + + # Analyse the returned data and take the necessary actions + # If the error key exists + if 'error' in entry: + # Open a hgvs exception log file in append mode + error = entry['description'] + variant.warnings += ': ' + str( + error) + ': A Database error occurred, please contact admin' + logger.warning(str(error) + ": A Database error occurred, please contact admin") + return True + + # If the accession key is found + elif 'accession' in entry: + # If the current entry is too old + if entry['expiry'] == 'true': + try: + entry = self.db.data_add(accession=accession, validator=self) + except hgvs.exceptions.HGVSError: + error = 'Transcript %s is not currently supported' % accession + variant.warnings += ': ' + error + logger.warning(error) + return True + except Exception: + error = 'Unable to assign transcript identity records to ' + accession + \ + ', potentially an obsolete record :' + variant.warnings += ': ' + error + logger.warning(error) + return True + variant.description = entry['description'] + else: + variant.description = entry['description'] + # If the none key is found add the description to the database + elif 'none' in entry: + try: + entry = self.db.data_add(accession=accession, validator=self) + except Exception as e: + logger.warning(str(e)) + error = 'Unable to assign transcript identity records to ' + accession + \ + ', potentially an obsolete record :' + variant.warnings += ': ' + error + logger.warning(error) + return True + variant.description = entry['description'] + + # If no correct keys are found + else: + # Open a hgvs exception log file in append mode + error = 'Unknown error type' + variant.warnings += ': ' + error + ': A Database error occurred, 
please contact admin' + logger.warning(error) + return True + + # Ensembl databases + else: + # accession number + hgvs_object = self.hp.parse_hgvs_variant(str(variant.hgvs_formatted)) + accession = hgvs_object.ac + # Look for the accession in our database + # Connect to database and send request + entry = self.db.in_entries(accession, 'transcript_info') + + # Analyse the returned data and take the necessary actions + # If the error key exists + if 'error' in entry: + # Open a hgvs exception log file in append mode + error = entry['description'] + variant.warnings += ': ' + str( + error) + ': A Database error occurred, please contact admin' + logger.warning(str(error)) + return True + + # If the accession key is found + elif 'accession' in entry: + # If the current entry is too old + if entry['expiry'] == 'true': + entry = self.db.data_add(accession=accession, validator=self) + variant.description = entry['description'] + else: + variant.description = entry['description'] + # If the none key is found add the description to the database + elif 'none' in entry: + try: + entry = self.db.data_add(accession=accession, validator=self) + except Exception as e: + logger.warning(str(e)) + error = 'Unable to assign transcript identity records to ' + accession + \ + ', potentially an obsolete record :' + variant.warnings += ': ' + error + logger.warning(error) + return True + variant.description = entry['description'] + + # If no correct keys are found + else: + # Open a hgvs exception log file in append mode + error = 'Unknown error type' + variant.warnings += ': ' + error + ': A Database error occurred, please contact admin' + logger.warning(error) + return True + return False From 584074cb8f71b793d4b9a35c1603642be50ea4ba Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 16 May 2019 13:30:25 +0100 Subject: [PATCH 090/223] Cleaned up gap_mapper --- VariantValidator/modules/gapped_mapping.py | 565 ++++++++++----------- 1 file changed, 255 insertions(+), 310 deletions(-) 
diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index cf61f3fa..30bf8f1d 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -1,7 +1,7 @@ import copy import re -import hgvs +import hgvs.exceptions from . import vvFunctions as fn from . import vvHGVS @@ -184,10 +184,10 @@ def gapped_g_to_c(self, rel_var): # Attempt to find gaps in reference sequence by catching disparity in genome length and # overlapping transcript lengths self.disparity_deletion_in = ['false', 'false'] + hgvs_not_delins = '' if stored_hgvs_not_delins != '': # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = self.dup_ins_5prime_shift(stored_hgvs_not_delins, self.hgvs_genomic_5pr, - saved_hgvs_coding) + hgvs_not_delins = self.dup_ins_5prime_shift(stored_hgvs_not_delins, saved_hgvs_coding) try: self.tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) @@ -395,7 +395,8 @@ def gapped_g_to_c(self, rel_var): def g_to_t_compensation(self, ori, hgvs_coding, rec_var): self.orientation = int(ori[0]['alt_strand']) self.hgvs_genomic_possibilities = [] - hgvs_genomic = self.validator.myevm_t_to_g(hgvs_coding, self.variant.no_norm_evm, self.variant.primary_assembly, self.variant.hn) + hgvs_genomic = self.validator.myevm_t_to_g(hgvs_coding, self.variant.no_norm_evm, self.variant.primary_assembly, + self.variant.hn) logger.warning('g_to_t gap code 1 active') rn_hgvs_genomic = self.variant.reverse_normalizer.normalize(hgvs_genomic) @@ -417,6 +418,8 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): # MAKE A NO NORM HGVS2VCF # First to the right hgvs_stash = copy.deepcopy(hgvs_coding) + stash_tx_right = '' + stash_tx_left = '' try: hgvs_stash = self.variant.no_norm_evm.c_to_n(hgvs_stash) except: @@ -440,7 +443,8 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) # 
stash_genomic = vm.t_to_g(test_stash_tx_right, hgvs_genomic.ac) - stash_genomic = self.validator.myvm_t_to_g(test_stash_tx_right, hgvs_genomic.ac, self.variant.no_norm_evm, self.variant.hn) + stash_genomic = self.validator.myvm_t_to_g(test_stash_tx_right, hgvs_genomic.ac, self.variant.no_norm_evm, + self.variant.hn) # Stash the outputs if required # test variants = NC_000006.11:g.90403795G= (causes double identity) # NC_000002.11:g.73675227_73675228insCTC (? incorrect assumed insertion position) @@ -504,8 +508,8 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): fn.exceptPass() try: stash_ac = hgvs_stash.ac - stash_dict = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, self.variant.reverse_normalizer, - self.validator.sf) + stash_dict = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, + self.variant.reverse_normalizer, self.validator.sf) stash_pos = int(stash_dict['pos']) stash_ref = stash_dict['ref'] stash_alt = stash_dict['alt'] @@ -522,16 +526,17 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): # Store a tx copy for later use test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) # stash_genomic = vm.t_to_g(test_stash_tx_left, hgvs_genomic.ac) - stash_genomic = self.validator.myvm_t_to_g(test_stash_tx_left, hgvs_genomic.ac, self.variant.no_norm_evm, self.variant.hn) + stash_genomic = self.validator.myvm_t_to_g(test_stash_tx_left, hgvs_genomic.ac, self.variant.no_norm_evm, + self.variant.hn) # Stash the outputs if required # test variants = NC_000006.11:g.90403795G= (causes double identity) # NC_000002.11:g.73675227_73675228insCTC # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) # if test_stash_tx_left.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': # pass - if len(test_stash_tx_left.posedit.edit.ref) == (( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # 
len(stash_genomic.posedit.edit.ref): - stash_tx_left = test_stash_tx_left + # len(stash_genomic.posedit.edit.ref): + if len(test_stash_tx_left.posedit.edit.ref) == ((stash_genomic.posedit.pos.end.base - + stash_genomic.posedit.pos.start.base) + 1): if hasattr(test_stash_tx_left.posedit.edit, 'alt') and test_stash_tx_left.posedit.edit.alt is not None: alt = test_stash_tx_left.posedit.edit.alt @@ -612,7 +617,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps hgvs_genomic_variant = copy.deepcopy(possibility) - + reverse_normalized_hgvs_genomic = '' # Reverse normalize hgvs_genomic_variant: NOTE will replace ref try: reverse_normalized_hgvs_genomic = self.variant.reverse_normalizer.normalize(hgvs_genomic_variant) @@ -713,9 +718,9 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): intronic_variant = 'true' if intronic_variant != 'hard_fail': - if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( + if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+-', str( hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str(hgvs_seek_var.posedit.pos)): + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+-', str(hgvs_seek_var.posedit.pos)): # Double check to see whether the variant is actually intronic? 
for exon in ori: genomic_start = int(exon['alt_start_i']) @@ -727,9 +732,9 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): else: intronic_variant = 'true' - if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', str( + if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+-', str( hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str(hgvs_seek_var.posedit.pos)): + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+-', str(hgvs_seek_var.posedit.pos)): # Double check to see whether the variant is actually intronic? for exon in ori: genomic_start = int(exon['alt_start_i']) @@ -745,10 +750,10 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): # Flag RefSeqGene for ammendment # amend_RefSeqGene = 'false' # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths + hgvs_not_delins = '' if stored_hgvs_not_delins != '': # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = self.dup_ins_5prime_shift(stored_hgvs_not_delins, self.hgvs_genomic_5pr, - saved_hgvs_coding) + hgvs_not_delins = self.dup_ins_5prime_shift(stored_hgvs_not_delins, saved_hgvs_coding) try: self.tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, @@ -765,20 +770,25 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) elif '+' in str(rn_tx_hgvs_not_delins.posedit.pos.end): - rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, back=False) + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset( + rn_tx_hgvs_not_delins, saved_hgvs_coding, back=False) elif '+' in str(rn_tx_hgvs_not_delins.posedit.pos.start): - rn_tx_hgvs_not_delins, hgvs_not_delins = 
self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding) + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset( + rn_tx_hgvs_not_delins, saved_hgvs_coding) # Check for -ve base and adjust - elif '-' in str(rn_tx_hgvs_not_delins.posedit.pos.end) and '-' in str(rn_tx_hgvs_not_delins.posedit.pos.start): + elif '-' in str(rn_tx_hgvs_not_delins.posedit.pos.end) and '-' in str( + rn_tx_hgvs_not_delins.posedit.pos.start): rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) elif '-' in str(rn_tx_hgvs_not_delins.posedit.pos.end): - rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding) + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset( + rn_tx_hgvs_not_delins, saved_hgvs_coding) elif '-' in str(rn_tx_hgvs_not_delins.posedit.pos.start): - rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, with_base_subtract=True) + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset( + rn_tx_hgvs_not_delins, saved_hgvs_coding, with_base_subtract=True) # Logic hgvs_not_delins = self.logic_check(hgvs_not_delins, rn_tx_hgvs_not_delins, hgvs_coding) @@ -815,7 +825,8 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): ftx = self.validator.vm.g_to_t(fg, self.tx_hgvs_not_delins.ac) if (rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): - exons = self.validator.hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, self.validator.alt_aln_method) + exons = self.validator.hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, + self.validator.alt_aln_method) exonic = False for ex_test in exons: if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ @@ -837,9 +848,11 @@ def g_to_t_compensation(self, ori, 
hgvs_coding, rec_var): self.tx_hgvs_not_delins.posedit.edit.alt = '' except Exception as e: if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = self.tx_hgvs_not_delins.ac + ':' + self.tx_hgvs_not_delins.type + '.' + str( + tx_hgvs_not_delins_delins_from_dup = self.tx_hgvs_not_delins.ac + ':' + \ + self.tx_hgvs_not_delins.type + '.' + str( self.tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - self.tx_hgvs_not_delins.posedit.pos.end) + 'del' + self.tx_hgvs_not_delins.posedit.edit.ref + 'ins' + self.tx_hgvs_not_delins.posedit.edit.ref + self.tx_hgvs_not_delins.posedit.edit.ref + self.tx_hgvs_not_delins.posedit.pos.end) + 'del' + self.tx_hgvs_not_delins.posedit.edit.ref\ + + 'ins' + self.tx_hgvs_not_delins.posedit.edit.ref + self.tx_hgvs_not_delins.posedit.edit.ref self.tx_hgvs_not_delins = self.validator.hp.parse_hgvs_variant( tx_hgvs_not_delins_delins_from_dup) @@ -965,9 +978,6 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): hgvs_genomic.posedit.pos.start.base = end hgvs_genomic.posedit.pos.end.base = start hgvs_genomic = self.variant.hn.normalize(hgvs_genomic) - genomic = fn.valstr(hgvs_genomic) - - print('in gapped_mapping', hgvs_coding) return hgvs_genomic, suppress_c_normalization, hgvs_coding @@ -993,7 +1003,8 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic): end = str(int(pos) + len(ref) - 1) pos = str(pos) stored_hgvs_not_delins = self.validator.hp.parse_hgvs_variant(str( - self.hgvs_genomic_5pr.ac) + ':' + self.hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) + self.hgvs_genomic_5pr.ac) + ':' + self.hgvs_genomic_5pr.type + '.' 
+ pos + '_' + end + 'del' + ref + + 'ins' + alt) self.orientation = int(ori[0]['alt_strand']) saved_hgvs_coding = copy.deepcopy(hgvs_coding) @@ -1004,21 +1015,21 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic): genomic_end = int(exon['alt_end_i']) # Take from stored copy # hgvs_genomic_5pr = copy.deepcopy(stored_hgvs_genomic_5pr) - if ( - self.hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - self.hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + if (genomic_start < self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + genomic_start < self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): is_it_in_an_exon = 'yes' if is_it_in_an_exon == 'yes': # map form reverse normalized g. to c. - hgvs_from_5n_g = self.variant.no_norm_evm.g_to_t(self.hgvs_genomic_5pr, saved_hgvs_coding.ac) + # hgvs_from_5n_g = self.variant.no_norm_evm.g_to_t(self.hgvs_genomic_5pr, saved_hgvs_coding.ac) - # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths + # Attempt to find gaps in reference sequence by catching disparity in genome length and + # overlapping transcript lengths self.disparity_deletion_in = ['false', 'false'] + hgvs_not_delins = '' + hard_fail = 'false' if stored_hgvs_not_delins != '': # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = self.dup_ins_5prime_shift(stored_hgvs_not_delins, self.hgvs_genomic_5pr, saved_hgvs_coding) - - hard_fail = 'false' + hgvs_not_delins = self.dup_ins_5prime_shift(stored_hgvs_not_delins, saved_hgvs_coding) try: self.tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) except Exception as e: @@ -1029,27 +1040,30 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic): # Create normalized version of 
self.tx_hgvs_not_delins rn_tx_hgvs_not_delins = copy.deepcopy(self.tx_hgvs_not_delins) # Check for +ve base and adjust - if re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\+', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): + if '+' in str(rn_tx_hgvs_not_delins.posedit.pos.end) and '+' in \ + str(rn_tx_hgvs_not_delins.posedit.pos.start): rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, back=False) + elif '+' in str(rn_tx_hgvs_not_delins.posedit.pos.end): + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset( + rn_tx_hgvs_not_delins, saved_hgvs_coding, back=False) - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding) + elif '+' in str(rn_tx_hgvs_not_delins.posedit.pos.start): + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset( + rn_tx_hgvs_not_delins, saved_hgvs_coding) # Check for -ve base and adjust - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): + elif '-' in str(rn_tx_hgvs_not_delins.posedit.pos.end) and '-' in str( + rn_tx_hgvs_not_delins.posedit.pos.start): rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding) + elif '-' in str(rn_tx_hgvs_not_delins.posedit.pos.end): + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset( + rn_tx_hgvs_not_delins, 
saved_hgvs_coding) - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, - with_base_subtract=True) + elif '-' in str(rn_tx_hgvs_not_delins.posedit.pos.start): + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset( + rn_tx_hgvs_not_delins, saved_hgvs_coding, with_base_subtract=True) # Logic hgvs_not_delins = self.logic_check(hgvs_not_delins, rn_tx_hgvs_not_delins, hgvs_coding) @@ -1065,16 +1079,12 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic): self.variant.hn.normalize(self.tx_hgvs_not_delins) except hgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): + if 'Normalization of intronic variants is not supported' in error or \ + 'Unsupported normalization of variants spanning the exon-intron boundary' in error: + if 'Unsupported normalization of variants spanning the exon-intron boundary' in error: logger.warning(error) return True - elif re.match('Normalization of intronic variants is not supported', error): + elif 'Normalization of intronic variants is not supported' in error: # We know that this cannot be because of an intronic variant, so must be aligned to tx gap self.disparity_deletion_in = ['transcript', 'Requires Analysis'] @@ -1091,32 +1101,26 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic): self.tx_hgvs_not_delins.posedit.edit.alt = '' except Exception as e: if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = self.tx_hgvs_not_delins.ac + ':' + self.tx_hgvs_not_delins.type + '.' 
+ str( + tx_hgvs_not_delins_delins_from_dup = self.tx_hgvs_not_delins.ac + ':' + \ + self.tx_hgvs_not_delins.type + '.' + str( self.tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - self.tx_hgvs_not_delins.posedit.pos.end) + 'del' + self.tx_hgvs_not_delins.posedit.edit.ref + 'ins' + self.tx_hgvs_not_delins.posedit.edit.ref + self.tx_hgvs_not_delins.posedit.edit.ref + self.tx_hgvs_not_delins.posedit.pos.end) + 'del' + self.tx_hgvs_not_delins.posedit.edit.ref + \ + 'ins' + self.tx_hgvs_not_delins.posedit.edit.ref + self.tx_hgvs_not_delins.posedit.edit.ref self.tx_hgvs_not_delins = self.validator.hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) # GAP IN THE TRANSCRIPT DISPARITY DETECTED if self.disparity_deletion_in[0] == 'transcript': - gap_position = '' - gapped_alignment_warning = str( - self.hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly - # ANY VARIANT WHOLLY WITHIN THE GAP - hgvs_refreshed_variant = self.transcript_disparity(reverse_normalized_hgvs_genomic, stored_hgvs_not_delins, hgvs_genomic, 3) + hgvs_refreshed_variant = self.transcript_disparity(reverse_normalized_hgvs_genomic, + stored_hgvs_not_delins, hgvs_genomic, 3) # GAP IN THE CHROMOSOME elif self.disparity_deletion_in[0] == 'chromosome': - # Set warning variables - gap_position = '' - gapped_alignment_warning = str( - self.hgvs_genomic_5pr) + ' does not represent a true variant because it is an artefact of aligning the transcripts listed below with genome build ' + self.variant.primary_assembly hgvs_refreshed_variant = self.tx_hgvs_not_delins # Warn self.auto_info = self.auto_info + str(hgvs_refreshed_variant.ac) + ':c.' 
+ str( - hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(self.disparity_deletion_in[ - 1]) + ' transcript base(s) that fail to align to chromosome ' + str( - hgvs_genomic.ac) + '\n' + hgvs_refreshed_variant.posedit.pos) + ' contains ' + str(self.disparity_deletion_in[1]) + \ + ' transcript base(s) that fail to align to chromosome ' + str(hgvs_genomic.ac) + '\n' self.gapped_transcripts = self.gapped_transcripts + str(hgvs_refreshed_variant.ac) + ' ' else: # Keep the same by re-setting rel_var @@ -1126,12 +1130,10 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic): hgvs_refreshed_variant = self.edit_output(hgvs_refreshed_variant, saved_hgvs_coding) # Sort out equality to equality c. events where the code will add 2 additional bases - if hgvs_coding.posedit.edit.type == 'identity' and hgvs_refreshed_variant.posedit.edit.type == 'identity': # and len(hgvs_refreshed_variant.posedit.edit.ref) == (len(hgvs_coding.posedit.edit.ref) + 2): + if hgvs_coding.posedit.edit.type == 'identity' and hgvs_refreshed_variant.posedit.edit.type == 'identity': pass else: hgvs_coding = copy.deepcopy(hgvs_refreshed_variant) - coding = fn.valstr(hgvs_coding) - formatted_variant = coding return hgvs_coding @@ -1147,29 +1149,30 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a try: chromosome_normalized_hgvs_coding = self.variant.reverse_normalizer.normalize( hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: + except hgvs.exceptions.HGVSUnsupportedOperationError: chromosome_normalized_hgvs_coding = hgvs_coding else: try: chromosome_normalized_hgvs_coding = self.variant.hn.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - error = str(e) + except hgvs.exceptions.HGVSUnsupportedOperationError: chromosome_normalized_hgvs_coding = hgvs_coding - most_3pr_hgvs_genomic = self.validator.myvm_t_to_g(chromosome_normalized_hgvs_coding, - alt_chr, - self.variant.no_norm_evm, 
self.variant.hn) + most_3pr_hgvs_genomic = self.validator.myvm_t_to_g(chromosome_normalized_hgvs_coding, alt_chr, + self.variant.no_norm_evm, self.variant.hn) self.hgvs_genomic_possibilities.append(most_3pr_hgvs_genomic) # First to the right hgvs_stash = copy.deepcopy(hgvs_coding) + stash_tx_right = '' + stash_tx_left = '' try: hgvs_stash = self.variant.no_norm_evm.c_to_n(hgvs_stash) except: fn.exceptPass() try: stash_ac = hgvs_stash.ac - stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, self.variant.hn, self.validator.sf) + stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, self.variant.hn, + self.validator.sf) stash_pos = int(stash_dict['pos']) stash_ref = stash_dict['ref'] stash_alt = stash_dict['alt'] @@ -1186,15 +1189,15 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a # Store a tx copy for later use test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) stash_genomic = self.validator.myvm_t_to_g(test_stash_tx_right, hgvs_alt_genomic.ac, - self.variant.no_norm_evm, self.variant.hn) + self.variant.no_norm_evm, self.variant.hn) # Stash the outputs if required # test variants = NC_000006.11:g.90403795G= (causes double identity) # NC_000002.11:g.73675227_73675228insCTC (? 
incorrect assumed insertion position) - # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position # if test_stash_tx_right.posedit.edit.type == 'identity' and stash_genomic.posedit.edit.type == 'identity': # pass - if len(test_stash_tx_right.posedit.edit.ref) == (( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): + if len(test_stash_tx_right.posedit.edit.ref) == ((stash_genomic.posedit.pos.end.base - + stash_genomic.posedit.pos.start.base) + 1): stash_tx_right = test_stash_tx_right if hasattr(test_stash_tx_right.posedit.edit, 'alt') and test_stash_tx_right.posedit.edit.alt is not None: @@ -1222,7 +1225,7 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a self.variant.hn.normalize(hgvs_reform_ident) except hgvs.exceptions.HGVSError as e: error = str(e) - if re.search('spanning the exon-intron boundary', error): + if 'spanning the exon-intron boundary' in error: stash_tx_right = test_stash_tx_right self.hgvs_genomic_possibilities.append('') else: @@ -1236,7 +1239,7 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a else: stash_tx_right = test_stash_tx_right self.hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: + except hgvs.exceptions.HGVSError: fn.exceptPass() except ValueError: fn.exceptPass() @@ -1271,11 +1274,11 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a # Stash the outputs if required # test variants = NC_000006.11:g.90403795G= (causes double identity) # NC_000002.11:g.73675227_73675228insCTC - # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position) + # NC_000003.11:g.14561629_14561630GC= NC_000003.11:g.14561629_14561630insG (Odd gap position # if test_stash_tx_left.posedit.edit.type == 'identity' and 
stash_genomic.posedit.edit.type == 'identity': # pass - if len(test_stash_tx_left.posedit.edit.ref) == (( - stash_genomic.posedit.pos.end.base - stash_genomic.posedit.pos.start.base) + 1): # len(stash_genomic.posedit.edit.ref): + if len(test_stash_tx_left.posedit.edit.ref) == ((stash_genomic.posedit.pos.end.base - + stash_genomic.posedit.pos.start.base) + 1): stash_tx_left = test_stash_tx_left if hasattr(test_stash_tx_left.posedit.edit, 'alt') and test_stash_tx_left.posedit.edit.alt is not None: @@ -1303,7 +1306,7 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a self.variant.hn.normalize(hgvs_reform_ident) except hgvs.exceptions.HGVSError as e: error = str(e) - if re.search('spanning the exon-intron boundary', error): + if 'spanning the exon-intron boundary' in error: stash_tx_left = test_stash_tx_left self.hgvs_genomic_possibilities.append('') else: @@ -1326,8 +1329,6 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a self.rev_norm_ins(hgvs_coding, hgvs_genomic) # Set variables for problem specific warnings - gapped_alignment_warning = '' - corrective_action_taken = '' self.gapped_transcripts = '' self.auto_info = '' @@ -1343,7 +1344,7 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a # Use VCF generation code to push hgvs_genomic as for 5 prime as possible to uncover gaps hgvs_genomic_variant = possibility - stored_hgvs_genomic_variant = copy.deepcopy(hgvs_genomic_variant) + reverse_normalized_hgvs_genomic = '' # Reverse normalize hgvs_genomic_variant: NOTE will replace ref try: @@ -1352,7 +1353,7 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a except hgvs.exceptions.HGVSError as e: # Strange error caused by gap in genomic error = str(e) - if re.search('base start position must be <= end position', error): + if 'base start position must be <= end position' in error: if hgvs_genomic.posedit.edit.type == 'delins': start = 
hgvs_genomic.posedit.pos.start.base end = hgvs_genomic.posedit.pos.end.base @@ -1375,7 +1376,7 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a hgvs_genomic.posedit.pos.end.base = start reverse_normalized_hgvs_genomic = self.variant.reverse_normalizer.normalize( hgvs_genomic) - if re.search('insertion length must be 1', error): + if 'insertion length must be 1' in error: if hgvs_genomic.posedit.edit.type == 'ins': start = hgvs_genomic.posedit.pos.start.base end = hgvs_genomic.posedit.pos.end.base @@ -1389,7 +1390,6 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a self.hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) # Store a copy for later use - stored_hgvs_genomic_5pr = copy.deepcopy(self.hgvs_genomic_5pr) # Make VCF vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, self.variant.primary_assembly, @@ -1400,7 +1400,6 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a alt = vcf_dict['alt'] # Look for exonic gaps within transcript or chromosome - no_normalized_c = 'false' # Mark true to not produce an additional normalization of c. # Generate an end position end = str(int(pos) + len(ref) - 1) @@ -1408,16 +1407,15 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a # Store a not real deletion insertion to test for gapping stored_hgvs_not_delins = self.validator.hp.parse_hgvs_variant(str( - self.hgvs_genomic_5pr.ac) + ':' + self.hgvs_genomic_5pr.type + '.' + pos + '_' + end + 'del' + ref + 'ins' + alt) - v = [chr, pos, ref, alt] - + self.hgvs_genomic_5pr.ac) + ':' + self.hgvs_genomic_5pr.type + '.' 
+ pos + '_' + end + 'del' + + ref + 'ins' + alt) # Save a copy of current hgvs_coding + saved_hgvs_coding = '' try: saved_hgvs_coding = self.variant.no_norm_evm.g_to_t(stored_hgvs_not_delins, hgvs_coding.ac) except Exception as e: - if str( - e) == 'start or end or both are beyond the bounds of transcript record': + if str(e) == 'start or end or both are beyond the bounds of transcript record': saved_hgvs_coding = hgvs_coding continue @@ -1435,44 +1433,35 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a hgvs_seek_var = saved_hgvs_coding try: - intron_test = self.variant.hn.normalize(hgvs_seek_var) + self.variant.hn.normalize(hgvs_seek_var) except hgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): + if 'Normalization of intronic variants is not supported' in error or \ + 'Unsupported normalization of variants spanning the exon-intron boundary' in error: + if 'Unsupported normalization of variants spanning the exon-intron boundary' in error: intronic_variant = 'hard_fail' else: # Double check to see whether the variant is actually intronic? 
for exon in ori: genomic_start = int(exon['alt_start_i']) genomic_end = int(exon['alt_end_i']) - if ( - self.hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - self.hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + if (genomic_start < self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + genomic_start < self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): intronic_variant = 'false' break else: intronic_variant = 'true' if intronic_variant != 'hard_fail': - if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+\-', - str( - hgvs_seek_var.posedit.pos)) or re.search( - r'\*\d+\+', str( - hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\-', str( - hgvs_seek_var.posedit.pos)): + if re.search(r'\d+\+', str(hgvs_seek_var.posedit.pos)) or re.search(r'\d+-', + str(hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+\+', str( + hgvs_seek_var.posedit.pos)) or re.search(r'\*\d+-', str(hgvs_seek_var.posedit.pos)): # Double check to see whether the variant is actually intronic? 
for exon in ori: genomic_start = int(exon['alt_start_i']) genomic_end = int(exon['alt_end_i']) - if ( - self.hgvs_genomic_5pr.posedit.pos.start.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( - self.hgvs_genomic_5pr.posedit.pos.end.base > genomic_start and self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): + if (genomic_start < self.hgvs_genomic_5pr.posedit.pos.start.base <= genomic_end) and ( + genomic_start < self.hgvs_genomic_5pr.posedit.pos.end.base <= genomic_end): intronic_variant = 'false' break else: @@ -1481,70 +1470,62 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a if intronic_variant != 'true': # Flag RefSeqGene for ammendment # amend_RefSeqGene = 'false' - # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping transcript lengths + # Attempt to find gaps in reference sequence by catching disparity in genome length and overlapping + # transcript lengths + hgvs_not_delins = '' if stored_hgvs_not_delins != '': # Refresh hgvs_not_delins from stored_hgvs_not_delins - hgvs_not_delins = self.dup_ins_5prime_shift(stored_hgvs_not_delins, self.hgvs_genomic_5pr, saved_hgvs_coding) + hgvs_not_delins = self.dup_ins_5prime_shift(stored_hgvs_not_delins, saved_hgvs_coding) - self.tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, - saved_hgvs_coding.ac) + self.tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) # Create normalized version of tx_hgvs_not_delins rn_tx_hgvs_not_delins = copy.deepcopy(self.tx_hgvs_not_delins) # Check for +1 base and adjust - if re.search(r'\+', - str(rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - r'\+', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): + if '+' in str(rn_tx_hgvs_not_delins.posedit.pos.end) and '+' in str( + rn_tx_hgvs_not_delins.posedit.pos.start): rn_tx_hgvs_not_delins = 
self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, back=False) + elif '+' in str(rn_tx_hgvs_not_delins.posedit.pos.end): + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset( + rn_tx_hgvs_not_delins, saved_hgvs_coding, back=False) - elif re.search(r'\+', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding) + elif '+' in str(rn_tx_hgvs_not_delins.posedit.pos.start): + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset( + rn_tx_hgvs_not_delins, saved_hgvs_coding) # Check for -ve base and adjust - elif re.search(r'\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.end)) and re.search( - r'\-', - str( - rn_tx_hgvs_not_delins.posedit.pos.start)): + elif '-' in str(rn_tx_hgvs_not_delins.posedit.pos.end) and '-' in str( + rn_tx_hgvs_not_delins.posedit.pos.start): rn_tx_hgvs_not_delins = self.remove_offsetting_to_span_gap(rn_tx_hgvs_not_delins) - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.end)): - rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding) + elif '-' in str(rn_tx_hgvs_not_delins.posedit.pos.end): + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_end_base_to_next_nonoffset( + rn_tx_hgvs_not_delins, saved_hgvs_coding) - elif re.search(r'\-', str(rn_tx_hgvs_not_delins.posedit.pos.start)): - rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset(rn_tx_hgvs_not_delins, saved_hgvs_coding, with_base_subtract=True) + elif '-' in str(rn_tx_hgvs_not_delins.posedit.pos.start): + rn_tx_hgvs_not_delins, hgvs_not_delins = self.move_tx_start_base_to_previous_nonoffset( + 
rn_tx_hgvs_not_delins, saved_hgvs_coding, with_base_subtract=True) # Logic - hgvs_not_delins = self.logic_check(hgvs_not_delins, rn_tx_hgvs_not_delins, hgvs_coding, do_continue=True, offset_check=True) + hgvs_not_delins = self.logic_check(hgvs_not_delins, rn_tx_hgvs_not_delins, hgvs_coding, + do_continue=True, offset_check=True) # Final sanity checks try: self.validator.vm.g_to_t(hgvs_not_delins, self.tx_hgvs_not_delins.ac) except Exception as e: - if str( - e) == 'start or end or both are beyond the bounds of transcript record': + if str(e) == 'start or end or both are beyond the bounds of transcript record': continue try: self.variant.hn.normalize(self.tx_hgvs_not_delins) except hgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) - - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): - if re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): + if 'Normalization of intronic variants is not supported' in error or \ + 'Unsupported normalization of variants spanning the exon-intron boundary' in error: + if 'Unsupported normalization of variants spanning the exon-intron boundary' in error: continue - elif re.match('Normalization of intronic variants is not supported', - error): + elif 'Normalization of intronic variants is not supported' in error: # We know that this cannot be because of an intronic variant, so must be aligned to tx gap self.disparity_deletion_in = ['transcript', 'Requires Analysis'] @@ -1559,18 +1540,16 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a rtx = self.validator.vm.g_to_t(rg, self.tx_hgvs_not_delins.ac) fg = self.variant.hn.normalize(hgvs_not_delins) ftx = self.validator.vm.g_to_t(fg, self.tx_hgvs_not_delins.ac) - if ( - rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( + if 
(rtx.posedit.pos.start.offset == 0 and rtx.posedit.pos.end.offset == 0) and ( ftx.posedit.pos.start.offset != 0 and ftx.posedit.pos.end.offset != 0): - exons = self.validator.hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, self.validator.alt_aln_method) + exons = self.validator.hdp.get_tx_exons(ftx.ac, hgvs_not_delins.ac, + self.validator.alt_aln_method) exonic = False for ex_test in exons: - if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[ - 7]) and ftx.posedit.pos.end.base in range(ex_test[6], - ex_test[7]): + if ftx.posedit.pos.start.base in range(ex_test[6], ex_test[7]) and \ + ftx.posedit.pos.end.base in range(ex_test[6], ex_test[7]): exonic = True if exonic is True: - hgvs_not_delins = fg hgvs_genomic = fg self.hgvs_genomic_5pr = fg try: @@ -1585,16 +1564,20 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a self.tx_hgvs_not_delins.posedit.edit.alt = '' except Exception as e: if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = self.tx_hgvs_not_delins.ac + ':' + self.tx_hgvs_not_delins.type + '.' + str( + tx_hgvs_not_delins_delins_from_dup = self.tx_hgvs_not_delins.ac + ':' + \ + self.tx_hgvs_not_delins.type + '.' 
+ str( self.tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - self.tx_hgvs_not_delins.posedit.pos.end) + 'del' + self.tx_hgvs_not_delins.posedit.edit.ref + 'ins' + self.tx_hgvs_not_delins.posedit.edit.ref + self.tx_hgvs_not_delins.posedit.edit.ref + self.tx_hgvs_not_delins.posedit.pos.end) + 'del' + self.tx_hgvs_not_delins.posedit.edit.ref\ + + 'ins' + self.tx_hgvs_not_delins.posedit.edit.ref + \ + self.tx_hgvs_not_delins.posedit.edit.ref self.tx_hgvs_not_delins = self.validator.hp.parse_hgvs_variant( tx_hgvs_not_delins_delins_from_dup) if self.disparity_deletion_in[0] == 'transcript': # amend_RefSeqGene = 'true' # ANY VARIANT WHOLLY WITHIN THE GAP - hgvs_refreshed_variant = self.transcript_disparity(reverse_normalized_hgvs_genomic, stored_hgvs_not_delins, hgvs_genomic, 4) + hgvs_refreshed_variant = self.transcript_disparity(reverse_normalized_hgvs_genomic, + stored_hgvs_not_delins, hgvs_genomic, 4) # GAP IN THE CHROMOSOME elif self.disparity_deletion_in[0] == 'chromosome': @@ -1614,25 +1597,18 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a else: # Keep the same by re-setting rel_var hgvs_refreshed_variant = hgvs_coding - # amend_RefSeqGene = 'false' # Edit the output - if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', - str( - hgvs_refreshed_variant.type)): + if 'NM_' in str(hgvs_refreshed_variant.ac) and 'c' not in str(hgvs_refreshed_variant.type): hgvs_refreshed_variant = self.variant.no_norm_evm.n_to_c(hgvs_refreshed_variant) - else: - pass try: self.variant.hn.normalize(hgvs_refreshed_variant) except Exception as e: error = str(e) # Ensure the final variant is not intronic nor does it cross exon boundaries - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): + if 'Normalization of intronic variants is not supported' in error or \ + 'Unsupported normalization of variants 
spanning the exon-intron boundary' in error: hgvs_refreshed_variant = saved_hgvs_coding else: continue @@ -1651,7 +1627,7 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a # Update hgvs_genomic hgvs_alt_genomic = self.validator.myvm_t_to_g(hgvs_refreshed_variant, alt_chr, - self.variant.no_norm_evm, self.variant.hn) + self.variant.no_norm_evm, self.variant.hn) if hgvs_alt_genomic.posedit.edit.type == 'identity': re_c = self.validator.vm.g_to_t(hgvs_alt_genomic, hgvs_refreshed_variant.ac) if (self.variant.hn.normalize(re_c)) != (self.variant.hn.normalize(hgvs_refreshed_variant)): @@ -1666,9 +1642,6 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a hgvs_alt_genomic = shuffle_left_g # If it is intronic, these vairables will not have been set - else: - # amend_RefSeqGene = 'false' - no_normalized_c = 'false' # Break if gap has been detected if self.disparity_deletion_in[0] != 'false': @@ -1680,8 +1653,7 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a except hgvs.exceptions.HGVSError as e: # Strange error caused by gap in genomic error = str(e) - if re.search('base start position must be <= end position', error) and \ - self.disparity_deletion_in[0] == 'chromosome': + if 'base start position must be <= end position' in error and self.disparity_deletion_in[0] == 'chromosome': if hgvs_alt_genomic.posedit.edit.type == 'delins': start = hgvs_alt_genomic.posedit.pos.start.base end = hgvs_alt_genomic.posedit.pos.end.base @@ -1705,72 +1677,60 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a return hgvs_alt_genomic, hgvs_coding - def dup_ins_5prime_shift(self, stored_hgvs_not_delins, hgvs_genomic_5pr, saved_hgvs_coding): + def dup_ins_5prime_shift(self, stored_hgvs_not_delins, saved_hgvs_coding): hgvs_not_delins = copy.deepcopy(stored_hgvs_not_delins) # This test will only occur in dup of single base, insertion or substitution - if not 
re.search('_', str(hgvs_not_delins.posedit.pos)): - if re.search('dup', self.hgvs_genomic_5pr.posedit.edit.type) or re.search('ins', - self.hgvs_genomic_5pr.posedit.edit.type): - # For gap in chr, map to t. - but becaouse we have pushed to 5 prime by norm, add 1 to end pos + if '_' not in str(hgvs_not_delins.posedit.pos): + if 'dup' in self.hgvs_genomic_5pr.posedit.edit.type or 'ins' in self.hgvs_genomic_5pr.posedit.edit.type: + # For gap in chr, map to t. - but because we have pushed to 5 prime by norm, add 1 to end pos plussed_hgvs_not_delins = copy.deepcopy(hgvs_not_delins) plussed_hgvs_not_delins.posedit.pos.end.base = plussed_hgvs_not_delins.posedit.pos.end.base + 1 plussed_hgvs_not_delins.posedit.edit.ref = '' transcript_variant = self.variant.no_norm_evm.g_to_t(plussed_hgvs_not_delins, - str(saved_hgvs_coding.ac)) - if (( - transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( + str(saved_hgvs_coding.ac)) + if ((transcript_variant.posedit.pos.end.base - transcript_variant.posedit.pos.start.base) > ( self.hgvs_genomic_5pr.posedit.pos.end.base - self.hgvs_genomic_5pr.posedit.pos.start.base)): - if re.search('dup', str(self.hgvs_genomic_5pr.posedit.edit)): + if 'dup' in str(self.hgvs_genomic_5pr.posedit.edit): hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 end = hgvs_not_delins.posedit.pos.end.base ref_bases = self.validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ + hgvs_not_delins.posedit.edit.alt = ref_bases[:1] + hgvs_not_delins.posedit.edit.alt[ 1:] + ref_bases[1:] - elif re.search('ins', str(self.hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(self.hgvs_genomic_5pr.posedit.edit)): + elif 'ins' in str(self.hgvs_genomic_5pr.posedit.edit) and \ + 'del' in 
str(self.hgvs_genomic_5pr.posedit.edit): hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str(self.hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(self.hgvs_genomic_5pr.posedit.edit)): + elif 'ins' in str(self.hgvs_genomic_5pr.posedit.edit) and \ + 'del' not in str(self.hgvs_genomic_5pr.posedit.edit): hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 end = hgvs_not_delins.posedit.pos.end.base ref_bases = self.validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ + hgvs_not_delins.posedit.edit.alt = ref_bases[:1] + hgvs_not_delins.posedit.edit.alt[ 1:] + ref_bases[1:] else: - if re.search('dup', str(self.hgvs_genomic_5pr.posedit.edit)): + if 'dup' in str(self.hgvs_genomic_5pr.posedit.edit): hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 end = hgvs_not_delins.posedit.pos.end.base ref_bases = self.validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ + hgvs_not_delins.posedit.edit.alt = ref_bases[:1] + hgvs_not_delins.posedit.edit.alt[ 1:] + ref_bases[1:] - elif re.search('ins', str(self.hgvs_genomic_5pr.posedit.edit)) and re.search( - 'del', str(self.hgvs_genomic_5pr.posedit.edit)): + elif 'ins' in str(self.hgvs_genomic_5pr.posedit.edit) and \ + 'del' in str(self.hgvs_genomic_5pr.posedit.edit): hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 - elif re.search('ins', - str(self.hgvs_genomic_5pr.posedit.edit)) and not re.search( - 'del', str(self.hgvs_genomic_5pr.posedit.edit)): + elif 'ins' in 
str(self.hgvs_genomic_5pr.posedit.edit) and \ + 'del' not in str(self.hgvs_genomic_5pr.posedit.edit): hgvs_not_delins.posedit.pos.end.base = hgvs_not_delins.posedit.pos.start.base + 1 start = hgvs_not_delins.posedit.pos.start.base - 1 end = hgvs_not_delins.posedit.pos.end.base ref_bases = self.validator.sf.fetch_seq(str(hgvs_not_delins.ac), start, end) hgvs_not_delins.posedit.edit.ref = ref_bases - hgvs_not_delins.posedit.edit.alt = ref_bases[ - :1] + hgvs_not_delins.posedit.edit.alt[ + hgvs_not_delins.posedit.edit.alt = ref_bases[:1] + hgvs_not_delins.posedit.edit.alt[ 1:] + ref_bases[1:] - else: - pass - else: - pass return hgvs_not_delins @@ -1800,7 +1760,7 @@ def move_tx_end_base_to_next_nonoffset(self, rn_tx_hgvs_not_delins, saved_hgvs_c else: # move tx end base to next available non-offset base rn_tx_hgvs_not_delins.posedit.pos.end.base = self.tx_hgvs_not_delins.posedit.pos.end.base + 1 - if re.match('NM_', str(rn_tx_hgvs_not_delins)): + if 'NM_' in str(rn_tx_hgvs_not_delins): test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) else: test_tx_var = rn_tx_hgvs_not_delins @@ -1811,13 +1771,14 @@ def move_tx_end_base_to_next_nonoffset(self, rn_tx_hgvs_not_delins, saved_hgvs_c str(saved_hgvs_coding.ac)) return rn_tx_hgvs_not_delins, hgvs_not_delins - def move_tx_start_base_to_previous_nonoffset(self, rn_tx_hgvs_not_delins, saved_hgvs_coding, with_base_subtract=False): + def move_tx_start_base_to_previous_nonoffset(self, rn_tx_hgvs_not_delins, saved_hgvs_coding, + with_base_subtract=False): # move tx start base to previous available non-offset base rn_tx_hgvs_not_delins.posedit.pos.start.offset = 0 if with_base_subtract: rn_tx_hgvs_not_delins.posedit.pos.start.base = rn_tx_hgvs_not_delins.posedit.pos.start.base - 1 rn_tx_hgvs_not_delins.posedit.edit.ref = '' - if re.match('NM_', str(rn_tx_hgvs_not_delins)): + if 'NM_' in str(rn_tx_hgvs_not_delins): test_tx_var = self.variant.no_norm_evm.n_to_c(rn_tx_hgvs_not_delins) else: test_tx_var = 
rn_tx_hgvs_not_delins @@ -1890,11 +1851,12 @@ def c1_pos_edit(self, hgvs_genomic): return hgvs_refreshed_variant - def transcript_disparity(self, reverse_normalized_hgvs_genomic, stored_hgvs_not_delins, hgvs_genomic, running_option): - if (re.search(r'\+', str(self.tx_hgvs_not_delins.posedit.pos.start)) or re.search(r'\-', str( - self.tx_hgvs_not_delins.posedit.pos.start))) and ( - re.search(r'\+', str(self.tx_hgvs_not_delins.posedit.pos.end)) or re.search(r'\-', str( - self.tx_hgvs_not_delins.posedit.pos.end))): + def transcript_disparity(self, reverse_normalized_hgvs_genomic, stored_hgvs_not_delins, hgvs_genomic, + running_option): + if ('+' in str(self.tx_hgvs_not_delins.posedit.pos.start) or '-' in str( + self.tx_hgvs_not_delins.posedit.pos.start)) and ( + '+' in str(self.tx_hgvs_not_delins.posedit.pos.end) or '-' in str( + self.tx_hgvs_not_delins.posedit.pos.end)): self.gapped_transcripts = self.gapped_transcripts + ' ' + str(self.tx_hgvs_not_delins.ac) # Copy the current variant @@ -1904,20 +1866,22 @@ def transcript_disparity(self, reverse_normalized_hgvs_genomic, stored_hgvs_not_ tx_gap_fill_variant.posedit.edit.alt = '' except Exception as e: if str(e) == "'Dup' object has no attribute 'alt'": - tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + '.' + str( + tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + \ + '.' 
+ str( tx_gap_fill_variant.posedit.pos.start) + '_' + str( - tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + \ + 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref tx_gap_fill_variant = self.validator.hp.parse_hgvs_variant( tx_gap_fill_variant_delins_from_dup) # Identify which half of the NOT-intron the start position of the variant is in - if re.search(r'\-', str(tx_gap_fill_variant.posedit.pos.start)): + if '-' in str(tx_gap_fill_variant.posedit.pos.start): tx_gap_fill_variant.posedit.pos.start.base = tx_gap_fill_variant.posedit.pos.start.base - 1 tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') tx_gap_fill_variant.posedit.edit.alt = '' tx_gap_fill_variant.posedit.edit.ref = '' - elif re.search(r'\+', str(tx_gap_fill_variant.posedit.pos.start)): + elif '+' in str(tx_gap_fill_variant.posedit.pos.start): tx_gap_fill_variant.posedit.pos.start.offset = int('0') # int('+1') tx_gap_fill_variant.posedit.pos.end.base = tx_gap_fill_variant.posedit.pos.end.base + 1 tx_gap_fill_variant.posedit.pos.end.offset = int('0') # int('-1') @@ -1928,16 +1892,14 @@ def transcript_disparity(self, reverse_normalized_hgvs_genomic, stored_hgvs_not_ tx_gap_fill_variant = self.validator.vm.n_to_c(tx_gap_fill_variant) except: fn.exceptPass() - genomic_gap_fill_variant = self.validator.vm.t_to_g(tx_gap_fill_variant, - reverse_normalized_hgvs_genomic.ac) + genomic_gap_fill_variant = self.validator.vm.t_to_g(tx_gap_fill_variant, reverse_normalized_hgvs_genomic.ac) genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref try: c_tx_hgvs_not_delins = self.validator.vm.n_to_c(self.tx_hgvs_not_delins) except Exception: c_tx_hgvs_not_delins = 
copy.copy(self.tx_hgvs_not_delins) - genomic_gap_fill_variant_alt = self.validator.vm.t_to_g(c_tx_hgvs_not_delins, - self.hgvs_genomic_5pr.ac) + genomic_gap_fill_variant_alt = self.validator.vm.t_to_g(c_tx_hgvs_not_delins, self.hgvs_genomic_5pr.ac) # Ensure an ALT exists try: @@ -1945,14 +1907,20 @@ def transcript_disparity(self, reverse_normalized_hgvs_genomic, stored_hgvs_not_ genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' except Exception as e: if str(e) == "'Dup' object has no attribute 'alt'": - genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' + genomic_gap_fill_variant.type + '.' + str( + genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' \ + + genomic_gap_fill_variant.type + '.' + str( genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + \ + genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref \ + + genomic_gap_fill_variant.posedit.edit.ref genomic_gap_fill_variant = self.validator.hp.parse_hgvs_variant( genomic_gap_fill_variant_delins_from_dup) - genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' + genomic_gap_fill_variant_alt.type + '.' + str( + genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' \ + + genomic_gap_fill_variant_alt.type + '.' 
+ str( genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + \ + genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + \ + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref genomic_gap_fill_variant_alt = self.validator.hp.parse_hgvs_variant( genomic_gap_fill_variant_alt_delins_from_dup) @@ -1961,9 +1929,8 @@ def transcript_disparity(self, reverse_normalized_hgvs_genomic, stored_hgvs_not_ append_ref = self.validator.sf.fetch_seq(genomic_gap_fill_variant_alt.ac, genomic_gap_fill_variant_alt.posedit.pos.start.base - 1, genomic_gap_fill_variant_alt.posedit.pos.end.base) - genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[ - 0] + genomic_gap_fill_variant_alt.posedit.edit.alt + \ - append_ref[1] + genomic_gap_fill_variant_alt.posedit.edit.alt = append_ref[0] + \ + genomic_gap_fill_variant_alt.posedit.edit.alt + append_ref[1] # Split the reference and replacing alt sequence into a dictionary reference_bases = list(genomic_gap_fill_variant.posedit.edit.ref) @@ -2011,22 +1978,20 @@ def transcript_disparity(self, reverse_normalized_hgvs_genomic, stored_hgvs_not_ # Add the new alt to the gap fill variant and generate transcript variant genomic_gap_fill_variant.posedit.edit.alt = alternate_sequence - hgvs_refreshed_variant = self.validator.vm.g_to_t(genomic_gap_fill_variant, - tx_gap_fill_variant.ac) + hgvs_refreshed_variant = self.validator.vm.g_to_t(genomic_gap_fill_variant, tx_gap_fill_variant.ac) # Set warning gap_size = str(len(genomic_gap_fill_variant.posedit.edit.ref) - 2) self.disparity_deletion_in[1] = [gap_size] self.auto_info = self.auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( - stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + ' genomic base(s) that fail to align to transcript ' + str( - self.tx_hgvs_not_delins.ac) - non_valid_caution = 'true' + stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + gap_size + \ + ' genomic base(s) that fail to align to transcript ' + str(self.tx_hgvs_not_delins.ac) # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): + if 'NM_' in str(for_location_c): for_location_c = self.variant.no_norm_evm.n_to_c(self.tx_hgvs_not_delins) - if re.match(r'\-', str(for_location_c.posedit.pos.start.offset)): + if '-' in str(for_location_c.posedit.pos.start.offset): gps = for_location_c.posedit.pos.start.base - 1 gpe = for_location_c.posedit.pos.start.base else: @@ -2036,7 +2001,8 @@ def transcript_disparity(self, reverse_normalized_hgvs_genomic, stored_hgvs_not_ self.auto_info = self.auto_info + '%s' % (gap_position) else: - if self.tx_hgvs_not_delins.posedit.pos.start.offset == 0 and self.tx_hgvs_not_delins.posedit.pos.end.offset == 0: + if self.tx_hgvs_not_delins.posedit.pos.start.offset == 0 and \ + self.tx_hgvs_not_delins.posedit.pos.end.offset == 0: # In this instance, we have identified a transcript gap but the n. version of # the transcript variant but do not have a position which actually hits the gap, # so the variant likely spans the gap, and is not picked up by an offset. 
@@ -2048,8 +2014,7 @@ def transcript_disparity(self, reverse_normalized_hgvs_genomic, stored_hgvs_not_ g3 = self.validator.nr_vm.t_to_g(c1, hgvs_genomic.ac) g2 = self.validator.vm.t_to_g(c1, hgvs_genomic.ac) ng2 = self.variant.hn.normalize(g2) - g3.posedit.pos.end.base = g3.posedit.pos.start.base + ( - len(g3.posedit.edit.ref) - 1) + g3.posedit.pos.end.base = g3.posedit.pos.start.base + (len(g3.posedit.edit.ref) - 1) try: c2 = self.validator.vm.g_to_t(g3, c1.ac) if c2.posedit.pos.start.offset == 0 and c2.posedit.pos.end.offset == 0: @@ -2063,76 +2028,71 @@ def transcript_disparity(self, reverse_normalized_hgvs_genomic, stored_hgvs_not_ except hgvs.exceptions.HGVSInvalidVariantError: fn.exceptPass() - if re.search(r'\+', str(self.tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\+', str(self.tx_hgvs_not_delins.posedit.pos.end)): + if '+' in str(self.tx_hgvs_not_delins.posedit.pos.start) and \ + '+' not in str(self.tx_hgvs_not_delins.posedit.pos.end): self.auto_info = self.auto_info + str(stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( self.disparity_deletion_in[ 1]) + ' genomic base(s) that fail to align to transcript ' + str( self.tx_hgvs_not_delins.ac) - non_valid_caution = 'true' hgvs_refreshed_variant = self.c2_pos_edit(hgvs_genomic) # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): + if 'NM_' in str(for_location_c): for_location_c = self.variant.no_norm_evm.n_to_c(self.tx_hgvs_not_delins) - gps = for_location_c.posedit.pos.start.base - gpe = for_location_c.posedit.pos.start.base + 1 + gps = for_location_c.posedit.pos.start.base + gpe = for_location_c.posedit.pos.start.base + 1 gap_position = ' between positions c.' 
+ str(gps) + '_' + str(gpe) + '\n' # Warn update - self.auto_info = self.auto_info + '%s' % (gap_position) - elif re.search(r'\+', str(self.tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\+', str(self.tx_hgvs_not_delins.posedit.pos.start)): + self.auto_info = self.auto_info + '%s' % gap_position + elif '+' in str(self.tx_hgvs_not_delins.posedit.pos.end) and \ + '+' not in str(self.tx_hgvs_not_delins.posedit.pos.start): self.auto_info = self.auto_info + 'Genome position ' + str( stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( self.disparity_deletion_in[1]) + '-bp gap in transcript ' + str( self.tx_hgvs_not_delins.ac) self.gapped_transcripts = self.gapped_transcripts + ' ' + str(self.tx_hgvs_not_delins.ac) - non_valid_caution = 'true' hgvs_refreshed_variant = self.c1_pos_edit(hgvs_genomic) # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): + if 'NM_' in str(for_location_c): for_location_c = self.variant.no_norm_evm.n_to_c(self.tx_hgvs_not_delins) gps = for_location_c.posedit.pos.end.base gpe = for_location_c.posedit.pos.end.base + 1 gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' # Warn update - self.auto_info = self.auto_info + '%s' % (gap_position) - elif re.search(r'\-', - str(self.tx_hgvs_not_delins.posedit.pos.start)) and not re.search( - r'\-', str(self.tx_hgvs_not_delins.posedit.pos.end)): + self.auto_info = self.auto_info + '%s' % gap_position + elif '-' in str(self.tx_hgvs_not_delins.posedit.pos.start) and \ + '-' not in str(self.tx_hgvs_not_delins.posedit.pos.end): self.auto_info = self.auto_info + str(stored_hgvs_not_delins.ac) + ':g.' 
+ str( stored_hgvs_not_delins.posedit.pos.start.base) + ' is one of ' + str( self.disparity_deletion_in[ 1]) + ' genomic base(s) that fail to align to transcript ' + str( self.tx_hgvs_not_delins.ac) - non_valid_caution = 'true' hgvs_refreshed_variant = self.c2_pos_edit(hgvs_genomic) # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): + if 'NM_' in str(for_location_c): for_location_c = self.variant.no_norm_evm.n_to_c(self.tx_hgvs_not_delins) gps = for_location_c.posedit.pos.start.base - 1 gpe = for_location_c.posedit.pos.start.base gap_position = ' between positions c.' + str(gps) + '_' + str(gpe) + '\n' # Warn update - self.auto_info = self.auto_info + '%s' % (gap_position) - elif re.search(r'\-', str(self.tx_hgvs_not_delins.posedit.pos.end)) and not re.search( - r'\-', str(self.tx_hgvs_not_delins.posedit.pos.start)): + self.auto_info = self.auto_info + '%s' % gap_position + elif '-' in str(self.tx_hgvs_not_delins.posedit.pos.end) and \ + '-' not in str(self.tx_hgvs_not_delins.posedit.pos.start): self.auto_info = self.auto_info + 'Genome position ' + str( stored_hgvs_not_delins.ac) + ':g.' + str( stored_hgvs_not_delins.posedit.pos.end.base + 1) + ' aligns within a ' + str( self.disparity_deletion_in[1]) + '-bp gap in transcript ' + str( self.tx_hgvs_not_delins.ac) self.gapped_transcripts = self.gapped_transcripts + ' ' + str(self.tx_hgvs_not_delins.ac) - non_valid_caution = 'true' - ## Have variation in first copy here! + # Have variation in first copy here! 
if running_option == 1: try: c1 = self.validator.vm.n_to_c(self.tx_hgvs_not_delins) @@ -2158,7 +2118,7 @@ def transcript_disparity(self, reverse_normalized_hgvs_genomic, stored_hgvs_not_ # Alignment position for_location_c = copy.deepcopy(hgvs_refreshed_variant) - if re.match('NM_', str(for_location_c)): + if 'NM_' in str(for_location_c): for_location_c = self.variant.no_norm_evm.n_to_c(self.tx_hgvs_not_delins) gps = for_location_c.posedit.pos.end.base - 1 gpe = for_location_c.posedit.pos.end.base @@ -2184,11 +2144,9 @@ def transcript_disparity(self, reverse_normalized_hgvs_genomic, stored_hgvs_not_ return hgvs_refreshed_variant def edit_output(self, hgvs_refreshed_variant, saved_hgvs_coding): - if re.match('NM_', str(hgvs_refreshed_variant.ac)) and not re.search('c', str( - hgvs_refreshed_variant.type)): + if 'NM_' in str(hgvs_refreshed_variant.ac) and 'c' not in str(hgvs_refreshed_variant.type): hgvs_refreshed_variant = self.variant.evm.n_to_c(hgvs_refreshed_variant) - else: - pass + try: hgvs_refreshed_variant = self.variant.hn.normalize(hgvs_refreshed_variant) if hgvs_refreshed_variant.posedit.edit.type == 'delins' and \ @@ -2212,26 +2170,19 @@ def edit_output(self, hgvs_refreshed_variant, saved_hgvs_coding): except Exception as e: error = str(e) # Ensure the final variant is not intronic nor does it cross exon boundaries - if re.match('Normalization of intronic variants is not supported', - error) or re.match( - 'Unsupported normalization of variants spanning the exon-intron boundary', - error): + if 'Normalization of intronic variants is not supported' in error or \ + 'Unsupported normalization of variants spanning the exon-intron boundary' in error: hgvs_refreshed_variant = saved_hgvs_coding - else: - pass + return hgvs_refreshed_variant def logic_check(self, hgvs_not_delins, rn_tx_hgvs_not_delins, hgvs_coding, do_continue=False, offset_check=False): # Logic - if len(hgvs_not_delins.posedit.edit.ref) < len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - 
gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len( - hgvs_not_delins.posedit.edit.ref) + if len(hgvs_not_delins.posedit.edit.ref) < len(rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(rn_tx_hgvs_not_delins.posedit.edit.ref) - len(hgvs_not_delins.posedit.edit.ref) self.disparity_deletion_in = ['chromosome', gap_length] - elif len(hgvs_not_delins.posedit.edit.ref) > len( - rn_tx_hgvs_not_delins.posedit.edit.ref): - gap_length = len(hgvs_not_delins.posedit.edit.ref) - len( - rn_tx_hgvs_not_delins.posedit.edit.ref) + elif len(hgvs_not_delins.posedit.edit.ref) > len(rn_tx_hgvs_not_delins.posedit.edit.ref): + gap_length = len(hgvs_not_delins.posedit.edit.ref) - len(rn_tx_hgvs_not_delins.posedit.edit.ref) self.disparity_deletion_in = ['transcript', gap_length] else: re_capture_tx_variant = [] @@ -2248,7 +2199,8 @@ def logic_check(self, hgvs_not_delins, rn_tx_hgvs_not_delins, hgvs_coding, do_co continue fn.exceptPass() if offset_check: - if hgvs_t_possibility.posedit.pos.start.offset != 0 or hgvs_t_possibility.posedit.pos.end.offset != 0: + if hgvs_t_possibility.posedit.pos.start.offset != 0 or \ + hgvs_t_possibility.posedit.pos.end.offset != 0: continue ins_ref = self.validator.sf.fetch_seq(hgvs_t_possibility.ac, hgvs_t_possibility.posedit.pos.start.base - 1, @@ -2261,34 +2213,29 @@ def logic_check(self, hgvs_not_delins, rn_tx_hgvs_not_delins, hgvs_coding, do_co fn.exceptPass() hgvs_t_possibility.posedit.edit.ref = ins_ref hgvs_t_possibility.posedit.edit.alt = ins_ref[ - 0] + hgvs_t_possibility.posedit.edit.alt + \ - ins_ref[1] + 0] + hgvs_t_possibility.posedit.edit.alt + ins_ref[1] if internal_possibility.posedit.edit.type == 'ins': ins_ref = self.validator.sf.fetch_seq(internal_possibility.ac, internal_possibility.posedit.pos.start.base - 1, internal_possibility.posedit.pos.end.base) internal_possibility.posedit.edit.ref = ins_ref internal_possibility.posedit.edit.alt = ins_ref[ - 0] + internal_possibility.posedit.edit.alt + \ - ins_ref[1] + 0] 
+ internal_possibility.posedit.edit.alt + ins_ref[1] - if len(hgvs_t_possibility.posedit.edit.ref) < len( - internal_possibility.posedit.edit.ref): - gap_length = len(internal_possibility.posedit.edit.ref) - len( - hgvs_t_possibility.posedit.edit.ref) + if len(hgvs_t_possibility.posedit.edit.ref) < len(internal_possibility.posedit.edit.ref): + gap_length = len(internal_possibility.posedit.edit.ref) - len(hgvs_t_possibility.posedit.edit.ref) re_capture_tx_variant = ['transcript', gap_length, hgvs_t_possibility] hgvs_not_delins = internal_possibility self.hgvs_genomic_5pr = internal_possibility break - if re_capture_tx_variant != []: + if re_capture_tx_variant: try: self.tx_hgvs_not_delins = self.validator.vm.c_to_n(re_capture_tx_variant[2]) except: self.tx_hgvs_not_delins = re_capture_tx_variant[2] self.disparity_deletion_in = re_capture_tx_variant[0:-1] - else: - pass + return hgvs_not_delins def get_hgvs_seek_var(self, hgvs_genomic, hgvs_coding, ori=None, with_query_genomic=False): @@ -2309,7 +2256,7 @@ def get_hgvs_seek_var(self, hgvs_genomic, hgvs_coding, ori=None, with_query_geno # Map to the transcript and test for movement try: hgvs_seek_var = self.variant.evm.g_to_t(query_genomic, hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: + except hgvs.exceptions.HGVSError: hgvs_seek_var = hgvs_coding if with_query_genomic: @@ -2337,11 +2284,9 @@ def rev_norm_ins(self, hgvs_coding, hgvs_genomic): most_3pr_hgvs_transcript_variant.posedit.edit.ref = pr3_ref most_5pr_hgvs_transcript_variant.posedit.edit.ref = pr5_ref most_3pr_hgvs_transcript_variant.posedit.edit.alt = pr3_ref[0] + \ - most_3pr_hgvs_transcript_variant.posedit.edit.alt + \ - pr3_ref[1] + most_3pr_hgvs_transcript_variant.posedit.edit.alt + pr3_ref[1] most_5pr_hgvs_transcript_variant.posedit.edit.alt = pr5_ref[0] + \ - most_5pr_hgvs_transcript_variant.posedit.edit.alt + \ - pr5_ref[1] + most_5pr_hgvs_transcript_variant.posedit.edit.alt + pr5_ref[1] # Map to the genome 
genomic_from_most_3pr_hgvs_transcript_variant = self.validator.vm.t_to_g( most_3pr_hgvs_transcript_variant, hgvs_genomic.ac) From 3d721ee629c0e561b5f642c93f84e8f9a9578524 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 16 May 2019 14:10:22 +0100 Subject: [PATCH 091/223] Added function to turn hgvs dup into a string indel --- VariantValidator/modules/gapped_mapping.py | 63 +++++-------------- VariantValidator/modules/vvFunctions.py | 15 ++++- VariantValidator/modules/vvMixinConverters.py | 32 +++------- 3 files changed, 37 insertions(+), 73 deletions(-) diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index 30bf8f1d..616e3b4b 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -285,9 +285,7 @@ def gapped_g_to_c(self, rel_var): self.tx_hgvs_not_delins.posedit.edit.alt = '' except Exception as e: if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = self.tx_hgvs_not_delins.ac + ':' + self.tx_hgvs_not_delins.type + '.' + str( - self.tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - self.tx_hgvs_not_delins.posedit.pos.end) + 'del' + self.tx_hgvs_not_delins.posedit.edit.ref + 'ins' + self.tx_hgvs_not_delins.posedit.edit.ref + self.tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins_delins_from_dup = fn.hgvs_dup2indel(self.tx_hgvs_not_delins) self.tx_hgvs_not_delins = self.validator.hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) # GAP IN THE TRANSCRIPT DISPARITY DETECTED @@ -848,11 +846,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): self.tx_hgvs_not_delins.posedit.edit.alt = '' except Exception as e: if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = self.tx_hgvs_not_delins.ac + ':' + \ - self.tx_hgvs_not_delins.type + '.' 
+ str( - self.tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - self.tx_hgvs_not_delins.posedit.pos.end) + 'del' + self.tx_hgvs_not_delins.posedit.edit.ref\ - + 'ins' + self.tx_hgvs_not_delins.posedit.edit.ref + self.tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins_delins_from_dup = fn.hgvs_dup2indel(self.tx_hgvs_not_delins) self.tx_hgvs_not_delins = self.validator.hp.parse_hgvs_variant( tx_hgvs_not_delins_delins_from_dup) @@ -1101,11 +1095,7 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic): self.tx_hgvs_not_delins.posedit.edit.alt = '' except Exception as e: if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = self.tx_hgvs_not_delins.ac + ':' + \ - self.tx_hgvs_not_delins.type + '.' + str( - self.tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - self.tx_hgvs_not_delins.posedit.pos.end) + 'del' + self.tx_hgvs_not_delins.posedit.edit.ref + \ - 'ins' + self.tx_hgvs_not_delins.posedit.edit.ref + self.tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins_delins_from_dup = fn.hgvs_dup2indel(self.tx_hgvs_not_delins) self.tx_hgvs_not_delins = self.validator.hp.parse_hgvs_variant(tx_hgvs_not_delins_delins_from_dup) # GAP IN THE TRANSCRIPT DISPARITY DETECTED @@ -1564,12 +1554,7 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a self.tx_hgvs_not_delins.posedit.edit.alt = '' except Exception as e: if str(e) == "'Dup' object has no attribute 'alt'": - tx_hgvs_not_delins_delins_from_dup = self.tx_hgvs_not_delins.ac + ':' + \ - self.tx_hgvs_not_delins.type + '.' 
+ str( - self.tx_hgvs_not_delins.posedit.pos.start) + '_' + str( - self.tx_hgvs_not_delins.posedit.pos.end) + 'del' + self.tx_hgvs_not_delins.posedit.edit.ref\ - + 'ins' + self.tx_hgvs_not_delins.posedit.edit.ref + \ - self.tx_hgvs_not_delins.posedit.edit.ref + tx_hgvs_not_delins_delins_from_dup = fn.hgvs_dup2indel(self.tx_hgvs_not_delins) self.tx_hgvs_not_delins = self.validator.hp.parse_hgvs_variant( tx_hgvs_not_delins_delins_from_dup) @@ -1866,11 +1851,7 @@ def transcript_disparity(self, reverse_normalized_hgvs_genomic, stored_hgvs_not_ tx_gap_fill_variant.posedit.edit.alt = '' except Exception as e: if str(e) == "'Dup' object has no attribute 'alt'": - tx_gap_fill_variant_delins_from_dup = tx_gap_fill_variant.ac + ':' + tx_gap_fill_variant.type + \ - '.' + str( - tx_gap_fill_variant.posedit.pos.start) + '_' + str( - tx_gap_fill_variant.posedit.pos.end) + 'del' + tx_gap_fill_variant.posedit.edit.ref + \ - 'ins' + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant.posedit.edit.ref + tx_gap_fill_variant_delins_from_dup = fn.hgvs_dup2indel(tx_gap_fill_variant) tx_gap_fill_variant = self.validator.hp.parse_hgvs_variant( tx_gap_fill_variant_delins_from_dup) @@ -1907,20 +1888,10 @@ def transcript_disparity(self, reverse_normalized_hgvs_genomic, stored_hgvs_not_ genomic_gap_fill_variant_alt.posedit.edit.alt = 'X' except Exception as e: if str(e) == "'Dup' object has no attribute 'alt'": - genomic_gap_fill_variant_delins_from_dup = genomic_gap_fill_variant.ac + ':' \ - + genomic_gap_fill_variant.type + '.' 
+ str( - genomic_gap_fill_variant.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant.posedit.pos.end.base) + 'del' + \ - genomic_gap_fill_variant.posedit.edit.ref + 'ins' + genomic_gap_fill_variant.posedit.edit.ref \ - + genomic_gap_fill_variant.posedit.edit.ref + genomic_gap_fill_variant_delins_from_dup = fn.hgvs_dup2indel(genomic_gap_fill_variant) genomic_gap_fill_variant = self.validator.hp.parse_hgvs_variant( genomic_gap_fill_variant_delins_from_dup) - genomic_gap_fill_variant_alt_delins_from_dup = genomic_gap_fill_variant_alt.ac + ':' \ - + genomic_gap_fill_variant_alt.type + '.' + str( - genomic_gap_fill_variant_alt.posedit.pos.start.base) + '_' + str( - genomic_gap_fill_variant_alt.posedit.pos.end.base) + 'del' + \ - genomic_gap_fill_variant_alt.posedit.edit.ref + 'ins' + \ - genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt.posedit.edit.ref + genomic_gap_fill_variant_alt_delins_from_dup = fn.hgvs_dup2indel(genomic_gap_fill_variant_alt) genomic_gap_fill_variant_alt = self.validator.hp.parse_hgvs_variant( genomic_gap_fill_variant_alt_delins_from_dup) @@ -2324,9 +2295,8 @@ def rev_norm_ins(self, hgvs_coding, hgvs_genomic): genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' except Exception as e: if str(e) == "'Dup' object has no attribute 'alt'": - genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_3pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_3pr_hgvs_transcript_variant.type + '.' 
+ str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup = fn.hgvs_dup2indel( + genomic_from_most_3pr_hgvs_transcript_variant) genomic_from_most_3pr_hgvs_transcript_variant = self.validator.hp.parse_hgvs_variant( genomic_from_most_3pr_hgvs_transcript_variant_delins_from_dup) @@ -2335,9 +2305,8 @@ def rev_norm_ins(self, hgvs_coding, hgvs_genomic): most_3pr_hgvs_transcript_variant.posedit.edit.alt = '' except Exception as e: if str(e) == "'Dup' object has no attribute 'alt'": - most_3pr_hgvs_transcript_variant_delins_from_dup = most_3pr_hgvs_transcript_variant.ac + ':' + most_3pr_hgvs_transcript_variant.type + '.' + str( - most_3pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_3pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant.posedit.edit.ref + most_3pr_hgvs_transcript_variant_delins_from_dup = fn.hgvs_dup2indel( + most_3pr_hgvs_transcript_variant) most_3pr_hgvs_transcript_variant = self.validator.hp.parse_hgvs_variant( most_3pr_hgvs_transcript_variant_delins_from_dup) @@ -2346,9 +2315,8 @@ def rev_norm_ins(self, hgvs_coding, hgvs_genomic): genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' except Exception as e: if str(e) == "'Dup' object has no attribute 'alt'": - genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = genomic_from_most_5pr_hgvs_transcript_variant.ac + ':' + genomic_from_most_5pr_hgvs_transcript_variant.type + '.' 
+ str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant.posedit.edit.ref + genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup = fn.hgvs_dup2indel( + genomic_from_most_5pr_hgvs_transcript_variant) genomic_from_most_5pr_hgvs_transcript_variant = self.validator.hp.parse_hgvs_variant( genomic_from_most_5pr_hgvs_transcript_variant_delins_from_dup) @@ -2357,9 +2325,8 @@ def rev_norm_ins(self, hgvs_coding, hgvs_genomic): most_5pr_hgvs_transcript_variant.posedit.edit.alt = '' except Exception as e: if str(e) == "'Dup' object has no attribute 'alt'": - most_5pr_hgvs_transcript_variant_delins_from_dup = most_5pr_hgvs_transcript_variant.ac + ':' + most_5pr_hgvs_transcript_variant.type + '.' + str( - most_5pr_hgvs_transcript_variant.posedit.pos.start.base) + '_' + str( - most_5pr_hgvs_transcript_variant.posedit.pos.end.base) + 'del' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + 'ins' + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant.posedit.edit.ref + most_5pr_hgvs_transcript_variant_delins_from_dup = fn.hgvs_dup2indel( + most_5pr_hgvs_transcript_variant) most_5pr_hgvs_transcript_variant = self.validator.hp.parse_hgvs_variant( most_5pr_hgvs_transcript_variant_delins_from_dup) diff --git a/VariantValidator/modules/vvFunctions.py b/VariantValidator/modules/vvFunctions.py index 54a93f08..3bbdd1f3 100644 --- a/VariantValidator/modules/vvFunctions.py +++ b/VariantValidator/modules/vvFunctions.py @@ -456,6 +456,20 @@ def n_inversion(ref_seq, del_seq, inv_seq, interval_start, interval_end): return sequence +def hgvs_dup2indel(hgvs_seq): + """Will convert hgvs variant object dup into a string with del and ins""" + string = 
"%s:%s.%s_%sdel%sins%s%s" % ( + hgvs_seq.ac, + hgvs_seq.type, + hgvs_seq.posedit.pos.start.base, + hgvs_seq.posedit.pos.end.base, + hgvs_seq.posedit.edit.ref, + hgvs_seq.posedit.edit.ref, + hgvs_seq.posedit.edit.ref + ) + return string + + # Custom Exceptions class VariantValidatorError(Exception): pass @@ -463,4 +477,3 @@ class mergeHGVSerror(Exception): pass class alleleVariantError(Exception): pass - diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index 1c8ce395..51e1680b 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -613,13 +613,9 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): transcript_gap_alt_n.posedit.edit.alt = 'X' except Exception as e: if str(e) == "'Dup' object has no attribute 'alt'": - transcript_gap_n_delins_from_dup = transcript_gap_n.ac + ':' + transcript_gap_n.type + '.' + str( - transcript_gap_n.posedit.pos.start.base) + '_' + str( - transcript_gap_n.posedit.pos.end.base) + 'del' + transcript_gap_n.posedit.edit.ref + 'ins' + transcript_gap_n.posedit.edit.ref + transcript_gap_n.posedit.edit.ref + transcript_gap_n_delins_from_dup = fn.hgvs_dup2indel(transcript_gap_n) transcript_gap_n = self.hp.parse_hgvs_variant(transcript_gap_n_delins_from_dup) - transcript_gap_alt_n_delins_from_dup = transcript_gap_alt_n.ac + ':' + transcript_gap_alt_n.type + '.' 
+ str( - transcript_gap_alt_n.posedit.pos.start.base) + '_' + str( - transcript_gap_alt_n.posedit.pos.end.base) + 'del' + transcript_gap_alt_n.posedit.edit.ref + 'ins' + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n_delins_from_dup = fn.hgvs_dup2indel(transcript_gap_alt_n) transcript_gap_alt_n = self.hp.parse_hgvs_variant(transcript_gap_alt_n_delins_from_dup) # Split the reference and replacing alt sequence into a dictionary @@ -762,13 +758,9 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): transcript_gap_alt_n.posedit.edit.alt = 'X' except Exception as e: if str(e) == "'Dup' object has no attribute 'alt'": - transcript_gap_n_delins_from_dup = transcript_gap_n.ac + ':' + transcript_gap_n.type + '.' + str( - transcript_gap_n.posedit.pos.start.base) + '_' + str( - transcript_gap_n.posedit.pos.end.base) + 'del' + transcript_gap_n.posedit.edit.ref + 'ins' + transcript_gap_n.posedit.edit.ref + transcript_gap_n.posedit.edit.ref + transcript_gap_n_delins_from_dup = fn.hgvs_dup2indel(transcript_gap_n) transcript_gap_n = self.hp.parse_hgvs_variant(transcript_gap_n_delins_from_dup) - transcript_gap_alt_n_delins_from_dup = transcript_gap_alt_n.ac + ':' + transcript_gap_alt_n.type + '.' 
+ str( - transcript_gap_alt_n.posedit.pos.start.base) + '_' + str( - transcript_gap_alt_n.posedit.pos.end.base) + 'del' + transcript_gap_alt_n.posedit.edit.ref + 'ins' + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n_delins_from_dup = fn.hgvs_dup2indel(transcript_gap_alt_n) transcript_gap_alt_n = self.hp.parse_hgvs_variant(transcript_gap_alt_n_delins_from_dup) # Split the reference and replacing alt sequence into a dictionary @@ -1357,13 +1349,9 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): transcript_gap_alt_n.posedit.edit.alt = 'X' except Exception as e: if str(e) == "'Dup' object has no attribute 'alt'": - transcript_gap_n_delins_from_dup = transcript_gap_n.ac + ':' + transcript_gap_n.type + '.' + str( - transcript_gap_n.posedit.pos.start.base) + '_' + str( - transcript_gap_n.posedit.pos.end.base) + 'del' + transcript_gap_n.posedit.edit.ref + 'ins' + transcript_gap_n.posedit.edit.ref + transcript_gap_n.posedit.edit.ref + transcript_gap_n_delins_from_dup = fn.hgvs_dup2indel(transcript_gap_n) transcript_gap_n = self.hp.parse_hgvs_variant(transcript_gap_n_delins_from_dup) - transcript_gap_alt_n_delins_from_dup = transcript_gap_alt_n.ac + ':' + transcript_gap_alt_n.type + '.' 
+ str( - transcript_gap_alt_n.posedit.pos.start.base) + '_' + str( - transcript_gap_alt_n.posedit.pos.end.base) + 'del' + transcript_gap_alt_n.posedit.edit.ref + 'ins' + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n_delins_from_dup = fn.hgvs_dup2indel(transcript_gap_alt_n) transcript_gap_alt_n = self.hp.parse_hgvs_variant(transcript_gap_alt_n_delins_from_dup) # Split the reference and replacing alt sequence into a dictionary @@ -1506,13 +1494,9 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): transcript_gap_alt_n.posedit.edit.alt = 'X' except Exception as e: if str(e) == "'Dup' object has no attribute 'alt'": - transcript_gap_n_delins_from_dup = transcript_gap_n.ac + ':' + transcript_gap_n.type + '.' + str( - transcript_gap_n.posedit.pos.start.base) + '_' + str( - transcript_gap_n.posedit.pos.end.base) + 'del' + transcript_gap_n.posedit.edit.ref + 'ins' + transcript_gap_n.posedit.edit.ref + transcript_gap_n.posedit.edit.ref + transcript_gap_n_delins_from_dup = fn.hgvs_dup2indel(transcript_gap_n) transcript_gap_n = self.hp.parse_hgvs_variant(transcript_gap_n_delins_from_dup) - transcript_gap_alt_n_delins_from_dup = transcript_gap_alt_n.ac + ':' + transcript_gap_alt_n.type + '.' 
+ str( - transcript_gap_alt_n.posedit.pos.start.base) + '_' + str( - transcript_gap_alt_n.posedit.pos.end.base) + 'del' + transcript_gap_alt_n.posedit.edit.ref + 'ins' + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n.posedit.edit.ref + transcript_gap_alt_n_delins_from_dup = fn.hgvs_dup2indel(transcript_gap_alt_n) transcript_gap_alt_n = self.hp.parse_hgvs_variant(transcript_gap_alt_n_delins_from_dup) # Split the reference and replacing alt sequence into a dictionary From 0c5ccd2367c703598c615fb259763a4b2e5e3b86 Mon Sep 17 00:00:00 2001 From: Teri Forey Date: Fri, 17 May 2019 13:11:54 +0100 Subject: [PATCH 092/223] Restructuring py3 travis (#48) * Created initial travis file * Updated travis file, set seqrepo to download via rsync * Added configuration for travis and empty database sql * Changed protein LRG field size in empty db sql * Fixed problem with it assuming records already exist in vv db * Updated environment file and travis * Set to use remote postgres * Added codecov to travis --- .coverage | 1 + .travis.yml | 73 +++++++++++++++ VariantValidator/modules/vvDatabase.py | 11 +-- configuration/empty_vv_db.sql | 120 +++++++++++++++++++++++++ configuration/travis.ini | 53 +++++++++++ environment.yml | 1 + 6 files changed, 254 insertions(+), 5 deletions(-) create mode 100644 .coverage create mode 100644 .travis.yml create mode 100644 configuration/empty_vv_db.sql create mode 100644 configuration/travis.ini diff --git a/.coverage b/.coverage new file mode 100644 index 00000000..5a37b25b --- /dev/null +++ b/.coverage @@ -0,0 +1 @@ +!coverage.py: This is a private format, don't read it 
directly!{"lines":{"/local/VariantValidator/variantValidator/VariantValidator/__init__.py":[1,3,5],"/local/VariantValidator/variantValidator/VariantValidator/configure.py":[1,2,3,5,8,25,31,32,9,10,12,16,20],"/local/VariantValidator/variantValidator/VariantValidator/variantValidator.py":[1,3,6,7,9,12,13,15,26,27],"/local/VariantValidator/variantValidator/VariantValidator/modules/__init__.py":[1],"/local/VariantValidator/variantValidator/VariantValidator/modules/vvMixinCore.py":[3,5,6,7,8,9,10,11,12,13,14,15,16,17,20,21,23,24,25,26,28,38,39,40,48,50,51,52,53,54,57,58,68,70,75,77,82,96,102,105,106,107,108,109,110,111,114,117,122,131,132,133,135,138,139,153,154,155,156,158,159,160,161,165,166,169,170,171,175,177,178,181,182,232,233,234,235,239,241,242,244,247,248,249,250,251,252,254,255,256,257,258,265,268,270,271,279,289,316,331,344,410,418,424,473,483,484,485,486,487,488,489,526,529,537,538,539,585,592,596,606,607,608,609,611,612,613,720,725,736,737,738,773,786,787,788,789,795,807,808,874,883,889,890,894,900,910,911,912,913,914,920,921,927,929,938,939,942,943,946,947,948,949,950,951,952,953,954,955,965,984,986,988,989,990,992,993,1000,1001,1002,1003,1004,1008,1009,1010,1011,1012,1016,1017,1018,1019,1020,1035,1036,1051,1062,1063,1064,1065,1066,1076,1084,1127,1133,1140,1147,1155,1164,1171,1179,1180,1213,1214,1401,1501,1502,1779,1784,1785,1818,1865,1866,1883,1887,1888,1889,1890,1891,1895,1905,1906,1910,1911,1914,1916,1917,1920,1924,1927,1928,1931,1935,1944,1945,1947,1963,2043,3144,3147,3149,3151,3154,3166,3168,3169,3170,3171,3213,3214,3215,3220,3221,3222,3224,3225,3256,3290,3291,3292,3305,3307,3309,3310,3311,3320,3321,3323,3324,3325,3333,3334,3344,3359,3360,3361,3710,3816,3820,3821,3822,3874,3875,3876,3906,3907,3908,3911,3927,3928,3929,3931,3932,3937,3940,3941,3946,3949,3961,3962,3965,3966,3967,3970,3971,3976,3979,3980,3981,3982,3983,3984,3985,3996,3997,3998,4003,4004,4005,4008,4009,4010,4011,4012,4013,4015,4017,4019,4020,4021,4025,4027,4034,4035,4054,4070,4071,4075,4076
,4086,4087,4088,4091,4092,4093,4094,4095,4096,4097,4099,4101,4103,4104,4105,4109,4111,4118,4119,4139,4155,4156,4160,4161,4170,4171,4281,4282,4283,4286,4288,4291,4292,4293,4294,4297,4301,4302,4305,4306,4309,4313,4314,4317,4318,4354,4356,4359,4360,4361,4362,4363,4364,4367,4370,4371,4374,4375,4376,4379,4382,4383,4394,4417,4419,4420,4424,4425,4429,4430,4432,4433,4438,4440,4441,4465,4466,4467,4468,4469,4482,4483,4484,4497,4501,4503,4505,4572,4573,4574,4579,4582,4596,4613,4631,4644,4662,4681,4682,4686,4687,4692,4693,4694,4697,4698,4714,4723,4724,4733,4744,4745,4749,4750,4767,4771,4772,4796,4797,4808,5187,5204,5208,5209,5214,5215,5230,5231,5234,5241,5242,5243,5262,4773,4774,4775,4776,4777,4778,5266,5282,5283,5310,5320,5322,5324,5327,5328,5329,5330,5331,5332,5335,5336,5340,5341,5344,5345,5349,5352,5353,5354,5357,5358,5380,5382,5383,5387,5388,5392,5393,5394,5395,5400,5403,5404,5405,5407,5408,5409,5410,5414,5415,5416,5417,5419,5422,5423,5425,5427,5491,5492,5493,5500,5502,5515,5529,5547,5559,5579,5598,5599,5603,5604,5609,5610,5612,5615,5616,5632,5641,5642,5650,5660,5661,5666,5667,5683,5687,5691,5692,5703,6069,6083,6086,6087,6091,6092,6093,6102,6123,6126,6127,6128,6132,6133,6135,6136,6137,6138,6139,6141,6142,6147,6148,6150,6151,6159,6160,6167,6168,6169,6170,6182,6183,6184,6189,6190,6192,6251,6253,6254,6258,6259,6263,6264,6265,6266,6267,6268,6269,6322,6323,6324,6325,6326,6327,6328,6329,6330,6331,6332,6333,6340,6363,6364,6365,6366,6367,6368,6369,6370,6371,6372,6374,6394,6397,6399,6400,6401,6409,6412,6415,6416,6417,6418,6421,6424,6427,6428,6429,6430,6435,6436,6437,6443,6444,6445,6448,6449,6450,6451,6452,6458,6459,6460,6465,6466,6467,6468,6471,6472,6473,6482,6483,6484,6485,6498,6500,6501,6502,6503,6525,6526,6532,6533,6534,6535,6536,6537,6540,6541,6542,6549,6552,6555,6556,6559,6563,6564,6566,6567,6572,6575,6576,6587,6588,6589,6590,6591,6592,6593,6594,6595,6597,6598,6600,6601,6602,6603,6605,6610,6613,6614,6615,6616,6617,6618,6619,6620,6630,6631,6632,6633,6636,6637,6638,6641,6642,6643
,6644,6645,6646,6648,6650,6652,6653,6654,6658,6659,6660,6667,6668,6687,6703,6704,6708,6709,6716,6717,6718,6721,6722,6723,6724,6725,6726,6727,6729,6731,6733,6734,6735,6739,6740,6741,6748,6749,6768,6784,6785,6789,6790,6797,6798,6917,6918,6919,6920,6923,6925,6926,6927,6929,6933,6934,6937,6938,6939,6978,6980,6983,6984,6985,6986,6987,6988,6991,6994,6995,6998,6999,7000,7003,7004,7005,7013,7015,7038,7040,7041,7045,7046,7049,7050,7052,7053,7058,7060,7061,7085,7086,7087,7088,7089,7090,7091,7104,7108,7110,7112,7193,7194,7196,7198,7199,7213,7228,7247,7248,7249,7262,7282,7299,7302,7303,7307,7308,7313,7314,7315,7317,7318,7336,7344,7345,7354,7364,7365,7370,7371,7389,7393,7394,7420,7421,7431,7822,7838,7842,7843,7844,7849,7850,7863,7864,7867,7875,7876,7877,7896,7395,7396,7397,7398,7400,7401,7900,7901,7929,6669,6670,6671,6672,6675,6676,6677,6681,6683,6686,6750,6751,6752,6753,6756,6757,6758,6762,6764,6767,7113,7114,7115,7316,6930,7949,7950,7952,7963,7964,7966,7967,7968,7969,7970,7971,7972,7976,7977,7979,7980,7981,7982,7983,7996,7989,7990,7991,7992,7993,7997,7998,8000,8001,8002,8003,8004,8008,8051,8052,8053,8057,8058,8059,8060,8061,8062,8063,8066,8067,8068,8072,8073,8074,8079,8080,8081,8082,8083,8084,8085,8086,8087,8094,8095,8096,8097,8098,8099,8100,8101,8102,8103,8104,8105,8106,8107,8110,8111,8112,8115,8125,8127,8131,8132,8133,8134,8135,8136,8140,8141,8146,8149,8152,8161,8177,8178,8239,8240,8241,8242,8243,8244,8246,8248,8256,8257,8258,8259,8260,8263,8264,8265,8268,426,427,433,434,435,436,437,438,439,441,443,444,445,446,447,448,449,450,451,452,453,455,456,458,459,6432,6438,6439,6440,6441,6494,6495,6528,6529,6546,6553,6557,7957,7960,8064,8162,8163,8164,8165,8167,8168,8172,8173,8174,1181,1190,1191,1203,1204,1211,2044,2047,2048,2049,2054,2063,2066,2070,2078,2081,2099,2112,2183,2190,2191,2192,2193,2196,2198,2199,2202,2203,2204,2205,2206,2207,2210,2211,2214,2215,2216,2217,2218,2220,2227,2230,2231,2234,2236,2237,2239,2241,2242,2243,2244,2245,2247,2250,2251,2252,2254,2257,2260,2263,2265,2268
,2278,2279,2280,2281,2283,2305,2307,2308,2312,2313,2317,2318,2319,2320,2325,2327,2328,2351,2352,2353,2354,2355,2368,2369,2370,2384,2386,2389,2390,2392,2394,2395,2396,2398,2399,2400,2401,2402,2404,2405,2406,2415,2416,2418,2419,2420,2421,2422,2423,2424,2425,2428,2454,2458,2459,2467,2469,2470,2471,2483,2497,2515,2527,2546,2565,2566,2567,2568,2569,2604,2605,2611,2612,2629,2630,2640,2996,2998,3000,3001,3006,3007,3069,3070,3071,3074,3075,3076,3085,3106,3116,3117,3121,3124,3127,3128,3130,3133,3134,3135,3136,3137,3138,3139,3141,267,4120,4121,4122,4123,4126,4127,4128,4133,4135,4136,4172,4173,4174,4175,4176,4181,4182,4183,4184,4185,4186,4188,4189,4191,4192,4194,4195,4196,4197,4199,4200,4201,4202,4203,4204,4205,4206,4207,4208,4209,4210,4211,4212,4213,4214,4215,4216,4217,4218,4219,4220,4221,4222,4223,4224,4234,4235,4245,4246,4256,4257,4267,4268,4269,4270,4271,4272,4434,4435,4436,4699,4700,4703,4704,4705,4706,4707,4710,4712,4713,4715,4716,4717,4718,4720,4721,4701,4702,4708,4709,5244,5245,5246,5247,5248,5249,5250,5251,5252,5253,5254,4506,4507,4509,4510,4511,4512,4513,4515,4516,4517,4526,4527,4528,4530,4531,4532,4533,4534,4535,4536,4537,4538,4541,4569,4683,4684,4685,5188,5190,5191,5201,5263,5267,5268,5269,5270,5271,5272,5273,5274,5275,5276,5277,5278,5279,5280,5396,5397,5398,5428,5429,5431,5432,5433,5434,5435,5437,5438,5439,5448,5449,5451,5452,5453,5454,5455,5456,5457,5458,5461,5487,5600,5601,5602,6071,6073,6074,6079,6080,6088,6144,6162,6163,6164,6270,6271,6272,6273,6274,6474,6765,6799,6800,6801,6802,6803,6808,6809,6810,6811,6812,6813,6815,6816,6818,6819,6821,6822,6823,6824,6827,6828,6829,6830,6831,6832,6833,6834,6835,6836,6837,6838,6839,6840,6841,6842,6843,6844,6845,6846,6847,6848,6849,6850,6852,6853,6863,6864,6874,6875,6885,6886,6896,6897,6898,6899,6900,6901,6902,6903,6904,6905,7054,7055,7319,7320,7323,7325,7326,7327,7328,7329,7332,7334,7335,7337,7338,7339,7340,7342,7343,7321,7322,7878,7879,7880,7881,7882,7883,7884,7885,7886,7887,7888,7117,7118,7119,7120,7121,7122,7124,7125,7126,
7137,7138,7139,7140,7142,7143,7144,7145,7146,7147,7148,7149,7150,7151,7152,7155,7156,7190,7304,7305,7306,7824,7825,7835,7897,7158,7169,7170,7171,7172,7174,7175,7176,7177,7178,7179,7180,7181,7182,7183,7184,7187,7188,8142,2235,4055,4056,4057,4058,4059,4060,4067,4068,4140,4141,4142,4143,4144,4145,4152,4153,5192,5195,6124,6688,6689,6690,6691,6692,6693,6700,6701,6769,6770,6771,6772,6773,6774,6781,6782,7826,7829,2100,2102,2103,2104,2105,2106,2314,2315,3077,3078,3086,3087,4426,4427,5389,5390,6094,6095,6103,6104,6260,6261,7047,7048,2570,2571,2576,2577,2578,2592,3010,3011,3013,3014,3015,3016,3017,3018,3020,3021,3022,3023,3024,3025,3026,3027,3029,3031,3032,3036,3037,3039,3040,3044,3045,3048,3049,3050,3051,3053,3054,3055,2329,2330,2331,2334,2335,2336,2340,2341,2342,2344,2349,2345,2346,2347,2357,2358,2359,2361,2366,2362,2363,2364,2372,2373,2374,2376,2381,2377,2378,2379,2516,2517,2529,2531,2533,2534,2535,2536,2537,2538,2542,2543,2544,2545,2593,2594,2595,2598,2599,2613,2614,2615,2616,2619,2620,2621,2624,2626,2627,2641,2643,2646,2647,2648,2791,2818,2861,2905,2906,2949,2950,2954,2955,2956,2957,2958,2959,2962,2963,2964,2965,2966,2967,2968,2969,2970,2971,2972,2973,2974,2975,2976,2978,2979,2980,2981,2982,2983,2985,3120,4036,4037,4038,4039,4042,4043,4044,4048,4050,4051,4138,4284,4442,4443,4444,4445,4448,4449,4450,4454,4455,4456,4458,4463,4459,4460,4461,4471,4472,4473,4475,4480,4476,4477,4478,4486,4487,4488,4490,4495,4491,4492,4493,4632,4633,4634,4645,4647,4649,4650,4651,4652,4653,4654,4658,4659,4660,4661,4695,4751,4752,4753,4754,4757,4758,4759,4761,4763,4768,4810,4813,4814,4815,4816,4965,4992,4993,5037,5038,5084,5085,5129,5130,5131,5135,5136,5137,5138,5139,5140,5141,5144,5145,5146,5147,5148,5149,5150,5151,5152,5153,5158,5159,5160,5161,5162,5163,5164,5165,5167,5168,5169,5170,5171,5173,5175,5548,5549,5562,5564,5566,5567,5568,5569,5570,5571,5575,5576,5577,5578,5613,5668,5669,5670,5671,5674,5675,5676,5679,5681,5688,5704,5706,5709,5710,5711,5856,5883,5926,5970,5971,6014,6015,6019,6020,6021,
6022,6023,6024,6027,6028,6029,6030,6031,6032,6033,6034,6035,6036,6041,6042,6043,6044,6045,6046,6047,6048,6050,6051,6052,6053,6054,6055,6057,6684,7062,7063,7064,7065,7068,7069,7070,7074,7075,7076,7078,7083,7079,7080,7081,7093,7094,7095,7097,7102,7098,7099,7100,7250,7251,7252,7264,7266,7268,7269,7270,7271,7272,7273,7277,7278,7279,7280,7281,7372,7373,7375,7376,7379,7380,7381,7383,7384,7386,7390,7434,7435,7436,7593,7620,7621,7668,7669,7716,7717,7764,7765,7766,7767,7771,7772,7773,7774,7775,7776,7777,7780,7781,7782,7783,7784,7785,7786,7787,7788,7789,7794,7795,7796,7797,7798,7799,7800,7801,7803,7804,7805,7806,7807,7809,7811,4688,4689,4690,5236,5237,5605,5606,5607,7309,7310,7311,7869,7870,540,541,542,543,544,545,546,547,548,549,550,555,556,572,573,576,593,594,1156,1165,1166,1167,1168,1169,2572,2573,2574,2472,2473,2475,2476,2477,2478,2479,2480,2649,2650,2651,2652,2654,2655,2656,2667,2673,2674,2675,2676,2677,2678,2680,2681,2684,2685,2686,2688,2689,2692,2693,2696,2697,2713,2722,2723,2724,2733,2734,2735,2736,2737,2738,2740,2747,2748,2749,2750,2752,2755,2756,2757,2758,2761,2759,2762,2763,2766,2767,2768,2771,2772,2774,2775,2776,2779,2780,2781,2782,2786,2787,2788,2789,4636,4637,4638,4639,4640,4641,4817,4818,4819,4820,4822,4825,4826,4827,4838,4839,4840,4841,4842,4843,4851,4852,4855,4856,4857,4859,4860,4863,4864,4867,4868,4884,4893,4894,4895,4905,4906,4907,4908,4909,4910,4912,4919,4920,4921,4922,4923,4928,4929,4930,4931,4934,4932,4935,4936,4939,4940,4941,4944,4945,4947,4948,4949,4952,4953,4954,4955,4956,4957,4961,4962,5551,5552,5553,5554,5555,5556,5712,5713,5714,5715,5718,5719,5720,5731,5732,5733,5734,5735,5736,5744,5745,5748,5749,5750,5752,5753,5756,5757,5760,5761,5777,5786,5787,5788,5797,5798,5799,5800,5801,5802,5804,5811,5812,5813,5814,5819,5820,5821,5822,5825,5823,5826,5827,5830,5831,5832,5835,5836,5838,5839,5840,5843,5844,5845,5846,5847,5848,5852,5853,7254,7255,7256,7257,7258,7259,7437,7438,7439,7440,7441,7444,7445,7446,7457,7458,7459,7460,7461,7462,7463,7464,7474,7475,7478,747
9,7480,7482,7483,7486,7487,7490,7491,7507,7516,7517,7518,7519,7529,7530,7531,7532,7533,7534,7536,7543,7544,7545,7546,7547,7552,7553,7554,7555,7556,7557,7560,7558,7561,7562,7565,7566,7567,7570,7571,7574,7575,7576,7579,7580,7581,7582,7583,7584,7589,7590,7230,7231,7232,7233,7237,7238,7239,7240,7241,7242,7465,7466,7467,7468,7469,7470,7471,7472,7549,7586,7587,2795,2796,2799,2800,2801,2802,2803,2804,2805,2806,2815,2816,2991,2992,2993,4130,4969,4970,4973,4974,4975,4976,4977,4978,4979,4980,4989,4990,5181,5183,5184,5210,5860,5861,5864,5865,5866,5867,5868,5869,5870,5871,5880,5881,6063,6064,6065,6760,7597,7598,7601,7602,7603,7604,7605,7606,7607,7608,7617,7618,7818,7819,7845,6679,5313,5314,5315,5316,5317,5360,5361,5365,5366,5370,5371,5372,5373,5378,6194,6195,6199,6200,6204,6205,6206,6211,6216,6217,6249,7931,7932,6275,6276,6288,6289,6290,6300,6301,6302,6307,6308,6309,6310,6317,3912,345,349,350,352,354,355,356,357,358,359,360,361,362,363,365,366,374,396,397,403,404,405,2285,2286,2290,2291,2295,2296,2297,2298,2303,3056,3057,3066,8011,8012,8013,8014,8015,8016,8030,8032,8033,8034,8035,8036,8037,8038,8039,8044,8045,6446,1037,1038,1041,1042,1046,1217,1218,1302,1303,1310,1323,1326,3293,3294,3295,3296,3933,3934,3950,3951,3952,5374,5375,5376,6218,6219,6220,6221,6222,6223,6224,6225,6228,6229,6232,6234,6235,6236,6237,6244,6577,6578,6579,6580,6582,1403,1404,1405,1406,1407,1419,1420,1421,1422,1043,1044,1219,1220,1221,1240,1241,1242,1243,1244,1245,1246,1247,1248,1249,1250,1251,1257,1258,1263,1264,1266,1267,1268,1269,1270,1271,1272,1273,1274,1275,1276,1277,1278,1279,1282,1283,1284,1288,1289,1292,1293,1294,1295,1296,1300,1328,1329,1330,1331,1332,1333,1334,1335,1336,1337,1338,1339,1345,1350,1352,1353,1354,1355,1356,1357,1358,1359,1360,1361,1362,1363,1364,1365,1366,1367,1368,1370,1371,1374,1375,1378,1379,1380,1381,615,616,617,618,619,620,621,622,623,625,626,627,628,629,630,631,632,633,634,636,637,638,639,640,641,642,643,644,645,646,647,648,649,650,651,653,670,671,701,702,703,704,705,706,707,714,7
15,716,718,1423,1437,1438,1439,1483,1484,1491,1492,3226,3227,3247,3248,3253,3254,3346,3347,3349,3350,3352,3353,3362,3364,3366,3467,3468,3470,3471,3472,3473,3485,3487,3488,3493,3494,3495,3877,3878,3879,3880,3881,3882,3883,3897,3898,3899,6207,6208,6209,6210,6504,6505,6506,6507,6508,6509,6510,6511,6512,6513,6514,6515,6517,6518,3336,3337,3339,3340,3342,3343,1485,1486,1487,1488,1489,4053,4310,7936,7941,7942,7943,7944,7945,7946,7947,8055,1554,1555,1558,1559,1560,1561,1562,1563,1608,1619,1621,1622,1623,1624,1625,1637,1641,1652,1653,1654,1655,1656,1668,1672,1673,1674,1712,1713,3580,3647,3648,3650,3653,3655,3656,6303,6304,6520,8091,1503,1505,1506,1525,1535,1536,1537,1148,1149,1150,1151,1152,1786,1787,1788,1791,1792,1805,1806,1807,1808,1809,1810,1811,7958,6334,6335,6336,6341,6342,6343,6344,6346,6347,6348,6349,6350,6351,6352,6353,6355,6356,6359,6360,290,292,296,297,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,375,376,377,378,379,380,381,382,383,384,654,663,665,2299,2300,2301,2321,2322,2323,385,3110,3111,3113,3990,3991,4396,4397,4401,4402,4406,4407,4409,4410,4415,6212,6213,6214,6215,4006,4007,4077,4081,4082,4083,4089,4090,4162,4165,4166,4167,5259,6624,6625,7017,7018,7022,7023,7027,7028,7030,7031,7036,6639,6640,6710,6712,6713,6719,6720,6791,6793,6794,7893,7032,7033,7346,7347,7348,7349,7350,7351,7352,7355,7356,7359,3172,3173,3174,3188,3189,3192,3195,3196,3197,3198,655,656,657,658,659,661,966,967,968,975,976,977,978,979,969,2082,2083,2084,2085,2086,2090,2091,2092,2221,2222,2223,2224,2225,2226,2232,2233,3038,3046,428,429,430,431,944,1067,1070,1072,1085,1090,1092,1095,1096,1097,1098,1099,1100,1101,1102,1103,1106,1107,1093,1094,1108,1111,1112,1113,1114,1115,1116,1117,1118,1119,1120,1123,1124,454,460,461,463,2093,2094,280,282,286,287,288,369,370,371,4411,4412,8048,8021,8022,8023,8024,8025,2115,2116,2146,2147,2148,2149,2157,2158,2160,2161,2162,2163,2164,2166,2168,2170,2171,2172,2173,2174,8179,8180,8181,8182,8187,8188,8191,8200,8203,8206,8207,8208,8211,8214,8215,8
216,8217,8218,8219,8220,8229,8230,8231,8194,8197,8198,8199,8235,2862,2866,2867,2868,2869,2870,2871,2874,2875,2876,2877,2878,2879,2880,2881,2882,2883,2888,2889,2890,2891,2892,2893,2894,2895,2897,2898,2899,2900,2901,2902,2904,3094,3095,3097,3098,3099,3100,3101,3104,6486,6487,6488,6489,6490,1182,1183,1184,2228,2229,1722,1723,666,667,668,672,674,675,677,679,681,682,683,684,685,686,687,688,689,690,691,692,1311,1312,1313,1314,1315,1316,1317,1318,2068,2430,2439,2440,2442,2443,2444,2445,2446,2447,2448,2449,2452,4543,4552,4553,4554,4556,4557,4558,4559,4560,4561,4562,4563,4564,4567,5463,5472,5473,5475,5476,5477,5478,5479,5480,5481,5482,5485,4072,4073,6705,6706,4061,4062,4063,6694,6695,6696,1253,1254,1255,1256,1259,1260,1261,1262,1290,1291,4583,4584,4585,4598,4599,4600,4601,4602,4606,4607,4609,4610,5039,5043,5044,5045,5046,5047,5048,5049,5052,5053,5054,5055,5056,5057,5058,5059,5060,5061,5066,5067,5068,5069,5070,5071,5072,5073,5075,5076,5077,5078,5079,5081,5083,4981,4982,7200,7201,7202,7215,7216,7217,7218,7219,7223,7224,7225,7226,7227,7670,7671,7675,7676,7677,7678,7679,7680,7681,7684,7685,7686,7687,7688,7689,7690,7691,7692,7693,7698,7699,7700,7701,7702,7703,7704,7705,7707,7708,7709,7710,7711,7713,7715,7609,7610,2485,2486,2487,2488,2489,2493,2494,2495,2496,5503,5504,5517,5518,5519,5520,5521,5525,5526,5527,5528,5927,5931,5932,5933,5934,5935,5936,5939,5940,5941,5942,5943,5944,5945,5946,5947,5948,5953,5954,5955,5956,5957,5958,5959,5960,5962,5963,5964,5965,5966,5967,5969,2807,2810,2811,2812,7973,7974,810,811,816,818,819,820,831,833,834,835,836,837,838,839,842,843,846,848,850,851,852,853,854,855,856,857,858,859,860,3823,3824,3826,3827,3828,3829,3830,3832,3833,3834,3835,3836,3837,3838,3841,3846,3847,3857,3858,3861,3862,3865,3867,3868,3869,3870,3871,861,862,864,865,866,6455,3992,3993,3994,4078,4079,4163,4164,6621,6622,6711,6792,6626,6627,6628,3313,3315,3316,3317,1918,1919,2907,2911,2912,2913,2914,2915,2918,2919,2920,2921,2922,2923,2924,2929,2930,2931,2932,2933,2934,2935,2936,2937,2938,
2939,2941,2942,2943,2944,2945,2946,2948,4157,4158,6786,6787,1372,1373,4725,4726,4727,4728,4729,4730,4731,4734,4735,4738,5643,5644,5645,5646,5647,5648,5651,5652,5655,5872,5873,1564,1577,1578,1580,1581,1583,1584,1585,1586,1587,1588,1589,1590,1591,1592,1593,1594,1595,1596,1597,1600,1601,1602,1609,1610,1611,1612,1613,1614,1615,1657,1658,1659,1660,1661,1662,1663,1664,1665,1666,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,8143,8144,8145,8147,4022,4023,4106,4107,6655,6656,6736,6737,2460,2461,2606,2607,2608,2609,2610,3041,3042,2499,2500,2501,2502,2506,2507,2508,2509,2510,2819,2823,2824,2825,2826,2827,2830,2831,2832,2833,2834,2835,2836,2841,2842,2843,2844,2845,2846,2847,2848,2849,2850,2851,2853,2854,2855,2856,2857,2858,2860,6172,6173,6174,6175,6176,3249,3297,3298,3299,3300,3301,3302,3303,6152,6154,6155,6156,6157,791,792,793,794,6277,6278,6279,6280,6281,6282,6283,3033,3034,3884,3885,3886,3889,3890,3892,7937,7938,7939,7940,6318,6319,2668,2669,2670,2671,2672,2783,2784,4925,4225,4248,4249,4252,4253,4254,4615,4616,4617,4618,4622,4623,4624,4625,4626,4994,4998,4999,5000,5001,5002,5005,5006,5007,5008,5009,5010,5011,5012,5013,5014,5019,5020,5021,5022,5023,5024,5025,5026,5028,5029,5030,5031,5032,5034,5036,4587,4588,4589,4590,4591,4592,4844,4845,4846,4847,4848,4849,4959,4960,5506,5507,5508,5509,5510,5511,5737,5738,5739,5740,5741,5742,5816,5850,5851,6854,6877,6878,6881,6882,6883,7622,7623,7624,7629,7630,7631,7632,7633,7636,7637,7638,7639,7640,7641,7642,7643,7644,7645,7650,7651,7652,7653,7654,7655,7656,7657,7659,7660,7661,7662,7663,7665,7667,7204,7205,7206,7207,7208,7209,4226,4227,4230,4231,4232,6855,6856,6859,6860,6861,5530,5532,5533,5534,5538,5539,5540,5541,5542,4984,4985,4986,4799,4800,4803,4804,4805,5694,5695,5698,5699,5875,5876,5877,7612,7613,7614,7423,7424,7427,7428,7429,7324,2548,2549,2550,2551,2552,2556,2557,2558,2559,2560,2925,2926,2927,4664,4665,4666,4667,4668,4672,4673,4674,4675,4676,5086,5090,5091,5092,5093,5094,5097,5098,5099,5100,5101,5102,5103,5104,5105,5106,5111,5112
,5113,5114,5115,5116,5117,5118,5120,5121,5122,5123,5124,5126,5128,5581,5582,5583,5584,5585,5589,5590,5591,5592,5593,5972,5976,5977,5978,5979,5980,5983,5984,5985,5986,5987,5988,5989,5990,5991,5992,5997,5998,5999,6000,6001,6002,6003,6004,6006,6007,6008,6009,6010,6011,6013,7284,7285,7286,7287,7288,7292,7293,7294,7295,7296,7297,7718,7719,7720,7725,7726,7727,7728,7729,7732,7733,7734,7735,7736,7737,7738,7739,7740,7741,7746,7747,7748,7749,7750,7751,7752,7753,7755,7756,7757,7758,7759,7761,7763,4247,6876,2837,2838,2839,4413,4779,4780,4781,4782,4783,4784,4785,4786,4787,4788,4789,4790,4793,4798,7402,7403,7404,7405,7406,7407,7408,7409,7410,7411,7412,7413,7414,7417,7422,5016,5017,5018,5884,5888,5889,5890,5891,5892,5895,5896,5897,5898,5899,5900,5901,5906,5907,5908,5909,5910,5911,5912,5913,5914,5915,5916,5918,5919,5920,5921,5922,5923,5925,7647,7648,7649,1424,1425,1426,1427,1428,1429,1431,1432,1433,2292,2293,4403,4404,5367,5368,6201,6202,7024,7025],"/local/VariantValidator/variantValidator/VariantValidator/modules/vvLogging.py":[2,3,4,5,7,9,12,14,73,77,81,85,89,93,94,107,112,118,129,79,29,30,32,37,40,43,45,47,49,50,54,58,59,60,61,65,66,68,69,70,72,80,75,31,76,120,124,125,126,127,128,100,103,104,105,106,83,84,131,135,136,137,138,101,114,117,87,88,110,111],"/local/VariantValidator/variantValidator/VariantValidator/modules/vvHGVS.py":[4,7,8,9,12,13,20,21,22,185,340,513,664,812,967,669,671,674,675,678,680,686,700,701,702,703,798,800,801,802,803,804,805,808,809,823,825,826,829,830,833,835,841,855,856,857,858,954,955,956,957,958,959,960,963,964,196,198,202,203,206,208,214,215,228,234,235,257,284,285,286,287,288,289,290,291,295,298,299,301,302,303,325,326,327,328,336,337,229,230,231,351,354,358,359,360,361,362,374,375,376,381,382,383,387,393,394,407,413,414,436,464,465,466,467,468,469,470,471,475,478,479,484,485,486,508,509,510,366,367,368,369,363,364,370,371,216,217,218,220,221,223,224,225,524,528,529,536,537,540,542,548,549,550,551,552,554,555,557,558,559,660,661,687,688,689,690,692,693
,695,696,697,842,843,844,845,847,848,850,851,852,209,210,211,329,330,331,332,333,395,396,397,399,400,402,403,404,306,307,308,309,310,312,313,315,316,317,525,526,562,568,591,619,642,643,644,645,646,648,649,651,652,653,681,682,683,836,837,838,489,490,491,492,493,495,496,498,499,500,569,620,621,622,623,624,625,626,630,633,634,636,637,638,706,707,708,709,710,711,712,713,717,718,720,721,723,724,725,861,862,863,864,865,866,867,868,872,873,875,876,878,879,880,236,237,238,239,240,241,245,246,248,249,251,252,253,388,389,390,728,756,757,758,759,760,761,762,763,767,770,771,773,774,775,884,912,913,914,915,916,917,918,919,923,926,927,929,930,931,729,730,731,732,733,734,735,736,741,742,743,745,748,749,750,751,753,885,886,887,888,889,890,891,892,897,898,899,901,904,905,906,907,909,258,259,260,261,262,263,264,265,270,271,272,274,275,276,277,278,280,570,571,572,573,574,575,579,580,582,583,585,586,587,415,416,417,418,419,420,424,425,427,428,430,431,432,779,780,781,782,783,785,786,788,789,790,935,936,937,938,939,941,942,944,945,946,408,409,410,437,438,439,440,441,442,443,444,449,450,451,453,456,457,458,459,461,530,543,544,545,792,793,794,795,948,949,950,951,563,564,565,378,592,593,594,595,596,597,598,599,604,605,606,608,611,612,613,614,616,968,973,985,986,987,1013,1014,990,991,992,995,1001,1002,1003,969,970],"/local/VariantValidator/variantValidator/VariantValidator/modules/vvChromosomes.pylocal/VariantValidator/variantValidator/VariantValidator/modules/vvLiftover.py":[6,9,10,11,12,13,14,15,16,17,20,23,25,29,39,40,45,48,49,50,51,52,53,54,68,69,70,71,74,75,76,88,91,92,26,27,94,95,96,97,99,100,102,103,104,105,108,109,113,114,115,116,117,119,120,122,126,213,216,218,224,227,230,231,235,236,237,238,239,241,242,245,250,251,257,258,259,260,266,268,274,277,279,282,285,287,288,289,290,291,293,294,295,296,297,301,302,303,304,305,325,59,60,61,62,63,64,78,79,80,83,84,85,228,310,311,312,313,314,318,319,320,321,322],"/local/VariantValidator/variantValidator/VariantValidator/modules/vvFunctions.py":
[1,2,3,4,5,6,7,8,9,10,11,12,13,14,18,40,69,84,89,96,109,191,290,387,420,444,460,461,462,463,464,465,22,35,27,28,29,30,34,119,122,123,124,125,126,127,128,129,132,143,153,163,164,169,170,171,73,74,79,93,94,80,81,97,98,99,100,101,105,106,88,133,139,140,141,75,76,77,102,103,448,450,451,452,453,393,395,396,398,399,401,402,403,404,405,406,407,408,410,411,412,415,196,197,198,199,200,201,202,203,207,211,212,214,216,218,219,223,228,231,232,235,236,237,239,242,244,245,249,254,256,257,259,260,262,263,265,267,268,270,271,274,280,285,286,287,288,425,426,427,428,429,430,432,433,434,435,436,438,440,417,418,275,276,277,165,166,167,154,159,160,161,173,178,179,180,181,292,293,294,295,296,297,298,299,303,307,308,310,312,314,315,319,324,327,328,331,332,333,335,338,340,341,345,350,352,353,355,356,358,359,361,363,364,366,367,377,382,383,384,385,42,43,47,49,50,51,52,53,55,56,57,58,59,60,65,66],"/local/VariantValidator/variantValidator/VariantValidator/modules/vvDatabase.py":[1,2,3,6,7,8,10,11,13,25,27,38,48,74,152,159,164,169,175,181,429,525,55,56,57,30,31,32,33,36,58,61,65,66,67,68,69,70,71,72,73,526,527,545,34,35,62,63,44,79,80,81,87,88,89,90,91,93,94,95,96,97,98,103,104,110,113,115,132,133,135],"/local/VariantValidator/variantValidator/VariantValidator/modules/vvDBInsert.py":[1,2,4,7,9,11,15,36,48,60,73,87,103,111],"/local/VariantValidator/variantValidator/VariantValidator/modules/vvDBGet.py":[1,2,3,5,8,9,17,26,29,32,35,38,41,45,48,51,54,57,60,63,66,70,73,76,80,83,87,85,30,31,11,12,13,14,15,16,36,37,55,56,52,53,64,65,90,91,92,93,94,95,96,97,98,99,100,101,102,67,68,103,104,105,106,114,42,43,82,27,28,46,47,49,50,74,75,19,20,21,24,108,110],"/local/VariantValidator/variantValidator/VariantValidator/modules/vvDBInit.py":[1,2,3,5,8,9,22,10,13,14,17,18,19,20],"/local/VariantValidator/variantValidator/VariantValidator/modules/vvMixinConverters.py":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,26,29,33,34,47,95,111,126,137,166,179,871,1078,1087,1615,1627,1652,1663,1704,1714,1723,173
8,1763,1842,1882,1913,1928,1944,1965,2077,2187,2204,2349,2488,1720,1721,1729,1731,1733,1735,1736,197,198,199,202,203,208,210,212,213,216,217,220,221,231,232,236,238,239,260,271,280,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,301,306,307,308,313,314,315,321,333,334,335,476,477,481,483,502,511,521,523,525,526,527,528,529,536,537,538,539,540,553,555,562,563,564,565,571,574,575,576,577,578,579,580,583,584,590,592,593,595,603,604,605,611,612,626,627,628,637,638,639,640,641,642,644,648,649,650,651,653,656,657,658,659,662,660,663,664,667,669,670,674,675,676,696,699,706,707,844,869,1745,1746,1751,1752,1757,1761,1846,1847,1848,1849,1871,1872,1879,1880,132,134,135,1656,1657,1658,1659,1660,1661,1089,1090,1091,1094,1095,1100,1102,1104,1105,1108,1109,1112,1113,1123,1124,1128,1130,1132,1153,1165,1174,1176,1177,1178,1179,1180,1181,1182,1183,1184,1185,1186,1187,1188,1189,1190,1191,1192,1195,1200,1201,1202,1207,1208,1209,1215,1228,1229,1231,1250,1259,1269,1270,1271,1272,1273,1274,1275,1282,1283,1284,1285,1286,1298,1300,1307,1308,1309,1310,1316,1319,1320,1321,1322,1323,1324,1325,1328,1329,1335,1337,1338,1339,1347,1348,1349,1355,1356,1370,1371,1372,1381,1382,1383,1384,1385,1386,1388,1392,1393,1394,1395,1397,1400,1401,1402,1403,1406,1404,1407,1408,1411,1413,1414,1418,1419,1420,1440,1443,1450,1451,1588,1613,1452,1455,1456,1457,1458,1459,1460,1461,1769,1770,1771,1772,1773,1774,1775,1776,1782,1784,1785,1788,1789,1800,1801,1802,1803,1839,1840,272,273,274,277,278,503,504,507,508,509,542,543,544,545,546,547,548,549,551,708,711,712,718,719,720,728,730,731,732,733,734,735,736,737,738,740,741,746,748,749,750,752,753,754,760,761,775,776,777,786,787,788,789,790,791,793,797,798,799,800,802,805,806,807,810,811,812,815,817,818,822,823,824,1166,1167,1168,1171,1172,1251,1252,1255,1256,1257,1288,1289,1290,1291,1292,1293,1294,1295,1296,1462,1463,1464,1472,1474,1475,1476,1477,1478,1479,1480,1481,1482,1484,1485,1490,1492,1493,1494,1496,1497,1498,1504,1505,1519,1520,1521,1530,1531,1
532,1533,1534,1535,1537,1541,1542,1543,1544,1546,1549,1550,1551,1554,1555,1556,1559,1561,1562,1566,1567,1568,1465,1466,322,323,324,325,326,327,1216,1217,1218,1219,1220,1221,281,713,714,715,716,717,1175,1790,1792,1793,1794,1795,721,722,240,1948,1949,1950,1951,1952,1953,1955,1957,1958,1959,1961,1962,1963,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,258,259,1133,1135,1136,1137,1138,1139,1140,1141,1142,1143,1144,1145,1146,1147,1148,1149,1151,1152,1804,1805,1808,1809,1814,1815,1820,1826,1829,1832,1833,261,262,263,264,265,266,269,270,1155,1156,1157,1158,1159,1160,1163,1164,1114,1115,1116,1117,1210,1211,1212,1758,1759,337,339,341,346,348,349,351,352,353,354,355,357,359,360,361,350,364,365,366,367,368,370,371,372,373,374,376,377,381,382,383,369,384,385,386,387,388,390,389,401,402,403,404,405,407,406,420,421,422,423,424,426,425,438,439,440,441,442,444,443,459,460,461,462,463,465,464,478,479,466,467,469,471,472,473,883,884,885,1045,1046,1051,1076,1085,1086,144,145,146,149,150,151,152,153,156,158,845,846,1589,1590,375,1821,1827,1836,1838,328,329,330,331,332,1222,1223,1224,1225,1226,1230,342,343,427,428,429,445,446,447,448,449,450,2354,2357,2359,2360,2362,2365,2368,2369,2371,2372,2376,2377,2378,2379,2380,2381,2382,2387,2388,2389,2390,2391,2392,2393,2395,2421,2424,2428,2433,2437,2441,2442,2443,2444,2445,2446,2447,2448,2449,2450,2451,2452,2453,2454,2455,2459,2460,2461,2462,2468,2469,2481,2482,2485,1118,316,317,318,356,2209,2211,2213,2218,2219,2220,2221,2222,2223,2224,2225,2226,2229,2230,2231,2232,2233,2234,2335,2336,2337,2338,2339,2342,2236,2237,2284,2303,2309,2310,2312,2313,2314,2315,2316,2317,2318,2319,2321,2323,2324,2327,2328,1971,1974,1976,1977,1983,1984,1985,1986,1987,1992,1993,1996,1997,1998,1999,2000,2003,2005,2006,2008,2014,2017,2018,2019,2027,2028,2029,2031,2032,2051,2052,2053,2054,2057,2059,2064,2065,2066,2067,2071,2072,2075,2329,2330,2331,2061,2332,2021,2036,2040,2042,2043,2044,2046,2048,1887,1888,1892,1894,1895,1896,1897,1898,1900,1901,1902,1903,1904,19
05,1908,1911,2287,2291,2293,2294,2295,2296,2297,2298,2299,2300,2228,2326,2239,2240,2241,2242,2243,2244,2246,2248,2250,2251,2252,2253,2254,2255,2256,2259,2260,2261,2262,2264,2265,2266,2267,2268,2269,2270,2272,2273,2274,2277,2278,2279,2280,2281,2073,2074,2282,2343,2344,2345,2346,2347,222,223,224,225,319,1213,482,1918,1920,1922,1925,1926,1591,1592,1593,1594,1595,1598,1599,1601,1602,1603,1604,1607,1608,391,392,393,394,395,430,431,432,468,847,848,849,850,851,854,855,857,858,859,860,863,864,1260,1276,1301,1302,1312,1313,1314,1315,1340,1341,762,1506],"/local/VariantValidator/variantValidator/VariantValidator/modules/vvMixinInit.py":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,18,19,20,21,22,23,26,45,46,177,180,192,221,61,62,65,66,67,68,69,70,72,73,75,76,79,80,81,82,84,85,86,87,88,89,91,94,95,96,97,98,101,103,104,105,109,110,113,114,115,116,120,121,124,125,126,127,130,131,132,135,136,139,140,143,144,145,146,150,151,152,153,154,156,157,158,159,160,163,164,165,166,167,170,171,172,173,174,223,226,227,229,239,242,244,245,251,252,262,263,264,273,276,279,281,284,285,289,290,291,292,293,301,449,452,282,298,303,306,321,322,325,326,333,334,335,337,339,340,348,352,370,371,376,382,397,398,401,402,403,406,423,435,436,443,444,446,407,408,413,355,358,359,360,362,363,308,310,311,316,317,318,425,427,295,296,246,247,248,249,456,458,459,460,461,462,294,373,230,231,232,233,234,253,254,255,265,266,286,341,342,343,344,345,346,267,437,440],"/local/VariantValidator/variantValidator/VariantValidator/simpleTestScript.py":[],"/local/VariantValidator/variantValidator/VariantValidator/update_vv_db.py":[]}} \ No newline at end of file diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..0dbf471e --- /dev/null +++ b/.travis.yml @@ -0,0 +1,73 @@ +language: python + +matrix: + include: + + # Here you can add or remove specific builds, and Python versions. You + # should currently be able to use Python 2.6, 2.7, or 3.3 or later. 
The + # NUMPY and SCIPY versions are set here as an example, but you can + # add/remove environment variables, and use them below during the install. + + - python: 3.6 + +services: + - mysql + - postgresql + +addons: + postgresql: "9.6" + +env: + - CODECOV_TOKEN="50dd5c2e-4259-4cfa-97a7-b4429e0d179e" + +before_install: + + # Increase size of database drive + - sudo mount -o remount,size=50% /var/ramfs + + # Install Miniconda + + - wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda3.sh + - bash miniconda3.sh -b -p "$HOME"/miniconda3 + - echo . "$HOME"/miniconda3/etc/profile.d/conda.sh >> "$HOME"/.bashrc + - source "$HOME"/.bashrc + - conda config --set always_yes yes + + # Set up the databases - install seqrepo and UTA + + - mysql -e 'CREATE DATABASE validator;' + - df -h + + - mkdir "$HOME"/seqrepo + - rsync -HavP dl.biocommons.org::seqrepo/2018-08-21/ "$HOME"/seqrepo/2018-08-21/ + + # Copy configuration file + - cp configuration/travis.ini "$HOME"/.variantvalidator + + +install: + + # We just set up a conda environment with the right Python version. + + - sed -i -E 's/(python=)(.*)/\1'$TRAVIS_PYTHON_VERSION'/' environment.yml + - travis_retry conda env create -f environment.yml + - conda activate vvenv + + # Test dependencies + - conda install pytest + - pip install pytest-cov + - pip install codecov + + - pip install . + + # Set up validator database + - mysql validator < configuration/empty_vv_db.sql + - python bin/update_db.py + - df -h + +script: + + - pytest --cov-report=term --cov=VariantValidator/ # will run all tests in the package + +after_success: + - codecov \ No newline at end of file diff --git a/VariantValidator/modules/vvDatabase.py b/VariantValidator/modules/vvDatabase.py index f5b9b650..4a19807d 100644 --- a/VariantValidator/modules/vvDatabase.py +++ b/VariantValidator/modules/vvDatabase.py @@ -78,11 +78,12 @@ def update_transcript_info_record(self,accession, validator): # Prime these entries, just in case. 
previous_entry = self.in_entries(accession, 'transcript_info') accession = accession - description = previous_entry['description'] - variant = previous_entry['variant'] - version = previous_entry['version'] - hgnc_symbol = previous_entry['hgnc_symbol'] - uta_symbol = previous_entry['uta_symbol'] + if 'none' not in previous_entry.keys(): + description = previous_entry['description'] + variant = previous_entry['variant'] + version = previous_entry['version'] + hgnc_symbol = previous_entry['hgnc_symbol'] + uta_symbol = previous_entry['uta_symbol'] try: record = validator.entrez_efetch(db="nucleotide", id=accession, rettype="gb", retmode="text") version = record.id diff --git a/configuration/empty_vv_db.sql b/configuration/empty_vv_db.sql new file mode 100644 index 00000000..79ef502b --- /dev/null +++ b/configuration/empty_vv_db.sql @@ -0,0 +1,120 @@ +-- MySQL dump 10.13 Distrib 5.5.62, for debian-linux-gnu (x86_64) +-- +-- Host: localhost Database: validator +-- ------------------------------------------------------ +-- Server version 5.5.62-0ubuntu0.14.04.1 + +/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; +/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */; +/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */; +/*!40101 SET NAMES utf8 */; +/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */; +/*!40103 SET TIME_ZONE='+00:00' */; +/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */; +/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */; +/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */; +/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */; + +-- +-- Table structure for table `LRG_RSG_lookup` +-- + +DROP TABLE IF EXISTS `LRG_RSG_lookup`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `LRG_RSG_lookup` ( + `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `lrgID` varchar(10) NOT NULL 
DEFAULT '', + `hgncSymbol` varchar(10) NOT NULL, + `RefSeqGeneID` varchar(15) NOT NULL DEFAULT '', + `status` text NOT NULL, + PRIMARY KEY (`id`) +) ENGINE=InnoDB AUTO_INCREMENT=2092 DEFAULT CHARSET=utf8; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Table structure for table `LRG_proteins` +-- + +DROP TABLE IF EXISTS `LRG_proteins`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `LRG_proteins` ( + `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `LRGproteinID` varchar(10) DEFAULT NULL, + `RefSeqProteinID` varchar(15) DEFAULT NULL, + PRIMARY KEY (`id`) +) ENGINE=InnoDB AUTO_INCREMENT=1381 DEFAULT CHARSET=utf8; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Table structure for table `LRG_transcripts` +-- + +DROP TABLE IF EXISTS `LRG_transcripts`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `LRG_transcripts` ( + `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `LRGtranscriptID` varchar(10) NOT NULL DEFAULT '', + `RefSeqTranscriptID` varchar(15) NOT NULL DEFAULT '', + PRIMARY KEY (`id`) +) ENGINE=InnoDB AUTO_INCREMENT=2607 DEFAULT CHARSET=utf8; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Table structure for table `refSeqGene_loci` +-- + +DROP TABLE IF EXISTS `refSeqGene_loci`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `refSeqGene_loci` ( + `id` int(11) NOT NULL AUTO_INCREMENT, + `refSeqGeneID` varchar(50) NOT NULL, + `refSeqChromosomeID` varchar(500) NOT NULL, + `genomeBuild` varchar(10) NOT NULL, + `startPos` int(50) NOT NULL, + `endPos` int(50) NOT NULL, + `orientation` varchar(5) NOT NULL, + `totalLength` int(50) NOT NULL, + `chrPos` varchar(20) NOT NULL, + `rsgPos` varchar(20) NOT NULL, + `entrezID` int(20) NOT NULL, + `hgncSymbol` varchar(20) NOT NULL, + `updated` timestamp 
NOT NULL DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (`id`) +) ENGINE=InnoDB AUTO_INCREMENT=27012 DEFAULT CHARSET=utf8; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Table structure for table `transcript_info` +-- + +DROP TABLE IF EXISTS `transcript_info`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `transcript_info` ( + `id` int(11) NOT NULL AUTO_INCREMENT, + `refSeqID` varchar(50) NOT NULL, + `description` varchar(500) NOT NULL, + `transcriptVariant` varchar(10) NOT NULL, + `currentVersion` varchar(50) NOT NULL, + `hgncSymbol` varchar(20) NOT NULL, + `utaSymbol` varchar(20) NOT NULL, + `updated` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (`id`), + KEY `refi` (`refSeqID`) +) ENGINE=InnoDB AUTO_INCREMENT=740 DEFAULT CHARSET=utf8; +/*!40101 SET character_set_client = @saved_cs_client */; +/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */; + +/*!40101 SET SQL_MODE=@OLD_SQL_MODE */; +/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */; +/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */; +/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */; +/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */; +/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; +/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */; + +-- Dump completed on 2019-01-08 16:04:55 \ No newline at end of file diff --git a/configuration/travis.ini b/configuration/travis.ini new file mode 100644 index 00000000..661457ae --- /dev/null +++ b/configuration/travis.ini @@ -0,0 +1,53 @@ +[variantValidator] +version = 0.9 + +[mysql] +host = localhost +database = validator +user = travis +password = + +[seqrepo] +version = 2018-08-21 +location = /home/travis/seqrepo + +[postgres] +host = uta.invitae.com +database = uta +version = uta_20180821 +user = anonymous +password = anonymous + +[logging] +#Levels control verbosity and can be set to "critical" "error" "warning" "info" or "debug". 
+level = info +#level = debug +console = true +# Beware - file logging has permission issues. +file = false +# Trace is used for debugging to track variants through the validator function +trace = false + +[EntrezID] +entrezid = admin@variantvalidator.org + +[liftover] +location = /PATH/TO/LIFTOVER + + +# +# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# diff --git a/environment.yml b/environment.yml index 479c89d9..8296266a 100644 --- a/environment.yml +++ b/environment.yml @@ -3,6 +3,7 @@ channels: - conda-forge - bioconda dependencies: + - python=3.6 - protobuf=3.5.1 - mysql-connector-python - pysam From 739166bb99243401251c239d7a0b2523f5e3432d Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 17 May 2019 14:57:27 +0100 Subject: [PATCH 093/223] Respaced vvDBGet.py --- VariantValidator/modules/vvDBGet.py | 99 ++++++++++++++++++----------- 1 file changed, 61 insertions(+), 38 deletions(-) diff --git a/VariantValidator/modules/vvDBGet.py b/VariantValidator/modules/vvDBGet.py index 38513b29..edd9d363 100644 --- a/VariantValidator/modules/vvDBGet.py +++ b/VariantValidator/modules/vvDBGet.py @@ -2,89 +2,112 @@ from .vvLogging import logger from . import vvDBInit + class Mixin(vvDBInit.Mixin): - ''' + """ Most of the functions in DBGet generate queries for retrieving data from the databases. 
- ''' + """ + @handleCursor - def execute(self,query): + def execute(self, query): self.cursor.execute(query) row = self.cursor.fetchone() if row is None: logger.debug("No data returned from query "+str(query)) row = ['none', 'No data'] return row + @handleCursor - def executeAll(self,query): + def executeAll(self, query): self.cursor.execute(query) rows = self.cursor.fetchall() - if rows==[]: + if rows == []: logger.debug("No data returned from query "+str(query)) rows = ['none', 'No data'] return rows + # from dbfetchone - def get_utaSymbol(self,gene_symbol): - query= "SELECT utaSymbol FROM transcript_info WHERE hgncSymbol = '%s'" %(gene_symbol) + def get_utaSymbol(self, gene_symbol): + query = "SELECT utaSymbol FROM transcript_info WHERE hgncSymbol = '%s'" % gene_symbol return self.execute(query) - def get_hgncSymbol(self,gene_symbol): - query= "SELECT hgncSymbol FROM transcript_info WHERE utaSymbol = '%s'" %(gene_symbol) + + def get_hgncSymbol(self, gene_symbol): + query = "SELECT hgncSymbol FROM transcript_info WHERE utaSymbol = '%s'" % gene_symbol return self.execute(query) - def get_transcript_description(self,transcript_id): - query= "SELECT description FROM transcript_info WHERE refSeqID = '%s'" %(transcript_id) + + def get_transcript_description(self, transcript_id): + query = "SELECT description FROM transcript_info WHERE refSeqID = '%s'" % transcript_id return str(self.execute(query)[0]) - def get_gene_symbol_from_transcriptID(self,transcript_id): - query = "SELECT hgncSymbol FROM transcript_info WHERE refSeqID = '%s'" %(transcript_id) + + def get_gene_symbol_from_transcriptID(self, transcript_id): + query = "SELECT hgncSymbol FROM transcript_info WHERE refSeqID = '%s'" % transcript_id return str(self.execute(query)[0]) - def get_refSeqGene_data_by_refSeqGeneID(self,refSeqGeneID, genomeBuild): - query = "SELECT refSeqGeneID, refSeqChromosomeID, genomeBuild, startPos, endPos, orientation, totalLength, chrPos, rsgPos, entrezID, hgncSymbol FROM 
refSeqGene_loci WHERE refSeqGeneID = '%s' AND genomeBuild = '%s'" %(refSeqGeneID, genomeBuild) + + def get_refSeqGene_data_by_refSeqGeneID(self, refSeqGeneID, genomeBuild): + query = "SELECT refSeqGeneID, refSeqChromosomeID, genomeBuild, startPos, endPos, orientation, totalLength, chrPos, rsgPos, entrezID, hgncSymbol FROM refSeqGene_loci WHERE refSeqGeneID = '%s' AND genomeBuild = '%s'" % (refSeqGeneID, genomeBuild) return self.execute(query) - def get_gene_symbol_from_refSeqGeneID(self,refSeqGeneID): - query = "SELECT hgncSymbol FROM refSeqGene_loci WHERE refSeqGeneID = '%s'" %(refSeqGeneID) + + def get_gene_symbol_from_refSeqGeneID(self, refSeqGeneID): + query = "SELECT hgncSymbol FROM refSeqGene_loci WHERE refSeqGeneID = '%s'" % refSeqGeneID return self.execute(query)[0] + #get_refseqgeneId_from_lrgID - def get_RefSeqGeneID_from_lrgID(self,lrgID): - query = "SELECT RefSeqGeneID FROM LRG_RSG_lookup WHERE lrgID = '%s'" %(lrgID) + def get_RefSeqGeneID_from_lrgID(self, lrgID): + query = "SELECT RefSeqGeneID FROM LRG_RSG_lookup WHERE lrgID = '%s'" % lrgID return self.execute(query)[0] - def get_RefSeqTranscriptID_from_lrgTranscriptID(self,lrgtxID): - query = "SELECT RefSeqTranscriptID FROM LRG_transcripts WHERE LRGtranscriptID = '%s'" %(lrgtxID) + + def get_RefSeqTranscriptID_from_lrgTranscriptID(self, lrgtxID): + query = "SELECT RefSeqTranscriptID FROM LRG_transcripts WHERE LRGtranscriptID = '%s'" % lrgtxID return self.execute(query)[0] - def get_lrgTranscriptID_from_RefSeqTranscriptID(self,rstID): - query = "SELECT LRGtranscriptID FROM LRG_transcripts WHERE RefSeqTranscriptID = '%s'" %(rstID) + + def get_lrgTranscriptID_from_RefSeqTranscriptID(self, rstID): + query = "SELECT LRGtranscriptID FROM LRG_transcripts WHERE RefSeqTranscriptID = '%s'" % rstID return self.execute(query)[0] - def get_lrgID_from_RefSeqGeneID(self,rsgID): - query = "SELECT lrgID, status FROM LRG_RSG_lookup WHERE RefSeqGeneID = '%s'" %(rsgID) + + def get_lrgID_from_RefSeqGeneID(self, rsgID): + 
query = "SELECT lrgID, status FROM LRG_RSG_lookup WHERE RefSeqGeneID = '%s'" % rsgID return self.execute(query) - def get_refseqgene_info(self,refseqgene_id, primary_assembly): - query = "SELECT refSeqGeneID, refSeqChromosomeID, genomeBuild, startPos, endPos FROM refSeqGene_loci WHERE refSeqGeneID = '%s' AND genomeBuild = '%s'" %(refseqgene_id, primary_assembly) + + def get_refseqgene_info(self, refseqgene_id, primary_assembly): + query = "SELECT refSeqGeneID, refSeqChromosomeID, genomeBuild, startPos, endPos FROM refSeqGene_loci WHERE refSeqGeneID = '%s' AND genomeBuild = '%s'" % (refseqgene_id, primary_assembly) return self.execute(query) - def get_RefSeqProteinID_from_lrgProteinID(self,lrg_p): - query = "SELECT RefSeqProteinID FROM LRG_proteins WHERE LRGproteinID = '%s'" %(lrg_p) + + def get_RefSeqProteinID_from_lrgProteinID(self, lrg_p): + query = "SELECT RefSeqProteinID FROM LRG_proteins WHERE LRGproteinID = '%s'" % lrg_p return self.execute(query)[0] - def get_lrgProteinID_from_RefSeqProteinID(self,rs_p): - query = "SELECT LRGproteinID FROM LRG_proteins WHERE RefSeqProteinID = '%s'" %(rs_p) + + def get_lrgProteinID_from_RefSeqProteinID(self, rs_p): + query = "SELECT LRGproteinID FROM LRG_proteins WHERE RefSeqProteinID = '%s'" % rs_p return self.execute(query)[0] - def get_LRG_data_from_LRGid(self,lrg_id): - query = "SELECT * FROM LRG_RSG_lookup WHERE lrgID = '%s'" %(lrg_id) + + def get_LRG_data_from_LRGid(self, lrg_id): + query = "SELECT * FROM LRG_RSG_lookup WHERE lrgID = '%s'" % lrg_id return self.execute(query) + #from dbfetchall - def get_transcript_info_for_gene(self,gene_symbol): - query = "SELECT refSeqID, description, transcriptVariant, currentVersion, hgncSymbol, utaSymbol, updated, IF(updated < NOW() - INTERVAL 3 MONTH , 'true', 'false') FROM transcript_info WHERE hgncSymbol = '%s'" %(gene_symbol) + def get_transcript_info_for_gene(self, gene_symbol): + query = "SELECT refSeqID, description, transcriptVariant, currentVersion, hgncSymbol, utaSymbol, 
updated, IF(updated < NOW() - INTERVAL 3 MONTH , 'true', 'false') FROM transcript_info WHERE hgncSymbol = '%s'" % gene_symbol return self.executeAll(query) + def get_g_to_g_info(self): query = "SELECT refSeqGeneID, refSeqChromosomeID, startPos, endPos, orientation, hgncSymbol, genomeBuild FROM refSeqGene_loci" return self.executeAll(query) + def get_all_transcriptID(self): query = "SELECT refSeqID FROM transcript_info" return self.executeAll(query) + # Direct methods (GET) - def get_uta_symbol(self,gene_symbol): + def get_uta_symbol(self, gene_symbol): # returns the UTA gene symbol when HGNC gene symbol is input return str(self.get_utaSymbol(gene_symbol)[0]) - def get_hgnc_symbol(self,gene_symbol): + + def get_hgnc_symbol(self, gene_symbol): # returns the HGNC gene symbol when UTA gene symbol is input return str(self.get_hgncSymbol(gene_symbol)[0]) + # from external.py - def get_urls(self,dict_out): + def get_urls(self, dict_out): # Provide direct links to reference sequence records # Add urls report_urls = {} From 584b4cb280dc0a55969494e7ae6489e5a9827cf3 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 17 May 2019 15:27:46 +0100 Subject: [PATCH 094/223] Edited variant obj and refactored some attributes --- VariantValidator/modules/gapped_mapping.py | 7 ++++++- VariantValidator/modules/variant.py | 21 ++++++++++----------- VariantValidator/modules/vvMixinCore.py | 2 +- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index 616e3b4b..6e816385 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -11,6 +11,11 @@ class GapMapper(object): def __init__(self, variant, validator): + """ + Sets initial values + :param variant: variant.Variant() + :param validator: Validator() + """ self.variant = variant self.validator = validator @@ -58,7 +63,7 @@ def gapped_g_to_c(self, rel_var): stash_ref = vcf_dict['ref'] 
stash_alt = vcf_dict['alt'] stash_end = end - stash_input = str(self.variant.stashed) + stash_input = str(self.variant.post_format_conversion) # Re-Analyse genomic positions if 'NG_' in str(self.variant.hgvs_formatted): c = self.validator.hp.parse_hgvs_variant(rel_var[0]) diff --git a/VariantValidator/modules/variant.py b/VariantValidator/modules/variant.py index 416f6ef4..b83f1a60 100644 --- a/VariantValidator/modules/variant.py +++ b/VariantValidator/modules/variant.py @@ -17,9 +17,9 @@ def __init__(self, original, quibble=None, warnings='', write=True, primary_asse self.hgvs_formatted = None self.hgvs_genomic = None self.hgvs_coding = None - self.stashed = None + self.post_format_conversion = None # Used for first gapped_mapping function self.pre_RNA_conversion = None - self.input_parses = None + self.input_parses = None # quibble as hgvs variant object self.warnings = warnings self.description = '' # hgnc_gene_info variable @@ -32,15 +32,14 @@ def __init__(self, original, quibble=None, warnings='', write=True, primary_asse self.primary_assembly = primary_assembly self.order = order self.output_type_flag = 'warning' - - self.test_stash_tx_left = None - self.test_stash_tx_right = None + self.gene_symbol = None self.timing = {} self.refsource = None self.reftype = None + # Normalizers self.hn = None self.reverse_normalizer = None self.evm = None @@ -48,15 +47,15 @@ def __init__(self, original, quibble=None, warnings='', write=True, primary_asse self.min_evm = None self.lose_vm = None - self.gene_symbol = None - self.hgvs_transcript_variant = None + # Required for output + self.hgvs_transcript_variant = None # variant.coding self.genome_context_intronic_sequence = None self.refseqgene_context_intronic_sequence = None - self.hgvs_refseqgene_variant = None + self.hgvs_refseqgene_variant = None # genomic_r self.hgvs_predicted_protein_consequence = None - self.validation_warnings = None + self.validation_warnings = None # warnings but duplicates removed 
self.hgvs_lrg_transcript_variant = None - self.hgvs_lrg_variant = None + self.hgvs_lrg_variant = None # Same as hgvs_refseqgene_variant ? self.alt_genomic_loci = None self.primary_assembly_loci = None self.reference_sequence_records = None @@ -128,7 +127,7 @@ def set_reftype(self): Method will set the reftype based on the quibble :return: """ - pat_est = re.compile(r'\d\:\d') + pat_est = re.compile(r'\d:\d') if ':g.' in self.quibble: self.reftype = ':g.' diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index bfd21aa9..db9867fd 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -187,7 +187,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr formatted_variant = my_variant.quibble stash_input = my_variant.quibble - my_variant.stashed = stash_input + my_variant.post_format_conversion = stash_input format_type = my_variant.reftype hgnc_gene_info = 'false' From 1e3987385924de4b542a69cec09df54e856bbd81 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 17 May 2019 15:43:28 +0100 Subject: [PATCH 095/223] Moved gene symbol into variant obj so it won't be recalculated so many times --- VariantValidator/modules/mappers.py | 48 +++++++++--------------- VariantValidator/modules/use_checking.py | 38 ++++++++++--------- VariantValidator/modules/variant.py | 2 +- VariantValidator/modules/vvMixinCore.py | 10 +---- 4 files changed, 40 insertions(+), 58 deletions(-) diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index 764cd1dd..f24ff6af 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -230,35 +230,29 @@ def transcripts_to_gene(variant, validator): variant.warnings += ': ' + error logger.warning(error) return True - try: - gene_symbol = validator.db.get_gene_symbol_from_transcriptID(tx_ac) - except: - gene_symbol = None - if gene_symbol is None: + + if 
variant.gene_symbol: error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, ' \ - 'please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to ' \ - 'https://variantvalidator.org/ref_finder/, or select an alternative genome build' + 'please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + \ + variant.gene_symbol + ' to https://variantvalidator.org/ref_finder/, ' \ + 'or select an alternative genome build' else: error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, ' \ - 'please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + \ - gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' - + 'please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to ' \ + 'https://variantvalidator.org/ref_finder/, or select an alternative genome build' variant.warnings += ': ' + error logger.warning(error) return True except TypeError: - try: - gene_symbol = validator.db.get_gene_symbol_from_transcriptID(tx_ac) - except: - gene_symbol = 'none' - if gene_symbol == 'none': + if variant.gene_symbol: error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, ' \ - 'please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to ' \ - 'https://variantvalidator.org/ref_finder/, or select an alternative genome build' + 'please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + \ + variant.gene_symbol + ' to https://variantvalidator.org/ref_finder/, ' \ + 'or select an alternative genome build' else: error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, ' \ - 'please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + \ - gene_symbol + ' to https://variantvalidator.org/ref_finder/, or 
select an alternative genome build' + 'please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to ' \ + 'https://variantvalidator.org/ref_finder/, or select an alternative genome build' variant.warnings += ': ' + error logger.warning(error) return True @@ -646,13 +640,9 @@ def transcripts_to_gene(variant, validator): gap_compensation = True # Gap gene black list - try: - gene_symbol = validator.db.get_gene_symbol_from_transcriptID(hgvs_coding.ac) - except Exception: - fn.exceptPass() - else: + if variant.gene_symbol: # If the gene symbol is not in the list, the value False will be returned - gap_compensation = vvChromosomes.gap_black_list(gene_symbol) + gap_compensation = vvChromosomes.gap_black_list(variant.gene_symbol) # Intron spanning variants if 'boundary' in str(error) or 'spanning' in str(error): @@ -868,13 +858,9 @@ def final_tx_to_multiple_genomic(variant, validator, tx_variant): # multi_gen_vars = [] variant.hgvs_coding = validator.hp.parse_hgvs_variant(str(tx_variant)) # Gap gene black list - try: - gene_symbol = validator.db.get_gene_symbol_from_transcriptID(variant.hgvs_coding.ac) - except Exception: - fn.exceptPass() - else: + if variant.gene_symbol: # If the gene symbol is not in the list, the value False will be returned - gap_compensation = vvChromosomes.gap_black_list(gene_symbol) + gap_compensation = vvChromosomes.gap_black_list(variant.gene_symbol) # Look for variants spanning introns try: diff --git a/VariantValidator/modules/use_checking.py b/VariantValidator/modules/use_checking.py index 324ee224..8e104bf2 100644 --- a/VariantValidator/modules/use_checking.py +++ b/VariantValidator/modules/use_checking.py @@ -65,6 +65,9 @@ def structure_checks(variant, validator): """ input_parses = validator.hp.parse_hgvs_variant(variant.quibble) variant.input_parses = input_parses + variant.gene_symbol = validator.db.get_gene_symbol_from_transcriptID(variant.input_parses.ac) + if variant.gene_symbol == 'none': + variant.gene_symbol 
= '' if input_parses.type == 'g': check = structure_checks_g(variant, validator) if check: @@ -341,18 +344,16 @@ def structure_checks_c(variant, validator): variant.primary_assembly, validator.vm, variant.hn, validator.hp, validator.sf, variant.no_norm_evm) except hgvs.exceptions.HGVSDataNotAvailableError: - tx_ac = variant.input_parses.ac - try: - gene_symbol = validator.db.get_gene_symbol_from_transcriptID(tx_ac) - except: - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, ' \ - 'please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to ' \ + if variant.gene_symbol: + error = 'Required information for ' + variant.input_parses.ac + ' is missing from the Universal ' \ + 'Transcript Archive, please select an alternative version of ' + variant.input_parses.ac + \ + ' by submitting ' + variant.input_parses.ac + ' or ' + variant.gene_symbol + ' to ' \ 'https://variantvalidator.org/ref_finder/, or select an alternative genome build' else: - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, ' \ - 'please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' \ - + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative ' \ - 'genome build' + error = 'Required information for ' + variant.input_parses.ac + ' is missing from the Universal ' \ + 'Transcript Archive, please select an alternative version of ' + variant.input_parses.ac + \ + ' by submitting ' + variant.input_parses.ac + ' to https://variantvalidator.org/ref_finder/, ' \ + 'or select an alternative genome build' variant.warnings += ': ' + error logger.warning(error) return True @@ -560,15 +561,16 @@ def structure_checks_n(variant, validator): output = validator.noreplace_myevm_t_to_g(variant.input_parses, variant.evm, validator.hdp, variant.primary_assembly, validator.vm, variant.hn, validator.hp, validator.sf, variant.no_norm_evm) 
except hgvs.exceptions.HGVSDataNotAvailableError as e: - tx_ac = variant.input_parses.ac - try: - gene_symbol = validator.db.get_gene_symbol_from_transcriptID(tx_ac) - except: - gene_symbol = None - if gene_symbol is None: - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' + if variant.gene_symbol: + error = 'Required information for ' + variant.input_parses.ac + ' is missing from the Universal ' \ + 'Transcript Archive, please select an alternative version of ' + variant.input_parses.ac + \ + ' by submitting ' + variant.input_parses.ac + ' or ' + variant.gene_symbol + ' to ' \ + 'https://variantvalidator.org/ref_finder/, or select an alternative genome build' else: - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + gene_symbol + ' to https://variantvalidator.org/ref_finder/, or select an alternative genome build' + error = 'Required information for ' + variant.input_parses.ac + ' is missing from the Universal ' \ + 'Transcript Archive, please select an alternative version of ' + variant.input_parses.ac + \ + ' by submitting ' + variant.input_parses.ac + ' to https://variantvalidator.org/ref_finder/,' \ + ' or select an alternative genome build' variant.warnings += ': ' + error logger.warning(error) return True diff --git a/VariantValidator/modules/variant.py b/VariantValidator/modules/variant.py index b83f1a60..f3033d79 100644 --- a/VariantValidator/modules/variant.py +++ b/VariantValidator/modules/variant.py @@ -32,7 +32,7 @@ def __init__(self, original, quibble=None, warnings='', write=True, primary_asse self.primary_assembly = primary_assembly self.order = order self.output_type_flag = 'warning' - self.gene_symbol 
= None + self.gene_symbol = '' self.timing = {} diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index db9867fd..c47fb9b5 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -542,13 +542,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr predicted_protein_variant = rs_p + '(' + lrg_p + '):' + pred_prot_posedit # Gene - if transcript_accession != '': - try: - gene_symbol = self.db.get_gene_symbol_from_transcriptID(transcript_accession) - except: - gene_symbol = 'Unable to verify gene symbol for ' + str(transcript_accession) - else: - gene_symbol = '' + if transcript_accession == '': + variant.gene_symbol = '' if tx_variant != '': multi_gen_vars = mappers.final_tx_to_multiple_genomic(variant, self, tx_variant) @@ -683,7 +678,6 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: predicted_protein_variant_dict["slr"] = str(predicted_protein_variant) - variant.gene_symbol = gene_symbol variant.hgvs_transcript_variant = tx_variant variant.genome_context_intronic_sequence = genome_context_transcript_variant variant.refseqgene_context_intronic_sequence = refseqgene_context_transcript_variant From f622a9dd0eba9f27812786dd12d596e14ed81b8c Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 17 May 2019 16:16:56 +0100 Subject: [PATCH 096/223] Added more detail in the comments in variant obj --- VariantValidator/modules/variant.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/VariantValidator/modules/variant.py b/VariantValidator/modules/variant.py index f3033d79..934b5a00 100644 --- a/VariantValidator/modules/variant.py +++ b/VariantValidator/modules/variant.py @@ -48,14 +48,14 @@ def __init__(self, original, quibble=None, warnings='', write=True, primary_asse self.lose_vm = None # Required for output - self.hgvs_transcript_variant = None # variant.coding + self.hgvs_transcript_variant 
= None # variant.coding but edited self.genome_context_intronic_sequence = None self.refseqgene_context_intronic_sequence = None - self.hgvs_refseqgene_variant = None # genomic_r + self.hgvs_refseqgene_variant = None # genomic_r but edited self.hgvs_predicted_protein_consequence = None self.validation_warnings = None # warnings but duplicates removed self.hgvs_lrg_transcript_variant = None - self.hgvs_lrg_variant = None # Same as hgvs_refseqgene_variant ? + self.hgvs_lrg_variant = None # Same as hgvs_refseqgene_variant but with LRG accession self.alt_genomic_loci = None self.primary_assembly_loci = None self.reference_sequence_records = None From 6a5efd7e47177c7a2f0a91978251ba6e63e2a143 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 20 May 2019 11:10:11 +0100 Subject: [PATCH 097/223] Changed variant warnings from string to list, removed duplicate validated_warnings attribute --- VariantValidator/modules/format_converters.py | 90 +++++++++--------- VariantValidator/modules/gapped_mapping.py | 2 +- VariantValidator/modules/mappers.py | 90 +++++++++--------- VariantValidator/modules/use_checking.py | 91 +++++++++---------- VariantValidator/modules/valoutput.py | 6 +- VariantValidator/modules/variant.py | 24 ++++- VariantValidator/modules/vvMixinCore.py | 81 +++++++---------- 7 files changed, 187 insertions(+), 197 deletions(-) diff --git a/VariantValidator/modules/format_converters.py b/VariantValidator/modules/format_converters.py index 1bc3d97c..64a2e054 100644 --- a/VariantValidator/modules/format_converters.py +++ b/VariantValidator/modules/format_converters.py @@ -80,10 +80,10 @@ def vcf2hgvs_stage1(variant, validator): variant.quibble = '-'.join(in_list[1:]) pre_input = variant.quibble vcf_elements = pre_input.split('-') - variant.warnings = 'Not stating ALT bases is ambiguous because VCF specification 4.0 would treat ' + \ - pre_input + ' as a deletion whereas VCF specification 4.1 onwards would treat ' + \ - pre_input + ' as ALT = REF' - variant.warnings 
+= ': VariantValidator has output both alternatives' + variant.warnings = ['Not stating ALT bases is ambiguous because VCF specification 4.0 would treat ' + + pre_input + ' as a deletion whereas VCF specification 4.1 onwards would treat ' + + pre_input + ' as ALT = REF'] + variant.warnings.append('VariantValidator has output both alternatives') logger.resub('Not stating ALT bases is ambiguous because VCF specification 4.0 would treat ' + pre_input + ' as a deletion whereas VCF specification 4.1 onwards would treat ' + pre_input + ' as ALT = REF. Validator will output both alternatives.') @@ -150,8 +150,7 @@ def vcf2hgvs_stage2(variant, validator): # Use selected assembly accession = vvChromosomes.to_accession(chr_num, validator.selected_assembly) if accession is None: - variant.warnings += ': ' + chr_num + \ - ' is not part of genome build ' + validator.selected_assembly + variant.warnings.append(chr_num + ' is not part of genome build ' + validator.selected_assembly) logger.warning(chr_num + ' is not part of genome build ' + validator.selected_assembly) skipvar = True else: @@ -196,7 +195,7 @@ def vcf2hgvs_stage2(variant, validator): error = 'Unable to identify a colon (:) in the variant description %s. A colon is required in HGVS variant ' \ 'descriptions to separate the reference accession from the reference type i.e. :. ' \ 'e.g. :c.' 
% variant.quibble - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) skipvar = True @@ -243,8 +242,7 @@ def vcf2hgvs_stage3(variant, validator): chr_num = chr_num.replace('CHR', '') # Use selected assembly accession = vvChromosomes.to_accession(chr_num, validator.selected_assembly) if accession is None: - variant.warnings += ': ' + chr_num + \ - ' is not part of genome build ' + validator.selected_assembly + variant.warnings.append(chr_num + ' is not part of genome build ' + validator.selected_assembly) skipvar = True variant.quibble = str(accession) + ':' + str(position_and_edit) except Exception: @@ -283,8 +281,8 @@ def gene_symbol_catch(variant, validator, select_transcripts_dict_plus_version): if validator.select_transcripts != 'all': variant.write = False for transcript in list(select_transcripts_dict_plus_version.keys()): - variant.warnings = 'HGVS variant nomenclature does not allow the use of a gene symbol (' + \ - query_a_symbol + ') in place of a valid reference sequence' + variant.warnings = ['HGVS variant nomenclature does not allow the use of a gene symbol (' + + query_a_symbol + ') in place of a valid reference sequence'] refreshed_description = transcript + ':' + tx_edit query = Variant(variant.original, quibble=refreshed_description, warnings=variant.warnings, primary_assembly=variant.primary_assembly, @@ -293,10 +291,10 @@ def gene_symbol_catch(variant, validator, select_transcripts_dict_plus_version): logger.resub('HGVS variant nomenclature does not allow the use of a gene symbol (' + query_a_symbol + ') in place of a valid reference sequence') else: - variant.warnings += ': ' + 'HGVS variant nomenclature does not allow the use of a gene symbol ('\ - + query_a_symbol + ') in place of a valid reference sequence: Re-submit ' + \ - variant.quibble + ' and specify transcripts from the following: ' + \ - 'select_transcripts=' + select_from_these_transcripts + variant.warnings.append('HGVS variant nomenclature does 
not allow the use of a gene symbol (' + + query_a_symbol + ') in place of a valid reference sequence: Re-submit ' + + variant.quibble + ' and specify transcripts from the following: ' + + 'select_transcripts=' + select_from_these_transcripts) logger.warning('HGVS variant nomenclature does not allow the use of a gene symbol (' + query_a_symbol + ') in place of a valid reference sequence: Re-submit ' + variant.quibble + ' and specify transcripts from the following: ' + @@ -333,9 +331,9 @@ def refseq_catch(variant, validator, select_transcripts_dict_plus_version): if validator.select_transcripts != 'all': variant.write = False for transcript in list(select_transcripts_dict_plus_version.keys()): - variant.warnings = 'NG_:c.PositionVariation descriptions should not be used unless a ' \ - 'transcript reference sequence has also been provided e.g. ' \ - 'NG_(NM_):c.PositionVariation' + variant.warnings = ['NG_:c.PositionVariation descriptions should not be used unless a ' + 'transcript reference sequence has also been provided e.g. ' + 'NG_(NM_):c.PositionVariation'] refreshed_description = ref_seq_gene_id + '(' + transcript + ')' + ':' + tx_edit query = Variant(variant.original, quibble=refreshed_description, warnings=variant.warnings, primary_assembly=variant.primary_assembly, @@ -346,25 +344,25 @@ def refseq_catch(variant, validator, select_transcripts_dict_plus_version): 'Resubmitting corrected version.') validator.batch_list.append(query) else: - variant.warnings += ': ' + 'A transcript reference sequence has not been provided e.g. ' \ - 'NG_(NM_):c.PositionVariation. Re-submit ' + variant.quibble + \ - ' but also specify transcripts from the following: ' + \ - 'select_transcripts=' + select_from_these_transcripts + variant.warnings.append('A transcript reference sequence has not been provided e.g. ' + 'NG_(NM_):c.PositionVariation. 
Re-submit ' + variant.quibble + + ' but also specify transcripts from the following: ' + + 'select_transcripts=' + select_from_these_transcripts) logger.warning('A transcript reference sequence has not been provided e.g. ' 'NG_(NM_):c.PositionVariation. Re-submit ' + variant.quibble + ' but also ' 'specify transcripts from the following: select_transcripts=' + select_from_these_transcripts) skipvar = True else: - variant.warnings += ': ' + 'A transcript reference sequence has not been provided e.g. ' \ - 'NG_(NM_):c.PositionVariation' + variant.warnings.append('A transcript reference sequence has not been provided e.g. ' + 'NG_(NM_):c.PositionVariation') logger.warning( 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation') skipvar = True elif variant.quibble.startswith('NC_'): - variant.warnings += ': ' + 'A transcript reference sequence has not been provided e.g. ' \ - 'NC_(NM_):c.PositionVariation. Unable to predict available transcripts ' \ - 'because chromosomal position is not specified' + variant.warnings.append('A transcript reference sequence has not been provided e.g. ' + 'NC_(NM_):c.PositionVariation. Unable to predict available transcripts ' + 'because chromosomal position is not specified') logger.warning( 'A transcript reference sequence has not been provided e.g. NC_(NM_):c.PositionVariation. 
' 'Unable to predict available transcripts because chromosomal position is not specified') @@ -461,8 +459,8 @@ def vcf2hgvs_stage4(variant, validator): alt_list = alts.split(',') # Assemble and re-submit for alt in alt_list: - variant.warnings = 'Multiple ALT sequences detected: ' \ - 'auto-submitting all possible combinations' + variant.warnings = ['Multiple ALT sequences detected: ' + 'auto-submitting all possible combinations'] variant.write = False refreshed_description = header + '>' + alt query = Variant(variant.original, quibble=refreshed_description, @@ -475,7 +473,7 @@ def vcf2hgvs_stage4(variant, validator): skipvar = True else: error = str(e) - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(str(e)) skipvar = True @@ -486,12 +484,12 @@ def vcf2hgvs_stage4(variant, validator): if 'Normalization of intronic variants is not supported' in error: not_delins = not_delins else: - variant.warnings += ': ' + str(error) + variant.warnings.append(error) logger.warning(str(e)) skipvar = True # Create warning automap = variant.quibble + ' automapped to ' + not_delins - variant.warnings += ': ' + automap + variant.warnings.append(automap) # Change input to normalized variant variant.quibble = not_delins except: @@ -529,14 +527,14 @@ def indel_catching(variant, validator): if failed.endswith('ins'): issue_link = 'http://varnomen.hgvs.org/recommendations/DNA/variant/insertion/' error = error + ' please refer to ' + issue_link - variant.warnings += error + variant.warnings.append(error) logger.warning(str(error) + " " + str(e)) return True hgvs_failed.posedit.edit = str(hgvs_failed.posedit.edit).replace(digits, '') failed = str(hgvs_failed) automap = 'Non HGVS compliant variant description ' + variant.quibble + ' automapped to ' + failed - variant.warnings += ': ' + automap + variant.warnings.append(automap) logger.warning(automap) variant.quibble = failed @@ -600,7 +598,7 @@ def allele_parser(variant, validation): else: caution = caution 
+ ': ' + lrg_reference + ':' + variation + ' automapped to ' + \ refseqgene_reference + ':' + variation - variant.warnings += ': ' + str(caution) + variant.warnings.append(caution) logger.warning(str(caution)) elif re.match(r'^LRG_\d+t\d+:c.', variant.quibble) or re.match(r'^LRG_\d+t\d+:n.', variant.quibble) or \ re.match(r'^LRG_\d+t\d+:p.', variant.quibble) or re.match(r'^LRG_\d+t\d+:g.', variant.quibble): @@ -615,14 +613,14 @@ def allele_parser(variant, validation): else: caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + \ refseqtranscript_reference + ':' + variation - variant.warnings += ': ' + str(caution) + variant.warnings.append(caution) logger.warning(str(caution)) else: pass try: # Submit to allele extraction function alleles = validation.hgvs_alleles(variant.quibble, variant.hn) - variant.warnings += ': ' + 'Automap has extracted possible variant descriptions' + variant.warnings.append('Automap has extracted possible variant descriptions') logger.resub('Automap has extracted possible variant descriptions, resubmitting') for allele in alleles: query = Variant(variant.original, quibble=allele, warnings=variant.warnings, write=True, @@ -632,11 +630,11 @@ def allele_parser(variant, validation): return True except fn.alleleVariantError as e: if "Cannot validate sequence of an intronic variant" in str(e): - variant.warnings += ': ' + 'Intronic positions not supported for HGVS Allele descriptions' + variant.warnings.append('Intronic positions not supported for HGVS Allele descriptions') logger.warning('Intronic positions not supported for HGVS Allele descriptions') return True elif "No transcript definition for " in str(e): - variant.warnings += ': ' + str(e) + variant.warnings.append(str(e)) logger.warning(str(e)) return True else: @@ -666,7 +664,7 @@ def lrg_to_refseq(variant, validator): variant.hgvs_formatted.ac = refseqtrans_reference variant.set_quibble(str(variant.hgvs_formatted)) caution += lrg_reference + ':' + variation + 
' automapped to ' + refseqtrans_reference + ':' + variation - variant.warnings += ': ' + caution + variant.warnings.append(caution) logger.warning(caution) elif re.match(r'^LRG_\d+:', variant.quibble): lrg_reference, variation = variant.quibble.split(':') @@ -675,7 +673,7 @@ def lrg_to_refseq(variant, validator): variant.hgvs_formatted.ac = refseqgene_reference variant.set_quibble(str(variant.hgvs_formatted)) caution += lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation - variant.warnings += ': ' + caution + variant.warnings.append(caution) logger.warning(caution) @@ -693,12 +691,12 @@ def mitochondrial(variant, validator): validator.vr.validate(hgvs_mito) except hgvs.exceptions.HGVSError as e: error = str(e) - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True except KeyError: error = 'Currently unable to validate ' + hgvs_mito.ac + ' sequence variation' - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True else: @@ -729,7 +727,7 @@ def proteins(variant, validator): except hgvs.exceptions.HGVSError as e: error = str(e) if error: - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True else: @@ -750,7 +748,7 @@ def proteins(variant, validator): hgvs_object) + ' is HGVS compliant and contains a valid reference amino acid description' reason = 'Protein level variant descriptions are not fully supported due to redundancy' \ ' in the genetic code' - variant.warnings += ': ' + reason + ': ' + error + variant.warnings.extend([reason, error]) variant.protein = str(hgvs_object) logger.warning(reason + ": " + error) return True @@ -770,7 +768,7 @@ def rna(variant, validator): hgvs_c = validator.va_func.hgvs_r_to_c(hgvs_input) except hgvs.exceptions.HGVSDataNotAvailableError as e: error = str(e) - variant.warnings += ': ' + str(error) + variant.warnings.append(error) logger.warning(str(error)) 
return True variant.hgvs_formatted = hgvs_c diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index 6e816385..cbb1fdaa 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -947,7 +947,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): self.auto_info = self.auto_info + '\nCaution should be used when reporting the displayed variant ' \ 'descriptions: If you are unsure, please contact admin' self.auto_info = self.auto_info.replace('\n', ': ') - self.variant.warnings += ': ' + self.auto_info + self.variant.warnings.append(self.auto_info) logger.warning(self.auto_info) # Normailse hgvs_genomic try: diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index f24ff6af..3e246f54 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -22,8 +22,8 @@ def gene_to_transcripts(variant, validator): except KeyError: error = 'Reference sequence ' + variant.hgvs_genomic.ac + ' is either not supported or does not exist' if error != 'false': - variant.warnings += ': ' + str(error) - logger.warning(str(error)) + variant.warnings.append(error) + logger.warning(error) return True # Set test to see if Norm alters the coords @@ -94,8 +94,8 @@ def gene_to_transcripts(variant, validator): genomic_input = refseqgene_data['hgvs_genomic'] # re_submit # Tag the line so that it is not written out - variant.warnings += ': ' + str(variant.hgvs_formatted) + ' automapped to genome position ' + \ - str(genomic_input) + variant.warnings.append(str(variant.hgvs_formatted) + ' automapped to genome position ' + + str(genomic_input)) query = Variant(variant.original, quibble=genomic_input, warnings=variant.warnings, primary_assembly=variant.primary_assembly, order=variant.order) @@ -103,7 +103,7 @@ def gene_to_transcripts(variant, validator): else: error = 'Mapping unavailable for RefSeqGene ' + 
str(variant.hgvs_formatted) + \ ' using alignment method = ' + validator.alt_aln_method - variant.warnings += ': ' + str(error) + variant.warnings.append(error) logger.warning(str(error)) return True @@ -115,7 +115,7 @@ def gene_to_transcripts(variant, validator): validator.vr.validate(variant.hgvs_genomic) except hgvs.exceptions.HGVSError as e: error = str(e) - variant.warnings += ': ' + str(error) + variant.warnings.append(error) logger.warning(str(error)) return True else: @@ -134,7 +134,7 @@ def gene_to_transcripts(variant, validator): # set output type flag variant.output_type_flag = 'intergenic' # set genomic and where available RefSeqGene outputs - variant.warnings += ': ' + str(error) + variant.warnings.append(error) variant.genomic_g = fn.valstr(variant.hgvs_genomic) variant.genomic_r = str(rsg_data.split('(')[0]) logger.warning(str(error)) @@ -142,7 +142,7 @@ def gene_to_transcripts(variant, validator): else: error = 'Please ensure the requested chromosome version relates to a supported genome build. 
' \ 'Supported genome builds are: GRCh37, GRCh38, hg19 and hg38' - variant.warnings += ': ' + str(error) + variant.warnings.append(error) logger.warning(str(error)) return True @@ -227,7 +227,7 @@ def transcripts_to_gene(variant, validator): error = error + ': Consequently the input variant description cannot be fully validated and is not ' \ 'supported: Use the Gene to Transcripts function to determine whether an updated ' \ 'transcript reference sequence is available' - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True @@ -240,7 +240,7 @@ def transcripts_to_gene(variant, validator): error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, ' \ 'please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to ' \ 'https://variantvalidator.org/ref_finder/, or select an alternative genome build' - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True except TypeError: @@ -253,7 +253,7 @@ def transcripts_to_gene(variant, validator): error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, ' \ 'please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to ' \ 'https://variantvalidator.org/ref_finder/, or select an alternative genome build' - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True @@ -268,14 +268,14 @@ def transcripts_to_gene(variant, validator): if validator.alt_aln_method != 'genebuild': error = "If the following error message does not address the issue and the problem persists please " \ "contact admin: " + str(to_g) - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True else: error = "If the following error message does not address the issue and the problem persists please " \ "contact admin: " + str(to_g) - variant.warnings += ': ' + error + 
variant.warnings.append(error) logger.warning(error) return True @@ -299,14 +299,14 @@ def transcripts_to_gene(variant, validator): if validator.alt_aln_method != 'genebuild': error = "If the following error message does not address the issue and the problem persists " \ "please contact admin: " + str(to_g) - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True else: error = "If the following error message does not address the issue and the problem persists " \ "please contact admin: " + str(to_g) - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True else: @@ -333,7 +333,7 @@ def transcripts_to_gene(variant, validator): caution = 'This coding sequence variant description spans at least one intron' automap = 'Use of the corresponding genomic sequence variant descriptions may be invalid. ' \ 'Please refer to https://www35.lamp.le.ac.uk/recommendations/' - variant.warnings += ': ' + caution + ': ' + automap + variant.warnings.extend([caution, automap]) logger.warning(caution + ": " + automap) else: formatted_variant = str(h_variant) @@ -342,7 +342,7 @@ def transcripts_to_gene(variant, validator): if error == 'false': valid = True else: - variant.warnings += ': ' + str(error) + variant.warnings.append(str(error)) logger.warning(str(error)) return True @@ -395,7 +395,7 @@ def transcripts_to_gene(variant, validator): query.type = 'r' post_var = str(query) automap = variant.pre_RNA_conversion + ' automapped to ' + str(post_var) - variant.warnings += ': ' + str(caution) + ': ' + str(automap) + variant.warnings.extend([str(caution), str(automap)]) # Kill current line and append for re-submission # Tag the line so that it is not written out @@ -403,7 +403,7 @@ def transcripts_to_gene(variant, validator): # Set the values and append to batch_list hgvs_vt = validator.hp.parse_hgvs_variant(str(post_var)) assert str(hgvs_vt) == str(post_var) - query = Variant(variant.original, 
quibble=fn.valstr(hgvs_vt), warnings=automap, + query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=[automap], primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) @@ -421,7 +421,7 @@ def transcripts_to_gene(variant, validator): if error == 'expected from_start_i <= from_end_i': error = 'Automap is unable to correct the input exon/intron boundary coordinates, ' \ 'please check your variant description' - variant.warnings += ': ' + error + variant.warnings.append(error) return True else: fn.exceptPass() @@ -429,7 +429,7 @@ def transcripts_to_gene(variant, validator): try: post_var = validator.myevm_g_to_t(variant.evm, pre_var, trans_acc) except hgvs.exceptions.HGVSError as error: - variant.warnings += ': ' + str(error) + variant.warnings.append(str(error)) logger.warning(str(error)) return True test = validator.hp.parse_hgvs_variant(quibble_input) @@ -440,7 +440,7 @@ def transcripts_to_gene(variant, validator): 'selected transcript:' # automapping of variant completed automap = variant.pre_RNA_conversion + ' automapped to ' + str(post_var) - variant.warnings += str(caution) + ': ' + str(automap) + variant.warnings.extend([caution, automap]) # Kill current line and append for re-submission # Tag the line so that it is not written out @@ -448,7 +448,7 @@ def transcripts_to_gene(variant, validator): # Set the values and append to batch_list hgvs_vt = validator.hp.parse_hgvs_variant(str(post_var)) assert str(hgvs_vt) == str(post_var) - query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, + query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=[automap], primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) @@ -477,8 +477,7 @@ def transcripts_to_gene(variant, validator): query.type = 'r' post_var = str(query) automap = quibble_input + ' automapped to ' + post_var - variant.warnings += ': ' + str(caution) + ': ' + str( - 
automap) + variant.warnings.extend([caution, automap]) # Kill current line and append for re-submission # Tag the line so that it is not written out @@ -486,7 +485,7 @@ def transcripts_to_gene(variant, validator): # Set the values and append to batch_list hgvs_vt = validator.hp.parse_hgvs_variant(str(post_var)) assert str(hgvs_vt) == str(post_var) - query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, + query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=[automap], primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) @@ -507,8 +506,7 @@ def transcripts_to_gene(variant, validator): 'selected transcript:' # automapping of variant completed automap = str(variant.pre_RNA_conversion) + ' automapped to ' + str(post_var) - variant.warnings += ': ' + str(caution) + ': ' + str( - automap) + variant.warnings.extend([caution, automap]) # Kill current line and append for re-submission # Tag the line so that it is not written out @@ -516,7 +514,7 @@ def transcripts_to_gene(variant, validator): # Set the values and append to batch_list hgvs_vt = validator.hp.parse_hgvs_variant(str(post_var)) assert str(hgvs_vt) == str(post_var) - query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, + query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=[automap], primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) @@ -532,7 +530,7 @@ def transcripts_to_gene(variant, validator): if 'error' in str(to_g): error = "If the following error message does not address the issue and the problem persists " \ "please contact admin: " + to_g - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True @@ -565,7 +563,7 @@ def transcripts_to_gene(variant, validator): 'nomenclature:' # automapping of variant completed automap = variant.pre_RNA_conversion + ' automapped to ' + output - 
variant.warnings += ': ' + caution + ': ' + automap + variant.warnings.extend([caution, automap]) # Kill current line and append for re-submission # Tag the line so that it is not written out @@ -573,7 +571,7 @@ def transcripts_to_gene(variant, validator): # Set the values and append to batch_list hgvs_vt = validator.hp.parse_hgvs_variant(str(query)) assert str(hgvs_vt) == str(query) - query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, + query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=[automap], primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) @@ -585,7 +583,7 @@ def transcripts_to_gene(variant, validator): 'nomenclature:' # automapping of variant completed automap = str(test) + ' automapped to ' + str(query) - variant.warnings += ': ' + caution + ': ' + automap + variant.warnings.extend([caution, automap]) # Kill current line and append for re-submission # Tag the line so that it is not written out @@ -593,7 +591,7 @@ def transcripts_to_gene(variant, validator): # Set the values and append to batch_list hgvs_vt = validator.hp.parse_hgvs_variant(str(query)) assert str(hgvs_vt) == str(query) - query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=automap, + query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=[automap], primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) @@ -609,13 +607,13 @@ def transcripts_to_gene(variant, validator): if variant.reftype != ':g.': if caution == '': caution = fn.valstr(pre_valid) + ' automapped to ' + fn.valstr(post_valid) - variant.warnings += ': ' + caution + variant.warnings.append(caution) logger.warning(caution) # Apply validation to intronic variant descriptions (should be valid but make sure) error = validator.validateHGVS(genomic_validation) if error != 'false': - variant.warnings += ': ' + error + variant.warnings.append(error) return True # 
v0.1a1 edit @@ -623,7 +621,7 @@ def transcripts_to_gene(variant, validator): if variant.reftype == ':g.': if caution == '': caution = fn.valstr(pre_valid) + ' automapped to ' + fn.valstr(post_valid) - variant.warnings += ': ' + str(caution) + variant.warnings.append(caution) # COLLECT VARIANT DESCRIPTIONS ############################## @@ -650,7 +648,7 @@ def transcripts_to_gene(variant, validator): hgvs_coding = variant.evm._maybe_normalize(hgvs_coding) gap_compensation = False except hgvs.exceptions.HGVSError as error: - variant.warnings += ': ' + str(error) + variant.warnings.append(str(error)) logger.warning(str(error)) return True @@ -720,7 +718,7 @@ def transcripts_to_gene(variant, validator): hgvs_protein = protein_dict['hgvs_protein'] else: error = protein_dict['error'] - variant.warnings += ': ' + str(error) + variant.warnings.append(str(error)) if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': hgvs_protein = protein_dict['hgvs_protein'] else: @@ -748,7 +746,7 @@ def transcripts_to_gene(variant, validator): try: automap = fn.valstr(hgvs_coding) + ' normalized to ' + fn.valstr(hgvs_seek_var) hgvs_coding = hgvs_seek_var - variant.warnings += ': ' + automap + variant.warnings.append(automap) except NotImplementedError: fn.exceptPass() if ori == -1: @@ -766,7 +764,7 @@ def transcripts_to_gene(variant, validator): error = protein_dict['error'] if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': hgvs_protein = protein_dict['hgvs_protein'] - variant.warnings += ': ' + str(error) + variant.warnings.append(error) except NotImplementedError: fn.exceptPass() elif ori == 1: @@ -797,7 +795,7 @@ def transcripts_to_gene(variant, validator): error = protein_dict['error'] if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': hgvs_protein = protein_dict['hgvs_protein'] - variant.warnings += ': ' + str(error) + variant.warnings.append(error) # Replace protein 
description in vars table except Exception: fn.exceptPass() @@ -835,10 +833,10 @@ def transcripts_to_gene(variant, validator): validator.vr.validate(hgvs_updated) updated_transcript_variant = hgvs_updated - variant.warnings += ': ' + 'A more recent version of the selected reference sequence ' + hgvs_coding.ac + \ - ' is available (' + updated_transcript_variant.ac + ')' + ': ' + \ - str(updated_transcript_variant) + ' MUST be fully validated prior to use in reports: ' \ - 'select_variants=' + fn.valstr(updated_transcript_variant) + variant.warnings.append('A more recent version of the selected reference sequence ' + hgvs_coding.ac + + ' is available (' + updated_transcript_variant.ac + ')' + ': ' + + str(updated_transcript_variant) + ' MUST be fully validated prior to use in reports: ' + 'select_variants=' + fn.valstr(updated_transcript_variant)) variant.coding = str(hgvs_coding) variant.genomic_r = str(hgvs_refseq) diff --git a/VariantValidator/modules/use_checking.py b/VariantValidator/modules/use_checking.py index 8e104bf2..b7588a8e 100644 --- a/VariantValidator/modules/use_checking.py +++ b/VariantValidator/modules/use_checking.py @@ -14,7 +14,7 @@ def refseq_common_mistakes(variant): suggestion = variant.quibble.replace(':g.', ':c.') error = 'Transcript reference sequence input as genomic (g.) reference sequence. ' \ 'Did you mean ' + suggestion + '?' - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True # NR_ c. @@ -22,7 +22,7 @@ def refseq_common_mistakes(variant): suggestion = variant.quibble.replace(':c.', ':n.') error = 'Non-coding transcript reference sequence input as coding (c.) reference sequence. ' \ 'Did you mean ' + suggestion + '?' - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True # NM_ n. 
@@ -30,26 +30,25 @@ def refseq_common_mistakes(variant): suggestion = variant.quibble.replace(':n.', ':c.') error = 'Coding transcript reference sequence input as non-coding transcript (n.) reference sequence. ' \ 'Did you mean ' + suggestion + '?' - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True # NM_ NC_ NG_ NR_ p. if (variant.quibble.startswith('NM_') or variant.quibble.startswith('NR_') or variant.quibble.startswith('NC_') or variant.quibble.startswith('NG_')) and variant.reftype == ':p.': - issue_link = 'http://varnomen.hgvs.org/recommendations/protein/' error = 'Using a nucleotide reference sequence (NM_ NR_ NG_ NC_) to specify protein-level (p.) variation is ' \ 'not HGVS compliant. Please select an appropriate protein reference sequence (NP_)' - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True # NG_ c or NC_c.. if (variant.quibble.startswith('NG_') or variant.quibble.startswith('NC_')) and variant.reftype == ':c.': - suggestion = ': For additional assistance, submit ' + str(variant.quibble) + ' to VariantValidator' + suggestion = 'For additional assistance, submit ' + str(variant.quibble) + ' to VariantValidator' error = 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has ' \ - 'also been provided e.g. NG_(NM_):c.PositionVariation' + suggestion - variant.warnings += ': ' + error + 'also been provided e.g. 
NG_(NM_):c.PositionVariation' + variant.warnings.extend([error, suggestion]) logger.warning(error) return True @@ -93,7 +92,7 @@ def structure_checks_g(variant, validator): if not variant.quibble.startswith('NC_') and not variant.quibble.startswith('NG_') \ and not variant.quibble.startswith('NT_') and not variant.quibble.startswith('NW_'): error = 'Invalid reference sequence identifier (' + variant.input_parses.ac + ')' - variant.warnings += ': ' + str(error) + variant.warnings.append(error) logger.warning(error) return True @@ -101,7 +100,7 @@ def structure_checks_g(variant, validator): validator.vr.validate(variant.input_parses) except Exception as e: error = str(e) - variant.warnings += ': ' + str(error) + variant.warnings.append(error) logger.warning(error) return True @@ -110,7 +109,7 @@ def structure_checks_g(variant, validator): variant.hn.normalize(variant.input_parses) except hgvs.exceptions.HGVSError as e: error = str(e) - variant.warnings += ': ' + str(error) + variant.warnings.append(error) logger.warning(error) return True @@ -139,14 +138,14 @@ def structure_checks_c(variant, validator): to_n = variant.evm.c_to_n(variant.input_parses) except hgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True actual_ref = to_n.posedit.edit.ref if called_ref != actual_ref: error = 'Variant reference (' + called_ref + ') does not agree with reference sequence ' \ '(' + actual_ref + ')' - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True else: @@ -204,7 +203,7 @@ def structure_checks_c(variant, validator): 'Instead use ' + fn.valstr(report_gen) except Exception: fn.exceptPass() - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True @@ -212,7 +211,7 @@ def structure_checks_c(variant, validator): variant.input_parses = variant.evm.c_to_n(variant.input_parses) except 
hgvs.exceptions.HGVSError as e: error = str(e) - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(e) return True @@ -223,7 +222,7 @@ def structure_checks_c(variant, validator): genomic_position = validator.myevm_t_to_g(input_parses, variant.no_norm_evm, variant.primary_assembly, variant.hn) error = error + fn.valstr(genomic_position) - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True @@ -282,7 +281,7 @@ def structure_checks_c(variant, validator): + fn.valstr(report_gen) except Exception: fn.exceptPass() - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True @@ -300,7 +299,7 @@ def structure_checks_c(variant, validator): error = 'Cannot map ' + fn.valstr(variant.input_parses) + ' to a genomic position. '\ + variant.input_parses.ac + ' can only be partially aligned to genomic reference ' \ 'sequences ' + acs - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True @@ -319,11 +318,11 @@ def structure_checks_c(variant, validator): else: error = 'Using a transcript reference sequence to specify a variant position that lies outside of '\ 'the reference sequence is not HGVS-compliant. Instead use ' + fn.valstr(report_gen) - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True elif 'insertion length must be 1' in error: - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True elif 'base start position must be <= end position' in error: @@ -333,7 +332,7 @@ def structure_checks_c(variant, validator): correction.posedit.pos.start = ed correction.posedit.pos.end = st error = error + ': Did you mean ' + str(correction) + '?' 
- variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True @@ -354,7 +353,7 @@ def structure_checks_c(variant, validator): 'Transcript Archive, please select an alternative version of ' + variant.input_parses.ac + \ ' by submitting ' + variant.input_parses.ac + ' to https://variantvalidator.org/ref_finder/, ' \ 'or select an alternative genome build' - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True except ValueError as e: @@ -362,7 +361,7 @@ def structure_checks_c(variant, validator): if '> end' in error: error = 'Interval start position ' + str(variant.input_parses.posedit.pos.start) + ' > interval end '\ 'position ' + str(variant.input_parses.posedit.pos.end) - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True except hgvs.exceptions.HGVSInvalidVariantError as e: @@ -376,11 +375,11 @@ def structure_checks_c(variant, validator): # error = error + ': Did you mean ' + str(correction) + '?' 
error = 'Interval start position ' + str(variant.input_parses.posedit.pos.start) + ' > interval end' \ ' position ' + str(variant.input_parses.posedit.pos.end) - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True else: - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True @@ -388,7 +387,7 @@ def structure_checks_c(variant, validator): variant.evm.g_to_t(output, variant.input_parses.ac) except hgvs.exceptions.HGVSError as e: error = str(e) - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True @@ -396,7 +395,7 @@ def structure_checks_c(variant, validator): validator.vr.validate(output) except hgvs.exceptions.HGVSError as e: error = str(e) - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True @@ -418,20 +417,20 @@ def structure_checks_c(variant, validator): # error = error + ': Did you mean ' + str(correction) + '?' 
error = 'Interval start position ' + str(variant.input_parses.posedit.pos.start) + ' > interval end '\ 'position ' + str(variant.input_parses.posedit.pos.end) - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True except hgvs.exceptions.HGVSDataNotAvailableError as e: error = str(e) - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True except hgvs.exceptions.HGVSError as e: error = str(e) if 'bounds' in error: error += ' (' + variant.input_parses.ac + ')' - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True return False @@ -459,7 +458,7 @@ def structure_checks_n(variant, validator): actual_ref = to_n.posedit.edit.ref if called_ref != actual_ref: error = 'Variant reference (' + called_ref + ') does not agree with reference sequence (' + actual_ref + ')' - variant.warnings += ': ' + str(error) + variant.warnings.append(error) logger.warning(str(error)) return True else: @@ -489,11 +488,11 @@ def structure_checks_n(variant, validator): report_gen) except Exception: fn.exceptPass() - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True else: - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True @@ -502,7 +501,7 @@ def structure_checks_n(variant, validator): genomic_position = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, variant.primary_assembly, variant.hn) error = error + fn.valstr(genomic_position) - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True @@ -521,11 +520,11 @@ def structure_checks_n(variant, validator): else: error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + fn.valstr( report_gen) - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True elif 'insertion length must be 1' in error: - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True elif 'base start position must be <= end position' in error: @@ -536,7 +535,7 @@ def structure_checks_n(variant, validator): correction.posedit.pos.end = st error = error + ': Did you mean ' + str(correction) + '?' # error = 'Interval start position ' + str(input_parses.posedit.pos.start) + ' > interval end position ' + str(input_parses.posedit.pos.end) - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True elif 'Cannot validate sequence of an intronic variant' in error: @@ -551,7 +550,7 @@ def structure_checks_n(variant, validator): variant.primary_assembly, variant.hn) error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + fn.valstr( report_gen) - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True @@ -571,7 +570,7 @@ def structure_checks_n(variant, validator): 'Transcript Archive, please select an alternative version of ' + variant.input_parses.ac + \ ' by submitting ' + variant.input_parses.ac + ' to https://variantvalidator.org/ref_finder/,' \ ' or select an alternative genome build' - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True except ValueError as e: @@ -580,7 +579,7 @@ def structure_checks_n(variant, validator): error = 'Interval start position ' + str( variant.input_parses.posedit.pos.start) + ' > interval end position ' + str( variant.input_parses.posedit.pos.end) - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True except hgvs.exceptions.HGVSInvalidVariantError as e: @@ -595,14 +594,14 @@ def structure_checks_n(variant, validator): error = 'Interval start position ' + str( variant.input_parses.posedit.pos.start) + ' > interval end position ' + str( variant.input_parses.posedit.pos.end) - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True try: validator.vr.validate(output) except hgvs.exceptions.HGVSError as e: error = str(e) - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True @@ -643,21 +642,21 @@ def structure_checks_n(variant, validator): variant.input_parses.posedit.pos.start) + ' > interval end position ' + str( variant.input_parses.posedit.pos.end) logger.warning(error) - variant.warnings += ': ' + error + variant.warnings.append(error) return True - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True except hgvs.exceptions.HGVSDataNotAvailableError as e: error = str(e) - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) 
return True except hgvs.exceptions.HGVSError as e: error = str(e) if 'bounds' in error: error = error + ' (' + variant.input_parses.ac + ')' - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True return False diff --git a/VariantValidator/modules/valoutput.py b/VariantValidator/modules/valoutput.py index 0ecad8ee..6b47f3d7 100644 --- a/VariantValidator/modules/valoutput.py +++ b/VariantValidator/modules/valoutput.py @@ -26,11 +26,11 @@ def format_as_dict(self, with_meta=True): # For gene outputs, i.e. those that hit transcripts if variant.output_type_flag == 'gene': validation_output['flag'] = 'gene_variant' - if variant.validation_warnings == ['Validation error']: + if variant.warnings == ['Validation error']: validation_error_counter = validation_error_counter + 1 identification_key = 'Validation_Error_%s' % validation_error_counter else: - if variant.is_obsolete(): + if variant.is_obsolete() and variant.hgvs_transcript_variant == '': validation_obsolete_counter += 1 identification_key = 'obsolete_record_%s' % validation_obsolete_counter else: @@ -48,7 +48,7 @@ def format_as_dict(self, with_meta=True): # Gene as transcript reference sequence if variant.output_type_flag == 'warning': validation_output['flag'] = 'warning' - if variant.validation_warnings == ['Validation error']: + if variant.warnings == ['Validation error']: validation_error_counter = validation_error_counter + 1 identification_key = 'validation_error_%s' % validation_error_counter elif variant.is_obsolete(): diff --git a/VariantValidator/modules/variant.py b/VariantValidator/modules/variant.py index 934b5a00..92f33c63 100644 --- a/VariantValidator/modules/variant.py +++ b/VariantValidator/modules/variant.py @@ -8,7 +8,7 @@ class Variant(object): relevant to what kind of variant it is. 
""" - def __init__(self, original, quibble=None, warnings='', write=True, primary_assembly=False, order=False): + def __init__(self, original, quibble=None, warnings=None, write=True, primary_assembly=False, order=False): self.original = original if quibble is None: self.quibble = original @@ -21,7 +21,10 @@ def __init__(self, original, quibble=None, warnings='', write=True, primary_asse self.pre_RNA_conversion = None self.input_parses = None # quibble as hgvs variant object - self.warnings = warnings + if warnings is None: + self.warnings = [] + else: + self.warnings = warnings self.description = '' # hgnc_gene_info variable self.coding = '' self.coding_g = '' @@ -53,7 +56,6 @@ def __init__(self, original, quibble=None, warnings='', write=True, primary_asse self.refseqgene_context_intronic_sequence = None self.hgvs_refseqgene_variant = None # genomic_r but edited self.hgvs_predicted_protein_consequence = None - self.validation_warnings = None # warnings but duplicates removed self.hgvs_lrg_transcript_variant = None self.hgvs_lrg_variant = None # Same as hgvs_refseqgene_variant but with LRG accession self.alt_genomic_loci = None @@ -184,7 +186,7 @@ def output_dict(self): 'refseqgene_context_intronic_sequence': self.refseqgene_context_intronic_sequence, 'hgvs_refseqgene_variant': self.hgvs_refseqgene_variant, 'hgvs_predicted_protein_consequence': self.hgvs_predicted_protein_consequence, - 'validation_warnings': self.validation_warnings, + 'validation_warnings': self.process_warnings(), 'hgvs_lrg_transcript_variant': self.hgvs_lrg_transcript_variant, 'hgvs_lrg_variant': self.hgvs_lrg_variant, 'alt_genomic_loci': self.alt_genomic_loci, @@ -198,4 +200,16 @@ def is_obsolete(self): Checks whether the keyword 'obsolete' appears within the validation warnings :return: """ - return any('obsolete' in warning for warning in self.validation_warnings) + return any('obsolete' in warning for warning in self.warnings) + + def process_warnings(self): + refined = [] + for warning in 
self.warnings: + warning = re.sub('del[GATC][GATC][GATC][GATC]+', 'del', warning) + warning.strip() + warning = warning.replace("'", "") + if warning == '': + continue + if warning not in refined: + refined.append(warning) + return refined diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index c47fb9b5..a3bd908d 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -128,7 +128,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = 'Submitted variant description contains an invalid character(s) %s at position(s) %s: '\ 'Please remove this character and re-submit: A useful search function for ' \ 'Unicode characters can be found at https://unicode-search.net/' % (chars, positions) - my_variant.warnings += ': ' + error + my_variant.warnings.append(error) logger.warning(error) continue @@ -136,7 +136,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr my_variant.remove_whitespace() if my_variant.quibble != my_variant.original: caution = 'Whitespace removed from variant description %s' % my_variant.original - my_variant.warnings += ': ' + caution + my_variant.warnings.append(caution) logger.info(caution) # Set the primary_assembly @@ -158,8 +158,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: my_variant.primary_assembly = 'GRCh38' primary_assembly = 'GRCh38' - my_variant.warnings += ': Invalid genome build has been specified. ' \ - 'Automap has selected the default build (GRCh38)' + my_variant.warnings.append('Invalid genome build has been specified. Automap has selected ' + 'the default build (GRCh38)') logger.warning( 'Invalid genome build has been specified. 
Automap has selected the ' 'default build ' + my_variant.primary_assembly) @@ -181,7 +181,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr ' and in the expected pattern :.' else: error = 'Variant description ' + my_variant.quibble + ' is not in an accepted format' - my_variant.warnings += ': ' + error + my_variant.warnings.append(error) logger.warning(error) continue @@ -198,7 +198,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Conversions are not currently supported. The HGVS format for conversions # is rarely seen wrt genomic sequencing data and needs to be re-evaluated if 'con' in my_variant.quibble: - my_variant.warnings += ': ' + 'Gene conversions currently unsupported' + my_variant.warnings.append('Gene conversions currently unsupported') logger.warning('Gene conversions currently unsupported') continue @@ -216,7 +216,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr input_parses = self.hp.parse_hgvs_variant(formatted_variant) my_variant.hgvs_formatted = input_parses except hgvs.exceptions.HGVSError as e: - my_variant.warnings += ': ' + str(e) + my_variant.warnings.append(str(e)) logger.warning(str(e)) continue @@ -250,12 +250,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if my_variant.refsource == 'ENS': error = 'Unable to map ' + my_variant.hgvs_formatted.ac + \ ' to an equivalent RefSeq transcript' - my_variant.warnings += ': ' + error + my_variant.warnings.append(error) logger.warning(error) continue else: - my_variant.warnings += ': ' + str(trap_ens_in) + ' automapped to equivalent ' \ - 'RefSeq transcript ' + my_variant.quibble + my_variant.warnings.append(str(trap_ens_in) + ' automapped to equivalent RefSeq transcript ' + + my_variant.quibble) logger.warning(str(trap_ens_in) + ' automapped to equivalent RefSeq ' 'transcript ' + my_variant.quibble) logger.trace("HVGS acceptance test passed", 
my_variant) @@ -300,7 +300,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: error = 'Mapping of ' + formatted_variant + ' to genome assembly ' + \ primary_assembly + ' is not supported' - my_variant.warnings += ': ' + error + my_variant.warnings.append(error) logger.warning(error) continue @@ -321,13 +321,13 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: if to_n.posedit.pos.end.base < to_n.posedit.pos.start.base: error = 'Interval end position < interval start position ' - my_variant.warnings += ': ' + error + my_variant.warnings.append(error) logger.warning(error) continue elif my_variant.hgvs_formatted.posedit.pos.end.base < my_variant.hgvs_formatted.posedit.pos.start.base: error = 'Interval end position ' + str(my_variant.hgvs_formatted.posedit.pos.end.base) + \ ' < interval start position ' + str(my_variant.hgvs_formatted.posedit.pos.start.base) - my_variant.warnings += ': ' + error + my_variant.warnings.append(error) logger.warning(error) continue @@ -335,7 +335,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr is_version = re.compile(r"\d\.\d") if my_variant.refsource == 'RefSeq' and not is_version.search(str(my_variant.hgvs_formatted)): error = 'RefSeq variant accession numbers MUST include a version number' - my_variant.warnings += ': ' + str(error) + my_variant.warnings.append(error) continue logger.trace("HVGS interval/version mapping complete", my_variant) @@ -402,7 +402,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr except Exception: my_variant.output_type_flag = 'error' error = 'Validation error' - my_variant.warnings = str(error) + my_variant.warnings.append(error) exc_type, exc_value, last_traceback = sys.exc_info() te = traceback.format_exc() tbk = [str(exc_type), str(exc_value), str(te)] @@ -421,12 +421,6 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if 
not variant.write: continue - # warngins - warnings = variant.warnings - warnings = re.sub('del[GATC][GATC][GATC][GATC]+', 'del', warnings) - warnings = re.sub('^: ', '', warnings) - warnings = re.sub('::', ':', warnings) - # Genomic sequence variation genomic_variant = variant.genomic_g hgvs_genomic_variant = genomic_variant @@ -443,7 +437,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr refseqgene_variant = variant.genomic_r refseqgene_variant = refseqgene_variant.strip() if 'RefSeqGene' in refseqgene_variant or refseqgene_variant == '': - warnings = warnings + ': ' + refseqgene_variant + variant.warnings.append(refseqgene_variant) refseqgene_variant = '' lrg_variant = '' hgvs_refseqgene_variant = 'false' @@ -457,8 +451,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_lrg.ac = rsg_ac[0] lrg_variant = fn.valstr(hgvs_lrg) if rsg_ac[1] != 'public': - warnings = warnings + ': The current status of ' + str( - hgvs_lrg.ac) + ' is pending therefore changes may be made to the LRG reference sequence' + variant.warnings.append('The current status of ' + str(hgvs_lrg.ac) + ' is pending ' + 'therefore changes may be made to the LRG reference sequence') # Transcript sequence variation tx_variant = variant.coding @@ -640,20 +634,10 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Warn not directly mapped to specified genome build if genomic_accession != '': if primary_assembly.lower() not in list(primary_genomic_dicts.keys()): - warnings = warnings + ': ' + str( - variant.hgvs_coding) + ' cannot be mapped directly to genome build ' + primary_assembly + \ - ': See alternative genomic loci or alternative genome builds for aligned genomic positions' - - warn_list = warnings.split(': ') - warnings_out = [] - for warning in warn_list: - warning.strip() - warning = warning.replace("'", "") - if warning == '': - continue - if warning not in warnings_out: - # Remove 
duplicate elements but maintain the order - warnings_out.append(warning) + variant.warnings.extend([ + str(variant.hgvs_coding) + ' cannot be mapped directly to genome build ' + primary_assembly, + 'See alternative genomic loci or alternative genome builds for aligned genomic positions' + ]) # Ensure Variants have had the refs removed. # if not hasattr(posedit, refseqgene_variant): @@ -683,7 +667,6 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr variant.refseqgene_context_intronic_sequence = refseqgene_context_transcript_variant variant.hgvs_refseqgene_variant = refseqgene_variant variant.hgvs_predicted_protein_consequence = predicted_protein_variant_dict - variant.validation_warnings = warnings_out variant.hgvs_lrg_transcript_variant = lrg_transcript_variant variant.hgvs_lrg_variant = lrg_variant variant.alt_genomic_loci = alt_genomic_dicts @@ -721,7 +704,7 @@ def get_transcript_info(self, variant): error = 'Please inform UTA admin of the following error: ' + str(e) reason = "VariantValidator cannot recover information for transcript " + str( hgvs_vt.ac) + ' because it is not available in the Universal Transcript Archive' - variant.warnings += ': ' + str(reason) + variant.warnings.append(reason) logger.warning(str(reason) + ": " + str(error)) return True @@ -741,8 +724,7 @@ def get_transcript_info(self, variant): if 'error' in entry: # Open a hgvs exception log file in append mode error = entry['description'] - variant.warnings += ': ' + str( - error) + ': A Database error occurred, please contact admin' + variant.warnings.extend([str(error), 'A Database error occurred, please contact admin']) logger.warning(str(error) + ": A Database error occurred, please contact admin") return True @@ -754,13 +736,13 @@ def get_transcript_info(self, variant): entry = self.db.data_add(accession=accession, validator=self) except hgvs.exceptions.HGVSError: error = 'Transcript %s is not currently supported' % accession - variant.warnings += ': ' 
+ error + variant.warnings.append(error) logger.warning(error) return True except Exception: error = 'Unable to assign transcript identity records to ' + accession + \ ', potentially an obsolete record :' - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True variant.description = entry['description'] @@ -774,7 +756,7 @@ def get_transcript_info(self, variant): logger.warning(str(e)) error = 'Unable to assign transcript identity records to ' + accession + \ ', potentially an obsolete record :' - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True variant.description = entry['description'] @@ -783,7 +765,7 @@ def get_transcript_info(self, variant): else: # Open a hgvs exception log file in append mode error = 'Unknown error type' - variant.warnings += ': ' + error + ': A Database error occurred, please contact admin' + variant.warnings.extend([error, ': A Database error occurred, please contact admin']) logger.warning(error) return True @@ -801,8 +783,7 @@ def get_transcript_info(self, variant): if 'error' in entry: # Open a hgvs exception log file in append mode error = entry['description'] - variant.warnings += ': ' + str( - error) + ': A Database error occurred, please contact admin' + variant.warnings.extend([str(error), ': A Database error occurred, please contact admin']) logger.warning(str(error)) return True @@ -822,7 +803,7 @@ def get_transcript_info(self, variant): logger.warning(str(e)) error = 'Unable to assign transcript identity records to ' + accession + \ ', potentially an obsolete record :' - variant.warnings += ': ' + error + variant.warnings.append(error) logger.warning(error) return True variant.description = entry['description'] @@ -831,7 +812,7 @@ def get_transcript_info(self, variant): else: # Open a hgvs exception log file in append mode error = 'Unknown error type' - variant.warnings += ': ' + error + ': A Database error occurred, please contact 
admin' + variant.warnings.extend([error, ': A Database error occurred, please contact admin']) logger.warning(error) return True return False From c06121f8f44460596b74714e15da7fef8c109ad1 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 20 May 2019 12:03:50 +0100 Subject: [PATCH 098/223] Added ability to delete database contents prior to update --- VariantValidator/update_vv_db.py | 21 +++++++++++++++++++-- bin/update_db.py | 7 +++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/VariantValidator/update_vv_db.py b/VariantValidator/update_vv_db.py index 7a2ff4af..72202229 100644 --- a/VariantValidator/update_vv_db.py +++ b/VariantValidator/update_vv_db.py @@ -8,8 +8,7 @@ from . import configure -def update(): - +def connect(): config = ConfigParser() config.read(configure.CONFIG_DIR) @@ -22,6 +21,23 @@ def update(): } # Create database access objects db = vvDatabase.vvDatabase(dbConfig) + return db + + +def delete(): + + db = connect() + + db.execute('DELETE FROM transcript_info') + db.execute('DELETE FROM refSeqGene_loci') + db.execute('DELETE FROM LRG_transcripts') + db.execute('DELETE FROM LRG_proteins') + db.execute('DELETE FROM LRG_RSG_lookup') + + +def update(): + + db = connect() update_refseq(db) update_lrg(db) @@ -175,6 +191,7 @@ def update_refseq(dbcnx): return + def update_lrg(dbcnx): print('Updating LRG lookup tables') diff --git a/bin/update_db.py b/bin/update_db.py index 8ac345c0..90abbeb8 100644 --- a/bin/update_db.py +++ b/bin/update_db.py @@ -3,5 +3,12 @@ if __name__ == '__main__': parser = argparse.ArgumentParser() + parser.add_argument('--delete', '-d', action='store_true', help='Delete the contents of the current database ' + 'before updating') + + args = parser.parse_args() + if args.delete: + print("Deleting current database contents") + update_vv_db.delete() update_vv_db.update() From d84eab9708a15a87abe2a8907fe6f5361603a049 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 20 May 2019 15:58:59 +0100 Subject: [PATCH 
099/223] Updated tests to reflect new values in LRG tables --- test/test_inputs.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test/test_inputs.py b/test/test_inputs.py index 4f0d519a..a021f0ff 100644 --- a/test/test_inputs.py +++ b/test/test_inputs.py @@ -485,7 +485,7 @@ def test_variant12(self): assert results['NM_002474.2:c.3034_3035inv']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_002474.2:c.3034_3035inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}]) assert results['NM_002474.2:c.3034_3035inv']['gene_symbol'] == 'MYH11' - assert results['NM_002474.2:c.3034_3035inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002465.1:p.(Thr1012Val)', 'slr': 'NP_002465.1:p.(T1012V)'} + assert results['NM_002474.2:c.3034_3035inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002465.1(LRG_1401p1):p.(Thr1012Val)', 'slr': 'NP_002465.1:p.(T1012V)'} assert results['NM_002474.2:c.3034_3035inv']['submitted_variant'] == 'chr16:15832508_15832509delinsAC' assert results['NM_002474.2:c.3034_3035inv']['genome_context_intronic_sequence'] == '' assert results['NM_002474.2:c.3034_3035inv']['hgvs_lrg_variant'] == '' @@ -537,17 +537,17 @@ def test_variant12(self): assert results['NM_001040113.1:c.3055_3056inv']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001040113.1:c.3055_3056inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 
'pos': '1396662', 'alt': 'AC'}}}]) assert results['NM_001040113.1:c.3055_3056inv']['gene_symbol'] == 'MYH11' - assert results['NM_001040113.1:c.3055_3056inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035202.1:p.(Thr1019Val)', 'slr': 'NP_001035202.1:p.(T1019V)'} + assert results['NM_001040113.1:c.3055_3056inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035202.1(LRG_1401p2):p.(Thr1019Val)', 'slr': 'NP_001035202.1:p.(T1019V)'} assert results['NM_001040113.1:c.3055_3056inv']['submitted_variant'] == 'chr16:15832508_15832509delinsAC' assert results['NM_001040113.1:c.3055_3056inv']['genome_context_intronic_sequence'] == '' - assert results['NM_001040113.1:c.3055_3056inv']['hgvs_lrg_variant'] == '' + assert results['NM_001040113.1:c.3055_3056inv']['hgvs_lrg_variant'] == 'LRG_1401:g.123379_123380inv' assert results['NM_001040113.1:c.3055_3056inv']['hgvs_transcript_variant'] == 'NM_001040113.1:c.3055_3056inv' assert results['NM_001040113.1:c.3055_3056inv']['hgvs_refseqgene_variant'] == 'NG_009299.1:g.123379_123380inv' assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} - assert 
results['NM_001040113.1:c.3055_3056inv']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009299.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035202.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040113.1'} + assert results['NM_001040113.1:c.3055_3056inv']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009299.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_1401.xml', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035202.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040113.1'} def test_variant13(self): @@ -4579,7 +4579,7 @@ def test_variant148(self): assert results['NM_002474.2:c.3034_3035inv']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_002474.2:c.3034_3035inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}]) assert results['NM_002474.2:c.3034_3035inv']['gene_symbol'] == 'MYH11' - assert results['NM_002474.2:c.3034_3035inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002465.1:p.(Thr1012Val)', 'slr': 'NP_002465.1:p.(T1012V)'} + assert results['NM_002474.2:c.3034_3035inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002465.1(LRG_1401p1):p.(Thr1012Val)', 'slr': 'NP_002465.1:p.(T1012V)'} assert results['NM_002474.2:c.3034_3035inv']['submitted_variant'] == 'chr16:g.15832508_15832509delinsAC' assert results['NM_002474.2:c.3034_3035inv']['genome_context_intronic_sequence'] == '' assert results['NM_002474.2:c.3034_3035inv']['hgvs_lrg_variant'] == '' @@ -4631,17 +4631,17 @@ def test_variant148(self): assert 
results['NM_001040113.1:c.3055_3056inv']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_001040113.1:c.3055_3056inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}]) assert results['NM_001040113.1:c.3055_3056inv']['gene_symbol'] == 'MYH11' - assert results['NM_001040113.1:c.3055_3056inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035202.1:p.(Thr1019Val)', 'slr': 'NP_001035202.1:p.(T1019V)'} + assert results['NM_001040113.1:c.3055_3056inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035202.1(LRG_1401p2):p.(Thr1019Val)', 'slr': 'NP_001035202.1:p.(T1019V)'} assert results['NM_001040113.1:c.3055_3056inv']['submitted_variant'] == 'chr16:g.15832508_15832509delinsAC' assert results['NM_001040113.1:c.3055_3056inv']['genome_context_intronic_sequence'] == '' - assert results['NM_001040113.1:c.3055_3056inv']['hgvs_lrg_variant'] == '' + assert results['NM_001040113.1:c.3055_3056inv']['hgvs_lrg_variant'] == 'LRG_1401:g.123379_123380inv' assert results['NM_001040113.1:c.3055_3056inv']['hgvs_transcript_variant'] == 'NM_001040113.1:c.3055_3056inv' assert results['NM_001040113.1:c.3055_3056inv']['hgvs_refseqgene_variant'] == 'NG_009299.1:g.123379_123380inv' assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} assert 
results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} - assert results['NM_001040113.1:c.3055_3056inv']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009299.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035202.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040113.1'} + assert results['NM_001040113.1:c.3055_3056inv']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009299.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_1401.xml', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035202.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040113.1'} def test_variant149(self): From acbe053d24bc15d0dd3634cc371960c76dab4210 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Tue, 21 May 2019 10:06:12 +0100 Subject: [PATCH 100/223] Set codecov to run after script even if tests fail --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 0dbf471e..c243ad53 100644 --- a/.travis.yml +++ b/.travis.yml @@ -69,5 +69,5 @@ script: - pytest --cov-report=term --cov=VariantValidator/ # will run all tests in the package -after_success: +after_script: - codecov \ No newline at end of file From 07e93602ae90b8f74f024136512360799c1d50d7 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Tue, 21 May 2019 10:22:31 +0100 Subject: [PATCH 101/223] Created requirements_dev file and tidying up the setup.py file --- REQUIREMENTS.txt => requirements.txt | 1 + requirements_dev.txt | 9 ++++++ setup.py | 48 +++++++++++++--------------- 3 
files changed, 33 insertions(+), 25 deletions(-) rename REQUIREMENTS.txt => requirements.txt (82%) create mode 100644 requirements_dev.txt diff --git a/REQUIREMENTS.txt b/requirements.txt similarity index 82% rename from REQUIREMENTS.txt rename to requirements.txt index 71e33cb6..5bf9459b 100644 --- a/REQUIREMENTS.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ git+https://github.com/openvar/vv_hgvs@master#egg=hgvs +biocommons.seqrepo>=0.3.5 httplib2>=0.9.0 configparser>=3.5.0 pyliftover>=0.3 diff --git a/requirements_dev.txt b/requirements_dev.txt new file mode 100644 index 00000000..7b89b41e --- /dev/null +++ b/requirements_dev.txt @@ -0,0 +1,9 @@ +git+https://github.com/openvar/vv_hgvs@master#egg=hgvs +biocommons.seqrepo>=0.3.5 +httplib2>=0.9.0 +configparser>=3.5.0 +pyliftover>=0.3 +biotools>=0.3.0 +pytest +pytest-cov +codecov \ No newline at end of file diff --git a/setup.py b/setup.py index 7cdfdd5e..b52817d7 100644 --- a/setup.py +++ b/setup.py @@ -5,13 +5,13 @@ setup( name='VariantValidator', - version='0.9', + version=open('VERSION.txt').read().strip(), description='API for accurate, mapping and formatting of sequence variants using HGVS nomenclature', long_description=open('README.md').read(), url='', author='Peter J. Causey-Freeman', author_email='pjf9@leicester.ac.uk', - packages=['VariantValidator','VariantValidator.modules'], + packages=['VariantValidator', 'VariantValidator.modules'], include_package_data=True, license="GNU AFFERO GENERAL PUBLIC LICENSE, Version 3 (https://www.gnu.org/licenses/agpl-3.0.en.html)", # See https://pypi.python.org/pypi?%3Aaction=list_classifiers @@ -27,36 +27,34 @@ 'Topic :: Software Development :: Build Tools', # Specify the Python versions - 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3.6', ], - + # What does your project relate to? 
- keywords=[ - "bioinformatics", - "computational biology", - "genome variants", - "genome variation", - "genomic variants", - "genomic variation", - "genomics", - "hgvs", - "HGVS", - "sequencevariants", - ], + keywords=[ + "bioinformatics", + "computational biology", + "genome variants", + "genome variation", + "genomic variants", + "genomic variation", + "genomics", + "hgvs", + "HGVS", + "sequencevariants", + ], - # List run-time dependencies here. These will be installed by pip when the project is installed. + # List run-time dependencies here. These will be installed by pip when the project is installed. install_requires=[ - "hgvs == 1.1.3", # This will install BioPython - "biocommons.seqrepo >= 0.3.5", - "httplib2 >= 0.9.0", - "configparser >= 3.5.0", - "pyliftover >= 0.3", - "biotools >= 0.3.0", - # "mysql_connector >= 2.1.4", Required but is OS specific. https://dev.mysql.com/downloads/connector/python/ + "hgvs == 1.1.3", # This will install BioPython + "biocommons.seqrepo >= 0.3.5", + "httplib2 >= 0.9.0", + "configparser >= 3.5.0", + "pyliftover >= 0.3", + "biotools >= 0.3.0", ], ) - # # Copyright (C) 2018 Peter Causey-Freeman, University of Leicester # From 310eb86a50456b0ad0ae418436beb76f674e4374 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Tue, 21 May 2019 10:26:29 +0100 Subject: [PATCH 102/223] Created docs folder for installation instructions and manual --- README.md | 6 +++--- INSTALLATION.md => docs/INSTALLATION.md | 0 MANUAL.md => docs/MANUAL.md | 0 3 files changed, 3 insertions(+), 3 deletions(-) rename INSTALLATION.md => docs/INSTALLATION.md (100%) rename MANUAL.md => docs/MANUAL.md (100%) diff --git a/README.md b/README.md index c12582bf..de6838cb 100644 --- a/README.md +++ b/README.md @@ -40,15 +40,15 @@ Optional software: * Postgres version 9.5 or above, Postgres 10 is not supported. 
* SQLite version 3.8.0 or above -For installation instructions please see INSTALLATION.md +For installation instructions please see [INSTALLATION.md](docs/INSTALLATION.md) # Operation and configuration -Please see MANUAL.md +Please see [MANUAL.md](docs/MANUAL.md) ## License -Please see LICENSE.txt +Please see [LICENSE.txt](LICENSE.txt) ## Cite us diff --git a/INSTALLATION.md b/docs/INSTALLATION.md similarity index 100% rename from INSTALLATION.md rename to docs/INSTALLATION.md diff --git a/MANUAL.md b/docs/MANUAL.md similarity index 100% rename from MANUAL.md rename to docs/MANUAL.md From d0ad3ff8b14c37f6568db3647a6777ab8ab87bb3 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Tue, 21 May 2019 11:13:06 +0100 Subject: [PATCH 103/223] Added more things to setup.py so it installs all files correctly and renamed update_db --- .travis.yml | 2 +- MANIFEST | 0 MANIFEST.in | 3 +++ bin/{update_db.py => update_vdb.py} | 2 ++ setup.py | 9 +++++++-- 5 files changed, 13 insertions(+), 3 deletions(-) delete mode 100644 MANIFEST rename bin/{update_db.py => update_vdb.py} (95%) diff --git a/.travis.yml b/.travis.yml index c243ad53..b33b483f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -62,7 +62,7 @@ install: # Set up validator database - mysql validator < configuration/empty_vv_db.sql - - python bin/update_db.py + - python bin/update_vdb.py - df -h script: diff --git a/MANIFEST b/MANIFEST deleted file mode 100644 index e69de29b..00000000 diff --git a/MANIFEST.in b/MANIFEST.in index e69de29b..ecaf8cb5 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +recursive-include configuration * +include README.md VERSION.txt +recursive-include bin * \ No newline at end of file diff --git a/bin/update_db.py b/bin/update_vdb.py similarity index 95% rename from bin/update_db.py rename to bin/update_vdb.py index 90abbeb8..f15995c7 100644 --- a/bin/update_db.py +++ b/bin/update_vdb.py @@ -1,3 +1,5 @@ +#! 
/usr/bin/env python + from VariantValidator import update_vv_db import argparse diff --git a/setup.py b/setup.py index b52817d7..98f585c0 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ version=open('VERSION.txt').read().strip(), description='API for accurate, mapping and formatting of sequence variants using HGVS nomenclature', long_description=open('README.md').read(), - url='', + url='https://variantvalidator.org', author='Peter J. Causey-Freeman', author_email='pjf9@leicester.ac.uk', packages=['VariantValidator', 'VariantValidator.modules'], @@ -29,7 +29,12 @@ # Specify the Python versions 'Programming Language :: Python :: 3.6', ], - + scripts=[ + 'bin/update_vdb.py', + ], + data_files=[ + ('configuration', ['configuration/default.ini', 'configuration/empty_vv_db.sql']) + ], # What does your project relate to? keywords=[ "bioinformatics", From a0463fc457f707d428bb8f7bc7e74b7f1263519b Mon Sep 17 00:00:00 2001 From: TeriForey Date: Tue, 21 May 2019 11:19:33 +0100 Subject: [PATCH 104/223] Set dependency links so installation uses vv_hgvs --- setup.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 98f585c0..6b15c3d2 100644 --- a/setup.py +++ b/setup.py @@ -51,13 +51,16 @@ # List run-time dependencies here. These will be installed by pip when the project is installed. 
install_requires=[ - "hgvs == 1.1.3", # This will install BioPython + "hgvs", # This will install BioPython "biocommons.seqrepo >= 0.3.5", "httplib2 >= 0.9.0", "configparser >= 3.5.0", "pyliftover >= 0.3", "biotools >= 0.3.0", ], + dependency_links=[ + "git+https://github.com/openvar/vv_hgvs@master#egg=hgvs" + ] ) # From 9b8d3733a2111094b47b933a679461eba9939b70 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Tue, 21 May 2019 11:23:27 +0100 Subject: [PATCH 105/223] Removed conda from .travis.yml file --- .travis.yml | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/.travis.yml b/.travis.yml index b33b483f..1a135ce0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -25,14 +25,6 @@ before_install: # Increase size of database drive - sudo mount -o remount,size=50% /var/ramfs - # Install Miniconda - - - wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda3.sh - - bash miniconda3.sh -b -p "$HOME"/miniconda3 - - echo . "$HOME"/miniconda3/etc/profile.d/conda.sh >> "$HOME"/.bashrc - - source "$HOME"/.bashrc - - conda config --set always_yes yes - # Set up the databases - install seqrepo and UTA - mysql -e 'CREATE DATABASE validator;' @@ -47,17 +39,8 @@ before_install: install: - # We just set up a conda environment with the right Python version. - - - sed -i -E 's/(python=)(.*)/\1'$TRAVIS_PYTHON_VERSION'/' environment.yml - - travis_retry conda env create -f environment.yml - - conda activate vvenv - # Test dependencies - - conda install pytest - - pip install pytest-cov - - pip install codecov - + - pip install -r requirements_dev.txt - pip install . 
# Set up validator database From 463264864ce0bd5f3a54f1db467c315bd0d569e1 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Tue, 21 May 2019 15:21:43 +0100 Subject: [PATCH 106/223] Added new executable to run variant validator --- bin/variant_validator.py | 44 ++++++++++++++++++++++++++++++++++++++++ setup.py | 1 + 2 files changed, 45 insertions(+) create mode 100644 bin/variant_validator.py diff --git a/bin/variant_validator.py b/bin/variant_validator.py new file mode 100644 index 00000000..592db80f --- /dev/null +++ b/bin/variant_validator.py @@ -0,0 +1,44 @@ +#! /usr/bin/env python + +import argparse +import json +import sys +from VariantValidator import Validator + + +def output_results(valoutput, outformat): + if outformat == 'dict': + return str(valoutput.format_as_dict()) + elif outformat == 'json': + return json.dumps(valoutput.format_as_dict()) + else: + return str(valoutput.format_as_dict()) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('-v', '--variant', required=True, nargs='+', help="Variant(s) to validate") + parser.add_argument('-g', '--genome', nargs='?', default='GRCh37', choices=['GRCh37', 'GRCh38', 'hg19', 'hg38'], + help="Genome assembly (default: %(default)s)") + parser.add_argument('-t', '--transcripts', nargs='?', default='all', + help='Transcripts to output results for (default: %(default)s)') + parser.add_argument('-s', '--submission', choices=['individual', 'batch'], default='individual', + help='Submit variants individually or as a single batch validation (default: %(default)s') + parser.add_argument('-f', '--output_format', choices=['dict', 'list', 'json'], default='dict', + help='Output validations as a list or as a dictionary (default: %(default)s') + parser.add_argument('-o', '--output', type=argparse.FileType('w'), default='-', + help='Specifies the output file (default: stdout)') + + args = parser.parse_args() + + validator = Validator() + + if args.submission == 'individual': + for variant 
in args.variant: + output = validator.validate(variant, args.genome, args.transcripts) + args.output.write(output_results(output, args.output_format) + '\n') + else: + batch = '|'.join(args.variant) + sys.stderr.write("Submitting batch query: %s\n" % batch) + output = validator.validate(batch, args.genome, args.transcripts) + args.output.write(output_results(output, args.output_format) + '\n') diff --git a/setup.py b/setup.py index 6b15c3d2..6ca3d52f 100644 --- a/setup.py +++ b/setup.py @@ -31,6 +31,7 @@ ], scripts=[ 'bin/update_vdb.py', + 'bin/variant_validator.py' ], data_files=[ ('configuration', ['configuration/default.ini', 'configuration/empty_vv_db.sql']) From 8d1b8370971b6e1b8a0ad394d19ea2ffba5fad47 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Tue, 21 May 2019 15:34:47 +0100 Subject: [PATCH 107/223] Added mysql-connector-python to requirements --- requirements.txt | 1 + requirements_dev.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index 5bf9459b..44abbe98 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ httplib2>=0.9.0 configparser>=3.5.0 pyliftover>=0.3 biotools>=0.3.0 +mysql-connector-python diff --git a/requirements_dev.txt b/requirements_dev.txt index 7b89b41e..32de7a67 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -4,6 +4,7 @@ httplib2>=0.9.0 configparser>=3.5.0 pyliftover>=0.3 biotools>=0.3.0 +mysql-connector-python pytest pytest-cov codecov \ No newline at end of file From 85f679f9dad5e21bda2282718b93f0a42d2da03a Mon Sep 17 00:00:00 2001 From: TeriForey Date: Tue, 21 May 2019 15:48:42 +0100 Subject: [PATCH 108/223] Added version to pytest in requirements_dev --- requirements_dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev.txt b/requirements_dev.txt index 32de7a67..0c691c7c 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -5,6 +5,6 @@ configparser>=3.5.0 pyliftover>=0.3 biotools>=0.3.0 mysql-connector-python 
-pytest +pytest>=3.6 pytest-cov codecov \ No newline at end of file From b73e601b8968a7c9e7f644a4fe0c7e4b42f521d7 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 22 May 2019 11:38:22 +0100 Subject: [PATCH 109/223] Removed version from configure files and created version.py to extract info from top-level file --- VariantValidator/__init__.py | 1 + VariantValidator/modules/vvMixinInit.py | 2 +- VariantValidator/version.py | 4 ++++ configuration/default.ini | 3 --- configuration/travis.ini | 3 --- 5 files changed, 6 insertions(+), 7 deletions(-) create mode 100644 VariantValidator/version.py diff --git a/VariantValidator/__init__.py b/VariantValidator/__init__.py index c0924aa4..c3d9eec2 100644 --- a/VariantValidator/__init__.py +++ b/VariantValidator/__init__.py @@ -1,4 +1,5 @@ from . import configure +from .version import __version__ from .variantValidator import Validator diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index 51205ed1..1d645459 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -21,6 +21,7 @@ from .vvLogging import logger from . 
import vvFunctions as fn from VariantValidator.configure import CONFIG_DIR +from VariantValidator.version import __version__ class Mixin: @@ -100,7 +101,6 @@ def __init__(self): #Create database access objects self.db = vvDatabase(self.dbConfig) # Set up versions - __version__ = config["variantValidator"]['version'] self.version = __version__ if re.match(r'^\d+\.\d+\.\d+$', __version__) is not None: self.releasedVersion = True diff --git a/VariantValidator/version.py b/VariantValidator/version.py new file mode 100644 index 00000000..7281e05e --- /dev/null +++ b/VariantValidator/version.py @@ -0,0 +1,4 @@ +import os + +with open(os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(__file__))), 'VERSION.txt')) as fh: + __version__ = fh.read().strip() diff --git a/configuration/default.ini b/configuration/default.ini index 485e9acb..9e1f2960 100644 --- a/configuration/default.ini +++ b/configuration/default.ini @@ -1,6 +1,3 @@ -[variantValidator] -version = 0.9 - [mysql] host = localhost database = validator diff --git a/configuration/travis.ini b/configuration/travis.ini index 661457ae..3a67ae52 100644 --- a/configuration/travis.ini +++ b/configuration/travis.ini @@ -1,6 +1,3 @@ -[variantValidator] -version = 0.9 - [mysql] host = localhost database = validator From 5f94175db1cf21094c3415150699277b368531af Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 23 May 2019 11:39:05 +0100 Subject: [PATCH 110/223] Added new bin file to configure settings, also seperated the config_dir setting into a seperate file that is then parsed without import the package --- VariantValidator/configure.py | 7 ++-- VariantValidator/settings.py | 3 ++ bin/vv_configure.py | 61 +++++++++++++++++++++++++++++++++++ setup.py | 3 +- 4 files changed, 69 insertions(+), 5 deletions(-) create mode 100644 VariantValidator/settings.py create mode 100644 bin/vv_configure.py diff --git a/VariantValidator/configure.py b/VariantValidator/configure.py index 717de82b..b7712f2c 100644 --- 
a/VariantValidator/configure.py +++ b/VariantValidator/configure.py @@ -1,8 +1,7 @@ import os import shutil import configparser - -CONFIG_DIR = os.path.join(os.path.expanduser('~'), '.variantvalidator') +from .settings import CONFIG_DIR def read_configuration(): @@ -34,10 +33,10 @@ def exit_with_message(): print("*-----------------------------*") print("| Welcome to VariantValidator |") print("*-----------------------------*") - shutil.copyfile(os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(__file__))), 'configuration', 'default.ini'), CONFIG_DIR) + shutil.copyfile(os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(__file__))), 'configuration', + 'default.ini'), CONFIG_DIR) print("A configuration file has been copied into your home directory (%s)." % CONFIG_DIR) print("Please edit this file with your database connection settings prior to continuing.") print("Items that must be changed are highlighted in capitals.") print() raise SystemExit - diff --git a/VariantValidator/settings.py b/VariantValidator/settings.py new file mode 100644 index 00000000..52e2856c --- /dev/null +++ b/VariantValidator/settings.py @@ -0,0 +1,3 @@ +import os + +CONFIG_DIR = os.path.join(os.path.expanduser('~'), '.variantvalidator') diff --git a/bin/vv_configure.py b/bin/vv_configure.py new file mode 100644 index 00000000..c2e1e955 --- /dev/null +++ b/bin/vv_configure.py @@ -0,0 +1,61 @@ +#! 
/usr/bin/env python +from __future__ import print_function +import argparse +import os +import configparser +import pkgutil + + +def find_root(): + package = pkgutil.get_loader('VariantValidator') + path = os.path.dirname(os.path.dirname(package.get_filename())) + return path + + +def read_settings(): + root = find_root() + settings_file = os.path.join(root, 'VariantValidator', 'settings.py') + with open(settings_file) as f: + values = {} + exec(f.read(), {}, values) + return values + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('-s', '--section', choices=['mysql', 'seqrepo', 'postgres', 'logging', 'EntrezID', 'liftover'], + nargs='?', help='Optional choice of section to configure') + + args = parser.parse_args() + + settings = read_settings() + newfile = False + + if os.path.exists(settings['CONFIG_DIR']): + readfile = settings['CONFIG_DIR'] + else: + root = find_root() + readfile = os.path.join(root, 'configuration', 'default.ini') + newfile = True + + config = configparser.ConfigParser() + config.read(readfile) + + values_changed = False + + for section in config.sections(): + if not newfile and args.section and args.section != section: + continue + print('Section:', section) + for name, value in config.items(section): + print("{} [{}]: ".format(name, value), end="") + newval = input() + if newval != '': + config.set(section, name, newval.strip()) + values_changed = True + + print() + + if newfile or values_changed: + with open(settings['CONFIG_DIR'], 'w') as fh: + config.write(fh) diff --git a/setup.py b/setup.py index 6ca3d52f..1e457085 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,8 @@ ], scripts=[ 'bin/update_vdb.py', - 'bin/variant_validator.py' + 'bin/variant_validator.py', + 'bin/vv_configure.py' ], data_files=[ ('configuration', ['configuration/default.ini', 'configuration/empty_vv_db.sql']) From 2de03e5d74f444ce92ded9ae6eeedf1736ce18e0 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 23 May 2019 
11:48:27 +0100 Subject: [PATCH 111/223] Set Validator init to import settings file --- VariantValidator/modules/vvMixinInit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index 1d645459..985e5bc5 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -20,7 +20,7 @@ from .vvDatabase import vvDatabase from .vvLogging import logger from . import vvFunctions as fn -from VariantValidator.configure import CONFIG_DIR +from VariantValidator.settings import CONFIG_DIR from VariantValidator.version import __version__ From 89187edc1681439273ce64c9fafe5b45b6322ca9 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 23 May 2019 13:47:43 +0100 Subject: [PATCH 112/223] Added changes from 18th Jan v0 --- VariantValidator/modules/vvMixinConverters.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index 51e1680b..f53c9316 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -1820,7 +1820,14 @@ def relevant_transcripts(self, hgvs_genomic, evm, alt_aln_method,reverse_normali rev_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic) # map back to coding variant = evm.g_to_t(rev_hgvs_genomic, tx_ac) - code_var.append(str(variant)) + try: + self.hp.parse_hgvs_variant(str(variant)) + except hgvs.exceptions.HGVSError: + continue + except TypeError: + continue + else: + code_var.append(str(variant)) return code_var def validateHGVS(self, input): From 704b43d05859f2c39bbcde2d631956198f65eadb Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 23 May 2019 14:25:31 +0100 Subject: [PATCH 113/223] Added change following v0 commits on Mar 22 --- VariantValidator/modules/vvMixinCore.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index a3bd908d..3a846c45 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -554,6 +554,10 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr primary_genomic_dicts = {} for alt_gen_var in multi_gen_vars: + try: + alt_gen_var = variant.hn.normalize(alt_gen_var) + except hgvs.exceptions.HGVSInvalidVariantError: + continue for build in self.genome_builds: test = vvChromosomes.supported_for_mapping(alt_gen_var.ac, build) if test == 'true': From 01bcf2305e6c743fedab618709cab5b82fbaeba4 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 23 May 2019 15:18:53 +0100 Subject: [PATCH 114/223] Added changes following v0 commits on 12 April --- VariantValidator/modules/gapped_mapping.py | 8 +++++++- VariantValidator/modules/mappers.py | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index cbb1fdaa..79dc116e 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -2229,6 +2229,12 @@ def get_hgvs_seek_var(self, hgvs_genomic, hgvs_coding, ori=None, with_query_geno query_genomic = self.variant.hn.normalize(hgvs_genomic) except: query_genomic = hgvs_genomic + + # Normalise intronic, if called with query_genomic + if with_query_genomic: + if hgvs_coding.posedit.pos.start.offset != 0: + hgvs_coding = self.variant.evm.g_to_t(query_genomic, hgvs_coding.ac) + # Map to the transcript and test for movement try: hgvs_seek_var = self.variant.evm.g_to_t(query_genomic, hgvs_coding.ac) @@ -2236,7 +2242,7 @@ def get_hgvs_seek_var(self, hgvs_genomic, hgvs_coding, ori=None, with_query_geno hgvs_seek_var = hgvs_coding if with_query_genomic: - return hgvs_seek_var, query_genomic + return hgvs_seek_var, query_genomic, hgvs_coding return hgvs_seek_var diff --git 
a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index 3e246f54..36e315bb 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -732,7 +732,7 @@ def transcripts_to_gene(variant, validator): # Look for normalized variant options that do not match hgvs_coding # boundary crossing normalization - hgvs_seek_var, query_genomic = gap_mapper.get_hgvs_seek_var(hgvs_genomic, hgvs_coding, + hgvs_seek_var, query_genomic, hgvs_coding = gap_mapper.get_hgvs_seek_var(hgvs_genomic, hgvs_coding, ori=ori, with_query_genomic=True) if hgvs_coding.posedit.edit.type != hgvs_seek_var.posedit.edit.type: From 56db6461482626e84b6422155caa65d179935f0c Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 23 May 2019 15:22:26 +0100 Subject: [PATCH 115/223] Updated empty_vv_db following changes in v0 --- configuration/empty_vv_db.sql | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/configuration/empty_vv_db.sql b/configuration/empty_vv_db.sql index 79ef502b..de6d0247 100644 --- a/configuration/empty_vv_db.sql +++ b/configuration/empty_vv_db.sql @@ -23,10 +23,10 @@ DROP TABLE IF EXISTS `LRG_RSG_lookup`; /*!40101 SET @saved_cs_client = @@character_set_client */; /*!40101 SET character_set_client = utf8 */; CREATE TABLE `LRG_RSG_lookup` ( - `id` int(11) unsigned NOT NULL AUTO_INCREMENT, - `lrgID` varchar(10) NOT NULL DEFAULT '', - `hgncSymbol` varchar(10) NOT NULL, - `RefSeqGeneID` varchar(15) NOT NULL DEFAULT '', + `id` int(25) unsigned NOT NULL AUTO_INCREMENT, + `lrgID` varchar(25) NOT NULL DEFAULT '', + `hgncSymbol` varchar(25) NOT NULL, + `RefSeqGeneID` varchar(25) NOT NULL DEFAULT '', `status` text NOT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB AUTO_INCREMENT=2092 DEFAULT CHARSET=utf8; @@ -40,9 +40,9 @@ DROP TABLE IF EXISTS `LRG_proteins`; /*!40101 SET @saved_cs_client = @@character_set_client */; /*!40101 SET character_set_client = utf8 */; CREATE TABLE `LRG_proteins` ( - `id` int(11) 
unsigned NOT NULL AUTO_INCREMENT, - `LRGproteinID` varchar(10) DEFAULT NULL, - `RefSeqProteinID` varchar(15) DEFAULT NULL, + `id` int(25) unsigned NOT NULL AUTO_INCREMENT, + `LRGproteinID` varchar(25) DEFAULT NULL, + `RefSeqProteinID` varchar(25) DEFAULT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB AUTO_INCREMENT=1381 DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; @@ -55,9 +55,9 @@ DROP TABLE IF EXISTS `LRG_transcripts`; /*!40101 SET @saved_cs_client = @@character_set_client */; /*!40101 SET character_set_client = utf8 */; CREATE TABLE `LRG_transcripts` ( - `id` int(11) unsigned NOT NULL AUTO_INCREMENT, - `LRGtranscriptID` varchar(10) NOT NULL DEFAULT '', - `RefSeqTranscriptID` varchar(15) NOT NULL DEFAULT '', + `id` int(25) unsigned NOT NULL AUTO_INCREMENT, + `LRGtranscriptID` varchar(25) NOT NULL DEFAULT '', + `RefSeqTranscriptID` varchar(25) NOT NULL DEFAULT '', PRIMARY KEY (`id`) ) ENGINE=InnoDB AUTO_INCREMENT=2607 DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; From 392d28dd4630ca266627f7a666dfcb7760c1082a Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 23 May 2019 15:37:46 +0100 Subject: [PATCH 116/223] Added catch following v0 commit on 30 April --- VariantValidator/modules/gapped_mapping.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index 79dc116e..c8b03ab0 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -239,7 +239,15 @@ def gapped_g_to_c(self, rel_var): rn_tx_hgvs_not_delins.posedit.edit.ref) self.disparity_deletion_in = ['transcript', gap_length] else: - hgvs_stash_t = self.validator.vm.g_to_t(stash_hgvs_not_delins, saved_hgvs_coding.ac) + # store stash_hgvs_not_delins for restorstion after error below + restore_stash_hgvs_not_delins = copy.copy(stash_hgvs_not_delins) + try: + hgvs_stash_t = 
self.validator.vm.g_to_t(stash_hgvs_not_delins, saved_hgvs_coding.ac) + except hgvs.exceptions.HGVSError as e: + if 'bounds' in str(e): + stash_hgvs_not_delins = copy.copy(stored_hgvs_not_delins) + hgvs_stash_t = self.validator.vm.g_to_t(stash_hgvs_not_delins, saved_hgvs_coding.ac) + if len(stash_hgvs_not_delins.posedit.edit.ref) > len(hgvs_stash_t.posedit.edit.ref): try: self.variant.hn.normalize(hgvs_stash_t) @@ -263,6 +271,9 @@ def gapped_g_to_c(self, rel_var): hgvs_not_delins = stash_hgvs_not_delins self.hgvs_genomic_5pr = stash_hgvs_not_delins + # Restore stash_hgvs_not_delins + stash_hgvs_not_delins = restore_stash_hgvs_not_delins + # Final sanity checks try: self.validator.vm.g_to_t(hgvs_not_delins, self.tx_hgvs_not_delins.ac) From 757858ff3d5479bbd0071af1f708c78c28e7e185 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 23 May 2019 15:41:13 +0100 Subject: [PATCH 117/223] Adding hg38 as a valid build based on v0 commits May 17th --- VariantValidator/modules/vvMixinCore.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 3a846c45..1657ea6c 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -153,7 +153,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr selected_assembly = selected_assembly.replace('H', 'h') primary_assembly = selected_assembly # Catch invalid genome build - if primary_assembly in self.genome_builds: + if primary_assembly in self.genome_builds or primary_assembly == 'hg38': my_variant.primary_assembly = primary_assembly else: my_variant.primary_assembly = 'GRCh38' From 603d5f4857d1108d312f981e4e9d2192fc53e0d7 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 23 May 2019 16:02:05 +0100 Subject: [PATCH 118/223] Added section to detect protein termination in shortest sequence --- VariantValidator/modules/vvFunctions.py | 119 ++++++++++-------------- 1 file 
changed, 51 insertions(+), 68 deletions(-) diff --git a/VariantValidator/modules/vvFunctions.py b/VariantValidator/modules/vvFunctions.py index 3bbdd1f3..6257ae71 100644 --- a/VariantValidator/modules/vvFunctions.py +++ b/VariantValidator/modules/vvFunctions.py @@ -308,81 +308,64 @@ def pro_delins_info(prot_ref_seq, prot_var_seq): if term.search(prot_var_seq): # Set the termination reporter to true info['terminate'] = 'true' - # The termination position will be equal to the length of the variant sequence because it's a TERMINATOR!!! - info['ter_pos'] = len(prot_var_seq) - # cut the ref sequence to == size - prot_ref_seq = prot_ref_seq[0:info['ter_pos']] - prot_var_seq = prot_var_seq[0:info['ter_pos']] - # Whether terminated or not, the sequences should now be the same length - # Unless the termination codon has been disrupted - if len(prot_var_seq) < len(prot_ref_seq): - info['error'] = 'true' - return info + # Set the terminal pos dependant on the shortest sequence + if len(prot_var_seq) <= len(prot_ref_seq): + info['ter_pos'] = len(prot_ref_seq) else: - # Set the counter - aa_counter = 0 + info['ter_pos'] = len(prot_var_seq) - # Make list copies of the sequences to gather the required info - ref = list(prot_ref_seq) - var = list(prot_var_seq) + # cut the ref sequence to == size + prot_ref_seq = prot_ref_seq[0:info['ter_pos']] + prot_var_seq = prot_var_seq[0:info['ter_pos']] - # Loop through ref list to find the first missmatch position - for aa in ref: - if ref[aa_counter] == var[aa_counter]: - aa_counter = aa_counter + 1 - else: + # Set the counter + aa_counter = 0 + # Make list copies of the sequences to gather the required info + ref = list(prot_ref_seq) + var = list(prot_var_seq) + # Loop through ref list to find the first missmatch position + for aa in ref: + if ref[aa_counter] == var[aa_counter]: + aa_counter = aa_counter + 1 + else: + break + + # Enter the start position + info['edit_start'] = aa_counter + 1 + # Remove those elements form the list + del 
ref[0:aa_counter] + del var[0:aa_counter] + + # Reset the aa_counter but to go backwards + aa_counter = 0 + # reverse the lists + ref = ref[::-1] + var = var[::-1] + # Reverse loop through ref list to find the first missmatch position + for aa in ref: + try: + if var[aa_counter] == '\*': break - - # Enter the start position - info['edit_start'] = aa_counter + 1 - # Remove those elements form the list - del ref[0:aa_counter] - del var[0:aa_counter] - - # the sequences should now be the same length - # Except if the termination codon was removed - if len(ref) > len(var): - info['error'] = 'true' - return info + except IndexError: + break + if aa == var[aa_counter]: + aa_counter = aa_counter + 1 else: - # Reset the aa_counter but to go backwards - aa_counter = 0 - # reverse the lists - ref = ref[::-1] - var = var[::-1] - # Reverse loop through ref list to find the first missmatch position - for aa in ref: - if var[aa_counter] == '\*': - break - if aa == var[aa_counter]: - aa_counter = aa_counter + 1 - else: - break - # Remove those elements form the list - del ref[0:aa_counter] - del var[0:aa_counter] - # re-reverse the lists - ref = ref[::-1] - var = var[::-1] + break + # Remove those elements form the list + del ref[0:aa_counter] + del var[0:aa_counter] + # re-reverse the lists + ref = ref[::-1] + var = var[::-1] + + # Enter the sequences + info['prot_del_seq'] = ''.join(ref) + info['prot_ins_seq'] = ''.join(var) + info['edit_end'] = info['edit_start'] + len(ref) - 1 + return info - # If the var is > ref, the ter has been removed, need to re-add ter to each -# if len(ref) < len(var): -# ref.append('*') -# if prot_var_seq[-1] == '*': -# var.append('*') - - # the sequences should now be the same length - # Except if the ter was removed - if len(ref) > len(var): - info['error'] = 'true' - return info - else: - # Enter the sequences - info['prot_del_seq'] = ''.join(ref) - info['prot_ins_seq'] = ''.join(var) - info['edit_end'] = info['edit_start'] + len(ref) -1 - return 
info def translate(ed_seq, cds_start): """ From 6eb39c59206fe08415c91f8e2fbd6cd8e56a2074 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 23 May 2019 16:26:48 +0100 Subject: [PATCH 119/223] Updated tests to match v0 --- test/test_inputs.py | 1274 ++++++++++++++++++++++--------------------- 1 file changed, 663 insertions(+), 611 deletions(-) diff --git a/test/test_inputs.py b/test/test_inputs.py index a021f0ff..99c16467 100644 --- a/test/test_inputs.py +++ b/test/test_inputs.py @@ -1,6 +1,7 @@ from VariantValidator import Validator from unittest import TestCase + class TestVariants(TestCase): @classmethod @@ -556,22 +557,22 @@ def test_variant13(self): print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.589-1_589delinsG' in list(results.keys()) - assert results['NM_000088.3:c.589-1_589delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.590del' - assert results['NM_000088.3:c.589-1_589delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.590del' - self.assertCountEqual(results['NM_000088.3:c.589-1_589delinsG']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.589-1_589delinsG']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.589-1_589delinsG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} - assert results['NM_000088.3:c.589-1_589delinsG']['submitted_variant'] == 'NM_000088.3:c.589-1GG>G' - assert results['NM_000088.3:c.589-1_589delinsG']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-1_589delinsG' - assert results['NM_000088.3:c.589-1_589delinsG']['hgvs_lrg_variant'] == 'LRG_1:g.8639del' - assert results['NM_000088.3:c.589-1_589delinsG']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-1_589delinsG' - assert results['NM_000088.3:c.589-1_589delinsG']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8639del' - assert results['NM_000088.3:c.589-1_589delinsG']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000017.10:g.48275363_48275364delinsC', 'vcf': {'chr': 'chr17', 'ref': 'AC', 'pos': '48275361', 'alt': 'A'}} - assert results['NM_000088.3:c.589-1_589delinsG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002_50198003delinsC', 'vcf': {'chr': 'chr17', 'ref': 'AC', 'pos': '50198000', 'alt': 'A'}} - assert results['NM_000088.3:c.589-1_589delinsG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363_48275364delinsC', 'vcf': {'chr': '17', 'ref': 'AC', 'pos': '48275361', 'alt': 'A'}} - assert results['NM_000088.3:c.589-1_589delinsG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002_50198003delinsC', 'vcf': {'chr': '17', 'ref': 'AC', 'pos': '50198000', 'alt': 'A'}} - assert results['NM_000088.3:c.589-1_589delinsG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + assert 'NM_000088.3:c.590del' in list(results.keys()) + assert results['NM_000088.3:c.590del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.590del' + assert results['NM_000088.3:c.590del']['refseqgene_context_intronic_sequence'] == '' + self.assertCountEqual(results['NM_000088.3:c.590del']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.590del']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.590del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.590del']['submitted_variant'] == 'NM_000088.3:c.589-1GG>G' + assert results['NM_000088.3:c.590del']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.590del']['hgvs_lrg_variant'] == 'LRG_1:g.8639del' + assert 
results['NM_000088.3:c.590del']['hgvs_transcript_variant'] == 'NM_000088.3:c.590del' + assert results['NM_000088.3:c.590del']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8639del' + assert results['NM_000088.3:c.590del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364del', 'vcf': {'chr': 'chr17', 'ref': 'AC', 'pos': '48275361', 'alt': 'A'}} + assert results['NM_000088.3:c.590del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003del', 'vcf': {'chr': 'chr17', 'ref': 'AC', 'pos': '50198000', 'alt': 'A'}} + assert results['NM_000088.3:c.590del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364del', 'vcf': {'chr': '17', 'ref': 'AC', 'pos': '48275361', 'alt': 'A'}} + assert results['NM_000088.3:c.590del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003del', 'vcf': {'chr': '17', 'ref': 'AC', 'pos': '50198000', 'alt': 'A'}} + assert results['NM_000088.3:c.590del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant14(self): @@ -580,22 +581,22 @@ def test_variant14(self): print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.642+1_642+2delinsG' in list(results.keys()) - assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642+2del' - assert results['NM_000088.3:c.642+1_642+2delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.642+2del' - self.assertCountEqual(results['NM_000088.3:c.642+1_642+2delinsG']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.642+1_642+2delinsG']['gene_symbol'] == 'COL1A1' - assert 
results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} - assert results['NM_000088.3:c.642+1_642+2delinsG']['submitted_variant'] == 'NM_000088.3:c.642+1GT>G' - assert results['NM_000088.3:c.642+1_642+2delinsG']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.642+1_642+2delinsG' - assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_lrg_variant'] == 'LRG_1:g.8693del' - assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_transcript_variant'] == 'NM_000088.3:c.642+1_642+2delinsG' - assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8693del' - assert results['NM_000088.3:c.642+1_642+2delinsG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308_48275309delinsC', 'vcf': {'chr': 'chr17', 'ref': 'TA', 'pos': '48275307', 'alt': 'T'}} - assert results['NM_000088.3:c.642+1_642+2delinsG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947_50197948delinsC', 'vcf': {'chr': 'chr17', 'ref': 'TA', 'pos': '50197946', 'alt': 'T'}} - assert results['NM_000088.3:c.642+1_642+2delinsG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308_48275309delinsC', 'vcf': {'chr': '17', 'ref': 'TA', 'pos': '48275307', 'alt': 'T'}} - assert results['NM_000088.3:c.642+1_642+2delinsG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947_50197948delinsC', 'vcf': {'chr': '17', 'ref': 'TA', 'pos': '50197946', 'alt': 'T'}} - assert results['NM_000088.3:c.642+1_642+2delinsG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + assert 
'NM_000088.3:c.642+2del' in list(results.keys()) + assert results['NM_000088.3:c.642+2del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642+2del' + assert results['NM_000088.3:c.642+2del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.642+2del' + self.assertCountEqual(results['NM_000088.3:c.642+2del']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.642+2del']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.642+2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.642+2del']['submitted_variant'] == 'NM_000088.3:c.642+1GT>G' + assert results['NM_000088.3:c.642+2del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.642+2del' + assert results['NM_000088.3:c.642+2del']['hgvs_lrg_variant'] == 'LRG_1:g.8693del' + assert results['NM_000088.3:c.642+2del']['hgvs_transcript_variant'] == 'NM_000088.3:c.642+2del' + assert results['NM_000088.3:c.642+2del']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8693del' + assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308del', 'vcf': {'chr': 'chr17', 'ref': 'TA', 'pos': '48275307', 'alt': 'T'}} + assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947del', 'vcf': {'chr': 'chr17', 'ref': 'TA', 'pos': '50197946', 'alt': 'T'}} + assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308del', 'vcf': {'chr': '17', 'ref': 'TA', 'pos': '48275307', 'alt': 'T'}} + assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947del', 'vcf': {'chr': '17', 'ref': 'TA', 'pos': '50197946', 'alt': 'T'}} + assert results['NM_000088.3:c.642+2del']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant15(self): @@ -604,22 +605,22 @@ def test_variant15(self): print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.589-2_589-1delinsG' in list(results.keys()) - assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-2del' - assert results['NM_000088.3:c.589-2_589-1delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-2del' - self.assertCountEqual(results['NM_000088.3:c.589-2_589-1delinsG']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.589-2_589-1delinsG']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} - assert results['NM_000088.3:c.589-2_589-1delinsG']['submitted_variant'] == 'NM_000088.3:c.589-2AG>G' - assert results['NM_000088.3:c.589-2_589-1delinsG']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-2_589-1delinsG' - assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_lrg_variant'] == 'LRG_1:g.8636del' - assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-2_589-1delinsG' - assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8636del' - assert results['NM_000088.3:c.589-2_589-1delinsG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364_48275365delinsC', 'vcf': {'chr': 'chr17', 'ref': 'CT', 'pos': '48275364', 'alt': 'C'}} - assert results['NM_000088.3:c.589-2_589-1delinsG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003_50198004delinsC', 'vcf': {'chr': 'chr17', 'ref': 
'CT', 'pos': '50198003', 'alt': 'C'}} - assert results['NM_000088.3:c.589-2_589-1delinsG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364_48275365delinsC', 'vcf': {'chr': '17', 'ref': 'CT', 'pos': '48275364', 'alt': 'C'}} - assert results['NM_000088.3:c.589-2_589-1delinsG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003_50198004delinsC', 'vcf': {'chr': '17', 'ref': 'CT', 'pos': '50198003', 'alt': 'C'}} - assert results['NM_000088.3:c.589-2_589-1delinsG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + assert 'NM_000088.3:c.589-2del' in list(results.keys()) + assert results['NM_000088.3:c.589-2del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-2del' + assert results['NM_000088.3:c.589-2del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-2del' + self.assertCountEqual(results['NM_000088.3:c.589-2del']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.589-2del']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589-2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.589-2del']['submitted_variant'] == 'NM_000088.3:c.589-2AG>G' + assert results['NM_000088.3:c.589-2del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-2del' + assert results['NM_000088.3:c.589-2del']['hgvs_lrg_variant'] == 'LRG_1:g.8636del' + assert results['NM_000088.3:c.589-2del']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-2del' + assert results['NM_000088.3:c.589-2del']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8636del' + assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000017.10:g.48275366del', 'vcf': {'chr': 'chr17', 'ref': 'CT', 'pos': '48275364', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005del', 'vcf': {'chr': 'chr17', 'ref': 'CT', 'pos': '50198003', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275366del', 'vcf': {'chr': '17', 'ref': 'CT', 'pos': '48275364', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005del', 'vcf': {'chr': '17', 'ref': 'CT', 'pos': '50198003', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant16(self): @@ -930,6 +931,7 @@ def test_variant28(self): assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) + assert results['validation_warning_1']['reference_sequence_records'] == '' assert results['flag'] == 'warning' @@ -1443,10 +1445,10 @@ def test_variant46(self): assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_variant'] == 'LRG_486:g.24048dup' assert results['NM_000368.4:c.363+1dup']['hgvs_transcript_variant'] == 'NM_000368.4:c.363+1dup' assert results['NM_000368.4:c.363+1dup']['hgvs_refseqgene_variant'] == 'NG_012386.1:g.24048dup' - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000009.11:g.135800973dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} assert results['NM_000368.4:c.363+1dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012386.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_486.xml'} @@ -1467,10 +1469,10 @@ def test_variant47(self): assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_variant'] == 
'LRG_486:g.24048dup' assert results['NM_000368.4:c.363+1dup']['hgvs_transcript_variant'] == 'NM_000368.4:c.363+1dup' assert results['NM_000368.4:c.363+1dup']['hgvs_refseqgene_variant'] == 'NG_012386.1:g.24048dup' - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} assert results['NM_000368.4:c.363+1dup']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_012386.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_486.xml'} @@ -1539,10 +1541,10 @@ def test_variant50(self): assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_variant'] == 'LRG_1:g.14656_14661del' assert results['NM_000088.3:c.2024_2028+1del']['hgvs_transcript_variant'] == 'NM_000088.3:c.2024_2028+1del' assert results['NM_000088.3:c.2024_2028+1del']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.14656_14661del' - assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269340_48269345del', 'vcf': {'chr': 'chr17', 'ref': 'ACTCTTG', 'pos': '48269339', 'alt': 'A'}} - assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191979_50191984del', 'vcf': {'chr': 'chr17', 'ref': 'ACTCTTG', 'pos': '50191978', 'alt': 'A'}} - assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269340_48269345del', 'vcf': {'chr': '17', 'ref': 'ACTCTTG', 'pos': '48269339', 'alt': 'A'}} - assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191979_50191984del', 'vcf': {'chr': '17', 'ref': 'ACTCTTG', 'pos': '50191978', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269343_48269348del', 'vcf': {'chr': 'chr17', 'ref': 'ACTCTTG', 'pos': '48269339', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191982_50191987del', 'vcf': {'chr': 'chr17', 'ref': 'ACTCTTG', 'pos': '50191978', 'alt': 'A'}} + assert 
results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269343_48269348del', 'vcf': {'chr': '17', 'ref': 'ACTCTTG', 'pos': '48269339', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191982_50191987del', 'vcf': {'chr': '17', 'ref': 'ACTCTTG', 'pos': '50191978', 'alt': 'A'}} assert results['NM_000088.3:c.2024_2028+1del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} @@ -1624,22 +1626,22 @@ def test_variant54(self): print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.642+1_642+2delinsG' in list(results.keys()) - assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642+2del' - assert results['NM_000088.3:c.642+1_642+2delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.642+2del' - self.assertCountEqual(results['NM_000088.3:c.642+1_642+2delinsG']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.642+1_642+2delinsG']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} - assert results['NM_000088.3:c.642+1_642+2delinsG']['submitted_variant'] == 'NM_000088.3:c.642+1GG>G' - assert results['NM_000088.3:c.642+1_642+2delinsG']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.642+1_642+2delinsG' - assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_lrg_variant'] == 'LRG_1:g.8693del' - assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_transcript_variant'] == 'NM_000088.3:c.642+1_642+2delinsG' - assert 
results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8693del' - assert results['NM_000088.3:c.642+1_642+2delinsG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308_48275309delinsC', 'vcf': {'chr': 'chr17', 'ref': 'TA', 'pos': '48275307', 'alt': 'T'}} - assert results['NM_000088.3:c.642+1_642+2delinsG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947_50197948delinsC', 'vcf': {'chr': 'chr17', 'ref': 'TA', 'pos': '50197946', 'alt': 'T'}} - assert results['NM_000088.3:c.642+1_642+2delinsG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308_48275309delinsC', 'vcf': {'chr': '17', 'ref': 'TA', 'pos': '48275307', 'alt': 'T'}} - assert results['NM_000088.3:c.642+1_642+2delinsG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947_50197948delinsC', 'vcf': {'chr': '17', 'ref': 'TA', 'pos': '50197946', 'alt': 'T'}} - assert results['NM_000088.3:c.642+1_642+2delinsG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + assert 'NM_000088.3:c.642+2del' in list(results.keys()) + assert results['NM_000088.3:c.642+2del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642+2del' + assert results['NM_000088.3:c.642+2del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.642+2del' + self.assertCountEqual(results['NM_000088.3:c.642+2del']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.642+2del']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.642+2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert 
results['NM_000088.3:c.642+2del']['submitted_variant'] == 'NM_000088.3:c.642+1GG>G' + assert results['NM_000088.3:c.642+2del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.642+2del' + assert results['NM_000088.3:c.642+2del']['hgvs_lrg_variant'] == 'LRG_1:g.8693del' + assert results['NM_000088.3:c.642+2del']['hgvs_transcript_variant'] == 'NM_000088.3:c.642+2del' + assert results['NM_000088.3:c.642+2del']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8693del' + assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308del', 'vcf': {'chr': 'chr17', 'ref': 'TA', 'pos': '48275307', 'alt': 'T'}} + assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947del', 'vcf': {'chr': 'chr17', 'ref': 'TA', 'pos': '50197946', 'alt': 'T'}} + assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308del', 'vcf': {'chr': '17', 'ref': 'TA', 'pos': '48275307', 'alt': 'T'}} + assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947del', 'vcf': {'chr': '17', 'ref': 'TA', 'pos': '50197946', 'alt': 'T'}} + assert results['NM_000088.3:c.642+2del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant55(self): @@ -1648,22 +1650,22 @@ def test_variant55(self): print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.589-2_589-1delinsG' in list(results.keys()) - assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-2del' - assert 
results['NM_000088.3:c.589-2_589-1delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-2del' - self.assertCountEqual(results['NM_000088.3:c.589-2_589-1delinsG']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.589-2_589-1delinsG']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} - assert results['NM_000088.3:c.589-2_589-1delinsG']['submitted_variant'] == 'NM_000088.3:c.589-2GG>G' - assert results['NM_000088.3:c.589-2_589-1delinsG']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-2_589-1delinsG' - assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_lrg_variant'] == 'LRG_1:g.8636del' - assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-2_589-1delinsG' - assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8636del' - assert results['NM_000088.3:c.589-2_589-1delinsG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364_48275365delinsC', 'vcf': {'chr': 'chr17', 'ref': 'CT', 'pos': '48275364', 'alt': 'C'}} - assert results['NM_000088.3:c.589-2_589-1delinsG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003_50198004delinsC', 'vcf': {'chr': 'chr17', 'ref': 'CT', 'pos': '50198003', 'alt': 'C'}} - assert results['NM_000088.3:c.589-2_589-1delinsG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364_48275365delinsC', 'vcf': {'chr': '17', 'ref': 'CT', 'pos': '48275364', 'alt': 'C'}} - assert results['NM_000088.3:c.589-2_589-1delinsG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003_50198004delinsC', 'vcf': {'chr': '17', 'ref': 'CT', 'pos': '50198003', 'alt': 'C'}} - assert 
results['NM_000088.3:c.589-2_589-1delinsG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + assert 'NM_000088.3:c.589-2del' in list(results.keys()) + assert results['NM_000088.3:c.589-2del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-2del' + assert results['NM_000088.3:c.589-2del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-2del' + self.assertCountEqual(results['NM_000088.3:c.589-2del']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.589-2del']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589-2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.589-2del']['submitted_variant'] == 'NM_000088.3:c.589-2GG>G' + assert results['NM_000088.3:c.589-2del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-2del' + assert results['NM_000088.3:c.589-2del']['hgvs_lrg_variant'] == 'LRG_1:g.8636del' + assert results['NM_000088.3:c.589-2del']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-2del' + assert results['NM_000088.3:c.589-2del']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8636del' + assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275366del', 'vcf': {'chr': 'chr17', 'ref': 'CT', 'pos': '48275364', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005del', 'vcf': {'chr': 'chr17', 'ref': 'CT', 'pos': '50198003', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275366del', 'vcf': {'chr': '17', 'ref': 'CT', 
'pos': '48275364', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005del', 'vcf': {'chr': '17', 'ref': 'CT', 'pos': '50198003', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant56(self): @@ -1683,10 +1685,10 @@ def test_variant56(self): assert results['NM_000088.3:c.589-5_589-4insTTTT']['hgvs_lrg_variant'] == 'LRG_1:g.8633_8634insTTTT' assert results['NM_000088.3:c.589-5_589-4insTTTT']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-5_589-4insTTTT' assert results['NM_000088.3:c.589-5_589-4insTTTT']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8633_8634insTTTT' - assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275367_48275368insAAAA', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '48275367', 'alt': 'GAAAA'}} - assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198006_50198007insAAAA', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '50198006', 'alt': 'GAAAA'}} - assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275367_48275368insAAAA', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '48275367', 'alt': 'GAAAA'}} - assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198006_50198007insAAAA', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '50198006', 'alt': 'GAAAA'}} + assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000017.10:g.48275368_48275369insAAAA', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '48275367', 'alt': 'GAAAA'}} + assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198007_50198008insAAAA', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '50198006', 'alt': 'GAAAA'}} + assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275368_48275369insAAAA', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '48275367', 'alt': 'GAAAA'}} + assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198007_50198008insAAAA', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '50198006', 'alt': 'GAAAA'}} assert results['NM_000088.3:c.589-5_589-4insTTTT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} @@ -1707,10 +1709,10 @@ def test_variant57(self): assert results['NM_000088.3:c.642+4_642+5insAAAA']['hgvs_lrg_variant'] == 'LRG_1:g.8695_8696insAAAA' assert results['NM_000088.3:c.642+4_642+5insAAAA']['hgvs_transcript_variant'] == 'NM_000088.3:c.642+4_642+5insAAAA' assert results['NM_000088.3:c.642+4_642+5insAAAA']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8695_8696insAAAA' - assert results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275305_48275306insTTTT', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '48275305', 'alt': 'CTTTT'}} - assert results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197944_50197945insTTTT', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': 
'50197944', 'alt': 'CTTTT'}} - assert results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275305_48275306insTTTT', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '48275305', 'alt': 'CTTTT'}} - assert results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197944_50197945insTTTT', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '50197944', 'alt': 'CTTTT'}} + assert results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275307_48275308insTTTT', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '48275305', 'alt': 'CTTTT'}} + assert results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197946_50197947insTTTT', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '50197944', 'alt': 'CTTTT'}} + assert results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275307_48275308insTTTT', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '48275305', 'alt': 'CTTTT'}} + assert results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197946_50197947insTTTT', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '50197944', 'alt': 'CTTTT'}} assert results['NM_000088.3:c.642+4_642+5insAAAA']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} @@ -1755,10 +1757,10 @@ def test_variant59(self): assert results['NM_000088.3:c.589-7del']['hgvs_lrg_variant'] == 'LRG_1:g.8631del' assert results['NM_000088.3:c.589-7del']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-7del' assert 
results['NM_000088.3:c.589-7del']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8631del' - assert results['NM_000088.3:c.589-7del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275370del', 'vcf': {'chr': 'chr17', 'ref': 'GA', 'pos': '48275369', 'alt': 'G'}} - assert results['NM_000088.3:c.589-7del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198009del', 'vcf': {'chr': 'chr17', 'ref': 'GA', 'pos': '50198008', 'alt': 'G'}} - assert results['NM_000088.3:c.589-7del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275370del', 'vcf': {'chr': '17', 'ref': 'GA', 'pos': '48275369', 'alt': 'G'}} - assert results['NM_000088.3:c.589-7del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198009del', 'vcf': {'chr': '17', 'ref': 'GA', 'pos': '50198008', 'alt': 'G'}} + assert results['NM_000088.3:c.589-7del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275371del', 'vcf': {'chr': 'chr17', 'ref': 'GA', 'pos': '48275369', 'alt': 'G'}} + assert results['NM_000088.3:c.589-7del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198010del', 'vcf': {'chr': 'chr17', 'ref': 'GA', 'pos': '50198008', 'alt': 'G'}} + assert results['NM_000088.3:c.589-7del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275371del', 'vcf': {'chr': '17', 'ref': 'GA', 'pos': '48275369', 'alt': 'G'}} + assert results['NM_000088.3:c.589-7del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198010del', 'vcf': {'chr': '17', 'ref': 'GA', 'pos': '50198008', 'alt': 'G'}} assert results['NM_000088.3:c.589-7del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} @@ -3673,6 +3675,7 @@ def test_variant116(self): assert results['NM_032815.3:c.555_556inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.28954659_28954660inv', 'vcf': {'chr': '16', 'ref': 'AG', 'pos': '28954659', 'alt': 'CT'}} assert results['NM_032815.3:c.555_556inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116204.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032815.3'} + def test_variant117(self): variant = 'NM_006138.4:c.3_4inv' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() @@ -3693,6 +3696,7 @@ def test_variant117(self): assert results['NM_006138.4:c.3_4inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.60061163_60061164inv', 'vcf': {'chr': 'chr11', 'ref': 'GG', 'pos': '60061163', 'alt': 'CC'}} assert results['NM_006138.4:c.3_4inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.59828636_59828637inv', 'vcf': {'chr': '11', 'ref': 'GG', 'pos': '59828636', 'alt': 'CC'}} assert results['NM_006138.4:c.3_4inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.60061163_60061164inv', 'vcf': {'chr': '11', 'ref': 'GG', 'pos': '60061163', 'alt': 'CC'}} + assert results['NM_006138.4:c.3_4inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_006129.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006138.4'} assert results['flag'] == 'gene_variant' @@ -3785,10 +3789,10 @@ def test_variant121(self): assert results['NM_000088.3:c.4392_*5inv']['hgvs_lrg_variant'] == 'LRG_1:g.21136_21142inv' assert results['NM_000088.3:c.4392_*5inv']['hgvs_transcript_variant'] == 'NM_000088.3:c.4392_*5inv' assert results['NM_000088.3:c.4392_*5inv']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.21136_21142inv' - 
assert results['NM_000088.3:c.4392_*5inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262858_48262866inv', 'vcf': {'chr': 'chr17', 'ref': 'GAGTTTA', 'pos': '48262859', 'alt': 'TAAACTC'}} - assert results['NM_000088.3:c.4392_*5inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185497_50185505inv', 'vcf': {'chr': 'chr17', 'ref': 'GAGTTTA', 'pos': '50185498', 'alt': 'TAAACTC'}} - assert results['NM_000088.3:c.4392_*5inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262858_48262866inv', 'vcf': {'chr': '17', 'ref': 'GAGTTTA', 'pos': '48262859', 'alt': 'TAAACTC'}} - assert results['NM_000088.3:c.4392_*5inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185497_50185505inv', 'vcf': {'chr': '17', 'ref': 'GAGTTTA', 'pos': '50185498', 'alt': 'TAAACTC'}} + assert results['NM_000088.3:c.4392_*5inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262859_48262865inv', 'vcf': {'chr': 'chr17', 'ref': 'GAGTTTA', 'pos': '48262859', 'alt': 'TAAACTC'}} + assert results['NM_000088.3:c.4392_*5inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185498_50185504inv', 'vcf': {'chr': 'chr17', 'ref': 'GAGTTTA', 'pos': '50185498', 'alt': 'TAAACTC'}} + assert results['NM_000088.3:c.4392_*5inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262859_48262865inv', 'vcf': {'chr': '17', 'ref': 'GAGTTTA', 'pos': '48262859', 'alt': 'TAAACTC'}} + assert results['NM_000088.3:c.4392_*5inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185498_50185504inv', 'vcf': {'chr': '17', 'ref': 'GAGTTTA', 'pos': '50185498', 'alt': 'TAAACTC'}} assert results['NM_000088.3:c.4392_*5inv']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} @@ -3894,22 +3898,22 @@ def test_variant126(self): print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.589-2_589-1delinsG' in list(results.keys()) - assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-2del' - assert results['NM_000088.3:c.589-2_589-1delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-2del' - self.assertCountEqual(results['NM_000088.3:c.589-2_589-1delinsG']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.589-2_589-1delinsG']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} - assert results['NM_000088.3:c.589-2_589-1delinsG']['submitted_variant'] == 'NM_000088.3:c.589-2_589-1AG>G' - assert results['NM_000088.3:c.589-2_589-1delinsG']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-2_589-1delinsG' - assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_lrg_variant'] == 'LRG_1:g.8636del' - assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-2_589-1delinsG' - assert results['NM_000088.3:c.589-2_589-1delinsG']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8636del' - assert results['NM_000088.3:c.589-2_589-1delinsG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364_48275365delinsC', 'vcf': {'chr': 'chr17', 'ref': 'CT', 'pos': '48275364', 'alt': 'C'}} - assert results['NM_000088.3:c.589-2_589-1delinsG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003_50198004delinsC', 'vcf': {'chr': 'chr17', 'ref': 'CT', 'pos': '50198003', 'alt': 'C'}} - assert 
results['NM_000088.3:c.589-2_589-1delinsG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364_48275365delinsC', 'vcf': {'chr': '17', 'ref': 'CT', 'pos': '48275364', 'alt': 'C'}} - assert results['NM_000088.3:c.589-2_589-1delinsG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003_50198004delinsC', 'vcf': {'chr': '17', 'ref': 'CT', 'pos': '50198003', 'alt': 'C'}} - assert results['NM_000088.3:c.589-2_589-1delinsG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + assert 'NM_000088.3:c.589-2del' in list(results.keys()) + assert results['NM_000088.3:c.589-2del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-2del' + assert results['NM_000088.3:c.589-2del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-2del' + self.assertCountEqual(results['NM_000088.3:c.589-2del']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.589-2del']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589-2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.589-2del']['submitted_variant'] == 'NM_000088.3:c.589-2_589-1AG>G' + assert results['NM_000088.3:c.589-2del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-2del' + assert results['NM_000088.3:c.589-2del']['hgvs_lrg_variant'] == 'LRG_1:g.8636del' + assert results['NM_000088.3:c.589-2del']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-2del' + assert results['NM_000088.3:c.589-2del']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8636del' + assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000017.10:g.48275366del', 'vcf': {'chr': 'chr17', 'ref': 'CT', 'pos': '48275364', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005del', 'vcf': {'chr': 'chr17', 'ref': 'CT', 'pos': '50198003', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275366del', 'vcf': {'chr': '17', 'ref': 'CT', 'pos': '48275364', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005del', 'vcf': {'chr': '17', 'ref': 'CT', 'pos': '50198003', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant127(self): @@ -3918,22 +3922,22 @@ def test_variant127(self): print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000088.3:c.642+1_642+2delinsG' in list(results.keys()) - assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642+2del' - assert results['NM_000088.3:c.642+1_642+2delinsG']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.642+2del' - self.assertCountEqual(results['NM_000088.3:c.642+1_642+2delinsG']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.642+1_642+2delinsG']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} - assert results['NM_000088.3:c.642+1_642+2delinsG']['submitted_variant'] == 'NM_000088.3:c.642+1_642+2delGTinsG' - assert 
results['NM_000088.3:c.642+1_642+2delinsG']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.642+1_642+2delinsG' - assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_lrg_variant'] == 'LRG_1:g.8693del' - assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_transcript_variant'] == 'NM_000088.3:c.642+1_642+2delinsG' - assert results['NM_000088.3:c.642+1_642+2delinsG']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8693del' - assert results['NM_000088.3:c.642+1_642+2delinsG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308_48275309delinsC', 'vcf': {'chr': 'chr17', 'ref': 'TA', 'pos': '48275307', 'alt': 'T'}} - assert results['NM_000088.3:c.642+1_642+2delinsG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947_50197948delinsC', 'vcf': {'chr': 'chr17', 'ref': 'TA', 'pos': '50197946', 'alt': 'T'}} - assert results['NM_000088.3:c.642+1_642+2delinsG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308_48275309delinsC', 'vcf': {'chr': '17', 'ref': 'TA', 'pos': '48275307', 'alt': 'T'}} - assert results['NM_000088.3:c.642+1_642+2delinsG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947_50197948delinsC', 'vcf': {'chr': '17', 'ref': 'TA', 'pos': '50197946', 'alt': 'T'}} - assert results['NM_000088.3:c.642+1_642+2delinsG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + assert 'NM_000088.3:c.642+2del' in list(results.keys()) + assert results['NM_000088.3:c.642+2del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642+2del' + assert results['NM_000088.3:c.642+2del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.642+2del' + 
self.assertCountEqual(results['NM_000088.3:c.642+2del']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.642+2del']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.642+2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.642+2del']['submitted_variant'] == 'NM_000088.3:c.642+1_642+2delGTinsG' + assert results['NM_000088.3:c.642+2del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.642+2del' + assert results['NM_000088.3:c.642+2del']['hgvs_lrg_variant'] == 'LRG_1:g.8693del' + assert results['NM_000088.3:c.642+2del']['hgvs_transcript_variant'] == 'NM_000088.3:c.642+2del' + assert results['NM_000088.3:c.642+2del']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8693del' + assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308del', 'vcf': {'chr': 'chr17', 'ref': 'TA', 'pos': '48275307', 'alt': 'T'}} + assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947del', 'vcf': {'chr': 'chr17', 'ref': 'TA', 'pos': '50197946', 'alt': 'T'}} + assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308del', 'vcf': {'chr': '17', 'ref': 'TA', 'pos': '48275307', 'alt': 'T'}} + assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947del', 'vcf': {'chr': '17', 'ref': 'TA', 'pos': '50197946', 'alt': 'T'}} + assert results['NM_000088.3:c.642+2del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant128(self): @@ -3977,10 
+3981,10 @@ def test_variant129(self): assert results['NM_004415.3:c.-1_1insA']['hgvs_lrg_variant'] == '' assert results['NM_004415.3:c.-1_1insA']['hgvs_transcript_variant'] == 'NM_004415.3:c.-1_1insA' assert results['NM_004415.3:c.-1_1insA']['hgvs_refseqgene_variant'] == '' - assert results['NM_004415.3:c.-1_1insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.7542148_7542149insA', 'vcf': {'chr': 'chr6', 'ref': 'C', 'pos': '7542148', 'alt': 'CA'}} - assert results['NM_004415.3:c.-1_1insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.7541915_7541916insA', 'vcf': {'chr': 'chr6', 'ref': 'C', 'pos': '7541915', 'alt': 'CA'}} - assert results['NM_004415.3:c.-1_1insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.7542148_7542149insA', 'vcf': {'chr': '6', 'ref': 'C', 'pos': '7542148', 'alt': 'CA'}} - assert results['NM_004415.3:c.-1_1insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.7541915_7541916insA', 'vcf': {'chr': '6', 'ref': 'C', 'pos': '7541915', 'alt': 'CA'}} + assert results['NM_004415.3:c.-1_1insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.7542149dup', 'vcf': {'chr': 'chr6', 'ref': 'C', 'pos': '7542148', 'alt': 'CA'}} + assert results['NM_004415.3:c.-1_1insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.7541916dup', 'vcf': {'chr': 'chr6', 'ref': 'C', 'pos': '7541915', 'alt': 'CA'}} + assert results['NM_004415.3:c.-1_1insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.7542149dup', 'vcf': {'chr': '6', 'ref': 'C', 'pos': '7542148', 'alt': 'CA'}} + assert results['NM_004415.3:c.-1_1insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.7541916dup', 'vcf': {'chr': '6', 'ref': 'C', 'pos': '7541915', 'alt': 'CA'}} assert 
results['NM_004415.3:c.-1_1insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004406.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004415.3'} @@ -4185,9 +4189,9 @@ def test_variant137(self): assert results['NM_000130.4:c.1602del']['hgvs_lrg_variant'] == 'LRG_553:g.41721del' assert results['NM_000130.4:c.1602del']['hgvs_transcript_variant'] == 'NM_000130.4:c.1602del' assert results['NM_000130.4:c.1602del']['hgvs_refseqgene_variant'] == 'NG_011806.1:g.41721del' - assert results['NM_000130.4:c.1602del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.169519048del', 'vcf': {'chr': 'chr1', 'ref': 'CT', 'pos': '169519047', 'alt': 'C'}} + assert results['NM_000130.4:c.1602del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.169519049del', 'vcf': {'chr': 'chr1', 'ref': 'CT', 'pos': '169519047', 'alt': 'C'}} assert results['NM_000130.4:c.1602del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549810del', 'vcf': {'chr': 'chr1', 'ref': 'CT', 'pos': '169549809', 'alt': 'C'}} - assert results['NM_000130.4:c.1602del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.169519048del', 'vcf': {'chr': '1', 'ref': 'CT', 'pos': '169519047', 'alt': 'C'}} + assert results['NM_000130.4:c.1602del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.169519049del', 'vcf': {'chr': '1', 'ref': 'CT', 'pos': '169519047', 'alt': 'C'}} assert results['NM_000130.4:c.1602del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549810del', 'vcf': {'chr': '1', 'ref': 'CT', 'pos': '169549809', 'alt': 'C'}} assert results['NM_000130.4:c.1602del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011806.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000121.2', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_000130.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_553.xml'} @@ -4394,10 +4398,10 @@ def test_variant140(self): assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_variant'] == 'LRG_1:g.14656_14661del' assert results['NM_000088.3:c.2024_2028+1del']['hgvs_transcript_variant'] == 'NM_000088.3:c.2024_2028+1del' assert results['NM_000088.3:c.2024_2028+1del']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.14656_14661del' - assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269340_48269345del', 'vcf': {'chr': 'chr17', 'ref': 'ACTCTTG', 'pos': '48269339', 'alt': 'A'}} - assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191979_50191984del', 'vcf': {'chr': 'chr17', 'ref': 'ACTCTTG', 'pos': '50191978', 'alt': 'A'}} - assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269340_48269345del', 'vcf': {'chr': '17', 'ref': 'ACTCTTG', 'pos': '48269339', 'alt': 'A'}} - assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191979_50191984del', 'vcf': {'chr': '17', 'ref': 'ACTCTTG', 'pos': '50191978', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269343_48269348del', 'vcf': {'chr': 'chr17', 'ref': 'ACTCTTG', 'pos': '48269339', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191982_50191987del', 'vcf': {'chr': 'chr17', 'ref': 'ACTCTTG', 'pos': '50191978', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269343_48269348del', 'vcf': 
{'chr': '17', 'ref': 'ACTCTTG', 'pos': '48269339', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191982_50191987del', 'vcf': {'chr': '17', 'ref': 'ACTCTTG', 'pos': '50191978', 'alt': 'A'}} assert results['NM_000088.3:c.2024_2028+1del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} @@ -4418,10 +4422,10 @@ def test_variant141(self): assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_variant'] == 'LRG_1:g.14656_14661del' assert results['NM_000088.3:c.2024_2028+1del']['hgvs_transcript_variant'] == 'NM_000088.3:c.2024_2028+1del' assert results['NM_000088.3:c.2024_2028+1del']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.14656_14661del' - assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269340_48269345del', 'vcf': {'chr': 'chr17', 'ref': 'ACTCTTG', 'pos': '48269339', 'alt': 'A'}} - assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191979_50191984del', 'vcf': {'chr': 'chr17', 'ref': 'ACTCTTG', 'pos': '50191978', 'alt': 'A'}} - assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269340_48269345del', 'vcf': {'chr': '17', 'ref': 'ACTCTTG', 'pos': '48269339', 'alt': 'A'}} - assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191979_50191984del', 'vcf': {'chr': '17', 'ref': 'ACTCTTG', 'pos': '50191978', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000017.10:g.48269343_48269348del', 'vcf': {'chr': 'chr17', 'ref': 'ACTCTTG', 'pos': '48269339', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191982_50191987del', 'vcf': {'chr': 'chr17', 'ref': 'ACTCTTG', 'pos': '50191978', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269343_48269348del', 'vcf': {'chr': '17', 'ref': 'ACTCTTG', 'pos': '48269339', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191982_50191987del', 'vcf': {'chr': '17', 'ref': 'ACTCTTG', 'pos': '50191978', 'alt': 'A'}} assert results['NM_000088.3:c.2024_2028+1del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} @@ -4660,10 +4664,10 @@ def test_variant149(self): assert results['NM_001162427.1:c.210+1615dup']['hgvs_lrg_variant'] == '' assert results['NM_001162427.1:c.210+1615dup']['hgvs_transcript_variant'] == 'NM_001162427.1:c.210+1615dup' assert results['NM_001162427.1:c.210+1615dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} assert results['NM_001162427.1:c.210+1615dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155899.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162427.1'} assert 'NM_001162426.1:c.363+1dup' in list(results.keys()) @@ -4677,10 +4681,10 @@ def test_variant149(self): assert results['NM_001162426.1:c.363+1dup']['hgvs_lrg_variant'] == '' assert results['NM_001162426.1:c.363+1dup']['hgvs_transcript_variant'] == 'NM_001162426.1:c.363+1dup' assert results['NM_001162426.1:c.363+1dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert 
results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} assert results['NM_001162426.1:c.363+1dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155898.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162426.1'} assert results['flag'] == 'gene_variant' @@ -4695,10 +4699,10 @@ def test_variant149(self): assert results['NM_001362177.1:c.-1+1dup']['hgvs_lrg_variant'] == '' assert results['NM_001362177.1:c.-1+1dup']['hgvs_transcript_variant'] == 'NM_001362177.1:c.-1+1dup' assert 
results['NM_001362177.1:c.-1+1dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} assert results['NM_001362177.1:c.-1+1dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001349106.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001362177.1'} assert 'NM_000368.4:c.363+1dup' in 
list(results.keys()) @@ -4712,10 +4716,10 @@ def test_variant149(self): assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_variant'] == 'LRG_486:g.24048dup' assert results['NM_000368.4:c.363+1dup']['hgvs_transcript_variant'] == 'NM_000368.4:c.363+1dup' assert results['NM_000368.4:c.363+1dup']['hgvs_refseqgene_variant'] == 'NG_012386.1:g.24048dup' - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 
'alt': 'AC'}} assert results['NM_000368.4:c.363+1dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012386.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_486.xml'} @@ -5166,10 +5170,10 @@ def test_variant163(self): assert results['NM_001162427.1:c.210+1615dup']['hgvs_lrg_variant'] == '' assert results['NM_001162427.1:c.210+1615dup']['hgvs_transcript_variant'] == 'NM_001162427.1:c.210+1615dup' assert results['NM_001162427.1:c.210+1615dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert 
results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} assert results['NM_001162427.1:c.210+1615dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155899.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162427.1'} assert 'NM_001162426.1:c.363+1dup' in list(results.keys()) @@ -5183,10 +5187,10 @@ def test_variant163(self): assert results['NM_001162426.1:c.363+1dup']['hgvs_lrg_variant'] == '' assert results['NM_001162426.1:c.363+1dup']['hgvs_transcript_variant'] == 'NM_001162426.1:c.363+1dup' assert results['NM_001162426.1:c.363+1dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 
'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} assert results['NM_001162426.1:c.363+1dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155898.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162426.1'} assert results['flag'] == 'gene_variant' @@ -5201,10 +5205,10 @@ def test_variant163(self): assert results['NM_001362177.1:c.-1+1dup']['hgvs_lrg_variant'] == '' assert results['NM_001362177.1:c.-1+1dup']['hgvs_transcript_variant'] == 'NM_001362177.1:c.-1+1dup' assert results['NM_001362177.1:c.-1+1dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 
'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} assert results['NM_001362177.1:c.-1+1dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001349106.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001362177.1'} assert 'NM_000368.4:c.363+1dup' in list(results.keys()) @@ -5218,10 +5222,10 @@ def test_variant163(self): assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_variant'] == 'LRG_486:g.24048dup' assert results['NM_000368.4:c.363+1dup']['hgvs_transcript_variant'] == 'NM_000368.4:c.363+1dup' assert results['NM_000368.4:c.363+1dup']['hgvs_refseqgene_variant'] == 'NG_012386.1:g.24048dup' - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000009.11:g.135800973dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925586dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} assert results['NM_000368.4:c.363+1dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012386.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_486.xml'} @@ -6705,10 +6709,10 @@ def test_variant191(self): assert results['NM_004006.2:c.3103del']['hgvs_lrg_variant'] == 'LRG_199:g.876053del' assert results['NM_004006.2:c.3103del']['hgvs_transcript_variant'] == 'NM_004006.2:c.3103del' assert results['NM_004006.2:c.3103del']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.876053del' - assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32486674del', 'vcf': {'chr': 'chrX', 'ref': 'TG', 
'pos': '32486673', 'alt': 'T'}} - assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32468557del', 'vcf': {'chr': 'chrX', 'ref': 'TG', 'pos': '32468556', 'alt': 'T'}} - assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32486674del', 'vcf': {'chr': 'X', 'ref': 'TG', 'pos': '32486673', 'alt': 'T'}} - assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32468557del', 'vcf': {'chr': 'X', 'ref': 'TG', 'pos': '32468556', 'alt': 'T'}} + assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32486676del', 'vcf': {'chr': 'chrX', 'ref': 'TG', 'pos': '32486673', 'alt': 'T'}} + assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32468559del', 'vcf': {'chr': 'chrX', 'ref': 'TG', 'pos': '32468556', 'alt': 'T'}} + assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32486676del', 'vcf': {'chr': 'X', 'ref': 'TG', 'pos': '32486673', 'alt': 'T'}} + assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32468559del', 'vcf': {'chr': 'X', 'ref': 'TG', 'pos': '32468556', 'alt': 'T'}} assert results['NM_004006.2:c.3103del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} assert results['flag'] == 'gene_variant' @@ -6736,6 +6740,22 @@ def test_variant192(self): print(results) assert results['flag'] == 'gene_variant' + assert 'NM_004006.2:c.4362_4373del' in 
list(results.keys()) + assert results['NM_004006.2:c.4362_4373del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.4362_4373del' + assert results['NM_004006.2:c.4362_4373del']['refseqgene_context_intronic_sequence'] == '' + self.assertCountEqual(results['NM_004006.2:c.4362_4373del']['alt_genomic_loci'], []) + assert results['NM_004006.2:c.4362_4373del']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.4362_4373del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Ser1455_Phe1458del)', 'slr': 'NP_003997.1:p.(S1455_F1458del)'} + assert results['NM_004006.2:c.4362_4373del']['submitted_variant'] == 'LRG_199t1:c.[4358_4359del;4361_4372del]' + assert results['NM_004006.2:c.4362_4373del']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.4362_4373del']['hgvs_lrg_variant'] == 'LRG_199:g.954953_954964del' + assert results['NM_004006.2:c.4362_4373del']['hgvs_transcript_variant'] == 'NM_004006.2:c.4362_4373del' + assert results['NM_004006.2:c.4362_4373del']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.954953_954964del' + assert results['NM_004006.2:c.4362_4373del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32407764_32407775del', 'vcf': {'chr': 'chrX', 'ref': 'AAACTTCATGGAG', 'pos': '32407762', 'alt': 'A'}} + assert results['NM_004006.2:c.4362_4373del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32389647_32389658del', 'vcf': {'chr': 'chrX', 'ref': 'AAACTTCATGGAG', 'pos': '32389645', 'alt': 'A'}} + assert results['NM_004006.2:c.4362_4373del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32407764_32407775del', 'vcf': {'chr': 'X', 'ref': 'AAACTTCATGGAG', 'pos': '32407762', 'alt': 'A'}} + assert results['NM_004006.2:c.4362_4373del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32389647_32389658del', 'vcf': {'chr': 'X', 'ref': 'AAACTTCATGGAG', 'pos': '32389645', 'alt': 
'A'}} + assert results['NM_004006.2:c.4362_4373del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} assert 'NM_004006.2:c.4358_4359del' in list(results.keys()) assert results['NM_004006.2:c.4358_4359del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.4358_4359del' @@ -6754,23 +6774,6 @@ def test_variant192(self): assert results['NM_004006.2:c.4358_4359del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32389660_32389661del', 'vcf': {'chr': 'X', 'ref': 'CAT', 'pos': '32389659', 'alt': 'C'}} assert results['NM_004006.2:c.4358_4359del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} - assert 'NM_004006.2:c.4362_4373del' in list(results.keys()) - assert results['NM_004006.2:c.4362_4373del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.4362_4373del' - assert results['NM_004006.2:c.4362_4373del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004006.2:c.4362_4373del']['alt_genomic_loci'], []) - assert results['NM_004006.2:c.4362_4373del']['gene_symbol'] == 'DMD' - assert results['NM_004006.2:c.4362_4373del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Ser1455_Phe1458del)', 'slr': 'NP_003997.1:p.(S1455_F1458del)'} - assert results['NM_004006.2:c.4362_4373del']['submitted_variant'] == 'LRG_199t1:c.[4358_4359del;4361_4372del]' - assert results['NM_004006.2:c.4362_4373del']['genome_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.4362_4373del']['hgvs_lrg_variant'] == 
'LRG_199:g.954953_954964del' - assert results['NM_004006.2:c.4362_4373del']['hgvs_transcript_variant'] == 'NM_004006.2:c.4362_4373del' - assert results['NM_004006.2:c.4362_4373del']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.954953_954964del' - assert results['NM_004006.2:c.4362_4373del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32407763_32407774del', 'vcf': {'chr': 'chrX', 'ref': 'AAACTTCATGGAG', 'pos': '32407762', 'alt': 'A'}} - assert results['NM_004006.2:c.4362_4373del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32389646_32389657del', 'vcf': {'chr': 'chrX', 'ref': 'AAACTTCATGGAG', 'pos': '32389645', 'alt': 'A'}} - assert results['NM_004006.2:c.4362_4373del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32407763_32407774del', 'vcf': {'chr': 'X', 'ref': 'AAACTTCATGGAG', 'pos': '32407762', 'alt': 'A'}} - assert results['NM_004006.2:c.4362_4373del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32389646_32389657del', 'vcf': {'chr': 'X', 'ref': 'AAACTTCATGGAG', 'pos': '32389645', 'alt': 'A'}} - assert results['NM_004006.2:c.4362_4373del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} - def test_variant193(self): variant = 'LRG_199t1:c.2376G>C(;)3103del' @@ -6788,10 +6791,10 @@ def test_variant193(self): assert results['NM_004006.2:c.3103del']['hgvs_lrg_variant'] == 'LRG_199:g.876053del' assert results['NM_004006.2:c.3103del']['hgvs_transcript_variant'] == 'NM_004006.2:c.3103del' assert results['NM_004006.2:c.3103del']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.876053del' - assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000023.10:g.32486674del', 'vcf': {'chr': 'chrX', 'ref': 'TG', 'pos': '32486673', 'alt': 'T'}} - assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32468557del', 'vcf': {'chr': 'chrX', 'ref': 'TG', 'pos': '32468556', 'alt': 'T'}} - assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32486674del', 'vcf': {'chr': 'X', 'ref': 'TG', 'pos': '32486673', 'alt': 'T'}} - assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32468557del', 'vcf': {'chr': 'X', 'ref': 'TG', 'pos': '32468556', 'alt': 'T'}} + assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32486676del', 'vcf': {'chr': 'chrX', 'ref': 'TG', 'pos': '32486673', 'alt': 'T'}} + assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32468559del', 'vcf': {'chr': 'chrX', 'ref': 'TG', 'pos': '32468556', 'alt': 'T'}} + assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32486676del', 'vcf': {'chr': 'X', 'ref': 'TG', 'pos': '32486673', 'alt': 'T'}} + assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32468559del', 'vcf': {'chr': 'X', 'ref': 'TG', 'pos': '32468556', 'alt': 'T'}} assert results['NM_004006.2:c.3103del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} assert results['flag'] == 'gene_variant' @@ -6866,6 +6869,7 @@ def test_variant196(self): results = 
self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_004006.2:c.476T=' in list(results.keys()) assert results['NM_004006.2:c.476T=']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.476T=' assert results['NM_004006.2:c.476T=']['refseqgene_context_intronic_sequence'] == '' @@ -6883,24 +6887,6 @@ def test_variant196(self): assert results['NM_004006.2:c.476T=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522A=', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '32816522', 'alt': 'A'}} assert results['NM_004006.2:c.476T=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} - assert results['flag'] == 'gene_variant' - assert 'NM_004006.2:c.296T>G' in list(results.keys()) - assert results['NM_004006.2:c.296T>G']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.296T>G' - assert results['NM_004006.2:c.296T>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004006.2:c.296T>G']['alt_genomic_loci'], []) - assert results['NM_004006.2:c.296T>G']['gene_symbol'] == 'DMD' - assert results['NM_004006.2:c.296T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Ile99Ser)', 'slr': 'NP_003997.1:p.(I99S)'} - assert results['NM_004006.2:c.296T>G']['submitted_variant'] == 'LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C' - assert results['NM_004006.2:c.296T>G']['genome_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.296T>G']['hgvs_lrg_variant'] == 'LRG_199:g.521254T>G' - assert results['NM_004006.2:c.296T>G']['hgvs_transcript_variant'] == 'NM_004006.2:c.296T>G' - assert results['NM_004006.2:c.296T>G']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.521254T>G' - assert 
results['NM_004006.2:c.296T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32841473A>C', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '32841473', 'alt': 'C'}} - assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32823356A>C', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '32823356', 'alt': 'C'}} - assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32841473A>C', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '32841473', 'alt': 'C'}} - assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32823356A>C', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '32823356', 'alt': 'C'}} - assert results['NM_004006.2:c.296T>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} - assert 'NM_004006.2:c.1083A>C' in list(results.keys()) assert results['NM_004006.2:c.1083A>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.1083A>C' assert results['NM_004006.2:c.1083A>C']['refseqgene_context_intronic_sequence'] == '' @@ -6918,6 +6904,23 @@ def test_variant196(self): assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32645030T>G', 'vcf': {'chr': 'X', 'ref': 'T', 'pos': '32645030', 'alt': 'G'}} assert results['NM_004006.2:c.1083A>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + assert 'NM_004006.2:c.296T>G' in 
list(results.keys()) + assert results['NM_004006.2:c.296T>G']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.296T>G' + assert results['NM_004006.2:c.296T>G']['refseqgene_context_intronic_sequence'] == '' + self.assertCountEqual(results['NM_004006.2:c.296T>G']['alt_genomic_loci'], []) + assert results['NM_004006.2:c.296T>G']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.296T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Ile99Ser)', 'slr': 'NP_003997.1:p.(I99S)'} + assert results['NM_004006.2:c.296T>G']['submitted_variant'] == 'LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C' + assert results['NM_004006.2:c.296T>G']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.296T>G']['hgvs_lrg_variant'] == 'LRG_199:g.521254T>G' + assert results['NM_004006.2:c.296T>G']['hgvs_transcript_variant'] == 'NM_004006.2:c.296T>G' + assert results['NM_004006.2:c.296T>G']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.521254T>G' + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32841473A>C', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '32841473', 'alt': 'C'}} + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32823356A>C', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '32823356', 'alt': 'C'}} + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32841473A>C', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '32841473', 'alt': 'C'}} + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32823356A>C', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '32823356', 'alt': 'C'}} + assert results['NM_004006.2:c.296T>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 
'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + def test_variant197(self): variant = 'LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del' @@ -6935,10 +6938,10 @@ def test_variant197(self): assert results['NM_004006.2:c.1408del']['hgvs_lrg_variant'] == 'LRG_199:g.730233del' assert results['NM_004006.2:c.1408del']['hgvs_transcript_variant'] == 'NM_004006.2:c.1408del' assert results['NM_004006.2:c.1408del']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.730233del' - assert results['NM_004006.2:c.1408del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32632494del', 'vcf': {'chr': 'chrX', 'ref': 'CT', 'pos': '32632493', 'alt': 'C'}} - assert results['NM_004006.2:c.1408del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32614377del', 'vcf': {'chr': 'chrX', 'ref': 'CT', 'pos': '32614376', 'alt': 'C'}} - assert results['NM_004006.2:c.1408del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32632494del', 'vcf': {'chr': 'X', 'ref': 'CT', 'pos': '32632493', 'alt': 'C'}} - assert results['NM_004006.2:c.1408del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32614377del', 'vcf': {'chr': 'X', 'ref': 'CT', 'pos': '32614376', 'alt': 'C'}} + assert results['NM_004006.2:c.1408del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32632496del', 'vcf': {'chr': 'chrX', 'ref': 'CT', 'pos': '32632493', 'alt': 'C'}} + assert results['NM_004006.2:c.1408del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32614379del', 'vcf': {'chr': 'chrX', 'ref': 'CT', 'pos': '32614376', 'alt': 'C'}} + assert results['NM_004006.2:c.1408del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32632496del', 'vcf': {'chr': 'X', 'ref': 'CT', 'pos': '32632493', 'alt': 'C'}} + assert 
results['NM_004006.2:c.1408del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32614379del', 'vcf': {'chr': 'X', 'ref': 'CT', 'pos': '32614376', 'alt': 'C'}} assert results['NM_004006.2:c.1408del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} assert results['flag'] == 'gene_variant' @@ -7167,10 +7170,10 @@ def test_variant201(self): assert results['NM_001290129.1:c.1829+5_1829+8del']['hgvs_lrg_variant'] == '' assert results['NM_001290129.1:c.1829+5_1829+8del']['hgvs_transcript_variant'] == 'NM_001290129.1:c.1829+5_1829+8del' assert results['NM_001290129.1:c.1829+5_1829+8del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001290129.1:c.1829+5_1829+8del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655122_46655125del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} - assert results['NM_001290129.1:c.1829+5_1829+8del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189450_46189453del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} - assert results['NM_001290129.1:c.1829+5_1829+8del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655122_46655125del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} - assert results['NM_001290129.1:c.1829+5_1829+8del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189450_46189453del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} + assert results['NM_001290129.1:c.1829+5_1829+8del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655126_46655129del', 'vcf': {'chr': 'chr1', 
'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} + assert results['NM_001290129.1:c.1829+5_1829+8del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189454_46189457del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} + assert results['NM_001290129.1:c.1829+5_1829+8del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655126_46655129del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} + assert results['NM_001290129.1:c.1829+5_1829+8del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189454_46189457del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} assert results['NM_001290129.1:c.1829+5_1829+8del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001277058.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001290129.1'} assert 'NM_001290130.1:c.1466+5_1466+8del' in list(results.keys()) @@ -7184,10 +7187,10 @@ def test_variant201(self): assert results['NM_001290130.1:c.1466+5_1466+8del']['hgvs_lrg_variant'] == '' assert results['NM_001290130.1:c.1466+5_1466+8del']['hgvs_transcript_variant'] == 'NM_001290130.1:c.1466+5_1466+8del' assert results['NM_001290130.1:c.1466+5_1466+8del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001290130.1:c.1466+5_1466+8del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655122_46655125del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} - assert results['NM_001290130.1:c.1466+5_1466+8del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189450_46189453del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} - assert results['NM_001290130.1:c.1466+5_1466+8del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655122_46655125del', 'vcf': {'chr': '1', 'ref': 
'GTCAC', 'pos': '46655121', 'alt': 'G'}} - assert results['NM_001290130.1:c.1466+5_1466+8del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189450_46189453del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} + assert results['NM_001290130.1:c.1466+5_1466+8del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655126_46655129del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} + assert results['NM_001290130.1:c.1466+5_1466+8del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189454_46189457del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} + assert results['NM_001290130.1:c.1466+5_1466+8del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655126_46655129del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} + assert results['NM_001290130.1:c.1466+5_1466+8del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189454_46189457del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} assert results['NM_001290130.1:c.1466+5_1466+8del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001277059.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001290130.1'} assert 'NM_017739.3:c.1895+5_1895+8del' in list(results.keys()) @@ -7201,10 +7204,10 @@ def test_variant201(self): assert results['NM_017739.3:c.1895+5_1895+8del']['hgvs_lrg_variant'] == 'LRG_701:g.35853_35856del' assert results['NM_017739.3:c.1895+5_1895+8del']['hgvs_transcript_variant'] == 'NM_017739.3:c.1895+5_1895+8del' assert results['NM_017739.3:c.1895+5_1895+8del']['hgvs_refseqgene_variant'] == 'NG_009205.2:g.35853_35856del' - assert results['NM_017739.3:c.1895+5_1895+8del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655122_46655125del', 
'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} - assert results['NM_017739.3:c.1895+5_1895+8del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189450_46189453del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} - assert results['NM_017739.3:c.1895+5_1895+8del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655122_46655125del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} - assert results['NM_017739.3:c.1895+5_1895+8del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189450_46189453del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} + assert results['NM_017739.3:c.1895+5_1895+8del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655126_46655129del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} + assert results['NM_017739.3:c.1895+5_1895+8del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189454_46189457del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} + assert results['NM_017739.3:c.1895+5_1895+8del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655126_46655129del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} + assert results['NM_017739.3:c.1895+5_1895+8del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189454_46189457del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} assert results['NM_017739.3:c.1895+5_1895+8del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009205.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_060209.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_017739.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_701.xml'} 
assert 'NM_001243766.1:c.1869+31_1869+34del' in list(results.keys()) @@ -7218,10 +7221,10 @@ def test_variant201(self): assert results['NM_001243766.1:c.1869+31_1869+34del']['hgvs_lrg_variant'] == 'LRG_701:g.35853_35856del' assert results['NM_001243766.1:c.1869+31_1869+34del']['hgvs_transcript_variant'] == 'NM_001243766.1:c.1869+31_1869+34del' assert results['NM_001243766.1:c.1869+31_1869+34del']['hgvs_refseqgene_variant'] == 'NG_009205.2:g.35853_35856del' - assert results['NM_001243766.1:c.1869+31_1869+34del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655122_46655125del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} - assert results['NM_001243766.1:c.1869+31_1869+34del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189450_46189453del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} - assert results['NM_001243766.1:c.1869+31_1869+34del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655122_46655125del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} - assert results['NM_001243766.1:c.1869+31_1869+34del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189450_46189453del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} + assert results['NM_001243766.1:c.1869+31_1869+34del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655126_46655129del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} + assert results['NM_001243766.1:c.1869+31_1869+34del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189454_46189457del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} + assert results['NM_001243766.1:c.1869+31_1869+34del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000001.10:g.46655126_46655129del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} + assert results['NM_001243766.1:c.1869+31_1869+34del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189454_46189457del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} assert results['NM_001243766.1:c.1869+31_1869+34del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009205.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001230695.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001243766.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_701.xml'} assert results['flag'] == 'gene_variant' @@ -7242,10 +7245,10 @@ def test_variant202(self): assert results['NM_000329.2:c.106_114del']['hgvs_lrg_variant'] == '' assert results['NM_000329.2:c.106_114del']['hgvs_transcript_variant'] == 'NM_000329.2:c.106_114del' assert results['NM_000329.2:c.106_114del']['hgvs_refseqgene_variant'] == 'NG_008472.1:g.8111_8119del' - assert results['NM_000329.2:c.106_114del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.68912524_68912532del', 'vcf': {'chr': 'chr1', 'ref': 'TGAGCCAGAG', 'pos': '68912523', 'alt': 'T'}} - assert results['NM_000329.2:c.106_114del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.68446841_68446849del', 'vcf': {'chr': 'chr1', 'ref': 'TGAGCCAGAG', 'pos': '68446840', 'alt': 'T'}} - assert results['NM_000329.2:c.106_114del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.68912524_68912532del', 'vcf': {'chr': '1', 'ref': 'TGAGCCAGAG', 'pos': '68912523', 'alt': 'T'}} - assert results['NM_000329.2:c.106_114del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.68446841_68446849del', 'vcf': {'chr': '1', 'ref': 'TGAGCCAGAG', 'pos': '68446840', 'alt': 'T'}} + assert 
results['NM_000329.2:c.106_114del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.68912525_68912533del', 'vcf': {'chr': 'chr1', 'ref': 'TGAGCCAGAG', 'pos': '68912523', 'alt': 'T'}} + assert results['NM_000329.2:c.106_114del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.68446842_68446850del', 'vcf': {'chr': 'chr1', 'ref': 'TGAGCCAGAG', 'pos': '68446840', 'alt': 'T'}} + assert results['NM_000329.2:c.106_114del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.68912525_68912533del', 'vcf': {'chr': '1', 'ref': 'TGAGCCAGAG', 'pos': '68912523', 'alt': 'T'}} + assert results['NM_000329.2:c.106_114del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.68446842_68446850del', 'vcf': {'chr': '1', 'ref': 'TGAGCCAGAG', 'pos': '68446840', 'alt': 'T'}} assert results['NM_000329.2:c.106_114del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008472.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000320.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000329.2'} assert results['flag'] == 'gene_variant' @@ -7266,10 +7269,10 @@ def test_variant203(self): assert results['NM_000329.2:c.109_114del']['hgvs_lrg_variant'] == '' assert results['NM_000329.2:c.109_114del']['hgvs_transcript_variant'] == 'NM_000329.2:c.109_114del' assert results['NM_000329.2:c.109_114del']['hgvs_refseqgene_variant'] == 'NG_008472.1:g.8114_8119del' - assert results['NM_000329.2:c.109_114del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.68912524_68912529del', 'vcf': {'chr': 'chr1', 'ref': 'TGAGCCA', 'pos': '68912523', 'alt': 'T'}} - assert results['NM_000329.2:c.109_114del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.68446841_68446846del', 'vcf': {'chr': 'chr1', 'ref': 'TGAGCCA', 'pos': '68446840', 'alt': 'T'}} - assert 
results['NM_000329.2:c.109_114del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.68912524_68912529del', 'vcf': {'chr': '1', 'ref': 'TGAGCCA', 'pos': '68912523', 'alt': 'T'}} - assert results['NM_000329.2:c.109_114del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.68446841_68446846del', 'vcf': {'chr': '1', 'ref': 'TGAGCCA', 'pos': '68446840', 'alt': 'T'}} + assert results['NM_000329.2:c.109_114del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.68912527_68912532del', 'vcf': {'chr': 'chr1', 'ref': 'TGAGCCA', 'pos': '68912523', 'alt': 'T'}} + assert results['NM_000329.2:c.109_114del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.68446844_68446849del', 'vcf': {'chr': 'chr1', 'ref': 'TGAGCCA', 'pos': '68446840', 'alt': 'T'}} + assert results['NM_000329.2:c.109_114del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.68912527_68912532del', 'vcf': {'chr': '1', 'ref': 'TGAGCCA', 'pos': '68912523', 'alt': 'T'}} + assert results['NM_000329.2:c.109_114del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.68446844_68446849del', 'vcf': {'chr': '1', 'ref': 'TGAGCCA', 'pos': '68446840', 'alt': 'T'}} assert results['NM_000329.2:c.109_114del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008472.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000320.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000329.2'} assert results['flag'] == 'gene_variant' @@ -7373,10 +7376,10 @@ def test_variant206(self): assert results['NM_020699.2:c.562_563del']['hgvs_lrg_variant'] == '' assert results['NM_020699.2:c.562_563del']['hgvs_transcript_variant'] == 'NM_020699.2:c.562_563del' assert results['NM_020699.2:c.562_563del']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_020699.2:c.562_563del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.153791301_153791302del', 'vcf': {'chr': 'chr1', 'ref': 'CTG', 'pos': '153791300', 'alt': 'C'}} - assert results['NM_020699.2:c.562_563del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153818825_153818826del', 'vcf': {'chr': 'chr1', 'ref': 'CTG', 'pos': '153818824', 'alt': 'C'}} - assert results['NM_020699.2:c.562_563del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.153791301_153791302del', 'vcf': {'chr': '1', 'ref': 'CTG', 'pos': '153791300', 'alt': 'C'}} - assert results['NM_020699.2:c.562_563del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153818825_153818826del', 'vcf': {'chr': '1', 'ref': 'CTG', 'pos': '153818824', 'alt': 'C'}} + assert results['NM_020699.2:c.562_563del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.153791302_153791303del', 'vcf': {'chr': 'chr1', 'ref': 'CTG', 'pos': '153791300', 'alt': 'C'}} + assert results['NM_020699.2:c.562_563del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153818826_153818827del', 'vcf': {'chr': 'chr1', 'ref': 'CTG', 'pos': '153818824', 'alt': 'C'}} + assert results['NM_020699.2:c.562_563del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.153791302_153791303del', 'vcf': {'chr': '1', 'ref': 'CTG', 'pos': '153791300', 'alt': 'C'}} + assert results['NM_020699.2:c.562_563del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153818826_153818827del', 'vcf': {'chr': '1', 'ref': 'CTG', 'pos': '153818824', 'alt': 'C'}} assert results['NM_020699.2:c.562_563del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065750.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020699.2'} assert 'NM_020699.3:c.562_563del' in 
list(results.keys()) @@ -7390,10 +7393,10 @@ def test_variant206(self): assert results['NM_020699.3:c.562_563del']['hgvs_lrg_variant'] == '' assert results['NM_020699.3:c.562_563del']['hgvs_transcript_variant'] == 'NM_020699.3:c.562_563del' assert results['NM_020699.3:c.562_563del']['hgvs_refseqgene_variant'] == '' - assert results['NM_020699.3:c.562_563del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.153791301_153791302del', 'vcf': {'chr': 'chr1', 'ref': 'CTG', 'pos': '153791300', 'alt': 'C'}} - assert results['NM_020699.3:c.562_563del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153818825_153818826del', 'vcf': {'chr': 'chr1', 'ref': 'CTG', 'pos': '153818824', 'alt': 'C'}} - assert results['NM_020699.3:c.562_563del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.153791301_153791302del', 'vcf': {'chr': '1', 'ref': 'CTG', 'pos': '153791300', 'alt': 'C'}} - assert results['NM_020699.3:c.562_563del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153818825_153818826del', 'vcf': {'chr': '1', 'ref': 'CTG', 'pos': '153818824', 'alt': 'C'}} + assert results['NM_020699.3:c.562_563del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.153791302_153791303del', 'vcf': {'chr': 'chr1', 'ref': 'CTG', 'pos': '153791300', 'alt': 'C'}} + assert results['NM_020699.3:c.562_563del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153818826_153818827del', 'vcf': {'chr': 'chr1', 'ref': 'CTG', 'pos': '153818824', 'alt': 'C'}} + assert results['NM_020699.3:c.562_563del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.153791302_153791303del', 'vcf': {'chr': '1', 'ref': 'CTG', 'pos': '153791300', 'alt': 'C'}} + assert results['NM_020699.3:c.562_563del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000001.11:g.153818826_153818827del', 'vcf': {'chr': '1', 'ref': 'CTG', 'pos': '153818824', 'alt': 'C'}} assert results['NM_020699.3:c.562_563del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065750.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020699.3'} @@ -11070,10 +11073,10 @@ def test_variant251(self): assert results['NM_001276695.1:c.535_537del']['hgvs_lrg_variant'] == '' assert results['NM_001276695.1:c.535_537del']['hgvs_transcript_variant'] == 'NM_001276695.1:c.535_537del' assert results['NM_001276695.1:c.535_537del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276695.1:c.535_537del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001276695.1:c.535_537del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001276695.1:c.535_537del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001276695.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276695.1:c.535_537del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276695.1:c.535_537del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert 
results['NM_001276695.1:c.535_537del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276695.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} assert results['NM_001276695.1:c.535_537del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263624.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276695.1'} assert 'NM_001126113.2:c.652_654del' in list(results.keys()) @@ -11087,10 +11090,10 @@ def test_variant251(self): assert results['NM_001126113.2:c.652_654del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' assert results['NM_001126113.2:c.652_654del']['hgvs_transcript_variant'] == 'NM_001126113.2:c.652_654del' assert results['NM_001126113.2:c.652_654del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' - assert results['NM_001126113.2:c.652_654del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001126113.2:c.652_654del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001126113.2:c.652_654del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001126113.2:c.652_654del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert 
results['NM_001126113.2:c.652_654del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126113.2:c.652_654del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001126113.2:c.652_654del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126113.2:c.652_654del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} assert results['NM_001126113.2:c.652_654del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119585.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126113.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} assert 'NM_001126118.1:c.535_537del' in list(results.keys()) @@ -11104,10 +11107,10 @@ def test_variant251(self): assert results['NM_001126118.1:c.535_537del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' assert results['NM_001126118.1:c.535_537del']['hgvs_transcript_variant'] == 'NM_001126118.1:c.535_537del' assert results['NM_001126118.1:c.535_537del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' - assert results['NM_001126118.1:c.535_537del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001126118.1:c.535_537del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001126118.1:c.535_537del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001126118.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001126118.1:c.535_537del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126118.1:c.535_537del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001126118.1:c.535_537del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126118.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} assert results['NM_001126118.1:c.535_537del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119590.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126118.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} assert 'NM_001126116.1:c.256_258del' in list(results.keys()) @@ -11121,10 +11124,10 @@ def test_variant251(self): assert results['NM_001126116.1:c.256_258del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' assert 
results['NM_001126116.1:c.256_258del']['hgvs_transcript_variant'] == 'NM_001126116.1:c.256_258del' assert results['NM_001126116.1:c.256_258del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' - assert results['NM_001126116.1:c.256_258del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001126116.1:c.256_258del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001126116.1:c.256_258del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001126116.1:c.256_258del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001126116.1:c.256_258del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126116.1:c.256_258del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001126116.1:c.256_258del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126116.1:c.256_258del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} assert 
results['NM_001126116.1:c.256_258del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119588.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126116.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} assert 'NM_001126117.1:c.256_258del' in list(results.keys()) @@ -11138,10 +11141,10 @@ def test_variant251(self): assert results['NM_001126117.1:c.256_258del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' assert results['NM_001126117.1:c.256_258del']['hgvs_transcript_variant'] == 'NM_001126117.1:c.256_258del' assert results['NM_001126117.1:c.256_258del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' - assert results['NM_001126117.1:c.256_258del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001126117.1:c.256_258del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001126117.1:c.256_258del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001126117.1:c.256_258del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001126117.1:c.256_258del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126117.1:c.256_258del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001126117.1:c.256_258del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126117.1:c.256_258del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} assert results['NM_001126117.1:c.256_258del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119589.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126117.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} assert 'NM_001276761.1:c.535_537del' in list(results.keys()) @@ -11155,10 +11158,10 @@ def test_variant251(self): assert results['NM_001276761.1:c.535_537del']['hgvs_lrg_variant'] == '' assert results['NM_001276761.1:c.535_537del']['hgvs_transcript_variant'] == 'NM_001276761.1:c.535_537del' assert results['NM_001276761.1:c.535_537del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276761.1:c.535_537del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001276761.1:c.535_537del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001276761.1:c.535_537del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert 
results['NM_001276761.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276761.1:c.535_537del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276761.1:c.535_537del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276761.1:c.535_537del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276761.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} assert results['NM_001276761.1:c.535_537del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263690.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276761.1'} assert 'NM_001126112.2:c.652_654del' in list(results.keys()) @@ -11172,10 +11175,10 @@ def test_variant251(self): assert results['NM_001126112.2:c.652_654del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' assert results['NM_001126112.2:c.652_654del']['hgvs_transcript_variant'] == 'NM_001126112.2:c.652_654del' assert results['NM_001126112.2:c.652_654del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' - assert results['NM_001126112.2:c.652_654del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert 
results['NM_001126112.2:c.652_654del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001126112.2:c.652_654del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001126112.2:c.652_654del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001126112.2:c.652_654del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126112.2:c.652_654del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001126112.2:c.652_654del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126112.2:c.652_654del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} assert results['NM_001126112.2:c.652_654del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119584.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126112.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} assert results['flag'] == 'gene_variant' @@ -11190,10 +11193,10 @@ def test_variant251(self): assert 
results['NM_001276697.1:c.175_177del']['hgvs_lrg_variant'] == '' assert results['NM_001276697.1:c.175_177del']['hgvs_transcript_variant'] == 'NM_001276697.1:c.175_177del' assert results['NM_001276697.1:c.175_177del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276697.1:c.175_177del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001276697.1:c.175_177del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001276697.1:c.175_177del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001276697.1:c.175_177del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276697.1:c.175_177del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276697.1:c.175_177del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276697.1:c.175_177del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276697.1:c.175_177del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 
'G'}} assert results['NM_001276697.1:c.175_177del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263626.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276697.1'} assert 'NM_001276696.1:c.535_537del' in list(results.keys()) @@ -11207,10 +11210,10 @@ def test_variant251(self): assert results['NM_001276696.1:c.535_537del']['hgvs_lrg_variant'] == '' assert results['NM_001276696.1:c.535_537del']['hgvs_transcript_variant'] == 'NM_001276696.1:c.535_537del' assert results['NM_001276696.1:c.535_537del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276696.1:c.535_537del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001276696.1:c.535_537del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001276696.1:c.535_537del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001276696.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276696.1:c.535_537del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276696.1:c.535_537del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276696.1:c.535_537del']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276696.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} assert results['NM_001276696.1:c.535_537del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263625.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276696.1'} assert 'NM_001276698.1:c.175_177del' in list(results.keys()) @@ -11224,10 +11227,10 @@ def test_variant251(self): assert results['NM_001276698.1:c.175_177del']['hgvs_lrg_variant'] == '' assert results['NM_001276698.1:c.175_177del']['hgvs_transcript_variant'] == 'NM_001276698.1:c.175_177del' assert results['NM_001276698.1:c.175_177del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276698.1:c.175_177del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001276698.1:c.175_177del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001276698.1:c.175_177del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001276698.1:c.175_177del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276698.1:c.175_177del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 
'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276698.1:c.175_177del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276698.1:c.175_177del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276698.1:c.175_177del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} assert results['NM_001276698.1:c.175_177del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263627.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276698.1'} assert 'NM_001126115.1:c.256_258del' in list(results.keys()) @@ -11241,10 +11244,10 @@ def test_variant251(self): assert results['NM_001126115.1:c.256_258del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' assert results['NM_001126115.1:c.256_258del']['hgvs_transcript_variant'] == 'NM_001126115.1:c.256_258del' assert results['NM_001126115.1:c.256_258del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' - assert results['NM_001126115.1:c.256_258del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001126115.1:c.256_258del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001126115.1:c.256_258del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - 
assert results['NM_001126115.1:c.256_258del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001126115.1:c.256_258del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126115.1:c.256_258del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001126115.1:c.256_258del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126115.1:c.256_258del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} assert results['NM_001126115.1:c.256_258del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119587.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126115.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} assert 'NM_001126114.2:c.652_654del' in list(results.keys()) @@ -11258,10 +11261,10 @@ def test_variant251(self): assert results['NM_001126114.2:c.652_654del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' assert results['NM_001126114.2:c.652_654del']['hgvs_transcript_variant'] == 'NM_001126114.2:c.652_654del' assert results['NM_001126114.2:c.652_654del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' - assert results['NM_001126114.2:c.652_654del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001126114.2:c.652_654del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001126114.2:c.652_654del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001126114.2:c.652_654del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001126114.2:c.652_654del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126114.2:c.652_654del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001126114.2:c.652_654del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001126114.2:c.652_654del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} assert results['NM_001126114.2:c.652_654del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119586.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126114.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} assert 
'NM_001276699.1:c.175_177del' in list(results.keys()) @@ -11275,10 +11278,10 @@ def test_variant251(self): assert results['NM_001276699.1:c.175_177del']['hgvs_lrg_variant'] == '' assert results['NM_001276699.1:c.175_177del']['hgvs_transcript_variant'] == 'NM_001276699.1:c.175_177del' assert results['NM_001276699.1:c.175_177del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276699.1:c.175_177del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001276699.1:c.175_177del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001276699.1:c.175_177del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001276699.1:c.175_177del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276699.1:c.175_177del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276699.1:c.175_177del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276699.1:c.175_177del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276699.1:c.175_177del']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} assert results['NM_001276699.1:c.175_177del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263628.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276699.1'} assert 'NM_001276760.1:c.535_537del' in list(results.keys()) @@ -11292,10 +11295,10 @@ def test_variant251(self): assert results['NM_001276760.1:c.535_537del']['hgvs_lrg_variant'] == '' assert results['NM_001276760.1:c.535_537del']['hgvs_transcript_variant'] == 'NM_001276760.1:c.535_537del' assert results['NM_001276760.1:c.535_537del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276760.1:c.535_537del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001276760.1:c.535_537del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001276760.1:c.535_537del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001276760.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276760.1:c.535_537del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276760.1:c.535_537del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 
'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_001276760.1:c.535_537del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_001276760.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} assert results['NM_001276760.1:c.535_537del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263689.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276760.1'} assert 'NM_000546.5:c.652_654del' in list(results.keys()) @@ -11309,10 +11312,10 @@ def test_variant251(self): assert results['NM_000546.5:c.652_654del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' assert results['NM_000546.5:c.652_654del']['hgvs_transcript_variant'] == 'NM_000546.5:c.652_654del' assert results['NM_000546.5:c.652_654del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' - assert results['NM_000546.5:c.652_654del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_000546.5:c.652_654del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_000546.5:c.652_654del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578195_7578197del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_000546.5:c.652_654del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674877_7674879del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert 
results['NM_000546.5:c.652_654del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_000546.5:c.652_654del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} + assert results['NM_000546.5:c.652_654del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} + assert results['NM_000546.5:c.652_654del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} assert results['NM_000546.5:c.652_654del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000537.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000546.5', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} @@ -11332,10 +11335,10 @@ def test_variant252(self): assert results['NM_001276760.1:c.289dup']['hgvs_lrg_variant'] == '' assert results['NM_001276760.1:c.289dup']['hgvs_transcript_variant'] == 'NM_001276760.1:c.289dup' assert results['NM_001276760.1:c.289dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276760.1:c.289dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001276760.1:c.289dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001276760.1:c.289dup']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001276760.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276760.1:c.289dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276760.1:c.289dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276760.1:c.289dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276760.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert results['NM_001276760.1:c.289dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263689.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276760.1'} assert 'NM_001126118.1:c.289dup' in list(results.keys()) @@ -11349,10 +11352,10 @@ def test_variant252(self): assert results['NM_001126118.1:c.289dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' assert results['NM_001126118.1:c.289dup']['hgvs_transcript_variant'] == 'NM_001126118.1:c.289dup' assert results['NM_001126118.1:c.289dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' - assert results['NM_001126118.1:c.289dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert 
results['NM_001126118.1:c.289dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001126118.1:c.289dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001126118.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126118.1:c.289dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001126118.1:c.289dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126118.1:c.289dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001126118.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert results['NM_001126118.1:c.289dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119590.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126118.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} assert 'NM_001276695.1:c.289dup' in list(results.keys()) @@ -11366,10 +11369,10 @@ def test_variant252(self): assert results['NM_001276695.1:c.289dup']['hgvs_lrg_variant'] == '' assert 
results['NM_001276695.1:c.289dup']['hgvs_transcript_variant'] == 'NM_001276695.1:c.289dup' assert results['NM_001276695.1:c.289dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276695.1:c.289dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001276695.1:c.289dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001276695.1:c.289dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001276695.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276695.1:c.289dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276695.1:c.289dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276695.1:c.289dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276695.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert results['NM_001276695.1:c.289dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263624.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276695.1'} assert 'NM_001276699.1:c.-72dup' in list(results.keys()) @@ -11383,10 +11386,10 @@ def test_variant252(self): assert results['NM_001276699.1:c.-72dup']['hgvs_lrg_variant'] == '' assert results['NM_001276699.1:c.-72dup']['hgvs_transcript_variant'] == 'NM_001276699.1:c.-72dup' assert results['NM_001276699.1:c.-72dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276699.1:c.-72dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001276699.1:c.-72dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001276699.1:c.-72dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001276699.1:c.-72dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276699.1:c.-72dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276699.1:c.-72dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276699.1:c.-72dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276699.1:c.-72dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 
'17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert results['NM_001276699.1:c.-72dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263628.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276699.1'} assert 'NM_001126115.1:c.10dup' in list(results.keys()) @@ -11400,10 +11403,10 @@ def test_variant252(self): assert results['NM_001126115.1:c.10dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' assert results['NM_001126115.1:c.10dup']['hgvs_transcript_variant'] == 'NM_001126115.1:c.10dup' assert results['NM_001126115.1:c.10dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' - assert results['NM_001126115.1:c.10dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001126115.1:c.10dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001126115.1:c.10dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001126115.1:c.10dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126115.1:c.10dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001126115.1:c.10dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126115.1:c.10dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001126115.1:c.10dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert results['NM_001126115.1:c.10dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119587.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126115.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} assert 'NM_001276697.1:c.-72dup' in list(results.keys()) @@ -11417,10 +11420,10 @@ def test_variant252(self): assert results['NM_001276697.1:c.-72dup']['hgvs_lrg_variant'] == '' assert results['NM_001276697.1:c.-72dup']['hgvs_transcript_variant'] == 'NM_001276697.1:c.-72dup' assert results['NM_001276697.1:c.-72dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276697.1:c.-72dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001276697.1:c.-72dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001276697.1:c.-72dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001276697.1:c.-72dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276697.1:c.-72dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': 
'7578523', 'alt': 'TG'}} + assert results['NM_001276697.1:c.-72dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276697.1:c.-72dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276697.1:c.-72dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert results['NM_001276697.1:c.-72dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263626.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276697.1'} assert 'NM_001126117.1:c.10dup' in list(results.keys()) @@ -11434,10 +11437,10 @@ def test_variant252(self): assert results['NM_001126117.1:c.10dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' assert results['NM_001126117.1:c.10dup']['hgvs_transcript_variant'] == 'NM_001126117.1:c.10dup' assert results['NM_001126117.1:c.10dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' - assert results['NM_001126117.1:c.10dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001126117.1:c.10dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001126117.1:c.10dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001126117.1:c.10dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': 
{'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126117.1:c.10dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001126117.1:c.10dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126117.1:c.10dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001126117.1:c.10dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert results['NM_001126117.1:c.10dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119589.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126117.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} assert 'NM_000546.5:c.406dup' in list(results.keys()) @@ -11451,10 +11454,10 @@ def test_variant252(self): assert results['NM_000546.5:c.406dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' assert results['NM_000546.5:c.406dup']['hgvs_transcript_variant'] == 'NM_000546.5:c.406dup' assert results['NM_000546.5:c.406dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' - assert results['NM_000546.5:c.406dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_000546.5:c.406dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - 
assert results['NM_000546.5:c.406dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_000546.5:c.406dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_000546.5:c.406dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_000546.5:c.406dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_000546.5:c.406dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_000546.5:c.406dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert results['NM_000546.5:c.406dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000537.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000546.5', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} assert results['flag'] == 'gene_variant' @@ -11469,10 +11472,10 @@ def test_variant252(self): assert results['NM_001276696.1:c.289dup']['hgvs_lrg_variant'] == '' assert results['NM_001276696.1:c.289dup']['hgvs_transcript_variant'] == 'NM_001276696.1:c.289dup' assert results['NM_001276696.1:c.289dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276696.1:c.289dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001276696.1:c.289dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001276696.1:c.289dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001276696.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276696.1:c.289dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276696.1:c.289dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276696.1:c.289dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276696.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert results['NM_001276696.1:c.289dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263625.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276696.1'} assert 'NM_001276698.1:c.-72dup' in list(results.keys()) @@ -11486,10 +11489,10 @@ def test_variant252(self): assert results['NM_001276698.1:c.-72dup']['hgvs_lrg_variant'] == '' assert results['NM_001276698.1:c.-72dup']['hgvs_transcript_variant'] == 
'NM_001276698.1:c.-72dup' assert results['NM_001276698.1:c.-72dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276698.1:c.-72dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001276698.1:c.-72dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001276698.1:c.-72dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001276698.1:c.-72dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276698.1:c.-72dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276698.1:c.-72dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276698.1:c.-72dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276698.1:c.-72dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert results['NM_001276698.1:c.-72dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263627.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276698.1'} assert 'NM_001276761.1:c.289dup' in 
list(results.keys()) @@ -11503,10 +11506,10 @@ def test_variant252(self): assert results['NM_001276761.1:c.289dup']['hgvs_lrg_variant'] == '' assert results['NM_001276761.1:c.289dup']['hgvs_transcript_variant'] == 'NM_001276761.1:c.289dup' assert results['NM_001276761.1:c.289dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276761.1:c.289dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001276761.1:c.289dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001276761.1:c.289dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001276761.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276761.1:c.289dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276761.1:c.289dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001276761.1:c.289dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001276761.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert 
results['NM_001276761.1:c.289dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263690.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276761.1'} assert 'NM_001126113.2:c.406dup' in list(results.keys()) @@ -11520,10 +11523,10 @@ def test_variant252(self): assert results['NM_001126113.2:c.406dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' assert results['NM_001126113.2:c.406dup']['hgvs_transcript_variant'] == 'NM_001126113.2:c.406dup' assert results['NM_001126113.2:c.406dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' - assert results['NM_001126113.2:c.406dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001126113.2:c.406dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001126113.2:c.406dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001126113.2:c.406dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126113.2:c.406dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001126113.2:c.406dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126113.2:c.406dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 
'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001126113.2:c.406dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert results['NM_001126113.2:c.406dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119585.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126113.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} assert 'NM_001126116.1:c.10dup' in list(results.keys()) @@ -11537,10 +11540,10 @@ def test_variant252(self): assert results['NM_001126116.1:c.10dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' assert results['NM_001126116.1:c.10dup']['hgvs_transcript_variant'] == 'NM_001126116.1:c.10dup' assert results['NM_001126116.1:c.10dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' - assert results['NM_001126116.1:c.10dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001126116.1:c.10dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001126116.1:c.10dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001126116.1:c.10dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126116.1:c.10dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert 
results['NM_001126116.1:c.10dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126116.1:c.10dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001126116.1:c.10dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert results['NM_001126116.1:c.10dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119588.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126116.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} assert 'NM_001126112.2:c.406dup' in list(results.keys()) @@ -11554,10 +11557,10 @@ def test_variant252(self): assert results['NM_001126112.2:c.406dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' assert results['NM_001126112.2:c.406dup']['hgvs_transcript_variant'] == 'NM_001126112.2:c.406dup' assert results['NM_001126112.2:c.406dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' - assert results['NM_001126112.2:c.406dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001126112.2:c.406dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001126112.2:c.406dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert 
results['NM_001126112.2:c.406dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126112.2:c.406dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001126112.2:c.406dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126112.2:c.406dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001126112.2:c.406dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert results['NM_001126112.2:c.406dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119584.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126112.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} assert 'NM_001126114.2:c.406dup' in list(results.keys()) @@ -11571,10 +11574,10 @@ def test_variant252(self): assert results['NM_001126114.2:c.406dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' assert results['NM_001126114.2:c.406dup']['hgvs_transcript_variant'] == 'NM_001126114.2:c.406dup' assert results['NM_001126114.2:c.406dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' - assert results['NM_001126114.2:c.406dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert 
results['NM_001126114.2:c.406dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001126114.2:c.406dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578524dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001126114.2:c.406dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675206dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126114.2:c.406dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001126114.2:c.406dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} + assert results['NM_001126114.2:c.406dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} + assert results['NM_001126114.2:c.406dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} assert results['NM_001126114.2:c.406dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119586.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126114.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} @@ -11686,10 +11689,10 @@ def test_variant254(self): assert results['NM_007294.3:c.*103_*106del']['hgvs_lrg_variant'] == 'LRG_292:g.172409_172412del' assert results['NM_007294.3:c.*103_*106del']['hgvs_transcript_variant'] == 
'NM_007294.3:c.*103_*106del' assert results['NM_007294.3:c.*103_*106del']['hgvs_refseqgene_variant'] == 'NG_005905.2:g.172409_172412del' - assert results['NM_007294.3:c.*103_*106del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197589_41197592del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} - assert results['NM_007294.3:c.*103_*106del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} - assert results['NM_007294.3:c.*103_*106del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197589_41197592del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} - assert results['NM_007294.3:c.*103_*106del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} + assert results['NM_007294.3:c.*103_*106del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} + assert results['NM_007294.3:c.*103_*106del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} + assert results['NM_007294.3:c.*103_*106del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} + assert results['NM_007294.3:c.*103_*106del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} assert results['NM_007294.3:c.*103_*106del']['reference_sequence_records'] == 
{'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_005905.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009225.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007294.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_292.xml'} assert 'NM_007297.3:c.*103_*106del' in list(results.keys()) @@ -11703,10 +11706,10 @@ def test_variant254(self): assert results['NM_007297.3:c.*103_*106del']['hgvs_lrg_variant'] == '' assert results['NM_007297.3:c.*103_*106del']['hgvs_transcript_variant'] == 'NM_007297.3:c.*103_*106del' assert results['NM_007297.3:c.*103_*106del']['hgvs_refseqgene_variant'] == '' - assert results['NM_007297.3:c.*103_*106del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197589_41197592del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} - assert results['NM_007297.3:c.*103_*106del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} - assert results['NM_007297.3:c.*103_*106del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197589_41197592del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} - assert results['NM_007297.3:c.*103_*106del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} + assert results['NM_007297.3:c.*103_*106del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} + assert results['NM_007297.3:c.*103_*106del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} + assert 
results['NM_007297.3:c.*103_*106del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} + assert results['NM_007297.3:c.*103_*106del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} assert results['NM_007297.3:c.*103_*106del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009228.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007297.3'} assert 'NR_027676.1:n.5831_5834del' in list(results.keys()) @@ -11720,10 +11723,10 @@ def test_variant254(self): assert results['NR_027676.1:n.5831_5834del']['hgvs_lrg_variant'] == '' assert results['NR_027676.1:n.5831_5834del']['hgvs_transcript_variant'] == 'NR_027676.1:n.5831_5834del' assert results['NR_027676.1:n.5831_5834del']['hgvs_refseqgene_variant'] == '' - assert results['NR_027676.1:n.5831_5834del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197589_41197592del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} - assert results['NR_027676.1:n.5831_5834del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} - assert results['NR_027676.1:n.5831_5834del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197589_41197592del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} - assert results['NR_027676.1:n.5831_5834del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} + assert results['NR_027676.1:n.5831_5834del']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} + assert results['NR_027676.1:n.5831_5834del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} + assert results['NR_027676.1:n.5831_5834del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} + assert results['NR_027676.1:n.5831_5834del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} assert results['NR_027676.1:n.5831_5834del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_027676.1'} assert 'NM_007300.3:c.*103_*106del' in list(results.keys()) @@ -11737,10 +11740,10 @@ def test_variant254(self): assert results['NM_007300.3:c.*103_*106del']['hgvs_lrg_variant'] == '' assert results['NM_007300.3:c.*103_*106del']['hgvs_transcript_variant'] == 'NM_007300.3:c.*103_*106del' assert results['NM_007300.3:c.*103_*106del']['hgvs_refseqgene_variant'] == '' - assert results['NM_007300.3:c.*103_*106del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197589_41197592del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} - assert results['NM_007300.3:c.*103_*106del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} - assert results['NM_007300.3:c.*103_*106del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197589_41197592del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} - assert 
results['NM_007300.3:c.*103_*106del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} + assert results['NM_007300.3:c.*103_*106del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} + assert results['NM_007300.3:c.*103_*106del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} + assert results['NM_007300.3:c.*103_*106del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} + assert results['NM_007300.3:c.*103_*106del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} assert results['NM_007300.3:c.*103_*106del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009231.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007300.3'} assert results['flag'] == 'gene_variant' @@ -11755,10 +11758,10 @@ def test_variant254(self): assert results['NM_007299.3:c.*209_*212del']['hgvs_lrg_variant'] == '' assert results['NM_007299.3:c.*209_*212del']['hgvs_transcript_variant'] == 'NM_007299.3:c.*209_*212del' assert results['NM_007299.3:c.*209_*212del']['hgvs_refseqgene_variant'] == '' - assert results['NM_007299.3:c.*209_*212del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197589_41197592del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} - assert results['NM_007299.3:c.*209_*212del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} - assert results['NM_007299.3:c.*209_*212del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197589_41197592del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} - assert results['NM_007299.3:c.*209_*212del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} + assert results['NM_007299.3:c.*209_*212del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} + assert results['NM_007299.3:c.*209_*212del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} + assert results['NM_007299.3:c.*209_*212del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} + assert results['NM_007299.3:c.*209_*212del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} assert results['NM_007299.3:c.*209_*212del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009230.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007299.3'} assert 'NM_007298.3:c.*103_*106del' in list(results.keys()) @@ -11772,10 +11775,10 @@ def test_variant254(self): assert results['NM_007298.3:c.*103_*106del']['hgvs_lrg_variant'] == '' assert results['NM_007298.3:c.*103_*106del']['hgvs_transcript_variant'] == 'NM_007298.3:c.*103_*106del' assert 
results['NM_007298.3:c.*103_*106del']['hgvs_refseqgene_variant'] == '' - assert results['NM_007298.3:c.*103_*106del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197589_41197592del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} - assert results['NM_007298.3:c.*103_*106del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} - assert results['NM_007298.3:c.*103_*106del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197589_41197592del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} - assert results['NM_007298.3:c.*103_*106del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045572_43045575del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} + assert results['NM_007298.3:c.*103_*106del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} + assert results['NM_007298.3:c.*103_*106del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} + assert results['NM_007298.3:c.*103_*106del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} + assert results['NM_007298.3:c.*103_*106del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} assert results['NM_007298.3:c.*103_*106del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009229.2', 
'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007298.3'} @@ -12191,10 +12194,10 @@ def test_variant263(self): assert results['NM_001351443.1:c.-16+941_-16+946del']['hgvs_lrg_variant'] == '' assert results['NM_001351443.1:c.-16+941_-16+946del']['hgvs_transcript_variant'] == 'NM_001351443.1:c.-16+941_-16+946del' assert results['NM_001351443.1:c.-16+941_-16+946del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001351443.1:c.-16+941_-16+946del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_001351443.1:c.-16+941_-16+946del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} - assert results['NM_001351443.1:c.-16+941_-16+946del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_001351443.1:c.-16+941_-16+946del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_001351443.1:c.-16+941_-16+946del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_001351443.1:c.-16+941_-16+946del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_001351443.1:c.-16+941_-16+946del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': '18', 'ref': 
'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_001351443.1:c.-16+941_-16+946del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} assert results['NM_001351443.1:c.-16+941_-16+946del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001338372.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001351443.1'} assert 'NM_001258222.1:c.10-47053_10-47048del' in list(results.keys()) @@ -12208,10 +12211,10 @@ def test_variant263(self): assert results['NM_001258222.1:c.10-47053_10-47048del']['hgvs_lrg_variant'] == '' assert results['NM_001258222.1:c.10-47053_10-47048del']['hgvs_transcript_variant'] == 'NM_001258222.1:c.10-47053_10-47048del' assert results['NM_001258222.1:c.10-47053_10-47048del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001258222.1:c.10-47053_10-47048del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_001258222.1:c.10-47053_10-47048del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} - assert results['NM_001258222.1:c.10-47053_10-47048del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_001258222.1:c.10-47053_10-47048del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_001258222.1:c.10-47053_10-47048del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_001258222.1:c.10-47053_10-47048del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_001258222.1:c.10-47053_10-47048del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_001258222.1:c.10-47053_10-47048del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} assert results['NM_001258222.1:c.10-47053_10-47048del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001245151.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001258222.1'} assert 'NM_001258221.1:c.-16+1426_-16+1431del' in list(results.keys()) @@ -12225,10 +12228,10 @@ def test_variant263(self): assert results['NM_001258221.1:c.-16+1426_-16+1431del']['hgvs_lrg_variant'] == '' assert results['NM_001258221.1:c.-16+1426_-16+1431del']['hgvs_transcript_variant'] == 'NM_001258221.1:c.-16+1426_-16+1431del' assert results['NM_001258221.1:c.-16+1426_-16+1431del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001258221.1:c.-16+1426_-16+1431del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_001258221.1:c.-16+1426_-16+1431del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} - assert 
results['NM_001258221.1:c.-16+1426_-16+1431del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_001258221.1:c.-16+1426_-16+1431del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} assert results['NM_001258221.1:c.-16+1426_-16+1431del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001245150.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001258221.1'} assert 'NM_001258222.2:c.10-47053_10-47048del' in list(results.keys()) @@ -12242,10 +12245,10 @@ def test_variant263(self): assert results['NM_001258222.2:c.10-47053_10-47048del']['hgvs_lrg_variant'] == '' assert results['NM_001258222.2:c.10-47053_10-47048del']['hgvs_transcript_variant'] == 'NM_001258222.2:c.10-47053_10-47048del' assert 
results['NM_001258222.2:c.10-47053_10-47048del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001258222.2:c.10-47053_10-47048del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_001258222.2:c.10-47053_10-47048del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} - assert results['NM_001258222.2:c.10-47053_10-47048del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_001258222.2:c.10-47053_10-47048del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_001258222.2:c.10-47053_10-47048del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_001258222.2:c.10-47053_10-47048del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_001258222.2:c.10-47053_10-47048del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_001258222.2:c.10-47053_10-47048del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} assert 
results['NM_001258222.2:c.10-47053_10-47048del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001245151.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001258222.2'} assert results['flag'] == 'gene_variant' @@ -12260,10 +12263,10 @@ def test_variant263(self): assert results['NM_001136205.2:c.-16+588_-16+593del']['hgvs_lrg_variant'] == '' assert results['NM_001136205.2:c.-16+588_-16+593del']['hgvs_transcript_variant'] == 'NM_001136205.2:c.-16+588_-16+593del' assert results['NM_001136205.2:c.-16+588_-16+593del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001136205.2:c.-16+588_-16+593del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_001136205.2:c.-16+588_-16+593del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} - assert results['NM_001136205.2:c.-16+588_-16+593del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_001136205.2:c.-16+588_-16+593del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_001136205.2:c.-16+588_-16+593del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_001136205.2:c.-16+588_-16+593del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} 
+ assert results['NM_001136205.2:c.-16+588_-16+593del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_001136205.2:c.-16+588_-16+593del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} assert results['NM_001136205.2:c.-16+588_-16+593del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129677.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001136205.2'} assert 'NM_198991.3:c.-15-47053_-15-47048del' in list(results.keys()) @@ -12277,10 +12280,10 @@ def test_variant263(self): assert results['NM_198991.3:c.-15-47053_-15-47048del']['hgvs_lrg_variant'] == '' assert results['NM_198991.3:c.-15-47053_-15-47048del']['hgvs_transcript_variant'] == 'NM_198991.3:c.-15-47053_-15-47048del' assert results['NM_198991.3:c.-15-47053_-15-47048del']['hgvs_refseqgene_variant'] == '' - assert results['NM_198991.3:c.-15-47053_-15-47048del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_198991.3:c.-15-47053_-15-47048del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} - assert results['NM_198991.3:c.-15-47053_-15-47048del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_198991.3:c.-15-47053_-15-47048del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': '18', 'ref': 
'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_198991.3:c.-15-47053_-15-47048del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_198991.3:c.-15-47053_-15-47048del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_198991.3:c.-15-47053_-15-47048del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_198991.3:c.-15-47053_-15-47048del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} assert results['NM_198991.3:c.-15-47053_-15-47048del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_945342.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198991.3'} assert 'NM_001142730.2:c.234_239del' in list(results.keys()) @@ -12294,10 +12297,10 @@ def test_variant263(self): assert results['NM_001142730.2:c.234_239del']['hgvs_lrg_variant'] == '' assert results['NM_001142730.2:c.234_239del']['hgvs_transcript_variant'] == 'NM_001142730.2:c.234_239del' assert results['NM_001142730.2:c.234_239del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001142730.2:c.234_239del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_001142730.2:c.234_239del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': 
'26548297', 'alt': 'G'}} - assert results['NM_001142730.2:c.234_239del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128262_24128267del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_001142730.2:c.234_239del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548298_26548303del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_001142730.2:c.234_239del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_001142730.2:c.234_239del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} + assert results['NM_001142730.2:c.234_239del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} + assert results['NM_001142730.2:c.234_239del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} assert results['NM_001142730.2:c.234_239del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001136202.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001142730.2'} @@ -13859,10 +13862,10 @@ def test_variant273(self): assert results['NR_148667.1:n.638_645del']['hgvs_lrg_variant'] == '' assert results['NR_148667.1:n.638_645del']['hgvs_transcript_variant'] == 'NR_148667.1:n.638_645del' assert results['NR_148667.1:n.638_645del']['hgvs_refseqgene_variant'] == '' - assert results['NR_148667.1:n.638_645del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NR_148667.1:n.638_645del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NR_148667.1:n.638_645del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NR_148667.1:n.638_645del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NR_148667.1:n.638_645del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NR_148667.1:n.638_645del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NR_148667.1:n.638_645del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NR_148667.1:n.638_645del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NR_148667.1:n.638_645del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_148667.1'} assert 'NM_001165964.2:c.233_240del' in list(results.keys()) @@ -13876,10 +13879,10 @@ def test_variant273(self): assert 
results['NM_001165964.2:c.233_240del']['hgvs_lrg_variant'] == '' assert results['NM_001165964.2:c.233_240del']['hgvs_transcript_variant'] == 'NM_001165964.2:c.233_240del' assert results['NM_001165964.2:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001165964.2:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001165964.2:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001165964.2:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001165964.2:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001165964.2:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001165964.2:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001165964.2:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001165964.2:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001165964.2:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159436.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165964.2'} assert 'NM_001353951.1:c.233_240del' in list(results.keys()) @@ -13893,10 +13896,10 @@ def test_variant273(self): assert results['NM_001353951.1:c.233_240del']['hgvs_lrg_variant'] == '' assert results['NM_001353951.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353951.1:c.233_240del' assert results['NM_001353951.1:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353951.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353951.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353951.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353951.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353951.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353951.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353951.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353951.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001353951.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340880.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353951.1'} assert 'NM_001353954.1:c.233_240del' in list(results.keys()) @@ -13910,10 +13913,10 @@ def test_variant273(self): assert results['NM_001353954.1:c.233_240del']['hgvs_lrg_variant'] == '' assert results['NM_001353954.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353954.1:c.233_240del' assert results['NM_001353954.1:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353954.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353954.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353954.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353954.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353954.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353954.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353954.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353954.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001353954.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340883.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353954.1'} assert 'NM_001353961.1:c.-2193_-2186del' in list(results.keys()) @@ -13927,10 +13930,10 @@ def test_variant273(self): assert results['NM_001353961.1:c.-2193_-2186del']['hgvs_lrg_variant'] == '' assert results['NM_001353961.1:c.-2193_-2186del']['hgvs_transcript_variant'] == 'NM_001353961.1:c.-2193_-2186del' assert results['NM_001353961.1:c.-2193_-2186del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353961.1:c.-2193_-2186del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353961.1:c.-2193_-2186del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353961.1:c.-2193_-2186del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353961.1:c.-2193_-2186del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353961.1:c.-2193_-2186del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353961.1:c.-2193_-2186del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353961.1:c.-2193_-2186del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353961.1:c.-2193_-2186del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001353961.1:c.-2193_-2186del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340890.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353961.1'} assert 'NM_001353948.1:c.233_240del' in list(results.keys()) @@ -13944,10 +13947,10 @@ def test_variant273(self): assert results['NM_001353948.1:c.233_240del']['hgvs_lrg_variant'] == '' assert results['NM_001353948.1:c.233_240del']['hgvs_transcript_variant'] == 
'NM_001353948.1:c.233_240del' assert results['NM_001353948.1:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353948.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353948.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353948.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353948.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353948.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353948.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353948.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353948.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert 
results['NM_001353948.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340877.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353948.1'} assert 'NM_001353960.1:c.233_240del' in list(results.keys()) @@ -13961,10 +13964,10 @@ def test_variant273(self): assert results['NM_001353960.1:c.233_240del']['hgvs_lrg_variant'] == '' assert results['NM_001353960.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353960.1:c.233_240del' assert results['NM_001353960.1:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353960.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353960.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353960.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353960.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353960.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353960.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert 
results['NM_001353960.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353960.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001353960.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340889.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353960.1'} assert 'NM_001202435.1:c.233_240del' in list(results.keys()) @@ -13978,10 +13981,10 @@ def test_variant273(self): assert results['NM_001202435.1:c.233_240del']['hgvs_lrg_variant'] == '' assert results['NM_001202435.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001202435.1:c.233_240del' assert results['NM_001202435.1:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001202435.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001202435.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001202435.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001202435.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert 
results['NM_001202435.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001202435.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001202435.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001202435.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001202435.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001189364.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001202435.1'} assert 'NM_001202435.2:c.233_240del' in list(results.keys()) @@ -13995,10 +13998,10 @@ def test_variant273(self): assert results['NM_001202435.2:c.233_240del']['hgvs_lrg_variant'] == '' assert results['NM_001202435.2:c.233_240del']['hgvs_transcript_variant'] == 'NM_001202435.2:c.233_240del' assert results['NM_001202435.2:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001202435.2:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001202435.2:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert 
results['NM_001202435.2:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001202435.2:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001202435.2:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001202435.2:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001202435.2:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001202435.2:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001202435.2:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001189364.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001202435.2'} assert 'NM_006920.5:c.233_240del' in list(results.keys()) @@ -14012,10 +14015,10 @@ def test_variant273(self): assert results['NM_006920.5:c.233_240del']['hgvs_lrg_variant'] == '' assert results['NM_006920.5:c.233_240del']['hgvs_transcript_variant'] == 'NM_006920.5:c.233_240del' assert results['NM_006920.5:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_006920.5:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_006920.5:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_006920.5:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_006920.5:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_006920.5:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_006920.5:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_006920.5:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_006920.5:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_006920.5:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_008851.3', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_006920.5'} assert 'NM_001353955.1:c.233_240del' in list(results.keys()) @@ -14029,10 +14032,10 @@ def test_variant273(self): assert results['NM_001353955.1:c.233_240del']['hgvs_lrg_variant'] == '' assert results['NM_001353955.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353955.1:c.233_240del' assert results['NM_001353955.1:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353955.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353955.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353955.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353955.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353955.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353955.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353955.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 
'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353955.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001353955.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340884.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353955.1'} assert 'NM_001353952.1:c.233_240del' in list(results.keys()) @@ -14046,10 +14049,10 @@ def test_variant273(self): assert results['NM_001353952.1:c.233_240del']['hgvs_lrg_variant'] == '' assert results['NM_001353952.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353952.1:c.233_240del' assert results['NM_001353952.1:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353952.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353952.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353952.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353952.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353952.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + 
assert results['NM_001353952.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353952.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353952.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001353952.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340881.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353952.1'} assert 'NM_001353957.1:c.233_240del' in list(results.keys()) @@ -14063,10 +14066,10 @@ def test_variant273(self): assert results['NM_001353957.1:c.233_240del']['hgvs_lrg_variant'] == '' assert results['NM_001353957.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353957.1:c.233_240del' assert results['NM_001353957.1:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353957.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353957.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353957.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert 
results['NM_001353957.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353957.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353957.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353957.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353957.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001353957.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340886.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353957.1'} assert results['flag'] == 'gene_variant' @@ -14081,10 +14084,10 @@ def test_variant273(self): assert results['NM_006920.4:c.233_240del']['hgvs_lrg_variant'] == 'LRG_8:g.5251_5258del' assert results['NM_006920.4:c.233_240del']['hgvs_transcript_variant'] == 'NM_006920.4:c.233_240del' assert results['NM_006920.4:c.233_240del']['hgvs_refseqgene_variant'] == 'NG_011906.1:g.5251_5258del' - assert results['NM_006920.4:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert 
results['NM_006920.4:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_006920.4:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_006920.4:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_006920.4:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_006920.4:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_006920.4:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_006920.4:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_006920.4:c.233_240del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011906.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_008851.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006920.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_8.xml'} assert 'NM_001353950.1:c.233_240del' in list(results.keys()) @@ -14098,10 +14101,10 @@ def 
test_variant273(self): assert results['NM_001353950.1:c.233_240del']['hgvs_lrg_variant'] == '' assert results['NM_001353950.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353950.1:c.233_240del' assert results['NM_001353950.1:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353950.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353950.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353950.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353950.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353950.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353950.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353950.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353950.1:c.233_240del']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001353950.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340879.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353950.1'} assert 'NM_001165963.2:c.233_240del' in list(results.keys()) @@ -14115,10 +14118,10 @@ def test_variant273(self): assert results['NM_001165963.2:c.233_240del']['hgvs_lrg_variant'] == '' assert results['NM_001165963.2:c.233_240del']['hgvs_transcript_variant'] == 'NM_001165963.2:c.233_240del' assert results['NM_001165963.2:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001165963.2:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001165963.2:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001165963.2:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001165963.2:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001165963.2:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001165963.2:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001165963.2:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001165963.2:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001165963.2:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159435.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165963.2'} assert 'NM_001165963.1:c.233_240del' in list(results.keys()) @@ -14132,10 +14135,10 @@ def test_variant273(self): assert results['NM_001165963.1:c.233_240del']['hgvs_lrg_variant'] == '' assert results['NM_001165963.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001165963.1:c.233_240del' assert results['NM_001165963.1:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001165963.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001165963.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001165963.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001165963.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001165963.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001165963.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001165963.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001165963.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001165963.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159435.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165963.1'} assert 'NM_001165964.1:c.233_240del' in list(results.keys()) @@ -14149,10 +14152,10 @@ def test_variant273(self): assert results['NM_001165964.1:c.233_240del']['hgvs_lrg_variant'] == '' assert results['NM_001165964.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001165964.1:c.233_240del' assert results['NM_001165964.1:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001165964.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001165964.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001165964.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001165964.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001165964.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001165964.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001165964.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001165964.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001165964.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159436.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165964.1'} assert 'NM_001353958.1:c.233_240del' in list(results.keys()) @@ -14166,10 +14169,10 @@ def test_variant273(self): assert results['NM_001353958.1:c.233_240del']['hgvs_lrg_variant'] == '' assert results['NM_001353958.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353958.1:c.233_240del' 
assert results['NM_001353958.1:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353958.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353958.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353958.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353958.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353958.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353958.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353958.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353958.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001353958.1:c.233_240del']['reference_sequence_records'] == 
{'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340887.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353958.1'} assert 'NM_001353949.1:c.233_240del' in list(results.keys()) @@ -14183,10 +14186,10 @@ def test_variant273(self): assert results['NM_001353949.1:c.233_240del']['hgvs_lrg_variant'] == '' assert results['NM_001353949.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353949.1:c.233_240del' assert results['NM_001353949.1:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353949.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353949.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353949.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929892_166929899del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353949.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073382_166073389del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353949.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353949.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} + assert results['NM_001353949.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} + assert results['NM_001353949.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} assert results['NM_001353949.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340878.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353949.1'} @@ -15286,10 +15289,10 @@ def test_variant285(self): assert results['NM_001349798.1:c.45_46insCCT']['hgvs_lrg_variant'] == '' assert results['NM_001349798.1:c.45_46insCCT']['hgvs_transcript_variant'] == 'NM_001349798.1:c.45_46insCCT' assert results['NM_001349798.1:c.45_46insCCT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} - assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411758_152411759insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '152411758', 'alt': 'CAGG'}} - assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} - assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411758_152411759insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '152411758', 'alt': 'CAGG'}} + assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332912_153332913insGAG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '153332910', 
'alt': 'CAGG'}} + assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411760_152411761insGAG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '152411758', 'alt': 'CAGG'}} + assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332912_153332913insGAG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} + assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411760_152411761insGAG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '152411758', 'alt': 'CAGG'}} assert results['NM_001349798.1:c.45_46insCCT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_361014.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001349798.1'} assert 'NM_033632.3:c.45_46insCCT' in list(results.keys()) @@ -15303,10 +15306,10 @@ def test_variant285(self): assert results['NM_033632.3:c.45_46insCCT']['hgvs_lrg_variant'] == '' assert results['NM_033632.3:c.45_46insCCT']['hgvs_transcript_variant'] == 'NM_033632.3:c.45_46insCCT' assert results['NM_033632.3:c.45_46insCCT']['hgvs_refseqgene_variant'] == 'NG_029466.1:g.128262_128263insCCT' - assert results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} - assert results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411758_152411759insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '152411758', 'alt': 'CAGG'}} - assert results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} - assert 
results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411758_152411759insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '152411758', 'alt': 'CAGG'}} + assert results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332912_153332913insGAG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} + assert results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411760_152411761insGAG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '152411758', 'alt': 'CAGG'}} + assert results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332912_153332913insGAG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} + assert results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411760_152411761insGAG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '152411758', 'alt': 'CAGG'}} assert results['NM_033632.3:c.45_46insCCT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029466.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_361014.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_033632.3'} assert 'NM_001257069.1:c.45_46insCCT' in list(results.keys()) @@ -15320,10 +15323,10 @@ def test_variant285(self): assert results['NM_001257069.1:c.45_46insCCT']['hgvs_lrg_variant'] == '' assert results['NM_001257069.1:c.45_46insCCT']['hgvs_transcript_variant'] == 'NM_001257069.1:c.45_46insCCT' assert results['NM_001257069.1:c.45_46insCCT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} - 
assert results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411758_152411759insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '152411758', 'alt': 'CAGG'}} - assert results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} - assert results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411758_152411759insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '152411758', 'alt': 'CAGG'}} + assert results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332912_153332913insGAG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} + assert results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411760_152411761insGAG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '152411758', 'alt': 'CAGG'}} + assert results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332912_153332913insGAG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} + assert results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411760_152411761insGAG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '152411758', 'alt': 'CAGG'}} assert results['NM_001257069.1:c.45_46insCCT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243998.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257069.1'} assert results['flag'] == 'gene_variant' @@ -15338,9 +15341,9 @@ def test_variant285(self): assert results['NM_001349798.2:c.45_46insCCT']['hgvs_lrg_variant'] == '' assert 
results['NM_001349798.2:c.45_46insCCT']['hgvs_transcript_variant'] == 'NM_001349798.2:c.45_46insCCT' assert results['NM_001349798.2:c.45_46insCCT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001349798.2:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} + assert results['NM_001349798.2:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332912_153332913insGAG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} assert 'hg38' not in list(results['NM_001349798.2:c.45_46insCCT']['primary_assembly_loci'].keys()) - assert results['NM_001349798.2:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332910_153332911insAGG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} + assert results['NM_001349798.2:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332912_153332913insGAG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} assert 'grch38' not in list(results['NM_001349798.2:c.45_46insCCT']['primary_assembly_loci'].keys()) assert results['NM_001349798.2:c.45_46insCCT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001336727.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001349798.2'} @@ -15385,10 +15388,10 @@ def test_variant287(self): assert results['NM_003664.4:c.2409_2411del']['hgvs_lrg_variant'] == 'LRG_170:g.198691_198693del' assert results['NM_003664.4:c.2409_2411del']['hgvs_transcript_variant'] == 'NM_003664.4:c.2409_2411del' assert results['NM_003664.4:c.2409_2411del']['hgvs_refseqgene_variant'] == 'NG_007268.1:g.198691_198693del' - assert results['NM_003664.4:c.2409_2411del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000005.9:g.77396836_77396838del', 'vcf': {'chr': 'chr5', 'ref': 'TTTC', 'pos': '77396835', 'alt': 'T'}} - assert results['NM_003664.4:c.2409_2411del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.78101012_78101014del', 'vcf': {'chr': 'chr5', 'ref': 'TTTC', 'pos': '78101011', 'alt': 'T'}} - assert results['NM_003664.4:c.2409_2411del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396836_77396838del', 'vcf': {'chr': '5', 'ref': 'TTTC', 'pos': '77396835', 'alt': 'T'}} - assert results['NM_003664.4:c.2409_2411del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.78101012_78101014del', 'vcf': {'chr': '5', 'ref': 'TTTC', 'pos': '78101011', 'alt': 'T'}} + assert results['NM_003664.4:c.2409_2411del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396838_77396840del', 'vcf': {'chr': 'chr5', 'ref': 'TTTC', 'pos': '77396835', 'alt': 'T'}} + assert results['NM_003664.4:c.2409_2411del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.78101014_78101016del', 'vcf': {'chr': 'chr5', 'ref': 'TTTC', 'pos': '78101011', 'alt': 'T'}} + assert results['NM_003664.4:c.2409_2411del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396838_77396840del', 'vcf': {'chr': '5', 'ref': 'TTTC', 'pos': '77396835', 'alt': 'T'}} + assert results['NM_003664.4:c.2409_2411del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.78101014_78101016del', 'vcf': {'chr': '5', 'ref': 'TTTC', 'pos': '78101011', 'alt': 'T'}} assert results['NM_003664.4:c.2409_2411del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007268.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003655.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003664.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_170.xml'} assert 
results['flag'] == 'gene_variant' @@ -15403,9 +15406,9 @@ def test_variant287(self): assert results['NM_003664.3:c.2409_2411del']['hgvs_lrg_variant'] == '' assert results['NM_003664.3:c.2409_2411del']['hgvs_transcript_variant'] == 'NM_003664.3:c.2409_2411del' assert results['NM_003664.3:c.2409_2411del']['hgvs_refseqgene_variant'] == '' - assert results['NM_003664.3:c.2409_2411del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396836_77396838del', 'vcf': {'chr': 'chr5', 'ref': 'TTTC', 'pos': '77396835', 'alt': 'T'}} + assert results['NM_003664.3:c.2409_2411del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396838_77396840del', 'vcf': {'chr': 'chr5', 'ref': 'TTTC', 'pos': '77396835', 'alt': 'T'}} assert 'hg38' not in list(results['NM_003664.3:c.2409_2411del']['primary_assembly_loci'].keys()) - assert results['NM_003664.3:c.2409_2411del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396836_77396838del', 'vcf': {'chr': '5', 'ref': 'TTTC', 'pos': '77396835', 'alt': 'T'}} + assert results['NM_003664.3:c.2409_2411del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396838_77396840del', 'vcf': {'chr': '5', 'ref': 'TTTC', 'pos': '77396835', 'alt': 'T'}} assert 'grch38' not in list(results['NM_003664.3:c.2409_2411del']['primary_assembly_loci'].keys()) assert results['NM_003664.3:c.2409_2411del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003655.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003664.3'} @@ -15420,10 +15423,10 @@ def test_variant287(self): assert results['NM_001271769.1:c.2262_2264del']['hgvs_lrg_variant'] == '' assert results['NM_001271769.1:c.2262_2264del']['hgvs_transcript_variant'] == 'NM_001271769.1:c.2262_2264del' assert results['NM_001271769.1:c.2262_2264del']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_001271769.1:c.2262_2264del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396836_77396838del', 'vcf': {'chr': 'chr5', 'ref': 'TTTC', 'pos': '77396835', 'alt': 'T'}} - assert results['NM_001271769.1:c.2262_2264del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.78101012_78101014del', 'vcf': {'chr': 'chr5', 'ref': 'TTTC', 'pos': '78101011', 'alt': 'T'}} - assert results['NM_001271769.1:c.2262_2264del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396836_77396838del', 'vcf': {'chr': '5', 'ref': 'TTTC', 'pos': '77396835', 'alt': 'T'}} - assert results['NM_001271769.1:c.2262_2264del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.78101012_78101014del', 'vcf': {'chr': '5', 'ref': 'TTTC', 'pos': '78101011', 'alt': 'T'}} + assert results['NM_001271769.1:c.2262_2264del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396838_77396840del', 'vcf': {'chr': 'chr5', 'ref': 'TTTC', 'pos': '77396835', 'alt': 'T'}} + assert results['NM_001271769.1:c.2262_2264del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.78101014_78101016del', 'vcf': {'chr': 'chr5', 'ref': 'TTTC', 'pos': '78101011', 'alt': 'T'}} + assert results['NM_001271769.1:c.2262_2264del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396838_77396840del', 'vcf': {'chr': '5', 'ref': 'TTTC', 'pos': '77396835', 'alt': 'T'}} + assert results['NM_001271769.1:c.2262_2264del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.78101014_78101016del', 'vcf': {'chr': '5', 'ref': 'TTTC', 'pos': '78101011', 'alt': 'T'}} assert results['NM_001271769.1:c.2262_2264del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001258698.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001271769.1'} @@ 
-16521,10 +16524,10 @@ def test_variant301(self): assert results['NR_149084.1:n.221+1140_221+1142del']['hgvs_lrg_variant'] == '' assert results['NR_149084.1:n.221+1140_221+1142del']['hgvs_transcript_variant'] == 'NR_149084.1:n.221+1140_221+1142del' assert results['NR_149084.1:n.221+1140_221+1142del']['hgvs_refseqgene_variant'] == '' - assert results['NR_149084.1:n.221+1140_221+1142del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.117199645_117199647del', 'vcf': {'chr': 'chr7', 'ref': 'ATCT', 'pos': '117199644', 'alt': 'A'}} - assert results['NR_149084.1:n.221+1140_221+1142del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.117559591_117559593del', 'vcf': {'chr': 'chr7', 'ref': 'ATCT', 'pos': '117559590', 'alt': 'A'}} - assert results['NR_149084.1:n.221+1140_221+1142del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.117199645_117199647del', 'vcf': {'chr': '7', 'ref': 'ATCT', 'pos': '117199644', 'alt': 'A'}} - assert results['NR_149084.1:n.221+1140_221+1142del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.117559591_117559593del', 'vcf': {'chr': '7', 'ref': 'ATCT', 'pos': '117559590', 'alt': 'A'}} + assert results['NR_149084.1:n.221+1140_221+1142del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.117199646_117199648del', 'vcf': {'chr': 'chr7', 'ref': 'ATCT', 'pos': '117199644', 'alt': 'A'}} + assert results['NR_149084.1:n.221+1140_221+1142del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.117559592_117559594del', 'vcf': {'chr': 'chr7', 'ref': 'ATCT', 'pos': '117559590', 'alt': 'A'}} + assert results['NR_149084.1:n.221+1140_221+1142del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.117199646_117199648del', 'vcf': {'chr': '7', 'ref': 'ATCT', 'pos': '117199644', 'alt': 'A'}} + assert 
results['NR_149084.1:n.221+1140_221+1142del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.117559592_117559594del', 'vcf': {'chr': '7', 'ref': 'ATCT', 'pos': '117559590', 'alt': 'A'}} assert results['NR_149084.1:n.221+1140_221+1142del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_149084.1'} assert 'NM_000492.3:c.1521_1523del' in list(results.keys()) @@ -16984,10 +16987,10 @@ def test_variant309(self): assert results['NM_001261407.1:c.5504dup']['hgvs_lrg_variant'] == '' assert results['NM_001261407.1:c.5504dup']['hgvs_transcript_variant'] == 'NM_001261407.1:c.5504dup' assert results['NM_001261407.1:c.5504dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001261407.1:c.5504dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112057dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} - assert results['NM_001261407.1:c.5504dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112058dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} - assert results['NM_001261407.1:c.5504dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112057dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} - assert results['NM_001261407.1:c.5504dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112058dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} + assert results['NM_001261407.1:c.5504dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112059dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} + assert results['NM_001261407.1:c.5504dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112060dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} + assert 
results['NM_001261407.1:c.5504dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112059dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} + assert results['NM_001261407.1:c.5504dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112060dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} assert results['NM_001261407.1:c.5504dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001248336.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001261407.1'} assert 'NM_001330637.1:c.5690dup' in list(results.keys()) @@ -17001,10 +17004,10 @@ def test_variant309(self): assert results['NM_001330637.1:c.5690dup']['hgvs_lrg_variant'] == '' assert results['NM_001330637.1:c.5690dup']['hgvs_transcript_variant'] == 'NM_001330637.1:c.5690dup' assert results['NM_001330637.1:c.5690dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330637.1:c.5690dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112057dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} - assert results['NM_001330637.1:c.5690dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112058dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} - assert results['NM_001330637.1:c.5690dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112057dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} - assert results['NM_001330637.1:c.5690dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112058dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} + assert results['NM_001330637.1:c.5690dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112059dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112056', 
'alt': 'TG'}} + assert results['NM_001330637.1:c.5690dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112060dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} + assert results['NM_001330637.1:c.5690dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112059dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} + assert results['NM_001330637.1:c.5690dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112060dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} assert results['NM_001330637.1:c.5690dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317566.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330637.1'} assert 'NM_001261406.1:c.5591dup' in list(results.keys()) @@ -17018,10 +17021,10 @@ def test_variant309(self): assert results['NM_001261406.1:c.5591dup']['hgvs_lrg_variant'] == '' assert results['NM_001261406.1:c.5591dup']['hgvs_transcript_variant'] == 'NM_001261406.1:c.5591dup' assert results['NM_001261406.1:c.5591dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001261406.1:c.5591dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112057dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} - assert results['NM_001261406.1:c.5591dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112058dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} - assert results['NM_001261406.1:c.5591dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112057dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} - assert results['NM_001261406.1:c.5591dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112058dup', 'vcf': {'chr': '9', 'ref': 
'T', 'pos': '13112057', 'alt': 'TG'}} + assert results['NM_001261406.1:c.5591dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112059dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} + assert results['NM_001261406.1:c.5591dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112060dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} + assert results['NM_001261406.1:c.5591dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112059dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} + assert results['NM_001261406.1:c.5591dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112060dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} assert results['NM_001261406.1:c.5591dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001248335.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001261406.1'} assert results['flag'] == 'gene_variant' @@ -17036,10 +17039,10 @@ def test_variant309(self): assert results['NM_003829.4:c.5603dup']['hgvs_lrg_variant'] == '' assert results['NM_003829.4:c.5603dup']['hgvs_transcript_variant'] == 'NM_003829.4:c.5603dup' assert results['NM_003829.4:c.5603dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_003829.4:c.5603dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112057dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} - assert results['NM_003829.4:c.5603dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112058dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} - assert results['NM_003829.4:c.5603dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112057dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': 
'13112056', 'alt': 'TG'}} - assert results['NM_003829.4:c.5603dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112058dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} + assert results['NM_003829.4:c.5603dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112059dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} + assert results['NM_003829.4:c.5603dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112060dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} + assert results['NM_003829.4:c.5603dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112059dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} + assert results['NM_003829.4:c.5603dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112060dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} assert results['NM_003829.4:c.5603dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003820.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003829.4'} @@ -17151,10 +17154,10 @@ def test_variant311(self): assert results['NM_001301227.1:c.773-3dup']['hgvs_lrg_variant'] == '' assert results['NM_001301227.1:c.773-3dup']['hgvs_transcript_variant'] == 'NM_001301227.1:c.773-3dup' assert results['NM_001301227.1:c.773-3dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001301227.1:c.773-3dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683241dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} - assert results['NM_001301227.1:c.773-3dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683244dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} - assert 
results['NM_001301227.1:c.773-3dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683241dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} - assert results['NM_001301227.1:c.773-3dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683244dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} + assert results['NM_001301227.1:c.773-3dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683248dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} + assert results['NM_001301227.1:c.773-3dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683251dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} + assert results['NM_001301227.1:c.773-3dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683248dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} + assert results['NM_001301227.1:c.773-3dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683251dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} assert results['NM_001301227.1:c.773-3dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001288156.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001301227.1'} assert 'NM_001301226.1:c.772+1002dup' in list(results.keys()) @@ -17168,10 +17171,10 @@ def test_variant311(self): assert results['NM_001301226.1:c.772+1002dup']['hgvs_lrg_variant'] == '' assert results['NM_001301226.1:c.772+1002dup']['hgvs_transcript_variant'] == 'NM_001301226.1:c.772+1002dup' assert results['NM_001301226.1:c.772+1002dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001301226.1:c.772+1002dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683241dup', 'vcf': {'chr': 'chr9', 
'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} - assert results['NM_001301226.1:c.772+1002dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683244dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} - assert results['NM_001301226.1:c.772+1002dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683241dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} - assert results['NM_001301226.1:c.772+1002dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683244dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} + assert results['NM_001301226.1:c.772+1002dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683248dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} + assert results['NM_001301226.1:c.772+1002dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683251dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} + assert results['NM_001301226.1:c.772+1002dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683248dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} + assert results['NM_001301226.1:c.772+1002dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683251dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} assert results['NM_001301226.1:c.772+1002dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001288155.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001301226.1'} assert 'NM_213674.1:c.772+1002dup' in list(results.keys()) @@ -17185,10 +17188,10 @@ def test_variant311(self): assert results['NM_213674.1:c.772+1002dup']['hgvs_lrg_variant'] == 'LRG_680:g.11814dup' assert 
results['NM_213674.1:c.772+1002dup']['hgvs_transcript_variant'] == 'NM_213674.1:c.772+1002dup' assert results['NM_213674.1:c.772+1002dup']['hgvs_refseqgene_variant'] == 'NG_011620.1:g.11814dup' - assert results['NM_213674.1:c.772+1002dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683241dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} - assert results['NM_213674.1:c.772+1002dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683244dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} - assert results['NM_213674.1:c.772+1002dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683241dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} - assert results['NM_213674.1:c.772+1002dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683244dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} + assert results['NM_213674.1:c.772+1002dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683248dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} + assert results['NM_213674.1:c.772+1002dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683251dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} + assert results['NM_213674.1:c.772+1002dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683248dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} + assert results['NM_213674.1:c.772+1002dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683251dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} assert results['NM_213674.1:c.772+1002dup']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_011620.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_998839.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_213674.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_680.xml'} assert 'NM_003289.3:c.773-3dup' in list(results.keys()) @@ -17202,10 +17205,10 @@ def test_variant311(self): assert results['NM_003289.3:c.773-3dup']['hgvs_lrg_variant'] == 'LRG_680:g.11814dup' assert results['NM_003289.3:c.773-3dup']['hgvs_transcript_variant'] == 'NM_003289.3:c.773-3dup' assert results['NM_003289.3:c.773-3dup']['hgvs_refseqgene_variant'] == 'NG_011620.1:g.11814dup' - assert results['NM_003289.3:c.773-3dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683241dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} - assert results['NM_003289.3:c.773-3dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683244dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} - assert results['NM_003289.3:c.773-3dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683241dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} - assert results['NM_003289.3:c.773-3dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683244dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} + assert results['NM_003289.3:c.773-3dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683248dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} + assert results['NM_003289.3:c.773-3dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683251dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} + assert results['NM_003289.3:c.773-3dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000009.11:g.35683248dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} + assert results['NM_003289.3:c.773-3dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683251dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} assert results['NM_003289.3:c.773-3dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011620.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003280.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003289.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_680.xml'} assert results['flag'] == 'gene_variant' @@ -17294,7 +17297,7 @@ def test_variant313(self): assert 'NM_005247.2:c.616del' in list(results.keys()) assert results['NM_005247.2:c.616del']['hgvs_lrg_transcript_variant'] == '' assert results['NM_005247.2:c.616del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_005247.2:c.616del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003571046.1:g.10392del', 'vcf': {'chr': 'HG536_PATCH', 'ref': 'AC', 'pos': '10391', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003571046.1:g.10392del', 'vcf': {'chr': 'NW_003571046.1', 'ref': 'AC', 'pos': '10391', 'alt': 'A'}}}]) + self.assertCountEqual(results['NM_005247.2:c.616del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003571046.1:g.10396del', 'vcf': {'chr': 'HG536_PATCH', 'ref': 'AC', 'pos': '10391', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003571046.1:g.10396del', 'vcf': {'chr': 'NW_003571046.1', 'ref': 'AC', 'pos': '10391', 'alt': 'A'}}}]) assert results['NM_005247.2:c.616del']['gene_symbol'] == 'FGF3' assert results['NM_005247.2:c.616del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005238.1(LRG_1303p1):p.(Val206SerfsTer117)', 'slr': 'NP_005238.1:p.(V206Sfs*117)'} assert results['NM_005247.2:c.616del']['submitted_variant'] == 'HG536_PATCH-10391-AC-A' 
@@ -17302,10 +17305,10 @@ def test_variant313(self): assert results['NM_005247.2:c.616del']['hgvs_lrg_variant'] == 'LRG_1303:g.14016del' assert results['NM_005247.2:c.616del']['hgvs_transcript_variant'] == 'NM_005247.2:c.616del' assert results['NM_005247.2:c.616del']['hgvs_refseqgene_variant'] == 'NG_009016.1:g.14016del' - assert results['NM_005247.2:c.616del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.69625177del', 'vcf': {'chr': 'chr11', 'ref': 'AC', 'pos': '69625176', 'alt': 'A'}} - assert results['NM_005247.2:c.616del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.69810409del', 'vcf': {'chr': 'chr11', 'ref': 'AC', 'pos': '69810408', 'alt': 'A'}} - assert results['NM_005247.2:c.616del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.69625177del', 'vcf': {'chr': '11', 'ref': 'AC', 'pos': '69625176', 'alt': 'A'}} - assert results['NM_005247.2:c.616del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.69810409del', 'vcf': {'chr': '11', 'ref': 'AC', 'pos': '69810408', 'alt': 'A'}} + assert results['NM_005247.2:c.616del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.69625181del', 'vcf': {'chr': 'chr11', 'ref': 'AC', 'pos': '69625176', 'alt': 'A'}} + assert results['NM_005247.2:c.616del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.69810413del', 'vcf': {'chr': 'chr11', 'ref': 'AC', 'pos': '69810408', 'alt': 'A'}} + assert results['NM_005247.2:c.616del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.69625181del', 'vcf': {'chr': '11', 'ref': 'AC', 'pos': '69625176', 'alt': 'A'}} + assert results['NM_005247.2:c.616del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.69810413del', 'vcf': {'chr': '11', 'ref': 'AC', 'pos': '69810408', 'alt': 'A'}} assert 
results['NM_005247.2:c.616del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009016.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005238.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005247.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_1303.xml'} @@ -17449,9 +17452,9 @@ def test_variant317(self): assert results['NM_020699.2:c.802_803insTT']['hgvs_lrg_variant'] == '' assert results['NM_020699.2:c.802_803insTT']['hgvs_transcript_variant'] == 'NM_020699.2:c.802_803insTT' assert results['NM_020699.2:c.802_803insTT']['hgvs_refseqgene_variant'] == '' - assert results['NM_020699.2:c.802_803insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.153789945_153789946delinsGAAG', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '153789945', 'alt': 'GAA'}} + assert results['NM_020699.2:c.802_803insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.153789945_153789946insAA', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '153789945', 'alt': 'GAA'}} assert results['NM_020699.2:c.802_803insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153817469_153817470insAA', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '153817469', 'alt': 'GAA'}} - assert results['NM_020699.2:c.802_803insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.153789945_153789946delinsGAAG', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '153789945', 'alt': 'GAA'}} + assert results['NM_020699.2:c.802_803insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.153789945_153789946insAA', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '153789945', 'alt': 'GAA'}} assert results['NM_020699.2:c.802_803insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153817469_153817470insAA', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '153817469', 'alt': 'GAA'}} assert 
results['NM_020699.2:c.802_803insTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065750.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020699.2'} @@ -17695,10 +17698,10 @@ def test_variant323(self): assert results['NM_198180.2:c.408_410del']['hgvs_lrg_variant'] == '' assert results['NM_198180.2:c.408_410del']['hgvs_transcript_variant'] == 'NM_198180.2:c.408_410del' assert results['NM_198180.2:c.408_410del']['hgvs_refseqgene_variant'] == '' - assert results['NM_198180.2:c.408_410del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.133768816_133768818del', 'vcf': {'chr': 'chr9', 'ref': 'TCAC', 'pos': '133768815', 'alt': 'T'}} - assert results['NM_198180.2:c.408_410del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.130893429_130893431del', 'vcf': {'chr': 'chr9', 'ref': 'TCAC', 'pos': '130893428', 'alt': 'T'}} - assert results['NM_198180.2:c.408_410del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.133768816_133768818del', 'vcf': {'chr': '9', 'ref': 'TCAC', 'pos': '133768815', 'alt': 'T'}} - assert results['NM_198180.2:c.408_410del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.130893429_130893431del', 'vcf': {'chr': '9', 'ref': 'TCAC', 'pos': '130893428', 'alt': 'T'}} + assert results['NM_198180.2:c.408_410del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.133768817_133768819del', 'vcf': {'chr': 'chr9', 'ref': 'TCAC', 'pos': '133768815', 'alt': 'T'}} + assert results['NM_198180.2:c.408_410del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.130893430_130893432del', 'vcf': {'chr': 'chr9', 'ref': 'TCAC', 'pos': '130893428', 'alt': 'T'}} + assert results['NM_198180.2:c.408_410del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.133768817_133768819del', 'vcf': {'chr': 
'9', 'ref': 'TCAC', 'pos': '133768815', 'alt': 'T'}} + assert results['NM_198180.2:c.408_410del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.130893430_130893432del', 'vcf': {'chr': '9', 'ref': 'TCAC', 'pos': '130893428', 'alt': 'T'}} assert results['NM_198180.2:c.408_410del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_937823.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198180.2'} @@ -17712,7 +17715,7 @@ def test_variant324(self): assert results['NM_080877.2:c.1733_1735delinsTTT']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_080877.2:c.1733_1735delinsTTT']['alt_genomic_loci'], []) assert results['NM_080877.2:c.1733_1735delinsTTT']['gene_symbol'] == 'SLC34A3' - assert results['NM_080877.2:c.1733_1735delinsTTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_543153.1:p.(Pro578_Lys579delinsLeuTer)', 'slr': 'NP_543153.1:p.(P578_K579delinsL*)'} + assert results['NM_080877.2:c.1733_1735delinsTTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_543153.1:p.(Pro578_Gln598del)', 'slr': 'NP_543153.1:p.(P578_Q598del)'} assert results['NM_080877.2:c.1733_1735delinsTTT']['submitted_variant'] == 'NM_080877.2:c.1733_1735delinsTTT' assert results['NM_080877.2:c.1733_1735delinsTTT']['genome_context_intronic_sequence'] == '' assert results['NM_080877.2:c.1733_1735delinsTTT']['hgvs_lrg_variant'] == '' @@ -17903,7 +17906,7 @@ def test_variant332(self): assert 'NM_001287344.1:c.690_690+1insCTACATAG' in list(results.keys()) assert results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001287344.1:c.690_690+1insCTACATAG']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001287344.1:c.690_690+1insCTACATAG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 
'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}]) + self.assertCountEqual(results['NM_001287344.1:c.690_690+1insCTACATAG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43850_43851insATGTAGCT', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43850_43851insATGTAGCT', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}]) assert results['NM_001287344.1:c.690_690+1insCTACATAG']['gene_symbol'] == 'BTK' assert results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001274273.1:p.?', 'slr': 'NP_001274273.1:p.?'} assert results['NM_001287344.1:c.690_690+1insCTACATAG']['submitted_variant'] == 'NG_009616.1:g.29052_29053insCTACATAG' @@ -17911,16 +17914,16 @@ def test_variant332(self): assert results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_lrg_variant'] == '' assert results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_transcript_variant'] == 'NM_001287344.1:c.690_690+1insCTACATAG' assert results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_refseqgene_variant'] == '' - assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} - assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} - assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': 
{'chr': 'X', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} - assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617162_100617163insATGTAGCT', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362174_101362175insATGTAGCT', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617162_100617163insATGTAGCT', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362174_101362175insATGTAGCT', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} assert results['NM_001287344.1:c.690_690+1insCTACATAG']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001274273.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001287344.1'} assert 'NM_001287345.1:c.588_588+1insCTACATAG' in list(results.keys()) assert results['NM_001287345.1:c.588_588+1insCTACATAG']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001287345.1:c.588_588+1insCTACATAG']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001287345.1:c.588_588+1insCTACATAG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 
'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}]) + self.assertCountEqual(results['NM_001287345.1:c.588_588+1insCTACATAG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43850_43851insATGTAGCT', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43850_43851insATGTAGCT', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}]) assert results['NM_001287345.1:c.588_588+1insCTACATAG']['gene_symbol'] == 'BTK' assert results['NM_001287345.1:c.588_588+1insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001274274.1:p.?', 'slr': 'NP_001274274.1:p.?'} assert results['NM_001287345.1:c.588_588+1insCTACATAG']['submitted_variant'] == 'NG_009616.1:g.29052_29053insCTACATAG' @@ -17928,16 +17931,16 @@ def test_variant332(self): assert results['NM_001287345.1:c.588_588+1insCTACATAG']['hgvs_lrg_variant'] == '' assert results['NM_001287345.1:c.588_588+1insCTACATAG']['hgvs_transcript_variant'] == 'NM_001287345.1:c.588_588+1insCTACATAG' assert results['NM_001287345.1:c.588_588+1insCTACATAG']['hgvs_refseqgene_variant'] == '' - assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} - assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} - assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} - assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617162_100617163insATGTAGCT', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362174_101362175insATGTAGCT', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617162_100617163insATGTAGCT', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362174_101362175insATGTAGCT', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} assert results['NM_001287345.1:c.588_588+1insCTACATAG']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001274274.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001287345.1'} assert 'NM_000061.2:c.588_588+1insCTACATAG' in list(results.keys()) assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_lrg_transcript_variant'] == 'LRG_128t1:c.588_588+1insCTACATAG' assert results['NM_000061.2:c.588_588+1insCTACATAG']['refseqgene_context_intronic_sequence'] == 'NG_009616.1(NM_000061.2):c.588_588+1insCTACATAG' - 
self.assertCountEqual(results['NM_000061.2:c.588_588+1insCTACATAG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}]) + self.assertCountEqual(results['NM_000061.2:c.588_588+1insCTACATAG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43850_43851insATGTAGCT', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43850_43851insATGTAGCT', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}]) assert results['NM_000061.2:c.588_588+1insCTACATAG']['gene_symbol'] == 'BTK' assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000052.1(LRG_128p1):p.?', 'slr': 'NP_000052.1:p.?'} assert results['NM_000061.2:c.588_588+1insCTACATAG']['submitted_variant'] == 'NG_009616.1:g.29052_29053insCTACATAG' @@ -17945,10 +17948,10 @@ def test_variant332(self): assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_lrg_variant'] == 'LRG_128:g.29052_29053insCTACATAG' assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_transcript_variant'] == 'NM_000061.2:c.588_588+1insCTACATAG' assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_refseqgene_variant'] == 'NG_009616.1:g.29052_29053insCTACATAG' - assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} - assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} - assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} - assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617162_100617163insATGTAGCT', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362174_101362175insATGTAGCT', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617162_100617163insATGTAGCT', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362174_101362175insATGTAGCT', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} assert results['NM_000061.2:c.588_588+1insCTACATAG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009616.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000052.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000061.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_128.xml'} @@ -17961,7 +17964,7 @@ def test_variant333(self): assert 
'NM_000061.2:c.588_588+1insCTACATAG' in list(results.keys()) assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_lrg_transcript_variant'] == 'LRG_128t1:c.588_588+1insCTACATAG' assert results['NM_000061.2:c.588_588+1insCTACATAG']['refseqgene_context_intronic_sequence'] == 'NG_009616.1(NM_000061.2):c.588_588+1insCTACATAG' - self.assertCountEqual(results['NM_000061.2:c.588_588+1insCTACATAG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43848_43849insCTATGTAG', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}]) + self.assertCountEqual(results['NM_000061.2:c.588_588+1insCTACATAG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43850_43851insATGTAGCT', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43850_43851insATGTAGCT', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}]) assert results['NM_000061.2:c.588_588+1insCTACATAG']['gene_symbol'] == 'BTK' assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000052.1(LRG_128p1):p.?', 'slr': 'NP_000052.1:p.?'} assert results['NM_000061.2:c.588_588+1insCTACATAG']['submitted_variant'] == 'NM_000061.2:c.588_588+1insCTACATAG' @@ -17969,10 +17972,10 @@ def test_variant333(self): assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_lrg_variant'] == 'LRG_128:g.29052_29053insCTACATAG' assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_transcript_variant'] == 'NM_000061.2:c.588_588+1insCTACATAG' assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_refseqgene_variant'] == 'NG_009616.1:g.29052_29053insCTACATAG' - assert 
results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} - assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} - assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617160_100617161insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} - assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362172_101362173insCTATGTAG', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617162_100617163insATGTAGCT', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362174_101362175insATGTAGCT', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617162_100617163insATGTAGCT', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362174_101362175insATGTAGCT', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} assert 
results['NM_000061.2:c.588_588+1insCTACATAG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009616.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000052.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000061.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_128.xml'} @@ -17993,9 +17996,58 @@ def test_variant334(self): assert results['NM_000061.2:c.588_589insCTACATAG']['hgvs_lrg_variant'] == '' assert results['NM_000061.2:c.588_589insCTACATAG']['hgvs_transcript_variant'] == 'NM_000061.2:c.588_589insCTACATAG' assert results['NM_000061.2:c.588_589insCTACATAG']['hgvs_refseqgene_variant'] == '' - assert results['NM_000061.2:c.588_589insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100615743_100617161delinsTCTATGTAGC', 'vcf': {'chr': 'chrX', 'ref': 'GTTAGGAGAAAAGGTAGGAGGGTTTGTCAAGATACCAAGCACTCTTCTCTTCTCTCCCAACTCTCTGGCTTACTCAAGACACCCAAATCAGGCATACTAAAATATTACTCAGCAGTCATTCAACAACCATTTTTAAGCACCAGTGCAGGAGTTCTCAGCCTTGCACACATATAAAGACCATGTATGGAACTTTTAAATTCCAATGTACTTTCGGAGGCCAAGGCGGGCGGATCAGTTGAGGCCAGGAGTTCGAGACCAGCCTGGCCAACGTGACGAAACCCCATCTCTACTAAAAATGCAAAAATCAGCTGGGCATGGTAGTGTGTGCATATAGCCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATAACTTGACCCCAGGAGGCGGAGGTTGTAGTGAGCCAAGACCACGCCACTGCACTCCAGCCTGGGTGACAGAGTGAGACTGTCTCAAAAAAAAACCCACAAAAAACAAAAAACCAATTCCAATACCTAGTCAGTTTCCTCACAGACCAATTACATCAAAATCAAACTCTCAGGAATGGGACCCAAACATTACTATTTTTAAAGCTCACTAGACAAAAACCATTTATAGCTAAGGTCAGGAAACCGGCTTGGCACTAAACTTGTACGTGAATCTACTAAGTGGCTCAGAACCTTGGTTTCCTTCTTTGTAAAATGAGTATAATAATACCTGCTCTACTTACTTACAATATGTGAGAAAGGGCTTTCTAGCCCTCAAGAAGGAACCAAAAAAAAAAAAAAAACTTCTGAAGTGTTAGTGATAGGTGGTGTTAGTGCTAAGTGTTGAGTATGTTGGTATTAAGTGTTAAATTCTTCTAACTTTACTGTATGTTTGAAAATACTTTCCAGCTGGGCACGGTGGCTCACGCCTGTAATCCCAGCACTTTAGGAGGCCGAGGCGGGTTCGAGACCAGCCTGAACAACATGGTGACACCCCCATCTCTACTAAAAATACAAAAATTAGCTGGGCGTGGTGGTGCATGCCTGTAATCCCAGCTACTCAGGAGGCTGAGACAGGAGAATCACTTGAATCTGGGAGGCGGAGGTTGCAGTGAGCCGAGATCATGCCATTACACTCGAACCTGGGCGACAGAGCGAGACTCCGTC
TCAAAACAAAAACAAAAGCAAAAACAAAACAACAACAAAAACTTTACATTAAAAAAATCAGGTTTTGTTCTAAACAGGTGATTGGATTACATGGTTGCTGAGAGCCTTCTATCTTTCCATCGAGGAGGAAATCCTAATTAGAAGAACAAATCCCCCATCTTAGCAAGAATACCAATTAACACTGCCAAGTCCCAGGGTAATTCTAAGACTCTAGTGTGTTCTTAGGGCTTGACTATAAGTTTCCATTTAAGCAGTGGCAGCACCCAGTTTCCCTGTATAC', 'pos': '100615751', 'alt': 'G'}} + assert results['NM_000061.2:c.588_589insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100615752_100617160del', 'vcf': {'chr': 'chrX', 'ref': 'GTTAGGAGAAAAGGTAGGAGGGTTTGTCAAGATACCAAGCACTCTTCTCTTCTCTCCCAACTCTCTGGCTTACTCAAGACACCCAAATCAGGCATACTAAAATATTACTCAGCAGTCATTCAACAACCATTTTTAAGCACCAGTGCAGGAGTTCTCAGCCTTGCACACATATAAAGACCATGTATGGAACTTTTAAATTCCAATGTACTTTCGGAGGCCAAGGCGGGCGGATCAGTTGAGGCCAGGAGTTCGAGACCAGCCTGGCCAACGTGACGAAACCCCATCTCTACTAAAAATGCAAAAATCAGCTGGGCATGGTAGTGTGTGCATATAGCCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATAACTTGACCCCAGGAGGCGGAGGTTGTAGTGAGCCAAGACCACGCCACTGCACTCCAGCCTGGGTGACAGAGTGAGACTGTCTCAAAAAAAAACCCACAAAAAACAAAAAACCAATTCCAATACCTAGTCAGTTTCCTCACAGACCAATTACATCAAAATCAAACTCTCAGGAATGGGACCCAAACATTACTATTTTTAAAGCTCACTAGACAAAAACCATTTATAGCTAAGGTCAGGAAACCGGCTTGGCACTAAACTTGTACGTGAATCTACTAAGTGGCTCAGAACCTTGGTTTCCTTCTTTGTAAAATGAGTATAATAATACCTGCTCTACTTACTTACAATATGTGAGAAAGGGCTTTCTAGCCCTCAAGAAGGAACCAAAAAAAAAAAAAAAACTTCTGAAGTGTTAGTGATAGGTGGTGTTAGTGCTAAGTGTTGAGTATGTTGGTATTAAGTGTTAAATTCTTCTAACTTTACTGTATGTTTGAAAATACTTTCCAGCTGGGCACGGTGGCTCACGCCTGTAATCCCAGCACTTTAGGAGGCCGAGGCGGGTTCGAGACCAGCCTGAACAACATGGTGACACCCCCATCTCTACTAAAAATACAAAAATTAGCTGGGCGTGGTGGTGCATGCCTGTAATCCCAGCTACTCAGGAGGCTGAGACAGGAGAATCACTTGAATCTGGGAGGCGGAGGTTGCAGTGAGCCGAGATCATGCCATTACACTCGAACCTGGGCGACAGAGCGAGACTCCGTCTCAAAACAAAAACAAAAGCAAAAACAAAACAACAACAAAAACTTTACATTAAAAAAATCAGGTTTTGTTCTAAACAGGTGATTGGATTACATGGTTGCTGAGAGCCTTCTATCTTTCCATCGAGGAGGAAATCCTAATTAGAAGAACAAATCCCCCATCTTAGCAAGAATACCAATTAACACTGCCAAGTCCCAGGGTAATTCTAAGACTCTAGTGTGTTCTTAGGGCTTGACTATAAGTTTCCATTTAAGCAGTGGCAGCACCCAGTTTCCCTGTATAC', 'pos': '100615751', 'alt': 'G'}} assert 'hg38' not in 
list(results['NM_000061.2:c.588_589insCTACATAG']['primary_assembly_loci'].keys()) - assert results['NM_000061.2:c.588_589insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100615743_100617161delinsTCTATGTAGC', 'vcf': {'chr': 'X', 'ref': 'GTTAGGAGAAAAGGTAGGAGGGTTTGTCAAGATACCAAGCACTCTTCTCTTCTCTCCCAACTCTCTGGCTTACTCAAGACACCCAAATCAGGCATACTAAAATATTACTCAGCAGTCATTCAACAACCATTTTTAAGCACCAGTGCAGGAGTTCTCAGCCTTGCACACATATAAAGACCATGTATGGAACTTTTAAATTCCAATGTACTTTCGGAGGCCAAGGCGGGCGGATCAGTTGAGGCCAGGAGTTCGAGACCAGCCTGGCCAACGTGACGAAACCCCATCTCTACTAAAAATGCAAAAATCAGCTGGGCATGGTAGTGTGTGCATATAGCCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATAACTTGACCCCAGGAGGCGGAGGTTGTAGTGAGCCAAGACCACGCCACTGCACTCCAGCCTGGGTGACAGAGTGAGACTGTCTCAAAAAAAAACCCACAAAAAACAAAAAACCAATTCCAATACCTAGTCAGTTTCCTCACAGACCAATTACATCAAAATCAAACTCTCAGGAATGGGACCCAAACATTACTATTTTTAAAGCTCACTAGACAAAAACCATTTATAGCTAAGGTCAGGAAACCGGCTTGGCACTAAACTTGTACGTGAATCTACTAAGTGGCTCAGAACCTTGGTTTCCTTCTTTGTAAAATGAGTATAATAATACCTGCTCTACTTACTTACAATATGTGAGAAAGGGCTTTCTAGCCCTCAAGAAGGAACCAAAAAAAAAAAAAAAACTTCTGAAGTGTTAGTGATAGGTGGTGTTAGTGCTAAGTGTTGAGTATGTTGGTATTAAGTGTTAAATTCTTCTAACTTTACTGTATGTTTGAAAATACTTTCCAGCTGGGCACGGTGGCTCACGCCTGTAATCCCAGCACTTTAGGAGGCCGAGGCGGGTTCGAGACCAGCCTGAACAACATGGTGACACCCCCATCTCTACTAAAAATACAAAAATTAGCTGGGCGTGGTGGTGCATGCCTGTAATCCCAGCTACTCAGGAGGCTGAGACAGGAGAATCACTTGAATCTGGGAGGCGGAGGTTGCAGTGAGCCGAGATCATGCCATTACACTCGAACCTGGGCGACAGAGCGAGACTCCGTCTCAAAACAAAAACAAAAGCAAAAACAAAACAACAACAAAAACTTTACATTAAAAAAATCAGGTTTTGTTCTAAACAGGTGATTGGATTACATGGTTGCTGAGAGCCTTCTATCTTTCCATCGAGGAGGAAATCCTAATTAGAAGAACAAATCCCCCATCTTAGCAAGAATACCAATTAACACTGCCAAGTCCCAGGGTAATTCTAAGACTCTAGTGTGTTCTTAGGGCTTGACTATAAGTTTCCATTTAAGCAGTGGCAGCACCCAGTTTCCCTGTATAC', 'pos': '100615751', 'alt': 'G'}} + assert results['NM_000061.2:c.588_589insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100615752_100617160del', 'vcf': {'chr': 'X', 'ref': 
'GTTAGGAGAAAAGGTAGGAGGGTTTGTCAAGATACCAAGCACTCTTCTCTTCTCTCCCAACTCTCTGGCTTACTCAAGACACCCAAATCAGGCATACTAAAATATTACTCAGCAGTCATTCAACAACCATTTTTAAGCACCAGTGCAGGAGTTCTCAGCCTTGCACACATATAAAGACCATGTATGGAACTTTTAAATTCCAATGTACTTTCGGAGGCCAAGGCGGGCGGATCAGTTGAGGCCAGGAGTTCGAGACCAGCCTGGCCAACGTGACGAAACCCCATCTCTACTAAAAATGCAAAAATCAGCTGGGCATGGTAGTGTGTGCATATAGCCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATAACTTGACCCCAGGAGGCGGAGGTTGTAGTGAGCCAAGACCACGCCACTGCACTCCAGCCTGGGTGACAGAGTGAGACTGTCTCAAAAAAAAACCCACAAAAAACAAAAAACCAATTCCAATACCTAGTCAGTTTCCTCACAGACCAATTACATCAAAATCAAACTCTCAGGAATGGGACCCAAACATTACTATTTTTAAAGCTCACTAGACAAAAACCATTTATAGCTAAGGTCAGGAAACCGGCTTGGCACTAAACTTGTACGTGAATCTACTAAGTGGCTCAGAACCTTGGTTTCCTTCTTTGTAAAATGAGTATAATAATACCTGCTCTACTTACTTACAATATGTGAGAAAGGGCTTTCTAGCCCTCAAGAAGGAACCAAAAAAAAAAAAAAAACTTCTGAAGTGTTAGTGATAGGTGGTGTTAGTGCTAAGTGTTGAGTATGTTGGTATTAAGTGTTAAATTCTTCTAACTTTACTGTATGTTTGAAAATACTTTCCAGCTGGGCACGGTGGCTCACGCCTGTAATCCCAGCACTTTAGGAGGCCGAGGCGGGTTCGAGACCAGCCTGAACAACATGGTGACACCCCCATCTCTACTAAAAATACAAAAATTAGCTGGGCGTGGTGGTGCATGCCTGTAATCCCAGCTACTCAGGAGGCTGAGACAGGAGAATCACTTGAATCTGGGAGGCGGAGGTTGCAGTGAGCCGAGATCATGCCATTACACTCGAACCTGGGCGACAGAGCGAGACTCCGTCTCAAAACAAAAACAAAAGCAAAAACAAAACAACAACAAAAACTTTACATTAAAAAAATCAGGTTTTGTTCTAAACAGGTGATTGGATTACATGGTTGCTGAGAGCCTTCTATCTTTCCATCGAGGAGGAAATCCTAATTAGAAGAACAAATCCCCCATCTTAGCAAGAATACCAATTAACACTGCCAAGTCCCAGGGTAATTCTAAGACTCTAGTGTGTTCTTAGGGCTTGACTATAAGTTTCCATTTAAGCAGTGGCAGCACCCAGTTTCCCTGTATAC', 'pos': '100615751', 'alt': 'G'}} assert 'grch38' not in list(results['NM_000061.2:c.588_589insCTACATAG']['primary_assembly_loci'].keys()) assert results['NM_000061.2:c.588_589insCTACATAG']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000052.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000061.2'} + + def test_variant335(self): + variant = 'NM_000492.3:c.1210-12_1210-6delinsTTTTTTTTT' + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() + print(results) + + assert 'NM_000492.3:c.1210-7_1210-6dup' in 
list(results.keys()) + assert results['NM_000492.3:c.1210-7_1210-6dup']['hgvs_lrg_transcript_variant'] == 'LRG_663t1:c.1210-7_1210-6dup' + assert results['NM_000492.3:c.1210-7_1210-6dup']['refseqgene_context_intronic_sequence'] == 'NG_016465.3(NM_000492.3):c.1210-7_1210-6dup' + self.assertCountEqual(results['NM_000492.3:c.1210-7_1210-6dup']['alt_genomic_loci'], []) + assert results['NM_000492.3:c.1210-7_1210-6dup']['gene_symbol'] == 'CFTR' + assert results['NM_000492.3:c.1210-7_1210-6dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000483.3(LRG_663p1):p.?', 'slr': 'NP_000483.3:p.?'} + assert results['NM_000492.3:c.1210-7_1210-6dup']['submitted_variant'] == 'NM_000492.3:c.1210-12_1210-6delinsTTTTTTTTT' + assert results['NM_000492.3:c.1210-7_1210-6dup']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_000492.3):c.1210-7_1210-6dup' + assert results['NM_000492.3:c.1210-7_1210-6dup']['hgvs_lrg_variant'] == '' + assert results['NM_000492.3:c.1210-7_1210-6dup']['hgvs_transcript_variant'] == 'NM_000492.3:c.1210-7_1210-6dup' + assert results['NM_000492.3:c.1210-7_1210-6dup']['hgvs_refseqgene_variant'] == 'NG_016465.3:g.87851_87852dup' + assert results['NM_000492.3:c.1210-7_1210-6dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.117188688_117188689dup', 'vcf': {'chr': 'chr7', 'ref': 'G', 'pos': '117188682', 'alt': 'GTT'}} + assert results['NM_000492.3:c.1210-7_1210-6dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.117548634_117548635dup', 'vcf': {'chr': 'chr7', 'ref': 'G', 'pos': '117548628', 'alt': 'GTT'}} + assert results['NM_000492.3:c.1210-7_1210-6dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.117188688_117188689dup', 'vcf': {'chr': '7', 'ref': 'G', 'pos': '117188682', 'alt': 'GTT'}} + assert results['NM_000492.3:c.1210-7_1210-6dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.117548634_117548635dup', 
'vcf': {'chr': '7', 'ref': 'G', 'pos': '117548628', 'alt': 'GTT'}} + assert results['NM_000492.3:c.1210-7_1210-6dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_016465.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000483.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000492.3'} + + assert results['flag'] == 'gene_variant' + + + def test_variant336(self): + variant = 'NM_000088.3:c.589-18_589-14delinsTTTTTTTTTT' + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() + print(results) + + assert 'NM_000088.3:c.589-18_589-14dup' in list(results.keys()) + assert results['NM_000088.3:c.589-18_589-14dup']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-18_589-14dup' + assert results['NM_000088.3:c.589-18_589-14dup']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-18_589-14dup' + self.assertCountEqual(results['NM_000088.3:c.589-18_589-14dup']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.589-18_589-14dup']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589-18_589-14dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.589-18_589-14dup']['submitted_variant'] == 'NM_000088.3:c.589-18_589-14delinsTTTTTTTTTT' + assert results['NM_000088.3:c.589-18_589-14dup']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-18_589-14dup' + assert results['NM_000088.3:c.589-18_589-14dup']['hgvs_lrg_variant'] == 'LRG_1:g.8620_8624dup' + assert results['NM_000088.3:c.589-18_589-14dup']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-18_589-14dup' + assert results['NM_000088.3:c.589-18_589-14dup']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8620_8624dup' + assert results['NM_000088.3:c.589-18_589-14dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275377_48275381dup', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': 
'48275376', 'alt': 'GAAAAA'}} + assert results['NM_000088.3:c.589-18_589-14dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198016_50198020dup', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '50198015', 'alt': 'GAAAAA'}} + assert results['NM_000088.3:c.589-18_589-14dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275377_48275381dup', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '48275376', 'alt': 'GAAAAA'}} + assert results['NM_000088.3:c.589-18_589-14dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198016_50198020dup', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '50198015', 'alt': 'GAAAAA'}} + assert results['NM_000088.3:c.589-18_589-14dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + + assert results['flag'] == 'gene_variant' From f088cc27960bc2a023db2cc551666be63167f8cb Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 24 May 2019 11:20:12 +0100 Subject: [PATCH 120/223] Added gene2transcripts and hgvs2ref functions from v0 into Validator obj --- VariantValidator/modules/vvMixinCore.py | 242 +++++++++++++++++++++++- 1 file changed, 239 insertions(+), 3 deletions(-) diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 1657ea6c..c0b0891b 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -376,7 +376,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # COLLECT gene symbol, name and ACCESSION INFORMATION # Gene symbol if my_variant.reftype != ':g.': - toskip = self.get_transcript_info(my_variant) + toskip = self._get_transcript_info(my_variant) if toskip: continue @@ -698,8 
+698,244 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr logger.critical(str(exc_type) + " " + str(exc_value)) raise - def get_transcript_info(self, variant): - """Collect transcript information from a non-genomic variant""" + def gene2transcripts(self, query): + """ + Generates a list of transcript (UTA supported) and transcript names from a gene symbol or RefSeq transcript ID + :param query: string gene symbol or RefSeq ID (e.g. NANOG or NM_024865.3) + :return: dictionary of transcript information + """ + input = query + input = input.upper() + if re.search('\d+ORF\d+', input): + input = input.replace('ORF', 'orf') + # Quick check for blank form + if input == '': + caution = {'error': 'Please enter HGNC gene name or transcript identifier (NM_, NR_, or ENST)'} + return caution + else: + hgnc = input + if re.match('NM_', hgnc) or re.match('NR_', hgnc): # or re.match('ENST', hgnc): + try: + tx_info = self.hdp.get_tx_identity_info(hgnc) + hgnc = tx_info[6] + except hgvs.exceptions.HGVSError as e: + caution = {'error': str(e)} + return caution + + # First perform a search against the input gene symbol or the symbol inferred from UTA + initial = fn.hgnc_rest(path="/fetch/symbol/" + hgnc) + # Check for a record + if str(initial['record']['response']['numFound']) != '0': + current_sym = hgnc + previous = initial + # No record found, is it a previous symbol? 
+ else: + # Look up current name + current = fn.hgnc_rest(path="/search/prev_symbol/" + hgnc) + # Look for historic names + # If historic names = 0 + if str(current['record']['response']['numFound']) == '0': + current_sym = hgnc + else: + current_sym = current['record']['response']['docs'][0]['symbol'] + # Look up previous symbols and gene name + # Re-set the previous variable + previous = fn.hgnc_rest(path="/fetch/symbol/" + current_sym) + + # Extract the relevant data + try: + previous_sym = previous['record']['response']['docs'][0]['prev_symbol'][0] + except: + previous_sym = current_sym + + # Get gene name + try: + gene_name = previous['record']['response']['docs'][0]['name'] + except: + # error = current_sym + ' is not a valid HGNC gene symbol' + gene_name = 'Gene symbol %s not found in the HGNC database of human gene names www.genenames.org' % query + return {'error': gene_name} + + # Look up previous name + try: + previous_name = previous['record']['response']['docs'][0]['prev_name'][0] + except: + previous_name = gene_name + + # Get transcripts + tx_for_gene = self.hdp.get_tx_for_gene(current_sym) + if len(tx_for_gene) == 0: + tx_for_gene = self.hdp.get_tx_for_gene(previous_sym) + if len(tx_for_gene) == 0: + tx_for_gene = {'error': 'Unable to retrieve data from the UTA, please contact admin'} + return tx_for_gene + + # Loop through each transcript and get the relevant transcript description + genes_and_tx = [] + recovered_dict = {} + for line in tx_for_gene: + if re.match('^NM_', line[3]) or re.match('^NR_', line[3]): + # Transcript ID + tx = line[3] + tx_description = self.db.get_transcript_description(tx) + if tx_description == 'none': + self.db.update_transcript_info_record(tx, self) + tx_description = self.db.get_transcript_description(tx) + # Check for duplicates + if tx in list(recovered_dict.keys()): + continue + else: + try: + # Add to recovered_dict + recovered_dict[tx] = '' + genes_and_tx.append([tx, tx_description, line[1] + 1, line[2]]) + 
except: + # Add to recovered_dict + recovered_dict[tx] = '' + genes_and_tx.append([tx, tx_description, 'not applicable', 'not applicable']) + # LRG information + lrg_transcript = self.db.get_lrgTranscriptID_from_RefSeqTranscriptID(tx) + if lrg_transcript == 'none': + pass + else: + genes_and_tx.append([lrg_transcript, tx_description, line[1] + 1, line[2]]) + + cp_genes_and_tx = copy.deepcopy(genes_and_tx) + genes_and_tx = [] + for tx in cp_genes_and_tx: + if 'not applicable' in str(tx[2]): + tx_d = {'reference': tx[0], + 'description': tx[1], + 'coding_start': 'non-coding', + 'coding_end': 'non-coding' + } + else: + tx_d = {'reference': tx[0], + 'description': tx[1], + 'coding_start': tx[2] + 1, + 'coding_end': tx[3] + } + genes_and_tx.append(tx_d) + + # Return data table + g2d_data = {'current_symbol': current_sym, + 'previous_symbol': previous_sym, + 'current_name': gene_name, + 'previous_name': previous_name, + 'transcripts': genes_and_tx + } + + return g2d_data + + def hgvs2ref(self, query): + """ + Fetch reference sequence from a HGVS variant description + :param query: + :return: + """ + logger.info('Fetching reference sequence for ' + query) + # Dictionary to store the data + reference = {'variant': query, + 'start_position': '', + 'end_position': '', + 'warning': '', + 'sequence': '', + 'error': ''} + # Step 1: parse the query. Dictionary the parse error if parsing fails + try: + input_hgvs_query = self.hp.parse_hgvs_variant(query) + except Exception as e: + reference['error'] = str(e) + return reference + # Step 2: If the variant is a c., it needs to transferred to n. 
+ try: + hgvs_query = self.vm.c_to_n(input_hgvs_query) + except: + hgvs_query = input_hgvs_query + + # For transcript reference sequences + if hgvs_query.type == 'c' or hgvs_query.type == 'n': + # Step 4: Check for intronic sequence + if hgvs_query.posedit.pos.start.offset != 0 and hgvs_query.posedit.pos.end.offset != 0: + reference['warning'] = 'Intronic sequence variation: Use genomic reference sequence' + elif hgvs_query.posedit.pos.start.offset != 0 or hgvs_query.posedit.pos.end.offset != 0: + reference['warning'] = 'Partial intronic sequence variation: Returning exonic and/or UTR sequence only' + + # Step 3: split the variant description into the parts required for seqfetching + accession = hgvs_query.ac + start = hgvs_query.posedit.pos.start.base - 1 + end = hgvs_query.posedit.pos.end.base + + # Step 5: try and fetch the sequence using SeqFetcher. Dictionary an error if this fails + try: + sequence = self.sf.fetch_seq(accession, start, end) + except Exception as e: + reference['error'] = str(e) + exc_type, exc_value, last_traceback = sys.exc_info() + te = traceback.format_exc() + # tr = ''.join(traceback.format_stack()) + tbk = [str(exc_type), str(exc_value), str(te)] + er = '\n'.join(tbk) + logger.info(str(exc_type) + " " + str(exc_value)) + logger.debug(er) + else: + reference['start_position'] = str(input_hgvs_query.posedit.pos.start.base) + reference['end_position'] = str(input_hgvs_query.posedit.pos.end.base) + reference['sequence'] = sequence + else: + # Step 3: split the variant description into the parts required for seqfetching + accession = hgvs_query.ac + start = hgvs_query.posedit.pos.start.base - 1 + end = hgvs_query.posedit.pos.end.base + + # Step 5: try and fetch the sequence using SeqFetcher. 
Dictionary an error if this fails + try: + sequence = self.sf.fetch_seq(accession, start, end) + except Exception as e: + reference['error'] = str(e) + exc_type, exc_value, last_traceback = sys.exc_info() + te = traceback.format_exc() + # tr = ''.join(traceback.format_stack()) + tbk = [str(exc_type), str(exc_value), str(te)] + er = '\n'.join(tbk) + logger.info(er) + else: + reference['start_position'] = str(input_hgvs_query.posedit.pos.start.base) + reference['end_position'] = str(input_hgvs_query.posedit.pos.end.base) + reference['sequence'] = sequence + + # Genomic reference sequence + elif hgvs_query.type == 'g' or hgvs_query.type == 'p': + # Step 3: split the variant description into the parts required for seqfetching + accession = hgvs_query.ac + start = hgvs_query.posedit.pos.start.base - 1 + end = hgvs_query.posedit.pos.end.base + + # Step 5: try and fetch the sequence using SeqFetcher. Dictionary an error if this fails + try: + sequence = self.sf.fetch_seq(accession, start, end) + except Exception as e: + reference['error'] = str(e) + exc_type, exc_value, last_traceback = sys.exc_info() + te = traceback.format_exc() + # tr = ''.join(traceback.format_stack()) + tbk = [str(exc_type), str(exc_value), str(te)] + er = '\n'.join(tbk) + logger.info(str(exc_type) + " " + str(exc_value)) + logger.debug(er) + else: + reference['start_position'] = str(input_hgvs_query.posedit.pos.start.base) + reference['end_position'] = str(input_hgvs_query.posedit.pos.end.base) + reference['sequence'] = sequence + + # Return the resulting reference sequence or error message + return reference + + def _get_transcript_info(self, variant): + """ + Collect transcript information from a non-genomic variant. 
+ Should only be called during the validator process + """ hgvs_vt = self.hp.parse_hgvs_variant(str(variant.hgvs_formatted)) try: From e7041c04c02e86d7716ba77deaf12e7e5839da13 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 24 May 2019 14:28:01 +0100 Subject: [PATCH 121/223] Tidyied up genes2transcripts --- VariantValidator/modules/vvMixinCore.py | 212 +++++++++++------------- 1 file changed, 98 insertions(+), 114 deletions(-) diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index c0b0891b..ba5a57b5 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -704,128 +704,112 @@ def gene2transcripts(self, query): :param query: string gene symbol or RefSeq ID (e.g. NANOG or NM_024865.3) :return: dictionary of transcript information """ - input = query - input = input.upper() - if re.search('\d+ORF\d+', input): - input = input.replace('ORF', 'orf') + query = query.upper() + if re.search(r'\d+ORF\d+', query): + query = query.replace('ORF', 'orf') # Quick check for blank form - if input == '': - caution = {'error': 'Please enter HGNC gene name or transcript identifier (NM_, NR_, or ENST)'} - return caution + if query == '': + return {'error': 'Please enter HGNC gene name or transcript identifier (NM_, NR_, or ENST)'} + + hgnc = query + if 'NM_' in hgnc or 'NR_' in hgnc: # or re.match('ENST', hgnc): + try: + tx_info = self.hdp.get_tx_identity_info(hgnc) + hgnc = tx_info[6] + except hgvs.exceptions.HGVSError as e: + return {'error': str(e)} + + # First perform a search against the input gene symbol or the symbol inferred from UTA + initial = fn.hgnc_rest(path="/fetch/symbol/" + hgnc) + # Check for a record + if str(initial['record']['response']['numFound']) != '0': + current_sym = hgnc + previous = initial + # No record found, is it a previous symbol? 
else: - hgnc = input - if re.match('NM_', hgnc) or re.match('NR_', hgnc): # or re.match('ENST', hgnc): - try: - tx_info = self.hdp.get_tx_identity_info(hgnc) - hgnc = tx_info[6] - except hgvs.exceptions.HGVSError as e: - caution = {'error': str(e)} - return caution - - # First perform a search against the input gene symbol or the symbol inferred from UTA - initial = fn.hgnc_rest(path="/fetch/symbol/" + hgnc) - # Check for a record - if str(initial['record']['response']['numFound']) != '0': + # Look up current name + current = fn.hgnc_rest(path="/search/prev_symbol/" + hgnc) + # Look for historic names + # If historic names = 0 + if str(current['record']['response']['numFound']) == '0': current_sym = hgnc - previous = initial - # No record found, is it a previous symbol? else: - # Look up current name - current = fn.hgnc_rest(path="/search/prev_symbol/" + hgnc) - # Look for historic names - # If historic names = 0 - if str(current['record']['response']['numFound']) == '0': - current_sym = hgnc - else: - current_sym = current['record']['response']['docs'][0]['symbol'] - # Look up previous symbols and gene name - # Re-set the previous variable - previous = fn.hgnc_rest(path="/fetch/symbol/" + current_sym) - - # Extract the relevant data - try: - previous_sym = previous['record']['response']['docs'][0]['prev_symbol'][0] - except: - previous_sym = current_sym + current_sym = current['record']['response']['docs'][0]['symbol'] + # Look up previous symbols and gene name + # Re-set the previous variable + previous = fn.hgnc_rest(path="/fetch/symbol/" + current_sym) + + # Extract the relevant data + if 'prev_symbol' in list(previous['record']['response']['docs'][0].keys()): + previous_sym = previous['record']['response']['docs'][0]['prev_symbol'][0] + else: + previous_sym = current_sym - # Get gene name - try: - gene_name = previous['record']['response']['docs'][0]['name'] - except: - # error = current_sym + ' is not a valid HGNC gene symbol' - gene_name = 'Gene symbol %s 
not found in the HGNC database of human gene names www.genenames.org' % query - return {'error': gene_name} + # Get gene name + if 'name' in list(previous['record']['response']['docs'][0].keys()): + gene_name = previous['record']['response']['docs'][0]['name'] + else: + # error = current_sym + ' is not a valid HGNC gene symbol' + gene_name = 'Gene symbol %s not found in the HGNC database of human gene names www.genenames.org' % query + return {'error': gene_name} - # Look up previous name - try: - previous_name = previous['record']['response']['docs'][0]['prev_name'][0] - except: - previous_name = gene_name - - # Get transcripts - tx_for_gene = self.hdp.get_tx_for_gene(current_sym) - if len(tx_for_gene) == 0: - tx_for_gene = self.hdp.get_tx_for_gene(previous_sym) - if len(tx_for_gene) == 0: - tx_for_gene = {'error': 'Unable to retrieve data from the UTA, please contact admin'} - return tx_for_gene - - # Loop through each transcript and get the relevant transcript description - genes_and_tx = [] - recovered_dict = {} - for line in tx_for_gene: - if re.match('^NM_', line[3]) or re.match('^NR_', line[3]): - # Transcript ID - tx = line[3] + # Look up previous name + if 'prev_name' in list(previous['record']['response']['docs'][0].keys()): + previous_name = previous['record']['response']['docs'][0]['prev_name'][0] + else: + previous_name = gene_name + + # Get transcripts + tx_for_gene = self.hdp.get_tx_for_gene(current_sym) + if len(tx_for_gene) == 0: + tx_for_gene = self.hdp.get_tx_for_gene(previous_sym) + if len(tx_for_gene) == 0: + return {'error': 'Unable to retrieve data from the UTA, please contact admin'} + + # Loop through each transcript and get the relevant transcript description + genes_and_tx = [] + recovered = [] + for line in tx_for_gene: + if line[3].startswith('NM_') or line[3].startswith('NR_'): + # Transcript ID + tx = line[3] + tx_description = self.db.get_transcript_description(tx) + if tx_description == 'none': + 
self.db.update_transcript_info_record(tx, self) tx_description = self.db.get_transcript_description(tx) - if tx_description == 'none': - self.db.update_transcript_info_record(tx, self) - tx_description = self.db.get_transcript_description(tx) - # Check for duplicates - if tx in list(recovered_dict.keys()): - continue + # Check for duplicates + if tx not in recovered: + recovered.append(tx) + if len(line) >= 3 and isinstance(line[1], int): + genes_and_tx.append({'reference': tx, + 'description': tx_description, + 'coding_start': line[1] + 1 + 1, + 'coding_end': line[2] + }) else: - try: - # Add to recovered_dict - recovered_dict[tx] = '' - genes_and_tx.append([tx, tx_description, line[1] + 1, line[2]]) - except: - # Add to recovered_dict - recovered_dict[tx] = '' - genes_and_tx.append([tx, tx_description, 'not applicable', 'not applicable']) - # LRG information - lrg_transcript = self.db.get_lrgTranscriptID_from_RefSeqTranscriptID(tx) - if lrg_transcript == 'none': - pass - else: - genes_and_tx.append([lrg_transcript, tx_description, line[1] + 1, line[2]]) - - cp_genes_and_tx = copy.deepcopy(genes_and_tx) - genes_and_tx = [] - for tx in cp_genes_and_tx: - if 'not applicable' in str(tx[2]): - tx_d = {'reference': tx[0], - 'description': tx[1], - 'coding_start': 'non-coding', - 'coding_end': 'non-coding' - } - else: - tx_d = {'reference': tx[0], - 'description': tx[1], - 'coding_start': tx[2] + 1, - 'coding_end': tx[3] - } - genes_and_tx.append(tx_d) - - # Return data table - g2d_data = {'current_symbol': current_sym, - 'previous_symbol': previous_sym, - 'current_name': gene_name, - 'previous_name': previous_name, - 'transcripts': genes_and_tx - } - - return g2d_data + genes_and_tx.append({'reference': tx, + 'description': tx_description, + 'coding_start': 'non-coding', + 'coding_end': 'non-coding' + }) + # LRG information + lrg_transcript = self.db.get_lrgTranscriptID_from_RefSeqTranscriptID(tx) + if lrg_transcript != 'none': + genes_and_tx.append({'reference': 
lrg_transcript, + 'description': tx_description, + 'coding_start': line[1] + 1 + 1, + 'coding_end': line[2] + }) + + # Return data table + g2d_data = {'current_symbol': current_sym, + 'previous_symbol': previous_sym, + 'current_name': gene_name, + 'previous_name': previous_name, + 'transcripts': genes_and_tx + } + + return g2d_data def hgvs2ref(self, query): """ From 89be3d476dc07e97fdab045509e3e924d41ac6a5 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 24 May 2019 14:41:12 +0100 Subject: [PATCH 122/223] Tidying up hgvs2ref --- VariantValidator/modules/vvMixinCore.py | 84 ++++++------------------- 1 file changed, 19 insertions(+), 65 deletions(-) diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index ba5a57b5..43676e90 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -842,77 +842,31 @@ def hgvs2ref(self, query): # Step 4: Check for intronic sequence if hgvs_query.posedit.pos.start.offset != 0 and hgvs_query.posedit.pos.end.offset != 0: reference['warning'] = 'Intronic sequence variation: Use genomic reference sequence' + return reference + elif hgvs_query.posedit.pos.start.offset != 0 or hgvs_query.posedit.pos.end.offset != 0: reference['warning'] = 'Partial intronic sequence variation: Returning exonic and/or UTR sequence only' - # Step 3: split the variant description into the parts required for seqfetching - accession = hgvs_query.ac - start = hgvs_query.posedit.pos.start.base - 1 - end = hgvs_query.posedit.pos.end.base + elif hgvs_query.type != 'g' and hgvs_query.type != 'p': + return reference - # Step 5: try and fetch the sequence using SeqFetcher. 
Dictionary an error if this fails - try: - sequence = self.sf.fetch_seq(accession, start, end) - except Exception as e: - reference['error'] = str(e) - exc_type, exc_value, last_traceback = sys.exc_info() - te = traceback.format_exc() - # tr = ''.join(traceback.format_stack()) - tbk = [str(exc_type), str(exc_value), str(te)] - er = '\n'.join(tbk) - logger.info(str(exc_type) + " " + str(exc_value)) - logger.debug(er) - else: - reference['start_position'] = str(input_hgvs_query.posedit.pos.start.base) - reference['end_position'] = str(input_hgvs_query.posedit.pos.end.base) - reference['sequence'] = sequence - else: - # Step 3: split the variant description into the parts required for seqfetching - accession = hgvs_query.ac - start = hgvs_query.posedit.pos.start.base - 1 - end = hgvs_query.posedit.pos.end.base + # Step 3: split the variant description into the parts required for seqfetching + accession = hgvs_query.ac + start = hgvs_query.posedit.pos.start.base - 1 + end = hgvs_query.posedit.pos.end.base - # Step 5: try and fetch the sequence using SeqFetcher. Dictionary an error if this fails - try: - sequence = self.sf.fetch_seq(accession, start, end) - except Exception as e: - reference['error'] = str(e) - exc_type, exc_value, last_traceback = sys.exc_info() - te = traceback.format_exc() - # tr = ''.join(traceback.format_stack()) - tbk = [str(exc_type), str(exc_value), str(te)] - er = '\n'.join(tbk) - logger.info(er) - else: - reference['start_position'] = str(input_hgvs_query.posedit.pos.start.base) - reference['end_position'] = str(input_hgvs_query.posedit.pos.end.base) - reference['sequence'] = sequence - - # Genomic reference sequence - elif hgvs_query.type == 'g' or hgvs_query.type == 'p': - # Step 3: split the variant description into the parts required for seqfetching - accession = hgvs_query.ac - start = hgvs_query.posedit.pos.start.base - 1 - end = hgvs_query.posedit.pos.end.base - - # Step 5: try and fetch the sequence using SeqFetcher. 
Dictionary an error if this fails - try: - sequence = self.sf.fetch_seq(accession, start, end) - except Exception as e: - reference['error'] = str(e) - exc_type, exc_value, last_traceback = sys.exc_info() - te = traceback.format_exc() - # tr = ''.join(traceback.format_stack()) - tbk = [str(exc_type), str(exc_value), str(te)] - er = '\n'.join(tbk) - logger.info(str(exc_type) + " " + str(exc_value)) - logger.debug(er) - else: - reference['start_position'] = str(input_hgvs_query.posedit.pos.start.base) - reference['end_position'] = str(input_hgvs_query.posedit.pos.end.base) - reference['sequence'] = sequence + # Step 5: try and fetch the sequence using SeqFetcher. Dictionary an error if this fails + try: + sequence = self.sf.fetch_seq(accession, start, end) + except Exception as e: + reference['error'] = str(e) + logger.warning(str(e)) + else: + reference['start_position'] = str(input_hgvs_query.posedit.pos.start.base) + reference['end_position'] = str(input_hgvs_query.posedit.pos.end.base) + reference['sequence'] = sequence - # Return the resulting reference sequence or error message + # Return the resulting reference sequence and error message return reference def _get_transcript_info(self, variant): From bb9d36194435acd208906dac8c384f3cd68479f8 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 24 May 2019 16:02:03 +0100 Subject: [PATCH 123/223] Added try except so erorr with db pool closing is hidden --- VariantValidator/modules/vvDBInit.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/VariantValidator/modules/vvDBInit.py b/VariantValidator/modules/vvDBInit.py index 3cbbcdf3..6740e520 100644 --- a/VariantValidator/modules/vvDBInit.py +++ b/VariantValidator/modules/vvDBInit.py @@ -20,8 +20,11 @@ def __init__(self, dbConfig): self.conn = self.pool.get_connection() def __del__(self): - if self.conn: - self.conn.close() + if self.conn.is_connected(): + try: + self.conn.close() + except mysql.connector.errors.NotSupportedError: + pass 
self.conn = None if self.pool: self.pool = None From a3c3e4801ba2c9c2503c42e06a184127f4641171 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 24 May 2019 16:17:01 +0100 Subject: [PATCH 124/223] Cleaned up vvChromosomes --- VariantValidator/modules/mappers.py | 2 +- VariantValidator/modules/vvChromosomes.py | 1143 ++++++++++----------- VariantValidator/modules/vvMixinCore.py | 2 +- 3 files changed, 571 insertions(+), 576 deletions(-) diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index 36e315bb..2775d096 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -110,7 +110,7 @@ def gene_to_transcripts(variant, validator): # Chromosome build is not supported or intergenic??? else: sfm = vvChromosomes.supported_for_mapping(variant.hgvs_genomic.ac, variant.primary_assembly) - if sfm == 'true': + if sfm: try: validator.vr.validate(variant.hgvs_genomic) except hgvs.exceptions.HGVSError as e: diff --git a/VariantValidator/modules/vvChromosomes.py b/VariantValidator/modules/vvChromosomes.py index 78b7ee49..a8528bc3 100644 --- a/VariantValidator/modules/vvChromosomes.py +++ b/VariantValidator/modules/vvChromosomes.py @@ -8,24 +8,23 @@ def supported_for_mapping(ac, primary_assembly): - sfm = 'false' + sfm = False test_grc = to_chr_num_refseq(ac, primary_assembly) if test_grc is not None: - sfm = 'true' + sfm = True test_ucsc = to_chr_num_ucsc(ac, primary_assembly) if test_ucsc is not None: - sfm = 'true' + sfm = True return sfm def to_accession(chr_num, primary_assembly): - ''' + """ Available genome builds - :param chr_num: :param primary_assembly: :return: - ''' + """ GRCh37 = { "1": "NC_000001.10", "2": "NC_000002.11", @@ -2110,15 +2109,14 @@ def to_chr_num_ucsc(accession, primary_assembly): "NT_187667.1": "chrX_KI270913v1_alt", "NT_187395.1": "chrY_KI270740v1_random" } + chr_num = None if primary_assembly == 'hg38': chr_num = chr_num_convert_38.get(accession) if primary_assembly == 'hg19': 
chr_num = chr_num_convert_37.get(accession) - try: - return chr_num - except UnboundLocalError: - chr_num = None - return chr_num + + return chr_num + def to_chr_num_refseq(accession, primary_assembly): """ @@ -2802,15 +2800,13 @@ def to_chr_num_refseq(accession, primary_assembly): 'NT_113949.2': 'HSCHR19KIR_RP5_B_HAP_CTG3_1', 'NT_167235.1': 'HSCHR22_CTG1_3' } + chr_num = None if primary_assembly == 'GRCh38' or primary_assembly == 'hg38': chr_num = chr_num_convert_38.get(accession) if primary_assembly == 'GRCh37' or primary_assembly == 'hg19': chr_num = chr_num_convert_37.get(accession) - try: - return chr_num - except UnboundLocalError: - chr_num = None - return chr_num + return chr_num + # from gap_genes def gap_black_list(symbol): @@ -2818,562 +2814,561 @@ def gap_black_list(symbol): Lists of genes for GRCh37 and GRCh38 which require a gap to be inserted into either the transcript or the genome to maintain a perfect alignment """ - gapGene = { - "LPP": "", - "VPS13D": "", - "SSPO": "", - "HTT": "", - "PRKDC": "", - "RNA45SN4": "", - "RNA45SN1": "", - "RNA45SN2": "", - "RNA45SN3": "", - "ALMS1": "", - "ZNF141": "", - "PRLR": "", - "NBPF10": "", - "ACACA": "", - "ZMYM2": "", - "MIAT": "", - "WDFY4": "", - "CECR2": "", - "FAM30A": "", - "MYO15B": "", - "CELF2": "", - "JRK": "", - "PTEN": "", - "ZNF714": "", - "MGAT4C": "", - "SLITRK4": "", - "ZAN": "", - "COL19A1": "", - "CCDC144B": "", - "RAB11FIP4": "", - "ZNF516": "", - "ZNF518A": "", - "PROX1": "", - "HCG18": "", - "SON": "", - "ARMC9": "", - "CAMK1D": "", - "GRIP2": "", - "KLHL5": "", - "PPIP5K2": "", - "PKD1L2": "", - "SLC7A2": "", - "DGKK": "", - "IQSEC1": "", - "SYNM": "", - "SARM1": "", - "SMAD5": "", - "MAML3": "", - "CXorf40A": "", - "MAPT": "", - "ITIH5": "", - "NOTCH4": "", - "FER1L4": "", - "CNTNAP4": "", - "NLRC3": "", - "COL18A1": "", - "SLC6A6": "", - "DDX52": "", - "CDH4": "", - "SLC46A1": "", - "SLC35E2B": "", - "OCLN": "", - "DCAF7": "", - "SCAMP1": "", - "ATG13": "", - "SMAD3": "", - "DDX6": 
"", - "SLC25A53": "", - "ALG9": "", - "DCP1A": "", - "NCAM1": "", - "LINC00869": "", - "MYH7": "", - "DIXDC1": "", - "ZBTB4": "", - "RABEP1": "", - "PVR": "", - "POM121C": "", - "HOOK1": "", - "MAPK8IP2": "", - "ZNF280B": "", - "WASF2": "", - "PLEKHA2": "", - "PPP4R3B": "", - "FAM83H": "", - "SALL3": "", - "PHKG2": "", - "C18orf25": "", - "ZNF229": "", - "ZNF765-ZNF761": "", - "KANSL1": "", - "FAM102B": "", - "NOTCH2NL": "", - "YTHDF3": "", - "DPCR1": "", - "DACH1": "", - "PKD1L3": "", - "GRIA3": "", - "CYP1B1": "", - "LTBP4": "", - "SPON1": "", - "RNA28SN4": "", - "RNA28SN1": "", - "TRIL": "", - "RNA28SN3": "", - "RNA28SN2": "", - "XKR5": "", - "RBM8A": "", - "SALL2": "", - "JADE3": "", - "DHX57": "", - "PIGN": "", - "CPNE3": "", - "ANO1": "", - "NATD1": "", - "DKFZP434A062": "", - "TDRD9": "", - "BDNF": "", - "IVD": "", - "STIMATE": "", - "KCP": "", - "PRAG1": "", - "KLHL18": "", - "LYNX1": "", - "HYOU1": "", - "HLA-L": "", - "ATG9B": "", - "SLC6A14": "", - "PCSK6": "", - "MIR99AHG": "", - "TOX4": "", - "GABBR1": "", - "RABGEF1": "", - "PRR36": "", - "MAP3K14": "", - "PCDHB9": "", - "LOC102723753": "", - "MYO19": "", - "SRSF8": "", - "CTPS2": "", - "AHCYL1": "", - "UHRF1": "", - "MARCKS": "", - "ZMYM1": "", - "SENP3-EIF4A1": "", - "SEC14L2": "", - "RAPGEFL1": "", - "ZNF761": "", - "CNTROB": "", - "SSTR3": "", - "PAX2": "", - "GGA3": "", - "MCL1": "", - "EPS8": "", - "LINC02210": "", - "KRBA1": "", - "MSH5-SAPCD1": "", - "HLA-DPB1": "", - "PPP1R9B": "", - "OPLAH": "", - "UBXN4": "", - "ZNF2": "", - "EPHB6": "", - "LIX1L": "", - "RAPGEF4": "", - "MED22": "", - "POLR3C": "", - "DDR1": "", - "SIGLEC16": "", - "NEFL": "", - "ABCG4": "", - "BAG6": "", - "RECQL4": "", - "SPPL2B": "", - "RETREG3": "", - "FZD6": "", - "SCRT1": "", - "LSM14A": "", - "TAPBP": "", - "TWSG1": "", - "FRMD8": "", - "VPS26C": "", - "PNMA3": "", - "ZNF282": "", - "SP8": "", - "SRRM3": "", - "CCDC125": "", - "NPIPB3": "", - "FAM13C": "", - "GTF2IP1": "", - "ANKRD34A": "", - "PPP1R2": "", - 
"PHYHIPL": "", - "USH1G": "", - "LINC00461": "", - "ZNRD1ASP": "", - "TRIM10": "", - "SPIB": "", - "BCL6B": "", - "SCARF2": "", - "KIR3DX1": "", - "LOC400682": "", - "HLA-DOA": "", - "PLCD3": "", - "VPS11": "", - "FAM231D": "", - "TRIM52": "", - "ABCF1": "", - "ANP32E": "", - "COPG2IT1": "", - "TGIF2": "", - "LHX1": "", - "PIK3R6": "", - "APOL4": "", - "ZNF502": "", - "FGD5P1": "", - "LINC00624": "", - "ADRA2B": "", - "ZNF598": "", - "GNAZ": "", - "TMEM106A": "", - "SLC12A9": "", - "TCF19": "", - "CCDC3": "", - "EFHC2": "", - "KCNE1B": "", - "PBX2": "", - "PAMR1": "", - "GJA5": "", - "TYW1B": "", - "PLP1": "", - "ANKDD1A": "", - "GBE1": "", - "MAMDC2": "", - "PIGW": "", - "MOCOS": "", - "GRIPAP1": "", - "COL26A1": "", - "MAPT-IT1": "", - "SRRT": "", - "ZNF595": "", - "SEMA3B": "", - "C21orf58": "", - "RHBDF1": "", - "EGR2": "", - "ABRAXAS2": "", - "NPRL3": "", - "TXNIP": "", - "RYK": "", - "RXRB": "", - "LILRB2": "", - "SYT3": "", - "TRPV6": "", - "PARG": "", - "CSNK1G2": "", - "ARHGEF16": "", - "HSH2D": "", - "ALDH3B1": "", - "ZNF274": "", - "MUC13": "", - "LINC00842": "", - "AKT1": "", - "CHM": "", - "ZSCAN26": "", - "MAL2": "", - "PTH2R": "", - "GPANK1": "", - "LINC01623": "", - "CD86": "", - "RHBG": "", - "TMSB15B": "", - "ZCCHC3": "", - "TUBB": "", - "POLDIP2": "", - "PRMT3": "", - "PPT2-EGFL8": "", - "LINC02210-CRHR1": "", - "KIFC1": "", - "USP27X": "", - "HDGFL2": "", - "FOXI3": "", - "PAH": "", - "P3H3": "", - "CRHR1": "", - "LOC101927759": "", - "ARFRP1": "", - "C3orf38": "", - "DAXX": "", - "SLC37A4": "", - "IQCA1L": "", - "MMP28": "", - "LINC02197": "", - "NECAP1": "", - "CDSN": "", - "LOC440570": "", - "B3GNT6": "", - "AOAH": "", - "GAS2L1": "", - "MPIG6B": "", - "CDK11B": "", - "ASPN": "", - "HSPA1B": "", - "LOC100508631": "", - "MICB": "", - "LOC102724580": "", - "SENP3": "", - "RBM38": "", - "TMC4": "", - "LILRB5": "", - "C6orf47": "", - "RIOX1": "", - "BHLHE40-AS1": "", - "SRD5A2": "", - "TSEN34": "", - "EI24": "", - "PADI6": "", - "LINC00893": "", 
- "CYP2D7": "", - "LINC01622": "", - "LINC01879": "", - "REC8": "", - "UNC93B1": "", - "POU5F1": "", - "GPIHBP1": "", - "FOXD1": "", - "GPSM1": "", - "MICA": "", - "UGT2B15": "", - "KIZ": "", - "ARL17A": "", - "PRAMEF36P": "", - "HCG22": "", - "RNF39": "", - "BECN1": "", - "MOG": "", - "PROSER3": "", - "LINC01149": "", - "CYP21A2": "", - "PRAMEF18": "", - "TBC1D3G": "", - "NR2E3": "", - "NR1H2": "", - "VEGFC": "", - "TBC1D3F": "", - "C18orf65": "", - "HOXC11": "", - "TRY2P": "", - "LINC01138": "", - "LINC00243": "", - "HCG4": "", - "GBAP1": "", - "LYPD4": "", - "FAM226A": "", - "ZNF787": "", - "CYP11A1": "", - "EEF1A2": "", - "SLC38A5": "", - "MICB-DT": "", - "ZNF852": "", - "LOC441242": "", - "RNF115": "", - "SMA4": "", - "TAZ": "", - "LENG9": "", - "STRAP": "", - "CYP4F8": "", - "TSPAN10": "", - "KIR3DL1": "", - "HCP5B": "", - "MMP12": "", - "STAG3L2": "", - "GOLGA6L17P": "", - "ZBTB12": "", - "TREH": "", - "PMCHL2": "", - "LAGE3": "", - "ATRNL1": "", - "CEACAM20": "", - "ZG16": "", - "MIR3936HG": "", - "LOC102724562": "", - "INTS4P2": "", - "LINC00221": "", - "DHRS3": "", - "HCG27": "", - "CLTB": "", - "KLK6": "", - "HLA-H": "", - "SPANXA2-OT1": "", - "PRAMEF11": "", - "PPP1R11": "", - "NDUFA6-AS1": "", - "ECHDC3": "", - "HLA-DQB1": "", - "KIR2DS4": "", - "HLA-B": "", - "LOC102725121": "", - "CIB2": "", - "KIR2DL1": "", - "KIR2DL2": "", - "HLA-C": "", - "ABO": "", - "KRTAP10-7": "", - "HLA-G": "", - "CWC15": "", - "C17orf100": "", - "HLA-J": "", - "OR4K3": "", - "HLA-DQA1": "", - "LOC105379550": "", - "MRPS21": "", - "SIGLEC17P": "", - "LINC01115": "", - "NUDT18": "", - "ORAI1": "", - "PNLIPRP2": "", - "KLF14": "", - "SSX2B": "", - "CCL15-CCL14": "", - "UBXN8": "", - "IGFBP2": "", - "TMEM44-AS1": "", - "TEX13A": "", - "LCA10": "", - "SPANXN2": "", - "SYCE1": "", - "LILRA5": "", - "KRTAP5-4": "", - "FAM228B": "", - "OR12D1": "", - "SPC25": "", - "FCGR1CP": "", - "OR52E1": "", - "NOP16": "", - "EGFL8": "", - "PRAF2": "", - "LOC388282": "", - "CCNQ": "", - "VN1R3": 
"", - "HLA-V": "", - "SBK3": "", - "LOC100128594": "", - "KLRF1": "", - "EMG1": "", - "TARM1": "", - "UBE2NL": "", - "OR5AL1": "", - "TPSB2": "", - "PSORS1C2": "", - "HLA-DQA2": "", - "OR10AC1": "", - "OR2J1": "", - "OR10J4": "", - "CSNK2B": "", - "OR4Q2": "", - "LOC100507547": "", - "ZNF630-AS1": "", - "HLA-DMA": "", - "OR4E1": "", - "PRB3": "", - "CCL15": "", - "C8orf59": "", - "PSMB9": "", - "LINC01719": "", - "CT45A1": "", - "BST2": "", - "NCF4-AS1": "", - "FOLR3": "", - "KRTAP9-9": "", - "COPZ2": "", - "LYNX1-SLURP2": "", - "SAPCD1": "", - "PSORS1C1": "", - "ZNF793-AS1": "", - "ZNRD1": "", - "FRG1CP": "", - "LINC02362": "", - "KRTAP4-1": "", - "PICSAR": "", - "TWIST2": "", - "LINC01796": "", - "HCG25": "", - "KRTAP7-1": "", - "CRLF2": "", - "MDH2": "", - "HCG9": "", - "ATP5MC1": "", - "TTTY14": "", - "LOC100507384": "", - "PMS2P2": "", - "HCG23": "", - "LINC00226": "", - "RPP21": "", - "GPHB5": "", - "GAGE8": "", - "GAGE2E": "", - "LOC101928087": "", - "GAGE12B": "", - "GRIFIN": "", - "LOC102725193": "", - "HCG14": "", - "IFITM4P": "", - "SNORD48": "", - "MUC22": "", - "PTPRQ": "", - "HERC2": "", - "OTUD7A": "", - "LOC646214": "", - "TJP1": "", - "WDR81": "", - "KLF13": "", - "POLR2A": "", - "LOC100288637": "", - "GOLGA8N": "", - "GOLGA8J": "", - "GOLGA8K": "", - "GOLGA8R": "", - "MTMR10": "", - "SMIM10L1": "", - "KLLN": "", - "LINC02249": "", - "APBA2": "", - "CHRNA7": "", - "DBET": "", - "WNT3": "", - "GOLGA2P10": "", - "CHRFAM7A": "", - "RPH3AL": "", - "SORD2P": "", - "LINC00552": "", - "MPV17L": "", - "SLC22A18AS": "", - "C16orf45": "", - "GRK1": "", - "FRG2": "", - "LOC143666": "", - "FRG2EP": "", - "LOC105373100": "", - "GOLGA8Q": "", - "HERC2P7": "", - "SLC22A18": "", - "METRNL": "", - "BTNL2": "", - "ADAM18": "", - "PRSS22": "", - "C2orf27B": "", - "C2orf27A": "", - "LOC283710": "", - "LOC101928804": "", - "IFI27": "", - "ABCC6": "", - "LOC692247": "" - } - is_it_gapped = gapGene.get(symbol) - if is_it_gapped == '': + gapGene = [ + "LPP", + "VPS13D", + 
"SSPO", + "HTT", + "PRKDC", + "RNA45SN4", + "RNA45SN1", + "RNA45SN2", + "RNA45SN3", + "ALMS1", + "ZNF141", + "PRLR", + "NBPF10", + "ACACA", + "ZMYM2", + "MIAT", + "WDFY4", + "CECR2", + "FAM30A", + "MYO15B", + "CELF2", + "JRK", + "PTEN", + "ZNF714", + "MGAT4C", + "SLITRK4", + "ZAN", + "COL19A1", + "CCDC144B", + "RAB11FIP4", + "ZNF516", + "ZNF518A", + "PROX1", + "HCG18", + "SON", + "ARMC9", + "CAMK1D", + "GRIP2", + "KLHL5", + "PPIP5K2", + "PKD1L2", + "SLC7A2", + "DGKK", + "IQSEC1", + "SYNM", + "SARM1", + "SMAD5", + "MAML3", + "CXorf40A", + "MAPT", + "ITIH5", + "NOTCH4", + "FER1L4", + "CNTNAP4", + "NLRC3", + "COL18A1", + "SLC6A6", + "DDX52", + "CDH4", + "SLC46A1", + "SLC35E2B", + "OCLN", + "DCAF7", + "SCAMP1", + "ATG13", + "SMAD3", + "DDX6", + "SLC25A53", + "ALG9", + "DCP1A", + "NCAM1", + "LINC00869", + "MYH7", + "DIXDC1", + "ZBTB4", + "RABEP1", + "PVR", + "POM121C", + "HOOK1", + "MAPK8IP2", + "ZNF280B", + "WASF2", + "PLEKHA2", + "PPP4R3B", + "FAM83H", + "SALL3", + "PHKG2", + "C18orf25", + "ZNF229", + "ZNF765-ZNF761", + "KANSL1", + "FAM102B", + "NOTCH2NL", + "YTHDF3", + "DPCR1", + "DACH1", + "PKD1L3", + "GRIA3", + "CYP1B1", + "LTBP4", + "SPON1", + "RNA28SN4", + "RNA28SN1", + "TRIL", + "RNA28SN3", + "RNA28SN2", + "XKR5", + "RBM8A", + "SALL2", + "JADE3", + "DHX57", + "PIGN", + "CPNE3", + "ANO1", + "NATD1", + "DKFZP434A062", + "TDRD9", + "BDNF", + "IVD", + "STIMATE", + "KCP", + "PRAG1", + "KLHL18", + "LYNX1", + "HYOU1", + "HLA-L", + "ATG9B", + "SLC6A14", + "PCSK6", + "MIR99AHG", + "TOX4", + "GABBR1", + "RABGEF1", + "PRR36", + "MAP3K14", + "PCDHB9", + "LOC102723753", + "MYO19", + "SRSF8", + "CTPS2", + "AHCYL1", + "UHRF1", + "MARCKS", + "ZMYM1", + "SENP3-EIF4A1", + "SEC14L2", + "RAPGEFL1", + "ZNF761", + "CNTROB", + "SSTR3", + "PAX2", + "GGA3", + "MCL1", + "EPS8", + "LINC02210", + "KRBA1", + "MSH5-SAPCD1", + "HLA-DPB1", + "PPP1R9B", + "OPLAH", + "UBXN4", + "ZNF2", + "EPHB6", + "LIX1L", + "RAPGEF4", + "MED22", + "POLR3C", + "DDR1", + "SIGLEC16", + "NEFL", + "ABCG4", + 
"BAG6", + "RECQL4", + "SPPL2B", + "RETREG3", + "FZD6", + "SCRT1", + "LSM14A", + "TAPBP", + "TWSG1", + "FRMD8", + "VPS26C", + "PNMA3", + "ZNF282", + "SP8", + "SRRM3", + "CCDC125", + "NPIPB3", + "FAM13C", + "GTF2IP1", + "ANKRD34A", + "PPP1R2", + "PHYHIPL", + "USH1G", + "LINC00461", + "ZNRD1ASP", + "TRIM10", + "SPIB", + "BCL6B", + "SCARF2", + "KIR3DX1", + "LOC400682", + "HLA-DOA", + "PLCD3", + "VPS11", + "FAM231D", + "TRIM52", + "ABCF1", + "ANP32E", + "COPG2IT1", + "TGIF2", + "LHX1", + "PIK3R6", + "APOL4", + "ZNF502", + "FGD5P1", + "LINC00624", + "ADRA2B", + "ZNF598", + "GNAZ", + "TMEM106A", + "SLC12A9", + "TCF19", + "CCDC3", + "EFHC2", + "KCNE1B", + "PBX2", + "PAMR1", + "GJA5", + "TYW1B", + "PLP1", + "ANKDD1A", + "GBE1", + "MAMDC2", + "PIGW", + "MOCOS", + "GRIPAP1", + "COL26A1", + "MAPT-IT1", + "SRRT", + "ZNF595", + "SEMA3B", + "C21orf58", + "RHBDF1", + "EGR2", + "ABRAXAS2", + "NPRL3", + "TXNIP", + "RYK", + "RXRB", + "LILRB2", + "SYT3", + "TRPV6", + "PARG", + "CSNK1G2", + "ARHGEF16", + "HSH2D", + "ALDH3B1", + "ZNF274", + "MUC13", + "LINC00842", + "AKT1", + "CHM", + "ZSCAN26", + "MAL2", + "PTH2R", + "GPANK1", + "LINC01623", + "CD86", + "RHBG", + "TMSB15B", + "ZCCHC3", + "TUBB", + "POLDIP2", + "PRMT3", + "PPT2-EGFL8", + "LINC02210-CRHR1", + "KIFC1", + "USP27X", + "HDGFL2", + "FOXI3", + "PAH", + "P3H3", + "CRHR1", + "LOC101927759", + "ARFRP1", + "C3orf38", + "DAXX", + "SLC37A4", + "IQCA1L", + "MMP28", + "LINC02197", + "NECAP1", + "CDSN", + "LOC440570", + "B3GNT6", + "AOAH", + "GAS2L1", + "MPIG6B", + "CDK11B", + "ASPN", + "HSPA1B", + "LOC100508631", + "MICB", + "LOC102724580", + "SENP3", + "RBM38", + "TMC4", + "LILRB5", + "C6orf47", + "RIOX1", + "BHLHE40-AS1", + "SRD5A2", + "TSEN34", + "EI24", + "PADI6", + "LINC00893", + "CYP2D7", + "LINC01622", + "LINC01879", + "REC8", + "UNC93B1", + "POU5F1", + "GPIHBP1", + "FOXD1", + "GPSM1", + "MICA", + "UGT2B15", + "KIZ", + "ARL17A", + "PRAMEF36P", + "HCG22", + "RNF39", + "BECN1", + "MOG", + "PROSER3", + "LINC01149", + "CYP21A2", + 
"PRAMEF18", + "TBC1D3G", + "NR2E3", + "NR1H2", + "VEGFC", + "TBC1D3F", + "C18orf65", + "HOXC11", + "TRY2P", + "LINC01138", + "LINC00243", + "HCG4", + "GBAP1", + "LYPD4", + "FAM226A", + "ZNF787", + "CYP11A1", + "EEF1A2", + "SLC38A5", + "MICB-DT", + "ZNF852", + "LOC441242", + "RNF115", + "SMA4", + "TAZ", + "LENG9", + "STRAP", + "CYP4F8", + "TSPAN10", + "KIR3DL1", + "HCP5B", + "MMP12", + "STAG3L2", + "GOLGA6L17P", + "ZBTB12", + "TREH", + "PMCHL2", + "LAGE3", + "ATRNL1", + "CEACAM20", + "ZG16", + "MIR3936HG", + "LOC102724562", + "INTS4P2", + "LINC00221", + "DHRS3", + "HCG27", + "CLTB", + "KLK6", + "HLA-H", + "SPANXA2-OT1", + "PRAMEF11", + "PPP1R11", + "NDUFA6-AS1", + "ECHDC3", + "HLA-DQB1", + "KIR2DS4", + "HLA-B", + "LOC102725121", + "CIB2", + "KIR2DL1", + "KIR2DL2", + "HLA-C", + "ABO", + "KRTAP10-7", + "HLA-G", + "CWC15", + "C17orf100", + "HLA-J", + "OR4K3", + "HLA-DQA1", + "LOC105379550", + "MRPS21", + "SIGLEC17P", + "LINC01115", + "NUDT18", + "ORAI1", + "PNLIPRP2", + "KLF14", + "SSX2B", + "CCL15-CCL14", + "UBXN8", + "IGFBP2", + "TMEM44-AS1", + "TEX13A", + "LCA10", + "SPANXN2", + "SYCE1", + "LILRA5", + "KRTAP5-4", + "FAM228B", + "OR12D1", + "SPC25", + "FCGR1CP", + "OR52E1", + "NOP16", + "EGFL8", + "PRAF2", + "LOC388282", + "CCNQ", + "VN1R3", + "HLA-V", + "SBK3", + "LOC100128594", + "KLRF1", + "EMG1", + "TARM1", + "UBE2NL", + "OR5AL1", + "TPSB2", + "PSORS1C2", + "HLA-DQA2", + "OR10AC1", + "OR2J1", + "OR10J4", + "CSNK2B", + "OR4Q2", + "LOC100507547", + "ZNF630-AS1", + "HLA-DMA", + "OR4E1", + "PRB3", + "CCL15", + "C8orf59", + "PSMB9", + "LINC01719", + "CT45A1", + "BST2", + "NCF4-AS1", + "FOLR3", + "KRTAP9-9", + "COPZ2", + "LYNX1-SLURP2", + "SAPCD1", + "PSORS1C1", + "ZNF793-AS1", + "ZNRD1", + "FRG1CP", + "LINC02362", + "KRTAP4-1", + "PICSAR", + "TWIST2", + "LINC01796", + "HCG25", + "KRTAP7-1", + "CRLF2", + "MDH2", + "HCG9", + "ATP5MC1", + "TTTY14", + "LOC100507384", + "PMS2P2", + "HCG23", + "LINC00226", + "RPP21", + "GPHB5", + "GAGE8", + "GAGE2E", + "LOC101928087", + 
"GAGE12B", + "GRIFIN", + "LOC102725193", + "HCG14", + "IFITM4P", + "SNORD48", + "MUC22", + "PTPRQ", + "HERC2", + "OTUD7A", + "LOC646214", + "TJP1", + "WDR81", + "KLF13", + "POLR2A", + "LOC100288637", + "GOLGA8N", + "GOLGA8J", + "GOLGA8K", + "GOLGA8R", + "MTMR10", + "SMIM10L1", + "KLLN", + "LINC02249", + "APBA2", + "CHRNA7", + "DBET", + "WNT3", + "GOLGA2P10", + "CHRFAM7A", + "RPH3AL", + "SORD2P", + "LINC00552", + "MPV17L", + "SLC22A18AS", + "C16orf45", + "GRK1", + "FRG2", + "LOC143666", + "FRG2EP", + "LOC105373100", + "GOLGA8Q", + "HERC2P7", + "SLC22A18", + "METRNL", + "BTNL2", + "ADAM18", + "PRSS22", + "C2orf27B", + "C2orf27A", + "LOC283710", + "LOC101928804", + "IFI27", + "ABCC6", + "LOC692247" + ] + if symbol in gapGene: return True - else: - return False \ No newline at end of file + + return False diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 43676e90..bd7c95ef 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -560,7 +560,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue for build in self.genome_builds: test = vvChromosomes.supported_for_mapping(alt_gen_var.ac, build) - if test == 'true': + if test: try: vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, build, variant.reverse_normalizer, self.sf) From a1ce5baccb87b2bd4980f7d8077fc9475768d384 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 24 May 2019 16:18:06 +0100 Subject: [PATCH 125/223] Refactored vvChromosomes into seq_data filename --- VariantValidator/modules/format_converters.py | 6 +-- VariantValidator/modules/mappers.py | 8 ++-- .../modules/{vvChromosomes.py => seq_data.py} | 0 VariantValidator/modules/vvHGVS.py | 16 +++---- VariantValidator/modules/vvLiftover.py | 16 +++---- VariantValidator/modules/vvMixinConverters.py | 46 +++++++++---------- VariantValidator/modules/vvMixinCore.py | 4 +- 7 files changed, 48 insertions(+), 48 deletions(-) rename 
VariantValidator/modules/{vvChromosomes.py => seq_data.py} (100%) diff --git a/VariantValidator/modules/format_converters.py b/VariantValidator/modules/format_converters.py index 64a2e054..8518fd0c 100644 --- a/VariantValidator/modules/format_converters.py +++ b/VariantValidator/modules/format_converters.py @@ -3,7 +3,7 @@ import copy from .vvLogging import logger from .variant import Variant -from . import vvChromosomes +from . import seq_data from . import vvFunctions as fn @@ -148,7 +148,7 @@ def vcf2hgvs_stage2(variant, validator): if re.match('CHR', chr_num): chr_num = chr_num.replace('CHR', '') # Use selected assembly - accession = vvChromosomes.to_accession(chr_num, validator.selected_assembly) + accession = seq_data.to_accession(chr_num, validator.selected_assembly) if accession is None: variant.warnings.append(chr_num + ' is not part of genome build ' + validator.selected_assembly) logger.warning(chr_num + ' is not part of genome build ' + validator.selected_assembly) @@ -240,7 +240,7 @@ def vcf2hgvs_stage3(variant, validator): chr_num = chr_num.upper().strip() if re.match('CHR', chr_num): chr_num = chr_num.replace('CHR', '') # Use selected assembly - accession = vvChromosomes.to_accession(chr_num, validator.selected_assembly) + accession = seq_data.to_accession(chr_num, validator.selected_assembly) if accession is None: variant.warnings.append(chr_num + ' is not part of genome build ' + validator.selected_assembly) skipvar = True diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index 2775d096..4624deb0 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -5,7 +5,7 @@ from .vvLogging import logger from . import vvHGVS from .variant import Variant -from . import vvChromosomes +from . import seq_data from . import vvFunctions as fn from . import gapped_mapping @@ -109,7 +109,7 @@ def gene_to_transcripts(variant, validator): # Chromosome build is not supported or intergenic??? 
else: - sfm = vvChromosomes.supported_for_mapping(variant.hgvs_genomic.ac, variant.primary_assembly) + sfm = seq_data.supported_for_mapping(variant.hgvs_genomic.ac, variant.primary_assembly) if sfm: try: validator.vr.validate(variant.hgvs_genomic) @@ -640,7 +640,7 @@ def transcripts_to_gene(variant, validator): # Gap gene black list if variant.gene_symbol: # If the gene symbol is not in the list, the value False will be returned - gap_compensation = vvChromosomes.gap_black_list(variant.gene_symbol) + gap_compensation = seq_data.gap_black_list(variant.gene_symbol) # Intron spanning variants if 'boundary' in str(error) or 'spanning' in str(error): @@ -858,7 +858,7 @@ def final_tx_to_multiple_genomic(variant, validator, tx_variant): # Gap gene black list if variant.gene_symbol: # If the gene symbol is not in the list, the value False will be returned - gap_compensation = vvChromosomes.gap_black_list(variant.gene_symbol) + gap_compensation = seq_data.gap_black_list(variant.gene_symbol) # Look for variants spanning introns try: diff --git a/VariantValidator/modules/vvChromosomes.py b/VariantValidator/modules/seq_data.py similarity index 100% rename from VariantValidator/modules/vvChromosomes.py rename to VariantValidator/modules/seq_data.py diff --git a/VariantValidator/modules/vvHGVS.py b/VariantValidator/modules/vvHGVS.py index 8ef731fd..2a78fe89 100644 --- a/VariantValidator/modules/vvHGVS.py +++ b/VariantValidator/modules/vvHGVS.py @@ -6,7 +6,7 @@ # Import modules import re import copy -from . import vvChromosomes +from . 
import seq_data # Import Biopython modules from Bio.Seq import Seq @@ -62,7 +62,7 @@ def pvcf_to_hgvs(input, selected_assembly, normalization_direction, reverse_norm if re.match('CHR', chr_num): chr_num = chr_num.replace('CHR', '') # Use selected assembly - accession = vvChromosomes.to_accession(chr_num, selected_assembly) + accession = seq_data.to_accession(chr_num, selected_assembly) if accession is None: error = chr_num + ' is not part of genome build ' + selected_assembly + ' or is not supported' raise pseudoVCF2HGVSError(error) @@ -199,7 +199,7 @@ def hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): # hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) # Chr - chr = vvChromosomes.to_chr_num_ucsc(reverse_normalized_hgvs_genomic.ac, primary_assembly) + chr = seq_data.to_chr_num_ucsc(reverse_normalized_hgvs_genomic.ac, primary_assembly) if chr is not None: pass else: @@ -371,14 +371,14 @@ def report_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): grc_pa = 'GRCh38' # UCSC Chr - ucsc_chr = vvChromosomes.to_chr_num_ucsc(reverse_normalized_hgvs_genomic.ac, ucsc_pa) + ucsc_chr = seq_data.to_chr_num_ucsc(reverse_normalized_hgvs_genomic.ac, ucsc_pa) if ucsc_chr is not None: pass else: ucsc_chr = reverse_normalized_hgvs_genomic.ac # GRC Chr - grc_chr = vvChromosomes.to_chr_num_refseq(reverse_normalized_hgvs_genomic.ac, grc_pa) + grc_chr = seq_data.to_chr_num_refseq(reverse_normalized_hgvs_genomic.ac, grc_pa) if grc_chr is not None: pass else: @@ -533,7 +533,7 @@ def pos_lock_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): # hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) # Chr - chr = vvChromosomes.to_chr_num_ucsc(reverse_normalized_hgvs_genomic.ac, primary_assembly) + chr = seq_data.to_chr_num_ucsc(reverse_normalized_hgvs_genomic.ac, primary_assembly) if chr is not None: pass else: @@ -671,7 +671,7 @@ def hard_right_hgvs2vcf(hgvs_genomic, primary_assembly, hn, sf): 
normalized_hgvs_genomic = hn.normalize(hgvs_genomic_variant) # Chr - chr = vvChromosomes.to_chr_num_ucsc(normalized_hgvs_genomic.ac, primary_assembly) + chr = seq_data.to_chr_num_ucsc(normalized_hgvs_genomic.ac, primary_assembly) if chr is not None: pass else: @@ -826,7 +826,7 @@ def hard_left_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) # Chr - chr = vvChromosomes.to_chr_num_ucsc(reverse_normalized_hgvs_genomic.ac, primary_assembly) + chr = seq_data.to_chr_num_ucsc(reverse_normalized_hgvs_genomic.ac, primary_assembly) if chr is not None: pass else: diff --git a/VariantValidator/modules/vvLiftover.py b/VariantValidator/modules/vvLiftover.py index bbdc15e2..bdb470b0 100644 --- a/VariantValidator/modules/vvLiftover.py +++ b/VariantValidator/modules/vvLiftover.py @@ -10,7 +10,7 @@ import hgvs.sequencevariant import re import os -from . import vvChromosomes +from . import seq_data from . import vvHGVS from .vvLogging import logger from pyliftover import LiftOver @@ -131,25 +131,25 @@ def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, reverse_no for op in options: if re.match('NC_', op[1]): if re.match('GRC', build_to): - sfm = vvChromosomes.to_chr_num_refseq(op[1], build_to) + sfm = seq_data.to_chr_num_refseq(op[1], build_to) if re.match('hg', build_to): - sfm = vvChromosomes.to_chr_num_ucsc(op[1], build_to) + sfm = seq_data.to_chr_num_ucsc(op[1], build_to) if sfm is not None: selected.append([op[0], op[1]]) for op in options: if re.match('NT_', op[1]): if re.match('GRC', build_to): - sfm = vvChromosomes.to_chr_num_refseq(op[1], build_to) + sfm = seq_data.to_chr_num_refseq(op[1], build_to) if re.match('hg', build_to): - sfm = vvChromosomes.to_chr_num_ucsc(op[1], build_to) + sfm = seq_data.to_chr_num_ucsc(op[1], build_to) if sfm is not None: selected.append([op[0], op[1]]) for op in options: if re.match('NW_', op[1]): if re.match('GRC', build_to): - sfm = 
vvChromosomes.to_chr_num_refseq(op[1], build_to) + sfm = seq_data.to_chr_num_refseq(op[1], build_to) if re.match('hg', build_to): - sfm = vvChromosomes.to_chr_num_ucsc(op[1], build_to) + sfm = seq_data.to_chr_num_ucsc(op[1], build_to) if sfm is not None: selected.append([op[0], op[1]]) @@ -247,7 +247,7 @@ def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, reverse_no lifted_ref_bases = my_seq.reverse_complement() your_seq = Seq(lifted_alt_bases) lifted_alt_bases = your_seq.reverse_complement() - accession = vvChromosomes.to_accession(chr, lo_to) + accession = seq_data.to_accession(chr, lo_to) if accession is None: wrn = 'Unable to identify an equivalent %s chromosome ID for %s' % (str(lo_to), str(chr)) logger.warning(wrn) diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index f53c9316..25341c8a 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -13,7 +13,7 @@ import hgvs.variantmapper import hgvs.sequencevariant from . import vvMixinInit -from . import vvChromosomes +from . import seq_data from . 
import vvHGVS from urllib.parse import urlparse import httplib2 as http @@ -205,7 +205,7 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): utilise_gap_code = False else: # If the gene symbol is not in the list, the value False will be returned - utilise_gap_code = vvChromosomes.gap_black_list(gene_symbol) + utilise_gap_code = seq_data.gap_black_list(gene_symbol) # Warn gap code in use logger.warning("gap_compensation_myevm = " + str(utilise_gap_code)) @@ -349,7 +349,7 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): if re.match('blat', option[2]): continue if re.match('NC_', option[1]): - chr_num = vvChromosomes.supported_for_mapping(str(option[1]), primary_assembly) + chr_num = seq_data.supported_for_mapping(str(option[1]), primary_assembly) if chr_num != 'false': try: hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) @@ -368,7 +368,7 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): if re.match('blat', option[2]): continue if re.match('NC_', option[1]): - chr_num = vvChromosomes.supported_for_mapping(str(option[1]), primary_assembly) + chr_num = seq_data.supported_for_mapping(str(option[1]), primary_assembly) if chr_num == 'false': try: hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) @@ -388,7 +388,7 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): if re.match('blat', option[2]): continue if re.match('NT_', option[1]): - chr_num = vvChromosomes.supported_for_mapping(str(option[1]), primary_assembly) + chr_num = seq_data.supported_for_mapping(str(option[1]), primary_assembly) if chr_num != 'false': try: hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) @@ -405,8 +405,8 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): if re.match('blat', option[2]): continue if re.match('NT_', option[1]): - chr_num = vvChromosomes.supported_for_mapping(str(option[1]), - primary_assembly) + chr_num = seq_data.supported_for_mapping(str(option[1]), + primary_assembly) if 
chr_num == 'false': try: hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) @@ -424,8 +424,8 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): if re.match('blat', option[2]): continue if re.match('NW_', option[1]): - chr_num = vvChromosomes.supported_for_mapping(str(option[1]), - primary_assembly) + chr_num = seq_data.supported_for_mapping(str(option[1]), + primary_assembly) if chr_num != 'false': try: hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) @@ -442,8 +442,8 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): if re.match('blat', option[2]): continue if re.match('NW_', option[1]): - chr_num = vvChromosomes.supported_for_mapping(str(option[1]), - primary_assembly) + chr_num = seq_data.supported_for_mapping(str(option[1]), + primary_assembly) if chr_num == 'false': try: hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) @@ -887,7 +887,7 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn if re.match('blat', option[2]): continue if re.match('NC_', option[1]): - chr_num = vvChromosomes.supported_for_mapping(str(option[1]), primary_assembly) + chr_num = seq_data.supported_for_mapping(str(option[1]), primary_assembly) if chr_num != 'false': try: hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) @@ -906,7 +906,7 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn if re.match('blat', option[2]): continue if re.match('NC_', option[1]): - chr_num = vvChromosomes.supported_for_mapping(str(option[1]), primary_assembly) + chr_num = seq_data.supported_for_mapping(str(option[1]), primary_assembly) if chr_num != 'false': try: hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) @@ -925,7 +925,7 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn if re.match('blat', option[2]): continue if re.match('NC_', option[1]): - chr_num = vvChromosomes.supported_for_mapping(str(option[1]), primary_assembly) + chr_num = 
seq_data.supported_for_mapping(str(option[1]), primary_assembly) if chr_num == 'false': try: hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) @@ -946,8 +946,8 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn if re.match('blat', option[2]): continue if re.match('NT_', option[1]): - chr_num = vvChromosomes.supported_for_mapping(str(option[1]), - primary_assembly) + chr_num = seq_data.supported_for_mapping(str(option[1]), + primary_assembly) if chr_num != 'false': try: hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) @@ -965,8 +965,8 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn if re.match('blat', option[2]): continue if re.match('NT_', option[1]): - chr_num = vvChromosomes.supported_for_mapping(str(option[1]), - primary_assembly) + chr_num = seq_data.supported_for_mapping(str(option[1]), + primary_assembly) if chr_num == 'false': try: hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) @@ -985,8 +985,8 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn if re.match('blat', option[2]): continue if re.match('NW_', option[1]): - chr_num = vvChromosomes.supported_for_mapping(str(option[1]), - primary_assembly) + chr_num = seq_data.supported_for_mapping(str(option[1]), + primary_assembly) if chr_num != 'false': try: hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) @@ -1004,8 +1004,8 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn if re.match('blat', option[2]): continue if re.match('NW_', option[1]): - chr_num = vvChromosomes.supported_for_mapping(str(option[1]), - primary_assembly) + chr_num = seq_data.supported_for_mapping(str(option[1]), + primary_assembly) if chr_num == 'false': try: hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) @@ -1089,7 +1089,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): utilise_gap_code = False else: # If the gene symbol is not in the list, the value False will be 
returned - utilise_gap_code = vvChromosomes.gap_black_list(gene_symbol) + utilise_gap_code = seq_data.gap_black_list(gene_symbol) # Warn gap code in use logger.warning("gap_compensation_mvm = " + str(utilise_gap_code)) diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index bd7c95ef..e838fd8c 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -9,7 +9,7 @@ from .vvLogging import logger from . import vvHGVS from . import vvFunctions as fn -from . import vvChromosomes +from . import seq_data from . import vvMixinConverters from .variant import Variant from . import format_converters @@ -559,7 +559,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr except hgvs.exceptions.HGVSInvalidVariantError: continue for build in self.genome_builds: - test = vvChromosomes.supported_for_mapping(alt_gen_var.ac, build) + test = seq_data.supported_for_mapping(alt_gen_var.ac, build) if test: try: vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, build, variant.reverse_normalizer, From a2c332370f580732e7c366bc5fe6e9bd8992f188 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 19 Jun 2019 10:47:37 +0100 Subject: [PATCH 126/223] Fix issue #49 --- VariantValidator/modules/format_converters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VariantValidator/modules/format_converters.py b/VariantValidator/modules/format_converters.py index 8518fd0c..3eff01e9 100644 --- a/VariantValidator/modules/format_converters.py +++ b/VariantValidator/modules/format_converters.py @@ -510,7 +510,7 @@ def indel_catching(variant, validator): edit_pass = re.compile(r'_\d+$') edit_fail = re.compile(r'\d+$') if edit_fail.search(variant.quibble): - if not edit_pass.search(variant.quibble): + if not edit_pass.search(variant.quibble) and 'fs' not in variant.quibble: failed = variant.quibble # Catch the trailing digits digits = re.search(r"(\d+$)", failed) From 
05fa57ffd73ffa09c759f0632c8dc26308845bed Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 19 Jun 2019 10:57:58 +0100 Subject: [PATCH 127/223] Fix issue #52 --- VariantValidator/modules/mappers.py | 15 +++++++++++++++ test/test_inputs.py | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index 4624deb0..ec562e10 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -800,6 +800,21 @@ def transcripts_to_gene(variant, validator): except Exception: fn.exceptPass() + # Final protein check, i.e. does the output make sense + # We are looking for exonic c. descriptioms labelled as p.? + # This code is triggered by variant NM_000088.3:c.589-1GG>G + # Note, this will not correct read-through stop codons, but it will try! + if hgvs_coding.posedit.pos.start.offset == 0 and hgvs_coding.posedit.pos.start.offset == 0 and \ + '?' in str(hgvs_protein): + protein_dict = validator.myc_to_p(hgvs_coding, variant.evm, re_to_p=False) + if protein_dict['error'] == '': + hgvs_protein = protein_dict['hgvs_protein'] + else: + error = protein_dict['error'] + if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': + hgvs_protein = protein_dict['hgvs_protein'] + variant.warnings.append(error) + # Check for up-to-date transcript version tx_id_info = validator.hdp.get_tx_identity_info(hgvs_coding.ac) uta_gene_symbol = tx_id_info[6] diff --git a/test/test_inputs.py b/test/test_inputs.py index 99c16467..e9858817 100644 --- a/test/test_inputs.py +++ b/test/test_inputs.py @@ -562,7 +562,7 @@ def test_variant13(self): assert results['NM_000088.3:c.590del']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_000088.3:c.590del']['alt_genomic_loci'], []) assert results['NM_000088.3:c.590del']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.590del']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.590del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197ValfsTer68)', 'slr': 'NP_000079.2:p.(G197Vfs*68)'} assert results['NM_000088.3:c.590del']['submitted_variant'] == 'NM_000088.3:c.589-1GG>G' assert results['NM_000088.3:c.590del']['genome_context_intronic_sequence'] == '' assert results['NM_000088.3:c.590del']['hgvs_lrg_variant'] == 'LRG_1:g.8639del' From 43007b855573eb0e571d74f221144b4ce4d371cd Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 19 Jun 2019 11:11:26 +0100 Subject: [PATCH 128/223] Fix issue #54 --- VariantValidator/modules/use_checking.py | 32 ++++++++++++++++-------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/VariantValidator/modules/use_checking.py b/VariantValidator/modules/use_checking.py index b7588a8e..7a036d0f 100644 --- a/VariantValidator/modules/use_checking.py +++ b/VariantValidator/modules/use_checking.py @@ -198,9 +198,10 @@ def structure_checks_c(variant, validator): report_gen = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, variant.primary_assembly, variant.hn) + report_gen = variant.hn.normalize(report_gen) error = 'Using a transcript reference sequence to specify a variant position that lies ' \ 'outside of the reference sequence is not HGVS-compliant: ' \ - 'Instead use ' + fn.valstr(report_gen) + 'Instead re-submit ' + fn.valstr(report_gen) except Exception: fn.exceptPass() variant.warnings.append(error) @@ -218,9 +219,10 @@ def structure_checks_c(variant, validator): if 'n.1-' in str(variant.input_parses): input_parses = variant.evm.n_to_c(variant.input_parses) error = 'Using a transcript reference sequence to specify a variant position that lies outside of the ' \ - 'reference sequence is not HGVS-compliant. Instead use ' + 'reference sequence is not HGVS-compliant. 
Instead re-submit ' genomic_position = validator.myevm_t_to_g(input_parses, variant.no_norm_evm, variant.primary_assembly, variant.hn) + genomic_position = variant.hn.normalize(genomic_position) error = error + fn.valstr(genomic_position) variant.warnings.append(error) logger.warning(error) @@ -276,8 +278,9 @@ def structure_checks_c(variant, validator): report_gen = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, variant.primary_assembly, variant.hn) + report_gen = variant.hn.normalize(report_gen) error = 'Using a transcript reference sequence to specify a variant position that lies ' \ - 'outside of the reference sequence is not HGVS-compliant. Instead use '\ + 'outside of the reference sequence is not HGVS-compliant. Instead re-submit '\ + fn.valstr(report_gen) except Exception: fn.exceptPass() @@ -313,11 +316,12 @@ def structure_checks_c(variant, validator): try: report_gen = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, variant.primary_assembly, variant.hn) + report_gen = variant.hn.normalize(report_gen) except hgvs.exceptions.HGVSError: fn.exceptPass() else: error = 'Using a transcript reference sequence to specify a variant position that lies outside of '\ - 'the reference sequence is not HGVS-compliant. Instead use ' + fn.valstr(report_gen) + 'the reference sequence is not HGVS-compliant. Instead re-submit ' + fn.valstr(report_gen) variant.warnings.append(error) logger.warning(error) return True @@ -484,8 +488,9 @@ def structure_checks_n(variant, validator): variant.input_parses.posedit.pos.end.offset = remainder report_gen = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, variant.primary_assembly, variant.hn) - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. 
Instead use ' + fn.valstr( - report_gen) + report_gen = variant.hn.normalize(report_gen) + error = 'Using a transcript reference sequence to specify a variant position that lies outside of' \ + ' the reference sequence is not HGVS-compliant. Instead re-submit ' + fn.valstr(report_gen) except Exception: fn.exceptPass() variant.warnings.append(error) @@ -497,9 +502,11 @@ def structure_checks_n(variant, validator): return True if 'n.1-' in str(variant.input_parses): - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' + error = 'Using a transcript reference sequence to specify a variant position that lies outside of the ' \ + 'reference sequence is not HGVS-compliant. Instead re-submit ' genomic_position = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, variant.primary_assembly, variant.hn) + genomic_position = variant.hn.normalize(genomic_position) error = error + fn.valstr(genomic_position) variant.warnings.append(error) logger.warning(error) @@ -515,11 +522,12 @@ def structure_checks_n(variant, validator): try: report_gen = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, variant.primary_assembly, variant.hn) + report_gen = variant.hn.normalize(report_gen) except hgvs.exceptions.HGVSError as e: fn.exceptPass() else: - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' + fn.valstr( - report_gen) + error = 'Using a transcript reference sequence to specify a variant position that lies outside of '\ + 'the reference sequence is not HGVS-compliant. 
Instead re-submit ' + fn.valstr(report_gen) variant.warnings.append(error) logger.warning(error) return True @@ -548,8 +556,10 @@ def structure_checks_n(variant, validator): if 'bounds' in error: report_gen = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, variant.primary_assembly, variant.hn) - error = 'Using a transcript reference sequence to specify a variant position that lies outside of the reference sequence is not HGVS-compliant. Instead use ' + fn.valstr( - report_gen) + report_gen = variant.hn.normalize(report_gen) + error = 'Using a transcript reference sequence to specify a variant position that lies ' \ + 'outside of the reference sequence is not HGVS-compliant. Instead re-submit ' + \ + fn.valstr(report_gen) variant.warnings.append(error) logger.warning(error) return True From 4ee4f2069e69430f5c5b6a20f7bb1bcb92632dc8 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 19 Jun 2019 13:00:58 +0100 Subject: [PATCH 129/223] Fix issue #55 following v0 commit --- VariantValidator/modules/mappers.py | 32 +++----- VariantValidator/modules/use_checking.py | 35 +++------ VariantValidator/modules/valoutput.py | 2 +- VariantValidator/modules/vvMixinCore.py | 31 ++++++-- test/test_inputs.py | 96 ++++++++++++------------ 5 files changed, 96 insertions(+), 100 deletions(-) diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index ec562e10..18d9acc6 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -231,30 +231,18 @@ def transcripts_to_gene(variant, validator): logger.warning(error) return True - if variant.gene_symbol: - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, ' \ - 'please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + \ - variant.gene_symbol + ' to https://variantvalidator.org/ref_finder/, ' \ - 'or select an alternative genome build' - else: - error = 'Required information for ' 
+ tx_ac + ' is missing from the Universal Transcript Archive, ' \ - 'please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to ' \ - 'https://variantvalidator.org/ref_finder/, or select an alternative genome build' - variant.warnings.append(error) - logger.warning(error) + errors = ['Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive', + 'Query https://rest.variantvalidator.org/tools/gene2transcripts/%s for ' + 'available transcripts' % tx_ac.split('.')[0]] + variant.warnings.extend(errors) + logger.warning(str(errors)) return True except TypeError: - if variant.gene_symbol: - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, ' \ - 'please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' or ' + \ - variant.gene_symbol + ' to https://variantvalidator.org/ref_finder/, ' \ - 'or select an alternative genome build' - else: - error = 'Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive, ' \ - 'please select an alternative version of ' + tx_ac + ' by submitting ' + tx_ac + ' to ' \ - 'https://variantvalidator.org/ref_finder/, or select an alternative genome build' - variant.warnings.append(error) - logger.warning(error) + errors = ['Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive', + 'Query https://rest.variantvalidator.org/tools/gene2transcripts/%s for ' + 'available transcripts' % tx_ac.split('.')[0]] + variant.warnings.extend(errors) + logger.warning(str(errors)) return True # Get orientation of the gene wrt genome and a list of exons mapped to the genome diff --git a/VariantValidator/modules/use_checking.py b/VariantValidator/modules/use_checking.py index 7a036d0f..436dd769 100644 --- a/VariantValidator/modules/use_checking.py +++ b/VariantValidator/modules/use_checking.py @@ -347,18 +347,11 @@ def structure_checks_c(variant, validator): 
variant.primary_assembly, validator.vm, variant.hn, validator.hp, validator.sf, variant.no_norm_evm) except hgvs.exceptions.HGVSDataNotAvailableError: - if variant.gene_symbol: - error = 'Required information for ' + variant.input_parses.ac + ' is missing from the Universal ' \ - 'Transcript Archive, please select an alternative version of ' + variant.input_parses.ac + \ - ' by submitting ' + variant.input_parses.ac + ' or ' + variant.gene_symbol + ' to ' \ - 'https://variantvalidator.org/ref_finder/, or select an alternative genome build' - else: - error = 'Required information for ' + variant.input_parses.ac + ' is missing from the Universal ' \ - 'Transcript Archive, please select an alternative version of ' + variant.input_parses.ac + \ - ' by submitting ' + variant.input_parses.ac + ' to https://variantvalidator.org/ref_finder/, ' \ - 'or select an alternative genome build' - variant.warnings.append(error) - logger.warning(error) + errors = ['Required information for ' + variant.input_parses.ac + ' is missing from the Universal ' + 'Transcript Archive', 'Query https://rest.variantvalidator.org/tools/gene2transcripts/%s for ' + 'available transcripts' % variant.input_parses.ac.split('.')[0]] + variant.warnings.extend(errors) + logger.warning(str(errors)) return True except ValueError as e: error = str(e) @@ -570,18 +563,12 @@ def structure_checks_n(variant, validator): output = validator.noreplace_myevm_t_to_g(variant.input_parses, variant.evm, validator.hdp, variant.primary_assembly, validator.vm, variant.hn, validator.hp, validator.sf, variant.no_norm_evm) except hgvs.exceptions.HGVSDataNotAvailableError as e: - if variant.gene_symbol: - error = 'Required information for ' + variant.input_parses.ac + ' is missing from the Universal ' \ - 'Transcript Archive, please select an alternative version of ' + variant.input_parses.ac + \ - ' by submitting ' + variant.input_parses.ac + ' or ' + variant.gene_symbol + ' to ' \ - 'https://variantvalidator.org/ref_finder/, 
or select an alternative genome build' - else: - error = 'Required information for ' + variant.input_parses.ac + ' is missing from the Universal ' \ - 'Transcript Archive, please select an alternative version of ' + variant.input_parses.ac + \ - ' by submitting ' + variant.input_parses.ac + ' to https://variantvalidator.org/ref_finder/,' \ - ' or select an alternative genome build' - variant.warnings.append(error) - logger.warning(error) + errors = ['Required information for ' + variant.input_parses.ac + ' is missing from the Universal ' + 'Transcript Archive', + 'Query https://rest.variantvalidator.org/tools/gene2transcripts/%s for ' + 'available transcripts' % variant.input_parses.ac.split('.')[0]] + variant.warnings.extend(errors) + logger.warning(str(errors)) return True except ValueError as e: error = str(e) diff --git a/VariantValidator/modules/valoutput.py b/VariantValidator/modules/valoutput.py index 6b47f3d7..0e773830 100644 --- a/VariantValidator/modules/valoutput.py +++ b/VariantValidator/modules/valoutput.py @@ -63,7 +63,7 @@ def format_as_dict(self, with_meta=True): if variant.output_type_flag == 'intergenic': validation_output['flag'] = 'intergenic' validation_intergenic_counter = validation_intergenic_counter + 1 - identification_key = 'Intergenic_Variant_%s' % validation_intergenic_counter + identification_key = 'intergenic_variant_%s' % validation_intergenic_counter # Attempt to liftover between genome builds # Note: pyliftover uses the UCSC liftOver tool. 
diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index e838fd8c..bf13d17b 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -707,17 +707,38 @@ def gene2transcripts(self, query): query = query.upper() if re.search(r'\d+ORF\d+', query): query = query.replace('ORF', 'orf') + + # Quick check for LRG + elif 'LRG' in query: + lrg_id = query.split('T')[0] + lrg_to_hgnc = self.db.get_LRG_data_from_LRGid(lrg_id) + query = lrg_to_hgnc[2] + # Quick check for blank form if query == '': return {'error': 'Please enter HGNC gene name or transcript identifier (NM_, NR_, or ENST)'} hgnc = query if 'NM_' in hgnc or 'NR_' in hgnc: # or re.match('ENST', hgnc): - try: - tx_info = self.hdp.get_tx_identity_info(hgnc) - hgnc = tx_info[6] - except hgvs.exceptions.HGVSError as e: - return {'error': str(e)} + if '.' in hgnc: + try: + tx_info = self.hdp.get_tx_identity_info(hgnc) + hgnc = tx_info[6] + except hgvs.exceptions.HGVSError as e: + return {'error': str(e)} + else: + found_res = False + for version in range(25): + refresh_hgnc = hgnc + '.' 
+ str(version) + try: + tx_info = self.hdp.get_tx_identity_info(refresh_hgnc) + hgnc = tx_info[6] + found_res = True + break + except hgvs.exceptions.HGVSError: + pass + if not found_res: + return {'error': 'No transcript definition for (tx_ac=' + hgnc + ')'} # First perform a search against the input gene symbol or the symbol inferred from UTA initial = fn.hgnc_rest(path="/fetch/symbol/" + hgnc) diff --git a/test/test_inputs.py b/test/test_inputs.py index e9858817..0a207ce6 100644 --- a/test/test_inputs.py +++ b/test/test_inputs.py @@ -629,22 +629,22 @@ def test_variant16(self): print(results) assert results['flag'] == 'intergenic' - assert 'Intergenic_Variant_1' in list(results.keys()) - assert results['Intergenic_Variant_1']['hgvs_lrg_transcript_variant'] == '' - assert results['Intergenic_Variant_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['Intergenic_Variant_1']['alt_genomic_loci'], []) - assert results['Intergenic_Variant_1']['gene_symbol'] == '' - assert results['Intergenic_Variant_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} - assert results['Intergenic_Variant_1']['submitted_variant'] == 'NC_000017.10:g.48279242G>T' - assert results['Intergenic_Variant_1']['genome_context_intronic_sequence'] == '' - assert results['Intergenic_Variant_1']['hgvs_lrg_variant'] == 'LRG_1:g.4759C>A' - assert results['Intergenic_Variant_1']['hgvs_transcript_variant'] == '' - assert results['Intergenic_Variant_1']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.4759C>A' - assert results['Intergenic_Variant_1']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48279242G>T', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '48279242', 'alt': 'T'}} - assert results['Intergenic_Variant_1']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50201881G>T', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '50201881', 'alt': 'T'}} - assert 
results['Intergenic_Variant_1']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48279242G>T', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '48279242', 'alt': 'T'}} - assert results['Intergenic_Variant_1']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50201881G>T', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '50201881', 'alt': 'T'}} - assert results['Intergenic_Variant_1']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + assert 'intergenic_variant_1' in list(results.keys()) + assert results['intergenic_variant_1']['hgvs_lrg_transcript_variant'] == '' + assert results['intergenic_variant_1']['refseqgene_context_intronic_sequence'] == '' + self.assertCountEqual(results['intergenic_variant_1']['alt_genomic_loci'], []) + assert results['intergenic_variant_1']['gene_symbol'] == '' + assert results['intergenic_variant_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['intergenic_variant_1']['submitted_variant'] == 'NC_000017.10:g.48279242G>T' + assert results['intergenic_variant_1']['genome_context_intronic_sequence'] == '' + assert results['intergenic_variant_1']['hgvs_lrg_variant'] == 'LRG_1:g.4759C>A' + assert results['intergenic_variant_1']['hgvs_transcript_variant'] == '' + assert results['intergenic_variant_1']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.4759C>A' + assert results['intergenic_variant_1']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48279242G>T', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '48279242', 'alt': 'T'}} + assert results['intergenic_variant_1']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50201881G>T', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '50201881', 'alt': 'T'}} + assert results['intergenic_variant_1']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000017.10:g.48279242G>T', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '48279242', 'alt': 'T'}} + assert results['intergenic_variant_1']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50201881G>T', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '50201881', 'alt': 'T'}} + assert results['intergenic_variant_1']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant17(self): @@ -12334,22 +12334,22 @@ def test_variant265(self): print(results) assert results['flag'] == 'intergenic' - assert 'Intergenic_Variant_1' in list(results.keys()) - assert results['Intergenic_Variant_1']['hgvs_lrg_transcript_variant'] == '' - assert results['Intergenic_Variant_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['Intergenic_Variant_1']['alt_genomic_loci'], []) - assert results['Intergenic_Variant_1']['gene_symbol'] == '' - assert results['Intergenic_Variant_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} - assert results['Intergenic_Variant_1']['submitted_variant'] == '19-15311794-A-G' - assert results['Intergenic_Variant_1']['genome_context_intronic_sequence'] == '' - assert results['Intergenic_Variant_1']['hgvs_lrg_variant'] == '' - assert results['Intergenic_Variant_1']['hgvs_transcript_variant'] == '' - assert results['Intergenic_Variant_1']['hgvs_refseqgene_variant'] == 'NG_009819.1:g.4999T>C' - assert results['Intergenic_Variant_1']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.15311794A>G', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '15311794', 'alt': 'G'}} - assert results['Intergenic_Variant_1']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.15200983A>G', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '15200983', 'alt': 'G'}} - assert 
results['Intergenic_Variant_1']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.15311794A>G', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '15311794', 'alt': 'G'}} - assert results['Intergenic_Variant_1']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.15200983A>G', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '15200983', 'alt': 'G'}} - assert results['Intergenic_Variant_1']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009819.1'} + assert 'intergenic_variant_1' in list(results.keys()) + assert results['intergenic_variant_1']['hgvs_lrg_transcript_variant'] == '' + assert results['intergenic_variant_1']['refseqgene_context_intronic_sequence'] == '' + self.assertCountEqual(results['intergenic_variant_1']['alt_genomic_loci'], []) + assert results['intergenic_variant_1']['gene_symbol'] == '' + assert results['intergenic_variant_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['intergenic_variant_1']['submitted_variant'] == '19-15311794-A-G' + assert results['intergenic_variant_1']['genome_context_intronic_sequence'] == '' + assert results['intergenic_variant_1']['hgvs_lrg_variant'] == '' + assert results['intergenic_variant_1']['hgvs_transcript_variant'] == '' + assert results['intergenic_variant_1']['hgvs_refseqgene_variant'] == 'NG_009819.1:g.4999T>C' + assert results['intergenic_variant_1']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.15311794A>G', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '15311794', 'alt': 'G'}} + assert results['intergenic_variant_1']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.15200983A>G', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '15200983', 'alt': 'G'}} + assert results['intergenic_variant_1']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.15311794A>G', 'vcf': {'chr': '19', 'ref': 'A', 'pos': 
'15311794', 'alt': 'G'}} + assert results['intergenic_variant_1']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.15200983A>G', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '15200983', 'alt': 'G'}} + assert results['intergenic_variant_1']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009819.1'} def test_variant266(self): @@ -15354,22 +15354,22 @@ def test_variant286(self): print(results) assert results['flag'] == 'intergenic' - assert 'Intergenic_Variant_1' in list(results.keys()) - assert results['Intergenic_Variant_1']['hgvs_lrg_transcript_variant'] == '' - assert results['Intergenic_Variant_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['Intergenic_Variant_1']['alt_genomic_loci'], []) - assert results['Intergenic_Variant_1']['gene_symbol'] == '' - assert results['Intergenic_Variant_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} - assert results['Intergenic_Variant_1']['submitted_variant'] == '5-1295183-G-A' - assert results['Intergenic_Variant_1']['genome_context_intronic_sequence'] == '' - assert results['Intergenic_Variant_1']['hgvs_lrg_variant'] == 'LRG_343:g.4980C>T' - assert results['Intergenic_Variant_1']['hgvs_transcript_variant'] == '' - assert results['Intergenic_Variant_1']['hgvs_refseqgene_variant'] == 'NG_009265.1:g.4980C>T' - assert results['Intergenic_Variant_1']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.1295183G>A', 'vcf': {'chr': 'chr5', 'ref': 'G', 'pos': '1295183', 'alt': 'A'}} - assert results['Intergenic_Variant_1']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.1295068G>A', 'vcf': {'chr': 'chr5', 'ref': 'G', 'pos': '1295068', 'alt': 'A'}} - assert results['Intergenic_Variant_1']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.1295183G>A', 'vcf': {'chr': '5', 'ref': 'G', 'pos': '1295183', 'alt': 'A'}} - assert 
results['Intergenic_Variant_1']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.1295068G>A', 'vcf': {'chr': '5', 'ref': 'G', 'pos': '1295068', 'alt': 'A'}} - assert results['Intergenic_Variant_1']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009265.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_343.xml'} + assert 'intergenic_variant_1' in list(results.keys()) + assert results['intergenic_variant_1']['hgvs_lrg_transcript_variant'] == '' + assert results['intergenic_variant_1']['refseqgene_context_intronic_sequence'] == '' + self.assertCountEqual(results['intergenic_variant_1']['alt_genomic_loci'], []) + assert results['intergenic_variant_1']['gene_symbol'] == '' + assert results['intergenic_variant_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['intergenic_variant_1']['submitted_variant'] == '5-1295183-G-A' + assert results['intergenic_variant_1']['genome_context_intronic_sequence'] == '' + assert results['intergenic_variant_1']['hgvs_lrg_variant'] == 'LRG_343:g.4980C>T' + assert results['intergenic_variant_1']['hgvs_transcript_variant'] == '' + assert results['intergenic_variant_1']['hgvs_refseqgene_variant'] == 'NG_009265.1:g.4980C>T' + assert results['intergenic_variant_1']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.1295183G>A', 'vcf': {'chr': 'chr5', 'ref': 'G', 'pos': '1295183', 'alt': 'A'}} + assert results['intergenic_variant_1']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.1295068G>A', 'vcf': {'chr': 'chr5', 'ref': 'G', 'pos': '1295068', 'alt': 'A'}} + assert results['intergenic_variant_1']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.1295183G>A', 'vcf': {'chr': '5', 'ref': 'G', 'pos': '1295183', 'alt': 'A'}} + assert results['intergenic_variant_1']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000005.10:g.1295068G>A', 'vcf': {'chr': '5', 'ref': 'G', 'pos': '1295068', 'alt': 'A'}} + assert results['intergenic_variant_1']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009265.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_343.xml'} def test_variant287(self): From 71395a4feddc51310fd0be2786537c04e1c19d85 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 19 Jun 2019 13:05:57 +0100 Subject: [PATCH 130/223] intergenic liftover, from v0 commit 70c23cd --- VariantValidator/modules/valoutput.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/VariantValidator/modules/valoutput.py b/VariantValidator/modules/valoutput.py index 0e773830..f768f280 100644 --- a/VariantValidator/modules/valoutput.py +++ b/VariantValidator/modules/valoutput.py @@ -113,6 +113,8 @@ def format_as_dict(self, with_meta=True): # KeyError if the dicts are empty except KeyError: continue + except IndexError: + continue # Add the dictionaries from lifted response to the output if primary_assembly_loci != {}: From ef8e2916af32b6e40bc5f19f7623e9f35a51ffe8 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 19 Jun 2019 13:17:56 +0100 Subject: [PATCH 131/223] reinstate failure error from v0 commit 782637f --- VariantValidator/modules/vvMixinCore.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index bf13d17b..7d7093f7 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -696,7 +696,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Debug mode exc_type, exc_value, last_traceback = sys.exc_info() logger.critical(str(exc_type) + " " + str(exc_value)) - raise + raise fn.VariantValidatorError('Validation error') def gene2transcripts(self, query): """ From 2892167448ee9325853a79e81309650e7e7ece69 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 19 Jun 2019 
13:25:10 +0100 Subject: [PATCH 132/223] fix issue #67 --- VariantValidator/modules/seq_data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/VariantValidator/modules/seq_data.py b/VariantValidator/modules/seq_data.py index a8528bc3..472f53c2 100644 --- a/VariantValidator/modules/seq_data.py +++ b/VariantValidator/modules/seq_data.py @@ -2815,6 +2815,7 @@ def gap_black_list(symbol): transcript or the genome to maintain a perfect alignment """ gapGene = [ + "TRPM1", "LPP", "VPS13D", "SSPO", From 0386e88f61729cee1f104032c6e71f6a9b3183d3 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 19 Jun 2019 13:35:09 +0100 Subject: [PATCH 133/223] fix issue #69 --- VariantValidator/modules/vvLiftover.py | 3 ++- VariantValidator/modules/vvMixinCore.py | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/VariantValidator/modules/vvLiftover.py b/VariantValidator/modules/vvLiftover.py index bdb470b0..8150fbc5 100644 --- a/VariantValidator/modules/vvLiftover.py +++ b/VariantValidator/modules/vvLiftover.py @@ -189,7 +189,7 @@ def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, reverse_no 'alt': alt_vcf['alt']} } added_data = True - except hgvs.exceptions.HGVSInvalidIntervalError as e: + except hgvs.exceptions.HGVSError as e: continue if lifted_response != {} and added_data is not False: @@ -255,6 +255,7 @@ def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, reverse_no else: not_delins = accession + ':g.' 
+ str(pos) + '_' + str( (pos - 1) + len(lifted_ref_bases)) + 'del' + lifted_ref_bases + 'ins' + lifted_alt_bases + not_delins = str(not_delins) hgvs_not_delins = hp.parse_hgvs_variant(not_delins) try: vr.validate(hgvs_not_delins) diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 7d7093f7..32bdc9ec 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -940,7 +940,8 @@ def _get_transcript_info(self, variant): return True except Exception: error = 'Unable to assign transcript identity records to ' + accession + \ - ', potentially an obsolete record :' + ', potentially an obsolete record or there is an issue retrieving data from NCBI. ' \ + 'Please try again later and if the problem persists contact admin' variant.warnings.append(error) logger.warning(error) return True @@ -954,7 +955,8 @@ def _get_transcript_info(self, variant): except Exception as e: logger.warning(str(e)) error = 'Unable to assign transcript identity records to ' + accession + \ - ', potentially an obsolete record :' + ', potentially an obsolete record or there is an issue retrieving data from NCBI. ' \ + 'Please try again later and if the problem persists contact admin' variant.warnings.append(error) logger.warning(error) return True @@ -1001,7 +1003,8 @@ def _get_transcript_info(self, variant): except Exception as e: logger.warning(str(e)) error = 'Unable to assign transcript identity records to ' + accession + \ - ', potentially an obsolete record :' + ', potentially an obsolete record or there is an issue retrieving data from NCBI. 
' \ + 'Please try again later and if the problem persists contact admin' variant.warnings.append(error) logger.warning(error) return True From c3d661294c16d671dd8d88afd55a538192390f55 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 19 Jun 2019 14:37:33 +0100 Subject: [PATCH 134/223] Refactored logging --- VariantValidator/modules/format_converters.py | 74 ++++----- VariantValidator/modules/gapped_mapping.py | 28 ++-- VariantValidator/modules/logger.py | 154 ++++++++++++++++++ VariantValidator/modules/mappers.py | 52 +++--- VariantValidator/modules/use_checking.py | 88 +++++----- VariantValidator/modules/valoutput.py | 4 +- VariantValidator/modules/vvDBGet.py | 6 +- VariantValidator/modules/vvDatabase.py | 22 +-- VariantValidator/modules/vvFunctions.py | 8 +- VariantValidator/modules/vvLiftover.py | 6 +- VariantValidator/modules/vvLogging.py | 148 ----------------- VariantValidator/modules/vvMixinConverters.py | 24 +-- VariantValidator/modules/vvMixinCore.py | 84 +++++----- VariantValidator/modules/vvMixinInit.py | 2 +- 14 files changed, 353 insertions(+), 347 deletions(-) create mode 100644 VariantValidator/modules/logger.py delete mode 100644 VariantValidator/modules/vvLogging.py diff --git a/VariantValidator/modules/format_converters.py b/VariantValidator/modules/format_converters.py index 3eff01e9..de11a263 100644 --- a/VariantValidator/modules/format_converters.py +++ b/VariantValidator/modules/format_converters.py @@ -1,7 +1,7 @@ import re import hgvs.exceptions import copy -from .vvLogging import logger +from .logger import Logger from .variant import Variant from . import seq_data from . 
import vvFunctions as fn @@ -84,7 +84,7 @@ def vcf2hgvs_stage1(variant, validator): pre_input + ' as a deletion whereas VCF specification 4.1 onwards would treat ' + pre_input + ' as ALT = REF'] variant.warnings.append('VariantValidator has output both alternatives') - logger.resub('Not stating ALT bases is ambiguous because VCF specification 4.0 would treat ' + + Logger.resub('Not stating ALT bases is ambiguous because VCF specification 4.0 would treat ' + pre_input + ' as a deletion whereas VCF specification 4.1 onwards would treat ' + pre_input + ' as ALT = REF. Validator will output both alternatives.') variant.write = False @@ -112,7 +112,7 @@ def vcf2hgvs_stage1(variant, validator): vcf_elements = pre_input.split('-') variant.quibble = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[3]) - logger.trace("Completed VCF-HVGS step 1", variant) + Logger.trace("Completed VCF-HVGS step 1", variant) return skipvar @@ -151,7 +151,7 @@ def vcf2hgvs_stage2(variant, validator): accession = seq_data.to_accession(chr_num, validator.selected_assembly) if accession is None: variant.warnings.append(chr_num + ' is not part of genome build ' + validator.selected_assembly) - logger.warning(chr_num + ' is not part of genome build ' + validator.selected_assembly) + Logger.warning(chr_num + ' is not part of genome build ' + validator.selected_assembly) skipvar = True else: accession = input_list[0] @@ -196,11 +196,11 @@ def vcf2hgvs_stage2(variant, validator): 'descriptions to separate the reference accession from the reference type i.e. :. ' \ 'e.g. :c.' 
% variant.quibble variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) skipvar = True # Ambiguous chr reference - logger.trace("Completed VCF-HVGS step 2", variant) + Logger.trace("Completed VCF-HVGS step 2", variant) return skipvar @@ -248,7 +248,7 @@ def vcf2hgvs_stage3(variant, validator): except Exception: fn.exceptPass(variant) - logger.trace("Completed VCF-HGVS step 3", variant) + Logger.trace("Completed VCF-HGVS step 3", variant) return skipvar @@ -288,21 +288,21 @@ def gene_symbol_catch(variant, validator, select_transcripts_dict_plus_version): warnings=variant.warnings, primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) - logger.resub('HGVS variant nomenclature does not allow the use of a gene symbol (' + + Logger.resub('HGVS variant nomenclature does not allow the use of a gene symbol (' + query_a_symbol + ') in place of a valid reference sequence') else: variant.warnings.append('HGVS variant nomenclature does not allow the use of a gene symbol (' + query_a_symbol + ') in place of a valid reference sequence: Re-submit ' + variant.quibble + ' and specify transcripts from the following: ' + 'select_transcripts=' + select_from_these_transcripts) - logger.warning('HGVS variant nomenclature does not allow the use of a gene symbol (' + + Logger.warning('HGVS variant nomenclature does not allow the use of a gene symbol (' + query_a_symbol + ') in place of a valid reference sequence: Re-submit ' + variant.quibble + ' and specify transcripts from the following: ' + 'select_transcripts=' + select_from_these_transcripts) skipvar = True except: fn.exceptPass() - logger.trace("Gene symbol reference catching complete", variant) + Logger.trace("Gene symbol reference catching complete", variant) return skipvar @@ -339,7 +339,7 @@ def refseq_catch(variant, validator, select_transcripts_dict_plus_version): warnings=variant.warnings, primary_assembly=variant.primary_assembly, order=variant.order) - 
logger.resub('NG_:c.PositionVariation descriptions should not be used unless a transcript ' + Logger.resub('NG_:c.PositionVariation descriptions should not be used unless a transcript ' 'reference sequence has also been provided e.g. NG_(NM_):c.PositionVariation. ' 'Resubmitting corrected version.') validator.batch_list.append(query) @@ -348,7 +348,7 @@ def refseq_catch(variant, validator, select_transcripts_dict_plus_version): 'NG_(NM_):c.PositionVariation. Re-submit ' + variant.quibble + ' but also specify transcripts from the following: ' + 'select_transcripts=' + select_from_these_transcripts) - logger.warning('A transcript reference sequence has not been provided e.g. ' + Logger.warning('A transcript reference sequence has not been provided e.g. ' 'NG_(NM_):c.PositionVariation. Re-submit ' + variant.quibble + ' but also ' 'specify transcripts from the following: select_transcripts=' + select_from_these_transcripts) @@ -356,21 +356,21 @@ def refseq_catch(variant, validator, select_transcripts_dict_plus_version): else: variant.warnings.append('A transcript reference sequence has not been provided e.g. ' 'NG_(NM_):c.PositionVariation') - logger.warning( + Logger.warning( 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation') skipvar = True elif variant.quibble.startswith('NC_'): variant.warnings.append('A transcript reference sequence has not been provided e.g. ' 'NC_(NM_):c.PositionVariation. Unable to predict available transcripts ' 'because chromosomal position is not specified') - logger.warning( + Logger.warning( 'A transcript reference sequence has not been provided e.g. NC_(NM_):c.PositionVariation. 
' 'Unable to predict available transcripts because chromosomal position is not specified') skipvar = True except: fn.exceptPass() - logger.trace("Chromosomal/RefSeqGene reference catching complete", variant) + Logger.trace("Chromosomal/RefSeqGene reference catching complete", variant) return skipvar @@ -468,13 +468,13 @@ def vcf2hgvs_stage4(variant, validator): order=variant.order) validator.batch_list.append(query) - logger.resub( + Logger.resub( 'Multiple ALT sequences detected. Auto-submitting all possible combinations.') skipvar = True else: error = str(e) variant.warnings.append(error) - logger.warning(str(e)) + Logger.warning(str(e)) skipvar = True try: @@ -485,7 +485,7 @@ def vcf2hgvs_stage4(variant, validator): not_delins = not_delins else: variant.warnings.append(error) - logger.warning(str(e)) + Logger.warning(str(e)) skipvar = True # Create warning automap = variant.quibble + ' automapped to ' + not_delins @@ -494,7 +494,7 @@ def vcf2hgvs_stage4(variant, validator): variant.quibble = not_delins except: fn.exceptPass() - logger.trace("Completed VCF-HVGS step 4", variant) + Logger.trace("Completed VCF-HVGS step 4", variant) return skipvar @@ -528,17 +528,17 @@ def indel_catching(variant, validator): issue_link = 'http://varnomen.hgvs.org/recommendations/DNA/variant/insertion/' error = error + ' please refer to ' + issue_link variant.warnings.append(error) - logger.warning(str(error) + " " + str(e)) + Logger.warning(str(error) + " " + str(e)) return True hgvs_failed.posedit.edit = str(hgvs_failed.posedit.edit).replace(digits, '') failed = str(hgvs_failed) automap = 'Non HGVS compliant variant description ' + variant.quibble + ' automapped to ' + failed variant.warnings.append(automap) - logger.warning(automap) + Logger.warning(automap) variant.quibble = failed - logger.trace("Ins/Del reference catching complete", variant) + Logger.trace("Ins/Del reference catching complete", variant) return False @@ -561,7 +561,7 @@ def intronic_converter(variant): transy = 
transy.group(1) transy = transy.replace(')', '') variant.quibble = transy - logger.trace("HVGS typesetting complete", variant) + Logger.trace("HVGS typesetting complete", variant) def allele_parser(variant, validation): @@ -599,7 +599,7 @@ def allele_parser(variant, validation): caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + \ refseqgene_reference + ':' + variation variant.warnings.append(caution) - logger.warning(str(caution)) + Logger.warning(str(caution)) elif re.match(r'^LRG_\d+t\d+:c.', variant.quibble) or re.match(r'^LRG_\d+t\d+:n.', variant.quibble) or \ re.match(r'^LRG_\d+t\d+:p.', variant.quibble) or re.match(r'^LRG_\d+t\d+:g.', variant.quibble): lrg_reference, variation = variant.quibble.split(':') @@ -614,14 +614,14 @@ def allele_parser(variant, validation): caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + \ refseqtranscript_reference + ':' + variation variant.warnings.append(caution) - logger.warning(str(caution)) + Logger.warning(str(caution)) else: pass try: # Submit to allele extraction function alleles = validation.hgvs_alleles(variant.quibble, variant.hn) variant.warnings.append('Automap has extracted possible variant descriptions') - logger.resub('Automap has extracted possible variant descriptions, resubmitting') + Logger.resub('Automap has extracted possible variant descriptions, resubmitting') for allele in alleles: query = Variant(variant.original, quibble=allele, warnings=variant.warnings, write=True, primary_assembly=variant.primary_assembly, order=variant.order) @@ -631,15 +631,15 @@ def allele_parser(variant, validation): except fn.alleleVariantError as e: if "Cannot validate sequence of an intronic variant" in str(e): variant.warnings.append('Intronic positions not supported for HGVS Allele descriptions') - logger.warning('Intronic positions not supported for HGVS Allele descriptions') + Logger.warning('Intronic positions not supported for HGVS Allele descriptions') return 
True elif "No transcript definition for " in str(e): variant.warnings.append(str(e)) - logger.warning(str(e)) + Logger.warning(str(e)) return True else: raise fn.VariantValidatorError(str(e)) - logger.trace("HVGS String allele parsing pass 1 complete", variant) + Logger.trace("HVGS String allele parsing pass 1 complete", variant) return False @@ -665,7 +665,7 @@ def lrg_to_refseq(variant, validator): variant.set_quibble(str(variant.hgvs_formatted)) caution += lrg_reference + ':' + variation + ' automapped to ' + refseqtrans_reference + ':' + variation variant.warnings.append(caution) - logger.warning(caution) + Logger.warning(caution) elif re.match(r'^LRG_\d+:', variant.quibble): lrg_reference, variation = variant.quibble.split(':') refseqgene_reference = validator.db.get_RefSeqGeneID_from_lrgID(lrg_reference) @@ -674,7 +674,7 @@ def lrg_to_refseq(variant, validator): variant.set_quibble(str(variant.hgvs_formatted)) caution += lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation variant.warnings.append(caution) - logger.warning(caution) + Logger.warning(caution) def mitochondrial(variant, validator): @@ -692,12 +692,12 @@ def mitochondrial(variant, validator): except hgvs.exceptions.HGVSError as e: error = str(e) variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True except KeyError: error = 'Currently unable to validate ' + hgvs_mito.ac + ' sequence variation' variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True else: # Any transcripts? 
@@ -707,7 +707,7 @@ def mitochondrial(variant, validator): if len(rel_var) == 0: variant.genomic_g = fn.valstr(hgvs_mito) variant.description = 'Homo sapiens mitochondrion, complete genome' - logger.info('Homo sapiens mitochondrion, complete genome') + Logger.info('Homo sapiens mitochondrion, complete genome') return True return False @@ -728,7 +728,7 @@ def proteins(variant, validator): error = str(e) if error: variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True else: # Get accurate descriptions from the relevant databases @@ -750,7 +750,7 @@ def proteins(variant, validator): ' in the genetic code' variant.warnings.extend([reason, error]) variant.protein = str(hgvs_object) - logger.warning(reason + ": " + error) + Logger.warning(reason + ": " + error) return True return False @@ -769,7 +769,7 @@ def rna(variant, validator): except hgvs.exceptions.HGVSDataNotAvailableError as e: error = str(e) variant.warnings.append(error) - logger.warning(str(error)) + Logger.warning(str(error)) return True variant.hgvs_formatted = hgvs_c diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index c8b03ab0..b8d05707 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -5,7 +5,7 @@ from . import vvFunctions as fn from . 
import vvHGVS -from .vvLogging import logger +from .logger import Logger class GapMapper(object): @@ -281,7 +281,7 @@ def gapped_g_to_c(self, rel_var): if str(e) == 'start or end or both are beyond the bounds of transcript record': hgvs_not_delins = saved_hgvs_coding self.disparity_deletion_in = ['false', 'false'] - logger.warning(str(e)) + Logger.warning(str(e)) try: self.variant.hn.normalize(self.tx_hgvs_not_delins) except hgvs.exceptions.HGVSUnsupportedOperationError as e: @@ -294,7 +294,7 @@ def gapped_g_to_c(self, rel_var): elif 'Normalization of intronic variants is not supported' in error: # We know that this cannot be because of an intronic variant, so must be aligned to tx gap self.disparity_deletion_in = ['transcript', 'Requires Analysis'] - logger.warning(error) + Logger.warning(error) # Pre-processing of self.tx_hgvs_not_delins try: if self.tx_hgvs_not_delins.posedit.edit.alt is None: @@ -412,7 +412,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): hgvs_genomic = self.validator.myevm_t_to_g(hgvs_coding, self.variant.no_norm_evm, self.variant.primary_assembly, self.variant.hn) - logger.warning('g_to_t gap code 1 active') + Logger.warning('g_to_t gap code 1 active') rn_hgvs_genomic = self.variant.reverse_normalizer.normalize(hgvs_genomic) self.hgvs_genomic_possibilities.append(rn_hgvs_genomic) @@ -600,14 +600,14 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): # direct mapping from reverse_normalized transcript insertions in the delins format self.rev_norm_ins(hgvs_coding, hgvs_genomic) - logger.info('\nGENOMIC POSSIBILITIES') + Logger.info('\nGENOMIC POSSIBILITIES') for possibility in self.hgvs_genomic_possibilities: if possibility == '': - logger.info('X') + Logger.info('X') else: - logger.info(fn.valstr(possibility)) + Logger.info(fn.valstr(possibility)) - logger.info('\n') + Logger.info('\n') # Set variables for problem specific warnings self.gapped_transcripts = '' @@ -909,7 +909,7 @@ def g_to_t_compensation(self, ori, 
hgvs_coding, rec_var): 'Unsupported normalization of variants spanning the exon-intron boundary' in error: hgvs_refreshed_variant = saved_hgvs_coding else: - logger.warning(error) + Logger.warning(error) continue # Quick check to make sure the coding variant has not changed @@ -959,7 +959,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): 'descriptions: If you are unsure, please contact admin' self.auto_info = self.auto_info.replace('\n', ': ') self.variant.warnings.append(self.auto_info) - logger.warning(self.auto_info) + Logger.warning(self.auto_info) # Normailse hgvs_genomic try: hgvs_genomic = self.variant.hn.normalize(hgvs_genomic) @@ -992,7 +992,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): return hgvs_genomic, suppress_c_normalization, hgvs_coding def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic): - logger.warning('g_to_t gap code 2 active') + Logger.warning('g_to_t gap code 2 active') hgvs_genomic_variant = hgvs_genomic reverse_normalized_hgvs_genomic = self.variant.reverse_normalizer.normalize(hgvs_genomic_variant) @@ -1083,7 +1083,7 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic): self.validator.vm.g_to_t(hgvs_not_delins, self.tx_hgvs_not_delins.ac) except Exception as e: if str(e) == 'start or end or both are beyond the bounds of transcript record': - logger.warning(str(e)) + Logger.warning(str(e)) return True try: self.variant.hn.normalize(self.tx_hgvs_not_delins) @@ -1092,7 +1092,7 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic): if 'Normalization of intronic variants is not supported' in error or \ 'Unsupported normalization of variants spanning the exon-intron boundary' in error: if 'Unsupported normalization of variants spanning the exon-intron boundary' in error: - logger.warning(error) + Logger.warning(error) return True elif 'Normalization of intronic variants is not supported' in error: # We know that this cannot be because of an intronic 
variant, so must be aligned to tx gap @@ -1148,7 +1148,7 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a self.orientation = int(ori[0]['alt_strand']) hgvs_genomic = copy.deepcopy(hgvs_alt_genomic) - logger.warning('g_to_t gap code 3 active') + Logger.warning('g_to_t gap code 3 active') rn_hgvs_genomic = self.variant.reverse_normalizer.normalize(hgvs_alt_genomic) self.hgvs_genomic_possibilities.append(rn_hgvs_genomic) if self.orientation != -1: diff --git a/VariantValidator/modules/logger.py b/VariantValidator/modules/logger.py new file mode 100644 index 00000000..e074d9b2 --- /dev/null +++ b/VariantValidator/modules/logger.py @@ -0,0 +1,154 @@ +import logging +import datetime +import os +from io import StringIO + +VALIDATOR_DEBUG = os.environ.get('VALIDATOR_DEBUG') + + +class Logger(): + """ + Grand unified variant validator logging static class. + """ + # logString=StringIO() + + @staticmethod + def loggingSetup(): + """ + Set up logging + I need to use the VVObfuscator in the logger global dictionary + becuase it's a global variable tied to the logger module + Modules are singletons, but their variables are not. Consequently + this is the only sensible way to ensure that the logging setup is called + once. If another programmer has any better ideas that leave these functions + with a configured VV logger object that only has its handlers added once, + feel free to fix it up. + """ + # print("Entering setup") + # The logger must be at the very least drawn from the logging library's dictionary + # for every time this module is imported. 
+ Logger.logger = logging.getLogger("VV") + if "VVObfuscator" in logging.Logger.manager.loggerDict: + return + logging.getLogger("VVObfuscator") + # print("Engaging setup") + + global VALIDATOR_DEBUG + # Check environment variables + VALIDATOR_DEBUG = os.environ.get('VALIDATOR_DEBUG') + # print("VD",os.environ.get('VALIDATOR_DEBUG')) + + if VALIDATOR_DEBUG is None: + VALIDATOR_DEBUG = "info console" # Set default value + # Set logging urgency levels. + if "debug" in VALIDATOR_DEBUG: + loglevel = logging.DEBUG + elif "warning" in VALIDATOR_DEBUG: + loglevel = logging.WARNING + elif "info" in VALIDATOR_DEBUG: + loglevel = logging.INFO + elif "error" in VALIDATOR_DEBUG: + loglevel = logging.ERROR + elif "critical" in VALIDATOR_DEBUG: + loglevel = logging.CRITICAL + else: + loglevel = logging.WARNING + + if "file" in VALIDATOR_DEBUG: + logFileHandler = logging.FileHandler("VV-log.txt") + logFileHandler.setLevel(loglevel) + Logger.logger.addHandler(logFileHandler) + + if "console" in VALIDATOR_DEBUG: + logConsoleHandler = logging.StreamHandler() + logConsoleHandler.setLevel(loglevel) + Logger.logger.addHandler(logConsoleHandler) + + # Create a log string to add to validations. + # Since it has to survive multiple imports, I'm stuffing it into the logger dictionary. + # Feel free to amend this coding monstrosity without my knowledge. + logging.Logger.manager.loggerDict["VVLogString"] = StringIO() + logStringHandler = logging.StreamHandler(logging.Logger.manager.loggerDict["VVLogString"]) + # We want the validation metadata to not contain debug info which may change with program operation + logStringHandler.setLevel(logging.INFO) + Logger.logger.addHandler(logStringHandler) + Logger.logger.setLevel(logging.DEBUG) # The logger itself must be set with an appropriate level of urgency. 
+ + Logger.logger.propagate = False + + @staticmethod + def debug(s): + Logger.loggingSetup() + Logger.logger.debug("DEBUG: " + s) + + @staticmethod + def info(s): + Logger.loggingSetup() + Logger.logger.info("INFO : " + s) + + @staticmethod + def warning(s): + Logger.loggingSetup() + Logger.logger.warning("WARN : " + s) + + @staticmethod + def error(s): + Logger.loggingSetup() + Logger.logger.error("ERROR: " + s) + + @staticmethod + def critical(s): + Logger.loggingSetup() + Logger.logger.critical("CRIT : " + s) + + @staticmethod + def trace(s, v=None): + # v should be a variant object with a 'timing' attribute. + # global VALIDATOR_DEBUG + # print(VALIDATOR_DEBUG) + # if "trace" in VALIDATOR_DEBUG: + # logger.loggingSetup() + if not v: + Logger.logger.debug("TRACE: " + s) + else: + Logger.logger.debug("TRACE: " + s) + v.timing['traceLabels'].append(s) + v.timing['traceTimes'].append(str((datetime.datetime.now() - v.timing['checkDT']).microseconds//1000)) + v.timing['checkDT'] = datetime.datetime.now() + + @staticmethod + def resub(s): + # Resubmit one or multiple variants + Logger.loggingSetup() + Logger.logger.warning("RESUB: " + s) + + @staticmethod + def getString(): + Logger.loggingSetup() + # print("RETURNING:") + # print(logging.Logger.manager.loggerDict["VVLogString"].getvalue()) + return logging.Logger.manager.loggerDict["VVLogString"].getvalue() + + @staticmethod + def traceStart(v): + Logger.loggingSetup() +# global VALIDATOR_DEBUG +# if "trace" in VALIDATOR_DEBUG: + if True: + v.timing = {} + v.timing['traceLabels'] = [] + v.timing['traceTimes'] = [] + v.timing['startDT'] = datetime.datetime.now() + v.timing['checkDT'] = datetime.datetime.now() + + @staticmethod + def traceEnd(v): + Logger.loggingSetup() + # global VALIDATOR_DEBUG + # if "trace" in VALIDATOR_DEBUG: + if True: + v.timing['traceLabels'].append("complete") + v.timing['traceTimes'].append((datetime.datetime.now() - v.timing['startDT']).microseconds//1000) + del v.timing['startDT'] + del 
v.timing['checkDT'] + diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index 18d9acc6..f8e97192 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -2,7 +2,7 @@ import re import copy import hgvs.exceptions -from .vvLogging import logger +from .logger import Logger from . import vvHGVS from .variant import Variant from . import seq_data @@ -23,7 +23,7 @@ def gene_to_transcripts(variant, validator): error = 'Reference sequence ' + variant.hgvs_genomic.ac + ' is either not supported or does not exist' if error != 'false': variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True # Set test to see if Norm alters the coords @@ -104,7 +104,7 @@ def gene_to_transcripts(variant, validator): error = 'Mapping unavailable for RefSeqGene ' + str(variant.hgvs_formatted) + \ ' using alignment method = ' + validator.alt_aln_method variant.warnings.append(error) - logger.warning(str(error)) + Logger.warning(str(error)) return True # Chromosome build is not supported or intergenic??? @@ -116,7 +116,7 @@ def gene_to_transcripts(variant, validator): except hgvs.exceptions.HGVSError as e: error = str(e) variant.warnings.append(error) - logger.warning(str(error)) + Logger.warning(str(error)) return True else: # Map to RefSeqGene if available @@ -137,13 +137,13 @@ def gene_to_transcripts(variant, validator): variant.warnings.append(error) variant.genomic_g = fn.valstr(variant.hgvs_genomic) variant.genomic_r = str(rsg_data.split('(')[0]) - logger.warning(str(error)) + Logger.warning(str(error)) return True else: error = 'Please ensure the requested chromosome version relates to a supported genome build. 
' \ 'Supported genome builds are: GRCh37, GRCh38, hg19 and hg38' variant.warnings.append(error) - logger.warning(str(error)) + Logger.warning(str(error)) return True else: @@ -178,7 +178,7 @@ def gene_to_transcripts(variant, validator): query = Variant(variant.original, quibble=str(c_description), warnings=variant.warnings, primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) - logger.warning("Continue reached when mapping transcript types to variants") + Logger.warning("Continue reached when mapping transcript types to variants") # Call next description return True return False @@ -228,21 +228,21 @@ def transcripts_to_gene(variant, validator): 'supported: Use the Gene to Transcripts function to determine whether an updated ' \ 'transcript reference sequence is available' variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True errors = ['Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive', 'Query https://rest.variantvalidator.org/tools/gene2transcripts/%s for ' 'available transcripts' % tx_ac.split('.')[0]] variant.warnings.extend(errors) - logger.warning(str(errors)) + Logger.warning(str(errors)) return True except TypeError: errors = ['Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive', 'Query https://rest.variantvalidator.org/tools/gene2transcripts/%s for ' 'available transcripts' % tx_ac.split('.')[0]] variant.warnings.extend(errors) - logger.warning(str(errors)) + Logger.warning(str(errors)) return True # Get orientation of the gene wrt genome and a list of exons mapped to the genome @@ -257,14 +257,14 @@ def transcripts_to_gene(variant, validator): error = "If the following error message does not address the issue and the problem persists please " \ "contact admin: " + str(to_g) variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True else: error = "If the following error 
message does not address the issue and the problem persists please " \ "contact admin: " + str(to_g) variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True else: @@ -288,14 +288,14 @@ def transcripts_to_gene(variant, validator): error = "If the following error message does not address the issue and the problem persists " \ "please contact admin: " + str(to_g) variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True else: error = "If the following error message does not address the issue and the problem persists " \ "please contact admin: " + str(to_g) variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True else: # Insertions at exon boundaries are miss-handled by vm.g_to_t @@ -322,7 +322,7 @@ def transcripts_to_gene(variant, validator): automap = 'Use of the corresponding genomic sequence variant descriptions may be invalid. ' \ 'Please refer to https://www35.lamp.le.ac.uk/recommendations/' variant.warnings.extend([caution, automap]) - logger.warning(caution + ": " + automap) + Logger.warning(caution + ": " + automap) else: formatted_variant = str(h_variant) @@ -331,7 +331,7 @@ def transcripts_to_gene(variant, validator): valid = True else: variant.warnings.append(str(error)) - logger.warning(str(error)) + Logger.warning(str(error)) return True # Tackle the plus intronic offset @@ -418,7 +418,7 @@ def transcripts_to_gene(variant, validator): post_var = validator.myevm_g_to_t(variant.evm, pre_var, trans_acc) except hgvs.exceptions.HGVSError as error: variant.warnings.append(str(error)) - logger.warning(str(error)) + Logger.warning(str(error)) return True test = validator.hp.parse_hgvs_variant(quibble_input) @@ -519,7 +519,7 @@ def transcripts_to_gene(variant, validator): error = "If the following error message does not address the issue and the problem persists " \ "please contact admin: " + to_g variant.warnings.append(error) - logger.warning(error) + 
Logger.warning(error) return True else: @@ -596,7 +596,7 @@ def transcripts_to_gene(variant, validator): if caution == '': caution = fn.valstr(pre_valid) + ' automapped to ' + fn.valstr(post_valid) variant.warnings.append(caution) - logger.warning(caution) + Logger.warning(caution) # Apply validation to intronic variant descriptions (should be valid but make sure) error = validator.validateHGVS(genomic_validation) @@ -637,11 +637,11 @@ def transcripts_to_gene(variant, validator): gap_compensation = False except hgvs.exceptions.HGVSError as error: variant.warnings.append(str(error)) - logger.warning(str(error)) + Logger.warning(str(error)) return True # Warn status - logger.warning("gap_compensation_1 = " + str(gap_compensation)) + Logger.warning("gap_compensation_1 = " + str(gap_compensation)) # Genomic sequence hgvs_genomic = validator.myevm_t_to_g(hgvs_coding, variant.no_norm_evm, variant.primary_assembly, variant.hn) @@ -668,7 +668,7 @@ def transcripts_to_gene(variant, validator): # --- GAP MAPPING 2 --- # Loop out gap finding code under these circumstances! 
- logger.warning("gap_compensation_2 = " + str(gap_compensation)) + Logger.warning("gap_compensation_2 = " + str(gap_compensation)) if gap_compensation is True: hgvs_coding = gap_mapper.g_to_t_gapped_mapping_stage2(ori, hgvs_coding, hgvs_genomic) @@ -710,7 +710,7 @@ def transcripts_to_gene(variant, validator): if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': hgvs_protein = protein_dict['hgvs_protein'] else: - logger.error(error) + Logger.error(error) return True # Gene orientation wrt genome @@ -875,7 +875,7 @@ def final_tx_to_multiple_genomic(variant, validator, tx_variant): fn.exceptPass() # Warn gap code status - logger.warning("gap_compensation_3 = " + str(gap_compensation)) + Logger.warning("gap_compensation_3 = " + str(gap_compensation)) multi_g = [] multi_list = [] mapping_options = validator.hdp.get_tx_mapping_options(variant.hgvs_coding.ac) @@ -911,7 +911,7 @@ def final_tx_to_multiple_genomic(variant, validator, tx_variant): warnings = warnings + ': Suspected incomplete alignment between transcript %s and ' \ 'genomic reference sequence %s' % (variant.hgvs_coding.ac, alt_chr) except hgvs.exceptions.HGVSError as e: - logger.error(str(e)) - logger.debug(str(e)) + Logger.error(str(e)) + Logger.debug(str(e)) return multi_g diff --git a/VariantValidator/modules/use_checking.py b/VariantValidator/modules/use_checking.py index 436dd769..5151bc3b 100644 --- a/VariantValidator/modules/use_checking.py +++ b/VariantValidator/modules/use_checking.py @@ -1,7 +1,7 @@ import re import hgvs from . import vvFunctions as fn -from .vvLogging import logger +from .logger import Logger import copy @@ -15,7 +15,7 @@ def refseq_common_mistakes(variant): error = 'Transcript reference sequence input as genomic (g.) reference sequence. ' \ 'Did you mean ' + suggestion + '?' variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True # NR_ c. 
if variant.quibble.startswith('NR_') and variant.reftype == ':c.': @@ -23,7 +23,7 @@ def refseq_common_mistakes(variant): error = 'Non-coding transcript reference sequence input as coding (c.) reference sequence. ' \ 'Did you mean ' + suggestion + '?' variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True # NM_ n. if variant.quibble.startswith('NM_') and variant.reftype == ':n.': @@ -31,7 +31,7 @@ def refseq_common_mistakes(variant): error = 'Coding transcript reference sequence input as non-coding transcript (n.) reference sequence. ' \ 'Did you mean ' + suggestion + '?' variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True # NM_ NC_ NG_ NR_ p. @@ -40,7 +40,7 @@ def refseq_common_mistakes(variant): error = 'Using a nucleotide reference sequence (NM_ NR_ NG_ NC_) to specify protein-level (p.) variation is ' \ 'not HGVS compliant. Please select an appropriate protein reference sequence (NP_)' variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True # NG_ c or NC_c.. @@ -49,7 +49,7 @@ def refseq_common_mistakes(variant): error = 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has ' \ 'also been provided e.g. 
NG_(NM_):c.PositionVariation' variant.warnings.extend([error, suggestion]) - logger.warning(error) + Logger.warning(error) return True return False @@ -93,7 +93,7 @@ def structure_checks_g(variant, validator): and not variant.quibble.startswith('NT_') and not variant.quibble.startswith('NW_'): error = 'Invalid reference sequence identifier (' + variant.input_parses.ac + ')' variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True try: @@ -101,7 +101,7 @@ def structure_checks_g(variant, validator): except Exception as e: error = str(e) variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True # Additional test @@ -110,7 +110,7 @@ def structure_checks_g(variant, validator): except hgvs.exceptions.HGVSError as e: error = str(e) variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True return False @@ -139,14 +139,14 @@ def structure_checks_c(variant, validator): except hgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True actual_ref = to_n.posedit.edit.ref if called_ref != actual_ref: error = 'Variant reference (' + called_ref + ') does not agree with reference sequence ' \ '(' + actual_ref + ')' variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True else: variant.input_parses.posedit.edit.ref = '' @@ -205,7 +205,7 @@ def structure_checks_c(variant, validator): except Exception: fn.exceptPass() variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True try: @@ -213,7 +213,7 @@ def structure_checks_c(variant, validator): except hgvs.exceptions.HGVSError as e: error = str(e) variant.warnings.append(error) - logger.warning(e) + Logger.warning(e) return True if 'n.1-' in str(variant.input_parses): @@ -225,7 +225,7 @@ def structure_checks_c(variant, validator): genomic_position = 
variant.hn.normalize(genomic_position) error = error + fn.valstr(genomic_position) variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True # Re-map input_parses back to c. variant @@ -285,7 +285,7 @@ def structure_checks_c(variant, validator): except Exception: fn.exceptPass() variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True except hgvs.exceptions.HGVSDataNotAvailableError as e: @@ -303,7 +303,7 @@ def structure_checks_c(variant, validator): + variant.input_parses.ac + ' can only be partially aligned to genomic reference ' \ 'sequences ' + acs variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True elif re.search(r'\d-', str(variant.input_parses)) or re.search(r'\d\+', str(variant.input_parses)): @@ -323,11 +323,11 @@ def structure_checks_c(variant, validator): error = 'Using a transcript reference sequence to specify a variant position that lies outside of '\ 'the reference sequence is not HGVS-compliant. Instead re-submit ' + fn.valstr(report_gen) variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True elif 'insertion length must be 1' in error: variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True elif 'base start position must be <= end position' in error: correction = copy.deepcopy(variant.input_parses) @@ -337,7 +337,7 @@ def structure_checks_c(variant, validator): correction.posedit.pos.end = st error = error + ': Did you mean ' + str(correction) + '?' 
variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True # Create a specific minimal evm with no normalizer and no replace_reference @@ -351,7 +351,7 @@ def structure_checks_c(variant, validator): 'Transcript Archive', 'Query https://rest.variantvalidator.org/tools/gene2transcripts/%s for ' 'available transcripts' % variant.input_parses.ac.split('.')[0]] variant.warnings.extend(errors) - logger.warning(str(errors)) + Logger.warning(str(errors)) return True except ValueError as e: error = str(e) @@ -359,7 +359,7 @@ def structure_checks_c(variant, validator): error = 'Interval start position ' + str(variant.input_parses.posedit.pos.start) + ' > interval end '\ 'position ' + str(variant.input_parses.posedit.pos.end) variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True except hgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) @@ -373,11 +373,11 @@ def structure_checks_c(variant, validator): error = 'Interval start position ' + str(variant.input_parses.posedit.pos.start) + ' > interval end' \ ' position ' + str(variant.input_parses.posedit.pos.end) variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True else: variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True try: @@ -385,7 +385,7 @@ def structure_checks_c(variant, validator): except hgvs.exceptions.HGVSError as e: error = str(e) variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True try: @@ -393,7 +393,7 @@ def structure_checks_c(variant, validator): except hgvs.exceptions.HGVSError as e: error = str(e) variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True else: @@ -415,20 +415,20 @@ def structure_checks_c(variant, validator): error = 'Interval start position ' + str(variant.input_parses.posedit.pos.start) + ' > interval end '\ 'position ' + str(variant.input_parses.posedit.pos.end) 
variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True except hgvs.exceptions.HGVSDataNotAvailableError as e: error = str(e) variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True except hgvs.exceptions.HGVSError as e: error = str(e) if 'bounds' in error: error += ' (' + variant.input_parses.ac + ')' variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True return False @@ -456,7 +456,7 @@ def structure_checks_n(variant, validator): if called_ref != actual_ref: error = 'Variant reference (' + called_ref + ') does not agree with reference sequence (' + actual_ref + ')' variant.warnings.append(error) - logger.warning(str(error)) + Logger.warning(str(error)) return True else: variant.input_parses.posedit.edit.ref = '' @@ -487,11 +487,11 @@ def structure_checks_n(variant, validator): except Exception: fn.exceptPass() variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True else: variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True if 'n.1-' in str(variant.input_parses): @@ -502,7 +502,7 @@ def structure_checks_n(variant, validator): genomic_position = variant.hn.normalize(genomic_position) error = error + fn.valstr(genomic_position) variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True if re.search(r'\d-', str(variant.input_parses)) or re.search(r'\d\+', str(variant.input_parses)): @@ -522,11 +522,11 @@ def structure_checks_n(variant, validator): error = 'Using a transcript reference sequence to specify a variant position that lies outside of '\ 'the reference sequence is not HGVS-compliant. 
Instead re-submit ' + fn.valstr(report_gen) variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True elif 'insertion length must be 1' in error: variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True elif 'base start position must be <= end position' in error: correction = copy.deepcopy(variant.input_parses) @@ -537,7 +537,7 @@ def structure_checks_n(variant, validator): error = error + ': Did you mean ' + str(correction) + '?' # error = 'Interval start position ' + str(input_parses.posedit.pos.start) + ' > interval end position ' + str(input_parses.posedit.pos.end) variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True elif 'Cannot validate sequence of an intronic variant' in error: try: @@ -554,7 +554,7 @@ def structure_checks_n(variant, validator): 'outside of the reference sequence is not HGVS-compliant. Instead re-submit ' + \ fn.valstr(report_gen) variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True # Create a specific minimal evm with no normalizer and no replace_reference @@ -568,7 +568,7 @@ def structure_checks_n(variant, validator): 'Query https://rest.variantvalidator.org/tools/gene2transcripts/%s for ' 'available transcripts' % variant.input_parses.ac.split('.')[0]] variant.warnings.extend(errors) - logger.warning(str(errors)) + Logger.warning(str(errors)) return True except ValueError as e: error = str(e) @@ -577,7 +577,7 @@ def structure_checks_n(variant, validator): variant.input_parses.posedit.pos.start) + ' > interval end position ' + str( variant.input_parses.posedit.pos.end) variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True except hgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) @@ -592,14 +592,14 @@ def structure_checks_n(variant, validator): variant.input_parses.posedit.pos.start) + ' > interval end position ' + str( 
variant.input_parses.posedit.pos.end) variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True try: validator.vr.validate(output) except hgvs.exceptions.HGVSError as e: error = str(e) variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True else: @@ -638,22 +638,22 @@ def structure_checks_n(variant, validator): error = 'Interval start position ' + str( variant.input_parses.posedit.pos.start) + ' > interval end position ' + str( variant.input_parses.posedit.pos.end) - logger.warning(error) + Logger.warning(error) variant.warnings.append(error) return True variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True except hgvs.exceptions.HGVSDataNotAvailableError as e: error = str(e) variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True except hgvs.exceptions.HGVSError as e: error = str(e) if 'bounds' in error: error = error + ' (' + variant.input_parses.ac + ')' variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True return False diff --git a/VariantValidator/modules/valoutput.py b/VariantValidator/modules/valoutput.py index f768f280..2be346a3 100644 --- a/VariantValidator/modules/valoutput.py +++ b/VariantValidator/modules/valoutput.py @@ -1,6 +1,6 @@ import os from .vvLiftover import liftover as lift_over -from .vvLogging import logger +from .logger import Logger class ValOutput(object): @@ -140,7 +140,7 @@ def add_meta(self): if os.environ.get("ADD_LOGS") == "True": logs = [] - for l in logger.getString().split("\n"): + for l in Logger.getString().split("\n"): logs.append(l) metadata["logs"] = logs diff --git a/VariantValidator/modules/vvDBGet.py b/VariantValidator/modules/vvDBGet.py index edd9d363..5cca2928 100644 --- a/VariantValidator/modules/vvDBGet.py +++ b/VariantValidator/modules/vvDBGet.py @@ -1,5 +1,5 @@ from .vvFunctions import handleCursor -from .vvLogging import logger +from .logger 
import Logger from . import vvDBInit @@ -13,7 +13,7 @@ def execute(self, query): self.cursor.execute(query) row = self.cursor.fetchone() if row is None: - logger.debug("No data returned from query "+str(query)) + Logger.debug("No data returned from query " + str(query)) row = ['none', 'No data'] return row @@ -22,7 +22,7 @@ def executeAll(self, query): self.cursor.execute(query) rows = self.cursor.fetchall() if rows == []: - logger.debug("No data returned from query "+str(query)) + Logger.debug("No data returned from query " + str(query)) rows = ['none', 'No data'] return rows diff --git a/VariantValidator/modules/vvDatabase.py b/VariantValidator/modules/vvDatabase.py index 4a19807d..aa6388d1 100644 --- a/VariantValidator/modules/vvDatabase.py +++ b/VariantValidator/modules/vvDatabase.py @@ -1,4 +1,4 @@ -from .vvLogging import logger +from .logger import Logger from . import vvFunctions as fn from .vvFunctions import handleCursor #from vvDBInsert import vvDBInsert @@ -32,7 +32,7 @@ def query_with_fetchone(self,entry, table): row = self.cursor.fetchone() if row is None: row = ['none', 'No data'] - logger.debug("No data returned from query "+str(query)) + Logger.debug("No data returned from query " + str(query)) return row # From data def data_add(self, accession, validator): @@ -179,7 +179,7 @@ def update_vv_data(self): self.update_lrg() # From update_refseqgene_nomissmatch.py def update_rsg(self): - logger.info('Updating RefSeqGene no Missmatch MySQL data') + Logger.info('Updating RefSeqGene no Missmatch MySQL data') # Set os path # Set up os paths data and log folders ROOT = os.path.dirname(os.path.abspath(__file__)) @@ -374,7 +374,7 @@ def update_rsg(self): line.append(known[line[0]]['gene_id']) except KeyError: check = obsolete[line[0]] - logger.info(str(line[0]) + ' : ' + check) + Logger.info(str(line[0]) + ' : ' + check) # Open a text file to be used as a simple database and write the database # rsg_db = open(os.path.join(ROOT, 'rsg_chr_db.txt'), 'w') @@ 
-420,14 +420,14 @@ def update_rsg(self): # Close database # rsg_db.close() - logger.info( 'Total NG_ to NC_ alignments = ' + str(total_rsg_to_nc)) - logger.info( 'Gapps within NG_ to NC_ alignments = ' + str(total_rsg_to_nc_rejected)) + Logger.info('Total NG_ to NC_ alignments = ' + str(total_rsg_to_nc)) + Logger.info('Gapps within NG_ to NC_ alignments = ' + str(total_rsg_to_nc_rejected)) - logger.info( 'complete') + Logger.info('complete') return #from compile_lrg_data, this function was originally just called "update" def update_lrg(self): - logger.info('Updating LRG lookup tables') + Logger.info('Updating LRG lookup tables') lr2rs_download = urllib.request.Request('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_transcripts_xrefs.txt') # Open and read lr2rs_data = urllib.request.urlopen(lr2rs_download) @@ -480,7 +480,7 @@ def update_lrg(self): # LRG_ID RefSeqTranscriptID # LRG_T2LRG_P - logger.info( 'Update LRG and LRG_transcript lookup tables' ) + Logger.info('Update LRG and LRG_transcript lookup tables') # Populate lists lrg_rs_lookup (LRG to RefSeqGene) and lrg_t2nm_ (LRG Transcript to RefSeq Transcript) for line in lr2rs: if re.search('^#', line): @@ -506,7 +506,7 @@ def update_lrg(self): # update database self.update_lrgt_rst(lrgtx_to_rstID) - logger.info( 'Update LRG protein lookup table') + Logger.info('Update LRG protein lookup table') # Populate LRG protein RefSeqProtein lokup table for line in lr_t2p: if re.search('^#', line): @@ -519,7 +519,7 @@ def update_lrg(self): # update LRG to RefSeqGene database self.update_lrg_p_rs_p_lookup(lrg_p, rs_p) - logger.info('LRG lookup tables updated') + Logger.info('LRG lookup tables updated') return #From ref_seq_type def ref_type_assign(self,accession): diff --git a/VariantValidator/modules/vvFunctions.py b/VariantValidator/modules/vvFunctions.py index 6257ae71..519fe67f 100644 --- a/VariantValidator/modules/vvFunctions.py +++ b/VariantValidator/modules/vvFunctions.py @@ -7,7 +7,7 @@ import functools import 
traceback import sys -from .vvLogging import logger +from .logger import Logger import re import copy import mysql @@ -99,11 +99,11 @@ def exceptPass(validation=None): tbk = [str(exc_type), str(exc_value), str(te)] er = str('\n'.join(tbk)) if last_traceback: - logger.warning( + Logger.warning( "Except pass for " + str(exc_type) + " " + str(exc_value) + " at line " + str(last_traceback.tb_lineno)) else: - logger.warning("Except pass for " + str(exc_type) + " " + str(exc_value)) - logger.debug(er) + Logger.warning("Except pass for " + str(exc_type) + " " + str(exc_value)) + Logger.debug(er) # From functions.py def user_input(input): diff --git a/VariantValidator/modules/vvLiftover.py b/VariantValidator/modules/vvLiftover.py index 8150fbc5..01ee9454 100644 --- a/VariantValidator/modules/vvLiftover.py +++ b/VariantValidator/modules/vvLiftover.py @@ -12,7 +12,7 @@ import os from . import seq_data from . import vvHGVS -from .vvLogging import logger +from .logger import Logger from pyliftover import LiftOver from Bio.Seq import Seq @@ -250,7 +250,7 @@ def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, reverse_no accession = seq_data.to_accession(chr, lo_to) if accession is None: wrn = 'Unable to identify an equivalent %s chromosome ID for %s' % (str(lo_to), str(chr)) - logger.warning(wrn) + Logger.warning(wrn) continue else: not_delins = accession + ':g.' 
+ str(pos) + '_' + str( @@ -260,7 +260,7 @@ def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, reverse_no try: vr.validate(hgvs_not_delins) except hgvs.exceptions.HGVSError as e: - logger.warning(str(e)) + Logger.warning(str(e)) # Most likely incorrect bases continue else: diff --git a/VariantValidator/modules/vvLogging.py b/VariantValidator/modules/vvLogging.py deleted file mode 100644 index b7a3d7c3..00000000 --- a/VariantValidator/modules/vvLogging.py +++ /dev/null @@ -1,148 +0,0 @@ - -import logging -import datetime -import os -from io import StringIO - -VALIDATOR_DEBUG=os.environ.get('VALIDATOR_DEBUG') - -class logger(): - ''' - #Grand unified variant validator logging static class. - ''' - #logString=StringIO() - @staticmethod - def loggingSetup(): - ''' - # Set up logging - # I need to use the VVObfuscator in the logger global dictionary - # becuase it's a global variable tied to the logger module - # Modules are singletons, but their variables are not. Consequently - # this is the only sensible way to ensure that the logging setup is called - # once. If another programmer has any better ideas that leave these functions - # with a configured VV logger object that only has its handlers added once, - # feel free to fix it up. - ''' - #print("Entering setup") - #The logger must be at the very least drawn from the logging library's dictionary - #for every time this module is imported. - logger.logger = logging.getLogger("VV") - if "VVObfuscator" in logging.Logger.manager.loggerDict: - return - logging.getLogger("VVObfuscator") - #print("Engaging setup") - - global VALIDATOR_DEBUG - # Check envrionment variables - VALIDATOR_DEBUG=os.environ.get('VALIDATOR_DEBUG') - #print("VD",os.environ.get('VALIDATOR_DEBUG')) - - if VALIDATOR_DEBUG is None: - VALIDATOR_DEBUG = "info console" # Set default value - # Set logging urgency levels. 
- if "debug" in VALIDATOR_DEBUG: - logLevel = logging.DEBUG - elif "warning" in VALIDATOR_DEBUG: - logLevel = logging.WARNING - elif "info" in VALIDATOR_DEBUG: - logLevel = logging.INFO - elif "error" in VALIDATOR_DEBUG: - logLevel = logging.ERROR - elif "critical" in VALIDATOR_DEBUG: - logLevel = logging.CRITICAL - - if "file" in VALIDATOR_DEBUG: - logFileHandler = logging.FileHandler("VV-log.txt") - logFileHandler.setLevel(logLevel) - logger.logger.addHandler(logFileHandler) - if "console" in VALIDATOR_DEBUG: - logConsoleHandler = logging.StreamHandler() - logConsoleHandler.setLevel(logLevel) - logger.logger.addHandler(logConsoleHandler) - # Create a log string to add to validations. - # Since it has to survive multiple imports, I'm stuffing it into the logger dictionary. - # Feel free to amend this coding monstrosity without my knowledge. - logging.Logger.manager.loggerDict["VVLogString"]=StringIO() - logStringHandler = logging.StreamHandler(logging.Logger.manager.loggerDict["VVLogString"]) - # We want the validation metadata to not contain debug info which may change with program operation - logStringHandler.setLevel(logging.INFO) - logger.logger.addHandler(logStringHandler) - logger.logger.setLevel(logging.DEBUG) # The logger itself must be set with an appropriate level of urgency. - - logger.logger.propagate = False - @staticmethod - def debug(s): - logger.loggingSetup() - logger.logger.debug("DEBUG: "+s) - @staticmethod - def info(s): - logger.loggingSetup() - logger.logger.info("INFO : "+s) - @staticmethod - def warning(s): - logger.loggingSetup() - logger.logger.warning("WARN : "+s) - @staticmethod - def error(s): - logger.loggingSetup() - logger.logger.error("ERROR: "+s) - @staticmethod - def critical(s): - logger.loggingSetup() - logger.logger.critical("CRIT : "+s) - @staticmethod - def trace(s, v=None): - #v should be a variant object with a 'timing' attribute. 
- #global VALIDATOR_DEBUG - #print(VALIDATOR_DEBUG) - #if "trace" in VALIDATOR_DEBUG: - # logger.loggingSetup() - if not v: - logger.logger.debug("TRACE: "+s) - else: - logger.logger.debug("TRACE: "+s) - v.timing['traceLabels'].append(s) - v.timing['traceTimes'].append(str((datetime.datetime.now() - v.timing['checkDT']).microseconds//1000)) - v.timing['checkDT'] = datetime.datetime.now() - @staticmethod - def resub(s): - #Resubmit one or multiple variants - logger.loggingSetup() - logger.logger.warning("RESUB: "+s) - @staticmethod - def getString(): - logger.loggingSetup() - #print("RETURNING:") - #print(logging.Logger.manager.loggerDict["VVLogString"].getvalue()) - return logging.Logger.manager.loggerDict["VVLogString"].getvalue() - @staticmethod - def traceStart(v): - logger.loggingSetup() -# global VALIDATOR_DEBUG -# if "trace" in VALIDATOR_DEBUG: - if True: - v.timing = {} - v.timing['traceLabels'] = [] - v.timing['traceTimes'] = [] - v.timing['startDT'] = datetime.datetime.now() - v.timing['checkDT'] = datetime.datetime.now() - @staticmethod - def traceEnd(v): - logger.loggingSetup() - #global VALIDATOR_DEBUG - #if "trace" in VALIDATOR_DEBUG: - if True: - v.timing['traceLabels'].append("complete") - v.timing['traceTimes'].append((datetime.datetime.now() - v.timing['startDT']).microseconds//1000) - del v.timing['startDT'] - del v.timing['checkDT'] - -#Test -#logger.debug("Message D") -#logger.info("Message I") -#logger.warning("Message W") -#logger.error("Message E") -#logger.critical("Message C")# - -#print("TEST "+logString.getvalue()) - diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index 25341c8a..41221f34 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -2,7 +2,7 @@ import os import sys import copy -from .vvLogging import logger +from .logger import Logger import hgvs import hgvs.exceptions from hgvs.dataproviders import uta @@ 
-207,7 +207,7 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): # If the gene symbol is not in the list, the value False will be returned utilise_gap_code = seq_data.gap_black_list(gene_symbol) # Warn gap code in use - logger.warning("gap_compensation_myevm = " + str(utilise_gap_code)) + Logger.warning("gap_compensation_myevm = " + str(utilise_gap_code)) if utilise_gap_code is True and ( hgvs_c.posedit.edit.type == 'identity' or hgvs_c.posedit.edit.type == 'del' or hgvs_c.posedit.edit.type == 'delins' or hgvs_c.posedit.edit.type == 'dup' or hgvs_c.posedit.edit.type == 'sub' or hgvs_c.posedit.edit.type == 'ins' or hgvs_c.posedit.edit.type == 'inv'): @@ -534,7 +534,7 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): # Warn of variant location wrt the gap if re.match('Length implied by coordinates must equal sequence deletion length', str(e)): - logger.warning('Variant is proximal to the flank of a genomic gap') + Logger.warning('Variant is proximal to the flank of a genomic gap') genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) try: hn.normalize(genomic_gap_variant) @@ -553,7 +553,7 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): genomic_gap_variant = self.nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) if error_type_1 == 'base start position must be <= end position': - logger.warning('Variant is fully within a genomic gap') + Logger.warning('Variant is fully within a genomic gap') genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) # Logic @@ -571,7 +571,7 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): if re.match('Length implied by coordinates must equal sequence deletion length', str(e)): # This will only happen if the variant is flanking the gap but is # not inside the gap - logger.warning('Variant is on the flank of a genomic gap but not within the gap') + Logger.warning('Variant is on the flank of a genomic gap but not within the gap') gap_start = 
genomic_gap_variant.posedit.pos.start.base - 1 gap_end = genomic_gap_variant.posedit.pos.end.base + 1 genomic_gap_variant.posedit.pos.start.base = gap_start @@ -856,7 +856,7 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): hgvs_genomic = no_norm_evm.t_to_g(hgvs_c) except Exception as e: error = str(e) - logger.warning('Ins mapping error in myt_to_g ' + error) + Logger.warning('Ins mapping error in myt_to_g ' + error) return hgvs_genomic @@ -1063,7 +1063,7 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn hgvs_genomic = no_norm_evm.t_to_g(hgvs_c) except Exception as e: error = str(e) - logger.warning('Ins mapping error in myt_to_g ' + error) + Logger.warning('Ins mapping error in myt_to_g ' + error) return hgvs_genomic @@ -1091,7 +1091,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): # If the gene symbol is not in the list, the value False will be returned utilise_gap_code = seq_data.gap_black_list(gene_symbol) # Warn gap code in use - logger.warning("gap_compensation_mvm = " + str(utilise_gap_code)) + Logger.warning("gap_compensation_mvm = " + str(utilise_gap_code)) if utilise_gap_code is True and ( hgvs_c.posedit.edit.type == 'identity' or hgvs_c.posedit.edit.type == 'del' or hgvs_c.posedit.edit.type == 'delins' or hgvs_c.posedit.edit.type == 'dup' or hgvs_c.posedit.edit.type == 'sub' or hgvs_c.posedit.edit.type == 'ins' or hgvs_c.posedit.edit.type == 'inv'): @@ -1272,7 +1272,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): # Warn of variant location wrt the gap if re.match('Length implied by coordinates must equal sequence deletion length', str(e)): - logger.warning('Variant is proximal to the flank of a genomic gap') + Logger.warning('Variant is proximal to the flank of a genomic gap') genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) try: hn.normalize(genomic_gap_variant) @@ -1290,7 +1290,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): 
genomic_gap_variant = self.nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) if error_type_1 == 'base start position must be <= end position': - logger.warning('Variant is fully within a genomic gap') + Logger.warning('Variant is fully within a genomic gap') genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) # Logic @@ -1308,7 +1308,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): if re.match('Length implied by coordinates must equal sequence deletion length', str(e)): # This will only happen if the variant is flanking the gap but is # not inside the gap - logger.warning('Variant is on the flank of a genomic gap but not within the gap') + Logger.warning('Variant is on the flank of a genomic gap but not within the gap') gap_start = genomic_gap_variant.posedit.pos.start.base - 1 gap_end = genomic_gap_variant.posedit.pos.end.base + 1 genomic_gap_variant.posedit.pos.start.base = gap_start @@ -1592,7 +1592,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): hgvs_genomic = no_norm_evm.t_to_g(hgvs_c) except Exception as e: error = str(e) - logger.warning('Ins mapping error in myt_to_g ' + error) + Logger.warning('Ins mapping error in myt_to_g ' + error) return hgvs_genomic diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 32bdc9ec..65cbd436 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -6,7 +6,7 @@ import sys import traceback from hgvs.assemblymapper import AssemblyMapper -from .vvLogging import logger +from .logger import Logger from . import vvHGVS from . import vvFunctions as fn from . import seq_data @@ -34,13 +34,13 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr :param transcript_set: 'refseq' or 'ensembl'. 
Currently only 'refseq' is supported :return: """ - logger.info(batch_variant + ' : ' + selected_assembly) + Logger.info(batch_variant + ' : ' + selected_assembly) if transcript_set == "refseq": self.alt_aln_method = 'splign' elif transcript_set == "ensembl": self.alt_aln_method = 'genebuild' - logger.warning("Ensembl is currently not supported") + Logger.warning("Ensembl is currently not supported") raise Exception("Ensembl is currently not supported") else: raise Exception("The transcriptSet variable '%s' is invalid, it must be 'refseq' or 'ensembl'" % @@ -96,10 +96,10 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr flag : gene """ - logger.debug("Batch list length " + str(len(self.batch_list))) + Logger.debug("Batch list length " + str(len(self.batch_list))) for my_variant in self.batch_list: # Start timing - logger.traceStart(my_variant) + Logger.traceStart(my_variant) # Create Normalizers my_variant.hn = hgvs.normalizer.Normalizer(self.hdp, @@ -121,7 +121,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr try: # Note, ID is not touched. It is always the input variant description. # Quibble will be altered but id will not if type = g. 
- logger.trace("Commenced validation of " + str(my_variant.quibble), my_variant) + Logger.trace("Commenced validation of " + str(my_variant.quibble), my_variant) if not my_variant.is_ascii(): chars, positions = my_variant.get_non_ascii() @@ -129,7 +129,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr 'Please remove this character and re-submit: A useful search function for ' \ 'Unicode characters can be found at https://unicode-search.net/' % (chars, positions) my_variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) continue # Remove whitespace @@ -137,7 +137,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if my_variant.quibble != my_variant.original: caution = 'Whitespace removed from variant description %s' % my_variant.original my_variant.warnings.append(caution) - logger.info(caution) + Logger.info(caution) # Set the primary_assembly if not my_variant.primary_assembly: @@ -160,12 +160,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr primary_assembly = 'GRCh38' my_variant.warnings.append('Invalid genome build has been specified. Automap has selected ' 'the default build (GRCh38)') - logger.warning( + Logger.warning( 'Invalid genome build has been specified. 
Automap has selected the ' 'default build ' + my_variant.primary_assembly) else: primary_assembly = my_variant.primary_assembly - logger.trace("Completed string formatting", my_variant) + Logger.trace("Completed string formatting", my_variant) toskip = format_converters.initial_format_conversions(my_variant, self, select_transcripts_dict_plus_version) @@ -182,7 +182,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: error = 'Variant description ' + my_variant.quibble + ' is not in an accepted format' my_variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) continue formatted_variant = my_variant.quibble @@ -192,14 +192,14 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgnc_gene_info = 'false' - logger.trace("Variant input formatted, proceeding to validate.", my_variant) + Logger.trace("Variant input formatted, proceeding to validate.", my_variant) # Conversions # Conversions are not currently supported. 
The HGVS format for conversions # is rarely seen wrt genomic sequencing data and needs to be re-evaluated if 'con' in my_variant.quibble: my_variant.warnings.append('Gene conversions currently unsupported') - logger.warning('Gene conversions currently unsupported') + Logger.warning('Gene conversions currently unsupported') continue # Change RNA bases to upper case but nothing else @@ -217,7 +217,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr my_variant.hgvs_formatted = input_parses except hgvs.exceptions.HGVSError as e: my_variant.warnings.append(str(e)) - logger.warning(str(e)) + Logger.warning(str(e)) continue if 'LRG' in my_variant.hgvs_formatted.ac: @@ -251,14 +251,14 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = 'Unable to map ' + my_variant.hgvs_formatted.ac + \ ' to an equivalent RefSeq transcript' my_variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) continue else: my_variant.warnings.append(str(trap_ens_in) + ' automapped to equivalent RefSeq transcript ' + my_variant.quibble) - logger.warning(str(trap_ens_in) + ' automapped to equivalent RefSeq ' + Logger.warning(str(trap_ens_in) + ' automapped to equivalent RefSeq ' 'transcript ' + my_variant.quibble) - logger.trace("HVGS acceptance test passed", my_variant) + Logger.trace("HVGS acceptance test passed", my_variant) # Check whether supported genome build is requested for non g. 
descriptions mapable_assemblies = { @@ -301,7 +301,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = 'Mapping of ' + formatted_variant + ' to genome assembly ' + \ primary_assembly + ' is not supported' my_variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) continue # Catch interval end > interval start @@ -322,13 +322,13 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if to_n.posedit.pos.end.base < to_n.posedit.pos.start.base: error = 'Interval end position < interval start position ' my_variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) continue elif my_variant.hgvs_formatted.posedit.pos.end.base < my_variant.hgvs_formatted.posedit.pos.start.base: error = 'Interval end position ' + str(my_variant.hgvs_formatted.posedit.pos.end.base) + \ ' < interval start position ' + str(my_variant.hgvs_formatted.posedit.pos.start.base) my_variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) continue # Catch missing version number in refseq @@ -337,26 +337,26 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = 'RefSeq variant accession numbers MUST include a version number' my_variant.warnings.append(error) continue - logger.trace("HVGS interval/version mapping complete", my_variant) + Logger.trace("HVGS interval/version mapping complete", my_variant) # handle LRG inputs if my_variant.refsource == 'LRG': format_converters.lrg_to_refseq(my_variant, self) - logger.trace("LRG check for conversion to refseq completed", my_variant) + Logger.trace("LRG check for conversion to refseq completed", my_variant) # Additional Incorrectly input variant capture training if my_variant.refsource == 'RefSeq': toskip = use_checking.refseq_common_mistakes(my_variant) if toskip: continue - logger.trace("Passed 'common mistakes' catcher", my_variant) + Logger.trace("Passed 'common mistakes' catcher", 
my_variant) # Primary validation of the input toskip = use_checking.structure_checks(my_variant, self) if toskip: continue - logger.trace("Variant structure and contents searches passed", my_variant) + Logger.trace("Variant structure and contents searches passed", my_variant) # Mitochondrial variants toskip = format_converters.mitochondrial(my_variant, self) @@ -395,7 +395,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr my_variant.output_type_flag = 'gene' my_variant.description = hgnc_gene_info my_variant.primary_assembly = primary_assembly - logger.traceEnd(my_variant) + Logger.traceEnd(my_variant) # Report errors to User and VV admin except KeyboardInterrupt: raise @@ -407,13 +407,13 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr te = traceback.format_exc() tbk = [str(exc_type), str(exc_value), str(te)] er = str('\n'.join(tbk)) - logger.error(str(exc_type) + " " + str(exc_value)) - logger.debug(er) + Logger.error(str(exc_type) + " " + str(exc_value)) + Logger.debug(er) raise # Outside the for loop ###################### - logger.trace("End of for loop") + Logger.trace("End of for loop") # order the rows by_order = sorted(self.batch_list, key=lambda x: x.order) @@ -695,7 +695,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr except BaseException: # Debug mode exc_type, exc_value, last_traceback = sys.exc_info() - logger.critical(str(exc_type) + " " + str(exc_value)) + Logger.critical(str(exc_type) + " " + str(exc_value)) raise fn.VariantValidatorError('Validation error') def gene2transcripts(self, query): @@ -838,7 +838,7 @@ def hgvs2ref(self, query): :param query: :return: """ - logger.info('Fetching reference sequence for ' + query) + Logger.info('Fetching reference sequence for ' + query) # Dictionary to store the data reference = {'variant': query, 'start_position': '', @@ -881,7 +881,7 @@ def hgvs2ref(self, query): sequence = 
self.sf.fetch_seq(accession, start, end) except Exception as e: reference['error'] = str(e) - logger.warning(str(e)) + Logger.warning(str(e)) else: reference['start_position'] = str(input_hgvs_query.posedit.pos.start.base) reference['end_position'] = str(input_hgvs_query.posedit.pos.end.base) @@ -904,7 +904,7 @@ def _get_transcript_info(self, variant): reason = "VariantValidator cannot recover information for transcript " + str( hgvs_vt.ac) + ' because it is not available in the Universal Transcript Archive' variant.warnings.append(reason) - logger.warning(str(reason) + ": " + str(error)) + Logger.warning(str(reason) + ": " + str(error)) return True # Get accurate transcript descriptions from the relevant databases @@ -924,7 +924,7 @@ def _get_transcript_info(self, variant): # Open a hgvs exception log file in append mode error = entry['description'] variant.warnings.extend([str(error), 'A Database error occurred, please contact admin']) - logger.warning(str(error) + ": A Database error occurred, please contact admin") + Logger.warning(str(error) + ": A Database error occurred, please contact admin") return True # If the accession key is found @@ -936,14 +936,14 @@ def _get_transcript_info(self, variant): except hgvs.exceptions.HGVSError: error = 'Transcript %s is not currently supported' % accession variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True except Exception: error = 'Unable to assign transcript identity records to ' + accession + \ ', potentially an obsolete record or there is an issue retrieving data from NCBI. 
' \ 'Please try again later and if the problem persists contact admin' variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True variant.description = entry['description'] else: @@ -953,12 +953,12 @@ def _get_transcript_info(self, variant): try: entry = self.db.data_add(accession=accession, validator=self) except Exception as e: - logger.warning(str(e)) + Logger.warning(str(e)) error = 'Unable to assign transcript identity records to ' + accession + \ ', potentially an obsolete record or there is an issue retrieving data from NCBI. ' \ 'Please try again later and if the problem persists contact admin' variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True variant.description = entry['description'] @@ -967,7 +967,7 @@ def _get_transcript_info(self, variant): # Open a hgvs exception log file in append mode error = 'Unknown error type' variant.warnings.extend([error, ': A Database error occurred, please contact admin']) - logger.warning(error) + Logger.warning(error) return True # Ensembl databases @@ -985,7 +985,7 @@ def _get_transcript_info(self, variant): # Open a hgvs exception log file in append mode error = entry['description'] variant.warnings.extend([str(error), ': A Database error occurred, please contact admin']) - logger.warning(str(error)) + Logger.warning(str(error)) return True # If the accession key is found @@ -1001,12 +1001,12 @@ def _get_transcript_info(self, variant): try: entry = self.db.data_add(accession=accession, validator=self) except Exception as e: - logger.warning(str(e)) + Logger.warning(str(e)) error = 'Unable to assign transcript identity records to ' + accession + \ ', potentially an obsolete record or there is an issue retrieving data from NCBI. 
' \ 'Please try again later and if the problem persists contact admin' variant.warnings.append(error) - logger.warning(error) + Logger.warning(error) return True variant.description = entry['description'] @@ -1015,6 +1015,6 @@ def _get_transcript_info(self, variant): # Open a hgvs exception log file in append mode error = 'Unknown error type' variant.warnings.extend([error, ': A Database error occurred, please contact admin']) - logger.warning(error) + Logger.warning(error) return True return False diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index 985e5bc5..cf9d676c 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -18,7 +18,7 @@ import re import copy from .vvDatabase import vvDatabase -from .vvLogging import logger +from .logger import Logger from . import vvFunctions as fn from VariantValidator.settings import CONFIG_DIR from VariantValidator.version import __version__ From 40cc4a9d04f054d9aa693d8fa0e2e333a9398ba1 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 19 Jun 2019 15:27:31 +0100 Subject: [PATCH 135/223] Refactored vvHGVS --- VariantValidator/modules/gapped_mapping.py | 44 +-- .../modules/{vvHGVS.py => hgvs_utils.py} | 311 +++++++----------- VariantValidator/modules/mappers.py | 6 +- VariantValidator/modules/vvLiftover.py | 10 +- VariantValidator/modules/vvMixinConverters.py | 8 +- VariantValidator/modules/vvMixinCore.py | 14 +- 6 files changed, 164 insertions(+), 229 deletions(-) rename VariantValidator/modules/{vvHGVS.py => hgvs_utils.py} (75%) diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index b8d05707..eb9825f0 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -4,7 +4,7 @@ import hgvs.exceptions from . import vvFunctions as fn -from . import vvHGVS +from . 
import hgvs_utils from .logger import Logger @@ -47,8 +47,8 @@ def gapped_g_to_c(self, rel_var): self.hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) # VCF - vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, self.variant.primary_assembly, - self.variant.reverse_normalizer, self.validator.sf) + vcf_dict = hgvs_utils.hgvs2vcf(reverse_normalized_hgvs_genomic, self.variant.primary_assembly, + self.variant.reverse_normalizer, self.validator.sf) pos = vcf_dict['pos'] ref = vcf_dict['ref'] alt = vcf_dict['alt'] @@ -84,8 +84,8 @@ def gapped_g_to_c(self, rel_var): hgvs_stash.posedit.edit.alt = hgvs_stash.posedit.edit.alt.upper() # MAKE A NO NORM HGVS2VCF - stash_dict = vvHGVS.pos_lock_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, - self.variant.reverse_normalizer, self.validator.sf) + stash_dict = hgvs_utils.pos_lock_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, + self.variant.reverse_normalizer, self.validator.sf) stash_ac = hgvs_stash.ac stash_pos = int(stash_dict['pos']) stash_ref = stash_dict['ref'] @@ -332,8 +332,8 @@ def gapped_g_to_c(self, rel_var): hgvs_stash = copy.deepcopy(stash_hgvs_not_delins) stash_ac = hgvs_stash.ac # Make a hard left and hard right not delins g. - stash_dict_right = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, - self.variant.hn, self.validator.sf) + stash_dict_right = hgvs_utils.hard_right_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, + self.variant.hn, self.validator.sf) stash_pos_right = int(stash_dict_right['pos']) stash_ref_right = stash_dict_right['ref'] stash_alt_right = stash_dict_right['alt'] @@ -341,8 +341,8 @@ def gapped_g_to_c(self, rel_var): stash_hgvs_not_delins_right = self.validator.hp.parse_hgvs_variant(stash_ac + ':' + hgvs_stash.type + '.' 
+ str(stash_pos_right) + '_' + stash_end_right + 'del' + stash_ref_right + 'ins' + stash_alt_right) - stash_dict_left = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, - self.variant.reverse_normalizer, self.validator.sf) + stash_dict_left = hgvs_utils.hard_left_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, + self.variant.reverse_normalizer, self.validator.sf) stash_pos_left = int(stash_dict_left['pos']) stash_ref_left = stash_dict_left['ref'] stash_alt_left = stash_dict_left['alt'] @@ -440,7 +440,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): fn.exceptPass() try: stash_ac = hgvs_stash.ac - stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, self.variant.hn, self.validator.sf) + stash_dict = hgvs_utils.hard_right_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, self.variant.hn, self.validator.sf) stash_pos = int(stash_dict['pos']) stash_ref = stash_dict['ref'] stash_alt = stash_dict['alt'] @@ -522,8 +522,8 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): fn.exceptPass() try: stash_ac = hgvs_stash.ac - stash_dict = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, - self.variant.reverse_normalizer, self.validator.sf) + stash_dict = hgvs_utils.hard_left_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, + self.variant.reverse_normalizer, self.validator.sf) stash_pos = int(stash_dict['pos']) stash_ref = stash_dict['ref'] stash_alt = stash_dict['alt'] @@ -672,8 +672,8 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): self.hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) # Create VCF - vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, self.variant.primary_assembly, - self.variant.reverse_normalizer, self.validator.sf) + vcf_dict = hgvs_utils.hgvs2vcf(reverse_normalized_hgvs_genomic, self.variant.primary_assembly, + self.variant.reverse_normalizer, self.validator.sf) pos = vcf_dict['pos'] ref = vcf_dict['ref'] alt = 
vcf_dict['alt'] @@ -997,8 +997,8 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic): hgvs_genomic_variant = hgvs_genomic reverse_normalized_hgvs_genomic = self.variant.reverse_normalizer.normalize(hgvs_genomic_variant) self.hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) - vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, self.variant.primary_assembly, - self.variant.reverse_normalizer, self.validator.sf) + vcf_dict = hgvs_utils.hgvs2vcf(reverse_normalized_hgvs_genomic, self.variant.primary_assembly, + self.variant.reverse_normalizer, self.validator.sf) chr = vcf_dict['chr'] pos = vcf_dict['pos'] ref = vcf_dict['ref'] @@ -1177,8 +1177,8 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a fn.exceptPass() try: stash_ac = hgvs_stash.ac - stash_dict = vvHGVS.hard_right_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, self.variant.hn, - self.validator.sf) + stash_dict = hgvs_utils.hard_right_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, self.variant.hn, + self.validator.sf) stash_pos = int(stash_dict['pos']) stash_ref = stash_dict['ref'] stash_alt = stash_dict['alt'] @@ -1258,8 +1258,8 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a fn.exceptPass() try: stash_ac = hgvs_stash.ac - stash_dict = vvHGVS.hard_left_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, - self.variant.reverse_normalizer, self.validator.sf) + stash_dict = hgvs_utils.hard_left_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, + self.variant.reverse_normalizer, self.validator.sf) stash_pos = int(stash_dict['pos']) stash_ref = stash_dict['ref'] stash_alt = stash_dict['alt'] @@ -1398,8 +1398,8 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a # Store a copy for later use # Make VCF - vcf_dict = vvHGVS.hgvs2vcf(reverse_normalized_hgvs_genomic, self.variant.primary_assembly, - self.variant.reverse_normalizer, self.validator.sf) + vcf_dict = 
hgvs_utils.hgvs2vcf(reverse_normalized_hgvs_genomic, self.variant.primary_assembly, + self.variant.reverse_normalizer, self.validator.sf) chr = vcf_dict['chr'] pos = vcf_dict['pos'] ref = vcf_dict['ref'] diff --git a/VariantValidator/modules/vvHGVS.py b/VariantValidator/modules/hgvs_utils.py similarity index 75% rename from VariantValidator/modules/vvHGVS.py rename to VariantValidator/modules/hgvs_utils.py index 2a78fe89..d89d6721 100644 --- a/VariantValidator/modules/vvHGVS.py +++ b/VariantValidator/modules/hgvs_utils.py @@ -1,5 +1,5 @@ """ -A variety of functions that convert parder hgvs objects into VCF component parts +A variety of functions that convert parser hgvs objects into VCF component parts Each function has a slightly difference emphasis """ @@ -11,86 +11,85 @@ # Import Biopython modules from Bio.Seq import Seq import hgvs +import hgvs.exceptions # Database connections and hgvs objects are now passed from VariantValidator.py - # Error handling -class pseudoVCF2HGVSError(Exception): +class PseudoVCF2HGVSError(Exception): pass -def pvcf_to_hgvs(input, selected_assembly, normalization_direction, reverse_normalizer, validator): - ''' - :param input: pseudo_vcf string + + +def pvcf_to_hgvs(query, selected_assembly, normalization_direction, reverse_normalizer, validator): + """ + :param query: pseudo_vcf string :param selected_assembly: :param normalization_direction: normalization direction an integer, 5 or 3. 
:param reverse_normalizer: :param validator: :return: - ''' + """ # Set normalizer + selected_normalizer = None if normalization_direction == 3: selected_normalizer = validator.hn if normalization_direction == 5: selected_normalizer = reverse_normalizer # Gel stye pVCF - input = input.replace(':', '-') + query = query.replace(':', '-') + pre_input = copy.deepcopy(query) + vcf_elements = pre_input.split('-') # VCF type 1 - if re.search('-\d+-[GATC]+-[GATC]+', input): - pre_input = copy.deepcopy(input) - vcf_elements = pre_input.split('-') - input = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[3]) - elif re.search('-\d+-[GATC]+-', input): - pre_input = copy.deepcopy(input) - vcf_elements = pre_input.split('-') - input = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[2]) + if re.search(r'-\d+-[GATC]+-[GATC]+', query): + query = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[3]) + elif re.search(r'-\d+-[GATC]+-', query): + query = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[2]) else: - raise pseudoVCF2HGVSError('Unsupported format: VCF specification 4.1 or later') + raise PseudoVCF2HGVSError('Unsupported format: VCF specification 4.1 or later') # Chr16:2099572TC>T try: - pre_input = copy.deepcopy(input) - input_list = input.split(':') - pos_ref_alt = str(input_list[1]) - positionAndEdit = input_list[1] - if not re.match('N[CGWT]_', input) and not re.match('LRG_\d+$', input): + input_list = query.split(':') + position_and_edit = input_list[1] + if not re.match(r'N[CGWT]_', query) and not re.match(r'LRG_\d+$', query): chr_num = str(input_list[0]) chr_num = chr_num.upper() chr_num = chr_num.strip() - if re.match('CHR', chr_num): + if re.match(r'CHR', chr_num): chr_num = chr_num.replace('CHR', '') # Use selected assembly accession = seq_data.to_accession(chr_num, selected_assembly) if accession is None: error = chr_num + ' is not part of 
genome build ' + selected_assembly + ' or is not supported' - raise pseudoVCF2HGVSError(error) + raise PseudoVCF2HGVSError(error) else: accession = input_list[0] # Assign reference sequence type ref_type = ':g.' - if re.match('LRG_', accession): + if 'LRG_' in accession: accession = validator.db.get_RefSeqGeneID_from_lrgID(accession) # Reformat the variant - input = str(accession) + ref_type + str(positionAndEdit) + query = str(accession) + ref_type + str(position_and_edit) except Exception as e: error = str(e) - raise pseudoVCF2HGVSError(error) + raise PseudoVCF2HGVSError(error) # Find not_sub type in input e.g. GGGG>G - not_sub = copy.deepcopy(input) - not_sub_find = re.compile("([GATCgatc]+)>([GATCgatc]+)") + not_sub = copy.deepcopy(query) + not_sub_find = re.compile(r"([GATCgatc]+)>([GATCgatc]+)") if not_sub_find.search(not_sub): try: # If the length of either side of the substitution delimer (>) is >1 matches = not_sub_find.search(not_sub) if len(matches.group(1)) > 1 or len(matches.group(2)) > 1 or re.search( - "([GATCgatc]+)>([GATCgatc]+),([GATCgatc]+)", input): + r"([GATCgatc]+)>([GATCgatc]+),([GATCgatc]+)", query): # Search for and remove range - range = re.compile("([0-9]+)_([0-9]+)") + range = re.compile(r"([0-9]+)_([0-9]+)") if range.search(not_sub): m = not_sub_find.search(not_sub) start = m.group(1) @@ -111,17 +110,16 @@ def pvcf_to_hgvs(input, selected_assembly, normalization_direction, reverse_norm insert = split_greater[1] remainder = split_greater[0] # Split remainder using matches - r = re.compile("([0-9]+)([GATCgatc]+)") + r = re.compile(r"([0-9]+)([GATCgatc]+)") try: m = r.search(remainder) - start = m.group(1) delete = m.group(2) starts = posedit.split(delete)[0] re_try = ref_ac + ':' + ref_type + '.' 
+ starts + 'del' + delete[0] + 'ins' + insert hgvs_re_try = validator.hp.parse_hgvs_variant(re_try) hgvs_re_try.posedit.edit.ref = delete start_pos = str(hgvs_re_try.posedit.pos.start) - if re.search('\-', start_pos): + if '-' in start_pos: base, offset = start_pos.split('-') new_offset = 0 - int(offset) + (len(delete)) end_pos = int(base) @@ -129,7 +127,7 @@ def pvcf_to_hgvs(input, selected_assembly, normalization_direction, reverse_norm hgvs_re_try.posedit.pos.end.offset = int(new_offset) - 1 not_delins = ref_ac + ':' + ref_type + '.' + start_pos + '_' + str( hgvs_re_try.posedit.pos.end) + 'del' + delete + 'ins' + insert - elif re.search('\+', start_pos): + elif '+' in start_pos: base, offset = start_pos.split('+') end_pos = int(base) + (len(delete) - int(offset) - 1) new_offset = 0 + int(offset) + (len(delete) - 1) @@ -149,32 +147,32 @@ def pvcf_to_hgvs(input, selected_assembly, normalization_direction, reverse_norm except hgvs.exceptions.HGVSError as e: # Sort out multiple ALTS from VCF inputs if re.search("([GATCgatc]+)>([GATCgatc]+),([GATCgatc]+)", not_delins): - # header,alts = not_delins.split('>') - # # Split up the alts into a list - # alt_list = alts.split(',') - # # Assemble and re-submit - # for alt in alt_list: - # validation['warnings'] = 'Multiple ALT sequences detected: auto-submitting all possible combinations' - # validation['write'] = 'false' - # refreshed_description = header + '>' + alt - # query = {'quibble' : refreshed_description, 'id' : validation['id'], 'warnings' : validation['warnings'], 'description' : '', 'coding' : '', 'coding_g' : '', 'genomic_r' : '', 'genomic_g' : '', 'protein' : '', 'write' : 'true', 'primary_assembly' : primary_assembly, 'order' : ordering} - # batch_list.append(query) + # header,alts = not_delins.split('>') + # # Split up the alts into a list + # alt_list = alts.split(',') + # # Assemble and re-submit + # for alt in alt_list: + # validation['warnings'] = 'Multiple ALT sequences detected: auto-submitting all 
possible combinations' + # validation['write'] = 'false' + # refreshed_description = header + '>' + alt + # query = {'quibble' : refreshed_description, 'id' : validation['id'], 'warnings' : validation['warnings'], 'description' : '', 'coding' : '', 'coding_g' : '', 'genomic_r' : '', 'genomic_g' : '', 'protein' : '', 'write' : 'true', 'primary_assembly' : primary_assembly, 'order' : ordering} + # batch_list.append(query) error = 'Multiple ALTs not supported by this function' - raise pseudoVCF2HGVSError(error) + raise PseudoVCF2HGVSError(error) else: error = str(e) - raise pseudoVCF2HGVSError(error) + raise PseudoVCF2HGVSError(error) # HGVS will deal with the errors hgvs_object = hgvs_not_delins else: - hgvs_object = validator.hp.parse_hgvs_variant(input) + hgvs_object = validator.hp.parse_hgvs_variant(query) except Exception as e: error = str(e) - raise pseudoVCF2HGVSError(error) + raise PseudoVCF2HGVSError(error) else: - hgvs_object = validator.hp.parse_hgvs_variant(input) + hgvs_object = validator.hp.parse_hgvs_variant(query) # Normalize hgvs_object = selected_normalizer.normalize(hgvs_object) @@ -183,7 +181,7 @@ def pvcf_to_hgvs(input, selected_assembly, normalization_direction, reverse_norm def hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): - ''' + """ Simple conversionwhich ensures identity is as 5 prime as possible by adding an extra 5 prime base. 
Necessary for most gap handling situations @@ -192,7 +190,7 @@ def hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): :param reverse_normalizer: :param sf: :return: - ''' + """ hgvs_genomic_variant = hgvs_genomic # Reverse normalize hgvs_genomic_variant: NOTE will replace ref reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic_variant) @@ -205,14 +203,14 @@ def hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): else: chr = reverse_normalized_hgvs_genomic.ac - if re.search('[GATC]+\=', str(reverse_normalized_hgvs_genomic.posedit)): + if re.search(r'[GATC]+=', str(reverse_normalized_hgvs_genomic.posedit)): pos = str(reverse_normalized_hgvs_genomic.posedit.pos.start) ref = reverse_normalized_hgvs_genomic.posedit.edit.ref alt = reverse_normalized_hgvs_genomic.posedit.edit.ref # Insertions - elif (re.search('ins', str(reverse_normalized_hgvs_genomic.posedit)) and not re.search('del', str( - reverse_normalized_hgvs_genomic.posedit))): + elif 'ins' in str(reverse_normalized_hgvs_genomic.posedit) and 'del' not in str( + reverse_normalized_hgvs_genomic.posedit): end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) alt_start = start - 1 # @@ -225,26 +223,19 @@ def hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): alt = ref_seq + ins_seq # Substitutions - elif re.search('>', str(reverse_normalized_hgvs_genomic.posedit)): + elif '>' in str(reverse_normalized_hgvs_genomic.posedit): ref = reverse_normalized_hgvs_genomic.posedit.edit.ref alt = reverse_normalized_hgvs_genomic.posedit.edit.alt pos = str(reverse_normalized_hgvs_genomic.posedit.pos) # Deletions - elif re.search('del', str(reverse_normalized_hgvs_genomic.posedit)) and not re.search('ins', str( - reverse_normalized_hgvs_genomic.posedit)): + elif 'del' in str(reverse_normalized_hgvs_genomic.posedit) and 'ins' not in str( + reverse_normalized_hgvs_genomic.posedit): end = 
int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) adj_start = start - 2 start = start - 1 - try: - ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt - except: - ins_seq = '' - else: - if str(ins_seq) == 'None': - ins_seq = '' - # Recover sequences + # Recover sequences hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) pre_base = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, start) # Assemble @@ -252,9 +243,8 @@ def hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): ref = pre_base + hgvs_del_seq alt = pre_base - # inv - elif re.search('inv', str(reverse_normalized_hgvs_genomic.posedit)): + elif 'inv' in str(reverse_normalized_hgvs_genomic.posedit): end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) adj_start = start - 1 @@ -267,22 +257,19 @@ def hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): if str(ins_seq) == 'None': ins_seq = '' # Recover sequences - hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) vcf_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) - bs = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start - 1, adj_start) # Assemble pos = str(start) ref = vcf_del_seq alt = ins_seq - if re.search('inv', str(reverse_normalized_hgvs_genomic.posedit)): + if 'inv' in str(reverse_normalized_hgvs_genomic.posedit): my_seq = Seq(vcf_del_seq) # alt = bs + str(my_seq.reverse_complement()) alt = str(my_seq.reverse_complement()) - # Delins - elif (re.search('del', str(reverse_normalized_hgvs_genomic.posedit)) and re.search('ins', str( - reverse_normalized_hgvs_genomic.posedit))): + elif 'del' in str(reverse_normalized_hgvs_genomic.posedit) and 'ins' in str( + reverse_normalized_hgvs_genomic.posedit): end = 
int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base - 1) adj_start = start - 1 @@ -295,7 +282,6 @@ def hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): if str(ins_seq) == 'None': ins_seq = '' # Recover sequences - hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) vcf_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) # Assemble pos = str(start) @@ -303,7 +289,7 @@ def hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): alt = vcf_del_seq[:1] + ins_seq # Duplications - elif (re.search('dup', str(reverse_normalized_hgvs_genomic.posedit))): + elif 'dup' in str(reverse_normalized_hgvs_genomic.posedit): end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) # start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) adj_start = start - 2 # @@ -324,7 +310,6 @@ def hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): # ensure as 5' as possible if chr != '' and pos != '' and ref != '' and alt != '': if len(ref) > 1: - rsb = list(str(ref)) if reverse_normalized_hgvs_genomic.posedit.edit.type == 'identity': pos = int(pos) - 1 prev = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), pos - 1, pos) @@ -338,7 +323,7 @@ def hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): def report_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): - ''' + """ Used to report the Most true representation of the VCF i.e. 5 prime normalized but no additional bases added. 
NOTE: no gap handling capabilities @@ -347,26 +332,28 @@ def report_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): :param reverse_normalizer: :param sf: :return: - ''' + """ hgvs_genomic_variant = hgvs_genomic # Reverse normalize hgvs_genomic_variant: NOTE will replace ref reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic_variant) # hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) + ucsc_pa = '' + grc_pa = '' # Sort the primary assemblies - if re.match('GRC', primary_assembly): - if re.search('37', primary_assembly): + if 'GRC' in primary_assembly: + if '37' in primary_assembly: ucsc_pa = 'hg19' grc_pa = primary_assembly - if re.search('38', primary_assembly): + if '38' in primary_assembly: ucsc_pa = 'hg38' grc_pa = primary_assembly else: - if re.search('19', primary_assembly): + if '19' in primary_assembly: ucsc_pa = primary_assembly grc_pa = 'GRCh37' - if re.search('38', primary_assembly): + if '38' in primary_assembly: ucsc_pa = primary_assembly grc_pa = 'GRCh38' @@ -384,14 +371,14 @@ def report_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): else: grc_chr = reverse_normalized_hgvs_genomic.ac - if re.search('[GATC]+\=', str(reverse_normalized_hgvs_genomic.posedit)): + if re.search(r'[GATC]+=', str(reverse_normalized_hgvs_genomic.posedit)): pos = str(reverse_normalized_hgvs_genomic.posedit.pos.start) ref = reverse_normalized_hgvs_genomic.posedit.edit.ref alt = reverse_normalized_hgvs_genomic.posedit.edit.ref # Insertions - elif (re.search('ins', str(reverse_normalized_hgvs_genomic.posedit)) and not re.search('del', str( - reverse_normalized_hgvs_genomic.posedit))): + elif 'ins' in str(reverse_normalized_hgvs_genomic.posedit) and 'del' not in str( + reverse_normalized_hgvs_genomic.posedit): end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) alt_start = start - 1 # @@ -404,25 +391,18 @@ def 
report_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): alt = ref_seq + ins_seq # Substitutions - elif re.search('>', str(reverse_normalized_hgvs_genomic.posedit)): + elif '>' in str(reverse_normalized_hgvs_genomic.posedit): ref = reverse_normalized_hgvs_genomic.posedit.edit.ref alt = reverse_normalized_hgvs_genomic.posedit.edit.alt pos = str(reverse_normalized_hgvs_genomic.posedit.pos) # Deletions - elif re.search('del', str(reverse_normalized_hgvs_genomic.posedit)) and not re.search('ins', str( - reverse_normalized_hgvs_genomic.posedit)): + elif 'del' in str(reverse_normalized_hgvs_genomic.posedit) and 'ins' not in str( + reverse_normalized_hgvs_genomic.posedit): end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) adj_start = start - 2 start = start - 1 - try: - ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt - except: - ins_seq = '' - else: - if str(ins_seq) == 'None': - ins_seq = '' # Recover sequences hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) pre_base = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, start) @@ -431,9 +411,8 @@ def report_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): ref = pre_base + hgvs_del_seq alt = pre_base - # inv - elif re.search('inv', str(reverse_normalized_hgvs_genomic.posedit)): + elif 'inv' in str(reverse_normalized_hgvs_genomic.posedit): end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) adj_start = start - 1 @@ -446,9 +425,7 @@ def report_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): if str(ins_seq) == 'None': ins_seq = '' # Recover sequences - hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) vcf_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) - bs = 
sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start - 1, adj_start) # Assemble pos = str(start) # pos = str(start-1) @@ -461,8 +438,8 @@ def report_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): alt = str(my_seq.reverse_complement()) # Delins - elif (re.search('del', str(reverse_normalized_hgvs_genomic.posedit)) and re.search('ins', str( - reverse_normalized_hgvs_genomic.posedit))): + elif 'del' in str(reverse_normalized_hgvs_genomic.posedit) and 'ins' in str( + reverse_normalized_hgvs_genomic.posedit): end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base - 1) adj_start = start - 1 @@ -475,7 +452,6 @@ def report_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): if str(ins_seq) == 'None': ins_seq = '' # Recover sequences - hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) vcf_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) # Assemble # pos = str(start) @@ -486,20 +462,18 @@ def report_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): alt = ins_seq # Duplications - elif (re.search('dup', str(reverse_normalized_hgvs_genomic.posedit))): + elif 'dup' in str(reverse_normalized_hgvs_genomic.posedit): end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) # start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) adj_start = start - 2 # start = start - 1 # # Recover sequences - dup_seq = reverse_normalized_hgvs_genomic.posedit.edit.ref vcf_ref_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) # Assemble pos = str(start) ref = vcf_ref_seq[0] alt = vcf_ref_seq else: - chr = '' ref = '' alt = '' pos = '' @@ -511,7 +485,7 @@ def report_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): def pos_lock_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): - ''' + """ No normalization at all. 
No additional bases added. Simply returns an in-situ VCF :param hgvs_genomic: @@ -519,7 +493,7 @@ def pos_lock_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): :param reverse_normalizer: :param sf: :return: - ''' + """ # Replace reference manually if hgvs_genomic.posedit.edit.ref == '': hgvs_genomic.posedit.edit.ref = sf.fetch_seq(str(hgvs_genomic.ac), hgvs_genomic.posedit.pos.start.base - 1, @@ -539,14 +513,14 @@ def pos_lock_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): else: chr = reverse_normalized_hgvs_genomic.ac - if re.search('[GATC]+\=', str(reverse_normalized_hgvs_genomic.posedit)): + if re.search(r'[GATC]+=', str(reverse_normalized_hgvs_genomic.posedit)): pos = str(reverse_normalized_hgvs_genomic.posedit.pos.start) ref = reverse_normalized_hgvs_genomic.posedit.edit.ref alt = reverse_normalized_hgvs_genomic.posedit.edit.ref # Insertions - elif (re.search('ins', str(reverse_normalized_hgvs_genomic.posedit)) and not re.search('del', str( - reverse_normalized_hgvs_genomic.posedit))): + elif 'ins' in str(reverse_normalized_hgvs_genomic.posedit) and 'del' not in str( + reverse_normalized_hgvs_genomic.posedit): end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) alt_start = start - 1 # @@ -559,25 +533,18 @@ def pos_lock_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): alt = ref_seq + ins_seq # Substitutions - elif re.search('>', str(reverse_normalized_hgvs_genomic.posedit)): + elif '>' in str(reverse_normalized_hgvs_genomic.posedit): ref = reverse_normalized_hgvs_genomic.posedit.edit.ref alt = reverse_normalized_hgvs_genomic.posedit.edit.alt pos = str(reverse_normalized_hgvs_genomic.posedit.pos) # Deletions - elif re.search('del', str(reverse_normalized_hgvs_genomic.posedit)) and not re.search('ins', str( - reverse_normalized_hgvs_genomic.posedit)): + elif 'del' in str(reverse_normalized_hgvs_genomic.posedit) and 'ins' not in str( 
+ reverse_normalized_hgvs_genomic.posedit): end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) adj_start = start - 2 start = start - 1 - try: - ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt - except: - ins_seq = '' - else: - if str(ins_seq) == 'None': - ins_seq = '' # Recover sequences hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) pre_base = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, start) @@ -586,9 +553,8 @@ def pos_lock_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): ref = pre_base + hgvs_del_seq alt = pre_base - # inv - elif re.search('inv', str(reverse_normalized_hgvs_genomic.posedit)): + elif 'inv' in str(reverse_normalized_hgvs_genomic.posedit): end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) adj_start = start - 1 @@ -601,23 +567,21 @@ def pos_lock_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): if str(ins_seq) == 'None': ins_seq = '' # Recover sequences - hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) vcf_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) - bs = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start - 1, adj_start) # Assemble pos = str(start) # pos = str(start-1) # ref = bs + vcf_del_seq ref = vcf_del_seq alt = ins_seq - if re.search('inv', str(reverse_normalized_hgvs_genomic.posedit)): + if 'inv' in str(reverse_normalized_hgvs_genomic.posedit): my_seq = Seq(vcf_del_seq) # alt = bs + str(my_seq.reverse_complement()) alt = str(my_seq.reverse_complement()) # Delins - elif (re.search('del', str(reverse_normalized_hgvs_genomic.posedit)) and re.search('ins', str( - reverse_normalized_hgvs_genomic.posedit))): + elif 'del' in str(reverse_normalized_hgvs_genomic.posedit) and 'ins' in str( + 
reverse_normalized_hgvs_genomic.posedit): end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base - 1) adj_start = start - 1 @@ -630,16 +594,14 @@ def pos_lock_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): if str(ins_seq) == 'None': ins_seq = '' # Recover sequences - hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) vcf_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) # Assemble pos = str(start) ref = vcf_del_seq alt = vcf_del_seq[:1] + ins_seq - # Duplications - elif (re.search('dup', str(reverse_normalized_hgvs_genomic.posedit))): + elif 'dup' in str(reverse_normalized_hgvs_genomic.posedit): end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) # start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) adj_start = start - 2 # @@ -662,10 +624,10 @@ def pos_lock_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): def hard_right_hgvs2vcf(hgvs_genomic, primary_assembly, hn, sf): - ''' + """ Designed specifically for gap handling. 
hard right pushes as 3 prime as possible and adds additional bases - ''' + """ hgvs_genomic_variant = hgvs_genomic # Reverse normalize hgvs_genomic_variant: NOTE will replace ref normalized_hgvs_genomic = hn.normalize(hgvs_genomic_variant) @@ -677,14 +639,14 @@ def hard_right_hgvs2vcf(hgvs_genomic, primary_assembly, hn, sf): else: chr = normalized_hgvs_genomic.ac - if re.search('[GATC]+\=', str(normalized_hgvs_genomic.posedit)): + if re.search(r'[GATC]+=', str(normalized_hgvs_genomic.posedit)): pos = str(normalized_hgvs_genomic.posedit.pos.start) ref = normalized_hgvs_genomic.posedit.edit.ref alt = normalized_hgvs_genomic.posedit.edit.ref # Insertions - elif (re.search('ins', str(normalized_hgvs_genomic.posedit)) and not re.search('del', str( - normalized_hgvs_genomic.posedit))): + elif 'ins' in str(normalized_hgvs_genomic.posedit) and 'del' not in str( + normalized_hgvs_genomic.posedit): end = int(normalized_hgvs_genomic.posedit.pos.end.base) start = int(normalized_hgvs_genomic.posedit.pos.start.base) alt_start = start - 1 # @@ -697,25 +659,17 @@ def hard_right_hgvs2vcf(hgvs_genomic, primary_assembly, hn, sf): alt = ref_seq + ins_seq # Substitutions - elif re.search('>', str(normalized_hgvs_genomic.posedit)): + elif '>' in str(normalized_hgvs_genomic.posedit): ref = normalized_hgvs_genomic.posedit.edit.ref alt = normalized_hgvs_genomic.posedit.edit.alt pos = str(normalized_hgvs_genomic.posedit.pos) # Deletions - elif re.search('del', str(normalized_hgvs_genomic.posedit)) and not re.search('ins', - str(normalized_hgvs_genomic.posedit)): + elif 'del' in str(normalized_hgvs_genomic.posedit) and 'ins' not in str(normalized_hgvs_genomic.posedit): end = int(normalized_hgvs_genomic.posedit.pos.end.base) start = int(normalized_hgvs_genomic.posedit.pos.start.base) adj_start = start - 2 start = start - 1 - try: - ins_seq = normalized_hgvs_genomic.posedit.edit.alt - except: - ins_seq = '' - else: - if str(ins_seq) == 'None': - ins_seq = '' # Recover sequences hgvs_del_seq = 
sf.fetch_seq(str(normalized_hgvs_genomic.ac), start, end) pre_base = sf.fetch_seq(str(normalized_hgvs_genomic.ac), adj_start, start) @@ -725,7 +679,7 @@ def hard_right_hgvs2vcf(hgvs_genomic, primary_assembly, hn, sf): alt = pre_base # inv - elif re.search('inv', str(normalized_hgvs_genomic.posedit)): + elif 'inv' in str(normalized_hgvs_genomic.posedit): end = int(normalized_hgvs_genomic.posedit.pos.end.base) start = int(normalized_hgvs_genomic.posedit.pos.start.base) adj_start = start - 1 @@ -738,9 +692,7 @@ def hard_right_hgvs2vcf(hgvs_genomic, primary_assembly, hn, sf): if str(ins_seq) == 'None': ins_seq = '' # Recover sequences - hgvs_del_seq = sf.fetch_seq(str(normalized_hgvs_genomic.ac), start, end) vcf_del_seq = sf.fetch_seq(str(normalized_hgvs_genomic.ac), adj_start, end) - bs = sf.fetch_seq(str(normalized_hgvs_genomic.ac), adj_start - 1, adj_start) # Assemble pos = str(start) # pos = str(start-1) @@ -753,8 +705,7 @@ def hard_right_hgvs2vcf(hgvs_genomic, primary_assembly, hn, sf): alt = str(my_seq.reverse_complement()) # Delins - elif (re.search('del', str(normalized_hgvs_genomic.posedit)) and re.search('ins', - str(normalized_hgvs_genomic.posedit))): + elif 'del' in str(normalized_hgvs_genomic.posedit) and 'ins' in str(normalized_hgvs_genomic.posedit): end = int(normalized_hgvs_genomic.posedit.pos.end.base) start = int(normalized_hgvs_genomic.posedit.pos.start.base - 1) adj_start = start - 1 @@ -767,16 +718,14 @@ def hard_right_hgvs2vcf(hgvs_genomic, primary_assembly, hn, sf): if str(ins_seq) == 'None': ins_seq = '' # Recover sequences - hgvs_del_seq = sf.fetch_seq(str(normalized_hgvs_genomic.ac), start, end) vcf_del_seq = sf.fetch_seq(str(normalized_hgvs_genomic.ac), adj_start, end) # Assemble pos = str(start) ref = vcf_del_seq alt = vcf_del_seq[:1] + ins_seq - # Duplications - elif (re.search('dup', str(normalized_hgvs_genomic.posedit))): + elif 'dup' in str(normalized_hgvs_genomic.posedit): end = int(normalized_hgvs_genomic.posedit.pos.end.base) # start 
= int(normalized_hgvs_genomic.posedit.pos.start.base) adj_start = start - 2 # @@ -810,7 +759,7 @@ def hard_right_hgvs2vcf(hgvs_genomic, primary_assembly, hn, sf): def hard_left_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): - ''' + """ Designed specifically for gap handling. hard left pushes as 5 prime as possible and adds additional bases @@ -819,11 +768,10 @@ def hard_left_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): :param reverse_normalizer: :param sf: :return: - ''' + """ hgvs_genomic_variant = hgvs_genomic # Reverse normalize hgvs_genomic_variant: NOTE will replace ref reverse_normalized_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic_variant) - hgvs_genomic_5pr = copy.deepcopy(reverse_normalized_hgvs_genomic) # Chr chr = seq_data.to_chr_num_ucsc(reverse_normalized_hgvs_genomic.ac, primary_assembly) @@ -832,14 +780,14 @@ def hard_left_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): else: chr = reverse_normalized_hgvs_genomic.ac - if re.search('[GATC]+\=', str(reverse_normalized_hgvs_genomic.posedit)): + if re.search(r'[GATC]+=', str(reverse_normalized_hgvs_genomic.posedit)): pos = str(reverse_normalized_hgvs_genomic.posedit.pos.start) ref = reverse_normalized_hgvs_genomic.posedit.edit.ref alt = reverse_normalized_hgvs_genomic.posedit.edit.ref # Insertions - elif (re.search('ins', str(reverse_normalized_hgvs_genomic.posedit)) and not re.search('del', str( - reverse_normalized_hgvs_genomic.posedit))): + elif 'ins' in str(reverse_normalized_hgvs_genomic.posedit) and 'del' not in str( + reverse_normalized_hgvs_genomic.posedit): end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) alt_start = start - 1 # @@ -852,25 +800,18 @@ def hard_left_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): alt = ref_seq + ins_seq # Substitutions - elif re.search('>', str(reverse_normalized_hgvs_genomic.posedit)): + elif 
'>' in str(reverse_normalized_hgvs_genomic.posedit): ref = reverse_normalized_hgvs_genomic.posedit.edit.ref alt = reverse_normalized_hgvs_genomic.posedit.edit.alt pos = str(reverse_normalized_hgvs_genomic.posedit.pos) # Deletions - elif re.search('del', str(reverse_normalized_hgvs_genomic.posedit)) and not re.search('ins', str( - reverse_normalized_hgvs_genomic.posedit)): + elif 'del' in str(reverse_normalized_hgvs_genomic.posedit) and 'ins' not in str( + reverse_normalized_hgvs_genomic.posedit): end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) adj_start = start - 2 start = start - 1 - try: - ins_seq = reverse_normalized_hgvs_genomic.posedit.edit.alt - except: - ins_seq = '' - else: - if str(ins_seq) == 'None': - ins_seq = '' # Recover sequences hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) pre_base = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, start) @@ -879,9 +820,8 @@ def hard_left_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): ref = pre_base + hgvs_del_seq alt = pre_base - # inv - elif re.search('inv', str(reverse_normalized_hgvs_genomic.posedit)): + elif 'inv' in str(reverse_normalized_hgvs_genomic.posedit): end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) adj_start = start - 1 @@ -894,9 +834,7 @@ def hard_left_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): if str(ins_seq) == 'None': ins_seq = '' # Recover sequences - hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) vcf_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) - bs = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start - 1, adj_start) # Assemble pos = str(start) # pos = str(start-1) @@ -909,8 +847,8 @@ def hard_left_hgvs2vcf(hgvs_genomic, primary_assembly, 
reverse_normalizer, sf): alt = str(my_seq.reverse_complement()) # Delins - elif (re.search('del', str(reverse_normalized_hgvs_genomic.posedit)) and re.search('ins', str( - reverse_normalized_hgvs_genomic.posedit))): + elif 'del' in str(reverse_normalized_hgvs_genomic.posedit) and 'ins' in str( + reverse_normalized_hgvs_genomic.posedit): end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base - 1) adj_start = start - 1 @@ -923,16 +861,14 @@ def hard_left_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): if str(ins_seq) == 'None': ins_seq = '' # Recover sequences - hgvs_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), start, end) vcf_del_seq = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), adj_start, end) # Assemble pos = str(start) ref = vcf_del_seq alt = vcf_del_seq[:1] + ins_seq - # Duplications - elif (re.search('dup', str(reverse_normalized_hgvs_genomic.posedit))): + elif 'dup' in str(reverse_normalized_hgvs_genomic.posedit): end = int(reverse_normalized_hgvs_genomic.posedit.pos.end.base) # start = int(reverse_normalized_hgvs_genomic.posedit.pos.start.base) adj_start = start - 2 # @@ -953,7 +889,6 @@ def hard_left_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): # ADD SURROUNDING BASES if chr != '' and pos != '' and ref != '' and alt != '': pre_pos = int(pos) - 1 - pre_pos prev = sf.fetch_seq(str(reverse_normalized_hgvs_genomic.ac), pre_pos - 1, pre_pos) pos = str(pre_pos) ref = prev + ref @@ -965,12 +900,12 @@ def hard_left_hgvs2vcf(hgvs_genomic, primary_assembly, reverse_normalizer, sf): def hgvs_ref_alt(hgvs_variant, sf): - if re.search('[GATC]+\=', str(hgvs_variant.posedit)): + if re.search(r'[GATC]+=', str(hgvs_variant.posedit)): ref = hgvs_variant.posedit.edit.ref alt = hgvs_variant.posedit.edit.ref # Insertions - elif (re.search('ins', str(hgvs_variant.posedit)) and not re.search('del', str(hgvs_variant.posedit))): + elif 'ins' 
in str(hgvs_variant.posedit) and 'del' not in str(hgvs_variant.posedit): end = int(hgvs_variant.posedit.pos.end.base) start = int(hgvs_variant.posedit.pos.start.base) alt_start = start - 1 # @@ -982,28 +917,28 @@ def hgvs_ref_alt(hgvs_variant, sf): alt = ref_seq[:1] + ins_seq + ref_seq[-1:] # Substitutions - elif re.search('>', str(hgvs_variant.posedit)): + elif '>' in str(hgvs_variant.posedit): ref = hgvs_variant.posedit.edit.ref alt = hgvs_variant.posedit.edit.alt # Deletions - elif re.search('del', str(hgvs_variant.posedit)) and not re.search('ins', str(hgvs_variant.posedit)): + elif 'del' in str(hgvs_variant.posedit) and 'ins' not in str(hgvs_variant.posedit): ref = hgvs_variant.posedit.edit.ref alt = '' # inv - elif re.search('inv', str(hgvs_variant.posedit)): + elif 'inv' in str(hgvs_variant.posedit): ref = hgvs_variant.posedit my_seq = Seq(ref) alt = str(my_seq.reverse_complement()) # Delins - elif (re.search('del', str(hgvs_variant.posedit)) and re.search('ins', str(hgvs_variant.posedit))): + elif 'del' in str(hgvs_variant.posedit) and 'ins' in str(hgvs_variant.posedit): ref = hgvs_variant.posedit.edit.ref alt = hgvs_variant.posedit.edit.alt # Duplications - elif (re.search('dup', str(hgvs_variant.posedit))): + elif 'dup' in str(hgvs_variant.posedit): ref = hgvs_variant.posedit.edit.ref alt = hgvs_variant.posedit.edit.ref + hgvs_variant.posedit.edit.ref else: diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index f8e97192..c54ee426 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -3,7 +3,7 @@ import copy import hgvs.exceptions from .logger import Logger -from . import vvHGVS +from . import hgvs_utils from .variant import Variant from . import seq_data from . 
import vvFunctions as fn @@ -65,8 +65,8 @@ def gene_to_transcripts(variant, validator): # Tripple check this assumption by querying the gene position database if len(rel_var) == 0: - vcf_dict = vvHGVS.hgvs2vcf(variant.hgvs_genomic, variant.primary_assembly, variant.reverse_normalizer, - validator.sf) + vcf_dict = hgvs_utils.hgvs2vcf(variant.hgvs_genomic, variant.primary_assembly, variant.reverse_normalizer, + validator.sf) not_di = str(variant.hgvs_genomic.ac) + ':g.' + str(vcf_dict['pos']) + '_' + str( int(vcf_dict['pos']) + (len(vcf_dict['ref']) - 1)) + 'del' + vcf_dict['ref'] + 'ins' + \ vcf_dict['alt'] diff --git a/VariantValidator/modules/vvLiftover.py b/VariantValidator/modules/vvLiftover.py index 01ee9454..d0132ea5 100644 --- a/VariantValidator/modules/vvLiftover.py +++ b/VariantValidator/modules/vvLiftover.py @@ -11,7 +11,7 @@ import re import os from . import seq_data -from . import vvHGVS +from . import hgvs_utils from .logger import Logger from pyliftover import LiftOver from Bio.Seq import Seq @@ -85,7 +85,7 @@ def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, reverse_no alt_build_to = 'GRCh38' # populate the variant from data - vcf = vvHGVS.report_hgvs2vcf(hgvs_genomic, build_from, reverse_normalizer, sf) + vcf = hgvs_utils.report_hgvs2vcf(hgvs_genomic, build_from, reverse_normalizer, sf) # Create to and from dictionaries lifted_response[build_from.lower()] = {} @@ -169,7 +169,7 @@ def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, reverse_no # In this instance, do not mark added data as True hgvs_tx = vm.g_to_t(hgvs_genomic, val) hgvs_alt_genomic = vm.t_to_g(hgvs_tx, key) - alt_vcf = vvHGVS.report_hgvs2vcf(hgvs_alt_genomic, build_to, reverse_normalizer, sf) + alt_vcf = hgvs_utils.report_hgvs2vcf(hgvs_alt_genomic, build_to, reverse_normalizer, sf) # Add the to build dictionaries lifted_response[build_to.lower()][hgvs_alt_genomic.ac] = { @@ -213,7 +213,7 @@ def liftover(hgvs_genomic, build_from, build_to, hn, vm, 
vr, hdp, hp, reverse_no genome_builds = [build_to] # Create liftover vcf - from_vcf = vvHGVS.report_hgvs2vcf(hgvs_genomic, lo_from, reverse_normalizer, sf) + from_vcf = hgvs_utils.report_hgvs2vcf(hgvs_genomic, lo_from, reverse_normalizer, sf) if PYLIFTOVER_DIR is not None: lo_filename_to = PYLIFTOVER_DIR + "%sTo%s.over.chain" % (lo_from, lo_to) @@ -288,7 +288,7 @@ def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, reverse_no if lifted_back[0] == from_vcf[from_set] or lifted_back[0] == my_from_chr: if lifted_back[1] == int(from_vcf['pos']): for build in genome_builds: - vcf_dict = vvHGVS.report_hgvs2vcf(hgvs_lifted, build, reverse_normalizer, sf) + vcf_dict = hgvs_utils.report_hgvs2vcf(hgvs_lifted, build, reverse_normalizer, sf) if re.match('GRC', build): lifted_response[build_to.lower()][hgvs_lifted.ac] = { 'hgvs_genomic_description': mystr(hgvs_lifted), diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index 41221f34..83b0f937 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -14,7 +14,7 @@ import hgvs.sequencevariant from . import vvMixinInit from . import seq_data -from . import vvHGVS +from . 
import hgvs_utils from urllib.parse import urlparse import httplib2 as http import json @@ -2041,7 +2041,7 @@ def merge_hgvs_3pr(self, hgvs_variant_list,hn): # Generate the alt sequence alt_sequence = '' for hgvs_v in full_list: - ref_alt = vvHGVS.hgvs_ref_alt(hgvs_v, self.sf) + ref_alt = hgvs_utils.hgvs_ref_alt(hgvs_v, self.sf) alt_sequence = alt_sequence + ref_alt['alt'] # Fetch the reference sequence and copy it for the basis of the alt sequence @@ -2150,7 +2150,7 @@ def merge_hgvs_5pr(self, hgvs_variant_list): # Generate the alt sequence alt_sequence = '' for hgvs_v in full_list: - ref_alt = vvHGVS.hgvs_ref_alt(hgvs_v, self.sf) + ref_alt = hgvs_utils.hgvs_ref_alt(hgvs_v, self.sf) alt_sequence = alt_sequence + ref_alt['alt'] # Fetch the reference sequence and copy it for the basis of the alt sequence @@ -2183,7 +2183,7 @@ def merge_pseudo_vcf(self, vcf_list, genome_build, hn): hgvs_list = [] # Convert pseudo_vcf list into a HGVS list for call in vcf_list: - x55hgvs = vvHGVS.pvcf_to_hgvs(call, genome_build, normalization_direction=5, validator=self) + x55hgvs = hgvs_utils.pvcf_to_hgvs(call, genome_build, normalization_direction=5, validator=self) hgvs_list.append(x55hgvs) # Merge hgvs_delins = self.merge_hgvs_5pr(hgvs_list) diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 65cbd436..3f113405 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -7,7 +7,7 @@ import traceback from hgvs.assemblymapper import AssemblyMapper from .logger import Logger -from . import vvHGVS +from . import hgvs_utils from . import vvFunctions as fn from . import seq_data from . 
import vvMixinConverters @@ -562,8 +562,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr test = seq_data.supported_for_mapping(alt_gen_var.ac, build) if test: try: - vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, build, variant.reverse_normalizer, - self.sf) + vcf_dict = hgvs_utils.report_hgvs2vcf(alt_gen_var, build, variant.reverse_normalizer, + self.sf) except hgvs.exceptions.HGVSInvalidVariantError: continue # Identify primary assembly positions @@ -588,8 +588,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr } } if build == 'GRCh38': - vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, 'hg38', variant.reverse_normalizer, - self.sf) + vcf_dict = hgvs_utils.report_hgvs2vcf(alt_gen_var, 'hg38', variant.reverse_normalizer, + self.sf) primary_genomic_dicts['hg38'] = { 'hgvs_genomic_description': fn.valstr(alt_gen_var), 'vcf': {'chr': vcf_dict['ucsc_chr'], @@ -622,8 +622,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr alt_genomic_dicts.append(alt_dict) if build == 'GRCh38': - vcf_dict = vvHGVS.report_hgvs2vcf(alt_gen_var, 'hg38', variant.reverse_normalizer, - self.sf) + vcf_dict = hgvs_utils.report_hgvs2vcf(alt_gen_var, 'hg38', variant.reverse_normalizer, + self.sf) alt_dict = {'hg38': {'hgvs_genomic_description': fn.valstr(alt_gen_var), 'vcf': {'chr': vcf_dict['ucsc_chr'], 'pos': vcf_dict['pos'], From b14622d16938cb040600d1037d2bb9dd96ac46f7 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 19 Jun 2019 15:58:22 +0100 Subject: [PATCH 136/223] Refactored vvFunctions into utils --- VariantValidator/modules/format_converters.py | 2 +- VariantValidator/modules/gapped_mapping.py | 2 +- VariantValidator/modules/mappers.py | 2 +- VariantValidator/modules/use_checking.py | 2 +- .../modules/{vvFunctions.py => utils.py} | 87 +++++++++---------- VariantValidator/modules/variant.py | 2 +- VariantValidator/modules/vvDBGet.py | 2 +- VariantValidator/modules/vvDBInsert.py | 
2 +- VariantValidator/modules/vvDatabase.py | 4 +- VariantValidator/modules/vvMixinConverters.py | 2 +- VariantValidator/modules/vvMixinCore.py | 2 +- VariantValidator/modules/vvMixinInit.py | 2 +- 12 files changed, 53 insertions(+), 58 deletions(-) rename VariantValidator/modules/{vvFunctions.py => utils.py} (89%) diff --git a/VariantValidator/modules/format_converters.py b/VariantValidator/modules/format_converters.py index de11a263..0ed7ab64 100644 --- a/VariantValidator/modules/format_converters.py +++ b/VariantValidator/modules/format_converters.py @@ -4,7 +4,7 @@ from .logger import Logger from .variant import Variant from . import seq_data -from . import vvFunctions as fn +from . import utils as fn def initial_format_conversions(variant, validator, select_transcripts_dict_plus_version): diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index eb9825f0..0a4c4b56 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -3,7 +3,7 @@ import hgvs.exceptions -from . import vvFunctions as fn +from . import utils as fn from . import hgvs_utils from .logger import Logger diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index c54ee426..152067fb 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -6,7 +6,7 @@ from . import hgvs_utils from .variant import Variant from . import seq_data -from . import vvFunctions as fn +from . import utils as fn from . import gapped_mapping diff --git a/VariantValidator/modules/use_checking.py b/VariantValidator/modules/use_checking.py index 5151bc3b..c73fe5db 100644 --- a/VariantValidator/modules/use_checking.py +++ b/VariantValidator/modules/use_checking.py @@ -1,6 +1,6 @@ import re import hgvs -from . import vvFunctions as fn +from . 
import utils as fn from .logger import Logger import copy diff --git a/VariantValidator/modules/vvFunctions.py b/VariantValidator/modules/utils.py similarity index 89% rename from VariantValidator/modules/vvFunctions.py rename to VariantValidator/modules/utils.py index 519fe67f..8e8e2d87 100644 --- a/VariantValidator/modules/vvFunctions.py +++ b/VariantValidator/modules/utils.py @@ -1,42 +1,30 @@ -from Bio import Entrez,SeqIO from Bio.Seq import Seq from Bio.Alphabet import IUPAC import httplib2 as http import json -from urllib.parse import urlparse #Python 2 +from urllib.parse import urlparse # Python 3 import functools import traceback import sys from .logger import Logger import re import copy -import mysql -import time -#from urllib.parse import urlparse #Python 3 def handleCursor(func): - ''' + """ Decorator function for handling opening and closing cursors. - ''' + """ @functools.wraps(func) - def wrapper(self,*args,**kwargs): -# if self.pool==None: -# self.pool=mysql.connector.pooling.MySQLConnectionPool(pool_size=10, **self.dbConfig) -# self.conn=self.pool.get_connection() + def wrapper(self, *args, **kwargs): self.cursor = self.conn.cursor(buffered=True) - out=func(self,*args,**kwargs) + out = func(self, *args, **kwargs) if self.cursor: self.cursor.close() -# if self.conn: -# self.conn.close() - #self.cursor=None return out return wrapper - - def hgnc_rest(path): data = { 'record': '', @@ -65,9 +53,10 @@ def hgnc_rest(path): data['error'] = "Unable to contact the HGNC database: Please try again later" return data -# method for final validation and stringifying parsed hgvs variants prior to printing/passing to html + def valstr(hgvs_variant): """ + Required for final validation and stringifying parsed hgvs variants prior to printing/passing to html. 
Function to ensure the required number of reference bases are displayed in descriptions """ cp_hgvs_variant = copy.deepcopy(hgvs_variant) @@ -80,12 +69,14 @@ def valstr(hgvs_variant): cp_hgvs_variant = str(cp_hgvs_variant) return cp_hgvs_variant -# From output_formatter + def single_letter_protein(hgvs_protein): """ format protein description into single letter aa code """ return hgvs_protein.format({'p_3_letter': False}) + + def remove_reference(hgvs_nucleotide): """ format nucleotide descriptions to not display reference base @@ -93,6 +84,7 @@ def remove_reference(hgvs_nucleotide): hgvs_nucleotide_refless = hgvs_nucleotide.format({'max_ref_length': 0}) return hgvs_nucleotide_refless + def exceptPass(validation=None): exc_type, exc_value, last_traceback = sys.exc_info() te = traceback.format_exc() @@ -105,8 +97,8 @@ def exceptPass(validation=None): Logger.warning("Except pass for " + str(exc_type) + " " + str(exc_value)) Logger.debug(er) -# From functions.py -def user_input(input): + +def user_input(query): """ user_input collect the input from the form and convert to a hgvs readable string @@ -116,23 +108,22 @@ def user_input(input): parsing and the variant type Accepts c, g, n, r currently. And now P also 15.07.15 """ - raw_variant = input.strip() + raw_variant = query.strip() # Set regular expressions for if statements - pat_g = re.compile("\:g\.") # Pattern looks for :g. - pat_gene = re.compile('\(.+?\)') # Pattern looks for (....) - pat_c = re.compile("\:c\.") # Pattern looks for :c. - pat_r = re.compile("\:r\.") # Pattern looks for :r. - pat_n = re.compile("\:n\.") # Pattern looks for :n. - pat_p = re.compile("\:p\.") # Pattern looks for :p. - pat_m = re.compile("\:m\.") # Pattern looks for :m. - pat_est = re.compile("\d\:\d") # Pattern looks for number:number + pat_g = re.compile(r":g\.") # Pattern looks for :g. + pat_gene = re.compile(r'\(.+?\)') # Pattern looks for (....) + pat_c = re.compile(r":c\.") # Pattern looks for :c. 
+ pat_r = re.compile(r":r\.") # Pattern looks for :r. + pat_n = re.compile(r":n\.") # Pattern looks for :n. + pat_p = re.compile(r":p\.") # Pattern looks for :p. + pat_m = re.compile(r":m\.") # Pattern looks for :m. + pat_est = re.compile(r"\d:\d") # Pattern looks for number:number # If statements if pat_g.search(raw_variant): # If the :g. pattern is present in the raw_variant, g_in is linked to the raw_variant if pat_gene.search(raw_variant): # If pat gene is present in the raw_variant - variant = pat_gene.sub('', - raw_variant) # variant is set to the raw_variant string with the pattern (...) substituted out + variant = pat_gene.sub('', raw_variant) # variant is set to the raw_variant string with the pattern (...) substituted out formated = {'variant': variant, 'type': ':g.'} return formated else: @@ -187,7 +178,7 @@ def user_input(input): formatted = 'invalid' return formatted -# From links.py + def pro_inv_info(prot_ref_seq, prot_var_seq): """ Function which predicts the protein effect of c. inversions @@ -208,8 +199,7 @@ def pro_inv_info(prot_ref_seq, prot_var_seq): info['variant'] = 'false' else: # Deal with terminations - term = re.compile("\*") - if term.search(prot_var_seq): + if '*' in prot_var_seq: # Set the termination reporter to true info['terminate'] = 'true' # The termination position will be equal to the length of the variant sequence because it's a TERMINATOR!!! @@ -287,16 +277,17 @@ def pro_inv_info(prot_ref_seq, prot_var_seq): info['edit_end'] = info['edit_start'] + len(ref) - 1 return info + def pro_delins_info(prot_ref_seq, prot_var_seq): info = { - 'variant' : 'true', - 'prot_del_seq' : '', - 'prot_ins_seq' : '', - 'edit_start' : 0, - 'edit_end' : 0, - 'terminate' : 'false', - 'ter_pos' : 0, - 'error' : 'false' + 'variant': 'true', + 'prot_del_seq': '', + 'prot_ins_seq': '', + 'edit_start': 0, + 'edit_end': 0, + 'terminate': 'false', + 'ter_pos': 0, + 'error': 'false' } # Is there actually any variation? 
@@ -304,8 +295,8 @@ def pro_delins_info(prot_ref_seq, prot_var_seq): info['variant'] = 'false' else: # Deal with terminations - term = re.compile("\*") - if term.search(prot_var_seq): + + if '*' in prot_var_seq: # Set the termination reporter to true info['terminate'] = 'true' @@ -400,6 +391,7 @@ def translate(ed_seq, cds_start): translation = 'error' return translation + def one_to_three(seq): """ Convert single letter amino acid code to 3 letter code @@ -428,7 +420,6 @@ def n_inversion(ref_seq, del_seq, inv_seq, interval_start, interval_end): """ Takes a reference sequence and inverts the specified position """ - sequence = '' # Use string indexing to check whether the sequences are the same test = ref_seq[interval_start - 1:interval_end] if test == del_seq: @@ -456,7 +447,11 @@ def hgvs_dup2indel(hgvs_seq): # Custom Exceptions class VariantValidatorError(Exception): pass + + class mergeHGVSerror(Exception): pass + + class alleleVariantError(Exception): pass diff --git a/VariantValidator/modules/variant.py b/VariantValidator/modules/variant.py index 92f33c63..82966720 100644 --- a/VariantValidator/modules/variant.py +++ b/VariantValidator/modules/variant.py @@ -1,5 +1,5 @@ import re -from . import vvFunctions as fn +from . import utils as fn class Variant(object): diff --git a/VariantValidator/modules/vvDBGet.py b/VariantValidator/modules/vvDBGet.py index 5cca2928..07dedb4b 100644 --- a/VariantValidator/modules/vvDBGet.py +++ b/VariantValidator/modules/vvDBGet.py @@ -1,4 +1,4 @@ -from .vvFunctions import handleCursor +from .utils import handleCursor from .logger import Logger from . import vvDBInit diff --git a/VariantValidator/modules/vvDBInsert.py b/VariantValidator/modules/vvDBInsert.py index 1f5e796f..e54f0185 100644 --- a/VariantValidator/modules/vvDBInsert.py +++ b/VariantValidator/modules/vvDBInsert.py @@ -1,4 +1,4 @@ -from .vvFunctions import handleCursor +from .utils import handleCursor from . 
import vvDBGet class Mixin(vvDBGet.Mixin): diff --git a/VariantValidator/modules/vvDatabase.py b/VariantValidator/modules/vvDatabase.py index aa6388d1..4eddf87a 100644 --- a/VariantValidator/modules/vvDatabase.py +++ b/VariantValidator/modules/vvDatabase.py @@ -1,6 +1,6 @@ from .logger import Logger -from . import vvFunctions as fn -from .vvFunctions import handleCursor +from . import utils as fn +from .utils import handleCursor #from vvDBInsert import vvDBInsert #from vvDBGet import vvDBGet from . import vvDBInsert diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index 83b0f937..2b5babe6 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -19,7 +19,7 @@ import httplib2 as http import json from Bio import Entrez,SeqIO -from . import vvFunctions as fn +from . import utils as fn #Error setup diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 3f113405..20d7af58 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -8,7 +8,7 @@ from hgvs.assemblymapper import AssemblyMapper from .logger import Logger from . import hgvs_utils -from . import vvFunctions as fn +from . import utils as fn from . import seq_data from . import vvMixinConverters from .variant import Variant diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index cf9d676c..b9cee4ab 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -19,7 +19,7 @@ import copy from .vvDatabase import vvDatabase from .logger import Logger -from . import vvFunctions as fn +from . 
import utils as fn from VariantValidator.settings import CONFIG_DIR from VariantValidator.version import __version__ From cdbe1d2fe82baefd80e9f4a18707e3086c348966 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 19 Jun 2019 16:56:15 +0100 Subject: [PATCH 137/223] fixed warning within utils --- VariantValidator/modules/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/VariantValidator/modules/utils.py b/VariantValidator/modules/utils.py index 8e8e2d87..900d5d1d 100644 --- a/VariantValidator/modules/utils.py +++ b/VariantValidator/modules/utils.py @@ -247,7 +247,7 @@ def pro_inv_info(prot_ref_seq, prot_var_seq): var = var[::-1] # Reverse loop through ref list to find the first missmatch position for aa in ref: - if var[aa_counter] == '\*': + if var[aa_counter] == r'\*': break if aa == var[aa_counter]: aa_counter = aa_counter + 1 @@ -336,7 +336,7 @@ def pro_delins_info(prot_ref_seq, prot_var_seq): # Reverse loop through ref list to find the first missmatch position for aa in ref: try: - if var[aa_counter] == '\*': + if var[aa_counter] == r'\*': break except IndexError: break From 2f30835baeda15554a31ffed4153c45a3e14b084 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 20 Jun 2019 09:48:05 +0100 Subject: [PATCH 138/223] Refactored liftover module --- .../modules/{vvLiftover.py => liftover.py} | 202 +++++++++--------- VariantValidator/modules/valoutput.py | 9 +- VariantValidator/modules/vvMixinInit.py | 2 - 3 files changed, 110 insertions(+), 103 deletions(-) rename VariantValidator/modules/{vvLiftover.py => liftover.py} (67%) diff --git a/VariantValidator/modules/vvLiftover.py b/VariantValidator/modules/liftover.py similarity index 67% rename from VariantValidator/modules/vvLiftover.py rename to VariantValidator/modules/liftover.py index d0132ea5..ef721c06 100644 --- a/VariantValidator/modules/vvLiftover.py +++ b/VariantValidator/modules/liftover.py @@ -8,8 +8,6 @@ # import modules import hgvs.exceptions import hgvs.sequencevariant 
-import re -import os from . import seq_data from . import hgvs_utils from .logger import Logger @@ -19,25 +17,29 @@ # Pre compile variables hgvs.global_config.formatting.max_ref_length = 1000000 -# Determine whether a liftover directory has been added to the environment -PYLIFTOVER_DIR = os.environ.get('PYLIFTOVER_DIR') def mystr(hgvs_nucleotide): hgvs_nucleotide_refless = hgvs_nucleotide.format({'max_ref_length': 0}) return hgvs_nucleotide_refless -def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, reverse_normalizer, sf, evm): +def liftover(hgvs_genomic, build_from, build_to, hn, reverse_normalizer, evm, validator): """ - :param hgvs_genomic: hgvs_object genomic description accession NC, NT, or NW. Not NG + Step 1, attempt to liftover using a common RefSeq transcript + Step 2, attempt to liftover using PyLiftover. + Lift position > Check bases > Lift back and confirm the original position + :param hgvs_genomic: :param build_from: :param build_to: - :return: lifted {} - Step 1, attempt to liftover using a common RefSeq transcript + :param hn: + :param reverse_normalizer: + :param evm: + :param validator: Validator obj + :return: """ try: - hgvs_genomic = hp.parse_hgvs_variant(hgvs_genomic) + hgvs_genomic = validator.hp.parse_hgvs_variant(hgvs_genomic) except TypeError: pass @@ -45,73 +47,89 @@ def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, reverse_no lifted_response = {} # Check genome build type - if re.match('GRC', build_from): + if 'GRC' in build_from: from_set = 'grc_chr' alt_from_set = 'ucsc_chr' - if re.search('37', build_from): + if '37' in build_from: lo_from = 'hg19' alt_build_from = 'hg19' - if re.search('38', build_from): + elif '38' in build_from: lo_from = 'hg38' alt_build_from = 'hg38' + else: + lo_from = '' + alt_build_from = '' else: from_set = 'ucsc_chr' alt_from_set = 'grc_chr' - if re.search('19', build_from): + if '19' in build_from: lo_from = 'hg19' alt_build_from = 'GRCh37' - if re.search('38', 
build_from): + elif '38' in build_from: lo_from = 'hg38' alt_build_from = 'GRCh38' + else: + lo_from = '' + alt_build_from = '' - if re.match('GRC', build_to): + if 'GRC' in build_to: to_set = 'grc_chr' alt_to_set = 'ucsc_chr' - if re.search('37', build_to): + if '37' in build_to: lo_to = 'hg19' alt_build_to = 'hg19' - if re.search('38', build_to): + elif '38' in build_to: lo_to = 'hg38' alt_build_to = 'hg38' + else: + lo_to = '' + alt_build_to = '' else: to_set = 'ucsc_chr' alt_to_set = 'grc_chr' - if re.search('19', build_to): + if '19' in build_to: lo_to = 'hg19' alt_build_to = 'GRCh37' - if re.search('38', build_to): + elif '38' in build_to: lo_to = 'hg38' alt_build_to = 'GRCh38' + else: + lo_to = '' + alt_build_to = '' # populate the variant from data - vcf = hgvs_utils.report_hgvs2vcf(hgvs_genomic, build_from, reverse_normalizer, sf) + vcf = hgvs_utils.report_hgvs2vcf(hgvs_genomic, build_from, reverse_normalizer, validator.sf) # Create to and from dictionaries lifted_response[build_from.lower()] = {} - lifted_response[build_from.lower()][hgvs_genomic.ac] = {'hgvs_genomic_description': mystr(hgvs_genomic), - 'vcf': { - 'chr': vcf[from_set], - 'pos': str(vcf['pos']), - 'ref': vcf['ref'], - 'alt': vcf['alt']} - } + lifted_response[build_from.lower()][hgvs_genomic.ac] = { + 'hgvs_genomic_description': mystr(hgvs_genomic), + 'vcf': { + 'chr': vcf[from_set], + 'pos': str(vcf['pos']), + 'ref': vcf['ref'], + 'alt': vcf['alt'] + } + } lifted_response[alt_build_from.lower()] = {} - lifted_response[alt_build_from.lower()][hgvs_genomic.ac] = {'hgvs_genomic_description': mystr(hgvs_genomic), - 'vcf': { - 'chr': vcf[alt_from_set], - 'pos': str(vcf['pos']), - 'ref': vcf['ref'], - 'alt': vcf['alt']} - } + lifted_response[alt_build_from.lower()][hgvs_genomic.ac] = { + 'hgvs_genomic_description': mystr(hgvs_genomic), + 'vcf': { + 'chr': vcf[alt_from_set], + 'pos': str(vcf['pos']), + 'ref': vcf['ref'], + 'alt': vcf['alt'] + } + } # From dictionary currently blank 
lifted_response[build_to.lower()] = {} lifted_response[alt_build_to.lower()] = {} # Get a list of overlapping RefSeq transcripts # Note, due to 0 base positions in UTA (I think) occasionally tx will - rts_list = hdp.get_tx_for_region(hgvs_genomic.ac, 'splign', hgvs_genomic.posedit.pos.start.base - 1, - hgvs_genomic.posedit.pos.end.base - 1) + rts_list = validator.hdp.get_tx_for_region(hgvs_genomic.ac, 'splign', hgvs_genomic.posedit.pos.start.base - 1, + hgvs_genomic.posedit.pos.end.base - 1) rts_dict = {} tx_list = False for tx_dat in rts_list: @@ -127,28 +145,27 @@ def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, reverse_no selected = [] for tx in tx_list: # identify the first transcript if any - options = hdp.get_tx_mapping_options(tx) + options = validator.hdp.get_tx_mapping_options(tx) for op in options: - if re.match('NC_', op[1]): - if re.match('GRC', build_to): + sfm = None + if op[1].startswith('NC_'): + if build_to.startswith('GRC'): sfm = seq_data.to_chr_num_refseq(op[1], build_to) - if re.match('hg', build_to): + if build_to.startswith('hg'): sfm = seq_data.to_chr_num_ucsc(op[1], build_to) if sfm is not None: selected.append([op[0], op[1]]) - for op in options: - if re.match('NT_', op[1]): - if re.match('GRC', build_to): + if op[1].startswith('NT_'): + if build_to.startswith('GRC'): sfm = seq_data.to_chr_num_refseq(op[1], build_to) - if re.match('hg', build_to): + if build_to.startswith('hg'): sfm = seq_data.to_chr_num_ucsc(op[1], build_to) if sfm is not None: selected.append([op[0], op[1]]) - for op in options: - if re.match('NW_', op[1]): - if re.match('GRC', build_to): + if op[1].startswith('NW_'): + if build_to.startswith('GRC'): sfm = seq_data.to_chr_num_refseq(op[1], build_to) - if re.match('hg', build_to): + if build_to.startswith('hg'): sfm = seq_data.to_chr_num_ucsc(op[1], build_to) if sfm is not None: selected.append([op[0], op[1]]) @@ -157,9 +174,7 @@ def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, 
reverse_no filtered_1 = {} if selected: for chroms in selected: - if chroms[1] in list(filtered_1.keys()): - pass - else: + if chroms[1] not in list(filtered_1.keys()): filtered_1[chroms[1]] = chroms[0] added_data = False for key, val in list(filtered_1.items()): @@ -167,43 +182,35 @@ def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, reverse_no # Note, due to 0 base positions in UTA (I think) occasionally tx will # be identified that cannot be mapped to. # In this instance, do not mark added data as True - hgvs_tx = vm.g_to_t(hgvs_genomic, val) - hgvs_alt_genomic = vm.t_to_g(hgvs_tx, key) - alt_vcf = hgvs_utils.report_hgvs2vcf(hgvs_alt_genomic, build_to, reverse_normalizer, sf) + hgvs_tx = validator.vm.g_to_t(hgvs_genomic, val) + hgvs_alt_genomic = validator.vm.t_to_g(hgvs_tx, key) + alt_vcf = hgvs_utils.report_hgvs2vcf(hgvs_alt_genomic, build_to, reverse_normalizer, validator.sf) # Add the to build dictionaries lifted_response[build_to.lower()][hgvs_alt_genomic.ac] = { - 'hgvs_genomic_description': mystr(hgvs_alt_genomic), - 'vcf': { - 'chr': alt_vcf[to_set], - 'pos': str(alt_vcf['pos']), - 'ref': alt_vcf['ref'], - 'alt': alt_vcf['alt']} - } + 'hgvs_genomic_description': mystr(hgvs_alt_genomic), + 'vcf': { + 'chr': alt_vcf[to_set], + 'pos': str(alt_vcf['pos']), + 'ref': alt_vcf['ref'], + 'alt': alt_vcf['alt'] + } + } lifted_response[alt_build_to.lower()][hgvs_alt_genomic.ac] = { - 'hgvs_genomic_description': mystr(hgvs_alt_genomic), - 'vcf': { - 'chr': alt_vcf[alt_to_set], - 'pos': str(alt_vcf['pos']), - 'ref': alt_vcf['ref'], - 'alt': alt_vcf['alt']} - } + 'hgvs_genomic_description': mystr(hgvs_alt_genomic), + 'vcf': { + 'chr': alt_vcf[alt_to_set], + 'pos': str(alt_vcf['pos']), + 'ref': alt_vcf['ref'], + 'alt': alt_vcf['alt'] + } + } added_data = True - except hgvs.exceptions.HGVSError as e: + except hgvs.exceptions.HGVSError: continue if lifted_response != {} and added_data is not False: return lifted_response - else: - pass - else: - # 
liftover has failed - pass - - """ - Step 2, attempt to liftover using PyLiftover. - Lift position > Check bases > Lift back and confirm the original position - """ # Note: pyliftover uses the UCSC liftOver tool. # https://pypi.org/project/pyliftover/ @@ -213,10 +220,14 @@ def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, reverse_no genome_builds = [build_to] # Create liftover vcf - from_vcf = hgvs_utils.report_hgvs2vcf(hgvs_genomic, lo_from, reverse_normalizer, sf) + from_vcf = hgvs_utils.report_hgvs2vcf(hgvs_genomic, lo_from, reverse_normalizer, validator.sf) - if PYLIFTOVER_DIR is not None: - lo_filename_to = PYLIFTOVER_DIR + "%sTo%s.over.chain" % (lo_from, lo_to) + pyliftover_dir = None + if validator.liftoverPath is not None and validator.liftoverPath != '/PATH/TO/LIFTOVER': + pyliftover_dir = validator.liftoverPath + + if pyliftover_dir is not None: + lo_filename_to = pyliftover_dir + "%sTo%s.over.chain" % (lo_from, lo_to) lo_filename_to = str(lo_filename_to.replace('Tohg', 'ToHg')) lo = LiftOver(lo_filename_to) @@ -224,17 +235,15 @@ def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, reverse_no lo = LiftOver(lo_from, lo_to) # Fix the GRC CHR - if re.match('chr', from_vcf[from_set]): + if from_vcf[from_set].startswith('chr'): liftover_list = lo.convert_coordinate(from_vcf[from_set], int(from_vcf['pos'])) else: my_chrom = 'chr' + from_vcf[from_set] liftover_list = lo.convert_coordinate(my_chrom, int(from_vcf['pos'])) - # Create dictionary - primary_genomic_dicts = {} for lifted in liftover_list: - chr = lifted[0] + chrom = lifted[0] pos = lifted[1] orientated = lifted[2] @@ -247,18 +256,18 @@ def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, reverse_no lifted_ref_bases = my_seq.reverse_complement() your_seq = Seq(lifted_alt_bases) lifted_alt_bases = your_seq.reverse_complement() - accession = seq_data.to_accession(chr, lo_to) + accession = seq_data.to_accession(chrom, lo_to) if accession is None: - wrn 
= 'Unable to identify an equivalent %s chromosome ID for %s' % (str(lo_to), str(chr)) + wrn = 'Unable to identify an equivalent %s chromosome ID for %s' % (str(lo_to), str(chrom)) Logger.warning(wrn) continue else: not_delins = accession + ':g.' + str(pos) + '_' + str( (pos - 1) + len(lifted_ref_bases)) + 'del' + lifted_ref_bases + 'ins' + lifted_alt_bases not_delins = str(not_delins) - hgvs_not_delins = hp.parse_hgvs_variant(not_delins) + hgvs_not_delins = validator.hp.parse_hgvs_variant(not_delins) try: - vr.validate(hgvs_not_delins) + validator.vr.validate(hgvs_not_delins) except hgvs.exceptions.HGVSError as e: Logger.warning(str(e)) # Most likely incorrect bases @@ -266,8 +275,8 @@ def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, reverse_no else: hgvs_lifted = hn.normalize(hgvs_not_delins) # Now try map back - if PYLIFTOVER_DIR is not None: - lo_filename_from = PYLIFTOVER_DIR + "%sTo%s.over.chain" % (lo_to, lo_from) + if pyliftover_dir is not None: + lo_filename_from = pyliftover_dir + "%sTo%s.over.chain" % (lo_to, lo_from) lo_filename_from = str(lo_filename_from.replace('Tohg', 'ToHg')) lo = LiftOver(lo_filename_from) @@ -275,12 +284,12 @@ def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, reverse_no lo = LiftOver(lo_to, lo_from) # Lift back - liftback_list = lo.convert_coordinate(chr, pos) + liftback_list = lo.convert_coordinate(chrom, pos) for lifted_back in liftback_list: # Pull out the good guys! 
# Need to add chr to the from_set - if not re.match('chr', lifted_back[0]): + if not lifted_back[0].startswith('chr'): my_from_chr = 'chr' + lifted_back[0] else: my_from_chr = lifted_back[0] @@ -288,8 +297,9 @@ def liftover(hgvs_genomic, build_from, build_to, hn, vm, vr, hdp, hp, reverse_no if lifted_back[0] == from_vcf[from_set] or lifted_back[0] == my_from_chr: if lifted_back[1] == int(from_vcf['pos']): for build in genome_builds: - vcf_dict = hgvs_utils.report_hgvs2vcf(hgvs_lifted, build, reverse_normalizer, sf) - if re.match('GRC', build): + vcf_dict = hgvs_utils.report_hgvs2vcf( + hgvs_lifted, build, reverse_normalizer, validator.sf) + if build.startswith('GRC'): lifted_response[build_to.lower()][hgvs_lifted.ac] = { 'hgvs_genomic_description': mystr(hgvs_lifted), 'vcf': {'chr': vcf_dict['grc_chr'], diff --git a/VariantValidator/modules/valoutput.py b/VariantValidator/modules/valoutput.py index 2be346a3..97a633de 100644 --- a/VariantValidator/modules/valoutput.py +++ b/VariantValidator/modules/valoutput.py @@ -1,5 +1,5 @@ import os -from .vvLiftover import liftover as lift_over +from .liftover import liftover from .logger import Logger @@ -94,10 +94,9 @@ def format_as_dict(self, with_meta=True): build_from = 'GRCh37' # Liftover - lifted_response = lift_over(genomic_position_info[g_p_key]['hgvs_genomic_description'], build_from, - build_to, variant.hn, self.validator.vm, self.validator.vr, - self.validator.hdp, self.validator.hp, variant.reverse_normalizer, - self.validator.sf, variant.evm) + lifted_response = liftover(genomic_position_info[g_p_key]['hgvs_genomic_description'], build_from, + build_to, variant.hn, variant.reverse_normalizer, + variant.evm, self.validator) # Sort the respomse into primary assembly and ALT primary_assembly_loci = {} diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index b9cee4ab..7f85437d 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py 
@@ -111,8 +111,6 @@ def __init__(self): # Set up other configuration variables self.liftoverPath = config["liftover"]["location"] - if not self.liftoverPath == 'PATH/TO/LIFTOVER': - os.environ['PYLIFTOVER_DIR'] = self.liftoverPath self.entrezID = config["EntrezID"]['entrezid'] # Set up HGVS From 415eaf659c516ed6214e1fa25dc089fd7c6d88e9 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 20 Jun 2019 10:52:13 +0100 Subject: [PATCH 139/223] Tidyied up MixinInit --- VariantValidator/modules/mappers.py | 8 +- VariantValidator/modules/vvMixinInit.py | 230 ++++++++++++------------ 2 files changed, 120 insertions(+), 118 deletions(-) diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index 152067fb..970e1fbc 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -701,7 +701,7 @@ def transcripts_to_gene(variant, validator): hgvs_refseq = 'RefSeqGene record not available' # Predicted effect on protein - protein_dict = validator.myc_to_p(hgvs_coding, variant.evm, re_to_p=False) + protein_dict = validator.myc_to_p(hgvs_coding, variant.evm, re_to_p=False, hn=variant.hn) if protein_dict['error'] == '': hgvs_protein = protein_dict['hgvs_protein'] else: @@ -745,7 +745,7 @@ def transcripts_to_gene(variant, validator): c_for_p = fn.valstr(hgvs_seek_var) try: # Predicted effect on protein - protein_dict = validator.myc_to_p(c_for_p, variant.evm, re_to_p=False) + protein_dict = validator.myc_to_p(c_for_p, variant.evm, re_to_p=False, hn=variant.hn) if protein_dict['error'] == '': hgvs_protein = protein_dict['hgvs_protein'] else: @@ -776,7 +776,7 @@ def transcripts_to_gene(variant, validator): fn.exceptPass() else: # hgvs_protein = va_func.protein(str(c_for_p), variant.evm, hp) - protein_dict = validator.myc_to_p(c_for_p, variant.evm, re_to_p=False) + protein_dict = validator.myc_to_p(c_for_p, variant.evm, re_to_p=False, hn=variant.hn) if protein_dict['error'] == '': hgvs_protein = protein_dict['hgvs_protein'] 
else: @@ -794,7 +794,7 @@ def transcripts_to_gene(variant, validator): # Note, this will not correct read-through stop codons, but it will try! if hgvs_coding.posedit.pos.start.offset == 0 and hgvs_coding.posedit.pos.start.offset == 0 and \ '?' in str(hgvs_protein): - protein_dict = validator.myc_to_p(hgvs_coding, variant.evm, re_to_p=False) + protein_dict = validator.myc_to_p(hgvs_coding, variant.evm, re_to_p=False, hn=variant.hn) if protein_dict['error'] == '': hgvs_protein = protein_dict['hgvs_protein'] else: diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index 7f85437d..ba4e40a5 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -18,24 +18,19 @@ import re import copy from .vvDatabase import vvDatabase -from .logger import Logger -from . import utils as fn +from . import utils from VariantValidator.settings import CONFIG_DIR from VariantValidator.version import __version__ class Mixin: """ - # This object contains configuration options for the validator, but it inherits the mixin - # class in vvCore that contains the enormous validator function. - This mixin is the first for the validator object, which is instantiated in order to perform validator functions. The validator contains configuration information and permanent copies of database links and the like. Much of the validator's inner workings are stored in special one-off function container objects: validator.db : The validator's MySQL database access functions - The validator configuration is stored in ~/.config/VariantValidator/config.ini . This is loaded - when the validator object is initialized. + The validator configuration is loaded when the validator object is initialized. 
Running variant validator should hopefully be as simple as writing a script like this: import VariantValidator @@ -45,7 +40,7 @@ class Mixin: """ def __init__(self): - ''' + """ Renaming of variables : 'seqrepo_directory': HGVS_SEQREPO_DIR, #self.seqrepoPath 'uta_url': UTA_DB_URL, #self.utaPath @@ -56,25 +51,25 @@ def __init__(self): 'variantvalidator_hgvs_version': hgvs_version, #self.hgvsVersion 'uta_schema': str(hdp.data_version()), #self.uta_schema 'seqrepo_db': HGVS_SEQREPO_DIR.split('/')[-1] #self.seqrepoVersion - ''' + """ # Load the configuration file. config = ConfigParser() config.read(CONFIG_DIR) # The custom vvLogging module will set itself up using the VALDIATOR_DEBUG environment variable. - levelString = config["logging"]['level'] - consoleString = config["logging"]['console'] - if consoleString.lower() == "true": - consoleString = "console" - fileString = config["logging"]['file'] - if fileString.lower() == "true": - fileString = "file" - traceString = config["logging"]['trace'] - if traceString.lower() == "true": - traceString = "trace" - logString = levelString+" "+consoleString+" "+fileString+" "+traceString - os.environ["VALIDATOR_DEBUG"] = logString + level_string = config["logging"]['level'] + console_string = config["logging"]['console'] + if console_string.lower() == "true": + console_string = "console" + file_string = config["logging"]['file'] + if file_string.lower() == "true": + file_string = "file" + trace_string = config["logging"]['trace'] + if trace_string.lower() == "true": + trace_string = "trace" + log_string = level_string+" "+console_string+" "+file_string+" "+trace_string + os.environ["VALIDATOR_DEBUG"] = log_string # Handle databases self.entrezID = config["EntrezID"]["entrezID"] @@ -96,10 +91,11 @@ def __init__(self): 'password': config["mysql"]["password"], 'host': config["mysql"]["host"], 'database': config["mysql"]["database"], - 'raise_on_warnings': True + 'raise_on_warnings': True } - #Create database access objects + # 
Create database access objects self.db = vvDatabase(self.dbConfig) + # Set up versions self.version = __version__ if re.match(r'^\d+\.\d+\.\d+$', __version__) is not None: @@ -145,19 +141,21 @@ def __init__(self): ) # Create normalizer - self.merge_normalizer = hgvs.normalizer.Normalizer(self.hdp, - cross_boundaries=False, - shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, - alt_aln_method='splign', - validate=False - ) - self.reverse_merge_normalizer = hgvs.normalizer.Normalizer(self.hdp, - cross_boundaries=False, - shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, - alt_aln_method='splign', - validate=False - ) - #create no_norm_evm + self.merge_normalizer = hgvs.normalizer.Normalizer( + self.hdp, + cross_boundaries=False, + shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, + alt_aln_method='splign', + validate=False + ) + self.reverse_merge_normalizer = hgvs.normalizer.Normalizer( + self.hdp, + cross_boundaries=False, + shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, + alt_aln_method='splign', + validate=False + ) + # create no_norm_evm self.no_norm_evm_38 = hgvs.assemblymapper.AssemblyMapper(self.hdp, assembly_name='GRCh38', alt_aln_method='splign', @@ -180,7 +178,7 @@ def __init__(self): def __del__(self): del self.db - def myConfig(self): + def my_config(self): """ Returns configuration: version, hgvs version, uta schema, seqrepo db. @@ -192,62 +190,59 @@ def myConfig(self): 'seqrepo_db': self.seqrepoPath } - def protein(self,variant, evm, hpUnused): - # Set regular expressions for if statements - pat_c = re.compile("\:c\.") # Pattern looks for :c. Note (gene) has been removed - + def protein(self, variant, evm, hpUnused): # If the :c. pattern is present in the input variant - if pat_c.search(variant): + if ':c.' in variant: # convert the input string into a hgvs object - var_c = self.hp.parse_hgvs_variant(variant) + var_c = self.hp.parse(variant) # Does the edit affect the start codon? 
- if ((var_c.posedit.pos.start.base >= 1 and var_c.posedit.pos.start.base <= 3 and var_c.posedit.pos.start.offset == 0) or ( - var_c.posedit.pos.end.base >= 1 and var_c.posedit.pos.end.base <= 3 and var_c.posedit.pos.end.offset == 0)) and not re.search('\*', str( - var_c.posedit.pos)): + if ((1 <= var_c.posedit.pos.start.base <= 3 and var_c.posedit.pos.start.offset == 0) or ( + 1 <= var_c.posedit.pos.end.base <= 3 and var_c.posedit.pos.end.offset == 0)) and '*' not in str( + var_c.posedit.pos): ass_prot = self.hdp.get_pro_ac_for_tx_ac(var_c.ac) - if str(ass_prot) == 'None': + if ass_prot is None: cod = str(var_c) cod = cod.replace('inv', 'del') - cod = self.hp.parse_hgvs_variant(cod) + cod = self.hp.parse(cod) p = evm.c_to_p(cod) ass_prot = p.ac var_p = hgvs.sequencevariant.SequenceVariant(ac=ass_prot, type='p', posedit='(Met1?)') else: var_p = evm.c_to_p(var_c) return var_p - if re.search(':n.', variant): - var_p = self.hp.parse_hgvs_variant(variant) + + if ':n.' in variant: + var_p = self.hp.parse(variant) var_p.ac = 'Non-coding transcript' var_p.posedit = '' return var_p - def myc_to_p(self,hgvs_transcript, evm, re_to_p): + def myc_to_p(self, hgvs_transcript, evm, re_to_p, hn): # Create dictionary to store the information hgvs_transcript_to_hgvs_protein = {'error': '', 'hgvs_protein': '', 'ref_residues': ''} + associated_protein_accession = '' # Collect the associated protein if hgvs_transcript.type == 'c': associated_protein_accession = self.hdp.get_pro_ac_for_tx_ac(hgvs_transcript.ac) # This method sometimes fails - if str(associated_protein_accession) == 'None': + if associated_protein_accession is None: cod = str(hgvs_transcript) cod = cod.replace('inv', 'del') - cod = self.hp.parse_hgvs_variant(cod) + cod = self.hp.parse(cod) p = evm.c_to_p(cod) associated_protein_accession = p.ac - else: - pass - # Check for non-coding transcripts if hgvs_transcript.type == 'c': # Handle non inversions with simple c_to_p mapping - if (hgvs_transcript.posedit.edit.type != 
'inv') and (hgvs_transcript.posedit.edit.type != 'delins') and (re_to_p is False): + if (hgvs_transcript.posedit.edit.type != 'inv') and (hgvs_transcript.posedit.edit.type != 'delins') and ( + re_to_p is False): + hgvs_protein = '' # Does the edit affect the start codon? - if ((hgvs_transcript.posedit.pos.start.base >= 1 and hgvs_transcript.posedit.pos.start.base <= 3 and hgvs_transcript.posedit.pos.start.offset == 0) or ( - hgvs_transcript.posedit.pos.end.base >= 1 and hgvs_transcript.posedit.pos.end.base <= 3 and hgvs_transcript.posedit.pos.end.offset == 0)) \ - and not re.search('\*', str( - hgvs_transcript.posedit.pos)): + if ((1 <= hgvs_transcript.posedit.pos.start.base <= 3 and hgvs_transcript.posedit.pos.start.offset == 0 + ) or (1 <= hgvs_transcript.posedit.pos.end.base <= 3 and hgvs_transcript.posedit.pos.end.offset + == 0)) and '*' not in str(hgvs_transcript.posedit.pos): hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', posedit='(Met1?)') else: @@ -255,28 +250,31 @@ def myc_to_p(self,hgvs_transcript, evm, re_to_p): hgvs_protein = evm.c_to_p(hgvs_transcript) except IndexError as e: error = str(e) - if re.search('string index out of range', error) and re.search('dup', str(hgvs_transcript)): - hgvs_ins = self.hp.parse_hgvs_variant(str(hgvs_transcript)) + if 'string index out of range' in error and 'dup' in str(hgvs_transcript): + hgvs_ins = self.hp.parse(str(hgvs_transcript)) hgvs_ins = hn.normalize(hgvs_ins) - inst = hgvs_ins.ac + ':c.' + str(hgvs_ins.posedit.pos.start.base - 1) + '_' + str(hgvs_ins.posedit.pos.start.base) + 'ins' + hgvs_ins.posedit.edit.ref - hgvs_transcript = self.hp.parse_hgvs_variant(inst) + inst = hgvs_ins.ac + ':c.' 
+ str(hgvs_ins.posedit.pos.start.base - 1) + '_' + \ + str(hgvs_ins.posedit.pos.start.base) + 'ins' + hgvs_ins.posedit.edit.ref + hgvs_transcript = self.hp.parse(inst) hgvs_protein = evm.c_to_p(hgvs_transcript) try: hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein return hgvs_transcript_to_hgvs_protein except UnboundLocalError: - hgvs_transcript_to_hgvs_protein = self.myc_to_p(hgvs_transcript, evm, re_to_p = True) + hgvs_transcript_to_hgvs_protein = self.myc_to_p(hgvs_transcript, evm, re_to_p=True, hn=hn) return hgvs_transcript_to_hgvs_protein else: # Additional code required to process inversions - # Note, this code was developed for VariantValidator and is not native to the biocommons hgvs Python package + # Note, this code was developed for VariantValidator and is not native to the biocommons hgvs + # Python package # Convert positions to n. position hgvs_naughty = self.vm.c_to_n(hgvs_transcript) # Collect the deleted sequence using fetch_seq - del_seq = self.sf.fetch_seq(str(hgvs_naughty.ac), start_i=hgvs_naughty.posedit.pos.start.base - 1, end_i=hgvs_naughty.posedit.pos.end.base) + del_seq = self.sf.fetch_seq(str(hgvs_naughty.ac), start_i=hgvs_naughty.posedit.pos.start.base - 1, + end_i=hgvs_naughty.posedit.pos.end.base) # Make the inverted sequence my_seq = Seq(del_seq) @@ -288,12 +286,13 @@ def myc_to_p(self,hgvs_transcript, evm, re_to_p): if inv_seq is None: inv_seq = '' + shifts = '' # Look for p. delins or del not_delins = True if hgvs_transcript.posedit.edit.type != 'inv': try: shifts = evm.c_to_p(hgvs_transcript) - if re.search('del', shifts.posedit.edit.type): + if 'del' in shifts.posedit.edit.type: not_delins = False except Exception: not_delins = False @@ -301,22 +300,23 @@ def myc_to_p(self,hgvs_transcript, evm, re_to_p): not_delins = False # Use inv delins code? 
- if not_delins == False: + if not not_delins: # Collect the associated protein associated_protein_accession = self.hdp.get_pro_ac_for_tx_ac(hgvs_transcript.ac) # Intronic inversions are marked as uncertain i.e. p.? - if re.search('\d+\-', str(hgvs_transcript.posedit.pos)) or re.search('\d+\+', str(hgvs_transcript.posedit.pos)) or re.search('\*', str(hgvs_transcript.posedit.pos)) or re.search('[cn].\-', str(hgvs_transcript)): - if (( - hgvs_transcript.posedit.pos.start.base >= 1 and hgvs_transcript.posedit.pos.start.base <= 3 and hgvs_transcript.posedit.pos.start.offset == 0) - or - (hgvs_transcript.posedit.pos.end.base >= 1 and hgvs_transcript.posedit.pos.end.base <= 3 and hgvs_transcript.posedit.pos.end.offset == 0)) \ - and not re.search('\*', str(hgvs_transcript.posedit.pos)): - hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', + if re.search(r'\d+-', str(hgvs_transcript.posedit.pos)) or re.search(r'\d+\+', str(hgvs_transcript.posedit.pos)) or re.search(r'\*', str(hgvs_transcript.posedit.pos)) or re.search(r'[cn].-', str(hgvs_transcript)): + if ((1 <= hgvs_transcript.posedit.pos.start.base <= 3 and + hgvs_transcript.posedit.pos.start.offset == 0) or (1 <= + hgvs_transcript.posedit.pos.end.base <= 3 and hgvs_transcript.posedit.pos.end.offset == 0))\ + and '*' not in str(hgvs_transcript.posedit.pos): + hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, + type='p', posedit='(Met1?)') else: # Make the variant - hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', posedit='?') + hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, + type='p', posedit='?') hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein return hgvs_transcript_to_hgvs_protein else: @@ -333,18 +333,19 @@ def myc_to_p(self,hgvs_transcript, evm, re_to_p): return hgvs_transcript_to_hgvs_protein # Create the variant coding sequence - var_seq 
= fn.n_inversion(ref_seq, del_seq, inv_seq, + var_seq = utils.n_inversion(ref_seq, del_seq, inv_seq, hgvs_naughty.posedit.pos.start.base, hgvs_naughty.posedit.pos.end.base) # Translate the reference and variant proteins - prot_ref_seq = fn.translate(ref_seq, cds_start) + prot_ref_seq = utils.translate(ref_seq, cds_start) try: - prot_var_seq = fn.translate(var_seq, cds_start) + prot_var_seq = utils.translate(var_seq, cds_start) except IndexError: - hgvs_transcript_to_hgvs_protein['error'] = 'Cannot identify an in-frame Termination codon in the variant mRNA sequence' - hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', - posedit='?') + hgvs_transcript_to_hgvs_protein['error'] = \ + 'Cannot identify an in-frame Termination codon in the variant mRNA sequence' + hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, + type='p', posedit='?') hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein return hgvs_transcript_to_hgvs_protein @@ -354,13 +355,13 @@ def myc_to_p(self,hgvs_transcript, evm, re_to_p): return hgvs_transcript_to_hgvs_protein elif prot_var_seq == 'error': # Does the edit affect the start codon? 
- if (( - hgvs_transcript.posedit.pos.start.base >= 1 and hgvs_transcript.posedit.pos.start.base <= 3 and hgvs_transcript.posedit.pos.start.offset == 0) - or - (hgvs_transcript.posedit.pos.end.base >= 1 and hgvs_transcript.posedit.pos.end.base <= 3 and hgvs_transcript.posedit.pos.end.offset == 0)) \ - and not re.search('\*', str(hgvs_transcript.posedit.pos)): - hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', - posedit='(Met1?)') + if ((1 <= hgvs_transcript.posedit.pos.start.base <= 3 and + hgvs_transcript.posedit.pos.start.offset == 0) or ( + 1 <= hgvs_transcript.posedit.pos.end.base <= 3 and + hgvs_transcript.posedit.pos.end.offset == 0)) \ + and '*' not in str(hgvs_transcript.posedit.pos): + hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, + type='p', posedit='(Met1?)') hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein return hgvs_transcript_to_hgvs_protein @@ -371,9 +372,9 @@ def myc_to_p(self,hgvs_transcript, evm, re_to_p): else: # Gather the required information regarding variant interval and sequences if hgvs_transcript.posedit.edit.type != 'delins': - pro_inv_info = fn.pro_inv_info(prot_ref_seq, prot_var_seq) + pro_inv_info = utils.pro_inv_info(prot_ref_seq, prot_var_seq) else: - pro_inv_info = fn.pro_delins_info(prot_ref_seq, prot_var_seq) + pro_inv_info = utils.pro_delins_info(prot_ref_seq, prot_var_seq) # Error has occurred if pro_inv_info['error'] == 'true': @@ -384,8 +385,8 @@ def myc_to_p(self,hgvs_transcript, evm, re_to_p): # The Nucleotide variant has not affected the protein sequence i.e. 
synonymous elif pro_inv_info['variant'] != 'true': # Make the variant - hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', - posedit='=') + hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, + type='p', posedit='=') hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein return hgvs_transcript_to_hgvs_protein @@ -397,8 +398,8 @@ def myc_to_p(self,hgvs_transcript, evm, re_to_p): # Complete variant description # Recode the single letter del and ins sequences into three letter amino acid codes - del_thr = fn.one_to_three(pro_inv_info['prot_del_seq']) - ins_thr = fn.one_to_three(pro_inv_info['prot_ins_seq']) + del_thr = utils.one_to_three(pro_inv_info['prot_del_seq']) + ins_thr = utils.one_to_three(pro_inv_info['prot_ins_seq']) # Write the HGVS position and edit del_len = len(del_thr) @@ -408,43 +409,46 @@ def myc_to_p(self,hgvs_transcript, evm, re_to_p): # Handle a range of amino acids if pro_inv_info['edit_start'] != pro_inv_info['edit_end']: if len(ins_thr) > 0: - if re.search('Ter', del_thr) and ins_thr[-3:] != 'Ter': - posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + '_' + to_aa + str( - pro_inv_info['edit_end']) + 'delins' + ins_thr + '?)' + if 'Ter' in del_thr and ins_thr[-3:] != 'Ter': + posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + '_' + to_aa + \ + str(pro_inv_info['edit_end']) + 'delins' + ins_thr + '?)' else: - posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + '_' + to_aa + str( - pro_inv_info['edit_end']) + 'delins' + ins_thr + ')' + posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + '_' + to_aa + \ + str(pro_inv_info['edit_end']) + 'delins' + ins_thr + ')' else: - if re.search('Ter', del_thr) and ins_thr[-3:] != 'Ter': - posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + '_' + to_aa + str( - pro_inv_info['edit_end']) + 'del?)' + if 'Ter' in del_thr and ins_thr[-3:] != 'Ter': + posedit = '(' + from_aa + 
str(pro_inv_info['edit_start']) + '_' + to_aa + \ + str(pro_inv_info['edit_end']) + 'del?)' else: - posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + '_' + to_aa + str( - pro_inv_info['edit_end']) + 'del)' + posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + '_' + to_aa + \ + str(pro_inv_info['edit_end']) + 'del)' else: # Handle extended proteins i.e. stop_lost if del_thr == 'Ter' and (len(ins_thr) > len(del_thr)): # Nucleotide variant range aligns to the Termination codon if ins_thr[-3:] == 'Ter': posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + str( - ins_thr[:3]) + 'ext' + str(ins_thr[-3:]) + str(int((len(ins_thr) / 3)) - 1) + ')' + ins_thr[:3]) + 'ext' + str(ins_thr[-3:]) + str(int((len(ins_thr) / 3)) + - 1) + ')' # Nucleotide variant range spans the Termination codon else: posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + str( ins_thr[:3]) + 'ext?)' - # Nucleotide variation has not affected the length of the protein thus substitution or del + # Nucleotide variation has not affected the length of the protein thus + # substitution or del else: if len(ins_thr) == 3: posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + ins_thr + ')' elif len(ins_thr) == 0: posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + 'del)' else: - posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + 'delins' + ins_thr + ')' + posedit = '(' + from_aa + str(pro_inv_info['edit_start']) + 'delins' + \ + ins_thr + ')' # Complete the variant - hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', - posedit=posedit) + hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, + type='p', posedit=posedit) hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein @@ -454,7 +458,6 @@ def myc_to_p(self,hgvs_transcript, evm, re_to_p): # Return return hgvs_transcript_to_hgvs_protein - # Handle non-coding transcript and non transcript descriptions elif 
hgvs_transcript.type == 'n': # non-coding transcripts @@ -467,4 +470,3 @@ def myc_to_p(self,hgvs_transcript, evm, re_to_p): hgvs_transcript_to_hgvs_protein['error'] = 'Unable to map %s to %s' % ( hgvs_transcript.ac, associated_protein_accession) return hgvs_transcript_to_hgvs_protein - From 092a8f55710471cd4c91ceda8aca07c06a357c55 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 20 Jun 2019 15:13:16 +0100 Subject: [PATCH 140/223] Made partial changes to MixinConverters --- VariantValidator/modules/vvMixinConverters.py | 360 ++++++++++-------- 1 file changed, 197 insertions(+), 163 deletions(-) diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index 2b5babe6..1bb81383 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -1,17 +1,9 @@ import re -import os import sys import copy from .logger import Logger import hgvs -import hgvs.exceptions -from hgvs.dataproviders import uta -from hgvs.dataproviders import seqfetcher -import hgvs.normalizer import hgvs.validator -import hgvs.parser -import hgvs.variantmapper -import hgvs.sequencevariant from . import vvMixinInit from . import seq_data from . import hgvs_utils @@ -21,16 +13,14 @@ from Bio import Entrez,SeqIO from . import utils as fn - -#Error setup from hgvs.exceptions import HGVSError, HGVSDataNotAvailableError, HGVSUnsupportedOperationError class Mixin(vvMixinInit.Mixin): - ''' + """ This mixin contains converters that use the validator's configuration information. 
- - ''' + It inherits the Init mixin + """ def r_to_c(self, variant, evm): """ r_to_c @@ -63,6 +53,10 @@ def refseq(self, variant, vmOld, refseq_ac, hpOld, evm, hdpOld, primary_assembly alt_aln_method = 'splign' transcripts = self.hdp.get_tx_for_region(alt_ac, alt_aln_method, start_i - 1, end_i) # Take the first transcript + ref_g_dict = { + 'ref_g': '', + 'error': 'false' + } for trans in transcripts: tx_ac = trans[0] try: @@ -70,11 +64,6 @@ def refseq(self, variant, vmOld, refseq_ac, hpOld, evm, hdpOld, primary_assembly except: continue else: - # map the variant co-ordinates to the refseq Gene accession using vm - ref_g_dict = { - 'ref_g': '', - 'error': 'false' - } try: ref_g_dict['ref_g'] = self.vm.t_to_g(ref_c, alt_ac=refseq_ac, alt_aln_method='splign') except: @@ -98,25 +87,22 @@ def g_to_c(self, var_g, tx_ac, hpOld, evm): Maps genomic hgvs object into a coding hgvs object if the c accession string is provided returns a c. variant description string """ - pat_g = re.compile("\:g\.") # Pattern looks for :g. # If the :g. pattern is present in the input variant - if pat_g.search(var_g): + if ':g.' in var_g: # convert the input string into a hgvs object by parsing var_g = self.hp.parse_hgvs_variant(var_g) # Map to coding variant var_c = str(evm.g_to_c(var_g, tx_ac)) return var_c - def g_to_n(self, var_g, tx_ac, hpOld, evm): """ Parses genomic variant strings into hgvs objects Maps genomic hgvs object into a non-coding hgvs object if the n accession string is provided returns a n. variant description string """ - pat_g = re.compile("\:g\.") # Pattern looks for :g. # If the :g. pattern is present in the input variant - if pat_g.search(var_g): + if ':g.' in var_g: # convert the input string into a hgvs object by parsing var_g = self.hp.parse_hgvs_variant(var_g) # Map to coding variant @@ -129,36 +115,28 @@ def coding(self, variant, hpOld): returns parsed hgvs c. or n. object """ # If the :c. 
pattern is present in the input variant - if re.search(':c.', variant) or re.search(':n.', variant): + if ':c.' in variant or ':n.' in variant: # convert the input string into a hgvs object var_c = self.hp.parse_hgvs_variant(variant) return var_c - def genomic(self, variant, evm, primary_assembly,hn): + def genomic(self, variant, evm, primary_assembly, hn): """ Mapping transcript to genomic position Ensures variant strings are transcript c. or n. returns parsed hgvs g. object """ - # Set regular expressions for if statements - pat_g = re.compile("\:g\.") # Pattern looks for :g. - pat_n = re.compile("\:n\.") - pat_c = re.compile("\:c\.") # Pattern looks for :c. - # If the :c. pattern is present in the input variant - if pat_c.search(variant) or pat_n.search(variant): - error = 'false' + if ':c.' in variant or ':n.' in variant: hgvs_var = self.hp.parse_hgvs_variant(variant) try: - var_g = self.myevm_t_to_g(hgvs_var, evm, primary_assembly,hn) # genomic level variant + var_g = self.myevm_t_to_g(hgvs_var, evm, primary_assembly, hn) # genomic level variant except hgvs.exceptions.HGVSError as e: - error = e - if error != 'false': - var_g = 'error ' + str(e) + return 'error ' + str(e) return var_g # If the :g. pattern is present in the input variant - elif (pat_g.search(variant)): # or (pat_n.search(variant)): + elif ':g.' in variant: # or (pat_n.search(variant)): # convert the input string into a hgvs object var_g = self.hp.parse_hgvs_variant(variant) return var_g @@ -168,15 +146,13 @@ def hgvs_genomic(self, variant, hpOld): Ensures variant strings are g. returns parsed hgvs g. object """ - # Set regular expressions for if statements - pat_g = re.compile("\:g\.") # Pattern looks for :g. Note (gene) has been removed # If the :g. pattern is present in the input variant - if pat_g.search(variant): + if ':g.' 
in variant: # convert the input string into a hgvs object var_g = self.hp.parse_hgvs_variant(variant) return var_g - def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): + def myevm_t_to_g(self, hgvs_c, no_norm_evm, primary_assembly, hn): """ Enhanced transcript to genome position mapping function using evm Deals with mapping from transcript positions that do not exist in the genomic sequence @@ -196,7 +172,6 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): # store the input stored_hgvs_c = copy.deepcopy(hgvs_c) expand_out = 'false' - utilise_gap_code = True # Gap gene black list try: @@ -209,11 +184,13 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): # Warn gap code in use Logger.warning("gap_compensation_myevm = " + str(utilise_gap_code)) - if utilise_gap_code is True and ( - hgvs_c.posedit.edit.type == 'identity' or hgvs_c.posedit.edit.type == 'del' or hgvs_c.posedit.edit.type == 'delins' or hgvs_c.posedit.edit.type == 'dup' or hgvs_c.posedit.edit.type == 'sub' or hgvs_c.posedit.edit.type == 'ins' or hgvs_c.posedit.edit.type == 'inv'): + if utilise_gap_code is True and (hgvs_c.posedit.edit.type == 'identity' or hgvs_c.posedit.edit.type == 'del' + or hgvs_c.posedit.edit.type == 'delins' or hgvs_c.posedit.edit.type == 'dup' + or hgvs_c.posedit.edit.type == 'sub' or hgvs_c.posedit.edit.type == 'ins' + or hgvs_c.posedit.edit.type == 'inv'): # if NM_ need the n. 
position - if re.match('NM_', str(hgvs_c.ac)): + if str(hgvs_c.ac).startswith('NM_'): hgvs_c = no_norm_evm.c_to_n(hgvs_c) # Check for intronic @@ -221,15 +198,14 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): hn.normalize(hgvs_c) except hgvs.exceptions.HGVSError as e: error = str(e) - if re.search('intronic variant', error): - pass - elif re.search('Length implied by coordinates must equal sequence deletion length', error) and re.match( - 'NR_', hgvs_c.ac): + if 'intronic variant' not in error and \ + 'Length implied by coordinates must equal sequence deletion length' in error and \ + hgvs_c.ac.startswith('NR_'): hgvs_c.posedit.pos.end.base = hgvs_c.posedit.pos.start.base + len(hgvs_c.posedit.edit.ref) - 1 # Check again before continuing - if re.search('\d+\+', str(hgvs_c.posedit.pos)) or re.search('\d+\-', str(hgvs_c.posedit.pos)) or re.search( - '\*\d+\+', str(hgvs_c.posedit.pos)) or re.search('\*\d+\-', str(hgvs_c.posedit.pos)): + if re.search(r'\d+\+', str(hgvs_c.posedit.pos)) or re.search(r'\d+-', str(hgvs_c.posedit.pos)) or \ + re.search(r'\*\d+\+', str(hgvs_c.posedit.pos)) or re.search(r'\*\d+-', str(hgvs_c.posedit.pos)): pass else: @@ -303,19 +279,19 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): except Exception: hgvs_c = hgvs_c - if re.match('NM_', str(hgvs_c.ac)): + if str(hgvs_c.ac).startswith('NM_'): try: hgvs_c = no_norm_evm.n_to_c(hgvs_c) - except hgvs.exceptions.HGVSError as e: + except hgvs.exceptions.HGVSError: hgvs_c = copy.deepcopy(stored_hgvs_c) # Ensure the altered c. 
variant has not crossed intro exon boundaries hgvs_check_boundaries = copy.deepcopy(hgvs_c) try: - h_variant = hn.normalize(hgvs_check_boundaries) + hn.normalize(hgvs_check_boundaries) except hgvs.exceptions.HGVSError as e: error = str(e) - if re.search('spanning the exon-intron boundary', error): + if 'spanning the exon-intron boundary' in error: hgvs_c = copy.deepcopy(stored_hgvs_c) # Catch identity at the exon/intron boundary by trying to normalize ref only if hgvs_check_boundaries.posedit.edit.type == 'identity': @@ -327,157 +303,215 @@ def myevm_t_to_g(self,hgvs_c, no_norm_evm, primary_assembly, hn): hn.normalize(hgvs_reform_ident) except hgvs.exceptions.HGVSError as e: error = str(e) - if re.search('spanning the exon-intron boundary', error) or re.search( - 'Normalization of intronic variants', error): + if 'spanning the exon-intron boundary' in error or 'Normalization of intronic variants' in error: hgvs_c = copy.deepcopy(stored_hgvs_c) + + # Capture errors from attempted mappings + attempted_mapping_error = '' + hgvs_genomic = '' + try: hgvs_genomic = no_norm_evm.t_to_g(hgvs_c) hn.normalize(hgvs_genomic) # Check the validity of the mapping # This will fail on multiple refs for NC_ - except hgvs.exceptions.HGVSError as e: + except hgvs.exceptions.HGVSError: # Recover all available mapping options from UTA mapping_options = self.hdp.get_tx_mapping_options(hgvs_c.ac) - if mapping_options == []: + if not mapping_options: raise HGVSDataNotAvailableError( - "No alignment data between the specified transcript reference sequence and any GRCh37 and GRCh38 genomic reference sequences (including alternate chromosome assemblies, patches and RefSeqGenes) are available.") - - # Capture errors from attempted mappings - attempted_mapping_error = '' + "No alignment data between the specified transcript reference sequence and any GRCh37 and GRCh38 " + "genomic reference sequences (including alternate chromosome assemblies, patches and RefSeqGenes) " + "are available.") - for 
option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NC_', option[1]): - chr_num = seq_data.supported_for_mapping(str(option[1]), primary_assembly) - if chr_num != 'false': - try: - hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[ - 1] + '~' - print(e) - continue - - # If not mapped, raise error - try: - hn.normalize(hgvs_genomic) - except: + def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): + err = '' for option in mapping_options: - if re.match('blat', option[2]): + if option[2].startswith('blat'): continue - if re.match('NC_', option[1]): + if option[1].startswith(seqtype): chr_num = seq_data.supported_for_mapping(str(option[1]), primary_assembly) - if chr_num == 'false': + if final: try: hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) break except Exception as e: - if re.search(option[1], attempted_mapping_error): - pass - else: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[ - 1] + '~' + err += str(e) + "/" + hgvs_c.ac + "/" + option[1] + '~' + print(e) + continue + if chr_num_val and chr_num != 'false': + try: + hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + err += str(e) + "/" + hgvs_c.ac + "/" + option[1] + '~' + print(e) + continue + elif chr_num_val is False and chr_num == 'false': + try: + hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + break + except Exception as e: + err += str(e) + "/" + hgvs_c.ac + "/" + option[1] + '~' print(e) continue + + return hgvs_genomic, err + + hgvs_genomic, new_error = search_through_options(hgvs_genomic, 'NC_', True) + attempted_mapping_error += new_error + # for option in mapping_options: + # if option[2].startswith('blat'): + # continue + # if option[1].startswith('NC_'): + # chr_num = 
seq_data.supported_for_mapping(str(option[1]), primary_assembly) + # if chr_num != 'false': + # try: + # hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + # break + # except Exception as e: + # attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[ + # 1] + '~' + # print(e) + # continue + + # If not mapped, raise error + try: + hn.normalize(hgvs_genomic) + except: + hgvs_genomic, new_error = search_through_options(hgvs_genomic, 'NC_', False) + attempted_mapping_error += new_error + # for option in mapping_options: + # if option[2].startswith('blat'): + # continue + # if option[1].startswith('NC_'): + # chr_num = seq_data.supported_for_mapping(str(option[1]), primary_assembly) + # if chr_num == 'false': + # try: + # hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + # break + # except Exception as e: + # if re.search(option[1], attempted_mapping_error): + # pass + # else: + # attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" \ + # + option[1] + '~' + # print(e) + # continue try: hn.normalize(hgvs_genomic) except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NT_', option[1]): - chr_num = seq_data.supported_for_mapping(str(option[1]), primary_assembly) - if chr_num != 'false': - try: - hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[ - 1] + '~' - print(e) - continue + hgvs_genomic, new_error = search_through_options(hgvs_genomic, 'NT_', True) + attempted_mapping_error += new_error + # for option in mapping_options: + # if re.match('blat', option[2]): + # continue + # if re.match('NT_', option[1]): + # chr_num = seq_data.supported_for_mapping(str(option[1]), primary_assembly) + # if chr_num != 'false': + # try: + # hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + # break + # except Exception as e: + # 
attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[ + # 1] + '~' + # print(e) + # continue try: hn.normalize(hgvs_genomic) except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NT_', option[1]): - chr_num = seq_data.supported_for_mapping(str(option[1]), - primary_assembly) - if chr_num == 'false': - try: - hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + \ - option[ - 1] + '~' - print(e) - continue + hgvs_genomic, new_error = search_through_options(hgvs_genomic, 'NT_', False) + attempted_mapping_error += new_error + + # for option in mapping_options: + # if re.match('blat', option[2]): + # continue + # if re.match('NT_', option[1]): + # chr_num = seq_data.supported_for_mapping(str(option[1]), + # primary_assembly) + # if chr_num == 'false': + # try: + # hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + # break + # except Exception as e: + # attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + \ + # option[ + # 1] + '~' + # print(e) + # continue try: hn.normalize(hgvs_genomic) except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NW_', option[1]): - chr_num = seq_data.supported_for_mapping(str(option[1]), - primary_assembly) - if chr_num != 'false': - try: - hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + \ - option[1] + '~' - print(e) - continue + hgvs_genomic, new_error = search_through_options(hgvs_genomic, 'NW_', True) + attempted_mapping_error += new_error + # for option in mapping_options: + # if re.match('blat', option[2]): + # continue + # if re.match('NW_', option[1]): + # chr_num = seq_data.supported_for_mapping(str(option[1]), + # 
primary_assembly) + # if chr_num != 'false': + # try: + # hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + # break + # except Exception as e: + # attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + \ + # option[1] + '~' + # print(e) + # continue try: hn.normalize(hgvs_genomic) except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NW_', option[1]): - chr_num = seq_data.supported_for_mapping(str(option[1]), - primary_assembly) - if chr_num == 'false': - try: - hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str( - e) + "/" + hgvs_c.ac + "/" + \ - option[1] + '~' - print(e) - continue + hgvs_genomic, new_error = search_through_options(hgvs_genomic, 'NW_', False) + attempted_mapping_error += new_error + # for option in mapping_options: + # if re.match('blat', option[2]): + # continue + # if re.match('NW_', option[1]): + # chr_num = seq_data.supported_for_mapping(str(option[1]), + # primary_assembly) + # if chr_num == 'false': + # try: + # hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + # break + # except Exception as e: + # attempted_mapping_error = attempted_mapping_error + str( + # e) + "/" + hgvs_c.ac + "/" + \ + # option[1] + '~' + # print(e) + # continue # Only a RefSeqGene available try: hn.normalize(hgvs_genomic) except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NG_', option[1]): - try: - hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + \ - option[1] + '~' - print(e) - continue + hgvs_genomic, new_error = search_through_options(hgvs_genomic, 'NG_', True, + final=True) + attempted_mapping_error += new_error + # for option in mapping_options: + # if re.match('blat', option[2]): + # continue + # if 
re.match('NG_', option[1]): + # try: + # hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + # break + # except Exception as e: + # attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + \ + # option[1] + '~' + # print(e) + # continue # If not mapped, raise error try: + print(hgvs_genomic) + print(attempted_mapping_error) hgvs_genomic except Exception: raise HGVSDataNotAvailableError(attempted_mapping_error) + if hgvs_genomic == '': + raise HGVSDataNotAvailableError(attempted_mapping_error) + if hgvs_c.posedit.edit.type == 'identity' and hgvs_genomic.posedit.edit.type == 'delins' and hgvs_genomic.posedit.edit.alt == '' and expand_out != 'true': hgvs_genomic.posedit.edit.alt = hgvs_genomic.posedit.edit.ref if hgvs_genomic.posedit.edit.type == 'ins' and utilise_gap_code is True: From 439ccbb2fb9296ec2e72ef4ea31a266b572f6619 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 20 Jun 2019 15:33:00 +0100 Subject: [PATCH 141/223] Fixed bug in MixinInit --- VariantValidator/modules/vvMixinInit.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index ba4e40a5..19448d4b 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -238,7 +238,7 @@ def myc_to_p(self, hgvs_transcript, evm, re_to_p, hn): if (hgvs_transcript.posedit.edit.type != 'inv') and (hgvs_transcript.posedit.edit.type != 'delins') and ( re_to_p is False): - hgvs_protein = '' + hgvs_protein = None # Does the edit affect the start codon? 
if ((1 <= hgvs_transcript.posedit.pos.start.base <= 3 and hgvs_transcript.posedit.pos.start.offset == 0 ) or (1 <= hgvs_transcript.posedit.pos.end.base <= 3 and hgvs_transcript.posedit.pos.end.offset @@ -258,10 +258,10 @@ def myc_to_p(self, hgvs_transcript, evm, re_to_p, hn): hgvs_transcript = self.hp.parse(inst) hgvs_protein = evm.c_to_p(hgvs_transcript) - try: + if hgvs_protein: hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein return hgvs_transcript_to_hgvs_protein - except UnboundLocalError: + else: hgvs_transcript_to_hgvs_protein = self.myc_to_p(hgvs_transcript, evm, re_to_p=True, hn=hn) return hgvs_transcript_to_hgvs_protein From 7144458df143856638f61cfe26cb54e7750d2f9e Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 20 Jun 2019 16:15:02 +0100 Subject: [PATCH 142/223] More tidying up in MixinConverters --- VariantValidator/modules/vvMixinConverters.py | 281 ++++++------------ 1 file changed, 88 insertions(+), 193 deletions(-) diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index 1bb81383..9432b874 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -171,7 +171,7 @@ def myevm_t_to_g(self, hgvs_c, no_norm_evm, primary_assembly, hn): """ # store the input stored_hgvs_c = copy.deepcopy(hgvs_c) - expand_out = 'false' + expand_out = False # Gap gene black list try: @@ -216,11 +216,10 @@ def myevm_t_to_g(self, hgvs_c, no_norm_evm, primary_assembly, hn): inv_alt = self.revcomp(hgvs_t.posedit.edit.ref) t_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str(hgvs_t.posedit.pos.start.base) + '_' + str( hgvs_t.posedit.pos.end.base) + 'del' + hgvs_t.posedit.edit.ref + 'ins' + inv_alt - hgvs_t_delins = self.hp.parse_hgvs_variant(t_delins) pre_base = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 2, - hgvs_t.posedit.pos.start.base - 1) + hgvs_t.posedit.pos.start.base - 1) post_base = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.end.base, - hgvs_t.posedit.pos.end.base + 1) + hgvs_t.posedit.pos.end.base + 1) hgvs_t.posedit.edit.ref = pre_base + hgvs_t.posedit.edit.ref + post_base inv_alt = pre_base + inv_alt + post_base hgvs_t.posedit.pos.start.base = hgvs_t.posedit.pos.start.base - 1 @@ -235,9 +234,9 @@ def myevm_t_to_g(self, hgvs_c, no_norm_evm, primary_assembly, hn): hgvs_t = self.hp.parse_hgvs_variant(hgvs_str) elif hgvs_c.posedit.edit.type == 'dup': pre_base = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 2, - hgvs_t.posedit.pos.start.base - 1) + hgvs_t.posedit.pos.start.base - 1) post_base = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.end.base, - hgvs_t.posedit.pos.end.base + 1) + hgvs_t.posedit.pos.end.base + 1) alt = pre_base + hgvs_t.posedit.edit.ref + hgvs_t.posedit.edit.ref + post_base ref = pre_base + hgvs_t.posedit.edit.ref + post_base dup_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str( @@ -246,7 +245,7 @@ def myevm_t_to_g(self, hgvs_c, no_norm_evm, primary_assembly, hn): hgvs_t = self.hp.parse_hgvs_variant(dup_to_delins) elif hgvs_c.posedit.edit.type == 'ins': ins_ref = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 2, - hgvs_t.posedit.pos.end.base + 1) + hgvs_t.posedit.pos.end.base + 1) ins_alt = ins_ref[:2] + hgvs_t.posedit.edit.alt + ins_ref[-2:] ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str( hgvs_t.posedit.pos.start.base - 1) + '_' + str( @@ -256,9 +255,9 @@ def myevm_t_to_g(self, hgvs_c, no_norm_evm, primary_assembly, hn): if str(hgvs_t.posedit.edit.alt) == 'None': hgvs_t.posedit.edit.alt = '' pre_base = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 2, - hgvs_t.posedit.pos.start.base - 1) + hgvs_t.posedit.pos.start.base - 1) post_base = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.end.base, - hgvs_t.posedit.pos.end.base + 1) + hgvs_t.posedit.pos.end.base + 1) hgvs_t.posedit.edit.ref = pre_base + hgvs_t.posedit.edit.ref + post_base hgvs_t.posedit.edit.alt = pre_base + hgvs_t.posedit.edit.alt + post_base hgvs_t.posedit.pos.start.base = hgvs_t.posedit.pos.start.base - 1 @@ -274,7 +273,7 @@ def myevm_t_to_g(self, hgvs_c, no_norm_evm, primary_assembly, hn): hgvs_c = copy.deepcopy(hgvs_t) # Set expanded out test to true - expand_out = 'true' + expand_out = True except Exception: hgvs_c = hgvs_c @@ -308,7 +307,7 @@ def myevm_t_to_g(self, hgvs_c, no_norm_evm, primary_assembly, hn): # Capture errors from attempted mappings attempted_mapping_error = '' - hgvs_genomic = '' + hgvs_genomic = None try: hgvs_genomic = no_norm_evm.t_to_g(hgvs_c) @@ -360,20 +359,6 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): hgvs_genomic, new_error = search_through_options(hgvs_genomic, 'NC_', True) attempted_mapping_error += new_error - # for option in mapping_options: - # if option[2].startswith('blat'): - # continue - # if option[1].startswith('NC_'): - # chr_num = seq_data.supported_for_mapping(str(option[1]), primary_assembly) - # if chr_num != 'false': - # try: - # hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) - # break - # except Exception as e: - # attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[ - # 1] + '~' - # print(e) - # continue # If not mapped, raise error try: @@ -381,105 +366,30 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, 
final=False): except: hgvs_genomic, new_error = search_through_options(hgvs_genomic, 'NC_', False) attempted_mapping_error += new_error - # for option in mapping_options: - # if option[2].startswith('blat'): - # continue - # if option[1].startswith('NC_'): - # chr_num = seq_data.supported_for_mapping(str(option[1]), primary_assembly) - # if chr_num == 'false': - # try: - # hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) - # break - # except Exception as e: - # if re.search(option[1], attempted_mapping_error): - # pass - # else: - # attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" \ - # + option[1] + '~' - # print(e) - # continue + try: hn.normalize(hgvs_genomic) except: hgvs_genomic, new_error = search_through_options(hgvs_genomic, 'NT_', True) attempted_mapping_error += new_error - # for option in mapping_options: - # if re.match('blat', option[2]): - # continue - # if re.match('NT_', option[1]): - # chr_num = seq_data.supported_for_mapping(str(option[1]), primary_assembly) - # if chr_num != 'false': - # try: - # hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) - # break - # except Exception as e: - # attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[ - # 1] + '~' - # print(e) - # continue + try: hn.normalize(hgvs_genomic) except: hgvs_genomic, new_error = search_through_options(hgvs_genomic, 'NT_', False) attempted_mapping_error += new_error - # for option in mapping_options: - # if re.match('blat', option[2]): - # continue - # if re.match('NT_', option[1]): - # chr_num = seq_data.supported_for_mapping(str(option[1]), - # primary_assembly) - # if chr_num == 'false': - # try: - # hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) - # break - # except Exception as e: - # attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + \ - # option[ - # 1] + '~' - # print(e) - # continue try: hn.normalize(hgvs_genomic) except: hgvs_genomic, new_error = 
search_through_options(hgvs_genomic, 'NW_', True) attempted_mapping_error += new_error - # for option in mapping_options: - # if re.match('blat', option[2]): - # continue - # if re.match('NW_', option[1]): - # chr_num = seq_data.supported_for_mapping(str(option[1]), - # primary_assembly) - # if chr_num != 'false': - # try: - # hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) - # break - # except Exception as e: - # attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + \ - # option[1] + '~' - # print(e) - # continue + try: hn.normalize(hgvs_genomic) except: hgvs_genomic, new_error = search_through_options(hgvs_genomic, 'NW_', False) attempted_mapping_error += new_error - # for option in mapping_options: - # if re.match('blat', option[2]): - # continue - # if re.match('NW_', option[1]): - # chr_num = seq_data.supported_for_mapping(str(option[1]), - # primary_assembly) - # if chr_num == 'false': - # try: - # hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) - # break - # except Exception as e: - # attempted_mapping_error = attempted_mapping_error + str( - # e) + "/" + hgvs_c.ac + "/" + \ - # option[1] + '~' - # print(e) - # continue # Only a RefSeqGene available try: @@ -488,31 +398,13 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): hgvs_genomic, new_error = search_through_options(hgvs_genomic, 'NG_', True, final=True) attempted_mapping_error += new_error - # for option in mapping_options: - # if re.match('blat', option[2]): - # continue - # if re.match('NG_', option[1]): - # try: - # hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) - # break - # except Exception as e: - # attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + \ - # option[1] + '~' - # print(e) - # continue # If not mapped, raise error - try: - print(hgvs_genomic) - print(attempted_mapping_error) - hgvs_genomic - except Exception: - raise HGVSDataNotAvailableError(attempted_mapping_error) - - if 
hgvs_genomic == '': + if hgvs_genomic is None: raise HGVSDataNotAvailableError(attempted_mapping_error) - if hgvs_c.posedit.edit.type == 'identity' and hgvs_genomic.posedit.edit.type == 'delins' and hgvs_genomic.posedit.edit.alt == '' and expand_out != 'true': + if hgvs_c.posedit.edit.type == 'identity' and hgvs_genomic.posedit.edit.type == 'delins' and \ + hgvs_genomic.posedit.edit.alt == '' and not expand_out: hgvs_genomic.posedit.edit.alt = hgvs_genomic.posedit.edit.ref if hgvs_genomic.posedit.edit.type == 'ins' and utilise_gap_code is True: try: @@ -521,7 +413,7 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): error = str(e) if error == 'insertion length must be 1': ref = self.sf.fetch_seq(str(hgvs_genomic.ac), hgvs_genomic.posedit.pos.start.base - 1, - hgvs_genomic.posedit.pos.end.base) + hgvs_genomic.posedit.pos.end.base) hgvs_genomic.posedit.edit.ref = ref hgvs_genomic.posedit.edit.alt = ref[0:1] + hgvs_genomic.posedit.edit.alt + ref[-1:] hgvs_genomic = hn.normalize(hgvs_genomic) @@ -533,41 +425,40 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): hgvs_genomic = hn.normalize(hgvs_genomic) # Statements required to reformat the stored_hgvs_c into a useable synonym - if (stored_hgvs_c.posedit.edit.ref == '' or stored_hgvs_c.posedit.edit.ref is None) and expand_out != 'false': + if (stored_hgvs_c.posedit.edit.ref == '' or stored_hgvs_c.posedit.edit.ref is None) and expand_out: if stored_hgvs_c.type == 'c': stored_hgvs_n = self.vm.c_to_n(stored_hgvs_c) else: stored_hgvs_n = stored_hgvs_c stored_ref = self.sf.fetch_seq(str(stored_hgvs_n.ac), stored_hgvs_n.posedit.pos.start.base - 1, - stored_hgvs_n.posedit.pos.end.base) + stored_hgvs_n.posedit.pos.end.base) stored_hgvs_c.posedit.edit.ref = stored_ref - if (hgvs_genomic.posedit.edit.ref == '' or hgvs_genomic.posedit.edit.ref is None) and expand_out != 'false': + if (hgvs_genomic.posedit.edit.ref == '' or hgvs_genomic.posedit.edit.ref is None) and 
expand_out: if hgvs_genomic.posedit.edit.type == 'ins': stored_ref = self.sf.fetch_seq(str(hgvs_genomic.ac), hgvs_genomic.posedit.pos.start.base - 1, - hgvs_genomic.posedit.pos.end.base) + hgvs_genomic.posedit.pos.end.base) stored_alt = stored_ref[:1] + hgvs_genomic.posedit.edit.alt + stored_ref[-1:] hgvs_genomic.posedit.edit.ref = stored_ref hgvs_genomic.posedit.edit.alt = stored_alt # First look for variants mapping to the flanks of gaps # either in the gap or on the flank but not fully within the gap - if expand_out == 'true': - + if expand_out: nr_genomic = self.nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) try: hn.normalize(nr_genomic) except hgvs.exceptions.HGVSInvalidVariantError as e: error_type_1 = str(e) - if re.match('Length implied by coordinates must equal sequence deletion length', str(e)) or str( + if 'Length implied by coordinates must equal sequence deletion length' in str(e) or str( e) == 'base start position must be <= end position': # Effectively, this code is designed to handle variants that are directly proximal to - # gap BOUNDARIES, but in some cases the replace reference function of hgvs mapping has removed bases due to - # the deletion length being > the specified range. - + # gap BOUNDARIES, but in some cases the replace reference function of hgvs mapping has removed + # bases due to the deletion length being > the specified range. 
+ genomic_gap_variant = None # Warn of variant location wrt the gap - if re.match('Length implied by coordinates must equal sequence deletion length', str(e)): + if 'Length implied by coordinates must equal sequence deletion length' in str(e): Logger.warning('Variant is proximal to the flank of a genomic gap') genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) try: @@ -577,9 +468,11 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): if 'base start position must be <= end position' in str(e) and \ 'Length implied by coordinates must equal' in error_type_1: make_gen_var = copy.copy(nr_genomic) - make_gen_var.posedit.edit.ref = self.sf.fetch_seq(nr_genomic.ac, - nr_genomic.posedit.pos.start.base - 1, - nr_genomic.posedit.pos.end.base) + make_gen_var.posedit.edit.ref = self.sf.fetch_seq( + nr_genomic.ac, + nr_genomic.posedit.pos.start.base - 1, + nr_genomic.posedit.pos.end.base + ) genomic_gap_variant = make_gen_var error_type_1 = None @@ -602,7 +495,7 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): gap_end = genomic_gap_variant.posedit.pos.start.base genomic_gap_variant.posedit.pos.start.base = gap_start genomic_gap_variant.posedit.pos.end.base = gap_end - if re.match('Length implied by coordinates must equal sequence deletion length', str(e)): + if 'Length implied by coordinates must equal sequence deletion length' in str(e): # This will only happen if the variant is flanking the gap but is # not inside the gap Logger.warning('Variant is on the flank of a genomic gap but not within the gap') @@ -617,16 +510,14 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): try: genomic_gap_variant.posedit.edit.alt = '' except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - pass + pass # Should be a delins so will normalize statically and replace the reference bases genomic_gap_variant = hn.normalize(genomic_gap_variant) # Static map to c. 
and static normalize transcript_gap_variant = self.vm.g_to_t(genomic_gap_variant, hgvs_c.ac) - stored_transcript_gap_variant = transcript_gap_variant - if not re.match('Length implied by coordinates must equal sequence deletion length', str(e)): + if 'Length implied by coordinates must equal sequence deletion length' not in str(e): try: transcript_gap_variant = hn.normalize(transcript_gap_variant) except hgvs.exceptions.HGVSUnsupportedOperationError as e: @@ -634,7 +525,7 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): pass # if NM_ need the n. position - if re.match('NM_', str(hgvs_c.ac)): + if str(hgvs_c.ac).startswith('NM_'): transcript_gap_n = no_norm_evm.c_to_n(transcript_gap_variant) transcript_gap_alt_n = no_norm_evm.c_to_n(stored_hgvs_c) else: @@ -675,21 +566,22 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): # Note, all variants will be forced into the format delete insert # Deleted bases in the ALT will be substituted for X - for int in range(transcript_gap_alt_n.posedit.pos.start.base, - transcript_gap_alt_n.posedit.pos.end.base + 1, 1): - if int == alt_start: - alt_base_dict[int] = str(''.join(alternate_bases)) + for i in range(transcript_gap_alt_n.posedit.pos.start.base, + transcript_gap_alt_n.posedit.pos.end.base + 1, 1): + if i == alt_start: + alt_base_dict[i] = str(''.join(alternate_bases)) else: - alt_base_dict[int] = 'X' + alt_base_dict[i] = 'X' # Generate the alt sequence alternate_sequence_bases = [] - for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base + 1, - 1): - if int in list(alt_base_dict.keys()): - alternate_sequence_bases.append(alt_base_dict[int]) + for i in range(transcript_gap_n.posedit.pos.start.base, + transcript_gap_n.posedit.pos.end.base + 1, + 1): + if i in list(alt_base_dict.keys()): + alternate_sequence_bases.append(alt_base_dict[i]) else: - alternate_sequence_bases.append(ref_base_dict[int]) + 
alternate_sequence_bases.append(ref_base_dict[i]) alternate_sequence = ''.join(alternate_sequence_bases) alternate_sequence = alternate_sequence.replace('X', '') @@ -707,14 +599,20 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): except Exception as e: if str(e) == "base start position must be <= end position": # Expansion out is required to map back to the genomic position - pre_base = self.sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.start.base - 2, - transcript_gap_n.posedit.pos.start.base - 1) - post_base = self.sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.end.base, - transcript_gap_n.posedit.pos.end.base + 1) + pre_base = self.sf.fetch_seq( + transcript_gap_n.ac, + transcript_gap_n.posedit.pos.start.base - 2, + transcript_gap_n.posedit.pos.start.base - 1) + post_base = self.sf.fetch_seq( + transcript_gap_n.ac, + transcript_gap_n.posedit.pos.end.base, + transcript_gap_n.posedit.pos.end.base + 1) transcript_gap_n.posedit.pos.start.base = transcript_gap_n.posedit.pos.start.base - 1 transcript_gap_n.posedit.pos.end.base = transcript_gap_n.posedit.pos.end.base + 1 - transcript_gap_n.posedit.edit.ref = pre_base + transcript_gap_n.posedit.edit.ref + post_base - transcript_gap_n.posedit.edit.alt = pre_base + transcript_gap_n.posedit.edit.alt + post_base + transcript_gap_n.posedit.edit.ref = pre_base + transcript_gap_n.posedit.edit.ref + \ + post_base + transcript_gap_n.posedit.edit.alt = pre_base + transcript_gap_n.posedit.edit.alt + \ + post_base try: transcript_gap_variant = self.vm.n_to_c(transcript_gap_n) except: @@ -723,39 +621,32 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): hgvs_genomic = hn.normalize(hgvs_genomic) # Bypass the next bit of gap code - expand_out = 'false' - - else: - pass - # No map to the flank of a gap or within the gap - else: - pass + expand_out = False # CASCADING STATEMENTS WHICH CAPTURE t to g MAPPING OPTIONS # Remove identity bases if 
hgvs_c == stored_hgvs_c: - expand_out = 'false' - elif expand_out == 'false' or utilise_gap_code is False: + pass + elif expand_out is False or utilise_gap_code is False: pass # Correct expansion ref + 2 - elif expand_out == 'true' and ( + elif expand_out and ( len(hgvs_genomic.posedit.edit.ref) == (len(stored_hgvs_c.posedit.edit.ref) + 2)): # >= 3: hgvs_genomic.posedit.pos.start.base = hgvs_genomic.posedit.pos.start.base + 1 hgvs_genomic.posedit.pos.end.base = hgvs_genomic.posedit.pos.end.base - 1 hgvs_genomic.posedit.edit.ref = hgvs_genomic.posedit.edit.ref[1:-1] if hgvs_genomic.posedit.edit.alt is not None: hgvs_genomic.posedit.edit.alt = hgvs_genomic.posedit.edit.alt[1:-1] - elif expand_out == 'true' and ( + elif expand_out and ( len(hgvs_genomic.posedit.edit.ref) != (len(stored_hgvs_c.posedit.edit.ref) + 2)): # >= 3: - if expand_out == 'true' and len(hgvs_genomic.posedit.edit.ref) == 2: - gn = hn.normalize(hgvs_genomic) - pass + if len(hgvs_genomic.posedit.edit.ref) == 2: + hn.normalize(hgvs_genomic) # Likely if the start or end position aligns to a gap in the genomic sequence # Logic # We have checked that the variant does not cross boundaries, or is intronic # So is likely mapping to a genomic gap - elif expand_out == 'true' and len(hgvs_genomic.posedit.edit.ref) <= 1: + elif len(hgvs_genomic.posedit.edit.ref) <= 1: # Incorrect expansion, likely < ref + 2 genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) try: @@ -776,10 +667,9 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): genomic_gap_variant = hn.normalize(genomic_gap_variant) # Static map to c. and static normalize transcript_gap_variant = self.vm.g_to_t(genomic_gap_variant, hgvs_c.ac) - stored_transcript_gap_variant = transcript_gap_variant transcript_gap_variant = hn.normalize(transcript_gap_variant) # if NM_ need the n. 
position - if re.match('NM_', str(hgvs_c.ac)): + if str(hgvs_c.ac).startswith('NM_'): transcript_gap_n = no_norm_evm.c_to_n(transcript_gap_variant) transcript_gap_alt_n = no_norm_evm.c_to_n(stored_hgvs_c) else: @@ -820,20 +710,21 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): # Note, all variants will be forced into the format delete insert # Deleted bases in the ALT will be substituted for X - for int in range(transcript_gap_alt_n.posedit.pos.start.base, - transcript_gap_alt_n.posedit.pos.end.base + 1, 1): - if int == alt_start: - alt_base_dict[int] = str(''.join(alternate_bases)) + for i in range(transcript_gap_alt_n.posedit.pos.start.base, + transcript_gap_alt_n.posedit.pos.end.base + 1, 1): + if i == alt_start: + alt_base_dict[i] = str(''.join(alternate_bases)) else: - alt_base_dict[int] = 'X' + alt_base_dict[i] = 'X' # Generate the alt sequence alternate_sequence_bases = [] - for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base + 1, 1): - if int in list(alt_base_dict.keys()): - alternate_sequence_bases.append(alt_base_dict[int]) + for i in range(transcript_gap_n.posedit.pos.start.base, + transcript_gap_n.posedit.pos.end.base + 1, 1): + if i in list(alt_base_dict.keys()): + alternate_sequence_bases.append(alt_base_dict[i]) else: - alternate_sequence_bases.append(ref_base_dict[int]) + alternate_sequence_bases.append(ref_base_dict[i]) alternate_sequence = ''.join(alternate_sequence_bases) alternate_sequence = alternate_sequence.replace('X', '') @@ -851,10 +742,11 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): except Exception as e: if str(e) == "base start position must be <= end position": # Expansion out is required to map back to the genomic position - pre_base = self.sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.start.base - 2, - transcript_gap_n.posedit.pos.start.base - 1) + pre_base = self.sf.fetch_seq(transcript_gap_n.ac, + 
transcript_gap_n.posedit.pos.start.base - 2, + transcript_gap_n.posedit.pos.start.base - 1) post_base = self.sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.end.base, - transcript_gap_n.posedit.pos.end.base + 1) + transcript_gap_n.posedit.pos.end.base + 1) transcript_gap_n.posedit.pos.start.base = transcript_gap_n.posedit.pos.start.base - 1 transcript_gap_n.posedit.pos.end.base = transcript_gap_n.posedit.pos.end.base + 1 transcript_gap_n.posedit.edit.ref = pre_base + transcript_gap_n.posedit.edit.ref + post_base @@ -867,7 +759,8 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): hgvs_genomic = hn.normalize(hgvs_genomic) # Ins variants map badly - Especially between c. exon/exon boundary - if hgvs_c.posedit.edit.type == 'ins' and hgvs_c.posedit.pos.start.offset == 0 and hgvs_c.posedit.pos.end.offset == 0: + if hgvs_c.posedit.edit.type == 'ins' and hgvs_c.posedit.pos.start.offset == 0 and \ + hgvs_c.posedit.pos.end.offset == 0: try: hn.normalize(hgvs_genomic) except hgvs.exceptions.HGVSError as e: @@ -877,9 +770,11 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): hgvs_t = self.vm.c_to_n(hgvs_c) else: hgvs_t = copy.copy(hgvs_c) - ins_ref = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 1, hgvs_t.posedit.pos.end.base) + ins_ref = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 1, + hgvs_t.posedit.pos.end.base) ins_alt = ins_ref[:1] + hgvs_t.posedit.edit.alt + ins_ref[-1:] - ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base) + '_' + str( + ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str( + hgvs_t.posedit.pos.start.base) + '_' + str( hgvs_t.posedit.pos.end.base) + 'del' + ins_ref + 'ins' + ins_alt hgvs_t = self.hp.parse_hgvs_variant(ins_to_delins) try: From d8917b8ab02f61686e84a895099cb40235bcaca5 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 20 Jun 2019 16:48:15 +0100 Subject: [PATCH 143/223] More tidying up in MixinConverters --- VariantValidator/modules/use_checking.py | 7 +- VariantValidator/modules/vvMixinConverters.py | 215 ++++++------------ 2 files changed, 73 insertions(+), 149 deletions(-) diff --git a/VariantValidator/modules/use_checking.py b/VariantValidator/modules/use_checking.py index c73fe5db..e80029d8 100644 --- a/VariantValidator/modules/use_checking.py +++ b/VariantValidator/modules/use_checking.py @@ -343,9 +343,7 @@ def structure_checks_c(variant, validator): # Create a specific minimal evm with no normalizer and no replace_reference # Have to use this method due to potential multi chromosome error, note normalizes but does not replace sequence try: - output = validator.noreplace_myevm_t_to_g(variant.input_parses, variant.evm, validator.hdp, - variant.primary_assembly, validator.vm, variant.hn, validator.hp, - validator.sf, variant.no_norm_evm) + output = validator.noreplace_myevm_t_to_g(variant.input_parses, variant) except hgvs.exceptions.HGVSDataNotAvailableError: errors = ['Required information for ' + variant.input_parses.ac + ' is missing from the Universal ' 'Transcript Archive', 'Query https://rest.variantvalidator.org/tools/gene2transcripts/%s for ' @@ -560,8 +558,7 @@ def structure_checks_n(variant, validator): # Create a specific minimal evm with no normalizer and no replace_reference # Have to use this method due to potential multi chromosome error, note, normalizes but does not replace sequence try: - output = validator.noreplace_myevm_t_to_g(variant.input_parses, variant.evm, validator.hdp, variant.primary_assembly, validator.vm, variant.hn, - validator.hp, validator.sf, variant.no_norm_evm) + output 
= validator.noreplace_myevm_t_to_g(variant.input_parses, variant) except hgvs.exceptions.HGVSDataNotAvailableError as e: errors = ['Required information for ' + variant.input_parses.ac + ' is missing from the Universal ' 'Transcript Archive', diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index 9432b874..015ea386 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -789,7 +789,7 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): return hgvs_genomic - def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn, hpOld, sfOld, no_norm_evm): + def noreplace_myevm_t_to_g(self, hgvs_c, variant): """ USE WITH MAPPER THAT DOES NOT REPLACE THE REFERENCE GENOMIC BASES AND DOED NOT NORMALIZE @@ -801,177 +801,103 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn Map to a single NC_ (or ALT) for the specified genome build returns parsed hgvs g. object """ + hgvs_genomic = None + attempted_mapping_error = '' try: - hgvs_genomic = evm.t_to_g(hgvs_c) - hn.normalize(hgvs_genomic) + hgvs_genomic = variant.evm.t_to_g(hgvs_c) + variant.hn.normalize(hgvs_genomic) # This will fail on multiple refs for NC_ - except hgvs.exceptions.HGVSError as e: + except hgvs.exceptions.HGVSError: # Recover all available mapping options from UTA mapping_options = self.hdp.get_tx_mapping_options(hgvs_c.ac) - if mapping_options == []: + if not mapping_options: raise HGVSDataNotAvailableError("no g. 
mapping options available") - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NC_', option[1]): - chr_num = seq_data.supported_for_mapping(str(option[1]), primary_assembly) - if chr_num != 'false': - try: - hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[ - 1] + '~' - print(e) - continue - - # If not mapped, raise error - try: - hn.normalize(hgvs_genomic) - except: - for option in mapping_options: - if re.match('blat', option[2]): + def search_in_options(hgvs_genomic, seqtype, chr_num_val, final=False): + err = '' + for op in mapping_options: + if op[2].startswith('blat'): continue - if re.match('NC_', option[1]): - chr_num = seq_data.supported_for_mapping(str(option[1]), primary_assembly) - if chr_num != 'false': + if op[1].startswith(seqtype): + if final: try: - hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) + hgvs_genomic = self.vm.t_to_g(hgvs_c, str(op[1])) + break + except Exception as e: + err += str(e) + "/" + hgvs_c.ac + "/" + op[1] + '~' + print(e) + continue + chr_num = seq_data.supported_for_mapping(str(op[1]), variant.primary_assembly) + if chr_num_val and chr_num != 'false': + try: + hgvs_genomic = self.vm.t_to_g(hgvs_c, str(op[1])) + break + except Exception as e: + err += str(e) + "/" + hgvs_c.ac + "/" + op[1] + '~' + print(e) + continue + elif not chr_num_val and chr_num == 'false': + try: + hgvs_genomic = self.vm.t_to_g(hgvs_c, str(op[1])) break except Exception as e: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + option[ - 1] + '~' + err += str(e) + "/" + hgvs_c.ac + "/" + op[1] + '~' print(e) continue + return hgvs_genomic, err + + hgvs_genomic, new_errors = search_in_options(hgvs_genomic, 'NC_', True) + attempted_mapping_error += new_errors + + # If not mapped, raise error + try: + variant.hn.normalize(hgvs_genomic) + except: 
+ hgvs_genomic, new_errors = search_in_options(hgvs_genomic, 'NC_', True) + attempted_mapping_error += new_errors # If not mapped, raise error try: - hn.normalize(hgvs_genomic) + variant.hn.normalize(hgvs_genomic) except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NC_', option[1]): - chr_num = seq_data.supported_for_mapping(str(option[1]), primary_assembly) - if chr_num == 'false': - try: - hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - if re.search(option[1], attempted_mapping_error): - pass - else: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + \ - option[ - 1] + '~' - print(e) - continue + hgvs_genomic, new_errors = search_in_options(hgvs_genomic, 'NC_', False) + attempted_mapping_error += new_errors try: - hn.normalize(hgvs_genomic) + variant.hn.normalize(hgvs_genomic) except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NT_', option[1]): - chr_num = seq_data.supported_for_mapping(str(option[1]), - primary_assembly) - if chr_num != 'false': - try: - hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str(e) + "/" + hgvs_c.ac + "/" + \ - option[ - 1] + '~' - print(e) - continue + hgvs_genomic, new_errors = search_in_options(hgvs_genomic, 'NT_', True) + attempted_mapping_error += new_errors try: - hn.normalize(hgvs_genomic) + variant.hn.normalize(hgvs_genomic) except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NT_', option[1]): - chr_num = seq_data.supported_for_mapping(str(option[1]), - primary_assembly) - if chr_num == 'false': - try: - hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str( - e) + "/" + hgvs_c.ac + "/" + \ - option[ - 1] + '~' - print(e) - 
continue + hgvs_genomic, new_errors = search_in_options(hgvs_genomic, 'NT_', False) + attempted_mapping_error += new_errors try: - hn.normalize(hgvs_genomic) + variant.hn.normalize(hgvs_genomic) except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NW_', option[1]): - chr_num = seq_data.supported_for_mapping(str(option[1]), - primary_assembly) - if chr_num != 'false': - try: - hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str( - e) + "/" + hgvs_c.ac + "/" + \ - option[1] + '~' - print(e) - continue + hgvs_genomic, new_errors = search_in_options(hgvs_genomic, 'NW_', True) + attempted_mapping_error += new_errors try: - hn.normalize(hgvs_genomic) + variant.hn.normalize(hgvs_genomic) except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NW_', option[1]): - chr_num = seq_data.supported_for_mapping(str(option[1]), - primary_assembly) - if chr_num == 'false': - try: - hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str( - e) + "/" + hgvs_c.ac + "/" + \ - option[1] + '~' - print(e) - continue - + hgvs_genomic, new_errors = search_in_options(hgvs_genomic, 'NW_', False) + attempted_mapping_error += new_errors # Only a RefSeqGene available try: - hn.normalize(hgvs_genomic) + variant.hn.normalize(hgvs_genomic) except: - for option in mapping_options: - if re.match('blat', option[2]): - continue - if re.match('NG_', option[1]): - try: - hgvs_genomic = self.vm.t_to_g(hgvs_c, str(option[1])) - break - except Exception as e: - attempted_mapping_error = attempted_mapping_error + str( - e) + "/" + hgvs_c.ac + "/" + \ - option[1] + '~' - print(e) - continue - try: - hgvs_genomic - except Exception: + hgvs_genomic, new_errors = search_in_options(hgvs_genomic, 'NG_', True, + final=True) + 
attempted_mapping_error += new_errors + if hgvs_genomic is None: raise HGVSDataNotAvailableError('No available t_to_g liftover') # Ins variants map badly - Especially between c. exon/exon boundary - if hgvs_c.posedit.edit.type == 'ins' and hgvs_c.posedit.pos.start.offset == 0 and hgvs_c.posedit.pos.end.offset == 0: + if hgvs_c.posedit.edit.type == 'ins' and hgvs_c.posedit.pos.start.offset == 0 and \ + hgvs_c.posedit.pos.end.offset == 0: try: - hn.normalize(hgvs_genomic) + variant.hn.normalize(hgvs_genomic) except hgvs.exceptions.HGVSError as e: error = str(e) if error == 'insertion length must be 1': @@ -979,17 +905,18 @@ def noreplace_myevm_t_to_g(self,hgvs_c, evm, hdpOld, primary_assembly, vmOld, hn hgvs_t = self.vm.c_to_n(hgvs_c) else: hgvs_t = copy.copy(hgvs_c) - ins_ref = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 1, hgvs_t.posedit.pos.end.base) + ins_ref = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 1, + hgvs_t.posedit.pos.end.base) ins_alt = ins_ref[:1] + hgvs_t.posedit.edit.alt + ins_ref[-1:] - ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base) + '_' + str( - hgvs_t.posedit.pos.end.base) + 'del' + ins_ref + 'ins' + ins_alt + ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str(hgvs_t.posedit.pos.start.base) + '_' + \ + str(hgvs_t.posedit.pos.end.base) + 'del' + ins_ref + 'ins' + ins_alt hgvs_t = self.hp.parse_hgvs_variant(ins_to_delins) try: hgvs_c = self.vm.n_to_c(hgvs_t) except Exception: hgvs_c = copy.copy(hgvs_t) try: - hgvs_genomic = no_norm_evm.t_to_g(hgvs_c) + hgvs_genomic = variant.no_norm_evm.t_to_g(hgvs_c) except Exception as e: error = str(e) Logger.warning('Ins mapping error in myt_to_g ' + error) From 00d3e52d07916345d66da2e722f0a79be2614bb6 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 21 Jun 2019 10:52:13 +0100 Subject: [PATCH 144/223] finished tidying mixinConverters --- VariantValidator/modules/vvMixinConverters.py | 416 ++++++++---------- 1 file changed, 189 insertions(+), 227 deletions(-) diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index 015ea386..e801a79e 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -923,7 +923,7 @@ def search_in_options(hgvs_genomic, seqtype, chr_num_val, final=False): return hgvs_genomic - def myevm_g_to_t(self,evm, hgvs_genomic, alt_ac): + def myevm_g_to_t(self, evm, hgvs_genomic, alt_ac): """ Enhanced transcript to genome position on a specified genomic reference using vm Deals with mapping from transcript positions that do not exist in the genomic sequence @@ -932,11 +932,11 @@ def myevm_g_to_t(self,evm, hgvs_genomic, alt_ac): """ hgvs_t = evm.g_to_t(hgvs_genomic, alt_ac) return hgvs_t + def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): # store the input stored_hgvs_c = copy.deepcopy(hgvs_c) - expand_out = 'false' - utilise_gap_code = True + expand_out = False # Gap gene black list try: @@ -949,11 +949,13 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): # Warn gap code in use Logger.warning("gap_compensation_mvm = " + str(utilise_gap_code)) - if utilise_gap_code is True and ( - hgvs_c.posedit.edit.type == 'identity' or 
hgvs_c.posedit.edit.type == 'del' or hgvs_c.posedit.edit.type == 'delins' or hgvs_c.posedit.edit.type == 'dup' or hgvs_c.posedit.edit.type == 'sub' or hgvs_c.posedit.edit.type == 'ins' or hgvs_c.posedit.edit.type == 'inv'): + if utilise_gap_code and (hgvs_c.posedit.edit.type == 'identity' or hgvs_c.posedit.edit.type == 'del' + or hgvs_c.posedit.edit.type == 'delins' or hgvs_c.posedit.edit.type == 'dup' + or hgvs_c.posedit.edit.type == 'sub' or hgvs_c.posedit.edit.type == 'ins' + or hgvs_c.posedit.edit.type == 'inv'): # if NM_ need the n. position - if re.match('NM_', str(hgvs_c.ac)): + if str(hgvs_c.ac).startswith("NM_"): hgvs_c = no_norm_evm.c_to_n(hgvs_c) # Check for intronic @@ -961,15 +963,15 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): hn.normalize(hgvs_c) except hgvs.exceptions.HGVSError as e: error = str(e) - if re.search('intronic variant', error): + if 'intronic variant' in error: pass - elif re.search('Length implied by coordinates must equal sequence deletion length', error) and re.match( - 'NR_', hgvs_c.ac): + elif 'Length implied by coordinates must equal sequence deletion length' in error and \ + hgvs_c.ac.startswith('NR_'): hgvs_c.posedit.pos.end.base = hgvs_c.posedit.pos.start.base + len(hgvs_c.posedit.edit.ref) - 1 # Check again before continuing - if re.search('\d+\+', str(hgvs_c.posedit.pos)) or re.search('\d+\-', str(hgvs_c.posedit.pos)) or re.search( - '\*\d+\+', str(hgvs_c.posedit.pos)) or re.search('\*\d+\-', str(hgvs_c.posedit.pos)): + if re.search(r'\d+\+', str(hgvs_c.posedit.pos)) or re.search(r'\d+-', str(hgvs_c.posedit.pos)) or re.search( + r'\*\d+\+', str(hgvs_c.posedit.pos)) or re.search(r'\*\d+-', str(hgvs_c.posedit.pos)): pass else: @@ -1040,24 +1042,24 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): hgvs_c = copy.deepcopy(hgvs_t) # Set expanded out test to true - expand_out = 'true' + expand_out = True except Exception: hgvs_c = hgvs_c - if re.match('NM_', str(hgvs_c.ac)): + if 
str(hgvs_c.ac).startswith('NM_'): try: hgvs_c = no_norm_evm.n_to_c(hgvs_c) - except hgvs.exceptions.HGVSError as e: + except hgvs.exceptions.HGVSError: hgvs_c = copy.deepcopy(stored_hgvs_c) # Ensure the altered c. variant has not crossed intro exon boundaries hgvs_check_boundaries = copy.deepcopy(hgvs_c) try: - h_variant = hn.normalize(hgvs_check_boundaries) + hn.normalize(hgvs_check_boundaries) except hgvs.exceptions.HGVSError as e: error = str(e) - if re.search('spanning the exon-intron boundary', error): + if 'spanning the exon-intron boundary' in error: hgvs_c = copy.deepcopy(stored_hgvs_c) # Catch identity at the exon/intron boundary by trying to normalize ref only if hgvs_check_boundaries.posedit.edit.type == 'identity': @@ -1069,12 +1071,12 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): hn.normalize(hgvs_reform_ident) except hgvs.exceptions.HGVSError as e: error = str(e) - if re.search('spanning the exon-intron boundary', error) or re.search( - 'Normalization of intronic variants', error): + if 'spanning the exon-intron boundary' in error or 'Normalization of intronic variants' in error: hgvs_c = copy.deepcopy(stored_hgvs_c) hgvs_genomic = self.vm.t_to_g(hgvs_c, alt_chr) - if hgvs_c.posedit.edit.type == 'identity' and hgvs_genomic.posedit.edit.type == 'delins' and hgvs_genomic.posedit.edit.alt == '' and expand_out != 'true': + if hgvs_c.posedit.edit.type == 'identity' and hgvs_genomic.posedit.edit.type == 'delins' and \ + hgvs_genomic.posedit.edit.alt == '' and expand_out is False: hgvs_genomic.posedit.edit.alt = hgvs_genomic.posedit.edit.ref if hgvs_genomic.posedit.edit.type == 'ins' and utilise_gap_code is True: try: @@ -1095,39 +1097,39 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): hgvs_genomic = hn.normalize(hgvs_genomic) # Statements required to reformat the stored_hgvs_c into a useable synonym - if (stored_hgvs_c.posedit.edit.ref == '' or stored_hgvs_c.posedit.edit.ref is None) and expand_out != 'false': + if 
(stored_hgvs_c.posedit.edit.ref == '' or stored_hgvs_c.posedit.edit.ref is None) and expand_out: if stored_hgvs_c.type == 'c': stored_hgvs_n = self.vm.c_to_n(stored_hgvs_c) else: stored_hgvs_n = stored_hgvs_c stored_ref = self.sf.fetch_seq(str(stored_hgvs_n.ac), stored_hgvs_n.posedit.pos.start.base - 1, - stored_hgvs_n.posedit.pos.end.base) + stored_hgvs_n.posedit.pos.end.base) stored_hgvs_c.posedit.edit.ref = stored_ref - if (hgvs_genomic.posedit.edit.ref == '' or hgvs_genomic.posedit.edit.ref is None) and expand_out != 'false': + if (hgvs_genomic.posedit.edit.ref == '' or hgvs_genomic.posedit.edit.ref is None) and expand_out: if hgvs_genomic.posedit.edit.type == 'ins': stored_ref = self.sf.fetch_seq(str(hgvs_genomic.ac), hgvs_genomic.posedit.pos.start.base - 1, - hgvs_genomic.posedit.pos.end.base) + hgvs_genomic.posedit.pos.end.base) stored_alt = stored_ref[:1] + hgvs_genomic.posedit.edit.alt + stored_ref[-1:] hgvs_genomic.posedit.edit.ref = stored_ref hgvs_genomic.posedit.edit.alt = stored_alt # First look for variants mapping to the flanks of gaps # either in the gap or on the flank but not fully within the gap - if expand_out == 'true': + if expand_out: nr_genomic = self.nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) try: hn.normalize(nr_genomic) except hgvs.exceptions.HGVSInvalidVariantError as e: error_type_1 = str(e) - if re.match('Length implied by coordinates must equal sequence deletion length', str(e)) or str( + if 'Length implied by coordinates must equal sequence deletion length' in str(e) or str( e) == 'base start position must be <= end position': # Effectively, this code is designed to handle variants that are directly proximal to # gap BOUNDARIES, but in some cases the replace reference function of hgvs mapping has removed bases # due to the deletion length being > the specified range. 
- + genomic_gap_variant = None # Warn of variant location wrt the gap - if re.match('Length implied by coordinates must equal sequence deletion length', str(e)): + if 'Length implied by coordinates must equal sequence deletion length' in str(e): Logger.warning('Variant is proximal to the flank of a genomic gap') genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) try: @@ -1137,9 +1139,11 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): if 'base start position must be <= end position' in str(e) and \ 'Length implied by coordinates must equal' in error_type_1: make_gen_var = copy.copy(nr_genomic) - make_gen_var.posedit.edit.ref = self.sf.fetch_seq(nr_genomic.ac, - nr_genomic.posedit.pos.start.base - 1, - nr_genomic.posedit.pos.end.base) + make_gen_var.posedit.edit.ref = self.sf.fetch_seq( + nr_genomic.ac, + nr_genomic.posedit.pos.start.base - 1, + nr_genomic.posedit.pos.end.base + ) genomic_gap_variant = make_gen_var error_type_1 = None else: @@ -1161,7 +1165,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): gap_end = genomic_gap_variant.posedit.pos.start.base genomic_gap_variant.posedit.pos.start.base = gap_start genomic_gap_variant.posedit.pos.end.base = gap_end - if re.match('Length implied by coordinates must equal sequence deletion length', str(e)): + if 'Length implied by coordinates must equal sequence deletion length' in str(e): # This will only happen if the variant is flanking the gap but is # not inside the gap Logger.warning('Variant is on the flank of a genomic gap but not within the gap') @@ -1176,23 +1180,20 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): try: genomic_gap_variant.posedit.edit.alt = '' except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - pass + pass - # Should be a delins so will normalize statically and replace the reference bases + # Should be a delins so will normalize statically and replace the reference bases genomic_gap_variant = 
hn.normalize(genomic_gap_variant) # Static map to c. and static normalize transcript_gap_variant = self.vm.g_to_t(genomic_gap_variant, hgvs_c.ac) - stored_transcript_gap_variant = transcript_gap_variant - if not re.match('Length implied by coordinates must equal sequence deletion length', str(e)): + if 'Length implied by coordinates must equal sequence deletion length' not in str(e): try: transcript_gap_variant = hn.normalize(transcript_gap_variant) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: - if ' Unsupported normalization of variants spanning the UTR-exon boundary' in str(e): - pass + except hgvs.exceptions.HGVSUnsupportedOperationError: + pass # if NM_ need the n. position - if re.match('NM_', str(hgvs_c.ac)): + if str(hgvs_c.ac).startswith('NM_'): transcript_gap_n = no_norm_evm.c_to_n(transcript_gap_variant) transcript_gap_alt_n = no_norm_evm.c_to_n(stored_hgvs_c) else: @@ -1233,21 +1234,22 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): # Note, all variants will be forced into the format delete insert # Deleted bases in the ALT will be substituted for X - for int in range(transcript_gap_alt_n.posedit.pos.start.base, - transcript_gap_alt_n.posedit.pos.end.base + 1, 1): - if int == alt_start: - alt_base_dict[int] = str(''.join(alternate_bases)) + for i in range(transcript_gap_alt_n.posedit.pos.start.base, + transcript_gap_alt_n.posedit.pos.end.base + 1, 1): + if i == alt_start: + alt_base_dict[i] = str(''.join(alternate_bases)) else: - alt_base_dict[int] = 'X' + alt_base_dict[i] = 'X' # Generate the alt sequence alternate_sequence_bases = [] - for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base + 1, - 1): - if int in list(alt_base_dict.keys()): - alternate_sequence_bases.append(alt_base_dict[int]) + for i in range(transcript_gap_n.posedit.pos.start.base, + transcript_gap_n.posedit.pos.end.base + 1, + 1): + if i in list(alt_base_dict.keys()): + 
alternate_sequence_bases.append(alt_base_dict[i]) else: - alternate_sequence_bases.append(ref_base_dict[int]) + alternate_sequence_bases.append(ref_base_dict[i]) alternate_sequence = ''.join(alternate_sequence_bases) alternate_sequence = alternate_sequence.replace('X', '') @@ -1265,14 +1267,18 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): except Exception as e: if str(e) == "base start position must be <= end position": # Expansion out is required to map back to the genomic position - pre_base = self.sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.start.base - 2, - transcript_gap_n.posedit.pos.start.base - 1) - post_base = self.sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.end.base, - transcript_gap_n.posedit.pos.end.base + 1) + pre_base = self.sf.fetch_seq(transcript_gap_n.ac, + transcript_gap_n.posedit.pos.start.base - 2, + transcript_gap_n.posedit.pos.start.base - 1) + post_base = self.sf.fetch_seq(transcript_gap_n.ac, + transcript_gap_n.posedit.pos.end.base, + transcript_gap_n.posedit.pos.end.base + 1) transcript_gap_n.posedit.pos.start.base = transcript_gap_n.posedit.pos.start.base - 1 transcript_gap_n.posedit.pos.end.base = transcript_gap_n.posedit.pos.end.base + 1 - transcript_gap_n.posedit.edit.ref = pre_base + transcript_gap_n.posedit.edit.ref + post_base - transcript_gap_n.posedit.edit.alt = pre_base + transcript_gap_n.posedit.edit.alt + post_base + transcript_gap_n.posedit.edit.ref = pre_base + transcript_gap_n.posedit.edit.ref + \ + post_base + transcript_gap_n.posedit.edit.alt = pre_base + transcript_gap_n.posedit.edit.alt + \ + post_base try: transcript_gap_variant = self.vm.n_to_c(transcript_gap_n) except: @@ -1281,39 +1287,32 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): hgvs_genomic = hn.normalize(hgvs_genomic) # Bypass the next bit of gap code - expand_out = 'false' - - else: - pass - # No map to the flank of a gap or within the gap - else: - pass + expand_out = False - # CASCADING 
STATEMENTS WHICH CAPTURE t to g MAPPING OPTIONS + # CASCADING STATEMENTS WHICH CAPTURE t to g MAPPING OPTIONS # Remove identity bases if hgvs_c == stored_hgvs_c: - expand_out = 'false' - elif expand_out == 'false' or utilise_gap_code is False: + expand_out = False + elif expand_out is False or utilise_gap_code is False: pass # Correct expansion ref + 2 - elif expand_out == 'true' and ( + elif expand_out and ( len(hgvs_genomic.posedit.edit.ref) == (len(stored_hgvs_c.posedit.edit.ref) + 2)): # >= 3: hgvs_genomic.posedit.pos.start.base = hgvs_genomic.posedit.pos.start.base + 1 hgvs_genomic.posedit.pos.end.base = hgvs_genomic.posedit.pos.end.base - 1 hgvs_genomic.posedit.edit.ref = hgvs_genomic.posedit.edit.ref[1:-1] if hgvs_genomic.posedit.edit.alt is not None: hgvs_genomic.posedit.edit.alt = hgvs_genomic.posedit.edit.alt[1:-1] - elif expand_out == 'true' and ( + elif expand_out and ( len(hgvs_genomic.posedit.edit.ref) != (len(stored_hgvs_c.posedit.edit.ref) + 2)): # >= 3: - if expand_out == 'true' and len(hgvs_genomic.posedit.edit.ref) == 2: - gn = hn.normalize(hgvs_genomic) - pass + if len(hgvs_genomic.posedit.edit.ref) == 2: + hn.normalize(hgvs_genomic) # Likely if the start or end position aligns to a gap in the genomic sequence # Logic # We have checked that the variant does not cross boundaries, or is intronic # So is likely mapping to a genomic gap - elif expand_out == 'true' and len(hgvs_genomic.posedit.edit.ref) <= 1: + elif len(hgvs_genomic.posedit.edit.ref) <= 1: # Incorrect expansion, likely < ref + 2 genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) try: @@ -1328,16 +1327,14 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): try: genomic_gap_variant.posedit.edit.alt = '' except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - pass + pass # Should be a delins so will normalize statically and replace the reference bases genomic_gap_variant = hn.normalize(genomic_gap_variant) # Static map to c. 
and static normalize transcript_gap_variant = self.vm.g_to_t(genomic_gap_variant, hgvs_c.ac) - stored_transcript_gap_variant = transcript_gap_variant transcript_gap_variant = hn.normalize(transcript_gap_variant) # if NM_ need the n. position - if re.match('NM_', str(hgvs_c.ac)): + if str(hgvs_c.ac).startswith('NM_'): transcript_gap_n = no_norm_evm.c_to_n(transcript_gap_variant) transcript_gap_alt_n = no_norm_evm.c_to_n(stored_hgvs_c) else: @@ -1378,20 +1375,21 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): # Note, all variants will be forced into the format delete insert # Deleted bases in the ALT will be substituted for X - for int in range(transcript_gap_alt_n.posedit.pos.start.base, - transcript_gap_alt_n.posedit.pos.end.base + 1, 1): - if int == alt_start: - alt_base_dict[int] = str(''.join(alternate_bases)) + for i in range(transcript_gap_alt_n.posedit.pos.start.base, + transcript_gap_alt_n.posedit.pos.end.base + 1, 1): + if i == alt_start: + alt_base_dict[i] = str(''.join(alternate_bases)) else: - alt_base_dict[int] = 'X' + alt_base_dict[i] = 'X' # Generate the alt sequence alternate_sequence_bases = [] - for int in range(transcript_gap_n.posedit.pos.start.base, transcript_gap_n.posedit.pos.end.base + 1, 1): - if int in list(alt_base_dict.keys()): - alternate_sequence_bases.append(alt_base_dict[int]) + for i in range(transcript_gap_n.posedit.pos.start.base, + transcript_gap_n.posedit.pos.end.base + 1, 1): + if i in list(alt_base_dict.keys()): + alternate_sequence_bases.append(alt_base_dict[i]) else: - alternate_sequence_bases.append(ref_base_dict[int]) + alternate_sequence_bases.append(ref_base_dict[i]) alternate_sequence = ''.join(alternate_sequence_bases) alternate_sequence = alternate_sequence.replace('X', '') @@ -1409,10 +1407,11 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): except Exception as e: if str(e) == "base start position must be <= end position": # Expansion out is required to map back to the genomic position - 
pre_base = self.sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.start.base - 2, - transcript_gap_n.posedit.pos.start.base - 1) + pre_base = self.sf.fetch_seq(transcript_gap_n.ac, + transcript_gap_n.posedit.pos.start.base - 2, + transcript_gap_n.posedit.pos.start.base - 1) post_base = self.sf.fetch_seq(transcript_gap_n.ac, transcript_gap_n.posedit.pos.end.base, - transcript_gap_n.posedit.pos.end.base + 1) + transcript_gap_n.posedit.pos.end.base + 1) transcript_gap_n.posedit.pos.start.base = transcript_gap_n.posedit.pos.start.base - 1 transcript_gap_n.posedit.pos.end.base = transcript_gap_n.posedit.pos.end.base + 1 transcript_gap_n.posedit.edit.ref = pre_base + transcript_gap_n.posedit.edit.ref + post_base @@ -1425,7 +1424,8 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): hgvs_genomic = hn.normalize(hgvs_genomic) # Ins variants map badly - Especially between c. exon/exon boundary - if hgvs_c.posedit.edit.type == 'ins' and hgvs_c.posedit.pos.start.offset == 0 and hgvs_c.posedit.pos.end.offset == 0: + if hgvs_c.posedit.edit.type == 'ins' and hgvs_c.posedit.pos.start.offset == 0 and \ + hgvs_c.posedit.pos.end.offset == 0: try: hn.normalize(hgvs_genomic) except hgvs.exceptions.HGVSError as e: @@ -1435,10 +1435,11 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): hgvs_t = self.vm.c_to_n(hgvs_c) else: hgvs_t = copy.copy(hgvs_c) - ins_ref = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 1, hgvs_t.posedit.pos.end.base) + ins_ref = self.sf.fetch_seq(str(hgvs_t.ac), hgvs_t.posedit.pos.start.base - 1, + hgvs_t.posedit.pos.end.base) ins_alt = ins_ref[:1] + hgvs_t.posedit.edit.alt + ins_ref[-1:] - ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' + str(hgvs_t.posedit.pos.start.base) + '_' + str( - hgvs_t.posedit.pos.end.base) + 'del' + ins_ref + 'ins' + ins_alt + ins_to_delins = hgvs_t.ac + ':' + hgvs_t.type + '.' 
+ str(hgvs_t.posedit.pos.start.base) + '_' + \ + str(hgvs_t.posedit.pos.end.base) + 'del' + ins_ref + 'ins' + ins_alt hgvs_t = self.hp.parse_hgvs_variant(ins_to_delins) try: hgvs_c = self.vm.n_to_c(hgvs_t) @@ -1456,10 +1457,8 @@ def hgvs_protein(self, variant, hpOld): """ parse p. strings into hgvs p. objects """ - # Set regular expressions for if statements - pat_p = re.compile("\:p\.") # Pattern looks for :g. Note (gene) has been removed # If the :p. pattern is present in the input variant - if pat_p.search(variant): + if ':p.' in variant: # convert the input string into a hgvs object var_p = self.hp.parse_hgvs_variant(variant) return var_p @@ -1469,7 +1468,7 @@ def hgvs_r_to_c(self, hgvs_object): Convert r. into c. """ # check for LRG_t with r. - if re.match('LRG', hgvs_object.ac): + if 'LRG' in hgvs_object.ac: transcript_ac = self.db.get_RefSeqTranscriptID_from_lrgTranscriptID(hgvs_object.ac) if transcript_ac == 'none': raise HGVSDataNotAvailableError('Unable to identify a relevant transcript for ' + hgvs_object.ac) @@ -1506,36 +1505,31 @@ def tx_identity_info(self, variant, hdpOld): Use uta.py (hdp) to return the identity information for the transcript variant see hgvs.dataproviders.uta.py for details """ - # Set regular expressions for if statements - pat_c = re.compile("\:c\.") # Pattern looks for :c. Note (gene) has been removed - pat_n = re.compile("\:n\.") # Pattern looks for :c. Note (gene) has been removed - pat_r = re.compile("\:r\.") # Pattern looks for :c. Note (gene) has been removed - # If the :c. pattern is present in the input variant - if pat_c.search(variant): + if ':c.' 
in variant: # Remove all text to the right and including pat_c tx_ac = variant[:variant.index(':c.') + len(':c.')] - tx_ac = pat_c.sub('', tx_ac) + tx_ac = tx_ac.replace(':c.', '') # Interface with the UTA database via get_tx_identity in uta.py tx_id_info = self.hdp.get_tx_identity_info(tx_ac) # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list return tx_id_info # If the :n. pattern is present in the input variant - if pat_n.search(variant): + if ':n.' in variant: # Remove all text to the right and including pat_c tx_ac = variant[:variant.index(':n.') + len(':n.')] - tx_ac = pat_n.sub('', tx_ac) + tx_ac = tx_ac.replace(':n.', '') # Interface with the UTA database via get_tx_identity in uta.py tx_id_info = self.hdp.get_tx_identity_info(tx_ac) # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list return tx_id_info # If the :r. pattern is present in the input variant - if pat_r.search(variant): + if ':r.' in variant: # Remove all text to the right and including pat_c tx_ac = variant[:variant.index(':r.') + len(':r.')] - tx_ac = pat_r.sub('', tx_ac) + tx_ac = tx_ac.replace(':r.', '') # Interface with the UTA database via get_tx_identity in uta.py tx_id_info = self.hdp.get_tx_identity_info(tx_ac) # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list @@ -1565,14 +1559,12 @@ def ng_extract(self, tx_for_gene): Extract RefSeqGene Accession from transcript information see hgvs.dataproviders.uta.py for details """ - # Set regular expressions for if statements - pat_NG = re.compile("^NG_") # Pattern looks for NG_ at beginning of a string # For each list in the list of lists tx_for_gene - for list in tx_for_gene: + for item in tx_for_gene: # If the pattern NG_ is found in element 4 - if pat_NG.search(list[4]): + if 'NG_' in item[4]: # The gene accession is set to list element 4 - gene_ac = list[4] + gene_ac = item[4] return gene_ac def tx_exons(self, tx_ac, alt_ac, 
alt_aln_method): @@ -1589,7 +1581,7 @@ def tx_exons(self, tx_ac, alt_ac, alt_aln_method): tx_exons = 'hgvs Exception: ' + str(e) return tx_exons try: - completion = tx_exons[0]['alt_strand'] + tx_exons[0]['alt_strand'] except TypeError: tx_exons = 'error' return tx_exons @@ -1600,13 +1592,15 @@ def tx_exons(self, tx_ac, alt_ac, alt_aln_method): else: return tx_exons - def relevant_transcripts(self, hgvs_genomic, evm, alt_aln_method,reverse_normalizer): + def relevant_transcripts(self, hgvs_genomic, evm, alt_aln_method, reverse_normalizer): """ Automatically maps genomic positions onto all overlapping transcripts """ # Pass relevant transcripts for the input variant to rts # Note, the evm method misses one end, the hdp. method misses the other. Combine both - rts_list = self.hdp.get_tx_for_region(hgvs_genomic.ac, alt_aln_method, hgvs_genomic.posedit.pos.start.base-1, hgvs_genomic.posedit.pos.end.base-1) + rts_list = self.hdp.get_tx_for_region(hgvs_genomic.ac, alt_aln_method, + hgvs_genomic.posedit.pos.start.base-1, + hgvs_genomic.posedit.pos.end.base-1) rts_dict = {} for tx_dat in rts_list: rts_dict[tx_dat[0]] = True @@ -1627,20 +1621,17 @@ def relevant_transcripts(self, hgvs_genomic, evm, alt_aln_method,reverse_normali # Check for coding transcripts try: variant = evm.g_to_t(hgvs_genomic, y) - except hgvs.exceptions.HGVSError as e: + except hgvs.exceptions.HGVSError: # Check for non-coding transcripts try: variant = evm.g_to_t(hgvs_genomic, y) - except hgvs.exceptions.HGVSError as e: + except hgvs.exceptions.HGVSError: continue except: continue # Corrective Normalisation of intronic descriptions in the antisense oriemtation - pl = re.compile('\+') - mi = re.compile('\-') - ast = re.compile('\*') - if pl.search(str(variant)) or mi.search(str(variant)) or ast.search(str(variant)): + if '+' in str(variant) or '-' in str(variant) or '*' in str(variant): tx_ac = variant.ac alt_ac = hgvs_genomic.ac @@ -1648,7 +1639,6 @@ def relevant_transcripts(self, hgvs_genomic, evm, 
alt_aln_method,reverse_normali try: tx_exons = self.hdp.get_tx_exons(tx_ac, alt_ac, alt_aln_method) except hgvs.exceptions.HGVSError as e: - e tx_exons = 'hgvs Exception: ' + str(e) return tx_exons try: @@ -1659,19 +1649,15 @@ def relevant_transcripts(self, hgvs_genomic, evm, alt_aln_method,reverse_normali # If on the reverse strand, reverse the order of elements if tx_exons[0]['alt_strand'] == -1: tx_exons = tx_exons[::-1] - else: - pass # Gene orientation if tx_exons[0]['alt_strand'] == -1: - antisense = 'true' + antisense = True else: - antisense = 'false' + antisense = False # Pass if antisense = 'false' - if antisense == 'false': - pass - else: + if antisense: # Reverse normalize hgvs_genomic rev_hgvs_genomic = reverse_normalizer.normalize(hgvs_genomic) # map back to coding @@ -1686,45 +1672,30 @@ def relevant_transcripts(self, hgvs_genomic, evm, alt_aln_method,reverse_normali code_var.append(str(variant)) return code_var - def validateHGVS(self, input): + def validateHGVS(self, query): """ Take HGVS string, parse into hgvs object and validate """ - hgvs_input = self.hp.parse_hgvs_variant(input) - g = re.compile(":g.") - p = re.compile(":p.") - if p.search(input): - if hasattr(hgvs_input.posedit.pos.start, 'offset'): - pass - else: + hgvs_input = self.hp.parse_hgvs_variant(query) + + if ':p.' 
in query: + if not hasattr(hgvs_input.posedit.pos.start, 'offset'): hgvs_input.posedit.pos.start.offset = 0 - if hasattr(hgvs_input.posedit.pos.end, 'offset'): - pass - else: + if not hasattr(hgvs_input.posedit.pos.end, 'offset'): hgvs_input.posedit.pos.end.offset = 0 - if hasattr(hgvs_input.posedit.pos.start, 'datum'): - pass - else: + if not hasattr(hgvs_input.posedit.pos.start, 'datum'): hgvs_input.posedit.pos.start.datum = 0 - if hasattr(hgvs_input.posedit.pos.end, 'datum'): - pass - else: + if not hasattr(hgvs_input.posedit.pos.end, 'datum'): hgvs_input.posedit.pos.end.datum = 0 - if hasattr(hgvs_input.posedit.edit, 'ref_n'): - pass - else: + if not hasattr(hgvs_input.posedit.edit, 'ref_n'): hgvs_input.posedit.edit.ref_n = hgvs_input.posedit.pos.end.base - hgvs_input.posedit.pos.start.base + 1 try: self.vr.validate(hgvs_input) except hgvs.exceptions.HGVSError as e: - - error = e - return error - + return e else: - error = 'false' - return error + return 'false' def hgnc_rest(self, path): """ @@ -1793,9 +1764,9 @@ def revcomp(self, bases): Simple reverse complement function for nucleotide sequences """ l2 = [] - l = list(bases) + listbases = list(bases) element = 0 - for base in l: + for base in listbases: element = element + 1 if base == 'G': l2.append('C') @@ -1836,14 +1807,15 @@ def merge_hgvs_3pr(self, hgvs_variant_list,hn): raise fn.mergeHGVSerror("Unable to map from c. 
position to absolute position") elif hgvs_v.type == 'g': h_list.append(hgvs_v) - if h_list != []: + + if h_list: hgvs_variant_list = copy.deepcopy(h_list) # Define accession and start/end positions accession = None merge_start_pos = None merge_end_pos = None - type = None + seqtype = None full_list = [] # Loop through the submitted variants and gather the required info @@ -1863,12 +1835,10 @@ def merge_hgvs_3pr(self, hgvs_variant_list,hn): # Set the accession and ensure that multiple reference sequences have not been queried if accession is None: accession = hgvs_v.ac - type = hgvs_v.type + seqtype = hgvs_v.type else: if hgvs_v.ac != accession: raise fn.mergeHGVSerror("More than one reference sequence submitted") - else: - pass # Set initial start and end positions if merge_start_pos is None: @@ -1904,10 +1874,10 @@ def merge_hgvs_3pr(self, hgvs_variant_list,hn): reference_sequence = self.sf.fetch_seq(accession, merge_start_pos - 1, merge_end_pos) # Generate an hgvs_delins if alt_sequence == '': - delins = accession + ':' + type + '.' + str(merge_start_pos) + '_' + str( + delins = accession + ':' + seqtype + '.' + str(merge_start_pos) + '_' + str( merge_end_pos) + 'del' + reference_sequence else: - delins = accession + ':' + type + '.' + str(merge_start_pos) + '_' + str( + delins = accession + ':' + seqtype + '.' + str(merge_start_pos) + '_' + str( merge_end_pos) + 'del' + reference_sequence + 'ins' + alt_sequence hgvs_delins = self.hp.parse_hgvs_variant(delins) try: @@ -1945,14 +1915,14 @@ def merge_hgvs_5pr(self, hgvs_variant_list): h_list.append(hgvs_v) except: raise fn.mergeHGVSerror("Unable to map from c. 
position to absolute position") - if h_list != []: + if h_list: hgvs_variant_list = copy.deepcopy(h_list) # Define accession and start/end positions accession = None merge_start_pos = None merge_end_pos = None - type = None + seqtype = None full_list = [] # Loop through the submitted variants and gather the required info @@ -1972,12 +1942,10 @@ def merge_hgvs_5pr(self, hgvs_variant_list): # Set the accession and ensure that multiple reference sequences have not been queried if accession is None: accession = hgvs_v.ac - type = hgvs_v.type + seqtype = hgvs_v.type else: if hgvs_v.ac != accession: raise fn.mergeHGVSerror("More than one reference sequence submitted") - else: - pass # Set initial start and end positions if merge_start_pos is None: @@ -2014,10 +1982,10 @@ def merge_hgvs_5pr(self, hgvs_variant_list): # Generate an hgvs_delins if alt_sequence == '': - delins = accession + ':' + type + '.' + str(merge_start_pos) + '_' + str( + delins = accession + ':' + seqtype + '.' + str(merge_start_pos) + '_' + str( merge_end_pos) + 'del' + reference_sequence else: - delins = accession + ':' + type + '.' + str(merge_start_pos) + '_' + str( + delins = accession + ':' + seqtype + '.' 
+ str(merge_start_pos) + '_' + str( merge_end_pos) + 'del' + reference_sequence + 'ins' + alt_sequence hgvs_delins = self.hp.parse_hgvs_variant(delins) try: @@ -2048,7 +2016,7 @@ def merge_pseudo_vcf(self, vcf_list, genome_build, hn): # return return hgvs_delins - def hgvs_alleles(self, variant_description,hn): + def hgvs_alleles(self, variant_description, hn): """ HGVS allele handling function which takes a single HGVS allele description and separates each allele into a list of HGVS variants @@ -2057,20 +2025,20 @@ def hgvs_alleles(self, variant_description,hn): # Split up the description accession, remainder = variant_description.split(':') # Branch - if re.search('[gcn]\.\d+\[', remainder): + if re.search(r'[gcn]\.\d+\[', remainder): # NM_004006.2:c.2376[G>C];[(G>C)] # if re.search('\(', remainder): # raise fn.alleleVariantError('Unsupported format ' + remainder) # NM_004006.2:c.2376[G>C];[G>C] type, remainder = remainder.split('.') - pos = re.match('\d+', remainder) + pos = re.match(r'\d+', remainder) pos = pos.group(0) remainder = remainder.replace(pos, '') remainder = remainder[1:-1] alleles = remainder.split('];[') my_alleles = [] for posedit in alleles: - if re.search('\(', posedit): + if '(' in posedit: # NM_004006.2:c.2376[G>C];[(G>C)] continue posedit_list = [posedit] @@ -2081,13 +2049,13 @@ def hgvs_alleles(self, variant_description,hn): my_alleles.append(current_allele) else: type, remainder = remainder.split('.') - if re.search('\(;\)', remainder) and re.search('\];', remainder): + if '(;)' in remainder and '];' in remainder: # NM_004006.2:c.[296T>G];[476T>C](;)1083A>C(;)1406del pre_alleles = remainder.split('(;)') pre_merges = [] alleles = [] for allele in pre_alleles: - if re.match('\[', allele): + if '[' in allele: pre_merges.append(allele) else: alleles.append(allele) @@ -2118,20 +2086,20 @@ def hgvs_alleles(self, variant_description,hn): # Now merge the alleles into a single variant merged_alleles = [] for each_allele in my_alleles: - if 
re.search('\?', str(each_allele)): + if '?' in str(each_allele): # NM_004006.2:c.[2376G>C];[?] continue merge = [] - allele = str(self.merge_hgvs_3pr(each_allele,hn)) + allele = str(self.merge_hgvs_3pr(each_allele, hn)) merge.append(allele) for variant in each_allele: merged_alleles.append([variant]) my_alleles = merged_alleles - elif re.search('\(;\)', remainder): + elif '(;)' in remainder: # If statement for uncertainties # NM_004006.2:c.[296T>G;476C>T];[476C>T](;)1083A>C - if re.search('\[', remainder): + if '[' in remainder: raise fn.alleleVariantError('Unsupported format ' + type + '.' + remainder) # NM_004006.2:c.2376G>C(;)3103del # NM_000548.3:c.3623_3647del(;)3745_3756dup @@ -2147,7 +2115,7 @@ def hgvs_alleles(self, variant_description,hn): my_alleles.append(current_allele) else: # If statement for uncertainties - if re.search('\(', remainder): + if '(' in remainder: raise fn.alleleVariantError('Unsupported format ' + type + '.' + remainder) # NM_004006.2:c.[2376G>C];[3103del] # NM_004006.2:c.[2376G>C];[3103del] @@ -2168,11 +2136,11 @@ def hgvs_alleles(self, variant_description,hn): merged_alleles = [] for each_allele in my_alleles: - if re.search('\?', str(each_allele)): + if '?' in str(each_allele): # NM_004006.2:c.[2376G>C];[?] 
continue merge = [] - allele = str(self.merge_hgvs_3pr(each_allele,hn)) + allele = str(self.merge_hgvs_3pr(each_allele, hn)) merge.append(allele) for variant in each_allele: merged_alleles.append([variant]) @@ -2187,16 +2155,14 @@ def hgvs_alleles(self, variant_description,hn): # return return my_alleles + except Exception as e: - import traceback - exc_type, exc_value, last_traceback = sys.exc_info() - te = traceback.format_exc() raise fn.alleleVariantError(str(e)) def chr_to_rsg(self, hgvs_genomic, hn, vrOld): - ''' + """ # Covert chromosomal HGVS description to RefSeqGene - ''' + """ # print 'chr_to_rsg triggered' hgvs_genomic = hn.normalize(hgvs_genomic) # split the description @@ -2217,7 +2183,8 @@ def chr_to_rsg(self, hgvs_genomic, hn, vrOld): # Logic to identify the correct RefSeqGene rsg_data = {} if chr_ac == line[1] and chr_start_pos >= int(line[2]) and chr_end_pos <= int(line[3]): - # query = "SELECT refSeqGeneID, refSeqChromosomeID, startPos, endPos, orientation, hgncSymbol FROM refSeqGene_loci" + # query = "SELECT refSeqGeneID, refSeqChromosomeID, startPos, endPos, orientation, hgncSymbol FROM + # refSeqGene_loci" # (u'NG_034189.1', u'NC_000004.12', 190173122, 190177845, u'+', u'DUX4L1') # Set the values of the data dictionary rsg_data['rsg_ac'] = line[0] @@ -2254,13 +2221,12 @@ def chr_to_rsg(self, hgvs_genomic, hn, vrOld): self.vr.validate(hgvs_refseqgene) except hgvs.exceptions.HGVSError as e: error = str(e) - if re.search('does not agree with reference sequence', error): - match = re.findall('\(([GATC]+)\)', error) + if 'does not agree with reference sequence' in error: + match = re.findall(r'\(([GATC]+)\)', error) new_ref = match[1] hgvs_refseqgene.posedit.edit.ref = new_ref error = 'true' - else: - pass + data = {'hgvs_refseqgene': str(hgvs_refseqgene), 'gene': gene, 'valid': str(error)} else: data = {'hgvs_refseqgene': str(hgvs_refseqgene), 'gene': gene, 'valid': 'true'} @@ -2268,24 +2234,24 @@ def chr_to_rsg(self, hgvs_genomic, hn, vrOld): if 
ori == '-': # Reverse complement of bases may be required. Let normalizer do the lifting for strings of bases # Look for scenarios with RC needed bases and extract the bases from the edit - if re.search(r"((del[GATCUgatcu]+))", str(chr_edit)): - bases = re.search(r"((del[GATCUgatcu]+))", str(chr_edit)) + if re.search(r"(del[GATCUgatcu]+)", str(chr_edit)): + bases = re.search(r"(del[GATCUgatcu]+)", str(chr_edit)) bases = bases.group(1) chr_edit = 'del' + str(chr_edit).replace(bases, '') - if re.search(r"((ins[GATCUgatcu]+))", str(chr_edit)): - bases = re.search(r"((ins[GATCUgatcu]+))", str(chr_edit)) + if re.search(r"(ins[GATCUgatcu]+)", str(chr_edit)): + bases = re.search(r"(ins[GATCUgatcu]+)", str(chr_edit)) bases = bases.group(1) ins_revcomp = self.revcomp(bases) chr_edit = str(chr_edit).replace(bases, '') + 'ins' + ins_revcomp - if re.search(r"((dup[GATCUgatcu]+))", str(chr_edit)): - bases = re.search(r"((dup[GATCUgatcu]+))", str(chr_edit)) + if re.search(r"(dup[GATCUgatcu]+)", str(chr_edit)): + bases = re.search(r"(dup[GATCUgatcu]+)", str(chr_edit)) bases = bases.group(1) chr_edit = 'dup' + str(chr_edit).replace(bases, '') - if re.search(r"((inv[GATCUgatcu]+))", str(chr_edit)): - bases = re.search(r"((inv[GATCUgatcu]+))", str(chr_edit)) + if re.search(r"(inv[GATCUgatcu]+)", str(chr_edit)): + bases = re.search(r"(inv[GATCUgatcu]+)", str(chr_edit)) bases = bases.group(1) chr_edit = 'inv' + str(chr_edit).replace(bases, '') - if re.search('>', str(chr_edit)) or re.search('=', str(chr_edit)): + if '>' in str(chr_edit) or '=' in str(chr_edit): chr_edit = str(chr_edit) chr_edit = chr_edit.replace('A>', 't>') chr_edit = chr_edit.replace('T>', 'a>') @@ -2316,13 +2282,12 @@ def chr_to_rsg(self, hgvs_genomic, hn, vrOld): self.vr.validate(hgvs_refseqgene) except hgvs.exceptions.HGVSError as e: error = str(e) - if re.search('does not agree with reference sequence', error): - match = re.findall('\(([GATC]+)\)', error) + if 'does not agree with reference sequence' in error: + 
match = re.findall(r'\(([GATC]+)\)', error) new_ref = match[1] hgvs_refseqgene.posedit.edit.ref = new_ref error = 'true' - else: - pass + data = {'hgvs_refseqgene': str(hgvs_refseqgene), 'gene': gene, 'valid': str(error)} else: data = {'hgvs_refseqgene': str(hgvs_refseqgene), 'gene': gene, 'valid': 'true'} @@ -2331,9 +2296,8 @@ def chr_to_rsg(self, hgvs_genomic, hn, vrOld): # Return the required data. This is a dictionary containing the rsg description, validation status and gene ID return descriptions - def rsg_to_chr(self, hgvs_refseqgene, primary_assembly, hn, vr): - ''' + """ # Covert RefSeqGene HGVS description to Chromosomal :param hgvs_refseqgene: @@ -2341,7 +2305,7 @@ def rsg_to_chr(self, hgvs_refseqgene, primary_assembly, hn, vr): :param hn: HGVS Normalizer :param vr: :return: - ''' + """ # normalize try: hgvs_refseqgene = hn.normalize(hgvs_refseqgene) @@ -2365,7 +2329,8 @@ def rsg_to_chr(self, hgvs_refseqgene, primary_assembly, hn, vr): # Logic to identify the correct RefSeqGene chr_data = {} if rsg_ac == line[0] and primary_assembly == line[6]: - # query = "SELECT refSeqGeneID, refSeqChromosomeID, startPos, endPos, orientation, hgncSymbol FROM refSeqGene_loci" + # query = "SELECT refSeqGeneID, refSeqChromosomeID, startPos, endPos, orientation, + # hgncSymbol FROM refSeqGene_loci" # (u'NG_034189.1', u'NC_000004.12', 190173122, 190177845, u'+', u'DUX4L1') # Set the values of the data dictionary chr_data['rsg_ac'] = line[0] @@ -2382,7 +2347,6 @@ def rsg_to_chr(self, hgvs_refseqgene, primary_assembly, hn, vr): descriptions = [] for chr_data in chr_data_set: chr_ac = chr_data['chr_ac'] - rsg_ac = chr_data['rsg_ac'] chr_start = int(chr_data['rsg_start']) chr_end = int(chr_data['rsg_end']) ori = chr_data['ori'] @@ -2397,13 +2361,11 @@ def rsg_to_chr(self, hgvs_refseqgene, primary_assembly, hn, vr): vr.validate(hgvs_genomic) except hgvs.exceptions.HGVSError as e: error = str(e) - if re.search('does not agree with reference sequence', error): - match = 
re.findall('\(([GATC]+)\)', error) + if 'does not agree with reference sequence' in error: + match = re.findall(r'\(([GATC]+)\)', error) new_ref = match[1] hgvs_genomic.posedit.edit.ref = new_ref error = 'true' - else: - pass # # print str(e) + '\n3.' data = {'hgvs_genomic': str(hgvs_genomic), 'gene': gene, 'valid': str(error)} else: @@ -2412,24 +2374,24 @@ def rsg_to_chr(self, hgvs_refseqgene, primary_assembly, hn, vr): if ori == '-': # Reverse complement of bases may be required. Let normalizer do the lifting for strings of bases # Look for scenarios with RC needed bases and extract the bases from the edit - if re.search(r"((del[GATCUgatcu]+))", str(rsg_edit)): - bases = re.search(r"((del[GATCUgatcu]+))", str(rsg_edit)) + if re.search(r'(del[GATCUgatcu]+)', str(rsg_edit)): + bases = re.search(r"(del[GATCUgatcu]+)", str(rsg_edit)) bases = bases.group(1) rsg_edit = 'del' + str(rsg_edit).replace(bases, '') - if re.search(r"((ins[GATCUgatcu]+))", str(rsg_edit)): - bases = re.search(r"((ins[GATCUgatcu]+))", str(rsg_edit)) + if re.search(r"(ins[GATCUgatcu]+)", str(rsg_edit)): + bases = re.search(r"(ins[GATCUgatcu]+)", str(rsg_edit)) bases = bases.group(1) ins_revcomp = self.revcomp(bases) rsg_edit = str(rsg_edit).replace(bases, '') + 'ins' + ins_revcomp - if re.search(r"((dup[GATCUgatcu]+))", str(rsg_edit)): - bases = re.search(r"((dup[GATCUgatcu]+))", str(rsg_edit)) + if re.search(r"(dup[GATCUgatcu]+)", str(rsg_edit)): + bases = re.search(r"(dup[GATCUgatcu]+)", str(rsg_edit)) bases = bases.group(1) rsg_edit = 'dup' + str(rsg_edit).replace(bases, '') - if re.search(r"((inv[GATCUgatcu]+))", str(rsg_edit)): - bases = re.search(r"((inv[GATCUgatcu]+))", str(rsg_edit)) + if re.search(r"(inv[GATCUgatcu]+)", str(rsg_edit)): + bases = re.search(r"(inv[GATCUgatcu]+)", str(rsg_edit)) bases = bases.group(1) rsg_edit = 'inv' + str(rsg_edit).replace(bases, '') - if re.search('>', str(rsg_edit)) or re.search('=', str(rsg_edit)): + if '>' in str(rsg_edit) or '=' in str(rsg_edit): 
rsg_edit = str(rsg_edit) rsg_edit = rsg_edit.replace('A>', 't>') rsg_edit = rsg_edit.replace('T>', 'a>') @@ -2455,8 +2417,8 @@ def rsg_to_chr(self, hgvs_refseqgene, primary_assembly, hn, vr): vr.validate(hgvs_genomic) except hgvs.exceptions.HGVSError as e: error = str(e) - if re.search('does not agree with reference sequence', error): - match = re.findall('\(([GATC]+)\)', error) + if 'does not agree with reference sequence' in error: + match = re.findall(r'\(([GATC]+)\)', error) new_ref = match[1] hgvs_genomic.posedit.edit.ref = new_ref error = 'true' From 3b621a2e64728e4c37177fff8bfcb3707d696624 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 21 Jun 2019 11:14:01 +0100 Subject: [PATCH 145/223] Commented out unused methods in mixinConverters --- VariantValidator/modules/vvMixinConverters.py | 490 +++++++++--------- 1 file changed, 245 insertions(+), 245 deletions(-) diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index e801a79e..02d92f90 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -21,93 +21,93 @@ class Mixin(vvMixinInit.Mixin): This mixin contains converters that use the validator's configuration information. It inherits the Init mixin """ - def r_to_c(self, variant, evm): - """ - r_to_c - parses r. variant strings into hgvs object and maps to the c. equivalent. 
- """ - # convert the input string into a hgvs object by parsing - var_r = self.hp.parse_hgvs_variant(variant) - # map to the coding sequence - var_c = evm.r_to_c(var_r) # coding level variant - variant = str(var_c) - c_from_r = {'variant': variant, 'type': ':c.'} - return c_from_r - - def refseq(self, variant, vmOld, refseq_ac, hpOld, evm, hdpOld, primary_assembly): - """ - Maps transcript variant descriptions onto specified RefSeqGene reference sequences - Return an hgvs object containing the genomic sequence variant relative to the RefSeqGene - acession - refseq_ac = RefSeqGene ac - """ - vr = hgvs.validator.Validator(self.hdp) - # parse the variant into hgvs object - var_c = self.hp.parse_hgvs_variant(variant) - # map to the genomic co-ordinates using the easy variant mapper set to alt_aln_method = alt_aln_method - var_g = self.myevm_t_to_g(var_c, evm, self.hdp, primary_assembly) - # Get overlapping transcripts - forcing a splign alignment - start_i = var_g.posedit.pos.start.base - end_i = var_g.posedit.pos.end.base - alt_ac = var_g.ac - alt_aln_method = 'splign' - transcripts = self.hdp.get_tx_for_region(alt_ac, alt_aln_method, start_i - 1, end_i) - # Take the first transcript - ref_g_dict = { - 'ref_g': '', - 'error': 'false' - } - for trans in transcripts: - tx_ac = trans[0] - try: - ref_c = self.vm.g_to_t(var_g, tx_ac, alt_aln_method='splign') - except: - continue - else: - try: - ref_g_dict['ref_g'] = self.vm.t_to_g(ref_c, alt_ac=refseq_ac, alt_aln_method='splign') - except: - e = sys.exc_info()[0] - ref_g_dict['error'] = e - try: - vr.validate(ref_g_dict['ref_g']) - except: - e = sys.exc_info()[0] - ref_g_dict['error'] = e - if ref_g_dict['error'] == 'false': - return ref_g_dict - else: - continue - # Return as an error if all fail - return ref_g_dict - - def g_to_c(self, var_g, tx_ac, hpOld, evm): - """ - Parses genomic variant strings into hgvs objects - Maps genomic hgvs object into a coding hgvs object if the c accession string is provided - returns a 
c. variant description string - """ - # If the :g. pattern is present in the input variant - if ':g.' in var_g: - # convert the input string into a hgvs object by parsing - var_g = self.hp.parse_hgvs_variant(var_g) - # Map to coding variant - var_c = str(evm.g_to_c(var_g, tx_ac)) - return var_c - - def g_to_n(self, var_g, tx_ac, hpOld, evm): - """ - Parses genomic variant strings into hgvs objects - Maps genomic hgvs object into a non-coding hgvs object if the n accession string is provided - returns a n. variant description string - """ - # If the :g. pattern is present in the input variant - if ':g.' in var_g: - # convert the input string into a hgvs object by parsing - var_g = self.hp.parse_hgvs_variant(var_g) - # Map to coding variant - var_n = str(evm.g_to_n(var_g, tx_ac)) - return var_n + # def r_to_c(self, variant, evm): + # """ + # r_to_c + # parses r. variant strings into hgvs object and maps to the c. equivalent. + # """ + # # convert the input string into a hgvs object by parsing + # var_r = self.hp.parse_hgvs_variant(variant) + # # map to the coding sequence + # var_c = evm.r_to_c(var_r) # coding level variant + # variant = str(var_c) + # c_from_r = {'variant': variant, 'type': ':c.'} + # return c_from_r + # + # def refseq(self, variant, vmOld, refseq_ac, hpOld, evm, hdpOld, primary_assembly): + # """ + # Maps transcript variant descriptions onto specified RefSeqGene reference sequences + # Return an hgvs object containing the genomic sequence variant relative to the RefSeqGene + # acession + # refseq_ac = RefSeqGene ac + # """ + # vr = hgvs.validator.Validator(self.hdp) + # # parse the variant into hgvs object + # var_c = self.hp.parse_hgvs_variant(variant) + # # map to the genomic co-ordinates using the easy variant mapper set to alt_aln_method = alt_aln_method + # var_g = self.myevm_t_to_g(var_c, evm, self.hdp, primary_assembly) + # # Get overlapping transcripts - forcing a splign alignment + # start_i = var_g.posedit.pos.start.base + # end_i = 
var_g.posedit.pos.end.base + # alt_ac = var_g.ac + # alt_aln_method = 'splign' + # transcripts = self.hdp.get_tx_for_region(alt_ac, alt_aln_method, start_i - 1, end_i) + # # Take the first transcript + # ref_g_dict = { + # 'ref_g': '', + # 'error': 'false' + # } + # for trans in transcripts: + # tx_ac = trans[0] + # try: + # ref_c = self.vm.g_to_t(var_g, tx_ac, alt_aln_method='splign') + # except: + # continue + # else: + # try: + # ref_g_dict['ref_g'] = self.vm.t_to_g(ref_c, alt_ac=refseq_ac, alt_aln_method='splign') + # except: + # e = sys.exc_info()[0] + # ref_g_dict['error'] = e + # try: + # vr.validate(ref_g_dict['ref_g']) + # except: + # e = sys.exc_info()[0] + # ref_g_dict['error'] = e + # if ref_g_dict['error'] == 'false': + # return ref_g_dict + # else: + # continue + # # Return as an error if all fail + # return ref_g_dict + # + # def g_to_c(self, var_g, tx_ac, hpOld, evm): + # """ + # Parses genomic variant strings into hgvs objects + # Maps genomic hgvs object into a coding hgvs object if the c accession string is provided + # returns a c. variant description string + # """ + # # If the :g. pattern is present in the input variant + # if ':g.' in var_g: + # # convert the input string into a hgvs object by parsing + # var_g = self.hp.parse_hgvs_variant(var_g) + # # Map to coding variant + # var_c = str(evm.g_to_c(var_g, tx_ac)) + # return var_c + # + # def g_to_n(self, var_g, tx_ac, hpOld, evm): + # """ + # Parses genomic variant strings into hgvs objects + # Maps genomic hgvs object into a non-coding hgvs object if the n accession string is provided + # returns a n. variant description string + # """ + # # If the :g. pattern is present in the input variant + # if ':g.' 
in var_g: + # # convert the input string into a hgvs object by parsing + # var_g = self.hp.parse_hgvs_variant(var_g) + # # Map to coding variant + # var_n = str(evm.g_to_n(var_g, tx_ac)) + # return var_n def coding(self, variant, hpOld): """ @@ -141,16 +141,16 @@ def genomic(self, variant, evm, primary_assembly, hn): var_g = self.hp.parse_hgvs_variant(variant) return var_g - def hgvs_genomic(self, variant, hpOld): - """ - Ensures variant strings are g. - returns parsed hgvs g. object - """ - # If the :g. pattern is present in the input variant - if ':g.' in variant: - # convert the input string into a hgvs object - var_g = self.hp.parse_hgvs_variant(variant) - return var_g + # def hgvs_genomic(self, variant, hpOld): + # """ + # Ensures variant strings are g. + # returns parsed hgvs g. object + # """ + # # If the :g. pattern is present in the input variant + # if ':g.' in variant: + # # convert the input string into a hgvs object + # var_g = self.hp.parse_hgvs_variant(variant) + # return var_g def myevm_t_to_g(self, hgvs_c, no_norm_evm, primary_assembly, hn): """ @@ -1453,15 +1453,15 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): return hgvs_genomic - def hgvs_protein(self, variant, hpOld): - """ - parse p. strings into hgvs p. objects - """ - # If the :p. pattern is present in the input variant - if ':p.' in variant: - # convert the input string into a hgvs object - var_p = self.hp.parse_hgvs_variant(variant) - return var_p + # def hgvs_protein(self, variant, hpOld): + # """ + # parse p. strings into hgvs p. objects + # """ + # # If the :p. pattern is present in the input variant + # if ':p.' in variant: + # # convert the input string into a hgvs object + # var_p = self.hp.parse_hgvs_variant(variant) + # return var_p def hgvs_r_to_c(self, hgvs_object): """ @@ -1488,84 +1488,84 @@ def hgvs_r_to_c(self, hgvs_object): hgvs_object.posedit.edit = edit return hgvs_object - def hgvs_c_to_r(self, hgvs_object): - """ - Convert c. into r. 
- """ - hgvs_object.type = 'r' - edit = str(hgvs_object.posedit.edit) - edit = edit.lower() - edit = edit.replace('t', 'u') - hgvs_object.posedit.edit = edit - return hgvs_object - - def tx_identity_info(self, variant, hdpOld): - """ - Input c. r. n. variant string - Use uta.py (hdp) to return the identity information for the transcript variant - see hgvs.dataproviders.uta.py for details - """ - # If the :c. pattern is present in the input variant - if ':c.' in variant: - # Remove all text to the right and including pat_c - tx_ac = variant[:variant.index(':c.') + len(':c.')] - tx_ac = tx_ac.replace(':c.', '') - # Interface with the UTA database via get_tx_identity in uta.py - tx_id_info = self.hdp.get_tx_identity_info(tx_ac) - # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list - return tx_id_info - - # If the :n. pattern is present in the input variant - if ':n.' in variant: - # Remove all text to the right and including pat_c - tx_ac = variant[:variant.index(':n.') + len(':n.')] - tx_ac = tx_ac.replace(':n.', '') - # Interface with the UTA database via get_tx_identity in uta.py - tx_id_info = self.hdp.get_tx_identity_info(tx_ac) - # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list - return tx_id_info - - # If the :r. pattern is present in the input variant - if ':r.' in variant: - # Remove all text to the right and including pat_c - tx_ac = variant[:variant.index(':r.') + len(':r.')] - tx_ac = tx_ac.replace(':r.', '') - # Interface with the UTA database via get_tx_identity in uta.py - tx_id_info = self.hdp.get_tx_identity_info(tx_ac) - # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list - return tx_id_info - - def tx_id_info(self, alt_ac, hdpOld): - """ - Input c. r. 
nd accession string - Use uta.py (hdp) to return the identity information for the transcript variant - see hgvs.dataproviders.uta.py for details - """ - tx_id_info = self.hdp.get_tx_identity_info(alt_ac) - # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list - return tx_id_info - - def tx_for_gene(self, hgnc, hdpOld): - """ - Use uta.py (hdp) to return the transcript information for a specified gene (HGNC SYMBOL) - see hgvs.dataproviders.uta.py for details - """ - # Interface with the UTA database via get_tx_for_gene in uta.py - tx_for_gene = self.hdp.get_tx_for_gene(hgnc) - return tx_for_gene - - def ng_extract(self, tx_for_gene): - """ - Extract RefSeqGene Accession from transcript information - see hgvs.dataproviders.uta.py for details - """ - # For each list in the list of lists tx_for_gene - for item in tx_for_gene: - # If the pattern NG_ is found in element 4 - if 'NG_' in item[4]: - # The gene accession is set to list element 4 - gene_ac = item[4] - return gene_ac + # def hgvs_c_to_r(self, hgvs_object): + # """ + # Convert c. into r. + # """ + # hgvs_object.type = 'r' + # edit = str(hgvs_object.posedit.edit) + # edit = edit.lower() + # edit = edit.replace('t', 'u') + # hgvs_object.posedit.edit = edit + # return hgvs_object + + # def tx_identity_info(self, variant, hdpOld): + # """ + # Input c. r. n. variant string + # Use uta.py (hdp) to return the identity information for the transcript variant + # see hgvs.dataproviders.uta.py for details + # """ + # # If the :c. pattern is present in the input variant + # if ':c.' 
in variant: + # # Remove all text to the right and including pat_c + # tx_ac = variant[:variant.index(':c.') + len(':c.')] + # tx_ac = tx_ac.replace(':c.', '') + # # Interface with the UTA database via get_tx_identity in uta.py + # tx_id_info = self.hdp.get_tx_identity_info(tx_ac) + # # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list + # return tx_id_info + # + # # If the :n. pattern is present in the input variant + # if ':n.' in variant: + # # Remove all text to the right and including pat_c + # tx_ac = variant[:variant.index(':n.') + len(':n.')] + # tx_ac = tx_ac.replace(':n.', '') + # # Interface with the UTA database via get_tx_identity in uta.py + # tx_id_info = self.hdp.get_tx_identity_info(tx_ac) + # # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list + # return tx_id_info + # + # # If the :r. pattern is present in the input variant + # if ':r.' in variant: + # # Remove all text to the right and including pat_c + # tx_ac = variant[:variant.index(':r.') + len(':r.')] + # tx_ac = tx_ac.replace(':r.', '') + # # Interface with the UTA database via get_tx_identity in uta.py + # tx_id_info = self.hdp.get_tx_identity_info(tx_ac) + # # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list + # return tx_id_info + + # def tx_id_info(self, alt_ac, hdpOld): + # """ + # Input c. r. 
nd accession string + # Use uta.py (hdp) to return the identity information for the transcript variant + # see hgvs.dataproviders.uta.py for details + # """ + # tx_id_info = self.hdp.get_tx_identity_info(alt_ac) + # # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list + # return tx_id_info + + # def tx_for_gene(self, hgnc, hdpOld): + # """ + # Use uta.py (hdp) to return the transcript information for a specified gene (HGNC SYMBOL) + # see hgvs.dataproviders.uta.py for details + # """ + # # Interface with the UTA database via get_tx_for_gene in uta.py + # tx_for_gene = self.hdp.get_tx_for_gene(hgnc) + # return tx_for_gene + + # def ng_extract(self, tx_for_gene): + # """ + # Extract RefSeqGene Accession from transcript information + # see hgvs.dataproviders.uta.py for details + # """ + # # For each list in the list of lists tx_for_gene + # for item in tx_for_gene: + # # If the pattern NG_ is found in element 4 + # if 'NG_' in item[4]: + # # The gene accession is set to list element 4 + # gene_ac = item[4] + # return gene_ac def tx_exons(self, tx_ac, alt_ac, alt_aln_method): """ @@ -1697,36 +1697,36 @@ def validateHGVS(self, query): else: return 'false' - def hgnc_rest(self, path): - """ - Search HGNC rest - """ - data = { - 'record': '', - 'error': 'false' - } - # HGNC server - headers = { - 'Accept': 'application/json', - } - uri = 'http://rest.genenames.org' - target = urlparse(uri + path) - method = 'GET' - body = '' - h = http.Http() - # collect the response - response, content = h.request( - target.geturl(), - method, - body, - headers) - if response['status'] == '200': - # assume that content is a json reply - # parse content with the json module - data['record'] = json.loads(content) - else: - data['error'] = "Unable to contact the HGNC database: Please try again later" - return data + # def hgnc_rest(self, path): + # """ + # Search HGNC rest + # """ + # data = { + # 'record': '', + # 'error': 'false' + # } + # # HGNC server 
+ # headers = { + # 'Accept': 'application/json', + # } + # uri = 'http://rest.genenames.org' + # target = urlparse(uri + path) + # method = 'GET' + # body = '' + # h = http.Http() + # # collect the response + # response, content = h.request( + # target.geturl(), + # method, + # body, + # headers) + # if response['status'] == '200': + # # assume that content is a json reply + # # parse content with the json module + # data['record'] = json.loads(content) + # else: + # data['error'] = "Unable to contact the HGNC database: Please try again later" + # return data def entrez_efetch(self, db, id, rettype, retmode): """ @@ -1743,21 +1743,21 @@ def entrez_efetch(self, db, id, rettype, retmode): handle.close() return record - def entrez_read(self,db, id, retmode): - """ - search Entrez databases with efetch and read - """ - # IMPORT Bio modules - # from Bio import Entrez - Entrez.email = self.entrezID - # from Bio import SeqIO - handle = Entrez.efetch(db=db, id=id, retmode=retmode) - # Get record - record = Entrez.read(handle) - # Place into text - # text = handle.read() - handle.close() - return record + # def entrez_read(self,db, id, retmode): + # """ + # search Entrez databases with efetch and read + # """ + # # IMPORT Bio modules + # # from Bio import Entrez + # Entrez.email = self.entrezID + # # from Bio import SeqIO + # handle = Entrez.efetch(db=db, id=id, retmode=retmode) + # # Get record + # record = Entrez.read(handle) + # # Place into text + # # text = handle.read() + # handle.close() + # return record def revcomp(self, bases): """ @@ -1999,22 +1999,22 @@ def merge_hgvs_5pr(self, hgvs_variant_list): pass return hgvs_delins - def merge_pseudo_vcf(self, vcf_list, genome_build, hn): - """ - Function designed to merge multiple pseudo VCF variants (strings) into a single HGVS delins - using 5 prime normalization then return a 3 prime normalized final HGVS object - """ - hgvs_list = [] - # Convert pseudo_vcf list into a HGVS list - for call in vcf_list: - x55hgvs = 
hgvs_utils.pvcf_to_hgvs(call, genome_build, normalization_direction=5, validator=self) - hgvs_list.append(x55hgvs) - # Merge - hgvs_delins = self.merge_hgvs_5pr(hgvs_list) - # normalize 3 prime - hgvs_delins = hn.normalize(hgvs_delins) - # return - return hgvs_delins + # def merge_pseudo_vcf(self, vcf_list, genome_build, hn): + # """ + # Function designed to merge multiple pseudo VCF variants (strings) into a single HGVS delins + # using 5 prime normalization then return a 3 prime normalized final HGVS object + # """ + # hgvs_list = [] + # # Convert pseudo_vcf list into a HGVS list + # for call in vcf_list: + # x55hgvs = hgvs_utils.pvcf_to_hgvs(call, genome_build, normalization_direction=5, validator=self) + # hgvs_list.append(x55hgvs) + # # Merge + # hgvs_delins = self.merge_hgvs_5pr(hgvs_list) + # # normalize 3 prime + # hgvs_delins = hn.normalize(hgvs_delins) + # # return + # return hgvs_delins def hgvs_alleles(self, variant_description, hn): """ From 128dfe9f19d092bfc5cd59dfa6b0a4b0935169b7 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 21 Jun 2019 13:34:52 +0100 Subject: [PATCH 146/223] Removed unused input arguments from mixinConverter methods --- VariantValidator/modules/mappers.py | 14 +++++----- VariantValidator/modules/vvMixinConverters.py | 26 +++++++++---------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index 970e1fbc..93e5c7c9 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -85,7 +85,7 @@ def gene_to_transcripts(variant, validator): # parse hgvs_refseqgene = validator.hp.parse_hgvs_variant(str(variant.hgvs_formatted)) # Convert to chromosomal position - refseqgene_data = validator.rsg_to_chr(hgvs_refseqgene, variant.primary_assembly, variant.hn, validator.vr) + refseqgene_data = validator.rsg_to_chr(hgvs_refseqgene, variant.primary_assembly, variant.hn) # There should only ever be one description returned 
refseqgene_data = refseqgene_data[0] @@ -120,7 +120,7 @@ def gene_to_transcripts(variant, validator): return True else: # Map to RefSeqGene if available - refseqgene_data = validator.chr_to_rsg(variant.hgvs_genomic, variant.hn, validator.vr) + refseqgene_data = validator.chr_to_rsg(variant.hgvs_genomic, variant.hn) rsg_data = '' # Example {'gene': 'NTHL1', 'hgvs_refseqgene': 'NG_008412.1:g.3455_3464delCAAACACACA', # 'valid': 'true'} @@ -357,7 +357,7 @@ def transcripts_to_gene(variant, validator): if 'del' in formatted_variant: # RNA - looking at trapped variant which was saved before RNA converted to cDNA if ':r.' in variant.pre_RNA_conversion: - coding = validator.coding(formatted_variant, validator.hp) + coding = validator.coding(formatted_variant) trans_acc = coding.ac # c to Genome coordinates - Map the variant to the genome pre_var = validator.genomic(formatted_variant, variant.no_norm_evm, variant.primary_assembly, @@ -397,7 +397,7 @@ def transcripts_to_gene(variant, validator): # Coding else: - coding = validator.coding(formatted_variant, validator.hp) + coding = validator.coding(formatted_variant) trans_acc = coding.ac # c to Genome coordinates - Map the variant to the genome pre_var = validator.hp.parse_hgvs_variant(formatted_variant) @@ -442,7 +442,7 @@ def transcripts_to_gene(variant, validator): else: # del not in formatted_variant if ':r.' 
in variant.pre_RNA_conversion: - coding = validator.coding(formatted_variant, validator.hp) + coding = validator.coding(formatted_variant) trans_acc = coding.ac # c to Genome coordinates - Map the variant to the genome pre_var = validator.genomic(formatted_variant, variant.no_norm_evm, variant.primary_assembly, @@ -478,7 +478,7 @@ def transcripts_to_gene(variant, validator): validator.batch_list.append(query) else: - coding = validator.coding(formatted_variant, validator.hp) + coding = validator.coding(formatted_variant) trans_acc = coding.ac # c to Genome coordinates - Map the variant to the genome pre_var = validator.genomic(formatted_variant, variant.no_norm_evm, variant.primary_assembly, @@ -615,7 +615,7 @@ def transcripts_to_gene(variant, validator): ############################## # Coding sequence - BASED ON NORMALIZED VARIANT IF EXONIC - hgvs_coding = validator.coding(formatted_variant, validator.hp) + hgvs_coding = validator.coding(formatted_variant) try: hgvs_coding = variant.hn.normalize(hgvs_coding) diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index 02d92f90..866d6d86 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -34,7 +34,7 @@ class Mixin(vvMixinInit.Mixin): # c_from_r = {'variant': variant, 'type': ':c.'} # return c_from_r # - # def refseq(self, variant, vmOld, refseq_ac, hpOld, evm, hdpOld, primary_assembly): + # def refseq(self, variant, refseq_ac, evm, primary_assembly): # """ # Maps transcript variant descriptions onto specified RefSeqGene reference sequences # Return an hgvs object containing the genomic sequence variant relative to the RefSeqGene @@ -81,7 +81,7 @@ class Mixin(vvMixinInit.Mixin): # # Return as an error if all fail # return ref_g_dict # - # def g_to_c(self, var_g, tx_ac, hpOld, evm): + # def g_to_c(self, var_g, tx_ac, evm): # """ # Parses genomic variant strings into hgvs objects # Maps genomic hgvs 
object into a coding hgvs object if the c accession string is provided @@ -95,7 +95,7 @@ class Mixin(vvMixinInit.Mixin): # var_c = str(evm.g_to_c(var_g, tx_ac)) # return var_c # - # def g_to_n(self, var_g, tx_ac, hpOld, evm): + # def g_to_n(self, var_g, tx_ac, evm): # """ # Parses genomic variant strings into hgvs objects # Maps genomic hgvs object into a non-coding hgvs object if the n accession string is provided @@ -109,7 +109,7 @@ class Mixin(vvMixinInit.Mixin): # var_n = str(evm.g_to_n(var_g, tx_ac)) # return var_n - def coding(self, variant, hpOld): + def coding(self, variant): """ Ensures variant strings are transcript c. or n. returns parsed hgvs c. or n. object @@ -141,7 +141,7 @@ def genomic(self, variant, evm, primary_assembly, hn): var_g = self.hp.parse_hgvs_variant(variant) return var_g - # def hgvs_genomic(self, variant, hpOld): + # def hgvs_genomic(self, variant): # """ # Ensures variant strings are g. # returns parsed hgvs g. object @@ -1499,7 +1499,7 @@ def hgvs_r_to_c(self, hgvs_object): # hgvs_object.posedit.edit = edit # return hgvs_object - # def tx_identity_info(self, variant, hdpOld): + # def tx_identity_info(self, variant): # """ # Input c. r. n. variant string # Use uta.py (hdp) to return the identity information for the transcript variant @@ -1535,7 +1535,7 @@ def hgvs_r_to_c(self, hgvs_object): # # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list # return tx_id_info - # def tx_id_info(self, alt_ac, hdpOld): + # def tx_id_info(self, alt_ac): # """ # Input c. r. 
nd accession string # Use uta.py (hdp) to return the identity information for the transcript variant @@ -1545,7 +1545,7 @@ def hgvs_r_to_c(self, hgvs_object): # # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list # return tx_id_info - # def tx_for_gene(self, hgnc, hdpOld): + # def tx_for_gene(self, hgnc): # """ # Use uta.py (hdp) to return the transcript information for a specified gene (HGNC SYMBOL) # see hgvs.dataproviders.uta.py for details @@ -1780,7 +1780,7 @@ def revcomp(self, bases): revcomp = revcomp[::-1] return revcomp - def merge_hgvs_3pr(self, hgvs_variant_list,hn): + def merge_hgvs_3pr(self, hgvs_variant_list, hn): """ Function designed to merge multiple HGVS variants (hgvs objects) into a single delins using 3 prime normalization @@ -2159,7 +2159,7 @@ def hgvs_alleles(self, variant_description, hn): except Exception as e: raise fn.alleleVariantError(str(e)) - def chr_to_rsg(self, hgvs_genomic, hn, vrOld): + def chr_to_rsg(self, hgvs_genomic, hn): """ # Covert chromosomal HGVS description to RefSeqGene """ @@ -2296,7 +2296,7 @@ def chr_to_rsg(self, hgvs_genomic, hn, vrOld): # Return the required data. 
This is a dictionary containing the rsg description, validation status and gene ID return descriptions - def rsg_to_chr(self, hgvs_refseqgene, primary_assembly, hn, vr): + def rsg_to_chr(self, hgvs_refseqgene, primary_assembly, hn): """ # Covert RefSeqGene HGVS description to Chromosomal @@ -2358,7 +2358,7 @@ def rsg_to_chr(self, hgvs_refseqgene, primary_assembly, hn, vr): hgvs_genomic = self.hp.parse_hgvs_variant(chr_description) hgvs_genomic = hn.normalize(hgvs_genomic) try: - vr.validate(hgvs_genomic) + self.vr.validate(hgvs_genomic) except hgvs.exceptions.HGVSError as e: error = str(e) if 'does not agree with reference sequence' in error: @@ -2414,7 +2414,7 @@ def rsg_to_chr(self, hgvs_refseqgene, primary_assembly, hn, vr): hgvs_genomic = self.hp.parse_hgvs_variant(chr_description) hgvs_genomic = hn.normalize(hgvs_genomic) try: - vr.validate(hgvs_genomic) + self.vr.validate(hgvs_genomic) except hgvs.exceptions.HGVSError as e: error = str(e) if 'does not agree with reference sequence' in error: From 07290fcc7baecee5586e1f948ab65d3658434e62 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 21 Jun 2019 14:25:15 +0100 Subject: [PATCH 147/223] Refactored and cleaned up vvDatabase --- VariantValidator/modules/vvDatabase.py | 451 +++--------------------- VariantValidator/modules/vvMixinInit.py | 4 +- VariantValidator/update_vv_db.py | 4 +- 3 files changed, 56 insertions(+), 403 deletions(-) diff --git a/VariantValidator/modules/vvDatabase.py b/VariantValidator/modules/vvDatabase.py index 4eddf87a..da3d7b81 100644 --- a/VariantValidator/modules/vvDatabase.py +++ b/VariantValidator/modules/vvDatabase.py @@ -1,17 +1,12 @@ from .logger import Logger -from . import utils as fn +from . import utils from .utils import handleCursor -#from vvDBInsert import vvDBInsert -#from vvDBGet import vvDBGet from . 
import vvDBInsert -import urllib.request, urllib.error, urllib.parse -import copy - import re -import os -class vvDatabase(vvDBInsert.Mixin): - ''' + +class Database(vvDBInsert.Mixin): + """ This class contains and handles the mysql connections for the variant validator database. It now uses mixins, and the order of inheritance is @@ -22,39 +17,42 @@ class vvDatabase(vvDBInsert.Mixin): vvDBInsert.Mixin v vvDatabase - ''' + """ # from dbquery @handleCursor - def query_with_fetchone(self,entry, table): - #if table == 'transcript_info': - query = "SELECT refSeqID, description, transcriptVariant, currentVersion, hgncSymbol, utaSymbol, updated, IF(updated < NOW() - INTERVAL 3 MONTH , 'true', 'false') FROM transcript_info WHERE refSeqID = '%s'" %(entry) + def query_with_fetchone(self, entry): + query = "SELECT refSeqID, description, transcriptVariant, currentVersion, hgncSymbol, utaSymbol, updated, " \ + "IF(updated < NOW() - INTERVAL 3 MONTH , 'true', 'false') FROM transcript_info WHERE " \ + "refSeqID = '%s'" % entry self.cursor.execute(query) row = self.cursor.fetchone() if row is None: row = ['none', 'No data'] Logger.debug("No data returned from query " + str(query)) return row + # From data def data_add(self, accession, validator): - ''' + """ # Add accurate transcript descriptions to the database :param accession: + :param validator: :return: - ''' + """ self.update_transcript_info_record(accession, validator) entry = self.in_entries(accession, 'transcript_info') return entry - def in_entries(self,entry, table): - ''' + def in_entries(self, entry, table): + """ Retrieve transcript information :param entry: :param table: :return: - ''' - data={} + """ + data = {} if table == 'transcript_info': - row = self.query_with_fetchone(entry, table) + row = self.query_with_fetchone(entry) if row[0] == 'error': data['error'] = row[0] data['description'] = row[1] @@ -71,10 +69,11 @@ def in_entries(self,entry, table): data['updated'] = row[6] data['expiry'] = row[7] return data 
- def update_transcript_info_record(self,accession, validator): - ''' - # Search Entrez for corresponding record for the RefSeq ID - ''' + + def update_transcript_info_record(self, accession, validator): + """ + Search Entrez for corresponding record for the RefSeq ID + """ # Prime these entries, just in case. previous_entry = self.in_entries(accession, 'transcript_info') accession = accession @@ -91,7 +90,7 @@ def update_transcript_info_record(self,accession, validator): variant = '0' if 'transcript variant' in description: - tv = re.search('transcript variant \w+', description) + tv = re.search(r'transcript variant \w+', description) tv = str(tv.group(0)) tv = tv.replace('transcript variant', '') variant = tv.strip() @@ -110,14 +109,14 @@ def update_transcript_info_record(self,accession, validator): uta_symbol = str(uta_info[6]) # First perform a search against the input gene symbol or the symbol inferred from UTA - initial = fn.hgnc_rest(path = "/fetch/symbol/" + uta_symbol) + initial = utils.hgnc_rest(path="/fetch/symbol/" + uta_symbol) # Check for a record if str(initial['record']['response']['numFound']) != '0': hgnc_symbol = uta_symbol # No record found, is it a previous symbol? 
else: # Search hgnc rest to see if symbol is out of date - rest_data = fn.hgnc_rest(path = "/search/prev_symbol/" + uta_symbol) + rest_data = utils.hgnc_rest(path="/search/prev_symbol/" + uta_symbol) # If the name is correct no record will be found if rest_data['error'] == 'false': if int(rest_data['record']['response']['numFound']) == 0: @@ -137,7 +136,7 @@ def update_transcript_info_record(self,accession, validator): # Query information # query_info = [accession, description, variant, version, hgnc_symbol, uta_symbol] query_info = [version, description, variant, version, hgnc_symbol, uta_symbol] - table='transcript_info' + table = 'transcript_info' # Update the transcript_info table (needs plugging in) returned_data = self.in_entries(version, table) @@ -149,399 +148,53 @@ def update_transcript_info_record(self,accession, validator): self.update_entry(version, query_info, table) return - def update_refSeqGene_loci(self,rsg_data): + def update_refseqgene_loci(self, rsg_data): # First query the database entry_exists = self.get_refSeqGene_data_by_refSeqGeneID(rsg_data[0], rsg_data[2]) if entry_exists[0] == 'none': self.insert_refSeqGene_data(rsg_data) else: self.update_refSeqGene_data(rsg_data) - def update_lrg_rs_lookup(self,lrg_rs_lookup): + + def update_lrg_rs_lookup(self, lrg_rs_lookup): # First query the database - rsgID = self.get_RefSeqGeneID_from_lrgID(lrg_rs_lookup[0]) - if rsgID == 'none': + rsg_id = self.get_RefSeqGeneID_from_lrgID(lrg_rs_lookup[0]) + if rsg_id == 'none': self.insert_RefSeqGeneID_from_lrgID(lrg_rs_lookup) - def update_lrgt_rst(self,lrgtx_to_rstID): + + def update_lrgt_rst(self, lrgtx_to_rst_id): # First query the database - rstID = self.get_RefSeqTranscriptID_from_lrgTranscriptID(lrgtx_to_rstID[0]) - if rstID == 'none': - self.insert_LRG_transcript_data(lrgtx_to_rstID) - def update_lrg_p_rs_p_lookup(self,lrg_p, rs_p): + rst_id = self.get_RefSeqTranscriptID_from_lrgTranscriptID(lrgtx_to_rst_id[0]) + if rst_id == 'none': + 
self.insert_LRG_transcript_data(lrgtx_to_rst_id) + + def update_lrg_p_rs_p_lookup(self, lrg_p, rs_p): # First query the database - rspID = self.get_RefSeqProteinID_from_lrgProteinID(lrg_p) - if rspID == 'none': + rsp_id = self.get_RefSeqProteinID_from_lrgProteinID(lrg_p) + if rsp_id == 'none': self.insert_LRG_protein_data(lrg_p, rs_p) - # From variantValidator.py - def update_vv_data(self): - # Update refSeqGene Primary assembly alignment data - self.update_rsg() - # Update LRG records - self.update_lrg() - # From update_refseqgene_nomissmatch.py - def update_rsg(self): - Logger.info('Updating RefSeqGene no Missmatch MySQL data') - # Set os path - # Set up os paths data and log folders - ROOT = os.path.dirname(os.path.abspath(__file__)) - - # Download data from RefSeqGene - # Download data - rsg = urllib.request.Request('http://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/gene_RefSeqGene') - response = urllib.request.urlopen(rsg) - rsg_file = response.read() - rsg_data_line = rsg_file.split('\n') - rsg_data = [] - for data in rsg_data_line: - rsg_data.append(data) - - # Download data - grch37 = urllib.request.Request( - 'http://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/GCF_000001405.25_refseqgene_alignments.gff3') - response = urllib.request.urlopen(grch37) - grch37_file = response.read() - grch37_data_line = grch37_file.split('\n') - grch37_align_data = [] - for data in grch37_data_line: - grch37_align_data.append(data) - - # Download data - grch38 = urllib.request.Request( - 'http://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/GCF_000001405.28_refseqgene_alignments.gff3') - response = urllib.request.urlopen(grch38) - grch38_file = response.read() - grch38_data_line = grch38_file.split('\n') - grch38_align_data = [] - for data in grch38_data_line: - grch38_align_data.append(data) - - # Open Lists - # rsg_data = open(os.path.join(ROOT, 'gene_RefSeqGene'), 'r') - rsg_id_info = [] - # grch37_align_data = open(os.path.join(ROOT, 
'GCF_000001405.25_refseqgene_alignments.gff3'), 'r') - grch37_align = [] - # grch38_align_data = open(os.path.join(ROOT, 'GCF_000001405.28_refseqgene_alignments.gff3'), 'r') - grch38_align = [] - - # Place the required data from each file into a dictionary - hash = re.compile('#') - for line in rsg_data: - if hash.search(line): - pass - else: - line = line.strip() - info = line.split() - if len(info) == 0: - pass - else: - dict = {'symbol': info[2], 'rsg_id': info[3], 'gene_id': info[1]} - rsg_id_info.append(dict) - - # Create dictionary to store RefSeqGene and gene symbol data NOTE RefSeqGene ID stored without version number! - rsg_to_symbol = {} - # Collect the data - for ent in rsg_id_info: - rsg_id = copy.deepcopy(ent['rsg_id']) - rsg_id = rsg_id.split('.')[0] - rsg_to_symbol[rsg_id] = {'symbol': ent['symbol'], 'gene_id': ent['gene_id']} - - # Count total number of NG to NC mappings - total_rsg_to_nc = 0 - total_rsg_to_nc_rejected = 0 - for line in grch37_align_data: - # Count NG_ to NC_ and remove the entries we don't care about! 
- if re.search('NC_', line) and re.search('NG_', line): - total_rsg_to_nc = total_rsg_to_nc + 1 - else: - continue - if hash.search(line): - pass - elif not re.search('gap_count=0', line): - if re.search('NC_', line) and re.search('NG_', line): - total_rsg_to_nc_rejected = total_rsg_to_nc_rejected + 1 - # print line - pass - else: - line = line.strip() - info = line.split('\t') - if len(info) != 9: - pass - else: - metrics = info[8].split(';') - id_ori = metrics[1].replace('Target=', '') - id_ori_list = id_ori.split() - dict = {'rsg_id': id_ori_list[0], 'chr_id': info[0], 'rsg_start': info[3], 'rsg_end': info[4], - 'ori': id_ori_list[3]} - grch37_align.append(dict) - - for line in grch38_align_data: - if re.search('NC_', line) and re.search('NG_', line): - total_rsg_to_nc = total_rsg_to_nc + 1 - else: - continue - if hash.search(line): - pass - elif not re.search('gap_count=0', line): - if re.search('NC_', line) and re.search('NG_', line): - total_rsg_to_nc_rejected = total_rsg_to_nc_rejected + 1 - # print line - pass - else: - line = line.strip() - info = line.split('\t') - if len(info) != 9: - pass - else: - metrics = info[8].split(';') - id_ori = metrics[1].replace('Target=', '') - id_ori_list = id_ori.split() - dict = {'rsg_id': id_ori_list[0], 'chr_id': info[0], 'rsg_start': info[3], 'rsg_end': info[4], - 'ori': id_ori_list[3]} - grch38_align.append(dict) - - # Create a data array containing the database - db = [] - # map line - for line in grch37_align: - ml = [] - link = line['rsg_id'] - ml.append(link) - ml.append(line['chr_id']) - ml.append('GRCh37') - ml.append(line['rsg_start']) - ml.append(line['rsg_end']) - ml.append(line['ori']) - # Add the additional data from rsg_id_info - for data in rsg_id_info: - if link == data['rsg_id']: - ml.append(data['symbol']) - ml.append(data['gene_id']) - else: - continue - # Create the entry and append to db - db.append(ml) - - for line in grch38_align: - ml = [] - link = line['rsg_id'] - ml.append(link) - 
ml.append(line['chr_id']) - ml.append('GRCh38') - ml.append(line['rsg_start']) - ml.append(line['rsg_end']) - ml.append(line['ori']) - # Add the additional data from rsg_id_info - for data in rsg_id_info: - if link == data['rsg_id']: - ml.append(data['symbol']) - ml.append(data['gene_id']) - else: - continue - # Create the entry and append to db - db.append(ml) - - # Known missing identifiers - known = { - 'NG_021289.1' : {'symbol' : 'CFAP47', 'gene_id' : '286464'}, - 'NG_027707.1' : {'symbol' : 'DUX4L1', 'gene_id' : '22947'}, - 'NG_033266.1' : {'symbol' : 'DSE', 'gene_id': '29940'}, - 'NG_061543.1' : {'symbol' : 'CYP1A2', 'gene_id': '1544'}, - 'NG_061374.1' : {'symbol' : 'CYP1A1', 'gene_id': '1543'}, - 'NG_059281.1' : {'symbol' : 'HBB', 'gene_id': '3043'}, - 'NG_012639.1' : {'symbol' : 'VHLL', 'gene_id': '391104'}, - 'NG_059186.1' : {'symbol' : 'HBA1', 'gene_id': '3040'}, - 'NG_059271.1' : {'symbol' : 'HBA2', 'gene_id': '3040'} - } - # Known Obsolete identifiers - obsolete = { - 'NG_016553.1': 'OBSOLETE', - 'NG_012639.1': 'Removed due to questionable status' - } - - # Identify lines with missing data e.g. gene symbols - for line in db: - try: - line[6] - except IndexError: - try: - identifier = copy.deepcopy(line[0]) - identifier = identifier.split('.')[0] - line.append(rsg_to_symbol[identifier]['symbol']) - line.append(rsg_to_symbol[identifier]['gene_id']) - except KeyError: - try: - line.append(known[line[0]]['symbol']) - line.append(known[line[0]]['gene_id']) - except KeyError: - check = obsolete[line[0]] - Logger.info(str(line[0]) + ' : ' + check) - - # Open a text file to be used as a simple database and write the database - # rsg_db = open(os.path.join(ROOT, 'rsg_chr_db.txt'), 'w') - - to_mysql = [] - for line in db: - if line[0] in list(obsolete.keys()): - continue - # Only gap-less RefSeqGenes will have passed. 
The rest will be alternatively curated - write = [] - # Take the mapping data - write = copy.deepcopy(line[0:6]) - # add RSG ranges - write.append('1') - end_rsg = int(line[4]) - int(line[3]) + 1 - end_rsg = str(end_rsg) - write.append(end_rsg) - # Create block data chr then rsg - chr_block = str(line[3]) + '-' + str(line[4]) - write.append(chr_block) - rsg_block = str(write[6]) + '-' + str(write[7]) - write.append(rsg_block) - # Add gene ID and Gene symbol(s) - write.append(line[7]) - write.append(line[6]) - # write_me = '\t'.join(write) - # rsg_db.write(write_me + '\n') - del write[6] - to_mysql.append(write) - - # Set up code to write to database - for line in to_mysql: - current_symbol = self.get_gene_symbol_from_refSeqGeneID(line[0]) - if line[10] == current_symbol: - pass - else: - if current_symbol != 'none': - line[10] = current_symbol - else: - pass - self.update_refSeqGene_loci(line) - - # Close database - # rsg_db.close() - - Logger.info('Total NG_ to NC_ alignments = ' + str(total_rsg_to_nc)) - Logger.info('Gapps within NG_ to NC_ alignments = ' + str(total_rsg_to_nc_rejected)) - - Logger.info('complete') - return - #from compile_lrg_data, this function was originally just called "update" - def update_lrg(self): - Logger.info('Updating LRG lookup tables') - lr2rs_download = urllib.request.Request('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_transcripts_xrefs.txt') - # Open and read - lr2rs_data = urllib.request.urlopen(lr2rs_download) - lr2rs = lr2rs_data.read() - # List the data - lr2rs = lr2rs.strip() - lr2rs = lr2rs.split('\n') - - # Download - lrg_status_download = urllib.request.Request('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_GRCh38.txt') - # Open and read - lrg_status_data = urllib.request.urlopen(lrg_status_download) - lrg_status = lrg_status_data.read() - # List the data - lrg_status = lrg_status.strip() - lrg_status = lrg_status.split('\n') - - # Download - rs2lr_download = 
urllib.request.Request('http://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/RefSeqGene/LRG_RefSeqGene') - # Open and read - rs2lr_data = urllib.request.urlopen(rs2lr_download) - rs2lr = rs2lr_data.read() - # List the data - rs2lr = rs2lr.strip() - rs2lr = rs2lr.split('\n') - - # Download LRG transcript (_t) to LRG Protein (__p) data file - lr_t2p_downloaded = urllib.request.Request('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_proteins_RefSeq.txt') - # Open and read - lr_t2p_data = urllib.request.urlopen(lr_t2p_downloaded) - lr_t2p = lr_t2p_data.read() - # List the data - lr_t2p = lr_t2p.strip() - lr_t2p = lr_t2p.split('\n') - - # Dictionary the status by LRG_ID - lrg_status_dict = {} - # Compile dictionary - for line in lrg_status: - if re.search('^#', line): - continue - else: - list = line.split() - lrgID = list[0] - stat = list[2] - lrg_status_dict[lrgID] = stat - - # Required lookup tables - # LRG_ID GeneSymbol RefSeqGeneID status - # LRG_ID RefSeqTranscriptID - # LRG_T2LRG_P - - Logger.info('Update LRG and LRG_transcript lookup tables') - # Populate lists lrg_rs_lookup (LRG to RefSeqGene) and lrg_t2nm_ (LRG Transcript to RefSeq Transcript) - for line in lr2rs: - if re.search('^#', line): - continue - else: - list = line.split() - # Assign objects - lrg_id = list[0] - symbol = list[1] - rsgid = list[2] - lrg_tx = str(list[0]) + str(list[3]) - rstid = list[4] - status = lrg_status_dict[lrg_id] - # pass data to relevant lists - # lrg_rs_lookup - lrg_rs_lookup = [lrg_id, symbol, rsgid, status] - - # update LRG to RefSeqGene database - self.update_lrg_rs_lookup(lrg_rs_lookup) - - # lrg_t2nm_ - lrgtx_to_rstID = [lrg_tx, rstid] - # update database - self.update_lrgt_rst(lrgtx_to_rstID) - - Logger.info('Update LRG protein lookup table') - # Populate LRG protein RefSeqProtein lokup table - for line in lr_t2p: - if re.search('^#', line): - continue - else: - list = line.split() - # Assign objects - lrg_p = list[0] - rs_p = list[1] - # update LRG to RefSeqGene database - 
self.update_lrg_p_rs_p_lookup(lrg_p, rs_p) - - Logger.info('LRG lookup tables updated') - return - #From ref_seq_type - def ref_type_assign(self,accession): + def ref_type_assign(self, accession): if 'NC_' in accession or 'NG_' in accession or 'NT_' in accession or 'NW_' in accession: ref_type = ':g.' - elif re.match('NM_', accession): + elif accession.startswith('NM_'): ref_type = ':c.' - elif re.match('NR_', accession): + elif accession.startswith('NR_'): ref_type = ':n.' - elif re.match('NP_', accession): + elif accession.startswith('NP_'): ref_type = ':p.' - elif re.match('LRG_', accession): - if re.search('t', accession): + elif accession.startswith('LRG_'): + if 't' in accession: refseqtranscript_reference = self.get_RefSeqTranscriptID_from_lrgTranscriptID(accession) - if re.match('NM_', refseqtranscript_reference): + if refseqtranscript_reference.startswith('NM_'): ref_type = ':c.' else: ref_type = ':n.' - elif re.search('_p', accession): + elif '_p' in accession: ref_type = ':p.' else: ref_type = ':g.' + else: + # shouldn't reach this point + raise Exception('Unable to recognise accession') return ref_type - - diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index 19448d4b..7fea22d8 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -17,7 +17,7 @@ import re import copy -from .vvDatabase import vvDatabase +from .vvDatabase import Database from . 
import utils from VariantValidator.settings import CONFIG_DIR from VariantValidator.version import __version__ @@ -94,7 +94,7 @@ def __init__(self): 'raise_on_warnings': True } # Create database access objects - self.db = vvDatabase(self.dbConfig) + self.db = Database(self.dbConfig) # Set up versions self.version = __version__ diff --git a/VariantValidator/update_vv_db.py b/VariantValidator/update_vv_db.py index 72202229..6c8f5656 100644 --- a/VariantValidator/update_vv_db.py +++ b/VariantValidator/update_vv_db.py @@ -20,7 +20,7 @@ def connect(): 'raise_on_warnings': True } # Create database access objects - db = vvDatabase.vvDatabase(dbConfig) + db = vvDatabase.Database(dbConfig) return db @@ -184,7 +184,7 @@ def update_refseq(dbcnx): if line[10] != current_symbol: if current_symbol != 'none': line[10] = current_symbol - dbcnx.update_refSeqGene_loci(line) + dbcnx.update_refseqgene_loci(line) print('Total NG_ to NC_ alignments = ' + str(total_rsg_to_nc)) print('Gaps within NG_ to NC_ alignments = ' + str(total_rsg_to_nc_rejected)) From 5e0501d177e98a77d16efed5099d9c49734ade62 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 21 Jun 2019 15:19:26 +0100 Subject: [PATCH 148/223] Refactored DatabaseMixinGet --- VariantValidator/modules/format_converters.py | 14 ++-- VariantValidator/modules/hgvs_utils.py | 2 +- VariantValidator/modules/use_checking.py | 2 +- VariantValidator/modules/vvDBGet.py | 72 ++++++++++--------- VariantValidator/modules/vvDatabase.py | 10 +-- VariantValidator/modules/vvMixinConverters.py | 6 +- VariantValidator/modules/vvMixinCore.py | 12 ++-- VariantValidator/update_vv_db.py | 2 +- 8 files changed, 62 insertions(+), 58 deletions(-) diff --git a/VariantValidator/modules/format_converters.py b/VariantValidator/modules/format_converters.py index 0ed7ab64..eb721aa8 100644 --- a/VariantValidator/modules/format_converters.py +++ b/VariantValidator/modules/format_converters.py @@ -180,9 +180,9 @@ def vcf2hgvs_stage2(variant, validator): ref_type = 
validator.db.ref_type_assign(accession) if re.match('LRG_', accession): if ref_type == ':g.': - accession = validator.db.get_RefSeqGeneID_from_lrgID(accession) + accession = validator.db.get_refseq_id_from_lrg_id(accession) else: - accession = validator.db.get_RefSeqTranscriptID_from_lrgTranscriptID(accession) + accession = validator.db.get_refseq_transcript_id_from_lrg_transcript_id(accession) else: accession = accession variant.quibble = str(accession) + ref_type + str(position_and_edit) @@ -318,7 +318,7 @@ def refseq_catch(variant, validator, select_transcripts_dict_plus_version): if variant.quibble.startswith('NG_'): ref_seq_gene_id = variant.quibble.split(':')[0] tx_edit = variant.quibble.split(':')[1] - gene_symbol = validator.db.get_gene_symbol_from_refSeqGeneID(ref_seq_gene_id) + gene_symbol = validator.db.get_gene_symbol_from_refseq_id(ref_seq_gene_id) if gene_symbol != 'none': uta_symbol = validator.db.get_uta_symbol(gene_symbol) available_transcripts = validator.hdp.get_tx_for_gene(uta_symbol) @@ -589,7 +589,7 @@ def allele_parser(variant, validation): elif re.match(r'^LRG_\d+:g.', variant.quibble) or re.match(r'^LRG_\d+:p.', variant.quibble) \ or re.match(r'^LRG_\d+:c.', variant.quibble) or re.match(r'^LRG_\d+:n.', variant.quibble): lrg_reference, variation = variant.quibble.split(':') - refseqgene_reference = validation.db.get_RefSeqGeneID_from_lrgID(lrg_reference) + refseqgene_reference = validation.db.get_refseq_id_from_lrg_id(lrg_reference) if refseqgene_reference != 'none': variant.quibble = refseqgene_reference + ':' + variation if caution == '': @@ -603,7 +603,7 @@ def allele_parser(variant, validation): elif re.match(r'^LRG_\d+t\d+:c.', variant.quibble) or re.match(r'^LRG_\d+t\d+:n.', variant.quibble) or \ re.match(r'^LRG_\d+t\d+:p.', variant.quibble) or re.match(r'^LRG_\d+t\d+:g.', variant.quibble): lrg_reference, variation = variant.quibble.split(':') - refseqtranscript_reference = validation.db.get_RefSeqTranscriptID_from_lrgTranscriptID( + 
refseqtranscript_reference = validation.db.get_refseq_transcript_id_from_lrg_transcript_id( lrg_reference) if refseqtranscript_reference != 'none': variant.quibble = refseqtranscript_reference + ':' + variation @@ -659,7 +659,7 @@ def lrg_to_refseq(variant, validator): if re.match(r'^LRG_\d+t\d+:', variant.quibble): lrg_reference, variation = variant.quibble.split(':') - refseqtrans_reference = validator.db.get_RefSeqTranscriptID_from_lrgTranscriptID(lrg_reference) + refseqtrans_reference = validator.db.get_refseq_transcript_id_from_lrg_transcript_id(lrg_reference) if refseqtrans_reference != 'none': variant.hgvs_formatted.ac = refseqtrans_reference variant.set_quibble(str(variant.hgvs_formatted)) @@ -668,7 +668,7 @@ def lrg_to_refseq(variant, validator): Logger.warning(caution) elif re.match(r'^LRG_\d+:', variant.quibble): lrg_reference, variation = variant.quibble.split(':') - refseqgene_reference = validator.db.get_RefSeqGeneID_from_lrgID(lrg_reference) + refseqgene_reference = validator.db.get_refseq_id_from_lrg_id(lrg_reference) if refseqgene_reference != 'none': variant.hgvs_formatted.ac = refseqgene_reference variant.set_quibble(str(variant.hgvs_formatted)) diff --git a/VariantValidator/modules/hgvs_utils.py b/VariantValidator/modules/hgvs_utils.py index d89d6721..36125703 100644 --- a/VariantValidator/modules/hgvs_utils.py +++ b/VariantValidator/modules/hgvs_utils.py @@ -71,7 +71,7 @@ def pvcf_to_hgvs(query, selected_assembly, normalization_direction, reverse_norm # Assign reference sequence type ref_type = ':g.' 
if 'LRG_' in accession: - accession = validator.db.get_RefSeqGeneID_from_lrgID(accession) + accession = validator.db.get_refseq_id_from_lrg_id(accession) # Reformat the variant query = str(accession) + ref_type + str(position_and_edit) diff --git a/VariantValidator/modules/use_checking.py b/VariantValidator/modules/use_checking.py index e80029d8..abcbbb24 100644 --- a/VariantValidator/modules/use_checking.py +++ b/VariantValidator/modules/use_checking.py @@ -64,7 +64,7 @@ def structure_checks(variant, validator): """ input_parses = validator.hp.parse_hgvs_variant(variant.quibble) variant.input_parses = input_parses - variant.gene_symbol = validator.db.get_gene_symbol_from_transcriptID(variant.input_parses.ac) + variant.gene_symbol = validator.db.get_gene_symbol_from_transcript_id(variant.input_parses.ac) if variant.gene_symbol == 'none': variant.gene_symbol = '' if input_parses.type == 'g': diff --git a/VariantValidator/modules/vvDBGet.py b/VariantValidator/modules/vvDBGet.py index 07dedb4b..30a57331 100644 --- a/VariantValidator/modules/vvDBGet.py +++ b/VariantValidator/modules/vvDBGet.py @@ -18,20 +18,20 @@ def execute(self, query): return row @handleCursor - def executeAll(self, query): + def execute_all(self, query): self.cursor.execute(query) rows = self.cursor.fetchall() - if rows == []: + if not rows: Logger.debug("No data returned from query " + str(query)) rows = ['none', 'No data'] return rows # from dbfetchone - def get_utaSymbol(self, gene_symbol): + def get_uta(self, gene_symbol): query = "SELECT utaSymbol FROM transcript_info WHERE hgncSymbol = '%s'" % gene_symbol return self.execute(query) - def get_hgncSymbol(self, gene_symbol): + def get_hgnc(self, gene_symbol): query = "SELECT hgncSymbol FROM transcript_info WHERE utaSymbol = '%s'" % gene_symbol return self.execute(query) @@ -39,72 +39,76 @@ def get_transcript_description(self, transcript_id): query = "SELECT description FROM transcript_info WHERE refSeqID = '%s'" % transcript_id return 
str(self.execute(query)[0]) - def get_gene_symbol_from_transcriptID(self, transcript_id): + def get_gene_symbol_from_transcript_id(self, transcript_id): query = "SELECT hgncSymbol FROM transcript_info WHERE refSeqID = '%s'" % transcript_id return str(self.execute(query)[0]) - def get_refSeqGene_data_by_refSeqGeneID(self, refSeqGeneID, genomeBuild): - query = "SELECT refSeqGeneID, refSeqChromosomeID, genomeBuild, startPos, endPos, orientation, totalLength, chrPos, rsgPos, entrezID, hgncSymbol FROM refSeqGene_loci WHERE refSeqGeneID = '%s' AND genomeBuild = '%s'" % (refSeqGeneID, genomeBuild) + def get_refseq_data_by_refseq_id(self, refseq_id, genome_build): + query = "SELECT refSeqGeneID, refSeqChromosomeID, genomeBuild, startPos, endPos, orientation, totalLength, " \ + "chrPos, rsgPos, entrezID, hgncSymbol FROM refSeqGene_loci WHERE refSeqGeneID = '%s' " \ + "AND genomeBuild = '%s'" % (refseq_id, genome_build) return self.execute(query) - def get_gene_symbol_from_refSeqGeneID(self, refSeqGeneID): - query = "SELECT hgncSymbol FROM refSeqGene_loci WHERE refSeqGeneID = '%s'" % refSeqGeneID + def get_gene_symbol_from_refseq_id(self, refseq_id): + query = "SELECT hgncSymbol FROM refSeqGene_loci WHERE refSeqGeneID = '%s'" % refseq_id return self.execute(query)[0] - #get_refseqgeneId_from_lrgID - def get_RefSeqGeneID_from_lrgID(self, lrgID): - query = "SELECT RefSeqGeneID FROM LRG_RSG_lookup WHERE lrgID = '%s'" % lrgID + def get_refseq_id_from_lrg_id(self, lrg_id): + query = "SELECT RefSeqGeneID FROM LRG_RSG_lookup WHERE lrgID = '%s'" % lrg_id return self.execute(query)[0] - def get_RefSeqTranscriptID_from_lrgTranscriptID(self, lrgtxID): - query = "SELECT RefSeqTranscriptID FROM LRG_transcripts WHERE LRGtranscriptID = '%s'" % lrgtxID + def get_refseq_transcript_id_from_lrg_transcript_id(self, lrg_tx_id): + query = "SELECT RefSeqTranscriptID FROM LRG_transcripts WHERE LRGtranscriptID = '%s'" % lrg_tx_id return self.execute(query)[0] - def 
get_lrgTranscriptID_from_RefSeqTranscriptID(self, rstID): - query = "SELECT LRGtranscriptID FROM LRG_transcripts WHERE RefSeqTranscriptID = '%s'" % rstID + def get_lrg_transcript_id_from_refseq_transcript_id(self, rst_id): + query = "SELECT LRGtranscriptID FROM LRG_transcripts WHERE RefSeqTranscriptID = '%s'" % rst_id return self.execute(query)[0] - def get_lrgID_from_RefSeqGeneID(self, rsgID): - query = "SELECT lrgID, status FROM LRG_RSG_lookup WHERE RefSeqGeneID = '%s'" % rsgID + def get_lrg_id_from_refseq_gene_id(self, rsg_id): + query = "SELECT lrgID, status FROM LRG_RSG_lookup WHERE RefSeqGeneID = '%s'" % rsg_id return self.execute(query) def get_refseqgene_info(self, refseqgene_id, primary_assembly): - query = "SELECT refSeqGeneID, refSeqChromosomeID, genomeBuild, startPos, endPos FROM refSeqGene_loci WHERE refSeqGeneID = '%s' AND genomeBuild = '%s'" % (refseqgene_id, primary_assembly) + query = "SELECT refSeqGeneID, refSeqChromosomeID, genomeBuild, startPos, endPos FROM refSeqGene_loci " \ + "WHERE refSeqGeneID = '%s' AND genomeBuild = '%s'" % (refseqgene_id, primary_assembly) return self.execute(query) - def get_RefSeqProteinID_from_lrgProteinID(self, lrg_p): + def get_refseq_protein_id_from_lrg_protein_id(self, lrg_p): query = "SELECT RefSeqProteinID FROM LRG_proteins WHERE LRGproteinID = '%s'" % lrg_p return self.execute(query)[0] - def get_lrgProteinID_from_RefSeqProteinID(self, rs_p): + def get_lrg_protein_id_from_ref_seq_protein_id(self, rs_p): query = "SELECT LRGproteinID FROM LRG_proteins WHERE RefSeqProteinID = '%s'" % rs_p return self.execute(query)[0] - def get_LRG_data_from_LRGid(self, lrg_id): + def get_lrg_data_from_lrg_id(self, lrg_id): query = "SELECT * FROM LRG_RSG_lookup WHERE lrgID = '%s'" % lrg_id return self.execute(query) - #from dbfetchall def get_transcript_info_for_gene(self, gene_symbol): - query = "SELECT refSeqID, description, transcriptVariant, currentVersion, hgncSymbol, utaSymbol, updated, IF(updated < NOW() - INTERVAL 3 MONTH 
, 'true', 'false') FROM transcript_info WHERE hgncSymbol = '%s'" % gene_symbol - return self.executeAll(query) + query = "SELECT refSeqID, description, transcriptVariant, currentVersion, hgncSymbol, utaSymbol, " \ + "updated, IF(updated < NOW() - INTERVAL 3 MONTH , 'true', 'false') FROM transcript_info " \ + "WHERE hgncSymbol = '%s'" % gene_symbol + return self.execute_all(query) def get_g_to_g_info(self): - query = "SELECT refSeqGeneID, refSeqChromosomeID, startPos, endPos, orientation, hgncSymbol, genomeBuild FROM refSeqGene_loci" - return self.executeAll(query) + query = "SELECT refSeqGeneID, refSeqChromosomeID, startPos, endPos, orientation, hgncSymbol, " \ + "genomeBuild FROM refSeqGene_loci" + return self.execute_all(query) - def get_all_transcriptID(self): + def get_all_transcript_id(self): query = "SELECT refSeqID FROM transcript_info" - return self.executeAll(query) + return self.execute_all(query) # Direct methods (GET) def get_uta_symbol(self, gene_symbol): # returns the UTA gene symbol when HGNC gene symbol is input - return str(self.get_utaSymbol(gene_symbol)[0]) + return str(self.get_uta(gene_symbol)[0]) def get_hgnc_symbol(self, gene_symbol): # returns the HGNC gene symbol when UTA gene symbol is input - return str(self.get_hgncSymbol(gene_symbol)[0]) + return str(self.get_hgnc(gene_symbol)[0]) # from external.py def get_urls(self, dict_out): @@ -116,13 +120,14 @@ def get_urls(self, dict_out): '/nuccore/%s' % dict_out['hgvs_transcript_variant'].split(':')[0] if 'NP_' in dict_out['hgvs_predicted_protein_consequence']['slr']: report_urls['protein'] = 'https://www.ncbi.nlm.nih.gov' \ - '/nuccore/%s' % str(dict_out['hgvs_predicted_protein_consequence']['slr']).split(':')[0] + '/nuccore/%s' % str( + dict_out['hgvs_predicted_protein_consequence']['slr']).split(':')[0] if 'NG_' in dict_out['hgvs_refseqgene_variant']: report_urls['refseqgene'] = 'https://www.ncbi.nlm.nih.gov' \ '/nuccore/%s' % dict_out['hgvs_refseqgene_variant'].split(':')[0] if 'LRG' in 
dict_out['hgvs_lrg_variant']: lrg_id = dict_out['hgvs_lrg_variant'].split(':')[0] - lrg_data = self.get_LRG_data_from_LRGid(lrg_id) + lrg_data = self.get_lrg_data_from_lrg_id(lrg_id) lrg_status = str(lrg_data[4]) if lrg_status == 'public': report_urls['lrg'] = 'http://ftp.ebi.ac.uk/pub' \ @@ -135,4 +140,3 @@ def get_urls(self, dict_out): # "http://www.ensembl.org/id/" ? What about historic versions????? return report_urls - diff --git a/VariantValidator/modules/vvDatabase.py b/VariantValidator/modules/vvDatabase.py index da3d7b81..8985f279 100644 --- a/VariantValidator/modules/vvDatabase.py +++ b/VariantValidator/modules/vvDatabase.py @@ -150,7 +150,7 @@ def update_transcript_info_record(self, accession, validator): def update_refseqgene_loci(self, rsg_data): # First query the database - entry_exists = self.get_refSeqGene_data_by_refSeqGeneID(rsg_data[0], rsg_data[2]) + entry_exists = self.get_refseq_data_by_refseq_id(rsg_data[0], rsg_data[2]) if entry_exists[0] == 'none': self.insert_refSeqGene_data(rsg_data) else: @@ -158,19 +158,19 @@ def update_refseqgene_loci(self, rsg_data): def update_lrg_rs_lookup(self, lrg_rs_lookup): # First query the database - rsg_id = self.get_RefSeqGeneID_from_lrgID(lrg_rs_lookup[0]) + rsg_id = self.get_refseq_id_from_lrg_id(lrg_rs_lookup[0]) if rsg_id == 'none': self.insert_RefSeqGeneID_from_lrgID(lrg_rs_lookup) def update_lrgt_rst(self, lrgtx_to_rst_id): # First query the database - rst_id = self.get_RefSeqTranscriptID_from_lrgTranscriptID(lrgtx_to_rst_id[0]) + rst_id = self.get_refseq_transcript_id_from_lrg_transcript_id(lrgtx_to_rst_id[0]) if rst_id == 'none': self.insert_LRG_transcript_data(lrgtx_to_rst_id) def update_lrg_p_rs_p_lookup(self, lrg_p, rs_p): # First query the database - rsp_id = self.get_RefSeqProteinID_from_lrgProteinID(lrg_p) + rsp_id = self.get_refseq_protein_id_from_lrg_protein_id(lrg_p) if rsp_id == 'none': self.insert_LRG_protein_data(lrg_p, rs_p) @@ -185,7 +185,7 @@ def ref_type_assign(self, accession): 
ref_type = ':p.' elif accession.startswith('LRG_'): if 't' in accession: - refseqtranscript_reference = self.get_RefSeqTranscriptID_from_lrgTranscriptID(accession) + refseqtranscript_reference = self.get_refseq_transcript_id_from_lrg_transcript_id(accession) if refseqtranscript_reference.startswith('NM_'): ref_type = ':c.' else: diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index 866d6d86..2191a8d3 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -175,7 +175,7 @@ def myevm_t_to_g(self, hgvs_c, no_norm_evm, primary_assembly, hn): # Gap gene black list try: - gene_symbol = self.db.get_gene_symbol_from_transcriptID(hgvs_c.ac) + gene_symbol = self.db.get_gene_symbol_from_transcript_id(hgvs_c.ac) except Exception: utilise_gap_code = False else: @@ -940,7 +940,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): # Gap gene black list try: - gene_symbol = self.db.get_gene_symbol_from_transcriptID(hgvs_c.ac) + gene_symbol = self.db.get_gene_symbol_from_transcript_id(hgvs_c.ac) except Exception: utilise_gap_code = False else: @@ -1469,7 +1469,7 @@ def hgvs_r_to_c(self, hgvs_object): """ # check for LRG_t with r. 
if 'LRG' in hgvs_object.ac: - transcript_ac = self.db.get_RefSeqTranscriptID_from_lrgTranscriptID(hgvs_object.ac) + transcript_ac = self.db.get_refseq_transcript_id_from_lrg_transcript_id(hgvs_object.ac) if transcript_ac == 'none': raise HGVSDataNotAvailableError('Unable to identify a relevant transcript for ' + hgvs_object.ac) else: diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 20d7af58..87131b23 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -63,7 +63,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr for trans_id in select_transcripts_list: trans_id = trans_id.strip() if 'LRG' in trans_id: - trans_id = self.db.get_RefSeqTranscriptID_from_lrgTranscriptID(trans_id) + trans_id = self.db.get_refseq_transcript_id_from_lrg_transcript_id(trans_id) if trans_id == 'none': continue select_transcripts_dict_plus_version[trans_id] = '' @@ -443,7 +443,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_refseqgene_variant = 'false' else: hgvs_refseqgene_variant = self.hp.parse_hgvs_variant(refseqgene_variant) - rsg_ac = self.db.get_lrgID_from_RefSeqGeneID(str(hgvs_refseqgene_variant.ac)) + rsg_ac = self.db.get_lrg_id_from_refseq_gene_id(str(hgvs_refseqgene_variant.ac)) if rsg_ac[0] == 'none': lrg_variant = '' else: @@ -469,7 +469,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr transcript_accession = hgvs_transcript_variant.ac # Handle LRG - lrg_transcript = self.db.get_lrgTranscriptID_from_RefSeqTranscriptID(transcript_accession) + lrg_transcript = self.db.get_lrg_transcript_id_from_refseq_transcript_id(transcript_accession) if lrg_transcript == 'none': lrg_transcript_variant = '' else: @@ -531,7 +531,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr predicted_protein_variant = variant.protein if 'NP_' in 
predicted_protein_variant: rs_p, pred_prot_posedit = predicted_protein_variant.split(':') - lrg_p = self.db.get_lrgProteinID_from_RefSeqProteinID(rs_p) + lrg_p = self.db.get_lrg_protein_id_from_ref_seq_protein_id(rs_p) if 'LRG' in lrg_p: predicted_protein_variant = rs_p + '(' + lrg_p + '):' + pred_prot_posedit @@ -711,7 +711,7 @@ def gene2transcripts(self, query): # Quick check for LRG elif 'LRG' in query: lrg_id = query.split('T')[0] - lrg_to_hgnc = self.db.get_LRG_data_from_LRGid(lrg_id) + lrg_to_hgnc = self.db.get_lrg_data_from_lrg_id(lrg_id) query = lrg_to_hgnc[2] # Quick check for blank form @@ -814,7 +814,7 @@ def gene2transcripts(self, query): 'coding_end': 'non-coding' }) # LRG information - lrg_transcript = self.db.get_lrgTranscriptID_from_RefSeqTranscriptID(tx) + lrg_transcript = self.db.get_lrg_transcript_id_from_refseq_transcript_id(tx) if lrg_transcript != 'none': genes_and_tx.append({'reference': lrg_transcript, 'description': tx_description, diff --git a/VariantValidator/update_vv_db.py b/VariantValidator/update_vv_db.py index 6c8f5656..6ee8cd96 100644 --- a/VariantValidator/update_vv_db.py +++ b/VariantValidator/update_vv_db.py @@ -180,7 +180,7 @@ def update_refseq(dbcnx): # Set up code to write to database for line in to_mysql: - current_symbol = dbcnx.get_gene_symbol_from_refSeqGeneID(line[0]) + current_symbol = dbcnx.get_gene_symbol_from_refseq_id(line[0]) if line[10] != current_symbol: if current_symbol != 'none': line[10] = current_symbol From c0a2820f0a4d71a8a95ed5fa95ecd6e6dcd27b2d Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 21 Jun 2019 15:29:18 +0100 Subject: [PATCH 149/223] Tidyied up DBInit --- VariantValidator/modules/vvDBInit.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/VariantValidator/modules/vvDBInit.py b/VariantValidator/modules/vvDBInit.py index 6740e520..cae320b2 100644 --- a/VariantValidator/modules/vvDBInit.py +++ b/VariantValidator/modules/vvDBInit.py @@ -12,10 +12,7 @@ def __init__(self, 
dbConfig): # closes connections for you. self.cursor = None self.dbConfig = dbConfig - # Construct database URL - #'mysqlx://vvadmin:var1ant@127.0.0.1/validator' - self.path = "mysqlx://"+dbConfig["user"]+":"+dbConfig["password"]+"@"+dbConfig["host"]+"/"+dbConfig["database"] - os.environ["VALIDATOR_DB_URL"] = self.path + self.pool = mysql.connector.pooling.MySQLConnectionPool(pool_size=10, **self.dbConfig) self.conn = self.pool.get_connection() From 4b6431dce26750f48cbaf546c5759b9a4a5cab54 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 21 Jun 2019 15:43:49 +0100 Subject: [PATCH 150/223] Refactored and tidyied up DBinsert --- VariantValidator/modules/vvDBInsert.py | 66 +++++++++++++++----------- VariantValidator/modules/vvDatabase.py | 12 ++--- 2 files changed, 43 insertions(+), 35 deletions(-) diff --git a/VariantValidator/modules/vvDBInsert.py b/VariantValidator/modules/vvDBInsert.py index e54f0185..9021bf42 100644 --- a/VariantValidator/modules/vvDBInsert.py +++ b/VariantValidator/modules/vvDBInsert.py @@ -1,19 +1,20 @@ from .utils import handleCursor from . import vvDBGet + class Mixin(vvDBGet.Mixin): - ''' + """ This object is a function container for inserting objects into the database. 
- ''' + """ # Add new entry - def add_entry(self,entry, data, table): + def add_entry(self, entry, data, table): return self.insert(entry, data, table) - def insert_transcript_loci(self,add_data, primary_assembly): + + def insert_transcript_loci(self, add_data, primary_assembly): return self.insert_transcript_loci(add_data, primary_assembly) - #from dbinsert @handleCursor - def insert(self,entry, data, table): + def insert(self, entry, data, table): # MySQL queries if table == 'transcript_info': accession = entry @@ -21,8 +22,9 @@ def insert(self,entry, data, table): variant = data[2] version = data[3] hgnc_symbol = data[4] - uta_symbol = data[5] - query = "INSERT INTO transcript_info(refSeqID, description, transcriptVariant, currentVersion, hgncSymbol, utaSymbol, updated) VALUES(%s,%s, %s, %s, %s, %s, NOW())" + uta_symbol = data[5] + query = "INSERT INTO transcript_info(refSeqID, description, transcriptVariant, currentVersion, " \ + "hgncSymbol, utaSymbol, updated) VALUES(%s,%s, %s, %s, %s, %s, NOW())" self.cursor.execute(query, (accession, description, variant, version, hgnc_symbol, uta_symbol)) # Query report if self.cursor.lastrowid: @@ -33,10 +35,14 @@ def insert(self,entry, data, table): # Commit and close connection (?close?) 
self.conn.commit() return success + @handleCursor - def insert_refSeqGene_data(self,rsg_data): - query = "INSERT INTO refSeqGene_loci(refSeqGeneID, refSeqChromosomeID, genomeBuild, startPos, endPos, orientation, totalLength, chrPos, rsgPos, entrezID, hgncSymbol, updated) VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())" - self.cursor.execute(query, (rsg_data[0], rsg_data[1], rsg_data[2], rsg_data[3], rsg_data[4], rsg_data[5], rsg_data[6], rsg_data[7], rsg_data[8], rsg_data[9], rsg_data[10])) + def insert_refseq_gene_data(self, rsg_data): + query = "INSERT INTO refSeqGene_loci(refSeqGeneID, refSeqChromosomeID, genomeBuild, startPos, endPos, " \ + "orientation, totalLength, chrPos, rsgPos, entrezID, hgncSymbol, updated) " \ + "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())" + self.cursor.execute(query, (rsg_data[0], rsg_data[1], rsg_data[2], rsg_data[3], rsg_data[4], rsg_data[5], + rsg_data[6], rsg_data[7], rsg_data[8], rsg_data[9], rsg_data[10])) # Query report if self.cursor.lastrowid: success = 'true' @@ -45,9 +51,10 @@ def insert_refSeqGene_data(self,rsg_data): # Commit and close connection self.conn.commit() return success + @handleCursor - def insert_RefSeqGeneID_from_lrgID(self,lrg_rs_lookup): - query = "INSERT INTO LRG_RSG_lookup(lrgID, hgncSymbol, RefSeqGeneID, status) VALUES(%s,%s,%s,%s)" + def insert_refseq_gene_id_from_lrg_id(self, lrg_rs_lookup): + query = "INSERT INTO LRG_RSG_lookup(lrgID, hgncSymbol, RefSeqGeneID, status) VALUES (%s,%s,%s,%s)" self.cursor.execute(query, (lrg_rs_lookup[0], lrg_rs_lookup[1], lrg_rs_lookup[2], lrg_rs_lookup[3])) # Query report if self.cursor.lastrowid: @@ -57,10 +64,11 @@ def insert_RefSeqGeneID_from_lrgID(self,lrg_rs_lookup): # Commit and close connection self.conn.commit() return success + @handleCursor - def insert_LRG_transcript_data(self,lrgtx_to_rstID): - query = "INSERT INTO LRG_transcripts(LRGtranscriptID, RefSeqTranscriptID) VALUES(%s,%s)" - self.cursor.execute(query, (lrgtx_to_rstID[0], 
lrgtx_to_rstID[1])) + def insert_lrg_transcript_data(self, lrgtx_to_rst_id): + query = "INSERT INTO LRG_transcripts(LRGtranscriptID, RefSeqTranscriptID) VALUES (%s,%s)" + self.cursor.execute(query, (lrgtx_to_rst_id[0], lrgtx_to_rst_id[1])) # Query report if self.cursor.lastrowid: success = 'true' @@ -70,9 +78,10 @@ def insert_LRG_transcript_data(self,lrgtx_to_rstID): # Commit and close connection self.conn.commit() return success + @handleCursor - def insert_LRG_protein_data(self,lrg_p, rs_p): - query = "INSERT INTO LRG_proteins(LRGproteinID, RefSeqProteinID) VALUES(%s,%s)" + def insert_lrg_protein_data(self, lrg_p, rs_p): + query = "INSERT INTO LRG_proteins(LRGproteinID, RefSeqProteinID) VALUES (%s,%s)" self.cursor.execute(query, (lrg_p, rs_p)) # Query report if self.cursor.lastrowid: @@ -83,30 +92,29 @@ def insert_LRG_protein_data(self,lrg_p, rs_p): # Commit and close connection self.conn.commit() return success - # from dbupdate + @handleCursor - def update(self,entry, data, table): - # MySQL queries - #if table == 'transcript_info': + def update(self, entry, data): accession = entry description = data[1] variant = data[2] version = data[3] hgnc_symbol = data[4] - uta_symbol = data[5] - query = "UPDATE transcript_info SET description=%s, transcriptVariant=%s, currentVersion=%s, hgncSymbol=%s, utaSymbol=%s, updated=NOW() WHERE refSeqID = %s" + uta_symbol = data[5] + query = "UPDATE transcript_info SET description=%s, transcriptVariant=%s, currentVersion=%s, hgncSymbol=%s, " \ + "utaSymbol=%s, updated=NOW() WHERE refSeqID = %s" self.cursor.execute(query, (description, variant, version, hgnc_symbol, uta_symbol, accession)) success = 'true' self.conn.commit() return success - # 'true'??? check this. 
+ @handleCursor - def update_refSeqGene_data(self,rsg_data): + def update_refseq_gene_data(self, rsg_data): query = "UPDATE refSeqGene_loci SET hgncSymbol=%s, updated=NOW() WHERE refSeqGeneID=%s" self.cursor.execute(query, (rsg_data[10], rsg_data[0])) success = 'true' self.conn.commit() return success - # Update entries - def update_entry(self,entry, data, table): - return self.update(entry, data, table) + + def update_entry(self, entry, data): + return self.update(entry, data) diff --git a/VariantValidator/modules/vvDatabase.py b/VariantValidator/modules/vvDatabase.py index 8985f279..3b30d794 100644 --- a/VariantValidator/modules/vvDatabase.py +++ b/VariantValidator/modules/vvDatabase.py @@ -145,34 +145,34 @@ def update_transcript_info_record(self, accession, validator): self.add_entry(version, query_info, table) # If the data in the entry has changed, update it else: - self.update_entry(version, query_info, table) + self.update_entry(version, query_info) return def update_refseqgene_loci(self, rsg_data): # First query the database entry_exists = self.get_refseq_data_by_refseq_id(rsg_data[0], rsg_data[2]) if entry_exists[0] == 'none': - self.insert_refSeqGene_data(rsg_data) + self.insert_refseq_gene_data(rsg_data) else: - self.update_refSeqGene_data(rsg_data) + self.update_refseq_gene_data(rsg_data) def update_lrg_rs_lookup(self, lrg_rs_lookup): # First query the database rsg_id = self.get_refseq_id_from_lrg_id(lrg_rs_lookup[0]) if rsg_id == 'none': - self.insert_RefSeqGeneID_from_lrgID(lrg_rs_lookup) + self.insert_refseq_gene_id_from_lrg_id(lrg_rs_lookup) def update_lrgt_rst(self, lrgtx_to_rst_id): # First query the database rst_id = self.get_refseq_transcript_id_from_lrg_transcript_id(lrgtx_to_rst_id[0]) if rst_id == 'none': - self.insert_LRG_transcript_data(lrgtx_to_rst_id) + self.insert_lrg_transcript_data(lrgtx_to_rst_id) def update_lrg_p_rs_p_lookup(self, lrg_p, rs_p): # First query the database rsp_id = self.get_refseq_protein_id_from_lrg_protein_id(lrg_p) 
if rsp_id == 'none': - self.insert_LRG_protein_data(lrg_p, rs_p) + self.insert_lrg_protein_data(lrg_p, rs_p) def ref_type_assign(self, accession): if 'NC_' in accession or 'NG_' in accession or 'NT_' in accession or 'NW_' in accession: From 649d3a43f34c31be78acc5bdba0c58c5396253fb Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 24 Jun 2019 09:22:04 +0100 Subject: [PATCH 151/223] Updated documentation --- README.md | 7 +- bin/variant_validator.py | 4 +- docs/INSTALLATION.md | 235 +++++++++++++++++++-------------------- docs/MANUAL.md | 153 ++++++++++++++++--------- 4 files changed, 216 insertions(+), 183 deletions(-) diff --git a/README.md b/README.md index de6838cb..dc02883e 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Nomenclature. VariantValidator ensures that users are guided through the intricacies of the HGVS nomenclature, e.g. if the user makes a mistake, VariantValidator automatically corrects the mistake if it can, or provides helpful guidance if it cannot. In addition, -VariantValidator accurately interconverts between transcript variant descriptions and +VariantValidator accurately inter-converts between transcript variant descriptions and genomic variant descriptions in HGVS and Variant Call Format (VCF) VariantValidator interfaces with the hgvs package to parse, format, and manipulate @@ -31,14 +31,15 @@ For sequence variations falling within the open reading frames of genes, Variant ## Pre-requisites -Variant Validator will work on Mac OS X or Linux-compatiable computers. +Variant Validator will work on Mac OS X or Linux-compatible computers. Required software: * MySQL * Python 2.7 +* SQLite version 3.8.0 or above + Optional software: * Postgres version 9.5 or above, Postgres 10 is not supported. 
-* SQLite version 3.8.0 or above For installation instructions please see [INSTALLATION.md](docs/INSTALLATION.md) diff --git a/bin/variant_validator.py b/bin/variant_validator.py index 592db80f..bb25a964 100644 --- a/bin/variant_validator.py +++ b/bin/variant_validator.py @@ -23,9 +23,9 @@ def output_results(valoutput, outformat): parser.add_argument('-t', '--transcripts', nargs='?', default='all', help='Transcripts to output results for (default: %(default)s)') parser.add_argument('-s', '--submission', choices=['individual', 'batch'], default='individual', - help='Submit variants individually or as a single batch validation (default: %(default)s') + help='Submit variants individually or as a single batch validation (default: %(default)s)') parser.add_argument('-f', '--output_format', choices=['dict', 'list', 'json'], default='dict', - help='Output validations as a list or as a dictionary (default: %(default)s') + help='Output validations as a list or as a dictionary (default: %(default)s)') parser.add_argument('-o', '--output', type=argparse.FileType('w'), default='-', help='Specifies the output file (default: stdout)') diff --git a/docs/INSTALLATION.md b/docs/INSTALLATION.md index db6b3b46..f704101b 100644 --- a/docs/INSTALLATION.md +++ b/docs/INSTALLATION.md @@ -1,128 +1,117 @@ -# Variant Validator installation instructions - -In these instructions, lines that must be entered at the command prompt are preceded with >, like so: -> ls - -These instructions will allow you to configure the software on Linux. Mac OS X computers operate similarly. - -There are several steps involved in setting up variant validator: -* The application files themselves must be installed from SVN. -* The python environment must be set up. On a LAMP, only a custom version of Python will do. -* Protobuf must be compiled and installed -* Required python packages need to be installed, too. 
-* The databases must be downloaded and set up -* The configuration files must be changed to point the validator at those databases. - -## Virtual environment - -Variant validator currently requires python 2.7. - -When installing Variant Validator it is wise to use a virtual environment, as it requires specific versions of several libraries. -First, download and set up conda (in this case miniconda as we don't need all packages) - > wget https://repo.anaconda.com/miniconda/Miniconda2-latest-Linux-x86_64.sh - > bash Miniconda2-latest-Linux-x86_64.sh - > echo ". /local/miniconda2/etc/profile.d/conda.sh" >> ~/.bashrc - > source ~/.bashrc -Then create the conda environment and install the necessary programs (this should be done in an environment.yml file eventually). Note, installing biotools downgrades the version of setuptools so that needs to be reinstalled before the pip command to install hgvs=1.1.3 - > conda create -n VVenv - > conda activate VVenv - > conda install -c conda-forge sqlite python=2.7 protobuf=3.5.1 docutils python-daemon httplib2 mysql-connector-python mysql-python - > conda install -c auto biotools - > conda install -c bioconda pyliftover pysam - > conda install setuptools numpy - > conda install -c anaconda pytest - > pip install hgvs==1.1.3 -The packages required for variant validator to function are now set up in the environment "VVenv". - -## Installing validator code - -To clone this software from GIT, use: - > git clone https://github.com/pjdp2/variantValidator.git -This'll create a variantValidator folder in the directory you run it in. - > cd variantValidator -Run the installation script to integrate variant validator with python's site packages. - > python setup.py install -For development purposes, you can use - > pip install -e . -to ensure any changes you make in the local variant validator folder is reflected in your python site-packages. - -## Setting up MySQL - -This step is not optional for getting variant validator to work. 
Install packages with: - > sudo apt-get install mysql-server - -This will install everything you need and start the database server. Make sure you note down the root account password that you're prompted for during installation! -Check it runs with: - > sudo service mysql status -If it's not running, use - > sudo service mysql start -to boot it up. -Enter mysql from any user's shell prompt with - > mysql -u root -p -This will prompt you for the root password you made earlier. Within MySQL, create the variant validator user: - > CREATE USER 'vvadmin'@'localhost' IDENTIFIED BY 'var1ant'; -You should create the database too - > CREATE DATABASE validator; - > USE validator; -Grant access rights to the vvadmin user: - > GRANT SELECT,INSERT,UPDATE,DELETE ON validator.* TO vvadmin; -Quit mysql with - > \q -Bye indeed. - -You must source a copy of the validator database from somewhere. That'll have to be fixed for release... -Copy it over to a temporary folder (say, temp, in your home directory). - > scp someone@somewhere~/databases/validator_2018-11-08.sql ~/temp/validator_2018-11-08.sql -Then, upload it to the running MySQL with: - > mysql -u root -p validator < ~/databases/validator_2018-11-08.sql -You should log into MySQL and check to see if the database uploaded correctly. Login with vvadmin, password "var1ant". -Then: - > USE validator; - > SHOW TABLES; -which should give some good lines. - -## Setting up PostGreSQL - -It's recommended for performance reasons to use a local varsion of the UTA database. To do this, first install the required packages with: - > sudo apt-get install postgresql postgresql-contrib -You need to switch to the "postgres" user to make anything work initially. - > sudo -i -u postgres -Create a new user with a name matching your user account. In my case - pjdp2. When prompted, make yourself a superuser. - > createuser --interactive -The postgres user doesn't have a unix password, so you'll need to use exit to get your account back. 
- > exit -Enter the database with psql. You'll be signed by default into the "postgres" database, which serves as a kind of master database for controlling user accounts. - > psql postgres -Inside psql, create the uta_admin role, and set the password when prompted to "uta_admin". - > CREATE ROLE uta_admin WITH CREATEDB; - > ALTER ROLE uta_admin WITH LOGIN; - > \password uta_admin -Create an empty uta database - > CREATE DATABASE uta WITH OWNER=uta_admin TEMPLATE=template0; -That's enough setting up. Quit psql with: - > \q -Now you're back to your own prompt, download the gzipped uta genetics database, and upload it into psql. You'll be prompted for your password. - > wget http://dl.biocommons.org/uta/uta_20180821.pgd.gz - > gzip -cdq uta_20180821.pgd.gz | psql -U uta_admin -v ON_ERROR_STOP=0 -d uta -Eae -The database should now be uploaded. Don't worry, you can access the database uta with uta_admin if it's uploaded by someone else. -If the database returns errors when the validator runs, you will need to change the postgresql authentication methods, by editing - > pg_hba.conf -This file lives, on linux, in /etc/postgresql/9.3/main/pg_hba.conf but on other systems you may need to search for it. -Inside the file, you should change all instances of "peer" to "md5". - -## Setting up Seqrepo - -Similarly, things run much faster with a local SeqRepo database. You've installed the seqrepo package with pip, but you'll need to download an actual sequence repository. These instructions assume you are using your home directory; you can put it anywhere so long as you modify the config.ini file accordingly. - > mkdir seqrepo -Then make a cup of tea while this command runs: - > seqrepo --root-directory ~/seqrepo pull -i 2018-08-21 -After it finishes downloading, check it installed correctly: - > seqrepo --root-directory ~/seqrepo list-local-instances +# Installation + +These instructions will allow you to install the package and accompanying databases on Linux. 
Mac OS X computers operate similarly. + +## Pre-requisites + +Required: +* MySQL +* Python 3.6 or above +* SQLite version 3.8.0 or above + +Optional: +* PostgreSQL version 9.5 or above, PostgreSQL 10 is not supported. + +## Download the source code + +To download the VariantValidator source code simply clone the master branch. + +``` +$ git clone https://github.com/openvar/variantValidator.git +$ cd variantValidator/ +``` + +## Python 3.6 environment + +When installing VariantValidator we recommend using a virtual environment, as it requires specific versions of several libraries including python and sqlite. This can be done either via conda **or** pip. + +#### Via conda +After [installing conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) you can create a new virtual environment with the correct python and sqlite versions by running: +``` +$ conda env create -f environment.yml +$ conda activate vvenv +``` +The packages required for variant validator to function are now set up in the environment "vvenv". + +#### Via pip + +If you already have the right versions of python (>=3.6) and sqlite (>=3.8), then you can use pip to install the remaining packages. + +``` +$ python -m venv vvenv +$ source activate vvenv +$ pip install -r requirements.txt +``` + +## Installing Variant Validator + +To install VariantValidator within your virtual environment run: +``` +$ python setup.py install +``` + +## Setting up validator database (MySQL) + +A MySQL database is required to run VariantValidator. We recommend creating a user and password specific to the +VariantValidator database, for example: + +```mysql +CREATE USER ''@'' IDENTIFIED BY ''; +CREATE DATABASE validator; +GRANT SELECT,INSERT,UPDATE,DELETE ON validator.* TO ''@''; +``` + +In the `VariantValidator/configuration` folder is a copy of the empty mysql database needed by Variant Validator to run. 
You need to upload it to the running MySQL database with: +``` +$ mysql validator < VariantValidator/configuration/empty_vv_db.sql +``` + +See the [Manual](MANUAL.md) for instructions on populating this database. + +## Setting up Seqrepo (SQLite >=3.8) + +VariantValidator requires a local SeqRepo database. The seqrepo package has already been installed into the virtual environment, but you'll need to download an actual seqrepo database. This can go anywhere on your system drive. + +``` +$ mkdir /path/to/seqrepo +$ seqrepo --root-directory /path/to/seqrepo pull -i 2018-08-21 +``` +To check it has downloaded: +``` +$ seqrepo --root-directory /path/to/seqrepo list-local-instances +``` + +## Setting up UTA database (Optional, PostGreSQL >=9.5) + +It's recommended for performance reasons to use a local version of the UTA database. We again recommend creating a specific user account, for example: +``` +CREATE ROLE WITH CREATEDB; +ALTER ROLE WITH LOGIN; +\password +CREATE DATABASE uta WITH OWNER= TEMPLATE=template0; +``` + +To fill this database, download the gzipped uta genetics database, and upload it into psql. +``` +$ wget http://dl.biocommons.org/uta/uta_20180821.pgd.gz +$ gzip -cdq uta_20180821.pgd.gz | psql -U uta_admin -v ON_ERROR_STOP=0 -d uta -Eae +``` + +If you wish to use the remote, public UTA database, see the instructions [here](https://github.com/biocommons/uta#accessing-the-public-uta-instance). ## Configuration -See the file MANUAL.md for configuration instructions - but before you attempt to configure anything, run - > python simpleTestScript.py -to check that validator's depedencies are installed correctly, and allow it to create a blank configuration file on your system. +Before using VariantValidator some configuration is required, as described in the [Manual](MANUAL.md). + +## Developers + +To work on the Variant Validator code, you'll need to install additional dependencies and install VariantValidator in an editable manner. 
Tests can be run using PyTest. +```bash +cd variantValidator/ +pip install requirements_dev.txt +pip install -e . +pytest +``` +Please make all Pull Requests to the develop branch. diff --git a/docs/MANUAL.md b/docs/MANUAL.md index dcedfc35..7a9cd984 100644 --- a/docs/MANUAL.md +++ b/docs/MANUAL.md @@ -1,73 +1,116 @@ -# Variant Validator Operation Manual +# Manual ## Configuration -Variant Validator will create a configuration file for each user if it does not detect one, located in the folder - > ~/.config/VariantValidator/config.ini -This file, freshly created, will be missing the path to the SeqRepo directory which you should fill in after installation accordingly. If the configuration file hasn't been filled in correctly, the validator will exit immediately with an error. -It's possible to use a remote seqrepo directory, at a cost of greatly reduced performane. +After first installing Variant Validator, a configuration file will need to be created and edited to contain the database credentials and locations. To do this run the configuration script installed alongside the package. -The mysql database is configured in this section: - > [mysql] - > host = 127.0.0.1 - > database = validator - > user = vvadmin - > password = var1ant -Information here also needs to be changed if the variant validator database login details are different. +```bash +vv_configure.py +``` -The [uta] section also contains path information to the UTA archive, if it's installed. +This will ask you to enter a value for each item in the configuration file. +The default/existing value is shown in square brackets and will continue to be used +if you don't enter anything else. The items in the configuration file are: -The section - > [logging] -contains several headings which can be changed to alter the level of verbosity of the validator output. 
+```text +[mysql] +host = localhost +database = validator +user = USERNAME +password = PASSWORD -* file - If "True", writes the logging output to the "vvLog.txt" file in the current working directory. While useful for diagnostics, logging in this way has permissions issues and will fill up the hard disk of an automated installation quickly. -* level - Can be one of several values. All errors below the selected level of severity will not be logged. By default, and to help with setting things up, the info level statements will be logged, but you should change this to make the validator less talkative in normal use. -** debug - Logs all events, including debugging. -** info - Information events on the decisions the validator is making are logged. -** warning - Warnings indicate malformed variants. This is the default logging level. -** error - Variants that produce errors are nonsensical to the point where they cannot be validated. -** critical - Fatal errors that crash the validator are logged at this level. -* trace - Used for diagnosis during development. Can be set to 'True' if you need to profile the validator code. +[seqrepo] +version = 2018-08-21 +location = /PATH/TO/SEQREPO -The validator itself will set environment variables to allow for the correct operation of HGVS software. 
+[postgres] +host = localhost +database = uta +version = uta_20180821 +user = USERNAME +password = PASSWORD -## Operation +[logging] +level = info +console = true +file = false +trace = false + +[EntrezID] +entrezid = admin@variantvalidator.org -Validating variants, provided the software is installed correctly, is as simple as: +[liftover] +location = /PATH/TO/LIFTOVER +``` -> from VariantValidator import Validator -> -> validator = Validator() -> variant = 'NC_000012.11:g.122064776delG' -> select_transcripts = 'all' -> selected_assembly = 'GRCh37' -> -> out=Validator().validate(variant, selected_assembly, select_transcripts) +The values in capitals must be replaced for Variant Validator to run, +except for the liftover path, which is optional. -The 'out' object is a simple dictionary containing the genetic information of the validated variant. The simpleTestScript.py will validate this variant and then print the output nicely as a json. +By default the edited configuration will be placed in the users home directory (`~/.variantvalidator`), this location can be changed for all users by editing the `VariantValidator/settings.py` file. -The accepted formats for variants include: -> NM_000088.3:c.589G>T -> NC_000017.10:g.48275363C>A -> NG_007400.1:g.8638G>T -> LRG_1:g.8638G>T -> LRG_1t1:c.589G>T -> 17-50198002-C-A (GRCh38) -> chr17:50198002C>A (GRCh38) +## Database updates -Possible assemblies are: -> GRCh37 -> hg19 -> hg38 +To import the initial data into the Validator MySQL database, run the following script: -You can select all transcripts by passing 'all', or use multiple transcripts with: -> select_transcripts = 'NM_022356.3| NM_001146289.1| NM_001243246.1' +```bash +update_vdb.py +``` -## Unit testing +This will download the required data to convert between LRG and RefSeq IDs. We recommend re-running this command on a regular basis as changes are continually made to the RefSeq and LRG collections. 
+ +## Operation -Variant Validator is written to be pytest-compatible. Run -> pytest -in the variant validator testing folder, the same as that in which this file resides. The test will take several minutes to complete, but runs through over three hundred common and malformed variants. +To run Variant Validator, we have provided the installed script `variant_validator.py`, running this with the flag `-h` shows the running options: + +```text +usage: variant_validator.py [-h] -v VARIANT [VARIANT ...] + [-g [{GRCh37,GRCh38,hg19,hg38}]] + [-t [TRANSCRIPTS]] [-s {individual,batch}] + [-f {dict,list,json}] [-o OUTPUT] + +optional arguments: + -h, --help show this help message and exit + -v VARIANT [VARIANT ...], --variant VARIANT [VARIANT ...] + Variant(s) to validate + -g [{GRCh37,GRCh38,hg19,hg38}], --genome [{GRCh37,GRCh38,hg19,hg38}] + Genome assembly (default: GRCh37) + -t [TRANSCRIPTS], --transcripts [TRANSCRIPTS] + Transcripts to output results for (default: all) + -s {individual,batch}, --submission {individual,batch} + Submit variants individually or as a single batch + validation (default: individual) + -f {dict,list,json}, --output_format {dict,list,json} + Output validations as a list or as a dictionary + (default: dict) + -o OUTPUT, --output OUTPUT + Specifies the output file (default: stdout) +``` + +From this script you can run the validator with a number of different input and output options. + +You can also import and use the package directly within python. 
For example: + +```python +import VariantValidator +validator = VariantValidator.Validator() + +# To validate a variant +output = validator.validate('NM_000088.3:c.589G>T', 'GRCh37', 'all') +# This returns an ValOutput object that can be used to output the results in a number of different ways +output.format_as_dict(with_meta=True) + +# The Validator object also contains other useful methods, such as finding all transcripts from a gene ID/symbol +validator.gene2transcripts('COL1A1') +``` + +The accepted format for variants include: +```text +NM_000088.3:c.589G>T +NC_000017.10:g.48275363C>A +NG_007400.1:g.8638G>T +LRG_1:g.8638G>T +LRG_1t1:c.589G>T +17-50198002-C-A (GRCh38) +chr17:50198002C>A (GRCh38) +``` - From f3007133505b8b5fc0ff4dd2dbe4cee1597c0cdf Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 24 Jun 2019 10:37:49 +0100 Subject: [PATCH 152/223] Added table and json format to output object --- VariantValidator/modules/valoutput.py | 35 +++++++++++++++++++++++++++ bin/variant_validator.py | 16 +++++++++--- 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/VariantValidator/modules/valoutput.py b/VariantValidator/modules/valoutput.py index 97a633de..3ff7c169 100644 --- a/VariantValidator/modules/valoutput.py +++ b/VariantValidator/modules/valoutput.py @@ -1,6 +1,7 @@ import os from .liftover import liftover from .logger import Logger +import json class ValOutput(object): @@ -130,6 +131,40 @@ def format_as_dict(self, with_meta=True): # return batch_out return validation_output + def format_as_json(self, with_meta=True): + dictionary_output = self.format_as_dict(with_meta) + return json.dumps(dictionary_output) + + def format_as_table(self, with_meta=True): + """ + Currently the table format will only output correctly validated results, all warnings and obsolete records will + be squashed. 
+ :param with_meta: + :return: + """ + outputstrings = [] + if with_meta: + outputstrings.append('#' + str(self.add_meta())) + + outputstrings.append(['Input', 'HGVS_transcript', 'HGVS_RefSeqGene', 'HGVS_LRG', 'HGVS_LRG_transcript', + 'Gene_Symbol', 'Transcript_description']) + for variant in self.output_list: + if variant.output_type_flag == 'gene': + if variant.warnings == ['Validation error'] or (variant.is_obsolete() and + variant.hgvs_transcript_variant == ''): + continue + else: + outputstrings.append([ + variant.original, + variant.hgvs_transcript_variant, + variant.hgvs_refseqgene_variant, + variant.hgvs_lrg_variant, + variant.hgvs_lrg_transcript_variant, + variant.gene_symbol, + variant.description + ]) + return outputstrings + def add_meta(self): """ Returns dictionary of metadata diff --git a/bin/variant_validator.py b/bin/variant_validator.py index bb25a964..59662e08 100644 --- a/bin/variant_validator.py +++ b/bin/variant_validator.py @@ -1,7 +1,6 @@ #! /usr/bin/env python import argparse -import json import sys from VariantValidator import Validator @@ -10,9 +9,18 @@ def output_results(valoutput, outformat): if outformat == 'dict': return str(valoutput.format_as_dict()) elif outformat == 'json': - return json.dumps(valoutput.format_as_dict()) + return str(valoutput.format_as_json()) else: - return str(valoutput.format_as_dict()) + # table format + table = valoutput.format_as_table() + newtable = [] + for row in table: + if isinstance(row, list): + newrow = '\t'.join(row) + else: + newrow = str(row) + newtable.append(newrow) + return '\n'.join(newtable) if __name__ == '__main__': @@ -24,7 +32,7 @@ def output_results(valoutput, outformat): help='Transcripts to output results for (default: %(default)s)') parser.add_argument('-s', '--submission', choices=['individual', 'batch'], default='individual', help='Submit variants individually or as a single batch validation (default: %(default)s)') - parser.add_argument('-f', '--output_format', choices=['dict', 
'list', 'json'], default='dict', + parser.add_argument('-f', '--output_format', choices=['dict', 'table', 'json'], default='dict', help='Output validations as a list or as a dictionary (default: %(default)s)') parser.add_argument('-o', '--output', type=argparse.FileType('w'), default='-', help='Specifies the output file (default: stdout)') From 47f2386091bd29ff9cfbcb582216ec5176d53e12 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 24 Jun 2019 10:45:46 +0100 Subject: [PATCH 153/223] Moved liftover step out of output obj and into validator method --- VariantValidator/modules/valoutput.py | 57 ------------------------ VariantValidator/modules/vvMixinCore.py | 59 +++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 57 deletions(-) diff --git a/VariantValidator/modules/valoutput.py b/VariantValidator/modules/valoutput.py index 3ff7c169..fd6b7d08 100644 --- a/VariantValidator/modules/valoutput.py +++ b/VariantValidator/modules/valoutput.py @@ -1,5 +1,4 @@ import os -from .liftover import liftover from .logger import Logger import json @@ -66,62 +65,6 @@ def format_as_dict(self, with_meta=True): validation_intergenic_counter = validation_intergenic_counter + 1 identification_key = 'intergenic_variant_%s' % validation_intergenic_counter - # Attempt to liftover between genome builds - # Note: pyliftover uses the UCSC liftOver tool. 
- # https://pypi.org/project/pyliftover/ - genomic_position_info = variant.primary_assembly_loci - for g_p_key in list(genomic_position_info.keys()): - build_to = '' - build_from = '' - - # Identify the current build and hgvs_genomic descripsion - if 'hg' in g_p_key: - # incoming_vcf = genomic_position_info[g_p_key]['vcf'] - # set builds - if g_p_key == 'hg38': - build_to = 'hg19' - build_from = 'hg38' - if g_p_key == 'hg19': - build_to = 'hg38' - build_from = 'hg19' - elif 'grc' in g_p_key: - # incoming_vcf = genomic_position_info[g_p_key]['vcf'] - # set builds - if g_p_key == 'grch38': - build_to = 'GRCh37' - build_from = 'GRCh38' - if g_p_key == 'grch37': - build_to = 'GRCh38' - build_from = 'GRCh37' - - # Liftover - lifted_response = liftover(genomic_position_info[g_p_key]['hgvs_genomic_description'], build_from, - build_to, variant.hn, variant.reverse_normalizer, - variant.evm, self.validator) - - # Sort the respomse into primary assembly and ALT - primary_assembly_loci = {} - alt_genomic_loci = [] - for build_key, accession_dict in list(lifted_response.items()): - try: - accession_key = list(accession_dict.keys())[0] - if 'NC_' in accession_dict[accession_key]['hgvs_genomic_description']: - primary_assembly_loci[build_key.lower()] = accession_dict[accession_key] - else: - alt_genomic_loci.append({build_key.lower(): accession_dict[accession_key]}) - - # KeyError if the dicts are empty - except KeyError: - continue - except IndexError: - continue - - # Add the dictionaries from lifted response to the output - if primary_assembly_loci != {}: - variant.primary_assembly_loci = primary_assembly_loci - if alt_genomic_loci: - variant.alt_genomic_loci = alt_genomic_loci - # Finalise the output dictionary validation_output[identification_key] = variant.output_dict() diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 87131b23..2adc92e4 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ 
b/VariantValidator/modules/vvMixinCore.py @@ -16,6 +16,7 @@ from . import use_checking from . import mappers from . import valoutput +from .liftover import liftover class Mixin(vvMixinConverters.Mixin): @@ -683,6 +684,64 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if ref_records != {}: variant.reference_sequence_records = ref_records + if variant.output_type_flag == 'intergenic': + # Attempt to liftover between genome builds + # Note: pyliftover uses the UCSC liftOver tool. + # https://pypi.org/project/pyliftover/ + genomic_position_info = variant.primary_assembly_loci + for g_p_key in list(genomic_position_info.keys()): + build_to = '' + build_from = '' + + # Identify the current build and hgvs_genomic descripsion + if 'hg' in g_p_key: + # incoming_vcf = genomic_position_info[g_p_key]['vcf'] + # set builds + if g_p_key == 'hg38': + build_to = 'hg19' + build_from = 'hg38' + if g_p_key == 'hg19': + build_to = 'hg38' + build_from = 'hg19' + elif 'grc' in g_p_key: + # incoming_vcf = genomic_position_info[g_p_key]['vcf'] + # set builds + if g_p_key == 'grch38': + build_to = 'GRCh37' + build_from = 'GRCh38' + if g_p_key == 'grch37': + build_to = 'GRCh38' + build_from = 'GRCh37' + + # Liftover + lifted_response = liftover(genomic_position_info[g_p_key]['hgvs_genomic_description'], + build_from, + build_to, variant.hn, variant.reverse_normalizer, + variant.evm, self) + + # Sort the respomse into primary assembly and ALT + primary_assembly_loci = {} + alt_genomic_loci = [] + for build_key, accession_dict in list(lifted_response.items()): + try: + accession_key = list(accession_dict.keys())[0] + if 'NC_' in accession_dict[accession_key]['hgvs_genomic_description']: + primary_assembly_loci[build_key.lower()] = accession_dict[accession_key] + else: + alt_genomic_loci.append({build_key.lower(): accession_dict[accession_key]}) + + # KeyError if the dicts are empty + except KeyError: + continue + except IndexError: + continue + + # Add the 
dictionaries from lifted response to the output + if primary_assembly_loci != {}: + variant.primary_assembly_loci = primary_assembly_loci + if alt_genomic_loci: + variant.alt_genomic_loci = alt_genomic_loci + # Append to a list for return batch_out.append(variant) From 70a876241e3d31857af051bd5ce086c46b56c9af Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 24 Jun 2019 10:51:24 +0100 Subject: [PATCH 154/223] Added intergenic results to table output --- VariantValidator/modules/valoutput.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/VariantValidator/modules/valoutput.py b/VariantValidator/modules/valoutput.py index fd6b7d08..fdb700e5 100644 --- a/VariantValidator/modules/valoutput.py +++ b/VariantValidator/modules/valoutput.py @@ -106,6 +106,16 @@ def format_as_table(self, with_meta=True): variant.gene_symbol, variant.description ]) + elif variant.output_type_flag == 'intergenic': + outputstrings.append([ + variant.original, + variant.hgvs_transcript_variant, + variant.hgvs_refseqgene_variant, + variant.hgvs_lrg_variant, + variant.hgvs_lrg_transcript_variant, + variant.gene_symbol, + variant.description + ]) return outputstrings def add_meta(self): From 83a4944261f55acb17339084bf9fc3b8df797fdb Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 24 Jun 2019 10:58:32 +0100 Subject: [PATCH 155/223] Added metadata option to validator script --- bin/variant_validator.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/bin/variant_validator.py b/bin/variant_validator.py index 59662e08..beb3a1fc 100644 --- a/bin/variant_validator.py +++ b/bin/variant_validator.py @@ -5,14 +5,14 @@ from VariantValidator import Validator -def output_results(valoutput, outformat): +def output_results(valoutput, outformat, with_meta): if outformat == 'dict': - return str(valoutput.format_as_dict()) + return str(valoutput.format_as_dict(with_meta=with_meta)) elif outformat == 'json': - return str(valoutput.format_as_json()) + return 
str(valoutput.format_as_json(with_meta=with_meta)) else: # table format - table = valoutput.format_as_table() + table = valoutput.format_as_table(with_meta=with_meta) newtable = [] for row in table: if isinstance(row, list): @@ -36,6 +36,8 @@ def output_results(valoutput, outformat): help='Output validations as a list or as a dictionary (default: %(default)s)') parser.add_argument('-o', '--output', type=argparse.FileType('w'), default='-', help='Specifies the output file (default: stdout)') + parser.add_argument('-m', '--meta', action='store_true', default=False, + help='Also output metadata (default: %(default)s)') args = parser.parse_args() @@ -44,9 +46,9 @@ def output_results(valoutput, outformat): if args.submission == 'individual': for variant in args.variant: output = validator.validate(variant, args.genome, args.transcripts) - args.output.write(output_results(output, args.output_format) + '\n') + args.output.write(output_results(output, args.output_format, args.meta) + '\n') else: batch = '|'.join(args.variant) sys.stderr.write("Submitting batch query: %s\n" % batch) output = validator.validate(batch, args.genome, args.transcripts) - args.output.write(output_results(output, args.output_format) + '\n') + args.output.write(output_results(output, args.output_format, args.meta) + '\n') From bc8933b80944f0e9dfbf64e721e48e0eea167e8b Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 24 Jun 2019 11:02:00 +0100 Subject: [PATCH 156/223] Removed unwanted files --- VariantValidator/setup | 27 --------------------------- VariantValidator/simpleTestScript.py | 14 -------------- 2 files changed, 41 deletions(-) delete mode 100755 VariantValidator/setup delete mode 100644 VariantValidator/simpleTestScript.py diff --git a/VariantValidator/setup b/VariantValidator/setup deleted file mode 100755 index 94f9cc60..00000000 --- a/VariantValidator/setup +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# Tool for deploying the batchValidator source code. 
All files will be copied -# to Python site-packages, replacing any modified code. - -# To run this # tool, type './setup' at the command line. - -# rm -r /local/python/2.7.12/lib/python2.7/site-packages/variantValidator/ -rsync -rv .// /local/python/2.7.12/lib/python2.7/site-packages/VariantValidator/ --delete -rsync -rv .// /local/python/2.7/lib/python2.7/site-packages/VariantValidator/ --delete - - -# -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
-# \ No newline at end of file diff --git a/VariantValidator/simpleTestScript.py b/VariantValidator/simpleTestScript.py deleted file mode 100644 index 1ff732fc..00000000 --- a/VariantValidator/simpleTestScript.py +++ /dev/null @@ -1,14 +0,0 @@ -import json -import os - -from .VariantValidator import Validator - -#variant = 'NM_000088.3:c.589G>T' -variant = 'NC_000012.11:g.122064776delG' -select_transcripts = 'all' -selected_assembly = 'GRCh37' - -validator=Validator() -out=Validator().validate(variant, selected_assembly, select_transcripts) - -print((json.dumps(out, sort_keys=True, indent=4, separators=(',', ': ')))) From e2ae35c6aa234f6b782aa87be5f1636eb3a56f63 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 24 Jun 2019 14:22:07 +0100 Subject: [PATCH 157/223] Renamed test dir to tests --- {test => tests}/inputVariants.txt | 0 {test => tests}/test_inputs.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename {test => tests}/inputVariants.txt (100%) rename {test => tests}/test_inputs.py (100%) diff --git a/test/inputVariants.txt b/tests/inputVariants.txt similarity index 100% rename from test/inputVariants.txt rename to tests/inputVariants.txt diff --git a/test/test_inputs.py b/tests/test_inputs.py similarity index 100% rename from test/test_inputs.py rename to tests/test_inputs.py From 8cf995b77fb352d81d260874c873a197dbfc8795 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 24 Jun 2019 14:27:35 +0100 Subject: [PATCH 158/223] Tidied up use_checking.py --- VariantValidator/modules/use_checking.py | 54 ++++++++++++++---------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/VariantValidator/modules/use_checking.py b/VariantValidator/modules/use_checking.py index abcbbb24..6f721503 100644 --- a/VariantValidator/modules/use_checking.py +++ b/VariantValidator/modules/use_checking.py @@ -1,5 +1,7 @@ import re import hgvs +import hgvs.exceptions +import hgvs.variantmapper from . 
import utils as fn from .logger import Logger import copy @@ -36,7 +38,7 @@ def refseq_common_mistakes(variant): # NM_ NC_ NG_ NR_ p. if (variant.quibble.startswith('NM_') or variant.quibble.startswith('NR_') or variant.quibble.startswith('NC_') or - variant.quibble.startswith('NG_')) and variant.reftype == ':p.': + variant.quibble.startswith('NG_')) and variant.reftype == ':p.': error = 'Using a nucleotide reference sequence (NM_ NR_ NG_ NC_) to specify protein-level (p.) variation is ' \ 'not HGVS compliant. Please select an appropriate protein reference sequence (NP_)' variant.warnings.append(error) @@ -121,7 +123,6 @@ def structure_checks_c(variant, validator): structure checks for when reftype is coding :param variant: :param validator: - :param hn: :return: """ @@ -163,18 +164,21 @@ def structure_checks_c(variant, validator): identity_info = validator.hdp.get_tx_identity_info(variant.input_parses.ac) ref_start = identity_info[3] ref_end = identity_info[4] - if '-' in str(variant.input_parses.posedit.pos.start) and variant.input_parses.posedit.pos.start.offset == 0: + if '-' in str(variant.input_parses.posedit.pos.start) and \ + variant.input_parses.posedit.pos.start.offset == 0: # upstream positions boundary = -ref_start remainder = variant.input_parses.posedit.pos.start.base - boundary variant.input_parses.posedit.pos.start.base = boundary variant.input_parses.posedit.pos.start.offset = remainder - if '-' in str(variant.input_parses.posedit.pos.end) and variant.input_parses.posedit.pos.end.offset == 0: + if '-' in str(variant.input_parses.posedit.pos.end) and \ + variant.input_parses.posedit.pos.end.offset == 0: boundary = -ref_start remainder = variant.input_parses.posedit.pos.end.base - boundary variant.input_parses.posedit.pos.end.base = boundary variant.input_parses.posedit.pos.end.offset = remainder - if '*' in str(variant.input_parses.posedit.pos.start) and variant.input_parses.posedit.pos.start.offset == 0: + if '*' in 
str(variant.input_parses.posedit.pos.start) and \ + variant.input_parses.posedit.pos.start.offset == 0: # downstream positions tot_end_pos = str(variant.input_parses.posedit.pos.start).replace('*', '') ts_seq = validator.sf.fetch_seq(variant.input_parses.ac) @@ -182,7 +186,8 @@ def structure_checks_c(variant, validator): variant.input_parses.posedit.pos.start.base = boundary offset = int(tot_end_pos) - boundary variant.input_parses.posedit.pos.start.offset = offset - if '*' in str(variant.input_parses.posedit.pos.end) and variant.input_parses.posedit.pos.end.offset == 0: + if '*' in str(variant.input_parses.posedit.pos.end) and \ + variant.input_parses.posedit.pos.end.offset == 0: tot_end_pos = str(variant.input_parses.posedit.pos.end).replace('*', '') ts_seq = validator.sf.fetch_seq(variant.input_parses.ac) boundary = len(ts_seq) - ref_end @@ -232,7 +237,7 @@ def structure_checks_c(variant, validator): variant.input_parses = variant.evm.n_to_c(variant.input_parses) # Intronic positions in UTRs - if re.search(r'\d\-\d', str(variant.input_parses)) or re.search(r'\d\+\d', str(variant.input_parses)): + if re.search(r'\d-\d', str(variant.input_parses)) or re.search(r'\d\+\d', str(variant.input_parses)): # Can we go c-g-c try: to_genome = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, @@ -277,7 +282,7 @@ def structure_checks_c(variant, validator): variant.input_parses.posedit.pos.end.offset = offset report_gen = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, - variant.primary_assembly, variant.hn) + variant.primary_assembly, variant.hn) report_gen = variant.hn.normalize(report_gen) error = 'Using a transcript reference sequence to specify a variant position that lies ' \ 'outside of the reference sequence is not HGVS-compliant. Instead re-submit '\ @@ -301,7 +306,7 @@ def structure_checks_c(variant, validator): acs = '; '.join(gens) error = 'Cannot map ' + fn.valstr(variant.input_parses) + ' to a genomic position. 
'\ + variant.input_parses.ac + ' can only be partially aligned to genomic reference ' \ - 'sequences ' + acs + 'sequences ' + acs variant.warnings.append(error) Logger.warning(error) return True @@ -342,6 +347,7 @@ def structure_checks_c(variant, validator): # Create a specific minimal evm with no normalizer and no replace_reference # Have to use this method due to potential multi chromosome error, note normalizes but does not replace sequence + output = None try: output = validator.noreplace_myevm_t_to_g(variant.input_parses, variant) except hgvs.exceptions.HGVSDataNotAvailableError: @@ -452,7 +458,8 @@ def structure_checks_n(variant, validator): to_n = variant.evm.c_to_n(variant.input_parses) actual_ref = to_n.posedit.edit.ref if called_ref != actual_ref: - error = 'Variant reference (' + called_ref + ') does not agree with reference sequence (' + actual_ref + ')' + error = 'Variant reference (' + called_ref + ') does not agree with reference sequence (' + \ + actual_ref + ')' variant.warnings.append(error) Logger.warning(str(error)) return True @@ -477,8 +484,8 @@ def structure_checks_n(variant, validator): remainder = remainder + 1 variant.input_parses.posedit.pos.end.base = boundary variant.input_parses.posedit.pos.end.offset = remainder - report_gen = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, variant.primary_assembly, - variant.hn) + report_gen = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, + variant.primary_assembly, variant.hn) report_gen = variant.hn.normalize(report_gen) error = 'Using a transcript reference sequence to specify a variant position that lies outside of' \ ' the reference sequence is not HGVS-compliant. Instead re-submit ' + fn.valstr(report_gen) @@ -496,7 +503,7 @@ def structure_checks_n(variant, validator): error = 'Using a transcript reference sequence to specify a variant position that lies outside of the ' \ 'reference sequence is not HGVS-compliant. 
Instead re-submit ' genomic_position = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, variant.primary_assembly, - variant.hn) + variant.hn) genomic_position = variant.hn.normalize(genomic_position) error = error + fn.valstr(genomic_position) variant.warnings.append(error) @@ -511,10 +518,10 @@ def structure_checks_n(variant, validator): error = str(e) if 'bounds' in error: try: - report_gen = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, variant.primary_assembly, - variant.hn) + report_gen = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, + variant.primary_assembly, variant.hn) report_gen = variant.hn.normalize(report_gen) - except hgvs.exceptions.HGVSError as e: + except hgvs.exceptions.HGVSError: fn.exceptPass() else: error = 'Using a transcript reference sequence to specify a variant position that lies outside of '\ @@ -533,20 +540,21 @@ def structure_checks_n(variant, validator): correction.posedit.pos.start = ed correction.posedit.pos.end = st error = error + ': Did you mean ' + str(correction) + '?' 
- # error = 'Interval start position ' + str(input_parses.posedit.pos.start) + ' > interval end position ' + str(input_parses.posedit.pos.end) + # error = 'Interval start position ' + str(input_parses.posedit.pos.start) + ' > interval end + # position ' + str(input_parses.posedit.pos.end) variant.warnings.append(error) Logger.warning(error) return True elif 'Cannot validate sequence of an intronic variant' in error: try: test_g = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, variant.primary_assembly, - variant.hn) + variant.hn) back_to_n = variant.evm.g_to_t(test_g, variant.input_parses.ac) except hgvs.exceptions.HGVSError as e: error = str(e) if 'bounds' in error: report_gen = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, - variant.primary_assembly, variant.hn) + variant.primary_assembly, variant.hn) report_gen = variant.hn.normalize(report_gen) error = 'Using a transcript reference sequence to specify a variant position that lies ' \ 'outside of the reference sequence is not HGVS-compliant. 
Instead re-submit ' + \ @@ -556,10 +564,12 @@ def structure_checks_n(variant, validator): return True # Create a specific minimal evm with no normalizer and no replace_reference - # Have to use this method due to potential multi chromosome error, note, normalizes but does not replace sequence + # Have to use this method due to potential multi chromosome error, note, normalizes but does not replace + # sequence + output = None try: output = validator.noreplace_myevm_t_to_g(variant.input_parses, variant) - except hgvs.exceptions.HGVSDataNotAvailableError as e: + except hgvs.exceptions.HGVSDataNotAvailableError: errors = ['Required information for ' + variant.input_parses.ac + ' is missing from the Universal ' 'Transcript Archive', 'Query https://rest.variantvalidator.org/tools/gene2transcripts/%s for ' @@ -584,7 +594,7 @@ def structure_checks_n(variant, validator): ed = variant.input_parses.posedit.pos.end correction.posedit.pos.start = ed correction.posedit.pos.end = st - error = error + ': Did you mean ' + str(correction) + '?' + # error = error + ': Did you mean ' + str(correction) + '?' 
error = 'Interval start position ' + str( variant.input_parses.posedit.pos.start) + ' > interval end position ' + str( variant.input_parses.posedit.pos.end) From 68681faff30b939741a2af91dc5d718af4989f5c Mon Sep 17 00:00:00 2001 From: TeriForey Date: Tue, 25 Jun 2019 10:07:33 +0100 Subject: [PATCH 159/223] Added tests for utils --- tests/test_utils.py | 364 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 364 insertions(+) create mode 100644 tests/test_utils.py diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 00000000..86204643 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,364 @@ +from unittest import TestCase +from VariantValidator.modules import utils +import hgvs.parser +import json + + +class TestHGNCRest(TestCase): + """Test the hgnc_rest function""" + + def test_non_json(self): + path = '' + with self.assertRaises(json.decoder.JSONDecodeError): + utils.hgnc_rest(path) + + def test_symbol(self): + path = '/fetch/symbol/NANOG' + output = utils.hgnc_rest(path) + self.assertIsInstance(output, dict) + self.assertEqual(list(output.keys()), ['record', 'error']) + self.assertNotEqual(output['record'], '') + self.assertEqual(output['error'], 'false') + self.assertIsInstance(output['record'], dict) + self.assertListEqual(list(output['record'].keys()), ['responseHeader', 'response']) + self.assertGreater(output['record']['response']['numFound'], 0) + + def test_symbol_wrong(self): + path = '/fetch/symbol/IAMNOTAGENE' + output = utils.hgnc_rest(path) + self.assertIsInstance(output, dict) + self.assertEqual(list(output.keys()), ['record', 'error']) + self.assertNotEqual(output['record'], '') + self.assertEqual(output['error'], 'false') + self.assertIsInstance(output['record'], dict) + self.assertListEqual(list(output['record'].keys()), ['responseHeader', 'response']) + self.assertEqual(output['record']['response']['numFound'], 0) + + +class TestValStr(TestCase): + """Test the valstr function""" + + def setUp(self): + 
self.hp = hgvs.parser.Parser() + + def test_string(self): + var = '' + with self.assertRaises(AttributeError): + utils.valstr(var) + + def test_variant_sub(self): + """ Will test that reference isn't removed """ + stringvar = 'NM_015120.4:c.34C>T' + var = self.hp.parse(stringvar) + output = utils.valstr(var) + self.assertEqual(var.posedit.edit.type, 'sub') + self.assertEqual(output, stringvar) + + def test_variant_identity(self): + """ Will test that the reference is removed """ + stringvar = 'NM_015120.4:c.34CG=' + var = self.hp.parse(stringvar) + output = utils.valstr(var) + self.assertEqual(var.posedit.edit.type, 'identity') + self.assertEqual(output, 'NM_015120.4:c.34=') + + def test_variant_identity2(self): + """ Will test that the reference is not removed """ + stringvar = 'NM_015120.4:c.34C=' + var = self.hp.parse(stringvar) + output = utils.valstr(var) + self.assertEqual(var.posedit.edit.type, 'identity') + self.assertEqual(output, 'NM_015120.4:c.34C=') + + +class TestProteinInv(TestCase): + """Test the pro_inv_info function""" + + def test_empty(self): + pro1 = '' + pro2 = '' + output = utils.pro_inv_info(pro1, pro2) + self.assertIsNone(output) + + def test_equal(self): + output = utils.pro_inv_info('MTACGP', 'MTACGP') + self.assertIsNone(output) + + def test_equal_with_ter(self): + output = utils.pro_inv_info('MTACGP*', 'MTACGP*') + self.assertIsNone(output) + + def test_unequal(self): + output = utils.pro_inv_info('MTACGP', 'MGCATP') + self.assertIsNone(output) + + def test_ref_has_ter(self): + output = utils.pro_inv_info('MTACGP*', 'MTACGPAL') + self.assertIsNone(output) + + def test_has_ter(self): + output = utils.pro_inv_info('MTACGP', 'MTACGP*') + self.assertIsInstance(output, dict) + print(output) + self.assertEqual(output['variant'], 'true') + self.assertEqual(output['terminate'], 'true') + self.assertEqual(output['ter_pos'], 7) + self.assertEqual(output['error'], 'false') + self.assertEqual(output['prot_del_seq'], '*') + 
self.assertEqual(output['prot_ins_seq'], '**') + self.assertEqual(output['edit_start'], 7) + self.assertEqual(output['edit_end'], 7) + + def test_has_ter_inv(self): + output = utils.pro_inv_info('MTATGLCGP*', 'MTALGTCGP*') + self.assertIsInstance(output, dict) + print(output) + self.assertEqual(output['variant'], 'true') + self.assertEqual(output['terminate'], 'true') + self.assertEqual(output['ter_pos'], 10) + self.assertEqual(output['error'], 'false') + self.assertEqual(output['prot_del_seq'], 'TGL') + self.assertEqual(output['prot_ins_seq'], 'LGT') + self.assertEqual(output['edit_start'], 4) + self.assertEqual(output['edit_end'], 6) + + def test_has_ter_sub(self): + output = utils.pro_inv_info('MTATCGP*', 'MTACCGP*') + self.assertIsInstance(output, dict) + print(output) + self.assertEqual(output['variant'], 'true') + self.assertEqual(output['terminate'], 'true') + self.assertEqual(output['ter_pos'], 8) + self.assertEqual(output['error'], 'false') + self.assertEqual(output['prot_del_seq'], 'T') + self.assertEqual(output['prot_ins_seq'], 'C') + self.assertEqual(output['edit_start'], 4) + self.assertEqual(output['edit_end'], 4) + + def test_has_ter_del(self): + output = utils.pro_inv_info('MTATCGP*', 'MTACGP*') + self.assertIsInstance(output, dict) + print(output) + self.assertEqual(output['variant'], 'true') + self.assertEqual(output['terminate'], 'true') + self.assertEqual(output['ter_pos'], 7) + self.assertEqual(output['error'], 'false') + self.assertEqual(output['prot_del_seq'], 'TCGP') + self.assertEqual(output['prot_ins_seq'], 'CGP*') + self.assertEqual(output['edit_start'], 4) + self.assertEqual(output['edit_end'], 7) + + def test_has_ter_ins(self): + output = utils.pro_inv_info('MTACGP*', 'MTATCGP*') + self.assertIsInstance(output, dict) + print(output) + self.assertEqual(output['variant'], 'true') + self.assertEqual(output['terminate'], 'true') + self.assertEqual(output['ter_pos'], 8) + self.assertEqual(output['error'], 'false') + 
self.assertEqual(output['prot_del_seq'], '*') + self.assertEqual(output['prot_ins_seq'], 'T*') + self.assertEqual(output['edit_start'], 4) + self.assertEqual(output['edit_end'], 4) + + +class TestProteinDelIns(TestCase): + """Test the pro_delins_info function""" + + def test_empty(self): + pro1 = '' + pro2 = '' + output = utils.pro_delins_info(pro1, pro2) + self.assertIsNone(output) + + def test_equal(self): + output = utils.pro_delins_info('MTACGP', 'MTACGP') + self.assertIsNone(output) + + def test_equal_with_ter(self): + output = utils.pro_delins_info('MTACGP*', 'MTACGP*') + self.assertIsNone(output) + + def test_unequal(self): + output = utils.pro_delins_info('MTACGP', 'MGCATP') + self.assertIsNone(output) + + def test_ref_has_ter(self): + output = utils.pro_delins_info('MTACGP*', 'MTACGPAL') + self.assertIsNone(output) + + def test_has_ter(self): + output = utils.pro_delins_info('MTACGP', 'MTACGP*') + self.assertIsInstance(output, dict) + print(output) + self.assertEqual(output['variant'], 'true') + self.assertEqual(output['terminate'], 'true') + self.assertEqual(output['ter_pos'], 7) + self.assertEqual(output['error'], 'false') + self.assertEqual(output['prot_del_seq'], '') + self.assertEqual(output['prot_ins_seq'], '*') + self.assertEqual(output['edit_start'], 7) + self.assertEqual(output['edit_end'], 6) + + def test_has_ter_inv(self): + output = utils.pro_delins_info('MTATGLCGP*', 'MTALGTCGP*') + self.assertIsInstance(output, dict) + print(output) + self.assertEqual(output['variant'], 'true') + self.assertEqual(output['terminate'], 'true') + self.assertEqual(output['ter_pos'], 10) + self.assertEqual(output['error'], 'false') + self.assertEqual(output['prot_del_seq'], 'TGL') + self.assertEqual(output['prot_ins_seq'], 'LGT') + self.assertEqual(output['edit_start'], 4) + self.assertEqual(output['edit_end'], 6) + + def test_has_ter_sub(self): + output = utils.pro_delins_info('MTATCGP*', 'MTACCGP*') + self.assertIsInstance(output, dict) + print(output) + 
self.assertEqual(output['variant'], 'true') + self.assertEqual(output['terminate'], 'true') + self.assertEqual(output['ter_pos'], 8) + self.assertEqual(output['error'], 'false') + self.assertEqual(output['prot_del_seq'], 'T') + self.assertEqual(output['prot_ins_seq'], 'C') + self.assertEqual(output['edit_start'], 4) + self.assertEqual(output['edit_end'], 4) + + def test_has_ter_del(self): + output = utils.pro_delins_info('MTATCGP*', 'MTACGP*') + self.assertIsInstance(output, dict) + print(output) + self.assertEqual(output['variant'], 'true') + self.assertEqual(output['terminate'], 'true') + self.assertEqual(output['ter_pos'], 8) + self.assertEqual(output['error'], 'false') + self.assertEqual(output['prot_del_seq'], 'T') + self.assertEqual(output['prot_ins_seq'], '') + self.assertEqual(output['edit_start'], 4) + self.assertEqual(output['edit_end'], 4) + + def test_has_ter_ins(self): + output = utils.pro_delins_info('MTACGP*', 'MTATCGP*') + self.assertIsInstance(output, dict) + print(output) + self.assertEqual(output['variant'], 'true') + self.assertEqual(output['terminate'], 'true') + self.assertEqual(output['ter_pos'], 8) + self.assertEqual(output['error'], 'false') + self.assertEqual(output['prot_del_seq'], '') + self.assertEqual(output['prot_ins_seq'], 'T') + self.assertEqual(output['edit_start'], 4) + self.assertEqual(output['edit_end'], 3) + + +class TestProteinSwap(TestCase): + """ Test one_to_three function """ + + def test_empty(self): + output = utils.one_to_three('') + self.assertEqual(output, '') + + def test_wrong(self): + with self.assertRaises(TypeError): + utils.one_to_three('Z') + + def test_single(self): + output = utils.one_to_three('A') + self.assertEqual(output, 'Ala') + + def test_wrong_pair(self): + with self.assertRaises(TypeError): + utils.one_to_three('AZ') + + def test_all(self): + output = utils.one_to_three('RKDEQNHSTYCWMAILFVPG*') + self.assertEqual(output, 'ArgLysAspGluGlnAsnHisSerThrTyrCysTrpMetAlaIleLeuPheValProGlyTer') + + +class 
TestNInversion(TestCase): + """ Test n_inversion function. To be honest this looks more like an del+ins """ + + def test_empty(self): + output = utils.n_inversion('', '', '', 0, 0) + self.assertEqual(output, '') + + def test_empty2(self): + """ + Warning this output might need checking. + Passing in 0 as first integer becomes -1 which has meaning! + """ + output = utils.n_inversion('ATGGAC', '', '', 0, 0) + self.assertEqual(output, 'ATGGAATGGAC') + + def test_empty3(self): + output = utils.n_inversion('ATGGAC', '', '', 1, 0) + self.assertEqual(output, 'ATGGAC') + + def test_correct(self): + output = utils.n_inversion('ATGGAC', 'GG', 'AA', 3, 4) + self.assertEqual(output, 'ATAAAC') + + def test_del_incorrect(self): + output = utils.n_inversion('ATGGAC', 'GC', 'AA', 3, 4) + self.assertEqual(output, 'error') + + def test_start_incorrect(self): + output = utils.n_inversion('ATGGAC', 'GG', 'AA', 2, 4) + self.assertEqual(output, 'error') + + def test_end_incorrect(self): + output = utils.n_inversion('ATGGAC', 'GG', 'AA', 3, 3) + self.assertEqual(output, 'error') + + def test_types(self): + with self.assertRaises(TypeError): + utils.n_inversion('ATGGAC', 'GG', 'AA', '3', 3) + + def test_types2(self): + with self.assertRaises(TypeError): + utils.n_inversion('ATGGAC', '', 0, 0, 0) + + def test_types3(self): + with self.assertRaises(TypeError): + utils.n_inversion(0, 0, 0, 0, 0) + + +class TestHGVSdup2indel(TestCase): + """ Will test the hgvs_dup2indel function""" + + def setUp(self): + self.hp = hgvs.parser.Parser() + + def test_empty(self): + with self.assertRaises(AttributeError): + utils.hgvs_dup2indel('') + + def test_sub(self): + stringseq = 'NM_015120.4:c.34C>T' + hgvsseq = self.hp.parse(stringseq) + output = utils.hgvs_dup2indel(hgvsseq) + self.assertIsInstance(output, str) + self.assertEqual(output, 'NM_015120.4:c.34_34delCinsCC') + + def test_del(self): + stringseq = 'NM_015120.4:c.34del' + hgvsseq = self.hp.parse(stringseq) + output = utils.hgvs_dup2indel(hgvsseq) 
+ self.assertIsInstance(output, str) + self.assertEqual(output, 'NM_015120.4:c.34_34delins') + + def test_dup(self): + stringseq = 'NM_015120.4:c.34dupG' + hgvsseq = self.hp.parse(stringseq) + output = utils.hgvs_dup2indel(hgvsseq) + self.assertIsInstance(output, str) + self.assertEqual(output, 'NM_015120.4:c.34_34delGinsGG') + + def test_dup_pair(self): + stringseq = 'NM_015120.4:c.34dupGA' + hgvsseq = self.hp.parse(stringseq) + output = utils.hgvs_dup2indel(hgvsseq) + self.assertIsInstance(output, str) + self.assertEqual(output, 'NM_015120.4:c.34_34delGAinsGAGA') From e59447da44b2b0b5b2f5a95a04f931625d26b7b5 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Tue, 25 Jun 2019 10:21:04 +0100 Subject: [PATCH 160/223] Updated travis file --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 1a135ce0..9f4c753e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -45,7 +45,7 @@ install: # Set up validator database - mysql validator < configuration/empty_vv_db.sql - - python bin/update_vdb.py + - update_vdb.py - df -h script: From 5143dfa7952088241e644a2b0588a4e2456c4cb4 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Tue, 25 Jun 2019 10:58:57 +0100 Subject: [PATCH 161/223] Updated travis file --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 9f4c753e..84966c1d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -41,7 +41,7 @@ install: # Test dependencies - pip install -r requirements_dev.txt - - pip install . 
+ - python setup.py install # Set up validator database - mysql validator < configuration/empty_vv_db.sql From e57c673b31c350833e8bd2f75f13c11e58139873 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Tue, 25 Jun 2019 15:15:56 +0100 Subject: [PATCH 162/223] Updated travis --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 84966c1d..4c0cdb5e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -41,7 +41,7 @@ install: # Test dependencies - pip install -r requirements_dev.txt - - python setup.py install + - pip install -e . # Set up validator database - mysql validator < configuration/empty_vv_db.sql From 30c8b8ca8a9dc6a3456ff68d29789d47c72e8ad1 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Tue, 25 Jun 2019 15:21:45 +0100 Subject: [PATCH 163/223] Added variant obj tests and made minor changes in obj --- VariantValidator/modules/variant.py | 7 +- tests/test_variant.py | 362 ++++++++++++++++++++++++++++ 2 files changed, 367 insertions(+), 2 deletions(-) create mode 100644 tests/test_variant.py diff --git a/VariantValidator/modules/variant.py b/VariantValidator/modules/variant.py index 82966720..c2b6fcad 100644 --- a/VariantValidator/modules/variant.py +++ b/VariantValidator/modules/variant.py @@ -24,7 +24,10 @@ def __init__(self, original, quibble=None, warnings=None, write=True, primary_as if warnings is None: self.warnings = [] else: - self.warnings = warnings + if isinstance(warnings, list): + self.warnings = warnings + else: + self.warnings = [warnings] self.description = '' # hgnc_gene_info variable self.coding = '' self.coding_g = '' @@ -206,7 +209,7 @@ def process_warnings(self): refined = [] for warning in self.warnings: warning = re.sub('del[GATC][GATC][GATC][GATC]+', 'del', warning) - warning.strip() + warning = warning.strip() warning = warning.replace("'", "") if warning == '': continue diff --git a/tests/test_variant.py b/tests/test_variant.py new file mode 100644 index 00000000..da7157a7 --- /dev/null 
+++ b/tests/test_variant.py @@ -0,0 +1,362 @@ +from unittest import TestCase +from VariantValidator.modules.variant import Variant +from VariantValidator.modules.utils import VariantValidatorError + + +class TestCreation(TestCase): + """ + Test the creation and attributes of the Variant obj. + """ + + def test_create(self): + var = Variant('NM_015120.4:c.34=') + + self.assertIsInstance(var, Variant) + self.assertEqual(var.original, 'NM_015120.4:c.34=') + + def test_create_all_init(self): + var = Variant('NM_015120.4:c.34=', quibble='NM_015120.4:c.34=', warnings=['Got a warning'], write=False, + primary_assembly='GRCh37', order=1) + + self.assertEqual(var.quibble, 'NM_015120.4:c.34=') + self.assertEqual(var.warnings, ['Got a warning']) + self.assertFalse(var.write) + self.assertEqual(var.primary_assembly, 'GRCh37') + self.assertEqual(var.order, 1) + + def test_quibble_type(self): + var = Variant('NM_015120.4:c.34=', quibble=0) + self.assertEqual(var.quibble, 0) + + def test_quibble_not_set(self): + var = Variant('NM_015120.4:c.34=') + self.assertEqual(var.quibble, 'NM_015120.4:c.34=') + + def test_warnings_type(self): + var = Variant('NM_015120.4:c.34=', warnings='string') + self.assertEqual(var.warnings, ['string']) + + def test_warnings_type_int(self): + var = Variant('NM_015120.4:c.34=', warnings=0) + self.assertEqual(var.warnings, [0]) + + def test_warnings_not_set(self): + var = Variant('NM_015120.4:c.34=') + self.assertEqual(var.warnings, []) + + def test_write_type(self): + var = Variant('NM_015120.4:c.34=', write='banana') + self.assertEqual(var.write, 'banana') + + def test_write_not_set(self): + var = Variant('NM_015120.4:c.34=') + self.assertEqual(var.write, True) + + def test_primary_assembly_not_set(self): + var = Variant('NM_015120.4:c.34=') + self.assertEqual(var.primary_assembly, False) + + def test_order_not_set(self): + var = Variant('NM_015120.4:c.34=') + self.assertEqual(var.order, False) + + def test_all_defaults(self): + var = 
Variant('NM_015120.4:c.34=') + + self.assertEqual(var.hgvs_formatted, None) + self.assertEqual(var.hgvs_genomic, None) + self.assertEqual(var.hgvs_coding, None) + self.assertEqual(var.post_format_conversion, None) + self.assertEqual(var.pre_RNA_conversion, None) + self.assertEqual(var.input_parses, None) + + self.assertEqual(var.description, '') + self.assertEqual(var.coding, '') + self.assertEqual(var.coding_g, '') + self.assertEqual(var.genomic_r, '') + self.assertEqual(var.genomic_g, '') + self.assertEqual(var.protein, '') + self.assertEqual(var.output_type_flag, 'warning') + self.assertEqual(var.gene_symbol, '') + + self.assertEqual(var.timing, {}) + + self.assertEqual(var.refsource, None) + self.assertEqual(var.reftype, None) + + # Normalizers + self.assertEqual(var.hn, None) + self.assertEqual(var.reverse_normalizer, None) + self.assertEqual(var.evm, None) + self.assertEqual(var.no_norm_evm, None) + self.assertEqual(var.min_evm, None) + self.assertEqual(var.lose_vm, None) + + # Required for output + self.assertEqual(var.hgvs_transcript_variant, None) + self.assertEqual(var.genome_context_intronic_sequence, None) + self.assertEqual(var.refseqgene_context_intronic_sequence, None) + self.assertEqual(var.hgvs_refseqgene_variant, None) + self.assertEqual(var.hgvs_predicted_protein_consequence, None) + self.assertEqual(var.hgvs_lrg_transcript_variant, None) + self.assertEqual(var.hgvs_lrg_variant, None) + self.assertEqual(var.alt_genomic_loci, None) + self.assertEqual(var.primary_assembly_loci, None) + self.assertEqual(var.reference_sequence_records, None) + self.assertEqual(var.validated, False) + + +class TestMethods(TestCase): + """ Test each method in the Variant Obj""" + + def setUp(self): + self.var = Variant('NM_015120.4:c.34=') + + def test_is_ascii(self): + self.assertTrue(self.var.is_ascii()) + + def test_is_ascii_false(self): + self.var.quibble = 'NM_015120.4:c.34=\u0086' + self.assertFalse(self.var.is_ascii()) + + def test_is_ascii_false2(self): + 
self.var.quibble = 'NM_015120.4:c.34=†' + self.assertFalse(self.var.is_ascii()) + + def test_get_non_ascii(self): + chars, pos = self.var.get_non_ascii() + self.assertEqual(chars, []) + self.assertEqual(pos, []) + + def test_get_non_ascii_encoded(self): + self.var.quibble = 'NM_\u0086015120.4:c.34=' + chars, pos = self.var.get_non_ascii() + self.assertEqual(chars, ['†']) + self.assertEqual(pos, [4]) + + def test_get_non_ascii_decoded(self): + self.var.quibble = 'NM_015120.4:c.34=†' + chars, pos = self.var.get_non_ascii() + self.assertEqual(chars, ['†']) + self.assertEqual(pos, [18]) + + def test_get_non_ascii_pair(self): + self.var.quibble = 'NM_\u0086015120.†4:c.34=' + chars, pos = self.var.get_non_ascii() + self.assertEqual(chars, ['†', '†']) + self.assertEqual(pos, [4, 12]) + + def test_remove_whitespace(self): + self.var.remove_whitespace() + self.assertEqual(self.var.quibble, 'NM_015120.4:c.34=') + + def test_remove_whitespace_space(self): + self.var.quibble = 'NM_015120 .4:c. 34=' + self.var.remove_whitespace() + self.assertEqual(self.var.quibble, 'NM_015120.4:c.34=') + + def test_remove_whitespace_tab(self): + self.var.quibble = 'NM_015120.\t4:c.34 =' + self.var.remove_whitespace() + self.assertEqual(self.var.quibble, 'NM_015120.4:c.34=') + + def test_remove_whitespace_newline(self): + self.var.quibble = 'NM_015120.4:c\n.34=' + self.var.remove_whitespace() + self.assertEqual(self.var.quibble, 'NM_015120.4:c.34=') + + def test_format_quibble(self): + output = self.var.format_quibble() + self.assertEqual(self.var.quibble, 'NM_015120.4:c.34=') + self.assertFalse(output) + + def test_format_quibble_brackets(self): + self.var.quibble = 'NM_0151(REMOVE)20.4:c.34=' + output = self.var.format_quibble() + self.assertEqual(self.var.quibble, 'NM_015120.4:c.34=') + self.assertFalse(output) + + def test_format_quibble_source_fail(self): + self.var.quibble = 'F_015120.4:c.34=' + output = self.var.format_quibble() + self.assertTrue(output) + 
self.assertEqual(self.var.quibble, 'F_015120.4:c.34=') + + def test_format_quibble_type_fail(self): + self.var.quibble = 'NM_015120.4:w.34=' + output = self.var.format_quibble() + self.assertTrue(output) + self.assertEqual(self.var.quibble, 'NM_015120.4:w.34=') + + def test_set_reftype(self): + self.var.set_reftype() + self.assertEqual(self.var.reftype, ':c.') + + def test_set_reftype_rna(self): + self.var.quibble = 'NM_015120.4:r.34=' + self.var.set_reftype() + self.assertEqual(self.var.reftype, ':r.') + + def test_set_reftype_nucl(self): + self.var.quibble = 'NM_015120.4:n.34=' + self.var.set_reftype() + self.assertEqual(self.var.reftype, ':n.') + + def test_set_reftype_mito(self): + self.var.quibble = 'NM_015120.4:m.34=' + self.var.set_reftype() + self.assertEqual(self.var.reftype, ':m.') + + def test_set_reftype_genome(self): + self.var.quibble = 'NM_015120.4:g.34=' + self.var.set_reftype() + self.assertEqual(self.var.reftype, ':g.') + + def test_set_reftype_prot(self): + self.var.quibble = 'NM_015120.4:p.34=' + self.var.set_reftype() + self.assertEqual(self.var.reftype, ':p.') + + def test_set_reftype_est(self): + self.var.quibble = 'NM_015120.4:3.34=' + self.var.set_reftype() + self.assertEqual(self.var.reftype, 'est') + + def test_set_reftype_none(self): + self.var.quibble = 'NM_015120.4:.34=' + with self.assertRaises(VariantValidatorError): + self.var.set_reftype() + + def test_set_source(self): + self.var.set_refsource() + self.assertEqual(self.var.refsource, 'RefSeq') + + def test_set_source_refseq_min(self): + self.var.quibble = 'N' + self.var.set_refsource() + self.assertEqual(self.var.refsource, 'RefSeq') + + def test_set_source_lrg(self): + self.var.quibble = 'LRG_015120.4:c.34=' + self.var.set_refsource() + self.assertEqual(self.var.refsource, 'LRG') + + def test_set_source_lrg_min(self): + self.var.quibble = 'LRG' + self.var.set_refsource() + self.assertEqual(self.var.refsource, 'LRG') + + def test_set_source_ens(self): + self.var.quibble = 
'ENSG_015120.4:c.34=' + self.var.set_refsource() + self.assertEqual(self.var.refsource, 'ENS') + + def test_set_source_ens_min(self): + self.var.quibble = 'ENS' + self.var.set_refsource() + self.assertEqual(self.var.refsource, 'ENS') + + def test_set_source_none(self): + self.var.quibble = 'SOMETHING ELSE' + with self.assertRaises(VariantValidatorError): + self.var.set_refsource() + + def test_set_quibble(self): + self.var.set_quibble('New:c.var') + self.assertEqual(self.var.quibble, 'New:c.var') + self.assertEqual(self.var.refsource, 'RefSeq') + self.assertEqual(self.var.reftype, ':c.') + + def test_is_obsolete(self): + self.assertFalse(self.var.is_obsolete()) + + def test_is_obsolete_false(self): + self.var.warnings = ['Nearly obso', 'lete'] + self.assertFalse(self.var.is_obsolete()) + + def test_is_obsolete_true(self): + self.var.warnings = ['obsoleteANDother'] + self.assertTrue(self.var.is_obsolete()) + + def test_process_warnings(self): + output = self.var.process_warnings() + self.assertIsInstance(output, list) + self.assertEqual(output, []) + + def test_process_warnings_sub(self): + self.var.warnings = ['variantdelATGCTAGCTA'] + output = self.var.process_warnings() + self.assertEqual(output, ['variantdel']) + + def test_process_warnings_sub_not(self): + self.var.warnings = ['variantdelATG'] + output = self.var.process_warnings() + self.assertEqual(output, ['variantdelATG']) + + def test_process_warnings_strip(self): + self.var.warnings = [' warning '] + output = self.var.process_warnings() + self.assertEqual(output, ['warning']) + + def test_process_warnings_replace(self): + self.var.warnings = ['\'warning\''] + output = self.var.process_warnings() + self.assertEqual(output, ['warning']) + + def test_process_warnings_unique(self): + self.var.warnings = ['one', 'two', 'one'] + output = self.var.process_warnings() + self.assertEqual(output, ['one', 'two']) + + def test_output_dict_empty(self): + output = self.var.output_dict() + self.assertIsInstance(output, 
dict) + self.assertEqual(output, { + 'submitted_variant': 'NM_015120.4:c.34=', + 'gene_symbol': '', + 'transcript_description': '', + 'hgvs_transcript_variant': None, + 'genome_context_intronic_sequence': None, + 'refseqgene_context_intronic_sequence': None, + 'hgvs_refseqgene_variant': None, + 'hgvs_predicted_protein_consequence': None, + 'validation_warnings': [], + 'hgvs_lrg_transcript_variant': None, + 'hgvs_lrg_variant': None, + 'alt_genomic_loci': None, + 'primary_assembly_loci': None, + 'reference_sequence_records': None, + }) + + def test_output_dict_set(self): + self.var.gene_symbol = 'Symbol' + self.var.description = 'Desc' + self.var.hgvs_transcript_variant = 'hgvsvar' + self.var.genome_context_intronic_sequence = 'gintronic' + self.var.refseqgene_context_intronic_sequence = 'rintronic' + self.var.hgvs_refseqgene_variant = 'refseq' + self.var.hgvs_predicted_protein_consequence = 'prot' + self.var.warnings = ['warning'] + self.var.hgvs_lrg_transcript_variant = 'lrgT' + self.var.hgvs_lrg_variant = 'lrg' + self.var.alt_genomic_loci = 'alt' + self.var.primary_assembly_loci = 'primary' + self.var.reference_sequence_records = 'records' + output = self.var.output_dict() + self.assertIsInstance(output, dict) + self.assertEqual(output, { + 'submitted_variant': 'NM_015120.4:c.34=', + 'gene_symbol': 'Symbol', + 'transcript_description': 'Desc', + 'hgvs_transcript_variant': 'hgvsvar', + 'genome_context_intronic_sequence': 'gintronic', + 'refseqgene_context_intronic_sequence': 'rintronic', + 'hgvs_refseqgene_variant': 'refseq', + 'hgvs_predicted_protein_consequence': 'prot', + 'validation_warnings': ['warning'], + 'hgvs_lrg_transcript_variant': 'lrgT', + 'hgvs_lrg_variant': 'lrg', + 'alt_genomic_loci': 'alt', + 'primary_assembly_loci': 'primary', + 'reference_sequence_records': 'records', + }) From 88ee239db679d90f9e980a7999647dc2e980b281 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Tue, 25 Jun 2019 16:51:41 +0100 Subject: [PATCH 164/223] Removed a couple of 
unnecessary methods from database obj --- VariantValidator/modules/vvDBInit.py | 12 ++++++------ VariantValidator/modules/vvDBInsert.py | 8 -------- VariantValidator/modules/vvDatabase.py | 4 ++-- 3 files changed, 8 insertions(+), 16 deletions(-) diff --git a/VariantValidator/modules/vvDBInit.py b/VariantValidator/modules/vvDBInit.py index cae320b2..80a2db51 100644 --- a/VariantValidator/modules/vvDBInit.py +++ b/VariantValidator/modules/vvDBInit.py @@ -1,17 +1,17 @@ import mysql.connector from mysql.connector.pooling import MySQLConnectionPool -import os -class Mixin(): - ''' + +class Mixin: + """ A mixin containing the database initialisation routines. - ''' - def __init__(self, dbConfig): + """ + def __init__(self, db_config): self.conn = None # self.cursor will be none UNLESS you're wrapping a function in @handleCursor, which automatically opens and # closes connections for you. self.cursor = None - self.dbConfig = dbConfig + self.dbConfig = db_config self.pool = mysql.connector.pooling.MySQLConnectionPool(pool_size=10, **self.dbConfig) self.conn = self.pool.get_connection() diff --git a/VariantValidator/modules/vvDBInsert.py b/VariantValidator/modules/vvDBInsert.py index 9021bf42..dd09db91 100644 --- a/VariantValidator/modules/vvDBInsert.py +++ b/VariantValidator/modules/vvDBInsert.py @@ -6,12 +6,6 @@ class Mixin(vvDBGet.Mixin): """ This object is a function container for inserting objects into the database. 
""" - # Add new entry - def add_entry(self, entry, data, table): - return self.insert(entry, data, table) - - def insert_transcript_loci(self, add_data, primary_assembly): - return self.insert_transcript_loci(add_data, primary_assembly) @handleCursor def insert(self, entry, data, table): @@ -116,5 +110,3 @@ def update_refseq_gene_data(self, rsg_data): self.conn.commit() return success - def update_entry(self, entry, data): - return self.update(entry, data) diff --git a/VariantValidator/modules/vvDatabase.py b/VariantValidator/modules/vvDatabase.py index 3b30d794..742c2d06 100644 --- a/VariantValidator/modules/vvDatabase.py +++ b/VariantValidator/modules/vvDatabase.py @@ -142,10 +142,10 @@ def update_transcript_info_record(self, accession, validator): returned_data = self.in_entries(version, table) # If the entry is not in the database add it if 'none' in returned_data: - self.add_entry(version, query_info, table) + self.insert(version, query_info, table) # If the data in the entry has changed, update it else: - self.update_entry(version, query_info) + self.update(version, query_info) return def update_refseqgene_loci(self, rsg_data): From 26e7abda9b59271d19b71112689fd23a1ab0fdeb Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 8 Jul 2019 12:31:10 +0100 Subject: [PATCH 165/223] Fixed error with missing gene_symbol when database is empty --- VariantValidator/modules/vvMixinCore.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 2adc92e4..87bc302f 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -1005,6 +1005,7 @@ def _get_transcript_info(self, variant): Logger.warning(error) return True variant.description = entry['description'] + variant.gene_symbol = entry['hgnc_symbol'] else: variant.description = entry['description'] # If the none key is found add the description to the database @@ -1020,6 +1021,7 @@ def 
_get_transcript_info(self, variant): Logger.warning(error) return True variant.description = entry['description'] + variant.gene_symbol = entry['hgnc_symbol'] # If no correct keys are found else: From 547b17fdec6c85c4544411f7e0abab9595eb4847 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 8 Jul 2019 14:24:38 +0100 Subject: [PATCH 166/223] Changed default value for liftover in config to be lowercase and edited manual --- VariantValidator/modules/liftover.py | 2 +- configuration/default.ini | 2 +- configuration/travis.ini | 2 +- docs/MANUAL.md | 9 ++++++--- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/VariantValidator/modules/liftover.py b/VariantValidator/modules/liftover.py index ef721c06..6324b7aa 100644 --- a/VariantValidator/modules/liftover.py +++ b/VariantValidator/modules/liftover.py @@ -223,7 +223,7 @@ def liftover(hgvs_genomic, build_from, build_to, hn, reverse_normalizer, evm, va from_vcf = hgvs_utils.report_hgvs2vcf(hgvs_genomic, lo_from, reverse_normalizer, validator.sf) pyliftover_dir = None - if validator.liftoverPath is not None and validator.liftoverPath != '/PATH/TO/LIFTOVER': + if validator.liftoverPath is not None and validator.liftoverPath != '/path/to/liftover': pyliftover_dir = validator.liftoverPath if pyliftover_dir is not None: diff --git a/configuration/default.ini b/configuration/default.ini index 9e1f2960..50ca29a2 100644 --- a/configuration/default.ini +++ b/configuration/default.ini @@ -29,7 +29,7 @@ trace = false entrezid = admin@variantvalidator.org [liftover] -location = /PATH/TO/LIFTOVER +location = /path/to/liftover # diff --git a/configuration/travis.ini b/configuration/travis.ini index 3a67ae52..3840a517 100644 --- a/configuration/travis.ini +++ b/configuration/travis.ini @@ -29,7 +29,7 @@ trace = false entrezid = admin@variantvalidator.org [liftover] -location = /PATH/TO/LIFTOVER +location = /path/to/liftover # diff --git a/docs/MANUAL.md b/docs/MANUAL.md index 7a9cd984..f129ec6e 100644 --- a/docs/MANUAL.md 
+++ b/docs/MANUAL.md @@ -40,14 +40,17 @@ trace = false entrezid = admin@variantvalidator.org [liftover] -location = /PATH/TO/LIFTOVER +location = /path/to/liftover ``` -The values in capitals must be replaced for Variant Validator to run, -except for the liftover path, which is optional. +The values in capitals must be replaced for Variant Validator to run. By default the edited configuration will be placed in the users home directory (`~/.variantvalidator`), this location can be changed for all users by editing the `VariantValidator/settings.py` file. +#####Liftover + +If the UCSC Liftover [files](http://hgdownload.soe.ucsc.edu/goldenPath/hg38/liftOver/) have been previously downloaded their location can be set within the configuration file. By default the necessary files will be downloaded automatically when first requested. + ## Database updates To import the initial data into the Validator MySQL database, run the following script: From 1ccd779528cb4bb5f8f488c0104ce758bb57e40f Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 8 Jul 2019 15:08:18 +0100 Subject: [PATCH 167/223] Changed version number file reading so it should install better --- VERSION.txt | 1 - VariantValidator/version.py | 5 +---- setup.py | 7 ++++++- 3 files changed, 7 insertions(+), 6 deletions(-) delete mode 100644 VERSION.txt diff --git a/VERSION.txt b/VERSION.txt deleted file mode 100644 index abfc95d6..00000000 --- a/VERSION.txt +++ /dev/null @@ -1 +0,0 @@ -0.9a diff --git a/VariantValidator/version.py b/VariantValidator/version.py index 7281e05e..1f356cc5 100644 --- a/VariantValidator/version.py +++ b/VariantValidator/version.py @@ -1,4 +1 @@ -import os - -with open(os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(__file__))), 'VERSION.txt')) as fh: - __version__ = fh.read().strip() +__version__ = '1.0.0' diff --git a/setup.py b/setup.py index 1e457085..8c8b07b5 100644 --- a/setup.py +++ b/setup.py @@ -3,9 +3,14 @@ # Prefer setuptools over distutils from setuptools import 
setup, find_packages +with open('VariantValidator/version.py') as ins: + version = ins.read() + version = version.split('=')[1].strip() + version = version.replace("'", "") + setup( name='VariantValidator', - version=open('VERSION.txt').read().strip(), + version=version, description='API for accurate, mapping and formatting of sequence variants using HGVS nomenclature', long_description=open('README.md').read(), url='https://variantvalidator.org', From 7588fae0ef28174ed590a3fae32dd820ad4f5f69 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 8 Jul 2019 15:08:45 +0100 Subject: [PATCH 168/223] Updated travis file --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 4c0cdb5e..84966c1d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -41,7 +41,7 @@ install: # Test dependencies - pip install -r requirements_dev.txt - - pip install -e . + - python setup.py install # Set up validator database - mysql validator < configuration/empty_vv_db.sql From a9351a28ca9bc9dc8d1cda92665ec6e33c9cd98e Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 8 Jul 2019 16:21:09 +0100 Subject: [PATCH 169/223] Updated test following RefSeq LRG update --- tests/test_inputs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_inputs.py b/tests/test_inputs.py index 0a207ce6..f558b549 100644 --- a/tests/test_inputs.py +++ b/tests/test_inputs.py @@ -17535,7 +17535,7 @@ def test_variant320(self): assert results['NM_000166.5:c.-101C>T']['refseqgene_context_intronic_sequence'] == '' self.assertCountEqual(results['NM_000166.5:c.-101C>T']['alt_genomic_loci'], []) assert results['NM_000166.5:c.-101C>T']['gene_symbol'] == 'GJB1' - assert results['NM_000166.5:c.-101C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000157.1:p.?', 'slr': 'NP_000157.1:p.?'} + assert results['NM_000166.5:c.-101C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000157.1(LRG_245p2):p.?', 'slr': 'NP_000157.1:p.?'} assert 
results['NM_000166.5:c.-101C>T']['submitted_variant'] == 'X-70443101-C-T' assert results['NM_000166.5:c.-101C>T']['genome_context_intronic_sequence'] == '' assert results['NM_000166.5:c.-101C>T']['hgvs_lrg_variant'] == '' From 3091f4f59d147cfd82efdf938b1f7335f2c0a8be Mon Sep 17 00:00:00 2001 From: TeriForey Date: Tue, 9 Jul 2019 09:01:38 +0100 Subject: [PATCH 170/223] Removed old version file from manifest --- MANIFEST.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index ecaf8cb5..5e0852cd 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,3 @@ recursive-include configuration * -include README.md VERSION.txt +include README.md recursive-include bin * \ No newline at end of file From f409a9165ba16eff5e0d07e4762f2e5bc00c7c56 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Tue, 9 Jul 2019 09:03:52 +0100 Subject: [PATCH 171/223] Replaced urllib with requests --- VariantValidator/modules/utils.py | 23 ++---- VariantValidator/modules/vvMixinConverters.py | 4 -- VariantValidator/update_vv_db.py | 70 +++++-------------- environment.yml | 1 + requirements.txt | 1 + requirements_dev.txt | 1 + setup.py | 1 + 7 files changed, 28 insertions(+), 73 deletions(-) diff --git a/VariantValidator/modules/utils.py b/VariantValidator/modules/utils.py index 900d5d1d..c0286b1e 100644 --- a/VariantValidator/modules/utils.py +++ b/VariantValidator/modules/utils.py @@ -1,8 +1,6 @@ from Bio.Seq import Seq from Bio.Alphabet import IUPAC -import httplib2 as http -import json -from urllib.parse import urlparse # Python 3 +import requests import functools import traceback import sys @@ -34,21 +32,14 @@ def hgnc_rest(path): headers = { 'Accept': 'application/json', } - uri = 'http://rest.genenames.org' - target = urlparse(uri + path) - method = 'GET' - body = '' - h = http.Http() - # collect the response - response, content = h.request( - target.geturl(), - method, - body, - headers) - if response['status'] == '200': + domain = 
'http://rest.genenames.org' + url = domain + path + r = requests.get(url, headers=headers) + + if r.status_code == 200: # assume that content is a json reply # parse content with the json module - data['record'] = json.loads(content) + data['record'] = r.json() else: data['error'] = "Unable to contact the HGNC database: Please try again later" return data diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index 2191a8d3..c1a23f4d 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -1,5 +1,4 @@ import re -import sys import copy from .logger import Logger import hgvs @@ -7,9 +6,6 @@ from . import vvMixinInit from . import seq_data from . import hgvs_utils -from urllib.parse import urlparse -import httplib2 as http -import json from Bio import Entrez,SeqIO from . import utils as fn diff --git a/VariantValidator/update_vv_db.py b/VariantValidator/update_vv_db.py index 6ee8cd96..a3fb49ab 100644 --- a/VariantValidator/update_vv_db.py +++ b/VariantValidator/update_vv_db.py @@ -1,7 +1,5 @@ # -*- coding: utf-8 -*- - -import os -import urllib.request, urllib.error, urllib.parse +import requests import copy from configparser import ConfigParser from .modules import vvDatabase @@ -12,7 +10,7 @@ def connect(): config = ConfigParser() config.read(configure.CONFIG_DIR) - dbConfig = { + db_config = { 'user': config["mysql"]["user"], 'password': config["mysql"]["password"], 'host': config["mysql"]["host"], @@ -20,7 +18,7 @@ def connect(): 'raise_on_warnings': True } # Create database access objects - db = vvDatabase.Database(dbConfig) + db = vvDatabase.Database(db_config) return db @@ -45,39 +43,21 @@ def update(): def update_refseq(dbcnx): print('Updating RefSeqGene no Missmatch MySQL data') - # Set os path - # Set up os paths data and log folders - ROOT = os.path.dirname(os.path.abspath(__file__)) # Download data from RefSeqGene # Download data - rsg = 
urllib.request.Request('http://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/gene_RefSeqGene') - response = urllib.request.urlopen(rsg) - rsg_file = response.read() - rsg_data_line = rsg_file.split(b'\n') - rsg_data = [] - for data in rsg_data_line: - rsg_data.append(data.decode()) + rsg = requests.get('http://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/gene_RefSeqGene') + rsg_data = rsg.text.strip().split('\n') # Download data - grch37 = urllib.request.Request( + grch37 = requests.get( 'http://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/GCF_000001405.25_refseqgene_alignments.gff3') - response = urllib.request.urlopen(grch37) - grch37_file = response.read() - grch37_data_line = grch37_file.split(b'\n') - grch37_align_data = [] - for data in grch37_data_line: - grch37_align_data.append(data.decode()) + grch37_align_data = grch37.text.strip().split('\n') # Download data - grch38 = urllib.request.Request( + grch38 = requests.get( 'http://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/GCF_000001405.28_refseqgene_alignments.gff3') - response = urllib.request.urlopen(grch38) - grch38_file = response.read() - grch38_data_line = grch38_file.split(b'\n') - grch38_align_data = [] - for data in grch38_data_line: - grch38_align_data.append(data.decode()) + grch38_align_data = grch38.text.strip().split('\n') # Open Lists # rsg_data = open(os.path.join(ROOT, 'gene_RefSeqGene'), 'r') @@ -125,7 +105,6 @@ def update_refseq(dbcnx): elif ng_nc != 'failed': grch38_align.append(ng_nc) - # Create a data array containing the database db = [] # map line @@ -195,31 +174,16 @@ def update_refseq(dbcnx): def update_lrg(dbcnx): print('Updating LRG lookup tables') - lr2rs_download = urllib.request.Request('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_transcripts_xrefs.txt') - # Open and read - lr2rs_data = urllib.request.urlopen(lr2rs_download) - lr2rs = lr2rs_data.read() - # List the data - lr2rs = lr2rs.strip().decode() - lr2rs = lr2rs.split('\n') + lr2rs_download = 
requests.get('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_transcripts_xrefs.txt') + lr2rs = lr2rs_download.text.strip().split('\n') # Download - lrg_status_download = urllib.request.Request('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_GRCh38.txt') - # Open and read - lrg_status_data = urllib.request.urlopen(lrg_status_download) - lrg_status = lrg_status_data.read() - # List the data - lrg_status = lrg_status.strip().decode() - lrg_status = lrg_status.split('\n') + lrg_status_download = requests.get('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_GRCh38.txt') + lrg_status = lrg_status_download.text.strip().split('\n') # Download LRG transcript (_t) to LRG Protein (__p) data file - lr_t2p_downloaded = urllib.request.Request('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_proteins_RefSeq.txt') - # Open and read - lr_t2p_data = urllib.request.urlopen(lr_t2p_downloaded) - lr_t2p = lr_t2p_data.read() - # List the data - lr_t2p = lr_t2p.strip().decode() - lr_t2p = lr_t2p.split('\n') + lr_t2p_downloaded = requests.get('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_proteins_RefSeq.txt') + lr_t2p = lr_t2p_downloaded.text.strip().split('\n') # Dictionary the status by LRG_ID lrg_status_dict = {} @@ -256,9 +220,9 @@ def update_lrg(dbcnx): dbcnx.update_lrg_rs_lookup(lrg_rs_lookup) # lrg_t2nm_ - lrgtx_to_rstID = [lrg_tx, rstid] + lrgtx_to_rst_id = [lrg_tx, rstid] # update database - dbcnx.update_lrgt_rst(lrgtx_to_rstID) + dbcnx.update_lrgt_rst(lrgtx_to_rst_id) print('Update LRG protein lookup table') # Populate LRG protein RefSeqProtein lokup table diff --git a/environment.yml b/environment.yml index 8296266a..36fc75f6 100644 --- a/environment.yml +++ b/environment.yml @@ -11,6 +11,7 @@ dependencies: - numpy - httplib2>=0.9.0 - configparser>=3.5.0 + - requests - pip: - git+https://github.com/openvar/vv_hgvs@master#egg=hgvs - biotools>=0.3.0 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 44abbe98..09a472c7 100644 --- 
a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ configparser>=3.5.0 pyliftover>=0.3 biotools>=0.3.0 mysql-connector-python +requests diff --git a/requirements_dev.txt b/requirements_dev.txt index 0c691c7c..3a673ff7 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -5,6 +5,7 @@ configparser>=3.5.0 pyliftover>=0.3 biotools>=0.3.0 mysql-connector-python +requests pytest>=3.6 pytest-cov codecov \ No newline at end of file diff --git a/setup.py b/setup.py index 8c8b07b5..729d329b 100644 --- a/setup.py +++ b/setup.py @@ -64,6 +64,7 @@ "configparser >= 3.5.0", "pyliftover >= 0.3", "biotools >= 0.3.0", + "requests", ], dependency_links=[ "git+https://github.com/openvar/vv_hgvs@master#egg=hgvs" From 4fa123532998eabcfc2c29173738711985843245 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 10 Jul 2019 08:51:13 +0100 Subject: [PATCH 172/223] Changed process for finding transcript info, now distinguishes between obsolete accession and database connection errors --- VariantValidator/modules/utils.py | 8 ++ VariantValidator/modules/vvDatabase.py | 101 ++++++++++++------------ VariantValidator/modules/vvMixinCore.py | 23 +++--- 3 files changed, 73 insertions(+), 59 deletions(-) diff --git a/VariantValidator/modules/utils.py b/VariantValidator/modules/utils.py index c0286b1e..ec3c0a42 100644 --- a/VariantValidator/modules/utils.py +++ b/VariantValidator/modules/utils.py @@ -446,3 +446,11 @@ class mergeHGVSerror(Exception): class alleleVariantError(Exception): pass + + +class DatabaseConnectionError(Exception): + pass + + +class ObsoleteSeqError(Exception): + pass diff --git a/VariantValidator/modules/vvDatabase.py b/VariantValidator/modules/vvDatabase.py index 742c2d06..1492667f 100644 --- a/VariantValidator/modules/vvDatabase.py +++ b/VariantValidator/modules/vvDatabase.py @@ -3,6 +3,7 @@ from .utils import handleCursor from . 
import vvDBInsert import re +import hgvs.exceptions class Database(vvDBInsert.Mixin): @@ -74,67 +75,67 @@ def update_transcript_info_record(self, accession, validator): """ Search Entrez for corresponding record for the RefSeq ID """ - # Prime these entries, just in case. - previous_entry = self.in_entries(accession, 'transcript_info') - accession = accession - if 'none' not in previous_entry.keys(): - description = previous_entry['description'] - variant = previous_entry['variant'] - version = previous_entry['version'] - hgnc_symbol = previous_entry['hgnc_symbol'] - uta_symbol = previous_entry['uta_symbol'] + try: record = validator.entrez_efetch(db="nucleotide", id=accession, rettype="gb", retmode="text") - version = record.id - description = record.description + except IOError: + raise utils.DatabaseConnectionError("Cannot retrieve data from NCBI Entrez") + + version = record.id + description = record.description + + if 'comment' in record.annotations: + comment = record.annotations['comment'] + if 'WARNING' in comment and 'this sequence was replaced by' in comment: + raise utils.ObsoleteSeqError("Sequence is obsolete in NCBI Entrez record") + + if 'transcript variant' in description: + tv = re.search(r'transcript variant \w+', description) + tv = str(tv.group(0)) + tv = tv.replace('transcript variant', '') + variant = tv.strip() + variant = variant.upper() # Some tv descriptions are a or A + else: variant = '0' - if 'transcript variant' in description: - tv = re.search(r'transcript variant \w+', description) - tv = str(tv.group(0)) - tv = tv.replace('transcript variant', '') - variant = tv.strip() - variant = variant.upper() # Some tv descriptions are a or A - else: - variant = '0' - - # Get information from UTA + # Get information from UTA + try: + uta_info = validator.hdp.get_tx_identity_info(version) + except hgvs.exceptions.HGVSDataNotAvailableError: + version_ac_ver = version.split('.') + version = version_ac_ver[0] + '.' 
+ str(int(version_ac_ver[1]) - 1) try: uta_info = validator.hdp.get_tx_identity_info(version) - except: - version_ac_ver = version.split('.') - version = version_ac_ver[0] + '.' + str(int(version_ac_ver[1]) - 1) - uta_info = validator.hdp.get_tx_identity_info(version) + except hgvs.exceptions.HGVSDataNotAvailableError: + raise utils.DatabaseConnectionError("Cannot retrieve data from UTA database") - uta_symbol = str(uta_info[6]) + uta_symbol = str(uta_info[6]) + if uta_symbol == '': + raise utils.ObsoleteSeqError("Cannot find UTA symbol, accession is likely obsolete") - # First perform a search against the input gene symbol or the symbol inferred from UTA - initial = utils.hgnc_rest(path="/fetch/symbol/" + uta_symbol) - # Check for a record - if str(initial['record']['response']['numFound']) != '0': - hgnc_symbol = uta_symbol - # No record found, is it a previous symbol? - else: - # Search hgnc rest to see if symbol is out of date - rest_data = utils.hgnc_rest(path="/search/prev_symbol/" + uta_symbol) - # If the name is correct no record will be found - if rest_data['error'] == 'false': - if int(rest_data['record']['response']['numFound']) == 0: - hgnc_symbol = uta_info[6] - else: - hgnc_symbol = rest_data['record']['response']['docs'][0]['symbol'] - else: - hgnc_symbol = 'unassigned' + # First perform a search against the input gene symbol or the symbol inferred from UTA + initial = utils.hgnc_rest(path="/fetch/symbol/" + uta_symbol) + + if initial['error'] != 'false': + raise utils.DatabaseConnectionError("Unable to retrieve data from the HGNC database") - # List of connection error types. May need to be expanded. - # Outcome - Put off update for 3 months! - except Exception as e: - if not str(e) == '': - # Issues with DNSSEC for the nih.gov - raise + # Check for a record + if str(initial['record']['response']['numFound']) != '0': + hgnc_symbol = uta_symbol + # No record found, is it a previous symbol? 
+ else: + # Search hgnc rest to see if symbol is out of date + rest_data = utils.hgnc_rest(path="/search/prev_symbol/" + uta_symbol) + # If the name is correct no record will be found + if rest_data['error'] == 'false': + if int(rest_data['record']['response']['numFound']) == 0: + hgnc_symbol = uta_info[6] + else: + hgnc_symbol = rest_data['record']['response']['docs'][0]['symbol'] + else: + hgnc_symbol = 'unassigned' # Query information - # query_info = [accession, description, variant, version, hgnc_symbol, uta_symbol] query_info = [version, description, variant, version, hgnc_symbol, uta_symbol] table = 'transcript_info' diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 87bc302f..077fd1e5 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -997,10 +997,13 @@ def _get_transcript_info(self, variant): variant.warnings.append(error) Logger.warning(error) return True - except Exception: - error = 'Unable to assign transcript identity records to ' + accession + \ - ', potentially an obsolete record or there is an issue retrieving data from NCBI. ' \ - 'Please try again later and if the problem persists contact admin' + except fn.ObsoleteSeqError as e: + error = 'Unable to assign transcript identity records to %s. %s' % (accession, str(e)) + variant.warnings.append(error) + Logger.warning(error) + return True + except fn.DatabaseConnectionError as e: + error = '%s. Please try again later and if the problem persists contact admin.' % str(e) variant.warnings.append(error) Logger.warning(error) return True @@ -1012,11 +1015,13 @@ def _get_transcript_info(self, variant): elif 'none' in entry: try: entry = self.db.data_add(accession=accession, validator=self) - except Exception as e: - Logger.warning(str(e)) - error = 'Unable to assign transcript identity records to ' + accession + \ - ', potentially an obsolete record or there is an issue retrieving data from NCBI. 
' \ - 'Please try again later and if the problem persists contact admin' + except fn.ObsoleteSeqError as e: + error = 'Unable to assign transcript identity records to %s. %s' % (accession, str(e)) + variant.warnings.append(error) + Logger.warning(error) + return True + except fn.DatabaseConnectionError as e: + error = '%s. Please try again later and if the problem persists contact admin.' % str(e) variant.warnings.append(error) Logger.warning(error) return True From d33ef414d8ecf2e3050f43e7dc30b5ddae00d27b Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 10 Jul 2019 08:52:32 +0100 Subject: [PATCH 173/223] Changed output flag to default to warning and only alter it to gene_variant or intergenic --- VariantValidator/modules/valoutput.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/VariantValidator/modules/valoutput.py b/VariantValidator/modules/valoutput.py index fdb700e5..73c49dd3 100644 --- a/VariantValidator/modules/valoutput.py +++ b/VariantValidator/modules/valoutput.py @@ -12,7 +12,7 @@ def __init__(self, outputlist, validator): self.validator = validator def format_as_dict(self, with_meta=True): - validation_output = {'flag': None} + validation_output = {'flag': 'warning'} validation_error_counter = 0 validation_obsolete_counter = 0 @@ -47,7 +47,7 @@ def format_as_dict(self, with_meta=True): # Gene symbol as reference sequence # Gene as transcript reference sequence if variant.output_type_flag == 'warning': - validation_output['flag'] = 'warning' + # validation_output['flag'] = 'warning' if variant.warnings == ['Validation error']: validation_error_counter = validation_error_counter + 1 identification_key = 'validation_error_%s' % validation_error_counter From 3b258d13b3a71e3e20c996a55aafbe062bf64b0f Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 10 Jul 2019 09:13:09 +0100 Subject: [PATCH 174/223] Removed check on status in Entrez gene record as only want to make obsolete sequence that also aren't in UTA --- 
VariantValidator/modules/vvDatabase.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/VariantValidator/modules/vvDatabase.py b/VariantValidator/modules/vvDatabase.py index 1492667f..41247647 100644 --- a/VariantValidator/modules/vvDatabase.py +++ b/VariantValidator/modules/vvDatabase.py @@ -84,10 +84,11 @@ def update_transcript_info_record(self, accession, validator): version = record.id description = record.description - if 'comment' in record.annotations: - comment = record.annotations['comment'] - if 'WARNING' in comment and 'this sequence was replaced by' in comment: - raise utils.ObsoleteSeqError("Sequence is obsolete in NCBI Entrez record") + # Although it is obsolete, might still be in UTA database so would work in our case + # if 'comment' in record.annotations: + # comment = record.annotations['comment'] + # if 'WARNING' in comment and 'this sequence was replaced by' in comment: + # raise utils.ObsoleteSeqError("Sequence is obsolete in NCBI Entrez record") if 'transcript variant' in description: tv = re.search(r'transcript variant \w+', description) From 9fdc9220321fab2adc76cc0604aac17e90b88bc1 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 11 Jul 2019 11:40:49 +0100 Subject: [PATCH 175/223] Complete overhaul of logging process --- VariantValidator/__init__.py | 1 + VariantValidator/logger.py | 16 ++ VariantValidator/modules/format_converters.py | 136 ++++++++-------- VariantValidator/modules/gapped_mapping.py | 121 +++++++------- VariantValidator/modules/liftover.py | 12 +- VariantValidator/modules/logger.py | 154 ------------------ VariantValidator/modules/mappers.py | 91 ++++++----- VariantValidator/modules/use_checking.py | 122 +++++++------- VariantValidator/modules/utils.py | 19 +-- VariantValidator/modules/valoutput.py | 12 +- VariantValidator/modules/vvDBGet.py | 8 +- VariantValidator/modules/vvDatabase.py | 6 +- VariantValidator/modules/vvMixinConverters.py | 87 +++++----- VariantValidator/modules/vvMixinCore.py | 
109 ++++++------- VariantValidator/modules/vvMixinInit.py | 14 -- VariantValidator/settings.py | 37 +++++ VariantValidator/update_vv_db.py | 21 ++- configuration/default.ini | 12 +- configuration/travis.ini | 12 +- docs/MANUAL.md | 16 +- 20 files changed, 443 insertions(+), 563 deletions(-) create mode 100644 VariantValidator/logger.py delete mode 100644 VariantValidator/modules/logger.py diff --git a/VariantValidator/__init__.py b/VariantValidator/__init__.py index c3d9eec2..89d852d8 100644 --- a/VariantValidator/__init__.py +++ b/VariantValidator/__init__.py @@ -1,4 +1,5 @@ from . import configure +from . import logger from .version import __version__ from .variantValidator import Validator diff --git a/VariantValidator/logger.py b/VariantValidator/logger.py new file mode 100644 index 00000000..dbee415b --- /dev/null +++ b/VariantValidator/logger.py @@ -0,0 +1,16 @@ +import logging.config +from configparser import ConfigParser +from . import settings + +# Change settings based on config +config = ConfigParser() +config.read(settings.CONFIG_DIR) + +if config['logging'].getboolean('log') is True: + settings.LOGGING_CONFIG['handlers']['console']['level'] = config['logging']['console'].upper() + settings.LOGGING_CONFIG['handlers']['file']['level'] = config['logging']['file'].upper() + + logging.config.dictConfig(settings.LOGGING_CONFIG) +else: + logging.getLogger('VariantValidator').addHandler(logging.NullHandler()) + diff --git a/VariantValidator/modules/format_converters.py b/VariantValidator/modules/format_converters.py index eb721aa8..2bae3844 100644 --- a/VariantValidator/modules/format_converters.py +++ b/VariantValidator/modules/format_converters.py @@ -1,11 +1,13 @@ import re import hgvs.exceptions import copy -from .logger import Logger +import logging from .variant import Variant from . import seq_data from . 
import utils as fn +logger = logging.getLogger(__name__) + def initial_format_conversions(variant, validator, select_transcripts_dict_plus_version): # VCF type 1 @@ -81,12 +83,12 @@ def vcf2hgvs_stage1(variant, validator): pre_input = variant.quibble vcf_elements = pre_input.split('-') variant.warnings = ['Not stating ALT bases is ambiguous because VCF specification 4.0 would treat ' + - pre_input + ' as a deletion whereas VCF specification 4.1 onwards would treat ' + - pre_input + ' as ALT = REF'] + pre_input + ' as a deletion whereas VCF specification 4.1 onwards would treat ' + + pre_input + ' as ALT = REF'] variant.warnings.append('VariantValidator has output both alternatives') - Logger.resub('Not stating ALT bases is ambiguous because VCF specification 4.0 would treat ' + - pre_input + ' as a deletion whereas VCF specification 4.1 onwards would treat ' + pre_input + - ' as ALT = REF. Validator will output both alternatives.') + logger.info('Not stating ALT bases is ambiguous because VCF specification 4.0 would treat %s as a deletion ' + 'whereas VCF specification 4.1 onwards would treat %s as ALT = REF. 
Validator will output ' + 'both alternatives.', pre_input, pre_input) variant.write = False input_a = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], 'del') input_b = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[2]) @@ -96,6 +98,8 @@ def vcf2hgvs_stage1(variant, validator): primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query_a) validator.batch_list.append(query_b) + logger.info("Submitting new variant with format %s", input_a) + logger.info("Submitting new variant with format %s", input_b) skipvar = True elif re.search(r'[-:]\d+[-:][-:][GATC]+', variant.quibble) or \ re.search(r'[-:]\d+[-:][.][-:][GATC]+', variant.quibble): @@ -112,7 +116,7 @@ def vcf2hgvs_stage1(variant, validator): vcf_elements = pre_input.split('-') variant.quibble = '%s:%s%s>%s' % (vcf_elements[0], vcf_elements[1], vcf_elements[2], vcf_elements[3]) - Logger.trace("Completed VCF-HVGS step 1", variant) + logger.debug("Completed VCF-HVGS step 1 for %s", variant.quibble) return skipvar @@ -151,7 +155,7 @@ def vcf2hgvs_stage2(variant, validator): accession = seq_data.to_accession(chr_num, validator.selected_assembly) if accession is None: variant.warnings.append(chr_num + ' is not part of genome build ' + validator.selected_assembly) - Logger.warning(chr_num + ' is not part of genome build ' + validator.selected_assembly) + logger.warning(chr_num + ' is not part of genome build ' + validator.selected_assembly) skipvar = True else: accession = input_list[0] @@ -187,8 +191,8 @@ def vcf2hgvs_stage2(variant, validator): accession = accession variant.quibble = str(accession) + ref_type + str(position_and_edit) - except: - fn.exceptPass(variant) + except Exception as e: + logger.debug("Except passed, %s", e) # Descriptions lacking the colon : if re.search(r'[gcnmrp]\.', variant.quibble) and not re.search(r':[gcnmrp]\.', variant.quibble): @@ -196,11 +200,11 @@ def vcf2hgvs_stage2(variant, validator): 
'descriptions to separate the reference accession from the reference type i.e. :. ' \ 'e.g. :c.' % variant.quibble variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) skipvar = True # Ambiguous chr reference - Logger.trace("Completed VCF-HVGS step 2", variant) + logger.debug("Completed VCF-HVGS step 2 for %s", variant.quibble) return skipvar @@ -243,12 +247,13 @@ def vcf2hgvs_stage3(variant, validator): accession = seq_data.to_accession(chr_num, validator.selected_assembly) if accession is None: variant.warnings.append(chr_num + ' is not part of genome build ' + validator.selected_assembly) + logger.warning(chr_num + ' is not part of genome build ' + validator.selected_assembly) skipvar = True variant.quibble = str(accession) + ':' + str(position_and_edit) - except Exception: - fn.exceptPass(variant) + except Exception as e: + logger.debug("Except passed, %s", e) - Logger.trace("Completed VCF-HGVS step 3", variant) + logger.debug("Completed VCF-HGVS step 3 for %s", variant.quibble) return skipvar @@ -288,21 +293,22 @@ def gene_symbol_catch(variant, validator, select_transcripts_dict_plus_version): warnings=variant.warnings, primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) - Logger.resub('HGVS variant nomenclature does not allow the use of a gene symbol (' + - query_a_symbol + ') in place of a valid reference sequence') + logger.info('HGVS variant nomenclature does not allow the use of a gene symbol (' + + query_a_symbol + ') in place of a valid reference sequence') + logger.info("Submitting new variant with format %s", refreshed_description) else: variant.warnings.append('HGVS variant nomenclature does not allow the use of a gene symbol (' + query_a_symbol + ') in place of a valid reference sequence: Re-submit ' + variant.quibble + ' and specify transcripts from the following: ' + 'select_transcripts=' + select_from_these_transcripts) - Logger.warning('HGVS variant nomenclature does not 
allow the use of a gene symbol (' + - query_a_symbol + ') in place of a valid reference sequence: Re-submit ' + - variant.quibble + ' and specify transcripts from the following: ' + - 'select_transcripts=' + select_from_these_transcripts) + logger.warning('HGVS variant nomenclature does not allow the use of a gene symbol (' + + query_a_symbol + ') in place of a valid reference sequence: Re-submit ' + + variant.quibble + ' and specify transcripts from the following: ' + + 'select_transcripts=' + select_from_these_transcripts) skipvar = True - except: - fn.exceptPass() - Logger.trace("Gene symbol reference catching complete", variant) + except Exception as e: + logger.debug("Except passed, %s", e) + logger.debug("Gene symbol reference catching complete") return skipvar @@ -339,38 +345,39 @@ def refseq_catch(variant, validator, select_transcripts_dict_plus_version): warnings=variant.warnings, primary_assembly=variant.primary_assembly, order=variant.order) - Logger.resub('NG_:c.PositionVariation descriptions should not be used unless a transcript ' - 'reference sequence has also been provided e.g. NG_(NM_):c.PositionVariation. ' - 'Resubmitting corrected version.') + logger.info('NG_:c.PositionVariation descriptions should not be used unless a transcript ' + 'reference sequence has also been provided e.g. NG_(NM_):c.PositionVariation. ' + 'Resubmitting corrected version.') validator.batch_list.append(query) + logger.info("Submitting new variant with format %s", refreshed_description) else: variant.warnings.append('A transcript reference sequence has not been provided e.g. ' 'NG_(NM_):c.PositionVariation. Re-submit ' + variant.quibble + ' but also specify transcripts from the following: ' + 'select_transcripts=' + select_from_these_transcripts) - Logger.warning('A transcript reference sequence has not been provided e.g. ' - 'NG_(NM_):c.PositionVariation. 
Re-submit ' + variant.quibble + ' but also ' - 'specify transcripts from the following: select_transcripts=' + - select_from_these_transcripts) + logger.warning('A transcript reference sequence has not been provided e.g. ' + 'NG_(NM_):c.PositionVariation. Re-submit ' + variant.quibble + ' but also ' + 'specify transcripts from the following: select_transcripts=' + + select_from_these_transcripts) skipvar = True else: variant.warnings.append('A transcript reference sequence has not been provided e.g. ' 'NG_(NM_):c.PositionVariation') - Logger.warning( + logger.warning( 'A transcript reference sequence has not been provided e.g. NG_(NM_):c.PositionVariation') skipvar = True elif variant.quibble.startswith('NC_'): variant.warnings.append('A transcript reference sequence has not been provided e.g. ' 'NC_(NM_):c.PositionVariation. Unable to predict available transcripts ' 'because chromosomal position is not specified') - Logger.warning( + logger.warning( 'A transcript reference sequence has not been provided e.g. NC_(NM_):c.PositionVariation. ' 'Unable to predict available transcripts because chromosomal position is not specified') skipvar = True - except: - fn.exceptPass() + except Exception as e: + logger.debug("Except passed, %s", e) - Logger.trace("Chromosomal/RefSeqGene reference catching complete", variant) + logger.debug("Chromosomal/RefSeqGene reference catching complete") return skipvar @@ -444,8 +451,8 @@ def vcf2hgvs_stage4(variant, validator): end_pos = int(start_pos) + (len(delete) - 1) not_delins = ref_ac + ':' + ref_type + '.' 
+ start_pos + '_' + str( end_pos) + 'del' + delete + 'ins' + insert - except: - fn.exceptPass() + except Exception as e: + logger.debug("Except passed, %s", e) not_delins = not_sub # Parse into hgvs object hgvs_not_delins = None @@ -460,7 +467,7 @@ def vcf2hgvs_stage4(variant, validator): # Assemble and re-submit for alt in alt_list: variant.warnings = ['Multiple ALT sequences detected: ' - 'auto-submitting all possible combinations'] + 'auto-submitting all possible combinations'] variant.write = False refreshed_description = header + '>' + alt query = Variant(variant.original, quibble=refreshed_description, @@ -468,13 +475,13 @@ def vcf2hgvs_stage4(variant, validator): order=variant.order) validator.batch_list.append(query) - Logger.resub( - 'Multiple ALT sequences detected. Auto-submitting all possible combinations.') + logger.info('Multiple ALT sequences detected. Auto-submitting all possible combinations.') + logger.info("Submitting new variant with format %s", refreshed_description) skipvar = True else: error = str(e) variant.warnings.append(error) - Logger.warning(str(e)) + logger.warning(str(e)) skipvar = True try: @@ -485,16 +492,16 @@ def vcf2hgvs_stage4(variant, validator): not_delins = not_delins else: variant.warnings.append(error) - Logger.warning(str(e)) + logger.warning(str(e)) skipvar = True # Create warning automap = variant.quibble + ' automapped to ' + not_delins variant.warnings.append(automap) # Change input to normalized variant variant.quibble = not_delins - except: - fn.exceptPass() - Logger.trace("Completed VCF-HVGS step 4", variant) + except Exception as e: + logger.debug("Except passed, %s", e) + logger.debug("Completed VCF-HVGS step 4 for %s", variant.quibble) return skipvar @@ -528,17 +535,17 @@ def indel_catching(variant, validator): issue_link = 'http://varnomen.hgvs.org/recommendations/DNA/variant/insertion/' error = error + ' please refer to ' + issue_link variant.warnings.append(error) - Logger.warning(str(error) + " " + str(e)) + 
logger.warning(str(error) + " " + str(e)) return True hgvs_failed.posedit.edit = str(hgvs_failed.posedit.edit).replace(digits, '') failed = str(hgvs_failed) automap = 'Non HGVS compliant variant description ' + variant.quibble + ' automapped to ' + failed variant.warnings.append(automap) - Logger.warning(automap) + logger.info(automap) variant.quibble = failed - Logger.trace("Ins/Del reference catching complete", variant) + logger.debug("Ins/Del reference catching complete for %s", variant.quibble) return False @@ -561,7 +568,7 @@ def intronic_converter(variant): transy = transy.group(1) transy = transy.replace(')', '') variant.quibble = transy - Logger.trace("HVGS typesetting complete", variant) + logger.debug("HVGS typesetting complete") def allele_parser(variant, validation): @@ -599,7 +606,7 @@ def allele_parser(variant, validation): caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + \ refseqgene_reference + ':' + variation variant.warnings.append(caution) - Logger.warning(str(caution)) + logger.info(caution) elif re.match(r'^LRG_\d+t\d+:c.', variant.quibble) or re.match(r'^LRG_\d+t\d+:n.', variant.quibble) or \ re.match(r'^LRG_\d+t\d+:p.', variant.quibble) or re.match(r'^LRG_\d+t\d+:g.', variant.quibble): lrg_reference, variation = variant.quibble.split(':') @@ -614,32 +621,33 @@ def allele_parser(variant, validation): caution = caution + ': ' + lrg_reference + ':' + variation + ' automapped to ' + \ refseqtranscript_reference + ':' + variation variant.warnings.append(caution) - Logger.warning(str(caution)) + logger.info(caution) else: pass try: # Submit to allele extraction function alleles = validation.hgvs_alleles(variant.quibble, variant.hn) variant.warnings.append('Automap has extracted possible variant descriptions') - Logger.resub('Automap has extracted possible variant descriptions, resubmitting') + logger.info('Automap has extracted possible variant descriptions, resubmitting') for allele in alleles: query = 
Variant(variant.original, quibble=allele, warnings=variant.warnings, write=True, primary_assembly=variant.primary_assembly, order=variant.order) validation.batch_list.append(query) + logger.info("Submitting new variant with format %s", allele) variant.write = False return True except fn.alleleVariantError as e: if "Cannot validate sequence of an intronic variant" in str(e): variant.warnings.append('Intronic positions not supported for HGVS Allele descriptions') - Logger.warning('Intronic positions not supported for HGVS Allele descriptions') + logger.warning('Intronic positions not supported for HGVS Allele descriptions') return True elif "No transcript definition for " in str(e): variant.warnings.append(str(e)) - Logger.warning(str(e)) + logger.warning(str(e)) return True else: raise fn.VariantValidatorError(str(e)) - Logger.trace("HVGS String allele parsing pass 1 complete", variant) + logger.debug("HVGS String allele parsing pass 1 complete") return False @@ -665,7 +673,7 @@ def lrg_to_refseq(variant, validator): variant.set_quibble(str(variant.hgvs_formatted)) caution += lrg_reference + ':' + variation + ' automapped to ' + refseqtrans_reference + ':' + variation variant.warnings.append(caution) - Logger.warning(caution) + logger.info(caution) elif re.match(r'^LRG_\d+:', variant.quibble): lrg_reference, variation = variant.quibble.split(':') refseqgene_reference = validator.db.get_refseq_id_from_lrg_id(lrg_reference) @@ -674,7 +682,7 @@ def lrg_to_refseq(variant, validator): variant.set_quibble(str(variant.hgvs_formatted)) caution += lrg_reference + ':' + variation + ' automapped to ' + refseqgene_reference + ':' + variation variant.warnings.append(caution) - Logger.warning(caution) + logger.info(caution) def mitochondrial(variant, validator): @@ -692,12 +700,12 @@ def mitochondrial(variant, validator): except hgvs.exceptions.HGVSError as e: error = str(e) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True except 
KeyError: error = 'Currently unable to validate ' + hgvs_mito.ac + ' sequence variation' variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True else: # Any transcripts? @@ -707,7 +715,7 @@ def mitochondrial(variant, validator): if len(rel_var) == 0: variant.genomic_g = fn.valstr(hgvs_mito) variant.description = 'Homo sapiens mitochondrion, complete genome' - Logger.info('Homo sapiens mitochondrion, complete genome') + logger.info('Homo sapiens mitochondrion, complete genome') return True return False @@ -728,7 +736,7 @@ def proteins(variant, validator): error = str(e) if error: variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True else: # Get accurate descriptions from the relevant databases @@ -750,7 +758,7 @@ def proteins(variant, validator): ' in the genetic code' variant.warnings.extend([reason, error]) variant.protein = str(hgvs_object) - Logger.warning(reason + ": " + error) + logger.warning(reason + ": " + error) return True return False @@ -769,7 +777,7 @@ def rna(variant, validator): except hgvs.exceptions.HGVSDataNotAvailableError as e: error = str(e) variant.warnings.append(error) - Logger.warning(str(error)) + logger.warning(str(error)) return True variant.hgvs_formatted = hgvs_c diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index 0a4c4b56..354637a6 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -1,11 +1,11 @@ import copy import re - +import logging import hgvs.exceptions - from . import utils as fn from . 
import hgvs_utils -from .logger import Logger + +logger = logging.getLogger(__name__) class GapMapper(object): @@ -251,8 +251,8 @@ def gapped_g_to_c(self, rel_var): if len(stash_hgvs_not_delins.posedit.edit.ref) > len(hgvs_stash_t.posedit.edit.ref): try: self.variant.hn.normalize(hgvs_stash_t) - except: - fn.exceptPass() + except Exception as e: + logger.debug("Except passed, %s", e) else: gap_length = len(stash_hgvs_not_delins.posedit.edit.ref) - len( hgvs_stash_t.posedit.edit.ref) @@ -281,7 +281,7 @@ def gapped_g_to_c(self, rel_var): if str(e) == 'start or end or both are beyond the bounds of transcript record': hgvs_not_delins = saved_hgvs_coding self.disparity_deletion_in = ['false', 'false'] - Logger.warning(str(e)) + logger.info(str(e)) try: self.variant.hn.normalize(self.tx_hgvs_not_delins) except hgvs.exceptions.HGVSUnsupportedOperationError as e: @@ -294,7 +294,7 @@ def gapped_g_to_c(self, rel_var): elif 'Normalization of intronic variants is not supported' in error: # We know that this cannot be because of an intronic variant, so must be aligned to tx gap self.disparity_deletion_in = ['transcript', 'Requires Analysis'] - Logger.warning(error) + logger.info(error) # Pre-processing of self.tx_hgvs_not_delins try: if self.tx_hgvs_not_delins.posedit.edit.alt is None: @@ -412,7 +412,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): hgvs_genomic = self.validator.myevm_t_to_g(hgvs_coding, self.variant.no_norm_evm, self.variant.primary_assembly, self.variant.hn) - Logger.warning('g_to_t gap code 1 active') + logger.debug('g_to_t gap code 1 active') rn_hgvs_genomic = self.variant.reverse_normalizer.normalize(hgvs_genomic) self.hgvs_genomic_possibilities.append(rn_hgvs_genomic) @@ -436,8 +436,8 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): stash_tx_left = '' try: hgvs_stash = self.variant.no_norm_evm.c_to_n(hgvs_stash) - except: - fn.exceptPass() + except Exception as e: + logger.debug("Except passed, %s", e) try: stash_ac = 
hgvs_stash.ac stash_dict = hgvs_utils.hard_right_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, self.variant.hn, self.validator.sf) @@ -452,8 +452,8 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) try: stash_hgvs_not_delins = self.variant.no_norm_evm.n_to_c(stash_hgvs_not_delins) - except: - fn.exceptPass() + except Exception as e: + logger.debug("Except passed, %s", e) test_stash_tx_right = copy.deepcopy(stash_hgvs_not_delins) # stash_genomic = vm.t_to_g(test_stash_tx_right, hgvs_genomic.ac) @@ -509,17 +509,17 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): stash_tx_right = test_stash_tx_right self.hgvs_genomic_possibilities.append(stash_genomic) except hgvs.exceptions.HGVSError as e: - fn.exceptPass() + logger.debug("Except passed, %s", e) # Intronic positions not supported. Will cause a Value Error - except ValueError: - fn.exceptPass() + except ValueError as e: + logger.debug("Except passed, %s", e) # Then to the left hgvs_stash = copy.deepcopy(hgvs_coding) try: hgvs_stash = self.variant.no_norm_evm.c_to_n(hgvs_stash) - except: - fn.exceptPass() + except Exception as e: + logger.debug("Except passed, %s", e) try: stash_ac = hgvs_stash.ac stash_dict = hgvs_utils.hard_left_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, @@ -535,8 +535,8 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) try: stash_hgvs_not_delins = self.variant.no_norm_evm.n_to_c(stash_hgvs_not_delins) - except: - fn.exceptPass() + except Exception as e: + logger.debug("Except passed, %s", e) # Store a tx copy for later use test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) # stash_genomic = vm.t_to_g(test_stash_tx_left, hgvs_genomic.ac) @@ -593,22 +593,13 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): stash_tx_left = test_stash_tx_left self.hgvs_genomic_possibilities.append(stash_genomic) 
except hgvs.exceptions.HGVSError as e: - fn.exceptPass() - except ValueError: - fn.exceptPass() + logger.debug("Except passed, %s", e) + except ValueError as e: + logger.debug("Except passed, %s", e) # direct mapping from reverse_normalized transcript insertions in the delins format self.rev_norm_ins(hgvs_coding, hgvs_genomic) - Logger.info('\nGENOMIC POSSIBILITIES') - for possibility in self.hgvs_genomic_possibilities: - if possibility == '': - Logger.info('X') - else: - Logger.info(fn.valstr(possibility)) - - Logger.info('\n') - # Set variables for problem specific warnings self.gapped_transcripts = '' self.auto_info = '' @@ -909,7 +900,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): 'Unsupported normalization of variants spanning the exon-intron boundary' in error: hgvs_refreshed_variant = saved_hgvs_coding else: - Logger.warning(error) + logger.info(error) continue # Quick check to make sure the coding variant has not changed @@ -959,7 +950,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): 'descriptions: If you are unsure, please contact admin' self.auto_info = self.auto_info.replace('\n', ': ') self.variant.warnings.append(self.auto_info) - Logger.warning(self.auto_info) + logger.info(self.auto_info) # Normailse hgvs_genomic try: hgvs_genomic = self.variant.hn.normalize(hgvs_genomic) @@ -992,7 +983,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): return hgvs_genomic, suppress_c_normalization, hgvs_coding def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic): - Logger.warning('g_to_t gap code 2 active') + logger.debug('g_to_t gap code 2 active') hgvs_genomic_variant = hgvs_genomic reverse_normalized_hgvs_genomic = self.variant.reverse_normalizer.normalize(hgvs_genomic_variant) @@ -1083,7 +1074,7 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic): self.validator.vm.g_to_t(hgvs_not_delins, self.tx_hgvs_not_delins.ac) except Exception as e: if str(e) == 'start or end or both are 
beyond the bounds of transcript record': - Logger.warning(str(e)) + logger.warning(str(e)) return True try: self.variant.hn.normalize(self.tx_hgvs_not_delins) @@ -1092,7 +1083,7 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic): if 'Normalization of intronic variants is not supported' in error or \ 'Unsupported normalization of variants spanning the exon-intron boundary' in error: if 'Unsupported normalization of variants spanning the exon-intron boundary' in error: - Logger.warning(error) + logger.warning(error) return True elif 'Normalization of intronic variants is not supported' in error: # We know that this cannot be because of an intronic variant, so must be aligned to tx gap @@ -1148,7 +1139,7 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a self.orientation = int(ori[0]['alt_strand']) hgvs_genomic = copy.deepcopy(hgvs_alt_genomic) - Logger.warning('g_to_t gap code 3 active') + logger.debug('g_to_t gap code 3 active') rn_hgvs_genomic = self.variant.reverse_normalizer.normalize(hgvs_alt_genomic) self.hgvs_genomic_possibilities.append(rn_hgvs_genomic) if self.orientation != -1: @@ -1173,8 +1164,8 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a stash_tx_left = '' try: hgvs_stash = self.variant.no_norm_evm.c_to_n(hgvs_stash) - except: - fn.exceptPass() + except Exception as e: + logger.debug("Except passed, %s", e) try: stash_ac = hgvs_stash.ac stash_dict = hgvs_utils.hard_right_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, self.variant.hn, @@ -1190,8 +1181,8 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) try: stash_hgvs_not_delins = self.variant.no_norm_evm.n_to_c(stash_hgvs_not_delins) - except: - fn.exceptPass() + except Exception as e: + logger.debug("Except passed, %s", e) # Store a tx copy for later use test_stash_tx_right = 
copy.deepcopy(stash_hgvs_not_delins) stash_genomic = self.validator.myvm_t_to_g(test_stash_tx_right, hgvs_alt_genomic.ac, @@ -1245,17 +1236,17 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a else: stash_tx_right = test_stash_tx_right self.hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError: - fn.exceptPass() - except ValueError: - fn.exceptPass() + except hgvs.exceptions.HGVSError as e: + logger.debug("Except passed, %s", e) + except ValueError as e: + logger.debug("Except passed, %s", e) # Then to the left hgvs_stash = copy.deepcopy(hgvs_coding) try: hgvs_stash = self.variant.no_norm_evm.c_to_n(hgvs_stash) - except: - fn.exceptPass() + except Exception as e: + logger.debug("Except passed, %s", e) try: stash_ac = hgvs_stash.ac stash_dict = hgvs_utils.hard_left_hgvs2vcf(hgvs_stash, self.variant.primary_assembly, @@ -1271,8 +1262,8 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a stash_pos) + '_' + stash_end + 'del' + stash_ref + 'ins' + stash_alt) try: stash_hgvs_not_delins = self.variant.no_norm_evm.n_to_c(stash_hgvs_not_delins) - except: - fn.exceptPass() + except Exception as e: + logger.debug("Except passed, %s", e) # Store a tx copy for later use test_stash_tx_left = copy.deepcopy(stash_hgvs_not_delins) stash_genomic = self.validator.myvm_t_to_g(test_stash_tx_left, hgvs_alt_genomic.ac, @@ -1327,9 +1318,9 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a stash_tx_left = test_stash_tx_left self.hgvs_genomic_possibilities.append(stash_genomic) except hgvs.exceptions.HGVSError as e: - fn.exceptPass() - except ValueError: - fn.exceptPass() + logger.debug("Except passed, %s", e) + except ValueError as e: + logger.debug("Except passed, %s", e) # direct mapping from reverse_normalized transcript insertions in the delins format self.rev_norm_ins(hgvs_coding, hgvs_genomic) @@ -1743,8 +1734,8 @@ def remove_offsetting_to_span_gap(self, 
rn_tx_hgvs_not_delins): rn_tx_hgvs_not_delins.posedit.edit.ref = '' try: rn_tx_hgvs_not_delins.posedit.edit.alt = '' - except: - pass + except Exception as e: + logger.debug("Except passed, %s", e) return rn_tx_hgvs_not_delins @@ -1887,8 +1878,8 @@ def transcript_disparity(self, reverse_normalized_hgvs_genomic, stored_hgvs_not_ try: tx_gap_fill_variant = self.validator.vm.n_to_c(tx_gap_fill_variant) - except: - fn.exceptPass() + except Exception as e: + logger.debug("Except passed, %s", e) genomic_gap_fill_variant = self.validator.vm.t_to_g(tx_gap_fill_variant, reverse_normalized_hgvs_genomic.ac) genomic_gap_fill_variant.posedit.edit.alt = genomic_gap_fill_variant.posedit.edit.ref @@ -2010,10 +2001,10 @@ def transcript_disparity(self, reverse_normalized_hgvs_genomic, stored_hgvs_not_ self.tx_hgvs_not_delins = c2 try: self.tx_hgvs_not_delins = self.validator.vm.c_to_n(self.tx_hgvs_not_delins) - except hgvs.exceptions.HGVSError: - fn.exceptPass() - except hgvs.exceptions.HGVSInvalidVariantError: - fn.exceptPass() + except hgvs.exceptions.HGVSError as e: + logger.debug("Except passed, %s", e) + except hgvs.exceptions.HGVSInvalidVariantError as e: + logger.debug("Except passed, %s", e) if '+' in str(self.tx_hgvs_not_delins.posedit.pos.start) and \ '+' not in str(self.tx_hgvs_not_delins.posedit.pos.end): @@ -2181,10 +2172,10 @@ def logic_check(self, hgvs_not_delins, rn_tx_hgvs_not_delins, hgvs_coding, do_co if hgvs_t_possibility.posedit.edit.type == 'ins': try: hgvs_t_possibility = self.validator.vm.c_to_n(hgvs_t_possibility) - except: + except Exception as e: if do_continue: continue - fn.exceptPass() + logger.debug("Except passed, %s", e) if offset_check: if hgvs_t_possibility.posedit.pos.start.offset != 0 or \ hgvs_t_possibility.posedit.pos.end.offset != 0: @@ -2194,10 +2185,10 @@ def logic_check(self, hgvs_not_delins, rn_tx_hgvs_not_delins, hgvs_coding, do_co hgvs_t_possibility.posedit.pos.start.base + 1) try: hgvs_t_possibility = 
self.validator.vm.n_to_c(hgvs_t_possibility) - except: + except Exception as e: if do_continue: continue - fn.exceptPass() + logger.debug("Except passed, %s", e) hgvs_t_possibility.posedit.edit.ref = ins_ref hgvs_t_possibility.posedit.edit.alt = ins_ref[ 0] + hgvs_t_possibility.posedit.edit.alt + ins_ref[1] @@ -2360,4 +2351,4 @@ def rev_norm_ins(self, hgvs_coding, hgvs_genomic): self.hgvs_genomic_possibilities.append(genomic_from_most_5pr_hgvs_transcript_variant) except hgvs.exceptions.HGVSUnsupportedOperationError as e: - fn.exceptPass() + logger.debug("Except passed, %s", e) diff --git a/VariantValidator/modules/liftover.py b/VariantValidator/modules/liftover.py index 6324b7aa..04f0448a 100644 --- a/VariantValidator/modules/liftover.py +++ b/VariantValidator/modules/liftover.py @@ -8,15 +8,17 @@ # import modules import hgvs.exceptions import hgvs.sequencevariant +import logging from . import seq_data from . import hgvs_utils -from .logger import Logger from pyliftover import LiftOver from Bio.Seq import Seq # Pre compile variables hgvs.global_config.formatting.max_ref_length = 1000000 +logger = logging.getLogger(__name__) + def mystr(hgvs_nucleotide): hgvs_nucleotide_refless = hgvs_nucleotide.format({'max_ref_length': 0}) @@ -40,8 +42,8 @@ def liftover(hgvs_genomic, build_from, build_to, hn, reverse_normalizer, evm, va try: hgvs_genomic = validator.hp.parse_hgvs_variant(hgvs_genomic) - except TypeError: - pass + except TypeError as e: + logger.debug("Except passed, %s", e) # Create return dictionary lifted_response = {} @@ -259,7 +261,7 @@ def liftover(hgvs_genomic, build_from, build_to, hn, reverse_normalizer, evm, va accession = seq_data.to_accession(chrom, lo_to) if accession is None: wrn = 'Unable to identify an equivalent %s chromosome ID for %s' % (str(lo_to), str(chrom)) - Logger.warning(wrn) + logger.info(wrn) continue else: not_delins = accession + ':g.' 
+ str(pos) + '_' + str( @@ -269,7 +271,7 @@ def liftover(hgvs_genomic, build_from, build_to, hn, reverse_normalizer, evm, va try: validator.vr.validate(hgvs_not_delins) except hgvs.exceptions.HGVSError as e: - Logger.warning(str(e)) + logger.info(str(e)) # Most likely incorrect bases continue else: diff --git a/VariantValidator/modules/logger.py b/VariantValidator/modules/logger.py deleted file mode 100644 index e074d9b2..00000000 --- a/VariantValidator/modules/logger.py +++ /dev/null @@ -1,154 +0,0 @@ -import logging -import datetime -import os -from io import StringIO - -VALIDATOR_DEBUG = os.environ.get('VALIDATOR_DEBUG') - - -class Logger(): - """ - Grand unified variant validator logging static class. - """ - # logString=StringIO() - - @staticmethod - def loggingSetup(): - """ - Set up logging - I need to use the VVObfuscator in the logger global dictionary - becuase it's a global variable tied to the logger module - Modules are singletons, but their variables are not. Consequently - this is the only sensible way to ensure that the logging setup is called - once. If another programmer has any better ideas that leave these functions - with a configured VV logger object that only has its handlers added once, - feel free to fix it up. - """ - # print("Entering setup") - # The logger must be at the very least drawn from the logging library's dictionary - # for every time this module is imported. - Logger.logger = logging.getLogger("VV") - if "VVObfuscator" in logging.Logger.manager.loggerDict: - return - logging.getLogger("VVObfuscator") - # print("Engaging setup") - - global VALIDATOR_DEBUG - # Check environment variables - VALIDATOR_DEBUG = os.environ.get('VALIDATOR_DEBUG') - # print("VD",os.environ.get('VALIDATOR_DEBUG')) - - if VALIDATOR_DEBUG is None: - VALIDATOR_DEBUG = "info console" # Set default value - # Set logging urgency levels. 
- if "debug" in VALIDATOR_DEBUG: - loglevel = logging.DEBUG - elif "warning" in VALIDATOR_DEBUG: - loglevel = logging.WARNING - elif "info" in VALIDATOR_DEBUG: - loglevel = logging.INFO - elif "error" in VALIDATOR_DEBUG: - loglevel = logging.ERROR - elif "critical" in VALIDATOR_DEBUG: - loglevel = logging.CRITICAL - else: - loglevel = logging.WARNING - - if "file" in VALIDATOR_DEBUG: - logFileHandler = logging.FileHandler("VV-log.txt") - logFileHandler.setLevel(loglevel) - Logger.logger.addHandler(logFileHandler) - - if "console" in VALIDATOR_DEBUG: - logConsoleHandler = logging.StreamHandler() - logConsoleHandler.setLevel(loglevel) - Logger.logger.addHandler(logConsoleHandler) - - # Create a log string to add to validations. - # Since it has to survive multiple imports, I'm stuffing it into the logger dictionary. - # Feel free to amend this coding monstrosity without my knowledge. - logging.Logger.manager.loggerDict["VVLogString"] = StringIO() - logStringHandler = logging.StreamHandler(logging.Logger.manager.loggerDict["VVLogString"]) - # We want the validation metadata to not contain debug info which may change with program operation - logStringHandler.setLevel(logging.INFO) - Logger.logger.addHandler(logStringHandler) - Logger.logger.setLevel(logging.DEBUG) # The logger itself must be set with an appropriate level of urgency. - - Logger.logger.propagate = False - - @staticmethod - def debug(s): - Logger.loggingSetup() - Logger.logger.debug("DEBUG: " + s) - - @staticmethod - def info(s): - Logger.loggingSetup() - Logger.logger.info("INFO : " + s) - - @staticmethod - def warning(s): - Logger.loggingSetup() - Logger.logger.warning("WARN : " + s) - - @staticmethod - def error(s): - Logger.loggingSetup() - Logger.logger.error("ERROR: " + s) - - @staticmethod - def critical(s): - Logger.loggingSetup() - Logger.logger.critical("CRIT : " + s) - - @staticmethod - def trace(s, v=None): - # v should be a variant object with a 'timing' attribute. 
- # global VALIDATOR_DEBUG - # print(VALIDATOR_DEBUG) - # if "trace" in VALIDATOR_DEBUG: - # logger.loggingSetup() - if not v: - Logger.logger.debug("TRACE: " + s) - else: - Logger.logger.debug("TRACE: " + s) - v.timing['traceLabels'].append(s) - v.timing['traceTimes'].append(str((datetime.datetime.now() - v.timing['checkDT']).microseconds//1000)) - v.timing['checkDT'] = datetime.datetime.now() - - @staticmethod - def resub(s): - # Resubmit one or multiple variants - Logger.loggingSetup() - Logger.logger.warning("RESUB: " + s) - - @staticmethod - def getString(): - Logger.loggingSetup() - # print("RETURNING:") - # print(logging.Logger.manager.loggerDict["VVLogString"].getvalue()) - return logging.Logger.manager.loggerDict["VVLogString"].getvalue() - - @staticmethod - def traceStart(v): - Logger.loggingSetup() -# global VALIDATOR_DEBUG -# if "trace" in VALIDATOR_DEBUG: - if True: - v.timing = {} - v.timing['traceLabels'] = [] - v.timing['traceTimes'] = [] - v.timing['startDT'] = datetime.datetime.now() - v.timing['checkDT'] = datetime.datetime.now() - - @staticmethod - def traceEnd(v): - Logger.loggingSetup() - # global VALIDATOR_DEBUG - # if "trace" in VALIDATOR_DEBUG: - if True: - v.timing['traceLabels'].append("complete") - v.timing['traceTimes'].append((datetime.datetime.now() - v.timing['startDT']).microseconds//1000) - del v.timing['startDT'] - del v.timing['checkDT'] - diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index 93e5c7c9..b4ef653c 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -2,13 +2,15 @@ import re import copy import hgvs.exceptions -from .logger import Logger +import logging from . import hgvs_utils from .variant import Variant from . import seq_data from . import utils as fn from . 
import gapped_mapping +logger = logging.getLogger(__name__) + def gene_to_transcripts(variant, validator): g_query = validator.hp.parse_hgvs_variant(str(variant.hgvs_formatted)) @@ -23,7 +25,7 @@ def gene_to_transcripts(variant, validator): error = 'Reference sequence ' + variant.hgvs_genomic.ac + ' is either not supported or does not exist' if error != 'false': variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True # Set test to see if Norm alters the coords @@ -100,11 +102,12 @@ def gene_to_transcripts(variant, validator): primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) + logger.info('Submitting new variant with format %s', genomic_input) else: error = 'Mapping unavailable for RefSeqGene ' + str(variant.hgvs_formatted) + \ ' using alignment method = ' + validator.alt_aln_method variant.warnings.append(error) - Logger.warning(str(error)) + logger.warning(str(error)) return True # Chromosome build is not supported or intergenic??? @@ -116,7 +119,7 @@ def gene_to_transcripts(variant, validator): except hgvs.exceptions.HGVSError as e: error = str(e) variant.warnings.append(error) - Logger.warning(str(error)) + logger.warning(str(error)) return True else: # Map to RefSeqGene if available @@ -137,13 +140,13 @@ def gene_to_transcripts(variant, validator): variant.warnings.append(error) variant.genomic_g = fn.valstr(variant.hgvs_genomic) variant.genomic_r = str(rsg_data.split('(')[0]) - Logger.warning(str(error)) + logger.warning(str(error)) return True else: error = 'Please ensure the requested chromosome version relates to a supported genome build. 
' \ 'Supported genome builds are: GRCh37, GRCh38, hg19 and hg38' variant.warnings.append(error) - Logger.warning(str(error)) + logger.warning(str(error)) return True else: @@ -178,7 +181,7 @@ def gene_to_transcripts(variant, validator): query = Variant(variant.original, quibble=str(c_description), warnings=variant.warnings, primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) - Logger.warning("Continue reached when mapping transcript types to variants") + logger.info("Submitting new variant with format %s", str(c_description)) # Call next description return True return False @@ -228,21 +231,21 @@ def transcripts_to_gene(variant, validator): 'supported: Use the Gene to Transcripts function to determine whether an updated ' \ 'transcript reference sequence is available' variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True errors = ['Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive', 'Query https://rest.variantvalidator.org/tools/gene2transcripts/%s for ' 'available transcripts' % tx_ac.split('.')[0]] variant.warnings.extend(errors) - Logger.warning(str(errors)) + logger.info(str(errors)) return True except TypeError: errors = ['Required information for ' + tx_ac + ' is missing from the Universal Transcript Archive', 'Query https://rest.variantvalidator.org/tools/gene2transcripts/%s for ' 'available transcripts' % tx_ac.split('.')[0]] variant.warnings.extend(errors) - Logger.warning(str(errors)) + logger.info(str(errors)) return True # Get orientation of the gene wrt genome and a list of exons mapped to the genome @@ -257,14 +260,14 @@ def transcripts_to_gene(variant, validator): error = "If the following error message does not address the issue and the problem persists please " \ "contact admin: " + str(to_g) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True else: error = "If the following error message 
does not address the issue and the problem persists please " \ "contact admin: " + str(to_g) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True else: @@ -288,14 +291,14 @@ def transcripts_to_gene(variant, validator): error = "If the following error message does not address the issue and the problem persists " \ "please contact admin: " + str(to_g) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True else: error = "If the following error message does not address the issue and the problem persists " \ "please contact admin: " + str(to_g) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True else: # Insertions at exon boundaries are miss-handled by vm.g_to_t @@ -322,7 +325,7 @@ def transcripts_to_gene(variant, validator): automap = 'Use of the corresponding genomic sequence variant descriptions may be invalid. ' \ 'Please refer to https://www35.lamp.le.ac.uk/recommendations/' variant.warnings.extend([caution, automap]) - Logger.warning(caution + ": " + automap) + logger.info(caution + ": " + automap) else: formatted_variant = str(h_variant) @@ -331,7 +334,7 @@ def transcripts_to_gene(variant, validator): valid = True else: variant.warnings.append(str(error)) - Logger.warning(str(error)) + logger.warning(str(error)) return True # Tackle the plus intronic offset @@ -394,6 +397,7 @@ def transcripts_to_gene(variant, validator): query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=[automap], primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) + logger.info("Submitting new variant with format %s", fn.valstr(hgvs_vt)) # Coding else: @@ -410,15 +414,16 @@ def transcripts_to_gene(variant, validator): error = 'Automap is unable to correct the input exon/intron boundary coordinates, ' \ 'please check your variant description' variant.warnings.append(error) + logger.warning(error) return True else: - 
fn.exceptPass() + logger.debug("Except passed, %s", e) # genome back to C coordinates try: post_var = validator.myevm_g_to_t(variant.evm, pre_var, trans_acc) except hgvs.exceptions.HGVSError as error: variant.warnings.append(str(error)) - Logger.warning(str(error)) + logger.warning(str(error)) return True test = validator.hp.parse_hgvs_variant(quibble_input) @@ -439,6 +444,7 @@ def transcripts_to_gene(variant, validator): query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=[automap], primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) + logger.info("Submitting new variant with format %s", fn.valstr(hgvs_vt)) else: # del not in formatted_variant if ':r.' in variant.pre_RNA_conversion: @@ -476,6 +482,7 @@ def transcripts_to_gene(variant, validator): query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=[automap], primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) + logger.info("Submitting new variant with format %s", fn.valstr(hgvs_vt)) else: coding = validator.coding(formatted_variant) @@ -505,6 +512,7 @@ def transcripts_to_gene(variant, validator): query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=[automap], primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) + logger.info("Submitting new variant with format %s", fn.valstr(hgvs_vt)) # If cck not true elif ':r.' 
in variant.pre_RNA_conversion: @@ -519,7 +527,7 @@ def transcripts_to_gene(variant, validator): error = "If the following error message does not address the issue and the problem persists " \ "please contact admin: " + to_g variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True else: @@ -562,6 +570,7 @@ def transcripts_to_gene(variant, validator): query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=[automap], primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) + logger.info("Submitting new variant with format %s", fn.valstr(hgvs_vt)) elif ':g.' not in quibble_input: query = validator.hp.parse_hgvs_variant(formatted_variant) @@ -582,6 +591,7 @@ def transcripts_to_gene(variant, validator): query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=[automap], primary_assembly=variant.primary_assembly, order=variant.order) validator.batch_list.append(query) + logger.info("Submitting new variant with format %s", fn.valstr(hgvs_vt)) # VALIDATION of intronic variants pre_valid = validator.hp.parse_hgvs_variant(quibble_input) @@ -596,12 +606,13 @@ def transcripts_to_gene(variant, validator): if caution == '': caution = fn.valstr(pre_valid) + ' automapped to ' + fn.valstr(post_valid) variant.warnings.append(caution) - Logger.warning(caution) + logger.info(caution) # Apply validation to intronic variant descriptions (should be valid but make sure) error = validator.validateHGVS(genomic_validation) if error != 'false': variant.warnings.append(error) + logger.warning(error) return True # v0.1a1 edit @@ -637,11 +648,11 @@ def transcripts_to_gene(variant, validator): gap_compensation = False except hgvs.exceptions.HGVSError as error: variant.warnings.append(str(error)) - Logger.warning(str(error)) + logger.warning(str(error)) return True # Warn status - Logger.warning("gap_compensation_1 = " + str(gap_compensation)) + logger.debug("gap_compensation_1 = " + 
str(gap_compensation)) # Genomic sequence hgvs_genomic = validator.myevm_t_to_g(hgvs_coding, variant.no_norm_evm, variant.primary_assembly, variant.hn) @@ -668,7 +679,7 @@ def transcripts_to_gene(variant, validator): # --- GAP MAPPING 2 --- # Loop out gap finding code under these circumstances! - Logger.warning("gap_compensation_2 = " + str(gap_compensation)) + logger.debug("gap_compensation_2 = " + str(gap_compensation)) if gap_compensation is True: hgvs_coding = gap_mapper.g_to_t_gapped_mapping_stage2(ori, hgvs_coding, hgvs_genomic) @@ -710,7 +721,7 @@ def transcripts_to_gene(variant, validator): if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': hgvs_protein = protein_dict['hgvs_protein'] else: - Logger.error(error) + logger.warning(error) return True # Gene orientation wrt genome @@ -721,7 +732,7 @@ def transcripts_to_gene(variant, validator): # Look for normalized variant options that do not match hgvs_coding # boundary crossing normalization hgvs_seek_var, query_genomic, hgvs_coding = gap_mapper.get_hgvs_seek_var(hgvs_genomic, hgvs_coding, - ori=ori, with_query_genomic=True) + ori=ori, with_query_genomic=True) if hgvs_coding.posedit.edit.type != hgvs_seek_var.posedit.edit.type: pass @@ -735,8 +746,8 @@ def transcripts_to_gene(variant, validator): automap = fn.valstr(hgvs_coding) + ' normalized to ' + fn.valstr(hgvs_seek_var) hgvs_coding = hgvs_seek_var variant.warnings.append(automap) - except NotImplementedError: - fn.exceptPass() + except NotImplementedError as e: + logger.debug("Except passed, %s", e) if ori == -1: rng = variant.hn.normalize(query_genomic) try: @@ -753,8 +764,8 @@ def transcripts_to_gene(variant, validator): if error == 'Cannot identify an in-frame Termination codon in the variant mRNA sequence': hgvs_protein = protein_dict['hgvs_protein'] variant.warnings.append(error) - except NotImplementedError: - fn.exceptPass() + except NotImplementedError as e: + logger.debug("Except passed, %s", e) elif ori == 
1: # Double check protein position by reverse_norm genomic, and normalize back to c. # for normalize or not to normalize issue @@ -772,8 +783,8 @@ def transcripts_to_gene(variant, validator): c_for_p = validator.vm.g_to_t(rng, hgvs_coding.ac) try: variant.hn.normalize(c_for_p) - except hgvs.exceptions.HGVSError: - fn.exceptPass() + except hgvs.exceptions.HGVSError as e: + logger.debug("Except passed, %s", e) else: # hgvs_protein = va_func.protein(str(c_for_p), variant.evm, hp) protein_dict = validator.myc_to_p(c_for_p, variant.evm, re_to_p=False, hn=variant.hn) @@ -785,8 +796,8 @@ def transcripts_to_gene(variant, validator): hgvs_protein = protein_dict['hgvs_protein'] variant.warnings.append(error) # Replace protein description in vars table - except Exception: - fn.exceptPass() + except Exception as e: + logger.debug("Except passed, %s", e) # Final protein check, i.e. does the output make sense # We are looking for exonic c. descriptioms labelled as p.? @@ -818,8 +829,8 @@ def transcripts_to_gene(variant, validator): version_tracking): version_tracking = query_version update = accession[3] - except ValueError: - fn.exceptPass() + except ValueError as e: + logger.debug("Except passed, %s", e) if update != '': hgvs_updated = copy.deepcopy(hgvs_coding) @@ -871,11 +882,11 @@ def final_tx_to_multiple_genomic(variant, validator, tx_variant): if 'boundary' in error or 'spanning' in error: gap_compensation = False - except hgvs.exceptions.HGVSError: - fn.exceptPass() + except hgvs.exceptions.HGVSError as e: + logger.debug("Except passed, %s", e) # Warn gap code status - Logger.warning("gap_compensation_3 = " + str(gap_compensation)) + logger.debug("gap_compensation_3 = " + str(gap_compensation)) multi_g = [] multi_list = [] mapping_options = validator.hdp.get_tx_mapping_options(variant.hgvs_coding.ac) @@ -885,6 +896,7 @@ def final_tx_to_multiple_genomic(variant, validator, tx_variant): multi_list.append(alt_chr[1]) for alt_chr in multi_list: + logger.debug("Trying to do 
final gap mapping with %s", alt_chr) try: # Re set ori ori = validator.tx_exons(tx_ac=variant.hgvs_coding.ac, alt_ac=alt_chr, @@ -911,7 +923,6 @@ def final_tx_to_multiple_genomic(variant, validator, tx_variant): warnings = warnings + ': Suspected incomplete alignment between transcript %s and ' \ 'genomic reference sequence %s' % (variant.hgvs_coding.ac, alt_chr) except hgvs.exceptions.HGVSError as e: - Logger.error(str(e)) - Logger.debug(str(e)) + logger.warning(str(e)) return multi_g diff --git a/VariantValidator/modules/use_checking.py b/VariantValidator/modules/use_checking.py index 6f721503..46a91d58 100644 --- a/VariantValidator/modules/use_checking.py +++ b/VariantValidator/modules/use_checking.py @@ -2,10 +2,12 @@ import hgvs import hgvs.exceptions import hgvs.variantmapper +import logging from . import utils as fn -from .logger import Logger import copy +logger = logging.getLogger(__name__) + def refseq_common_mistakes(variant): """ @@ -17,7 +19,7 @@ def refseq_common_mistakes(variant): error = 'Transcript reference sequence input as genomic (g.) reference sequence. ' \ 'Did you mean ' + suggestion + '?' variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True # NR_ c. if variant.quibble.startswith('NR_') and variant.reftype == ':c.': @@ -25,7 +27,7 @@ def refseq_common_mistakes(variant): error = 'Non-coding transcript reference sequence input as coding (c.) reference sequence. ' \ 'Did you mean ' + suggestion + '?' variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True # NM_ n. if variant.quibble.startswith('NM_') and variant.reftype == ':n.': @@ -33,7 +35,7 @@ def refseq_common_mistakes(variant): error = 'Coding transcript reference sequence input as non-coding transcript (n.) reference sequence. ' \ 'Did you mean ' + suggestion + '?' variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True # NM_ NC_ NG_ NR_ p. 
@@ -42,7 +44,7 @@ def refseq_common_mistakes(variant): error = 'Using a nucleotide reference sequence (NM_ NR_ NG_ NC_) to specify protein-level (p.) variation is ' \ 'not HGVS compliant. Please select an appropriate protein reference sequence (NP_)' variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True # NG_ c or NC_c.. @@ -51,7 +53,7 @@ def refseq_common_mistakes(variant): error = 'NG_:c.PositionVariation descriptions should not be used unless a transcript reference sequence has ' \ 'also been provided e.g. NG_(NM_):c.PositionVariation' variant.warnings.extend([error, suggestion]) - Logger.warning(error) + logger.warning(error) return True return False @@ -95,7 +97,7 @@ def structure_checks_g(variant, validator): and not variant.quibble.startswith('NT_') and not variant.quibble.startswith('NW_'): error = 'Invalid reference sequence identifier (' + variant.input_parses.ac + ')' variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True try: @@ -103,7 +105,7 @@ def structure_checks_g(variant, validator): except Exception as e: error = str(e) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True # Additional test @@ -112,7 +114,7 @@ def structure_checks_g(variant, validator): except hgvs.exceptions.HGVSError as e: error = str(e) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True return False @@ -140,14 +142,14 @@ def structure_checks_c(variant, validator): except hgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True actual_ref = to_n.posedit.edit.ref if called_ref != actual_ref: error = 'Variant reference (' + called_ref + ') does not agree with reference sequence ' \ '(' + actual_ref + ')' variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True else: variant.input_parses.posedit.edit.ref = 
'' @@ -156,8 +158,8 @@ def structure_checks_c(variant, validator): if 'bounds' in error or 'intronic variant' in error: try: variant.hn.normalize(variant.input_parses) - except hgvs.exceptions.HGVSError: - fn.exceptPass() + except hgvs.exceptions.HGVSError as e: + logger.debug("Except passed, %s", e) if 'bounds' in error: try: @@ -207,10 +209,10 @@ def structure_checks_c(variant, validator): error = 'Using a transcript reference sequence to specify a variant position that lies ' \ 'outside of the reference sequence is not HGVS-compliant: ' \ 'Instead re-submit ' + fn.valstr(report_gen) - except Exception: - fn.exceptPass() + except Exception as e: + logger.debug("Except passed, %s", e) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True try: @@ -218,7 +220,7 @@ def structure_checks_c(variant, validator): except hgvs.exceptions.HGVSError as e: error = str(e) variant.warnings.append(error) - Logger.warning(e) + logger.warning(e) return True if 'n.1-' in str(variant.input_parses): @@ -230,7 +232,7 @@ def structure_checks_c(variant, validator): genomic_position = variant.hn.normalize(genomic_position) error = error + fn.valstr(genomic_position) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True # Re-map input_parses back to c. variant @@ -287,10 +289,10 @@ def structure_checks_c(variant, validator): error = 'Using a transcript reference sequence to specify a variant position that lies ' \ 'outside of the reference sequence is not HGVS-compliant. 
Instead re-submit '\ + fn.valstr(report_gen) - except Exception: - fn.exceptPass() + except Exception as e: + logger.debug("Except passed, %s", e) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True except hgvs.exceptions.HGVSDataNotAvailableError as e: @@ -308,7 +310,7 @@ def structure_checks_c(variant, validator): + variant.input_parses.ac + ' can only be partially aligned to genomic reference ' \ 'sequences ' + acs variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True elif re.search(r'\d-', str(variant.input_parses)) or re.search(r'\d\+', str(variant.input_parses)): @@ -322,17 +324,17 @@ def structure_checks_c(variant, validator): report_gen = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, variant.primary_assembly, variant.hn) report_gen = variant.hn.normalize(report_gen) - except hgvs.exceptions.HGVSError: - fn.exceptPass() + except hgvs.exceptions.HGVSError as e: + logger.debug("Except passed, %s", e) else: error = 'Using a transcript reference sequence to specify a variant position that lies outside of '\ 'the reference sequence is not HGVS-compliant. Instead re-submit ' + fn.valstr(report_gen) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True elif 'insertion length must be 1' in error: variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True elif 'base start position must be <= end position' in error: correction = copy.deepcopy(variant.input_parses) @@ -342,7 +344,7 @@ def structure_checks_c(variant, validator): correction.posedit.pos.end = st error = error + ': Did you mean ' + str(correction) + '?' 
variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True # Create a specific minimal evm with no normalizer and no replace_reference @@ -355,7 +357,7 @@ def structure_checks_c(variant, validator): 'Transcript Archive', 'Query https://rest.variantvalidator.org/tools/gene2transcripts/%s for ' 'available transcripts' % variant.input_parses.ac.split('.')[0]] variant.warnings.extend(errors) - Logger.warning(str(errors)) + logger.info(str(errors)) return True except ValueError as e: error = str(e) @@ -363,7 +365,7 @@ def structure_checks_c(variant, validator): error = 'Interval start position ' + str(variant.input_parses.posedit.pos.start) + ' > interval end '\ 'position ' + str(variant.input_parses.posedit.pos.end) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True except hgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) @@ -377,11 +379,11 @@ def structure_checks_c(variant, validator): error = 'Interval start position ' + str(variant.input_parses.posedit.pos.start) + ' > interval end' \ ' position ' + str(variant.input_parses.posedit.pos.end) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True else: variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True try: @@ -389,7 +391,7 @@ def structure_checks_c(variant, validator): except hgvs.exceptions.HGVSError as e: error = str(e) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True try: @@ -397,15 +399,15 @@ def structure_checks_c(variant, validator): except hgvs.exceptions.HGVSError as e: error = str(e) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True else: # All other variation try: validator.vr.validate(variant.input_parses) - except hgvs.exceptions.HGVSUnsupportedOperationError: - fn.exceptPass() + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + logger.debug("Except passed, 
%s", e) except hgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) # This catches errors in introns @@ -419,20 +421,20 @@ def structure_checks_c(variant, validator): error = 'Interval start position ' + str(variant.input_parses.posedit.pos.start) + ' > interval end '\ 'position ' + str(variant.input_parses.posedit.pos.end) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True except hgvs.exceptions.HGVSDataNotAvailableError as e: error = str(e) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True except hgvs.exceptions.HGVSError as e: error = str(e) if 'bounds' in error: error += ' (' + variant.input_parses.ac + ')' variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True return False @@ -461,7 +463,7 @@ def structure_checks_n(variant, validator): error = 'Variant reference (' + called_ref + ') does not agree with reference sequence (' + \ actual_ref + ')' variant.warnings.append(error) - Logger.warning(str(error)) + logger.warning(str(error)) return True else: variant.input_parses.posedit.edit.ref = '' @@ -489,14 +491,14 @@ def structure_checks_n(variant, validator): report_gen = variant.hn.normalize(report_gen) error = 'Using a transcript reference sequence to specify a variant position that lies outside of' \ ' the reference sequence is not HGVS-compliant. 
Instead re-submit ' + fn.valstr(report_gen) - except Exception: - fn.exceptPass() + except Exception as e: + logger.debug("Except passed, %s", e) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True else: variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True if 'n.1-' in str(variant.input_parses): @@ -507,7 +509,7 @@ def structure_checks_n(variant, validator): genomic_position = variant.hn.normalize(genomic_position) error = error + fn.valstr(genomic_position) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True if re.search(r'\d-', str(variant.input_parses)) or re.search(r'\d\+', str(variant.input_parses)): @@ -521,17 +523,17 @@ def structure_checks_n(variant, validator): report_gen = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, variant.primary_assembly, variant.hn) report_gen = variant.hn.normalize(report_gen) - except hgvs.exceptions.HGVSError: - fn.exceptPass() + except hgvs.exceptions.HGVSError as e: + logger.debug("Except passed, %s", e) else: error = 'Using a transcript reference sequence to specify a variant position that lies outside of '\ 'the reference sequence is not HGVS-compliant. 
Instead re-submit ' + fn.valstr(report_gen) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True elif 'insertion length must be 1' in error: variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True elif 'base start position must be <= end position' in error: correction = copy.deepcopy(variant.input_parses) @@ -543,7 +545,7 @@ def structure_checks_n(variant, validator): # error = 'Interval start position ' + str(input_parses.posedit.pos.start) + ' > interval end # position ' + str(input_parses.posedit.pos.end) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True elif 'Cannot validate sequence of an intronic variant' in error: try: @@ -560,7 +562,7 @@ def structure_checks_n(variant, validator): 'outside of the reference sequence is not HGVS-compliant. Instead re-submit ' + \ fn.valstr(report_gen) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True # Create a specific minimal evm with no normalizer and no replace_reference @@ -575,7 +577,7 @@ def structure_checks_n(variant, validator): 'Query https://rest.variantvalidator.org/tools/gene2transcripts/%s for ' 'available transcripts' % variant.input_parses.ac.split('.')[0]] variant.warnings.extend(errors) - Logger.warning(str(errors)) + logger.info(str(errors)) return True except ValueError as e: error = str(e) @@ -584,7 +586,7 @@ def structure_checks_n(variant, validator): variant.input_parses.posedit.pos.start) + ' > interval end position ' + str( variant.input_parses.posedit.pos.end) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True except hgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) @@ -599,22 +601,22 @@ def structure_checks_n(variant, validator): variant.input_parses.posedit.pos.start) + ' > interval end position ' + str( variant.input_parses.posedit.pos.end) variant.warnings.append(error) - Logger.warning(error) 
+ logger.warning(error) return True try: validator.vr.validate(output) except hgvs.exceptions.HGVSError as e: error = str(e) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True else: # All other variation try: validator.vr.validate(variant.input_parses) - except hgvs.exceptions.HGVSUnsupportedOperationError: - fn.exceptPass() + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + logger.debug("Except passed, %s", e) except hgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) # if re.search('Length implied by coordinates', error): @@ -645,22 +647,22 @@ def structure_checks_n(variant, validator): error = 'Interval start position ' + str( variant.input_parses.posedit.pos.start) + ' > interval end position ' + str( variant.input_parses.posedit.pos.end) - Logger.warning(error) + logger.warning(error) variant.warnings.append(error) return True variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True except hgvs.exceptions.HGVSDataNotAvailableError as e: error = str(e) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True except hgvs.exceptions.HGVSError as e: error = str(e) if 'bounds' in error: error = error + ' (' + variant.input_parses.ac + ')' variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True return False diff --git a/VariantValidator/modules/utils.py b/VariantValidator/modules/utils.py index ec3c0a42..a87c8b67 100644 --- a/VariantValidator/modules/utils.py +++ b/VariantValidator/modules/utils.py @@ -2,12 +2,12 @@ from Bio.Alphabet import IUPAC import requests import functools -import traceback -import sys -from .logger import Logger +import logging import re import copy +logger = logging.getLogger(__name__) + def handleCursor(func): """ @@ -76,19 +76,6 @@ def remove_reference(hgvs_nucleotide): return hgvs_nucleotide_refless -def exceptPass(validation=None): - exc_type, exc_value, last_traceback = 
sys.exc_info() - te = traceback.format_exc() - tbk = [str(exc_type), str(exc_value), str(te)] - er = str('\n'.join(tbk)) - if last_traceback: - Logger.warning( - "Except pass for " + str(exc_type) + " " + str(exc_value) + " at line " + str(last_traceback.tb_lineno)) - else: - Logger.warning("Except pass for " + str(exc_type) + " " + str(exc_value)) - Logger.debug(er) - - def user_input(query): """ user_input diff --git a/VariantValidator/modules/valoutput.py b/VariantValidator/modules/valoutput.py index 73c49dd3..b7ad4bff 100644 --- a/VariantValidator/modules/valoutput.py +++ b/VariantValidator/modules/valoutput.py @@ -1,7 +1,8 @@ -import os -from .logger import Logger +import logging import json +logger = logging.getLogger(__name__) + class ValOutput(object): """This object will hold the all final, validated outputs (Variant objects) and provide methods to return this @@ -20,6 +21,7 @@ def format_as_dict(self, with_meta=True): validation_intergenic_counter = 0 if len(self.output_list) == 0: + logger.warning("No variants available to output") validation_output['flag'] = 'empty_result' for variant in self.output_list: @@ -125,12 +127,6 @@ def add_meta(self): """ metadata = {} - if os.environ.get("ADD_LOGS") == "True": - logs = [] - for l in Logger.getString().split("\n"): - logs.append(l) - metadata["logs"] = logs - # metadata["variant"] = batch_variant # original input string to validate function # metadata["assembly"] = selected_assembly # metadata["transcripts"] = select_transcripts diff --git a/VariantValidator/modules/vvDBGet.py b/VariantValidator/modules/vvDBGet.py index 30a57331..ba0e86bf 100644 --- a/VariantValidator/modules/vvDBGet.py +++ b/VariantValidator/modules/vvDBGet.py @@ -1,7 +1,9 @@ +import logging from .utils import handleCursor -from .logger import Logger from . 
import vvDBInit +logger = logging.getLogger(__name__) + class Mixin(vvDBInit.Mixin): """ @@ -13,7 +15,7 @@ def execute(self, query): self.cursor.execute(query) row = self.cursor.fetchone() if row is None: - Logger.debug("No data returned from query " + str(query)) + logger.debug("No data returned from query " + str(query)) row = ['none', 'No data'] return row @@ -22,7 +24,7 @@ def execute_all(self, query): self.cursor.execute(query) rows = self.cursor.fetchall() if not rows: - Logger.debug("No data returned from query " + str(query)) + logger.debug("No data returned from query " + str(query)) rows = ['none', 'No data'] return rows diff --git a/VariantValidator/modules/vvDatabase.py b/VariantValidator/modules/vvDatabase.py index 41247647..d624c25a 100644 --- a/VariantValidator/modules/vvDatabase.py +++ b/VariantValidator/modules/vvDatabase.py @@ -1,9 +1,11 @@ -from .logger import Logger from . import utils from .utils import handleCursor from . import vvDBInsert import re import hgvs.exceptions +import logging + +logger = logging.getLogger(__name__) class Database(vvDBInsert.Mixin): @@ -29,7 +31,7 @@ def query_with_fetchone(self, entry): row = self.cursor.fetchone() if row is None: row = ['none', 'No data'] - Logger.debug("No data returned from query " + str(query)) + logger.debug("No data returned from query " + str(query)) return row # From data diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index c1a23f4d..658db0c0 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -1,16 +1,18 @@ import re import copy -from .logger import Logger +import logging import hgvs import hgvs.validator from . import vvMixinInit from . import seq_data from . import hgvs_utils -from Bio import Entrez,SeqIO +from Bio import Entrez, SeqIO from . 
import utils as fn from hgvs.exceptions import HGVSError, HGVSDataNotAvailableError, HGVSUnsupportedOperationError +logger = logging.getLogger(__name__) + class Mixin(vvMixinInit.Mixin): """ @@ -178,7 +180,7 @@ def myevm_t_to_g(self, hgvs_c, no_norm_evm, primary_assembly, hn): # If the gene symbol is not in the list, the value False will be returned utilise_gap_code = seq_data.gap_black_list(gene_symbol) # Warn gap code in use - Logger.warning("gap_compensation_myevm = " + str(utilise_gap_code)) + logger.debug("gap_compensation_myevm = " + str(utilise_gap_code)) if utilise_gap_code is True and (hgvs_c.posedit.edit.type == 'identity' or hgvs_c.posedit.edit.type == 'del' or hgvs_c.posedit.edit.type == 'delins' or hgvs_c.posedit.edit.type == 'dup' @@ -332,7 +334,6 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): break except Exception as e: err += str(e) + "/" + hgvs_c.ac + "/" + option[1] + '~' - print(e) continue if chr_num_val and chr_num != 'false': try: @@ -340,7 +341,6 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): break except Exception as e: err += str(e) + "/" + hgvs_c.ac + "/" + option[1] + '~' - print(e) continue elif chr_num_val is False and chr_num == 'false': try: @@ -348,7 +348,6 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): break except Exception as e: err += str(e) + "/" + hgvs_c.ac + "/" + option[1] + '~' - print(e) continue return hgvs_genomic, err @@ -455,7 +454,7 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): genomic_gap_variant = None # Warn of variant location wrt the gap if 'Length implied by coordinates must equal sequence deletion length' in str(e): - Logger.warning('Variant is proximal to the flank of a genomic gap') + logger.info('Variant is proximal to the flank of a genomic gap') genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) try: hn.normalize(genomic_gap_variant) @@ -476,7 +475,7 @@ def 
search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): genomic_gap_variant = self.nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) if error_type_1 == 'base start position must be <= end position': - Logger.warning('Variant is fully within a genomic gap') + logger.info('Variant is fully within a genomic gap') genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) # Logic @@ -494,7 +493,7 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): if 'Length implied by coordinates must equal sequence deletion length' in str(e): # This will only happen if the variant is flanking the gap but is # not inside the gap - Logger.warning('Variant is on the flank of a genomic gap but not within the gap') + logger.info('Variant is on the flank of a genomic gap but not within the gap') gap_start = genomic_gap_variant.posedit.pos.start.base - 1 gap_end = genomic_gap_variant.posedit.pos.end.base + 1 genomic_gap_variant.posedit.pos.start.base = gap_start @@ -506,7 +505,7 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): try: genomic_gap_variant.posedit.edit.alt = '' except Exception as e: - pass + logger.debug("Except passed, %s", e) # Should be a delins so will normalize statically and replace the reference bases genomic_gap_variant = hn.normalize(genomic_gap_variant) @@ -517,8 +516,7 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): try: transcript_gap_variant = hn.normalize(transcript_gap_variant) except hgvs.exceptions.HGVSUnsupportedOperationError as e: - if ' Unsupported normalization of variants spanning the UTR-exon boundary' in str(e): - pass + logger.debug("Except passed, %s", e) # if NM_ need the n. 
position if str(hgvs_c.ac).startswith('NM_'): @@ -657,8 +655,7 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): try: genomic_gap_variant.posedit.edit.alt = '' except Exception as e: - if str(e) == "'Dup' object has no attribute 'alt'": - pass + logger.debug("Except passed, %s", e) # Should be a delins so will normalize statically and replace the reference bases genomic_gap_variant = hn.normalize(genomic_gap_variant) # Static map to c. and static normalize @@ -781,7 +778,7 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): hgvs_genomic = no_norm_evm.t_to_g(hgvs_c) except Exception as e: error = str(e) - Logger.warning('Ins mapping error in myt_to_g ' + error) + logger.warning('Ins mapping error in myt_to_g ' + error) return hgvs_genomic @@ -822,7 +819,6 @@ def search_in_options(hgvs_genomic, seqtype, chr_num_val, final=False): break except Exception as e: err += str(e) + "/" + hgvs_c.ac + "/" + op[1] + '~' - print(e) continue chr_num = seq_data.supported_for_mapping(str(op[1]), variant.primary_assembly) if chr_num_val and chr_num != 'false': @@ -831,7 +827,6 @@ def search_in_options(hgvs_genomic, seqtype, chr_num_val, final=False): break except Exception as e: err += str(e) + "/" + hgvs_c.ac + "/" + op[1] + '~' - print(e) continue elif not chr_num_val and chr_num == 'false': try: @@ -839,7 +834,6 @@ def search_in_options(hgvs_genomic, seqtype, chr_num_val, final=False): break except Exception as e: err += str(e) + "/" + hgvs_c.ac + "/" + op[1] + '~' - print(e) continue return hgvs_genomic, err @@ -915,7 +909,7 @@ def search_in_options(hgvs_genomic, seqtype, chr_num_val, final=False): hgvs_genomic = variant.no_norm_evm.t_to_g(hgvs_c) except Exception as e: error = str(e) - Logger.warning('Ins mapping error in myt_to_g ' + error) + logger.warning('Ins mapping error in myt_to_g ' + error) return hgvs_genomic @@ -943,7 +937,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): # If the gene symbol is 
not in the list, the value False will be returned utilise_gap_code = seq_data.gap_black_list(gene_symbol) # Warn gap code in use - Logger.warning("gap_compensation_mvm = " + str(utilise_gap_code)) + logger.debug("gap_compensation_mvm = " + str(utilise_gap_code)) if utilise_gap_code and (hgvs_c.posedit.edit.type == 'identity' or hgvs_c.posedit.edit.type == 'del' or hgvs_c.posedit.edit.type == 'delins' or hgvs_c.posedit.edit.type == 'dup' @@ -960,7 +954,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): except hgvs.exceptions.HGVSError as e: error = str(e) if 'intronic variant' in error: - pass + logger.debug("Except passed, %s", e) elif 'Length implied by coordinates must equal sequence deletion length' in error and \ hgvs_c.ac.startswith('NR_'): hgvs_c.posedit.pos.end.base = hgvs_c.posedit.pos.start.base + len(hgvs_c.posedit.edit.ref) - 1 @@ -1126,7 +1120,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): genomic_gap_variant = None # Warn of variant location wrt the gap if 'Length implied by coordinates must equal sequence deletion length' in str(e): - Logger.warning('Variant is proximal to the flank of a genomic gap') + logger.info('Variant is proximal to the flank of a genomic gap') genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) try: hn.normalize(genomic_gap_variant) @@ -1146,7 +1140,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): genomic_gap_variant = self.nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) if error_type_1 == 'base start position must be <= end position': - Logger.warning('Variant is fully within a genomic gap') + logger.info('Variant is fully within a genomic gap') genomic_gap_variant = self.vm.t_to_g(stored_hgvs_c, hgvs_genomic.ac) # Logic @@ -1164,7 +1158,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): if 'Length implied by coordinates must equal sequence deletion length' in str(e): # This will only happen if the variant is flanking the gap but is # not inside the gap - 
Logger.warning('Variant is on the flank of a genomic gap but not within the gap') + logger.info('Variant is on the flank of a genomic gap but not within the gap') gap_start = genomic_gap_variant.posedit.pos.start.base - 1 gap_end = genomic_gap_variant.posedit.pos.end.base + 1 genomic_gap_variant.posedit.pos.start.base = gap_start @@ -1176,7 +1170,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): try: genomic_gap_variant.posedit.edit.alt = '' except Exception as e: - pass + logger.debug("Except passed, %s", e) # Should be a delins so will normalize statically and replace the reference bases genomic_gap_variant = hn.normalize(genomic_gap_variant) @@ -1185,8 +1179,8 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): if 'Length implied by coordinates must equal sequence deletion length' not in str(e): try: transcript_gap_variant = hn.normalize(transcript_gap_variant) - except hgvs.exceptions.HGVSUnsupportedOperationError: - pass + except hgvs.exceptions.HGVSUnsupportedOperationError as e: + logger.debug("Except passed, %s", e) # if NM_ need the n. position if str(hgvs_c.ac).startswith('NM_'): @@ -1323,7 +1317,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): try: genomic_gap_variant.posedit.edit.alt = '' except Exception as e: - pass + logger.debug("Except passed, %s", e) # Should be a delins so will normalize statically and replace the reference bases genomic_gap_variant = hn.normalize(genomic_gap_variant) # Static map to c. 
and static normalize @@ -1445,7 +1439,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): hgvs_genomic = no_norm_evm.t_to_g(hgvs_c) except Exception as e: error = str(e) - Logger.warning('Ins mapping error in myt_to_g ' + error) + logger.warning('Ins mapping error in myt_to_g ' + error) return hgvs_genomic @@ -1790,8 +1784,7 @@ def merge_hgvs_3pr(self, hgvs_variant_list, hn): try: hgvs_v = self.hp.parse_hgvs_variant(hgvs_v) except Exception as e: - print(e) - pass + logger.debug("Except passed, %s" % e) # Validate self.vr.validate(hgvs_v) # Let hgvs errors deal with invalid variants and not hgvs objects @@ -1822,8 +1815,8 @@ def merge_hgvs_3pr(self, hgvs_variant_list, hn): raise fn.mergeHGVSerror("Base-offset position submitted") if hgvs_v.posedit.pos.end.offset != 0: raise fn.mergeHGVSerror("Base-offset position submitted") - except AttributeError: - pass + except AttributeError as e: + logger.debug("Except passed, %s", e) # Normalize the variant (allow cross intron) which also adds the reference sequence (?) 
hgvs_v = hn.normalize(hgvs_v) @@ -1878,13 +1871,13 @@ def merge_hgvs_3pr(self, hgvs_variant_list, hn): hgvs_delins = self.hp.parse_hgvs_variant(delins) try: hgvs_delins = self.vm.n_to_c(hgvs_delins) - except: - pass + except Exception as e: + logger.debug("Except passed, %s", e) # Normalize (allow variants crossing into different exons) try: hgvs_delins = hn.normalize(hgvs_delins) - except HGVSUnsupportedOperationError: - pass + except HGVSUnsupportedOperationError as e: + logger.debug("Except passed, %s", e) return hgvs_delins def merge_hgvs_5pr(self, hgvs_variant_list): @@ -1900,8 +1893,8 @@ def merge_hgvs_5pr(self, hgvs_variant_list): # For testing include parser try: hgvs_v = self.hp.parse_hgvs_variant(hgvs_v) - except: - pass + except Exception as e: + logger.debug("Except passed, %s", e) # Validate self.vr.validate(hgvs_v) # Let hgvs errors deal with invalid variants and not hgvs objects @@ -1929,8 +1922,8 @@ def merge_hgvs_5pr(self, hgvs_variant_list): raise fn.mergeHGVSerror("Base-offset position submitted") if hgvs_v.posedit.pos.end.offset != 0: raise fn.mergeHGVSerror("Base-offset position submitted") - except AttributeError: - pass + except AttributeError as e: + logger.debug("Except passed, %s", e) # Normalize the variant (allow cross intron) which also adds the reference sequence (?) 
hgvs_v = self.reverse_hn.normalize(hgvs_v) @@ -1986,13 +1979,13 @@ def merge_hgvs_5pr(self, hgvs_variant_list): hgvs_delins = self.hp.parse_hgvs_variant(delins) try: hgvs_delins = self.vm.n_to_c(hgvs_delins) - except: - pass + except Exception as e: + logger.debug("Except passed, %s", e) # Normalize (allow variants crossing into different exons) try: hgvs_delins = self.reverse_hn.normalize(hgvs_delins) - except HGVSUnsupportedOperationError: - pass + except HGVSUnsupportedOperationError as e: + logger.debug("Except passed, %s", e) return hgvs_delins # def merge_pseudo_vcf(self, vcf_list, genome_build, hn): @@ -2305,8 +2298,8 @@ def rsg_to_chr(self, hgvs_refseqgene, primary_assembly, hn): # normalize try: hgvs_refseqgene = hn.normalize(hgvs_refseqgene) - except: - pass + except Exception as e: + logger.debug("Except passed, %s", e) # split the description # Accessions rsg_ac = hgvs_refseqgene.ac diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 077fd1e5..407f67ba 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -4,9 +4,8 @@ import re import copy import sys -import traceback +import logging from hgvs.assemblymapper import AssemblyMapper -from .logger import Logger from . import hgvs_utils from . import utils as fn from . import seq_data @@ -18,6 +17,8 @@ from . import valoutput from .liftover import liftover +logger = logging.getLogger(__name__) + class Mixin(vvMixinConverters.Mixin): """ @@ -35,13 +36,13 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr :param transcript_set: 'refseq' or 'ensembl'. 
Currently only 'refseq' is supported :return: """ - Logger.info(batch_variant + ' : ' + selected_assembly) + logger.debug("Running validate with inputs %s and assembly %s", batch_variant, selected_assembly) if transcript_set == "refseq": self.alt_aln_method = 'splign' elif transcript_set == "ensembl": self.alt_aln_method = 'genebuild' - Logger.warning("Ensembl is currently not supported") + logger.warning("Ensembl is currently not supported") raise Exception("Ensembl is currently not supported") else: raise Exception("The transcriptSet variable '%s' is invalid, it must be 'refseq' or 'ensembl'" % @@ -80,6 +81,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr queries = queries.strip() query = Variant(queries) self.batch_list.append(query) + logger.info("Submitting variant with format %s", queries) # Create List to carry batch data output batch_out = [] @@ -97,10 +99,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr flag : gene """ - Logger.debug("Batch list length " + str(len(self.batch_list))) + logger.debug("Batch list length " + str(len(self.batch_list))) for my_variant in self.batch_list: - # Start timing - Logger.traceStart(my_variant) # Create Normalizers my_variant.hn = hgvs.normalizer.Normalizer(self.hdp, @@ -122,7 +122,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr try: # Note, ID is not touched. It is always the input variant description. # Quibble will be altered but id will not if type = g. 
- Logger.trace("Commenced validation of " + str(my_variant.quibble), my_variant) + logger.info("Started validation of %s (originally %s)", str(my_variant.quibble), + my_variant.original) if not my_variant.is_ascii(): chars, positions = my_variant.get_non_ascii() @@ -130,7 +131,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr 'Please remove this character and re-submit: A useful search function for ' \ 'Unicode characters can be found at https://unicode-search.net/' % (chars, positions) my_variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) continue # Remove whitespace @@ -138,7 +139,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if my_variant.quibble != my_variant.original: caution = 'Whitespace removed from variant description %s' % my_variant.original my_variant.warnings.append(caution) - Logger.info(caution) + logger.debug(caution) # Set the primary_assembly if not my_variant.primary_assembly: @@ -161,12 +162,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr primary_assembly = 'GRCh38' my_variant.warnings.append('Invalid genome build has been specified. Automap has selected ' 'the default build (GRCh38)') - Logger.warning( + logger.warning( 'Invalid genome build has been specified. 
Automap has selected the ' 'default build ' + my_variant.primary_assembly) else: primary_assembly = my_variant.primary_assembly - Logger.trace("Completed string formatting", my_variant) + logger.debug("Completed string formatting") toskip = format_converters.initial_format_conversions(my_variant, self, select_transcripts_dict_plus_version) @@ -183,7 +184,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: error = 'Variant description ' + my_variant.quibble + ' is not in an accepted format' my_variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) continue formatted_variant = my_variant.quibble @@ -193,14 +194,14 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgnc_gene_info = 'false' - Logger.trace("Variant input formatted, proceeding to validate.", my_variant) + logger.debug("Variant input formatted, proceeding to validate.") # Conversions # Conversions are not currently supported. The HGVS format for conversions # is rarely seen wrt genomic sequencing data and needs to be re-evaluated if 'con' in my_variant.quibble: my_variant.warnings.append('Gene conversions currently unsupported') - Logger.warning('Gene conversions currently unsupported') + logger.warning('Gene conversions currently unsupported') continue # Change RNA bases to upper case but nothing else @@ -218,7 +219,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr my_variant.hgvs_formatted = input_parses except hgvs.exceptions.HGVSError as e: my_variant.warnings.append(str(e)) - Logger.warning(str(e)) + logger.warning(str(e)) continue if 'LRG' in my_variant.hgvs_formatted.ac: @@ -252,14 +253,14 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = 'Unable to map ' + my_variant.hgvs_formatted.ac + \ ' to an equivalent RefSeq transcript' my_variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) continue else: 
my_variant.warnings.append(str(trap_ens_in) + ' automapped to equivalent RefSeq transcript ' + my_variant.quibble) - Logger.warning(str(trap_ens_in) + ' automapped to equivalent RefSeq ' + logger.info(str(trap_ens_in) + ' automapped to equivalent RefSeq ' 'transcript ' + my_variant.quibble) - Logger.trace("HVGS acceptance test passed", my_variant) + logger.debug("HVGS acceptance test passed") # Check whether supported genome build is requested for non g. descriptions mapable_assemblies = { @@ -302,7 +303,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = 'Mapping of ' + formatted_variant + ' to genome assembly ' + \ primary_assembly + ' is not supported' my_variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) continue # Catch interval end > interval start @@ -317,19 +318,19 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Create easy variant mapper (over variant mapper) and splign locked evm try: to_n = my_variant.evm.c_to_n(input_parses_copy) - except hgvs.exceptions.HGVSError: - fn.exceptPass() + except hgvs.exceptions.HGVSError as e: + logger.debug("Except passed, %s", e) else: if to_n.posedit.pos.end.base < to_n.posedit.pos.start.base: error = 'Interval end position < interval start position ' my_variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) continue elif my_variant.hgvs_formatted.posedit.pos.end.base < my_variant.hgvs_formatted.posedit.pos.start.base: error = 'Interval end position ' + str(my_variant.hgvs_formatted.posedit.pos.end.base) + \ ' < interval start position ' + str(my_variant.hgvs_formatted.posedit.pos.start.base) my_variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) continue # Catch missing version number in refseq @@ -338,26 +339,26 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = 'RefSeq variant accession numbers MUST include a version 
number' my_variant.warnings.append(error) continue - Logger.trace("HVGS interval/version mapping complete", my_variant) + logger.debug("HVGS interval/version mapping complete") # handle LRG inputs if my_variant.refsource == 'LRG': format_converters.lrg_to_refseq(my_variant, self) - Logger.trace("LRG check for conversion to refseq completed", my_variant) + logger.debug("LRG check for conversion to refseq completed") # Additional Incorrectly input variant capture training if my_variant.refsource == 'RefSeq': toskip = use_checking.refseq_common_mistakes(my_variant) if toskip: continue - Logger.trace("Passed 'common mistakes' catcher", my_variant) + logger.debug("Passed 'common mistakes' catcher") # Primary validation of the input toskip = use_checking.structure_checks(my_variant, self) if toskip: continue - Logger.trace("Variant structure and contents searches passed", my_variant) + logger.debug("Variant structure and contents searches passed") # Mitochondrial variants toskip = format_converters.mitochondrial(my_variant, self) @@ -396,7 +397,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr my_variant.output_type_flag = 'gene' my_variant.description = hgnc_gene_info my_variant.primary_assembly = primary_assembly - Logger.traceEnd(my_variant) + logger.info("Completed initial validation for %s", my_variant.quibble) # Report errors to User and VV admin except KeyboardInterrupt: raise @@ -405,16 +406,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr error = 'Validation error' my_variant.warnings.append(error) exc_type, exc_value, last_traceback = sys.exc_info() - te = traceback.format_exc() - tbk = [str(exc_type), str(exc_value), str(te)] - er = str('\n'.join(tbk)) - Logger.error(str(exc_type) + " " + str(exc_value)) - Logger.debug(er) + logger.error(str(exc_type) + " " + str(exc_value)) raise # Outside the for loop ###################### - Logger.trace("End of for loop") + logger.debug("End of 1st 
for loop") # order the rows by_order = sorted(self.batch_list, key=lambda x: x.order) @@ -649,8 +646,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if refseqgene_variant != '': try: refseqgene_variant = fn.valstr(hgvs_refseqgene_variant) - except: - fn.exceptPass() + except Exception as e: + logger.debug("Except passed, %s", e) # Add single letter AA code to protein descriptions predicted_protein_variant_dict = {"tlr": str(predicted_protein_variant), "slr": ''} @@ -662,8 +659,8 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr re_parse_protein = self.hp.parse_hgvs_variant(format_p) re_parse_protein_single_aa = fn.single_letter_protein(re_parse_protein) predicted_protein_variant_dict["slr"] = str(re_parse_protein_single_aa) - except hgvs.exceptions.HGVSParseError: - fn.exceptPass() + except hgvs.exceptions.HGVSParseError as e: + logger.debug("Except passed, %s", e) else: predicted_protein_variant_dict["slr"] = str(predicted_protein_variant) @@ -754,7 +751,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr except BaseException: # Debug mode exc_type, exc_value, last_traceback = sys.exc_info() - Logger.critical(str(exc_type) + " " + str(exc_value)) + logger.critical(str(exc_type) + " " + str(exc_value)) raise fn.VariantValidatorError('Validation error') def gene2transcripts(self, query): @@ -794,8 +791,8 @@ def gene2transcripts(self, query): hgnc = tx_info[6] found_res = True break - except hgvs.exceptions.HGVSError: - pass + except hgvs.exceptions.HGVSError as e: + logger.debug("Except passed, %s", e) if not found_res: return {'error': 'No transcript definition for (tx_ac=' + hgnc + ')'} @@ -897,7 +894,7 @@ def hgvs2ref(self, query): :param query: :return: """ - Logger.info('Fetching reference sequence for ' + query) + logger.debug('Fetching reference sequence for ' + query) # Dictionary to store the data reference = {'variant': query, 'start_position': '', @@ 
-940,7 +937,7 @@ def hgvs2ref(self, query): sequence = self.sf.fetch_seq(accession, start, end) except Exception as e: reference['error'] = str(e) - Logger.warning(str(e)) + logger.warning(str(e)) else: reference['start_position'] = str(input_hgvs_query.posedit.pos.start.base) reference['end_position'] = str(input_hgvs_query.posedit.pos.end.base) @@ -963,7 +960,7 @@ def _get_transcript_info(self, variant): reason = "VariantValidator cannot recover information for transcript " + str( hgvs_vt.ac) + ' because it is not available in the Universal Transcript Archive' variant.warnings.append(reason) - Logger.warning(str(reason) + ": " + str(error)) + logger.warning(str(reason) + ": " + str(error)) return True # Get accurate transcript descriptions from the relevant databases @@ -983,7 +980,7 @@ def _get_transcript_info(self, variant): # Open a hgvs exception log file in append mode error = entry['description'] variant.warnings.extend([str(error), 'A Database error occurred, please contact admin']) - Logger.warning(str(error) + ": A Database error occurred, please contact admin") + logger.warning(str(error) + ": A Database error occurred, please contact admin") return True # If the accession key is found @@ -995,17 +992,17 @@ def _get_transcript_info(self, variant): except hgvs.exceptions.HGVSError: error = 'Transcript %s is not currently supported' % accession variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True except fn.ObsoleteSeqError as e: error = 'Unable to assign transcript identity records to %s. %s' % (accession, str(e)) variant.warnings.append(error) - Logger.warning(error) + logger.info(error) return True except fn.DatabaseConnectionError as e: error = '%s. Please try again later and if the problem persists contact admin.' 
% str(e) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True variant.description = entry['description'] variant.gene_symbol = entry['hgnc_symbol'] @@ -1018,12 +1015,12 @@ def _get_transcript_info(self, variant): except fn.ObsoleteSeqError as e: error = 'Unable to assign transcript identity records to %s. %s' % (accession, str(e)) variant.warnings.append(error) - Logger.warning(error) + logger.info(error) return True except fn.DatabaseConnectionError as e: error = '%s. Please try again later and if the problem persists contact admin.' % str(e) variant.warnings.append(error) - Logger.warning(error) + logger.warning(error) return True variant.description = entry['description'] variant.gene_symbol = entry['hgnc_symbol'] @@ -1033,7 +1030,7 @@ def _get_transcript_info(self, variant): # Open a hgvs exception log file in append mode error = 'Unknown error type' variant.warnings.extend([error, ': A Database error occurred, please contact admin']) - Logger.warning(error) + logger.warning(error) return True # Ensembl databases @@ -1051,7 +1048,7 @@ def _get_transcript_info(self, variant): # Open a hgvs exception log file in append mode error = entry['description'] variant.warnings.extend([str(error), ': A Database error occurred, please contact admin']) - Logger.warning(str(error)) + logger.warning(str(error)) return True # If the accession key is found @@ -1067,12 +1064,12 @@ def _get_transcript_info(self, variant): try: entry = self.db.data_add(accession=accession, validator=self) except Exception as e: - Logger.warning(str(e)) + logger.info(str(e)) error = 'Unable to assign transcript identity records to ' + accession + \ ', potentially an obsolete record or there is an issue retrieving data from NCBI. 
' \ 'Please try again later and if the problem persists contact admin' variant.warnings.append(error) - Logger.warning(error) + logger.info(error) return True variant.description = entry['description'] @@ -1081,6 +1078,6 @@ def _get_transcript_info(self, variant): # Open a hgvs exception log file in append mode error = 'Unknown error type' variant.warnings.extend([error, ': A Database error occurred, please contact admin']) - Logger.warning(error) + logger.warning(error) return True return False diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index 7fea22d8..fb9a9963 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -57,20 +57,6 @@ def __init__(self): config = ConfigParser() config.read(CONFIG_DIR) - # The custom vvLogging module will set itself up using the VALDIATOR_DEBUG environment variable. - level_string = config["logging"]['level'] - console_string = config["logging"]['console'] - if console_string.lower() == "true": - console_string = "console" - file_string = config["logging"]['file'] - if file_string.lower() == "true": - file_string = "file" - trace_string = config["logging"]['trace'] - if trace_string.lower() == "true": - trace_string = "trace" - log_string = level_string+" "+console_string+" "+file_string+" "+trace_string - os.environ["VALIDATOR_DEBUG"] = log_string - # Handle databases self.entrezID = config["EntrezID"]["entrezID"] self.seqrepoVersion = config["seqrepo"]["version"] diff --git a/VariantValidator/settings.py b/VariantValidator/settings.py index 52e2856c..6ec5d442 100644 --- a/VariantValidator/settings.py +++ b/VariantValidator/settings.py @@ -1,3 +1,40 @@ import os CONFIG_DIR = os.path.join(os.path.expanduser('~'), '.variantvalidator') + +LOG_FILE = os.path.join(os.path.expanduser('~'), '.vv_errorlog') + +LOGGING_CONFIG = { + 'version': 1, + 'formatters': { + 'simple': { + 'class': 'logging.Formatter', + 'format': '%(levelname)s: 
%(message)s' + }, + 'detailed': { + 'class': 'logging.Formatter', + 'format': '%(asctime)s %(name)-5s %(funcName)-10s (line %(lineno)d) %(levelname)-8s %(message)s' + } + }, + 'handlers': { + 'console': { + 'class': 'logging.StreamHandler', + 'level': 'DEBUG', + 'formatter': 'simple' + }, + 'file': { + 'class': 'logging.FileHandler', + 'level': 'ERROR', + 'filename': LOG_FILE, + 'mode': 'a', + 'formatter': 'detailed', + }, + }, + 'loggers': { + 'VariantValidator': { + 'level': 'DEBUG', + 'handlers': ['console', 'file'], + 'propagate': 'no', + } + } +} diff --git a/VariantValidator/update_vv_db.py b/VariantValidator/update_vv_db.py index a3fb49ab..8e63be43 100644 --- a/VariantValidator/update_vv_db.py +++ b/VariantValidator/update_vv_db.py @@ -1,10 +1,13 @@ # -*- coding: utf-8 -*- import requests import copy +import logging from configparser import ConfigParser from .modules import vvDatabase from . import configure +logger = logging.getLogger(__name__) + def connect(): config = ConfigParser() @@ -17,6 +20,7 @@ def connect(): 'database': config["mysql"]["database"], 'raise_on_warnings': True } + logger.debug("Connecting to database with config %s", db_config) # Create database access objects db = vvDatabase.Database(db_config) return db @@ -31,6 +35,7 @@ def delete(): db.execute('DELETE FROM LRG_transcripts') db.execute('DELETE FROM LRG_proteins') db.execute('DELETE FROM LRG_RSG_lookup') + logger.debug("Deleted data from all tables including transcript_info") def update(): @@ -42,7 +47,7 @@ def update(): def update_refseq(dbcnx): - print('Updating RefSeqGene no Missmatch MySQL data') + logger.debug('Updating RefSeqGene no Missmatch MySQL data') # Download data from RefSeqGene # Download data @@ -129,7 +134,7 @@ def update_refseq(dbcnx): line.append(rsg_to_symbol[identifier]['symbol']) line.append(rsg_to_symbol[identifier]['gene_id']) except KeyError: - print("Can't identify gene symbol for %s" % line[0]) + logger.info("Can't identify gene symbol for %s", line[0]) 
missing.append(line[0]) # Open a text file to be used as a simple database and write the database @@ -165,14 +170,14 @@ def update_refseq(dbcnx): line[10] = current_symbol dbcnx.update_refseqgene_loci(line) - print('Total NG_ to NC_ alignments = ' + str(total_rsg_to_nc)) - print('Gaps within NG_ to NC_ alignments = ' + str(total_rsg_to_nc_rejected)) + logger.info('Total NG_ to NC_ alignments = ' + str(total_rsg_to_nc)) + logger.info('Gaps within NG_ to NC_ alignments = ' + str(total_rsg_to_nc_rejected)) return def update_lrg(dbcnx): - print('Updating LRG lookup tables') + logger.debug('Updating LRG lookup tables') lr2rs_download = requests.get('http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_transcripts_xrefs.txt') lr2rs = lr2rs_download.text.strip().split('\n') @@ -199,7 +204,7 @@ def update_lrg(dbcnx): # LRG_ID RefSeqTranscriptID # LRG_T2LRG_P - print('Update LRG and LRG_transcript lookup tables') + logger.debug('Update LRG and LRG_transcript lookup tables') # Populate lists lrg_rs_lookup (LRG to RefSeqGene) and lrg_t2nm_ (LRG Transcript to RefSeq Transcript) for line in lr2rs: if line.startswith('#'): @@ -224,7 +229,7 @@ def update_lrg(dbcnx): # update database dbcnx.update_lrgt_rst(lrgtx_to_rst_id) - print('Update LRG protein lookup table') + logger.debug('Update LRG protein lookup table') # Populate LRG protein RefSeqProtein lokup table for line in lr_t2p: if line.startswith('#'): @@ -236,7 +241,7 @@ def update_lrg(dbcnx): # update LRG to RefSeqGene database dbcnx.update_lrg_p_rs_p_lookup(lrg_p, rs_p) - print('LRG lookup tables updated') + logger.info('LRG lookup tables updated') return diff --git a/configuration/default.ini b/configuration/default.ini index 50ca29a2..a3a4fdbf 100644 --- a/configuration/default.ini +++ b/configuration/default.ini @@ -16,14 +16,10 @@ user = USERNAME password = PASSWORD [logging] -#Levels control verbosity and can be set to "critical" "error" "warning" "info" or "debug". 
-level = info -#level = debug -console = true -# Beware - file logging has permission issues. -file = false -# Trace is used for debugging to track variants through the validator function -trace = false +#Levels control verbosity and can be set to "CRITICAL" "ERROR" "WARNING" "INFO" or "DEBUG". +log = True +console = INFO +file = WARNING [EntrezID] entrezid = admin@variantvalidator.org diff --git a/configuration/travis.ini b/configuration/travis.ini index 3840a517..d9c34e76 100644 --- a/configuration/travis.ini +++ b/configuration/travis.ini @@ -16,14 +16,10 @@ user = anonymous password = anonymous [logging] -#Levels control verbosity and can be set to "critical" "error" "warning" "info" or "debug". -level = info -#level = debug -console = true -# Beware - file logging has permission issues. -file = false -# Trace is used for debugging to track variants through the validator function -trace = false +#Levels control verbosity and can be set to "CRITICAL" "ERROR" "WARNING" "INFO" or "DEBUG". +log = True +console = DEBUG +file = ERROR [EntrezID] entrezid = admin@variantvalidator.org diff --git a/docs/MANUAL.md b/docs/MANUAL.md index f129ec6e..09a8ddba 100644 --- a/docs/MANUAL.md +++ b/docs/MANUAL.md @@ -31,10 +31,9 @@ user = USERNAME password = PASSWORD [logging] -level = info -console = true -file = false -trace = false +log = True +console = INFO +file = WARNING [EntrezID] entrezid = admin@variantvalidator.org @@ -47,10 +46,17 @@ The values in capitals must be replaced for Variant Validator to run. By default the edited configuration will be placed in the users home directory (`~/.variantvalidator`), this location can be changed for all users by editing the `VariantValidator/settings.py` file. -#####Liftover +####Liftover If the UCSC Liftover [files](http://hgdownload.soe.ucsc.edu/goldenPath/hg38/liftOver/) have been previously downloaded their location can be set within the configuration file. 
By default the necessary files will be downloaded automatically when first requested. +####Logging + +By default Variant Validator will log to both the console and to a file, the output level for each can be set in the configuration file. +The levels control verbosity and can be set to "CRITICAL", "ERROR", "WARNING", "INFO" or "DEBUG". To turn off logging, set the log configuration to "False". The log file name and +log options can be changed for all users by editing the `VariantValidator/settings.py` file. By default the file log is +set to output in the users home directory (`~/.vv_errorlog`). + ## Database updates To import the initial data into the Validator MySQL database, run the following script: From 323fc11ee1467cc2138242d31019e19aede741ea Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 11 Jul 2019 13:33:53 +0100 Subject: [PATCH 176/223] Updated travis file --- configuration/travis.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configuration/travis.ini b/configuration/travis.ini index d9c34e76..2ed10513 100644 --- a/configuration/travis.ini +++ b/configuration/travis.ini @@ -18,7 +18,7 @@ password = anonymous [logging] #Levels control verbosity and can be set to "CRITICAL" "ERROR" "WARNING" "INFO" or "DEBUG". 
log = True -console = DEBUG +console = INFO file = ERROR [EntrezID] From 9a336c1a4cff5c9a03a6a115ea26768c47b12eab Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 11 Jul 2019 14:56:09 +0100 Subject: [PATCH 177/223] Bug fix in mixinConverters --- VariantValidator/modules/vvMixinConverters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index 658db0c0..5d4c4997 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -574,7 +574,7 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): 1): if i in list(alt_base_dict.keys()): alternate_sequence_bases.append(alt_base_dict[i]) - else: + elif i in list(ref_base_dict.keys()): alternate_sequence_bases.append(ref_base_dict[i]) alternate_sequence = ''.join(alternate_sequence_bases) alternate_sequence = alternate_sequence.replace('X', '') From 35be838be9f191061994d879c39ff5ad333f37cf Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 11 Jul 2019 14:56:53 +0100 Subject: [PATCH 178/223] Update travis file --- configuration/travis.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configuration/travis.ini b/configuration/travis.ini index 2ed10513..d26b1510 100644 --- a/configuration/travis.ini +++ b/configuration/travis.ini @@ -17,7 +17,7 @@ password = anonymous [logging] #Levels control verbosity and can be set to "CRITICAL" "ERROR" "WARNING" "INFO" or "DEBUG". 
-log = True +log = False console = INFO file = ERROR From 9503ca12530b73e62b78c6b9b2c81986d90cb73d Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 11 Jul 2019 15:31:25 +0100 Subject: [PATCH 179/223] Updated travis file --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 84966c1d..9f4c753e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -41,7 +41,7 @@ install: # Test dependencies - pip install -r requirements_dev.txt - - python setup.py install + - pip install . # Set up validator database - mysql validator < configuration/empty_vv_db.sql From b310f0df6fdab83380def978c5dde2e412ba3f6d Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 11 Jul 2019 15:32:25 +0100 Subject: [PATCH 180/223] Added possible options for variants 65, 138, 169 and 232 that get different results that are all biologically okay --- tests/test_inputs.py | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/tests/test_inputs.py b/tests/test_inputs.py index f558b549..081984c1 100644 --- a/tests/test_inputs.py +++ b/tests/test_inputs.py @@ -1970,7 +1970,13 @@ def test_variant65(self): assert results['NM_014249.3:c.946_949=']['hgvs_transcript_variant'] == 'NM_014249.3:c.946_949=' assert results['NM_014249.3:c.946_949=']['hgvs_refseqgene_variant'] == 'NG_009113.1:g.8034_8037=' assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} - assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813586_71813591=', 'vcf': {'chr': 'chr15', 'ref': 'GGACCC', 'pos': '71813586', 'alt': 'GGACCC'}} + result_options = [ + {'hgvs_genomic_description': 'NC_000015.10:g.71813586_71813591=', + 'vcf': {'chr': 'chr15', 'ref': 'GGACCC', 'pos': '71813586', 'alt': 'GGACCC'}}, + {'hgvs_genomic_description': 
'NC_000015.10:g.71813587_71813590=', + 'vcf': {'alt': 'GACC', 'chr': 'chr15', 'pos': '71813587', 'ref': 'GACC'}} + ] + self.assertIn(results['NM_014249.3:c.946_949=']['primary_assembly_loci']['hg38'], result_options) assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813586_71813591=', 'vcf': {'chr': '15', 'ref': 'GGACCC', 'pos': '71813586', 'alt': 'GGACCC'}} assert results['NM_014249.3:c.946_949=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3'} @@ -4316,7 +4322,13 @@ def test_variant138(self): assert results['NM_000949.5:c.*6523_*6524=']['hgvs_lrg_variant'] == '' assert results['NM_000949.5:c.*6523_*6524=']['hgvs_transcript_variant'] == 'NM_000949.5:c.*6523_*6524=' assert results['NM_000949.5:c.*6523_*6524=']['hgvs_refseqgene_variant'] == 'NG_029042.1:g.177156_177157=' - assert results['NM_000949.5:c.*6523_*6524=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058666_35058669=', 'vcf': {'chr': 'chr5', 'ref': 'AAGA', 'pos': '35058666', 'alt': 'AAGA'}} + result_options = [ + {'hgvs_genomic_description': 'NC_000005.9:g.35058666_35058669=', + 'vcf': {'chr': 'chr5', 'ref': 'AAGA', 'pos': '35058666', 'alt': 'AAGA'}}, + {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', + 'vcf': {'alt': 'AG', 'chr': 'chr5', 'pos': '35058667', 'ref': 'AG'}} + ] + self.assertIn(results['NM_000949.5:c.*6523_*6524=']['primary_assembly_loci']['hg19'], result_options) assert 'hg38' not in list(results['NM_000949.5:c.*6523_*6524=']['primary_assembly_loci'].keys()) assert 
results['NM_000949.5:c.*6523_*6524=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058666_35058669=', 'vcf': {'chr': '5', 'ref': 'AAGA', 'pos': '35058666', 'alt': 'AAGA'}} assert 'grch38' not in list(results['NM_000949.5:c.*6523_*6524=']['primary_assembly_loci'].keys()) @@ -5757,7 +5769,13 @@ def test_variant169(self): assert results['NM_000949.5:c.*6525_*6526=']['hgvs_lrg_variant'] == '' assert results['NM_000949.5:c.*6525_*6526=']['hgvs_transcript_variant'] == 'NM_000949.5:c.*6525_*6526=' assert results['NM_000949.5:c.*6525_*6526=']['hgvs_refseqgene_variant'] == 'NG_029042.1:g.177158_177159=' - assert results['NM_000949.5:c.*6525_*6526=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058664_35058668=', 'vcf': {'chr': 'chr5', 'ref': 'ACAAG', 'pos': '35058664', 'alt': 'ACAAG'}} + result_options = [ + {'hgvs_genomic_description': 'NC_000005.9:g.35058664_35058668=', + 'vcf': {'chr': 'chr5', 'ref': 'ACAAG', 'pos': '35058664', 'alt': 'ACAAG'}}, + {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', + 'vcf': {'alt': 'CA', 'chr': 'chr5', 'pos': '35058665', 'ref': 'CA'}} + ] + self.assertIn(results['NM_000949.5:c.*6525_*6526=']['primary_assembly_loci']['hg19'], result_options) assert 'hg38' not in list(results['NM_000949.5:c.*6525_*6526=']['primary_assembly_loci'].keys()) assert results['NM_000949.5:c.*6525_*6526=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058664_35058668=', 'vcf': {'chr': '5', 'ref': 'ACAAG', 'pos': '35058664', 'alt': 'ACAAG'}} assert 'grch38' not in list(results['NM_000949.5:c.*6525_*6526=']['primary_assembly_loci'].keys()) @@ -9446,7 +9464,13 @@ def test_variant232(self): assert results['NM_014249.3:c.946_949=']['hgvs_transcript_variant'] == 'NM_014249.3:c.946_949=' assert results['NM_014249.3:c.946_949=']['hgvs_refseqgene_variant'] == 'NG_009113.1:g.8034_8037=' assert 
results['NM_014249.3:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} - assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813586_71813591=', 'vcf': {'chr': 'chr15', 'ref': 'GGACCC', 'pos': '71813586', 'alt': 'GGACCC'}} + result_options = [ + {'hgvs_genomic_description': 'NC_000015.10:g.71813586_71813591=', + 'vcf': {'chr': 'chr15', 'ref': 'GGACCC', 'pos': '71813586', 'alt': 'GGACCC'}}, + {'hgvs_genomic_description': 'NC_000015.10:g.71813587_71813590=', + 'vcf': {'alt': 'GACC', 'chr': 'chr15', 'pos': '71813587', 'ref': 'GACC'}} + ] + self.assertIn(results['NM_014249.3:c.946_949=']['primary_assembly_loci']['hg38'], result_options) assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813586_71813591=', 'vcf': {'chr': '15', 'ref': 'GGACCC', 'pos': '71813586', 'alt': 'GGACCC'}} assert results['NM_014249.3:c.946_949=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3'} From 47300c0f9064f73c714174358bce54f05629306c Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 11 Jul 2019 15:35:05 +0100 Subject: [PATCH 181/223] Updated travis config --- configuration/travis.ini | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configuration/travis.ini b/configuration/travis.ini index d26b1510..4e8d9cbc 100644 --- a/configuration/travis.ini +++ b/configuration/travis.ini @@ -17,8 +17,8 @@ password = anonymous [logging] 
#Levels control verbosity and can be set to "CRITICAL" "ERROR" "WARNING" "INFO" or "DEBUG". -log = False -console = INFO +log = True +console = WARNING file = ERROR [EntrezID] From ec3fcc8c2dc704d03184297e47d65da855c83c6f Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 11 Jul 2019 16:19:17 +0100 Subject: [PATCH 182/223] Updated travis file --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 9f4c753e..4c0cdb5e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -41,7 +41,7 @@ install: # Test dependencies - pip install -r requirements_dev.txt - - pip install . + - pip install -e . # Set up validator database - mysql validator < configuration/empty_vv_db.sql From 6d157cd0c1bbde42c3a6b4cca2d8a6b67f9bed91 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 11 Jul 2019 16:23:04 +0100 Subject: [PATCH 183/223] Fixed more instances of same variant_6 bug --- VariantValidator/modules/vvMixinConverters.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index 5d4c4997..8cfcd4c3 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -716,7 +716,7 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): transcript_gap_n.posedit.pos.end.base + 1, 1): if i in list(alt_base_dict.keys()): alternate_sequence_bases.append(alt_base_dict[i]) - else: + elif i in list(ref_base_dict.keys()): alternate_sequence_bases.append(ref_base_dict[i]) alternate_sequence = ''.join(alternate_sequence_bases) alternate_sequence = alternate_sequence.replace('X', '') @@ -1238,7 +1238,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): 1): if i in list(alt_base_dict.keys()): alternate_sequence_bases.append(alt_base_dict[i]) - else: + elif i in list(ref_base_dict.keys()): alternate_sequence_bases.append(ref_base_dict[i]) alternate_sequence = 
''.join(alternate_sequence_bases) alternate_sequence = alternate_sequence.replace('X', '') @@ -1378,7 +1378,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): transcript_gap_n.posedit.pos.end.base + 1, 1): if i in list(alt_base_dict.keys()): alternate_sequence_bases.append(alt_base_dict[i]) - else: + elif i in list(ref_base_dict.keys()): alternate_sequence_bases.append(ref_base_dict[i]) alternate_sequence = ''.join(alternate_sequence_bases) alternate_sequence = alternate_sequence.replace('X', '') From fcc174711e36722df8303142c5b4bd0506d3ed5a Mon Sep 17 00:00:00 2001 From: TeriForey Date: Thu, 11 Jul 2019 16:59:20 +0100 Subject: [PATCH 184/223] Updated gene symbol adding to database to resemble v0 and updated tests accordingly --- VariantValidator/modules/vvDatabase.py | 40 +++++----- tests/test_inputs.py | 104 ++++++++++++------------- 2 files changed, 72 insertions(+), 72 deletions(-) diff --git a/VariantValidator/modules/vvDatabase.py b/VariantValidator/modules/vvDatabase.py index d624c25a..eae563e1 100644 --- a/VariantValidator/modules/vvDatabase.py +++ b/VariantValidator/modules/vvDatabase.py @@ -86,6 +86,8 @@ def update_transcript_info_record(self, accession, validator): version = record.id description = record.description + genbank_symbol = str(record.features[1].qualifiers['gene'][0]) + # Although it is obsolete, might still be in UTA database so would work in our case # if 'comment' in record.annotations: # comment = record.annotations['comment'] @@ -113,30 +115,28 @@ def update_transcript_info_record(self, accession, validator): raise utils.DatabaseConnectionError("Cannot retrieve data from UTA database") uta_symbol = str(uta_info[6]) + symbol = uta_symbol if uta_symbol == '': - raise utils.ObsoleteSeqError("Cannot find UTA symbol, accession is likely obsolete") - - # First perform a search against the input gene symbol or the symbol inferred from UTA - initial = utils.hgnc_rest(path="/fetch/symbol/" + uta_symbol) + # raise 
utils.ObsoleteSeqError("Cannot find UTA symbol, accession is likely obsolete") + uta_symbol = 'unassigned' + symbol = genbank_symbol - if initial['error'] != 'false': - raise utils.DatabaseConnectionError("Unable to retrieve data from the HGNC database") + hgnc_symbol = symbol - # Check for a record - if str(initial['record']['response']['numFound']) != '0': - hgnc_symbol = uta_symbol - # No record found, is it a previous symbol? - else: - # Search hgnc rest to see if symbol is out of date - rest_data = utils.hgnc_rest(path="/search/prev_symbol/" + uta_symbol) - # If the name is correct no record will be found - if rest_data['error'] == 'false': - if int(rest_data['record']['response']['numFound']) == 0: - hgnc_symbol = uta_info[6] - else: + try: + # First perform a search against the input gene symbol or the symbol inferred from UTA + initial = utils.hgnc_rest(path="/fetch/symbol/" + symbol) + + # Check for a record + if str(initial['record']['response']['numFound']) == '0': + # Search hgnc rest to see if symbol is out of date + rest_data = utils.hgnc_rest(path="/search/prev_symbol/" + symbol) + # If the name is correct no record will be found + if rest_data['error'] == 'false' and int(rest_data['record']['response']['numFound']) != 0: hgnc_symbol = rest_data['record']['response']['docs'][0]['symbol'] - else: - hgnc_symbol = 'unassigned' + + except Exception: + logger.info("Unable to connect to HGNC with symbol %s", symbol) # Query information query_info = [version, description, variant, version, hgnc_symbol, uta_symbol] diff --git a/tests/test_inputs.py b/tests/test_inputs.py index 081984c1..f76eaefd 100644 --- a/tests/test_inputs.py +++ b/tests/test_inputs.py @@ -162,9 +162,9 @@ def test_variant6(self): assert results['NM_001145026.1:c.715A>G']['hgvs_lrg_variant'] == '' assert results['NM_001145026.1:c.715A>G']['hgvs_transcript_variant'] == 'NM_001145026.1:c.715A>G' assert results['NM_001145026.1:c.715A>G']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in 
list(results['NM_001145026.1:c.715A>G']['primary_assembly_loci'].keys()) + assert results['NM_001145026.1:c.715A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.80860617_80860618insG', 'vcf': {'chr': 'chr12', 'pos': '80860617', 'ref': 'C', 'alt': 'CG'}} assert results['NM_001145026.1:c.715A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.80460707A>G', 'vcf': {'chr': 'chr12', 'ref': 'A', 'pos': '80460707', 'alt': 'G'}} - assert 'grch37' not in list(results['NM_001145026.1:c.715A>G']['primary_assembly_loci'].keys()) + assert results['NM_001145026.1:c.715A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.80860617_80860618insG', 'vcf': {'chr': '12', 'pos': '80860617', 'ref': 'C', 'alt': 'CG'}} assert results['NM_001145026.1:c.715A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.80460707A>G', 'vcf': {'chr': '12', 'ref': 'A', 'pos': '80460707', 'alt': 'G'}} assert results['NM_001145026.1:c.715A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001138498.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001145026.1'} @@ -15204,56 +15204,56 @@ def test_variant283(self): assert 'grch38' not in list(results['NM_007159.2:c.1135+565del']['primary_assembly_loci'].keys()) assert results['NM_007159.2:c.1135+565del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009090.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007159.2'} - assert 'obsolete_record_3' in list(results.keys()) - assert results['obsolete_record_3']['hgvs_lrg_transcript_variant'] == '' - assert results['obsolete_record_3']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['obsolete_record_3']['alt_genomic_loci'], []) - assert results['obsolete_record_3']['gene_symbol'] == '' - assert 
results['obsolete_record_3']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} - assert results['obsolete_record_3']['submitted_variant'] == '3-57851007-AG-A' - assert results['obsolete_record_3']['genome_context_intronic_sequence'] == '' - assert results['obsolete_record_3']['hgvs_lrg_variant'] == '' - assert results['obsolete_record_3']['hgvs_transcript_variant'] == '' - assert results['obsolete_record_3']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in list(results['obsolete_record_3']['primary_assembly_loci'].keys()) - assert 'hg38' not in list(results['obsolete_record_3']['primary_assembly_loci'].keys()) - assert 'grch37' not in list(results['obsolete_record_3']['primary_assembly_loci'].keys()) - assert 'grch38' not in list(results['obsolete_record_3']['primary_assembly_loci'].keys()) - assert results['obsolete_record_3']['reference_sequence_records'] == '' - - assert 'obsolete_record_2' in list(results.keys()) - assert results['obsolete_record_2']['hgvs_lrg_transcript_variant'] == '' - assert results['obsolete_record_2']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['obsolete_record_2']['alt_genomic_loci'], []) - assert results['obsolete_record_2']['gene_symbol'] == '' - assert results['obsolete_record_2']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} - assert results['obsolete_record_2']['submitted_variant'] == '3-57851007-AG-A' - assert results['obsolete_record_2']['genome_context_intronic_sequence'] == '' - assert results['obsolete_record_2']['hgvs_lrg_variant'] == '' - assert results['obsolete_record_2']['hgvs_transcript_variant'] == '' - assert results['obsolete_record_2']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in list(results['obsolete_record_2']['primary_assembly_loci'].keys()) - assert 'hg38' not in list(results['obsolete_record_2']['primary_assembly_loci'].keys()) - assert 'grch37' not in list(results['obsolete_record_2']['primary_assembly_loci'].keys()) - assert 
'grch38' not in list(results['obsolete_record_2']['primary_assembly_loci'].keys()) - assert results['obsolete_record_2']['reference_sequence_records'] == '' - - assert 'obsolete_record_1' in list(results.keys()) - assert results['obsolete_record_1']['hgvs_lrg_transcript_variant'] == '' - assert results['obsolete_record_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['obsolete_record_1']['alt_genomic_loci'], []) - assert results['obsolete_record_1']['gene_symbol'] == '' - assert results['obsolete_record_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} - assert results['obsolete_record_1']['submitted_variant'] == '3-57851007-AG-A' - assert results['obsolete_record_1']['genome_context_intronic_sequence'] == '' - assert results['obsolete_record_1']['hgvs_lrg_variant'] == '' - assert results['obsolete_record_1']['hgvs_transcript_variant'] == '' - assert results['obsolete_record_1']['hgvs_refseqgene_variant'] == '' - assert 'hg19' not in list(results['obsolete_record_1']['primary_assembly_loci'].keys()) - assert 'hg38' not in list(results['obsolete_record_1']['primary_assembly_loci'].keys()) - assert 'grch37' not in list(results['obsolete_record_1']['primary_assembly_loci'].keys()) - assert 'grch38' not in list(results['obsolete_record_1']['primary_assembly_loci'].keys()) - assert results['obsolete_record_1']['reference_sequence_records'] == '' + assert 'NM_007159.3:c.1135+565del' in list(results.keys()) + assert results['NM_007159.3:c.1135+565del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007159.3:c.1135+565del']['refseqgene_context_intronic_sequence'] == '' + self.assertCountEqual(results['NM_007159.3:c.1135+565del']['alt_genomic_loci'], []) + assert results['NM_007159.3:c.1135+565del']['gene_symbol'] == 'SLMAP' + assert results['NM_007159.3:c.1135+565del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009090.2:p.?', 'slr': 'NP_009090.2:p.?'} + assert 
results['NM_007159.3:c.1135+565del']['submitted_variant'] == '3-57851007-AG-A' + assert results['NM_007159.3:c.1135+565del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_007159.3):c.1135+565del' + assert results['NM_007159.3:c.1135+565del']['hgvs_lrg_variant'] == '' + assert results['NM_007159.3:c.1135+565del']['hgvs_transcript_variant'] == 'NM_007159.3:c.1135+565del' + assert results['NM_007159.3:c.1135+565del']['hgvs_refseqgene_variant'] == '' + assert results['NM_007159.3:c.1135+565del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': 'chr3', 'pos': '57851007', 'ref': 'AG', 'alt': 'A'}} + assert 'hg38' not in list(results['NM_007159.3:c.1135+565del']['primary_assembly_loci'].keys()) + assert results['NM_007159.3:c.1135+565del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': '3', 'pos': '57851007', 'ref': 'AG', 'alt': 'A'}} + assert 'grch38' not in list(results['NM_007159.3:c.1135+565del']['primary_assembly_loci'].keys()) + assert results['NM_007159.3:c.1135+565del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007159.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009090.2'} + + assert 'NM_001304420.1:c.1186+424del' in list(results.keys()) + assert results['NM_001304420.1:c.1186+424del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001304420.1:c.1186+424del']['refseqgene_context_intronic_sequence'] == '' + self.assertCountEqual(results['NM_001304420.1:c.1186+424del']['alt_genomic_loci'], []) + assert results['NM_001304420.1:c.1186+424del']['gene_symbol'] == 'SLMAP' + assert results['NM_001304420.1:c.1186+424del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001291349.1:p.?', 'slr': 'NP_001291349.1:p.?'} + assert results['NM_001304420.1:c.1186+424del']['submitted_variant'] == '3-57851007-AG-A' + assert 
results['NM_001304420.1:c.1186+424del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_001304420.1):c.1186+424del' + assert results['NM_001304420.1:c.1186+424del']['hgvs_lrg_variant'] == '' + assert results['NM_001304420.1:c.1186+424del']['hgvs_transcript_variant'] == 'NM_001304420.1:c.1186+424del' + assert results['NM_001304420.1:c.1186+424del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001304420.1:c.1186+424del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': 'chr3', 'pos': '57851007', 'ref': 'AG', 'alt': 'A'}} + assert 'hg38' not in list(results['NM_001304420.1:c.1186+424del']['primary_assembly_loci'].keys()) + assert results['NM_001304420.1:c.1186+424del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': '3', 'pos': '57851007', 'ref': 'AG', 'alt': 'A'}} + assert 'grch38' not in list(results['NM_001304420.1:c.1186+424del']['primary_assembly_loci'].keys()) + assert results['NM_001304420.1:c.1186+424del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001304420.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001291349.1'} + + assert 'NM_001304421.1:c.1135+565del' in list(results.keys()) + assert results['NM_001304421.1:c.1135+565del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001304421.1:c.1135+565del']['refseqgene_context_intronic_sequence'] == '' + self.assertCountEqual(results['NM_001304421.1:c.1135+565del']['alt_genomic_loci'], []) + assert results['NM_001304421.1:c.1135+565del']['gene_symbol'] == 'SLMAP' + assert results['NM_001304421.1:c.1135+565del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001291350.1:p.?', 'slr': 'NP_001291350.1:p.?'} + assert results['NM_001304421.1:c.1135+565del']['submitted_variant'] == '3-57851007-AG-A' + assert results['NM_001304421.1:c.1135+565del']['genome_context_intronic_sequence'] == 
'NC_000003.11(NM_001304421.1):c.1135+565del' + assert results['NM_001304421.1:c.1135+565del']['hgvs_lrg_variant'] == '' + assert results['NM_001304421.1:c.1135+565del']['hgvs_transcript_variant'] == 'NM_001304421.1:c.1135+565del' + assert results['NM_001304421.1:c.1135+565del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001304421.1:c.1135+565del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': 'chr3', 'pos': '57851007', 'ref': 'AG', 'alt': 'A'}} + assert 'hg38' not in list(results['NM_001304421.1:c.1135+565del']['primary_assembly_loci'].keys()) + assert results['NM_001304421.1:c.1135+565del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': '3', 'pos': '57851007', 'ref': 'AG', 'alt': 'A'}} + assert 'grch38' not in list(results['NM_001304421.1:c.1135+565del']['primary_assembly_loci'].keys()) + assert results['NM_001304421.1:c.1135+565del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001304421.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001291350.1'} def test_variant284(self): From c8e03336b508b589c6eaabf0a78cf6dd4fe9e903 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 12 Jul 2019 09:10:00 +0100 Subject: [PATCH 185/223] Added more changes to tests for variants 65, 138, 169 and 232 --- tests/test_inputs.py | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/tests/test_inputs.py b/tests/test_inputs.py index f76eaefd..c31fb51d 100644 --- a/tests/test_inputs.py +++ b/tests/test_inputs.py @@ -1978,7 +1978,13 @@ def test_variant65(self): ] self.assertIn(results['NM_014249.3:c.946_949=']['primary_assembly_loci']['hg38'], result_options) assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 
'A'}} - assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813586_71813591=', 'vcf': {'chr': '15', 'ref': 'GGACCC', 'pos': '71813586', 'alt': 'GGACCC'}} + result_options2 = [ + {'hgvs_genomic_description': 'NC_000015.10:g.71813586_71813591=', + 'vcf': {'chr': '15', 'ref': 'GGACCC', 'pos': '71813586', 'alt': 'GGACCC'}}, + {'hgvs_genomic_description': 'NC_000015.10:g.71813587_71813590=', + 'vcf': {'alt': 'GACC', 'chr': '15', 'pos': '71813587', 'ref': 'GACC'}} + ] + self.assertIn(results['NM_014249.3:c.946_949=']['primary_assembly_loci']['grch38'], result_options2) assert results['NM_014249.3:c.946_949=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3'} assert 'NM_016346.2:c.946_949=' in list(results.keys()) @@ -4330,7 +4336,13 @@ def test_variant138(self): ] self.assertIn(results['NM_000949.5:c.*6523_*6524=']['primary_assembly_loci']['hg19'], result_options) assert 'hg38' not in list(results['NM_000949.5:c.*6523_*6524=']['primary_assembly_loci'].keys()) - assert results['NM_000949.5:c.*6523_*6524=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058666_35058669=', 'vcf': {'chr': '5', 'ref': 'AAGA', 'pos': '35058666', 'alt': 'AAGA'}} + result_options2 = [ + {'hgvs_genomic_description': 'NC_000005.9:g.35058666_35058669=', + 'vcf': {'chr': '5', 'ref': 'AAGA', 'pos': '35058666', 'alt': 'AAGA'}}, + {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', + 'vcf': {'alt': 'AG', 'chr': '5', 'pos': '35058667', 'ref': 'AG'}} + ] + self.assertIn(results['NM_000949.5:c.*6523_*6524=']['primary_assembly_loci']['grch37'], result_options2) assert 'grch38' not in list(results['NM_000949.5:c.*6523_*6524=']['primary_assembly_loci'].keys()) assert 
results['NM_000949.5:c.*6523_*6524=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029042.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000940.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000949.5'} @@ -5777,7 +5789,13 @@ def test_variant169(self): ] self.assertIn(results['NM_000949.5:c.*6525_*6526=']['primary_assembly_loci']['hg19'], result_options) assert 'hg38' not in list(results['NM_000949.5:c.*6525_*6526=']['primary_assembly_loci'].keys()) - assert results['NM_000949.5:c.*6525_*6526=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058664_35058668=', 'vcf': {'chr': '5', 'ref': 'ACAAG', 'pos': '35058664', 'alt': 'ACAAG'}} + result_options2 = [ + {'hgvs_genomic_description': 'NC_000005.9:g.35058664_35058668=', + 'vcf': {'chr': '5', 'ref': 'ACAAG', 'pos': '35058664', 'alt': 'ACAAG'}}, + {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', + 'vcf': {'alt': 'CA', 'chr': '5', 'pos': '35058665', 'ref': 'CA'}} + ] + self.assertIn(results['NM_000949.5:c.*6525_*6526=']['primary_assembly_loci']['grch37'], result_options2) assert 'grch38' not in list(results['NM_000949.5:c.*6525_*6526=']['primary_assembly_loci'].keys()) assert results['NM_000949.5:c.*6525_*6526=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029042.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000940.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000949.5'} @@ -9472,7 +9490,13 @@ def test_variant232(self): ] self.assertIn(results['NM_014249.3:c.946_949=']['primary_assembly_loci']['hg38'], result_options) assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} - assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000015.10:g.71813586_71813591=', 'vcf': {'chr': '15', 'ref': 'GGACCC', 'pos': '71813586', 'alt': 'GGACCC'}} + result_options2 = [ + {'hgvs_genomic_description': 'NC_000015.10:g.71813586_71813591=', + 'vcf': {'chr': '15', 'ref': 'GGACCC', 'pos': '71813586', 'alt': 'GGACCC'}}, + {'hgvs_genomic_description': 'NC_000015.10:g.71813587_71813590=', + 'vcf': {'alt': 'GACC', 'chr': '15', 'pos': '71813587', 'ref': 'GACC'}} + ] + self.assertIn(results['NM_014249.3:c.946_949=']['primary_assembly_loci']['grch38'], result_options2) assert results['NM_014249.3:c.946_949=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3'} assert 'NM_016346.2:c.946_949=' in list(results.keys()) From dd9951f107f5681ba3e49a19e3c03ca02b05fa6a Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 12 Jul 2019 11:02:16 +0100 Subject: [PATCH 186/223] Added optional Entrez API KEY and appropriate docs --- VariantValidator/modules/vvMixinConverters.py | 4 ++- VariantValidator/modules/vvMixinInit.py | 7 +++-- configuration/default.ini | 5 ++-- configuration/travis.ini | 5 ++-- docs/MANUAL.md | 28 ++++++++++++------- 5 files changed, 32 insertions(+), 17 deletions(-) diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index 8cfcd4c3..c4c04357 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -1723,7 +1723,9 @@ def entrez_efetch(self, db, id, rettype, retmode): Search Entrez databases with efetch and SeqIO """ # from Bio import Entrez - Entrez.email = self.entrezID + Entrez.email = self.entrez_email + if self.entrez_api_key: + Entrez.api_key = self.entrez_api_key # from Bio import SeqIO handle = Entrez.efetch(db=db, id=id, rettype=rettype, retmode=retmode) # Get record diff --git 
a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index fb9a9963..8e9482d7 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -58,7 +58,11 @@ def __init__(self): config.read(CONFIG_DIR) # Handle databases - self.entrezID = config["EntrezID"]["entrezID"] + self.entrez_email = config["Entrez"]["email"] + self.entrez_api_key = None + if config['Entrez']['api_key'] != 'YOUR_API_KEY': + self.entrez_api_key = config['Entrez']['api_key'] + self.seqrepoVersion = config["seqrepo"]["version"] self.seqrepoPath = os.path.join(config["seqrepo"]["location"], self.seqrepoVersion) os.environ['HGVS_SEQREPO_DIR'] = self.seqrepoPath @@ -93,7 +97,6 @@ def __init__(self): # Set up other configuration variables self.liftoverPath = config["liftover"]["location"] - self.entrezID = config["EntrezID"]['entrezid'] # Set up HGVS # Configure hgvs package global settings diff --git a/configuration/default.ini b/configuration/default.ini index a3a4fdbf..f013b176 100644 --- a/configuration/default.ini +++ b/configuration/default.ini @@ -21,8 +21,9 @@ log = True console = INFO file = WARNING -[EntrezID] -entrezid = admin@variantvalidator.org +[Entrez] +email = YOUR@EMAIL.COM +api_key = YOUR_API_KEY [liftover] location = /path/to/liftover diff --git a/configuration/travis.ini b/configuration/travis.ini index 4e8d9cbc..a1d148b2 100644 --- a/configuration/travis.ini +++ b/configuration/travis.ini @@ -21,8 +21,9 @@ log = True console = WARNING file = ERROR -[EntrezID] -entrezid = admin@variantvalidator.org +[Entrez] +email = admin@variantvalidator.org +api_key = YOUR_API_KEY [liftover] location = /path/to/liftover diff --git a/docs/MANUAL.md b/docs/MANUAL.md index 09a8ddba..dc35dd13 100644 --- a/docs/MANUAL.md +++ b/docs/MANUAL.md @@ -2,7 +2,9 @@ ## Configuration -After first installing Variant Validator, a configuration file will need to be created and edited to contain the database credentials and locations. 
To do this run the configuration script installed alongside the package. +After first installing Variant Validator, a configuration file will need to be created and edited to contain the database credentials and locations. +By default the edited configuration will be placed in the users home directory (`~/.variantvalidator`), this location can be changed for all users by editing the `VariantValidator/settings.py` file. +To create this file automatically, run the configuration script installed alongside the package. ```bash vv_configure.py @@ -35,20 +37,15 @@ log = True console = INFO file = WARNING -[EntrezID] -entrezid = admin@variantvalidator.org +[Entrez] +email = YOUR@EMAIL.COM +api_key = YOUR_API_KEY [liftover] location = /path/to/liftover ``` -The values in capitals must be replaced for Variant Validator to run. - -By default the edited configuration will be placed in the users home directory (`~/.variantvalidator`), this location can be changed for all users by editing the `VariantValidator/settings.py` file. - -####Liftover - -If the UCSC Liftover [files](http://hgdownload.soe.ucsc.edu/goldenPath/hg38/liftOver/) have been previously downloaded their location can be set within the configuration file. By default the necessary files will be downloaded automatically when first requested. +The values in capitals must be replaced for Variant Validator to run, see below for more details. ####Logging @@ -57,6 +54,17 @@ The levels control verbosity and can be set to "CRITICAL", "ERROR", "WARNING", " log options can be changed for all users by editing the `VariantValidator/settings.py` file. By default the file log is set to output in the users home directory (`~/.vv_errorlog`). +####Entrez + +For access to the NCBI Entrez database you must provide a valid email address in +the respective configuration setting. Optionally, you can also provide an NCBI API key that will increase the number of requests +made per second. 
See [this article](https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/) on how to generate an API key. + +####Liftover + +If the UCSC Liftover [files](http://hgdownload.soe.ucsc.edu/goldenPath/hg38/liftOver/) have been previously downloaded their location can be set within the configuration file. By default the necessary files will be downloaded automatically when first requested. + + ## Database updates To import the initial data into the Validator MySQL database, run the following script: From 131238b6678f0a6bca1354f48e4a84f4b0d35ff6 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 12 Jul 2019 15:37:26 +0100 Subject: [PATCH 187/223] Added in stableGeneIds table to database, associated methods and updated all tests to include new field. --- VariantValidator/modules/variant.py | 2 + VariantValidator/modules/vvDBGet.py | 8 + VariantValidator/modules/vvDBInsert.py | 44 + VariantValidator/modules/vvDatabase.py | 47 +- VariantValidator/modules/vvMixinCore.py | 29 + configuration/empty_vv_db.sql | 31 +- tests/inputVariants.txt | 4 +- tests/test_inputs.py | 26055 +++++++++++----------- tests/test_variant.py | 3 + 9 files changed, 13483 insertions(+), 12740 deletions(-) diff --git a/VariantValidator/modules/variant.py b/VariantValidator/modules/variant.py index c2b6fcad..15838242 100644 --- a/VariantValidator/modules/variant.py +++ b/VariantValidator/modules/variant.py @@ -54,6 +54,7 @@ def __init__(self, original, quibble=None, warnings=None, write=True, primary_as self.lose_vm = None # Required for output + self.stable_gene_ids = None self.hgvs_transcript_variant = None # variant.coding but edited self.genome_context_intronic_sequence = None self.refseqgene_context_intronic_sequence = None @@ -183,6 +184,7 @@ def output_dict(self): dict_out = { 'submitted_variant': self.original, 'gene_symbol': self.gene_symbol, + 'gene_ids': self.stable_gene_ids, 'transcript_description': self.description, 'hgvs_transcript_variant': 
self.hgvs_transcript_variant, 'genome_context_intronic_sequence': self.genome_context_intronic_sequence, diff --git a/VariantValidator/modules/vvDBGet.py b/VariantValidator/modules/vvDBGet.py index ba0e86bf..dbfb3c72 100644 --- a/VariantValidator/modules/vvDBGet.py +++ b/VariantValidator/modules/vvDBGet.py @@ -103,6 +103,14 @@ def get_all_transcript_id(self): query = "SELECT refSeqID FROM transcript_info" return self.execute_all(query) + def get_stable_gene_id_info(self, hgnc_symbol): + query = "SELECT * FROM stableGeneIds WHERE hgnc_symbol = '%s'" % hgnc_symbol + return self.execute(query) + + def get_stable_gene_id_from_hgnc_id(self, hgnc_id): + query = "SELECT * FROM stableGeneIds WHERE hgnc_id = '%s'" % hgnc_id + return self.execute(query) + # Direct methods (GET) def get_uta_symbol(self, gene_symbol): # returns the UTA gene symbol when HGNC gene symbol is input diff --git a/VariantValidator/modules/vvDBInsert.py b/VariantValidator/modules/vvDBInsert.py index dd09db91..60d69412 100644 --- a/VariantValidator/modules/vvDBInsert.py +++ b/VariantValidator/modules/vvDBInsert.py @@ -87,6 +87,29 @@ def insert_lrg_protein_data(self, lrg_p, rs_p): self.conn.commit() return success + @handleCursor + def insert_gene_stable_ids(self, data): + query = "INSERT INTO stableGeneIds(hgnc_id, hgnc_symbol, entrez_id, ensembl_gene_id, omim_id, ucsc_id, " \ + "vega_id, ccds_ids) VALUES (%s,%s,%s,%s,%s,%s,%s,%s)" + self.cursor.execute(query, ( + data['hgnc_id'], + data['hgnc_symbol'], + data['entrez_id'], + data['ensembl_gene_id'], + data['omim_id'], + data['ucsc_id'], + data['vega_id'], + data['ccds_id'] + )) + + if self.cursor.lastrowid: + success = 'true' + else: + success = 'unknown error' + + self.conn.commit() + return success + @handleCursor def update(self, entry, data): accession = entry @@ -110,3 +133,24 @@ def update_refseq_gene_data(self, rsg_data): self.conn.commit() return success + @handleCursor + def update_gene_stable_ids(self, gene_stable_ids): + + # Insert or 
update combined statement + query = "UPDATE stableGeneIds SET hgnc_symbol=%s, entrez_id=%s, ensembl_gene_id=%s, omim_id=%s, ucsc_id=%s, " \ + "vega_id=%s, ccds_ids=%s WHERE hgnc_id=%s" + + self.cursor.execute(query, ( + gene_stable_ids["hgnc_symbol"], + gene_stable_ids["entrez_id"], + gene_stable_ids["ensembl_gene_id"], + gene_stable_ids["omim_id"], + gene_stable_ids["ucsc_id"], + gene_stable_ids["vega_id"], + gene_stable_ids["ccds_id"], + gene_stable_ids["hgnc_id"] + )) + success = 'true' + self.conn.commit() + return success + diff --git a/VariantValidator/modules/vvDatabase.py b/VariantValidator/modules/vvDatabase.py index eae563e1..587a41f9 100644 --- a/VariantValidator/modules/vvDatabase.py +++ b/VariantValidator/modules/vvDatabase.py @@ -4,6 +4,7 @@ import re import hgvs.exceptions import logging +import json logger = logging.getLogger(__name__) @@ -134,8 +135,52 @@ def update_transcript_info_record(self, accession, validator): # If the name is correct no record will be found if rest_data['error'] == 'false' and int(rest_data['record']['response']['numFound']) != 0: hgnc_symbol = rest_data['record']['response']['docs'][0]['symbol'] + initial = utils.hgnc_rest(path="/fetch/symbol/" + hgnc_symbol) + + if hgnc_symbol != 'unassigned' and int(initial['record']['response']['numFound']) != 0: + docs = initial['record']['response']['docs'][0] + hgnc_id = '' + entrez_id = '' + ensembl_gene_id = '' + omim_id = json.dumps([]) + ucsc_id = '' + vega_id = '' + ccds_id = json.dumps([]) + + if 'hgnc_id' in docs: + hgnc_id = docs['hgnc_id'] + if 'entrez_id' in docs: + entrez_id = docs['entrez_id'] + if 'ensembl_gene_id' in docs: + ensembl_gene_id = docs['ensembl_gene_id'] + if 'omim_id' in docs: + omim_id = json.dumps(docs['omim_id']) + if 'ucsc_id' in docs: + ucsc_id = docs['ucsc_id'] + if 'vega_id' in docs: + vega_id = docs['vega_id'] + if 'ccds_id' in docs: + ccds_id = json.dumps(docs['ccds_id']) + + gene_stable_ids = { + "hgnc_id": hgnc_id, + "entrez_id": entrez_id, + 
"ensembl_gene_id": ensembl_gene_id, + "omim_id": omim_id, + "ucsc_id": ucsc_id, + "vega_id": vega_id, + "ccds_id": ccds_id, + "hgnc_symbol": hgnc_symbol + + } + gene_id_info = self.get_stable_gene_id_from_hgnc_id(gene_stable_ids["hgnc_id"]) + if gene_id_info[1] != 'No data': + self.update_gene_stable_ids(gene_stable_ids) + else: + self.insert_gene_stable_ids(gene_stable_ids) - except Exception: + except Exception as e: + logger.debug("Except pass, %s", e) logger.info("Unable to connect to HGNC with symbol %s", symbol) # Query information diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 407f67ba..24f037da 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -5,6 +5,7 @@ import copy import sys import logging +import json from hgvs.assemblymapper import AssemblyMapper from . import hgvs_utils from . import utils as fn @@ -455,6 +456,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Transcript sequence variation tx_variant = variant.coding hgvs_transcript_variant = tx_variant + hgvs_tx_variant = None if tx_variant != '': if '(' in tx_variant and ')' in tx_variant: tx_variant = tx_variant.split('(')[1] @@ -664,6 +666,33 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr else: predicted_protein_variant_dict["slr"] = str(predicted_protein_variant) + # Add stable gene_ids + stable_gene_ids = {} + if variant.gene_symbol != '': + gene_stable_info = self.db.get_stable_gene_id_info(variant.gene_symbol) + + # Add or update stable ID and transcript data + if gene_stable_info[1] == 'No data' and hgvs_tx_variant is not None: + self.db.update_transcript_info_record(hgvs_tx_variant.ac, self) + gene_stable_info = self.db.get_stable_gene_id_info(variant.gene_symbol) + + # Update gene_symbol + if variant.gene_symbol != str(gene_stable_info[1]) and str(gene_stable_info[1]) != 'No data': + variant.gene_symbol = 
str(gene_stable_info[1]) + + try: + # Dictionary the output + stable_gene_ids['hgnc_id'] = gene_stable_info[2] + stable_gene_ids['entrez_gene_id'] = gene_stable_info[3] + # stable_gene_ids['ensembl_gene_id'] = gene_stable_info[4] + stable_gene_ids['ucsc_id'] = gene_stable_info[5] + stable_gene_ids['omim_id'] = json.loads(gene_stable_info[6]) + # stable_gene_ids['vega_id'] = gene_stable_info[7] + # stable_gene_ids['ccds_id'] = gene_stable_info[8] + except IndexError as e: + logger.debug("Except pass, %s", e) + + variant.stable_gene_ids = stable_gene_ids variant.hgvs_transcript_variant = tx_variant variant.genome_context_intronic_sequence = genome_context_transcript_variant variant.refseqgene_context_intronic_sequence = refseqgene_context_transcript_variant diff --git a/configuration/empty_vv_db.sql b/configuration/empty_vv_db.sql index de6d0247..eec09a0a 100644 --- a/configuration/empty_vv_db.sql +++ b/configuration/empty_vv_db.sql @@ -29,7 +29,7 @@ CREATE TABLE `LRG_RSG_lookup` ( `RefSeqGeneID` varchar(25) NOT NULL DEFAULT '', `status` text NOT NULL, PRIMARY KEY (`id`) -) ENGINE=InnoDB AUTO_INCREMENT=2092 DEFAULT CHARSET=utf8; +) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -44,7 +44,7 @@ CREATE TABLE `LRG_proteins` ( `LRGproteinID` varchar(25) DEFAULT NULL, `RefSeqProteinID` varchar(25) DEFAULT NULL, PRIMARY KEY (`id`) -) ENGINE=InnoDB AUTO_INCREMENT=1381 DEFAULT CHARSET=utf8; +) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -59,7 +59,7 @@ CREATE TABLE `LRG_transcripts` ( `LRGtranscriptID` varchar(25) NOT NULL DEFAULT '', `RefSeqTranscriptID` varchar(25) NOT NULL DEFAULT '', PRIMARY KEY (`id`) -) ENGINE=InnoDB AUTO_INCREMENT=2607 DEFAULT CHARSET=utf8; +) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -84,7 +84,7 @@ CREATE TABLE `refSeqGene_loci` ( 
`hgncSymbol` varchar(20) NOT NULL, `updated` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (`id`) -) ENGINE=InnoDB AUTO_INCREMENT=27012 DEFAULT CHARSET=utf8; +) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; -- @@ -105,7 +105,28 @@ CREATE TABLE `transcript_info` ( `updated` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (`id`), KEY `refi` (`refSeqID`) -) ENGINE=InnoDB AUTO_INCREMENT=740 DEFAULT CHARSET=utf8; +) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Table structure for table `LRG_proteins` +-- + +DROP TABLE IF EXISTS `stableGeneIds`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `stableGeneIds` ( + `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `hgnc_symbol` varchar(25) DEFAULT NULL, + `hgnc_id` varchar(25) DEFAULT NULL, + `entrez_id` varchar(25) DEFAULT NULL, + `ensembl_gene_id` varchar(100) DEFAULT NULL, + `ucsc_id` varchar(100) DEFAULT NULL, + `omim_id` varchar(100) DEFAULT NULL, + `vega_id` varchar(100) DEFAULT NULL, + `ccds_ids` varchar(1000) DEFAULT NULL, + PRIMARY KEY (`id`) +) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; /*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */; diff --git a/tests/inputVariants.txt b/tests/inputVariants.txt index 8dd63cdc..9bd8a764 100644 --- a/tests/inputVariants.txt +++ b/tests/inputVariants.txt @@ -1,4 +1,3 @@ -NM_000088.3:c.589G>T NM_015120.4:c.35T>C NM_015120.4:c.39G>C NM_015120.4:c.34C>T @@ -142,6 +141,7 @@ NM_000088.3:c.2023_2028del NM_000088.3:c.2024_2028+1del ENST00000450616.1:n.31+1G>C ENST00000491747:c.5071A>T +NM_000088.3:c.589G>T NG_007400.1:g.8638G>T LRG_1:g.8638G>T LRG_1t1:c.589G>T @@ -332,3 +332,5 @@ NM_002693.2:c.-186_-185delinsCC NG_009616.1:g.29052_29053insCTACATAG NM_000061.2:c.588_588+1insCTACATAG 
NM_000061.2:c.588_589insCTACATAG +NM_000492.3:c.1210-12_1210-6delinsTTTTTTTTT +NM_000088.3:c.589-18_589-14delinsTTTTTTTTTT \ No newline at end of file diff --git a/tests/test_inputs.py b/tests/test_inputs.py index c31fb51d..5c54bd71 100644 --- a/tests/test_inputs.py +++ b/tests/test_inputs.py @@ -1,8 +1,7 @@ from VariantValidator import Validator from unittest import TestCase - -class TestVariants(TestCase): +class TestVariantsAuto(TestCase): @classmethod def setup_class(cls): @@ -13,24 +12,24 @@ def test_variant1(self): results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_015120.4:c.35T>C' in list(results.keys()) - assert results['NM_015120.4:c.35T>C']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.35T>C' - assert results['NM_015120.4:c.35T>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_015120.4:c.35T>C']['alt_genomic_loci'], []) - assert results['NM_015120.4:c.35T>C']['gene_symbol'] == 'ALMS1' - assert results['NM_015120.4:c.35T>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Leu12Pro)', 'slr': 'NP_055935.4:p.(L12P)'} assert results['NM_015120.4:c.35T>C']['submitted_variant'] == 'NM_015120.4:c.35T>C' - assert results['NM_015120.4:c.35T>C']['genome_context_intronic_sequence'] == '' - assert results['NM_015120.4:c.35T>C']['hgvs_lrg_variant'] == 'LRG_741:g.5146T>C' + assert results['NM_015120.4:c.35T>C']['gene_symbol'] == 'ALMS1' + assert results['NM_015120.4:c.35T>C']['gene_ids'] == {'hgnc_id': 'HGNC:428', 'entrez_gene_id': '7840', 'ucsc_id': 'uc032nrd.1', 'omim_id': ['606844']} assert results['NM_015120.4:c.35T>C']['hgvs_transcript_variant'] == 'NM_015120.4:c.35T>C' + assert results['NM_015120.4:c.35T>C']['genome_context_intronic_sequence'] == '' + assert results['NM_015120.4:c.35T>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_015120.4:c.35T>C']['hgvs_refseqgene_variant'] == 
'NG_011690.1:g.5146T>C' - assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613031delinsCGGA', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '73613031', 'alt': 'CGGA'}} - assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385903delinsCGGA', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '73385903', 'alt': 'CGGA'}} - assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613031delinsCGGA', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '73613031', 'alt': 'CGGA'}} - assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385903delinsCGGA', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '73385903', 'alt': 'CGGA'}} - assert results['NM_015120.4:c.35T>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} - - assert results['flag'] == 'gene_variant' + assert results['NM_015120.4:c.35T>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Leu12Pro)', 'slr': 'NP_055935.4:p.(L12P)'} + assert results['NM_015120.4:c.35T>C']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.35T>C' + assert results['NM_015120.4:c.35T>C']['hgvs_lrg_variant'] == 'LRG_741:g.5146T>C' + self.assertCountEqual(results['NM_015120.4:c.35T>C']['alt_genomic_loci'], []) + assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613031delinsCGGA', 'vcf': {'chr': 'chr2', 'pos': '73613031', 'ref': 'T', 'alt': 'CGGA'}} + assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.73385903delinsCGGA', 'vcf': {'chr': 'chr2', 'pos': '73385903', 'ref': 'T', 'alt': 'CGGA'}} + assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613031delinsCGGA', 'vcf': {'chr': '2', 'pos': '73613031', 'ref': 'T', 'alt': 'CGGA'}} + assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385903delinsCGGA', 'vcf': {'chr': '2', 'pos': '73385903', 'ref': 'T', 'alt': 'CGGA'}} + assert results['NM_015120.4:c.35T>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} def test_variant2(self): variant = 'NM_015120.4:c.39G>C' @@ -39,22 +38,22 @@ def test_variant2(self): assert results['flag'] == 'gene_variant' assert 'NM_015120.4:c.39G>C' in list(results.keys()) - assert results['NM_015120.4:c.39G>C']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.39G>C' - assert results['NM_015120.4:c.39G>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_015120.4:c.39G>C']['alt_genomic_loci'], []) - assert results['NM_015120.4:c.39G>C']['gene_symbol'] == 'ALMS1' - assert results['NM_015120.4:c.39G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Glu13Asp)', 'slr': 'NP_055935.4:p.(E13D)'} assert results['NM_015120.4:c.39G>C']['submitted_variant'] == 'NM_015120.4:c.39G>C' - assert results['NM_015120.4:c.39G>C']['genome_context_intronic_sequence'] == '' - assert results['NM_015120.4:c.39G>C']['hgvs_lrg_variant'] == 'LRG_741:g.5150G>C' + assert results['NM_015120.4:c.39G>C']['gene_symbol'] == 'ALMS1' + assert results['NM_015120.4:c.39G>C']['gene_ids'] == {'hgnc_id': 'HGNC:428', 'entrez_gene_id': '7840', 'ucsc_id': 'uc032nrd.1', 'omim_id': 
['606844']} assert results['NM_015120.4:c.39G>C']['hgvs_transcript_variant'] == 'NM_015120.4:c.39G>C' + assert results['NM_015120.4:c.39G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_015120.4:c.39G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_015120.4:c.39G>C']['hgvs_refseqgene_variant'] == 'NG_011690.1:g.5150G>C' - assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613034_73613035insCGA', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '73613032', 'alt': 'GGAC'}} - assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385906_73385907insCGA', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '73385904', 'alt': 'GGAC'}} - assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613034_73613035insCGA', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '73613032', 'alt': 'GGAC'}} - assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385906_73385907insCGA', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '73385904', 'alt': 'GGAC'}} - assert results['NM_015120.4:c.39G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} - + assert results['NM_015120.4:c.39G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Glu13Asp)', 'slr': 'NP_055935.4:p.(E13D)'} + assert results['NM_015120.4:c.39G>C']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.39G>C' + assert results['NM_015120.4:c.39G>C']['hgvs_lrg_variant'] == 'LRG_741:g.5150G>C' + self.assertCountEqual(results['NM_015120.4:c.39G>C']['alt_genomic_loci'], []) + assert 
results['NM_015120.4:c.39G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613034_73613035insCGA', 'vcf': {'chr': 'chr2', 'pos': '73613032', 'ref': 'G', 'alt': 'GGAC'}} + assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385906_73385907insCGA', 'vcf': {'chr': 'chr2', 'pos': '73385904', 'ref': 'G', 'alt': 'GGAC'}} + assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613034_73613035insCGA', 'vcf': {'chr': '2', 'pos': '73613032', 'ref': 'G', 'alt': 'GGAC'}} + assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385906_73385907insCGA', 'vcf': {'chr': '2', 'pos': '73385904', 'ref': 'G', 'alt': 'GGAC'}} + assert results['NM_015120.4:c.39G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} def test_variant3(self): variant = 'NM_015120.4:c.34C>T' @@ -63,22 +62,22 @@ def test_variant3(self): assert results['flag'] == 'gene_variant' assert 'NM_015120.4:c.34C>T' in list(results.keys()) - assert results['NM_015120.4:c.34C>T']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.34C>T' - assert results['NM_015120.4:c.34C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_015120.4:c.34C>T']['alt_genomic_loci'], []) - assert results['NM_015120.4:c.34C>T']['gene_symbol'] == 'ALMS1' - assert results['NM_015120.4:c.34C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Leu12=)', 'slr': 'NP_055935.4:p.(L12=)'} assert results['NM_015120.4:c.34C>T']['submitted_variant'] == 'NM_015120.4:c.34C>T' - assert 
results['NM_015120.4:c.34C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_015120.4:c.34C>T']['hgvs_lrg_variant'] == 'LRG_741:g.5145C>T' + assert results['NM_015120.4:c.34C>T']['gene_symbol'] == 'ALMS1' + assert results['NM_015120.4:c.34C>T']['gene_ids'] == {'hgnc_id': 'HGNC:428', 'entrez_gene_id': '7840', 'ucsc_id': 'uc032nrd.1', 'omim_id': ['606844']} assert results['NM_015120.4:c.34C>T']['hgvs_transcript_variant'] == 'NM_015120.4:c.34C>T' + assert results['NM_015120.4:c.34C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_015120.4:c.34C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_015120.4:c.34C>T']['hgvs_refseqgene_variant'] == 'NG_011690.1:g.5145C>T' - assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613030C>T', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '73613030', 'alt': 'T'}} - assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385902C>T', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '73385902', 'alt': 'T'}} - assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613030C>T', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '73613030', 'alt': 'T'}} - assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385902C>T', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '73385902', 'alt': 'T'}} - assert results['NM_015120.4:c.34C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} - + assert results['NM_015120.4:c.34C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Leu12=)', 'slr': 
'NP_055935.4:p.(L12=)'} + assert results['NM_015120.4:c.34C>T']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.34C>T' + assert results['NM_015120.4:c.34C>T']['hgvs_lrg_variant'] == 'LRG_741:g.5145C>T' + self.assertCountEqual(results['NM_015120.4:c.34C>T']['alt_genomic_loci'], []) + assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613030C>T', 'vcf': {'chr': 'chr2', 'pos': '73613030', 'ref': 'C', 'alt': 'T'}} + assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385902C>T', 'vcf': {'chr': 'chr2', 'pos': '73385902', 'ref': 'C', 'alt': 'T'}} + assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613030C>T', 'vcf': {'chr': '2', 'pos': '73613030', 'ref': 'C', 'alt': 'T'}} + assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385902C>T', 'vcf': {'chr': '2', 'pos': '73385902', 'ref': 'C', 'alt': 'T'}} + assert results['NM_015120.4:c.34C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} def test_variant4(self): variant = 'NC_000002.11:g.73613030C>T' @@ -87,22 +86,22 @@ def test_variant4(self): assert results['flag'] == 'gene_variant' assert 'NM_015120.4:c.34C>T' in list(results.keys()) - assert results['NM_015120.4:c.34C>T']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.34C>T' - assert results['NM_015120.4:c.34C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_015120.4:c.34C>T']['alt_genomic_loci'], []) - assert results['NM_015120.4:c.34C>T']['gene_symbol'] == 'ALMS1' - assert 
results['NM_015120.4:c.34C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Leu12=)', 'slr': 'NP_055935.4:p.(L12=)'} assert results['NM_015120.4:c.34C>T']['submitted_variant'] == 'NC_000002.11:g.73613030C>T' - assert results['NM_015120.4:c.34C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_015120.4:c.34C>T']['hgvs_lrg_variant'] == 'LRG_741:g.5145C>T' + assert results['NM_015120.4:c.34C>T']['gene_symbol'] == 'ALMS1' + assert results['NM_015120.4:c.34C>T']['gene_ids'] == {'hgnc_id': 'HGNC:428', 'entrez_gene_id': '7840', 'ucsc_id': 'uc032nrd.1', 'omim_id': ['606844']} assert results['NM_015120.4:c.34C>T']['hgvs_transcript_variant'] == 'NM_015120.4:c.34C>T' + assert results['NM_015120.4:c.34C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_015120.4:c.34C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_015120.4:c.34C>T']['hgvs_refseqgene_variant'] == 'NG_011690.1:g.5145C>T' - assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613030C>T', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '73613030', 'alt': 'T'}} - assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385902C>T', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '73385902', 'alt': 'T'}} - assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613030C>T', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '73613030', 'alt': 'T'}} - assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385902C>T', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '73385902', 'alt': 'T'}} - assert results['NM_015120.4:c.34C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} - + assert results['NM_015120.4:c.34C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Leu12=)', 'slr': 'NP_055935.4:p.(L12=)'} + assert results['NM_015120.4:c.34C>T']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.34C>T' + assert results['NM_015120.4:c.34C>T']['hgvs_lrg_variant'] == 'LRG_741:g.5145C>T' + self.assertCountEqual(results['NM_015120.4:c.34C>T']['alt_genomic_loci'], []) + assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613030C>T', 'vcf': {'chr': 'chr2', 'pos': '73613030', 'ref': 'C', 'alt': 'T'}} + assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385902C>T', 'vcf': {'chr': 'chr2', 'pos': '73385902', 'ref': 'C', 'alt': 'T'}} + assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613030C>T', 'vcf': {'chr': '2', 'pos': '73613030', 'ref': 'C', 'alt': 'T'}} + assert results['NM_015120.4:c.34C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385902C>T', 'vcf': {'chr': '2', 'pos': '73385902', 'ref': 'C', 'alt': 'T'}} + assert results['NM_015120.4:c.34C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} def test_variant5(self): variant = 'NC_000023.10:g.33229673A>T' @@ -111,39 +110,40 @@ def test_variant5(self): assert results['flag'] == 'gene_variant' assert 'NM_000109.3:c.7+127703T>A' in list(results.keys()) - assert results['NM_000109.3:c.7+127703T>A']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_000109.3:c.7+127703T>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000109.3:c.7+127703T>A']['alt_genomic_loci'], []) - assert results['NM_000109.3:c.7+127703T>A']['gene_symbol'] == 'DMD' - assert results['NM_000109.3:c.7+127703T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000100.2:p.?', 'slr': 'NP_000100.2:p.?'} assert results['NM_000109.3:c.7+127703T>A']['submitted_variant'] == 'NC_000023.10:g.33229673A>T' - assert results['NM_000109.3:c.7+127703T>A']['genome_context_intronic_sequence'] == 'NC_000023.10(NM_000109.3):c.7+127703T>A' - assert results['NM_000109.3:c.7+127703T>A']['hgvs_lrg_variant'] == '' + assert results['NM_000109.3:c.7+127703T>A']['gene_symbol'] == 'DMD' + assert results['NM_000109.3:c.7+127703T>A']['gene_ids'] == {'hgnc_id': 'HGNC:2928', 'entrez_gene_id': '1756', 'ucsc_id': 'uc004dda.2', 'omim_id': ['300377']} assert results['NM_000109.3:c.7+127703T>A']['hgvs_transcript_variant'] == 'NM_000109.3:c.7+127703T>A' + assert results['NM_000109.3:c.7+127703T>A']['genome_context_intronic_sequence'] == 'NC_000023.10(NM_000109.3):c.7+127703T>A' + assert results['NM_000109.3:c.7+127703T>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000109.3:c.7+127703T>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_000109.3:c.7+127703T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.33229673A>T', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '33229673', 'alt': 'T'}} - assert results['NM_000109.3:c.7+127703T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.33211556A>T', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '33211556', 'alt': 'T'}} - assert results['NM_000109.3:c.7+127703T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.33229673A>T', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '33229673', 'alt': 'T'}} - assert 
results['NM_000109.3:c.7+127703T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.33211556A>T', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '33211556', 'alt': 'T'}} - assert results['NM_000109.3:c.7+127703T>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000100.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000109.3'} + assert results['NM_000109.3:c.7+127703T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000100.2:p.?', 'slr': 'NP_000100.2:p.?'} + assert results['NM_000109.3:c.7+127703T>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000109.3:c.7+127703T>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000109.3:c.7+127703T>A']['alt_genomic_loci'], []) + assert results['NM_000109.3:c.7+127703T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.33229673A>T', 'vcf': {'chr': 'chrX', 'pos': '33229673', 'ref': 'A', 'alt': 'T'}} + assert results['NM_000109.3:c.7+127703T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.33211556A>T', 'vcf': {'chr': 'chrX', 'pos': '33211556', 'ref': 'A', 'alt': 'T'}} + assert results['NM_000109.3:c.7+127703T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.33229673A>T', 'vcf': {'chr': 'X', 'pos': '33229673', 'ref': 'A', 'alt': 'T'}} + assert results['NM_000109.3:c.7+127703T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.33211556A>T', 'vcf': {'chr': 'X', 'pos': '33211556', 'ref': 'A', 'alt': 'T'}} + assert results['NM_000109.3:c.7+127703T>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000109.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000100.2'} assert 'NM_004006.2:c.-244T>A' in list(results.keys()) - assert results['NM_004006.2:c.-244T>A']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.-244T>A' - assert 
results['NM_004006.2:c.-244T>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004006.2:c.-244T>A']['alt_genomic_loci'], []) - assert results['NM_004006.2:c.-244T>A']['gene_symbol'] == 'DMD' - assert results['NM_004006.2:c.-244T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.?', 'slr': 'NP_003997.1:p.?'} assert results['NM_004006.2:c.-244T>A']['submitted_variant'] == 'NC_000023.10:g.33229673A>T' - assert results['NM_004006.2:c.-244T>A']['genome_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.-244T>A']['hgvs_lrg_variant'] == 'LRG_199:g.133054T>A' + assert results['NM_004006.2:c.-244T>A']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.-244T>A']['gene_ids'] == {'hgnc_id': 'HGNC:2928', 'entrez_gene_id': '1756', 'ucsc_id': 'uc004dda.2', 'omim_id': ['300377']} assert results['NM_004006.2:c.-244T>A']['hgvs_transcript_variant'] == 'NM_004006.2:c.-244T>A' + assert results['NM_004006.2:c.-244T>A']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.-244T>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.-244T>A']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.133054T>A' - assert results['NM_004006.2:c.-244T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.33229673A>T', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '33229673', 'alt': 'T'}} - assert results['NM_004006.2:c.-244T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.33211556A>T', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '33211556', 'alt': 'T'}} - assert results['NM_004006.2:c.-244T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.33229673A>T', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '33229673', 'alt': 'T'}} - assert results['NM_004006.2:c.-244T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.33211556A>T', 'vcf': {'chr': 'X', 'ref': 
'A', 'pos': '33211556', 'alt': 'T'}} - assert results['NM_004006.2:c.-244T>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} - + assert results['NM_004006.2:c.-244T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.?', 'slr': 'NP_003997.1:p.?'} + assert results['NM_004006.2:c.-244T>A']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.-244T>A' + assert results['NM_004006.2:c.-244T>A']['hgvs_lrg_variant'] == 'LRG_199:g.133054T>A' + self.assertCountEqual(results['NM_004006.2:c.-244T>A']['alt_genomic_loci'], []) + assert results['NM_004006.2:c.-244T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.33229673A>T', 'vcf': {'chr': 'chrX', 'pos': '33229673', 'ref': 'A', 'alt': 'T'}} + assert results['NM_004006.2:c.-244T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.33211556A>T', 'vcf': {'chr': 'chrX', 'pos': '33211556', 'ref': 'A', 'alt': 'T'}} + assert results['NM_004006.2:c.-244T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.33229673A>T', 'vcf': {'chr': 'X', 'pos': '33229673', 'ref': 'A', 'alt': 'T'}} + assert results['NM_004006.2:c.-244T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.33211556A>T', 'vcf': {'chr': 'X', 'pos': '33211556', 'ref': 'A', 'alt': 'T'}} + assert results['NM_004006.2:c.-244T>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} def test_variant6(self): variant = 'NM_001145026.1:c.715A>G' @@ 
-152,233 +152,244 @@ def test_variant6(self): assert results['flag'] == 'gene_variant' assert 'NM_001145026.1:c.715A>G' in list(results.keys()) - assert results['NM_001145026.1:c.715A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001145026.1:c.715A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001145026.1:c.715A>G']['alt_genomic_loci'], []) - assert results['NM_001145026.1:c.715A>G']['gene_symbol'] == 'PTPRQ' - assert results['NM_001145026.1:c.715A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001138498.1:p.(Arg239Gly)', 'slr': 'NP_001138498.1:p.(R239G)'} assert results['NM_001145026.1:c.715A>G']['submitted_variant'] == 'NM_001145026.1:c.715A>G' - assert results['NM_001145026.1:c.715A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001145026.1:c.715A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001145026.1:c.715A>G']['gene_symbol'] == 'PTPRQ' + assert results['NM_001145026.1:c.715A>G']['gene_ids'] == {'hgnc_id': 'HGNC:9679', 'entrez_gene_id': '374462', 'ucsc_id': 'uc031zgj.2', 'omim_id': ['603317']} assert results['NM_001145026.1:c.715A>G']['hgvs_transcript_variant'] == 'NM_001145026.1:c.715A>G' + assert results['NM_001145026.1:c.715A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001145026.1:c.715A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001145026.1:c.715A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001145026.1:c.715A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001138498.1:p.(Arg239Gly)', 'slr': 'NP_001138498.1:p.(R239G)'} + assert results['NM_001145026.1:c.715A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001145026.1:c.715A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001145026.1:c.715A>G']['alt_genomic_loci'], []) assert results['NM_001145026.1:c.715A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.80860617_80860618insG', 
'vcf': {'chr': 'chr12', 'pos': '80860617', 'ref': 'C', 'alt': 'CG'}} - assert results['NM_001145026.1:c.715A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.80460707A>G', 'vcf': {'chr': 'chr12', 'ref': 'A', 'pos': '80460707', 'alt': 'G'}} + assert results['NM_001145026.1:c.715A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.80460707A>G', 'vcf': {'chr': 'chr12', 'pos': '80460707', 'ref': 'A', 'alt': 'G'}} assert results['NM_001145026.1:c.715A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.80860617_80860618insG', 'vcf': {'chr': '12', 'pos': '80860617', 'ref': 'C', 'alt': 'CG'}} - assert results['NM_001145026.1:c.715A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.80460707A>G', 'vcf': {'chr': '12', 'ref': 'A', 'pos': '80460707', 'alt': 'G'}} - assert results['NM_001145026.1:c.715A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001138498.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001145026.1'} - + assert results['NM_001145026.1:c.715A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.80460707A>G', 'vcf': {'chr': '12', 'pos': '80460707', 'ref': 'A', 'alt': 'G'}} + assert results['NM_001145026.1:c.715A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001145026.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001138498.1'} def test_variant7(self): variant = 'NC_000016.9:g.2099572TC>T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_000548.4:c.138+821del' in list(results.keys()) - assert results['NM_000548.4:c.138+821del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000548.4:c.138+821del']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_000548.4:c.138+821del']['alt_genomic_loci'], []) - assert results['NM_000548.4:c.138+821del']['gene_symbol'] == 'TSC2' - assert results['NM_000548.4:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000539.2(LRG_487p1):p.?', 'slr': 'NP_000539.2:p.?'} assert results['NM_000548.4:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' - assert results['NM_000548.4:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_000548.4):c.138+821del' - assert results['NM_000548.4:c.138+821del']['hgvs_lrg_variant'] == '' + assert results['NM_000548.4:c.138+821del']['gene_symbol'] == 'TSC2' + assert results['NM_000548.4:c.138+821del']['gene_ids'] == {'hgnc_id': 'HGNC:12363', 'entrez_gene_id': '7249', 'ucsc_id': 'uc002con.4', 'omim_id': ['191092']} assert results['NM_000548.4:c.138+821del']['hgvs_transcript_variant'] == 'NM_000548.4:c.138+821del' + assert results['NM_000548.4:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_000548.4):c.138+821del' + assert results['NM_000548.4:c.138+821del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000548.4:c.138+821del']['hgvs_refseqgene_variant'] == '' - assert results['NM_000548.4:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert results['NM_000548.4:c.138+821del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} - assert results['NM_000548.4:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert results['NM_000548.4:c.138+821del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 
'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} - assert results['NM_000548.4:c.138+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000539.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000548.4'} + assert results['NM_000548.4:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000539.2(LRG_487p1):p.?', 'slr': 'NP_000539.2:p.?'} + assert results['NM_000548.4:c.138+821del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000548.4:c.138+821del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000548.4:c.138+821del']['alt_genomic_loci'], []) + assert results['NM_000548.4:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'pos': '2099572', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_000548.4:c.138+821del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': 'chr16', 'pos': '2049571', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_000548.4:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'pos': '2099572', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_000548.4:c.138+821del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 'pos': '2049571', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_000548.4:c.138+821del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000548.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000539.2'} - assert 'NM_001077183.2:c.138+821del' in list(results.keys()) - assert results['NM_001077183.2:c.138+821del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001077183.2:c.138+821del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001077183.2:c.138+821del']['alt_genomic_loci'], 
[]) - assert results['NM_001077183.2:c.138+821del']['gene_symbol'] == 'TSC2' - assert results['NM_001077183.2:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001070651.1:p.?', 'slr': 'NP_001070651.1:p.?'} - assert results['NM_001077183.2:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' - assert results['NM_001077183.2:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001077183.2):c.138+821del' - assert results['NM_001077183.2:c.138+821del']['hgvs_lrg_variant'] == '' - assert results['NM_001077183.2:c.138+821del']['hgvs_transcript_variant'] == 'NM_001077183.2:c.138+821del' - assert results['NM_001077183.2:c.138+821del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001077183.2:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert results['NM_001077183.2:c.138+821del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} - assert results['NM_001077183.2:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert results['NM_001077183.2:c.138+821del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} - assert results['NM_001077183.2:c.138+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001070651.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077183.2'} + assert 'NM_001318832.1:c.171+821del' in list(results.keys()) + assert results['NM_001318832.1:c.171+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' + assert results['NM_001318832.1:c.171+821del']['gene_symbol'] == 
'TSC2' + assert results['NM_001318832.1:c.171+821del']['gene_ids'] == {'hgnc_id': 'HGNC:12363', 'entrez_gene_id': '7249', 'ucsc_id': 'uc002con.4', 'omim_id': ['191092']} + assert results['NM_001318832.1:c.171+821del']['hgvs_transcript_variant'] == 'NM_001318832.1:c.171+821del' + assert results['NM_001318832.1:c.171+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001318832.1):c.171+821del' + assert results['NM_001318832.1:c.171+821del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001318832.1:c.171+821del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001318832.1:c.171+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305761.1:p.?', 'slr': 'NP_001305761.1:p.?'} + assert results['NM_001318832.1:c.171+821del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001318832.1:c.171+821del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001318832.1:c.171+821del']['alt_genomic_loci'], []) + assert results['NM_001318832.1:c.171+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'pos': '2099572', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_001318832.1:c.171+821del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': 'chr16', 'pos': '2049571', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_001318832.1:c.171+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'pos': '2099572', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_001318832.1:c.171+821del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 'pos': '2049571', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_001318832.1:c.171+821del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318832.1', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305761.1'} - assert 'NM_001318831.1:c.-89+821del' in list(results.keys()) - assert results['NM_001318831.1:c.-89+821del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001318831.1:c.-89+821del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001318831.1:c.-89+821del']['alt_genomic_loci'], []) - assert results['NM_001318831.1:c.-89+821del']['gene_symbol'] == 'TSC2' - assert results['NM_001318831.1:c.-89+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305760.1:p.?', 'slr': 'NP_001305760.1:p.?'} - assert results['NM_001318831.1:c.-89+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' - assert results['NM_001318831.1:c.-89+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001318831.1):c.-89+821del' - assert results['NM_001318831.1:c.-89+821del']['hgvs_lrg_variant'] == '' - assert results['NM_001318831.1:c.-89+821del']['hgvs_transcript_variant'] == 'NM_001318831.1:c.-89+821del' - assert results['NM_001318831.1:c.-89+821del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001318831.1:c.-89+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert results['NM_001318831.1:c.-89+821del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} - assert results['NM_001318831.1:c.-89+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert results['NM_001318831.1:c.-89+821del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} - assert 
results['NM_001318831.1:c.-89+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305760.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318831.1'} + assert 'NM_001318827.1:c.138+821del' in list(results.keys()) + assert results['NM_001318827.1:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' + assert results['NM_001318827.1:c.138+821del']['gene_symbol'] == 'TSC2' + assert results['NM_001318827.1:c.138+821del']['gene_ids'] == {'hgnc_id': 'HGNC:12363', 'entrez_gene_id': '7249', 'ucsc_id': 'uc002con.4', 'omim_id': ['191092']} + assert results['NM_001318827.1:c.138+821del']['hgvs_transcript_variant'] == 'NM_001318827.1:c.138+821del' + assert results['NM_001318827.1:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001318827.1):c.138+821del' + assert results['NM_001318827.1:c.138+821del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001318827.1:c.138+821del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001318827.1:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305756.1:p.?', 'slr': 'NP_001305756.1:p.?'} + assert results['NM_001318827.1:c.138+821del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001318827.1:c.138+821del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001318827.1:c.138+821del']['alt_genomic_loci'], []) + assert results['NM_001318827.1:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'pos': '2099572', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_001318827.1:c.138+821del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': 'chr16', 'pos': '2049571', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_001318827.1:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 
'pos': '2099572', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_001318827.1:c.138+821del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 'pos': '2049571', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_001318827.1:c.138+821del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318827.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305756.1'} - assert 'NM_000548.3:c.138+821del' in list(results.keys()) - assert results['NM_000548.3:c.138+821del']['hgvs_lrg_transcript_variant'] == 'LRG_487t1:c.138+821del' - assert results['NM_000548.3:c.138+821del']['refseqgene_context_intronic_sequence'] == 'NG_005895.1(NM_000548.3):c.138+821del' - self.assertCountEqual(results['NM_000548.3:c.138+821del']['alt_genomic_loci'], []) - assert results['NM_000548.3:c.138+821del']['gene_symbol'] == 'TSC2' - assert results['NM_000548.3:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000539.2(LRG_487p1):p.?', 'slr': 'NP_000539.2:p.?'} - assert results['NM_000548.3:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' - assert results['NM_000548.3:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_000548.3):c.138+821del' - assert results['NM_000548.3:c.138+821del']['hgvs_lrg_variant'] == 'LRG_487:g.5269del' - assert results['NM_000548.3:c.138+821del']['hgvs_transcript_variant'] == 'NM_000548.3:c.138+821del' - assert results['NM_000548.3:c.138+821del']['hgvs_refseqgene_variant'] == 'NG_005895.1:g.5269del' - assert results['NM_000548.3:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert 'hg38' not in list(results['NM_000548.3:c.138+821del']['primary_assembly_loci'].keys()) - assert results['NM_000548.3:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert 'grch38' not in list(results['NM_000548.3:c.138+821del']['primary_assembly_loci'].keys()) - assert results['NM_000548.3:c.138+821del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_005895.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000539.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000548.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_487.xml'} + assert 'NM_001114382.2:c.138+821del' in list(results.keys()) + assert results['NM_001114382.2:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' + assert results['NM_001114382.2:c.138+821del']['gene_symbol'] == 'TSC2' + assert results['NM_001114382.2:c.138+821del']['gene_ids'] == {'hgnc_id': 'HGNC:12363', 'entrez_gene_id': '7249', 'ucsc_id': 'uc002con.4', 'omim_id': ['191092']} + assert results['NM_001114382.2:c.138+821del']['hgvs_transcript_variant'] == 'NM_001114382.2:c.138+821del' + assert results['NM_001114382.2:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001114382.2):c.138+821del' + assert results['NM_001114382.2:c.138+821del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001114382.2:c.138+821del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001114382.2:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001107854.1:p.?', 'slr': 'NP_001107854.1:p.?'} + assert results['NM_001114382.2:c.138+821del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001114382.2:c.138+821del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001114382.2:c.138+821del']['alt_genomic_loci'], []) + assert results['NM_001114382.2:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'pos': '2099572', 'ref': 'TC', 'alt': 'T'}} + assert 
results['NM_001114382.2:c.138+821del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': 'chr16', 'pos': '2049571', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_001114382.2:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'pos': '2099572', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_001114382.2:c.138+821del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 'pos': '2049571', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_001114382.2:c.138+821del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001114382.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001107854.1'} assert 'NM_001114382.1:c.138+821del' in list(results.keys()) - assert results['NM_001114382.1:c.138+821del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001114382.1:c.138+821del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001114382.1:c.138+821del']['alt_genomic_loci'], []) - assert results['NM_001114382.1:c.138+821del']['gene_symbol'] == 'TSC2' - assert results['NM_001114382.1:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001107854.1:p.?', 'slr': 'NP_001107854.1:p.?'} assert results['NM_001114382.1:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' - assert results['NM_001114382.1:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001114382.1):c.138+821del' - assert results['NM_001114382.1:c.138+821del']['hgvs_lrg_variant'] == '' + assert results['NM_001114382.1:c.138+821del']['gene_symbol'] == 'TSC2' + assert results['NM_001114382.1:c.138+821del']['gene_ids'] == {'hgnc_id': 'HGNC:12363', 'entrez_gene_id': '7249', 'ucsc_id': 'uc002con.4', 'omim_id': ['191092']} assert results['NM_001114382.1:c.138+821del']['hgvs_transcript_variant'] == 
'NM_001114382.1:c.138+821del' + assert results['NM_001114382.1:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001114382.1):c.138+821del' + assert results['NM_001114382.1:c.138+821del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001114382.1:c.138+821del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001114382.1:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} + assert results['NM_001114382.1:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001107854.1:p.?', 'slr': 'NP_001107854.1:p.?'} + assert results['NM_001114382.1:c.138+821del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001114382.1:c.138+821del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001114382.1:c.138+821del']['alt_genomic_loci'], []) + assert results['NM_001114382.1:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'pos': '2099572', 'ref': 'TC', 'alt': 'T'}} assert 'hg38' not in list(results['NM_001114382.1:c.138+821del']['primary_assembly_loci'].keys()) - assert results['NM_001114382.1:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} + assert results['NM_001114382.1:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'pos': '2099572', 'ref': 'TC', 'alt': 'T'}} assert 'grch38' not in list(results['NM_001114382.1:c.138+821del']['primary_assembly_loci'].keys()) - assert results['NM_001114382.1:c.138+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001107854.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001114382.1'} - - assert 
'NM_001318832.1:c.171+821del' in list(results.keys()) - assert results['NM_001318832.1:c.171+821del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001318832.1:c.171+821del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001318832.1:c.171+821del']['alt_genomic_loci'], []) - assert results['NM_001318832.1:c.171+821del']['gene_symbol'] == 'TSC2' - assert results['NM_001318832.1:c.171+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305761.1:p.?', 'slr': 'NP_001305761.1:p.?'} - assert results['NM_001318832.1:c.171+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' - assert results['NM_001318832.1:c.171+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001318832.1):c.171+821del' - assert results['NM_001318832.1:c.171+821del']['hgvs_lrg_variant'] == '' - assert results['NM_001318832.1:c.171+821del']['hgvs_transcript_variant'] == 'NM_001318832.1:c.171+821del' - assert results['NM_001318832.1:c.171+821del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001318832.1:c.171+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert results['NM_001318832.1:c.171+821del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} - assert results['NM_001318832.1:c.171+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert results['NM_001318832.1:c.171+821del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} - assert results['NM_001318832.1:c.171+821del']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305761.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318832.1'} + assert results['NM_001114382.1:c.138+821del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001114382.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001107854.1'} - assert 'NM_001363528.1:c.138+821del' in list(results.keys()) - assert results['NM_001363528.1:c.138+821del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001363528.1:c.138+821del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001363528.1:c.138+821del']['alt_genomic_loci'], []) - assert results['NM_001363528.1:c.138+821del']['gene_symbol'] == 'TSC2' - assert results['NM_001363528.1:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350457.1:p.?', 'slr': 'NP_001350457.1:p.?'} - assert results['NM_001363528.1:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' - assert results['NM_001363528.1:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001363528.1):c.138+821del' - assert results['NM_001363528.1:c.138+821del']['hgvs_lrg_variant'] == '' - assert results['NM_001363528.1:c.138+821del']['hgvs_transcript_variant'] == 'NM_001363528.1:c.138+821del' - assert results['NM_001363528.1:c.138+821del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001363528.1:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert 'hg38' not in list(results['NM_001363528.1:c.138+821del']['primary_assembly_loci'].keys()) - assert results['NM_001363528.1:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert 'grch38' not in 
list(results['NM_001363528.1:c.138+821del']['primary_assembly_loci'].keys()) - assert results['NM_001363528.1:c.138+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350457.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363528.1'} + assert 'NM_001077183.1:c.138+821del' in list(results.keys()) + assert results['NM_001077183.1:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' + assert results['NM_001077183.1:c.138+821del']['gene_symbol'] == 'TSC2' + assert results['NM_001077183.1:c.138+821del']['gene_ids'] == {'hgnc_id': 'HGNC:12363', 'entrez_gene_id': '7249', 'ucsc_id': 'uc002con.4', 'omim_id': ['191092']} + assert results['NM_001077183.1:c.138+821del']['hgvs_transcript_variant'] == 'NM_001077183.1:c.138+821del' + assert results['NM_001077183.1:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001077183.1):c.138+821del' + assert results['NM_001077183.1:c.138+821del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001077183.1:c.138+821del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001077183.1:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001070651.1:p.?', 'slr': 'NP_001070651.1:p.?'} + assert results['NM_001077183.1:c.138+821del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001077183.1:c.138+821del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001077183.1:c.138+821del']['alt_genomic_loci'], []) + assert results['NM_001077183.1:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'pos': '2099572', 'ref': 'TC', 'alt': 'T'}} + assert 'hg38' not in list(results['NM_001077183.1:c.138+821del']['primary_assembly_loci'].keys()) + assert results['NM_001077183.1:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'pos': '2099572', 'ref': 
'TC', 'alt': 'T'}} + assert 'grch38' not in list(results['NM_001077183.1:c.138+821del']['primary_assembly_loci'].keys()) + assert results['NM_001077183.1:c.138+821del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077183.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001070651.1'} - assert results['flag'] == 'gene_variant' assert 'NM_021055.2:c.138+821del' in list(results.keys()) - assert results['NM_021055.2:c.138+821del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_021055.2:c.138+821del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_021055.2:c.138+821del']['alt_genomic_loci'], []) - assert results['NM_021055.2:c.138+821del']['gene_symbol'] == 'TSC2' - assert results['NM_021055.2:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_066399.2:p.?', 'slr': 'NP_066399.2:p.?'} assert results['NM_021055.2:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' - assert results['NM_021055.2:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_021055.2):c.138+821del' - assert results['NM_021055.2:c.138+821del']['hgvs_lrg_variant'] == '' + assert results['NM_021055.2:c.138+821del']['gene_symbol'] == 'TSC2' + assert results['NM_021055.2:c.138+821del']['gene_ids'] == {'hgnc_id': 'HGNC:12363', 'entrez_gene_id': '7249', 'ucsc_id': 'uc002con.4', 'omim_id': ['191092']} assert results['NM_021055.2:c.138+821del']['hgvs_transcript_variant'] == 'NM_021055.2:c.138+821del' + assert results['NM_021055.2:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_021055.2):c.138+821del' + assert results['NM_021055.2:c.138+821del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_021055.2:c.138+821del']['hgvs_refseqgene_variant'] == '' - assert results['NM_021055.2:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': 
'2099572', 'alt': 'T'}} - assert 'hg38' not in list(results['NM_021055.2:c.138+821del']['primary_assembly_loci'].keys()) - assert results['NM_021055.2:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert 'grch38' not in list(results['NM_021055.2:c.138+821del']['primary_assembly_loci'].keys()) - assert results['NM_021055.2:c.138+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_066399.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021055.2'} - - assert 'NM_001077183.1:c.138+821del' in list(results.keys()) - assert results['NM_001077183.1:c.138+821del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001077183.1:c.138+821del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001077183.1:c.138+821del']['alt_genomic_loci'], []) - assert results['NM_001077183.1:c.138+821del']['gene_symbol'] == 'TSC2' - assert results['NM_001077183.1:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001070651.1:p.?', 'slr': 'NP_001070651.1:p.?'} - assert results['NM_001077183.1:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' - assert results['NM_001077183.1:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001077183.1):c.138+821del' - assert results['NM_001077183.1:c.138+821del']['hgvs_lrg_variant'] == '' - assert results['NM_001077183.1:c.138+821del']['hgvs_transcript_variant'] == 'NM_001077183.1:c.138+821del' - assert results['NM_001077183.1:c.138+821del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001077183.1:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert 'hg38' not in list(results['NM_001077183.1:c.138+821del']['primary_assembly_loci'].keys()) - assert 
results['NM_001077183.1:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert 'grch38' not in list(results['NM_001077183.1:c.138+821del']['primary_assembly_loci'].keys()) - assert results['NM_001077183.1:c.138+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001070651.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077183.1'} - - assert 'NM_001318827.1:c.138+821del' in list(results.keys()) - assert results['NM_001318827.1:c.138+821del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001318827.1:c.138+821del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001318827.1:c.138+821del']['alt_genomic_loci'], []) - assert results['NM_001318827.1:c.138+821del']['gene_symbol'] == 'TSC2' - assert results['NM_001318827.1:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305756.1:p.?', 'slr': 'NP_001305756.1:p.?'} - assert results['NM_001318827.1:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' - assert results['NM_001318827.1:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001318827.1):c.138+821del' - assert results['NM_001318827.1:c.138+821del']['hgvs_lrg_variant'] == '' - assert results['NM_001318827.1:c.138+821del']['hgvs_transcript_variant'] == 'NM_001318827.1:c.138+821del' - assert results['NM_001318827.1:c.138+821del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001318827.1:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert results['NM_001318827.1:c.138+821del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} - assert 
results['NM_001318827.1:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert results['NM_001318827.1:c.138+821del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} - assert results['NM_001318827.1:c.138+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305756.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318827.1'} + assert results['NM_021055.2:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_066399.2:p.?', 'slr': 'NP_066399.2:p.?'} + assert results['NM_021055.2:c.138+821del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_021055.2:c.138+821del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_021055.2:c.138+821del']['alt_genomic_loci'], []) + assert results['NM_021055.2:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'pos': '2099572', 'ref': 'TC', 'alt': 'T'}} + assert 'hg38' not in list(results['NM_021055.2:c.138+821del']['primary_assembly_loci'].keys()) + assert results['NM_021055.2:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'pos': '2099572', 'ref': 'TC', 'alt': 'T'}} + assert 'grch38' not in list(results['NM_021055.2:c.138+821del']['primary_assembly_loci'].keys()) + assert results['NM_021055.2:c.138+821del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021055.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_066399.2'} - assert 'NM_001114382.2:c.138+821del' in list(results.keys()) - assert results['NM_001114382.2:c.138+821del']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_001114382.2:c.138+821del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001114382.2:c.138+821del']['alt_genomic_loci'], []) - assert results['NM_001114382.2:c.138+821del']['gene_symbol'] == 'TSC2' - assert results['NM_001114382.2:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001107854.1:p.?', 'slr': 'NP_001107854.1:p.?'} - assert results['NM_001114382.2:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' - assert results['NM_001114382.2:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001114382.2):c.138+821del' - assert results['NM_001114382.2:c.138+821del']['hgvs_lrg_variant'] == '' - assert results['NM_001114382.2:c.138+821del']['hgvs_transcript_variant'] == 'NM_001114382.2:c.138+821del' - assert results['NM_001114382.2:c.138+821del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001114382.2:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert results['NM_001114382.2:c.138+821del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} - assert results['NM_001114382.2:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert results['NM_001114382.2:c.138+821del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} - assert results['NM_001114382.2:c.138+821del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001107854.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001114382.2'} + assert 'NM_001077183.2:c.138+821del' in 
list(results.keys()) + assert results['NM_001077183.2:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' + assert results['NM_001077183.2:c.138+821del']['gene_symbol'] == 'TSC2' + assert results['NM_001077183.2:c.138+821del']['gene_ids'] == {'hgnc_id': 'HGNC:12363', 'entrez_gene_id': '7249', 'ucsc_id': 'uc002con.4', 'omim_id': ['191092']} + assert results['NM_001077183.2:c.138+821del']['hgvs_transcript_variant'] == 'NM_001077183.2:c.138+821del' + assert results['NM_001077183.2:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001077183.2):c.138+821del' + assert results['NM_001077183.2:c.138+821del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001077183.2:c.138+821del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001077183.2:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001070651.1:p.?', 'slr': 'NP_001070651.1:p.?'} + assert results['NM_001077183.2:c.138+821del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001077183.2:c.138+821del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001077183.2:c.138+821del']['alt_genomic_loci'], []) + assert results['NM_001077183.2:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'pos': '2099572', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_001077183.2:c.138+821del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': 'chr16', 'pos': '2049571', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_001077183.2:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'pos': '2099572', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_001077183.2:c.138+821del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 'pos': '2049571', 'ref': 'TC', 'alt': 
'T'}} + assert results['NM_001077183.2:c.138+821del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077183.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001070651.1'} assert 'NM_001318829.1:c.-9-826del' in list(results.keys()) - assert results['NM_001318829.1:c.-9-826del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001318829.1:c.-9-826del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001318829.1:c.-9-826del']['alt_genomic_loci'], []) - assert results['NM_001318829.1:c.-9-826del']['gene_symbol'] == 'TSC2' - assert results['NM_001318829.1:c.-9-826del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305758.1:p.?', 'slr': 'NP_001305758.1:p.?'} assert results['NM_001318829.1:c.-9-826del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' - assert results['NM_001318829.1:c.-9-826del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001318829.1):c.-9-826del' - assert results['NM_001318829.1:c.-9-826del']['hgvs_lrg_variant'] == '' + assert results['NM_001318829.1:c.-9-826del']['gene_symbol'] == 'TSC2' + assert results['NM_001318829.1:c.-9-826del']['gene_ids'] == {'hgnc_id': 'HGNC:12363', 'entrez_gene_id': '7249', 'ucsc_id': 'uc002con.4', 'omim_id': ['191092']} assert results['NM_001318829.1:c.-9-826del']['hgvs_transcript_variant'] == 'NM_001318829.1:c.-9-826del' + assert results['NM_001318829.1:c.-9-826del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001318829.1):c.-9-826del' + assert results['NM_001318829.1:c.-9-826del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001318829.1:c.-9-826del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001318829.1:c.-9-826del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert results['NM_001318829.1:c.-9-826del']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': 'chr16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} - assert results['NM_001318829.1:c.-9-826del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2099572', 'alt': 'T'}} - assert results['NM_001318829.1:c.-9-826del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 'ref': 'TC', 'pos': '2049571', 'alt': 'T'}} - assert results['NM_001318829.1:c.-9-826del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305758.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318829.1'} + assert results['NM_001318829.1:c.-9-826del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305758.1:p.?', 'slr': 'NP_001305758.1:p.?'} + assert results['NM_001318829.1:c.-9-826del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001318829.1:c.-9-826del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001318829.1:c.-9-826del']['alt_genomic_loci'], []) + assert results['NM_001318829.1:c.-9-826del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'pos': '2099572', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_001318829.1:c.-9-826del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': 'chr16', 'pos': '2049571', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_001318829.1:c.-9-826del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'pos': '2099572', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_001318829.1:c.-9-826del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 'pos': '2049571', 'ref': 'TC', 'alt': 'T'}} + assert 
results['NM_001318829.1:c.-9-826del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318829.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305758.1'} + + assert 'NM_001363528.1:c.138+821del' in list(results.keys()) + assert results['NM_001363528.1:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' + assert results['NM_001363528.1:c.138+821del']['gene_symbol'] == 'TSC2' + assert results['NM_001363528.1:c.138+821del']['gene_ids'] == {'hgnc_id': 'HGNC:12363', 'entrez_gene_id': '7249', 'ucsc_id': 'uc002con.4', 'omim_id': ['191092']} + assert results['NM_001363528.1:c.138+821del']['hgvs_transcript_variant'] == 'NM_001363528.1:c.138+821del' + assert results['NM_001363528.1:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001363528.1):c.138+821del' + assert results['NM_001363528.1:c.138+821del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001363528.1:c.138+821del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001363528.1:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350457.1:p.?', 'slr': 'NP_001350457.1:p.?'} + assert results['NM_001363528.1:c.138+821del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363528.1:c.138+821del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001363528.1:c.138+821del']['alt_genomic_loci'], []) + assert results['NM_001363528.1:c.138+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'pos': '2099572', 'ref': 'TC', 'alt': 'T'}} + assert 'hg38' not in list(results['NM_001363528.1:c.138+821del']['primary_assembly_loci'].keys()) + assert results['NM_001363528.1:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'pos': '2099572', 'ref': 'TC', 'alt': 'T'}} + assert 'grch38' not in 
list(results['NM_001363528.1:c.138+821del']['primary_assembly_loci'].keys()) + assert results['NM_001363528.1:c.138+821del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363528.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350457.1'} + + assert 'NM_001318831.1:c.-89+821del' in list(results.keys()) + assert results['NM_001318831.1:c.-89+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' + assert results['NM_001318831.1:c.-89+821del']['gene_symbol'] == 'TSC2' + assert results['NM_001318831.1:c.-89+821del']['gene_ids'] == {'hgnc_id': 'HGNC:12363', 'entrez_gene_id': '7249', 'ucsc_id': 'uc002con.4', 'omim_id': ['191092']} + assert results['NM_001318831.1:c.-89+821del']['hgvs_transcript_variant'] == 'NM_001318831.1:c.-89+821del' + assert results['NM_001318831.1:c.-89+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001318831.1):c.-89+821del' + assert results['NM_001318831.1:c.-89+821del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001318831.1:c.-89+821del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001318831.1:c.-89+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305760.1:p.?', 'slr': 'NP_001305760.1:p.?'} + assert results['NM_001318831.1:c.-89+821del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001318831.1:c.-89+821del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001318831.1:c.-89+821del']['alt_genomic_loci'], []) + assert results['NM_001318831.1:c.-89+821del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'pos': '2099572', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_001318831.1:c.-89+821del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': 'chr16', 'pos': '2049571', 'ref': 'TC', 'alt': 'T'}} + assert 
results['NM_001318831.1:c.-89+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'pos': '2099572', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_001318831.1:c.-89+821del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2049574del', 'vcf': {'chr': '16', 'pos': '2049571', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_001318831.1:c.-89+821del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318831.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305760.1'} + assert 'NM_000548.3:c.138+821del' in list(results.keys()) + assert results['NM_000548.3:c.138+821del']['submitted_variant'] == 'NC_000016.9:g.2099572TC>T' + assert results['NM_000548.3:c.138+821del']['gene_symbol'] == 'TSC2' + assert results['NM_000548.3:c.138+821del']['gene_ids'] == {'hgnc_id': 'HGNC:12363', 'entrez_gene_id': '7249', 'ucsc_id': 'uc002con.4', 'omim_id': ['191092']} + assert results['NM_000548.3:c.138+821del']['hgvs_transcript_variant'] == 'NM_000548.3:c.138+821del' + assert results['NM_000548.3:c.138+821del']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_000548.3):c.138+821del' + assert results['NM_000548.3:c.138+821del']['refseqgene_context_intronic_sequence'] == 'NG_005895.1(NM_000548.3):c.138+821del' + assert results['NM_000548.3:c.138+821del']['hgvs_refseqgene_variant'] == 'NG_005895.1:g.5269del' + assert results['NM_000548.3:c.138+821del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000539.2(LRG_487p1):p.?', 'slr': 'NP_000539.2:p.?'} + assert results['NM_000548.3:c.138+821del']['hgvs_lrg_transcript_variant'] == 'LRG_487t1:c.138+821del' + assert results['NM_000548.3:c.138+821del']['hgvs_lrg_variant'] == 'LRG_487:g.5269del' + self.assertCountEqual(results['NM_000548.3:c.138+821del']['alt_genomic_loci'], []) + assert results['NM_000548.3:c.138+821del']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': 'chr16', 'pos': '2099572', 'ref': 'TC', 'alt': 'T'}} + assert 'hg38' not in list(results['NM_000548.3:c.138+821del']['primary_assembly_loci'].keys()) + assert results['NM_000548.3:c.138+821del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2099575del', 'vcf': {'chr': '16', 'pos': '2099572', 'ref': 'TC', 'alt': 'T'}} + assert 'grch38' not in list(results['NM_000548.3:c.138+821del']['primary_assembly_loci'].keys()) + assert results['NM_000548.3:c.138+821del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000548.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000539.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_005895.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_487.xml'} def test_variant8(self): variant = 'NM_000088.3:c.589GG>CT' @@ -387,169 +398,172 @@ def test_variant8(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.589_590delinsCT' in list(results.keys()) - assert results['NM_000088.3:c.589_590delinsCT']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589_590delinsCT' - assert results['NM_000088.3:c.589_590delinsCT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000088.3:c.589_590delinsCT']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.589_590delinsCT']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.589_590delinsCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Leu)', 'slr': 'NP_000079.2:p.(G197L)'} assert results['NM_000088.3:c.589_590delinsCT']['submitted_variant'] == 'NM_000088.3:c.589GG>CT' - assert results['NM_000088.3:c.589_590delinsCT']['genome_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.589_590delinsCT']['hgvs_lrg_variant'] == 'LRG_1:g.8638_8639delinsCT' + assert results['NM_000088.3:c.589_590delinsCT']['gene_symbol'] == 'COL1A1' + assert 
results['NM_000088.3:c.589_590delinsCT']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.589_590delinsCT']['hgvs_transcript_variant'] == 'NM_000088.3:c.589_590delinsCT' + assert results['NM_000088.3:c.589_590delinsCT']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.589_590delinsCT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.589_590delinsCT']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638_8639delinsCT' - assert results['NM_000088.3:c.589_590delinsCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275362_48275363delinsAG', 'vcf': {'chr': 'chr17', 'ref': 'CC', 'pos': '48275362', 'alt': 'AG'}} - assert results['NM_000088.3:c.589_590delinsCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198001_50198002delinsAG', 'vcf': {'chr': 'chr17', 'ref': 'CC', 'pos': '50198001', 'alt': 'AG'}} - assert results['NM_000088.3:c.589_590delinsCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275362_48275363delinsAG', 'vcf': {'chr': '17', 'ref': 'CC', 'pos': '48275362', 'alt': 'AG'}} - assert results['NM_000088.3:c.589_590delinsCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198001_50198002delinsAG', 'vcf': {'chr': '17', 'ref': 'CC', 'pos': '50198001', 'alt': 'AG'}} - assert results['NM_000088.3:c.589_590delinsCT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.589_590delinsCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Leu)', 'slr': 'NP_000079.2:p.(G197L)'} + assert 
results['NM_000088.3:c.589_590delinsCT']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589_590delinsCT' + assert results['NM_000088.3:c.589_590delinsCT']['hgvs_lrg_variant'] == 'LRG_1:g.8638_8639delinsCT' + self.assertCountEqual(results['NM_000088.3:c.589_590delinsCT']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.589_590delinsCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275362_48275363delinsAG', 'vcf': {'chr': 'chr17', 'pos': '48275362', 'ref': 'CC', 'alt': 'AG'}} + assert results['NM_000088.3:c.589_590delinsCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198001_50198002delinsAG', 'vcf': {'chr': 'chr17', 'pos': '50198001', 'ref': 'CC', 'alt': 'AG'}} + assert results['NM_000088.3:c.589_590delinsCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275362_48275363delinsAG', 'vcf': {'chr': '17', 'pos': '48275362', 'ref': 'CC', 'alt': 'AG'}} + assert results['NM_000088.3:c.589_590delinsCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198001_50198002delinsAG', 'vcf': {'chr': '17', 'pos': '50198001', 'ref': 'CC', 'alt': 'AG'}} + assert results['NM_000088.3:c.589_590delinsCT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant9(self): variant = 'NM_000094.3:c.6751-2_6751-3del' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_000094.3:c.6751-2_6751-3del' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant10(self): variant = 'COL5A1:c.5071A>T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert 
results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'COL5A1:c.5071A>T' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant11(self): variant = 'NG_007400.1:c.5071A>T' results = self.vv.validate(variant, 'GRCh37', 
'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NG_007400.1:c.5071A>T' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert 
results['flag'] == 'warning' - def test_variant12(self): variant = 'chr16:15832508_15832509delinsAC' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' + assert 'NM_001040114.1:c.3055_3056inv' in list(results.keys()) + assert results['NM_001040114.1:c.3055_3056inv']['submitted_variant'] == 'chr16:15832508_15832509delinsAC' + assert results['NM_001040114.1:c.3055_3056inv']['gene_symbol'] == 'MYH11' + assert results['NM_001040114.1:c.3055_3056inv']['gene_ids'] == {'hgnc_id': 'HGNC:7569', 'entrez_gene_id': '4629', 'ucsc_id': 'uc002ddy.4', 'omim_id': ['160745']} + assert results['NM_001040114.1:c.3055_3056inv']['hgvs_transcript_variant'] == 'NM_001040114.1:c.3055_3056inv' + assert results['NM_001040114.1:c.3055_3056inv']['genome_context_intronic_sequence'] == '' + assert results['NM_001040114.1:c.3055_3056inv']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001040114.1:c.3055_3056inv']['hgvs_refseqgene_variant'] == '' + assert results['NM_001040114.1:c.3055_3056inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035203.1:p.(Thr1019Val)', 'slr': 'NP_001035203.1:p.(T1019V)'} + assert results['NM_001040114.1:c.3055_3056inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001040114.1:c.3055_3056inv']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001040114.1:c.3055_3056inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'pos': '1396662', 'ref': 'GT', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'pos': '1396662', 'ref': 'GT', 'alt': 'AC'}}}]) + assert results['NM_001040114.1:c.3055_3056inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': 'chr16', 'pos': '15832508', 'ref': 'GT', 'alt': 'AC'}} + assert 
results['NM_001040114.1:c.3055_3056inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': 'chr16', 'pos': '15738651', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_001040114.1:c.3055_3056inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': '16', 'pos': '15832508', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_001040114.1:c.3055_3056inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'pos': '15738651', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_001040114.1:c.3055_3056inv']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040114.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035203.1'} + assert 'NM_002474.2:c.3034_3035inv' in list(results.keys()) - assert results['NM_002474.2:c.3034_3035inv']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_002474.2:c.3034_3035inv']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_002474.2:c.3034_3035inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}]) - assert results['NM_002474.2:c.3034_3035inv']['gene_symbol'] == 'MYH11' - assert results['NM_002474.2:c.3034_3035inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002465.1(LRG_1401p1):p.(Thr1012Val)', 'slr': 'NP_002465.1:p.(T1012V)'} assert results['NM_002474.2:c.3034_3035inv']['submitted_variant'] == 'chr16:15832508_15832509delinsAC' - assert results['NM_002474.2:c.3034_3035inv']['genome_context_intronic_sequence'] == '' - assert 
results['NM_002474.2:c.3034_3035inv']['hgvs_lrg_variant'] == '' + assert results['NM_002474.2:c.3034_3035inv']['gene_symbol'] == 'MYH11' + assert results['NM_002474.2:c.3034_3035inv']['gene_ids'] == {'hgnc_id': 'HGNC:7569', 'entrez_gene_id': '4629', 'ucsc_id': 'uc002ddy.4', 'omim_id': ['160745']} assert results['NM_002474.2:c.3034_3035inv']['hgvs_transcript_variant'] == 'NM_002474.2:c.3034_3035inv' + assert results['NM_002474.2:c.3034_3035inv']['genome_context_intronic_sequence'] == '' + assert results['NM_002474.2:c.3034_3035inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_002474.2:c.3034_3035inv']['hgvs_refseqgene_variant'] == '' - assert results['NM_002474.2:c.3034_3035inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} - assert results['NM_002474.2:c.3034_3035inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} - assert results['NM_002474.2:c.3034_3035inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} - assert results['NM_002474.2:c.3034_3035inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} - assert results['NM_002474.2:c.3034_3035inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002465.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002474.2'} + assert results['NM_002474.2:c.3034_3035inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002465.1(LRG_1401p1):p.(Thr1012Val)', 'slr': 'NP_002465.1:p.(T1012V)'} + assert 
results['NM_002474.2:c.3034_3035inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_002474.2:c.3034_3035inv']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_002474.2:c.3034_3035inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'pos': '1396662', 'ref': 'GT', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'pos': '1396662', 'ref': 'GT', 'alt': 'AC'}}}]) + assert results['NM_002474.2:c.3034_3035inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': 'chr16', 'pos': '15832508', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_002474.2:c.3034_3035inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': 'chr16', 'pos': '15738651', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_002474.2:c.3034_3035inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': '16', 'pos': '15832508', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_002474.2:c.3034_3035inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'pos': '15738651', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_002474.2:c.3034_3035inv']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002474.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002465.1'} assert 'NM_022844.2:c.3034_3035inv' in list(results.keys()) - assert results['NM_022844.2:c.3034_3035inv']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_022844.2:c.3034_3035inv']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_022844.2:c.3034_3035inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 
'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}]) - assert results['NM_022844.2:c.3034_3035inv']['gene_symbol'] == 'MYH11' - assert results['NM_022844.2:c.3034_3035inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_074035.1:p.(Thr1012Val)', 'slr': 'NP_074035.1:p.(T1012V)'} assert results['NM_022844.2:c.3034_3035inv']['submitted_variant'] == 'chr16:15832508_15832509delinsAC' - assert results['NM_022844.2:c.3034_3035inv']['genome_context_intronic_sequence'] == '' - assert results['NM_022844.2:c.3034_3035inv']['hgvs_lrg_variant'] == '' + assert results['NM_022844.2:c.3034_3035inv']['gene_symbol'] == 'MYH11' + assert results['NM_022844.2:c.3034_3035inv']['gene_ids'] == {'hgnc_id': 'HGNC:7569', 'entrez_gene_id': '4629', 'ucsc_id': 'uc002ddy.4', 'omim_id': ['160745']} assert results['NM_022844.2:c.3034_3035inv']['hgvs_transcript_variant'] == 'NM_022844.2:c.3034_3035inv' + assert results['NM_022844.2:c.3034_3035inv']['genome_context_intronic_sequence'] == '' + assert results['NM_022844.2:c.3034_3035inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_022844.2:c.3034_3035inv']['hgvs_refseqgene_variant'] == '' - assert results['NM_022844.2:c.3034_3035inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} - assert results['NM_022844.2:c.3034_3035inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} - assert results['NM_022844.2:c.3034_3035inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': '16', 'ref': 'GT', 
'pos': '15832508', 'alt': 'AC'}} - assert results['NM_022844.2:c.3034_3035inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} - assert results['NM_022844.2:c.3034_3035inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_074035.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_022844.2'} - - assert 'NM_001040114.1:c.3055_3056inv' in list(results.keys()) - assert results['NM_001040114.1:c.3055_3056inv']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001040114.1:c.3055_3056inv']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001040114.1:c.3055_3056inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}]) - assert results['NM_001040114.1:c.3055_3056inv']['gene_symbol'] == 'MYH11' - assert results['NM_001040114.1:c.3055_3056inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035203.1:p.(Thr1019Val)', 'slr': 'NP_001035203.1:p.(T1019V)'} - assert results['NM_001040114.1:c.3055_3056inv']['submitted_variant'] == 'chr16:15832508_15832509delinsAC' - assert results['NM_001040114.1:c.3055_3056inv']['genome_context_intronic_sequence'] == '' - assert results['NM_001040114.1:c.3055_3056inv']['hgvs_lrg_variant'] == '' - assert results['NM_001040114.1:c.3055_3056inv']['hgvs_transcript_variant'] == 'NM_001040114.1:c.3055_3056inv' - assert results['NM_001040114.1:c.3055_3056inv']['hgvs_refseqgene_variant'] == '' - assert results['NM_001040114.1:c.3055_3056inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': 
'chr16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} - assert results['NM_001040114.1:c.3055_3056inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} - assert results['NM_001040114.1:c.3055_3056inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} - assert results['NM_001040114.1:c.3055_3056inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} - assert results['NM_001040114.1:c.3055_3056inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035203.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040114.1'} + assert results['NM_022844.2:c.3034_3035inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_074035.1:p.(Thr1012Val)', 'slr': 'NP_074035.1:p.(T1012V)'} + assert results['NM_022844.2:c.3034_3035inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_022844.2:c.3034_3035inv']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_022844.2:c.3034_3035inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'pos': '1396662', 'ref': 'GT', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'pos': '1396662', 'ref': 'GT', 'alt': 'AC'}}}]) + assert results['NM_022844.2:c.3034_3035inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': 'chr16', 'pos': '15832508', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_022844.2:c.3034_3035inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': 'chr16', 'pos': '15738651', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_022844.2:c.3034_3035inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': '16', 'pos': '15832508', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_022844.2:c.3034_3035inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'pos': '15738651', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_022844.2:c.3034_3035inv']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_022844.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_074035.1'} - assert results['flag'] == 'gene_variant' assert 'NM_001040113.1:c.3055_3056inv' in list(results.keys()) - assert results['NM_001040113.1:c.3055_3056inv']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001040113.1:c.3055_3056inv']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001040113.1:c.3055_3056inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}]) - assert results['NM_001040113.1:c.3055_3056inv']['gene_symbol'] == 'MYH11' - assert results['NM_001040113.1:c.3055_3056inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035202.1(LRG_1401p2):p.(Thr1019Val)', 'slr': 'NP_001035202.1:p.(T1019V)'} assert results['NM_001040113.1:c.3055_3056inv']['submitted_variant'] == 'chr16:15832508_15832509delinsAC' - assert results['NM_001040113.1:c.3055_3056inv']['genome_context_intronic_sequence'] == '' - assert results['NM_001040113.1:c.3055_3056inv']['hgvs_lrg_variant'] == 
'LRG_1401:g.123379_123380inv' + assert results['NM_001040113.1:c.3055_3056inv']['gene_symbol'] == 'MYH11' + assert results['NM_001040113.1:c.3055_3056inv']['gene_ids'] == {'hgnc_id': 'HGNC:7569', 'entrez_gene_id': '4629', 'ucsc_id': 'uc002ddy.4', 'omim_id': ['160745']} assert results['NM_001040113.1:c.3055_3056inv']['hgvs_transcript_variant'] == 'NM_001040113.1:c.3055_3056inv' + assert results['NM_001040113.1:c.3055_3056inv']['genome_context_intronic_sequence'] == '' + assert results['NM_001040113.1:c.3055_3056inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001040113.1:c.3055_3056inv']['hgvs_refseqgene_variant'] == 'NG_009299.1:g.123379_123380inv' - assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} - assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} - assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} - assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} - assert results['NM_001040113.1:c.3055_3056inv']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009299.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_1401.xml', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035202.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040113.1'} - + assert 
results['NM_001040113.1:c.3055_3056inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035202.1(LRG_1401p2):p.(Thr1019Val)', 'slr': 'NP_001035202.1:p.(T1019V)'} + assert results['NM_001040113.1:c.3055_3056inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001040113.1:c.3055_3056inv']['hgvs_lrg_variant'] == 'LRG_1401:g.123379_123380inv' + self.assertCountEqual(results['NM_001040113.1:c.3055_3056inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'pos': '1396662', 'ref': 'GT', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'pos': '1396662', 'ref': 'GT', 'alt': 'AC'}}}]) + assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': 'chr16', 'pos': '15832508', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': 'chr16', 'pos': '15738651', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': '16', 'pos': '15832508', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'pos': '15738651', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_001040113.1:c.3055_3056inv']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040113.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035202.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009299.1', 'lrg': 
'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_1401.xml'} def test_variant13(self): variant = 'NM_000088.3:c.589-1GG>G' @@ -558,22 +572,22 @@ def test_variant13(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.590del' in list(results.keys()) - assert results['NM_000088.3:c.590del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.590del' - assert results['NM_000088.3:c.590del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000088.3:c.590del']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.590del']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.590del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197ValfsTer68)', 'slr': 'NP_000079.2:p.(G197Vfs*68)'} assert results['NM_000088.3:c.590del']['submitted_variant'] == 'NM_000088.3:c.589-1GG>G' - assert results['NM_000088.3:c.590del']['genome_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.590del']['hgvs_lrg_variant'] == 'LRG_1:g.8639del' + assert results['NM_000088.3:c.590del']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.590del']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.590del']['hgvs_transcript_variant'] == 'NM_000088.3:c.590del' + assert results['NM_000088.3:c.590del']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.590del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.590del']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8639del' - assert results['NM_000088.3:c.590del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364del', 'vcf': {'chr': 'chr17', 'ref': 'AC', 'pos': '48275361', 'alt': 'A'}} - assert results['NM_000088.3:c.590del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003del', 'vcf': {'chr': 'chr17', 'ref': 'AC', 'pos': 
'50198000', 'alt': 'A'}} - assert results['NM_000088.3:c.590del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364del', 'vcf': {'chr': '17', 'ref': 'AC', 'pos': '48275361', 'alt': 'A'}} - assert results['NM_000088.3:c.590del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003del', 'vcf': {'chr': '17', 'ref': 'AC', 'pos': '50198000', 'alt': 'A'}} - assert results['NM_000088.3:c.590del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.590del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197ValfsTer68)', 'slr': 'NP_000079.2:p.(G197Vfs*68)'} + assert results['NM_000088.3:c.590del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.590del' + assert results['NM_000088.3:c.590del']['hgvs_lrg_variant'] == 'LRG_1:g.8639del' + self.assertCountEqual(results['NM_000088.3:c.590del']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.590del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364del', 'vcf': {'chr': 'chr17', 'pos': '48275361', 'ref': 'AC', 'alt': 'A'}} + assert results['NM_000088.3:c.590del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003del', 'vcf': {'chr': 'chr17', 'pos': '50198000', 'ref': 'AC', 'alt': 'A'}} + assert results['NM_000088.3:c.590del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364del', 'vcf': {'chr': '17', 'pos': '48275361', 'ref': 'AC', 'alt': 'A'}} + assert results['NM_000088.3:c.590del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003del', 'vcf': {'chr': '17', 'pos': '50198000', 'ref': 'AC', 
'alt': 'A'}} + assert results['NM_000088.3:c.590del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant14(self): variant = 'NM_000088.3:c.642+1GT>G' @@ -582,22 +596,22 @@ def test_variant14(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.642+2del' in list(results.keys()) - assert results['NM_000088.3:c.642+2del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642+2del' - assert results['NM_000088.3:c.642+2del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.642+2del' - self.assertCountEqual(results['NM_000088.3:c.642+2del']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.642+2del']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.642+2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.642+2del']['submitted_variant'] == 'NM_000088.3:c.642+1GT>G' - assert results['NM_000088.3:c.642+2del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.642+2del' - assert results['NM_000088.3:c.642+2del']['hgvs_lrg_variant'] == 'LRG_1:g.8693del' + assert results['NM_000088.3:c.642+2del']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.642+2del']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.642+2del']['hgvs_transcript_variant'] == 'NM_000088.3:c.642+2del' + assert results['NM_000088.3:c.642+2del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.642+2del' + assert results['NM_000088.3:c.642+2del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.642+2del' assert 
results['NM_000088.3:c.642+2del']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8693del' - assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308del', 'vcf': {'chr': 'chr17', 'ref': 'TA', 'pos': '48275307', 'alt': 'T'}} - assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947del', 'vcf': {'chr': 'chr17', 'ref': 'TA', 'pos': '50197946', 'alt': 'T'}} - assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308del', 'vcf': {'chr': '17', 'ref': 'TA', 'pos': '48275307', 'alt': 'T'}} - assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947del', 'vcf': {'chr': '17', 'ref': 'TA', 'pos': '50197946', 'alt': 'T'}} - assert results['NM_000088.3:c.642+2del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.642+2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.642+2del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642+2del' + assert results['NM_000088.3:c.642+2del']['hgvs_lrg_variant'] == 'LRG_1:g.8693del' + self.assertCountEqual(results['NM_000088.3:c.642+2del']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308del', 'vcf': {'chr': 'chr17', 'pos': '48275307', 'ref': 'TA', 'alt': 'T'}} + assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000017.11:g.50197947del', 'vcf': {'chr': 'chr17', 'pos': '50197946', 'ref': 'TA', 'alt': 'T'}} + assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308del', 'vcf': {'chr': '17', 'pos': '48275307', 'ref': 'TA', 'alt': 'T'}} + assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947del', 'vcf': {'chr': '17', 'pos': '50197946', 'ref': 'TA', 'alt': 'T'}} + assert results['NM_000088.3:c.642+2del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant15(self): variant = 'NM_000088.3:c.589-2AG>G' @@ -606,22 +620,22 @@ def test_variant15(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.589-2del' in list(results.keys()) - assert results['NM_000088.3:c.589-2del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-2del' - assert results['NM_000088.3:c.589-2del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-2del' - self.assertCountEqual(results['NM_000088.3:c.589-2del']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.589-2del']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.589-2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.589-2del']['submitted_variant'] == 'NM_000088.3:c.589-2AG>G' - assert results['NM_000088.3:c.589-2del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-2del' - assert results['NM_000088.3:c.589-2del']['hgvs_lrg_variant'] == 'LRG_1:g.8636del' + assert results['NM_000088.3:c.589-2del']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589-2del']['gene_ids'] == 
{'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.589-2del']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-2del' + assert results['NM_000088.3:c.589-2del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-2del' + assert results['NM_000088.3:c.589-2del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-2del' assert results['NM_000088.3:c.589-2del']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8636del' - assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275366del', 'vcf': {'chr': 'chr17', 'ref': 'CT', 'pos': '48275364', 'alt': 'C'}} - assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005del', 'vcf': {'chr': 'chr17', 'ref': 'CT', 'pos': '50198003', 'alt': 'C'}} - assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275366del', 'vcf': {'chr': '17', 'ref': 'CT', 'pos': '48275364', 'alt': 'C'}} - assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005del', 'vcf': {'chr': '17', 'ref': 'CT', 'pos': '50198003', 'alt': 'C'}} - assert results['NM_000088.3:c.589-2del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.589-2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.589-2del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-2del' + assert 
results['NM_000088.3:c.589-2del']['hgvs_lrg_variant'] == 'LRG_1:g.8636del' + self.assertCountEqual(results['NM_000088.3:c.589-2del']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275366del', 'vcf': {'chr': 'chr17', 'pos': '48275364', 'ref': 'CT', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005del', 'vcf': {'chr': 'chr17', 'pos': '50198003', 'ref': 'CT', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275366del', 'vcf': {'chr': '17', 'pos': '48275364', 'ref': 'CT', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005del', 'vcf': {'chr': '17', 'pos': '50198003', 'ref': 'CT', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant16(self): variant = 'NC_000017.10:g.48279242G>T' @@ -630,119 +644,119 @@ def test_variant16(self): assert results['flag'] == 'intergenic' assert 'intergenic_variant_1' in list(results.keys()) - assert results['intergenic_variant_1']['hgvs_lrg_transcript_variant'] == '' - assert results['intergenic_variant_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['intergenic_variant_1']['alt_genomic_loci'], []) - assert results['intergenic_variant_1']['gene_symbol'] == '' - assert results['intergenic_variant_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['intergenic_variant_1']['submitted_variant'] 
== 'NC_000017.10:g.48279242G>T' - assert results['intergenic_variant_1']['genome_context_intronic_sequence'] == '' - assert results['intergenic_variant_1']['hgvs_lrg_variant'] == 'LRG_1:g.4759C>A' + assert results['intergenic_variant_1']['gene_symbol'] == '' + assert results['intergenic_variant_1']['gene_ids'] == {} assert results['intergenic_variant_1']['hgvs_transcript_variant'] == '' + assert results['intergenic_variant_1']['genome_context_intronic_sequence'] == '' + assert results['intergenic_variant_1']['refseqgene_context_intronic_sequence'] == '' assert results['intergenic_variant_1']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.4759C>A' - assert results['intergenic_variant_1']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48279242G>T', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '48279242', 'alt': 'T'}} - assert results['intergenic_variant_1']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50201881G>T', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '50201881', 'alt': 'T'}} - assert results['intergenic_variant_1']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48279242G>T', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '48279242', 'alt': 'T'}} - assert results['intergenic_variant_1']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50201881G>T', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '50201881', 'alt': 'T'}} + assert results['intergenic_variant_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['intergenic_variant_1']['hgvs_lrg_transcript_variant'] == '' + assert results['intergenic_variant_1']['hgvs_lrg_variant'] == 'LRG_1:g.4759C>A' + self.assertCountEqual(results['intergenic_variant_1']['alt_genomic_loci'], []) + assert results['intergenic_variant_1']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48279242G>T', 'vcf': {'chr': 'chr17', 'pos': '48279242', 'ref': 
'G', 'alt': 'T'}} + assert results['intergenic_variant_1']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50201881G>T', 'vcf': {'chr': 'chr17', 'pos': '50201881', 'ref': 'G', 'alt': 'T'}} + assert results['intergenic_variant_1']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48279242G>T', 'vcf': {'chr': '17', 'pos': '48279242', 'ref': 'G', 'alt': 'T'}} + assert results['intergenic_variant_1']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50201881G>T', 'vcf': {'chr': '17', 'pos': '50201881', 'ref': 'G', 'alt': 'T'}} assert results['intergenic_variant_1']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - def test_variant17(self): variant = 'NM_000500.7:c.-107-19C>T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_000500.7:c.-107-19C>T' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' 
+ assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant18(self): variant = 'NM_000518.4:c.-130C>T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_000518.4:c.-130C>T' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert 
results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant19(self): variant = 'NM_000518.4:c.-50-80C>T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_000518.4:c.-50-80C>T' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert 
results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant20(self): variant = 'NM_000518.4:c.316_*342delinsCTACTT' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert 
results['validation_warning_1']['submitted_variant'] == 'NM_000518.4:c.316_*342delinsCTACTT' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant21(self): variant = 'NM_000518.4:c.316_*100del' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() @@ -750,215 +764,215 @@ def test_variant21(self): assert results['flag'] == 'gene_variant' assert 'NM_000518.4:c.316_*100del' in list(results.keys()) - assert results['NM_000518.4:c.316_*100del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000518.4:c.316_*100del']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_000518.4:c.316_*100del']['alt_genomic_loci'], []) - assert results['NM_000518.4:c.316_*100del']['gene_symbol'] == 'HBB' - assert results['NM_000518.4:c.316_*100del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000509.1(LRG_1232p1):p.(Leu106SerfsTer3)', 'slr': 'NP_000509.1:p.(L106Sfs*3)'} assert results['NM_000518.4:c.316_*100del']['submitted_variant'] == 'NM_000518.4:c.316_*100del' - assert results['NM_000518.4:c.316_*100del']['genome_context_intronic_sequence'] == '' - assert results['NM_000518.4:c.316_*100del']['hgvs_lrg_variant'] == '' + assert results['NM_000518.4:c.316_*100del']['gene_symbol'] == 'HBB' + assert results['NM_000518.4:c.316_*100del']['gene_ids'] == {'hgnc_id': 'HGNC:4827', 'entrez_gene_id': '3043', 'ucsc_id': 'uc001mae.2', 'omim_id': ['141900']} assert results['NM_000518.4:c.316_*100del']['hgvs_transcript_variant'] == 'NM_000518.4:c.316_*100del' + assert results['NM_000518.4:c.316_*100del']['genome_context_intronic_sequence'] == '' + assert results['NM_000518.4:c.316_*100del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000518.4:c.316_*100del']['hgvs_refseqgene_variant'] == 'NG_000007.3:g.71890_72118del' - assert results['NM_000518.4:c.316_*100del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.5246728_5246956del', 'vcf': {'chr': 'chr11', 'ref': 'AATCCAGATGCTCAAGGCCCTTCATAATATCCCCCAGTTTAGTAGTTGGACTTAGGGAACAAAGGAACCTTTAATAGAAATTGGACAGCAAGAAAGCGAGCTTAGTGATACTTGTGGGCCAGGGCATTAGCCACACCAGCCACCACTTTCTGATAGGCAGCCTGCACTGGTGGGGTGAATTCTTTGCCAAAGTGATGGGCCAGCACACAGACCAGCACGTTGCCCAGGAG', 'pos': '5246727', 'alt': 'A'}} - assert results['NM_000518.4:c.316_*100del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.5225498_5225726del', 'vcf': {'chr': 'chr11', 'ref': 
'AATCCAGATGCTCAAGGCCCTTCATAATATCCCCCAGTTTAGTAGTTGGACTTAGGGAACAAAGGAACCTTTAATAGAAATTGGACAGCAAGAAAGCGAGCTTAGTGATACTTGTGGGCCAGGGCATTAGCCACACCAGCCACCACTTTCTGATAGGCAGCCTGCACTGGTGGGGTGAATTCTTTGCCAAAGTGATGGGCCAGCACACAGACCAGCACGTTGCCCAGGAG', 'pos': '5225497', 'alt': 'A'}} - assert results['NM_000518.4:c.316_*100del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.5246728_5246956del', 'vcf': {'chr': '11', 'ref': 'AATCCAGATGCTCAAGGCCCTTCATAATATCCCCCAGTTTAGTAGTTGGACTTAGGGAACAAAGGAACCTTTAATAGAAATTGGACAGCAAGAAAGCGAGCTTAGTGATACTTGTGGGCCAGGGCATTAGCCACACCAGCCACCACTTTCTGATAGGCAGCCTGCACTGGTGGGGTGAATTCTTTGCCAAAGTGATGGGCCAGCACACAGACCAGCACGTTGCCCAGGAG', 'pos': '5246727', 'alt': 'A'}} - assert results['NM_000518.4:c.316_*100del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.5225498_5225726del', 'vcf': {'chr': '11', 'ref': 'AATCCAGATGCTCAAGGCCCTTCATAATATCCCCCAGTTTAGTAGTTGGACTTAGGGAACAAAGGAACCTTTAATAGAAATTGGACAGCAAGAAAGCGAGCTTAGTGATACTTGTGGGCCAGGGCATTAGCCACACCAGCCACCACTTTCTGATAGGCAGCCTGCACTGGTGGGGTGAATTCTTTGCCAAAGTGATGGGCCAGCACACAGACCAGCACGTTGCCCAGGAG', 'pos': '5225497', 'alt': 'A'}} - assert results['NM_000518.4:c.316_*100del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_000007.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000509.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000518.4'} - + assert results['NM_000518.4:c.316_*100del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000509.1(LRG_1232p1):p.(Leu106SerfsTer3)', 'slr': 'NP_000509.1:p.(L106Sfs*3)'} + assert results['NM_000518.4:c.316_*100del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000518.4:c.316_*100del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000518.4:c.316_*100del']['alt_genomic_loci'], []) + assert results['NM_000518.4:c.316_*100del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.5246728_5246956del', 'vcf': 
{'chr': 'chr11', 'pos': '5246727', 'ref': 'AATCCAGATGCTCAAGGCCCTTCATAATATCCCCCAGTTTAGTAGTTGGACTTAGGGAACAAAGGAACCTTTAATAGAAATTGGACAGCAAGAAAGCGAGCTTAGTGATACTTGTGGGCCAGGGCATTAGCCACACCAGCCACCACTTTCTGATAGGCAGCCTGCACTGGTGGGGTGAATTCTTTGCCAAAGTGATGGGCCAGCACACAGACCAGCACGTTGCCCAGGAG', 'alt': 'A'}} + assert results['NM_000518.4:c.316_*100del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.5225498_5225726del', 'vcf': {'chr': 'chr11', 'pos': '5225497', 'ref': 'AATCCAGATGCTCAAGGCCCTTCATAATATCCCCCAGTTTAGTAGTTGGACTTAGGGAACAAAGGAACCTTTAATAGAAATTGGACAGCAAGAAAGCGAGCTTAGTGATACTTGTGGGCCAGGGCATTAGCCACACCAGCCACCACTTTCTGATAGGCAGCCTGCACTGGTGGGGTGAATTCTTTGCCAAAGTGATGGGCCAGCACACAGACCAGCACGTTGCCCAGGAG', 'alt': 'A'}} + assert results['NM_000518.4:c.316_*100del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.5246728_5246956del', 'vcf': {'chr': '11', 'pos': '5246727', 'ref': 'AATCCAGATGCTCAAGGCCCTTCATAATATCCCCCAGTTTAGTAGTTGGACTTAGGGAACAAAGGAACCTTTAATAGAAATTGGACAGCAAGAAAGCGAGCTTAGTGATACTTGTGGGCCAGGGCATTAGCCACACCAGCCACCACTTTCTGATAGGCAGCCTGCACTGGTGGGGTGAATTCTTTGCCAAAGTGATGGGCCAGCACACAGACCAGCACGTTGCCCAGGAG', 'alt': 'A'}} + assert results['NM_000518.4:c.316_*100del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.5225498_5225726del', 'vcf': {'chr': '11', 'pos': '5225497', 'ref': 'AATCCAGATGCTCAAGGCCCTTCATAATATCCCCCAGTTTAGTAGTTGGACTTAGGGAACAAAGGAACCTTTAATAGAAATTGGACAGCAAGAAAGCGAGCTTAGTGATACTTGTGGGCCAGGGCATTAGCCACACCAGCCACCACTTTCTGATAGGCAGCCTGCACTGGTGGGGTGAATTCTTTGCCAAAGTGATGGGCCAGCACACAGACCAGCACGTTGCCCAGGAG', 'alt': 'A'}} + assert results['NM_000518.4:c.316_*100del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000518.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000509.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_000007.3'} def test_variant22(self): variant = 'NM_000518.4:c.*2000C>T' results = self.vv.validate(variant, 'GRCh37', 
'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_000518.4:c.*2000C>T' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert 
results['flag'] == 'warning' - def test_variant23(self): variant = 'NM_000518.4:c.*132+1868C>T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_000518.4:c.*132+1868C>T' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in 
list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant24(self): variant = 'NM_000518.4:c.-130_*2000=' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_000518.4:c.-130_*2000=' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in 
list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant25(self): variant = 'NM_000518.4:c.-50-80_*132+1868=' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_000518.4:c.-50-80_*132+1868=' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant26(self): variant = 'NR_138595.1:n.-810C>T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NR_138595.1:n.-810C>T' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert 
results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant27(self): variant = 'NR_138595.1:n.1-810C>T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NR_138595.1:n.1-810C>T' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert 
results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant28(self): variant = 'NR_138595.1:n.1071+1A=' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NR_138595.1:n.1071+1A=' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert 
results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant29(self): variant = 'NR_138595.1:n.-810_1071+1=' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NR_138595.1:n.-810_1071+1=' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] 
== '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant30(self): variant = 'NC_000017.10:g.48261457_48261463TTATGTT=' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() @@ -966,22 +980,22 @@ def test_variant30(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.*1400_*1406=' in list(results.keys()) - assert results['NM_000088.3:c.*1400_*1406=']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.*1400_*1406=' - assert results['NM_000088.3:c.*1400_*1406=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000088.3:c.*1400_*1406=']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.*1400_*1406=']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.*1400_*1406=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 
'NP_000079.2:p.?'} assert results['NM_000088.3:c.*1400_*1406=']['submitted_variant'] == 'NC_000017.10:g.48261457_48261463TTATGTT=' - assert results['NM_000088.3:c.*1400_*1406=']['genome_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.*1400_*1406=']['hgvs_lrg_variant'] == 'LRG_1:g.22538_22544=' + assert results['NM_000088.3:c.*1400_*1406=']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.*1400_*1406=']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.*1400_*1406=']['hgvs_transcript_variant'] == 'NM_000088.3:c.*1400_*1406=' + assert results['NM_000088.3:c.*1400_*1406=']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.*1400_*1406=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.*1400_*1406=']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.22538_22544=' - assert results['NM_000088.3:c.*1400_*1406=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48261457_48261463=', 'vcf': {'chr': 'chr17', 'ref': 'TTATGTT', 'pos': '48261457', 'alt': 'TTATGTT'}} - assert results['NM_000088.3:c.*1400_*1406=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50184096_50184102=', 'vcf': {'chr': 'chr17', 'ref': 'TTATGTT', 'pos': '50184096', 'alt': 'TTATGTT'}} - assert results['NM_000088.3:c.*1400_*1406=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48261457_48261463=', 'vcf': {'chr': '17', 'ref': 'TTATGTT', 'pos': '48261457', 'alt': 'TTATGTT'}} - assert results['NM_000088.3:c.*1400_*1406=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50184096_50184102=', 'vcf': {'chr': '17', 'ref': 'TTATGTT', 'pos': '50184096', 'alt': 'TTATGTT'}} - assert results['NM_000088.3:c.*1400_*1406=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 
'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.*1400_*1406=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.*1400_*1406=']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.*1400_*1406=' + assert results['NM_000088.3:c.*1400_*1406=']['hgvs_lrg_variant'] == 'LRG_1:g.22538_22544=' + self.assertCountEqual(results['NM_000088.3:c.*1400_*1406=']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.*1400_*1406=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48261457_48261463=', 'vcf': {'chr': 'chr17', 'pos': '48261457', 'ref': 'TTATGTT', 'alt': 'TTATGTT'}} + assert results['NM_000088.3:c.*1400_*1406=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50184096_50184102=', 'vcf': {'chr': 'chr17', 'pos': '50184096', 'ref': 'TTATGTT', 'alt': 'TTATGTT'}} + assert results['NM_000088.3:c.*1400_*1406=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48261457_48261463=', 'vcf': {'chr': '17', 'pos': '48261457', 'ref': 'TTATGTT', 'alt': 'TTATGTT'}} + assert results['NM_000088.3:c.*1400_*1406=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50184096_50184102=', 'vcf': {'chr': '17', 'pos': '50184096', 'ref': 'TTATGTT', 'alt': 'TTATGTT'}} + assert results['NM_000088.3:c.*1400_*1406=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant31(self): variant = 'NC_000017.10:g.48275363C>A' @@ -990,22 +1004,22 @@ def test_variant31(self): assert 
results['flag'] == 'gene_variant' assert 'NM_000088.3:c.589G>T' in list(results.keys()) - assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' - assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000088.3:c.589G>T']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.589G>T']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.589G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)'} assert results['NM_000088.3:c.589G>T']['submitted_variant'] == 'NC_000017.10:g.48275363C>A' - assert results['NM_000088.3:c.589G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.589G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8638G>T' + assert results['NM_000088.3:c.589G>T']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589G>T']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.589G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589G>T' + assert results['NM_000088.3:c.589G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.589G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638G>T' - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '48275363', 'alt': 'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '50198002', 'alt': 'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': 
'48275363', 'alt': 'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '50198002', 'alt': 'A'}} - assert results['NM_000088.3:c.589G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.589G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)'} + assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' + assert results['NM_000088.3:c.589G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8638G>T' + self.assertCountEqual(results['NM_000088.3:c.589G>T']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'pos': '48275363', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'pos': '50198002', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'pos': '48275363', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'pos': '50198002', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 
'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant32(self): variant = 'NM_000088.3:c.589-1G>T' @@ -1014,46 +1028,46 @@ def test_variant32(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.589-1G>T' in list(results.keys()) - assert results['NM_000088.3:c.589-1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-1G>T' - assert results['NM_000088.3:c.589-1G>T']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-1G>T' - self.assertCountEqual(results['NM_000088.3:c.589-1G>T']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.589-1G>T']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.589-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.589-1G>T']['submitted_variant'] == 'NM_000088.3:c.589-1G>T' - assert results['NM_000088.3:c.589-1G>T']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-1G>T' - assert results['NM_000088.3:c.589-1G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8637G>T' + assert results['NM_000088.3:c.589-1G>T']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589-1G>T']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.589-1G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-1G>T' + assert results['NM_000088.3:c.589-1G>T']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-1G>T' + assert results['NM_000088.3:c.589-1G>T']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-1G>T' assert results['NM_000088.3:c.589-1G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8637G>T' - assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': 
'48275364', 'alt': 'A'}} - assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '50198003', 'alt': 'A'}} - assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '48275364', 'alt': 'A'}} - assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '50198003', 'alt': 'A'}} - assert results['NM_000088.3:c.589-1G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.589-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.589-1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-1G>T' + assert results['NM_000088.3:c.589-1G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8637G>T' + self.assertCountEqual(results['NM_000088.3:c.589-1G>T']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': 'chr17', 'pos': '48275364', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': 'chr17', 'pos': '50198003', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': '17', 'pos': '48275364', 'ref': 'C', 'alt': 
'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': '17', 'pos': '50198003', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589-1G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant33(self): variant = 'NM_000088.3:c.591_593inv' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.591_593inv' in list(results.keys()) - assert results['NM_000088.3:c.591_593inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.591_593inv' - assert results['NM_000088.3:c.591_593inv']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000088.3:c.591_593inv']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.591_593inv']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.591_593inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Pro198Asp)', 'slr': 'NP_000079.2:p.(P198D)'} assert results['NM_000088.3:c.591_593inv']['submitted_variant'] == 'NM_000088.3:c.591_593inv' - assert results['NM_000088.3:c.591_593inv']['genome_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.591_593inv']['hgvs_lrg_variant'] == 'LRG_1:g.8640_8642inv' + assert results['NM_000088.3:c.591_593inv']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.591_593inv']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.591_593inv']['hgvs_transcript_variant'] == 'NM_000088.3:c.591_593inv' + assert results['NM_000088.3:c.591_593inv']['genome_context_intronic_sequence'] == '' + 
assert results['NM_000088.3:c.591_593inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.591_593inv']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8640_8642inv' - assert results['NM_000088.3:c.591_593inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275359_48275361inv', 'vcf': {'chr': 'chr17', 'ref': 'GGA', 'pos': '48275359', 'alt': 'TCC'}} - assert results['NM_000088.3:c.591_593inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197998_50198000inv', 'vcf': {'chr': 'chr17', 'ref': 'GGA', 'pos': '50197998', 'alt': 'TCC'}} - assert results['NM_000088.3:c.591_593inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275359_48275361inv', 'vcf': {'chr': '17', 'ref': 'GGA', 'pos': '48275359', 'alt': 'TCC'}} - assert results['NM_000088.3:c.591_593inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197998_50198000inv', 'vcf': {'chr': '17', 'ref': 'GGA', 'pos': '50197998', 'alt': 'TCC'}} - assert results['NM_000088.3:c.591_593inv']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - - assert results['flag'] == 'gene_variant' + assert results['NM_000088.3:c.591_593inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Pro198Asp)', 'slr': 'NP_000079.2:p.(P198D)'} + assert results['NM_000088.3:c.591_593inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.591_593inv' + assert results['NM_000088.3:c.591_593inv']['hgvs_lrg_variant'] == 'LRG_1:g.8640_8642inv' + self.assertCountEqual(results['NM_000088.3:c.591_593inv']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.591_593inv']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000017.10:g.48275359_48275361inv', 'vcf': {'chr': 'chr17', 'pos': '48275359', 'ref': 'GGA', 'alt': 'TCC'}} + assert results['NM_000088.3:c.591_593inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197998_50198000inv', 'vcf': {'chr': 'chr17', 'pos': '50197998', 'ref': 'GGA', 'alt': 'TCC'}} + assert results['NM_000088.3:c.591_593inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275359_48275361inv', 'vcf': {'chr': '17', 'pos': '48275359', 'ref': 'GGA', 'alt': 'TCC'}} + assert results['NM_000088.3:c.591_593inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197998_50198000inv', 'vcf': {'chr': '17', 'pos': '50197998', 'ref': 'GGA', 'alt': 'TCC'}} + assert results['NM_000088.3:c.591_593inv']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant34(self): variant = '11-5248232-T-A' @@ -1062,39 +1076,40 @@ def test_variant34(self): assert results['flag'] == 'gene_variant' assert 'NM_000518.5:c.20A>T' in list(results.keys()) - assert results['NM_000518.5:c.20A>T']['hgvs_lrg_transcript_variant'] == 'LRG_1232t1:c.20A>T' - assert results['NM_000518.5:c.20A>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000518.5:c.20A>T']['alt_genomic_loci'], []) - assert results['NM_000518.5:c.20A>T']['gene_symbol'] == 'HBB' - assert results['NM_000518.5:c.20A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000509.1(LRG_1232p1):p.(Glu7Val)', 'slr': 'NP_000509.1:p.(E7V)'} assert results['NM_000518.5:c.20A>T']['submitted_variant'] == '11-5248232-T-A' - assert results['NM_000518.5:c.20A>T']['genome_context_intronic_sequence'] == '' - assert 
results['NM_000518.5:c.20A>T']['hgvs_lrg_variant'] == '' + assert results['NM_000518.5:c.20A>T']['gene_symbol'] == 'HBB' + assert results['NM_000518.5:c.20A>T']['gene_ids'] == {'hgnc_id': 'HGNC:4827', 'entrez_gene_id': '3043', 'ucsc_id': 'uc001mae.2', 'omim_id': ['141900']} assert results['NM_000518.5:c.20A>T']['hgvs_transcript_variant'] == 'NM_000518.5:c.20A>T' + assert results['NM_000518.5:c.20A>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000518.5:c.20A>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000518.5:c.20A>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_000518.5:c.20A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.5248232T>A', 'vcf': {'chr': 'chr11', 'ref': 'T', 'pos': '5248232', 'alt': 'A'}} + assert results['NM_000518.5:c.20A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000509.1(LRG_1232p1):p.(Glu7Val)', 'slr': 'NP_000509.1:p.(E7V)'} + assert results['NM_000518.5:c.20A>T']['hgvs_lrg_transcript_variant'] == 'LRG_1232t1:c.20A>T' + assert results['NM_000518.5:c.20A>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000518.5:c.20A>T']['alt_genomic_loci'], []) + assert results['NM_000518.5:c.20A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.5248232T>A', 'vcf': {'chr': 'chr11', 'pos': '5248232', 'ref': 'T', 'alt': 'A'}} assert 'hg38' not in list(results['NM_000518.5:c.20A>T']['primary_assembly_loci'].keys()) - assert results['NM_000518.5:c.20A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.5248232T>A', 'vcf': {'chr': '11', 'ref': 'T', 'pos': '5248232', 'alt': 'A'}} + assert results['NM_000518.5:c.20A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.5248232T>A', 'vcf': {'chr': '11', 'pos': '5248232', 'ref': 'T', 'alt': 'A'}} assert 'grch38' not in list(results['NM_000518.5:c.20A>T']['primary_assembly_loci'].keys()) - assert 
results['NM_000518.5:c.20A>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000509.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000518.5'} + assert results['NM_000518.5:c.20A>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000518.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000509.1'} assert 'NM_000518.4:c.20A>T' in list(results.keys()) - assert results['NM_000518.4:c.20A>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000518.4:c.20A>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000518.4:c.20A>T']['alt_genomic_loci'], []) - assert results['NM_000518.4:c.20A>T']['gene_symbol'] == 'HBB' - assert results['NM_000518.4:c.20A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000509.1(LRG_1232p1):p.(Glu7Val)', 'slr': 'NP_000509.1:p.(E7V)'} assert results['NM_000518.4:c.20A>T']['submitted_variant'] == '11-5248232-T-A' - assert results['NM_000518.4:c.20A>T']['genome_context_intronic_sequence'] == '' - assert results['NM_000518.4:c.20A>T']['hgvs_lrg_variant'] == '' + assert results['NM_000518.4:c.20A>T']['gene_symbol'] == 'HBB' + assert results['NM_000518.4:c.20A>T']['gene_ids'] == {'hgnc_id': 'HGNC:4827', 'entrez_gene_id': '3043', 'ucsc_id': 'uc001mae.2', 'omim_id': ['141900']} assert results['NM_000518.4:c.20A>T']['hgvs_transcript_variant'] == 'NM_000518.4:c.20A>T' + assert results['NM_000518.4:c.20A>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000518.4:c.20A>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000518.4:c.20A>T']['hgvs_refseqgene_variant'] == 'NG_000007.3:g.70614A>T' - assert results['NM_000518.4:c.20A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.5248232T>A', 'vcf': {'chr': 'chr11', 'ref': 'T', 'pos': '5248232', 'alt': 'A'}} - assert results['NM_000518.4:c.20A>T']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000011.10:g.5227002T>A', 'vcf': {'chr': 'chr11', 'ref': 'T', 'pos': '5227002', 'alt': 'A'}} - assert results['NM_000518.4:c.20A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.5248232T>A', 'vcf': {'chr': '11', 'ref': 'T', 'pos': '5248232', 'alt': 'A'}} - assert results['NM_000518.4:c.20A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.5227002T>A', 'vcf': {'chr': '11', 'ref': 'T', 'pos': '5227002', 'alt': 'A'}} - assert results['NM_000518.4:c.20A>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_000007.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000509.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000518.4'} - + assert results['NM_000518.4:c.20A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000509.1(LRG_1232p1):p.(Glu7Val)', 'slr': 'NP_000509.1:p.(E7V)'} + assert results['NM_000518.4:c.20A>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000518.4:c.20A>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000518.4:c.20A>T']['alt_genomic_loci'], []) + assert results['NM_000518.4:c.20A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.5248232T>A', 'vcf': {'chr': 'chr11', 'pos': '5248232', 'ref': 'T', 'alt': 'A'}} + assert results['NM_000518.4:c.20A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.5227002T>A', 'vcf': {'chr': 'chr11', 'pos': '5227002', 'ref': 'T', 'alt': 'A'}} + assert results['NM_000518.4:c.20A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.5248232T>A', 'vcf': {'chr': '11', 'pos': '5248232', 'ref': 'T', 'alt': 'A'}} + assert results['NM_000518.4:c.20A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.5227002T>A', 'vcf': {'chr': '11', 'pos': '5227002', 'ref': 'T', 'alt': 'A'}} + assert 
results['NM_000518.4:c.20A>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000518.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000509.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_000007.3'} def test_variant35(self): variant = 'NG_007400.1(NM_000088.3):c.589-1G>T' @@ -1103,22 +1118,22 @@ def test_variant35(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.589-1G>T' in list(results.keys()) - assert results['NM_000088.3:c.589-1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-1G>T' - assert results['NM_000088.3:c.589-1G>T']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-1G>T' - self.assertCountEqual(results['NM_000088.3:c.589-1G>T']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.589-1G>T']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.589-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.589-1G>T']['submitted_variant'] == 'NG_007400.1(NM_000088.3):c.589-1G>T' - assert results['NM_000088.3:c.589-1G>T']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-1G>T' - assert results['NM_000088.3:c.589-1G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8637G>T' + assert results['NM_000088.3:c.589-1G>T']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589-1G>T']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.589-1G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-1G>T' + assert results['NM_000088.3:c.589-1G>T']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-1G>T' + assert results['NM_000088.3:c.589-1G>T']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-1G>T' assert results['NM_000088.3:c.589-1G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8637G>T' - assert 
results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '48275364', 'alt': 'A'}} - assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '50198003', 'alt': 'A'}} - assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '48275364', 'alt': 'A'}} - assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '50198003', 'alt': 'A'}} - assert results['NM_000088.3:c.589-1G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.589-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.589-1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-1G>T' + assert results['NM_000088.3:c.589-1G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8637G>T' + self.assertCountEqual(results['NM_000088.3:c.589-1G>T']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': 'chr17', 'pos': '48275364', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': 'chr17', 'pos': '50198003', 'ref': 'C', 'alt': 'A'}} + assert 
results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': '17', 'pos': '48275364', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': '17', 'pos': '50198003', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589-1G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant36(self): variant = '1:150550916G>A' @@ -1127,56 +1142,58 @@ def test_variant36(self): assert results['flag'] == 'gene_variant' assert 'NM_182763.2:c.688+403C>T' in list(results.keys()) - assert results['NM_182763.2:c.688+403C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_182763.2:c.688+403C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_182763.2:c.688+403C>T']['alt_genomic_loci'], []) - assert results['NM_182763.2:c.688+403C>T']['gene_symbol'] == 'MCL1' - assert results['NM_182763.2:c.688+403C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_877495.1:p.?', 'slr': 'NP_877495.1:p.?'} assert results['NM_182763.2:c.688+403C>T']['submitted_variant'] == '1:150550916G>A' - assert results['NM_182763.2:c.688+403C>T']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_182763.2):c.688+403C>T' - assert results['NM_182763.2:c.688+403C>T']['hgvs_lrg_variant'] == '' + assert results['NM_182763.2:c.688+403C>T']['gene_symbol'] == 'MCL1' + assert results['NM_182763.2:c.688+403C>T']['gene_ids'] == {'hgnc_id': 'HGNC:6943', 'entrez_gene_id': '4170', 'ucsc_id': 'uc001euz.4', 'omim_id': ['159552']} assert results['NM_182763.2:c.688+403C>T']['hgvs_transcript_variant'] == 
'NM_182763.2:c.688+403C>T' + assert results['NM_182763.2:c.688+403C>T']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_182763.2):c.688+403C>T' + assert results['NM_182763.2:c.688+403C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_182763.2:c.688+403C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '150550916', 'alt': 'A'}} - assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '150578440', 'alt': 'A'}} - assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '150550916', 'alt': 'A'}} - assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '150578440', 'alt': 'A'}} - assert results['NM_182763.2:c.688+403C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_877495.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_182763.2'} + assert results['NM_182763.2:c.688+403C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_877495.1:p.?', 'slr': 'NP_877495.1:p.?'} + assert results['NM_182763.2:c.688+403C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_182763.2:c.688+403C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_182763.2:c.688+403C>T']['alt_genomic_loci'], []) + assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'pos': '150550916', 'ref': 'G', 'alt': 'A'}} + assert 
results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'pos': '150578440', 'ref': 'G', 'alt': 'A'}} + assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'pos': '150550916', 'ref': 'G', 'alt': 'A'}} + assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'pos': '150578440', 'ref': 'G', 'alt': 'A'}} + assert results['NM_182763.2:c.688+403C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_182763.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_877495.1'} assert 'NM_001197320.1:c.281C>T' in list(results.keys()) - assert results['NM_001197320.1:c.281C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001197320.1:c.281C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001197320.1:c.281C>T']['alt_genomic_loci'], []) - assert results['NM_001197320.1:c.281C>T']['gene_symbol'] == 'MCL1' - assert results['NM_001197320.1:c.281C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001184249.1:p.(Ser94Phe)', 'slr': 'NP_001184249.1:p.(S94F)'} assert results['NM_001197320.1:c.281C>T']['submitted_variant'] == '1:150550916G>A' - assert results['NM_001197320.1:c.281C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001197320.1:c.281C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001197320.1:c.281C>T']['gene_symbol'] == 'MCL1' + assert results['NM_001197320.1:c.281C>T']['gene_ids'] == {'hgnc_id': 'HGNC:6943', 'entrez_gene_id': '4170', 'ucsc_id': 'uc001euz.4', 'omim_id': ['159552']} assert results['NM_001197320.1:c.281C>T']['hgvs_transcript_variant'] == 'NM_001197320.1:c.281C>T' + assert 
results['NM_001197320.1:c.281C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001197320.1:c.281C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001197320.1:c.281C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '150550916', 'alt': 'A'}} - assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '150578440', 'alt': 'A'}} - assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '150550916', 'alt': 'A'}} - assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '150578440', 'alt': 'A'}} - assert results['NM_001197320.1:c.281C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001184249.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001197320.1'} + assert results['NM_001197320.1:c.281C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001184249.1:p.(Ser94Phe)', 'slr': 'NP_001184249.1:p.(S94F)'} + assert results['NM_001197320.1:c.281C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001197320.1:c.281C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001197320.1:c.281C>T']['alt_genomic_loci'], []) + assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'pos': '150550916', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'pos': '150578440', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'pos': '150550916', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'pos': '150578440', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001197320.1:c.281C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001197320.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001184249.1'} assert 'NM_021960.4:c.740C>T' in list(results.keys()) - assert results['NM_021960.4:c.740C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_021960.4:c.740C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_021960.4:c.740C>T']['alt_genomic_loci'], []) - assert results['NM_021960.4:c.740C>T']['gene_symbol'] == 'MCL1' - assert results['NM_021960.4:c.740C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068779.1:p.(Ser247Phe)', 'slr': 'NP_068779.1:p.(S247F)'} assert results['NM_021960.4:c.740C>T']['submitted_variant'] == '1:150550916G>A' - assert results['NM_021960.4:c.740C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_021960.4:c.740C>T']['hgvs_lrg_variant'] == '' + assert results['NM_021960.4:c.740C>T']['gene_symbol'] == 'MCL1' + assert results['NM_021960.4:c.740C>T']['gene_ids'] == {'hgnc_id': 'HGNC:6943', 'entrez_gene_id': '4170', 'ucsc_id': 'uc001euz.4', 'omim_id': ['159552']} assert results['NM_021960.4:c.740C>T']['hgvs_transcript_variant'] == 'NM_021960.4:c.740C>T' + assert results['NM_021960.4:c.740C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_021960.4:c.740C>T']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_021960.4:c.740C>T']['hgvs_refseqgene_variant'] == 'NG_029146.1:g.6299C>T' - assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '150550916', 'alt': 'A'}} - assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '150578440', 'alt': 'A'}} - assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '150550916', 'alt': 'A'}} - assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '150578440', 'alt': 'A'}} - assert results['NM_021960.4:c.740C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029146.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_068779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021960.4'} - + assert results['NM_021960.4:c.740C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068779.1:p.(Ser247Phe)', 'slr': 'NP_068779.1:p.(S247F)'} + assert results['NM_021960.4:c.740C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_021960.4:c.740C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_021960.4:c.740C>T']['alt_genomic_loci'], []) + assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'pos': '150550916', 'ref': 'G', 'alt': 'A'}} + assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'pos': '150578440', 'ref': 'G', 'alt': 'A'}} + assert 
results['NM_021960.4:c.740C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'pos': '150550916', 'ref': 'G', 'alt': 'A'}} + assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'pos': '150578440', 'ref': 'G', 'alt': 'A'}} + assert results['NM_021960.4:c.740C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021960.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_068779.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029146.1'} def test_variant37(self): variant = '1-150550916-G-A' @@ -1185,105 +1202,107 @@ def test_variant37(self): assert results['flag'] == 'gene_variant' assert 'NM_182763.2:c.688+403C>T' in list(results.keys()) - assert results['NM_182763.2:c.688+403C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_182763.2:c.688+403C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_182763.2:c.688+403C>T']['alt_genomic_loci'], []) - assert results['NM_182763.2:c.688+403C>T']['gene_symbol'] == 'MCL1' - assert results['NM_182763.2:c.688+403C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_877495.1:p.?', 'slr': 'NP_877495.1:p.?'} assert results['NM_182763.2:c.688+403C>T']['submitted_variant'] == '1-150550916-G-A' - assert results['NM_182763.2:c.688+403C>T']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_182763.2):c.688+403C>T' - assert results['NM_182763.2:c.688+403C>T']['hgvs_lrg_variant'] == '' + assert results['NM_182763.2:c.688+403C>T']['gene_symbol'] == 'MCL1' + assert results['NM_182763.2:c.688+403C>T']['gene_ids'] == {'hgnc_id': 'HGNC:6943', 'entrez_gene_id': '4170', 'ucsc_id': 'uc001euz.4', 'omim_id': ['159552']} assert results['NM_182763.2:c.688+403C>T']['hgvs_transcript_variant'] == 'NM_182763.2:c.688+403C>T' + assert 
results['NM_182763.2:c.688+403C>T']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_182763.2):c.688+403C>T' + assert results['NM_182763.2:c.688+403C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_182763.2:c.688+403C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '150550916', 'alt': 'A'}} - assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '150578440', 'alt': 'A'}} - assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '150550916', 'alt': 'A'}} - assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '150578440', 'alt': 'A'}} - assert results['NM_182763.2:c.688+403C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_877495.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_182763.2'} + assert results['NM_182763.2:c.688+403C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_877495.1:p.?', 'slr': 'NP_877495.1:p.?'} + assert results['NM_182763.2:c.688+403C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_182763.2:c.688+403C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_182763.2:c.688+403C>T']['alt_genomic_loci'], []) + assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'pos': '150550916', 'ref': 'G', 'alt': 'A'}} + assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'pos': '150578440', 'ref': 'G', 'alt': 'A'}} + assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'pos': '150550916', 'ref': 'G', 'alt': 'A'}} + assert results['NM_182763.2:c.688+403C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'pos': '150578440', 'ref': 'G', 'alt': 'A'}} + assert results['NM_182763.2:c.688+403C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_182763.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_877495.1'} assert 'NM_001197320.1:c.281C>T' in list(results.keys()) - assert results['NM_001197320.1:c.281C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001197320.1:c.281C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001197320.1:c.281C>T']['alt_genomic_loci'], []) - assert results['NM_001197320.1:c.281C>T']['gene_symbol'] == 'MCL1' - assert results['NM_001197320.1:c.281C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001184249.1:p.(Ser94Phe)', 'slr': 'NP_001184249.1:p.(S94F)'} assert results['NM_001197320.1:c.281C>T']['submitted_variant'] == '1-150550916-G-A' - assert results['NM_001197320.1:c.281C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001197320.1:c.281C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001197320.1:c.281C>T']['gene_symbol'] == 'MCL1' + assert results['NM_001197320.1:c.281C>T']['gene_ids'] == {'hgnc_id': 'HGNC:6943', 'entrez_gene_id': '4170', 'ucsc_id': 'uc001euz.4', 'omim_id': ['159552']} assert results['NM_001197320.1:c.281C>T']['hgvs_transcript_variant'] == 'NM_001197320.1:c.281C>T' + assert results['NM_001197320.1:c.281C>T']['genome_context_intronic_sequence'] == '' + assert 
results['NM_001197320.1:c.281C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001197320.1:c.281C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '150550916', 'alt': 'A'}} - assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '150578440', 'alt': 'A'}} - assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '150550916', 'alt': 'A'}} - assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '150578440', 'alt': 'A'}} - assert results['NM_001197320.1:c.281C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001184249.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001197320.1'} + assert results['NM_001197320.1:c.281C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001184249.1:p.(Ser94Phe)', 'slr': 'NP_001184249.1:p.(S94F)'} + assert results['NM_001197320.1:c.281C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001197320.1:c.281C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001197320.1:c.281C>T']['alt_genomic_loci'], []) + assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'pos': '150550916', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'pos': '150578440', 'ref': 'G', 
'alt': 'A'}} + assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'pos': '150550916', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001197320.1:c.281C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'pos': '150578440', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001197320.1:c.281C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001197320.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001184249.1'} assert 'NM_021960.4:c.740C>T' in list(results.keys()) - assert results['NM_021960.4:c.740C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_021960.4:c.740C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_021960.4:c.740C>T']['alt_genomic_loci'], []) - assert results['NM_021960.4:c.740C>T']['gene_symbol'] == 'MCL1' - assert results['NM_021960.4:c.740C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068779.1:p.(Ser247Phe)', 'slr': 'NP_068779.1:p.(S247F)'} assert results['NM_021960.4:c.740C>T']['submitted_variant'] == '1-150550916-G-A' - assert results['NM_021960.4:c.740C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_021960.4:c.740C>T']['hgvs_lrg_variant'] == '' + assert results['NM_021960.4:c.740C>T']['gene_symbol'] == 'MCL1' + assert results['NM_021960.4:c.740C>T']['gene_ids'] == {'hgnc_id': 'HGNC:6943', 'entrez_gene_id': '4170', 'ucsc_id': 'uc001euz.4', 'omim_id': ['159552']} assert results['NM_021960.4:c.740C>T']['hgvs_transcript_variant'] == 'NM_021960.4:c.740C>T' + assert results['NM_021960.4:c.740C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_021960.4:c.740C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_021960.4:c.740C>T']['hgvs_refseqgene_variant'] == 'NG_029146.1:g.6299C>T' - assert 
results['NM_021960.4:c.740C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '150550916', 'alt': 'A'}} - assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '150578440', 'alt': 'A'}} - assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '150550916', 'alt': 'A'}} - assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '150578440', 'alt': 'A'}} - assert results['NM_021960.4:c.740C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029146.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_068779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021960.4'} - + assert results['NM_021960.4:c.740C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068779.1:p.(Ser247Phe)', 'slr': 'NP_068779.1:p.(S247F)'} + assert results['NM_021960.4:c.740C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_021960.4:c.740C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_021960.4:c.740C>T']['alt_genomic_loci'], []) + assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.150550916G>A', 'vcf': {'chr': 'chr1', 'pos': '150550916', 'ref': 'G', 'alt': 'A'}} + assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': 'chr1', 'pos': '150578440', 'ref': 'G', 'alt': 'A'}} + assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000001.10:g.150550916G>A', 'vcf': {'chr': '1', 'pos': '150550916', 'ref': 'G', 'alt': 'A'}} + assert results['NM_021960.4:c.740C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.150578440G>A', 'vcf': {'chr': '1', 'pos': '150578440', 'ref': 'G', 'alt': 'A'}} + assert results['NM_021960.4:c.740C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021960.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_068779.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029146.1'} def test_variant38(self): variant = 'NG_008123.1(LEPRE1_v003):c.2055+18G>A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NG_008123.1(LEPRE1_v003):c.2055+18G>A' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert 
results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant39(self): variant = 'NG_008123.1:c.2055+18G>A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NG_008123.1:c.2055+18G>A' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert 
results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant40(self): variant = 'NG_008123.1(NM_022356.3):c.2055+18G>A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() @@ -1291,22 +1310,22 @@ def test_variant40(self): assert results['flag'] == 'gene_variant' assert 'NM_022356.3:c.2055+18G>A' in list(results.keys()) - assert results['NM_022356.3:c.2055+18G>A']['hgvs_lrg_transcript_variant'] == 'LRG_5t1:c.2055+18G>A' - assert results['NM_022356.3:c.2055+18G>A']['refseqgene_context_intronic_sequence'] == 'NG_008123.1(NM_022356.3):c.2055+18G>A' - self.assertCountEqual(results['NM_022356.3:c.2055+18G>A']['alt_genomic_loci'], []) - assert results['NM_022356.3:c.2055+18G>A']['gene_symbol'] == 'P3H1' - assert results['NM_022356.3:c.2055+18G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_071751.3(LRG_5p1):p.?', 'slr': 'NP_071751.3:p.?'} assert results['NM_022356.3:c.2055+18G>A']['submitted_variant'] == 'NG_008123.1(NM_022356.3):c.2055+18G>A' - assert results['NM_022356.3:c.2055+18G>A']['genome_context_intronic_sequence'] == 
'NC_000001.10(NM_022356.3):c.2055+18G>A' - assert results['NM_022356.3:c.2055+18G>A']['hgvs_lrg_variant'] == 'LRG_5:g.24831G>A' + assert results['NM_022356.3:c.2055+18G>A']['gene_symbol'] == 'P3H1' + assert results['NM_022356.3:c.2055+18G>A']['gene_ids'] == {'hgnc_id': 'HGNC:19316', 'entrez_gene_id': '64175', 'ucsc_id': '', 'omim_id': ['610339']} assert results['NM_022356.3:c.2055+18G>A']['hgvs_transcript_variant'] == 'NM_022356.3:c.2055+18G>A' + assert results['NM_022356.3:c.2055+18G>A']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_022356.3):c.2055+18G>A' + assert results['NM_022356.3:c.2055+18G>A']['refseqgene_context_intronic_sequence'] == 'NG_008123.1(NM_022356.3):c.2055+18G>A' assert results['NM_022356.3:c.2055+18G>A']['hgvs_refseqgene_variant'] == 'NG_008123.1:g.24831G>A' - assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '43212925', 'alt': 'T'}} - assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '42747254', 'alt': 'T'}} - assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '43212925', 'alt': 'T'}} - assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '42747254', 'alt': 'T'}} - assert results['NM_022356.3:c.2055+18G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008123.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_071751.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_022356.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_5.xml'} - + assert 
results['NM_022356.3:c.2055+18G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_071751.3(LRG_5p1):p.?', 'slr': 'NP_071751.3:p.?'} + assert results['NM_022356.3:c.2055+18G>A']['hgvs_lrg_transcript_variant'] == 'LRG_5t1:c.2055+18G>A' + assert results['NM_022356.3:c.2055+18G>A']['hgvs_lrg_variant'] == 'LRG_5:g.24831G>A' + self.assertCountEqual(results['NM_022356.3:c.2055+18G>A']['alt_genomic_loci'], []) + assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': 'chr1', 'pos': '43212925', 'ref': 'C', 'alt': 'T'}} + assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': 'chr1', 'pos': '42747254', 'ref': 'C', 'alt': 'T'}} + assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': '1', 'pos': '43212925', 'ref': 'C', 'alt': 'T'}} + assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': '1', 'pos': '42747254', 'ref': 'C', 'alt': 'T'}} + assert results['NM_022356.3:c.2055+18G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_022356.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_071751.3', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008123.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_5.xml'} def test_variant41(self): variant = 'NM_021983.4:c.490G>C' @@ -1315,22 +1334,22 @@ def test_variant41(self): assert results['flag'] == 'gene_variant' assert 'NM_021983.4:c.490G>C' in list(results.keys()) - assert results['NM_021983.4:c.490G>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_021983.4:c.490G>C']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_021983.4:c.490G>C']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3848158', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'chr6_mann_hap4', 'ref': 'T', 'pos': '3848158', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3842538', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'ref': 'T', 'pos': '3842538', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'C', 'pos': '3884432', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'C', 'pos': '3884432', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3852542', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': 'C', 'pos': '3852542', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3853244', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': 'C', 'pos': '3853244', 'alt': 'G'}}}]) - assert results['NM_021983.4:c.490G>C']['gene_symbol'] == 'HLA-DRB4' - assert results['NM_021983.4:c.490G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068818.4:p.(Gly164Arg)', 'slr': 'NP_068818.4:p.(G164R)'} assert results['NM_021983.4:c.490G>C']['submitted_variant'] == 'NM_021983.4:c.490G>C' - assert results['NM_021983.4:c.490G>C']['genome_context_intronic_sequence'] == '' - assert 
results['NM_021983.4:c.490G>C']['hgvs_lrg_variant'] == '' + assert results['NM_021983.4:c.490G>C']['gene_symbol'] == 'HLA-DRB4' + assert results['NM_021983.4:c.490G>C']['gene_ids'] == {'hgnc_id': 'HGNC:4952', 'entrez_gene_id': '3126', 'ucsc_id': 'uc011jsg.3', 'omim_id': []} assert results['NM_021983.4:c.490G>C']['hgvs_transcript_variant'] == 'NM_021983.4:c.490G>C' + assert results['NM_021983.4:c.490G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_021983.4:c.490G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_021983.4:c.490G>C']['hgvs_refseqgene_variant'] == 'NG_002433.1:g.5724C>G' + assert results['NM_021983.4:c.490G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068818.4:p.(Gly164Arg)', 'slr': 'NP_068818.4:p.(G164R)'} + assert results['NM_021983.4:c.490G>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_021983.4:c.490G>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_021983.4:c.490G>C']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'pos': '3853244', 'ref': 'C', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'pos': '3853244', 'ref': 'C', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'pos': '3852542', 'ref': 'C', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'chr6_ssto_hap7', 'pos': '3852542', 'ref': 'C', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'pos': '3884432', 'ref': 'C', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'chr6_mcf_hap5', 'pos': '3884432', 'ref': 'C', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 
'HSCHR6_MHC_MANN_CTG1', 'pos': '3842538', 'ref': 'T', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'pos': '3842538', 'ref': 'T', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'pos': '3848158', 'ref': 'T', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'chr6_mann_hap4', 'pos': '3848158', 'ref': 'T', 'alt': 'G'}}}]) assert 'hg19' not in list(results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys()) - assert results['NM_021983.4:c.490G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_002433.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_068818.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021983.4'} - + assert results['NM_021983.4:c.490G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021983.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_068818.4', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_002433.1'} def test_variant42(self): variant = 'NM_032470.3:c.4del' @@ -1339,46 +1358,46 @@ def test_variant42(self): assert results['flag'] == 'gene_variant' assert 'NM_032470.3:c.4del' in list(results.keys()) - assert results['NM_032470.3:c.4del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_032470.3:c.4del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_032470.3:c.4del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483643', 
'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'chr6_GL000251v2_alt', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'chr6_GL000252v2_alt', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'chr6_GL000254v2_alt', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167248.1:g.3274047del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'ref': 'CG', 'pos': '3274046', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167248.1:g.3274047del', 'vcf': {'chr': 'chr6_qbl_hap6', 'ref': 'CG', 'pos': '3274046', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 
'NT_167248.2:g.3268451del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'ref': 'CG', 'pos': '3268450', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167248.2:g.3268451del', 'vcf': {'chr': 'chr6_GL000255v2_alt', 'ref': 'CG', 'pos': '3268450', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3345701del', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'CG', 'pos': '3345700', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3345701del', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': 'CG', 'pos': '3345700', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3346403del', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'CG', 'pos': '3346402', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3346403del', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': 'CG', 'pos': '3346402', 'alt': 'C'}}}]) - assert results['NM_032470.3:c.4del']['gene_symbol'] == 'TNXB' - assert results['NM_032470.3:c.4del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_115859.2:p.(Arg2AlafsTer91)', 'slr': 'NP_115859.2:p.(R2Afs*91)'} assert results['NM_032470.3:c.4del']['submitted_variant'] == 'NM_032470.3:c.4del' - assert results['NM_032470.3:c.4del']['genome_context_intronic_sequence'] == '' - assert results['NM_032470.3:c.4del']['hgvs_lrg_variant'] == '' + assert results['NM_032470.3:c.4del']['gene_symbol'] == 'TNXB' + assert results['NM_032470.3:c.4del']['gene_ids'] == {'hgnc_id': 'HGNC:11976', 'entrez_gene_id': '7148', 'ucsc_id': 'uc063nnw.1', 'omim_id': ['600985']} assert results['NM_032470.3:c.4del']['hgvs_transcript_variant'] == 'NM_032470.3:c.4del' + assert results['NM_032470.3:c.4del']['genome_context_intronic_sequence'] == '' + assert results['NM_032470.3:c.4del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032470.3:c.4del']['hgvs_refseqgene_variant'] == '' - assert results['NM_032470.3:c.4del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': 
{'chr': 'chr6', 'ref': 'CG', 'pos': '32012992', 'alt': 'C'}} - assert results['NM_032470.3:c.4del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.32045216del', 'vcf': {'chr': 'chr6', 'ref': 'CG', 'pos': '32045215', 'alt': 'C'}} - assert results['NM_032470.3:c.4del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': '6', 'ref': 'CG', 'pos': '32012992', 'alt': 'C'}} - assert results['NM_032470.3:c.4del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.32045216del', 'vcf': {'chr': '6', 'ref': 'CG', 'pos': '32045215', 'alt': 'C'}} - assert results['NM_032470.3:c.4del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_115859.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032470.3'} - + assert results['NM_032470.3:c.4del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_115859.2:p.(Arg2AlafsTer91)', 'slr': 'NP_115859.2:p.(R2Afs*91)'} + assert results['NM_032470.3:c.4del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_032470.3:c.4del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_032470.3:c.4del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3345701del', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'pos': '3345700', 'ref': 'CG', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3345701del', 'vcf': {'chr': 'chr6_ssto_hap7', 'pos': '3345700', 'ref': 'CG', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'pos': '3392833', 'ref': 'CG', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'pos': '3392833', 'ref': 'CG', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'pos': '3483643', 'ref': 'CG', 'alt': 'C'}}}, {'hg19': 
{'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'pos': '3483643', 'ref': 'CG', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'pos': '3387248', 'ref': 'CG', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'chr6_GL000254v2_alt', 'pos': '3387248', 'ref': 'CG', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167248.2:g.3268451del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'pos': '3268450', 'ref': 'CG', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167248.2:g.3268451del', 'vcf': {'chr': 'chr6_GL000255v2_alt', 'pos': '3268450', 'ref': 'CG', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167248.1:g.3274047del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'pos': '3274046', 'ref': 'CG', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167248.1:g.3274047del', 'vcf': {'chr': 'chr6_qbl_hap6', 'pos': '3274046', 'ref': 'CG', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'pos': '3292209', 'ref': 'CG', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'pos': '3292209', 'ref': 'CG', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'pos': '3483537', 'ref': 'CG', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'chr6_GL000251v2_alt', 'pos': '3483537', 'ref': 'CG', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'pos': '3286624', 'ref': 'CG', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'chr6_GL000252v2_alt', 'pos': '3286624', 'ref': 'CG', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 
'NT_167249.2:g.3346403del', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'pos': '3346402', 'ref': 'CG', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3346403del', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'pos': '3346402', 'ref': 'CG', 'alt': 'C'}}}]) + assert results['NM_032470.3:c.4del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': 'chr6', 'pos': '32012992', 'ref': 'CG', 'alt': 'C'}} + assert results['NM_032470.3:c.4del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.32045216del', 'vcf': {'chr': 'chr6', 'pos': '32045215', 'ref': 'CG', 'alt': 'C'}} + assert results['NM_032470.3:c.4del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': '6', 'pos': '32012992', 'ref': 'CG', 'alt': 'C'}} + assert results['NM_032470.3:c.4del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.32045216del', 'vcf': {'chr': '6', 'pos': '32045215', 'ref': 'CG', 'alt': 'C'}} + assert results['NM_032470.3:c.4del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032470.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_115859.2'} def test_variant43(self): variant = 'NM_001194958.2:c.20C>A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_001194958.2:c.20C>A' in list(results.keys()) - assert results['NM_001194958.2:c.20C>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001194958.2:c.20C>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001194958.2:c.20C>A']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315950.2:g.355171C>A', 'vcf': {'chr': 'HG987_PATCH', 'ref': 'C', 'pos': '355171', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315950.2:g.355171C>A', 'vcf': {'chr': 
'NW_003315950.2', 'ref': 'C', 'pos': '355171', 'alt': 'A'}}}]) - assert results['NM_001194958.2:c.20C>A']['gene_symbol'] == 'KCNJ18' - assert results['NM_001194958.2:c.20C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001181887.2:p.(Ala7Asp)', 'slr': 'NP_001181887.2:p.(A7D)'} assert results['NM_001194958.2:c.20C>A']['submitted_variant'] == 'NM_001194958.2:c.20C>A' - assert results['NM_001194958.2:c.20C>A']['genome_context_intronic_sequence'] == '' - assert results['NM_001194958.2:c.20C>A']['hgvs_lrg_variant'] == '' + assert results['NM_001194958.2:c.20C>A']['gene_symbol'] == 'KCNJ18' + assert results['NM_001194958.2:c.20C>A']['gene_ids'] == {'hgnc_id': 'HGNC:39080', 'entrez_gene_id': '100134444', 'ucsc_id': 'uc032exz.1', 'omim_id': ['613236']} assert results['NM_001194958.2:c.20C>A']['hgvs_transcript_variant'] == 'NM_001194958.2:c.20C>A' + assert results['NM_001194958.2:c.20C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001194958.2:c.20C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001194958.2:c.20C>A']['hgvs_refseqgene_variant'] == 'NG_033093.1:g.15284C>A' + assert results['NM_001194958.2:c.20C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001181887.2:p.(Ala7Asp)', 'slr': 'NP_001181887.2:p.(A7D)'} + assert results['NM_001194958.2:c.20C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001194958.2:c.20C>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001194958.2:c.20C>A']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315950.2:g.355171C>A', 'vcf': {'chr': 'HG987_PATCH', 'pos': '355171', 'ref': 'C', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315950.2:g.355171C>A', 'vcf': {'chr': 'NW_003315950.2', 'pos': '355171', 'ref': 'C', 'alt': 'A'}}}]) assert 'hg19' not in list(results['NM_001194958.2:c.20C>A']['primary_assembly_loci'].keys()) - assert results['NM_001194958.2:c.20C>A']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000017.11:g.21702806C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '21702806', 'alt': 'A'}} + assert results['NM_001194958.2:c.20C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.21702806C>A', 'vcf': {'chr': 'chr17', 'pos': '21702806', 'ref': 'C', 'alt': 'A'}} assert 'grch37' not in list(results['NM_001194958.2:c.20C>A']['primary_assembly_loci'].keys()) - assert results['NM_001194958.2:c.20C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.21702806C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '21702806', 'alt': 'A'}} - assert results['NM_001194958.2:c.20C>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_033093.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001181887.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001194958.2'} - - assert results['flag'] == 'gene_variant' + assert results['NM_001194958.2:c.20C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.21702806C>A', 'vcf': {'chr': '17', 'pos': '21702806', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001194958.2:c.20C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001194958.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001181887.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_033093.1'} def test_variant44(self): variant = 'NM_000022.2:c.534A>G' @@ -1387,22 +1406,22 @@ def test_variant44(self): assert results['flag'] == 'gene_variant' assert 'NM_000022.2:c.534A>G' in list(results.keys()) - assert results['NM_000022.2:c.534A>G']['hgvs_lrg_transcript_variant'] == 'LRG_16t1:c.534A>G' - assert results['NM_000022.2:c.534A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000022.2:c.534A>G']['alt_genomic_loci'], []) - assert results['NM_000022.2:c.534A>G']['gene_symbol'] == 'ADA' - assert 
results['NM_000022.2:c.534A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000013.2(LRG_16p1):p.(Val178=)', 'slr': 'NP_000013.2:p.(V178=)'} assert results['NM_000022.2:c.534A>G']['submitted_variant'] == 'NM_000022.2:c.534A>G' - assert results['NM_000022.2:c.534A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_000022.2:c.534A>G']['hgvs_lrg_variant'] == 'LRG_16:g.32462A>G' + assert results['NM_000022.2:c.534A>G']['gene_symbol'] == 'ADA' + assert results['NM_000022.2:c.534A>G']['gene_ids'] == {'hgnc_id': 'HGNC:186', 'entrez_gene_id': '100', 'ucsc_id': 'uc002xmj.4', 'omim_id': ['608958']} assert results['NM_000022.2:c.534A>G']['hgvs_transcript_variant'] == 'NM_000022.2:c.534A>G' + assert results['NM_000022.2:c.534A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_000022.2:c.534A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000022.2:c.534A>G']['hgvs_refseqgene_variant'] == 'NG_007385.1:g.32462A>G' - assert results['NM_000022.2:c.534A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'ref': 'T', 'pos': '43252915', 'alt': 'C'}} + assert results['NM_000022.2:c.534A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000013.2(LRG_16p1):p.(Val178=)', 'slr': 'NP_000013.2:p.(V178=)'} + assert results['NM_000022.2:c.534A>G']['hgvs_lrg_transcript_variant'] == 'LRG_16t1:c.534A>G' + assert results['NM_000022.2:c.534A>G']['hgvs_lrg_variant'] == 'LRG_16:g.32462A>G' + self.assertCountEqual(results['NM_000022.2:c.534A>G']['alt_genomic_loci'], []) + assert results['NM_000022.2:c.534A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'pos': '43252915', 'ref': 'T', 'alt': 'C'}} assert 'hg38' not in list(results['NM_000022.2:c.534A>G']['primary_assembly_loci'].keys()) - assert results['NM_000022.2:c.534A>G']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'ref': 'T', 'pos': '43252915', 'alt': 'C'}} + assert results['NM_000022.2:c.534A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'pos': '43252915', 'ref': 'T', 'alt': 'C'}} assert 'grch38' not in list(results['NM_000022.2:c.534A>G']['primary_assembly_loci'].keys()) - assert results['NM_000022.2:c.534A>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007385.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000013.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000022.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_16.xml'} - + assert results['NM_000022.2:c.534A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000022.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000013.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007385.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_16.xml'} def test_variant45(self): variant = 'HSCHR6_MHC_SSTO_CTG1-3852542-C-G' @@ -1411,22 +1430,22 @@ def test_variant45(self): assert results['flag'] == 'gene_variant' assert 'NM_021983.4:c.490G>C' in list(results.keys()) - assert results['NM_021983.4:c.490G>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_021983.4:c.490G>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_021983.4:c.490G>C']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3848158', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'chr6_mann_hap4', 'ref': 'T', 'pos': '3848158', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3842538', 'alt': 'G'}}}, 
{'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'ref': 'T', 'pos': '3842538', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'C', 'pos': '3884432', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'C', 'pos': '3884432', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3852542', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': 'C', 'pos': '3852542', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3853244', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': 'C', 'pos': '3853244', 'alt': 'G'}}}]) - assert results['NM_021983.4:c.490G>C']['gene_symbol'] == 'HLA-DRB4' - assert results['NM_021983.4:c.490G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068818.4:p.(Gly164Arg)', 'slr': 'NP_068818.4:p.(G164R)'} assert results['NM_021983.4:c.490G>C']['submitted_variant'] == 'HSCHR6_MHC_SSTO_CTG1-3852542-C-G' - assert results['NM_021983.4:c.490G>C']['genome_context_intronic_sequence'] == '' - assert results['NM_021983.4:c.490G>C']['hgvs_lrg_variant'] == '' + assert results['NM_021983.4:c.490G>C']['gene_symbol'] == 'HLA-DRB4' + assert results['NM_021983.4:c.490G>C']['gene_ids'] == {'hgnc_id': 'HGNC:4952', 'entrez_gene_id': '3126', 'ucsc_id': 'uc011jsg.3', 'omim_id': []} assert results['NM_021983.4:c.490G>C']['hgvs_transcript_variant'] == 'NM_021983.4:c.490G>C' + assert results['NM_021983.4:c.490G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_021983.4:c.490G>C']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_021983.4:c.490G>C']['hgvs_refseqgene_variant'] == 'NG_002433.1:g.5724C>G' + assert results['NM_021983.4:c.490G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068818.4:p.(Gly164Arg)', 'slr': 'NP_068818.4:p.(G164R)'} + assert results['NM_021983.4:c.490G>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_021983.4:c.490G>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_021983.4:c.490G>C']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'pos': '3853244', 'ref': 'C', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'pos': '3853244', 'ref': 'C', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'pos': '3852542', 'ref': 'C', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'chr6_ssto_hap7', 'pos': '3852542', 'ref': 'C', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'pos': '3884432', 'ref': 'C', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'chr6_mcf_hap5', 'pos': '3884432', 'ref': 'C', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'pos': '3842538', 'ref': 'T', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'pos': '3842538', 'ref': 'T', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'pos': '3848158', 'ref': 'T', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'chr6_mann_hap4', 'pos': '3848158', 'ref': 'T', 'alt': 'G'}}}]) assert 'hg19' not in 
list(results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys()) - assert results['NM_021983.4:c.490G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_002433.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_068818.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021983.4'} - + assert results['NM_021983.4:c.490G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021983.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_068818.4', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_002433.1'} def test_variant46(self): variant = 'NM_000368.4:c.363+1dupG' @@ -1435,22 +1454,22 @@ def test_variant46(self): assert results['flag'] == 'gene_variant' assert 'NM_000368.4:c.363+1dup' in list(results.keys()) - assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_transcript_variant'] == 'LRG_486t1:c.363+1dup' - assert results['NM_000368.4:c.363+1dup']['refseqgene_context_intronic_sequence'] == 'NG_012386.1(NM_000368.4):c.363+1dup' - self.assertCountEqual(results['NM_000368.4:c.363+1dup']['alt_genomic_loci'], []) - assert results['NM_000368.4:c.363+1dup']['gene_symbol'] == 'TSC1' - assert results['NM_000368.4:c.363+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000359.1(LRG_486p1):p.?', 'slr': 'NP_000359.1:p.?'} assert results['NM_000368.4:c.363+1dup']['submitted_variant'] == 'NM_000368.4:c.363+1dupG' - assert results['NM_000368.4:c.363+1dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_000368.4):c.363+1dup' - assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_variant'] == 'LRG_486:g.24048dup' + assert results['NM_000368.4:c.363+1dup']['gene_symbol'] == 'TSC1' + assert 
results['NM_000368.4:c.363+1dup']['gene_ids'] == {'hgnc_id': 'HGNC:12362', 'entrez_gene_id': '7248', 'ucsc_id': 'uc004cca.3', 'omim_id': ['605284']} assert results['NM_000368.4:c.363+1dup']['hgvs_transcript_variant'] == 'NM_000368.4:c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_000368.4):c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['refseqgene_context_intronic_sequence'] == 'NG_012386.1(NM_000368.4):c.363+1dup' assert results['NM_000368.4:c.363+1dup']['hgvs_refseqgene_variant'] == 'NG_012386.1:g.24048dup' - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012386.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_486.xml'} - + assert results['NM_000368.4:c.363+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000359.1(LRG_486p1):p.?', 'slr': 'NP_000359.1:p.?'} + assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_transcript_variant'] == 
'LRG_486t1:c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_variant'] == 'LRG_486:g.24048dup' + self.assertCountEqual(results['NM_000368.4:c.363+1dup']['alt_genomic_loci'], []) + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'pos': '135800972', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'pos': '132925585', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'pos': '135800972', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'pos': '132925585', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012386.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_486.xml'} def test_variant47(self): variant = 'NM_000368.4:c.363dupG' @@ -1459,22 +1478,22 @@ def test_variant47(self): assert results['flag'] == 'gene_variant' assert 'NM_000368.4:c.363+1dup' in list(results.keys()) - assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_transcript_variant'] == 'LRG_486t1:c.363+1dup' - assert results['NM_000368.4:c.363+1dup']['refseqgene_context_intronic_sequence'] == 'NG_012386.1(NM_000368.4):c.363+1dup' - self.assertCountEqual(results['NM_000368.4:c.363+1dup']['alt_genomic_loci'], []) - assert results['NM_000368.4:c.363+1dup']['gene_symbol'] == 'TSC1' - assert 
results['NM_000368.4:c.363+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000359.1(LRG_486p1):p.(Met122AspfsTer4)', 'slr': 'NP_000359.1:p.(M122Dfs*4)'} assert results['NM_000368.4:c.363+1dup']['submitted_variant'] == 'NM_000368.4:c.363dupG' - assert results['NM_000368.4:c.363+1dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_000368.4):c.363+1dup' - assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_variant'] == 'LRG_486:g.24048dup' + assert results['NM_000368.4:c.363+1dup']['gene_symbol'] == 'TSC1' + assert results['NM_000368.4:c.363+1dup']['gene_ids'] == {'hgnc_id': 'HGNC:12362', 'entrez_gene_id': '7248', 'ucsc_id': 'uc004cca.3', 'omim_id': ['605284']} assert results['NM_000368.4:c.363+1dup']['hgvs_transcript_variant'] == 'NM_000368.4:c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_000368.4):c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['refseqgene_context_intronic_sequence'] == 'NG_012386.1(NM_000368.4):c.363+1dup' assert results['NM_000368.4:c.363+1dup']['hgvs_refseqgene_variant'] == 'NG_012386.1:g.24048dup' - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert 
results['NM_000368.4:c.363+1dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012386.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_486.xml'} - + assert results['NM_000368.4:c.363+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000359.1(LRG_486p1):p.(Met122AspfsTer4)', 'slr': 'NP_000359.1:p.(M122Dfs*4)'} + assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_transcript_variant'] == 'LRG_486t1:c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_variant'] == 'LRG_486:g.24048dup' + self.assertCountEqual(results['NM_000368.4:c.363+1dup']['alt_genomic_loci'], []) + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'pos': '135800972', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'pos': '132925585', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'pos': '135800972', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'pos': '132925585', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012386.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_486.xml'} def test_variant48(self): variant = 
'NM_000089.3:c.1033_1035delGTT' @@ -1483,22 +1502,22 @@ def test_variant48(self): assert results['flag'] == 'gene_variant' assert 'NM_000089.3:c.1035_1035+2del' in list(results.keys()) - assert results['NM_000089.3:c.1035_1035+2del']['hgvs_lrg_transcript_variant'] == 'LRG_2t1:c.1035_1035+2del' - assert results['NM_000089.3:c.1035_1035+2del']['refseqgene_context_intronic_sequence'] == 'NG_007405.1(NM_000089.3):c.1035_1035+2del' - self.assertCountEqual(results['NM_000089.3:c.1035_1035+2del']['alt_genomic_loci'], []) - assert results['NM_000089.3:c.1035_1035+2del']['gene_symbol'] == 'COL1A2' - assert results['NM_000089.3:c.1035_1035+2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000080.2(LRG_2p1):p.(Val345del)', 'slr': 'NP_000080.2:p.(V345del)'} assert results['NM_000089.3:c.1035_1035+2del']['submitted_variant'] == 'NM_000089.3:c.1033_1035delGTT' - assert results['NM_000089.3:c.1035_1035+2del']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_000089.3):c.1035_1035+2del' - assert results['NM_000089.3:c.1035_1035+2del']['hgvs_lrg_variant'] == 'LRG_2:g.20261_20263del' + assert results['NM_000089.3:c.1035_1035+2del']['gene_symbol'] == 'COL1A2' + assert results['NM_000089.3:c.1035_1035+2del']['gene_ids'] == {'hgnc_id': 'HGNC:2198', 'entrez_gene_id': '1278', 'ucsc_id': 'uc003ung.1', 'omim_id': ['120160']} assert results['NM_000089.3:c.1035_1035+2del']['hgvs_transcript_variant'] == 'NM_000089.3:c.1035_1035+2del' + assert results['NM_000089.3:c.1035_1035+2del']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_000089.3):c.1035_1035+2del' + assert results['NM_000089.3:c.1035_1035+2del']['refseqgene_context_intronic_sequence'] == 'NG_007405.1(NM_000089.3):c.1035_1035+2del' assert results['NM_000089.3:c.1035_1035+2del']['hgvs_refseqgene_variant'] == 'NG_007405.1:g.20261_20263del' - assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.94039133_94039135del', 'vcf': {'chr': 'chr7', 
'ref': 'CTTG', 'pos': '94039128', 'alt': 'C'}} - assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.94409821_94409823del', 'vcf': {'chr': 'chr7', 'ref': 'CTTG', 'pos': '94409816', 'alt': 'C'}} - assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.94039133_94039135del', 'vcf': {'chr': '7', 'ref': 'CTTG', 'pos': '94039128', 'alt': 'C'}} - assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.94409821_94409823del', 'vcf': {'chr': '7', 'ref': 'CTTG', 'pos': '94409816', 'alt': 'C'}} - assert results['NM_000089.3:c.1035_1035+2del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007405.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000080.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000089.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_2.xml'} - + assert results['NM_000089.3:c.1035_1035+2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000080.2(LRG_2p1):p.(Val345del)', 'slr': 'NP_000080.2:p.(V345del)'} + assert results['NM_000089.3:c.1035_1035+2del']['hgvs_lrg_transcript_variant'] == 'LRG_2t1:c.1035_1035+2del' + assert results['NM_000089.3:c.1035_1035+2del']['hgvs_lrg_variant'] == 'LRG_2:g.20261_20263del' + self.assertCountEqual(results['NM_000089.3:c.1035_1035+2del']['alt_genomic_loci'], []) + assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.94039133_94039135del', 'vcf': {'chr': 'chr7', 'pos': '94039128', 'ref': 'CTTG', 'alt': 'C'}} + assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.94409821_94409823del', 'vcf': {'chr': 'chr7', 'pos': '94409816', 'ref': 'CTTG', 'alt': 'C'}} + assert 
results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.94039133_94039135del', 'vcf': {'chr': '7', 'pos': '94039128', 'ref': 'CTTG', 'alt': 'C'}} + assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.94409821_94409823del', 'vcf': {'chr': '7', 'pos': '94409816', 'ref': 'CTTG', 'alt': 'C'}} + assert results['NM_000089.3:c.1035_1035+2del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000089.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000080.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007405.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_2.xml'} def test_variant49(self): variant = 'NM_000089.3:c.1035_1035+2delTGT' @@ -1507,22 +1526,22 @@ def test_variant49(self): assert results['flag'] == 'gene_variant' assert 'NM_000089.3:c.1035_1035+2del' in list(results.keys()) - assert results['NM_000089.3:c.1035_1035+2del']['hgvs_lrg_transcript_variant'] == 'LRG_2t1:c.1035_1035+2del' - assert results['NM_000089.3:c.1035_1035+2del']['refseqgene_context_intronic_sequence'] == 'NG_007405.1(NM_000089.3):c.1035_1035+2del' - self.assertCountEqual(results['NM_000089.3:c.1035_1035+2del']['alt_genomic_loci'], []) - assert results['NM_000089.3:c.1035_1035+2del']['gene_symbol'] == 'COL1A2' - assert results['NM_000089.3:c.1035_1035+2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000080.2(LRG_2p1):p.(Val345del)', 'slr': 'NP_000080.2:p.(V345del)'} assert results['NM_000089.3:c.1035_1035+2del']['submitted_variant'] == 'NM_000089.3:c.1035_1035+2delTGT' - assert results['NM_000089.3:c.1035_1035+2del']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_000089.3):c.1035_1035+2del' - assert results['NM_000089.3:c.1035_1035+2del']['hgvs_lrg_variant'] == 'LRG_2:g.20261_20263del' + assert results['NM_000089.3:c.1035_1035+2del']['gene_symbol'] == 'COL1A2' + assert 
results['NM_000089.3:c.1035_1035+2del']['gene_ids'] == {'hgnc_id': 'HGNC:2198', 'entrez_gene_id': '1278', 'ucsc_id': 'uc003ung.1', 'omim_id': ['120160']} assert results['NM_000089.3:c.1035_1035+2del']['hgvs_transcript_variant'] == 'NM_000089.3:c.1035_1035+2del' + assert results['NM_000089.3:c.1035_1035+2del']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_000089.3):c.1035_1035+2del' + assert results['NM_000089.3:c.1035_1035+2del']['refseqgene_context_intronic_sequence'] == 'NG_007405.1(NM_000089.3):c.1035_1035+2del' assert results['NM_000089.3:c.1035_1035+2del']['hgvs_refseqgene_variant'] == 'NG_007405.1:g.20261_20263del' - assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.94039133_94039135del', 'vcf': {'chr': 'chr7', 'ref': 'CTTG', 'pos': '94039128', 'alt': 'C'}} - assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.94409821_94409823del', 'vcf': {'chr': 'chr7', 'ref': 'CTTG', 'pos': '94409816', 'alt': 'C'}} - assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.94039133_94039135del', 'vcf': {'chr': '7', 'ref': 'CTTG', 'pos': '94039128', 'alt': 'C'}} - assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.94409821_94409823del', 'vcf': {'chr': '7', 'ref': 'CTTG', 'pos': '94409816', 'alt': 'C'}} - assert results['NM_000089.3:c.1035_1035+2del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007405.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000080.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000089.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_2.xml'} - + assert results['NM_000089.3:c.1035_1035+2del']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_000080.2(LRG_2p1):p.(Val345del)', 'slr': 'NP_000080.2:p.(V345del)'} + assert results['NM_000089.3:c.1035_1035+2del']['hgvs_lrg_transcript_variant'] == 'LRG_2t1:c.1035_1035+2del' + assert results['NM_000089.3:c.1035_1035+2del']['hgvs_lrg_variant'] == 'LRG_2:g.20261_20263del' + self.assertCountEqual(results['NM_000089.3:c.1035_1035+2del']['alt_genomic_loci'], []) + assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.94039133_94039135del', 'vcf': {'chr': 'chr7', 'pos': '94039128', 'ref': 'CTTG', 'alt': 'C'}} + assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.94409821_94409823del', 'vcf': {'chr': 'chr7', 'pos': '94409816', 'ref': 'CTTG', 'alt': 'C'}} + assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.94039133_94039135del', 'vcf': {'chr': '7', 'pos': '94039128', 'ref': 'CTTG', 'alt': 'C'}} + assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.94409821_94409823del', 'vcf': {'chr': '7', 'pos': '94409816', 'ref': 'CTTG', 'alt': 'C'}} + assert results['NM_000089.3:c.1035_1035+2del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000089.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000080.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007405.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_2.xml'} def test_variant50(self): variant = 'NM_000088.3:c.2023_2028delGCAAGA' @@ -1531,22 +1550,22 @@ def test_variant50(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.2024_2028+1del' in list(results.keys()) - assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.2024_2028+1del' - assert 
results['NM_000088.3:c.2024_2028+1del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.2024_2028+1del' - self.assertCountEqual(results['NM_000088.3:c.2024_2028+1del']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.2024_2028+1del']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.2024_2028+1del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Ala675_Arg676del)', 'slr': 'NP_000079.2:p.(A675_R676del)'} assert results['NM_000088.3:c.2024_2028+1del']['submitted_variant'] == 'NM_000088.3:c.2023_2028delGCAAGA' - assert results['NM_000088.3:c.2024_2028+1del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.2024_2028+1del' - assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_variant'] == 'LRG_1:g.14656_14661del' + assert results['NM_000088.3:c.2024_2028+1del']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.2024_2028+1del']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.2024_2028+1del']['hgvs_transcript_variant'] == 'NM_000088.3:c.2024_2028+1del' + assert results['NM_000088.3:c.2024_2028+1del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.2024_2028+1del' + assert results['NM_000088.3:c.2024_2028+1del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.2024_2028+1del' assert results['NM_000088.3:c.2024_2028+1del']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.14656_14661del' - assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269343_48269348del', 'vcf': {'chr': 'chr17', 'ref': 'ACTCTTG', 'pos': '48269339', 'alt': 'A'}} - assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191982_50191987del', 'vcf': {'chr': 'chr17', 'ref': 'ACTCTTG', 'pos': '50191978', 'alt': 'A'}} - assert 
results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269343_48269348del', 'vcf': {'chr': '17', 'ref': 'ACTCTTG', 'pos': '48269339', 'alt': 'A'}} - assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191982_50191987del', 'vcf': {'chr': '17', 'ref': 'ACTCTTG', 'pos': '50191978', 'alt': 'A'}} - assert results['NM_000088.3:c.2024_2028+1del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.2024_2028+1del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Ala675_Arg676del)', 'slr': 'NP_000079.2:p.(A675_R676del)'} + assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.2024_2028+1del' + assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_variant'] == 'LRG_1:g.14656_14661del' + self.assertCountEqual(results['NM_000088.3:c.2024_2028+1del']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269343_48269348del', 'vcf': {'chr': 'chr17', 'pos': '48269339', 'ref': 'ACTCTTG', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191982_50191987del', 'vcf': {'chr': 'chr17', 'pos': '50191978', 'ref': 'ACTCTTG', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269343_48269348del', 'vcf': {'chr': '17', 'pos': '48269339', 'ref': 'ACTCTTG', 'alt': 'A'}} + assert 
results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191982_50191987del', 'vcf': {'chr': '17', 'pos': '50191978', 'ref': 'ACTCTTG', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant51(self): variant = 'NM_000089.3:c.938-1delG' @@ -1555,22 +1574,22 @@ def test_variant51(self): assert results['flag'] == 'gene_variant' assert 'NM_000089.3:c.938del' in list(results.keys()) - assert results['NM_000089.3:c.938del']['hgvs_lrg_transcript_variant'] == 'LRG_2t1:c.938del' - assert results['NM_000089.3:c.938del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000089.3:c.938del']['alt_genomic_loci'], []) - assert results['NM_000089.3:c.938del']['gene_symbol'] == 'COL1A2' - assert results['NM_000089.3:c.938del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000080.2(LRG_2p1):p.(Gly313AlafsTer86)', 'slr': 'NP_000080.2:p.(G313Afs*86)'} assert results['NM_000089.3:c.938del']['submitted_variant'] == 'NM_000089.3:c.938-1delG' - assert results['NM_000089.3:c.938del']['genome_context_intronic_sequence'] == '' - assert results['NM_000089.3:c.938del']['hgvs_lrg_variant'] == 'LRG_2:g.20164del' + assert results['NM_000089.3:c.938del']['gene_symbol'] == 'COL1A2' + assert results['NM_000089.3:c.938del']['gene_ids'] == {'hgnc_id': 'HGNC:2198', 'entrez_gene_id': '1278', 'ucsc_id': 'uc003ung.1', 'omim_id': ['120160']} assert results['NM_000089.3:c.938del']['hgvs_transcript_variant'] == 'NM_000089.3:c.938del' + assert results['NM_000089.3:c.938del']['genome_context_intronic_sequence'] == '' + assert results['NM_000089.3:c.938del']['refseqgene_context_intronic_sequence'] 
== '' assert results['NM_000089.3:c.938del']['hgvs_refseqgene_variant'] == 'NG_007405.1:g.20164del' - assert results['NM_000089.3:c.938del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.94039036del', 'vcf': {'chr': 'chr7', 'ref': 'AG', 'pos': '94039033', 'alt': 'A'}} - assert results['NM_000089.3:c.938del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.94409724del', 'vcf': {'chr': 'chr7', 'ref': 'AG', 'pos': '94409721', 'alt': 'A'}} - assert results['NM_000089.3:c.938del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.94039036del', 'vcf': {'chr': '7', 'ref': 'AG', 'pos': '94039033', 'alt': 'A'}} - assert results['NM_000089.3:c.938del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.94409724del', 'vcf': {'chr': '7', 'ref': 'AG', 'pos': '94409721', 'alt': 'A'}} - assert results['NM_000089.3:c.938del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007405.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000080.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000089.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_2.xml'} - + assert results['NM_000089.3:c.938del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000080.2(LRG_2p1):p.(Gly313AlafsTer86)', 'slr': 'NP_000080.2:p.(G313Afs*86)'} + assert results['NM_000089.3:c.938del']['hgvs_lrg_transcript_variant'] == 'LRG_2t1:c.938del' + assert results['NM_000089.3:c.938del']['hgvs_lrg_variant'] == 'LRG_2:g.20164del' + self.assertCountEqual(results['NM_000089.3:c.938del']['alt_genomic_loci'], []) + assert results['NM_000089.3:c.938del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.94039036del', 'vcf': {'chr': 'chr7', 'pos': '94039033', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_000089.3:c.938del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000007.14:g.94409724del', 'vcf': {'chr': 'chr7', 'pos': '94409721', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_000089.3:c.938del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.94039036del', 'vcf': {'chr': '7', 'pos': '94039033', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_000089.3:c.938del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.94409724del', 'vcf': {'chr': '7', 'pos': '94409721', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_000089.3:c.938del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000089.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000080.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007405.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_2.xml'} def test_variant52(self): variant = 'NM_000088.3:c.589G=' @@ -1579,22 +1598,22 @@ def test_variant52(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.589G=' in list(results.keys()) - assert results['NM_000088.3:c.589G=']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G=' - assert results['NM_000088.3:c.589G=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000088.3:c.589G=']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.589G=']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.589G=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197=)', 'slr': 'NP_000079.2:p.(G197=)'} assert results['NM_000088.3:c.589G=']['submitted_variant'] == 'NM_000088.3:c.589G=' - assert results['NM_000088.3:c.589G=']['genome_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.589G=']['hgvs_lrg_variant'] == 'LRG_1:g.8638G=' + assert results['NM_000088.3:c.589G=']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589G=']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert 
results['NM_000088.3:c.589G=']['hgvs_transcript_variant'] == 'NM_000088.3:c.589G=' + assert results['NM_000088.3:c.589G=']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.589G=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.589G=']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638G=' - assert results['NM_000088.3:c.589G=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C=', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '48275363', 'alt': 'C'}} - assert results['NM_000088.3:c.589G=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C=', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '50198002', 'alt': 'C'}} - assert results['NM_000088.3:c.589G=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C=', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '48275363', 'alt': 'C'}} - assert results['NM_000088.3:c.589G=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C=', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '50198002', 'alt': 'C'}} - assert results['NM_000088.3:c.589G=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.589G=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197=)', 'slr': 'NP_000079.2:p.(G197=)'} + assert results['NM_000088.3:c.589G=']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G=' + assert results['NM_000088.3:c.589G=']['hgvs_lrg_variant'] == 'LRG_1:g.8638G=' + self.assertCountEqual(results['NM_000088.3:c.589G=']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.589G=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000017.10:g.48275363C=', 'vcf': {'chr': 'chr17', 'pos': '48275363', 'ref': 'C', 'alt': 'C'}} + assert results['NM_000088.3:c.589G=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C=', 'vcf': {'chr': 'chr17', 'pos': '50198002', 'ref': 'C', 'alt': 'C'}} + assert results['NM_000088.3:c.589G=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C=', 'vcf': {'chr': '17', 'pos': '48275363', 'ref': 'C', 'alt': 'C'}} + assert results['NM_000088.3:c.589G=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C=', 'vcf': {'chr': '17', 'pos': '50198002', 'ref': 'C', 'alt': 'C'}} + assert results['NM_000088.3:c.589G=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant53(self): variant = 'NM_000088.3:c.642A=' @@ -1603,22 +1622,22 @@ def test_variant53(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.642A=' in list(results.keys()) - assert results['NM_000088.3:c.642A=']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642A=' - assert results['NM_000088.3:c.642A=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000088.3:c.642A=']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.642A=']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.642A=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Ser214=)', 'slr': 'NP_000079.2:p.(S214=)'} assert results['NM_000088.3:c.642A=']['submitted_variant'] == 'NM_000088.3:c.642A=' - assert results['NM_000088.3:c.642A=']['genome_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.642A=']['hgvs_lrg_variant'] == 'LRG_1:g.8691A=' + assert 
results['NM_000088.3:c.642A=']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.642A=']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.642A=']['hgvs_transcript_variant'] == 'NM_000088.3:c.642A=' + assert results['NM_000088.3:c.642A=']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.642A=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.642A=']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8691A=' - assert results['NM_000088.3:c.642A=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275310T=', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '48275310', 'alt': 'T'}} - assert results['NM_000088.3:c.642A=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197949T=', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '50197949', 'alt': 'T'}} - assert results['NM_000088.3:c.642A=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275310T=', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '48275310', 'alt': 'T'}} - assert results['NM_000088.3:c.642A=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197949T=', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '50197949', 'alt': 'T'}} - assert results['NM_000088.3:c.642A=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.642A=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Ser214=)', 'slr': 'NP_000079.2:p.(S214=)'} + assert results['NM_000088.3:c.642A=']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642A=' + assert 
results['NM_000088.3:c.642A=']['hgvs_lrg_variant'] == 'LRG_1:g.8691A=' + self.assertCountEqual(results['NM_000088.3:c.642A=']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.642A=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275310T=', 'vcf': {'chr': 'chr17', 'pos': '48275310', 'ref': 'T', 'alt': 'T'}} + assert results['NM_000088.3:c.642A=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197949T=', 'vcf': {'chr': 'chr17', 'pos': '50197949', 'ref': 'T', 'alt': 'T'}} + assert results['NM_000088.3:c.642A=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275310T=', 'vcf': {'chr': '17', 'pos': '48275310', 'ref': 'T', 'alt': 'T'}} + assert results['NM_000088.3:c.642A=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197949T=', 'vcf': {'chr': '17', 'pos': '50197949', 'ref': 'T', 'alt': 'T'}} + assert results['NM_000088.3:c.642A=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant54(self): variant = 'NM_000088.3:c.642+1GG>G' @@ -1627,22 +1646,22 @@ def test_variant54(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.642+2del' in list(results.keys()) - assert results['NM_000088.3:c.642+2del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642+2del' - assert results['NM_000088.3:c.642+2del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.642+2del' - self.assertCountEqual(results['NM_000088.3:c.642+2del']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.642+2del']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.642+2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 
'NP_000079.2:p.?'} assert results['NM_000088.3:c.642+2del']['submitted_variant'] == 'NM_000088.3:c.642+1GG>G' - assert results['NM_000088.3:c.642+2del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.642+2del' - assert results['NM_000088.3:c.642+2del']['hgvs_lrg_variant'] == 'LRG_1:g.8693del' + assert results['NM_000088.3:c.642+2del']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.642+2del']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.642+2del']['hgvs_transcript_variant'] == 'NM_000088.3:c.642+2del' + assert results['NM_000088.3:c.642+2del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.642+2del' + assert results['NM_000088.3:c.642+2del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.642+2del' assert results['NM_000088.3:c.642+2del']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8693del' - assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308del', 'vcf': {'chr': 'chr17', 'ref': 'TA', 'pos': '48275307', 'alt': 'T'}} - assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947del', 'vcf': {'chr': 'chr17', 'ref': 'TA', 'pos': '50197946', 'alt': 'T'}} - assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308del', 'vcf': {'chr': '17', 'ref': 'TA', 'pos': '48275307', 'alt': 'T'}} - assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947del', 'vcf': {'chr': '17', 'ref': 'TA', 'pos': '50197946', 'alt': 'T'}} - assert results['NM_000088.3:c.642+2del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.642+2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.642+2del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642+2del' + assert results['NM_000088.3:c.642+2del']['hgvs_lrg_variant'] == 'LRG_1:g.8693del' + self.assertCountEqual(results['NM_000088.3:c.642+2del']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308del', 'vcf': {'chr': 'chr17', 'pos': '48275307', 'ref': 'TA', 'alt': 'T'}} + assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947del', 'vcf': {'chr': 'chr17', 'pos': '50197946', 'ref': 'TA', 'alt': 'T'}} + assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308del', 'vcf': {'chr': '17', 'pos': '48275307', 'ref': 'TA', 'alt': 'T'}} + assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947del', 'vcf': {'chr': '17', 'pos': '50197946', 'ref': 'TA', 'alt': 'T'}} + assert results['NM_000088.3:c.642+2del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant55(self): variant = 'NM_000088.3:c.589-2GG>G' @@ -1651,22 +1670,22 @@ def test_variant55(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.589-2del' in list(results.keys()) - assert 
results['NM_000088.3:c.589-2del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-2del' - assert results['NM_000088.3:c.589-2del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-2del' - self.assertCountEqual(results['NM_000088.3:c.589-2del']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.589-2del']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.589-2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.589-2del']['submitted_variant'] == 'NM_000088.3:c.589-2GG>G' - assert results['NM_000088.3:c.589-2del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-2del' - assert results['NM_000088.3:c.589-2del']['hgvs_lrg_variant'] == 'LRG_1:g.8636del' + assert results['NM_000088.3:c.589-2del']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589-2del']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.589-2del']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-2del' + assert results['NM_000088.3:c.589-2del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-2del' + assert results['NM_000088.3:c.589-2del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-2del' assert results['NM_000088.3:c.589-2del']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8636del' - assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275366del', 'vcf': {'chr': 'chr17', 'ref': 'CT', 'pos': '48275364', 'alt': 'C'}} - assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005del', 'vcf': {'chr': 'chr17', 'ref': 'CT', 'pos': '50198003', 'alt': 'C'}} - assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000017.10:g.48275366del', 'vcf': {'chr': '17', 'ref': 'CT', 'pos': '48275364', 'alt': 'C'}} - assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005del', 'vcf': {'chr': '17', 'ref': 'CT', 'pos': '50198003', 'alt': 'C'}} - assert results['NM_000088.3:c.589-2del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.589-2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.589-2del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-2del' + assert results['NM_000088.3:c.589-2del']['hgvs_lrg_variant'] == 'LRG_1:g.8636del' + self.assertCountEqual(results['NM_000088.3:c.589-2del']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275366del', 'vcf': {'chr': 'chr17', 'pos': '48275364', 'ref': 'CT', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005del', 'vcf': {'chr': 'chr17', 'pos': '50198003', 'ref': 'CT', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275366del', 'vcf': {'chr': '17', 'pos': '48275364', 'ref': 'CT', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005del', 'vcf': {'chr': '17', 'pos': '50198003', 'ref': 'CT', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2del']['reference_sequence_records'] == {'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant56(self): variant = 'NM_000088.3:c.589-6_589-5insTTTT' @@ -1675,22 +1694,22 @@ def test_variant56(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.589-5_589-4insTTTT' in list(results.keys()) - assert results['NM_000088.3:c.589-5_589-4insTTTT']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-5_589-4insTTTT' - assert results['NM_000088.3:c.589-5_589-4insTTTT']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-5_589-4insTTTT' - self.assertCountEqual(results['NM_000088.3:c.589-5_589-4insTTTT']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.589-5_589-4insTTTT']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.589-5_589-4insTTTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.589-5_589-4insTTTT']['submitted_variant'] == 'NM_000088.3:c.589-6_589-5insTTTT' - assert results['NM_000088.3:c.589-5_589-4insTTTT']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-5_589-4insTTTT' - assert results['NM_000088.3:c.589-5_589-4insTTTT']['hgvs_lrg_variant'] == 'LRG_1:g.8633_8634insTTTT' + assert results['NM_000088.3:c.589-5_589-4insTTTT']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589-5_589-4insTTTT']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.589-5_589-4insTTTT']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-5_589-4insTTTT' + assert results['NM_000088.3:c.589-5_589-4insTTTT']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-5_589-4insTTTT' + assert 
results['NM_000088.3:c.589-5_589-4insTTTT']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-5_589-4insTTTT' assert results['NM_000088.3:c.589-5_589-4insTTTT']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8633_8634insTTTT' - assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275368_48275369insAAAA', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '48275367', 'alt': 'GAAAA'}} - assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198007_50198008insAAAA', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '50198006', 'alt': 'GAAAA'}} - assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275368_48275369insAAAA', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '48275367', 'alt': 'GAAAA'}} - assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198007_50198008insAAAA', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '50198006', 'alt': 'GAAAA'}} - assert results['NM_000088.3:c.589-5_589-4insTTTT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.589-5_589-4insTTTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.589-5_589-4insTTTT']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-5_589-4insTTTT' + assert results['NM_000088.3:c.589-5_589-4insTTTT']['hgvs_lrg_variant'] == 'LRG_1:g.8633_8634insTTTT' + self.assertCountEqual(results['NM_000088.3:c.589-5_589-4insTTTT']['alt_genomic_loci'], []) + 
assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275368_48275369insAAAA', 'vcf': {'chr': 'chr17', 'pos': '48275367', 'ref': 'G', 'alt': 'GAAAA'}} + assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198007_50198008insAAAA', 'vcf': {'chr': 'chr17', 'pos': '50198006', 'ref': 'G', 'alt': 'GAAAA'}} + assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275368_48275369insAAAA', 'vcf': {'chr': '17', 'pos': '48275367', 'ref': 'G', 'alt': 'GAAAA'}} + assert results['NM_000088.3:c.589-5_589-4insTTTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198007_50198008insAAAA', 'vcf': {'chr': '17', 'pos': '50198006', 'ref': 'G', 'alt': 'GAAAA'}} + assert results['NM_000088.3:c.589-5_589-4insTTTT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant57(self): variant = 'NM_000088.3:c.642+3_642+4insAAAA' @@ -1699,22 +1718,22 @@ def test_variant57(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.642+4_642+5insAAAA' in list(results.keys()) - assert results['NM_000088.3:c.642+4_642+5insAAAA']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642+4_642+5insAAAA' - assert results['NM_000088.3:c.642+4_642+5insAAAA']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.642+4_642+5insAAAA' - self.assertCountEqual(results['NM_000088.3:c.642+4_642+5insAAAA']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.642+4_642+5insAAAA']['gene_symbol'] == 'COL1A1' - assert 
results['NM_000088.3:c.642+4_642+5insAAAA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.642+4_642+5insAAAA']['submitted_variant'] == 'NM_000088.3:c.642+3_642+4insAAAA' - assert results['NM_000088.3:c.642+4_642+5insAAAA']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.642+4_642+5insAAAA' - assert results['NM_000088.3:c.642+4_642+5insAAAA']['hgvs_lrg_variant'] == 'LRG_1:g.8695_8696insAAAA' + assert results['NM_000088.3:c.642+4_642+5insAAAA']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.642+4_642+5insAAAA']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.642+4_642+5insAAAA']['hgvs_transcript_variant'] == 'NM_000088.3:c.642+4_642+5insAAAA' + assert results['NM_000088.3:c.642+4_642+5insAAAA']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.642+4_642+5insAAAA' + assert results['NM_000088.3:c.642+4_642+5insAAAA']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.642+4_642+5insAAAA' assert results['NM_000088.3:c.642+4_642+5insAAAA']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8695_8696insAAAA' - assert results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275307_48275308insTTTT', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '48275305', 'alt': 'CTTTT'}} - assert results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197946_50197947insTTTT', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '50197944', 'alt': 'CTTTT'}} - assert results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275307_48275308insTTTT', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '48275305', 'alt': 'CTTTT'}} - assert 
results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197946_50197947insTTTT', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '50197944', 'alt': 'CTTTT'}} - assert results['NM_000088.3:c.642+4_642+5insAAAA']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.642+4_642+5insAAAA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.642+4_642+5insAAAA']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642+4_642+5insAAAA' + assert results['NM_000088.3:c.642+4_642+5insAAAA']['hgvs_lrg_variant'] == 'LRG_1:g.8695_8696insAAAA' + self.assertCountEqual(results['NM_000088.3:c.642+4_642+5insAAAA']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275307_48275308insTTTT', 'vcf': {'chr': 'chr17', 'pos': '48275305', 'ref': 'C', 'alt': 'CTTTT'}} + assert results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197946_50197947insTTTT', 'vcf': {'chr': 'chr17', 'pos': '50197944', 'ref': 'C', 'alt': 'CTTTT'}} + assert results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275307_48275308insTTTT', 'vcf': {'chr': '17', 'pos': '48275305', 'ref': 'C', 'alt': 'CTTTT'}} + assert results['NM_000088.3:c.642+4_642+5insAAAA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197946_50197947insTTTT', 'vcf': {'chr': '17', 'pos': '50197944', 'ref': 'C', 'alt': 'CTTTT'}} + assert 
results['NM_000088.3:c.642+4_642+5insAAAA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant58(self): variant = 'NM_000088.3:c.589-4_589-3insTT' @@ -1723,22 +1742,22 @@ def test_variant58(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.589-4_589-3insTT' in list(results.keys()) - assert results['NM_000088.3:c.589-4_589-3insTT']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-4_589-3insTT' - assert results['NM_000088.3:c.589-4_589-3insTT']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-4_589-3insTT' - self.assertCountEqual(results['NM_000088.3:c.589-4_589-3insTT']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.589-4_589-3insTT']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.589-4_589-3insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.589-4_589-3insTT']['submitted_variant'] == 'NM_000088.3:c.589-4_589-3insTT' - assert results['NM_000088.3:c.589-4_589-3insTT']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-4_589-3insTT' - assert results['NM_000088.3:c.589-4_589-3insTT']['hgvs_lrg_variant'] == 'LRG_1:g.8634_8635insTT' + assert results['NM_000088.3:c.589-4_589-3insTT']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589-4_589-3insTT']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.589-4_589-3insTT']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-4_589-3insTT' + assert results['NM_000088.3:c.589-4_589-3insTT']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-4_589-3insTT' + assert 
results['NM_000088.3:c.589-4_589-3insTT']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-4_589-3insTT' assert results['NM_000088.3:c.589-4_589-3insTT']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8634_8635insTT' - assert results['NM_000088.3:c.589-4_589-3insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275366_48275367insAA', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '48275366', 'alt': 'TAA'}} - assert results['NM_000088.3:c.589-4_589-3insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005_50198006insAA', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '50198005', 'alt': 'TAA'}} - assert results['NM_000088.3:c.589-4_589-3insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275366_48275367insAA', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '48275366', 'alt': 'TAA'}} - assert results['NM_000088.3:c.589-4_589-3insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005_50198006insAA', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '50198005', 'alt': 'TAA'}} - assert results['NM_000088.3:c.589-4_589-3insTT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.589-4_589-3insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.589-4_589-3insTT']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-4_589-3insTT' + assert results['NM_000088.3:c.589-4_589-3insTT']['hgvs_lrg_variant'] == 'LRG_1:g.8634_8635insTT' + self.assertCountEqual(results['NM_000088.3:c.589-4_589-3insTT']['alt_genomic_loci'], []) + assert 
results['NM_000088.3:c.589-4_589-3insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275366_48275367insAA', 'vcf': {'chr': 'chr17', 'pos': '48275366', 'ref': 'T', 'alt': 'TAA'}} + assert results['NM_000088.3:c.589-4_589-3insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005_50198006insAA', 'vcf': {'chr': 'chr17', 'pos': '50198005', 'ref': 'T', 'alt': 'TAA'}} + assert results['NM_000088.3:c.589-4_589-3insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275366_48275367insAA', 'vcf': {'chr': '17', 'pos': '48275366', 'ref': 'T', 'alt': 'TAA'}} + assert results['NM_000088.3:c.589-4_589-3insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005_50198006insAA', 'vcf': {'chr': '17', 'pos': '50198005', 'ref': 'T', 'alt': 'TAA'}} + assert results['NM_000088.3:c.589-4_589-3insTT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant59(self): variant = 'NM_000088.3:c.589-8del' @@ -1747,46 +1766,46 @@ def test_variant59(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.589-7del' in list(results.keys()) - assert results['NM_000088.3:c.589-7del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-7del' - assert results['NM_000088.3:c.589-7del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-7del' - self.assertCountEqual(results['NM_000088.3:c.589-7del']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.589-7del']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.589-7del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert 
results['NM_000088.3:c.589-7del']['submitted_variant'] == 'NM_000088.3:c.589-8del' + assert results['NM_000088.3:c.589-7del']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589-7del']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} + assert results['NM_000088.3:c.589-7del']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-7del' assert results['NM_000088.3:c.589-7del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-7del' - assert results['NM_000088.3:c.589-7del']['hgvs_lrg_variant'] == 'LRG_1:g.8631del' - assert results['NM_000088.3:c.589-7del']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-7del' + assert results['NM_000088.3:c.589-7del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-7del' assert results['NM_000088.3:c.589-7del']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8631del' - assert results['NM_000088.3:c.589-7del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275371del', 'vcf': {'chr': 'chr17', 'ref': 'GA', 'pos': '48275369', 'alt': 'G'}} - assert results['NM_000088.3:c.589-7del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198010del', 'vcf': {'chr': 'chr17', 'ref': 'GA', 'pos': '50198008', 'alt': 'G'}} - assert results['NM_000088.3:c.589-7del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275371del', 'vcf': {'chr': '17', 'ref': 'GA', 'pos': '48275369', 'alt': 'G'}} - assert results['NM_000088.3:c.589-7del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198010del', 'vcf': {'chr': '17', 'ref': 'GA', 'pos': '50198008', 'alt': 'G'}} - assert results['NM_000088.3:c.589-7del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.589-7del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.589-7del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-7del' + assert results['NM_000088.3:c.589-7del']['hgvs_lrg_variant'] == 'LRG_1:g.8631del' + self.assertCountEqual(results['NM_000088.3:c.589-7del']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.589-7del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275371del', 'vcf': {'chr': 'chr17', 'pos': '48275369', 'ref': 'GA', 'alt': 'G'}} + assert results['NM_000088.3:c.589-7del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198010del', 'vcf': {'chr': 'chr17', 'pos': '50198008', 'ref': 'GA', 'alt': 'G'}} + assert results['NM_000088.3:c.589-7del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275371del', 'vcf': {'chr': '17', 'pos': '48275369', 'ref': 'GA', 'alt': 'G'}} + assert results['NM_000088.3:c.589-7del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198010del', 'vcf': {'chr': '17', 'pos': '50198008', 'ref': 'GA', 'alt': 'G'}} + assert results['NM_000088.3:c.589-7del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant60(self): variant = 'NM_000527.4:c.-187_-185delCTC' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_000527.4:c.-187_-185del' in list(results.keys()) - assert 
results['NM_000527.4:c.-187_-185del']['hgvs_lrg_transcript_variant'] == 'LRG_274t1:c.-187_-185del' - assert results['NM_000527.4:c.-187_-185del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000527.4:c.-187_-185del']['alt_genomic_loci'], []) - assert results['NM_000527.4:c.-187_-185del']['gene_symbol'] == 'LDLR' - assert results['NM_000527.4:c.-187_-185del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000518.1(LRG_274p1):p.?', 'slr': 'NP_000518.1:p.?'} assert results['NM_000527.4:c.-187_-185del']['submitted_variant'] == 'NM_000527.4:c.-187_-185delCTC' - assert results['NM_000527.4:c.-187_-185del']['genome_context_intronic_sequence'] == '' - assert results['NM_000527.4:c.-187_-185del']['hgvs_lrg_variant'] == 'LRG_274:g.4982_4984del' + assert results['NM_000527.4:c.-187_-185del']['gene_symbol'] == 'LDLR' + assert results['NM_000527.4:c.-187_-185del']['gene_ids'] == {'hgnc_id': 'HGNC:6547', 'entrez_gene_id': '3949', 'ucsc_id': 'uc002mqk.5', 'omim_id': ['606945']} assert results['NM_000527.4:c.-187_-185del']['hgvs_transcript_variant'] == 'NM_000527.4:c.-187_-185del' + assert results['NM_000527.4:c.-187_-185del']['genome_context_intronic_sequence'] == '' + assert results['NM_000527.4:c.-187_-185del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000527.4:c.-187_-185del']['hgvs_refseqgene_variant'] == 'NG_009060.1:g.4982_4984del' - assert results['NM_000527.4:c.-187_-185del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.11200038_11200040del', 'vcf': {'chr': 'chr19', 'ref': 'ACTC', 'pos': '11200031', 'alt': 'A'}} - assert results['NM_000527.4:c.-187_-185del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.11089362_11089364del', 'vcf': {'chr': 'chr19', 'ref': 'ACTC', 'pos': '11089355', 'alt': 'A'}} - assert results['NM_000527.4:c.-187_-185del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000019.9:g.11200038_11200040del', 'vcf': {'chr': '19', 'ref': 'ACTC', 'pos': '11200031', 'alt': 'A'}} - assert results['NM_000527.4:c.-187_-185del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.11089362_11089364del', 'vcf': {'chr': '19', 'ref': 'ACTC', 'pos': '11089355', 'alt': 'A'}} - assert results['NM_000527.4:c.-187_-185del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009060.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000518.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000527.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_274.xml'} - - assert results['flag'] == 'gene_variant' + assert results['NM_000527.4:c.-187_-185del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000518.1(LRG_274p1):p.?', 'slr': 'NP_000518.1:p.?'} + assert results['NM_000527.4:c.-187_-185del']['hgvs_lrg_transcript_variant'] == 'LRG_274t1:c.-187_-185del' + assert results['NM_000527.4:c.-187_-185del']['hgvs_lrg_variant'] == 'LRG_274:g.4982_4984del' + self.assertCountEqual(results['NM_000527.4:c.-187_-185del']['alt_genomic_loci'], []) + assert results['NM_000527.4:c.-187_-185del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.11200038_11200040del', 'vcf': {'chr': 'chr19', 'pos': '11200031', 'ref': 'ACTC', 'alt': 'A'}} + assert results['NM_000527.4:c.-187_-185del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.11089362_11089364del', 'vcf': {'chr': 'chr19', 'pos': '11089355', 'ref': 'ACTC', 'alt': 'A'}} + assert results['NM_000527.4:c.-187_-185del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.11200038_11200040del', 'vcf': {'chr': '19', 'pos': '11200031', 'ref': 'ACTC', 'alt': 'A'}} + assert results['NM_000527.4:c.-187_-185del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.11089362_11089364del', 'vcf': {'chr': '19', 'pos': 
'11089355', 'ref': 'ACTC', 'alt': 'A'}} + assert results['NM_000527.4:c.-187_-185del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000527.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000518.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009060.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_274.xml'} def test_variant61(self): variant = 'NM_206933.2:c.6317C>G' @@ -1795,70 +1814,70 @@ def test_variant61(self): assert results['flag'] == 'gene_variant' assert 'NM_206933.2:c.6317C>G' in list(results.keys()) - assert results['NM_206933.2:c.6317C>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_206933.2:c.6317C>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_206933.2:c.6317C>G']['alt_genomic_loci'], []) - assert results['NM_206933.2:c.6317C>G']['gene_symbol'] == 'USH2A' - assert results['NM_206933.2:c.6317C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_996816.2:p.(Thr2106Arg)', 'slr': 'NP_996816.2:p.(T2106R)'} assert results['NM_206933.2:c.6317C>G']['submitted_variant'] == 'NM_206933.2:c.6317C>G' - assert results['NM_206933.2:c.6317C>G']['genome_context_intronic_sequence'] == '' - assert results['NM_206933.2:c.6317C>G']['hgvs_lrg_variant'] == '' + assert results['NM_206933.2:c.6317C>G']['gene_symbol'] == 'USH2A' + assert results['NM_206933.2:c.6317C>G']['gene_ids'] == {'hgnc_id': 'HGNC:12601', 'entrez_gene_id': '7399', 'ucsc_id': 'uc001hku.1', 'omim_id': ['608400']} assert results['NM_206933.2:c.6317C>G']['hgvs_transcript_variant'] == 'NM_206933.2:c.6317C>G' + assert results['NM_206933.2:c.6317C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_206933.2:c.6317C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_206933.2:c.6317C>G']['hgvs_refseqgene_variant'] == 'NG_009497.1:g.381958C>G' - assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000001.10:g.216219781A>C', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '216219781', 'alt': 'C'}} - assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.216046439A>C', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '216046439', 'alt': 'C'}} - assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.216219781A>C', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '216219781', 'alt': 'C'}} - assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.216046439A>C', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '216046439', 'alt': 'C'}} - assert results['NM_206933.2:c.6317C>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009497.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_996816.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_206933.2'} - + assert results['NM_206933.2:c.6317C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_996816.2:p.(Thr2106Arg)', 'slr': 'NP_996816.2:p.(T2106R)'} + assert results['NM_206933.2:c.6317C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_206933.2:c.6317C>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_206933.2:c.6317C>G']['alt_genomic_loci'], []) + assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.216219781A>C', 'vcf': {'chr': 'chr1', 'pos': '216219781', 'ref': 'A', 'alt': 'C'}} + assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.216046439A>C', 'vcf': {'chr': 'chr1', 'pos': '216046439', 'ref': 'A', 'alt': 'C'}} + assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.216219781A>C', 'vcf': {'chr': '1', 'pos': '216219781', 'ref': 'A', 'alt': 'C'}} + assert 
results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.216046439A>C', 'vcf': {'chr': '1', 'pos': '216046439', 'ref': 'A', 'alt': 'C'}} + assert results['NM_206933.2:c.6317C>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_206933.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_996816.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009497.1'} def test_variant62(self): variant = 'NC_000013.10:g.32929387T>C' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_000059.3:c.7397C=' in list(results.keys()) - assert results['NM_000059.3:c.7397C=']['hgvs_lrg_transcript_variant'] == 'LRG_293t1:c.7397C=' - assert results['NM_000059.3:c.7397C=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000059.3:c.7397C=']['alt_genomic_loci'], []) - assert results['NM_000059.3:c.7397C=']['gene_symbol'] == 'BRCA2' - assert results['NM_000059.3:c.7397C=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000050.2(LRG_293p1):p.(Ala2466=)', 'slr': 'NP_000050.2:p.(A2466=)'} assert results['NM_000059.3:c.7397C=']['submitted_variant'] == 'NC_000013.10:g.32929387T>C' - assert results['NM_000059.3:c.7397C=']['genome_context_intronic_sequence'] == '' - assert results['NM_000059.3:c.7397C=']['hgvs_lrg_variant'] == 'LRG_293:g.44771C=' + assert results['NM_000059.3:c.7397C=']['gene_symbol'] == 'BRCA2' + assert results['NM_000059.3:c.7397C=']['gene_ids'] == {'hgnc_id': 'HGNC:1101', 'entrez_gene_id': '675', 'ucsc_id': 'uc001uub.2', 'omim_id': ['600185']} assert results['NM_000059.3:c.7397C=']['hgvs_transcript_variant'] == 'NM_000059.3:c.7397C=' + assert results['NM_000059.3:c.7397C=']['genome_context_intronic_sequence'] == '' + assert results['NM_000059.3:c.7397C=']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_000059.3:c.7397C=']['hgvs_refseqgene_variant'] == 'NG_012772.3:g.44771C=' - assert results['NM_000059.3:c.7397C=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000013.10:g.32929387T>C', 'vcf': {'chr': 'chr13', 'ref': 'T', 'pos': '32929387', 'alt': 'C'}} - assert results['NM_000059.3:c.7397C=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000013.11:g.32355250T>C', 'vcf': {'chr': 'chr13', 'ref': 'T', 'pos': '32355250', 'alt': 'C'}} - assert results['NM_000059.3:c.7397C=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000013.10:g.32929387T>C', 'vcf': {'chr': '13', 'ref': 'T', 'pos': '32929387', 'alt': 'C'}} - assert results['NM_000059.3:c.7397C=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000013.11:g.32355250T>C', 'vcf': {'chr': '13', 'ref': 'T', 'pos': '32355250', 'alt': 'C'}} - assert results['NM_000059.3:c.7397C=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012772.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000050.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000059.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_293.xml'} - - assert results['flag'] == 'gene_variant' + assert results['NM_000059.3:c.7397C=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000050.2(LRG_293p1):p.(Ala2466=)', 'slr': 'NP_000050.2:p.(A2466=)'} + assert results['NM_000059.3:c.7397C=']['hgvs_lrg_transcript_variant'] == 'LRG_293t1:c.7397C=' + assert results['NM_000059.3:c.7397C=']['hgvs_lrg_variant'] == 'LRG_293:g.44771C=' + self.assertCountEqual(results['NM_000059.3:c.7397C=']['alt_genomic_loci'], []) + assert results['NM_000059.3:c.7397C=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000013.10:g.32929387T>C', 'vcf': {'chr': 'chr13', 'pos': '32929387', 'ref': 'T', 'alt': 'C'}} + assert results['NM_000059.3:c.7397C=']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000013.11:g.32355250T>C', 'vcf': {'chr': 'chr13', 'pos': '32355250', 'ref': 'T', 'alt': 'C'}} + assert results['NM_000059.3:c.7397C=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000013.10:g.32929387T>C', 'vcf': {'chr': '13', 'pos': '32929387', 'ref': 'T', 'alt': 'C'}} + assert results['NM_000059.3:c.7397C=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000013.11:g.32355250T>C', 'vcf': {'chr': '13', 'pos': '32355250', 'ref': 'T', 'alt': 'C'}} + assert results['NM_000059.3:c.7397C=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000059.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000050.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012772.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_293.xml'} def test_variant63(self): variant = 'NM_015102.3:c.2818-2T>A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_015102.3:c.2818-2T>A' in list(results.keys()) - assert results['NM_015102.3:c.2818-2T>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_015102.3:c.2818-2T>A']['refseqgene_context_intronic_sequence'] == 'NG_011724.2(NM_015102.3):c.2818-2A=' - self.assertCountEqual(results['NM_015102.3:c.2818-2T>A']['alt_genomic_loci'], []) - assert results['NM_015102.3:c.2818-2T>A']['gene_symbol'] == 'NPHP4' - assert results['NM_015102.3:c.2818-2T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055917.1:p.?', 'slr': 'NP_055917.1:p.?'} assert results['NM_015102.3:c.2818-2T>A']['submitted_variant'] == 'NM_015102.3:c.2818-2T>A' - assert results['NM_015102.3:c.2818-2T>A']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_015102.3):c.2818-2T>A' - assert results['NM_015102.3:c.2818-2T>A']['hgvs_lrg_variant'] == '' + assert results['NM_015102.3:c.2818-2T>A']['gene_symbol'] == 'NPHP4' + assert 
results['NM_015102.3:c.2818-2T>A']['gene_ids'] == {'hgnc_id': 'HGNC:19104', 'entrez_gene_id': '261734', 'ucsc_id': 'uc001alq.3', 'omim_id': ['607215']} assert results['NM_015102.3:c.2818-2T>A']['hgvs_transcript_variant'] == 'NM_015102.3:c.2818-2T>A' + assert results['NM_015102.3:c.2818-2T>A']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_015102.3):c.2818-2T>A' + assert results['NM_015102.3:c.2818-2T>A']['refseqgene_context_intronic_sequence'] == 'NG_011724.2(NM_015102.3):c.2818-2A=' assert results['NM_015102.3:c.2818-2T>A']['hgvs_refseqgene_variant'] == 'NG_011724.2:g.122370A=' - assert results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '5935162', 'alt': 'T'}} + assert results['NM_015102.3:c.2818-2T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055917.1:p.?', 'slr': 'NP_055917.1:p.?'} + assert results['NM_015102.3:c.2818-2T>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_015102.3:c.2818-2T>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_015102.3:c.2818-2T>A']['alt_genomic_loci'], []) + assert results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'pos': '5935162', 'ref': 'A', 'alt': 'T'}} assert 'hg38' not in list(results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci'].keys()) - assert results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '5935162', 'alt': 'T'}} + assert results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'pos': '5935162', 'ref': 'A', 'alt': 'T'}} assert 'grch38' not in list(results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci'].keys()) - assert 
results['NM_015102.3:c.2818-2T>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011724.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055917.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015102.3'} - - assert results['flag'] == 'gene_variant' + assert results['NM_015102.3:c.2818-2T>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015102.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055917.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011724.2'} def test_variant64(self): variant = '19-41123094-G-GG' @@ -1866,110 +1885,133 @@ def test_variant64(self): print(results) assert results['flag'] == 'gene_variant' - assert 'NM_001042544.1:c.3233_3235=' in list(results.keys()) - assert results['NM_001042544.1:c.3233_3235=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001042544.1:c.3233_3235=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001042544.1:c.3233_3235=']['alt_genomic_loci'], []) - assert results['NM_001042544.1:c.3233_3235=']['gene_symbol'] == 'LTBP4' - assert results['NM_001042544.1:c.3233_3235=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036009.1:p.(Gln1078=)', 'slr': 'NP_001036009.1:p.(Q1078=)'} - assert results['NM_001042544.1:c.3233_3235=']['submitted_variant'] == '19-41123094-G-GG' - assert results['NM_001042544.1:c.3233_3235=']['genome_context_intronic_sequence'] == '' - assert results['NM_001042544.1:c.3233_3235=']['hgvs_lrg_variant'] == '' - assert results['NM_001042544.1:c.3233_3235=']['hgvs_transcript_variant'] == 'NM_001042544.1:c.3233_3235=' - assert results['NM_001042544.1:c.3233_3235=']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29022_29024=' - assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} - 
assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'AGG'}} - assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'AGG'}} - assert results['NM_001042544.1:c.3233_3235=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1'} - assert 'NM_001042545.1:c.3032_3034=' in list(results.keys()) - assert results['NM_001042545.1:c.3032_3034=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001042545.1:c.3032_3034=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001042545.1:c.3032_3034=']['alt_genomic_loci'], []) - assert results['NM_001042545.1:c.3032_3034=']['gene_symbol'] == 'LTBP4' - assert results['NM_001042545.1:c.3032_3034=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036010.1:p.(Gln1011=)', 'slr': 'NP_001036010.1:p.(Q1011=)'} assert results['NM_001042545.1:c.3032_3034=']['submitted_variant'] == '19-41123094-G-GG' - assert results['NM_001042545.1:c.3032_3034=']['genome_context_intronic_sequence'] == '' - assert results['NM_001042545.1:c.3032_3034=']['hgvs_lrg_variant'] == '' + assert results['NM_001042545.1:c.3032_3034=']['gene_symbol'] == 'LTBP4' + assert results['NM_001042545.1:c.3032_3034=']['gene_ids'] == {'hgnc_id': 'HGNC:6717', 'entrez_gene_id': '8425', 'ucsc_id': 'uc032hxp.2', 'omim_id': ['604710']} assert 
results['NM_001042545.1:c.3032_3034=']['hgvs_transcript_variant'] == 'NM_001042545.1:c.3032_3034=' + assert results['NM_001042545.1:c.3032_3034=']['genome_context_intronic_sequence'] == '' + assert results['NM_001042545.1:c.3032_3034=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001042545.1:c.3032_3034=']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29022_29024=' - assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'AGG'}} - assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'AGG'}} - assert results['NM_001042545.1:c.3032_3034=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1'} + assert results['NM_001042545.1:c.3032_3034=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036010.1:p.(Gln1011=)', 'slr': 'NP_001036010.1:p.(Q1011=)'} + assert results['NM_001042545.1:c.3032_3034=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001042545.1:c.3032_3034=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001042545.1:c.3032_3034=']['alt_genomic_loci'], []) + assert 
results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': 'chr19', 'pos': '41123093', 'ref': 'A', 'alt': 'AG'}} + assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'pos': '40617187', 'ref': 'AGG', 'alt': 'AGG'}} + assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': '19', 'pos': '41123093', 'ref': 'A', 'alt': 'AG'}} + assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'pos': '40617187', 'ref': 'AGG', 'alt': 'AGG'}} + assert results['NM_001042545.1:c.3032_3034=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1'} assert 'NM_003573.2:c.3122_3124=' in list(results.keys()) - assert results['NM_003573.2:c.3122_3124=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003573.2:c.3122_3124=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003573.2:c.3122_3124=']['alt_genomic_loci'], []) - assert results['NM_003573.2:c.3122_3124=']['gene_symbol'] == 'LTBP4' - assert results['NM_003573.2:c.3122_3124=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003564.2:p.(Gln1041=)', 'slr': 'NP_003564.2:p.(Q1041=)'} assert results['NM_003573.2:c.3122_3124=']['submitted_variant'] == '19-41123094-G-GG' - assert results['NM_003573.2:c.3122_3124=']['genome_context_intronic_sequence'] == '' - assert results['NM_003573.2:c.3122_3124=']['hgvs_lrg_variant'] == '' + assert results['NM_003573.2:c.3122_3124=']['gene_symbol'] == 'LTBP4' + 
assert results['NM_003573.2:c.3122_3124=']['gene_ids'] == {'hgnc_id': 'HGNC:6717', 'entrez_gene_id': '8425', 'ucsc_id': 'uc032hxp.2', 'omim_id': ['604710']} assert results['NM_003573.2:c.3122_3124=']['hgvs_transcript_variant'] == 'NM_003573.2:c.3122_3124=' + assert results['NM_003573.2:c.3122_3124=']['genome_context_intronic_sequence'] == '' + assert results['NM_003573.2:c.3122_3124=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003573.2:c.3122_3124=']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29022_29024=' - assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'AGG'}} - assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'AGG'}} - assert results['NM_003573.2:c.3122_3124=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2'} + assert results['NM_003573.2:c.3122_3124=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003564.2:p.(Gln1041=)', 'slr': 'NP_003564.2:p.(Q1041=)'} + assert results['NM_003573.2:c.3122_3124=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003573.2:c.3122_3124=']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['NM_003573.2:c.3122_3124=']['alt_genomic_loci'], []) + assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': 'chr19', 'pos': '41123093', 'ref': 'A', 'alt': 'AG'}} + assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'pos': '40617187', 'ref': 'AGG', 'alt': 'AGG'}} + assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': '19', 'pos': '41123093', 'ref': 'A', 'alt': 'AG'}} + assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'pos': '40617187', 'ref': 'AGG', 'alt': 'AGG'}} + assert results['NM_003573.2:c.3122_3124=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1'} + assert 'NM_001042544.1:c.3233_3235=' in list(results.keys()) + assert results['NM_001042544.1:c.3233_3235=']['submitted_variant'] == '19-41123094-G-GG' + assert results['NM_001042544.1:c.3233_3235=']['gene_symbol'] == 'LTBP4' + assert results['NM_001042544.1:c.3233_3235=']['gene_ids'] == {'hgnc_id': 'HGNC:6717', 'entrez_gene_id': '8425', 'ucsc_id': 'uc032hxp.2', 'omim_id': ['604710']} + assert results['NM_001042544.1:c.3233_3235=']['hgvs_transcript_variant'] == 'NM_001042544.1:c.3233_3235=' + assert results['NM_001042544.1:c.3233_3235=']['genome_context_intronic_sequence'] == '' + assert results['NM_001042544.1:c.3233_3235=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001042544.1:c.3233_3235=']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29022_29024=' + 
assert results['NM_001042544.1:c.3233_3235=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036009.1:p.(Gln1078=)', 'slr': 'NP_001036009.1:p.(Q1078=)'} + assert results['NM_001042544.1:c.3233_3235=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001042544.1:c.3233_3235=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001042544.1:c.3233_3235=']['alt_genomic_loci'], []) + assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': 'chr19', 'pos': '41123093', 'ref': 'A', 'alt': 'AG'}} + assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'pos': '40617187', 'ref': 'AGG', 'alt': 'AGG'}} + assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': '19', 'pos': '41123093', 'ref': 'A', 'alt': 'AG'}} + assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'pos': '40617187', 'ref': 'AGG', 'alt': 'AGG'}} + assert results['NM_001042544.1:c.3233_3235=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1'} def test_variant65(self): variant = '15-72105928-AC-A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_014249.2:c.946_949=' in list(results.keys()) - assert results['NM_014249.2:c.946_949=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_014249.2:c.946_949=']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_014249.2:c.946_949=']['alt_genomic_loci'], []) - assert results['NM_014249.2:c.946_949=']['gene_symbol'] == 'NR2E3' - assert results['NM_014249.2:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Asp316=)', 'slr': 'NP_055064.1:p.(D316=)'} assert results['NM_014249.2:c.946_949=']['submitted_variant'] == '15-72105928-AC-A' - assert results['NM_014249.2:c.946_949=']['genome_context_intronic_sequence'] == '' - assert results['NM_014249.2:c.946_949=']['hgvs_lrg_variant'] == '' + assert results['NM_014249.2:c.946_949=']['gene_symbol'] == 'NR2E3' + assert results['NM_014249.2:c.946_949=']['gene_ids'] == {'hgnc_id': 'HGNC:7974', 'entrez_gene_id': '10002', 'ucsc_id': 'uc032cil.2', 'omim_id': ['604485']} assert results['NM_014249.2:c.946_949=']['hgvs_transcript_variant'] == 'NM_014249.2:c.946_949=' + assert results['NM_014249.2:c.946_949=']['genome_context_intronic_sequence'] == '' + assert results['NM_014249.2:c.946_949=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014249.2:c.946_949=']['hgvs_refseqgene_variant'] == '' - assert results['NM_014249.2:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} + assert results['NM_014249.2:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Asp316=)', 'slr': 'NP_055064.1:p.(D316=)'} + assert results['NM_014249.2:c.946_949=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014249.2:c.946_949=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_014249.2:c.946_949=']['alt_genomic_loci'], []) + assert results['NM_014249.2:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'pos': '72105928', 'ref': 'AC', 'alt': 'A'}} assert 'hg38' not in list(results['NM_014249.2:c.946_949=']['primary_assembly_loci'].keys()) - 
assert results['NM_014249.2:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} + assert results['NM_014249.2:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'pos': '72105928', 'ref': 'AC', 'alt': 'A'}} assert 'grch38' not in list(results['NM_014249.2:c.946_949=']['primary_assembly_loci'].keys()) - assert results['NM_014249.2:c.946_949=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2'} + assert results['NM_014249.2:c.946_949=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1'} assert 'NM_016346.3:c.946_949=' in list(results.keys()) - assert results['NM_016346.3:c.946_949=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_016346.3:c.946_949=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_016346.3:c.946_949=']['alt_genomic_loci'], []) - assert results['NM_016346.3:c.946_949=']['gene_symbol'] == 'NR2E3' - assert results['NM_016346.3:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Asp316=)', 'slr': 'NP_057430.1:p.(D316=)'} assert results['NM_016346.3:c.946_949=']['submitted_variant'] == '15-72105928-AC-A' - assert results['NM_016346.3:c.946_949=']['genome_context_intronic_sequence'] == '' - assert results['NM_016346.3:c.946_949=']['hgvs_lrg_variant'] == '' + assert results['NM_016346.3:c.946_949=']['gene_symbol'] == 'NR2E3' + assert results['NM_016346.3:c.946_949=']['gene_ids'] == {'hgnc_id': 'HGNC:7974', 'entrez_gene_id': '10002', 'ucsc_id': 'uc032cil.2', 'omim_id': ['604485']} assert results['NM_016346.3:c.946_949=']['hgvs_transcript_variant'] == 
'NM_016346.3:c.946_949=' + assert results['NM_016346.3:c.946_949=']['genome_context_intronic_sequence'] == '' + assert results['NM_016346.3:c.946_949=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_016346.3:c.946_949=']['hgvs_refseqgene_variant'] == '' - assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} - assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813587_71813590=', 'vcf': {'chr': 'chr15', 'ref': 'GACC', 'pos': '71813587', 'alt': 'GACC'}} - assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} - assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813587_71813590=', 'vcf': {'chr': '15', 'ref': 'GACC', 'pos': '71813587', 'alt': 'GACC'}} - assert results['NM_016346.3:c.946_949=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3'} + assert results['NM_016346.3:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Asp316=)', 'slr': 'NP_057430.1:p.(D316=)'} + assert results['NM_016346.3:c.946_949=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_016346.3:c.946_949=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_016346.3:c.946_949=']['alt_genomic_loci'], []) + assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'pos': '72105928', 'ref': 'AC', 'alt': 'A'}} + assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000015.10:g.71813587_71813590=', 'vcf': {'chr': 'chr15', 'pos': '71813587', 'ref': 'GACC', 'alt': 'GACC'}} + assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'pos': '72105928', 'ref': 'AC', 'alt': 'A'}} + assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813587_71813590=', 'vcf': {'chr': '15', 'pos': '71813587', 'ref': 'GACC', 'alt': 'GACC'}} + assert results['NM_016346.3:c.946_949=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1'} + + assert 'NM_016346.2:c.946_949=' in list(results.keys()) + assert results['NM_016346.2:c.946_949=']['submitted_variant'] == '15-72105928-AC-A' + assert results['NM_016346.2:c.946_949=']['gene_symbol'] == 'NR2E3' + assert results['NM_016346.2:c.946_949=']['gene_ids'] == {'hgnc_id': 'HGNC:7974', 'entrez_gene_id': '10002', 'ucsc_id': 'uc032cil.2', 'omim_id': ['604485']} + assert results['NM_016346.2:c.946_949=']['hgvs_transcript_variant'] == 'NM_016346.2:c.946_949=' + assert results['NM_016346.2:c.946_949=']['genome_context_intronic_sequence'] == '' + assert results['NM_016346.2:c.946_949=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_016346.2:c.946_949=']['hgvs_refseqgene_variant'] == '' + assert results['NM_016346.2:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Asp316=)', 'slr': 'NP_057430.1:p.(D316=)'} + assert results['NM_016346.2:c.946_949=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_016346.2:c.946_949=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_016346.2:c.946_949=']['alt_genomic_loci'], []) + assert results['NM_016346.2:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'pos': '72105928', 'ref': 'AC', 'alt': 'A'}} + assert 'hg38' not in list(results['NM_016346.2:c.946_949=']['primary_assembly_loci'].keys()) + assert results['NM_016346.2:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'pos': '72105928', 'ref': 'AC', 'alt': 'A'}} + assert 'grch38' not in list(results['NM_016346.2:c.946_949=']['primary_assembly_loci'].keys()) + assert results['NM_016346.2:c.946_949=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1'} - assert results['flag'] == 'gene_variant' assert 'NM_014249.3:c.946_949=' in list(results.keys()) - assert results['NM_014249.3:c.946_949=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_014249.3:c.946_949=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_014249.3:c.946_949=']['alt_genomic_loci'], []) - assert results['NM_014249.3:c.946_949=']['gene_symbol'] == 'NR2E3' - assert results['NM_014249.3:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Asp316=)', 'slr': 'NP_055064.1:p.(D316=)'} assert results['NM_014249.3:c.946_949=']['submitted_variant'] == '15-72105928-AC-A' - assert results['NM_014249.3:c.946_949=']['genome_context_intronic_sequence'] == '' - assert results['NM_014249.3:c.946_949=']['hgvs_lrg_variant'] == '' + assert results['NM_014249.3:c.946_949=']['gene_symbol'] == 'NR2E3' + assert results['NM_014249.3:c.946_949=']['gene_ids'] == {'hgnc_id': 'HGNC:7974', 'entrez_gene_id': '10002', 'ucsc_id': 'uc032cil.2', 'omim_id': ['604485']} assert results['NM_014249.3:c.946_949=']['hgvs_transcript_variant'] == 'NM_014249.3:c.946_949=' + assert results['NM_014249.3:c.946_949=']['genome_context_intronic_sequence'] == '' + assert 
results['NM_014249.3:c.946_949=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014249.3:c.946_949=']['hgvs_refseqgene_variant'] == 'NG_009113.1:g.8034_8037=' - assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} + assert results['NM_014249.3:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Asp316=)', 'slr': 'NP_055064.1:p.(D316=)'} + assert results['NM_014249.3:c.946_949=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014249.3:c.946_949=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_014249.3:c.946_949=']['alt_genomic_loci'], []) + assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'pos': '72105928', 'ref': 'AC', 'alt': 'A'}} result_options = [ {'hgvs_genomic_description': 'NC_000015.10:g.71813586_71813591=', 'vcf': {'chr': 'chr15', 'ref': 'GGACCC', 'pos': '71813586', 'alt': 'GGACCC'}}, @@ -1977,7 +2019,7 @@ def test_variant65(self): 'vcf': {'alt': 'GACC', 'chr': 'chr15', 'pos': '71813587', 'ref': 'GACC'}} ] self.assertIn(results['NM_014249.3:c.946_949=']['primary_assembly_loci']['hg38'], result_options) - assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} + assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'pos': '72105928', 'ref': 'AC', 'alt': 'A'}} result_options2 = [ {'hgvs_genomic_description': 'NC_000015.10:g.71813586_71813591=', 'vcf': {'chr': '15', 'ref': 'GGACCC', 'pos': '71813586', 'alt': 'GGACCC'}}, @@ -1985,25 +2027,7 @@ def test_variant65(self): 'vcf': {'alt': 
'GACC', 'chr': '15', 'pos': '71813587', 'ref': 'GACC'}} ] self.assertIn(results['NM_014249.3:c.946_949=']['primary_assembly_loci']['grch38'], result_options2) - assert results['NM_014249.3:c.946_949=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3'} - - assert 'NM_016346.2:c.946_949=' in list(results.keys()) - assert results['NM_016346.2:c.946_949=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_016346.2:c.946_949=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_016346.2:c.946_949=']['alt_genomic_loci'], []) - assert results['NM_016346.2:c.946_949=']['gene_symbol'] == 'NR2E3' - assert results['NM_016346.2:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Asp316=)', 'slr': 'NP_057430.1:p.(D316=)'} - assert results['NM_016346.2:c.946_949=']['submitted_variant'] == '15-72105928-AC-A' - assert results['NM_016346.2:c.946_949=']['genome_context_intronic_sequence'] == '' - assert results['NM_016346.2:c.946_949=']['hgvs_lrg_variant'] == '' - assert results['NM_016346.2:c.946_949=']['hgvs_transcript_variant'] == 'NM_016346.2:c.946_949=' - assert results['NM_016346.2:c.946_949=']['hgvs_refseqgene_variant'] == '' - assert results['NM_016346.2:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} - assert 'hg38' not in list(results['NM_016346.2:c.946_949=']['primary_assembly_loci'].keys()) - assert results['NM_016346.2:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} - assert 'grch38' not in list(results['NM_016346.2:c.946_949=']['primary_assembly_loci'].keys()) - assert 
results['NM_016346.2:c.946_949=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2'} - + assert results['NM_014249.3:c.946_949=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1'} def test_variant66(self): variant = '12-122064773-CCCGCCA-C' @@ -2012,46 +2036,46 @@ def test_variant66(self): assert results['flag'] == 'gene_variant' assert 'NM_032790.3:c.126_128=' in list(results.keys()) - assert results['NM_032790.3:c.126_128=']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.126_128=' - assert results['NM_032790.3:c.126_128=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_032790.3:c.126_128=']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302883_302888del', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'CCCGCCA', 'pos': '302871', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302883_302888del', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'CCCGCCA', 'pos': '302871', 'alt': 'C'}}}]) - assert results['NM_032790.3:c.126_128=']['gene_symbol'] == 'ORAI1' - assert results['NM_032790.3:c.126_128=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Ala42=)', 'slr': 'NP_116179.2:p.(A42=)'} assert results['NM_032790.3:c.126_128=']['submitted_variant'] == '12-122064773-CCCGCCA-C' - assert results['NM_032790.3:c.126_128=']['genome_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.126_128=']['hgvs_lrg_variant'] == 'LRG_93:g.5299_5301=' + assert results['NM_032790.3:c.126_128=']['gene_symbol'] == 'ORAI1' + assert results['NM_032790.3:c.126_128=']['gene_ids'] == {'hgnc_id': 'HGNC:25896', 'entrez_gene_id': '84876', 'ucsc_id': 'uc031zps.1', 'omim_id': ['610277']} assert 
results['NM_032790.3:c.126_128=']['hgvs_transcript_variant'] == 'NM_032790.3:c.126_128=' + assert results['NM_032790.3:c.126_128=']['genome_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.126_128=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032790.3:c.126_128=']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5299_5301=' - assert results['NM_032790.3:c.126_128=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064785_122064790del', 'vcf': {'chr': 'chr12', 'ref': 'CCCGCCA', 'pos': '122064773', 'alt': 'C'}} - assert results['NM_032790.3:c.126_128=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626875=', 'vcf': {'chr': 'chr12', 'ref': 'CCC', 'pos': '121626873', 'alt': 'CCC'}} - assert results['NM_032790.3:c.126_128=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064785_122064790del', 'vcf': {'chr': '12', 'ref': 'CCCGCCA', 'pos': '122064773', 'alt': 'C'}} - assert results['NM_032790.3:c.126_128=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626875=', 'vcf': {'chr': '12', 'ref': 'CCC', 'pos': '121626873', 'alt': 'CCC'}} - assert results['NM_032790.3:c.126_128=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} - + assert results['NM_032790.3:c.126_128=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Ala42=)', 'slr': 'NP_116179.2:p.(A42=)'} + assert results['NM_032790.3:c.126_128=']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.126_128=' + assert results['NM_032790.3:c.126_128=']['hgvs_lrg_variant'] == 'LRG_93:g.5299_5301=' + 
self.assertCountEqual(results['NM_032790.3:c.126_128=']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302883_302888del', 'vcf': {'chr': 'HG1595_PATCH', 'pos': '302871', 'ref': 'CCCGCCA', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302883_302888del', 'vcf': {'chr': 'NW_004504303.2', 'pos': '302871', 'ref': 'CCCGCCA', 'alt': 'C'}}}]) + assert results['NM_032790.3:c.126_128=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064785_122064790del', 'vcf': {'chr': 'chr12', 'pos': '122064773', 'ref': 'CCCGCCA', 'alt': 'C'}} + assert results['NM_032790.3:c.126_128=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626875=', 'vcf': {'chr': 'chr12', 'pos': '121626873', 'ref': 'CCC', 'alt': 'CCC'}} + assert results['NM_032790.3:c.126_128=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064785_122064790del', 'vcf': {'chr': '12', 'pos': '122064773', 'ref': 'CCCGCCA', 'alt': 'C'}} + assert results['NM_032790.3:c.126_128=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626875=', 'vcf': {'chr': '12', 'pos': '121626873', 'ref': 'CCC', 'alt': 'CCC'}} + assert results['NM_032790.3:c.126_128=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} def test_variant67(self): variant = '12-122064774-CCGCCA-CCGCCA' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_032790.3:c.132_137dup' in list(results.keys()) - assert results['NM_032790.3:c.132_137dup']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.132_137dup' - assert 
results['NM_032790.3:c.132_137dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_032790.3:c.132_137dup']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302868_302887=', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'GGCCCCGCCACCGCCACCGC', 'pos': '302868', 'alt': 'GGCCCCGCCACCGCCACCGC'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302868_302887=', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'GGCCCCGCCACCGCCACCGC', 'pos': '302868', 'alt': 'GGCCCCGCCACCGCCACCGC'}}}]) - assert results['NM_032790.3:c.132_137dup']['gene_symbol'] == 'ORAI1' - assert results['NM_032790.3:c.132_137dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro46_Pro47dup)', 'slr': 'NP_116179.2:p.(P46_P47dup)'} assert results['NM_032790.3:c.132_137dup']['submitted_variant'] == '12-122064774-CCGCCA-CCGCCA' - assert results['NM_032790.3:c.132_137dup']['genome_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.132_137dup']['hgvs_lrg_variant'] == 'LRG_93:g.5305_5310dup' + assert results['NM_032790.3:c.132_137dup']['gene_symbol'] == 'ORAI1' + assert results['NM_032790.3:c.132_137dup']['gene_ids'] == {'hgnc_id': 'HGNC:25896', 'entrez_gene_id': '84876', 'ucsc_id': 'uc031zps.1', 'omim_id': ['610277']} assert results['NM_032790.3:c.132_137dup']['hgvs_transcript_variant'] == 'NM_032790.3:c.132_137dup' + assert results['NM_032790.3:c.132_137dup']['genome_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.132_137dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032790.3:c.132_137dup']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5305_5310dup' - assert results['NM_032790.3:c.132_137dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064770_122064789=', 'vcf': {'chr': 'chr12', 'ref': 'GGCCCCGCCACCGCCACCGC', 'pos': '122064770', 'alt': 'GGCCCCGCCACCGCCACCGC'}} - assert 
results['NM_032790.3:c.132_137dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626879_121626884dup', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCGCCA'}} - assert results['NM_032790.3:c.132_137dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064770_122064789=', 'vcf': {'chr': '12', 'ref': 'GGCCCCGCCACCGCCACCGC', 'pos': '122064770', 'alt': 'GGCCCCGCCACCGCCACCGC'}} - assert results['NM_032790.3:c.132_137dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626879_121626884dup', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCGCCA'}} - assert results['NM_032790.3:c.132_137dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} - - assert results['flag'] == 'gene_variant' + assert results['NM_032790.3:c.132_137dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro46_Pro47dup)', 'slr': 'NP_116179.2:p.(P46_P47dup)'} + assert results['NM_032790.3:c.132_137dup']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.132_137dup' + assert results['NM_032790.3:c.132_137dup']['hgvs_lrg_variant'] == 'LRG_93:g.5305_5310dup' + self.assertCountEqual(results['NM_032790.3:c.132_137dup']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302868_302887=', 'vcf': {'chr': 'HG1595_PATCH', 'pos': '302868', 'ref': 'GGCCCCGCCACCGCCACCGC', 'alt': 'GGCCCCGCCACCGCCACCGC'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302868_302887=', 'vcf': {'chr': 'NW_004504303.2', 'pos': '302868', 'ref': 'GGCCCCGCCACCGCCACCGC', 'alt': 'GGCCCCGCCACCGCCACCGC'}}}]) + assert results['NM_032790.3:c.132_137dup']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000012.11:g.122064770_122064789=', 'vcf': {'chr': 'chr12', 'pos': '122064770', 'ref': 'GGCCCCGCCACCGCCACCGC', 'alt': 'GGCCCCGCCACCGCCACCGC'}} + assert results['NM_032790.3:c.132_137dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626879_121626884dup', 'vcf': {'chr': 'chr12', 'pos': '121626873', 'ref': 'C', 'alt': 'CCCGCCA'}} + assert results['NM_032790.3:c.132_137dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064770_122064789=', 'vcf': {'chr': '12', 'pos': '122064770', 'ref': 'GGCCCCGCCACCGCCACCGC', 'alt': 'GGCCCCGCCACCGCCACCGC'}} + assert results['NM_032790.3:c.132_137dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626879_121626884dup', 'vcf': {'chr': '12', 'pos': '121626873', 'ref': 'C', 'alt': 'CCCGCCA'}} + assert results['NM_032790.3:c.132_137dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} def test_variant68(self): variant = '12-122064773-CCCGCCACCGCCACCGC-CCCGCCACCGCCGCCGTC' @@ -2060,22 +2084,22 @@ def test_variant68(self): assert results['flag'] == 'gene_variant' assert 'NM_032790.3:c.132_135delinsGCCGT' in list(results.keys()) - assert results['NM_032790.3:c.132_135delinsGCCGT']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.132_135delinsGCCGT' - assert results['NM_032790.3:c.132_135delinsGCCGT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_032790.3:c.132_135delinsGCCGT']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302883_302886delinsGCCGT', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'ACCG', 'pos': '302883', 'alt': 'GCCGT'}}}, {'hg19': {'hgvs_genomic_description': 
'NW_004504303.2:g.302883_302886delinsGCCGT', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'ACCG', 'pos': '302883', 'alt': 'GCCGT'}}}]) - assert results['NM_032790.3:c.132_135delinsGCCGT']['gene_symbol'] == 'ORAI1' - assert results['NM_032790.3:c.132_135delinsGCCGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro46SerfsTer42)', 'slr': 'NP_116179.2:p.(P46Sfs*42)'} assert results['NM_032790.3:c.132_135delinsGCCGT']['submitted_variant'] == '12-122064773-CCCGCCACCGCCACCGC-CCCGCCACCGCCGCCGTC' - assert results['NM_032790.3:c.132_135delinsGCCGT']['genome_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.132_135delinsGCCGT']['hgvs_lrg_variant'] == 'LRG_93:g.5305_5308delinsGCCGT' + assert results['NM_032790.3:c.132_135delinsGCCGT']['gene_symbol'] == 'ORAI1' + assert results['NM_032790.3:c.132_135delinsGCCGT']['gene_ids'] == {'hgnc_id': 'HGNC:25896', 'entrez_gene_id': '84876', 'ucsc_id': 'uc031zps.1', 'omim_id': ['610277']} assert results['NM_032790.3:c.132_135delinsGCCGT']['hgvs_transcript_variant'] == 'NM_032790.3:c.132_135delinsGCCGT' + assert results['NM_032790.3:c.132_135delinsGCCGT']['genome_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.132_135delinsGCCGT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032790.3:c.132_135delinsGCCGT']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5305_5308delinsGCCGT' - assert results['NM_032790.3:c.132_135delinsGCCGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064785_122064788delinsGCCGT', 'vcf': {'chr': 'chr12', 'ref': 'ACCG', 'pos': '122064785', 'alt': 'GCCGT'}} - assert results['NM_032790.3:c.132_135delinsGCCGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626879_121626882delinsGCCGT', 'vcf': {'chr': 'chr12', 'ref': 'ACCG', 'pos': '121626879', 'alt': 'GCCGT'}} - assert results['NM_032790.3:c.132_135delinsGCCGT']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000012.11:g.122064785_122064788delinsGCCGT', 'vcf': {'chr': '12', 'ref': 'ACCG', 'pos': '122064785', 'alt': 'GCCGT'}} - assert results['NM_032790.3:c.132_135delinsGCCGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626879_121626882delinsGCCGT', 'vcf': {'chr': '12', 'ref': 'ACCG', 'pos': '121626879', 'alt': 'GCCGT'}} - assert results['NM_032790.3:c.132_135delinsGCCGT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} - + assert results['NM_032790.3:c.132_135delinsGCCGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro46SerfsTer42)', 'slr': 'NP_116179.2:p.(P46Sfs*42)'} + assert results['NM_032790.3:c.132_135delinsGCCGT']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.132_135delinsGCCGT' + assert results['NM_032790.3:c.132_135delinsGCCGT']['hgvs_lrg_variant'] == 'LRG_93:g.5305_5308delinsGCCGT' + self.assertCountEqual(results['NM_032790.3:c.132_135delinsGCCGT']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302883_302886delinsGCCGT', 'vcf': {'chr': 'HG1595_PATCH', 'pos': '302883', 'ref': 'ACCG', 'alt': 'GCCGT'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302883_302886delinsGCCGT', 'vcf': {'chr': 'NW_004504303.2', 'pos': '302883', 'ref': 'ACCG', 'alt': 'GCCGT'}}}]) + assert results['NM_032790.3:c.132_135delinsGCCGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064785_122064788delinsGCCGT', 'vcf': {'chr': 'chr12', 'pos': '122064785', 'ref': 'ACCG', 'alt': 'GCCGT'}} + assert results['NM_032790.3:c.132_135delinsGCCGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626879_121626882delinsGCCGT', 'vcf': {'chr': 
'chr12', 'pos': '121626879', 'ref': 'ACCG', 'alt': 'GCCGT'}} + assert results['NM_032790.3:c.132_135delinsGCCGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064785_122064788delinsGCCGT', 'vcf': {'chr': '12', 'pos': '122064785', 'ref': 'ACCG', 'alt': 'GCCGT'}} + assert results['NM_032790.3:c.132_135delinsGCCGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626879_121626882delinsGCCGT', 'vcf': {'chr': '12', 'pos': '121626879', 'ref': 'ACCG', 'alt': 'GCCGT'}} + assert results['NM_032790.3:c.132_135delinsGCCGT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} def test_variant69(self): variant = 'NC_000012.11:g.122064777C>A' @@ -2084,22 +2108,22 @@ def test_variant69(self): assert results['flag'] == 'gene_variant' assert 'NM_032790.3:c.129_130insACACCG' in list(results.keys()) - assert results['NM_032790.3:c.129_130insACACCG']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.129_130insACACCG' - assert results['NM_032790.3:c.129_130insACACCG']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_032790.3:c.129_130insACACCG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302875C>A', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'C', 'pos': '302875', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302875C>A', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'C', 'pos': '302875', 'alt': 'A'}}}]) - assert results['NM_032790.3:c.129_130insACACCG']['gene_symbol'] == 'ORAI1' - assert results['NM_032790.3:c.129_130insACACCG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro43_Pro44insThrPro)', 'slr': 'NP_116179.2:p.(P43_P44insTP)'} assert 
results['NM_032790.3:c.129_130insACACCG']['submitted_variant'] == 'NC_000012.11:g.122064777C>A' - assert results['NM_032790.3:c.129_130insACACCG']['genome_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.129_130insACACCG']['hgvs_lrg_variant'] == 'LRG_93:g.5302_5303insACACCG' + assert results['NM_032790.3:c.129_130insACACCG']['gene_symbol'] == 'ORAI1' + assert results['NM_032790.3:c.129_130insACACCG']['gene_ids'] == {'hgnc_id': 'HGNC:25896', 'entrez_gene_id': '84876', 'ucsc_id': 'uc031zps.1', 'omim_id': ['610277']} assert results['NM_032790.3:c.129_130insACACCG']['hgvs_transcript_variant'] == 'NM_032790.3:c.129_130insACACCG' + assert results['NM_032790.3:c.129_130insACACCG']['genome_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.129_130insACACCG']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032790.3:c.129_130insACACCG']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5302_5303insACACCG' - assert results['NM_032790.3:c.129_130insACACCG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064777C>A', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '122064777', 'alt': 'A'}} - assert results['NM_032790.3:c.129_130insACACCG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insACACCG', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCGACA'}} - assert results['NM_032790.3:c.129_130insACACCG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064777C>A', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '122064777', 'alt': 'A'}} - assert results['NM_032790.3:c.129_130insACACCG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insACACCG', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCGACA'}} - assert results['NM_032790.3:c.129_130insACACCG']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} - + assert results['NM_032790.3:c.129_130insACACCG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro43_Pro44insThrPro)', 'slr': 'NP_116179.2:p.(P43_P44insTP)'} + assert results['NM_032790.3:c.129_130insACACCG']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.129_130insACACCG' + assert results['NM_032790.3:c.129_130insACACCG']['hgvs_lrg_variant'] == 'LRG_93:g.5302_5303insACACCG' + self.assertCountEqual(results['NM_032790.3:c.129_130insACACCG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302875C>A', 'vcf': {'chr': 'HG1595_PATCH', 'pos': '302875', 'ref': 'C', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302875C>A', 'vcf': {'chr': 'NW_004504303.2', 'pos': '302875', 'ref': 'C', 'alt': 'A'}}}]) + assert results['NM_032790.3:c.129_130insACACCG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064777C>A', 'vcf': {'chr': 'chr12', 'pos': '122064777', 'ref': 'C', 'alt': 'A'}} + assert results['NM_032790.3:c.129_130insACACCG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insACACCG', 'vcf': {'chr': 'chr12', 'pos': '121626873', 'ref': 'C', 'alt': 'CCCGACA'}} + assert results['NM_032790.3:c.129_130insACACCG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064777C>A', 'vcf': {'chr': '12', 'pos': '122064777', 'ref': 'C', 'alt': 'A'}} + assert results['NM_032790.3:c.129_130insACACCG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insACACCG', 'vcf': {'chr': '12', 'pos': '121626873', 'ref': 'C', 'alt': 'CCCGACA'}} + assert 
results['NM_032790.3:c.129_130insACACCG']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} def test_variant70(self): variant = 'NC_000012.11:g.122064776delG' @@ -2108,22 +2132,22 @@ def test_variant70(self): assert results['flag'] == 'gene_variant' assert 'NM_032790.3:c.128_129insCCACC' in list(results.keys()) - assert results['NM_032790.3:c.128_129insCCACC']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.128_129insCCACC' - assert results['NM_032790.3:c.128_129insCCACC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_032790.3:c.128_129insCCACC']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302874del', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'CG', 'pos': '302873', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302874del', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'CG', 'pos': '302873', 'alt': 'C'}}}]) - assert results['NM_032790.3:c.128_129insCCACC']['gene_symbol'] == 'ORAI1' - assert results['NM_032790.3:c.128_129insCCACC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro44HisfsTer22)', 'slr': 'NP_116179.2:p.(P44Hfs*22)'} assert results['NM_032790.3:c.128_129insCCACC']['submitted_variant'] == 'NC_000012.11:g.122064776delG' - assert results['NM_032790.3:c.128_129insCCACC']['genome_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.128_129insCCACC']['hgvs_lrg_variant'] == 'LRG_93:g.5301_5302insCCACC' + assert results['NM_032790.3:c.128_129insCCACC']['gene_symbol'] == 'ORAI1' + assert results['NM_032790.3:c.128_129insCCACC']['gene_ids'] == {'hgnc_id': 'HGNC:25896', 'entrez_gene_id': '84876', 'ucsc_id': 'uc031zps.1', 'omim_id': ['610277']} assert 
results['NM_032790.3:c.128_129insCCACC']['hgvs_transcript_variant'] == 'NM_032790.3:c.128_129insCCACC' + assert results['NM_032790.3:c.128_129insCCACC']['genome_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.128_129insCCACC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032790.3:c.128_129insCCACC']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5301_5302insCCACC' - assert results['NM_032790.3:c.128_129insCCACC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064776del', 'vcf': {'chr': 'chr12', 'ref': 'CG', 'pos': '122064775', 'alt': 'C'}} - assert results['NM_032790.3:c.128_129insCCACC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626875_121626876insCCACC', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCCCA'}} - assert results['NM_032790.3:c.128_129insCCACC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064776del', 'vcf': {'chr': '12', 'ref': 'CG', 'pos': '122064775', 'alt': 'C'}} - assert results['NM_032790.3:c.128_129insCCACC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626875_121626876insCCACC', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCCCA'}} - assert results['NM_032790.3:c.128_129insCCACC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} - + assert results['NM_032790.3:c.128_129insCCACC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro44HisfsTer22)', 'slr': 'NP_116179.2:p.(P44Hfs*22)'} + assert results['NM_032790.3:c.128_129insCCACC']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.128_129insCCACC' + assert 
results['NM_032790.3:c.128_129insCCACC']['hgvs_lrg_variant'] == 'LRG_93:g.5301_5302insCCACC' + self.assertCountEqual(results['NM_032790.3:c.128_129insCCACC']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302874del', 'vcf': {'chr': 'HG1595_PATCH', 'pos': '302873', 'ref': 'CG', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302874del', 'vcf': {'chr': 'NW_004504303.2', 'pos': '302873', 'ref': 'CG', 'alt': 'C'}}}]) + assert results['NM_032790.3:c.128_129insCCACC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064776del', 'vcf': {'chr': 'chr12', 'pos': '122064775', 'ref': 'CG', 'alt': 'C'}} + assert results['NM_032790.3:c.128_129insCCACC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626875_121626876insCCACC', 'vcf': {'chr': 'chr12', 'pos': '121626873', 'ref': 'C', 'alt': 'CCCCCA'}} + assert results['NM_032790.3:c.128_129insCCACC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064776del', 'vcf': {'chr': '12', 'pos': '122064775', 'ref': 'CG', 'alt': 'C'}} + assert results['NM_032790.3:c.128_129insCCACC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626875_121626876insCCACC', 'vcf': {'chr': '12', 'pos': '121626873', 'ref': 'C', 'alt': 'CCCCCA'}} + assert results['NM_032790.3:c.128_129insCCACC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} def test_variant71(self): variant = 'NC_000012.11:g.122064776dupG' @@ -2132,22 +2156,22 @@ def test_variant71(self): assert results['flag'] == 'gene_variant' assert 'NM_032790.3:c.129_130insGCCACCG' in list(results.keys()) - assert 
results['NM_032790.3:c.129_130insGCCACCG']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.129_130insGCCACCG' - assert results['NM_032790.3:c.129_130insGCCACCG']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_032790.3:c.129_130insGCCACCG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302874dup', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'C', 'pos': '302873', 'alt': 'CG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302874dup', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'C', 'pos': '302873', 'alt': 'CG'}}}]) - assert results['NM_032790.3:c.129_130insGCCACCG']['gene_symbol'] == 'ORAI1' - assert results['NM_032790.3:c.129_130insGCCACCG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro44AlafsTer46)', 'slr': 'NP_116179.2:p.(P44Afs*46)'} assert results['NM_032790.3:c.129_130insGCCACCG']['submitted_variant'] == 'NC_000012.11:g.122064776dupG' - assert results['NM_032790.3:c.129_130insGCCACCG']['genome_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.129_130insGCCACCG']['hgvs_lrg_variant'] == 'LRG_93:g.5302_5303insGCCACCG' + assert results['NM_032790.3:c.129_130insGCCACCG']['gene_symbol'] == 'ORAI1' + assert results['NM_032790.3:c.129_130insGCCACCG']['gene_ids'] == {'hgnc_id': 'HGNC:25896', 'entrez_gene_id': '84876', 'ucsc_id': 'uc031zps.1', 'omim_id': ['610277']} assert results['NM_032790.3:c.129_130insGCCACCG']['hgvs_transcript_variant'] == 'NM_032790.3:c.129_130insGCCACCG' + assert results['NM_032790.3:c.129_130insGCCACCG']['genome_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.129_130insGCCACCG']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032790.3:c.129_130insGCCACCG']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5302_5303insGCCACCG' - assert results['NM_032790.3:c.129_130insGCCACCG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064776dup', 'vcf': {'chr': 'chr12', 'ref': 
'C', 'pos': '122064775', 'alt': 'CG'}} - assert results['NM_032790.3:c.129_130insGCCACCG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insGCCACCG', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCGGCCA'}} - assert results['NM_032790.3:c.129_130insGCCACCG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064776dup', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '122064775', 'alt': 'CG'}} - assert results['NM_032790.3:c.129_130insGCCACCG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insGCCACCG', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCGGCCA'}} - assert results['NM_032790.3:c.129_130insGCCACCG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} - + assert results['NM_032790.3:c.129_130insGCCACCG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro44AlafsTer46)', 'slr': 'NP_116179.2:p.(P44Afs*46)'} + assert results['NM_032790.3:c.129_130insGCCACCG']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.129_130insGCCACCG' + assert results['NM_032790.3:c.129_130insGCCACCG']['hgvs_lrg_variant'] == 'LRG_93:g.5302_5303insGCCACCG' + self.assertCountEqual(results['NM_032790.3:c.129_130insGCCACCG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302874dup', 'vcf': {'chr': 'HG1595_PATCH', 'pos': '302873', 'ref': 'C', 'alt': 'CG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302874dup', 'vcf': {'chr': 'NW_004504303.2', 'pos': '302873', 'ref': 'C', 'alt': 'CG'}}}]) + assert results['NM_032790.3:c.129_130insGCCACCG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000012.11:g.122064776dup', 'vcf': {'chr': 'chr12', 'pos': '122064775', 'ref': 'C', 'alt': 'CG'}} + assert results['NM_032790.3:c.129_130insGCCACCG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insGCCACCG', 'vcf': {'chr': 'chr12', 'pos': '121626873', 'ref': 'C', 'alt': 'CCCGGCCA'}} + assert results['NM_032790.3:c.129_130insGCCACCG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064776dup', 'vcf': {'chr': '12', 'pos': '122064775', 'ref': 'C', 'alt': 'CG'}} + assert results['NM_032790.3:c.129_130insGCCACCG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insGCCACCG', 'vcf': {'chr': '12', 'pos': '121626873', 'ref': 'C', 'alt': 'CCCGGCCA'}} + assert results['NM_032790.3:c.129_130insGCCACCG']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} def test_variant72(self): variant = 'NC_000012.11:g.122064776_122064777insTTT' @@ -2156,22 +2180,22 @@ def test_variant72(self): assert results['flag'] == 'gene_variant' assert 'NM_032790.3:c.129_130insTTTCCACCG' in list(results.keys()) - assert results['NM_032790.3:c.129_130insTTTCCACCG']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.129_130insTTTCCACCG' - assert results['NM_032790.3:c.129_130insTTTCCACCG']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_032790.3:c.129_130insTTTCCACCG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302874_302875insTTT', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'G', 'pos': '302874', 'alt': 'GTTT'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302874_302875insTTT', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'G', 'pos': '302874', 'alt': 
'GTTT'}}}]) - assert results['NM_032790.3:c.129_130insTTTCCACCG']['gene_symbol'] == 'ORAI1' - assert results['NM_032790.3:c.129_130insTTTCCACCG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro43_Pro44insPheProPro)', 'slr': 'NP_116179.2:p.(P43_P44insFPP)'} assert results['NM_032790.3:c.129_130insTTTCCACCG']['submitted_variant'] == 'NC_000012.11:g.122064776_122064777insTTT' - assert results['NM_032790.3:c.129_130insTTTCCACCG']['genome_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.129_130insTTTCCACCG']['hgvs_lrg_variant'] == 'LRG_93:g.5302_5303insTTTCCACCG' + assert results['NM_032790.3:c.129_130insTTTCCACCG']['gene_symbol'] == 'ORAI1' + assert results['NM_032790.3:c.129_130insTTTCCACCG']['gene_ids'] == {'hgnc_id': 'HGNC:25896', 'entrez_gene_id': '84876', 'ucsc_id': 'uc031zps.1', 'omim_id': ['610277']} assert results['NM_032790.3:c.129_130insTTTCCACCG']['hgvs_transcript_variant'] == 'NM_032790.3:c.129_130insTTTCCACCG' + assert results['NM_032790.3:c.129_130insTTTCCACCG']['genome_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.129_130insTTTCCACCG']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032790.3:c.129_130insTTTCCACCG']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5302_5303insTTTCCACCG' - assert results['NM_032790.3:c.129_130insTTTCCACCG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064776_122064777insTTT', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '122064776', 'alt': 'GTTT'}} - assert results['NM_032790.3:c.129_130insTTTCCACCG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insTTTCCACCG', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCGTTTCCA'}} - assert results['NM_032790.3:c.129_130insTTTCCACCG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064776_122064777insTTT', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '122064776', 
'alt': 'GTTT'}} - assert results['NM_032790.3:c.129_130insTTTCCACCG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insTTTCCACCG', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCGTTTCCA'}} - assert results['NM_032790.3:c.129_130insTTTCCACCG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} - + assert results['NM_032790.3:c.129_130insTTTCCACCG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro43_Pro44insPheProPro)', 'slr': 'NP_116179.2:p.(P43_P44insFPP)'} + assert results['NM_032790.3:c.129_130insTTTCCACCG']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.129_130insTTTCCACCG' + assert results['NM_032790.3:c.129_130insTTTCCACCG']['hgvs_lrg_variant'] == 'LRG_93:g.5302_5303insTTTCCACCG' + self.assertCountEqual(results['NM_032790.3:c.129_130insTTTCCACCG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302874_302875insTTT', 'vcf': {'chr': 'HG1595_PATCH', 'pos': '302874', 'ref': 'G', 'alt': 'GTTT'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302874_302875insTTT', 'vcf': {'chr': 'NW_004504303.2', 'pos': '302874', 'ref': 'G', 'alt': 'GTTT'}}}]) + assert results['NM_032790.3:c.129_130insTTTCCACCG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064776_122064777insTTT', 'vcf': {'chr': 'chr12', 'pos': '122064776', 'ref': 'G', 'alt': 'GTTT'}} + assert results['NM_032790.3:c.129_130insTTTCCACCG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insTTTCCACCG', 'vcf': {'chr': 'chr12', 'pos': '121626873', 'ref': 'C', 'alt': 'CCCGTTTCCA'}} + assert 
results['NM_032790.3:c.129_130insTTTCCACCG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064776_122064777insTTT', 'vcf': {'chr': '12', 'pos': '122064776', 'ref': 'G', 'alt': 'GTTT'}} + assert results['NM_032790.3:c.129_130insTTTCCACCG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626876_121626877insTTTCCACCG', 'vcf': {'chr': '12', 'pos': '121626873', 'ref': 'C', 'alt': 'CCCGTTTCCA'}} + assert results['NM_032790.3:c.129_130insTTTCCACCG']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} def test_variant73(self): variant = 'NC_000012.11:g.122064772_122064775del' @@ -2180,22 +2204,22 @@ def test_variant73(self): assert results['flag'] == 'gene_variant' assert 'NM_032790.3:c.125_126delinsGCCA' in list(results.keys()) - assert results['NM_032790.3:c.125_126delinsGCCA']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.125_126delinsGCCA' - assert results['NM_032790.3:c.125_126delinsGCCA']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_032790.3:c.125_126delinsGCCA']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302870_302873del', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'GCCCC', 'pos': '302869', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302870_302873del', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'GCCCC', 'pos': '302869', 'alt': 'G'}}}]) - assert results['NM_032790.3:c.125_126delinsGCCA']['gene_symbol'] == 'ORAI1' - assert results['NM_032790.3:c.125_126delinsGCCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Ala42GlyfsTer23)', 'slr': 'NP_116179.2:p.(A42Gfs*23)'} assert results['NM_032790.3:c.125_126delinsGCCA']['submitted_variant'] == 
'NC_000012.11:g.122064772_122064775del' - assert results['NM_032790.3:c.125_126delinsGCCA']['genome_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.125_126delinsGCCA']['hgvs_lrg_variant'] == 'LRG_93:g.5298_5299delinsGCCA' + assert results['NM_032790.3:c.125_126delinsGCCA']['gene_symbol'] == 'ORAI1' + assert results['NM_032790.3:c.125_126delinsGCCA']['gene_ids'] == {'hgnc_id': 'HGNC:25896', 'entrez_gene_id': '84876', 'ucsc_id': 'uc031zps.1', 'omim_id': ['610277']} assert results['NM_032790.3:c.125_126delinsGCCA']['hgvs_transcript_variant'] == 'NM_032790.3:c.125_126delinsGCCA' + assert results['NM_032790.3:c.125_126delinsGCCA']['genome_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.125_126delinsGCCA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032790.3:c.125_126delinsGCCA']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5298_5299delinsGCCA' - assert results['NM_032790.3:c.125_126delinsGCCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064772_122064775del', 'vcf': {'chr': 'chr12', 'ref': 'GCCCC', 'pos': '122064771', 'alt': 'G'}} - assert results['NM_032790.3:c.125_126delinsGCCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626867_121626873delinsGCCA', 'vcf': {'chr': 'chr12', 'ref': 'CCCCGCC', 'pos': '121626867', 'alt': 'GCCA'}} - assert results['NM_032790.3:c.125_126delinsGCCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064772_122064775del', 'vcf': {'chr': '12', 'ref': 'GCCCC', 'pos': '122064771', 'alt': 'G'}} - assert results['NM_032790.3:c.125_126delinsGCCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626867_121626873delinsGCCA', 'vcf': {'chr': '12', 'ref': 'CCCCGCC', 'pos': '121626867', 'alt': 'GCCA'}} - assert results['NM_032790.3:c.125_126delinsGCCA']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} - + assert results['NM_032790.3:c.125_126delinsGCCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Ala42GlyfsTer23)', 'slr': 'NP_116179.2:p.(A42Gfs*23)'} + assert results['NM_032790.3:c.125_126delinsGCCA']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.125_126delinsGCCA' + assert results['NM_032790.3:c.125_126delinsGCCA']['hgvs_lrg_variant'] == 'LRG_93:g.5298_5299delinsGCCA' + self.assertCountEqual(results['NM_032790.3:c.125_126delinsGCCA']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302870_302873del', 'vcf': {'chr': 'HG1595_PATCH', 'pos': '302869', 'ref': 'GCCCC', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302870_302873del', 'vcf': {'chr': 'NW_004504303.2', 'pos': '302869', 'ref': 'GCCCC', 'alt': 'G'}}}]) + assert results['NM_032790.3:c.125_126delinsGCCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064772_122064775del', 'vcf': {'chr': 'chr12', 'pos': '122064771', 'ref': 'GCCCC', 'alt': 'G'}} + assert results['NM_032790.3:c.125_126delinsGCCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626867_121626873delinsGCCA', 'vcf': {'chr': 'chr12', 'pos': '121626867', 'ref': 'CCCCGCC', 'alt': 'GCCA'}} + assert results['NM_032790.3:c.125_126delinsGCCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064772_122064775del', 'vcf': {'chr': '12', 'pos': '122064771', 'ref': 'GCCCC', 'alt': 'G'}} + assert results['NM_032790.3:c.125_126delinsGCCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626867_121626873delinsGCCA', 'vcf': {'chr': '12', 'pos': '121626867', 'ref': 'CCCCGCC', 'alt': 'GCCA'}} + 
assert results['NM_032790.3:c.125_126delinsGCCA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} def test_variant74(self): variant = 'NC_000012.11:g.122064772_122064775dup' @@ -2204,70 +2228,70 @@ def test_variant74(self): assert results['flag'] == 'gene_variant' assert 'NM_032790.3:c.128_129insCCCCGCCACC' in list(results.keys()) - assert results['NM_032790.3:c.128_129insCCCCGCCACC']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.128_129insCCCCGCCACC' - assert results['NM_032790.3:c.128_129insCCCCGCCACC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_032790.3:c.128_129insCCCCGCCACC']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302870_302873dup', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'G', 'pos': '302869', 'alt': 'GCCCC'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302870_302873dup', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'G', 'pos': '302869', 'alt': 'GCCCC'}}}]) - assert results['NM_032790.3:c.128_129insCCCCGCCACC']['gene_symbol'] == 'ORAI1' - assert results['NM_032790.3:c.128_129insCCCCGCCACC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro45AlafsTer46)', 'slr': 'NP_116179.2:p.(P45Afs*46)'} assert results['NM_032790.3:c.128_129insCCCCGCCACC']['submitted_variant'] == 'NC_000012.11:g.122064772_122064775dup' - assert results['NM_032790.3:c.128_129insCCCCGCCACC']['genome_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.128_129insCCCCGCCACC']['hgvs_lrg_variant'] == 'LRG_93:g.5301_5302insCCCCGCCACC' + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['gene_symbol'] == 'ORAI1' + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['gene_ids'] == {'hgnc_id': 'HGNC:25896', 'entrez_gene_id': 
'84876', 'ucsc_id': 'uc031zps.1', 'omim_id': ['610277']} assert results['NM_032790.3:c.128_129insCCCCGCCACC']['hgvs_transcript_variant'] == 'NM_032790.3:c.128_129insCCCCGCCACC' + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['genome_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032790.3:c.128_129insCCCCGCCACC']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5301_5302insCCCCGCCACC' - assert results['NM_032790.3:c.128_129insCCCCGCCACC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064772_122064775dup', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '122064771', 'alt': 'GCCCC'}} - assert results['NM_032790.3:c.128_129insCCCCGCCACC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626875_121626876insCCCCGCCACC', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCCCCCGCCA'}} - assert results['NM_032790.3:c.128_129insCCCCGCCACC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064772_122064775dup', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '122064771', 'alt': 'GCCCC'}} - assert results['NM_032790.3:c.128_129insCCCCGCCACC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626875_121626876insCCCCGCCACC', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCCCCCGCCA'}} - assert results['NM_032790.3:c.128_129insCCCCGCCACC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} - + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro45AlafsTer46)', 'slr': 'NP_116179.2:p.(P45Afs*46)'} + 
assert results['NM_032790.3:c.128_129insCCCCGCCACC']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.128_129insCCCCGCCACC' + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['hgvs_lrg_variant'] == 'LRG_93:g.5301_5302insCCCCGCCACC' + self.assertCountEqual(results['NM_032790.3:c.128_129insCCCCGCCACC']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302870_302873dup', 'vcf': {'chr': 'HG1595_PATCH', 'pos': '302869', 'ref': 'G', 'alt': 'GCCCC'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302870_302873dup', 'vcf': {'chr': 'NW_004504303.2', 'pos': '302869', 'ref': 'G', 'alt': 'GCCCC'}}}]) + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064772_122064775dup', 'vcf': {'chr': 'chr12', 'pos': '122064771', 'ref': 'G', 'alt': 'GCCCC'}} + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626875_121626876insCCCCGCCACC', 'vcf': {'chr': 'chr12', 'pos': '121626873', 'ref': 'C', 'alt': 'CCCCCCCGCCA'}} + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064772_122064775dup', 'vcf': {'chr': '12', 'pos': '122064771', 'ref': 'G', 'alt': 'GCCCC'}} + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626875_121626876insCCCCGCCACC', 'vcf': {'chr': '12', 'pos': '121626873', 'ref': 'C', 'alt': 'CCCCCCCGCCA'}} + assert results['NM_032790.3:c.128_129insCCCCGCCACC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} def test_variant75(self): variant = 
'NC_000012.11:g.122064773_122064774insTTTT' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_032790.3:c.126_127insTTTTCCGCCA' in list(results.keys()) - assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.126_127insTTTTCCGCCA' - assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_032790.3:c.126_127insTTTTCCGCCA']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302872insTTTT', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'C', 'pos': '302871', 'alt': 'CTTTT'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302872insTTTT', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'C', 'pos': '302871', 'alt': 'CTTTT'}}}]) - assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['gene_symbol'] == 'ORAI1' - assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro43PhefsTer48)', 'slr': 'NP_116179.2:p.(P43Ffs*48)'} assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['submitted_variant'] == 'NC_000012.11:g.122064773_122064774insTTTT' - assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['genome_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['hgvs_lrg_variant'] == 'LRG_93:g.5299_5300insTTTTCCGCCA' + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['gene_symbol'] == 'ORAI1' + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['gene_ids'] == {'hgnc_id': 'HGNC:25896', 'entrez_gene_id': '84876', 'ucsc_id': 'uc031zps.1', 'omim_id': ['610277']} assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['hgvs_transcript_variant'] == 'NM_032790.3:c.126_127insTTTTCCGCCA' + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['genome_context_intronic_sequence'] == '' + assert 
results['NM_032790.3:c.126_127insTTTTCCGCCA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5299_5300insTTTTCCGCCA' - assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064774insTTTT', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '122064773', 'alt': 'CTTTT'}} - assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626874insTTTTCCGCCA', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': 'CTTTTCCGCCA'}} - assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064774insTTTT', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '122064773', 'alt': 'CTTTT'}} - assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626874insTTTTCCGCCA', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': 'CTTTTCCGCCA'}} - assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} - - assert results['flag'] == 'gene_variant' + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro43PhefsTer48)', 'slr': 'NP_116179.2:p.(P43Ffs*48)'} + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.126_127insTTTTCCGCCA' + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['hgvs_lrg_variant'] == 'LRG_93:g.5299_5300insTTTTCCGCCA' 
+ self.assertCountEqual(results['NM_032790.3:c.126_127insTTTTCCGCCA']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302872insTTTT', 'vcf': {'chr': 'HG1595_PATCH', 'pos': '302871', 'ref': 'C', 'alt': 'CTTTT'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302872insTTTT', 'vcf': {'chr': 'NW_004504303.2', 'pos': '302871', 'ref': 'C', 'alt': 'CTTTT'}}}]) + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064774insTTTT', 'vcf': {'chr': 'chr12', 'pos': '122064773', 'ref': 'C', 'alt': 'CTTTT'}} + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626874insTTTTCCGCCA', 'vcf': {'chr': 'chr12', 'pos': '121626873', 'ref': 'C', 'alt': 'CTTTTCCGCCA'}} + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064774insTTTT', 'vcf': {'chr': '12', 'pos': '122064773', 'ref': 'C', 'alt': 'CTTTT'}} + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626874insTTTTCCGCCA', 'vcf': {'chr': '12', 'pos': '121626873', 'ref': 'C', 'alt': 'CTTTTCCGCCA'}} + assert results['NM_032790.3:c.126_127insTTTTCCGCCA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} def test_variant76(self): variant = 'NC_000012.11:g.122064772_122064777del' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_032790.3:c.126C>A' in list(results.keys()) - assert 
results['NM_032790.3:c.126C>A']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.126C>A' - assert results['NM_032790.3:c.126C>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_032790.3:c.126C>A']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876del', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'GCCCCGC', 'pos': '302869', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876del', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'GCCCCGC', 'pos': '302869', 'alt': 'G'}}}]) - assert results['NM_032790.3:c.126C>A']['gene_symbol'] == 'ORAI1' - assert results['NM_032790.3:c.126C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Ala42=)', 'slr': 'NP_116179.2:p.(A42=)'} assert results['NM_032790.3:c.126C>A']['submitted_variant'] == 'NC_000012.11:g.122064772_122064777del' - assert results['NM_032790.3:c.126C>A']['genome_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.126C>A']['hgvs_lrg_variant'] == 'LRG_93:g.5299C>A' + assert results['NM_032790.3:c.126C>A']['gene_symbol'] == 'ORAI1' + assert results['NM_032790.3:c.126C>A']['gene_ids'] == {'hgnc_id': 'HGNC:25896', 'entrez_gene_id': '84876', 'ucsc_id': 'uc031zps.1', 'omim_id': ['610277']} assert results['NM_032790.3:c.126C>A']['hgvs_transcript_variant'] == 'NM_032790.3:c.126C>A' + assert results['NM_032790.3:c.126C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.126C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032790.3:c.126C>A']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5299C>A' - assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064778del', 'vcf': {'chr': 'chr12', 'ref': 'GCCCCGC', 'pos': '122064771', 'alt': 'G'}} - assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873C>A', 
'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': 'A'}} - assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064778del', 'vcf': {'chr': '12', 'ref': 'GCCCCGC', 'pos': '122064771', 'alt': 'G'}} - assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873C>A', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': 'A'}} - assert results['NM_032790.3:c.126C>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} - - assert results['flag'] == 'gene_variant' + assert results['NM_032790.3:c.126C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Ala42=)', 'slr': 'NP_116179.2:p.(A42=)'} + assert results['NM_032790.3:c.126C>A']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.126C>A' + assert results['NM_032790.3:c.126C>A']['hgvs_lrg_variant'] == 'LRG_93:g.5299C>A' + self.assertCountEqual(results['NM_032790.3:c.126C>A']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876del', 'vcf': {'chr': 'HG1595_PATCH', 'pos': '302869', 'ref': 'GCCCCGC', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876del', 'vcf': {'chr': 'NW_004504303.2', 'pos': '302869', 'ref': 'GCCCCGC', 'alt': 'G'}}}]) + assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064778del', 'vcf': {'chr': 'chr12', 'pos': '122064771', 'ref': 'GCCCCGC', 'alt': 'G'}} + assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873C>A', 'vcf': {'chr': 'chr12', 'pos': 
'121626873', 'ref': 'C', 'alt': 'A'}} + assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064778del', 'vcf': {'chr': '12', 'pos': '122064771', 'ref': 'GCCCCGC', 'alt': 'G'}} + assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873C>A', 'vcf': {'chr': '12', 'pos': '121626873', 'ref': 'C', 'alt': 'A'}} + assert results['NM_032790.3:c.126C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} def test_variant77(self): variant = 'NC_000012.11:g.122064772_122064777dup' @@ -2276,46 +2300,46 @@ def test_variant77(self): assert results['flag'] == 'gene_variant' assert 'NM_032790.3:c.131_132insCCCGCCACCGCC' in list(results.keys()) - assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.131_132insCCCGCCACCGCC' - assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_032790.3:c.131_132insCCCGCCACCGCC']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876dup', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'G', 'pos': '302869', 'alt': 'GCCCCGC'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876dup', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'G', 'pos': '302869', 'alt': 'GCCCCGC'}}}]) - assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['gene_symbol'] == 'ORAI1' - assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro44_Pro47dup)', 'slr': 'NP_116179.2:p.(P44_P47dup)'} assert 
results['NM_032790.3:c.131_132insCCCGCCACCGCC']['submitted_variant'] == 'NC_000012.11:g.122064772_122064777dup' - assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['genome_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['hgvs_lrg_variant'] == 'LRG_93:g.5304_5305insCCCGCCACCGCC' + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['gene_symbol'] == 'ORAI1' + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['gene_ids'] == {'hgnc_id': 'HGNC:25896', 'entrez_gene_id': '84876', 'ucsc_id': 'uc031zps.1', 'omim_id': ['610277']} assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['hgvs_transcript_variant'] == 'NM_032790.3:c.131_132insCCCGCCACCGCC' + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['genome_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5304_5305insCCCGCCACCGCC' - assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064778dup', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '122064771', 'alt': 'GCCCCGC'}} - assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626878_121626879insCCCGCCACCGCC', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCGCCCCCGCCA'}} - assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064778dup', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '122064771', 'alt': 'GCCCCGC'}} - assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626878_121626879insCCCGCCACCGCC', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 
'alt': 'CCCGCCCCCGCCA'}} - assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} - + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro44_Pro47dup)', 'slr': 'NP_116179.2:p.(P44_P47dup)'} + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.131_132insCCCGCCACCGCC' + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['hgvs_lrg_variant'] == 'LRG_93:g.5304_5305insCCCGCCACCGCC' + self.assertCountEqual(results['NM_032790.3:c.131_132insCCCGCCACCGCC']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876dup', 'vcf': {'chr': 'HG1595_PATCH', 'pos': '302869', 'ref': 'G', 'alt': 'GCCCCGC'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876dup', 'vcf': {'chr': 'NW_004504303.2', 'pos': '302869', 'ref': 'G', 'alt': 'GCCCCGC'}}}]) + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064778dup', 'vcf': {'chr': 'chr12', 'pos': '122064771', 'ref': 'G', 'alt': 'GCCCCGC'}} + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626878_121626879insCCCGCCACCGCC', 'vcf': {'chr': 'chr12', 'pos': '121626873', 'ref': 'C', 'alt': 'CCCGCCCCCGCCA'}} + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064778dup', 'vcf': {'chr': '12', 'pos': '122064771', 'ref': 'G', 'alt': 'GCCCCGC'}} + assert 
results['NM_032790.3:c.131_132insCCCGCCACCGCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626878_121626879insCCCGCCACCGCC', 'vcf': {'chr': '12', 'pos': '121626873', 'ref': 'C', 'alt': 'CCCGCCCCCGCCA'}} + assert results['NM_032790.3:c.131_132insCCCGCCACCGCC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} def test_variant78(self): variant = 'NC_000012.11:g.122064779_122064782dup' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_032790.3:c.135_136insACCGCCACCG' in list(results.keys()) - assert results['NM_032790.3:c.135_136insACCGCCACCG']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.135_136insACCGCCACCG' - assert results['NM_032790.3:c.135_136insACCGCCACCG']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_032790.3:c.135_136insACCGCCACCG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302877_302880dup', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'C', 'pos': '302876', 'alt': 'CACCG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302877_302880dup', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'C', 'pos': '302876', 'alt': 'CACCG'}}}]) - assert results['NM_032790.3:c.135_136insACCGCCACCG']['gene_symbol'] == 'ORAI1' - assert results['NM_032790.3:c.135_136insACCGCCACCG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro46ThrfsTer45)', 'slr': 'NP_116179.2:p.(P46Tfs*45)'} assert results['NM_032790.3:c.135_136insACCGCCACCG']['submitted_variant'] == 'NC_000012.11:g.122064779_122064782dup' - assert results['NM_032790.3:c.135_136insACCGCCACCG']['genome_context_intronic_sequence'] == '' - assert 
results['NM_032790.3:c.135_136insACCGCCACCG']['hgvs_lrg_variant'] == 'LRG_93:g.5308_5309insACCGCCACCG' + assert results['NM_032790.3:c.135_136insACCGCCACCG']['gene_symbol'] == 'ORAI1' + assert results['NM_032790.3:c.135_136insACCGCCACCG']['gene_ids'] == {'hgnc_id': 'HGNC:25896', 'entrez_gene_id': '84876', 'ucsc_id': 'uc031zps.1', 'omim_id': ['610277']} assert results['NM_032790.3:c.135_136insACCGCCACCG']['hgvs_transcript_variant'] == 'NM_032790.3:c.135_136insACCGCCACCG' + assert results['NM_032790.3:c.135_136insACCGCCACCG']['genome_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.135_136insACCGCCACCG']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032790.3:c.135_136insACCGCCACCG']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5308_5309insACCGCCACCG' - assert results['NM_032790.3:c.135_136insACCGCCACCG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064779_122064782dup', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '122064778', 'alt': 'CACCG'}} - assert results['NM_032790.3:c.135_136insACCGCCACCG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626882_121626883insACCGCCACCG', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCGCCACCGA'}} - assert results['NM_032790.3:c.135_136insACCGCCACCG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064779_122064782dup', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '122064778', 'alt': 'CACCG'}} - assert results['NM_032790.3:c.135_136insACCGCCACCG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626882_121626883insACCGCCACCG', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': 'CCCGCCACCGA'}} - assert results['NM_032790.3:c.135_136insACCGCCACCG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} - - assert results['flag'] == 'gene_variant' + assert results['NM_032790.3:c.135_136insACCGCCACCG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro46ThrfsTer45)', 'slr': 'NP_116179.2:p.(P46Tfs*45)'} + assert results['NM_032790.3:c.135_136insACCGCCACCG']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.135_136insACCGCCACCG' + assert results['NM_032790.3:c.135_136insACCGCCACCG']['hgvs_lrg_variant'] == 'LRG_93:g.5308_5309insACCGCCACCG' + self.assertCountEqual(results['NM_032790.3:c.135_136insACCGCCACCG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302877_302880dup', 'vcf': {'chr': 'HG1595_PATCH', 'pos': '302876', 'ref': 'C', 'alt': 'CACCG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302877_302880dup', 'vcf': {'chr': 'NW_004504303.2', 'pos': '302876', 'ref': 'C', 'alt': 'CACCG'}}}]) + assert results['NM_032790.3:c.135_136insACCGCCACCG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064779_122064782dup', 'vcf': {'chr': 'chr12', 'pos': '122064778', 'ref': 'C', 'alt': 'CACCG'}} + assert results['NM_032790.3:c.135_136insACCGCCACCG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626882_121626883insACCGCCACCG', 'vcf': {'chr': 'chr12', 'pos': '121626873', 'ref': 'C', 'alt': 'CCCGCCACCGA'}} + assert results['NM_032790.3:c.135_136insACCGCCACCG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064779_122064782dup', 'vcf': {'chr': '12', 'pos': '122064778', 'ref': 'C', 'alt': 'CACCG'}} + assert results['NM_032790.3:c.135_136insACCGCCACCG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626882_121626883insACCGCCACCG', 'vcf': {'chr': '12', 'pos': '121626873', 'ref': 'C', 'alt': 'CCCGCCACCGA'}} + assert 
results['NM_032790.3:c.135_136insACCGCCACCG']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} def test_variant79(self): variant = 'NC_000012.11:g.122064772_122064782del' @@ -2324,322 +2348,334 @@ def test_variant79(self): assert results['flag'] == 'gene_variant' assert 'NM_032790.3:c.126_127insA' in list(results.keys()) - assert results['NM_032790.3:c.126_127insA']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.126_127insA' - assert results['NM_032790.3:c.126_127insA']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_032790.3:c.126_127insA']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302872_302876del', 'vcf': {'chr': 'HG1595_PATCH', 'ref': 'GGCCCC', 'pos': '302868', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302872_302876del', 'vcf': {'chr': 'NW_004504303.2', 'ref': 'GGCCCC', 'pos': '302868', 'alt': 'G'}}}]) - assert results['NM_032790.3:c.126_127insA']['gene_symbol'] == 'ORAI1' - assert results['NM_032790.3:c.126_127insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro43ThrfsTer45)', 'slr': 'NP_116179.2:p.(P43Tfs*45)'} assert results['NM_032790.3:c.126_127insA']['submitted_variant'] == 'NC_000012.11:g.122064772_122064782del' - assert results['NM_032790.3:c.126_127insA']['genome_context_intronic_sequence'] == '' - assert results['NM_032790.3:c.126_127insA']['hgvs_lrg_variant'] == 'LRG_93:g.5299_5300insA' + assert results['NM_032790.3:c.126_127insA']['gene_symbol'] == 'ORAI1' + assert results['NM_032790.3:c.126_127insA']['gene_ids'] == {'hgnc_id': 'HGNC:25896', 'entrez_gene_id': '84876', 'ucsc_id': 'uc031zps.1', 'omim_id': ['610277']} assert 
results['NM_032790.3:c.126_127insA']['hgvs_transcript_variant'] == 'NM_032790.3:c.126_127insA' + assert results['NM_032790.3:c.126_127insA']['genome_context_intronic_sequence'] == '' + assert results['NM_032790.3:c.126_127insA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032790.3:c.126_127insA']['hgvs_refseqgene_variant'] == 'NG_007500.1:g.5299_5300insA' - assert results['NM_032790.3:c.126_127insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064774_122064778del', 'vcf': {'chr': 'chr12', 'ref': 'GGCCCC', 'pos': '122064770', 'alt': 'G'}} - assert results['NM_032790.3:c.126_127insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626874insA', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '121626873', 'alt': 'CA'}} - assert results['NM_032790.3:c.126_127insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064774_122064778del', 'vcf': {'chr': '12', 'ref': 'GGCCCC', 'pos': '122064770', 'alt': 'G'}} - assert results['NM_032790.3:c.126_127insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626874insA', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '121626873', 'alt': 'CA'}} - assert results['NM_032790.3:c.126_127insA']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} - + assert results['NM_032790.3:c.126_127insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Pro43ThrfsTer45)', 'slr': 'NP_116179.2:p.(P43Tfs*45)'} + assert results['NM_032790.3:c.126_127insA']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.126_127insA' + assert results['NM_032790.3:c.126_127insA']['hgvs_lrg_variant'] == 'LRG_93:g.5299_5300insA' + 
self.assertCountEqual(results['NM_032790.3:c.126_127insA']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302872_302876del', 'vcf': {'chr': 'HG1595_PATCH', 'pos': '302868', 'ref': 'GGCCCC', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302872_302876del', 'vcf': {'chr': 'NW_004504303.2', 'pos': '302868', 'ref': 'GGCCCC', 'alt': 'G'}}}]) + assert results['NM_032790.3:c.126_127insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064774_122064778del', 'vcf': {'chr': 'chr12', 'pos': '122064770', 'ref': 'GGCCCC', 'alt': 'G'}} + assert results['NM_032790.3:c.126_127insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626874insA', 'vcf': {'chr': 'chr12', 'pos': '121626873', 'ref': 'C', 'alt': 'CA'}} + assert results['NM_032790.3:c.126_127insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064774_122064778del', 'vcf': {'chr': '12', 'pos': '122064770', 'ref': 'GGCCCC', 'alt': 'G'}} + assert results['NM_032790.3:c.126_127insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873_121626874insA', 'vcf': {'chr': '12', 'pos': '121626873', 'ref': 'C', 'alt': 'CA'}} + assert results['NM_032790.3:c.126_127insA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'} def test_variant80(self): variant = 'NC_000002.11:g.95847041_95847043GCG=' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' + assert 'NM_001017396.1:c.345_347dup' in list(results.keys()) + assert results['NM_001017396.1:c.345_347dup']['submitted_variant'] == 
'NC_000002.11:g.95847041_95847043GCG=' + assert results['NM_001017396.1:c.345_347dup']['gene_symbol'] == 'ZNF2' + assert results['NM_001017396.1:c.345_347dup']['gene_ids'] == {'hgnc_id': 'HGNC:12991', 'entrez_gene_id': '7549', 'ucsc_id': 'uc032nuy.2', 'omim_id': ['194500']} + assert results['NM_001017396.1:c.345_347dup']['hgvs_transcript_variant'] == 'NM_001017396.1:c.345_347dup' + assert results['NM_001017396.1:c.345_347dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001017396.1:c.345_347dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001017396.1:c.345_347dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001017396.1:c.345_347dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001017396.1:p.(Arg117dup)', 'slr': 'NP_001017396.1:p.(R117dup)'} + assert results['NM_001017396.1:c.345_347dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001017396.1:c.345_347dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001017396.1:c.345_347dup']['alt_genomic_loci'], []) + assert results['NM_001017396.1:c.345_347dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': 'chr2', 'pos': '95847037', 'ref': 'GCTTGCGGCGGCGA', 'alt': 'GCTTGCGGCGGCGA'}} + assert 'hg38' not in list(results['NM_001017396.1:c.345_347dup']['primary_assembly_loci'].keys()) + assert results['NM_001017396.1:c.345_347dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': '2', 'pos': '95847037', 'ref': 'GCTTGCGGCGGCGA', 'alt': 'GCTTGCGGCGGCGA'}} + assert 'grch38' not in list(results['NM_001017396.1:c.345_347dup']['primary_assembly_loci'].keys()) + assert results['NM_001017396.1:c.345_347dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001017396.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001017396.1'} + + assert 
'NM_001291604.1:c.231_233dup' in list(results.keys()) + assert results['NM_001291604.1:c.231_233dup']['submitted_variant'] == 'NC_000002.11:g.95847041_95847043GCG=' + assert results['NM_001291604.1:c.231_233dup']['gene_symbol'] == 'ZNF2' + assert results['NM_001291604.1:c.231_233dup']['gene_ids'] == {'hgnc_id': 'HGNC:12991', 'entrez_gene_id': '7549', 'ucsc_id': 'uc032nuy.2', 'omim_id': ['194500']} + assert results['NM_001291604.1:c.231_233dup']['hgvs_transcript_variant'] == 'NM_001291604.1:c.231_233dup' + assert results['NM_001291604.1:c.231_233dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001291604.1:c.231_233dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001291604.1:c.231_233dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001291604.1:c.231_233dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278533.1:p.(Arg79dup)', 'slr': 'NP_001278533.1:p.(R79dup)'} + assert results['NM_001291604.1:c.231_233dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001291604.1:c.231_233dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001291604.1:c.231_233dup']['alt_genomic_loci'], []) + assert results['NM_001291604.1:c.231_233dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': 'chr2', 'pos': '95847037', 'ref': 'GCTTGCGGCGGCGA', 'alt': 'GCTTGCGGCGGCGA'}} + assert results['NM_001291604.1:c.231_233dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': 'chr2', 'pos': '95181295', 'ref': 'T', 'alt': 'TGCG'}} + assert results['NM_001291604.1:c.231_233dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': '2', 'pos': '95847037', 'ref': 'GCTTGCGGCGGCGA', 'alt': 'GCTTGCGGCGGCGA'}} + assert results['NM_001291604.1:c.231_233dup']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': '2', 'pos': '95181295', 'ref': 'T', 'alt': 'TGCG'}} + assert results['NM_001291604.1:c.231_233dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291604.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278533.1'} + assert 'NM_021088.3:c.471_473dup' in list(results.keys()) - assert results['NM_021088.3:c.471_473dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_021088.3:c.471_473dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_021088.3:c.471_473dup']['alt_genomic_loci'], []) - assert results['NM_021088.3:c.471_473dup']['gene_symbol'] == 'ZNF2' - assert results['NM_021088.3:c.471_473dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_066574.2:p.(Arg159dup)', 'slr': 'NP_066574.2:p.(R159dup)'} assert results['NM_021088.3:c.471_473dup']['submitted_variant'] == 'NC_000002.11:g.95847041_95847043GCG=' - assert results['NM_021088.3:c.471_473dup']['genome_context_intronic_sequence'] == '' - assert results['NM_021088.3:c.471_473dup']['hgvs_lrg_variant'] == '' + assert results['NM_021088.3:c.471_473dup']['gene_symbol'] == 'ZNF2' + assert results['NM_021088.3:c.471_473dup']['gene_ids'] == {'hgnc_id': 'HGNC:12991', 'entrez_gene_id': '7549', 'ucsc_id': 'uc032nuy.2', 'omim_id': ['194500']} assert results['NM_021088.3:c.471_473dup']['hgvs_transcript_variant'] == 'NM_021088.3:c.471_473dup' + assert results['NM_021088.3:c.471_473dup']['genome_context_intronic_sequence'] == '' + assert results['NM_021088.3:c.471_473dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_021088.3:c.471_473dup']['hgvs_refseqgene_variant'] == 'NG_033798.1:g.20883_20885dup' - assert results['NM_021088.3:c.471_473dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': 'chr2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 
'GCTTGCGGCGGCGA'}} - assert results['NM_021088.3:c.471_473dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '95181295', 'alt': 'TGCG'}} - assert results['NM_021088.3:c.471_473dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': '2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} - assert results['NM_021088.3:c.471_473dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '95181295', 'alt': 'TGCG'}} - assert results['NM_021088.3:c.471_473dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_033798.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_066574.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021088.3'} + assert results['NM_021088.3:c.471_473dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_066574.2:p.(Arg159dup)', 'slr': 'NP_066574.2:p.(R159dup)'} + assert results['NM_021088.3:c.471_473dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_021088.3:c.471_473dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_021088.3:c.471_473dup']['alt_genomic_loci'], []) + assert results['NM_021088.3:c.471_473dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': 'chr2', 'pos': '95847037', 'ref': 'GCTTGCGGCGGCGA', 'alt': 'GCTTGCGGCGGCGA'}} + assert results['NM_021088.3:c.471_473dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': 'chr2', 'pos': '95181295', 'ref': 'T', 'alt': 'TGCG'}} + assert results['NM_021088.3:c.471_473dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': '2', 'pos': 
'95847037', 'ref': 'GCTTGCGGCGGCGA', 'alt': 'GCTTGCGGCGGCGA'}} + assert results['NM_021088.3:c.471_473dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': '2', 'pos': '95181295', 'ref': 'T', 'alt': 'TGCG'}} + assert results['NM_021088.3:c.471_473dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_066574.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_033798.1'} assert 'NM_001291605.1:c.510_512dup' in list(results.keys()) - assert results['NM_001291605.1:c.510_512dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001291605.1:c.510_512dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001291605.1:c.510_512dup']['alt_genomic_loci'], []) - assert results['NM_001291605.1:c.510_512dup']['gene_symbol'] == 'ZNF2' - assert results['NM_001291605.1:c.510_512dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278534.1:p.(Arg172dup)', 'slr': 'NP_001278534.1:p.(R172dup)'} assert results['NM_001291605.1:c.510_512dup']['submitted_variant'] == 'NC_000002.11:g.95847041_95847043GCG=' - assert results['NM_001291605.1:c.510_512dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001291605.1:c.510_512dup']['hgvs_lrg_variant'] == '' + assert results['NM_001291605.1:c.510_512dup']['gene_symbol'] == 'ZNF2' + assert results['NM_001291605.1:c.510_512dup']['gene_ids'] == {'hgnc_id': 'HGNC:12991', 'entrez_gene_id': '7549', 'ucsc_id': 'uc032nuy.2', 'omim_id': ['194500']} assert results['NM_001291605.1:c.510_512dup']['hgvs_transcript_variant'] == 'NM_001291605.1:c.510_512dup' + assert results['NM_001291605.1:c.510_512dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001291605.1:c.510_512dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001291605.1:c.510_512dup']['hgvs_refseqgene_variant'] == '' 
- assert results['NM_001291605.1:c.510_512dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': 'chr2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} - assert results['NM_001291605.1:c.510_512dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '95181295', 'alt': 'TGCG'}} - assert results['NM_001291605.1:c.510_512dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': '2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} - assert results['NM_001291605.1:c.510_512dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '95181295', 'alt': 'TGCG'}} - assert results['NM_001291605.1:c.510_512dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278534.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291605.1'} - - assert 'NM_001017396.2:c.345_347dup' in list(results.keys()) - assert results['NM_001017396.2:c.345_347dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001017396.2:c.345_347dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001017396.2:c.345_347dup']['alt_genomic_loci'], []) - assert results['NM_001017396.2:c.345_347dup']['gene_symbol'] == 'ZNF2' - assert results['NM_001017396.2:c.345_347dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001017396.1:p.(Arg117dup)', 'slr': 'NP_001017396.1:p.(R117dup)'} - assert results['NM_001017396.2:c.345_347dup']['submitted_variant'] == 'NC_000002.11:g.95847041_95847043GCG=' - assert results['NM_001017396.2:c.345_347dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001017396.2:c.345_347dup']['hgvs_lrg_variant'] == '' - assert 
results['NM_001017396.2:c.345_347dup']['hgvs_transcript_variant'] == 'NM_001017396.2:c.345_347dup' - assert results['NM_001017396.2:c.345_347dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001017396.2:c.345_347dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': 'chr2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} - assert results['NM_001017396.2:c.345_347dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '95181295', 'alt': 'TGCG'}} - assert results['NM_001017396.2:c.345_347dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': '2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} - assert results['NM_001017396.2:c.345_347dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '95181295', 'alt': 'TGCG'}} - assert results['NM_001017396.2:c.345_347dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001017396.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001017396.2'} - - assert 'NM_001282398.1:c.357_359dup' in list(results.keys()) - assert results['NM_001282398.1:c.357_359dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001282398.1:c.357_359dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001282398.1:c.357_359dup']['alt_genomic_loci'], []) - assert results['NM_001282398.1:c.357_359dup']['gene_symbol'] == 'ZNF2' - assert results['NM_001282398.1:c.357_359dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269327.1:p.(Arg121dup)', 'slr': 'NP_001269327.1:p.(R121dup)'} - assert results['NM_001282398.1:c.357_359dup']['submitted_variant'] == 
'NC_000002.11:g.95847041_95847043GCG=' - assert results['NM_001282398.1:c.357_359dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001282398.1:c.357_359dup']['hgvs_lrg_variant'] == '' - assert results['NM_001282398.1:c.357_359dup']['hgvs_transcript_variant'] == 'NM_001282398.1:c.357_359dup' - assert results['NM_001282398.1:c.357_359dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001282398.1:c.357_359dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': 'chr2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} - assert results['NM_001282398.1:c.357_359dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '95181295', 'alt': 'TGCG'}} - assert results['NM_001282398.1:c.357_359dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': '2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} - assert results['NM_001282398.1:c.357_359dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '95181295', 'alt': 'TGCG'}} - assert results['NM_001282398.1:c.357_359dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269327.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282398.1'} - - assert results['flag'] == 'gene_variant' - assert 'NM_001291604.1:c.231_233dup' in list(results.keys()) - assert results['NM_001291604.1:c.231_233dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001291604.1:c.231_233dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001291604.1:c.231_233dup']['alt_genomic_loci'], []) - assert results['NM_001291604.1:c.231_233dup']['gene_symbol'] == 'ZNF2' - assert 
results['NM_001291604.1:c.231_233dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278533.1:p.(Arg79dup)', 'slr': 'NP_001278533.1:p.(R79dup)'} - assert results['NM_001291604.1:c.231_233dup']['submitted_variant'] == 'NC_000002.11:g.95847041_95847043GCG=' - assert results['NM_001291604.1:c.231_233dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001291604.1:c.231_233dup']['hgvs_lrg_variant'] == '' - assert results['NM_001291604.1:c.231_233dup']['hgvs_transcript_variant'] == 'NM_001291604.1:c.231_233dup' - assert results['NM_001291604.1:c.231_233dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001291604.1:c.231_233dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': 'chr2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} - assert results['NM_001291604.1:c.231_233dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '95181295', 'alt': 'TGCG'}} - assert results['NM_001291604.1:c.231_233dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': '2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} - assert results['NM_001291604.1:c.231_233dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '95181295', 'alt': 'TGCG'}} - assert results['NM_001291604.1:c.231_233dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278533.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291604.1'} + assert results['NM_001291605.1:c.510_512dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278534.1:p.(Arg172dup)', 'slr': 'NP_001278534.1:p.(R172dup)'} + assert 
results['NM_001291605.1:c.510_512dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001291605.1:c.510_512dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001291605.1:c.510_512dup']['alt_genomic_loci'], []) + assert results['NM_001291605.1:c.510_512dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': 'chr2', 'pos': '95847037', 'ref': 'GCTTGCGGCGGCGA', 'alt': 'GCTTGCGGCGGCGA'}} + assert results['NM_001291605.1:c.510_512dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': 'chr2', 'pos': '95181295', 'ref': 'T', 'alt': 'TGCG'}} + assert results['NM_001291605.1:c.510_512dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': '2', 'pos': '95847037', 'ref': 'GCTTGCGGCGGCGA', 'alt': 'GCTTGCGGCGGCGA'}} + assert results['NM_001291605.1:c.510_512dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': '2', 'pos': '95181295', 'ref': 'T', 'alt': 'TGCG'}} + assert results['NM_001291605.1:c.510_512dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291605.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278534.1'} assert 'NM_021088.2:c.471_473dup' in list(results.keys()) - assert results['NM_021088.2:c.471_473dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_021088.2:c.471_473dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_021088.2:c.471_473dup']['alt_genomic_loci'], []) - assert results['NM_021088.2:c.471_473dup']['gene_symbol'] == 'ZNF2' - assert results['NM_021088.2:c.471_473dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_066574.2:p.(Arg159dup)', 'slr': 'NP_066574.2:p.(R159dup)'} assert results['NM_021088.2:c.471_473dup']['submitted_variant'] == 
'NC_000002.11:g.95847041_95847043GCG=' - assert results['NM_021088.2:c.471_473dup']['genome_context_intronic_sequence'] == '' - assert results['NM_021088.2:c.471_473dup']['hgvs_lrg_variant'] == '' + assert results['NM_021088.2:c.471_473dup']['gene_symbol'] == 'ZNF2' + assert results['NM_021088.2:c.471_473dup']['gene_ids'] == {'hgnc_id': 'HGNC:12991', 'entrez_gene_id': '7549', 'ucsc_id': 'uc032nuy.2', 'omim_id': ['194500']} assert results['NM_021088.2:c.471_473dup']['hgvs_transcript_variant'] == 'NM_021088.2:c.471_473dup' + assert results['NM_021088.2:c.471_473dup']['genome_context_intronic_sequence'] == '' + assert results['NM_021088.2:c.471_473dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_021088.2:c.471_473dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_021088.2:c.471_473dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': 'chr2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} + assert results['NM_021088.2:c.471_473dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_066574.2:p.(Arg159dup)', 'slr': 'NP_066574.2:p.(R159dup)'} + assert results['NM_021088.2:c.471_473dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_021088.2:c.471_473dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_021088.2:c.471_473dup']['alt_genomic_loci'], []) + assert results['NM_021088.2:c.471_473dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': 'chr2', 'pos': '95847037', 'ref': 'GCTTGCGGCGGCGA', 'alt': 'GCTTGCGGCGGCGA'}} assert 'hg38' not in list(results['NM_021088.2:c.471_473dup']['primary_assembly_loci'].keys()) - assert results['NM_021088.2:c.471_473dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': '2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} + assert 
results['NM_021088.2:c.471_473dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': '2', 'pos': '95847037', 'ref': 'GCTTGCGGCGGCGA', 'alt': 'GCTTGCGGCGGCGA'}} assert 'grch38' not in list(results['NM_021088.2:c.471_473dup']['primary_assembly_loci'].keys()) - assert results['NM_021088.2:c.471_473dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_066574.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021088.2'} - - assert 'NM_001017396.1:c.345_347dup' in list(results.keys()) - assert results['NM_001017396.1:c.345_347dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001017396.1:c.345_347dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001017396.1:c.345_347dup']['alt_genomic_loci'], []) - assert results['NM_001017396.1:c.345_347dup']['gene_symbol'] == 'ZNF2' - assert results['NM_001017396.1:c.345_347dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001017396.1:p.(Arg117dup)', 'slr': 'NP_001017396.1:p.(R117dup)'} - assert results['NM_001017396.1:c.345_347dup']['submitted_variant'] == 'NC_000002.11:g.95847041_95847043GCG=' - assert results['NM_001017396.1:c.345_347dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001017396.1:c.345_347dup']['hgvs_lrg_variant'] == '' - assert results['NM_001017396.1:c.345_347dup']['hgvs_transcript_variant'] == 'NM_001017396.1:c.345_347dup' - assert results['NM_001017396.1:c.345_347dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001017396.1:c.345_347dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': 'chr2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} - assert 'hg38' not in list(results['NM_001017396.1:c.345_347dup']['primary_assembly_loci'].keys()) - assert results['NM_001017396.1:c.345_347dup']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': '2', 'ref': 'GCTTGCGGCGGCGA', 'pos': '95847037', 'alt': 'GCTTGCGGCGGCGA'}} - assert 'grch38' not in list(results['NM_001017396.1:c.345_347dup']['primary_assembly_loci'].keys()) - assert results['NM_001017396.1:c.345_347dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001017396.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001017396.1'} + assert results['NM_021088.2:c.471_473dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021088.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_066574.2'} + assert 'NM_001282398.1:c.357_359dup' in list(results.keys()) + assert results['NM_001282398.1:c.357_359dup']['submitted_variant'] == 'NC_000002.11:g.95847041_95847043GCG=' + assert results['NM_001282398.1:c.357_359dup']['gene_symbol'] == 'ZNF2' + assert results['NM_001282398.1:c.357_359dup']['gene_ids'] == {'hgnc_id': 'HGNC:12991', 'entrez_gene_id': '7549', 'ucsc_id': 'uc032nuy.2', 'omim_id': ['194500']} + assert results['NM_001282398.1:c.357_359dup']['hgvs_transcript_variant'] == 'NM_001282398.1:c.357_359dup' + assert results['NM_001282398.1:c.357_359dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001282398.1:c.357_359dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001282398.1:c.357_359dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001282398.1:c.357_359dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269327.1:p.(Arg121dup)', 'slr': 'NP_001269327.1:p.(R121dup)'} + assert results['NM_001282398.1:c.357_359dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001282398.1:c.357_359dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001282398.1:c.357_359dup']['alt_genomic_loci'], []) + assert results['NM_001282398.1:c.357_359dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': 'chr2', 'pos': '95847037', 'ref': 'GCTTGCGGCGGCGA', 'alt': 'GCTTGCGGCGGCGA'}} + assert results['NM_001282398.1:c.357_359dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': 'chr2', 'pos': '95181295', 'ref': 'T', 'alt': 'TGCG'}} + assert results['NM_001282398.1:c.357_359dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': '2', 'pos': '95847037', 'ref': 'GCTTGCGGCGGCGA', 'alt': 'GCTTGCGGCGGCGA'}} + assert results['NM_001282398.1:c.357_359dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': '2', 'pos': '95181295', 'ref': 'T', 'alt': 'TGCG'}} + assert results['NM_001282398.1:c.357_359dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282398.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269327.1'} - def test_variant81(self): - variant = 'NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=' - results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() - print(results) - - assert 'NM_001083585.1:c.*344_*368dup' in list(results.keys()) - assert results['NM_001083585.1:c.*344_*368dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001083585.1:c.*344_*368dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001083585.1:c.*344_*368dup']['alt_genomic_loci'], []) - assert results['NM_001083585.1:c.*344_*368dup']['gene_symbol'] == 'RABEP1' - assert results['NM_001083585.1:c.*344_*368dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001077054.1:p.?', 'slr': 'NP_001077054.1:p.?'} - assert results['NM_001083585.1:c.*344_*368dup']['submitted_variant'] == 'NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=' - assert 
results['NM_001083585.1:c.*344_*368dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001083585.1:c.*344_*368dup']['hgvs_lrg_variant'] == '' - assert results['NM_001083585.1:c.*344_*368dup']['hgvs_transcript_variant'] == 'NM_001083585.1:c.*344_*368dup' - assert results['NM_001083585.1:c.*344_*368dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001083585.1:c.*344_*368dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': 'chr17', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'pos': '5286857', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} - assert 'hg38' not in list(results['NM_001083585.1:c.*344_*368dup']['primary_assembly_loci'].keys()) - assert results['NM_001083585.1:c.*344_*368dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': '17', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'pos': '5286857', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} - assert 'grch38' not in list(results['NM_001083585.1:c.*344_*368dup']['primary_assembly_loci'].keys()) - assert results['NM_001083585.1:c.*344_*368dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001077054.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001083585.1'} + assert 'NM_001017396.2:c.345_347dup' in list(results.keys()) + assert results['NM_001017396.2:c.345_347dup']['submitted_variant'] == 'NC_000002.11:g.95847041_95847043GCG=' + assert results['NM_001017396.2:c.345_347dup']['gene_symbol'] == 'ZNF2' + assert results['NM_001017396.2:c.345_347dup']['gene_ids'] == {'hgnc_id': 'HGNC:12991', 'entrez_gene_id': '7549', 'ucsc_id': 'uc032nuy.2', 'omim_id': ['194500']} + assert results['NM_001017396.2:c.345_347dup']['hgvs_transcript_variant'] == 'NM_001017396.2:c.345_347dup' + assert 
results['NM_001017396.2:c.345_347dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001017396.2:c.345_347dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001017396.2:c.345_347dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001017396.2:c.345_347dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001017396.1:p.(Arg117dup)', 'slr': 'NP_001017396.1:p.(R117dup)'} + assert results['NM_001017396.2:c.345_347dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001017396.2:c.345_347dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001017396.2:c.345_347dup']['alt_genomic_loci'], []) + assert results['NM_001017396.2:c.345_347dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': 'chr2', 'pos': '95847037', 'ref': 'GCTTGCGGCGGCGA', 'alt': 'GCTTGCGGCGGCGA'}} + assert results['NM_001017396.2:c.345_347dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': 'chr2', 'pos': '95181295', 'ref': 'T', 'alt': 'TGCG'}} + assert results['NM_001017396.2:c.345_347dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.95847037_95847050=', 'vcf': {'chr': '2', 'pos': '95847037', 'ref': 'GCTTGCGGCGGCGA', 'alt': 'GCTTGCGGCGGCGA'}} + assert results['NM_001017396.2:c.345_347dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.95181299_95181301dup', 'vcf': {'chr': '2', 'pos': '95181295', 'ref': 'T', 'alt': 'TGCG'}} + assert results['NM_001017396.2:c.345_347dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001017396.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001017396.1'} - assert 'NM_004703.5:c.*344_*368dup' in list(results.keys()) - assert results['NM_004703.5:c.*344_*368dup']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_004703.5:c.*344_*368dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004703.5:c.*344_*368dup']['alt_genomic_loci'], []) - assert results['NM_004703.5:c.*344_*368dup']['gene_symbol'] == 'RABEP1' - assert results['NM_004703.5:c.*344_*368dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004694.2:p.?', 'slr': 'NP_004694.2:p.?'} - assert results['NM_004703.5:c.*344_*368dup']['submitted_variant'] == 'NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=' - assert results['NM_004703.5:c.*344_*368dup']['genome_context_intronic_sequence'] == '' - assert results['NM_004703.5:c.*344_*368dup']['hgvs_lrg_variant'] == '' - assert results['NM_004703.5:c.*344_*368dup']['hgvs_transcript_variant'] == 'NM_004703.5:c.*344_*368dup' - assert results['NM_004703.5:c.*344_*368dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_004703.5:c.*344_*368dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': 'chr17', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'pos': '5286857', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} - assert results['NM_004703.5:c.*344_*368dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.5383567_5383591dup', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '5383566', 'alt': 'GTAGTGTTTGGAATTTTCTGTTCATA'}} - assert results['NM_004703.5:c.*344_*368dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': '17', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'pos': '5286857', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} - assert results['NM_004703.5:c.*344_*368dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.5383567_5383591dup', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '5383566', 'alt': 
'GTAGTGTTTGGAATTTTCTGTTCATA'}} - assert results['NM_004703.5:c.*344_*368dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004694.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004703.5'} + def test_variant81(self): + variant = 'NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=' + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() + print(results) + assert results['flag'] == 'gene_variant' assert 'NM_004703.4:c.*344_*368dup' in list(results.keys()) - assert results['NM_004703.4:c.*344_*368dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_004703.4:c.*344_*368dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004703.4:c.*344_*368dup']['alt_genomic_loci'], []) - assert results['NM_004703.4:c.*344_*368dup']['gene_symbol'] == 'RABEP1' - assert results['NM_004703.4:c.*344_*368dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004694.2:p.?', 'slr': 'NP_004694.2:p.?'} assert results['NM_004703.4:c.*344_*368dup']['submitted_variant'] == 'NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=' - assert results['NM_004703.4:c.*344_*368dup']['genome_context_intronic_sequence'] == '' - assert results['NM_004703.4:c.*344_*368dup']['hgvs_lrg_variant'] == '' + assert results['NM_004703.4:c.*344_*368dup']['gene_symbol'] == 'RABEP1' + assert results['NM_004703.4:c.*344_*368dup']['gene_ids'] == {'hgnc_id': 'HGNC:17677', 'entrez_gene_id': '9135', 'ucsc_id': 'uc032ery.2', 'omim_id': ['603616']} assert results['NM_004703.4:c.*344_*368dup']['hgvs_transcript_variant'] == 'NM_004703.4:c.*344_*368dup' + assert results['NM_004703.4:c.*344_*368dup']['genome_context_intronic_sequence'] == '' + assert results['NM_004703.4:c.*344_*368dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004703.4:c.*344_*368dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_004703.4:c.*344_*368dup']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': 'chr17', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'pos': '5286857', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} + assert results['NM_004703.4:c.*344_*368dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004694.2:p.?', 'slr': 'NP_004694.2:p.?'} + assert results['NM_004703.4:c.*344_*368dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_004703.4:c.*344_*368dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_004703.4:c.*344_*368dup']['alt_genomic_loci'], []) + assert results['NM_004703.4:c.*344_*368dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': 'chr17', 'pos': '5286857', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} assert 'hg38' not in list(results['NM_004703.4:c.*344_*368dup']['primary_assembly_loci'].keys()) - assert results['NM_004703.4:c.*344_*368dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': '17', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'pos': '5286857', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} + assert results['NM_004703.4:c.*344_*368dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': '17', 'pos': '5286857', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} assert 'grch38' not in list(results['NM_004703.4:c.*344_*368dup']['primary_assembly_loci'].keys()) - assert results['NM_004703.4:c.*344_*368dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004694.2', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_004703.4'} + assert results['NM_004703.4:c.*344_*368dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004703.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004694.2'} + + assert 'NM_004703.5:c.*344_*368dup' in list(results.keys()) + assert results['NM_004703.5:c.*344_*368dup']['submitted_variant'] == 'NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=' + assert results['NM_004703.5:c.*344_*368dup']['gene_symbol'] == 'RABEP1' + assert results['NM_004703.5:c.*344_*368dup']['gene_ids'] == {'hgnc_id': 'HGNC:17677', 'entrez_gene_id': '9135', 'ucsc_id': 'uc032ery.2', 'omim_id': ['603616']} + assert results['NM_004703.5:c.*344_*368dup']['hgvs_transcript_variant'] == 'NM_004703.5:c.*344_*368dup' + assert results['NM_004703.5:c.*344_*368dup']['genome_context_intronic_sequence'] == '' + assert results['NM_004703.5:c.*344_*368dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004703.5:c.*344_*368dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_004703.5:c.*344_*368dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004694.2:p.?', 'slr': 'NP_004694.2:p.?'} + assert results['NM_004703.5:c.*344_*368dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_004703.5:c.*344_*368dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_004703.5:c.*344_*368dup']['alt_genomic_loci'], []) + assert results['NM_004703.5:c.*344_*368dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': 'chr17', 'pos': '5286857', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} + assert results['NM_004703.5:c.*344_*368dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.5383567_5383591dup', 'vcf': {'chr': 'chr17', 'pos': '5383566', 'ref': 'G', 'alt': 
'GTAGTGTTTGGAATTTTCTGTTCATA'}} + assert results['NM_004703.5:c.*344_*368dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': '17', 'pos': '5286857', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} + assert results['NM_004703.5:c.*344_*368dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.5383567_5383591dup', 'vcf': {'chr': '17', 'pos': '5383566', 'ref': 'G', 'alt': 'GTAGTGTTTGGAATTTTCTGTTCATA'}} + assert results['NM_004703.5:c.*344_*368dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004703.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004694.2'} - assert results['flag'] == 'gene_variant' assert 'NM_001291581.1:c.*344_*368dup' in list(results.keys()) - assert results['NM_001291581.1:c.*344_*368dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001291581.1:c.*344_*368dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001291581.1:c.*344_*368dup']['alt_genomic_loci'], []) - assert results['NM_001291581.1:c.*344_*368dup']['gene_symbol'] == 'RABEP1' - assert results['NM_001291581.1:c.*344_*368dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278510.1:p.?', 'slr': 'NP_001278510.1:p.?'} assert results['NM_001291581.1:c.*344_*368dup']['submitted_variant'] == 'NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=' - assert results['NM_001291581.1:c.*344_*368dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001291581.1:c.*344_*368dup']['hgvs_lrg_variant'] == '' + assert results['NM_001291581.1:c.*344_*368dup']['gene_symbol'] == 'RABEP1' + assert results['NM_001291581.1:c.*344_*368dup']['gene_ids'] == {'hgnc_id': 'HGNC:17677', 'entrez_gene_id': '9135', 'ucsc_id': 'uc032ery.2', 'omim_id': ['603616']} assert 
results['NM_001291581.1:c.*344_*368dup']['hgvs_transcript_variant'] == 'NM_001291581.1:c.*344_*368dup' + assert results['NM_001291581.1:c.*344_*368dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001291581.1:c.*344_*368dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001291581.1:c.*344_*368dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001291581.1:c.*344_*368dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': 'chr17', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'pos': '5286857', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} - assert results['NM_001291581.1:c.*344_*368dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.5383567_5383591dup', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '5383566', 'alt': 'GTAGTGTTTGGAATTTTCTGTTCATA'}} - assert results['NM_001291581.1:c.*344_*368dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': '17', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'pos': '5286857', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} - assert results['NM_001291581.1:c.*344_*368dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.5383567_5383591dup', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '5383566', 'alt': 'GTAGTGTTTGGAATTTTCTGTTCATA'}} - assert results['NM_001291581.1:c.*344_*368dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278510.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291581.1'} + assert results['NM_001291581.1:c.*344_*368dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278510.1:p.?', 'slr': 'NP_001278510.1:p.?'} + assert results['NM_001291581.1:c.*344_*368dup']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001291581.1:c.*344_*368dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001291581.1:c.*344_*368dup']['alt_genomic_loci'], []) + assert results['NM_001291581.1:c.*344_*368dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': 'chr17', 'pos': '5286857', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} + assert results['NM_001291581.1:c.*344_*368dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.5383567_5383591dup', 'vcf': {'chr': 'chr17', 'pos': '5383566', 'ref': 'G', 'alt': 'GTAGTGTTTGGAATTTTCTGTTCATA'}} + assert results['NM_001291581.1:c.*344_*368dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': '17', 'pos': '5286857', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} + assert results['NM_001291581.1:c.*344_*368dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.5383567_5383591dup', 'vcf': {'chr': '17', 'pos': '5383566', 'ref': 'G', 'alt': 'GTAGTGTTTGGAATTTTCTGTTCATA'}} + assert results['NM_001291581.1:c.*344_*368dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291581.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278510.1'} assert 'NM_001083585.2:c.*344_*368dup' in list(results.keys()) - assert results['NM_001083585.2:c.*344_*368dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001083585.2:c.*344_*368dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001083585.2:c.*344_*368dup']['alt_genomic_loci'], []) - assert results['NM_001083585.2:c.*344_*368dup']['gene_symbol'] == 'RABEP1' - assert 
results['NM_001083585.2:c.*344_*368dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001077054.1:p.?', 'slr': 'NP_001077054.1:p.?'} assert results['NM_001083585.2:c.*344_*368dup']['submitted_variant'] == 'NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=' - assert results['NM_001083585.2:c.*344_*368dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001083585.2:c.*344_*368dup']['hgvs_lrg_variant'] == '' + assert results['NM_001083585.2:c.*344_*368dup']['gene_symbol'] == 'RABEP1' + assert results['NM_001083585.2:c.*344_*368dup']['gene_ids'] == {'hgnc_id': 'HGNC:17677', 'entrez_gene_id': '9135', 'ucsc_id': 'uc032ery.2', 'omim_id': ['603616']} assert results['NM_001083585.2:c.*344_*368dup']['hgvs_transcript_variant'] == 'NM_001083585.2:c.*344_*368dup' + assert results['NM_001083585.2:c.*344_*368dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001083585.2:c.*344_*368dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001083585.2:c.*344_*368dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001083585.2:c.*344_*368dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': 'chr17', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'pos': '5286857', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} - assert results['NM_001083585.2:c.*344_*368dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.5383567_5383591dup', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '5383566', 'alt': 'GTAGTGTTTGGAATTTTCTGTTCATA'}} - assert results['NM_001083585.2:c.*344_*368dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': '17', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'pos': '5286857', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} - assert 
results['NM_001083585.2:c.*344_*368dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.5383567_5383591dup', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '5383566', 'alt': 'GTAGTGTTTGGAATTTTCTGTTCATA'}} - assert results['NM_001083585.2:c.*344_*368dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001077054.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001083585.2'} + assert results['NM_001083585.2:c.*344_*368dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001077054.1:p.?', 'slr': 'NP_001077054.1:p.?'} + assert results['NM_001083585.2:c.*344_*368dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001083585.2:c.*344_*368dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001083585.2:c.*344_*368dup']['alt_genomic_loci'], []) + assert results['NM_001083585.2:c.*344_*368dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': 'chr17', 'pos': '5286857', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} + assert results['NM_001083585.2:c.*344_*368dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.5383567_5383591dup', 'vcf': {'chr': 'chr17', 'pos': '5383566', 'ref': 'G', 'alt': 'GTAGTGTTTGGAATTTTCTGTTCATA'}} + assert results['NM_001083585.2:c.*344_*368dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': '17', 'pos': '5286857', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} + assert results['NM_001083585.2:c.*344_*368dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.5383567_5383591dup', 'vcf': {'chr': '17', 'pos': '5383566', 'ref': 'G', 'alt': 
'GTAGTGTTTGGAATTTTCTGTTCATA'}} + assert results['NM_001083585.2:c.*344_*368dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001083585.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001077054.1'} + assert 'NM_001083585.1:c.*344_*368dup' in list(results.keys()) + assert results['NM_001083585.1:c.*344_*368dup']['submitted_variant'] == 'NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG=' + assert results['NM_001083585.1:c.*344_*368dup']['gene_symbol'] == 'RABEP1' + assert results['NM_001083585.1:c.*344_*368dup']['gene_ids'] == {'hgnc_id': 'HGNC:17677', 'entrez_gene_id': '9135', 'ucsc_id': 'uc032ery.2', 'omim_id': ['603616']} + assert results['NM_001083585.1:c.*344_*368dup']['hgvs_transcript_variant'] == 'NM_001083585.1:c.*344_*368dup' + assert results['NM_001083585.1:c.*344_*368dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001083585.1:c.*344_*368dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001083585.1:c.*344_*368dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001083585.1:c.*344_*368dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001077054.1:p.?', 'slr': 'NP_001077054.1:p.?'} + assert results['NM_001083585.1:c.*344_*368dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001083585.1:c.*344_*368dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001083585.1:c.*344_*368dup']['alt_genomic_loci'], []) + assert results['NM_001083585.1:c.*344_*368dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': 'chr17', 'pos': '5286857', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} + assert 'hg38' not in list(results['NM_001083585.1:c.*344_*368dup']['primary_assembly_loci'].keys()) + assert results['NM_001083585.1:c.*344_*368dup']['primary_assembly_loci']['grch37'] 
== {'hgvs_genomic_description': 'NC_000017.10:g.5286857_5286915=', 'vcf': {'chr': '17', 'pos': '5286857', 'ref': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA', 'alt': 'CCCAGTAGTGTTTGGAATTTTCTGTTCATATAGTGTTTGGAATTTTCTGTTCATAGATA'}} + assert 'grch38' not in list(results['NM_001083585.1:c.*344_*368dup']['primary_assembly_loci'].keys()) + assert results['NM_001083585.1:c.*344_*368dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001083585.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001077054.1'} def test_variant82(self): variant = 'NC_000003.11:g.14561629_14561630GC=' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_001080423.3:c.1020del' in list(results.keys()) - assert results['NM_001080423.3:c.1020del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001080423.3:c.1020del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001080423.3:c.1020del']['alt_genomic_loci'], []) - assert results['NM_001080423.3:c.1020del']['gene_symbol'] == 'GRIP2' - assert results['NM_001080423.3:c.1020del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.3:p.(Ser341GlnfsTer4)', 'slr': 'NP_001073892.3:p.(S341Qfs*4)'} - assert results['NM_001080423.3:c.1020del']['submitted_variant'] == 'NC_000003.11:g.14561629_14561630GC=' - assert results['NM_001080423.3:c.1020del']['genome_context_intronic_sequence'] == '' - assert results['NM_001080423.3:c.1020del']['hgvs_lrg_variant'] == '' - assert results['NM_001080423.3:c.1020del']['hgvs_transcript_variant'] == 'NM_001080423.3:c.1020del' - assert results['NM_001080423.3:c.1020del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001080423.3:c.1020del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': 'chr3', 'ref': 'CTGAGGC', 'pos': '14561624', 'alt': 'CTGAGGC'}} - assert 
results['NM_001080423.3:c.1020del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520122del', 'vcf': {'chr': 'chr3', 'ref': 'AG', 'pos': '14520119', 'alt': 'A'}} - assert results['NM_001080423.3:c.1020del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': '3', 'ref': 'CTGAGGC', 'pos': '14561624', 'alt': 'CTGAGGC'}} - assert results['NM_001080423.3:c.1020del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520122del', 'vcf': {'chr': '3', 'ref': 'AG', 'pos': '14520119', 'alt': 'A'}} - assert results['NM_001080423.3:c.1020del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.3'} - assert results['flag'] == 'gene_variant' assert 'NM_001080423.2:c.1311del' in list(results.keys()) - assert results['NM_001080423.2:c.1311del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001080423.2:c.1311del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001080423.2:c.1311del']['alt_genomic_loci'], []) - assert results['NM_001080423.2:c.1311del']['gene_symbol'] == 'GRIP2' - assert results['NM_001080423.2:c.1311del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.2:p.(Ser438GlnfsTer4)', 'slr': 'NP_001073892.2:p.(S438Qfs*4)'} assert results['NM_001080423.2:c.1311del']['submitted_variant'] == 'NC_000003.11:g.14561629_14561630GC=' - assert results['NM_001080423.2:c.1311del']['genome_context_intronic_sequence'] == '' - assert results['NM_001080423.2:c.1311del']['hgvs_lrg_variant'] == '' + assert results['NM_001080423.2:c.1311del']['gene_symbol'] == 'GRIP2' + assert results['NM_001080423.2:c.1311del']['gene_ids'] == {'hgnc_id': 'HGNC:23841', 'entrez_gene_id': '80852', 'ucsc_id': 'uc032rfi.1', 'omim_id': []} assert 
results['NM_001080423.2:c.1311del']['hgvs_transcript_variant'] == 'NM_001080423.2:c.1311del' + assert results['NM_001080423.2:c.1311del']['genome_context_intronic_sequence'] == '' + assert results['NM_001080423.2:c.1311del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001080423.2:c.1311del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001080423.2:c.1311del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': 'chr3', 'ref': 'CTGAGGC', 'pos': '14561624', 'alt': 'CTGAGGC'}} + assert results['NM_001080423.2:c.1311del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.2:p.(Ser438GlnfsTer4)', 'slr': 'NP_001073892.2:p.(S438Qfs*4)'} + assert results['NM_001080423.2:c.1311del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001080423.2:c.1311del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001080423.2:c.1311del']['alt_genomic_loci'], []) + assert results['NM_001080423.2:c.1311del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': 'chr3', 'pos': '14561624', 'ref': 'CTGAGGC', 'alt': 'CTGAGGC'}} assert 'hg38' not in list(results['NM_001080423.2:c.1311del']['primary_assembly_loci'].keys()) - assert results['NM_001080423.2:c.1311del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': '3', 'ref': 'CTGAGGC', 'pos': '14561624', 'alt': 'CTGAGGC'}} + assert results['NM_001080423.2:c.1311del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': '3', 'pos': '14561624', 'ref': 'CTGAGGC', 'alt': 'CTGAGGC'}} assert 'grch38' not in list(results['NM_001080423.2:c.1311del']['primary_assembly_loci'].keys()) - assert results['NM_001080423.2:c.1311del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.2', 
'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.2'} + assert results['NM_001080423.2:c.1311del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.2'} + assert 'NM_001080423.3:c.1020del' in list(results.keys()) + assert results['NM_001080423.3:c.1020del']['submitted_variant'] == 'NC_000003.11:g.14561629_14561630GC=' + assert results['NM_001080423.3:c.1020del']['gene_symbol'] == 'GRIP2' + assert results['NM_001080423.3:c.1020del']['gene_ids'] == {'hgnc_id': 'HGNC:23841', 'entrez_gene_id': '80852', 'ucsc_id': 'uc032rfi.1', 'omim_id': []} + assert results['NM_001080423.3:c.1020del']['hgvs_transcript_variant'] == 'NM_001080423.3:c.1020del' + assert results['NM_001080423.3:c.1020del']['genome_context_intronic_sequence'] == '' + assert results['NM_001080423.3:c.1020del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001080423.3:c.1020del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001080423.3:c.1020del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.3:p.(Ser341GlnfsTer4)', 'slr': 'NP_001073892.3:p.(S341Qfs*4)'} + assert results['NM_001080423.3:c.1020del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001080423.3:c.1020del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001080423.3:c.1020del']['alt_genomic_loci'], []) + assert results['NM_001080423.3:c.1020del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': 'chr3', 'pos': '14561624', 'ref': 'CTGAGGC', 'alt': 'CTGAGGC'}} + assert results['NM_001080423.3:c.1020del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520122del', 'vcf': {'chr': 'chr3', 'pos': '14520119', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_001080423.3:c.1020del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': '3', 'pos': '14561624', 'ref': 'CTGAGGC', 'alt': 'CTGAGGC'}} + assert results['NM_001080423.3:c.1020del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520122del', 'vcf': {'chr': '3', 'pos': '14520119', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_001080423.3:c.1020del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.3'} def test_variant83(self): variant = 'NC_000003.11:g.14561629_14561630insG' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_001080423.3:c.1016_1020=' in list(results.keys()) - assert results['NM_001080423.3:c.1016_1020=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001080423.3:c.1016_1020=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001080423.3:c.1016_1020=']['alt_genomic_loci'], []) - assert results['NM_001080423.3:c.1016_1020=']['gene_symbol'] == 'GRIP2' - assert results['NM_001080423.3:c.1016_1020=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.3:p.(Arg339=)', 'slr': 'NP_001073892.3:p.(R339=)'} - assert results['NM_001080423.3:c.1016_1020=']['submitted_variant'] == 'NC_000003.11:g.14561629_14561630insG' - assert results['NM_001080423.3:c.1016_1020=']['genome_context_intronic_sequence'] == '' - assert results['NM_001080423.3:c.1016_1020=']['hgvs_lrg_variant'] == '' - assert results['NM_001080423.3:c.1016_1020=']['hgvs_transcript_variant'] == 'NM_001080423.3:c.1016_1020=' - assert results['NM_001080423.3:c.1016_1020=']['hgvs_refseqgene_variant'] == '' - assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': 'chr3', 'ref': 'A', 'pos': '14561627', 'alt': 'AG'}} - assert 
results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520120_14520124=', 'vcf': {'chr': 'chr3', 'ref': 'GGGCC', 'pos': '14520120', 'alt': 'GGGCC'}} - assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': '3', 'ref': 'A', 'pos': '14561627', 'alt': 'AG'}} - assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520120_14520124=', 'vcf': {'chr': '3', 'ref': 'GGGCC', 'pos': '14520120', 'alt': 'GGGCC'}} - assert results['NM_001080423.3:c.1016_1020=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.3'} - assert results['flag'] == 'gene_variant' assert 'NM_001080423.2:c.1307_1311=' in list(results.keys()) - assert results['NM_001080423.2:c.1307_1311=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001080423.2:c.1307_1311=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001080423.2:c.1307_1311=']['alt_genomic_loci'], []) - assert results['NM_001080423.2:c.1307_1311=']['gene_symbol'] == 'GRIP2' - assert results['NM_001080423.2:c.1307_1311=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.2:p.(Arg436=)', 'slr': 'NP_001073892.2:p.(R436=)'} assert results['NM_001080423.2:c.1307_1311=']['submitted_variant'] == 'NC_000003.11:g.14561629_14561630insG' - assert results['NM_001080423.2:c.1307_1311=']['genome_context_intronic_sequence'] == '' - assert results['NM_001080423.2:c.1307_1311=']['hgvs_lrg_variant'] == '' + assert results['NM_001080423.2:c.1307_1311=']['gene_symbol'] == 'GRIP2' + assert results['NM_001080423.2:c.1307_1311=']['gene_ids'] == {'hgnc_id': 'HGNC:23841', 'entrez_gene_id': '80852', 'ucsc_id': 'uc032rfi.1', 'omim_id': []} assert 
results['NM_001080423.2:c.1307_1311=']['hgvs_transcript_variant'] == 'NM_001080423.2:c.1307_1311=' + assert results['NM_001080423.2:c.1307_1311=']['genome_context_intronic_sequence'] == '' + assert results['NM_001080423.2:c.1307_1311=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001080423.2:c.1307_1311=']['hgvs_refseqgene_variant'] == '' - assert results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': 'chr3', 'ref': 'A', 'pos': '14561627', 'alt': 'AG'}} + assert results['NM_001080423.2:c.1307_1311=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.2:p.(Arg436=)', 'slr': 'NP_001073892.2:p.(R436=)'} + assert results['NM_001080423.2:c.1307_1311=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001080423.2:c.1307_1311=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001080423.2:c.1307_1311=']['alt_genomic_loci'], []) + assert results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': 'chr3', 'pos': '14561627', 'ref': 'A', 'alt': 'AG'}} assert 'hg38' not in list(results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci'].keys()) - assert results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': '3', 'ref': 'A', 'pos': '14561627', 'alt': 'AG'}} + assert results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': '3', 'pos': '14561627', 'ref': 'A', 'alt': 'AG'}} assert 'grch38' not in list(results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci'].keys()) - assert results['NM_001080423.2:c.1307_1311=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.2', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.2'} + assert results['NM_001080423.2:c.1307_1311=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.2'} + assert 'NM_001080423.3:c.1016_1020=' in list(results.keys()) + assert results['NM_001080423.3:c.1016_1020=']['submitted_variant'] == 'NC_000003.11:g.14561629_14561630insG' + assert results['NM_001080423.3:c.1016_1020=']['gene_symbol'] == 'GRIP2' + assert results['NM_001080423.3:c.1016_1020=']['gene_ids'] == {'hgnc_id': 'HGNC:23841', 'entrez_gene_id': '80852', 'ucsc_id': 'uc032rfi.1', 'omim_id': []} + assert results['NM_001080423.3:c.1016_1020=']['hgvs_transcript_variant'] == 'NM_001080423.3:c.1016_1020=' + assert results['NM_001080423.3:c.1016_1020=']['genome_context_intronic_sequence'] == '' + assert results['NM_001080423.3:c.1016_1020=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001080423.3:c.1016_1020=']['hgvs_refseqgene_variant'] == '' + assert results['NM_001080423.3:c.1016_1020=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.3:p.(Arg339=)', 'slr': 'NP_001073892.3:p.(R339=)'} + assert results['NM_001080423.3:c.1016_1020=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001080423.3:c.1016_1020=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001080423.3:c.1016_1020=']['alt_genomic_loci'], []) + assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': 'chr3', 'pos': '14561627', 'ref': 'A', 'alt': 'AG'}} + assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520120_14520124=', 'vcf': {'chr': 'chr3', 'pos': '14520120', 'ref': 'GGGCC', 'alt': 'GGGCC'}} + assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': '3', 'pos': '14561627', 'ref': 'A', 'alt': 'AG'}} + assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520120_14520124=', 'vcf': {'chr': '3', 'pos': '14520120', 'ref': 'GGGCC', 'alt': 'GGGCC'}} + assert results['NM_001080423.3:c.1016_1020=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.3'} def test_variant84(self): variant = 'NC_000004.11:g.140811111_140811122del' @@ -2648,80 +2684,82 @@ def test_variant84(self): assert results['flag'] == 'gene_variant' assert 'NM_018717.5:c.1515_1526del' in list(results.keys()) - assert results['NM_018717.5:c.1515_1526del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_018717.5:c.1515_1526del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_018717.5:c.1515_1526del']['alt_genomic_loci'], []) - assert results['NM_018717.5:c.1515_1526del']['gene_symbol'] == 'MAML3' - assert results['NM_018717.5:c.1515_1526del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.3:p.(Gln507_Gln510del)', 'slr': 'NP_061187.3:p.(Q507_Q510del)'} assert results['NM_018717.5:c.1515_1526del']['submitted_variant'] == 'NC_000004.11:g.140811111_140811122del' - assert results['NM_018717.5:c.1515_1526del']['genome_context_intronic_sequence'] == '' - assert results['NM_018717.5:c.1515_1526del']['hgvs_lrg_variant'] == '' + assert results['NM_018717.5:c.1515_1526del']['gene_symbol'] == 'MAML3' + assert results['NM_018717.5:c.1515_1526del']['gene_ids'] == {'hgnc_id': 'HGNC:16272', 'entrez_gene_id': '55534', 'ucsc_id': 'uc062zte.1', 'omim_id': ['608991']} assert results['NM_018717.5:c.1515_1526del']['hgvs_transcript_variant'] == 'NM_018717.5:c.1515_1526del' + assert results['NM_018717.5:c.1515_1526del']['genome_context_intronic_sequence'] == '' 
+ assert results['NM_018717.5:c.1515_1526del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_018717.5:c.1515_1526del']['hgvs_refseqgene_variant'] == '' - assert results['NM_018717.5:c.1515_1526del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811122del', 'vcf': {'chr': 'chr4', 'ref': 'TTGCTGCTGCTGC', 'pos': '140811063', 'alt': 'T'}} + assert results['NM_018717.5:c.1515_1526del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.3:p.(Gln507_Gln510del)', 'slr': 'NP_061187.3:p.(Q507_Q510del)'} + assert results['NM_018717.5:c.1515_1526del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_018717.5:c.1515_1526del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_018717.5:c.1515_1526del']['alt_genomic_loci'], []) + assert results['NM_018717.5:c.1515_1526del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811122del', 'vcf': {'chr': 'chr4', 'pos': '140811063', 'ref': 'TTGCTGCTGCTGC', 'alt': 'T'}} assert 'hg38' not in list(results['NM_018717.5:c.1515_1526del']['primary_assembly_loci'].keys()) - assert results['NM_018717.5:c.1515_1526del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811122del', 'vcf': {'chr': '4', 'ref': 'TTGCTGCTGCTGC', 'pos': '140811063', 'alt': 'T'}} + assert results['NM_018717.5:c.1515_1526del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811122del', 'vcf': {'chr': '4', 'pos': '140811063', 'ref': 'TTGCTGCTGCTGC', 'alt': 'T'}} assert 'grch38' not in list(results['NM_018717.5:c.1515_1526del']['primary_assembly_loci'].keys()) - assert results['NM_018717.5:c.1515_1526del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5'} + assert results['NM_018717.5:c.1515_1526del']['reference_sequence_records'] 
== {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3'} assert 'NM_018717.4:c.1465_1469=' in list(results.keys()) - assert results['NM_018717.4:c.1465_1469=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_018717.4:c.1465_1469=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_018717.4:c.1465_1469=']['alt_genomic_loci'], []) - assert results['NM_018717.4:c.1465_1469=']['gene_symbol'] == 'MAML3' - assert results['NM_018717.4:c.1465_1469=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.2:p.(Gln489=)', 'slr': 'NP_061187.2:p.(Q489=)'} assert results['NM_018717.4:c.1465_1469=']['submitted_variant'] == 'NC_000004.11:g.140811111_140811122del' - assert results['NM_018717.4:c.1465_1469=']['genome_context_intronic_sequence'] == '' - assert results['NM_018717.4:c.1465_1469=']['hgvs_lrg_variant'] == '' + assert results['NM_018717.4:c.1465_1469=']['gene_symbol'] == 'MAML3' + assert results['NM_018717.4:c.1465_1469=']['gene_ids'] == {'hgnc_id': 'HGNC:16272', 'entrez_gene_id': '55534', 'ucsc_id': 'uc062zte.1', 'omim_id': ['608991']} assert results['NM_018717.4:c.1465_1469=']['hgvs_transcript_variant'] == 'NM_018717.4:c.1465_1469=' + assert results['NM_018717.4:c.1465_1469=']['genome_context_intronic_sequence'] == '' + assert results['NM_018717.4:c.1465_1469=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_018717.4:c.1465_1469=']['hgvs_refseqgene_variant'] == '' - assert results['NM_018717.4:c.1465_1469=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811122del', 'vcf': {'chr': 'chr4', 'ref': 'TTGCTGCTGCTGC', 'pos': '140811063', 'alt': 'T'}} - assert results['NM_018717.4:c.1465_1469=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889957_139889968del', 'vcf': {'chr': 'chr4', 'ref': 'TTGCTGCTGCTGC', 'pos': '139889909', 'alt': 'T'}} - assert 
results['NM_018717.4:c.1465_1469=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811122del', 'vcf': {'chr': '4', 'ref': 'TTGCTGCTGCTGC', 'pos': '140811063', 'alt': 'T'}} - assert results['NM_018717.4:c.1465_1469=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889957_139889968del', 'vcf': {'chr': '4', 'ref': 'TTGCTGCTGCTGC', 'pos': '139889909', 'alt': 'T'}} - assert results['NM_018717.4:c.1465_1469=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.4'} - + assert results['NM_018717.4:c.1465_1469=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.2:p.(Gln489=)', 'slr': 'NP_061187.2:p.(Q489=)'} + assert results['NM_018717.4:c.1465_1469=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_018717.4:c.1465_1469=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_018717.4:c.1465_1469=']['alt_genomic_loci'], []) + assert results['NM_018717.4:c.1465_1469=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811122del', 'vcf': {'chr': 'chr4', 'pos': '140811063', 'ref': 'TTGCTGCTGCTGC', 'alt': 'T'}} + assert results['NM_018717.4:c.1465_1469=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889957_139889968del', 'vcf': {'chr': 'chr4', 'pos': '139889909', 'ref': 'TTGCTGCTGCTGC', 'alt': 'T'}} + assert results['NM_018717.4:c.1465_1469=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811122del', 'vcf': {'chr': '4', 'pos': '140811063', 'ref': 'TTGCTGCTGCTGC', 'alt': 'T'}} + assert results['NM_018717.4:c.1465_1469=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889957_139889968del', 'vcf': {'chr': '4', 'pos': '139889909', 'ref': 'TTGCTGCTGCTGC', 'alt': 'T'}} + assert 
results['NM_018717.4:c.1465_1469=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.2'} def test_variant85(self): variant = 'NC_000004.11:g.140811111_140811122CTGCTGCTGCTG=' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_018717.5:c.1468_1479=' in list(results.keys()) - assert results['NM_018717.5:c.1468_1479=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_018717.5:c.1468_1479=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_018717.5:c.1468_1479=']['alt_genomic_loci'], []) - assert results['NM_018717.5:c.1468_1479=']['gene_symbol'] == 'MAML3' - assert results['NM_018717.5:c.1468_1479=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.3:p.(Gln490=)', 'slr': 'NP_061187.3:p.(Q490=)'} assert results['NM_018717.5:c.1468_1479=']['submitted_variant'] == 'NC_000004.11:g.140811111_140811122CTGCTGCTGCTG=' - assert results['NM_018717.5:c.1468_1479=']['genome_context_intronic_sequence'] == '' - assert results['NM_018717.5:c.1468_1479=']['hgvs_lrg_variant'] == '' + assert results['NM_018717.5:c.1468_1479=']['gene_symbol'] == 'MAML3' + assert results['NM_018717.5:c.1468_1479=']['gene_ids'] == {'hgnc_id': 'HGNC:16272', 'entrez_gene_id': '55534', 'ucsc_id': 'uc062zte.1', 'omim_id': ['608991']} assert results['NM_018717.5:c.1468_1479=']['hgvs_transcript_variant'] == 'NM_018717.5:c.1468_1479=' + assert results['NM_018717.5:c.1468_1479=']['genome_context_intronic_sequence'] == '' + assert results['NM_018717.5:c.1468_1479=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_018717.5:c.1468_1479=']['hgvs_refseqgene_variant'] == '' - assert results['NM_018717.5:c.1468_1479=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811122=', 'vcf': {'chr': 'chr4', 'ref': 
'CTGCTGCTGCTG', 'pos': '140811111', 'alt': 'CTGCTGCTGCTG'}} + assert results['NM_018717.5:c.1468_1479=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.3:p.(Gln490=)', 'slr': 'NP_061187.3:p.(Q490=)'} + assert results['NM_018717.5:c.1468_1479=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_018717.5:c.1468_1479=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_018717.5:c.1468_1479=']['alt_genomic_loci'], []) + assert results['NM_018717.5:c.1468_1479=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811122=', 'vcf': {'chr': 'chr4', 'pos': '140811111', 'ref': 'CTGCTGCTGCTG', 'alt': 'CTGCTGCTGCTG'}} assert 'hg38' not in list(results['NM_018717.5:c.1468_1479=']['primary_assembly_loci'].keys()) - assert results['NM_018717.5:c.1468_1479=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811122=', 'vcf': {'chr': '4', 'ref': 'CTGCTGCTGCTG', 'pos': '140811111', 'alt': 'CTGCTGCTGCTG'}} + assert results['NM_018717.5:c.1468_1479=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811122=', 'vcf': {'chr': '4', 'pos': '140811111', 'ref': 'CTGCTGCTGCTG', 'alt': 'CTGCTGCTGCTG'}} assert 'grch38' not in list(results['NM_018717.5:c.1468_1479=']['primary_assembly_loci'].keys()) - assert results['NM_018717.5:c.1468_1479=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5'} + assert results['NM_018717.5:c.1468_1479=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3'} - assert results['flag'] == 'gene_variant' assert 'NM_018717.4:c.1503_1514dup' in list(results.keys()) - assert results['NM_018717.4:c.1503_1514dup']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_018717.4:c.1503_1514dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_018717.4:c.1503_1514dup']['alt_genomic_loci'], []) - assert results['NM_018717.4:c.1503_1514dup']['gene_symbol'] == 'MAML3' - assert results['NM_018717.4:c.1503_1514dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.2:p.(Gln503_Gln506dup)', 'slr': 'NP_061187.2:p.(Q503_Q506dup)'} assert results['NM_018717.4:c.1503_1514dup']['submitted_variant'] == 'NC_000004.11:g.140811111_140811122CTGCTGCTGCTG=' - assert results['NM_018717.4:c.1503_1514dup']['genome_context_intronic_sequence'] == '' - assert results['NM_018717.4:c.1503_1514dup']['hgvs_lrg_variant'] == '' + assert results['NM_018717.4:c.1503_1514dup']['gene_symbol'] == 'MAML3' + assert results['NM_018717.4:c.1503_1514dup']['gene_ids'] == {'hgnc_id': 'HGNC:16272', 'entrez_gene_id': '55534', 'ucsc_id': 'uc062zte.1', 'omim_id': ['608991']} assert results['NM_018717.4:c.1503_1514dup']['hgvs_transcript_variant'] == 'NM_018717.4:c.1503_1514dup' + assert results['NM_018717.4:c.1503_1514dup']['genome_context_intronic_sequence'] == '' + assert results['NM_018717.4:c.1503_1514dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_018717.4:c.1503_1514dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_018717.4:c.1503_1514dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811095_140811128=', 'vcf': {'chr': 'chr4', 'ref': 'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG', 'pos': '140811095', 'alt': 'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG'}} - assert results['NM_018717.4:c.1503_1514dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889941_139889974=', 'vcf': {'chr': 'chr4', 'ref': 'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG', 'pos': '139889941', 'alt': 'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG'}} - assert results['NM_018717.4:c.1503_1514dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000004.11:g.140811095_140811128=', 'vcf': {'chr': '4', 'ref': 'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG', 'pos': '140811095', 'alt': 'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG'}} - assert results['NM_018717.4:c.1503_1514dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889941_139889974=', 'vcf': {'chr': '4', 'ref': 'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG', 'pos': '139889941', 'alt': 'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG'}} - assert results['NM_018717.4:c.1503_1514dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.4'} - + assert results['NM_018717.4:c.1503_1514dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.2:p.(Gln503_Gln506dup)', 'slr': 'NP_061187.2:p.(Q503_Q506dup)'} + assert results['NM_018717.4:c.1503_1514dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_018717.4:c.1503_1514dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_018717.4:c.1503_1514dup']['alt_genomic_loci'], []) + assert results['NM_018717.4:c.1503_1514dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811095_140811128=', 'vcf': {'chr': 'chr4', 'pos': '140811095', 'ref': 'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG', 'alt': 'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG'}} + assert results['NM_018717.4:c.1503_1514dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889941_139889974=', 'vcf': {'chr': 'chr4', 'pos': '139889941', 'ref': 'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG', 'alt': 'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG'}} + assert results['NM_018717.4:c.1503_1514dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811095_140811128=', 'vcf': {'chr': '4', 'pos': '140811095', 'ref': 'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG', 'alt': 'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG'}} + assert 
results['NM_018717.4:c.1503_1514dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889941_139889974=', 'vcf': {'chr': '4', 'pos': '139889941', 'ref': 'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG', 'alt': 'GCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTG'}} + assert results['NM_018717.4:c.1503_1514dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.2'} def test_variant86(self): variant = 'NC_000004.11:g.140811117_140811122del' @@ -2730,80 +2768,82 @@ def test_variant86(self): assert results['flag'] == 'gene_variant' assert 'NM_018717.5:c.1521_1526del' in list(results.keys()) - assert results['NM_018717.5:c.1521_1526del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_018717.5:c.1521_1526del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_018717.5:c.1521_1526del']['alt_genomic_loci'], []) - assert results['NM_018717.5:c.1521_1526del']['gene_symbol'] == 'MAML3' - assert results['NM_018717.5:c.1521_1526del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.3:p.(Gln509_Gln510del)', 'slr': 'NP_061187.3:p.(Q509_Q510del)'} assert results['NM_018717.5:c.1521_1526del']['submitted_variant'] == 'NC_000004.11:g.140811117_140811122del' - assert results['NM_018717.5:c.1521_1526del']['genome_context_intronic_sequence'] == '' - assert results['NM_018717.5:c.1521_1526del']['hgvs_lrg_variant'] == '' + assert results['NM_018717.5:c.1521_1526del']['gene_symbol'] == 'MAML3' + assert results['NM_018717.5:c.1521_1526del']['gene_ids'] == {'hgnc_id': 'HGNC:16272', 'entrez_gene_id': '55534', 'ucsc_id': 'uc062zte.1', 'omim_id': ['608991']} assert results['NM_018717.5:c.1521_1526del']['hgvs_transcript_variant'] == 'NM_018717.5:c.1521_1526del' + assert results['NM_018717.5:c.1521_1526del']['genome_context_intronic_sequence'] == '' + assert 
results['NM_018717.5:c.1521_1526del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_018717.5:c.1521_1526del']['hgvs_refseqgene_variant'] == '' - assert results['NM_018717.5:c.1521_1526del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117_140811122del', 'vcf': {'chr': 'chr4', 'ref': 'TTGCTGC', 'pos': '140811063', 'alt': 'T'}} + assert results['NM_018717.5:c.1521_1526del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.3:p.(Gln509_Gln510del)', 'slr': 'NP_061187.3:p.(Q509_Q510del)'} + assert results['NM_018717.5:c.1521_1526del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_018717.5:c.1521_1526del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_018717.5:c.1521_1526del']['alt_genomic_loci'], []) + assert results['NM_018717.5:c.1521_1526del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117_140811122del', 'vcf': {'chr': 'chr4', 'pos': '140811063', 'ref': 'TTGCTGC', 'alt': 'T'}} assert 'hg38' not in list(results['NM_018717.5:c.1521_1526del']['primary_assembly_loci'].keys()) - assert results['NM_018717.5:c.1521_1526del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117_140811122del', 'vcf': {'chr': '4', 'ref': 'TTGCTGC', 'pos': '140811063', 'alt': 'T'}} + assert results['NM_018717.5:c.1521_1526del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117_140811122del', 'vcf': {'chr': '4', 'pos': '140811063', 'ref': 'TTGCTGC', 'alt': 'T'}} assert 'grch38' not in list(results['NM_018717.5:c.1521_1526del']['primary_assembly_loci'].keys()) - assert results['NM_018717.5:c.1521_1526del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5'} + assert results['NM_018717.5:c.1521_1526del']['reference_sequence_records'] == {'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3'} assert 'NM_018717.4:c.1509_1514dup' in list(results.keys()) - assert results['NM_018717.4:c.1509_1514dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_018717.4:c.1509_1514dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_018717.4:c.1509_1514dup']['alt_genomic_loci'], []) - assert results['NM_018717.4:c.1509_1514dup']['gene_symbol'] == 'MAML3' - assert results['NM_018717.4:c.1509_1514dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.2:p.(Gln505_Gln506dup)', 'slr': 'NP_061187.2:p.(Q505_Q506dup)'} assert results['NM_018717.4:c.1509_1514dup']['submitted_variant'] == 'NC_000004.11:g.140811117_140811122del' - assert results['NM_018717.4:c.1509_1514dup']['genome_context_intronic_sequence'] == '' - assert results['NM_018717.4:c.1509_1514dup']['hgvs_lrg_variant'] == '' + assert results['NM_018717.4:c.1509_1514dup']['gene_symbol'] == 'MAML3' + assert results['NM_018717.4:c.1509_1514dup']['gene_ids'] == {'hgnc_id': 'HGNC:16272', 'entrez_gene_id': '55534', 'ucsc_id': 'uc062zte.1', 'omim_id': ['608991']} assert results['NM_018717.4:c.1509_1514dup']['hgvs_transcript_variant'] == 'NM_018717.4:c.1509_1514dup' + assert results['NM_018717.4:c.1509_1514dup']['genome_context_intronic_sequence'] == '' + assert results['NM_018717.4:c.1509_1514dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_018717.4:c.1509_1514dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_018717.4:c.1509_1514dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117_140811122del', 'vcf': {'chr': 'chr4', 'ref': 'TTGCTGC', 'pos': '140811063', 'alt': 'T'}} - assert results['NM_018717.4:c.1509_1514dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889963_139889968del', 'vcf': {'chr': 'chr4', 'ref': 'TTGCTGC', 'pos': '139889909', 
'alt': 'T'}} - assert results['NM_018717.4:c.1509_1514dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117_140811122del', 'vcf': {'chr': '4', 'ref': 'TTGCTGC', 'pos': '140811063', 'alt': 'T'}} - assert results['NM_018717.4:c.1509_1514dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889963_139889968del', 'vcf': {'chr': '4', 'ref': 'TTGCTGC', 'pos': '139889909', 'alt': 'T'}} - assert results['NM_018717.4:c.1509_1514dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.4'} - + assert results['NM_018717.4:c.1509_1514dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.2:p.(Gln505_Gln506dup)', 'slr': 'NP_061187.2:p.(Q505_Q506dup)'} + assert results['NM_018717.4:c.1509_1514dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_018717.4:c.1509_1514dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_018717.4:c.1509_1514dup']['alt_genomic_loci'], []) + assert results['NM_018717.4:c.1509_1514dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117_140811122del', 'vcf': {'chr': 'chr4', 'pos': '140811063', 'ref': 'TTGCTGC', 'alt': 'T'}} + assert results['NM_018717.4:c.1509_1514dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889963_139889968del', 'vcf': {'chr': 'chr4', 'pos': '139889909', 'ref': 'TTGCTGC', 'alt': 'T'}} + assert results['NM_018717.4:c.1509_1514dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117_140811122del', 'vcf': {'chr': '4', 'pos': '140811063', 'ref': 'TTGCTGC', 'alt': 'T'}} + assert results['NM_018717.4:c.1509_1514dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889963_139889968del', 'vcf': {'chr': '4', 'pos': '139889909', 'ref': 'TTGCTGC', 'alt': 'T'}} + 
assert results['NM_018717.4:c.1509_1514dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.2'} def test_variant87(self): variant = 'NC_000004.11:g.140811111_140811117del' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_018717.5:c.1473_1479del' in list(results.keys()) - assert results['NM_018717.5:c.1473_1479del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_018717.5:c.1473_1479del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_018717.5:c.1473_1479del']['alt_genomic_loci'], []) - assert results['NM_018717.5:c.1473_1479del']['gene_symbol'] == 'MAML3' - assert results['NM_018717.5:c.1473_1479del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.3:p.(Gln491HisfsTer29)', 'slr': 'NP_061187.3:p.(Q491Hfs*29)'} assert results['NM_018717.5:c.1473_1479del']['submitted_variant'] == 'NC_000004.11:g.140811111_140811117del' - assert results['NM_018717.5:c.1473_1479del']['genome_context_intronic_sequence'] == '' - assert results['NM_018717.5:c.1473_1479del']['hgvs_lrg_variant'] == '' + assert results['NM_018717.5:c.1473_1479del']['gene_symbol'] == 'MAML3' + assert results['NM_018717.5:c.1473_1479del']['gene_ids'] == {'hgnc_id': 'HGNC:16272', 'entrez_gene_id': '55534', 'ucsc_id': 'uc062zte.1', 'omim_id': ['608991']} assert results['NM_018717.5:c.1473_1479del']['hgvs_transcript_variant'] == 'NM_018717.5:c.1473_1479del' + assert results['NM_018717.5:c.1473_1479del']['genome_context_intronic_sequence'] == '' + assert results['NM_018717.5:c.1473_1479del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_018717.5:c.1473_1479del']['hgvs_refseqgene_variant'] == '' - assert results['NM_018717.5:c.1473_1479del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000004.11:g.140811111_140811117del', 'vcf': {'chr': 'chr4', 'ref': 'GCTGCTGC', 'pos': '140811110', 'alt': 'G'}} + assert results['NM_018717.5:c.1473_1479del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.3:p.(Gln491HisfsTer29)', 'slr': 'NP_061187.3:p.(Q491Hfs*29)'} + assert results['NM_018717.5:c.1473_1479del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_018717.5:c.1473_1479del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_018717.5:c.1473_1479del']['alt_genomic_loci'], []) + assert results['NM_018717.5:c.1473_1479del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811117del', 'vcf': {'chr': 'chr4', 'pos': '140811110', 'ref': 'GCTGCTGC', 'alt': 'G'}} assert 'hg38' not in list(results['NM_018717.5:c.1473_1479del']['primary_assembly_loci'].keys()) - assert results['NM_018717.5:c.1473_1479del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811117del', 'vcf': {'chr': '4', 'ref': 'GCTGCTGC', 'pos': '140811110', 'alt': 'G'}} + assert results['NM_018717.5:c.1473_1479del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811117del', 'vcf': {'chr': '4', 'pos': '140811110', 'ref': 'GCTGCTGC', 'alt': 'G'}} assert 'grch38' not in list(results['NM_018717.5:c.1473_1479del']['primary_assembly_loci'].keys()) - assert results['NM_018717.5:c.1473_1479del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5'} + assert results['NM_018717.5:c.1473_1479del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3'} - assert results['flag'] == 'gene_variant' assert 'NM_018717.4:c.1468_1472dup' in list(results.keys()) - assert 
results['NM_018717.4:c.1468_1472dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_018717.4:c.1468_1472dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_018717.4:c.1468_1472dup']['alt_genomic_loci'], []) - assert results['NM_018717.4:c.1468_1472dup']['gene_symbol'] == 'MAML3' - assert results['NM_018717.4:c.1468_1472dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.2:p.(Gln491HisfsTer29)', 'slr': 'NP_061187.2:p.(Q491Hfs*29)'} assert results['NM_018717.4:c.1468_1472dup']['submitted_variant'] == 'NC_000004.11:g.140811111_140811117del' - assert results['NM_018717.4:c.1468_1472dup']['genome_context_intronic_sequence'] == '' - assert results['NM_018717.4:c.1468_1472dup']['hgvs_lrg_variant'] == '' + assert results['NM_018717.4:c.1468_1472dup']['gene_symbol'] == 'MAML3' + assert results['NM_018717.4:c.1468_1472dup']['gene_ids'] == {'hgnc_id': 'HGNC:16272', 'entrez_gene_id': '55534', 'ucsc_id': 'uc062zte.1', 'omim_id': ['608991']} assert results['NM_018717.4:c.1468_1472dup']['hgvs_transcript_variant'] == 'NM_018717.4:c.1468_1472dup' + assert results['NM_018717.4:c.1468_1472dup']['genome_context_intronic_sequence'] == '' + assert results['NM_018717.4:c.1468_1472dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_018717.4:c.1468_1472dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_018717.4:c.1468_1472dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811117del', 'vcf': {'chr': 'chr4', 'ref': 'GCTGCTGC', 'pos': '140811110', 'alt': 'G'}} - assert results['NM_018717.4:c.1468_1472dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889957_139889963del', 'vcf': {'chr': 'chr4', 'ref': 'GCTGCTGC', 'pos': '139889956', 'alt': 'G'}} - assert results['NM_018717.4:c.1468_1472dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811117del', 'vcf': {'chr': '4', 
'ref': 'GCTGCTGC', 'pos': '140811110', 'alt': 'G'}} - assert results['NM_018717.4:c.1468_1472dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889957_139889963del', 'vcf': {'chr': '4', 'ref': 'GCTGCTGC', 'pos': '139889956', 'alt': 'G'}} - assert results['NM_018717.4:c.1468_1472dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.4'} - + assert results['NM_018717.4:c.1468_1472dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.2:p.(Gln491HisfsTer29)', 'slr': 'NP_061187.2:p.(Q491Hfs*29)'} + assert results['NM_018717.4:c.1468_1472dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_018717.4:c.1468_1472dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_018717.4:c.1468_1472dup']['alt_genomic_loci'], []) + assert results['NM_018717.4:c.1468_1472dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811117del', 'vcf': {'chr': 'chr4', 'pos': '140811110', 'ref': 'GCTGCTGC', 'alt': 'G'}} + assert results['NM_018717.4:c.1468_1472dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889957_139889963del', 'vcf': {'chr': 'chr4', 'pos': '139889956', 'ref': 'GCTGCTGC', 'alt': 'G'}} + assert results['NM_018717.4:c.1468_1472dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811111_140811117del', 'vcf': {'chr': '4', 'pos': '140811110', 'ref': 'GCTGCTGC', 'alt': 'G'}} + assert results['NM_018717.4:c.1468_1472dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889957_139889963del', 'vcf': {'chr': '4', 'pos': '139889956', 'ref': 'GCTGCTGC', 'alt': 'G'}} + assert results['NM_018717.4:c.1468_1472dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.4', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.2'} def test_variant88(self): variant = 'NC_000004.11:g.140811117C>A' @@ -2811,64 +2851,65 @@ def test_variant88(self): print(results) assert results['flag'] == 'gene_variant' - assert 'NM_018717.4:c.1472_1473insTCAGCAGCAGCA' in list(results.keys()) - assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['alt_genomic_loci'], []) - assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['gene_symbol'] == 'MAML3' - assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.2:p.(Gln490_Gln491insHisGlnGlnGln)', 'slr': 'NP_061187.2:p.(Q490_Q491insHQQQ)'} - assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['submitted_variant'] == 'NC_000004.11:g.140811117C>A' - assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['genome_context_intronic_sequence'] == '' - assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['hgvs_lrg_variant'] == '' - assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['hgvs_transcript_variant'] == 'NM_018717.4:c.1472_1473insTCAGCAGCAGCA' - assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['hgvs_refseqgene_variant'] == '' - assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117C>A', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '140811117', 'alt': 'A'}} - assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889963C>A', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '139889963', 'alt': 'A'}} - assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000004.11:g.140811117C>A', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '140811117', 'alt': 'A'}} - assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889963C>A', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '139889963', 'alt': 'A'}} - assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.4'} - assert 'NM_018717.5:c.1473G>T' in list(results.keys()) - assert results['NM_018717.5:c.1473G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_018717.5:c.1473G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_018717.5:c.1473G>T']['alt_genomic_loci'], []) - assert results['NM_018717.5:c.1473G>T']['gene_symbol'] == 'MAML3' - assert results['NM_018717.5:c.1473G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.3:p.(Gln491His)', 'slr': 'NP_061187.3:p.(Q491H)'} assert results['NM_018717.5:c.1473G>T']['submitted_variant'] == 'NC_000004.11:g.140811117C>A' - assert results['NM_018717.5:c.1473G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_018717.5:c.1473G>T']['hgvs_lrg_variant'] == '' + assert results['NM_018717.5:c.1473G>T']['gene_symbol'] == 'MAML3' + assert results['NM_018717.5:c.1473G>T']['gene_ids'] == {'hgnc_id': 'HGNC:16272', 'entrez_gene_id': '55534', 'ucsc_id': 'uc062zte.1', 'omim_id': ['608991']} assert results['NM_018717.5:c.1473G>T']['hgvs_transcript_variant'] == 'NM_018717.5:c.1473G>T' + assert results['NM_018717.5:c.1473G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_018717.5:c.1473G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_018717.5:c.1473G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_018717.5:c.1473G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000004.11:g.140811117C>A', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '140811117', 'alt': 'A'}} + assert results['NM_018717.5:c.1473G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.3:p.(Gln491His)', 'slr': 'NP_061187.3:p.(Q491H)'} + assert results['NM_018717.5:c.1473G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_018717.5:c.1473G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_018717.5:c.1473G>T']['alt_genomic_loci'], []) + assert results['NM_018717.5:c.1473G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117C>A', 'vcf': {'chr': 'chr4', 'pos': '140811117', 'ref': 'C', 'alt': 'A'}} assert 'hg38' not in list(results['NM_018717.5:c.1473G>T']['primary_assembly_loci'].keys()) - assert results['NM_018717.5:c.1473G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117C>A', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '140811117', 'alt': 'A'}} + assert results['NM_018717.5:c.1473G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117C>A', 'vcf': {'chr': '4', 'pos': '140811117', 'ref': 'C', 'alt': 'A'}} assert 'grch38' not in list(results['NM_018717.5:c.1473G>T']['primary_assembly_loci'].keys()) - assert results['NM_018717.5:c.1473G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5'} + assert results['NM_018717.5:c.1473G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.3'} + assert 'NM_018717.4:c.1472_1473insTCAGCAGCAGCA' in list(results.keys()) + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['submitted_variant'] == 'NC_000004.11:g.140811117C>A' + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['gene_symbol'] == 'MAML3' + assert 
results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['gene_ids'] == {'hgnc_id': 'HGNC:16272', 'entrez_gene_id': '55534', 'ucsc_id': 'uc062zte.1', 'omim_id': ['608991']} + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['hgvs_transcript_variant'] == 'NM_018717.4:c.1472_1473insTCAGCAGCAGCA' + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['genome_context_intronic_sequence'] == '' + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['hgvs_refseqgene_variant'] == '' + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061187.2:p.(Gln490_Gln491insHisGlnGlnGln)', 'slr': 'NP_061187.2:p.(Q490_Q491insHQQQ)'} + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['alt_genomic_loci'], []) + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117C>A', 'vcf': {'chr': 'chr4', 'pos': '140811117', 'ref': 'C', 'alt': 'A'}} + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889963C>A', 'vcf': {'chr': 'chr4', 'pos': '139889963', 'ref': 'C', 'alt': 'A'}} + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.140811117C>A', 'vcf': {'chr': '4', 'pos': '140811117', 'ref': 'C', 'alt': 'A'}} + assert results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.139889963C>A', 'vcf': {'chr': '4', 'pos': '139889963', 'ref': 'C', 'alt': 'A'}} + assert 
results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_018717.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061187.2'} def test_variant89(self): variant = 'NC_000002.11:g.73675227_73675228insCTC' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_015120.4:c.1573_1579=' in list(results.keys()) - assert results['NM_015120.4:c.1573_1579=']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.1573_1579=' - assert results['NM_015120.4:c.1573_1579=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_015120.4:c.1573_1579=']['alt_genomic_loci'], []) - assert results['NM_015120.4:c.1573_1579=']['gene_symbol'] == 'ALMS1' - assert results['NM_015120.4:c.1573_1579=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Ser525=)', 'slr': 'NP_055935.4:p.(S525=)'} assert results['NM_015120.4:c.1573_1579=']['submitted_variant'] == 'NC_000002.11:g.73675227_73675228insCTC' - assert results['NM_015120.4:c.1573_1579=']['genome_context_intronic_sequence'] == '' - assert results['NM_015120.4:c.1573_1579=']['hgvs_lrg_variant'] == 'LRG_741:g.67345_67351=' + assert results['NM_015120.4:c.1573_1579=']['gene_symbol'] == 'ALMS1' + assert results['NM_015120.4:c.1573_1579=']['gene_ids'] == {'hgnc_id': 'HGNC:428', 'entrez_gene_id': '7840', 'ucsc_id': 'uc032nrd.1', 'omim_id': ['606844']} assert results['NM_015120.4:c.1573_1579=']['hgvs_transcript_variant'] == 'NM_015120.4:c.1573_1579=' + assert results['NM_015120.4:c.1573_1579=']['genome_context_intronic_sequence'] == '' + assert results['NM_015120.4:c.1573_1579=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_015120.4:c.1573_1579=']['hgvs_refseqgene_variant'] == 'NG_011690.1:g.67345_67351=' - assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000002.11:g.73675228_73675230dup', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '73675227', 'alt': 'TCTC'}} - assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448097_73448103=', 'vcf': {'chr': 'chr2', 'ref': 'TCTCCTC', 'pos': '73448097', 'alt': 'TCTCCTC'}} - assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675228_73675230dup', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '73675227', 'alt': 'TCTC'}} - assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448097_73448103=', 'vcf': {'chr': '2', 'ref': 'TCTCCTC', 'pos': '73448097', 'alt': 'TCTCCTC'}} - assert results['NM_015120.4:c.1573_1579=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} - - assert results['flag'] == 'gene_variant' + assert results['NM_015120.4:c.1573_1579=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Ser525=)', 'slr': 'NP_055935.4:p.(S525=)'} + assert results['NM_015120.4:c.1573_1579=']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.1573_1579=' + assert results['NM_015120.4:c.1573_1579=']['hgvs_lrg_variant'] == 'LRG_741:g.67345_67351=' + self.assertCountEqual(results['NM_015120.4:c.1573_1579=']['alt_genomic_loci'], []) + assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675228_73675230dup', 'vcf': {'chr': 'chr2', 'pos': '73675227', 'ref': 'T', 'alt': 'TCTC'}} + assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448097_73448103=', 'vcf': {'chr': 'chr2', 
'pos': '73448097', 'ref': 'TCTCCTC', 'alt': 'TCTCCTC'}} + assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675228_73675230dup', 'vcf': {'chr': '2', 'pos': '73675227', 'ref': 'T', 'alt': 'TCTC'}} + assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448097_73448103=', 'vcf': {'chr': '2', 'pos': '73448097', 'ref': 'TCTCCTC', 'alt': 'TCTCCTC'}} + assert results['NM_015120.4:c.1573_1579=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} def test_variant90(self): variant = '9-136132908-T-TC' @@ -2877,22 +2918,22 @@ def test_variant90(self): assert results['flag'] == 'gene_variant' assert 'NM_020469.2:c.260_262=' in list(results.keys()) - assert results['NM_020469.2:c.260_262=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_020469.2:c.260_262=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_020469.2:c.260_262=']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}]) - assert results['NM_020469.2:c.260_262=']['gene_symbol'] == 'ABO' - assert 
results['NM_020469.2:c.260_262=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Val87=)', 'slr': 'NP_065202.2:p.(V87=)'} assert results['NM_020469.2:c.260_262=']['submitted_variant'] == '9-136132908-T-TC' - assert results['NM_020469.2:c.260_262=']['genome_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.260_262=']['hgvs_lrg_variant'] == '' + assert results['NM_020469.2:c.260_262=']['gene_symbol'] == 'ABO' + assert results['NM_020469.2:c.260_262=']['gene_ids'] == {'hgnc_id': 'HGNC:79', 'entrez_gene_id': '28', 'ucsc_id': 'uc064wua.1', 'omim_id': ['110300']} assert results['NM_020469.2:c.260_262=']['hgvs_transcript_variant'] == 'NM_020469.2:c.260_262=' + assert results['NM_020469.2:c.260_262=']['genome_context_intronic_sequence'] == '' + assert results['NM_020469.2:c.260_262=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_020469.2:c.260_262=']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20145_20147=' - assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '136132908', 'alt': 'TC'}} - assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '133257521', 'alt': 'TC'}} - assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '136132908', 'alt': 'TC'}} - assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '133257521', 'alt': 'TC'}} - assert results['NM_020469.2:c.260_262=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 
'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2'} - + assert results['NM_020469.2:c.260_262=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Val87=)', 'slr': 'NP_065202.2:p.(V87=)'} + assert results['NM_020469.2:c.260_262=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_020469.2:c.260_262=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_020469.2:c.260_262=']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'HG79_PATCH', 'pos': '83614', 'ref': 'TCA', 'alt': 'TCA'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'NW_003315925.1', 'pos': '83614', 'ref': 'TCA', 'alt': 'TCA'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'HG2030_PATCH', 'pos': '83614', 'ref': 'TCA', 'alt': 'TCA'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'NW_009646201.1', 'pos': '83614', 'ref': 'TCA', 'alt': 'TCA'}}}]) + assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insC', 'vcf': {'chr': 'chr9', 'pos': '136132908', 'ref': 'T', 'alt': 'TC'}} + assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insC', 'vcf': {'chr': 'chr9', 'pos': '133257521', 'ref': 'T', 'alt': 'TC'}} + assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insC', 'vcf': {'chr': '9', 'pos': '136132908', 'ref': 'T', 'alt': 'TC'}} + assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insC', 'vcf': {'chr': '9', 'pos': '133257521', 'ref': 'T', 'alt': 'TC'}} + 
assert results['NM_020469.2:c.260_262=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1'} def test_variant91(self): variant = '9-136132908-TAC-TCA' @@ -2901,46 +2942,46 @@ def test_variant91(self): assert results['flag'] == 'gene_variant' assert 'NM_020469.2:c.259del' in list(results.keys()) - assert results['NM_020469.2:c.259del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_020469.2:c.259del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_020469.2:c.259del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83618del', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83618del', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83618del', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83618del', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}]) - assert results['NM_020469.2:c.259del']['gene_symbol'] == 'ABO' - assert results['NM_020469.2:c.259del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Val87Ter)', 'slr': 'NP_065202.2:p.(V87*)'} assert results['NM_020469.2:c.259del']['submitted_variant'] == '9-136132908-TAC-TCA' - assert results['NM_020469.2:c.259del']['genome_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.259del']['hgvs_lrg_variant'] == '' + assert results['NM_020469.2:c.259del']['gene_symbol'] == 'ABO' + assert results['NM_020469.2:c.259del']['gene_ids'] == {'hgnc_id': 'HGNC:79', 'entrez_gene_id': '28', 'ucsc_id': 'uc064wua.1', 'omim_id': ['110300']} assert 
results['NM_020469.2:c.259del']['hgvs_transcript_variant'] == 'NM_020469.2:c.259del' + assert results['NM_020469.2:c.259del']['genome_context_intronic_sequence'] == '' + assert results['NM_020469.2:c.259del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_020469.2:c.259del']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20144del' - assert results['NM_020469.2:c.259del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910delinsCA', 'vcf': {'chr': 'chr9', 'ref': 'AC', 'pos': '136132909', 'alt': 'CA'}} - assert results['NM_020469.2:c.259del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523delinsCA', 'vcf': {'chr': 'chr9', 'ref': 'AC', 'pos': '133257522', 'alt': 'CA'}} - assert results['NM_020469.2:c.259del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910delinsCA', 'vcf': {'chr': '9', 'ref': 'AC', 'pos': '136132909', 'alt': 'CA'}} - assert results['NM_020469.2:c.259del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523delinsCA', 'vcf': {'chr': '9', 'ref': 'AC', 'pos': '133257522', 'alt': 'CA'}} - assert results['NM_020469.2:c.259del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2'} - + assert results['NM_020469.2:c.259del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Val87Ter)', 'slr': 'NP_065202.2:p.(V87*)'} + assert results['NM_020469.2:c.259del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_020469.2:c.259del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_020469.2:c.259del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83618del', 'vcf': {'chr': 'HG79_PATCH', 'pos': 
'83616', 'ref': 'AC', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83618del', 'vcf': {'chr': 'NW_003315925.1', 'pos': '83616', 'ref': 'AC', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83618del', 'vcf': {'chr': 'HG2030_PATCH', 'pos': '83616', 'ref': 'AC', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83618del', 'vcf': {'chr': 'NW_009646201.1', 'pos': '83616', 'ref': 'AC', 'alt': 'A'}}}]) + assert results['NM_020469.2:c.259del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910delinsCA', 'vcf': {'chr': 'chr9', 'pos': '136132909', 'ref': 'AC', 'alt': 'CA'}} + assert results['NM_020469.2:c.259del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523delinsCA', 'vcf': {'chr': 'chr9', 'pos': '133257522', 'ref': 'AC', 'alt': 'CA'}} + assert results['NM_020469.2:c.259del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910delinsCA', 'vcf': {'chr': '9', 'pos': '136132909', 'ref': 'AC', 'alt': 'CA'}} + assert results['NM_020469.2:c.259del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523delinsCA', 'vcf': {'chr': '9', 'pos': '133257522', 'ref': 'AC', 'alt': 'CA'}} + assert results['NM_020469.2:c.259del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1'} def test_variant92(self): variant = '9-136132908-TA-TA' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_020469.2:c.261del' in list(results.keys()) - assert results['NM_020469.2:c.261del']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_020469.2:c.261del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_020469.2:c.261del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83615del', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83615del', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615del', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615del', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}]) - assert results['NM_020469.2:c.261del']['gene_symbol'] == 'ABO' - assert results['NM_020469.2:c.261del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Thr88ProfsTer31)', 'slr': 'NP_065202.2:p.(T88Pfs*31)'} assert results['NM_020469.2:c.261del']['submitted_variant'] == '9-136132908-TA-TA' - assert results['NM_020469.2:c.261del']['genome_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.261del']['hgvs_lrg_variant'] == '' + assert results['NM_020469.2:c.261del']['gene_symbol'] == 'ABO' + assert results['NM_020469.2:c.261del']['gene_ids'] == {'hgnc_id': 'HGNC:79', 'entrez_gene_id': '28', 'ucsc_id': 'uc064wua.1', 'omim_id': ['110300']} assert results['NM_020469.2:c.261del']['hgvs_transcript_variant'] == 'NM_020469.2:c.261del' + assert results['NM_020469.2:c.261del']['genome_context_intronic_sequence'] == '' + assert results['NM_020469.2:c.261del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_020469.2:c.261del']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20146del' - assert results['NM_020469.2:c.261del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909=', 'vcf': {'chr': 'chr9', 'ref': 'TA', 'pos': '136132908', 'alt': 'TA'}} - assert 
results['NM_020469.2:c.261del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522=', 'vcf': {'chr': 'chr9', 'ref': 'TA', 'pos': '133257521', 'alt': 'TA'}} - assert results['NM_020469.2:c.261del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909=', 'vcf': {'chr': '9', 'ref': 'TA', 'pos': '136132908', 'alt': 'TA'}} - assert results['NM_020469.2:c.261del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522=', 'vcf': {'chr': '9', 'ref': 'TA', 'pos': '133257521', 'alt': 'TA'}} - assert results['NM_020469.2:c.261del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2'} - - assert results['flag'] == 'gene_variant' + assert results['NM_020469.2:c.261del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Thr88ProfsTer31)', 'slr': 'NP_065202.2:p.(T88Pfs*31)'} + assert results['NM_020469.2:c.261del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_020469.2:c.261del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_020469.2:c.261del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83615del', 'vcf': {'chr': 'HG79_PATCH', 'pos': '83614', 'ref': 'TC', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83615del', 'vcf': {'chr': 'NW_003315925.1', 'pos': '83614', 'ref': 'TC', 'alt': 'T'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615del', 'vcf': {'chr': 'HG2030_PATCH', 'pos': '83614', 'ref': 'TC', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615del', 'vcf': {'chr': 'NW_009646201.1', 'pos': '83614', 'ref': 'TC', 'alt': 'T'}}}]) + assert results['NM_020469.2:c.261del']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909=', 'vcf': {'chr': 'chr9', 'pos': '136132908', 'ref': 'TA', 'alt': 'TA'}} + assert results['NM_020469.2:c.261del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522=', 'vcf': {'chr': 'chr9', 'pos': '133257521', 'ref': 'TA', 'alt': 'TA'}} + assert results['NM_020469.2:c.261del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909=', 'vcf': {'chr': '9', 'pos': '136132908', 'ref': 'TA', 'alt': 'TA'}} + assert results['NM_020469.2:c.261del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522=', 'vcf': {'chr': '9', 'pos': '133257521', 'ref': 'TA', 'alt': 'TA'}} + assert results['NM_020469.2:c.261del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1'} def test_variant93(self): variant = 'NM_020469.2:c.258delG' @@ -2949,22 +2990,22 @@ def test_variant93(self): assert results['flag'] == 'gene_variant' assert 'NM_020469.2:c.259del' in list(results.keys()) - assert results['NM_020469.2:c.259del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_020469.2:c.259del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_020469.2:c.259del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83618del', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83618del', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83618del', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83618del', 
'vcf': {'chr': 'NW_009646201.1', 'ref': 'AC', 'pos': '83616', 'alt': 'A'}}}]) - assert results['NM_020469.2:c.259del']['gene_symbol'] == 'ABO' - assert results['NM_020469.2:c.259del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Val87Ter)', 'slr': 'NP_065202.2:p.(V87*)'} assert results['NM_020469.2:c.259del']['submitted_variant'] == 'NM_020469.2:c.258delG' - assert results['NM_020469.2:c.259del']['genome_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.259del']['hgvs_lrg_variant'] == '' + assert results['NM_020469.2:c.259del']['gene_symbol'] == 'ABO' + assert results['NM_020469.2:c.259del']['gene_ids'] == {'hgnc_id': 'HGNC:79', 'entrez_gene_id': '28', 'ucsc_id': 'uc064wua.1', 'omim_id': ['110300']} assert results['NM_020469.2:c.259del']['hgvs_transcript_variant'] == 'NM_020469.2:c.259del' + assert results['NM_020469.2:c.259del']['genome_context_intronic_sequence'] == '' + assert results['NM_020469.2:c.259del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_020469.2:c.259del']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20144del' - assert results['NM_020469.2:c.259del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910delinsCA', 'vcf': {'chr': 'chr9', 'ref': 'AC', 'pos': '136132909', 'alt': 'CA'}} - assert results['NM_020469.2:c.259del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523delinsCA', 'vcf': {'chr': 'chr9', 'ref': 'AC', 'pos': '133257522', 'alt': 'CA'}} - assert results['NM_020469.2:c.259del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910delinsCA', 'vcf': {'chr': '9', 'ref': 'AC', 'pos': '136132909', 'alt': 'CA'}} - assert results['NM_020469.2:c.259del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523delinsCA', 'vcf': {'chr': '9', 'ref': 'AC', 'pos': '133257522', 'alt': 'CA'}} - 
assert results['NM_020469.2:c.259del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2'} - + assert results['NM_020469.2:c.259del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Val87Ter)', 'slr': 'NP_065202.2:p.(V87*)'} + assert results['NM_020469.2:c.259del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_020469.2:c.259del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_020469.2:c.259del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83618del', 'vcf': {'chr': 'HG79_PATCH', 'pos': '83616', 'ref': 'AC', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83618del', 'vcf': {'chr': 'NW_003315925.1', 'pos': '83616', 'ref': 'AC', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83618del', 'vcf': {'chr': 'HG2030_PATCH', 'pos': '83616', 'ref': 'AC', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83618del', 'vcf': {'chr': 'NW_009646201.1', 'pos': '83616', 'ref': 'AC', 'alt': 'A'}}}]) + assert results['NM_020469.2:c.259del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910delinsCA', 'vcf': {'chr': 'chr9', 'pos': '136132909', 'ref': 'AC', 'alt': 'CA'}} + assert results['NM_020469.2:c.259del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523delinsCA', 'vcf': {'chr': 'chr9', 'pos': '133257522', 'ref': 'AC', 'alt': 'CA'}} + assert results['NM_020469.2:c.259del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910delinsCA', 'vcf': {'chr': '9', 'pos': '136132909', 'ref': 'AC', 'alt': 'CA'}} + assert results['NM_020469.2:c.259del']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523delinsCA', 'vcf': {'chr': '9', 'pos': '133257522', 'ref': 'AC', 'alt': 'CA'}} + assert results['NM_020469.2:c.259del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1'} def test_variant94(self): variant = 'NM_020469.2:c.260_262TGA=' @@ -2973,46 +3014,46 @@ def test_variant94(self): assert results['flag'] == 'gene_variant' assert 'NM_020469.2:c.260_262=' in list(results.keys()) - assert results['NM_020469.2:c.260_262=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_020469.2:c.260_262=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_020469.2:c.260_262=']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'TCA', 'pos': '83614', 'alt': 'TCA'}}}]) - assert results['NM_020469.2:c.260_262=']['gene_symbol'] == 'ABO' - assert results['NM_020469.2:c.260_262=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Val87=)', 'slr': 'NP_065202.2:p.(V87=)'} assert results['NM_020469.2:c.260_262=']['submitted_variant'] == 'NM_020469.2:c.260_262TGA=' - assert results['NM_020469.2:c.260_262=']['genome_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.260_262=']['hgvs_lrg_variant'] == '' + assert 
results['NM_020469.2:c.260_262=']['gene_symbol'] == 'ABO' + assert results['NM_020469.2:c.260_262=']['gene_ids'] == {'hgnc_id': 'HGNC:79', 'entrez_gene_id': '28', 'ucsc_id': 'uc064wua.1', 'omim_id': ['110300']} assert results['NM_020469.2:c.260_262=']['hgvs_transcript_variant'] == 'NM_020469.2:c.260_262=' + assert results['NM_020469.2:c.260_262=']['genome_context_intronic_sequence'] == '' + assert results['NM_020469.2:c.260_262=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_020469.2:c.260_262=']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20145_20147=' - assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '136132908', 'alt': 'TC'}} - assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '133257521', 'alt': 'TC'}} - assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '136132908', 'alt': 'TC'}} - assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '133257521', 'alt': 'TC'}} - assert results['NM_020469.2:c.260_262=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2'} - + assert results['NM_020469.2:c.260_262=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Val87=)', 'slr': 'NP_065202.2:p.(V87=)'} + assert results['NM_020469.2:c.260_262=']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_020469.2:c.260_262=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_020469.2:c.260_262=']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'HG79_PATCH', 'pos': '83614', 'ref': 'TCA', 'alt': 'TCA'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83616=', 'vcf': {'chr': 'NW_003315925.1', 'pos': '83614', 'ref': 'TCA', 'alt': 'TCA'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'HG2030_PATCH', 'pos': '83614', 'ref': 'TCA', 'alt': 'TCA'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83616=', 'vcf': {'chr': 'NW_009646201.1', 'pos': '83614', 'ref': 'TCA', 'alt': 'TCA'}}}]) + assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insC', 'vcf': {'chr': 'chr9', 'pos': '136132908', 'ref': 'T', 'alt': 'TC'}} + assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insC', 'vcf': {'chr': 'chr9', 'pos': '133257521', 'ref': 'T', 'alt': 'TC'}} + assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insC', 'vcf': {'chr': '9', 'pos': '136132908', 'ref': 'T', 'alt': 'TC'}} + assert results['NM_020469.2:c.260_262=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insC', 'vcf': {'chr': '9', 'pos': '133257521', 'ref': 'T', 'alt': 'TC'}} + assert results['NM_020469.2:c.260_262=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1'} def test_variant95(self): variant = 'NM_020469.2:c.261delG' results = self.vv.validate(variant, 'GRCh37', 
'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_020469.2:c.261del' in list(results.keys()) - assert results['NM_020469.2:c.261del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_020469.2:c.261del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_020469.2:c.261del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83615del', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83615del', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615del', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615del', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'TC', 'pos': '83614', 'alt': 'T'}}}]) - assert results['NM_020469.2:c.261del']['gene_symbol'] == 'ABO' - assert results['NM_020469.2:c.261del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Thr88ProfsTer31)', 'slr': 'NP_065202.2:p.(T88Pfs*31)'} assert results['NM_020469.2:c.261del']['submitted_variant'] == 'NM_020469.2:c.261delG' - assert results['NM_020469.2:c.261del']['genome_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.261del']['hgvs_lrg_variant'] == '' + assert results['NM_020469.2:c.261del']['gene_symbol'] == 'ABO' + assert results['NM_020469.2:c.261del']['gene_ids'] == {'hgnc_id': 'HGNC:79', 'entrez_gene_id': '28', 'ucsc_id': 'uc064wua.1', 'omim_id': ['110300']} assert results['NM_020469.2:c.261del']['hgvs_transcript_variant'] == 'NM_020469.2:c.261del' + assert results['NM_020469.2:c.261del']['genome_context_intronic_sequence'] == '' + assert results['NM_020469.2:c.261del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_020469.2:c.261del']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20146del' 
- assert results['NM_020469.2:c.261del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909=', 'vcf': {'chr': 'chr9', 'ref': 'TA', 'pos': '136132908', 'alt': 'TA'}} - assert results['NM_020469.2:c.261del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522=', 'vcf': {'chr': 'chr9', 'ref': 'TA', 'pos': '133257521', 'alt': 'TA'}} - assert results['NM_020469.2:c.261del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909=', 'vcf': {'chr': '9', 'ref': 'TA', 'pos': '136132908', 'alt': 'TA'}} - assert results['NM_020469.2:c.261del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522=', 'vcf': {'chr': '9', 'ref': 'TA', 'pos': '133257521', 'alt': 'TA'}} - assert results['NM_020469.2:c.261del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2'} - - assert results['flag'] == 'gene_variant' + assert results['NM_020469.2:c.261del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Thr88ProfsTer31)', 'slr': 'NP_065202.2:p.(T88Pfs*31)'} + assert results['NM_020469.2:c.261del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_020469.2:c.261del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_020469.2:c.261del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83615del', 'vcf': {'chr': 'HG79_PATCH', 'pos': '83614', 'ref': 'TC', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83615del', 'vcf': {'chr': 'NW_003315925.1', 'pos': '83614', 'ref': 'TC', 'alt': 'T'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615del', 'vcf': {'chr': 'HG2030_PATCH', 'pos': '83614', 'ref': 'TC', 'alt': 'T'}}}, 
{'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615del', 'vcf': {'chr': 'NW_009646201.1', 'pos': '83614', 'ref': 'TC', 'alt': 'T'}}}]) + assert results['NM_020469.2:c.261del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909=', 'vcf': {'chr': 'chr9', 'pos': '136132908', 'ref': 'TA', 'alt': 'TA'}} + assert results['NM_020469.2:c.261del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522=', 'vcf': {'chr': 'chr9', 'pos': '133257521', 'ref': 'TA', 'alt': 'TA'}} + assert results['NM_020469.2:c.261del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909=', 'vcf': {'chr': '9', 'pos': '136132908', 'ref': 'TA', 'alt': 'TA'}} + assert results['NM_020469.2:c.261del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522=', 'vcf': {'chr': '9', 'pos': '133257521', 'ref': 'TA', 'alt': 'TA'}} + assert results['NM_020469.2:c.261del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1'} def test_variant96(self): variant = 'NM_020469.2:c.261dupG' @@ -3021,22 +3062,22 @@ def test_variant96(self): assert results['flag'] == 'gene_variant' assert 'NM_020469.2:c.261dup' in list(results.keys()) - assert results['NM_020469.2:c.261dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_020469.2:c.261dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_020469.2:c.261dup']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83615dup', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'T', 'pos': '83614', 'alt': 'TC'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83615dup', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'T', 'pos': '83614', 'alt': 
'TC'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615dup', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'T', 'pos': '83614', 'alt': 'TC'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615dup', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'T', 'pos': '83614', 'alt': 'TC'}}}]) - assert results['NM_020469.2:c.261dup']['gene_symbol'] == 'ABO' - assert results['NM_020469.2:c.261dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Thr88AspfsTer107)', 'slr': 'NP_065202.2:p.(T88Dfs*107)'} assert results['NM_020469.2:c.261dup']['submitted_variant'] == 'NM_020469.2:c.261dupG' - assert results['NM_020469.2:c.261dup']['genome_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.261dup']['hgvs_lrg_variant'] == '' + assert results['NM_020469.2:c.261dup']['gene_symbol'] == 'ABO' + assert results['NM_020469.2:c.261dup']['gene_ids'] == {'hgnc_id': 'HGNC:79', 'entrez_gene_id': '28', 'ucsc_id': 'uc064wua.1', 'omim_id': ['110300']} assert results['NM_020469.2:c.261dup']['hgvs_transcript_variant'] == 'NM_020469.2:c.261dup' + assert results['NM_020469.2:c.261dup']['genome_context_intronic_sequence'] == '' + assert results['NM_020469.2:c.261dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_020469.2:c.261dup']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20146dup' - assert results['NM_020469.2:c.261dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insCC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '136132908', 'alt': 'TCC'}} - assert results['NM_020469.2:c.261dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insCC', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '133257521', 'alt': 'TCC'}} - assert results['NM_020469.2:c.261dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insCC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '136132908', 'alt': 
'TCC'}} - assert results['NM_020469.2:c.261dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insCC', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '133257521', 'alt': 'TCC'}} - assert results['NM_020469.2:c.261dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2'} - + assert results['NM_020469.2:c.261dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Thr88AspfsTer107)', 'slr': 'NP_065202.2:p.(T88Dfs*107)'} + assert results['NM_020469.2:c.261dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_020469.2:c.261dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_020469.2:c.261dup']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83615dup', 'vcf': {'chr': 'HG79_PATCH', 'pos': '83614', 'ref': 'T', 'alt': 'TC'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83615dup', 'vcf': {'chr': 'NW_003315925.1', 'pos': '83614', 'ref': 'T', 'alt': 'TC'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615dup', 'vcf': {'chr': 'HG2030_PATCH', 'pos': '83614', 'ref': 'T', 'alt': 'TC'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83615dup', 'vcf': {'chr': 'NW_009646201.1', 'pos': '83614', 'ref': 'T', 'alt': 'TC'}}}]) + assert results['NM_020469.2:c.261dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insCC', 'vcf': {'chr': 'chr9', 'pos': '136132908', 'ref': 'T', 'alt': 'TCC'}} + assert results['NM_020469.2:c.261dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insCC', 'vcf': {'chr': 'chr9', 'pos': '133257521', 'ref': 'T', 'alt': 'TCC'}} + assert results['NM_020469.2:c.261dup']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000009.11:g.136132908_136132909insCC', 'vcf': {'chr': '9', 'pos': '136132908', 'ref': 'T', 'alt': 'TCC'}} + assert results['NM_020469.2:c.261dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257521_133257522insCC', 'vcf': {'chr': '9', 'pos': '133257521', 'ref': 'T', 'alt': 'TCC'}} + assert results['NM_020469.2:c.261dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1'} def test_variant97(self): variant = 'NM_020469.2:c.261_262insTT' @@ -3045,22 +3086,22 @@ def test_variant97(self): assert results['flag'] == 'gene_variant' assert 'NM_020469.2:c.261_262insTT' in list(results.keys()) - assert results['NM_020469.2:c.261_262insTT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_020469.2:c.261_262insTT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_020469.2:c.261_262insTT']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83615insAA', 'vcf': {'chr': 'HG79_PATCH', 'ref': 'T', 'pos': '83614', 'alt': 'TAA'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83615insAA', 'vcf': {'chr': 'NW_003315925.1', 'ref': 'T', 'pos': '83614', 'alt': 'TAA'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83615insAA', 'vcf': {'chr': 'HG2030_PATCH', 'ref': 'T', 'pos': '83614', 'alt': 'TAA'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83615insAA', 'vcf': {'chr': 'NW_009646201.1', 'ref': 'T', 'pos': '83614', 'alt': 'TAA'}}}]) - assert results['NM_020469.2:c.261_262insTT']['gene_symbol'] == 'ABO' - assert results['NM_020469.2:c.261_262insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Thr88LeufsTer32)', 'slr': 'NP_065202.2:p.(T88Lfs*32)'} assert 
results['NM_020469.2:c.261_262insTT']['submitted_variant'] == 'NM_020469.2:c.261_262insTT' - assert results['NM_020469.2:c.261_262insTT']['genome_context_intronic_sequence'] == '' - assert results['NM_020469.2:c.261_262insTT']['hgvs_lrg_variant'] == '' + assert results['NM_020469.2:c.261_262insTT']['gene_symbol'] == 'ABO' + assert results['NM_020469.2:c.261_262insTT']['gene_ids'] == {'hgnc_id': 'HGNC:79', 'entrez_gene_id': '28', 'ucsc_id': 'uc064wua.1', 'omim_id': ['110300']} assert results['NM_020469.2:c.261_262insTT']['hgvs_transcript_variant'] == 'NM_020469.2:c.261_262insTT' + assert results['NM_020469.2:c.261_262insTT']['genome_context_intronic_sequence'] == '' + assert results['NM_020469.2:c.261_262insTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_020469.2:c.261_262insTT']['hgvs_refseqgene_variant'] == 'NG_006669.1:g.20146_20147insTT' - assert results['NM_020469.2:c.261_262insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910insACA', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '136132908', 'alt': 'TAAC'}} - assert results['NM_020469.2:c.261_262insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523insACA', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '133257521', 'alt': 'TAAC'}} - assert results['NM_020469.2:c.261_262insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910insACA', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '136132908', 'alt': 'TAAC'}} - assert results['NM_020469.2:c.261_262insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523insACA', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '133257521', 'alt': 'TAAC'}} - assert results['NM_020469.2:c.261_262insTT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 
'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2'} - + assert results['NM_020469.2:c.261_262insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065202.2(LRG_792p1):p.(Thr88LeufsTer32)', 'slr': 'NP_065202.2:p.(T88Lfs*32)'} + assert results['NM_020469.2:c.261_262insTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_020469.2:c.261_262insTT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_020469.2:c.261_262insTT']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83615insAA', 'vcf': {'chr': 'HG79_PATCH', 'pos': '83614', 'ref': 'T', 'alt': 'TAA'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315925.1:g.83614_83615insAA', 'vcf': {'chr': 'NW_003315925.1', 'pos': '83614', 'ref': 'T', 'alt': 'TAA'}}}, {'grch38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83615insAA', 'vcf': {'chr': 'HG2030_PATCH', 'pos': '83614', 'ref': 'T', 'alt': 'TAA'}}}, {'hg38': {'hgvs_genomic_description': 'NW_009646201.1:g.83614_83615insAA', 'vcf': {'chr': 'NW_009646201.1', 'pos': '83614', 'ref': 'T', 'alt': 'TAA'}}}]) + assert results['NM_020469.2:c.261_262insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910insACA', 'vcf': {'chr': 'chr9', 'pos': '136132908', 'ref': 'T', 'alt': 'TAAC'}} + assert results['NM_020469.2:c.261_262insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523insACA', 'vcf': {'chr': 'chr9', 'pos': '133257521', 'ref': 'T', 'alt': 'TAAC'}} + assert results['NM_020469.2:c.261_262insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.136132909_136132910insACA', 'vcf': {'chr': '9', 'pos': '136132908', 'ref': 'T', 'alt': 'TAAC'}} + assert results['NM_020469.2:c.261_262insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.133257522_133257523insACA', 'vcf': {'chr': '9', 'pos': '133257521', 'ref': 'T', 'alt': 
'TAAC'}} + assert results['NM_020469.2:c.261_262insTT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020469.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065202.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_006669.1'} def test_variant98(self): variant = 'NC_000019.10:g.50378563_50378564insTAC' @@ -3069,39 +3110,40 @@ def test_variant98(self): assert results['flag'] == 'gene_variant' assert 'NM_007121.5:c.515A>T' in list(results.keys()) - assert results['NM_007121.5:c.515A>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_007121.5:c.515A>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_007121.5:c.515A>T']['alt_genomic_loci'], []) - assert results['NM_007121.5:c.515A>T']['gene_symbol'] == 'NR1H2' - assert results['NM_007121.5:c.515A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172Ile)', 'slr': 'NP_009052.3:p.(K172I)'} assert results['NM_007121.5:c.515A>T']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564insTAC' - assert results['NM_007121.5:c.515A>T']['genome_context_intronic_sequence'] == '' - assert results['NM_007121.5:c.515A>T']['hgvs_lrg_variant'] == '' + assert results['NM_007121.5:c.515A>T']['gene_symbol'] == 'NR1H2' + assert results['NM_007121.5:c.515A>T']['gene_ids'] == {'hgnc_id': 'HGNC:7965', 'entrez_gene_id': '7376', 'ucsc_id': 'uc010enw.5', 'omim_id': ['600380']} assert results['NM_007121.5:c.515A>T']['hgvs_transcript_variant'] == 'NM_007121.5:c.515A>T' + assert results['NM_007121.5:c.515A>T']['genome_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.515A>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007121.5:c.515A>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_007121.5:c.515A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'ATAC'}} 
- assert results['NM_007121.5:c.515A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'ATAC'}} - assert results['NM_007121.5:c.515A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'ATAC'}} - assert results['NM_007121.5:c.515A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'ATAC'}} - assert results['NM_007121.5:c.515A>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} + assert results['NM_007121.5:c.515A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172Ile)', 'slr': 'NP_009052.3:p.(K172I)'} + assert results['NM_007121.5:c.515A>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007121.5:c.515A>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_007121.5:c.515A>T']['alt_genomic_loci'], []) + assert results['NM_007121.5:c.515A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAC', 'vcf': {'chr': 'chr19', 'pos': '50881820', 'ref': 'A', 'alt': 'ATAC'}} + assert results['NM_007121.5:c.515A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAC', 'vcf': {'chr': 'chr19', 'pos': '50378563', 'ref': 'A', 'alt': 'ATAC'}} + assert results['NM_007121.5:c.515A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAC', 'vcf': {'chr': '19', 'pos': '50881820', 'ref': 'A', 'alt': 'ATAC'}} + assert results['NM_007121.5:c.515A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000019.10:g.50378563_50378564insTAC', 'vcf': {'chr': '19', 'pos': '50378563', 'ref': 'A', 'alt': 'ATAC'}} + assert results['NM_007121.5:c.515A>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3'} assert 'NM_001256647.1:c.224A>T' in list(results.keys()) - assert results['NM_001256647.1:c.224A>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001256647.1:c.224A>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001256647.1:c.224A>T']['alt_genomic_loci'], []) - assert results['NM_001256647.1:c.224A>T']['gene_symbol'] == 'NR1H2' - assert results['NM_001256647.1:c.224A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75Ile)', 'slr': 'NP_001243576.1:p.(K75I)'} assert results['NM_001256647.1:c.224A>T']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564insTAC' - assert results['NM_001256647.1:c.224A>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001256647.1:c.224A>T']['hgvs_lrg_variant'] == '' + assert results['NM_001256647.1:c.224A>T']['gene_symbol'] == 'NR1H2' + assert results['NM_001256647.1:c.224A>T']['gene_ids'] == {'hgnc_id': 'HGNC:7965', 'entrez_gene_id': '7376', 'ucsc_id': 'uc010enw.5', 'omim_id': ['600380']} assert results['NM_001256647.1:c.224A>T']['hgvs_transcript_variant'] == 'NM_001256647.1:c.224A>T' + assert results['NM_001256647.1:c.224A>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.224A>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001256647.1:c.224A>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001256647.1:c.224A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'ATAC'}} - assert results['NM_001256647.1:c.224A>T']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'ATAC'}} - assert results['NM_001256647.1:c.224A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'ATAC'}} - assert results['NM_001256647.1:c.224A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'ATAC'}} - assert results['NM_001256647.1:c.224A>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1'} - + assert results['NM_001256647.1:c.224A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75Ile)', 'slr': 'NP_001243576.1:p.(K75I)'} + assert results['NM_001256647.1:c.224A>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001256647.1:c.224A>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001256647.1:c.224A>T']['alt_genomic_loci'], []) + assert results['NM_001256647.1:c.224A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAC', 'vcf': {'chr': 'chr19', 'pos': '50881820', 'ref': 'A', 'alt': 'ATAC'}} + assert results['NM_001256647.1:c.224A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAC', 'vcf': {'chr': 'chr19', 'pos': '50378563', 'ref': 'A', 'alt': 'ATAC'}} + assert results['NM_001256647.1:c.224A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAC', 'vcf': {'chr': '19', 'pos': '50881820', 'ref': 'A', 'alt': 'ATAC'}} + assert results['NM_001256647.1:c.224A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000019.10:g.50378563_50378564insTAC', 'vcf': {'chr': '19', 'pos': '50378563', 'ref': 'A', 'alt': 'ATAC'}} + assert results['NM_001256647.1:c.224A>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1'} def test_variant99(self): variant = 'NC_000019.10:g.50378563_50378564insC' @@ -3110,39 +3152,40 @@ def test_variant99(self): assert results['flag'] == 'gene_variant' assert 'NM_007121.5:c.515_516del' in list(results.keys()) - assert results['NM_007121.5:c.515_516del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_007121.5:c.515_516del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_007121.5:c.515_516del']['alt_genomic_loci'], []) - assert results['NM_007121.5:c.515_516del']['gene_symbol'] == 'NR1H2' - assert results['NM_007121.5:c.515_516del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172ThrfsTer34)', 'slr': 'NP_009052.3:p.(K172Tfs*34)'} assert results['NM_007121.5:c.515_516del']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564insC' + assert results['NM_007121.5:c.515_516del']['gene_symbol'] == 'NR1H2' + assert results['NM_007121.5:c.515_516del']['gene_ids'] == {'hgnc_id': 'HGNC:7965', 'entrez_gene_id': '7376', 'ucsc_id': 'uc010enw.5', 'omim_id': ['600380']} + assert results['NM_007121.5:c.515_516del']['hgvs_transcript_variant'] == 'NM_007121.5:c.515_516del' assert results['NM_007121.5:c.515_516del']['genome_context_intronic_sequence'] == '' - assert results['NM_007121.5:c.515_516del']['hgvs_lrg_variant'] == '' - assert results['NM_007121.5:c.515_516del']['hgvs_transcript_variant'] == 'NM_007121.5:c.515_516del' + assert results['NM_007121.5:c.515_516del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007121.5:c.515_516del']['hgvs_refseqgene_variant'] == '' - assert results['NM_007121.5:c.515_516del']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'AC'}} - assert results['NM_007121.5:c.515_516del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'AC'}} - assert results['NM_007121.5:c.515_516del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'AC'}} - assert results['NM_007121.5:c.515_516del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'AC'}} - assert results['NM_007121.5:c.515_516del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} + assert results['NM_007121.5:c.515_516del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172ThrfsTer34)', 'slr': 'NP_009052.3:p.(K172Tfs*34)'} + assert results['NM_007121.5:c.515_516del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007121.5:c.515_516del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_007121.5:c.515_516del']['alt_genomic_loci'], []) + assert results['NM_007121.5:c.515_516del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insC', 'vcf': {'chr': 'chr19', 'pos': '50881820', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_007121.5:c.515_516del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insC', 'vcf': {'chr': 'chr19', 'pos': '50378563', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_007121.5:c.515_516del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insC', 'vcf': 
{'chr': '19', 'pos': '50881820', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_007121.5:c.515_516del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insC', 'vcf': {'chr': '19', 'pos': '50378563', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_007121.5:c.515_516del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3'} assert 'NM_001256647.1:c.224_225del' in list(results.keys()) - assert results['NM_001256647.1:c.224_225del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001256647.1:c.224_225del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001256647.1:c.224_225del']['alt_genomic_loci'], []) - assert results['NM_001256647.1:c.224_225del']['gene_symbol'] == 'NR1H2' - assert results['NM_001256647.1:c.224_225del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75ThrfsTer34)', 'slr': 'NP_001243576.1:p.(K75Tfs*34)'} assert results['NM_001256647.1:c.224_225del']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564insC' - assert results['NM_001256647.1:c.224_225del']['genome_context_intronic_sequence'] == '' - assert results['NM_001256647.1:c.224_225del']['hgvs_lrg_variant'] == '' + assert results['NM_001256647.1:c.224_225del']['gene_symbol'] == 'NR1H2' + assert results['NM_001256647.1:c.224_225del']['gene_ids'] == {'hgnc_id': 'HGNC:7965', 'entrez_gene_id': '7376', 'ucsc_id': 'uc010enw.5', 'omim_id': ['600380']} assert results['NM_001256647.1:c.224_225del']['hgvs_transcript_variant'] == 'NM_001256647.1:c.224_225del' + assert results['NM_001256647.1:c.224_225del']['genome_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.224_225del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001256647.1:c.224_225del']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_001256647.1:c.224_225del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'AC'}} - assert results['NM_001256647.1:c.224_225del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'AC'}} - assert results['NM_001256647.1:c.224_225del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'AC'}} - assert results['NM_001256647.1:c.224_225del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'AC'}} - assert results['NM_001256647.1:c.224_225del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1'} - + assert results['NM_001256647.1:c.224_225del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75ThrfsTer34)', 'slr': 'NP_001243576.1:p.(K75Tfs*34)'} + assert results['NM_001256647.1:c.224_225del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001256647.1:c.224_225del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001256647.1:c.224_225del']['alt_genomic_loci'], []) + assert results['NM_001256647.1:c.224_225del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insC', 'vcf': {'chr': 'chr19', 'pos': '50881820', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_001256647.1:c.224_225del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insC', 'vcf': {'chr': 'chr19', 'pos': '50378563', 'ref': 'A', 'alt': 'AC'}} + assert 
results['NM_001256647.1:c.224_225del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insC', 'vcf': {'chr': '19', 'pos': '50881820', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_001256647.1:c.224_225del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insC', 'vcf': {'chr': '19', 'pos': '50378563', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_001256647.1:c.224_225del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1'} def test_variant100(self): variant = 'NC_000019.10:g.50378564_50378565insTACA' @@ -3151,80 +3194,82 @@ def test_variant100(self): assert results['flag'] == 'gene_variant' assert 'NM_007121.5:c.515_516insT' in list(results.keys()) - assert results['NM_007121.5:c.515_516insT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_007121.5:c.515_516insT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_007121.5:c.515_516insT']['alt_genomic_loci'], []) - assert results['NM_007121.5:c.515_516insT']['gene_symbol'] == 'NR1H2' - assert results['NM_007121.5:c.515_516insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172AsnfsTer35)', 'slr': 'NP_009052.3:p.(K172Nfs*35)'} assert results['NM_007121.5:c.515_516insT']['submitted_variant'] == 'NC_000019.10:g.50378564_50378565insTACA' - assert results['NM_007121.5:c.515_516insT']['genome_context_intronic_sequence'] == '' - assert results['NM_007121.5:c.515_516insT']['hgvs_lrg_variant'] == '' + assert results['NM_007121.5:c.515_516insT']['gene_symbol'] == 'NR1H2' + assert results['NM_007121.5:c.515_516insT']['gene_ids'] == {'hgnc_id': 'HGNC:7965', 'entrez_gene_id': '7376', 'ucsc_id': 'uc010enw.5', 'omim_id': ['600380']} assert results['NM_007121.5:c.515_516insT']['hgvs_transcript_variant'] == 'NM_007121.5:c.515_516insT' + assert 
results['NM_007121.5:c.515_516insT']['genome_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.515_516insT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007121.5:c.515_516insT']['hgvs_refseqgene_variant'] == '' - assert results['NM_007121.5:c.515_516insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881821_50881822insTACA', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'AATAC'}} - assert results['NM_007121.5:c.515_516insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378564_50378565insTACA', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'AATAC'}} - assert results['NM_007121.5:c.515_516insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881821_50881822insTACA', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'AATAC'}} - assert results['NM_007121.5:c.515_516insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378564_50378565insTACA', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'AATAC'}} - assert results['NM_007121.5:c.515_516insT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} + assert results['NM_007121.5:c.515_516insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172AsnfsTer35)', 'slr': 'NP_009052.3:p.(K172Nfs*35)'} + assert results['NM_007121.5:c.515_516insT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007121.5:c.515_516insT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_007121.5:c.515_516insT']['alt_genomic_loci'], []) + assert results['NM_007121.5:c.515_516insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881821_50881822insTACA', 'vcf': {'chr': 'chr19', 'pos': '50881820', 'ref': 'A', 'alt': 'AATAC'}} + assert 
results['NM_007121.5:c.515_516insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378564_50378565insTACA', 'vcf': {'chr': 'chr19', 'pos': '50378563', 'ref': 'A', 'alt': 'AATAC'}} + assert results['NM_007121.5:c.515_516insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881821_50881822insTACA', 'vcf': {'chr': '19', 'pos': '50881820', 'ref': 'A', 'alt': 'AATAC'}} + assert results['NM_007121.5:c.515_516insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378564_50378565insTACA', 'vcf': {'chr': '19', 'pos': '50378563', 'ref': 'A', 'alt': 'AATAC'}} + assert results['NM_007121.5:c.515_516insT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3'} assert 'NM_001256647.1:c.224_225insT' in list(results.keys()) - assert results['NM_001256647.1:c.224_225insT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001256647.1:c.224_225insT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001256647.1:c.224_225insT']['alt_genomic_loci'], []) - assert results['NM_001256647.1:c.224_225insT']['gene_symbol'] == 'NR1H2' - assert results['NM_001256647.1:c.224_225insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75AsnfsTer35)', 'slr': 'NP_001243576.1:p.(K75Nfs*35)'} assert results['NM_001256647.1:c.224_225insT']['submitted_variant'] == 'NC_000019.10:g.50378564_50378565insTACA' - assert results['NM_001256647.1:c.224_225insT']['genome_context_intronic_sequence'] == '' - assert results['NM_001256647.1:c.224_225insT']['hgvs_lrg_variant'] == '' + assert results['NM_001256647.1:c.224_225insT']['gene_symbol'] == 'NR1H2' + assert results['NM_001256647.1:c.224_225insT']['gene_ids'] == {'hgnc_id': 'HGNC:7965', 'entrez_gene_id': '7376', 'ucsc_id': 'uc010enw.5', 'omim_id': ['600380']} assert 
results['NM_001256647.1:c.224_225insT']['hgvs_transcript_variant'] == 'NM_001256647.1:c.224_225insT' + assert results['NM_001256647.1:c.224_225insT']['genome_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.224_225insT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001256647.1:c.224_225insT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001256647.1:c.224_225insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881821_50881822insTACA', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'AATAC'}} - assert results['NM_001256647.1:c.224_225insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378564_50378565insTACA', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'AATAC'}} - assert results['NM_001256647.1:c.224_225insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881821_50881822insTACA', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'AATAC'}} - assert results['NM_001256647.1:c.224_225insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378564_50378565insTACA', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'AATAC'}} - assert results['NM_001256647.1:c.224_225insT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1'} - + assert results['NM_001256647.1:c.224_225insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75AsnfsTer35)', 'slr': 'NP_001243576.1:p.(K75Nfs*35)'} + assert results['NM_001256647.1:c.224_225insT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001256647.1:c.224_225insT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001256647.1:c.224_225insT']['alt_genomic_loci'], []) + assert 
results['NM_001256647.1:c.224_225insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881821_50881822insTACA', 'vcf': {'chr': 'chr19', 'pos': '50881820', 'ref': 'A', 'alt': 'AATAC'}} + assert results['NM_001256647.1:c.224_225insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378564_50378565insTACA', 'vcf': {'chr': 'chr19', 'pos': '50378563', 'ref': 'A', 'alt': 'AATAC'}} + assert results['NM_001256647.1:c.224_225insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881821_50881822insTACA', 'vcf': {'chr': '19', 'pos': '50881820', 'ref': 'A', 'alt': 'AATAC'}} + assert results['NM_001256647.1:c.224_225insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378564_50378565insTACA', 'vcf': {'chr': '19', 'pos': '50378563', 'ref': 'A', 'alt': 'AATAC'}} + assert results['NM_001256647.1:c.224_225insT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1'} def test_variant101(self): variant = 'NC_000019.10:g.50378565_50378567dup' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_007121.5:c.514_520=' in list(results.keys()) - assert results['NM_007121.5:c.514_520=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_007121.5:c.514_520=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_007121.5:c.514_520=']['alt_genomic_loci'], []) - assert results['NM_007121.5:c.514_520=']['gene_symbol'] == 'NR1H2' - assert results['NM_007121.5:c.514_520=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172=)', 'slr': 'NP_009052.3:p.(K172=)'} assert results['NM_007121.5:c.514_520=']['submitted_variant'] == 'NC_000019.10:g.50378565_50378567dup' - assert 
results['NM_007121.5:c.514_520=']['genome_context_intronic_sequence'] == '' - assert results['NM_007121.5:c.514_520=']['hgvs_lrg_variant'] == '' + assert results['NM_007121.5:c.514_520=']['gene_symbol'] == 'NR1H2' + assert results['NM_007121.5:c.514_520=']['gene_ids'] == {'hgnc_id': 'HGNC:7965', 'entrez_gene_id': '7376', 'ucsc_id': 'uc010enw.5', 'omim_id': ['600380']} assert results['NM_007121.5:c.514_520=']['hgvs_transcript_variant'] == 'NM_007121.5:c.514_520=' + assert results['NM_007121.5:c.514_520=']['genome_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.514_520=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007121.5:c.514_520=']['hgvs_refseqgene_variant'] == '' - assert results['NM_007121.5:c.514_520=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881822_50881824dup', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'AAAC'}} - assert results['NM_007121.5:c.514_520=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378565_50378567dup', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'AAAC'}} - assert results['NM_007121.5:c.514_520=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881822_50881824dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'AAAC'}} - assert results['NM_007121.5:c.514_520=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378565_50378567dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'AAAC'}} - assert results['NM_007121.5:c.514_520=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} + assert results['NM_007121.5:c.514_520=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172=)', 'slr': 'NP_009052.3:p.(K172=)'} + assert 
results['NM_007121.5:c.514_520=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007121.5:c.514_520=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_007121.5:c.514_520=']['alt_genomic_loci'], []) + assert results['NM_007121.5:c.514_520=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881822_50881824dup', 'vcf': {'chr': 'chr19', 'pos': '50881820', 'ref': 'A', 'alt': 'AAAC'}} + assert results['NM_007121.5:c.514_520=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378565_50378567dup', 'vcf': {'chr': 'chr19', 'pos': '50378563', 'ref': 'A', 'alt': 'AAAC'}} + assert results['NM_007121.5:c.514_520=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881822_50881824dup', 'vcf': {'chr': '19', 'pos': '50881820', 'ref': 'A', 'alt': 'AAAC'}} + assert results['NM_007121.5:c.514_520=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378565_50378567dup', 'vcf': {'chr': '19', 'pos': '50378563', 'ref': 'A', 'alt': 'AAAC'}} + assert results['NM_007121.5:c.514_520=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3'} assert 'NM_001256647.1:c.223_229=' in list(results.keys()) - assert results['NM_001256647.1:c.223_229=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001256647.1:c.223_229=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001256647.1:c.223_229=']['alt_genomic_loci'], []) - assert results['NM_001256647.1:c.223_229=']['gene_symbol'] == 'NR1H2' - assert results['NM_001256647.1:c.223_229=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75=)', 'slr': 'NP_001243576.1:p.(K75=)'} assert results['NM_001256647.1:c.223_229=']['submitted_variant'] == 'NC_000019.10:g.50378565_50378567dup' - assert 
results['NM_001256647.1:c.223_229=']['genome_context_intronic_sequence'] == '' - assert results['NM_001256647.1:c.223_229=']['hgvs_lrg_variant'] == '' + assert results['NM_001256647.1:c.223_229=']['gene_symbol'] == 'NR1H2' + assert results['NM_001256647.1:c.223_229=']['gene_ids'] == {'hgnc_id': 'HGNC:7965', 'entrez_gene_id': '7376', 'ucsc_id': 'uc010enw.5', 'omim_id': ['600380']} assert results['NM_001256647.1:c.223_229=']['hgvs_transcript_variant'] == 'NM_001256647.1:c.223_229=' + assert results['NM_001256647.1:c.223_229=']['genome_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.223_229=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001256647.1:c.223_229=']['hgvs_refseqgene_variant'] == '' - assert results['NM_001256647.1:c.223_229=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881822_50881824dup', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'AAAC'}} - assert results['NM_001256647.1:c.223_229=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378565_50378567dup', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'AAAC'}} - assert results['NM_001256647.1:c.223_229=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881822_50881824dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'AAAC'}} - assert results['NM_001256647.1:c.223_229=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378565_50378567dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'AAAC'}} - assert results['NM_001256647.1:c.223_229=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1'} - - assert results['flag'] == 'gene_variant' + assert results['NM_001256647.1:c.223_229=']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_001243576.1:p.(Lys75=)', 'slr': 'NP_001243576.1:p.(K75=)'} + assert results['NM_001256647.1:c.223_229=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001256647.1:c.223_229=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001256647.1:c.223_229=']['alt_genomic_loci'], []) + assert results['NM_001256647.1:c.223_229=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881822_50881824dup', 'vcf': {'chr': 'chr19', 'pos': '50881820', 'ref': 'A', 'alt': 'AAAC'}} + assert results['NM_001256647.1:c.223_229=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378565_50378567dup', 'vcf': {'chr': 'chr19', 'pos': '50378563', 'ref': 'A', 'alt': 'AAAC'}} + assert results['NM_001256647.1:c.223_229=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881822_50881824dup', 'vcf': {'chr': '19', 'pos': '50881820', 'ref': 'A', 'alt': 'AAAC'}} + assert results['NM_001256647.1:c.223_229=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378565_50378567dup', 'vcf': {'chr': '19', 'pos': '50378563', 'ref': 'A', 'alt': 'AAAC'}} + assert results['NM_001256647.1:c.223_229=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1'} def test_variant102(self): variant = 'NC_000019.10:g.50378563_50378564=' @@ -3233,80 +3278,82 @@ def test_variant102(self): assert results['flag'] == 'gene_variant' assert 'NM_007121.5:c.519_521del' in list(results.keys()) - assert results['NM_007121.5:c.519_521del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_007121.5:c.519_521del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_007121.5:c.519_521del']['alt_genomic_loci'], []) - assert results['NM_007121.5:c.519_521del']['gene_symbol'] == 'NR1H2' - assert 
results['NM_007121.5:c.519_521del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Gln176del)', 'slr': 'NP_009052.3:p.(Q176del)'} assert results['NM_007121.5:c.519_521del']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564=' - assert results['NM_007121.5:c.519_521del']['genome_context_intronic_sequence'] == '' - assert results['NM_007121.5:c.519_521del']['hgvs_lrg_variant'] == '' + assert results['NM_007121.5:c.519_521del']['gene_symbol'] == 'NR1H2' + assert results['NM_007121.5:c.519_521del']['gene_ids'] == {'hgnc_id': 'HGNC:7965', 'entrez_gene_id': '7376', 'ucsc_id': 'uc010enw.5', 'omim_id': ['600380']} assert results['NM_007121.5:c.519_521del']['hgvs_transcript_variant'] == 'NM_007121.5:c.519_521del' + assert results['NM_007121.5:c.519_521del']['genome_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.519_521del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007121.5:c.519_521del']['hgvs_refseqgene_variant'] == '' - assert results['NM_007121.5:c.519_521del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821=', 'vcf': {'chr': 'chr19', 'ref': 'AA', 'pos': '50881820', 'alt': 'AA'}} - assert results['NM_007121.5:c.519_521del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564=', 'vcf': {'chr': 'chr19', 'ref': 'AA', 'pos': '50378563', 'alt': 'AA'}} - assert results['NM_007121.5:c.519_521del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821=', 'vcf': {'chr': '19', 'ref': 'AA', 'pos': '50881820', 'alt': 'AA'}} - assert results['NM_007121.5:c.519_521del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564=', 'vcf': {'chr': '19', 'ref': 'AA', 'pos': '50378563', 'alt': 'AA'}} - assert results['NM_007121.5:c.519_521del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 
'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} + assert results['NM_007121.5:c.519_521del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Gln176del)', 'slr': 'NP_009052.3:p.(Q176del)'} + assert results['NM_007121.5:c.519_521del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007121.5:c.519_521del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_007121.5:c.519_521del']['alt_genomic_loci'], []) + assert results['NM_007121.5:c.519_521del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821=', 'vcf': {'chr': 'chr19', 'pos': '50881820', 'ref': 'AA', 'alt': 'AA'}} + assert results['NM_007121.5:c.519_521del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564=', 'vcf': {'chr': 'chr19', 'pos': '50378563', 'ref': 'AA', 'alt': 'AA'}} + assert results['NM_007121.5:c.519_521del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821=', 'vcf': {'chr': '19', 'pos': '50881820', 'ref': 'AA', 'alt': 'AA'}} + assert results['NM_007121.5:c.519_521del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564=', 'vcf': {'chr': '19', 'pos': '50378563', 'ref': 'AA', 'alt': 'AA'}} + assert results['NM_007121.5:c.519_521del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3'} assert 'NM_001256647.1:c.228_230del' in list(results.keys()) - assert results['NM_001256647.1:c.228_230del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001256647.1:c.228_230del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001256647.1:c.228_230del']['alt_genomic_loci'], []) - assert results['NM_001256647.1:c.228_230del']['gene_symbol'] == 'NR1H2' - assert 
results['NM_001256647.1:c.228_230del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Gln79del)', 'slr': 'NP_001243576.1:p.(Q79del)'} assert results['NM_001256647.1:c.228_230del']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564=' - assert results['NM_001256647.1:c.228_230del']['genome_context_intronic_sequence'] == '' - assert results['NM_001256647.1:c.228_230del']['hgvs_lrg_variant'] == '' + assert results['NM_001256647.1:c.228_230del']['gene_symbol'] == 'NR1H2' + assert results['NM_001256647.1:c.228_230del']['gene_ids'] == {'hgnc_id': 'HGNC:7965', 'entrez_gene_id': '7376', 'ucsc_id': 'uc010enw.5', 'omim_id': ['600380']} assert results['NM_001256647.1:c.228_230del']['hgvs_transcript_variant'] == 'NM_001256647.1:c.228_230del' + assert results['NM_001256647.1:c.228_230del']['genome_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.228_230del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001256647.1:c.228_230del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001256647.1:c.228_230del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821=', 'vcf': {'chr': 'chr19', 'ref': 'AA', 'pos': '50881820', 'alt': 'AA'}} - assert results['NM_001256647.1:c.228_230del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564=', 'vcf': {'chr': 'chr19', 'ref': 'AA', 'pos': '50378563', 'alt': 'AA'}} - assert results['NM_001256647.1:c.228_230del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821=', 'vcf': {'chr': '19', 'ref': 'AA', 'pos': '50881820', 'alt': 'AA'}} - assert results['NM_001256647.1:c.228_230del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564=', 'vcf': {'chr': '19', 'ref': 'AA', 'pos': '50378563', 'alt': 'AA'}} - assert results['NM_001256647.1:c.228_230del']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1'} - + assert results['NM_001256647.1:c.228_230del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Gln79del)', 'slr': 'NP_001243576.1:p.(Q79del)'} + assert results['NM_001256647.1:c.228_230del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001256647.1:c.228_230del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001256647.1:c.228_230del']['alt_genomic_loci'], []) + assert results['NM_001256647.1:c.228_230del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821=', 'vcf': {'chr': 'chr19', 'pos': '50881820', 'ref': 'AA', 'alt': 'AA'}} + assert results['NM_001256647.1:c.228_230del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564=', 'vcf': {'chr': 'chr19', 'pos': '50378563', 'ref': 'AA', 'alt': 'AA'}} + assert results['NM_001256647.1:c.228_230del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821=', 'vcf': {'chr': '19', 'pos': '50881820', 'ref': 'AA', 'alt': 'AA'}} + assert results['NM_001256647.1:c.228_230del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564=', 'vcf': {'chr': '19', 'pos': '50378563', 'ref': 'AA', 'alt': 'AA'}} + assert results['NM_001256647.1:c.228_230del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1'} def test_variant103(self): variant = 'NC_000019.10:g.50378563_50378564insTCGG' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_001256647.1:c.224_226delinsTCGG' in list(results.keys()) - assert results['NM_001256647.1:c.224_226delinsTCGG']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_001256647.1:c.224_226delinsTCGG']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001256647.1:c.224_226delinsTCGG']['alt_genomic_loci'], []) - assert results['NM_001256647.1:c.224_226delinsTCGG']['gene_symbol'] == 'NR1H2' - assert results['NM_001256647.1:c.224_226delinsTCGG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75IlefsTer35)', 'slr': 'NP_001243576.1:p.(K75Ifs*35)'} - assert results['NM_001256647.1:c.224_226delinsTCGG']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564insTCGG' - assert results['NM_001256647.1:c.224_226delinsTCGG']['genome_context_intronic_sequence'] == '' - assert results['NM_001256647.1:c.224_226delinsTCGG']['hgvs_lrg_variant'] == '' - assert results['NM_001256647.1:c.224_226delinsTCGG']['hgvs_transcript_variant'] == 'NM_001256647.1:c.224_226delinsTCGG' - assert results['NM_001256647.1:c.224_226delinsTCGG']['hgvs_refseqgene_variant'] == '' - assert results['NM_001256647.1:c.224_226delinsTCGG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTCGG', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'ATCGG'}} - assert results['NM_001256647.1:c.224_226delinsTCGG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTCGG', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'ATCGG'}} - assert results['NM_001256647.1:c.224_226delinsTCGG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTCGG', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'ATCGG'}} - assert results['NM_001256647.1:c.224_226delinsTCGG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTCGG', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'ATCGG'}} - assert results['NM_001256647.1:c.224_226delinsTCGG']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1'} - assert results['flag'] == 'gene_variant' assert 'NM_007121.5:c.515_517delinsTCGG' in list(results.keys()) - assert results['NM_007121.5:c.515_517delinsTCGG']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_007121.5:c.515_517delinsTCGG']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_007121.5:c.515_517delinsTCGG']['alt_genomic_loci'], []) - assert results['NM_007121.5:c.515_517delinsTCGG']['gene_symbol'] == 'NR1H2' - assert results['NM_007121.5:c.515_517delinsTCGG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172IlefsTer35)', 'slr': 'NP_009052.3:p.(K172Ifs*35)'} assert results['NM_007121.5:c.515_517delinsTCGG']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564insTCGG' - assert results['NM_007121.5:c.515_517delinsTCGG']['genome_context_intronic_sequence'] == '' - assert results['NM_007121.5:c.515_517delinsTCGG']['hgvs_lrg_variant'] == '' + assert results['NM_007121.5:c.515_517delinsTCGG']['gene_symbol'] == 'NR1H2' + assert results['NM_007121.5:c.515_517delinsTCGG']['gene_ids'] == {'hgnc_id': 'HGNC:7965', 'entrez_gene_id': '7376', 'ucsc_id': 'uc010enw.5', 'omim_id': ['600380']} assert results['NM_007121.5:c.515_517delinsTCGG']['hgvs_transcript_variant'] == 'NM_007121.5:c.515_517delinsTCGG' + assert results['NM_007121.5:c.515_517delinsTCGG']['genome_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.515_517delinsTCGG']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007121.5:c.515_517delinsTCGG']['hgvs_refseqgene_variant'] == '' - assert results['NM_007121.5:c.515_517delinsTCGG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTCGG', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'ATCGG'}} - assert results['NM_007121.5:c.515_517delinsTCGG']['primary_assembly_loci']['hg38'] 
== {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTCGG', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'ATCGG'}} - assert results['NM_007121.5:c.515_517delinsTCGG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTCGG', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'ATCGG'}} - assert results['NM_007121.5:c.515_517delinsTCGG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTCGG', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'ATCGG'}} - assert results['NM_007121.5:c.515_517delinsTCGG']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} + assert results['NM_007121.5:c.515_517delinsTCGG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172IlefsTer35)', 'slr': 'NP_009052.3:p.(K172Ifs*35)'} + assert results['NM_007121.5:c.515_517delinsTCGG']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007121.5:c.515_517delinsTCGG']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_007121.5:c.515_517delinsTCGG']['alt_genomic_loci'], []) + assert results['NM_007121.5:c.515_517delinsTCGG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTCGG', 'vcf': {'chr': 'chr19', 'pos': '50881820', 'ref': 'A', 'alt': 'ATCGG'}} + assert results['NM_007121.5:c.515_517delinsTCGG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTCGG', 'vcf': {'chr': 'chr19', 'pos': '50378563', 'ref': 'A', 'alt': 'ATCGG'}} + assert results['NM_007121.5:c.515_517delinsTCGG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTCGG', 'vcf': {'chr': '19', 'pos': '50881820', 'ref': 'A', 'alt': 'ATCGG'}} + assert 
results['NM_007121.5:c.515_517delinsTCGG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTCGG', 'vcf': {'chr': '19', 'pos': '50378563', 'ref': 'A', 'alt': 'ATCGG'}} + assert results['NM_007121.5:c.515_517delinsTCGG']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3'} + assert 'NM_001256647.1:c.224_226delinsTCGG' in list(results.keys()) + assert results['NM_001256647.1:c.224_226delinsTCGG']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564insTCGG' + assert results['NM_001256647.1:c.224_226delinsTCGG']['gene_symbol'] == 'NR1H2' + assert results['NM_001256647.1:c.224_226delinsTCGG']['gene_ids'] == {'hgnc_id': 'HGNC:7965', 'entrez_gene_id': '7376', 'ucsc_id': 'uc010enw.5', 'omim_id': ['600380']} + assert results['NM_001256647.1:c.224_226delinsTCGG']['hgvs_transcript_variant'] == 'NM_001256647.1:c.224_226delinsTCGG' + assert results['NM_001256647.1:c.224_226delinsTCGG']['genome_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.224_226delinsTCGG']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.224_226delinsTCGG']['hgvs_refseqgene_variant'] == '' + assert results['NM_001256647.1:c.224_226delinsTCGG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75IlefsTer35)', 'slr': 'NP_001243576.1:p.(K75Ifs*35)'} + assert results['NM_001256647.1:c.224_226delinsTCGG']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001256647.1:c.224_226delinsTCGG']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001256647.1:c.224_226delinsTCGG']['alt_genomic_loci'], []) + assert results['NM_001256647.1:c.224_226delinsTCGG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTCGG', 'vcf': {'chr': 'chr19', 'pos': '50881820', 'ref': 'A', 'alt': 'ATCGG'}} + assert 
results['NM_001256647.1:c.224_226delinsTCGG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTCGG', 'vcf': {'chr': 'chr19', 'pos': '50378563', 'ref': 'A', 'alt': 'ATCGG'}} + assert results['NM_001256647.1:c.224_226delinsTCGG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTCGG', 'vcf': {'chr': '19', 'pos': '50881820', 'ref': 'A', 'alt': 'ATCGG'}} + assert results['NM_001256647.1:c.224_226delinsTCGG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTCGG', 'vcf': {'chr': '19', 'pos': '50378563', 'ref': 'A', 'alt': 'ATCGG'}} + assert results['NM_001256647.1:c.224_226delinsTCGG']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1'} def test_variant104(self): variant = 'NC_000019.10:g.50378563delinsTTAC' @@ -3315,80 +3362,82 @@ def test_variant104(self): assert results['flag'] == 'gene_variant' assert 'NM_007121.5:c.514_515inv' in list(results.keys()) - assert results['NM_007121.5:c.514_515inv']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_007121.5:c.514_515inv']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_007121.5:c.514_515inv']['alt_genomic_loci'], []) - assert results['NM_007121.5:c.514_515inv']['gene_symbol'] == 'NR1H2' - assert results['NM_007121.5:c.514_515inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172Leu)', 'slr': 'NP_009052.3:p.(K172L)'} assert results['NM_007121.5:c.514_515inv']['submitted_variant'] == 'NC_000019.10:g.50378563delinsTTAC' - assert results['NM_007121.5:c.514_515inv']['genome_context_intronic_sequence'] == '' - assert results['NM_007121.5:c.514_515inv']['hgvs_lrg_variant'] == '' + assert results['NM_007121.5:c.514_515inv']['gene_symbol'] == 'NR1H2' + assert 
results['NM_007121.5:c.514_515inv']['gene_ids'] == {'hgnc_id': 'HGNC:7965', 'entrez_gene_id': '7376', 'ucsc_id': 'uc010enw.5', 'omim_id': ['600380']} assert results['NM_007121.5:c.514_515inv']['hgvs_transcript_variant'] == 'NM_007121.5:c.514_515inv' + assert results['NM_007121.5:c.514_515inv']['genome_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.514_515inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007121.5:c.514_515inv']['hgvs_refseqgene_variant'] == '' - assert results['NM_007121.5:c.514_515inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820delinsTTAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'TTAC'}} - assert results['NM_007121.5:c.514_515inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563delinsTTAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'TTAC'}} - assert results['NM_007121.5:c.514_515inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820delinsTTAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'TTAC'}} - assert results['NM_007121.5:c.514_515inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563delinsTTAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'TTAC'}} - assert results['NM_007121.5:c.514_515inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} + assert results['NM_007121.5:c.514_515inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172Leu)', 'slr': 'NP_009052.3:p.(K172L)'} + assert results['NM_007121.5:c.514_515inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007121.5:c.514_515inv']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_007121.5:c.514_515inv']['alt_genomic_loci'], []) + assert 
results['NM_007121.5:c.514_515inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820delinsTTAC', 'vcf': {'chr': 'chr19', 'pos': '50881820', 'ref': 'A', 'alt': 'TTAC'}} + assert results['NM_007121.5:c.514_515inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563delinsTTAC', 'vcf': {'chr': 'chr19', 'pos': '50378563', 'ref': 'A', 'alt': 'TTAC'}} + assert results['NM_007121.5:c.514_515inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820delinsTTAC', 'vcf': {'chr': '19', 'pos': '50881820', 'ref': 'A', 'alt': 'TTAC'}} + assert results['NM_007121.5:c.514_515inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563delinsTTAC', 'vcf': {'chr': '19', 'pos': '50378563', 'ref': 'A', 'alt': 'TTAC'}} + assert results['NM_007121.5:c.514_515inv']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3'} assert 'NM_001256647.1:c.223_224inv' in list(results.keys()) - assert results['NM_001256647.1:c.223_224inv']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001256647.1:c.223_224inv']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001256647.1:c.223_224inv']['alt_genomic_loci'], []) - assert results['NM_001256647.1:c.223_224inv']['gene_symbol'] == 'NR1H2' - assert results['NM_001256647.1:c.223_224inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75Leu)', 'slr': 'NP_001243576.1:p.(K75L)'} assert results['NM_001256647.1:c.223_224inv']['submitted_variant'] == 'NC_000019.10:g.50378563delinsTTAC' - assert results['NM_001256647.1:c.223_224inv']['genome_context_intronic_sequence'] == '' - assert results['NM_001256647.1:c.223_224inv']['hgvs_lrg_variant'] == '' + assert results['NM_001256647.1:c.223_224inv']['gene_symbol'] == 'NR1H2' + assert 
results['NM_001256647.1:c.223_224inv']['gene_ids'] == {'hgnc_id': 'HGNC:7965', 'entrez_gene_id': '7376', 'ucsc_id': 'uc010enw.5', 'omim_id': ['600380']} assert results['NM_001256647.1:c.223_224inv']['hgvs_transcript_variant'] == 'NM_001256647.1:c.223_224inv' + assert results['NM_001256647.1:c.223_224inv']['genome_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.223_224inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001256647.1:c.223_224inv']['hgvs_refseqgene_variant'] == '' - assert results['NM_001256647.1:c.223_224inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820delinsTTAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'TTAC'}} - assert results['NM_001256647.1:c.223_224inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563delinsTTAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'TTAC'}} - assert results['NM_001256647.1:c.223_224inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820delinsTTAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'TTAC'}} - assert results['NM_001256647.1:c.223_224inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563delinsTTAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'TTAC'}} - assert results['NM_001256647.1:c.223_224inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1'} - + assert results['NM_001256647.1:c.223_224inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75Leu)', 'slr': 'NP_001243576.1:p.(K75L)'} + assert results['NM_001256647.1:c.223_224inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001256647.1:c.223_224inv']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['NM_001256647.1:c.223_224inv']['alt_genomic_loci'], []) + assert results['NM_001256647.1:c.223_224inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820delinsTTAC', 'vcf': {'chr': 'chr19', 'pos': '50881820', 'ref': 'A', 'alt': 'TTAC'}} + assert results['NM_001256647.1:c.223_224inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563delinsTTAC', 'vcf': {'chr': 'chr19', 'pos': '50378563', 'ref': 'A', 'alt': 'TTAC'}} + assert results['NM_001256647.1:c.223_224inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820delinsTTAC', 'vcf': {'chr': '19', 'pos': '50881820', 'ref': 'A', 'alt': 'TTAC'}} + assert results['NM_001256647.1:c.223_224inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563delinsTTAC', 'vcf': {'chr': '19', 'pos': '50378563', 'ref': 'A', 'alt': 'TTAC'}} + assert results['NM_001256647.1:c.223_224inv']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1'} def test_variant105(self): variant = 'NC_000019.10:g.50378563_50378564insTAAC' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_007121.5:c.514_515insT' in list(results.keys()) - assert results['NM_007121.5:c.514_515insT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_007121.5:c.514_515insT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_007121.5:c.514_515insT']['alt_genomic_loci'], []) - assert results['NM_007121.5:c.514_515insT']['gene_symbol'] == 'NR1H2' - assert results['NM_007121.5:c.514_515insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172IlefsTer35)', 'slr': 'NP_009052.3:p.(K172Ifs*35)'} assert 
results['NM_007121.5:c.514_515insT']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564insTAAC' - assert results['NM_007121.5:c.514_515insT']['genome_context_intronic_sequence'] == '' - assert results['NM_007121.5:c.514_515insT']['hgvs_lrg_variant'] == '' + assert results['NM_007121.5:c.514_515insT']['gene_symbol'] == 'NR1H2' + assert results['NM_007121.5:c.514_515insT']['gene_ids'] == {'hgnc_id': 'HGNC:7965', 'entrez_gene_id': '7376', 'ucsc_id': 'uc010enw.5', 'omim_id': ['600380']} assert results['NM_007121.5:c.514_515insT']['hgvs_transcript_variant'] == 'NM_007121.5:c.514_515insT' + assert results['NM_007121.5:c.514_515insT']['genome_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.514_515insT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007121.5:c.514_515insT']['hgvs_refseqgene_variant'] == '' - assert results['NM_007121.5:c.514_515insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'ATAAC'}} - assert results['NM_007121.5:c.514_515insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'ATAAC'}} - assert results['NM_007121.5:c.514_515insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'ATAAC'}} - assert results['NM_007121.5:c.514_515insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'ATAAC'}} - assert results['NM_007121.5:c.514_515insT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} + assert 
results['NM_007121.5:c.514_515insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172IlefsTer35)', 'slr': 'NP_009052.3:p.(K172Ifs*35)'} + assert results['NM_007121.5:c.514_515insT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007121.5:c.514_515insT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_007121.5:c.514_515insT']['alt_genomic_loci'], []) + assert results['NM_007121.5:c.514_515insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAAC', 'vcf': {'chr': 'chr19', 'pos': '50881820', 'ref': 'A', 'alt': 'ATAAC'}} + assert results['NM_007121.5:c.514_515insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAAC', 'vcf': {'chr': 'chr19', 'pos': '50378563', 'ref': 'A', 'alt': 'ATAAC'}} + assert results['NM_007121.5:c.514_515insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAAC', 'vcf': {'chr': '19', 'pos': '50881820', 'ref': 'A', 'alt': 'ATAAC'}} + assert results['NM_007121.5:c.514_515insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAAC', 'vcf': {'chr': '19', 'pos': '50378563', 'ref': 'A', 'alt': 'ATAAC'}} + assert results['NM_007121.5:c.514_515insT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3'} assert 'NM_001256647.1:c.223_224insT' in list(results.keys()) - assert results['NM_001256647.1:c.223_224insT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001256647.1:c.223_224insT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001256647.1:c.223_224insT']['alt_genomic_loci'], []) - assert results['NM_001256647.1:c.223_224insT']['gene_symbol'] == 'NR1H2' - assert 
results['NM_001256647.1:c.223_224insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75IlefsTer35)', 'slr': 'NP_001243576.1:p.(K75Ifs*35)'} assert results['NM_001256647.1:c.223_224insT']['submitted_variant'] == 'NC_000019.10:g.50378563_50378564insTAAC' - assert results['NM_001256647.1:c.223_224insT']['genome_context_intronic_sequence'] == '' - assert results['NM_001256647.1:c.223_224insT']['hgvs_lrg_variant'] == '' + assert results['NM_001256647.1:c.223_224insT']['gene_symbol'] == 'NR1H2' + assert results['NM_001256647.1:c.223_224insT']['gene_ids'] == {'hgnc_id': 'HGNC:7965', 'entrez_gene_id': '7376', 'ucsc_id': 'uc010enw.5', 'omim_id': ['600380']} assert results['NM_001256647.1:c.223_224insT']['hgvs_transcript_variant'] == 'NM_001256647.1:c.223_224insT' + assert results['NM_001256647.1:c.223_224insT']['genome_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.223_224insT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001256647.1:c.223_224insT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001256647.1:c.223_224insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50881820', 'alt': 'ATAAC'}} - assert results['NM_001256647.1:c.223_224insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAAC', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '50378563', 'alt': 'ATAAC'}} - assert results['NM_001256647.1:c.223_224insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50881820', 'alt': 'ATAAC'}} - assert results['NM_001256647.1:c.223_224insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAAC', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '50378563', 'alt': 'ATAAC'}} - assert 
results['NM_001256647.1:c.223_224insT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1'} - - assert results['flag'] == 'gene_variant' + assert results['NM_001256647.1:c.223_224insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75IlefsTer35)', 'slr': 'NP_001243576.1:p.(K75Ifs*35)'} + assert results['NM_001256647.1:c.223_224insT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001256647.1:c.223_224insT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001256647.1:c.223_224insT']['alt_genomic_loci'], []) + assert results['NM_001256647.1:c.223_224insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAAC', 'vcf': {'chr': 'chr19', 'pos': '50881820', 'ref': 'A', 'alt': 'ATAAC'}} + assert results['NM_001256647.1:c.223_224insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAAC', 'vcf': {'chr': 'chr19', 'pos': '50378563', 'ref': 'A', 'alt': 'ATAAC'}} + assert results['NM_001256647.1:c.223_224insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881820_50881821insTAAC', 'vcf': {'chr': '19', 'pos': '50881820', 'ref': 'A', 'alt': 'ATAAC'}} + assert results['NM_001256647.1:c.223_224insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378563_50378564insTAAC', 'vcf': {'chr': '19', 'pos': '50378563', 'ref': 'A', 'alt': 'ATAAC'}} + assert results['NM_001256647.1:c.223_224insT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1'} def test_variant106(self): variant = 'NC_000019.10:g.50378562_50378565del' @@ -3396,81 +3445,83 @@ def test_variant106(self): print(results) assert results['flag'] == 'gene_variant' - assert 
'NM_001256647.1:c.222_228del' in list(results.keys()) - assert results['NM_001256647.1:c.222_228del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001256647.1:c.222_228del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001256647.1:c.222_228del']['alt_genomic_loci'], []) - assert results['NM_001256647.1:c.222_228del']['gene_symbol'] == 'NR1H2' - assert results['NM_001256647.1:c.222_228del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75SerfsTer47)', 'slr': 'NP_001243576.1:p.(K75Sfs*47)'} - assert results['NM_001256647.1:c.222_228del']['submitted_variant'] == 'NC_000019.10:g.50378562_50378565del' - assert results['NM_001256647.1:c.222_228del']['genome_context_intronic_sequence'] == '' - assert results['NM_001256647.1:c.222_228del']['hgvs_lrg_variant'] == '' - assert results['NM_001256647.1:c.222_228del']['hgvs_transcript_variant'] == 'NM_001256647.1:c.222_228del' - assert results['NM_001256647.1:c.222_228del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001256647.1:c.222_228del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881819_50881822del', 'vcf': {'chr': 'chr19', 'ref': 'GGAAA', 'pos': '50881818', 'alt': 'G'}} - assert results['NM_001256647.1:c.222_228del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378562_50378565del', 'vcf': {'chr': 'chr19', 'ref': 'GGAAA', 'pos': '50378561', 'alt': 'G'}} - assert results['NM_001256647.1:c.222_228del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881819_50881822del', 'vcf': {'chr': '19', 'ref': 'GGAAA', 'pos': '50881818', 'alt': 'G'}} - assert results['NM_001256647.1:c.222_228del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378562_50378565del', 'vcf': {'chr': '19', 'ref': 'GGAAA', 'pos': '50378561', 'alt': 'G'}} - assert 
results['NM_001256647.1:c.222_228del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1'} - assert 'NM_007121.5:c.513_519del' in list(results.keys()) - assert results['NM_007121.5:c.513_519del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_007121.5:c.513_519del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_007121.5:c.513_519del']['alt_genomic_loci'], []) - assert results['NM_007121.5:c.513_519del']['gene_symbol'] == 'NR1H2' - assert results['NM_007121.5:c.513_519del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172SerfsTer47)', 'slr': 'NP_009052.3:p.(K172Sfs*47)'} assert results['NM_007121.5:c.513_519del']['submitted_variant'] == 'NC_000019.10:g.50378562_50378565del' - assert results['NM_007121.5:c.513_519del']['genome_context_intronic_sequence'] == '' - assert results['NM_007121.5:c.513_519del']['hgvs_lrg_variant'] == '' + assert results['NM_007121.5:c.513_519del']['gene_symbol'] == 'NR1H2' + assert results['NM_007121.5:c.513_519del']['gene_ids'] == {'hgnc_id': 'HGNC:7965', 'entrez_gene_id': '7376', 'ucsc_id': 'uc010enw.5', 'omim_id': ['600380']} assert results['NM_007121.5:c.513_519del']['hgvs_transcript_variant'] == 'NM_007121.5:c.513_519del' + assert results['NM_007121.5:c.513_519del']['genome_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.513_519del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007121.5:c.513_519del']['hgvs_refseqgene_variant'] == '' - assert results['NM_007121.5:c.513_519del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881819_50881822del', 'vcf': {'chr': 'chr19', 'ref': 'GGAAA', 'pos': '50881818', 'alt': 'G'}} - assert results['NM_007121.5:c.513_519del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378562_50378565del', 'vcf': {'chr': 
'chr19', 'ref': 'GGAAA', 'pos': '50378561', 'alt': 'G'}} - assert results['NM_007121.5:c.513_519del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881819_50881822del', 'vcf': {'chr': '19', 'ref': 'GGAAA', 'pos': '50881818', 'alt': 'G'}} - assert results['NM_007121.5:c.513_519del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378562_50378565del', 'vcf': {'chr': '19', 'ref': 'GGAAA', 'pos': '50378561', 'alt': 'G'}} - assert results['NM_007121.5:c.513_519del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} + assert results['NM_007121.5:c.513_519del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172SerfsTer47)', 'slr': 'NP_009052.3:p.(K172Sfs*47)'} + assert results['NM_007121.5:c.513_519del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007121.5:c.513_519del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_007121.5:c.513_519del']['alt_genomic_loci'], []) + assert results['NM_007121.5:c.513_519del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881819_50881822del', 'vcf': {'chr': 'chr19', 'pos': '50881818', 'ref': 'GGAAA', 'alt': 'G'}} + assert results['NM_007121.5:c.513_519del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378562_50378565del', 'vcf': {'chr': 'chr19', 'pos': '50378561', 'ref': 'GGAAA', 'alt': 'G'}} + assert results['NM_007121.5:c.513_519del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881819_50881822del', 'vcf': {'chr': '19', 'pos': '50881818', 'ref': 'GGAAA', 'alt': 'G'}} + assert results['NM_007121.5:c.513_519del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378562_50378565del', 'vcf': {'chr': '19', 'pos': '50378561', 'ref': 'GGAAA', 'alt': 'G'}} + assert 
results['NM_007121.5:c.513_519del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3'} + assert 'NM_001256647.1:c.222_228del' in list(results.keys()) + assert results['NM_001256647.1:c.222_228del']['submitted_variant'] == 'NC_000019.10:g.50378562_50378565del' + assert results['NM_001256647.1:c.222_228del']['gene_symbol'] == 'NR1H2' + assert results['NM_001256647.1:c.222_228del']['gene_ids'] == {'hgnc_id': 'HGNC:7965', 'entrez_gene_id': '7376', 'ucsc_id': 'uc010enw.5', 'omim_id': ['600380']} + assert results['NM_001256647.1:c.222_228del']['hgvs_transcript_variant'] == 'NM_001256647.1:c.222_228del' + assert results['NM_001256647.1:c.222_228del']['genome_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.222_228del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.222_228del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001256647.1:c.222_228del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75SerfsTer47)', 'slr': 'NP_001243576.1:p.(K75Sfs*47)'} + assert results['NM_001256647.1:c.222_228del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001256647.1:c.222_228del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001256647.1:c.222_228del']['alt_genomic_loci'], []) + assert results['NM_001256647.1:c.222_228del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881819_50881822del', 'vcf': {'chr': 'chr19', 'pos': '50881818', 'ref': 'GGAAA', 'alt': 'G'}} + assert results['NM_001256647.1:c.222_228del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378562_50378565del', 'vcf': {'chr': 'chr19', 'pos': '50378561', 'ref': 'GGAAA', 'alt': 'G'}} + assert results['NM_001256647.1:c.222_228del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000019.9:g.50881819_50881822del', 'vcf': {'chr': '19', 'pos': '50881818', 'ref': 'GGAAA', 'alt': 'G'}} + assert results['NM_001256647.1:c.222_228del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378562_50378565del', 'vcf': {'chr': '19', 'pos': '50378561', 'ref': 'GGAAA', 'alt': 'G'}} + assert results['NM_001256647.1:c.222_228del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1'} def test_variant107(self): variant = 'NC_000019.10:g.50378562_50378565delinsTC' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_001256647.1:c.222_228delinsTC' in list(results.keys()) - assert results['NM_001256647.1:c.222_228delinsTC']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001256647.1:c.222_228delinsTC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001256647.1:c.222_228delinsTC']['alt_genomic_loci'], []) - assert results['NM_001256647.1:c.222_228delinsTC']['gene_symbol'] == 'NR1H2' - assert results['NM_001256647.1:c.222_228delinsTC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75ProfsTer33)', 'slr': 'NP_001243576.1:p.(K75Pfs*33)'} - assert results['NM_001256647.1:c.222_228delinsTC']['submitted_variant'] == 'NC_000019.10:g.50378562_50378565delinsTC' - assert results['NM_001256647.1:c.222_228delinsTC']['genome_context_intronic_sequence'] == '' - assert results['NM_001256647.1:c.222_228delinsTC']['hgvs_lrg_variant'] == '' - assert results['NM_001256647.1:c.222_228delinsTC']['hgvs_transcript_variant'] == 'NM_001256647.1:c.222_228delinsTC' - assert results['NM_001256647.1:c.222_228delinsTC']['hgvs_refseqgene_variant'] == '' - assert results['NM_001256647.1:c.222_228delinsTC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881819_50881822delinsTC', 'vcf': {'chr': 
'chr19', 'ref': 'GAAA', 'pos': '50881819', 'alt': 'TC'}} - assert results['NM_001256647.1:c.222_228delinsTC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378562_50378565delinsTC', 'vcf': {'chr': 'chr19', 'ref': 'GAAA', 'pos': '50378562', 'alt': 'TC'}} - assert results['NM_001256647.1:c.222_228delinsTC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881819_50881822delinsTC', 'vcf': {'chr': '19', 'ref': 'GAAA', 'pos': '50881819', 'alt': 'TC'}} - assert results['NM_001256647.1:c.222_228delinsTC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378562_50378565delinsTC', 'vcf': {'chr': '19', 'ref': 'GAAA', 'pos': '50378562', 'alt': 'TC'}} - assert results['NM_001256647.1:c.222_228delinsTC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1'} - assert results['flag'] == 'gene_variant' assert 'NM_007121.5:c.513_519delinsTC' in list(results.keys()) - assert results['NM_007121.5:c.513_519delinsTC']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_007121.5:c.513_519delinsTC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_007121.5:c.513_519delinsTC']['alt_genomic_loci'], []) - assert results['NM_007121.5:c.513_519delinsTC']['gene_symbol'] == 'NR1H2' - assert results['NM_007121.5:c.513_519delinsTC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172ProfsTer33)', 'slr': 'NP_009052.3:p.(K172Pfs*33)'} assert results['NM_007121.5:c.513_519delinsTC']['submitted_variant'] == 'NC_000019.10:g.50378562_50378565delinsTC' - assert results['NM_007121.5:c.513_519delinsTC']['genome_context_intronic_sequence'] == '' - assert results['NM_007121.5:c.513_519delinsTC']['hgvs_lrg_variant'] == '' + assert results['NM_007121.5:c.513_519delinsTC']['gene_symbol'] == 'NR1H2' + assert 
results['NM_007121.5:c.513_519delinsTC']['gene_ids'] == {'hgnc_id': 'HGNC:7965', 'entrez_gene_id': '7376', 'ucsc_id': 'uc010enw.5', 'omim_id': ['600380']} assert results['NM_007121.5:c.513_519delinsTC']['hgvs_transcript_variant'] == 'NM_007121.5:c.513_519delinsTC' + assert results['NM_007121.5:c.513_519delinsTC']['genome_context_intronic_sequence'] == '' + assert results['NM_007121.5:c.513_519delinsTC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007121.5:c.513_519delinsTC']['hgvs_refseqgene_variant'] == '' - assert results['NM_007121.5:c.513_519delinsTC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881819_50881822delinsTC', 'vcf': {'chr': 'chr19', 'ref': 'GAAA', 'pos': '50881819', 'alt': 'TC'}} - assert results['NM_007121.5:c.513_519delinsTC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378562_50378565delinsTC', 'vcf': {'chr': 'chr19', 'ref': 'GAAA', 'pos': '50378562', 'alt': 'TC'}} - assert results['NM_007121.5:c.513_519delinsTC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881819_50881822delinsTC', 'vcf': {'chr': '19', 'ref': 'GAAA', 'pos': '50881819', 'alt': 'TC'}} - assert results['NM_007121.5:c.513_519delinsTC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378562_50378565delinsTC', 'vcf': {'chr': '19', 'ref': 'GAAA', 'pos': '50378562', 'alt': 'TC'}} - assert results['NM_007121.5:c.513_519delinsTC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5'} + assert results['NM_007121.5:c.513_519delinsTC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009052.3:p.(Lys172ProfsTer33)', 'slr': 'NP_009052.3:p.(K172Pfs*33)'} + assert results['NM_007121.5:c.513_519delinsTC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007121.5:c.513_519delinsTC']['hgvs_lrg_variant'] == 
'' + self.assertCountEqual(results['NM_007121.5:c.513_519delinsTC']['alt_genomic_loci'], []) + assert results['NM_007121.5:c.513_519delinsTC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881819_50881822delinsTC', 'vcf': {'chr': 'chr19', 'pos': '50881819', 'ref': 'GAAA', 'alt': 'TC'}} + assert results['NM_007121.5:c.513_519delinsTC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378562_50378565delinsTC', 'vcf': {'chr': 'chr19', 'pos': '50378562', 'ref': 'GAAA', 'alt': 'TC'}} + assert results['NM_007121.5:c.513_519delinsTC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881819_50881822delinsTC', 'vcf': {'chr': '19', 'pos': '50881819', 'ref': 'GAAA', 'alt': 'TC'}} + assert results['NM_007121.5:c.513_519delinsTC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378562_50378565delinsTC', 'vcf': {'chr': '19', 'pos': '50378562', 'ref': 'GAAA', 'alt': 'TC'}} + assert results['NM_007121.5:c.513_519delinsTC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007121.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009052.3'} + assert 'NM_001256647.1:c.222_228delinsTC' in list(results.keys()) + assert results['NM_001256647.1:c.222_228delinsTC']['submitted_variant'] == 'NC_000019.10:g.50378562_50378565delinsTC' + assert results['NM_001256647.1:c.222_228delinsTC']['gene_symbol'] == 'NR1H2' + assert results['NM_001256647.1:c.222_228delinsTC']['gene_ids'] == {'hgnc_id': 'HGNC:7965', 'entrez_gene_id': '7376', 'ucsc_id': 'uc010enw.5', 'omim_id': ['600380']} + assert results['NM_001256647.1:c.222_228delinsTC']['hgvs_transcript_variant'] == 'NM_001256647.1:c.222_228delinsTC' + assert results['NM_001256647.1:c.222_228delinsTC']['genome_context_intronic_sequence'] == '' + assert results['NM_001256647.1:c.222_228delinsTC']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_001256647.1:c.222_228delinsTC']['hgvs_refseqgene_variant'] == '' + assert results['NM_001256647.1:c.222_228delinsTC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243576.1:p.(Lys75ProfsTer33)', 'slr': 'NP_001243576.1:p.(K75Pfs*33)'} + assert results['NM_001256647.1:c.222_228delinsTC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001256647.1:c.222_228delinsTC']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001256647.1:c.222_228delinsTC']['alt_genomic_loci'], []) + assert results['NM_001256647.1:c.222_228delinsTC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881819_50881822delinsTC', 'vcf': {'chr': 'chr19', 'pos': '50881819', 'ref': 'GAAA', 'alt': 'TC'}} + assert results['NM_001256647.1:c.222_228delinsTC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378562_50378565delinsTC', 'vcf': {'chr': 'chr19', 'pos': '50378562', 'ref': 'GAAA', 'alt': 'TC'}} + assert results['NM_001256647.1:c.222_228delinsTC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.50881819_50881822delinsTC', 'vcf': {'chr': '19', 'pos': '50881819', 'ref': 'GAAA', 'alt': 'TC'}} + assert results['NM_001256647.1:c.222_228delinsTC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.50378562_50378565delinsTC', 'vcf': {'chr': '19', 'pos': '50378562', 'ref': 'GAAA', 'alt': 'TC'}} + assert results['NM_001256647.1:c.222_228delinsTC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256647.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243576.1'} def test_variant108(self): variant = 'NC_000007.14:g.149779575_149779577delinsT' @@ -3479,22 +3530,22 @@ def test_variant108(self): assert results['flag'] == 'gene_variant' assert 'NM_198455.2:c.1115_1116insT' in list(results.keys()) - assert results['NM_198455.2:c.1115_1116insT']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_198455.2:c.1115_1116insT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_198455.2:c.1115_1116insT']['alt_genomic_loci'], []) - assert results['NM_198455.2:c.1115_1116insT']['gene_symbol'] == 'SSPO' - assert results['NM_198455.2:c.1115_1116insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_940857.2:p.(Leu374ProfsTer16)', 'slr': 'NP_940857.2:p.(L374Pfs*16)'} assert results['NM_198455.2:c.1115_1116insT']['submitted_variant'] == 'NC_000007.14:g.149779575_149779577delinsT' - assert results['NM_198455.2:c.1115_1116insT']['genome_context_intronic_sequence'] == '' - assert results['NM_198455.2:c.1115_1116insT']['hgvs_lrg_variant'] == '' + assert results['NM_198455.2:c.1115_1116insT']['gene_symbol'] == 'SSPO' + assert results['NM_198455.2:c.1115_1116insT']['gene_ids'] == {'hgnc_id': 'HGNC:21998', 'entrez_gene_id': '23145', 'ucsc_id': 'uc064jau.1', 'omim_id': ['617356']} assert results['NM_198455.2:c.1115_1116insT']['hgvs_transcript_variant'] == 'NM_198455.2:c.1115_1116insT' + assert results['NM_198455.2:c.1115_1116insT']['genome_context_intronic_sequence'] == '' + assert results['NM_198455.2:c.1115_1116insT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_198455.2:c.1115_1116insT']['hgvs_refseqgene_variant'] == '' - assert results['NM_198455.2:c.1115_1116insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476664_149476666delinsTC', 'vcf': {'chr': 'chr7', 'ref': 'CAG', 'pos': '149476664', 'alt': 'TC'}} - assert results['NM_198455.2:c.1115_1116insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779575_149779577delinsT', 'vcf': {'chr': 'chr7', 'ref': 'CAG', 'pos': '149779575', 'alt': 'T'}} - assert results['NM_198455.2:c.1115_1116insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476664_149476666delinsTC', 'vcf': {'chr': '7', 'ref': 'CAG', 'pos': '149476664', 'alt': 'TC'}} - assert 
results['NM_198455.2:c.1115_1116insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779575_149779577delinsT', 'vcf': {'chr': '7', 'ref': 'CAG', 'pos': '149779575', 'alt': 'T'}} - assert results['NM_198455.2:c.1115_1116insT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2'} - + assert results['NM_198455.2:c.1115_1116insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_940857.2:p.(Leu374ProfsTer16)', 'slr': 'NP_940857.2:p.(L374Pfs*16)'} + assert results['NM_198455.2:c.1115_1116insT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_198455.2:c.1115_1116insT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_198455.2:c.1115_1116insT']['alt_genomic_loci'], []) + assert results['NM_198455.2:c.1115_1116insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476664_149476666delinsTC', 'vcf': {'chr': 'chr7', 'pos': '149476664', 'ref': 'CAG', 'alt': 'TC'}} + assert results['NM_198455.2:c.1115_1116insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779575_149779577delinsT', 'vcf': {'chr': 'chr7', 'pos': '149779575', 'ref': 'CAG', 'alt': 'T'}} + assert results['NM_198455.2:c.1115_1116insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476664_149476666delinsTC', 'vcf': {'chr': '7', 'pos': '149476664', 'ref': 'CAG', 'alt': 'TC'}} + assert results['NM_198455.2:c.1115_1116insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779575_149779577delinsT', 'vcf': {'chr': '7', 'pos': '149779575', 'ref': 'CAG', 'alt': 'T'}} + assert results['NM_198455.2:c.1115_1116insT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2'} def test_variant109(self): 
variant = 'NC_000007.14:g.149779575_149779577=' @@ -3503,22 +3554,22 @@ def test_variant109(self): assert results['flag'] == 'gene_variant' assert 'NM_198455.2:c.1116_1118=' in list(results.keys()) - assert results['NM_198455.2:c.1116_1118=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_198455.2:c.1116_1118=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_198455.2:c.1116_1118=']['alt_genomic_loci'], []) - assert results['NM_198455.2:c.1116_1118=']['gene_symbol'] == 'SSPO' - assert results['NM_198455.2:c.1116_1118=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_940857.2:p.(Asp372=)', 'slr': 'NP_940857.2:p.(D372=)'} assert results['NM_198455.2:c.1116_1118=']['submitted_variant'] == 'NC_000007.14:g.149779575_149779577=' - assert results['NM_198455.2:c.1116_1118=']['genome_context_intronic_sequence'] == '' - assert results['NM_198455.2:c.1116_1118=']['hgvs_lrg_variant'] == '' + assert results['NM_198455.2:c.1116_1118=']['gene_symbol'] == 'SSPO' + assert results['NM_198455.2:c.1116_1118=']['gene_ids'] == {'hgnc_id': 'HGNC:21998', 'entrez_gene_id': '23145', 'ucsc_id': 'uc064jau.1', 'omim_id': ['617356']} assert results['NM_198455.2:c.1116_1118=']['hgvs_transcript_variant'] == 'NM_198455.2:c.1116_1118=' + assert results['NM_198455.2:c.1116_1118=']['genome_context_intronic_sequence'] == '' + assert results['NM_198455.2:c.1116_1118=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_198455.2:c.1116_1118=']['hgvs_refseqgene_variant'] == '' - assert results['NM_198455.2:c.1116_1118=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476665_149476666del', 'vcf': {'chr': 'chr7', 'ref': 'CAG', 'pos': '149476664', 'alt': 'C'}} - assert results['NM_198455.2:c.1116_1118=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779576_149779578del', 'vcf': {'chr': 'chr7', 'ref': 'ACAG', 'pos': '149779574', 'alt': 'A'}} - assert 
results['NM_198455.2:c.1116_1118=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476665_149476666del', 'vcf': {'chr': '7', 'ref': 'CAG', 'pos': '149476664', 'alt': 'C'}} - assert results['NM_198455.2:c.1116_1118=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779576_149779578del', 'vcf': {'chr': '7', 'ref': 'ACAG', 'pos': '149779574', 'alt': 'A'}} - assert results['NM_198455.2:c.1116_1118=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2'} - + assert results['NM_198455.2:c.1116_1118=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_940857.2:p.(Asp372=)', 'slr': 'NP_940857.2:p.(D372=)'} + assert results['NM_198455.2:c.1116_1118=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_198455.2:c.1116_1118=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_198455.2:c.1116_1118=']['alt_genomic_loci'], []) + assert results['NM_198455.2:c.1116_1118=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476665_149476666del', 'vcf': {'chr': 'chr7', 'pos': '149476664', 'ref': 'CAG', 'alt': 'C'}} + assert results['NM_198455.2:c.1116_1118=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779576_149779578del', 'vcf': {'chr': 'chr7', 'pos': '149779574', 'ref': 'ACAG', 'alt': 'A'}} + assert results['NM_198455.2:c.1116_1118=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476665_149476666del', 'vcf': {'chr': '7', 'pos': '149476664', 'ref': 'CAG', 'alt': 'C'}} + assert results['NM_198455.2:c.1116_1118=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779576_149779578del', 'vcf': {'chr': '7', 'pos': '149779574', 'ref': 'ACAG', 'alt': 'A'}} + assert results['NM_198455.2:c.1116_1118=']['reference_sequence_records'] == 
{'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2'} def test_variant110(self): variant = 'NC_000007.14:g.149779576_149779578del' @@ -3527,22 +3578,22 @@ def test_variant110(self): assert results['flag'] == 'gene_variant' assert 'NM_198455.2:c.1116_1118=' in list(results.keys()) - assert results['NM_198455.2:c.1116_1118=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_198455.2:c.1116_1118=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_198455.2:c.1116_1118=']['alt_genomic_loci'], []) - assert results['NM_198455.2:c.1116_1118=']['gene_symbol'] == 'SSPO' - assert results['NM_198455.2:c.1116_1118=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_940857.2:p.(Asp372=)', 'slr': 'NP_940857.2:p.(D372=)'} assert results['NM_198455.2:c.1116_1118=']['submitted_variant'] == 'NC_000007.14:g.149779576_149779578del' - assert results['NM_198455.2:c.1116_1118=']['genome_context_intronic_sequence'] == '' - assert results['NM_198455.2:c.1116_1118=']['hgvs_lrg_variant'] == '' + assert results['NM_198455.2:c.1116_1118=']['gene_symbol'] == 'SSPO' + assert results['NM_198455.2:c.1116_1118=']['gene_ids'] == {'hgnc_id': 'HGNC:21998', 'entrez_gene_id': '23145', 'ucsc_id': 'uc064jau.1', 'omim_id': ['617356']} assert results['NM_198455.2:c.1116_1118=']['hgvs_transcript_variant'] == 'NM_198455.2:c.1116_1118=' + assert results['NM_198455.2:c.1116_1118=']['genome_context_intronic_sequence'] == '' + assert results['NM_198455.2:c.1116_1118=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_198455.2:c.1116_1118=']['hgvs_refseqgene_variant'] == '' - assert results['NM_198455.2:c.1116_1118=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476665_149476666del', 'vcf': {'chr': 'chr7', 'ref': 'CAG', 'pos': '149476664', 'alt': 'C'}} - assert results['NM_198455.2:c.1116_1118=']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000007.14:g.149779576_149779578del', 'vcf': {'chr': 'chr7', 'ref': 'ACAG', 'pos': '149779574', 'alt': 'A'}} - assert results['NM_198455.2:c.1116_1118=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476665_149476666del', 'vcf': {'chr': '7', 'ref': 'CAG', 'pos': '149476664', 'alt': 'C'}} - assert results['NM_198455.2:c.1116_1118=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779576_149779578del', 'vcf': {'chr': '7', 'ref': 'ACAG', 'pos': '149779574', 'alt': 'A'}} - assert results['NM_198455.2:c.1116_1118=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2'} - + assert results['NM_198455.2:c.1116_1118=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_940857.2:p.(Asp372=)', 'slr': 'NP_940857.2:p.(D372=)'} + assert results['NM_198455.2:c.1116_1118=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_198455.2:c.1116_1118=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_198455.2:c.1116_1118=']['alt_genomic_loci'], []) + assert results['NM_198455.2:c.1116_1118=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476665_149476666del', 'vcf': {'chr': 'chr7', 'pos': '149476664', 'ref': 'CAG', 'alt': 'C'}} + assert results['NM_198455.2:c.1116_1118=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779576_149779578del', 'vcf': {'chr': 'chr7', 'pos': '149779574', 'ref': 'ACAG', 'alt': 'A'}} + assert results['NM_198455.2:c.1116_1118=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476665_149476666del', 'vcf': {'chr': '7', 'pos': '149476664', 'ref': 'CAG', 'alt': 'C'}} + assert results['NM_198455.2:c.1116_1118=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779576_149779578del', 
'vcf': {'chr': '7', 'pos': '149779574', 'ref': 'ACAG', 'alt': 'A'}} + assert results['NM_198455.2:c.1116_1118=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2'} def test_variant111(self): variant = 'NC_000007.14:g.149779577del' @@ -3551,118 +3602,118 @@ def test_variant111(self): assert results['flag'] == 'gene_variant' assert 'NM_198455.2:c.1115_1116dup' in list(results.keys()) - assert results['NM_198455.2:c.1115_1116dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_198455.2:c.1115_1116dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_198455.2:c.1115_1116dup']['alt_genomic_loci'], []) - assert results['NM_198455.2:c.1115_1116dup']['gene_symbol'] == 'SSPO' - assert results['NM_198455.2:c.1115_1116dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_940857.2:p.(Pro373ThrfsTer6)', 'slr': 'NP_940857.2:p.(P373Tfs*6)'} assert results['NM_198455.2:c.1115_1116dup']['submitted_variant'] == 'NC_000007.14:g.149779577del' - assert results['NM_198455.2:c.1115_1116dup']['genome_context_intronic_sequence'] == '' - assert results['NM_198455.2:c.1115_1116dup']['hgvs_lrg_variant'] == '' + assert results['NM_198455.2:c.1115_1116dup']['gene_symbol'] == 'SSPO' + assert results['NM_198455.2:c.1115_1116dup']['gene_ids'] == {'hgnc_id': 'HGNC:21998', 'entrez_gene_id': '23145', 'ucsc_id': 'uc064jau.1', 'omim_id': ['617356']} assert results['NM_198455.2:c.1115_1116dup']['hgvs_transcript_variant'] == 'NM_198455.2:c.1115_1116dup' + assert results['NM_198455.2:c.1115_1116dup']['genome_context_intronic_sequence'] == '' + assert results['NM_198455.2:c.1115_1116dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_198455.2:c.1115_1116dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_198455.2:c.1115_1116dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000007.13:g.149476666G>C', 'vcf': {'chr': 'chr7', 'ref': 'G', 'pos': '149476666', 'alt': 'C'}} - assert results['NM_198455.2:c.1115_1116dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779577del', 'vcf': {'chr': 'chr7', 'ref': 'AG', 'pos': '149779576', 'alt': 'A'}} - assert results['NM_198455.2:c.1115_1116dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476666G>C', 'vcf': {'chr': '7', 'ref': 'G', 'pos': '149476666', 'alt': 'C'}} - assert results['NM_198455.2:c.1115_1116dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779577del', 'vcf': {'chr': '7', 'ref': 'AG', 'pos': '149779576', 'alt': 'A'}} - assert results['NM_198455.2:c.1115_1116dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2'} - + assert results['NM_198455.2:c.1115_1116dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_940857.2:p.(Pro373ThrfsTer6)', 'slr': 'NP_940857.2:p.(P373Tfs*6)'} + assert results['NM_198455.2:c.1115_1116dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_198455.2:c.1115_1116dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_198455.2:c.1115_1116dup']['alt_genomic_loci'], []) + assert results['NM_198455.2:c.1115_1116dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476666G>C', 'vcf': {'chr': 'chr7', 'pos': '149476666', 'ref': 'G', 'alt': 'C'}} + assert results['NM_198455.2:c.1115_1116dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779577del', 'vcf': {'chr': 'chr7', 'pos': '149779576', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_198455.2:c.1115_1116dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476666G>C', 'vcf': {'chr': '7', 'pos': '149476666', 'ref': 'G', 'alt': 'C'}} + assert 
results['NM_198455.2:c.1115_1116dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779577del', 'vcf': {'chr': '7', 'pos': '149779576', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_198455.2:c.1115_1116dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2'} def test_variant112(self): variant = 'NC_000007.14:g.149779573_149779579del' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_198455.2:c.1114_1117del' in list(results.keys()) - assert results['NM_198455.2:c.1114_1117del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_198455.2:c.1114_1117del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_198455.2:c.1114_1117del']['alt_genomic_loci'], []) - assert results['NM_198455.2:c.1114_1117del']['gene_symbol'] == 'SSPO' - assert results['NM_198455.2:c.1114_1117del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_940857.2:p.(Asp372ProfsTer5)', 'slr': 'NP_940857.2:p.(D372Pfs*5)'} assert results['NM_198455.2:c.1114_1117del']['submitted_variant'] == 'NC_000007.14:g.149779573_149779579del' - assert results['NM_198455.2:c.1114_1117del']['genome_context_intronic_sequence'] == '' - assert results['NM_198455.2:c.1114_1117del']['hgvs_lrg_variant'] == '' + assert results['NM_198455.2:c.1114_1117del']['gene_symbol'] == 'SSPO' + assert results['NM_198455.2:c.1114_1117del']['gene_ids'] == {'hgnc_id': 'HGNC:21998', 'entrez_gene_id': '23145', 'ucsc_id': 'uc064jau.1', 'omim_id': ['617356']} assert results['NM_198455.2:c.1114_1117del']['hgvs_transcript_variant'] == 'NM_198455.2:c.1114_1117del' + assert results['NM_198455.2:c.1114_1117del']['genome_context_intronic_sequence'] == '' + assert results['NM_198455.2:c.1114_1117del']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_198455.2:c.1114_1117del']['hgvs_refseqgene_variant'] == '' - assert results['NM_198455.2:c.1114_1117del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476662_149476667del', 'vcf': {'chr': 'chr7', 'ref': 'TGACAGC', 'pos': '149476661', 'alt': 'T'}} - assert results['NM_198455.2:c.1114_1117del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779573_149779579del', 'vcf': {'chr': 'chr7', 'ref': 'TGACAGCC', 'pos': '149779572', 'alt': 'T'}} - assert results['NM_198455.2:c.1114_1117del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476662_149476667del', 'vcf': {'chr': '7', 'ref': 'TGACAGC', 'pos': '149476661', 'alt': 'T'}} - assert results['NM_198455.2:c.1114_1117del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779573_149779579del', 'vcf': {'chr': '7', 'ref': 'TGACAGCC', 'pos': '149779572', 'alt': 'T'}} - assert results['NM_198455.2:c.1114_1117del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2'} - - assert results['flag'] == 'gene_variant' + assert results['NM_198455.2:c.1114_1117del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_940857.2:p.(Asp372ProfsTer5)', 'slr': 'NP_940857.2:p.(D372Pfs*5)'} + assert results['NM_198455.2:c.1114_1117del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_198455.2:c.1114_1117del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_198455.2:c.1114_1117del']['alt_genomic_loci'], []) + assert results['NM_198455.2:c.1114_1117del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476662_149476667del', 'vcf': {'chr': 'chr7', 'pos': '149476661', 'ref': 'TGACAGC', 'alt': 'T'}} + assert results['NM_198455.2:c.1114_1117del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000007.14:g.149779573_149779579del', 'vcf': {'chr': 'chr7', 'pos': '149779572', 'ref': 'TGACAGCC', 'alt': 'T'}} + assert results['NM_198455.2:c.1114_1117del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476662_149476667del', 'vcf': {'chr': '7', 'pos': '149476661', 'ref': 'TGACAGC', 'alt': 'T'}} + assert results['NM_198455.2:c.1114_1117del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779573_149779579del', 'vcf': {'chr': '7', 'pos': '149779572', 'ref': 'TGACAGCC', 'alt': 'T'}} + assert results['NM_198455.2:c.1114_1117del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2'} def test_variant113(self): variant = 'NC_000007.14:g.149779573_149779579delinsCA' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_198455.2:c.1114_1117delinsCA' in list(results.keys()) - assert results['NM_198455.2:c.1114_1117delinsCA']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_198455.2:c.1114_1117delinsCA']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_198455.2:c.1114_1117delinsCA']['alt_genomic_loci'], []) - assert results['NM_198455.2:c.1114_1117delinsCA']['gene_symbol'] == 'SSPO' - assert results['NM_198455.2:c.1114_1117delinsCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_940857.2:p.(Asp372HisfsTer17)', 'slr': 'NP_940857.2:p.(D372Hfs*17)'} assert results['NM_198455.2:c.1114_1117delinsCA']['submitted_variant'] == 'NC_000007.14:g.149779573_149779579delinsCA' - assert results['NM_198455.2:c.1114_1117delinsCA']['genome_context_intronic_sequence'] == '' - assert results['NM_198455.2:c.1114_1117delinsCA']['hgvs_lrg_variant'] == '' + assert results['NM_198455.2:c.1114_1117delinsCA']['gene_symbol'] == 'SSPO' + assert 
results['NM_198455.2:c.1114_1117delinsCA']['gene_ids'] == {'hgnc_id': 'HGNC:21998', 'entrez_gene_id': '23145', 'ucsc_id': 'uc064jau.1', 'omim_id': ['617356']} assert results['NM_198455.2:c.1114_1117delinsCA']['hgvs_transcript_variant'] == 'NM_198455.2:c.1114_1117delinsCA' + assert results['NM_198455.2:c.1114_1117delinsCA']['genome_context_intronic_sequence'] == '' + assert results['NM_198455.2:c.1114_1117delinsCA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_198455.2:c.1114_1117delinsCA']['hgvs_refseqgene_variant'] == '' - assert results['NM_198455.2:c.1114_1117delinsCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476662_149476667delinsCA', 'vcf': {'chr': 'chr7', 'ref': 'GACAGC', 'pos': '149476662', 'alt': 'CA'}} - assert results['NM_198455.2:c.1114_1117delinsCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779573_149779579delinsCA', 'vcf': {'chr': 'chr7', 'ref': 'GACAGCC', 'pos': '149779573', 'alt': 'CA'}} - assert results['NM_198455.2:c.1114_1117delinsCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476662_149476667delinsCA', 'vcf': {'chr': '7', 'ref': 'GACAGC', 'pos': '149476662', 'alt': 'CA'}} - assert results['NM_198455.2:c.1114_1117delinsCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779573_149779579delinsCA', 'vcf': {'chr': '7', 'ref': 'GACAGCC', 'pos': '149779573', 'alt': 'CA'}} - assert results['NM_198455.2:c.1114_1117delinsCA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2'} - - assert results['flag'] == 'gene_variant' + assert results['NM_198455.2:c.1114_1117delinsCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_940857.2:p.(Asp372HisfsTer17)', 'slr': 'NP_940857.2:p.(D372Hfs*17)'} + assert 
results['NM_198455.2:c.1114_1117delinsCA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_198455.2:c.1114_1117delinsCA']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_198455.2:c.1114_1117delinsCA']['alt_genomic_loci'], []) + assert results['NM_198455.2:c.1114_1117delinsCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476662_149476667delinsCA', 'vcf': {'chr': 'chr7', 'pos': '149476662', 'ref': 'GACAGC', 'alt': 'CA'}} + assert results['NM_198455.2:c.1114_1117delinsCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779573_149779579delinsCA', 'vcf': {'chr': 'chr7', 'pos': '149779573', 'ref': 'GACAGCC', 'alt': 'CA'}} + assert results['NM_198455.2:c.1114_1117delinsCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.149476662_149476667delinsCA', 'vcf': {'chr': '7', 'pos': '149476662', 'ref': 'GACAGC', 'alt': 'CA'}} + assert results['NM_198455.2:c.1114_1117delinsCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.149779573_149779579delinsCA', 'vcf': {'chr': '7', 'pos': '149779573', 'ref': 'GACAGCC', 'alt': 'CA'}} + assert results['NM_198455.2:c.1114_1117delinsCA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198455.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_940857.2'} def test_variant114(self): variant = 'NM_000088.3:c.590_591inv' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.590_591inv' in list(results.keys()) - assert results['NM_000088.3:c.590_591inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.590_591inv' - assert results['NM_000088.3:c.590_591inv']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000088.3:c.590_591inv']['alt_genomic_loci'], []) - assert 
results['NM_000088.3:c.590_591inv']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.590_591inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Asp)', 'slr': 'NP_000079.2:p.(G197D)'} assert results['NM_000088.3:c.590_591inv']['submitted_variant'] == 'NM_000088.3:c.590_591inv' - assert results['NM_000088.3:c.590_591inv']['genome_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.590_591inv']['hgvs_lrg_variant'] == 'LRG_1:g.8639_8640inv' + assert results['NM_000088.3:c.590_591inv']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.590_591inv']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.590_591inv']['hgvs_transcript_variant'] == 'NM_000088.3:c.590_591inv' + assert results['NM_000088.3:c.590_591inv']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.590_591inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.590_591inv']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8639_8640inv' - assert results['NM_000088.3:c.590_591inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275361_48275362inv', 'vcf': {'chr': 'chr17', 'ref': 'AC', 'pos': '48275361', 'alt': 'GT'}} - assert results['NM_000088.3:c.590_591inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198000_50198001inv', 'vcf': {'chr': 'chr17', 'ref': 'AC', 'pos': '50198000', 'alt': 'GT'}} - assert results['NM_000088.3:c.590_591inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275361_48275362inv', 'vcf': {'chr': '17', 'ref': 'AC', 'pos': '48275361', 'alt': 'GT'}} - assert results['NM_000088.3:c.590_591inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198000_50198001inv', 'vcf': {'chr': '17', 'ref': 'AC', 'pos': '50198000', 'alt': 'GT'}} - assert 
results['NM_000088.3:c.590_591inv']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - - assert results['flag'] == 'gene_variant' + assert results['NM_000088.3:c.590_591inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Asp)', 'slr': 'NP_000079.2:p.(G197D)'} + assert results['NM_000088.3:c.590_591inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.590_591inv' + assert results['NM_000088.3:c.590_591inv']['hgvs_lrg_variant'] == 'LRG_1:g.8639_8640inv' + self.assertCountEqual(results['NM_000088.3:c.590_591inv']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.590_591inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275361_48275362inv', 'vcf': {'chr': 'chr17', 'pos': '48275361', 'ref': 'AC', 'alt': 'GT'}} + assert results['NM_000088.3:c.590_591inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198000_50198001inv', 'vcf': {'chr': 'chr17', 'pos': '50198000', 'ref': 'AC', 'alt': 'GT'}} + assert results['NM_000088.3:c.590_591inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275361_48275362inv', 'vcf': {'chr': '17', 'pos': '48275361', 'ref': 'AC', 'alt': 'GT'}} + assert results['NM_000088.3:c.590_591inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198000_50198001inv', 'vcf': {'chr': '17', 'pos': '50198000', 'ref': 'AC', 'alt': 'GT'}} + assert results['NM_000088.3:c.590_591inv']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 
'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant115(self): variant = 'NM_024989.3:c.1778_1779inv' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_024989.3:c.1778_1779inv' in list(results.keys()) - assert results['NM_024989.3:c.1778_1779inv']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_024989.3:c.1778_1779inv']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_024989.3:c.1778_1779inv']['alt_genomic_loci'], []) - assert results['NM_024989.3:c.1778_1779inv']['gene_symbol'] == 'PGAP1' - assert results['NM_024989.3:c.1778_1779inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_079265.2:p.(Phe593Ter)', 'slr': 'NP_079265.2:p.(F593*)'} assert results['NM_024989.3:c.1778_1779inv']['submitted_variant'] == 'NM_024989.3:c.1778_1779inv' - assert results['NM_024989.3:c.1778_1779inv']['genome_context_intronic_sequence'] == '' - assert results['NM_024989.3:c.1778_1779inv']['hgvs_lrg_variant'] == '' + assert results['NM_024989.3:c.1778_1779inv']['gene_symbol'] == 'PGAP1' + assert results['NM_024989.3:c.1778_1779inv']['gene_ids'] == {'hgnc_id': 'HGNC:25712', 'entrez_gene_id': '80055', 'ucsc_id': 'uc002utw.4', 'omim_id': ['611655']} assert results['NM_024989.3:c.1778_1779inv']['hgvs_transcript_variant'] == 'NM_024989.3:c.1778_1779inv' + assert results['NM_024989.3:c.1778_1779inv']['genome_context_intronic_sequence'] == '' + assert results['NM_024989.3:c.1778_1779inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_024989.3:c.1778_1779inv']['hgvs_refseqgene_variant'] == '' - assert results['NM_024989.3:c.1778_1779inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.197729793_197729794inv', 'vcf': {'chr': 'chr2', 'ref': 'AA', 'pos': '197729793', 'alt': 'TT'}} - assert results['NM_024989.3:c.1778_1779inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.196865069_196865070inv', 'vcf': {'chr': 'chr2', 'ref': 'AA', 'pos': '196865069', 'alt': 'TT'}} - assert results['NM_024989.3:c.1778_1779inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.197729793_197729794inv', 'vcf': {'chr': '2', 'ref': 'AA', 'pos': '197729793', 'alt': 'TT'}} - assert results['NM_024989.3:c.1778_1779inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.196865069_196865070inv', 'vcf': {'chr': '2', 'ref': 'AA', 'pos': '196865069', 'alt': 'TT'}} - assert results['NM_024989.3:c.1778_1779inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_079265.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024989.3'} - - assert results['flag'] == 'gene_variant' + assert results['NM_024989.3:c.1778_1779inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_079265.2:p.(Phe593Ter)', 'slr': 'NP_079265.2:p.(F593*)'} + assert results['NM_024989.3:c.1778_1779inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_024989.3:c.1778_1779inv']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_024989.3:c.1778_1779inv']['alt_genomic_loci'], []) + assert results['NM_024989.3:c.1778_1779inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.197729793_197729794inv', 'vcf': {'chr': 'chr2', 'pos': '197729793', 'ref': 'AA', 'alt': 'TT'}} + assert results['NM_024989.3:c.1778_1779inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.196865069_196865070inv', 'vcf': {'chr': 'chr2', 'pos': '196865069', 'ref': 'AA', 'alt': 'TT'}} + assert results['NM_024989.3:c.1778_1779inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.197729793_197729794inv', 'vcf': {'chr': '2', 'pos': '197729793', 'ref': 'AA', 'alt': 'TT'}} + assert results['NM_024989.3:c.1778_1779inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.196865069_196865070inv', 'vcf': {'chr': '2', 'pos': '196865069', 'ref': 'AA', 'alt': 'TT'}} + assert results['NM_024989.3:c.1778_1779inv']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024989.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_079265.2'} def test_variant116(self): variant = 'NM_032815.3:c.555_556inv' @@ -3671,46 +3722,46 @@ def test_variant116(self): assert results['flag'] == 'gene_variant' assert 'NM_032815.3:c.555_556inv' in list(results.keys()) - assert results['NM_032815.3:c.555_556inv']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_032815.3:c.555_556inv']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_032815.3:c.555_556inv']['alt_genomic_loci'], []) - assert results['NM_032815.3:c.555_556inv']['gene_symbol'] == 'NFATC2IP' - assert results['NM_032815.3:c.555_556inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116204.3:p.(Glu185_Glu186delinsAspTer)', 'slr': 'NP_116204.3:p.(E185_E186delinsD*)'} assert results['NM_032815.3:c.555_556inv']['submitted_variant'] == 'NM_032815.3:c.555_556inv' - assert results['NM_032815.3:c.555_556inv']['genome_context_intronic_sequence'] == '' - assert results['NM_032815.3:c.555_556inv']['hgvs_lrg_variant'] == '' + assert results['NM_032815.3:c.555_556inv']['gene_symbol'] == 'NFATC2IP' + assert results['NM_032815.3:c.555_556inv']['gene_ids'] == {'hgnc_id': 'HGNC:25906', 'entrez_gene_id': '84901', 'ucsc_id': 'uc002dru.4', 'omim_id': ['614525']} assert results['NM_032815.3:c.555_556inv']['hgvs_transcript_variant'] == 'NM_032815.3:c.555_556inv' + assert results['NM_032815.3:c.555_556inv']['genome_context_intronic_sequence'] == '' + assert results['NM_032815.3:c.555_556inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032815.3:c.555_556inv']['hgvs_refseqgene_variant'] == '' - assert results['NM_032815.3:c.555_556inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000016.9:g.28965980_28965981inv', 'vcf': {'chr': 'chr16', 'ref': 'AG', 'pos': '28965980', 'alt': 'CT'}} - assert results['NM_032815.3:c.555_556inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.28954659_28954660inv', 'vcf': {'chr': 'chr16', 'ref': 'AG', 'pos': '28954659', 'alt': 'CT'}} - assert results['NM_032815.3:c.555_556inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.28965980_28965981inv', 'vcf': {'chr': '16', 'ref': 'AG', 'pos': '28965980', 'alt': 'CT'}} - assert results['NM_032815.3:c.555_556inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.28954659_28954660inv', 'vcf': {'chr': '16', 'ref': 'AG', 'pos': '28954659', 'alt': 'CT'}} - assert results['NM_032815.3:c.555_556inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116204.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032815.3'} - + assert results['NM_032815.3:c.555_556inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116204.3:p.(Glu185_Glu186delinsAspTer)', 'slr': 'NP_116204.3:p.(E185_E186delinsD*)'} + assert results['NM_032815.3:c.555_556inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_032815.3:c.555_556inv']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_032815.3:c.555_556inv']['alt_genomic_loci'], []) + assert results['NM_032815.3:c.555_556inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.28965980_28965981inv', 'vcf': {'chr': 'chr16', 'pos': '28965980', 'ref': 'AG', 'alt': 'CT'}} + assert results['NM_032815.3:c.555_556inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.28954659_28954660inv', 'vcf': {'chr': 'chr16', 'pos': '28954659', 'ref': 'AG', 'alt': 'CT'}} + assert results['NM_032815.3:c.555_556inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.28965980_28965981inv', 'vcf': {'chr': 
'16', 'pos': '28965980', 'ref': 'AG', 'alt': 'CT'}} + assert results['NM_032815.3:c.555_556inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.28954659_28954660inv', 'vcf': {'chr': '16', 'pos': '28954659', 'ref': 'AG', 'alt': 'CT'}} + assert results['NM_032815.3:c.555_556inv']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032815.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116204.3'} def test_variant117(self): variant = 'NM_006138.4:c.3_4inv' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_006138.4:c.3_4inv' in list(results.keys()) - assert results['NM_006138.4:c.3_4inv']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_006138.4:c.3_4inv']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_006138.4:c.3_4inv']['alt_genomic_loci'], []) - assert results['NM_006138.4:c.3_4inv']['gene_symbol'] == 'MS4A3' - assert results['NM_006138.4:c.3_4inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006129.4:p.(Met1?)', 'slr': 'NP_006129.4:p.(M1?)'} assert results['NM_006138.4:c.3_4inv']['submitted_variant'] == 'NM_006138.4:c.3_4inv' - assert results['NM_006138.4:c.3_4inv']['genome_context_intronic_sequence'] == '' - assert results['NM_006138.4:c.3_4inv']['hgvs_lrg_variant'] == '' + assert results['NM_006138.4:c.3_4inv']['gene_symbol'] == 'MS4A3' + assert results['NM_006138.4:c.3_4inv']['gene_ids'] == {'hgnc_id': 'HGNC:7317', 'entrez_gene_id': '932', 'ucsc_id': 'uc001nom.4', 'omim_id': ['606498']} assert results['NM_006138.4:c.3_4inv']['hgvs_transcript_variant'] == 'NM_006138.4:c.3_4inv' + assert results['NM_006138.4:c.3_4inv']['genome_context_intronic_sequence'] == '' + assert results['NM_006138.4:c.3_4inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_006138.4:c.3_4inv']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_006138.4:c.3_4inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.59828636_59828637inv', 'vcf': {'chr': 'chr11', 'ref': 'GG', 'pos': '59828636', 'alt': 'CC'}} - assert results['NM_006138.4:c.3_4inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.60061163_60061164inv', 'vcf': {'chr': 'chr11', 'ref': 'GG', 'pos': '60061163', 'alt': 'CC'}} - assert results['NM_006138.4:c.3_4inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.59828636_59828637inv', 'vcf': {'chr': '11', 'ref': 'GG', 'pos': '59828636', 'alt': 'CC'}} - assert results['NM_006138.4:c.3_4inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.60061163_60061164inv', 'vcf': {'chr': '11', 'ref': 'GG', 'pos': '60061163', 'alt': 'CC'}} - assert results['NM_006138.4:c.3_4inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_006129.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006138.4'} - - assert results['flag'] == 'gene_variant' + assert results['NM_006138.4:c.3_4inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006129.4:p.(Met1?)', 'slr': 'NP_006129.4:p.(M1?)'} + assert results['NM_006138.4:c.3_4inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_006138.4:c.3_4inv']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_006138.4:c.3_4inv']['alt_genomic_loci'], []) + assert results['NM_006138.4:c.3_4inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.59828636_59828637inv', 'vcf': {'chr': 'chr11', 'pos': '59828636', 'ref': 'GG', 'alt': 'CC'}} + assert results['NM_006138.4:c.3_4inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.60061163_60061164inv', 'vcf': {'chr': 'chr11', 'pos': '60061163', 'ref': 'GG', 'alt': 'CC'}} + assert results['NM_006138.4:c.3_4inv']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000011.9:g.59828636_59828637inv', 'vcf': {'chr': '11', 'pos': '59828636', 'ref': 'GG', 'alt': 'CC'}} + assert results['NM_006138.4:c.3_4inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.60061163_60061164inv', 'vcf': {'chr': '11', 'pos': '60061163', 'ref': 'GG', 'alt': 'CC'}} + assert results['NM_006138.4:c.3_4inv']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006138.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_006129.4'} def test_variant118(self): variant = 'NM_000038.5:c.3927_3928delAAinsTT' @@ -3719,22 +3770,22 @@ def test_variant118(self): assert results['flag'] == 'gene_variant' assert 'NM_000038.5:c.3927_3928inv' in list(results.keys()) - assert results['NM_000038.5:c.3927_3928inv']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000038.5:c.3927_3928inv']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000038.5:c.3927_3928inv']['alt_genomic_loci'], []) - assert results['NM_000038.5:c.3927_3928inv']['gene_symbol'] == 'APC' - assert results['NM_000038.5:c.3927_3928inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000029.2(LRG_130p1):p.(Glu1309_Lys1310delinsAspTer)', 'slr': 'NP_000029.2:p.(E1309_K1310delinsD*)'} assert results['NM_000038.5:c.3927_3928inv']['submitted_variant'] == 'NM_000038.5:c.3927_3928delAAinsTT' - assert results['NM_000038.5:c.3927_3928inv']['genome_context_intronic_sequence'] == '' - assert results['NM_000038.5:c.3927_3928inv']['hgvs_lrg_variant'] == '' + assert results['NM_000038.5:c.3927_3928inv']['gene_symbol'] == 'APC' + assert results['NM_000038.5:c.3927_3928inv']['gene_ids'] == {'hgnc_id': 'HGNC:583', 'entrez_gene_id': '324', 'ucsc_id': 'uc003kpy.5', 'omim_id': ['611731']} assert results['NM_000038.5:c.3927_3928inv']['hgvs_transcript_variant'] == 'NM_000038.5:c.3927_3928inv' + assert results['NM_000038.5:c.3927_3928inv']['genome_context_intronic_sequence'] == 
'' + assert results['NM_000038.5:c.3927_3928inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000038.5:c.3927_3928inv']['hgvs_refseqgene_variant'] == '' - assert results['NM_000038.5:c.3927_3928inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.112175218_112175219inv', 'vcf': {'chr': 'chr5', 'ref': 'AA', 'pos': '112175218', 'alt': 'TT'}} - assert results['NM_000038.5:c.3927_3928inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.112839521_112839522inv', 'vcf': {'chr': 'chr5', 'ref': 'AA', 'pos': '112839521', 'alt': 'TT'}} - assert results['NM_000038.5:c.3927_3928inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.112175218_112175219inv', 'vcf': {'chr': '5', 'ref': 'AA', 'pos': '112175218', 'alt': 'TT'}} - assert results['NM_000038.5:c.3927_3928inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.112839521_112839522inv', 'vcf': {'chr': '5', 'ref': 'AA', 'pos': '112839521', 'alt': 'TT'}} - assert results['NM_000038.5:c.3927_3928inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000029.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000038.5'} - + assert results['NM_000038.5:c.3927_3928inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000029.2(LRG_130p1):p.(Glu1309_Lys1310delinsAspTer)', 'slr': 'NP_000029.2:p.(E1309_K1310delinsD*)'} + assert results['NM_000038.5:c.3927_3928inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000038.5:c.3927_3928inv']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000038.5:c.3927_3928inv']['alt_genomic_loci'], []) + assert results['NM_000038.5:c.3927_3928inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.112175218_112175219inv', 'vcf': {'chr': 'chr5', 'pos': '112175218', 'ref': 'AA', 'alt': 'TT'}} + assert 
results['NM_000038.5:c.3927_3928inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.112839521_112839522inv', 'vcf': {'chr': 'chr5', 'pos': '112839521', 'ref': 'AA', 'alt': 'TT'}} + assert results['NM_000038.5:c.3927_3928inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.112175218_112175219inv', 'vcf': {'chr': '5', 'pos': '112175218', 'ref': 'AA', 'alt': 'TT'}} + assert results['NM_000038.5:c.3927_3928inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.112839521_112839522inv', 'vcf': {'chr': '5', 'pos': '112839521', 'ref': 'AA', 'alt': 'TT'}} + assert results['NM_000038.5:c.3927_3928inv']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000038.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000029.2'} def test_variant119(self): variant = 'NM_001034853.1:c.2847_2848delAGinsCT' @@ -3743,46 +3794,46 @@ def test_variant119(self): assert results['flag'] == 'gene_variant' assert 'NM_001034853.1:c.2847_2848inv' in list(results.keys()) - assert results['NM_001034853.1:c.2847_2848inv']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001034853.1:c.2847_2848inv']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001034853.1:c.2847_2848inv']['alt_genomic_loci'], []) - assert results['NM_001034853.1:c.2847_2848inv']['gene_symbol'] == 'RPGR' - assert results['NM_001034853.1:c.2847_2848inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001030025.1:p.(Glu949_Glu950delinsAspTer)', 'slr': 'NP_001030025.1:p.(E949_E950delinsD*)'} assert results['NM_001034853.1:c.2847_2848inv']['submitted_variant'] == 'NM_001034853.1:c.2847_2848delAGinsCT' - assert results['NM_001034853.1:c.2847_2848inv']['genome_context_intronic_sequence'] == '' - assert results['NM_001034853.1:c.2847_2848inv']['hgvs_lrg_variant'] == '' + assert results['NM_001034853.1:c.2847_2848inv']['gene_symbol'] == 'RPGR' + 
assert results['NM_001034853.1:c.2847_2848inv']['gene_ids'] == {'hgnc_id': 'HGNC:10295', 'entrez_gene_id': '6103', 'ucsc_id': 'uc004deb.4', 'omim_id': ['312610']} assert results['NM_001034853.1:c.2847_2848inv']['hgvs_transcript_variant'] == 'NM_001034853.1:c.2847_2848inv' + assert results['NM_001034853.1:c.2847_2848inv']['genome_context_intronic_sequence'] == '' + assert results['NM_001034853.1:c.2847_2848inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001034853.1:c.2847_2848inv']['hgvs_refseqgene_variant'] == '' - assert results['NM_001034853.1:c.2847_2848inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.38145404_38145405inv', 'vcf': {'chr': 'chrX', 'ref': 'CT', 'pos': '38145404', 'alt': 'AG'}} - assert results['NM_001034853.1:c.2847_2848inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.38286151_38286152inv', 'vcf': {'chr': 'chrX', 'ref': 'CT', 'pos': '38286151', 'alt': 'AG'}} - assert results['NM_001034853.1:c.2847_2848inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.38145404_38145405inv', 'vcf': {'chr': 'X', 'ref': 'CT', 'pos': '38145404', 'alt': 'AG'}} - assert results['NM_001034853.1:c.2847_2848inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.38286151_38286152inv', 'vcf': {'chr': 'X', 'ref': 'CT', 'pos': '38286151', 'alt': 'AG'}} - assert results['NM_001034853.1:c.2847_2848inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001030025.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001034853.1'} - + assert results['NM_001034853.1:c.2847_2848inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001030025.1:p.(Glu949_Glu950delinsAspTer)', 'slr': 'NP_001030025.1:p.(E949_E950delinsD*)'} + assert results['NM_001034853.1:c.2847_2848inv']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001034853.1:c.2847_2848inv']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001034853.1:c.2847_2848inv']['alt_genomic_loci'], []) + assert results['NM_001034853.1:c.2847_2848inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.38145404_38145405inv', 'vcf': {'chr': 'chrX', 'pos': '38145404', 'ref': 'CT', 'alt': 'AG'}} + assert results['NM_001034853.1:c.2847_2848inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.38286151_38286152inv', 'vcf': {'chr': 'chrX', 'pos': '38286151', 'ref': 'CT', 'alt': 'AG'}} + assert results['NM_001034853.1:c.2847_2848inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.38145404_38145405inv', 'vcf': {'chr': 'X', 'pos': '38145404', 'ref': 'CT', 'alt': 'AG'}} + assert results['NM_001034853.1:c.2847_2848inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.38286151_38286152inv', 'vcf': {'chr': 'X', 'pos': '38286151', 'ref': 'CT', 'alt': 'AG'}} + assert results['NM_001034853.1:c.2847_2848inv']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001034853.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001030025.1'} def test_variant120(self): variant = 'NM_000088.3:c.4392_*2inv' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.4394_4395inv' in list(results.keys()) - assert results['NM_000088.3:c.4394_4395inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.4394_4395inv' - assert results['NM_000088.3:c.4394_4395inv']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000088.3:c.4394_4395inv']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.4394_4395inv']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.4394_4395inv']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_000079.2(LRG_1p1):p.(Ter1465PheextTer27)', 'slr': 'NP_000079.2:p.(*1465Fext*27)'} assert results['NM_000088.3:c.4394_4395inv']['submitted_variant'] == 'NM_000088.3:c.4392_*2inv' - assert results['NM_000088.3:c.4394_4395inv']['genome_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.4394_4395inv']['hgvs_lrg_variant'] == 'LRG_1:g.21137_21138inv' + assert results['NM_000088.3:c.4394_4395inv']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.4394_4395inv']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.4394_4395inv']['hgvs_transcript_variant'] == 'NM_000088.3:c.4394_4395inv' + assert results['NM_000088.3:c.4394_4395inv']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.4394_4395inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.4394_4395inv']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.21137_21138inv' - assert results['NM_000088.3:c.4394_4395inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262863_48262864inv', 'vcf': {'chr': 'chr17', 'ref': 'TT', 'pos': '48262863', 'alt': 'AA'}} - assert results['NM_000088.3:c.4394_4395inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185502_50185503inv', 'vcf': {'chr': 'chr17', 'ref': 'TT', 'pos': '50185502', 'alt': 'AA'}} - assert results['NM_000088.3:c.4394_4395inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262863_48262864inv', 'vcf': {'chr': '17', 'ref': 'TT', 'pos': '48262863', 'alt': 'AA'}} - assert results['NM_000088.3:c.4394_4395inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185502_50185503inv', 'vcf': {'chr': '17', 'ref': 'TT', 'pos': '50185502', 'alt': 'AA'}} - assert results['NM_000088.3:c.4394_4395inv']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - - assert results['flag'] == 'gene_variant' + assert results['NM_000088.3:c.4394_4395inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Ter1465PheextTer27)', 'slr': 'NP_000079.2:p.(*1465Fext*27)'} + assert results['NM_000088.3:c.4394_4395inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.4394_4395inv' + assert results['NM_000088.3:c.4394_4395inv']['hgvs_lrg_variant'] == 'LRG_1:g.21137_21138inv' + self.assertCountEqual(results['NM_000088.3:c.4394_4395inv']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.4394_4395inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262863_48262864inv', 'vcf': {'chr': 'chr17', 'pos': '48262863', 'ref': 'TT', 'alt': 'AA'}} + assert results['NM_000088.3:c.4394_4395inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185502_50185503inv', 'vcf': {'chr': 'chr17', 'pos': '50185502', 'ref': 'TT', 'alt': 'AA'}} + assert results['NM_000088.3:c.4394_4395inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262863_48262864inv', 'vcf': {'chr': '17', 'pos': '48262863', 'ref': 'TT', 'alt': 'AA'}} + assert results['NM_000088.3:c.4394_4395inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185502_50185503inv', 'vcf': {'chr': '17', 'pos': '50185502', 'ref': 'TT', 'alt': 'AA'}} + assert results['NM_000088.3:c.4394_4395inv']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def 
test_variant121(self): variant = 'NM_000088.3:c.4392_*5inv' @@ -3791,22 +3842,22 @@ def test_variant121(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.4392_*5inv' in list(results.keys()) - assert results['NM_000088.3:c.4392_*5inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.4393_*4inv' - assert results['NM_000088.3:c.4392_*5inv']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000088.3:c.4392_*5inv']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.4392_*5inv']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.4392_*5inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.4392_*5inv']['submitted_variant'] == 'NM_000088.3:c.4392_*5inv' - assert results['NM_000088.3:c.4392_*5inv']['genome_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.4392_*5inv']['hgvs_lrg_variant'] == 'LRG_1:g.21136_21142inv' + assert results['NM_000088.3:c.4392_*5inv']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.4392_*5inv']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.4392_*5inv']['hgvs_transcript_variant'] == 'NM_000088.3:c.4392_*5inv' + assert results['NM_000088.3:c.4392_*5inv']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.4392_*5inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.4392_*5inv']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.21136_21142inv' - assert results['NM_000088.3:c.4392_*5inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262859_48262865inv', 'vcf': {'chr': 'chr17', 'ref': 'GAGTTTA', 'pos': '48262859', 'alt': 'TAAACTC'}} - assert results['NM_000088.3:c.4392_*5inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185498_50185504inv', 'vcf': {'chr': 'chr17', 
'ref': 'GAGTTTA', 'pos': '50185498', 'alt': 'TAAACTC'}} - assert results['NM_000088.3:c.4392_*5inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262859_48262865inv', 'vcf': {'chr': '17', 'ref': 'GAGTTTA', 'pos': '48262859', 'alt': 'TAAACTC'}} - assert results['NM_000088.3:c.4392_*5inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185498_50185504inv', 'vcf': {'chr': '17', 'ref': 'GAGTTTA', 'pos': '50185498', 'alt': 'TAAACTC'}} - assert results['NM_000088.3:c.4392_*5inv']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.4392_*5inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.4392_*5inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.4393_*4inv' + assert results['NM_000088.3:c.4392_*5inv']['hgvs_lrg_variant'] == 'LRG_1:g.21136_21142inv' + self.assertCountEqual(results['NM_000088.3:c.4392_*5inv']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.4392_*5inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262859_48262865inv', 'vcf': {'chr': 'chr17', 'pos': '48262859', 'ref': 'GAGTTTA', 'alt': 'TAAACTC'}} + assert results['NM_000088.3:c.4392_*5inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185498_50185504inv', 'vcf': {'chr': 'chr17', 'pos': '50185498', 'ref': 'GAGTTTA', 'alt': 'TAAACTC'}} + assert results['NM_000088.3:c.4392_*5inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262859_48262865inv', 'vcf': {'chr': '17', 'pos': '48262859', 'ref': 'GAGTTTA', 'alt': 'TAAACTC'}} + assert 
results['NM_000088.3:c.4392_*5inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185498_50185504inv', 'vcf': {'chr': '17', 'pos': '50185498', 'ref': 'GAGTTTA', 'alt': 'TAAACTC'}} + assert results['NM_000088.3:c.4392_*5inv']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant122(self): variant = 'NM_000088.3:c.4390_*7inv' @@ -3815,95 +3866,95 @@ def test_variant122(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.4390_*7inv' in list(results.keys()) - assert results['NM_000088.3:c.4390_*7inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.4390_*7inv' - assert results['NM_000088.3:c.4390_*7inv']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000088.3:c.4390_*7inv']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.4390_*7inv']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.4390_*7inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.4390_*7inv']['submitted_variant'] == 'NM_000088.3:c.4390_*7inv' - assert results['NM_000088.3:c.4390_*7inv']['genome_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.4390_*7inv']['hgvs_lrg_variant'] == 'LRG_1:g.21133_21145inv' + assert results['NM_000088.3:c.4390_*7inv']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.4390_*7inv']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.4390_*7inv']['hgvs_transcript_variant'] == 'NM_000088.3:c.4390_*7inv' + assert results['NM_000088.3:c.4390_*7inv']['genome_context_intronic_sequence'] == '' + assert 
results['NM_000088.3:c.4390_*7inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.4390_*7inv']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.21133_21145inv' - assert results['NM_000088.3:c.4390_*7inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262856_48262868inv', 'vcf': {'chr': 'chr17', 'ref': 'AGGGAGTTTACAG', 'pos': '48262856', 'alt': 'CTGTAAACTCCCT'}} - assert results['NM_000088.3:c.4390_*7inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185495_50185507inv', 'vcf': {'chr': 'chr17', 'ref': 'AGGGAGTTTACAG', 'pos': '50185495', 'alt': 'CTGTAAACTCCCT'}} - assert results['NM_000088.3:c.4390_*7inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262856_48262868inv', 'vcf': {'chr': '17', 'ref': 'AGGGAGTTTACAG', 'pos': '48262856', 'alt': 'CTGTAAACTCCCT'}} - assert results['NM_000088.3:c.4390_*7inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185495_50185507inv', 'vcf': {'chr': '17', 'ref': 'AGGGAGTTTACAG', 'pos': '50185495', 'alt': 'CTGTAAACTCCCT'}} - assert results['NM_000088.3:c.4390_*7inv']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.4390_*7inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.4390_*7inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.4390_*7inv' + assert results['NM_000088.3:c.4390_*7inv']['hgvs_lrg_variant'] == 'LRG_1:g.21133_21145inv' + self.assertCountEqual(results['NM_000088.3:c.4390_*7inv']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.4390_*7inv']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000017.10:g.48262856_48262868inv', 'vcf': {'chr': 'chr17', 'pos': '48262856', 'ref': 'AGGGAGTTTACAG', 'alt': 'CTGTAAACTCCCT'}} + assert results['NM_000088.3:c.4390_*7inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185495_50185507inv', 'vcf': {'chr': 'chr17', 'pos': '50185495', 'ref': 'AGGGAGTTTACAG', 'alt': 'CTGTAAACTCCCT'}} + assert results['NM_000088.3:c.4390_*7inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262856_48262868inv', 'vcf': {'chr': '17', 'pos': '48262856', 'ref': 'AGGGAGTTTACAG', 'alt': 'CTGTAAACTCCCT'}} + assert results['NM_000088.3:c.4390_*7inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185495_50185507inv', 'vcf': {'chr': '17', 'pos': '50185495', 'ref': 'AGGGAGTTTACAG', 'alt': 'CTGTAAACTCCCT'}} + assert results['NM_000088.3:c.4390_*7inv']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant123(self): variant = 'NM_005732.3:c.2923-5insT' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_005732.3:c.2923-5insT' - assert 
results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant124(self): variant = 'NM_198283.1(EYS):c.*743120C>T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert 
results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_198283.1(EYS):c.*743120C>T' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant125(self): variant = 'NM_133379.4(TTN):c.*265+26591C>T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] 
== '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_133379.4(TTN):c.*265+26591C>T' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant126(self): variant = 'NM_000088.3:c.589-2_589-1AG>G' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() @@ -3911,22 +3962,22 @@ def test_variant126(self): assert results['flag'] == 'gene_variant' assert 
'NM_000088.3:c.589-2del' in list(results.keys()) - assert results['NM_000088.3:c.589-2del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-2del' - assert results['NM_000088.3:c.589-2del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-2del' - self.assertCountEqual(results['NM_000088.3:c.589-2del']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.589-2del']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.589-2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.589-2del']['submitted_variant'] == 'NM_000088.3:c.589-2_589-1AG>G' - assert results['NM_000088.3:c.589-2del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-2del' - assert results['NM_000088.3:c.589-2del']['hgvs_lrg_variant'] == 'LRG_1:g.8636del' + assert results['NM_000088.3:c.589-2del']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589-2del']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.589-2del']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-2del' + assert results['NM_000088.3:c.589-2del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-2del' + assert results['NM_000088.3:c.589-2del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-2del' assert results['NM_000088.3:c.589-2del']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8636del' - assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275366del', 'vcf': {'chr': 'chr17', 'ref': 'CT', 'pos': '48275364', 'alt': 'C'}} - assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005del', 'vcf': {'chr': 'chr17', 'ref': 'CT', 'pos': '50198003', 'alt': 'C'}} - assert 
results['NM_000088.3:c.589-2del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275366del', 'vcf': {'chr': '17', 'ref': 'CT', 'pos': '48275364', 'alt': 'C'}} - assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005del', 'vcf': {'chr': '17', 'ref': 'CT', 'pos': '50198003', 'alt': 'C'}} - assert results['NM_000088.3:c.589-2del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.589-2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.589-2del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-2del' + assert results['NM_000088.3:c.589-2del']['hgvs_lrg_variant'] == 'LRG_1:g.8636del' + self.assertCountEqual(results['NM_000088.3:c.589-2del']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275366del', 'vcf': {'chr': 'chr17', 'pos': '48275364', 'ref': 'CT', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005del', 'vcf': {'chr': 'chr17', 'pos': '50198003', 'ref': 'CT', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275366del', 'vcf': {'chr': '17', 'pos': '48275364', 'ref': 'CT', 'alt': 'C'}} + assert results['NM_000088.3:c.589-2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198005del', 'vcf': {'chr': '17', 'pos': '50198003', 'ref': 'CT', 'alt': 'C'}} + assert 
results['NM_000088.3:c.589-2del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant127(self): variant = 'NM_000088.3:c.642+1_642+2delGTinsG' @@ -3935,47 +3986,47 @@ def test_variant127(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.642+2del' in list(results.keys()) - assert results['NM_000088.3:c.642+2del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642+2del' - assert results['NM_000088.3:c.642+2del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.642+2del' - self.assertCountEqual(results['NM_000088.3:c.642+2del']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.642+2del']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.642+2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.642+2del']['submitted_variant'] == 'NM_000088.3:c.642+1_642+2delGTinsG' - assert results['NM_000088.3:c.642+2del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.642+2del' - assert results['NM_000088.3:c.642+2del']['hgvs_lrg_variant'] == 'LRG_1:g.8693del' + assert results['NM_000088.3:c.642+2del']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.642+2del']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.642+2del']['hgvs_transcript_variant'] == 'NM_000088.3:c.642+2del' + assert results['NM_000088.3:c.642+2del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.642+2del' + assert results['NM_000088.3:c.642+2del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.642+2del' assert 
results['NM_000088.3:c.642+2del']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8693del' - assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308del', 'vcf': {'chr': 'chr17', 'ref': 'TA', 'pos': '48275307', 'alt': 'T'}} - assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947del', 'vcf': {'chr': 'chr17', 'ref': 'TA', 'pos': '50197946', 'alt': 'T'}} - assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308del', 'vcf': {'chr': '17', 'ref': 'TA', 'pos': '48275307', 'alt': 'T'}} - assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947del', 'vcf': {'chr': '17', 'ref': 'TA', 'pos': '50197946', 'alt': 'T'}} - assert results['NM_000088.3:c.642+2del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.642+2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.642+2del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.642+2del' + assert results['NM_000088.3:c.642+2del']['hgvs_lrg_variant'] == 'LRG_1:g.8693del' + self.assertCountEqual(results['NM_000088.3:c.642+2del']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308del', 'vcf': {'chr': 'chr17', 'pos': '48275307', 'ref': 'TA', 'alt': 'T'}} + assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000017.11:g.50197947del', 'vcf': {'chr': 'chr17', 'pos': '50197946', 'ref': 'TA', 'alt': 'T'}} + assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275308del', 'vcf': {'chr': '17', 'pos': '48275307', 'ref': 'TA', 'alt': 'T'}} + assert results['NM_000088.3:c.642+2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197947del', 'vcf': {'chr': '17', 'pos': '50197946', 'ref': 'TA', 'alt': 'T'}} + assert results['NM_000088.3:c.642+2del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant128(self): variant = 'NM_004415.3:c.1-1insA' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_004415.3:c.1-1insA' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert 
results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant129(self): variant = 'NM_004415.3:c.-1_1insA' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() @@ -3983,47 +4034,47 @@ def test_variant129(self): assert results['flag'] == 'gene_variant' assert 'NM_004415.3:c.-1_1insA' in list(results.keys()) - assert results['NM_004415.3:c.-1_1insA']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_004415.3:c.-1_1insA']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004415.3:c.-1_1insA']['alt_genomic_loci'], []) - assert results['NM_004415.3:c.-1_1insA']['gene_symbol'] == 'DSP' - assert results['NM_004415.3:c.-1_1insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004406.2(LRG_423p1):p.(Met1?)', 'slr': 'NP_004406.2:p.(M1?)'} assert results['NM_004415.3:c.-1_1insA']['submitted_variant'] == 'NM_004415.3:c.-1_1insA' - assert results['NM_004415.3:c.-1_1insA']['genome_context_intronic_sequence'] == '' - assert 
results['NM_004415.3:c.-1_1insA']['hgvs_lrg_variant'] == '' + assert results['NM_004415.3:c.-1_1insA']['gene_symbol'] == 'DSP' + assert results['NM_004415.3:c.-1_1insA']['gene_ids'] == {'hgnc_id': 'HGNC:3052', 'entrez_gene_id': '1832', 'ucsc_id': 'uc003mxp.2', 'omim_id': ['125647']} assert results['NM_004415.3:c.-1_1insA']['hgvs_transcript_variant'] == 'NM_004415.3:c.-1_1insA' + assert results['NM_004415.3:c.-1_1insA']['genome_context_intronic_sequence'] == '' + assert results['NM_004415.3:c.-1_1insA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004415.3:c.-1_1insA']['hgvs_refseqgene_variant'] == '' - assert results['NM_004415.3:c.-1_1insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.7542149dup', 'vcf': {'chr': 'chr6', 'ref': 'C', 'pos': '7542148', 'alt': 'CA'}} - assert results['NM_004415.3:c.-1_1insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.7541916dup', 'vcf': {'chr': 'chr6', 'ref': 'C', 'pos': '7541915', 'alt': 'CA'}} - assert results['NM_004415.3:c.-1_1insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.7542149dup', 'vcf': {'chr': '6', 'ref': 'C', 'pos': '7542148', 'alt': 'CA'}} - assert results['NM_004415.3:c.-1_1insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.7541916dup', 'vcf': {'chr': '6', 'ref': 'C', 'pos': '7541915', 'alt': 'CA'}} - assert results['NM_004415.3:c.-1_1insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004406.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004415.3'} - + assert results['NM_004415.3:c.-1_1insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004406.2(LRG_423p1):p.(Met1?)', 'slr': 'NP_004406.2:p.(M1?)'} + assert results['NM_004415.3:c.-1_1insA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_004415.3:c.-1_1insA']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['NM_004415.3:c.-1_1insA']['alt_genomic_loci'], []) + assert results['NM_004415.3:c.-1_1insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.7542149dup', 'vcf': {'chr': 'chr6', 'pos': '7542148', 'ref': 'C', 'alt': 'CA'}} + assert results['NM_004415.3:c.-1_1insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.7541916dup', 'vcf': {'chr': 'chr6', 'pos': '7541915', 'ref': 'C', 'alt': 'CA'}} + assert results['NM_004415.3:c.-1_1insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.7542149dup', 'vcf': {'chr': '6', 'pos': '7542148', 'ref': 'C', 'alt': 'CA'}} + assert results['NM_004415.3:c.-1_1insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.7541916dup', 'vcf': {'chr': '6', 'pos': '7541915', 'ref': 'C', 'alt': 'CA'}} + assert results['NM_004415.3:c.-1_1insA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004415.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004406.2'} def test_variant130(self): variant = 'NM_000273.2:c.1-5028_253del' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_000273.2:c.1-5028_253del' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == 
'' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant131(self): variant = 'NM_002929.2:c.1006C>T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() @@ -4031,22 +4082,22 @@ def test_variant131(self): assert results['flag'] == 'gene_variant' assert 'NM_002929.2:c.1006C>T' in list(results.keys()) - assert results['NM_002929.2:c.1006C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_002929.2:c.1006C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_002929.2:c.1006C>T']['alt_genomic_loci'], []) - assert results['NM_002929.2:c.1006C>T']['gene_symbol'] == 'GRK1' - assert results['NM_002929.2:c.1006C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002920.1:p.(Leu336Phe)', 'slr': 'NP_002920.1:p.(L336F)'} 
assert results['NM_002929.2:c.1006C>T']['submitted_variant'] == 'NM_002929.2:c.1006C>T' - assert results['NM_002929.2:c.1006C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_002929.2:c.1006C>T']['hgvs_lrg_variant'] == '' + assert results['NM_002929.2:c.1006C>T']['gene_symbol'] == 'GRK1' + assert results['NM_002929.2:c.1006C>T']['gene_ids'] == {'hgnc_id': 'HGNC:10013', 'entrez_gene_id': '6011', 'ucsc_id': 'uc010tkf.3', 'omim_id': ['180381']} assert results['NM_002929.2:c.1006C>T']['hgvs_transcript_variant'] == 'NM_002929.2:c.1006C>T' + assert results['NM_002929.2:c.1006C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_002929.2:c.1006C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_002929.2:c.1006C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_002929.2:c.1006C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002920.1:p.(Leu336Phe)', 'slr': 'NP_002920.1:p.(L336F)'} + assert results['NM_002929.2:c.1006C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_002929.2:c.1006C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_002929.2:c.1006C>T']['alt_genomic_loci'], []) assert 'hg19' not in list(results['NM_002929.2:c.1006C>T']['primary_assembly_loci'].keys()) - assert results['NM_002929.2:c.1006C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000013.11:g.113723094C>T', 'vcf': {'chr': 'chr13', 'ref': 'C', 'pos': '113723094', 'alt': 'T'}} + assert results['NM_002929.2:c.1006C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000013.11:g.113723094C>T', 'vcf': {'chr': 'chr13', 'pos': '113723094', 'ref': 'C', 'alt': 'T'}} assert 'grch37' not in list(results['NM_002929.2:c.1006C>T']['primary_assembly_loci'].keys()) - assert results['NM_002929.2:c.1006C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000013.11:g.113723094C>T', 'vcf': {'chr': '13', 'ref': 'C', 'pos': '113723094', 'alt': 'T'}} - assert 
results['NM_002929.2:c.1006C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002920.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002929.2'} - + assert results['NM_002929.2:c.1006C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000013.11:g.113723094C>T', 'vcf': {'chr': '13', 'pos': '113723094', 'ref': 'C', 'alt': 'T'}} + assert results['NM_002929.2:c.1006C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002929.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002920.1'} def test_variant132(self): variant = 'NR_125367.1:n.167+18165G>A' @@ -4055,95 +4106,95 @@ def test_variant132(self): assert results['flag'] == 'gene_variant' assert 'NR_125367.1:n.167+18165G>A' in list(results.keys()) - assert results['NR_125367.1:n.167+18165G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_125367.1:n.167+18165G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_125367.1:n.167+18165G>A']['alt_genomic_loci'], []) - assert results['NR_125367.1:n.167+18165G>A']['gene_symbol'] == 'MYHAS' - assert results['NR_125367.1:n.167+18165G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_125367.1:n.167+18165G>A']['submitted_variant'] == 'NR_125367.1:n.167+18165G>A' - assert results['NR_125367.1:n.167+18165G>A']['genome_context_intronic_sequence'] == 'NC_000017.10(NR_125367.1):c.167+18165G>A' - assert results['NR_125367.1:n.167+18165G>A']['hgvs_lrg_variant'] == '' + assert results['NR_125367.1:n.167+18165G>A']['gene_symbol'] == 'MYHAS' + assert results['NR_125367.1:n.167+18165G>A']['gene_ids'] == {'hgnc_id': 'HGNC:50609', 'entrez_gene_id': '100128560', 'ucsc_id': '', 'omim_id': []} assert results['NR_125367.1:n.167+18165G>A']['hgvs_transcript_variant'] == 'NR_125367.1:n.167+18165G>A' + assert 
results['NR_125367.1:n.167+18165G>A']['genome_context_intronic_sequence'] == 'NC_000017.10(NR_125367.1):c.167+18165G>A' + assert results['NR_125367.1:n.167+18165G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NR_125367.1:n.167+18165G>A']['hgvs_refseqgene_variant'] == '' - assert results['NR_125367.1:n.167+18165G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.10327720G>A', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '10327720', 'alt': 'A'}} - assert results['NR_125367.1:n.167+18165G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.10424403G>A', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '10424403', 'alt': 'A'}} - assert results['NR_125367.1:n.167+18165G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.10327720G>A', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '10327720', 'alt': 'A'}} - assert results['NR_125367.1:n.167+18165G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.10424403G>A', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '10424403', 'alt': 'A'}} + assert results['NR_125367.1:n.167+18165G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_125367.1:n.167+18165G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_125367.1:n.167+18165G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_125367.1:n.167+18165G>A']['alt_genomic_loci'], []) + assert results['NR_125367.1:n.167+18165G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.10327720G>A', 'vcf': {'chr': 'chr17', 'pos': '10327720', 'ref': 'G', 'alt': 'A'}} + assert results['NR_125367.1:n.167+18165G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.10424403G>A', 'vcf': {'chr': 'chr17', 'pos': '10424403', 'ref': 'G', 'alt': 'A'}} + assert 
results['NR_125367.1:n.167+18165G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.10327720G>A', 'vcf': {'chr': '17', 'pos': '10327720', 'ref': 'G', 'alt': 'A'}} + assert results['NR_125367.1:n.167+18165G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.10424403G>A', 'vcf': {'chr': '17', 'pos': '10424403', 'ref': 'G', 'alt': 'A'}} assert results['NR_125367.1:n.167+18165G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_125367.1'} - def test_variant133(self): variant = 'NM_006005.3:c.3071_3073delinsTTA' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_006005.3:c.3071_3073delinsTTA' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 
'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant134(self): variant = 'NM_000089.3:n.1504_1506del' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_000089.3:n.1504_1506del' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert 
results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant135(self): variant = 'NC_012920.1:m.1011C>T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NC_012920.1:m.1011C>T' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert 
results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' - assert results['validation_warning_1']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_012920.1:m.1011C>T', 'vcf': {'chr': 'chrM', 'ref': 'C', 'pos': '1011', 'alt': 'T'}} - assert results['validation_warning_1']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_012920.1:m.1011C>T', 'vcf': {'chr': 'chrM', 'ref': 'C', 'pos': '1011', 'alt': 'T'}} - assert results['validation_warning_1']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_012920.1:m.1011C>T', 'vcf': {'chr': 'M', 'ref': 'C', 'pos': '1011', 'alt': 'T'}} - assert results['validation_warning_1']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_012920.1:m.1011C>T', 'vcf': {'chr': 'M', 'ref': 'C', 'pos': '1011', 'alt': 'T'}} + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) + assert results['validation_warning_1']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_012920.1:m.1011C>T', 'vcf': {'chr': 'chrM', 'pos': '1011', 'ref': 'C', 'alt': 'T'}} + assert results['validation_warning_1']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_012920.1:m.1011C>T', 'vcf': {'chr': 'chrM', 'pos': '1011', 'ref': 'C', 'alt': 'T'}} + assert results['validation_warning_1']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_012920.1:m.1011C>T', 'vcf': {'chr': 'M', 'pos': '1011', 'ref': 'C', 'alt': 'T'}} + assert results['validation_warning_1']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_012920.1:m.1011C>T', 'vcf': {'chr': 'M', 'pos': '1011', 'ref': 'C', 'alt': 'T'}} assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant136(self): variant = 'NC_000006.11:g.90403795G=' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() @@ -4151,183 +4202,191 @@ def test_variant136(self): assert results['flag'] == 'gene_variant' assert 'NM_014611.1:c.9879T>C' in list(results.keys()) - assert results['NM_014611.1:c.9879T>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_014611.1:c.9879T>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_014611.1:c.9879T>C']['alt_genomic_loci'], []) - assert results['NM_014611.1:c.9879T>C']['gene_symbol'] == 'MDN1' - assert results['NM_014611.1:c.9879T>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055426.1:p.(Val3293=)', 'slr': 'NP_055426.1:p.(V3293=)'} assert results['NM_014611.1:c.9879T>C']['submitted_variant'] == 'NC_000006.11:g.90403795G=' - assert results['NM_014611.1:c.9879T>C']['genome_context_intronic_sequence'] == '' - assert results['NM_014611.1:c.9879T>C']['hgvs_lrg_variant'] == '' + assert results['NM_014611.1:c.9879T>C']['gene_symbol'] == 'MDN1' + assert results['NM_014611.1:c.9879T>C']['gene_ids'] == {'hgnc_id': 'HGNC:18302', 'entrez_gene_id': '23195', 'ucsc_id': 'uc003pnn.2', 'omim_id': ['618200']} assert results['NM_014611.1:c.9879T>C']['hgvs_transcript_variant'] == 'NM_014611.1:c.9879T>C' + assert results['NM_014611.1:c.9879T>C']['genome_context_intronic_sequence'] == '' + assert results['NM_014611.1:c.9879T>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014611.1:c.9879T>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_014611.1:c.9879T>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': 'chr6', 'ref': 'G', 'pos': '90403795', 'alt': 'G'}} + 
assert results['NM_014611.1:c.9879T>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055426.1:p.(Val3293=)', 'slr': 'NP_055426.1:p.(V3293=)'} + assert results['NM_014611.1:c.9879T>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014611.1:c.9879T>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_014611.1:c.9879T>C']['alt_genomic_loci'], []) + assert results['NM_014611.1:c.9879T>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': 'chr6', 'pos': '90403795', 'ref': 'G', 'alt': 'G'}} assert 'hg38' not in list(results['NM_014611.1:c.9879T>C']['primary_assembly_loci'].keys()) - assert results['NM_014611.1:c.9879T>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': '6', 'ref': 'G', 'pos': '90403795', 'alt': 'G'}} + assert results['NM_014611.1:c.9879T>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': '6', 'pos': '90403795', 'ref': 'G', 'alt': 'G'}} assert 'grch38' not in list(results['NM_014611.1:c.9879T>C']['primary_assembly_loci'].keys()) - assert results['NM_014611.1:c.9879T>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.1'} + assert results['NM_014611.1:c.9879T>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1'} assert 'NM_014611.2:c.9879C=' in list(results.keys()) - assert results['NM_014611.2:c.9879C=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_014611.2:c.9879C=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_014611.2:c.9879C=']['alt_genomic_loci'], []) - assert results['NM_014611.2:c.9879C=']['gene_symbol'] == 'MDN1' - assert 
results['NM_014611.2:c.9879C=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055426.1:p.(Val3293=)', 'slr': 'NP_055426.1:p.(V3293=)'} assert results['NM_014611.2:c.9879C=']['submitted_variant'] == 'NC_000006.11:g.90403795G=' - assert results['NM_014611.2:c.9879C=']['genome_context_intronic_sequence'] == '' - assert results['NM_014611.2:c.9879C=']['hgvs_lrg_variant'] == '' + assert results['NM_014611.2:c.9879C=']['gene_symbol'] == 'MDN1' + assert results['NM_014611.2:c.9879C=']['gene_ids'] == {'hgnc_id': 'HGNC:18302', 'entrez_gene_id': '23195', 'ucsc_id': 'uc003pnn.2', 'omim_id': ['618200']} assert results['NM_014611.2:c.9879C=']['hgvs_transcript_variant'] == 'NM_014611.2:c.9879C=' + assert results['NM_014611.2:c.9879C=']['genome_context_intronic_sequence'] == '' + assert results['NM_014611.2:c.9879C=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014611.2:c.9879C=']['hgvs_refseqgene_variant'] == '' - assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': 'chr6', 'ref': 'G', 'pos': '90403795', 'alt': 'G'}} - assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G=', 'vcf': {'chr': 'chr6', 'ref': 'G', 'pos': '89694076', 'alt': 'G'}} - assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': '6', 'ref': 'G', 'pos': '90403795', 'alt': 'G'}} - assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G=', 'vcf': {'chr': '6', 'ref': 'G', 'pos': '89694076', 'alt': 'G'}} - assert results['NM_014611.2:c.9879C=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.2'} - + assert 
results['NM_014611.2:c.9879C=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055426.1:p.(Val3293=)', 'slr': 'NP_055426.1:p.(V3293=)'} + assert results['NM_014611.2:c.9879C=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014611.2:c.9879C=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_014611.2:c.9879C=']['alt_genomic_loci'], []) + assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': 'chr6', 'pos': '90403795', 'ref': 'G', 'alt': 'G'}} + assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G=', 'vcf': {'chr': 'chr6', 'pos': '89694076', 'ref': 'G', 'alt': 'G'}} + assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': '6', 'pos': '90403795', 'ref': 'G', 'alt': 'G'}} + assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G=', 'vcf': {'chr': '6', 'pos': '89694076', 'ref': 'G', 'alt': 'G'}} + assert results['NM_014611.2:c.9879C=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1'} def test_variant137(self): variant = '1-169519049-T-.' 
results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_000130.4:c.1602del' in list(results.keys()) - assert results['NM_000130.4:c.1602del']['hgvs_lrg_transcript_variant'] == 'LRG_553t1:c.1601del' - assert results['NM_000130.4:c.1602del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000130.4:c.1602del']['alt_genomic_loci'], []) - assert results['NM_000130.4:c.1602del']['gene_symbol'] == 'F5' - assert results['NM_000130.4:c.1602del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000121.2(LRG_553p1):p.(Arg534GlnfsTer40)', 'slr': 'NP_000121.2:p.(R534Qfs*40)'} assert results['NM_000130.4:c.1602del']['submitted_variant'] == '1-169519049-T-.' - assert results['NM_000130.4:c.1602del']['genome_context_intronic_sequence'] == '' - assert results['NM_000130.4:c.1602del']['hgvs_lrg_variant'] == 'LRG_553:g.41721del' + assert results['NM_000130.4:c.1602del']['gene_symbol'] == 'F5' + assert results['NM_000130.4:c.1602del']['gene_ids'] == {'hgnc_id': 'HGNC:3542', 'entrez_gene_id': '2153', 'ucsc_id': 'uc001ggg.2', 'omim_id': ['612309']} assert results['NM_000130.4:c.1602del']['hgvs_transcript_variant'] == 'NM_000130.4:c.1602del' + assert results['NM_000130.4:c.1602del']['genome_context_intronic_sequence'] == '' + assert results['NM_000130.4:c.1602del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000130.4:c.1602del']['hgvs_refseqgene_variant'] == 'NG_011806.1:g.41721del' - assert results['NM_000130.4:c.1602del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.169519049del', 'vcf': {'chr': 'chr1', 'ref': 'CT', 'pos': '169519047', 'alt': 'C'}} - assert results['NM_000130.4:c.1602del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549810del', 'vcf': {'chr': 'chr1', 'ref': 'CT', 'pos': '169549809', 'alt': 'C'}} - assert 
results['NM_000130.4:c.1602del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.169519049del', 'vcf': {'chr': '1', 'ref': 'CT', 'pos': '169519047', 'alt': 'C'}} - assert results['NM_000130.4:c.1602del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549810del', 'vcf': {'chr': '1', 'ref': 'CT', 'pos': '169549809', 'alt': 'C'}} - assert results['NM_000130.4:c.1602del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011806.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000121.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000130.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_553.xml'} + assert results['NM_000130.4:c.1602del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000121.2(LRG_553p1):p.(Arg534GlnfsTer40)', 'slr': 'NP_000121.2:p.(R534Qfs*40)'} + assert results['NM_000130.4:c.1602del']['hgvs_lrg_transcript_variant'] == 'LRG_553t1:c.1601del' + assert results['NM_000130.4:c.1602del']['hgvs_lrg_variant'] == 'LRG_553:g.41721del' + self.assertCountEqual(results['NM_000130.4:c.1602del']['alt_genomic_loci'], []) + assert results['NM_000130.4:c.1602del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.169519049del', 'vcf': {'chr': 'chr1', 'pos': '169519047', 'ref': 'CT', 'alt': 'C'}} + assert results['NM_000130.4:c.1602del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549810del', 'vcf': {'chr': 'chr1', 'pos': '169549809', 'ref': 'CT', 'alt': 'C'}} + assert results['NM_000130.4:c.1602del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.169519049del', 'vcf': {'chr': '1', 'pos': '169519047', 'ref': 'CT', 'alt': 'C'}} + assert results['NM_000130.4:c.1602del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549810del', 'vcf': {'chr': '1', 'pos': '169549809', 'ref': 'CT', 'alt': 
'C'}} + assert results['NM_000130.4:c.1602del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000130.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000121.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011806.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_553.xml'} - assert results['flag'] == 'gene_variant' assert 'NM_000130.4:c.1601G>A' in list(results.keys()) - assert results['NM_000130.4:c.1601G>A']['hgvs_lrg_transcript_variant'] == 'LRG_553t1:c.1601G>A' - assert results['NM_000130.4:c.1601G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000130.4:c.1601G>A']['alt_genomic_loci'], []) - assert results['NM_000130.4:c.1601G>A']['gene_symbol'] == 'F5' - assert results['NM_000130.4:c.1601G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000121.2(LRG_553p1):p.(Arg534Gln)', 'slr': 'NP_000121.2:p.(R534Q)'} assert results['NM_000130.4:c.1601G>A']['submitted_variant'] == '1-169519049-T-.' 
- assert results['NM_000130.4:c.1601G>A']['genome_context_intronic_sequence'] == '' - assert results['NM_000130.4:c.1601G>A']['hgvs_lrg_variant'] == 'LRG_553:g.41721G>A' + assert results['NM_000130.4:c.1601G>A']['gene_symbol'] == 'F5' + assert results['NM_000130.4:c.1601G>A']['gene_ids'] == {'hgnc_id': 'HGNC:3542', 'entrez_gene_id': '2153', 'ucsc_id': 'uc001ggg.2', 'omim_id': ['612309']} assert results['NM_000130.4:c.1601G>A']['hgvs_transcript_variant'] == 'NM_000130.4:c.1601G>A' + assert results['NM_000130.4:c.1601G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_000130.4:c.1601G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000130.4:c.1601G>A']['hgvs_refseqgene_variant'] == 'NG_011806.1:g.41721G>A' - assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.169519049T=', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '169519049', 'alt': 'T'}} - assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549811C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '169549811', 'alt': 'T'}} - assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.169519049T=', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '169519049', 'alt': 'T'}} - assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549811C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '169549811', 'alt': 'T'}} - assert results['NM_000130.4:c.1601G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011806.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000121.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000130.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_553.xml'} - + assert results['NM_000130.4:c.1601G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_000121.2(LRG_553p1):p.(Arg534Gln)', 'slr': 'NP_000121.2:p.(R534Q)'} + assert results['NM_000130.4:c.1601G>A']['hgvs_lrg_transcript_variant'] == 'LRG_553t1:c.1601G>A' + assert results['NM_000130.4:c.1601G>A']['hgvs_lrg_variant'] == 'LRG_553:g.41721G>A' + self.assertCountEqual(results['NM_000130.4:c.1601G>A']['alt_genomic_loci'], []) + assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.169519049T=', 'vcf': {'chr': 'chr1', 'pos': '169519049', 'ref': 'T', 'alt': 'T'}} + assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549811C>T', 'vcf': {'chr': 'chr1', 'pos': '169549811', 'ref': 'C', 'alt': 'T'}} + assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.169519049T=', 'vcf': {'chr': '1', 'pos': '169519049', 'ref': 'T', 'alt': 'T'}} + assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549811C>T', 'vcf': {'chr': '1', 'pos': '169549811', 'ref': 'C', 'alt': 'T'}} + assert results['NM_000130.4:c.1601G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000130.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000121.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011806.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_553.xml'} def test_variant138(self): variant = 'NC_000005.9:g.35058667_35058668AG=' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_001204317.1:c.856-9155_856-9154=' in list(results.keys()) - assert results['NM_001204317.1:c.856-9155_856-9154=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001204317.1:c.856-9155_856-9154=']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_001204317.1:c.856-9155_856-9154=']['alt_genomic_loci'], []) - assert results['NM_001204317.1:c.856-9155_856-9154=']['gene_symbol'] == 'PRLR' - assert results['NM_001204317.1:c.856-9155_856-9154=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191246.1:p.?', 'slr': 'NP_001191246.1:p.?'} - assert results['NM_001204317.1:c.856-9155_856-9154=']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' - assert results['NM_001204317.1:c.856-9155_856-9154=']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001204317.1):c.856-9155_856-9154=' - assert results['NM_001204317.1:c.856-9155_856-9154=']['hgvs_lrg_variant'] == '' - assert results['NM_001204317.1:c.856-9155_856-9154=']['hgvs_transcript_variant'] == 'NM_001204317.1:c.856-9155_856-9154=' - assert results['NM_001204317.1:c.856-9155_856-9154=']['hgvs_refseqgene_variant'] == '' - assert results['NM_001204317.1:c.856-9155_856-9154=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': 'chr5', 'ref': 'AG', 'pos': '35058667', 'alt': 'AG'}} - assert results['NM_001204317.1:c.856-9155_856-9154=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058562_35058563=', 'vcf': {'chr': 'chr5', 'ref': 'AA', 'pos': '35058562', 'alt': 'AA'}} - assert results['NM_001204317.1:c.856-9155_856-9154=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': '5', 'ref': 'AG', 'pos': '35058667', 'alt': 'AG'}} - assert results['NM_001204317.1:c.856-9155_856-9154=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058562_35058563=', 'vcf': {'chr': '5', 'ref': 'AA', 'pos': '35058562', 'alt': 'AA'}} - assert results['NM_001204317.1:c.856-9155_856-9154=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191246.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204317.1'} + assert results['flag'] == 'gene_variant' + assert 'NM_000949.6:c.*6528del' in list(results.keys()) + assert results['NM_000949.6:c.*6528del']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' + assert results['NM_000949.6:c.*6528del']['gene_symbol'] == 'PRLR' + assert results['NM_000949.6:c.*6528del']['gene_ids'] == {'hgnc_id': 'HGNC:9446', 'entrez_gene_id': '5618', 'ucsc_id': 'uc032uqm.2', 'omim_id': ['176761']} + assert results['NM_000949.6:c.*6528del']['hgvs_transcript_variant'] == 'NM_000949.6:c.*6528del' + assert results['NM_000949.6:c.*6528del']['genome_context_intronic_sequence'] == '' + assert results['NM_000949.6:c.*6528del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000949.6:c.*6528del']['hgvs_refseqgene_variant'] == '' + assert results['NM_000949.6:c.*6528del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000940.1:p.?', 'slr': 'NP_000940.1:p.?'} + assert results['NM_000949.6:c.*6528del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000949.6:c.*6528del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000949.6:c.*6528del']['alt_genomic_loci'], []) + assert results['NM_000949.6:c.*6528del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058662_35058668=', 'vcf': {'chr': 'chr5', 'pos': '35058662', 'ref': 'AGACAAG', 'alt': 'AGACAAG'}} + assert results['NM_000949.6:c.*6528del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563del', 'vcf': {'chr': 'chr5', 'pos': '35058560', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_000949.6:c.*6528del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058662_35058668=', 'vcf': {'chr': '5', 'pos': '35058662', 'ref': 'AGACAAG', 'alt': 'AGACAAG'}} + assert results['NM_000949.6:c.*6528del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563del', 'vcf': 
{'chr': '5', 'pos': '35058560', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_000949.6:c.*6528del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000949.6', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000940.1'} + + assert 'NR_037910.1:n.828-9155_828-9154=' in list(results.keys()) + assert results['NR_037910.1:n.828-9155_828-9154=']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' + assert results['NR_037910.1:n.828-9155_828-9154=']['gene_symbol'] == 'PRLR' + assert results['NR_037910.1:n.828-9155_828-9154=']['gene_ids'] == {'hgnc_id': 'HGNC:9446', 'entrez_gene_id': '5618', 'ucsc_id': 'uc032uqm.2', 'omim_id': ['176761']} + assert results['NR_037910.1:n.828-9155_828-9154=']['hgvs_transcript_variant'] == 'NR_037910.1:n.828-9155_828-9154=' + assert results['NR_037910.1:n.828-9155_828-9154=']['genome_context_intronic_sequence'] == 'NC_000005.9(NR_037910.1):c.828-9155_828-9154=' + assert results['NR_037910.1:n.828-9155_828-9154=']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_037910.1:n.828-9155_828-9154=']['hgvs_refseqgene_variant'] == '' + assert results['NR_037910.1:n.828-9155_828-9154=']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_037910.1:n.828-9155_828-9154=']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_037910.1:n.828-9155_828-9154=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_037910.1:n.828-9155_828-9154=']['alt_genomic_loci'], []) + assert results['NR_037910.1:n.828-9155_828-9154=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': 'chr5', 'pos': '35058667', 'ref': 'AG', 'alt': 'AG'}} + assert results['NR_037910.1:n.828-9155_828-9154=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058562_35058563=', 'vcf': {'chr': 'chr5', 'pos': '35058562', 'ref': 'AA', 'alt': 'AA'}} + assert 
results['NR_037910.1:n.828-9155_828-9154=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': '5', 'pos': '35058667', 'ref': 'AG', 'alt': 'AG'}} + assert results['NR_037910.1:n.828-9155_828-9154=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058562_35058563=', 'vcf': {'chr': '5', 'pos': '35058562', 'ref': 'AA', 'alt': 'AA'}} + assert results['NR_037910.1:n.828-9155_828-9154=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_037910.1'} assert 'NM_001204316.1:c.1009+7383_1009+7384=' in list(results.keys()) - assert results['NM_001204316.1:c.1009+7383_1009+7384=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001204316.1:c.1009+7383_1009+7384=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001204316.1:c.1009+7383_1009+7384=']['alt_genomic_loci'], []) - assert results['NM_001204316.1:c.1009+7383_1009+7384=']['gene_symbol'] == 'PRLR' - assert results['NM_001204316.1:c.1009+7383_1009+7384=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191245.1:p.?', 'slr': 'NP_001191245.1:p.?'} assert results['NM_001204316.1:c.1009+7383_1009+7384=']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' - assert results['NM_001204316.1:c.1009+7383_1009+7384=']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001204316.1):c.1009+7383_1009+7384=' - assert results['NM_001204316.1:c.1009+7383_1009+7384=']['hgvs_lrg_variant'] == '' + assert results['NM_001204316.1:c.1009+7383_1009+7384=']['gene_symbol'] == 'PRLR' + assert results['NM_001204316.1:c.1009+7383_1009+7384=']['gene_ids'] == {'hgnc_id': 'HGNC:9446', 'entrez_gene_id': '5618', 'ucsc_id': 'uc032uqm.2', 'omim_id': ['176761']} assert results['NM_001204316.1:c.1009+7383_1009+7384=']['hgvs_transcript_variant'] == 'NM_001204316.1:c.1009+7383_1009+7384=' + assert 
results['NM_001204316.1:c.1009+7383_1009+7384=']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001204316.1):c.1009+7383_1009+7384=' + assert results['NM_001204316.1:c.1009+7383_1009+7384=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001204316.1:c.1009+7383_1009+7384=']['hgvs_refseqgene_variant'] == '' - assert results['NM_001204316.1:c.1009+7383_1009+7384=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': 'chr5', 'ref': 'AG', 'pos': '35058667', 'alt': 'AG'}} - assert results['NM_001204316.1:c.1009+7383_1009+7384=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058565_35058566=', 'vcf': {'chr': 'chr5', 'ref': 'AT', 'pos': '35058565', 'alt': 'AT'}} - assert results['NM_001204316.1:c.1009+7383_1009+7384=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': '5', 'ref': 'AG', 'pos': '35058667', 'alt': 'AG'}} - assert results['NM_001204316.1:c.1009+7383_1009+7384=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058565_35058566=', 'vcf': {'chr': '5', 'ref': 'AT', 'pos': '35058565', 'alt': 'AT'}} - assert results['NM_001204316.1:c.1009+7383_1009+7384=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191245.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204316.1'} + assert results['NM_001204316.1:c.1009+7383_1009+7384=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191245.1:p.?', 'slr': 'NP_001191245.1:p.?'} + assert results['NM_001204316.1:c.1009+7383_1009+7384=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001204316.1:c.1009+7383_1009+7384=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001204316.1:c.1009+7383_1009+7384=']['alt_genomic_loci'], []) + assert results['NM_001204316.1:c.1009+7383_1009+7384=']['primary_assembly_loci']['hg19'] 
== {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': 'chr5', 'pos': '35058667', 'ref': 'AG', 'alt': 'AG'}} + assert results['NM_001204316.1:c.1009+7383_1009+7384=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058565_35058566=', 'vcf': {'chr': 'chr5', 'pos': '35058565', 'ref': 'AT', 'alt': 'AT'}} + assert results['NM_001204316.1:c.1009+7383_1009+7384=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': '5', 'pos': '35058667', 'ref': 'AG', 'alt': 'AG'}} + assert results['NM_001204316.1:c.1009+7383_1009+7384=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058565_35058566=', 'vcf': {'chr': '5', 'pos': '35058565', 'ref': 'AT', 'alt': 'AT'}} + assert results['NM_001204316.1:c.1009+7383_1009+7384=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204316.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191245.1'} - assert 'NM_001204314.2:c.*6528del' in list(results.keys()) - assert results['NM_001204314.2:c.*6528del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001204314.2:c.*6528del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001204314.2:c.*6528del']['alt_genomic_loci'], []) - assert results['NM_001204314.2:c.*6528del']['gene_symbol'] == 'PRLR' - assert results['NM_001204314.2:c.*6528del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191243.1:p.?', 'slr': 'NP_001191243.1:p.?'} - assert results['NM_001204314.2:c.*6528del']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' - assert results['NM_001204314.2:c.*6528del']['genome_context_intronic_sequence'] == '' - assert results['NM_001204314.2:c.*6528del']['hgvs_lrg_variant'] == '' - assert results['NM_001204314.2:c.*6528del']['hgvs_transcript_variant'] == 'NM_001204314.2:c.*6528del' - assert 
results['NM_001204314.2:c.*6528del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001204314.2:c.*6528del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058662_35058668=', 'vcf': {'chr': 'chr5', 'ref': 'AGACAAG', 'pos': '35058662', 'alt': 'AGACAAG'}} - assert results['NM_001204314.2:c.*6528del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563del', 'vcf': {'chr': 'chr5', 'ref': 'CA', 'pos': '35058560', 'alt': 'C'}} - assert results['NM_001204314.2:c.*6528del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058662_35058668=', 'vcf': {'chr': '5', 'ref': 'AGACAAG', 'pos': '35058662', 'alt': 'AGACAAG'}} - assert results['NM_001204314.2:c.*6528del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563del', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058560', 'alt': 'C'}} - assert results['NM_001204314.2:c.*6528del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191243.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204314.2'} + assert 'NM_001204317.1:c.856-9155_856-9154=' in list(results.keys()) + assert results['NM_001204317.1:c.856-9155_856-9154=']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' + assert results['NM_001204317.1:c.856-9155_856-9154=']['gene_symbol'] == 'PRLR' + assert results['NM_001204317.1:c.856-9155_856-9154=']['gene_ids'] == {'hgnc_id': 'HGNC:9446', 'entrez_gene_id': '5618', 'ucsc_id': 'uc032uqm.2', 'omim_id': ['176761']} + assert results['NM_001204317.1:c.856-9155_856-9154=']['hgvs_transcript_variant'] == 'NM_001204317.1:c.856-9155_856-9154=' + assert results['NM_001204317.1:c.856-9155_856-9154=']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001204317.1):c.856-9155_856-9154=' + assert results['NM_001204317.1:c.856-9155_856-9154=']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_001204317.1:c.856-9155_856-9154=']['hgvs_refseqgene_variant'] == '' + assert results['NM_001204317.1:c.856-9155_856-9154=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191246.1:p.?', 'slr': 'NP_001191246.1:p.?'} + assert results['NM_001204317.1:c.856-9155_856-9154=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001204317.1:c.856-9155_856-9154=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001204317.1:c.856-9155_856-9154=']['alt_genomic_loci'], []) + assert results['NM_001204317.1:c.856-9155_856-9154=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': 'chr5', 'pos': '35058667', 'ref': 'AG', 'alt': 'AG'}} + assert results['NM_001204317.1:c.856-9155_856-9154=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058562_35058563=', 'vcf': {'chr': 'chr5', 'pos': '35058562', 'ref': 'AA', 'alt': 'AA'}} + assert results['NM_001204317.1:c.856-9155_856-9154=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': '5', 'pos': '35058667', 'ref': 'AG', 'alt': 'AG'}} + assert results['NM_001204317.1:c.856-9155_856-9154=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058562_35058563=', 'vcf': {'chr': '5', 'pos': '35058562', 'ref': 'AA', 'alt': 'AA'}} + assert results['NM_001204317.1:c.856-9155_856-9154=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204317.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191246.1'} assert 'NM_001204318.1:c.686-9155_686-9154=' in list(results.keys()) - assert results['NM_001204318.1:c.686-9155_686-9154=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001204318.1:c.686-9155_686-9154=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001204318.1:c.686-9155_686-9154=']['alt_genomic_loci'], []) - 
assert results['NM_001204318.1:c.686-9155_686-9154=']['gene_symbol'] == 'PRLR' - assert results['NM_001204318.1:c.686-9155_686-9154=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191247.1:p.?', 'slr': 'NP_001191247.1:p.?'} assert results['NM_001204318.1:c.686-9155_686-9154=']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' - assert results['NM_001204318.1:c.686-9155_686-9154=']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001204318.1):c.686-9155_686-9154=' - assert results['NM_001204318.1:c.686-9155_686-9154=']['hgvs_lrg_variant'] == '' + assert results['NM_001204318.1:c.686-9155_686-9154=']['gene_symbol'] == 'PRLR' + assert results['NM_001204318.1:c.686-9155_686-9154=']['gene_ids'] == {'hgnc_id': 'HGNC:9446', 'entrez_gene_id': '5618', 'ucsc_id': 'uc032uqm.2', 'omim_id': ['176761']} assert results['NM_001204318.1:c.686-9155_686-9154=']['hgvs_transcript_variant'] == 'NM_001204318.1:c.686-9155_686-9154=' + assert results['NM_001204318.1:c.686-9155_686-9154=']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001204318.1):c.686-9155_686-9154=' + assert results['NM_001204318.1:c.686-9155_686-9154=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001204318.1:c.686-9155_686-9154=']['hgvs_refseqgene_variant'] == '' - assert results['NM_001204318.1:c.686-9155_686-9154=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': 'chr5', 'ref': 'AG', 'pos': '35058667', 'alt': 'AG'}} - assert results['NM_001204318.1:c.686-9155_686-9154=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058562_35058563=', 'vcf': {'chr': 'chr5', 'ref': 'AA', 'pos': '35058562', 'alt': 'AA'}} - assert results['NM_001204318.1:c.686-9155_686-9154=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': '5', 'ref': 'AG', 'pos': '35058667', 'alt': 'AG'}} - assert 
results['NM_001204318.1:c.686-9155_686-9154=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058562_35058563=', 'vcf': {'chr': '5', 'ref': 'AA', 'pos': '35058562', 'alt': 'AA'}} - assert results['NM_001204318.1:c.686-9155_686-9154=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191247.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204318.1'} - - assert 'NR_037910.1:n.828-9155_828-9154=' in list(results.keys()) - assert results['NR_037910.1:n.828-9155_828-9154=']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_037910.1:n.828-9155_828-9154=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_037910.1:n.828-9155_828-9154=']['alt_genomic_loci'], []) - assert results['NR_037910.1:n.828-9155_828-9154=']['gene_symbol'] == 'PRLR' - assert results['NR_037910.1:n.828-9155_828-9154=']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} - assert results['NR_037910.1:n.828-9155_828-9154=']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' - assert results['NR_037910.1:n.828-9155_828-9154=']['genome_context_intronic_sequence'] == 'NC_000005.9(NR_037910.1):c.828-9155_828-9154=' - assert results['NR_037910.1:n.828-9155_828-9154=']['hgvs_lrg_variant'] == '' - assert results['NR_037910.1:n.828-9155_828-9154=']['hgvs_transcript_variant'] == 'NR_037910.1:n.828-9155_828-9154=' - assert results['NR_037910.1:n.828-9155_828-9154=']['hgvs_refseqgene_variant'] == '' - assert results['NR_037910.1:n.828-9155_828-9154=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': 'chr5', 'ref': 'AG', 'pos': '35058667', 'alt': 'AG'}} - assert results['NR_037910.1:n.828-9155_828-9154=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058562_35058563=', 'vcf': {'chr': 'chr5', 'ref': 'AA', 'pos': '35058562', 'alt': 
'AA'}} - assert results['NR_037910.1:n.828-9155_828-9154=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': '5', 'ref': 'AG', 'pos': '35058667', 'alt': 'AG'}} - assert results['NR_037910.1:n.828-9155_828-9154=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058562_35058563=', 'vcf': {'chr': '5', 'ref': 'AA', 'pos': '35058562', 'alt': 'AA'}} - assert results['NR_037910.1:n.828-9155_828-9154=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_037910.1'} + assert results['NM_001204318.1:c.686-9155_686-9154=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191247.1:p.?', 'slr': 'NP_001191247.1:p.?'} + assert results['NM_001204318.1:c.686-9155_686-9154=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001204318.1:c.686-9155_686-9154=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001204318.1:c.686-9155_686-9154=']['alt_genomic_loci'], []) + assert results['NM_001204318.1:c.686-9155_686-9154=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': 'chr5', 'pos': '35058667', 'ref': 'AG', 'alt': 'AG'}} + assert results['NM_001204318.1:c.686-9155_686-9154=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058562_35058563=', 'vcf': {'chr': 'chr5', 'pos': '35058562', 'ref': 'AA', 'alt': 'AA'}} + assert results['NM_001204318.1:c.686-9155_686-9154=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': '5', 'pos': '35058667', 'ref': 'AG', 'alt': 'AG'}} + assert results['NM_001204318.1:c.686-9155_686-9154=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058562_35058563=', 'vcf': {'chr': '5', 'pos': '35058562', 'ref': 'AA', 'alt': 'AA'}} + assert 
results['NM_001204318.1:c.686-9155_686-9154=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204318.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191247.1'} - assert results['flag'] == 'gene_variant' assert 'NM_000949.5:c.*6523_*6524=' in list(results.keys()) - assert results['NM_000949.5:c.*6523_*6524=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000949.5:c.*6523_*6524=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000949.5:c.*6523_*6524=']['alt_genomic_loci'], []) - assert results['NM_000949.5:c.*6523_*6524=']['gene_symbol'] == 'PRLR' - assert results['NM_000949.5:c.*6523_*6524=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000940.1:p.?', 'slr': 'NP_000940.1:p.?'} assert results['NM_000949.5:c.*6523_*6524=']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' - assert results['NM_000949.5:c.*6523_*6524=']['genome_context_intronic_sequence'] == '' - assert results['NM_000949.5:c.*6523_*6524=']['hgvs_lrg_variant'] == '' + assert results['NM_000949.5:c.*6523_*6524=']['gene_symbol'] == 'PRLR' + assert results['NM_000949.5:c.*6523_*6524=']['gene_ids'] == {'hgnc_id': 'HGNC:9446', 'entrez_gene_id': '5618', 'ucsc_id': 'uc032uqm.2', 'omim_id': ['176761']} assert results['NM_000949.5:c.*6523_*6524=']['hgvs_transcript_variant'] == 'NM_000949.5:c.*6523_*6524=' + assert results['NM_000949.5:c.*6523_*6524=']['genome_context_intronic_sequence'] == '' + assert results['NM_000949.5:c.*6523_*6524=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000949.5:c.*6523_*6524=']['hgvs_refseqgene_variant'] == 'NG_029042.1:g.177156_177157=' + assert results['NM_000949.5:c.*6523_*6524=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000940.1:p.?', 'slr': 'NP_000940.1:p.?'} + assert results['NM_000949.5:c.*6523_*6524=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000949.5:c.*6523_*6524=']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['NM_000949.5:c.*6523_*6524=']['alt_genomic_loci'], []) result_options = [ {'hgvs_genomic_description': 'NC_000005.9:g.35058666_35058669=', 'vcf': {'chr': 'chr5', 'ref': 'AAGA', 'pos': '35058666', 'alt': 'AAGA'}}, @@ -4344,67 +4403,68 @@ def test_variant138(self): ] self.assertIn(results['NM_000949.5:c.*6523_*6524=']['primary_assembly_loci']['grch37'], result_options2) assert 'grch38' not in list(results['NM_000949.5:c.*6523_*6524=']['primary_assembly_loci'].keys()) - assert results['NM_000949.5:c.*6523_*6524=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029042.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000940.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000949.5'} + assert results['NM_000949.5:c.*6523_*6524=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000949.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000940.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029042.1'} assert 'NM_001204314.1:c.*6523_*6524=' in list(results.keys()) - assert results['NM_001204314.1:c.*6523_*6524=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001204314.1:c.*6523_*6524=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001204314.1:c.*6523_*6524=']['alt_genomic_loci'], []) - assert results['NM_001204314.1:c.*6523_*6524=']['gene_symbol'] == 'PRLR' - assert results['NM_001204314.1:c.*6523_*6524=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191243.1:p.?', 'slr': 'NP_001191243.1:p.?'} assert results['NM_001204314.1:c.*6523_*6524=']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' - assert results['NM_001204314.1:c.*6523_*6524=']['genome_context_intronic_sequence'] == '' - assert results['NM_001204314.1:c.*6523_*6524=']['hgvs_lrg_variant'] == '' + assert results['NM_001204314.1:c.*6523_*6524=']['gene_symbol'] == 'PRLR' + assert 
results['NM_001204314.1:c.*6523_*6524=']['gene_ids'] == {'hgnc_id': 'HGNC:9446', 'entrez_gene_id': '5618', 'ucsc_id': 'uc032uqm.2', 'omim_id': ['176761']} assert results['NM_001204314.1:c.*6523_*6524=']['hgvs_transcript_variant'] == 'NM_001204314.1:c.*6523_*6524=' + assert results['NM_001204314.1:c.*6523_*6524=']['genome_context_intronic_sequence'] == '' + assert results['NM_001204314.1:c.*6523_*6524=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001204314.1:c.*6523_*6524=']['hgvs_refseqgene_variant'] == '' - assert results['NM_001204314.1:c.*6523_*6524=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': 'chr5', 'ref': 'AG', 'pos': '35058667', 'alt': 'AG'}} + assert results['NM_001204314.1:c.*6523_*6524=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191243.1:p.?', 'slr': 'NP_001191243.1:p.?'} + assert results['NM_001204314.1:c.*6523_*6524=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001204314.1:c.*6523_*6524=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001204314.1:c.*6523_*6524=']['alt_genomic_loci'], []) + assert results['NM_001204314.1:c.*6523_*6524=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': 'chr5', 'pos': '35058667', 'ref': 'AG', 'alt': 'AG'}} assert 'hg38' not in list(results['NM_001204314.1:c.*6523_*6524=']['primary_assembly_loci'].keys()) - assert results['NM_001204314.1:c.*6523_*6524=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': '5', 'ref': 'AG', 'pos': '35058667', 'alt': 'AG'}} + assert results['NM_001204314.1:c.*6523_*6524=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058667_35058668=', 'vcf': {'chr': '5', 'pos': '35058667', 'ref': 'AG', 'alt': 'AG'}} assert 'grch38' not in 
list(results['NM_001204314.1:c.*6523_*6524=']['primary_assembly_loci'].keys()) - assert results['NM_001204314.1:c.*6523_*6524=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191243.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204314.1'} - - assert 'NM_000949.6:c.*6528del' in list(results.keys()) - assert results['NM_000949.6:c.*6528del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000949.6:c.*6528del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000949.6:c.*6528del']['alt_genomic_loci'], []) - assert results['NM_000949.6:c.*6528del']['gene_symbol'] == 'PRLR' - assert results['NM_000949.6:c.*6528del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000940.1:p.?', 'slr': 'NP_000940.1:p.?'} - assert results['NM_000949.6:c.*6528del']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' - assert results['NM_000949.6:c.*6528del']['genome_context_intronic_sequence'] == '' - assert results['NM_000949.6:c.*6528del']['hgvs_lrg_variant'] == '' - assert results['NM_000949.6:c.*6528del']['hgvs_transcript_variant'] == 'NM_000949.6:c.*6528del' - assert results['NM_000949.6:c.*6528del']['hgvs_refseqgene_variant'] == '' - assert results['NM_000949.6:c.*6528del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058662_35058668=', 'vcf': {'chr': 'chr5', 'ref': 'AGACAAG', 'pos': '35058662', 'alt': 'AGACAAG'}} - assert results['NM_000949.6:c.*6528del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563del', 'vcf': {'chr': 'chr5', 'ref': 'CA', 'pos': '35058560', 'alt': 'C'}} - assert results['NM_000949.6:c.*6528del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058662_35058668=', 'vcf': {'chr': '5', 'ref': 'AGACAAG', 'pos': '35058662', 'alt': 'AGACAAG'}} - assert results['NM_000949.6:c.*6528del']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000005.10:g.35058563del', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058560', 'alt': 'C'}} - assert results['NM_000949.6:c.*6528del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000940.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000949.6'} + assert results['NM_001204314.1:c.*6523_*6524=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204314.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191243.1'} + assert 'NM_001204314.2:c.*6528del' in list(results.keys()) + assert results['NM_001204314.2:c.*6528del']['submitted_variant'] == 'NC_000005.9:g.35058667_35058668AG=' + assert results['NM_001204314.2:c.*6528del']['gene_symbol'] == 'PRLR' + assert results['NM_001204314.2:c.*6528del']['gene_ids'] == {'hgnc_id': 'HGNC:9446', 'entrez_gene_id': '5618', 'ucsc_id': 'uc032uqm.2', 'omim_id': ['176761']} + assert results['NM_001204314.2:c.*6528del']['hgvs_transcript_variant'] == 'NM_001204314.2:c.*6528del' + assert results['NM_001204314.2:c.*6528del']['genome_context_intronic_sequence'] == '' + assert results['NM_001204314.2:c.*6528del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001204314.2:c.*6528del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001204314.2:c.*6528del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191243.1:p.?', 'slr': 'NP_001191243.1:p.?'} + assert results['NM_001204314.2:c.*6528del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001204314.2:c.*6528del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001204314.2:c.*6528del']['alt_genomic_loci'], []) + assert results['NM_001204314.2:c.*6528del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058662_35058668=', 'vcf': {'chr': 'chr5', 'pos': '35058662', 'ref': 'AGACAAG', 'alt': 'AGACAAG'}} + assert 
results['NM_001204314.2:c.*6528del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563del', 'vcf': {'chr': 'chr5', 'pos': '35058560', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_001204314.2:c.*6528del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058662_35058668=', 'vcf': {'chr': '5', 'pos': '35058662', 'ref': 'AGACAAG', 'alt': 'AGACAAG'}} + assert results['NM_001204314.2:c.*6528del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563del', 'vcf': {'chr': '5', 'pos': '35058560', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_001204314.2:c.*6528del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204314.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191243.1'} def test_variant139(self): variant = 'NM_000251.1:c.1296_1348del' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'NM_000251.1:c.1296_1348del' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert 
results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant140(self): variant = 'NM_000088.3:c.2023_2028del' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() @@ -4412,22 +4472,22 @@ def test_variant140(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.2024_2028+1del' in list(results.keys()) - assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.2024_2028+1del' - assert results['NM_000088.3:c.2024_2028+1del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.2024_2028+1del' - self.assertCountEqual(results['NM_000088.3:c.2024_2028+1del']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.2024_2028+1del']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.2024_2028+1del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Ala675_Arg676del)', 'slr': 'NP_000079.2:p.(A675_R676del)'} assert results['NM_000088.3:c.2024_2028+1del']['submitted_variant'] 
== 'NM_000088.3:c.2023_2028del' - assert results['NM_000088.3:c.2024_2028+1del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.2024_2028+1del' - assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_variant'] == 'LRG_1:g.14656_14661del' + assert results['NM_000088.3:c.2024_2028+1del']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.2024_2028+1del']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.2024_2028+1del']['hgvs_transcript_variant'] == 'NM_000088.3:c.2024_2028+1del' + assert results['NM_000088.3:c.2024_2028+1del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.2024_2028+1del' + assert results['NM_000088.3:c.2024_2028+1del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.2024_2028+1del' assert results['NM_000088.3:c.2024_2028+1del']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.14656_14661del' - assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269343_48269348del', 'vcf': {'chr': 'chr17', 'ref': 'ACTCTTG', 'pos': '48269339', 'alt': 'A'}} - assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191982_50191987del', 'vcf': {'chr': 'chr17', 'ref': 'ACTCTTG', 'pos': '50191978', 'alt': 'A'}} - assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269343_48269348del', 'vcf': {'chr': '17', 'ref': 'ACTCTTG', 'pos': '48269339', 'alt': 'A'}} - assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191982_50191987del', 'vcf': {'chr': '17', 'ref': 'ACTCTTG', 'pos': '50191978', 'alt': 'A'}} - assert results['NM_000088.3:c.2024_2028+1del']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.2024_2028+1del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Ala675_Arg676del)', 'slr': 'NP_000079.2:p.(A675_R676del)'} + assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.2024_2028+1del' + assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_variant'] == 'LRG_1:g.14656_14661del' + self.assertCountEqual(results['NM_000088.3:c.2024_2028+1del']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269343_48269348del', 'vcf': {'chr': 'chr17', 'pos': '48269339', 'ref': 'ACTCTTG', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191982_50191987del', 'vcf': {'chr': 'chr17', 'pos': '50191978', 'ref': 'ACTCTTG', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269343_48269348del', 'vcf': {'chr': '17', 'pos': '48269339', 'ref': 'ACTCTTG', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191982_50191987del', 'vcf': {'chr': '17', 'pos': '50191978', 'ref': 'ACTCTTG', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant141(self): 
variant = 'NM_000088.3:c.2024_2028+1del' @@ -4436,71 +4496,71 @@ def test_variant141(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.2024_2028+1del' in list(results.keys()) - assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.2024_2028+1del' - assert results['NM_000088.3:c.2024_2028+1del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.2024_2028+1del' - self.assertCountEqual(results['NM_000088.3:c.2024_2028+1del']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.2024_2028+1del']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.2024_2028+1del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.2024_2028+1del']['submitted_variant'] == 'NM_000088.3:c.2024_2028+1del' - assert results['NM_000088.3:c.2024_2028+1del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.2024_2028+1del' - assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_variant'] == 'LRG_1:g.14656_14661del' + assert results['NM_000088.3:c.2024_2028+1del']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.2024_2028+1del']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.2024_2028+1del']['hgvs_transcript_variant'] == 'NM_000088.3:c.2024_2028+1del' + assert results['NM_000088.3:c.2024_2028+1del']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.2024_2028+1del' + assert results['NM_000088.3:c.2024_2028+1del']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.2024_2028+1del' assert results['NM_000088.3:c.2024_2028+1del']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.14656_14661del' - assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269343_48269348del', 'vcf': {'chr': 'chr17', 'ref': 
'ACTCTTG', 'pos': '48269339', 'alt': 'A'}} - assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191982_50191987del', 'vcf': {'chr': 'chr17', 'ref': 'ACTCTTG', 'pos': '50191978', 'alt': 'A'}} - assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269343_48269348del', 'vcf': {'chr': '17', 'ref': 'ACTCTTG', 'pos': '48269339', 'alt': 'A'}} - assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191982_50191987del', 'vcf': {'chr': '17', 'ref': 'ACTCTTG', 'pos': '50191978', 'alt': 'A'}} - assert results['NM_000088.3:c.2024_2028+1del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.2024_2028+1del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.2024_2028+1del' + assert results['NM_000088.3:c.2024_2028+1del']['hgvs_lrg_variant'] == 'LRG_1:g.14656_14661del' + self.assertCountEqual(results['NM_000088.3:c.2024_2028+1del']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269343_48269348del', 'vcf': {'chr': 'chr17', 'pos': '48269339', 'ref': 'ACTCTTG', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191982_50191987del', 'vcf': {'chr': 'chr17', 'pos': '50191978', 'ref': 'ACTCTTG', 'alt': 'A'}} + assert 
results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48269343_48269348del', 'vcf': {'chr': '17', 'pos': '48269339', 'ref': 'ACTCTTG', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50191982_50191987del', 'vcf': {'chr': '17', 'pos': '50191978', 'ref': 'ACTCTTG', 'alt': 'A'}} + assert results['NM_000088.3:c.2024_2028+1del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant142(self): variant = 'ENST00000450616.1:n.31+1G>C' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'ENST00000450616.1:n.31+1G>C' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert 
results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant143(self): variant = 'ENST00000491747:c.5071A>T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'ENST00000491747:c.5071A>T' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert 
results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant144(self): variant = 'NM_000088.3:c.589G>T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() @@ -4508,22 +4568,22 @@ def test_variant144(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.589G>T' in list(results.keys()) - assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' - assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000088.3:c.589G>T']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.589G>T']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.589G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)'} assert results['NM_000088.3:c.589G>T']['submitted_variant'] == 'NM_000088.3:c.589G>T' - assert 
results['NM_000088.3:c.589G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.589G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8638G>T' + assert results['NM_000088.3:c.589G>T']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589G>T']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.589G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589G>T' + assert results['NM_000088.3:c.589G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.589G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638G>T' - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '48275363', 'alt': 'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '50198002', 'alt': 'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '48275363', 'alt': 'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '50198002', 'alt': 'A'}} - assert results['NM_000088.3:c.589G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.589G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_000079.2(LRG_1p1):p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)'} + assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' + assert results['NM_000088.3:c.589G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8638G>T' + self.assertCountEqual(results['NM_000088.3:c.589G>T']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'pos': '48275363', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'pos': '50198002', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'pos': '48275363', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'pos': '50198002', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant145(self): variant = 'NG_007400.1:g.8638G>T' @@ -4532,22 +4592,22 @@ def test_variant145(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.589G>T' in list(results.keys()) - assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' - assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000088.3:c.589G>T']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.589G>T']['gene_symbol'] == 
'COL1A1' - assert results['NM_000088.3:c.589G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)'} assert results['NM_000088.3:c.589G>T']['submitted_variant'] == 'NG_007400.1:g.8638G>T' - assert results['NM_000088.3:c.589G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.589G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8638G>T' + assert results['NM_000088.3:c.589G>T']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589G>T']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.589G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589G>T' + assert results['NM_000088.3:c.589G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.589G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638G>T' - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '48275363', 'alt': 'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '50198002', 'alt': 'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '48275363', 'alt': 'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '50198002', 'alt': 'A'}} - assert results['NM_000088.3:c.589G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.589G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)'} + assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' + assert results['NM_000088.3:c.589G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8638G>T' + self.assertCountEqual(results['NM_000088.3:c.589G>T']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'pos': '48275363', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'pos': '50198002', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'pos': '48275363', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'pos': '50198002', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant146(self): variant = 'LRG_1:g.8638G>T' @@ -4556,22 +4616,22 @@ def test_variant146(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.589G>T' in list(results.keys()) - assert 
results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' - assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000088.3:c.589G>T']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.589G>T']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.589G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)'} assert results['NM_000088.3:c.589G>T']['submitted_variant'] == 'LRG_1:g.8638G>T' - assert results['NM_000088.3:c.589G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.589G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8638G>T' + assert results['NM_000088.3:c.589G>T']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589G>T']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.589G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589G>T' + assert results['NM_000088.3:c.589G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.589G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638G>T' - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '48275363', 'alt': 'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '50198002', 'alt': 'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '48275363', 'alt': 'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '50198002', 'alt': 'A'}} - assert results['NM_000088.3:c.589G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.589G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)'} + assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' + assert results['NM_000088.3:c.589G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8638G>T' + self.assertCountEqual(results['NM_000088.3:c.589G>T']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'pos': '48275363', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'pos': '50198002', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'pos': '48275363', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'pos': '50198002', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 
'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant147(self): variant = 'LRG_1t1:c.589G>T' @@ -4580,172 +4640,178 @@ def test_variant147(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.589G>T' in list(results.keys()) - assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' - assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000088.3:c.589G>T']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.589G>T']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.589G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)'} assert results['NM_000088.3:c.589G>T']['submitted_variant'] == 'LRG_1t1:c.589G>T' - assert results['NM_000088.3:c.589G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.589G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8638G>T' + assert results['NM_000088.3:c.589G>T']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589G>T']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.589G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589G>T' + assert results['NM_000088.3:c.589G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.589G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638G>T' - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '48275363', 'alt': 'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '50198002', 'alt': 'A'}} - assert 
results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '48275363', 'alt': 'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '50198002', 'alt': 'A'}} - assert results['NM_000088.3:c.589G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.589G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)'} + assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' + assert results['NM_000088.3:c.589G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8638G>T' + self.assertCountEqual(results['NM_000088.3:c.589G>T']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'pos': '48275363', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'pos': '50198002', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'pos': '48275363', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'pos': '50198002', 'ref': 'C', 'alt': 'A'}} + assert 
results['NM_000088.3:c.589G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant148(self): variant = 'chr16:g.15832508_15832509delinsAC' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' + assert 'NM_001040114.1:c.3055_3056inv' in list(results.keys()) + assert results['NM_001040114.1:c.3055_3056inv']['submitted_variant'] == 'chr16:g.15832508_15832509delinsAC' + assert results['NM_001040114.1:c.3055_3056inv']['gene_symbol'] == 'MYH11' + assert results['NM_001040114.1:c.3055_3056inv']['gene_ids'] == {'hgnc_id': 'HGNC:7569', 'entrez_gene_id': '4629', 'ucsc_id': 'uc002ddy.4', 'omim_id': ['160745']} + assert results['NM_001040114.1:c.3055_3056inv']['hgvs_transcript_variant'] == 'NM_001040114.1:c.3055_3056inv' + assert results['NM_001040114.1:c.3055_3056inv']['genome_context_intronic_sequence'] == '' + assert results['NM_001040114.1:c.3055_3056inv']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001040114.1:c.3055_3056inv']['hgvs_refseqgene_variant'] == '' + assert results['NM_001040114.1:c.3055_3056inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035203.1:p.(Thr1019Val)', 'slr': 'NP_001035203.1:p.(T1019V)'} + assert results['NM_001040114.1:c.3055_3056inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001040114.1:c.3055_3056inv']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001040114.1:c.3055_3056inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'pos': '1396662', 'ref': 'GT', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 
'chr16_KI270853v1_alt', 'pos': '1396662', 'ref': 'GT', 'alt': 'AC'}}}]) + assert results['NM_001040114.1:c.3055_3056inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': 'chr16', 'pos': '15832508', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_001040114.1:c.3055_3056inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': 'chr16', 'pos': '15738651', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_001040114.1:c.3055_3056inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': '16', 'pos': '15832508', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_001040114.1:c.3055_3056inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'pos': '15738651', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_001040114.1:c.3055_3056inv']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040114.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035203.1'} + assert 'NM_002474.2:c.3034_3035inv' in list(results.keys()) - assert results['NM_002474.2:c.3034_3035inv']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_002474.2:c.3034_3035inv']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_002474.2:c.3034_3035inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}]) - assert results['NM_002474.2:c.3034_3035inv']['gene_symbol'] == 'MYH11' - assert results['NM_002474.2:c.3034_3035inv']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_002465.1(LRG_1401p1):p.(Thr1012Val)', 'slr': 'NP_002465.1:p.(T1012V)'} assert results['NM_002474.2:c.3034_3035inv']['submitted_variant'] == 'chr16:g.15832508_15832509delinsAC' - assert results['NM_002474.2:c.3034_3035inv']['genome_context_intronic_sequence'] == '' - assert results['NM_002474.2:c.3034_3035inv']['hgvs_lrg_variant'] == '' + assert results['NM_002474.2:c.3034_3035inv']['gene_symbol'] == 'MYH11' + assert results['NM_002474.2:c.3034_3035inv']['gene_ids'] == {'hgnc_id': 'HGNC:7569', 'entrez_gene_id': '4629', 'ucsc_id': 'uc002ddy.4', 'omim_id': ['160745']} assert results['NM_002474.2:c.3034_3035inv']['hgvs_transcript_variant'] == 'NM_002474.2:c.3034_3035inv' + assert results['NM_002474.2:c.3034_3035inv']['genome_context_intronic_sequence'] == '' + assert results['NM_002474.2:c.3034_3035inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_002474.2:c.3034_3035inv']['hgvs_refseqgene_variant'] == '' - assert results['NM_002474.2:c.3034_3035inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} - assert results['NM_002474.2:c.3034_3035inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} - assert results['NM_002474.2:c.3034_3035inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} - assert results['NM_002474.2:c.3034_3035inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} - assert results['NM_002474.2:c.3034_3035inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002465.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_002474.2'} + assert results['NM_002474.2:c.3034_3035inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002465.1(LRG_1401p1):p.(Thr1012Val)', 'slr': 'NP_002465.1:p.(T1012V)'} + assert results['NM_002474.2:c.3034_3035inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_002474.2:c.3034_3035inv']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_002474.2:c.3034_3035inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'pos': '1396662', 'ref': 'GT', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'pos': '1396662', 'ref': 'GT', 'alt': 'AC'}}}]) + assert results['NM_002474.2:c.3034_3035inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': 'chr16', 'pos': '15832508', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_002474.2:c.3034_3035inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': 'chr16', 'pos': '15738651', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_002474.2:c.3034_3035inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': '16', 'pos': '15832508', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_002474.2:c.3034_3035inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'pos': '15738651', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_002474.2:c.3034_3035inv']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002474.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002465.1'} assert 'NM_022844.2:c.3034_3035inv' in list(results.keys()) - assert 
results['NM_022844.2:c.3034_3035inv']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_022844.2:c.3034_3035inv']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_022844.2:c.3034_3035inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}]) - assert results['NM_022844.2:c.3034_3035inv']['gene_symbol'] == 'MYH11' - assert results['NM_022844.2:c.3034_3035inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_074035.1:p.(Thr1012Val)', 'slr': 'NP_074035.1:p.(T1012V)'} assert results['NM_022844.2:c.3034_3035inv']['submitted_variant'] == 'chr16:g.15832508_15832509delinsAC' - assert results['NM_022844.2:c.3034_3035inv']['genome_context_intronic_sequence'] == '' - assert results['NM_022844.2:c.3034_3035inv']['hgvs_lrg_variant'] == '' + assert results['NM_022844.2:c.3034_3035inv']['gene_symbol'] == 'MYH11' + assert results['NM_022844.2:c.3034_3035inv']['gene_ids'] == {'hgnc_id': 'HGNC:7569', 'entrez_gene_id': '4629', 'ucsc_id': 'uc002ddy.4', 'omim_id': ['160745']} assert results['NM_022844.2:c.3034_3035inv']['hgvs_transcript_variant'] == 'NM_022844.2:c.3034_3035inv' + assert results['NM_022844.2:c.3034_3035inv']['genome_context_intronic_sequence'] == '' + assert results['NM_022844.2:c.3034_3035inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_022844.2:c.3034_3035inv']['hgvs_refseqgene_variant'] == '' - assert results['NM_022844.2:c.3034_3035inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} - assert results['NM_022844.2:c.3034_3035inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} - assert results['NM_022844.2:c.3034_3035inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} - assert results['NM_022844.2:c.3034_3035inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} - assert results['NM_022844.2:c.3034_3035inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_074035.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_022844.2'} - - assert 'NM_001040114.1:c.3055_3056inv' in list(results.keys()) - assert results['NM_001040114.1:c.3055_3056inv']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001040114.1:c.3055_3056inv']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001040114.1:c.3055_3056inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}]) - assert results['NM_001040114.1:c.3055_3056inv']['gene_symbol'] == 'MYH11' - assert results['NM_001040114.1:c.3055_3056inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035203.1:p.(Thr1019Val)', 'slr': 'NP_001035203.1:p.(T1019V)'} - assert results['NM_001040114.1:c.3055_3056inv']['submitted_variant'] == 'chr16:g.15832508_15832509delinsAC' - assert results['NM_001040114.1:c.3055_3056inv']['genome_context_intronic_sequence'] == '' - assert results['NM_001040114.1:c.3055_3056inv']['hgvs_lrg_variant'] == '' - assert 
results['NM_001040114.1:c.3055_3056inv']['hgvs_transcript_variant'] == 'NM_001040114.1:c.3055_3056inv' - assert results['NM_001040114.1:c.3055_3056inv']['hgvs_refseqgene_variant'] == '' - assert results['NM_001040114.1:c.3055_3056inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} - assert results['NM_001040114.1:c.3055_3056inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} - assert results['NM_001040114.1:c.3055_3056inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} - assert results['NM_001040114.1:c.3055_3056inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} - assert results['NM_001040114.1:c.3055_3056inv']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035203.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040114.1'} + assert results['NM_022844.2:c.3034_3035inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_074035.1:p.(Thr1012Val)', 'slr': 'NP_074035.1:p.(T1012V)'} + assert results['NM_022844.2:c.3034_3035inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_022844.2:c.3034_3035inv']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_022844.2:c.3034_3035inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'pos': '1396662', 'ref': 'GT', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'pos': '1396662', 'ref': 'GT', 
'alt': 'AC'}}}]) + assert results['NM_022844.2:c.3034_3035inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': 'chr16', 'pos': '15832508', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_022844.2:c.3034_3035inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': 'chr16', 'pos': '15738651', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_022844.2:c.3034_3035inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': '16', 'pos': '15832508', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_022844.2:c.3034_3035inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'pos': '15738651', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_022844.2:c.3034_3035inv']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_022844.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_074035.1'} - assert results['flag'] == 'gene_variant' assert 'NM_001040113.1:c.3055_3056inv' in list(results.keys()) - assert results['NM_001040113.1:c.3055_3056inv']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001040113.1:c.3055_3056inv']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001040113.1:c.3055_3056inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'ref': 'GT', 'pos': '1396662', 'alt': 'AC'}}}]) - assert results['NM_001040113.1:c.3055_3056inv']['gene_symbol'] == 'MYH11' - assert results['NM_001040113.1:c.3055_3056inv']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_001035202.1(LRG_1401p2):p.(Thr1019Val)', 'slr': 'NP_001035202.1:p.(T1019V)'} assert results['NM_001040113.1:c.3055_3056inv']['submitted_variant'] == 'chr16:g.15832508_15832509delinsAC' - assert results['NM_001040113.1:c.3055_3056inv']['genome_context_intronic_sequence'] == '' - assert results['NM_001040113.1:c.3055_3056inv']['hgvs_lrg_variant'] == 'LRG_1401:g.123379_123380inv' + assert results['NM_001040113.1:c.3055_3056inv']['gene_symbol'] == 'MYH11' + assert results['NM_001040113.1:c.3055_3056inv']['gene_ids'] == {'hgnc_id': 'HGNC:7569', 'entrez_gene_id': '4629', 'ucsc_id': 'uc002ddy.4', 'omim_id': ['160745']} assert results['NM_001040113.1:c.3055_3056inv']['hgvs_transcript_variant'] == 'NM_001040113.1:c.3055_3056inv' + assert results['NM_001040113.1:c.3055_3056inv']['genome_context_intronic_sequence'] == '' + assert results['NM_001040113.1:c.3055_3056inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001040113.1:c.3055_3056inv']['hgvs_refseqgene_variant'] == 'NG_009299.1:g.123379_123380inv' - assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} - assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': 'chr16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} - assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15832508', 'alt': 'AC'}} - assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'ref': 'GT', 'pos': '15738651', 'alt': 'AC'}} - assert 
results['NM_001040113.1:c.3055_3056inv']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009299.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_1401.xml', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035202.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040113.1'} - + assert results['NM_001040113.1:c.3055_3056inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035202.1(LRG_1401p2):p.(Thr1019Val)', 'slr': 'NP_001035202.1:p.(T1019V)'} + assert results['NM_001040113.1:c.3055_3056inv']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001040113.1:c.3055_3056inv']['hgvs_lrg_variant'] == 'LRG_1401:g.123379_123380inv' + self.assertCountEqual(results['NM_001040113.1:c.3055_3056inv']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'HSCHR16_1_CTG1', 'pos': '1396662', 'ref': 'GT', 'alt': 'AC'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187607.1:g.1396662_1396663inv', 'vcf': {'chr': 'chr16_KI270853v1_alt', 'pos': '1396662', 'ref': 'GT', 'alt': 'AC'}}}]) + assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': 'chr16', 'pos': '15832508', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': 'chr16', 'pos': '15738651', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.15832508_15832509inv', 'vcf': {'chr': '16', 'pos': '15832508', 'ref': 'GT', 'alt': 'AC'}} + assert results['NM_001040113.1:c.3055_3056inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.15738651_15738652inv', 'vcf': {'chr': '16', 'pos': '15738651', 'ref': 
'GT', 'alt': 'AC'}} + assert results['NM_001040113.1:c.3055_3056inv']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040113.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035202.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009299.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_1401.xml'} def test_variant149(self): variant = 'NG_012386.1:g.24048dupG' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_001162427.1:c.210+1615dup' in list(results.keys()) - assert results['NM_001162427.1:c.210+1615dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001162427.1:c.210+1615dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001162427.1:c.210+1615dup']['alt_genomic_loci'], []) - assert results['NM_001162427.1:c.210+1615dup']['gene_symbol'] == 'TSC1' - assert results['NM_001162427.1:c.210+1615dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001155899.1:p.?', 'slr': 'NP_001155899.1:p.?'} assert results['NM_001162427.1:c.210+1615dup']['submitted_variant'] == 'NG_012386.1:g.24048dupG' - assert results['NM_001162427.1:c.210+1615dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001162427.1):c.210+1615dup' - assert results['NM_001162427.1:c.210+1615dup']['hgvs_lrg_variant'] == '' + assert results['NM_001162427.1:c.210+1615dup']['gene_symbol'] == 'TSC1' + assert results['NM_001162427.1:c.210+1615dup']['gene_ids'] == {'hgnc_id': 'HGNC:12362', 'entrez_gene_id': '7248', 'ucsc_id': 'uc004cca.3', 'omim_id': ['605284']} assert results['NM_001162427.1:c.210+1615dup']['hgvs_transcript_variant'] == 'NM_001162427.1:c.210+1615dup' + assert results['NM_001162427.1:c.210+1615dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001162427.1):c.210+1615dup' + assert results['NM_001162427.1:c.210+1615dup']['refseqgene_context_intronic_sequence'] == 
'' assert results['NM_001162427.1:c.210+1615dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_001162427.1:c.210+1615dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155899.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162427.1'} + assert results['NM_001162427.1:c.210+1615dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001155899.1:p.?', 'slr': 'NP_001155899.1:p.?'} + assert results['NM_001162427.1:c.210+1615dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001162427.1:c.210+1615dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001162427.1:c.210+1615dup']['alt_genomic_loci'], []) + assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'pos': '135800972', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'pos': '132925585', 'ref': 'A', 'alt': 'AC'}} + assert 
results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'pos': '135800972', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'pos': '132925585', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_001162427.1:c.210+1615dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162427.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155899.1'} assert 'NM_001162426.1:c.363+1dup' in list(results.keys()) - assert results['NM_001162426.1:c.363+1dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001162426.1:c.363+1dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001162426.1:c.363+1dup']['alt_genomic_loci'], []) - assert results['NM_001162426.1:c.363+1dup']['gene_symbol'] == 'TSC1' - assert results['NM_001162426.1:c.363+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001155898.1:p.?', 'slr': 'NP_001155898.1:p.?'} assert results['NM_001162426.1:c.363+1dup']['submitted_variant'] == 'NG_012386.1:g.24048dupG' - assert results['NM_001162426.1:c.363+1dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001162426.1):c.363+1dup' - assert results['NM_001162426.1:c.363+1dup']['hgvs_lrg_variant'] == '' + assert results['NM_001162426.1:c.363+1dup']['gene_symbol'] == 'TSC1' + assert results['NM_001162426.1:c.363+1dup']['gene_ids'] == {'hgnc_id': 'HGNC:12362', 'entrez_gene_id': '7248', 'ucsc_id': 'uc004cca.3', 'omim_id': ['605284']} assert results['NM_001162426.1:c.363+1dup']['hgvs_transcript_variant'] == 'NM_001162426.1:c.363+1dup' + assert results['NM_001162426.1:c.363+1dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001162426.1):c.363+1dup' + assert 
results['NM_001162426.1:c.363+1dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001162426.1:c.363+1dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_001162426.1:c.363+1dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155898.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162426.1'} - - assert results['flag'] == 'gene_variant' - assert 'NM_001362177.1:c.-1+1dup' in list(results.keys()) - assert results['NM_001362177.1:c.-1+1dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001362177.1:c.-1+1dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001362177.1:c.-1+1dup']['alt_genomic_loci'], []) - assert results['NM_001362177.1:c.-1+1dup']['gene_symbol'] == 'TSC1' - assert results['NM_001362177.1:c.-1+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001349106.1:p.?', 'slr': 'NP_001349106.1:p.?'} - assert results['NM_001362177.1:c.-1+1dup']['submitted_variant'] == 'NG_012386.1:g.24048dupG' - assert results['NM_001362177.1:c.-1+1dup']['genome_context_intronic_sequence'] == 
'NC_000009.11(NM_001362177.1):c.-1+1dup' - assert results['NM_001362177.1:c.-1+1dup']['hgvs_lrg_variant'] == '' - assert results['NM_001362177.1:c.-1+1dup']['hgvs_transcript_variant'] == 'NM_001362177.1:c.-1+1dup' - assert results['NM_001362177.1:c.-1+1dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_001362177.1:c.-1+1dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001349106.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001362177.1'} + assert results['NM_001162426.1:c.363+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001155898.1:p.?', 'slr': 'NP_001155898.1:p.?'} + assert results['NM_001162426.1:c.363+1dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001162426.1:c.363+1dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001162426.1:c.363+1dup']['alt_genomic_loci'], []) + assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'pos': '135800972', 'ref': 'A', 'alt': 'AC'}} + assert 
results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'pos': '132925585', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'pos': '135800972', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'pos': '132925585', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_001162426.1:c.363+1dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162426.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155898.1'} assert 'NM_000368.4:c.363+1dup' in list(results.keys()) - assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_transcript_variant'] == 'LRG_486t1:c.363+1dup' - assert results['NM_000368.4:c.363+1dup']['refseqgene_context_intronic_sequence'] == 'NG_012386.1(NM_000368.4):c.363+1dup' - self.assertCountEqual(results['NM_000368.4:c.363+1dup']['alt_genomic_loci'], []) - assert results['NM_000368.4:c.363+1dup']['gene_symbol'] == 'TSC1' - assert results['NM_000368.4:c.363+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000359.1(LRG_486p1):p.?', 'slr': 'NP_000359.1:p.?'} assert results['NM_000368.4:c.363+1dup']['submitted_variant'] == 'NG_012386.1:g.24048dupG' - assert results['NM_000368.4:c.363+1dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_000368.4):c.363+1dup' - assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_variant'] == 'LRG_486:g.24048dup' + assert results['NM_000368.4:c.363+1dup']['gene_symbol'] == 'TSC1' + assert results['NM_000368.4:c.363+1dup']['gene_ids'] == {'hgnc_id': 'HGNC:12362', 'entrez_gene_id': '7248', 'ucsc_id': 'uc004cca.3', 'omim_id': ['605284']} assert 
results['NM_000368.4:c.363+1dup']['hgvs_transcript_variant'] == 'NM_000368.4:c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_000368.4):c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['refseqgene_context_intronic_sequence'] == 'NG_012386.1(NM_000368.4):c.363+1dup' assert results['NM_000368.4:c.363+1dup']['hgvs_refseqgene_variant'] == 'NG_012386.1:g.24048dup' - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012386.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_486.xml'} + assert results['NM_000368.4:c.363+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000359.1(LRG_486p1):p.?', 'slr': 'NP_000359.1:p.?'} + assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_transcript_variant'] == 'LRG_486t1:c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_variant'] == 'LRG_486:g.24048dup' + 
self.assertCountEqual(results['NM_000368.4:c.363+1dup']['alt_genomic_loci'], []) + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'pos': '135800972', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'pos': '132925585', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'pos': '135800972', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'pos': '132925585', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012386.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_486.xml'} + assert 'NM_001362177.1:c.-1+1dup' in list(results.keys()) + assert results['NM_001362177.1:c.-1+1dup']['submitted_variant'] == 'NG_012386.1:g.24048dupG' + assert results['NM_001362177.1:c.-1+1dup']['gene_symbol'] == 'TSC1' + assert results['NM_001362177.1:c.-1+1dup']['gene_ids'] == {'hgnc_id': 'HGNC:12362', 'entrez_gene_id': '7248', 'ucsc_id': 'uc004cca.3', 'omim_id': ['605284']} + assert results['NM_001362177.1:c.-1+1dup']['hgvs_transcript_variant'] == 'NM_001362177.1:c.-1+1dup' + assert results['NM_001362177.1:c.-1+1dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001362177.1):c.-1+1dup' + assert results['NM_001362177.1:c.-1+1dup']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_001362177.1:c.-1+1dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001362177.1:c.-1+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001349106.1:p.?', 'slr': 'NP_001349106.1:p.?'} + assert results['NM_001362177.1:c.-1+1dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001362177.1:c.-1+1dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001362177.1:c.-1+1dup']['alt_genomic_loci'], []) + assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'pos': '135800972', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'pos': '132925585', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'pos': '135800972', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'pos': '132925585', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_001362177.1:c.-1+1dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001362177.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001349106.1'} def test_variant150(self): variant = 'NM_033517.1:c.1307_1309delCGA' @@ -4754,176 +4820,178 @@ def test_variant150(self): assert results['flag'] == 'gene_variant' assert 'NM_033517.1:c.1307_1309del' in list(results.keys()) - assert results['NM_033517.1:c.1307_1309del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_033517.1:c.1307_1309del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_033517.1:c.1307_1309del']['alt_genomic_loci'], [{'grch38': 
{'hgvs_genomic_description': 'NW_015148969.1:g.33721_33723del', 'vcf': {'chr': 'HG1311_PATCH', 'ref': 'CCGA', 'pos': '33720', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NW_015148969.1:g.33721_33723del', 'vcf': {'chr': 'NW_015148969.1', 'ref': 'CCGA', 'pos': '33720', 'alt': 'C'}}}]) - assert results['NM_033517.1:c.1307_1309del']['gene_symbol'] == 'SHANK3' - assert results['NM_033517.1:c.1307_1309del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_277052.1:p.(Pro436_Ser437delinsArg)', 'slr': 'NP_277052.1:p.(P436_S437delinsR)'} assert results['NM_033517.1:c.1307_1309del']['submitted_variant'] == 'NM_033517.1:c.1307_1309delCGA' - assert results['NM_033517.1:c.1307_1309del']['genome_context_intronic_sequence'] == '' - assert results['NM_033517.1:c.1307_1309del']['hgvs_lrg_variant'] == '' + assert results['NM_033517.1:c.1307_1309del']['gene_symbol'] == 'SHANK3' + assert results['NM_033517.1:c.1307_1309del']['gene_ids'] == {'hgnc_id': 'HGNC:14294', 'entrez_gene_id': '85358', 'ucsc_id': 'uc062fon.2', 'omim_id': ['606230']} assert results['NM_033517.1:c.1307_1309del']['hgvs_transcript_variant'] == 'NM_033517.1:c.1307_1309del' + assert results['NM_033517.1:c.1307_1309del']['genome_context_intronic_sequence'] == '' + assert results['NM_033517.1:c.1307_1309del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_033517.1:c.1307_1309del']['hgvs_refseqgene_variant'] == '' + assert results['NM_033517.1:c.1307_1309del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_277052.1:p.(Pro436_Ser437delinsArg)', 'slr': 'NP_277052.1:p.(P436_S437delinsR)'} + assert results['NM_033517.1:c.1307_1309del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_033517.1:c.1307_1309del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_033517.1:c.1307_1309del']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NW_015148969.1:g.33721_33723del', 'vcf': {'chr': 'HG1311_PATCH', 'pos': '33720', 'ref': 'CCGA', 'alt': 'C'}}}, {'hg38': 
{'hgvs_genomic_description': 'NW_015148969.1:g.33721_33723del', 'vcf': {'chr': 'NW_015148969.1', 'pos': '33720', 'ref': 'CCGA', 'alt': 'C'}}}]) assert 'hg19' not in list(results['NM_033517.1:c.1307_1309del']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['NM_033517.1:c.1307_1309del']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['NM_033517.1:c.1307_1309del']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['NM_033517.1:c.1307_1309del']['primary_assembly_loci'].keys()) - assert results['NM_033517.1:c.1307_1309del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_277052.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_033517.1'} - + assert results['NM_033517.1:c.1307_1309del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_033517.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_277052.1'} def test_variant151(self): variant = 'HG1311_PATCH-33720-CCGA-C' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'HG1311_PATCH-33720-CCGA-C' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert 
results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant152(self): variant = '2-73675227-TCTC-TCTCCTC' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_015120.4:c.1573_1579=' in list(results.keys()) - assert results['NM_015120.4:c.1573_1579=']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.1573_1579=' - assert results['NM_015120.4:c.1573_1579=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_015120.4:c.1573_1579=']['alt_genomic_loci'], []) - assert results['NM_015120.4:c.1573_1579=']['gene_symbol'] == 'ALMS1' - assert results['NM_015120.4:c.1573_1579=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Ser525=)', 'slr': 'NP_055935.4:p.(S525=)'} assert results['NM_015120.4:c.1573_1579=']['submitted_variant'] == '2-73675227-TCTC-TCTCCTC' - assert 
results['NM_015120.4:c.1573_1579=']['genome_context_intronic_sequence'] == '' - assert results['NM_015120.4:c.1573_1579=']['hgvs_lrg_variant'] == 'LRG_741:g.67345_67351=' + assert results['NM_015120.4:c.1573_1579=']['gene_symbol'] == 'ALMS1' + assert results['NM_015120.4:c.1573_1579=']['gene_ids'] == {'hgnc_id': 'HGNC:428', 'entrez_gene_id': '7840', 'ucsc_id': 'uc032nrd.1', 'omim_id': ['606844']} assert results['NM_015120.4:c.1573_1579=']['hgvs_transcript_variant'] == 'NM_015120.4:c.1573_1579=' + assert results['NM_015120.4:c.1573_1579=']['genome_context_intronic_sequence'] == '' + assert results['NM_015120.4:c.1573_1579=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_015120.4:c.1573_1579=']['hgvs_refseqgene_variant'] == 'NG_011690.1:g.67345_67351=' - assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675228_73675230dup', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '73675227', 'alt': 'TCTC'}} - assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448097_73448103=', 'vcf': {'chr': 'chr2', 'ref': 'TCTCCTC', 'pos': '73448097', 'alt': 'TCTCCTC'}} - assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675228_73675230dup', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '73675227', 'alt': 'TCTC'}} - assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448097_73448103=', 'vcf': {'chr': '2', 'ref': 'TCTCCTC', 'pos': '73448097', 'alt': 'TCTCCTC'}} - assert results['NM_015120.4:c.1573_1579=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'lrg': 
'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} - - assert results['flag'] == 'gene_variant' + assert results['NM_015120.4:c.1573_1579=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Ser525=)', 'slr': 'NP_055935.4:p.(S525=)'} + assert results['NM_015120.4:c.1573_1579=']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.1573_1579=' + assert results['NM_015120.4:c.1573_1579=']['hgvs_lrg_variant'] == 'LRG_741:g.67345_67351=' + self.assertCountEqual(results['NM_015120.4:c.1573_1579=']['alt_genomic_loci'], []) + assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675228_73675230dup', 'vcf': {'chr': 'chr2', 'pos': '73675227', 'ref': 'T', 'alt': 'TCTC'}} + assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448097_73448103=', 'vcf': {'chr': 'chr2', 'pos': '73448097', 'ref': 'TCTCCTC', 'alt': 'TCTCCTC'}} + assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675228_73675230dup', 'vcf': {'chr': '2', 'pos': '73675227', 'ref': 'T', 'alt': 'TCTC'}} + assert results['NM_015120.4:c.1573_1579=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448097_73448103=', 'vcf': {'chr': '2', 'pos': '73448097', 'ref': 'TCTCCTC', 'alt': 'TCTCCTC'}} + assert results['NM_015120.4:c.1573_1579=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} def test_variant153(self): variant = '2-73675227-TC-TC' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_015120.4:c.1577_1579del' in 
list(results.keys()) - assert results['NM_015120.4:c.1577_1579del']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.1577_1579del' - assert results['NM_015120.4:c.1577_1579del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_015120.4:c.1577_1579del']['alt_genomic_loci'], []) - assert results['NM_015120.4:c.1577_1579del']['gene_symbol'] == 'ALMS1' - assert results['NM_015120.4:c.1577_1579del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Pro526del)', 'slr': 'NP_055935.4:p.(P526del)'} assert results['NM_015120.4:c.1577_1579del']['submitted_variant'] == '2-73675227-TC-TC' - assert results['NM_015120.4:c.1577_1579del']['genome_context_intronic_sequence'] == '' - assert results['NM_015120.4:c.1577_1579del']['hgvs_lrg_variant'] == 'LRG_741:g.67349_67351del' + assert results['NM_015120.4:c.1577_1579del']['gene_symbol'] == 'ALMS1' + assert results['NM_015120.4:c.1577_1579del']['gene_ids'] == {'hgnc_id': 'HGNC:428', 'entrez_gene_id': '7840', 'ucsc_id': 'uc032nrd.1', 'omim_id': ['606844']} assert results['NM_015120.4:c.1577_1579del']['hgvs_transcript_variant'] == 'NM_015120.4:c.1577_1579del' + assert results['NM_015120.4:c.1577_1579del']['genome_context_intronic_sequence'] == '' + assert results['NM_015120.4:c.1577_1579del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_015120.4:c.1577_1579del']['hgvs_refseqgene_variant'] == 'NG_011690.1:g.67349_67351del' - assert results['NM_015120.4:c.1577_1579del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675227_73675228=', 'vcf': {'chr': 'chr2', 'ref': 'TC', 'pos': '73675227', 'alt': 'TC'}} - assert results['NM_015120.4:c.1577_1579del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448101_73448103del', 'vcf': {'chr': 'chr2', 'ref': 'TCTC', 'pos': '73448097', 'alt': 'T'}} - assert results['NM_015120.4:c.1577_1579del']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000002.11:g.73675227_73675228=', 'vcf': {'chr': '2', 'ref': 'TC', 'pos': '73675227', 'alt': 'TC'}} - assert results['NM_015120.4:c.1577_1579del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448101_73448103del', 'vcf': {'chr': '2', 'ref': 'TCTC', 'pos': '73448097', 'alt': 'T'}} - assert results['NM_015120.4:c.1577_1579del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} - - assert results['flag'] == 'gene_variant' + assert results['NM_015120.4:c.1577_1579del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Pro526del)', 'slr': 'NP_055935.4:p.(P526del)'} + assert results['NM_015120.4:c.1577_1579del']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.1577_1579del' + assert results['NM_015120.4:c.1577_1579del']['hgvs_lrg_variant'] == 'LRG_741:g.67349_67351del' + self.assertCountEqual(results['NM_015120.4:c.1577_1579del']['alt_genomic_loci'], []) + assert results['NM_015120.4:c.1577_1579del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675227_73675228=', 'vcf': {'chr': 'chr2', 'pos': '73675227', 'ref': 'TC', 'alt': 'TC'}} + assert results['NM_015120.4:c.1577_1579del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448101_73448103del', 'vcf': {'chr': 'chr2', 'pos': '73448097', 'ref': 'TCTC', 'alt': 'T'}} + assert results['NM_015120.4:c.1577_1579del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675227_73675228=', 'vcf': {'chr': '2', 'pos': '73675227', 'ref': 'TC', 'alt': 'TC'}} + assert results['NM_015120.4:c.1577_1579del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.73448101_73448103del', 'vcf': {'chr': '2', 'pos': '73448097', 'ref': 'TCTC', 'alt': 'T'}} + assert results['NM_015120.4:c.1577_1579del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} def test_variant154(self): variant = '3-14561627-AG-AGG' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_001080423.3:c.1016_1020=' in list(results.keys()) - assert results['NM_001080423.3:c.1016_1020=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001080423.3:c.1016_1020=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001080423.3:c.1016_1020=']['alt_genomic_loci'], []) - assert results['NM_001080423.3:c.1016_1020=']['gene_symbol'] == 'GRIP2' - assert results['NM_001080423.3:c.1016_1020=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.3:p.(Arg339=)', 'slr': 'NP_001073892.3:p.(R339=)'} - assert results['NM_001080423.3:c.1016_1020=']['submitted_variant'] == '3-14561627-AG-AGG' - assert results['NM_001080423.3:c.1016_1020=']['genome_context_intronic_sequence'] == '' - assert results['NM_001080423.3:c.1016_1020=']['hgvs_lrg_variant'] == '' - assert results['NM_001080423.3:c.1016_1020=']['hgvs_transcript_variant'] == 'NM_001080423.3:c.1016_1020=' - assert results['NM_001080423.3:c.1016_1020=']['hgvs_refseqgene_variant'] == '' - assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': 'chr3', 'ref': 'A', 'pos': '14561627', 'alt': 'AG'}} - assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520120_14520124=', 'vcf': {'chr': 'chr3', 'ref': 'GGGCC', 
'pos': '14520120', 'alt': 'GGGCC'}} - assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': '3', 'ref': 'A', 'pos': '14561627', 'alt': 'AG'}} - assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520120_14520124=', 'vcf': {'chr': '3', 'ref': 'GGGCC', 'pos': '14520120', 'alt': 'GGGCC'}} - assert results['NM_001080423.3:c.1016_1020=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.3'} - assert results['flag'] == 'gene_variant' assert 'NM_001080423.2:c.1307_1311=' in list(results.keys()) - assert results['NM_001080423.2:c.1307_1311=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001080423.2:c.1307_1311=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001080423.2:c.1307_1311=']['alt_genomic_loci'], []) - assert results['NM_001080423.2:c.1307_1311=']['gene_symbol'] == 'GRIP2' - assert results['NM_001080423.2:c.1307_1311=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.2:p.(Arg436=)', 'slr': 'NP_001073892.2:p.(R436=)'} assert results['NM_001080423.2:c.1307_1311=']['submitted_variant'] == '3-14561627-AG-AGG' - assert results['NM_001080423.2:c.1307_1311=']['genome_context_intronic_sequence'] == '' - assert results['NM_001080423.2:c.1307_1311=']['hgvs_lrg_variant'] == '' + assert results['NM_001080423.2:c.1307_1311=']['gene_symbol'] == 'GRIP2' + assert results['NM_001080423.2:c.1307_1311=']['gene_ids'] == {'hgnc_id': 'HGNC:23841', 'entrez_gene_id': '80852', 'ucsc_id': 'uc032rfi.1', 'omim_id': []} assert results['NM_001080423.2:c.1307_1311=']['hgvs_transcript_variant'] == 'NM_001080423.2:c.1307_1311=' + assert results['NM_001080423.2:c.1307_1311=']['genome_context_intronic_sequence'] == '' + assert 
results['NM_001080423.2:c.1307_1311=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001080423.2:c.1307_1311=']['hgvs_refseqgene_variant'] == '' - assert results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': 'chr3', 'ref': 'A', 'pos': '14561627', 'alt': 'AG'}} + assert results['NM_001080423.2:c.1307_1311=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.2:p.(Arg436=)', 'slr': 'NP_001073892.2:p.(R436=)'} + assert results['NM_001080423.2:c.1307_1311=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001080423.2:c.1307_1311=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001080423.2:c.1307_1311=']['alt_genomic_loci'], []) + assert results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': 'chr3', 'pos': '14561627', 'ref': 'A', 'alt': 'AG'}} assert 'hg38' not in list(results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci'].keys()) - assert results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': '3', 'ref': 'A', 'pos': '14561627', 'alt': 'AG'}} + assert results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': '3', 'pos': '14561627', 'ref': 'A', 'alt': 'AG'}} assert 'grch38' not in list(results['NM_001080423.2:c.1307_1311=']['primary_assembly_loci'].keys()) - assert results['NM_001080423.2:c.1307_1311=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.2'} + assert results['NM_001080423.2:c.1307_1311=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.2', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.2'} + assert 'NM_001080423.3:c.1016_1020=' in list(results.keys()) + assert results['NM_001080423.3:c.1016_1020=']['submitted_variant'] == '3-14561627-AG-AGG' + assert results['NM_001080423.3:c.1016_1020=']['gene_symbol'] == 'GRIP2' + assert results['NM_001080423.3:c.1016_1020=']['gene_ids'] == {'hgnc_id': 'HGNC:23841', 'entrez_gene_id': '80852', 'ucsc_id': 'uc032rfi.1', 'omim_id': []} + assert results['NM_001080423.3:c.1016_1020=']['hgvs_transcript_variant'] == 'NM_001080423.3:c.1016_1020=' + assert results['NM_001080423.3:c.1016_1020=']['genome_context_intronic_sequence'] == '' + assert results['NM_001080423.3:c.1016_1020=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001080423.3:c.1016_1020=']['hgvs_refseqgene_variant'] == '' + assert results['NM_001080423.3:c.1016_1020=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.3:p.(Arg339=)', 'slr': 'NP_001073892.3:p.(R339=)'} + assert results['NM_001080423.3:c.1016_1020=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001080423.3:c.1016_1020=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001080423.3:c.1016_1020=']['alt_genomic_loci'], []) + assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': 'chr3', 'pos': '14561627', 'ref': 'A', 'alt': 'AG'}} + assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520120_14520124=', 'vcf': {'chr': 'chr3', 'pos': '14520120', 'ref': 'GGGCC', 'alt': 'GGGCC'}} + assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561629dup', 'vcf': {'chr': '3', 'pos': '14561627', 'ref': 'A', 'alt': 'AG'}} + assert results['NM_001080423.3:c.1016_1020=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000003.12:g.14520120_14520124=', 'vcf': {'chr': '3', 'pos': '14520120', 'ref': 'GGGCC', 'alt': 'GGGCC'}} + assert results['NM_001080423.3:c.1016_1020=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.3'} def test_variant155(self): variant = '3-14561630-CC-CC' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_001080423.3:c.1020del' in list(results.keys()) - assert results['NM_001080423.3:c.1020del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001080423.3:c.1020del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001080423.3:c.1020del']['alt_genomic_loci'], []) - assert results['NM_001080423.3:c.1020del']['gene_symbol'] == 'GRIP2' - assert results['NM_001080423.3:c.1020del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.3:p.(Ser341GlnfsTer4)', 'slr': 'NP_001073892.3:p.(S341Qfs*4)'} - assert results['NM_001080423.3:c.1020del']['submitted_variant'] == '3-14561630-CC-CC' - assert results['NM_001080423.3:c.1020del']['genome_context_intronic_sequence'] == '' - assert results['NM_001080423.3:c.1020del']['hgvs_lrg_variant'] == '' - assert results['NM_001080423.3:c.1020del']['hgvs_transcript_variant'] == 'NM_001080423.3:c.1020del' - assert results['NM_001080423.3:c.1020del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001080423.3:c.1020del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': 'chr3', 'ref': 'CTGAGGC', 'pos': '14561624', 'alt': 'CTGAGGC'}} - assert results['NM_001080423.3:c.1020del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520122del', 'vcf': {'chr': 'chr3', 'ref': 'AG', 'pos': '14520119', 'alt': 'A'}} - assert results['NM_001080423.3:c.1020del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': '3', 'ref': 'CTGAGGC', 'pos': '14561624', 'alt': 'CTGAGGC'}} - assert results['NM_001080423.3:c.1020del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520122del', 'vcf': {'chr': '3', 'ref': 'AG', 'pos': '14520119', 'alt': 'A'}} - assert results['NM_001080423.3:c.1020del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.3'} - assert results['flag'] == 'gene_variant' assert 'NM_001080423.2:c.1311del' in list(results.keys()) - assert results['NM_001080423.2:c.1311del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001080423.2:c.1311del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001080423.2:c.1311del']['alt_genomic_loci'], []) - assert results['NM_001080423.2:c.1311del']['gene_symbol'] == 'GRIP2' - assert results['NM_001080423.2:c.1311del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.2:p.(Ser438GlnfsTer4)', 'slr': 'NP_001073892.2:p.(S438Qfs*4)'} assert results['NM_001080423.2:c.1311del']['submitted_variant'] == '3-14561630-CC-CC' - assert results['NM_001080423.2:c.1311del']['genome_context_intronic_sequence'] == '' - assert results['NM_001080423.2:c.1311del']['hgvs_lrg_variant'] == '' + assert results['NM_001080423.2:c.1311del']['gene_symbol'] == 'GRIP2' + assert results['NM_001080423.2:c.1311del']['gene_ids'] == {'hgnc_id': 'HGNC:23841', 'entrez_gene_id': '80852', 'ucsc_id': 'uc032rfi.1', 'omim_id': []} assert results['NM_001080423.2:c.1311del']['hgvs_transcript_variant'] == 'NM_001080423.2:c.1311del' + assert results['NM_001080423.2:c.1311del']['genome_context_intronic_sequence'] == '' + assert results['NM_001080423.2:c.1311del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001080423.2:c.1311del']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_001080423.2:c.1311del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': 'chr3', 'ref': 'CTGAGGC', 'pos': '14561624', 'alt': 'CTGAGGC'}} + assert results['NM_001080423.2:c.1311del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.2:p.(Ser438GlnfsTer4)', 'slr': 'NP_001073892.2:p.(S438Qfs*4)'} + assert results['NM_001080423.2:c.1311del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001080423.2:c.1311del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001080423.2:c.1311del']['alt_genomic_loci'], []) + assert results['NM_001080423.2:c.1311del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': 'chr3', 'pos': '14561624', 'ref': 'CTGAGGC', 'alt': 'CTGAGGC'}} assert 'hg38' not in list(results['NM_001080423.2:c.1311del']['primary_assembly_loci'].keys()) - assert results['NM_001080423.2:c.1311del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': '3', 'ref': 'CTGAGGC', 'pos': '14561624', 'alt': 'CTGAGGC'}} + assert results['NM_001080423.2:c.1311del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': '3', 'pos': '14561624', 'ref': 'CTGAGGC', 'alt': 'CTGAGGC'}} assert 'grch38' not in list(results['NM_001080423.2:c.1311del']['primary_assembly_loci'].keys()) - assert results['NM_001080423.2:c.1311del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.2'} + assert results['NM_001080423.2:c.1311del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.2'} + assert 'NM_001080423.3:c.1020del' in list(results.keys()) + assert 
results['NM_001080423.3:c.1020del']['submitted_variant'] == '3-14561630-CC-CC' + assert results['NM_001080423.3:c.1020del']['gene_symbol'] == 'GRIP2' + assert results['NM_001080423.3:c.1020del']['gene_ids'] == {'hgnc_id': 'HGNC:23841', 'entrez_gene_id': '80852', 'ucsc_id': 'uc032rfi.1', 'omim_id': []} + assert results['NM_001080423.3:c.1020del']['hgvs_transcript_variant'] == 'NM_001080423.3:c.1020del' + assert results['NM_001080423.3:c.1020del']['genome_context_intronic_sequence'] == '' + assert results['NM_001080423.3:c.1020del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001080423.3:c.1020del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001080423.3:c.1020del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073892.3:p.(Ser341GlnfsTer4)', 'slr': 'NP_001073892.3:p.(S341Qfs*4)'} + assert results['NM_001080423.3:c.1020del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001080423.3:c.1020del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001080423.3:c.1020del']['alt_genomic_loci'], []) + assert results['NM_001080423.3:c.1020del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': 'chr3', 'pos': '14561624', 'ref': 'CTGAGGC', 'alt': 'CTGAGGC'}} + assert results['NM_001080423.3:c.1020del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520122del', 'vcf': {'chr': 'chr3', 'pos': '14520119', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_001080423.3:c.1020del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.14561624_14561630=', 'vcf': {'chr': '3', 'pos': '14561624', 'ref': 'CTGAGGC', 'alt': 'CTGAGGC'}} + assert results['NM_001080423.3:c.1020del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.14520122del', 'vcf': {'chr': '3', 'pos': '14520119', 'ref': 'AG', 'alt': 'A'}} + assert 
results['NM_001080423.3:c.1020del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001080423.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073892.3'} def test_variant156(self): variant = '6-90403795-G-G' @@ -4932,80 +5000,82 @@ def test_variant156(self): assert results['flag'] == 'gene_variant' assert 'NM_014611.1:c.9879T>C' in list(results.keys()) - assert results['NM_014611.1:c.9879T>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_014611.1:c.9879T>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_014611.1:c.9879T>C']['alt_genomic_loci'], []) - assert results['NM_014611.1:c.9879T>C']['gene_symbol'] == 'MDN1' - assert results['NM_014611.1:c.9879T>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055426.1:p.(Val3293=)', 'slr': 'NP_055426.1:p.(V3293=)'} assert results['NM_014611.1:c.9879T>C']['submitted_variant'] == '6-90403795-G-G' - assert results['NM_014611.1:c.9879T>C']['genome_context_intronic_sequence'] == '' - assert results['NM_014611.1:c.9879T>C']['hgvs_lrg_variant'] == '' + assert results['NM_014611.1:c.9879T>C']['gene_symbol'] == 'MDN1' + assert results['NM_014611.1:c.9879T>C']['gene_ids'] == {'hgnc_id': 'HGNC:18302', 'entrez_gene_id': '23195', 'ucsc_id': 'uc003pnn.2', 'omim_id': ['618200']} assert results['NM_014611.1:c.9879T>C']['hgvs_transcript_variant'] == 'NM_014611.1:c.9879T>C' + assert results['NM_014611.1:c.9879T>C']['genome_context_intronic_sequence'] == '' + assert results['NM_014611.1:c.9879T>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014611.1:c.9879T>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_014611.1:c.9879T>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': 'chr6', 'ref': 'G', 'pos': '90403795', 'alt': 'G'}} + assert results['NM_014611.1:c.9879T>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055426.1:p.(Val3293=)', 'slr': 
'NP_055426.1:p.(V3293=)'} + assert results['NM_014611.1:c.9879T>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014611.1:c.9879T>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_014611.1:c.9879T>C']['alt_genomic_loci'], []) + assert results['NM_014611.1:c.9879T>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': 'chr6', 'pos': '90403795', 'ref': 'G', 'alt': 'G'}} assert 'hg38' not in list(results['NM_014611.1:c.9879T>C']['primary_assembly_loci'].keys()) - assert results['NM_014611.1:c.9879T>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': '6', 'ref': 'G', 'pos': '90403795', 'alt': 'G'}} + assert results['NM_014611.1:c.9879T>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': '6', 'pos': '90403795', 'ref': 'G', 'alt': 'G'}} assert 'grch38' not in list(results['NM_014611.1:c.9879T>C']['primary_assembly_loci'].keys()) - assert results['NM_014611.1:c.9879T>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.1'} + assert results['NM_014611.1:c.9879T>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1'} assert 'NM_014611.2:c.9879C=' in list(results.keys()) - assert results['NM_014611.2:c.9879C=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_014611.2:c.9879C=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_014611.2:c.9879C=']['alt_genomic_loci'], []) - assert results['NM_014611.2:c.9879C=']['gene_symbol'] == 'MDN1' - assert results['NM_014611.2:c.9879C=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055426.1:p.(Val3293=)', 'slr': 'NP_055426.1:p.(V3293=)'} assert 
results['NM_014611.2:c.9879C=']['submitted_variant'] == '6-90403795-G-G' - assert results['NM_014611.2:c.9879C=']['genome_context_intronic_sequence'] == '' - assert results['NM_014611.2:c.9879C=']['hgvs_lrg_variant'] == '' + assert results['NM_014611.2:c.9879C=']['gene_symbol'] == 'MDN1' + assert results['NM_014611.2:c.9879C=']['gene_ids'] == {'hgnc_id': 'HGNC:18302', 'entrez_gene_id': '23195', 'ucsc_id': 'uc003pnn.2', 'omim_id': ['618200']} assert results['NM_014611.2:c.9879C=']['hgvs_transcript_variant'] == 'NM_014611.2:c.9879C=' + assert results['NM_014611.2:c.9879C=']['genome_context_intronic_sequence'] == '' + assert results['NM_014611.2:c.9879C=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014611.2:c.9879C=']['hgvs_refseqgene_variant'] == '' - assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': 'chr6', 'ref': 'G', 'pos': '90403795', 'alt': 'G'}} - assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G=', 'vcf': {'chr': 'chr6', 'ref': 'G', 'pos': '89694076', 'alt': 'G'}} - assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': '6', 'ref': 'G', 'pos': '90403795', 'alt': 'G'}} - assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G=', 'vcf': {'chr': '6', 'ref': 'G', 'pos': '89694076', 'alt': 'G'}} - assert results['NM_014611.2:c.9879C=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.2'} - + assert results['NM_014611.2:c.9879C=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055426.1:p.(Val3293=)', 'slr': 'NP_055426.1:p.(V3293=)'} + assert 
results['NM_014611.2:c.9879C=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014611.2:c.9879C=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_014611.2:c.9879C=']['alt_genomic_loci'], []) + assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': 'chr6', 'pos': '90403795', 'ref': 'G', 'alt': 'G'}} + assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G=', 'vcf': {'chr': 'chr6', 'pos': '89694076', 'ref': 'G', 'alt': 'G'}} + assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G=', 'vcf': {'chr': '6', 'pos': '90403795', 'ref': 'G', 'alt': 'G'}} + assert results['NM_014611.2:c.9879C=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G=', 'vcf': {'chr': '6', 'pos': '89694076', 'ref': 'G', 'alt': 'G'}} + assert results['NM_014611.2:c.9879C=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1'} def test_variant157(self): variant = '6-90403795-G-A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_014611.2:c.9879C>T' in list(results.keys()) - assert results['NM_014611.2:c.9879C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_014611.2:c.9879C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_014611.2:c.9879C>T']['alt_genomic_loci'], []) - assert results['NM_014611.2:c.9879C>T']['gene_symbol'] == 'MDN1' - assert results['NM_014611.2:c.9879C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055426.1:p.(Val3293=)', 'slr': 'NP_055426.1:p.(V3293=)'} - assert results['NM_014611.2:c.9879C>T']['submitted_variant'] == '6-90403795-G-A' - assert 
results['NM_014611.2:c.9879C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_014611.2:c.9879C>T']['hgvs_lrg_variant'] == '' - assert results['NM_014611.2:c.9879C>T']['hgvs_transcript_variant'] == 'NM_014611.2:c.9879C>T' - assert results['NM_014611.2:c.9879C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_014611.2:c.9879C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G>A', 'vcf': {'chr': 'chr6', 'ref': 'G', 'pos': '90403795', 'alt': 'A'}} - assert results['NM_014611.2:c.9879C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G>A', 'vcf': {'chr': 'chr6', 'ref': 'G', 'pos': '89694076', 'alt': 'A'}} - assert results['NM_014611.2:c.9879C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G>A', 'vcf': {'chr': '6', 'ref': 'G', 'pos': '90403795', 'alt': 'A'}} - assert results['NM_014611.2:c.9879C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G>A', 'vcf': {'chr': '6', 'ref': 'G', 'pos': '89694076', 'alt': 'A'}} - assert results['NM_014611.2:c.9879C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.2'} - assert results['flag'] == 'gene_variant' assert 'NM_014611.1:c.9879T=' in list(results.keys()) - assert results['NM_014611.1:c.9879T=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_014611.1:c.9879T=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_014611.1:c.9879T=']['alt_genomic_loci'], []) - assert results['NM_014611.1:c.9879T=']['gene_symbol'] == 'MDN1' - assert results['NM_014611.1:c.9879T=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055426.1:p.(Val3293=)', 'slr': 'NP_055426.1:p.(V3293=)'} assert results['NM_014611.1:c.9879T=']['submitted_variant'] == '6-90403795-G-A' - assert 
results['NM_014611.1:c.9879T=']['genome_context_intronic_sequence'] == '' - assert results['NM_014611.1:c.9879T=']['hgvs_lrg_variant'] == '' + assert results['NM_014611.1:c.9879T=']['gene_symbol'] == 'MDN1' + assert results['NM_014611.1:c.9879T=']['gene_ids'] == {'hgnc_id': 'HGNC:18302', 'entrez_gene_id': '23195', 'ucsc_id': 'uc003pnn.2', 'omim_id': ['618200']} assert results['NM_014611.1:c.9879T=']['hgvs_transcript_variant'] == 'NM_014611.1:c.9879T=' + assert results['NM_014611.1:c.9879T=']['genome_context_intronic_sequence'] == '' + assert results['NM_014611.1:c.9879T=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014611.1:c.9879T=']['hgvs_refseqgene_variant'] == '' - assert results['NM_014611.1:c.9879T=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G>A', 'vcf': {'chr': 'chr6', 'ref': 'G', 'pos': '90403795', 'alt': 'A'}} + assert results['NM_014611.1:c.9879T=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055426.1:p.(Val3293=)', 'slr': 'NP_055426.1:p.(V3293=)'} + assert results['NM_014611.1:c.9879T=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014611.1:c.9879T=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_014611.1:c.9879T=']['alt_genomic_loci'], []) + assert results['NM_014611.1:c.9879T=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G>A', 'vcf': {'chr': 'chr6', 'pos': '90403795', 'ref': 'G', 'alt': 'A'}} assert 'hg38' not in list(results['NM_014611.1:c.9879T=']['primary_assembly_loci'].keys()) - assert results['NM_014611.1:c.9879T=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G>A', 'vcf': {'chr': '6', 'ref': 'G', 'pos': '90403795', 'alt': 'A'}} + assert results['NM_014611.1:c.9879T=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G>A', 'vcf': {'chr': '6', 'pos': '90403795', 'ref': 'G', 'alt': 'A'}} assert 'grch38' not in 
list(results['NM_014611.1:c.9879T=']['primary_assembly_loci'].keys()) - assert results['NM_014611.1:c.9879T=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.1'} + assert results['NM_014611.1:c.9879T=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1'} + assert 'NM_014611.2:c.9879C>T' in list(results.keys()) + assert results['NM_014611.2:c.9879C>T']['submitted_variant'] == '6-90403795-G-A' + assert results['NM_014611.2:c.9879C>T']['gene_symbol'] == 'MDN1' + assert results['NM_014611.2:c.9879C>T']['gene_ids'] == {'hgnc_id': 'HGNC:18302', 'entrez_gene_id': '23195', 'ucsc_id': 'uc003pnn.2', 'omim_id': ['618200']} + assert results['NM_014611.2:c.9879C>T']['hgvs_transcript_variant'] == 'NM_014611.2:c.9879C>T' + assert results['NM_014611.2:c.9879C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_014611.2:c.9879C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014611.2:c.9879C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_014611.2:c.9879C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055426.1:p.(Val3293=)', 'slr': 'NP_055426.1:p.(V3293=)'} + assert results['NM_014611.2:c.9879C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014611.2:c.9879C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_014611.2:c.9879C>T']['alt_genomic_loci'], []) + assert results['NM_014611.2:c.9879C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G>A', 'vcf': {'chr': 'chr6', 'pos': '90403795', 'ref': 'G', 'alt': 'A'}} + assert results['NM_014611.2:c.9879C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G>A', 'vcf': {'chr': 'chr6', 'pos': '89694076', 'ref': 'G', 'alt': 'A'}} + assert 
results['NM_014611.2:c.9879C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.90403795G>A', 'vcf': {'chr': '6', 'pos': '90403795', 'ref': 'G', 'alt': 'A'}} + assert results['NM_014611.2:c.9879C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.89694076G>A', 'vcf': {'chr': '6', 'pos': '89694076', 'ref': 'G', 'alt': 'A'}} + assert results['NM_014611.2:c.9879C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014611.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055426.1'} def test_variant158(self): variant = '6-32012992-CG-C' @@ -5013,74 +5083,77 @@ def test_variant158(self): print(results) assert results['flag'] == 'gene_variant' - assert 'NM_032470.3:c.4del' in list(results.keys()) - assert results['NM_032470.3:c.4del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_032470.3:c.4del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_032470.3:c.4del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'chr6_GL000251v2_alt', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 
'NT_167245.2:g.3286625del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'chr6_GL000252v2_alt', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'chr6_GL000254v2_alt', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167248.1:g.3274047del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'ref': 'CG', 'pos': '3274046', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167248.1:g.3274047del', 'vcf': {'chr': 'chr6_qbl_hap6', 'ref': 'CG', 'pos': '3274046', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167248.2:g.3268451del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'ref': 'CG', 'pos': '3268450', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167248.2:g.3268451del', 'vcf': {'chr': 'chr6_GL000255v2_alt', 'ref': 'CG', 'pos': '3268450', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3345701del', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'CG', 'pos': '3345700', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3345701del', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': 'CG', 'pos': '3345700', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3346403del', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'CG', 'pos': '3346402', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3346403del', 'vcf': {'chr': 
'chr6_GL000256v2_alt', 'ref': 'CG', 'pos': '3346402', 'alt': 'C'}}}]) - assert results['NM_032470.3:c.4del']['gene_symbol'] == 'TNXB' - assert results['NM_032470.3:c.4del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_115859.2:p.(Arg2AlafsTer91)', 'slr': 'NP_115859.2:p.(R2Afs*91)'} - assert results['NM_032470.3:c.4del']['submitted_variant'] == '6-32012992-CG-C' - assert results['NM_032470.3:c.4del']['genome_context_intronic_sequence'] == '' - assert results['NM_032470.3:c.4del']['hgvs_lrg_variant'] == '' - assert results['NM_032470.3:c.4del']['hgvs_transcript_variant'] == 'NM_032470.3:c.4del' - assert results['NM_032470.3:c.4del']['hgvs_refseqgene_variant'] == '' - assert results['NM_032470.3:c.4del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': 'chr6', 'ref': 'CG', 'pos': '32012992', 'alt': 'C'}} - assert results['NM_032470.3:c.4del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.32045216del', 'vcf': {'chr': 'chr6', 'ref': 'CG', 'pos': '32045215', 'alt': 'C'}} - assert results['NM_032470.3:c.4del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': '6', 'ref': 'CG', 'pos': '32012992', 'alt': 'C'}} - assert results['NM_032470.3:c.4del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.32045216del', 'vcf': {'chr': '6', 'ref': 'CG', 'pos': '32045215', 'alt': 'C'}} - assert results['NM_032470.3:c.4del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_115859.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032470.3'} - - assert 'NM_001365276.1:c.10717del' in list(results.keys()) - assert results['NM_001365276.1:c.10717del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001365276.1:c.10717del']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_001365276.1:c.10717del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}]) - assert results['NM_001365276.1:c.10717del']['gene_symbol'] == 'TNXB' - assert results['NM_001365276.1:c.10717del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001352205.1:p.(Arg3573AlafsTer91)', 'slr': 'NP_001352205.1:p.(R3573Afs*91)'} - assert results['NM_001365276.1:c.10717del']['submitted_variant'] == '6-32012992-CG-C' - assert results['NM_001365276.1:c.10717del']['genome_context_intronic_sequence'] == '' - assert results['NM_001365276.1:c.10717del']['hgvs_lrg_variant'] == '' - assert results['NM_001365276.1:c.10717del']['hgvs_transcript_variant'] == 'NM_001365276.1:c.10717del' - assert results['NM_001365276.1:c.10717del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001365276.1:c.10717del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': 'chr6', 'ref': 'CG', 'pos': '32012992', 'alt': 'C'}} - assert 'hg38' not in list(results['NM_001365276.1:c.10717del']['primary_assembly_loci'].keys()) - assert 
results['NM_001365276.1:c.10717del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': '6', 'ref': 'CG', 'pos': '32012992', 'alt': 'C'}} - assert 'grch38' not in list(results['NM_001365276.1:c.10717del']['primary_assembly_loci'].keys()) - assert results['NM_001365276.1:c.10717del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001352205.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001365276.1'} + assert 'NM_019105.6:c.10711del' in list(results.keys()) + assert results['NM_019105.6:c.10711del']['submitted_variant'] == '6-32012992-CG-C' + assert results['NM_019105.6:c.10711del']['gene_symbol'] == 'TNXB' + assert results['NM_019105.6:c.10711del']['gene_ids'] == {'hgnc_id': 'HGNC:11976', 'entrez_gene_id': '7148', 'ucsc_id': 'uc063nnw.1', 'omim_id': ['600985']} + assert results['NM_019105.6:c.10711del']['hgvs_transcript_variant'] == 'NM_019105.6:c.10711del' + assert results['NM_019105.6:c.10711del']['genome_context_intronic_sequence'] == '' + assert results['NM_019105.6:c.10711del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_019105.6:c.10711del']['hgvs_refseqgene_variant'] == 'NG_008337.2:g.69159del' + assert results['NM_019105.6:c.10711del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061978.6:p.(Arg3571AlafsTer91)', 'slr': 'NP_061978.6:p.(R3571Afs*91)'} + assert results['NM_019105.6:c.10711del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_019105.6:c.10711del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_019105.6:c.10711del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'pos': '3392833', 'ref': 'CG', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'pos': '3392833', 'ref': 'CG', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 
'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'pos': '3292209', 'ref': 'CG', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'pos': '3292209', 'ref': 'CG', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'pos': '3286624', 'ref': 'CG', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'chr6_GL000252v2_alt', 'pos': '3286624', 'ref': 'CG', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'pos': '3483537', 'ref': 'CG', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'chr6_GL000251v2_alt', 'pos': '3483537', 'ref': 'CG', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167248.1:g.3271861del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'pos': '3271858', 'ref': 'AG', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167248.1:g.3271861del', 'vcf': {'chr': 'chr6_qbl_hap6', 'pos': '3271858', 'ref': 'AG', 'alt': 'A'}}}, {'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'pos': '3483643', 'ref': 'CG', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'pos': '3483643', 'ref': 'CG', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'pos': '3387248', 'ref': 'CG', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'chr6_GL000254v2_alt', 'pos': '3387248', 'ref': 'CG', 'alt': 'C'}}}]) + assert results['NM_019105.6:c.10711del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': 'chr6', 'pos': '32012992', 'ref': 'CG', 'alt': 'C'}} + assert 
results['NM_019105.6:c.10711del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.32045216del', 'vcf': {'chr': 'chr6', 'pos': '32045215', 'ref': 'CG', 'alt': 'C'}} + assert results['NM_019105.6:c.10711del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': '6', 'pos': '32012992', 'ref': 'CG', 'alt': 'C'}} + assert results['NM_019105.6:c.10711del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.32045216del', 'vcf': {'chr': '6', 'pos': '32045215', 'ref': 'CG', 'alt': 'C'}} + assert results['NM_019105.6:c.10711del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_019105.6', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061978.6', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008337.2'} assert 'NM_019105.7:c.10711del' in list(results.keys()) - assert results['NM_019105.7:c.10711del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_019105.7:c.10711del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_019105.7:c.10711del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 
'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}]) - assert results['NM_019105.7:c.10711del']['gene_symbol'] == 'TNXB' - assert results['NM_019105.7:c.10711del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061978.6:p.(Arg3571AlafsTer91)', 'slr': 'NP_061978.6:p.(R3571Afs*91)'} assert results['NM_019105.7:c.10711del']['submitted_variant'] == '6-32012992-CG-C' - assert results['NM_019105.7:c.10711del']['genome_context_intronic_sequence'] == '' - assert results['NM_019105.7:c.10711del']['hgvs_lrg_variant'] == '' + assert results['NM_019105.7:c.10711del']['gene_symbol'] == 'TNXB' + assert results['NM_019105.7:c.10711del']['gene_ids'] == {'hgnc_id': 'HGNC:11976', 'entrez_gene_id': '7148', 'ucsc_id': 'uc063nnw.1', 'omim_id': ['600985']} assert results['NM_019105.7:c.10711del']['hgvs_transcript_variant'] == 'NM_019105.7:c.10711del' + assert results['NM_019105.7:c.10711del']['genome_context_intronic_sequence'] == '' + assert results['NM_019105.7:c.10711del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_019105.7:c.10711del']['hgvs_refseqgene_variant'] == '' - assert results['NM_019105.7:c.10711del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': 'chr6', 'ref': 'CG', 'pos': '32012992', 'alt': 'C'}} + assert results['NM_019105.7:c.10711del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061978.6:p.(Arg3571AlafsTer91)', 'slr': 'NP_061978.6:p.(R3571Afs*91)'} + assert results['NM_019105.7:c.10711del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_019105.7:c.10711del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_019105.7:c.10711del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'pos': '3483643', 'ref': 'CG', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'pos': '3483643', 'ref': 
'CG', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'pos': '3292209', 'ref': 'CG', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'pos': '3292209', 'ref': 'CG', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'pos': '3392833', 'ref': 'CG', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'pos': '3392833', 'ref': 'CG', 'alt': 'C'}}}]) + assert results['NM_019105.7:c.10711del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': 'chr6', 'pos': '32012992', 'ref': 'CG', 'alt': 'C'}} assert 'hg38' not in list(results['NM_019105.7:c.10711del']['primary_assembly_loci'].keys()) - assert results['NM_019105.7:c.10711del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': '6', 'ref': 'CG', 'pos': '32012992', 'alt': 'C'}} + assert results['NM_019105.7:c.10711del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': '6', 'pos': '32012992', 'ref': 'CG', 'alt': 'C'}} assert 'grch38' not in list(results['NM_019105.7:c.10711del']['primary_assembly_loci'].keys()) - assert results['NM_019105.7:c.10711del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061978.6', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_019105.7'} + assert results['NM_019105.7:c.10711del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_019105.7', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061978.6'} - assert 'NM_019105.6:c.10711del' in list(results.keys()) - assert results['NM_019105.6:c.10711del']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_019105.6:c.10711del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_019105.6:c.10711del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'ref': 'CG', 'pos': '3483643', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'chr6_GL000251v2_alt', 'ref': 'CG', 'pos': '3483537', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'ref': 'CG', 'pos': '3292209', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'chr6_GL000252v2_alt', 'ref': 'CG', 'pos': '3286624', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'CG', 'pos': '3392833', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'chr6_GL000254v2_alt', 'ref': 'CG', 'pos': '3387248', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 
'NT_167248.1:g.3271861del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'ref': 'AG', 'pos': '3271858', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167248.1:g.3271861del', 'vcf': {'chr': 'chr6_qbl_hap6', 'ref': 'AG', 'pos': '3271858', 'alt': 'A'}}}]) - assert results['NM_019105.6:c.10711del']['gene_symbol'] == 'TNXB' - assert results['NM_019105.6:c.10711del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061978.6:p.(Arg3571AlafsTer91)', 'slr': 'NP_061978.6:p.(R3571Afs*91)'} - assert results['NM_019105.6:c.10711del']['submitted_variant'] == '6-32012992-CG-C' - assert results['NM_019105.6:c.10711del']['genome_context_intronic_sequence'] == '' - assert results['NM_019105.6:c.10711del']['hgvs_lrg_variant'] == '' - assert results['NM_019105.6:c.10711del']['hgvs_transcript_variant'] == 'NM_019105.6:c.10711del' - assert results['NM_019105.6:c.10711del']['hgvs_refseqgene_variant'] == 'NG_008337.2:g.69159del' - assert results['NM_019105.6:c.10711del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': 'chr6', 'ref': 'CG', 'pos': '32012992', 'alt': 'C'}} - assert results['NM_019105.6:c.10711del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.32045216del', 'vcf': {'chr': 'chr6', 'ref': 'CG', 'pos': '32045215', 'alt': 'C'}} - assert results['NM_019105.6:c.10711del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': '6', 'ref': 'CG', 'pos': '32012992', 'alt': 'C'}} - assert results['NM_019105.6:c.10711del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.32045216del', 'vcf': {'chr': '6', 'ref': 'CG', 'pos': '32045215', 'alt': 'C'}} - assert results['NM_019105.6:c.10711del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008337.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061978.6', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_019105.6'} + assert 'NM_001365276.1:c.10717del' in list(results.keys()) + assert results['NM_001365276.1:c.10717del']['submitted_variant'] == '6-32012992-CG-C' + assert results['NM_001365276.1:c.10717del']['gene_symbol'] == 'TNXB' + assert results['NM_001365276.1:c.10717del']['gene_ids'] == {'hgnc_id': 'HGNC:11976', 'entrez_gene_id': '7148', 'ucsc_id': 'uc063nnw.1', 'omim_id': ['600985']} + assert results['NM_001365276.1:c.10717del']['hgvs_transcript_variant'] == 'NM_001365276.1:c.10717del' + assert results['NM_001365276.1:c.10717del']['genome_context_intronic_sequence'] == '' + assert results['NM_001365276.1:c.10717del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001365276.1:c.10717del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001365276.1:c.10717del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001352205.1:p.(Arg3573AlafsTer91)', 'slr': 'NP_001352205.1:p.(R3573Afs*91)'} + assert results['NM_001365276.1:c.10717del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001365276.1:c.10717del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001365276.1:c.10717del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'pos': '3292209', 'ref': 'CG', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'pos': '3292209', 'ref': 'CG', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'pos': '3483643', 'ref': 'CG', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'pos': '3483643', 'ref': 'CG', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'pos': '3392833', 'ref': 'CG', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 
'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'pos': '3392833', 'ref': 'CG', 'alt': 'C'}}}]) + assert results['NM_001365276.1:c.10717del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': 'chr6', 'pos': '32012992', 'ref': 'CG', 'alt': 'C'}} + assert 'hg38' not in list(results['NM_001365276.1:c.10717del']['primary_assembly_loci'].keys()) + assert results['NM_001365276.1:c.10717del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': '6', 'pos': '32012992', 'ref': 'CG', 'alt': 'C'}} + assert 'grch38' not in list(results['NM_001365276.1:c.10717del']['primary_assembly_loci'].keys()) + assert results['NM_001365276.1:c.10717del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001365276.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001352205.1'} + assert 'NM_032470.3:c.4del' in list(results.keys()) + assert results['NM_032470.3:c.4del']['submitted_variant'] == '6-32012992-CG-C' + assert results['NM_032470.3:c.4del']['gene_symbol'] == 'TNXB' + assert results['NM_032470.3:c.4del']['gene_ids'] == {'hgnc_id': 'HGNC:11976', 'entrez_gene_id': '7148', 'ucsc_id': 'uc063nnw.1', 'omim_id': ['600985']} + assert results['NM_032470.3:c.4del']['hgvs_transcript_variant'] == 'NM_032470.3:c.4del' + assert results['NM_032470.3:c.4del']['genome_context_intronic_sequence'] == '' + assert results['NM_032470.3:c.4del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_032470.3:c.4del']['hgvs_refseqgene_variant'] == '' + assert results['NM_032470.3:c.4del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_115859.2:p.(Arg2AlafsTer91)', 'slr': 'NP_115859.2:p.(R2Afs*91)'} + assert results['NM_032470.3:c.4del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_032470.3:c.4del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_032470.3:c.4del']['alt_genomic_loci'], 
[{'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3345701del', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'pos': '3345700', 'ref': 'CG', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3345701del', 'vcf': {'chr': 'chr6_ssto_hap7', 'pos': '3345700', 'ref': 'CG', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'pos': '3392833', 'ref': 'CG', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3392834del', 'vcf': {'chr': 'chr6_mcf_hap5', 'pos': '3392833', 'ref': 'CG', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'pos': '3483643', 'ref': 'CG', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_113891.2:g.3483644del', 'vcf': {'chr': 'chr6_cox_hap2', 'pos': '3483643', 'ref': 'CG', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'pos': '3387248', 'ref': 'CG', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167247.2:g.3387249del', 'vcf': {'chr': 'chr6_GL000254v2_alt', 'pos': '3387248', 'ref': 'CG', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167248.2:g.3268451del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'pos': '3268450', 'ref': 'CG', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167248.2:g.3268451del', 'vcf': {'chr': 'chr6_GL000255v2_alt', 'pos': '3268450', 'ref': 'CG', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167248.1:g.3274047del', 'vcf': {'chr': 'HSCHR6_MHC_QBL_CTG1', 'pos': '3274046', 'ref': 'CG', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167248.1:g.3274047del', 'vcf': {'chr': 'chr6_qbl_hap6', 'pos': '3274046', 'ref': 'CG', 'alt': 'C'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167245.1:g.3292210del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'pos': '3292209', 'ref': 'CG', 'alt': 'C'}}}, {'hg19': {'hgvs_genomic_description': 
'NT_167245.1:g.3292210del', 'vcf': {'chr': 'chr6_dbb_hap3', 'pos': '3292209', 'ref': 'CG', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'HSCHR6_MHC_COX_CTG1', 'pos': '3483537', 'ref': 'CG', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_113891.3:g.3483538del', 'vcf': {'chr': 'chr6_GL000251v2_alt', 'pos': '3483537', 'ref': 'CG', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'HSCHR6_MHC_DBB_CTG1', 'pos': '3286624', 'ref': 'CG', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167245.2:g.3286625del', 'vcf': {'chr': 'chr6_GL000252v2_alt', 'pos': '3286624', 'ref': 'CG', 'alt': 'C'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3346403del', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'pos': '3346402', 'ref': 'CG', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3346403del', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'pos': '3346402', 'ref': 'CG', 'alt': 'C'}}}]) + assert results['NM_032470.3:c.4del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': 'chr6', 'pos': '32012992', 'ref': 'CG', 'alt': 'C'}} + assert results['NM_032470.3:c.4del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.32045216del', 'vcf': {'chr': 'chr6', 'pos': '32045215', 'ref': 'CG', 'alt': 'C'}} + assert results['NM_032470.3:c.4del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.32012993del', 'vcf': {'chr': '6', 'pos': '32012992', 'ref': 'CG', 'alt': 'C'}} + assert results['NM_032470.3:c.4del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.32045216del', 'vcf': {'chr': '6', 'pos': '32045215', 'ref': 'CG', 'alt': 'C'}} + assert results['NM_032470.3:c.4del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032470.3', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_115859.2'} def test_variant159(self): variant = '17-48275363-C-A' @@ -5089,22 +5162,22 @@ def test_variant159(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.589G>T' in list(results.keys()) - assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' - assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000088.3:c.589G>T']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.589G>T']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.589G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)'} assert results['NM_000088.3:c.589G>T']['submitted_variant'] == '17-48275363-C-A' - assert results['NM_000088.3:c.589G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.589G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8638G>T' + assert results['NM_000088.3:c.589G>T']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589G>T']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.589G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589G>T' + assert results['NM_000088.3:c.589G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.589G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.589G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638G>T' - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '48275363', 'alt': 'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '50198002', 'alt': 'A'}} - assert 
results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '48275363', 'alt': 'A'}} - assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '50198002', 'alt': 'A'}} - assert results['NM_000088.3:c.589G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.589G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197Cys)', 'slr': 'NP_000079.2:p.(G197C)'} + assert results['NM_000088.3:c.589G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589G>T' + assert results['NM_000088.3:c.589G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8638G>T' + self.assertCountEqual(results['NM_000088.3:c.589G>T']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': 'chr17', 'pos': '48275363', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': 'chr17', 'pos': '50198002', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275363C>A', 'vcf': {'chr': '17', 'pos': '48275363', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198002C>A', 'vcf': {'chr': '17', 'pos': '50198002', 'ref': 'C', 'alt': 'A'}} + assert 
results['NM_000088.3:c.589G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant160(self): variant = '17-48275364-C-A' @@ -5113,46 +5186,46 @@ def test_variant160(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.589-1G>T' in list(results.keys()) - assert results['NM_000088.3:c.589-1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-1G>T' - assert results['NM_000088.3:c.589-1G>T']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-1G>T' - self.assertCountEqual(results['NM_000088.3:c.589-1G>T']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.589-1G>T']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.589-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.589-1G>T']['submitted_variant'] == '17-48275364-C-A' - assert results['NM_000088.3:c.589-1G>T']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-1G>T' - assert results['NM_000088.3:c.589-1G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8637G>T' + assert results['NM_000088.3:c.589-1G>T']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589-1G>T']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.589-1G>T']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-1G>T' + assert results['NM_000088.3:c.589-1G>T']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-1G>T' + assert results['NM_000088.3:c.589-1G>T']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-1G>T' assert results['NM_000088.3:c.589-1G>T']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8637G>T' - 
assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '48275364', 'alt': 'A'}} - assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '50198003', 'alt': 'A'}} - assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '48275364', 'alt': 'A'}} - assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '50198003', 'alt': 'A'}} - assert results['NM_000088.3:c.589-1G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.589-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.589-1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-1G>T' + assert results['NM_000088.3:c.589-1G>T']['hgvs_lrg_variant'] == 'LRG_1:g.8637G>T' + self.assertCountEqual(results['NM_000088.3:c.589-1G>T']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': 'chr17', 'pos': '48275364', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': 'chr17', 'pos': '50198003', 'ref': 'C', 'alt': 'A'}} + assert 
results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275364C>A', 'vcf': {'chr': '17', 'pos': '48275364', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198003C>A', 'vcf': {'chr': '17', 'pos': '50198003', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000088.3:c.589-1G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant161(self): variant = '17-48275359-GGA-TCC' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.591_593inv' in list(results.keys()) - assert results['NM_000088.3:c.591_593inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.591_593inv' - assert results['NM_000088.3:c.591_593inv']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000088.3:c.591_593inv']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.591_593inv']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.591_593inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Pro198Asp)', 'slr': 'NP_000079.2:p.(P198D)'} assert results['NM_000088.3:c.591_593inv']['submitted_variant'] == '17-48275359-GGA-TCC' - assert results['NM_000088.3:c.591_593inv']['genome_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.591_593inv']['hgvs_lrg_variant'] == 'LRG_1:g.8640_8642inv' + assert results['NM_000088.3:c.591_593inv']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.591_593inv']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert 
results['NM_000088.3:c.591_593inv']['hgvs_transcript_variant'] == 'NM_000088.3:c.591_593inv' + assert results['NM_000088.3:c.591_593inv']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.591_593inv']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.591_593inv']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8640_8642inv' - assert results['NM_000088.3:c.591_593inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275359_48275361inv', 'vcf': {'chr': 'chr17', 'ref': 'GGA', 'pos': '48275359', 'alt': 'TCC'}} - assert results['NM_000088.3:c.591_593inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197998_50198000inv', 'vcf': {'chr': 'chr17', 'ref': 'GGA', 'pos': '50197998', 'alt': 'TCC'}} - assert results['NM_000088.3:c.591_593inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275359_48275361inv', 'vcf': {'chr': '17', 'ref': 'GGA', 'pos': '48275359', 'alt': 'TCC'}} - assert results['NM_000088.3:c.591_593inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197998_50198000inv', 'vcf': {'chr': '17', 'ref': 'GGA', 'pos': '50197998', 'alt': 'TCC'}} - assert results['NM_000088.3:c.591_593inv']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - - assert results['flag'] == 'gene_variant' + assert results['NM_000088.3:c.591_593inv']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Pro198Asp)', 'slr': 'NP_000079.2:p.(P198D)'} + assert results['NM_000088.3:c.591_593inv']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.591_593inv' + assert results['NM_000088.3:c.591_593inv']['hgvs_lrg_variant'] == 'LRG_1:g.8640_8642inv' + 
self.assertCountEqual(results['NM_000088.3:c.591_593inv']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.591_593inv']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275359_48275361inv', 'vcf': {'chr': 'chr17', 'pos': '48275359', 'ref': 'GGA', 'alt': 'TCC'}} + assert results['NM_000088.3:c.591_593inv']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197998_50198000inv', 'vcf': {'chr': 'chr17', 'pos': '50197998', 'ref': 'GGA', 'alt': 'TCC'}} + assert results['NM_000088.3:c.591_593inv']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275359_48275361inv', 'vcf': {'chr': '17', 'pos': '48275359', 'ref': 'GGA', 'alt': 'TCC'}} + assert results['NM_000088.3:c.591_593inv']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50197998_50198000inv', 'vcf': {'chr': '17', 'pos': '50197998', 'ref': 'GGA', 'alt': 'TCC'}} + assert results['NM_000088.3:c.591_593inv']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant162(self): variant = '7-94039128-CTTG-C' @@ -5161,97 +5234,100 @@ def test_variant162(self): assert results['flag'] == 'gene_variant' assert 'NM_000089.3:c.1035_1035+2del' in list(results.keys()) - assert results['NM_000089.3:c.1035_1035+2del']['hgvs_lrg_transcript_variant'] == 'LRG_2t1:c.1035_1035+2del' - assert results['NM_000089.3:c.1035_1035+2del']['refseqgene_context_intronic_sequence'] == 'NG_007405.1(NM_000089.3):c.1035_1035+2del' - self.assertCountEqual(results['NM_000089.3:c.1035_1035+2del']['alt_genomic_loci'], []) - assert results['NM_000089.3:c.1035_1035+2del']['gene_symbol'] == 'COL1A2' - assert 
results['NM_000089.3:c.1035_1035+2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000080.2(LRG_2p1):p.(Val345del)', 'slr': 'NP_000080.2:p.(V345del)'} assert results['NM_000089.3:c.1035_1035+2del']['submitted_variant'] == '7-94039128-CTTG-C' - assert results['NM_000089.3:c.1035_1035+2del']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_000089.3):c.1035_1035+2del' - assert results['NM_000089.3:c.1035_1035+2del']['hgvs_lrg_variant'] == 'LRG_2:g.20261_20263del' + assert results['NM_000089.3:c.1035_1035+2del']['gene_symbol'] == 'COL1A2' + assert results['NM_000089.3:c.1035_1035+2del']['gene_ids'] == {'hgnc_id': 'HGNC:2198', 'entrez_gene_id': '1278', 'ucsc_id': 'uc003ung.1', 'omim_id': ['120160']} assert results['NM_000089.3:c.1035_1035+2del']['hgvs_transcript_variant'] == 'NM_000089.3:c.1035_1035+2del' + assert results['NM_000089.3:c.1035_1035+2del']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_000089.3):c.1035_1035+2del' + assert results['NM_000089.3:c.1035_1035+2del']['refseqgene_context_intronic_sequence'] == 'NG_007405.1(NM_000089.3):c.1035_1035+2del' assert results['NM_000089.3:c.1035_1035+2del']['hgvs_refseqgene_variant'] == 'NG_007405.1:g.20261_20263del' - assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.94039133_94039135del', 'vcf': {'chr': 'chr7', 'ref': 'CTTG', 'pos': '94039128', 'alt': 'C'}} - assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.94409821_94409823del', 'vcf': {'chr': 'chr7', 'ref': 'CTTG', 'pos': '94409816', 'alt': 'C'}} - assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.94039133_94039135del', 'vcf': {'chr': '7', 'ref': 'CTTG', 'pos': '94039128', 'alt': 'C'}} - assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000007.14:g.94409821_94409823del', 'vcf': {'chr': '7', 'ref': 'CTTG', 'pos': '94409816', 'alt': 'C'}} - assert results['NM_000089.3:c.1035_1035+2del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007405.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000080.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000089.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_2.xml'} - + assert results['NM_000089.3:c.1035_1035+2del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000080.2(LRG_2p1):p.(Val345del)', 'slr': 'NP_000080.2:p.(V345del)'} + assert results['NM_000089.3:c.1035_1035+2del']['hgvs_lrg_transcript_variant'] == 'LRG_2t1:c.1035_1035+2del' + assert results['NM_000089.3:c.1035_1035+2del']['hgvs_lrg_variant'] == 'LRG_2:g.20261_20263del' + self.assertCountEqual(results['NM_000089.3:c.1035_1035+2del']['alt_genomic_loci'], []) + assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.94039133_94039135del', 'vcf': {'chr': 'chr7', 'pos': '94039128', 'ref': 'CTTG', 'alt': 'C'}} + assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.94409821_94409823del', 'vcf': {'chr': 'chr7', 'pos': '94409816', 'ref': 'CTTG', 'alt': 'C'}} + assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.94039133_94039135del', 'vcf': {'chr': '7', 'pos': '94039128', 'ref': 'CTTG', 'alt': 'C'}} + assert results['NM_000089.3:c.1035_1035+2del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.94409821_94409823del', 'vcf': {'chr': '7', 'pos': '94409816', 'ref': 'CTTG', 'alt': 'C'}} + assert results['NM_000089.3:c.1035_1035+2del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000089.3', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_000080.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007405.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_2.xml'} def test_variant163(self): variant = '9-135800972-AC-ACC' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_001162427.1:c.210+1615dup' in list(results.keys()) - assert results['NM_001162427.1:c.210+1615dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001162427.1:c.210+1615dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001162427.1:c.210+1615dup']['alt_genomic_loci'], []) - assert results['NM_001162427.1:c.210+1615dup']['gene_symbol'] == 'TSC1' - assert results['NM_001162427.1:c.210+1615dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001155899.1:p.?', 'slr': 'NP_001155899.1:p.?'} assert results['NM_001162427.1:c.210+1615dup']['submitted_variant'] == '9-135800972-AC-ACC' - assert results['NM_001162427.1:c.210+1615dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001162427.1):c.210+1615dup' - assert results['NM_001162427.1:c.210+1615dup']['hgvs_lrg_variant'] == '' + assert results['NM_001162427.1:c.210+1615dup']['gene_symbol'] == 'TSC1' + assert results['NM_001162427.1:c.210+1615dup']['gene_ids'] == {'hgnc_id': 'HGNC:12362', 'entrez_gene_id': '7248', 'ucsc_id': 'uc004cca.3', 'omim_id': ['605284']} assert results['NM_001162427.1:c.210+1615dup']['hgvs_transcript_variant'] == 'NM_001162427.1:c.210+1615dup' + assert results['NM_001162427.1:c.210+1615dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001162427.1):c.210+1615dup' + assert results['NM_001162427.1:c.210+1615dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001162427.1:c.210+1615dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_001162427.1:c.210+1615dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155899.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162427.1'} + assert results['NM_001162427.1:c.210+1615dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001155899.1:p.?', 'slr': 'NP_001155899.1:p.?'} + assert results['NM_001162427.1:c.210+1615dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001162427.1:c.210+1615dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001162427.1:c.210+1615dup']['alt_genomic_loci'], []) + assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'pos': '135800972', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'pos': '132925585', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'pos': '135800972', 'ref': 'A', 'alt': 'AC'}} + 
assert results['NM_001162427.1:c.210+1615dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'pos': '132925585', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_001162427.1:c.210+1615dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162427.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155899.1'} assert 'NM_001162426.1:c.363+1dup' in list(results.keys()) - assert results['NM_001162426.1:c.363+1dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001162426.1:c.363+1dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001162426.1:c.363+1dup']['alt_genomic_loci'], []) - assert results['NM_001162426.1:c.363+1dup']['gene_symbol'] == 'TSC1' - assert results['NM_001162426.1:c.363+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001155898.1:p.(Met122AspfsTer4)', 'slr': 'NP_001155898.1:p.(M122Dfs*4)'} assert results['NM_001162426.1:c.363+1dup']['submitted_variant'] == '9-135800972-AC-ACC' - assert results['NM_001162426.1:c.363+1dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001162426.1):c.363+1dup' - assert results['NM_001162426.1:c.363+1dup']['hgvs_lrg_variant'] == '' + assert results['NM_001162426.1:c.363+1dup']['gene_symbol'] == 'TSC1' + assert results['NM_001162426.1:c.363+1dup']['gene_ids'] == {'hgnc_id': 'HGNC:12362', 'entrez_gene_id': '7248', 'ucsc_id': 'uc004cca.3', 'omim_id': ['605284']} assert results['NM_001162426.1:c.363+1dup']['hgvs_transcript_variant'] == 'NM_001162426.1:c.363+1dup' + assert results['NM_001162426.1:c.363+1dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001162426.1):c.363+1dup' + assert results['NM_001162426.1:c.363+1dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001162426.1:c.363+1dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_001162426.1:c.363+1dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155898.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162426.1'} - - assert results['flag'] == 'gene_variant' - assert 'NM_001362177.1:c.-1+1dup' in list(results.keys()) - assert results['NM_001362177.1:c.-1+1dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001362177.1:c.-1+1dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001362177.1:c.-1+1dup']['alt_genomic_loci'], []) - assert results['NM_001362177.1:c.-1+1dup']['gene_symbol'] == 'TSC1' - assert results['NM_001362177.1:c.-1+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001349106.1:p.?', 'slr': 'NP_001349106.1:p.?'} - assert results['NM_001362177.1:c.-1+1dup']['submitted_variant'] == '9-135800972-AC-ACC' - assert results['NM_001362177.1:c.-1+1dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001362177.1):c.-1+1dup' - assert results['NM_001362177.1:c.-1+1dup']['hgvs_lrg_variant'] == '' - assert results['NM_001362177.1:c.-1+1dup']['hgvs_transcript_variant'] == 'NM_001362177.1:c.-1+1dup' - assert 
results['NM_001362177.1:c.-1+1dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_001362177.1:c.-1+1dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001349106.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001362177.1'} + assert results['NM_001162426.1:c.363+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001155898.1:p.(Met122AspfsTer4)', 'slr': 'NP_001155898.1:p.(M122Dfs*4)'} + assert results['NM_001162426.1:c.363+1dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001162426.1:c.363+1dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001162426.1:c.363+1dup']['alt_genomic_loci'], []) + assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'pos': '135800972', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'pos': '132925585', 'ref': 'A', 'alt': 'AC'}} + assert 
results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'pos': '135800972', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_001162426.1:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'pos': '132925585', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_001162426.1:c.363+1dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162426.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155898.1'} assert 'NM_000368.4:c.363+1dup' in list(results.keys()) - assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_transcript_variant'] == 'LRG_486t1:c.363+1dup' - assert results['NM_000368.4:c.363+1dup']['refseqgene_context_intronic_sequence'] == 'NG_012386.1(NM_000368.4):c.363+1dup' - self.assertCountEqual(results['NM_000368.4:c.363+1dup']['alt_genomic_loci'], []) - assert results['NM_000368.4:c.363+1dup']['gene_symbol'] == 'TSC1' - assert results['NM_000368.4:c.363+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000359.1(LRG_486p1):p.(Met122AspfsTer4)', 'slr': 'NP_000359.1:p.(M122Dfs*4)'} assert results['NM_000368.4:c.363+1dup']['submitted_variant'] == '9-135800972-AC-ACC' - assert results['NM_000368.4:c.363+1dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_000368.4):c.363+1dup' - assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_variant'] == 'LRG_486:g.24048dup' + assert results['NM_000368.4:c.363+1dup']['gene_symbol'] == 'TSC1' + assert results['NM_000368.4:c.363+1dup']['gene_ids'] == {'hgnc_id': 'HGNC:12362', 'entrez_gene_id': '7248', 'ucsc_id': 'uc004cca.3', 'omim_id': ['605284']} assert results['NM_000368.4:c.363+1dup']['hgvs_transcript_variant'] == 'NM_000368.4:c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_000368.4):c.363+1dup' + assert 
results['NM_000368.4:c.363+1dup']['refseqgene_context_intronic_sequence'] == 'NG_012386.1(NM_000368.4):c.363+1dup' assert results['NM_000368.4:c.363+1dup']['hgvs_refseqgene_variant'] == 'NG_012386.1:g.24048dup' - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '135800972', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'ref': 'A', 'pos': '132925585', 'alt': 'AC'}} - assert results['NM_000368.4:c.363+1dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012386.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_486.xml'} + assert results['NM_000368.4:c.363+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000359.1(LRG_486p1):p.(Met122AspfsTer4)', 'slr': 'NP_000359.1:p.(M122Dfs*4)'} + assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_transcript_variant'] == 'LRG_486t1:c.363+1dup' + assert results['NM_000368.4:c.363+1dup']['hgvs_lrg_variant'] == 'LRG_486:g.24048dup' + self.assertCountEqual(results['NM_000368.4:c.363+1dup']['alt_genomic_loci'], []) + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'pos': 
'135800972', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'pos': '132925585', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'pos': '135800972', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'pos': '132925585', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_000368.4:c.363+1dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012386.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_486.xml'} + assert 'NM_001362177.1:c.-1+1dup' in list(results.keys()) + assert results['NM_001362177.1:c.-1+1dup']['submitted_variant'] == '9-135800972-AC-ACC' + assert results['NM_001362177.1:c.-1+1dup']['gene_symbol'] == 'TSC1' + assert results['NM_001362177.1:c.-1+1dup']['gene_ids'] == {'hgnc_id': 'HGNC:12362', 'entrez_gene_id': '7248', 'ucsc_id': 'uc004cca.3', 'omim_id': ['605284']} + assert results['NM_001362177.1:c.-1+1dup']['hgvs_transcript_variant'] == 'NM_001362177.1:c.-1+1dup' + assert results['NM_001362177.1:c.-1+1dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001362177.1):c.-1+1dup' + assert results['NM_001362177.1:c.-1+1dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001362177.1:c.-1+1dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001362177.1:c.-1+1dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001349106.1:p.?', 'slr': 'NP_001349106.1:p.?'} + assert results['NM_001362177.1:c.-1+1dup']['hgvs_lrg_transcript_variant'] == '' 
+ assert results['NM_001362177.1:c.-1+1dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001362177.1:c.-1+1dup']['alt_genomic_loci'], []) + assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': 'chr9', 'pos': '135800972', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': 'chr9', 'pos': '132925585', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135800974dup', 'vcf': {'chr': '9', 'pos': '135800972', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_001362177.1:c.-1+1dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132925587dup', 'vcf': {'chr': '9', 'pos': '132925585', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_001362177.1:c.-1+1dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001362177.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001349106.1'} def test_variant164(self): variant = '1-43212925-C-T' @@ -5259,174 +5335,180 @@ def test_variant164(self): print(results) assert results['flag'] == 'gene_variant' + assert 'NM_022356.3:c.2055+18G>A' in list(results.keys()) + assert results['NM_022356.3:c.2055+18G>A']['submitted_variant'] == '1-43212925-C-T' + assert results['NM_022356.3:c.2055+18G>A']['gene_symbol'] == 'P3H1' + assert results['NM_022356.3:c.2055+18G>A']['gene_ids'] == {'hgnc_id': 'HGNC:19316', 'entrez_gene_id': '64175', 'ucsc_id': '', 'omim_id': ['610339']} + assert results['NM_022356.3:c.2055+18G>A']['hgvs_transcript_variant'] == 'NM_022356.3:c.2055+18G>A' + assert results['NM_022356.3:c.2055+18G>A']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_022356.3):c.2055+18G>A' + assert 
results['NM_022356.3:c.2055+18G>A']['refseqgene_context_intronic_sequence'] == 'NG_008123.1(NM_022356.3):c.2055+18G>A' + assert results['NM_022356.3:c.2055+18G>A']['hgvs_refseqgene_variant'] == 'NG_008123.1:g.24831G>A' + assert results['NM_022356.3:c.2055+18G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_071751.3(LRG_5p1):p.?', 'slr': 'NP_071751.3:p.?'} + assert results['NM_022356.3:c.2055+18G>A']['hgvs_lrg_transcript_variant'] == 'LRG_5t1:c.2055+18G>A' + assert results['NM_022356.3:c.2055+18G>A']['hgvs_lrg_variant'] == 'LRG_5:g.24831G>A' + self.assertCountEqual(results['NM_022356.3:c.2055+18G>A']['alt_genomic_loci'], []) + assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': 'chr1', 'pos': '43212925', 'ref': 'C', 'alt': 'T'}} + assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': 'chr1', 'pos': '42747254', 'ref': 'C', 'alt': 'T'}} + assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': '1', 'pos': '43212925', 'ref': 'C', 'alt': 'T'}} + assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': '1', 'pos': '42747254', 'ref': 'C', 'alt': 'T'}} + assert results['NM_022356.3:c.2055+18G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_022356.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_071751.3', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008123.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_5.xml'} + assert 'NM_001243246.1:c.2073G>A' in list(results.keys()) - assert results['NM_001243246.1:c.2073G>A']['hgvs_lrg_transcript_variant'] == 'LRG_5t3:c.2073G>A' - assert 
results['NM_001243246.1:c.2073G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001243246.1:c.2073G>A']['alt_genomic_loci'], []) - assert results['NM_001243246.1:c.2073G>A']['gene_symbol'] == 'P3H1' - assert results['NM_001243246.1:c.2073G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001230175.1(LRG_5p3):p.(Ala691=)', 'slr': 'NP_001230175.1:p.(A691=)'} assert results['NM_001243246.1:c.2073G>A']['submitted_variant'] == '1-43212925-C-T' - assert results['NM_001243246.1:c.2073G>A']['genome_context_intronic_sequence'] == '' - assert results['NM_001243246.1:c.2073G>A']['hgvs_lrg_variant'] == '' + assert results['NM_001243246.1:c.2073G>A']['gene_symbol'] == 'P3H1' + assert results['NM_001243246.1:c.2073G>A']['gene_ids'] == {'hgnc_id': 'HGNC:19316', 'entrez_gene_id': '64175', 'ucsc_id': '', 'omim_id': ['610339']} assert results['NM_001243246.1:c.2073G>A']['hgvs_transcript_variant'] == 'NM_001243246.1:c.2073G>A' + assert results['NM_001243246.1:c.2073G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001243246.1:c.2073G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001243246.1:c.2073G>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001243246.1:c.2073G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '43212925', 'alt': 'T'}} - assert results['NM_001243246.1:c.2073G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '42747254', 'alt': 'T'}} - assert results['NM_001243246.1:c.2073G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '43212925', 'alt': 'T'}} - assert results['NM_001243246.1:c.2073G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': 
'1', 'ref': 'C', 'pos': '42747254', 'alt': 'T'}} - assert results['NM_001243246.1:c.2073G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001230175.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001243246.1'} + assert results['NM_001243246.1:c.2073G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001230175.1(LRG_5p3):p.(Ala691=)', 'slr': 'NP_001230175.1:p.(A691=)'} + assert results['NM_001243246.1:c.2073G>A']['hgvs_lrg_transcript_variant'] == 'LRG_5t3:c.2073G>A' + assert results['NM_001243246.1:c.2073G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001243246.1:c.2073G>A']['alt_genomic_loci'], []) + assert results['NM_001243246.1:c.2073G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': 'chr1', 'pos': '43212925', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001243246.1:c.2073G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': 'chr1', 'pos': '42747254', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001243246.1:c.2073G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': '1', 'pos': '43212925', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001243246.1:c.2073G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': '1', 'pos': '42747254', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001243246.1:c.2073G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001243246.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001230175.1'} assert 'NM_001146289.1:c.2073G>A' in list(results.keys()) - assert results['NM_001146289.1:c.2073G>A']['hgvs_lrg_transcript_variant'] == 'LRG_5t2:c.2073G>A' - assert results['NM_001146289.1:c.2073G>A']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_001146289.1:c.2073G>A']['alt_genomic_loci'], []) - assert results['NM_001146289.1:c.2073G>A']['gene_symbol'] == 'P3H1' - assert results['NM_001146289.1:c.2073G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001139761.1(LRG_5p2):p.(Ala691=)', 'slr': 'NP_001139761.1:p.(A691=)'} assert results['NM_001146289.1:c.2073G>A']['submitted_variant'] == '1-43212925-C-T' - assert results['NM_001146289.1:c.2073G>A']['genome_context_intronic_sequence'] == '' - assert results['NM_001146289.1:c.2073G>A']['hgvs_lrg_variant'] == 'LRG_5:g.24831G>A' + assert results['NM_001146289.1:c.2073G>A']['gene_symbol'] == 'P3H1' + assert results['NM_001146289.1:c.2073G>A']['gene_ids'] == {'hgnc_id': 'HGNC:19316', 'entrez_gene_id': '64175', 'ucsc_id': '', 'omim_id': ['610339']} assert results['NM_001146289.1:c.2073G>A']['hgvs_transcript_variant'] == 'NM_001146289.1:c.2073G>A' + assert results['NM_001146289.1:c.2073G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001146289.1:c.2073G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001146289.1:c.2073G>A']['hgvs_refseqgene_variant'] == 'NG_008123.1:g.24831G>A' - assert results['NM_001146289.1:c.2073G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '43212925', 'alt': 'T'}} - assert results['NM_001146289.1:c.2073G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '42747254', 'alt': 'T'}} - assert results['NM_001146289.1:c.2073G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '43212925', 'alt': 'T'}} - assert results['NM_001146289.1:c.2073G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '42747254', 'alt': 
'T'}} - assert results['NM_001146289.1:c.2073G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008123.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001139761.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001146289.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_5.xml'} - - assert 'NM_022356.3:c.2055+18G>A' in list(results.keys()) - assert results['NM_022356.3:c.2055+18G>A']['hgvs_lrg_transcript_variant'] == 'LRG_5t1:c.2055+18G>A' - assert results['NM_022356.3:c.2055+18G>A']['refseqgene_context_intronic_sequence'] == 'NG_008123.1(NM_022356.3):c.2055+18G>A' - self.assertCountEqual(results['NM_022356.3:c.2055+18G>A']['alt_genomic_loci'], []) - assert results['NM_022356.3:c.2055+18G>A']['gene_symbol'] == 'P3H1' - assert results['NM_022356.3:c.2055+18G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_071751.3(LRG_5p1):p.?', 'slr': 'NP_071751.3:p.?'} - assert results['NM_022356.3:c.2055+18G>A']['submitted_variant'] == '1-43212925-C-T' - assert results['NM_022356.3:c.2055+18G>A']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_022356.3):c.2055+18G>A' - assert results['NM_022356.3:c.2055+18G>A']['hgvs_lrg_variant'] == 'LRG_5:g.24831G>A' - assert results['NM_022356.3:c.2055+18G>A']['hgvs_transcript_variant'] == 'NM_022356.3:c.2055+18G>A' - assert results['NM_022356.3:c.2055+18G>A']['hgvs_refseqgene_variant'] == 'NG_008123.1:g.24831G>A' - assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '43212925', 'alt': 'T'}} - assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '42747254', 'alt': 'T'}} - assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 
'vcf': {'chr': '1', 'ref': 'C', 'pos': '43212925', 'alt': 'T'}} - assert results['NM_022356.3:c.2055+18G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '42747254', 'alt': 'T'}} - assert results['NM_022356.3:c.2055+18G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008123.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_071751.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_022356.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_5.xml'} - + assert results['NM_001146289.1:c.2073G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001139761.1(LRG_5p2):p.(Ala691=)', 'slr': 'NP_001139761.1:p.(A691=)'} + assert results['NM_001146289.1:c.2073G>A']['hgvs_lrg_transcript_variant'] == 'LRG_5t2:c.2073G>A' + assert results['NM_001146289.1:c.2073G>A']['hgvs_lrg_variant'] == 'LRG_5:g.24831G>A' + self.assertCountEqual(results['NM_001146289.1:c.2073G>A']['alt_genomic_loci'], []) + assert results['NM_001146289.1:c.2073G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': 'chr1', 'pos': '43212925', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001146289.1:c.2073G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': 'chr1', 'pos': '42747254', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001146289.1:c.2073G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.43212925C>T', 'vcf': {'chr': '1', 'pos': '43212925', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001146289.1:c.2073G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.42747254C>T', 'vcf': {'chr': '1', 'pos': '42747254', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001146289.1:c.2073G>A']['reference_sequence_records'] == {'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001146289.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001139761.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008123.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_5.xml'} def test_variant165(self): variant = 'HG987_PATCH-355171-C-A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_001194958.2:c.20C>A' in list(results.keys()) - assert results['NM_001194958.2:c.20C>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001194958.2:c.20C>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001194958.2:c.20C>A']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315950.2:g.355171C>A', 'vcf': {'chr': 'HG987_PATCH', 'ref': 'C', 'pos': '355171', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315950.2:g.355171C>A', 'vcf': {'chr': 'NW_003315950.2', 'ref': 'C', 'pos': '355171', 'alt': 'A'}}}]) - assert results['NM_001194958.2:c.20C>A']['gene_symbol'] == 'KCNJ18' - assert results['NM_001194958.2:c.20C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001181887.2:p.(Ala7Asp)', 'slr': 'NP_001181887.2:p.(A7D)'} assert results['NM_001194958.2:c.20C>A']['submitted_variant'] == 'HG987_PATCH-355171-C-A' - assert results['NM_001194958.2:c.20C>A']['genome_context_intronic_sequence'] == '' - assert results['NM_001194958.2:c.20C>A']['hgvs_lrg_variant'] == '' + assert results['NM_001194958.2:c.20C>A']['gene_symbol'] == 'KCNJ18' + assert results['NM_001194958.2:c.20C>A']['gene_ids'] == {'hgnc_id': 'HGNC:39080', 'entrez_gene_id': '100134444', 'ucsc_id': 'uc032exz.1', 'omim_id': ['613236']} assert results['NM_001194958.2:c.20C>A']['hgvs_transcript_variant'] == 'NM_001194958.2:c.20C>A' + assert results['NM_001194958.2:c.20C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001194958.2:c.20C>A']['refseqgene_context_intronic_sequence'] 
== '' assert results['NM_001194958.2:c.20C>A']['hgvs_refseqgene_variant'] == 'NG_033093.1:g.15284C>A' + assert results['NM_001194958.2:c.20C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001181887.2:p.(Ala7Asp)', 'slr': 'NP_001181887.2:p.(A7D)'} + assert results['NM_001194958.2:c.20C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001194958.2:c.20C>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001194958.2:c.20C>A']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003315950.2:g.355171C>A', 'vcf': {'chr': 'HG987_PATCH', 'pos': '355171', 'ref': 'C', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003315950.2:g.355171C>A', 'vcf': {'chr': 'NW_003315950.2', 'pos': '355171', 'ref': 'C', 'alt': 'A'}}}]) assert 'hg19' not in list(results['NM_001194958.2:c.20C>A']['primary_assembly_loci'].keys()) - assert results['NM_001194958.2:c.20C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.21702806C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '21702806', 'alt': 'A'}} + assert results['NM_001194958.2:c.20C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.21702806C>A', 'vcf': {'chr': 'chr17', 'pos': '21702806', 'ref': 'C', 'alt': 'A'}} assert 'grch37' not in list(results['NM_001194958.2:c.20C>A']['primary_assembly_loci'].keys()) - assert results['NM_001194958.2:c.20C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.21702806C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '21702806', 'alt': 'A'}} - assert results['NM_001194958.2:c.20C>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_033093.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001181887.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001194958.2'} - - assert results['flag'] == 'gene_variant' + assert results['NM_001194958.2:c.20C>A']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000017.11:g.21702806C>A', 'vcf': {'chr': '17', 'pos': '21702806', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001194958.2:c.20C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001194958.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001181887.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_033093.1'} def test_variant166(self): variant = '20-43252915-T-C' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_000022.3:c.534A>G' in list(results.keys()) - assert results['NM_000022.3:c.534A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000022.3:c.534A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000022.3:c.534A>G']['alt_genomic_loci'], []) - assert results['NM_000022.3:c.534A>G']['gene_symbol'] == 'ADA' - assert results['NM_000022.3:c.534A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000013.2(LRG_16p1):p.(Val178=)', 'slr': 'NP_000013.2:p.(V178=)'} - assert results['NM_000022.3:c.534A>G']['submitted_variant'] == '20-43252915-T-C' - assert results['NM_000022.3:c.534A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_000022.3:c.534A>G']['hgvs_lrg_variant'] == '' - assert results['NM_000022.3:c.534A>G']['hgvs_transcript_variant'] == 'NM_000022.3:c.534A>G' - assert results['NM_000022.3:c.534A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_000022.3:c.534A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'ref': 'T', 'pos': '43252915', 'alt': 'C'}} - assert results['NM_000022.3:c.534A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': 'chr20', 'ref': 'T', 'pos': '44624274', 'alt': 'C'}} - assert results['NM_000022.3:c.534A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'ref': 'T', 'pos': '43252915', 'alt': 'C'}} - assert results['NM_000022.3:c.534A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': '20', 'ref': 'T', 'pos': '44624274', 'alt': 'C'}} - assert results['NM_000022.3:c.534A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000013.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000022.3'} - + assert results['flag'] == 'gene_variant' assert 'NM_001322051.1:c.534A>G' in list(results.keys()) - assert results['NM_001322051.1:c.534A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001322051.1:c.534A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001322051.1:c.534A>G']['alt_genomic_loci'], []) - assert results['NM_001322051.1:c.534A>G']['gene_symbol'] == 'ADA' - assert results['NM_001322051.1:c.534A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308980.1:p.(Val178=)', 'slr': 'NP_001308980.1:p.(V178=)'} assert results['NM_001322051.1:c.534A>G']['submitted_variant'] == '20-43252915-T-C' - assert results['NM_001322051.1:c.534A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001322051.1:c.534A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001322051.1:c.534A>G']['gene_symbol'] == 'ADA' + assert results['NM_001322051.1:c.534A>G']['gene_ids'] == {'hgnc_id': 'HGNC:186', 'entrez_gene_id': '100', 'ucsc_id': 'uc002xmj.4', 'omim_id': ['608958']} assert results['NM_001322051.1:c.534A>G']['hgvs_transcript_variant'] == 'NM_001322051.1:c.534A>G' + assert results['NM_001322051.1:c.534A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322051.1:c.534A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001322051.1:c.534A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322051.1:c.534A>G']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'ref': 'T', 'pos': '43252915', 'alt': 'C'}} - assert results['NM_001322051.1:c.534A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': 'chr20', 'ref': 'T', 'pos': '44624274', 'alt': 'C'}} - assert results['NM_001322051.1:c.534A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'ref': 'T', 'pos': '43252915', 'alt': 'C'}} - assert results['NM_001322051.1:c.534A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': '20', 'ref': 'T', 'pos': '44624274', 'alt': 'C'}} - assert results['NM_001322051.1:c.534A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308980.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322051.1'} + assert results['NM_001322051.1:c.534A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308980.1:p.(Val178=)', 'slr': 'NP_001308980.1:p.(V178=)'} + assert results['NM_001322051.1:c.534A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322051.1:c.534A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001322051.1:c.534A>G']['alt_genomic_loci'], []) + assert results['NM_001322051.1:c.534A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'pos': '43252915', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322051.1:c.534A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': 'chr20', 'pos': '44624274', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322051.1:c.534A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'pos': '43252915', 'ref': 'T', 'alt': 'C'}} + assert 
results['NM_001322051.1:c.534A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': '20', 'pos': '44624274', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322051.1:c.534A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322051.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308980.1'} assert 'NM_000022.2:c.534A>G' in list(results.keys()) - assert results['NM_000022.2:c.534A>G']['hgvs_lrg_transcript_variant'] == 'LRG_16t1:c.534A>G' - assert results['NM_000022.2:c.534A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000022.2:c.534A>G']['alt_genomic_loci'], []) - assert results['NM_000022.2:c.534A>G']['gene_symbol'] == 'ADA' - assert results['NM_000022.2:c.534A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000013.2(LRG_16p1):p.(Val178=)', 'slr': 'NP_000013.2:p.(V178=)'} assert results['NM_000022.2:c.534A>G']['submitted_variant'] == '20-43252915-T-C' - assert results['NM_000022.2:c.534A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_000022.2:c.534A>G']['hgvs_lrg_variant'] == 'LRG_16:g.32462A>G' + assert results['NM_000022.2:c.534A>G']['gene_symbol'] == 'ADA' + assert results['NM_000022.2:c.534A>G']['gene_ids'] == {'hgnc_id': 'HGNC:186', 'entrez_gene_id': '100', 'ucsc_id': 'uc002xmj.4', 'omim_id': ['608958']} assert results['NM_000022.2:c.534A>G']['hgvs_transcript_variant'] == 'NM_000022.2:c.534A>G' + assert results['NM_000022.2:c.534A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_000022.2:c.534A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000022.2:c.534A>G']['hgvs_refseqgene_variant'] == 'NG_007385.1:g.32462A>G' - assert results['NM_000022.2:c.534A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'ref': 'T', 'pos': '43252915', 'alt': 'C'}} + assert 
results['NM_000022.2:c.534A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000013.2(LRG_16p1):p.(Val178=)', 'slr': 'NP_000013.2:p.(V178=)'} + assert results['NM_000022.2:c.534A>G']['hgvs_lrg_transcript_variant'] == 'LRG_16t1:c.534A>G' + assert results['NM_000022.2:c.534A>G']['hgvs_lrg_variant'] == 'LRG_16:g.32462A>G' + self.assertCountEqual(results['NM_000022.2:c.534A>G']['alt_genomic_loci'], []) + assert results['NM_000022.2:c.534A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'pos': '43252915', 'ref': 'T', 'alt': 'C'}} assert 'hg38' not in list(results['NM_000022.2:c.534A>G']['primary_assembly_loci'].keys()) - assert results['NM_000022.2:c.534A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'ref': 'T', 'pos': '43252915', 'alt': 'C'}} + assert results['NM_000022.2:c.534A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'pos': '43252915', 'ref': 'T', 'alt': 'C'}} assert 'grch38' not in list(results['NM_000022.2:c.534A>G']['primary_assembly_loci'].keys()) - assert results['NM_000022.2:c.534A>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007385.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000013.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000022.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_16.xml'} + assert results['NM_000022.2:c.534A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000022.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000013.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007385.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_16.xml'} - assert results['flag'] == 'gene_variant' assert 'NM_001322050.1:c.129A>G' in list(results.keys()) - assert 
results['NM_001322050.1:c.129A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001322050.1:c.129A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001322050.1:c.129A>G']['alt_genomic_loci'], []) - assert results['NM_001322050.1:c.129A>G']['gene_symbol'] == 'ADA' - assert results['NM_001322050.1:c.129A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308979.1:p.(Val43=)', 'slr': 'NP_001308979.1:p.(V43=)'} assert results['NM_001322050.1:c.129A>G']['submitted_variant'] == '20-43252915-T-C' - assert results['NM_001322050.1:c.129A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001322050.1:c.129A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001322050.1:c.129A>G']['gene_symbol'] == 'ADA' + assert results['NM_001322050.1:c.129A>G']['gene_ids'] == {'hgnc_id': 'HGNC:186', 'entrez_gene_id': '100', 'ucsc_id': 'uc002xmj.4', 'omim_id': ['608958']} assert results['NM_001322050.1:c.129A>G']['hgvs_transcript_variant'] == 'NM_001322050.1:c.129A>G' + assert results['NM_001322050.1:c.129A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322050.1:c.129A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001322050.1:c.129A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322050.1:c.129A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'ref': 'T', 'pos': '43252915', 'alt': 'C'}} - assert results['NM_001322050.1:c.129A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': 'chr20', 'ref': 'T', 'pos': '44624274', 'alt': 'C'}} - assert results['NM_001322050.1:c.129A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'ref': 'T', 'pos': '43252915', 'alt': 'C'}} - assert results['NM_001322050.1:c.129A>G']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': '20', 'ref': 'T', 'pos': '44624274', 'alt': 'C'}} - assert results['NM_001322050.1:c.129A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308979.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322050.1'} + assert results['NM_001322050.1:c.129A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308979.1:p.(Val43=)', 'slr': 'NP_001308979.1:p.(V43=)'} + assert results['NM_001322050.1:c.129A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322050.1:c.129A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001322050.1:c.129A>G']['alt_genomic_loci'], []) + assert results['NM_001322050.1:c.129A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'pos': '43252915', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322050.1:c.129A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': 'chr20', 'pos': '44624274', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322050.1:c.129A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'pos': '43252915', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322050.1:c.129A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': '20', 'pos': '44624274', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322050.1:c.129A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322050.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308979.1'} + + assert 'NM_000022.3:c.534A>G' in list(results.keys()) + assert results['NM_000022.3:c.534A>G']['submitted_variant'] == '20-43252915-T-C' + assert results['NM_000022.3:c.534A>G']['gene_symbol'] == 'ADA' + assert 
results['NM_000022.3:c.534A>G']['gene_ids'] == {'hgnc_id': 'HGNC:186', 'entrez_gene_id': '100', 'ucsc_id': 'uc002xmj.4', 'omim_id': ['608958']} + assert results['NM_000022.3:c.534A>G']['hgvs_transcript_variant'] == 'NM_000022.3:c.534A>G' + assert results['NM_000022.3:c.534A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_000022.3:c.534A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000022.3:c.534A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_000022.3:c.534A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000013.2(LRG_16p1):p.(Val178=)', 'slr': 'NP_000013.2:p.(V178=)'} + assert results['NM_000022.3:c.534A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000022.3:c.534A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000022.3:c.534A>G']['alt_genomic_loci'], []) + assert results['NM_000022.3:c.534A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'pos': '43252915', 'ref': 'T', 'alt': 'C'}} + assert results['NM_000022.3:c.534A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': 'chr20', 'pos': '44624274', 'ref': 'T', 'alt': 'C'}} + assert results['NM_000022.3:c.534A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'pos': '43252915', 'ref': 'T', 'alt': 'C'}} + assert results['NM_000022.3:c.534A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': '20', 'pos': '44624274', 'ref': 'T', 'alt': 'C'}} + assert results['NM_000022.3:c.534A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000022.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000013.2'} assert 'NR_136160.1:n.685A>G' in list(results.keys()) - assert 
results['NR_136160.1:n.685A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_136160.1:n.685A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_136160.1:n.685A>G']['alt_genomic_loci'], []) - assert results['NR_136160.1:n.685A>G']['gene_symbol'] == 'ADA' - assert results['NR_136160.1:n.685A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_136160.1:n.685A>G']['submitted_variant'] == '20-43252915-T-C' - assert results['NR_136160.1:n.685A>G']['genome_context_intronic_sequence'] == '' - assert results['NR_136160.1:n.685A>G']['hgvs_lrg_variant'] == '' + assert results['NR_136160.1:n.685A>G']['gene_symbol'] == 'ADA' + assert results['NR_136160.1:n.685A>G']['gene_ids'] == {'hgnc_id': 'HGNC:186', 'entrez_gene_id': '100', 'ucsc_id': 'uc002xmj.4', 'omim_id': ['608958']} assert results['NR_136160.1:n.685A>G']['hgvs_transcript_variant'] == 'NR_136160.1:n.685A>G' + assert results['NR_136160.1:n.685A>G']['genome_context_intronic_sequence'] == '' + assert results['NR_136160.1:n.685A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NR_136160.1:n.685A>G']['hgvs_refseqgene_variant'] == '' - assert results['NR_136160.1:n.685A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'ref': 'T', 'pos': '43252915', 'alt': 'C'}} - assert results['NR_136160.1:n.685A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': 'chr20', 'ref': 'T', 'pos': '44624274', 'alt': 'C'}} - assert results['NR_136160.1:n.685A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'ref': 'T', 'pos': '43252915', 'alt': 'C'}} - assert results['NR_136160.1:n.685A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': '20', 'ref': 'T', 
'pos': '44624274', 'alt': 'C'}} + assert results['NR_136160.1:n.685A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_136160.1:n.685A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_136160.1:n.685A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_136160.1:n.685A>G']['alt_genomic_loci'], []) + assert results['NR_136160.1:n.685A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': 'chr20', 'pos': '43252915', 'ref': 'T', 'alt': 'C'}} + assert results['NR_136160.1:n.685A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': 'chr20', 'pos': '44624274', 'ref': 'T', 'alt': 'C'}} + assert results['NR_136160.1:n.685A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000020.10:g.43252915T>C', 'vcf': {'chr': '20', 'pos': '43252915', 'ref': 'T', 'alt': 'C'}} + assert results['NR_136160.1:n.685A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000020.11:g.44624274T>C', 'vcf': {'chr': '20', 'pos': '44624274', 'ref': 'T', 'alt': 'C'}} assert results['NR_136160.1:n.685A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_136160.1'} - def test_variant167(self): variant = '1-216219781-A-C' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() @@ -5434,353 +5516,353 @@ def test_variant167(self): assert results['flag'] == 'gene_variant' assert 'NM_206933.2:c.6317C>G' in list(results.keys()) - assert results['NM_206933.2:c.6317C>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_206933.2:c.6317C>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_206933.2:c.6317C>G']['alt_genomic_loci'], []) - assert results['NM_206933.2:c.6317C>G']['gene_symbol'] == 'USH2A' - assert 
results['NM_206933.2:c.6317C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_996816.2:p.(Thr2106Arg)', 'slr': 'NP_996816.2:p.(T2106R)'} assert results['NM_206933.2:c.6317C>G']['submitted_variant'] == '1-216219781-A-C' - assert results['NM_206933.2:c.6317C>G']['genome_context_intronic_sequence'] == '' - assert results['NM_206933.2:c.6317C>G']['hgvs_lrg_variant'] == '' + assert results['NM_206933.2:c.6317C>G']['gene_symbol'] == 'USH2A' + assert results['NM_206933.2:c.6317C>G']['gene_ids'] == {'hgnc_id': 'HGNC:12601', 'entrez_gene_id': '7399', 'ucsc_id': 'uc001hku.1', 'omim_id': ['608400']} assert results['NM_206933.2:c.6317C>G']['hgvs_transcript_variant'] == 'NM_206933.2:c.6317C>G' + assert results['NM_206933.2:c.6317C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_206933.2:c.6317C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_206933.2:c.6317C>G']['hgvs_refseqgene_variant'] == 'NG_009497.1:g.381958C>G' - assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.216219781A>C', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '216219781', 'alt': 'C'}} - assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.216046439A>C', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '216046439', 'alt': 'C'}} - assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.216219781A>C', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '216219781', 'alt': 'C'}} - assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.216046439A>C', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '216046439', 'alt': 'C'}} - assert results['NM_206933.2:c.6317C>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009497.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_996816.2', 
'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_206933.2'} - + assert results['NM_206933.2:c.6317C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_996816.2:p.(Thr2106Arg)', 'slr': 'NP_996816.2:p.(T2106R)'} + assert results['NM_206933.2:c.6317C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_206933.2:c.6317C>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_206933.2:c.6317C>G']['alt_genomic_loci'], []) + assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.216219781A>C', 'vcf': {'chr': 'chr1', 'pos': '216219781', 'ref': 'A', 'alt': 'C'}} + assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.216046439A>C', 'vcf': {'chr': 'chr1', 'pos': '216046439', 'ref': 'A', 'alt': 'C'}} + assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.216219781A>C', 'vcf': {'chr': '1', 'pos': '216219781', 'ref': 'A', 'alt': 'C'}} + assert results['NM_206933.2:c.6317C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.216046439A>C', 'vcf': {'chr': '1', 'pos': '216046439', 'ref': 'A', 'alt': 'C'}} + assert results['NM_206933.2:c.6317C>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_206933.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_996816.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009497.1'} def test_variant168(self): variant = '2-209113113-G-A,C,T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_005896.3:c.394C>G' in list(results.keys()) - assert results['NM_005896.3:c.394C>G']['hgvs_lrg_transcript_variant'] == 'LRG_610t1:c.394C>G' - assert results['NM_005896.3:c.394C>G']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_005896.3:c.394C>G']['alt_genomic_loci'], []) - assert results['NM_005896.3:c.394C>G']['gene_symbol'] == 'IDH1' - assert results['NM_005896.3:c.394C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005887.2(LRG_610p1):p.(Arg132Gly)', 'slr': 'NP_005887.2:p.(R132G)'} - assert results['NM_005896.3:c.394C>G']['submitted_variant'] == '2-209113113-G-A,C,T' - assert results['NM_005896.3:c.394C>G']['genome_context_intronic_sequence'] == '' - assert results['NM_005896.3:c.394C>G']['hgvs_lrg_variant'] == '' - assert results['NM_005896.3:c.394C>G']['hgvs_transcript_variant'] == 'NM_005896.3:c.394C>G' - assert results['NM_005896.3:c.394C>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_005896.3:c.394C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '209113113', 'alt': 'C'}} - assert results['NM_005896.3:c.394C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '208248389', 'alt': 'C'}} - assert results['NM_005896.3:c.394C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '209113113', 'alt': 'C'}} - assert results['NM_005896.3:c.394C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '208248389', 'alt': 'C'}} - assert results['NM_005896.3:c.394C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.3'} - - assert 'NM_001282387.1:c.394C>G' in list(results.keys()) - assert results['NM_001282387.1:c.394C>G']['hgvs_lrg_transcript_variant'] == 'LRG_610t2:c.394C>G' - assert results['NM_001282387.1:c.394C>G']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_001282387.1:c.394C>G']['alt_genomic_loci'], []) - assert results['NM_001282387.1:c.394C>G']['gene_symbol'] == 'IDH1' - assert results['NM_001282387.1:c.394C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269316.1(LRG_610p2):p.(Arg132Gly)', 'slr': 'NP_001269316.1:p.(R132G)'} - assert results['NM_001282387.1:c.394C>G']['submitted_variant'] == '2-209113113-G-A,C,T' - assert results['NM_001282387.1:c.394C>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001282387.1:c.394C>G']['hgvs_lrg_variant'] == '' - assert results['NM_001282387.1:c.394C>G']['hgvs_transcript_variant'] == 'NM_001282387.1:c.394C>G' - assert results['NM_001282387.1:c.394C>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001282387.1:c.394C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '209113113', 'alt': 'C'}} - assert results['NM_001282387.1:c.394C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '208248389', 'alt': 'C'}} - assert results['NM_001282387.1:c.394C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '209113113', 'alt': 'C'}} - assert results['NM_001282387.1:c.394C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '208248389', 'alt': 'C'}} - assert results['NM_001282387.1:c.394C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269316.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282387.1'} - - assert 'NM_001282387.1:c.394C>A' in list(results.keys()) - assert results['NM_001282387.1:c.394C>A']['hgvs_lrg_transcript_variant'] == 'LRG_610t2:c.394C>A' - assert 
results['NM_001282387.1:c.394C>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001282387.1:c.394C>A']['alt_genomic_loci'], []) - assert results['NM_001282387.1:c.394C>A']['gene_symbol'] == 'IDH1' - assert results['NM_001282387.1:c.394C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269316.1(LRG_610p2):p.(Arg132Ser)', 'slr': 'NP_001269316.1:p.(R132S)'} - assert results['NM_001282387.1:c.394C>A']['submitted_variant'] == '2-209113113-G-A,C,T' - assert results['NM_001282387.1:c.394C>A']['genome_context_intronic_sequence'] == '' - assert results['NM_001282387.1:c.394C>A']['hgvs_lrg_variant'] == '' - assert results['NM_001282387.1:c.394C>A']['hgvs_transcript_variant'] == 'NM_001282387.1:c.394C>A' - assert results['NM_001282387.1:c.394C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001282387.1:c.394C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '209113113', 'alt': 'T'}} - assert results['NM_001282387.1:c.394C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '208248389', 'alt': 'T'}} - assert results['NM_001282387.1:c.394C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '209113113', 'alt': 'T'}} - assert results['NM_001282387.1:c.394C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '208248389', 'alt': 'T'}} - assert results['NM_001282387.1:c.394C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269316.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282387.1'} - - assert 'NM_005896.3:c.394C>A' in list(results.keys()) - assert 
results['NM_005896.3:c.394C>A']['hgvs_lrg_transcript_variant'] == 'LRG_610t1:c.394C>A' - assert results['NM_005896.3:c.394C>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_005896.3:c.394C>A']['alt_genomic_loci'], []) - assert results['NM_005896.3:c.394C>A']['gene_symbol'] == 'IDH1' - assert results['NM_005896.3:c.394C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005887.2(LRG_610p1):p.(Arg132Ser)', 'slr': 'NP_005887.2:p.(R132S)'} - assert results['NM_005896.3:c.394C>A']['submitted_variant'] == '2-209113113-G-A,C,T' - assert results['NM_005896.3:c.394C>A']['genome_context_intronic_sequence'] == '' - assert results['NM_005896.3:c.394C>A']['hgvs_lrg_variant'] == '' - assert results['NM_005896.3:c.394C>A']['hgvs_transcript_variant'] == 'NM_005896.3:c.394C>A' - assert results['NM_005896.3:c.394C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_005896.3:c.394C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '209113113', 'alt': 'T'}} - assert results['NM_005896.3:c.394C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '208248389', 'alt': 'T'}} - assert results['NM_005896.3:c.394C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '209113113', 'alt': 'T'}} - assert results['NM_005896.3:c.394C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '208248389', 'alt': 'T'}} - assert results['NM_005896.3:c.394C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.3'} - + assert results['flag'] == 'gene_variant' assert 'NM_001282386.1:c.394C>T' in 
list(results.keys()) - assert results['NM_001282386.1:c.394C>T']['hgvs_lrg_transcript_variant'] == 'LRG_610t3:c.394C>T' - assert results['NM_001282386.1:c.394C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001282386.1:c.394C>T']['alt_genomic_loci'], []) - assert results['NM_001282386.1:c.394C>T']['gene_symbol'] == 'IDH1' - assert results['NM_001282386.1:c.394C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269315.1(LRG_610p3):p.(Arg132Cys)', 'slr': 'NP_001269315.1:p.(R132C)'} assert results['NM_001282386.1:c.394C>T']['submitted_variant'] == '2-209113113-G-A,C,T' - assert results['NM_001282386.1:c.394C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001282386.1:c.394C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001282386.1:c.394C>T']['gene_symbol'] == 'IDH1' + assert results['NM_001282386.1:c.394C>T']['gene_ids'] == {'hgnc_id': 'HGNC:5382', 'entrez_gene_id': '3417', 'ucsc_id': 'uc002vcu.5', 'omim_id': ['147700']} assert results['NM_001282386.1:c.394C>T']['hgvs_transcript_variant'] == 'NM_001282386.1:c.394C>T' + assert results['NM_001282386.1:c.394C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001282386.1:c.394C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001282386.1:c.394C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001282386.1:c.394C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '209113113', 'alt': 'A'}} - assert results['NM_001282386.1:c.394C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '208248389', 'alt': 'A'}} - assert results['NM_001282386.1:c.394C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '209113113', 'alt': 'A'}} - assert 
results['NM_001282386.1:c.394C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '208248389', 'alt': 'A'}} - assert results['NM_001282386.1:c.394C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269315.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282386.1'} - - assert 'NM_005896.2:c.394C>A' in list(results.keys()) - assert results['NM_005896.2:c.394C>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_005896.2:c.394C>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_005896.2:c.394C>A']['alt_genomic_loci'], []) - assert results['NM_005896.2:c.394C>A']['gene_symbol'] == 'IDH1' - assert results['NM_005896.2:c.394C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005887.2(LRG_610p1):p.(Arg132Ser)', 'slr': 'NP_005887.2:p.(R132S)'} - assert results['NM_005896.2:c.394C>A']['submitted_variant'] == '2-209113113-G-A,C,T' - assert results['NM_005896.2:c.394C>A']['genome_context_intronic_sequence'] == '' - assert results['NM_005896.2:c.394C>A']['hgvs_lrg_variant'] == 'LRG_610:g.22686C>A' - assert results['NM_005896.2:c.394C>A']['hgvs_transcript_variant'] == 'NM_005896.2:c.394C>A' - assert results['NM_005896.2:c.394C>A']['hgvs_refseqgene_variant'] == 'NG_023319.2:g.22686C>A' - assert results['NM_005896.2:c.394C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '209113113', 'alt': 'T'}} - assert 'hg38' not in list(results['NM_005896.2:c.394C>A']['primary_assembly_loci'].keys()) - assert results['NM_005896.2:c.394C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '209113113', 'alt': 'T'}} - assert 'grch38' not in list(results['NM_005896.2:c.394C>A']['primary_assembly_loci'].keys()) - assert 
results['NM_005896.2:c.394C>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_023319.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_610.xml'} - - assert 'NM_005896.2:c.394C>G' in list(results.keys()) - assert results['NM_005896.2:c.394C>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_005896.2:c.394C>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_005896.2:c.394C>G']['alt_genomic_loci'], []) - assert results['NM_005896.2:c.394C>G']['gene_symbol'] == 'IDH1' - assert results['NM_005896.2:c.394C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005887.2(LRG_610p1):p.(Arg132Gly)', 'slr': 'NP_005887.2:p.(R132G)'} - assert results['NM_005896.2:c.394C>G']['submitted_variant'] == '2-209113113-G-A,C,T' - assert results['NM_005896.2:c.394C>G']['genome_context_intronic_sequence'] == '' - assert results['NM_005896.2:c.394C>G']['hgvs_lrg_variant'] == 'LRG_610:g.22686C>G' - assert results['NM_005896.2:c.394C>G']['hgvs_transcript_variant'] == 'NM_005896.2:c.394C>G' - assert results['NM_005896.2:c.394C>G']['hgvs_refseqgene_variant'] == 'NG_023319.2:g.22686C>G' - assert results['NM_005896.2:c.394C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '209113113', 'alt': 'C'}} - assert 'hg38' not in list(results['NM_005896.2:c.394C>G']['primary_assembly_loci'].keys()) - assert results['NM_005896.2:c.394C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '209113113', 'alt': 'C'}} - assert 'grch38' not in list(results['NM_005896.2:c.394C>G']['primary_assembly_loci'].keys()) - assert results['NM_005896.2:c.394C>G']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_023319.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_610.xml'} + assert results['NM_001282386.1:c.394C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269315.1(LRG_610p3):p.(Arg132Cys)', 'slr': 'NP_001269315.1:p.(R132C)'} + assert results['NM_001282386.1:c.394C>T']['hgvs_lrg_transcript_variant'] == 'LRG_610t3:c.394C>T' + assert results['NM_001282386.1:c.394C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001282386.1:c.394C>T']['alt_genomic_loci'], []) + assert results['NM_001282386.1:c.394C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': 'chr2', 'pos': '209113113', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001282386.1:c.394C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': 'chr2', 'pos': '208248389', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001282386.1:c.394C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': '2', 'pos': '209113113', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001282386.1:c.394C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': '2', 'pos': '208248389', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001282386.1:c.394C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282386.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269315.1'} - assert results['flag'] == 'gene_variant' assert 'NM_005896.3:c.394C>T' in list(results.keys()) - assert results['NM_005896.3:c.394C>T']['hgvs_lrg_transcript_variant'] == 'LRG_610t1:c.394C>T' - assert results['NM_005896.3:c.394C>T']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_005896.3:c.394C>T']['alt_genomic_loci'], []) - assert results['NM_005896.3:c.394C>T']['gene_symbol'] == 'IDH1' - assert results['NM_005896.3:c.394C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005887.2(LRG_610p1):p.(Arg132Cys)', 'slr': 'NP_005887.2:p.(R132C)'} assert results['NM_005896.3:c.394C>T']['submitted_variant'] == '2-209113113-G-A,C,T' - assert results['NM_005896.3:c.394C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_005896.3:c.394C>T']['hgvs_lrg_variant'] == '' + assert results['NM_005896.3:c.394C>T']['gene_symbol'] == 'IDH1' + assert results['NM_005896.3:c.394C>T']['gene_ids'] == {'hgnc_id': 'HGNC:5382', 'entrez_gene_id': '3417', 'ucsc_id': 'uc002vcu.5', 'omim_id': ['147700']} assert results['NM_005896.3:c.394C>T']['hgvs_transcript_variant'] == 'NM_005896.3:c.394C>T' + assert results['NM_005896.3:c.394C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_005896.3:c.394C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_005896.3:c.394C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_005896.3:c.394C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '209113113', 'alt': 'A'}} - assert results['NM_005896.3:c.394C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '208248389', 'alt': 'A'}} - assert results['NM_005896.3:c.394C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '209113113', 'alt': 'A'}} - assert results['NM_005896.3:c.394C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '208248389', 'alt': 'A'}} - assert results['NM_005896.3:c.394C>T']['reference_sequence_records'] == 
{'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.3'} + assert results['NM_005896.3:c.394C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005887.2(LRG_610p1):p.(Arg132Cys)', 'slr': 'NP_005887.2:p.(R132C)'} + assert results['NM_005896.3:c.394C>T']['hgvs_lrg_transcript_variant'] == 'LRG_610t1:c.394C>T' + assert results['NM_005896.3:c.394C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_005896.3:c.394C>T']['alt_genomic_loci'], []) + assert results['NM_005896.3:c.394C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': 'chr2', 'pos': '209113113', 'ref': 'G', 'alt': 'A'}} + assert results['NM_005896.3:c.394C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': 'chr2', 'pos': '208248389', 'ref': 'G', 'alt': 'A'}} + assert results['NM_005896.3:c.394C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': '2', 'pos': '209113113', 'ref': 'G', 'alt': 'A'}} + assert results['NM_005896.3:c.394C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': '2', 'pos': '208248389', 'ref': 'G', 'alt': 'A'}} + assert results['NM_005896.3:c.394C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2'} assert 'NM_001282387.1:c.394C>T' in list(results.keys()) - assert results['NM_001282387.1:c.394C>T']['hgvs_lrg_transcript_variant'] == 'LRG_610t2:c.394C>T' - assert results['NM_001282387.1:c.394C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001282387.1:c.394C>T']['alt_genomic_loci'], []) - assert results['NM_001282387.1:c.394C>T']['gene_symbol'] == 'IDH1' - assert 
results['NM_001282387.1:c.394C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269316.1(LRG_610p2):p.(Arg132Cys)', 'slr': 'NP_001269316.1:p.(R132C)'} assert results['NM_001282387.1:c.394C>T']['submitted_variant'] == '2-209113113-G-A,C,T' - assert results['NM_001282387.1:c.394C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001282387.1:c.394C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001282387.1:c.394C>T']['gene_symbol'] == 'IDH1' + assert results['NM_001282387.1:c.394C>T']['gene_ids'] == {'hgnc_id': 'HGNC:5382', 'entrez_gene_id': '3417', 'ucsc_id': 'uc002vcu.5', 'omim_id': ['147700']} assert results['NM_001282387.1:c.394C>T']['hgvs_transcript_variant'] == 'NM_001282387.1:c.394C>T' + assert results['NM_001282387.1:c.394C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001282387.1:c.394C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001282387.1:c.394C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001282387.1:c.394C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '209113113', 'alt': 'A'}} - assert results['NM_001282387.1:c.394C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '208248389', 'alt': 'A'}} - assert results['NM_001282387.1:c.394C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '209113113', 'alt': 'A'}} - assert results['NM_001282387.1:c.394C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '208248389', 'alt': 'A'}} - assert results['NM_001282387.1:c.394C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269316.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282387.1'} - - assert 'NM_001282386.1:c.394C>G' in list(results.keys()) - assert results['NM_001282386.1:c.394C>G']['hgvs_lrg_transcript_variant'] == 'LRG_610t3:c.394C>G' - assert results['NM_001282386.1:c.394C>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001282386.1:c.394C>G']['alt_genomic_loci'], []) - assert results['NM_001282386.1:c.394C>G']['gene_symbol'] == 'IDH1' - assert results['NM_001282386.1:c.394C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269315.1(LRG_610p3):p.(Arg132Gly)', 'slr': 'NP_001269315.1:p.(R132G)'} - assert results['NM_001282386.1:c.394C>G']['submitted_variant'] == '2-209113113-G-A,C,T' - assert results['NM_001282386.1:c.394C>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001282386.1:c.394C>G']['hgvs_lrg_variant'] == '' - assert results['NM_001282386.1:c.394C>G']['hgvs_transcript_variant'] == 'NM_001282386.1:c.394C>G' - assert results['NM_001282386.1:c.394C>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001282386.1:c.394C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '209113113', 'alt': 'C'}} - assert results['NM_001282386.1:c.394C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '208248389', 'alt': 'C'}} - assert results['NM_001282386.1:c.394C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '209113113', 'alt': 'C'}} - assert results['NM_001282386.1:c.394C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '208248389', 'alt': 'C'}} - assert results['NM_001282386.1:c.394C>G']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269315.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282386.1'} + assert results['NM_001282387.1:c.394C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269316.1(LRG_610p2):p.(Arg132Cys)', 'slr': 'NP_001269316.1:p.(R132C)'} + assert results['NM_001282387.1:c.394C>T']['hgvs_lrg_transcript_variant'] == 'LRG_610t2:c.394C>T' + assert results['NM_001282387.1:c.394C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001282387.1:c.394C>T']['alt_genomic_loci'], []) + assert results['NM_001282387.1:c.394C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': 'chr2', 'pos': '209113113', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001282387.1:c.394C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': 'chr2', 'pos': '208248389', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001282387.1:c.394C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': '2', 'pos': '209113113', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001282387.1:c.394C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>A', 'vcf': {'chr': '2', 'pos': '208248389', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001282387.1:c.394C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282387.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269316.1'} assert 'NM_005896.2:c.394C>T' in list(results.keys()) - assert results['NM_005896.2:c.394C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_005896.2:c.394C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_005896.2:c.394C>T']['alt_genomic_loci'], []) - assert results['NM_005896.2:c.394C>T']['gene_symbol'] == 'IDH1' - assert 
results['NM_005896.2:c.394C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005887.2(LRG_610p1):p.(Arg132Cys)', 'slr': 'NP_005887.2:p.(R132C)'} assert results['NM_005896.2:c.394C>T']['submitted_variant'] == '2-209113113-G-A,C,T' - assert results['NM_005896.2:c.394C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_005896.2:c.394C>T']['hgvs_lrg_variant'] == 'LRG_610:g.22686C>T' + assert results['NM_005896.2:c.394C>T']['gene_symbol'] == 'IDH1' + assert results['NM_005896.2:c.394C>T']['gene_ids'] == {'hgnc_id': 'HGNC:5382', 'entrez_gene_id': '3417', 'ucsc_id': 'uc002vcu.5', 'omim_id': ['147700']} assert results['NM_005896.2:c.394C>T']['hgvs_transcript_variant'] == 'NM_005896.2:c.394C>T' + assert results['NM_005896.2:c.394C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_005896.2:c.394C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_005896.2:c.394C>T']['hgvs_refseqgene_variant'] == 'NG_023319.2:g.22686C>T' - assert results['NM_005896.2:c.394C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '209113113', 'alt': 'A'}} + assert results['NM_005896.2:c.394C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005887.2(LRG_610p1):p.(Arg132Cys)', 'slr': 'NP_005887.2:p.(R132C)'} + assert results['NM_005896.2:c.394C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_005896.2:c.394C>T']['hgvs_lrg_variant'] == 'LRG_610:g.22686C>T' + self.assertCountEqual(results['NM_005896.2:c.394C>T']['alt_genomic_loci'], []) + assert results['NM_005896.2:c.394C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': 'chr2', 'pos': '209113113', 'ref': 'G', 'alt': 'A'}} assert 'hg38' not in list(results['NM_005896.2:c.394C>T']['primary_assembly_loci'].keys()) - assert results['NM_005896.2:c.394C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000002.11:g.209113113G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '209113113', 'alt': 'A'}} + assert results['NM_005896.2:c.394C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>A', 'vcf': {'chr': '2', 'pos': '209113113', 'ref': 'G', 'alt': 'A'}} assert 'grch38' not in list(results['NM_005896.2:c.394C>T']['primary_assembly_loci'].keys()) - assert results['NM_005896.2:c.394C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_023319.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_610.xml'} + assert results['NM_005896.2:c.394C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_023319.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_610.xml'} + + assert 'NM_001282386.1:c.394C>G' in list(results.keys()) + assert results['NM_001282386.1:c.394C>G']['submitted_variant'] == '2-209113113-G-A,C,T' + assert results['NM_001282386.1:c.394C>G']['gene_symbol'] == 'IDH1' + assert results['NM_001282386.1:c.394C>G']['gene_ids'] == {'hgnc_id': 'HGNC:5382', 'entrez_gene_id': '3417', 'ucsc_id': 'uc002vcu.5', 'omim_id': ['147700']} + assert results['NM_001282386.1:c.394C>G']['hgvs_transcript_variant'] == 'NM_001282386.1:c.394C>G' + assert results['NM_001282386.1:c.394C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001282386.1:c.394C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001282386.1:c.394C>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001282386.1:c.394C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269315.1(LRG_610p3):p.(Arg132Gly)', 'slr': 'NP_001269315.1:p.(R132G)'} + assert 
results['NM_001282386.1:c.394C>G']['hgvs_lrg_transcript_variant'] == 'LRG_610t3:c.394C>G' + assert results['NM_001282386.1:c.394C>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001282386.1:c.394C>G']['alt_genomic_loci'], []) + assert results['NM_001282386.1:c.394C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': 'chr2', 'pos': '209113113', 'ref': 'G', 'alt': 'C'}} + assert results['NM_001282386.1:c.394C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': 'chr2', 'pos': '208248389', 'ref': 'G', 'alt': 'C'}} + assert results['NM_001282386.1:c.394C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': '2', 'pos': '209113113', 'ref': 'G', 'alt': 'C'}} + assert results['NM_001282386.1:c.394C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': '2', 'pos': '208248389', 'ref': 'G', 'alt': 'C'}} + assert results['NM_001282386.1:c.394C>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282386.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269315.1'} + + assert 'NM_005896.3:c.394C>G' in list(results.keys()) + assert results['NM_005896.3:c.394C>G']['submitted_variant'] == '2-209113113-G-A,C,T' + assert results['NM_005896.3:c.394C>G']['gene_symbol'] == 'IDH1' + assert results['NM_005896.3:c.394C>G']['gene_ids'] == {'hgnc_id': 'HGNC:5382', 'entrez_gene_id': '3417', 'ucsc_id': 'uc002vcu.5', 'omim_id': ['147700']} + assert results['NM_005896.3:c.394C>G']['hgvs_transcript_variant'] == 'NM_005896.3:c.394C>G' + assert results['NM_005896.3:c.394C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_005896.3:c.394C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_005896.3:c.394C>G']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_005896.3:c.394C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005887.2(LRG_610p1):p.(Arg132Gly)', 'slr': 'NP_005887.2:p.(R132G)'} + assert results['NM_005896.3:c.394C>G']['hgvs_lrg_transcript_variant'] == 'LRG_610t1:c.394C>G' + assert results['NM_005896.3:c.394C>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_005896.3:c.394C>G']['alt_genomic_loci'], []) + assert results['NM_005896.3:c.394C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': 'chr2', 'pos': '209113113', 'ref': 'G', 'alt': 'C'}} + assert results['NM_005896.3:c.394C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': 'chr2', 'pos': '208248389', 'ref': 'G', 'alt': 'C'}} + assert results['NM_005896.3:c.394C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': '2', 'pos': '209113113', 'ref': 'G', 'alt': 'C'}} + assert results['NM_005896.3:c.394C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': '2', 'pos': '208248389', 'ref': 'G', 'alt': 'C'}} + assert results['NM_005896.3:c.394C>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2'} + + assert 'NM_001282387.1:c.394C>G' in list(results.keys()) + assert results['NM_001282387.1:c.394C>G']['submitted_variant'] == '2-209113113-G-A,C,T' + assert results['NM_001282387.1:c.394C>G']['gene_symbol'] == 'IDH1' + assert results['NM_001282387.1:c.394C>G']['gene_ids'] == {'hgnc_id': 'HGNC:5382', 'entrez_gene_id': '3417', 'ucsc_id': 'uc002vcu.5', 'omim_id': ['147700']} + assert results['NM_001282387.1:c.394C>G']['hgvs_transcript_variant'] == 'NM_001282387.1:c.394C>G' + assert results['NM_001282387.1:c.394C>G']['genome_context_intronic_sequence'] == '' + assert 
results['NM_001282387.1:c.394C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001282387.1:c.394C>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001282387.1:c.394C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269316.1(LRG_610p2):p.(Arg132Gly)', 'slr': 'NP_001269316.1:p.(R132G)'} + assert results['NM_001282387.1:c.394C>G']['hgvs_lrg_transcript_variant'] == 'LRG_610t2:c.394C>G' + assert results['NM_001282387.1:c.394C>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001282387.1:c.394C>G']['alt_genomic_loci'], []) + assert results['NM_001282387.1:c.394C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': 'chr2', 'pos': '209113113', 'ref': 'G', 'alt': 'C'}} + assert results['NM_001282387.1:c.394C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': 'chr2', 'pos': '208248389', 'ref': 'G', 'alt': 'C'}} + assert results['NM_001282387.1:c.394C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': '2', 'pos': '209113113', 'ref': 'G', 'alt': 'C'}} + assert results['NM_001282387.1:c.394C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>C', 'vcf': {'chr': '2', 'pos': '208248389', 'ref': 'G', 'alt': 'C'}} + assert results['NM_001282387.1:c.394C>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282387.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269316.1'} + + assert 'NM_005896.2:c.394C>G' in list(results.keys()) + assert results['NM_005896.2:c.394C>G']['submitted_variant'] == '2-209113113-G-A,C,T' + assert results['NM_005896.2:c.394C>G']['gene_symbol'] == 'IDH1' + assert results['NM_005896.2:c.394C>G']['gene_ids'] == {'hgnc_id': 'HGNC:5382', 'entrez_gene_id': '3417', 'ucsc_id': 'uc002vcu.5', 'omim_id': ['147700']} + assert 
results['NM_005896.2:c.394C>G']['hgvs_transcript_variant'] == 'NM_005896.2:c.394C>G' + assert results['NM_005896.2:c.394C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_005896.2:c.394C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_005896.2:c.394C>G']['hgvs_refseqgene_variant'] == 'NG_023319.2:g.22686C>G' + assert results['NM_005896.2:c.394C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005887.2(LRG_610p1):p.(Arg132Gly)', 'slr': 'NP_005887.2:p.(R132G)'} + assert results['NM_005896.2:c.394C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_005896.2:c.394C>G']['hgvs_lrg_variant'] == 'LRG_610:g.22686C>G' + self.assertCountEqual(results['NM_005896.2:c.394C>G']['alt_genomic_loci'], []) + assert results['NM_005896.2:c.394C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': 'chr2', 'pos': '209113113', 'ref': 'G', 'alt': 'C'}} + assert 'hg38' not in list(results['NM_005896.2:c.394C>G']['primary_assembly_loci'].keys()) + assert results['NM_005896.2:c.394C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>C', 'vcf': {'chr': '2', 'pos': '209113113', 'ref': 'G', 'alt': 'C'}} + assert 'grch38' not in list(results['NM_005896.2:c.394C>G']['primary_assembly_loci'].keys()) + assert results['NM_005896.2:c.394C>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_023319.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_610.xml'} assert 'NM_001282386.1:c.394C>A' in list(results.keys()) - assert results['NM_001282386.1:c.394C>A']['hgvs_lrg_transcript_variant'] == 'LRG_610t3:c.394C>A' - assert results['NM_001282386.1:c.394C>A']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_001282386.1:c.394C>A']['alt_genomic_loci'], []) - assert results['NM_001282386.1:c.394C>A']['gene_symbol'] == 'IDH1' - assert results['NM_001282386.1:c.394C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269315.1(LRG_610p3):p.(Arg132Ser)', 'slr': 'NP_001269315.1:p.(R132S)'} assert results['NM_001282386.1:c.394C>A']['submitted_variant'] == '2-209113113-G-A,C,T' - assert results['NM_001282386.1:c.394C>A']['genome_context_intronic_sequence'] == '' - assert results['NM_001282386.1:c.394C>A']['hgvs_lrg_variant'] == '' + assert results['NM_001282386.1:c.394C>A']['gene_symbol'] == 'IDH1' + assert results['NM_001282386.1:c.394C>A']['gene_ids'] == {'hgnc_id': 'HGNC:5382', 'entrez_gene_id': '3417', 'ucsc_id': 'uc002vcu.5', 'omim_id': ['147700']} assert results['NM_001282386.1:c.394C>A']['hgvs_transcript_variant'] == 'NM_001282386.1:c.394C>A' + assert results['NM_001282386.1:c.394C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001282386.1:c.394C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001282386.1:c.394C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001282386.1:c.394C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '209113113', 'alt': 'T'}} - assert results['NM_001282386.1:c.394C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '208248389', 'alt': 'T'}} - assert results['NM_001282386.1:c.394C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '209113113', 'alt': 'T'}} - assert results['NM_001282386.1:c.394C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '208248389', 'alt': 'T'}} - assert 
results['NM_001282386.1:c.394C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269315.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282386.1'} + assert results['NM_001282386.1:c.394C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269315.1(LRG_610p3):p.(Arg132Ser)', 'slr': 'NP_001269315.1:p.(R132S)'} + assert results['NM_001282386.1:c.394C>A']['hgvs_lrg_transcript_variant'] == 'LRG_610t3:c.394C>A' + assert results['NM_001282386.1:c.394C>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001282386.1:c.394C>A']['alt_genomic_loci'], []) + assert results['NM_001282386.1:c.394C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': 'chr2', 'pos': '209113113', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001282386.1:c.394C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': 'chr2', 'pos': '208248389', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001282386.1:c.394C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': '2', 'pos': '209113113', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001282386.1:c.394C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': '2', 'pos': '208248389', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001282386.1:c.394C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282386.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269315.1'} + + assert 'NM_005896.3:c.394C>A' in list(results.keys()) + assert results['NM_005896.3:c.394C>A']['submitted_variant'] == '2-209113113-G-A,C,T' + assert results['NM_005896.3:c.394C>A']['gene_symbol'] == 'IDH1' + assert results['NM_005896.3:c.394C>A']['gene_ids'] == {'hgnc_id': 'HGNC:5382', 'entrez_gene_id': '3417', 'ucsc_id': 
'uc002vcu.5', 'omim_id': ['147700']} + assert results['NM_005896.3:c.394C>A']['hgvs_transcript_variant'] == 'NM_005896.3:c.394C>A' + assert results['NM_005896.3:c.394C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_005896.3:c.394C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_005896.3:c.394C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_005896.3:c.394C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005887.2(LRG_610p1):p.(Arg132Ser)', 'slr': 'NP_005887.2:p.(R132S)'} + assert results['NM_005896.3:c.394C>A']['hgvs_lrg_transcript_variant'] == 'LRG_610t1:c.394C>A' + assert results['NM_005896.3:c.394C>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_005896.3:c.394C>A']['alt_genomic_loci'], []) + assert results['NM_005896.3:c.394C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': 'chr2', 'pos': '209113113', 'ref': 'G', 'alt': 'T'}} + assert results['NM_005896.3:c.394C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': 'chr2', 'pos': '208248389', 'ref': 'G', 'alt': 'T'}} + assert results['NM_005896.3:c.394C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': '2', 'pos': '209113113', 'ref': 'G', 'alt': 'T'}} + assert results['NM_005896.3:c.394C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': '2', 'pos': '208248389', 'ref': 'G', 'alt': 'T'}} + assert results['NM_005896.3:c.394C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2'} + + assert 'NM_001282387.1:c.394C>A' in list(results.keys()) + assert results['NM_001282387.1:c.394C>A']['submitted_variant'] == '2-209113113-G-A,C,T' + assert 
results['NM_001282387.1:c.394C>A']['gene_symbol'] == 'IDH1' + assert results['NM_001282387.1:c.394C>A']['gene_ids'] == {'hgnc_id': 'HGNC:5382', 'entrez_gene_id': '3417', 'ucsc_id': 'uc002vcu.5', 'omim_id': ['147700']} + assert results['NM_001282387.1:c.394C>A']['hgvs_transcript_variant'] == 'NM_001282387.1:c.394C>A' + assert results['NM_001282387.1:c.394C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001282387.1:c.394C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001282387.1:c.394C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001282387.1:c.394C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269316.1(LRG_610p2):p.(Arg132Ser)', 'slr': 'NP_001269316.1:p.(R132S)'} + assert results['NM_001282387.1:c.394C>A']['hgvs_lrg_transcript_variant'] == 'LRG_610t2:c.394C>A' + assert results['NM_001282387.1:c.394C>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001282387.1:c.394C>A']['alt_genomic_loci'], []) + assert results['NM_001282387.1:c.394C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': 'chr2', 'pos': '209113113', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001282387.1:c.394C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': 'chr2', 'pos': '208248389', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001282387.1:c.394C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': '2', 'pos': '209113113', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001282387.1:c.394C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.208248389G>T', 'vcf': {'chr': '2', 'pos': '208248389', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001282387.1:c.394C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282387.1', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269316.1'} + assert 'NM_005896.2:c.394C>A' in list(results.keys()) + assert results['NM_005896.2:c.394C>A']['submitted_variant'] == '2-209113113-G-A,C,T' + assert results['NM_005896.2:c.394C>A']['gene_symbol'] == 'IDH1' + assert results['NM_005896.2:c.394C>A']['gene_ids'] == {'hgnc_id': 'HGNC:5382', 'entrez_gene_id': '3417', 'ucsc_id': 'uc002vcu.5', 'omim_id': ['147700']} + assert results['NM_005896.2:c.394C>A']['hgvs_transcript_variant'] == 'NM_005896.2:c.394C>A' + assert results['NM_005896.2:c.394C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_005896.2:c.394C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_005896.2:c.394C>A']['hgvs_refseqgene_variant'] == 'NG_023319.2:g.22686C>A' + assert results['NM_005896.2:c.394C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005887.2(LRG_610p1):p.(Arg132Ser)', 'slr': 'NP_005887.2:p.(R132S)'} + assert results['NM_005896.2:c.394C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_005896.2:c.394C>A']['hgvs_lrg_variant'] == 'LRG_610:g.22686C>A' + self.assertCountEqual(results['NM_005896.2:c.394C>A']['alt_genomic_loci'], []) + assert results['NM_005896.2:c.394C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': 'chr2', 'pos': '209113113', 'ref': 'G', 'alt': 'T'}} + assert 'hg38' not in list(results['NM_005896.2:c.394C>A']['primary_assembly_loci'].keys()) + assert results['NM_005896.2:c.394C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.209113113G>T', 'vcf': {'chr': '2', 'pos': '209113113', 'ref': 'G', 'alt': 'T'}} + assert 'grch38' not in list(results['NM_005896.2:c.394C>A']['primary_assembly_loci'].keys()) + assert results['NM_005896.2:c.394C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005896.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005887.2', 'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_023319.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_610.xml'} def test_variant169(self): variant = 'NC_000005.9:g.35058665_35058666CA=' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_001204314.1:c.*6525_*6526=' in list(results.keys()) - assert results['NM_001204314.1:c.*6525_*6526=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001204314.1:c.*6525_*6526=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001204314.1:c.*6525_*6526=']['alt_genomic_loci'], []) - assert results['NM_001204314.1:c.*6525_*6526=']['gene_symbol'] == 'PRLR' - assert results['NM_001204314.1:c.*6525_*6526=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191243.1:p.?', 'slr': 'NP_001191243.1:p.?'} - assert results['NM_001204314.1:c.*6525_*6526=']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' - assert results['NM_001204314.1:c.*6525_*6526=']['genome_context_intronic_sequence'] == '' - assert results['NM_001204314.1:c.*6525_*6526=']['hgvs_lrg_variant'] == '' - assert results['NM_001204314.1:c.*6525_*6526=']['hgvs_transcript_variant'] == 'NM_001204314.1:c.*6525_*6526=' - assert results['NM_001204314.1:c.*6525_*6526=']['hgvs_refseqgene_variant'] == '' - assert results['NM_001204314.1:c.*6525_*6526=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': 'chr5', 'ref': 'CA', 'pos': '35058665', 'alt': 'CA'}} - assert 'hg38' not in list(results['NM_001204314.1:c.*6525_*6526=']['primary_assembly_loci'].keys()) - assert results['NM_001204314.1:c.*6525_*6526=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058665', 'alt': 'CA'}} - assert 'grch38' not in list(results['NM_001204314.1:c.*6525_*6526=']['primary_assembly_loci'].keys()) - assert 
results['NM_001204314.1:c.*6525_*6526=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191243.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204314.1'} - - assert 'NM_001204314.2:c.*6528del' in list(results.keys()) - assert results['NM_001204314.2:c.*6528del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001204314.2:c.*6528del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001204314.2:c.*6528del']['alt_genomic_loci'], []) - assert results['NM_001204314.2:c.*6528del']['gene_symbol'] == 'PRLR' - assert results['NM_001204314.2:c.*6528del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191243.1:p.?', 'slr': 'NP_001191243.1:p.?'} - assert results['NM_001204314.2:c.*6528del']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' - assert results['NM_001204314.2:c.*6528del']['genome_context_intronic_sequence'] == '' - assert results['NM_001204314.2:c.*6528del']['hgvs_lrg_variant'] == '' - assert results['NM_001204314.2:c.*6528del']['hgvs_transcript_variant'] == 'NM_001204314.2:c.*6528del' - assert results['NM_001204314.2:c.*6528del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001204314.2:c.*6528del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058662_35058668=', 'vcf': {'chr': 'chr5', 'ref': 'AGACAAG', 'pos': '35058662', 'alt': 'AGACAAG'}} - assert results['NM_001204314.2:c.*6528del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563del', 'vcf': {'chr': 'chr5', 'ref': 'CA', 'pos': '35058560', 'alt': 'C'}} - assert results['NM_001204314.2:c.*6528del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058662_35058668=', 'vcf': {'chr': '5', 'ref': 'AGACAAG', 'pos': '35058662', 'alt': 'AGACAAG'}} - assert results['NM_001204314.2:c.*6528del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000005.10:g.35058563del', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058560', 'alt': 'C'}} - assert results['NM_001204314.2:c.*6528del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191243.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204314.2'} + assert results['flag'] == 'gene_variant' + assert 'NM_000949.6:c.*6528del' in list(results.keys()) + assert results['NM_000949.6:c.*6528del']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' + assert results['NM_000949.6:c.*6528del']['gene_symbol'] == 'PRLR' + assert results['NM_000949.6:c.*6528del']['gene_ids'] == {'hgnc_id': 'HGNC:9446', 'entrez_gene_id': '5618', 'ucsc_id': 'uc032uqm.2', 'omim_id': ['176761']} + assert results['NM_000949.6:c.*6528del']['hgvs_transcript_variant'] == 'NM_000949.6:c.*6528del' + assert results['NM_000949.6:c.*6528del']['genome_context_intronic_sequence'] == '' + assert results['NM_000949.6:c.*6528del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000949.6:c.*6528del']['hgvs_refseqgene_variant'] == '' + assert results['NM_000949.6:c.*6528del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000940.1:p.?', 'slr': 'NP_000940.1:p.?'} + assert results['NM_000949.6:c.*6528del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000949.6:c.*6528del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000949.6:c.*6528del']['alt_genomic_loci'], []) + assert results['NM_000949.6:c.*6528del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058662_35058668=', 'vcf': {'chr': 'chr5', 'pos': '35058662', 'ref': 'AGACAAG', 'alt': 'AGACAAG'}} + assert results['NM_000949.6:c.*6528del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563del', 'vcf': {'chr': 'chr5', 'pos': '35058560', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_000949.6:c.*6528del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000005.9:g.35058662_35058668=', 'vcf': {'chr': '5', 'pos': '35058662', 'ref': 'AGACAAG', 'alt': 'AGACAAG'}} + assert results['NM_000949.6:c.*6528del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563del', 'vcf': {'chr': '5', 'pos': '35058560', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_000949.6:c.*6528del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000949.6', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000940.1'} - assert 'NM_001204317.1:c.856-9153_856-9152=' in list(results.keys()) - assert results['NM_001204317.1:c.856-9153_856-9152=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001204317.1:c.856-9153_856-9152=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001204317.1:c.856-9153_856-9152=']['alt_genomic_loci'], []) - assert results['NM_001204317.1:c.856-9153_856-9152=']['gene_symbol'] == 'PRLR' - assert results['NM_001204317.1:c.856-9153_856-9152=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191246.1:p.?', 'slr': 'NP_001191246.1:p.?'} - assert results['NM_001204317.1:c.856-9153_856-9152=']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' - assert results['NM_001204317.1:c.856-9153_856-9152=']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001204317.1):c.856-9153_856-9152=' - assert results['NM_001204317.1:c.856-9153_856-9152=']['hgvs_lrg_variant'] == '' - assert results['NM_001204317.1:c.856-9153_856-9152=']['hgvs_transcript_variant'] == 'NM_001204317.1:c.856-9153_856-9152=' - assert results['NM_001204317.1:c.856-9153_856-9152=']['hgvs_refseqgene_variant'] == '' - assert results['NM_001204317.1:c.856-9153_856-9152=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': 'chr5', 'ref': 'CA', 'pos': '35058665', 'alt': 'CA'}} - assert results['NM_001204317.1:c.856-9153_856-9152=']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000005.10:g.35058560_35058561=', 'vcf': {'chr': 'chr5', 'ref': 'CA', 'pos': '35058560', 'alt': 'CA'}} - assert results['NM_001204317.1:c.856-9153_856-9152=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058665', 'alt': 'CA'}} - assert results['NM_001204317.1:c.856-9153_856-9152=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058560_35058561=', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058560', 'alt': 'CA'}} - assert results['NM_001204317.1:c.856-9153_856-9152=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191246.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204317.1'} + assert 'NR_037910.1:n.828-9153_828-9152=' in list(results.keys()) + assert results['NR_037910.1:n.828-9153_828-9152=']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' + assert results['NR_037910.1:n.828-9153_828-9152=']['gene_symbol'] == 'PRLR' + assert results['NR_037910.1:n.828-9153_828-9152=']['gene_ids'] == {'hgnc_id': 'HGNC:9446', 'entrez_gene_id': '5618', 'ucsc_id': 'uc032uqm.2', 'omim_id': ['176761']} + assert results['NR_037910.1:n.828-9153_828-9152=']['hgvs_transcript_variant'] == 'NR_037910.1:n.828-9153_828-9152=' + assert results['NR_037910.1:n.828-9153_828-9152=']['genome_context_intronic_sequence'] == 'NC_000005.9(NR_037910.1):c.828-9153_828-9152=' + assert results['NR_037910.1:n.828-9153_828-9152=']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_037910.1:n.828-9153_828-9152=']['hgvs_refseqgene_variant'] == '' + assert results['NR_037910.1:n.828-9153_828-9152=']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_037910.1:n.828-9153_828-9152=']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_037910.1:n.828-9153_828-9152=']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['NR_037910.1:n.828-9153_828-9152=']['alt_genomic_loci'], []) + assert results['NR_037910.1:n.828-9153_828-9152=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': 'chr5', 'pos': '35058665', 'ref': 'CA', 'alt': 'CA'}} + assert results['NR_037910.1:n.828-9153_828-9152=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058560_35058561=', 'vcf': {'chr': 'chr5', 'pos': '35058560', 'ref': 'CA', 'alt': 'CA'}} + assert results['NR_037910.1:n.828-9153_828-9152=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': '5', 'pos': '35058665', 'ref': 'CA', 'alt': 'CA'}} + assert results['NR_037910.1:n.828-9153_828-9152=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058560_35058561=', 'vcf': {'chr': '5', 'pos': '35058560', 'ref': 'CA', 'alt': 'CA'}} + assert results['NR_037910.1:n.828-9153_828-9152=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_037910.1'} assert 'NM_001204316.1:c.1009+7385_1009+7386=' in list(results.keys()) - assert results['NM_001204316.1:c.1009+7385_1009+7386=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001204316.1:c.1009+7385_1009+7386=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001204316.1:c.1009+7385_1009+7386=']['alt_genomic_loci'], []) - assert results['NM_001204316.1:c.1009+7385_1009+7386=']['gene_symbol'] == 'PRLR' - assert results['NM_001204316.1:c.1009+7385_1009+7386=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191245.1:p.?', 'slr': 'NP_001191245.1:p.?'} assert results['NM_001204316.1:c.1009+7385_1009+7386=']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' - assert results['NM_001204316.1:c.1009+7385_1009+7386=']['genome_context_intronic_sequence'] == 
'NC_000005.9(NM_001204316.1):c.1009+7385_1009+7386=' - assert results['NM_001204316.1:c.1009+7385_1009+7386=']['hgvs_lrg_variant'] == '' + assert results['NM_001204316.1:c.1009+7385_1009+7386=']['gene_symbol'] == 'PRLR' + assert results['NM_001204316.1:c.1009+7385_1009+7386=']['gene_ids'] == {'hgnc_id': 'HGNC:9446', 'entrez_gene_id': '5618', 'ucsc_id': 'uc032uqm.2', 'omim_id': ['176761']} assert results['NM_001204316.1:c.1009+7385_1009+7386=']['hgvs_transcript_variant'] == 'NM_001204316.1:c.1009+7385_1009+7386=' + assert results['NM_001204316.1:c.1009+7385_1009+7386=']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001204316.1):c.1009+7385_1009+7386=' + assert results['NM_001204316.1:c.1009+7385_1009+7386=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001204316.1:c.1009+7385_1009+7386=']['hgvs_refseqgene_variant'] == '' - assert results['NM_001204316.1:c.1009+7385_1009+7386=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': 'chr5', 'ref': 'CA', 'pos': '35058665', 'alt': 'CA'}} - assert results['NM_001204316.1:c.1009+7385_1009+7386=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563_35058564=', 'vcf': {'chr': 'chr5', 'ref': 'AG', 'pos': '35058563', 'alt': 'AG'}} - assert results['NM_001204316.1:c.1009+7385_1009+7386=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058665', 'alt': 'CA'}} - assert results['NM_001204316.1:c.1009+7385_1009+7386=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563_35058564=', 'vcf': {'chr': '5', 'ref': 'AG', 'pos': '35058563', 'alt': 'AG'}} - assert results['NM_001204316.1:c.1009+7385_1009+7386=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191245.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204316.1'} 
+ assert results['NM_001204316.1:c.1009+7385_1009+7386=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191245.1:p.?', 'slr': 'NP_001191245.1:p.?'} + assert results['NM_001204316.1:c.1009+7385_1009+7386=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001204316.1:c.1009+7385_1009+7386=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001204316.1:c.1009+7385_1009+7386=']['alt_genomic_loci'], []) + assert results['NM_001204316.1:c.1009+7385_1009+7386=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': 'chr5', 'pos': '35058665', 'ref': 'CA', 'alt': 'CA'}} + assert results['NM_001204316.1:c.1009+7385_1009+7386=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563_35058564=', 'vcf': {'chr': 'chr5', 'pos': '35058563', 'ref': 'AG', 'alt': 'AG'}} + assert results['NM_001204316.1:c.1009+7385_1009+7386=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': '5', 'pos': '35058665', 'ref': 'CA', 'alt': 'CA'}} + assert results['NM_001204316.1:c.1009+7385_1009+7386=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563_35058564=', 'vcf': {'chr': '5', 'pos': '35058563', 'ref': 'AG', 'alt': 'AG'}} + assert results['NM_001204316.1:c.1009+7385_1009+7386=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204316.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191245.1'} - assert results['flag'] == 'gene_variant' - assert 'NR_037910.1:n.828-9153_828-9152=' in list(results.keys()) - assert results['NR_037910.1:n.828-9153_828-9152=']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_037910.1:n.828-9153_828-9152=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_037910.1:n.828-9153_828-9152=']['alt_genomic_loci'], []) - assert 
results['NR_037910.1:n.828-9153_828-9152=']['gene_symbol'] == 'PRLR' - assert results['NR_037910.1:n.828-9153_828-9152=']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} - assert results['NR_037910.1:n.828-9153_828-9152=']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' - assert results['NR_037910.1:n.828-9153_828-9152=']['genome_context_intronic_sequence'] == 'NC_000005.9(NR_037910.1):c.828-9153_828-9152=' - assert results['NR_037910.1:n.828-9153_828-9152=']['hgvs_lrg_variant'] == '' - assert results['NR_037910.1:n.828-9153_828-9152=']['hgvs_transcript_variant'] == 'NR_037910.1:n.828-9153_828-9152=' - assert results['NR_037910.1:n.828-9153_828-9152=']['hgvs_refseqgene_variant'] == '' - assert results['NR_037910.1:n.828-9153_828-9152=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': 'chr5', 'ref': 'CA', 'pos': '35058665', 'alt': 'CA'}} - assert results['NR_037910.1:n.828-9153_828-9152=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058560_35058561=', 'vcf': {'chr': 'chr5', 'ref': 'CA', 'pos': '35058560', 'alt': 'CA'}} - assert results['NR_037910.1:n.828-9153_828-9152=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058665', 'alt': 'CA'}} - assert results['NR_037910.1:n.828-9153_828-9152=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058560_35058561=', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058560', 'alt': 'CA'}} - assert results['NR_037910.1:n.828-9153_828-9152=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_037910.1'} + assert 'NM_001204317.1:c.856-9153_856-9152=' in list(results.keys()) + assert results['NM_001204317.1:c.856-9153_856-9152=']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' + assert 
results['NM_001204317.1:c.856-9153_856-9152=']['gene_symbol'] == 'PRLR' + assert results['NM_001204317.1:c.856-9153_856-9152=']['gene_ids'] == {'hgnc_id': 'HGNC:9446', 'entrez_gene_id': '5618', 'ucsc_id': 'uc032uqm.2', 'omim_id': ['176761']} + assert results['NM_001204317.1:c.856-9153_856-9152=']['hgvs_transcript_variant'] == 'NM_001204317.1:c.856-9153_856-9152=' + assert results['NM_001204317.1:c.856-9153_856-9152=']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001204317.1):c.856-9153_856-9152=' + assert results['NM_001204317.1:c.856-9153_856-9152=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001204317.1:c.856-9153_856-9152=']['hgvs_refseqgene_variant'] == '' + assert results['NM_001204317.1:c.856-9153_856-9152=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191246.1:p.?', 'slr': 'NP_001191246.1:p.?'} + assert results['NM_001204317.1:c.856-9153_856-9152=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001204317.1:c.856-9153_856-9152=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001204317.1:c.856-9153_856-9152=']['alt_genomic_loci'], []) + assert results['NM_001204317.1:c.856-9153_856-9152=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': 'chr5', 'pos': '35058665', 'ref': 'CA', 'alt': 'CA'}} + assert results['NM_001204317.1:c.856-9153_856-9152=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058560_35058561=', 'vcf': {'chr': 'chr5', 'pos': '35058560', 'ref': 'CA', 'alt': 'CA'}} + assert results['NM_001204317.1:c.856-9153_856-9152=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': '5', 'pos': '35058665', 'ref': 'CA', 'alt': 'CA'}} + assert results['NM_001204317.1:c.856-9153_856-9152=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058560_35058561=', 'vcf': {'chr': '5', 'pos': 
'35058560', 'ref': 'CA', 'alt': 'CA'}} + assert results['NM_001204317.1:c.856-9153_856-9152=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204317.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191246.1'} assert 'NM_001204318.1:c.686-9153_686-9152=' in list(results.keys()) - assert results['NM_001204318.1:c.686-9153_686-9152=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001204318.1:c.686-9153_686-9152=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001204318.1:c.686-9153_686-9152=']['alt_genomic_loci'], []) - assert results['NM_001204318.1:c.686-9153_686-9152=']['gene_symbol'] == 'PRLR' - assert results['NM_001204318.1:c.686-9153_686-9152=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191247.1:p.?', 'slr': 'NP_001191247.1:p.?'} assert results['NM_001204318.1:c.686-9153_686-9152=']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' - assert results['NM_001204318.1:c.686-9153_686-9152=']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001204318.1):c.686-9153_686-9152=' - assert results['NM_001204318.1:c.686-9153_686-9152=']['hgvs_lrg_variant'] == '' + assert results['NM_001204318.1:c.686-9153_686-9152=']['gene_symbol'] == 'PRLR' + assert results['NM_001204318.1:c.686-9153_686-9152=']['gene_ids'] == {'hgnc_id': 'HGNC:9446', 'entrez_gene_id': '5618', 'ucsc_id': 'uc032uqm.2', 'omim_id': ['176761']} assert results['NM_001204318.1:c.686-9153_686-9152=']['hgvs_transcript_variant'] == 'NM_001204318.1:c.686-9153_686-9152=' + assert results['NM_001204318.1:c.686-9153_686-9152=']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001204318.1):c.686-9153_686-9152=' + assert results['NM_001204318.1:c.686-9153_686-9152=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001204318.1:c.686-9153_686-9152=']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_001204318.1:c.686-9153_686-9152=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': 'chr5', 'ref': 'CA', 'pos': '35058665', 'alt': 'CA'}} - assert results['NM_001204318.1:c.686-9153_686-9152=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058560_35058561=', 'vcf': {'chr': 'chr5', 'ref': 'CA', 'pos': '35058560', 'alt': 'CA'}} - assert results['NM_001204318.1:c.686-9153_686-9152=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058665', 'alt': 'CA'}} - assert results['NM_001204318.1:c.686-9153_686-9152=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058560_35058561=', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058560', 'alt': 'CA'}} - assert results['NM_001204318.1:c.686-9153_686-9152=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191247.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204318.1'} + assert results['NM_001204318.1:c.686-9153_686-9152=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191247.1:p.?', 'slr': 'NP_001191247.1:p.?'} + assert results['NM_001204318.1:c.686-9153_686-9152=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001204318.1:c.686-9153_686-9152=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001204318.1:c.686-9153_686-9152=']['alt_genomic_loci'], []) + assert results['NM_001204318.1:c.686-9153_686-9152=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': 'chr5', 'pos': '35058665', 'ref': 'CA', 'alt': 'CA'}} + assert results['NM_001204318.1:c.686-9153_686-9152=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058560_35058561=', 'vcf': {'chr': 'chr5', 'pos': '35058560', 'ref': 'CA', 'alt': 'CA'}} 
+ assert results['NM_001204318.1:c.686-9153_686-9152=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': '5', 'pos': '35058665', 'ref': 'CA', 'alt': 'CA'}} + assert results['NM_001204318.1:c.686-9153_686-9152=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058560_35058561=', 'vcf': {'chr': '5', 'pos': '35058560', 'ref': 'CA', 'alt': 'CA'}} + assert results['NM_001204318.1:c.686-9153_686-9152=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204318.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191247.1'} assert 'NM_000949.5:c.*6525_*6526=' in list(results.keys()) - assert results['NM_000949.5:c.*6525_*6526=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000949.5:c.*6525_*6526=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000949.5:c.*6525_*6526=']['alt_genomic_loci'], []) - assert results['NM_000949.5:c.*6525_*6526=']['gene_symbol'] == 'PRLR' - assert results['NM_000949.5:c.*6525_*6526=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000940.1:p.?', 'slr': 'NP_000940.1:p.?'} assert results['NM_000949.5:c.*6525_*6526=']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' - assert results['NM_000949.5:c.*6525_*6526=']['genome_context_intronic_sequence'] == '' - assert results['NM_000949.5:c.*6525_*6526=']['hgvs_lrg_variant'] == '' + assert results['NM_000949.5:c.*6525_*6526=']['gene_symbol'] == 'PRLR' + assert results['NM_000949.5:c.*6525_*6526=']['gene_ids'] == {'hgnc_id': 'HGNC:9446', 'entrez_gene_id': '5618', 'ucsc_id': 'uc032uqm.2', 'omim_id': ['176761']} assert results['NM_000949.5:c.*6525_*6526=']['hgvs_transcript_variant'] == 'NM_000949.5:c.*6525_*6526=' + assert results['NM_000949.5:c.*6525_*6526=']['genome_context_intronic_sequence'] == '' + assert results['NM_000949.5:c.*6525_*6526=']['refseqgene_context_intronic_sequence'] == '' 
assert results['NM_000949.5:c.*6525_*6526=']['hgvs_refseqgene_variant'] == 'NG_029042.1:g.177158_177159=' + assert results['NM_000949.5:c.*6525_*6526=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000940.1:p.?', 'slr': 'NP_000940.1:p.?'} + assert results['NM_000949.5:c.*6525_*6526=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000949.5:c.*6525_*6526=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000949.5:c.*6525_*6526=']['alt_genomic_loci'], []) result_options = [ {'hgvs_genomic_description': 'NC_000005.9:g.35058664_35058668=', 'vcf': {'chr': 'chr5', 'ref': 'ACAAG', 'pos': '35058664', 'alt': 'ACAAG'}}, @@ -5797,25 +5879,43 @@ def test_variant169(self): ] self.assertIn(results['NM_000949.5:c.*6525_*6526=']['primary_assembly_loci']['grch37'], result_options2) assert 'grch38' not in list(results['NM_000949.5:c.*6525_*6526=']['primary_assembly_loci'].keys()) - assert results['NM_000949.5:c.*6525_*6526=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029042.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000940.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000949.5'} + assert results['NM_000949.5:c.*6525_*6526=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000949.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000940.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029042.1'} - assert 'NM_000949.6:c.*6528del' in list(results.keys()) - assert results['NM_000949.6:c.*6528del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000949.6:c.*6528del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000949.6:c.*6528del']['alt_genomic_loci'], []) - assert results['NM_000949.6:c.*6528del']['gene_symbol'] == 'PRLR' - assert results['NM_000949.6:c.*6528del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000940.1:p.?', 'slr': 'NP_000940.1:p.?'} - assert 
results['NM_000949.6:c.*6528del']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' - assert results['NM_000949.6:c.*6528del']['genome_context_intronic_sequence'] == '' - assert results['NM_000949.6:c.*6528del']['hgvs_lrg_variant'] == '' - assert results['NM_000949.6:c.*6528del']['hgvs_transcript_variant'] == 'NM_000949.6:c.*6528del' - assert results['NM_000949.6:c.*6528del']['hgvs_refseqgene_variant'] == '' - assert results['NM_000949.6:c.*6528del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058662_35058668=', 'vcf': {'chr': 'chr5', 'ref': 'AGACAAG', 'pos': '35058662', 'alt': 'AGACAAG'}} - assert results['NM_000949.6:c.*6528del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563del', 'vcf': {'chr': 'chr5', 'ref': 'CA', 'pos': '35058560', 'alt': 'C'}} - assert results['NM_000949.6:c.*6528del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058662_35058668=', 'vcf': {'chr': '5', 'ref': 'AGACAAG', 'pos': '35058662', 'alt': 'AGACAAG'}} - assert results['NM_000949.6:c.*6528del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563del', 'vcf': {'chr': '5', 'ref': 'CA', 'pos': '35058560', 'alt': 'C'}} - assert results['NM_000949.6:c.*6528del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000940.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000949.6'} + assert 'NM_001204314.1:c.*6525_*6526=' in list(results.keys()) + assert results['NM_001204314.1:c.*6525_*6526=']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' + assert results['NM_001204314.1:c.*6525_*6526=']['gene_symbol'] == 'PRLR' + assert results['NM_001204314.1:c.*6525_*6526=']['gene_ids'] == {'hgnc_id': 'HGNC:9446', 'entrez_gene_id': '5618', 'ucsc_id': 'uc032uqm.2', 'omim_id': ['176761']} + assert results['NM_001204314.1:c.*6525_*6526=']['hgvs_transcript_variant'] == 
'NM_001204314.1:c.*6525_*6526=' + assert results['NM_001204314.1:c.*6525_*6526=']['genome_context_intronic_sequence'] == '' + assert results['NM_001204314.1:c.*6525_*6526=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001204314.1:c.*6525_*6526=']['hgvs_refseqgene_variant'] == '' + assert results['NM_001204314.1:c.*6525_*6526=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191243.1:p.?', 'slr': 'NP_001191243.1:p.?'} + assert results['NM_001204314.1:c.*6525_*6526=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001204314.1:c.*6525_*6526=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001204314.1:c.*6525_*6526=']['alt_genomic_loci'], []) + assert results['NM_001204314.1:c.*6525_*6526=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': 'chr5', 'pos': '35058665', 'ref': 'CA', 'alt': 'CA'}} + assert 'hg38' not in list(results['NM_001204314.1:c.*6525_*6526=']['primary_assembly_loci'].keys()) + assert results['NM_001204314.1:c.*6525_*6526=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058665_35058666=', 'vcf': {'chr': '5', 'pos': '35058665', 'ref': 'CA', 'alt': 'CA'}} + assert 'grch38' not in list(results['NM_001204314.1:c.*6525_*6526=']['primary_assembly_loci'].keys()) + assert results['NM_001204314.1:c.*6525_*6526=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204314.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191243.1'} + assert 'NM_001204314.2:c.*6528del' in list(results.keys()) + assert results['NM_001204314.2:c.*6528del']['submitted_variant'] == 'NC_000005.9:g.35058665_35058666CA=' + assert results['NM_001204314.2:c.*6528del']['gene_symbol'] == 'PRLR' + assert results['NM_001204314.2:c.*6528del']['gene_ids'] == {'hgnc_id': 'HGNC:9446', 'entrez_gene_id': '5618', 'ucsc_id': 'uc032uqm.2', 'omim_id': ['176761']} + assert 
results['NM_001204314.2:c.*6528del']['hgvs_transcript_variant'] == 'NM_001204314.2:c.*6528del' + assert results['NM_001204314.2:c.*6528del']['genome_context_intronic_sequence'] == '' + assert results['NM_001204314.2:c.*6528del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001204314.2:c.*6528del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001204314.2:c.*6528del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001191243.1:p.?', 'slr': 'NP_001191243.1:p.?'} + assert results['NM_001204314.2:c.*6528del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001204314.2:c.*6528del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001204314.2:c.*6528del']['alt_genomic_loci'], []) + assert results['NM_001204314.2:c.*6528del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058662_35058668=', 'vcf': {'chr': 'chr5', 'pos': '35058662', 'ref': 'AGACAAG', 'alt': 'AGACAAG'}} + assert results['NM_001204314.2:c.*6528del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563del', 'vcf': {'chr': 'chr5', 'pos': '35058560', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_001204314.2:c.*6528del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.35058662_35058668=', 'vcf': {'chr': '5', 'pos': '35058662', 'ref': 'AGACAAG', 'alt': 'AGACAAG'}} + assert results['NM_001204314.2:c.*6528del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.35058563del', 'vcf': {'chr': '5', 'pos': '35058560', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_001204314.2:c.*6528del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001204314.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001191243.1'} def test_variant170(self): variant = 'NC_000002.11:g.73675227_73675229delTCTinsTCTCTC' @@ -5824,22 +5924,22 @@ def test_variant170(self): assert results['flag'] == 
'gene_variant' assert 'NM_015120.4:c.1580_1581insCCT' in list(results.keys()) - assert results['NM_015120.4:c.1580_1581insCCT']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.1580_1581insCCT' - assert results['NM_015120.4:c.1580_1581insCCT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_015120.4:c.1580_1581insCCT']['alt_genomic_loci'], []) - assert results['NM_015120.4:c.1580_1581insCCT']['gene_symbol'] == 'ALMS1' - assert results['NM_015120.4:c.1580_1581insCCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Leu527dup)', 'slr': 'NP_055935.4:p.(L527dup)'} assert results['NM_015120.4:c.1580_1581insCCT']['submitted_variant'] == 'NC_000002.11:g.73675227_73675229delTCTinsTCTCTC' - assert results['NM_015120.4:c.1580_1581insCCT']['genome_context_intronic_sequence'] == '' - assert results['NM_015120.4:c.1580_1581insCCT']['hgvs_lrg_variant'] == 'LRG_741:g.67352_67353insCCT' + assert results['NM_015120.4:c.1580_1581insCCT']['gene_symbol'] == 'ALMS1' + assert results['NM_015120.4:c.1580_1581insCCT']['gene_ids'] == {'hgnc_id': 'HGNC:428', 'entrez_gene_id': '7840', 'ucsc_id': 'uc032nrd.1', 'omim_id': ['606844']} assert results['NM_015120.4:c.1580_1581insCCT']['hgvs_transcript_variant'] == 'NM_015120.4:c.1580_1581insCCT' + assert results['NM_015120.4:c.1580_1581insCCT']['genome_context_intronic_sequence'] == '' + assert results['NM_015120.4:c.1580_1581insCCT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_015120.4:c.1580_1581insCCT']['hgvs_refseqgene_variant'] == 'NG_011690.1:g.67352_67353insCCT' - assert results['NM_015120.4:c.1580_1581insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675231_73675232insCCT', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '73675229', 'alt': 'TCTC'}} - assert results['NM_015120.4:c.1580_1581insCCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448104_73448105insCCT', 'vcf': {'chr': 'chr2', 
'ref': 'T', 'pos': '73448102', 'alt': 'TCTC'}} - assert results['NM_015120.4:c.1580_1581insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675231_73675232insCCT', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '73675229', 'alt': 'TCTC'}} - assert results['NM_015120.4:c.1580_1581insCCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448104_73448105insCCT', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '73448102', 'alt': 'TCTC'}} - assert results['NM_015120.4:c.1580_1581insCCT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} - + assert results['NM_015120.4:c.1580_1581insCCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055935.4(LRG_741p1):p.(Leu527dup)', 'slr': 'NP_055935.4:p.(L527dup)'} + assert results['NM_015120.4:c.1580_1581insCCT']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.1580_1581insCCT' + assert results['NM_015120.4:c.1580_1581insCCT']['hgvs_lrg_variant'] == 'LRG_741:g.67352_67353insCCT' + self.assertCountEqual(results['NM_015120.4:c.1580_1581insCCT']['alt_genomic_loci'], []) + assert results['NM_015120.4:c.1580_1581insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675231_73675232insCCT', 'vcf': {'chr': 'chr2', 'pos': '73675229', 'ref': 'T', 'alt': 'TCTC'}} + assert results['NM_015120.4:c.1580_1581insCCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448104_73448105insCCT', 'vcf': {'chr': 'chr2', 'pos': '73448102', 'ref': 'T', 'alt': 'TCTC'}} + assert results['NM_015120.4:c.1580_1581insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73675231_73675232insCCT', 'vcf': {'chr': '2', 'pos': '73675229', 'ref': 'T', 'alt': 
'TCTC'}} + assert results['NM_015120.4:c.1580_1581insCCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73448104_73448105insCCT', 'vcf': {'chr': '2', 'pos': '73448102', 'ref': 'T', 'alt': 'TCTC'}} + assert results['NM_015120.4:c.1580_1581insCCT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'} def test_variant171(self): variant = 'NM_000828.4:c.-2dupG' @@ -5848,80 +5948,82 @@ def test_variant171(self): assert results['flag'] == 'gene_variant' assert 'NM_000828.4:c.-2dup' in list(results.keys()) - assert results['NM_000828.4:c.-2dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000828.4:c.-2dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000828.4:c.-2dup']['alt_genomic_loci'], []) - assert results['NM_000828.4:c.-2dup']['gene_symbol'] == 'GRIA3' - assert results['NM_000828.4:c.-2dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} assert results['NM_000828.4:c.-2dup']['submitted_variant'] == 'NM_000828.4:c.-2dupG' - assert results['NM_000828.4:c.-2dup']['genome_context_intronic_sequence'] == '' - assert results['NM_000828.4:c.-2dup']['hgvs_lrg_variant'] == '' + assert results['NM_000828.4:c.-2dup']['gene_symbol'] == 'GRIA3' + assert results['NM_000828.4:c.-2dup']['gene_ids'] == {'hgnc_id': 'HGNC:4573', 'entrez_gene_id': '2892', 'ucsc_id': 'uc033etl.2', 'omim_id': ['305915']} assert results['NM_000828.4:c.-2dup']['hgvs_transcript_variant'] == 'NM_000828.4:c.-2dup' + assert results['NM_000828.4:c.-2dup']['genome_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-2dup']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_000828.4:c.-2dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'AGG'}} - assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '123184533', 'alt': 'AG'}} - assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'AGG'}} - assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '123184533', 'alt': 'AG'}} - assert results['NM_000828.4:c.-2dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} - + assert results['NM_000828.4:c.-2dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} + assert results['NM_000828.4:c.-2dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000828.4:c.-2dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000828.4:c.-2dup']['alt_genomic_loci'], []) + assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'chrX', 'pos': '122318386', 'ref': 'A', 'alt': 'AGG'}} + assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'chrX', 'pos': '123184533', 'ref': 'A', 'alt': 'AG'}} + assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'X', 'pos': '122318386', 'ref': 'A', 'alt': 'AGG'}} + assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'X', 'pos': '123184533', 'ref': 'A', 'alt': 'AG'}} + assert results['NM_000828.4:c.-2dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3'} def test_variant172(self): variant = 'X-122318386-A-AGG' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_007325.4:c.-2dup' in list(results.keys()) - assert results['NM_007325.4:c.-2dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_007325.4:c.-2dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_007325.4:c.-2dup']['alt_genomic_loci'], []) - assert results['NM_007325.4:c.-2dup']['gene_symbol'] == 'GRIA3' - assert results['NM_007325.4:c.-2dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_015564.4:p.?', 'slr': 'NP_015564.4:p.?'} - assert results['NM_007325.4:c.-2dup']['submitted_variant'] == 'X-122318386-A-AGG' - assert results['NM_007325.4:c.-2dup']['genome_context_intronic_sequence'] == '' - assert results['NM_007325.4:c.-2dup']['hgvs_lrg_variant'] == '' - assert results['NM_007325.4:c.-2dup']['hgvs_transcript_variant'] == 'NM_007325.4:c.-2dup' - assert results['NM_007325.4:c.-2dup']['hgvs_refseqgene_variant'] == 'NG_009377.1:g.5292dup' - assert results['NM_007325.4:c.-2dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'AGG'}} - assert results['NM_007325.4:c.-2dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '123184533', 'alt': 'AG'}} - 
assert results['NM_007325.4:c.-2dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'AGG'}} - assert results['NM_007325.4:c.-2dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '123184533', 'alt': 'AG'}} - assert results['NM_007325.4:c.-2dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009377.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_015564.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007325.4'} - assert results['flag'] == 'gene_variant' assert 'NM_001256743.1:c.-2dup' in list(results.keys()) - assert results['NM_001256743.1:c.-2dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001256743.1:c.-2dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001256743.1:c.-2dup']['alt_genomic_loci'], []) - assert results['NM_001256743.1:c.-2dup']['gene_symbol'] == 'GRIA3' - assert results['NM_001256743.1:c.-2dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243672.1:p.?', 'slr': 'NP_001243672.1:p.?'} assert results['NM_001256743.1:c.-2dup']['submitted_variant'] == 'X-122318386-A-AGG' - assert results['NM_001256743.1:c.-2dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001256743.1:c.-2dup']['hgvs_lrg_variant'] == '' + assert results['NM_001256743.1:c.-2dup']['gene_symbol'] == 'GRIA3' + assert results['NM_001256743.1:c.-2dup']['gene_ids'] == {'hgnc_id': 'HGNC:4573', 'entrez_gene_id': '2892', 'ucsc_id': 'uc033etl.2', 'omim_id': ['305915']} assert results['NM_001256743.1:c.-2dup']['hgvs_transcript_variant'] == 'NM_001256743.1:c.-2dup' + assert results['NM_001256743.1:c.-2dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001256743.1:c.-2dup']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_001256743.1:c.-2dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001256743.1:c.-2dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'AGG'}} - assert results['NM_001256743.1:c.-2dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '123184533', 'alt': 'AG'}} - assert results['NM_001256743.1:c.-2dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'AGG'}} - assert results['NM_001256743.1:c.-2dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '123184533', 'alt': 'AG'}} - assert results['NM_001256743.1:c.-2dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243672.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256743.1'} + assert results['NM_001256743.1:c.-2dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243672.1:p.?', 'slr': 'NP_001243672.1:p.?'} + assert results['NM_001256743.1:c.-2dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001256743.1:c.-2dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001256743.1:c.-2dup']['alt_genomic_loci'], []) + assert results['NM_001256743.1:c.-2dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'chrX', 'pos': '122318386', 'ref': 'A', 'alt': 'AGG'}} + assert results['NM_001256743.1:c.-2dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'chrX', 'pos': '123184533', 'ref': 'A', 'alt': 'AG'}} + assert 
results['NM_001256743.1:c.-2dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'X', 'pos': '122318386', 'ref': 'A', 'alt': 'AGG'}} + assert results['NM_001256743.1:c.-2dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'X', 'pos': '123184533', 'ref': 'A', 'alt': 'AG'}} + assert results['NM_001256743.1:c.-2dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256743.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243672.1'} + + assert 'NM_007325.4:c.-2dup' in list(results.keys()) + assert results['NM_007325.4:c.-2dup']['submitted_variant'] == 'X-122318386-A-AGG' + assert results['NM_007325.4:c.-2dup']['gene_symbol'] == 'GRIA3' + assert results['NM_007325.4:c.-2dup']['gene_ids'] == {'hgnc_id': 'HGNC:4573', 'entrez_gene_id': '2892', 'ucsc_id': 'uc033etl.2', 'omim_id': ['305915']} + assert results['NM_007325.4:c.-2dup']['hgvs_transcript_variant'] == 'NM_007325.4:c.-2dup' + assert results['NM_007325.4:c.-2dup']['genome_context_intronic_sequence'] == '' + assert results['NM_007325.4:c.-2dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007325.4:c.-2dup']['hgvs_refseqgene_variant'] == 'NG_009377.1:g.5292dup' + assert results['NM_007325.4:c.-2dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_015564.4:p.?', 'slr': 'NP_015564.4:p.?'} + assert results['NM_007325.4:c.-2dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007325.4:c.-2dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_007325.4:c.-2dup']['alt_genomic_loci'], []) + assert results['NM_007325.4:c.-2dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'chrX', 'pos': '122318386', 'ref': 'A', 'alt': 'AGG'}} + assert results['NM_007325.4:c.-2dup']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'chrX', 'pos': '123184533', 'ref': 'A', 'alt': 'AG'}} + assert results['NM_007325.4:c.-2dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'X', 'pos': '122318386', 'ref': 'A', 'alt': 'AGG'}} + assert results['NM_007325.4:c.-2dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'X', 'pos': '123184533', 'ref': 'A', 'alt': 'AG'}} + assert results['NM_007325.4:c.-2dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007325.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_015564.4', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009377.1'} assert 'NM_000828.4:c.-2dup' in list(results.keys()) - assert results['NM_000828.4:c.-2dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000828.4:c.-2dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000828.4:c.-2dup']['alt_genomic_loci'], []) - assert results['NM_000828.4:c.-2dup']['gene_symbol'] == 'GRIA3' - assert results['NM_000828.4:c.-2dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} assert results['NM_000828.4:c.-2dup']['submitted_variant'] == 'X-122318386-A-AGG' - assert results['NM_000828.4:c.-2dup']['genome_context_intronic_sequence'] == '' - assert results['NM_000828.4:c.-2dup']['hgvs_lrg_variant'] == '' + assert results['NM_000828.4:c.-2dup']['gene_symbol'] == 'GRIA3' + assert results['NM_000828.4:c.-2dup']['gene_ids'] == {'hgnc_id': 'HGNC:4573', 'entrez_gene_id': '2892', 'ucsc_id': 'uc033etl.2', 'omim_id': ['305915']} assert results['NM_000828.4:c.-2dup']['hgvs_transcript_variant'] == 'NM_000828.4:c.-2dup' + assert results['NM_000828.4:c.-2dup']['genome_context_intronic_sequence'] == '' + assert 
results['NM_000828.4:c.-2dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000828.4:c.-2dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'AGG'}} - assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '123184533', 'alt': 'AG'}} - assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'AGG'}} - assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '123184533', 'alt': 'AG'}} - assert results['NM_000828.4:c.-2dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} - + assert results['NM_000828.4:c.-2dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} + assert results['NM_000828.4:c.-2dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000828.4:c.-2dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000828.4:c.-2dup']['alt_genomic_loci'], []) + assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'chrX', 'pos': '122318386', 'ref': 'A', 'alt': 'AGG'}} + assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'chrX', 'pos': '123184533', 'ref': 'A', 'alt': 'AG'}} + assert 
results['NM_000828.4:c.-2dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGG', 'vcf': {'chr': 'X', 'pos': '122318386', 'ref': 'A', 'alt': 'AGG'}} + assert results['NM_000828.4:c.-2dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534dup', 'vcf': {'chr': 'X', 'pos': '123184533', 'ref': 'A', 'alt': 'AG'}} + assert results['NM_000828.4:c.-2dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3'} def test_variant173(self): variant = 'NM_000828.4:c.-2G>T' @@ -5930,46 +6032,46 @@ def test_variant173(self): assert results['flag'] == 'gene_variant' assert 'NM_000828.4:c.-2G>T' in list(results.keys()) - assert results['NM_000828.4:c.-2G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000828.4:c.-2G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000828.4:c.-2G>T']['alt_genomic_loci'], []) - assert results['NM_000828.4:c.-2G>T']['gene_symbol'] == 'GRIA3' - assert results['NM_000828.4:c.-2G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} assert results['NM_000828.4:c.-2G>T']['submitted_variant'] == 'NM_000828.4:c.-2G>T' - assert results['NM_000828.4:c.-2G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_000828.4:c.-2G>T']['hgvs_lrg_variant'] == '' + assert results['NM_000828.4:c.-2G>T']['gene_symbol'] == 'GRIA3' + assert results['NM_000828.4:c.-2G>T']['gene_ids'] == {'hgnc_id': 'HGNC:4573', 'entrez_gene_id': '2892', 'ucsc_id': 'uc033etl.2', 'omim_id': ['305915']} assert results['NM_000828.4:c.-2G>T']['hgvs_transcript_variant'] == 'NM_000828.4:c.-2G>T' + assert results['NM_000828.4:c.-2G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-2G>T']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_000828.4:c.-2G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'AT'}} - assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '123184534', 'alt': 'T'}} - assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'AT'}} - assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '123184534', 'alt': 'T'}} - assert results['NM_000828.4:c.-2G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} - + assert results['NM_000828.4:c.-2G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} + assert results['NM_000828.4:c.-2G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000828.4:c.-2G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000828.4:c.-2G>T']['alt_genomic_loci'], []) + assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'chrX', 'pos': '122318386', 'ref': 'A', 'alt': 'AT'}} + assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'chrX', 'pos': '123184534', 'ref': 'G', 'alt': 'T'}} + assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'X', 'pos': '122318386', 'ref': 'A', 'alt': 'AT'}} + assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'X', 'pos': '123184534', 'ref': 'G', 'alt': 'T'}} + assert results['NM_000828.4:c.-2G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3'} def test_variant174(self): variant = 'NM_000828.4:c.-2G=' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_000828.4:c.-2G=' in list(results.keys()) - assert results['NM_000828.4:c.-2G=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000828.4:c.-2G=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000828.4:c.-2G=']['alt_genomic_loci'], []) - assert results['NM_000828.4:c.-2G=']['gene_symbol'] == 'GRIA3' - assert results['NM_000828.4:c.-2G=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} assert results['NM_000828.4:c.-2G=']['submitted_variant'] == 'NM_000828.4:c.-2G=' - assert results['NM_000828.4:c.-2G=']['genome_context_intronic_sequence'] == '' - assert results['NM_000828.4:c.-2G=']['hgvs_lrg_variant'] == '' + assert results['NM_000828.4:c.-2G=']['gene_symbol'] == 'GRIA3' + assert results['NM_000828.4:c.-2G=']['gene_ids'] == {'hgnc_id': 'HGNC:4573', 'entrez_gene_id': '2892', 'ucsc_id': 'uc033etl.2', 'omim_id': ['305915']} assert results['NM_000828.4:c.-2G=']['hgvs_transcript_variant'] == 'NM_000828.4:c.-2G=' + assert results['NM_000828.4:c.-2G=']['genome_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-2G=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000828.4:c.-2G=']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_000828.4:c.-2G=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insG', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'AG'}} - assert results['NM_000828.4:c.-2G=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G=', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '123184534', 'alt': 'G'}} - assert results['NM_000828.4:c.-2G=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insG', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'AG'}} - assert results['NM_000828.4:c.-2G=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G=', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '123184534', 'alt': 'G'}} - assert results['NM_000828.4:c.-2G=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} - - assert results['flag'] == 'gene_variant' + assert results['NM_000828.4:c.-2G=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} + assert results['NM_000828.4:c.-2G=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000828.4:c.-2G=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000828.4:c.-2G=']['alt_genomic_loci'], []) + assert results['NM_000828.4:c.-2G=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insG', 'vcf': {'chr': 'chrX', 'pos': '122318386', 'ref': 'A', 'alt': 'AG'}} + assert results['NM_000828.4:c.-2G=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G=', 'vcf': {'chr': 'chrX', 'pos': '123184534', 'ref': 'G', 'alt': 'G'}} + assert results['NM_000828.4:c.-2G=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insG', 'vcf': 
{'chr': 'X', 'pos': '122318386', 'ref': 'A', 'alt': 'AG'}} + assert results['NM_000828.4:c.-2G=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G=', 'vcf': {'chr': 'X', 'pos': '123184534', 'ref': 'G', 'alt': 'G'}} + assert results['NM_000828.4:c.-2G=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3'} def test_variant175(self): variant = 'X-122318386-A-AT' @@ -5977,57 +6079,59 @@ def test_variant175(self): print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000828.4:c.-2G>T' in list(results.keys()) - assert results['NM_000828.4:c.-2G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000828.4:c.-2G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000828.4:c.-2G>T']['alt_genomic_loci'], []) - assert results['NM_000828.4:c.-2G>T']['gene_symbol'] == 'GRIA3' - assert results['NM_000828.4:c.-2G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} - assert results['NM_000828.4:c.-2G>T']['submitted_variant'] == 'X-122318386-A-AT' - assert results['NM_000828.4:c.-2G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_000828.4:c.-2G>T']['hgvs_lrg_variant'] == '' - assert results['NM_000828.4:c.-2G>T']['hgvs_transcript_variant'] == 'NM_000828.4:c.-2G>T' - assert results['NM_000828.4:c.-2G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'AT'}} - assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '123184534', 'alt': 'T'}} - assert 
results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'AT'}} - assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '123184534', 'alt': 'T'}} - assert results['NM_000828.4:c.-2G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} - assert 'NM_001256743.1:c.-2G>T' in list(results.keys()) - assert results['NM_001256743.1:c.-2G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001256743.1:c.-2G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001256743.1:c.-2G>T']['alt_genomic_loci'], []) - assert results['NM_001256743.1:c.-2G>T']['gene_symbol'] == 'GRIA3' - assert results['NM_001256743.1:c.-2G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243672.1:p.?', 'slr': 'NP_001243672.1:p.?'} assert results['NM_001256743.1:c.-2G>T']['submitted_variant'] == 'X-122318386-A-AT' - assert results['NM_001256743.1:c.-2G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001256743.1:c.-2G>T']['hgvs_lrg_variant'] == '' + assert results['NM_001256743.1:c.-2G>T']['gene_symbol'] == 'GRIA3' + assert results['NM_001256743.1:c.-2G>T']['gene_ids'] == {'hgnc_id': 'HGNC:4573', 'entrez_gene_id': '2892', 'ucsc_id': 'uc033etl.2', 'omim_id': ['305915']} assert results['NM_001256743.1:c.-2G>T']['hgvs_transcript_variant'] == 'NM_001256743.1:c.-2G>T' + assert results['NM_001256743.1:c.-2G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001256743.1:c.-2G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001256743.1:c.-2G>T']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_001256743.1:c.-2G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'AT'}} - assert results['NM_001256743.1:c.-2G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '123184534', 'alt': 'T'}} - assert results['NM_001256743.1:c.-2G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'AT'}} - assert results['NM_001256743.1:c.-2G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '123184534', 'alt': 'T'}} - assert results['NM_001256743.1:c.-2G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243672.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256743.1'} + assert results['NM_001256743.1:c.-2G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243672.1:p.?', 'slr': 'NP_001243672.1:p.?'} + assert results['NM_001256743.1:c.-2G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001256743.1:c.-2G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001256743.1:c.-2G>T']['alt_genomic_loci'], []) + assert results['NM_001256743.1:c.-2G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'chrX', 'pos': '122318386', 'ref': 'A', 'alt': 'AT'}} + assert results['NM_001256743.1:c.-2G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'chrX', 'pos': '123184534', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001256743.1:c.-2G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'X', 'pos': '122318386', 'ref': 'A', 'alt': 'AT'}} + assert results['NM_001256743.1:c.-2G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'X', 'pos': '123184534', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001256743.1:c.-2G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256743.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243672.1'} assert 'NM_007325.4:c.-2G>T' in list(results.keys()) - assert results['NM_007325.4:c.-2G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_007325.4:c.-2G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_007325.4:c.-2G>T']['alt_genomic_loci'], []) - assert results['NM_007325.4:c.-2G>T']['gene_symbol'] == 'GRIA3' - assert results['NM_007325.4:c.-2G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_015564.4:p.?', 'slr': 'NP_015564.4:p.?'} assert results['NM_007325.4:c.-2G>T']['submitted_variant'] == 'X-122318386-A-AT' - assert results['NM_007325.4:c.-2G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_007325.4:c.-2G>T']['hgvs_lrg_variant'] == '' + assert results['NM_007325.4:c.-2G>T']['gene_symbol'] == 'GRIA3' + assert results['NM_007325.4:c.-2G>T']['gene_ids'] == {'hgnc_id': 'HGNC:4573', 'entrez_gene_id': '2892', 'ucsc_id': 'uc033etl.2', 'omim_id': ['305915']} assert results['NM_007325.4:c.-2G>T']['hgvs_transcript_variant'] == 'NM_007325.4:c.-2G>T' + assert results['NM_007325.4:c.-2G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_007325.4:c.-2G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007325.4:c.-2G>T']['hgvs_refseqgene_variant'] == 'NG_009377.1:g.5292G>T' - assert results['NM_007325.4:c.-2G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'chrX', 'ref': 
'A', 'pos': '122318386', 'alt': 'AT'}} - assert results['NM_007325.4:c.-2G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '123184534', 'alt': 'T'}} - assert results['NM_007325.4:c.-2G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'AT'}} - assert results['NM_007325.4:c.-2G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '123184534', 'alt': 'T'}} - assert results['NM_007325.4:c.-2G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009377.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_015564.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007325.4'} + assert results['NM_007325.4:c.-2G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_015564.4:p.?', 'slr': 'NP_015564.4:p.?'} + assert results['NM_007325.4:c.-2G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007325.4:c.-2G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_007325.4:c.-2G>T']['alt_genomic_loci'], []) + assert results['NM_007325.4:c.-2G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'chrX', 'pos': '122318386', 'ref': 'A', 'alt': 'AT'}} + assert results['NM_007325.4:c.-2G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'chrX', 'pos': '123184534', 'ref': 'G', 'alt': 'T'}} + assert results['NM_007325.4:c.-2G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'X', 'pos': '122318386', 'ref': 'A', 'alt': 'AT'}} + assert results['NM_007325.4:c.-2G>T']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'X', 'pos': '123184534', 'ref': 'G', 'alt': 'T'}} + assert results['NM_007325.4:c.-2G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007325.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_015564.4', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009377.1'} + assert 'NM_000828.4:c.-2G>T' in list(results.keys()) + assert results['NM_000828.4:c.-2G>T']['submitted_variant'] == 'X-122318386-A-AT' + assert results['NM_000828.4:c.-2G>T']['gene_symbol'] == 'GRIA3' + assert results['NM_000828.4:c.-2G>T']['gene_ids'] == {'hgnc_id': 'HGNC:4573', 'entrez_gene_id': '2892', 'ucsc_id': 'uc033etl.2', 'omim_id': ['305915']} + assert results['NM_000828.4:c.-2G>T']['hgvs_transcript_variant'] == 'NM_000828.4:c.-2G>T' + assert results['NM_000828.4:c.-2G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-2G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-2G>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_000828.4:c.-2G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} + assert results['NM_000828.4:c.-2G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000828.4:c.-2G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000828.4:c.-2G>T']['alt_genomic_loci'], []) + assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'chrX', 'pos': '122318386', 'ref': 'A', 'alt': 'AT'}} + assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'chrX', 'pos': '123184534', 'ref': 'G', 'alt': 'T'}} + assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000023.10:g.122318386_122318387insT', 'vcf': {'chr': 'X', 'pos': '122318386', 'ref': 'A', 'alt': 'AT'}} + assert results['NM_000828.4:c.-2G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534G>T', 'vcf': {'chr': 'X', 'pos': '123184534', 'ref': 'G', 'alt': 'T'}} + assert results['NM_000828.4:c.-2G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3'} def test_variant176(self): variant = 'NM_000828.4:c.-2_-1insT' @@ -6036,46 +6140,46 @@ def test_variant176(self): assert results['flag'] == 'gene_variant' assert 'NM_000828.4:c.-2_-1insT' in list(results.keys()) - assert results['NM_000828.4:c.-2_-1insT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000828.4:c.-2_-1insT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000828.4:c.-2_-1insT']['alt_genomic_loci'], []) - assert results['NM_000828.4:c.-2_-1insT']['gene_symbol'] == 'GRIA3' - assert results['NM_000828.4:c.-2_-1insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} assert results['NM_000828.4:c.-2_-1insT']['submitted_variant'] == 'NM_000828.4:c.-2_-1insT' - assert results['NM_000828.4:c.-2_-1insT']['genome_context_intronic_sequence'] == '' - assert results['NM_000828.4:c.-2_-1insT']['hgvs_lrg_variant'] == '' + assert results['NM_000828.4:c.-2_-1insT']['gene_symbol'] == 'GRIA3' + assert results['NM_000828.4:c.-2_-1insT']['gene_ids'] == {'hgnc_id': 'HGNC:4573', 'entrez_gene_id': '2892', 'ucsc_id': 'uc033etl.2', 'omim_id': ['305915']} assert results['NM_000828.4:c.-2_-1insT']['hgvs_transcript_variant'] == 'NM_000828.4:c.-2_-1insT' + assert results['NM_000828.4:c.-2_-1insT']['genome_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-2_-1insT']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_000828.4:c.-2_-1insT']['hgvs_refseqgene_variant'] == '' - assert results['NM_000828.4:c.-2_-1insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'AGT'}} - assert results['NM_000828.4:c.-2_-1insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534_123184535insT', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '123184534', 'alt': 'GT'}} - assert results['NM_000828.4:c.-2_-1insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'AGT'}} - assert results['NM_000828.4:c.-2_-1insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534_123184535insT', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '123184534', 'alt': 'GT'}} - assert results['NM_000828.4:c.-2_-1insT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} - + assert results['NM_000828.4:c.-2_-1insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} + assert results['NM_000828.4:c.-2_-1insT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000828.4:c.-2_-1insT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000828.4:c.-2_-1insT']['alt_genomic_loci'], []) + assert results['NM_000828.4:c.-2_-1insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGT', 'vcf': {'chr': 'chrX', 'pos': '122318386', 'ref': 'A', 'alt': 'AGT'}} + assert results['NM_000828.4:c.-2_-1insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534_123184535insT', 'vcf': {'chr': 'chrX', 'pos': '123184534', 'ref': 'G', 'alt': 'GT'}} + assert 
results['NM_000828.4:c.-2_-1insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insGT', 'vcf': {'chr': 'X', 'pos': '122318386', 'ref': 'A', 'alt': 'AGT'}} + assert results['NM_000828.4:c.-2_-1insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534_123184535insT', 'vcf': {'chr': 'X', 'pos': '123184534', 'ref': 'G', 'alt': 'GT'}} + assert results['NM_000828.4:c.-2_-1insT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3'} def test_variant177(self): variant = 'NM_000828.4:c.-3_-2insT' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_000828.4:c.-3_-2insT' in list(results.keys()) - assert results['NM_000828.4:c.-3_-2insT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000828.4:c.-3_-2insT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000828.4:c.-3_-2insT']['alt_genomic_loci'], []) - assert results['NM_000828.4:c.-3_-2insT']['gene_symbol'] == 'GRIA3' - assert results['NM_000828.4:c.-3_-2insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} assert results['NM_000828.4:c.-3_-2insT']['submitted_variant'] == 'NM_000828.4:c.-3_-2insT' - assert results['NM_000828.4:c.-3_-2insT']['genome_context_intronic_sequence'] == '' - assert results['NM_000828.4:c.-3_-2insT']['hgvs_lrg_variant'] == '' + assert results['NM_000828.4:c.-3_-2insT']['gene_symbol'] == 'GRIA3' + assert results['NM_000828.4:c.-3_-2insT']['gene_ids'] == {'hgnc_id': 'HGNC:4573', 'entrez_gene_id': '2892', 'ucsc_id': 'uc033etl.2', 'omim_id': ['305915']} assert results['NM_000828.4:c.-3_-2insT']['hgvs_transcript_variant'] == 'NM_000828.4:c.-3_-2insT' + assert 
results['NM_000828.4:c.-3_-2insT']['genome_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-3_-2insT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000828.4:c.-3_-2insT']['hgvs_refseqgene_variant'] == '' - assert results['NM_000828.4:c.-3_-2insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insTG', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'ATG'}} - assert results['NM_000828.4:c.-3_-2insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184533_123184534insT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '123184533', 'alt': 'AT'}} - assert results['NM_000828.4:c.-3_-2insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insTG', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'ATG'}} - assert results['NM_000828.4:c.-3_-2insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184533_123184534insT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '123184533', 'alt': 'AT'}} - assert results['NM_000828.4:c.-3_-2insT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} - - assert results['flag'] == 'gene_variant' + assert results['NM_000828.4:c.-3_-2insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} + assert results['NM_000828.4:c.-3_-2insT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000828.4:c.-3_-2insT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000828.4:c.-3_-2insT']['alt_genomic_loci'], []) + assert results['NM_000828.4:c.-3_-2insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insTG', 'vcf': {'chr': 'chrX', 'pos': '122318386', 'ref': 'A', 'alt': 'ATG'}} + assert 
results['NM_000828.4:c.-3_-2insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184533_123184534insT', 'vcf': {'chr': 'chrX', 'pos': '123184533', 'ref': 'A', 'alt': 'AT'}} + assert results['NM_000828.4:c.-3_-2insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insTG', 'vcf': {'chr': 'X', 'pos': '122318386', 'ref': 'A', 'alt': 'ATG'}} + assert results['NM_000828.4:c.-3_-2insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184533_123184534insT', 'vcf': {'chr': 'X', 'pos': '123184533', 'ref': 'A', 'alt': 'AT'}} + assert results['NM_000828.4:c.-3_-2insT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3'} def test_variant178(self): variant = 'NM_000828.4:c.-2delGinsTT' @@ -6084,22 +6188,22 @@ def test_variant178(self): assert results['flag'] == 'gene_variant' assert 'NM_000828.4:c.-2delinsTT' in list(results.keys()) - assert results['NM_000828.4:c.-2delinsTT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000828.4:c.-2delinsTT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000828.4:c.-2delinsTT']['alt_genomic_loci'], []) - assert results['NM_000828.4:c.-2delinsTT']['gene_symbol'] == 'GRIA3' - assert results['NM_000828.4:c.-2delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} assert results['NM_000828.4:c.-2delinsTT']['submitted_variant'] == 'NM_000828.4:c.-2delGinsTT' - assert results['NM_000828.4:c.-2delinsTT']['genome_context_intronic_sequence'] == '' - assert results['NM_000828.4:c.-2delinsTT']['hgvs_lrg_variant'] == '' + assert results['NM_000828.4:c.-2delinsTT']['gene_symbol'] == 'GRIA3' + assert results['NM_000828.4:c.-2delinsTT']['gene_ids'] == {'hgnc_id': 'HGNC:4573', 'entrez_gene_id': '2892', 'ucsc_id': 
'uc033etl.2', 'omim_id': ['305915']} assert results['NM_000828.4:c.-2delinsTT']['hgvs_transcript_variant'] == 'NM_000828.4:c.-2delinsTT' + assert results['NM_000828.4:c.-2delinsTT']['genome_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-2delinsTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000828.4:c.-2delinsTT']['hgvs_refseqgene_variant'] == '' - assert results['NM_000828.4:c.-2delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insTT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'ATT'}} - assert results['NM_000828.4:c.-2delinsTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534delinsTT', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '123184534', 'alt': 'TT'}} - assert results['NM_000828.4:c.-2delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insTT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'ATT'}} - assert results['NM_000828.4:c.-2delinsTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534delinsTT', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '123184534', 'alt': 'TT'}} - assert results['NM_000828.4:c.-2delinsTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} - + assert results['NM_000828.4:c.-2delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} + assert results['NM_000828.4:c.-2delinsTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000828.4:c.-2delinsTT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000828.4:c.-2delinsTT']['alt_genomic_loci'], []) + assert results['NM_000828.4:c.-2delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000023.10:g.122318386_122318387insTT', 'vcf': {'chr': 'chrX', 'pos': '122318386', 'ref': 'A', 'alt': 'ATT'}} + assert results['NM_000828.4:c.-2delinsTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534delinsTT', 'vcf': {'chr': 'chrX', 'pos': '123184534', 'ref': 'G', 'alt': 'TT'}} + assert results['NM_000828.4:c.-2delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386_122318387insTT', 'vcf': {'chr': 'X', 'pos': '122318386', 'ref': 'A', 'alt': 'ATT'}} + assert results['NM_000828.4:c.-2delinsTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534delinsTT', 'vcf': {'chr': 'X', 'pos': '123184534', 'ref': 'G', 'alt': 'TT'}} + assert results['NM_000828.4:c.-2delinsTT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3'} def test_variant179(self): variant = 'NM_000828.4:c.-2_-1delGCinsTT' @@ -6108,346 +6212,358 @@ def test_variant179(self): assert results['flag'] == 'gene_variant' assert 'NM_000828.4:c.-2_-1delinsTT' in list(results.keys()) - assert results['NM_000828.4:c.-2_-1delinsTT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000828.4:c.-2_-1delinsTT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000828.4:c.-2_-1delinsTT']['alt_genomic_loci'], []) - assert results['NM_000828.4:c.-2_-1delinsTT']['gene_symbol'] == 'GRIA3' - assert results['NM_000828.4:c.-2_-1delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} assert results['NM_000828.4:c.-2_-1delinsTT']['submitted_variant'] == 'NM_000828.4:c.-2_-1delGCinsTT' - assert results['NM_000828.4:c.-2_-1delinsTT']['genome_context_intronic_sequence'] == '' - assert results['NM_000828.4:c.-2_-1delinsTT']['hgvs_lrg_variant'] == '' + assert 
results['NM_000828.4:c.-2_-1delinsTT']['gene_symbol'] == 'GRIA3' + assert results['NM_000828.4:c.-2_-1delinsTT']['gene_ids'] == {'hgnc_id': 'HGNC:4573', 'entrez_gene_id': '2892', 'ucsc_id': 'uc033etl.2', 'omim_id': ['305915']} assert results['NM_000828.4:c.-2_-1delinsTT']['hgvs_transcript_variant'] == 'NM_000828.4:c.-2_-1delinsTT' + assert results['NM_000828.4:c.-2_-1delinsTT']['genome_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-2_-1delinsTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000828.4:c.-2_-1delinsTT']['hgvs_refseqgene_variant'] == '' - assert results['NM_000828.4:c.-2_-1delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318387delinsTT', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '122318387', 'alt': 'TT'}} - assert results['NM_000828.4:c.-2_-1delinsTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534_123184535delinsTT', 'vcf': {'chr': 'chrX', 'ref': 'GC', 'pos': '123184534', 'alt': 'TT'}} - assert results['NM_000828.4:c.-2_-1delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318387delinsTT', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '122318387', 'alt': 'TT'}} - assert results['NM_000828.4:c.-2_-1delinsTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534_123184535delinsTT', 'vcf': {'chr': 'X', 'ref': 'GC', 'pos': '123184534', 'alt': 'TT'}} - assert results['NM_000828.4:c.-2_-1delinsTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} - + assert results['NM_000828.4:c.-2_-1delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} + assert results['NM_000828.4:c.-2_-1delinsTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000828.4:c.-2_-1delinsTT']['hgvs_lrg_variant'] == 
'' + self.assertCountEqual(results['NM_000828.4:c.-2_-1delinsTT']['alt_genomic_loci'], []) + assert results['NM_000828.4:c.-2_-1delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318387delinsTT', 'vcf': {'chr': 'chrX', 'pos': '122318387', 'ref': 'C', 'alt': 'TT'}} + assert results['NM_000828.4:c.-2_-1delinsTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534_123184535delinsTT', 'vcf': {'chr': 'chrX', 'pos': '123184534', 'ref': 'GC', 'alt': 'TT'}} + assert results['NM_000828.4:c.-2_-1delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318387delinsTT', 'vcf': {'chr': 'X', 'pos': '122318387', 'ref': 'C', 'alt': 'TT'}} + assert results['NM_000828.4:c.-2_-1delinsTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184534_123184535delinsTT', 'vcf': {'chr': 'X', 'pos': '123184534', 'ref': 'GC', 'alt': 'TT'}} + assert results['NM_000828.4:c.-2_-1delinsTT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3'} def test_variant180(self): variant = 'NM_000828.4:c.-3_-2delAGinsTT' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_000828.4:c.-3_-2delinsTT' in list(results.keys()) - assert results['NM_000828.4:c.-3_-2delinsTT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000828.4:c.-3_-2delinsTT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000828.4:c.-3_-2delinsTT']['alt_genomic_loci'], []) - assert results['NM_000828.4:c.-3_-2delinsTT']['gene_symbol'] == 'GRIA3' - assert results['NM_000828.4:c.-3_-2delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} assert 
results['NM_000828.4:c.-3_-2delinsTT']['submitted_variant'] == 'NM_000828.4:c.-3_-2delAGinsTT' - assert results['NM_000828.4:c.-3_-2delinsTT']['genome_context_intronic_sequence'] == '' - assert results['NM_000828.4:c.-3_-2delinsTT']['hgvs_lrg_variant'] == '' + assert results['NM_000828.4:c.-3_-2delinsTT']['gene_symbol'] == 'GRIA3' + assert results['NM_000828.4:c.-3_-2delinsTT']['gene_ids'] == {'hgnc_id': 'HGNC:4573', 'entrez_gene_id': '2892', 'ucsc_id': 'uc033etl.2', 'omim_id': ['305915']} assert results['NM_000828.4:c.-3_-2delinsTT']['hgvs_transcript_variant'] == 'NM_000828.4:c.-3_-2delinsTT' + assert results['NM_000828.4:c.-3_-2delinsTT']['genome_context_intronic_sequence'] == '' + assert results['NM_000828.4:c.-3_-2delinsTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000828.4:c.-3_-2delinsTT']['hgvs_refseqgene_variant'] == '' - assert results['NM_000828.4:c.-3_-2delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386delinsTT', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '122318386', 'alt': 'TT'}} - assert results['NM_000828.4:c.-3_-2delinsTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184533_123184534delinsTT', 'vcf': {'chr': 'chrX', 'ref': 'AG', 'pos': '123184533', 'alt': 'TT'}} - assert results['NM_000828.4:c.-3_-2delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386delinsTT', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '122318386', 'alt': 'TT'}} - assert results['NM_000828.4:c.-3_-2delinsTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184533_123184534delinsTT', 'vcf': {'chr': 'X', 'ref': 'AG', 'pos': '123184533', 'alt': 'TT'}} - assert results['NM_000828.4:c.-3_-2delinsTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4'} - - assert results['flag'] == 
'gene_variant' + assert results['NM_000828.4:c.-3_-2delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000819.3:p.?', 'slr': 'NP_000819.3:p.?'} + assert results['NM_000828.4:c.-3_-2delinsTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000828.4:c.-3_-2delinsTT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000828.4:c.-3_-2delinsTT']['alt_genomic_loci'], []) + assert results['NM_000828.4:c.-3_-2delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386delinsTT', 'vcf': {'chr': 'chrX', 'pos': '122318386', 'ref': 'A', 'alt': 'TT'}} + assert results['NM_000828.4:c.-3_-2delinsTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184533_123184534delinsTT', 'vcf': {'chr': 'chrX', 'pos': '123184533', 'ref': 'AG', 'alt': 'TT'}} + assert results['NM_000828.4:c.-3_-2delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.122318386delinsTT', 'vcf': {'chr': 'X', 'pos': '122318386', 'ref': 'A', 'alt': 'TT'}} + assert results['NM_000828.4:c.-3_-2delinsTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.123184533_123184534delinsTT', 'vcf': {'chr': 'X', 'pos': '123184533', 'ref': 'AG', 'alt': 'TT'}} + assert results['NM_000828.4:c.-3_-2delinsTT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000828.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000819.3'} def test_variant181(self): variant = '15-72105929-C-C' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_014249.3:c.951dup' in list(results.keys()) - assert results['NM_014249.3:c.951dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_014249.3:c.951dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_014249.3:c.951dup']['alt_genomic_loci'], []) - assert 
results['NM_014249.3:c.951dup']['gene_symbol'] == 'NR2E3' - assert results['NM_014249.3:c.951dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Thr318HisfsTer23)', 'slr': 'NP_055064.1:p.(T318Hfs*23)'} - assert results['NM_014249.3:c.951dup']['submitted_variant'] == '15-72105929-C-C' - assert results['NM_014249.3:c.951dup']['genome_context_intronic_sequence'] == '' - assert results['NM_014249.3:c.951dup']['hgvs_lrg_variant'] == '' - assert results['NM_014249.3:c.951dup']['hgvs_transcript_variant'] == 'NM_014249.3:c.951dup' - assert results['NM_014249.3:c.951dup']['hgvs_refseqgene_variant'] == 'NG_009113.1:g.8039dup' - assert results['NM_014249.3:c.951dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': 'chr15', 'ref': 'GTGGACCCCCA', 'pos': '72105924', 'alt': 'GTGGACCCCCA'}} - assert results['NM_014249.3:c.951dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813592dup', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': 'AC'}} - assert results['NM_014249.3:c.951dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': '15', 'ref': 'GTGGACCCCCA', 'pos': '72105924', 'alt': 'GTGGACCCCCA'}} - assert results['NM_014249.3:c.951dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813592dup', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': 'AC'}} - assert results['NM_014249.3:c.951dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3'} - + assert results['flag'] == 'gene_variant' assert 'NM_014249.2:c.951dup' in list(results.keys()) - assert results['NM_014249.2:c.951dup']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_014249.2:c.951dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_014249.2:c.951dup']['alt_genomic_loci'], []) - assert results['NM_014249.2:c.951dup']['gene_symbol'] == 'NR2E3' - assert results['NM_014249.2:c.951dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Thr318HisfsTer23)', 'slr': 'NP_055064.1:p.(T318Hfs*23)'} assert results['NM_014249.2:c.951dup']['submitted_variant'] == '15-72105929-C-C' - assert results['NM_014249.2:c.951dup']['genome_context_intronic_sequence'] == '' - assert results['NM_014249.2:c.951dup']['hgvs_lrg_variant'] == '' + assert results['NM_014249.2:c.951dup']['gene_symbol'] == 'NR2E3' + assert results['NM_014249.2:c.951dup']['gene_ids'] == {'hgnc_id': 'HGNC:7974', 'entrez_gene_id': '10002', 'ucsc_id': 'uc032cil.2', 'omim_id': ['604485']} assert results['NM_014249.2:c.951dup']['hgvs_transcript_variant'] == 'NM_014249.2:c.951dup' + assert results['NM_014249.2:c.951dup']['genome_context_intronic_sequence'] == '' + assert results['NM_014249.2:c.951dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014249.2:c.951dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_014249.2:c.951dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': 'chr15', 'ref': 'GTGGACCCCCA', 'pos': '72105924', 'alt': 'GTGGACCCCCA'}} + assert results['NM_014249.2:c.951dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Thr318HisfsTer23)', 'slr': 'NP_055064.1:p.(T318Hfs*23)'} + assert results['NM_014249.2:c.951dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014249.2:c.951dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_014249.2:c.951dup']['alt_genomic_loci'], []) + assert results['NM_014249.2:c.951dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': 'chr15', 'pos': '72105924', 'ref': 
'GTGGACCCCCA', 'alt': 'GTGGACCCCCA'}} assert 'hg38' not in list(results['NM_014249.2:c.951dup']['primary_assembly_loci'].keys()) - assert results['NM_014249.2:c.951dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': '15', 'ref': 'GTGGACCCCCA', 'pos': '72105924', 'alt': 'GTGGACCCCCA'}} + assert results['NM_014249.2:c.951dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': '15', 'pos': '72105924', 'ref': 'GTGGACCCCCA', 'alt': 'GTGGACCCCCA'}} assert 'grch38' not in list(results['NM_014249.2:c.951dup']['primary_assembly_loci'].keys()) - assert results['NM_014249.2:c.951dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2'} + assert results['NM_014249.2:c.951dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1'} - assert results['flag'] == 'gene_variant' assert 'NM_016346.3:c.951dup' in list(results.keys()) - assert results['NM_016346.3:c.951dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_016346.3:c.951dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_016346.3:c.951dup']['alt_genomic_loci'], []) - assert results['NM_016346.3:c.951dup']['gene_symbol'] == 'NR2E3' - assert results['NM_016346.3:c.951dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Thr318HisfsTer23)', 'slr': 'NP_057430.1:p.(T318Hfs*23)'} assert results['NM_016346.3:c.951dup']['submitted_variant'] == '15-72105929-C-C' - assert results['NM_016346.3:c.951dup']['genome_context_intronic_sequence'] == '' - assert results['NM_016346.3:c.951dup']['hgvs_lrg_variant'] == '' + assert results['NM_016346.3:c.951dup']['gene_symbol'] == 'NR2E3' + assert 
results['NM_016346.3:c.951dup']['gene_ids'] == {'hgnc_id': 'HGNC:7974', 'entrez_gene_id': '10002', 'ucsc_id': 'uc032cil.2', 'omim_id': ['604485']} assert results['NM_016346.3:c.951dup']['hgvs_transcript_variant'] == 'NM_016346.3:c.951dup' + assert results['NM_016346.3:c.951dup']['genome_context_intronic_sequence'] == '' + assert results['NM_016346.3:c.951dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_016346.3:c.951dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_016346.3:c.951dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': 'chr15', 'ref': 'GTGGACCCCCA', 'pos': '72105924', 'alt': 'GTGGACCCCCA'}} - assert results['NM_016346.3:c.951dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813592dup', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': 'AC'}} - assert results['NM_016346.3:c.951dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': '15', 'ref': 'GTGGACCCCCA', 'pos': '72105924', 'alt': 'GTGGACCCCCA'}} - assert results['NM_016346.3:c.951dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813592dup', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': 'AC'}} - assert results['NM_016346.3:c.951dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3'} + assert results['NM_016346.3:c.951dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Thr318HisfsTer23)', 'slr': 'NP_057430.1:p.(T318Hfs*23)'} + assert results['NM_016346.3:c.951dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_016346.3:c.951dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_016346.3:c.951dup']['alt_genomic_loci'], []) + assert 
results['NM_016346.3:c.951dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': 'chr15', 'pos': '72105924', 'ref': 'GTGGACCCCCA', 'alt': 'GTGGACCCCCA'}} + assert results['NM_016346.3:c.951dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813592dup', 'vcf': {'chr': 'chr15', 'pos': '71813588', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_016346.3:c.951dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': '15', 'pos': '72105924', 'ref': 'GTGGACCCCCA', 'alt': 'GTGGACCCCCA'}} + assert results['NM_016346.3:c.951dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813592dup', 'vcf': {'chr': '15', 'pos': '71813588', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_016346.3:c.951dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1'} assert 'NM_016346.2:c.951dup' in list(results.keys()) - assert results['NM_016346.2:c.951dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_016346.2:c.951dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_016346.2:c.951dup']['alt_genomic_loci'], []) - assert results['NM_016346.2:c.951dup']['gene_symbol'] == 'NR2E3' - assert results['NM_016346.2:c.951dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Thr318HisfsTer23)', 'slr': 'NP_057430.1:p.(T318Hfs*23)'} assert results['NM_016346.2:c.951dup']['submitted_variant'] == '15-72105929-C-C' - assert results['NM_016346.2:c.951dup']['genome_context_intronic_sequence'] == '' - assert results['NM_016346.2:c.951dup']['hgvs_lrg_variant'] == '' + assert results['NM_016346.2:c.951dup']['gene_symbol'] == 'NR2E3' + assert results['NM_016346.2:c.951dup']['gene_ids'] == {'hgnc_id': 'HGNC:7974', 'entrez_gene_id': '10002', 
'ucsc_id': 'uc032cil.2', 'omim_id': ['604485']} assert results['NM_016346.2:c.951dup']['hgvs_transcript_variant'] == 'NM_016346.2:c.951dup' + assert results['NM_016346.2:c.951dup']['genome_context_intronic_sequence'] == '' + assert results['NM_016346.2:c.951dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_016346.2:c.951dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_016346.2:c.951dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': 'chr15', 'ref': 'GTGGACCCCCA', 'pos': '72105924', 'alt': 'GTGGACCCCCA'}} + assert results['NM_016346.2:c.951dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Thr318HisfsTer23)', 'slr': 'NP_057430.1:p.(T318Hfs*23)'} + assert results['NM_016346.2:c.951dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_016346.2:c.951dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_016346.2:c.951dup']['alt_genomic_loci'], []) + assert results['NM_016346.2:c.951dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': 'chr15', 'pos': '72105924', 'ref': 'GTGGACCCCCA', 'alt': 'GTGGACCCCCA'}} assert 'hg38' not in list(results['NM_016346.2:c.951dup']['primary_assembly_loci'].keys()) - assert results['NM_016346.2:c.951dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': '15', 'ref': 'GTGGACCCCCA', 'pos': '72105924', 'alt': 'GTGGACCCCCA'}} + assert results['NM_016346.2:c.951dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': '15', 'pos': '72105924', 'ref': 'GTGGACCCCCA', 'alt': 'GTGGACCCCCA'}} assert 'grch38' not in list(results['NM_016346.2:c.951dup']['primary_assembly_loci'].keys()) - assert results['NM_016346.2:c.951dup']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2'} + assert results['NM_016346.2:c.951dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1'} + assert 'NM_014249.3:c.951dup' in list(results.keys()) + assert results['NM_014249.3:c.951dup']['submitted_variant'] == '15-72105929-C-C' + assert results['NM_014249.3:c.951dup']['gene_symbol'] == 'NR2E3' + assert results['NM_014249.3:c.951dup']['gene_ids'] == {'hgnc_id': 'HGNC:7974', 'entrez_gene_id': '10002', 'ucsc_id': 'uc032cil.2', 'omim_id': ['604485']} + assert results['NM_014249.3:c.951dup']['hgvs_transcript_variant'] == 'NM_014249.3:c.951dup' + assert results['NM_014249.3:c.951dup']['genome_context_intronic_sequence'] == '' + assert results['NM_014249.3:c.951dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014249.3:c.951dup']['hgvs_refseqgene_variant'] == 'NG_009113.1:g.8039dup' + assert results['NM_014249.3:c.951dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Thr318HisfsTer23)', 'slr': 'NP_055064.1:p.(T318Hfs*23)'} + assert results['NM_014249.3:c.951dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014249.3:c.951dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_014249.3:c.951dup']['alt_genomic_loci'], []) + assert results['NM_014249.3:c.951dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': 'chr15', 'pos': '72105924', 'ref': 'GTGGACCCCCA', 'alt': 'GTGGACCCCCA'}} + assert results['NM_014249.3:c.951dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813592dup', 'vcf': {'chr': 'chr15', 'pos': '71813588', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_014249.3:c.951dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000015.9:g.72105924_72105934=', 'vcf': {'chr': '15', 'pos': '72105924', 'ref': 'GTGGACCCCCA', 'alt': 'GTGGACCCCCA'}} + assert results['NM_014249.3:c.951dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813592dup', 'vcf': {'chr': '15', 'pos': '71813588', 'ref': 'A', 'alt': 'AC'}} + assert results['NM_014249.3:c.951dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1'} def test_variant182(self): variant = '15-72105928-AC-ATT' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_014249.2:c.947_948insTT' in list(results.keys()) - assert results['NM_014249.2:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_014249.2:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_014249.2:c.947_948insTT']['alt_genomic_loci'], []) - assert results['NM_014249.2:c.947_948insTT']['gene_symbol'] == 'NR2E3' - assert results['NM_014249.2:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Pro317SerfsTer8)', 'slr': 'NP_055064.1:p.(P317Sfs*8)'} assert results['NM_014249.2:c.947_948insTT']['submitted_variant'] == '15-72105928-AC-ATT' - assert results['NM_014249.2:c.947_948insTT']['genome_context_intronic_sequence'] == '' - assert results['NM_014249.2:c.947_948insTT']['hgvs_lrg_variant'] == '' + assert results['NM_014249.2:c.947_948insTT']['gene_symbol'] == 'NR2E3' + assert results['NM_014249.2:c.947_948insTT']['gene_ids'] == {'hgnc_id': 'HGNC:7974', 'entrez_gene_id': '10002', 'ucsc_id': 'uc032cil.2', 'omim_id': ['604485']} assert results['NM_014249.2:c.947_948insTT']['hgvs_transcript_variant'] == 'NM_014249.2:c.947_948insTT' + assert 
results['NM_014249.2:c.947_948insTT']['genome_context_intronic_sequence'] == '' + assert results['NM_014249.2:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014249.2:c.947_948insTT']['hgvs_refseqgene_variant'] == '' - assert results['NM_014249.2:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} + assert results['NM_014249.2:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Pro317SerfsTer8)', 'slr': 'NP_055064.1:p.(P317Sfs*8)'} + assert results['NM_014249.2:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014249.2:c.947_948insTT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_014249.2:c.947_948insTT']['alt_genomic_loci'], []) + assert results['NM_014249.2:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'pos': '72105929', 'ref': 'C', 'alt': 'TT'}} assert 'hg38' not in list(results['NM_014249.2:c.947_948insTT']['primary_assembly_loci'].keys()) - assert results['NM_014249.2:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} + assert results['NM_014249.2:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'pos': '72105929', 'ref': 'C', 'alt': 'TT'}} assert 'grch38' not in list(results['NM_014249.2:c.947_948insTT']['primary_assembly_loci'].keys()) - assert results['NM_014249.2:c.947_948insTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2'} + assert 
results['NM_014249.2:c.947_948insTT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1'} assert 'NM_016346.3:c.947_948insTT' in list(results.keys()) - assert results['NM_016346.3:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_016346.3:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_016346.3:c.947_948insTT']['alt_genomic_loci'], []) - assert results['NM_016346.3:c.947_948insTT']['gene_symbol'] == 'NR2E3' - assert results['NM_016346.3:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Pro317SerfsTer8)', 'slr': 'NP_057430.1:p.(P317Sfs*8)'} assert results['NM_016346.3:c.947_948insTT']['submitted_variant'] == '15-72105928-AC-ATT' - assert results['NM_016346.3:c.947_948insTT']['genome_context_intronic_sequence'] == '' - assert results['NM_016346.3:c.947_948insTT']['hgvs_lrg_variant'] == '' + assert results['NM_016346.3:c.947_948insTT']['gene_symbol'] == 'NR2E3' + assert results['NM_016346.3:c.947_948insTT']['gene_ids'] == {'hgnc_id': 'HGNC:7974', 'entrez_gene_id': '10002', 'ucsc_id': 'uc032cil.2', 'omim_id': ['604485']} assert results['NM_016346.3:c.947_948insTT']['hgvs_transcript_variant'] == 'NM_016346.3:c.947_948insTT' + assert results['NM_016346.3:c.947_948insTT']['genome_context_intronic_sequence'] == '' + assert results['NM_016346.3:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_016346.3:c.947_948insTT']['hgvs_refseqgene_variant'] == '' - assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} - assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': 
'chr15', 'ref': 'A', 'pos': '71813588', 'alt': 'ATT'}} - assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} - assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': 'ATT'}} - assert results['NM_016346.3:c.947_948insTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3'} + assert results['NM_016346.3:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Pro317SerfsTer8)', 'slr': 'NP_057430.1:p.(P317Sfs*8)'} + assert results['NM_016346.3:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_016346.3:c.947_948insTT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_016346.3:c.947_948insTT']['alt_genomic_loci'], []) + assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'pos': '72105929', 'ref': 'C', 'alt': 'TT'}} + assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': 'chr15', 'pos': '71813588', 'ref': 'A', 'alt': 'ATT'}} + assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'pos': '72105929', 'ref': 'C', 'alt': 'TT'}} + assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': '15', 'pos': '71813588', 'ref': 'A', 'alt': 'ATT'}} + assert 
results['NM_016346.3:c.947_948insTT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1'} - assert results['flag'] == 'gene_variant' assert 'NM_016346.2:c.947_948insTT' in list(results.keys()) - assert results['NM_016346.2:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_016346.2:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_016346.2:c.947_948insTT']['alt_genomic_loci'], []) - assert results['NM_016346.2:c.947_948insTT']['gene_symbol'] == 'NR2E3' - assert results['NM_016346.2:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Pro317SerfsTer8)', 'slr': 'NP_057430.1:p.(P317Sfs*8)'} assert results['NM_016346.2:c.947_948insTT']['submitted_variant'] == '15-72105928-AC-ATT' - assert results['NM_016346.2:c.947_948insTT']['genome_context_intronic_sequence'] == '' - assert results['NM_016346.2:c.947_948insTT']['hgvs_lrg_variant'] == '' + assert results['NM_016346.2:c.947_948insTT']['gene_symbol'] == 'NR2E3' + assert results['NM_016346.2:c.947_948insTT']['gene_ids'] == {'hgnc_id': 'HGNC:7974', 'entrez_gene_id': '10002', 'ucsc_id': 'uc032cil.2', 'omim_id': ['604485']} assert results['NM_016346.2:c.947_948insTT']['hgvs_transcript_variant'] == 'NM_016346.2:c.947_948insTT' + assert results['NM_016346.2:c.947_948insTT']['genome_context_intronic_sequence'] == '' + assert results['NM_016346.2:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_016346.2:c.947_948insTT']['hgvs_refseqgene_variant'] == '' - assert results['NM_016346.2:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} + assert results['NM_016346.2:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_057430.1:p.(Pro317SerfsTer8)', 'slr': 'NP_057430.1:p.(P317Sfs*8)'} + assert results['NM_016346.2:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_016346.2:c.947_948insTT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_016346.2:c.947_948insTT']['alt_genomic_loci'], []) + assert results['NM_016346.2:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'pos': '72105929', 'ref': 'C', 'alt': 'TT'}} assert 'hg38' not in list(results['NM_016346.2:c.947_948insTT']['primary_assembly_loci'].keys()) - assert results['NM_016346.2:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} + assert results['NM_016346.2:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'pos': '72105929', 'ref': 'C', 'alt': 'TT'}} assert 'grch38' not in list(results['NM_016346.2:c.947_948insTT']['primary_assembly_loci'].keys()) - assert results['NM_016346.2:c.947_948insTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2'} + assert results['NM_016346.2:c.947_948insTT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1'} assert 'NM_014249.3:c.947_948insTT' in list(results.keys()) - assert results['NM_014249.3:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_014249.3:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_014249.3:c.947_948insTT']['alt_genomic_loci'], []) - assert results['NM_014249.3:c.947_948insTT']['gene_symbol'] == 'NR2E3' - assert 
results['NM_014249.3:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Pro317SerfsTer8)', 'slr': 'NP_055064.1:p.(P317Sfs*8)'} assert results['NM_014249.3:c.947_948insTT']['submitted_variant'] == '15-72105928-AC-ATT' - assert results['NM_014249.3:c.947_948insTT']['genome_context_intronic_sequence'] == '' - assert results['NM_014249.3:c.947_948insTT']['hgvs_lrg_variant'] == '' + assert results['NM_014249.3:c.947_948insTT']['gene_symbol'] == 'NR2E3' + assert results['NM_014249.3:c.947_948insTT']['gene_ids'] == {'hgnc_id': 'HGNC:7974', 'entrez_gene_id': '10002', 'ucsc_id': 'uc032cil.2', 'omim_id': ['604485']} assert results['NM_014249.3:c.947_948insTT']['hgvs_transcript_variant'] == 'NM_014249.3:c.947_948insTT' + assert results['NM_014249.3:c.947_948insTT']['genome_context_intronic_sequence'] == '' + assert results['NM_014249.3:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014249.3:c.947_948insTT']['hgvs_refseqgene_variant'] == 'NG_009113.1:g.8035_8036insTT' - assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} - assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': 'ATT'}} - assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} - assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': 'ATT'}} - assert results['NM_014249.3:c.947_948insTT']['reference_sequence_records'] == 
{'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3'} - + assert results['NM_014249.3:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Pro317SerfsTer8)', 'slr': 'NP_055064.1:p.(P317Sfs*8)'} + assert results['NM_014249.3:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014249.3:c.947_948insTT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_014249.3:c.947_948insTT']['alt_genomic_loci'], []) + assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'pos': '72105929', 'ref': 'C', 'alt': 'TT'}} + assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': 'chr15', 'pos': '71813588', 'ref': 'A', 'alt': 'ATT'}} + assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'pos': '72105929', 'ref': 'C', 'alt': 'TT'}} + assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': '15', 'pos': '71813588', 'ref': 'A', 'alt': 'ATT'}} + assert results['NM_014249.3:c.947_948insTT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1'} def test_variant183(self): variant = '15-72105928-ACC-ATT' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_014249.2:c.947_948insTT' in 
list(results.keys()) - assert results['NM_014249.2:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_014249.2:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_014249.2:c.947_948insTT']['alt_genomic_loci'], []) - assert results['NM_014249.2:c.947_948insTT']['gene_symbol'] == 'NR2E3' - assert results['NM_014249.2:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Pro317SerfsTer8)', 'slr': 'NP_055064.1:p.(P317Sfs*8)'} assert results['NM_014249.2:c.947_948insTT']['submitted_variant'] == '15-72105928-ACC-ATT' - assert results['NM_014249.2:c.947_948insTT']['genome_context_intronic_sequence'] == '' - assert results['NM_014249.2:c.947_948insTT']['hgvs_lrg_variant'] == '' + assert results['NM_014249.2:c.947_948insTT']['gene_symbol'] == 'NR2E3' + assert results['NM_014249.2:c.947_948insTT']['gene_ids'] == {'hgnc_id': 'HGNC:7974', 'entrez_gene_id': '10002', 'ucsc_id': 'uc032cil.2', 'omim_id': ['604485']} assert results['NM_014249.2:c.947_948insTT']['hgvs_transcript_variant'] == 'NM_014249.2:c.947_948insTT' + assert results['NM_014249.2:c.947_948insTT']['genome_context_intronic_sequence'] == '' + assert results['NM_014249.2:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014249.2:c.947_948insTT']['hgvs_refseqgene_variant'] == '' - assert results['NM_014249.2:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} + assert results['NM_014249.2:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Pro317SerfsTer8)', 'slr': 'NP_055064.1:p.(P317Sfs*8)'} + assert results['NM_014249.2:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014249.2:c.947_948insTT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_014249.2:c.947_948insTT']['alt_genomic_loci'], 
[]) + assert results['NM_014249.2:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'pos': '72105929', 'ref': 'C', 'alt': 'TT'}} assert 'hg38' not in list(results['NM_014249.2:c.947_948insTT']['primary_assembly_loci'].keys()) - assert results['NM_014249.2:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} + assert results['NM_014249.2:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'pos': '72105929', 'ref': 'C', 'alt': 'TT'}} assert 'grch38' not in list(results['NM_014249.2:c.947_948insTT']['primary_assembly_loci'].keys()) - assert results['NM_014249.2:c.947_948insTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2'} + assert results['NM_014249.2:c.947_948insTT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1'} assert 'NM_016346.3:c.947_948insTT' in list(results.keys()) - assert results['NM_016346.3:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_016346.3:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_016346.3:c.947_948insTT']['alt_genomic_loci'], []) - assert results['NM_016346.3:c.947_948insTT']['gene_symbol'] == 'NR2E3' - assert results['NM_016346.3:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Pro317SerfsTer8)', 'slr': 'NP_057430.1:p.(P317Sfs*8)'} assert results['NM_016346.3:c.947_948insTT']['submitted_variant'] == '15-72105928-ACC-ATT' - assert 
results['NM_016346.3:c.947_948insTT']['genome_context_intronic_sequence'] == '' - assert results['NM_016346.3:c.947_948insTT']['hgvs_lrg_variant'] == '' + assert results['NM_016346.3:c.947_948insTT']['gene_symbol'] == 'NR2E3' + assert results['NM_016346.3:c.947_948insTT']['gene_ids'] == {'hgnc_id': 'HGNC:7974', 'entrez_gene_id': '10002', 'ucsc_id': 'uc032cil.2', 'omim_id': ['604485']} assert results['NM_016346.3:c.947_948insTT']['hgvs_transcript_variant'] == 'NM_016346.3:c.947_948insTT' + assert results['NM_016346.3:c.947_948insTT']['genome_context_intronic_sequence'] == '' + assert results['NM_016346.3:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_016346.3:c.947_948insTT']['hgvs_refseqgene_variant'] == '' - assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} - assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': 'ATT'}} - assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} - assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': 'ATT'}} - assert results['NM_016346.3:c.947_948insTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3'} + assert results['NM_016346.3:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Pro317SerfsTer8)', 'slr': 
'NP_057430.1:p.(P317Sfs*8)'} + assert results['NM_016346.3:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_016346.3:c.947_948insTT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_016346.3:c.947_948insTT']['alt_genomic_loci'], []) + assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'pos': '72105929', 'ref': 'C', 'alt': 'TT'}} + assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': 'chr15', 'pos': '71813588', 'ref': 'A', 'alt': 'ATT'}} + assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'pos': '72105929', 'ref': 'C', 'alt': 'TT'}} + assert results['NM_016346.3:c.947_948insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': '15', 'pos': '71813588', 'ref': 'A', 'alt': 'ATT'}} + assert results['NM_016346.3:c.947_948insTT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1'} - assert results['flag'] == 'gene_variant' assert 'NM_016346.2:c.947_948insTT' in list(results.keys()) - assert results['NM_016346.2:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_016346.2:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_016346.2:c.947_948insTT']['alt_genomic_loci'], []) - assert results['NM_016346.2:c.947_948insTT']['gene_symbol'] == 'NR2E3' - assert results['NM_016346.2:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Pro317SerfsTer8)', 'slr': 'NP_057430.1:p.(P317Sfs*8)'} assert 
results['NM_016346.2:c.947_948insTT']['submitted_variant'] == '15-72105928-ACC-ATT' - assert results['NM_016346.2:c.947_948insTT']['genome_context_intronic_sequence'] == '' - assert results['NM_016346.2:c.947_948insTT']['hgvs_lrg_variant'] == '' + assert results['NM_016346.2:c.947_948insTT']['gene_symbol'] == 'NR2E3' + assert results['NM_016346.2:c.947_948insTT']['gene_ids'] == {'hgnc_id': 'HGNC:7974', 'entrez_gene_id': '10002', 'ucsc_id': 'uc032cil.2', 'omim_id': ['604485']} assert results['NM_016346.2:c.947_948insTT']['hgvs_transcript_variant'] == 'NM_016346.2:c.947_948insTT' + assert results['NM_016346.2:c.947_948insTT']['genome_context_intronic_sequence'] == '' + assert results['NM_016346.2:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_016346.2:c.947_948insTT']['hgvs_refseqgene_variant'] == '' - assert results['NM_016346.2:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} + assert results['NM_016346.2:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Pro317SerfsTer8)', 'slr': 'NP_057430.1:p.(P317Sfs*8)'} + assert results['NM_016346.2:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_016346.2:c.947_948insTT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_016346.2:c.947_948insTT']['alt_genomic_loci'], []) + assert results['NM_016346.2:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'pos': '72105929', 'ref': 'C', 'alt': 'TT'}} assert 'hg38' not in list(results['NM_016346.2:c.947_948insTT']['primary_assembly_loci'].keys()) - assert results['NM_016346.2:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} + 
assert results['NM_016346.2:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'pos': '72105929', 'ref': 'C', 'alt': 'TT'}} assert 'grch38' not in list(results['NM_016346.2:c.947_948insTT']['primary_assembly_loci'].keys()) - assert results['NM_016346.2:c.947_948insTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2'} + assert results['NM_016346.2:c.947_948insTT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1'} assert 'NM_014249.3:c.947_948insTT' in list(results.keys()) - assert results['NM_014249.3:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_014249.3:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_014249.3:c.947_948insTT']['alt_genomic_loci'], []) - assert results['NM_014249.3:c.947_948insTT']['gene_symbol'] == 'NR2E3' - assert results['NM_014249.3:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Pro317SerfsTer8)', 'slr': 'NP_055064.1:p.(P317Sfs*8)'} assert results['NM_014249.3:c.947_948insTT']['submitted_variant'] == '15-72105928-ACC-ATT' - assert results['NM_014249.3:c.947_948insTT']['genome_context_intronic_sequence'] == '' - assert results['NM_014249.3:c.947_948insTT']['hgvs_lrg_variant'] == '' + assert results['NM_014249.3:c.947_948insTT']['gene_symbol'] == 'NR2E3' + assert results['NM_014249.3:c.947_948insTT']['gene_ids'] == {'hgnc_id': 'HGNC:7974', 'entrez_gene_id': '10002', 'ucsc_id': 'uc032cil.2', 'omim_id': ['604485']} assert results['NM_014249.3:c.947_948insTT']['hgvs_transcript_variant'] == 'NM_014249.3:c.947_948insTT' + assert results['NM_014249.3:c.947_948insTT']['genome_context_intronic_sequence'] == '' + assert 
results['NM_014249.3:c.947_948insTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014249.3:c.947_948insTT']['hgvs_refseqgene_variant'] == 'NG_009113.1:g.8035_8036insTT' - assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} - assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': 'ATT'}} - assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '72105929', 'alt': 'TT'}} - assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': 'ATT'}} - assert results['NM_014249.3:c.947_948insTT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3'} - + assert results['NM_014249.3:c.947_948insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Pro317SerfsTer8)', 'slr': 'NP_055064.1:p.(P317Sfs*8)'} + assert results['NM_014249.3:c.947_948insTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014249.3:c.947_948insTT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_014249.3:c.947_948insTT']['alt_genomic_loci'], []) + assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': 'chr15', 'pos': '72105929', 'ref': 'C', 'alt': 'TT'}} + assert 
results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': 'chr15', 'pos': '71813588', 'ref': 'A', 'alt': 'ATT'}} + assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105929delinsTT', 'vcf': {'chr': '15', 'pos': '72105929', 'ref': 'C', 'alt': 'TT'}} + assert results['NM_014249.3:c.947_948insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588_71813589insTT', 'vcf': {'chr': '15', 'pos': '71813588', 'ref': 'A', 'alt': 'ATT'}} + assert results['NM_014249.3:c.947_948insTT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1'} def test_variant184(self): variant = '15-72105927-GACC-GTT' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_014249.3:c.947delinsTT' in list(results.keys()) - assert results['NM_014249.3:c.947delinsTT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_014249.3:c.947delinsTT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_014249.3:c.947delinsTT']['alt_genomic_loci'], []) - assert results['NM_014249.3:c.947delinsTT']['gene_symbol'] == 'NR2E3' - assert results['NM_014249.3:c.947delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Asp316ValfsTer25)', 'slr': 'NP_055064.1:p.(D316Vfs*25)'} - assert results['NM_014249.3:c.947delinsTT']['submitted_variant'] == '15-72105927-GACC-GTT' - assert results['NM_014249.3:c.947delinsTT']['genome_context_intronic_sequence'] == '' - assert results['NM_014249.3:c.947delinsTT']['hgvs_lrg_variant'] == '' - assert results['NM_014249.3:c.947delinsTT']['hgvs_transcript_variant'] == 'NM_014249.3:c.947delinsTT' - assert 
results['NM_014249.3:c.947delinsTT']['hgvs_refseqgene_variant'] == 'NG_009113.1:g.8035delinsTT' - assert results['NM_014249.3:c.947delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'TT'}} - assert results['NM_014249.3:c.947delinsTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': 'TT'}} - assert results['NM_014249.3:c.947delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'TT'}} - assert results['NM_014249.3:c.947delinsTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588delinsTT', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': 'TT'}} - assert results['NM_014249.3:c.947delinsTT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3'} - - assert 'NM_016346.2:c.947delinsTT' in list(results.keys()) - assert results['NM_016346.2:c.947delinsTT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_016346.2:c.947delinsTT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_016346.2:c.947delinsTT']['alt_genomic_loci'], []) - assert results['NM_016346.2:c.947delinsTT']['gene_symbol'] == 'NR2E3' - assert results['NM_016346.2:c.947delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Asp316ValfsTer25)', 'slr': 'NP_057430.1:p.(D316Vfs*25)'} - assert results['NM_016346.2:c.947delinsTT']['submitted_variant'] == '15-72105927-GACC-GTT' - assert results['NM_016346.2:c.947delinsTT']['genome_context_intronic_sequence'] == '' - 
assert results['NM_016346.2:c.947delinsTT']['hgvs_lrg_variant'] == '' - assert results['NM_016346.2:c.947delinsTT']['hgvs_transcript_variant'] == 'NM_016346.2:c.947delinsTT' - assert results['NM_016346.2:c.947delinsTT']['hgvs_refseqgene_variant'] == '' - assert results['NM_016346.2:c.947delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'TT'}} - assert 'hg38' not in list(results['NM_016346.2:c.947delinsTT']['primary_assembly_loci'].keys()) - assert results['NM_016346.2:c.947delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'TT'}} - assert 'grch38' not in list(results['NM_016346.2:c.947delinsTT']['primary_assembly_loci'].keys()) - assert results['NM_016346.2:c.947delinsTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2'} - + assert results['flag'] == 'gene_variant' assert 'NM_014249.2:c.947delinsTT' in list(results.keys()) - assert results['NM_014249.2:c.947delinsTT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_014249.2:c.947delinsTT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_014249.2:c.947delinsTT']['alt_genomic_loci'], []) - assert results['NM_014249.2:c.947delinsTT']['gene_symbol'] == 'NR2E3' - assert results['NM_014249.2:c.947delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Asp316ValfsTer25)', 'slr': 'NP_055064.1:p.(D316Vfs*25)'} assert results['NM_014249.2:c.947delinsTT']['submitted_variant'] == '15-72105927-GACC-GTT' - assert results['NM_014249.2:c.947delinsTT']['genome_context_intronic_sequence'] == '' - assert results['NM_014249.2:c.947delinsTT']['hgvs_lrg_variant'] == '' + assert 
results['NM_014249.2:c.947delinsTT']['gene_symbol'] == 'NR2E3' + assert results['NM_014249.2:c.947delinsTT']['gene_ids'] == {'hgnc_id': 'HGNC:7974', 'entrez_gene_id': '10002', 'ucsc_id': 'uc032cil.2', 'omim_id': ['604485']} assert results['NM_014249.2:c.947delinsTT']['hgvs_transcript_variant'] == 'NM_014249.2:c.947delinsTT' + assert results['NM_014249.2:c.947delinsTT']['genome_context_intronic_sequence'] == '' + assert results['NM_014249.2:c.947delinsTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014249.2:c.947delinsTT']['hgvs_refseqgene_variant'] == '' - assert results['NM_014249.2:c.947delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'TT'}} + assert results['NM_014249.2:c.947delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Asp316ValfsTer25)', 'slr': 'NP_055064.1:p.(D316Vfs*25)'} + assert results['NM_014249.2:c.947delinsTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014249.2:c.947delinsTT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_014249.2:c.947delinsTT']['alt_genomic_loci'], []) + assert results['NM_014249.2:c.947delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': 'chr15', 'pos': '72105928', 'ref': 'AC', 'alt': 'TT'}} assert 'hg38' not in list(results['NM_014249.2:c.947delinsTT']['primary_assembly_loci'].keys()) - assert results['NM_014249.2:c.947delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'TT'}} + assert results['NM_014249.2:c.947delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': '15', 'pos': '72105928', 'ref': 'AC', 'alt': 'TT'}} assert 'grch38' 
not in list(results['NM_014249.2:c.947delinsTT']['primary_assembly_loci'].keys()) - assert results['NM_014249.2:c.947delinsTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2'} + assert results['NM_014249.2:c.947delinsTT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1'} - assert results['flag'] == 'gene_variant' assert 'NM_016346.3:c.947delinsTT' in list(results.keys()) - assert results['NM_016346.3:c.947delinsTT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_016346.3:c.947delinsTT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_016346.3:c.947delinsTT']['alt_genomic_loci'], []) - assert results['NM_016346.3:c.947delinsTT']['gene_symbol'] == 'NR2E3' - assert results['NM_016346.3:c.947delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Asp316ValfsTer25)', 'slr': 'NP_057430.1:p.(D316Vfs*25)'} assert results['NM_016346.3:c.947delinsTT']['submitted_variant'] == '15-72105927-GACC-GTT' - assert results['NM_016346.3:c.947delinsTT']['genome_context_intronic_sequence'] == '' - assert results['NM_016346.3:c.947delinsTT']['hgvs_lrg_variant'] == '' + assert results['NM_016346.3:c.947delinsTT']['gene_symbol'] == 'NR2E3' + assert results['NM_016346.3:c.947delinsTT']['gene_ids'] == {'hgnc_id': 'HGNC:7974', 'entrez_gene_id': '10002', 'ucsc_id': 'uc032cil.2', 'omim_id': ['604485']} assert results['NM_016346.3:c.947delinsTT']['hgvs_transcript_variant'] == 'NM_016346.3:c.947delinsTT' + assert results['NM_016346.3:c.947delinsTT']['genome_context_intronic_sequence'] == '' + assert results['NM_016346.3:c.947delinsTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_016346.3:c.947delinsTT']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_016346.3:c.947delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'TT'}} - assert results['NM_016346.3:c.947delinsTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588delinsTT', 'vcf': {'chr': 'chr15', 'ref': 'A', 'pos': '71813588', 'alt': 'TT'}} - assert results['NM_016346.3:c.947delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'TT'}} - assert results['NM_016346.3:c.947delinsTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588delinsTT', 'vcf': {'chr': '15', 'ref': 'A', 'pos': '71813588', 'alt': 'TT'}} - assert results['NM_016346.3:c.947delinsTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3'} + assert results['NM_016346.3:c.947delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Asp316ValfsTer25)', 'slr': 'NP_057430.1:p.(D316Vfs*25)'} + assert results['NM_016346.3:c.947delinsTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_016346.3:c.947delinsTT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_016346.3:c.947delinsTT']['alt_genomic_loci'], []) + assert results['NM_016346.3:c.947delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': 'chr15', 'pos': '72105928', 'ref': 'AC', 'alt': 'TT'}} + assert results['NM_016346.3:c.947delinsTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588delinsTT', 'vcf': {'chr': 'chr15', 'pos': '71813588', 'ref': 'A', 'alt': 'TT'}} + assert 
results['NM_016346.3:c.947delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': '15', 'pos': '72105928', 'ref': 'AC', 'alt': 'TT'}} + assert results['NM_016346.3:c.947delinsTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588delinsTT', 'vcf': {'chr': '15', 'pos': '71813588', 'ref': 'A', 'alt': 'TT'}} + assert results['NM_016346.3:c.947delinsTT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1'} + + assert 'NM_016346.2:c.947delinsTT' in list(results.keys()) + assert results['NM_016346.2:c.947delinsTT']['submitted_variant'] == '15-72105927-GACC-GTT' + assert results['NM_016346.2:c.947delinsTT']['gene_symbol'] == 'NR2E3' + assert results['NM_016346.2:c.947delinsTT']['gene_ids'] == {'hgnc_id': 'HGNC:7974', 'entrez_gene_id': '10002', 'ucsc_id': 'uc032cil.2', 'omim_id': ['604485']} + assert results['NM_016346.2:c.947delinsTT']['hgvs_transcript_variant'] == 'NM_016346.2:c.947delinsTT' + assert results['NM_016346.2:c.947delinsTT']['genome_context_intronic_sequence'] == '' + assert results['NM_016346.2:c.947delinsTT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_016346.2:c.947delinsTT']['hgvs_refseqgene_variant'] == '' + assert results['NM_016346.2:c.947delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Asp316ValfsTer25)', 'slr': 'NP_057430.1:p.(D316Vfs*25)'} + assert results['NM_016346.2:c.947delinsTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_016346.2:c.947delinsTT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_016346.2:c.947delinsTT']['alt_genomic_loci'], []) + assert results['NM_016346.2:c.947delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': 'chr15', 'pos': '72105928', 
'ref': 'AC', 'alt': 'TT'}} + assert 'hg38' not in list(results['NM_016346.2:c.947delinsTT']['primary_assembly_loci'].keys()) + assert results['NM_016346.2:c.947delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': '15', 'pos': '72105928', 'ref': 'AC', 'alt': 'TT'}} + assert 'grch38' not in list(results['NM_016346.2:c.947delinsTT']['primary_assembly_loci'].keys()) + assert results['NM_016346.2:c.947delinsTT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1'} + assert 'NM_014249.3:c.947delinsTT' in list(results.keys()) + assert results['NM_014249.3:c.947delinsTT']['submitted_variant'] == '15-72105927-GACC-GTT' + assert results['NM_014249.3:c.947delinsTT']['gene_symbol'] == 'NR2E3' + assert results['NM_014249.3:c.947delinsTT']['gene_ids'] == {'hgnc_id': 'HGNC:7974', 'entrez_gene_id': '10002', 'ucsc_id': 'uc032cil.2', 'omim_id': ['604485']} + assert results['NM_014249.3:c.947delinsTT']['hgvs_transcript_variant'] == 'NM_014249.3:c.947delinsTT' + assert results['NM_014249.3:c.947delinsTT']['genome_context_intronic_sequence'] == '' + assert results['NM_014249.3:c.947delinsTT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014249.3:c.947delinsTT']['hgvs_refseqgene_variant'] == 'NG_009113.1:g.8035delinsTT' + assert results['NM_014249.3:c.947delinsTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Asp316ValfsTer25)', 'slr': 'NP_055064.1:p.(D316Vfs*25)'} + assert results['NM_014249.3:c.947delinsTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014249.3:c.947delinsTT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_014249.3:c.947delinsTT']['alt_genomic_loci'], []) + assert results['NM_014249.3:c.947delinsTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 
'vcf': {'chr': 'chr15', 'pos': '72105928', 'ref': 'AC', 'alt': 'TT'}} + assert results['NM_014249.3:c.947delinsTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588delinsTT', 'vcf': {'chr': 'chr15', 'pos': '71813588', 'ref': 'A', 'alt': 'TT'}} + assert results['NM_014249.3:c.947delinsTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105928_72105929delinsTT', 'vcf': {'chr': '15', 'pos': '72105928', 'ref': 'AC', 'alt': 'TT'}} + assert results['NM_014249.3:c.947delinsTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813588delinsTT', 'vcf': {'chr': '15', 'pos': '71813588', 'ref': 'A', 'alt': 'TT'}} + assert results['NM_014249.3:c.947delinsTT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1'} def test_variant185(self): variant = '19-41123093-A-AG' @@ -6455,231 +6571,239 @@ def test_variant185(self): print(results) assert results['flag'] == 'gene_variant' - assert 'NM_001042544.1:c.3233_3235=' in list(results.keys()) - assert results['NM_001042544.1:c.3233_3235=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001042544.1:c.3233_3235=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001042544.1:c.3233_3235=']['alt_genomic_loci'], []) - assert results['NM_001042544.1:c.3233_3235=']['gene_symbol'] == 'LTBP4' - assert results['NM_001042544.1:c.3233_3235=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036009.1:p.(Gln1078=)', 'slr': 'NP_001036009.1:p.(Q1078=)'} - assert results['NM_001042544.1:c.3233_3235=']['submitted_variant'] == '19-41123093-A-AG' - assert results['NM_001042544.1:c.3233_3235=']['genome_context_intronic_sequence'] == '' - assert results['NM_001042544.1:c.3233_3235=']['hgvs_lrg_variant'] == '' - assert 
results['NM_001042544.1:c.3233_3235=']['hgvs_transcript_variant'] == 'NM_001042544.1:c.3233_3235=' - assert results['NM_001042544.1:c.3233_3235=']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29022_29024=' - assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'AGG'}} - assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'AGG'}} - assert results['NM_001042544.1:c.3233_3235=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1'} - assert 'NM_001042545.1:c.3032_3034=' in list(results.keys()) - assert results['NM_001042545.1:c.3032_3034=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001042545.1:c.3032_3034=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001042545.1:c.3032_3034=']['alt_genomic_loci'], []) - assert results['NM_001042545.1:c.3032_3034=']['gene_symbol'] == 'LTBP4' - assert results['NM_001042545.1:c.3032_3034=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036010.1:p.(Gln1011=)', 'slr': 'NP_001036010.1:p.(Q1011=)'} assert results['NM_001042545.1:c.3032_3034=']['submitted_variant'] == 
'19-41123093-A-AG' - assert results['NM_001042545.1:c.3032_3034=']['genome_context_intronic_sequence'] == '' - assert results['NM_001042545.1:c.3032_3034=']['hgvs_lrg_variant'] == '' + assert results['NM_001042545.1:c.3032_3034=']['gene_symbol'] == 'LTBP4' + assert results['NM_001042545.1:c.3032_3034=']['gene_ids'] == {'hgnc_id': 'HGNC:6717', 'entrez_gene_id': '8425', 'ucsc_id': 'uc032hxp.2', 'omim_id': ['604710']} assert results['NM_001042545.1:c.3032_3034=']['hgvs_transcript_variant'] == 'NM_001042545.1:c.3032_3034=' + assert results['NM_001042545.1:c.3032_3034=']['genome_context_intronic_sequence'] == '' + assert results['NM_001042545.1:c.3032_3034=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001042545.1:c.3032_3034=']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29022_29024=' - assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'AGG'}} - assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'AGG'}} - assert results['NM_001042545.1:c.3032_3034=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1'} + assert 
results['NM_001042545.1:c.3032_3034=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036010.1:p.(Gln1011=)', 'slr': 'NP_001036010.1:p.(Q1011=)'} + assert results['NM_001042545.1:c.3032_3034=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001042545.1:c.3032_3034=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001042545.1:c.3032_3034=']['alt_genomic_loci'], []) + assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': 'chr19', 'pos': '41123093', 'ref': 'A', 'alt': 'AG'}} + assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'pos': '40617187', 'ref': 'AGG', 'alt': 'AGG'}} + assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': '19', 'pos': '41123093', 'ref': 'A', 'alt': 'AG'}} + assert results['NM_001042545.1:c.3032_3034=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'pos': '40617187', 'ref': 'AGG', 'alt': 'AGG'}} + assert results['NM_001042545.1:c.3032_3034=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1'} assert 'NM_003573.2:c.3122_3124=' in list(results.keys()) - assert results['NM_003573.2:c.3122_3124=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003573.2:c.3122_3124=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003573.2:c.3122_3124=']['alt_genomic_loci'], []) - assert results['NM_003573.2:c.3122_3124=']['gene_symbol'] == 'LTBP4' - assert 
results['NM_003573.2:c.3122_3124=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003564.2:p.(Gln1041=)', 'slr': 'NP_003564.2:p.(Q1041=)'} assert results['NM_003573.2:c.3122_3124=']['submitted_variant'] == '19-41123093-A-AG' - assert results['NM_003573.2:c.3122_3124=']['genome_context_intronic_sequence'] == '' - assert results['NM_003573.2:c.3122_3124=']['hgvs_lrg_variant'] == '' + assert results['NM_003573.2:c.3122_3124=']['gene_symbol'] == 'LTBP4' + assert results['NM_003573.2:c.3122_3124=']['gene_ids'] == {'hgnc_id': 'HGNC:6717', 'entrez_gene_id': '8425', 'ucsc_id': 'uc032hxp.2', 'omim_id': ['604710']} assert results['NM_003573.2:c.3122_3124=']['hgvs_transcript_variant'] == 'NM_003573.2:c.3122_3124=' + assert results['NM_003573.2:c.3122_3124=']['genome_context_intronic_sequence'] == '' + assert results['NM_003573.2:c.3122_3124=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003573.2:c.3122_3124=']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29022_29024=' - assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'AGG'}} - assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'AGG'}} - assert results['NM_003573.2:c.3122_3124=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 
'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2'} + assert results['NM_003573.2:c.3122_3124=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003564.2:p.(Gln1041=)', 'slr': 'NP_003564.2:p.(Q1041=)'} + assert results['NM_003573.2:c.3122_3124=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003573.2:c.3122_3124=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003573.2:c.3122_3124=']['alt_genomic_loci'], []) + assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': 'chr19', 'pos': '41123093', 'ref': 'A', 'alt': 'AG'}} + assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'pos': '40617187', 'ref': 'AGG', 'alt': 'AGG'}} + assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': '19', 'pos': '41123093', 'ref': 'A', 'alt': 'AG'}} + assert results['NM_003573.2:c.3122_3124=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'pos': '40617187', 'ref': 'AGG', 'alt': 'AGG'}} + assert results['NM_003573.2:c.3122_3124=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1'} + assert 'NM_001042544.1:c.3233_3235=' in list(results.keys()) + assert results['NM_001042544.1:c.3233_3235=']['submitted_variant'] == '19-41123093-A-AG' + assert results['NM_001042544.1:c.3233_3235=']['gene_symbol'] == 'LTBP4' + assert results['NM_001042544.1:c.3233_3235=']['gene_ids'] == {'hgnc_id': 'HGNC:6717', 'entrez_gene_id': '8425', 
'ucsc_id': 'uc032hxp.2', 'omim_id': ['604710']} + assert results['NM_001042544.1:c.3233_3235=']['hgvs_transcript_variant'] == 'NM_001042544.1:c.3233_3235=' + assert results['NM_001042544.1:c.3233_3235=']['genome_context_intronic_sequence'] == '' + assert results['NM_001042544.1:c.3233_3235=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001042544.1:c.3233_3235=']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29022_29024=' + assert results['NM_001042544.1:c.3233_3235=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036009.1:p.(Gln1078=)', 'slr': 'NP_001036009.1:p.(Q1078=)'} + assert results['NM_001042544.1:c.3233_3235=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001042544.1:c.3233_3235=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001042544.1:c.3233_3235=']['alt_genomic_loci'], []) + assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': 'chr19', 'pos': '41123093', 'ref': 'A', 'alt': 'AG'}} + assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': 'chr19', 'pos': '40617187', 'ref': 'AGG', 'alt': 'AGG'}} + assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095dup', 'vcf': {'chr': '19', 'pos': '41123093', 'ref': 'A', 'alt': 'AG'}} + assert results['NM_001042544.1:c.3233_3235=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617187_40617189=', 'vcf': {'chr': '19', 'pos': '40617187', 'ref': 'AGG', 'alt': 'AGG'}} + assert results['NM_001042544.1:c.3233_3235=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1', 'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1'} def test_variant186(self): variant = '19-41123093-A-AT' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_003573.2:c.3123G>T' in list(results.keys()) - assert results['NM_003573.2:c.3123G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003573.2:c.3123G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003573.2:c.3123G>T']['alt_genomic_loci'], []) - assert results['NM_003573.2:c.3123G>T']['gene_symbol'] == 'LTBP4' - assert results['NM_003573.2:c.3123G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003564.2:p.(Gln1041His)', 'slr': 'NP_003564.2:p.(Q1041H)'} - assert results['NM_003573.2:c.3123G>T']['submitted_variant'] == '19-41123093-A-AT' - assert results['NM_003573.2:c.3123G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_003573.2:c.3123G>T']['hgvs_lrg_variant'] == '' - assert results['NM_003573.2:c.3123G>T']['hgvs_transcript_variant'] == 'NM_003573.2:c.3123G>T' - assert results['NM_003573.2:c.3123G>T']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29023G>T' - assert results['NM_003573.2:c.3123G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': 'AT'}} - assert results['NM_003573.2:c.3123G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': 'chr19', 'ref': 'G', 'pos': '40617188', 'alt': 'T'}} - assert results['NM_003573.2:c.3123G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': 'AT'}} - assert results['NM_003573.2:c.3123G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': '19', 'ref': 'G', 'pos': '40617188', 'alt': 'T'}} - assert 
results['NM_003573.2:c.3123G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2'} - assert results['flag'] == 'gene_variant' assert 'NM_001042545.1:c.3033G>T' in list(results.keys()) - assert results['NM_001042545.1:c.3033G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001042545.1:c.3033G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001042545.1:c.3033G>T']['alt_genomic_loci'], []) - assert results['NM_001042545.1:c.3033G>T']['gene_symbol'] == 'LTBP4' - assert results['NM_001042545.1:c.3033G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036010.1:p.(Gln1011His)', 'slr': 'NP_001036010.1:p.(Q1011H)'} assert results['NM_001042545.1:c.3033G>T']['submitted_variant'] == '19-41123093-A-AT' - assert results['NM_001042545.1:c.3033G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001042545.1:c.3033G>T']['hgvs_lrg_variant'] == '' + assert results['NM_001042545.1:c.3033G>T']['gene_symbol'] == 'LTBP4' + assert results['NM_001042545.1:c.3033G>T']['gene_ids'] == {'hgnc_id': 'HGNC:6717', 'entrez_gene_id': '8425', 'ucsc_id': 'uc032hxp.2', 'omim_id': ['604710']} assert results['NM_001042545.1:c.3033G>T']['hgvs_transcript_variant'] == 'NM_001042545.1:c.3033G>T' + assert results['NM_001042545.1:c.3033G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001042545.1:c.3033G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001042545.1:c.3033G>T']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29023G>T' - assert results['NM_001042545.1:c.3033G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': 'AT'}} - assert 
results['NM_001042545.1:c.3033G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': 'chr19', 'ref': 'G', 'pos': '40617188', 'alt': 'T'}} - assert results['NM_001042545.1:c.3033G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': 'AT'}} - assert results['NM_001042545.1:c.3033G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': '19', 'ref': 'G', 'pos': '40617188', 'alt': 'T'}} - assert results['NM_001042545.1:c.3033G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1'} + assert results['NM_001042545.1:c.3033G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036010.1:p.(Gln1011His)', 'slr': 'NP_001036010.1:p.(Q1011H)'} + assert results['NM_001042545.1:c.3033G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001042545.1:c.3033G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001042545.1:c.3033G>T']['alt_genomic_loci'], []) + assert results['NM_001042545.1:c.3033G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': 'chr19', 'pos': '41123093', 'ref': 'A', 'alt': 'AT'}} + assert results['NM_001042545.1:c.3033G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': 'chr19', 'pos': '40617188', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001042545.1:c.3033G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': '19', 'pos': '41123093', 'ref': 'A', 'alt': 'AT'}} + assert 
results['NM_001042545.1:c.3033G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': '19', 'pos': '40617188', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001042545.1:c.3033G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1'} + + assert 'NM_003573.2:c.3123G>T' in list(results.keys()) + assert results['NM_003573.2:c.3123G>T']['submitted_variant'] == '19-41123093-A-AT' + assert results['NM_003573.2:c.3123G>T']['gene_symbol'] == 'LTBP4' + assert results['NM_003573.2:c.3123G>T']['gene_ids'] == {'hgnc_id': 'HGNC:6717', 'entrez_gene_id': '8425', 'ucsc_id': 'uc032hxp.2', 'omim_id': ['604710']} + assert results['NM_003573.2:c.3123G>T']['hgvs_transcript_variant'] == 'NM_003573.2:c.3123G>T' + assert results['NM_003573.2:c.3123G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_003573.2:c.3123G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003573.2:c.3123G>T']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29023G>T' + assert results['NM_003573.2:c.3123G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003564.2:p.(Gln1041His)', 'slr': 'NP_003564.2:p.(Q1041H)'} + assert results['NM_003573.2:c.3123G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003573.2:c.3123G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003573.2:c.3123G>T']['alt_genomic_loci'], []) + assert results['NM_003573.2:c.3123G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': 'chr19', 'pos': '41123093', 'ref': 'A', 'alt': 'AT'}} + assert results['NM_003573.2:c.3123G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': 'chr19', 'pos': '40617188', 'ref': 
'G', 'alt': 'T'}} + assert results['NM_003573.2:c.3123G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': '19', 'pos': '41123093', 'ref': 'A', 'alt': 'AT'}} + assert results['NM_003573.2:c.3123G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': '19', 'pos': '40617188', 'ref': 'G', 'alt': 'T'}} + assert results['NM_003573.2:c.3123G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1'} assert 'NM_001042544.1:c.3234G>T' in list(results.keys()) - assert results['NM_001042544.1:c.3234G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001042544.1:c.3234G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001042544.1:c.3234G>T']['alt_genomic_loci'], []) - assert results['NM_001042544.1:c.3234G>T']['gene_symbol'] == 'LTBP4' - assert results['NM_001042544.1:c.3234G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036009.1:p.(Gln1078His)', 'slr': 'NP_001036009.1:p.(Q1078H)'} assert results['NM_001042544.1:c.3234G>T']['submitted_variant'] == '19-41123093-A-AT' - assert results['NM_001042544.1:c.3234G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001042544.1:c.3234G>T']['hgvs_lrg_variant'] == '' + assert results['NM_001042544.1:c.3234G>T']['gene_symbol'] == 'LTBP4' + assert results['NM_001042544.1:c.3234G>T']['gene_ids'] == {'hgnc_id': 'HGNC:6717', 'entrez_gene_id': '8425', 'ucsc_id': 'uc032hxp.2', 'omim_id': ['604710']} assert results['NM_001042544.1:c.3234G>T']['hgvs_transcript_variant'] == 'NM_001042544.1:c.3234G>T' + assert results['NM_001042544.1:c.3234G>T']['genome_context_intronic_sequence'] == '' + assert 
results['NM_001042544.1:c.3234G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001042544.1:c.3234G>T']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29023G>T' - assert results['NM_001042544.1:c.3234G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '41123093', 'alt': 'AT'}} - assert results['NM_001042544.1:c.3234G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': 'chr19', 'ref': 'G', 'pos': '40617188', 'alt': 'T'}} - assert results['NM_001042544.1:c.3234G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '41123093', 'alt': 'AT'}} - assert results['NM_001042544.1:c.3234G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': '19', 'ref': 'G', 'pos': '40617188', 'alt': 'T'}} - assert results['NM_001042544.1:c.3234G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1'} - + assert results['NM_001042544.1:c.3234G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036009.1:p.(Gln1078His)', 'slr': 'NP_001036009.1:p.(Q1078H)'} + assert results['NM_001042544.1:c.3234G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001042544.1:c.3234G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001042544.1:c.3234G>T']['alt_genomic_loci'], []) + assert results['NM_001042544.1:c.3234G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': 'chr19', 'pos': '41123093', 'ref': 'A', 'alt': 'AT'}} + assert 
results['NM_001042544.1:c.3234G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': 'chr19', 'pos': '40617188', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001042544.1:c.3234G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094insT', 'vcf': {'chr': '19', 'pos': '41123093', 'ref': 'A', 'alt': 'AT'}} + assert results['NM_001042544.1:c.3234G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617188G>T', 'vcf': {'chr': '19', 'pos': '40617188', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001042544.1:c.3234G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1'} def test_variant187(self): variant = '19-41123093-AG-A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_001042544.1:c.3235_3236del' in list(results.keys()) - assert results['NM_001042544.1:c.3235_3236del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001042544.1:c.3235_3236del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001042544.1:c.3235_3236del']['alt_genomic_loci'], []) - assert results['NM_001042544.1:c.3235_3236del']['gene_symbol'] == 'LTBP4' - assert results['NM_001042544.1:c.3235_3236del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036009.1:p.(Gly1079LeufsTer17)', 'slr': 'NP_001036009.1:p.(G1079Lfs*17)'} - assert results['NM_001042544.1:c.3235_3236del']['submitted_variant'] == '19-41123093-AG-A' - assert results['NM_001042544.1:c.3235_3236del']['genome_context_intronic_sequence'] == '' - assert results['NM_001042544.1:c.3235_3236del']['hgvs_lrg_variant'] == '' - assert results['NM_001042544.1:c.3235_3236del']['hgvs_transcript_variant'] == 
'NM_001042544.1:c.3235_3236del' - assert results['NM_001042544.1:c.3235_3236del']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29024_29025del' - assert results['NM_001042544.1:c.3235_3236del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095del', 'vcf': {'chr': 'chr19', 'ref': 'AG', 'pos': '41123093', 'alt': 'A'}} - assert results['NM_001042544.1:c.3235_3236del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617189_40617190del', 'vcf': {'chr': 'chr19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'A'}} - assert results['NM_001042544.1:c.3235_3236del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095del', 'vcf': {'chr': '19', 'ref': 'AG', 'pos': '41123093', 'alt': 'A'}} - assert results['NM_001042544.1:c.3235_3236del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617189_40617190del', 'vcf': {'chr': '19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'A'}} - assert results['NM_001042544.1:c.3235_3236del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1'} - assert results['flag'] == 'gene_variant' assert 'NM_001042545.1:c.3034_3035del' in list(results.keys()) - assert results['NM_001042545.1:c.3034_3035del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001042545.1:c.3034_3035del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001042545.1:c.3034_3035del']['alt_genomic_loci'], []) - assert results['NM_001042545.1:c.3034_3035del']['gene_symbol'] == 'LTBP4' - assert results['NM_001042545.1:c.3034_3035del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036010.1:p.(Gly1012LeufsTer17)', 'slr': 'NP_001036010.1:p.(G1012Lfs*17)'} assert 
results['NM_001042545.1:c.3034_3035del']['submitted_variant'] == '19-41123093-AG-A' - assert results['NM_001042545.1:c.3034_3035del']['genome_context_intronic_sequence'] == '' - assert results['NM_001042545.1:c.3034_3035del']['hgvs_lrg_variant'] == '' + assert results['NM_001042545.1:c.3034_3035del']['gene_symbol'] == 'LTBP4' + assert results['NM_001042545.1:c.3034_3035del']['gene_ids'] == {'hgnc_id': 'HGNC:6717', 'entrez_gene_id': '8425', 'ucsc_id': 'uc032hxp.2', 'omim_id': ['604710']} assert results['NM_001042545.1:c.3034_3035del']['hgvs_transcript_variant'] == 'NM_001042545.1:c.3034_3035del' + assert results['NM_001042545.1:c.3034_3035del']['genome_context_intronic_sequence'] == '' + assert results['NM_001042545.1:c.3034_3035del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001042545.1:c.3034_3035del']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29024_29025del' - assert results['NM_001042545.1:c.3034_3035del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095del', 'vcf': {'chr': 'chr19', 'ref': 'AG', 'pos': '41123093', 'alt': 'A'}} - assert results['NM_001042545.1:c.3034_3035del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617189_40617190del', 'vcf': {'chr': 'chr19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'A'}} - assert results['NM_001042545.1:c.3034_3035del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095del', 'vcf': {'chr': '19', 'ref': 'AG', 'pos': '41123093', 'alt': 'A'}} - assert results['NM_001042545.1:c.3034_3035del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617189_40617190del', 'vcf': {'chr': '19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'A'}} - assert results['NM_001042545.1:c.3034_3035del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1', 
'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1'} + assert results['NM_001042545.1:c.3034_3035del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036010.1:p.(Gly1012LeufsTer17)', 'slr': 'NP_001036010.1:p.(G1012Lfs*17)'} + assert results['NM_001042545.1:c.3034_3035del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001042545.1:c.3034_3035del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001042545.1:c.3034_3035del']['alt_genomic_loci'], []) + assert results['NM_001042545.1:c.3034_3035del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095del', 'vcf': {'chr': 'chr19', 'pos': '41123093', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_001042545.1:c.3034_3035del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617189_40617190del', 'vcf': {'chr': 'chr19', 'pos': '40617187', 'ref': 'AGG', 'alt': 'A'}} + assert results['NM_001042545.1:c.3034_3035del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095del', 'vcf': {'chr': '19', 'pos': '41123093', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_001042545.1:c.3034_3035del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617189_40617190del', 'vcf': {'chr': '19', 'pos': '40617187', 'ref': 'AGG', 'alt': 'A'}} + assert results['NM_001042545.1:c.3034_3035del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1'} assert 'NM_003573.2:c.3124_3125del' in list(results.keys()) - assert results['NM_003573.2:c.3124_3125del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003573.2:c.3124_3125del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003573.2:c.3124_3125del']['alt_genomic_loci'], []) - assert 
results['NM_003573.2:c.3124_3125del']['gene_symbol'] == 'LTBP4' - assert results['NM_003573.2:c.3124_3125del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003564.2:p.(Gly1042LeufsTer17)', 'slr': 'NP_003564.2:p.(G1042Lfs*17)'} assert results['NM_003573.2:c.3124_3125del']['submitted_variant'] == '19-41123093-AG-A' - assert results['NM_003573.2:c.3124_3125del']['genome_context_intronic_sequence'] == '' - assert results['NM_003573.2:c.3124_3125del']['hgvs_lrg_variant'] == '' + assert results['NM_003573.2:c.3124_3125del']['gene_symbol'] == 'LTBP4' + assert results['NM_003573.2:c.3124_3125del']['gene_ids'] == {'hgnc_id': 'HGNC:6717', 'entrez_gene_id': '8425', 'ucsc_id': 'uc032hxp.2', 'omim_id': ['604710']} assert results['NM_003573.2:c.3124_3125del']['hgvs_transcript_variant'] == 'NM_003573.2:c.3124_3125del' + assert results['NM_003573.2:c.3124_3125del']['genome_context_intronic_sequence'] == '' + assert results['NM_003573.2:c.3124_3125del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003573.2:c.3124_3125del']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29024_29025del' - assert results['NM_003573.2:c.3124_3125del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095del', 'vcf': {'chr': 'chr19', 'ref': 'AG', 'pos': '41123093', 'alt': 'A'}} - assert results['NM_003573.2:c.3124_3125del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617189_40617190del', 'vcf': {'chr': 'chr19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'A'}} - assert results['NM_003573.2:c.3124_3125del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095del', 'vcf': {'chr': '19', 'ref': 'AG', 'pos': '41123093', 'alt': 'A'}} - assert results['NM_003573.2:c.3124_3125del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617189_40617190del', 'vcf': {'chr': '19', 'ref': 'AGG', 'pos': '40617187', 'alt': 'A'}} - assert 
results['NM_003573.2:c.3124_3125del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2'} + assert results['NM_003573.2:c.3124_3125del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003564.2:p.(Gly1042LeufsTer17)', 'slr': 'NP_003564.2:p.(G1042Lfs*17)'} + assert results['NM_003573.2:c.3124_3125del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003573.2:c.3124_3125del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003573.2:c.3124_3125del']['alt_genomic_loci'], []) + assert results['NM_003573.2:c.3124_3125del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095del', 'vcf': {'chr': 'chr19', 'pos': '41123093', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_003573.2:c.3124_3125del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617189_40617190del', 'vcf': {'chr': 'chr19', 'pos': '40617187', 'ref': 'AGG', 'alt': 'A'}} + assert results['NM_003573.2:c.3124_3125del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095del', 'vcf': {'chr': '19', 'pos': '41123093', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_003573.2:c.3124_3125del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617189_40617190del', 'vcf': {'chr': '19', 'pos': '40617187', 'ref': 'AGG', 'alt': 'A'}} + assert results['NM_003573.2:c.3124_3125del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1'} + assert 'NM_001042544.1:c.3235_3236del' in list(results.keys()) + assert results['NM_001042544.1:c.3235_3236del']['submitted_variant'] == '19-41123093-AG-A' + assert 
results['NM_001042544.1:c.3235_3236del']['gene_symbol'] == 'LTBP4' + assert results['NM_001042544.1:c.3235_3236del']['gene_ids'] == {'hgnc_id': 'HGNC:6717', 'entrez_gene_id': '8425', 'ucsc_id': 'uc032hxp.2', 'omim_id': ['604710']} + assert results['NM_001042544.1:c.3235_3236del']['hgvs_transcript_variant'] == 'NM_001042544.1:c.3235_3236del' + assert results['NM_001042544.1:c.3235_3236del']['genome_context_intronic_sequence'] == '' + assert results['NM_001042544.1:c.3235_3236del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001042544.1:c.3235_3236del']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29024_29025del' + assert results['NM_001042544.1:c.3235_3236del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036009.1:p.(Gly1079LeufsTer17)', 'slr': 'NP_001036009.1:p.(G1079Lfs*17)'} + assert results['NM_001042544.1:c.3235_3236del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001042544.1:c.3235_3236del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001042544.1:c.3235_3236del']['alt_genomic_loci'], []) + assert results['NM_001042544.1:c.3235_3236del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095del', 'vcf': {'chr': 'chr19', 'pos': '41123093', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_001042544.1:c.3235_3236del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617189_40617190del', 'vcf': {'chr': 'chr19', 'pos': '40617187', 'ref': 'AGG', 'alt': 'A'}} + assert results['NM_001042544.1:c.3235_3236del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123095del', 'vcf': {'chr': '19', 'pos': '41123093', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_001042544.1:c.3235_3236del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617189_40617190del', 'vcf': {'chr': '19', 'pos': '40617187', 'ref': 'AGG', 'alt': 'A'}} + assert 
results['NM_001042544.1:c.3235_3236del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1'} def test_variant188(self): variant = '19-41123093-AG-AG' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_001042545.1:c.3035del' in list(results.keys()) - assert results['NM_001042545.1:c.3035del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001042545.1:c.3035del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001042545.1:c.3035del']['alt_genomic_loci'], []) - assert results['NM_001042545.1:c.3035del']['gene_symbol'] == 'LTBP4' - assert results['NM_001042545.1:c.3035del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036010.1:p.(Gly1012ValfsTer14)', 'slr': 'NP_001036010.1:p.(G1012Vfs*14)'} assert results['NM_001042545.1:c.3035del']['submitted_variant'] == '19-41123093-AG-AG' - assert results['NM_001042545.1:c.3035del']['genome_context_intronic_sequence'] == '' - assert results['NM_001042545.1:c.3035del']['hgvs_lrg_variant'] == '' + assert results['NM_001042545.1:c.3035del']['gene_symbol'] == 'LTBP4' + assert results['NM_001042545.1:c.3035del']['gene_ids'] == {'hgnc_id': 'HGNC:6717', 'entrez_gene_id': '8425', 'ucsc_id': 'uc032hxp.2', 'omim_id': ['604710']} assert results['NM_001042545.1:c.3035del']['hgvs_transcript_variant'] == 'NM_001042545.1:c.3035del' + assert results['NM_001042545.1:c.3035del']['genome_context_intronic_sequence'] == '' + assert results['NM_001042545.1:c.3035del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001042545.1:c.3035del']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29025del' - assert results['NM_001042545.1:c.3035del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000019.9:g.41123093_41123094=', 'vcf': {'chr': 'chr19', 'ref': 'AG', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_001042545.1:c.3035del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617190del', 'vcf': {'chr': 'chr19', 'ref': 'AG', 'pos': '40617187', 'alt': 'A'}} - assert results['NM_001042545.1:c.3035del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094=', 'vcf': {'chr': '19', 'ref': 'AG', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_001042545.1:c.3035del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617190del', 'vcf': {'chr': '19', 'ref': 'AG', 'pos': '40617187', 'alt': 'A'}} - assert results['NM_001042545.1:c.3035del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1'} - - assert results['flag'] == 'gene_variant' - assert 'NM_001042544.1:c.3236del' in list(results.keys()) - assert results['NM_001042544.1:c.3236del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001042544.1:c.3236del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001042544.1:c.3236del']['alt_genomic_loci'], []) - assert results['NM_001042544.1:c.3236del']['gene_symbol'] == 'LTBP4' - assert results['NM_001042544.1:c.3236del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036009.1:p.(Gly1079ValfsTer14)', 'slr': 'NP_001036009.1:p.(G1079Vfs*14)'} - assert results['NM_001042544.1:c.3236del']['submitted_variant'] == '19-41123093-AG-AG' - assert results['NM_001042544.1:c.3236del']['genome_context_intronic_sequence'] == '' - assert results['NM_001042544.1:c.3236del']['hgvs_lrg_variant'] == '' - assert results['NM_001042544.1:c.3236del']['hgvs_transcript_variant'] == 'NM_001042544.1:c.3236del' - assert 
results['NM_001042544.1:c.3236del']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29025del' - assert results['NM_001042544.1:c.3236del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094=', 'vcf': {'chr': 'chr19', 'ref': 'AG', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_001042544.1:c.3236del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617190del', 'vcf': {'chr': 'chr19', 'ref': 'AG', 'pos': '40617187', 'alt': 'A'}} - assert results['NM_001042544.1:c.3236del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094=', 'vcf': {'chr': '19', 'ref': 'AG', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_001042544.1:c.3236del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617190del', 'vcf': {'chr': '19', 'ref': 'AG', 'pos': '40617187', 'alt': 'A'}} - assert results['NM_001042544.1:c.3236del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1'} + assert results['NM_001042545.1:c.3035del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036010.1:p.(Gly1012ValfsTer14)', 'slr': 'NP_001036010.1:p.(G1012Vfs*14)'} + assert results['NM_001042545.1:c.3035del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001042545.1:c.3035del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001042545.1:c.3035del']['alt_genomic_loci'], []) + assert results['NM_001042545.1:c.3035del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094=', 'vcf': {'chr': 'chr19', 'pos': '41123093', 'ref': 'AG', 'alt': 'AG'}} + assert results['NM_001042545.1:c.3035del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617190del', 'vcf': 
{'chr': 'chr19', 'pos': '40617187', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_001042545.1:c.3035del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094=', 'vcf': {'chr': '19', 'pos': '41123093', 'ref': 'AG', 'alt': 'AG'}} + assert results['NM_001042545.1:c.3035del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617190del', 'vcf': {'chr': '19', 'pos': '40617187', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_001042545.1:c.3035del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042545.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036010.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1'} assert 'NM_003573.2:c.3125del' in list(results.keys()) - assert results['NM_003573.2:c.3125del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003573.2:c.3125del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003573.2:c.3125del']['alt_genomic_loci'], []) - assert results['NM_003573.2:c.3125del']['gene_symbol'] == 'LTBP4' - assert results['NM_003573.2:c.3125del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003564.2:p.(Gly1042ValfsTer14)', 'slr': 'NP_003564.2:p.(G1042Vfs*14)'} assert results['NM_003573.2:c.3125del']['submitted_variant'] == '19-41123093-AG-AG' - assert results['NM_003573.2:c.3125del']['genome_context_intronic_sequence'] == '' - assert results['NM_003573.2:c.3125del']['hgvs_lrg_variant'] == '' + assert results['NM_003573.2:c.3125del']['gene_symbol'] == 'LTBP4' + assert results['NM_003573.2:c.3125del']['gene_ids'] == {'hgnc_id': 'HGNC:6717', 'entrez_gene_id': '8425', 'ucsc_id': 'uc032hxp.2', 'omim_id': ['604710']} assert results['NM_003573.2:c.3125del']['hgvs_transcript_variant'] == 'NM_003573.2:c.3125del' + assert results['NM_003573.2:c.3125del']['genome_context_intronic_sequence'] == '' + assert 
results['NM_003573.2:c.3125del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003573.2:c.3125del']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29025del' - assert results['NM_003573.2:c.3125del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094=', 'vcf': {'chr': 'chr19', 'ref': 'AG', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_003573.2:c.3125del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617190del', 'vcf': {'chr': 'chr19', 'ref': 'AG', 'pos': '40617187', 'alt': 'A'}} - assert results['NM_003573.2:c.3125del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094=', 'vcf': {'chr': '19', 'ref': 'AG', 'pos': '41123093', 'alt': 'AG'}} - assert results['NM_003573.2:c.3125del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617190del', 'vcf': {'chr': '19', 'ref': 'AG', 'pos': '40617187', 'alt': 'A'}} - assert results['NM_003573.2:c.3125del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2'} + assert results['NM_003573.2:c.3125del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003564.2:p.(Gly1042ValfsTer14)', 'slr': 'NP_003564.2:p.(G1042Vfs*14)'} + assert results['NM_003573.2:c.3125del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003573.2:c.3125del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003573.2:c.3125del']['alt_genomic_loci'], []) + assert results['NM_003573.2:c.3125del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094=', 'vcf': {'chr': 'chr19', 'pos': '41123093', 'ref': 'AG', 'alt': 'AG'}} + assert results['NM_003573.2:c.3125del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000019.10:g.40617190del', 'vcf': {'chr': 'chr19', 'pos': '40617187', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_003573.2:c.3125del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094=', 'vcf': {'chr': '19', 'pos': '41123093', 'ref': 'AG', 'alt': 'AG'}} + assert results['NM_003573.2:c.3125del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617190del', 'vcf': {'chr': '19', 'pos': '40617187', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_003573.2:c.3125del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003573.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003564.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1'} + assert 'NM_001042544.1:c.3236del' in list(results.keys()) + assert results['NM_001042544.1:c.3236del']['submitted_variant'] == '19-41123093-AG-AG' + assert results['NM_001042544.1:c.3236del']['gene_symbol'] == 'LTBP4' + assert results['NM_001042544.1:c.3236del']['gene_ids'] == {'hgnc_id': 'HGNC:6717', 'entrez_gene_id': '8425', 'ucsc_id': 'uc032hxp.2', 'omim_id': ['604710']} + assert results['NM_001042544.1:c.3236del']['hgvs_transcript_variant'] == 'NM_001042544.1:c.3236del' + assert results['NM_001042544.1:c.3236del']['genome_context_intronic_sequence'] == '' + assert results['NM_001042544.1:c.3236del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001042544.1:c.3236del']['hgvs_refseqgene_variant'] == 'NG_021201.1:g.29025del' + assert results['NM_001042544.1:c.3236del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036009.1:p.(Gly1079ValfsTer14)', 'slr': 'NP_001036009.1:p.(G1079Vfs*14)'} + assert results['NM_001042544.1:c.3236del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001042544.1:c.3236del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001042544.1:c.3236del']['alt_genomic_loci'], []) + assert 
results['NM_001042544.1:c.3236del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094=', 'vcf': {'chr': 'chr19', 'pos': '41123093', 'ref': 'AG', 'alt': 'AG'}} + assert results['NM_001042544.1:c.3236del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617190del', 'vcf': {'chr': 'chr19', 'pos': '40617187', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_001042544.1:c.3236del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.41123093_41123094=', 'vcf': {'chr': '19', 'pos': '41123093', 'ref': 'AG', 'alt': 'AG'}} + assert results['NM_001042544.1:c.3236del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.40617190del', 'vcf': {'chr': '19', 'pos': '40617187', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_001042544.1:c.3236del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042544.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036009.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_021201.1'} def test_variant189(self): variant = 'NM_012309.4:c.913-5058G>A' @@ -6688,22 +6812,22 @@ def test_variant189(self): assert results['flag'] == 'gene_variant' assert 'NM_012309.4:c.913-5058G>A' in list(results.keys()) - assert results['NM_012309.4:c.913-5058G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_012309.4:c.913-5058G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_012309.4:c.913-5058G>A']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'C', 'pos': '574546', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'NW_004070871.1', 'ref': 'C', 'pos': '574546', 'alt': 'T'}}}]) - assert results['NM_012309.4:c.913-5058G>A']['gene_symbol'] == 'SHANK2' - assert 
results['NM_012309.4:c.913-5058G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_036441.2:p.?', 'slr': 'NP_036441.2:p.?'} assert results['NM_012309.4:c.913-5058G>A']['submitted_variant'] == 'NM_012309.4:c.913-5058G>A' - assert results['NM_012309.4:c.913-5058G>A']['genome_context_intronic_sequence'] == 'NC_000011.10(NM_012309.4):c.913-5058G>A' - assert results['NM_012309.4:c.913-5058G>A']['hgvs_lrg_variant'] == '' + assert results['NM_012309.4:c.913-5058G>A']['gene_symbol'] == 'SHANK2' + assert results['NM_012309.4:c.913-5058G>A']['gene_ids'] == {'hgnc_id': 'HGNC:14295', 'entrez_gene_id': '22941', 'ucsc_id': 'uc058etp.1', 'omim_id': ['603290']} assert results['NM_012309.4:c.913-5058G>A']['hgvs_transcript_variant'] == 'NM_012309.4:c.913-5058G>A' + assert results['NM_012309.4:c.913-5058G>A']['genome_context_intronic_sequence'] == 'NC_000011.10(NM_012309.4):c.913-5058G>A' + assert results['NM_012309.4:c.913-5058G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_012309.4:c.913-5058G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_012309.4:c.913-5058G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_036441.2:p.?', 'slr': 'NP_036441.2:p.?'} + assert results['NM_012309.4:c.913-5058G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_012309.4:c.913-5058G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_012309.4:c.913-5058G>A']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'HG865_PATCH', 'pos': '574546', 'ref': 'C', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'NW_004070871.1', 'pos': '574546', 'ref': 'C', 'alt': 'T'}}}]) assert 'hg19' not in list(results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci'].keys()) - assert results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71080333C>T', 'vcf': {'chr': 'chr11', 'ref': 'C', 
'pos': '71080333', 'alt': 'T'}} + assert results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71080333C>T', 'vcf': {'chr': 'chr11', 'pos': '71080333', 'ref': 'C', 'alt': 'T'}} assert 'grch37' not in list(results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci'].keys()) - assert results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71080333C>T', 'vcf': {'chr': '11', 'ref': 'C', 'pos': '71080333', 'alt': 'T'}} - assert results['NM_012309.4:c.913-5058G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_036441.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_012309.4'} - + assert results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71080333C>T', 'vcf': {'chr': '11', 'pos': '71080333', 'ref': 'C', 'alt': 'T'}} + assert results['NM_012309.4:c.913-5058G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_012309.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_036441.2'} def test_variant190(self): variant = 'LRG_199t1:c.2376[G>C];[G>C]' @@ -6712,63 +6836,64 @@ def test_variant190(self): assert results['flag'] == 'gene_variant' assert 'NM_004006.2:c.2376G>C' in list(results.keys()) - assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.2376G>C' - assert results['NM_004006.2:c.2376G>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004006.2:c.2376G>C']['alt_genomic_loci'], []) - assert results['NM_004006.2:c.2376G>C']['gene_symbol'] == 'DMD' - assert results['NM_004006.2:c.2376G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Val792=)', 'slr': 'NP_003997.1:p.(V792=)'} assert results['NM_004006.2:c.2376G>C']['submitted_variant'] == 'LRG_199t1:c.2376[G>C];[G>C]' - assert 
results['NM_004006.2:c.2376G>C']['genome_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_variant'] == 'LRG_199:g.842851G>C' + assert results['NM_004006.2:c.2376G>C']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.2376G>C']['gene_ids'] == {'hgnc_id': 'HGNC:2928', 'entrez_gene_id': '1756', 'ucsc_id': 'uc004dda.2', 'omim_id': ['300377']} assert results['NM_004006.2:c.2376G>C']['hgvs_transcript_variant'] == 'NM_004006.2:c.2376G>C' + assert results['NM_004006.2:c.2376G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.2376G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.2376G>C']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.842851G>C' - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '32519876', 'alt': 'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '32501759', 'alt': 'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '32519876', 'alt': 'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '32501759', 'alt': 'G'}} - assert results['NM_004006.2:c.2376G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} - + assert results['NM_004006.2:c.2376G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_003997.1(LRG_199p1):p.(Val792=)', 'slr': 'NP_003997.1:p.(V792=)'} + assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.2376G>C' + assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_variant'] == 'LRG_199:g.842851G>C' + self.assertCountEqual(results['NM_004006.2:c.2376G>C']['alt_genomic_loci'], []) + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'chrX', 'pos': '32519876', 'ref': 'C', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'chrX', 'pos': '32501759', 'ref': 'C', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'X', 'pos': '32519876', 'ref': 'C', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'X', 'pos': '32501759', 'ref': 'C', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} def test_variant191(self): variant = 'LRG_199t1:c.[2376G>C];[3103del]' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_004006.2:c.3103del' in list(results.keys()) - assert results['NM_004006.2:c.3103del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.3103del' - assert results['NM_004006.2:c.3103del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004006.2:c.3103del']['alt_genomic_loci'], []) - assert 
results['NM_004006.2:c.3103del']['gene_symbol'] == 'DMD' - assert results['NM_004006.2:c.3103del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Gln1035SerfsTer9)', 'slr': 'NP_003997.1:p.(Q1035Sfs*9)'} - assert results['NM_004006.2:c.3103del']['submitted_variant'] == 'LRG_199t1:c.[2376G>C];[3103del]' - assert results['NM_004006.2:c.3103del']['genome_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.3103del']['hgvs_lrg_variant'] == 'LRG_199:g.876053del' - assert results['NM_004006.2:c.3103del']['hgvs_transcript_variant'] == 'NM_004006.2:c.3103del' - assert results['NM_004006.2:c.3103del']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.876053del' - assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32486676del', 'vcf': {'chr': 'chrX', 'ref': 'TG', 'pos': '32486673', 'alt': 'T'}} - assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32468559del', 'vcf': {'chr': 'chrX', 'ref': 'TG', 'pos': '32468556', 'alt': 'T'}} - assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32486676del', 'vcf': {'chr': 'X', 'ref': 'TG', 'pos': '32486673', 'alt': 'T'}} - assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32468559del', 'vcf': {'chr': 'X', 'ref': 'TG', 'pos': '32468556', 'alt': 'T'}} - assert results['NM_004006.2:c.3103del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} - assert results['flag'] == 'gene_variant' assert 'NM_004006.2:c.2376G>C' in list(results.keys()) - assert 
results['NM_004006.2:c.2376G>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.2376G>C' - assert results['NM_004006.2:c.2376G>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004006.2:c.2376G>C']['alt_genomic_loci'], []) - assert results['NM_004006.2:c.2376G>C']['gene_symbol'] == 'DMD' - assert results['NM_004006.2:c.2376G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Val792=)', 'slr': 'NP_003997.1:p.(V792=)'} assert results['NM_004006.2:c.2376G>C']['submitted_variant'] == 'LRG_199t1:c.[2376G>C];[3103del]' - assert results['NM_004006.2:c.2376G>C']['genome_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_variant'] == 'LRG_199:g.842851G>C' + assert results['NM_004006.2:c.2376G>C']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.2376G>C']['gene_ids'] == {'hgnc_id': 'HGNC:2928', 'entrez_gene_id': '1756', 'ucsc_id': 'uc004dda.2', 'omim_id': ['300377']} assert results['NM_004006.2:c.2376G>C']['hgvs_transcript_variant'] == 'NM_004006.2:c.2376G>C' + assert results['NM_004006.2:c.2376G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.2376G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.2376G>C']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.842851G>C' - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '32519876', 'alt': 'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '32501759', 'alt': 'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '32519876', 'alt': 'G'}} - assert 
results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '32501759', 'alt': 'G'}} - assert results['NM_004006.2:c.2376G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + assert results['NM_004006.2:c.2376G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Val792=)', 'slr': 'NP_003997.1:p.(V792=)'} + assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.2376G>C' + assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_variant'] == 'LRG_199:g.842851G>C' + self.assertCountEqual(results['NM_004006.2:c.2376G>C']['alt_genomic_loci'], []) + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'chrX', 'pos': '32519876', 'ref': 'C', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'chrX', 'pos': '32501759', 'ref': 'C', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'X', 'pos': '32519876', 'ref': 'C', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'X', 'pos': '32501759', 'ref': 'C', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + assert 'NM_004006.2:c.3103del' in list(results.keys()) + assert results['NM_004006.2:c.3103del']['submitted_variant'] == 'LRG_199t1:c.[2376G>C];[3103del]' + assert results['NM_004006.2:c.3103del']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.3103del']['gene_ids'] == {'hgnc_id': 'HGNC:2928', 'entrez_gene_id': '1756', 'ucsc_id': 'uc004dda.2', 'omim_id': ['300377']} + assert results['NM_004006.2:c.3103del']['hgvs_transcript_variant'] == 'NM_004006.2:c.3103del' + assert results['NM_004006.2:c.3103del']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.3103del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.3103del']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.876053del' + assert results['NM_004006.2:c.3103del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Gln1035SerfsTer9)', 'slr': 'NP_003997.1:p.(Q1035Sfs*9)'} + assert results['NM_004006.2:c.3103del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.3103del' + assert results['NM_004006.2:c.3103del']['hgvs_lrg_variant'] == 'LRG_199:g.876053del' + self.assertCountEqual(results['NM_004006.2:c.3103del']['alt_genomic_loci'], []) + assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32486676del', 'vcf': {'chr': 'chrX', 'pos': '32486673', 'ref': 'TG', 'alt': 'T'}} + assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32468559del', 'vcf': {'chr': 'chrX', 'pos': '32468556', 'ref': 'TG', 'alt': 'T'}} + assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32486676del', 'vcf': {'chr': 'X', 'pos': '32486673', 'ref': 'TG', 'alt': 'T'}} + assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000023.11:g.32468559del', 'vcf': {'chr': 'X', 'pos': '32468556', 'ref': 'TG', 'alt': 'T'}} + assert results['NM_004006.2:c.3103del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} def test_variant192(self): variant = 'LRG_199t1:c.[4358_4359del;4361_4372del]' @@ -6776,81 +6901,83 @@ def test_variant192(self): print(results) assert results['flag'] == 'gene_variant' - assert 'NM_004006.2:c.4362_4373del' in list(results.keys()) - assert results['NM_004006.2:c.4362_4373del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.4362_4373del' - assert results['NM_004006.2:c.4362_4373del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004006.2:c.4362_4373del']['alt_genomic_loci'], []) - assert results['NM_004006.2:c.4362_4373del']['gene_symbol'] == 'DMD' - assert results['NM_004006.2:c.4362_4373del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Ser1455_Phe1458del)', 'slr': 'NP_003997.1:p.(S1455_F1458del)'} - assert results['NM_004006.2:c.4362_4373del']['submitted_variant'] == 'LRG_199t1:c.[4358_4359del;4361_4372del]' - assert results['NM_004006.2:c.4362_4373del']['genome_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.4362_4373del']['hgvs_lrg_variant'] == 'LRG_199:g.954953_954964del' - assert results['NM_004006.2:c.4362_4373del']['hgvs_transcript_variant'] == 'NM_004006.2:c.4362_4373del' - assert results['NM_004006.2:c.4362_4373del']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.954953_954964del' - assert results['NM_004006.2:c.4362_4373del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32407764_32407775del', 'vcf': {'chr': 'chrX', 'ref': 'AAACTTCATGGAG', 'pos': '32407762', 'alt': 'A'}} - 
assert results['NM_004006.2:c.4362_4373del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32389647_32389658del', 'vcf': {'chr': 'chrX', 'ref': 'AAACTTCATGGAG', 'pos': '32389645', 'alt': 'A'}} - assert results['NM_004006.2:c.4362_4373del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32407764_32407775del', 'vcf': {'chr': 'X', 'ref': 'AAACTTCATGGAG', 'pos': '32407762', 'alt': 'A'}} - assert results['NM_004006.2:c.4362_4373del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32389647_32389658del', 'vcf': {'chr': 'X', 'ref': 'AAACTTCATGGAG', 'pos': '32389645', 'alt': 'A'}} - assert results['NM_004006.2:c.4362_4373del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} - assert 'NM_004006.2:c.4358_4359del' in list(results.keys()) - assert results['NM_004006.2:c.4358_4359del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.4358_4359del' - assert results['NM_004006.2:c.4358_4359del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004006.2:c.4358_4359del']['alt_genomic_loci'], []) - assert results['NM_004006.2:c.4358_4359del']['gene_symbol'] == 'DMD' - assert results['NM_004006.2:c.4358_4359del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Asp1453GlyfsTer15)', 'slr': 'NP_003997.1:p.(D1453Gfs*15)'} assert results['NM_004006.2:c.4358_4359del']['submitted_variant'] == 'LRG_199t1:c.[4358_4359del;4361_4372del]' - assert results['NM_004006.2:c.4358_4359del']['genome_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.4358_4359del']['hgvs_lrg_variant'] == 'LRG_199:g.954949_954950del' + assert results['NM_004006.2:c.4358_4359del']['gene_symbol'] == 'DMD' + 
assert results['NM_004006.2:c.4358_4359del']['gene_ids'] == {'hgnc_id': 'HGNC:2928', 'entrez_gene_id': '1756', 'ucsc_id': 'uc004dda.2', 'omim_id': ['300377']} assert results['NM_004006.2:c.4358_4359del']['hgvs_transcript_variant'] == 'NM_004006.2:c.4358_4359del' + assert results['NM_004006.2:c.4358_4359del']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.4358_4359del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.4358_4359del']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.954949_954950del' - assert results['NM_004006.2:c.4358_4359del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32407777_32407778del', 'vcf': {'chr': 'chrX', 'ref': 'CAT', 'pos': '32407776', 'alt': 'C'}} - assert results['NM_004006.2:c.4358_4359del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32389660_32389661del', 'vcf': {'chr': 'chrX', 'ref': 'CAT', 'pos': '32389659', 'alt': 'C'}} - assert results['NM_004006.2:c.4358_4359del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32407777_32407778del', 'vcf': {'chr': 'X', 'ref': 'CAT', 'pos': '32407776', 'alt': 'C'}} - assert results['NM_004006.2:c.4358_4359del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32389660_32389661del', 'vcf': {'chr': 'X', 'ref': 'CAT', 'pos': '32389659', 'alt': 'C'}} - assert results['NM_004006.2:c.4358_4359del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + assert results['NM_004006.2:c.4358_4359del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Asp1453GlyfsTer15)', 'slr': 'NP_003997.1:p.(D1453Gfs*15)'} + assert 
results['NM_004006.2:c.4358_4359del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.4358_4359del' + assert results['NM_004006.2:c.4358_4359del']['hgvs_lrg_variant'] == 'LRG_199:g.954949_954950del' + self.assertCountEqual(results['NM_004006.2:c.4358_4359del']['alt_genomic_loci'], []) + assert results['NM_004006.2:c.4358_4359del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32407777_32407778del', 'vcf': {'chr': 'chrX', 'pos': '32407776', 'ref': 'CAT', 'alt': 'C'}} + assert results['NM_004006.2:c.4358_4359del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32389660_32389661del', 'vcf': {'chr': 'chrX', 'pos': '32389659', 'ref': 'CAT', 'alt': 'C'}} + assert results['NM_004006.2:c.4358_4359del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32407777_32407778del', 'vcf': {'chr': 'X', 'pos': '32407776', 'ref': 'CAT', 'alt': 'C'}} + assert results['NM_004006.2:c.4358_4359del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32389660_32389661del', 'vcf': {'chr': 'X', 'pos': '32389659', 'ref': 'CAT', 'alt': 'C'}} + assert results['NM_004006.2:c.4358_4359del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + assert 'NM_004006.2:c.4362_4373del' in list(results.keys()) + assert results['NM_004006.2:c.4362_4373del']['submitted_variant'] == 'LRG_199t1:c.[4358_4359del;4361_4372del]' + assert results['NM_004006.2:c.4362_4373del']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.4362_4373del']['gene_ids'] == {'hgnc_id': 'HGNC:2928', 'entrez_gene_id': '1756', 'ucsc_id': 'uc004dda.2', 'omim_id': ['300377']} + assert results['NM_004006.2:c.4362_4373del']['hgvs_transcript_variant'] == 
'NM_004006.2:c.4362_4373del' + assert results['NM_004006.2:c.4362_4373del']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.4362_4373del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.4362_4373del']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.954953_954964del' + assert results['NM_004006.2:c.4362_4373del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Ser1455_Phe1458del)', 'slr': 'NP_003997.1:p.(S1455_F1458del)'} + assert results['NM_004006.2:c.4362_4373del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.4362_4373del' + assert results['NM_004006.2:c.4362_4373del']['hgvs_lrg_variant'] == 'LRG_199:g.954953_954964del' + self.assertCountEqual(results['NM_004006.2:c.4362_4373del']['alt_genomic_loci'], []) + assert results['NM_004006.2:c.4362_4373del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32407764_32407775del', 'vcf': {'chr': 'chrX', 'pos': '32407762', 'ref': 'AAACTTCATGGAG', 'alt': 'A'}} + assert results['NM_004006.2:c.4362_4373del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32389647_32389658del', 'vcf': {'chr': 'chrX', 'pos': '32389645', 'ref': 'AAACTTCATGGAG', 'alt': 'A'}} + assert results['NM_004006.2:c.4362_4373del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32407764_32407775del', 'vcf': {'chr': 'X', 'pos': '32407762', 'ref': 'AAACTTCATGGAG', 'alt': 'A'}} + assert results['NM_004006.2:c.4362_4373del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32389647_32389658del', 'vcf': {'chr': 'X', 'pos': '32389645', 'ref': 'AAACTTCATGGAG', 'alt': 'A'}} + assert results['NM_004006.2:c.4362_4373del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 
'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} def test_variant193(self): variant = 'LRG_199t1:c.2376G>C(;)3103del' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_004006.2:c.3103del' in list(results.keys()) - assert results['NM_004006.2:c.3103del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.3103del' - assert results['NM_004006.2:c.3103del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004006.2:c.3103del']['alt_genomic_loci'], []) - assert results['NM_004006.2:c.3103del']['gene_symbol'] == 'DMD' - assert results['NM_004006.2:c.3103del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Gln1035SerfsTer9)', 'slr': 'NP_003997.1:p.(Q1035Sfs*9)'} - assert results['NM_004006.2:c.3103del']['submitted_variant'] == 'LRG_199t1:c.2376G>C(;)3103del' - assert results['NM_004006.2:c.3103del']['genome_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.3103del']['hgvs_lrg_variant'] == 'LRG_199:g.876053del' - assert results['NM_004006.2:c.3103del']['hgvs_transcript_variant'] == 'NM_004006.2:c.3103del' - assert results['NM_004006.2:c.3103del']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.876053del' - assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32486676del', 'vcf': {'chr': 'chrX', 'ref': 'TG', 'pos': '32486673', 'alt': 'T'}} - assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32468559del', 'vcf': {'chr': 'chrX', 'ref': 'TG', 'pos': '32468556', 'alt': 'T'}} - assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32486676del', 'vcf': {'chr': 'X', 'ref': 'TG', 'pos': '32486673', 'alt': 'T'}} - assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32468559del', 
'vcf': {'chr': 'X', 'ref': 'TG', 'pos': '32468556', 'alt': 'T'}} - assert results['NM_004006.2:c.3103del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} - assert results['flag'] == 'gene_variant' assert 'NM_004006.2:c.2376G>C' in list(results.keys()) - assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.2376G>C' - assert results['NM_004006.2:c.2376G>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004006.2:c.2376G>C']['alt_genomic_loci'], []) - assert results['NM_004006.2:c.2376G>C']['gene_symbol'] == 'DMD' - assert results['NM_004006.2:c.2376G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Val792=)', 'slr': 'NP_003997.1:p.(V792=)'} assert results['NM_004006.2:c.2376G>C']['submitted_variant'] == 'LRG_199t1:c.2376G>C(;)3103del' - assert results['NM_004006.2:c.2376G>C']['genome_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_variant'] == 'LRG_199:g.842851G>C' + assert results['NM_004006.2:c.2376G>C']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.2376G>C']['gene_ids'] == {'hgnc_id': 'HGNC:2928', 'entrez_gene_id': '1756', 'ucsc_id': 'uc004dda.2', 'omim_id': ['300377']} assert results['NM_004006.2:c.2376G>C']['hgvs_transcript_variant'] == 'NM_004006.2:c.2376G>C' + assert results['NM_004006.2:c.2376G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.2376G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.2376G>C']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.842851G>C' - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 
'chrX', 'ref': 'C', 'pos': '32519876', 'alt': 'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '32501759', 'alt': 'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '32519876', 'alt': 'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '32501759', 'alt': 'G'}} - assert results['NM_004006.2:c.2376G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + assert results['NM_004006.2:c.2376G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Val792=)', 'slr': 'NP_003997.1:p.(V792=)'} + assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.2376G>C' + assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_variant'] == 'LRG_199:g.842851G>C' + self.assertCountEqual(results['NM_004006.2:c.2376G>C']['alt_genomic_loci'], []) + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'chrX', 'pos': '32519876', 'ref': 'C', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'chrX', 'pos': '32501759', 'ref': 'C', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'X', 'pos': 
'32519876', 'ref': 'C', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'X', 'pos': '32501759', 'ref': 'C', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + assert 'NM_004006.2:c.3103del' in list(results.keys()) + assert results['NM_004006.2:c.3103del']['submitted_variant'] == 'LRG_199t1:c.2376G>C(;)3103del' + assert results['NM_004006.2:c.3103del']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.3103del']['gene_ids'] == {'hgnc_id': 'HGNC:2928', 'entrez_gene_id': '1756', 'ucsc_id': 'uc004dda.2', 'omim_id': ['300377']} + assert results['NM_004006.2:c.3103del']['hgvs_transcript_variant'] == 'NM_004006.2:c.3103del' + assert results['NM_004006.2:c.3103del']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.3103del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.3103del']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.876053del' + assert results['NM_004006.2:c.3103del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Gln1035SerfsTer9)', 'slr': 'NP_003997.1:p.(Q1035Sfs*9)'} + assert results['NM_004006.2:c.3103del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.3103del' + assert results['NM_004006.2:c.3103del']['hgvs_lrg_variant'] == 'LRG_199:g.876053del' + self.assertCountEqual(results['NM_004006.2:c.3103del']['alt_genomic_loci'], []) + assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32486676del', 'vcf': {'chr': 'chrX', 'pos': '32486673', 'ref': 'TG', 'alt': 'T'}} + assert 
results['NM_004006.2:c.3103del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32468559del', 'vcf': {'chr': 'chrX', 'pos': '32468556', 'ref': 'TG', 'alt': 'T'}} + assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32486676del', 'vcf': {'chr': 'X', 'pos': '32486673', 'ref': 'TG', 'alt': 'T'}} + assert results['NM_004006.2:c.3103del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32468559del', 'vcf': {'chr': 'X', 'pos': '32468556', 'ref': 'TG', 'alt': 'T'}} + assert results['NM_004006.2:c.3103del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} def test_variant194(self): variant = 'LRG_199t1:c.2376[G>C];[(G>C)]' @@ -6859,22 +6986,22 @@ def test_variant194(self): assert results['flag'] == 'gene_variant' assert 'NM_004006.2:c.2376G>C' in list(results.keys()) - assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.2376G>C' - assert results['NM_004006.2:c.2376G>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004006.2:c.2376G>C']['alt_genomic_loci'], []) - assert results['NM_004006.2:c.2376G>C']['gene_symbol'] == 'DMD' - assert results['NM_004006.2:c.2376G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Val792=)', 'slr': 'NP_003997.1:p.(V792=)'} assert results['NM_004006.2:c.2376G>C']['submitted_variant'] == 'LRG_199t1:c.2376[G>C];[(G>C)]' - assert results['NM_004006.2:c.2376G>C']['genome_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_variant'] == 'LRG_199:g.842851G>C' + assert results['NM_004006.2:c.2376G>C']['gene_symbol'] == 'DMD' + assert 
results['NM_004006.2:c.2376G>C']['gene_ids'] == {'hgnc_id': 'HGNC:2928', 'entrez_gene_id': '1756', 'ucsc_id': 'uc004dda.2', 'omim_id': ['300377']} assert results['NM_004006.2:c.2376G>C']['hgvs_transcript_variant'] == 'NM_004006.2:c.2376G>C' + assert results['NM_004006.2:c.2376G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.2376G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.2376G>C']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.842851G>C' - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '32519876', 'alt': 'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '32501759', 'alt': 'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '32519876', 'alt': 'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '32501759', 'alt': 'G'}} - assert results['NM_004006.2:c.2376G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} - + assert results['NM_004006.2:c.2376G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Val792=)', 'slr': 'NP_003997.1:p.(V792=)'} + assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.2376G>C' + assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_variant'] == 
'LRG_199:g.842851G>C' + self.assertCountEqual(results['NM_004006.2:c.2376G>C']['alt_genomic_loci'], []) + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'chrX', 'pos': '32519876', 'ref': 'C', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'chrX', 'pos': '32501759', 'ref': 'C', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'X', 'pos': '32519876', 'ref': 'C', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'X', 'pos': '32501759', 'ref': 'C', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} def test_variant195(self): variant = 'LRG_199t1:c.[2376G>C];[?]' @@ -6883,22 +7010,22 @@ def test_variant195(self): assert results['flag'] == 'gene_variant' assert 'NM_004006.2:c.2376G>C' in list(results.keys()) - assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.2376G>C' - assert results['NM_004006.2:c.2376G>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004006.2:c.2376G>C']['alt_genomic_loci'], []) - assert results['NM_004006.2:c.2376G>C']['gene_symbol'] == 'DMD' - assert results['NM_004006.2:c.2376G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Val792=)', 'slr': 'NP_003997.1:p.(V792=)'} assert 
results['NM_004006.2:c.2376G>C']['submitted_variant'] == 'LRG_199t1:c.[2376G>C];[?]' - assert results['NM_004006.2:c.2376G>C']['genome_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_variant'] == 'LRG_199:g.842851G>C' + assert results['NM_004006.2:c.2376G>C']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.2376G>C']['gene_ids'] == {'hgnc_id': 'HGNC:2928', 'entrez_gene_id': '1756', 'ucsc_id': 'uc004dda.2', 'omim_id': ['300377']} assert results['NM_004006.2:c.2376G>C']['hgvs_transcript_variant'] == 'NM_004006.2:c.2376G>C' + assert results['NM_004006.2:c.2376G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.2376G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.2376G>C']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.842851G>C' - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '32519876', 'alt': 'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '32501759', 'alt': 'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '32519876', 'alt': 'G'}} - assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '32501759', 'alt': 'G'}} - assert results['NM_004006.2:c.2376G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} - + assert 
results['NM_004006.2:c.2376G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Val792=)', 'slr': 'NP_003997.1:p.(V792=)'} + assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.2376G>C' + assert results['NM_004006.2:c.2376G>C']['hgvs_lrg_variant'] == 'LRG_199:g.842851G>C' + self.assertCountEqual(results['NM_004006.2:c.2376G>C']['alt_genomic_loci'], []) + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'chrX', 'pos': '32519876', 'ref': 'C', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'chrX', 'pos': '32501759', 'ref': 'C', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32519876C>G', 'vcf': {'chr': 'X', 'pos': '32519876', 'ref': 'C', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32501759C>G', 'vcf': {'chr': 'X', 'pos': '32501759', 'ref': 'C', 'alt': 'G'}} + assert results['NM_004006.2:c.2376G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} def test_variant196(self): variant = 'LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C' @@ -6906,412 +7033,425 @@ def test_variant196(self): print(results) assert results['flag'] == 'gene_variant' - assert 'NM_004006.2:c.476T=' in list(results.keys()) - assert results['NM_004006.2:c.476T=']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.476T=' - assert results['NM_004006.2:c.476T=']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_004006.2:c.476T=']['alt_genomic_loci'], []) - assert results['NM_004006.2:c.476T=']['gene_symbol'] == 'DMD' - assert results['NM_004006.2:c.476T=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Phe159=)', 'slr': 'NP_003997.1:p.(F159=)'} - assert results['NM_004006.2:c.476T=']['submitted_variant'] == 'LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C' - assert results['NM_004006.2:c.476T=']['genome_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.476T=']['hgvs_lrg_variant'] == 'LRG_199:g.528088T=' - assert results['NM_004006.2:c.476T=']['hgvs_transcript_variant'] == 'NM_004006.2:c.476T=' - assert results['NM_004006.2:c.476T=']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.528088T=' - assert results['NM_004006.2:c.476T=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639A=', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '32834639', 'alt': 'A'}} - assert results['NM_004006.2:c.476T=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522A=', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '32816522', 'alt': 'A'}} - assert results['NM_004006.2:c.476T=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639A=', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '32834639', 'alt': 'A'}} - assert results['NM_004006.2:c.476T=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522A=', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '32816522', 'alt': 'A'}} - assert results['NM_004006.2:c.476T=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} - assert 'NM_004006.2:c.1083A>C' in list(results.keys()) - assert 
results['NM_004006.2:c.1083A>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.1083A>C' - assert results['NM_004006.2:c.1083A>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004006.2:c.1083A>C']['alt_genomic_loci'], []) - assert results['NM_004006.2:c.1083A>C']['gene_symbol'] == 'DMD' - assert results['NM_004006.2:c.1083A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Thr361=)', 'slr': 'NP_003997.1:p.(T361=)'} assert results['NM_004006.2:c.1083A>C']['submitted_variant'] == 'LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C' - assert results['NM_004006.2:c.1083A>C']['genome_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.1083A>C']['hgvs_lrg_variant'] == 'LRG_199:g.699580A>C' + assert results['NM_004006.2:c.1083A>C']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.1083A>C']['gene_ids'] == {'hgnc_id': 'HGNC:2928', 'entrez_gene_id': '1756', 'ucsc_id': 'uc004dda.2', 'omim_id': ['300377']} assert results['NM_004006.2:c.1083A>C']['hgvs_transcript_variant'] == 'NM_004006.2:c.1083A>C' + assert results['NM_004006.2:c.1083A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.1083A>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.1083A>C']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.699580A>C' - assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32663147T>G', 'vcf': {'chr': 'chrX', 'ref': 'T', 'pos': '32663147', 'alt': 'G'}} - assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32645030T>G', 'vcf': {'chr': 'chrX', 'ref': 'T', 'pos': '32645030', 'alt': 'G'}} - assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32663147T>G', 'vcf': {'chr': 'X', 'ref': 'T', 'pos': '32663147', 'alt': 'G'}} - assert 
results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32645030T>G', 'vcf': {'chr': 'X', 'ref': 'T', 'pos': '32645030', 'alt': 'G'}} - assert results['NM_004006.2:c.1083A>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + assert results['NM_004006.2:c.1083A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Thr361=)', 'slr': 'NP_003997.1:p.(T361=)'} + assert results['NM_004006.2:c.1083A>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.1083A>C' + assert results['NM_004006.2:c.1083A>C']['hgvs_lrg_variant'] == 'LRG_199:g.699580A>C' + self.assertCountEqual(results['NM_004006.2:c.1083A>C']['alt_genomic_loci'], []) + assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32663147T>G', 'vcf': {'chr': 'chrX', 'pos': '32663147', 'ref': 'T', 'alt': 'G'}} + assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32645030T>G', 'vcf': {'chr': 'chrX', 'pos': '32645030', 'ref': 'T', 'alt': 'G'}} + assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32663147T>G', 'vcf': {'chr': 'X', 'pos': '32663147', 'ref': 'T', 'alt': 'G'}} + assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32645030T>G', 'vcf': {'chr': 'X', 'pos': '32645030', 'ref': 'T', 'alt': 'G'}} + assert results['NM_004006.2:c.1083A>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} assert 'NM_004006.2:c.296T>G' in list(results.keys()) - assert results['NM_004006.2:c.296T>G']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.296T>G' - assert results['NM_004006.2:c.296T>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004006.2:c.296T>G']['alt_genomic_loci'], []) - assert results['NM_004006.2:c.296T>G']['gene_symbol'] == 'DMD' - assert results['NM_004006.2:c.296T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Ile99Ser)', 'slr': 'NP_003997.1:p.(I99S)'} assert results['NM_004006.2:c.296T>G']['submitted_variant'] == 'LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C' - assert results['NM_004006.2:c.296T>G']['genome_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.296T>G']['hgvs_lrg_variant'] == 'LRG_199:g.521254T>G' + assert results['NM_004006.2:c.296T>G']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.296T>G']['gene_ids'] == {'hgnc_id': 'HGNC:2928', 'entrez_gene_id': '1756', 'ucsc_id': 'uc004dda.2', 'omim_id': ['300377']} assert results['NM_004006.2:c.296T>G']['hgvs_transcript_variant'] == 'NM_004006.2:c.296T>G' + assert results['NM_004006.2:c.296T>G']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.296T>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.296T>G']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.521254T>G' - assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32841473A>C', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '32841473', 'alt': 'C'}} - assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32823356A>C', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '32823356', 'alt': 'C'}} - assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000023.10:g.32841473A>C', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '32841473', 'alt': 'C'}} - assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32823356A>C', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '32823356', 'alt': 'C'}} - assert results['NM_004006.2:c.296T>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + assert results['NM_004006.2:c.296T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Ile99Ser)', 'slr': 'NP_003997.1:p.(I99S)'} + assert results['NM_004006.2:c.296T>G']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.296T>G' + assert results['NM_004006.2:c.296T>G']['hgvs_lrg_variant'] == 'LRG_199:g.521254T>G' + self.assertCountEqual(results['NM_004006.2:c.296T>G']['alt_genomic_loci'], []) + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32841473A>C', 'vcf': {'chr': 'chrX', 'pos': '32841473', 'ref': 'A', 'alt': 'C'}} + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32823356A>C', 'vcf': {'chr': 'chrX', 'pos': '32823356', 'ref': 'A', 'alt': 'C'}} + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32841473A>C', 'vcf': {'chr': 'X', 'pos': '32841473', 'ref': 'A', 'alt': 'C'}} + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32823356A>C', 'vcf': {'chr': 'X', 'pos': '32823356', 'ref': 'A', 'alt': 'C'}} + assert results['NM_004006.2:c.296T>G']['reference_sequence_records'] == {'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + assert 'NM_004006.2:c.476T=' in list(results.keys()) + assert results['NM_004006.2:c.476T=']['submitted_variant'] == 'LRG_199t1:c.[296T>G;476T=];[476T=](;)1083A>C' + assert results['NM_004006.2:c.476T=']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.476T=']['gene_ids'] == {'hgnc_id': 'HGNC:2928', 'entrez_gene_id': '1756', 'ucsc_id': 'uc004dda.2', 'omim_id': ['300377']} + assert results['NM_004006.2:c.476T=']['hgvs_transcript_variant'] == 'NM_004006.2:c.476T=' + assert results['NM_004006.2:c.476T=']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.476T=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.476T=']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.528088T=' + assert results['NM_004006.2:c.476T=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Phe159=)', 'slr': 'NP_003997.1:p.(F159=)'} + assert results['NM_004006.2:c.476T=']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.476T=' + assert results['NM_004006.2:c.476T=']['hgvs_lrg_variant'] == 'LRG_199:g.528088T=' + self.assertCountEqual(results['NM_004006.2:c.476T=']['alt_genomic_loci'], []) + assert results['NM_004006.2:c.476T=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639A=', 'vcf': {'chr': 'chrX', 'pos': '32834639', 'ref': 'A', 'alt': 'A'}} + assert results['NM_004006.2:c.476T=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522A=', 'vcf': {'chr': 'chrX', 'pos': '32816522', 'ref': 'A', 'alt': 'A'}} + assert results['NM_004006.2:c.476T=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639A=', 'vcf': {'chr': 'X', 'pos': '32834639', 'ref': 'A', 'alt': 
'A'}} + assert results['NM_004006.2:c.476T=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522A=', 'vcf': {'chr': 'X', 'pos': '32816522', 'ref': 'A', 'alt': 'A'}} + assert results['NM_004006.2:c.476T=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} def test_variant197(self): variant = 'LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_004006.2:c.1408del' in list(results.keys()) - assert results['NM_004006.2:c.1408del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.1408del' - assert results['NM_004006.2:c.1408del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004006.2:c.1408del']['alt_genomic_loci'], []) - assert results['NM_004006.2:c.1408del']['gene_symbol'] == 'DMD' - assert results['NM_004006.2:c.1408del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Arg470GlufsTer17)', 'slr': 'NP_003997.1:p.(R470Efs*17)'} - assert results['NM_004006.2:c.1408del']['submitted_variant'] == 'LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del' - assert results['NM_004006.2:c.1408del']['genome_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.1408del']['hgvs_lrg_variant'] == 'LRG_199:g.730233del' - assert results['NM_004006.2:c.1408del']['hgvs_transcript_variant'] == 'NM_004006.2:c.1408del' - assert results['NM_004006.2:c.1408del']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.730233del' - assert results['NM_004006.2:c.1408del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32632496del', 'vcf': {'chr': 'chrX', 'ref': 'CT', 'pos': '32632493', 'alt': 'C'}} - assert 
results['NM_004006.2:c.1408del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32614379del', 'vcf': {'chr': 'chrX', 'ref': 'CT', 'pos': '32614376', 'alt': 'C'}} - assert results['NM_004006.2:c.1408del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32632496del', 'vcf': {'chr': 'X', 'ref': 'CT', 'pos': '32632493', 'alt': 'C'}} - assert results['NM_004006.2:c.1408del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32614379del', 'vcf': {'chr': 'X', 'ref': 'CT', 'pos': '32614376', 'alt': 'C'}} - assert results['NM_004006.2:c.1408del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} - assert results['flag'] == 'gene_variant' + assert 'NM_004006.2:c.1083A>C' in list(results.keys()) + assert results['NM_004006.2:c.1083A>C']['submitted_variant'] == 'LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del' + assert results['NM_004006.2:c.1083A>C']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.1083A>C']['gene_ids'] == {'hgnc_id': 'HGNC:2928', 'entrez_gene_id': '1756', 'ucsc_id': 'uc004dda.2', 'omim_id': ['300377']} + assert results['NM_004006.2:c.1083A>C']['hgvs_transcript_variant'] == 'NM_004006.2:c.1083A>C' + assert results['NM_004006.2:c.1083A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.1083A>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.1083A>C']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.699580A>C' + assert results['NM_004006.2:c.1083A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Thr361=)', 'slr': 'NP_003997.1:p.(T361=)'} + assert results['NM_004006.2:c.1083A>C']['hgvs_lrg_transcript_variant'] == 
'LRG_199t1:c.1083A>C' + assert results['NM_004006.2:c.1083A>C']['hgvs_lrg_variant'] == 'LRG_199:g.699580A>C' + self.assertCountEqual(results['NM_004006.2:c.1083A>C']['alt_genomic_loci'], []) + assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32663147T>G', 'vcf': {'chr': 'chrX', 'pos': '32663147', 'ref': 'T', 'alt': 'G'}} + assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32645030T>G', 'vcf': {'chr': 'chrX', 'pos': '32645030', 'ref': 'T', 'alt': 'G'}} + assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32663147T>G', 'vcf': {'chr': 'X', 'pos': '32663147', 'ref': 'T', 'alt': 'G'}} + assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32645030T>G', 'vcf': {'chr': 'X', 'pos': '32645030', 'ref': 'T', 'alt': 'G'}} + assert results['NM_004006.2:c.1083A>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + assert 'NM_004006.2:c.296T>G' in list(results.keys()) - assert results['NM_004006.2:c.296T>G']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.296T>G' - assert results['NM_004006.2:c.296T>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004006.2:c.296T>G']['alt_genomic_loci'], []) - assert results['NM_004006.2:c.296T>G']['gene_symbol'] == 'DMD' - assert results['NM_004006.2:c.296T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Ile99Ser)', 'slr': 'NP_003997.1:p.(I99S)'} assert results['NM_004006.2:c.296T>G']['submitted_variant'] == 'LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del' 
- assert results['NM_004006.2:c.296T>G']['genome_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.296T>G']['hgvs_lrg_variant'] == 'LRG_199:g.521254T>G' + assert results['NM_004006.2:c.296T>G']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.296T>G']['gene_ids'] == {'hgnc_id': 'HGNC:2928', 'entrez_gene_id': '1756', 'ucsc_id': 'uc004dda.2', 'omim_id': ['300377']} assert results['NM_004006.2:c.296T>G']['hgvs_transcript_variant'] == 'NM_004006.2:c.296T>G' + assert results['NM_004006.2:c.296T>G']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.296T>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.296T>G']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.521254T>G' - assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32841473A>C', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '32841473', 'alt': 'C'}} - assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32823356A>C', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '32823356', 'alt': 'C'}} - assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32841473A>C', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '32841473', 'alt': 'C'}} - assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32823356A>C', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '32823356', 'alt': 'C'}} - assert results['NM_004006.2:c.296T>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + assert results['NM_004006.2:c.296T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_003997.1(LRG_199p1):p.(Ile99Ser)', 'slr': 'NP_003997.1:p.(I99S)'} + assert results['NM_004006.2:c.296T>G']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.296T>G' + assert results['NM_004006.2:c.296T>G']['hgvs_lrg_variant'] == 'LRG_199:g.521254T>G' + self.assertCountEqual(results['NM_004006.2:c.296T>G']['alt_genomic_loci'], []) + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32841473A>C', 'vcf': {'chr': 'chrX', 'pos': '32841473', 'ref': 'A', 'alt': 'C'}} + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32823356A>C', 'vcf': {'chr': 'chrX', 'pos': '32823356', 'ref': 'A', 'alt': 'C'}} + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32841473A>C', 'vcf': {'chr': 'X', 'pos': '32841473', 'ref': 'A', 'alt': 'C'}} + assert results['NM_004006.2:c.296T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32823356A>C', 'vcf': {'chr': 'X', 'pos': '32823356', 'ref': 'A', 'alt': 'C'}} + assert results['NM_004006.2:c.296T>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} assert 'NM_004006.2:c.476T>C' in list(results.keys()) - assert results['NM_004006.2:c.476T>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.476T>C' - assert results['NM_004006.2:c.476T>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004006.2:c.476T>C']['alt_genomic_loci'], []) - assert results['NM_004006.2:c.476T>C']['gene_symbol'] == 'DMD' - assert results['NM_004006.2:c.476T>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Phe159Ser)', 'slr': 
'NP_003997.1:p.(F159S)'} assert results['NM_004006.2:c.476T>C']['submitted_variant'] == 'LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del' - assert results['NM_004006.2:c.476T>C']['genome_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.476T>C']['hgvs_lrg_variant'] == 'LRG_199:g.528088T>C' + assert results['NM_004006.2:c.476T>C']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.476T>C']['gene_ids'] == {'hgnc_id': 'HGNC:2928', 'entrez_gene_id': '1756', 'ucsc_id': 'uc004dda.2', 'omim_id': ['300377']} assert results['NM_004006.2:c.476T>C']['hgvs_transcript_variant'] == 'NM_004006.2:c.476T>C' + assert results['NM_004006.2:c.476T>C']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.476T>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004006.2:c.476T>C']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.528088T>C' - assert results['NM_004006.2:c.476T>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639A>G', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '32834639', 'alt': 'G'}} - assert results['NM_004006.2:c.476T>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522A>G', 'vcf': {'chr': 'chrX', 'ref': 'A', 'pos': '32816522', 'alt': 'G'}} - assert results['NM_004006.2:c.476T>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639A>G', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '32834639', 'alt': 'G'}} - assert results['NM_004006.2:c.476T>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522A>G', 'vcf': {'chr': 'X', 'ref': 'A', 'pos': '32816522', 'alt': 'G'}} - assert results['NM_004006.2:c.476T>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 
'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} - - assert 'NM_004006.2:c.1083A>C' in list(results.keys()) - assert results['NM_004006.2:c.1083A>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.1083A>C' - assert results['NM_004006.2:c.1083A>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004006.2:c.1083A>C']['alt_genomic_loci'], []) - assert results['NM_004006.2:c.1083A>C']['gene_symbol'] == 'DMD' - assert results['NM_004006.2:c.1083A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Thr361=)', 'slr': 'NP_003997.1:p.(T361=)'} - assert results['NM_004006.2:c.1083A>C']['submitted_variant'] == 'LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del' - assert results['NM_004006.2:c.1083A>C']['genome_context_intronic_sequence'] == '' - assert results['NM_004006.2:c.1083A>C']['hgvs_lrg_variant'] == 'LRG_199:g.699580A>C' - assert results['NM_004006.2:c.1083A>C']['hgvs_transcript_variant'] == 'NM_004006.2:c.1083A>C' - assert results['NM_004006.2:c.1083A>C']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.699580A>C' - assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32663147T>G', 'vcf': {'chr': 'chrX', 'ref': 'T', 'pos': '32663147', 'alt': 'G'}} - assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32645030T>G', 'vcf': {'chr': 'chrX', 'ref': 'T', 'pos': '32645030', 'alt': 'G'}} - assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32663147T>G', 'vcf': {'chr': 'X', 'ref': 'T', 'pos': '32663147', 'alt': 'G'}} - assert results['NM_004006.2:c.1083A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32645030T>G', 'vcf': {'chr': 'X', 'ref': 'T', 'pos': '32645030', 'alt': 'G'}} - assert results['NM_004006.2:c.1083A>C']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + assert results['NM_004006.2:c.476T>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Phe159Ser)', 'slr': 'NP_003997.1:p.(F159S)'} + assert results['NM_004006.2:c.476T>C']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.476T>C' + assert results['NM_004006.2:c.476T>C']['hgvs_lrg_variant'] == 'LRG_199:g.528088T>C' + self.assertCountEqual(results['NM_004006.2:c.476T>C']['alt_genomic_loci'], []) + assert results['NM_004006.2:c.476T>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639A>G', 'vcf': {'chr': 'chrX', 'pos': '32834639', 'ref': 'A', 'alt': 'G'}} + assert results['NM_004006.2:c.476T>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522A>G', 'vcf': {'chr': 'chrX', 'pos': '32816522', 'ref': 'A', 'alt': 'G'}} + assert results['NM_004006.2:c.476T>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32834639A>G', 'vcf': {'chr': 'X', 'pos': '32834639', 'ref': 'A', 'alt': 'G'}} + assert results['NM_004006.2:c.476T>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32816522A>G', 'vcf': {'chr': 'X', 'pos': '32816522', 'ref': 'A', 'alt': 'G'}} + assert results['NM_004006.2:c.476T>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} + assert 'NM_004006.2:c.1408del' in list(results.keys()) + assert results['NM_004006.2:c.1408del']['submitted_variant'] == 'LRG_199t1:c.[296T>G];[476T>C](;)1083A>C(;)1406del' + assert 
results['NM_004006.2:c.1408del']['gene_symbol'] == 'DMD' + assert results['NM_004006.2:c.1408del']['gene_ids'] == {'hgnc_id': 'HGNC:2928', 'entrez_gene_id': '1756', 'ucsc_id': 'uc004dda.2', 'omim_id': ['300377']} + assert results['NM_004006.2:c.1408del']['hgvs_transcript_variant'] == 'NM_004006.2:c.1408del' + assert results['NM_004006.2:c.1408del']['genome_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.1408del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004006.2:c.1408del']['hgvs_refseqgene_variant'] == 'NG_012232.1:g.730233del' + assert results['NM_004006.2:c.1408del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003997.1(LRG_199p1):p.(Arg470GlufsTer17)', 'slr': 'NP_003997.1:p.(R470Efs*17)'} + assert results['NM_004006.2:c.1408del']['hgvs_lrg_transcript_variant'] == 'LRG_199t1:c.1408del' + assert results['NM_004006.2:c.1408del']['hgvs_lrg_variant'] == 'LRG_199:g.730233del' + self.assertCountEqual(results['NM_004006.2:c.1408del']['alt_genomic_loci'], []) + assert results['NM_004006.2:c.1408del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.32632496del', 'vcf': {'chr': 'chrX', 'pos': '32632493', 'ref': 'CT', 'alt': 'C'}} + assert results['NM_004006.2:c.1408del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32614379del', 'vcf': {'chr': 'chrX', 'pos': '32614376', 'ref': 'CT', 'alt': 'C'}} + assert results['NM_004006.2:c.1408del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.32632496del', 'vcf': {'chr': 'X', 'pos': '32632493', 'ref': 'CT', 'alt': 'C'}} + assert results['NM_004006.2:c.1408del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.32614379del', 'vcf': {'chr': 'X', 'pos': '32614376', 'ref': 'CT', 'alt': 'C'}} + assert results['NM_004006.2:c.1408del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004006.2', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_003997.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012232.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_199.xml'} def test_variant198(self): variant = 'LRG_199t1:c.[976-20T>A;976-17_976-1dup]' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'warning' assert 'validation_warning_1' in list(results.keys()) - assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' - assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) - assert results['validation_warning_1']['gene_symbol'] == '' - assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['validation_warning_1']['submitted_variant'] == 'LRG_199t1:c.[976-20T>A;976-17_976-1dup]' - assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' - assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + assert results['validation_warning_1']['gene_symbol'] == '' + assert results['validation_warning_1']['gene_ids'] == {} assert results['validation_warning_1']['hgvs_transcript_variant'] == '' + assert results['validation_warning_1']['genome_context_intronic_sequence'] == '' + assert results['validation_warning_1']['refseqgene_context_intronic_sequence'] == '' assert results['validation_warning_1']['hgvs_refseqgene_variant'] == '' + assert results['validation_warning_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['validation_warning_1']['hgvs_lrg_transcript_variant'] == '' + assert results['validation_warning_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['validation_warning_1']['alt_genomic_loci'], []) assert 'hg19' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'hg38' not in 
list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['validation_warning_1']['primary_assembly_loci'].keys()) assert results['validation_warning_1']['reference_sequence_records'] == '' - assert results['flag'] == 'warning' - def test_variant199(self): variant = '1-5935162-A-T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_015102.3:c.2818-2T>A' in list(results.keys()) - assert results['NM_015102.3:c.2818-2T>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_015102.3:c.2818-2T>A']['refseqgene_context_intronic_sequence'] == 'NG_011724.2(NM_015102.3):c.2818-2A=' - self.assertCountEqual(results['NM_015102.3:c.2818-2T>A']['alt_genomic_loci'], []) - assert results['NM_015102.3:c.2818-2T>A']['gene_symbol'] == 'NPHP4' - assert results['NM_015102.3:c.2818-2T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055917.1:p.?', 'slr': 'NP_055917.1:p.?'} - assert results['NM_015102.3:c.2818-2T>A']['submitted_variant'] == '1-5935162-A-T' - assert results['NM_015102.3:c.2818-2T>A']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_015102.3):c.2818-2T>A' - assert results['NM_015102.3:c.2818-2T>A']['hgvs_lrg_variant'] == '' - assert results['NM_015102.3:c.2818-2T>A']['hgvs_transcript_variant'] == 'NM_015102.3:c.2818-2T>A' - assert results['NM_015102.3:c.2818-2T>A']['hgvs_refseqgene_variant'] == 'NG_011724.2:g.122370A=' - assert results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '5935162', 'alt': 'T'}} - assert 'hg38' not in list(results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci'].keys()) - assert results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 
'1', 'ref': 'A', 'pos': '5935162', 'alt': 'T'}} - assert 'grch38' not in list(results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci'].keys()) - assert results['NM_015102.3:c.2818-2T>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011724.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055917.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015102.3'} + assert results['flag'] == 'gene_variant' + assert 'NR_111987.1:n.3633-2T>A' in list(results.keys()) + assert results['NR_111987.1:n.3633-2T>A']['submitted_variant'] == '1-5935162-A-T' + assert results['NR_111987.1:n.3633-2T>A']['gene_symbol'] == 'NPHP4' + assert results['NR_111987.1:n.3633-2T>A']['gene_ids'] == {'hgnc_id': 'HGNC:19104', 'entrez_gene_id': '261734', 'ucsc_id': 'uc001alq.3', 'omim_id': ['607215']} + assert results['NR_111987.1:n.3633-2T>A']['hgvs_transcript_variant'] == 'NR_111987.1:n.3633-2T>A' + assert results['NR_111987.1:n.3633-2T>A']['genome_context_intronic_sequence'] == 'NC_000001.10(NR_111987.1):c.3633-2T>A' + assert results['NR_111987.1:n.3633-2T>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_111987.1:n.3633-2T>A']['hgvs_refseqgene_variant'] == '' + assert results['NR_111987.1:n.3633-2T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_111987.1:n.3633-2T>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_111987.1:n.3633-2T>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_111987.1:n.3633-2T>A']['alt_genomic_loci'], []) + assert results['NR_111987.1:n.3633-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'pos': '5935162', 'ref': 'A', 'alt': 'T'}} + assert results['NR_111987.1:n.3633-2T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': 'chr1', 'pos': '5875102', 'ref': 'T', 'alt': 
'T'}} + assert results['NR_111987.1:n.3633-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'pos': '5935162', 'ref': 'A', 'alt': 'T'}} + assert results['NR_111987.1:n.3633-2T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': '1', 'pos': '5875102', 'ref': 'T', 'alt': 'T'}} + assert results['NR_111987.1:n.3633-2T>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_111987.1'} + + assert 'NM_015102.4:c.2818-2T>A' in list(results.keys()) + assert results['NM_015102.4:c.2818-2T>A']['submitted_variant'] == '1-5935162-A-T' + assert results['NM_015102.4:c.2818-2T>A']['gene_symbol'] == 'NPHP4' + assert results['NM_015102.4:c.2818-2T>A']['gene_ids'] == {'hgnc_id': 'HGNC:19104', 'entrez_gene_id': '261734', 'ucsc_id': 'uc001alq.3', 'omim_id': ['607215']} + assert results['NM_015102.4:c.2818-2T>A']['hgvs_transcript_variant'] == 'NM_015102.4:c.2818-2T>A' + assert results['NM_015102.4:c.2818-2T>A']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_015102.4):c.2818-2T>A' + assert results['NM_015102.4:c.2818-2T>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_015102.4:c.2818-2T>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_015102.4:c.2818-2T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055917.1:p.?', 'slr': 'NP_055917.1:p.?'} + assert results['NM_015102.4:c.2818-2T>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_015102.4:c.2818-2T>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_015102.4:c.2818-2T>A']['alt_genomic_loci'], []) + assert results['NM_015102.4:c.2818-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'pos': '5935162', 'ref': 'A', 'alt': 'T'}} + assert results['NM_015102.4:c.2818-2T>A']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': 'chr1', 'pos': '5875102', 'ref': 'T', 'alt': 'T'}} + assert results['NM_015102.4:c.2818-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'pos': '5935162', 'ref': 'A', 'alt': 'T'}} + assert results['NM_015102.4:c.2818-2T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': '1', 'pos': '5875102', 'ref': 'T', 'alt': 'T'}} + assert results['NM_015102.4:c.2818-2T>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015102.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055917.1'} assert 'NM_001291593.1:c.1279-2T>A' in list(results.keys()) - assert results['NM_001291593.1:c.1279-2T>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001291593.1:c.1279-2T>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001291593.1:c.1279-2T>A']['alt_genomic_loci'], []) - assert results['NM_001291593.1:c.1279-2T>A']['gene_symbol'] == 'NPHP4' - assert results['NM_001291593.1:c.1279-2T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278522.1:p.?', 'slr': 'NP_001278522.1:p.?'} assert results['NM_001291593.1:c.1279-2T>A']['submitted_variant'] == '1-5935162-A-T' - assert results['NM_001291593.1:c.1279-2T>A']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_001291593.1):c.1279-2T>A' - assert results['NM_001291593.1:c.1279-2T>A']['hgvs_lrg_variant'] == '' + assert results['NM_001291593.1:c.1279-2T>A']['gene_symbol'] == 'NPHP4' + assert results['NM_001291593.1:c.1279-2T>A']['gene_ids'] == {'hgnc_id': 'HGNC:19104', 'entrez_gene_id': '261734', 'ucsc_id': 'uc001alq.3', 'omim_id': ['607215']} assert results['NM_001291593.1:c.1279-2T>A']['hgvs_transcript_variant'] == 'NM_001291593.1:c.1279-2T>A' + assert results['NM_001291593.1:c.1279-2T>A']['genome_context_intronic_sequence'] == 
'NC_000001.10(NM_001291593.1):c.1279-2T>A' + assert results['NM_001291593.1:c.1279-2T>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001291593.1:c.1279-2T>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001291593.1:c.1279-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '5935162', 'alt': 'T'}} - assert results['NM_001291593.1:c.1279-2T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '5875102', 'alt': 'T'}} - assert results['NM_001291593.1:c.1279-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '5935162', 'alt': 'T'}} - assert results['NM_001291593.1:c.1279-2T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '5875102', 'alt': 'T'}} - assert results['NM_001291593.1:c.1279-2T>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278522.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291593.1'} + assert results['NM_001291593.1:c.1279-2T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278522.1:p.?', 'slr': 'NP_001278522.1:p.?'} + assert results['NM_001291593.1:c.1279-2T>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001291593.1:c.1279-2T>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001291593.1:c.1279-2T>A']['alt_genomic_loci'], []) + assert results['NM_001291593.1:c.1279-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'pos': '5935162', 'ref': 'A', 'alt': 'T'}} + assert results['NM_001291593.1:c.1279-2T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': 
{'chr': 'chr1', 'pos': '5875102', 'ref': 'T', 'alt': 'T'}} + assert results['NM_001291593.1:c.1279-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'pos': '5935162', 'ref': 'A', 'alt': 'T'}} + assert results['NM_001291593.1:c.1279-2T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': '1', 'pos': '5875102', 'ref': 'T', 'alt': 'T'}} + assert results['NM_001291593.1:c.1279-2T>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291593.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278522.1'} - assert 'NM_015102.4:c.2818-2T>A' in list(results.keys()) - assert results['NM_015102.4:c.2818-2T>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_015102.4:c.2818-2T>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_015102.4:c.2818-2T>A']['alt_genomic_loci'], []) - assert results['NM_015102.4:c.2818-2T>A']['gene_symbol'] == 'NPHP4' - assert results['NM_015102.4:c.2818-2T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055917.1:p.?', 'slr': 'NP_055917.1:p.?'} - assert results['NM_015102.4:c.2818-2T>A']['submitted_variant'] == '1-5935162-A-T' - assert results['NM_015102.4:c.2818-2T>A']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_015102.4):c.2818-2T>A' - assert results['NM_015102.4:c.2818-2T>A']['hgvs_lrg_variant'] == '' - assert results['NM_015102.4:c.2818-2T>A']['hgvs_transcript_variant'] == 'NM_015102.4:c.2818-2T>A' - assert results['NM_015102.4:c.2818-2T>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_015102.4:c.2818-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '5935162', 'alt': 'T'}} - assert results['NM_015102.4:c.2818-2T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000001.11:g.5875102T=', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '5875102', 'alt': 'T'}} - assert results['NM_015102.4:c.2818-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '5935162', 'alt': 'T'}} - assert results['NM_015102.4:c.2818-2T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '5875102', 'alt': 'T'}} - assert results['NM_015102.4:c.2818-2T>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055917.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015102.4'} + assert 'NM_015102.3:c.2818-2T>A' in list(results.keys()) + assert results['NM_015102.3:c.2818-2T>A']['submitted_variant'] == '1-5935162-A-T' + assert results['NM_015102.3:c.2818-2T>A']['gene_symbol'] == 'NPHP4' + assert results['NM_015102.3:c.2818-2T>A']['gene_ids'] == {'hgnc_id': 'HGNC:19104', 'entrez_gene_id': '261734', 'ucsc_id': 'uc001alq.3', 'omim_id': ['607215']} + assert results['NM_015102.3:c.2818-2T>A']['hgvs_transcript_variant'] == 'NM_015102.3:c.2818-2T>A' + assert results['NM_015102.3:c.2818-2T>A']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_015102.3):c.2818-2T>A' + assert results['NM_015102.3:c.2818-2T>A']['refseqgene_context_intronic_sequence'] == 'NG_011724.2(NM_015102.3):c.2818-2A=' + assert results['NM_015102.3:c.2818-2T>A']['hgvs_refseqgene_variant'] == 'NG_011724.2:g.122370A=' + assert results['NM_015102.3:c.2818-2T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055917.1:p.?', 'slr': 'NP_055917.1:p.?'} + assert results['NM_015102.3:c.2818-2T>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_015102.3:c.2818-2T>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_015102.3:c.2818-2T>A']['alt_genomic_loci'], []) + assert results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'pos': '5935162', 'ref': 'A', 'alt': 'T'}} + assert 'hg38' not in list(results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci'].keys()) + assert results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'pos': '5935162', 'ref': 'A', 'alt': 'T'}} + assert 'grch38' not in list(results['NM_015102.3:c.2818-2T>A']['primary_assembly_loci'].keys()) + assert results['NM_015102.3:c.2818-2T>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015102.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055917.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011724.2'} assert 'NM_001291594.1:c.1282-2T>A' in list(results.keys()) - assert results['NM_001291594.1:c.1282-2T>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001291594.1:c.1282-2T>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001291594.1:c.1282-2T>A']['alt_genomic_loci'], []) - assert results['NM_001291594.1:c.1282-2T>A']['gene_symbol'] == 'NPHP4' - assert results['NM_001291594.1:c.1282-2T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278523.1:p.?', 'slr': 'NP_001278523.1:p.?'} assert results['NM_001291594.1:c.1282-2T>A']['submitted_variant'] == '1-5935162-A-T' - assert results['NM_001291594.1:c.1282-2T>A']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_001291594.1):c.1282-2T>A' - assert results['NM_001291594.1:c.1282-2T>A']['hgvs_lrg_variant'] == '' + assert results['NM_001291594.1:c.1282-2T>A']['gene_symbol'] == 'NPHP4' + assert results['NM_001291594.1:c.1282-2T>A']['gene_ids'] == {'hgnc_id': 'HGNC:19104', 'entrez_gene_id': '261734', 'ucsc_id': 'uc001alq.3', 'omim_id': ['607215']} assert results['NM_001291594.1:c.1282-2T>A']['hgvs_transcript_variant'] == 'NM_001291594.1:c.1282-2T>A' + assert 
results['NM_001291594.1:c.1282-2T>A']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_001291594.1):c.1282-2T>A' + assert results['NM_001291594.1:c.1282-2T>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001291594.1:c.1282-2T>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001291594.1:c.1282-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '5935162', 'alt': 'T'}} - assert results['NM_001291594.1:c.1282-2T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '5875102', 'alt': 'T'}} - assert results['NM_001291594.1:c.1282-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '5935162', 'alt': 'T'}} - assert results['NM_001291594.1:c.1282-2T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '5875102', 'alt': 'T'}} - assert results['NM_001291594.1:c.1282-2T>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278523.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291594.1'} + assert results['NM_001291594.1:c.1282-2T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278523.1:p.?', 'slr': 'NP_001278523.1:p.?'} + assert results['NM_001291594.1:c.1282-2T>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001291594.1:c.1282-2T>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001291594.1:c.1282-2T>A']['alt_genomic_loci'], []) + assert results['NM_001291594.1:c.1282-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'pos': '5935162', 'ref': 'A', 'alt': 'T'}} + assert 
results['NM_001291594.1:c.1282-2T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': 'chr1', 'pos': '5875102', 'ref': 'T', 'alt': 'T'}} + assert results['NM_001291594.1:c.1282-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'pos': '5935162', 'ref': 'A', 'alt': 'T'}} + assert results['NM_001291594.1:c.1282-2T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': '1', 'pos': '5875102', 'ref': 'T', 'alt': 'T'}} + assert results['NM_001291594.1:c.1282-2T>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291594.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278523.1'} + + def test_variant200(self): + variant = '1-12065948-C-T' + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() + print(results) assert results['flag'] == 'gene_variant' - assert 'NR_111987.1:n.3633-2T>A' in list(results.keys()) - assert results['NR_111987.1:n.3633-2T>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_111987.1:n.3633-2T>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_111987.1:n.3633-2T>A']['alt_genomic_loci'], []) - assert results['NR_111987.1:n.3633-2T>A']['gene_symbol'] == 'NPHP4' - assert results['NR_111987.1:n.3633-2T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} - assert results['NR_111987.1:n.3633-2T>A']['submitted_variant'] == '1-5935162-A-T' - assert results['NR_111987.1:n.3633-2T>A']['genome_context_intronic_sequence'] == 'NC_000001.10(NR_111987.1):c.3633-2T>A' - assert results['NR_111987.1:n.3633-2T>A']['hgvs_lrg_variant'] == '' - assert results['NR_111987.1:n.3633-2T>A']['hgvs_transcript_variant'] == 'NR_111987.1:n.3633-2T>A' - assert results['NR_111987.1:n.3633-2T>A']['hgvs_refseqgene_variant'] == '' - assert 
results['NR_111987.1:n.3633-2T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': 'chr1', 'ref': 'A', 'pos': '5935162', 'alt': 'T'}} - assert results['NR_111987.1:n.3633-2T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '5875102', 'alt': 'T'}} - assert results['NR_111987.1:n.3633-2T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.5935162A>T', 'vcf': {'chr': '1', 'ref': 'A', 'pos': '5935162', 'alt': 'T'}} - assert results['NR_111987.1:n.3633-2T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.5875102T=', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '5875102', 'alt': 'T'}} - assert results['NR_111987.1:n.3633-2T>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_111987.1'} - - - def test_variant200(self): - variant = '1-12065948-C-T' - results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() - print(results) - - assert results['flag'] == 'gene_variant' - assert 'NM_001127660.1:c.1676C>T' in list(results.keys()) - assert results['NM_001127660.1:c.1676C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001127660.1:c.1676C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001127660.1:c.1676C>T']['alt_genomic_loci'], []) - assert results['NM_001127660.1:c.1676C>T']['gene_symbol'] == 'MFN2' - assert results['NM_001127660.1:c.1676C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001121132.1:p.(Pro559Leu)', 'slr': 'NP_001121132.1:p.(P559L)'} - assert results['NM_001127660.1:c.1676C>T']['submitted_variant'] == '1-12065948-C-T' - assert results['NM_001127660.1:c.1676C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001127660.1:c.1676C>T']['hgvs_lrg_variant'] == '' - assert 
results['NM_001127660.1:c.1676C>T']['hgvs_transcript_variant'] == 'NM_001127660.1:c.1676C>T' - assert results['NM_001127660.1:c.1676C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001127660.1:c.1676C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.12065948C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '12065948', 'alt': 'T'}} - assert results['NM_001127660.1:c.1676C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.12005891C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '12005891', 'alt': 'T'}} - assert results['NM_001127660.1:c.1676C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.12065948C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '12065948', 'alt': 'T'}} - assert results['NM_001127660.1:c.1676C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.12005891C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '12005891', 'alt': 'T'}} - assert results['NM_001127660.1:c.1676C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001121132.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001127660.1'} - assert 'NM_014874.3:c.1676C>T' in list(results.keys()) - assert results['NM_014874.3:c.1676C>T']['hgvs_lrg_transcript_variant'] == 'LRG_255t1:c.1676C>T' - assert results['NM_014874.3:c.1676C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_014874.3:c.1676C>T']['alt_genomic_loci'], []) - assert results['NM_014874.3:c.1676C>T']['gene_symbol'] == 'MFN2' - assert results['NM_014874.3:c.1676C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055689.1(LRG_255p1):p.(Pro559Leu)', 'slr': 'NP_055689.1:p.(P559L)'} assert results['NM_014874.3:c.1676C>T']['submitted_variant'] == '1-12065948-C-T' - assert results['NM_014874.3:c.1676C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_014874.3:c.1676C>T']['hgvs_lrg_variant'] == 
'LRG_255:g.30711C>T' + assert results['NM_014874.3:c.1676C>T']['gene_symbol'] == 'MFN2' + assert results['NM_014874.3:c.1676C>T']['gene_ids'] == {'hgnc_id': 'HGNC:16877', 'entrez_gene_id': '9927', 'ucsc_id': 'uc009vni.4', 'omim_id': ['608507']} assert results['NM_014874.3:c.1676C>T']['hgvs_transcript_variant'] == 'NM_014874.3:c.1676C>T' + assert results['NM_014874.3:c.1676C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_014874.3:c.1676C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014874.3:c.1676C>T']['hgvs_refseqgene_variant'] == 'NG_007945.1:g.30711C>T' - assert results['NM_014874.3:c.1676C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.12065948C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '12065948', 'alt': 'T'}} - assert results['NM_014874.3:c.1676C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.12005891C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '12005891', 'alt': 'T'}} - assert results['NM_014874.3:c.1676C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.12065948C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '12065948', 'alt': 'T'}} - assert results['NM_014874.3:c.1676C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.12005891C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '12005891', 'alt': 'T'}} - assert results['NM_014874.3:c.1676C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007945.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055689.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014874.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_255.xml'} + assert results['NM_014874.3:c.1676C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055689.1(LRG_255p1):p.(Pro559Leu)', 'slr': 'NP_055689.1:p.(P559L)'} + assert results['NM_014874.3:c.1676C>T']['hgvs_lrg_transcript_variant'] == 
'LRG_255t1:c.1676C>T' + assert results['NM_014874.3:c.1676C>T']['hgvs_lrg_variant'] == 'LRG_255:g.30711C>T' + self.assertCountEqual(results['NM_014874.3:c.1676C>T']['alt_genomic_loci'], []) + assert results['NM_014874.3:c.1676C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.12065948C>T', 'vcf': {'chr': 'chr1', 'pos': '12065948', 'ref': 'C', 'alt': 'T'}} + assert results['NM_014874.3:c.1676C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.12005891C>T', 'vcf': {'chr': 'chr1', 'pos': '12005891', 'ref': 'C', 'alt': 'T'}} + assert results['NM_014874.3:c.1676C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.12065948C>T', 'vcf': {'chr': '1', 'pos': '12065948', 'ref': 'C', 'alt': 'T'}} + assert results['NM_014874.3:c.1676C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.12005891C>T', 'vcf': {'chr': '1', 'pos': '12005891', 'ref': 'C', 'alt': 'T'}} + assert results['NM_014874.3:c.1676C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014874.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055689.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007945.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_255.xml'} + assert 'NM_001127660.1:c.1676C>T' in list(results.keys()) + assert results['NM_001127660.1:c.1676C>T']['submitted_variant'] == '1-12065948-C-T' + assert results['NM_001127660.1:c.1676C>T']['gene_symbol'] == 'MFN2' + assert results['NM_001127660.1:c.1676C>T']['gene_ids'] == {'hgnc_id': 'HGNC:16877', 'entrez_gene_id': '9927', 'ucsc_id': 'uc009vni.4', 'omim_id': ['608507']} + assert results['NM_001127660.1:c.1676C>T']['hgvs_transcript_variant'] == 'NM_001127660.1:c.1676C>T' + assert results['NM_001127660.1:c.1676C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001127660.1:c.1676C>T']['refseqgene_context_intronic_sequence'] == '' + 
assert results['NM_001127660.1:c.1676C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001127660.1:c.1676C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001121132.1:p.(Pro559Leu)', 'slr': 'NP_001121132.1:p.(P559L)'} + assert results['NM_001127660.1:c.1676C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001127660.1:c.1676C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001127660.1:c.1676C>T']['alt_genomic_loci'], []) + assert results['NM_001127660.1:c.1676C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.12065948C>T', 'vcf': {'chr': 'chr1', 'pos': '12065948', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001127660.1:c.1676C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.12005891C>T', 'vcf': {'chr': 'chr1', 'pos': '12005891', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001127660.1:c.1676C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.12065948C>T', 'vcf': {'chr': '1', 'pos': '12065948', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001127660.1:c.1676C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.12005891C>T', 'vcf': {'chr': '1', 'pos': '12005891', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001127660.1:c.1676C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001127660.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001121132.1'} def test_variant201(self): variant = '1-46655125-CTCAC-C' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_001290129.1:c.1829+5_1829+8del' in list(results.keys()) - assert results['NM_001290129.1:c.1829+5_1829+8del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001290129.1:c.1829+5_1829+8del']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_001290129.1:c.1829+5_1829+8del']['alt_genomic_loci'], []) - assert results['NM_001290129.1:c.1829+5_1829+8del']['gene_symbol'] == 'POMGNT1' - assert results['NM_001290129.1:c.1829+5_1829+8del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001277058.1:p.?', 'slr': 'NP_001277058.1:p.?'} - assert results['NM_001290129.1:c.1829+5_1829+8del']['submitted_variant'] == '1-46655125-CTCAC-C' - assert results['NM_001290129.1:c.1829+5_1829+8del']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_001290129.1):c.1829+5_1829+8del' - assert results['NM_001290129.1:c.1829+5_1829+8del']['hgvs_lrg_variant'] == '' - assert results['NM_001290129.1:c.1829+5_1829+8del']['hgvs_transcript_variant'] == 'NM_001290129.1:c.1829+5_1829+8del' - assert results['NM_001290129.1:c.1829+5_1829+8del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001290129.1:c.1829+5_1829+8del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655126_46655129del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} - assert results['NM_001290129.1:c.1829+5_1829+8del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189454_46189457del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} - assert results['NM_001290129.1:c.1829+5_1829+8del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655126_46655129del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} - assert results['NM_001290129.1:c.1829+5_1829+8del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189454_46189457del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} - assert results['NM_001290129.1:c.1829+5_1829+8del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001277058.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001290129.1'} - - 
assert 'NM_001290130.1:c.1466+5_1466+8del' in list(results.keys()) - assert results['NM_001290130.1:c.1466+5_1466+8del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001290130.1:c.1466+5_1466+8del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001290130.1:c.1466+5_1466+8del']['alt_genomic_loci'], []) - assert results['NM_001290130.1:c.1466+5_1466+8del']['gene_symbol'] == 'POMGNT1' - assert results['NM_001290130.1:c.1466+5_1466+8del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001277059.1:p.?', 'slr': 'NP_001277059.1:p.?'} - assert results['NM_001290130.1:c.1466+5_1466+8del']['submitted_variant'] == '1-46655125-CTCAC-C' - assert results['NM_001290130.1:c.1466+5_1466+8del']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_001290130.1):c.1466+5_1466+8del' - assert results['NM_001290130.1:c.1466+5_1466+8del']['hgvs_lrg_variant'] == '' - assert results['NM_001290130.1:c.1466+5_1466+8del']['hgvs_transcript_variant'] == 'NM_001290130.1:c.1466+5_1466+8del' - assert results['NM_001290130.1:c.1466+5_1466+8del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001290130.1:c.1466+5_1466+8del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655126_46655129del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} - assert results['NM_001290130.1:c.1466+5_1466+8del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189454_46189457del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} - assert results['NM_001290130.1:c.1466+5_1466+8del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655126_46655129del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} - assert results['NM_001290130.1:c.1466+5_1466+8del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189454_46189457del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 
'pos': '46189449', 'alt': 'G'}} - assert results['NM_001290130.1:c.1466+5_1466+8del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001277059.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001290130.1'} + assert results['flag'] == 'gene_variant' + assert 'NM_001243766.1:c.1869+31_1869+34del' in list(results.keys()) + assert results['NM_001243766.1:c.1869+31_1869+34del']['submitted_variant'] == '1-46655125-CTCAC-C' + assert results['NM_001243766.1:c.1869+31_1869+34del']['gene_symbol'] == 'POMGNT1' + assert results['NM_001243766.1:c.1869+31_1869+34del']['gene_ids'] == {'hgnc_id': 'HGNC:19139', 'entrez_gene_id': '55624', 'ucsc_id': 'uc001cpe.4', 'omim_id': ['606822']} + assert results['NM_001243766.1:c.1869+31_1869+34del']['hgvs_transcript_variant'] == 'NM_001243766.1:c.1869+31_1869+34del' + assert results['NM_001243766.1:c.1869+31_1869+34del']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_001243766.1):c.1869+31_1869+34del' + assert results['NM_001243766.1:c.1869+31_1869+34del']['refseqgene_context_intronic_sequence'] == 'NG_009205.2(NM_001243766.1):c.1869+31_1869+34del' + assert results['NM_001243766.1:c.1869+31_1869+34del']['hgvs_refseqgene_variant'] == 'NG_009205.2:g.35853_35856del' + assert results['NM_001243766.1:c.1869+31_1869+34del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001230695.1(LRG_701p1):p.?', 'slr': 'NP_001230695.1:p.?'} + assert results['NM_001243766.1:c.1869+31_1869+34del']['hgvs_lrg_transcript_variant'] == 'LRG_701t1:c.1869+31_1869+34del' + assert results['NM_001243766.1:c.1869+31_1869+34del']['hgvs_lrg_variant'] == 'LRG_701:g.35853_35856del' + self.assertCountEqual(results['NM_001243766.1:c.1869+31_1869+34del']['alt_genomic_loci'], []) + assert results['NM_001243766.1:c.1869+31_1869+34del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655126_46655129del', 'vcf': {'chr': 'chr1', 'pos': '46655121', 'ref': 'GTCAC', 'alt': 'G'}} + assert 
results['NM_001243766.1:c.1869+31_1869+34del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189454_46189457del', 'vcf': {'chr': 'chr1', 'pos': '46189449', 'ref': 'GTCAC', 'alt': 'G'}} + assert results['NM_001243766.1:c.1869+31_1869+34del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655126_46655129del', 'vcf': {'chr': '1', 'pos': '46655121', 'ref': 'GTCAC', 'alt': 'G'}} + assert results['NM_001243766.1:c.1869+31_1869+34del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189454_46189457del', 'vcf': {'chr': '1', 'pos': '46189449', 'ref': 'GTCAC', 'alt': 'G'}} + assert results['NM_001243766.1:c.1869+31_1869+34del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001243766.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001230695.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009205.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_701.xml'} assert 'NM_017739.3:c.1895+5_1895+8del' in list(results.keys()) - assert results['NM_017739.3:c.1895+5_1895+8del']['hgvs_lrg_transcript_variant'] == 'LRG_701t2:c.1895+5_1895+8del' - assert results['NM_017739.3:c.1895+5_1895+8del']['refseqgene_context_intronic_sequence'] == 'NG_009205.2(NM_017739.3):c.1895+5_1895+8del' - self.assertCountEqual(results['NM_017739.3:c.1895+5_1895+8del']['alt_genomic_loci'], []) - assert results['NM_017739.3:c.1895+5_1895+8del']['gene_symbol'] == 'POMGNT1' - assert results['NM_017739.3:c.1895+5_1895+8del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_060209.3(LRG_701p2):p.?', 'slr': 'NP_060209.3:p.?'} assert results['NM_017739.3:c.1895+5_1895+8del']['submitted_variant'] == '1-46655125-CTCAC-C' - assert results['NM_017739.3:c.1895+5_1895+8del']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_017739.3):c.1895+5_1895+8del' - assert results['NM_017739.3:c.1895+5_1895+8del']['hgvs_lrg_variant'] == 
'LRG_701:g.35853_35856del' + assert results['NM_017739.3:c.1895+5_1895+8del']['gene_symbol'] == 'POMGNT1' + assert results['NM_017739.3:c.1895+5_1895+8del']['gene_ids'] == {'hgnc_id': 'HGNC:19139', 'entrez_gene_id': '55624', 'ucsc_id': 'uc001cpe.4', 'omim_id': ['606822']} assert results['NM_017739.3:c.1895+5_1895+8del']['hgvs_transcript_variant'] == 'NM_017739.3:c.1895+5_1895+8del' + assert results['NM_017739.3:c.1895+5_1895+8del']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_017739.3):c.1895+5_1895+8del' + assert results['NM_017739.3:c.1895+5_1895+8del']['refseqgene_context_intronic_sequence'] == 'NG_009205.2(NM_017739.3):c.1895+5_1895+8del' assert results['NM_017739.3:c.1895+5_1895+8del']['hgvs_refseqgene_variant'] == 'NG_009205.2:g.35853_35856del' - assert results['NM_017739.3:c.1895+5_1895+8del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655126_46655129del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} - assert results['NM_017739.3:c.1895+5_1895+8del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189454_46189457del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} - assert results['NM_017739.3:c.1895+5_1895+8del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655126_46655129del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} - assert results['NM_017739.3:c.1895+5_1895+8del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189454_46189457del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} - assert results['NM_017739.3:c.1895+5_1895+8del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009205.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_060209.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_017739.3', 'lrg': 
'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_701.xml'} + assert results['NM_017739.3:c.1895+5_1895+8del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_060209.3(LRG_701p2):p.?', 'slr': 'NP_060209.3:p.?'} + assert results['NM_017739.3:c.1895+5_1895+8del']['hgvs_lrg_transcript_variant'] == 'LRG_701t2:c.1895+5_1895+8del' + assert results['NM_017739.3:c.1895+5_1895+8del']['hgvs_lrg_variant'] == 'LRG_701:g.35853_35856del' + self.assertCountEqual(results['NM_017739.3:c.1895+5_1895+8del']['alt_genomic_loci'], []) + assert results['NM_017739.3:c.1895+5_1895+8del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655126_46655129del', 'vcf': {'chr': 'chr1', 'pos': '46655121', 'ref': 'GTCAC', 'alt': 'G'}} + assert results['NM_017739.3:c.1895+5_1895+8del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189454_46189457del', 'vcf': {'chr': 'chr1', 'pos': '46189449', 'ref': 'GTCAC', 'alt': 'G'}} + assert results['NM_017739.3:c.1895+5_1895+8del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655126_46655129del', 'vcf': {'chr': '1', 'pos': '46655121', 'ref': 'GTCAC', 'alt': 'G'}} + assert results['NM_017739.3:c.1895+5_1895+8del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189454_46189457del', 'vcf': {'chr': '1', 'pos': '46189449', 'ref': 'GTCAC', 'alt': 'G'}} + assert results['NM_017739.3:c.1895+5_1895+8del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_017739.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_060209.3', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009205.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_701.xml'} - assert 'NM_001243766.1:c.1869+31_1869+34del' in list(results.keys()) - assert results['NM_001243766.1:c.1869+31_1869+34del']['hgvs_lrg_transcript_variant'] == 'LRG_701t1:c.1869+31_1869+34del' - assert 
results['NM_001243766.1:c.1869+31_1869+34del']['refseqgene_context_intronic_sequence'] == 'NG_009205.2(NM_001243766.1):c.1869+31_1869+34del' - self.assertCountEqual(results['NM_001243766.1:c.1869+31_1869+34del']['alt_genomic_loci'], []) - assert results['NM_001243766.1:c.1869+31_1869+34del']['gene_symbol'] == 'POMGNT1' - assert results['NM_001243766.1:c.1869+31_1869+34del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001230695.1(LRG_701p1):p.?', 'slr': 'NP_001230695.1:p.?'} - assert results['NM_001243766.1:c.1869+31_1869+34del']['submitted_variant'] == '1-46655125-CTCAC-C' - assert results['NM_001243766.1:c.1869+31_1869+34del']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_001243766.1):c.1869+31_1869+34del' - assert results['NM_001243766.1:c.1869+31_1869+34del']['hgvs_lrg_variant'] == 'LRG_701:g.35853_35856del' - assert results['NM_001243766.1:c.1869+31_1869+34del']['hgvs_transcript_variant'] == 'NM_001243766.1:c.1869+31_1869+34del' - assert results['NM_001243766.1:c.1869+31_1869+34del']['hgvs_refseqgene_variant'] == 'NG_009205.2:g.35853_35856del' - assert results['NM_001243766.1:c.1869+31_1869+34del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655126_46655129del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} - assert results['NM_001243766.1:c.1869+31_1869+34del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189454_46189457del', 'vcf': {'chr': 'chr1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 'G'}} - assert results['NM_001243766.1:c.1869+31_1869+34del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655126_46655129del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46655121', 'alt': 'G'}} - assert results['NM_001243766.1:c.1869+31_1869+34del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189454_46189457del', 'vcf': {'chr': '1', 'ref': 'GTCAC', 'pos': '46189449', 'alt': 
'G'}} - assert results['NM_001243766.1:c.1869+31_1869+34del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009205.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001230695.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001243766.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_701.xml'} + assert 'NM_001290130.1:c.1466+5_1466+8del' in list(results.keys()) + assert results['NM_001290130.1:c.1466+5_1466+8del']['submitted_variant'] == '1-46655125-CTCAC-C' + assert results['NM_001290130.1:c.1466+5_1466+8del']['gene_symbol'] == 'POMGNT1' + assert results['NM_001290130.1:c.1466+5_1466+8del']['gene_ids'] == {'hgnc_id': 'HGNC:19139', 'entrez_gene_id': '55624', 'ucsc_id': 'uc001cpe.4', 'omim_id': ['606822']} + assert results['NM_001290130.1:c.1466+5_1466+8del']['hgvs_transcript_variant'] == 'NM_001290130.1:c.1466+5_1466+8del' + assert results['NM_001290130.1:c.1466+5_1466+8del']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_001290130.1):c.1466+5_1466+8del' + assert results['NM_001290130.1:c.1466+5_1466+8del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001290130.1:c.1466+5_1466+8del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001290130.1:c.1466+5_1466+8del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001277059.1:p.?', 'slr': 'NP_001277059.1:p.?'} + assert results['NM_001290130.1:c.1466+5_1466+8del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001290130.1:c.1466+5_1466+8del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001290130.1:c.1466+5_1466+8del']['alt_genomic_loci'], []) + assert results['NM_001290130.1:c.1466+5_1466+8del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655126_46655129del', 'vcf': {'chr': 'chr1', 'pos': '46655121', 'ref': 'GTCAC', 'alt': 'G'}} + assert results['NM_001290130.1:c.1466+5_1466+8del']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000001.11:g.46189454_46189457del', 'vcf': {'chr': 'chr1', 'pos': '46189449', 'ref': 'GTCAC', 'alt': 'G'}} + assert results['NM_001290130.1:c.1466+5_1466+8del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655126_46655129del', 'vcf': {'chr': '1', 'pos': '46655121', 'ref': 'GTCAC', 'alt': 'G'}} + assert results['NM_001290130.1:c.1466+5_1466+8del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189454_46189457del', 'vcf': {'chr': '1', 'pos': '46189449', 'ref': 'GTCAC', 'alt': 'G'}} + assert results['NM_001290130.1:c.1466+5_1466+8del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001290130.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001277059.1'} - assert results['flag'] == 'gene_variant' + assert 'NM_001290129.1:c.1829+5_1829+8del' in list(results.keys()) + assert results['NM_001290129.1:c.1829+5_1829+8del']['submitted_variant'] == '1-46655125-CTCAC-C' + assert results['NM_001290129.1:c.1829+5_1829+8del']['gene_symbol'] == 'POMGNT1' + assert results['NM_001290129.1:c.1829+5_1829+8del']['gene_ids'] == {'hgnc_id': 'HGNC:19139', 'entrez_gene_id': '55624', 'ucsc_id': 'uc001cpe.4', 'omim_id': ['606822']} + assert results['NM_001290129.1:c.1829+5_1829+8del']['hgvs_transcript_variant'] == 'NM_001290129.1:c.1829+5_1829+8del' + assert results['NM_001290129.1:c.1829+5_1829+8del']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_001290129.1):c.1829+5_1829+8del' + assert results['NM_001290129.1:c.1829+5_1829+8del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001290129.1:c.1829+5_1829+8del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001290129.1:c.1829+5_1829+8del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001277058.1:p.?', 'slr': 'NP_001277058.1:p.?'} + assert results['NM_001290129.1:c.1829+5_1829+8del']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001290129.1:c.1829+5_1829+8del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001290129.1:c.1829+5_1829+8del']['alt_genomic_loci'], []) + assert results['NM_001290129.1:c.1829+5_1829+8del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655126_46655129del', 'vcf': {'chr': 'chr1', 'pos': '46655121', 'ref': 'GTCAC', 'alt': 'G'}} + assert results['NM_001290129.1:c.1829+5_1829+8del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189454_46189457del', 'vcf': {'chr': 'chr1', 'pos': '46189449', 'ref': 'GTCAC', 'alt': 'G'}} + assert results['NM_001290129.1:c.1829+5_1829+8del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.46655126_46655129del', 'vcf': {'chr': '1', 'pos': '46655121', 'ref': 'GTCAC', 'alt': 'G'}} + assert results['NM_001290129.1:c.1829+5_1829+8del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.46189454_46189457del', 'vcf': {'chr': '1', 'pos': '46189449', 'ref': 'GTCAC', 'alt': 'G'}} + assert results['NM_001290129.1:c.1829+5_1829+8del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001290129.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001277058.1'} def test_variant202(self): variant = '1-68912523-TGAGCCAGAG-T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_000329.2:c.106_114del' in list(results.keys()) - assert results['NM_000329.2:c.106_114del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000329.2:c.106_114del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000329.2:c.106_114del']['alt_genomic_loci'], []) - assert results['NM_000329.2:c.106_114del']['gene_symbol'] == 'RPE65' - assert results['NM_000329.2:c.106_114del']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_000320.1:p.(Leu36_Leu38del)', 'slr': 'NP_000320.1:p.(L36_L38del)'} assert results['NM_000329.2:c.106_114del']['submitted_variant'] == '1-68912523-TGAGCCAGAG-T' - assert results['NM_000329.2:c.106_114del']['genome_context_intronic_sequence'] == '' - assert results['NM_000329.2:c.106_114del']['hgvs_lrg_variant'] == '' + assert results['NM_000329.2:c.106_114del']['gene_symbol'] == 'RPE65' + assert results['NM_000329.2:c.106_114del']['gene_ids'] == {'hgnc_id': 'HGNC:10294', 'entrez_gene_id': '6121', 'ucsc_id': 'uc001dei.2', 'omim_id': ['180069']} assert results['NM_000329.2:c.106_114del']['hgvs_transcript_variant'] == 'NM_000329.2:c.106_114del' + assert results['NM_000329.2:c.106_114del']['genome_context_intronic_sequence'] == '' + assert results['NM_000329.2:c.106_114del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000329.2:c.106_114del']['hgvs_refseqgene_variant'] == 'NG_008472.1:g.8111_8119del' - assert results['NM_000329.2:c.106_114del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.68912525_68912533del', 'vcf': {'chr': 'chr1', 'ref': 'TGAGCCAGAG', 'pos': '68912523', 'alt': 'T'}} - assert results['NM_000329.2:c.106_114del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.68446842_68446850del', 'vcf': {'chr': 'chr1', 'ref': 'TGAGCCAGAG', 'pos': '68446840', 'alt': 'T'}} - assert results['NM_000329.2:c.106_114del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.68912525_68912533del', 'vcf': {'chr': '1', 'ref': 'TGAGCCAGAG', 'pos': '68912523', 'alt': 'T'}} - assert results['NM_000329.2:c.106_114del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.68446842_68446850del', 'vcf': {'chr': '1', 'ref': 'TGAGCCAGAG', 'pos': '68446840', 'alt': 'T'}} - assert results['NM_000329.2:c.106_114del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008472.1', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_000320.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000329.2'} - - assert results['flag'] == 'gene_variant' + assert results['NM_000329.2:c.106_114del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000320.1:p.(Leu36_Leu38del)', 'slr': 'NP_000320.1:p.(L36_L38del)'} + assert results['NM_000329.2:c.106_114del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000329.2:c.106_114del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000329.2:c.106_114del']['alt_genomic_loci'], []) + assert results['NM_000329.2:c.106_114del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.68912525_68912533del', 'vcf': {'chr': 'chr1', 'pos': '68912523', 'ref': 'TGAGCCAGAG', 'alt': 'T'}} + assert results['NM_000329.2:c.106_114del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.68446842_68446850del', 'vcf': {'chr': 'chr1', 'pos': '68446840', 'ref': 'TGAGCCAGAG', 'alt': 'T'}} + assert results['NM_000329.2:c.106_114del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.68912525_68912533del', 'vcf': {'chr': '1', 'pos': '68912523', 'ref': 'TGAGCCAGAG', 'alt': 'T'}} + assert results['NM_000329.2:c.106_114del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.68446842_68446850del', 'vcf': {'chr': '1', 'pos': '68446840', 'ref': 'TGAGCCAGAG', 'alt': 'T'}} + assert results['NM_000329.2:c.106_114del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000329.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000320.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008472.1'} def test_variant203(self): variant = '1-68912526-GCCAGAG-G' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_000329.2:c.109_114del' in list(results.keys()) - assert 
results['NM_000329.2:c.109_114del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000329.2:c.109_114del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000329.2:c.109_114del']['alt_genomic_loci'], []) - assert results['NM_000329.2:c.109_114del']['gene_symbol'] == 'RPE65' - assert results['NM_000329.2:c.109_114del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000320.1:p.(Trp37_Leu38del)', 'slr': 'NP_000320.1:p.(W37_L38del)'} assert results['NM_000329.2:c.109_114del']['submitted_variant'] == '1-68912526-GCCAGAG-G' - assert results['NM_000329.2:c.109_114del']['genome_context_intronic_sequence'] == '' - assert results['NM_000329.2:c.109_114del']['hgvs_lrg_variant'] == '' + assert results['NM_000329.2:c.109_114del']['gene_symbol'] == 'RPE65' + assert results['NM_000329.2:c.109_114del']['gene_ids'] == {'hgnc_id': 'HGNC:10294', 'entrez_gene_id': '6121', 'ucsc_id': 'uc001dei.2', 'omim_id': ['180069']} assert results['NM_000329.2:c.109_114del']['hgvs_transcript_variant'] == 'NM_000329.2:c.109_114del' + assert results['NM_000329.2:c.109_114del']['genome_context_intronic_sequence'] == '' + assert results['NM_000329.2:c.109_114del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000329.2:c.109_114del']['hgvs_refseqgene_variant'] == 'NG_008472.1:g.8114_8119del' - assert results['NM_000329.2:c.109_114del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.68912527_68912532del', 'vcf': {'chr': 'chr1', 'ref': 'TGAGCCA', 'pos': '68912523', 'alt': 'T'}} - assert results['NM_000329.2:c.109_114del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.68446844_68446849del', 'vcf': {'chr': 'chr1', 'ref': 'TGAGCCA', 'pos': '68446840', 'alt': 'T'}} - assert results['NM_000329.2:c.109_114del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.68912527_68912532del', 'vcf': {'chr': '1', 'ref': 'TGAGCCA', 'pos': '68912523', 
'alt': 'T'}} - assert results['NM_000329.2:c.109_114del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.68446844_68446849del', 'vcf': {'chr': '1', 'ref': 'TGAGCCA', 'pos': '68446840', 'alt': 'T'}} - assert results['NM_000329.2:c.109_114del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008472.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000320.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000329.2'} - - assert results['flag'] == 'gene_variant' + assert results['NM_000329.2:c.109_114del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000320.1:p.(Trp37_Leu38del)', 'slr': 'NP_000320.1:p.(W37_L38del)'} + assert results['NM_000329.2:c.109_114del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000329.2:c.109_114del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000329.2:c.109_114del']['alt_genomic_loci'], []) + assert results['NM_000329.2:c.109_114del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.68912527_68912532del', 'vcf': {'chr': 'chr1', 'pos': '68912523', 'ref': 'TGAGCCA', 'alt': 'T'}} + assert results['NM_000329.2:c.109_114del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.68446844_68446849del', 'vcf': {'chr': 'chr1', 'pos': '68446840', 'ref': 'TGAGCCA', 'alt': 'T'}} + assert results['NM_000329.2:c.109_114del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.68912527_68912532del', 'vcf': {'chr': '1', 'pos': '68912523', 'ref': 'TGAGCCA', 'alt': 'T'}} + assert results['NM_000329.2:c.109_114del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.68446844_68446849del', 'vcf': {'chr': '1', 'pos': '68446840', 'ref': 'TGAGCCA', 'alt': 'T'}} + assert results['NM_000329.2:c.109_114del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000329.2', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_000320.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008472.1'} def test_variant204(self): variant = '1-109817590-G-T' @@ -7320,22 +7460,22 @@ def test_variant204(self): assert results['flag'] == 'gene_variant' assert 'NM_001408.2:c.*919G>T' in list(results.keys()) - assert results['NM_001408.2:c.*919G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001408.2:c.*919G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001408.2:c.*919G>T']['alt_genomic_loci'], []) - assert results['NM_001408.2:c.*919G>T']['gene_symbol'] == 'CELSR2' - assert results['NM_001408.2:c.*919G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001399.1:p.?', 'slr': 'NP_001399.1:p.?'} assert results['NM_001408.2:c.*919G>T']['submitted_variant'] == '1-109817590-G-T' - assert results['NM_001408.2:c.*919G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001408.2:c.*919G>T']['hgvs_lrg_variant'] == '' + assert results['NM_001408.2:c.*919G>T']['gene_symbol'] == 'CELSR2' + assert results['NM_001408.2:c.*919G>T']['gene_ids'] == {'hgnc_id': 'HGNC:3231', 'entrez_gene_id': '1952', 'ucsc_id': 'uc001dxa.5', 'omim_id': ['604265']} assert results['NM_001408.2:c.*919G>T']['hgvs_transcript_variant'] == 'NM_001408.2:c.*919G>T' + assert results['NM_001408.2:c.*919G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001408.2:c.*919G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001408.2:c.*919G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001408.2:c.*919G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.109817590G>T', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '109817590', 'alt': 'T'}} - assert results['NM_001408.2:c.*919G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.109274968G>T', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '109274968', 'alt': 'T'}} - assert 
results['NM_001408.2:c.*919G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.109817590G>T', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '109817590', 'alt': 'T'}} - assert results['NM_001408.2:c.*919G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.109274968G>T', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '109274968', 'alt': 'T'}} - assert results['NM_001408.2:c.*919G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001408.2'} - + assert results['NM_001408.2:c.*919G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001399.1:p.?', 'slr': 'NP_001399.1:p.?'} + assert results['NM_001408.2:c.*919G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001408.2:c.*919G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001408.2:c.*919G>T']['alt_genomic_loci'], []) + assert results['NM_001408.2:c.*919G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.109817590G>T', 'vcf': {'chr': 'chr1', 'pos': '109817590', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001408.2:c.*919G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.109274968G>T', 'vcf': {'chr': 'chr1', 'pos': '109274968', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001408.2:c.*919G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.109817590G>T', 'vcf': {'chr': '1', 'pos': '109817590', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001408.2:c.*919G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.109274968G>T', 'vcf': {'chr': '1', 'pos': '109274968', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001408.2:c.*919G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001408.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001399.1'} 
def test_variant205(self): variant = '1-145597475-GAAGT-G' @@ -7344,56 +7484,58 @@ def test_variant205(self): assert results['flag'] == 'gene_variant' assert 'NM_006468.6:c.1070+35_1070+38del' in list(results.keys()) - assert results['NM_006468.6:c.1070+35_1070+38del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_006468.6:c.1070+35_1070+38del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_006468.6:c.1070+35_1070+38del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'HG1287_PATCH', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'NW_003871055.3', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}]) - assert results['NM_006468.6:c.1070+35_1070+38del']['gene_symbol'] == 'POLR3C' - assert results['NM_006468.6:c.1070+35_1070+38del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006459.3:p.?', 'slr': 'NP_006459.3:p.?'} assert results['NM_006468.6:c.1070+35_1070+38del']['submitted_variant'] == '1-145597475-GAAGT-G' - assert results['NM_006468.6:c.1070+35_1070+38del']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_006468.6):c.1070+35_1070+38del' - assert results['NM_006468.6:c.1070+35_1070+38del']['hgvs_lrg_variant'] == '' + assert results['NM_006468.6:c.1070+35_1070+38del']['gene_symbol'] == 'POLR3C' + assert results['NM_006468.6:c.1070+35_1070+38del']['gene_ids'] == {'hgnc_id': 'HGNC:30076', 'entrez_gene_id': '10623', 'ucsc_id': 'uc001eoh.3', 'omim_id': ['617454']} assert results['NM_006468.6:c.1070+35_1070+38del']['hgvs_transcript_variant'] == 'NM_006468.6:c.1070+35_1070+38del' + assert results['NM_006468.6:c.1070+35_1070+38del']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_006468.6):c.1070+35_1070+38del' + assert results['NM_006468.6:c.1070+35_1070+38del']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_006468.6:c.1070+35_1070+38del']['hgvs_refseqgene_variant'] == '' - assert results['NM_006468.6:c.1070+35_1070+38del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.145597477_145597480del', 'vcf': {'chr': 'chr1', 'ref': 'GAAGT', 'pos': '145597475', 'alt': 'G'}} + assert results['NM_006468.6:c.1070+35_1070+38del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006459.3:p.?', 'slr': 'NP_006459.3:p.?'} + assert results['NM_006468.6:c.1070+35_1070+38del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_006468.6:c.1070+35_1070+38del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_006468.6:c.1070+35_1070+38del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'HG1287_PATCH', 'pos': '2653042', 'ref': 'ATACT', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'NW_003871055.3', 'pos': '2653042', 'ref': 'ATACT', 'alt': 'A'}}}]) + assert results['NM_006468.6:c.1070+35_1070+38del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.145597477_145597480del', 'vcf': {'chr': 'chr1', 'pos': '145597475', 'ref': 'GAAGT', 'alt': 'G'}} assert 'hg38' not in list(results['NM_006468.6:c.1070+35_1070+38del']['primary_assembly_loci'].keys()) - assert results['NM_006468.6:c.1070+35_1070+38del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.145597477_145597480del', 'vcf': {'chr': '1', 'ref': 'GAAGT', 'pos': '145597475', 'alt': 'G'}} + assert results['NM_006468.6:c.1070+35_1070+38del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.145597477_145597480del', 'vcf': {'chr': '1', 'pos': '145597475', 'ref': 'GAAGT', 'alt': 'G'}} assert 'grch38' not in list(results['NM_006468.6:c.1070+35_1070+38del']['primary_assembly_loci'].keys()) - assert 
results['NM_006468.6:c.1070+35_1070+38del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_006459.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006468.6'} + assert results['NM_006468.6:c.1070+35_1070+38del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006468.6', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_006459.3'} assert 'NM_001303456.1:c.1109+35_1109+38del' in list(results.keys()) - assert results['NM_001303456.1:c.1109+35_1109+38del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001303456.1:c.1109+35_1109+38del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001303456.1:c.1109+35_1109+38del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'HG1287_PATCH', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'NW_003871055.3', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}]) - assert results['NM_001303456.1:c.1109+35_1109+38del']['gene_symbol'] == 'POLR3C' - assert results['NM_001303456.1:c.1109+35_1109+38del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001290385.1:p.?', 'slr': 'NP_001290385.1:p.?'} assert results['NM_001303456.1:c.1109+35_1109+38del']['submitted_variant'] == '1-145597475-GAAGT-G' - assert results['NM_001303456.1:c.1109+35_1109+38del']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_001303456.1):c.1109+35_1109+38del' - assert results['NM_001303456.1:c.1109+35_1109+38del']['hgvs_lrg_variant'] == '' + assert results['NM_001303456.1:c.1109+35_1109+38del']['gene_symbol'] == 'POLR3C' + assert results['NM_001303456.1:c.1109+35_1109+38del']['gene_ids'] == {'hgnc_id': 'HGNC:30076', 'entrez_gene_id': '10623', 'ucsc_id': 'uc001eoh.3', 'omim_id': ['617454']} assert 
results['NM_001303456.1:c.1109+35_1109+38del']['hgvs_transcript_variant'] == 'NM_001303456.1:c.1109+35_1109+38del' + assert results['NM_001303456.1:c.1109+35_1109+38del']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_001303456.1):c.1109+35_1109+38del' + assert results['NM_001303456.1:c.1109+35_1109+38del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001303456.1:c.1109+35_1109+38del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001303456.1:c.1109+35_1109+38del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.145597477_145597480del', 'vcf': {'chr': 'chr1', 'ref': 'GAAGT', 'pos': '145597475', 'alt': 'G'}} - assert results['NM_001303456.1:c.1109+35_1109+38del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.145837631_145837634del', 'vcf': {'chr': 'chr1', 'ref': 'ATACT', 'pos': '145837629', 'alt': 'A'}} - assert results['NM_001303456.1:c.1109+35_1109+38del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.145597477_145597480del', 'vcf': {'chr': '1', 'ref': 'GAAGT', 'pos': '145597475', 'alt': 'G'}} - assert results['NM_001303456.1:c.1109+35_1109+38del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.145837631_145837634del', 'vcf': {'chr': '1', 'ref': 'ATACT', 'pos': '145837629', 'alt': 'A'}} - assert results['NM_001303456.1:c.1109+35_1109+38del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001290385.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001303456.1'} + assert results['NM_001303456.1:c.1109+35_1109+38del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001290385.1:p.?', 'slr': 'NP_001290385.1:p.?'} + assert results['NM_001303456.1:c.1109+35_1109+38del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001303456.1:c.1109+35_1109+38del']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['NM_001303456.1:c.1109+35_1109+38del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'HG1287_PATCH', 'pos': '2653042', 'ref': 'ATACT', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'NW_003871055.3', 'pos': '2653042', 'ref': 'ATACT', 'alt': 'A'}}}]) + assert results['NM_001303456.1:c.1109+35_1109+38del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.145597477_145597480del', 'vcf': {'chr': 'chr1', 'pos': '145597475', 'ref': 'GAAGT', 'alt': 'G'}} + assert results['NM_001303456.1:c.1109+35_1109+38del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.145837631_145837634del', 'vcf': {'chr': 'chr1', 'pos': '145837629', 'ref': 'ATACT', 'alt': 'A'}} + assert results['NM_001303456.1:c.1109+35_1109+38del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.145597477_145597480del', 'vcf': {'chr': '1', 'pos': '145597475', 'ref': 'GAAGT', 'alt': 'G'}} + assert results['NM_001303456.1:c.1109+35_1109+38del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.145837631_145837634del', 'vcf': {'chr': '1', 'pos': '145837629', 'ref': 'ATACT', 'alt': 'A'}} + assert results['NM_001303456.1:c.1109+35_1109+38del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001303456.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001290385.1'} assert 'NM_006468.7:c.1070+35_1070+38del' in list(results.keys()) - assert results['NM_006468.7:c.1070+35_1070+38del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_006468.7:c.1070+35_1070+38del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_006468.7:c.1070+35_1070+38del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': 
{'chr': 'HG1287_PATCH', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'NW_003871055.3', 'ref': 'ATACT', 'pos': '2653042', 'alt': 'A'}}}]) - assert results['NM_006468.7:c.1070+35_1070+38del']['gene_symbol'] == 'POLR3C' - assert results['NM_006468.7:c.1070+35_1070+38del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006459.3:p.?', 'slr': 'NP_006459.3:p.?'} assert results['NM_006468.7:c.1070+35_1070+38del']['submitted_variant'] == '1-145597475-GAAGT-G' - assert results['NM_006468.7:c.1070+35_1070+38del']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_006468.7):c.1070+35_1070+38del' - assert results['NM_006468.7:c.1070+35_1070+38del']['hgvs_lrg_variant'] == '' + assert results['NM_006468.7:c.1070+35_1070+38del']['gene_symbol'] == 'POLR3C' + assert results['NM_006468.7:c.1070+35_1070+38del']['gene_ids'] == {'hgnc_id': 'HGNC:30076', 'entrez_gene_id': '10623', 'ucsc_id': 'uc001eoh.3', 'omim_id': ['617454']} assert results['NM_006468.7:c.1070+35_1070+38del']['hgvs_transcript_variant'] == 'NM_006468.7:c.1070+35_1070+38del' + assert results['NM_006468.7:c.1070+35_1070+38del']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_006468.7):c.1070+35_1070+38del' + assert results['NM_006468.7:c.1070+35_1070+38del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_006468.7:c.1070+35_1070+38del']['hgvs_refseqgene_variant'] == '' - assert results['NM_006468.7:c.1070+35_1070+38del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.145597477_145597480del', 'vcf': {'chr': 'chr1', 'ref': 'GAAGT', 'pos': '145597475', 'alt': 'G'}} - assert results['NM_006468.7:c.1070+35_1070+38del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.145837631_145837634del', 'vcf': {'chr': 'chr1', 'ref': 'ATACT', 'pos': '145837629', 'alt': 'A'}} - assert 
results['NM_006468.7:c.1070+35_1070+38del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.145597477_145597480del', 'vcf': {'chr': '1', 'ref': 'GAAGT', 'pos': '145597475', 'alt': 'G'}} - assert results['NM_006468.7:c.1070+35_1070+38del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.145837631_145837634del', 'vcf': {'chr': '1', 'ref': 'ATACT', 'pos': '145837629', 'alt': 'A'}} - assert results['NM_006468.7:c.1070+35_1070+38del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_006459.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006468.7'} - + assert results['NM_006468.7:c.1070+35_1070+38del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006459.3:p.?', 'slr': 'NP_006459.3:p.?'} + assert results['NM_006468.7:c.1070+35_1070+38del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_006468.7:c.1070+35_1070+38del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_006468.7:c.1070+35_1070+38del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'HG1287_PATCH', 'pos': '2653042', 'ref': 'ATACT', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871055.3:g.2653044_2653047del', 'vcf': {'chr': 'NW_003871055.3', 'pos': '2653042', 'ref': 'ATACT', 'alt': 'A'}}}]) + assert results['NM_006468.7:c.1070+35_1070+38del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.145597477_145597480del', 'vcf': {'chr': 'chr1', 'pos': '145597475', 'ref': 'GAAGT', 'alt': 'G'}} + assert results['NM_006468.7:c.1070+35_1070+38del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.145837631_145837634del', 'vcf': {'chr': 'chr1', 'pos': '145837629', 'ref': 'ATACT', 'alt': 'A'}} + assert results['NM_006468.7:c.1070+35_1070+38del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000001.10:g.145597477_145597480del', 'vcf': {'chr': '1', 'pos': '145597475', 'ref': 'GAAGT', 'alt': 'G'}} + assert results['NM_006468.7:c.1070+35_1070+38del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.145837631_145837634del', 'vcf': {'chr': '1', 'pos': '145837629', 'ref': 'ATACT', 'alt': 'A'}} + assert results['NM_006468.7:c.1070+35_1070+38del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006468.7', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_006459.3'} def test_variant206(self): variant = '1-153791300-CTG-C' @@ -7402,274 +7544,286 @@ def test_variant206(self): assert results['flag'] == 'gene_variant' assert 'NM_020699.2:c.562_563del' in list(results.keys()) - assert results['NM_020699.2:c.562_563del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_020699.2:c.562_563del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_020699.2:c.562_563del']['alt_genomic_loci'], []) - assert results['NM_020699.2:c.562_563del']['gene_symbol'] == 'GATAD2B' - assert results['NM_020699.2:c.562_563del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065750.1:p.(Gln188GlufsTer36)', 'slr': 'NP_065750.1:p.(Q188Efs*36)'} assert results['NM_020699.2:c.562_563del']['submitted_variant'] == '1-153791300-CTG-C' - assert results['NM_020699.2:c.562_563del']['genome_context_intronic_sequence'] == '' - assert results['NM_020699.2:c.562_563del']['hgvs_lrg_variant'] == '' + assert results['NM_020699.2:c.562_563del']['gene_symbol'] == 'GATAD2B' + assert results['NM_020699.2:c.562_563del']['gene_ids'] == {'hgnc_id': 'HGNC:30778', 'entrez_gene_id': '57459', 'ucsc_id': 'uc001fdb.5', 'omim_id': ['614998']} assert results['NM_020699.2:c.562_563del']['hgvs_transcript_variant'] == 'NM_020699.2:c.562_563del' + assert results['NM_020699.2:c.562_563del']['genome_context_intronic_sequence'] == '' + assert 
results['NM_020699.2:c.562_563del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_020699.2:c.562_563del']['hgvs_refseqgene_variant'] == '' - assert results['NM_020699.2:c.562_563del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.153791302_153791303del', 'vcf': {'chr': 'chr1', 'ref': 'CTG', 'pos': '153791300', 'alt': 'C'}} - assert results['NM_020699.2:c.562_563del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153818826_153818827del', 'vcf': {'chr': 'chr1', 'ref': 'CTG', 'pos': '153818824', 'alt': 'C'}} - assert results['NM_020699.2:c.562_563del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.153791302_153791303del', 'vcf': {'chr': '1', 'ref': 'CTG', 'pos': '153791300', 'alt': 'C'}} - assert results['NM_020699.2:c.562_563del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153818826_153818827del', 'vcf': {'chr': '1', 'ref': 'CTG', 'pos': '153818824', 'alt': 'C'}} - assert results['NM_020699.2:c.562_563del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065750.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020699.2'} + assert results['NM_020699.2:c.562_563del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065750.1:p.(Gln188GlufsTer36)', 'slr': 'NP_065750.1:p.(Q188Efs*36)'} + assert results['NM_020699.2:c.562_563del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_020699.2:c.562_563del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_020699.2:c.562_563del']['alt_genomic_loci'], []) + assert results['NM_020699.2:c.562_563del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.153791302_153791303del', 'vcf': {'chr': 'chr1', 'pos': '153791300', 'ref': 'CTG', 'alt': 'C'}} + assert results['NM_020699.2:c.562_563del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000001.11:g.153818826_153818827del', 'vcf': {'chr': 'chr1', 'pos': '153818824', 'ref': 'CTG', 'alt': 'C'}} + assert results['NM_020699.2:c.562_563del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.153791302_153791303del', 'vcf': {'chr': '1', 'pos': '153791300', 'ref': 'CTG', 'alt': 'C'}} + assert results['NM_020699.2:c.562_563del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153818826_153818827del', 'vcf': {'chr': '1', 'pos': '153818824', 'ref': 'CTG', 'alt': 'C'}} + assert results['NM_020699.2:c.562_563del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020699.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065750.1'} assert 'NM_020699.3:c.562_563del' in list(results.keys()) - assert results['NM_020699.3:c.562_563del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_020699.3:c.562_563del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_020699.3:c.562_563del']['alt_genomic_loci'], []) - assert results['NM_020699.3:c.562_563del']['gene_symbol'] == 'GATAD2B' - assert results['NM_020699.3:c.562_563del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065750.1:p.(Gln188GlufsTer36)', 'slr': 'NP_065750.1:p.(Q188Efs*36)'} assert results['NM_020699.3:c.562_563del']['submitted_variant'] == '1-153791300-CTG-C' - assert results['NM_020699.3:c.562_563del']['genome_context_intronic_sequence'] == '' - assert results['NM_020699.3:c.562_563del']['hgvs_lrg_variant'] == '' + assert results['NM_020699.3:c.562_563del']['gene_symbol'] == 'GATAD2B' + assert results['NM_020699.3:c.562_563del']['gene_ids'] == {'hgnc_id': 'HGNC:30778', 'entrez_gene_id': '57459', 'ucsc_id': 'uc001fdb.5', 'omim_id': ['614998']} assert results['NM_020699.3:c.562_563del']['hgvs_transcript_variant'] == 'NM_020699.3:c.562_563del' + assert results['NM_020699.3:c.562_563del']['genome_context_intronic_sequence'] == '' + assert 
results['NM_020699.3:c.562_563del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_020699.3:c.562_563del']['hgvs_refseqgene_variant'] == '' - assert results['NM_020699.3:c.562_563del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.153791302_153791303del', 'vcf': {'chr': 'chr1', 'ref': 'CTG', 'pos': '153791300', 'alt': 'C'}} - assert results['NM_020699.3:c.562_563del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153818826_153818827del', 'vcf': {'chr': 'chr1', 'ref': 'CTG', 'pos': '153818824', 'alt': 'C'}} - assert results['NM_020699.3:c.562_563del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.153791302_153791303del', 'vcf': {'chr': '1', 'ref': 'CTG', 'pos': '153791300', 'alt': 'C'}} - assert results['NM_020699.3:c.562_563del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153818826_153818827del', 'vcf': {'chr': '1', 'ref': 'CTG', 'pos': '153818824', 'alt': 'C'}} - assert results['NM_020699.3:c.562_563del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065750.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020699.3'} - + assert results['NM_020699.3:c.562_563del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065750.1:p.(Gln188GlufsTer36)', 'slr': 'NP_065750.1:p.(Q188Efs*36)'} + assert results['NM_020699.3:c.562_563del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_020699.3:c.562_563del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_020699.3:c.562_563del']['alt_genomic_loci'], []) + assert results['NM_020699.3:c.562_563del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.153791302_153791303del', 'vcf': {'chr': 'chr1', 'pos': '153791300', 'ref': 'CTG', 'alt': 'C'}} + assert results['NM_020699.3:c.562_563del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000001.11:g.153818826_153818827del', 'vcf': {'chr': 'chr1', 'pos': '153818824', 'ref': 'CTG', 'alt': 'C'}} + assert results['NM_020699.3:c.562_563del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.153791302_153791303del', 'vcf': {'chr': '1', 'pos': '153791300', 'ref': 'CTG', 'alt': 'C'}} + assert results['NM_020699.3:c.562_563del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153818826_153818827del', 'vcf': {'chr': '1', 'pos': '153818824', 'ref': 'CTG', 'alt': 'C'}} + assert results['NM_020699.3:c.562_563del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020699.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065750.1'} def test_variant207(self): variant = '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_005572.3:c.711_734delinsCCCC' in list(results.keys()) - assert results['NM_005572.3:c.711_734delinsCCCC']['hgvs_lrg_transcript_variant'] == 'LRG_254t1:c.711_734delinsCCCC' - assert results['NM_005572.3:c.711_734delinsCCCC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_005572.3:c.711_734delinsCCCC']['alt_genomic_loci'], []) - assert results['NM_005572.3:c.711_734delinsCCCC']['gene_symbol'] == 'LMNA' - assert results['NM_005572.3:c.711_734delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005563.1(LRG_254p1):p.(Glu238ProfsTer9)', 'slr': 'NP_005563.1:p.(E238Pfs*9)'} assert results['NM_005572.3:c.711_734delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' - assert results['NM_005572.3:c.711_734delinsCCCC']['genome_context_intronic_sequence'] == '' - assert results['NM_005572.3:c.711_734delinsCCCC']['hgvs_lrg_variant'] == 'LRG_254:g.57304_57327delinsCCCC' + assert results['NM_005572.3:c.711_734delinsCCCC']['gene_symbol'] == 
'LMNA' + assert results['NM_005572.3:c.711_734delinsCCCC']['gene_ids'] == {'hgnc_id': 'HGNC:6636', 'entrez_gene_id': '4000', 'ucsc_id': 'uc001fni.4', 'omim_id': ['150330']} assert results['NM_005572.3:c.711_734delinsCCCC']['hgvs_transcript_variant'] == 'NM_005572.3:c.711_734delinsCCCC' + assert results['NM_005572.3:c.711_734delinsCCCC']['genome_context_intronic_sequence'] == '' + assert results['NM_005572.3:c.711_734delinsCCCC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_005572.3:c.711_734delinsCCCC']['hgvs_refseqgene_variant'] == 'NG_008692.2:g.57304_57327delinsCCCC' - assert results['NM_005572.3:c.711_734delinsCCCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} - assert results['NM_005572.3:c.711_734delinsCCCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} - assert results['NM_005572.3:c.711_734delinsCCCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} - assert results['NM_005572.3:c.711_734delinsCCCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} - assert results['NM_005572.3:c.711_734delinsCCCC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008692.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005563.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005572.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_254.xml'} + assert 
results['NM_005572.3:c.711_734delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005563.1(LRG_254p1):p.(Glu238ProfsTer9)', 'slr': 'NP_005563.1:p.(E238Pfs*9)'} + assert results['NM_005572.3:c.711_734delinsCCCC']['hgvs_lrg_transcript_variant'] == 'LRG_254t1:c.711_734delinsCCCC' + assert results['NM_005572.3:c.711_734delinsCCCC']['hgvs_lrg_variant'] == 'LRG_254:g.57304_57327delinsCCCC' + self.assertCountEqual(results['NM_005572.3:c.711_734delinsCCCC']['alt_genomic_loci'], []) + assert results['NM_005572.3:c.711_734delinsCCCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': 'chr1', 'pos': '156104667', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_005572.3:c.711_734delinsCCCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': 'chr1', 'pos': '156134876', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_005572.3:c.711_734delinsCCCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': '1', 'pos': '156104667', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_005572.3:c.711_734delinsCCCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': '1', 'pos': '156134876', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_005572.3:c.711_734delinsCCCC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005572.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005563.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008692.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_254.xml'} - assert 'NM_001257374.1:c.375_398delinsCCCC' in list(results.keys()) - assert 
results['NM_001257374.1:c.375_398delinsCCCC']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001257374.1:c.375_398delinsCCCC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001257374.1:c.375_398delinsCCCC']['alt_genomic_loci'], []) - assert results['NM_001257374.1:c.375_398delinsCCCC']['gene_symbol'] == 'LMNA' - assert results['NM_001257374.1:c.375_398delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001244303.1:p.(Glu126ProfsTer9)', 'slr': 'NP_001244303.1:p.(E126Pfs*9)'} - assert results['NM_001257374.1:c.375_398delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' - assert results['NM_001257374.1:c.375_398delinsCCCC']['genome_context_intronic_sequence'] == '' - assert results['NM_001257374.1:c.375_398delinsCCCC']['hgvs_lrg_variant'] == '' - assert results['NM_001257374.1:c.375_398delinsCCCC']['hgvs_transcript_variant'] == 'NM_001257374.1:c.375_398delinsCCCC' - assert results['NM_001257374.1:c.375_398delinsCCCC']['hgvs_refseqgene_variant'] == '' - assert results['NM_001257374.1:c.375_398delinsCCCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} - assert 'hg38' not in list(results['NM_001257374.1:c.375_398delinsCCCC']['primary_assembly_loci'].keys()) - assert results['NM_001257374.1:c.375_398delinsCCCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} - assert 'grch38' not in list(results['NM_001257374.1:c.375_398delinsCCCC']['primary_assembly_loci'].keys()) - assert results['NM_001257374.1:c.375_398delinsCCCC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244303.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257374.1'} + assert 'NM_170707.3:c.711_734delinsCCCC' in list(results.keys()) + assert results['NM_170707.3:c.711_734delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' + assert results['NM_170707.3:c.711_734delinsCCCC']['gene_symbol'] == 'LMNA' + assert results['NM_170707.3:c.711_734delinsCCCC']['gene_ids'] == {'hgnc_id': 'HGNC:6636', 'entrez_gene_id': '4000', 'ucsc_id': 'uc001fni.4', 'omim_id': ['150330']} + assert results['NM_170707.3:c.711_734delinsCCCC']['hgvs_transcript_variant'] == 'NM_170707.3:c.711_734delinsCCCC' + assert results['NM_170707.3:c.711_734delinsCCCC']['genome_context_intronic_sequence'] == '' + assert results['NM_170707.3:c.711_734delinsCCCC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_170707.3:c.711_734delinsCCCC']['hgvs_refseqgene_variant'] == '' + assert results['NM_170707.3:c.711_734delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_733821.1(LRG_254p2):p.(Glu238ProfsTer9)', 'slr': 'NP_733821.1:p.(E238Pfs*9)'} + assert results['NM_170707.3:c.711_734delinsCCCC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_170707.3:c.711_734delinsCCCC']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_170707.3:c.711_734delinsCCCC']['alt_genomic_loci'], []) + assert results['NM_170707.3:c.711_734delinsCCCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': 'chr1', 'pos': '156104667', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_170707.3:c.711_734delinsCCCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': 'chr1', 'pos': '156134876', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_170707.3:c.711_734delinsCCCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': '1', 'pos': '156104667', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_170707.3:c.711_734delinsCCCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': '1', 'pos': '156134876', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_170707.3:c.711_734delinsCCCC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_170707.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_733821.1'} assert 'NM_001257374.2:c.375_398delinsCCCC' in list(results.keys()) - assert results['NM_001257374.2:c.375_398delinsCCCC']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001257374.2:c.375_398delinsCCCC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001257374.2:c.375_398delinsCCCC']['alt_genomic_loci'], []) - assert results['NM_001257374.2:c.375_398delinsCCCC']['gene_symbol'] == 'LMNA' - assert results['NM_001257374.2:c.375_398delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001244303.1:p.(Glu126ProfsTer9)', 'slr': 'NP_001244303.1:p.(E126Pfs*9)'} assert results['NM_001257374.2:c.375_398delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' - assert results['NM_001257374.2:c.375_398delinsCCCC']['genome_context_intronic_sequence'] == '' - assert results['NM_001257374.2:c.375_398delinsCCCC']['hgvs_lrg_variant'] == '' + assert results['NM_001257374.2:c.375_398delinsCCCC']['gene_symbol'] == 'LMNA' + assert results['NM_001257374.2:c.375_398delinsCCCC']['gene_ids'] == {'hgnc_id': 'HGNC:6636', 'entrez_gene_id': '4000', 'ucsc_id': 'uc001fni.4', 'omim_id': ['150330']} assert results['NM_001257374.2:c.375_398delinsCCCC']['hgvs_transcript_variant'] == 'NM_001257374.2:c.375_398delinsCCCC' + assert results['NM_001257374.2:c.375_398delinsCCCC']['genome_context_intronic_sequence'] == '' + 
assert results['NM_001257374.2:c.375_398delinsCCCC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001257374.2:c.375_398delinsCCCC']['hgvs_refseqgene_variant'] == '' - assert results['NM_001257374.2:c.375_398delinsCCCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} - assert results['NM_001257374.2:c.375_398delinsCCCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} - assert results['NM_001257374.2:c.375_398delinsCCCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} - assert results['NM_001257374.2:c.375_398delinsCCCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} - assert results['NM_001257374.2:c.375_398delinsCCCC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244303.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257374.2'} - - assert 'NM_001282624.1:c.468_491delinsCCCC' in list(results.keys()) - assert results['NM_001282624.1:c.468_491delinsCCCC']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001282624.1:c.468_491delinsCCCC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001282624.1:c.468_491delinsCCCC']['alt_genomic_loci'], []) - assert results['NM_001282624.1:c.468_491delinsCCCC']['gene_symbol'] == 'LMNA' - assert results['NM_001282624.1:c.468_491delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_001269553.1:p.(Glu157ProfsTer9)', 'slr': 'NP_001269553.1:p.(E157Pfs*9)'} - assert results['NM_001282624.1:c.468_491delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' - assert results['NM_001282624.1:c.468_491delinsCCCC']['genome_context_intronic_sequence'] == '' - assert results['NM_001282624.1:c.468_491delinsCCCC']['hgvs_lrg_variant'] == '' - assert results['NM_001282624.1:c.468_491delinsCCCC']['hgvs_transcript_variant'] == 'NM_001282624.1:c.468_491delinsCCCC' - assert results['NM_001282624.1:c.468_491delinsCCCC']['hgvs_refseqgene_variant'] == '' - assert results['NM_001282624.1:c.468_491delinsCCCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} - assert results['NM_001282624.1:c.468_491delinsCCCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} - assert results['NM_001282624.1:c.468_491delinsCCCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} - assert results['NM_001282624.1:c.468_491delinsCCCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} - assert results['NM_001282624.1:c.468_491delinsCCCC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269553.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282624.1'} + assert results['NM_001257374.2:c.375_398delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_001244303.1:p.(Glu126ProfsTer9)', 'slr': 'NP_001244303.1:p.(E126Pfs*9)'} + assert results['NM_001257374.2:c.375_398delinsCCCC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001257374.2:c.375_398delinsCCCC']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001257374.2:c.375_398delinsCCCC']['alt_genomic_loci'], []) + assert results['NM_001257374.2:c.375_398delinsCCCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': 'chr1', 'pos': '156104667', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_001257374.2:c.375_398delinsCCCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': 'chr1', 'pos': '156134876', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_001257374.2:c.375_398delinsCCCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': '1', 'pos': '156104667', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_001257374.2:c.375_398delinsCCCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': '1', 'pos': '156134876', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_001257374.2:c.375_398delinsCCCC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257374.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244303.1'} - assert results['flag'] == 'gene_variant' assert 'NM_170708.3:c.711_734delinsCCCC' in list(results.keys()) - assert results['NM_170708.3:c.711_734delinsCCCC']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_170708.3:c.711_734delinsCCCC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_170708.3:c.711_734delinsCCCC']['alt_genomic_loci'], 
[]) - assert results['NM_170708.3:c.711_734delinsCCCC']['gene_symbol'] == 'LMNA' - assert results['NM_170708.3:c.711_734delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_733822.1(LRG_254p3):p.(Glu238ProfsTer9)', 'slr': 'NP_733822.1:p.(E238Pfs*9)'} assert results['NM_170708.3:c.711_734delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' - assert results['NM_170708.3:c.711_734delinsCCCC']['genome_context_intronic_sequence'] == '' - assert results['NM_170708.3:c.711_734delinsCCCC']['hgvs_lrg_variant'] == '' + assert results['NM_170708.3:c.711_734delinsCCCC']['gene_symbol'] == 'LMNA' + assert results['NM_170708.3:c.711_734delinsCCCC']['gene_ids'] == {'hgnc_id': 'HGNC:6636', 'entrez_gene_id': '4000', 'ucsc_id': 'uc001fni.4', 'omim_id': ['150330']} assert results['NM_170708.3:c.711_734delinsCCCC']['hgvs_transcript_variant'] == 'NM_170708.3:c.711_734delinsCCCC' + assert results['NM_170708.3:c.711_734delinsCCCC']['genome_context_intronic_sequence'] == '' + assert results['NM_170708.3:c.711_734delinsCCCC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_170708.3:c.711_734delinsCCCC']['hgvs_refseqgene_variant'] == '' - assert results['NM_170708.3:c.711_734delinsCCCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} - assert results['NM_170708.3:c.711_734delinsCCCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} - assert results['NM_170708.3:c.711_734delinsCCCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} - assert 
results['NM_170708.3:c.711_734delinsCCCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} - assert results['NM_170708.3:c.711_734delinsCCCC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_733822.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_170708.3'} - - assert 'NM_170707.3:c.711_734delinsCCCC' in list(results.keys()) - assert results['NM_170707.3:c.711_734delinsCCCC']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_170707.3:c.711_734delinsCCCC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_170707.3:c.711_734delinsCCCC']['alt_genomic_loci'], []) - assert results['NM_170707.3:c.711_734delinsCCCC']['gene_symbol'] == 'LMNA' - assert results['NM_170707.3:c.711_734delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_733821.1(LRG_254p2):p.(Glu238ProfsTer9)', 'slr': 'NP_733821.1:p.(E238Pfs*9)'} - assert results['NM_170707.3:c.711_734delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' - assert results['NM_170707.3:c.711_734delinsCCCC']['genome_context_intronic_sequence'] == '' - assert results['NM_170707.3:c.711_734delinsCCCC']['hgvs_lrg_variant'] == '' - assert results['NM_170707.3:c.711_734delinsCCCC']['hgvs_transcript_variant'] == 'NM_170707.3:c.711_734delinsCCCC' - assert results['NM_170707.3:c.711_734delinsCCCC']['hgvs_refseqgene_variant'] == '' - assert results['NM_170707.3:c.711_734delinsCCCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} - assert results['NM_170707.3:c.711_734delinsCCCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': 
{'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} - assert results['NM_170707.3:c.711_734delinsCCCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} - assert results['NM_170707.3:c.711_734delinsCCCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} - assert results['NM_170707.3:c.711_734delinsCCCC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_733821.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_170707.3'} + assert results['NM_170708.3:c.711_734delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_733822.1(LRG_254p3):p.(Glu238ProfsTer9)', 'slr': 'NP_733822.1:p.(E238Pfs*9)'} + assert results['NM_170708.3:c.711_734delinsCCCC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_170708.3:c.711_734delinsCCCC']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_170708.3:c.711_734delinsCCCC']['alt_genomic_loci'], []) + assert results['NM_170708.3:c.711_734delinsCCCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': 'chr1', 'pos': '156104667', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_170708.3:c.711_734delinsCCCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': 'chr1', 'pos': '156134876', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_170708.3:c.711_734delinsCCCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': '1', 'pos': '156104667', 'ref': 
'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_170708.3:c.711_734delinsCCCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': '1', 'pos': '156134876', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_170708.3:c.711_734delinsCCCC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_170708.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_733822.1'} assert 'NM_001282626.1:c.711_734delinsCCCC' in list(results.keys()) - assert results['NM_001282626.1:c.711_734delinsCCCC']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001282626.1:c.711_734delinsCCCC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001282626.1:c.711_734delinsCCCC']['alt_genomic_loci'], []) - assert results['NM_001282626.1:c.711_734delinsCCCC']['gene_symbol'] == 'LMNA' - assert results['NM_001282626.1:c.711_734delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269555.1:p.(Glu238ProfsTer9)', 'slr': 'NP_001269555.1:p.(E238Pfs*9)'} assert results['NM_001282626.1:c.711_734delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' - assert results['NM_001282626.1:c.711_734delinsCCCC']['genome_context_intronic_sequence'] == '' - assert results['NM_001282626.1:c.711_734delinsCCCC']['hgvs_lrg_variant'] == '' + assert results['NM_001282626.1:c.711_734delinsCCCC']['gene_symbol'] == 'LMNA' + assert results['NM_001282626.1:c.711_734delinsCCCC']['gene_ids'] == {'hgnc_id': 'HGNC:6636', 'entrez_gene_id': '4000', 'ucsc_id': 'uc001fni.4', 'omim_id': ['150330']} assert results['NM_001282626.1:c.711_734delinsCCCC']['hgvs_transcript_variant'] == 'NM_001282626.1:c.711_734delinsCCCC' + assert results['NM_001282626.1:c.711_734delinsCCCC']['genome_context_intronic_sequence'] == '' + assert results['NM_001282626.1:c.711_734delinsCCCC']['refseqgene_context_intronic_sequence'] == 
'' assert results['NM_001282626.1:c.711_734delinsCCCC']['hgvs_refseqgene_variant'] == '' - assert results['NM_001282626.1:c.711_734delinsCCCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} - assert results['NM_001282626.1:c.711_734delinsCCCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} - assert results['NM_001282626.1:c.711_734delinsCCCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} - assert results['NM_001282626.1:c.711_734delinsCCCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} - assert results['NM_001282626.1:c.711_734delinsCCCC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269555.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282626.1'} + assert results['NM_001282626.1:c.711_734delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269555.1:p.(Glu238ProfsTer9)', 'slr': 'NP_001269555.1:p.(E238Pfs*9)'} + assert results['NM_001282626.1:c.711_734delinsCCCC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001282626.1:c.711_734delinsCCCC']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001282626.1:c.711_734delinsCCCC']['alt_genomic_loci'], []) + assert results['NM_001282626.1:c.711_734delinsCCCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': 'chr1', 
'pos': '156104667', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_001282626.1:c.711_734delinsCCCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': 'chr1', 'pos': '156134876', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_001282626.1:c.711_734delinsCCCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': '1', 'pos': '156104667', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_001282626.1:c.711_734delinsCCCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': '1', 'pos': '156134876', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_001282626.1:c.711_734delinsCCCC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282626.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269555.1'} + + assert 'NM_001282624.1:c.468_491delinsCCCC' in list(results.keys()) + assert results['NM_001282624.1:c.468_491delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' + assert results['NM_001282624.1:c.468_491delinsCCCC']['gene_symbol'] == 'LMNA' + assert results['NM_001282624.1:c.468_491delinsCCCC']['gene_ids'] == {'hgnc_id': 'HGNC:6636', 'entrez_gene_id': '4000', 'ucsc_id': 'uc001fni.4', 'omim_id': ['150330']} + assert results['NM_001282624.1:c.468_491delinsCCCC']['hgvs_transcript_variant'] == 'NM_001282624.1:c.468_491delinsCCCC' + assert results['NM_001282624.1:c.468_491delinsCCCC']['genome_context_intronic_sequence'] == '' + assert results['NM_001282624.1:c.468_491delinsCCCC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001282624.1:c.468_491delinsCCCC']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_001282624.1:c.468_491delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269553.1:p.(Glu157ProfsTer9)', 'slr': 'NP_001269553.1:p.(E157Pfs*9)'} + assert results['NM_001282624.1:c.468_491delinsCCCC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001282624.1:c.468_491delinsCCCC']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001282624.1:c.468_491delinsCCCC']['alt_genomic_loci'], []) + assert results['NM_001282624.1:c.468_491delinsCCCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': 'chr1', 'pos': '156104667', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_001282624.1:c.468_491delinsCCCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': 'chr1', 'pos': '156134876', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_001282624.1:c.468_491delinsCCCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': '1', 'pos': '156104667', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_001282624.1:c.468_491delinsCCCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': '1', 'pos': '156134876', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_001282624.1:c.468_491delinsCCCC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282624.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269553.1'} + + assert 'NM_001257374.1:c.375_398delinsCCCC' in list(results.keys()) + assert results['NM_001257374.1:c.375_398delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' + assert results['NM_001257374.1:c.375_398delinsCCCC']['gene_symbol'] == 'LMNA' + assert 
results['NM_001257374.1:c.375_398delinsCCCC']['gene_ids'] == {'hgnc_id': 'HGNC:6636', 'entrez_gene_id': '4000', 'ucsc_id': 'uc001fni.4', 'omim_id': ['150330']} + assert results['NM_001257374.1:c.375_398delinsCCCC']['hgvs_transcript_variant'] == 'NM_001257374.1:c.375_398delinsCCCC' + assert results['NM_001257374.1:c.375_398delinsCCCC']['genome_context_intronic_sequence'] == '' + assert results['NM_001257374.1:c.375_398delinsCCCC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001257374.1:c.375_398delinsCCCC']['hgvs_refseqgene_variant'] == '' + assert results['NM_001257374.1:c.375_398delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001244303.1:p.(Glu126ProfsTer9)', 'slr': 'NP_001244303.1:p.(E126Pfs*9)'} + assert results['NM_001257374.1:c.375_398delinsCCCC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001257374.1:c.375_398delinsCCCC']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001257374.1:c.375_398delinsCCCC']['alt_genomic_loci'], []) + assert results['NM_001257374.1:c.375_398delinsCCCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': 'chr1', 'pos': '156104667', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert 'hg38' not in list(results['NM_001257374.1:c.375_398delinsCCCC']['primary_assembly_loci'].keys()) + assert results['NM_001257374.1:c.375_398delinsCCCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': '1', 'pos': '156104667', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert 'grch38' not in list(results['NM_001257374.1:c.375_398delinsCCCC']['primary_assembly_loci'].keys()) + assert results['NM_001257374.1:c.375_398delinsCCCC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257374.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244303.1'} assert 
'NM_001282625.1:c.711_734delinsCCCC' in list(results.keys()) - assert results['NM_001282625.1:c.711_734delinsCCCC']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001282625.1:c.711_734delinsCCCC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001282625.1:c.711_734delinsCCCC']['alt_genomic_loci'], []) - assert results['NM_001282625.1:c.711_734delinsCCCC']['gene_symbol'] == 'LMNA' - assert results['NM_001282625.1:c.711_734delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269554.1:p.(Glu238ProfsTer9)', 'slr': 'NP_001269554.1:p.(E238Pfs*9)'} assert results['NM_001282625.1:c.711_734delinsCCCC']['submitted_variant'] == '1-156104666-TTGAGAGCCGGCTGGCGGATGCGCT-TCCCC' - assert results['NM_001282625.1:c.711_734delinsCCCC']['genome_context_intronic_sequence'] == '' - assert results['NM_001282625.1:c.711_734delinsCCCC']['hgvs_lrg_variant'] == '' + assert results['NM_001282625.1:c.711_734delinsCCCC']['gene_symbol'] == 'LMNA' + assert results['NM_001282625.1:c.711_734delinsCCCC']['gene_ids'] == {'hgnc_id': 'HGNC:6636', 'entrez_gene_id': '4000', 'ucsc_id': 'uc001fni.4', 'omim_id': ['150330']} assert results['NM_001282625.1:c.711_734delinsCCCC']['hgvs_transcript_variant'] == 'NM_001282625.1:c.711_734delinsCCCC' + assert results['NM_001282625.1:c.711_734delinsCCCC']['genome_context_intronic_sequence'] == '' + assert results['NM_001282625.1:c.711_734delinsCCCC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001282625.1:c.711_734delinsCCCC']['hgvs_refseqgene_variant'] == '' - assert results['NM_001282625.1:c.711_734delinsCCCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} - assert results['NM_001282625.1:c.711_734delinsCCCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 
'vcf': {'chr': 'chr1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} - assert results['NM_001282625.1:c.711_734delinsCCCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156104667', 'alt': 'CCCC'}} - assert results['NM_001282625.1:c.711_734delinsCCCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': '1', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'pos': '156134876', 'alt': 'CCCC'}} - assert results['NM_001282625.1:c.711_734delinsCCCC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269554.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282625.1'} - + assert results['NM_001282625.1:c.711_734delinsCCCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269554.1:p.(Glu238ProfsTer9)', 'slr': 'NP_001269554.1:p.(E238Pfs*9)'} + assert results['NM_001282625.1:c.711_734delinsCCCC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001282625.1:c.711_734delinsCCCC']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001282625.1:c.711_734delinsCCCC']['alt_genomic_loci'], []) + assert results['NM_001282625.1:c.711_734delinsCCCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': 'chr1', 'pos': '156104667', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_001282625.1:c.711_734delinsCCCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': 'chr1', 'pos': '156134876', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_001282625.1:c.711_734delinsCCCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156104667_156104690delinsCCCC', 'vcf': {'chr': 
'1', 'pos': '156104667', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_001282625.1:c.711_734delinsCCCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156134876_156134899delinsCCCC', 'vcf': {'chr': '1', 'pos': '156134876', 'ref': 'TGAGAGCCGGCTGGCGGATGCGCT', 'alt': 'CCCC'}} + assert results['NM_001282625.1:c.711_734delinsCCCC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282625.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269554.1'} def test_variant208(self): variant = '1-156108541-G-GG' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_170707.3:c.1961dup' in list(results.keys()) - assert results['NM_170707.3:c.1961dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_170707.3:c.1961dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_170707.3:c.1961dup']['alt_genomic_loci'], []) - assert results['NM_170707.3:c.1961dup']['gene_symbol'] == 'LMNA' - assert results['NM_170707.3:c.1961dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_733821.1(LRG_254p2):p.(Thr655AsnfsTer49)', 'slr': 'NP_733821.1:p.(T655Nfs*49)'} assert results['NM_170707.3:c.1961dup']['submitted_variant'] == '1-156108541-G-GG' - assert results['NM_170707.3:c.1961dup']['genome_context_intronic_sequence'] == '' - assert results['NM_170707.3:c.1961dup']['hgvs_lrg_variant'] == '' + assert results['NM_170707.3:c.1961dup']['gene_symbol'] == 'LMNA' + assert results['NM_170707.3:c.1961dup']['gene_ids'] == {'hgnc_id': 'HGNC:6636', 'entrez_gene_id': '4000', 'ucsc_id': 'uc001fni.4', 'omim_id': ['150330']} assert results['NM_170707.3:c.1961dup']['hgvs_transcript_variant'] == 'NM_170707.3:c.1961dup' + assert results['NM_170707.3:c.1961dup']['genome_context_intronic_sequence'] == '' + assert 
results['NM_170707.3:c.1961dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_170707.3:c.1961dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_170707.3:c.1961dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '156108540', 'alt': 'CG'}} - assert results['NM_170707.3:c.1961dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '156138749', 'alt': 'CG'}} - assert results['NM_170707.3:c.1961dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '156108540', 'alt': 'CG'}} - assert results['NM_170707.3:c.1961dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '156138749', 'alt': 'CG'}} - assert results['NM_170707.3:c.1961dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_733821.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_170707.3'} - - assert 'NM_001282626.1:c.1818+143dup' in list(results.keys()) - assert results['NM_001282626.1:c.1818+143dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001282626.1:c.1818+143dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001282626.1:c.1818+143dup']['alt_genomic_loci'], []) - assert results['NM_001282626.1:c.1818+143dup']['gene_symbol'] == 'LMNA' - assert results['NM_001282626.1:c.1818+143dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269555.1:p.?', 'slr': 'NP_001269555.1:p.?'} - assert results['NM_001282626.1:c.1818+143dup']['submitted_variant'] == '1-156108541-G-GG' - assert results['NM_001282626.1:c.1818+143dup']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_001282626.1):c.1818+143dup' - assert 
results['NM_001282626.1:c.1818+143dup']['hgvs_lrg_variant'] == '' - assert results['NM_001282626.1:c.1818+143dup']['hgvs_transcript_variant'] == 'NM_001282626.1:c.1818+143dup' - assert results['NM_001282626.1:c.1818+143dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001282626.1:c.1818+143dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '156108540', 'alt': 'CG'}} - assert results['NM_001282626.1:c.1818+143dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '156138749', 'alt': 'CG'}} - assert results['NM_001282626.1:c.1818+143dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '156108540', 'alt': 'CG'}} - assert results['NM_001282626.1:c.1818+143dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '156138749', 'alt': 'CG'}} - assert results['NM_001282626.1:c.1818+143dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269555.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282626.1'} + assert results['NM_170707.3:c.1961dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_733821.1(LRG_254p2):p.(Thr655AsnfsTer49)', 'slr': 'NP_733821.1:p.(T655Nfs*49)'} + assert results['NM_170707.3:c.1961dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_170707.3:c.1961dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_170707.3:c.1961dup']['alt_genomic_loci'], []) + assert results['NM_170707.3:c.1961dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': 'chr1', 'pos': '156108540', 'ref': 'C', 'alt': 'CG'}} + assert 
results['NM_170707.3:c.1961dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': 'chr1', 'pos': '156138749', 'ref': 'C', 'alt': 'CG'}} + assert results['NM_170707.3:c.1961dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': '1', 'pos': '156108540', 'ref': 'C', 'alt': 'CG'}} + assert results['NM_170707.3:c.1961dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': '1', 'pos': '156138749', 'ref': 'C', 'alt': 'CG'}} + assert results['NM_170707.3:c.1961dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_170707.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_733821.1'} - assert results['flag'] == 'gene_variant' assert 'NM_001257374.2:c.1625dup' in list(results.keys()) - assert results['NM_001257374.2:c.1625dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001257374.2:c.1625dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001257374.2:c.1625dup']['alt_genomic_loci'], []) - assert results['NM_001257374.2:c.1625dup']['gene_symbol'] == 'LMNA' - assert results['NM_001257374.2:c.1625dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001244303.1:p.(Thr543AsnfsTer90)', 'slr': 'NP_001244303.1:p.(T543Nfs*90)'} assert results['NM_001257374.2:c.1625dup']['submitted_variant'] == '1-156108541-G-GG' - assert results['NM_001257374.2:c.1625dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001257374.2:c.1625dup']['hgvs_lrg_variant'] == '' + assert results['NM_001257374.2:c.1625dup']['gene_symbol'] == 'LMNA' + assert results['NM_001257374.2:c.1625dup']['gene_ids'] == {'hgnc_id': 'HGNC:6636', 'entrez_gene_id': '4000', 'ucsc_id': 'uc001fni.4', 'omim_id': ['150330']} assert results['NM_001257374.2:c.1625dup']['hgvs_transcript_variant'] == 'NM_001257374.2:c.1625dup' + assert 
results['NM_001257374.2:c.1625dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001257374.2:c.1625dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001257374.2:c.1625dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001257374.2:c.1625dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '156108540', 'alt': 'CG'}} - assert results['NM_001257374.2:c.1625dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '156138749', 'alt': 'CG'}} - assert results['NM_001257374.2:c.1625dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '156108540', 'alt': 'CG'}} - assert results['NM_001257374.2:c.1625dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '156138749', 'alt': 'CG'}} - assert results['NM_001257374.2:c.1625dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244303.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257374.2'} + assert results['NM_001257374.2:c.1625dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001244303.1:p.(Thr543AsnfsTer90)', 'slr': 'NP_001244303.1:p.(T543Nfs*90)'} + assert results['NM_001257374.2:c.1625dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001257374.2:c.1625dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001257374.2:c.1625dup']['alt_genomic_loci'], []) + assert results['NM_001257374.2:c.1625dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': 'chr1', 'pos': '156108540', 'ref': 'C', 'alt': 'CG'}} + assert results['NM_001257374.2:c.1625dup']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': 'chr1', 'pos': '156138749', 'ref': 'C', 'alt': 'CG'}} + assert results['NM_001257374.2:c.1625dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': '1', 'pos': '156108540', 'ref': 'C', 'alt': 'CG'}} + assert results['NM_001257374.2:c.1625dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': '1', 'pos': '156138749', 'ref': 'C', 'alt': 'CG'}} + assert results['NM_001257374.2:c.1625dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257374.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244303.1'} assert 'NM_170708.3:c.1871dup' in list(results.keys()) - assert results['NM_170708.3:c.1871dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_170708.3:c.1871dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_170708.3:c.1871dup']['alt_genomic_loci'], []) - assert results['NM_170708.3:c.1871dup']['gene_symbol'] == 'LMNA' - assert results['NM_170708.3:c.1871dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_733822.1(LRG_254p3):p.(Thr625AsnfsTer49)', 'slr': 'NP_733822.1:p.(T625Nfs*49)'} assert results['NM_170708.3:c.1871dup']['submitted_variant'] == '1-156108541-G-GG' - assert results['NM_170708.3:c.1871dup']['genome_context_intronic_sequence'] == '' - assert results['NM_170708.3:c.1871dup']['hgvs_lrg_variant'] == '' + assert results['NM_170708.3:c.1871dup']['gene_symbol'] == 'LMNA' + assert results['NM_170708.3:c.1871dup']['gene_ids'] == {'hgnc_id': 'HGNC:6636', 'entrez_gene_id': '4000', 'ucsc_id': 'uc001fni.4', 'omim_id': ['150330']} assert results['NM_170708.3:c.1871dup']['hgvs_transcript_variant'] == 'NM_170708.3:c.1871dup' + assert results['NM_170708.3:c.1871dup']['genome_context_intronic_sequence'] == '' + assert 
results['NM_170708.3:c.1871dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_170708.3:c.1871dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_170708.3:c.1871dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '156108540', 'alt': 'CG'}} - assert results['NM_170708.3:c.1871dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '156138749', 'alt': 'CG'}} - assert results['NM_170708.3:c.1871dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '156108540', 'alt': 'CG'}} - assert results['NM_170708.3:c.1871dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '156138749', 'alt': 'CG'}} - assert results['NM_170708.3:c.1871dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_733822.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_170708.3'} + assert results['NM_170708.3:c.1871dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_733822.1(LRG_254p3):p.(Thr625AsnfsTer49)', 'slr': 'NP_733822.1:p.(T625Nfs*49)'} + assert results['NM_170708.3:c.1871dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_170708.3:c.1871dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_170708.3:c.1871dup']['alt_genomic_loci'], []) + assert results['NM_170708.3:c.1871dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': 'chr1', 'pos': '156108540', 'ref': 'C', 'alt': 'CG'}} + assert results['NM_170708.3:c.1871dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': 'chr1', 'pos': '156138749', 'ref': 'C', 'alt': 
'CG'}} + assert results['NM_170708.3:c.1871dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': '1', 'pos': '156108540', 'ref': 'C', 'alt': 'CG'}} + assert results['NM_170708.3:c.1871dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': '1', 'pos': '156138749', 'ref': 'C', 'alt': 'CG'}} + assert results['NM_170708.3:c.1871dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_170708.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_733822.1'} + + assert 'NM_001282626.1:c.1818+143dup' in list(results.keys()) + assert results['NM_001282626.1:c.1818+143dup']['submitted_variant'] == '1-156108541-G-GG' + assert results['NM_001282626.1:c.1818+143dup']['gene_symbol'] == 'LMNA' + assert results['NM_001282626.1:c.1818+143dup']['gene_ids'] == {'hgnc_id': 'HGNC:6636', 'entrez_gene_id': '4000', 'ucsc_id': 'uc001fni.4', 'omim_id': ['150330']} + assert results['NM_001282626.1:c.1818+143dup']['hgvs_transcript_variant'] == 'NM_001282626.1:c.1818+143dup' + assert results['NM_001282626.1:c.1818+143dup']['genome_context_intronic_sequence'] == 'NC_000001.10(NM_001282626.1):c.1818+143dup' + assert results['NM_001282626.1:c.1818+143dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001282626.1:c.1818+143dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001282626.1:c.1818+143dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001269555.1:p.?', 'slr': 'NP_001269555.1:p.?'} + assert results['NM_001282626.1:c.1818+143dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001282626.1:c.1818+143dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001282626.1:c.1818+143dup']['alt_genomic_loci'], []) + assert results['NM_001282626.1:c.1818+143dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': 
'chr1', 'pos': '156108540', 'ref': 'C', 'alt': 'CG'}} + assert results['NM_001282626.1:c.1818+143dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': 'chr1', 'pos': '156138749', 'ref': 'C', 'alt': 'CG'}} + assert results['NM_001282626.1:c.1818+143dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': '1', 'pos': '156108540', 'ref': 'C', 'alt': 'CG'}} + assert results['NM_001282626.1:c.1818+143dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.156138750dup', 'vcf': {'chr': '1', 'pos': '156138749', 'ref': 'C', 'alt': 'CG'}} + assert results['NM_001282626.1:c.1818+143dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001282626.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001269555.1'} assert 'NM_001257374.1:c.1625dup' in list(results.keys()) - assert results['NM_001257374.1:c.1625dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001257374.1:c.1625dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001257374.1:c.1625dup']['alt_genomic_loci'], []) - assert results['NM_001257374.1:c.1625dup']['gene_symbol'] == 'LMNA' - assert results['NM_001257374.1:c.1625dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001244303.1:p.(Thr543AsnfsTer90)', 'slr': 'NP_001244303.1:p.(T543Nfs*90)'} assert results['NM_001257374.1:c.1625dup']['submitted_variant'] == '1-156108541-G-GG' - assert results['NM_001257374.1:c.1625dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001257374.1:c.1625dup']['hgvs_lrg_variant'] == '' + assert results['NM_001257374.1:c.1625dup']['gene_symbol'] == 'LMNA' + assert results['NM_001257374.1:c.1625dup']['gene_ids'] == {'hgnc_id': 'HGNC:6636', 'entrez_gene_id': '4000', 'ucsc_id': 'uc001fni.4', 'omim_id': ['150330']} assert 
results['NM_001257374.1:c.1625dup']['hgvs_transcript_variant'] == 'NM_001257374.1:c.1625dup' + assert results['NM_001257374.1:c.1625dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001257374.1:c.1625dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001257374.1:c.1625dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001257374.1:c.1625dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '156108540', 'alt': 'CG'}} + assert results['NM_001257374.1:c.1625dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001244303.1:p.(Thr543AsnfsTer90)', 'slr': 'NP_001244303.1:p.(T543Nfs*90)'} + assert results['NM_001257374.1:c.1625dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001257374.1:c.1625dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001257374.1:c.1625dup']['alt_genomic_loci'], []) + assert results['NM_001257374.1:c.1625dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': 'chr1', 'pos': '156108540', 'ref': 'C', 'alt': 'CG'}} assert 'hg38' not in list(results['NM_001257374.1:c.1625dup']['primary_assembly_loci'].keys()) - assert results['NM_001257374.1:c.1625dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '156108540', 'alt': 'CG'}} + assert results['NM_001257374.1:c.1625dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.156108541dup', 'vcf': {'chr': '1', 'pos': '156108540', 'ref': 'C', 'alt': 'CG'}} assert 'grch38' not in list(results['NM_001257374.1:c.1625dup']['primary_assembly_loci'].keys()) - assert results['NM_001257374.1:c.1625dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244303.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257374.1'} - + assert results['NM_001257374.1:c.1625dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257374.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244303.1'} def test_variant209(self): variant = '1-161279695-T-A' @@ -7678,56 +7832,58 @@ def test_variant209(self): assert results['flag'] == 'gene_variant' assert 'NM_001315491.1:c.1A>T' in list(results.keys()) - assert results['NM_001315491.1:c.1A>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001315491.1:c.1A>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001315491.1:c.1A>T']['alt_genomic_loci'], []) - assert results['NM_001315491.1:c.1A>T']['gene_symbol'] == 'MPZ' - assert results['NM_001315491.1:c.1A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001302420.1:p.(Met1?)', 'slr': 'NP_001302420.1:p.(M1?)'} assert results['NM_001315491.1:c.1A>T']['submitted_variant'] == '1-161279695-T-A' - assert results['NM_001315491.1:c.1A>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001315491.1:c.1A>T']['hgvs_lrg_variant'] == '' + assert results['NM_001315491.1:c.1A>T']['gene_symbol'] == 'MPZ' + assert results['NM_001315491.1:c.1A>T']['gene_ids'] == {'hgnc_id': 'HGNC:7225', 'entrez_gene_id': '4359', 'ucsc_id': 'uc001gaf.4', 'omim_id': ['159440']} assert results['NM_001315491.1:c.1A>T']['hgvs_transcript_variant'] == 'NM_001315491.1:c.1A>T' + assert results['NM_001315491.1:c.1A>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001315491.1:c.1A>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001315491.1:c.1A>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001315491.1:c.1A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '161279695', 'alt': 'A'}} - assert 
results['NM_001315491.1:c.1A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.161309905T>A', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '161309905', 'alt': 'A'}} - assert results['NM_001315491.1:c.1A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '161279695', 'alt': 'A'}} - assert results['NM_001315491.1:c.1A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.161309905T>A', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '161309905', 'alt': 'A'}} - assert results['NM_001315491.1:c.1A>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001302420.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001315491.1'} + assert results['NM_001315491.1:c.1A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001302420.1:p.(Met1?)', 'slr': 'NP_001302420.1:p.(M1?)'} + assert results['NM_001315491.1:c.1A>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001315491.1:c.1A>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001315491.1:c.1A>T']['alt_genomic_loci'], []) + assert results['NM_001315491.1:c.1A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': 'chr1', 'pos': '161279695', 'ref': 'T', 'alt': 'A'}} + assert results['NM_001315491.1:c.1A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.161309905T>A', 'vcf': {'chr': 'chr1', 'pos': '161309905', 'ref': 'T', 'alt': 'A'}} + assert results['NM_001315491.1:c.1A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': '1', 'pos': '161279695', 'ref': 'T', 'alt': 'A'}} + assert results['NM_001315491.1:c.1A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.161309905T>A', 'vcf': {'chr': '1', 'pos': '161309905', 
'ref': 'T', 'alt': 'A'}} + assert results['NM_001315491.1:c.1A>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001315491.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001302420.1'} assert 'NM_000530.7:c.1A>T' in list(results.keys()) - assert results['NM_000530.7:c.1A>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000530.7:c.1A>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000530.7:c.1A>T']['alt_genomic_loci'], []) - assert results['NM_000530.7:c.1A>T']['gene_symbol'] == 'MPZ' - assert results['NM_000530.7:c.1A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000521.2(LRG_256p1):p.(Met1?)', 'slr': 'NP_000521.2:p.(M1?)'} assert results['NM_000530.7:c.1A>T']['submitted_variant'] == '1-161279695-T-A' - assert results['NM_000530.7:c.1A>T']['genome_context_intronic_sequence'] == '' - assert results['NM_000530.7:c.1A>T']['hgvs_lrg_variant'] == '' + assert results['NM_000530.7:c.1A>T']['gene_symbol'] == 'MPZ' + assert results['NM_000530.7:c.1A>T']['gene_ids'] == {'hgnc_id': 'HGNC:7225', 'entrez_gene_id': '4359', 'ucsc_id': 'uc001gaf.4', 'omim_id': ['159440']} assert results['NM_000530.7:c.1A>T']['hgvs_transcript_variant'] == 'NM_000530.7:c.1A>T' + assert results['NM_000530.7:c.1A>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000530.7:c.1A>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000530.7:c.1A>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_000530.7:c.1A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '161279695', 'alt': 'A'}} - assert results['NM_000530.7:c.1A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.161309905T>A', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '161309905', 'alt': 'A'}} - assert results['NM_000530.7:c.1A>T']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '161279695', 'alt': 'A'}} - assert results['NM_000530.7:c.1A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.161309905T>A', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '161309905', 'alt': 'A'}} - assert results['NM_000530.7:c.1A>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000521.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000530.7'} + assert results['NM_000530.7:c.1A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000521.2(LRG_256p1):p.(Met1?)', 'slr': 'NP_000521.2:p.(M1?)'} + assert results['NM_000530.7:c.1A>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000530.7:c.1A>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000530.7:c.1A>T']['alt_genomic_loci'], []) + assert results['NM_000530.7:c.1A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': 'chr1', 'pos': '161279695', 'ref': 'T', 'alt': 'A'}} + assert results['NM_000530.7:c.1A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.161309905T>A', 'vcf': {'chr': 'chr1', 'pos': '161309905', 'ref': 'T', 'alt': 'A'}} + assert results['NM_000530.7:c.1A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': '1', 'pos': '161279695', 'ref': 'T', 'alt': 'A'}} + assert results['NM_000530.7:c.1A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.161309905T>A', 'vcf': {'chr': '1', 'pos': '161309905', 'ref': 'T', 'alt': 'A'}} + assert results['NM_000530.7:c.1A>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000530.7', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000521.2'} assert 'NM_000530.6:c.1A>T' in list(results.keys()) - assert 
results['NM_000530.6:c.1A>T']['hgvs_lrg_transcript_variant'] == 'LRG_256t1:c.1A>T' - assert results['NM_000530.6:c.1A>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000530.6:c.1A>T']['alt_genomic_loci'], []) - assert results['NM_000530.6:c.1A>T']['gene_symbol'] == 'MPZ' - assert results['NM_000530.6:c.1A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000521.2(LRG_256p1):p.(Met1?)', 'slr': 'NP_000521.2:p.(M1?)'} assert results['NM_000530.6:c.1A>T']['submitted_variant'] == '1-161279695-T-A' + assert results['NM_000530.6:c.1A>T']['gene_symbol'] == 'MPZ' + assert results['NM_000530.6:c.1A>T']['gene_ids'] == {'hgnc_id': 'HGNC:7225', 'entrez_gene_id': '4359', 'ucsc_id': 'uc001gaf.4', 'omim_id': ['159440']} + assert results['NM_000530.6:c.1A>T']['hgvs_transcript_variant'] == 'NM_000530.6:c.1A>T' assert results['NM_000530.6:c.1A>T']['genome_context_intronic_sequence'] == '' - assert results['NM_000530.6:c.1A>T']['hgvs_lrg_variant'] == 'LRG_256:g.5068A>T' - assert results['NM_000530.6:c.1A>T']['hgvs_transcript_variant'] == 'NM_000530.6:c.1A>T' + assert results['NM_000530.6:c.1A>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000530.6:c.1A>T']['hgvs_refseqgene_variant'] == 'NG_008055.1:g.5068A>T' - assert results['NM_000530.6:c.1A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '161279695', 'alt': 'A'}} + assert results['NM_000530.6:c.1A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000521.2(LRG_256p1):p.(Met1?)', 'slr': 'NP_000521.2:p.(M1?)'} + assert results['NM_000530.6:c.1A>T']['hgvs_lrg_transcript_variant'] == 'LRG_256t1:c.1A>T' + assert results['NM_000530.6:c.1A>T']['hgvs_lrg_variant'] == 'LRG_256:g.5068A>T' + self.assertCountEqual(results['NM_000530.6:c.1A>T']['alt_genomic_loci'], []) + assert results['NM_000530.6:c.1A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000001.10:g.161279695T>A', 'vcf': {'chr': 'chr1', 'pos': '161279695', 'ref': 'T', 'alt': 'A'}} assert 'hg38' not in list(results['NM_000530.6:c.1A>T']['primary_assembly_loci'].keys()) - assert results['NM_000530.6:c.1A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '161279695', 'alt': 'A'}} + assert results['NM_000530.6:c.1A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.161279695T>A', 'vcf': {'chr': '1', 'pos': '161279695', 'ref': 'T', 'alt': 'A'}} assert 'grch38' not in list(results['NM_000530.6:c.1A>T']['primary_assembly_loci'].keys()) - assert results['NM_000530.6:c.1A>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008055.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000521.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000530.6', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_256.xml'} - + assert results['NM_000530.6:c.1A>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000530.6', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000521.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008055.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_256.xml'} def test_variant210(self): variant = '1-169519049-T-T' @@ -7736,97 +7892,100 @@ def test_variant210(self): assert results['flag'] == 'gene_variant' assert 'NM_000130.4:c.1601G>A' in list(results.keys()) - assert results['NM_000130.4:c.1601G>A']['hgvs_lrg_transcript_variant'] == 'LRG_553t1:c.1601G>A' - assert results['NM_000130.4:c.1601G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000130.4:c.1601G>A']['alt_genomic_loci'], []) - assert results['NM_000130.4:c.1601G>A']['gene_symbol'] == 'F5' - assert results['NM_000130.4:c.1601G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_000121.2(LRG_553p1):p.(Arg534Gln)', 'slr': 'NP_000121.2:p.(R534Q)'} assert results['NM_000130.4:c.1601G>A']['submitted_variant'] == '1-169519049-T-T' - assert results['NM_000130.4:c.1601G>A']['genome_context_intronic_sequence'] == '' - assert results['NM_000130.4:c.1601G>A']['hgvs_lrg_variant'] == 'LRG_553:g.41721G>A' + assert results['NM_000130.4:c.1601G>A']['gene_symbol'] == 'F5' + assert results['NM_000130.4:c.1601G>A']['gene_ids'] == {'hgnc_id': 'HGNC:3542', 'entrez_gene_id': '2153', 'ucsc_id': 'uc001ggg.2', 'omim_id': ['612309']} assert results['NM_000130.4:c.1601G>A']['hgvs_transcript_variant'] == 'NM_000130.4:c.1601G>A' + assert results['NM_000130.4:c.1601G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_000130.4:c.1601G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000130.4:c.1601G>A']['hgvs_refseqgene_variant'] == 'NG_011806.1:g.41721G>A' - assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.169519049T=', 'vcf': {'chr': 'chr1', 'ref': 'T', 'pos': '169519049', 'alt': 'T'}} - assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549811C>T', 'vcf': {'chr': 'chr1', 'ref': 'C', 'pos': '169549811', 'alt': 'T'}} - assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.169519049T=', 'vcf': {'chr': '1', 'ref': 'T', 'pos': '169519049', 'alt': 'T'}} - assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549811C>T', 'vcf': {'chr': '1', 'ref': 'C', 'pos': '169549811', 'alt': 'T'}} - assert results['NM_000130.4:c.1601G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011806.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000121.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000130.4', 
'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_553.xml'} - + assert results['NM_000130.4:c.1601G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000121.2(LRG_553p1):p.(Arg534Gln)', 'slr': 'NP_000121.2:p.(R534Q)'} + assert results['NM_000130.4:c.1601G>A']['hgvs_lrg_transcript_variant'] == 'LRG_553t1:c.1601G>A' + assert results['NM_000130.4:c.1601G>A']['hgvs_lrg_variant'] == 'LRG_553:g.41721G>A' + self.assertCountEqual(results['NM_000130.4:c.1601G>A']['alt_genomic_loci'], []) + assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.169519049T=', 'vcf': {'chr': 'chr1', 'pos': '169519049', 'ref': 'T', 'alt': 'T'}} + assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549811C>T', 'vcf': {'chr': 'chr1', 'pos': '169549811', 'ref': 'C', 'alt': 'T'}} + assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.169519049T=', 'vcf': {'chr': '1', 'pos': '169519049', 'ref': 'T', 'alt': 'T'}} + assert results['NM_000130.4:c.1601G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.169549811C>T', 'vcf': {'chr': '1', 'pos': '169549811', 'ref': 'C', 'alt': 'T'}} + assert results['NM_000130.4:c.1601G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000130.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000121.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011806.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_553.xml'} def test_variant211(self): variant = '1-226125468-G-A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_003240.4:c.774C>T' in list(results.keys()) - assert results['NM_003240.4:c.774C>T']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_003240.4:c.774C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003240.4:c.774C>T']['alt_genomic_loci'], []) - assert results['NM_003240.4:c.774C>T']['gene_symbol'] == 'LEFTY2' - assert results['NM_003240.4:c.774C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003231.2:p.(Thr258=)', 'slr': 'NP_003231.2:p.(T258=)'} - assert results['NM_003240.4:c.774C>T']['submitted_variant'] == '1-226125468-G-A' - assert results['NM_003240.4:c.774C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_003240.4:c.774C>T']['hgvs_lrg_variant'] == '' - assert results['NM_003240.4:c.774C>T']['hgvs_transcript_variant'] == 'NM_003240.4:c.774C>T' - assert results['NM_003240.4:c.774C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_003240.4:c.774C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '226125468', 'alt': 'A'}} - assert results['NM_003240.4:c.774C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '225937768', 'alt': 'A'}} - assert results['NM_003240.4:c.774C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '226125468', 'alt': 'A'}} - assert results['NM_003240.4:c.774C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '225937768', 'alt': 'A'}} - assert results['NM_003240.4:c.774C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003231.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003240.4'} + assert results['flag'] == 'gene_variant' + assert 'NM_001172425.1:c.672C>T' in list(results.keys()) + assert results['NM_001172425.1:c.672C>T']['submitted_variant'] == '1-226125468-G-A' + assert 
results['NM_001172425.1:c.672C>T']['gene_symbol'] == 'LEFTY2' + assert results['NM_001172425.1:c.672C>T']['gene_ids'] == {'hgnc_id': 'HGNC:3122', 'entrez_gene_id': '7044', 'ucsc_id': 'uc001hpt.3', 'omim_id': ['601877']} + assert results['NM_001172425.1:c.672C>T']['hgvs_transcript_variant'] == 'NM_001172425.1:c.672C>T' + assert results['NM_001172425.1:c.672C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001172425.1:c.672C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001172425.1:c.672C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001172425.1:c.672C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001165896.1:p.(Thr224=)', 'slr': 'NP_001165896.1:p.(T224=)'} + assert results['NM_001172425.1:c.672C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001172425.1:c.672C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001172425.1:c.672C>T']['alt_genomic_loci'], []) + assert results['NM_001172425.1:c.672C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': 'chr1', 'pos': '226125468', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001172425.1:c.672C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': 'chr1', 'pos': '225937768', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001172425.1:c.672C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': '1', 'pos': '226125468', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001172425.1:c.672C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': '1', 'pos': '225937768', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001172425.1:c.672C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001172425.1', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001165896.1'} assert 'NM_003240.3:c.774C>T' in list(results.keys()) - assert results['NM_003240.3:c.774C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003240.3:c.774C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003240.3:c.774C>T']['alt_genomic_loci'], []) - assert results['NM_003240.3:c.774C>T']['gene_symbol'] == 'LEFTY2' - assert results['NM_003240.3:c.774C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003231.2:p.(Thr258=)', 'slr': 'NP_003231.2:p.(T258=)'} assert results['NM_003240.3:c.774C>T']['submitted_variant'] == '1-226125468-G-A' - assert results['NM_003240.3:c.774C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_003240.3:c.774C>T']['hgvs_lrg_variant'] == '' + assert results['NM_003240.3:c.774C>T']['gene_symbol'] == 'LEFTY2' + assert results['NM_003240.3:c.774C>T']['gene_ids'] == {'hgnc_id': 'HGNC:3122', 'entrez_gene_id': '7044', 'ucsc_id': 'uc001hpt.3', 'omim_id': ['601877']} assert results['NM_003240.3:c.774C>T']['hgvs_transcript_variant'] == 'NM_003240.3:c.774C>T' + assert results['NM_003240.3:c.774C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_003240.3:c.774C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003240.3:c.774C>T']['hgvs_refseqgene_variant'] == 'NG_008118.1:g.8453C>T' - assert results['NM_003240.3:c.774C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '226125468', 'alt': 'A'}} - assert results['NM_003240.3:c.774C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '225937768', 'alt': 'A'}} - assert results['NM_003240.3:c.774C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '226125468', 'alt': 'A'}} - 
assert results['NM_003240.3:c.774C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '225937768', 'alt': 'A'}} - assert results['NM_003240.3:c.774C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008118.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003231.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003240.3'} - - assert 'NM_001172425.1:c.672C>T' in list(results.keys()) - assert results['NM_001172425.1:c.672C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001172425.1:c.672C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001172425.1:c.672C>T']['alt_genomic_loci'], []) - assert results['NM_001172425.1:c.672C>T']['gene_symbol'] == 'LEFTY2' - assert results['NM_001172425.1:c.672C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001165896.1:p.(Thr224=)', 'slr': 'NP_001165896.1:p.(T224=)'} - assert results['NM_001172425.1:c.672C>T']['submitted_variant'] == '1-226125468-G-A' - assert results['NM_001172425.1:c.672C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001172425.1:c.672C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001172425.1:c.672C>T']['hgvs_transcript_variant'] == 'NM_001172425.1:c.672C>T' - assert results['NM_001172425.1:c.672C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001172425.1:c.672C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '226125468', 'alt': 'A'}} - assert results['NM_001172425.1:c.672C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '225937768', 'alt': 'A'}} - assert results['NM_001172425.1:c.672C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 
'vcf': {'chr': '1', 'ref': 'G', 'pos': '226125468', 'alt': 'A'}} - assert results['NM_001172425.1:c.672C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '225937768', 'alt': 'A'}} - assert results['NM_001172425.1:c.672C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001165896.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001172425.1'} + assert results['NM_003240.3:c.774C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003231.2:p.(Thr258=)', 'slr': 'NP_003231.2:p.(T258=)'} + assert results['NM_003240.3:c.774C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003240.3:c.774C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003240.3:c.774C>T']['alt_genomic_loci'], []) + assert results['NM_003240.3:c.774C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': 'chr1', 'pos': '226125468', 'ref': 'G', 'alt': 'A'}} + assert results['NM_003240.3:c.774C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': 'chr1', 'pos': '225937768', 'ref': 'G', 'alt': 'A'}} + assert results['NM_003240.3:c.774C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': '1', 'pos': '226125468', 'ref': 'G', 'alt': 'A'}} + assert results['NM_003240.3:c.774C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': '1', 'pos': '225937768', 'ref': 'G', 'alt': 'A'}} + assert results['NM_003240.3:c.774C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003240.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003231.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008118.1'} assert 'NM_001172425.2:c.672C>T' in 
list(results.keys()) - assert results['NM_001172425.2:c.672C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001172425.2:c.672C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001172425.2:c.672C>T']['alt_genomic_loci'], []) - assert results['NM_001172425.2:c.672C>T']['gene_symbol'] == 'LEFTY2' - assert results['NM_001172425.2:c.672C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001165896.1:p.(Thr224=)', 'slr': 'NP_001165896.1:p.(T224=)'} assert results['NM_001172425.2:c.672C>T']['submitted_variant'] == '1-226125468-G-A' - assert results['NM_001172425.2:c.672C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001172425.2:c.672C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001172425.2:c.672C>T']['gene_symbol'] == 'LEFTY2' + assert results['NM_001172425.2:c.672C>T']['gene_ids'] == {'hgnc_id': 'HGNC:3122', 'entrez_gene_id': '7044', 'ucsc_id': 'uc001hpt.3', 'omim_id': ['601877']} assert results['NM_001172425.2:c.672C>T']['hgvs_transcript_variant'] == 'NM_001172425.2:c.672C>T' + assert results['NM_001172425.2:c.672C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001172425.2:c.672C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001172425.2:c.672C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001172425.2:c.672C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '226125468', 'alt': 'A'}} - assert results['NM_001172425.2:c.672C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '225937768', 'alt': 'A'}} - assert results['NM_001172425.2:c.672C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '226125468', 'alt': 'A'}} - assert 
results['NM_001172425.2:c.672C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '225937768', 'alt': 'A'}} - assert results['NM_001172425.2:c.672C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001165896.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001172425.2'} + assert results['NM_001172425.2:c.672C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001165896.1:p.(Thr224=)', 'slr': 'NP_001165896.1:p.(T224=)'} + assert results['NM_001172425.2:c.672C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001172425.2:c.672C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001172425.2:c.672C>T']['alt_genomic_loci'], []) + assert results['NM_001172425.2:c.672C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': 'chr1', 'pos': '226125468', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001172425.2:c.672C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': 'chr1', 'pos': '225937768', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001172425.2:c.672C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': '1', 'pos': '226125468', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001172425.2:c.672C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': '1', 'pos': '225937768', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001172425.2:c.672C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001172425.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001165896.1'} - assert results['flag'] == 'gene_variant' + assert 'NM_003240.4:c.774C>T' in list(results.keys()) + assert 
results['NM_003240.4:c.774C>T']['submitted_variant'] == '1-226125468-G-A' + assert results['NM_003240.4:c.774C>T']['gene_symbol'] == 'LEFTY2' + assert results['NM_003240.4:c.774C>T']['gene_ids'] == {'hgnc_id': 'HGNC:3122', 'entrez_gene_id': '7044', 'ucsc_id': 'uc001hpt.3', 'omim_id': ['601877']} + assert results['NM_003240.4:c.774C>T']['hgvs_transcript_variant'] == 'NM_003240.4:c.774C>T' + assert results['NM_003240.4:c.774C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_003240.4:c.774C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003240.4:c.774C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_003240.4:c.774C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003231.2:p.(Thr258=)', 'slr': 'NP_003231.2:p.(T258=)'} + assert results['NM_003240.4:c.774C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003240.4:c.774C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003240.4:c.774C>T']['alt_genomic_loci'], []) + assert results['NM_003240.4:c.774C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': 'chr1', 'pos': '226125468', 'ref': 'G', 'alt': 'A'}} + assert results['NM_003240.4:c.774C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': 'chr1', 'pos': '225937768', 'ref': 'G', 'alt': 'A'}} + assert results['NM_003240.4:c.774C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.226125468G>A', 'vcf': {'chr': '1', 'pos': '226125468', 'ref': 'G', 'alt': 'A'}} + assert results['NM_003240.4:c.774C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.225937768G>A', 'vcf': {'chr': '1', 'pos': '225937768', 'ref': 'G', 'alt': 'A'}} + assert results['NM_003240.4:c.774C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003240.4', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_003231.2'} def test_variant212(self): variant = '10-89623035-CGCA-C' @@ -7835,519 +7994,544 @@ def test_variant212(self): assert results['flag'] == 'gene_variant' assert 'NM_001126049.1:c.-794_-792del' in list(results.keys()) - assert results['NM_001126049.1:c.-794_-792del']['hgvs_lrg_transcript_variant'] == 'LRG_1087t1:c.-794_-792del' - assert results['NM_001126049.1:c.-794_-792del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001126049.1:c.-794_-792del']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NW_013171807.1:g.79106_79108del', 'vcf': {'chr': 'HG2334_PATCH', 'ref': 'CGCA', 'pos': '79102', 'alt': 'C'}}}, {'hg38': {'hgvs_genomic_description': 'NW_013171807.1:g.79106_79108del', 'vcf': {'chr': 'NW_013171807.1', 'ref': 'CGCA', 'pos': '79102', 'alt': 'C'}}}]) - assert results['NM_001126049.1:c.-794_-792del']['gene_symbol'] == 'KLLN' - assert results['NM_001126049.1:c.-794_-792del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119521.1(LRG_1087p1):p.?', 'slr': 'NP_001119521.1:p.?'} assert results['NM_001126049.1:c.-794_-792del']['submitted_variant'] == '10-89623035-CGCA-C' - assert results['NM_001126049.1:c.-794_-792del']['genome_context_intronic_sequence'] == '' - assert results['NM_001126049.1:c.-794_-792del']['hgvs_lrg_variant'] == 'LRG_1087:g.5157_5159del' + assert results['NM_001126049.1:c.-794_-792del']['gene_symbol'] == 'KLLN' + assert results['NM_001126049.1:c.-794_-792del']['gene_ids'] == {'hgnc_id': 'HGNC:37212', 'entrez_gene_id': '100144748', 'ucsc_id': 'uc009xti.3', 'omim_id': ['612105']} assert results['NM_001126049.1:c.-794_-792del']['hgvs_transcript_variant'] == 'NM_001126049.1:c.-794_-792del' + assert results['NM_001126049.1:c.-794_-792del']['genome_context_intronic_sequence'] == '' + assert results['NM_001126049.1:c.-794_-792del']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_001126049.1:c.-794_-792del']['hgvs_refseqgene_variant'] == 'NG_033079.1:g.5157_5159del' - assert results['NM_001126049.1:c.-794_-792del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000010.10:g.89623039_89623041del', 'vcf': {'chr': 'chr10', 'ref': 'CGCA', 'pos': '89623035', 'alt': 'C'}} - assert results['NM_001126049.1:c.-794_-792del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000010.11:g.87863282_87863284del', 'vcf': {'chr': 'chr10', 'ref': 'CGCA', 'pos': '87863278', 'alt': 'C'}} - assert results['NM_001126049.1:c.-794_-792del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000010.10:g.89623039_89623041del', 'vcf': {'chr': '10', 'ref': 'CGCA', 'pos': '89623035', 'alt': 'C'}} - assert results['NM_001126049.1:c.-794_-792del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000010.11:g.87863282_87863284del', 'vcf': {'chr': '10', 'ref': 'CGCA', 'pos': '87863278', 'alt': 'C'}} - assert results['NM_001126049.1:c.-794_-792del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_033079.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119521.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126049.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_1087.xml'} - + assert results['NM_001126049.1:c.-794_-792del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119521.1(LRG_1087p1):p.?', 'slr': 'NP_001119521.1:p.?'} + assert results['NM_001126049.1:c.-794_-792del']['hgvs_lrg_transcript_variant'] == 'LRG_1087t1:c.-794_-792del' + assert results['NM_001126049.1:c.-794_-792del']['hgvs_lrg_variant'] == 'LRG_1087:g.5157_5159del' + self.assertCountEqual(results['NM_001126049.1:c.-794_-792del']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NW_013171807.1:g.79106_79108del', 'vcf': {'chr': 'HG2334_PATCH', 'pos': '79102', 'ref': 'CGCA', 'alt': 'C'}}}, {'hg38': 
{'hgvs_genomic_description': 'NW_013171807.1:g.79106_79108del', 'vcf': {'chr': 'NW_013171807.1', 'pos': '79102', 'ref': 'CGCA', 'alt': 'C'}}}]) + assert results['NM_001126049.1:c.-794_-792del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000010.10:g.89623039_89623041del', 'vcf': {'chr': 'chr10', 'pos': '89623035', 'ref': 'CGCA', 'alt': 'C'}} + assert results['NM_001126049.1:c.-794_-792del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000010.11:g.87863282_87863284del', 'vcf': {'chr': 'chr10', 'pos': '87863278', 'ref': 'CGCA', 'alt': 'C'}} + assert results['NM_001126049.1:c.-794_-792del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000010.10:g.89623039_89623041del', 'vcf': {'chr': '10', 'pos': '89623035', 'ref': 'CGCA', 'alt': 'C'}} + assert results['NM_001126049.1:c.-794_-792del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000010.11:g.87863282_87863284del', 'vcf': {'chr': '10', 'pos': '87863278', 'ref': 'CGCA', 'alt': 'C'}} + assert results['NM_001126049.1:c.-794_-792del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126049.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119521.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_033079.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_1087.xml'} def test_variant213(self): variant = '11-62457852-C-A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NR_037946.1:n.3896G>T' in list(results.keys()) - assert results['NR_037946.1:n.3896G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_037946.1:n.3896G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_037946.1:n.3896G>T']['alt_genomic_loci'], []) - assert results['NR_037946.1:n.3896G>T']['gene_symbol'] == 'HNRNPUL2-BSCL2' - assert 
results['NR_037946.1:n.3896G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} - assert results['NR_037946.1:n.3896G>T']['submitted_variant'] == '11-62457852-C-A' - assert results['NR_037946.1:n.3896G>T']['genome_context_intronic_sequence'] == '' - assert results['NR_037946.1:n.3896G>T']['hgvs_lrg_variant'] == '' - assert results['NR_037946.1:n.3896G>T']['hgvs_transcript_variant'] == 'NR_037946.1:n.3896G>T' - assert results['NR_037946.1:n.3896G>T']['hgvs_refseqgene_variant'] == '' - assert results['NR_037946.1:n.3896G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '62457852', 'alt': 'A'}} - assert results['NR_037946.1:n.3896G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '62690380', 'alt': 'A'}} - assert results['NR_037946.1:n.3896G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'ref': 'C', 'pos': '62457852', 'alt': 'A'}} - assert results['NR_037946.1:n.3896G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'ref': 'C', 'pos': '62690380', 'alt': 'A'}} - assert results['NR_037946.1:n.3896G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_037946.1'} + assert results['flag'] == 'gene_variant' + assert 'NM_001122955.3:c.1376G>T' in list(results.keys()) + assert results['NM_001122955.3:c.1376G>T']['submitted_variant'] == '11-62457852-C-A' + assert results['NM_001122955.3:c.1376G>T']['gene_symbol'] == 'BSCL2' + assert results['NM_001122955.3:c.1376G>T']['gene_ids'] == {'hgnc_id': 'HGNC:15832', 'entrez_gene_id': '26580', 'ucsc_id': 'uc001nup.4', 'omim_id': ['606158']} + assert results['NM_001122955.3:c.1376G>T']['hgvs_transcript_variant'] == 
'NM_001122955.3:c.1376G>T' + assert results['NM_001122955.3:c.1376G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001122955.3:c.1376G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001122955.3:c.1376G>T']['hgvs_refseqgene_variant'] == 'NG_008461.1:g.24195G>T' + assert results['NM_001122955.3:c.1376G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001116427.1(LRG_235p1):p.(Cys459Phe)', 'slr': 'NP_001116427.1:p.(C459F)'} + assert results['NM_001122955.3:c.1376G>T']['hgvs_lrg_transcript_variant'] == 'LRG_235t1:c.1376G>T' + assert results['NM_001122955.3:c.1376G>T']['hgvs_lrg_variant'] == 'LRG_235:g.24195G>T' + self.assertCountEqual(results['NM_001122955.3:c.1376G>T']['alt_genomic_loci'], []) + assert results['NM_001122955.3:c.1376G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'pos': '62457852', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001122955.3:c.1376G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'pos': '62690380', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001122955.3:c.1376G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'pos': '62457852', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001122955.3:c.1376G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'pos': '62690380', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001122955.3:c.1376G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001122955.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001116427.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008461.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_235.xml'} assert 'NM_032667.6:c.1184G>T' in list(results.keys()) - assert 
results['NM_032667.6:c.1184G>T']['hgvs_lrg_transcript_variant'] == 'LRG_235t2:c.1184G>T' - assert results['NM_032667.6:c.1184G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_032667.6:c.1184G>T']['alt_genomic_loci'], []) - assert results['NM_032667.6:c.1184G>T']['gene_symbol'] == 'BSCL2' - assert results['NM_032667.6:c.1184G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116056.3(LRG_235p2):p.(Cys395Phe)', 'slr': 'NP_116056.3:p.(C395F)'} assert results['NM_032667.6:c.1184G>T']['submitted_variant'] == '11-62457852-C-A' - assert results['NM_032667.6:c.1184G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_032667.6:c.1184G>T']['hgvs_lrg_variant'] == '' + assert results['NM_032667.6:c.1184G>T']['gene_symbol'] == 'BSCL2' + assert results['NM_032667.6:c.1184G>T']['gene_ids'] == {'hgnc_id': 'HGNC:15832', 'entrez_gene_id': '26580', 'ucsc_id': 'uc001nup.4', 'omim_id': ['606158']} assert results['NM_032667.6:c.1184G>T']['hgvs_transcript_variant'] == 'NM_032667.6:c.1184G>T' + assert results['NM_032667.6:c.1184G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_032667.6:c.1184G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_032667.6:c.1184G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_032667.6:c.1184G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '62457852', 'alt': 'A'}} - assert results['NM_032667.6:c.1184G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '62690380', 'alt': 'A'}} - assert results['NM_032667.6:c.1184G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'ref': 'C', 'pos': '62457852', 'alt': 'A'}} - assert results['NM_032667.6:c.1184G>T']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'ref': 'C', 'pos': '62690380', 'alt': 'A'}} - assert results['NM_032667.6:c.1184G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116056.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032667.6'} - - assert 'NR_037949.1:n.1984G>T' in list(results.keys()) - assert results['NR_037949.1:n.1984G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_037949.1:n.1984G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_037949.1:n.1984G>T']['alt_genomic_loci'], []) - assert results['NR_037949.1:n.1984G>T']['gene_symbol'] == 'BSCL2' - assert results['NR_037949.1:n.1984G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} - assert results['NR_037949.1:n.1984G>T']['submitted_variant'] == '11-62457852-C-A' - assert results['NR_037949.1:n.1984G>T']['genome_context_intronic_sequence'] == '' - assert results['NR_037949.1:n.1984G>T']['hgvs_lrg_variant'] == '' - assert results['NR_037949.1:n.1984G>T']['hgvs_transcript_variant'] == 'NR_037949.1:n.1984G>T' - assert results['NR_037949.1:n.1984G>T']['hgvs_refseqgene_variant'] == '' - assert results['NR_037949.1:n.1984G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '62457852', 'alt': 'A'}} - assert results['NR_037949.1:n.1984G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '62690380', 'alt': 'A'}} - assert results['NR_037949.1:n.1984G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'ref': 'C', 'pos': '62457852', 'alt': 'A'}} - assert results['NR_037949.1:n.1984G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 
'vcf': {'chr': '11', 'ref': 'C', 'pos': '62690380', 'alt': 'A'}} - assert results['NR_037949.1:n.1984G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_037949.1'} + assert results['NM_032667.6:c.1184G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116056.3(LRG_235p2):p.(Cys395Phe)', 'slr': 'NP_116056.3:p.(C395F)'} + assert results['NM_032667.6:c.1184G>T']['hgvs_lrg_transcript_variant'] == 'LRG_235t2:c.1184G>T' + assert results['NM_032667.6:c.1184G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_032667.6:c.1184G>T']['alt_genomic_loci'], []) + assert results['NM_032667.6:c.1184G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'pos': '62457852', 'ref': 'C', 'alt': 'A'}} + assert results['NM_032667.6:c.1184G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'pos': '62690380', 'ref': 'C', 'alt': 'A'}} + assert results['NM_032667.6:c.1184G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'pos': '62457852', 'ref': 'C', 'alt': 'A'}} + assert results['NM_032667.6:c.1184G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'pos': '62690380', 'ref': 'C', 'alt': 'A'}} + assert results['NM_032667.6:c.1184G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032667.6', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116056.3'} assert 'NR_037948.1:n.1978G>T' in list(results.keys()) - assert results['NR_037948.1:n.1978G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_037948.1:n.1978G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_037948.1:n.1978G>T']['alt_genomic_loci'], []) - assert results['NR_037948.1:n.1978G>T']['gene_symbol'] 
== 'BSCL2' - assert results['NR_037948.1:n.1978G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_037948.1:n.1978G>T']['submitted_variant'] == '11-62457852-C-A' - assert results['NR_037948.1:n.1978G>T']['genome_context_intronic_sequence'] == '' - assert results['NR_037948.1:n.1978G>T']['hgvs_lrg_variant'] == '' + assert results['NR_037948.1:n.1978G>T']['gene_symbol'] == 'BSCL2' + assert results['NR_037948.1:n.1978G>T']['gene_ids'] == {'hgnc_id': 'HGNC:15832', 'entrez_gene_id': '26580', 'ucsc_id': 'uc001nup.4', 'omim_id': ['606158']} assert results['NR_037948.1:n.1978G>T']['hgvs_transcript_variant'] == 'NR_037948.1:n.1978G>T' + assert results['NR_037948.1:n.1978G>T']['genome_context_intronic_sequence'] == '' + assert results['NR_037948.1:n.1978G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NR_037948.1:n.1978G>T']['hgvs_refseqgene_variant'] == '' - assert results['NR_037948.1:n.1978G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '62457852', 'alt': 'A'}} - assert results['NR_037948.1:n.1978G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '62690380', 'alt': 'A'}} - assert results['NR_037948.1:n.1978G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'ref': 'C', 'pos': '62457852', 'alt': 'A'}} - assert results['NR_037948.1:n.1978G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'ref': 'C', 'pos': '62690380', 'alt': 'A'}} + assert results['NR_037948.1:n.1978G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_037948.1:n.1978G>T']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NR_037948.1:n.1978G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_037948.1:n.1978G>T']['alt_genomic_loci'], []) + assert results['NR_037948.1:n.1978G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'pos': '62457852', 'ref': 'C', 'alt': 'A'}} + assert results['NR_037948.1:n.1978G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'pos': '62690380', 'ref': 'C', 'alt': 'A'}} + assert results['NR_037948.1:n.1978G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'pos': '62457852', 'ref': 'C', 'alt': 'A'}} + assert results['NR_037948.1:n.1978G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'pos': '62690380', 'ref': 'C', 'alt': 'A'}} assert results['NR_037948.1:n.1978G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_037948.1'} - assert 'NM_001122955.3:c.1376G>T' in list(results.keys()) - assert results['NM_001122955.3:c.1376G>T']['hgvs_lrg_transcript_variant'] == 'LRG_235t1:c.1376G>T' - assert results['NM_001122955.3:c.1376G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001122955.3:c.1376G>T']['alt_genomic_loci'], []) - assert results['NM_001122955.3:c.1376G>T']['gene_symbol'] == 'BSCL2' - assert results['NM_001122955.3:c.1376G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001116427.1(LRG_235p1):p.(Cys459Phe)', 'slr': 'NP_001116427.1:p.(C459F)'} - assert results['NM_001122955.3:c.1376G>T']['submitted_variant'] == '11-62457852-C-A' - assert results['NM_001122955.3:c.1376G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001122955.3:c.1376G>T']['hgvs_lrg_variant'] == 'LRG_235:g.24195G>T' - assert 
results['NM_001122955.3:c.1376G>T']['hgvs_transcript_variant'] == 'NM_001122955.3:c.1376G>T' - assert results['NM_001122955.3:c.1376G>T']['hgvs_refseqgene_variant'] == 'NG_008461.1:g.24195G>T' - assert results['NM_001122955.3:c.1376G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '62457852', 'alt': 'A'}} - assert results['NM_001122955.3:c.1376G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '62690380', 'alt': 'A'}} - assert results['NM_001122955.3:c.1376G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'ref': 'C', 'pos': '62457852', 'alt': 'A'}} - assert results['NM_001122955.3:c.1376G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'ref': 'C', 'pos': '62690380', 'alt': 'A'}} - assert results['NM_001122955.3:c.1376G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008461.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001116427.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001122955.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_235.xml'} + assert 'NR_037946.1:n.3896G>T' in list(results.keys()) + assert results['NR_037946.1:n.3896G>T']['submitted_variant'] == '11-62457852-C-A' + assert results['NR_037946.1:n.3896G>T']['gene_symbol'] == 'HNRNPUL2-BSCL2' + assert results['NR_037946.1:n.3896G>T']['gene_ids'] == {'hgnc_id': 'HGNC:49189', 'entrez_gene_id': '100534595', 'ucsc_id': '', 'omim_id': []} + assert results['NR_037946.1:n.3896G>T']['hgvs_transcript_variant'] == 'NR_037946.1:n.3896G>T' + assert results['NR_037946.1:n.3896G>T']['genome_context_intronic_sequence'] == '' + assert results['NR_037946.1:n.3896G>T']['refseqgene_context_intronic_sequence'] == '' + 
assert results['NR_037946.1:n.3896G>T']['hgvs_refseqgene_variant'] == '' + assert results['NR_037946.1:n.3896G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_037946.1:n.3896G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_037946.1:n.3896G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_037946.1:n.3896G>T']['alt_genomic_loci'], []) + assert results['NR_037946.1:n.3896G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'pos': '62457852', 'ref': 'C', 'alt': 'A'}} + assert results['NR_037946.1:n.3896G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'pos': '62690380', 'ref': 'C', 'alt': 'A'}} + assert results['NR_037946.1:n.3896G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'pos': '62457852', 'ref': 'C', 'alt': 'A'}} + assert results['NR_037946.1:n.3896G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'pos': '62690380', 'ref': 'C', 'alt': 'A'}} + assert results['NR_037946.1:n.3896G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_037946.1'} + + assert 'NR_037949.1:n.1984G>T' in list(results.keys()) + assert results['NR_037949.1:n.1984G>T']['submitted_variant'] == '11-62457852-C-A' + assert results['NR_037949.1:n.1984G>T']['gene_symbol'] == 'BSCL2' + assert results['NR_037949.1:n.1984G>T']['gene_ids'] == {'hgnc_id': 'HGNC:15832', 'entrez_gene_id': '26580', 'ucsc_id': 'uc001nup.4', 'omim_id': ['606158']} + assert results['NR_037949.1:n.1984G>T']['hgvs_transcript_variant'] == 'NR_037949.1:n.1984G>T' + assert results['NR_037949.1:n.1984G>T']['genome_context_intronic_sequence'] == '' + assert 
results['NR_037949.1:n.1984G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_037949.1:n.1984G>T']['hgvs_refseqgene_variant'] == '' + assert results['NR_037949.1:n.1984G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_037949.1:n.1984G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_037949.1:n.1984G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_037949.1:n.1984G>T']['alt_genomic_loci'], []) + assert results['NR_037949.1:n.1984G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'pos': '62457852', 'ref': 'C', 'alt': 'A'}} + assert results['NR_037949.1:n.1984G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'pos': '62690380', 'ref': 'C', 'alt': 'A'}} + assert results['NR_037949.1:n.1984G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'pos': '62457852', 'ref': 'C', 'alt': 'A'}} + assert results['NR_037949.1:n.1984G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'pos': '62690380', 'ref': 'C', 'alt': 'A'}} + assert results['NR_037949.1:n.1984G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_037949.1'} - assert results['flag'] == 'gene_variant' assert 'NM_001130702.2:c.*178G>T' in list(results.keys()) - assert results['NM_001130702.2:c.*178G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001130702.2:c.*178G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001130702.2:c.*178G>T']['alt_genomic_loci'], []) - assert results['NM_001130702.2:c.*178G>T']['gene_symbol'] == 'BSCL2' - assert results['NM_001130702.2:c.*178G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_001124174.2:p.?', 'slr': 'NP_001124174.2:p.?'} assert results['NM_001130702.2:c.*178G>T']['submitted_variant'] == '11-62457852-C-A' - assert results['NM_001130702.2:c.*178G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001130702.2:c.*178G>T']['hgvs_lrg_variant'] == '' + assert results['NM_001130702.2:c.*178G>T']['gene_symbol'] == 'BSCL2' + assert results['NM_001130702.2:c.*178G>T']['gene_ids'] == {'hgnc_id': 'HGNC:15832', 'entrez_gene_id': '26580', 'ucsc_id': 'uc001nup.4', 'omim_id': ['606158']} assert results['NM_001130702.2:c.*178G>T']['hgvs_transcript_variant'] == 'NM_001130702.2:c.*178G>T' + assert results['NM_001130702.2:c.*178G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001130702.2:c.*178G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001130702.2:c.*178G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001130702.2:c.*178G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '62457852', 'alt': 'A'}} - assert results['NM_001130702.2:c.*178G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '62690380', 'alt': 'A'}} - assert results['NM_001130702.2:c.*178G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'ref': 'C', 'pos': '62457852', 'alt': 'A'}} - assert results['NM_001130702.2:c.*178G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'ref': 'C', 'pos': '62690380', 'alt': 'A'}} - assert results['NM_001130702.2:c.*178G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124174.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130702.2'} - + assert 
results['NM_001130702.2:c.*178G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124174.2:p.?', 'slr': 'NP_001124174.2:p.?'} + assert results['NM_001130702.2:c.*178G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001130702.2:c.*178G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001130702.2:c.*178G>T']['alt_genomic_loci'], []) + assert results['NM_001130702.2:c.*178G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': 'chr11', 'pos': '62457852', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001130702.2:c.*178G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': 'chr11', 'pos': '62690380', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001130702.2:c.*178G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.62457852C>A', 'vcf': {'chr': '11', 'pos': '62457852', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001130702.2:c.*178G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.62690380C>A', 'vcf': {'chr': '11', 'pos': '62690380', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001130702.2:c.*178G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130702.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124174.2'} def test_variant214(self): variant = '11-108178710-A-AT' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_001351834.1:c.5761_5762insT' in list(results.keys()) - assert results['NM_001351834.1:c.5761_5762insT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001351834.1:c.5761_5762insT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001351834.1:c.5761_5762insT']['alt_genomic_loci'], []) - assert 
results['NM_001351834.1:c.5761_5762insT']['gene_symbol'] == 'ATM' - assert results['NM_001351834.1:c.5761_5762insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001338763.1:p.(Arg1921MetfsTer9)', 'slr': 'NP_001338763.1:p.(R1921Mfs*9)'} assert results['NM_001351834.1:c.5761_5762insT']['submitted_variant'] == '11-108178710-A-AT' - assert results['NM_001351834.1:c.5761_5762insT']['genome_context_intronic_sequence'] == '' - assert results['NM_001351834.1:c.5761_5762insT']['hgvs_lrg_variant'] == '' + assert results['NM_001351834.1:c.5761_5762insT']['gene_symbol'] == 'ATM' + assert results['NM_001351834.1:c.5761_5762insT']['gene_ids'] == {'hgnc_id': 'HGNC:795', 'entrez_gene_id': '472', 'ucsc_id': 'uc001pkb.1', 'omim_id': ['607585']} assert results['NM_001351834.1:c.5761_5762insT']['hgvs_transcript_variant'] == 'NM_001351834.1:c.5761_5762insT' + assert results['NM_001351834.1:c.5761_5762insT']['genome_context_intronic_sequence'] == '' + assert results['NM_001351834.1:c.5761_5762insT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001351834.1:c.5761_5762insT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001351834.1:c.5761_5762insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.108178710_108178711insT', 'vcf': {'chr': 'chr11', 'ref': 'A', 'pos': '108178710', 'alt': 'AT'}} - assert results['NM_001351834.1:c.5761_5762insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.108307983_108307984insT', 'vcf': {'chr': 'chr11', 'ref': 'A', 'pos': '108307983', 'alt': 'AT'}} - assert results['NM_001351834.1:c.5761_5762insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.108178710_108178711insT', 'vcf': {'chr': '11', 'ref': 'A', 'pos': '108178710', 'alt': 'AT'}} - assert results['NM_001351834.1:c.5761_5762insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.108307983_108307984insT', 'vcf': {'chr': '11', 'ref': 
'A', 'pos': '108307983', 'alt': 'AT'}} - assert results['NM_001351834.1:c.5761_5762insT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001338763.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001351834.1'} + assert results['NM_001351834.1:c.5761_5762insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001338763.1:p.(Arg1921MetfsTer9)', 'slr': 'NP_001338763.1:p.(R1921Mfs*9)'} + assert results['NM_001351834.1:c.5761_5762insT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001351834.1:c.5761_5762insT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001351834.1:c.5761_5762insT']['alt_genomic_loci'], []) + assert results['NM_001351834.1:c.5761_5762insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.108178710_108178711insT', 'vcf': {'chr': 'chr11', 'pos': '108178710', 'ref': 'A', 'alt': 'AT'}} + assert results['NM_001351834.1:c.5761_5762insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.108307983_108307984insT', 'vcf': {'chr': 'chr11', 'pos': '108307983', 'ref': 'A', 'alt': 'AT'}} + assert results['NM_001351834.1:c.5761_5762insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.108178710_108178711insT', 'vcf': {'chr': '11', 'pos': '108178710', 'ref': 'A', 'alt': 'AT'}} + assert results['NM_001351834.1:c.5761_5762insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.108307983_108307984insT', 'vcf': {'chr': '11', 'pos': '108307983', 'ref': 'A', 'alt': 'AT'}} + assert results['NM_001351834.1:c.5761_5762insT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001351834.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001338763.1'} - assert results['flag'] == 'gene_variant' assert 'NM_000051.3:c.5761_5762insT' in list(results.keys()) - assert 
results['NM_000051.3:c.5761_5762insT']['hgvs_lrg_transcript_variant'] == 'LRG_135t1:c.5761_5762insT' - assert results['NM_000051.3:c.5761_5762insT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000051.3:c.5761_5762insT']['alt_genomic_loci'], []) - assert results['NM_000051.3:c.5761_5762insT']['gene_symbol'] == 'ATM' - assert results['NM_000051.3:c.5761_5762insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000042.3(LRG_135p1):p.(Arg1921MetfsTer9)', 'slr': 'NP_000042.3:p.(R1921Mfs*9)'} assert results['NM_000051.3:c.5761_5762insT']['submitted_variant'] == '11-108178710-A-AT' - assert results['NM_000051.3:c.5761_5762insT']['genome_context_intronic_sequence'] == '' - assert results['NM_000051.3:c.5761_5762insT']['hgvs_lrg_variant'] == 'LRG_135:g.90152_90153insT' + assert results['NM_000051.3:c.5761_5762insT']['gene_symbol'] == 'ATM' + assert results['NM_000051.3:c.5761_5762insT']['gene_ids'] == {'hgnc_id': 'HGNC:795', 'entrez_gene_id': '472', 'ucsc_id': 'uc001pkb.1', 'omim_id': ['607585']} assert results['NM_000051.3:c.5761_5762insT']['hgvs_transcript_variant'] == 'NM_000051.3:c.5761_5762insT' + assert results['NM_000051.3:c.5761_5762insT']['genome_context_intronic_sequence'] == '' + assert results['NM_000051.3:c.5761_5762insT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000051.3:c.5761_5762insT']['hgvs_refseqgene_variant'] == 'NG_009830.1:g.90152_90153insT' - assert results['NM_000051.3:c.5761_5762insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.108178710_108178711insT', 'vcf': {'chr': 'chr11', 'ref': 'A', 'pos': '108178710', 'alt': 'AT'}} - assert results['NM_000051.3:c.5761_5762insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.108307983_108307984insT', 'vcf': {'chr': 'chr11', 'ref': 'A', 'pos': '108307983', 'alt': 'AT'}} - assert results['NM_000051.3:c.5761_5762insT']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000011.9:g.108178710_108178711insT', 'vcf': {'chr': '11', 'ref': 'A', 'pos': '108178710', 'alt': 'AT'}} - assert results['NM_000051.3:c.5761_5762insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.108307983_108307984insT', 'vcf': {'chr': '11', 'ref': 'A', 'pos': '108307983', 'alt': 'AT'}} - assert results['NM_000051.3:c.5761_5762insT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009830.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000042.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000051.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_135.xml'} - - + assert results['NM_000051.3:c.5761_5762insT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000042.3(LRG_135p1):p.(Arg1921MetfsTer9)', 'slr': 'NP_000042.3:p.(R1921Mfs*9)'} + assert results['NM_000051.3:c.5761_5762insT']['hgvs_lrg_transcript_variant'] == 'LRG_135t1:c.5761_5762insT' + assert results['NM_000051.3:c.5761_5762insT']['hgvs_lrg_variant'] == 'LRG_135:g.90152_90153insT' + self.assertCountEqual(results['NM_000051.3:c.5761_5762insT']['alt_genomic_loci'], []) + assert results['NM_000051.3:c.5761_5762insT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.108178710_108178711insT', 'vcf': {'chr': 'chr11', 'pos': '108178710', 'ref': 'A', 'alt': 'AT'}} + assert results['NM_000051.3:c.5761_5762insT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.108307983_108307984insT', 'vcf': {'chr': 'chr11', 'pos': '108307983', 'ref': 'A', 'alt': 'AT'}} + assert results['NM_000051.3:c.5761_5762insT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.108178710_108178711insT', 'vcf': {'chr': '11', 'pos': '108178710', 'ref': 'A', 'alt': 'AT'}} + assert results['NM_000051.3:c.5761_5762insT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000011.10:g.108307983_108307984insT', 'vcf': {'chr': '11', 'pos': '108307983', 'ref': 'A', 'alt': 'AT'}} + assert results['NM_000051.3:c.5761_5762insT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000051.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000042.3', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009830.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_135.xml'} + def test_variant215(self): variant = '11-111735981-G-A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_001352419.1:c.-108-7C>T' in list(results.keys()) - assert results['NM_001352419.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001352419.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001352419.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) - assert results['NM_001352419.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' - assert results['NM_001352419.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339348.1:p.?', 'slr': 'NP_001339348.1:p.?'} - assert results['NM_001352419.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' - assert results['NM_001352419.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352419.1):c.-108-7C>T' - assert results['NM_001352419.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001352419.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352419.1:c.-108-7C>T' - assert results['NM_001352419.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352419.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352419.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352419.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352419.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352419.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339348.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352419.1'} - - assert 'NM_001352412.1:c.-108-7C>T' in list(results.keys()) - assert results['NM_001352412.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001352412.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001352412.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) - assert results['NM_001352412.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' - assert results['NM_001352412.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339341.1:p.?', 'slr': 'NP_001339341.1:p.?'} - assert results['NM_001352412.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' - assert results['NM_001352412.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 
'NC_000011.9(NM_001352412.1):c.-108-7C>T' - assert results['NM_001352412.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001352412.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352412.1:c.-108-7C>T' - assert results['NM_001352412.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352412.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352412.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352412.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352412.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352412.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339341.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352412.1'} + assert results['flag'] == 'gene_variant' + assert 'NM_001077691.1:c.-108-7C>T' in list(results.keys()) + assert results['NM_001077691.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001077691.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001077691.1:c.-108-7C>T']['gene_ids'] == {'hgnc_id': 'HGNC:15672', 'entrez_gene_id': '79796', 'ucsc_id': 'uc031yet.2', 'omim_id': ['606941']} + assert results['NM_001077691.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001077691.1:c.-108-7C>T' + assert results['NM_001077691.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 
'NC_000011.9(NM_001077691.1):c.-108-7C>T' + assert results['NM_001077691.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001077691.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001077691.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001071159.1:p.?', 'slr': 'NP_001071159.1:p.?'} + assert results['NM_001077691.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001077691.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001077691.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NM_001077691.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001077691.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001077691.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001077691.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001077691.1:c.-108-7C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077691.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001071159.1'} assert 'NM_001077692.1:c.-108-7C>T' in 
list(results.keys()) - assert results['NM_001077692.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001077692.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001077692.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) - assert results['NM_001077692.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' - assert results['NM_001077692.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001071160.1:p.?', 'slr': 'NP_001071160.1:p.?'} assert results['NM_001077692.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' - assert results['NM_001077692.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001077692.1):c.-108-7C>T' - assert results['NM_001077692.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001077692.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001077692.1:c.-108-7C>T']['gene_ids'] == {'hgnc_id': 'HGNC:15672', 'entrez_gene_id': '79796', 'ucsc_id': 'uc031yet.2', 'omim_id': ['606941']} assert results['NM_001077692.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001077692.1:c.-108-7C>T' + assert results['NM_001077692.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001077692.1):c.-108-7C>T' + assert results['NM_001077692.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001077692.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001077692.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert 
results['NM_001077692.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001077692.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001077692.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001077692.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001071160.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077692.1'} - - assert 'NM_001352418.1:c.406-7C>T' in list(results.keys()) - assert results['NM_001352418.1:c.406-7C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001352418.1:c.406-7C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001352418.1:c.406-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) - assert results['NM_001352418.1:c.406-7C>T']['gene_symbol'] == 'ALG9' - assert results['NM_001352418.1:c.406-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339347.1:p.?', 'slr': 'NP_001339347.1:p.?'} - assert results['NM_001352418.1:c.406-7C>T']['submitted_variant'] == '11-111735981-G-A' - assert results['NM_001352418.1:c.406-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352418.1):c.406-7C>T' - assert results['NM_001352418.1:c.406-7C>T']['hgvs_lrg_variant'] == '' - assert 
results['NM_001352418.1:c.406-7C>T']['hgvs_transcript_variant'] == 'NM_001352418.1:c.406-7C>T' - assert results['NM_001352418.1:c.406-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352418.1:c.406-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352418.1:c.406-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352418.1:c.406-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352418.1:c.406-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352418.1:c.406-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339347.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352418.1'} + assert results['NM_001077692.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001071160.1:p.?', 'slr': 'NP_001071160.1:p.?'} + assert results['NM_001077692.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001077692.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001077692.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NM_001077692.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001077692.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001077692.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001077692.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001077692.1:c.-108-7C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077692.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001071160.1'} assert 'NM_001352423.1:c.-108-7C>T' in list(results.keys()) - assert results['NM_001352423.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001352423.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001352423.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) - assert results['NM_001352423.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' - assert results['NM_001352423.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339352.1:p.?', 'slr': 'NP_001339352.1:p.?'} assert results['NM_001352423.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' - assert results['NM_001352423.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 
'NC_000011.9(NM_001352423.1):c.-108-7C>T' - assert results['NM_001352423.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001352423.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352423.1:c.-108-7C>T']['gene_ids'] == {'hgnc_id': 'HGNC:15672', 'entrez_gene_id': '79796', 'ucsc_id': 'uc031yet.2', 'omim_id': ['606941']} assert results['NM_001352423.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352423.1:c.-108-7C>T' + assert results['NM_001352423.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352423.1):c.-108-7C>T' + assert results['NM_001352423.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001352423.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352423.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352423.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352423.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352423.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352423.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339352.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352423.1'} - - assert 'NM_001352415.1:c.-108-7C>T' in list(results.keys()) - assert results['NM_001352415.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_001352415.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001352415.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) - assert results['NM_001352415.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' - assert results['NM_001352415.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339344.1:p.?', 'slr': 'NP_001339344.1:p.?'} - assert results['NM_001352415.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' - assert results['NM_001352415.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352415.1):c.-108-7C>T' - assert results['NM_001352415.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001352415.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352415.1:c.-108-7C>T' - assert results['NM_001352415.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352415.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352415.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352415.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352415.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 
'A'}} - assert results['NM_001352415.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339344.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352415.1'} + assert results['NM_001352423.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339352.1:p.?', 'slr': 'NP_001339352.1:p.?'} + assert results['NM_001352423.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352423.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001352423.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NM_001352423.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352423.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352423.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352423.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352423.1:c.-108-7C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352423.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339352.1'} - assert 
'NM_001352421.1:c.-108-7C>T' in list(results.keys()) - assert results['NM_001352421.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001352421.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001352421.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) - assert results['NM_001352421.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' - assert results['NM_001352421.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339350.1:p.?', 'slr': 'NP_001339350.1:p.?'} - assert results['NM_001352421.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' - assert results['NM_001352421.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352421.1):c.-108-7C>T' - assert results['NM_001352421.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001352421.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352421.1:c.-108-7C>T' - assert results['NM_001352421.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352421.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352421.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352421.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert 
results['NM_001352421.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352421.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339350.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352421.1'} + assert 'NM_001352414.1:c.-108-7C>T' in list(results.keys()) + assert results['NM_001352414.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001352414.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352414.1:c.-108-7C>T']['gene_ids'] == {'hgnc_id': 'HGNC:15672', 'entrez_gene_id': '79796', 'ucsc_id': 'uc031yet.2', 'omim_id': ['606941']} + assert results['NM_001352414.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352414.1:c.-108-7C>T' + assert results['NM_001352414.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352414.1):c.-108-7C>T' + assert results['NM_001352414.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001352414.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001352414.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339343.1:p.?', 'slr': 'NP_001339343.1:p.?'} + assert results['NM_001352414.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352414.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001352414.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NM_001352414.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352414.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352414.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352414.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352414.1:c.-108-7C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352414.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339343.1'} assert 'NM_001352411.1:c.-108-7C>T' in list(results.keys()) - assert results['NM_001352411.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001352411.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001352411.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) - assert results['NM_001352411.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' - assert results['NM_001352411.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339340.1:p.?', 'slr': 'NP_001339340.1:p.?'} assert results['NM_001352411.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' - assert results['NM_001352411.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 
'NC_000011.9(NM_001352411.1):c.-108-7C>T' - assert results['NM_001352411.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001352411.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352411.1:c.-108-7C>T']['gene_ids'] == {'hgnc_id': 'HGNC:15672', 'entrez_gene_id': '79796', 'ucsc_id': 'uc031yet.2', 'omim_id': ['606941']} assert results['NM_001352411.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352411.1:c.-108-7C>T' + assert results['NM_001352411.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352411.1):c.-108-7C>T' + assert results['NM_001352411.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001352411.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352411.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352411.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352411.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352411.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352411.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339340.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352411.1'} + assert results['NM_001352411.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339340.1:p.?', 'slr': 'NP_001339340.1:p.?'} + assert 
results['NM_001352411.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352411.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001352411.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NM_001352411.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352411.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352411.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352411.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352411.1:c.-108-7C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352411.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339340.1'} - assert 'NR_147984.1:n.782-7C>T' in list(results.keys()) - assert results['NR_147984.1:n.782-7C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_147984.1:n.782-7C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_147984.1:n.782-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 
'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) - assert results['NR_147984.1:n.782-7C>T']['gene_symbol'] == 'ALG9' - assert results['NR_147984.1:n.782-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} - assert results['NR_147984.1:n.782-7C>T']['submitted_variant'] == '11-111735981-G-A' - assert results['NR_147984.1:n.782-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NR_147984.1):c.782-7C>T' - assert results['NR_147984.1:n.782-7C>T']['hgvs_lrg_variant'] == '' - assert results['NR_147984.1:n.782-7C>T']['hgvs_transcript_variant'] == 'NR_147984.1:n.782-7C>T' - assert results['NR_147984.1:n.782-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NR_147984.1:n.782-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NR_147984.1:n.782-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NR_147984.1:n.782-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NR_147984.1:n.782-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NR_147984.1:n.782-7C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_147984.1'} + assert 'NM_001352418.1:c.406-7C>T' in list(results.keys()) + assert results['NM_001352418.1:c.406-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert 
results['NM_001352418.1:c.406-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352418.1:c.406-7C>T']['gene_ids'] == {'hgnc_id': 'HGNC:15672', 'entrez_gene_id': '79796', 'ucsc_id': 'uc031yet.2', 'omim_id': ['606941']} + assert results['NM_001352418.1:c.406-7C>T']['hgvs_transcript_variant'] == 'NM_001352418.1:c.406-7C>T' + assert results['NM_001352418.1:c.406-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352418.1):c.406-7C>T' + assert results['NM_001352418.1:c.406-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001352418.1:c.406-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001352418.1:c.406-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339347.1:p.?', 'slr': 'NP_001339347.1:p.?'} + assert results['NM_001352418.1:c.406-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352418.1:c.406-7C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001352418.1:c.406-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NM_001352418.1:c.406-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352418.1:c.406-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352418.1:c.406-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert 
results['NM_001352418.1:c.406-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352418.1:c.406-7C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352418.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339347.1'} - assert 'NM_001077691.1:c.-108-7C>T' in list(results.keys()) - assert results['NM_001077691.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001077691.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001077691.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) - assert results['NM_001077691.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' - assert results['NM_001077691.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001071159.1:p.?', 'slr': 'NP_001071159.1:p.?'} - assert results['NM_001077691.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' - assert results['NM_001077691.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001077691.1):c.-108-7C>T' - assert results['NM_001077691.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001077691.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001077691.1:c.-108-7C>T' - assert results['NM_001077691.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001077691.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert 
results['NM_001077691.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001077691.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001077691.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001077691.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001071159.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077691.1'} + assert 'NM_001352416.1:c.-108-7C>T' in list(results.keys()) + assert results['NM_001352416.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001352416.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352416.1:c.-108-7C>T']['gene_ids'] == {'hgnc_id': 'HGNC:15672', 'entrez_gene_id': '79796', 'ucsc_id': 'uc031yet.2', 'omim_id': ['606941']} + assert results['NM_001352416.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352416.1:c.-108-7C>T' + assert results['NM_001352416.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352416.1):c.-108-7C>T' + assert results['NM_001352416.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001352416.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001352416.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339345.1:p.?', 'slr': 'NP_001339345.1:p.?'} + assert results['NM_001352416.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352416.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['NM_001352416.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NM_001352416.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352416.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352416.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352416.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352416.1:c.-108-7C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352416.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339345.1'} assert 'NM_001352410.1:c.-108-7C>T' in list(results.keys()) - assert results['NM_001352410.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001352410.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001352410.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 
'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) - assert results['NM_001352410.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' - assert results['NM_001352410.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339339.1:p.?', 'slr': 'NP_001339339.1:p.?'} assert results['NM_001352410.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' - assert results['NM_001352410.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352410.1):c.-108-7C>T' - assert results['NM_001352410.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001352410.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352410.1:c.-108-7C>T']['gene_ids'] == {'hgnc_id': 'HGNC:15672', 'entrez_gene_id': '79796', 'ucsc_id': 'uc031yet.2', 'omim_id': ['606941']} assert results['NM_001352410.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352410.1:c.-108-7C>T' + assert results['NM_001352410.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352410.1):c.-108-7C>T' + assert results['NM_001352410.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001352410.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352410.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352410.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352410.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352410.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': 
{'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352410.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339339.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352410.1'} + assert results['NM_001352410.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339339.1:p.?', 'slr': 'NP_001339339.1:p.?'} + assert results['NM_001352410.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352410.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001352410.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NM_001352410.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352410.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352410.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352410.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352410.1:c.-108-7C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352410.1', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339339.1'} + + assert 'NM_001352415.1:c.-108-7C>T' in list(results.keys()) + assert results['NM_001352415.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001352415.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352415.1:c.-108-7C>T']['gene_ids'] == {'hgnc_id': 'HGNC:15672', 'entrez_gene_id': '79796', 'ucsc_id': 'uc031yet.2', 'omim_id': ['606941']} + assert results['NM_001352415.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352415.1:c.-108-7C>T' + assert results['NM_001352415.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352415.1):c.-108-7C>T' + assert results['NM_001352415.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001352415.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001352415.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339344.1:p.?', 'slr': 'NP_001339344.1:p.?'} + assert results['NM_001352415.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352415.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001352415.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NM_001352415.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352415.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert 
results['NM_001352415.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352415.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352415.1:c.-108-7C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352415.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339344.1'} + + assert 'NM_001352413.1:c.-108-7C>T' in list(results.keys()) + assert results['NM_001352413.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001352413.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352413.1:c.-108-7C>T']['gene_ids'] == {'hgnc_id': 'HGNC:15672', 'entrez_gene_id': '79796', 'ucsc_id': 'uc031yet.2', 'omim_id': ['606941']} + assert results['NM_001352413.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352413.1:c.-108-7C>T' + assert results['NM_001352413.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352413.1):c.-108-7C>T' + assert results['NM_001352413.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001352413.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001352413.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339342.1:p.?', 'slr': 'NP_001339342.1:p.?'} + assert results['NM_001352413.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352413.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001352413.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 
'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NM_001352413.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352413.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352413.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352413.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352413.1:c.-108-7C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352413.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339342.1'} assert 'NM_001077690.1:c.406-7C>T' in list(results.keys()) - assert results['NM_001077690.1:c.406-7C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001077690.1:c.406-7C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001077690.1:c.406-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) - assert results['NM_001077690.1:c.406-7C>T']['gene_symbol'] == 'ALG9' - assert results['NM_001077690.1:c.406-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001071158.1:p.?', 'slr': 'NP_001071158.1:p.?'} 
assert results['NM_001077690.1:c.406-7C>T']['submitted_variant'] == '11-111735981-G-A' - assert results['NM_001077690.1:c.406-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001077690.1):c.406-7C>T' - assert results['NM_001077690.1:c.406-7C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001077690.1:c.406-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001077690.1:c.406-7C>T']['gene_ids'] == {'hgnc_id': 'HGNC:15672', 'entrez_gene_id': '79796', 'ucsc_id': 'uc031yet.2', 'omim_id': ['606941']} assert results['NM_001077690.1:c.406-7C>T']['hgvs_transcript_variant'] == 'NM_001077690.1:c.406-7C>T' + assert results['NM_001077690.1:c.406-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001077690.1):c.406-7C>T' + assert results['NM_001077690.1:c.406-7C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001077690.1:c.406-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001077690.1:c.406-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001077690.1:c.406-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001077690.1:c.406-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001077690.1:c.406-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001077690.1:c.406-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001071158.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077690.1'} + assert 
results['NM_001077690.1:c.406-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001071158.1:p.?', 'slr': 'NP_001071158.1:p.?'} + assert results['NM_001077690.1:c.406-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001077690.1:c.406-7C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001077690.1:c.406-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NM_001077690.1:c.406-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001077690.1:c.406-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001077690.1:c.406-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001077690.1:c.406-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001077690.1:c.406-7C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001071158.1'} + + assert 'NM_001352419.1:c.-108-7C>T' in list(results.keys()) + assert results['NM_001352419.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001352419.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert 
results['NM_001352419.1:c.-108-7C>T']['gene_ids'] == {'hgnc_id': 'HGNC:15672', 'entrez_gene_id': '79796', 'ucsc_id': 'uc031yet.2', 'omim_id': ['606941']} + assert results['NM_001352419.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352419.1:c.-108-7C>T' + assert results['NM_001352419.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352419.1):c.-108-7C>T' + assert results['NM_001352419.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001352419.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001352419.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339348.1:p.?', 'slr': 'NP_001339348.1:p.?'} + assert results['NM_001352419.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352419.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001352419.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NM_001352419.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352419.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352419.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352419.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352419.1:c.-108-7C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352419.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339348.1'} - assert results['flag'] == 'gene_variant' assert 'NM_001352422.1:c.-326-7C>T' in list(results.keys()) - assert results['NM_001352422.1:c.-326-7C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001352422.1:c.-326-7C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001352422.1:c.-326-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) - assert results['NM_001352422.1:c.-326-7C>T']['gene_symbol'] == 'ALG9' - assert results['NM_001352422.1:c.-326-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339351.1:p.?', 'slr': 'NP_001339351.1:p.?'} assert results['NM_001352422.1:c.-326-7C>T']['submitted_variant'] == '11-111735981-G-A' - assert results['NM_001352422.1:c.-326-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352422.1):c.-326-7C>T' - assert results['NM_001352422.1:c.-326-7C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001352422.1:c.-326-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352422.1:c.-326-7C>T']['gene_ids'] == {'hgnc_id': 'HGNC:15672', 'entrez_gene_id': '79796', 'ucsc_id': 'uc031yet.2', 'omim_id': ['606941']} assert results['NM_001352422.1:c.-326-7C>T']['hgvs_transcript_variant'] == 'NM_001352422.1:c.-326-7C>T' + assert results['NM_001352422.1:c.-326-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352422.1):c.-326-7C>T' + assert 
results['NM_001352422.1:c.-326-7C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001352422.1:c.-326-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352422.1:c.-326-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352422.1:c.-326-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352422.1:c.-326-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352422.1:c.-326-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352422.1:c.-326-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339351.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352422.1'} + assert results['NM_001352422.1:c.-326-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339351.1:p.?', 'slr': 'NP_001339351.1:p.?'} + assert results['NM_001352422.1:c.-326-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352422.1:c.-326-7C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001352422.1:c.-326-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NM_001352422.1:c.-326-7C>T']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352422.1:c.-326-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352422.1:c.-326-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352422.1:c.-326-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352422.1:c.-326-7C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352422.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339351.1'} - assert 'NM_001352416.1:c.-108-7C>T' in list(results.keys()) - assert results['NM_001352416.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001352416.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001352416.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) - assert results['NM_001352416.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' - assert results['NM_001352416.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339345.1:p.?', 'slr': 'NP_001339345.1:p.?'} - assert results['NM_001352416.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' - assert results['NM_001352416.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 
'NC_000011.9(NM_001352416.1):c.-108-7C>T' - assert results['NM_001352416.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001352416.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352416.1:c.-108-7C>T' - assert results['NM_001352416.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352416.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352416.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352416.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352416.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352416.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339345.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352416.1'} + assert 'NM_001352412.1:c.-108-7C>T' in list(results.keys()) + assert results['NM_001352412.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001352412.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352412.1:c.-108-7C>T']['gene_ids'] == {'hgnc_id': 'HGNC:15672', 'entrez_gene_id': '79796', 'ucsc_id': 'uc031yet.2', 'omim_id': ['606941']} + assert results['NM_001352412.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352412.1:c.-108-7C>T' + assert results['NM_001352412.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352412.1):c.-108-7C>T' + assert 
results['NM_001352412.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001352412.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001352412.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339341.1:p.?', 'slr': 'NP_001339341.1:p.?'} + assert results['NM_001352412.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352412.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001352412.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NM_001352412.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352412.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352412.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352412.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352412.1:c.-108-7C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352412.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339341.1'} - assert 'NM_001352420.1:c.-108-7C>T' in list(results.keys()) - assert 
results['NM_001352420.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001352420.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001352420.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) - assert results['NM_001352420.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' - assert results['NM_001352420.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339349.1:p.?', 'slr': 'NP_001339349.1:p.?'} - assert results['NM_001352420.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' - assert results['NM_001352420.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352420.1):c.-108-7C>T' - assert results['NM_001352420.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001352420.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352420.1:c.-108-7C>T' - assert results['NM_001352420.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352420.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352420.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352420.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352420.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352420.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339349.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352420.1'} + assert 'NM_001352421.1:c.-108-7C>T' in list(results.keys()) + assert results['NM_001352421.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001352421.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352421.1:c.-108-7C>T']['gene_ids'] == {'hgnc_id': 'HGNC:15672', 'entrez_gene_id': '79796', 'ucsc_id': 'uc031yet.2', 'omim_id': ['606941']} + assert results['NM_001352421.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352421.1:c.-108-7C>T' + assert results['NM_001352421.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352421.1):c.-108-7C>T' + assert results['NM_001352421.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001352421.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001352421.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339350.1:p.?', 'slr': 'NP_001339350.1:p.?'} + assert results['NM_001352421.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352421.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001352421.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NM_001352421.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'pos': '111735981', 'ref': 'G', 'alt': 
'A'}} + assert results['NM_001352421.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352421.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352421.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352421.1:c.-108-7C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352421.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339350.1'} + + assert 'NM_001352417.1:c.406-7C>T' in list(results.keys()) + assert results['NM_001352417.1:c.406-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001352417.1:c.406-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352417.1:c.406-7C>T']['gene_ids'] == {'hgnc_id': 'HGNC:15672', 'entrez_gene_id': '79796', 'ucsc_id': 'uc031yet.2', 'omim_id': ['606941']} + assert results['NM_001352417.1:c.406-7C>T']['hgvs_transcript_variant'] == 'NM_001352417.1:c.406-7C>T' + assert results['NM_001352417.1:c.406-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352417.1):c.406-7C>T' + assert results['NM_001352417.1:c.406-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001352417.1:c.406-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001352417.1:c.406-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339346.1:p.?', 'slr': 'NP_001339346.1:p.?'} + assert results['NM_001352417.1:c.406-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352417.1:c.406-7C>T']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['NM_001352417.1:c.406-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NM_001352417.1:c.406-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352417.1:c.406-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352417.1:c.406-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352417.1:c.406-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352417.1:c.406-7C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352417.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339346.1'} + + assert 'NR_147984.1:n.782-7C>T' in list(results.keys()) + assert results['NR_147984.1:n.782-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NR_147984.1:n.782-7C>T']['gene_symbol'] == 'ALG9' + assert results['NR_147984.1:n.782-7C>T']['gene_ids'] == {'hgnc_id': 'HGNC:15672', 'entrez_gene_id': '79796', 'ucsc_id': 'uc031yet.2', 'omim_id': ['606941']} + assert results['NR_147984.1:n.782-7C>T']['hgvs_transcript_variant'] == 'NR_147984.1:n.782-7C>T' + assert results['NR_147984.1:n.782-7C>T']['genome_context_intronic_sequence'] == 
'NC_000011.9(NR_147984.1):c.782-7C>T' + assert results['NR_147984.1:n.782-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_147984.1:n.782-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NR_147984.1:n.782-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_147984.1:n.782-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_147984.1:n.782-7C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_147984.1:n.782-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NR_147984.1:n.782-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NR_147984.1:n.782-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NR_147984.1:n.782-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NR_147984.1:n.782-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NR_147984.1:n.782-7C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_147984.1'} assert 'NM_024740.2:c.406-7C>T' in list(results.keys()) - assert results['NM_024740.2:c.406-7C>T']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_024740.2:c.406-7C>T']['refseqgene_context_intronic_sequence'] == 'NG_009210.1(NM_024740.2):c.406-7C>T' - self.assertCountEqual(results['NM_024740.2:c.406-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) - assert results['NM_024740.2:c.406-7C>T']['gene_symbol'] == 'ALG9' - assert results['NM_024740.2:c.406-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_079016.2:p.?', 'slr': 'NP_079016.2:p.?'} assert results['NM_024740.2:c.406-7C>T']['submitted_variant'] == '11-111735981-G-A' - assert results['NM_024740.2:c.406-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_024740.2):c.406-7C>T' - assert results['NM_024740.2:c.406-7C>T']['hgvs_lrg_variant'] == '' + assert results['NM_024740.2:c.406-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_024740.2:c.406-7C>T']['gene_ids'] == {'hgnc_id': 'HGNC:15672', 'entrez_gene_id': '79796', 'ucsc_id': 'uc031yet.2', 'omim_id': ['606941']} assert results['NM_024740.2:c.406-7C>T']['hgvs_transcript_variant'] == 'NM_024740.2:c.406-7C>T' + assert results['NM_024740.2:c.406-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_024740.2):c.406-7C>T' + assert results['NM_024740.2:c.406-7C>T']['refseqgene_context_intronic_sequence'] == 'NG_009210.1(NM_024740.2):c.406-7C>T' assert results['NM_024740.2:c.406-7C>T']['hgvs_refseqgene_variant'] == 'NG_009210.1:g.11324C>T' - assert results['NM_024740.2:c.406-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_024740.2:c.406-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 
'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_024740.2:c.406-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_024740.2:c.406-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_024740.2:c.406-7C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009210.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_079016.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024740.2'} - - assert 'NM_001352414.1:c.-108-7C>T' in list(results.keys()) - assert results['NM_001352414.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001352414.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001352414.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) - assert results['NM_001352414.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' - assert results['NM_001352414.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339343.1:p.?', 'slr': 'NP_001339343.1:p.?'} - assert results['NM_001352414.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' - assert results['NM_001352414.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352414.1):c.-108-7C>T' - assert results['NM_001352414.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001352414.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352414.1:c.-108-7C>T' - assert 
results['NM_001352414.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352414.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352414.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352414.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352414.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352414.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339343.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352414.1'} + assert results['NM_024740.2:c.406-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_079016.2:p.?', 'slr': 'NP_079016.2:p.?'} + assert results['NM_024740.2:c.406-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_024740.2:c.406-7C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_024740.2:c.406-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NM_024740.2:c.406-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} 
+ assert results['NM_024740.2:c.406-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_024740.2:c.406-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_024740.2:c.406-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_024740.2:c.406-7C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024740.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_079016.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009210.1'} - assert 'NM_001352417.1:c.406-7C>T' in list(results.keys()) - assert results['NM_001352417.1:c.406-7C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001352417.1:c.406-7C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001352417.1:c.406-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) - assert results['NM_001352417.1:c.406-7C>T']['gene_symbol'] == 'ALG9' - assert results['NM_001352417.1:c.406-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339346.1:p.?', 'slr': 'NP_001339346.1:p.?'} - assert results['NM_001352417.1:c.406-7C>T']['submitted_variant'] == '11-111735981-G-A' - assert results['NM_001352417.1:c.406-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352417.1):c.406-7C>T' - assert 
results['NM_001352417.1:c.406-7C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001352417.1:c.406-7C>T']['hgvs_transcript_variant'] == 'NM_001352417.1:c.406-7C>T' - assert results['NM_001352417.1:c.406-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352417.1:c.406-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352417.1:c.406-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352417.1:c.406-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352417.1:c.406-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352417.1:c.406-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339346.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352417.1'} + assert 'NM_001352420.1:c.-108-7C>T' in list(results.keys()) + assert results['NM_001352420.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' + assert results['NM_001352420.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352420.1:c.-108-7C>T']['gene_ids'] == {'hgnc_id': 'HGNC:15672', 'entrez_gene_id': '79796', 'ucsc_id': 'uc031yet.2', 'omim_id': ['606941']} + assert results['NM_001352420.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352420.1:c.-108-7C>T' + assert results['NM_001352420.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352420.1):c.-108-7C>T' + assert 
results['NM_001352420.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001352420.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001352420.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339349.1:p.?', 'slr': 'NP_001339349.1:p.?'} + assert results['NM_001352420.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352420.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001352420.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NM_001352420.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352420.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352420.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352420.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352420.1:c.-108-7C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352420.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339349.1'} assert 'NM_001352409.1:c.-108-7C>T' in list(results.keys()) - assert 
results['NM_001352409.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001352409.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001352409.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) - assert results['NM_001352409.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' - assert results['NM_001352409.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339338.1:p.?', 'slr': 'NP_001339338.1:p.?'} assert results['NM_001352409.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' - assert results['NM_001352409.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352409.1):c.-108-7C>T' - assert results['NM_001352409.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001352409.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' + assert results['NM_001352409.1:c.-108-7C>T']['gene_ids'] == {'hgnc_id': 'HGNC:15672', 'entrez_gene_id': '79796', 'ucsc_id': 'uc031yet.2', 'omim_id': ['606941']} assert results['NM_001352409.1:c.-108-7C>T']['hgvs_transcript_variant'] == 'NM_001352409.1:c.-108-7C>T' + assert results['NM_001352409.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352409.1):c.-108-7C>T' + assert results['NM_001352409.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001352409.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352409.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352409.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352409.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352409.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352409.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339338.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352409.1'} - - assert 'NM_001352413.1:c.-108-7C>T' in list(results.keys()) - assert results['NM_001352413.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001352413.1:c.-108-7C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001352413.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'ref': 'G', 'pos': '117249', 'alt': 'A'}}}]) - assert results['NM_001352413.1:c.-108-7C>T']['gene_symbol'] == 'ALG9' - assert results['NM_001352413.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339342.1:p.?', 'slr': 'NP_001339342.1:p.?'} - assert results['NM_001352413.1:c.-108-7C>T']['submitted_variant'] == '11-111735981-G-A' - assert results['NM_001352413.1:c.-108-7C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NM_001352413.1):c.-108-7C>T' - assert results['NM_001352413.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001352413.1:c.-108-7C>T']['hgvs_transcript_variant'] == 
'NM_001352413.1:c.-108-7C>T' - assert results['NM_001352413.1:c.-108-7C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001352413.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352413.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352413.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111735981', 'alt': 'A'}} - assert results['NM_001352413.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '111865258', 'alt': 'A'}} - assert results['NM_001352413.1:c.-108-7C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339342.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352413.1'} - + assert results['NM_001352409.1:c.-108-7C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001339338.1:p.?', 'slr': 'NP_001339338.1:p.?'} + assert results['NM_001352409.1:c.-108-7C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001352409.1:c.-108-7C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001352409.1:c.-108-7C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'HG388_HG400_PATCH', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871080.1:g.117249G>A', 'vcf': {'chr': 'NW_003871080.1', 'pos': '117249', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NM_001352409.1:c.-108-7C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 
'vcf': {'chr': 'chr11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352409.1:c.-108-7C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': 'chr11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352409.1:c.-108-7C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.111735981G>A', 'vcf': {'chr': '11', 'pos': '111735981', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352409.1:c.-108-7C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.111865258G>A', 'vcf': {'chr': '11', 'pos': '111865258', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001352409.1:c.-108-7C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001352409.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001339338.1'} def test_variant216(self): variant = '12-11023080-C-A' @@ -8356,97 +8540,100 @@ def test_variant216(self): assert results['flag'] == 'gene_variant' assert 'NR_037918.2:n.1184+11736G>T' in list(results.keys()) - assert results['NR_037918.2:n.1184+11736G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_037918.2:n.1184+11736G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_037918.2:n.1184+11736G>T']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187658.1:g.69187C>A', 'vcf': {'chr': 'HSCHR12_3_CTG2', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187658.1:g.69187C>A', 'vcf': {'chr': 'chr12_KI270904v1_alt', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'grch37': {'hgvs_genomic_description': 'NW_003571047.1:g.69187C>A', 'vcf': {'chr': 'HG1133_PATCH', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003571047.1:g.69187C>A', 'vcf': {'chr': 'NW_003571047.1', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'grch37': 
{'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'HSCHR12_2_CTG2', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'NW_003571050.1', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'HSCHR12_2_CTG2', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'chr12_GL877876v1_alt', 'ref': 'C', 'pos': '69187', 'alt': 'A'}}}]) - assert results['NR_037918.2:n.1184+11736G>T']['gene_symbol'] == 'PRH1-PRR4' - assert results['NR_037918.2:n.1184+11736G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_037918.2:n.1184+11736G>T']['submitted_variant'] == '12-11023080-C-A' - assert results['NR_037918.2:n.1184+11736G>T']['genome_context_intronic_sequence'] == 'NC_000012.11(NR_037918.2):c.1184+11736G>T' - assert results['NR_037918.2:n.1184+11736G>T']['hgvs_lrg_variant'] == '' + assert results['NR_037918.2:n.1184+11736G>T']['gene_symbol'] == 'PRH1-PRR4' + assert results['NR_037918.2:n.1184+11736G>T']['gene_ids'] == {} assert results['NR_037918.2:n.1184+11736G>T']['hgvs_transcript_variant'] == 'NR_037918.2:n.1184+11736G>T' + assert results['NR_037918.2:n.1184+11736G>T']['genome_context_intronic_sequence'] == 'NC_000012.11(NR_037918.2):c.1184+11736G>T' + assert results['NR_037918.2:n.1184+11736G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NR_037918.2:n.1184+11736G>T']['hgvs_refseqgene_variant'] == '' - assert results['NR_037918.2:n.1184+11736G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.11023080C>A', 'vcf': {'chr': 'chr12', 'ref': 'C', 'pos': '11023080', 'alt': 'A'}} - assert results['NR_037918.2:n.1184+11736G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.10870481C>A', 'vcf': 
{'chr': 'chr12', 'ref': 'C', 'pos': '10870481', 'alt': 'A'}} - assert results['NR_037918.2:n.1184+11736G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.11023080C>A', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '11023080', 'alt': 'A'}} - assert results['NR_037918.2:n.1184+11736G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.10870481C>A', 'vcf': {'chr': '12', 'ref': 'C', 'pos': '10870481', 'alt': 'A'}} + assert results['NR_037918.2:n.1184+11736G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_037918.2:n.1184+11736G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_037918.2:n.1184+11736G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_037918.2:n.1184+11736G>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'HSCHR12_2_CTG2', 'pos': '69187', 'ref': 'C', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'NW_003571050.1', 'pos': '69187', 'ref': 'C', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'HSCHR12_2_CTG2', 'pos': '69187', 'ref': 'C', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NW_003571050.1:g.69187C>A', 'vcf': {'chr': 'chr12_GL877876v1_alt', 'pos': '69187', 'ref': 'C', 'alt': 'A'}}}, {'grch37': {'hgvs_genomic_description': 'NW_003571047.1:g.69187C>A', 'vcf': {'chr': 'HG1133_PATCH', 'pos': '69187', 'ref': 'C', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003571047.1:g.69187C>A', 'vcf': {'chr': 'NW_003571047.1', 'pos': '69187', 'ref': 'C', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NT_187658.1:g.69187C>A', 'vcf': {'chr': 'HSCHR12_3_CTG2', 'pos': '69187', 'ref': 'C', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187658.1:g.69187C>A', 'vcf': {'chr': 'chr12_KI270904v1_alt', 'pos': '69187', 
'ref': 'C', 'alt': 'A'}}}]) + assert results['NR_037918.2:n.1184+11736G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.11023080C>A', 'vcf': {'chr': 'chr12', 'pos': '11023080', 'ref': 'C', 'alt': 'A'}} + assert results['NR_037918.2:n.1184+11736G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.10870481C>A', 'vcf': {'chr': 'chr12', 'pos': '10870481', 'ref': 'C', 'alt': 'A'}} + assert results['NR_037918.2:n.1184+11736G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.11023080C>A', 'vcf': {'chr': '12', 'pos': '11023080', 'ref': 'C', 'alt': 'A'}} + assert results['NR_037918.2:n.1184+11736G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.10870481C>A', 'vcf': {'chr': '12', 'pos': '10870481', 'ref': 'C', 'alt': 'A'}} assert results['NR_037918.2:n.1184+11736G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_037918.2'} - def test_variant217(self): variant = '12-22018712-TC-T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_020297.3:c.2199-1302del' in list(results.keys()) - assert results['NM_020297.3:c.2199-1302del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_020297.3:c.2199-1302del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_020297.3:c.2199-1302del']['alt_genomic_loci'], []) - assert results['NM_020297.3:c.2199-1302del']['gene_symbol'] == 'ABCC9' - assert results['NM_020297.3:c.2199-1302del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_064693.2(LRG_377p1):p.?', 'slr': 'NP_064693.2:p.?'} - assert results['NM_020297.3:c.2199-1302del']['submitted_variant'] == '12-22018712-TC-T' - assert results['NM_020297.3:c.2199-1302del']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_020297.3):c.2199-1302del' - assert results['NM_020297.3:c.2199-1302del']['hgvs_lrg_variant'] == 
'' - assert results['NM_020297.3:c.2199-1302del']['hgvs_transcript_variant'] == 'NM_020297.3:c.2199-1302del' - assert results['NM_020297.3:c.2199-1302del']['hgvs_refseqgene_variant'] == '' - assert results['NM_020297.3:c.2199-1302del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.22018713del', 'vcf': {'chr': 'chr12', 'ref': 'TC', 'pos': '22018712', 'alt': 'T'}} - assert results['NM_020297.3:c.2199-1302del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.21865779del', 'vcf': {'chr': 'chr12', 'ref': 'TC', 'pos': '21865778', 'alt': 'T'}} - assert results['NM_020297.3:c.2199-1302del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.22018713del', 'vcf': {'chr': '12', 'ref': 'TC', 'pos': '22018712', 'alt': 'T'}} - assert results['NM_020297.3:c.2199-1302del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.21865779del', 'vcf': {'chr': '12', 'ref': 'TC', 'pos': '21865778', 'alt': 'T'}} - assert results['NM_020297.3:c.2199-1302del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_064693.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020297.3'} - - assert 'NM_005691.3:c.2199-1302del' in list(results.keys()) - assert results['NM_005691.3:c.2199-1302del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_005691.3:c.2199-1302del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_005691.3:c.2199-1302del']['alt_genomic_loci'], []) - assert results['NM_005691.3:c.2199-1302del']['gene_symbol'] == 'ABCC9' - assert results['NM_005691.3:c.2199-1302del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005682.2(LRG_377p2):p.?', 'slr': 'NP_005682.2:p.?'} - assert results['NM_005691.3:c.2199-1302del']['submitted_variant'] == '12-22018712-TC-T' - assert results['NM_005691.3:c.2199-1302del']['genome_context_intronic_sequence'] == 
'NC_000012.11(NM_005691.3):c.2199-1302del' - assert results['NM_005691.3:c.2199-1302del']['hgvs_lrg_variant'] == '' - assert results['NM_005691.3:c.2199-1302del']['hgvs_transcript_variant'] == 'NM_005691.3:c.2199-1302del' - assert results['NM_005691.3:c.2199-1302del']['hgvs_refseqgene_variant'] == '' - assert results['NM_005691.3:c.2199-1302del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.22018713del', 'vcf': {'chr': 'chr12', 'ref': 'TC', 'pos': '22018712', 'alt': 'T'}} - assert results['NM_005691.3:c.2199-1302del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.21865779del', 'vcf': {'chr': 'chr12', 'ref': 'TC', 'pos': '21865778', 'alt': 'T'}} - assert results['NM_005691.3:c.2199-1302del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.22018713del', 'vcf': {'chr': '12', 'ref': 'TC', 'pos': '22018712', 'alt': 'T'}} - assert results['NM_005691.3:c.2199-1302del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.21865779del', 'vcf': {'chr': '12', 'ref': 'TC', 'pos': '21865778', 'alt': 'T'}} - assert results['NM_005691.3:c.2199-1302del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005682.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005691.3'} - + assert results['flag'] == 'gene_variant' assert 'NM_020297.2:c.2199-1302del' in list(results.keys()) - assert results['NM_020297.2:c.2199-1302del']['hgvs_lrg_transcript_variant'] == 'LRG_377t1:c.2199-1302del' - assert results['NM_020297.2:c.2199-1302del']['refseqgene_context_intronic_sequence'] == 'NG_012819.1(NM_020297.2):c.2199-1302del' - self.assertCountEqual(results['NM_020297.2:c.2199-1302del']['alt_genomic_loci'], []) - assert results['NM_020297.2:c.2199-1302del']['gene_symbol'] == 'ABCC9' - assert results['NM_020297.2:c.2199-1302del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_064693.2(LRG_377p1):p.?', 'slr': 
'NP_064693.2:p.?'} assert results['NM_020297.2:c.2199-1302del']['submitted_variant'] == '12-22018712-TC-T' - assert results['NM_020297.2:c.2199-1302del']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_020297.2):c.2199-1302del' - assert results['NM_020297.2:c.2199-1302del']['hgvs_lrg_variant'] == 'LRG_377:g.75916del' + assert results['NM_020297.2:c.2199-1302del']['gene_symbol'] == 'ABCC9' + assert results['NM_020297.2:c.2199-1302del']['gene_ids'] == {'hgnc_id': 'HGNC:60', 'entrez_gene_id': '10060', 'ucsc_id': 'uc001rfi.2', 'omim_id': ['601439']} assert results['NM_020297.2:c.2199-1302del']['hgvs_transcript_variant'] == 'NM_020297.2:c.2199-1302del' + assert results['NM_020297.2:c.2199-1302del']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_020297.2):c.2199-1302del' + assert results['NM_020297.2:c.2199-1302del']['refseqgene_context_intronic_sequence'] == 'NG_012819.1(NM_020297.2):c.2199-1302del' assert results['NM_020297.2:c.2199-1302del']['hgvs_refseqgene_variant'] == 'NG_012819.1:g.75916del' - assert results['NM_020297.2:c.2199-1302del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.22018713del', 'vcf': {'chr': 'chr12', 'ref': 'TC', 'pos': '22018712', 'alt': 'T'}} + assert results['NM_020297.2:c.2199-1302del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_064693.2(LRG_377p1):p.?', 'slr': 'NP_064693.2:p.?'} + assert results['NM_020297.2:c.2199-1302del']['hgvs_lrg_transcript_variant'] == 'LRG_377t1:c.2199-1302del' + assert results['NM_020297.2:c.2199-1302del']['hgvs_lrg_variant'] == 'LRG_377:g.75916del' + self.assertCountEqual(results['NM_020297.2:c.2199-1302del']['alt_genomic_loci'], []) + assert results['NM_020297.2:c.2199-1302del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.22018713del', 'vcf': {'chr': 'chr12', 'pos': '22018712', 'ref': 'TC', 'alt': 'T'}} assert 'hg38' not in list(results['NM_020297.2:c.2199-1302del']['primary_assembly_loci'].keys()) - assert 
results['NM_020297.2:c.2199-1302del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.22018713del', 'vcf': {'chr': '12', 'ref': 'TC', 'pos': '22018712', 'alt': 'T'}} + assert results['NM_020297.2:c.2199-1302del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.22018713del', 'vcf': {'chr': '12', 'pos': '22018712', 'ref': 'TC', 'alt': 'T'}} assert 'grch38' not in list(results['NM_020297.2:c.2199-1302del']['primary_assembly_loci'].keys()) - assert results['NM_020297.2:c.2199-1302del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012819.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_064693.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020297.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_377.xml'} + assert results['NM_020297.2:c.2199-1302del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020297.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_064693.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012819.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_377.xml'} - assert results['flag'] == 'gene_variant' assert 'NM_005691.2:c.2199-1302del' in list(results.keys()) - assert results['NM_005691.2:c.2199-1302del']['hgvs_lrg_transcript_variant'] == 'LRG_377t2:c.2199-1302del' - assert results['NM_005691.2:c.2199-1302del']['refseqgene_context_intronic_sequence'] == 'NG_012819.1(NM_005691.2):c.2199-1302del' - self.assertCountEqual(results['NM_005691.2:c.2199-1302del']['alt_genomic_loci'], []) - assert results['NM_005691.2:c.2199-1302del']['gene_symbol'] == 'ABCC9' - assert results['NM_005691.2:c.2199-1302del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005682.2(LRG_377p2):p.?', 'slr': 'NP_005682.2:p.?'} assert results['NM_005691.2:c.2199-1302del']['submitted_variant'] == '12-22018712-TC-T' - assert 
results['NM_005691.2:c.2199-1302del']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_005691.2):c.2199-1302del' - assert results['NM_005691.2:c.2199-1302del']['hgvs_lrg_variant'] == 'LRG_377:g.75916del' + assert results['NM_005691.2:c.2199-1302del']['gene_symbol'] == 'ABCC9' + assert results['NM_005691.2:c.2199-1302del']['gene_ids'] == {'hgnc_id': 'HGNC:60', 'entrez_gene_id': '10060', 'ucsc_id': 'uc001rfi.2', 'omim_id': ['601439']} assert results['NM_005691.2:c.2199-1302del']['hgvs_transcript_variant'] == 'NM_005691.2:c.2199-1302del' + assert results['NM_005691.2:c.2199-1302del']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_005691.2):c.2199-1302del' + assert results['NM_005691.2:c.2199-1302del']['refseqgene_context_intronic_sequence'] == 'NG_012819.1(NM_005691.2):c.2199-1302del' assert results['NM_005691.2:c.2199-1302del']['hgvs_refseqgene_variant'] == 'NG_012819.1:g.75916del' - assert results['NM_005691.2:c.2199-1302del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.22018713del', 'vcf': {'chr': 'chr12', 'ref': 'TC', 'pos': '22018712', 'alt': 'T'}} + assert results['NM_005691.2:c.2199-1302del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005682.2(LRG_377p2):p.?', 'slr': 'NP_005682.2:p.?'} + assert results['NM_005691.2:c.2199-1302del']['hgvs_lrg_transcript_variant'] == 'LRG_377t2:c.2199-1302del' + assert results['NM_005691.2:c.2199-1302del']['hgvs_lrg_variant'] == 'LRG_377:g.75916del' + self.assertCountEqual(results['NM_005691.2:c.2199-1302del']['alt_genomic_loci'], []) + assert results['NM_005691.2:c.2199-1302del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.22018713del', 'vcf': {'chr': 'chr12', 'pos': '22018712', 'ref': 'TC', 'alt': 'T'}} assert 'hg38' not in list(results['NM_005691.2:c.2199-1302del']['primary_assembly_loci'].keys()) - assert results['NM_005691.2:c.2199-1302del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000012.11:g.22018713del', 'vcf': {'chr': '12', 'ref': 'TC', 'pos': '22018712', 'alt': 'T'}} + assert results['NM_005691.2:c.2199-1302del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.22018713del', 'vcf': {'chr': '12', 'pos': '22018712', 'ref': 'TC', 'alt': 'T'}} assert 'grch38' not in list(results['NM_005691.2:c.2199-1302del']['primary_assembly_loci'].keys()) - assert results['NM_005691.2:c.2199-1302del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012819.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005682.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005691.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_377.xml'} + assert results['NM_005691.2:c.2199-1302del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005691.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005682.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012819.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_377.xml'} + + assert 'NM_005691.3:c.2199-1302del' in list(results.keys()) + assert results['NM_005691.3:c.2199-1302del']['submitted_variant'] == '12-22018712-TC-T' + assert results['NM_005691.3:c.2199-1302del']['gene_symbol'] == 'ABCC9' + assert results['NM_005691.3:c.2199-1302del']['gene_ids'] == {'hgnc_id': 'HGNC:60', 'entrez_gene_id': '10060', 'ucsc_id': 'uc001rfi.2', 'omim_id': ['601439']} + assert results['NM_005691.3:c.2199-1302del']['hgvs_transcript_variant'] == 'NM_005691.3:c.2199-1302del' + assert results['NM_005691.3:c.2199-1302del']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_005691.3):c.2199-1302del' + assert results['NM_005691.3:c.2199-1302del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_005691.3:c.2199-1302del']['hgvs_refseqgene_variant'] == '' + assert results['NM_005691.3:c.2199-1302del']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_005682.2(LRG_377p2):p.?', 'slr': 'NP_005682.2:p.?'} + assert results['NM_005691.3:c.2199-1302del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_005691.3:c.2199-1302del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_005691.3:c.2199-1302del']['alt_genomic_loci'], []) + assert results['NM_005691.3:c.2199-1302del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.22018713del', 'vcf': {'chr': 'chr12', 'pos': '22018712', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_005691.3:c.2199-1302del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.21865779del', 'vcf': {'chr': 'chr12', 'pos': '21865778', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_005691.3:c.2199-1302del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.22018713del', 'vcf': {'chr': '12', 'pos': '22018712', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_005691.3:c.2199-1302del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.21865779del', 'vcf': {'chr': '12', 'pos': '21865778', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_005691.3:c.2199-1302del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005691.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005682.2'} + assert 'NM_020297.3:c.2199-1302del' in list(results.keys()) + assert results['NM_020297.3:c.2199-1302del']['submitted_variant'] == '12-22018712-TC-T' + assert results['NM_020297.3:c.2199-1302del']['gene_symbol'] == 'ABCC9' + assert results['NM_020297.3:c.2199-1302del']['gene_ids'] == {'hgnc_id': 'HGNC:60', 'entrez_gene_id': '10060', 'ucsc_id': 'uc001rfi.2', 'omim_id': ['601439']} + assert results['NM_020297.3:c.2199-1302del']['hgvs_transcript_variant'] == 'NM_020297.3:c.2199-1302del' + assert results['NM_020297.3:c.2199-1302del']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_020297.3):c.2199-1302del' + assert 
results['NM_020297.3:c.2199-1302del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_020297.3:c.2199-1302del']['hgvs_refseqgene_variant'] == '' + assert results['NM_020297.3:c.2199-1302del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_064693.2(LRG_377p1):p.?', 'slr': 'NP_064693.2:p.?'} + assert results['NM_020297.3:c.2199-1302del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_020297.3:c.2199-1302del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_020297.3:c.2199-1302del']['alt_genomic_loci'], []) + assert results['NM_020297.3:c.2199-1302del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.22018713del', 'vcf': {'chr': 'chr12', 'pos': '22018712', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_020297.3:c.2199-1302del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.21865779del', 'vcf': {'chr': 'chr12', 'pos': '21865778', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_020297.3:c.2199-1302del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.22018713del', 'vcf': {'chr': '12', 'pos': '22018712', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_020297.3:c.2199-1302del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.21865779del', 'vcf': {'chr': '12', 'pos': '21865778', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_020297.3:c.2199-1302del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020297.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_064693.2'} def test_variant218(self): variant = '12-52912946-T-C' @@ -8455,373 +8642,388 @@ def test_variant218(self): assert results['flag'] == 'gene_variant' assert 'NM_000424.3:c.556-2A>G' in list(results.keys()) - assert results['NM_000424.3:c.556-2A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000424.3:c.556-2A>G']['refseqgene_context_intronic_sequence'] == 
'NG_008297.1(NM_000424.3):c.556-2A>G' - self.assertCountEqual(results['NM_000424.3:c.556-2A>G']['alt_genomic_loci'], []) - assert results['NM_000424.3:c.556-2A>G']['gene_symbol'] == 'KRT5' - assert results['NM_000424.3:c.556-2A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000415.2:p.?', 'slr': 'NP_000415.2:p.?'} assert results['NM_000424.3:c.556-2A>G']['submitted_variant'] == '12-52912946-T-C' - assert results['NM_000424.3:c.556-2A>G']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_000424.3):c.556-2A>G' - assert results['NM_000424.3:c.556-2A>G']['hgvs_lrg_variant'] == '' + assert results['NM_000424.3:c.556-2A>G']['gene_symbol'] == 'KRT5' + assert results['NM_000424.3:c.556-2A>G']['gene_ids'] == {'hgnc_id': 'HGNC:6442', 'entrez_gene_id': '3852', 'ucsc_id': 'uc001san.4', 'omim_id': ['148040']} assert results['NM_000424.3:c.556-2A>G']['hgvs_transcript_variant'] == 'NM_000424.3:c.556-2A>G' + assert results['NM_000424.3:c.556-2A>G']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_000424.3):c.556-2A>G' + assert results['NM_000424.3:c.556-2A>G']['refseqgene_context_intronic_sequence'] == 'NG_008297.1(NM_000424.3):c.556-2A>G' assert results['NM_000424.3:c.556-2A>G']['hgvs_refseqgene_variant'] == 'NG_008297.1:g.6298A>G' - assert results['NM_000424.3:c.556-2A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.52912946T>C', 'vcf': {'chr': 'chr12', 'ref': 'T', 'pos': '52912946', 'alt': 'C'}} - assert results['NM_000424.3:c.556-2A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.52519162T>C', 'vcf': {'chr': 'chr12', 'ref': 'T', 'pos': '52519162', 'alt': 'C'}} - assert results['NM_000424.3:c.556-2A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.52912946T>C', 'vcf': {'chr': '12', 'ref': 'T', 'pos': '52912946', 'alt': 'C'}} - assert results['NM_000424.3:c.556-2A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000012.12:g.52519162T>C', 'vcf': {'chr': '12', 'ref': 'T', 'pos': '52519162', 'alt': 'C'}} - assert results['NM_000424.3:c.556-2A>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008297.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000415.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000424.3'} - + assert results['NM_000424.3:c.556-2A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000415.2:p.?', 'slr': 'NP_000415.2:p.?'} + assert results['NM_000424.3:c.556-2A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000424.3:c.556-2A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000424.3:c.556-2A>G']['alt_genomic_loci'], []) + assert results['NM_000424.3:c.556-2A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.52912946T>C', 'vcf': {'chr': 'chr12', 'pos': '52912946', 'ref': 'T', 'alt': 'C'}} + assert results['NM_000424.3:c.556-2A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.52519162T>C', 'vcf': {'chr': 'chr12', 'pos': '52519162', 'ref': 'T', 'alt': 'C'}} + assert results['NM_000424.3:c.556-2A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.52912946T>C', 'vcf': {'chr': '12', 'pos': '52912946', 'ref': 'T', 'alt': 'C'}} + assert results['NM_000424.3:c.556-2A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.52519162T>C', 'vcf': {'chr': '12', 'pos': '52519162', 'ref': 'T', 'alt': 'C'}} + assert results['NM_000424.3:c.556-2A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000424.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000415.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008297.1'} def test_variant219(self): variant = '12-103234292-TC-T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] 
== 'gene_variant' + assert 'NM_000277.1:c.1200del' in list(results.keys()) + assert results['NM_000277.1:c.1200del']['submitted_variant'] == '12-103234292-TC-T' + assert results['NM_000277.1:c.1200del']['gene_symbol'] == 'PAH' + assert results['NM_000277.1:c.1200del']['gene_ids'] == {'hgnc_id': 'HGNC:8582', 'entrez_gene_id': '5053', 'ucsc_id': 'uc001tjq.2', 'omim_id': ['612349']} + assert results['NM_000277.1:c.1200del']['hgvs_transcript_variant'] == 'NM_000277.1:c.1200del' + assert results['NM_000277.1:c.1200del']['genome_context_intronic_sequence'] == '' + assert results['NM_000277.1:c.1200del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000277.1:c.1200del']['hgvs_refseqgene_variant'] == 'NG_008690.1:g.82088del' + assert results['NM_000277.1:c.1200del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000268.1:p.(Asn401ThrfsTer51)', 'slr': 'NP_000268.1:p.(N401Tfs*51)'} + assert results['NM_000277.1:c.1200del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000277.1:c.1200del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000277.1:c.1200del']['alt_genomic_loci'], []) + assert results['NM_000277.1:c.1200del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.103234294del', 'vcf': {'chr': 'chr12', 'pos': '103234292', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_000277.1:c.1200del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102840516del', 'vcf': {'chr': 'chr12', 'pos': '102840514', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_000277.1:c.1200del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.103234294del', 'vcf': {'chr': '12', 'pos': '103234292', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_000277.1:c.1200del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102840516del', 'vcf': {'chr': '12', 'pos': '102840514', 'ref': 'TC', 'alt': 'T'}} + assert 
results['NM_000277.1:c.1200del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000277.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000268.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008690.1'} + assert 'NM_001354304.1:c.1200del' in list(results.keys()) - assert results['NM_001354304.1:c.1200del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001354304.1:c.1200del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001354304.1:c.1200del']['alt_genomic_loci'], []) - assert results['NM_001354304.1:c.1200del']['gene_symbol'] == 'PAH' - assert results['NM_001354304.1:c.1200del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001341233.1:p.(Asn401ThrfsTer51)', 'slr': 'NP_001341233.1:p.(N401Tfs*51)'} assert results['NM_001354304.1:c.1200del']['submitted_variant'] == '12-103234292-TC-T' - assert results['NM_001354304.1:c.1200del']['genome_context_intronic_sequence'] == '' - assert results['NM_001354304.1:c.1200del']['hgvs_lrg_variant'] == '' + assert results['NM_001354304.1:c.1200del']['gene_symbol'] == 'PAH' + assert results['NM_001354304.1:c.1200del']['gene_ids'] == {'hgnc_id': 'HGNC:8582', 'entrez_gene_id': '5053', 'ucsc_id': 'uc001tjq.2', 'omim_id': ['612349']} assert results['NM_001354304.1:c.1200del']['hgvs_transcript_variant'] == 'NM_001354304.1:c.1200del' + assert results['NM_001354304.1:c.1200del']['genome_context_intronic_sequence'] == '' + assert results['NM_001354304.1:c.1200del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001354304.1:c.1200del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001354304.1:c.1200del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.103234294del', 'vcf': {'chr': 'chr12', 'ref': 'TC', 'pos': '103234292', 'alt': 'T'}} - assert results['NM_001354304.1:c.1200del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000012.12:g.102840516del', 'vcf': {'chr': 'chr12', 'ref': 'TC', 'pos': '102840514', 'alt': 'T'}} - assert results['NM_001354304.1:c.1200del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.103234294del', 'vcf': {'chr': '12', 'ref': 'TC', 'pos': '103234292', 'alt': 'T'}} - assert results['NM_001354304.1:c.1200del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102840516del', 'vcf': {'chr': '12', 'ref': 'TC', 'pos': '102840514', 'alt': 'T'}} - assert results['NM_001354304.1:c.1200del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341233.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354304.1'} + assert results['NM_001354304.1:c.1200del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001341233.1:p.(Asn401ThrfsTer51)', 'slr': 'NP_001341233.1:p.(N401Tfs*51)'} + assert results['NM_001354304.1:c.1200del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001354304.1:c.1200del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001354304.1:c.1200del']['alt_genomic_loci'], []) + assert results['NM_001354304.1:c.1200del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.103234294del', 'vcf': {'chr': 'chr12', 'pos': '103234292', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_001354304.1:c.1200del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102840516del', 'vcf': {'chr': 'chr12', 'pos': '102840514', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_001354304.1:c.1200del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.103234294del', 'vcf': {'chr': '12', 'pos': '103234292', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_001354304.1:c.1200del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102840516del', 'vcf': {'chr': '12', 'pos': '102840514', 'ref': 'TC', 'alt': 'T'}} + assert 
results['NM_001354304.1:c.1200del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354304.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341233.1'} assert 'NM_000277.2:c.1200del' in list(results.keys()) - assert results['NM_000277.2:c.1200del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000277.2:c.1200del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000277.2:c.1200del']['alt_genomic_loci'], []) - assert results['NM_000277.2:c.1200del']['gene_symbol'] == 'PAH' - assert results['NM_000277.2:c.1200del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000268.1:p.(Asn401ThrfsTer51)', 'slr': 'NP_000268.1:p.(N401Tfs*51)'} assert results['NM_000277.2:c.1200del']['submitted_variant'] == '12-103234292-TC-T' - assert results['NM_000277.2:c.1200del']['genome_context_intronic_sequence'] == '' - assert results['NM_000277.2:c.1200del']['hgvs_lrg_variant'] == '' + assert results['NM_000277.2:c.1200del']['gene_symbol'] == 'PAH' + assert results['NM_000277.2:c.1200del']['gene_ids'] == {'hgnc_id': 'HGNC:8582', 'entrez_gene_id': '5053', 'ucsc_id': 'uc001tjq.2', 'omim_id': ['612349']} assert results['NM_000277.2:c.1200del']['hgvs_transcript_variant'] == 'NM_000277.2:c.1200del' + assert results['NM_000277.2:c.1200del']['genome_context_intronic_sequence'] == '' + assert results['NM_000277.2:c.1200del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000277.2:c.1200del']['hgvs_refseqgene_variant'] == '' - assert results['NM_000277.2:c.1200del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.103234294del', 'vcf': {'chr': 'chr12', 'ref': 'TC', 'pos': '103234292', 'alt': 'T'}} - assert results['NM_000277.2:c.1200del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102840516del', 'vcf': {'chr': 'chr12', 'ref': 'TC', 'pos': '102840514', 'alt': 'T'}} - assert 
results['NM_000277.2:c.1200del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.103234294del', 'vcf': {'chr': '12', 'ref': 'TC', 'pos': '103234292', 'alt': 'T'}} - assert results['NM_000277.2:c.1200del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102840516del', 'vcf': {'chr': '12', 'ref': 'TC', 'pos': '102840514', 'alt': 'T'}} - assert results['NM_000277.2:c.1200del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000268.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000277.2'} + assert results['NM_000277.2:c.1200del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000268.1:p.(Asn401ThrfsTer51)', 'slr': 'NP_000268.1:p.(N401Tfs*51)'} + assert results['NM_000277.2:c.1200del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000277.2:c.1200del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000277.2:c.1200del']['alt_genomic_loci'], []) + assert results['NM_000277.2:c.1200del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.103234294del', 'vcf': {'chr': 'chr12', 'pos': '103234292', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_000277.2:c.1200del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102840516del', 'vcf': {'chr': 'chr12', 'pos': '102840514', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_000277.2:c.1200del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.103234294del', 'vcf': {'chr': '12', 'pos': '103234292', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_000277.2:c.1200del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102840516del', 'vcf': {'chr': '12', 'pos': '102840514', 'ref': 'TC', 'alt': 'T'}} + assert results['NM_000277.2:c.1200del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000277.2', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_000268.1'} + + def test_variant220(self): + variant = '12-103311124-T-C' + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_000277.1:c.1200del' in list(results.keys()) - assert results['NM_000277.1:c.1200del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000277.1:c.1200del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000277.1:c.1200del']['alt_genomic_loci'], []) - assert results['NM_000277.1:c.1200del']['gene_symbol'] == 'PAH' - assert results['NM_000277.1:c.1200del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000268.1:p.(Asn401ThrfsTer51)', 'slr': 'NP_000268.1:p.(N401Tfs*51)'} - assert results['NM_000277.1:c.1200del']['submitted_variant'] == '12-103234292-TC-T' - assert results['NM_000277.1:c.1200del']['genome_context_intronic_sequence'] == '' - assert results['NM_000277.1:c.1200del']['hgvs_lrg_variant'] == '' - assert results['NM_000277.1:c.1200del']['hgvs_transcript_variant'] == 'NM_000277.1:c.1200del' - assert results['NM_000277.1:c.1200del']['hgvs_refseqgene_variant'] == 'NG_008690.1:g.82088del' - assert results['NM_000277.1:c.1200del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.103234294del', 'vcf': {'chr': 'chr12', 'ref': 'TC', 'pos': '103234292', 'alt': 'T'}} - assert results['NM_000277.1:c.1200del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102840516del', 'vcf': {'chr': 'chr12', 'ref': 'TC', 'pos': '102840514', 'alt': 'T'}} - assert results['NM_000277.1:c.1200del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.103234294del', 'vcf': {'chr': '12', 'ref': 'TC', 'pos': '103234292', 'alt': 'T'}} - assert results['NM_000277.1:c.1200del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102840516del', 'vcf': {'chr': '12', 
'ref': 'TC', 'pos': '102840514', 'alt': 'T'}} - assert results['NM_000277.1:c.1200del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000268.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000277.1'} - - - def test_variant220(self): - variant = '12-103311124-T-C' - results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() - print(results) + assert 'NM_000277.1:c.-215A>G' in list(results.keys()) + assert results['NM_000277.1:c.-215A>G']['submitted_variant'] == '12-103311124-T-C' + assert results['NM_000277.1:c.-215A>G']['gene_symbol'] == 'PAH' + assert results['NM_000277.1:c.-215A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8582', 'entrez_gene_id': '5053', 'ucsc_id': 'uc001tjq.2', 'omim_id': ['612349']} + assert results['NM_000277.1:c.-215A>G']['hgvs_transcript_variant'] == 'NM_000277.1:c.-215A>G' + assert results['NM_000277.1:c.-215A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_000277.1:c.-215A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000277.1:c.-215A>G']['hgvs_refseqgene_variant'] == 'NG_008690.1:g.5258A>G' + assert results['NM_000277.1:c.-215A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000268.1:p.?', 'slr': 'NP_000268.1:p.?'} + assert results['NM_000277.1:c.-215A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000277.1:c.-215A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000277.1:c.-215A>G']['alt_genomic_loci'], []) + assert results['NM_000277.1:c.-215A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': 'chr12', 'pos': '103311124', 'ref': 'T', 'alt': 'C'}} + assert results['NM_000277.1:c.-215A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': 'chr12', 'pos': '102917346', 'ref': 'T', 'alt': 'C'}} + assert 
results['NM_000277.1:c.-215A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': '12', 'pos': '103311124', 'ref': 'T', 'alt': 'C'}} + assert results['NM_000277.1:c.-215A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': '12', 'pos': '102917346', 'ref': 'T', 'alt': 'C'}} + assert results['NM_000277.1:c.-215A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000277.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000268.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008690.1'} assert 'NM_001354304.1:c.-95-121A>G' in list(results.keys()) - assert results['NM_001354304.1:c.-95-121A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001354304.1:c.-95-121A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001354304.1:c.-95-121A>G']['alt_genomic_loci'], []) - assert results['NM_001354304.1:c.-95-121A>G']['gene_symbol'] == 'PAH' - assert results['NM_001354304.1:c.-95-121A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001341233.1:p.?', 'slr': 'NP_001341233.1:p.?'} assert results['NM_001354304.1:c.-95-121A>G']['submitted_variant'] == '12-103311124-T-C' - assert results['NM_001354304.1:c.-95-121A>G']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_001354304.1):c.-95-121A>G' - assert results['NM_001354304.1:c.-95-121A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001354304.1:c.-95-121A>G']['gene_symbol'] == 'PAH' + assert results['NM_001354304.1:c.-95-121A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8582', 'entrez_gene_id': '5053', 'ucsc_id': 'uc001tjq.2', 'omim_id': ['612349']} assert results['NM_001354304.1:c.-95-121A>G']['hgvs_transcript_variant'] == 'NM_001354304.1:c.-95-121A>G' + assert results['NM_001354304.1:c.-95-121A>G']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_001354304.1):c.-95-121A>G' + assert 
results['NM_001354304.1:c.-95-121A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001354304.1:c.-95-121A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001354304.1:c.-95-121A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': 'chr12', 'ref': 'T', 'pos': '103311124', 'alt': 'C'}} - assert results['NM_001354304.1:c.-95-121A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': 'chr12', 'ref': 'T', 'pos': '102917346', 'alt': 'C'}} - assert results['NM_001354304.1:c.-95-121A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': '12', 'ref': 'T', 'pos': '103311124', 'alt': 'C'}} - assert results['NM_001354304.1:c.-95-121A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': '12', 'ref': 'T', 'pos': '102917346', 'alt': 'C'}} - assert results['NM_001354304.1:c.-95-121A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341233.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354304.1'} + assert results['NM_001354304.1:c.-95-121A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001341233.1:p.?', 'slr': 'NP_001341233.1:p.?'} + assert results['NM_001354304.1:c.-95-121A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001354304.1:c.-95-121A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001354304.1:c.-95-121A>G']['alt_genomic_loci'], []) + assert results['NM_001354304.1:c.-95-121A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': 'chr12', 'pos': '103311124', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001354304.1:c.-95-121A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': 
'chr12', 'pos': '102917346', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001354304.1:c.-95-121A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': '12', 'pos': '103311124', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001354304.1:c.-95-121A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': '12', 'pos': '102917346', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001354304.1:c.-95-121A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354304.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341233.1'} - assert results['flag'] == 'gene_variant' assert 'NM_000277.2:c.-216A>G' in list(results.keys()) - assert results['NM_000277.2:c.-216A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000277.2:c.-216A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000277.2:c.-216A>G']['alt_genomic_loci'], []) - assert results['NM_000277.2:c.-216A>G']['gene_symbol'] == 'PAH' - assert results['NM_000277.2:c.-216A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000268.1:p.?', 'slr': 'NP_000268.1:p.?'} assert results['NM_000277.2:c.-216A>G']['submitted_variant'] == '12-103311124-T-C' - assert results['NM_000277.2:c.-216A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_000277.2:c.-216A>G']['hgvs_lrg_variant'] == '' + assert results['NM_000277.2:c.-216A>G']['gene_symbol'] == 'PAH' + assert results['NM_000277.2:c.-216A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8582', 'entrez_gene_id': '5053', 'ucsc_id': 'uc001tjq.2', 'omim_id': ['612349']} assert results['NM_000277.2:c.-216A>G']['hgvs_transcript_variant'] == 'NM_000277.2:c.-216A>G' + assert results['NM_000277.2:c.-216A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_000277.2:c.-216A>G']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_000277.2:c.-216A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_000277.2:c.-216A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': 'chr12', 'ref': 'T', 'pos': '103311124', 'alt': 'C'}} - assert results['NM_000277.2:c.-216A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': 'chr12', 'ref': 'T', 'pos': '102917346', 'alt': 'C'}} - assert results['NM_000277.2:c.-216A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': '12', 'ref': 'T', 'pos': '103311124', 'alt': 'C'}} - assert results['NM_000277.2:c.-216A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': '12', 'ref': 'T', 'pos': '102917346', 'alt': 'C'}} - assert results['NM_000277.2:c.-216A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000268.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000277.2'} - - assert 'NM_000277.1:c.-215A>G' in list(results.keys()) - assert results['NM_000277.1:c.-215A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000277.1:c.-215A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000277.1:c.-215A>G']['alt_genomic_loci'], []) - assert results['NM_000277.1:c.-215A>G']['gene_symbol'] == 'PAH' - assert results['NM_000277.1:c.-215A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000268.1:p.?', 'slr': 'NP_000268.1:p.?'} - assert results['NM_000277.1:c.-215A>G']['submitted_variant'] == '12-103311124-T-C' - assert results['NM_000277.1:c.-215A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_000277.1:c.-215A>G']['hgvs_lrg_variant'] == '' - assert results['NM_000277.1:c.-215A>G']['hgvs_transcript_variant'] == 'NM_000277.1:c.-215A>G' - assert 
results['NM_000277.1:c.-215A>G']['hgvs_refseqgene_variant'] == 'NG_008690.1:g.5258A>G' - assert results['NM_000277.1:c.-215A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': 'chr12', 'ref': 'T', 'pos': '103311124', 'alt': 'C'}} - assert results['NM_000277.1:c.-215A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': 'chr12', 'ref': 'T', 'pos': '102917346', 'alt': 'C'}} - assert results['NM_000277.1:c.-215A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': '12', 'ref': 'T', 'pos': '103311124', 'alt': 'C'}} - assert results['NM_000277.1:c.-215A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': '12', 'ref': 'T', 'pos': '102917346', 'alt': 'C'}} - assert results['NM_000277.1:c.-215A>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008690.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000268.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000277.1'} - + assert results['NM_000277.2:c.-216A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000268.1:p.?', 'slr': 'NP_000268.1:p.?'} + assert results['NM_000277.2:c.-216A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000277.2:c.-216A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000277.2:c.-216A>G']['alt_genomic_loci'], []) + assert results['NM_000277.2:c.-216A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': 'chr12', 'pos': '103311124', 'ref': 'T', 'alt': 'C'}} + assert results['NM_000277.2:c.-216A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': 'chr12', 'pos': '102917346', 'ref': 'T', 'alt': 'C'}} + assert 
results['NM_000277.2:c.-216A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.103311124T>C', 'vcf': {'chr': '12', 'pos': '103311124', 'ref': 'T', 'alt': 'C'}} + assert results['NM_000277.2:c.-216A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.102917346T>C', 'vcf': {'chr': '12', 'pos': '102917346', 'ref': 'T', 'alt': 'C'}} + assert results['NM_000277.2:c.-216A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000277.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000268.1'} def test_variant221(self): variant = '12-111064166-G-A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_001319681.1:c.-366-1G>A' in list(results.keys()) - assert results['NM_001319681.1:c.-366-1G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001319681.1:c.-366-1G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001319681.1:c.-366-1G>A']['alt_genomic_loci'], []) - assert results['NM_001319681.1:c.-366-1G>A']['gene_symbol'] == 'TCTN1' - assert results['NM_001319681.1:c.-366-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001306610.1:p.?', 'slr': 'NP_001306610.1:p.?'} - assert results['NM_001319681.1:c.-366-1G>A']['submitted_variant'] == '12-111064166-G-A' - assert results['NM_001319681.1:c.-366-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_001319681.1):c.-366-1G>A' - assert results['NM_001319681.1:c.-366-1G>A']['hgvs_lrg_variant'] == '' - assert results['NM_001319681.1:c.-366-1G>A']['hgvs_transcript_variant'] == 'NM_001319681.1:c.-366-1G>A' - assert results['NM_001319681.1:c.-366-1G>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001319681.1:c.-366-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} - assert 
results['NM_001319681.1:c.-366-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} - assert results['NM_001319681.1:c.-366-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} - assert results['NM_001319681.1:c.-366-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} - assert results['NM_001319681.1:c.-366-1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001306610.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001319681.1'} - + assert results['flag'] == 'gene_variant' assert 'NM_001319680.1:c.342-1G>A' in list(results.keys()) - assert results['NM_001319680.1:c.342-1G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001319680.1:c.342-1G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001319680.1:c.342-1G>A']['alt_genomic_loci'], []) - assert results['NM_001319680.1:c.342-1G>A']['gene_symbol'] == 'TCTN1' - assert results['NM_001319680.1:c.342-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001306609.1:p.?', 'slr': 'NP_001306609.1:p.?'} assert results['NM_001319680.1:c.342-1G>A']['submitted_variant'] == '12-111064166-G-A' - assert results['NM_001319680.1:c.342-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_001319680.1):c.342-1G>A' - assert results['NM_001319680.1:c.342-1G>A']['hgvs_lrg_variant'] == '' + assert results['NM_001319680.1:c.342-1G>A']['gene_symbol'] == 'TCTN1' + assert results['NM_001319680.1:c.342-1G>A']['gene_ids'] == {'hgnc_id': 'HGNC:26113', 'entrez_gene_id': '79600', 'ucsc_id': 'uc001trn.6', 'omim_id': ['609863']} assert 
results['NM_001319680.1:c.342-1G>A']['hgvs_transcript_variant'] == 'NM_001319680.1:c.342-1G>A' + assert results['NM_001319680.1:c.342-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_001319680.1):c.342-1G>A' + assert results['NM_001319680.1:c.342-1G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001319680.1:c.342-1G>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001319680.1:c.342-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} - assert results['NM_001319680.1:c.342-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} - assert results['NM_001319680.1:c.342-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} - assert results['NM_001319680.1:c.342-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} - assert results['NM_001319680.1:c.342-1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001306609.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001319680.1'} - - assert 'NM_001082538.2:c.342-1G>A' in list(results.keys()) - assert results['NM_001082538.2:c.342-1G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001082538.2:c.342-1G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001082538.2:c.342-1G>A']['alt_genomic_loci'], []) - assert results['NM_001082538.2:c.342-1G>A']['gene_symbol'] == 'TCTN1' - assert results['NM_001082538.2:c.342-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001076007.1:p.?', 'slr': 'NP_001076007.1:p.?'} - assert 
results['NM_001082538.2:c.342-1G>A']['submitted_variant'] == '12-111064166-G-A' - assert results['NM_001082538.2:c.342-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_001082538.2):c.342-1G>A' - assert results['NM_001082538.2:c.342-1G>A']['hgvs_lrg_variant'] == '' - assert results['NM_001082538.2:c.342-1G>A']['hgvs_transcript_variant'] == 'NM_001082538.2:c.342-1G>A' - assert results['NM_001082538.2:c.342-1G>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001082538.2:c.342-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} - assert results['NM_001082538.2:c.342-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} - assert results['NM_001082538.2:c.342-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} - assert results['NM_001082538.2:c.342-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} - assert results['NM_001082538.2:c.342-1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001076007.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001082538.2'} + assert results['NM_001319680.1:c.342-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001306609.1:p.?', 'slr': 'NP_001306609.1:p.?'} + assert results['NM_001319680.1:c.342-1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001319680.1:c.342-1G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001319680.1:c.342-1G>A']['alt_genomic_loci'], []) + assert results['NM_001319680.1:c.342-1G>A']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'pos': '111064166', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001319680.1:c.342-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'pos': '110626361', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001319680.1:c.342-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'pos': '111064166', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001319680.1:c.342-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'pos': '110626361', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001319680.1:c.342-1G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001319680.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001306609.1'} - assert 'NM_001173976.1:c.162-1G>A' in list(results.keys()) - assert results['NM_001173976.1:c.162-1G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001173976.1:c.162-1G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001173976.1:c.162-1G>A']['alt_genomic_loci'], []) - assert results['NM_001173976.1:c.162-1G>A']['gene_symbol'] == 'TCTN1' - assert results['NM_001173976.1:c.162-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167447.1:p.?', 'slr': 'NP_001167447.1:p.?'} - assert results['NM_001173976.1:c.162-1G>A']['submitted_variant'] == '12-111064166-G-A' - assert results['NM_001173976.1:c.162-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_001173976.1):c.162-1G>A' - assert results['NM_001173976.1:c.162-1G>A']['hgvs_lrg_variant'] == '' - assert results['NM_001173976.1:c.162-1G>A']['hgvs_transcript_variant'] == 'NM_001173976.1:c.162-1G>A' - assert results['NM_001173976.1:c.162-1G>A']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_001173976.1:c.162-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} - assert results['NM_001173976.1:c.162-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} - assert results['NM_001173976.1:c.162-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} - assert results['NM_001173976.1:c.162-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} - assert results['NM_001173976.1:c.162-1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167447.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001173976.1'} + assert 'NM_001173975.2:c.174-1G>A' in list(results.keys()) + assert results['NM_001173975.2:c.174-1G>A']['submitted_variant'] == '12-111064166-G-A' + assert results['NM_001173975.2:c.174-1G>A']['gene_symbol'] == 'TCTN1' + assert results['NM_001173975.2:c.174-1G>A']['gene_ids'] == {'hgnc_id': 'HGNC:26113', 'entrez_gene_id': '79600', 'ucsc_id': 'uc001trn.6', 'omim_id': ['609863']} + assert results['NM_001173975.2:c.174-1G>A']['hgvs_transcript_variant'] == 'NM_001173975.2:c.174-1G>A' + assert results['NM_001173975.2:c.174-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_001173975.2):c.174-1G>A' + assert results['NM_001173975.2:c.174-1G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001173975.2:c.174-1G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001173975.2:c.174-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167446.1:p.?', 'slr': 'NP_001167446.1:p.?'} + assert 
results['NM_001173975.2:c.174-1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001173975.2:c.174-1G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001173975.2:c.174-1G>A']['alt_genomic_loci'], []) + assert results['NM_001173975.2:c.174-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'pos': '111064166', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001173975.2:c.174-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'pos': '110626361', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001173975.2:c.174-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'pos': '111064166', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001173975.2:c.174-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'pos': '110626361', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001173975.2:c.174-1G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001173975.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167446.1'} - assert results['flag'] == 'gene_variant' assert 'NM_001082537.2:c.342-1G>A' in list(results.keys()) - assert results['NM_001082537.2:c.342-1G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001082537.2:c.342-1G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001082537.2:c.342-1G>A']['alt_genomic_loci'], []) - assert results['NM_001082537.2:c.342-1G>A']['gene_symbol'] == 'TCTN1' - assert results['NM_001082537.2:c.342-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001076006.1:p.?', 'slr': 'NP_001076006.1:p.?'} assert results['NM_001082537.2:c.342-1G>A']['submitted_variant'] == '12-111064166-G-A' - assert 
results['NM_001082537.2:c.342-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_001082537.2):c.342-1G>A' - assert results['NM_001082537.2:c.342-1G>A']['hgvs_lrg_variant'] == '' + assert results['NM_001082537.2:c.342-1G>A']['gene_symbol'] == 'TCTN1' + assert results['NM_001082537.2:c.342-1G>A']['gene_ids'] == {'hgnc_id': 'HGNC:26113', 'entrez_gene_id': '79600', 'ucsc_id': 'uc001trn.6', 'omim_id': ['609863']} assert results['NM_001082537.2:c.342-1G>A']['hgvs_transcript_variant'] == 'NM_001082537.2:c.342-1G>A' + assert results['NM_001082537.2:c.342-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_001082537.2):c.342-1G>A' + assert results['NM_001082537.2:c.342-1G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001082537.2:c.342-1G>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001082537.2:c.342-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} - assert results['NM_001082537.2:c.342-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} - assert results['NM_001082537.2:c.342-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} - assert results['NM_001082537.2:c.342-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} - assert results['NM_001082537.2:c.342-1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001076006.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001082537.2'} + assert results['NM_001082537.2:c.342-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001076006.1:p.?', 
'slr': 'NP_001076006.1:p.?'} + assert results['NM_001082537.2:c.342-1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001082537.2:c.342-1G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001082537.2:c.342-1G>A']['alt_genomic_loci'], []) + assert results['NM_001082537.2:c.342-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'pos': '111064166', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001082537.2:c.342-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'pos': '110626361', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001082537.2:c.342-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'pos': '111064166', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001082537.2:c.342-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'pos': '110626361', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001082537.2:c.342-1G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001082537.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001076006.1'} + + assert 'NM_001319682.1:c.174-1G>A' in list(results.keys()) + assert results['NM_001319682.1:c.174-1G>A']['submitted_variant'] == '12-111064166-G-A' + assert results['NM_001319682.1:c.174-1G>A']['gene_symbol'] == 'TCTN1' + assert results['NM_001319682.1:c.174-1G>A']['gene_ids'] == {'hgnc_id': 'HGNC:26113', 'entrez_gene_id': '79600', 'ucsc_id': 'uc001trn.6', 'omim_id': ['609863']} + assert results['NM_001319682.1:c.174-1G>A']['hgvs_transcript_variant'] == 'NM_001319682.1:c.174-1G>A' + assert results['NM_001319682.1:c.174-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_001319682.1):c.174-1G>A' + assert 
results['NM_001319682.1:c.174-1G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001319682.1:c.174-1G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001319682.1:c.174-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001306611.1:p.?', 'slr': 'NP_001306611.1:p.?'} + assert results['NM_001319682.1:c.174-1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001319682.1:c.174-1G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001319682.1:c.174-1G>A']['alt_genomic_loci'], []) + assert results['NM_001319682.1:c.174-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'pos': '111064166', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001319682.1:c.174-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'pos': '110626361', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001319682.1:c.174-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'pos': '111064166', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001319682.1:c.174-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'pos': '110626361', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001319682.1:c.174-1G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001319682.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001306611.1'} + + assert 'NM_001082538.2:c.342-1G>A' in list(results.keys()) + assert results['NM_001082538.2:c.342-1G>A']['submitted_variant'] == '12-111064166-G-A' + assert results['NM_001082538.2:c.342-1G>A']['gene_symbol'] == 'TCTN1' + assert results['NM_001082538.2:c.342-1G>A']['gene_ids'] == {'hgnc_id': 'HGNC:26113', 'entrez_gene_id': '79600', 'ucsc_id': 'uc001trn.6', 'omim_id': ['609863']} + 
assert results['NM_001082538.2:c.342-1G>A']['hgvs_transcript_variant'] == 'NM_001082538.2:c.342-1G>A' + assert results['NM_001082538.2:c.342-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_001082538.2):c.342-1G>A' + assert results['NM_001082538.2:c.342-1G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001082538.2:c.342-1G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001082538.2:c.342-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001076007.1:p.?', 'slr': 'NP_001076007.1:p.?'} + assert results['NM_001082538.2:c.342-1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001082538.2:c.342-1G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001082538.2:c.342-1G>A']['alt_genomic_loci'], []) + assert results['NM_001082538.2:c.342-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'pos': '111064166', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001082538.2:c.342-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'pos': '110626361', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001082538.2:c.342-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'pos': '111064166', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001082538.2:c.342-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'pos': '110626361', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001082538.2:c.342-1G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001082538.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001076007.1'} assert 'NR_135088.1:n.559-1G>A' in list(results.keys()) - assert results['NR_135088.1:n.559-1G>A']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NR_135088.1:n.559-1G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_135088.1:n.559-1G>A']['alt_genomic_loci'], []) - assert results['NR_135088.1:n.559-1G>A']['gene_symbol'] == 'TCTN1' - assert results['NR_135088.1:n.559-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_135088.1:n.559-1G>A']['submitted_variant'] == '12-111064166-G-A' - assert results['NR_135088.1:n.559-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NR_135088.1):c.559-1G>A' - assert results['NR_135088.1:n.559-1G>A']['hgvs_lrg_variant'] == '' + assert results['NR_135088.1:n.559-1G>A']['gene_symbol'] == 'TCTN1' + assert results['NR_135088.1:n.559-1G>A']['gene_ids'] == {'hgnc_id': 'HGNC:26113', 'entrez_gene_id': '79600', 'ucsc_id': 'uc001trn.6', 'omim_id': ['609863']} assert results['NR_135088.1:n.559-1G>A']['hgvs_transcript_variant'] == 'NR_135088.1:n.559-1G>A' + assert results['NR_135088.1:n.559-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NR_135088.1):c.559-1G>A' + assert results['NR_135088.1:n.559-1G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NR_135088.1:n.559-1G>A']['hgvs_refseqgene_variant'] == '' - assert results['NR_135088.1:n.559-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} - assert results['NR_135088.1:n.559-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} - assert results['NR_135088.1:n.559-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} - assert results['NR_135088.1:n.559-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} + assert results['NR_135088.1:n.559-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_135088.1:n.559-1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_135088.1:n.559-1G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_135088.1:n.559-1G>A']['alt_genomic_loci'], []) + assert results['NR_135088.1:n.559-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'pos': '111064166', 'ref': 'G', 'alt': 'A'}} + assert results['NR_135088.1:n.559-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'pos': '110626361', 'ref': 'G', 'alt': 'A'}} + assert results['NR_135088.1:n.559-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'pos': '111064166', 'ref': 'G', 'alt': 'A'}} + assert results['NR_135088.1:n.559-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'pos': '110626361', 'ref': 'G', 'alt': 'A'}} assert results['NR_135088.1:n.559-1G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_135088.1'} - assert 'NM_024549.5:c.342-1G>A' in list(results.keys()) - assert results['NM_024549.5:c.342-1G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_024549.5:c.342-1G>A']['refseqgene_context_intronic_sequence'] == 'NG_030381.1(NM_024549.5):c.342-1G>A' - self.assertCountEqual(results['NM_024549.5:c.342-1G>A']['alt_genomic_loci'], []) - assert results['NM_024549.5:c.342-1G>A']['gene_symbol'] == 'TCTN1' - assert results['NM_024549.5:c.342-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_078825.2:p.?', 'slr': 'NP_078825.2:p.?'} - assert 
results['NM_024549.5:c.342-1G>A']['submitted_variant'] == '12-111064166-G-A' - assert results['NM_024549.5:c.342-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_024549.5):c.342-1G>A' - assert results['NM_024549.5:c.342-1G>A']['hgvs_lrg_variant'] == '' - assert results['NM_024549.5:c.342-1G>A']['hgvs_transcript_variant'] == 'NM_024549.5:c.342-1G>A' - assert results['NM_024549.5:c.342-1G>A']['hgvs_refseqgene_variant'] == 'NG_030381.1:g.17335G>A' - assert results['NM_024549.5:c.342-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} - assert results['NM_024549.5:c.342-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} - assert results['NM_024549.5:c.342-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} - assert results['NM_024549.5:c.342-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} - assert results['NM_024549.5:c.342-1G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_030381.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_078825.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024549.5'} - - assert 'NM_001173975.2:c.174-1G>A' in list(results.keys()) - assert results['NM_001173975.2:c.174-1G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001173975.2:c.174-1G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001173975.2:c.174-1G>A']['alt_genomic_loci'], []) - assert results['NM_001173975.2:c.174-1G>A']['gene_symbol'] == 'TCTN1' - assert 
results['NM_001173975.2:c.174-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167446.1:p.?', 'slr': 'NP_001167446.1:p.?'} - assert results['NM_001173975.2:c.174-1G>A']['submitted_variant'] == '12-111064166-G-A' - assert results['NM_001173975.2:c.174-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_001173975.2):c.174-1G>A' - assert results['NM_001173975.2:c.174-1G>A']['hgvs_lrg_variant'] == '' - assert results['NM_001173975.2:c.174-1G>A']['hgvs_transcript_variant'] == 'NM_001173975.2:c.174-1G>A' - assert results['NM_001173975.2:c.174-1G>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001173975.2:c.174-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} - assert results['NM_001173975.2:c.174-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} - assert results['NM_001173975.2:c.174-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} - assert results['NM_001173975.2:c.174-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} - assert results['NM_001173975.2:c.174-1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167446.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001173975.2'} - assert 'NM_001173975.1:c.174-1G>A' in list(results.keys()) - assert results['NM_001173975.1:c.174-1G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001173975.1:c.174-1G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001173975.1:c.174-1G>A']['alt_genomic_loci'], []) - assert 
results['NM_001173975.1:c.174-1G>A']['gene_symbol'] == 'TCTN1' - assert results['NM_001173975.1:c.174-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167446.1:p.?', 'slr': 'NP_001167446.1:p.?'} assert results['NM_001173975.1:c.174-1G>A']['submitted_variant'] == '12-111064166-G-A' - assert results['NM_001173975.1:c.174-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_001173975.1):c.174-1G>A' - assert results['NM_001173975.1:c.174-1G>A']['hgvs_lrg_variant'] == '' + assert results['NM_001173975.1:c.174-1G>A']['gene_symbol'] == 'TCTN1' + assert results['NM_001173975.1:c.174-1G>A']['gene_ids'] == {'hgnc_id': 'HGNC:26113', 'entrez_gene_id': '79600', 'ucsc_id': 'uc001trn.6', 'omim_id': ['609863']} assert results['NM_001173975.1:c.174-1G>A']['hgvs_transcript_variant'] == 'NM_001173975.1:c.174-1G>A' + assert results['NM_001173975.1:c.174-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_001173975.1):c.174-1G>A' + assert results['NM_001173975.1:c.174-1G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001173975.1:c.174-1G>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001173975.1:c.174-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} + assert results['NM_001173975.1:c.174-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167446.1:p.?', 'slr': 'NP_001167446.1:p.?'} + assert results['NM_001173975.1:c.174-1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001173975.1:c.174-1G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001173975.1:c.174-1G>A']['alt_genomic_loci'], []) + assert results['NM_001173975.1:c.174-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'pos': '111064166', 'ref': 'G', 'alt': 'A'}} assert 'hg38' not in 
list(results['NM_001173975.1:c.174-1G>A']['primary_assembly_loci'].keys()) - assert results['NM_001173975.1:c.174-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} + assert results['NM_001173975.1:c.174-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'pos': '111064166', 'ref': 'G', 'alt': 'A'}} assert 'grch38' not in list(results['NM_001173975.1:c.174-1G>A']['primary_assembly_loci'].keys()) - assert results['NM_001173975.1:c.174-1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167446.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001173975.1'} - - assert 'NM_001319682.1:c.174-1G>A' in list(results.keys()) - assert results['NM_001319682.1:c.174-1G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001319682.1:c.174-1G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001319682.1:c.174-1G>A']['alt_genomic_loci'], []) - assert results['NM_001319682.1:c.174-1G>A']['gene_symbol'] == 'TCTN1' - assert results['NM_001319682.1:c.174-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001306611.1:p.?', 'slr': 'NP_001306611.1:p.?'} - assert results['NM_001319682.1:c.174-1G>A']['submitted_variant'] == '12-111064166-G-A' - assert results['NM_001319682.1:c.174-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_001319682.1):c.174-1G>A' - assert results['NM_001319682.1:c.174-1G>A']['hgvs_lrg_variant'] == '' - assert results['NM_001319682.1:c.174-1G>A']['hgvs_transcript_variant'] == 'NM_001319682.1:c.174-1G>A' - assert results['NM_001319682.1:c.174-1G>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001319682.1:c.174-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 
'pos': '111064166', 'alt': 'A'}} - assert results['NM_001319682.1:c.174-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} - assert results['NM_001319682.1:c.174-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '111064166', 'alt': 'A'}} - assert results['NM_001319682.1:c.174-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'ref': 'G', 'pos': '110626361', 'alt': 'A'}} - assert results['NM_001319682.1:c.174-1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001306611.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001319682.1'} + assert results['NM_001173975.1:c.174-1G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001173975.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167446.1'} + assert 'NM_024549.5:c.342-1G>A' in list(results.keys()) + assert results['NM_024549.5:c.342-1G>A']['submitted_variant'] == '12-111064166-G-A' + assert results['NM_024549.5:c.342-1G>A']['gene_symbol'] == 'TCTN1' + assert results['NM_024549.5:c.342-1G>A']['gene_ids'] == {'hgnc_id': 'HGNC:26113', 'entrez_gene_id': '79600', 'ucsc_id': 'uc001trn.6', 'omim_id': ['609863']} + assert results['NM_024549.5:c.342-1G>A']['hgvs_transcript_variant'] == 'NM_024549.5:c.342-1G>A' + assert results['NM_024549.5:c.342-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_024549.5):c.342-1G>A' + assert results['NM_024549.5:c.342-1G>A']['refseqgene_context_intronic_sequence'] == 'NG_030381.1(NM_024549.5):c.342-1G>A' + assert results['NM_024549.5:c.342-1G>A']['hgvs_refseqgene_variant'] == 'NG_030381.1:g.17335G>A' + assert results['NM_024549.5:c.342-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_078825.2:p.?', 'slr': 'NP_078825.2:p.?'} + assert results['NM_024549.5:c.342-1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_024549.5:c.342-1G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_024549.5:c.342-1G>A']['alt_genomic_loci'], []) + assert results['NM_024549.5:c.342-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'pos': '111064166', 'ref': 'G', 'alt': 'A'}} + assert results['NM_024549.5:c.342-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'pos': '110626361', 'ref': 'G', 'alt': 'A'}} + assert results['NM_024549.5:c.342-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'pos': '111064166', 'ref': 'G', 'alt': 'A'}} + assert results['NM_024549.5:c.342-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'pos': '110626361', 'ref': 'G', 'alt': 'A'}} + assert results['NM_024549.5:c.342-1G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024549.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_078825.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_030381.1'} - def test_variant222(self): - variant = '12-123738430-CA-C' - results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() - print(results) + assert 'NM_001319681.1:c.-366-1G>A' in list(results.keys()) + assert results['NM_001319681.1:c.-366-1G>A']['submitted_variant'] == '12-111064166-G-A' + assert results['NM_001319681.1:c.-366-1G>A']['gene_symbol'] == 'TCTN1' + assert results['NM_001319681.1:c.-366-1G>A']['gene_ids'] == {'hgnc_id': 'HGNC:26113', 'entrez_gene_id': '79600', 'ucsc_id': 'uc001trn.6', 'omim_id': ['609863']} + assert results['NM_001319681.1:c.-366-1G>A']['hgvs_transcript_variant'] == 
'NM_001319681.1:c.-366-1G>A' + assert results['NM_001319681.1:c.-366-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_001319681.1):c.-366-1G>A' + assert results['NM_001319681.1:c.-366-1G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001319681.1:c.-366-1G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001319681.1:c.-366-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001306610.1:p.?', 'slr': 'NP_001306610.1:p.?'} + assert results['NM_001319681.1:c.-366-1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001319681.1:c.-366-1G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001319681.1:c.-366-1G>A']['alt_genomic_loci'], []) + assert results['NM_001319681.1:c.-366-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'pos': '111064166', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001319681.1:c.-366-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'pos': '110626361', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001319681.1:c.-366-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'pos': '111064166', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001319681.1:c.-366-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'pos': '110626361', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001319681.1:c.-366-1G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001319681.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001306610.1'} - assert 'NM_001194995.1:c.210del' in list(results.keys()) - assert results['NM_001194995.1:c.210del']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_001194995.1:c.210del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001194995.1:c.210del']['alt_genomic_loci'], []) - assert results['NM_001194995.1:c.210del']['gene_symbol'] == 'C12orf65' - assert results['NM_001194995.1:c.210del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001181924.1:p.(Gly72AlafsTer13)', 'slr': 'NP_001181924.1:p.(G72Afs*13)'} + assert 'NM_001173976.1:c.162-1G>A' in list(results.keys()) + assert results['NM_001173976.1:c.162-1G>A']['submitted_variant'] == '12-111064166-G-A' + assert results['NM_001173976.1:c.162-1G>A']['gene_symbol'] == 'TCTN1' + assert results['NM_001173976.1:c.162-1G>A']['gene_ids'] == {'hgnc_id': 'HGNC:26113', 'entrez_gene_id': '79600', 'ucsc_id': 'uc001trn.6', 'omim_id': ['609863']} + assert results['NM_001173976.1:c.162-1G>A']['hgvs_transcript_variant'] == 'NM_001173976.1:c.162-1G>A' + assert results['NM_001173976.1:c.162-1G>A']['genome_context_intronic_sequence'] == 'NC_000012.11(NM_001173976.1):c.162-1G>A' + assert results['NM_001173976.1:c.162-1G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001173976.1:c.162-1G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001173976.1:c.162-1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167447.1:p.?', 'slr': 'NP_001167447.1:p.?'} + assert results['NM_001173976.1:c.162-1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001173976.1:c.162-1G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001173976.1:c.162-1G>A']['alt_genomic_loci'], []) + assert results['NM_001173976.1:c.162-1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': 'chr12', 'pos': '111064166', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001173976.1:c.162-1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': 'chr12', 'pos': '110626361', 'ref': 'G', 'alt': 
'A'}} + assert results['NM_001173976.1:c.162-1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.111064166G>A', 'vcf': {'chr': '12', 'pos': '111064166', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001173976.1:c.162-1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.110626361G>A', 'vcf': {'chr': '12', 'pos': '110626361', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001173976.1:c.162-1G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001173976.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167447.1'} + + def test_variant222(self): + variant = '12-123738430-CA-C' + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() + print(results) + + assert results['flag'] == 'gene_variant' + assert 'NM_001194995.1:c.210del' in list(results.keys()) assert results['NM_001194995.1:c.210del']['submitted_variant'] == '12-123738430-CA-C' - assert results['NM_001194995.1:c.210del']['genome_context_intronic_sequence'] == '' - assert results['NM_001194995.1:c.210del']['hgvs_lrg_variant'] == '' + assert results['NM_001194995.1:c.210del']['gene_symbol'] == 'C12orf65' + assert results['NM_001194995.1:c.210del']['gene_ids'] == {'hgnc_id': 'HGNC:26784', 'entrez_gene_id': '91574', 'ucsc_id': 'uc001uen.4', 'omim_id': ['613541']} assert results['NM_001194995.1:c.210del']['hgvs_transcript_variant'] == 'NM_001194995.1:c.210del' + assert results['NM_001194995.1:c.210del']['genome_context_intronic_sequence'] == '' + assert results['NM_001194995.1:c.210del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001194995.1:c.210del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001194995.1:c.210del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.123738431del', 'vcf': {'chr': 'chr12', 'ref': 'CA', 'pos': '123738430', 'alt': 'C'}} - assert 
results['NM_001194995.1:c.210del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.123253884del', 'vcf': {'chr': 'chr12', 'ref': 'CA', 'pos': '123253883', 'alt': 'C'}} - assert results['NM_001194995.1:c.210del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.123738431del', 'vcf': {'chr': '12', 'ref': 'CA', 'pos': '123738430', 'alt': 'C'}} - assert results['NM_001194995.1:c.210del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.123253884del', 'vcf': {'chr': '12', 'ref': 'CA', 'pos': '123253883', 'alt': 'C'}} - assert results['NM_001194995.1:c.210del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001181924.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001194995.1'} + assert results['NM_001194995.1:c.210del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001181924.1:p.(Gly72AlafsTer13)', 'slr': 'NP_001181924.1:p.(G72Afs*13)'} + assert results['NM_001194995.1:c.210del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001194995.1:c.210del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001194995.1:c.210del']['alt_genomic_loci'], []) + assert results['NM_001194995.1:c.210del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.123738431del', 'vcf': {'chr': 'chr12', 'pos': '123738430', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_001194995.1:c.210del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.123253884del', 'vcf': {'chr': 'chr12', 'pos': '123253883', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_001194995.1:c.210del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.123738431del', 'vcf': {'chr': '12', 'pos': '123738430', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_001194995.1:c.210del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000012.12:g.123253884del', 'vcf': {'chr': '12', 'pos': '123253883', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_001194995.1:c.210del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001194995.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001181924.1'} - assert results['flag'] == 'gene_variant' assert 'NM_152269.4:c.210del' in list(results.keys()) - assert results['NM_152269.4:c.210del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_152269.4:c.210del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_152269.4:c.210del']['alt_genomic_loci'], []) - assert results['NM_152269.4:c.210del']['gene_symbol'] == 'C12orf65' - assert results['NM_152269.4:c.210del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_689482.1:p.(Gly72AlafsTer13)', 'slr': 'NP_689482.1:p.(G72Afs*13)'} assert results['NM_152269.4:c.210del']['submitted_variant'] == '12-123738430-CA-C' - assert results['NM_152269.4:c.210del']['genome_context_intronic_sequence'] == '' - assert results['NM_152269.4:c.210del']['hgvs_lrg_variant'] == '' + assert results['NM_152269.4:c.210del']['gene_symbol'] == 'C12orf65' + assert results['NM_152269.4:c.210del']['gene_ids'] == {'hgnc_id': 'HGNC:26784', 'entrez_gene_id': '91574', 'ucsc_id': 'uc001uen.4', 'omim_id': ['613541']} assert results['NM_152269.4:c.210del']['hgvs_transcript_variant'] == 'NM_152269.4:c.210del' + assert results['NM_152269.4:c.210del']['genome_context_intronic_sequence'] == '' + assert results['NM_152269.4:c.210del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_152269.4:c.210del']['hgvs_refseqgene_variant'] == 'NG_027517.1:g.25588del' - assert results['NM_152269.4:c.210del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.123738431del', 'vcf': {'chr': 'chr12', 'ref': 'CA', 'pos': '123738430', 'alt': 'C'}} - assert results['NM_152269.4:c.210del']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000012.12:g.123253884del', 'vcf': {'chr': 'chr12', 'ref': 'CA', 'pos': '123253883', 'alt': 'C'}} - assert results['NM_152269.4:c.210del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.123738431del', 'vcf': {'chr': '12', 'ref': 'CA', 'pos': '123738430', 'alt': 'C'}} - assert results['NM_152269.4:c.210del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.123253884del', 'vcf': {'chr': '12', 'ref': 'CA', 'pos': '123253883', 'alt': 'C'}} - assert results['NM_152269.4:c.210del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_027517.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_689482.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_152269.4'} + assert results['NM_152269.4:c.210del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_689482.1:p.(Gly72AlafsTer13)', 'slr': 'NP_689482.1:p.(G72Afs*13)'} + assert results['NM_152269.4:c.210del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_152269.4:c.210del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_152269.4:c.210del']['alt_genomic_loci'], []) + assert results['NM_152269.4:c.210del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.123738431del', 'vcf': {'chr': 'chr12', 'pos': '123738430', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_152269.4:c.210del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.123253884del', 'vcf': {'chr': 'chr12', 'pos': '123253883', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_152269.4:c.210del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.123738431del', 'vcf': {'chr': '12', 'pos': '123738430', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_152269.4:c.210del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.123253884del', 'vcf': {'chr': '12', 'pos': '123253883', 
'ref': 'CA', 'alt': 'C'}} + assert results['NM_152269.4:c.210del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_152269.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_689482.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_027517.1'} assert 'NM_001143905.2:c.210del' in list(results.keys()) - assert results['NM_001143905.2:c.210del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001143905.2:c.210del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001143905.2:c.210del']['alt_genomic_loci'], []) - assert results['NM_001143905.2:c.210del']['gene_symbol'] == 'C12orf65' - assert results['NM_001143905.2:c.210del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001137377.1:p.(Gly72AlafsTer13)', 'slr': 'NP_001137377.1:p.(G72Afs*13)'} assert results['NM_001143905.2:c.210del']['submitted_variant'] == '12-123738430-CA-C' - assert results['NM_001143905.2:c.210del']['genome_context_intronic_sequence'] == '' - assert results['NM_001143905.2:c.210del']['hgvs_lrg_variant'] == '' + assert results['NM_001143905.2:c.210del']['gene_symbol'] == 'C12orf65' + assert results['NM_001143905.2:c.210del']['gene_ids'] == {'hgnc_id': 'HGNC:26784', 'entrez_gene_id': '91574', 'ucsc_id': 'uc001uen.4', 'omim_id': ['613541']} assert results['NM_001143905.2:c.210del']['hgvs_transcript_variant'] == 'NM_001143905.2:c.210del' + assert results['NM_001143905.2:c.210del']['genome_context_intronic_sequence'] == '' + assert results['NM_001143905.2:c.210del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001143905.2:c.210del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001143905.2:c.210del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.123738431del', 'vcf': {'chr': 'chr12', 'ref': 'CA', 'pos': '123738430', 'alt': 'C'}} - assert results['NM_001143905.2:c.210del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000012.12:g.123253884del', 'vcf': {'chr': 'chr12', 'ref': 'CA', 'pos': '123253883', 'alt': 'C'}} - assert results['NM_001143905.2:c.210del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.123738431del', 'vcf': {'chr': '12', 'ref': 'CA', 'pos': '123738430', 'alt': 'C'}} - assert results['NM_001143905.2:c.210del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.123253884del', 'vcf': {'chr': '12', 'ref': 'CA', 'pos': '123253883', 'alt': 'C'}} - assert results['NM_001143905.2:c.210del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001137377.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001143905.2'} - + assert results['NM_001143905.2:c.210del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001137377.1:p.(Gly72AlafsTer13)', 'slr': 'NP_001137377.1:p.(G72Afs*13)'} + assert results['NM_001143905.2:c.210del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001143905.2:c.210del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001143905.2:c.210del']['alt_genomic_loci'], []) + assert results['NM_001143905.2:c.210del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.123738431del', 'vcf': {'chr': 'chr12', 'pos': '123738430', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_001143905.2:c.210del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.123253884del', 'vcf': {'chr': 'chr12', 'pos': '123253883', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_001143905.2:c.210del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.123738431del', 'vcf': {'chr': '12', 'pos': '123738430', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_001143905.2:c.210del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.123253884del', 'vcf': {'chr': '12', 'pos': '123253883', 'ref': 'CA', 'alt': 'C'}} + assert 
results['NM_001143905.2:c.210del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001143905.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001137377.1'} def test_variant223(self): variant = '13-31789169-CT-C' @@ -8830,658 +9032,704 @@ def test_variant223(self): assert results['flag'] == 'gene_variant' assert 'NM_194318.3:c.71-5del' in list(results.keys()) - assert results['NM_194318.3:c.71-5del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_194318.3:c.71-5del']['refseqgene_context_intronic_sequence'] == 'NG_011732.1(NM_194318.3):c.71-5del' - self.assertCountEqual(results['NM_194318.3:c.71-5del']['alt_genomic_loci'], []) - assert results['NM_194318.3:c.71-5del']['gene_symbol'] == 'B3GLCT' - assert results['NM_194318.3:c.71-5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_919299.3:p.?', 'slr': 'NP_919299.3:p.?'} assert results['NM_194318.3:c.71-5del']['submitted_variant'] == '13-31789169-CT-C' - assert results['NM_194318.3:c.71-5del']['genome_context_intronic_sequence'] == 'NC_000013.10(NM_194318.3):c.71-5del' - assert results['NM_194318.3:c.71-5del']['hgvs_lrg_variant'] == '' + assert results['NM_194318.3:c.71-5del']['gene_symbol'] == 'B3GLCT' + assert results['NM_194318.3:c.71-5del']['gene_ids'] == {'hgnc_id': 'HGNC:20207', 'entrez_gene_id': '145173', 'ucsc_id': '', 'omim_id': ['610308']} assert results['NM_194318.3:c.71-5del']['hgvs_transcript_variant'] == 'NM_194318.3:c.71-5del' + assert results['NM_194318.3:c.71-5del']['genome_context_intronic_sequence'] == 'NC_000013.10(NM_194318.3):c.71-5del' + assert results['NM_194318.3:c.71-5del']['refseqgene_context_intronic_sequence'] == 'NG_011732.1(NM_194318.3):c.71-5del' assert results['NM_194318.3:c.71-5del']['hgvs_refseqgene_variant'] == 'NG_011732.1:g.20072del' - assert results['NM_194318.3:c.71-5del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000013.10:g.31789183del', 'vcf': {'chr': 'chr13', 'ref': 'CT', 'pos': 
'31789169', 'alt': 'C'}} - assert results['NM_194318.3:c.71-5del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000013.11:g.31215046del', 'vcf': {'chr': 'chr13', 'ref': 'CT', 'pos': '31215032', 'alt': 'C'}} - assert results['NM_194318.3:c.71-5del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000013.10:g.31789183del', 'vcf': {'chr': '13', 'ref': 'CT', 'pos': '31789169', 'alt': 'C'}} - assert results['NM_194318.3:c.71-5del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000013.11:g.31215046del', 'vcf': {'chr': '13', 'ref': 'CT', 'pos': '31215032', 'alt': 'C'}} - assert results['NM_194318.3:c.71-5del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011732.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_919299.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_194318.3'} - + assert results['NM_194318.3:c.71-5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_919299.3:p.?', 'slr': 'NP_919299.3:p.?'} + assert results['NM_194318.3:c.71-5del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_194318.3:c.71-5del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_194318.3:c.71-5del']['alt_genomic_loci'], []) + assert results['NM_194318.3:c.71-5del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000013.10:g.31789183del', 'vcf': {'chr': 'chr13', 'pos': '31789169', 'ref': 'CT', 'alt': 'C'}} + assert results['NM_194318.3:c.71-5del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000013.11:g.31215046del', 'vcf': {'chr': 'chr13', 'pos': '31215032', 'ref': 'CT', 'alt': 'C'}} + assert results['NM_194318.3:c.71-5del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000013.10:g.31789183del', 'vcf': {'chr': '13', 'pos': '31789169', 'ref': 'CT', 'alt': 'C'}} + assert results['NM_194318.3:c.71-5del']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000013.11:g.31215046del', 'vcf': {'chr': '13', 'pos': '31215032', 'ref': 'CT', 'alt': 'C'}} + assert results['NM_194318.3:c.71-5del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_194318.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_919299.3', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011732.1'} def test_variant224(self): variant = '14-62187287-G-A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NR_144368.1:n.214-3552C>T' in list(results.keys()) - assert results['NR_144368.1:n.214-3552C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_144368.1:n.214-3552C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_144368.1:n.214-3552C>T']['alt_genomic_loci'], []) - assert results['NR_144368.1:n.214-3552C>T']['gene_symbol'] == 'LOC105370526' - assert results['NR_144368.1:n.214-3552C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_144368.1:n.214-3552C>T']['submitted_variant'] == '14-62187287-G-A' - assert results['NR_144368.1:n.214-3552C>T']['genome_context_intronic_sequence'] == 'NC_000014.8(NR_144368.1):c.214-3552C>T' - assert results['NR_144368.1:n.214-3552C>T']['hgvs_lrg_variant'] == '' + assert results['NR_144368.1:n.214-3552C>T']['gene_symbol'] == 'LOC105370526' + assert results['NR_144368.1:n.214-3552C>T']['gene_ids'] == {} assert results['NR_144368.1:n.214-3552C>T']['hgvs_transcript_variant'] == 'NR_144368.1:n.214-3552C>T' + assert results['NR_144368.1:n.214-3552C>T']['genome_context_intronic_sequence'] == 'NC_000014.8(NR_144368.1):c.214-3552C>T' + assert results['NR_144368.1:n.214-3552C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NR_144368.1:n.214-3552C>T']['hgvs_refseqgene_variant'] == '' - assert 
results['NR_144368.1:n.214-3552C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.62187287G>A', 'vcf': {'chr': 'chr14', 'ref': 'G', 'pos': '62187287', 'alt': 'A'}} - assert results['NR_144368.1:n.214-3552C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': 'chr14', 'ref': 'G', 'pos': '61720569', 'alt': 'A'}} - assert results['NR_144368.1:n.214-3552C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62187287G>A', 'vcf': {'chr': '14', 'ref': 'G', 'pos': '62187287', 'alt': 'A'}} - assert results['NR_144368.1:n.214-3552C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': '14', 'ref': 'G', 'pos': '61720569', 'alt': 'A'}} + assert results['NR_144368.1:n.214-3552C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_144368.1:n.214-3552C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_144368.1:n.214-3552C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_144368.1:n.214-3552C>T']['alt_genomic_loci'], []) + assert results['NR_144368.1:n.214-3552C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.62187287G>A', 'vcf': {'chr': 'chr14', 'pos': '62187287', 'ref': 'G', 'alt': 'A'}} + assert results['NR_144368.1:n.214-3552C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': 'chr14', 'pos': '61720569', 'ref': 'G', 'alt': 'A'}} + assert results['NR_144368.1:n.214-3552C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62187287G>A', 'vcf': {'chr': '14', 'pos': '62187287', 'ref': 'G', 'alt': 'A'}} + assert results['NR_144368.1:n.214-3552C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': '14', 'pos': 
'61720569', 'ref': 'G', 'alt': 'A'}} assert results['NR_144368.1:n.214-3552C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_144368.1'} assert 'NM_181054.2:c.223G>A' in list(results.keys()) - assert results['NM_181054.2:c.223G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_181054.2:c.223G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_181054.2:c.223G>A']['alt_genomic_loci'], []) - assert results['NM_181054.2:c.223G>A']['gene_symbol'] == 'HIF1A' - assert results['NM_181054.2:c.223G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_851397.1:p.(Ala75Thr)', 'slr': 'NP_851397.1:p.(A75T)'} assert results['NM_181054.2:c.223G>A']['submitted_variant'] == '14-62187287-G-A' - assert results['NM_181054.2:c.223G>A']['genome_context_intronic_sequence'] == '' - assert results['NM_181054.2:c.223G>A']['hgvs_lrg_variant'] == '' + assert results['NM_181054.2:c.223G>A']['gene_symbol'] == 'HIF1A' + assert results['NM_181054.2:c.223G>A']['gene_ids'] == {'hgnc_id': 'HGNC:4910', 'entrez_gene_id': '3091', 'ucsc_id': 'uc001xfq.3', 'omim_id': ['603348']} assert results['NM_181054.2:c.223G>A']['hgvs_transcript_variant'] == 'NM_181054.2:c.223G>A' + assert results['NM_181054.2:c.223G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_181054.2:c.223G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_181054.2:c.223G>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_181054.2:c.223G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.62187287G>A', 'vcf': {'chr': 'chr14', 'ref': 'G', 'pos': '62187287', 'alt': 'A'}} - assert results['NM_181054.2:c.223G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': 'chr14', 'ref': 'G', 'pos': '61720569', 'alt': 'A'}} - assert results['NM_181054.2:c.223G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000014.8:g.62187287G>A', 'vcf': {'chr': '14', 'ref': 'G', 'pos': '62187287', 'alt': 'A'}} - assert results['NM_181054.2:c.223G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': '14', 'ref': 'G', 'pos': '61720569', 'alt': 'A'}} - assert results['NM_181054.2:c.223G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_851397.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181054.2'} + assert results['NM_181054.2:c.223G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_851397.1:p.(Ala75Thr)', 'slr': 'NP_851397.1:p.(A75T)'} + assert results['NM_181054.2:c.223G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_181054.2:c.223G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_181054.2:c.223G>A']['alt_genomic_loci'], []) + assert results['NM_181054.2:c.223G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.62187287G>A', 'vcf': {'chr': 'chr14', 'pos': '62187287', 'ref': 'G', 'alt': 'A'}} + assert results['NM_181054.2:c.223G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': 'chr14', 'pos': '61720569', 'ref': 'G', 'alt': 'A'}} + assert results['NM_181054.2:c.223G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62187287G>A', 'vcf': {'chr': '14', 'pos': '62187287', 'ref': 'G', 'alt': 'A'}} + assert results['NM_181054.2:c.223G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': '14', 'pos': '61720569', 'ref': 'G', 'alt': 'A'}} + assert results['NM_181054.2:c.223G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181054.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_851397.1'} - assert results['flag'] == 'gene_variant' assert 'NM_001243084.1:c.295G>A' in list(results.keys()) - assert 
results['NM_001243084.1:c.295G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001243084.1:c.295G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001243084.1:c.295G>A']['alt_genomic_loci'], []) - assert results['NM_001243084.1:c.295G>A']['gene_symbol'] == 'HIF1A' - assert results['NM_001243084.1:c.295G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001230013.1:p.(Ala99Thr)', 'slr': 'NP_001230013.1:p.(A99T)'} assert results['NM_001243084.1:c.295G>A']['submitted_variant'] == '14-62187287-G-A' - assert results['NM_001243084.1:c.295G>A']['genome_context_intronic_sequence'] == '' - assert results['NM_001243084.1:c.295G>A']['hgvs_lrg_variant'] == '' + assert results['NM_001243084.1:c.295G>A']['gene_symbol'] == 'HIF1A' + assert results['NM_001243084.1:c.295G>A']['gene_ids'] == {'hgnc_id': 'HGNC:4910', 'entrez_gene_id': '3091', 'ucsc_id': 'uc001xfq.3', 'omim_id': ['603348']} assert results['NM_001243084.1:c.295G>A']['hgvs_transcript_variant'] == 'NM_001243084.1:c.295G>A' + assert results['NM_001243084.1:c.295G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001243084.1:c.295G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001243084.1:c.295G>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001243084.1:c.295G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.62187287G>A', 'vcf': {'chr': 'chr14', 'ref': 'G', 'pos': '62187287', 'alt': 'A'}} - assert results['NM_001243084.1:c.295G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': 'chr14', 'ref': 'G', 'pos': '61720569', 'alt': 'A'}} - assert results['NM_001243084.1:c.295G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62187287G>A', 'vcf': {'chr': '14', 'ref': 'G', 'pos': '62187287', 'alt': 'A'}} - assert results['NM_001243084.1:c.295G>A']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': '14', 'ref': 'G', 'pos': '61720569', 'alt': 'A'}} - assert results['NM_001243084.1:c.295G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001230013.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001243084.1'} + assert results['NM_001243084.1:c.295G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001230013.1:p.(Ala99Thr)', 'slr': 'NP_001230013.1:p.(A99T)'} + assert results['NM_001243084.1:c.295G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001243084.1:c.295G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001243084.1:c.295G>A']['alt_genomic_loci'], []) + assert results['NM_001243084.1:c.295G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.62187287G>A', 'vcf': {'chr': 'chr14', 'pos': '62187287', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001243084.1:c.295G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': 'chr14', 'pos': '61720569', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001243084.1:c.295G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62187287G>A', 'vcf': {'chr': '14', 'pos': '62187287', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001243084.1:c.295G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': '14', 'pos': '61720569', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001243084.1:c.295G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001243084.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001230013.1'} assert 'NM_001530.3:c.223G>A' in list(results.keys()) - assert results['NM_001530.3:c.223G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001530.3:c.223G>A']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_001530.3:c.223G>A']['alt_genomic_loci'], []) - assert results['NM_001530.3:c.223G>A']['gene_symbol'] == 'HIF1A' - assert results['NM_001530.3:c.223G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001521.1:p.(Ala75Thr)', 'slr': 'NP_001521.1:p.(A75T)'} assert results['NM_001530.3:c.223G>A']['submitted_variant'] == '14-62187287-G-A' - assert results['NM_001530.3:c.223G>A']['genome_context_intronic_sequence'] == '' - assert results['NM_001530.3:c.223G>A']['hgvs_lrg_variant'] == '' + assert results['NM_001530.3:c.223G>A']['gene_symbol'] == 'HIF1A' + assert results['NM_001530.3:c.223G>A']['gene_ids'] == {'hgnc_id': 'HGNC:4910', 'entrez_gene_id': '3091', 'ucsc_id': 'uc001xfq.3', 'omim_id': ['603348']} assert results['NM_001530.3:c.223G>A']['hgvs_transcript_variant'] == 'NM_001530.3:c.223G>A' + assert results['NM_001530.3:c.223G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001530.3:c.223G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001530.3:c.223G>A']['hgvs_refseqgene_variant'] == 'NG_029606.1:g.30169G>A' - assert results['NM_001530.3:c.223G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.62187287G>A', 'vcf': {'chr': 'chr14', 'ref': 'G', 'pos': '62187287', 'alt': 'A'}} - assert results['NM_001530.3:c.223G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': 'chr14', 'ref': 'G', 'pos': '61720569', 'alt': 'A'}} - assert results['NM_001530.3:c.223G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62187287G>A', 'vcf': {'chr': '14', 'ref': 'G', 'pos': '62187287', 'alt': 'A'}} - assert results['NM_001530.3:c.223G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': '14', 'ref': 'G', 'pos': '61720569', 'alt': 'A'}} - assert results['NM_001530.3:c.223G>A']['reference_sequence_records'] == 
{'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029606.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001521.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001530.3'} - + assert results['NM_001530.3:c.223G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001521.1:p.(Ala75Thr)', 'slr': 'NP_001521.1:p.(A75T)'} + assert results['NM_001530.3:c.223G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001530.3:c.223G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001530.3:c.223G>A']['alt_genomic_loci'], []) + assert results['NM_001530.3:c.223G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.62187287G>A', 'vcf': {'chr': 'chr14', 'pos': '62187287', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001530.3:c.223G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': 'chr14', 'pos': '61720569', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001530.3:c.223G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62187287G>A', 'vcf': {'chr': '14', 'pos': '62187287', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001530.3:c.223G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61720569G>A', 'vcf': {'chr': '14', 'pos': '61720569', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001530.3:c.223G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001530.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001521.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029606.1'} def test_variant225(self): variant = '14-62188231-TT-GA' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NR_144368.1:n.214-4497_214-4496delinsTC' in list(results.keys()) - assert 
results['NR_144368.1:n.214-4497_214-4496delinsTC']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_144368.1:n.214-4497_214-4496delinsTC']['alt_genomic_loci'], []) - assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['gene_symbol'] == 'LOC105370526' - assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['submitted_variant'] == '14-62188231-TT-GA' - assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['genome_context_intronic_sequence'] == 'NC_000014.8(NR_144368.1):c.214-4497_214-4496delinsTC' - assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['hgvs_lrg_variant'] == '' + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['gene_symbol'] == 'LOC105370526' + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['gene_ids'] == {} assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['hgvs_transcript_variant'] == 'NR_144368.1:n.214-4497_214-4496delinsTC' + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['genome_context_intronic_sequence'] == 'NC_000014.8(NR_144368.1):c.214-4497_214-4496delinsTC' + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['refseqgene_context_intronic_sequence'] == '' assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['hgvs_refseqgene_variant'] == '' - assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': 'chr14', 'ref': 'TT', 'pos': '62188231', 'alt': 'GA'}} - assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': 'chr14', 'ref': 'TT', 'pos': '61721513', 
'alt': 'GA'}} - assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': '14', 'ref': 'TT', 'pos': '62188231', 'alt': 'GA'}} - assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': '14', 'ref': 'TT', 'pos': '61721513', 'alt': 'GA'}} + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_144368.1:n.214-4497_214-4496delinsTC']['alt_genomic_loci'], []) + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': 'chr14', 'pos': '62188231', 'ref': 'TT', 'alt': 'GA'}} + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': 'chr14', 'pos': '61721513', 'ref': 'TT', 'alt': 'GA'}} + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': '14', 'pos': '62188231', 'ref': 'TT', 'alt': 'GA'}} + assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': '14', 'pos': '61721513', 'ref': 'TT', 'alt': 'GA'}} assert results['NR_144368.1:n.214-4497_214-4496delinsTC']['reference_sequence_records'] == {'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NR_144368.1'} - assert 'NM_001530.3:c.231_232delinsGA' in list(results.keys()) - assert results['NM_001530.3:c.231_232delinsGA']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001530.3:c.231_232delinsGA']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001530.3:c.231_232delinsGA']['alt_genomic_loci'], []) - assert results['NM_001530.3:c.231_232delinsGA']['gene_symbol'] == 'HIF1A' - assert results['NM_001530.3:c.231_232delinsGA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001521.1:p.(Asp77_Leu78delinsGluMet)', 'slr': 'NP_001521.1:p.(D77_L78delinsEM)'} - assert results['NM_001530.3:c.231_232delinsGA']['submitted_variant'] == '14-62188231-TT-GA' - assert results['NM_001530.3:c.231_232delinsGA']['genome_context_intronic_sequence'] == '' - assert results['NM_001530.3:c.231_232delinsGA']['hgvs_lrg_variant'] == '' - assert results['NM_001530.3:c.231_232delinsGA']['hgvs_transcript_variant'] == 'NM_001530.3:c.231_232delinsGA' - assert results['NM_001530.3:c.231_232delinsGA']['hgvs_refseqgene_variant'] == 'NG_029606.1:g.31113_31114delinsGA' - assert results['NM_001530.3:c.231_232delinsGA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': 'chr14', 'ref': 'TT', 'pos': '62188231', 'alt': 'GA'}} - assert results['NM_001530.3:c.231_232delinsGA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': 'chr14', 'ref': 'TT', 'pos': '61721513', 'alt': 'GA'}} - assert results['NM_001530.3:c.231_232delinsGA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': '14', 'ref': 'TT', 'pos': '62188231', 'alt': 'GA'}} - assert results['NM_001530.3:c.231_232delinsGA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': 
'14', 'ref': 'TT', 'pos': '61721513', 'alt': 'GA'}} - assert results['NM_001530.3:c.231_232delinsGA']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029606.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001521.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001530.3'} + assert 'NM_181054.2:c.231_232delinsGA' in list(results.keys()) + assert results['NM_181054.2:c.231_232delinsGA']['submitted_variant'] == '14-62188231-TT-GA' + assert results['NM_181054.2:c.231_232delinsGA']['gene_symbol'] == 'HIF1A' + assert results['NM_181054.2:c.231_232delinsGA']['gene_ids'] == {'hgnc_id': 'HGNC:4910', 'entrez_gene_id': '3091', 'ucsc_id': 'uc001xfq.3', 'omim_id': ['603348']} + assert results['NM_181054.2:c.231_232delinsGA']['hgvs_transcript_variant'] == 'NM_181054.2:c.231_232delinsGA' + assert results['NM_181054.2:c.231_232delinsGA']['genome_context_intronic_sequence'] == '' + assert results['NM_181054.2:c.231_232delinsGA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_181054.2:c.231_232delinsGA']['hgvs_refseqgene_variant'] == '' + assert results['NM_181054.2:c.231_232delinsGA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_851397.1:p.(Asp77_Leu78delinsGluMet)', 'slr': 'NP_851397.1:p.(D77_L78delinsEM)'} + assert results['NM_181054.2:c.231_232delinsGA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_181054.2:c.231_232delinsGA']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_181054.2:c.231_232delinsGA']['alt_genomic_loci'], []) + assert results['NM_181054.2:c.231_232delinsGA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': 'chr14', 'pos': '62188231', 'ref': 'TT', 'alt': 'GA'}} + assert results['NM_181054.2:c.231_232delinsGA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': 'chr14', 'pos': '61721513', 'ref': 'TT', 'alt': 
'GA'}} + assert results['NM_181054.2:c.231_232delinsGA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': '14', 'pos': '62188231', 'ref': 'TT', 'alt': 'GA'}} + assert results['NM_181054.2:c.231_232delinsGA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': '14', 'pos': '61721513', 'ref': 'TT', 'alt': 'GA'}} + assert results['NM_181054.2:c.231_232delinsGA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181054.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_851397.1'} - assert results['flag'] == 'gene_variant' assert 'NM_001243084.1:c.303_304delinsGA' in list(results.keys()) - assert results['NM_001243084.1:c.303_304delinsGA']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001243084.1:c.303_304delinsGA']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001243084.1:c.303_304delinsGA']['alt_genomic_loci'], []) - assert results['NM_001243084.1:c.303_304delinsGA']['gene_symbol'] == 'HIF1A' - assert results['NM_001243084.1:c.303_304delinsGA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001230013.1:p.(Asp101_Leu102delinsGluMet)', 'slr': 'NP_001230013.1:p.(D101_L102delinsEM)'} assert results['NM_001243084.1:c.303_304delinsGA']['submitted_variant'] == '14-62188231-TT-GA' - assert results['NM_001243084.1:c.303_304delinsGA']['genome_context_intronic_sequence'] == '' - assert results['NM_001243084.1:c.303_304delinsGA']['hgvs_lrg_variant'] == '' + assert results['NM_001243084.1:c.303_304delinsGA']['gene_symbol'] == 'HIF1A' + assert results['NM_001243084.1:c.303_304delinsGA']['gene_ids'] == {'hgnc_id': 'HGNC:4910', 'entrez_gene_id': '3091', 'ucsc_id': 'uc001xfq.3', 'omim_id': ['603348']} assert results['NM_001243084.1:c.303_304delinsGA']['hgvs_transcript_variant'] == 'NM_001243084.1:c.303_304delinsGA' + assert 
results['NM_001243084.1:c.303_304delinsGA']['genome_context_intronic_sequence'] == '' + assert results['NM_001243084.1:c.303_304delinsGA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001243084.1:c.303_304delinsGA']['hgvs_refseqgene_variant'] == '' - assert results['NM_001243084.1:c.303_304delinsGA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': 'chr14', 'ref': 'TT', 'pos': '62188231', 'alt': 'GA'}} - assert results['NM_001243084.1:c.303_304delinsGA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': 'chr14', 'ref': 'TT', 'pos': '61721513', 'alt': 'GA'}} - assert results['NM_001243084.1:c.303_304delinsGA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': '14', 'ref': 'TT', 'pos': '62188231', 'alt': 'GA'}} - assert results['NM_001243084.1:c.303_304delinsGA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': '14', 'ref': 'TT', 'pos': '61721513', 'alt': 'GA'}} - assert results['NM_001243084.1:c.303_304delinsGA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001230013.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001243084.1'} - - assert 'NM_181054.2:c.231_232delinsGA' in list(results.keys()) - assert results['NM_181054.2:c.231_232delinsGA']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_181054.2:c.231_232delinsGA']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_181054.2:c.231_232delinsGA']['alt_genomic_loci'], []) - assert results['NM_181054.2:c.231_232delinsGA']['gene_symbol'] == 'HIF1A' - assert results['NM_181054.2:c.231_232delinsGA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_851397.1:p.(Asp77_Leu78delinsGluMet)', 'slr': 
'NP_851397.1:p.(D77_L78delinsEM)'} - assert results['NM_181054.2:c.231_232delinsGA']['submitted_variant'] == '14-62188231-TT-GA' - assert results['NM_181054.2:c.231_232delinsGA']['genome_context_intronic_sequence'] == '' - assert results['NM_181054.2:c.231_232delinsGA']['hgvs_lrg_variant'] == '' - assert results['NM_181054.2:c.231_232delinsGA']['hgvs_transcript_variant'] == 'NM_181054.2:c.231_232delinsGA' - assert results['NM_181054.2:c.231_232delinsGA']['hgvs_refseqgene_variant'] == '' - assert results['NM_181054.2:c.231_232delinsGA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': 'chr14', 'ref': 'TT', 'pos': '62188231', 'alt': 'GA'}} - assert results['NM_181054.2:c.231_232delinsGA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': 'chr14', 'ref': 'TT', 'pos': '61721513', 'alt': 'GA'}} - assert results['NM_181054.2:c.231_232delinsGA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': '14', 'ref': 'TT', 'pos': '62188231', 'alt': 'GA'}} - assert results['NM_181054.2:c.231_232delinsGA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': '14', 'ref': 'TT', 'pos': '61721513', 'alt': 'GA'}} - assert results['NM_181054.2:c.231_232delinsGA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_851397.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181054.2'} + assert results['NM_001243084.1:c.303_304delinsGA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001230013.1:p.(Asp101_Leu102delinsGluMet)', 'slr': 'NP_001230013.1:p.(D101_L102delinsEM)'} + assert results['NM_001243084.1:c.303_304delinsGA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001243084.1:c.303_304delinsGA']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['NM_001243084.1:c.303_304delinsGA']['alt_genomic_loci'], []) + assert results['NM_001243084.1:c.303_304delinsGA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': 'chr14', 'pos': '62188231', 'ref': 'TT', 'alt': 'GA'}} + assert results['NM_001243084.1:c.303_304delinsGA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': 'chr14', 'pos': '61721513', 'ref': 'TT', 'alt': 'GA'}} + assert results['NM_001243084.1:c.303_304delinsGA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': '14', 'pos': '62188231', 'ref': 'TT', 'alt': 'GA'}} + assert results['NM_001243084.1:c.303_304delinsGA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': '14', 'pos': '61721513', 'ref': 'TT', 'alt': 'GA'}} + assert results['NM_001243084.1:c.303_304delinsGA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001243084.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001230013.1'} + assert 'NM_001530.3:c.231_232delinsGA' in list(results.keys()) + assert results['NM_001530.3:c.231_232delinsGA']['submitted_variant'] == '14-62188231-TT-GA' + assert results['NM_001530.3:c.231_232delinsGA']['gene_symbol'] == 'HIF1A' + assert results['NM_001530.3:c.231_232delinsGA']['gene_ids'] == {'hgnc_id': 'HGNC:4910', 'entrez_gene_id': '3091', 'ucsc_id': 'uc001xfq.3', 'omim_id': ['603348']} + assert results['NM_001530.3:c.231_232delinsGA']['hgvs_transcript_variant'] == 'NM_001530.3:c.231_232delinsGA' + assert results['NM_001530.3:c.231_232delinsGA']['genome_context_intronic_sequence'] == '' + assert results['NM_001530.3:c.231_232delinsGA']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_001530.3:c.231_232delinsGA']['hgvs_refseqgene_variant'] == 'NG_029606.1:g.31113_31114delinsGA' + assert results['NM_001530.3:c.231_232delinsGA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001521.1:p.(Asp77_Leu78delinsGluMet)', 'slr': 'NP_001521.1:p.(D77_L78delinsEM)'} + assert results['NM_001530.3:c.231_232delinsGA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001530.3:c.231_232delinsGA']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001530.3:c.231_232delinsGA']['alt_genomic_loci'], []) + assert results['NM_001530.3:c.231_232delinsGA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': 'chr14', 'pos': '62188231', 'ref': 'TT', 'alt': 'GA'}} + assert results['NM_001530.3:c.231_232delinsGA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': 'chr14', 'pos': '61721513', 'ref': 'TT', 'alt': 'GA'}} + assert results['NM_001530.3:c.231_232delinsGA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.62188231_62188232delinsGA', 'vcf': {'chr': '14', 'pos': '62188231', 'ref': 'TT', 'alt': 'GA'}} + assert results['NM_001530.3:c.231_232delinsGA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.61721513_61721514delinsGA', 'vcf': {'chr': '14', 'pos': '61721513', 'ref': 'TT', 'alt': 'GA'}} + assert results['NM_001530.3:c.231_232delinsGA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001530.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001521.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029606.1'} def test_variant226(self): variant = '14-63174827-C-A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_139318.3:c.2366G>T' in list(results.keys()) - assert 
results['NM_139318.3:c.2366G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_139318.3:c.2366G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_139318.3:c.2366G>T']['alt_genomic_loci'], []) - assert results['NM_139318.3:c.2366G>T']['gene_symbol'] == 'KCNH5' - assert results['NM_139318.3:c.2366G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_647479.2:p.(Gly789Val)', 'slr': 'NP_647479.2:p.(G789V)'} - assert results['NM_139318.3:c.2366G>T']['submitted_variant'] == '14-63174827-C-A' - assert results['NM_139318.3:c.2366G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_139318.3:c.2366G>T']['hgvs_lrg_variant'] == '' - assert results['NM_139318.3:c.2366G>T']['hgvs_transcript_variant'] == 'NM_139318.3:c.2366G>T' - assert results['NM_139318.3:c.2366G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_139318.3:c.2366G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': 'chr14', 'ref': 'C', 'pos': '63174827', 'alt': 'A'}} - assert 'hg38' not in list(results['NM_139318.3:c.2366G>T']['primary_assembly_loci'].keys()) - assert results['NM_139318.3:c.2366G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': '14', 'ref': 'C', 'pos': '63174827', 'alt': 'A'}} - assert 'grch38' not in list(results['NM_139318.3:c.2366G>T']['primary_assembly_loci'].keys()) - assert results['NM_139318.3:c.2366G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_647479.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_139318.3'} - + assert results['flag'] == 'gene_variant' assert 'NM_172375.1:c.*333G>T' in list(results.keys()) - assert results['NM_172375.1:c.*333G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_172375.1:c.*333G>T']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_172375.1:c.*333G>T']['alt_genomic_loci'], []) - assert results['NM_172375.1:c.*333G>T']['gene_symbol'] == 'KCNH5' - assert results['NM_172375.1:c.*333G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_758963.1:p.?', 'slr': 'NP_758963.1:p.?'} assert results['NM_172375.1:c.*333G>T']['submitted_variant'] == '14-63174827-C-A' - assert results['NM_172375.1:c.*333G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_172375.1:c.*333G>T']['hgvs_lrg_variant'] == '' + assert results['NM_172375.1:c.*333G>T']['gene_symbol'] == 'KCNH5' + assert results['NM_172375.1:c.*333G>T']['gene_ids'] == {'hgnc_id': 'HGNC:6254', 'entrez_gene_id': '27133', 'ucsc_id': 'uc001xfx.5', 'omim_id': ['605716']} assert results['NM_172375.1:c.*333G>T']['hgvs_transcript_variant'] == 'NM_172375.1:c.*333G>T' + assert results['NM_172375.1:c.*333G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_172375.1:c.*333G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_172375.1:c.*333G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_172375.1:c.*333G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': 'chr14', 'ref': 'C', 'pos': '63174827', 'alt': 'A'}} + assert results['NM_172375.1:c.*333G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_758963.1:p.?', 'slr': 'NP_758963.1:p.?'} + assert results['NM_172375.1:c.*333G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_172375.1:c.*333G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_172375.1:c.*333G>T']['alt_genomic_loci'], []) + assert results['NM_172375.1:c.*333G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': 'chr14', 'pos': '63174827', 'ref': 'C', 'alt': 'A'}} assert 'hg38' not in list(results['NM_172375.1:c.*333G>T']['primary_assembly_loci'].keys()) - assert 
results['NM_172375.1:c.*333G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': '14', 'ref': 'C', 'pos': '63174827', 'alt': 'A'}} + assert results['NM_172375.1:c.*333G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': '14', 'pos': '63174827', 'ref': 'C', 'alt': 'A'}} assert 'grch38' not in list(results['NM_172375.1:c.*333G>T']['primary_assembly_loci'].keys()) - assert results['NM_172375.1:c.*333G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_758963.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_172375.1'} + assert results['NM_172375.1:c.*333G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_172375.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_758963.1'} assert 'NM_172375.2:c.*333G>T' in list(results.keys()) - assert results['NM_172375.2:c.*333G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_172375.2:c.*333G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_172375.2:c.*333G>T']['alt_genomic_loci'], []) - assert results['NM_172375.2:c.*333G>T']['gene_symbol'] == 'KCNH5' - assert results['NM_172375.2:c.*333G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_758963.1:p.?', 'slr': 'NP_758963.1:p.?'} assert results['NM_172375.2:c.*333G>T']['submitted_variant'] == '14-63174827-C-A' - assert results['NM_172375.2:c.*333G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_172375.2:c.*333G>T']['hgvs_lrg_variant'] == '' + assert results['NM_172375.2:c.*333G>T']['gene_symbol'] == 'KCNH5' + assert results['NM_172375.2:c.*333G>T']['gene_ids'] == {'hgnc_id': 'HGNC:6254', 'entrez_gene_id': '27133', 'ucsc_id': 'uc001xfx.5', 'omim_id': ['605716']} assert results['NM_172375.2:c.*333G>T']['hgvs_transcript_variant'] == 'NM_172375.2:c.*333G>T' + assert 
results['NM_172375.2:c.*333G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_172375.2:c.*333G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_172375.2:c.*333G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_172375.2:c.*333G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': 'chr14', 'ref': 'C', 'pos': '63174827', 'alt': 'A'}} - assert results['NM_172375.2:c.*333G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.62708109C>A', 'vcf': {'chr': 'chr14', 'ref': 'C', 'pos': '62708109', 'alt': 'A'}} - assert results['NM_172375.2:c.*333G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': '14', 'ref': 'C', 'pos': '63174827', 'alt': 'A'}} - assert results['NM_172375.2:c.*333G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.62708109C>A', 'vcf': {'chr': '14', 'ref': 'C', 'pos': '62708109', 'alt': 'A'}} - assert results['NM_172375.2:c.*333G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_758963.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_172375.2'} + assert results['NM_172375.2:c.*333G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_758963.1:p.?', 'slr': 'NP_758963.1:p.?'} + assert results['NM_172375.2:c.*333G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_172375.2:c.*333G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_172375.2:c.*333G>T']['alt_genomic_loci'], []) + assert results['NM_172375.2:c.*333G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': 'chr14', 'pos': '63174827', 'ref': 'C', 'alt': 'A'}} + assert results['NM_172375.2:c.*333G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.62708109C>A', 'vcf': {'chr': 'chr14', 'pos': 
'62708109', 'ref': 'C', 'alt': 'A'}} + assert results['NM_172375.2:c.*333G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': '14', 'pos': '63174827', 'ref': 'C', 'alt': 'A'}} + assert results['NM_172375.2:c.*333G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.62708109C>A', 'vcf': {'chr': '14', 'pos': '62708109', 'ref': 'C', 'alt': 'A'}} + assert results['NM_172375.2:c.*333G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_172375.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_758963.1'} - assert results['flag'] == 'gene_variant' assert 'NM_139318.4:c.2366G>T' in list(results.keys()) - assert results['NM_139318.4:c.2366G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_139318.4:c.2366G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_139318.4:c.2366G>T']['alt_genomic_loci'], []) - assert results['NM_139318.4:c.2366G>T']['gene_symbol'] == 'KCNH5' - assert results['NM_139318.4:c.2366G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_647479.2:p.(Gly789Val)', 'slr': 'NP_647479.2:p.(G789V)'} assert results['NM_139318.4:c.2366G>T']['submitted_variant'] == '14-63174827-C-A' - assert results['NM_139318.4:c.2366G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_139318.4:c.2366G>T']['hgvs_lrg_variant'] == '' + assert results['NM_139318.4:c.2366G>T']['gene_symbol'] == 'KCNH5' + assert results['NM_139318.4:c.2366G>T']['gene_ids'] == {'hgnc_id': 'HGNC:6254', 'entrez_gene_id': '27133', 'ucsc_id': 'uc001xfx.5', 'omim_id': ['605716']} assert results['NM_139318.4:c.2366G>T']['hgvs_transcript_variant'] == 'NM_139318.4:c.2366G>T' + assert results['NM_139318.4:c.2366G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_139318.4:c.2366G>T']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_139318.4:c.2366G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_139318.4:c.2366G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': 'chr14', 'ref': 'C', 'pos': '63174827', 'alt': 'A'}} - assert results['NM_139318.4:c.2366G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.62708109C>A', 'vcf': {'chr': 'chr14', 'ref': 'C', 'pos': '62708109', 'alt': 'A'}} - assert results['NM_139318.4:c.2366G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': '14', 'ref': 'C', 'pos': '63174827', 'alt': 'A'}} - assert results['NM_139318.4:c.2366G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.62708109C>A', 'vcf': {'chr': '14', 'ref': 'C', 'pos': '62708109', 'alt': 'A'}} - assert results['NM_139318.4:c.2366G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_647479.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_139318.4'} + assert results['NM_139318.4:c.2366G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_647479.2:p.(Gly789Val)', 'slr': 'NP_647479.2:p.(G789V)'} + assert results['NM_139318.4:c.2366G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_139318.4:c.2366G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_139318.4:c.2366G>T']['alt_genomic_loci'], []) + assert results['NM_139318.4:c.2366G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': 'chr14', 'pos': '63174827', 'ref': 'C', 'alt': 'A'}} + assert results['NM_139318.4:c.2366G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000014.9:g.62708109C>A', 'vcf': {'chr': 'chr14', 'pos': '62708109', 'ref': 'C', 'alt': 'A'}} + assert results['NM_139318.4:c.2366G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000014.8:g.63174827C>A', 'vcf': {'chr': '14', 'pos': '63174827', 'ref': 'C', 'alt': 'A'}} + assert results['NM_139318.4:c.2366G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000014.9:g.62708109C>A', 'vcf': {'chr': '14', 'pos': '62708109', 'ref': 'C', 'alt': 'A'}} + assert results['NM_139318.4:c.2366G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_139318.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_647479.2'} + assert 'NM_139318.3:c.2366G>T' in list(results.keys()) + assert results['NM_139318.3:c.2366G>T']['submitted_variant'] == '14-63174827-C-A' + assert results['NM_139318.3:c.2366G>T']['gene_symbol'] == 'KCNH5' + assert results['NM_139318.3:c.2366G>T']['gene_ids'] == {'hgnc_id': 'HGNC:6254', 'entrez_gene_id': '27133', 'ucsc_id': 'uc001xfx.5', 'omim_id': ['605716']} + assert results['NM_139318.3:c.2366G>T']['hgvs_transcript_variant'] == 'NM_139318.3:c.2366G>T' + assert results['NM_139318.3:c.2366G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_139318.3:c.2366G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_139318.3:c.2366G>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_139318.3:c.2366G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_647479.2:p.(Gly789Val)', 'slr': 'NP_647479.2:p.(G789V)'} + assert results['NM_139318.3:c.2366G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_139318.3:c.2366G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_139318.3:c.2366G>T']['alt_genomic_loci'], []) + assert results['NM_139318.3:c.2366G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000014.8:g.63174827C>A', 'vcf': {'chr': 'chr14', 'pos': '63174827', 'ref': 'C', 'alt': 'A'}} + assert 'hg38' not in list(results['NM_139318.3:c.2366G>T']['primary_assembly_loci'].keys()) + assert results['NM_139318.3:c.2366G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000014.8:g.63174827C>A', 'vcf': {'chr': '14', 'pos': '63174827', 'ref': 'C', 'alt': 'A'}} + assert 'grch38' not in list(results['NM_139318.3:c.2366G>T']['primary_assembly_loci'].keys()) + assert results['NM_139318.3:c.2366G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_139318.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_647479.2'} def test_variant227(self): variant = '15-42680000-CA-C' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_000070.2:c.550del' in list(results.keys()) - assert results['NM_000070.2:c.550del']['hgvs_lrg_transcript_variant'] == 'LRG_849t1:c.550del' - assert results['NM_000070.2:c.550del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000070.2:c.550del']['alt_genomic_loci'], []) - assert results['NM_000070.2:c.550del']['gene_symbol'] == 'CAPN3' - assert results['NM_000070.2:c.550del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000061.1(LRG_849p1):p.(Thr184ArgfsTer36)', 'slr': 'NP_000061.1:p.(T184Rfs*36)'} assert results['NM_000070.2:c.550del']['submitted_variant'] == '15-42680000-CA-C' - assert results['NM_000070.2:c.550del']['genome_context_intronic_sequence'] == '' - assert results['NM_000070.2:c.550del']['hgvs_lrg_variant'] == 'LRG_849:g.44702del' + assert results['NM_000070.2:c.550del']['gene_symbol'] == 'CAPN3' + assert results['NM_000070.2:c.550del']['gene_ids'] == {'hgnc_id': 'HGNC:1480', 'entrez_gene_id': '825', 'ucsc_id': 'uc001zpp.2', 'omim_id': ['114240']} assert results['NM_000070.2:c.550del']['hgvs_transcript_variant'] == 'NM_000070.2:c.550del' + assert results['NM_000070.2:c.550del']['genome_context_intronic_sequence'] == '' + assert results['NM_000070.2:c.550del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000070.2:c.550del']['hgvs_refseqgene_variant'] == 'NG_008660.1:g.44702del' - assert 
results['NM_000070.2:c.550del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002del', 'vcf': {'chr': 'chr15', 'ref': 'CA', 'pos': '42680000', 'alt': 'C'}} - assert results['NM_000070.2:c.550del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804del', 'vcf': {'chr': 'chr15', 'ref': 'CA', 'pos': '42387802', 'alt': 'C'}} - assert results['NM_000070.2:c.550del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002del', 'vcf': {'chr': '15', 'ref': 'CA', 'pos': '42680000', 'alt': 'C'}} - assert results['NM_000070.2:c.550del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804del', 'vcf': {'chr': '15', 'ref': 'CA', 'pos': '42387802', 'alt': 'C'}} - assert results['NM_000070.2:c.550del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008660.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000061.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000070.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_849.xml'} + assert results['NM_000070.2:c.550del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000061.1(LRG_849p1):p.(Thr184ArgfsTer36)', 'slr': 'NP_000061.1:p.(T184Rfs*36)'} + assert results['NM_000070.2:c.550del']['hgvs_lrg_transcript_variant'] == 'LRG_849t1:c.550del' + assert results['NM_000070.2:c.550del']['hgvs_lrg_variant'] == 'LRG_849:g.44702del' + self.assertCountEqual(results['NM_000070.2:c.550del']['alt_genomic_loci'], []) + assert results['NM_000070.2:c.550del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002del', 'vcf': {'chr': 'chr15', 'pos': '42680000', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_000070.2:c.550del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804del', 'vcf': {'chr': 'chr15', 'pos': '42387802', 'ref': 'CA', 'alt': 'C'}} + 
assert results['NM_000070.2:c.550del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002del', 'vcf': {'chr': '15', 'pos': '42680000', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_000070.2:c.550del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804del', 'vcf': {'chr': '15', 'pos': '42387802', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_000070.2:c.550del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000070.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000061.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008660.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_849.xml'} - assert results['flag'] == 'gene_variant' assert 'NM_024344.1:c.550del' in list(results.keys()) - assert results['NM_024344.1:c.550del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_024344.1:c.550del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_024344.1:c.550del']['alt_genomic_loci'], []) - assert results['NM_024344.1:c.550del']['gene_symbol'] == 'CAPN3' - assert results['NM_024344.1:c.550del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_077320.1:p.(Thr184ArgfsTer36)', 'slr': 'NP_077320.1:p.(T184Rfs*36)'} assert results['NM_024344.1:c.550del']['submitted_variant'] == '15-42680000-CA-C' - assert results['NM_024344.1:c.550del']['genome_context_intronic_sequence'] == '' - assert results['NM_024344.1:c.550del']['hgvs_lrg_variant'] == '' + assert results['NM_024344.1:c.550del']['gene_symbol'] == 'CAPN3' + assert results['NM_024344.1:c.550del']['gene_ids'] == {'hgnc_id': 'HGNC:1480', 'entrez_gene_id': '825', 'ucsc_id': 'uc001zpp.2', 'omim_id': ['114240']} assert results['NM_024344.1:c.550del']['hgvs_transcript_variant'] == 'NM_024344.1:c.550del' + assert results['NM_024344.1:c.550del']['genome_context_intronic_sequence'] == '' + assert 
results['NM_024344.1:c.550del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_024344.1:c.550del']['hgvs_refseqgene_variant'] == '' - assert results['NM_024344.1:c.550del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002del', 'vcf': {'chr': 'chr15', 'ref': 'CA', 'pos': '42680000', 'alt': 'C'}} - assert results['NM_024344.1:c.550del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804del', 'vcf': {'chr': 'chr15', 'ref': 'CA', 'pos': '42387802', 'alt': 'C'}} - assert results['NM_024344.1:c.550del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002del', 'vcf': {'chr': '15', 'ref': 'CA', 'pos': '42680000', 'alt': 'C'}} - assert results['NM_024344.1:c.550del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804del', 'vcf': {'chr': '15', 'ref': 'CA', 'pos': '42387802', 'alt': 'C'}} - assert results['NM_024344.1:c.550del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_077320.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024344.1'} + assert results['NM_024344.1:c.550del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_077320.1:p.(Thr184ArgfsTer36)', 'slr': 'NP_077320.1:p.(T184Rfs*36)'} + assert results['NM_024344.1:c.550del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_024344.1:c.550del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_024344.1:c.550del']['alt_genomic_loci'], []) + assert results['NM_024344.1:c.550del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002del', 'vcf': {'chr': 'chr15', 'pos': '42680000', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_024344.1:c.550del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804del', 'vcf': {'chr': 'chr15', 'pos': '42387802', 'ref': 'CA', 'alt': 'C'}} + assert 
results['NM_024344.1:c.550del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002del', 'vcf': {'chr': '15', 'pos': '42680000', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_024344.1:c.550del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804del', 'vcf': {'chr': '15', 'pos': '42387802', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_024344.1:c.550del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024344.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_077320.1'} assert 'NM_173087.1:c.550del' in list(results.keys()) - assert results['NM_173087.1:c.550del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_173087.1:c.550del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_173087.1:c.550del']['alt_genomic_loci'], []) - assert results['NM_173087.1:c.550del']['gene_symbol'] == 'CAPN3' - assert results['NM_173087.1:c.550del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775110.1:p.(Thr184ArgfsTer36)', 'slr': 'NP_775110.1:p.(T184Rfs*36)'} assert results['NM_173087.1:c.550del']['submitted_variant'] == '15-42680000-CA-C' - assert results['NM_173087.1:c.550del']['genome_context_intronic_sequence'] == '' - assert results['NM_173087.1:c.550del']['hgvs_lrg_variant'] == '' + assert results['NM_173087.1:c.550del']['gene_symbol'] == 'CAPN3' + assert results['NM_173087.1:c.550del']['gene_ids'] == {'hgnc_id': 'HGNC:1480', 'entrez_gene_id': '825', 'ucsc_id': 'uc001zpp.2', 'omim_id': ['114240']} assert results['NM_173087.1:c.550del']['hgvs_transcript_variant'] == 'NM_173087.1:c.550del' + assert results['NM_173087.1:c.550del']['genome_context_intronic_sequence'] == '' + assert results['NM_173087.1:c.550del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_173087.1:c.550del']['hgvs_refseqgene_variant'] == '' - assert results['NM_173087.1:c.550del']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000015.9:g.42680002del', 'vcf': {'chr': 'chr15', 'ref': 'CA', 'pos': '42680000', 'alt': 'C'}} - assert results['NM_173087.1:c.550del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804del', 'vcf': {'chr': 'chr15', 'ref': 'CA', 'pos': '42387802', 'alt': 'C'}} - assert results['NM_173087.1:c.550del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002del', 'vcf': {'chr': '15', 'ref': 'CA', 'pos': '42680000', 'alt': 'C'}} - assert results['NM_173087.1:c.550del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804del', 'vcf': {'chr': '15', 'ref': 'CA', 'pos': '42387802', 'alt': 'C'}} - assert results['NM_173087.1:c.550del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173087.1'} - + assert results['NM_173087.1:c.550del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775110.1:p.(Thr184ArgfsTer36)', 'slr': 'NP_775110.1:p.(T184Rfs*36)'} + assert results['NM_173087.1:c.550del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_173087.1:c.550del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_173087.1:c.550del']['alt_genomic_loci'], []) + assert results['NM_173087.1:c.550del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002del', 'vcf': {'chr': 'chr15', 'pos': '42680000', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_173087.1:c.550del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804del', 'vcf': {'chr': 'chr15', 'pos': '42387802', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_173087.1:c.550del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002del', 'vcf': {'chr': '15', 'pos': '42680000', 'ref': 'CA', 'alt': 'C'}} + assert 
results['NM_173087.1:c.550del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804del', 'vcf': {'chr': '15', 'pos': '42387802', 'ref': 'CA', 'alt': 'C'}} + assert results['NM_173087.1:c.550del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173087.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775110.1'} def test_variant228(self): variant = '15-42680000-CA-CAA' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' + assert 'NM_000070.2:c.550dup' in list(results.keys()) + assert results['NM_000070.2:c.550dup']['submitted_variant'] == '15-42680000-CA-CAA' + assert results['NM_000070.2:c.550dup']['gene_symbol'] == 'CAPN3' + assert results['NM_000070.2:c.550dup']['gene_ids'] == {'hgnc_id': 'HGNC:1480', 'entrez_gene_id': '825', 'ucsc_id': 'uc001zpp.2', 'omim_id': ['114240']} + assert results['NM_000070.2:c.550dup']['hgvs_transcript_variant'] == 'NM_000070.2:c.550dup' + assert results['NM_000070.2:c.550dup']['genome_context_intronic_sequence'] == '' + assert results['NM_000070.2:c.550dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000070.2:c.550dup']['hgvs_refseqgene_variant'] == 'NG_008660.1:g.44702dup' + assert results['NM_000070.2:c.550dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000061.1(LRG_849p1):p.(Thr184AsnfsTer16)', 'slr': 'NP_000061.1:p.(T184Nfs*16)'} + assert results['NM_000070.2:c.550dup']['hgvs_lrg_transcript_variant'] == 'LRG_849t1:c.550dup' + assert results['NM_000070.2:c.550dup']['hgvs_lrg_variant'] == 'LRG_849:g.44702dup' + self.assertCountEqual(results['NM_000070.2:c.550dup']['alt_genomic_loci'], []) + assert results['NM_000070.2:c.550dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002dup', 'vcf': {'chr': 'chr15', 'pos': '42680000', 'ref': 'C', 'alt': 'CA'}} + assert 
results['NM_000070.2:c.550dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804dup', 'vcf': {'chr': 'chr15', 'pos': '42387802', 'ref': 'C', 'alt': 'CA'}} + assert results['NM_000070.2:c.550dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002dup', 'vcf': {'chr': '15', 'pos': '42680000', 'ref': 'C', 'alt': 'CA'}} + assert results['NM_000070.2:c.550dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804dup', 'vcf': {'chr': '15', 'pos': '42387802', 'ref': 'C', 'alt': 'CA'}} + assert results['NM_000070.2:c.550dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000070.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000061.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008660.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_849.xml'} + assert 'NM_024344.1:c.550dup' in list(results.keys()) - assert results['NM_024344.1:c.550dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_024344.1:c.550dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_024344.1:c.550dup']['alt_genomic_loci'], []) - assert results['NM_024344.1:c.550dup']['gene_symbol'] == 'CAPN3' - assert results['NM_024344.1:c.550dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_077320.1:p.(Thr184AsnfsTer16)', 'slr': 'NP_077320.1:p.(T184Nfs*16)'} assert results['NM_024344.1:c.550dup']['submitted_variant'] == '15-42680000-CA-CAA' - assert results['NM_024344.1:c.550dup']['genome_context_intronic_sequence'] == '' - assert results['NM_024344.1:c.550dup']['hgvs_lrg_variant'] == '' + assert results['NM_024344.1:c.550dup']['gene_symbol'] == 'CAPN3' + assert results['NM_024344.1:c.550dup']['gene_ids'] == {'hgnc_id': 'HGNC:1480', 'entrez_gene_id': '825', 'ucsc_id': 'uc001zpp.2', 'omim_id': ['114240']} assert results['NM_024344.1:c.550dup']['hgvs_transcript_variant'] == 
'NM_024344.1:c.550dup' + assert results['NM_024344.1:c.550dup']['genome_context_intronic_sequence'] == '' + assert results['NM_024344.1:c.550dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_024344.1:c.550dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_024344.1:c.550dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002dup', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '42680000', 'alt': 'CA'}} - assert results['NM_024344.1:c.550dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804dup', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '42387802', 'alt': 'CA'}} - assert results['NM_024344.1:c.550dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002dup', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '42680000', 'alt': 'CA'}} - assert results['NM_024344.1:c.550dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804dup', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '42387802', 'alt': 'CA'}} - assert results['NM_024344.1:c.550dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_077320.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024344.1'} + assert results['NM_024344.1:c.550dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_077320.1:p.(Thr184AsnfsTer16)', 'slr': 'NP_077320.1:p.(T184Nfs*16)'} + assert results['NM_024344.1:c.550dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_024344.1:c.550dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_024344.1:c.550dup']['alt_genomic_loci'], []) + assert results['NM_024344.1:c.550dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002dup', 'vcf': {'chr': 'chr15', 'pos': '42680000', 'ref': 'C', 'alt': 'CA'}} + assert results['NM_024344.1:c.550dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000015.10:g.42387804dup', 'vcf': {'chr': 'chr15', 'pos': '42387802', 'ref': 'C', 'alt': 'CA'}} + assert results['NM_024344.1:c.550dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002dup', 'vcf': {'chr': '15', 'pos': '42680000', 'ref': 'C', 'alt': 'CA'}} + assert results['NM_024344.1:c.550dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804dup', 'vcf': {'chr': '15', 'pos': '42387802', 'ref': 'C', 'alt': 'CA'}} + assert results['NM_024344.1:c.550dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024344.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_077320.1'} assert 'NM_173087.1:c.550dup' in list(results.keys()) - assert results['NM_173087.1:c.550dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_173087.1:c.550dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_173087.1:c.550dup']['alt_genomic_loci'], []) - assert results['NM_173087.1:c.550dup']['gene_symbol'] == 'CAPN3' - assert results['NM_173087.1:c.550dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775110.1:p.(Thr184AsnfsTer16)', 'slr': 'NP_775110.1:p.(T184Nfs*16)'} assert results['NM_173087.1:c.550dup']['submitted_variant'] == '15-42680000-CA-CAA' - assert results['NM_173087.1:c.550dup']['genome_context_intronic_sequence'] == '' - assert results['NM_173087.1:c.550dup']['hgvs_lrg_variant'] == '' + assert results['NM_173087.1:c.550dup']['gene_symbol'] == 'CAPN3' + assert results['NM_173087.1:c.550dup']['gene_ids'] == {'hgnc_id': 'HGNC:1480', 'entrez_gene_id': '825', 'ucsc_id': 'uc001zpp.2', 'omim_id': ['114240']} assert results['NM_173087.1:c.550dup']['hgvs_transcript_variant'] == 'NM_173087.1:c.550dup' + assert results['NM_173087.1:c.550dup']['genome_context_intronic_sequence'] == '' + assert results['NM_173087.1:c.550dup']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_173087.1:c.550dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_173087.1:c.550dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002dup', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '42680000', 'alt': 'CA'}} - assert results['NM_173087.1:c.550dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804dup', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '42387802', 'alt': 'CA'}} - assert results['NM_173087.1:c.550dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002dup', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '42680000', 'alt': 'CA'}} - assert results['NM_173087.1:c.550dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804dup', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '42387802', 'alt': 'CA'}} - assert results['NM_173087.1:c.550dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173087.1'} - - assert results['flag'] == 'gene_variant' - assert 'NM_000070.2:c.550dup' in list(results.keys()) - assert results['NM_000070.2:c.550dup']['hgvs_lrg_transcript_variant'] == 'LRG_849t1:c.550dup' - assert results['NM_000070.2:c.550dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000070.2:c.550dup']['alt_genomic_loci'], []) - assert results['NM_000070.2:c.550dup']['gene_symbol'] == 'CAPN3' - assert results['NM_000070.2:c.550dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000061.1(LRG_849p1):p.(Thr184AsnfsTer16)', 'slr': 'NP_000061.1:p.(T184Nfs*16)'} - assert results['NM_000070.2:c.550dup']['submitted_variant'] == '15-42680000-CA-CAA' - assert results['NM_000070.2:c.550dup']['genome_context_intronic_sequence'] == '' - assert results['NM_000070.2:c.550dup']['hgvs_lrg_variant'] == 'LRG_849:g.44702dup' - assert 
results['NM_000070.2:c.550dup']['hgvs_transcript_variant'] == 'NM_000070.2:c.550dup' - assert results['NM_000070.2:c.550dup']['hgvs_refseqgene_variant'] == 'NG_008660.1:g.44702dup' - assert results['NM_000070.2:c.550dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002dup', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '42680000', 'alt': 'CA'}} - assert results['NM_000070.2:c.550dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804dup', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '42387802', 'alt': 'CA'}} - assert results['NM_000070.2:c.550dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002dup', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '42680000', 'alt': 'CA'}} - assert results['NM_000070.2:c.550dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804dup', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '42387802', 'alt': 'CA'}} - assert results['NM_000070.2:c.550dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008660.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000061.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000070.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_849.xml'} - + assert results['NM_173087.1:c.550dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775110.1:p.(Thr184AsnfsTer16)', 'slr': 'NP_775110.1:p.(T184Nfs*16)'} + assert results['NM_173087.1:c.550dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_173087.1:c.550dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_173087.1:c.550dup']['alt_genomic_loci'], []) + assert results['NM_173087.1:c.550dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002dup', 'vcf': {'chr': 'chr15', 'pos': '42680000', 'ref': 'C', 'alt': 'CA'}} + assert 
results['NM_173087.1:c.550dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804dup', 'vcf': {'chr': 'chr15', 'pos': '42387802', 'ref': 'C', 'alt': 'CA'}} + assert results['NM_173087.1:c.550dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42680002dup', 'vcf': {'chr': '15', 'pos': '42680000', 'ref': 'C', 'alt': 'CA'}} + assert results['NM_173087.1:c.550dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42387804dup', 'vcf': {'chr': '15', 'pos': '42387802', 'ref': 'C', 'alt': 'CA'}} + assert results['NM_173087.1:c.550dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173087.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775110.1'} def test_variant229(self): variant = '15-42703179-T-TTCA' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_173088.1:c.825_826insTCA' in list(results.keys()) - assert results['NM_173088.1:c.825_826insTCA']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_173088.1:c.825_826insTCA']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_173088.1:c.825_826insTCA']['alt_genomic_loci'], []) - assert results['NM_173088.1:c.825_826insTCA']['gene_symbol'] == 'CAPN3' - assert results['NM_173088.1:c.825_826insTCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775111.1:p.(Val275_Arg276insSer)', 'slr': 'NP_775111.1:p.(V275_R276insS)'} assert results['NM_173088.1:c.825_826insTCA']['submitted_variant'] == '15-42703179-T-TTCA' - assert results['NM_173088.1:c.825_826insTCA']['genome_context_intronic_sequence'] == '' - assert results['NM_173088.1:c.825_826insTCA']['hgvs_lrg_variant'] == '' + assert results['NM_173088.1:c.825_826insTCA']['gene_symbol'] == 'CAPN3' + assert results['NM_173088.1:c.825_826insTCA']['gene_ids'] == {'hgnc_id': 'HGNC:1480', 
'entrez_gene_id': '825', 'ucsc_id': 'uc001zpp.2', 'omim_id': ['114240']} assert results['NM_173088.1:c.825_826insTCA']['hgvs_transcript_variant'] == 'NM_173088.1:c.825_826insTCA' + assert results['NM_173088.1:c.825_826insTCA']['genome_context_intronic_sequence'] == '' + assert results['NM_173088.1:c.825_826insTCA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_173088.1:c.825_826insTCA']['hgvs_refseqgene_variant'] == '' - assert results['NM_173088.1:c.825_826insTCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': 'chr15', 'ref': 'T', 'pos': '42703179', 'alt': 'TTCA'}} - assert results['NM_173088.1:c.825_826insTCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': 'chr15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} - assert results['NM_173088.1:c.825_826insTCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42703179', 'alt': 'TTCA'}} - assert results['NM_173088.1:c.825_826insTCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} - assert results['NM_173088.1:c.825_826insTCA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775111.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173088.1'} + assert results['NM_173088.1:c.825_826insTCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775111.1:p.(Val275_Arg276insSer)', 'slr': 'NP_775111.1:p.(V275_R276insS)'} + assert results['NM_173088.1:c.825_826insTCA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_173088.1:c.825_826insTCA']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_173088.1:c.825_826insTCA']['alt_genomic_loci'], []) + assert 
results['NM_173088.1:c.825_826insTCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': 'chr15', 'pos': '42703179', 'ref': 'T', 'alt': 'TTCA'}} + assert results['NM_173088.1:c.825_826insTCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': 'chr15', 'pos': '42410981', 'ref': 'T', 'alt': 'TTCA'}} + assert results['NM_173088.1:c.825_826insTCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': '15', 'pos': '42703179', 'ref': 'T', 'alt': 'TTCA'}} + assert results['NM_173088.1:c.825_826insTCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': '15', 'pos': '42410981', 'ref': 'T', 'alt': 'TTCA'}} + assert results['NM_173088.1:c.825_826insTCA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173088.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775111.1'} assert 'NM_173090.1:c.366_367insTCA' in list(results.keys()) - assert results['NM_173090.1:c.366_367insTCA']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_173090.1:c.366_367insTCA']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_173090.1:c.366_367insTCA']['alt_genomic_loci'], []) - assert results['NM_173090.1:c.366_367insTCA']['gene_symbol'] == 'CAPN3' - assert results['NM_173090.1:c.366_367insTCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775113.1:p.(Val122_Arg123insSer)', 'slr': 'NP_775113.1:p.(V122_R123insS)'} assert results['NM_173090.1:c.366_367insTCA']['submitted_variant'] == '15-42703179-T-TTCA' - assert results['NM_173090.1:c.366_367insTCA']['genome_context_intronic_sequence'] == '' - assert results['NM_173090.1:c.366_367insTCA']['hgvs_lrg_variant'] == '' + assert results['NM_173090.1:c.366_367insTCA']['gene_symbol'] 
== 'CAPN3' + assert results['NM_173090.1:c.366_367insTCA']['gene_ids'] == {'hgnc_id': 'HGNC:1480', 'entrez_gene_id': '825', 'ucsc_id': 'uc001zpp.2', 'omim_id': ['114240']} assert results['NM_173090.1:c.366_367insTCA']['hgvs_transcript_variant'] == 'NM_173090.1:c.366_367insTCA' + assert results['NM_173090.1:c.366_367insTCA']['genome_context_intronic_sequence'] == '' + assert results['NM_173090.1:c.366_367insTCA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_173090.1:c.366_367insTCA']['hgvs_refseqgene_variant'] == '' - assert results['NM_173090.1:c.366_367insTCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': 'chr15', 'ref': 'T', 'pos': '42703179', 'alt': 'TTCA'}} - assert results['NM_173090.1:c.366_367insTCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': 'chr15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} - assert results['NM_173090.1:c.366_367insTCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42703179', 'alt': 'TTCA'}} - assert results['NM_173090.1:c.366_367insTCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} - assert results['NM_173090.1:c.366_367insTCA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775113.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173090.1'} - - assert 'NM_173089.1:c.366_367insTCA' in list(results.keys()) - assert results['NM_173089.1:c.366_367insTCA']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_173089.1:c.366_367insTCA']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_173089.1:c.366_367insTCA']['alt_genomic_loci'], []) - assert 
results['NM_173089.1:c.366_367insTCA']['gene_symbol'] == 'CAPN3' - assert results['NM_173089.1:c.366_367insTCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775112.1:p.(Val122_Arg123insSer)', 'slr': 'NP_775112.1:p.(V122_R123insS)'} - assert results['NM_173089.1:c.366_367insTCA']['submitted_variant'] == '15-42703179-T-TTCA' - assert results['NM_173089.1:c.366_367insTCA']['genome_context_intronic_sequence'] == '' - assert results['NM_173089.1:c.366_367insTCA']['hgvs_lrg_variant'] == '' - assert results['NM_173089.1:c.366_367insTCA']['hgvs_transcript_variant'] == 'NM_173089.1:c.366_367insTCA' - assert results['NM_173089.1:c.366_367insTCA']['hgvs_refseqgene_variant'] == '' - assert results['NM_173089.1:c.366_367insTCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': 'chr15', 'ref': 'T', 'pos': '42703179', 'alt': 'TTCA'}} - assert results['NM_173089.1:c.366_367insTCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': 'chr15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} - assert results['NM_173089.1:c.366_367insTCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42703179', 'alt': 'TTCA'}} - assert results['NM_173089.1:c.366_367insTCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} - assert results['NM_173089.1:c.366_367insTCA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775112.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173089.1'} - - assert 'NM_173087.1:c.2085_2086insTCA' in list(results.keys()) - assert results['NM_173087.1:c.2085_2086insTCA']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_173087.1:c.2085_2086insTCA']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_173087.1:c.2085_2086insTCA']['alt_genomic_loci'], []) - assert results['NM_173087.1:c.2085_2086insTCA']['gene_symbol'] == 'CAPN3' - assert results['NM_173087.1:c.2085_2086insTCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775110.1:p.(Val695_Arg696insSer)', 'slr': 'NP_775110.1:p.(V695_R696insS)'} - assert results['NM_173087.1:c.2085_2086insTCA']['submitted_variant'] == '15-42703179-T-TTCA' - assert results['NM_173087.1:c.2085_2086insTCA']['genome_context_intronic_sequence'] == '' - assert results['NM_173087.1:c.2085_2086insTCA']['hgvs_lrg_variant'] == '' - assert results['NM_173087.1:c.2085_2086insTCA']['hgvs_transcript_variant'] == 'NM_173087.1:c.2085_2086insTCA' - assert results['NM_173087.1:c.2085_2086insTCA']['hgvs_refseqgene_variant'] == '' - assert results['NM_173087.1:c.2085_2086insTCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': 'chr15', 'ref': 'T', 'pos': '42703179', 'alt': 'TTCA'}} - assert results['NM_173087.1:c.2085_2086insTCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': 'chr15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} - assert results['NM_173087.1:c.2085_2086insTCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42703179', 'alt': 'TTCA'}} - assert results['NM_173087.1:c.2085_2086insTCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} - assert results['NM_173087.1:c.2085_2086insTCA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775110.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_173087.1'} + assert results['NM_173090.1:c.366_367insTCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775113.1:p.(Val122_Arg123insSer)', 'slr': 'NP_775113.1:p.(V122_R123insS)'} + assert results['NM_173090.1:c.366_367insTCA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_173090.1:c.366_367insTCA']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_173090.1:c.366_367insTCA']['alt_genomic_loci'], []) + assert results['NM_173090.1:c.366_367insTCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': 'chr15', 'pos': '42703179', 'ref': 'T', 'alt': 'TTCA'}} + assert results['NM_173090.1:c.366_367insTCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': 'chr15', 'pos': '42410981', 'ref': 'T', 'alt': 'TTCA'}} + assert results['NM_173090.1:c.366_367insTCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': '15', 'pos': '42703179', 'ref': 'T', 'alt': 'TTCA'}} + assert results['NM_173090.1:c.366_367insTCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': '15', 'pos': '42410981', 'ref': 'T', 'alt': 'TTCA'}} + assert results['NM_173090.1:c.366_367insTCA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173090.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775113.1'} - assert results['flag'] == 'gene_variant' assert 'NM_000070.2:c.2361_2362insTCA' in list(results.keys()) - assert results['NM_000070.2:c.2361_2362insTCA']['hgvs_lrg_transcript_variant'] == 'LRG_849t1:c.2361_2362insTCA' - assert results['NM_000070.2:c.2361_2362insTCA']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000070.2:c.2361_2362insTCA']['alt_genomic_loci'], []) - 
assert results['NM_000070.2:c.2361_2362insTCA']['gene_symbol'] == 'CAPN3' - assert results['NM_000070.2:c.2361_2362insTCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000061.1(LRG_849p1):p.(Val787_Arg788insSer)', 'slr': 'NP_000061.1:p.(V787_R788insS)'} assert results['NM_000070.2:c.2361_2362insTCA']['submitted_variant'] == '15-42703179-T-TTCA' - assert results['NM_000070.2:c.2361_2362insTCA']['genome_context_intronic_sequence'] == '' - assert results['NM_000070.2:c.2361_2362insTCA']['hgvs_lrg_variant'] == 'LRG_849:g.67879_67880insTCA' + assert results['NM_000070.2:c.2361_2362insTCA']['gene_symbol'] == 'CAPN3' + assert results['NM_000070.2:c.2361_2362insTCA']['gene_ids'] == {'hgnc_id': 'HGNC:1480', 'entrez_gene_id': '825', 'ucsc_id': 'uc001zpp.2', 'omim_id': ['114240']} assert results['NM_000070.2:c.2361_2362insTCA']['hgvs_transcript_variant'] == 'NM_000070.2:c.2361_2362insTCA' + assert results['NM_000070.2:c.2361_2362insTCA']['genome_context_intronic_sequence'] == '' + assert results['NM_000070.2:c.2361_2362insTCA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000070.2:c.2361_2362insTCA']['hgvs_refseqgene_variant'] == 'NG_008660.1:g.67879_67880insTCA' - assert results['NM_000070.2:c.2361_2362insTCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': 'chr15', 'ref': 'T', 'pos': '42703179', 'alt': 'TTCA'}} - assert results['NM_000070.2:c.2361_2362insTCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': 'chr15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} - assert results['NM_000070.2:c.2361_2362insTCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42703179', 'alt': 'TTCA'}} - assert results['NM_000070.2:c.2361_2362insTCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} - assert results['NM_000070.2:c.2361_2362insTCA']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008660.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000061.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000070.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_849.xml'} + assert results['NM_000070.2:c.2361_2362insTCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000061.1(LRG_849p1):p.(Val787_Arg788insSer)', 'slr': 'NP_000061.1:p.(V787_R788insS)'} + assert results['NM_000070.2:c.2361_2362insTCA']['hgvs_lrg_transcript_variant'] == 'LRG_849t1:c.2361_2362insTCA' + assert results['NM_000070.2:c.2361_2362insTCA']['hgvs_lrg_variant'] == 'LRG_849:g.67879_67880insTCA' + self.assertCountEqual(results['NM_000070.2:c.2361_2362insTCA']['alt_genomic_loci'], []) + assert results['NM_000070.2:c.2361_2362insTCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': 'chr15', 'pos': '42703179', 'ref': 'T', 'alt': 'TTCA'}} + assert results['NM_000070.2:c.2361_2362insTCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': 'chr15', 'pos': '42410981', 'ref': 'T', 'alt': 'TTCA'}} + assert results['NM_000070.2:c.2361_2362insTCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': '15', 'pos': '42703179', 'ref': 'T', 'alt': 'TTCA'}} + assert results['NM_000070.2:c.2361_2362insTCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': '15', 'pos': '42410981', 'ref': 'T', 'alt': 'TTCA'}} + assert results['NM_000070.2:c.2361_2362insTCA']['reference_sequence_records'] == {'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_000070.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000061.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008660.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_849.xml'} + + assert 'NM_173089.1:c.366_367insTCA' in list(results.keys()) + assert results['NM_173089.1:c.366_367insTCA']['submitted_variant'] == '15-42703179-T-TTCA' + assert results['NM_173089.1:c.366_367insTCA']['gene_symbol'] == 'CAPN3' + assert results['NM_173089.1:c.366_367insTCA']['gene_ids'] == {'hgnc_id': 'HGNC:1480', 'entrez_gene_id': '825', 'ucsc_id': 'uc001zpp.2', 'omim_id': ['114240']} + assert results['NM_173089.1:c.366_367insTCA']['hgvs_transcript_variant'] == 'NM_173089.1:c.366_367insTCA' + assert results['NM_173089.1:c.366_367insTCA']['genome_context_intronic_sequence'] == '' + assert results['NM_173089.1:c.366_367insTCA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_173089.1:c.366_367insTCA']['hgvs_refseqgene_variant'] == '' + assert results['NM_173089.1:c.366_367insTCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775112.1:p.(Val122_Arg123insSer)', 'slr': 'NP_775112.1:p.(V122_R123insS)'} + assert results['NM_173089.1:c.366_367insTCA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_173089.1:c.366_367insTCA']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_173089.1:c.366_367insTCA']['alt_genomic_loci'], []) + assert results['NM_173089.1:c.366_367insTCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': 'chr15', 'pos': '42703179', 'ref': 'T', 'alt': 'TTCA'}} + assert results['NM_173089.1:c.366_367insTCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': 'chr15', 'pos': '42410981', 'ref': 'T', 'alt': 'TTCA'}} + assert results['NM_173089.1:c.366_367insTCA']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': '15', 'pos': '42703179', 'ref': 'T', 'alt': 'TTCA'}} + assert results['NM_173089.1:c.366_367insTCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': '15', 'pos': '42410981', 'ref': 'T', 'alt': 'TTCA'}} + assert results['NM_173089.1:c.366_367insTCA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173089.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775112.1'} assert 'NM_024344.1:c.2343_2344insTCA' in list(results.keys()) - assert results['NM_024344.1:c.2343_2344insTCA']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_024344.1:c.2343_2344insTCA']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_024344.1:c.2343_2344insTCA']['alt_genomic_loci'], []) - assert results['NM_024344.1:c.2343_2344insTCA']['gene_symbol'] == 'CAPN3' - assert results['NM_024344.1:c.2343_2344insTCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_077320.1:p.(Val781_Arg782insSer)', 'slr': 'NP_077320.1:p.(V781_R782insS)'} assert results['NM_024344.1:c.2343_2344insTCA']['submitted_variant'] == '15-42703179-T-TTCA' - assert results['NM_024344.1:c.2343_2344insTCA']['genome_context_intronic_sequence'] == '' - assert results['NM_024344.1:c.2343_2344insTCA']['hgvs_lrg_variant'] == '' + assert results['NM_024344.1:c.2343_2344insTCA']['gene_symbol'] == 'CAPN3' + assert results['NM_024344.1:c.2343_2344insTCA']['gene_ids'] == {'hgnc_id': 'HGNC:1480', 'entrez_gene_id': '825', 'ucsc_id': 'uc001zpp.2', 'omim_id': ['114240']} assert results['NM_024344.1:c.2343_2344insTCA']['hgvs_transcript_variant'] == 'NM_024344.1:c.2343_2344insTCA' + assert results['NM_024344.1:c.2343_2344insTCA']['genome_context_intronic_sequence'] == '' + assert results['NM_024344.1:c.2343_2344insTCA']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_024344.1:c.2343_2344insTCA']['hgvs_refseqgene_variant'] == '' - assert results['NM_024344.1:c.2343_2344insTCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': 'chr15', 'ref': 'T', 'pos': '42703179', 'alt': 'TTCA'}} - assert results['NM_024344.1:c.2343_2344insTCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': 'chr15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} - assert results['NM_024344.1:c.2343_2344insTCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42703179', 'alt': 'TTCA'}} - assert results['NM_024344.1:c.2343_2344insTCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': '15', 'ref': 'T', 'pos': '42410981', 'alt': 'TTCA'}} - assert results['NM_024344.1:c.2343_2344insTCA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_077320.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024344.1'} + assert results['NM_024344.1:c.2343_2344insTCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_077320.1:p.(Val781_Arg782insSer)', 'slr': 'NP_077320.1:p.(V781_R782insS)'} + assert results['NM_024344.1:c.2343_2344insTCA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_024344.1:c.2343_2344insTCA']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_024344.1:c.2343_2344insTCA']['alt_genomic_loci'], []) + assert results['NM_024344.1:c.2343_2344insTCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': 'chr15', 'pos': '42703179', 'ref': 'T', 'alt': 'TTCA'}} + assert results['NM_024344.1:c.2343_2344insTCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': 'chr15', 'pos': '42410981', 'ref': 'T', 'alt': 'TTCA'}} + assert results['NM_024344.1:c.2343_2344insTCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': '15', 'pos': '42703179', 'ref': 'T', 'alt': 'TTCA'}} + assert results['NM_024344.1:c.2343_2344insTCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': '15', 'pos': '42410981', 'ref': 'T', 'alt': 'TTCA'}} + assert results['NM_024344.1:c.2343_2344insTCA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024344.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_077320.1'} + assert 'NM_173087.1:c.2085_2086insTCA' in list(results.keys()) + assert results['NM_173087.1:c.2085_2086insTCA']['submitted_variant'] == '15-42703179-T-TTCA' + assert results['NM_173087.1:c.2085_2086insTCA']['gene_symbol'] == 'CAPN3' + assert results['NM_173087.1:c.2085_2086insTCA']['gene_ids'] == {'hgnc_id': 'HGNC:1480', 'entrez_gene_id': '825', 'ucsc_id': 'uc001zpp.2', 'omim_id': ['114240']} + assert results['NM_173087.1:c.2085_2086insTCA']['hgvs_transcript_variant'] == 'NM_173087.1:c.2085_2086insTCA' + assert results['NM_173087.1:c.2085_2086insTCA']['genome_context_intronic_sequence'] == '' + assert results['NM_173087.1:c.2085_2086insTCA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_173087.1:c.2085_2086insTCA']['hgvs_refseqgene_variant'] == '' + assert results['NM_173087.1:c.2085_2086insTCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775110.1:p.(Val695_Arg696insSer)', 'slr': 'NP_775110.1:p.(V695_R696insS)'} + assert results['NM_173087.1:c.2085_2086insTCA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_173087.1:c.2085_2086insTCA']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_173087.1:c.2085_2086insTCA']['alt_genomic_loci'], []) + 
assert results['NM_173087.1:c.2085_2086insTCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': 'chr15', 'pos': '42703179', 'ref': 'T', 'alt': 'TTCA'}} + assert results['NM_173087.1:c.2085_2086insTCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': 'chr15', 'pos': '42410981', 'ref': 'T', 'alt': 'TTCA'}} + assert results['NM_173087.1:c.2085_2086insTCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703179_42703180insTCA', 'vcf': {'chr': '15', 'pos': '42703179', 'ref': 'T', 'alt': 'TTCA'}} + assert results['NM_173087.1:c.2085_2086insTCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410981_42410982insTCA', 'vcf': {'chr': '15', 'pos': '42410981', 'ref': 'T', 'alt': 'TTCA'}} + assert results['NM_173087.1:c.2085_2086insTCA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173087.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775110.1'} def test_variant230(self): variant = '15-42703179-TAG-TTCATCT' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_024344.1:c.2344_2345delinsTCATCT' in list(results.keys()) - assert results['NM_024344.1:c.2344_2345delinsTCATCT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_024344.1:c.2344_2345delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_024344.1:c.2344_2345delinsTCATCT']['alt_genomic_loci'], []) - assert results['NM_024344.1:c.2344_2345delinsTCATCT']['gene_symbol'] == 'CAPN3' - assert results['NM_024344.1:c.2344_2345delinsTCATCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_077320.1:p.(Arg782SerfsTer14)', 'slr': 'NP_077320.1:p.(R782Sfs*14)'} - assert results['NM_024344.1:c.2344_2345delinsTCATCT']['submitted_variant'] == '15-42703179-TAG-TTCATCT' - 
assert results['NM_024344.1:c.2344_2345delinsTCATCT']['genome_context_intronic_sequence'] == '' - assert results['NM_024344.1:c.2344_2345delinsTCATCT']['hgvs_lrg_variant'] == '' - assert results['NM_024344.1:c.2344_2345delinsTCATCT']['hgvs_transcript_variant'] == 'NM_024344.1:c.2344_2345delinsTCATCT' - assert results['NM_024344.1:c.2344_2345delinsTCATCT']['hgvs_refseqgene_variant'] == '' - assert results['NM_024344.1:c.2344_2345delinsTCATCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': 'chr15', 'ref': 'AG', 'pos': '42703180', 'alt': 'TCATCT'}} - assert results['NM_024344.1:c.2344_2345delinsTCATCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': 'chr15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} - assert results['NM_024344.1:c.2344_2345delinsTCATCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42703180', 'alt': 'TCATCT'}} - assert results['NM_024344.1:c.2344_2345delinsTCATCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} - assert results['NM_024344.1:c.2344_2345delinsTCATCT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_077320.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024344.1'} + assert results['flag'] == 'gene_variant' + assert 'NM_173088.1:c.826_827delinsTCATCT' in list(results.keys()) + assert results['NM_173088.1:c.826_827delinsTCATCT']['submitted_variant'] == '15-42703179-TAG-TTCATCT' + assert results['NM_173088.1:c.826_827delinsTCATCT']['gene_symbol'] == 'CAPN3' + assert results['NM_173088.1:c.826_827delinsTCATCT']['gene_ids'] == {'hgnc_id': 'HGNC:1480', 'entrez_gene_id': '825', 
'ucsc_id': 'uc001zpp.2', 'omim_id': ['114240']} + assert results['NM_173088.1:c.826_827delinsTCATCT']['hgvs_transcript_variant'] == 'NM_173088.1:c.826_827delinsTCATCT' + assert results['NM_173088.1:c.826_827delinsTCATCT']['genome_context_intronic_sequence'] == '' + assert results['NM_173088.1:c.826_827delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_173088.1:c.826_827delinsTCATCT']['hgvs_refseqgene_variant'] == '' + assert results['NM_173088.1:c.826_827delinsTCATCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775111.1:p.(Arg276SerfsTer14)', 'slr': 'NP_775111.1:p.(R276Sfs*14)'} + assert results['NM_173088.1:c.826_827delinsTCATCT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_173088.1:c.826_827delinsTCATCT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_173088.1:c.826_827delinsTCATCT']['alt_genomic_loci'], []) + assert results['NM_173088.1:c.826_827delinsTCATCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': 'chr15', 'pos': '42703180', 'ref': 'AG', 'alt': 'TCATCT'}} + assert results['NM_173088.1:c.826_827delinsTCATCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': 'chr15', 'pos': '42410982', 'ref': 'AG', 'alt': 'TCATCT'}} + assert results['NM_173088.1:c.826_827delinsTCATCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': '15', 'pos': '42703180', 'ref': 'AG', 'alt': 'TCATCT'}} + assert results['NM_173088.1:c.826_827delinsTCATCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': '15', 'pos': '42410982', 'ref': 'AG', 'alt': 'TCATCT'}} + assert results['NM_173088.1:c.826_827delinsTCATCT']['reference_sequence_records'] == {'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_173088.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775111.1'} assert 'NM_173090.1:c.367_368delinsTCATCT' in list(results.keys()) - assert results['NM_173090.1:c.367_368delinsTCATCT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_173090.1:c.367_368delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_173090.1:c.367_368delinsTCATCT']['alt_genomic_loci'], []) - assert results['NM_173090.1:c.367_368delinsTCATCT']['gene_symbol'] == 'CAPN3' - assert results['NM_173090.1:c.367_368delinsTCATCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775113.1:p.(Arg123SerfsTer14)', 'slr': 'NP_775113.1:p.(R123Sfs*14)'} assert results['NM_173090.1:c.367_368delinsTCATCT']['submitted_variant'] == '15-42703179-TAG-TTCATCT' - assert results['NM_173090.1:c.367_368delinsTCATCT']['genome_context_intronic_sequence'] == '' - assert results['NM_173090.1:c.367_368delinsTCATCT']['hgvs_lrg_variant'] == '' + assert results['NM_173090.1:c.367_368delinsTCATCT']['gene_symbol'] == 'CAPN3' + assert results['NM_173090.1:c.367_368delinsTCATCT']['gene_ids'] == {'hgnc_id': 'HGNC:1480', 'entrez_gene_id': '825', 'ucsc_id': 'uc001zpp.2', 'omim_id': ['114240']} assert results['NM_173090.1:c.367_368delinsTCATCT']['hgvs_transcript_variant'] == 'NM_173090.1:c.367_368delinsTCATCT' + assert results['NM_173090.1:c.367_368delinsTCATCT']['genome_context_intronic_sequence'] == '' + assert results['NM_173090.1:c.367_368delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_173090.1:c.367_368delinsTCATCT']['hgvs_refseqgene_variant'] == '' - assert results['NM_173090.1:c.367_368delinsTCATCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': 'chr15', 'ref': 'AG', 'pos': '42703180', 'alt': 'TCATCT'}} - assert results['NM_173090.1:c.367_368delinsTCATCT']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': 'chr15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} - assert results['NM_173090.1:c.367_368delinsTCATCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42703180', 'alt': 'TCATCT'}} - assert results['NM_173090.1:c.367_368delinsTCATCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} - assert results['NM_173090.1:c.367_368delinsTCATCT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775113.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173090.1'} + assert results['NM_173090.1:c.367_368delinsTCATCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775113.1:p.(Arg123SerfsTer14)', 'slr': 'NP_775113.1:p.(R123Sfs*14)'} + assert results['NM_173090.1:c.367_368delinsTCATCT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_173090.1:c.367_368delinsTCATCT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_173090.1:c.367_368delinsTCATCT']['alt_genomic_loci'], []) + assert results['NM_173090.1:c.367_368delinsTCATCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': 'chr15', 'pos': '42703180', 'ref': 'AG', 'alt': 'TCATCT'}} + assert results['NM_173090.1:c.367_368delinsTCATCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': 'chr15', 'pos': '42410982', 'ref': 'AG', 'alt': 'TCATCT'}} + assert results['NM_173090.1:c.367_368delinsTCATCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': '15', 'pos': '42703180', 'ref': 'AG', 'alt': 
'TCATCT'}} + assert results['NM_173090.1:c.367_368delinsTCATCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': '15', 'pos': '42410982', 'ref': 'AG', 'alt': 'TCATCT'}} + assert results['NM_173090.1:c.367_368delinsTCATCT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173090.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775113.1'} - assert results['flag'] == 'gene_variant' assert 'NM_000070.2:c.2362_2363delinsTCATCT' in list(results.keys()) - assert results['NM_000070.2:c.2362_2363delinsTCATCT']['hgvs_lrg_transcript_variant'] == 'LRG_849t1:c.2362_2363delinsTCATCT' - assert results['NM_000070.2:c.2362_2363delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000070.2:c.2362_2363delinsTCATCT']['alt_genomic_loci'], []) - assert results['NM_000070.2:c.2362_2363delinsTCATCT']['gene_symbol'] == 'CAPN3' - assert results['NM_000070.2:c.2362_2363delinsTCATCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000061.1(LRG_849p1):p.(Arg788SerfsTer14)', 'slr': 'NP_000061.1:p.(R788Sfs*14)'} assert results['NM_000070.2:c.2362_2363delinsTCATCT']['submitted_variant'] == '15-42703179-TAG-TTCATCT' - assert results['NM_000070.2:c.2362_2363delinsTCATCT']['genome_context_intronic_sequence'] == '' - assert results['NM_000070.2:c.2362_2363delinsTCATCT']['hgvs_lrg_variant'] == 'LRG_849:g.67880_67881delinsTCATCT' + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['gene_symbol'] == 'CAPN3' + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['gene_ids'] == {'hgnc_id': 'HGNC:1480', 'entrez_gene_id': '825', 'ucsc_id': 'uc001zpp.2', 'omim_id': ['114240']} assert results['NM_000070.2:c.2362_2363delinsTCATCT']['hgvs_transcript_variant'] == 'NM_000070.2:c.2362_2363delinsTCATCT' + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['genome_context_intronic_sequence'] == '' + assert 
results['NM_000070.2:c.2362_2363delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000070.2:c.2362_2363delinsTCATCT']['hgvs_refseqgene_variant'] == 'NG_008660.1:g.67880_67881delinsTCATCT' - assert results['NM_000070.2:c.2362_2363delinsTCATCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': 'chr15', 'ref': 'AG', 'pos': '42703180', 'alt': 'TCATCT'}} - assert results['NM_000070.2:c.2362_2363delinsTCATCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': 'chr15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} - assert results['NM_000070.2:c.2362_2363delinsTCATCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42703180', 'alt': 'TCATCT'}} - assert results['NM_000070.2:c.2362_2363delinsTCATCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} - assert results['NM_000070.2:c.2362_2363delinsTCATCT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008660.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000061.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000070.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_849.xml'} - - assert 'NM_173088.1:c.826_827delinsTCATCT' in list(results.keys()) - assert results['NM_173088.1:c.826_827delinsTCATCT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_173088.1:c.826_827delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_173088.1:c.826_827delinsTCATCT']['alt_genomic_loci'], []) - assert results['NM_173088.1:c.826_827delinsTCATCT']['gene_symbol'] == 'CAPN3' - assert 
results['NM_173088.1:c.826_827delinsTCATCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775111.1:p.(Arg276SerfsTer14)', 'slr': 'NP_775111.1:p.(R276Sfs*14)'} - assert results['NM_173088.1:c.826_827delinsTCATCT']['submitted_variant'] == '15-42703179-TAG-TTCATCT' - assert results['NM_173088.1:c.826_827delinsTCATCT']['genome_context_intronic_sequence'] == '' - assert results['NM_173088.1:c.826_827delinsTCATCT']['hgvs_lrg_variant'] == '' - assert results['NM_173088.1:c.826_827delinsTCATCT']['hgvs_transcript_variant'] == 'NM_173088.1:c.826_827delinsTCATCT' - assert results['NM_173088.1:c.826_827delinsTCATCT']['hgvs_refseqgene_variant'] == '' - assert results['NM_173088.1:c.826_827delinsTCATCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': 'chr15', 'ref': 'AG', 'pos': '42703180', 'alt': 'TCATCT'}} - assert results['NM_173088.1:c.826_827delinsTCATCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': 'chr15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} - assert results['NM_173088.1:c.826_827delinsTCATCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42703180', 'alt': 'TCATCT'}} - assert results['NM_173088.1:c.826_827delinsTCATCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} - assert results['NM_173088.1:c.826_827delinsTCATCT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775111.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173088.1'} + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000061.1(LRG_849p1):p.(Arg788SerfsTer14)', 'slr': 
'NP_000061.1:p.(R788Sfs*14)'} + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['hgvs_lrg_transcript_variant'] == 'LRG_849t1:c.2362_2363delinsTCATCT' + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['hgvs_lrg_variant'] == 'LRG_849:g.67880_67881delinsTCATCT' + self.assertCountEqual(results['NM_000070.2:c.2362_2363delinsTCATCT']['alt_genomic_loci'], []) + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': 'chr15', 'pos': '42703180', 'ref': 'AG', 'alt': 'TCATCT'}} + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': 'chr15', 'pos': '42410982', 'ref': 'AG', 'alt': 'TCATCT'}} + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': '15', 'pos': '42703180', 'ref': 'AG', 'alt': 'TCATCT'}} + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': '15', 'pos': '42410982', 'ref': 'AG', 'alt': 'TCATCT'}} + assert results['NM_000070.2:c.2362_2363delinsTCATCT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000070.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000061.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008660.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_849.xml'} assert 'NM_173089.1:c.367_368delinsTCATCT' in list(results.keys()) - assert results['NM_173089.1:c.367_368delinsTCATCT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_173089.1:c.367_368delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_173089.1:c.367_368delinsTCATCT']['alt_genomic_loci'], []) - assert results['NM_173089.1:c.367_368delinsTCATCT']['gene_symbol'] == 'CAPN3' - assert results['NM_173089.1:c.367_368delinsTCATCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775112.1:p.(Arg123SerfsTer14)', 'slr': 'NP_775112.1:p.(R123Sfs*14)'} assert results['NM_173089.1:c.367_368delinsTCATCT']['submitted_variant'] == '15-42703179-TAG-TTCATCT' - assert results['NM_173089.1:c.367_368delinsTCATCT']['genome_context_intronic_sequence'] == '' - assert results['NM_173089.1:c.367_368delinsTCATCT']['hgvs_lrg_variant'] == '' + assert results['NM_173089.1:c.367_368delinsTCATCT']['gene_symbol'] == 'CAPN3' + assert results['NM_173089.1:c.367_368delinsTCATCT']['gene_ids'] == {'hgnc_id': 'HGNC:1480', 'entrez_gene_id': '825', 'ucsc_id': 'uc001zpp.2', 'omim_id': ['114240']} assert results['NM_173089.1:c.367_368delinsTCATCT']['hgvs_transcript_variant'] == 'NM_173089.1:c.367_368delinsTCATCT' + assert results['NM_173089.1:c.367_368delinsTCATCT']['genome_context_intronic_sequence'] == '' + assert results['NM_173089.1:c.367_368delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_173089.1:c.367_368delinsTCATCT']['hgvs_refseqgene_variant'] == '' - assert results['NM_173089.1:c.367_368delinsTCATCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': 'chr15', 'ref': 'AG', 'pos': '42703180', 'alt': 'TCATCT'}} - assert results['NM_173089.1:c.367_368delinsTCATCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': 'chr15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} - assert results['NM_173089.1:c.367_368delinsTCATCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42703180', 'alt': 'TCATCT'}} - 
assert results['NM_173089.1:c.367_368delinsTCATCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} - assert results['NM_173089.1:c.367_368delinsTCATCT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775112.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173089.1'} + assert results['NM_173089.1:c.367_368delinsTCATCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775112.1:p.(Arg123SerfsTer14)', 'slr': 'NP_775112.1:p.(R123Sfs*14)'} + assert results['NM_173089.1:c.367_368delinsTCATCT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_173089.1:c.367_368delinsTCATCT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_173089.1:c.367_368delinsTCATCT']['alt_genomic_loci'], []) + assert results['NM_173089.1:c.367_368delinsTCATCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': 'chr15', 'pos': '42703180', 'ref': 'AG', 'alt': 'TCATCT'}} + assert results['NM_173089.1:c.367_368delinsTCATCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': 'chr15', 'pos': '42410982', 'ref': 'AG', 'alt': 'TCATCT'}} + assert results['NM_173089.1:c.367_368delinsTCATCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': '15', 'pos': '42703180', 'ref': 'AG', 'alt': 'TCATCT'}} + assert results['NM_173089.1:c.367_368delinsTCATCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': '15', 'pos': '42410982', 'ref': 'AG', 'alt': 'TCATCT'}} + assert results['NM_173089.1:c.367_368delinsTCATCT']['reference_sequence_records'] == {'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_173089.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775112.1'} + + assert 'NM_024344.1:c.2344_2345delinsTCATCT' in list(results.keys()) + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['submitted_variant'] == '15-42703179-TAG-TTCATCT' + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['gene_symbol'] == 'CAPN3' + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['gene_ids'] == {'hgnc_id': 'HGNC:1480', 'entrez_gene_id': '825', 'ucsc_id': 'uc001zpp.2', 'omim_id': ['114240']} + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['hgvs_transcript_variant'] == 'NM_024344.1:c.2344_2345delinsTCATCT' + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['genome_context_intronic_sequence'] == '' + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['hgvs_refseqgene_variant'] == '' + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_077320.1:p.(Arg782SerfsTer14)', 'slr': 'NP_077320.1:p.(R782Sfs*14)'} + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_024344.1:c.2344_2345delinsTCATCT']['alt_genomic_loci'], []) + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': 'chr15', 'pos': '42703180', 'ref': 'AG', 'alt': 'TCATCT'}} + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': 'chr15', 'pos': '42410982', 'ref': 'AG', 'alt': 'TCATCT'}} + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': '15', 'pos': '42703180', 'ref': 'AG', 'alt': 'TCATCT'}} + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': '15', 'pos': '42410982', 'ref': 'AG', 'alt': 'TCATCT'}} + assert results['NM_024344.1:c.2344_2345delinsTCATCT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024344.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_077320.1'} assert 'NM_173087.1:c.2086_2087delinsTCATCT' in list(results.keys()) - assert results['NM_173087.1:c.2086_2087delinsTCATCT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_173087.1:c.2086_2087delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_173087.1:c.2086_2087delinsTCATCT']['alt_genomic_loci'], []) - assert results['NM_173087.1:c.2086_2087delinsTCATCT']['gene_symbol'] == 'CAPN3' - assert results['NM_173087.1:c.2086_2087delinsTCATCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775110.1:p.(Arg696SerfsTer14)', 'slr': 'NP_775110.1:p.(R696Sfs*14)'} assert results['NM_173087.1:c.2086_2087delinsTCATCT']['submitted_variant'] == '15-42703179-TAG-TTCATCT' - assert results['NM_173087.1:c.2086_2087delinsTCATCT']['genome_context_intronic_sequence'] == '' - assert results['NM_173087.1:c.2086_2087delinsTCATCT']['hgvs_lrg_variant'] == '' + assert results['NM_173087.1:c.2086_2087delinsTCATCT']['gene_symbol'] == 'CAPN3' + assert results['NM_173087.1:c.2086_2087delinsTCATCT']['gene_ids'] == {'hgnc_id': 'HGNC:1480', 'entrez_gene_id': '825', 'ucsc_id': 'uc001zpp.2', 'omim_id': ['114240']} assert results['NM_173087.1:c.2086_2087delinsTCATCT']['hgvs_transcript_variant'] == 'NM_173087.1:c.2086_2087delinsTCATCT' + assert results['NM_173087.1:c.2086_2087delinsTCATCT']['genome_context_intronic_sequence'] == '' + assert 
results['NM_173087.1:c.2086_2087delinsTCATCT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_173087.1:c.2086_2087delinsTCATCT']['hgvs_refseqgene_variant'] == '' - assert results['NM_173087.1:c.2086_2087delinsTCATCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': 'chr15', 'ref': 'AG', 'pos': '42703180', 'alt': 'TCATCT'}} - assert results['NM_173087.1:c.2086_2087delinsTCATCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': 'chr15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} - assert results['NM_173087.1:c.2086_2087delinsTCATCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42703180', 'alt': 'TCATCT'}} - assert results['NM_173087.1:c.2086_2087delinsTCATCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': '15', 'ref': 'AG', 'pos': '42410982', 'alt': 'TCATCT'}} - assert results['NM_173087.1:c.2086_2087delinsTCATCT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173087.1'} - + assert results['NM_173087.1:c.2086_2087delinsTCATCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_775110.1:p.(Arg696SerfsTer14)', 'slr': 'NP_775110.1:p.(R696Sfs*14)'} + assert results['NM_173087.1:c.2086_2087delinsTCATCT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_173087.1:c.2086_2087delinsTCATCT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_173087.1:c.2086_2087delinsTCATCT']['alt_genomic_loci'], []) + assert results['NM_173087.1:c.2086_2087delinsTCATCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': 
{'chr': 'chr15', 'pos': '42703180', 'ref': 'AG', 'alt': 'TCATCT'}} + assert results['NM_173087.1:c.2086_2087delinsTCATCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': 'chr15', 'pos': '42410982', 'ref': 'AG', 'alt': 'TCATCT'}} + assert results['NM_173087.1:c.2086_2087delinsTCATCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.42703180_42703181delinsTCATCT', 'vcf': {'chr': '15', 'pos': '42703180', 'ref': 'AG', 'alt': 'TCATCT'}} + assert results['NM_173087.1:c.2086_2087delinsTCATCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.42410982_42410983delinsTCATCT', 'vcf': {'chr': '15', 'pos': '42410982', 'ref': 'AG', 'alt': 'TCATCT'}} + assert results['NM_173087.1:c.2086_2087delinsTCATCT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_173087.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_775110.1'} def test_variant231(self): variant = '15-48782203-C-T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_000138.4:c.2927G>A' in list(results.keys()) - assert results['NM_000138.4:c.2927G>A']['hgvs_lrg_transcript_variant'] == 'LRG_778t1:c.2927G>A' - assert results['NM_000138.4:c.2927G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000138.4:c.2927G>A']['alt_genomic_loci'], []) - assert results['NM_000138.4:c.2927G>A']['gene_symbol'] == 'FBN1' - assert results['NM_000138.4:c.2927G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000129.3(LRG_778p1):p.(Arg976His)', 'slr': 'NP_000129.3:p.(R976H)'} assert results['NM_000138.4:c.2927G>A']['submitted_variant'] == '15-48782203-C-T' - assert results['NM_000138.4:c.2927G>A']['genome_context_intronic_sequence'] == '' - assert results['NM_000138.4:c.2927G>A']['hgvs_lrg_variant'] == 
'LRG_778:g.160783G>A' + assert results['NM_000138.4:c.2927G>A']['gene_symbol'] == 'FBN1' + assert results['NM_000138.4:c.2927G>A']['gene_ids'] == {'hgnc_id': 'HGNC:3603', 'entrez_gene_id': '2200', 'ucsc_id': 'uc001zwx.3', 'omim_id': ['134797']} assert results['NM_000138.4:c.2927G>A']['hgvs_transcript_variant'] == 'NM_000138.4:c.2927G>A' + assert results['NM_000138.4:c.2927G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_000138.4:c.2927G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000138.4:c.2927G>A']['hgvs_refseqgene_variant'] == 'NG_008805.2:g.160783G>A' - assert results['NM_000138.4:c.2927G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.48782203C>T', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '48782203', 'alt': 'T'}} - assert results['NM_000138.4:c.2927G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.48490006C>T', 'vcf': {'chr': 'chr15', 'ref': 'C', 'pos': '48490006', 'alt': 'T'}} - assert results['NM_000138.4:c.2927G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.48782203C>T', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '48782203', 'alt': 'T'}} - assert results['NM_000138.4:c.2927G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.48490006C>T', 'vcf': {'chr': '15', 'ref': 'C', 'pos': '48490006', 'alt': 'T'}} - assert results['NM_000138.4:c.2927G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008805.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000129.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000138.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_778.xml'} - - assert results['flag'] == 'gene_variant' + assert results['NM_000138.4:c.2927G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000129.3(LRG_778p1):p.(Arg976His)', 'slr': 'NP_000129.3:p.(R976H)'} + assert 
results['NM_000138.4:c.2927G>A']['hgvs_lrg_transcript_variant'] == 'LRG_778t1:c.2927G>A' + assert results['NM_000138.4:c.2927G>A']['hgvs_lrg_variant'] == 'LRG_778:g.160783G>A' + self.assertCountEqual(results['NM_000138.4:c.2927G>A']['alt_genomic_loci'], []) + assert results['NM_000138.4:c.2927G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.48782203C>T', 'vcf': {'chr': 'chr15', 'pos': '48782203', 'ref': 'C', 'alt': 'T'}} + assert results['NM_000138.4:c.2927G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.48490006C>T', 'vcf': {'chr': 'chr15', 'pos': '48490006', 'ref': 'C', 'alt': 'T'}} + assert results['NM_000138.4:c.2927G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.48782203C>T', 'vcf': {'chr': '15', 'pos': '48782203', 'ref': 'C', 'alt': 'T'}} + assert results['NM_000138.4:c.2927G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.48490006C>T', 'vcf': {'chr': '15', 'pos': '48490006', 'ref': 'C', 'alt': 'T'}} + assert results['NM_000138.4:c.2927G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000138.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000129.3', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008805.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_778.xml'} def test_variant232(self): variant = '15-72105929-CC-C' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_014249.2:c.946_949=' in list(results.keys()) - assert results['NM_014249.2:c.946_949=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_014249.2:c.946_949=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_014249.2:c.946_949=']['alt_genomic_loci'], []) - assert results['NM_014249.2:c.946_949=']['gene_symbol'] == 'NR2E3' - assert 
results['NM_014249.2:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Asp316=)', 'slr': 'NP_055064.1:p.(D316=)'} assert results['NM_014249.2:c.946_949=']['submitted_variant'] == '15-72105929-CC-C' - assert results['NM_014249.2:c.946_949=']['genome_context_intronic_sequence'] == '' - assert results['NM_014249.2:c.946_949=']['hgvs_lrg_variant'] == '' + assert results['NM_014249.2:c.946_949=']['gene_symbol'] == 'NR2E3' + assert results['NM_014249.2:c.946_949=']['gene_ids'] == {'hgnc_id': 'HGNC:7974', 'entrez_gene_id': '10002', 'ucsc_id': 'uc032cil.2', 'omim_id': ['604485']} assert results['NM_014249.2:c.946_949=']['hgvs_transcript_variant'] == 'NM_014249.2:c.946_949=' + assert results['NM_014249.2:c.946_949=']['genome_context_intronic_sequence'] == '' + assert results['NM_014249.2:c.946_949=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014249.2:c.946_949=']['hgvs_refseqgene_variant'] == '' - assert results['NM_014249.2:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} + assert results['NM_014249.2:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Asp316=)', 'slr': 'NP_055064.1:p.(D316=)'} + assert results['NM_014249.2:c.946_949=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014249.2:c.946_949=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_014249.2:c.946_949=']['alt_genomic_loci'], []) + assert results['NM_014249.2:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'pos': '72105928', 'ref': 'AC', 'alt': 'A'}} assert 'hg38' not in list(results['NM_014249.2:c.946_949=']['primary_assembly_loci'].keys()) - assert results['NM_014249.2:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 
'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} + assert results['NM_014249.2:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'pos': '72105928', 'ref': 'AC', 'alt': 'A'}} assert 'grch38' not in list(results['NM_014249.2:c.946_949=']['primary_assembly_loci'].keys()) - assert results['NM_014249.2:c.946_949=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2'} + assert results['NM_014249.2:c.946_949=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1'} assert 'NM_016346.3:c.946_949=' in list(results.keys()) - assert results['NM_016346.3:c.946_949=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_016346.3:c.946_949=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_016346.3:c.946_949=']['alt_genomic_loci'], []) - assert results['NM_016346.3:c.946_949=']['gene_symbol'] == 'NR2E3' - assert results['NM_016346.3:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Asp316=)', 'slr': 'NP_057430.1:p.(D316=)'} assert results['NM_016346.3:c.946_949=']['submitted_variant'] == '15-72105929-CC-C' - assert results['NM_016346.3:c.946_949=']['genome_context_intronic_sequence'] == '' - assert results['NM_016346.3:c.946_949=']['hgvs_lrg_variant'] == '' + assert results['NM_016346.3:c.946_949=']['gene_symbol'] == 'NR2E3' + assert results['NM_016346.3:c.946_949=']['gene_ids'] == {'hgnc_id': 'HGNC:7974', 'entrez_gene_id': '10002', 'ucsc_id': 'uc032cil.2', 'omim_id': ['604485']} assert results['NM_016346.3:c.946_949=']['hgvs_transcript_variant'] == 'NM_016346.3:c.946_949=' + assert results['NM_016346.3:c.946_949=']['genome_context_intronic_sequence'] == '' + assert 
results['NM_016346.3:c.946_949=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_016346.3:c.946_949=']['hgvs_refseqgene_variant'] == '' - assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} - assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813587_71813590=', 'vcf': {'chr': 'chr15', 'ref': 'GACC', 'pos': '71813587', 'alt': 'GACC'}} - assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} - assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813587_71813590=', 'vcf': {'chr': '15', 'ref': 'GACC', 'pos': '71813587', 'alt': 'GACC'}} - assert results['NM_016346.3:c.946_949=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3'} + assert results['NM_016346.3:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Asp316=)', 'slr': 'NP_057430.1:p.(D316=)'} + assert results['NM_016346.3:c.946_949=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_016346.3:c.946_949=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_016346.3:c.946_949=']['alt_genomic_loci'], []) + assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'pos': '72105928', 'ref': 'AC', 'alt': 'A'}} + assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813587_71813590=', 'vcf': {'chr': 'chr15', 'pos': '71813587', 'ref': 'GACC', 
'alt': 'GACC'}} + assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'pos': '72105928', 'ref': 'AC', 'alt': 'A'}} + assert results['NM_016346.3:c.946_949=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.71813587_71813590=', 'vcf': {'chr': '15', 'pos': '71813587', 'ref': 'GACC', 'alt': 'GACC'}} + assert results['NM_016346.3:c.946_949=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1'} + + assert 'NM_016346.2:c.946_949=' in list(results.keys()) + assert results['NM_016346.2:c.946_949=']['submitted_variant'] == '15-72105929-CC-C' + assert results['NM_016346.2:c.946_949=']['gene_symbol'] == 'NR2E3' + assert results['NM_016346.2:c.946_949=']['gene_ids'] == {'hgnc_id': 'HGNC:7974', 'entrez_gene_id': '10002', 'ucsc_id': 'uc032cil.2', 'omim_id': ['604485']} + assert results['NM_016346.2:c.946_949=']['hgvs_transcript_variant'] == 'NM_016346.2:c.946_949=' + assert results['NM_016346.2:c.946_949=']['genome_context_intronic_sequence'] == '' + assert results['NM_016346.2:c.946_949=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_016346.2:c.946_949=']['hgvs_refseqgene_variant'] == '' + assert results['NM_016346.2:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Asp316=)', 'slr': 'NP_057430.1:p.(D316=)'} + assert results['NM_016346.2:c.946_949=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_016346.2:c.946_949=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_016346.2:c.946_949=']['alt_genomic_loci'], []) + assert results['NM_016346.2:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'pos': '72105928', 'ref': 'AC', 'alt': 'A'}} + assert 'hg38' not in 
list(results['NM_016346.2:c.946_949=']['primary_assembly_loci'].keys()) + assert results['NM_016346.2:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'pos': '72105928', 'ref': 'AC', 'alt': 'A'}} + assert 'grch38' not in list(results['NM_016346.2:c.946_949=']['primary_assembly_loci'].keys()) + assert results['NM_016346.2:c.946_949=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1'} - assert results['flag'] == 'gene_variant' assert 'NM_014249.3:c.946_949=' in list(results.keys()) - assert results['NM_014249.3:c.946_949=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_014249.3:c.946_949=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_014249.3:c.946_949=']['alt_genomic_loci'], []) - assert results['NM_014249.3:c.946_949=']['gene_symbol'] == 'NR2E3' - assert results['NM_014249.3:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Asp316=)', 'slr': 'NP_055064.1:p.(D316=)'} assert results['NM_014249.3:c.946_949=']['submitted_variant'] == '15-72105929-CC-C' - assert results['NM_014249.3:c.946_949=']['genome_context_intronic_sequence'] == '' - assert results['NM_014249.3:c.946_949=']['hgvs_lrg_variant'] == '' + assert results['NM_014249.3:c.946_949=']['gene_symbol'] == 'NR2E3' + assert results['NM_014249.3:c.946_949=']['gene_ids'] == {'hgnc_id': 'HGNC:7974', 'entrez_gene_id': '10002', 'ucsc_id': 'uc032cil.2', 'omim_id': ['604485']} assert results['NM_014249.3:c.946_949=']['hgvs_transcript_variant'] == 'NM_014249.3:c.946_949=' + assert results['NM_014249.3:c.946_949=']['genome_context_intronic_sequence'] == '' + assert results['NM_014249.3:c.946_949=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014249.3:c.946_949=']['hgvs_refseqgene_variant'] == 'NG_009113.1:g.8034_8037=' - assert 
results['NM_014249.3:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} + assert results['NM_014249.3:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055064.1:p.(Asp316=)', 'slr': 'NP_055064.1:p.(D316=)'} + assert results['NM_014249.3:c.946_949=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014249.3:c.946_949=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_014249.3:c.946_949=']['alt_genomic_loci'], []) + assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['hg19'] == { + 'hgvs_genomic_description': 'NC_000015.9:g.72105933del', + 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} result_options = [ {'hgvs_genomic_description': 'NC_000015.10:g.71813586_71813591=', 'vcf': {'chr': 'chr15', 'ref': 'GGACCC', 'pos': '71813586', 'alt': 'GGACCC'}}, @@ -9489,7 +9737,9 @@ def test_variant232(self): 'vcf': {'alt': 'GACC', 'chr': 'chr15', 'pos': '71813587', 'ref': 'GACC'}} ] self.assertIn(results['NM_014249.3:c.946_949=']['primary_assembly_loci']['hg38'], result_options) - assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} + assert results['NM_014249.3:c.946_949=']['primary_assembly_loci']['grch37'] == { + 'hgvs_genomic_description': 'NC_000015.9:g.72105933del', + 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} result_options2 = [ {'hgvs_genomic_description': 'NC_000015.10:g.71813586_71813591=', 'vcf': {'chr': '15', 'ref': 'GGACCC', 'pos': '71813586', 'alt': 'GGACCC'}}, @@ -9497,277 +9747,271 @@ def test_variant232(self): 'vcf': {'alt': 'GACC', 'chr': '15', 'pos': '71813587', 'ref': 'GACC'}} ] self.assertIn(results['NM_014249.3:c.946_949=']['primary_assembly_loci']['grch38'], result_options2) - assert 
results['NM_014249.3:c.946_949=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3'} - - assert 'NM_016346.2:c.946_949=' in list(results.keys()) - assert results['NM_016346.2:c.946_949=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_016346.2:c.946_949=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_016346.2:c.946_949=']['alt_genomic_loci'], []) - assert results['NM_016346.2:c.946_949=']['gene_symbol'] == 'NR2E3' - assert results['NM_016346.2:c.946_949=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057430.1:p.(Asp316=)', 'slr': 'NP_057430.1:p.(D316=)'} - assert results['NM_016346.2:c.946_949=']['submitted_variant'] == '15-72105929-CC-C' - assert results['NM_016346.2:c.946_949=']['genome_context_intronic_sequence'] == '' - assert results['NM_016346.2:c.946_949=']['hgvs_lrg_variant'] == '' - assert results['NM_016346.2:c.946_949=']['hgvs_transcript_variant'] == 'NM_016346.2:c.946_949=' - assert results['NM_016346.2:c.946_949=']['hgvs_refseqgene_variant'] == '' - assert results['NM_016346.2:c.946_949=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': 'chr15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} - assert 'hg38' not in list(results['NM_016346.2:c.946_949=']['primary_assembly_loci'].keys()) - assert results['NM_016346.2:c.946_949=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.72105933del', 'vcf': {'chr': '15', 'ref': 'AC', 'pos': '72105928', 'alt': 'A'}} - assert 'grch38' not in list(results['NM_016346.2:c.946_949=']['primary_assembly_loci'].keys()) - assert results['NM_016346.2:c.946_949=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057430.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_016346.2'} - + assert results['NM_014249.3:c.946_949=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014249.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055064.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009113.1'} def test_variant233(self): variant = '15-89873415-G-A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_002693.2:c.752C>T' in list(results.keys()) - assert results['NM_002693.2:c.752C>T']['hgvs_lrg_transcript_variant'] == 'LRG_765t1:c.752C>T' - assert results['NM_002693.2:c.752C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_002693.2:c.752C>T']['alt_genomic_loci'], []) - assert results['NM_002693.2:c.752C>T']['gene_symbol'] == 'POLG' - assert results['NM_002693.2:c.752C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002684.1(LRG_765p1):p.(Thr251Ile)', 'slr': 'NP_002684.1:p.(T251I)'} - assert results['NM_002693.2:c.752C>T']['submitted_variant'] == '15-89873415-G-A' - assert results['NM_002693.2:c.752C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_002693.2:c.752C>T']['hgvs_lrg_variant'] == '' - assert results['NM_002693.2:c.752C>T']['hgvs_transcript_variant'] == 'NM_002693.2:c.752C>T' - assert results['NM_002693.2:c.752C>T']['hgvs_refseqgene_variant'] == 'NG_008218.1:g.9612C>T' - assert results['NM_002693.2:c.752C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.89873415G>A', 'vcf': {'chr': 'chr15', 'ref': 'G', 'pos': '89873415', 'alt': 'A'}} - assert results['NM_002693.2:c.752C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89330184G>A', 'vcf': {'chr': 'chr15', 'ref': 'G', 'pos': '89330184', 'alt': 'A'}} - assert results['NM_002693.2:c.752C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.89873415G>A', 'vcf': {'chr': 
'15', 'ref': 'G', 'pos': '89873415', 'alt': 'A'}} - assert results['NM_002693.2:c.752C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89330184G>A', 'vcf': {'chr': '15', 'ref': 'G', 'pos': '89330184', 'alt': 'A'}} - assert results['NM_002693.2:c.752C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008218.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002684.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002693.2'} - assert results['flag'] == 'gene_variant' assert 'NM_001126131.1:c.752C>T' in list(results.keys()) - assert results['NM_001126131.1:c.752C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001126131.1:c.752C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001126131.1:c.752C>T']['alt_genomic_loci'], []) - assert results['NM_001126131.1:c.752C>T']['gene_symbol'] == 'POLG' - assert results['NM_001126131.1:c.752C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119603.1:p.(Thr251Ile)', 'slr': 'NP_001119603.1:p.(T251I)'} assert results['NM_001126131.1:c.752C>T']['submitted_variant'] == '15-89873415-G-A' - assert results['NM_001126131.1:c.752C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001126131.1:c.752C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001126131.1:c.752C>T']['gene_symbol'] == 'POLG' + assert results['NM_001126131.1:c.752C>T']['gene_ids'] == {'hgnc_id': 'HGNC:9179', 'entrez_gene_id': '5428', 'ucsc_id': 'uc002bns.5', 'omim_id': ['174763']} assert results['NM_001126131.1:c.752C>T']['hgvs_transcript_variant'] == 'NM_001126131.1:c.752C>T' + assert results['NM_001126131.1:c.752C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001126131.1:c.752C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001126131.1:c.752C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001126131.1:c.752C>T']['primary_assembly_loci']['hg19'] 
== {'hgvs_genomic_description': 'NC_000015.9:g.89873415G>A', 'vcf': {'chr': 'chr15', 'ref': 'G', 'pos': '89873415', 'alt': 'A'}} - assert results['NM_001126131.1:c.752C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89330184G>A', 'vcf': {'chr': 'chr15', 'ref': 'G', 'pos': '89330184', 'alt': 'A'}} - assert results['NM_001126131.1:c.752C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.89873415G>A', 'vcf': {'chr': '15', 'ref': 'G', 'pos': '89873415', 'alt': 'A'}} - assert results['NM_001126131.1:c.752C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89330184G>A', 'vcf': {'chr': '15', 'ref': 'G', 'pos': '89330184', 'alt': 'A'}} - assert results['NM_001126131.1:c.752C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119603.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126131.1'} + assert results['NM_001126131.1:c.752C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119603.1:p.(Thr251Ile)', 'slr': 'NP_001119603.1:p.(T251I)'} + assert results['NM_001126131.1:c.752C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001126131.1:c.752C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001126131.1:c.752C>T']['alt_genomic_loci'], []) + assert results['NM_001126131.1:c.752C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.89873415G>A', 'vcf': {'chr': 'chr15', 'pos': '89873415', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001126131.1:c.752C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89330184G>A', 'vcf': {'chr': 'chr15', 'pos': '89330184', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001126131.1:c.752C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.89873415G>A', 'vcf': {'chr': '15', 'pos': '89873415', 'ref': 'G', 'alt': 'A'}} + assert 
results['NM_001126131.1:c.752C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89330184G>A', 'vcf': {'chr': '15', 'pos': '89330184', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001126131.1:c.752C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126131.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119603.1'} + assert 'NM_002693.2:c.752C>T' in list(results.keys()) + assert results['NM_002693.2:c.752C>T']['submitted_variant'] == '15-89873415-G-A' + assert results['NM_002693.2:c.752C>T']['gene_symbol'] == 'POLG' + assert results['NM_002693.2:c.752C>T']['gene_ids'] == {'hgnc_id': 'HGNC:9179', 'entrez_gene_id': '5428', 'ucsc_id': 'uc002bns.5', 'omim_id': ['174763']} + assert results['NM_002693.2:c.752C>T']['hgvs_transcript_variant'] == 'NM_002693.2:c.752C>T' + assert results['NM_002693.2:c.752C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_002693.2:c.752C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_002693.2:c.752C>T']['hgvs_refseqgene_variant'] == 'NG_008218.1:g.9612C>T' + assert results['NM_002693.2:c.752C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002684.1(LRG_765p1):p.(Thr251Ile)', 'slr': 'NP_002684.1:p.(T251I)'} + assert results['NM_002693.2:c.752C>T']['hgvs_lrg_transcript_variant'] == 'LRG_765t1:c.752C>T' + assert results['NM_002693.2:c.752C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_002693.2:c.752C>T']['alt_genomic_loci'], []) + assert results['NM_002693.2:c.752C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.89873415G>A', 'vcf': {'chr': 'chr15', 'pos': '89873415', 'ref': 'G', 'alt': 'A'}} + assert results['NM_002693.2:c.752C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89330184G>A', 'vcf': {'chr': 'chr15', 'pos': '89330184', 'ref': 'G', 'alt': 'A'}} + assert 
results['NM_002693.2:c.752C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.89873415G>A', 'vcf': {'chr': '15', 'pos': '89873415', 'ref': 'G', 'alt': 'A'}} + assert results['NM_002693.2:c.752C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89330184G>A', 'vcf': {'chr': '15', 'pos': '89330184', 'ref': 'G', 'alt': 'A'}} + assert results['NM_002693.2:c.752C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002693.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002684.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008218.1'} def test_variant234(self): variant = '16-2103394-C-T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_000548.3:c.277C>T' in list(results.keys()) - assert results['NM_000548.3:c.277C>T']['hgvs_lrg_transcript_variant'] == 'LRG_487t1:c.277C>T' - assert results['NM_000548.3:c.277C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000548.3:c.277C>T']['alt_genomic_loci'], []) - assert results['NM_000548.3:c.277C>T']['gene_symbol'] == 'TSC2' - assert results['NM_000548.3:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000539.2(LRG_487p1):p.(Arg93Trp)', 'slr': 'NP_000539.2:p.(R93W)'} - assert results['NM_000548.3:c.277C>T']['submitted_variant'] == '16-2103394-C-T' - assert results['NM_000548.3:c.277C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_000548.3:c.277C>T']['hgvs_lrg_variant'] == 'LRG_487:g.9088C>T' - assert results['NM_000548.3:c.277C>T']['hgvs_transcript_variant'] == 'NM_000548.3:c.277C>T' - assert results['NM_000548.3:c.277C>T']['hgvs_refseqgene_variant'] == 'NG_005895.1:g.9088C>T' - assert results['NM_000548.3:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert 
'hg38' not in list(results['NM_000548.3:c.277C>T']['primary_assembly_loci'].keys()) - assert results['NM_000548.3:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert 'grch38' not in list(results['NM_000548.3:c.277C>T']['primary_assembly_loci'].keys()) - assert results['NM_000548.3:c.277C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_005895.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000539.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000548.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_487.xml'} + assert results['flag'] == 'gene_variant' + assert 'NM_000548.4:c.277C>T' in list(results.keys()) + assert results['NM_000548.4:c.277C>T']['submitted_variant'] == '16-2103394-C-T' + assert results['NM_000548.4:c.277C>T']['gene_symbol'] == 'TSC2' + assert results['NM_000548.4:c.277C>T']['gene_ids'] == {'hgnc_id': 'HGNC:12363', 'entrez_gene_id': '7249', 'ucsc_id': 'uc002con.4', 'omim_id': ['191092']} + assert results['NM_000548.4:c.277C>T']['hgvs_transcript_variant'] == 'NM_000548.4:c.277C>T' + assert results['NM_000548.4:c.277C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000548.4:c.277C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000548.4:c.277C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_000548.4:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000539.2(LRG_487p1):p.(Arg93Trp)', 'slr': 'NP_000539.2:p.(R93W)'} + assert results['NM_000548.4:c.277C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000548.4:c.277C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000548.4:c.277C>T']['alt_genomic_loci'], []) + assert results['NM_000548.4:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 
'pos': '2103394', 'ref': 'C', 'alt': 'T'}} + assert results['NM_000548.4:c.277C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': 'chr16', 'pos': '2053393', 'ref': 'C', 'alt': 'T'}} + assert results['NM_000548.4:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'pos': '2103394', 'ref': 'C', 'alt': 'T'}} + assert results['NM_000548.4:c.277C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': '16', 'pos': '2053393', 'ref': 'C', 'alt': 'T'}} + assert results['NM_000548.4:c.277C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000548.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000539.2'} assert 'NM_001318832.1:c.310C>T' in list(results.keys()) - assert results['NM_001318832.1:c.310C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001318832.1:c.310C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001318832.1:c.310C>T']['alt_genomic_loci'], []) - assert results['NM_001318832.1:c.310C>T']['gene_symbol'] == 'TSC2' - assert results['NM_001318832.1:c.310C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305761.1:p.(Arg104Trp)', 'slr': 'NP_001305761.1:p.(R104W)'} assert results['NM_001318832.1:c.310C>T']['submitted_variant'] == '16-2103394-C-T' - assert results['NM_001318832.1:c.310C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001318832.1:c.310C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001318832.1:c.310C>T']['gene_symbol'] == 'TSC2' + assert results['NM_001318832.1:c.310C>T']['gene_ids'] == {'hgnc_id': 'HGNC:12363', 'entrez_gene_id': '7249', 'ucsc_id': 'uc002con.4', 'omim_id': ['191092']} assert results['NM_001318832.1:c.310C>T']['hgvs_transcript_variant'] == 'NM_001318832.1:c.310C>T' + assert 
results['NM_001318832.1:c.310C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001318832.1:c.310C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001318832.1:c.310C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001318832.1:c.310C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert results['NM_001318832.1:c.310C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} - assert results['NM_001318832.1:c.310C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert results['NM_001318832.1:c.310C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} - assert results['NM_001318832.1:c.310C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305761.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318832.1'} + assert results['NM_001318832.1:c.310C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305761.1:p.(Arg104Trp)', 'slr': 'NP_001305761.1:p.(R104W)'} + assert results['NM_001318832.1:c.310C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001318832.1:c.310C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001318832.1:c.310C>T']['alt_genomic_loci'], []) + assert results['NM_001318832.1:c.310C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'pos': '2103394', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001318832.1:c.310C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000016.10:g.2053393C>T', 'vcf': {'chr': 'chr16', 'pos': '2053393', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001318832.1:c.310C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'pos': '2103394', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001318832.1:c.310C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': '16', 'pos': '2053393', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001318832.1:c.310C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318832.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305761.1'} - assert 'NM_001318829.1:c.130C>T' in list(results.keys()) - assert results['NM_001318829.1:c.130C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001318829.1:c.130C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001318829.1:c.130C>T']['alt_genomic_loci'], []) - assert results['NM_001318829.1:c.130C>T']['gene_symbol'] == 'TSC2' - assert results['NM_001318829.1:c.130C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305758.1:p.(Arg44Trp)', 'slr': 'NP_001305758.1:p.(R44W)'} - assert results['NM_001318829.1:c.130C>T']['submitted_variant'] == '16-2103394-C-T' - assert results['NM_001318829.1:c.130C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001318829.1:c.130C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001318829.1:c.130C>T']['hgvs_transcript_variant'] == 'NM_001318829.1:c.130C>T' - assert results['NM_001318829.1:c.130C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001318829.1:c.130C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert results['NM_001318829.1:c.130C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000016.10:g.2053393C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} - assert results['NM_001318829.1:c.130C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert results['NM_001318829.1:c.130C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} - assert results['NM_001318829.1:c.130C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305758.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318829.1'} + assert 'NM_001318827.1:c.226-903C>T' in list(results.keys()) + assert results['NM_001318827.1:c.226-903C>T']['submitted_variant'] == '16-2103394-C-T' + assert results['NM_001318827.1:c.226-903C>T']['gene_symbol'] == 'TSC2' + assert results['NM_001318827.1:c.226-903C>T']['gene_ids'] == {'hgnc_id': 'HGNC:12363', 'entrez_gene_id': '7249', 'ucsc_id': 'uc002con.4', 'omim_id': ['191092']} + assert results['NM_001318827.1:c.226-903C>T']['hgvs_transcript_variant'] == 'NM_001318827.1:c.226-903C>T' + assert results['NM_001318827.1:c.226-903C>T']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001318827.1):c.226-903C>T' + assert results['NM_001318827.1:c.226-903C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001318827.1:c.226-903C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001318827.1:c.226-903C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305756.1:p.?', 'slr': 'NP_001305756.1:p.?'} + assert results['NM_001318827.1:c.226-903C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001318827.1:c.226-903C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001318827.1:c.226-903C>T']['alt_genomic_loci'], []) + assert results['NM_001318827.1:c.226-903C>T']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'pos': '2103394', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001318827.1:c.226-903C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': 'chr16', 'pos': '2053393', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001318827.1:c.226-903C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'pos': '2103394', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001318827.1:c.226-903C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': '16', 'pos': '2053393', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001318827.1:c.226-903C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318827.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305756.1'} - assert 'NM_001077183.2:c.277C>T' in list(results.keys()) - assert results['NM_001077183.2:c.277C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001077183.2:c.277C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001077183.2:c.277C>T']['alt_genomic_loci'], []) - assert results['NM_001077183.2:c.277C>T']['gene_symbol'] == 'TSC2' - assert results['NM_001077183.2:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001070651.1:p.(Arg93Trp)', 'slr': 'NP_001070651.1:p.(R93W)'} - assert results['NM_001077183.2:c.277C>T']['submitted_variant'] == '16-2103394-C-T' - assert results['NM_001077183.2:c.277C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001077183.2:c.277C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001077183.2:c.277C>T']['hgvs_transcript_variant'] == 'NM_001077183.2:c.277C>T' - assert results['NM_001077183.2:c.277C>T']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_001077183.2:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert results['NM_001077183.2:c.277C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} - assert results['NM_001077183.2:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert results['NM_001077183.2:c.277C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} - assert results['NM_001077183.2:c.277C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001070651.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077183.2'} + assert 'NM_001114382.2:c.277C>T' in list(results.keys()) + assert results['NM_001114382.2:c.277C>T']['submitted_variant'] == '16-2103394-C-T' + assert results['NM_001114382.2:c.277C>T']['gene_symbol'] == 'TSC2' + assert results['NM_001114382.2:c.277C>T']['gene_ids'] == {'hgnc_id': 'HGNC:12363', 'entrez_gene_id': '7249', 'ucsc_id': 'uc002con.4', 'omim_id': ['191092']} + assert results['NM_001114382.2:c.277C>T']['hgvs_transcript_variant'] == 'NM_001114382.2:c.277C>T' + assert results['NM_001114382.2:c.277C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001114382.2:c.277C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001114382.2:c.277C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001114382.2:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001107854.1:p.(Arg93Trp)', 'slr': 'NP_001107854.1:p.(R93W)'} + assert results['NM_001114382.2:c.277C>T']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001114382.2:c.277C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001114382.2:c.277C>T']['alt_genomic_loci'], []) + assert results['NM_001114382.2:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'pos': '2103394', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001114382.2:c.277C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': 'chr16', 'pos': '2053393', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001114382.2:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'pos': '2103394', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001114382.2:c.277C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': '16', 'pos': '2053393', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001114382.2:c.277C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001114382.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001107854.1'} assert 'NM_001114382.1:c.277C>T' in list(results.keys()) - assert results['NM_001114382.1:c.277C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001114382.1:c.277C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001114382.1:c.277C>T']['alt_genomic_loci'], []) - assert results['NM_001114382.1:c.277C>T']['gene_symbol'] == 'TSC2' - assert results['NM_001114382.1:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001107854.1:p.(Arg93Trp)', 'slr': 'NP_001107854.1:p.(R93W)'} assert results['NM_001114382.1:c.277C>T']['submitted_variant'] == '16-2103394-C-T' - assert results['NM_001114382.1:c.277C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001114382.1:c.277C>T']['hgvs_lrg_variant'] == '' + assert 
results['NM_001114382.1:c.277C>T']['gene_symbol'] == 'TSC2' + assert results['NM_001114382.1:c.277C>T']['gene_ids'] == {'hgnc_id': 'HGNC:12363', 'entrez_gene_id': '7249', 'ucsc_id': 'uc002con.4', 'omim_id': ['191092']} assert results['NM_001114382.1:c.277C>T']['hgvs_transcript_variant'] == 'NM_001114382.1:c.277C>T' + assert results['NM_001114382.1:c.277C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001114382.1:c.277C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001114382.1:c.277C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001114382.1:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert results['NM_001114382.1:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001107854.1:p.(Arg93Trp)', 'slr': 'NP_001107854.1:p.(R93W)'} + assert results['NM_001114382.1:c.277C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001114382.1:c.277C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001114382.1:c.277C>T']['alt_genomic_loci'], []) + assert results['NM_001114382.1:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'pos': '2103394', 'ref': 'C', 'alt': 'T'}} assert 'hg38' not in list(results['NM_001114382.1:c.277C>T']['primary_assembly_loci'].keys()) - assert results['NM_001114382.1:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert results['NM_001114382.1:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'pos': '2103394', 'ref': 'C', 'alt': 'T'}} assert 'grch38' not in list(results['NM_001114382.1:c.277C>T']['primary_assembly_loci'].keys()) - assert 
results['NM_001114382.1:c.277C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001107854.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001114382.1'} + assert results['NM_001114382.1:c.277C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001114382.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001107854.1'} assert 'NM_001077183.1:c.277C>T' in list(results.keys()) - assert results['NM_001077183.1:c.277C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001077183.1:c.277C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001077183.1:c.277C>T']['alt_genomic_loci'], []) - assert results['NM_001077183.1:c.277C>T']['gene_symbol'] == 'TSC2' - assert results['NM_001077183.1:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001070651.1:p.(Arg93Trp)', 'slr': 'NP_001070651.1:p.(R93W)'} assert results['NM_001077183.1:c.277C>T']['submitted_variant'] == '16-2103394-C-T' - assert results['NM_001077183.1:c.277C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001077183.1:c.277C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001077183.1:c.277C>T']['gene_symbol'] == 'TSC2' + assert results['NM_001077183.1:c.277C>T']['gene_ids'] == {'hgnc_id': 'HGNC:12363', 'entrez_gene_id': '7249', 'ucsc_id': 'uc002con.4', 'omim_id': ['191092']} assert results['NM_001077183.1:c.277C>T']['hgvs_transcript_variant'] == 'NM_001077183.1:c.277C>T' + assert results['NM_001077183.1:c.277C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001077183.1:c.277C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001077183.1:c.277C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001077183.1:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert 
results['NM_001077183.1:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001070651.1:p.(Arg93Trp)', 'slr': 'NP_001070651.1:p.(R93W)'} + assert results['NM_001077183.1:c.277C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001077183.1:c.277C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001077183.1:c.277C>T']['alt_genomic_loci'], []) + assert results['NM_001077183.1:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'pos': '2103394', 'ref': 'C', 'alt': 'T'}} assert 'hg38' not in list(results['NM_001077183.1:c.277C>T']['primary_assembly_loci'].keys()) - assert results['NM_001077183.1:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert results['NM_001077183.1:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'pos': '2103394', 'ref': 'C', 'alt': 'T'}} assert 'grch38' not in list(results['NM_001077183.1:c.277C>T']['primary_assembly_loci'].keys()) - assert results['NM_001077183.1:c.277C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001070651.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077183.1'} + assert results['NM_001077183.1:c.277C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077183.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001070651.1'} - assert 'NM_001318827.1:c.226-903C>T' in list(results.keys()) - assert results['NM_001318827.1:c.226-903C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001318827.1:c.226-903C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001318827.1:c.226-903C>T']['alt_genomic_loci'], []) - assert 
results['NM_001318827.1:c.226-903C>T']['gene_symbol'] == 'TSC2' - assert results['NM_001318827.1:c.226-903C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305756.1:p.?', 'slr': 'NP_001305756.1:p.?'} - assert results['NM_001318827.1:c.226-903C>T']['submitted_variant'] == '16-2103394-C-T' - assert results['NM_001318827.1:c.226-903C>T']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001318827.1):c.226-903C>T' - assert results['NM_001318827.1:c.226-903C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001318827.1:c.226-903C>T']['hgvs_transcript_variant'] == 'NM_001318827.1:c.226-903C>T' - assert results['NM_001318827.1:c.226-903C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001318827.1:c.226-903C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert results['NM_001318827.1:c.226-903C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} - assert results['NM_001318827.1:c.226-903C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert results['NM_001318827.1:c.226-903C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} - assert results['NM_001318827.1:c.226-903C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305756.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318827.1'} + assert 'NM_021055.2:c.277C>T' in list(results.keys()) + assert results['NM_021055.2:c.277C>T']['submitted_variant'] == '16-2103394-C-T' + assert results['NM_021055.2:c.277C>T']['gene_symbol'] == 'TSC2' + assert results['NM_021055.2:c.277C>T']['gene_ids'] 
== {'hgnc_id': 'HGNC:12363', 'entrez_gene_id': '7249', 'ucsc_id': 'uc002con.4', 'omim_id': ['191092']} + assert results['NM_021055.2:c.277C>T']['hgvs_transcript_variant'] == 'NM_021055.2:c.277C>T' + assert results['NM_021055.2:c.277C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_021055.2:c.277C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_021055.2:c.277C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_021055.2:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_066399.2:p.(Arg93Trp)', 'slr': 'NP_066399.2:p.(R93W)'} + assert results['NM_021055.2:c.277C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_021055.2:c.277C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_021055.2:c.277C>T']['alt_genomic_loci'], []) + assert results['NM_021055.2:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'pos': '2103394', 'ref': 'C', 'alt': 'T'}} + assert 'hg38' not in list(results['NM_021055.2:c.277C>T']['primary_assembly_loci'].keys()) + assert results['NM_021055.2:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'pos': '2103394', 'ref': 'C', 'alt': 'T'}} + assert 'grch38' not in list(results['NM_021055.2:c.277C>T']['primary_assembly_loci'].keys()) + assert results['NM_021055.2:c.277C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021055.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_066399.2'} - assert results['flag'] == 'gene_variant' - assert 'NM_001114382.2:c.277C>T' in list(results.keys()) - assert results['NM_001114382.2:c.277C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001114382.2:c.277C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001114382.2:c.277C>T']['alt_genomic_loci'], []) - assert 
results['NM_001114382.2:c.277C>T']['gene_symbol'] == 'TSC2' - assert results['NM_001114382.2:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001107854.1:p.(Arg93Trp)', 'slr': 'NP_001107854.1:p.(R93W)'} - assert results['NM_001114382.2:c.277C>T']['submitted_variant'] == '16-2103394-C-T' - assert results['NM_001114382.2:c.277C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001114382.2:c.277C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001114382.2:c.277C>T']['hgvs_transcript_variant'] == 'NM_001114382.2:c.277C>T' - assert results['NM_001114382.2:c.277C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001114382.2:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert results['NM_001114382.2:c.277C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} - assert results['NM_001114382.2:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert results['NM_001114382.2:c.277C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} - assert results['NM_001114382.2:c.277C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001107854.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001114382.2'} + assert 'NM_001077183.2:c.277C>T' in list(results.keys()) + assert results['NM_001077183.2:c.277C>T']['submitted_variant'] == '16-2103394-C-T' + assert results['NM_001077183.2:c.277C>T']['gene_symbol'] == 'TSC2' + assert results['NM_001077183.2:c.277C>T']['gene_ids'] == {'hgnc_id': 'HGNC:12363', 'entrez_gene_id': '7249', 'ucsc_id': 
'uc002con.4', 'omim_id': ['191092']} + assert results['NM_001077183.2:c.277C>T']['hgvs_transcript_variant'] == 'NM_001077183.2:c.277C>T' + assert results['NM_001077183.2:c.277C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001077183.2:c.277C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001077183.2:c.277C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001077183.2:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001070651.1:p.(Arg93Trp)', 'slr': 'NP_001070651.1:p.(R93W)'} + assert results['NM_001077183.2:c.277C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001077183.2:c.277C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001077183.2:c.277C>T']['alt_genomic_loci'], []) + assert results['NM_001077183.2:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'pos': '2103394', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001077183.2:c.277C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': 'chr16', 'pos': '2053393', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001077183.2:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'pos': '2103394', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001077183.2:c.277C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': '16', 'pos': '2053393', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001077183.2:c.277C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001077183.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001070651.1'} + + assert 'NM_001318829.1:c.130C>T' in list(results.keys()) + assert results['NM_001318829.1:c.130C>T']['submitted_variant'] == '16-2103394-C-T' + assert 
results['NM_001318829.1:c.130C>T']['gene_symbol'] == 'TSC2' + assert results['NM_001318829.1:c.130C>T']['gene_ids'] == {'hgnc_id': 'HGNC:12363', 'entrez_gene_id': '7249', 'ucsc_id': 'uc002con.4', 'omim_id': ['191092']} + assert results['NM_001318829.1:c.130C>T']['hgvs_transcript_variant'] == 'NM_001318829.1:c.130C>T' + assert results['NM_001318829.1:c.130C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001318829.1:c.130C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001318829.1:c.130C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001318829.1:c.130C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305758.1:p.(Arg44Trp)', 'slr': 'NP_001305758.1:p.(R44W)'} + assert results['NM_001318829.1:c.130C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001318829.1:c.130C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001318829.1:c.130C>T']['alt_genomic_loci'], []) + assert results['NM_001318829.1:c.130C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'pos': '2103394', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001318829.1:c.130C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': 'chr16', 'pos': '2053393', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001318829.1:c.130C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'pos': '2103394', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001318829.1:c.130C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': '16', 'pos': '2053393', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001318829.1:c.130C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318829.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305758.1'} 
assert 'NM_001363528.1:c.277C>T' in list(results.keys()) - assert results['NM_001363528.1:c.277C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001363528.1:c.277C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001363528.1:c.277C>T']['alt_genomic_loci'], []) - assert results['NM_001363528.1:c.277C>T']['gene_symbol'] == 'TSC2' - assert results['NM_001363528.1:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350457.1:p.(Arg93Trp)', 'slr': 'NP_001350457.1:p.(R93W)'} assert results['NM_001363528.1:c.277C>T']['submitted_variant'] == '16-2103394-C-T' - assert results['NM_001363528.1:c.277C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001363528.1:c.277C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001363528.1:c.277C>T']['gene_symbol'] == 'TSC2' + assert results['NM_001363528.1:c.277C>T']['gene_ids'] == {'hgnc_id': 'HGNC:12363', 'entrez_gene_id': '7249', 'ucsc_id': 'uc002con.4', 'omim_id': ['191092']} assert results['NM_001363528.1:c.277C>T']['hgvs_transcript_variant'] == 'NM_001363528.1:c.277C>T' + assert results['NM_001363528.1:c.277C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001363528.1:c.277C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363528.1:c.277C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001363528.1:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert results['NM_001363528.1:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350457.1:p.(Arg93Trp)', 'slr': 'NP_001350457.1:p.(R93W)'} + assert results['NM_001363528.1:c.277C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363528.1:c.277C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001363528.1:c.277C>T']['alt_genomic_loci'], []) + assert 
results['NM_001363528.1:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'pos': '2103394', 'ref': 'C', 'alt': 'T'}} assert 'hg38' not in list(results['NM_001363528.1:c.277C>T']['primary_assembly_loci'].keys()) - assert results['NM_001363528.1:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} + assert results['NM_001363528.1:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'pos': '2103394', 'ref': 'C', 'alt': 'T'}} assert 'grch38' not in list(results['NM_001363528.1:c.277C>T']['primary_assembly_loci'].keys()) - assert results['NM_001363528.1:c.277C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350457.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363528.1'} - - assert 'NM_021055.2:c.277C>T' in list(results.keys()) - assert results['NM_021055.2:c.277C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_021055.2:c.277C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_021055.2:c.277C>T']['alt_genomic_loci'], []) - assert results['NM_021055.2:c.277C>T']['gene_symbol'] == 'TSC2' - assert results['NM_021055.2:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_066399.2:p.(Arg93Trp)', 'slr': 'NP_066399.2:p.(R93W)'} - assert results['NM_021055.2:c.277C>T']['submitted_variant'] == '16-2103394-C-T' - assert results['NM_021055.2:c.277C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_021055.2:c.277C>T']['hgvs_lrg_variant'] == '' - assert results['NM_021055.2:c.277C>T']['hgvs_transcript_variant'] == 'NM_021055.2:c.277C>T' - assert results['NM_021055.2:c.277C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_021055.2:c.277C>T']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert 'hg38' not in list(results['NM_021055.2:c.277C>T']['primary_assembly_loci'].keys()) - assert results['NM_021055.2:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert 'grch38' not in list(results['NM_021055.2:c.277C>T']['primary_assembly_loci'].keys()) - assert results['NM_021055.2:c.277C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_066399.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021055.2'} - - assert 'NM_000548.4:c.277C>T' in list(results.keys()) - assert results['NM_000548.4:c.277C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000548.4:c.277C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000548.4:c.277C>T']['alt_genomic_loci'], []) - assert results['NM_000548.4:c.277C>T']['gene_symbol'] == 'TSC2' - assert results['NM_000548.4:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000539.2(LRG_487p1):p.(Arg93Trp)', 'slr': 'NP_000539.2:p.(R93W)'} - assert results['NM_000548.4:c.277C>T']['submitted_variant'] == '16-2103394-C-T' - assert results['NM_000548.4:c.277C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_000548.4:c.277C>T']['hgvs_lrg_variant'] == '' - assert results['NM_000548.4:c.277C>T']['hgvs_transcript_variant'] == 'NM_000548.4:c.277C>T' - assert results['NM_000548.4:c.277C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_000548.4:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert results['NM_000548.4:c.277C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': 
'chr16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} - assert results['NM_000548.4:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert results['NM_000548.4:c.277C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} - assert results['NM_000548.4:c.277C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000539.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000548.4'} + assert results['NM_001363528.1:c.277C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363528.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350457.1'} assert 'NM_001318831.1:c.-1-2803C>T' in list(results.keys()) - assert results['NM_001318831.1:c.-1-2803C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001318831.1:c.-1-2803C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001318831.1:c.-1-2803C>T']['alt_genomic_loci'], []) - assert results['NM_001318831.1:c.-1-2803C>T']['gene_symbol'] == 'TSC2' - assert results['NM_001318831.1:c.-1-2803C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305760.1:p.?', 'slr': 'NP_001305760.1:p.?'} assert results['NM_001318831.1:c.-1-2803C>T']['submitted_variant'] == '16-2103394-C-T' - assert results['NM_001318831.1:c.-1-2803C>T']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001318831.1):c.-1-2803C>T' - assert results['NM_001318831.1:c.-1-2803C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001318831.1:c.-1-2803C>T']['gene_symbol'] == 'TSC2' + assert results['NM_001318831.1:c.-1-2803C>T']['gene_ids'] == {'hgnc_id': 'HGNC:12363', 'entrez_gene_id': '7249', 'ucsc_id': 'uc002con.4', 'omim_id': ['191092']} assert 
results['NM_001318831.1:c.-1-2803C>T']['hgvs_transcript_variant'] == 'NM_001318831.1:c.-1-2803C>T' + assert results['NM_001318831.1:c.-1-2803C>T']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001318831.1):c.-1-2803C>T' + assert results['NM_001318831.1:c.-1-2803C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001318831.1:c.-1-2803C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001318831.1:c.-1-2803C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert results['NM_001318831.1:c.-1-2803C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} - assert results['NM_001318831.1:c.-1-2803C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2103394', 'alt': 'T'}} - assert results['NM_001318831.1:c.-1-2803C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '2053393', 'alt': 'T'}} - assert results['NM_001318831.1:c.-1-2803C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305760.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318831.1'} + assert results['NM_001318831.1:c.-1-2803C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001305760.1:p.?', 'slr': 'NP_001305760.1:p.?'} + assert results['NM_001318831.1:c.-1-2803C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001318831.1:c.-1-2803C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001318831.1:c.-1-2803C>T']['alt_genomic_loci'], []) + assert results['NM_001318831.1:c.-1-2803C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': 
{'chr': 'chr16', 'pos': '2103394', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001318831.1:c.-1-2803C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': 'chr16', 'pos': '2053393', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001318831.1:c.-1-2803C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'pos': '2103394', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001318831.1:c.-1-2803C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.2053393C>T', 'vcf': {'chr': '16', 'pos': '2053393', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001318831.1:c.-1-2803C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001318831.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001305760.1'} + assert 'NM_000548.3:c.277C>T' in list(results.keys()) + assert results['NM_000548.3:c.277C>T']['submitted_variant'] == '16-2103394-C-T' + assert results['NM_000548.3:c.277C>T']['gene_symbol'] == 'TSC2' + assert results['NM_000548.3:c.277C>T']['gene_ids'] == {'hgnc_id': 'HGNC:12363', 'entrez_gene_id': '7249', 'ucsc_id': 'uc002con.4', 'omim_id': ['191092']} + assert results['NM_000548.3:c.277C>T']['hgvs_transcript_variant'] == 'NM_000548.3:c.277C>T' + assert results['NM_000548.3:c.277C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000548.3:c.277C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000548.3:c.277C>T']['hgvs_refseqgene_variant'] == 'NG_005895.1:g.9088C>T' + assert results['NM_000548.3:c.277C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000539.2(LRG_487p1):p.(Arg93Trp)', 'slr': 'NP_000539.2:p.(R93W)'} + assert results['NM_000548.3:c.277C>T']['hgvs_lrg_transcript_variant'] == 'LRG_487t1:c.277C>T' + assert results['NM_000548.3:c.277C>T']['hgvs_lrg_variant'] == 'LRG_487:g.9088C>T' + 
self.assertCountEqual(results['NM_000548.3:c.277C>T']['alt_genomic_loci'], []) + assert results['NM_000548.3:c.277C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': 'chr16', 'pos': '2103394', 'ref': 'C', 'alt': 'T'}} + assert 'hg38' not in list(results['NM_000548.3:c.277C>T']['primary_assembly_loci'].keys()) + assert results['NM_000548.3:c.277C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.2103394C>T', 'vcf': {'chr': '16', 'pos': '2103394', 'ref': 'C', 'alt': 'T'}} + assert 'grch38' not in list(results['NM_000548.3:c.277C>T']['primary_assembly_loci'].keys()) + assert results['NM_000548.3:c.277C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000548.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000539.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_005895.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_487.xml'} def test_variant235(self): variant = '16-3779300-C-G' @@ -9775,2336 +10019,2441 @@ def test_variant235(self): print(results) assert results['flag'] == 'gene_variant' - assert 'NM_001079846.1:c.5634G>C' in list(results.keys()) - assert results['NM_001079846.1:c.5634G>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001079846.1:c.5634G>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001079846.1:c.5634G>C']['alt_genomic_loci'], []) - assert results['NM_001079846.1:c.5634G>C']['gene_symbol'] == 'CREBBP' - assert results['NM_001079846.1:c.5634G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001073315.1:p.(Met1878Ile)', 'slr': 'NP_001073315.1:p.(M1878I)'} - assert results['NM_001079846.1:c.5634G>C']['submitted_variant'] == '16-3779300-C-G' - assert results['NM_001079846.1:c.5634G>C']['genome_context_intronic_sequence'] == '' - assert results['NM_001079846.1:c.5634G>C']['hgvs_lrg_variant'] == '' - assert 
results['NM_001079846.1:c.5634G>C']['hgvs_transcript_variant'] == 'NM_001079846.1:c.5634G>C' - assert results['NM_001079846.1:c.5634G>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_001079846.1:c.5634G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.3779300C>G', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '3779300', 'alt': 'G'}} - assert results['NM_001079846.1:c.5634G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.3729299C>G', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '3729299', 'alt': 'G'}} - assert results['NM_001079846.1:c.5634G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.3779300C>G', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '3779300', 'alt': 'G'}} - assert results['NM_001079846.1:c.5634G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.3729299C>G', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '3729299', 'alt': 'G'}} - assert results['NM_001079846.1:c.5634G>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073315.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001079846.1'} - assert 'NM_004380.2:c.5748G>C' in list(results.keys()) - assert results['NM_004380.2:c.5748G>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_004380.2:c.5748G>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004380.2:c.5748G>C']['alt_genomic_loci'], []) - assert results['NM_004380.2:c.5748G>C']['gene_symbol'] == 'CREBBP' - assert results['NM_004380.2:c.5748G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004371.2:p.(Met1916Ile)', 'slr': 'NP_004371.2:p.(M1916I)'} assert results['NM_004380.2:c.5748G>C']['submitted_variant'] == '16-3779300-C-G' - assert results['NM_004380.2:c.5748G>C']['genome_context_intronic_sequence'] == '' - assert results['NM_004380.2:c.5748G>C']['hgvs_lrg_variant'] == '' + assert 
results['NM_004380.2:c.5748G>C']['gene_symbol'] == 'CREBBP' + assert results['NM_004380.2:c.5748G>C']['gene_ids'] == {'hgnc_id': 'HGNC:2348', 'entrez_gene_id': '1387', 'ucsc_id': 'uc002cvv.4', 'omim_id': ['600140']} assert results['NM_004380.2:c.5748G>C']['hgvs_transcript_variant'] == 'NM_004380.2:c.5748G>C' + assert results['NM_004380.2:c.5748G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_004380.2:c.5748G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004380.2:c.5748G>C']['hgvs_refseqgene_variant'] == 'NG_009873.1:g.155822G>C' - assert results['NM_004380.2:c.5748G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.3779300C>G', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '3779300', 'alt': 'G'}} - assert results['NM_004380.2:c.5748G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.3729299C>G', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '3729299', 'alt': 'G'}} - assert results['NM_004380.2:c.5748G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.3779300C>G', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '3779300', 'alt': 'G'}} - assert results['NM_004380.2:c.5748G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.3729299C>G', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '3729299', 'alt': 'G'}} - assert results['NM_004380.2:c.5748G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009873.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004371.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004380.2'} + assert results['NM_004380.2:c.5748G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004371.2:p.(Met1916Ile)', 'slr': 'NP_004371.2:p.(M1916I)'} + assert results['NM_004380.2:c.5748G>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_004380.2:c.5748G>C']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['NM_004380.2:c.5748G>C']['alt_genomic_loci'], []) + assert results['NM_004380.2:c.5748G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.3779300C>G', 'vcf': {'chr': 'chr16', 'pos': '3779300', 'ref': 'C', 'alt': 'G'}} + assert results['NM_004380.2:c.5748G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.3729299C>G', 'vcf': {'chr': 'chr16', 'pos': '3729299', 'ref': 'C', 'alt': 'G'}} + assert results['NM_004380.2:c.5748G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.3779300C>G', 'vcf': {'chr': '16', 'pos': '3779300', 'ref': 'C', 'alt': 'G'}} + assert results['NM_004380.2:c.5748G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.3729299C>G', 'vcf': {'chr': '16', 'pos': '3729299', 'ref': 'C', 'alt': 'G'}} + assert results['NM_004380.2:c.5748G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004380.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004371.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009873.1'} + assert 'NM_001079846.1:c.5634G>C' in list(results.keys()) + assert results['NM_001079846.1:c.5634G>C']['submitted_variant'] == '16-3779300-C-G' + assert results['NM_001079846.1:c.5634G>C']['gene_symbol'] == 'CREBBP' + assert results['NM_001079846.1:c.5634G>C']['gene_ids'] == {'hgnc_id': 'HGNC:2348', 'entrez_gene_id': '1387', 'ucsc_id': 'uc002cvv.4', 'omim_id': ['600140']} + assert results['NM_001079846.1:c.5634G>C']['hgvs_transcript_variant'] == 'NM_001079846.1:c.5634G>C' + assert results['NM_001079846.1:c.5634G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_001079846.1:c.5634G>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001079846.1:c.5634G>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_001079846.1:c.5634G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_001073315.1:p.(Met1878Ile)', 'slr': 'NP_001073315.1:p.(M1878I)'} + assert results['NM_001079846.1:c.5634G>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001079846.1:c.5634G>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001079846.1:c.5634G>C']['alt_genomic_loci'], []) + assert results['NM_001079846.1:c.5634G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.3779300C>G', 'vcf': {'chr': 'chr16', 'pos': '3779300', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001079846.1:c.5634G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.3729299C>G', 'vcf': {'chr': 'chr16', 'pos': '3729299', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001079846.1:c.5634G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.3779300C>G', 'vcf': {'chr': '16', 'pos': '3779300', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001079846.1:c.5634G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.3729299C>G', 'vcf': {'chr': '16', 'pos': '3729299', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001079846.1:c.5634G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001079846.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001073315.1'} def test_variant236(self): variant = '16-5128843-C-G' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_001330504.1:c.493C>G' in list(results.keys()) - assert results['NM_001330504.1:c.493C>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330504.1:c.493C>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330504.1:c.493C>G']['alt_genomic_loci'], []) - assert results['NM_001330504.1:c.493C>G']['gene_symbol'] == 'ALG1' - assert results['NM_001330504.1:c.493C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_001317433.1:p.(Arg165Gly)', 'slr': 'NP_001317433.1:p.(R165G)'} assert results['NM_001330504.1:c.493C>G']['submitted_variant'] == '16-5128843-C-G' - assert results['NM_001330504.1:c.493C>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001330504.1:c.493C>G']['hgvs_lrg_variant'] == '' + assert results['NM_001330504.1:c.493C>G']['gene_symbol'] == 'ALG1' + assert results['NM_001330504.1:c.493C>G']['gene_ids'] == {'hgnc_id': 'HGNC:18294', 'entrez_gene_id': '56052', 'ucsc_id': 'uc002cym.4', 'omim_id': ['605907']} assert results['NM_001330504.1:c.493C>G']['hgvs_transcript_variant'] == 'NM_001330504.1:c.493C>G' + assert results['NM_001330504.1:c.493C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330504.1:c.493C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330504.1:c.493C>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330504.1:c.493C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.5128843C>G', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '5128843', 'alt': 'G'}} - assert results['NM_001330504.1:c.493C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.5078842C>G', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '5078842', 'alt': 'G'}} - assert results['NM_001330504.1:c.493C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.5128843C>G', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '5128843', 'alt': 'G'}} - assert results['NM_001330504.1:c.493C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.5078842C>G', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '5078842', 'alt': 'G'}} - assert results['NM_001330504.1:c.493C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317433.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330504.1'} + assert results['NM_001330504.1:c.493C>G']['hgvs_predicted_protein_consequence'] == 
{'tlr': 'NP_001317433.1:p.(Arg165Gly)', 'slr': 'NP_001317433.1:p.(R165G)'} + assert results['NM_001330504.1:c.493C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330504.1:c.493C>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330504.1:c.493C>G']['alt_genomic_loci'], []) + assert results['NM_001330504.1:c.493C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.5128843C>G', 'vcf': {'chr': 'chr16', 'pos': '5128843', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001330504.1:c.493C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.5078842C>G', 'vcf': {'chr': 'chr16', 'pos': '5078842', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001330504.1:c.493C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.5128843C>G', 'vcf': {'chr': '16', 'pos': '5128843', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001330504.1:c.493C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.5078842C>G', 'vcf': {'chr': '16', 'pos': '5078842', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001330504.1:c.493C>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330504.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317433.1'} - assert results['flag'] == 'gene_variant' assert 'NM_019109.4:c.826C>G' in list(results.keys()) - assert results['NM_019109.4:c.826C>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_019109.4:c.826C>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_019109.4:c.826C>G']['alt_genomic_loci'], []) - assert results['NM_019109.4:c.826C>G']['gene_symbol'] == 'ALG1' - assert results['NM_019109.4:c.826C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061982.3:p.(Arg276Gly)', 'slr': 'NP_061982.3:p.(R276G)'} assert results['NM_019109.4:c.826C>G']['submitted_variant'] == '16-5128843-C-G' - assert 
results['NM_019109.4:c.826C>G']['genome_context_intronic_sequence'] == '' - assert results['NM_019109.4:c.826C>G']['hgvs_lrg_variant'] == '' + assert results['NM_019109.4:c.826C>G']['gene_symbol'] == 'ALG1' + assert results['NM_019109.4:c.826C>G']['gene_ids'] == {'hgnc_id': 'HGNC:18294', 'entrez_gene_id': '56052', 'ucsc_id': 'uc002cym.4', 'omim_id': ['605907']} assert results['NM_019109.4:c.826C>G']['hgvs_transcript_variant'] == 'NM_019109.4:c.826C>G' + assert results['NM_019109.4:c.826C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_019109.4:c.826C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_019109.4:c.826C>G']['hgvs_refseqgene_variant'] == 'NG_009202.1:g.12034C>G' - assert results['NM_019109.4:c.826C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.5128843C>G', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '5128843', 'alt': 'G'}} - assert results['NM_019109.4:c.826C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.5078842C>G', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '5078842', 'alt': 'G'}} - assert results['NM_019109.4:c.826C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.5128843C>G', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '5128843', 'alt': 'G'}} - assert results['NM_019109.4:c.826C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.5078842C>G', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '5078842', 'alt': 'G'}} - assert results['NM_019109.4:c.826C>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009202.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061982.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_019109.4'} - + assert results['NM_019109.4:c.826C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_061982.3:p.(Arg276Gly)', 'slr': 'NP_061982.3:p.(R276G)'} + assert 
results['NM_019109.4:c.826C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_019109.4:c.826C>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_019109.4:c.826C>G']['alt_genomic_loci'], []) + assert results['NM_019109.4:c.826C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.5128843C>G', 'vcf': {'chr': 'chr16', 'pos': '5128843', 'ref': 'C', 'alt': 'G'}} + assert results['NM_019109.4:c.826C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.5078842C>G', 'vcf': {'chr': 'chr16', 'pos': '5078842', 'ref': 'C', 'alt': 'G'}} + assert results['NM_019109.4:c.826C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.5128843C>G', 'vcf': {'chr': '16', 'pos': '5128843', 'ref': 'C', 'alt': 'G'}} + assert results['NM_019109.4:c.826C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.5078842C>G', 'vcf': {'chr': '16', 'pos': '5078842', 'ref': 'C', 'alt': 'G'}} + assert results['NM_019109.4:c.826C>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_019109.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_061982.3', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009202.1'} def test_variant237(self): variant = '16-74808559-C-T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_024306.4:c.95G>A' in list(results.keys()) - assert results['NM_024306.4:c.95G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_024306.4:c.95G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_024306.4:c.95G>A']['alt_genomic_loci'], []) - assert results['NM_024306.4:c.95G>A']['gene_symbol'] == 'FA2H' - assert results['NM_024306.4:c.95G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_077282.3:p.(Arg32His)', 'slr': 'NP_077282.3:p.(R32H)'} assert 
results['NM_024306.4:c.95G>A']['submitted_variant'] == '16-74808559-C-T' - assert results['NM_024306.4:c.95G>A']['genome_context_intronic_sequence'] == '' - assert results['NM_024306.4:c.95G>A']['hgvs_lrg_variant'] == '' + assert results['NM_024306.4:c.95G>A']['gene_symbol'] == 'FA2H' + assert results['NM_024306.4:c.95G>A']['gene_ids'] == {'hgnc_id': 'HGNC:21197', 'entrez_gene_id': '79152', 'ucsc_id': 'uc002fde.3', 'omim_id': ['611026']} assert results['NM_024306.4:c.95G>A']['hgvs_transcript_variant'] == 'NM_024306.4:c.95G>A' + assert results['NM_024306.4:c.95G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_024306.4:c.95G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_024306.4:c.95G>A']['hgvs_refseqgene_variant'] == 'NG_017070.1:g.5171G>A' - assert results['NM_024306.4:c.95G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.74808559C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '74808559', 'alt': 'T'}} - assert results['NM_024306.4:c.95G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.74774661C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '74774661', 'alt': 'T'}} - assert results['NM_024306.4:c.95G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.74808559C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '74808559', 'alt': 'T'}} - assert results['NM_024306.4:c.95G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.74774661C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '74774661', 'alt': 'T'}} - assert results['NM_024306.4:c.95G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017070.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_077282.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024306.4'} - - assert results['flag'] == 'gene_variant' + assert results['NM_024306.4:c.95G>A']['hgvs_predicted_protein_consequence'] == 
{'tlr': 'NP_077282.3:p.(Arg32His)', 'slr': 'NP_077282.3:p.(R32H)'} + assert results['NM_024306.4:c.95G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_024306.4:c.95G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_024306.4:c.95G>A']['alt_genomic_loci'], []) + assert results['NM_024306.4:c.95G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.74808559C>T', 'vcf': {'chr': 'chr16', 'pos': '74808559', 'ref': 'C', 'alt': 'T'}} + assert results['NM_024306.4:c.95G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.74774661C>T', 'vcf': {'chr': 'chr16', 'pos': '74774661', 'ref': 'C', 'alt': 'T'}} + assert results['NM_024306.4:c.95G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.74808559C>T', 'vcf': {'chr': '16', 'pos': '74808559', 'ref': 'C', 'alt': 'T'}} + assert results['NM_024306.4:c.95G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.74774661C>T', 'vcf': {'chr': '16', 'pos': '74774661', 'ref': 'C', 'alt': 'T'}} + assert results['NM_024306.4:c.95G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024306.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_077282.3', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017070.1'} def test_variant238(self): variant = '16-89574804-C-A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_003119.3:c.-22C>A' in list(results.keys()) - assert results['NM_003119.3:c.-22C>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.3:c.-22C>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003119.3:c.-22C>A']['alt_genomic_loci'], []) - assert results['NM_003119.3:c.-22C>A']['gene_symbol'] == 'SPG7' - assert results['NM_003119.3:c.-22C>A']['hgvs_predicted_protein_consequence'] == 
{'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} assert results['NM_003119.3:c.-22C>A']['submitted_variant'] == '16-89574804-C-A' - assert results['NM_003119.3:c.-22C>A']['genome_context_intronic_sequence'] == '' - assert results['NM_003119.3:c.-22C>A']['hgvs_lrg_variant'] == '' + assert results['NM_003119.3:c.-22C>A']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.-22C>A']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_003119.3:c.-22C>A']['hgvs_transcript_variant'] == 'NM_003119.3:c.-22C>A' + assert results['NM_003119.3:c.-22C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.-22C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.3:c.-22C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_003119.3:c.-22C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574804C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89574804', 'alt': 'A'}} - assert results['NM_003119.3:c.-22C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508396C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89508396', 'alt': 'A'}} - assert results['NM_003119.3:c.-22C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574804C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89574804', 'alt': 'A'}} - assert results['NM_003119.3:c.-22C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508396C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89508396', 'alt': 'A'}} - assert results['NM_003119.3:c.-22C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} - - assert results['flag'] == 'gene_variant' - assert 'NM_199367.2:c.-22C>A' in list(results.keys()) - assert 
results['NM_199367.2:c.-22C>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_199367.2:c.-22C>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_199367.2:c.-22C>A']['alt_genomic_loci'], []) - assert results['NM_199367.2:c.-22C>A']['gene_symbol'] == 'SPG7' - assert results['NM_199367.2:c.-22C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} - assert results['NM_199367.2:c.-22C>A']['submitted_variant'] == '16-89574804-C-A' - assert results['NM_199367.2:c.-22C>A']['genome_context_intronic_sequence'] == '' - assert results['NM_199367.2:c.-22C>A']['hgvs_lrg_variant'] == '' - assert results['NM_199367.2:c.-22C>A']['hgvs_transcript_variant'] == 'NM_199367.2:c.-22C>A' - assert results['NM_199367.2:c.-22C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_199367.2:c.-22C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574804C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89574804', 'alt': 'A'}} - assert results['NM_199367.2:c.-22C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508396C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89508396', 'alt': 'A'}} - assert results['NM_199367.2:c.-22C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574804C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89574804', 'alt': 'A'}} - assert results['NM_199367.2:c.-22C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508396C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89508396', 'alt': 'A'}} - assert results['NM_199367.2:c.-22C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} + assert results['NM_003119.3:c.-22C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} + assert 
results['NM_003119.3:c.-22C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.-22C>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.3:c.-22C>A']['alt_genomic_loci'], []) + assert results['NM_003119.3:c.-22C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574804C>A', 'vcf': {'chr': 'chr16', 'pos': '89574804', 'ref': 'C', 'alt': 'A'}} + assert results['NM_003119.3:c.-22C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508396C>A', 'vcf': {'chr': 'chr16', 'pos': '89508396', 'ref': 'C', 'alt': 'A'}} + assert results['NM_003119.3:c.-22C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574804C>A', 'vcf': {'chr': '16', 'pos': '89574804', 'ref': 'C', 'alt': 'A'}} + assert results['NM_003119.3:c.-22C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508396C>A', 'vcf': {'chr': '16', 'pos': '89508396', 'ref': 'C', 'alt': 'A'}} + assert results['NM_003119.3:c.-22C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1'} assert 'NM_001363850.1:c.-22C>A' in list(results.keys()) - assert results['NM_001363850.1:c.-22C>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001363850.1:c.-22C>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001363850.1:c.-22C>A']['alt_genomic_loci'], []) - assert results['NM_001363850.1:c.-22C>A']['gene_symbol'] == 'SPG7' - assert results['NM_001363850.1:c.-22C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.?', 'slr': 'NP_001350779.1:p.?'} assert results['NM_001363850.1:c.-22C>A']['submitted_variant'] == '16-89574804-C-A' - assert results['NM_001363850.1:c.-22C>A']['genome_context_intronic_sequence'] == '' - assert results['NM_001363850.1:c.-22C>A']['hgvs_lrg_variant'] == '' + 
assert results['NM_001363850.1:c.-22C>A']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.-22C>A']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_001363850.1:c.-22C>A']['hgvs_transcript_variant'] == 'NM_001363850.1:c.-22C>A' + assert results['NM_001363850.1:c.-22C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.-22C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363850.1:c.-22C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001363850.1:c.-22C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574804C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89574804', 'alt': 'A'}} + assert results['NM_001363850.1:c.-22C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.?', 'slr': 'NP_001350779.1:p.?'} + assert results['NM_001363850.1:c.-22C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.-22C>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001363850.1:c.-22C>A']['alt_genomic_loci'], []) + assert results['NM_001363850.1:c.-22C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574804C>A', 'vcf': {'chr': 'chr16', 'pos': '89574804', 'ref': 'C', 'alt': 'A'}} assert 'hg38' not in list(results['NM_001363850.1:c.-22C>A']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.-22C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574804C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89574804', 'alt': 'A'}} + assert results['NM_001363850.1:c.-22C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574804C>A', 'vcf': {'chr': '16', 'pos': '89574804', 'ref': 'C', 'alt': 'A'}} assert 'grch38' not in list(results['NM_001363850.1:c.-22C>A']['primary_assembly_loci'].keys()) - assert 
results['NM_001363850.1:c.-22C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} + assert results['NM_001363850.1:c.-22C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1'} + assert 'NM_199367.2:c.-22C>A' in list(results.keys()) + assert results['NM_199367.2:c.-22C>A']['submitted_variant'] == '16-89574804-C-A' + assert results['NM_199367.2:c.-22C>A']['gene_symbol'] == 'SPG7' + assert results['NM_199367.2:c.-22C>A']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_199367.2:c.-22C>A']['hgvs_transcript_variant'] == 'NM_199367.2:c.-22C>A' + assert results['NM_199367.2:c.-22C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.-22C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.-22C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.2:c.-22C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} + assert results['NM_199367.2:c.-22C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.2:c.-22C>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_199367.2:c.-22C>A']['alt_genomic_loci'], []) + assert results['NM_199367.2:c.-22C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574804C>A', 'vcf': {'chr': 'chr16', 'pos': '89574804', 'ref': 'C', 'alt': 'A'}} + assert results['NM_199367.2:c.-22C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508396C>A', 'vcf': {'chr': 'chr16', 'pos': '89508396', 'ref': 'C', 'alt': 'A'}} + assert results['NM_199367.2:c.-22C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000016.9:g.89574804C>A', 'vcf': {'chr': '16', 'pos': '89574804', 'ref': 'C', 'alt': 'A'}} + assert results['NM_199367.2:c.-22C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508396C>A', 'vcf': {'chr': '16', 'pos': '89508396', 'ref': 'C', 'alt': 'A'}} + assert results['NM_199367.2:c.-22C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1'} def test_variant239(self): variant = '16-89574826-A-C' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_003119.2:c.1A>C' in list(results.keys()) - assert results['NM_003119.2:c.1A>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.2:c.1A>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003119.2:c.1A>C']['alt_genomic_loci'], []) - assert results['NM_003119.2:c.1A>C']['gene_symbol'] == 'SPG7' - assert results['NM_003119.2:c.1A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Met1?)', 'slr': 'NP_003110.1:p.(M1?)'} - assert results['NM_003119.2:c.1A>C']['submitted_variant'] == '16-89574826-A-C' - assert results['NM_003119.2:c.1A>C']['genome_context_intronic_sequence'] == '' - assert results['NM_003119.2:c.1A>C']['hgvs_lrg_variant'] == '' - assert results['NM_003119.2:c.1A>C']['hgvs_transcript_variant'] == 'NM_003119.2:c.1A>C' - assert results['NM_003119.2:c.1A>C']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.5022A>C' - assert results['NM_003119.2:c.1A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89574826', 'alt': 'C'}} - assert 'hg38' not in list(results['NM_003119.2:c.1A>C']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.1A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': '16', 
'ref': 'A', 'pos': '89574826', 'alt': 'C'}} - assert 'grch38' not in list(results['NM_003119.2:c.1A>C']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.1A>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} + assert results['flag'] == 'gene_variant' + assert 'NM_003119.3:c.1A>C' in list(results.keys()) + assert results['NM_003119.3:c.1A>C']['submitted_variant'] == '16-89574826-A-C' + assert results['NM_003119.3:c.1A>C']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.1A>C']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_003119.3:c.1A>C']['hgvs_transcript_variant'] == 'NM_003119.3:c.1A>C' + assert results['NM_003119.3:c.1A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.1A>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.1A>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_003119.3:c.1A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Met1?)', 'slr': 'NP_003110.1:p.(M1?)'} + assert results['NM_003119.3:c.1A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.1A>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.3:c.1A>C']['alt_genomic_loci'], []) + assert results['NM_003119.3:c.1A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': 'chr16', 'pos': '89574826', 'ref': 'A', 'alt': 'C'}} + assert results['NM_003119.3:c.1A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508418A>C', 'vcf': {'chr': 'chr16', 'pos': '89508418', 'ref': 'A', 'alt': 'C'}} + assert results['NM_003119.3:c.1A>C']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': '16', 'pos': '89574826', 'ref': 'A', 'alt': 'C'}} + assert results['NM_003119.3:c.1A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508418A>C', 'vcf': {'chr': '16', 'pos': '89508418', 'ref': 'A', 'alt': 'C'}} + assert results['NM_003119.3:c.1A>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1'} + + assert 'NM_001363850.1:c.1A>C' in list(results.keys()) + assert results['NM_001363850.1:c.1A>C']['submitted_variant'] == '16-89574826-A-C' + assert results['NM_001363850.1:c.1A>C']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.1A>C']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_001363850.1:c.1A>C']['hgvs_transcript_variant'] == 'NM_001363850.1:c.1A>C' + assert results['NM_001363850.1:c.1A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.1A>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.1A>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_001363850.1:c.1A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Met1?)', 'slr': 'NP_001350779.1:p.(M1?)'} + assert results['NM_001363850.1:c.1A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.1A>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001363850.1:c.1A>C']['alt_genomic_loci'], []) + assert results['NM_001363850.1:c.1A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': 'chr16', 'pos': '89574826', 'ref': 'A', 'alt': 'C'}} + assert 'hg38' not in list(results['NM_001363850.1:c.1A>C']['primary_assembly_loci'].keys()) + assert results['NM_001363850.1:c.1A>C']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': '16', 'pos': '89574826', 'ref': 'A', 'alt': 'C'}} + assert 'grch38' not in list(results['NM_001363850.1:c.1A>C']['primary_assembly_loci'].keys()) + assert results['NM_001363850.1:c.1A>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1'} assert 'NM_199367.1:c.1A>C' in list(results.keys()) - assert results['NM_199367.1:c.1A>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_199367.1:c.1A>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_199367.1:c.1A>C']['alt_genomic_loci'], []) - assert results['NM_199367.1:c.1A>C']['gene_symbol'] == 'SPG7' - assert results['NM_199367.1:c.1A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Met1?)', 'slr': 'NP_955399.1:p.(M1?)'} assert results['NM_199367.1:c.1A>C']['submitted_variant'] == '16-89574826-A-C' - assert results['NM_199367.1:c.1A>C']['genome_context_intronic_sequence'] == '' - assert results['NM_199367.1:c.1A>C']['hgvs_lrg_variant'] == '' + assert results['NM_199367.1:c.1A>C']['gene_symbol'] == 'SPG7' + assert results['NM_199367.1:c.1A>C']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_199367.1:c.1A>C']['hgvs_transcript_variant'] == 'NM_199367.1:c.1A>C' + assert results['NM_199367.1:c.1A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.1A>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.1:c.1A>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_199367.1:c.1A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89574826', 'alt': 'C'}} + assert results['NM_199367.1:c.1A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_955399.1:p.(Met1?)', 'slr': 'NP_955399.1:p.(M1?)'} + assert results['NM_199367.1:c.1A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.1:c.1A>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_199367.1:c.1A>C']['alt_genomic_loci'], []) + assert results['NM_199367.1:c.1A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': 'chr16', 'pos': '89574826', 'ref': 'A', 'alt': 'C'}} assert 'hg38' not in list(results['NM_199367.1:c.1A>C']['primary_assembly_loci'].keys()) - assert results['NM_199367.1:c.1A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89574826', 'alt': 'C'}} + assert results['NM_199367.1:c.1A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': '16', 'pos': '89574826', 'ref': 'A', 'alt': 'C'}} assert 'grch38' not in list(results['NM_199367.1:c.1A>C']['primary_assembly_loci'].keys()) - assert results['NM_199367.1:c.1A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} + assert results['NM_199367.1:c.1A>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1'} - assert 'NM_001363850.1:c.1A>C' in list(results.keys()) - assert results['NM_001363850.1:c.1A>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001363850.1:c.1A>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001363850.1:c.1A>C']['alt_genomic_loci'], []) - assert results['NM_001363850.1:c.1A>C']['gene_symbol'] == 'SPG7' - assert results['NM_001363850.1:c.1A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Met1?)', 'slr': 'NP_001350779.1:p.(M1?)'} - assert 
results['NM_001363850.1:c.1A>C']['submitted_variant'] == '16-89574826-A-C' - assert results['NM_001363850.1:c.1A>C']['genome_context_intronic_sequence'] == '' - assert results['NM_001363850.1:c.1A>C']['hgvs_lrg_variant'] == '' - assert results['NM_001363850.1:c.1A>C']['hgvs_transcript_variant'] == 'NM_001363850.1:c.1A>C' - assert results['NM_001363850.1:c.1A>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_001363850.1:c.1A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89574826', 'alt': 'C'}} - assert 'hg38' not in list(results['NM_001363850.1:c.1A>C']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.1A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89574826', 'alt': 'C'}} - assert 'grch38' not in list(results['NM_001363850.1:c.1A>C']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.1A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} + assert 'NM_003119.2:c.1A>C' in list(results.keys()) + assert results['NM_003119.2:c.1A>C']['submitted_variant'] == '16-89574826-A-C' + assert results['NM_003119.2:c.1A>C']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.1A>C']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_003119.2:c.1A>C']['hgvs_transcript_variant'] == 'NM_003119.2:c.1A>C' + assert results['NM_003119.2:c.1A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.1A>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.1A>C']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.5022A>C' + assert results['NM_003119.2:c.1A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_003110.1:p.(Met1?)', 'slr': 'NP_003110.1:p.(M1?)'} + assert results['NM_003119.2:c.1A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.1A>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.2:c.1A>C']['alt_genomic_loci'], []) + assert results['NM_003119.2:c.1A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': 'chr16', 'pos': '89574826', 'ref': 'A', 'alt': 'C'}} + assert 'hg38' not in list(results['NM_003119.2:c.1A>C']['primary_assembly_loci'].keys()) + assert results['NM_003119.2:c.1A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': '16', 'pos': '89574826', 'ref': 'A', 'alt': 'C'}} + assert 'grch38' not in list(results['NM_003119.2:c.1A>C']['primary_assembly_loci'].keys()) + assert results['NM_003119.2:c.1A>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1'} assert 'NM_199367.2:c.1A>C' in list(results.keys()) - assert results['NM_199367.2:c.1A>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_199367.2:c.1A>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_199367.2:c.1A>C']['alt_genomic_loci'], []) - assert results['NM_199367.2:c.1A>C']['gene_symbol'] == 'SPG7' - assert results['NM_199367.2:c.1A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Met1?)', 'slr': 'NP_955399.1:p.(M1?)'} assert results['NM_199367.2:c.1A>C']['submitted_variant'] == '16-89574826-A-C' - assert results['NM_199367.2:c.1A>C']['genome_context_intronic_sequence'] == '' - assert results['NM_199367.2:c.1A>C']['hgvs_lrg_variant'] == '' + assert results['NM_199367.2:c.1A>C']['gene_symbol'] == 'SPG7' + assert results['NM_199367.2:c.1A>C']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 
'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_199367.2:c.1A>C']['hgvs_transcript_variant'] == 'NM_199367.2:c.1A>C' + assert results['NM_199367.2:c.1A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.1A>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.2:c.1A>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_199367.2:c.1A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89574826', 'alt': 'C'}} - assert results['NM_199367.2:c.1A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508418A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89508418', 'alt': 'C'}} - assert results['NM_199367.2:c.1A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89574826', 'alt': 'C'}} - assert results['NM_199367.2:c.1A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508418A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89508418', 'alt': 'C'}} - assert results['NM_199367.2:c.1A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} - - assert results['flag'] == 'gene_variant' - assert 'NM_003119.3:c.1A>C' in list(results.keys()) - assert results['NM_003119.3:c.1A>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.3:c.1A>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003119.3:c.1A>C']['alt_genomic_loci'], []) - assert results['NM_003119.3:c.1A>C']['gene_symbol'] == 'SPG7' - assert results['NM_003119.3:c.1A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Met1?)', 'slr': 'NP_003110.1:p.(M1?)'} - assert 
results['NM_003119.3:c.1A>C']['submitted_variant'] == '16-89574826-A-C' - assert results['NM_003119.3:c.1A>C']['genome_context_intronic_sequence'] == '' - assert results['NM_003119.3:c.1A>C']['hgvs_lrg_variant'] == '' - assert results['NM_003119.3:c.1A>C']['hgvs_transcript_variant'] == 'NM_003119.3:c.1A>C' - assert results['NM_003119.3:c.1A>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_003119.3:c.1A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89574826', 'alt': 'C'}} - assert results['NM_003119.3:c.1A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508418A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89508418', 'alt': 'C'}} - assert results['NM_003119.3:c.1A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89574826', 'alt': 'C'}} - assert results['NM_003119.3:c.1A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508418A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89508418', 'alt': 'C'}} - assert results['NM_003119.3:c.1A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} - + assert results['NM_199367.2:c.1A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Met1?)', 'slr': 'NP_955399.1:p.(M1?)'} + assert results['NM_199367.2:c.1A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.2:c.1A>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_199367.2:c.1A>C']['alt_genomic_loci'], []) + assert results['NM_199367.2:c.1A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': 'chr16', 'pos': '89574826', 'ref': 'A', 'alt': 'C'}} + assert 
results['NM_199367.2:c.1A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508418A>C', 'vcf': {'chr': 'chr16', 'pos': '89508418', 'ref': 'A', 'alt': 'C'}} + assert results['NM_199367.2:c.1A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574826A>C', 'vcf': {'chr': '16', 'pos': '89574826', 'ref': 'A', 'alt': 'C'}} + assert results['NM_199367.2:c.1A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508418A>C', 'vcf': {'chr': '16', 'pos': '89508418', 'ref': 'A', 'alt': 'C'}} + assert results['NM_199367.2:c.1A>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1'} def test_variant240(self): variant = '16-89574914-G-GT' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' + assert 'NM_003119.3:c.90dup' in list(results.keys()) + assert results['NM_003119.3:c.90dup']['submitted_variant'] == '16-89574914-G-GT' + assert results['NM_003119.3:c.90dup']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.90dup']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_003119.3:c.90dup']['hgvs_transcript_variant'] == 'NM_003119.3:c.90dup' + assert results['NM_003119.3:c.90dup']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.90dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.90dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_003119.3:c.90dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Pro31SerfsTer43)', 'slr': 'NP_003110.1:p.(P31Sfs*43)'} + assert results['NM_003119.3:c.90dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.90dup']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['NM_003119.3:c.90dup']['alt_genomic_loci'], []) + assert results['NM_003119.3:c.90dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': 'chr16', 'pos': '89574914', 'ref': 'G', 'alt': 'GT'}} + assert results['NM_003119.3:c.90dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508507dup', 'vcf': {'chr': 'chr16', 'pos': '89508506', 'ref': 'G', 'alt': 'GT'}} + assert results['NM_003119.3:c.90dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': '16', 'pos': '89574914', 'ref': 'G', 'alt': 'GT'}} + assert results['NM_003119.3:c.90dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508507dup', 'vcf': {'chr': '16', 'pos': '89508506', 'ref': 'G', 'alt': 'GT'}} + assert results['NM_003119.3:c.90dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1'} + assert 'NM_001363850.1:c.90dup' in list(results.keys()) - assert results['NM_001363850.1:c.90dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001363850.1:c.90dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001363850.1:c.90dup']['alt_genomic_loci'], []) - assert results['NM_001363850.1:c.90dup']['gene_symbol'] == 'SPG7' - assert results['NM_001363850.1:c.90dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Pro31SerfsTer43)', 'slr': 'NP_001350779.1:p.(P31Sfs*43)'} assert results['NM_001363850.1:c.90dup']['submitted_variant'] == '16-89574914-G-GT' - assert results['NM_001363850.1:c.90dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001363850.1:c.90dup']['hgvs_lrg_variant'] == '' + assert results['NM_001363850.1:c.90dup']['gene_symbol'] == 'SPG7' + assert 
results['NM_001363850.1:c.90dup']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_001363850.1:c.90dup']['hgvs_transcript_variant'] == 'NM_001363850.1:c.90dup' + assert results['NM_001363850.1:c.90dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.90dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363850.1:c.90dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001363850.1:c.90dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89574914', 'alt': 'GT'}} + assert results['NM_001363850.1:c.90dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Pro31SerfsTer43)', 'slr': 'NP_001350779.1:p.(P31Sfs*43)'} + assert results['NM_001363850.1:c.90dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.90dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001363850.1:c.90dup']['alt_genomic_loci'], []) + assert results['NM_001363850.1:c.90dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': 'chr16', 'pos': '89574914', 'ref': 'G', 'alt': 'GT'}} assert 'hg38' not in list(results['NM_001363850.1:c.90dup']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.90dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89574914', 'alt': 'GT'}} + assert results['NM_001363850.1:c.90dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': '16', 'pos': '89574914', 'ref': 'G', 'alt': 'GT'}} assert 'grch38' not in list(results['NM_001363850.1:c.90dup']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.90dup']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} + assert results['NM_001363850.1:c.90dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1'} assert 'NM_199367.1:c.90dup' in list(results.keys()) - assert results['NM_199367.1:c.90dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_199367.1:c.90dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_199367.1:c.90dup']['alt_genomic_loci'], []) - assert results['NM_199367.1:c.90dup']['gene_symbol'] == 'SPG7' - assert results['NM_199367.1:c.90dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Pro31SerfsTer43)', 'slr': 'NP_955399.1:p.(P31Sfs*43)'} assert results['NM_199367.1:c.90dup']['submitted_variant'] == '16-89574914-G-GT' - assert results['NM_199367.1:c.90dup']['genome_context_intronic_sequence'] == '' - assert results['NM_199367.1:c.90dup']['hgvs_lrg_variant'] == '' + assert results['NM_199367.1:c.90dup']['gene_symbol'] == 'SPG7' + assert results['NM_199367.1:c.90dup']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_199367.1:c.90dup']['hgvs_transcript_variant'] == 'NM_199367.1:c.90dup' + assert results['NM_199367.1:c.90dup']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.90dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.1:c.90dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_199367.1:c.90dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89574914', 'alt': 'GT'}} + assert results['NM_199367.1:c.90dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Pro31SerfsTer43)', 'slr': 'NP_955399.1:p.(P31Sfs*43)'} + 
assert results['NM_199367.1:c.90dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.1:c.90dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_199367.1:c.90dup']['alt_genomic_loci'], []) + assert results['NM_199367.1:c.90dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': 'chr16', 'pos': '89574914', 'ref': 'G', 'alt': 'GT'}} assert 'hg38' not in list(results['NM_199367.1:c.90dup']['primary_assembly_loci'].keys()) - assert results['NM_199367.1:c.90dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89574914', 'alt': 'GT'}} + assert results['NM_199367.1:c.90dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': '16', 'pos': '89574914', 'ref': 'G', 'alt': 'GT'}} assert 'grch38' not in list(results['NM_199367.1:c.90dup']['primary_assembly_loci'].keys()) - assert results['NM_199367.1:c.90dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} + assert results['NM_199367.1:c.90dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1'} assert 'NM_003119.2:c.90dup' in list(results.keys()) - assert results['NM_003119.2:c.90dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.2:c.90dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003119.2:c.90dup']['alt_genomic_loci'], []) - assert results['NM_003119.2:c.90dup']['gene_symbol'] == 'SPG7' - assert results['NM_003119.2:c.90dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Pro31SerfsTer43)', 'slr': 'NP_003110.1:p.(P31Sfs*43)'} assert results['NM_003119.2:c.90dup']['submitted_variant'] == 
'16-89574914-G-GT' - assert results['NM_003119.2:c.90dup']['genome_context_intronic_sequence'] == '' - assert results['NM_003119.2:c.90dup']['hgvs_lrg_variant'] == '' + assert results['NM_003119.2:c.90dup']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.90dup']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_003119.2:c.90dup']['hgvs_transcript_variant'] == 'NM_003119.2:c.90dup' + assert results['NM_003119.2:c.90dup']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.90dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.2:c.90dup']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.5111dup' - assert results['NM_003119.2:c.90dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89574914', 'alt': 'GT'}} + assert results['NM_003119.2:c.90dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Pro31SerfsTer43)', 'slr': 'NP_003110.1:p.(P31Sfs*43)'} + assert results['NM_003119.2:c.90dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.90dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.2:c.90dup']['alt_genomic_loci'], []) + assert results['NM_003119.2:c.90dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': 'chr16', 'pos': '89574914', 'ref': 'G', 'alt': 'GT'}} assert 'hg38' not in list(results['NM_003119.2:c.90dup']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.90dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89574914', 'alt': 'GT'}} + assert results['NM_003119.2:c.90dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': '16', 'pos': '89574914', 
'ref': 'G', 'alt': 'GT'}} assert 'grch38' not in list(results['NM_003119.2:c.90dup']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.90dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} + assert results['NM_003119.2:c.90dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1'} assert 'NM_199367.2:c.90dup' in list(results.keys()) - assert results['NM_199367.2:c.90dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_199367.2:c.90dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_199367.2:c.90dup']['alt_genomic_loci'], []) - assert results['NM_199367.2:c.90dup']['gene_symbol'] == 'SPG7' - assert results['NM_199367.2:c.90dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Pro31SerfsTer43)', 'slr': 'NP_955399.1:p.(P31Sfs*43)'} assert results['NM_199367.2:c.90dup']['submitted_variant'] == '16-89574914-G-GT' - assert results['NM_199367.2:c.90dup']['genome_context_intronic_sequence'] == '' - assert results['NM_199367.2:c.90dup']['hgvs_lrg_variant'] == '' + assert results['NM_199367.2:c.90dup']['gene_symbol'] == 'SPG7' + assert results['NM_199367.2:c.90dup']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_199367.2:c.90dup']['hgvs_transcript_variant'] == 'NM_199367.2:c.90dup' + assert results['NM_199367.2:c.90dup']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.90dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.2:c.90dup']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_199367.2:c.90dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89574914', 'alt': 'GT'}} - assert results['NM_199367.2:c.90dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508507dup', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89508506', 'alt': 'GT'}} - assert results['NM_199367.2:c.90dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89574914', 'alt': 'GT'}} - assert results['NM_199367.2:c.90dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508507dup', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89508506', 'alt': 'GT'}} - assert results['NM_199367.2:c.90dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} + assert results['NM_199367.2:c.90dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Pro31SerfsTer43)', 'slr': 'NP_955399.1:p.(P31Sfs*43)'} + assert results['NM_199367.2:c.90dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.2:c.90dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_199367.2:c.90dup']['alt_genomic_loci'], []) + assert results['NM_199367.2:c.90dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': 'chr16', 'pos': '89574914', 'ref': 'G', 'alt': 'GT'}} + assert results['NM_199367.2:c.90dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508507dup', 'vcf': {'chr': 'chr16', 'pos': '89508506', 'ref': 'G', 'alt': 'GT'}} + assert results['NM_199367.2:c.90dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': '16', 'pos': '89574914', 'ref': 'G', 'alt': 'GT'}} + 
assert results['NM_199367.2:c.90dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508507dup', 'vcf': {'chr': '16', 'pos': '89508506', 'ref': 'G', 'alt': 'GT'}} + assert results['NM_199367.2:c.90dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1'} + + def test_variant241(self): + variant = '16-89574916-C-CGTC' + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() + print(results) assert results['flag'] == 'gene_variant' - assert 'NM_003119.3:c.90dup' in list(results.keys()) - assert results['NM_003119.3:c.90dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.3:c.90dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003119.3:c.90dup']['alt_genomic_loci'], []) - assert results['NM_003119.3:c.90dup']['gene_symbol'] == 'SPG7' - assert results['NM_003119.3:c.90dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Pro31SerfsTer43)', 'slr': 'NP_003110.1:p.(P31Sfs*43)'} - assert results['NM_003119.3:c.90dup']['submitted_variant'] == '16-89574914-G-GT' - assert results['NM_003119.3:c.90dup']['genome_context_intronic_sequence'] == '' - assert results['NM_003119.3:c.90dup']['hgvs_lrg_variant'] == '' - assert results['NM_003119.3:c.90dup']['hgvs_transcript_variant'] == 'NM_003119.3:c.90dup' - assert results['NM_003119.3:c.90dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_003119.3:c.90dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89574914', 'alt': 'GT'}} - assert results['NM_003119.3:c.90dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508507dup', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89508506', 'alt': 'GT'}} - assert results['NM_003119.3:c.90dup']['primary_assembly_loci']['grch37'] 
== {'hgvs_genomic_description': 'NC_000016.9:g.89574915dup', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89574914', 'alt': 'GT'}} - assert results['NM_003119.3:c.90dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508507dup', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89508506', 'alt': 'GT'}} - assert results['NM_003119.3:c.90dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} - - - def test_variant241(self): - variant = '16-89574916-C-CGTC' - results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() - print(results) - - assert 'NM_199367.2:c.89_91dup' in list(results.keys()) - assert results['NM_199367.2:c.89_91dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_199367.2:c.89_91dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_199367.2:c.89_91dup']['alt_genomic_loci'], []) - assert results['NM_199367.2:c.89_91dup']['gene_symbol'] == 'SPG7' - assert results['NM_199367.2:c.89_91dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Ser30_Pro31insArg)', 'slr': 'NP_955399.1:p.(S30_P31insR)'} - assert results['NM_199367.2:c.89_91dup']['submitted_variant'] == '16-89574916-C-CGTC' - assert results['NM_199367.2:c.89_91dup']['genome_context_intronic_sequence'] == '' - assert results['NM_199367.2:c.89_91dup']['hgvs_lrg_variant'] == '' - assert results['NM_199367.2:c.89_91dup']['hgvs_transcript_variant'] == 'NM_199367.2:c.89_91dup' - assert results['NM_199367.2:c.89_91dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_199367.2:c.89_91dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89574913', 'alt': 'AGTC'}} - assert results['NM_199367.2:c.89_91dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000016.10:g.89508506_89508508dup', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89508505', 'alt': 'AGTC'}} - assert results['NM_199367.2:c.89_91dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89574913', 'alt': 'AGTC'}} - assert results['NM_199367.2:c.89_91dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508506_89508508dup', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89508505', 'alt': 'AGTC'}} - assert results['NM_199367.2:c.89_91dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} - assert 'NM_003119.3:c.89_91dup' in list(results.keys()) - assert results['NM_003119.3:c.89_91dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.3:c.89_91dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003119.3:c.89_91dup']['alt_genomic_loci'], []) - assert results['NM_003119.3:c.89_91dup']['gene_symbol'] == 'SPG7' - assert results['NM_003119.3:c.89_91dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Ser30_Pro31insArg)', 'slr': 'NP_003110.1:p.(S30_P31insR)'} assert results['NM_003119.3:c.89_91dup']['submitted_variant'] == '16-89574916-C-CGTC' - assert results['NM_003119.3:c.89_91dup']['genome_context_intronic_sequence'] == '' - assert results['NM_003119.3:c.89_91dup']['hgvs_lrg_variant'] == '' + assert results['NM_003119.3:c.89_91dup']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.89_91dup']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_003119.3:c.89_91dup']['hgvs_transcript_variant'] == 'NM_003119.3:c.89_91dup' + assert results['NM_003119.3:c.89_91dup']['genome_context_intronic_sequence'] == '' + assert 
results['NM_003119.3:c.89_91dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.3:c.89_91dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_003119.3:c.89_91dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89574913', 'alt': 'AGTC'}} - assert results['NM_003119.3:c.89_91dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508506_89508508dup', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89508505', 'alt': 'AGTC'}} - assert results['NM_003119.3:c.89_91dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89574913', 'alt': 'AGTC'}} - assert results['NM_003119.3:c.89_91dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508506_89508508dup', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89508505', 'alt': 'AGTC'}} - assert results['NM_003119.3:c.89_91dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} + assert results['NM_003119.3:c.89_91dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Ser30_Pro31insArg)', 'slr': 'NP_003110.1:p.(S30_P31insR)'} + assert results['NM_003119.3:c.89_91dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.89_91dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.3:c.89_91dup']['alt_genomic_loci'], []) + assert results['NM_003119.3:c.89_91dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': 'chr16', 'pos': '89574913', 'ref': 'A', 'alt': 'AGTC'}} + assert results['NM_003119.3:c.89_91dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508506_89508508dup', 'vcf': 
{'chr': 'chr16', 'pos': '89508505', 'ref': 'A', 'alt': 'AGTC'}} + assert results['NM_003119.3:c.89_91dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': '16', 'pos': '89574913', 'ref': 'A', 'alt': 'AGTC'}} + assert results['NM_003119.3:c.89_91dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508506_89508508dup', 'vcf': {'chr': '16', 'pos': '89508505', 'ref': 'A', 'alt': 'AGTC'}} + assert results['NM_003119.3:c.89_91dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1'} assert 'NM_001363850.1:c.89_91dup' in list(results.keys()) - assert results['NM_001363850.1:c.89_91dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001363850.1:c.89_91dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001363850.1:c.89_91dup']['alt_genomic_loci'], []) - assert results['NM_001363850.1:c.89_91dup']['gene_symbol'] == 'SPG7' - assert results['NM_001363850.1:c.89_91dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Ser30_Pro31insArg)', 'slr': 'NP_001350779.1:p.(S30_P31insR)'} assert results['NM_001363850.1:c.89_91dup']['submitted_variant'] == '16-89574916-C-CGTC' - assert results['NM_001363850.1:c.89_91dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001363850.1:c.89_91dup']['hgvs_lrg_variant'] == '' + assert results['NM_001363850.1:c.89_91dup']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.89_91dup']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_001363850.1:c.89_91dup']['hgvs_transcript_variant'] == 'NM_001363850.1:c.89_91dup' + assert results['NM_001363850.1:c.89_91dup']['genome_context_intronic_sequence'] == '' + assert 
results['NM_001363850.1:c.89_91dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363850.1:c.89_91dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001363850.1:c.89_91dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89574913', 'alt': 'AGTC'}} + assert results['NM_001363850.1:c.89_91dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Ser30_Pro31insArg)', 'slr': 'NP_001350779.1:p.(S30_P31insR)'} + assert results['NM_001363850.1:c.89_91dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.89_91dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001363850.1:c.89_91dup']['alt_genomic_loci'], []) + assert results['NM_001363850.1:c.89_91dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': 'chr16', 'pos': '89574913', 'ref': 'A', 'alt': 'AGTC'}} assert 'hg38' not in list(results['NM_001363850.1:c.89_91dup']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.89_91dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89574913', 'alt': 'AGTC'}} + assert results['NM_001363850.1:c.89_91dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': '16', 'pos': '89574913', 'ref': 'A', 'alt': 'AGTC'}} assert 'grch38' not in list(results['NM_001363850.1:c.89_91dup']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.89_91dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} + assert results['NM_001363850.1:c.89_91dup']['reference_sequence_records'] == {'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1'} - assert results['flag'] == 'gene_variant' assert 'NM_199367.1:c.89_91dup' in list(results.keys()) - assert results['NM_199367.1:c.89_91dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_199367.1:c.89_91dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_199367.1:c.89_91dup']['alt_genomic_loci'], []) - assert results['NM_199367.1:c.89_91dup']['gene_symbol'] == 'SPG7' - assert results['NM_199367.1:c.89_91dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Ser30_Pro31insArg)', 'slr': 'NP_955399.1:p.(S30_P31insR)'} assert results['NM_199367.1:c.89_91dup']['submitted_variant'] == '16-89574916-C-CGTC' - assert results['NM_199367.1:c.89_91dup']['genome_context_intronic_sequence'] == '' - assert results['NM_199367.1:c.89_91dup']['hgvs_lrg_variant'] == '' + assert results['NM_199367.1:c.89_91dup']['gene_symbol'] == 'SPG7' + assert results['NM_199367.1:c.89_91dup']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_199367.1:c.89_91dup']['hgvs_transcript_variant'] == 'NM_199367.1:c.89_91dup' + assert results['NM_199367.1:c.89_91dup']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.89_91dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.1:c.89_91dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_199367.1:c.89_91dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89574913', 'alt': 'AGTC'}} + assert results['NM_199367.1:c.89_91dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Ser30_Pro31insArg)', 'slr': 'NP_955399.1:p.(S30_P31insR)'} + assert results['NM_199367.1:c.89_91dup']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_199367.1:c.89_91dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_199367.1:c.89_91dup']['alt_genomic_loci'], []) + assert results['NM_199367.1:c.89_91dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': 'chr16', 'pos': '89574913', 'ref': 'A', 'alt': 'AGTC'}} assert 'hg38' not in list(results['NM_199367.1:c.89_91dup']['primary_assembly_loci'].keys()) - assert results['NM_199367.1:c.89_91dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89574913', 'alt': 'AGTC'}} + assert results['NM_199367.1:c.89_91dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': '16', 'pos': '89574913', 'ref': 'A', 'alt': 'AGTC'}} assert 'grch38' not in list(results['NM_199367.1:c.89_91dup']['primary_assembly_loci'].keys()) - assert results['NM_199367.1:c.89_91dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} + assert results['NM_199367.1:c.89_91dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1'} assert 'NM_003119.2:c.89_91dup' in list(results.keys()) - assert results['NM_003119.2:c.89_91dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.2:c.89_91dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003119.2:c.89_91dup']['alt_genomic_loci'], []) - assert results['NM_003119.2:c.89_91dup']['gene_symbol'] == 'SPG7' - assert results['NM_003119.2:c.89_91dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Ser30_Pro31insArg)', 'slr': 'NP_003110.1:p.(S30_P31insR)'} assert results['NM_003119.2:c.89_91dup']['submitted_variant'] == 
'16-89574916-C-CGTC' - assert results['NM_003119.2:c.89_91dup']['genome_context_intronic_sequence'] == '' - assert results['NM_003119.2:c.89_91dup']['hgvs_lrg_variant'] == '' + assert results['NM_003119.2:c.89_91dup']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.89_91dup']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_003119.2:c.89_91dup']['hgvs_transcript_variant'] == 'NM_003119.2:c.89_91dup' + assert results['NM_003119.2:c.89_91dup']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.89_91dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.2:c.89_91dup']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.5110_5112dup' - assert results['NM_003119.2:c.89_91dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89574913', 'alt': 'AGTC'}} + assert results['NM_003119.2:c.89_91dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Ser30_Pro31insArg)', 'slr': 'NP_003110.1:p.(S30_P31insR)'} + assert results['NM_003119.2:c.89_91dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.89_91dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.2:c.89_91dup']['alt_genomic_loci'], []) + assert results['NM_003119.2:c.89_91dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': 'chr16', 'pos': '89574913', 'ref': 'A', 'alt': 'AGTC'}} assert 'hg38' not in list(results['NM_003119.2:c.89_91dup']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.89_91dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89574913', 'alt': 'AGTC'}} + assert results['NM_003119.2:c.89_91dup']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': '16', 'pos': '89574913', 'ref': 'A', 'alt': 'AGTC'}} assert 'grch38' not in list(results['NM_003119.2:c.89_91dup']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.89_91dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} + assert results['NM_003119.2:c.89_91dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1'} + assert 'NM_199367.2:c.89_91dup' in list(results.keys()) + assert results['NM_199367.2:c.89_91dup']['submitted_variant'] == '16-89574916-C-CGTC' + assert results['NM_199367.2:c.89_91dup']['gene_symbol'] == 'SPG7' + assert results['NM_199367.2:c.89_91dup']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_199367.2:c.89_91dup']['hgvs_transcript_variant'] == 'NM_199367.2:c.89_91dup' + assert results['NM_199367.2:c.89_91dup']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.89_91dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.89_91dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.2:c.89_91dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Ser30_Pro31insArg)', 'slr': 'NP_955399.1:p.(S30_P31insR)'} + assert results['NM_199367.2:c.89_91dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.2:c.89_91dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_199367.2:c.89_91dup']['alt_genomic_loci'], []) + assert results['NM_199367.2:c.89_91dup']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': 'chr16', 'pos': '89574913', 'ref': 'A', 'alt': 'AGTC'}} + assert results['NM_199367.2:c.89_91dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508506_89508508dup', 'vcf': {'chr': 'chr16', 'pos': '89508505', 'ref': 'A', 'alt': 'AGTC'}} + assert results['NM_199367.2:c.89_91dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89574914_89574916dup', 'vcf': {'chr': '16', 'pos': '89574913', 'ref': 'A', 'alt': 'AGTC'}} + assert results['NM_199367.2:c.89_91dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508506_89508508dup', 'vcf': {'chr': '16', 'pos': '89508505', 'ref': 'A', 'alt': 'AGTC'}} + assert results['NM_199367.2:c.89_91dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1'} def test_variant242(self): variant = '16-89575009-G-A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_199367.2:c.183+1G>A' in list(results.keys()) - assert results['NM_199367.2:c.183+1G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_199367.2:c.183+1G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_199367.2:c.183+1G>A']['alt_genomic_loci'], []) - assert results['NM_199367.2:c.183+1G>A']['gene_symbol'] == 'SPG7' - assert results['NM_199367.2:c.183+1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} - assert results['NM_199367.2:c.183+1G>A']['submitted_variant'] == '16-89575009-G-A' - assert results['NM_199367.2:c.183+1G>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_199367.2):c.183+1G>A' - assert results['NM_199367.2:c.183+1G>A']['hgvs_lrg_variant'] == '' - assert results['NM_199367.2:c.183+1G>A']['hgvs_transcript_variant'] == 
'NM_199367.2:c.183+1G>A' - assert results['NM_199367.2:c.183+1G>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_199367.2:c.183+1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89575009', 'alt': 'A'}} - assert results['NM_199367.2:c.183+1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508601G>A', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89508601', 'alt': 'A'}} - assert results['NM_199367.2:c.183+1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89575009', 'alt': 'A'}} - assert results['NM_199367.2:c.183+1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508601G>A', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89508601', 'alt': 'A'}} - assert results['NM_199367.2:c.183+1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} - - assert 'NM_003119.2:c.183+1G>A' in list(results.keys()) - assert results['NM_003119.2:c.183+1G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.2:c.183+1G>A']['refseqgene_context_intronic_sequence'] == 'NG_008082.1(NM_003119.2):c.183+1G>A' - self.assertCountEqual(results['NM_003119.2:c.183+1G>A']['alt_genomic_loci'], []) - assert results['NM_003119.2:c.183+1G>A']['gene_symbol'] == 'SPG7' - assert results['NM_003119.2:c.183+1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} - assert results['NM_003119.2:c.183+1G>A']['submitted_variant'] == '16-89575009-G-A' - assert results['NM_003119.2:c.183+1G>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.2):c.183+1G>A' - assert results['NM_003119.2:c.183+1G>A']['hgvs_lrg_variant'] == '' - assert 
results['NM_003119.2:c.183+1G>A']['hgvs_transcript_variant'] == 'NM_003119.2:c.183+1G>A' - assert results['NM_003119.2:c.183+1G>A']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.5205G>A' - assert results['NM_003119.2:c.183+1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89575009', 'alt': 'A'}} - assert 'hg38' not in list(results['NM_003119.2:c.183+1G>A']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.183+1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89575009', 'alt': 'A'}} - assert 'grch38' not in list(results['NM_003119.2:c.183+1G>A']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.183+1G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} - assert results['flag'] == 'gene_variant' - assert 'NM_199367.1:c.183+1G>A' in list(results.keys()) - assert results['NM_199367.1:c.183+1G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_199367.1:c.183+1G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_199367.1:c.183+1G>A']['alt_genomic_loci'], []) - assert results['NM_199367.1:c.183+1G>A']['gene_symbol'] == 'SPG7' - assert results['NM_199367.1:c.183+1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} - assert results['NM_199367.1:c.183+1G>A']['submitted_variant'] == '16-89575009-G-A' - assert results['NM_199367.1:c.183+1G>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_199367.1):c.183+1G>A' - assert results['NM_199367.1:c.183+1G>A']['hgvs_lrg_variant'] == '' - assert results['NM_199367.1:c.183+1G>A']['hgvs_transcript_variant'] == 'NM_199367.1:c.183+1G>A' - assert 
results['NM_199367.1:c.183+1G>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_199367.1:c.183+1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89575009', 'alt': 'A'}} - assert 'hg38' not in list(results['NM_199367.1:c.183+1G>A']['primary_assembly_loci'].keys()) - assert results['NM_199367.1:c.183+1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89575009', 'alt': 'A'}} - assert 'grch38' not in list(results['NM_199367.1:c.183+1G>A']['primary_assembly_loci'].keys()) - assert results['NM_199367.1:c.183+1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} + assert 'NM_003119.3:c.183+1G>A' in list(results.keys()) + assert results['NM_003119.3:c.183+1G>A']['submitted_variant'] == '16-89575009-G-A' + assert results['NM_003119.3:c.183+1G>A']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.183+1G>A']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_003119.3:c.183+1G>A']['hgvs_transcript_variant'] == 'NM_003119.3:c.183+1G>A' + assert results['NM_003119.3:c.183+1G>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.3):c.183+1G>A' + assert results['NM_003119.3:c.183+1G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.183+1G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_003119.3:c.183+1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} + assert results['NM_003119.3:c.183+1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.183+1G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.3:c.183+1G>A']['alt_genomic_loci'], []) + 
assert results['NM_003119.3:c.183+1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': 'chr16', 'pos': '89575009', 'ref': 'G', 'alt': 'A'}} + assert results['NM_003119.3:c.183+1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508601G>A', 'vcf': {'chr': 'chr16', 'pos': '89508601', 'ref': 'G', 'alt': 'A'}} + assert results['NM_003119.3:c.183+1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': '16', 'pos': '89575009', 'ref': 'G', 'alt': 'A'}} + assert results['NM_003119.3:c.183+1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508601G>A', 'vcf': {'chr': '16', 'pos': '89508601', 'ref': 'G', 'alt': 'A'}} + assert results['NM_003119.3:c.183+1G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1'} assert 'NM_001363850.1:c.183+1G>A' in list(results.keys()) - assert results['NM_001363850.1:c.183+1G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001363850.1:c.183+1G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001363850.1:c.183+1G>A']['alt_genomic_loci'], []) - assert results['NM_001363850.1:c.183+1G>A']['gene_symbol'] == 'SPG7' - assert results['NM_001363850.1:c.183+1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.?', 'slr': 'NP_001350779.1:p.?'} assert results['NM_001363850.1:c.183+1G>A']['submitted_variant'] == '16-89575009-G-A' - assert results['NM_001363850.1:c.183+1G>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001363850.1):c.183+1G>A' - assert results['NM_001363850.1:c.183+1G>A']['hgvs_lrg_variant'] == '' + assert results['NM_001363850.1:c.183+1G>A']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.183+1G>A']['gene_ids'] == {'hgnc_id': 
'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_001363850.1:c.183+1G>A']['hgvs_transcript_variant'] == 'NM_001363850.1:c.183+1G>A' + assert results['NM_001363850.1:c.183+1G>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001363850.1):c.183+1G>A' + assert results['NM_001363850.1:c.183+1G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363850.1:c.183+1G>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001363850.1:c.183+1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89575009', 'alt': 'A'}} + assert results['NM_001363850.1:c.183+1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.?', 'slr': 'NP_001350779.1:p.?'} + assert results['NM_001363850.1:c.183+1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.183+1G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001363850.1:c.183+1G>A']['alt_genomic_loci'], []) + assert results['NM_001363850.1:c.183+1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': 'chr16', 'pos': '89575009', 'ref': 'G', 'alt': 'A'}} assert 'hg38' not in list(results['NM_001363850.1:c.183+1G>A']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.183+1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89575009', 'alt': 'A'}} + assert results['NM_001363850.1:c.183+1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': '16', 'pos': '89575009', 'ref': 'G', 'alt': 'A'}} assert 'grch38' not in list(results['NM_001363850.1:c.183+1G>A']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.183+1G>A']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} + assert results['NM_001363850.1:c.183+1G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1'} - assert 'NM_003119.3:c.183+1G>A' in list(results.keys()) - assert results['NM_003119.3:c.183+1G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.3:c.183+1G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003119.3:c.183+1G>A']['alt_genomic_loci'], []) - assert results['NM_003119.3:c.183+1G>A']['gene_symbol'] == 'SPG7' - assert results['NM_003119.3:c.183+1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} - assert results['NM_003119.3:c.183+1G>A']['submitted_variant'] == '16-89575009-G-A' - assert results['NM_003119.3:c.183+1G>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.3):c.183+1G>A' - assert results['NM_003119.3:c.183+1G>A']['hgvs_lrg_variant'] == '' - assert results['NM_003119.3:c.183+1G>A']['hgvs_transcript_variant'] == 'NM_003119.3:c.183+1G>A' - assert results['NM_003119.3:c.183+1G>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_003119.3:c.183+1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89575009', 'alt': 'A'}} - assert results['NM_003119.3:c.183+1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508601G>A', 'vcf': {'chr': 'chr16', 'ref': 'G', 'pos': '89508601', 'alt': 'A'}} - assert results['NM_003119.3:c.183+1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89575009', 'alt': 'A'}} - assert results['NM_003119.3:c.183+1G>A']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000016.10:g.89508601G>A', 'vcf': {'chr': '16', 'ref': 'G', 'pos': '89508601', 'alt': 'A'}} - assert results['NM_003119.3:c.183+1G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} + assert 'NM_199367.1:c.183+1G>A' in list(results.keys()) + assert results['NM_199367.1:c.183+1G>A']['submitted_variant'] == '16-89575009-G-A' + assert results['NM_199367.1:c.183+1G>A']['gene_symbol'] == 'SPG7' + assert results['NM_199367.1:c.183+1G>A']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_199367.1:c.183+1G>A']['hgvs_transcript_variant'] == 'NM_199367.1:c.183+1G>A' + assert results['NM_199367.1:c.183+1G>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_199367.1):c.183+1G>A' + assert results['NM_199367.1:c.183+1G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.183+1G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.1:c.183+1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} + assert results['NM_199367.1:c.183+1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.1:c.183+1G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_199367.1:c.183+1G>A']['alt_genomic_loci'], []) + assert results['NM_199367.1:c.183+1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': 'chr16', 'pos': '89575009', 'ref': 'G', 'alt': 'A'}} + assert 'hg38' not in list(results['NM_199367.1:c.183+1G>A']['primary_assembly_loci'].keys()) + assert results['NM_199367.1:c.183+1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': '16', 'pos': '89575009', 'ref': 'G', 'alt': 'A'}} + assert 'grch38' not in 
list(results['NM_199367.1:c.183+1G>A']['primary_assembly_loci'].keys()) + assert results['NM_199367.1:c.183+1G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1'} + + assert 'NM_003119.2:c.183+1G>A' in list(results.keys()) + assert results['NM_003119.2:c.183+1G>A']['submitted_variant'] == '16-89575009-G-A' + assert results['NM_003119.2:c.183+1G>A']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.183+1G>A']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_003119.2:c.183+1G>A']['hgvs_transcript_variant'] == 'NM_003119.2:c.183+1G>A' + assert results['NM_003119.2:c.183+1G>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.2):c.183+1G>A' + assert results['NM_003119.2:c.183+1G>A']['refseqgene_context_intronic_sequence'] == 'NG_008082.1(NM_003119.2):c.183+1G>A' + assert results['NM_003119.2:c.183+1G>A']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.5205G>A' + assert results['NM_003119.2:c.183+1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} + assert results['NM_003119.2:c.183+1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.183+1G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.2:c.183+1G>A']['alt_genomic_loci'], []) + assert results['NM_003119.2:c.183+1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': 'chr16', 'pos': '89575009', 'ref': 'G', 'alt': 'A'}} + assert 'hg38' not in list(results['NM_003119.2:c.183+1G>A']['primary_assembly_loci'].keys()) + assert results['NM_003119.2:c.183+1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': '16', 'pos': '89575009', 'ref': 'G', 'alt': 'A'}} + assert 'grch38' not in 
list(results['NM_003119.2:c.183+1G>A']['primary_assembly_loci'].keys()) + assert results['NM_003119.2:c.183+1G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1'} + assert 'NM_199367.2:c.183+1G>A' in list(results.keys()) + assert results['NM_199367.2:c.183+1G>A']['submitted_variant'] == '16-89575009-G-A' + assert results['NM_199367.2:c.183+1G>A']['gene_symbol'] == 'SPG7' + assert results['NM_199367.2:c.183+1G>A']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_199367.2:c.183+1G>A']['hgvs_transcript_variant'] == 'NM_199367.2:c.183+1G>A' + assert results['NM_199367.2:c.183+1G>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_199367.2):c.183+1G>A' + assert results['NM_199367.2:c.183+1G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.183+1G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.2:c.183+1G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} + assert results['NM_199367.2:c.183+1G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.2:c.183+1G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_199367.2:c.183+1G>A']['alt_genomic_loci'], []) + assert results['NM_199367.2:c.183+1G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575009G>A', 'vcf': {'chr': 'chr16', 'pos': '89575009', 'ref': 'G', 'alt': 'A'}} + assert results['NM_199367.2:c.183+1G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508601G>A', 'vcf': {'chr': 'chr16', 'pos': '89508601', 'ref': 'G', 'alt': 'A'}} + assert results['NM_199367.2:c.183+1G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000016.9:g.89575009G>A', 'vcf': {'chr': '16', 'pos': '89575009', 'ref': 'G', 'alt': 'A'}} + assert results['NM_199367.2:c.183+1G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508601G>A', 'vcf': {'chr': '16', 'pos': '89508601', 'ref': 'G', 'alt': 'A'}} + assert results['NM_199367.2:c.183+1G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1'} def test_variant243(self): variant = '16-89575040-C-A,CA' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_199367.1:c.183+32_183+33insA' in list(results.keys()) - assert results['NM_199367.1:c.183+32_183+33insA']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_199367.1:c.183+32_183+33insA']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_199367.1:c.183+32_183+33insA']['alt_genomic_loci'], []) - assert results['NM_199367.1:c.183+32_183+33insA']['gene_symbol'] == 'SPG7' - assert results['NM_199367.1:c.183+32_183+33insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} - assert results['NM_199367.1:c.183+32_183+33insA']['submitted_variant'] == '16-89575040-C-A,CA' - assert results['NM_199367.1:c.183+32_183+33insA']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_199367.1):c.183+32_183+33insA' - assert results['NM_199367.1:c.183+32_183+33insA']['hgvs_lrg_variant'] == '' - assert results['NM_199367.1:c.183+32_183+33insA']['hgvs_transcript_variant'] == 'NM_199367.1:c.183+32_183+33insA' - assert results['NM_199367.1:c.183+32_183+33insA']['hgvs_refseqgene_variant'] == '' - assert results['NM_199367.1:c.183+32_183+33insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89575040', 'alt': 'CA'}} - assert 'hg38' not in 
list(results['NM_199367.1:c.183+32_183+33insA']['primary_assembly_loci'].keys()) - assert results['NM_199367.1:c.183+32_183+33insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89575040', 'alt': 'CA'}} - assert 'grch38' not in list(results['NM_199367.1:c.183+32_183+33insA']['primary_assembly_loci'].keys()) - assert results['NM_199367.1:c.183+32_183+33insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} + assert results['flag'] == 'gene_variant' + assert 'NM_003119.3:c.183+32C>A' in list(results.keys()) + assert results['NM_003119.3:c.183+32C>A']['submitted_variant'] == '16-89575040-C-A,CA' + assert results['NM_003119.3:c.183+32C>A']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.183+32C>A']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_003119.3:c.183+32C>A']['hgvs_transcript_variant'] == 'NM_003119.3:c.183+32C>A' + assert results['NM_003119.3:c.183+32C>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.3):c.183+32C>A' + assert results['NM_003119.3:c.183+32C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.183+32C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_003119.3:c.183+32C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} + assert results['NM_003119.3:c.183+32C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.183+32C>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.3:c.183+32C>A']['alt_genomic_loci'], []) + assert results['NM_003119.3:c.183+32C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': 'chr16', 'pos': '89575040', 'ref': 'C', 
'alt': 'A'}} + assert results['NM_003119.3:c.183+32C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508632C>A', 'vcf': {'chr': 'chr16', 'pos': '89508632', 'ref': 'C', 'alt': 'A'}} + assert results['NM_003119.3:c.183+32C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': '16', 'pos': '89575040', 'ref': 'C', 'alt': 'A'}} + assert results['NM_003119.3:c.183+32C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508632C>A', 'vcf': {'chr': '16', 'pos': '89508632', 'ref': 'C', 'alt': 'A'}} + assert results['NM_003119.3:c.183+32C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1'} assert 'NM_001363850.1:c.183+32C>A' in list(results.keys()) - assert results['NM_001363850.1:c.183+32C>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001363850.1:c.183+32C>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001363850.1:c.183+32C>A']['alt_genomic_loci'], []) - assert results['NM_001363850.1:c.183+32C>A']['gene_symbol'] == 'SPG7' - assert results['NM_001363850.1:c.183+32C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.?', 'slr': 'NP_001350779.1:p.?'} assert results['NM_001363850.1:c.183+32C>A']['submitted_variant'] == '16-89575040-C-A,CA' - assert results['NM_001363850.1:c.183+32C>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001363850.1):c.183+32C>A' - assert results['NM_001363850.1:c.183+32C>A']['hgvs_lrg_variant'] == '' + assert results['NM_001363850.1:c.183+32C>A']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.183+32C>A']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_001363850.1:c.183+32C>A']['hgvs_transcript_variant'] == 
'NM_001363850.1:c.183+32C>A' + assert results['NM_001363850.1:c.183+32C>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001363850.1):c.183+32C>A' + assert results['NM_001363850.1:c.183+32C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363850.1:c.183+32C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001363850.1:c.183+32C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89575040', 'alt': 'A'}} + assert results['NM_001363850.1:c.183+32C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.?', 'slr': 'NP_001350779.1:p.?'} + assert results['NM_001363850.1:c.183+32C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.183+32C>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001363850.1:c.183+32C>A']['alt_genomic_loci'], []) + assert results['NM_001363850.1:c.183+32C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': 'chr16', 'pos': '89575040', 'ref': 'C', 'alt': 'A'}} assert 'hg38' not in list(results['NM_001363850.1:c.183+32C>A']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.183+32C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89575040', 'alt': 'A'}} + assert results['NM_001363850.1:c.183+32C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': '16', 'pos': '89575040', 'ref': 'C', 'alt': 'A'}} assert 'grch38' not in list(results['NM_001363850.1:c.183+32C>A']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.183+32C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} + assert 
results['NM_001363850.1:c.183+32C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1'} - assert 'NM_001363850.1:c.183+32_183+33insA' in list(results.keys()) - assert results['NM_001363850.1:c.183+32_183+33insA']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001363850.1:c.183+32_183+33insA']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001363850.1:c.183+32_183+33insA']['alt_genomic_loci'], []) - assert results['NM_001363850.1:c.183+32_183+33insA']['gene_symbol'] == 'SPG7' - assert results['NM_001363850.1:c.183+32_183+33insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.?', 'slr': 'NP_001350779.1:p.?'} - assert results['NM_001363850.1:c.183+32_183+33insA']['submitted_variant'] == '16-89575040-C-A,CA' - assert results['NM_001363850.1:c.183+32_183+33insA']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001363850.1):c.183+32_183+33insA' - assert results['NM_001363850.1:c.183+32_183+33insA']['hgvs_lrg_variant'] == '' - assert results['NM_001363850.1:c.183+32_183+33insA']['hgvs_transcript_variant'] == 'NM_001363850.1:c.183+32_183+33insA' - assert results['NM_001363850.1:c.183+32_183+33insA']['hgvs_refseqgene_variant'] == '' - assert results['NM_001363850.1:c.183+32_183+33insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89575040', 'alt': 'CA'}} - assert 'hg38' not in list(results['NM_001363850.1:c.183+32_183+33insA']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.183+32_183+33insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89575040', 'alt': 'CA'}} - assert 'grch38' not in 
list(results['NM_001363850.1:c.183+32_183+33insA']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.183+32_183+33insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} + assert 'NM_199367.1:c.183+32C>A' in list(results.keys()) + assert results['NM_199367.1:c.183+32C>A']['submitted_variant'] == '16-89575040-C-A,CA' + assert results['NM_199367.1:c.183+32C>A']['gene_symbol'] == 'SPG7' + assert results['NM_199367.1:c.183+32C>A']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_199367.1:c.183+32C>A']['hgvs_transcript_variant'] == 'NM_199367.1:c.183+32C>A' + assert results['NM_199367.1:c.183+32C>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_199367.1):c.183+32C>A' + assert results['NM_199367.1:c.183+32C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.183+32C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.1:c.183+32C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} + assert results['NM_199367.1:c.183+32C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.1:c.183+32C>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_199367.1:c.183+32C>A']['alt_genomic_loci'], []) + assert results['NM_199367.1:c.183+32C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': 'chr16', 'pos': '89575040', 'ref': 'C', 'alt': 'A'}} + assert 'hg38' not in list(results['NM_199367.1:c.183+32C>A']['primary_assembly_loci'].keys()) + assert results['NM_199367.1:c.183+32C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': '16', 'pos': '89575040', 'ref': 'C', 'alt': 'A'}} + assert 'grch38' not in 
list(results['NM_199367.1:c.183+32C>A']['primary_assembly_loci'].keys()) + assert results['NM_199367.1:c.183+32C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1'} + + assert 'NM_003119.2:c.183+32C>A' in list(results.keys()) + assert results['NM_003119.2:c.183+32C>A']['submitted_variant'] == '16-89575040-C-A,CA' + assert results['NM_003119.2:c.183+32C>A']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.183+32C>A']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_003119.2:c.183+32C>A']['hgvs_transcript_variant'] == 'NM_003119.2:c.183+32C>A' + assert results['NM_003119.2:c.183+32C>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.2):c.183+32C>A' + assert results['NM_003119.2:c.183+32C>A']['refseqgene_context_intronic_sequence'] == 'NG_008082.1(NM_003119.2):c.183+32C>A' + assert results['NM_003119.2:c.183+32C>A']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.5236C>A' + assert results['NM_003119.2:c.183+32C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} + assert results['NM_003119.2:c.183+32C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.183+32C>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.2:c.183+32C>A']['alt_genomic_loci'], []) + assert results['NM_003119.2:c.183+32C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': 'chr16', 'pos': '89575040', 'ref': 'C', 'alt': 'A'}} + assert 'hg38' not in list(results['NM_003119.2:c.183+32C>A']['primary_assembly_loci'].keys()) + assert results['NM_003119.2:c.183+32C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': '16', 'pos': '89575040', 'ref': 'C', 'alt': 'A'}} + assert 
'grch38' not in list(results['NM_003119.2:c.183+32C>A']['primary_assembly_loci'].keys()) + assert results['NM_003119.2:c.183+32C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1'} assert 'NM_199367.2:c.183+32C>A' in list(results.keys()) - assert results['NM_199367.2:c.183+32C>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_199367.2:c.183+32C>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_199367.2:c.183+32C>A']['alt_genomic_loci'], []) - assert results['NM_199367.2:c.183+32C>A']['gene_symbol'] == 'SPG7' - assert results['NM_199367.2:c.183+32C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} assert results['NM_199367.2:c.183+32C>A']['submitted_variant'] == '16-89575040-C-A,CA' - assert results['NM_199367.2:c.183+32C>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_199367.2):c.183+32C>A' - assert results['NM_199367.2:c.183+32C>A']['hgvs_lrg_variant'] == '' + assert results['NM_199367.2:c.183+32C>A']['gene_symbol'] == 'SPG7' + assert results['NM_199367.2:c.183+32C>A']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_199367.2:c.183+32C>A']['hgvs_transcript_variant'] == 'NM_199367.2:c.183+32C>A' + assert results['NM_199367.2:c.183+32C>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_199367.2):c.183+32C>A' + assert results['NM_199367.2:c.183+32C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.2:c.183+32C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_199367.2:c.183+32C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89575040', 'alt': 'A'}} - assert 
results['NM_199367.2:c.183+32C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508632C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89508632', 'alt': 'A'}} - assert results['NM_199367.2:c.183+32C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89575040', 'alt': 'A'}} - assert results['NM_199367.2:c.183+32C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508632C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89508632', 'alt': 'A'}} - assert results['NM_199367.2:c.183+32C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} + assert results['NM_199367.2:c.183+32C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} + assert results['NM_199367.2:c.183+32C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.2:c.183+32C>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_199367.2:c.183+32C>A']['alt_genomic_loci'], []) + assert results['NM_199367.2:c.183+32C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': 'chr16', 'pos': '89575040', 'ref': 'C', 'alt': 'A'}} + assert results['NM_199367.2:c.183+32C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508632C>A', 'vcf': {'chr': 'chr16', 'pos': '89508632', 'ref': 'C', 'alt': 'A'}} + assert results['NM_199367.2:c.183+32C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': '16', 'pos': '89575040', 'ref': 'C', 'alt': 'A'}} + assert results['NM_199367.2:c.183+32C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508632C>A', 'vcf': {'chr': '16', 'pos': '89508632', 'ref': 'C', 
'alt': 'A'}} + assert results['NM_199367.2:c.183+32C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1'} assert 'NM_003119.3:c.183+32_183+33insA' in list(results.keys()) - assert results['NM_003119.3:c.183+32_183+33insA']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.3:c.183+32_183+33insA']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003119.3:c.183+32_183+33insA']['alt_genomic_loci'], []) - assert results['NM_003119.3:c.183+32_183+33insA']['gene_symbol'] == 'SPG7' - assert results['NM_003119.3:c.183+32_183+33insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} assert results['NM_003119.3:c.183+32_183+33insA']['submitted_variant'] == '16-89575040-C-A,CA' - assert results['NM_003119.3:c.183+32_183+33insA']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.3):c.183+32_183+33insA' - assert results['NM_003119.3:c.183+32_183+33insA']['hgvs_lrg_variant'] == '' + assert results['NM_003119.3:c.183+32_183+33insA']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.183+32_183+33insA']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_003119.3:c.183+32_183+33insA']['hgvs_transcript_variant'] == 'NM_003119.3:c.183+32_183+33insA' + assert results['NM_003119.3:c.183+32_183+33insA']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.3):c.183+32_183+33insA' + assert results['NM_003119.3:c.183+32_183+33insA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.3:c.183+32_183+33insA']['hgvs_refseqgene_variant'] == '' - assert results['NM_003119.3:c.183+32_183+33insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89575040', 'alt': 'CA'}} 
- assert results['NM_003119.3:c.183+32_183+33insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508632_89508633insA', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89508632', 'alt': 'CA'}} - assert results['NM_003119.3:c.183+32_183+33insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89575040', 'alt': 'CA'}} - assert results['NM_003119.3:c.183+32_183+33insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508632_89508633insA', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89508632', 'alt': 'CA'}} - assert results['NM_003119.3:c.183+32_183+33insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} + assert results['NM_003119.3:c.183+32_183+33insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} + assert results['NM_003119.3:c.183+32_183+33insA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.183+32_183+33insA']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.3:c.183+32_183+33insA']['alt_genomic_loci'], []) + assert results['NM_003119.3:c.183+32_183+33insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': 'chr16', 'pos': '89575040', 'ref': 'C', 'alt': 'CA'}} + assert results['NM_003119.3:c.183+32_183+33insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508632_89508633insA', 'vcf': {'chr': 'chr16', 'pos': '89508632', 'ref': 'C', 'alt': 'CA'}} + assert results['NM_003119.3:c.183+32_183+33insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': '16', 'pos': '89575040', 'ref': 'C', 'alt': 'CA'}} + assert 
results['NM_003119.3:c.183+32_183+33insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508632_89508633insA', 'vcf': {'chr': '16', 'pos': '89508632', 'ref': 'C', 'alt': 'CA'}} + assert results['NM_003119.3:c.183+32_183+33insA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1'} + + assert 'NM_001363850.1:c.183+32_183+33insA' in list(results.keys()) + assert results['NM_001363850.1:c.183+32_183+33insA']['submitted_variant'] == '16-89575040-C-A,CA' + assert results['NM_001363850.1:c.183+32_183+33insA']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.183+32_183+33insA']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_001363850.1:c.183+32_183+33insA']['hgvs_transcript_variant'] == 'NM_001363850.1:c.183+32_183+33insA' + assert results['NM_001363850.1:c.183+32_183+33insA']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001363850.1):c.183+32_183+33insA' + assert results['NM_001363850.1:c.183+32_183+33insA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.183+32_183+33insA']['hgvs_refseqgene_variant'] == '' + assert results['NM_001363850.1:c.183+32_183+33insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.?', 'slr': 'NP_001350779.1:p.?'} + assert results['NM_001363850.1:c.183+32_183+33insA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.183+32_183+33insA']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001363850.1:c.183+32_183+33insA']['alt_genomic_loci'], []) + assert results['NM_001363850.1:c.183+32_183+33insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': 'chr16', 'pos': '89575040', 'ref': 'C', 'alt': 'CA'}} + assert 'hg38' not in 
list(results['NM_001363850.1:c.183+32_183+33insA']['primary_assembly_loci'].keys()) + assert results['NM_001363850.1:c.183+32_183+33insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': '16', 'pos': '89575040', 'ref': 'C', 'alt': 'CA'}} + assert 'grch38' not in list(results['NM_001363850.1:c.183+32_183+33insA']['primary_assembly_loci'].keys()) + assert results['NM_001363850.1:c.183+32_183+33insA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1'} + + assert 'NM_199367.1:c.183+32_183+33insA' in list(results.keys()) + assert results['NM_199367.1:c.183+32_183+33insA']['submitted_variant'] == '16-89575040-C-A,CA' + assert results['NM_199367.1:c.183+32_183+33insA']['gene_symbol'] == 'SPG7' + assert results['NM_199367.1:c.183+32_183+33insA']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_199367.1:c.183+32_183+33insA']['hgvs_transcript_variant'] == 'NM_199367.1:c.183+32_183+33insA' + assert results['NM_199367.1:c.183+32_183+33insA']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_199367.1):c.183+32_183+33insA' + assert results['NM_199367.1:c.183+32_183+33insA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.183+32_183+33insA']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.1:c.183+32_183+33insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} + assert results['NM_199367.1:c.183+32_183+33insA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.1:c.183+32_183+33insA']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_199367.1:c.183+32_183+33insA']['alt_genomic_loci'], []) + assert results['NM_199367.1:c.183+32_183+33insA']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': 'chr16', 'pos': '89575040', 'ref': 'C', 'alt': 'CA'}} + assert 'hg38' not in list(results['NM_199367.1:c.183+32_183+33insA']['primary_assembly_loci'].keys()) + assert results['NM_199367.1:c.183+32_183+33insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': '16', 'pos': '89575040', 'ref': 'C', 'alt': 'CA'}} + assert 'grch38' not in list(results['NM_199367.1:c.183+32_183+33insA']['primary_assembly_loci'].keys()) + assert results['NM_199367.1:c.183+32_183+33insA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1'} - assert results['flag'] == 'gene_variant' assert 'NM_003119.2:c.183+32_183+33insA' in list(results.keys()) - assert results['NM_003119.2:c.183+32_183+33insA']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.2:c.183+32_183+33insA']['refseqgene_context_intronic_sequence'] == 'NG_008082.1(NM_003119.2):c.183+32_183+33insA' - self.assertCountEqual(results['NM_003119.2:c.183+32_183+33insA']['alt_genomic_loci'], []) - assert results['NM_003119.2:c.183+32_183+33insA']['gene_symbol'] == 'SPG7' - assert results['NM_003119.2:c.183+32_183+33insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} assert results['NM_003119.2:c.183+32_183+33insA']['submitted_variant'] == '16-89575040-C-A,CA' - assert results['NM_003119.2:c.183+32_183+33insA']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.2):c.183+32_183+33insA' - assert results['NM_003119.2:c.183+32_183+33insA']['hgvs_lrg_variant'] == '' + assert results['NM_003119.2:c.183+32_183+33insA']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.183+32_183+33insA']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} 
assert results['NM_003119.2:c.183+32_183+33insA']['hgvs_transcript_variant'] == 'NM_003119.2:c.183+32_183+33insA' + assert results['NM_003119.2:c.183+32_183+33insA']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.2):c.183+32_183+33insA' + assert results['NM_003119.2:c.183+32_183+33insA']['refseqgene_context_intronic_sequence'] == 'NG_008082.1(NM_003119.2):c.183+32_183+33insA' assert results['NM_003119.2:c.183+32_183+33insA']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.5236_5237insA' - assert results['NM_003119.2:c.183+32_183+33insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89575040', 'alt': 'CA'}} + assert results['NM_003119.2:c.183+32_183+33insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} + assert results['NM_003119.2:c.183+32_183+33insA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.183+32_183+33insA']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.2:c.183+32_183+33insA']['alt_genomic_loci'], []) + assert results['NM_003119.2:c.183+32_183+33insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': 'chr16', 'pos': '89575040', 'ref': 'C', 'alt': 'CA'}} assert 'hg38' not in list(results['NM_003119.2:c.183+32_183+33insA']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.183+32_183+33insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89575040', 'alt': 'CA'}} + assert results['NM_003119.2:c.183+32_183+33insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': '16', 'pos': '89575040', 'ref': 'C', 'alt': 'CA'}} assert 'grch38' not in 
list(results['NM_003119.2:c.183+32_183+33insA']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.183+32_183+33insA']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} - - assert 'NM_199367.1:c.183+32C>A' in list(results.keys()) - assert results['NM_199367.1:c.183+32C>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_199367.1:c.183+32C>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_199367.1:c.183+32C>A']['alt_genomic_loci'], []) - assert results['NM_199367.1:c.183+32C>A']['gene_symbol'] == 'SPG7' - assert results['NM_199367.1:c.183+32C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} - assert results['NM_199367.1:c.183+32C>A']['submitted_variant'] == '16-89575040-C-A,CA' - assert results['NM_199367.1:c.183+32C>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_199367.1):c.183+32C>A' - assert results['NM_199367.1:c.183+32C>A']['hgvs_lrg_variant'] == '' - assert results['NM_199367.1:c.183+32C>A']['hgvs_transcript_variant'] == 'NM_199367.1:c.183+32C>A' - assert results['NM_199367.1:c.183+32C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_199367.1:c.183+32C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89575040', 'alt': 'A'}} - assert 'hg38' not in list(results['NM_199367.1:c.183+32C>A']['primary_assembly_loci'].keys()) - assert results['NM_199367.1:c.183+32C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89575040', 'alt': 'A'}} - assert 'grch38' not in list(results['NM_199367.1:c.183+32C>A']['primary_assembly_loci'].keys()) - assert 
results['NM_199367.1:c.183+32C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} - - assert 'NM_003119.3:c.183+32C>A' in list(results.keys()) - assert results['NM_003119.3:c.183+32C>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.3:c.183+32C>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003119.3:c.183+32C>A']['alt_genomic_loci'], []) - assert results['NM_003119.3:c.183+32C>A']['gene_symbol'] == 'SPG7' - assert results['NM_003119.3:c.183+32C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} - assert results['NM_003119.3:c.183+32C>A']['submitted_variant'] == '16-89575040-C-A,CA' - assert results['NM_003119.3:c.183+32C>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.3):c.183+32C>A' - assert results['NM_003119.3:c.183+32C>A']['hgvs_lrg_variant'] == '' - assert results['NM_003119.3:c.183+32C>A']['hgvs_transcript_variant'] == 'NM_003119.3:c.183+32C>A' - assert results['NM_003119.3:c.183+32C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_003119.3:c.183+32C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89575040', 'alt': 'A'}} - assert results['NM_003119.3:c.183+32C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508632C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89508632', 'alt': 'A'}} - assert results['NM_003119.3:c.183+32C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89575040', 'alt': 'A'}} - assert results['NM_003119.3:c.183+32C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508632C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89508632', 'alt': 
'A'}} - assert results['NM_003119.3:c.183+32C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} + assert results['NM_003119.2:c.183+32_183+33insA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1'} assert 'NM_199367.2:c.183+32_183+33insA' in list(results.keys()) - assert results['NM_199367.2:c.183+32_183+33insA']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_199367.2:c.183+32_183+33insA']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_199367.2:c.183+32_183+33insA']['alt_genomic_loci'], []) - assert results['NM_199367.2:c.183+32_183+33insA']['gene_symbol'] == 'SPG7' - assert results['NM_199367.2:c.183+32_183+33insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} assert results['NM_199367.2:c.183+32_183+33insA']['submitted_variant'] == '16-89575040-C-A,CA' - assert results['NM_199367.2:c.183+32_183+33insA']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_199367.2):c.183+32_183+33insA' - assert results['NM_199367.2:c.183+32_183+33insA']['hgvs_lrg_variant'] == '' + assert results['NM_199367.2:c.183+32_183+33insA']['gene_symbol'] == 'SPG7' + assert results['NM_199367.2:c.183+32_183+33insA']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_199367.2:c.183+32_183+33insA']['hgvs_transcript_variant'] == 'NM_199367.2:c.183+32_183+33insA' + assert results['NM_199367.2:c.183+32_183+33insA']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_199367.2):c.183+32_183+33insA' + assert results['NM_199367.2:c.183+32_183+33insA']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_199367.2:c.183+32_183+33insA']['hgvs_refseqgene_variant'] == '' - assert results['NM_199367.2:c.183+32_183+33insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89575040', 'alt': 'CA'}} - assert results['NM_199367.2:c.183+32_183+33insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508632_89508633insA', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89508632', 'alt': 'CA'}} - assert results['NM_199367.2:c.183+32_183+33insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89575040', 'alt': 'CA'}} - assert results['NM_199367.2:c.183+32_183+33insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508632_89508633insA', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89508632', 'alt': 'CA'}} - assert results['NM_199367.2:c.183+32_183+33insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} - - assert 'NM_003119.2:c.183+32C>A' in list(results.keys()) - assert results['NM_003119.2:c.183+32C>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.2:c.183+32C>A']['refseqgene_context_intronic_sequence'] == 'NG_008082.1(NM_003119.2):c.183+32C>A' - self.assertCountEqual(results['NM_003119.2:c.183+32C>A']['alt_genomic_loci'], []) - assert results['NM_003119.2:c.183+32C>A']['gene_symbol'] == 'SPG7' - assert results['NM_003119.2:c.183+32C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} - assert results['NM_003119.2:c.183+32C>A']['submitted_variant'] == '16-89575040-C-A,CA' - assert results['NM_003119.2:c.183+32C>A']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.2):c.183+32C>A' - assert 
results['NM_003119.2:c.183+32C>A']['hgvs_lrg_variant'] == '' - assert results['NM_003119.2:c.183+32C>A']['hgvs_transcript_variant'] == 'NM_003119.2:c.183+32C>A' - assert results['NM_003119.2:c.183+32C>A']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.5236C>A' - assert results['NM_003119.2:c.183+32C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89575040', 'alt': 'A'}} - assert 'hg38' not in list(results['NM_003119.2:c.183+32C>A']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.183+32C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040C>A', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89575040', 'alt': 'A'}} - assert 'grch38' not in list(results['NM_003119.2:c.183+32C>A']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.183+32C>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} - + assert results['NM_199367.2:c.183+32_183+33insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} + assert results['NM_199367.2:c.183+32_183+33insA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.2:c.183+32_183+33insA']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_199367.2:c.183+32_183+33insA']['alt_genomic_loci'], []) + assert results['NM_199367.2:c.183+32_183+33insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': 'chr16', 'pos': '89575040', 'ref': 'C', 'alt': 'CA'}} + assert results['NM_199367.2:c.183+32_183+33insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508632_89508633insA', 'vcf': {'chr': 'chr16', 'pos': '89508632', 'ref': 'C', 'alt': 'CA'}} + 
assert results['NM_199367.2:c.183+32_183+33insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89575040_89575041insA', 'vcf': {'chr': '16', 'pos': '89575040', 'ref': 'C', 'alt': 'CA'}} + assert results['NM_199367.2:c.183+32_183+33insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89508632_89508633insA', 'vcf': {'chr': '16', 'pos': '89508632', 'ref': 'C', 'alt': 'CA'}} + assert results['NM_199367.2:c.183+32_183+33insA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1'} def test_variant244(self): variant = '16-89576896-A-C' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_199367.2:c.184-2A>C' in list(results.keys()) - assert results['NM_199367.2:c.184-2A>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_199367.2:c.184-2A>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_199367.2:c.184-2A>C']['alt_genomic_loci'], []) - assert results['NM_199367.2:c.184-2A>C']['gene_symbol'] == 'SPG7' - assert results['NM_199367.2:c.184-2A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} - assert results['NM_199367.2:c.184-2A>C']['submitted_variant'] == '16-89576896-A-C' - assert results['NM_199367.2:c.184-2A>C']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_199367.2):c.184-2A>C' - assert results['NM_199367.2:c.184-2A>C']['hgvs_lrg_variant'] == '' - assert results['NM_199367.2:c.184-2A>C']['hgvs_transcript_variant'] == 'NM_199367.2:c.184-2A>C' - assert results['NM_199367.2:c.184-2A>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_199367.2:c.184-2A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89576896', 'alt': 'C'}} - assert 
results['NM_199367.2:c.184-2A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510488A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89510488', 'alt': 'C'}} - assert results['NM_199367.2:c.184-2A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89576896', 'alt': 'C'}} - assert results['NM_199367.2:c.184-2A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510488A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89510488', 'alt': 'C'}} - assert results['NM_199367.2:c.184-2A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} - - assert 'NM_003119.2:c.184-2A>C' in list(results.keys()) - assert results['NM_003119.2:c.184-2A>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.2:c.184-2A>C']['refseqgene_context_intronic_sequence'] == 'NG_008082.1(NM_003119.2):c.184-2A>C' - self.assertCountEqual(results['NM_003119.2:c.184-2A>C']['alt_genomic_loci'], []) - assert results['NM_003119.2:c.184-2A>C']['gene_symbol'] == 'SPG7' - assert results['NM_003119.2:c.184-2A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} - assert results['NM_003119.2:c.184-2A>C']['submitted_variant'] == '16-89576896-A-C' - assert results['NM_003119.2:c.184-2A>C']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.2):c.184-2A>C' - assert results['NM_003119.2:c.184-2A>C']['hgvs_lrg_variant'] == '' - assert results['NM_003119.2:c.184-2A>C']['hgvs_transcript_variant'] == 'NM_003119.2:c.184-2A>C' - assert results['NM_003119.2:c.184-2A>C']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.7092A>C' - assert results['NM_003119.2:c.184-2A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': 'chr16', 
'ref': 'A', 'pos': '89576896', 'alt': 'C'}} - assert 'hg38' not in list(results['NM_003119.2:c.184-2A>C']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.184-2A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89576896', 'alt': 'C'}} - assert 'grch38' not in list(results['NM_003119.2:c.184-2A>C']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.184-2A>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} - + assert results['flag'] == 'gene_variant' assert 'NM_003119.3:c.184-2A>C' in list(results.keys()) - assert results['NM_003119.3:c.184-2A>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.3:c.184-2A>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003119.3:c.184-2A>C']['alt_genomic_loci'], []) - assert results['NM_003119.3:c.184-2A>C']['gene_symbol'] == 'SPG7' - assert results['NM_003119.3:c.184-2A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} assert results['NM_003119.3:c.184-2A>C']['submitted_variant'] == '16-89576896-A-C' - assert results['NM_003119.3:c.184-2A>C']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.3):c.184-2A>C' - assert results['NM_003119.3:c.184-2A>C']['hgvs_lrg_variant'] == '' + assert results['NM_003119.3:c.184-2A>C']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.184-2A>C']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_003119.3:c.184-2A>C']['hgvs_transcript_variant'] == 'NM_003119.3:c.184-2A>C' + assert results['NM_003119.3:c.184-2A>C']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.3):c.184-2A>C' + assert 
results['NM_003119.3:c.184-2A>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.3:c.184-2A>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_003119.3:c.184-2A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89576896', 'alt': 'C'}} - assert results['NM_003119.3:c.184-2A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510488A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89510488', 'alt': 'C'}} - assert results['NM_003119.3:c.184-2A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89576896', 'alt': 'C'}} - assert results['NM_003119.3:c.184-2A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510488A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89510488', 'alt': 'C'}} - assert results['NM_003119.3:c.184-2A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} + assert results['NM_003119.3:c.184-2A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} + assert results['NM_003119.3:c.184-2A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.184-2A>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.3:c.184-2A>C']['alt_genomic_loci'], []) + assert results['NM_003119.3:c.184-2A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': 'chr16', 'pos': '89576896', 'ref': 'A', 'alt': 'C'}} + assert results['NM_003119.3:c.184-2A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510488A>C', 'vcf': {'chr': 'chr16', 'pos': '89510488', 'ref': 'A', 'alt': 'C'}} + assert 
results['NM_003119.3:c.184-2A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': '16', 'pos': '89576896', 'ref': 'A', 'alt': 'C'}} + assert results['NM_003119.3:c.184-2A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510488A>C', 'vcf': {'chr': '16', 'pos': '89510488', 'ref': 'A', 'alt': 'C'}} + assert results['NM_003119.3:c.184-2A>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1'} assert 'NM_001363850.1:c.184-2A>C' in list(results.keys()) - assert results['NM_001363850.1:c.184-2A>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001363850.1:c.184-2A>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001363850.1:c.184-2A>C']['alt_genomic_loci'], []) - assert results['NM_001363850.1:c.184-2A>C']['gene_symbol'] == 'SPG7' - assert results['NM_001363850.1:c.184-2A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.?', 'slr': 'NP_001350779.1:p.?'} assert results['NM_001363850.1:c.184-2A>C']['submitted_variant'] == '16-89576896-A-C' - assert results['NM_001363850.1:c.184-2A>C']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001363850.1):c.184-2A>C' - assert results['NM_001363850.1:c.184-2A>C']['hgvs_lrg_variant'] == '' + assert results['NM_001363850.1:c.184-2A>C']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.184-2A>C']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_001363850.1:c.184-2A>C']['hgvs_transcript_variant'] == 'NM_001363850.1:c.184-2A>C' + assert results['NM_001363850.1:c.184-2A>C']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001363850.1):c.184-2A>C' + assert results['NM_001363850.1:c.184-2A>C']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_001363850.1:c.184-2A>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_001363850.1:c.184-2A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89576896', 'alt': 'C'}} + assert results['NM_001363850.1:c.184-2A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.?', 'slr': 'NP_001350779.1:p.?'} + assert results['NM_001363850.1:c.184-2A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.184-2A>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001363850.1:c.184-2A>C']['alt_genomic_loci'], []) + assert results['NM_001363850.1:c.184-2A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': 'chr16', 'pos': '89576896', 'ref': 'A', 'alt': 'C'}} assert 'hg38' not in list(results['NM_001363850.1:c.184-2A>C']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.184-2A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89576896', 'alt': 'C'}} + assert results['NM_001363850.1:c.184-2A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': '16', 'pos': '89576896', 'ref': 'A', 'alt': 'C'}} assert 'grch38' not in list(results['NM_001363850.1:c.184-2A>C']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.184-2A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} + assert results['NM_001363850.1:c.184-2A>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1'} - assert results['flag'] == 'gene_variant' assert 'NM_199367.1:c.184-2A>C' in 
list(results.keys()) - assert results['NM_199367.1:c.184-2A>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_199367.1:c.184-2A>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_199367.1:c.184-2A>C']['alt_genomic_loci'], []) - assert results['NM_199367.1:c.184-2A>C']['gene_symbol'] == 'SPG7' - assert results['NM_199367.1:c.184-2A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} assert results['NM_199367.1:c.184-2A>C']['submitted_variant'] == '16-89576896-A-C' - assert results['NM_199367.1:c.184-2A>C']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_199367.1):c.184-2A>C' - assert results['NM_199367.1:c.184-2A>C']['hgvs_lrg_variant'] == '' + assert results['NM_199367.1:c.184-2A>C']['gene_symbol'] == 'SPG7' + assert results['NM_199367.1:c.184-2A>C']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_199367.1:c.184-2A>C']['hgvs_transcript_variant'] == 'NM_199367.1:c.184-2A>C' + assert results['NM_199367.1:c.184-2A>C']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_199367.1):c.184-2A>C' + assert results['NM_199367.1:c.184-2A>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.1:c.184-2A>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_199367.1:c.184-2A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': 'chr16', 'ref': 'A', 'pos': '89576896', 'alt': 'C'}} + assert results['NM_199367.1:c.184-2A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} + assert results['NM_199367.1:c.184-2A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.1:c.184-2A>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_199367.1:c.184-2A>C']['alt_genomic_loci'], []) + assert 
results['NM_199367.1:c.184-2A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': 'chr16', 'pos': '89576896', 'ref': 'A', 'alt': 'C'}} assert 'hg38' not in list(results['NM_199367.1:c.184-2A>C']['primary_assembly_loci'].keys()) - assert results['NM_199367.1:c.184-2A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': '16', 'ref': 'A', 'pos': '89576896', 'alt': 'C'}} + assert results['NM_199367.1:c.184-2A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': '16', 'pos': '89576896', 'ref': 'A', 'alt': 'C'}} assert 'grch38' not in list(results['NM_199367.1:c.184-2A>C']['primary_assembly_loci'].keys()) - assert results['NM_199367.1:c.184-2A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} + assert results['NM_199367.1:c.184-2A>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1'} + + assert 'NM_003119.2:c.184-2A>C' in list(results.keys()) + assert results['NM_003119.2:c.184-2A>C']['submitted_variant'] == '16-89576896-A-C' + assert results['NM_003119.2:c.184-2A>C']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.184-2A>C']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_003119.2:c.184-2A>C']['hgvs_transcript_variant'] == 'NM_003119.2:c.184-2A>C' + assert results['NM_003119.2:c.184-2A>C']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.2):c.184-2A>C' + assert results['NM_003119.2:c.184-2A>C']['refseqgene_context_intronic_sequence'] == 'NG_008082.1(NM_003119.2):c.184-2A>C' + assert results['NM_003119.2:c.184-2A>C']['hgvs_refseqgene_variant'] == 
'NG_008082.1:g.7092A>C' + assert results['NM_003119.2:c.184-2A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} + assert results['NM_003119.2:c.184-2A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.184-2A>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.2:c.184-2A>C']['alt_genomic_loci'], []) + assert results['NM_003119.2:c.184-2A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': 'chr16', 'pos': '89576896', 'ref': 'A', 'alt': 'C'}} + assert 'hg38' not in list(results['NM_003119.2:c.184-2A>C']['primary_assembly_loci'].keys()) + assert results['NM_003119.2:c.184-2A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': '16', 'pos': '89576896', 'ref': 'A', 'alt': 'C'}} + assert 'grch38' not in list(results['NM_003119.2:c.184-2A>C']['primary_assembly_loci'].keys()) + assert results['NM_003119.2:c.184-2A>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1'} + assert 'NM_199367.2:c.184-2A>C' in list(results.keys()) + assert results['NM_199367.2:c.184-2A>C']['submitted_variant'] == '16-89576896-A-C' + assert results['NM_199367.2:c.184-2A>C']['gene_symbol'] == 'SPG7' + assert results['NM_199367.2:c.184-2A>C']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_199367.2:c.184-2A>C']['hgvs_transcript_variant'] == 'NM_199367.2:c.184-2A>C' + assert results['NM_199367.2:c.184-2A>C']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_199367.2):c.184-2A>C' + assert results['NM_199367.2:c.184-2A>C']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_199367.2:c.184-2A>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.2:c.184-2A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.?', 'slr': 'NP_955399.1:p.?'} + assert results['NM_199367.2:c.184-2A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.2:c.184-2A>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_199367.2:c.184-2A>C']['alt_genomic_loci'], []) + assert results['NM_199367.2:c.184-2A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': 'chr16', 'pos': '89576896', 'ref': 'A', 'alt': 'C'}} + assert results['NM_199367.2:c.184-2A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510488A>C', 'vcf': {'chr': 'chr16', 'pos': '89510488', 'ref': 'A', 'alt': 'C'}} + assert results['NM_199367.2:c.184-2A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576896A>C', 'vcf': {'chr': '16', 'pos': '89576896', 'ref': 'A', 'alt': 'C'}} + assert results['NM_199367.2:c.184-2A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510488A>C', 'vcf': {'chr': '16', 'pos': '89510488', 'ref': 'A', 'alt': 'C'}} + assert results['NM_199367.2:c.184-2A>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1'} def test_variant245(self): variant = '16-89576930-T-TA,TT' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_003119.3:c.216dup' in list(results.keys()) - assert results['NM_003119.3:c.216dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.3:c.216dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003119.3:c.216dup']['alt_genomic_loci'], []) - assert results['NM_003119.3:c.216dup']['gene_symbol'] == 'SPG7' - assert 
results['NM_003119.3:c.216dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Glu73Ter)', 'slr': 'NP_003110.1:p.(E73*)'} - assert results['NM_003119.3:c.216dup']['submitted_variant'] == '16-89576930-T-TA,TT' - assert results['NM_003119.3:c.216dup']['genome_context_intronic_sequence'] == '' - assert results['NM_003119.3:c.216dup']['hgvs_lrg_variant'] == '' - assert results['NM_003119.3:c.216dup']['hgvs_transcript_variant'] == 'NM_003119.3:c.216dup' - assert results['NM_003119.3:c.216dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_003119.3:c.216dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89576927', 'alt': 'CT'}} - assert results['NM_003119.3:c.216dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522dup', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89510519', 'alt': 'CT'}} - assert results['NM_003119.3:c.216dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89576927', 'alt': 'CT'}} - assert results['NM_003119.3:c.216dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522dup', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89510519', 'alt': 'CT'}} - assert results['NM_003119.3:c.216dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} + assert results['flag'] == 'gene_variant' + assert 'NM_003119.3:c.216_217insA' in list(results.keys()) + assert results['NM_003119.3:c.216_217insA']['submitted_variant'] == '16-89576930-T-TA,TT' + assert results['NM_003119.3:c.216_217insA']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.216_217insA']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + 
assert results['NM_003119.3:c.216_217insA']['hgvs_transcript_variant'] == 'NM_003119.3:c.216_217insA' + assert results['NM_003119.3:c.216_217insA']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.216_217insA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.216_217insA']['hgvs_refseqgene_variant'] == '' + assert results['NM_003119.3:c.216_217insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Glu73ArgfsTer30)', 'slr': 'NP_003110.1:p.(E73Rfs*30)'} + assert results['NM_003119.3:c.216_217insA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.216_217insA']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.3:c.216_217insA']['alt_genomic_loci'], []) + assert results['NM_003119.3:c.216_217insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': 'chr16', 'pos': '89576930', 'ref': 'T', 'alt': 'TA'}} + assert results['NM_003119.3:c.216_217insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523insA', 'vcf': {'chr': 'chr16', 'pos': '89510522', 'ref': 'T', 'alt': 'TA'}} + assert results['NM_003119.3:c.216_217insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': '16', 'pos': '89576930', 'ref': 'T', 'alt': 'TA'}} + assert results['NM_003119.3:c.216_217insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523insA', 'vcf': {'chr': '16', 'pos': '89510522', 'ref': 'T', 'alt': 'TA'}} + assert results['NM_003119.3:c.216_217insA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1'} + + assert 'NM_001363850.1:c.216_217insA' in list(results.keys()) + assert results['NM_001363850.1:c.216_217insA']['submitted_variant'] == 
'16-89576930-T-TA,TT' + assert results['NM_001363850.1:c.216_217insA']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.216_217insA']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_001363850.1:c.216_217insA']['hgvs_transcript_variant'] == 'NM_001363850.1:c.216_217insA' + assert results['NM_001363850.1:c.216_217insA']['genome_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.216_217insA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.216_217insA']['hgvs_refseqgene_variant'] == '' + assert results['NM_001363850.1:c.216_217insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Glu73ArgfsTer30)', 'slr': 'NP_001350779.1:p.(E73Rfs*30)'} + assert results['NM_001363850.1:c.216_217insA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.216_217insA']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001363850.1:c.216_217insA']['alt_genomic_loci'], []) + assert results['NM_001363850.1:c.216_217insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': 'chr16', 'pos': '89576930', 'ref': 'T', 'alt': 'TA'}} + assert 'hg38' not in list(results['NM_001363850.1:c.216_217insA']['primary_assembly_loci'].keys()) + assert results['NM_001363850.1:c.216_217insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': '16', 'pos': '89576930', 'ref': 'T', 'alt': 'TA'}} + assert 'grch38' not in list(results['NM_001363850.1:c.216_217insA']['primary_assembly_loci'].keys()) + assert results['NM_001363850.1:c.216_217insA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1'} + + assert 'NM_199367.1:c.216_217insA' in list(results.keys()) + assert 
results['NM_199367.1:c.216_217insA']['submitted_variant'] == '16-89576930-T-TA,TT' + assert results['NM_199367.1:c.216_217insA']['gene_symbol'] == 'SPG7' + assert results['NM_199367.1:c.216_217insA']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_199367.1:c.216_217insA']['hgvs_transcript_variant'] == 'NM_199367.1:c.216_217insA' + assert results['NM_199367.1:c.216_217insA']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.216_217insA']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.216_217insA']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.1:c.216_217insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Glu73ArgfsTer30)', 'slr': 'NP_955399.1:p.(E73Rfs*30)'} + assert results['NM_199367.1:c.216_217insA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.1:c.216_217insA']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_199367.1:c.216_217insA']['alt_genomic_loci'], []) + assert results['NM_199367.1:c.216_217insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': 'chr16', 'pos': '89576930', 'ref': 'T', 'alt': 'TA'}} + assert 'hg38' not in list(results['NM_199367.1:c.216_217insA']['primary_assembly_loci'].keys()) + assert results['NM_199367.1:c.216_217insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': '16', 'pos': '89576930', 'ref': 'T', 'alt': 'TA'}} + assert 'grch38' not in list(results['NM_199367.1:c.216_217insA']['primary_assembly_loci'].keys()) + assert results['NM_199367.1:c.216_217insA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1'} assert 'NM_003119.2:c.216_217insA' in list(results.keys()) - assert 
results['NM_003119.2:c.216_217insA']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.2:c.216_217insA']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003119.2:c.216_217insA']['alt_genomic_loci'], []) - assert results['NM_003119.2:c.216_217insA']['gene_symbol'] == 'SPG7' - assert results['NM_003119.2:c.216_217insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Glu73ArgfsTer30)', 'slr': 'NP_003110.1:p.(E73Rfs*30)'} assert results['NM_003119.2:c.216_217insA']['submitted_variant'] == '16-89576930-T-TA,TT' - assert results['NM_003119.2:c.216_217insA']['genome_context_intronic_sequence'] == '' - assert results['NM_003119.2:c.216_217insA']['hgvs_lrg_variant'] == '' + assert results['NM_003119.2:c.216_217insA']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.216_217insA']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_003119.2:c.216_217insA']['hgvs_transcript_variant'] == 'NM_003119.2:c.216_217insA' + assert results['NM_003119.2:c.216_217insA']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.216_217insA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.2:c.216_217insA']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.7126_7127insA' - assert results['NM_003119.2:c.216_217insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89576930', 'alt': 'TA'}} + assert results['NM_003119.2:c.216_217insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Glu73ArgfsTer30)', 'slr': 'NP_003110.1:p.(E73Rfs*30)'} + assert results['NM_003119.2:c.216_217insA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.216_217insA']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.2:c.216_217insA']['alt_genomic_loci'], []) + assert 
results['NM_003119.2:c.216_217insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': 'chr16', 'pos': '89576930', 'ref': 'T', 'alt': 'TA'}} assert 'hg38' not in list(results['NM_003119.2:c.216_217insA']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.216_217insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89576930', 'alt': 'TA'}} + assert results['NM_003119.2:c.216_217insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': '16', 'pos': '89576930', 'ref': 'T', 'alt': 'TA'}} assert 'grch38' not in list(results['NM_003119.2:c.216_217insA']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.216_217insA']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} - - assert 'NM_199367.2:c.216dup' in list(results.keys()) - assert results['NM_199367.2:c.216dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_199367.2:c.216dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_199367.2:c.216dup']['alt_genomic_loci'], []) - assert results['NM_199367.2:c.216dup']['gene_symbol'] == 'SPG7' - assert results['NM_199367.2:c.216dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Glu73Ter)', 'slr': 'NP_955399.1:p.(E73*)'} - assert results['NM_199367.2:c.216dup']['submitted_variant'] == '16-89576930-T-TA,TT' - assert results['NM_199367.2:c.216dup']['genome_context_intronic_sequence'] == '' - assert results['NM_199367.2:c.216dup']['hgvs_lrg_variant'] == '' - assert results['NM_199367.2:c.216dup']['hgvs_transcript_variant'] == 'NM_199367.2:c.216dup' - assert 
results['NM_199367.2:c.216dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_199367.2:c.216dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89576927', 'alt': 'CT'}} - assert results['NM_199367.2:c.216dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522dup', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89510519', 'alt': 'CT'}} - assert results['NM_199367.2:c.216dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89576927', 'alt': 'CT'}} - assert results['NM_199367.2:c.216dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522dup', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89510519', 'alt': 'CT'}} - assert results['NM_199367.2:c.216dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} + assert results['NM_003119.2:c.216_217insA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1'} assert 'NM_199367.2:c.216_217insA' in list(results.keys()) - assert results['NM_199367.2:c.216_217insA']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_199367.2:c.216_217insA']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_199367.2:c.216_217insA']['alt_genomic_loci'], []) - assert results['NM_199367.2:c.216_217insA']['gene_symbol'] == 'SPG7' - assert results['NM_199367.2:c.216_217insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Glu73ArgfsTer30)', 'slr': 'NP_955399.1:p.(E73Rfs*30)'} assert results['NM_199367.2:c.216_217insA']['submitted_variant'] == 
'16-89576930-T-TA,TT' - assert results['NM_199367.2:c.216_217insA']['genome_context_intronic_sequence'] == '' - assert results['NM_199367.2:c.216_217insA']['hgvs_lrg_variant'] == '' + assert results['NM_199367.2:c.216_217insA']['gene_symbol'] == 'SPG7' + assert results['NM_199367.2:c.216_217insA']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_199367.2:c.216_217insA']['hgvs_transcript_variant'] == 'NM_199367.2:c.216_217insA' + assert results['NM_199367.2:c.216_217insA']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.216_217insA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.2:c.216_217insA']['hgvs_refseqgene_variant'] == '' - assert results['NM_199367.2:c.216_217insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89576930', 'alt': 'TA'}} - assert results['NM_199367.2:c.216_217insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523insA', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89510522', 'alt': 'TA'}} - assert results['NM_199367.2:c.216_217insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89576930', 'alt': 'TA'}} - assert results['NM_199367.2:c.216_217insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523insA', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89510522', 'alt': 'TA'}} - assert results['NM_199367.2:c.216_217insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} + assert results['NM_199367.2:c.216_217insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Glu73ArgfsTer30)', 'slr': 
'NP_955399.1:p.(E73Rfs*30)'} + assert results['NM_199367.2:c.216_217insA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.2:c.216_217insA']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_199367.2:c.216_217insA']['alt_genomic_loci'], []) + assert results['NM_199367.2:c.216_217insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': 'chr16', 'pos': '89576930', 'ref': 'T', 'alt': 'TA'}} + assert results['NM_199367.2:c.216_217insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523insA', 'vcf': {'chr': 'chr16', 'pos': '89510522', 'ref': 'T', 'alt': 'TA'}} + assert results['NM_199367.2:c.216_217insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': '16', 'pos': '89576930', 'ref': 'T', 'alt': 'TA'}} + assert results['NM_199367.2:c.216_217insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523insA', 'vcf': {'chr': '16', 'pos': '89510522', 'ref': 'T', 'alt': 'TA'}} + assert results['NM_199367.2:c.216_217insA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1'} + + assert 'NM_003119.3:c.216dup' in list(results.keys()) + assert results['NM_003119.3:c.216dup']['submitted_variant'] == '16-89576930-T-TA,TT' + assert results['NM_003119.3:c.216dup']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.216dup']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_003119.3:c.216dup']['hgvs_transcript_variant'] == 'NM_003119.3:c.216dup' + assert results['NM_003119.3:c.216dup']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.216dup']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_003119.3:c.216dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_003119.3:c.216dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Glu73Ter)', 'slr': 'NP_003110.1:p.(E73*)'} + assert results['NM_003119.3:c.216dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.216dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.3:c.216dup']['alt_genomic_loci'], []) + assert results['NM_003119.3:c.216dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': 'chr16', 'pos': '89576927', 'ref': 'C', 'alt': 'CT'}} + assert results['NM_003119.3:c.216dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522dup', 'vcf': {'chr': 'chr16', 'pos': '89510519', 'ref': 'C', 'alt': 'CT'}} + assert results['NM_003119.3:c.216dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': '16', 'pos': '89576927', 'ref': 'C', 'alt': 'CT'}} + assert results['NM_003119.3:c.216dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522dup', 'vcf': {'chr': '16', 'pos': '89510519', 'ref': 'C', 'alt': 'CT'}} + assert results['NM_003119.3:c.216dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1'} assert 'NM_001363850.1:c.216dup' in list(results.keys()) - assert results['NM_001363850.1:c.216dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001363850.1:c.216dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001363850.1:c.216dup']['alt_genomic_loci'], []) - assert results['NM_001363850.1:c.216dup']['gene_symbol'] == 'SPG7' - assert results['NM_001363850.1:c.216dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Glu73Ter)', 'slr': 'NP_001350779.1:p.(E73*)'} 
assert results['NM_001363850.1:c.216dup']['submitted_variant'] == '16-89576930-T-TA,TT' - assert results['NM_001363850.1:c.216dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001363850.1:c.216dup']['hgvs_lrg_variant'] == '' + assert results['NM_001363850.1:c.216dup']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.216dup']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_001363850.1:c.216dup']['hgvs_transcript_variant'] == 'NM_001363850.1:c.216dup' + assert results['NM_001363850.1:c.216dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.216dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363850.1:c.216dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001363850.1:c.216dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89576927', 'alt': 'CT'}} + assert results['NM_001363850.1:c.216dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Glu73Ter)', 'slr': 'NP_001350779.1:p.(E73*)'} + assert results['NM_001363850.1:c.216dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.216dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001363850.1:c.216dup']['alt_genomic_loci'], []) + assert results['NM_001363850.1:c.216dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': 'chr16', 'pos': '89576927', 'ref': 'C', 'alt': 'CT'}} assert 'hg38' not in list(results['NM_001363850.1:c.216dup']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.216dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89576927', 'alt': 'CT'}} + assert 
results['NM_001363850.1:c.216dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': '16', 'pos': '89576927', 'ref': 'C', 'alt': 'CT'}} assert 'grch38' not in list(results['NM_001363850.1:c.216dup']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.216dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} - - assert results['flag'] == 'gene_variant' - assert 'NM_001363850.1:c.216_217insA' in list(results.keys()) - assert results['NM_001363850.1:c.216_217insA']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001363850.1:c.216_217insA']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001363850.1:c.216_217insA']['alt_genomic_loci'], []) - assert results['NM_001363850.1:c.216_217insA']['gene_symbol'] == 'SPG7' - assert results['NM_001363850.1:c.216_217insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Glu73ArgfsTer30)', 'slr': 'NP_001350779.1:p.(E73Rfs*30)'} - assert results['NM_001363850.1:c.216_217insA']['submitted_variant'] == '16-89576930-T-TA,TT' - assert results['NM_001363850.1:c.216_217insA']['genome_context_intronic_sequence'] == '' - assert results['NM_001363850.1:c.216_217insA']['hgvs_lrg_variant'] == '' - assert results['NM_001363850.1:c.216_217insA']['hgvs_transcript_variant'] == 'NM_001363850.1:c.216_217insA' - assert results['NM_001363850.1:c.216_217insA']['hgvs_refseqgene_variant'] == '' - assert results['NM_001363850.1:c.216_217insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89576930', 'alt': 'TA'}} - assert 'hg38' not in list(results['NM_001363850.1:c.216_217insA']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.216_217insA']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89576930', 'alt': 'TA'}} - assert 'grch38' not in list(results['NM_001363850.1:c.216_217insA']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.216_217insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} - - assert 'NM_199367.1:c.216_217insA' in list(results.keys()) - assert results['NM_199367.1:c.216_217insA']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_199367.1:c.216_217insA']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_199367.1:c.216_217insA']['alt_genomic_loci'], []) - assert results['NM_199367.1:c.216_217insA']['gene_symbol'] == 'SPG7' - assert results['NM_199367.1:c.216_217insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Glu73ArgfsTer30)', 'slr': 'NP_955399.1:p.(E73Rfs*30)'} - assert results['NM_199367.1:c.216_217insA']['submitted_variant'] == '16-89576930-T-TA,TT' - assert results['NM_199367.1:c.216_217insA']['genome_context_intronic_sequence'] == '' - assert results['NM_199367.1:c.216_217insA']['hgvs_lrg_variant'] == '' - assert results['NM_199367.1:c.216_217insA']['hgvs_transcript_variant'] == 'NM_199367.1:c.216_217insA' - assert results['NM_199367.1:c.216_217insA']['hgvs_refseqgene_variant'] == '' - assert results['NM_199367.1:c.216_217insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89576930', 'alt': 'TA'}} - assert 'hg38' not in list(results['NM_199367.1:c.216_217insA']['primary_assembly_loci'].keys()) - assert results['NM_199367.1:c.216_217insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89576930', 'alt': 'TA'}} - assert 
'grch38' not in list(results['NM_199367.1:c.216_217insA']['primary_assembly_loci'].keys()) - assert results['NM_199367.1:c.216_217insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} + assert results['NM_001363850.1:c.216dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1'} assert 'NM_199367.1:c.216dup' in list(results.keys()) - assert results['NM_199367.1:c.216dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_199367.1:c.216dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_199367.1:c.216dup']['alt_genomic_loci'], []) - assert results['NM_199367.1:c.216dup']['gene_symbol'] == 'SPG7' - assert results['NM_199367.1:c.216dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Glu73Ter)', 'slr': 'NP_955399.1:p.(E73*)'} assert results['NM_199367.1:c.216dup']['submitted_variant'] == '16-89576930-T-TA,TT' - assert results['NM_199367.1:c.216dup']['genome_context_intronic_sequence'] == '' - assert results['NM_199367.1:c.216dup']['hgvs_lrg_variant'] == '' + assert results['NM_199367.1:c.216dup']['gene_symbol'] == 'SPG7' + assert results['NM_199367.1:c.216dup']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_199367.1:c.216dup']['hgvs_transcript_variant'] == 'NM_199367.1:c.216dup' + assert results['NM_199367.1:c.216dup']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.216dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_199367.1:c.216dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_199367.1:c.216dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': 
'89576927', 'alt': 'CT'}} + assert results['NM_199367.1:c.216dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Glu73Ter)', 'slr': 'NP_955399.1:p.(E73*)'} + assert results['NM_199367.1:c.216dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.1:c.216dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_199367.1:c.216dup']['alt_genomic_loci'], []) + assert results['NM_199367.1:c.216dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': 'chr16', 'pos': '89576927', 'ref': 'C', 'alt': 'CT'}} assert 'hg38' not in list(results['NM_199367.1:c.216dup']['primary_assembly_loci'].keys()) - assert results['NM_199367.1:c.216dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89576927', 'alt': 'CT'}} + assert results['NM_199367.1:c.216dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': '16', 'pos': '89576927', 'ref': 'C', 'alt': 'CT'}} assert 'grch38' not in list(results['NM_199367.1:c.216dup']['primary_assembly_loci'].keys()) - assert results['NM_199367.1:c.216dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} - - assert 'NM_003119.3:c.216_217insA' in list(results.keys()) - assert results['NM_003119.3:c.216_217insA']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.3:c.216_217insA']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003119.3:c.216_217insA']['alt_genomic_loci'], []) - assert results['NM_003119.3:c.216_217insA']['gene_symbol'] == 'SPG7' - assert results['NM_003119.3:c.216_217insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Glu73ArgfsTer30)', 'slr': 'NP_003110.1:p.(E73Rfs*30)'} - assert 
results['NM_003119.3:c.216_217insA']['submitted_variant'] == '16-89576930-T-TA,TT' - assert results['NM_003119.3:c.216_217insA']['genome_context_intronic_sequence'] == '' - assert results['NM_003119.3:c.216_217insA']['hgvs_lrg_variant'] == '' - assert results['NM_003119.3:c.216_217insA']['hgvs_transcript_variant'] == 'NM_003119.3:c.216_217insA' - assert results['NM_003119.3:c.216_217insA']['hgvs_refseqgene_variant'] == '' - assert results['NM_003119.3:c.216_217insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89576930', 'alt': 'TA'}} - assert results['NM_003119.3:c.216_217insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523insA', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89510522', 'alt': 'TA'}} - assert results['NM_003119.3:c.216_217insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931insA', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89576930', 'alt': 'TA'}} - assert results['NM_003119.3:c.216_217insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523insA', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89510522', 'alt': 'TA'}} - assert results['NM_003119.3:c.216_217insA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} + assert results['NM_199367.1:c.216dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1'} assert 'NM_003119.2:c.216dup' in list(results.keys()) - assert results['NM_003119.2:c.216dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.2:c.216dup']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_003119.2:c.216dup']['alt_genomic_loci'], []) - assert results['NM_003119.2:c.216dup']['gene_symbol'] == 'SPG7' - assert results['NM_003119.2:c.216dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Glu73Ter)', 'slr': 'NP_003110.1:p.(E73*)'} assert results['NM_003119.2:c.216dup']['submitted_variant'] == '16-89576930-T-TA,TT' - assert results['NM_003119.2:c.216dup']['genome_context_intronic_sequence'] == '' - assert results['NM_003119.2:c.216dup']['hgvs_lrg_variant'] == '' + assert results['NM_003119.2:c.216dup']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.216dup']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_003119.2:c.216dup']['hgvs_transcript_variant'] == 'NM_003119.2:c.216dup' + assert results['NM_003119.2:c.216dup']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.216dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.2:c.216dup']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.7126dup' - assert results['NM_003119.2:c.216dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89576927', 'alt': 'CT'}} + assert results['NM_003119.2:c.216dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Glu73Ter)', 'slr': 'NP_003110.1:p.(E73*)'} + assert results['NM_003119.2:c.216dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.216dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.2:c.216dup']['alt_genomic_loci'], []) + assert results['NM_003119.2:c.216dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': 'chr16', 'pos': '89576927', 'ref': 'C', 'alt': 'CT'}} assert 'hg38' not in list(results['NM_003119.2:c.216dup']['primary_assembly_loci'].keys()) - assert 
results['NM_003119.2:c.216dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89576927', 'alt': 'CT'}} + assert results['NM_003119.2:c.216dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': '16', 'pos': '89576927', 'ref': 'C', 'alt': 'CT'}} assert 'grch38' not in list(results['NM_003119.2:c.216dup']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.216dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} + assert results['NM_003119.2:c.216dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1'} + assert 'NM_199367.2:c.216dup' in list(results.keys()) + assert results['NM_199367.2:c.216dup']['submitted_variant'] == '16-89576930-T-TA,TT' + assert results['NM_199367.2:c.216dup']['gene_symbol'] == 'SPG7' + assert results['NM_199367.2:c.216dup']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_199367.2:c.216dup']['hgvs_transcript_variant'] == 'NM_199367.2:c.216dup' + assert results['NM_199367.2:c.216dup']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.216dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.216dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.2:c.216dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Glu73Ter)', 'slr': 'NP_955399.1:p.(E73*)'} + assert results['NM_199367.2:c.216dup']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_199367.2:c.216dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_199367.2:c.216dup']['alt_genomic_loci'], []) + assert results['NM_199367.2:c.216dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': 'chr16', 'pos': '89576927', 'ref': 'C', 'alt': 'CT'}} + assert results['NM_199367.2:c.216dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522dup', 'vcf': {'chr': 'chr16', 'pos': '89510519', 'ref': 'C', 'alt': 'CT'}} + assert results['NM_199367.2:c.216dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930dup', 'vcf': {'chr': '16', 'pos': '89576927', 'ref': 'C', 'alt': 'CT'}} + assert results['NM_199367.2:c.216dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522dup', 'vcf': {'chr': '16', 'pos': '89510519', 'ref': 'C', 'alt': 'CT'}} + assert results['NM_199367.2:c.216dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1'} def test_variant246(self): variant = '16-89576931-G-GTG' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_199367.1:c.216_217dup' in list(results.keys()) - assert results['NM_199367.1:c.216_217dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_199367.1:c.216_217dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_199367.1:c.216_217dup']['alt_genomic_loci'], []) - assert results['NM_199367.1:c.216_217dup']['gene_symbol'] == 'SPG7' - assert results['NM_199367.1:c.216_217dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Glu73ValfsTer9)', 'slr': 'NP_955399.1:p.(E73Vfs*9)'} - assert results['NM_199367.1:c.216_217dup']['submitted_variant'] == '16-89576931-G-GTG' - assert 
results['NM_199367.1:c.216_217dup']['genome_context_intronic_sequence'] == '' - assert results['NM_199367.1:c.216_217dup']['hgvs_lrg_variant'] == '' - assert results['NM_199367.1:c.216_217dup']['hgvs_transcript_variant'] == 'NM_199367.1:c.216_217dup' - assert results['NM_199367.1:c.216_217dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_199367.1:c.216_217dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89576929', 'alt': 'TTG'}} - assert 'hg38' not in list(results['NM_199367.1:c.216_217dup']['primary_assembly_loci'].keys()) - assert results['NM_199367.1:c.216_217dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89576929', 'alt': 'TTG'}} - assert 'grch38' not in list(results['NM_199367.1:c.216_217dup']['primary_assembly_loci'].keys()) - assert results['NM_199367.1:c.216_217dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} - + assert results['flag'] == 'gene_variant' assert 'NM_003119.3:c.216_217dup' in list(results.keys()) - assert results['NM_003119.3:c.216_217dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.3:c.216_217dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003119.3:c.216_217dup']['alt_genomic_loci'], []) - assert results['NM_003119.3:c.216_217dup']['gene_symbol'] == 'SPG7' - assert results['NM_003119.3:c.216_217dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Glu73ValfsTer9)', 'slr': 'NP_003110.1:p.(E73Vfs*9)'} assert results['NM_003119.3:c.216_217dup']['submitted_variant'] == '16-89576931-G-GTG' - assert results['NM_003119.3:c.216_217dup']['genome_context_intronic_sequence'] == '' - assert 
results['NM_003119.3:c.216_217dup']['hgvs_lrg_variant'] == '' + assert results['NM_003119.3:c.216_217dup']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.216_217dup']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_003119.3:c.216_217dup']['hgvs_transcript_variant'] == 'NM_003119.3:c.216_217dup' + assert results['NM_003119.3:c.216_217dup']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.216_217dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.3:c.216_217dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_003119.3:c.216_217dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89576929', 'alt': 'TTG'}} - assert results['NM_003119.3:c.216_217dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523dup', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89510521', 'alt': 'TTG'}} - assert results['NM_003119.3:c.216_217dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89576929', 'alt': 'TTG'}} - assert results['NM_003119.3:c.216_217dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523dup', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89510521', 'alt': 'TTG'}} - assert results['NM_003119.3:c.216_217dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} - - assert 'NM_199367.2:c.216_217dup' in list(results.keys()) - assert results['NM_199367.2:c.216_217dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_199367.2:c.216_217dup']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_199367.2:c.216_217dup']['alt_genomic_loci'], []) - assert results['NM_199367.2:c.216_217dup']['gene_symbol'] == 'SPG7' - assert results['NM_199367.2:c.216_217dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Glu73ValfsTer9)', 'slr': 'NP_955399.1:p.(E73Vfs*9)'} - assert results['NM_199367.2:c.216_217dup']['submitted_variant'] == '16-89576931-G-GTG' - assert results['NM_199367.2:c.216_217dup']['genome_context_intronic_sequence'] == '' - assert results['NM_199367.2:c.216_217dup']['hgvs_lrg_variant'] == '' - assert results['NM_199367.2:c.216_217dup']['hgvs_transcript_variant'] == 'NM_199367.2:c.216_217dup' - assert results['NM_199367.2:c.216_217dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_199367.2:c.216_217dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89576929', 'alt': 'TTG'}} - assert results['NM_199367.2:c.216_217dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523dup', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89510521', 'alt': 'TTG'}} - assert results['NM_199367.2:c.216_217dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89576929', 'alt': 'TTG'}} - assert results['NM_199367.2:c.216_217dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523dup', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89510521', 'alt': 'TTG'}} - assert results['NM_199367.2:c.216_217dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} - - assert 'NM_003119.2:c.216_217dup' in list(results.keys()) - assert results['NM_003119.2:c.216_217dup']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_003119.2:c.216_217dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003119.2:c.216_217dup']['alt_genomic_loci'], []) - assert results['NM_003119.2:c.216_217dup']['gene_symbol'] == 'SPG7' - assert results['NM_003119.2:c.216_217dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Glu73ValfsTer9)', 'slr': 'NP_003110.1:p.(E73Vfs*9)'} - assert results['NM_003119.2:c.216_217dup']['submitted_variant'] == '16-89576931-G-GTG' - assert results['NM_003119.2:c.216_217dup']['genome_context_intronic_sequence'] == '' - assert results['NM_003119.2:c.216_217dup']['hgvs_lrg_variant'] == '' - assert results['NM_003119.2:c.216_217dup']['hgvs_transcript_variant'] == 'NM_003119.2:c.216_217dup' - assert results['NM_003119.2:c.216_217dup']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.7126_7127dup' - assert results['NM_003119.2:c.216_217dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89576929', 'alt': 'TTG'}} - assert 'hg38' not in list(results['NM_003119.2:c.216_217dup']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.216_217dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89576929', 'alt': 'TTG'}} - assert 'grch38' not in list(results['NM_003119.2:c.216_217dup']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.216_217dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} + assert results['NM_003119.3:c.216_217dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Glu73ValfsTer9)', 'slr': 'NP_003110.1:p.(E73Vfs*9)'} + assert results['NM_003119.3:c.216_217dup']['hgvs_lrg_transcript_variant'] == 
'' + assert results['NM_003119.3:c.216_217dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.3:c.216_217dup']['alt_genomic_loci'], []) + assert results['NM_003119.3:c.216_217dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': 'chr16', 'pos': '89576929', 'ref': 'T', 'alt': 'TTG'}} + assert results['NM_003119.3:c.216_217dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523dup', 'vcf': {'chr': 'chr16', 'pos': '89510521', 'ref': 'T', 'alt': 'TTG'}} + assert results['NM_003119.3:c.216_217dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': '16', 'pos': '89576929', 'ref': 'T', 'alt': 'TTG'}} + assert results['NM_003119.3:c.216_217dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523dup', 'vcf': {'chr': '16', 'pos': '89510521', 'ref': 'T', 'alt': 'TTG'}} + assert results['NM_003119.3:c.216_217dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1'} - assert results['flag'] == 'gene_variant' assert 'NM_001363850.1:c.216_217dup' in list(results.keys()) - assert results['NM_001363850.1:c.216_217dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001363850.1:c.216_217dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001363850.1:c.216_217dup']['alt_genomic_loci'], []) - assert results['NM_001363850.1:c.216_217dup']['gene_symbol'] == 'SPG7' - assert results['NM_001363850.1:c.216_217dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Glu73ValfsTer9)', 'slr': 'NP_001350779.1:p.(E73Vfs*9)'} assert results['NM_001363850.1:c.216_217dup']['submitted_variant'] == '16-89576931-G-GTG' - assert 
results['NM_001363850.1:c.216_217dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001363850.1:c.216_217dup']['hgvs_lrg_variant'] == '' + assert results['NM_001363850.1:c.216_217dup']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.216_217dup']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_001363850.1:c.216_217dup']['hgvs_transcript_variant'] == 'NM_001363850.1:c.216_217dup' + assert results['NM_001363850.1:c.216_217dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.216_217dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363850.1:c.216_217dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001363850.1:c.216_217dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': 'chr16', 'ref': 'T', 'pos': '89576929', 'alt': 'TTG'}} + assert results['NM_001363850.1:c.216_217dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Glu73ValfsTer9)', 'slr': 'NP_001350779.1:p.(E73Vfs*9)'} + assert results['NM_001363850.1:c.216_217dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.216_217dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001363850.1:c.216_217dup']['alt_genomic_loci'], []) + assert results['NM_001363850.1:c.216_217dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': 'chr16', 'pos': '89576929', 'ref': 'T', 'alt': 'TTG'}} assert 'hg38' not in list(results['NM_001363850.1:c.216_217dup']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.216_217dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': '16', 'ref': 'T', 'pos': '89576929', 'alt': 'TTG'}} + assert 
results['NM_001363850.1:c.216_217dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': '16', 'pos': '89576929', 'ref': 'T', 'alt': 'TTG'}} assert 'grch38' not in list(results['NM_001363850.1:c.216_217dup']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.216_217dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} + assert results['NM_001363850.1:c.216_217dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1'} + + assert 'NM_199367.1:c.216_217dup' in list(results.keys()) + assert results['NM_199367.1:c.216_217dup']['submitted_variant'] == '16-89576931-G-GTG' + assert results['NM_199367.1:c.216_217dup']['gene_symbol'] == 'SPG7' + assert results['NM_199367.1:c.216_217dup']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_199367.1:c.216_217dup']['hgvs_transcript_variant'] == 'NM_199367.1:c.216_217dup' + assert results['NM_199367.1:c.216_217dup']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.216_217dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.216_217dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.1:c.216_217dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Glu73ValfsTer9)', 'slr': 'NP_955399.1:p.(E73Vfs*9)'} + assert results['NM_199367.1:c.216_217dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.1:c.216_217dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_199367.1:c.216_217dup']['alt_genomic_loci'], []) + assert results['NM_199367.1:c.216_217dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': 'chr16', 'pos': '89576929', 'ref': 'T', 'alt': 'TTG'}} + assert 'hg38' not in list(results['NM_199367.1:c.216_217dup']['primary_assembly_loci'].keys()) + assert results['NM_199367.1:c.216_217dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': '16', 'pos': '89576929', 'ref': 'T', 'alt': 'TTG'}} + assert 'grch38' not in list(results['NM_199367.1:c.216_217dup']['primary_assembly_loci'].keys()) + assert results['NM_199367.1:c.216_217dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1'} + + assert 'NM_003119.2:c.216_217dup' in list(results.keys()) + assert results['NM_003119.2:c.216_217dup']['submitted_variant'] == '16-89576931-G-GTG' + assert results['NM_003119.2:c.216_217dup']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.216_217dup']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_003119.2:c.216_217dup']['hgvs_transcript_variant'] == 'NM_003119.2:c.216_217dup' + assert results['NM_003119.2:c.216_217dup']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.216_217dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.216_217dup']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.7126_7127dup' + assert results['NM_003119.2:c.216_217dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Glu73ValfsTer9)', 'slr': 'NP_003110.1:p.(E73Vfs*9)'} + assert results['NM_003119.2:c.216_217dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.216_217dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.2:c.216_217dup']['alt_genomic_loci'], []) + assert results['NM_003119.2:c.216_217dup']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': 'chr16', 'pos': '89576929', 'ref': 'T', 'alt': 'TTG'}} + assert 'hg38' not in list(results['NM_003119.2:c.216_217dup']['primary_assembly_loci'].keys()) + assert results['NM_003119.2:c.216_217dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': '16', 'pos': '89576929', 'ref': 'T', 'alt': 'TTG'}} + assert 'grch38' not in list(results['NM_003119.2:c.216_217dup']['primary_assembly_loci'].keys()) + assert results['NM_003119.2:c.216_217dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1'} + assert 'NM_199367.2:c.216_217dup' in list(results.keys()) + assert results['NM_199367.2:c.216_217dup']['submitted_variant'] == '16-89576931-G-GTG' + assert results['NM_199367.2:c.216_217dup']['gene_symbol'] == 'SPG7' + assert results['NM_199367.2:c.216_217dup']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_199367.2:c.216_217dup']['hgvs_transcript_variant'] == 'NM_199367.2:c.216_217dup' + assert results['NM_199367.2:c.216_217dup']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.216_217dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.216_217dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.2:c.216_217dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Glu73ValfsTer9)', 'slr': 'NP_955399.1:p.(E73Vfs*9)'} + assert results['NM_199367.2:c.216_217dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.2:c.216_217dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_199367.2:c.216_217dup']['alt_genomic_loci'], []) + assert 
results['NM_199367.2:c.216_217dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': 'chr16', 'pos': '89576929', 'ref': 'T', 'alt': 'TTG'}} + assert results['NM_199367.2:c.216_217dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523dup', 'vcf': {'chr': 'chr16', 'pos': '89510521', 'ref': 'T', 'alt': 'TTG'}} + assert results['NM_199367.2:c.216_217dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89576930_89576931dup', 'vcf': {'chr': '16', 'pos': '89576929', 'ref': 'T', 'alt': 'TTG'}} + assert results['NM_199367.2:c.216_217dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89510522_89510523dup', 'vcf': {'chr': '16', 'pos': '89510521', 'ref': 'T', 'alt': 'TTG'}} + assert results['NM_199367.2:c.216_217dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1'} def test_variant247(self): variant = '16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_199367.1:c.1046_1071del' in list(results.keys()) - assert results['NM_199367.1:c.1046_1071del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_199367.1:c.1046_1071del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_199367.1:c.1046_1071del']['alt_genomic_loci'], []) - assert results['NM_199367.1:c.1046_1071del']['gene_symbol'] == 'SPG7' - assert results['NM_199367.1:c.1046_1071del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Gly349AlafsTer38)', 'slr': 'NP_955399.1:p.(G349Afs*38)'} - assert results['NM_199367.1:c.1046_1071del']['submitted_variant'] == '16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C' - assert 
results['NM_199367.1:c.1046_1071del']['genome_context_intronic_sequence'] == '' - assert results['NM_199367.1:c.1046_1071del']['hgvs_lrg_variant'] == '' - assert results['NM_199367.1:c.1046_1071del']['hgvs_transcript_variant'] == 'NM_199367.1:c.1046_1071del' - assert results['NM_199367.1:c.1046_1071del']['hgvs_refseqgene_variant'] == '' - assert results['NM_199367.1:c.1046_1071del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': 'chr16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89598368', 'alt': 'C'}} - assert 'hg38' not in list(results['NM_199367.1:c.1046_1071del']['primary_assembly_loci'].keys()) - assert results['NM_199367.1:c.1046_1071del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': '16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89598368', 'alt': 'C'}} - assert 'grch38' not in list(results['NM_199367.1:c.1046_1071del']['primary_assembly_loci'].keys()) - assert results['NM_199367.1:c.1046_1071del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1'} + assert results['flag'] == 'gene_variant' + assert 'NM_003119.3:c.1046_1071del' in list(results.keys()) + assert results['NM_003119.3:c.1046_1071del']['submitted_variant'] == '16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C' + assert results['NM_003119.3:c.1046_1071del']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.1046_1071del']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_003119.3:c.1046_1071del']['hgvs_transcript_variant'] == 'NM_003119.3:c.1046_1071del' + assert results['NM_003119.3:c.1046_1071del']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.1046_1071del']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_003119.3:c.1046_1071del']['hgvs_refseqgene_variant'] == '' + assert results['NM_003119.3:c.1046_1071del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Gly349AlafsTer38)', 'slr': 'NP_003110.1:p.(G349Afs*38)'} + assert results['NM_003119.3:c.1046_1071del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.1046_1071del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.3:c.1046_1071del']['alt_genomic_loci'], []) + assert results['NM_003119.3:c.1046_1071del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': 'chr16', 'pos': '89598368', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'alt': 'C'}} + assert results['NM_003119.3:c.1046_1071del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89531962_89531987del', 'vcf': {'chr': 'chr16', 'pos': '89531960', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'alt': 'C'}} + assert results['NM_003119.3:c.1046_1071del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': '16', 'pos': '89598368', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'alt': 'C'}} + assert results['NM_003119.3:c.1046_1071del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89531962_89531987del', 'vcf': {'chr': '16', 'pos': '89531960', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'alt': 'C'}} + assert results['NM_003119.3:c.1046_1071del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1'} assert 'NM_001363850.1:c.1046_1071del' in list(results.keys()) - assert results['NM_001363850.1:c.1046_1071del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001363850.1:c.1046_1071del']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_001363850.1:c.1046_1071del']['alt_genomic_loci'], []) - assert results['NM_001363850.1:c.1046_1071del']['gene_symbol'] == 'SPG7' - assert results['NM_001363850.1:c.1046_1071del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Gly349AlafsTer38)', 'slr': 'NP_001350779.1:p.(G349Afs*38)'} assert results['NM_001363850.1:c.1046_1071del']['submitted_variant'] == '16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C' - assert results['NM_001363850.1:c.1046_1071del']['genome_context_intronic_sequence'] == '' - assert results['NM_001363850.1:c.1046_1071del']['hgvs_lrg_variant'] == '' + assert results['NM_001363850.1:c.1046_1071del']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.1046_1071del']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_001363850.1:c.1046_1071del']['hgvs_transcript_variant'] == 'NM_001363850.1:c.1046_1071del' + assert results['NM_001363850.1:c.1046_1071del']['genome_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.1046_1071del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363850.1:c.1046_1071del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001363850.1:c.1046_1071del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': 'chr16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89598368', 'alt': 'C'}} + assert results['NM_001363850.1:c.1046_1071del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Gly349AlafsTer38)', 'slr': 'NP_001350779.1:p.(G349Afs*38)'} + assert results['NM_001363850.1:c.1046_1071del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.1046_1071del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001363850.1:c.1046_1071del']['alt_genomic_loci'], []) + assert 
results['NM_001363850.1:c.1046_1071del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': 'chr16', 'pos': '89598368', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'alt': 'C'}} assert 'hg38' not in list(results['NM_001363850.1:c.1046_1071del']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.1046_1071del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': '16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89598368', 'alt': 'C'}} + assert results['NM_001363850.1:c.1046_1071del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': '16', 'pos': '89598368', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'alt': 'C'}} assert 'grch38' not in list(results['NM_001363850.1:c.1046_1071del']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.1046_1071del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} + assert results['NM_001363850.1:c.1046_1071del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1'} - assert 'NM_199367.2:c.1046_1071del' in list(results.keys()) - assert results['NM_199367.2:c.1046_1071del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_199367.2:c.1046_1071del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_199367.2:c.1046_1071del']['alt_genomic_loci'], []) - assert results['NM_199367.2:c.1046_1071del']['gene_symbol'] == 'SPG7' - assert results['NM_199367.2:c.1046_1071del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Gly349AlafsTer38)', 'slr': 'NP_955399.1:p.(G349Afs*38)'} - assert 
results['NM_199367.2:c.1046_1071del']['submitted_variant'] == '16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C' - assert results['NM_199367.2:c.1046_1071del']['genome_context_intronic_sequence'] == '' - assert results['NM_199367.2:c.1046_1071del']['hgvs_lrg_variant'] == '' - assert results['NM_199367.2:c.1046_1071del']['hgvs_transcript_variant'] == 'NM_199367.2:c.1046_1071del' - assert results['NM_199367.2:c.1046_1071del']['hgvs_refseqgene_variant'] == '' - assert results['NM_199367.2:c.1046_1071del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': 'chr16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89598368', 'alt': 'C'}} - assert results['NM_199367.2:c.1046_1071del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89531962_89531987del', 'vcf': {'chr': 'chr16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89531960', 'alt': 'C'}} - assert results['NM_199367.2:c.1046_1071del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': '16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89598368', 'alt': 'C'}} - assert results['NM_199367.2:c.1046_1071del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89531962_89531987del', 'vcf': {'chr': '16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89531960', 'alt': 'C'}} - assert results['NM_199367.2:c.1046_1071del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2'} + assert 'NM_199367.1:c.1046_1071del' in list(results.keys()) + assert results['NM_199367.1:c.1046_1071del']['submitted_variant'] == '16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C' + assert results['NM_199367.1:c.1046_1071del']['gene_symbol'] == 'SPG7' + assert results['NM_199367.1:c.1046_1071del']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': 
'6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_199367.1:c.1046_1071del']['hgvs_transcript_variant'] == 'NM_199367.1:c.1046_1071del' + assert results['NM_199367.1:c.1046_1071del']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.1046_1071del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.1:c.1046_1071del']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.1:c.1046_1071del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Gly349AlafsTer38)', 'slr': 'NP_955399.1:p.(G349Afs*38)'} + assert results['NM_199367.1:c.1046_1071del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.1:c.1046_1071del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_199367.1:c.1046_1071del']['alt_genomic_loci'], []) + assert results['NM_199367.1:c.1046_1071del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': 'chr16', 'pos': '89598368', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'alt': 'C'}} + assert 'hg38' not in list(results['NM_199367.1:c.1046_1071del']['primary_assembly_loci'].keys()) + assert results['NM_199367.1:c.1046_1071del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': '16', 'pos': '89598368', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'alt': 'C'}} + assert 'grch38' not in list(results['NM_199367.1:c.1046_1071del']['primary_assembly_loci'].keys()) + assert results['NM_199367.1:c.1046_1071del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1'} - assert results['flag'] == 'gene_variant' assert 'NM_003119.2:c.1046_1071del' in list(results.keys()) - assert results['NM_003119.2:c.1046_1071del']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_003119.2:c.1046_1071del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003119.2:c.1046_1071del']['alt_genomic_loci'], []) - assert results['NM_003119.2:c.1046_1071del']['gene_symbol'] == 'SPG7' - assert results['NM_003119.2:c.1046_1071del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Gly349AlafsTer38)', 'slr': 'NP_003110.1:p.(G349Afs*38)'} assert results['NM_003119.2:c.1046_1071del']['submitted_variant'] == '16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C' - assert results['NM_003119.2:c.1046_1071del']['genome_context_intronic_sequence'] == '' - assert results['NM_003119.2:c.1046_1071del']['hgvs_lrg_variant'] == '' + assert results['NM_003119.2:c.1046_1071del']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.1046_1071del']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_003119.2:c.1046_1071del']['hgvs_transcript_variant'] == 'NM_003119.2:c.1046_1071del' + assert results['NM_003119.2:c.1046_1071del']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.1046_1071del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.2:c.1046_1071del']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.28566_28591del' - assert results['NM_003119.2:c.1046_1071del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': 'chr16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89598368', 'alt': 'C'}} + assert results['NM_003119.2:c.1046_1071del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Gly349AlafsTer38)', 'slr': 'NP_003110.1:p.(G349Afs*38)'} + assert results['NM_003119.2:c.1046_1071del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.1046_1071del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.2:c.1046_1071del']['alt_genomic_loci'], []) + assert 
results['NM_003119.2:c.1046_1071del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': 'chr16', 'pos': '89598368', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'alt': 'C'}} assert 'hg38' not in list(results['NM_003119.2:c.1046_1071del']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.1046_1071del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': '16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89598368', 'alt': 'C'}} + assert results['NM_003119.2:c.1046_1071del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': '16', 'pos': '89598368', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'alt': 'C'}} assert 'grch38' not in list(results['NM_003119.2:c.1046_1071del']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.1046_1071del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} - - assert 'NM_003119.3:c.1046_1071del' in list(results.keys()) - assert results['NM_003119.3:c.1046_1071del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.3:c.1046_1071del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003119.3:c.1046_1071del']['alt_genomic_loci'], []) - assert results['NM_003119.3:c.1046_1071del']['gene_symbol'] == 'SPG7' - assert results['NM_003119.3:c.1046_1071del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Gly349AlafsTer38)', 'slr': 'NP_003110.1:p.(G349Afs*38)'} - assert results['NM_003119.3:c.1046_1071del']['submitted_variant'] == '16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C' - assert results['NM_003119.3:c.1046_1071del']['genome_context_intronic_sequence'] == '' - assert 
results['NM_003119.3:c.1046_1071del']['hgvs_lrg_variant'] == '' - assert results['NM_003119.3:c.1046_1071del']['hgvs_transcript_variant'] == 'NM_003119.3:c.1046_1071del' - assert results['NM_003119.3:c.1046_1071del']['hgvs_refseqgene_variant'] == '' - assert results['NM_003119.3:c.1046_1071del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': 'chr16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89598368', 'alt': 'C'}} - assert results['NM_003119.3:c.1046_1071del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89531962_89531987del', 'vcf': {'chr': 'chr16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89531960', 'alt': 'C'}} - assert results['NM_003119.3:c.1046_1071del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': '16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89598368', 'alt': 'C'}} - assert results['NM_003119.3:c.1046_1071del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89531962_89531987del', 'vcf': {'chr': '16', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'pos': '89531960', 'alt': 'C'}} - assert results['NM_003119.3:c.1046_1071del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} + assert results['NM_003119.2:c.1046_1071del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1'} + assert 'NM_199367.2:c.1046_1071del' in list(results.keys()) + assert results['NM_199367.2:c.1046_1071del']['submitted_variant'] == '16-89598368-CGGCCCCCCCGGCTGTGGGAAGACGCT-C' + assert results['NM_199367.2:c.1046_1071del']['gene_symbol'] == 'SPG7' + assert 
results['NM_199367.2:c.1046_1071del']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_199367.2:c.1046_1071del']['hgvs_transcript_variant'] == 'NM_199367.2:c.1046_1071del' + assert results['NM_199367.2:c.1046_1071del']['genome_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.1046_1071del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_199367.2:c.1046_1071del']['hgvs_refseqgene_variant'] == '' + assert results['NM_199367.2:c.1046_1071del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_955399.1:p.(Gly349AlafsTer38)', 'slr': 'NP_955399.1:p.(G349Afs*38)'} + assert results['NM_199367.2:c.1046_1071del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_199367.2:c.1046_1071del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_199367.2:c.1046_1071del']['alt_genomic_loci'], []) + assert results['NM_199367.2:c.1046_1071del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': 'chr16', 'pos': '89598368', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'alt': 'C'}} + assert results['NM_199367.2:c.1046_1071del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89531962_89531987del', 'vcf': {'chr': 'chr16', 'pos': '89531960', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'alt': 'C'}} + assert results['NM_199367.2:c.1046_1071del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89598370_89598395del', 'vcf': {'chr': '16', 'pos': '89598368', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'alt': 'C'}} + assert results['NM_199367.2:c.1046_1071del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89531962_89531987del', 'vcf': {'chr': '16', 'pos': '89531960', 'ref': 'CGGCCCCCCCGGCTGTGGGAAGACGCT', 'alt': 'C'}} + assert results['NM_199367.2:c.1046_1071del']['reference_sequence_records'] == 
{'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_199367.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_955399.1'} def test_variant248(self): variant = '16-89613064-AGGAGAGGCG-AT' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' + assert 'NM_003119.3:c.1450-1_1457delinsT' in list(results.keys()) + assert results['NM_003119.3:c.1450-1_1457delinsT']['submitted_variant'] == '16-89613064-AGGAGAGGCG-AT' + assert results['NM_003119.3:c.1450-1_1457delinsT']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.1450-1_1457delinsT']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_003119.3:c.1450-1_1457delinsT']['hgvs_transcript_variant'] == 'NM_003119.3:c.1450-1_1457delinsT' + assert results['NM_003119.3:c.1450-1_1457delinsT']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.3):c.1450-1_1457delinsT' + assert results['NM_003119.3:c.1450-1_1457delinsT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.1450-1_1457delinsT']['hgvs_refseqgene_variant'] == '' + assert results['NM_003119.3:c.1450-1_1457delinsT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} + assert results['NM_003119.3:c.1450-1_1457delinsT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.1450-1_1457delinsT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.3:c.1450-1_1457delinsT']['alt_genomic_loci'], []) + assert results['NM_003119.3:c.1450-1_1457delinsT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613065_89613073delinsT', 'vcf': {'chr': 'chr16', 'pos': '89613065', 'ref': 'GGAGAGGCG', 'alt': 'T'}} + assert results['NM_003119.3:c.1450-1_1457delinsT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89546657_89546665delinsT', 'vcf': 
{'chr': 'chr16', 'pos': '89546657', 'ref': 'GGAGAGGCG', 'alt': 'T'}} + assert results['NM_003119.3:c.1450-1_1457delinsT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613065_89613073delinsT', 'vcf': {'chr': '16', 'pos': '89613065', 'ref': 'GGAGAGGCG', 'alt': 'T'}} + assert results['NM_003119.3:c.1450-1_1457delinsT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89546657_89546665delinsT', 'vcf': {'chr': '16', 'pos': '89546657', 'ref': 'GGAGAGGCG', 'alt': 'T'}} + assert results['NM_003119.3:c.1450-1_1457delinsT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1'} + assert 'NM_001363850.1:c.1450-1_1457delinsT' in list(results.keys()) - assert results['NM_001363850.1:c.1450-1_1457delinsT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001363850.1:c.1450-1_1457delinsT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001363850.1:c.1450-1_1457delinsT']['alt_genomic_loci'], []) - assert results['NM_001363850.1:c.1450-1_1457delinsT']['gene_symbol'] == 'SPG7' - assert results['NM_001363850.1:c.1450-1_1457delinsT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.?', 'slr': 'NP_001350779.1:p.?'} assert results['NM_001363850.1:c.1450-1_1457delinsT']['submitted_variant'] == '16-89613064-AGGAGAGGCG-AT' - assert results['NM_001363850.1:c.1450-1_1457delinsT']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001363850.1):c.1450-1_1457delinsT' - assert results['NM_001363850.1:c.1450-1_1457delinsT']['hgvs_lrg_variant'] == '' + assert results['NM_001363850.1:c.1450-1_1457delinsT']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.1450-1_1457delinsT']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert 
results['NM_001363850.1:c.1450-1_1457delinsT']['hgvs_transcript_variant'] == 'NM_001363850.1:c.1450-1_1457delinsT' + assert results['NM_001363850.1:c.1450-1_1457delinsT']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_001363850.1):c.1450-1_1457delinsT' + assert results['NM_001363850.1:c.1450-1_1457delinsT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363850.1:c.1450-1_1457delinsT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001363850.1:c.1450-1_1457delinsT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613065_89613073delinsT', 'vcf': {'chr': 'chr16', 'ref': 'GGAGAGGCG', 'pos': '89613065', 'alt': 'T'}} + assert results['NM_001363850.1:c.1450-1_1457delinsT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.?', 'slr': 'NP_001350779.1:p.?'} + assert results['NM_001363850.1:c.1450-1_1457delinsT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.1450-1_1457delinsT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001363850.1:c.1450-1_1457delinsT']['alt_genomic_loci'], []) + assert results['NM_001363850.1:c.1450-1_1457delinsT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613065_89613073delinsT', 'vcf': {'chr': 'chr16', 'pos': '89613065', 'ref': 'GGAGAGGCG', 'alt': 'T'}} assert 'hg38' not in list(results['NM_001363850.1:c.1450-1_1457delinsT']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.1450-1_1457delinsT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613065_89613073delinsT', 'vcf': {'chr': '16', 'ref': 'GGAGAGGCG', 'pos': '89613065', 'alt': 'T'}} + assert results['NM_001363850.1:c.1450-1_1457delinsT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613065_89613073delinsT', 'vcf': {'chr': '16', 'pos': '89613065', 'ref': 'GGAGAGGCG', 'alt': 'T'}} assert 'grch38' not in 
list(results['NM_001363850.1:c.1450-1_1457delinsT']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.1450-1_1457delinsT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} + assert results['NM_001363850.1:c.1450-1_1457delinsT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1'} - assert results['flag'] == 'gene_variant' assert 'NM_003119.2:c.1450-1_1457delinsT' in list(results.keys()) - assert results['NM_003119.2:c.1450-1_1457delinsT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.2:c.1450-1_1457delinsT']['refseqgene_context_intronic_sequence'] == 'NG_008082.1(NM_003119.2):c.1450-1_1457delinsT' - self.assertCountEqual(results['NM_003119.2:c.1450-1_1457delinsT']['alt_genomic_loci'], []) - assert results['NM_003119.2:c.1450-1_1457delinsT']['gene_symbol'] == 'SPG7' - assert results['NM_003119.2:c.1450-1_1457delinsT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} assert results['NM_003119.2:c.1450-1_1457delinsT']['submitted_variant'] == '16-89613064-AGGAGAGGCG-AT' - assert results['NM_003119.2:c.1450-1_1457delinsT']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.2):c.1450-1_1457delinsT' - assert results['NM_003119.2:c.1450-1_1457delinsT']['hgvs_lrg_variant'] == '' + assert results['NM_003119.2:c.1450-1_1457delinsT']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.1450-1_1457delinsT']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_003119.2:c.1450-1_1457delinsT']['hgvs_transcript_variant'] == 'NM_003119.2:c.1450-1_1457delinsT' + assert results['NM_003119.2:c.1450-1_1457delinsT']['genome_context_intronic_sequence'] == 
'NC_000016.9(NM_003119.2):c.1450-1_1457delinsT' + assert results['NM_003119.2:c.1450-1_1457delinsT']['refseqgene_context_intronic_sequence'] == 'NG_008082.1(NM_003119.2):c.1450-1_1457delinsT' assert results['NM_003119.2:c.1450-1_1457delinsT']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.43261_43269delinsT' - assert results['NM_003119.2:c.1450-1_1457delinsT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613065_89613073delinsT', 'vcf': {'chr': 'chr16', 'ref': 'GGAGAGGCG', 'pos': '89613065', 'alt': 'T'}} + assert results['NM_003119.2:c.1450-1_1457delinsT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} + assert results['NM_003119.2:c.1450-1_1457delinsT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.1450-1_1457delinsT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.2:c.1450-1_1457delinsT']['alt_genomic_loci'], []) + assert results['NM_003119.2:c.1450-1_1457delinsT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613065_89613073delinsT', 'vcf': {'chr': 'chr16', 'pos': '89613065', 'ref': 'GGAGAGGCG', 'alt': 'T'}} assert 'hg38' not in list(results['NM_003119.2:c.1450-1_1457delinsT']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.1450-1_1457delinsT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613065_89613073delinsT', 'vcf': {'chr': '16', 'ref': 'GGAGAGGCG', 'pos': '89613065', 'alt': 'T'}} + assert results['NM_003119.2:c.1450-1_1457delinsT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613065_89613073delinsT', 'vcf': {'chr': '16', 'pos': '89613065', 'ref': 'GGAGAGGCG', 'alt': 'T'}} assert 'grch38' not in list(results['NM_003119.2:c.1450-1_1457delinsT']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.1450-1_1457delinsT']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} - - assert 'NM_003119.3:c.1450-1_1457delinsT' in list(results.keys()) - assert results['NM_003119.3:c.1450-1_1457delinsT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.3:c.1450-1_1457delinsT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003119.3:c.1450-1_1457delinsT']['alt_genomic_loci'], []) - assert results['NM_003119.3:c.1450-1_1457delinsT']['gene_symbol'] == 'SPG7' - assert results['NM_003119.3:c.1450-1_1457delinsT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.?', 'slr': 'NP_003110.1:p.?'} - assert results['NM_003119.3:c.1450-1_1457delinsT']['submitted_variant'] == '16-89613064-AGGAGAGGCG-AT' - assert results['NM_003119.3:c.1450-1_1457delinsT']['genome_context_intronic_sequence'] == 'NC_000016.9(NM_003119.3):c.1450-1_1457delinsT' - assert results['NM_003119.3:c.1450-1_1457delinsT']['hgvs_lrg_variant'] == '' - assert results['NM_003119.3:c.1450-1_1457delinsT']['hgvs_transcript_variant'] == 'NM_003119.3:c.1450-1_1457delinsT' - assert results['NM_003119.3:c.1450-1_1457delinsT']['hgvs_refseqgene_variant'] == '' - assert results['NM_003119.3:c.1450-1_1457delinsT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613065_89613073delinsT', 'vcf': {'chr': 'chr16', 'ref': 'GGAGAGGCG', 'pos': '89613065', 'alt': 'T'}} - assert results['NM_003119.3:c.1450-1_1457delinsT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89546657_89546665delinsT', 'vcf': {'chr': 'chr16', 'ref': 'GGAGAGGCG', 'pos': '89546657', 'alt': 'T'}} - assert results['NM_003119.3:c.1450-1_1457delinsT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613065_89613073delinsT', 'vcf': {'chr': '16', 'ref': 'GGAGAGGCG', 'pos': '89613065', 'alt': 'T'}} - 
assert results['NM_003119.3:c.1450-1_1457delinsT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89546657_89546665delinsT', 'vcf': {'chr': '16', 'ref': 'GGAGAGGCG', 'pos': '89546657', 'alt': 'T'}} - assert results['NM_003119.3:c.1450-1_1457delinsT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} - + assert results['NM_003119.2:c.1450-1_1457delinsT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1'} def test_variant249(self): variant = '16-89613069-AGGCGGGAGA-AT' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_003119.2:c.1454_1462delinsT' in list(results.keys()) - assert results['NM_003119.2:c.1454_1462delinsT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.2:c.1454_1462delinsT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003119.2:c.1454_1462delinsT']['alt_genomic_loci'], []) - assert results['NM_003119.2:c.1454_1462delinsT']['gene_symbol'] == 'SPG7' - assert results['NM_003119.2:c.1454_1462delinsT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Arg485IlefsTer3)', 'slr': 'NP_003110.1:p.(R485Ifs*3)'} - assert results['NM_003119.2:c.1454_1462delinsT']['submitted_variant'] == '16-89613069-AGGCGGGAGA-AT' - assert results['NM_003119.2:c.1454_1462delinsT']['genome_context_intronic_sequence'] == '' - assert results['NM_003119.2:c.1454_1462delinsT']['hgvs_lrg_variant'] == '' - assert results['NM_003119.2:c.1454_1462delinsT']['hgvs_transcript_variant'] == 'NM_003119.2:c.1454_1462delinsT' - assert results['NM_003119.2:c.1454_1462delinsT']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.43266_43274delinsT' - assert 
results['NM_003119.2:c.1454_1462delinsT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613070_89613078delinsT', 'vcf': {'chr': 'chr16', 'ref': 'GGCGGGAGA', 'pos': '89613070', 'alt': 'T'}} - assert 'hg38' not in list(results['NM_003119.2:c.1454_1462delinsT']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.1454_1462delinsT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613070_89613078delinsT', 'vcf': {'chr': '16', 'ref': 'GGCGGGAGA', 'pos': '89613070', 'alt': 'T'}} - assert 'grch38' not in list(results['NM_003119.2:c.1454_1462delinsT']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.1454_1462delinsT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} - assert results['flag'] == 'gene_variant' + assert 'NM_003119.3:c.1454_1462delinsT' in list(results.keys()) + assert results['NM_003119.3:c.1454_1462delinsT']['submitted_variant'] == '16-89613069-AGGCGGGAGA-AT' + assert results['NM_003119.3:c.1454_1462delinsT']['gene_symbol'] == 'SPG7' + assert results['NM_003119.3:c.1454_1462delinsT']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_003119.3:c.1454_1462delinsT']['hgvs_transcript_variant'] == 'NM_003119.3:c.1454_1462delinsT' + assert results['NM_003119.3:c.1454_1462delinsT']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.1454_1462delinsT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.1454_1462delinsT']['hgvs_refseqgene_variant'] == '' + assert results['NM_003119.3:c.1454_1462delinsT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Arg485IlefsTer3)', 'slr': 'NP_003110.1:p.(R485Ifs*3)'} + assert 
results['NM_003119.3:c.1454_1462delinsT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.1454_1462delinsT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.3:c.1454_1462delinsT']['alt_genomic_loci'], []) + assert results['NM_003119.3:c.1454_1462delinsT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613070_89613078delinsT', 'vcf': {'chr': 'chr16', 'pos': '89613070', 'ref': 'GGCGGGAGA', 'alt': 'T'}} + assert results['NM_003119.3:c.1454_1462delinsT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89546662_89546670delinsT', 'vcf': {'chr': 'chr16', 'pos': '89546662', 'ref': 'GGCGGGAGA', 'alt': 'T'}} + assert results['NM_003119.3:c.1454_1462delinsT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613070_89613078delinsT', 'vcf': {'chr': '16', 'pos': '89613070', 'ref': 'GGCGGGAGA', 'alt': 'T'}} + assert results['NM_003119.3:c.1454_1462delinsT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89546662_89546670delinsT', 'vcf': {'chr': '16', 'pos': '89546662', 'ref': 'GGCGGGAGA', 'alt': 'T'}} + assert results['NM_003119.3:c.1454_1462delinsT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1'} + assert 'NM_001363850.1:c.1454_1462delinsT' in list(results.keys()) - assert results['NM_001363850.1:c.1454_1462delinsT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001363850.1:c.1454_1462delinsT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001363850.1:c.1454_1462delinsT']['alt_genomic_loci'], []) - assert results['NM_001363850.1:c.1454_1462delinsT']['gene_symbol'] == 'SPG7' - assert results['NM_001363850.1:c.1454_1462delinsT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Arg485IlefsTer3)', 'slr': 
'NP_001350779.1:p.(R485Ifs*3)'} assert results['NM_001363850.1:c.1454_1462delinsT']['submitted_variant'] == '16-89613069-AGGCGGGAGA-AT' - assert results['NM_001363850.1:c.1454_1462delinsT']['genome_context_intronic_sequence'] == '' - assert results['NM_001363850.1:c.1454_1462delinsT']['hgvs_lrg_variant'] == '' + assert results['NM_001363850.1:c.1454_1462delinsT']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.1454_1462delinsT']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_001363850.1:c.1454_1462delinsT']['hgvs_transcript_variant'] == 'NM_001363850.1:c.1454_1462delinsT' + assert results['NM_001363850.1:c.1454_1462delinsT']['genome_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.1454_1462delinsT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363850.1:c.1454_1462delinsT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001363850.1:c.1454_1462delinsT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613070_89613078delinsT', 'vcf': {'chr': 'chr16', 'ref': 'GGCGGGAGA', 'pos': '89613070', 'alt': 'T'}} + assert results['NM_001363850.1:c.1454_1462delinsT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Arg485IlefsTer3)', 'slr': 'NP_001350779.1:p.(R485Ifs*3)'} + assert results['NM_001363850.1:c.1454_1462delinsT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.1454_1462delinsT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001363850.1:c.1454_1462delinsT']['alt_genomic_loci'], []) + assert results['NM_001363850.1:c.1454_1462delinsT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613070_89613078delinsT', 'vcf': {'chr': 'chr16', 'pos': '89613070', 'ref': 'GGCGGGAGA', 'alt': 'T'}} assert 'hg38' not in list(results['NM_001363850.1:c.1454_1462delinsT']['primary_assembly_loci'].keys()) - assert 
results['NM_001363850.1:c.1454_1462delinsT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613070_89613078delinsT', 'vcf': {'chr': '16', 'ref': 'GGCGGGAGA', 'pos': '89613070', 'alt': 'T'}} + assert results['NM_001363850.1:c.1454_1462delinsT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613070_89613078delinsT', 'vcf': {'chr': '16', 'pos': '89613070', 'ref': 'GGCGGGAGA', 'alt': 'T'}} assert 'grch38' not in list(results['NM_001363850.1:c.1454_1462delinsT']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.1454_1462delinsT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} - - assert 'NM_003119.3:c.1454_1462delinsT' in list(results.keys()) - assert results['NM_003119.3:c.1454_1462delinsT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.3:c.1454_1462delinsT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003119.3:c.1454_1462delinsT']['alt_genomic_loci'], []) - assert results['NM_003119.3:c.1454_1462delinsT']['gene_symbol'] == 'SPG7' - assert results['NM_003119.3:c.1454_1462delinsT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Arg485IlefsTer3)', 'slr': 'NP_003110.1:p.(R485Ifs*3)'} - assert results['NM_003119.3:c.1454_1462delinsT']['submitted_variant'] == '16-89613069-AGGCGGGAGA-AT' - assert results['NM_003119.3:c.1454_1462delinsT']['genome_context_intronic_sequence'] == '' - assert results['NM_003119.3:c.1454_1462delinsT']['hgvs_lrg_variant'] == '' - assert results['NM_003119.3:c.1454_1462delinsT']['hgvs_transcript_variant'] == 'NM_003119.3:c.1454_1462delinsT' - assert results['NM_003119.3:c.1454_1462delinsT']['hgvs_refseqgene_variant'] == '' - assert results['NM_003119.3:c.1454_1462delinsT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000016.9:g.89613070_89613078delinsT', 'vcf': {'chr': 'chr16', 'ref': 'GGCGGGAGA', 'pos': '89613070', 'alt': 'T'}} - assert results['NM_003119.3:c.1454_1462delinsT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89546662_89546670delinsT', 'vcf': {'chr': 'chr16', 'ref': 'GGCGGGAGA', 'pos': '89546662', 'alt': 'T'}} - assert results['NM_003119.3:c.1454_1462delinsT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613070_89613078delinsT', 'vcf': {'chr': '16', 'ref': 'GGCGGGAGA', 'pos': '89613070', 'alt': 'T'}} - assert results['NM_003119.3:c.1454_1462delinsT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89546662_89546670delinsT', 'vcf': {'chr': '16', 'ref': 'GGCGGGAGA', 'pos': '89546662', 'alt': 'T'}} - assert results['NM_003119.3:c.1454_1462delinsT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} - + assert results['NM_001363850.1:c.1454_1462delinsT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1'} - def test_variant250(self): - variant = '16-89613145-C-T' + assert 'NM_003119.2:c.1454_1462delinsT' in list(results.keys()) + assert results['NM_003119.2:c.1454_1462delinsT']['submitted_variant'] == '16-89613069-AGGCGGGAGA-AT' + assert results['NM_003119.2:c.1454_1462delinsT']['gene_symbol'] == 'SPG7' + assert results['NM_003119.2:c.1454_1462delinsT']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_003119.2:c.1454_1462delinsT']['hgvs_transcript_variant'] == 'NM_003119.2:c.1454_1462delinsT' + assert results['NM_003119.2:c.1454_1462delinsT']['genome_context_intronic_sequence'] == '' + assert 
results['NM_003119.2:c.1454_1462delinsT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.1454_1462delinsT']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.43266_43274delinsT' + assert results['NM_003119.2:c.1454_1462delinsT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Arg485IlefsTer3)', 'slr': 'NP_003110.1:p.(R485Ifs*3)'} + assert results['NM_003119.2:c.1454_1462delinsT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.1454_1462delinsT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.2:c.1454_1462delinsT']['alt_genomic_loci'], []) + assert results['NM_003119.2:c.1454_1462delinsT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613070_89613078delinsT', 'vcf': {'chr': 'chr16', 'pos': '89613070', 'ref': 'GGCGGGAGA', 'alt': 'T'}} + assert 'hg38' not in list(results['NM_003119.2:c.1454_1462delinsT']['primary_assembly_loci'].keys()) + assert results['NM_003119.2:c.1454_1462delinsT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613070_89613078delinsT', 'vcf': {'chr': '16', 'pos': '89613070', 'ref': 'GGCGGGAGA', 'alt': 'T'}} + assert 'grch38' not in list(results['NM_003119.2:c.1454_1462delinsT']['primary_assembly_loci'].keys()) + assert results['NM_003119.2:c.1454_1462delinsT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1'} + + def test_variant250(self): + variant = '16-89613145-C-T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' + assert 'NM_003119.3:c.1529C>T' in list(results.keys()) + assert results['NM_003119.3:c.1529C>T']['submitted_variant'] == '16-89613145-C-T' + assert results['NM_003119.3:c.1529C>T']['gene_symbol'] == 'SPG7' + assert 
results['NM_003119.3:c.1529C>T']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} + assert results['NM_003119.3:c.1529C>T']['hgvs_transcript_variant'] == 'NM_003119.3:c.1529C>T' + assert results['NM_003119.3:c.1529C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.1529C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_003119.3:c.1529C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_003119.3:c.1529C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Ala510Val)', 'slr': 'NP_003110.1:p.(A510V)'} + assert results['NM_003119.3:c.1529C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.3:c.1529C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.3:c.1529C>T']['alt_genomic_loci'], []) + assert results['NM_003119.3:c.1529C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613145C>T', 'vcf': {'chr': 'chr16', 'pos': '89613145', 'ref': 'C', 'alt': 'T'}} + assert results['NM_003119.3:c.1529C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89546737C>T', 'vcf': {'chr': 'chr16', 'pos': '89546737', 'ref': 'C', 'alt': 'T'}} + assert results['NM_003119.3:c.1529C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613145C>T', 'vcf': {'chr': '16', 'pos': '89613145', 'ref': 'C', 'alt': 'T'}} + assert results['NM_003119.3:c.1529C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89546737C>T', 'vcf': {'chr': '16', 'pos': '89546737', 'ref': 'C', 'alt': 'T'}} + assert results['NM_003119.3:c.1529C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1'} + assert 'NM_001363850.1:c.1529C>T' in list(results.keys()) - assert 
results['NM_001363850.1:c.1529C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001363850.1:c.1529C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001363850.1:c.1529C>T']['alt_genomic_loci'], []) - assert results['NM_001363850.1:c.1529C>T']['gene_symbol'] == 'SPG7' - assert results['NM_001363850.1:c.1529C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Ala510Val)', 'slr': 'NP_001350779.1:p.(A510V)'} assert results['NM_001363850.1:c.1529C>T']['submitted_variant'] == '16-89613145-C-T' - assert results['NM_001363850.1:c.1529C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001363850.1:c.1529C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001363850.1:c.1529C>T']['gene_symbol'] == 'SPG7' + assert results['NM_001363850.1:c.1529C>T']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_001363850.1:c.1529C>T']['hgvs_transcript_variant'] == 'NM_001363850.1:c.1529C>T' + assert results['NM_001363850.1:c.1529C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001363850.1:c.1529C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001363850.1:c.1529C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001363850.1:c.1529C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613145C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89613145', 'alt': 'T'}} + assert results['NM_001363850.1:c.1529C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350779.1:p.(Ala510Val)', 'slr': 'NP_001350779.1:p.(A510V)'} + assert results['NM_001363850.1:c.1529C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363850.1:c.1529C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001363850.1:c.1529C>T']['alt_genomic_loci'], []) + assert results['NM_001363850.1:c.1529C>T']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000016.9:g.89613145C>T', 'vcf': {'chr': 'chr16', 'pos': '89613145', 'ref': 'C', 'alt': 'T'}} assert 'hg38' not in list(results['NM_001363850.1:c.1529C>T']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.1529C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613145C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89613145', 'alt': 'T'}} + assert results['NM_001363850.1:c.1529C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613145C>T', 'vcf': {'chr': '16', 'pos': '89613145', 'ref': 'C', 'alt': 'T'}} assert 'grch38' not in list(results['NM_001363850.1:c.1529C>T']['primary_assembly_loci'].keys()) - assert results['NM_001363850.1:c.1529C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1'} - - assert 'NM_003119.3:c.1529C>T' in list(results.keys()) - assert results['NM_003119.3:c.1529C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.3:c.1529C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003119.3:c.1529C>T']['alt_genomic_loci'], []) - assert results['NM_003119.3:c.1529C>T']['gene_symbol'] == 'SPG7' - assert results['NM_003119.3:c.1529C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Ala510Val)', 'slr': 'NP_003110.1:p.(A510V)'} - assert results['NM_003119.3:c.1529C>T']['submitted_variant'] == '16-89613145-C-T' - assert results['NM_003119.3:c.1529C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_003119.3:c.1529C>T']['hgvs_lrg_variant'] == '' - assert results['NM_003119.3:c.1529C>T']['hgvs_transcript_variant'] == 'NM_003119.3:c.1529C>T' - assert results['NM_003119.3:c.1529C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_003119.3:c.1529C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000016.9:g.89613145C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89613145', 'alt': 'T'}} - assert results['NM_003119.3:c.1529C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89546737C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89546737', 'alt': 'T'}} - assert results['NM_003119.3:c.1529C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613145C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89613145', 'alt': 'T'}} - assert results['NM_003119.3:c.1529C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000016.10:g.89546737C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89546737', 'alt': 'T'}} - assert results['NM_003119.3:c.1529C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.3'} + assert results['NM_001363850.1:c.1529C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363850.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350779.1'} - assert results['flag'] == 'gene_variant' assert 'NM_003119.2:c.1529C>T' in list(results.keys()) - assert results['NM_003119.2:c.1529C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003119.2:c.1529C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003119.2:c.1529C>T']['alt_genomic_loci'], []) - assert results['NM_003119.2:c.1529C>T']['gene_symbol'] == 'SPG7' - assert results['NM_003119.2:c.1529C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Ala510Val)', 'slr': 'NP_003110.1:p.(A510V)'} assert results['NM_003119.2:c.1529C>T']['submitted_variant'] == '16-89613145-C-T' - assert results['NM_003119.2:c.1529C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_003119.2:c.1529C>T']['hgvs_lrg_variant'] == '' + assert results['NM_003119.2:c.1529C>T']['gene_symbol'] == 'SPG7' + 
assert results['NM_003119.2:c.1529C>T']['gene_ids'] == {'hgnc_id': 'HGNC:11237', 'entrez_gene_id': '6687', 'ucsc_id': 'uc002fnj.4', 'omim_id': ['602783']} assert results['NM_003119.2:c.1529C>T']['hgvs_transcript_variant'] == 'NM_003119.2:c.1529C>T' + assert results['NM_003119.2:c.1529C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_003119.2:c.1529C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003119.2:c.1529C>T']['hgvs_refseqgene_variant'] == 'NG_008082.1:g.43341C>T' - assert results['NM_003119.2:c.1529C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613145C>T', 'vcf': {'chr': 'chr16', 'ref': 'C', 'pos': '89613145', 'alt': 'T'}} + assert results['NM_003119.2:c.1529C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003110.1:p.(Ala510Val)', 'slr': 'NP_003110.1:p.(A510V)'} + assert results['NM_003119.2:c.1529C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003119.2:c.1529C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003119.2:c.1529C>T']['alt_genomic_loci'], []) + assert results['NM_003119.2:c.1529C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613145C>T', 'vcf': {'chr': 'chr16', 'pos': '89613145', 'ref': 'C', 'alt': 'T'}} assert 'hg38' not in list(results['NM_003119.2:c.1529C>T']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.1529C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613145C>T', 'vcf': {'chr': '16', 'ref': 'C', 'pos': '89613145', 'alt': 'T'}} + assert results['NM_003119.2:c.1529C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000016.9:g.89613145C>T', 'vcf': {'chr': '16', 'pos': '89613145', 'ref': 'C', 'alt': 'T'}} assert 'grch38' not in list(results['NM_003119.2:c.1529C>T']['primary_assembly_loci'].keys()) - assert results['NM_003119.2:c.1529C>T']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2'} - + assert results['NM_003119.2:c.1529C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003119.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003110.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008082.1'} def test_variant251(self): variant = '17-7578194-GCAC-G' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_001276695.1:c.535_537del' in list(results.keys()) - assert results['NM_001276695.1:c.535_537del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001276695.1:c.535_537del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001276695.1:c.535_537del']['alt_genomic_loci'], []) - assert results['NM_001276695.1:c.535_537del']['gene_symbol'] == 'TP53' - assert results['NM_001276695.1:c.535_537del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263624.1:p.(Val179del)', 'slr': 'NP_001263624.1:p.(V179del)'} - assert results['NM_001276695.1:c.535_537del']['submitted_variant'] == '17-7578194-GCAC-G' - assert results['NM_001276695.1:c.535_537del']['genome_context_intronic_sequence'] == '' - assert results['NM_001276695.1:c.535_537del']['hgvs_lrg_variant'] == '' - assert results['NM_001276695.1:c.535_537del']['hgvs_transcript_variant'] == 'NM_001276695.1:c.535_537del' - assert results['NM_001276695.1:c.535_537del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276695.1:c.535_537del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001276695.1:c.535_537del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 
'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001276695.1:c.535_537del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001276695.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001276695.1:c.535_537del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263624.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276695.1'} - + assert results['flag'] == 'gene_variant' assert 'NM_001126113.2:c.652_654del' in list(results.keys()) - assert results['NM_001126113.2:c.652_654del']['hgvs_lrg_transcript_variant'] == 'LRG_321t4:c.652_654del' - assert results['NM_001126113.2:c.652_654del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001126113.2:c.652_654del']['alt_genomic_loci'], []) - assert results['NM_001126113.2:c.652_654del']['gene_symbol'] == 'TP53' - assert results['NM_001126113.2:c.652_654del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119585.1(LRG_321p4):p.(Val218del)', 'slr': 'NP_001119585.1:p.(V218del)'} assert results['NM_001126113.2:c.652_654del']['submitted_variant'] == '17-7578194-GCAC-G' - assert results['NM_001126113.2:c.652_654del']['genome_context_intronic_sequence'] == '' - assert results['NM_001126113.2:c.652_654del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' + assert results['NM_001126113.2:c.652_654del']['gene_symbol'] == 'TP53' + assert results['NM_001126113.2:c.652_654del']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} assert results['NM_001126113.2:c.652_654del']['hgvs_transcript_variant'] == 'NM_001126113.2:c.652_654del' + assert 
results['NM_001126113.2:c.652_654del']['genome_context_intronic_sequence'] == '' + assert results['NM_001126113.2:c.652_654del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001126113.2:c.652_654del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' - assert results['NM_001126113.2:c.652_654del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001126113.2:c.652_654del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001126113.2:c.652_654del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001126113.2:c.652_654del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001126113.2:c.652_654del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119585.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126113.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + assert results['NM_001126113.2:c.652_654del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119585.1(LRG_321p4):p.(Val218del)', 'slr': 'NP_001119585.1:p.(V218del)'} + assert results['NM_001126113.2:c.652_654del']['hgvs_lrg_transcript_variant'] == 'LRG_321t4:c.652_654del' + assert results['NM_001126113.2:c.652_654del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' + self.assertCountEqual(results['NM_001126113.2:c.652_654del']['alt_genomic_loci'], []) + assert 
results['NM_001126113.2:c.652_654del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126113.2:c.652_654del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126113.2:c.652_654del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126113.2:c.652_654del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126113.2:c.652_654del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126113.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119585.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} assert 'NM_001126118.1:c.535_537del' in list(results.keys()) - assert results['NM_001126118.1:c.535_537del']['hgvs_lrg_transcript_variant'] == 'LRG_321t8:c.535_537del' - assert results['NM_001126118.1:c.535_537del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001126118.1:c.535_537del']['alt_genomic_loci'], []) - assert results['NM_001126118.1:c.535_537del']['gene_symbol'] == 'TP53' - assert results['NM_001126118.1:c.535_537del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119590.1(LRG_321p8):p.(Val179del)', 'slr': 'NP_001119590.1:p.(V179del)'} assert results['NM_001126118.1:c.535_537del']['submitted_variant'] == '17-7578194-GCAC-G' - assert results['NM_001126118.1:c.535_537del']['genome_context_intronic_sequence'] == '' - 
assert results['NM_001126118.1:c.535_537del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' + assert results['NM_001126118.1:c.535_537del']['gene_symbol'] == 'TP53' + assert results['NM_001126118.1:c.535_537del']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} assert results['NM_001126118.1:c.535_537del']['hgvs_transcript_variant'] == 'NM_001126118.1:c.535_537del' + assert results['NM_001126118.1:c.535_537del']['genome_context_intronic_sequence'] == '' + assert results['NM_001126118.1:c.535_537del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001126118.1:c.535_537del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' - assert results['NM_001126118.1:c.535_537del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001126118.1:c.535_537del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001126118.1:c.535_537del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001126118.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001126118.1:c.535_537del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119590.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126118.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + assert 
results['NM_001126118.1:c.535_537del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119590.1(LRG_321p8):p.(Val179del)', 'slr': 'NP_001119590.1:p.(V179del)'} + assert results['NM_001126118.1:c.535_537del']['hgvs_lrg_transcript_variant'] == 'LRG_321t8:c.535_537del' + assert results['NM_001126118.1:c.535_537del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' + self.assertCountEqual(results['NM_001126118.1:c.535_537del']['alt_genomic_loci'], []) + assert results['NM_001126118.1:c.535_537del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126118.1:c.535_537del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126118.1:c.535_537del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126118.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126118.1:c.535_537del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126118.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119590.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + + assert 'NM_001126115.1:c.256_258del' in list(results.keys()) + assert results['NM_001126115.1:c.256_258del']['submitted_variant'] == '17-7578194-GCAC-G' + assert results['NM_001126115.1:c.256_258del']['gene_symbol'] == 'TP53' + assert results['NM_001126115.1:c.256_258del']['gene_ids'] == {'hgnc_id': 
'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} + assert results['NM_001126115.1:c.256_258del']['hgvs_transcript_variant'] == 'NM_001126115.1:c.256_258del' + assert results['NM_001126115.1:c.256_258del']['genome_context_intronic_sequence'] == '' + assert results['NM_001126115.1:c.256_258del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001126115.1:c.256_258del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' + assert results['NM_001126115.1:c.256_258del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119587.1(LRG_321p5):p.(Val86del)', 'slr': 'NP_001119587.1:p.(V86del)'} + assert results['NM_001126115.1:c.256_258del']['hgvs_lrg_transcript_variant'] == 'LRG_321t5:c.256_258del' + assert results['NM_001126115.1:c.256_258del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' + self.assertCountEqual(results['NM_001126115.1:c.256_258del']['alt_genomic_loci'], []) + assert results['NM_001126115.1:c.256_258del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126115.1:c.256_258del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126115.1:c.256_258del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126115.1:c.256_258del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126115.1:c.256_258del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126115.1', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119587.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} assert 'NM_001126116.1:c.256_258del' in list(results.keys()) - assert results['NM_001126116.1:c.256_258del']['hgvs_lrg_transcript_variant'] == 'LRG_321t6:c.256_258del' - assert results['NM_001126116.1:c.256_258del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001126116.1:c.256_258del']['alt_genomic_loci'], []) - assert results['NM_001126116.1:c.256_258del']['gene_symbol'] == 'TP53' - assert results['NM_001126116.1:c.256_258del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119588.1(LRG_321p6):p.(Val86del)', 'slr': 'NP_001119588.1:p.(V86del)'} assert results['NM_001126116.1:c.256_258del']['submitted_variant'] == '17-7578194-GCAC-G' - assert results['NM_001126116.1:c.256_258del']['genome_context_intronic_sequence'] == '' - assert results['NM_001126116.1:c.256_258del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' + assert results['NM_001126116.1:c.256_258del']['gene_symbol'] == 'TP53' + assert results['NM_001126116.1:c.256_258del']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} assert results['NM_001126116.1:c.256_258del']['hgvs_transcript_variant'] == 'NM_001126116.1:c.256_258del' + assert results['NM_001126116.1:c.256_258del']['genome_context_intronic_sequence'] == '' + assert results['NM_001126116.1:c.256_258del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001126116.1:c.256_258del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' - assert results['NM_001126116.1:c.256_258del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001126116.1:c.256_258del']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001126116.1:c.256_258del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001126116.1:c.256_258del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001126116.1:c.256_258del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119588.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126116.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + assert results['NM_001126116.1:c.256_258del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119588.1(LRG_321p6):p.(Val86del)', 'slr': 'NP_001119588.1:p.(V86del)'} + assert results['NM_001126116.1:c.256_258del']['hgvs_lrg_transcript_variant'] == 'LRG_321t6:c.256_258del' + assert results['NM_001126116.1:c.256_258del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' + self.assertCountEqual(results['NM_001126116.1:c.256_258del']['alt_genomic_loci'], []) + assert results['NM_001126116.1:c.256_258del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126116.1:c.256_258del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126116.1:c.256_258del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 
'17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126116.1:c.256_258del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126116.1:c.256_258del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126116.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119588.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + + assert 'NM_001276699.1:c.175_177del' in list(results.keys()) + assert results['NM_001276699.1:c.175_177del']['submitted_variant'] == '17-7578194-GCAC-G' + assert results['NM_001276699.1:c.175_177del']['gene_symbol'] == 'TP53' + assert results['NM_001276699.1:c.175_177del']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} + assert results['NM_001276699.1:c.175_177del']['hgvs_transcript_variant'] == 'NM_001276699.1:c.175_177del' + assert results['NM_001276699.1:c.175_177del']['genome_context_intronic_sequence'] == '' + assert results['NM_001276699.1:c.175_177del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001276699.1:c.175_177del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001276699.1:c.175_177del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263628.1:p.(Val59del)', 'slr': 'NP_001263628.1:p.(V59del)'} + assert results['NM_001276699.1:c.175_177del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001276699.1:c.175_177del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001276699.1:c.175_177del']['alt_genomic_loci'], []) + assert results['NM_001276699.1:c.175_177del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'pos': '7578194', 'ref': 
'GCAC', 'alt': 'G'}} + assert results['NM_001276699.1:c.175_177del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276699.1:c.175_177del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276699.1:c.175_177del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276699.1:c.175_177del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276699.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263628.1'} + + assert 'NM_001276696.1:c.535_537del' in list(results.keys()) + assert results['NM_001276696.1:c.535_537del']['submitted_variant'] == '17-7578194-GCAC-G' + assert results['NM_001276696.1:c.535_537del']['gene_symbol'] == 'TP53' + assert results['NM_001276696.1:c.535_537del']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} + assert results['NM_001276696.1:c.535_537del']['hgvs_transcript_variant'] == 'NM_001276696.1:c.535_537del' + assert results['NM_001276696.1:c.535_537del']['genome_context_intronic_sequence'] == '' + assert results['NM_001276696.1:c.535_537del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001276696.1:c.535_537del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001276696.1:c.535_537del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263625.1:p.(Val179del)', 'slr': 'NP_001263625.1:p.(V179del)'} + assert results['NM_001276696.1:c.535_537del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001276696.1:c.535_537del']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['NM_001276696.1:c.535_537del']['alt_genomic_loci'], []) + assert results['NM_001276696.1:c.535_537del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276696.1:c.535_537del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276696.1:c.535_537del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276696.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276696.1:c.535_537del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276696.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263625.1'} + + assert 'NM_001126114.2:c.652_654del' in list(results.keys()) + assert results['NM_001126114.2:c.652_654del']['submitted_variant'] == '17-7578194-GCAC-G' + assert results['NM_001126114.2:c.652_654del']['gene_symbol'] == 'TP53' + assert results['NM_001126114.2:c.652_654del']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} + assert results['NM_001126114.2:c.652_654del']['hgvs_transcript_variant'] == 'NM_001126114.2:c.652_654del' + assert results['NM_001126114.2:c.652_654del']['genome_context_intronic_sequence'] == '' + assert results['NM_001126114.2:c.652_654del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001126114.2:c.652_654del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' + assert 
results['NM_001126114.2:c.652_654del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119586.1(LRG_321p3):p.(Val218del)', 'slr': 'NP_001119586.1:p.(V218del)'} + assert results['NM_001126114.2:c.652_654del']['hgvs_lrg_transcript_variant'] == 'LRG_321t3:c.652_654del' + assert results['NM_001126114.2:c.652_654del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' + self.assertCountEqual(results['NM_001126114.2:c.652_654del']['alt_genomic_loci'], []) + assert results['NM_001126114.2:c.652_654del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126114.2:c.652_654del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126114.2:c.652_654del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126114.2:c.652_654del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126114.2:c.652_654del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126114.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119586.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} assert 'NM_001126117.1:c.256_258del' in list(results.keys()) - assert results['NM_001126117.1:c.256_258del']['hgvs_lrg_transcript_variant'] == 'LRG_321t7:c.256_258del' - assert results['NM_001126117.1:c.256_258del']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_001126117.1:c.256_258del']['alt_genomic_loci'], []) - assert results['NM_001126117.1:c.256_258del']['gene_symbol'] == 'TP53' - assert results['NM_001126117.1:c.256_258del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119589.1(LRG_321p7):p.(Val86del)', 'slr': 'NP_001119589.1:p.(V86del)'} assert results['NM_001126117.1:c.256_258del']['submitted_variant'] == '17-7578194-GCAC-G' - assert results['NM_001126117.1:c.256_258del']['genome_context_intronic_sequence'] == '' - assert results['NM_001126117.1:c.256_258del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' + assert results['NM_001126117.1:c.256_258del']['gene_symbol'] == 'TP53' + assert results['NM_001126117.1:c.256_258del']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} assert results['NM_001126117.1:c.256_258del']['hgvs_transcript_variant'] == 'NM_001126117.1:c.256_258del' + assert results['NM_001126117.1:c.256_258del']['genome_context_intronic_sequence'] == '' + assert results['NM_001126117.1:c.256_258del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001126117.1:c.256_258del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' - assert results['NM_001126117.1:c.256_258del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001126117.1:c.256_258del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001126117.1:c.256_258del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001126117.1:c.256_258del']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001126117.1:c.256_258del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119589.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126117.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + assert results['NM_001126117.1:c.256_258del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119589.1(LRG_321p7):p.(Val86del)', 'slr': 'NP_001119589.1:p.(V86del)'} + assert results['NM_001126117.1:c.256_258del']['hgvs_lrg_transcript_variant'] == 'LRG_321t7:c.256_258del' + assert results['NM_001126117.1:c.256_258del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' + self.assertCountEqual(results['NM_001126117.1:c.256_258del']['alt_genomic_loci'], []) + assert results['NM_001126117.1:c.256_258del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126117.1:c.256_258del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126117.1:c.256_258del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126117.1:c.256_258del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126117.1:c.256_258del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126117.1', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119589.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} - assert 'NM_001276761.1:c.535_537del' in list(results.keys()) - assert results['NM_001276761.1:c.535_537del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001276761.1:c.535_537del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001276761.1:c.535_537del']['alt_genomic_loci'], []) - assert results['NM_001276761.1:c.535_537del']['gene_symbol'] == 'TP53' - assert results['NM_001276761.1:c.535_537del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263690.1:p.(Val179del)', 'slr': 'NP_001263690.1:p.(V179del)'} - assert results['NM_001276761.1:c.535_537del']['submitted_variant'] == '17-7578194-GCAC-G' - assert results['NM_001276761.1:c.535_537del']['genome_context_intronic_sequence'] == '' - assert results['NM_001276761.1:c.535_537del']['hgvs_lrg_variant'] == '' - assert results['NM_001276761.1:c.535_537del']['hgvs_transcript_variant'] == 'NM_001276761.1:c.535_537del' - assert results['NM_001276761.1:c.535_537del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276761.1:c.535_537del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001276761.1:c.535_537del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001276761.1:c.535_537del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001276761.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001276761.1:c.535_537del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263690.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276761.1'} + assert 'NM_000546.5:c.652_654del' in list(results.keys()) + assert results['NM_000546.5:c.652_654del']['submitted_variant'] == '17-7578194-GCAC-G' + assert results['NM_000546.5:c.652_654del']['gene_symbol'] == 'TP53' + assert results['NM_000546.5:c.652_654del']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} + assert results['NM_000546.5:c.652_654del']['hgvs_transcript_variant'] == 'NM_000546.5:c.652_654del' + assert results['NM_000546.5:c.652_654del']['genome_context_intronic_sequence'] == '' + assert results['NM_000546.5:c.652_654del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000546.5:c.652_654del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' + assert results['NM_000546.5:c.652_654del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000537.3(LRG_321p1):p.(Val218del)', 'slr': 'NP_000537.3:p.(V218del)'} + assert results['NM_000546.5:c.652_654del']['hgvs_lrg_transcript_variant'] == 'LRG_321t1:c.652_654del' + assert results['NM_000546.5:c.652_654del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' + self.assertCountEqual(results['NM_000546.5:c.652_654del']['alt_genomic_loci'], []) + assert results['NM_000546.5:c.652_654del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_000546.5:c.652_654del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert 
results['NM_000546.5:c.652_654del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_000546.5:c.652_654del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_000546.5:c.652_654del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000546.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000537.3', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} assert 'NM_001126112.2:c.652_654del' in list(results.keys()) - assert results['NM_001126112.2:c.652_654del']['hgvs_lrg_transcript_variant'] == 'LRG_321t2:c.652_654del' - assert results['NM_001126112.2:c.652_654del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001126112.2:c.652_654del']['alt_genomic_loci'], []) - assert results['NM_001126112.2:c.652_654del']['gene_symbol'] == 'TP53' - assert results['NM_001126112.2:c.652_654del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119584.1:p.(Val218del)', 'slr': 'NP_001119584.1:p.(V218del)'} assert results['NM_001126112.2:c.652_654del']['submitted_variant'] == '17-7578194-GCAC-G' - assert results['NM_001126112.2:c.652_654del']['genome_context_intronic_sequence'] == '' - assert results['NM_001126112.2:c.652_654del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' + assert results['NM_001126112.2:c.652_654del']['gene_symbol'] == 'TP53' + assert results['NM_001126112.2:c.652_654del']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} assert results['NM_001126112.2:c.652_654del']['hgvs_transcript_variant'] == 'NM_001126112.2:c.652_654del' + assert 
results['NM_001126112.2:c.652_654del']['genome_context_intronic_sequence'] == '' + assert results['NM_001126112.2:c.652_654del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001126112.2:c.652_654del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' - assert results['NM_001126112.2:c.652_654del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001126112.2:c.652_654del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001126112.2:c.652_654del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001126112.2:c.652_654del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001126112.2:c.652_654del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119584.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126112.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + assert results['NM_001126112.2:c.652_654del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119584.1:p.(Val218del)', 'slr': 'NP_001119584.1:p.(V218del)'} + assert results['NM_001126112.2:c.652_654del']['hgvs_lrg_transcript_variant'] == 'LRG_321t2:c.652_654del' + assert results['NM_001126112.2:c.652_654del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' + self.assertCountEqual(results['NM_001126112.2:c.652_654del']['alt_genomic_loci'], []) + assert 
results['NM_001126112.2:c.652_654del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126112.2:c.652_654del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126112.2:c.652_654del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126112.2:c.652_654del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001126112.2:c.652_654del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126112.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119584.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} - assert results['flag'] == 'gene_variant' assert 'NM_001276697.1:c.175_177del' in list(results.keys()) - assert results['NM_001276697.1:c.175_177del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001276697.1:c.175_177del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001276697.1:c.175_177del']['alt_genomic_loci'], []) - assert results['NM_001276697.1:c.175_177del']['gene_symbol'] == 'TP53' - assert results['NM_001276697.1:c.175_177del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263626.1:p.(Val59del)', 'slr': 'NP_001263626.1:p.(V59del)'} assert results['NM_001276697.1:c.175_177del']['submitted_variant'] == '17-7578194-GCAC-G' - assert results['NM_001276697.1:c.175_177del']['genome_context_intronic_sequence'] == 
'' - assert results['NM_001276697.1:c.175_177del']['hgvs_lrg_variant'] == '' + assert results['NM_001276697.1:c.175_177del']['gene_symbol'] == 'TP53' + assert results['NM_001276697.1:c.175_177del']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} assert results['NM_001276697.1:c.175_177del']['hgvs_transcript_variant'] == 'NM_001276697.1:c.175_177del' + assert results['NM_001276697.1:c.175_177del']['genome_context_intronic_sequence'] == '' + assert results['NM_001276697.1:c.175_177del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001276697.1:c.175_177del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276697.1:c.175_177del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001276697.1:c.175_177del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001276697.1:c.175_177del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001276697.1:c.175_177del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001276697.1:c.175_177del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263626.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276697.1'} - - assert 'NM_001276696.1:c.535_537del' in list(results.keys()) - assert results['NM_001276696.1:c.535_537del']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_001276696.1:c.535_537del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001276696.1:c.535_537del']['alt_genomic_loci'], []) - assert results['NM_001276696.1:c.535_537del']['gene_symbol'] == 'TP53' - assert results['NM_001276696.1:c.535_537del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263625.1:p.(Val179del)', 'slr': 'NP_001263625.1:p.(V179del)'} - assert results['NM_001276696.1:c.535_537del']['submitted_variant'] == '17-7578194-GCAC-G' - assert results['NM_001276696.1:c.535_537del']['genome_context_intronic_sequence'] == '' - assert results['NM_001276696.1:c.535_537del']['hgvs_lrg_variant'] == '' - assert results['NM_001276696.1:c.535_537del']['hgvs_transcript_variant'] == 'NM_001276696.1:c.535_537del' - assert results['NM_001276696.1:c.535_537del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276696.1:c.535_537del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001276696.1:c.535_537del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001276696.1:c.535_537del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001276696.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001276696.1:c.535_537del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263625.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276696.1'} + assert 
results['NM_001276697.1:c.175_177del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263626.1:p.(Val59del)', 'slr': 'NP_001263626.1:p.(V59del)'} + assert results['NM_001276697.1:c.175_177del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001276697.1:c.175_177del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001276697.1:c.175_177del']['alt_genomic_loci'], []) + assert results['NM_001276697.1:c.175_177del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276697.1:c.175_177del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276697.1:c.175_177del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276697.1:c.175_177del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276697.1:c.175_177del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276697.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263626.1'} - assert 'NM_001276698.1:c.175_177del' in list(results.keys()) - assert results['NM_001276698.1:c.175_177del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001276698.1:c.175_177del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001276698.1:c.175_177del']['alt_genomic_loci'], []) - assert results['NM_001276698.1:c.175_177del']['gene_symbol'] == 'TP53' - assert results['NM_001276698.1:c.175_177del']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_001263627.1:p.(Val59del)', 'slr': 'NP_001263627.1:p.(V59del)'} - assert results['NM_001276698.1:c.175_177del']['submitted_variant'] == '17-7578194-GCAC-G' - assert results['NM_001276698.1:c.175_177del']['genome_context_intronic_sequence'] == '' - assert results['NM_001276698.1:c.175_177del']['hgvs_lrg_variant'] == '' - assert results['NM_001276698.1:c.175_177del']['hgvs_transcript_variant'] == 'NM_001276698.1:c.175_177del' - assert results['NM_001276698.1:c.175_177del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276698.1:c.175_177del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001276698.1:c.175_177del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001276698.1:c.175_177del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001276698.1:c.175_177del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001276698.1:c.175_177del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263627.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276698.1'} - - assert 'NM_001126115.1:c.256_258del' in list(results.keys()) - assert results['NM_001126115.1:c.256_258del']['hgvs_lrg_transcript_variant'] == 'LRG_321t5:c.256_258del' - assert results['NM_001126115.1:c.256_258del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001126115.1:c.256_258del']['alt_genomic_loci'], []) - assert 
results['NM_001126115.1:c.256_258del']['gene_symbol'] == 'TP53' - assert results['NM_001126115.1:c.256_258del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119587.1(LRG_321p5):p.(Val86del)', 'slr': 'NP_001119587.1:p.(V86del)'} - assert results['NM_001126115.1:c.256_258del']['submitted_variant'] == '17-7578194-GCAC-G' - assert results['NM_001126115.1:c.256_258del']['genome_context_intronic_sequence'] == '' - assert results['NM_001126115.1:c.256_258del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' - assert results['NM_001126115.1:c.256_258del']['hgvs_transcript_variant'] == 'NM_001126115.1:c.256_258del' - assert results['NM_001126115.1:c.256_258del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' - assert results['NM_001126115.1:c.256_258del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001126115.1:c.256_258del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001126115.1:c.256_258del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001126115.1:c.256_258del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001126115.1:c.256_258del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119587.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126115.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} - - assert 
'NM_001126114.2:c.652_654del' in list(results.keys()) - assert results['NM_001126114.2:c.652_654del']['hgvs_lrg_transcript_variant'] == 'LRG_321t3:c.652_654del' - assert results['NM_001126114.2:c.652_654del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001126114.2:c.652_654del']['alt_genomic_loci'], []) - assert results['NM_001126114.2:c.652_654del']['gene_symbol'] == 'TP53' - assert results['NM_001126114.2:c.652_654del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119586.1(LRG_321p3):p.(Val218del)', 'slr': 'NP_001119586.1:p.(V218del)'} - assert results['NM_001126114.2:c.652_654del']['submitted_variant'] == '17-7578194-GCAC-G' - assert results['NM_001126114.2:c.652_654del']['genome_context_intronic_sequence'] == '' - assert results['NM_001126114.2:c.652_654del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' - assert results['NM_001126114.2:c.652_654del']['hgvs_transcript_variant'] == 'NM_001126114.2:c.652_654del' - assert results['NM_001126114.2:c.652_654del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' - assert results['NM_001126114.2:c.652_654del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001126114.2:c.652_654del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001126114.2:c.652_654del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001126114.2:c.652_654del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert 
results['NM_001126114.2:c.652_654del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119586.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126114.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} - - assert 'NM_001276699.1:c.175_177del' in list(results.keys()) - assert results['NM_001276699.1:c.175_177del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001276699.1:c.175_177del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001276699.1:c.175_177del']['alt_genomic_loci'], []) - assert results['NM_001276699.1:c.175_177del']['gene_symbol'] == 'TP53' - assert results['NM_001276699.1:c.175_177del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263628.1:p.(Val59del)', 'slr': 'NP_001263628.1:p.(V59del)'} - assert results['NM_001276699.1:c.175_177del']['submitted_variant'] == '17-7578194-GCAC-G' - assert results['NM_001276699.1:c.175_177del']['genome_context_intronic_sequence'] == '' - assert results['NM_001276699.1:c.175_177del']['hgvs_lrg_variant'] == '' - assert results['NM_001276699.1:c.175_177del']['hgvs_transcript_variant'] == 'NM_001276699.1:c.175_177del' - assert results['NM_001276699.1:c.175_177del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276699.1:c.175_177del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001276699.1:c.175_177del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001276699.1:c.175_177del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 
'alt': 'G'}} - assert results['NM_001276699.1:c.175_177del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001276699.1:c.175_177del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263628.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276699.1'} + assert 'NM_001276695.1:c.535_537del' in list(results.keys()) + assert results['NM_001276695.1:c.535_537del']['submitted_variant'] == '17-7578194-GCAC-G' + assert results['NM_001276695.1:c.535_537del']['gene_symbol'] == 'TP53' + assert results['NM_001276695.1:c.535_537del']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} + assert results['NM_001276695.1:c.535_537del']['hgvs_transcript_variant'] == 'NM_001276695.1:c.535_537del' + assert results['NM_001276695.1:c.535_537del']['genome_context_intronic_sequence'] == '' + assert results['NM_001276695.1:c.535_537del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001276695.1:c.535_537del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001276695.1:c.535_537del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263624.1:p.(Val179del)', 'slr': 'NP_001263624.1:p.(V179del)'} + assert results['NM_001276695.1:c.535_537del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001276695.1:c.535_537del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001276695.1:c.535_537del']['alt_genomic_loci'], []) + assert results['NM_001276695.1:c.535_537del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276695.1:c.535_537del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 
'vcf': {'chr': 'chr17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276695.1:c.535_537del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276695.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276695.1:c.535_537del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276695.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263624.1'} assert 'NM_001276760.1:c.535_537del' in list(results.keys()) - assert results['NM_001276760.1:c.535_537del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001276760.1:c.535_537del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001276760.1:c.535_537del']['alt_genomic_loci'], []) - assert results['NM_001276760.1:c.535_537del']['gene_symbol'] == 'TP53' - assert results['NM_001276760.1:c.535_537del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263689.1:p.(Val179del)', 'slr': 'NP_001263689.1:p.(V179del)'} assert results['NM_001276760.1:c.535_537del']['submitted_variant'] == '17-7578194-GCAC-G' - assert results['NM_001276760.1:c.535_537del']['genome_context_intronic_sequence'] == '' - assert results['NM_001276760.1:c.535_537del']['hgvs_lrg_variant'] == '' + assert results['NM_001276760.1:c.535_537del']['gene_symbol'] == 'TP53' + assert results['NM_001276760.1:c.535_537del']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} assert results['NM_001276760.1:c.535_537del']['hgvs_transcript_variant'] == 'NM_001276760.1:c.535_537del' + assert results['NM_001276760.1:c.535_537del']['genome_context_intronic_sequence'] == '' + assert 
results['NM_001276760.1:c.535_537del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001276760.1:c.535_537del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276760.1:c.535_537del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001276760.1:c.535_537del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001276760.1:c.535_537del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_001276760.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_001276760.1:c.535_537del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263689.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276760.1'} + assert results['NM_001276760.1:c.535_537del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263689.1:p.(Val179del)', 'slr': 'NP_001263689.1:p.(V179del)'} + assert results['NM_001276760.1:c.535_537del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001276760.1:c.535_537del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001276760.1:c.535_537del']['alt_genomic_loci'], []) + assert results['NM_001276760.1:c.535_537del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276760.1:c.535_537del']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276760.1:c.535_537del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276760.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276760.1:c.535_537del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276760.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263689.1'} - assert 'NM_000546.5:c.652_654del' in list(results.keys()) - assert results['NM_000546.5:c.652_654del']['hgvs_lrg_transcript_variant'] == 'LRG_321t1:c.652_654del' - assert results['NM_000546.5:c.652_654del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000546.5:c.652_654del']['alt_genomic_loci'], []) - assert results['NM_000546.5:c.652_654del']['gene_symbol'] == 'TP53' - assert results['NM_000546.5:c.652_654del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000537.3(LRG_321p1):p.(Val218del)', 'slr': 'NP_000537.3:p.(V218del)'} - assert results['NM_000546.5:c.652_654del']['submitted_variant'] == '17-7578194-GCAC-G' - assert results['NM_000546.5:c.652_654del']['genome_context_intronic_sequence'] == '' - assert results['NM_000546.5:c.652_654del']['hgvs_lrg_variant'] == 'LRG_321:g.17672_17674del' - assert results['NM_000546.5:c.652_654del']['hgvs_transcript_variant'] == 'NM_000546.5:c.652_654del' - assert results['NM_000546.5:c.652_654del']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17672_17674del' - assert results['NM_000546.5:c.652_654del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 
'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_000546.5:c.652_654del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_000546.5:c.652_654del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7578194', 'alt': 'G'}} - assert results['NM_000546.5:c.652_654del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'ref': 'GCAC', 'pos': '7674876', 'alt': 'G'}} - assert results['NM_000546.5:c.652_654del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000537.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000546.5', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + assert 'NM_001276761.1:c.535_537del' in list(results.keys()) + assert results['NM_001276761.1:c.535_537del']['submitted_variant'] == '17-7578194-GCAC-G' + assert results['NM_001276761.1:c.535_537del']['gene_symbol'] == 'TP53' + assert results['NM_001276761.1:c.535_537del']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} + assert results['NM_001276761.1:c.535_537del']['hgvs_transcript_variant'] == 'NM_001276761.1:c.535_537del' + assert results['NM_001276761.1:c.535_537del']['genome_context_intronic_sequence'] == '' + assert results['NM_001276761.1:c.535_537del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001276761.1:c.535_537del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001276761.1:c.535_537del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263690.1:p.(Val179del)', 'slr': 'NP_001263690.1:p.(V179del)'} + assert 
results['NM_001276761.1:c.535_537del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001276761.1:c.535_537del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001276761.1:c.535_537del']['alt_genomic_loci'], []) + assert results['NM_001276761.1:c.535_537del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276761.1:c.535_537del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276761.1:c.535_537del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276761.1:c.535_537del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276761.1:c.535_537del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276761.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263690.1'} + assert 'NM_001276698.1:c.175_177del' in list(results.keys()) + assert results['NM_001276698.1:c.175_177del']['submitted_variant'] == '17-7578194-GCAC-G' + assert results['NM_001276698.1:c.175_177del']['gene_symbol'] == 'TP53' + assert results['NM_001276698.1:c.175_177del']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} + assert results['NM_001276698.1:c.175_177del']['hgvs_transcript_variant'] == 'NM_001276698.1:c.175_177del' + assert results['NM_001276698.1:c.175_177del']['genome_context_intronic_sequence'] == '' + assert 
results['NM_001276698.1:c.175_177del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001276698.1:c.175_177del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001276698.1:c.175_177del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263627.1:p.(Val59del)', 'slr': 'NP_001263627.1:p.(V59del)'} + assert results['NM_001276698.1:c.175_177del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001276698.1:c.175_177del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001276698.1:c.175_177del']['alt_genomic_loci'], []) + assert results['NM_001276698.1:c.175_177del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': 'chr17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276698.1:c.175_177del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': 'chr17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276698.1:c.175_177del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578201_7578203del', 'vcf': {'chr': '17', 'pos': '7578194', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276698.1:c.175_177del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7674883_7674885del', 'vcf': {'chr': '17', 'pos': '7674876', 'ref': 'GCAC', 'alt': 'G'}} + assert results['NM_001276698.1:c.175_177del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276698.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263627.1'} def test_variant252(self): variant = '17-7578523-T-TG' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_001276760.1:c.289dup' in list(results.keys()) - assert results['NM_001276760.1:c.289dup']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_001276760.1:c.289dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001276760.1:c.289dup']['alt_genomic_loci'], []) - assert results['NM_001276760.1:c.289dup']['gene_symbol'] == 'TP53' - assert results['NM_001276760.1:c.289dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263689.1:p.(Gln97ProfsTer13)', 'slr': 'NP_001263689.1:p.(Q97Pfs*13)'} - assert results['NM_001276760.1:c.289dup']['submitted_variant'] == '17-7578523-T-TG' - assert results['NM_001276760.1:c.289dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001276760.1:c.289dup']['hgvs_lrg_variant'] == '' - assert results['NM_001276760.1:c.289dup']['hgvs_transcript_variant'] == 'NM_001276760.1:c.289dup' - assert results['NM_001276760.1:c.289dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276760.1:c.289dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001276760.1:c.289dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001276760.1:c.289dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001276760.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001276760.1:c.289dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263689.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276760.1'} + assert results['flag'] == 'gene_variant' + assert 'NM_001126113.2:c.406dup' in list(results.keys()) + assert 
results['NM_001126113.2:c.406dup']['submitted_variant'] == '17-7578523-T-TG' + assert results['NM_001126113.2:c.406dup']['gene_symbol'] == 'TP53' + assert results['NM_001126113.2:c.406dup']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} + assert results['NM_001126113.2:c.406dup']['hgvs_transcript_variant'] == 'NM_001126113.2:c.406dup' + assert results['NM_001126113.2:c.406dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001126113.2:c.406dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001126113.2:c.406dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' + assert results['NM_001126113.2:c.406dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119585.1(LRG_321p4):p.(Gln136ProfsTer13)', 'slr': 'NP_001119585.1:p.(Q136Pfs*13)'} + assert results['NM_001126113.2:c.406dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t4:c.406dup' + assert results['NM_001126113.2:c.406dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' + self.assertCountEqual(results['NM_001126113.2:c.406dup']['alt_genomic_loci'], []) + assert results['NM_001126113.2:c.406dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001126113.2:c.406dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001126113.2:c.406dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001126113.2:c.406dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert 
results['NM_001126113.2:c.406dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126113.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119585.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} assert 'NM_001126118.1:c.289dup' in list(results.keys()) - assert results['NM_001126118.1:c.289dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t8:c.289dup' - assert results['NM_001126118.1:c.289dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001126118.1:c.289dup']['alt_genomic_loci'], []) - assert results['NM_001126118.1:c.289dup']['gene_symbol'] == 'TP53' - assert results['NM_001126118.1:c.289dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119590.1(LRG_321p8):p.(Gln97ProfsTer13)', 'slr': 'NP_001119590.1:p.(Q97Pfs*13)'} assert results['NM_001126118.1:c.289dup']['submitted_variant'] == '17-7578523-T-TG' - assert results['NM_001126118.1:c.289dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001126118.1:c.289dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' + assert results['NM_001126118.1:c.289dup']['gene_symbol'] == 'TP53' + assert results['NM_001126118.1:c.289dup']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} assert results['NM_001126118.1:c.289dup']['hgvs_transcript_variant'] == 'NM_001126118.1:c.289dup' + assert results['NM_001126118.1:c.289dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001126118.1:c.289dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001126118.1:c.289dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' - assert results['NM_001126118.1:c.289dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert 
results['NM_001126118.1:c.289dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001126118.1:c.289dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001126118.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001126118.1:c.289dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119590.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126118.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + assert results['NM_001126118.1:c.289dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119590.1(LRG_321p8):p.(Gln97ProfsTer13)', 'slr': 'NP_001119590.1:p.(Q97Pfs*13)'} + assert results['NM_001126118.1:c.289dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t8:c.289dup' + assert results['NM_001126118.1:c.289dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' + self.assertCountEqual(results['NM_001126118.1:c.289dup']['alt_genomic_loci'], []) + assert results['NM_001126118.1:c.289dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001126118.1:c.289dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001126118.1:c.289dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'pos': '7578523', 'ref': 
'T', 'alt': 'TG'}} + assert results['NM_001126118.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001126118.1:c.289dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126118.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119590.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} - assert 'NM_001276695.1:c.289dup' in list(results.keys()) - assert results['NM_001276695.1:c.289dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001276695.1:c.289dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001276695.1:c.289dup']['alt_genomic_loci'], []) - assert results['NM_001276695.1:c.289dup']['gene_symbol'] == 'TP53' - assert results['NM_001276695.1:c.289dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263624.1:p.(Gln97ProfsTer13)', 'slr': 'NP_001263624.1:p.(Q97Pfs*13)'} - assert results['NM_001276695.1:c.289dup']['submitted_variant'] == '17-7578523-T-TG' - assert results['NM_001276695.1:c.289dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001276695.1:c.289dup']['hgvs_lrg_variant'] == '' - assert results['NM_001276695.1:c.289dup']['hgvs_transcript_variant'] == 'NM_001276695.1:c.289dup' - assert results['NM_001276695.1:c.289dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276695.1:c.289dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001276695.1:c.289dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert 
results['NM_001276695.1:c.289dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001276695.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001276695.1:c.289dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263624.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276695.1'} + assert 'NM_001126115.1:c.10dup' in list(results.keys()) + assert results['NM_001126115.1:c.10dup']['submitted_variant'] == '17-7578523-T-TG' + assert results['NM_001126115.1:c.10dup']['gene_symbol'] == 'TP53' + assert results['NM_001126115.1:c.10dup']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} + assert results['NM_001126115.1:c.10dup']['hgvs_transcript_variant'] == 'NM_001126115.1:c.10dup' + assert results['NM_001126115.1:c.10dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001126115.1:c.10dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001126115.1:c.10dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' + assert results['NM_001126115.1:c.10dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119587.1(LRG_321p5):p.(Gln4ProfsTer13)', 'slr': 'NP_001119587.1:p.(Q4Pfs*13)'} + assert results['NM_001126115.1:c.10dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t5:c.10dup' + assert results['NM_001126115.1:c.10dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' + self.assertCountEqual(results['NM_001126115.1:c.10dup']['alt_genomic_loci'], []) + assert results['NM_001126115.1:c.10dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'pos': '7578523', 'ref': 'T', 'alt': 
'TG'}} + assert results['NM_001126115.1:c.10dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001126115.1:c.10dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001126115.1:c.10dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001126115.1:c.10dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126115.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119587.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + + assert 'NM_001126116.1:c.10dup' in list(results.keys()) + assert results['NM_001126116.1:c.10dup']['submitted_variant'] == '17-7578523-T-TG' + assert results['NM_001126116.1:c.10dup']['gene_symbol'] == 'TP53' + assert results['NM_001126116.1:c.10dup']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} + assert results['NM_001126116.1:c.10dup']['hgvs_transcript_variant'] == 'NM_001126116.1:c.10dup' + assert results['NM_001126116.1:c.10dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001126116.1:c.10dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001126116.1:c.10dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' + assert results['NM_001126116.1:c.10dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119588.1(LRG_321p6):p.(Gln4ProfsTer13)', 'slr': 'NP_001119588.1:p.(Q4Pfs*13)'} + assert results['NM_001126116.1:c.10dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t6:c.10dup' + assert 
results['NM_001126116.1:c.10dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' + self.assertCountEqual(results['NM_001126116.1:c.10dup']['alt_genomic_loci'], []) + assert results['NM_001126116.1:c.10dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001126116.1:c.10dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001126116.1:c.10dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001126116.1:c.10dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001126116.1:c.10dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126116.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119588.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} assert 'NM_001276699.1:c.-72dup' in list(results.keys()) - assert results['NM_001276699.1:c.-72dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001276699.1:c.-72dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001276699.1:c.-72dup']['alt_genomic_loci'], []) - assert results['NM_001276699.1:c.-72dup']['gene_symbol'] == 'TP53' - assert results['NM_001276699.1:c.-72dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263628.1:p.?', 'slr': 'NP_001263628.1:p.?'} assert results['NM_001276699.1:c.-72dup']['submitted_variant'] == '17-7578523-T-TG' - assert 
results['NM_001276699.1:c.-72dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001276699.1:c.-72dup']['hgvs_lrg_variant'] == '' + assert results['NM_001276699.1:c.-72dup']['gene_symbol'] == 'TP53' + assert results['NM_001276699.1:c.-72dup']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} assert results['NM_001276699.1:c.-72dup']['hgvs_transcript_variant'] == 'NM_001276699.1:c.-72dup' + assert results['NM_001276699.1:c.-72dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001276699.1:c.-72dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001276699.1:c.-72dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276699.1:c.-72dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001276699.1:c.-72dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001276699.1:c.-72dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001276699.1:c.-72dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001276699.1:c.-72dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263628.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276699.1'} + assert results['NM_001276699.1:c.-72dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263628.1:p.?', 'slr': 'NP_001263628.1:p.?'} + assert results['NM_001276699.1:c.-72dup']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001276699.1:c.-72dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001276699.1:c.-72dup']['alt_genomic_loci'], []) + assert results['NM_001276699.1:c.-72dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276699.1:c.-72dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276699.1:c.-72dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276699.1:c.-72dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276699.1:c.-72dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276699.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263628.1'} - assert 'NM_001126115.1:c.10dup' in list(results.keys()) - assert results['NM_001126115.1:c.10dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t5:c.10dup' - assert results['NM_001126115.1:c.10dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001126115.1:c.10dup']['alt_genomic_loci'], []) - assert results['NM_001126115.1:c.10dup']['gene_symbol'] == 'TP53' - assert results['NM_001126115.1:c.10dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119587.1(LRG_321p5):p.(Gln4ProfsTer13)', 'slr': 'NP_001119587.1:p.(Q4Pfs*13)'} - assert results['NM_001126115.1:c.10dup']['submitted_variant'] == '17-7578523-T-TG' - assert results['NM_001126115.1:c.10dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001126115.1:c.10dup']['hgvs_lrg_variant'] == 
'LRG_321:g.17345dup' - assert results['NM_001126115.1:c.10dup']['hgvs_transcript_variant'] == 'NM_001126115.1:c.10dup' - assert results['NM_001126115.1:c.10dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' - assert results['NM_001126115.1:c.10dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001126115.1:c.10dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001126115.1:c.10dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001126115.1:c.10dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001126115.1:c.10dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119587.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126115.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + assert 'NM_001276696.1:c.289dup' in list(results.keys()) + assert results['NM_001276696.1:c.289dup']['submitted_variant'] == '17-7578523-T-TG' + assert results['NM_001276696.1:c.289dup']['gene_symbol'] == 'TP53' + assert results['NM_001276696.1:c.289dup']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} + assert results['NM_001276696.1:c.289dup']['hgvs_transcript_variant'] == 'NM_001276696.1:c.289dup' + assert results['NM_001276696.1:c.289dup']['genome_context_intronic_sequence'] == '' + assert 
results['NM_001276696.1:c.289dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001276696.1:c.289dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001276696.1:c.289dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263625.1:p.(Gln97ProfsTer13)', 'slr': 'NP_001263625.1:p.(Q97Pfs*13)'} + assert results['NM_001276696.1:c.289dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001276696.1:c.289dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001276696.1:c.289dup']['alt_genomic_loci'], []) + assert results['NM_001276696.1:c.289dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276696.1:c.289dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276696.1:c.289dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276696.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276696.1:c.289dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276696.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263625.1'} - assert 'NM_001276697.1:c.-72dup' in list(results.keys()) - assert results['NM_001276697.1:c.-72dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001276697.1:c.-72dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001276697.1:c.-72dup']['alt_genomic_loci'], []) - assert results['NM_001276697.1:c.-72dup']['gene_symbol'] == 'TP53' - assert 
results['NM_001276697.1:c.-72dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263626.1:p.?', 'slr': 'NP_001263626.1:p.?'} - assert results['NM_001276697.1:c.-72dup']['submitted_variant'] == '17-7578523-T-TG' - assert results['NM_001276697.1:c.-72dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001276697.1:c.-72dup']['hgvs_lrg_variant'] == '' - assert results['NM_001276697.1:c.-72dup']['hgvs_transcript_variant'] == 'NM_001276697.1:c.-72dup' - assert results['NM_001276697.1:c.-72dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276697.1:c.-72dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001276697.1:c.-72dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001276697.1:c.-72dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001276697.1:c.-72dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001276697.1:c.-72dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263626.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276697.1'} + assert 'NM_001126114.2:c.406dup' in list(results.keys()) + assert results['NM_001126114.2:c.406dup']['submitted_variant'] == '17-7578523-T-TG' + assert results['NM_001126114.2:c.406dup']['gene_symbol'] == 'TP53' + assert results['NM_001126114.2:c.406dup']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} + assert 
results['NM_001126114.2:c.406dup']['hgvs_transcript_variant'] == 'NM_001126114.2:c.406dup' + assert results['NM_001126114.2:c.406dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001126114.2:c.406dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001126114.2:c.406dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' + assert results['NM_001126114.2:c.406dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119586.1(LRG_321p3):p.(Gln136ProfsTer13)', 'slr': 'NP_001119586.1:p.(Q136Pfs*13)'} + assert results['NM_001126114.2:c.406dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t3:c.406dup' + assert results['NM_001126114.2:c.406dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' + self.assertCountEqual(results['NM_001126114.2:c.406dup']['alt_genomic_loci'], []) + assert results['NM_001126114.2:c.406dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001126114.2:c.406dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001126114.2:c.406dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001126114.2:c.406dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001126114.2:c.406dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126114.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119586.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} assert 
'NM_001126117.1:c.10dup' in list(results.keys()) - assert results['NM_001126117.1:c.10dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t7:c.10dup' - assert results['NM_001126117.1:c.10dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001126117.1:c.10dup']['alt_genomic_loci'], []) - assert results['NM_001126117.1:c.10dup']['gene_symbol'] == 'TP53' - assert results['NM_001126117.1:c.10dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119589.1(LRG_321p7):p.(Gln4ProfsTer13)', 'slr': 'NP_001119589.1:p.(Q4Pfs*13)'} assert results['NM_001126117.1:c.10dup']['submitted_variant'] == '17-7578523-T-TG' - assert results['NM_001126117.1:c.10dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001126117.1:c.10dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' + assert results['NM_001126117.1:c.10dup']['gene_symbol'] == 'TP53' + assert results['NM_001126117.1:c.10dup']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} assert results['NM_001126117.1:c.10dup']['hgvs_transcript_variant'] == 'NM_001126117.1:c.10dup' + assert results['NM_001126117.1:c.10dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001126117.1:c.10dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001126117.1:c.10dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' - assert results['NM_001126117.1:c.10dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001126117.1:c.10dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001126117.1:c.10dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': 
'7578523', 'alt': 'TG'}} - assert results['NM_001126117.1:c.10dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001126117.1:c.10dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119589.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126117.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + assert results['NM_001126117.1:c.10dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119589.1(LRG_321p7):p.(Gln4ProfsTer13)', 'slr': 'NP_001119589.1:p.(Q4Pfs*13)'} + assert results['NM_001126117.1:c.10dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t7:c.10dup' + assert results['NM_001126117.1:c.10dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' + self.assertCountEqual(results['NM_001126117.1:c.10dup']['alt_genomic_loci'], []) + assert results['NM_001126117.1:c.10dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001126117.1:c.10dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001126117.1:c.10dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001126117.1:c.10dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001126117.1:c.10dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126117.1', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119589.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} assert 'NM_000546.5:c.406dup' in list(results.keys()) - assert results['NM_000546.5:c.406dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t1:c.406dup' - assert results['NM_000546.5:c.406dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000546.5:c.406dup']['alt_genomic_loci'], []) - assert results['NM_000546.5:c.406dup']['gene_symbol'] == 'TP53' - assert results['NM_000546.5:c.406dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000537.3(LRG_321p1):p.(Gln136ProfsTer13)', 'slr': 'NP_000537.3:p.(Q136Pfs*13)'} assert results['NM_000546.5:c.406dup']['submitted_variant'] == '17-7578523-T-TG' - assert results['NM_000546.5:c.406dup']['genome_context_intronic_sequence'] == '' - assert results['NM_000546.5:c.406dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' + assert results['NM_000546.5:c.406dup']['gene_symbol'] == 'TP53' + assert results['NM_000546.5:c.406dup']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} assert results['NM_000546.5:c.406dup']['hgvs_transcript_variant'] == 'NM_000546.5:c.406dup' + assert results['NM_000546.5:c.406dup']['genome_context_intronic_sequence'] == '' + assert results['NM_000546.5:c.406dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000546.5:c.406dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' - assert results['NM_000546.5:c.406dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_000546.5:c.406dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert 
results['NM_000546.5:c.406dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_000546.5:c.406dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_000546.5:c.406dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000537.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000546.5', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} - - assert results['flag'] == 'gene_variant' - assert 'NM_001276696.1:c.289dup' in list(results.keys()) - assert results['NM_001276696.1:c.289dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001276696.1:c.289dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001276696.1:c.289dup']['alt_genomic_loci'], []) - assert results['NM_001276696.1:c.289dup']['gene_symbol'] == 'TP53' - assert results['NM_001276696.1:c.289dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263625.1:p.(Gln97ProfsTer13)', 'slr': 'NP_001263625.1:p.(Q97Pfs*13)'} - assert results['NM_001276696.1:c.289dup']['submitted_variant'] == '17-7578523-T-TG' - assert results['NM_001276696.1:c.289dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001276696.1:c.289dup']['hgvs_lrg_variant'] == '' - assert results['NM_001276696.1:c.289dup']['hgvs_transcript_variant'] == 'NM_001276696.1:c.289dup' - assert results['NM_001276696.1:c.289dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276696.1:c.289dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert 
results['NM_001276696.1:c.289dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001276696.1:c.289dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001276696.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001276696.1:c.289dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263625.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276696.1'} - - assert 'NM_001276698.1:c.-72dup' in list(results.keys()) - assert results['NM_001276698.1:c.-72dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001276698.1:c.-72dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001276698.1:c.-72dup']['alt_genomic_loci'], []) - assert results['NM_001276698.1:c.-72dup']['gene_symbol'] == 'TP53' - assert results['NM_001276698.1:c.-72dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263627.1:p.?', 'slr': 'NP_001263627.1:p.?'} - assert results['NM_001276698.1:c.-72dup']['submitted_variant'] == '17-7578523-T-TG' - assert results['NM_001276698.1:c.-72dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001276698.1:c.-72dup']['hgvs_lrg_variant'] == '' - assert results['NM_001276698.1:c.-72dup']['hgvs_transcript_variant'] == 'NM_001276698.1:c.-72dup' - assert results['NM_001276698.1:c.-72dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276698.1:c.-72dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert 
results['NM_001276698.1:c.-72dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001276698.1:c.-72dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001276698.1:c.-72dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001276698.1:c.-72dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263627.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276698.1'} - - assert 'NM_001276761.1:c.289dup' in list(results.keys()) - assert results['NM_001276761.1:c.289dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001276761.1:c.289dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001276761.1:c.289dup']['alt_genomic_loci'], []) - assert results['NM_001276761.1:c.289dup']['gene_symbol'] == 'TP53' - assert results['NM_001276761.1:c.289dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263690.1:p.(Gln97ProfsTer13)', 'slr': 'NP_001263690.1:p.(Q97Pfs*13)'} - assert results['NM_001276761.1:c.289dup']['submitted_variant'] == '17-7578523-T-TG' - assert results['NM_001276761.1:c.289dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001276761.1:c.289dup']['hgvs_lrg_variant'] == '' - assert results['NM_001276761.1:c.289dup']['hgvs_transcript_variant'] == 'NM_001276761.1:c.289dup' - assert results['NM_001276761.1:c.289dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001276761.1:c.289dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} 
- assert results['NM_001276761.1:c.289dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001276761.1:c.289dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001276761.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001276761.1:c.289dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263690.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276761.1'} - - assert 'NM_001126113.2:c.406dup' in list(results.keys()) - assert results['NM_001126113.2:c.406dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t4:c.406dup' - assert results['NM_001126113.2:c.406dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001126113.2:c.406dup']['alt_genomic_loci'], []) - assert results['NM_001126113.2:c.406dup']['gene_symbol'] == 'TP53' - assert results['NM_001126113.2:c.406dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119585.1(LRG_321p4):p.(Gln136ProfsTer13)', 'slr': 'NP_001119585.1:p.(Q136Pfs*13)'} - assert results['NM_001126113.2:c.406dup']['submitted_variant'] == '17-7578523-T-TG' - assert results['NM_001126113.2:c.406dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001126113.2:c.406dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' - assert results['NM_001126113.2:c.406dup']['hgvs_transcript_variant'] == 'NM_001126113.2:c.406dup' - assert results['NM_001126113.2:c.406dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' - assert results['NM_001126113.2:c.406dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001126113.2:c.406dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001126113.2:c.406dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001126113.2:c.406dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001126113.2:c.406dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119585.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126113.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} - - assert 'NM_001126116.1:c.10dup' in list(results.keys()) - assert results['NM_001126116.1:c.10dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t6:c.10dup' - assert results['NM_001126116.1:c.10dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001126116.1:c.10dup']['alt_genomic_loci'], []) - assert results['NM_001126116.1:c.10dup']['gene_symbol'] == 'TP53' - assert results['NM_001126116.1:c.10dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119588.1(LRG_321p6):p.(Gln4ProfsTer13)', 'slr': 'NP_001119588.1:p.(Q4Pfs*13)'} - assert results['NM_001126116.1:c.10dup']['submitted_variant'] == '17-7578523-T-TG' - assert results['NM_001126116.1:c.10dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001126116.1:c.10dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' - assert results['NM_001126116.1:c.10dup']['hgvs_transcript_variant'] == 'NM_001126116.1:c.10dup' - assert 
results['NM_001126116.1:c.10dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' - assert results['NM_001126116.1:c.10dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001126116.1:c.10dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001126116.1:c.10dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001126116.1:c.10dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001126116.1:c.10dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119588.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126116.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + assert results['NM_000546.5:c.406dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000537.3(LRG_321p1):p.(Gln136ProfsTer13)', 'slr': 'NP_000537.3:p.(Q136Pfs*13)'} + assert results['NM_000546.5:c.406dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t1:c.406dup' + assert results['NM_000546.5:c.406dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' + self.assertCountEqual(results['NM_000546.5:c.406dup']['alt_genomic_loci'], []) + assert results['NM_000546.5:c.406dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_000546.5:c.406dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_000546.5:c.406dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_000546.5:c.406dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_000546.5:c.406dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000546.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000537.3', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} assert 'NM_001126112.2:c.406dup' in list(results.keys()) - assert results['NM_001126112.2:c.406dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t2:c.406dup' - assert results['NM_001126112.2:c.406dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001126112.2:c.406dup']['alt_genomic_loci'], []) - assert results['NM_001126112.2:c.406dup']['gene_symbol'] == 'TP53' - assert results['NM_001126112.2:c.406dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119584.1:p.(Gln136ProfsTer13)', 'slr': 'NP_001119584.1:p.(Q136Pfs*13)'} assert results['NM_001126112.2:c.406dup']['submitted_variant'] == '17-7578523-T-TG' - assert results['NM_001126112.2:c.406dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001126112.2:c.406dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' + assert results['NM_001126112.2:c.406dup']['gene_symbol'] == 'TP53' + assert results['NM_001126112.2:c.406dup']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} assert results['NM_001126112.2:c.406dup']['hgvs_transcript_variant'] == 'NM_001126112.2:c.406dup' + assert 
results['NM_001126112.2:c.406dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001126112.2:c.406dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001126112.2:c.406dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' - assert results['NM_001126112.2:c.406dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001126112.2:c.406dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001126112.2:c.406dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001126112.2:c.406dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001126112.2:c.406dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119584.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126112.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + assert results['NM_001126112.2:c.406dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119584.1:p.(Gln136ProfsTer13)', 'slr': 'NP_001119584.1:p.(Q136Pfs*13)'} + assert results['NM_001126112.2:c.406dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t2:c.406dup' + assert results['NM_001126112.2:c.406dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' + self.assertCountEqual(results['NM_001126112.2:c.406dup']['alt_genomic_loci'], []) + assert results['NM_001126112.2:c.406dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001126112.2:c.406dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001126112.2:c.406dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001126112.2:c.406dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001126112.2:c.406dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126112.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119584.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} - assert 'NM_001126114.2:c.406dup' in list(results.keys()) - assert results['NM_001126114.2:c.406dup']['hgvs_lrg_transcript_variant'] == 'LRG_321t3:c.406dup' - assert results['NM_001126114.2:c.406dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001126114.2:c.406dup']['alt_genomic_loci'], []) - assert results['NM_001126114.2:c.406dup']['gene_symbol'] == 'TP53' - assert results['NM_001126114.2:c.406dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001119586.1(LRG_321p3):p.(Gln136ProfsTer13)', 'slr': 'NP_001119586.1:p.(Q136Pfs*13)'} - assert results['NM_001126114.2:c.406dup']['submitted_variant'] == '17-7578523-T-TG' - assert results['NM_001126114.2:c.406dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001126114.2:c.406dup']['hgvs_lrg_variant'] == 'LRG_321:g.17345dup' - assert results['NM_001126114.2:c.406dup']['hgvs_transcript_variant'] == 'NM_001126114.2:c.406dup' 
- assert results['NM_001126114.2:c.406dup']['hgvs_refseqgene_variant'] == 'NG_017013.2:g.17345dup' - assert results['NM_001126114.2:c.406dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001126114.2:c.406dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001126114.2:c.406dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7578523', 'alt': 'TG'}} - assert results['NM_001126114.2:c.406dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'ref': 'T', 'pos': '7675205', 'alt': 'TG'}} - assert results['NM_001126114.2:c.406dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_017013.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001119586.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001126114.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_321.xml'} + assert 'NM_001276697.1:c.-72dup' in list(results.keys()) + assert results['NM_001276697.1:c.-72dup']['submitted_variant'] == '17-7578523-T-TG' + assert results['NM_001276697.1:c.-72dup']['gene_symbol'] == 'TP53' + assert results['NM_001276697.1:c.-72dup']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} + assert results['NM_001276697.1:c.-72dup']['hgvs_transcript_variant'] == 'NM_001276697.1:c.-72dup' + assert results['NM_001276697.1:c.-72dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001276697.1:c.-72dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001276697.1:c.-72dup']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_001276697.1:c.-72dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263626.1:p.?', 'slr': 'NP_001263626.1:p.?'} + assert results['NM_001276697.1:c.-72dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001276697.1:c.-72dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001276697.1:c.-72dup']['alt_genomic_loci'], []) + assert results['NM_001276697.1:c.-72dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276697.1:c.-72dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276697.1:c.-72dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276697.1:c.-72dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276697.1:c.-72dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276697.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263626.1'} + + assert 'NM_001276695.1:c.289dup' in list(results.keys()) + assert results['NM_001276695.1:c.289dup']['submitted_variant'] == '17-7578523-T-TG' + assert results['NM_001276695.1:c.289dup']['gene_symbol'] == 'TP53' + assert results['NM_001276695.1:c.289dup']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} + assert results['NM_001276695.1:c.289dup']['hgvs_transcript_variant'] == 'NM_001276695.1:c.289dup' + assert results['NM_001276695.1:c.289dup']['genome_context_intronic_sequence'] == '' + assert 
results['NM_001276695.1:c.289dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001276695.1:c.289dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001276695.1:c.289dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263624.1:p.(Gln97ProfsTer13)', 'slr': 'NP_001263624.1:p.(Q97Pfs*13)'} + assert results['NM_001276695.1:c.289dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001276695.1:c.289dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001276695.1:c.289dup']['alt_genomic_loci'], []) + assert results['NM_001276695.1:c.289dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276695.1:c.289dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276695.1:c.289dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276695.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276695.1:c.289dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276695.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263624.1'} + + assert 'NM_001276760.1:c.289dup' in list(results.keys()) + assert results['NM_001276760.1:c.289dup']['submitted_variant'] == '17-7578523-T-TG' + assert results['NM_001276760.1:c.289dup']['gene_symbol'] == 'TP53' + assert results['NM_001276760.1:c.289dup']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} + assert 
results['NM_001276760.1:c.289dup']['hgvs_transcript_variant'] == 'NM_001276760.1:c.289dup' + assert results['NM_001276760.1:c.289dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001276760.1:c.289dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001276760.1:c.289dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001276760.1:c.289dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263689.1:p.(Gln97ProfsTer13)', 'slr': 'NP_001263689.1:p.(Q97Pfs*13)'} + assert results['NM_001276760.1:c.289dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001276760.1:c.289dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001276760.1:c.289dup']['alt_genomic_loci'], []) + assert results['NM_001276760.1:c.289dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276760.1:c.289dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276760.1:c.289dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276760.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276760.1:c.289dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276760.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263689.1'} + + assert 'NM_001276761.1:c.289dup' in list(results.keys()) + assert results['NM_001276761.1:c.289dup']['submitted_variant'] == '17-7578523-T-TG' + assert results['NM_001276761.1:c.289dup']['gene_symbol'] == 'TP53' 
+ assert results['NM_001276761.1:c.289dup']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} + assert results['NM_001276761.1:c.289dup']['hgvs_transcript_variant'] == 'NM_001276761.1:c.289dup' + assert results['NM_001276761.1:c.289dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001276761.1:c.289dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001276761.1:c.289dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001276761.1:c.289dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263690.1:p.(Gln97ProfsTer13)', 'slr': 'NP_001263690.1:p.(Q97Pfs*13)'} + assert results['NM_001276761.1:c.289dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001276761.1:c.289dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001276761.1:c.289dup']['alt_genomic_loci'], []) + assert results['NM_001276761.1:c.289dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276761.1:c.289dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276761.1:c.289dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276761.1:c.289dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276761.1:c.289dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276761.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263690.1'} + assert 'NM_001276698.1:c.-72dup' in 
list(results.keys()) + assert results['NM_001276698.1:c.-72dup']['submitted_variant'] == '17-7578523-T-TG' + assert results['NM_001276698.1:c.-72dup']['gene_symbol'] == 'TP53' + assert results['NM_001276698.1:c.-72dup']['gene_ids'] == {'hgnc_id': 'HGNC:11998', 'entrez_gene_id': '7157', 'ucsc_id': 'uc060aur.1', 'omim_id': ['191170']} + assert results['NM_001276698.1:c.-72dup']['hgvs_transcript_variant'] == 'NM_001276698.1:c.-72dup' + assert results['NM_001276698.1:c.-72dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001276698.1:c.-72dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001276698.1:c.-72dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001276698.1:c.-72dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001263627.1:p.?', 'slr': 'NP_001263627.1:p.?'} + assert results['NM_001276698.1:c.-72dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001276698.1:c.-72dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001276698.1:c.-72dup']['alt_genomic_loci'], []) + assert results['NM_001276698.1:c.-72dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': 'chr17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276698.1:c.-72dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': 'chr17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276698.1:c.-72dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.7578525dup', 'vcf': {'chr': '17', 'pos': '7578523', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276698.1:c.-72dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.7675207dup', 'vcf': {'chr': '17', 'pos': '7675205', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001276698.1:c.-72dup']['reference_sequence_records'] == {'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001276698.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001263627.1'} def test_variant253(self): variant = '17-17119692-A-C' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_144997.6:c.1300+2T>G' in list(results.keys()) - assert results['NM_144997.6:c.1300+2T>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_144997.6:c.1300+2T>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_144997.6:c.1300+2T>G']['alt_genomic_loci'], []) - assert results['NM_144997.6:c.1300+2T>G']['gene_symbol'] == 'FLCN' - assert results['NM_144997.6:c.1300+2T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_659434.2(LRG_325p1):p.?', 'slr': 'NP_659434.2:p.?'} - assert results['NM_144997.6:c.1300+2T>G']['submitted_variant'] == '17-17119692-A-C' - assert results['NM_144997.6:c.1300+2T>G']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_144997.6):c.1300+2T>G' - assert results['NM_144997.6:c.1300+2T>G']['hgvs_lrg_variant'] == '' - assert results['NM_144997.6:c.1300+2T>G']['hgvs_transcript_variant'] == 'NM_144997.6:c.1300+2T>G' - assert results['NM_144997.6:c.1300+2T>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_144997.6:c.1300+2T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '17119692', 'alt': 'C'}} - assert results['NM_144997.6:c.1300+2T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '17216378', 'alt': 'C'}} - assert results['NM_144997.6:c.1300+2T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '17119692', 'alt': 'C'}} - assert results['NM_144997.6:c.1300+2T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000017.11:g.17216378A>C', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '17216378', 'alt': 'C'}} - assert results['NM_144997.6:c.1300+2T>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_659434.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_144997.6'} - - assert 'NM_001353230.1:c.1300+2T>G' in list(results.keys()) - assert results['NM_001353230.1:c.1300+2T>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001353230.1:c.1300+2T>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001353230.1:c.1300+2T>G']['alt_genomic_loci'], []) - assert results['NM_001353230.1:c.1300+2T>G']['gene_symbol'] == 'FLCN' - assert results['NM_001353230.1:c.1300+2T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340159.1:p.?', 'slr': 'NP_001340159.1:p.?'} - assert results['NM_001353230.1:c.1300+2T>G']['submitted_variant'] == '17-17119692-A-C' - assert results['NM_001353230.1:c.1300+2T>G']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_001353230.1):c.1300+2T>G' - assert results['NM_001353230.1:c.1300+2T>G']['hgvs_lrg_variant'] == '' - assert results['NM_001353230.1:c.1300+2T>G']['hgvs_transcript_variant'] == 'NM_001353230.1:c.1300+2T>G' - assert results['NM_001353230.1:c.1300+2T>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353230.1:c.1300+2T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '17119692', 'alt': 'C'}} - assert results['NM_001353230.1:c.1300+2T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '17216378', 'alt': 'C'}} - assert results['NM_001353230.1:c.1300+2T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '17119692', 'alt': 'C'}} - assert 
results['NM_001353230.1:c.1300+2T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '17216378', 'alt': 'C'}} - assert results['NM_001353230.1:c.1300+2T>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340159.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353230.1'} - + assert results['flag'] == 'gene_variant' assert 'NM_001353229.1:c.1354+2T>G' in list(results.keys()) - assert results['NM_001353229.1:c.1354+2T>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001353229.1:c.1354+2T>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001353229.1:c.1354+2T>G']['alt_genomic_loci'], []) - assert results['NM_001353229.1:c.1354+2T>G']['gene_symbol'] == 'FLCN' - assert results['NM_001353229.1:c.1354+2T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340158.1:p.?', 'slr': 'NP_001340158.1:p.?'} assert results['NM_001353229.1:c.1354+2T>G']['submitted_variant'] == '17-17119692-A-C' - assert results['NM_001353229.1:c.1354+2T>G']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_001353229.1):c.1354+2T>G' - assert results['NM_001353229.1:c.1354+2T>G']['hgvs_lrg_variant'] == '' + assert results['NM_001353229.1:c.1354+2T>G']['gene_symbol'] == 'FLCN' + assert results['NM_001353229.1:c.1354+2T>G']['gene_ids'] == {'hgnc_id': 'HGNC:27310', 'entrez_gene_id': '201163', 'ucsc_id': 'uc002gra.5', 'omim_id': ['607273']} assert results['NM_001353229.1:c.1354+2T>G']['hgvs_transcript_variant'] == 'NM_001353229.1:c.1354+2T>G' + assert results['NM_001353229.1:c.1354+2T>G']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_001353229.1):c.1354+2T>G' + assert results['NM_001353229.1:c.1354+2T>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353229.1:c.1354+2T>G']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_001353229.1:c.1354+2T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '17119692', 'alt': 'C'}} - assert results['NM_001353229.1:c.1354+2T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '17216378', 'alt': 'C'}} - assert results['NM_001353229.1:c.1354+2T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '17119692', 'alt': 'C'}} - assert results['NM_001353229.1:c.1354+2T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '17216378', 'alt': 'C'}} - assert results['NM_001353229.1:c.1354+2T>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340158.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353229.1'} + assert results['NM_001353229.1:c.1354+2T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340158.1:p.?', 'slr': 'NP_001340158.1:p.?'} + assert results['NM_001353229.1:c.1354+2T>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353229.1:c.1354+2T>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001353229.1:c.1354+2T>G']['alt_genomic_loci'], []) + assert results['NM_001353229.1:c.1354+2T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': 'chr17', 'pos': '17119692', 'ref': 'A', 'alt': 'C'}} + assert results['NM_001353229.1:c.1354+2T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': 'chr17', 'pos': '17216378', 'ref': 'A', 'alt': 'C'}} + assert results['NM_001353229.1:c.1354+2T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 
'vcf': {'chr': '17', 'pos': '17119692', 'ref': 'A', 'alt': 'C'}} + assert results['NM_001353229.1:c.1354+2T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': '17', 'pos': '17216378', 'ref': 'A', 'alt': 'C'}} + assert results['NM_001353229.1:c.1354+2T>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353229.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340158.1'} - assert results['flag'] == 'gene_variant' assert 'NM_144997.5:c.1300+2T>G' in list(results.keys()) - assert results['NM_144997.5:c.1300+2T>G']['hgvs_lrg_transcript_variant'] == 'LRG_325t1:c.1300+2T>G' - assert results['NM_144997.5:c.1300+2T>G']['refseqgene_context_intronic_sequence'] == 'NG_008001.2(NM_144997.5):c.1300+2T>G' - self.assertCountEqual(results['NM_144997.5:c.1300+2T>G']['alt_genomic_loci'], []) - assert results['NM_144997.5:c.1300+2T>G']['gene_symbol'] == 'FLCN' - assert results['NM_144997.5:c.1300+2T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_659434.2(LRG_325p1):p.?', 'slr': 'NP_659434.2:p.?'} assert results['NM_144997.5:c.1300+2T>G']['submitted_variant'] == '17-17119692-A-C' - assert results['NM_144997.5:c.1300+2T>G']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_144997.5):c.1300+2T>G' - assert results['NM_144997.5:c.1300+2T>G']['hgvs_lrg_variant'] == 'LRG_325:g.25811T>G' + assert results['NM_144997.5:c.1300+2T>G']['gene_symbol'] == 'FLCN' + assert results['NM_144997.5:c.1300+2T>G']['gene_ids'] == {'hgnc_id': 'HGNC:27310', 'entrez_gene_id': '201163', 'ucsc_id': 'uc002gra.5', 'omim_id': ['607273']} assert results['NM_144997.5:c.1300+2T>G']['hgvs_transcript_variant'] == 'NM_144997.5:c.1300+2T>G' + assert results['NM_144997.5:c.1300+2T>G']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_144997.5):c.1300+2T>G' + assert results['NM_144997.5:c.1300+2T>G']['refseqgene_context_intronic_sequence'] == 'NG_008001.2(NM_144997.5):c.1300+2T>G' assert 
results['NM_144997.5:c.1300+2T>G']['hgvs_refseqgene_variant'] == 'NG_008001.2:g.25811T>G' - assert results['NM_144997.5:c.1300+2T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '17119692', 'alt': 'C'}} - assert results['NM_144997.5:c.1300+2T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '17216378', 'alt': 'C'}} - assert results['NM_144997.5:c.1300+2T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '17119692', 'alt': 'C'}} - assert results['NM_144997.5:c.1300+2T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '17216378', 'alt': 'C'}} - assert results['NM_144997.5:c.1300+2T>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008001.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_659434.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_144997.5', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_325.xml'} + assert results['NM_144997.5:c.1300+2T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_659434.2(LRG_325p1):p.?', 'slr': 'NP_659434.2:p.?'} + assert results['NM_144997.5:c.1300+2T>G']['hgvs_lrg_transcript_variant'] == 'LRG_325t1:c.1300+2T>G' + assert results['NM_144997.5:c.1300+2T>G']['hgvs_lrg_variant'] == 'LRG_325:g.25811T>G' + self.assertCountEqual(results['NM_144997.5:c.1300+2T>G']['alt_genomic_loci'], []) + assert results['NM_144997.5:c.1300+2T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': 'chr17', 'pos': '17119692', 'ref': 'A', 'alt': 'C'}} + assert results['NM_144997.5:c.1300+2T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000017.11:g.17216378A>C', 'vcf': {'chr': 'chr17', 'pos': '17216378', 'ref': 'A', 'alt': 'C'}} + assert results['NM_144997.5:c.1300+2T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': '17', 'pos': '17119692', 'ref': 'A', 'alt': 'C'}} + assert results['NM_144997.5:c.1300+2T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': '17', 'pos': '17216378', 'ref': 'A', 'alt': 'C'}} + assert results['NM_144997.5:c.1300+2T>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_144997.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_659434.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008001.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_325.xml'} assert 'NM_001353231.1:c.1300+2T>G' in list(results.keys()) - assert results['NM_001353231.1:c.1300+2T>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001353231.1:c.1300+2T>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001353231.1:c.1300+2T>G']['alt_genomic_loci'], []) - assert results['NM_001353231.1:c.1300+2T>G']['gene_symbol'] == 'FLCN' - assert results['NM_001353231.1:c.1300+2T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340160.1:p.?', 'slr': 'NP_001340160.1:p.?'} assert results['NM_001353231.1:c.1300+2T>G']['submitted_variant'] == '17-17119692-A-C' - assert results['NM_001353231.1:c.1300+2T>G']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_001353231.1):c.1300+2T>G' - assert results['NM_001353231.1:c.1300+2T>G']['hgvs_lrg_variant'] == '' + assert results['NM_001353231.1:c.1300+2T>G']['gene_symbol'] == 'FLCN' + assert results['NM_001353231.1:c.1300+2T>G']['gene_ids'] == {'hgnc_id': 'HGNC:27310', 'entrez_gene_id': '201163', 'ucsc_id': 'uc002gra.5', 'omim_id': ['607273']} assert results['NM_001353231.1:c.1300+2T>G']['hgvs_transcript_variant'] == 
'NM_001353231.1:c.1300+2T>G' + assert results['NM_001353231.1:c.1300+2T>G']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_001353231.1):c.1300+2T>G' + assert results['NM_001353231.1:c.1300+2T>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353231.1:c.1300+2T>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353231.1:c.1300+2T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '17119692', 'alt': 'C'}} - assert results['NM_001353231.1:c.1300+2T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '17216378', 'alt': 'C'}} - assert results['NM_001353231.1:c.1300+2T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '17119692', 'alt': 'C'}} - assert results['NM_001353231.1:c.1300+2T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '17216378', 'alt': 'C'}} - assert results['NM_001353231.1:c.1300+2T>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340160.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353231.1'} + assert results['NM_001353231.1:c.1300+2T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340160.1:p.?', 'slr': 'NP_001340160.1:p.?'} + assert results['NM_001353231.1:c.1300+2T>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353231.1:c.1300+2T>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001353231.1:c.1300+2T>G']['alt_genomic_loci'], []) + assert results['NM_001353231.1:c.1300+2T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': 'chr17', 'pos': '17119692', 'ref': 'A', 'alt': 'C'}} + assert 
results['NM_001353231.1:c.1300+2T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': 'chr17', 'pos': '17216378', 'ref': 'A', 'alt': 'C'}} + assert results['NM_001353231.1:c.1300+2T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': '17', 'pos': '17119692', 'ref': 'A', 'alt': 'C'}} + assert results['NM_001353231.1:c.1300+2T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': '17', 'pos': '17216378', 'ref': 'A', 'alt': 'C'}} + assert results['NM_001353231.1:c.1300+2T>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353231.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340160.1'} + + assert 'NM_001353230.1:c.1300+2T>G' in list(results.keys()) + assert results['NM_001353230.1:c.1300+2T>G']['submitted_variant'] == '17-17119692-A-C' + assert results['NM_001353230.1:c.1300+2T>G']['gene_symbol'] == 'FLCN' + assert results['NM_001353230.1:c.1300+2T>G']['gene_ids'] == {'hgnc_id': 'HGNC:27310', 'entrez_gene_id': '201163', 'ucsc_id': 'uc002gra.5', 'omim_id': ['607273']} + assert results['NM_001353230.1:c.1300+2T>G']['hgvs_transcript_variant'] == 'NM_001353230.1:c.1300+2T>G' + assert results['NM_001353230.1:c.1300+2T>G']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_001353230.1):c.1300+2T>G' + assert results['NM_001353230.1:c.1300+2T>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353230.1:c.1300+2T>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353230.1:c.1300+2T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340159.1:p.?', 'slr': 'NP_001340159.1:p.?'} + assert results['NM_001353230.1:c.1300+2T>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353230.1:c.1300+2T>G']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['NM_001353230.1:c.1300+2T>G']['alt_genomic_loci'], []) + assert results['NM_001353230.1:c.1300+2T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': 'chr17', 'pos': '17119692', 'ref': 'A', 'alt': 'C'}} + assert results['NM_001353230.1:c.1300+2T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': 'chr17', 'pos': '17216378', 'ref': 'A', 'alt': 'C'}} + assert results['NM_001353230.1:c.1300+2T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': '17', 'pos': '17119692', 'ref': 'A', 'alt': 'C'}} + assert results['NM_001353230.1:c.1300+2T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': '17', 'pos': '17216378', 'ref': 'A', 'alt': 'C'}} + assert results['NM_001353230.1:c.1300+2T>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353230.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340159.1'} + assert 'NM_144997.6:c.1300+2T>G' in list(results.keys()) + assert results['NM_144997.6:c.1300+2T>G']['submitted_variant'] == '17-17119692-A-C' + assert results['NM_144997.6:c.1300+2T>G']['gene_symbol'] == 'FLCN' + assert results['NM_144997.6:c.1300+2T>G']['gene_ids'] == {'hgnc_id': 'HGNC:27310', 'entrez_gene_id': '201163', 'ucsc_id': 'uc002gra.5', 'omim_id': ['607273']} + assert results['NM_144997.6:c.1300+2T>G']['hgvs_transcript_variant'] == 'NM_144997.6:c.1300+2T>G' + assert results['NM_144997.6:c.1300+2T>G']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_144997.6):c.1300+2T>G' + assert results['NM_144997.6:c.1300+2T>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_144997.6:c.1300+2T>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_144997.6:c.1300+2T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_659434.2(LRG_325p1):p.?', 'slr': 'NP_659434.2:p.?'} + assert results['NM_144997.6:c.1300+2T>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_144997.6:c.1300+2T>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_144997.6:c.1300+2T>G']['alt_genomic_loci'], []) + assert results['NM_144997.6:c.1300+2T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': 'chr17', 'pos': '17119692', 'ref': 'A', 'alt': 'C'}} + assert results['NM_144997.6:c.1300+2T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': 'chr17', 'pos': '17216378', 'ref': 'A', 'alt': 'C'}} + assert results['NM_144997.6:c.1300+2T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.17119692A>C', 'vcf': {'chr': '17', 'pos': '17119692', 'ref': 'A', 'alt': 'C'}} + assert results['NM_144997.6:c.1300+2T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.17216378A>C', 'vcf': {'chr': '17', 'pos': '17216378', 'ref': 'A', 'alt': 'C'}} + assert results['NM_144997.6:c.1300+2T>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_144997.6', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_659434.2'} def test_variant254(self): variant = '17-41197588-GGACA-G' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_007294.3:c.*103_*106del' in list(results.keys()) - assert results['NM_007294.3:c.*103_*106del']['hgvs_lrg_transcript_variant'] == 'LRG_292t1:c.*103_*106del' - assert results['NM_007294.3:c.*103_*106del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_007294.3:c.*103_*106del']['alt_genomic_loci'], []) - assert results['NM_007294.3:c.*103_*106del']['gene_symbol'] == 'BRCA1' - assert results['NM_007294.3:c.*103_*106del']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_009225.1(LRG_292p1):p.?', 'slr': 'NP_009225.1:p.?'} - assert results['NM_007294.3:c.*103_*106del']['submitted_variant'] == '17-41197588-GGACA-G' - assert results['NM_007294.3:c.*103_*106del']['genome_context_intronic_sequence'] == '' - assert results['NM_007294.3:c.*103_*106del']['hgvs_lrg_variant'] == 'LRG_292:g.172409_172412del' - assert results['NM_007294.3:c.*103_*106del']['hgvs_transcript_variant'] == 'NM_007294.3:c.*103_*106del' - assert results['NM_007294.3:c.*103_*106del']['hgvs_refseqgene_variant'] == 'NG_005905.2:g.172409_172412del' - assert results['NM_007294.3:c.*103_*106del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} - assert results['NM_007294.3:c.*103_*106del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} - assert results['NM_007294.3:c.*103_*106del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} - assert results['NM_007294.3:c.*103_*106del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} - assert results['NM_007294.3:c.*103_*106del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_005905.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009225.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007294.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_292.xml'} - - assert 'NM_007297.3:c.*103_*106del' in list(results.keys()) - assert results['NM_007297.3:c.*103_*106del']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_007297.3:c.*103_*106del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_007297.3:c.*103_*106del']['alt_genomic_loci'], []) - assert results['NM_007297.3:c.*103_*106del']['gene_symbol'] == 'BRCA1' - assert results['NM_007297.3:c.*103_*106del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009228.2:p.?', 'slr': 'NP_009228.2:p.?'} - assert results['NM_007297.3:c.*103_*106del']['submitted_variant'] == '17-41197588-GGACA-G' - assert results['NM_007297.3:c.*103_*106del']['genome_context_intronic_sequence'] == '' - assert results['NM_007297.3:c.*103_*106del']['hgvs_lrg_variant'] == '' - assert results['NM_007297.3:c.*103_*106del']['hgvs_transcript_variant'] == 'NM_007297.3:c.*103_*106del' - assert results['NM_007297.3:c.*103_*106del']['hgvs_refseqgene_variant'] == '' - assert results['NM_007297.3:c.*103_*106del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} - assert results['NM_007297.3:c.*103_*106del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} - assert results['NM_007297.3:c.*103_*106del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} - assert results['NM_007297.3:c.*103_*106del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} - assert results['NM_007297.3:c.*103_*106del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009228.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007297.3'} - + assert results['flag'] == 'gene_variant' assert 
'NR_027676.1:n.5831_5834del' in list(results.keys()) - assert results['NR_027676.1:n.5831_5834del']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_027676.1:n.5831_5834del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_027676.1:n.5831_5834del']['alt_genomic_loci'], []) - assert results['NR_027676.1:n.5831_5834del']['gene_symbol'] == 'BRCA1' - assert results['NR_027676.1:n.5831_5834del']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_027676.1:n.5831_5834del']['submitted_variant'] == '17-41197588-GGACA-G' - assert results['NR_027676.1:n.5831_5834del']['genome_context_intronic_sequence'] == '' - assert results['NR_027676.1:n.5831_5834del']['hgvs_lrg_variant'] == '' + assert results['NR_027676.1:n.5831_5834del']['gene_symbol'] == 'BRCA1' + assert results['NR_027676.1:n.5831_5834del']['gene_ids'] == {'hgnc_id': 'HGNC:1100', 'entrez_gene_id': '672', 'ucsc_id': 'uc002ict.4', 'omim_id': ['113705']} assert results['NR_027676.1:n.5831_5834del']['hgvs_transcript_variant'] == 'NR_027676.1:n.5831_5834del' + assert results['NR_027676.1:n.5831_5834del']['genome_context_intronic_sequence'] == '' + assert results['NR_027676.1:n.5831_5834del']['refseqgene_context_intronic_sequence'] == '' assert results['NR_027676.1:n.5831_5834del']['hgvs_refseqgene_variant'] == '' - assert results['NR_027676.1:n.5831_5834del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} - assert results['NR_027676.1:n.5831_5834del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} - assert results['NR_027676.1:n.5831_5834del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': '17', 
'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} - assert results['NR_027676.1:n.5831_5834del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} + assert results['NR_027676.1:n.5831_5834del']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_027676.1:n.5831_5834del']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_027676.1:n.5831_5834del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_027676.1:n.5831_5834del']['alt_genomic_loci'], []) + assert results['NR_027676.1:n.5831_5834del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': 'chr17', 'pos': '41197588', 'ref': 'GGACA', 'alt': 'G'}} + assert results['NR_027676.1:n.5831_5834del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': 'chr17', 'pos': '43045571', 'ref': 'GGACA', 'alt': 'G'}} + assert results['NR_027676.1:n.5831_5834del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': '17', 'pos': '41197588', 'ref': 'GGACA', 'alt': 'G'}} + assert results['NR_027676.1:n.5831_5834del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': '17', 'pos': '43045571', 'ref': 'GGACA', 'alt': 'G'}} assert results['NR_027676.1:n.5831_5834del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_027676.1'} assert 'NM_007300.3:c.*103_*106del' in list(results.keys()) - assert results['NM_007300.3:c.*103_*106del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_007300.3:c.*103_*106del']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_007300.3:c.*103_*106del']['alt_genomic_loci'], []) - assert results['NM_007300.3:c.*103_*106del']['gene_symbol'] == 'BRCA1' - assert results['NM_007300.3:c.*103_*106del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009231.2:p.?', 'slr': 'NP_009231.2:p.?'} assert results['NM_007300.3:c.*103_*106del']['submitted_variant'] == '17-41197588-GGACA-G' - assert results['NM_007300.3:c.*103_*106del']['genome_context_intronic_sequence'] == '' - assert results['NM_007300.3:c.*103_*106del']['hgvs_lrg_variant'] == '' + assert results['NM_007300.3:c.*103_*106del']['gene_symbol'] == 'BRCA1' + assert results['NM_007300.3:c.*103_*106del']['gene_ids'] == {'hgnc_id': 'HGNC:1100', 'entrez_gene_id': '672', 'ucsc_id': 'uc002ict.4', 'omim_id': ['113705']} assert results['NM_007300.3:c.*103_*106del']['hgvs_transcript_variant'] == 'NM_007300.3:c.*103_*106del' + assert results['NM_007300.3:c.*103_*106del']['genome_context_intronic_sequence'] == '' + assert results['NM_007300.3:c.*103_*106del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007300.3:c.*103_*106del']['hgvs_refseqgene_variant'] == '' - assert results['NM_007300.3:c.*103_*106del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} - assert results['NM_007300.3:c.*103_*106del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} - assert results['NM_007300.3:c.*103_*106del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} - assert results['NM_007300.3:c.*103_*106del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': '17', 'ref': 
'GGACA', 'pos': '43045571', 'alt': 'G'}} - assert results['NM_007300.3:c.*103_*106del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009231.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007300.3'} + assert results['NM_007300.3:c.*103_*106del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009231.2:p.?', 'slr': 'NP_009231.2:p.?'} + assert results['NM_007300.3:c.*103_*106del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007300.3:c.*103_*106del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_007300.3:c.*103_*106del']['alt_genomic_loci'], []) + assert results['NM_007300.3:c.*103_*106del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': 'chr17', 'pos': '41197588', 'ref': 'GGACA', 'alt': 'G'}} + assert results['NM_007300.3:c.*103_*106del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': 'chr17', 'pos': '43045571', 'ref': 'GGACA', 'alt': 'G'}} + assert results['NM_007300.3:c.*103_*106del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': '17', 'pos': '41197588', 'ref': 'GGACA', 'alt': 'G'}} + assert results['NM_007300.3:c.*103_*106del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': '17', 'pos': '43045571', 'ref': 'GGACA', 'alt': 'G'}} + assert results['NM_007300.3:c.*103_*106del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007300.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009231.2'} + + assert 'NM_007297.3:c.*103_*106del' in list(results.keys()) + assert results['NM_007297.3:c.*103_*106del']['submitted_variant'] == '17-41197588-GGACA-G' + assert results['NM_007297.3:c.*103_*106del']['gene_symbol'] == 'BRCA1' + assert 
results['NM_007297.3:c.*103_*106del']['gene_ids'] == {'hgnc_id': 'HGNC:1100', 'entrez_gene_id': '672', 'ucsc_id': 'uc002ict.4', 'omim_id': ['113705']} + assert results['NM_007297.3:c.*103_*106del']['hgvs_transcript_variant'] == 'NM_007297.3:c.*103_*106del' + assert results['NM_007297.3:c.*103_*106del']['genome_context_intronic_sequence'] == '' + assert results['NM_007297.3:c.*103_*106del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007297.3:c.*103_*106del']['hgvs_refseqgene_variant'] == '' + assert results['NM_007297.3:c.*103_*106del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009228.2:p.?', 'slr': 'NP_009228.2:p.?'} + assert results['NM_007297.3:c.*103_*106del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007297.3:c.*103_*106del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_007297.3:c.*103_*106del']['alt_genomic_loci'], []) + assert results['NM_007297.3:c.*103_*106del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': 'chr17', 'pos': '41197588', 'ref': 'GGACA', 'alt': 'G'}} + assert results['NM_007297.3:c.*103_*106del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': 'chr17', 'pos': '43045571', 'ref': 'GGACA', 'alt': 'G'}} + assert results['NM_007297.3:c.*103_*106del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': '17', 'pos': '41197588', 'ref': 'GGACA', 'alt': 'G'}} + assert results['NM_007297.3:c.*103_*106del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': '17', 'pos': '43045571', 'ref': 'GGACA', 'alt': 'G'}} + assert results['NM_007297.3:c.*103_*106del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007297.3', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_009228.2'} - assert results['flag'] == 'gene_variant' assert 'NM_007299.3:c.*209_*212del' in list(results.keys()) - assert results['NM_007299.3:c.*209_*212del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_007299.3:c.*209_*212del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_007299.3:c.*209_*212del']['alt_genomic_loci'], []) - assert results['NM_007299.3:c.*209_*212del']['gene_symbol'] == 'BRCA1' - assert results['NM_007299.3:c.*209_*212del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009230.2:p.?', 'slr': 'NP_009230.2:p.?'} assert results['NM_007299.3:c.*209_*212del']['submitted_variant'] == '17-41197588-GGACA-G' - assert results['NM_007299.3:c.*209_*212del']['genome_context_intronic_sequence'] == '' - assert results['NM_007299.3:c.*209_*212del']['hgvs_lrg_variant'] == '' + assert results['NM_007299.3:c.*209_*212del']['gene_symbol'] == 'BRCA1' + assert results['NM_007299.3:c.*209_*212del']['gene_ids'] == {'hgnc_id': 'HGNC:1100', 'entrez_gene_id': '672', 'ucsc_id': 'uc002ict.4', 'omim_id': ['113705']} assert results['NM_007299.3:c.*209_*212del']['hgvs_transcript_variant'] == 'NM_007299.3:c.*209_*212del' + assert results['NM_007299.3:c.*209_*212del']['genome_context_intronic_sequence'] == '' + assert results['NM_007299.3:c.*209_*212del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007299.3:c.*209_*212del']['hgvs_refseqgene_variant'] == '' - assert results['NM_007299.3:c.*209_*212del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} - assert results['NM_007299.3:c.*209_*212del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} - assert 
results['NM_007299.3:c.*209_*212del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} - assert results['NM_007299.3:c.*209_*212del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} - assert results['NM_007299.3:c.*209_*212del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009230.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007299.3'} + assert results['NM_007299.3:c.*209_*212del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009230.2:p.?', 'slr': 'NP_009230.2:p.?'} + assert results['NM_007299.3:c.*209_*212del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007299.3:c.*209_*212del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_007299.3:c.*209_*212del']['alt_genomic_loci'], []) + assert results['NM_007299.3:c.*209_*212del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': 'chr17', 'pos': '41197588', 'ref': 'GGACA', 'alt': 'G'}} + assert results['NM_007299.3:c.*209_*212del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': 'chr17', 'pos': '43045571', 'ref': 'GGACA', 'alt': 'G'}} + assert results['NM_007299.3:c.*209_*212del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': '17', 'pos': '41197588', 'ref': 'GGACA', 'alt': 'G'}} + assert results['NM_007299.3:c.*209_*212del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': '17', 'pos': '43045571', 'ref': 'GGACA', 'alt': 'G'}} + assert results['NM_007299.3:c.*209_*212del']['reference_sequence_records'] == 
{'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007299.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009230.2'} assert 'NM_007298.3:c.*103_*106del' in list(results.keys()) - assert results['NM_007298.3:c.*103_*106del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_007298.3:c.*103_*106del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_007298.3:c.*103_*106del']['alt_genomic_loci'], []) - assert results['NM_007298.3:c.*103_*106del']['gene_symbol'] == 'BRCA1' - assert results['NM_007298.3:c.*103_*106del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009229.2:p.?', 'slr': 'NP_009229.2:p.?'} assert results['NM_007298.3:c.*103_*106del']['submitted_variant'] == '17-41197588-GGACA-G' - assert results['NM_007298.3:c.*103_*106del']['genome_context_intronic_sequence'] == '' - assert results['NM_007298.3:c.*103_*106del']['hgvs_lrg_variant'] == '' + assert results['NM_007298.3:c.*103_*106del']['gene_symbol'] == 'BRCA1' + assert results['NM_007298.3:c.*103_*106del']['gene_ids'] == {'hgnc_id': 'HGNC:1100', 'entrez_gene_id': '672', 'ucsc_id': 'uc002ict.4', 'omim_id': ['113705']} assert results['NM_007298.3:c.*103_*106del']['hgvs_transcript_variant'] == 'NM_007298.3:c.*103_*106del' + assert results['NM_007298.3:c.*103_*106del']['genome_context_intronic_sequence'] == '' + assert results['NM_007298.3:c.*103_*106del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007298.3:c.*103_*106del']['hgvs_refseqgene_variant'] == '' - assert results['NM_007298.3:c.*103_*106del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} - assert results['NM_007298.3:c.*103_*106del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': 'chr17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} - assert 
results['NM_007298.3:c.*103_*106del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '41197588', 'alt': 'G'}} - assert results['NM_007298.3:c.*103_*106del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': '17', 'ref': 'GGACA', 'pos': '43045571', 'alt': 'G'}} - assert results['NM_007298.3:c.*103_*106del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009229.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007298.3'} + assert results['NM_007298.3:c.*103_*106del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009229.2:p.?', 'slr': 'NP_009229.2:p.?'} + assert results['NM_007298.3:c.*103_*106del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007298.3:c.*103_*106del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_007298.3:c.*103_*106del']['alt_genomic_loci'], []) + assert results['NM_007298.3:c.*103_*106del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': 'chr17', 'pos': '41197588', 'ref': 'GGACA', 'alt': 'G'}} + assert results['NM_007298.3:c.*103_*106del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': 'chr17', 'pos': '43045571', 'ref': 'GGACA', 'alt': 'G'}} + assert results['NM_007298.3:c.*103_*106del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': '17', 'pos': '41197588', 'ref': 'GGACA', 'alt': 'G'}} + assert results['NM_007298.3:c.*103_*106del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': '17', 'pos': '43045571', 'ref': 'GGACA', 'alt': 'G'}} + assert results['NM_007298.3:c.*103_*106del']['reference_sequence_records'] == 
{'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007298.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009229.2'} + assert 'NM_007294.3:c.*103_*106del' in list(results.keys()) + assert results['NM_007294.3:c.*103_*106del']['submitted_variant'] == '17-41197588-GGACA-G' + assert results['NM_007294.3:c.*103_*106del']['gene_symbol'] == 'BRCA1' + assert results['NM_007294.3:c.*103_*106del']['gene_ids'] == {'hgnc_id': 'HGNC:1100', 'entrez_gene_id': '672', 'ucsc_id': 'uc002ict.4', 'omim_id': ['113705']} + assert results['NM_007294.3:c.*103_*106del']['hgvs_transcript_variant'] == 'NM_007294.3:c.*103_*106del' + assert results['NM_007294.3:c.*103_*106del']['genome_context_intronic_sequence'] == '' + assert results['NM_007294.3:c.*103_*106del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007294.3:c.*103_*106del']['hgvs_refseqgene_variant'] == 'NG_005905.2:g.172409_172412del' + assert results['NM_007294.3:c.*103_*106del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009225.1(LRG_292p1):p.?', 'slr': 'NP_009225.1:p.?'} + assert results['NM_007294.3:c.*103_*106del']['hgvs_lrg_transcript_variant'] == 'LRG_292t1:c.*103_*106del' + assert results['NM_007294.3:c.*103_*106del']['hgvs_lrg_variant'] == 'LRG_292:g.172409_172412del' + self.assertCountEqual(results['NM_007294.3:c.*103_*106del']['alt_genomic_loci'], []) + assert results['NM_007294.3:c.*103_*106del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': 'chr17', 'pos': '41197588', 'ref': 'GGACA', 'alt': 'G'}} + assert results['NM_007294.3:c.*103_*106del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': 'chr17', 'pos': '43045571', 'ref': 'GGACA', 'alt': 'G'}} + assert results['NM_007294.3:c.*103_*106del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41197590_41197593del', 'vcf': {'chr': '17', 'pos': 
'41197588', 'ref': 'GGACA', 'alt': 'G'}} + assert results['NM_007294.3:c.*103_*106del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43045573_43045576del', 'vcf': {'chr': '17', 'pos': '43045571', 'ref': 'GGACA', 'alt': 'G'}} + assert results['NM_007294.3:c.*103_*106del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007294.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009225.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_005905.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_292.xml'} def test_variant255(self): variant = '17-41256884-C-G' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_007299.3:c.301+1G>C' in list(results.keys()) - assert results['NM_007299.3:c.301+1G>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_007299.3:c.301+1G>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_007299.3:c.301+1G>C']['alt_genomic_loci'], []) - assert results['NM_007299.3:c.301+1G>C']['gene_symbol'] == 'BRCA1' - assert results['NM_007299.3:c.301+1G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009230.2:p.?', 'slr': 'NP_009230.2:p.?'} - assert results['NM_007299.3:c.301+1G>C']['submitted_variant'] == '17-41256884-C-G' - assert results['NM_007299.3:c.301+1G>C']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_007299.3):c.301+1G>C' - assert results['NM_007299.3:c.301+1G>C']['hgvs_lrg_variant'] == '' - assert results['NM_007299.3:c.301+1G>C']['hgvs_transcript_variant'] == 'NM_007299.3:c.301+1G>C' - assert results['NM_007299.3:c.301+1G>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_007299.3:c.301+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '41256884', 'alt': 'G'}} - assert results['NM_007299.3:c.301+1G>C']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '43104867', 'alt': 'G'}} - assert results['NM_007299.3:c.301+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '41256884', 'alt': 'G'}} - assert results['NM_007299.3:c.301+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '43104867', 'alt': 'G'}} - assert results['NM_007299.3:c.301+1G>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009230.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007299.3'} - + assert results['flag'] == 'gene_variant' assert 'NR_027676.1:n.440+1G>C' in list(results.keys()) - assert results['NR_027676.1:n.440+1G>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_027676.1:n.440+1G>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_027676.1:n.440+1G>C']['alt_genomic_loci'], []) - assert results['NR_027676.1:n.440+1G>C']['gene_symbol'] == 'BRCA1' - assert results['NR_027676.1:n.440+1G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_027676.1:n.440+1G>C']['submitted_variant'] == '17-41256884-C-G' - assert results['NR_027676.1:n.440+1G>C']['genome_context_intronic_sequence'] == 'NC_000017.10(NR_027676.1):c.440+1G>C' - assert results['NR_027676.1:n.440+1G>C']['hgvs_lrg_variant'] == '' + assert results['NR_027676.1:n.440+1G>C']['gene_symbol'] == 'BRCA1' + assert results['NR_027676.1:n.440+1G>C']['gene_ids'] == {'hgnc_id': 'HGNC:1100', 'entrez_gene_id': '672', 'ucsc_id': 'uc002ict.4', 'omim_id': ['113705']} assert results['NR_027676.1:n.440+1G>C']['hgvs_transcript_variant'] == 'NR_027676.1:n.440+1G>C' + assert results['NR_027676.1:n.440+1G>C']['genome_context_intronic_sequence'] == 
'NC_000017.10(NR_027676.1):c.440+1G>C' + assert results['NR_027676.1:n.440+1G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NR_027676.1:n.440+1G>C']['hgvs_refseqgene_variant'] == '' - assert results['NR_027676.1:n.440+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '41256884', 'alt': 'G'}} - assert results['NR_027676.1:n.440+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '43104867', 'alt': 'G'}} - assert results['NR_027676.1:n.440+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '41256884', 'alt': 'G'}} - assert results['NR_027676.1:n.440+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '43104867', 'alt': 'G'}} + assert results['NR_027676.1:n.440+1G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_027676.1:n.440+1G>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_027676.1:n.440+1G>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_027676.1:n.440+1G>C']['alt_genomic_loci'], []) + assert results['NR_027676.1:n.440+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'pos': '41256884', 'ref': 'C', 'alt': 'G'}} + assert results['NR_027676.1:n.440+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'pos': '43104867', 'ref': 'C', 'alt': 'G'}} + assert results['NR_027676.1:n.440+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'pos': '41256884', 'ref': 'C', 
'alt': 'G'}} + assert results['NR_027676.1:n.440+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'pos': '43104867', 'ref': 'C', 'alt': 'G'}} assert results['NR_027676.1:n.440+1G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_027676.1'} assert 'NM_007300.3:c.301+1G>C' in list(results.keys()) - assert results['NM_007300.3:c.301+1G>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_007300.3:c.301+1G>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_007300.3:c.301+1G>C']['alt_genomic_loci'], []) - assert results['NM_007300.3:c.301+1G>C']['gene_symbol'] == 'BRCA1' - assert results['NM_007300.3:c.301+1G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009231.2:p.?', 'slr': 'NP_009231.2:p.?'} assert results['NM_007300.3:c.301+1G>C']['submitted_variant'] == '17-41256884-C-G' - assert results['NM_007300.3:c.301+1G>C']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_007300.3):c.301+1G>C' - assert results['NM_007300.3:c.301+1G>C']['hgvs_lrg_variant'] == '' + assert results['NM_007300.3:c.301+1G>C']['gene_symbol'] == 'BRCA1' + assert results['NM_007300.3:c.301+1G>C']['gene_ids'] == {'hgnc_id': 'HGNC:1100', 'entrez_gene_id': '672', 'ucsc_id': 'uc002ict.4', 'omim_id': ['113705']} assert results['NM_007300.3:c.301+1G>C']['hgvs_transcript_variant'] == 'NM_007300.3:c.301+1G>C' + assert results['NM_007300.3:c.301+1G>C']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_007300.3):c.301+1G>C' + assert results['NM_007300.3:c.301+1G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007300.3:c.301+1G>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_007300.3:c.301+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '41256884', 'alt': 'G'}} - assert 
results['NM_007300.3:c.301+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '43104867', 'alt': 'G'}} - assert results['NM_007300.3:c.301+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '41256884', 'alt': 'G'}} - assert results['NM_007300.3:c.301+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '43104867', 'alt': 'G'}} - assert results['NM_007300.3:c.301+1G>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009231.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007300.3'} - - assert 'NM_007298.3:c.301+1G>C' in list(results.keys()) - assert results['NM_007298.3:c.301+1G>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_007298.3:c.301+1G>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_007298.3:c.301+1G>C']['alt_genomic_loci'], []) - assert results['NM_007298.3:c.301+1G>C']['gene_symbol'] == 'BRCA1' - assert results['NM_007298.3:c.301+1G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009229.2:p.?', 'slr': 'NP_009229.2:p.?'} - assert results['NM_007298.3:c.301+1G>C']['submitted_variant'] == '17-41256884-C-G' - assert results['NM_007298.3:c.301+1G>C']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_007298.3):c.301+1G>C' - assert results['NM_007298.3:c.301+1G>C']['hgvs_lrg_variant'] == '' - assert results['NM_007298.3:c.301+1G>C']['hgvs_transcript_variant'] == 'NM_007298.3:c.301+1G>C' - assert results['NM_007298.3:c.301+1G>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_007298.3:c.301+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '41256884', 'alt': 'G'}} - assert 
results['NM_007298.3:c.301+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '43104867', 'alt': 'G'}} - assert results['NM_007298.3:c.301+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '41256884', 'alt': 'G'}} - assert results['NM_007298.3:c.301+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '43104867', 'alt': 'G'}} - assert results['NM_007298.3:c.301+1G>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009229.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007298.3'} + assert results['NM_007300.3:c.301+1G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009231.2:p.?', 'slr': 'NP_009231.2:p.?'} + assert results['NM_007300.3:c.301+1G>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007300.3:c.301+1G>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_007300.3:c.301+1G>C']['alt_genomic_loci'], []) + assert results['NM_007300.3:c.301+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'pos': '41256884', 'ref': 'C', 'alt': 'G'}} + assert results['NM_007300.3:c.301+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'pos': '43104867', 'ref': 'C', 'alt': 'G'}} + assert results['NM_007300.3:c.301+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'pos': '41256884', 'ref': 'C', 'alt': 'G'}} + assert results['NM_007300.3:c.301+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'pos': '43104867', 'ref': 'C', 'alt': 
'G'}} + assert results['NM_007300.3:c.301+1G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007300.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009231.2'} assert 'NM_007297.3:c.160+1G>C' in list(results.keys()) - assert results['NM_007297.3:c.160+1G>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_007297.3:c.160+1G>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_007297.3:c.160+1G>C']['alt_genomic_loci'], []) - assert results['NM_007297.3:c.160+1G>C']['gene_symbol'] == 'BRCA1' - assert results['NM_007297.3:c.160+1G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009228.2:p.?', 'slr': 'NP_009228.2:p.?'} assert results['NM_007297.3:c.160+1G>C']['submitted_variant'] == '17-41256884-C-G' - assert results['NM_007297.3:c.160+1G>C']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_007297.3):c.160+1G>C' - assert results['NM_007297.3:c.160+1G>C']['hgvs_lrg_variant'] == '' + assert results['NM_007297.3:c.160+1G>C']['gene_symbol'] == 'BRCA1' + assert results['NM_007297.3:c.160+1G>C']['gene_ids'] == {'hgnc_id': 'HGNC:1100', 'entrez_gene_id': '672', 'ucsc_id': 'uc002ict.4', 'omim_id': ['113705']} assert results['NM_007297.3:c.160+1G>C']['hgvs_transcript_variant'] == 'NM_007297.3:c.160+1G>C' + assert results['NM_007297.3:c.160+1G>C']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_007297.3):c.160+1G>C' + assert results['NM_007297.3:c.160+1G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007297.3:c.160+1G>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_007297.3:c.160+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '41256884', 'alt': 'G'}} - assert results['NM_007297.3:c.160+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': 
'43104867', 'alt': 'G'}} - assert results['NM_007297.3:c.160+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '41256884', 'alt': 'G'}} - assert results['NM_007297.3:c.160+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '43104867', 'alt': 'G'}} - assert results['NM_007297.3:c.160+1G>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009228.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007297.3'} + assert results['NM_007297.3:c.160+1G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009228.2:p.?', 'slr': 'NP_009228.2:p.?'} + assert results['NM_007297.3:c.160+1G>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007297.3:c.160+1G>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_007297.3:c.160+1G>C']['alt_genomic_loci'], []) + assert results['NM_007297.3:c.160+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'pos': '41256884', 'ref': 'C', 'alt': 'G'}} + assert results['NM_007297.3:c.160+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'pos': '43104867', 'ref': 'C', 'alt': 'G'}} + assert results['NM_007297.3:c.160+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'pos': '41256884', 'ref': 'C', 'alt': 'G'}} + assert results['NM_007297.3:c.160+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'pos': '43104867', 'ref': 'C', 'alt': 'G'}} + assert results['NM_007297.3:c.160+1G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007297.3', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_009228.2'} + + assert 'NM_007299.3:c.301+1G>C' in list(results.keys()) + assert results['NM_007299.3:c.301+1G>C']['submitted_variant'] == '17-41256884-C-G' + assert results['NM_007299.3:c.301+1G>C']['gene_symbol'] == 'BRCA1' + assert results['NM_007299.3:c.301+1G>C']['gene_ids'] == {'hgnc_id': 'HGNC:1100', 'entrez_gene_id': '672', 'ucsc_id': 'uc002ict.4', 'omim_id': ['113705']} + assert results['NM_007299.3:c.301+1G>C']['hgvs_transcript_variant'] == 'NM_007299.3:c.301+1G>C' + assert results['NM_007299.3:c.301+1G>C']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_007299.3):c.301+1G>C' + assert results['NM_007299.3:c.301+1G>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007299.3:c.301+1G>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_007299.3:c.301+1G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009230.2:p.?', 'slr': 'NP_009230.2:p.?'} + assert results['NM_007299.3:c.301+1G>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007299.3:c.301+1G>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_007299.3:c.301+1G>C']['alt_genomic_loci'], []) + assert results['NM_007299.3:c.301+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'pos': '41256884', 'ref': 'C', 'alt': 'G'}} + assert results['NM_007299.3:c.301+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'pos': '43104867', 'ref': 'C', 'alt': 'G'}} + assert results['NM_007299.3:c.301+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'pos': '41256884', 'ref': 'C', 'alt': 'G'}} + assert results['NM_007299.3:c.301+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'pos': '43104867', 'ref': 'C', 'alt': 'G'}} + 
assert results['NM_007299.3:c.301+1G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007299.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009230.2'} + + assert 'NM_007298.3:c.301+1G>C' in list(results.keys()) + assert results['NM_007298.3:c.301+1G>C']['submitted_variant'] == '17-41256884-C-G' + assert results['NM_007298.3:c.301+1G>C']['gene_symbol'] == 'BRCA1' + assert results['NM_007298.3:c.301+1G>C']['gene_ids'] == {'hgnc_id': 'HGNC:1100', 'entrez_gene_id': '672', 'ucsc_id': 'uc002ict.4', 'omim_id': ['113705']} + assert results['NM_007298.3:c.301+1G>C']['hgvs_transcript_variant'] == 'NM_007298.3:c.301+1G>C' + assert results['NM_007298.3:c.301+1G>C']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_007298.3):c.301+1G>C' + assert results['NM_007298.3:c.301+1G>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007298.3:c.301+1G>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_007298.3:c.301+1G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009229.2:p.?', 'slr': 'NP_009229.2:p.?'} + assert results['NM_007298.3:c.301+1G>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007298.3:c.301+1G>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_007298.3:c.301+1G>C']['alt_genomic_loci'], []) + assert results['NM_007298.3:c.301+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'pos': '41256884', 'ref': 'C', 'alt': 'G'}} + assert results['NM_007298.3:c.301+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'pos': '43104867', 'ref': 'C', 'alt': 'G'}} + assert results['NM_007298.3:c.301+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'pos': '41256884', 'ref': 'C', 'alt': 'G'}} + assert 
results['NM_007298.3:c.301+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'pos': '43104867', 'ref': 'C', 'alt': 'G'}} + assert results['NM_007298.3:c.301+1G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007298.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009229.2'} - assert results['flag'] == 'gene_variant' assert 'NM_007294.3:c.301+1G>C' in list(results.keys()) - assert results['NM_007294.3:c.301+1G>C']['hgvs_lrg_transcript_variant'] == 'LRG_292t1:c.301+1G>C' - assert results['NM_007294.3:c.301+1G>C']['refseqgene_context_intronic_sequence'] == 'NG_005905.2(NM_007294.3):c.301+1G>C' - self.assertCountEqual(results['NM_007294.3:c.301+1G>C']['alt_genomic_loci'], []) - assert results['NM_007294.3:c.301+1G>C']['gene_symbol'] == 'BRCA1' - assert results['NM_007294.3:c.301+1G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009225.1(LRG_292p1):p.?', 'slr': 'NP_009225.1:p.?'} assert results['NM_007294.3:c.301+1G>C']['submitted_variant'] == '17-41256884-C-G' - assert results['NM_007294.3:c.301+1G>C']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_007294.3):c.301+1G>C' - assert results['NM_007294.3:c.301+1G>C']['hgvs_lrg_variant'] == 'LRG_292:g.113117G>C' + assert results['NM_007294.3:c.301+1G>C']['gene_symbol'] == 'BRCA1' + assert results['NM_007294.3:c.301+1G>C']['gene_ids'] == {'hgnc_id': 'HGNC:1100', 'entrez_gene_id': '672', 'ucsc_id': 'uc002ict.4', 'omim_id': ['113705']} assert results['NM_007294.3:c.301+1G>C']['hgvs_transcript_variant'] == 'NM_007294.3:c.301+1G>C' + assert results['NM_007294.3:c.301+1G>C']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_007294.3):c.301+1G>C' + assert results['NM_007294.3:c.301+1G>C']['refseqgene_context_intronic_sequence'] == 'NG_005905.2(NM_007294.3):c.301+1G>C' assert results['NM_007294.3:c.301+1G>C']['hgvs_refseqgene_variant'] == 'NG_005905.2:g.113117G>C' - assert 
results['NM_007294.3:c.301+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '41256884', 'alt': 'G'}} - assert results['NM_007294.3:c.301+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '43104867', 'alt': 'G'}} - assert results['NM_007294.3:c.301+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '41256884', 'alt': 'G'}} - assert results['NM_007294.3:c.301+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '43104867', 'alt': 'G'}} - assert results['NM_007294.3:c.301+1G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_005905.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009225.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007294.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_292.xml'} - + assert results['NM_007294.3:c.301+1G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009225.1(LRG_292p1):p.?', 'slr': 'NP_009225.1:p.?'} + assert results['NM_007294.3:c.301+1G>C']['hgvs_lrg_transcript_variant'] == 'LRG_292t1:c.301+1G>C' + assert results['NM_007294.3:c.301+1G>C']['hgvs_lrg_variant'] == 'LRG_292:g.113117G>C' + self.assertCountEqual(results['NM_007294.3:c.301+1G>C']['alt_genomic_loci'], []) + assert results['NM_007294.3:c.301+1G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': 'chr17', 'pos': '41256884', 'ref': 'C', 'alt': 'G'}} + assert results['NM_007294.3:c.301+1G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': 'chr17', 'pos': '43104867', 'ref': 'C', 'alt': 'G'}} + assert 
results['NM_007294.3:c.301+1G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.41256884C>G', 'vcf': {'chr': '17', 'pos': '41256884', 'ref': 'C', 'alt': 'G'}} + assert results['NM_007294.3:c.301+1G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.43104867C>G', 'vcf': {'chr': '17', 'pos': '43104867', 'ref': 'C', 'alt': 'G'}} + assert results['NM_007294.3:c.301+1G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007294.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009225.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_005905.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_292.xml'} def test_variant256(self): variant = '17-42991428-C-A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_001363846.1:c.490G>T' in list(results.keys()) - assert results['NM_001363846.1:c.490G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001363846.1:c.490G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001363846.1:c.490G>T']['alt_genomic_loci'], []) - assert results['NM_001363846.1:c.490G>T']['gene_symbol'] == 'GFAP' - assert results['NM_001363846.1:c.490G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350775.1:p.(Glu164Ter)', 'slr': 'NP_001350775.1:p.(E164*)'} - assert results['NM_001363846.1:c.490G>T']['submitted_variant'] == '17-42991428-C-A' - assert results['NM_001363846.1:c.490G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001363846.1:c.490G>T']['hgvs_lrg_variant'] == '' - assert results['NM_001363846.1:c.490G>T']['hgvs_transcript_variant'] == 'NM_001363846.1:c.490G>T' - assert results['NM_001363846.1:c.490G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001363846.1:c.490G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': 
'chr17', 'ref': 'C', 'pos': '42991428', 'alt': 'A'}} - assert 'hg38' not in list(results['NM_001363846.1:c.490G>T']['primary_assembly_loci'].keys()) - assert results['NM_001363846.1:c.490G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '42991428', 'alt': 'A'}} - assert 'grch38' not in list(results['NM_001363846.1:c.490G>T']['primary_assembly_loci'].keys()) - assert results['NM_001363846.1:c.490G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350775.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363846.1'} - + assert results['flag'] == 'gene_variant' assert 'NM_001131019.2:c.490G>T' in list(results.keys()) - assert results['NM_001131019.2:c.490G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001131019.2:c.490G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001131019.2:c.490G>T']['alt_genomic_loci'], []) - assert results['NM_001131019.2:c.490G>T']['gene_symbol'] == 'GFAP' - assert results['NM_001131019.2:c.490G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124491.1:p.(Glu164Ter)', 'slr': 'NP_001124491.1:p.(E164*)'} assert results['NM_001131019.2:c.490G>T']['submitted_variant'] == '17-42991428-C-A' - assert results['NM_001131019.2:c.490G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001131019.2:c.490G>T']['hgvs_lrg_variant'] == '' + assert results['NM_001131019.2:c.490G>T']['gene_symbol'] == 'GFAP' + assert results['NM_001131019.2:c.490G>T']['gene_ids'] == {'hgnc_id': 'HGNC:4235', 'entrez_gene_id': '2670', 'ucsc_id': 'uc002ihq.3', 'omim_id': ['137780']} assert results['NM_001131019.2:c.490G>T']['hgvs_transcript_variant'] == 'NM_001131019.2:c.490G>T' + assert results['NM_001131019.2:c.490G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001131019.2:c.490G>T']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_001131019.2:c.490G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001131019.2:c.490G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '42991428', 'alt': 'A'}} - assert results['NM_001131019.2:c.490G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '44914060', 'alt': 'A'}} - assert results['NM_001131019.2:c.490G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '42991428', 'alt': 'A'}} - assert results['NM_001131019.2:c.490G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '44914060', 'alt': 'A'}} - assert results['NM_001131019.2:c.490G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124491.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001131019.2'} + assert results['NM_001131019.2:c.490G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124491.1:p.(Glu164Ter)', 'slr': 'NP_001124491.1:p.(E164*)'} + assert results['NM_001131019.2:c.490G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001131019.2:c.490G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001131019.2:c.490G>T']['alt_genomic_loci'], []) + assert results['NM_001131019.2:c.490G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': 'chr17', 'pos': '42991428', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001131019.2:c.490G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': 'chr17', 'pos': '44914060', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001131019.2:c.490G>T']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': '17', 'pos': '42991428', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001131019.2:c.490G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': '17', 'pos': '44914060', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001131019.2:c.490G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001131019.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124491.1'} - assert results['flag'] == 'gene_variant' assert 'NM_001242376.1:c.490G>T' in list(results.keys()) - assert results['NM_001242376.1:c.490G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001242376.1:c.490G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001242376.1:c.490G>T']['alt_genomic_loci'], []) - assert results['NM_001242376.1:c.490G>T']['gene_symbol'] == 'GFAP' - assert results['NM_001242376.1:c.490G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001229305.1:p.(Glu164Ter)', 'slr': 'NP_001229305.1:p.(E164*)'} assert results['NM_001242376.1:c.490G>T']['submitted_variant'] == '17-42991428-C-A' - assert results['NM_001242376.1:c.490G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001242376.1:c.490G>T']['hgvs_lrg_variant'] == '' + assert results['NM_001242376.1:c.490G>T']['gene_symbol'] == 'GFAP' + assert results['NM_001242376.1:c.490G>T']['gene_ids'] == {'hgnc_id': 'HGNC:4235', 'entrez_gene_id': '2670', 'ucsc_id': 'uc002ihq.3', 'omim_id': ['137780']} assert results['NM_001242376.1:c.490G>T']['hgvs_transcript_variant'] == 'NM_001242376.1:c.490G>T' + assert results['NM_001242376.1:c.490G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001242376.1:c.490G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001242376.1:c.490G>T']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_001242376.1:c.490G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '42991428', 'alt': 'A'}} - assert results['NM_001242376.1:c.490G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '44914060', 'alt': 'A'}} - assert results['NM_001242376.1:c.490G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '42991428', 'alt': 'A'}} - assert results['NM_001242376.1:c.490G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '44914060', 'alt': 'A'}} - assert results['NM_001242376.1:c.490G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001229305.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001242376.1'} + assert results['NM_001242376.1:c.490G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001229305.1:p.(Glu164Ter)', 'slr': 'NP_001229305.1:p.(E164*)'} + assert results['NM_001242376.1:c.490G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001242376.1:c.490G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001242376.1:c.490G>T']['alt_genomic_loci'], []) + assert results['NM_001242376.1:c.490G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': 'chr17', 'pos': '42991428', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001242376.1:c.490G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': 'chr17', 'pos': '44914060', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001242376.1:c.490G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': 
'17', 'pos': '42991428', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001242376.1:c.490G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': '17', 'pos': '44914060', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001242376.1:c.490G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001242376.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001229305.1'} + + assert 'NM_001363846.1:c.490G>T' in list(results.keys()) + assert results['NM_001363846.1:c.490G>T']['submitted_variant'] == '17-42991428-C-A' + assert results['NM_001363846.1:c.490G>T']['gene_symbol'] == 'GFAP' + assert results['NM_001363846.1:c.490G>T']['gene_ids'] == {'hgnc_id': 'HGNC:4235', 'entrez_gene_id': '2670', 'ucsc_id': 'uc002ihq.3', 'omim_id': ['137780']} + assert results['NM_001363846.1:c.490G>T']['hgvs_transcript_variant'] == 'NM_001363846.1:c.490G>T' + assert results['NM_001363846.1:c.490G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001363846.1:c.490G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001363846.1:c.490G>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001363846.1:c.490G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350775.1:p.(Glu164Ter)', 'slr': 'NP_001350775.1:p.(E164*)'} + assert results['NM_001363846.1:c.490G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001363846.1:c.490G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001363846.1:c.490G>T']['alt_genomic_loci'], []) + assert results['NM_001363846.1:c.490G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': 'chr17', 'pos': '42991428', 'ref': 'C', 'alt': 'A'}} + assert 'hg38' not in list(results['NM_001363846.1:c.490G>T']['primary_assembly_loci'].keys()) + assert results['NM_001363846.1:c.490G>T']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': '17', 'pos': '42991428', 'ref': 'C', 'alt': 'A'}} + assert 'grch38' not in list(results['NM_001363846.1:c.490G>T']['primary_assembly_loci'].keys()) + assert results['NM_001363846.1:c.490G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363846.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350775.1'} assert 'NM_002055.4:c.490G>T' in list(results.keys()) - assert results['NM_002055.4:c.490G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_002055.4:c.490G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_002055.4:c.490G>T']['alt_genomic_loci'], []) - assert results['NM_002055.4:c.490G>T']['gene_symbol'] == 'GFAP' - assert results['NM_002055.4:c.490G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002046.1:p.(Glu164Ter)', 'slr': 'NP_002046.1:p.(E164*)'} assert results['NM_002055.4:c.490G>T']['submitted_variant'] == '17-42991428-C-A' - assert results['NM_002055.4:c.490G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_002055.4:c.490G>T']['hgvs_lrg_variant'] == '' + assert results['NM_002055.4:c.490G>T']['gene_symbol'] == 'GFAP' + assert results['NM_002055.4:c.490G>T']['gene_ids'] == {'hgnc_id': 'HGNC:4235', 'entrez_gene_id': '2670', 'ucsc_id': 'uc002ihq.3', 'omim_id': ['137780']} assert results['NM_002055.4:c.490G>T']['hgvs_transcript_variant'] == 'NM_002055.4:c.490G>T' + assert results['NM_002055.4:c.490G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_002055.4:c.490G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_002055.4:c.490G>T']['hgvs_refseqgene_variant'] == 'NG_008401.1:g.6487G>T' - assert results['NM_002055.4:c.490G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '42991428', 'alt': 'A'}} - assert 
results['NM_002055.4:c.490G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '44914060', 'alt': 'A'}} - assert results['NM_002055.4:c.490G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '42991428', 'alt': 'A'}} - assert results['NM_002055.4:c.490G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '44914060', 'alt': 'A'}} - assert results['NM_002055.4:c.490G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008401.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002046.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002055.4'} - + assert results['NM_002055.4:c.490G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002046.1:p.(Glu164Ter)', 'slr': 'NP_002046.1:p.(E164*)'} + assert results['NM_002055.4:c.490G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_002055.4:c.490G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_002055.4:c.490G>T']['alt_genomic_loci'], []) + assert results['NM_002055.4:c.490G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': 'chr17', 'pos': '42991428', 'ref': 'C', 'alt': 'A'}} + assert results['NM_002055.4:c.490G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 'vcf': {'chr': 'chr17', 'pos': '44914060', 'ref': 'C', 'alt': 'A'}} + assert results['NM_002055.4:c.490G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.42991428C>A', 'vcf': {'chr': '17', 'pos': '42991428', 'ref': 'C', 'alt': 'A'}} + assert results['NM_002055.4:c.490G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.44914060C>A', 
'vcf': {'chr': '17', 'pos': '44914060', 'ref': 'C', 'alt': 'A'}} + assert results['NM_002055.4:c.490G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002055.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002046.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008401.1'} def test_variant257(self): variant = '17-48252809-A-T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NR_135553.1:n.1022A>T' in list(results.keys()) - assert results['NR_135553.1:n.1022A>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_135553.1:n.1022A>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_135553.1:n.1022A>T']['alt_genomic_loci'], []) - assert results['NR_135553.1:n.1022A>T']['gene_symbol'] == 'SGCA' - assert results['NR_135553.1:n.1022A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} - assert results['NR_135553.1:n.1022A>T']['submitted_variant'] == '17-48252809-A-T' - assert results['NR_135553.1:n.1022A>T']['genome_context_intronic_sequence'] == '' - assert results['NR_135553.1:n.1022A>T']['hgvs_lrg_variant'] == '' - assert results['NR_135553.1:n.1022A>T']['hgvs_transcript_variant'] == 'NR_135553.1:n.1022A>T' - assert results['NR_135553.1:n.1022A>T']['hgvs_refseqgene_variant'] == '' - assert results['NR_135553.1:n.1022A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '48252809', 'alt': 'T'}} - assert results['NR_135553.1:n.1022A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50175448A>T', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '50175448', 'alt': 'T'}} - assert results['NR_135553.1:n.1022A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '48252809', 'alt': 
'T'}} - assert results['NR_135553.1:n.1022A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50175448A>T', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '50175448', 'alt': 'T'}} - assert results['NR_135553.1:n.1022A>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_135553.1'} + assert results['flag'] == 'gene_variant' + assert 'NM_001135697.2:c.*11A>T' in list(results.keys()) + assert results['NM_001135697.2:c.*11A>T']['submitted_variant'] == '17-48252809-A-T' + assert results['NM_001135697.2:c.*11A>T']['gene_symbol'] == 'SGCA' + assert results['NM_001135697.2:c.*11A>T']['gene_ids'] == {'hgnc_id': 'HGNC:10805', 'entrez_gene_id': '6442', 'ucsc_id': 'uc002iqi.4', 'omim_id': ['600119']} + assert results['NM_001135697.2:c.*11A>T']['hgvs_transcript_variant'] == 'NM_001135697.2:c.*11A>T' + assert results['NM_001135697.2:c.*11A>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001135697.2:c.*11A>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001135697.2:c.*11A>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001135697.2:c.*11A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129169.1:p.?', 'slr': 'NP_001129169.1:p.?'} + assert results['NM_001135697.2:c.*11A>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001135697.2:c.*11A>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001135697.2:c.*11A>T']['alt_genomic_loci'], []) + assert results['NM_001135697.2:c.*11A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': 'chr17', 'pos': '48252809', 'ref': 'A', 'alt': 'T'}} + assert results['NM_001135697.2:c.*11A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50175448A>T', 'vcf': {'chr': 'chr17', 'pos': '50175448', 'ref': 'A', 'alt': 'T'}} + assert results['NM_001135697.2:c.*11A>T']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': '17', 'pos': '48252809', 'ref': 'A', 'alt': 'T'}} + assert results['NM_001135697.2:c.*11A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50175448A>T', 'vcf': {'chr': '17', 'pos': '50175448', 'ref': 'A', 'alt': 'T'}} + assert results['NM_001135697.2:c.*11A>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135697.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129169.1'} + + assert 'NM_000023.3:c.*11A>T' in list(results.keys()) + assert results['NM_000023.3:c.*11A>T']['submitted_variant'] == '17-48252809-A-T' + assert results['NM_000023.3:c.*11A>T']['gene_symbol'] == 'SGCA' + assert results['NM_000023.3:c.*11A>T']['gene_ids'] == {'hgnc_id': 'HGNC:10805', 'entrez_gene_id': '6442', 'ucsc_id': 'uc002iqi.4', 'omim_id': ['600119']} + assert results['NM_000023.3:c.*11A>T']['hgvs_transcript_variant'] == 'NM_000023.3:c.*11A>T' + assert results['NM_000023.3:c.*11A>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000023.3:c.*11A>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000023.3:c.*11A>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_000023.3:c.*11A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000014.1(LRG_203p1):p.?', 'slr': 'NP_000014.1:p.?'} + assert results['NM_000023.3:c.*11A>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000023.3:c.*11A>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000023.3:c.*11A>T']['alt_genomic_loci'], []) + assert results['NM_000023.3:c.*11A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': 'chr17', 'pos': '48252809', 'ref': 'A', 'alt': 'T'}} + assert results['NM_000023.3:c.*11A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50175448A>T', 'vcf': {'chr': 'chr17', 'pos': '50175448', 
'ref': 'A', 'alt': 'T'}} + assert results['NM_000023.3:c.*11A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': '17', 'pos': '48252809', 'ref': 'A', 'alt': 'T'}} + assert results['NM_000023.3:c.*11A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50175448A>T', 'vcf': {'chr': '17', 'pos': '50175448', 'ref': 'A', 'alt': 'T'}} + assert results['NM_000023.3:c.*11A>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000023.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000014.1'} assert 'NM_001135697.1:c.*11A>T' in list(results.keys()) - assert results['NM_001135697.1:c.*11A>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001135697.1:c.*11A>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001135697.1:c.*11A>T']['alt_genomic_loci'], []) - assert results['NM_001135697.1:c.*11A>T']['gene_symbol'] == 'SGCA' - assert results['NM_001135697.1:c.*11A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129169.1:p.?', 'slr': 'NP_001129169.1:p.?'} assert results['NM_001135697.1:c.*11A>T']['submitted_variant'] == '17-48252809-A-T' - assert results['NM_001135697.1:c.*11A>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001135697.1:c.*11A>T']['hgvs_lrg_variant'] == '' + assert results['NM_001135697.1:c.*11A>T']['gene_symbol'] == 'SGCA' + assert results['NM_001135697.1:c.*11A>T']['gene_ids'] == {'hgnc_id': 'HGNC:10805', 'entrez_gene_id': '6442', 'ucsc_id': 'uc002iqi.4', 'omim_id': ['600119']} assert results['NM_001135697.1:c.*11A>T']['hgvs_transcript_variant'] == 'NM_001135697.1:c.*11A>T' + assert results['NM_001135697.1:c.*11A>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001135697.1:c.*11A>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001135697.1:c.*11A>T']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_001135697.1:c.*11A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '48252809', 'alt': 'T'}} + assert results['NM_001135697.1:c.*11A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129169.1:p.?', 'slr': 'NP_001129169.1:p.?'} + assert results['NM_001135697.1:c.*11A>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001135697.1:c.*11A>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001135697.1:c.*11A>T']['alt_genomic_loci'], []) + assert results['NM_001135697.1:c.*11A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': 'chr17', 'pos': '48252809', 'ref': 'A', 'alt': 'T'}} assert 'hg38' not in list(results['NM_001135697.1:c.*11A>T']['primary_assembly_loci'].keys()) - assert results['NM_001135697.1:c.*11A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '48252809', 'alt': 'T'}} + assert results['NM_001135697.1:c.*11A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': '17', 'pos': '48252809', 'ref': 'A', 'alt': 'T'}} assert 'grch38' not in list(results['NM_001135697.1:c.*11A>T']['primary_assembly_loci'].keys()) - assert results['NM_001135697.1:c.*11A>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129169.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135697.1'} + assert results['NM_001135697.1:c.*11A>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135697.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129169.1'} - assert results['flag'] == 'gene_variant' assert 'NM_000023.2:c.*11A>T' in list(results.keys()) - assert results['NM_000023.2:c.*11A>T']['hgvs_lrg_transcript_variant'] == 
'LRG_203t1:c.*11A>T' - assert results['NM_000023.2:c.*11A>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000023.2:c.*11A>T']['alt_genomic_loci'], []) - assert results['NM_000023.2:c.*11A>T']['gene_symbol'] == 'SGCA' - assert results['NM_000023.2:c.*11A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000014.1(LRG_203p1):p.?', 'slr': 'NP_000014.1:p.?'} assert results['NM_000023.2:c.*11A>T']['submitted_variant'] == '17-48252809-A-T' - assert results['NM_000023.2:c.*11A>T']['genome_context_intronic_sequence'] == '' - assert results['NM_000023.2:c.*11A>T']['hgvs_lrg_variant'] == 'LRG_203:g.14444A>T' + assert results['NM_000023.2:c.*11A>T']['gene_symbol'] == 'SGCA' + assert results['NM_000023.2:c.*11A>T']['gene_ids'] == {'hgnc_id': 'HGNC:10805', 'entrez_gene_id': '6442', 'ucsc_id': 'uc002iqi.4', 'omim_id': ['600119']} assert results['NM_000023.2:c.*11A>T']['hgvs_transcript_variant'] == 'NM_000023.2:c.*11A>T' + assert results['NM_000023.2:c.*11A>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000023.2:c.*11A>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000023.2:c.*11A>T']['hgvs_refseqgene_variant'] == 'NG_008889.1:g.14444A>T' - assert results['NM_000023.2:c.*11A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '48252809', 'alt': 'T'}} + assert results['NM_000023.2:c.*11A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000014.1(LRG_203p1):p.?', 'slr': 'NP_000014.1:p.?'} + assert results['NM_000023.2:c.*11A>T']['hgvs_lrg_transcript_variant'] == 'LRG_203t1:c.*11A>T' + assert results['NM_000023.2:c.*11A>T']['hgvs_lrg_variant'] == 'LRG_203:g.14444A>T' + self.assertCountEqual(results['NM_000023.2:c.*11A>T']['alt_genomic_loci'], []) + assert results['NM_000023.2:c.*11A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': 'chr17', 
'pos': '48252809', 'ref': 'A', 'alt': 'T'}} assert 'hg38' not in list(results['NM_000023.2:c.*11A>T']['primary_assembly_loci'].keys()) - assert results['NM_000023.2:c.*11A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '48252809', 'alt': 'T'}} + assert results['NM_000023.2:c.*11A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': '17', 'pos': '48252809', 'ref': 'A', 'alt': 'T'}} assert 'grch38' not in list(results['NM_000023.2:c.*11A>T']['primary_assembly_loci'].keys()) - assert results['NM_000023.2:c.*11A>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008889.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000014.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000023.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_203.xml'} - - assert 'NM_001135697.2:c.*11A>T' in list(results.keys()) - assert results['NM_001135697.2:c.*11A>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001135697.2:c.*11A>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001135697.2:c.*11A>T']['alt_genomic_loci'], []) - assert results['NM_001135697.2:c.*11A>T']['gene_symbol'] == 'SGCA' - assert results['NM_001135697.2:c.*11A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129169.1:p.?', 'slr': 'NP_001129169.1:p.?'} - assert results['NM_001135697.2:c.*11A>T']['submitted_variant'] == '17-48252809-A-T' - assert results['NM_001135697.2:c.*11A>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001135697.2:c.*11A>T']['hgvs_lrg_variant'] == '' - assert results['NM_001135697.2:c.*11A>T']['hgvs_transcript_variant'] == 'NM_001135697.2:c.*11A>T' - assert results['NM_001135697.2:c.*11A>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001135697.2:c.*11A>T']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '48252809', 'alt': 'T'}} - assert results['NM_001135697.2:c.*11A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50175448A>T', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '50175448', 'alt': 'T'}} - assert results['NM_001135697.2:c.*11A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '48252809', 'alt': 'T'}} - assert results['NM_001135697.2:c.*11A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50175448A>T', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '50175448', 'alt': 'T'}} - assert results['NM_001135697.2:c.*11A>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129169.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135697.2'} - - assert 'NM_000023.3:c.*11A>T' in list(results.keys()) - assert results['NM_000023.3:c.*11A>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000023.3:c.*11A>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000023.3:c.*11A>T']['alt_genomic_loci'], []) - assert results['NM_000023.3:c.*11A>T']['gene_symbol'] == 'SGCA' - assert results['NM_000023.3:c.*11A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000014.1(LRG_203p1):p.?', 'slr': 'NP_000014.1:p.?'} - assert results['NM_000023.3:c.*11A>T']['submitted_variant'] == '17-48252809-A-T' - assert results['NM_000023.3:c.*11A>T']['genome_context_intronic_sequence'] == '' - assert results['NM_000023.3:c.*11A>T']['hgvs_lrg_variant'] == '' - assert results['NM_000023.3:c.*11A>T']['hgvs_transcript_variant'] == 'NM_000023.3:c.*11A>T' - assert results['NM_000023.3:c.*11A>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_000023.3:c.*11A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000017.10:g.48252809A>T', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '48252809', 'alt': 'T'}} - assert results['NM_000023.3:c.*11A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50175448A>T', 'vcf': {'chr': 'chr17', 'ref': 'A', 'pos': '50175448', 'alt': 'T'}} - assert results['NM_000023.3:c.*11A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '48252809', 'alt': 'T'}} - assert results['NM_000023.3:c.*11A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50175448A>T', 'vcf': {'chr': '17', 'ref': 'A', 'pos': '50175448', 'alt': 'T'}} - assert results['NM_000023.3:c.*11A>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000014.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000023.3'} + assert results['NM_000023.2:c.*11A>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000023.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000014.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008889.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_203.xml'} + assert 'NR_135553.1:n.1022A>T' in list(results.keys()) + assert results['NR_135553.1:n.1022A>T']['submitted_variant'] == '17-48252809-A-T' + assert results['NR_135553.1:n.1022A>T']['gene_symbol'] == 'SGCA' + assert results['NR_135553.1:n.1022A>T']['gene_ids'] == {'hgnc_id': 'HGNC:10805', 'entrez_gene_id': '6442', 'ucsc_id': 'uc002iqi.4', 'omim_id': ['600119']} + assert results['NR_135553.1:n.1022A>T']['hgvs_transcript_variant'] == 'NR_135553.1:n.1022A>T' + assert results['NR_135553.1:n.1022A>T']['genome_context_intronic_sequence'] == '' + assert results['NR_135553.1:n.1022A>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_135553.1:n.1022A>T']['hgvs_refseqgene_variant'] == '' + assert 
results['NR_135553.1:n.1022A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_135553.1:n.1022A>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_135553.1:n.1022A>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_135553.1:n.1022A>T']['alt_genomic_loci'], []) + assert results['NR_135553.1:n.1022A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': 'chr17', 'pos': '48252809', 'ref': 'A', 'alt': 'T'}} + assert results['NR_135553.1:n.1022A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50175448A>T', 'vcf': {'chr': 'chr17', 'pos': '50175448', 'ref': 'A', 'alt': 'T'}} + assert results['NR_135553.1:n.1022A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48252809A>T', 'vcf': {'chr': '17', 'pos': '48252809', 'ref': 'A', 'alt': 'T'}} + assert results['NR_135553.1:n.1022A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50175448A>T', 'vcf': {'chr': '17', 'pos': '50175448', 'ref': 'A', 'alt': 'T'}} + assert results['NR_135553.1:n.1022A>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_135553.1'} def test_variant258(self): variant = '17-62022709-G-GTC' @@ -12113,22 +12462,22 @@ def test_variant258(self): assert results['flag'] == 'gene_variant' assert 'NM_000334.4:c.3720+9_3720+10dup' in list(results.keys()) - assert results['NM_000334.4:c.3720+9_3720+10dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000334.4:c.3720+9_3720+10dup']['refseqgene_context_intronic_sequence'] == 'NG_011699.1(NM_000334.4):c.3720+9_3720+10dup' - self.assertCountEqual(results['NM_000334.4:c.3720+9_3720+10dup']['alt_genomic_loci'], []) - assert results['NM_000334.4:c.3720+9_3720+10dup']['gene_symbol'] == 'SCN4A' - assert 
results['NM_000334.4:c.3720+9_3720+10dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000325.4:p.?', 'slr': 'NP_000325.4:p.?'} assert results['NM_000334.4:c.3720+9_3720+10dup']['submitted_variant'] == '17-62022709-G-GTC' - assert results['NM_000334.4:c.3720+9_3720+10dup']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000334.4):c.3720+9_3720+10dup' - assert results['NM_000334.4:c.3720+9_3720+10dup']['hgvs_lrg_variant'] == '' + assert results['NM_000334.4:c.3720+9_3720+10dup']['gene_symbol'] == 'SCN4A' + assert results['NM_000334.4:c.3720+9_3720+10dup']['gene_ids'] == {'hgnc_id': 'HGNC:10591', 'entrez_gene_id': '6329', 'ucsc_id': 'uc060iti.1', 'omim_id': ['603967']} assert results['NM_000334.4:c.3720+9_3720+10dup']['hgvs_transcript_variant'] == 'NM_000334.4:c.3720+9_3720+10dup' + assert results['NM_000334.4:c.3720+9_3720+10dup']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000334.4):c.3720+9_3720+10dup' + assert results['NM_000334.4:c.3720+9_3720+10dup']['refseqgene_context_intronic_sequence'] == 'NG_011699.1(NM_000334.4):c.3720+9_3720+10dup' assert results['NM_000334.4:c.3720+9_3720+10dup']['hgvs_refseqgene_variant'] == 'NG_011699.1:g.32568_32569dup' - assert results['NM_000334.4:c.3720+9_3720+10dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.62022710_62022711dup', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '62022709', 'alt': 'GTC'}} - assert results['NM_000334.4:c.3720+9_3720+10dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945350_63945351dup', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '63945349', 'alt': 'GTC'}} - assert results['NM_000334.4:c.3720+9_3720+10dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.62022710_62022711dup', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '62022709', 'alt': 'GTC'}} - assert results['NM_000334.4:c.3720+9_3720+10dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000017.11:g.63945350_63945351dup', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '63945349', 'alt': 'GTC'}} - assert results['NM_000334.4:c.3720+9_3720+10dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011699.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4'} - + assert results['NM_000334.4:c.3720+9_3720+10dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000325.4:p.?', 'slr': 'NP_000325.4:p.?'} + assert results['NM_000334.4:c.3720+9_3720+10dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000334.4:c.3720+9_3720+10dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000334.4:c.3720+9_3720+10dup']['alt_genomic_loci'], []) + assert results['NM_000334.4:c.3720+9_3720+10dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.62022710_62022711dup', 'vcf': {'chr': 'chr17', 'pos': '62022709', 'ref': 'G', 'alt': 'GTC'}} + assert results['NM_000334.4:c.3720+9_3720+10dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945350_63945351dup', 'vcf': {'chr': 'chr17', 'pos': '63945349', 'ref': 'G', 'alt': 'GTC'}} + assert results['NM_000334.4:c.3720+9_3720+10dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.62022710_62022711dup', 'vcf': {'chr': '17', 'pos': '62022709', 'ref': 'G', 'alt': 'GTC'}} + assert results['NM_000334.4:c.3720+9_3720+10dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945350_63945351dup', 'vcf': {'chr': '17', 'pos': '63945349', 'ref': 'G', 'alt': 'GTC'}} + assert results['NM_000334.4:c.3720+9_3720+10dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011699.1'} def 
test_variant259(self): variant = '17-62022711-C-CT' @@ -12137,22 +12486,22 @@ def test_variant259(self): assert results['flag'] == 'gene_variant' assert 'NM_000334.4:c.3720+8_3720+9insA' in list(results.keys()) - assert results['NM_000334.4:c.3720+8_3720+9insA']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000334.4:c.3720+8_3720+9insA']['refseqgene_context_intronic_sequence'] == 'NG_011699.1(NM_000334.4):c.3720+8_3720+9insA' - self.assertCountEqual(results['NM_000334.4:c.3720+8_3720+9insA']['alt_genomic_loci'], []) - assert results['NM_000334.4:c.3720+8_3720+9insA']['gene_symbol'] == 'SCN4A' - assert results['NM_000334.4:c.3720+8_3720+9insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000325.4:p.?', 'slr': 'NP_000325.4:p.?'} assert results['NM_000334.4:c.3720+8_3720+9insA']['submitted_variant'] == '17-62022711-C-CT' - assert results['NM_000334.4:c.3720+8_3720+9insA']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000334.4):c.3720+8_3720+9insA' - assert results['NM_000334.4:c.3720+8_3720+9insA']['hgvs_lrg_variant'] == '' + assert results['NM_000334.4:c.3720+8_3720+9insA']['gene_symbol'] == 'SCN4A' + assert results['NM_000334.4:c.3720+8_3720+9insA']['gene_ids'] == {'hgnc_id': 'HGNC:10591', 'entrez_gene_id': '6329', 'ucsc_id': 'uc060iti.1', 'omim_id': ['603967']} assert results['NM_000334.4:c.3720+8_3720+9insA']['hgvs_transcript_variant'] == 'NM_000334.4:c.3720+8_3720+9insA' + assert results['NM_000334.4:c.3720+8_3720+9insA']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000334.4):c.3720+8_3720+9insA' + assert results['NM_000334.4:c.3720+8_3720+9insA']['refseqgene_context_intronic_sequence'] == 'NG_011699.1(NM_000334.4):c.3720+8_3720+9insA' assert results['NM_000334.4:c.3720+8_3720+9insA']['hgvs_refseqgene_variant'] == 'NG_011699.1:g.32567_32568insA' - assert results['NM_000334.4:c.3720+8_3720+9insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.62022711_62022712insT', 'vcf': {'chr': 
'chr17', 'ref': 'C', 'pos': '62022711', 'alt': 'CT'}} - assert results['NM_000334.4:c.3720+8_3720+9insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945351_63945352insT', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '63945351', 'alt': 'CT'}} - assert results['NM_000334.4:c.3720+8_3720+9insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.62022711_62022712insT', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '62022711', 'alt': 'CT'}} - assert results['NM_000334.4:c.3720+8_3720+9insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945351_63945352insT', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '63945351', 'alt': 'CT'}} - assert results['NM_000334.4:c.3720+8_3720+9insA']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011699.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4'} - + assert results['NM_000334.4:c.3720+8_3720+9insA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000325.4:p.?', 'slr': 'NP_000325.4:p.?'} + assert results['NM_000334.4:c.3720+8_3720+9insA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000334.4:c.3720+8_3720+9insA']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000334.4:c.3720+8_3720+9insA']['alt_genomic_loci'], []) + assert results['NM_000334.4:c.3720+8_3720+9insA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.62022711_62022712insT', 'vcf': {'chr': 'chr17', 'pos': '62022711', 'ref': 'C', 'alt': 'CT'}} + assert results['NM_000334.4:c.3720+8_3720+9insA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945351_63945352insT', 'vcf': {'chr': 'chr17', 'pos': '63945351', 'ref': 'C', 'alt': 'CT'}} + assert results['NM_000334.4:c.3720+8_3720+9insA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000017.10:g.62022711_62022712insT', 'vcf': {'chr': '17', 'pos': '62022711', 'ref': 'C', 'alt': 'CT'}} + assert results['NM_000334.4:c.3720+8_3720+9insA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945351_63945352insT', 'vcf': {'chr': '17', 'pos': '63945351', 'ref': 'C', 'alt': 'CT'}} + assert results['NM_000334.4:c.3720+8_3720+9insA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011699.1'} def test_variant260(self): variant = '17-62023005-G-GGC' @@ -12161,22 +12510,22 @@ def test_variant260(self): assert results['flag'] == 'gene_variant' assert 'NM_000334.4:c.3442-8_3442-7insGC' in list(results.keys()) - assert results['NM_000334.4:c.3442-8_3442-7insGC']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000334.4:c.3442-8_3442-7insGC']['refseqgene_context_intronic_sequence'] == 'NG_011699.1(NM_000334.4):c.3442-8_3442-7insGC' - self.assertCountEqual(results['NM_000334.4:c.3442-8_3442-7insGC']['alt_genomic_loci'], []) - assert results['NM_000334.4:c.3442-8_3442-7insGC']['gene_symbol'] == 'SCN4A' - assert results['NM_000334.4:c.3442-8_3442-7insGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000325.4:p.?', 'slr': 'NP_000325.4:p.?'} assert results['NM_000334.4:c.3442-8_3442-7insGC']['submitted_variant'] == '17-62023005-G-GGC' - assert results['NM_000334.4:c.3442-8_3442-7insGC']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000334.4):c.3442-8_3442-7insGC' - assert results['NM_000334.4:c.3442-8_3442-7insGC']['hgvs_lrg_variant'] == '' + assert results['NM_000334.4:c.3442-8_3442-7insGC']['gene_symbol'] == 'SCN4A' + assert results['NM_000334.4:c.3442-8_3442-7insGC']['gene_ids'] == {'hgnc_id': 'HGNC:10591', 'entrez_gene_id': '6329', 'ucsc_id': 'uc060iti.1', 'omim_id': ['603967']} assert 
results['NM_000334.4:c.3442-8_3442-7insGC']['hgvs_transcript_variant'] == 'NM_000334.4:c.3442-8_3442-7insGC' + assert results['NM_000334.4:c.3442-8_3442-7insGC']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000334.4):c.3442-8_3442-7insGC' + assert results['NM_000334.4:c.3442-8_3442-7insGC']['refseqgene_context_intronic_sequence'] == 'NG_011699.1(NM_000334.4):c.3442-8_3442-7insGC' assert results['NM_000334.4:c.3442-8_3442-7insGC']['hgvs_refseqgene_variant'] == 'NG_011699.1:g.32273_32274insGC' - assert results['NM_000334.4:c.3442-8_3442-7insGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.62023005_62023006insGC', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '62023005', 'alt': 'GGC'}} - assert results['NM_000334.4:c.3442-8_3442-7insGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945645_63945646insGC', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '63945645', 'alt': 'GGC'}} - assert results['NM_000334.4:c.3442-8_3442-7insGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.62023005_62023006insGC', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '62023005', 'alt': 'GGC'}} - assert results['NM_000334.4:c.3442-8_3442-7insGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945645_63945646insGC', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '63945645', 'alt': 'GGC'}} - assert results['NM_000334.4:c.3442-8_3442-7insGC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011699.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4'} - + assert results['NM_000334.4:c.3442-8_3442-7insGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000325.4:p.?', 'slr': 'NP_000325.4:p.?'} + assert results['NM_000334.4:c.3442-8_3442-7insGC']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_000334.4:c.3442-8_3442-7insGC']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000334.4:c.3442-8_3442-7insGC']['alt_genomic_loci'], []) + assert results['NM_000334.4:c.3442-8_3442-7insGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.62023005_62023006insGC', 'vcf': {'chr': 'chr17', 'pos': '62023005', 'ref': 'G', 'alt': 'GGC'}} + assert results['NM_000334.4:c.3442-8_3442-7insGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945645_63945646insGC', 'vcf': {'chr': 'chr17', 'pos': '63945645', 'ref': 'G', 'alt': 'GGC'}} + assert results['NM_000334.4:c.3442-8_3442-7insGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.62023005_62023006insGC', 'vcf': {'chr': '17', 'pos': '62023005', 'ref': 'G', 'alt': 'GGC'}} + assert results['NM_000334.4:c.3442-8_3442-7insGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945645_63945646insGC', 'vcf': {'chr': '17', 'pos': '63945645', 'ref': 'G', 'alt': 'GGC'}} + assert results['NM_000334.4:c.3442-8_3442-7insGC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011699.1'} def test_variant261(self): variant = '17-62023006-C-A' @@ -12185,22 +12534,22 @@ def test_variant261(self): assert results['flag'] == 'gene_variant' assert 'NM_000334.4:c.3442-8G>T' in list(results.keys()) - assert results['NM_000334.4:c.3442-8G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000334.4:c.3442-8G>T']['refseqgene_context_intronic_sequence'] == 'NG_011699.1(NM_000334.4):c.3442-8G>T' - self.assertCountEqual(results['NM_000334.4:c.3442-8G>T']['alt_genomic_loci'], []) - assert results['NM_000334.4:c.3442-8G>T']['gene_symbol'] == 'SCN4A' - assert 
results['NM_000334.4:c.3442-8G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000325.4:p.?', 'slr': 'NP_000325.4:p.?'} assert results['NM_000334.4:c.3442-8G>T']['submitted_variant'] == '17-62023006-C-A' - assert results['NM_000334.4:c.3442-8G>T']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000334.4):c.3442-8G>T' - assert results['NM_000334.4:c.3442-8G>T']['hgvs_lrg_variant'] == '' + assert results['NM_000334.4:c.3442-8G>T']['gene_symbol'] == 'SCN4A' + assert results['NM_000334.4:c.3442-8G>T']['gene_ids'] == {'hgnc_id': 'HGNC:10591', 'entrez_gene_id': '6329', 'ucsc_id': 'uc060iti.1', 'omim_id': ['603967']} assert results['NM_000334.4:c.3442-8G>T']['hgvs_transcript_variant'] == 'NM_000334.4:c.3442-8G>T' + assert results['NM_000334.4:c.3442-8G>T']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000334.4):c.3442-8G>T' + assert results['NM_000334.4:c.3442-8G>T']['refseqgene_context_intronic_sequence'] == 'NG_011699.1(NM_000334.4):c.3442-8G>T' assert results['NM_000334.4:c.3442-8G>T']['hgvs_refseqgene_variant'] == 'NG_011699.1:g.32273G>T' - assert results['NM_000334.4:c.3442-8G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.62023006C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '62023006', 'alt': 'A'}} - assert results['NM_000334.4:c.3442-8G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945646C>A', 'vcf': {'chr': 'chr17', 'ref': 'C', 'pos': '63945646', 'alt': 'A'}} - assert results['NM_000334.4:c.3442-8G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.62023006C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '62023006', 'alt': 'A'}} - assert results['NM_000334.4:c.3442-8G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945646C>A', 'vcf': {'chr': '17', 'ref': 'C', 'pos': '63945646', 'alt': 'A'}} - assert results['NM_000334.4:c.3442-8G>T']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_011699.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4'} - + assert results['NM_000334.4:c.3442-8G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000325.4:p.?', 'slr': 'NP_000325.4:p.?'} + assert results['NM_000334.4:c.3442-8G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000334.4:c.3442-8G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000334.4:c.3442-8G>T']['alt_genomic_loci'], []) + assert results['NM_000334.4:c.3442-8G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.62023006C>A', 'vcf': {'chr': 'chr17', 'pos': '62023006', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000334.4:c.3442-8G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945646C>A', 'vcf': {'chr': 'chr17', 'pos': '63945646', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000334.4:c.3442-8G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.62023006C>A', 'vcf': {'chr': '17', 'pos': '62023006', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000334.4:c.3442-8G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63945646C>A', 'vcf': {'chr': '17', 'pos': '63945646', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000334.4:c.3442-8G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011699.1'} def test_variant262(self): variant = '17-62034787-G-A' @@ -12209,148 +12558,154 @@ def test_variant262(self): assert results['flag'] == 'gene_variant' assert 'NM_000334.4:c.2111C>T' in list(results.keys()) - assert results['NM_000334.4:c.2111C>T']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_000334.4:c.2111C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000334.4:c.2111C>T']['alt_genomic_loci'], []) - assert results['NM_000334.4:c.2111C>T']['gene_symbol'] == 'SCN4A' - assert results['NM_000334.4:c.2111C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000325.4:p.(Thr704Met)', 'slr': 'NP_000325.4:p.(T704M)'} assert results['NM_000334.4:c.2111C>T']['submitted_variant'] == '17-62034787-G-A' - assert results['NM_000334.4:c.2111C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_000334.4:c.2111C>T']['hgvs_lrg_variant'] == '' + assert results['NM_000334.4:c.2111C>T']['gene_symbol'] == 'SCN4A' + assert results['NM_000334.4:c.2111C>T']['gene_ids'] == {'hgnc_id': 'HGNC:10591', 'entrez_gene_id': '6329', 'ucsc_id': 'uc060iti.1', 'omim_id': ['603967']} assert results['NM_000334.4:c.2111C>T']['hgvs_transcript_variant'] == 'NM_000334.4:c.2111C>T' + assert results['NM_000334.4:c.2111C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000334.4:c.2111C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000334.4:c.2111C>T']['hgvs_refseqgene_variant'] == 'NG_011699.1:g.20492C>T' - assert results['NM_000334.4:c.2111C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.62034787G>A', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '62034787', 'alt': 'A'}} - assert results['NM_000334.4:c.2111C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63957427G>A', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '63957427', 'alt': 'A'}} - assert results['NM_000334.4:c.2111C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.62034787G>A', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '62034787', 'alt': 'A'}} - assert results['NM_000334.4:c.2111C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63957427G>A', 'vcf': {'chr': '17', 'ref': 'G', 'pos': 
'63957427', 'alt': 'A'}} - assert results['NM_000334.4:c.2111C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011699.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4'} - + assert results['NM_000334.4:c.2111C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000325.4:p.(Thr704Met)', 'slr': 'NP_000325.4:p.(T704M)'} + assert results['NM_000334.4:c.2111C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000334.4:c.2111C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000334.4:c.2111C>T']['alt_genomic_loci'], []) + assert results['NM_000334.4:c.2111C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.62034787G>A', 'vcf': {'chr': 'chr17', 'pos': '62034787', 'ref': 'G', 'alt': 'A'}} + assert results['NM_000334.4:c.2111C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63957427G>A', 'vcf': {'chr': 'chr17', 'pos': '63957427', 'ref': 'G', 'alt': 'A'}} + assert results['NM_000334.4:c.2111C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.62034787G>A', 'vcf': {'chr': '17', 'pos': '62034787', 'ref': 'G', 'alt': 'A'}} + assert results['NM_000334.4:c.2111C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.63957427G>A', 'vcf': {'chr': '17', 'pos': '63957427', 'ref': 'G', 'alt': 'A'}} + assert results['NM_000334.4:c.2111C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000334.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000325.4', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011699.1'} def test_variant263(self): variant = '18-24128261-GTCCTCC-G' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_001351443.1:c.-16+941_-16+946del' in list(results.keys()) - assert 
results['NM_001351443.1:c.-16+941_-16+946del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001351443.1:c.-16+941_-16+946del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001351443.1:c.-16+941_-16+946del']['alt_genomic_loci'], []) - assert results['NM_001351443.1:c.-16+941_-16+946del']['gene_symbol'] == 'KCTD1' - assert results['NM_001351443.1:c.-16+941_-16+946del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001338372.1:p.?', 'slr': 'NP_001338372.1:p.?'} - assert results['NM_001351443.1:c.-16+941_-16+946del']['submitted_variant'] == '18-24128261-GTCCTCC-G' - assert results['NM_001351443.1:c.-16+941_-16+946del']['genome_context_intronic_sequence'] == 'NC_000018.9(NM_001351443.1):c.-16+941_-16+946del' - assert results['NM_001351443.1:c.-16+941_-16+946del']['hgvs_lrg_variant'] == '' - assert results['NM_001351443.1:c.-16+941_-16+946del']['hgvs_transcript_variant'] == 'NM_001351443.1:c.-16+941_-16+946del' - assert results['NM_001351443.1:c.-16+941_-16+946del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001351443.1:c.-16+941_-16+946del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_001351443.1:c.-16+941_-16+946del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} - assert results['NM_001351443.1:c.-16+941_-16+946del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_001351443.1:c.-16+941_-16+946del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} - 
assert results['NM_001351443.1:c.-16+941_-16+946del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001338372.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001351443.1'} + assert results['flag'] == 'gene_variant' + assert 'NM_001258221.1:c.-16+1426_-16+1431del' in list(results.keys()) + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['submitted_variant'] == '18-24128261-GTCCTCC-G' + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['gene_symbol'] == 'KCTD1' + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['gene_ids'] == {'hgnc_id': 'HGNC:18249', 'entrez_gene_id': '284252', 'ucsc_id': 'uc010xbk.5', 'omim_id': ['613420']} + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['hgvs_transcript_variant'] == 'NM_001258221.1:c.-16+1426_-16+1431del' + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['genome_context_intronic_sequence'] == 'NC_000018.9(NM_001258221.1):c.-16+1426_-16+1431del' + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001245150.1:p.?', 'slr': 'NP_001245150.1:p.?'} + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001258221.1:c.-16+1426_-16+1431del']['alt_genomic_loci'], []) + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': 'chr18', 'pos': '24128261', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': 'chr18', 'pos': '26548297', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': '18', 'pos': '24128261', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': '18', 'pos': '26548297', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_001258221.1:c.-16+1426_-16+1431del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001258221.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001245150.1'} assert 'NM_001258222.1:c.10-47053_10-47048del' in list(results.keys()) - assert results['NM_001258222.1:c.10-47053_10-47048del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001258222.1:c.10-47053_10-47048del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001258222.1:c.10-47053_10-47048del']['alt_genomic_loci'], []) - assert results['NM_001258222.1:c.10-47053_10-47048del']['gene_symbol'] == 'KCTD1' - assert results['NM_001258222.1:c.10-47053_10-47048del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001245151.1:p.?', 'slr': 'NP_001245151.1:p.?'} assert results['NM_001258222.1:c.10-47053_10-47048del']['submitted_variant'] == '18-24128261-GTCCTCC-G' - assert results['NM_001258222.1:c.10-47053_10-47048del']['genome_context_intronic_sequence'] == 'NC_000018.9(NM_001258222.1):c.10-47053_10-47048del' - assert results['NM_001258222.1:c.10-47053_10-47048del']['hgvs_lrg_variant'] == '' + assert results['NM_001258222.1:c.10-47053_10-47048del']['gene_symbol'] == 'KCTD1' + assert results['NM_001258222.1:c.10-47053_10-47048del']['gene_ids'] == {'hgnc_id': 'HGNC:18249', 'entrez_gene_id': '284252', 'ucsc_id': 'uc010xbk.5', 
'omim_id': ['613420']} assert results['NM_001258222.1:c.10-47053_10-47048del']['hgvs_transcript_variant'] == 'NM_001258222.1:c.10-47053_10-47048del' + assert results['NM_001258222.1:c.10-47053_10-47048del']['genome_context_intronic_sequence'] == 'NC_000018.9(NM_001258222.1):c.10-47053_10-47048del' + assert results['NM_001258222.1:c.10-47053_10-47048del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001258222.1:c.10-47053_10-47048del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001258222.1:c.10-47053_10-47048del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_001258222.1:c.10-47053_10-47048del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} - assert results['NM_001258222.1:c.10-47053_10-47048del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_001258222.1:c.10-47053_10-47048del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} - assert results['NM_001258222.1:c.10-47053_10-47048del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001245151.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001258222.1'} - - assert 'NM_001258221.1:c.-16+1426_-16+1431del' in list(results.keys()) - assert results['NM_001258221.1:c.-16+1426_-16+1431del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001258221.1:c.-16+1426_-16+1431del']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_001258221.1:c.-16+1426_-16+1431del']['alt_genomic_loci'], []) - assert results['NM_001258221.1:c.-16+1426_-16+1431del']['gene_symbol'] == 'KCTD1' - assert results['NM_001258221.1:c.-16+1426_-16+1431del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001245150.1:p.?', 'slr': 'NP_001245150.1:p.?'} - assert results['NM_001258221.1:c.-16+1426_-16+1431del']['submitted_variant'] == '18-24128261-GTCCTCC-G' - assert results['NM_001258221.1:c.-16+1426_-16+1431del']['genome_context_intronic_sequence'] == 'NC_000018.9(NM_001258221.1):c.-16+1426_-16+1431del' - assert results['NM_001258221.1:c.-16+1426_-16+1431del']['hgvs_lrg_variant'] == '' - assert results['NM_001258221.1:c.-16+1426_-16+1431del']['hgvs_transcript_variant'] == 'NM_001258221.1:c.-16+1426_-16+1431del' - assert results['NM_001258221.1:c.-16+1426_-16+1431del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001258221.1:c.-16+1426_-16+1431del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_001258221.1:c.-16+1426_-16+1431del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} - assert results['NM_001258221.1:c.-16+1426_-16+1431del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_001258221.1:c.-16+1426_-16+1431del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} - assert results['NM_001258221.1:c.-16+1426_-16+1431del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001245150.1', 
'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001258221.1'} + assert results['NM_001258222.1:c.10-47053_10-47048del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001245151.1:p.?', 'slr': 'NP_001245151.1:p.?'} + assert results['NM_001258222.1:c.10-47053_10-47048del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001258222.1:c.10-47053_10-47048del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001258222.1:c.10-47053_10-47048del']['alt_genomic_loci'], []) + assert results['NM_001258222.1:c.10-47053_10-47048del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': 'chr18', 'pos': '24128261', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_001258222.1:c.10-47053_10-47048del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': 'chr18', 'pos': '26548297', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_001258222.1:c.10-47053_10-47048del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': '18', 'pos': '24128261', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_001258222.1:c.10-47053_10-47048del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': '18', 'pos': '26548297', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_001258222.1:c.10-47053_10-47048del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001258222.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001245151.1'} - assert 'NM_001258222.2:c.10-47053_10-47048del' in list(results.keys()) - assert results['NM_001258222.2:c.10-47053_10-47048del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001258222.2:c.10-47053_10-47048del']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_001258222.2:c.10-47053_10-47048del']['alt_genomic_loci'], []) - assert results['NM_001258222.2:c.10-47053_10-47048del']['gene_symbol'] == 'KCTD1' - assert results['NM_001258222.2:c.10-47053_10-47048del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001245151.1:p.?', 'slr': 'NP_001245151.1:p.?'} - assert results['NM_001258222.2:c.10-47053_10-47048del']['submitted_variant'] == '18-24128261-GTCCTCC-G' - assert results['NM_001258222.2:c.10-47053_10-47048del']['genome_context_intronic_sequence'] == 'NC_000018.9(NM_001258222.2):c.10-47053_10-47048del' - assert results['NM_001258222.2:c.10-47053_10-47048del']['hgvs_lrg_variant'] == '' - assert results['NM_001258222.2:c.10-47053_10-47048del']['hgvs_transcript_variant'] == 'NM_001258222.2:c.10-47053_10-47048del' - assert results['NM_001258222.2:c.10-47053_10-47048del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001258222.2:c.10-47053_10-47048del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_001258222.2:c.10-47053_10-47048del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} - assert results['NM_001258222.2:c.10-47053_10-47048del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_001258222.2:c.10-47053_10-47048del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} - assert results['NM_001258222.2:c.10-47053_10-47048del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001245151.1', 
'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001258222.2'} + assert 'NM_198991.3:c.-15-47053_-15-47048del' in list(results.keys()) + assert results['NM_198991.3:c.-15-47053_-15-47048del']['submitted_variant'] == '18-24128261-GTCCTCC-G' + assert results['NM_198991.3:c.-15-47053_-15-47048del']['gene_symbol'] == 'KCTD1' + assert results['NM_198991.3:c.-15-47053_-15-47048del']['gene_ids'] == {'hgnc_id': 'HGNC:18249', 'entrez_gene_id': '284252', 'ucsc_id': 'uc010xbk.5', 'omim_id': ['613420']} + assert results['NM_198991.3:c.-15-47053_-15-47048del']['hgvs_transcript_variant'] == 'NM_198991.3:c.-15-47053_-15-47048del' + assert results['NM_198991.3:c.-15-47053_-15-47048del']['genome_context_intronic_sequence'] == 'NC_000018.9(NM_198991.3):c.-15-47053_-15-47048del' + assert results['NM_198991.3:c.-15-47053_-15-47048del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_198991.3:c.-15-47053_-15-47048del']['hgvs_refseqgene_variant'] == '' + assert results['NM_198991.3:c.-15-47053_-15-47048del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_945342.1:p.?', 'slr': 'NP_945342.1:p.?'} + assert results['NM_198991.3:c.-15-47053_-15-47048del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_198991.3:c.-15-47053_-15-47048del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_198991.3:c.-15-47053_-15-47048del']['alt_genomic_loci'], []) + assert results['NM_198991.3:c.-15-47053_-15-47048del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': 'chr18', 'pos': '24128261', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_198991.3:c.-15-47053_-15-47048del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': 'chr18', 'pos': '26548297', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_198991.3:c.-15-47053_-15-47048del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': '18', 'pos': '24128261', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_198991.3:c.-15-47053_-15-47048del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': '18', 'pos': '26548297', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_198991.3:c.-15-47053_-15-47048del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198991.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_945342.1'} + + assert 'NM_001351443.1:c.-16+941_-16+946del' in list(results.keys()) + assert results['NM_001351443.1:c.-16+941_-16+946del']['submitted_variant'] == '18-24128261-GTCCTCC-G' + assert results['NM_001351443.1:c.-16+941_-16+946del']['gene_symbol'] == 'KCTD1' + assert results['NM_001351443.1:c.-16+941_-16+946del']['gene_ids'] == {'hgnc_id': 'HGNC:18249', 'entrez_gene_id': '284252', 'ucsc_id': 'uc010xbk.5', 'omim_id': ['613420']} + assert results['NM_001351443.1:c.-16+941_-16+946del']['hgvs_transcript_variant'] == 'NM_001351443.1:c.-16+941_-16+946del' + assert results['NM_001351443.1:c.-16+941_-16+946del']['genome_context_intronic_sequence'] == 'NC_000018.9(NM_001351443.1):c.-16+941_-16+946del' + assert results['NM_001351443.1:c.-16+941_-16+946del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001351443.1:c.-16+941_-16+946del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001351443.1:c.-16+941_-16+946del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001338372.1:p.?', 'slr': 'NP_001338372.1:p.?'} + assert results['NM_001351443.1:c.-16+941_-16+946del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001351443.1:c.-16+941_-16+946del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001351443.1:c.-16+941_-16+946del']['alt_genomic_loci'], []) + assert results['NM_001351443.1:c.-16+941_-16+946del']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': 'chr18', 'pos': '24128261', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_001351443.1:c.-16+941_-16+946del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': 'chr18', 'pos': '26548297', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_001351443.1:c.-16+941_-16+946del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': '18', 'pos': '24128261', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_001351443.1:c.-16+941_-16+946del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': '18', 'pos': '26548297', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_001351443.1:c.-16+941_-16+946del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001351443.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001338372.1'} - assert results['flag'] == 'gene_variant' assert 'NM_001136205.2:c.-16+588_-16+593del' in list(results.keys()) - assert results['NM_001136205.2:c.-16+588_-16+593del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001136205.2:c.-16+588_-16+593del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001136205.2:c.-16+588_-16+593del']['alt_genomic_loci'], []) - assert results['NM_001136205.2:c.-16+588_-16+593del']['gene_symbol'] == 'KCTD1' - assert results['NM_001136205.2:c.-16+588_-16+593del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129677.1:p.?', 'slr': 'NP_001129677.1:p.?'} assert results['NM_001136205.2:c.-16+588_-16+593del']['submitted_variant'] == '18-24128261-GTCCTCC-G' - assert results['NM_001136205.2:c.-16+588_-16+593del']['genome_context_intronic_sequence'] == 'NC_000018.9(NM_001136205.2):c.-16+588_-16+593del' - assert 
results['NM_001136205.2:c.-16+588_-16+593del']['hgvs_lrg_variant'] == '' + assert results['NM_001136205.2:c.-16+588_-16+593del']['gene_symbol'] == 'KCTD1' + assert results['NM_001136205.2:c.-16+588_-16+593del']['gene_ids'] == {'hgnc_id': 'HGNC:18249', 'entrez_gene_id': '284252', 'ucsc_id': 'uc010xbk.5', 'omim_id': ['613420']} assert results['NM_001136205.2:c.-16+588_-16+593del']['hgvs_transcript_variant'] == 'NM_001136205.2:c.-16+588_-16+593del' + assert results['NM_001136205.2:c.-16+588_-16+593del']['genome_context_intronic_sequence'] == 'NC_000018.9(NM_001136205.2):c.-16+588_-16+593del' + assert results['NM_001136205.2:c.-16+588_-16+593del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001136205.2:c.-16+588_-16+593del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001136205.2:c.-16+588_-16+593del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_001136205.2:c.-16+588_-16+593del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} - assert results['NM_001136205.2:c.-16+588_-16+593del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_001136205.2:c.-16+588_-16+593del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} - assert results['NM_001136205.2:c.-16+588_-16+593del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129677.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001136205.2'} + assert 
results['NM_001136205.2:c.-16+588_-16+593del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129677.1:p.?', 'slr': 'NP_001129677.1:p.?'} + assert results['NM_001136205.2:c.-16+588_-16+593del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001136205.2:c.-16+588_-16+593del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001136205.2:c.-16+588_-16+593del']['alt_genomic_loci'], []) + assert results['NM_001136205.2:c.-16+588_-16+593del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': 'chr18', 'pos': '24128261', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_001136205.2:c.-16+588_-16+593del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': 'chr18', 'pos': '26548297', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_001136205.2:c.-16+588_-16+593del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': '18', 'pos': '24128261', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_001136205.2:c.-16+588_-16+593del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': '18', 'pos': '26548297', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_001136205.2:c.-16+588_-16+593del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001136205.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129677.1'} - assert 'NM_198991.3:c.-15-47053_-15-47048del' in list(results.keys()) - assert results['NM_198991.3:c.-15-47053_-15-47048del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_198991.3:c.-15-47053_-15-47048del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_198991.3:c.-15-47053_-15-47048del']['alt_genomic_loci'], []) - assert 
results['NM_198991.3:c.-15-47053_-15-47048del']['gene_symbol'] == 'KCTD1' - assert results['NM_198991.3:c.-15-47053_-15-47048del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_945342.1:p.?', 'slr': 'NP_945342.1:p.?'} - assert results['NM_198991.3:c.-15-47053_-15-47048del']['submitted_variant'] == '18-24128261-GTCCTCC-G' - assert results['NM_198991.3:c.-15-47053_-15-47048del']['genome_context_intronic_sequence'] == 'NC_000018.9(NM_198991.3):c.-15-47053_-15-47048del' - assert results['NM_198991.3:c.-15-47053_-15-47048del']['hgvs_lrg_variant'] == '' - assert results['NM_198991.3:c.-15-47053_-15-47048del']['hgvs_transcript_variant'] == 'NM_198991.3:c.-15-47053_-15-47048del' - assert results['NM_198991.3:c.-15-47053_-15-47048del']['hgvs_refseqgene_variant'] == '' - assert results['NM_198991.3:c.-15-47053_-15-47048del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_198991.3:c.-15-47053_-15-47048del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} - assert results['NM_198991.3:c.-15-47053_-15-47048del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_198991.3:c.-15-47053_-15-47048del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} - assert results['NM_198991.3:c.-15-47053_-15-47048del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_945342.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198991.3'} + assert 'NM_001258222.2:c.10-47053_10-47048del' in 
list(results.keys()) + assert results['NM_001258222.2:c.10-47053_10-47048del']['submitted_variant'] == '18-24128261-GTCCTCC-G' + assert results['NM_001258222.2:c.10-47053_10-47048del']['gene_symbol'] == 'KCTD1' + assert results['NM_001258222.2:c.10-47053_10-47048del']['gene_ids'] == {'hgnc_id': 'HGNC:18249', 'entrez_gene_id': '284252', 'ucsc_id': 'uc010xbk.5', 'omim_id': ['613420']} + assert results['NM_001258222.2:c.10-47053_10-47048del']['hgvs_transcript_variant'] == 'NM_001258222.2:c.10-47053_10-47048del' + assert results['NM_001258222.2:c.10-47053_10-47048del']['genome_context_intronic_sequence'] == 'NC_000018.9(NM_001258222.2):c.10-47053_10-47048del' + assert results['NM_001258222.2:c.10-47053_10-47048del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001258222.2:c.10-47053_10-47048del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001258222.2:c.10-47053_10-47048del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001245151.1:p.?', 'slr': 'NP_001245151.1:p.?'} + assert results['NM_001258222.2:c.10-47053_10-47048del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001258222.2:c.10-47053_10-47048del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001258222.2:c.10-47053_10-47048del']['alt_genomic_loci'], []) + assert results['NM_001258222.2:c.10-47053_10-47048del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': 'chr18', 'pos': '24128261', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_001258222.2:c.10-47053_10-47048del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': 'chr18', 'pos': '26548297', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_001258222.2:c.10-47053_10-47048del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': '18', 'pos': '24128261', 'ref': 'GTCCTCC', 'alt': 'G'}} 
+ assert results['NM_001258222.2:c.10-47053_10-47048del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': '18', 'pos': '26548297', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_001258222.2:c.10-47053_10-47048del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001258222.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001245151.1'} assert 'NM_001142730.2:c.234_239del' in list(results.keys()) - assert results['NM_001142730.2:c.234_239del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001142730.2:c.234_239del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001142730.2:c.234_239del']['alt_genomic_loci'], []) - assert results['NM_001142730.2:c.234_239del']['gene_symbol'] == 'KCTD1' - assert results['NM_001142730.2:c.234_239del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001136202.1:p.(Glu78_Glu79del)', 'slr': 'NP_001136202.1:p.(E78_E79del)'} assert results['NM_001142730.2:c.234_239del']['submitted_variant'] == '18-24128261-GTCCTCC-G' - assert results['NM_001142730.2:c.234_239del']['genome_context_intronic_sequence'] == '' - assert results['NM_001142730.2:c.234_239del']['hgvs_lrg_variant'] == '' + assert results['NM_001142730.2:c.234_239del']['gene_symbol'] == 'KCTD1' + assert results['NM_001142730.2:c.234_239del']['gene_ids'] == {'hgnc_id': 'HGNC:18249', 'entrez_gene_id': '284252', 'ucsc_id': 'uc010xbk.5', 'omim_id': ['613420']} assert results['NM_001142730.2:c.234_239del']['hgvs_transcript_variant'] == 'NM_001142730.2:c.234_239del' + assert results['NM_001142730.2:c.234_239del']['genome_context_intronic_sequence'] == '' + assert results['NM_001142730.2:c.234_239del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001142730.2:c.234_239del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001142730.2:c.234_239del']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_001142730.2:c.234_239del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': 'chr18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} - assert results['NM_001142730.2:c.234_239del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '24128261', 'alt': 'G'}} - assert results['NM_001142730.2:c.234_239del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': '18', 'ref': 'GTCCTCC', 'pos': '26548297', 'alt': 'G'}} - assert results['NM_001142730.2:c.234_239del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001136202.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001142730.2'} - + assert results['NM_001142730.2:c.234_239del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001136202.1:p.(Glu78_Glu79del)', 'slr': 'NP_001136202.1:p.(E78_E79del)'} + assert results['NM_001142730.2:c.234_239del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001142730.2:c.234_239del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001142730.2:c.234_239del']['alt_genomic_loci'], []) + assert results['NM_001142730.2:c.234_239del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': 'chr18', 'pos': '24128261', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_001142730.2:c.234_239del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': 'chr18', 'pos': '26548297', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_001142730.2:c.234_239del']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000018.9:g.24128273_24128278del', 'vcf': {'chr': '18', 'pos': '24128261', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_001142730.2:c.234_239del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000018.10:g.26548309_26548314del', 'vcf': {'chr': '18', 'pos': '26548297', 'ref': 'GTCCTCC', 'alt': 'G'}} + assert results['NM_001142730.2:c.234_239del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001142730.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001136202.1'} def test_variant264(self): variant = '19-15291774-G-A' @@ -12359,22 +12714,22 @@ def test_variant264(self): assert results['flag'] == 'gene_variant' assert 'NM_000435.2:c.2992C>T' in list(results.keys()) - assert results['NM_000435.2:c.2992C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000435.2:c.2992C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000435.2:c.2992C>T']['alt_genomic_loci'], []) - assert results['NM_000435.2:c.2992C>T']['gene_symbol'] == 'NOTCH3' - assert results['NM_000435.2:c.2992C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000426.2:p.(Gln998Ter)', 'slr': 'NP_000426.2:p.(Q998*)'} assert results['NM_000435.2:c.2992C>T']['submitted_variant'] == '19-15291774-G-A' - assert results['NM_000435.2:c.2992C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_000435.2:c.2992C>T']['hgvs_lrg_variant'] == '' + assert results['NM_000435.2:c.2992C>T']['gene_symbol'] == 'NOTCH3' + assert results['NM_000435.2:c.2992C>T']['gene_ids'] == {'hgnc_id': 'HGNC:7883', 'entrez_gene_id': '4854', 'ucsc_id': 'uc002nan.4', 'omim_id': ['600276']} assert results['NM_000435.2:c.2992C>T']['hgvs_transcript_variant'] == 'NM_000435.2:c.2992C>T' + assert results['NM_000435.2:c.2992C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000435.2:c.2992C>T']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_000435.2:c.2992C>T']['hgvs_refseqgene_variant'] == 'NG_009819.1:g.25019C>T' - assert results['NM_000435.2:c.2992C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.15291774G>A', 'vcf': {'chr': 'chr19', 'ref': 'G', 'pos': '15291774', 'alt': 'A'}} - assert results['NM_000435.2:c.2992C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.15180963G>A', 'vcf': {'chr': 'chr19', 'ref': 'G', 'pos': '15180963', 'alt': 'A'}} - assert results['NM_000435.2:c.2992C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.15291774G>A', 'vcf': {'chr': '19', 'ref': 'G', 'pos': '15291774', 'alt': 'A'}} - assert results['NM_000435.2:c.2992C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.15180963G>A', 'vcf': {'chr': '19', 'ref': 'G', 'pos': '15180963', 'alt': 'A'}} - assert results['NM_000435.2:c.2992C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009819.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000426.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000435.2'} - + assert results['NM_000435.2:c.2992C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000426.2:p.(Gln998Ter)', 'slr': 'NP_000426.2:p.(Q998*)'} + assert results['NM_000435.2:c.2992C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000435.2:c.2992C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000435.2:c.2992C>T']['alt_genomic_loci'], []) + assert results['NM_000435.2:c.2992C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.15291774G>A', 'vcf': {'chr': 'chr19', 'pos': '15291774', 'ref': 'G', 'alt': 'A'}} + assert results['NM_000435.2:c.2992C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.15180963G>A', 'vcf': {'chr': 'chr19', 'pos': '15180963', 'ref': 'G', 'alt': 'A'}} + assert 
results['NM_000435.2:c.2992C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.15291774G>A', 'vcf': {'chr': '19', 'pos': '15291774', 'ref': 'G', 'alt': 'A'}} + assert results['NM_000435.2:c.2992C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.15180963G>A', 'vcf': {'chr': '19', 'pos': '15180963', 'ref': 'G', 'alt': 'A'}} + assert results['NM_000435.2:c.2992C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000435.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000426.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009819.1'} def test_variant265(self): variant = '19-15311794-A-G' @@ -12383,23 +12738,23 @@ def test_variant265(self): assert results['flag'] == 'intergenic' assert 'intergenic_variant_1' in list(results.keys()) - assert results['intergenic_variant_1']['hgvs_lrg_transcript_variant'] == '' - assert results['intergenic_variant_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['intergenic_variant_1']['alt_genomic_loci'], []) - assert results['intergenic_variant_1']['gene_symbol'] == '' - assert results['intergenic_variant_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['intergenic_variant_1']['submitted_variant'] == '19-15311794-A-G' - assert results['intergenic_variant_1']['genome_context_intronic_sequence'] == '' - assert results['intergenic_variant_1']['hgvs_lrg_variant'] == '' + assert results['intergenic_variant_1']['gene_symbol'] == '' + assert results['intergenic_variant_1']['gene_ids'] == {} assert results['intergenic_variant_1']['hgvs_transcript_variant'] == '' + assert results['intergenic_variant_1']['genome_context_intronic_sequence'] == '' + assert results['intergenic_variant_1']['refseqgene_context_intronic_sequence'] == '' assert results['intergenic_variant_1']['hgvs_refseqgene_variant'] == 'NG_009819.1:g.4999T>C' - assert 
results['intergenic_variant_1']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.15311794A>G', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '15311794', 'alt': 'G'}} - assert results['intergenic_variant_1']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.15200983A>G', 'vcf': {'chr': 'chr19', 'ref': 'A', 'pos': '15200983', 'alt': 'G'}} - assert results['intergenic_variant_1']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.15311794A>G', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '15311794', 'alt': 'G'}} - assert results['intergenic_variant_1']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.15200983A>G', 'vcf': {'chr': '19', 'ref': 'A', 'pos': '15200983', 'alt': 'G'}} + assert results['intergenic_variant_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['intergenic_variant_1']['hgvs_lrg_transcript_variant'] == '' + assert results['intergenic_variant_1']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['intergenic_variant_1']['alt_genomic_loci'], []) + assert results['intergenic_variant_1']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.15311794A>G', 'vcf': {'chr': 'chr19', 'pos': '15311794', 'ref': 'A', 'alt': 'G'}} + assert results['intergenic_variant_1']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.15200983A>G', 'vcf': {'chr': 'chr19', 'pos': '15200983', 'ref': 'A', 'alt': 'G'}} + assert results['intergenic_variant_1']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.15311794A>G', 'vcf': {'chr': '19', 'pos': '15311794', 'ref': 'A', 'alt': 'G'}} + assert results['intergenic_variant_1']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.15200983A>G', 'vcf': {'chr': '19', 'pos': '15200983', 'ref': 'A', 'alt': 'G'}} assert 
results['intergenic_variant_1']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009819.1'} - def test_variant266(self): variant = '19-39076592-G-A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() @@ -12407,2016 +12762,2122 @@ def test_variant266(self): assert results['flag'] == 'gene_variant' assert 'NM_000540.2:c.14818G>A' in list(results.keys()) - assert results['NM_000540.2:c.14818G>A']['hgvs_lrg_transcript_variant'] == 'LRG_766t1:c.14818G>A' - assert results['NM_000540.2:c.14818G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000540.2:c.14818G>A']['alt_genomic_loci'], []) - assert results['NM_000540.2:c.14818G>A']['gene_symbol'] == 'RYR1' - assert results['NM_000540.2:c.14818G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000531.2(LRG_766p1):p.(Ala4940Thr)', 'slr': 'NP_000531.2:p.(A4940T)'} assert results['NM_000540.2:c.14818G>A']['submitted_variant'] == '19-39076592-G-A' - assert results['NM_000540.2:c.14818G>A']['genome_context_intronic_sequence'] == '' - assert results['NM_000540.2:c.14818G>A']['hgvs_lrg_variant'] == 'LRG_766:g.157253G>A' + assert results['NM_000540.2:c.14818G>A']['gene_symbol'] == 'RYR1' + assert results['NM_000540.2:c.14818G>A']['gene_ids'] == {'hgnc_id': 'HGNC:10483', 'entrez_gene_id': '6261', 'ucsc_id': 'uc002oit.4', 'omim_id': ['180901']} assert results['NM_000540.2:c.14818G>A']['hgvs_transcript_variant'] == 'NM_000540.2:c.14818G>A' + assert results['NM_000540.2:c.14818G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_000540.2:c.14818G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000540.2:c.14818G>A']['hgvs_refseqgene_variant'] == 'NG_008866.1:g.157253G>A' - assert results['NM_000540.2:c.14818G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.39076592G>A', 'vcf': {'chr': 'chr19', 'ref': 'G', 'pos': '39076592', 'alt': 'A'}} - assert 
results['NM_000540.2:c.14818G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.38585952G>A', 'vcf': {'chr': 'chr19', 'ref': 'G', 'pos': '38585952', 'alt': 'A'}} - assert results['NM_000540.2:c.14818G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.39076592G>A', 'vcf': {'chr': '19', 'ref': 'G', 'pos': '39076592', 'alt': 'A'}} - assert results['NM_000540.2:c.14818G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.38585952G>A', 'vcf': {'chr': '19', 'ref': 'G', 'pos': '38585952', 'alt': 'A'}} - assert results['NM_000540.2:c.14818G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008866.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000531.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000540.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_766.xml'} + assert results['NM_000540.2:c.14818G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000531.2(LRG_766p1):p.(Ala4940Thr)', 'slr': 'NP_000531.2:p.(A4940T)'} + assert results['NM_000540.2:c.14818G>A']['hgvs_lrg_transcript_variant'] == 'LRG_766t1:c.14818G>A' + assert results['NM_000540.2:c.14818G>A']['hgvs_lrg_variant'] == 'LRG_766:g.157253G>A' + self.assertCountEqual(results['NM_000540.2:c.14818G>A']['alt_genomic_loci'], []) + assert results['NM_000540.2:c.14818G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.39076592G>A', 'vcf': {'chr': 'chr19', 'pos': '39076592', 'ref': 'G', 'alt': 'A'}} + assert results['NM_000540.2:c.14818G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.38585952G>A', 'vcf': {'chr': 'chr19', 'pos': '38585952', 'ref': 'G', 'alt': 'A'}} + assert results['NM_000540.2:c.14818G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.39076592G>A', 'vcf': {'chr': '19', 'pos': '39076592', 'ref': 'G', 'alt': 'A'}} + 
assert results['NM_000540.2:c.14818G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.38585952G>A', 'vcf': {'chr': '19', 'pos': '38585952', 'ref': 'G', 'alt': 'A'}} + assert results['NM_000540.2:c.14818G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000540.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000531.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008866.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_766.xml'} assert 'NM_001042723.1:c.14803G>A' in list(results.keys()) - assert results['NM_001042723.1:c.14803G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001042723.1:c.14803G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001042723.1:c.14803G>A']['alt_genomic_loci'], []) - assert results['NM_001042723.1:c.14803G>A']['gene_symbol'] == 'RYR1' - assert results['NM_001042723.1:c.14803G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036188.1:p.(Ala4935Thr)', 'slr': 'NP_001036188.1:p.(A4935T)'} assert results['NM_001042723.1:c.14803G>A']['submitted_variant'] == '19-39076592-G-A' - assert results['NM_001042723.1:c.14803G>A']['genome_context_intronic_sequence'] == '' - assert results['NM_001042723.1:c.14803G>A']['hgvs_lrg_variant'] == '' + assert results['NM_001042723.1:c.14803G>A']['gene_symbol'] == 'RYR1' + assert results['NM_001042723.1:c.14803G>A']['gene_ids'] == {'hgnc_id': 'HGNC:10483', 'entrez_gene_id': '6261', 'ucsc_id': 'uc002oit.4', 'omim_id': ['180901']} assert results['NM_001042723.1:c.14803G>A']['hgvs_transcript_variant'] == 'NM_001042723.1:c.14803G>A' + assert results['NM_001042723.1:c.14803G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001042723.1:c.14803G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001042723.1:c.14803G>A']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_001042723.1:c.14803G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.39076592G>A', 'vcf': {'chr': 'chr19', 'ref': 'G', 'pos': '39076592', 'alt': 'A'}} - assert results['NM_001042723.1:c.14803G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.38585952G>A', 'vcf': {'chr': 'chr19', 'ref': 'G', 'pos': '38585952', 'alt': 'A'}} - assert results['NM_001042723.1:c.14803G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000019.9:g.39076592G>A', 'vcf': {'chr': '19', 'ref': 'G', 'pos': '39076592', 'alt': 'A'}} - assert results['NM_001042723.1:c.14803G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.38585952G>A', 'vcf': {'chr': '19', 'ref': 'G', 'pos': '38585952', 'alt': 'A'}} - assert results['NM_001042723.1:c.14803G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036188.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042723.1'} - + assert results['NM_001042723.1:c.14803G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001036188.1:p.(Ala4935Thr)', 'slr': 'NP_001036188.1:p.(A4935T)'} + assert results['NM_001042723.1:c.14803G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001042723.1:c.14803G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001042723.1:c.14803G>A']['alt_genomic_loci'], []) + assert results['NM_001042723.1:c.14803G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000019.9:g.39076592G>A', 'vcf': {'chr': 'chr19', 'pos': '39076592', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001042723.1:c.14803G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000019.10:g.38585952G>A', 'vcf': {'chr': 'chr19', 'pos': '38585952', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001042723.1:c.14803G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000019.9:g.39076592G>A', 'vcf': {'chr': '19', 'pos': '39076592', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001042723.1:c.14803G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000019.10:g.38585952G>A', 'vcf': {'chr': '19', 'pos': '38585952', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001042723.1:c.14803G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001042723.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001036188.1'} def test_variant267(self): variant = '2-50149352-T-C' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_001330086.1:c.4245A>G' in list(results.keys()) - assert results['NM_001330086.1:c.4245A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330086.1:c.4245A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330086.1:c.4245A>G']['alt_genomic_loci'], []) - assert results['NM_001330086.1:c.4245A>G']['gene_symbol'] == 'NRXN1' - assert results['NM_001330086.1:c.4245A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317015.1:p.(Pro1415=)', 'slr': 'NP_001317015.1:p.(P1415=)'} - assert results['NM_001330086.1:c.4245A>G']['submitted_variant'] == '2-50149352-T-C' - assert results['NM_001330086.1:c.4245A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001330086.1:c.4245A>G']['hgvs_lrg_variant'] == '' - assert results['NM_001330086.1:c.4245A>G']['hgvs_transcript_variant'] == 'NM_001330086.1:c.4245A>G' - assert results['NM_001330086.1:c.4245A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330086.1:c.4245A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330086.1:c.4245A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 
'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330086.1:c.4245A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330086.1:c.4245A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330086.1:c.4245A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317015.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330086.1'} - - assert 'NM_001330083.1:c.4089A>G' in list(results.keys()) - assert results['NM_001330083.1:c.4089A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330083.1:c.4089A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330083.1:c.4089A>G']['alt_genomic_loci'], []) - assert results['NM_001330083.1:c.4089A>G']['gene_symbol'] == 'NRXN1' - assert results['NM_001330083.1:c.4089A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317012.1:p.(Pro1363=)', 'slr': 'NP_001317012.1:p.(P1363=)'} - assert results['NM_001330083.1:c.4089A>G']['submitted_variant'] == '2-50149352-T-C' - assert results['NM_001330083.1:c.4089A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001330083.1:c.4089A>G']['hgvs_lrg_variant'] == '' - assert results['NM_001330083.1:c.4089A>G']['hgvs_transcript_variant'] == 'NM_001330083.1:c.4089A>G' - assert results['NM_001330083.1:c.4089A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330083.1:c.4089A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330083.1:c.4089A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': 
{'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330083.1:c.4089A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330083.1:c.4089A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330083.1:c.4089A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317012.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330083.1'} - - assert 'NM_001330095.1:c.4113A>G' in list(results.keys()) - assert results['NM_001330095.1:c.4113A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330095.1:c.4113A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330095.1:c.4113A>G']['alt_genomic_loci'], []) - assert results['NM_001330095.1:c.4113A>G']['gene_symbol'] == 'NRXN1' - assert results['NM_001330095.1:c.4113A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317024.1:p.(Pro1371=)', 'slr': 'NP_001317024.1:p.(P1371=)'} - assert results['NM_001330095.1:c.4113A>G']['submitted_variant'] == '2-50149352-T-C' - assert results['NM_001330095.1:c.4113A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001330095.1:c.4113A>G']['hgvs_lrg_variant'] == '' - assert results['NM_001330095.1:c.4113A>G']['hgvs_transcript_variant'] == 'NM_001330095.1:c.4113A>G' - assert results['NM_001330095.1:c.4113A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330095.1:c.4113A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330095.1:c.4113A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330095.1:c.4113A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330095.1:c.4113A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330095.1:c.4113A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317024.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330095.1'} - - assert 'NM_138735.2:c.1059A>G' in list(results.keys()) - assert results['NM_138735.2:c.1059A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_138735.2:c.1059A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_138735.2:c.1059A>G']['alt_genomic_loci'], []) - assert results['NM_138735.2:c.1059A>G']['gene_symbol'] == 'NRXN1' - assert results['NM_138735.2:c.1059A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_620072.1:p.(Pro353=)', 'slr': 'NP_620072.1:p.(P353=)'} - assert results['NM_138735.2:c.1059A>G']['submitted_variant'] == '2-50149352-T-C' - assert results['NM_138735.2:c.1059A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_138735.2:c.1059A>G']['hgvs_lrg_variant'] == '' - assert results['NM_138735.2:c.1059A>G']['hgvs_transcript_variant'] == 'NM_138735.2:c.1059A>G' - assert results['NM_138735.2:c.1059A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_138735.2:c.1059A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_138735.2:c.1059A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 
'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_138735.2:c.1059A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_138735.2:c.1059A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_138735.2:c.1059A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_620072.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_138735.2'} - - assert 'NM_001330078.1:c.4254A>G' in list(results.keys()) - assert results['NM_001330078.1:c.4254A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330078.1:c.4254A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330078.1:c.4254A>G']['alt_genomic_loci'], []) - assert results['NM_001330078.1:c.4254A>G']['gene_symbol'] == 'NRXN1' - assert results['NM_001330078.1:c.4254A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317007.1:p.(Pro1418=)', 'slr': 'NP_001317007.1:p.(P1418=)'} - assert results['NM_001330078.1:c.4254A>G']['submitted_variant'] == '2-50149352-T-C' - assert results['NM_001330078.1:c.4254A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001330078.1:c.4254A>G']['hgvs_lrg_variant'] == '' - assert results['NM_001330078.1:c.4254A>G']['hgvs_transcript_variant'] == 'NM_001330078.1:c.4254A>G' - assert results['NM_001330078.1:c.4254A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330078.1:c.4254A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330078.1:c.4254A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330078.1:c.4254A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330078.1:c.4254A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330078.1:c.4254A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317007.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330078.1'} - - assert 'NM_001330094.1:c.4233A>G' in list(results.keys()) - assert results['NM_001330094.1:c.4233A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330094.1:c.4233A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330094.1:c.4233A>G']['alt_genomic_loci'], []) - assert results['NM_001330094.1:c.4233A>G']['gene_symbol'] == 'NRXN1' - assert results['NM_001330094.1:c.4233A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317023.1:p.(Pro1411=)', 'slr': 'NP_001317023.1:p.(P1411=)'} - assert results['NM_001330094.1:c.4233A>G']['submitted_variant'] == '2-50149352-T-C' - assert results['NM_001330094.1:c.4233A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001330094.1:c.4233A>G']['hgvs_lrg_variant'] == '' - assert results['NM_001330094.1:c.4233A>G']['hgvs_transcript_variant'] == 'NM_001330094.1:c.4233A>G' - assert results['NM_001330094.1:c.4233A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330094.1:c.4233A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330094.1:c.4233A>G']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330094.1:c.4233A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330094.1:c.4233A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330094.1:c.4233A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317023.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330094.1'} - - assert 'NM_001320157.3:c.150A>G' in list(results.keys()) - assert results['NM_001320157.3:c.150A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001320157.3:c.150A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001320157.3:c.150A>G']['alt_genomic_loci'], []) - assert results['NM_001320157.3:c.150A>G']['gene_symbol'] == 'NRXN1' - assert results['NM_001320157.3:c.150A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001307086.1:p.(Pro50=)', 'slr': 'NP_001307086.1:p.(P50=)'} - assert results['NM_001320157.3:c.150A>G']['submitted_variant'] == '2-50149352-T-C' - assert results['NM_001320157.3:c.150A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001320157.3:c.150A>G']['hgvs_lrg_variant'] == '' - assert results['NM_001320157.3:c.150A>G']['hgvs_transcript_variant'] == 'NM_001320157.3:c.150A>G' - assert results['NM_001320157.3:c.150A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001320157.3:c.150A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001320157.3:c.150A>G']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001320157.3:c.150A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001320157.3:c.150A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001320157.3:c.150A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001307086.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001320157.3'} - - assert 'NM_001330088.1:c.4074A>G' in list(results.keys()) - assert results['NM_001330088.1:c.4074A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330088.1:c.4074A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330088.1:c.4074A>G']['alt_genomic_loci'], []) - assert results['NM_001330088.1:c.4074A>G']['gene_symbol'] == 'NRXN1' - assert results['NM_001330088.1:c.4074A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317017.1:p.(Pro1358=)', 'slr': 'NP_001317017.1:p.(P1358=)'} - assert results['NM_001330088.1:c.4074A>G']['submitted_variant'] == '2-50149352-T-C' - assert results['NM_001330088.1:c.4074A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001330088.1:c.4074A>G']['hgvs_lrg_variant'] == '' - assert results['NM_001330088.1:c.4074A>G']['hgvs_transcript_variant'] == 'NM_001330088.1:c.4074A>G' - assert results['NM_001330088.1:c.4074A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330088.1:c.4074A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert 
results['NM_001330088.1:c.4074A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330088.1:c.4074A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330088.1:c.4074A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330088.1:c.4074A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317017.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330088.1'} + assert results['flag'] == 'gene_variant' + assert 'NM_001330087.1:c.4053A>G' in list(results.keys()) + assert results['NM_001330087.1:c.4053A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001330087.1:c.4053A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330087.1:c.4053A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_001330087.1:c.4053A>G']['hgvs_transcript_variant'] == 'NM_001330087.1:c.4053A>G' + assert results['NM_001330087.1:c.4053A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330087.1:c.4053A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330087.1:c.4053A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330087.1:c.4053A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317016.1:p.(Pro1351=)', 'slr': 'NP_001317016.1:p.(P1351=)'} + assert results['NM_001330087.1:c.4053A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330087.1:c.4053A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330087.1:c.4053A>G']['alt_genomic_loci'], []) 
+ assert results['NM_001330087.1:c.4053A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330087.1:c.4053A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330087.1:c.4053A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330087.1:c.4053A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330087.1:c.4053A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330087.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317016.1'} assert 'NM_001330092.1:c.1149A>G' in list(results.keys()) - assert results['NM_001330092.1:c.1149A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330092.1:c.1149A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330092.1:c.1149A>G']['alt_genomic_loci'], []) - assert results['NM_001330092.1:c.1149A>G']['gene_symbol'] == 'NRXN1' - assert results['NM_001330092.1:c.1149A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317021.1:p.(Pro383=)', 'slr': 'NP_001317021.1:p.(P383=)'} assert results['NM_001330092.1:c.1149A>G']['submitted_variant'] == '2-50149352-T-C' - assert results['NM_001330092.1:c.1149A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001330092.1:c.1149A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001330092.1:c.1149A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330092.1:c.1149A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 
'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} assert results['NM_001330092.1:c.1149A>G']['hgvs_transcript_variant'] == 'NM_001330092.1:c.1149A>G' + assert results['NM_001330092.1:c.1149A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330092.1:c.1149A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330092.1:c.1149A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330092.1:c.1149A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330092.1:c.1149A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330092.1:c.1149A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330092.1:c.1149A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330092.1:c.1149A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317021.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330092.1'} - - assert 'NM_138735.4:c.1059A>G' in list(results.keys()) - assert results['NM_138735.4:c.1059A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_138735.4:c.1059A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_138735.4:c.1059A>G']['alt_genomic_loci'], []) - assert results['NM_138735.4:c.1059A>G']['gene_symbol'] == 'NRXN1' - assert results['NM_138735.4:c.1059A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_620072.1:p.(Pro353=)', 'slr': 'NP_620072.1:p.(P353=)'} - 
assert results['NM_138735.4:c.1059A>G']['submitted_variant'] == '2-50149352-T-C' - assert results['NM_138735.4:c.1059A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_138735.4:c.1059A>G']['hgvs_lrg_variant'] == '' - assert results['NM_138735.4:c.1059A>G']['hgvs_transcript_variant'] == 'NM_138735.4:c.1059A>G' - assert results['NM_138735.4:c.1059A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_138735.4:c.1059A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_138735.4:c.1059A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_138735.4:c.1059A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_138735.4:c.1059A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_138735.4:c.1059A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_620072.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_138735.4'} + assert results['NM_001330092.1:c.1149A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317021.1:p.(Pro383=)', 'slr': 'NP_001317021.1:p.(P383=)'} + assert results['NM_001330092.1:c.1149A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330092.1:c.1149A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330092.1:c.1149A>G']['alt_genomic_loci'], []) + assert results['NM_001330092.1:c.1149A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'pos': 
'50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330092.1:c.1149A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330092.1:c.1149A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330092.1:c.1149A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330092.1:c.1149A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330092.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317021.1'} - assert 'NM_001330096.1:c.4044A>G' in list(results.keys()) - assert results['NM_001330096.1:c.4044A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330096.1:c.4044A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330096.1:c.4044A>G']['alt_genomic_loci'], []) - assert results['NM_001330096.1:c.4044A>G']['gene_symbol'] == 'NRXN1' - assert results['NM_001330096.1:c.4044A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317025.1:p.(Pro1348=)', 'slr': 'NP_001317025.1:p.(P1348=)'} - assert results['NM_001330096.1:c.4044A>G']['submitted_variant'] == '2-50149352-T-C' - assert results['NM_001330096.1:c.4044A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001330096.1:c.4044A>G']['hgvs_lrg_variant'] == '' - assert results['NM_001330096.1:c.4044A>G']['hgvs_transcript_variant'] == 'NM_001330096.1:c.4044A>G' - assert results['NM_001330096.1:c.4044A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330096.1:c.4044A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 
'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330096.1:c.4044A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330096.1:c.4044A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330096.1:c.4044A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330096.1:c.4044A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317025.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330096.1'} + assert 'NM_004801.4:c.4164A>G' in list(results.keys()) + assert results['NM_004801.4:c.4164A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_004801.4:c.4164A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_004801.4:c.4164A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_004801.4:c.4164A>G']['hgvs_transcript_variant'] == 'NM_004801.4:c.4164A>G' + assert results['NM_004801.4:c.4164A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_004801.4:c.4164A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004801.4:c.4164A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_004801.4:c.4164A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004792.1:p.(Pro1388=)', 'slr': 'NP_004792.1:p.(P1388=)'} + assert results['NM_004801.4:c.4164A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_004801.4:c.4164A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_004801.4:c.4164A>G']['alt_genomic_loci'], []) + assert 
results['NM_004801.4:c.4164A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_004801.4:c.4164A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_004801.4:c.4164A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_004801.4:c.4164A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_004801.4:c.4164A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004801.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004792.1'} - assert 'NM_001135659.2:c.4374A>G' in list(results.keys()) - assert results['NM_001135659.2:c.4374A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001135659.2:c.4374A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001135659.2:c.4374A>G']['alt_genomic_loci'], []) - assert results['NM_001135659.2:c.4374A>G']['gene_symbol'] == 'NRXN1' - assert results['NM_001135659.2:c.4374A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129131.1:p.(Pro1458=)', 'slr': 'NP_001129131.1:p.(P1458=)'} - assert results['NM_001135659.2:c.4374A>G']['submitted_variant'] == '2-50149352-T-C' - assert results['NM_001135659.2:c.4374A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001135659.2:c.4374A>G']['hgvs_lrg_variant'] == '' - assert results['NM_001135659.2:c.4374A>G']['hgvs_transcript_variant'] == 'NM_001135659.2:c.4374A>G' - assert results['NM_001135659.2:c.4374A>G']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_001135659.2:c.4374A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001135659.2:c.4374A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001135659.2:c.4374A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001135659.2:c.4374A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001135659.2:c.4374A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129131.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135659.2'} + assert 'NM_004801.5:c.4164A>G' in list(results.keys()) + assert results['NM_004801.5:c.4164A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_004801.5:c.4164A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_004801.5:c.4164A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_004801.5:c.4164A>G']['hgvs_transcript_variant'] == 'NM_004801.5:c.4164A>G' + assert results['NM_004801.5:c.4164A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_004801.5:c.4164A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004801.5:c.4164A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_004801.5:c.4164A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004792.1:p.(Pro1388=)', 'slr': 'NP_004792.1:p.(P1388=)'} + assert results['NM_004801.5:c.4164A>G']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_004801.5:c.4164A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_004801.5:c.4164A>G']['alt_genomic_loci'], []) + assert results['NM_004801.5:c.4164A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_004801.5:c.4164A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_004801.5:c.4164A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_004801.5:c.4164A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_004801.5:c.4164A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004801.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004792.1'} assert 'NM_001330085.1:c.4227A>G' in list(results.keys()) - assert results['NM_001330085.1:c.4227A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330085.1:c.4227A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330085.1:c.4227A>G']['alt_genomic_loci'], []) - assert results['NM_001330085.1:c.4227A>G']['gene_symbol'] == 'NRXN1' - assert results['NM_001330085.1:c.4227A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317014.1:p.(Pro1409=)', 'slr': 'NP_001317014.1:p.(P1409=)'} assert results['NM_001330085.1:c.4227A>G']['submitted_variant'] == '2-50149352-T-C' - assert results['NM_001330085.1:c.4227A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001330085.1:c.4227A>G']['hgvs_lrg_variant'] == '' + assert 
results['NM_001330085.1:c.4227A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330085.1:c.4227A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} assert results['NM_001330085.1:c.4227A>G']['hgvs_transcript_variant'] == 'NM_001330085.1:c.4227A>G' + assert results['NM_001330085.1:c.4227A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330085.1:c.4227A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330085.1:c.4227A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330085.1:c.4227A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330085.1:c.4227A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330085.1:c.4227A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330085.1:c.4227A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330085.1:c.4227A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317014.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330085.1'} + assert results['NM_001330085.1:c.4227A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317014.1:p.(Pro1409=)', 'slr': 'NP_001317014.1:p.(P1409=)'} + assert results['NM_001330085.1:c.4227A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330085.1:c.4227A>G']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['NM_001330085.1:c.4227A>G']['alt_genomic_loci'], []) + assert results['NM_001330085.1:c.4227A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330085.1:c.4227A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330085.1:c.4227A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330085.1:c.4227A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330085.1:c.4227A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330085.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317014.1'} - assert 'NM_001320156.1:c.159A>G' in list(results.keys()) - assert results['NM_001320156.1:c.159A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001320156.1:c.159A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001320156.1:c.159A>G']['alt_genomic_loci'], []) - assert results['NM_001320156.1:c.159A>G']['gene_symbol'] == 'NRXN1' - assert results['NM_001320156.1:c.159A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001307085.1:p.(Pro53=)', 'slr': 'NP_001307085.1:p.(P53=)'} - assert results['NM_001320156.1:c.159A>G']['submitted_variant'] == '2-50149352-T-C' - assert results['NM_001320156.1:c.159A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001320156.1:c.159A>G']['hgvs_lrg_variant'] == '' - assert results['NM_001320156.1:c.159A>G']['hgvs_transcript_variant'] == 
'NM_001320156.1:c.159A>G' - assert results['NM_001320156.1:c.159A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001320156.1:c.159A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001320156.1:c.159A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001320156.1:c.159A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001320156.1:c.159A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001320156.1:c.159A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001307085.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001320156.1'} + assert 'NM_001135659.2:c.4374A>G' in list(results.keys()) + assert results['NM_001135659.2:c.4374A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001135659.2:c.4374A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001135659.2:c.4374A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_001135659.2:c.4374A>G']['hgvs_transcript_variant'] == 'NM_001135659.2:c.4374A>G' + assert results['NM_001135659.2:c.4374A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001135659.2:c.4374A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001135659.2:c.4374A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001135659.2:c.4374A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129131.1:p.(Pro1458=)', 
'slr': 'NP_001129131.1:p.(P1458=)'} + assert results['NM_001135659.2:c.4374A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001135659.2:c.4374A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001135659.2:c.4374A>G']['alt_genomic_loci'], []) + assert results['NM_001135659.2:c.4374A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001135659.2:c.4374A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001135659.2:c.4374A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001135659.2:c.4374A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001135659.2:c.4374A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135659.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129131.1'} assert 'NM_001330077.1:c.4230A>G' in list(results.keys()) - assert results['NM_001330077.1:c.4230A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330077.1:c.4230A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330077.1:c.4230A>G']['alt_genomic_loci'], []) - assert results['NM_001330077.1:c.4230A>G']['gene_symbol'] == 'NRXN1' - assert results['NM_001330077.1:c.4230A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317006.1:p.(Pro1410=)', 'slr': 'NP_001317006.1:p.(P1410=)'} assert results['NM_001330077.1:c.4230A>G']['submitted_variant'] == '2-50149352-T-C' - assert 
results['NM_001330077.1:c.4230A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001330077.1:c.4230A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001330077.1:c.4230A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330077.1:c.4230A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} assert results['NM_001330077.1:c.4230A>G']['hgvs_transcript_variant'] == 'NM_001330077.1:c.4230A>G' + assert results['NM_001330077.1:c.4230A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330077.1:c.4230A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330077.1:c.4230A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330077.1:c.4230A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330077.1:c.4230A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330077.1:c.4230A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330077.1:c.4230A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330077.1:c.4230A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317006.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330077.1'} + assert results['NM_001330077.1:c.4230A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317006.1:p.(Pro1410=)', 'slr': 'NP_001317006.1:p.(P1410=)'} + assert 
results['NM_001330077.1:c.4230A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330077.1:c.4230A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330077.1:c.4230A>G']['alt_genomic_loci'], []) + assert results['NM_001330077.1:c.4230A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330077.1:c.4230A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330077.1:c.4230A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330077.1:c.4230A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330077.1:c.4230A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330077.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317006.1'} + + assert 'NM_138735.4:c.1059A>G' in list(results.keys()) + assert results['NM_138735.4:c.1059A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_138735.4:c.1059A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_138735.4:c.1059A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_138735.4:c.1059A>G']['hgvs_transcript_variant'] == 'NM_138735.4:c.1059A>G' + assert results['NM_138735.4:c.1059A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_138735.4:c.1059A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_138735.4:c.1059A>G']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_138735.4:c.1059A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_620072.1:p.(Pro353=)', 'slr': 'NP_620072.1:p.(P353=)'} + assert results['NM_138735.4:c.1059A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_138735.4:c.1059A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_138735.4:c.1059A>G']['alt_genomic_loci'], []) + assert results['NM_138735.4:c.1059A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_138735.4:c.1059A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_138735.4:c.1059A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_138735.4:c.1059A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_138735.4:c.1059A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_138735.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_620072.1'} + + assert 'NM_001320156.3:c.159A>G' in list(results.keys()) + assert results['NM_001320156.3:c.159A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001320156.3:c.159A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001320156.3:c.159A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_001320156.3:c.159A>G']['hgvs_transcript_variant'] == 'NM_001320156.3:c.159A>G' + assert results['NM_001320156.3:c.159A>G']['genome_context_intronic_sequence'] == '' + assert 
results['NM_001320156.3:c.159A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001320156.3:c.159A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001320156.3:c.159A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001307085.1:p.(Pro53=)', 'slr': 'NP_001307085.1:p.(P53=)'} + assert results['NM_001320156.3:c.159A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001320156.3:c.159A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001320156.3:c.159A>G']['alt_genomic_loci'], []) + assert results['NM_001320156.3:c.159A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001320156.3:c.159A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001320156.3:c.159A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001320156.3:c.159A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001320156.3:c.159A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001320156.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001307085.1'} + + assert 'NM_138735.2:c.1059A>G' in list(results.keys()) + assert results['NM_138735.2:c.1059A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_138735.2:c.1059A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_138735.2:c.1059A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert 
results['NM_138735.2:c.1059A>G']['hgvs_transcript_variant'] == 'NM_138735.2:c.1059A>G' + assert results['NM_138735.2:c.1059A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_138735.2:c.1059A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_138735.2:c.1059A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_138735.2:c.1059A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_620072.1:p.(Pro353=)', 'slr': 'NP_620072.1:p.(P353=)'} + assert results['NM_138735.2:c.1059A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_138735.2:c.1059A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_138735.2:c.1059A>G']['alt_genomic_loci'], []) + assert results['NM_138735.2:c.1059A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_138735.2:c.1059A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_138735.2:c.1059A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_138735.2:c.1059A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_138735.2:c.1059A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_138735.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_620072.1'} assert 'NM_001330093.1:c.4251A>G' in list(results.keys()) - assert results['NM_001330093.1:c.4251A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330093.1:c.4251A>G']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_001330093.1:c.4251A>G']['alt_genomic_loci'], []) - assert results['NM_001330093.1:c.4251A>G']['gene_symbol'] == 'NRXN1' - assert results['NM_001330093.1:c.4251A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317022.1:p.(Pro1417=)', 'slr': 'NP_001317022.1:p.(P1417=)'} assert results['NM_001330093.1:c.4251A>G']['submitted_variant'] == '2-50149352-T-C' - assert results['NM_001330093.1:c.4251A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001330093.1:c.4251A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001330093.1:c.4251A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330093.1:c.4251A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} assert results['NM_001330093.1:c.4251A>G']['hgvs_transcript_variant'] == 'NM_001330093.1:c.4251A>G' + assert results['NM_001330093.1:c.4251A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330093.1:c.4251A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330093.1:c.4251A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330093.1:c.4251A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330093.1:c.4251A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330093.1:c.4251A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330093.1:c.4251A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert 
results['NM_001330093.1:c.4251A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317022.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330093.1'} + assert results['NM_001330093.1:c.4251A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317022.1:p.(Pro1417=)', 'slr': 'NP_001317022.1:p.(P1417=)'} + assert results['NM_001330093.1:c.4251A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330093.1:c.4251A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330093.1:c.4251A>G']['alt_genomic_loci'], []) + assert results['NM_001330093.1:c.4251A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330093.1:c.4251A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330093.1:c.4251A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330093.1:c.4251A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330093.1:c.4251A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330093.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317022.1'} - assert 'NM_001135659.1:c.4374A>G' in list(results.keys()) - assert results['NM_001135659.1:c.4374A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001135659.1:c.4374A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001135659.1:c.4374A>G']['alt_genomic_loci'], []) - assert 
results['NM_001135659.1:c.4374A>G']['gene_symbol'] == 'NRXN1' - assert results['NM_001135659.1:c.4374A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129131.1:p.(Pro1458=)', 'slr': 'NP_001129131.1:p.(P1458=)'} - assert results['NM_001135659.1:c.4374A>G']['submitted_variant'] == '2-50149352-T-C' - assert results['NM_001135659.1:c.4374A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001135659.1:c.4374A>G']['hgvs_lrg_variant'] == '' - assert results['NM_001135659.1:c.4374A>G']['hgvs_transcript_variant'] == 'NM_001135659.1:c.4374A>G' - assert results['NM_001135659.1:c.4374A>G']['hgvs_refseqgene_variant'] == 'NG_011878.1:g.1115323A>G' - assert results['NM_001135659.1:c.4374A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001135659.1:c.4374A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001135659.1:c.4374A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001135659.1:c.4374A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001135659.1:c.4374A>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011878.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129131.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135659.1'} + assert 'NM_001330084.1:c.4188A>G' in list(results.keys()) + assert results['NM_001330084.1:c.4188A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001330084.1:c.4188A>G']['gene_symbol'] == 'NRXN1' + 
assert results['NM_001330084.1:c.4188A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_001330084.1:c.4188A>G']['hgvs_transcript_variant'] == 'NM_001330084.1:c.4188A>G' + assert results['NM_001330084.1:c.4188A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330084.1:c.4188A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330084.1:c.4188A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330084.1:c.4188A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317013.1:p.(Pro1396=)', 'slr': 'NP_001317013.1:p.(P1396=)'} + assert results['NM_001330084.1:c.4188A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330084.1:c.4188A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330084.1:c.4188A>G']['alt_genomic_loci'], []) + assert results['NM_001330084.1:c.4188A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330084.1:c.4188A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330084.1:c.4188A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330084.1:c.4188A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330084.1:c.4188A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330084.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317013.1'} assert 'NM_001320157.1:c.150A>G' in 
list(results.keys()) - assert results['NM_001320157.1:c.150A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001320157.1:c.150A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001320157.1:c.150A>G']['alt_genomic_loci'], []) - assert results['NM_001320157.1:c.150A>G']['gene_symbol'] == 'NRXN1' - assert results['NM_001320157.1:c.150A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001307086.1:p.(Pro50=)', 'slr': 'NP_001307086.1:p.(P50=)'} assert results['NM_001320157.1:c.150A>G']['submitted_variant'] == '2-50149352-T-C' - assert results['NM_001320157.1:c.150A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001320157.1:c.150A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001320157.1:c.150A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001320157.1:c.150A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} assert results['NM_001320157.1:c.150A>G']['hgvs_transcript_variant'] == 'NM_001320157.1:c.150A>G' + assert results['NM_001320157.1:c.150A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001320157.1:c.150A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001320157.1:c.150A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001320157.1:c.150A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001320157.1:c.150A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001320157.1:c.150A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert 
results['NM_001320157.1:c.150A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001320157.1:c.150A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001307086.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001320157.1'} + assert results['NM_001320157.1:c.150A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001307086.1:p.(Pro50=)', 'slr': 'NP_001307086.1:p.(P50=)'} + assert results['NM_001320157.1:c.150A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001320157.1:c.150A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001320157.1:c.150A>G']['alt_genomic_loci'], []) + assert results['NM_001320157.1:c.150A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001320157.1:c.150A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001320157.1:c.150A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001320157.1:c.150A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001320157.1:c.150A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001320157.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001307086.1'} - assert 'NM_001330084.1:c.4188A>G' in list(results.keys()) - assert results['NM_001330084.1:c.4188A>G']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_001330084.1:c.4188A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330084.1:c.4188A>G']['alt_genomic_loci'], []) - assert results['NM_001330084.1:c.4188A>G']['gene_symbol'] == 'NRXN1' - assert results['NM_001330084.1:c.4188A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317013.1:p.(Pro1396=)', 'slr': 'NP_001317013.1:p.(P1396=)'} - assert results['NM_001330084.1:c.4188A>G']['submitted_variant'] == '2-50149352-T-C' - assert results['NM_001330084.1:c.4188A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001330084.1:c.4188A>G']['hgvs_lrg_variant'] == '' - assert results['NM_001330084.1:c.4188A>G']['hgvs_transcript_variant'] == 'NM_001330084.1:c.4188A>G' - assert results['NM_001330084.1:c.4188A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330084.1:c.4188A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330084.1:c.4188A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330084.1:c.4188A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330084.1:c.4188A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330084.1:c.4188A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317013.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330084.1'} + assert 'NM_001330086.1:c.4245A>G' in list(results.keys()) + assert results['NM_001330086.1:c.4245A>G']['submitted_variant'] == 
'2-50149352-T-C' + assert results['NM_001330086.1:c.4245A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330086.1:c.4245A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_001330086.1:c.4245A>G']['hgvs_transcript_variant'] == 'NM_001330086.1:c.4245A>G' + assert results['NM_001330086.1:c.4245A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330086.1:c.4245A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330086.1:c.4245A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330086.1:c.4245A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317015.1:p.(Pro1415=)', 'slr': 'NP_001317015.1:p.(P1415=)'} + assert results['NM_001330086.1:c.4245A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330086.1:c.4245A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330086.1:c.4245A>G']['alt_genomic_loci'], []) + assert results['NM_001330086.1:c.4245A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330086.1:c.4245A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330086.1:c.4245A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330086.1:c.4245A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330086.1:c.4245A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330086.1', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317015.1'} - assert 'NM_004801.4:c.4164A>G' in list(results.keys()) - assert results['NM_004801.4:c.4164A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_004801.4:c.4164A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004801.4:c.4164A>G']['alt_genomic_loci'], []) - assert results['NM_004801.4:c.4164A>G']['gene_symbol'] == 'NRXN1' - assert results['NM_004801.4:c.4164A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004792.1:p.(Pro1388=)', 'slr': 'NP_004792.1:p.(P1388=)'} - assert results['NM_004801.4:c.4164A>G']['submitted_variant'] == '2-50149352-T-C' - assert results['NM_004801.4:c.4164A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_004801.4:c.4164A>G']['hgvs_lrg_variant'] == '' - assert results['NM_004801.4:c.4164A>G']['hgvs_transcript_variant'] == 'NM_004801.4:c.4164A>G' - assert results['NM_004801.4:c.4164A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_004801.4:c.4164A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_004801.4:c.4164A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_004801.4:c.4164A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_004801.4:c.4164A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_004801.4:c.4164A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004792.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_004801.4'} + assert 'NM_001330078.1:c.4254A>G' in list(results.keys()) + assert results['NM_001330078.1:c.4254A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001330078.1:c.4254A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330078.1:c.4254A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_001330078.1:c.4254A>G']['hgvs_transcript_variant'] == 'NM_001330078.1:c.4254A>G' + assert results['NM_001330078.1:c.4254A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330078.1:c.4254A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330078.1:c.4254A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330078.1:c.4254A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317007.1:p.(Pro1418=)', 'slr': 'NP_001317007.1:p.(P1418=)'} + assert results['NM_001330078.1:c.4254A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330078.1:c.4254A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330078.1:c.4254A>G']['alt_genomic_loci'], []) + assert results['NM_001330078.1:c.4254A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330078.1:c.4254A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330078.1:c.4254A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330078.1:c.4254A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'pos': '49922214', 'ref': 'T', 
'alt': 'C'}} + assert results['NM_001330078.1:c.4254A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330078.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317007.1'} - assert 'NM_001330082.1:c.4221A>G' in list(results.keys()) - assert results['NM_001330082.1:c.4221A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330082.1:c.4221A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330082.1:c.4221A>G']['alt_genomic_loci'], []) - assert results['NM_001330082.1:c.4221A>G']['gene_symbol'] == 'NRXN1' - assert results['NM_001330082.1:c.4221A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317011.1:p.(Pro1407=)', 'slr': 'NP_001317011.1:p.(P1407=)'} - assert results['NM_001330082.1:c.4221A>G']['submitted_variant'] == '2-50149352-T-C' - assert results['NM_001330082.1:c.4221A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001330082.1:c.4221A>G']['hgvs_lrg_variant'] == '' - assert results['NM_001330082.1:c.4221A>G']['hgvs_transcript_variant'] == 'NM_001330082.1:c.4221A>G' - assert results['NM_001330082.1:c.4221A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330082.1:c.4221A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330082.1:c.4221A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330082.1:c.4221A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330082.1:c.4221A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 
'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330082.1:c.4221A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317011.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330082.1'} + assert 'NM_001320156.1:c.159A>G' in list(results.keys()) + assert results['NM_001320156.1:c.159A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001320156.1:c.159A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001320156.1:c.159A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_001320156.1:c.159A>G']['hgvs_transcript_variant'] == 'NM_001320156.1:c.159A>G' + assert results['NM_001320156.1:c.159A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001320156.1:c.159A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001320156.1:c.159A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001320156.1:c.159A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001307085.1:p.(Pro53=)', 'slr': 'NP_001307085.1:p.(P53=)'} + assert results['NM_001320156.1:c.159A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001320156.1:c.159A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001320156.1:c.159A>G']['alt_genomic_loci'], []) + assert results['NM_001320156.1:c.159A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001320156.1:c.159A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001320156.1:c.159A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert 
results['NM_001320156.1:c.159A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001320156.1:c.159A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001320156.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001307085.1'} - assert results['flag'] == 'gene_variant' - assert 'NM_001330091.1:c.1140A>G' in list(results.keys()) - assert results['NM_001330091.1:c.1140A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330091.1:c.1140A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330091.1:c.1140A>G']['alt_genomic_loci'], []) - assert results['NM_001330091.1:c.1140A>G']['gene_symbol'] == 'NRXN1' - assert results['NM_001330091.1:c.1140A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317020.1:p.(Pro380=)', 'slr': 'NP_001317020.1:p.(P380=)'} - assert results['NM_001330091.1:c.1140A>G']['submitted_variant'] == '2-50149352-T-C' - assert results['NM_001330091.1:c.1140A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001330091.1:c.1140A>G']['hgvs_lrg_variant'] == '' - assert results['NM_001330091.1:c.1140A>G']['hgvs_transcript_variant'] == 'NM_001330091.1:c.1140A>G' - assert results['NM_001330091.1:c.1140A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330091.1:c.1140A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330091.1:c.1140A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330091.1:c.1140A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 
'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330091.1:c.1140A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330091.1:c.1140A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317020.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330091.1'} + assert 'NM_001330096.1:c.4044A>G' in list(results.keys()) + assert results['NM_001330096.1:c.4044A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001330096.1:c.4044A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330096.1:c.4044A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_001330096.1:c.4044A>G']['hgvs_transcript_variant'] == 'NM_001330096.1:c.4044A>G' + assert results['NM_001330096.1:c.4044A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330096.1:c.4044A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330096.1:c.4044A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330096.1:c.4044A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317025.1:p.(Pro1348=)', 'slr': 'NP_001317025.1:p.(P1348=)'} + assert results['NM_001330096.1:c.4044A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330096.1:c.4044A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330096.1:c.4044A>G']['alt_genomic_loci'], []) + assert results['NM_001330096.1:c.4044A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330096.1:c.4044A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'pos': '49922214', 'ref': 'T', 
'alt': 'C'}} + assert results['NM_001330096.1:c.4044A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330096.1:c.4044A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330096.1:c.4044A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330096.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317025.1'} - assert 'NM_001320156.3:c.159A>G' in list(results.keys()) - assert results['NM_001320156.3:c.159A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001320156.3:c.159A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001320156.3:c.159A>G']['alt_genomic_loci'], []) - assert results['NM_001320156.3:c.159A>G']['gene_symbol'] == 'NRXN1' - assert results['NM_001320156.3:c.159A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001307085.1:p.(Pro53=)', 'slr': 'NP_001307085.1:p.(P53=)'} - assert results['NM_001320156.3:c.159A>G']['submitted_variant'] == '2-50149352-T-C' - assert results['NM_001320156.3:c.159A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001320156.3:c.159A>G']['hgvs_lrg_variant'] == '' - assert results['NM_001320156.3:c.159A>G']['hgvs_transcript_variant'] == 'NM_001320156.3:c.159A>G' - assert results['NM_001320156.3:c.159A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001320156.3:c.159A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001320156.3:c.159A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 
'alt': 'C'}} - assert results['NM_001320156.3:c.159A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001320156.3:c.159A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001320156.3:c.159A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001307085.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001320156.3'} + assert 'NM_001135659.1:c.4374A>G' in list(results.keys()) + assert results['NM_001135659.1:c.4374A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001135659.1:c.4374A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001135659.1:c.4374A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_001135659.1:c.4374A>G']['hgvs_transcript_variant'] == 'NM_001135659.1:c.4374A>G' + assert results['NM_001135659.1:c.4374A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001135659.1:c.4374A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001135659.1:c.4374A>G']['hgvs_refseqgene_variant'] == 'NG_011878.1:g.1115323A>G' + assert results['NM_001135659.1:c.4374A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129131.1:p.(Pro1458=)', 'slr': 'NP_001129131.1:p.(P1458=)'} + assert results['NM_001135659.1:c.4374A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001135659.1:c.4374A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001135659.1:c.4374A>G']['alt_genomic_loci'], []) + assert results['NM_001135659.1:c.4374A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} 
+ assert results['NM_001135659.1:c.4374A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001135659.1:c.4374A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001135659.1:c.4374A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001135659.1:c.4374A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135659.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129131.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011878.1'} - assert 'NM_001330087.1:c.4053A>G' in list(results.keys()) - assert results['NM_001330087.1:c.4053A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330087.1:c.4053A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330087.1:c.4053A>G']['alt_genomic_loci'], []) - assert results['NM_001330087.1:c.4053A>G']['gene_symbol'] == 'NRXN1' - assert results['NM_001330087.1:c.4053A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317016.1:p.(Pro1351=)', 'slr': 'NP_001317016.1:p.(P1351=)'} - assert results['NM_001330087.1:c.4053A>G']['submitted_variant'] == '2-50149352-T-C' - assert results['NM_001330087.1:c.4053A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001330087.1:c.4053A>G']['hgvs_lrg_variant'] == '' - assert results['NM_001330087.1:c.4053A>G']['hgvs_transcript_variant'] == 'NM_001330087.1:c.4053A>G' - assert results['NM_001330087.1:c.4053A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330087.1:c.4053A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330087.1:c.4053A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330087.1:c.4053A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330087.1:c.4053A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330087.1:c.4053A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317016.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330087.1'} + assert 'NM_001320157.3:c.150A>G' in list(results.keys()) + assert results['NM_001320157.3:c.150A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001320157.3:c.150A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001320157.3:c.150A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_001320157.3:c.150A>G']['hgvs_transcript_variant'] == 'NM_001320157.3:c.150A>G' + assert results['NM_001320157.3:c.150A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001320157.3:c.150A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001320157.3:c.150A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001320157.3:c.150A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001307086.1:p.(Pro50=)', 'slr': 'NP_001307086.1:p.(P50=)'} + assert results['NM_001320157.3:c.150A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001320157.3:c.150A>G']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['NM_001320157.3:c.150A>G']['alt_genomic_loci'], []) + assert results['NM_001320157.3:c.150A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001320157.3:c.150A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001320157.3:c.150A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001320157.3:c.150A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001320157.3:c.150A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001320157.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001307086.1'} + + assert 'NM_001330088.1:c.4074A>G' in list(results.keys()) + assert results['NM_001330088.1:c.4074A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001330088.1:c.4074A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330088.1:c.4074A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_001330088.1:c.4074A>G']['hgvs_transcript_variant'] == 'NM_001330088.1:c.4074A>G' + assert results['NM_001330088.1:c.4074A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330088.1:c.4074A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330088.1:c.4074A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330088.1:c.4074A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317017.1:p.(Pro1358=)', 'slr': 
'NP_001317017.1:p.(P1358=)'} + assert results['NM_001330088.1:c.4074A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330088.1:c.4074A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330088.1:c.4074A>G']['alt_genomic_loci'], []) + assert results['NM_001330088.1:c.4074A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330088.1:c.4074A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330088.1:c.4074A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330088.1:c.4074A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330088.1:c.4074A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330088.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317017.1'} + + assert 'NM_001330083.1:c.4089A>G' in list(results.keys()) + assert results['NM_001330083.1:c.4089A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001330083.1:c.4089A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330083.1:c.4089A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_001330083.1:c.4089A>G']['hgvs_transcript_variant'] == 'NM_001330083.1:c.4089A>G' + assert results['NM_001330083.1:c.4089A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330083.1:c.4089A>G']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_001330083.1:c.4089A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330083.1:c.4089A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317012.1:p.(Pro1363=)', 'slr': 'NP_001317012.1:p.(P1363=)'} + assert results['NM_001330083.1:c.4089A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330083.1:c.4089A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330083.1:c.4089A>G']['alt_genomic_loci'], []) + assert results['NM_001330083.1:c.4089A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330083.1:c.4089A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330083.1:c.4089A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330083.1:c.4089A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330083.1:c.4089A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330083.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317012.1'} + + assert 'NM_001330095.1:c.4113A>G' in list(results.keys()) + assert results['NM_001330095.1:c.4113A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001330095.1:c.4113A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330095.1:c.4113A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_001330095.1:c.4113A>G']['hgvs_transcript_variant'] == 'NM_001330095.1:c.4113A>G' + assert 
results['NM_001330095.1:c.4113A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330095.1:c.4113A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330095.1:c.4113A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330095.1:c.4113A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317024.1:p.(Pro1371=)', 'slr': 'NP_001317024.1:p.(P1371=)'} + assert results['NM_001330095.1:c.4113A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330095.1:c.4113A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330095.1:c.4113A>G']['alt_genomic_loci'], []) + assert results['NM_001330095.1:c.4113A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330095.1:c.4113A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330095.1:c.4113A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330095.1:c.4113A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330095.1:c.4113A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330095.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317024.1'} assert 'NM_001330097.1:c.1050A>G' in list(results.keys()) - assert results['NM_001330097.1:c.1050A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330097.1:c.1050A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330097.1:c.1050A>G']['alt_genomic_loci'], 
[]) - assert results['NM_001330097.1:c.1050A>G']['gene_symbol'] == 'NRXN1' - assert results['NM_001330097.1:c.1050A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317026.1:p.(Pro350=)', 'slr': 'NP_001317026.1:p.(P350=)'} assert results['NM_001330097.1:c.1050A>G']['submitted_variant'] == '2-50149352-T-C' - assert results['NM_001330097.1:c.1050A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001330097.1:c.1050A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001330097.1:c.1050A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330097.1:c.1050A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} assert results['NM_001330097.1:c.1050A>G']['hgvs_transcript_variant'] == 'NM_001330097.1:c.1050A>G' + assert results['NM_001330097.1:c.1050A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330097.1:c.1050A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330097.1:c.1050A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330097.1:c.1050A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330097.1:c.1050A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330097.1:c.1050A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_001330097.1:c.1050A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_001330097.1:c.1050A>G']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317026.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330097.1'} + assert results['NM_001330097.1:c.1050A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317026.1:p.(Pro350=)', 'slr': 'NP_001317026.1:p.(P350=)'} + assert results['NM_001330097.1:c.1050A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330097.1:c.1050A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330097.1:c.1050A>G']['alt_genomic_loci'], []) + assert results['NM_001330097.1:c.1050A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330097.1:c.1050A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330097.1:c.1050A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330097.1:c.1050A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330097.1:c.1050A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330097.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317026.1'} - assert 'NM_004801.5:c.4164A>G' in list(results.keys()) - assert results['NM_004801.5:c.4164A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_004801.5:c.4164A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004801.5:c.4164A>G']['alt_genomic_loci'], []) - assert results['NM_004801.5:c.4164A>G']['gene_symbol'] == 'NRXN1' - assert 
results['NM_004801.5:c.4164A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004792.1:p.(Pro1388=)', 'slr': 'NP_004792.1:p.(P1388=)'} - assert results['NM_004801.5:c.4164A>G']['submitted_variant'] == '2-50149352-T-C' - assert results['NM_004801.5:c.4164A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_004801.5:c.4164A>G']['hgvs_lrg_variant'] == '' - assert results['NM_004801.5:c.4164A>G']['hgvs_transcript_variant'] == 'NM_004801.5:c.4164A>G' - assert results['NM_004801.5:c.4164A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_004801.5:c.4164A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_004801.5:c.4164A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_004801.5:c.4164A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '50149352', 'alt': 'C'}} - assert results['NM_004801.5:c.4164A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'ref': 'T', 'pos': '49922214', 'alt': 'C'}} - assert results['NM_004801.5:c.4164A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004792.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004801.5'} + assert 'NM_001330094.1:c.4233A>G' in list(results.keys()) + assert results['NM_001330094.1:c.4233A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001330094.1:c.4233A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330094.1:c.4233A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert 
results['NM_001330094.1:c.4233A>G']['hgvs_transcript_variant'] == 'NM_001330094.1:c.4233A>G' + assert results['NM_001330094.1:c.4233A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330094.1:c.4233A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330094.1:c.4233A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330094.1:c.4233A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317023.1:p.(Pro1411=)', 'slr': 'NP_001317023.1:p.(P1411=)'} + assert results['NM_001330094.1:c.4233A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330094.1:c.4233A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330094.1:c.4233A>G']['alt_genomic_loci'], []) + assert results['NM_001330094.1:c.4233A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330094.1:c.4233A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330094.1:c.4233A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330094.1:c.4233A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330094.1:c.4233A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330094.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317023.1'} + assert 'NM_001330091.1:c.1140A>G' in list(results.keys()) + assert results['NM_001330091.1:c.1140A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001330091.1:c.1140A>G']['gene_symbol'] == 
'NRXN1' + assert results['NM_001330091.1:c.1140A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_001330091.1:c.1140A>G']['hgvs_transcript_variant'] == 'NM_001330091.1:c.1140A>G' + assert results['NM_001330091.1:c.1140A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330091.1:c.1140A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330091.1:c.1140A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330091.1:c.1140A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317020.1:p.(Pro380=)', 'slr': 'NP_001317020.1:p.(P380=)'} + assert results['NM_001330091.1:c.1140A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330091.1:c.1140A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330091.1:c.1140A>G']['alt_genomic_loci'], []) + assert results['NM_001330091.1:c.1140A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330091.1:c.1140A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330091.1:c.1140A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330091.1:c.1140A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330091.1:c.1140A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330091.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317020.1'} + + assert 'NM_001330082.1:c.4221A>G' 
in list(results.keys()) + assert results['NM_001330082.1:c.4221A>G']['submitted_variant'] == '2-50149352-T-C' + assert results['NM_001330082.1:c.4221A>G']['gene_symbol'] == 'NRXN1' + assert results['NM_001330082.1:c.4221A>G']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_001330082.1:c.4221A>G']['hgvs_transcript_variant'] == 'NM_001330082.1:c.4221A>G' + assert results['NM_001330082.1:c.4221A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001330082.1:c.4221A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330082.1:c.4221A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330082.1:c.4221A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317011.1:p.(Pro1407=)', 'slr': 'NP_001317011.1:p.(P1407=)'} + assert results['NM_001330082.1:c.4221A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330082.1:c.4221A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330082.1:c.4221A>G']['alt_genomic_loci'], []) + assert results['NM_001330082.1:c.4221A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': 'chr2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330082.1:c.4221A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': 'chr2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330082.1:c.4221A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50149352T>C', 'vcf': {'chr': '2', 'pos': '50149352', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330082.1:c.4221A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.49922214T>C', 'vcf': {'chr': '2', 'pos': '49922214', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001330082.1:c.4221A>G']['reference_sequence_records'] 
== {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317011.1'} def test_variant268(self): variant = '2-50847195-G-A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_001330096.1:c.1201C>T' in list(results.keys()) - assert results['NM_001330096.1:c.1201C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330096.1:c.1201C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330096.1:c.1201C>T']['alt_genomic_loci'], []) - assert results['NM_001330096.1:c.1201C>T']['gene_symbol'] == 'NRXN1' - assert results['NM_001330096.1:c.1201C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317025.1:p.(Pro401Ser)', 'slr': 'NP_001317025.1:p.(P401S)'} - assert results['NM_001330096.1:c.1201C>T']['submitted_variant'] == '2-50847195-G-A' - assert results['NM_001330096.1:c.1201C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001330096.1:c.1201C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001330096.1:c.1201C>T']['hgvs_transcript_variant'] == 'NM_001330096.1:c.1201C>T' - assert results['NM_001330096.1:c.1201C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330096.1:c.1201C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001330096.1:c.1201C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001330096.1:c.1201C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001330096.1:c.1201C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001330096.1:c.1201C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317025.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330096.1'} + assert results['flag'] == 'gene_variant' + assert 'NM_001330087.1:c.1201C>T' in list(results.keys()) + assert results['NM_001330087.1:c.1201C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_001330087.1:c.1201C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001330087.1:c.1201C>T']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_001330087.1:c.1201C>T']['hgvs_transcript_variant'] == 'NM_001330087.1:c.1201C>T' + assert results['NM_001330087.1:c.1201C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001330087.1:c.1201C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330087.1:c.1201C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330087.1:c.1201C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317016.1:p.(Pro401Ser)', 'slr': 'NP_001317016.1:p.(P401S)'} + assert results['NM_001330087.1:c.1201C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330087.1:c.1201C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330087.1:c.1201C>T']['alt_genomic_loci'], []) + assert results['NM_001330087.1:c.1201C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330087.1:c.1201C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330087.1:c.1201C>T']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330087.1:c.1201C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330087.1:c.1201C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330087.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317016.1'} - assert 'NM_001330084.1:c.1246C>T' in list(results.keys()) - assert results['NM_001330084.1:c.1246C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330084.1:c.1246C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330084.1:c.1246C>T']['alt_genomic_loci'], []) - assert results['NM_001330084.1:c.1246C>T']['gene_symbol'] == 'NRXN1' - assert results['NM_001330084.1:c.1246C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317013.1:p.(Pro416Ser)', 'slr': 'NP_001317013.1:p.(P416S)'} - assert results['NM_001330084.1:c.1246C>T']['submitted_variant'] == '2-50847195-G-A' - assert results['NM_001330084.1:c.1246C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001330084.1:c.1246C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001330084.1:c.1246C>T']['hgvs_transcript_variant'] == 'NM_001330084.1:c.1246C>T' - assert results['NM_001330084.1:c.1246C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330084.1:c.1246C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001330084.1:c.1246C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert 
results['NM_001330084.1:c.1246C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001330084.1:c.1246C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001330084.1:c.1246C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317013.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330084.1'} + assert 'NM_004801.4:c.1285C>T' in list(results.keys()) + assert results['NM_004801.4:c.1285C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_004801.4:c.1285C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_004801.4:c.1285C>T']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_004801.4:c.1285C>T']['hgvs_transcript_variant'] == 'NM_004801.4:c.1285C>T' + assert results['NM_004801.4:c.1285C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_004801.4:c.1285C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004801.4:c.1285C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_004801.4:c.1285C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004792.1:p.(Pro429Ser)', 'slr': 'NP_004792.1:p.(P429S)'} + assert results['NM_004801.4:c.1285C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_004801.4:c.1285C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_004801.4:c.1285C>T']['alt_genomic_loci'], []) + assert results['NM_004801.4:c.1285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_004801.4:c.1285C>T']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_004801.4:c.1285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_004801.4:c.1285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_004801.4:c.1285C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004801.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004792.1'} + + assert 'NM_004801.5:c.1285C>T' in list(results.keys()) + assert results['NM_004801.5:c.1285C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_004801.5:c.1285C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_004801.5:c.1285C>T']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_004801.5:c.1285C>T']['hgvs_transcript_variant'] == 'NM_004801.5:c.1285C>T' + assert results['NM_004801.5:c.1285C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_004801.5:c.1285C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004801.5:c.1285C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_004801.5:c.1285C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004792.1:p.(Pro429Ser)', 'slr': 'NP_004792.1:p.(P429S)'} + assert results['NM_004801.5:c.1285C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_004801.5:c.1285C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_004801.5:c.1285C>T']['alt_genomic_loci'], []) + assert results['NM_004801.5:c.1285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'pos': 
'50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_004801.5:c.1285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_004801.5:c.1285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_004801.5:c.1285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_004801.5:c.1285C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004801.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004792.1'} + + assert 'NM_001330085.1:c.1285C>T' in list(results.keys()) + assert results['NM_001330085.1:c.1285C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_001330085.1:c.1285C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001330085.1:c.1285C>T']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_001330085.1:c.1285C>T']['hgvs_transcript_variant'] == 'NM_001330085.1:c.1285C>T' + assert results['NM_001330085.1:c.1285C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001330085.1:c.1285C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330085.1:c.1285C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330085.1:c.1285C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317014.1:p.(Pro429Ser)', 'slr': 'NP_001317014.1:p.(P429S)'} + assert results['NM_001330085.1:c.1285C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330085.1:c.1285C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330085.1:c.1285C>T']['alt_genomic_loci'], []) + assert 
results['NM_001330085.1:c.1285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330085.1:c.1285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330085.1:c.1285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330085.1:c.1285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330085.1:c.1285C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330085.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317014.1'} + + assert 'NM_001135659.2:c.1405C>T' in list(results.keys()) + assert results['NM_001135659.2:c.1405C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_001135659.2:c.1405C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001135659.2:c.1405C>T']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_001135659.2:c.1405C>T']['hgvs_transcript_variant'] == 'NM_001135659.2:c.1405C>T' + assert results['NM_001135659.2:c.1405C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001135659.2:c.1405C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001135659.2:c.1405C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001135659.2:c.1405C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129131.1:p.(Pro469Ser)', 'slr': 'NP_001129131.1:p.(P469S)'} + assert 
results['NM_001135659.2:c.1405C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001135659.2:c.1405C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001135659.2:c.1405C>T']['alt_genomic_loci'], []) + assert results['NM_001135659.2:c.1405C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001135659.2:c.1405C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001135659.2:c.1405C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001135659.2:c.1405C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001135659.2:c.1405C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135659.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129131.1'} assert 'NM_001330077.1:c.1261C>T' in list(results.keys()) - assert results['NM_001330077.1:c.1261C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330077.1:c.1261C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330077.1:c.1261C>T']['alt_genomic_loci'], []) - assert results['NM_001330077.1:c.1261C>T']['gene_symbol'] == 'NRXN1' - assert results['NM_001330077.1:c.1261C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317006.1:p.(Pro421Ser)', 'slr': 'NP_001317006.1:p.(P421S)'} assert results['NM_001330077.1:c.1261C>T']['submitted_variant'] == '2-50847195-G-A' - assert results['NM_001330077.1:c.1261C>T']['genome_context_intronic_sequence'] == '' - assert 
results['NM_001330077.1:c.1261C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001330077.1:c.1261C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001330077.1:c.1261C>T']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} assert results['NM_001330077.1:c.1261C>T']['hgvs_transcript_variant'] == 'NM_001330077.1:c.1261C>T' + assert results['NM_001330077.1:c.1261C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001330077.1:c.1261C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330077.1:c.1261C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330077.1:c.1261C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001330077.1:c.1261C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001330077.1:c.1261C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001330077.1:c.1261C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001330077.1:c.1261C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317006.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330077.1'} - - assert 'NM_001330086.1:c.1285C>T' in list(results.keys()) - assert results['NM_001330086.1:c.1285C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330086.1:c.1285C>T']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_001330086.1:c.1285C>T']['alt_genomic_loci'], []) - assert results['NM_001330086.1:c.1285C>T']['gene_symbol'] == 'NRXN1' - assert results['NM_001330086.1:c.1285C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317015.1:p.(Pro429Ser)', 'slr': 'NP_001317015.1:p.(P429S)'} - assert results['NM_001330086.1:c.1285C>T']['submitted_variant'] == '2-50847195-G-A' - assert results['NM_001330086.1:c.1285C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001330086.1:c.1285C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001330086.1:c.1285C>T']['hgvs_transcript_variant'] == 'NM_001330086.1:c.1285C>T' - assert results['NM_001330086.1:c.1285C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330086.1:c.1285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001330086.1:c.1285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001330086.1:c.1285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001330086.1:c.1285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001330086.1:c.1285C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317015.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330086.1'} - - assert 'NM_001330088.1:c.1231C>T' in list(results.keys()) - assert results['NM_001330088.1:c.1231C>T']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_001330088.1:c.1231C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330088.1:c.1231C>T']['alt_genomic_loci'], []) - assert results['NM_001330088.1:c.1231C>T']['gene_symbol'] == 'NRXN1' - assert results['NM_001330088.1:c.1231C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317017.1:p.(Pro411Ser)', 'slr': 'NP_001317017.1:p.(P411S)'} - assert results['NM_001330088.1:c.1231C>T']['submitted_variant'] == '2-50847195-G-A' - assert results['NM_001330088.1:c.1231C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001330088.1:c.1231C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001330088.1:c.1231C>T']['hgvs_transcript_variant'] == 'NM_001330088.1:c.1231C>T' - assert results['NM_001330088.1:c.1231C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330088.1:c.1231C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001330088.1:c.1231C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001330088.1:c.1231C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001330088.1:c.1231C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001330088.1:c.1231C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317017.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330088.1'} + assert results['NM_001330077.1:c.1261C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317006.1:p.(Pro421Ser)', 'slr': 
'NP_001317006.1:p.(P421S)'} + assert results['NM_001330077.1:c.1261C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330077.1:c.1261C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330077.1:c.1261C>T']['alt_genomic_loci'], []) + assert results['NM_001330077.1:c.1261C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330077.1:c.1261C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330077.1:c.1261C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330077.1:c.1261C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330077.1:c.1261C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330077.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317006.1'} assert 'NM_001330093.1:c.1282C>T' in list(results.keys()) - assert results['NM_001330093.1:c.1282C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330093.1:c.1282C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330093.1:c.1282C>T']['alt_genomic_loci'], []) - assert results['NM_001330093.1:c.1282C>T']['gene_symbol'] == 'NRXN1' - assert results['NM_001330093.1:c.1282C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317022.1:p.(Pro428Ser)', 'slr': 'NP_001317022.1:p.(P428S)'} assert results['NM_001330093.1:c.1282C>T']['submitted_variant'] == '2-50847195-G-A' - assert 
results['NM_001330093.1:c.1282C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001330093.1:c.1282C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001330093.1:c.1282C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001330093.1:c.1282C>T']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} assert results['NM_001330093.1:c.1282C>T']['hgvs_transcript_variant'] == 'NM_001330093.1:c.1282C>T' + assert results['NM_001330093.1:c.1282C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001330093.1:c.1282C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330093.1:c.1282C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330093.1:c.1282C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001330093.1:c.1282C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001330093.1:c.1282C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001330093.1:c.1282C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001330093.1:c.1282C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317022.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330093.1'} + assert results['NM_001330093.1:c.1282C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317022.1:p.(Pro428Ser)', 'slr': 'NP_001317022.1:p.(P428S)'} + assert 
results['NM_001330093.1:c.1282C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330093.1:c.1282C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330093.1:c.1282C>T']['alt_genomic_loci'], []) + assert results['NM_001330093.1:c.1282C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330093.1:c.1282C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330093.1:c.1282C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330093.1:c.1282C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330093.1:c.1282C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330093.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317022.1'} - assert 'NM_001330087.1:c.1201C>T' in list(results.keys()) - assert results['NM_001330087.1:c.1201C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330087.1:c.1201C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330087.1:c.1201C>T']['alt_genomic_loci'], []) - assert results['NM_001330087.1:c.1201C>T']['gene_symbol'] == 'NRXN1' - assert results['NM_001330087.1:c.1201C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317016.1:p.(Pro401Ser)', 'slr': 'NP_001317016.1:p.(P401S)'} - assert results['NM_001330087.1:c.1201C>T']['submitted_variant'] == '2-50847195-G-A' - assert results['NM_001330087.1:c.1201C>T']['genome_context_intronic_sequence'] == '' - 
assert results['NM_001330087.1:c.1201C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001330087.1:c.1201C>T']['hgvs_transcript_variant'] == 'NM_001330087.1:c.1201C>T' - assert results['NM_001330087.1:c.1201C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330087.1:c.1201C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001330087.1:c.1201C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001330087.1:c.1201C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001330087.1:c.1201C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001330087.1:c.1201C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317016.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330087.1'} + assert 'NM_001330084.1:c.1246C>T' in list(results.keys()) + assert results['NM_001330084.1:c.1246C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_001330084.1:c.1246C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001330084.1:c.1246C>T']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_001330084.1:c.1246C>T']['hgvs_transcript_variant'] == 'NM_001330084.1:c.1246C>T' + assert results['NM_001330084.1:c.1246C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001330084.1:c.1246C>T']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_001330084.1:c.1246C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330084.1:c.1246C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317013.1:p.(Pro416Ser)', 'slr': 'NP_001317013.1:p.(P416S)'} + assert results['NM_001330084.1:c.1246C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330084.1:c.1246C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330084.1:c.1246C>T']['alt_genomic_loci'], []) + assert results['NM_001330084.1:c.1246C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330084.1:c.1246C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330084.1:c.1246C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330084.1:c.1246C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330084.1:c.1246C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330084.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317013.1'} - assert 'NM_001330082.1:c.1261C>T' in list(results.keys()) - assert results['NM_001330082.1:c.1261C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330082.1:c.1261C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330082.1:c.1261C>T']['alt_genomic_loci'], []) - assert results['NM_001330082.1:c.1261C>T']['gene_symbol'] == 'NRXN1' - assert results['NM_001330082.1:c.1261C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_001317011.1:p.(Pro421Ser)', 'slr': 'NP_001317011.1:p.(P421S)'} - assert results['NM_001330082.1:c.1261C>T']['submitted_variant'] == '2-50847195-G-A' - assert results['NM_001330082.1:c.1261C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001330082.1:c.1261C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001330082.1:c.1261C>T']['hgvs_transcript_variant'] == 'NM_001330082.1:c.1261C>T' - assert results['NM_001330082.1:c.1261C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330082.1:c.1261C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001330082.1:c.1261C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001330082.1:c.1261C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001330082.1:c.1261C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001330082.1:c.1261C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317011.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330082.1'} + assert 'NM_001330086.1:c.1285C>T' in list(results.keys()) + assert results['NM_001330086.1:c.1285C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_001330086.1:c.1285C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001330086.1:c.1285C>T']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_001330086.1:c.1285C>T']['hgvs_transcript_variant'] == 
'NM_001330086.1:c.1285C>T' + assert results['NM_001330086.1:c.1285C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001330086.1:c.1285C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330086.1:c.1285C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330086.1:c.1285C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317015.1:p.(Pro429Ser)', 'slr': 'NP_001317015.1:p.(P429S)'} + assert results['NM_001330086.1:c.1285C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330086.1:c.1285C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330086.1:c.1285C>T']['alt_genomic_loci'], []) + assert results['NM_001330086.1:c.1285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330086.1:c.1285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330086.1:c.1285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330086.1:c.1285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330086.1:c.1285C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330086.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317015.1'} assert 'NM_001330078.1:c.1285C>T' in list(results.keys()) - assert results['NM_001330078.1:c.1285C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330078.1:c.1285C>T']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_001330078.1:c.1285C>T']['alt_genomic_loci'], []) - assert results['NM_001330078.1:c.1285C>T']['gene_symbol'] == 'NRXN1' - assert results['NM_001330078.1:c.1285C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317007.1:p.(Pro429Ser)', 'slr': 'NP_001317007.1:p.(P429S)'} assert results['NM_001330078.1:c.1285C>T']['submitted_variant'] == '2-50847195-G-A' - assert results['NM_001330078.1:c.1285C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001330078.1:c.1285C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001330078.1:c.1285C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001330078.1:c.1285C>T']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} assert results['NM_001330078.1:c.1285C>T']['hgvs_transcript_variant'] == 'NM_001330078.1:c.1285C>T' + assert results['NM_001330078.1:c.1285C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001330078.1:c.1285C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330078.1:c.1285C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330078.1:c.1285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001330078.1:c.1285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001330078.1:c.1285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001330078.1:c.1285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert 
results['NM_001330078.1:c.1285C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317007.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330078.1'} + assert results['NM_001330078.1:c.1285C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317007.1:p.(Pro429Ser)', 'slr': 'NP_001317007.1:p.(P429S)'} + assert results['NM_001330078.1:c.1285C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330078.1:c.1285C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330078.1:c.1285C>T']['alt_genomic_loci'], []) + assert results['NM_001330078.1:c.1285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330078.1:c.1285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330078.1:c.1285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330078.1:c.1285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330078.1:c.1285C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330078.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317007.1'} - assert 'NM_001330094.1:c.1273C>T' in list(results.keys()) - assert results['NM_001330094.1:c.1273C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330094.1:c.1273C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330094.1:c.1273C>T']['alt_genomic_loci'], []) - assert 
results['NM_001330094.1:c.1273C>T']['gene_symbol'] == 'NRXN1' - assert results['NM_001330094.1:c.1273C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317023.1:p.(Pro425Ser)', 'slr': 'NP_001317023.1:p.(P425S)'} - assert results['NM_001330094.1:c.1273C>T']['submitted_variant'] == '2-50847195-G-A' - assert results['NM_001330094.1:c.1273C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001330094.1:c.1273C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001330094.1:c.1273C>T']['hgvs_transcript_variant'] == 'NM_001330094.1:c.1273C>T' - assert results['NM_001330094.1:c.1273C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330094.1:c.1273C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001330094.1:c.1273C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001330094.1:c.1273C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001330094.1:c.1273C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001330094.1:c.1273C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317023.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330094.1'} + assert 'NM_001330096.1:c.1201C>T' in list(results.keys()) + assert results['NM_001330096.1:c.1201C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_001330096.1:c.1201C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001330096.1:c.1201C>T']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 
'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_001330096.1:c.1201C>T']['hgvs_transcript_variant'] == 'NM_001330096.1:c.1201C>T' + assert results['NM_001330096.1:c.1201C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001330096.1:c.1201C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330096.1:c.1201C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330096.1:c.1201C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317025.1:p.(Pro401Ser)', 'slr': 'NP_001317025.1:p.(P401S)'} + assert results['NM_001330096.1:c.1201C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330096.1:c.1201C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330096.1:c.1201C>T']['alt_genomic_loci'], []) + assert results['NM_001330096.1:c.1201C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330096.1:c.1201C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330096.1:c.1201C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330096.1:c.1201C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330096.1:c.1201C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330096.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317025.1'} - assert results['flag'] == 'gene_variant' - assert 'NM_001135659.2:c.1405C>T' in list(results.keys()) - assert 
results['NM_001135659.2:c.1405C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001135659.2:c.1405C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001135659.2:c.1405C>T']['alt_genomic_loci'], []) - assert results['NM_001135659.2:c.1405C>T']['gene_symbol'] == 'NRXN1' - assert results['NM_001135659.2:c.1405C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129131.1:p.(Pro469Ser)', 'slr': 'NP_001129131.1:p.(P469S)'} - assert results['NM_001135659.2:c.1405C>T']['submitted_variant'] == '2-50847195-G-A' - assert results['NM_001135659.2:c.1405C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001135659.2:c.1405C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001135659.2:c.1405C>T']['hgvs_transcript_variant'] == 'NM_001135659.2:c.1405C>T' - assert results['NM_001135659.2:c.1405C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001135659.2:c.1405C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001135659.2:c.1405C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001135659.2:c.1405C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001135659.2:c.1405C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001135659.2:c.1405C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129131.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135659.2'} + assert 'NM_001135659.1:c.1405C>T' in 
list(results.keys()) + assert results['NM_001135659.1:c.1405C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_001135659.1:c.1405C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001135659.1:c.1405C>T']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_001135659.1:c.1405C>T']['hgvs_transcript_variant'] == 'NM_001135659.1:c.1405C>T' + assert results['NM_001135659.1:c.1405C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001135659.1:c.1405C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001135659.1:c.1405C>T']['hgvs_refseqgene_variant'] == 'NG_011878.1:g.417480C>T' + assert results['NM_001135659.1:c.1405C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129131.1:p.(Pro469Ser)', 'slr': 'NP_001129131.1:p.(P469S)'} + assert results['NM_001135659.1:c.1405C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001135659.1:c.1405C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001135659.1:c.1405C>T']['alt_genomic_loci'], []) + assert results['NM_001135659.1:c.1405C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001135659.1:c.1405C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001135659.1:c.1405C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001135659.1:c.1405C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert 
results['NM_001135659.1:c.1405C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135659.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129131.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011878.1'} + + assert 'NM_001330088.1:c.1231C>T' in list(results.keys()) + assert results['NM_001330088.1:c.1231C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_001330088.1:c.1231C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001330088.1:c.1231C>T']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_001330088.1:c.1231C>T']['hgvs_transcript_variant'] == 'NM_001330088.1:c.1231C>T' + assert results['NM_001330088.1:c.1231C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001330088.1:c.1231C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330088.1:c.1231C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330088.1:c.1231C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317017.1:p.(Pro411Ser)', 'slr': 'NP_001317017.1:p.(P411S)'} + assert results['NM_001330088.1:c.1231C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330088.1:c.1231C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330088.1:c.1231C>T']['alt_genomic_loci'], []) + assert results['NM_001330088.1:c.1231C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330088.1:c.1231C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330088.1:c.1231C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'pos': 
'50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330088.1:c.1231C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330088.1:c.1231C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330088.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317017.1'} assert 'NM_001330083.1:c.1246C>T' in list(results.keys()) - assert results['NM_001330083.1:c.1246C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330083.1:c.1246C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330083.1:c.1246C>T']['alt_genomic_loci'], []) - assert results['NM_001330083.1:c.1246C>T']['gene_symbol'] == 'NRXN1' - assert results['NM_001330083.1:c.1246C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317012.1:p.(Pro416Ser)', 'slr': 'NP_001317012.1:p.(P416S)'} assert results['NM_001330083.1:c.1246C>T']['submitted_variant'] == '2-50847195-G-A' - assert results['NM_001330083.1:c.1246C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001330083.1:c.1246C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001330083.1:c.1246C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001330083.1:c.1246C>T']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} assert results['NM_001330083.1:c.1246C>T']['hgvs_transcript_variant'] == 'NM_001330083.1:c.1246C>T' + assert results['NM_001330083.1:c.1246C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001330083.1:c.1246C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330083.1:c.1246C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330083.1:c.1246C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 
'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001330083.1:c.1246C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001330083.1:c.1246C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001330083.1:c.1246C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001330083.1:c.1246C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317012.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330083.1'} - - assert 'NM_004801.5:c.1285C>T' in list(results.keys()) - assert results['NM_004801.5:c.1285C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_004801.5:c.1285C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004801.5:c.1285C>T']['alt_genomic_loci'], []) - assert results['NM_004801.5:c.1285C>T']['gene_symbol'] == 'NRXN1' - assert results['NM_004801.5:c.1285C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004792.1:p.(Pro429Ser)', 'slr': 'NP_004792.1:p.(P429S)'} - assert results['NM_004801.5:c.1285C>T']['submitted_variant'] == '2-50847195-G-A' - assert results['NM_004801.5:c.1285C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_004801.5:c.1285C>T']['hgvs_lrg_variant'] == '' - assert results['NM_004801.5:c.1285C>T']['hgvs_transcript_variant'] == 'NM_004801.5:c.1285C>T' - assert results['NM_004801.5:c.1285C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_004801.5:c.1285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': 
'50847195', 'alt': 'A'}} - assert results['NM_004801.5:c.1285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_004801.5:c.1285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_004801.5:c.1285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_004801.5:c.1285C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004792.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004801.5'} - - assert 'NM_001330085.1:c.1285C>T' in list(results.keys()) - assert results['NM_001330085.1:c.1285C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330085.1:c.1285C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330085.1:c.1285C>T']['alt_genomic_loci'], []) - assert results['NM_001330085.1:c.1285C>T']['gene_symbol'] == 'NRXN1' - assert results['NM_001330085.1:c.1285C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317014.1:p.(Pro429Ser)', 'slr': 'NP_001317014.1:p.(P429S)'} - assert results['NM_001330085.1:c.1285C>T']['submitted_variant'] == '2-50847195-G-A' - assert results['NM_001330085.1:c.1285C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001330085.1:c.1285C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001330085.1:c.1285C>T']['hgvs_transcript_variant'] == 'NM_001330085.1:c.1285C>T' - assert results['NM_001330085.1:c.1285C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330085.1:c.1285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': 
'50847195', 'alt': 'A'}} - assert results['NM_001330085.1:c.1285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001330085.1:c.1285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001330085.1:c.1285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001330085.1:c.1285C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317014.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330085.1'} + assert results['NM_001330083.1:c.1246C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317012.1:p.(Pro416Ser)', 'slr': 'NP_001317012.1:p.(P416S)'} + assert results['NM_001330083.1:c.1246C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330083.1:c.1246C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330083.1:c.1246C>T']['alt_genomic_loci'], []) + assert results['NM_001330083.1:c.1246C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330083.1:c.1246C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330083.1:c.1246C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330083.1:c.1246C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330083.1:c.1246C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330083.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317012.1'} assert 'NM_001330095.1:c.1261C>T' in list(results.keys()) - assert results['NM_001330095.1:c.1261C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330095.1:c.1261C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330095.1:c.1261C>T']['alt_genomic_loci'], []) - assert results['NM_001330095.1:c.1261C>T']['gene_symbol'] == 'NRXN1' - assert results['NM_001330095.1:c.1261C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317024.1:p.(Pro421Ser)', 'slr': 'NP_001317024.1:p.(P421S)'} assert results['NM_001330095.1:c.1261C>T']['submitted_variant'] == '2-50847195-G-A' - assert results['NM_001330095.1:c.1261C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001330095.1:c.1261C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001330095.1:c.1261C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001330095.1:c.1261C>T']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} assert results['NM_001330095.1:c.1261C>T']['hgvs_transcript_variant'] == 'NM_001330095.1:c.1261C>T' + assert results['NM_001330095.1:c.1261C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001330095.1:c.1261C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001330095.1:c.1261C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330095.1:c.1261C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001330095.1:c.1261C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001330095.1:c.1261C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001330095.1:c.1261C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001330095.1:c.1261C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317024.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330095.1'} - - assert 'NM_004801.4:c.1285C>T' in list(results.keys()) - assert results['NM_004801.4:c.1285C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_004801.4:c.1285C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004801.4:c.1285C>T']['alt_genomic_loci'], []) - assert results['NM_004801.4:c.1285C>T']['gene_symbol'] == 'NRXN1' - assert results['NM_004801.4:c.1285C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004792.1:p.(Pro429Ser)', 'slr': 'NP_004792.1:p.(P429S)'} - assert results['NM_004801.4:c.1285C>T']['submitted_variant'] == '2-50847195-G-A' - assert results['NM_004801.4:c.1285C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_004801.4:c.1285C>T']['hgvs_lrg_variant'] == '' - assert results['NM_004801.4:c.1285C>T']['hgvs_transcript_variant'] == 'NM_004801.4:c.1285C>T' - assert results['NM_004801.4:c.1285C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_004801.4:c.1285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_004801.4:c.1285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 
'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_004801.4:c.1285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_004801.4:c.1285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_004801.4:c.1285C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004792.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004801.4'} + assert results['NM_001330095.1:c.1261C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317024.1:p.(Pro421Ser)', 'slr': 'NP_001317024.1:p.(P421S)'} + assert results['NM_001330095.1:c.1261C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330095.1:c.1261C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330095.1:c.1261C>T']['alt_genomic_loci'], []) + assert results['NM_001330095.1:c.1261C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330095.1:c.1261C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330095.1:c.1261C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330095.1:c.1261C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330095.1:c.1261C>T']['reference_sequence_records'] == {'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330095.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317024.1'} - assert 'NM_001135659.1:c.1405C>T' in list(results.keys()) - assert results['NM_001135659.1:c.1405C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001135659.1:c.1405C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001135659.1:c.1405C>T']['alt_genomic_loci'], []) - assert results['NM_001135659.1:c.1405C>T']['gene_symbol'] == 'NRXN1' - assert results['NM_001135659.1:c.1405C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001129131.1:p.(Pro469Ser)', 'slr': 'NP_001129131.1:p.(P469S)'} - assert results['NM_001135659.1:c.1405C>T']['submitted_variant'] == '2-50847195-G-A' - assert results['NM_001135659.1:c.1405C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001135659.1:c.1405C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001135659.1:c.1405C>T']['hgvs_transcript_variant'] == 'NM_001135659.1:c.1405C>T' - assert results['NM_001135659.1:c.1405C>T']['hgvs_refseqgene_variant'] == 'NG_011878.1:g.417480C>T' - assert results['NM_001135659.1:c.1405C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001135659.1:c.1405C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert results['NM_001135659.1:c.1405C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50847195', 'alt': 'A'}} - assert results['NM_001135659.1:c.1405C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '50620057', 'alt': 'A'}} - assert 
results['NM_001135659.1:c.1405C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011878.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001129131.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001135659.1'} + assert 'NM_001330094.1:c.1273C>T' in list(results.keys()) + assert results['NM_001330094.1:c.1273C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_001330094.1:c.1273C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001330094.1:c.1273C>T']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_001330094.1:c.1273C>T']['hgvs_transcript_variant'] == 'NM_001330094.1:c.1273C>T' + assert results['NM_001330094.1:c.1273C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001330094.1:c.1273C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330094.1:c.1273C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330094.1:c.1273C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317023.1:p.(Pro425Ser)', 'slr': 'NP_001317023.1:p.(P425S)'} + assert results['NM_001330094.1:c.1273C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330094.1:c.1273C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330094.1:c.1273C>T']['alt_genomic_loci'], []) + assert results['NM_001330094.1:c.1273C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330094.1:c.1273C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330094.1:c.1273C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'pos': '50847195', 
'ref': 'G', 'alt': 'A'}} + assert results['NM_001330094.1:c.1273C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330094.1:c.1273C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330094.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317023.1'} + assert 'NM_001330082.1:c.1261C>T' in list(results.keys()) + assert results['NM_001330082.1:c.1261C>T']['submitted_variant'] == '2-50847195-G-A' + assert results['NM_001330082.1:c.1261C>T']['gene_symbol'] == 'NRXN1' + assert results['NM_001330082.1:c.1261C>T']['gene_ids'] == {'hgnc_id': 'HGNC:8008', 'entrez_gene_id': '9378', 'ucsc_id': 'uc061jbg.1', 'omim_id': ['600565']} + assert results['NM_001330082.1:c.1261C>T']['hgvs_transcript_variant'] == 'NM_001330082.1:c.1261C>T' + assert results['NM_001330082.1:c.1261C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001330082.1:c.1261C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330082.1:c.1261C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330082.1:c.1261C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317011.1:p.(Pro421Ser)', 'slr': 'NP_001317011.1:p.(P421S)'} + assert results['NM_001330082.1:c.1261C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330082.1:c.1261C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330082.1:c.1261C>T']['alt_genomic_loci'], []) + assert results['NM_001330082.1:c.1261C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': 'chr2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330082.1:c.1261C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': 'chr2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + 
assert results['NM_001330082.1:c.1261C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.50847195G>A', 'vcf': {'chr': '2', 'pos': '50847195', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330082.1:c.1261C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.50620057G>A', 'vcf': {'chr': '2', 'pos': '50620057', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001330082.1:c.1261C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330082.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317011.1'} def test_variant269(self): variant = '2-71825797-C-G' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' + assert 'NM_001130985.1:c.3678C>G' in list(results.keys()) + assert results['NM_001130985.1:c.3678C>G']['submitted_variant'] == '2-71825797-C-G' + assert results['NM_001130985.1:c.3678C>G']['gene_symbol'] == 'DYSF' + assert results['NM_001130985.1:c.3678C>G']['gene_ids'] == {'hgnc_id': 'HGNC:3097', 'entrez_gene_id': '8291', 'ucsc_id': 'uc002sie.4', 'omim_id': ['603009']} + assert results['NM_001130985.1:c.3678C>G']['hgvs_transcript_variant'] == 'NM_001130985.1:c.3678C>G' + assert results['NM_001130985.1:c.3678C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001130985.1:c.3678C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001130985.1:c.3678C>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001130985.1:c.3678C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124457.1:p.(Ile1226Met)', 'slr': 'NP_001124457.1:p.(I1226M)'} + assert results['NM_001130985.1:c.3678C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001130985.1:c.3678C>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001130985.1:c.3678C>G']['alt_genomic_loci'], []) + assert 
results['NM_001130985.1:c.3678C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130985.1:c.3678C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130985.1:c.3678C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130985.1:c.3678C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130985.1:c.3678C>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130985.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124457.1'} + + assert 'NM_001130981.1:c.3675C>G' in list(results.keys()) + assert results['NM_001130981.1:c.3675C>G']['submitted_variant'] == '2-71825797-C-G' + assert results['NM_001130981.1:c.3675C>G']['gene_symbol'] == 'DYSF' + assert results['NM_001130981.1:c.3675C>G']['gene_ids'] == {'hgnc_id': 'HGNC:3097', 'entrez_gene_id': '8291', 'ucsc_id': 'uc002sie.4', 'omim_id': ['603009']} + assert results['NM_001130981.1:c.3675C>G']['hgvs_transcript_variant'] == 'NM_001130981.1:c.3675C>G' + assert results['NM_001130981.1:c.3675C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001130981.1:c.3675C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001130981.1:c.3675C>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001130981.1:c.3675C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124453.1:p.(Ile1225Met)', 'slr': 'NP_001124453.1:p.(I1225M)'} + assert 
results['NM_001130981.1:c.3675C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001130981.1:c.3675C>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001130981.1:c.3675C>G']['alt_genomic_loci'], []) + assert results['NM_001130981.1:c.3675C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130981.1:c.3675C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130981.1:c.3675C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130981.1:c.3675C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130981.1:c.3675C>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130981.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124453.1'} + assert 'NM_001130976.1:c.3582C>G' in list(results.keys()) - assert results['NM_001130976.1:c.3582C>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001130976.1:c.3582C>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001130976.1:c.3582C>G']['alt_genomic_loci'], []) - assert results['NM_001130976.1:c.3582C>G']['gene_symbol'] == 'DYSF' - assert results['NM_001130976.1:c.3582C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124448.1:p.(Ile1194Met)', 'slr': 'NP_001124448.1:p.(I1194M)'} assert results['NM_001130976.1:c.3582C>G']['submitted_variant'] == '2-71825797-C-G' - assert results['NM_001130976.1:c.3582C>G']['genome_context_intronic_sequence'] == '' - assert 
results['NM_001130976.1:c.3582C>G']['hgvs_lrg_variant'] == '' + assert results['NM_001130976.1:c.3582C>G']['gene_symbol'] == 'DYSF' + assert results['NM_001130976.1:c.3582C>G']['gene_ids'] == {'hgnc_id': 'HGNC:3097', 'entrez_gene_id': '8291', 'ucsc_id': 'uc002sie.4', 'omim_id': ['603009']} assert results['NM_001130976.1:c.3582C>G']['hgvs_transcript_variant'] == 'NM_001130976.1:c.3582C>G' + assert results['NM_001130976.1:c.3582C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001130976.1:c.3582C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001130976.1:c.3582C>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001130976.1:c.3582C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} - assert results['NM_001130976.1:c.3582C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_001130976.1:c.3582C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} - assert results['NM_001130976.1:c.3582C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_001130976.1:c.3582C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124448.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130976.1'} + assert results['NM_001130976.1:c.3582C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124448.1:p.(Ile1194Met)', 'slr': 'NP_001124448.1:p.(I1194M)'} + assert results['NM_001130976.1:c.3582C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001130976.1:c.3582C>G']['hgvs_lrg_variant'] == 
'' + self.assertCountEqual(results['NM_001130976.1:c.3582C>G']['alt_genomic_loci'], []) + assert results['NM_001130976.1:c.3582C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130976.1:c.3582C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130976.1:c.3582C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130976.1:c.3582C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130976.1:c.3582C>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130976.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124448.1'} + + assert 'NM_001130978.1:c.3624C>G' in list(results.keys()) + assert results['NM_001130978.1:c.3624C>G']['submitted_variant'] == '2-71825797-C-G' + assert results['NM_001130978.1:c.3624C>G']['gene_symbol'] == 'DYSF' + assert results['NM_001130978.1:c.3624C>G']['gene_ids'] == {'hgnc_id': 'HGNC:3097', 'entrez_gene_id': '8291', 'ucsc_id': 'uc002sie.4', 'omim_id': ['603009']} + assert results['NM_001130978.1:c.3624C>G']['hgvs_transcript_variant'] == 'NM_001130978.1:c.3624C>G' + assert results['NM_001130978.1:c.3624C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001130978.1:c.3624C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001130978.1:c.3624C>G']['hgvs_refseqgene_variant'] == 'NG_008694.1:g.150045C>G' + assert results['NM_001130978.1:c.3624C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_001124450.1:p.(Ile1208Met)', 'slr': 'NP_001124450.1:p.(I1208M)'} + assert results['NM_001130978.1:c.3624C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001130978.1:c.3624C>G']['hgvs_lrg_variant'] == 'LRG_845:g.150045C>G' + self.assertCountEqual(results['NM_001130978.1:c.3624C>G']['alt_genomic_loci'], []) + assert results['NM_001130978.1:c.3624C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130978.1:c.3624C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130978.1:c.3624C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130978.1:c.3624C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130978.1:c.3624C>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130978.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124450.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008694.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_845.xml'} + + assert 'NM_001130987.1:c.3678C>G' in list(results.keys()) + assert results['NM_001130987.1:c.3678C>G']['submitted_variant'] == '2-71825797-C-G' + assert results['NM_001130987.1:c.3678C>G']['gene_symbol'] == 'DYSF' + assert results['NM_001130987.1:c.3678C>G']['gene_ids'] == {'hgnc_id': 'HGNC:3097', 'entrez_gene_id': '8291', 'ucsc_id': 'uc002sie.4', 'omim_id': ['603009']} + assert results['NM_001130987.1:c.3678C>G']['hgvs_transcript_variant'] == 'NM_001130987.1:c.3678C>G' + assert 
results['NM_001130987.1:c.3678C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001130987.1:c.3678C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001130987.1:c.3678C>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001130987.1:c.3678C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124459.1(LRG_845p2):p.(Ile1226Met)', 'slr': 'NP_001124459.1:p.(I1226M)'} + assert results['NM_001130987.1:c.3678C>G']['hgvs_lrg_transcript_variant'] == 'LRG_845t2:c.3678C>G' + assert results['NM_001130987.1:c.3678C>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001130987.1:c.3678C>G']['alt_genomic_loci'], []) + assert results['NM_001130987.1:c.3678C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130987.1:c.3678C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130987.1:c.3678C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130987.1:c.3678C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130987.1:c.3678C>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130987.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124459.1'} + + assert 'NM_001130982.1:c.3720C>G' in list(results.keys()) + assert results['NM_001130982.1:c.3720C>G']['submitted_variant'] == '2-71825797-C-G' + assert results['NM_001130982.1:c.3720C>G']['gene_symbol'] == 'DYSF' + assert results['NM_001130982.1:c.3720C>G']['gene_ids'] == 
{'hgnc_id': 'HGNC:3097', 'entrez_gene_id': '8291', 'ucsc_id': 'uc002sie.4', 'omim_id': ['603009']} + assert results['NM_001130982.1:c.3720C>G']['hgvs_transcript_variant'] == 'NM_001130982.1:c.3720C>G' + assert results['NM_001130982.1:c.3720C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001130982.1:c.3720C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001130982.1:c.3720C>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001130982.1:c.3720C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124454.1:p.(Ile1240Met)', 'slr': 'NP_001124454.1:p.(I1240M)'} + assert results['NM_001130982.1:c.3720C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001130982.1:c.3720C>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001130982.1:c.3720C>G']['alt_genomic_loci'], []) + assert results['NM_001130982.1:c.3720C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130982.1:c.3720C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130982.1:c.3720C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130982.1:c.3720C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130982.1:c.3720C>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130982.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124454.1'} + + assert 'NM_001130977.1:c.3582C>G' in list(results.keys()) + assert 
results['NM_001130977.1:c.3582C>G']['submitted_variant'] == '2-71825797-C-G' + assert results['NM_001130977.1:c.3582C>G']['gene_symbol'] == 'DYSF' + assert results['NM_001130977.1:c.3582C>G']['gene_ids'] == {'hgnc_id': 'HGNC:3097', 'entrez_gene_id': '8291', 'ucsc_id': 'uc002sie.4', 'omim_id': ['603009']} + assert results['NM_001130977.1:c.3582C>G']['hgvs_transcript_variant'] == 'NM_001130977.1:c.3582C>G' + assert results['NM_001130977.1:c.3582C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001130977.1:c.3582C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001130977.1:c.3582C>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001130977.1:c.3582C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124449.1:p.(Ile1194Met)', 'slr': 'NP_001124449.1:p.(I1194M)'} + assert results['NM_001130977.1:c.3582C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001130977.1:c.3582C>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001130977.1:c.3582C>G']['alt_genomic_loci'], []) + assert results['NM_001130977.1:c.3582C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130977.1:c.3582C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130977.1:c.3582C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130977.1:c.3582C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130977.1:c.3582C>G']['reference_sequence_records'] == {'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130977.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124449.1'} - assert 'NM_001130981.1:c.3675C>G' in list(results.keys()) - assert results['NM_001130981.1:c.3675C>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001130981.1:c.3675C>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001130981.1:c.3675C>G']['alt_genomic_loci'], []) - assert results['NM_001130981.1:c.3675C>G']['gene_symbol'] == 'DYSF' - assert results['NM_001130981.1:c.3675C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124453.1:p.(Ile1225Met)', 'slr': 'NP_001124453.1:p.(I1225M)'} - assert results['NM_001130981.1:c.3675C>G']['submitted_variant'] == '2-71825797-C-G' - assert results['NM_001130981.1:c.3675C>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001130981.1:c.3675C>G']['hgvs_lrg_variant'] == '' - assert results['NM_001130981.1:c.3675C>G']['hgvs_transcript_variant'] == 'NM_001130981.1:c.3675C>G' - assert results['NM_001130981.1:c.3675C>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001130981.1:c.3675C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} - assert results['NM_001130981.1:c.3675C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_001130981.1:c.3675C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} - assert results['NM_001130981.1:c.3675C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_001130981.1:c.3675C>G']['reference_sequence_records'] == 
{'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124453.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130981.1'} + assert 'NM_001130984.1:c.3585C>G' in list(results.keys()) + assert results['NM_001130984.1:c.3585C>G']['submitted_variant'] == '2-71825797-C-G' + assert results['NM_001130984.1:c.3585C>G']['gene_symbol'] == 'DYSF' + assert results['NM_001130984.1:c.3585C>G']['gene_ids'] == {'hgnc_id': 'HGNC:3097', 'entrez_gene_id': '8291', 'ucsc_id': 'uc002sie.4', 'omim_id': ['603009']} + assert results['NM_001130984.1:c.3585C>G']['hgvs_transcript_variant'] == 'NM_001130984.1:c.3585C>G' + assert results['NM_001130984.1:c.3585C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001130984.1:c.3585C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001130984.1:c.3585C>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001130984.1:c.3585C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124456.1:p.(Ile1195Met)', 'slr': 'NP_001124456.1:p.(I1195M)'} + assert results['NM_001130984.1:c.3585C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001130984.1:c.3585C>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001130984.1:c.3585C>G']['alt_genomic_loci'], []) + assert results['NM_001130984.1:c.3585C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130984.1:c.3585C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130984.1:c.3585C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130984.1:c.3585C>G']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130984.1:c.3585C>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130984.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124456.1'} + + assert 'NM_001130986.1:c.3585C>G' in list(results.keys()) + assert results['NM_001130986.1:c.3585C>G']['submitted_variant'] == '2-71825797-C-G' + assert results['NM_001130986.1:c.3585C>G']['gene_symbol'] == 'DYSF' + assert results['NM_001130986.1:c.3585C>G']['gene_ids'] == {'hgnc_id': 'HGNC:3097', 'entrez_gene_id': '8291', 'ucsc_id': 'uc002sie.4', 'omim_id': ['603009']} + assert results['NM_001130986.1:c.3585C>G']['hgvs_transcript_variant'] == 'NM_001130986.1:c.3585C>G' + assert results['NM_001130986.1:c.3585C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001130986.1:c.3585C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001130986.1:c.3585C>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001130986.1:c.3585C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124458.1:p.(Ile1195Met)', 'slr': 'NP_001124458.1:p.(I1195M)'} + assert results['NM_001130986.1:c.3585C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001130986.1:c.3585C>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001130986.1:c.3585C>G']['alt_genomic_loci'], []) + assert results['NM_001130986.1:c.3585C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130986.1:c.3585C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130986.1:c.3585C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130986.1:c.3585C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130986.1:c.3585C>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130986.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124458.1'} assert 'NM_001130979.1:c.3717C>G' in list(results.keys()) - assert results['NM_001130979.1:c.3717C>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001130979.1:c.3717C>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001130979.1:c.3717C>G']['alt_genomic_loci'], []) - assert results['NM_001130979.1:c.3717C>G']['gene_symbol'] == 'DYSF' - assert results['NM_001130979.1:c.3717C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124451.1:p.(Ile1239Met)', 'slr': 'NP_001124451.1:p.(I1239M)'} assert results['NM_001130979.1:c.3717C>G']['submitted_variant'] == '2-71825797-C-G' - assert results['NM_001130979.1:c.3717C>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001130979.1:c.3717C>G']['hgvs_lrg_variant'] == '' + assert results['NM_001130979.1:c.3717C>G']['gene_symbol'] == 'DYSF' + assert results['NM_001130979.1:c.3717C>G']['gene_ids'] == {'hgnc_id': 'HGNC:3097', 'entrez_gene_id': '8291', 'ucsc_id': 'uc002sie.4', 'omim_id': ['603009']} assert results['NM_001130979.1:c.3717C>G']['hgvs_transcript_variant'] == 'NM_001130979.1:c.3717C>G' + assert results['NM_001130979.1:c.3717C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001130979.1:c.3717C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001130979.1:c.3717C>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001130979.1:c.3717C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} - assert results['NM_001130979.1:c.3717C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_001130979.1:c.3717C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} - assert results['NM_001130979.1:c.3717C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_001130979.1:c.3717C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124451.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130979.1'} - - assert 'NM_001130985.1:c.3678C>G' in list(results.keys()) - assert results['NM_001130985.1:c.3678C>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001130985.1:c.3678C>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001130985.1:c.3678C>G']['alt_genomic_loci'], []) - assert results['NM_001130985.1:c.3678C>G']['gene_symbol'] == 'DYSF' - assert results['NM_001130985.1:c.3678C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124457.1:p.(Ile1226Met)', 'slr': 'NP_001124457.1:p.(I1226M)'} - assert results['NM_001130985.1:c.3678C>G']['submitted_variant'] == '2-71825797-C-G' - assert results['NM_001130985.1:c.3678C>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001130985.1:c.3678C>G']['hgvs_lrg_variant'] == '' - assert results['NM_001130985.1:c.3678C>G']['hgvs_transcript_variant'] == 'NM_001130985.1:c.3678C>G' - assert results['NM_001130985.1:c.3678C>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001130985.1:c.3678C>G']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} - assert results['NM_001130985.1:c.3678C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_001130985.1:c.3678C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} - assert results['NM_001130985.1:c.3678C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_001130985.1:c.3678C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124457.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130985.1'} + assert results['NM_001130979.1:c.3717C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124451.1:p.(Ile1239Met)', 'slr': 'NP_001124451.1:p.(I1239M)'} + assert results['NM_001130979.1:c.3717C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001130979.1:c.3717C>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001130979.1:c.3717C>G']['alt_genomic_loci'], []) + assert results['NM_001130979.1:c.3717C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130979.1:c.3717C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130979.1:c.3717C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert 
results['NM_001130979.1:c.3717C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130979.1:c.3717C>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130979.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124451.1'} - assert 'NM_001130987.1:c.3678C>G' in list(results.keys()) - assert results['NM_001130987.1:c.3678C>G']['hgvs_lrg_transcript_variant'] == 'LRG_845t2:c.3678C>G' - assert results['NM_001130987.1:c.3678C>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001130987.1:c.3678C>G']['alt_genomic_loci'], []) - assert results['NM_001130987.1:c.3678C>G']['gene_symbol'] == 'DYSF' - assert results['NM_001130987.1:c.3678C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124459.1(LRG_845p2):p.(Ile1226Met)', 'slr': 'NP_001124459.1:p.(I1226M)'} - assert results['NM_001130987.1:c.3678C>G']['submitted_variant'] == '2-71825797-C-G' - assert results['NM_001130987.1:c.3678C>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001130987.1:c.3678C>G']['hgvs_lrg_variant'] == '' - assert results['NM_001130987.1:c.3678C>G']['hgvs_transcript_variant'] == 'NM_001130987.1:c.3678C>G' - assert results['NM_001130987.1:c.3678C>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001130987.1:c.3678C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} - assert results['NM_001130987.1:c.3678C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_001130987.1:c.3678C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 
'C', 'pos': '71825797', 'alt': 'G'}} - assert results['NM_001130987.1:c.3678C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_001130987.1:c.3678C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124459.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130987.1'} + assert 'NM_001130455.1:c.3627C>G' in list(results.keys()) + assert results['NM_001130455.1:c.3627C>G']['submitted_variant'] == '2-71825797-C-G' + assert results['NM_001130455.1:c.3627C>G']['gene_symbol'] == 'DYSF' + assert results['NM_001130455.1:c.3627C>G']['gene_ids'] == {'hgnc_id': 'HGNC:3097', 'entrez_gene_id': '8291', 'ucsc_id': 'uc002sie.4', 'omim_id': ['603009']} + assert results['NM_001130455.1:c.3627C>G']['hgvs_transcript_variant'] == 'NM_001130455.1:c.3627C>G' + assert results['NM_001130455.1:c.3627C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001130455.1:c.3627C>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001130455.1:c.3627C>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001130455.1:c.3627C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001123927.1:p.(Ile1209Met)', 'slr': 'NP_001123927.1:p.(I1209M)'} + assert results['NM_001130455.1:c.3627C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001130455.1:c.3627C>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001130455.1:c.3627C>G']['alt_genomic_loci'], []) + assert results['NM_001130455.1:c.3627C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130455.1:c.3627C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'pos': '71598667', 'ref': 'C', 'alt': 
'G'}} + assert results['NM_001130455.1:c.3627C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130455.1:c.3627C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130455.1:c.3627C>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130455.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001123927.1'} assert 'NM_001130983.1:c.3627C>G' in list(results.keys()) - assert results['NM_001130983.1:c.3627C>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001130983.1:c.3627C>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001130983.1:c.3627C>G']['alt_genomic_loci'], []) - assert results['NM_001130983.1:c.3627C>G']['gene_symbol'] == 'DYSF' - assert results['NM_001130983.1:c.3627C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124455.1:p.(Ile1209Met)', 'slr': 'NP_001124455.1:p.(I1209M)'} assert results['NM_001130983.1:c.3627C>G']['submitted_variant'] == '2-71825797-C-G' - assert results['NM_001130983.1:c.3627C>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001130983.1:c.3627C>G']['hgvs_lrg_variant'] == '' + assert results['NM_001130983.1:c.3627C>G']['gene_symbol'] == 'DYSF' + assert results['NM_001130983.1:c.3627C>G']['gene_ids'] == {'hgnc_id': 'HGNC:3097', 'entrez_gene_id': '8291', 'ucsc_id': 'uc002sie.4', 'omim_id': ['603009']} assert results['NM_001130983.1:c.3627C>G']['hgvs_transcript_variant'] == 'NM_001130983.1:c.3627C>G' + assert results['NM_001130983.1:c.3627C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001130983.1:c.3627C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001130983.1:c.3627C>G']['hgvs_refseqgene_variant'] 
== '' - assert results['NM_001130983.1:c.3627C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} - assert results['NM_001130983.1:c.3627C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_001130983.1:c.3627C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} - assert results['NM_001130983.1:c.3627C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_001130983.1:c.3627C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124455.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130983.1'} + assert results['NM_001130983.1:c.3627C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124455.1:p.(Ile1209Met)', 'slr': 'NP_001124455.1:p.(I1209M)'} + assert results['NM_001130983.1:c.3627C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001130983.1:c.3627C>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001130983.1:c.3627C>G']['alt_genomic_loci'], []) + assert results['NM_001130983.1:c.3627C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130983.1:c.3627C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130983.1:c.3627C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130983.1:c.3627C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130983.1:c.3627C>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130983.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124455.1'} - assert results['flag'] == 'gene_variant' assert 'NM_001130980.1:c.3675C>G' in list(results.keys()) - assert results['NM_001130980.1:c.3675C>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001130980.1:c.3675C>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001130980.1:c.3675C>G']['alt_genomic_loci'], []) - assert results['NM_001130980.1:c.3675C>G']['gene_symbol'] == 'DYSF' - assert results['NM_001130980.1:c.3675C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124452.1:p.(Ile1225Met)', 'slr': 'NP_001124452.1:p.(I1225M)'} assert results['NM_001130980.1:c.3675C>G']['submitted_variant'] == '2-71825797-C-G' - assert results['NM_001130980.1:c.3675C>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001130980.1:c.3675C>G']['hgvs_lrg_variant'] == '' + assert results['NM_001130980.1:c.3675C>G']['gene_symbol'] == 'DYSF' + assert results['NM_001130980.1:c.3675C>G']['gene_ids'] == {'hgnc_id': 'HGNC:3097', 'entrez_gene_id': '8291', 'ucsc_id': 'uc002sie.4', 'omim_id': ['603009']} assert results['NM_001130980.1:c.3675C>G']['hgvs_transcript_variant'] == 'NM_001130980.1:c.3675C>G' + assert results['NM_001130980.1:c.3675C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001130980.1:c.3675C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001130980.1:c.3675C>G']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_001130980.1:c.3675C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} - assert results['NM_001130980.1:c.3675C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_001130980.1:c.3675C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} - assert results['NM_001130980.1:c.3675C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_001130980.1:c.3675C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124452.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130980.1'} + assert results['NM_001130980.1:c.3675C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124452.1:p.(Ile1225Met)', 'slr': 'NP_001124452.1:p.(I1225M)'} + assert results['NM_001130980.1:c.3675C>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001130980.1:c.3675C>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001130980.1:c.3675C>G']['alt_genomic_loci'], []) + assert results['NM_001130980.1:c.3675C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130980.1:c.3675C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130980.1:c.3675C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': 
{'chr': '2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130980.1:c.3675C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_001130980.1:c.3675C>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130980.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124452.1'} assert 'NM_003494.3:c.3624C>G' in list(results.keys()) - assert results['NM_003494.3:c.3624C>G']['hgvs_lrg_transcript_variant'] == 'LRG_845t1:c.3624C>G' - assert results['NM_003494.3:c.3624C>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003494.3:c.3624C>G']['alt_genomic_loci'], []) - assert results['NM_003494.3:c.3624C>G']['gene_symbol'] == 'DYSF' - assert results['NM_003494.3:c.3624C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003485.1(LRG_845p1):p.(Ile1208Met)', 'slr': 'NP_003485.1:p.(I1208M)'} assert results['NM_003494.3:c.3624C>G']['submitted_variant'] == '2-71825797-C-G' - assert results['NM_003494.3:c.3624C>G']['genome_context_intronic_sequence'] == '' - assert results['NM_003494.3:c.3624C>G']['hgvs_lrg_variant'] == '' + assert results['NM_003494.3:c.3624C>G']['gene_symbol'] == 'DYSF' + assert results['NM_003494.3:c.3624C>G']['gene_ids'] == {'hgnc_id': 'HGNC:3097', 'entrez_gene_id': '8291', 'ucsc_id': 'uc002sie.4', 'omim_id': ['603009']} assert results['NM_003494.3:c.3624C>G']['hgvs_transcript_variant'] == 'NM_003494.3:c.3624C>G' + assert results['NM_003494.3:c.3624C>G']['genome_context_intronic_sequence'] == '' + assert results['NM_003494.3:c.3624C>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003494.3:c.3624C>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_003494.3:c.3624C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 
'pos': '71825797', 'alt': 'G'}} - assert results['NM_003494.3:c.3624C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_003494.3:c.3624C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} - assert results['NM_003494.3:c.3624C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_003494.3:c.3624C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003485.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003494.3'} - - assert 'NM_001130984.1:c.3585C>G' in list(results.keys()) - assert results['NM_001130984.1:c.3585C>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001130984.1:c.3585C>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001130984.1:c.3585C>G']['alt_genomic_loci'], []) - assert results['NM_001130984.1:c.3585C>G']['gene_symbol'] == 'DYSF' - assert results['NM_001130984.1:c.3585C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124456.1:p.(Ile1195Met)', 'slr': 'NP_001124456.1:p.(I1195M)'} - assert results['NM_001130984.1:c.3585C>G']['submitted_variant'] == '2-71825797-C-G' - assert results['NM_001130984.1:c.3585C>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001130984.1:c.3585C>G']['hgvs_lrg_variant'] == '' - assert results['NM_001130984.1:c.3585C>G']['hgvs_transcript_variant'] == 'NM_001130984.1:c.3585C>G' - assert results['NM_001130984.1:c.3585C>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001130984.1:c.3585C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 
'pos': '71825797', 'alt': 'G'}} - assert results['NM_001130984.1:c.3585C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_001130984.1:c.3585C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} - assert results['NM_001130984.1:c.3585C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_001130984.1:c.3585C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124456.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130984.1'} - - assert 'NM_001130977.1:c.3582C>G' in list(results.keys()) - assert results['NM_001130977.1:c.3582C>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001130977.1:c.3582C>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001130977.1:c.3582C>G']['alt_genomic_loci'], []) - assert results['NM_001130977.1:c.3582C>G']['gene_symbol'] == 'DYSF' - assert results['NM_001130977.1:c.3582C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124449.1:p.(Ile1194Met)', 'slr': 'NP_001124449.1:p.(I1194M)'} - assert results['NM_001130977.1:c.3582C>G']['submitted_variant'] == '2-71825797-C-G' - assert results['NM_001130977.1:c.3582C>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001130977.1:c.3582C>G']['hgvs_lrg_variant'] == '' - assert results['NM_001130977.1:c.3582C>G']['hgvs_transcript_variant'] == 'NM_001130977.1:c.3582C>G' - assert results['NM_001130977.1:c.3582C>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001130977.1:c.3582C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 
'chr2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} - assert results['NM_001130977.1:c.3582C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_001130977.1:c.3582C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} - assert results['NM_001130977.1:c.3582C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_001130977.1:c.3582C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124449.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130977.1'} - - assert 'NM_001130455.1:c.3627C>G' in list(results.keys()) - assert results['NM_001130455.1:c.3627C>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001130455.1:c.3627C>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001130455.1:c.3627C>G']['alt_genomic_loci'], []) - assert results['NM_001130455.1:c.3627C>G']['gene_symbol'] == 'DYSF' - assert results['NM_001130455.1:c.3627C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001123927.1:p.(Ile1209Met)', 'slr': 'NP_001123927.1:p.(I1209M)'} - assert results['NM_001130455.1:c.3627C>G']['submitted_variant'] == '2-71825797-C-G' - assert results['NM_001130455.1:c.3627C>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001130455.1:c.3627C>G']['hgvs_lrg_variant'] == '' - assert results['NM_001130455.1:c.3627C>G']['hgvs_transcript_variant'] == 'NM_001130455.1:c.3627C>G' - assert results['NM_001130455.1:c.3627C>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001130455.1:c.3627C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} - assert results['NM_001130455.1:c.3627C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_001130455.1:c.3627C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} - assert results['NM_001130455.1:c.3627C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_001130455.1:c.3627C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001123927.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130455.1'} - - assert 'NM_001130978.1:c.3624C>G' in list(results.keys()) - assert results['NM_001130978.1:c.3624C>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001130978.1:c.3624C>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001130978.1:c.3624C>G']['alt_genomic_loci'], []) - assert results['NM_001130978.1:c.3624C>G']['gene_symbol'] == 'DYSF' - assert results['NM_001130978.1:c.3624C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124450.1:p.(Ile1208Met)', 'slr': 'NP_001124450.1:p.(I1208M)'} - assert results['NM_001130978.1:c.3624C>G']['submitted_variant'] == '2-71825797-C-G' - assert results['NM_001130978.1:c.3624C>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001130978.1:c.3624C>G']['hgvs_lrg_variant'] == 'LRG_845:g.150045C>G' - assert results['NM_001130978.1:c.3624C>G']['hgvs_transcript_variant'] == 'NM_001130978.1:c.3624C>G' - assert results['NM_001130978.1:c.3624C>G']['hgvs_refseqgene_variant'] == 'NG_008694.1:g.150045C>G' - assert 
results['NM_001130978.1:c.3624C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} - assert results['NM_001130978.1:c.3624C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_001130978.1:c.3624C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} - assert results['NM_001130978.1:c.3624C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_001130978.1:c.3624C>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008694.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124450.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130978.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_845.xml'} - - assert 'NM_001130982.1:c.3720C>G' in list(results.keys()) - assert results['NM_001130982.1:c.3720C>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001130982.1:c.3720C>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001130982.1:c.3720C>G']['alt_genomic_loci'], []) - assert results['NM_001130982.1:c.3720C>G']['gene_symbol'] == 'DYSF' - assert results['NM_001130982.1:c.3720C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124454.1:p.(Ile1240Met)', 'slr': 'NP_001124454.1:p.(I1240M)'} - assert results['NM_001130982.1:c.3720C>G']['submitted_variant'] == '2-71825797-C-G' - assert results['NM_001130982.1:c.3720C>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001130982.1:c.3720C>G']['hgvs_lrg_variant'] == '' - assert 
results['NM_001130982.1:c.3720C>G']['hgvs_transcript_variant'] == 'NM_001130982.1:c.3720C>G' - assert results['NM_001130982.1:c.3720C>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001130982.1:c.3720C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} - assert results['NM_001130982.1:c.3720C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_001130982.1:c.3720C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} - assert results['NM_001130982.1:c.3720C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_001130982.1:c.3720C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124454.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130982.1'} - - assert 'NM_001130986.1:c.3585C>G' in list(results.keys()) - assert results['NM_001130986.1:c.3585C>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001130986.1:c.3585C>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001130986.1:c.3585C>G']['alt_genomic_loci'], []) - assert results['NM_001130986.1:c.3585C>G']['gene_symbol'] == 'DYSF' - assert results['NM_001130986.1:c.3585C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001124458.1:p.(Ile1195Met)', 'slr': 'NP_001124458.1:p.(I1195M)'} - assert results['NM_001130986.1:c.3585C>G']['submitted_variant'] == '2-71825797-C-G' - assert results['NM_001130986.1:c.3585C>G']['genome_context_intronic_sequence'] == '' - assert 
results['NM_001130986.1:c.3585C>G']['hgvs_lrg_variant'] == '' - assert results['NM_001130986.1:c.3585C>G']['hgvs_transcript_variant'] == 'NM_001130986.1:c.3585C>G' - assert results['NM_001130986.1:c.3585C>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001130986.1:c.3585C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} - assert results['NM_001130986.1:c.3585C>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_001130986.1:c.3585C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71825797', 'alt': 'G'}} - assert results['NM_001130986.1:c.3585C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '71598667', 'alt': 'G'}} - assert results['NM_001130986.1:c.3585C>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001124458.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001130986.1'} - + assert results['NM_003494.3:c.3624C>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003485.1(LRG_845p1):p.(Ile1208Met)', 'slr': 'NP_003485.1:p.(I1208M)'} + assert results['NM_003494.3:c.3624C>G']['hgvs_lrg_transcript_variant'] == 'LRG_845t1:c.3624C>G' + assert results['NM_003494.3:c.3624C>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003494.3:c.3624C>G']['alt_genomic_loci'], []) + assert results['NM_003494.3:c.3624C>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': 'chr2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_003494.3:c.3624C>G']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': 'chr2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_003494.3:c.3624C>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.71825797C>G', 'vcf': {'chr': '2', 'pos': '71825797', 'ref': 'C', 'alt': 'G'}} + assert results['NM_003494.3:c.3624C>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.71598667C>G', 'vcf': {'chr': '2', 'pos': '71598667', 'ref': 'C', 'alt': 'G'}} + assert results['NM_003494.3:c.3624C>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003494.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003485.1'} def test_variant270(self): variant = '2-166179712-G-C' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' + assert 'NM_001040142.1:c.1718G>C' in list(results.keys()) + assert results['NM_001040142.1:c.1718G>C']['submitted_variant'] == '2-166179712-G-C' + assert results['NM_001040142.1:c.1718G>C']['gene_symbol'] == 'SCN2A' + assert results['NM_001040142.1:c.1718G>C']['gene_ids'] == {'hgnc_id': 'HGNC:10588', 'entrez_gene_id': '6326', 'ucsc_id': 'uc061pdo.1', 'omim_id': ['182390']} + assert results['NM_001040142.1:c.1718G>C']['hgvs_transcript_variant'] == 'NM_001040142.1:c.1718G>C' + assert results['NM_001040142.1:c.1718G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_001040142.1:c.1718G>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001040142.1:c.1718G>C']['hgvs_refseqgene_variant'] == 'NG_008143.1:g.88801G>C' + assert results['NM_001040142.1:c.1718G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035232.1:p.(Ser573Thr)', 'slr': 'NP_001035232.1:p.(S573T)'} + assert results['NM_001040142.1:c.1718G>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001040142.1:c.1718G>C']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['NM_001040142.1:c.1718G>C']['alt_genomic_loci'], []) + assert results['NM_001040142.1:c.1718G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166179712G>C', 'vcf': {'chr': 'chr2', 'pos': '166179712', 'ref': 'G', 'alt': 'C'}} + assert results['NM_001040142.1:c.1718G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165323202G>C', 'vcf': {'chr': 'chr2', 'pos': '165323202', 'ref': 'G', 'alt': 'C'}} + assert results['NM_001040142.1:c.1718G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166179712G>C', 'vcf': {'chr': '2', 'pos': '166179712', 'ref': 'G', 'alt': 'C'}} + assert results['NM_001040142.1:c.1718G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165323202G>C', 'vcf': {'chr': '2', 'pos': '165323202', 'ref': 'G', 'alt': 'C'}} + assert results['NM_001040142.1:c.1718G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040142.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035232.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008143.1'} + assert 'NM_021007.2:c.1718G>C' in list(results.keys()) - assert results['NM_021007.2:c.1718G>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_021007.2:c.1718G>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_021007.2:c.1718G>C']['alt_genomic_loci'], []) - assert results['NM_021007.2:c.1718G>C']['gene_symbol'] == 'SCN2A' - assert results['NM_021007.2:c.1718G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_066287.2:p.(Ser573Thr)', 'slr': 'NP_066287.2:p.(S573T)'} assert results['NM_021007.2:c.1718G>C']['submitted_variant'] == '2-166179712-G-C' - assert results['NM_021007.2:c.1718G>C']['genome_context_intronic_sequence'] == '' - assert results['NM_021007.2:c.1718G>C']['hgvs_lrg_variant'] == '' + assert 
results['NM_021007.2:c.1718G>C']['gene_symbol'] == 'SCN2A' + assert results['NM_021007.2:c.1718G>C']['gene_ids'] == {'hgnc_id': 'HGNC:10588', 'entrez_gene_id': '6326', 'ucsc_id': 'uc061pdo.1', 'omim_id': ['182390']} assert results['NM_021007.2:c.1718G>C']['hgvs_transcript_variant'] == 'NM_021007.2:c.1718G>C' + assert results['NM_021007.2:c.1718G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_021007.2:c.1718G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_021007.2:c.1718G>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_021007.2:c.1718G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166179712G>C', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '166179712', 'alt': 'C'}} - assert results['NM_021007.2:c.1718G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165323202G>C', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '165323202', 'alt': 'C'}} - assert results['NM_021007.2:c.1718G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166179712G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '166179712', 'alt': 'C'}} - assert results['NM_021007.2:c.1718G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165323202G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '165323202', 'alt': 'C'}} - assert results['NM_021007.2:c.1718G>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_066287.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021007.2'} + assert results['NM_021007.2:c.1718G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_066287.2:p.(Ser573Thr)', 'slr': 'NP_066287.2:p.(S573T)'} + assert results['NM_021007.2:c.1718G>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_021007.2:c.1718G>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_021007.2:c.1718G>C']['alt_genomic_loci'], []) + assert 
results['NM_021007.2:c.1718G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166179712G>C', 'vcf': {'chr': 'chr2', 'pos': '166179712', 'ref': 'G', 'alt': 'C'}} + assert results['NM_021007.2:c.1718G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165323202G>C', 'vcf': {'chr': 'chr2', 'pos': '165323202', 'ref': 'G', 'alt': 'C'}} + assert results['NM_021007.2:c.1718G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166179712G>C', 'vcf': {'chr': '2', 'pos': '166179712', 'ref': 'G', 'alt': 'C'}} + assert results['NM_021007.2:c.1718G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165323202G>C', 'vcf': {'chr': '2', 'pos': '165323202', 'ref': 'G', 'alt': 'C'}} + assert results['NM_021007.2:c.1718G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021007.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_066287.2'} - assert results['flag'] == 'gene_variant' assert 'NM_001040143.1:c.1718G>C' in list(results.keys()) - assert results['NM_001040143.1:c.1718G>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001040143.1:c.1718G>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001040143.1:c.1718G>C']['alt_genomic_loci'], []) - assert results['NM_001040143.1:c.1718G>C']['gene_symbol'] == 'SCN2A' - assert results['NM_001040143.1:c.1718G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035233.1:p.(Ser573Thr)', 'slr': 'NP_001035233.1:p.(S573T)'} assert results['NM_001040143.1:c.1718G>C']['submitted_variant'] == '2-166179712-G-C' - assert results['NM_001040143.1:c.1718G>C']['genome_context_intronic_sequence'] == '' - assert results['NM_001040143.1:c.1718G>C']['hgvs_lrg_variant'] == '' + assert results['NM_001040143.1:c.1718G>C']['gene_symbol'] == 'SCN2A' + assert results['NM_001040143.1:c.1718G>C']['gene_ids'] == {'hgnc_id': 
'HGNC:10588', 'entrez_gene_id': '6326', 'ucsc_id': 'uc061pdo.1', 'omim_id': ['182390']} assert results['NM_001040143.1:c.1718G>C']['hgvs_transcript_variant'] == 'NM_001040143.1:c.1718G>C' + assert results['NM_001040143.1:c.1718G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_001040143.1:c.1718G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001040143.1:c.1718G>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_001040143.1:c.1718G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166179712G>C', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '166179712', 'alt': 'C'}} - assert results['NM_001040143.1:c.1718G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165323202G>C', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '165323202', 'alt': 'C'}} - assert results['NM_001040143.1:c.1718G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166179712G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '166179712', 'alt': 'C'}} - assert results['NM_001040143.1:c.1718G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165323202G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '165323202', 'alt': 'C'}} - assert results['NM_001040143.1:c.1718G>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035233.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040143.1'} - - assert 'NM_001040142.1:c.1718G>C' in list(results.keys()) - assert results['NM_001040142.1:c.1718G>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001040142.1:c.1718G>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001040142.1:c.1718G>C']['alt_genomic_loci'], []) - assert results['NM_001040142.1:c.1718G>C']['gene_symbol'] == 'SCN2A' - assert results['NM_001040142.1:c.1718G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_001035232.1:p.(Ser573Thr)', 'slr': 'NP_001035232.1:p.(S573T)'} - assert results['NM_001040142.1:c.1718G>C']['submitted_variant'] == '2-166179712-G-C' - assert results['NM_001040142.1:c.1718G>C']['genome_context_intronic_sequence'] == '' - assert results['NM_001040142.1:c.1718G>C']['hgvs_lrg_variant'] == '' - assert results['NM_001040142.1:c.1718G>C']['hgvs_transcript_variant'] == 'NM_001040142.1:c.1718G>C' - assert results['NM_001040142.1:c.1718G>C']['hgvs_refseqgene_variant'] == 'NG_008143.1:g.88801G>C' - assert results['NM_001040142.1:c.1718G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166179712G>C', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '166179712', 'alt': 'C'}} - assert results['NM_001040142.1:c.1718G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165323202G>C', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '165323202', 'alt': 'C'}} - assert results['NM_001040142.1:c.1718G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166179712G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '166179712', 'alt': 'C'}} - assert results['NM_001040142.1:c.1718G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165323202G>C', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '165323202', 'alt': 'C'}} - assert results['NM_001040142.1:c.1718G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008143.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035232.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040142.1'} - + assert results['NM_001040143.1:c.1718G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035233.1:p.(Ser573Thr)', 'slr': 'NP_001035233.1:p.(S573T)'} + assert results['NM_001040143.1:c.1718G>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001040143.1:c.1718G>C']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['NM_001040143.1:c.1718G>C']['alt_genomic_loci'], []) + assert results['NM_001040143.1:c.1718G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166179712G>C', 'vcf': {'chr': 'chr2', 'pos': '166179712', 'ref': 'G', 'alt': 'C'}} + assert results['NM_001040143.1:c.1718G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165323202G>C', 'vcf': {'chr': 'chr2', 'pos': '165323202', 'ref': 'G', 'alt': 'C'}} + assert results['NM_001040143.1:c.1718G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166179712G>C', 'vcf': {'chr': '2', 'pos': '166179712', 'ref': 'G', 'alt': 'C'}} + assert results['NM_001040143.1:c.1718G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165323202G>C', 'vcf': {'chr': '2', 'pos': '165323202', 'ref': 'G', 'alt': 'C'}} + assert results['NM_001040143.1:c.1718G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040143.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035233.1'} def test_variant271(self): variant = '2-166183371-A-G' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' + assert 'NM_001040142.1:c.2026A>G' in list(results.keys()) + assert results['NM_001040142.1:c.2026A>G']['submitted_variant'] == '2-166183371-A-G' + assert results['NM_001040142.1:c.2026A>G']['gene_symbol'] == 'SCN2A' + assert results['NM_001040142.1:c.2026A>G']['gene_ids'] == {'hgnc_id': 'HGNC:10588', 'entrez_gene_id': '6326', 'ucsc_id': 'uc061pdo.1', 'omim_id': ['182390']} + assert results['NM_001040142.1:c.2026A>G']['hgvs_transcript_variant'] == 'NM_001040142.1:c.2026A>G' + assert results['NM_001040142.1:c.2026A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001040142.1:c.2026A>G']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_001040142.1:c.2026A>G']['hgvs_refseqgene_variant'] == 'NG_008143.1:g.92460A>G' + assert results['NM_001040142.1:c.2026A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035232.1:p.(Thr676Ala)', 'slr': 'NP_001035232.1:p.(T676A)'} + assert results['NM_001040142.1:c.2026A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001040142.1:c.2026A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001040142.1:c.2026A>G']['alt_genomic_loci'], []) + assert results['NM_001040142.1:c.2026A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166183371A>G', 'vcf': {'chr': 'chr2', 'pos': '166183371', 'ref': 'A', 'alt': 'G'}} + assert results['NM_001040142.1:c.2026A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165326861A>G', 'vcf': {'chr': 'chr2', 'pos': '165326861', 'ref': 'A', 'alt': 'G'}} + assert results['NM_001040142.1:c.2026A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166183371A>G', 'vcf': {'chr': '2', 'pos': '166183371', 'ref': 'A', 'alt': 'G'}} + assert results['NM_001040142.1:c.2026A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165326861A>G', 'vcf': {'chr': '2', 'pos': '165326861', 'ref': 'A', 'alt': 'G'}} + assert results['NM_001040142.1:c.2026A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040142.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035232.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008143.1'} + assert 'NM_021007.2:c.2026A>G' in list(results.keys()) - assert results['NM_021007.2:c.2026A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_021007.2:c.2026A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_021007.2:c.2026A>G']['alt_genomic_loci'], []) - assert results['NM_021007.2:c.2026A>G']['gene_symbol'] == 'SCN2A' - assert 
results['NM_021007.2:c.2026A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_066287.2:p.(Thr676Ala)', 'slr': 'NP_066287.2:p.(T676A)'} assert results['NM_021007.2:c.2026A>G']['submitted_variant'] == '2-166183371-A-G' - assert results['NM_021007.2:c.2026A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_021007.2:c.2026A>G']['hgvs_lrg_variant'] == '' + assert results['NM_021007.2:c.2026A>G']['gene_symbol'] == 'SCN2A' + assert results['NM_021007.2:c.2026A>G']['gene_ids'] == {'hgnc_id': 'HGNC:10588', 'entrez_gene_id': '6326', 'ucsc_id': 'uc061pdo.1', 'omim_id': ['182390']} assert results['NM_021007.2:c.2026A>G']['hgvs_transcript_variant'] == 'NM_021007.2:c.2026A>G' + assert results['NM_021007.2:c.2026A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_021007.2:c.2026A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_021007.2:c.2026A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_021007.2:c.2026A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166183371A>G', 'vcf': {'chr': 'chr2', 'ref': 'A', 'pos': '166183371', 'alt': 'G'}} - assert results['NM_021007.2:c.2026A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165326861A>G', 'vcf': {'chr': 'chr2', 'ref': 'A', 'pos': '165326861', 'alt': 'G'}} - assert results['NM_021007.2:c.2026A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166183371A>G', 'vcf': {'chr': '2', 'ref': 'A', 'pos': '166183371', 'alt': 'G'}} - assert results['NM_021007.2:c.2026A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165326861A>G', 'vcf': {'chr': '2', 'ref': 'A', 'pos': '165326861', 'alt': 'G'}} - assert results['NM_021007.2:c.2026A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_066287.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021007.2'} + assert 
results['NM_021007.2:c.2026A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_066287.2:p.(Thr676Ala)', 'slr': 'NP_066287.2:p.(T676A)'} + assert results['NM_021007.2:c.2026A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_021007.2:c.2026A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_021007.2:c.2026A>G']['alt_genomic_loci'], []) + assert results['NM_021007.2:c.2026A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166183371A>G', 'vcf': {'chr': 'chr2', 'pos': '166183371', 'ref': 'A', 'alt': 'G'}} + assert results['NM_021007.2:c.2026A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165326861A>G', 'vcf': {'chr': 'chr2', 'pos': '165326861', 'ref': 'A', 'alt': 'G'}} + assert results['NM_021007.2:c.2026A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166183371A>G', 'vcf': {'chr': '2', 'pos': '166183371', 'ref': 'A', 'alt': 'G'}} + assert results['NM_021007.2:c.2026A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165326861A>G', 'vcf': {'chr': '2', 'pos': '165326861', 'ref': 'A', 'alt': 'G'}} + assert results['NM_021007.2:c.2026A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021007.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_066287.2'} - assert results['flag'] == 'gene_variant' assert 'NM_001040143.1:c.2026A>G' in list(results.keys()) - assert results['NM_001040143.1:c.2026A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001040143.1:c.2026A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001040143.1:c.2026A>G']['alt_genomic_loci'], []) - assert results['NM_001040143.1:c.2026A>G']['gene_symbol'] == 'SCN2A' - assert results['NM_001040143.1:c.2026A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035233.1:p.(Thr676Ala)', 'slr': 'NP_001035233.1:p.(T676A)'} assert 
results['NM_001040143.1:c.2026A>G']['submitted_variant'] == '2-166183371-A-G' - assert results['NM_001040143.1:c.2026A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001040143.1:c.2026A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001040143.1:c.2026A>G']['gene_symbol'] == 'SCN2A' + assert results['NM_001040143.1:c.2026A>G']['gene_ids'] == {'hgnc_id': 'HGNC:10588', 'entrez_gene_id': '6326', 'ucsc_id': 'uc061pdo.1', 'omim_id': ['182390']} assert results['NM_001040143.1:c.2026A>G']['hgvs_transcript_variant'] == 'NM_001040143.1:c.2026A>G' + assert results['NM_001040143.1:c.2026A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001040143.1:c.2026A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001040143.1:c.2026A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001040143.1:c.2026A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166183371A>G', 'vcf': {'chr': 'chr2', 'ref': 'A', 'pos': '166183371', 'alt': 'G'}} - assert results['NM_001040143.1:c.2026A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165326861A>G', 'vcf': {'chr': 'chr2', 'ref': 'A', 'pos': '165326861', 'alt': 'G'}} - assert results['NM_001040143.1:c.2026A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166183371A>G', 'vcf': {'chr': '2', 'ref': 'A', 'pos': '166183371', 'alt': 'G'}} - assert results['NM_001040143.1:c.2026A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165326861A>G', 'vcf': {'chr': '2', 'ref': 'A', 'pos': '165326861', 'alt': 'G'}} - assert results['NM_001040143.1:c.2026A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035233.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040143.1'} - - assert 'NM_001040142.1:c.2026A>G' in list(results.keys()) - assert 
results['NM_001040142.1:c.2026A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001040142.1:c.2026A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001040142.1:c.2026A>G']['alt_genomic_loci'], []) - assert results['NM_001040142.1:c.2026A>G']['gene_symbol'] == 'SCN2A' - assert results['NM_001040142.1:c.2026A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035232.1:p.(Thr676Ala)', 'slr': 'NP_001035232.1:p.(T676A)'} - assert results['NM_001040142.1:c.2026A>G']['submitted_variant'] == '2-166183371-A-G' - assert results['NM_001040142.1:c.2026A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001040142.1:c.2026A>G']['hgvs_lrg_variant'] == '' - assert results['NM_001040142.1:c.2026A>G']['hgvs_transcript_variant'] == 'NM_001040142.1:c.2026A>G' - assert results['NM_001040142.1:c.2026A>G']['hgvs_refseqgene_variant'] == 'NG_008143.1:g.92460A>G' - assert results['NM_001040142.1:c.2026A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166183371A>G', 'vcf': {'chr': 'chr2', 'ref': 'A', 'pos': '166183371', 'alt': 'G'}} - assert results['NM_001040142.1:c.2026A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165326861A>G', 'vcf': {'chr': 'chr2', 'ref': 'A', 'pos': '165326861', 'alt': 'G'}} - assert results['NM_001040142.1:c.2026A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166183371A>G', 'vcf': {'chr': '2', 'ref': 'A', 'pos': '166183371', 'alt': 'G'}} - assert results['NM_001040142.1:c.2026A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165326861A>G', 'vcf': {'chr': '2', 'ref': 'A', 'pos': '165326861', 'alt': 'G'}} - assert results['NM_001040142.1:c.2026A>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008143.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035232.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040142.1'} - + assert results['NM_001040143.1:c.2026A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001035233.1:p.(Thr676Ala)', 'slr': 'NP_001035233.1:p.(T676A)'} + assert results['NM_001040143.1:c.2026A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001040143.1:c.2026A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001040143.1:c.2026A>G']['alt_genomic_loci'], []) + assert results['NM_001040143.1:c.2026A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166183371A>G', 'vcf': {'chr': 'chr2', 'pos': '166183371', 'ref': 'A', 'alt': 'G'}} + assert results['NM_001040143.1:c.2026A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165326861A>G', 'vcf': {'chr': 'chr2', 'pos': '165326861', 'ref': 'A', 'alt': 'G'}} + assert results['NM_001040143.1:c.2026A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166183371A>G', 'vcf': {'chr': '2', 'pos': '166183371', 'ref': 'A', 'alt': 'G'}} + assert results['NM_001040143.1:c.2026A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.165326861A>G', 'vcf': {'chr': '2', 'pos': '165326861', 'ref': 'A', 'alt': 'G'}} + assert results['NM_001040143.1:c.2026A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001040143.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001035233.1'} def test_variant272(self): variant = '2-166929889-GTCCAGGTCCT-GAC' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_001353951.1:c.233_242delinsGT' in list(results.keys()) - assert results['NM_001353951.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001353951.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_001353951.1:c.233_242delinsGT']['alt_genomic_loci'], []) - assert results['NM_001353951.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' - assert results['NM_001353951.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340880.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340880.1:p.(E78Gfs*7)'} assert results['NM_001353951.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' - assert results['NM_001353951.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' - assert results['NM_001353951.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + assert results['NM_001353951.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001353951.1:c.233_242delinsGT']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} assert results['NM_001353951.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353951.1:c.233_242delinsGT' + assert results['NM_001353951.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001353951.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353951.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353951.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001353951.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001353951.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - 
assert results['NM_001353951.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001353951.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340880.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353951.1'} + assert results['NM_001353951.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340880.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340880.1:p.(E78Gfs*7)'} + assert results['NM_001353951.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353951.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001353951.1:c.233_242delinsGT']['alt_genomic_loci'], []) + assert results['NM_001353951.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353951.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353951.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353951.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353951.1:c.233_242delinsGT']['reference_sequence_records'] == {'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353951.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340880.1'} + + assert 'NM_001353952.1:c.233_242delinsGT' in list(results.keys()) + assert results['NM_001353952.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_001353952.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001353952.1:c.233_242delinsGT']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert results['NM_001353952.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353952.1:c.233_242delinsGT' + assert results['NM_001353952.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001353952.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353952.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353952.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340881.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340881.1:p.(E78Gfs*7)'} + assert results['NM_001353952.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353952.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001353952.1:c.233_242delinsGT']['alt_genomic_loci'], []) + assert results['NM_001353952.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353952.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353952.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353952.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353952.1:c.233_242delinsGT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353952.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340881.1'} assert 'NM_001353958.1:c.233_242delinsGT' in list(results.keys()) - assert results['NM_001353958.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001353958.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001353958.1:c.233_242delinsGT']['alt_genomic_loci'], []) - assert results['NM_001353958.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' - assert results['NM_001353958.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340887.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340887.1:p.(E78Gfs*7)'} assert results['NM_001353958.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' - assert results['NM_001353958.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' - assert results['NM_001353958.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + assert results['NM_001353958.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001353958.1:c.233_242delinsGT']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} assert results['NM_001353958.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353958.1:c.233_242delinsGT' + assert results['NM_001353958.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert 
results['NM_001353958.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353958.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353958.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001353958.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001353958.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001353958.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001353958.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340887.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353958.1'} - - assert 'NM_001202435.1:c.233_242delinsGT' in list(results.keys()) - assert results['NM_001202435.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001202435.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001202435.1:c.233_242delinsGT']['alt_genomic_loci'], []) - assert results['NM_001202435.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' - assert results['NM_001202435.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001189364.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001189364.1:p.(E78Gfs*7)'} - assert 
results['NM_001202435.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' - assert results['NM_001202435.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' - assert results['NM_001202435.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' - assert results['NM_001202435.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001202435.1:c.233_242delinsGT' - assert results['NM_001202435.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001202435.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001202435.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001202435.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001202435.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001202435.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001189364.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001202435.1'} + assert results['NM_001353958.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340887.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340887.1:p.(E78Gfs*7)'} + assert results['NM_001353958.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001353958.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001353958.1:c.233_242delinsGT']['alt_genomic_loci'], []) + assert results['NM_001353958.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353958.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353958.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353958.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353958.1:c.233_242delinsGT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353958.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340887.1'} - assert 'NR_148667.1:n.638_647delinsGT' in list(results.keys()) - assert results['NR_148667.1:n.638_647delinsGT']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_148667.1:n.638_647delinsGT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_148667.1:n.638_647delinsGT']['alt_genomic_loci'], []) - assert results['NR_148667.1:n.638_647delinsGT']['gene_symbol'] == 'SCN1A' - assert results['NR_148667.1:n.638_647delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} - assert results['NR_148667.1:n.638_647delinsGT']['submitted_variant'] == 
'2-166929889-GTCCAGGTCCT-GAC' - assert results['NR_148667.1:n.638_647delinsGT']['genome_context_intronic_sequence'] == '' - assert results['NR_148667.1:n.638_647delinsGT']['hgvs_lrg_variant'] == '' - assert results['NR_148667.1:n.638_647delinsGT']['hgvs_transcript_variant'] == 'NR_148667.1:n.638_647delinsGT' - assert results['NR_148667.1:n.638_647delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NR_148667.1:n.638_647delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NR_148667.1:n.638_647delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NR_148667.1:n.638_647delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NR_148667.1:n.638_647delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NR_148667.1:n.638_647delinsGT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_148667.1'} + assert 'NM_001165964.2:c.233_242delinsGT' in list(results.keys()) + assert results['NM_001165964.2:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_001165964.2:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001165964.2:c.233_242delinsGT']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert results['NM_001165964.2:c.233_242delinsGT']['hgvs_transcript_variant'] 
== 'NM_001165964.2:c.233_242delinsGT' + assert results['NM_001165964.2:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001165964.2:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001165964.2:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001165964.2:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159436.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159436.1:p.(E78Gfs*7)'} + assert results['NM_001165964.2:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001165964.2:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001165964.2:c.233_242delinsGT']['alt_genomic_loci'], []) + assert results['NM_001165964.2:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001165964.2:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001165964.2:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001165964.2:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001165964.2:c.233_242delinsGT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165964.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159436.1'} assert 'NM_001165964.1:c.233_242delinsGT' in 
list(results.keys()) - assert results['NM_001165964.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001165964.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001165964.1:c.233_242delinsGT']['alt_genomic_loci'], []) - assert results['NM_001165964.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' - assert results['NM_001165964.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159436.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159436.1:p.(E78Gfs*7)'} assert results['NM_001165964.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' - assert results['NM_001165964.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' - assert results['NM_001165964.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + assert results['NM_001165964.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001165964.1:c.233_242delinsGT']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} assert results['NM_001165964.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001165964.1:c.233_242delinsGT' + assert results['NM_001165964.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001165964.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001165964.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001165964.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001165964.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert 
results['NM_001165964.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001165964.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001165964.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159436.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165964.1'} + assert results['NM_001165964.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159436.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159436.1:p.(E78Gfs*7)'} + assert results['NM_001165964.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001165964.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001165964.1:c.233_242delinsGT']['alt_genomic_loci'], []) + assert results['NM_001165964.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001165964.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001165964.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001165964.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001165964.1:c.233_242delinsGT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165964.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159436.1'} - assert 'NM_001202435.2:c.233_242delinsGT' in list(results.keys()) - assert results['NM_001202435.2:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001202435.2:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001202435.2:c.233_242delinsGT']['alt_genomic_loci'], []) - assert results['NM_001202435.2:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' - assert results['NM_001202435.2:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001189364.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001189364.1:p.(E78Gfs*7)'} - assert results['NM_001202435.2:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' - assert results['NM_001202435.2:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' - assert results['NM_001202435.2:c.233_242delinsGT']['hgvs_lrg_variant'] == '' - assert results['NM_001202435.2:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001202435.2:c.233_242delinsGT' - assert results['NM_001202435.2:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001202435.2:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001202435.2:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert 
results['NM_001202435.2:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001202435.2:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001202435.2:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001189364.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001202435.2'} + assert 'NM_006920.4:c.233_242delinsGT' in list(results.keys()) + assert results['NM_006920.4:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_006920.4:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_006920.4:c.233_242delinsGT']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert results['NM_006920.4:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_006920.4:c.233_242delinsGT' + assert results['NM_006920.4:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_006920.4:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_006920.4:c.233_242delinsGT']['hgvs_refseqgene_variant'] == 'NG_011906.1:g.5251_5260delinsGT' + assert results['NM_006920.4:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_008851.3(LRG_8p1):p.(Glu78GlyfsTer7)', 'slr': 'NP_008851.3:p.(E78Gfs*7)'} + assert results['NM_006920.4:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == 'LRG_8t1:c.233_242delinsGT' + assert results['NM_006920.4:c.233_242delinsGT']['hgvs_lrg_variant'] == 'LRG_8:g.5251_5260delinsGT' + self.assertCountEqual(results['NM_006920.4:c.233_242delinsGT']['alt_genomic_loci'], []) 
+ assert results['NM_006920.4:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_006920.4:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_006920.4:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_006920.4:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_006920.4:c.233_242delinsGT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006920.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_008851.3', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011906.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_8.xml'} - assert 'NM_006920.5:c.233_242delinsGT' in list(results.keys()) - assert results['NM_006920.5:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_006920.5:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_006920.5:c.233_242delinsGT']['alt_genomic_loci'], []) - assert results['NM_006920.5:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' - assert results['NM_006920.5:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_008851.3(LRG_8p1):p.(Glu78GlyfsTer7)', 'slr': 'NP_008851.3:p.(E78Gfs*7)'} - assert results['NM_006920.5:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' - assert 
results['NM_006920.5:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' - assert results['NM_006920.5:c.233_242delinsGT']['hgvs_lrg_variant'] == '' - assert results['NM_006920.5:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_006920.5:c.233_242delinsGT' - assert results['NM_006920.5:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_006920.5:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_006920.5:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_006920.5:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_006920.5:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_006920.5:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_008851.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006920.5'} + assert 'NM_001353950.1:c.233_242delinsGT' in list(results.keys()) + assert results['NM_001353950.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_001353950.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001353950.1:c.233_242delinsGT']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert 
results['NM_001353950.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353950.1:c.233_242delinsGT' + assert results['NM_001353950.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001353950.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353950.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353950.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340879.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340879.1:p.(E78Gfs*7)'} + assert results['NM_001353950.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353950.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001353950.1:c.233_242delinsGT']['alt_genomic_loci'], []) + assert results['NM_001353950.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353950.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353950.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353950.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353950.1:c.233_242delinsGT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353950.1', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340879.1'} + + assert 'NM_001202435.1:c.233_242delinsGT' in list(results.keys()) + assert results['NM_001202435.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_001202435.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001202435.1:c.233_242delinsGT']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert results['NM_001202435.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001202435.1:c.233_242delinsGT' + assert results['NM_001202435.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001202435.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001202435.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001202435.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001189364.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001189364.1:p.(E78Gfs*7)'} + assert results['NM_001202435.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001202435.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001202435.1:c.233_242delinsGT']['alt_genomic_loci'], []) + assert results['NM_001202435.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001202435.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001202435.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'pos': 
'166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001202435.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001202435.1:c.233_242delinsGT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001202435.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001189364.1'} assert 'NM_001165963.1:c.233_242delinsGT' in list(results.keys()) - assert results['NM_001165963.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001165963.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001165963.1:c.233_242delinsGT']['alt_genomic_loci'], []) - assert results['NM_001165963.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' - assert results['NM_001165963.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159435.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159435.1:p.(E78Gfs*7)'} assert results['NM_001165963.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' - assert results['NM_001165963.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' - assert results['NM_001165963.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + assert results['NM_001165963.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001165963.1:c.233_242delinsGT']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} assert results['NM_001165963.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001165963.1:c.233_242delinsGT' + assert results['NM_001165963.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001165963.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_001165963.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001165963.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001165963.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001165963.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001165963.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001165963.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159435.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165963.1'} - - assert 'NM_001353955.1:c.233_242delinsGT' in list(results.keys()) - assert results['NM_001353955.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001353955.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001353955.1:c.233_242delinsGT']['alt_genomic_loci'], []) - assert results['NM_001353955.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' - assert results['NM_001353955.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340884.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340884.1:p.(E78Gfs*7)'} - assert results['NM_001353955.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' - assert 
results['NM_001353955.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' - assert results['NM_001353955.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' - assert results['NM_001353955.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353955.1:c.233_242delinsGT' - assert results['NM_001353955.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353955.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001353955.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001353955.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001353955.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001353955.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340884.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353955.1'} - - assert 'NM_001353961.1:c.-2193_-2184delinsGT' in list(results.keys()) - assert results['NM_001353961.1:c.-2193_-2184delinsGT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001353961.1:c.-2193_-2184delinsGT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001353961.1:c.-2193_-2184delinsGT']['alt_genomic_loci'], []) - assert results['NM_001353961.1:c.-2193_-2184delinsGT']['gene_symbol'] == 
'SCN1A' - assert results['NM_001353961.1:c.-2193_-2184delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340890.1:p.?', 'slr': 'NP_001340890.1:p.?'} - assert results['NM_001353961.1:c.-2193_-2184delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' - assert results['NM_001353961.1:c.-2193_-2184delinsGT']['genome_context_intronic_sequence'] == '' - assert results['NM_001353961.1:c.-2193_-2184delinsGT']['hgvs_lrg_variant'] == '' - assert results['NM_001353961.1:c.-2193_-2184delinsGT']['hgvs_transcript_variant'] == 'NM_001353961.1:c.-2193_-2184delinsGT' - assert results['NM_001353961.1:c.-2193_-2184delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353961.1:c.-2193_-2184delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001353961.1:c.-2193_-2184delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001353961.1:c.-2193_-2184delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001353961.1:c.-2193_-2184delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001353961.1:c.-2193_-2184delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340890.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353961.1'} - - assert 'NM_001165963.2:c.233_242delinsGT' in list(results.keys()) - assert 
results['NM_001165963.2:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001165963.2:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001165963.2:c.233_242delinsGT']['alt_genomic_loci'], []) - assert results['NM_001165963.2:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' - assert results['NM_001165963.2:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159435.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159435.1:p.(E78Gfs*7)'} - assert results['NM_001165963.2:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' - assert results['NM_001165963.2:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' - assert results['NM_001165963.2:c.233_242delinsGT']['hgvs_lrg_variant'] == '' - assert results['NM_001165963.2:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001165963.2:c.233_242delinsGT' - assert results['NM_001165963.2:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001165963.2:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001165963.2:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001165963.2:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001165963.2:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert 
results['NM_001165963.2:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159435.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165963.2'} - - assert 'NM_001353950.1:c.233_242delinsGT' in list(results.keys()) - assert results['NM_001353950.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001353950.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001353950.1:c.233_242delinsGT']['alt_genomic_loci'], []) - assert results['NM_001353950.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' - assert results['NM_001353950.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340879.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340879.1:p.(E78Gfs*7)'} - assert results['NM_001353950.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' - assert results['NM_001353950.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' - assert results['NM_001353950.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' - assert results['NM_001353950.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353950.1:c.233_242delinsGT' - assert results['NM_001353950.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353950.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001353950.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001353950.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 
'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001353950.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001353950.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340879.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353950.1'} + assert results['NM_001165963.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159435.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159435.1:p.(E78Gfs*7)'} + assert results['NM_001165963.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001165963.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001165963.1:c.233_242delinsGT']['alt_genomic_loci'], []) + assert results['NM_001165963.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001165963.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001165963.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001165963.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001165963.1:c.233_242delinsGT']['reference_sequence_records'] == 
{'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165963.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159435.1'} - assert results['flag'] == 'gene_variant' - assert 'NM_001353948.1:c.233_242delinsGT' in list(results.keys()) - assert results['NM_001353948.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001353948.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001353948.1:c.233_242delinsGT']['alt_genomic_loci'], []) - assert results['NM_001353948.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' - assert results['NM_001353948.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340877.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340877.1:p.(E78Gfs*7)'} - assert results['NM_001353948.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' - assert results['NM_001353948.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' - assert results['NM_001353948.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' - assert results['NM_001353948.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353948.1:c.233_242delinsGT' - assert results['NM_001353948.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353948.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001353948.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001353948.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - 
assert results['NM_001353948.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001353948.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340877.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353948.1'} + assert 'NM_001202435.2:c.233_242delinsGT' in list(results.keys()) + assert results['NM_001202435.2:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_001202435.2:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001202435.2:c.233_242delinsGT']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert results['NM_001202435.2:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001202435.2:c.233_242delinsGT' + assert results['NM_001202435.2:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001202435.2:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001202435.2:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001202435.2:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001189364.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001189364.1:p.(E78Gfs*7)'} + assert results['NM_001202435.2:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001202435.2:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001202435.2:c.233_242delinsGT']['alt_genomic_loci'], []) + assert results['NM_001202435.2:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert 
results['NM_001202435.2:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001202435.2:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001202435.2:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001202435.2:c.233_242delinsGT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001202435.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001189364.1'} assert 'NM_001353949.1:c.233_242delinsGT' in list(results.keys()) - assert results['NM_001353949.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001353949.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001353949.1:c.233_242delinsGT']['alt_genomic_loci'], []) - assert results['NM_001353949.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' - assert results['NM_001353949.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340878.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340878.1:p.(E78Gfs*7)'} assert results['NM_001353949.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' - assert results['NM_001353949.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' - assert results['NM_001353949.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + assert results['NM_001353949.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001353949.1:c.233_242delinsGT']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 
'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} assert results['NM_001353949.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353949.1:c.233_242delinsGT' + assert results['NM_001353949.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001353949.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353949.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353949.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001353949.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001353949.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001353949.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001353949.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340878.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353949.1'} + assert results['NM_001353949.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340878.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340878.1:p.(E78Gfs*7)'} + assert results['NM_001353949.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353949.1:c.233_242delinsGT']['hgvs_lrg_variant'] == 
'' + self.assertCountEqual(results['NM_001353949.1:c.233_242delinsGT']['alt_genomic_loci'], []) + assert results['NM_001353949.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353949.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353949.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353949.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353949.1:c.233_242delinsGT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353949.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340878.1'} assert 'NM_001353957.1:c.233_242delinsGT' in list(results.keys()) - assert results['NM_001353957.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001353957.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001353957.1:c.233_242delinsGT']['alt_genomic_loci'], []) - assert results['NM_001353957.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' - assert results['NM_001353957.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340886.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340886.1:p.(E78Gfs*7)'} assert results['NM_001353957.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' - 
assert results['NM_001353957.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' - assert results['NM_001353957.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + assert results['NM_001353957.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001353957.1:c.233_242delinsGT']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} assert results['NM_001353957.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353957.1:c.233_242delinsGT' + assert results['NM_001353957.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001353957.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353957.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353957.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001353957.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001353957.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001353957.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001353957.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340886.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353957.1'} - - assert 'NM_001353952.1:c.233_242delinsGT' in list(results.keys()) - assert results['NM_001353952.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001353952.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001353952.1:c.233_242delinsGT']['alt_genomic_loci'], []) - assert results['NM_001353952.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' - assert results['NM_001353952.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340881.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340881.1:p.(E78Gfs*7)'} - assert results['NM_001353952.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' - assert results['NM_001353952.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' - assert results['NM_001353952.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' - assert results['NM_001353952.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353952.1:c.233_242delinsGT' - assert results['NM_001353952.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353952.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001353952.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001353952.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001353952.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001353952.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340881.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353952.1'} + assert results['NM_001353957.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340886.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340886.1:p.(E78Gfs*7)'} + assert results['NM_001353957.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353957.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001353957.1:c.233_242delinsGT']['alt_genomic_loci'], []) + assert results['NM_001353957.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353957.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353957.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353957.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353957.1:c.233_242delinsGT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353957.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340886.1'} assert 
'NM_001353954.1:c.233_242delinsGT' in list(results.keys()) - assert results['NM_001353954.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001353954.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001353954.1:c.233_242delinsGT']['alt_genomic_loci'], []) - assert results['NM_001353954.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' - assert results['NM_001353954.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340883.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340883.1:p.(E78Gfs*7)'} assert results['NM_001353954.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' - assert results['NM_001353954.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' - assert results['NM_001353954.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + assert results['NM_001353954.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001353954.1:c.233_242delinsGT']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} assert results['NM_001353954.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353954.1:c.233_242delinsGT' + assert results['NM_001353954.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001353954.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353954.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353954.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001353954.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 
'AC'}} - assert results['NM_001353954.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001353954.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001353954.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340883.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353954.1'} + assert results['NM_001353954.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340883.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340883.1:p.(E78Gfs*7)'} + assert results['NM_001353954.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353954.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001353954.1:c.233_242delinsGT']['alt_genomic_loci'], []) + assert results['NM_001353954.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353954.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353954.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353954.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353954.1:c.233_242delinsGT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353954.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340883.1'} - assert 'NM_006920.4:c.233_242delinsGT' in list(results.keys()) - assert results['NM_006920.4:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == 'LRG_8t1:c.233_242delinsGT' - assert results['NM_006920.4:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_006920.4:c.233_242delinsGT']['alt_genomic_loci'], []) - assert results['NM_006920.4:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' - assert results['NM_006920.4:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_008851.3(LRG_8p1):p.(Glu78GlyfsTer7)', 'slr': 'NP_008851.3:p.(E78Gfs*7)'} - assert results['NM_006920.4:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' - assert results['NM_006920.4:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' - assert results['NM_006920.4:c.233_242delinsGT']['hgvs_lrg_variant'] == 'LRG_8:g.5251_5260delinsGT' - assert results['NM_006920.4:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_006920.4:c.233_242delinsGT' - assert results['NM_006920.4:c.233_242delinsGT']['hgvs_refseqgene_variant'] == 'NG_011906.1:g.5251_5260delinsGT' - assert results['NM_006920.4:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_006920.4:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert 
results['NM_006920.4:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_006920.4:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_006920.4:c.233_242delinsGT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011906.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_008851.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006920.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_8.xml'} + assert 'NM_001165963.2:c.233_242delinsGT' in list(results.keys()) + assert results['NM_001165963.2:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_001165963.2:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001165963.2:c.233_242delinsGT']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert results['NM_001165963.2:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001165963.2:c.233_242delinsGT' + assert results['NM_001165963.2:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001165963.2:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001165963.2:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001165963.2:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159435.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159435.1:p.(E78Gfs*7)'} + assert results['NM_001165963.2:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001165963.2:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['NM_001165963.2:c.233_242delinsGT']['alt_genomic_loci'], []) + assert results['NM_001165963.2:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001165963.2:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001165963.2:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001165963.2:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001165963.2:c.233_242delinsGT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165963.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159435.1'} + + assert 'NM_001353961.1:c.-2193_-2184delinsGT' in list(results.keys()) + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['hgvs_transcript_variant'] == 'NM_001353961.1:c.-2193_-2184delinsGT' + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['genome_context_intronic_sequence'] == '' + assert 
results['NM_001353961.1:c.-2193_-2184delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340890.1:p.?', 'slr': 'NP_001340890.1:p.?'} + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001353961.1:c.-2193_-2184delinsGT']['alt_genomic_loci'], []) + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353961.1:c.-2193_-2184delinsGT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353961.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340890.1'} + + assert 'NM_001353948.1:c.233_242delinsGT' in list(results.keys()) + assert results['NM_001353948.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' 
+ assert results['NM_001353948.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001353948.1:c.233_242delinsGT']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert results['NM_001353948.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353948.1:c.233_242delinsGT' + assert results['NM_001353948.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001353948.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353948.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353948.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340877.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340877.1:p.(E78Gfs*7)'} + assert results['NM_001353948.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353948.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001353948.1:c.233_242delinsGT']['alt_genomic_loci'], []) + assert results['NM_001353948.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353948.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353948.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353948.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 
'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353948.1:c.233_242delinsGT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353948.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340877.1'} + + assert 'NR_148667.1:n.638_647delinsGT' in list(results.keys()) + assert results['NR_148667.1:n.638_647delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NR_148667.1:n.638_647delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NR_148667.1:n.638_647delinsGT']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert results['NR_148667.1:n.638_647delinsGT']['hgvs_transcript_variant'] == 'NR_148667.1:n.638_647delinsGT' + assert results['NR_148667.1:n.638_647delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NR_148667.1:n.638_647delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_148667.1:n.638_647delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NR_148667.1:n.638_647delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_148667.1:n.638_647delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_148667.1:n.638_647delinsGT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_148667.1:n.638_647delinsGT']['alt_genomic_loci'], []) + assert results['NR_148667.1:n.638_647delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NR_148667.1:n.638_647delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert 
results['NR_148667.1:n.638_647delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NR_148667.1:n.638_647delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NR_148667.1:n.638_647delinsGT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_148667.1'} + + assert 'NM_001353955.1:c.233_242delinsGT' in list(results.keys()) + assert results['NM_001353955.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_001353955.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001353955.1:c.233_242delinsGT']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert results['NM_001353955.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353955.1:c.233_242delinsGT' + assert results['NM_001353955.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001353955.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353955.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353955.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340884.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340884.1:p.(E78Gfs*7)'} + assert results['NM_001353955.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353955.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001353955.1:c.233_242delinsGT']['alt_genomic_loci'], []) + assert results['NM_001353955.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353955.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353955.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353955.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353955.1:c.233_242delinsGT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353955.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340884.1'} + + assert 'NM_006920.5:c.233_242delinsGT' in list(results.keys()) + assert results['NM_006920.5:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' + assert results['NM_006920.5:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_006920.5:c.233_242delinsGT']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert results['NM_006920.5:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_006920.5:c.233_242delinsGT' + assert results['NM_006920.5:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_006920.5:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_006920.5:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' + assert results['NM_006920.5:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_008851.3(LRG_8p1):p.(Glu78GlyfsTer7)', 'slr': 'NP_008851.3:p.(E78Gfs*7)'} + assert results['NM_006920.5:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_006920.5:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_006920.5:c.233_242delinsGT']['alt_genomic_loci'], []) + assert results['NM_006920.5:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_006920.5:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_006920.5:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_006920.5:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_006920.5:c.233_242delinsGT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006920.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_008851.3'} assert 'NM_001353960.1:c.233_242delinsGT' in list(results.keys()) - assert results['NM_001353960.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001353960.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001353960.1:c.233_242delinsGT']['alt_genomic_loci'], []) - assert results['NM_001353960.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' - assert 
results['NM_001353960.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340889.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340889.1:p.(E78Gfs*7)'} assert results['NM_001353960.1:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' - assert results['NM_001353960.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' - assert results['NM_001353960.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + assert results['NM_001353960.1:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' + assert results['NM_001353960.1:c.233_242delinsGT']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} assert results['NM_001353960.1:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001353960.1:c.233_242delinsGT' + assert results['NM_001353960.1:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' + assert results['NM_001353960.1:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353960.1:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353960.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001353960.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001353960.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001353960.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 
'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001353960.1:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340889.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353960.1'} - - assert 'NM_001165964.2:c.233_242delinsGT' in list(results.keys()) - assert results['NM_001165964.2:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001165964.2:c.233_242delinsGT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001165964.2:c.233_242delinsGT']['alt_genomic_loci'], []) - assert results['NM_001165964.2:c.233_242delinsGT']['gene_symbol'] == 'SCN1A' - assert results['NM_001165964.2:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159436.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159436.1:p.(E78Gfs*7)'} - assert results['NM_001165964.2:c.233_242delinsGT']['submitted_variant'] == '2-166929889-GTCCAGGTCCT-GAC' - assert results['NM_001165964.2:c.233_242delinsGT']['genome_context_intronic_sequence'] == '' - assert results['NM_001165964.2:c.233_242delinsGT']['hgvs_lrg_variant'] == '' - assert results['NM_001165964.2:c.233_242delinsGT']['hgvs_transcript_variant'] == 'NM_001165964.2:c.233_242delinsGT' - assert results['NM_001165964.2:c.233_242delinsGT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001165964.2:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001165964.2:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001165964.2:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166929890', 'alt': 'AC'}} - assert results['NM_001165964.2:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'ref': 'TCCAGGTCCT', 'pos': '166073380', 'alt': 'AC'}} - assert results['NM_001165964.2:c.233_242delinsGT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159436.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165964.2'} - + assert results['NM_001353960.1:c.233_242delinsGT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340889.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340889.1:p.(E78Gfs*7)'} + assert results['NM_001353960.1:c.233_242delinsGT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353960.1:c.233_242delinsGT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001353960.1:c.233_242delinsGT']['alt_genomic_loci'], []) + assert results['NM_001353960.1:c.233_242delinsGT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353960.1:c.233_242delinsGT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': 'chr2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353960.1:c.233_242delinsGT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929890_166929899delinsAC', 'vcf': {'chr': '2', 'pos': '166929890', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert results['NM_001353960.1:c.233_242delinsGT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073380_166073389delinsAC', 'vcf': {'chr': '2', 'pos': '166073380', 'ref': 'TCCAGGTCCT', 'alt': 'AC'}} + assert 
results['NM_001353960.1:c.233_242delinsGT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353960.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340889.1'} def test_variant273(self): variant = '2-166929891-CCAGGTCCT-C' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NR_148667.1:n.638_645del' in list(results.keys()) - assert results['NR_148667.1:n.638_645del']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_148667.1:n.638_645del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_148667.1:n.638_645del']['alt_genomic_loci'], []) - assert results['NR_148667.1:n.638_645del']['gene_symbol'] == 'SCN1A' - assert results['NR_148667.1:n.638_645del']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} - assert results['NR_148667.1:n.638_645del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' - assert results['NR_148667.1:n.638_645del']['genome_context_intronic_sequence'] == '' - assert results['NR_148667.1:n.638_645del']['hgvs_lrg_variant'] == '' - assert results['NR_148667.1:n.638_645del']['hgvs_transcript_variant'] == 'NR_148667.1:n.638_645del' - assert results['NR_148667.1:n.638_645del']['hgvs_refseqgene_variant'] == '' - assert results['NR_148667.1:n.638_645del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NR_148667.1:n.638_645del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NR_148667.1:n.638_645del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} 
- assert results['NR_148667.1:n.638_645del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NR_148667.1:n.638_645del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_148667.1'} - - assert 'NM_001165964.2:c.233_240del' in list(results.keys()) - assert results['NM_001165964.2:c.233_240del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001165964.2:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001165964.2:c.233_240del']['alt_genomic_loci'], []) - assert results['NM_001165964.2:c.233_240del']['gene_symbol'] == 'SCN1A' - assert results['NM_001165964.2:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159436.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159436.1:p.(E78Gfs*7)'} - assert results['NM_001165964.2:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' - assert results['NM_001165964.2:c.233_240del']['genome_context_intronic_sequence'] == '' - assert results['NM_001165964.2:c.233_240del']['hgvs_lrg_variant'] == '' - assert results['NM_001165964.2:c.233_240del']['hgvs_transcript_variant'] == 'NM_001165964.2:c.233_240del' - assert results['NM_001165964.2:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001165964.2:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001165964.2:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001165964.2:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001165964.2:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001165964.2:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159436.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165964.2'} - + assert results['flag'] == 'gene_variant' assert 'NM_001353951.1:c.233_240del' in list(results.keys()) - assert results['NM_001353951.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001353951.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001353951.1:c.233_240del']['alt_genomic_loci'], []) - assert results['NM_001353951.1:c.233_240del']['gene_symbol'] == 'SCN1A' - assert results['NM_001353951.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340880.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340880.1:p.(E78Gfs*7)'} assert results['NM_001353951.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' - assert results['NM_001353951.1:c.233_240del']['genome_context_intronic_sequence'] == '' - assert results['NM_001353951.1:c.233_240del']['hgvs_lrg_variant'] == '' + assert results['NM_001353951.1:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001353951.1:c.233_240del']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} assert results['NM_001353951.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353951.1:c.233_240del' + assert results['NM_001353951.1:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001353951.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_001353951.1:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353951.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353951.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353951.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353951.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353951.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340880.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353951.1'} + assert results['NM_001353951.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340880.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340880.1:p.(E78Gfs*7)'} + assert results['NM_001353951.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353951.1:c.233_240del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001353951.1:c.233_240del']['alt_genomic_loci'], []) + assert results['NM_001353951.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353951.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353951.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353951.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353951.1:c.233_240del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353951.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340880.1'} - assert 'NM_001353954.1:c.233_240del' in list(results.keys()) - assert results['NM_001353954.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001353954.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001353954.1:c.233_240del']['alt_genomic_loci'], []) - assert results['NM_001353954.1:c.233_240del']['gene_symbol'] == 'SCN1A' - assert results['NM_001353954.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340883.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340883.1:p.(E78Gfs*7)'} - assert results['NM_001353954.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' - assert results['NM_001353954.1:c.233_240del']['genome_context_intronic_sequence'] == '' - assert results['NM_001353954.1:c.233_240del']['hgvs_lrg_variant'] == '' - assert results['NM_001353954.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353954.1:c.233_240del' - assert results['NM_001353954.1:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353954.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 
'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353954.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353954.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353954.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353954.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340883.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353954.1'} + assert 'NM_001353952.1:c.233_240del' in list(results.keys()) + assert results['NM_001353952.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_001353952.1:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001353952.1:c.233_240del']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert results['NM_001353952.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353952.1:c.233_240del' + assert results['NM_001353952.1:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001353952.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353952.1:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353952.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340881.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340881.1:p.(E78Gfs*7)'} + assert results['NM_001353952.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001353952.1:c.233_240del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001353952.1:c.233_240del']['alt_genomic_loci'], []) + assert results['NM_001353952.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353952.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353952.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353952.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353952.1:c.233_240del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353952.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340881.1'} + + assert 'NM_001353958.1:c.233_240del' in list(results.keys()) + assert results['NM_001353958.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_001353958.1:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001353958.1:c.233_240del']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert results['NM_001353958.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353958.1:c.233_240del' + assert results['NM_001353958.1:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001353958.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_001353958.1:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353958.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340887.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340887.1:p.(E78Gfs*7)'} + assert results['NM_001353958.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353958.1:c.233_240del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001353958.1:c.233_240del']['alt_genomic_loci'], []) + assert results['NM_001353958.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353958.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353958.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353958.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353958.1:c.233_240del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353958.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340887.1'} - assert 'NM_001353961.1:c.-2193_-2186del' in list(results.keys()) - assert results['NM_001353961.1:c.-2193_-2186del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001353961.1:c.-2193_-2186del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001353961.1:c.-2193_-2186del']['alt_genomic_loci'], []) - assert 
results['NM_001353961.1:c.-2193_-2186del']['gene_symbol'] == 'SCN1A' - assert results['NM_001353961.1:c.-2193_-2186del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340890.1:p.?', 'slr': 'NP_001340890.1:p.?'} - assert results['NM_001353961.1:c.-2193_-2186del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' - assert results['NM_001353961.1:c.-2193_-2186del']['genome_context_intronic_sequence'] == '' - assert results['NM_001353961.1:c.-2193_-2186del']['hgvs_lrg_variant'] == '' - assert results['NM_001353961.1:c.-2193_-2186del']['hgvs_transcript_variant'] == 'NM_001353961.1:c.-2193_-2186del' - assert results['NM_001353961.1:c.-2193_-2186del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353961.1:c.-2193_-2186del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353961.1:c.-2193_-2186del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353961.1:c.-2193_-2186del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353961.1:c.-2193_-2186del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353961.1:c.-2193_-2186del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340890.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353961.1'} + assert 'NM_001165964.2:c.233_240del' in list(results.keys()) + assert results['NM_001165964.2:c.233_240del']['submitted_variant'] == 
'2-166929891-CCAGGTCCT-C' + assert results['NM_001165964.2:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001165964.2:c.233_240del']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert results['NM_001165964.2:c.233_240del']['hgvs_transcript_variant'] == 'NM_001165964.2:c.233_240del' + assert results['NM_001165964.2:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001165964.2:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001165964.2:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001165964.2:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159436.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159436.1:p.(E78Gfs*7)'} + assert results['NM_001165964.2:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001165964.2:c.233_240del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001165964.2:c.233_240del']['alt_genomic_loci'], []) + assert results['NM_001165964.2:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001165964.2:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001165964.2:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001165964.2:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert 
results['NM_001165964.2:c.233_240del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165964.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159436.1'} - assert 'NM_001353948.1:c.233_240del' in list(results.keys()) - assert results['NM_001353948.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001353948.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001353948.1:c.233_240del']['alt_genomic_loci'], []) - assert results['NM_001353948.1:c.233_240del']['gene_symbol'] == 'SCN1A' - assert results['NM_001353948.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340877.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340877.1:p.(E78Gfs*7)'} - assert results['NM_001353948.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' - assert results['NM_001353948.1:c.233_240del']['genome_context_intronic_sequence'] == '' - assert results['NM_001353948.1:c.233_240del']['hgvs_lrg_variant'] == '' - assert results['NM_001353948.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353948.1:c.233_240del' - assert results['NM_001353948.1:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353948.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353948.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353948.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert 
results['NM_001353948.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353948.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340877.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353948.1'} + assert 'NM_001165964.1:c.233_240del' in list(results.keys()) + assert results['NM_001165964.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_001165964.1:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001165964.1:c.233_240del']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert results['NM_001165964.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001165964.1:c.233_240del' + assert results['NM_001165964.1:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001165964.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001165964.1:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001165964.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159436.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159436.1:p.(E78Gfs*7)'} + assert results['NM_001165964.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001165964.1:c.233_240del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001165964.1:c.233_240del']['alt_genomic_loci'], []) + assert results['NM_001165964.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001165964.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001165964.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001165964.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001165964.1:c.233_240del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165964.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159436.1'} - assert 'NM_001353960.1:c.233_240del' in list(results.keys()) - assert results['NM_001353960.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001353960.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001353960.1:c.233_240del']['alt_genomic_loci'], []) - assert results['NM_001353960.1:c.233_240del']['gene_symbol'] == 'SCN1A' - assert results['NM_001353960.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340889.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340889.1:p.(E78Gfs*7)'} - assert results['NM_001353960.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' - assert results['NM_001353960.1:c.233_240del']['genome_context_intronic_sequence'] == '' - assert results['NM_001353960.1:c.233_240del']['hgvs_lrg_variant'] == '' - assert results['NM_001353960.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353960.1:c.233_240del' - assert results['NM_001353960.1:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353960.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 
'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353960.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353960.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353960.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353960.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340889.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353960.1'} + assert 'NM_006920.4:c.233_240del' in list(results.keys()) + assert results['NM_006920.4:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_006920.4:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_006920.4:c.233_240del']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert results['NM_006920.4:c.233_240del']['hgvs_transcript_variant'] == 'NM_006920.4:c.233_240del' + assert results['NM_006920.4:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_006920.4:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_006920.4:c.233_240del']['hgvs_refseqgene_variant'] == 'NG_011906.1:g.5251_5258del' + assert results['NM_006920.4:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_008851.3(LRG_8p1):p.(Glu78GlyfsTer7)', 'slr': 'NP_008851.3:p.(E78Gfs*7)'} + assert results['NM_006920.4:c.233_240del']['hgvs_lrg_transcript_variant'] == 
'LRG_8t1:c.233_240del' + assert results['NM_006920.4:c.233_240del']['hgvs_lrg_variant'] == 'LRG_8:g.5251_5258del' + self.assertCountEqual(results['NM_006920.4:c.233_240del']['alt_genomic_loci'], []) + assert results['NM_006920.4:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_006920.4:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_006920.4:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_006920.4:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_006920.4:c.233_240del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006920.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_008851.3', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011906.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_8.xml'} + + assert 'NM_001353950.1:c.233_240del' in list(results.keys()) + assert results['NM_001353950.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_001353950.1:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001353950.1:c.233_240del']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert results['NM_001353950.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353950.1:c.233_240del' + assert 
results['NM_001353950.1:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001353950.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353950.1:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353950.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340879.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340879.1:p.(E78Gfs*7)'} + assert results['NM_001353950.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353950.1:c.233_240del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001353950.1:c.233_240del']['alt_genomic_loci'], []) + assert results['NM_001353950.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353950.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353950.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353950.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353950.1:c.233_240del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353950.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340879.1'} assert 'NM_001202435.1:c.233_240del' in list(results.keys()) - assert results['NM_001202435.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_001202435.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001202435.1:c.233_240del']['alt_genomic_loci'], []) - assert results['NM_001202435.1:c.233_240del']['gene_symbol'] == 'SCN1A' - assert results['NM_001202435.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001189364.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001189364.1:p.(E78Gfs*7)'} assert results['NM_001202435.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' - assert results['NM_001202435.1:c.233_240del']['genome_context_intronic_sequence'] == '' - assert results['NM_001202435.1:c.233_240del']['hgvs_lrg_variant'] == '' + assert results['NM_001202435.1:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001202435.1:c.233_240del']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} assert results['NM_001202435.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001202435.1:c.233_240del' + assert results['NM_001202435.1:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001202435.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001202435.1:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001202435.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001202435.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001202435.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert 
results['NM_001202435.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001202435.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001189364.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001202435.1'} + assert results['NM_001202435.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001189364.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001189364.1:p.(E78Gfs*7)'} + assert results['NM_001202435.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001202435.1:c.233_240del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001202435.1:c.233_240del']['alt_genomic_loci'], []) + assert results['NM_001202435.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001202435.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001202435.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001202435.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001202435.1:c.233_240del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001202435.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001189364.1'} + + 
assert 'NM_001165963.1:c.233_240del' in list(results.keys()) + assert results['NM_001165963.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_001165963.1:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001165963.1:c.233_240del']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert results['NM_001165963.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001165963.1:c.233_240del' + assert results['NM_001165963.1:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001165963.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001165963.1:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001165963.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159435.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159435.1:p.(E78Gfs*7)'} + assert results['NM_001165963.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001165963.1:c.233_240del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001165963.1:c.233_240del']['alt_genomic_loci'], []) + assert results['NM_001165963.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001165963.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001165963.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001165963.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001165963.1:c.233_240del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165963.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159435.1'} assert 'NM_001202435.2:c.233_240del' in list(results.keys()) - assert results['NM_001202435.2:c.233_240del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001202435.2:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001202435.2:c.233_240del']['alt_genomic_loci'], []) - assert results['NM_001202435.2:c.233_240del']['gene_symbol'] == 'SCN1A' - assert results['NM_001202435.2:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001189364.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001189364.1:p.(E78Gfs*7)'} assert results['NM_001202435.2:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' - assert results['NM_001202435.2:c.233_240del']['genome_context_intronic_sequence'] == '' - assert results['NM_001202435.2:c.233_240del']['hgvs_lrg_variant'] == '' + assert results['NM_001202435.2:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001202435.2:c.233_240del']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} assert results['NM_001202435.2:c.233_240del']['hgvs_transcript_variant'] == 'NM_001202435.2:c.233_240del' + assert results['NM_001202435.2:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001202435.2:c.233_240del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001202435.2:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001202435.2:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 
'C'}} - assert results['NM_001202435.2:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001202435.2:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001202435.2:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001202435.2:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001189364.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001202435.2'} - - assert 'NM_006920.5:c.233_240del' in list(results.keys()) - assert results['NM_006920.5:c.233_240del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_006920.5:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_006920.5:c.233_240del']['alt_genomic_loci'], []) - assert results['NM_006920.5:c.233_240del']['gene_symbol'] == 'SCN1A' - assert results['NM_006920.5:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_008851.3(LRG_8p1):p.(Glu78GlyfsTer7)', 'slr': 'NP_008851.3:p.(E78Gfs*7)'} - assert results['NM_006920.5:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' - assert results['NM_006920.5:c.233_240del']['genome_context_intronic_sequence'] == '' - assert results['NM_006920.5:c.233_240del']['hgvs_lrg_variant'] == '' - assert results['NM_006920.5:c.233_240del']['hgvs_transcript_variant'] == 'NM_006920.5:c.233_240del' - assert results['NM_006920.5:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_006920.5:c.233_240del']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_006920.5:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_006920.5:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_006920.5:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_006920.5:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_008851.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006920.5'} - - assert 'NM_001353955.1:c.233_240del' in list(results.keys()) - assert results['NM_001353955.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001353955.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001353955.1:c.233_240del']['alt_genomic_loci'], []) - assert results['NM_001353955.1:c.233_240del']['gene_symbol'] == 'SCN1A' - assert results['NM_001353955.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340884.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340884.1:p.(E78Gfs*7)'} - assert results['NM_001353955.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' - assert results['NM_001353955.1:c.233_240del']['genome_context_intronic_sequence'] == '' - assert results['NM_001353955.1:c.233_240del']['hgvs_lrg_variant'] == '' - assert results['NM_001353955.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353955.1:c.233_240del' - assert 
results['NM_001353955.1:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353955.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353955.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353955.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353955.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353955.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340884.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353955.1'} + assert results['NM_001202435.2:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001189364.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001189364.1:p.(E78Gfs*7)'} + assert results['NM_001202435.2:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001202435.2:c.233_240del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001202435.2:c.233_240del']['alt_genomic_loci'], []) + assert results['NM_001202435.2:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001202435.2:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001202435.2:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001202435.2:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001202435.2:c.233_240del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001202435.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001189364.1'} - assert 'NM_001353952.1:c.233_240del' in list(results.keys()) - assert results['NM_001353952.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001353952.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001353952.1:c.233_240del']['alt_genomic_loci'], []) - assert results['NM_001353952.1:c.233_240del']['gene_symbol'] == 'SCN1A' - assert results['NM_001353952.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340881.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340881.1:p.(E78Gfs*7)'} - assert results['NM_001353952.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' - assert results['NM_001353952.1:c.233_240del']['genome_context_intronic_sequence'] == '' - assert results['NM_001353952.1:c.233_240del']['hgvs_lrg_variant'] == '' - assert results['NM_001353952.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353952.1:c.233_240del' - assert results['NM_001353952.1:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353952.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 
'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353952.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353952.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353952.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353952.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340881.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353952.1'} + assert 'NM_001353949.1:c.233_240del' in list(results.keys()) + assert results['NM_001353949.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_001353949.1:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001353949.1:c.233_240del']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert results['NM_001353949.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353949.1:c.233_240del' + assert results['NM_001353949.1:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001353949.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353949.1:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353949.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340878.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340878.1:p.(E78Gfs*7)'} + assert results['NM_001353949.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001353949.1:c.233_240del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001353949.1:c.233_240del']['alt_genomic_loci'], []) + assert results['NM_001353949.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353949.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353949.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353949.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353949.1:c.233_240del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353949.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340878.1'} assert 'NM_001353957.1:c.233_240del' in list(results.keys()) - assert results['NM_001353957.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001353957.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001353957.1:c.233_240del']['alt_genomic_loci'], []) - assert results['NM_001353957.1:c.233_240del']['gene_symbol'] == 'SCN1A' - assert results['NM_001353957.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340886.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340886.1:p.(E78Gfs*7)'} assert results['NM_001353957.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' - assert 
results['NM_001353957.1:c.233_240del']['genome_context_intronic_sequence'] == '' - assert results['NM_001353957.1:c.233_240del']['hgvs_lrg_variant'] == '' + assert results['NM_001353957.1:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001353957.1:c.233_240del']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} assert results['NM_001353957.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353957.1:c.233_240del' + assert results['NM_001353957.1:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001353957.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001353957.1:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353957.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353957.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353957.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353957.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353957.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340886.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353957.1'} - - assert results['flag'] == 'gene_variant' - assert 'NM_006920.4:c.233_240del' in 
list(results.keys()) - assert results['NM_006920.4:c.233_240del']['hgvs_lrg_transcript_variant'] == 'LRG_8t1:c.233_240del' - assert results['NM_006920.4:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_006920.4:c.233_240del']['alt_genomic_loci'], []) - assert results['NM_006920.4:c.233_240del']['gene_symbol'] == 'SCN1A' - assert results['NM_006920.4:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_008851.3(LRG_8p1):p.(Glu78GlyfsTer7)', 'slr': 'NP_008851.3:p.(E78Gfs*7)'} - assert results['NM_006920.4:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' - assert results['NM_006920.4:c.233_240del']['genome_context_intronic_sequence'] == '' - assert results['NM_006920.4:c.233_240del']['hgvs_lrg_variant'] == 'LRG_8:g.5251_5258del' - assert results['NM_006920.4:c.233_240del']['hgvs_transcript_variant'] == 'NM_006920.4:c.233_240del' - assert results['NM_006920.4:c.233_240del']['hgvs_refseqgene_variant'] == 'NG_011906.1:g.5251_5258del' - assert results['NM_006920.4:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_006920.4:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_006920.4:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_006920.4:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert 
results['NM_006920.4:c.233_240del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011906.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_008851.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006920.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_8.xml'} + assert results['NM_001353957.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340886.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340886.1:p.(E78Gfs*7)'} + assert results['NM_001353957.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353957.1:c.233_240del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001353957.1:c.233_240del']['alt_genomic_loci'], []) + assert results['NM_001353957.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353957.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353957.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353957.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353957.1:c.233_240del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353957.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340886.1'} - assert 'NM_001353950.1:c.233_240del' in list(results.keys()) - assert 
results['NM_001353950.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001353950.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001353950.1:c.233_240del']['alt_genomic_loci'], []) - assert results['NM_001353950.1:c.233_240del']['gene_symbol'] == 'SCN1A' - assert results['NM_001353950.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340879.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340879.1:p.(E78Gfs*7)'} - assert results['NM_001353950.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' - assert results['NM_001353950.1:c.233_240del']['genome_context_intronic_sequence'] == '' - assert results['NM_001353950.1:c.233_240del']['hgvs_lrg_variant'] == '' - assert results['NM_001353950.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353950.1:c.233_240del' - assert results['NM_001353950.1:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353950.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353950.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353950.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353950.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353950.1:c.233_240del']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340879.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353950.1'} + assert 'NM_001353954.1:c.233_240del' in list(results.keys()) + assert results['NM_001353954.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_001353954.1:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001353954.1:c.233_240del']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert results['NM_001353954.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353954.1:c.233_240del' + assert results['NM_001353954.1:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001353954.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353954.1:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353954.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340883.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340883.1:p.(E78Gfs*7)'} + assert results['NM_001353954.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353954.1:c.233_240del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001353954.1:c.233_240del']['alt_genomic_loci'], []) + assert results['NM_001353954.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353954.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353954.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 
'C'}} + assert results['NM_001353954.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353954.1:c.233_240del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353954.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340883.1'} assert 'NM_001165963.2:c.233_240del' in list(results.keys()) - assert results['NM_001165963.2:c.233_240del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001165963.2:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001165963.2:c.233_240del']['alt_genomic_loci'], []) - assert results['NM_001165963.2:c.233_240del']['gene_symbol'] == 'SCN1A' - assert results['NM_001165963.2:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159435.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159435.1:p.(E78Gfs*7)'} assert results['NM_001165963.2:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' - assert results['NM_001165963.2:c.233_240del']['genome_context_intronic_sequence'] == '' - assert results['NM_001165963.2:c.233_240del']['hgvs_lrg_variant'] == '' + assert results['NM_001165963.2:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001165963.2:c.233_240del']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} assert results['NM_001165963.2:c.233_240del']['hgvs_transcript_variant'] == 'NM_001165963.2:c.233_240del' + assert results['NM_001165963.2:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001165963.2:c.233_240del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001165963.2:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001165963.2:c.233_240del']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001165963.2:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001165963.2:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001165963.2:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001165963.2:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159435.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165963.2'} + assert results['NM_001165963.2:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159435.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159435.1:p.(E78Gfs*7)'} + assert results['NM_001165963.2:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001165963.2:c.233_240del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001165963.2:c.233_240del']['alt_genomic_loci'], []) + assert results['NM_001165963.2:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001165963.2:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert 
results['NM_001165963.2:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001165963.2:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001165963.2:c.233_240del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165963.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159435.1'} - assert 'NM_001165963.1:c.233_240del' in list(results.keys()) - assert results['NM_001165963.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001165963.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001165963.1:c.233_240del']['alt_genomic_loci'], []) - assert results['NM_001165963.1:c.233_240del']['gene_symbol'] == 'SCN1A' - assert results['NM_001165963.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159435.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159435.1:p.(E78Gfs*7)'} - assert results['NM_001165963.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' - assert results['NM_001165963.1:c.233_240del']['genome_context_intronic_sequence'] == '' - assert results['NM_001165963.1:c.233_240del']['hgvs_lrg_variant'] == '' - assert results['NM_001165963.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001165963.1:c.233_240del' - assert results['NM_001165963.1:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001165963.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert 
results['NM_001165963.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001165963.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001165963.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001165963.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159435.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165963.1'} + assert 'NM_001353961.1:c.-2193_-2186del' in list(results.keys()) + assert results['NM_001353961.1:c.-2193_-2186del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_001353961.1:c.-2193_-2186del']['gene_symbol'] == 'SCN1A' + assert results['NM_001353961.1:c.-2193_-2186del']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert results['NM_001353961.1:c.-2193_-2186del']['hgvs_transcript_variant'] == 'NM_001353961.1:c.-2193_-2186del' + assert results['NM_001353961.1:c.-2193_-2186del']['genome_context_intronic_sequence'] == '' + assert results['NM_001353961.1:c.-2193_-2186del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353961.1:c.-2193_-2186del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353961.1:c.-2193_-2186del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340890.1:p.?', 'slr': 'NP_001340890.1:p.?'} + assert results['NM_001353961.1:c.-2193_-2186del']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_001353961.1:c.-2193_-2186del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001353961.1:c.-2193_-2186del']['alt_genomic_loci'], []) + assert results['NM_001353961.1:c.-2193_-2186del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353961.1:c.-2193_-2186del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353961.1:c.-2193_-2186del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353961.1:c.-2193_-2186del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353961.1:c.-2193_-2186del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353961.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340890.1'} - assert 'NM_001165964.1:c.233_240del' in list(results.keys()) - assert results['NM_001165964.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001165964.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001165964.1:c.233_240del']['alt_genomic_loci'], []) - assert results['NM_001165964.1:c.233_240del']['gene_symbol'] == 'SCN1A' - assert results['NM_001165964.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001159436.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001159436.1:p.(E78Gfs*7)'} - assert results['NM_001165964.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' - assert 
results['NM_001165964.1:c.233_240del']['genome_context_intronic_sequence'] == '' - assert results['NM_001165964.1:c.233_240del']['hgvs_lrg_variant'] == '' - assert results['NM_001165964.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001165964.1:c.233_240del' - assert results['NM_001165964.1:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001165964.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001165964.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001165964.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001165964.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001165964.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001159436.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001165964.1'} + assert 'NM_001353948.1:c.233_240del' in list(results.keys()) + assert results['NM_001353948.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_001353948.1:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001353948.1:c.233_240del']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert results['NM_001353948.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353948.1:c.233_240del' + assert 
results['NM_001353948.1:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001353948.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353948.1:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353948.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340877.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340877.1:p.(E78Gfs*7)'} + assert results['NM_001353948.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353948.1:c.233_240del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001353948.1:c.233_240del']['alt_genomic_loci'], []) + assert results['NM_001353948.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353948.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353948.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353948.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353948.1:c.233_240del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353948.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340877.1'} - assert 'NM_001353958.1:c.233_240del' in list(results.keys()) - assert results['NM_001353958.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_001353958.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001353958.1:c.233_240del']['alt_genomic_loci'], []) - assert results['NM_001353958.1:c.233_240del']['gene_symbol'] == 'SCN1A' - assert results['NM_001353958.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340887.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340887.1:p.(E78Gfs*7)'} - assert results['NM_001353958.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' - assert results['NM_001353958.1:c.233_240del']['genome_context_intronic_sequence'] == '' - assert results['NM_001353958.1:c.233_240del']['hgvs_lrg_variant'] == '' - assert results['NM_001353958.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353958.1:c.233_240del' - assert results['NM_001353958.1:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353958.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353958.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353958.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353958.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353958.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340887.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353958.1'} + assert 'NR_148667.1:n.638_645del' in list(results.keys()) + assert results['NR_148667.1:n.638_645del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NR_148667.1:n.638_645del']['gene_symbol'] == 'SCN1A' + assert results['NR_148667.1:n.638_645del']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert results['NR_148667.1:n.638_645del']['hgvs_transcript_variant'] == 'NR_148667.1:n.638_645del' + assert results['NR_148667.1:n.638_645del']['genome_context_intronic_sequence'] == '' + assert results['NR_148667.1:n.638_645del']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_148667.1:n.638_645del']['hgvs_refseqgene_variant'] == '' + assert results['NR_148667.1:n.638_645del']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_148667.1:n.638_645del']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_148667.1:n.638_645del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_148667.1:n.638_645del']['alt_genomic_loci'], []) + assert results['NR_148667.1:n.638_645del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NR_148667.1:n.638_645del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NR_148667.1:n.638_645del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NR_148667.1:n.638_645del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NR_148667.1:n.638_645del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_148667.1'} - assert 'NM_001353949.1:c.233_240del' in list(results.keys()) - assert results['NM_001353949.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001353949.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001353949.1:c.233_240del']['alt_genomic_loci'], []) - assert results['NM_001353949.1:c.233_240del']['gene_symbol'] == 'SCN1A' - assert results['NM_001353949.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340878.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340878.1:p.(E78Gfs*7)'} - assert results['NM_001353949.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' - assert results['NM_001353949.1:c.233_240del']['genome_context_intronic_sequence'] == '' - assert results['NM_001353949.1:c.233_240del']['hgvs_lrg_variant'] == '' - assert results['NM_001353949.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353949.1:c.233_240del' - assert results['NM_001353949.1:c.233_240del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001353949.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert results['NM_001353949.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353949.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166929891', 'alt': 'C'}} - assert 
results['NM_001353949.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'ref': 'CCAGGTCCT', 'pos': '166073381', 'alt': 'C'}} - assert results['NM_001353949.1:c.233_240del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340878.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353949.1'} + assert 'NM_001353955.1:c.233_240del' in list(results.keys()) + assert results['NM_001353955.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_001353955.1:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001353955.1:c.233_240del']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert results['NM_001353955.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353955.1:c.233_240del' + assert results['NM_001353955.1:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001353955.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353955.1:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353955.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001340884.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340884.1:p.(E78Gfs*7)'} + assert results['NM_001353955.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353955.1:c.233_240del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001353955.1:c.233_240del']['alt_genomic_loci'], []) + assert results['NM_001353955.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353955.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353955.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353955.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353955.1:c.233_240del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353955.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340884.1'} + + assert 'NM_006920.5:c.233_240del' in list(results.keys()) + assert results['NM_006920.5:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_006920.5:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_006920.5:c.233_240del']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert results['NM_006920.5:c.233_240del']['hgvs_transcript_variant'] == 'NM_006920.5:c.233_240del' + assert results['NM_006920.5:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_006920.5:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_006920.5:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_006920.5:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_008851.3(LRG_8p1):p.(Glu78GlyfsTer7)', 'slr': 'NP_008851.3:p.(E78Gfs*7)'} + assert results['NM_006920.5:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_006920.5:c.233_240del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_006920.5:c.233_240del']['alt_genomic_loci'], []) + assert 
results['NM_006920.5:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_006920.5:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_006920.5:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_006920.5:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_006920.5:c.233_240del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006920.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_008851.3'} + assert 'NM_001353960.1:c.233_240del' in list(results.keys()) + assert results['NM_001353960.1:c.233_240del']['submitted_variant'] == '2-166929891-CCAGGTCCT-C' + assert results['NM_001353960.1:c.233_240del']['gene_symbol'] == 'SCN1A' + assert results['NM_001353960.1:c.233_240del']['gene_ids'] == {'hgnc_id': 'HGNC:10585', 'entrez_gene_id': '6323', 'ucsc_id': 'uc061peu.1', 'omim_id': ['182389']} + assert results['NM_001353960.1:c.233_240del']['hgvs_transcript_variant'] == 'NM_001353960.1:c.233_240del' + assert results['NM_001353960.1:c.233_240del']['genome_context_intronic_sequence'] == '' + assert results['NM_001353960.1:c.233_240del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001353960.1:c.233_240del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001353960.1:c.233_240del']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_001340889.1:p.(Glu78GlyfsTer7)', 'slr': 'NP_001340889.1:p.(E78Gfs*7)'} + assert results['NM_001353960.1:c.233_240del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001353960.1:c.233_240del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001353960.1:c.233_240del']['alt_genomic_loci'], []) + assert results['NM_001353960.1:c.233_240del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': 'chr2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353960.1:c.233_240del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': 'chr2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353960.1:c.233_240del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.166929893_166929900del', 'vcf': {'chr': '2', 'pos': '166929891', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353960.1:c.233_240del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.166073383_166073390del', 'vcf': {'chr': '2', 'pos': '166073381', 'ref': 'CCAGGTCCT', 'alt': 'C'}} + assert results['NM_001353960.1:c.233_240del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001353960.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001340889.1'} def test_variant274(self): variant = '2-179393504-G-T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_001256850.1:c.102051C>A' in list(results.keys()) - assert results['NM_001256850.1:c.102051C>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001256850.1:c.102051C>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001256850.1:c.102051C>A']['alt_genomic_loci'], []) - assert results['NM_001256850.1:c.102051C>A']['gene_symbol'] == 'TTN' - 
assert results['NM_001256850.1:c.102051C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243779.1:p.(Ser34017Arg)', 'slr': 'NP_001243779.1:p.(S34017R)'} - assert results['NM_001256850.1:c.102051C>A']['submitted_variant'] == '2-179393504-G-T' - assert results['NM_001256850.1:c.102051C>A']['genome_context_intronic_sequence'] == '' - assert results['NM_001256850.1:c.102051C>A']['hgvs_lrg_variant'] == '' - assert results['NM_001256850.1:c.102051C>A']['hgvs_transcript_variant'] == 'NM_001256850.1:c.102051C>A' - assert results['NM_001256850.1:c.102051C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001256850.1:c.102051C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} - assert results['NM_001256850.1:c.102051C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} - assert results['NM_001256850.1:c.102051C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} - assert results['NM_001256850.1:c.102051C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} - assert results['NM_001256850.1:c.102051C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243779.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256850.1'} - - assert 'NR_038271.1:n.446+5141G>T' in list(results.keys()) - assert results['NR_038271.1:n.446+5141G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_038271.1:n.446+5141G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_038271.1:n.446+5141G>T']['alt_genomic_loci'], []) - assert 
results['NR_038271.1:n.446+5141G>T']['gene_symbol'] == 'TTN-AS1' - assert results['NR_038271.1:n.446+5141G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} - assert results['NR_038271.1:n.446+5141G>T']['submitted_variant'] == '2-179393504-G-T' - assert results['NR_038271.1:n.446+5141G>T']['genome_context_intronic_sequence'] == 'NC_000002.11(NR_038271.1):c.446+5141G>T' - assert results['NR_038271.1:n.446+5141G>T']['hgvs_lrg_variant'] == '' - assert results['NR_038271.1:n.446+5141G>T']['hgvs_transcript_variant'] == 'NR_038271.1:n.446+5141G>T' - assert results['NR_038271.1:n.446+5141G>T']['hgvs_refseqgene_variant'] == '' - assert results['NR_038271.1:n.446+5141G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} - assert results['NR_038271.1:n.446+5141G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} - assert results['NR_038271.1:n.446+5141G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} - assert results['NR_038271.1:n.446+5141G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} - assert results['NR_038271.1:n.446+5141G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_038271.1'} - - assert 'NM_133432.3:c.80154C>A' in list(results.keys()) - assert results['NM_133432.3:c.80154C>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_133432.3:c.80154C>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_133432.3:c.80154C>A']['alt_genomic_loci'], []) - assert 
results['NM_133432.3:c.80154C>A']['gene_symbol'] == 'TTN' - assert results['NM_133432.3:c.80154C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_597676.3:p.(Ser26718Arg)', 'slr': 'NP_597676.3:p.(S26718R)'} - assert results['NM_133432.3:c.80154C>A']['submitted_variant'] == '2-179393504-G-T' - assert results['NM_133432.3:c.80154C>A']['genome_context_intronic_sequence'] == '' - assert results['NM_133432.3:c.80154C>A']['hgvs_lrg_variant'] == '' - assert results['NM_133432.3:c.80154C>A']['hgvs_transcript_variant'] == 'NM_133432.3:c.80154C>A' - assert results['NM_133432.3:c.80154C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_133432.3:c.80154C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} - assert results['NM_133432.3:c.80154C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} - assert results['NM_133432.3:c.80154C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} - assert results['NM_133432.3:c.80154C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} - assert results['NM_133432.3:c.80154C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_597676.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_133432.3'} - - assert 'NM_001267550.1:c.106974C>A' in list(results.keys()) - assert results['NM_001267550.1:c.106974C>A']['hgvs_lrg_transcript_variant'] == 'LRG_391t1:c.106974C>A' - assert results['NM_001267550.1:c.106974C>A']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_001267550.1:c.106974C>A']['alt_genomic_loci'], []) - assert results['NM_001267550.1:c.106974C>A']['gene_symbol'] == 'TTN' - assert results['NM_001267550.1:c.106974C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001254479.1(LRG_391p1):p.(Ser35658Arg)', 'slr': 'NP_001254479.1:p.(S35658R)'} - assert results['NM_001267550.1:c.106974C>A']['submitted_variant'] == '2-179393504-G-T' - assert results['NM_001267550.1:c.106974C>A']['genome_context_intronic_sequence'] == '' - assert results['NM_001267550.1:c.106974C>A']['hgvs_lrg_variant'] == 'LRG_391:g.307026C>A' - assert results['NM_001267550.1:c.106974C>A']['hgvs_transcript_variant'] == 'NM_001267550.1:c.106974C>A' - assert results['NM_001267550.1:c.106974C>A']['hgvs_refseqgene_variant'] == 'NG_011618.3:g.307026C>A' - assert results['NM_001267550.1:c.106974C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} - assert 'hg38' not in list(results['NM_001267550.1:c.106974C>A']['primary_assembly_loci'].keys()) - assert results['NM_001267550.1:c.106974C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} - assert 'grch38' not in list(results['NM_001267550.1:c.106974C>A']['primary_assembly_loci'].keys()) - assert results['NM_001267550.1:c.106974C>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011618.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001254479.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001267550.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_391.xml'} - + assert results['flag'] == 'gene_variant' assert 'NR_038272.1:n.219+5141G>T' in list(results.keys()) - assert results['NR_038272.1:n.219+5141G>T']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NR_038272.1:n.219+5141G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_038272.1:n.219+5141G>T']['alt_genomic_loci'], []) - assert results['NR_038272.1:n.219+5141G>T']['gene_symbol'] == 'TTN-AS1' - assert results['NR_038272.1:n.219+5141G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_038272.1:n.219+5141G>T']['submitted_variant'] == '2-179393504-G-T' - assert results['NR_038272.1:n.219+5141G>T']['genome_context_intronic_sequence'] == 'NC_000002.11(NR_038272.1):c.219+5141G>T' - assert results['NR_038272.1:n.219+5141G>T']['hgvs_lrg_variant'] == '' + assert results['NR_038272.1:n.219+5141G>T']['gene_symbol'] == 'TTN-AS1' + assert results['NR_038272.1:n.219+5141G>T']['gene_ids'] == {'hgnc_id': 'HGNC:44124', 'entrez_gene_id': '100506866', 'ucsc_id': 'uc061qcw.1', 'omim_id': []} assert results['NR_038272.1:n.219+5141G>T']['hgvs_transcript_variant'] == 'NR_038272.1:n.219+5141G>T' + assert results['NR_038272.1:n.219+5141G>T']['genome_context_intronic_sequence'] == 'NC_000002.11(NR_038272.1):c.219+5141G>T' + assert results['NR_038272.1:n.219+5141G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NR_038272.1:n.219+5141G>T']['hgvs_refseqgene_variant'] == '' - assert results['NR_038272.1:n.219+5141G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} - assert results['NR_038272.1:n.219+5141G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} - assert results['NR_038272.1:n.219+5141G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} - assert 
results['NR_038272.1:n.219+5141G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} + assert results['NR_038272.1:n.219+5141G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_038272.1:n.219+5141G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_038272.1:n.219+5141G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_038272.1:n.219+5141G>T']['alt_genomic_loci'], []) + assert results['NR_038272.1:n.219+5141G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'pos': '179393504', 'ref': 'G', 'alt': 'T'}} + assert results['NR_038272.1:n.219+5141G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'pos': '178528777', 'ref': 'G', 'alt': 'T'}} + assert results['NR_038272.1:n.219+5141G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'pos': '179393504', 'ref': 'G', 'alt': 'T'}} + assert results['NR_038272.1:n.219+5141G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'pos': '178528777', 'ref': 'G', 'alt': 'T'}} assert results['NR_038272.1:n.219+5141G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_038272.1'} + assert 'NM_133437.3:c.80355C>A' in list(results.keys()) + assert results['NM_133437.3:c.80355C>A']['submitted_variant'] == '2-179393504-G-T' + assert results['NM_133437.3:c.80355C>A']['gene_symbol'] == 'TTN' + assert results['NM_133437.3:c.80355C>A']['gene_ids'] == {'hgnc_id': 'HGNC:12403', 'entrez_gene_id': '7273', 'ucsc_id': 'uc031rqc.3', 'omim_id': ['188840']} + assert results['NM_133437.3:c.80355C>A']['hgvs_transcript_variant'] 
== 'NM_133437.3:c.80355C>A' + assert results['NM_133437.3:c.80355C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_133437.3:c.80355C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_133437.3:c.80355C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_133437.3:c.80355C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_597681.3:p.(Ser26785Arg)', 'slr': 'NP_597681.3:p.(S26785R)'} + assert results['NM_133437.3:c.80355C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_133437.3:c.80355C>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_133437.3:c.80355C>A']['alt_genomic_loci'], []) + assert results['NM_133437.3:c.80355C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'pos': '179393504', 'ref': 'G', 'alt': 'T'}} + assert 'hg38' not in list(results['NM_133437.3:c.80355C>A']['primary_assembly_loci'].keys()) + assert results['NM_133437.3:c.80355C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'pos': '179393504', 'ref': 'G', 'alt': 'T'}} + assert 'grch38' not in list(results['NM_133437.3:c.80355C>A']['primary_assembly_loci'].keys()) + assert results['NM_133437.3:c.80355C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_133437.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_597681.3'} + assert 'NM_133437.4:c.80355C>A' in list(results.keys()) - assert results['NM_133437.4:c.80355C>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_133437.4:c.80355C>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_133437.4:c.80355C>A']['alt_genomic_loci'], []) - assert results['NM_133437.4:c.80355C>A']['gene_symbol'] == 'TTN' - assert results['NM_133437.4:c.80355C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_597681.4:p.(Ser26785Arg)', 'slr': 
'NP_597681.4:p.(S26785R)'} assert results['NM_133437.4:c.80355C>A']['submitted_variant'] == '2-179393504-G-T' - assert results['NM_133437.4:c.80355C>A']['genome_context_intronic_sequence'] == '' - assert results['NM_133437.4:c.80355C>A']['hgvs_lrg_variant'] == '' + assert results['NM_133437.4:c.80355C>A']['gene_symbol'] == 'TTN' + assert results['NM_133437.4:c.80355C>A']['gene_ids'] == {'hgnc_id': 'HGNC:12403', 'entrez_gene_id': '7273', 'ucsc_id': 'uc031rqc.3', 'omim_id': ['188840']} assert results['NM_133437.4:c.80355C>A']['hgvs_transcript_variant'] == 'NM_133437.4:c.80355C>A' + assert results['NM_133437.4:c.80355C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_133437.4:c.80355C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_133437.4:c.80355C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_133437.4:c.80355C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} - assert results['NM_133437.4:c.80355C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} - assert results['NM_133437.4:c.80355C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} - assert results['NM_133437.4:c.80355C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} - assert results['NM_133437.4:c.80355C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_597681.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_133437.4'} + assert results['NM_133437.4:c.80355C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_597681.4:p.(Ser26785Arg)', 'slr': 
'NP_597681.4:p.(S26785R)'} + assert results['NM_133437.4:c.80355C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_133437.4:c.80355C>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_133437.4:c.80355C>A']['alt_genomic_loci'], []) + assert results['NM_133437.4:c.80355C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'pos': '179393504', 'ref': 'G', 'alt': 'T'}} + assert results['NM_133437.4:c.80355C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'pos': '178528777', 'ref': 'G', 'alt': 'T'}} + assert results['NM_133437.4:c.80355C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'pos': '179393504', 'ref': 'G', 'alt': 'T'}} + assert results['NM_133437.4:c.80355C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'pos': '178528777', 'ref': 'G', 'alt': 'T'}} + assert results['NM_133437.4:c.80355C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_133437.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_597681.4'} - assert results['flag'] == 'gene_variant' - assert 'NM_133378.4:c.99270C>A' in list(results.keys()) - assert results['NM_133378.4:c.99270C>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_133378.4:c.99270C>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_133378.4:c.99270C>A']['alt_genomic_loci'], []) - assert results['NM_133378.4:c.99270C>A']['gene_symbol'] == 'TTN' - assert results['NM_133378.4:c.99270C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_596869.4:p.(Ser33090Arg)', 'slr': 'NP_596869.4:p.(S33090R)'} - assert results['NM_133378.4:c.99270C>A']['submitted_variant'] == '2-179393504-G-T' - assert 
results['NM_133378.4:c.99270C>A']['genome_context_intronic_sequence'] == '' - assert results['NM_133378.4:c.99270C>A']['hgvs_lrg_variant'] == '' - assert results['NM_133378.4:c.99270C>A']['hgvs_transcript_variant'] == 'NM_133378.4:c.99270C>A' - assert results['NM_133378.4:c.99270C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_133378.4:c.99270C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} - assert results['NM_133378.4:c.99270C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} - assert results['NM_133378.4:c.99270C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} - assert results['NM_133378.4:c.99270C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} - assert results['NM_133378.4:c.99270C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_596869.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_133378.4'} + assert 'NM_001256850.1:c.102051C>A' in list(results.keys()) + assert results['NM_001256850.1:c.102051C>A']['submitted_variant'] == '2-179393504-G-T' + assert results['NM_001256850.1:c.102051C>A']['gene_symbol'] == 'TTN' + assert results['NM_001256850.1:c.102051C>A']['gene_ids'] == {'hgnc_id': 'HGNC:12403', 'entrez_gene_id': '7273', 'ucsc_id': 'uc031rqc.3', 'omim_id': ['188840']} + assert results['NM_001256850.1:c.102051C>A']['hgvs_transcript_variant'] == 'NM_001256850.1:c.102051C>A' + assert results['NM_001256850.1:c.102051C>A']['genome_context_intronic_sequence'] == '' + assert 
results['NM_001256850.1:c.102051C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001256850.1:c.102051C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001256850.1:c.102051C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243779.1:p.(Ser34017Arg)', 'slr': 'NP_001243779.1:p.(S34017R)'} + assert results['NM_001256850.1:c.102051C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001256850.1:c.102051C>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001256850.1:c.102051C>A']['alt_genomic_loci'], []) + assert results['NM_001256850.1:c.102051C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'pos': '179393504', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001256850.1:c.102051C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'pos': '178528777', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001256850.1:c.102051C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'pos': '179393504', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001256850.1:c.102051C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'pos': '178528777', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001256850.1:c.102051C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001256850.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243779.1'} assert 'NM_001267550.2:c.106974C>A' in list(results.keys()) - assert results['NM_001267550.2:c.106974C>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001267550.2:c.106974C>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001267550.2:c.106974C>A']['alt_genomic_loci'], []) - assert 
results['NM_001267550.2:c.106974C>A']['gene_symbol'] == 'TTN' - assert results['NM_001267550.2:c.106974C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001254479.2:p.(Ser35658Arg)', 'slr': 'NP_001254479.2:p.(S35658R)'} assert results['NM_001267550.2:c.106974C>A']['submitted_variant'] == '2-179393504-G-T' - assert results['NM_001267550.2:c.106974C>A']['genome_context_intronic_sequence'] == '' - assert results['NM_001267550.2:c.106974C>A']['hgvs_lrg_variant'] == '' + assert results['NM_001267550.2:c.106974C>A']['gene_symbol'] == 'TTN' + assert results['NM_001267550.2:c.106974C>A']['gene_ids'] == {'hgnc_id': 'HGNC:12403', 'entrez_gene_id': '7273', 'ucsc_id': 'uc031rqc.3', 'omim_id': ['188840']} assert results['NM_001267550.2:c.106974C>A']['hgvs_transcript_variant'] == 'NM_001267550.2:c.106974C>A' + assert results['NM_001267550.2:c.106974C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001267550.2:c.106974C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001267550.2:c.106974C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001267550.2:c.106974C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} - assert results['NM_001267550.2:c.106974C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} - assert results['NM_001267550.2:c.106974C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} - assert results['NM_001267550.2:c.106974C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} - assert results['NM_001267550.2:c.106974C>A']['reference_sequence_records'] == 
{'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001254479.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001267550.2'} + assert results['NM_001267550.2:c.106974C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001254479.2:p.(Ser35658Arg)', 'slr': 'NP_001254479.2:p.(S35658R)'} + assert results['NM_001267550.2:c.106974C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001267550.2:c.106974C>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001267550.2:c.106974C>A']['alt_genomic_loci'], []) + assert results['NM_001267550.2:c.106974C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'pos': '179393504', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001267550.2:c.106974C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'pos': '178528777', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001267550.2:c.106974C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'pos': '179393504', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001267550.2:c.106974C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'pos': '178528777', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001267550.2:c.106974C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001267550.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001254479.2'} - assert 'NM_133437.3:c.80355C>A' in list(results.keys()) - assert results['NM_133437.3:c.80355C>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_133437.3:c.80355C>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_133437.3:c.80355C>A']['alt_genomic_loci'], []) - assert results['NM_133437.3:c.80355C>A']['gene_symbol'] == 'TTN' - assert 
results['NM_133437.3:c.80355C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_597681.3:p.(Ser26785Arg)', 'slr': 'NP_597681.3:p.(S26785R)'} - assert results['NM_133437.3:c.80355C>A']['submitted_variant'] == '2-179393504-G-T' - assert results['NM_133437.3:c.80355C>A']['genome_context_intronic_sequence'] == '' - assert results['NM_133437.3:c.80355C>A']['hgvs_lrg_variant'] == '' - assert results['NM_133437.3:c.80355C>A']['hgvs_transcript_variant'] == 'NM_133437.3:c.80355C>A' - assert results['NM_133437.3:c.80355C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_133437.3:c.80355C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} - assert 'hg38' not in list(results['NM_133437.3:c.80355C>A']['primary_assembly_loci'].keys()) - assert results['NM_133437.3:c.80355C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} - assert 'grch38' not in list(results['NM_133437.3:c.80355C>A']['primary_assembly_loci'].keys()) - assert results['NM_133437.3:c.80355C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_597681.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_133437.3'} + assert 'NR_038271.1:n.446+5141G>T' in list(results.keys()) + assert results['NR_038271.1:n.446+5141G>T']['submitted_variant'] == '2-179393504-G-T' + assert results['NR_038271.1:n.446+5141G>T']['gene_symbol'] == 'TTN-AS1' + assert results['NR_038271.1:n.446+5141G>T']['gene_ids'] == {'hgnc_id': 'HGNC:44124', 'entrez_gene_id': '100506866', 'ucsc_id': 'uc061qcw.1', 'omim_id': []} + assert results['NR_038271.1:n.446+5141G>T']['hgvs_transcript_variant'] == 'NR_038271.1:n.446+5141G>T' + assert results['NR_038271.1:n.446+5141G>T']['genome_context_intronic_sequence'] == 'NC_000002.11(NR_038271.1):c.446+5141G>T' + assert 
results['NR_038271.1:n.446+5141G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_038271.1:n.446+5141G>T']['hgvs_refseqgene_variant'] == '' + assert results['NR_038271.1:n.446+5141G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_038271.1:n.446+5141G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_038271.1:n.446+5141G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_038271.1:n.446+5141G>T']['alt_genomic_loci'], []) + assert results['NR_038271.1:n.446+5141G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'pos': '179393504', 'ref': 'G', 'alt': 'T'}} + assert results['NR_038271.1:n.446+5141G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'pos': '178528777', 'ref': 'G', 'alt': 'T'}} + assert results['NR_038271.1:n.446+5141G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'pos': '179393504', 'ref': 'G', 'alt': 'T'}} + assert results['NR_038271.1:n.446+5141G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'pos': '178528777', 'ref': 'G', 'alt': 'T'}} + assert results['NR_038271.1:n.446+5141G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_038271.1'} + + assert 'NM_133378.4:c.99270C>A' in list(results.keys()) + assert results['NM_133378.4:c.99270C>A']['submitted_variant'] == '2-179393504-G-T' + assert results['NM_133378.4:c.99270C>A']['gene_symbol'] == 'TTN' + assert results['NM_133378.4:c.99270C>A']['gene_ids'] == {'hgnc_id': 'HGNC:12403', 'entrez_gene_id': '7273', 'ucsc_id': 'uc031rqc.3', 'omim_id': ['188840']} + assert results['NM_133378.4:c.99270C>A']['hgvs_transcript_variant'] == 'NM_133378.4:c.99270C>A' + 
assert results['NM_133378.4:c.99270C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_133378.4:c.99270C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_133378.4:c.99270C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_133378.4:c.99270C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_596869.4:p.(Ser33090Arg)', 'slr': 'NP_596869.4:p.(S33090R)'} + assert results['NM_133378.4:c.99270C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_133378.4:c.99270C>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_133378.4:c.99270C>A']['alt_genomic_loci'], []) + assert results['NM_133378.4:c.99270C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'pos': '179393504', 'ref': 'G', 'alt': 'T'}} + assert results['NM_133378.4:c.99270C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'pos': '178528777', 'ref': 'G', 'alt': 'T'}} + assert results['NM_133378.4:c.99270C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'pos': '179393504', 'ref': 'G', 'alt': 'T'}} + assert results['NM_133378.4:c.99270C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'pos': '178528777', 'ref': 'G', 'alt': 'T'}} + assert results['NM_133378.4:c.99270C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_133378.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_596869.4'} assert 'NM_003319.4:c.79779C>A' in list(results.keys()) - assert results['NM_003319.4:c.79779C>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003319.4:c.79779C>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003319.4:c.79779C>A']['alt_genomic_loci'], []) - assert 
results['NM_003319.4:c.79779C>A']['gene_symbol'] == 'TTN' - assert results['NM_003319.4:c.79779C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003310.4:p.(Ser26593Arg)', 'slr': 'NP_003310.4:p.(S26593R)'} assert results['NM_003319.4:c.79779C>A']['submitted_variant'] == '2-179393504-G-T' - assert results['NM_003319.4:c.79779C>A']['genome_context_intronic_sequence'] == '' - assert results['NM_003319.4:c.79779C>A']['hgvs_lrg_variant'] == '' + assert results['NM_003319.4:c.79779C>A']['gene_symbol'] == 'TTN' + assert results['NM_003319.4:c.79779C>A']['gene_ids'] == {'hgnc_id': 'HGNC:12403', 'entrez_gene_id': '7273', 'ucsc_id': 'uc031rqc.3', 'omim_id': ['188840']} assert results['NM_003319.4:c.79779C>A']['hgvs_transcript_variant'] == 'NM_003319.4:c.79779C>A' + assert results['NM_003319.4:c.79779C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_003319.4:c.79779C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003319.4:c.79779C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_003319.4:c.79779C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} - assert results['NM_003319.4:c.79779C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} - assert results['NM_003319.4:c.79779C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '179393504', 'alt': 'T'}} - assert results['NM_003319.4:c.79779C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '178528777', 'alt': 'T'}} - assert results['NM_003319.4:c.79779C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003310.4', 
'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003319.4'} + assert results['NM_003319.4:c.79779C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003310.4:p.(Ser26593Arg)', 'slr': 'NP_003310.4:p.(S26593R)'} + assert results['NM_003319.4:c.79779C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003319.4:c.79779C>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003319.4:c.79779C>A']['alt_genomic_loci'], []) + assert results['NM_003319.4:c.79779C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'pos': '179393504', 'ref': 'G', 'alt': 'T'}} + assert results['NM_003319.4:c.79779C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'pos': '178528777', 'ref': 'G', 'alt': 'T'}} + assert results['NM_003319.4:c.79779C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'pos': '179393504', 'ref': 'G', 'alt': 'T'}} + assert results['NM_003319.4:c.79779C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'pos': '178528777', 'ref': 'G', 'alt': 'T'}} + assert results['NM_003319.4:c.79779C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003319.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003310.4'} + + assert 'NM_133432.3:c.80154C>A' in list(results.keys()) + assert results['NM_133432.3:c.80154C>A']['submitted_variant'] == '2-179393504-G-T' + assert results['NM_133432.3:c.80154C>A']['gene_symbol'] == 'TTN' + assert results['NM_133432.3:c.80154C>A']['gene_ids'] == {'hgnc_id': 'HGNC:12403', 'entrez_gene_id': '7273', 'ucsc_id': 'uc031rqc.3', 'omim_id': ['188840']} + assert results['NM_133432.3:c.80154C>A']['hgvs_transcript_variant'] == 'NM_133432.3:c.80154C>A' + assert 
results['NM_133432.3:c.80154C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_133432.3:c.80154C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_133432.3:c.80154C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_133432.3:c.80154C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_597676.3:p.(Ser26718Arg)', 'slr': 'NP_597676.3:p.(S26718R)'} + assert results['NM_133432.3:c.80154C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_133432.3:c.80154C>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_133432.3:c.80154C>A']['alt_genomic_loci'], []) + assert results['NM_133432.3:c.80154C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'pos': '179393504', 'ref': 'G', 'alt': 'T'}} + assert results['NM_133432.3:c.80154C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': 'chr2', 'pos': '178528777', 'ref': 'G', 'alt': 'T'}} + assert results['NM_133432.3:c.80154C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'pos': '179393504', 'ref': 'G', 'alt': 'T'}} + assert results['NM_133432.3:c.80154C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.178528777G>T', 'vcf': {'chr': '2', 'pos': '178528777', 'ref': 'G', 'alt': 'T'}} + assert results['NM_133432.3:c.80154C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_133432.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_597676.3'} + assert 'NM_001267550.1:c.106974C>A' in list(results.keys()) + assert results['NM_001267550.1:c.106974C>A']['submitted_variant'] == '2-179393504-G-T' + assert results['NM_001267550.1:c.106974C>A']['gene_symbol'] == 'TTN' + assert results['NM_001267550.1:c.106974C>A']['gene_ids'] == {'hgnc_id': 'HGNC:12403', 'entrez_gene_id': '7273', 
'ucsc_id': 'uc031rqc.3', 'omim_id': ['188840']} + assert results['NM_001267550.1:c.106974C>A']['hgvs_transcript_variant'] == 'NM_001267550.1:c.106974C>A' + assert results['NM_001267550.1:c.106974C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001267550.1:c.106974C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001267550.1:c.106974C>A']['hgvs_refseqgene_variant'] == 'NG_011618.3:g.307026C>A' + assert results['NM_001267550.1:c.106974C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001254479.1(LRG_391p1):p.(Ser35658Arg)', 'slr': 'NP_001254479.1:p.(S35658R)'} + assert results['NM_001267550.1:c.106974C>A']['hgvs_lrg_transcript_variant'] == 'LRG_391t1:c.106974C>A' + assert results['NM_001267550.1:c.106974C>A']['hgvs_lrg_variant'] == 'LRG_391:g.307026C>A' + self.assertCountEqual(results['NM_001267550.1:c.106974C>A']['alt_genomic_loci'], []) + assert results['NM_001267550.1:c.106974C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': 'chr2', 'pos': '179393504', 'ref': 'G', 'alt': 'T'}} + assert 'hg38' not in list(results['NM_001267550.1:c.106974C>A']['primary_assembly_loci'].keys()) + assert results['NM_001267550.1:c.106974C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.179393504G>T', 'vcf': {'chr': '2', 'pos': '179393504', 'ref': 'G', 'alt': 'T'}} + assert 'grch38' not in list(results['NM_001267550.1:c.106974C>A']['primary_assembly_loci'].keys()) + assert results['NM_001267550.1:c.106974C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001267550.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001254479.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011618.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_391.xml'} def test_variant275(self): variant = '2-185803444-TGCAGCTGCTGCAGCTGCAGCTGCA-T' @@ -14425,22 +14886,22 @@ def test_variant275(self): 
assert results['flag'] == 'gene_variant' assert 'NM_194250.1:c.3324_3347del' in list(results.keys()) - assert results['NM_194250.1:c.3324_3347del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_194250.1:c.3324_3347del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_194250.1:c.3324_3347del']['alt_genomic_loci'], []) - assert results['NM_194250.1:c.3324_3347del']['gene_symbol'] == 'ZNF804A' - assert results['NM_194250.1:c.3324_3347del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_919226.1:p.(Ala1112_Ala1119del)', 'slr': 'NP_919226.1:p.(A1112_A1119del)'} assert results['NM_194250.1:c.3324_3347del']['submitted_variant'] == '2-185803444-TGCAGCTGCTGCAGCTGCAGCTGCA-T' - assert results['NM_194250.1:c.3324_3347del']['genome_context_intronic_sequence'] == '' - assert results['NM_194250.1:c.3324_3347del']['hgvs_lrg_variant'] == '' + assert results['NM_194250.1:c.3324_3347del']['gene_symbol'] == 'ZNF804A' + assert results['NM_194250.1:c.3324_3347del']['gene_ids'] == {'hgnc_id': 'HGNC:21711', 'entrez_gene_id': '91752', 'ucsc_id': 'uc002uph.4', 'omim_id': ['612282']} assert results['NM_194250.1:c.3324_3347del']['hgvs_transcript_variant'] == 'NM_194250.1:c.3324_3347del' + assert results['NM_194250.1:c.3324_3347del']['genome_context_intronic_sequence'] == '' + assert results['NM_194250.1:c.3324_3347del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_194250.1:c.3324_3347del']['hgvs_refseqgene_variant'] == '' - assert results['NM_194250.1:c.3324_3347del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.185803447_185803470del', 'vcf': {'chr': 'chr2', 'ref': 'TGCAGCTGCTGCAGCTGCAGCTGCA', 'pos': '185803444', 'alt': 'T'}} - assert results['NM_194250.1:c.3324_3347del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.184938720_184938743del', 'vcf': {'chr': 'chr2', 'ref': 'TGCAGCTGCTGCAGCTGCAGCTGCA', 'pos': '184938717', 'alt': 'T'}} - assert 
results['NM_194250.1:c.3324_3347del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.185803447_185803470del', 'vcf': {'chr': '2', 'ref': 'TGCAGCTGCTGCAGCTGCAGCTGCA', 'pos': '185803444', 'alt': 'T'}} - assert results['NM_194250.1:c.3324_3347del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.184938720_184938743del', 'vcf': {'chr': '2', 'ref': 'TGCAGCTGCTGCAGCTGCAGCTGCA', 'pos': '184938717', 'alt': 'T'}} - assert results['NM_194250.1:c.3324_3347del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_919226.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_194250.1'} - + assert results['NM_194250.1:c.3324_3347del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_919226.1:p.(Ala1112_Ala1119del)', 'slr': 'NP_919226.1:p.(A1112_A1119del)'} + assert results['NM_194250.1:c.3324_3347del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_194250.1:c.3324_3347del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_194250.1:c.3324_3347del']['alt_genomic_loci'], []) + assert results['NM_194250.1:c.3324_3347del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.185803447_185803470del', 'vcf': {'chr': 'chr2', 'pos': '185803444', 'ref': 'TGCAGCTGCTGCAGCTGCAGCTGCA', 'alt': 'T'}} + assert results['NM_194250.1:c.3324_3347del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.184938720_184938743del', 'vcf': {'chr': 'chr2', 'pos': '184938717', 'ref': 'TGCAGCTGCTGCAGCTGCAGCTGCA', 'alt': 'T'}} + assert results['NM_194250.1:c.3324_3347del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.185803447_185803470del', 'vcf': {'chr': '2', 'pos': '185803444', 'ref': 'TGCAGCTGCTGCAGCTGCAGCTGCA', 'alt': 'T'}} + assert results['NM_194250.1:c.3324_3347del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.184938720_184938743del', 
'vcf': {'chr': '2', 'pos': '184938717', 'ref': 'TGCAGCTGCTGCAGCTGCAGCTGCA', 'alt': 'T'}} + assert results['NM_194250.1:c.3324_3347del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_194250.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_919226.1'} def test_variant276(self): variant = '2-201950249-G-T' @@ -14448,814 +14909,834 @@ def test_variant276(self): print(results) assert results['flag'] == 'gene_variant' - assert 'NM_002491.2:c.208G>T' in list(results.keys()) - assert results['NM_002491.2:c.208G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_002491.2:c.208G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_002491.2:c.208G>T']['alt_genomic_loci'], []) - assert results['NM_002491.2:c.208G>T']['gene_symbol'] == 'NDUFB3' - assert results['NM_002491.2:c.208G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002482.1:p.(Gly70Ter)', 'slr': 'NP_002482.1:p.(G70*)'} - assert results['NM_002491.2:c.208G>T']['submitted_variant'] == '2-201950249-G-T' - assert results['NM_002491.2:c.208G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_002491.2:c.208G>T']['hgvs_lrg_variant'] == '' - assert results['NM_002491.2:c.208G>T']['hgvs_transcript_variant'] == 'NM_002491.2:c.208G>T' - assert results['NM_002491.2:c.208G>T']['hgvs_refseqgene_variant'] == 'NG_032156.1:g.18788G>T' - assert results['NM_002491.2:c.208G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.201950249G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '201950249', 'alt': 'T'}} - assert results['NM_002491.2:c.208G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.201085526G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '201085526', 'alt': 'T'}} - assert results['NM_002491.2:c.208G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.201950249G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': 
'201950249', 'alt': 'T'}} - assert results['NM_002491.2:c.208G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.201085526G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '201085526', 'alt': 'T'}} - assert results['NM_002491.2:c.208G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_032156.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002482.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002491.2'} - assert 'NM_001257102.1:c.208G>T' in list(results.keys()) - assert results['NM_001257102.1:c.208G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001257102.1:c.208G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001257102.1:c.208G>T']['alt_genomic_loci'], []) - assert results['NM_001257102.1:c.208G>T']['gene_symbol'] == 'NDUFB3' - assert results['NM_001257102.1:c.208G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001244031.1:p.(Gly70Ter)', 'slr': 'NP_001244031.1:p.(G70*)'} assert results['NM_001257102.1:c.208G>T']['submitted_variant'] == '2-201950249-G-T' - assert results['NM_001257102.1:c.208G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001257102.1:c.208G>T']['hgvs_lrg_variant'] == '' + assert results['NM_001257102.1:c.208G>T']['gene_symbol'] == 'NDUFB3' + assert results['NM_001257102.1:c.208G>T']['gene_ids'] == {'hgnc_id': 'HGNC:7698', 'entrez_gene_id': '4709', 'ucsc_id': 'uc002uwx.6', 'omim_id': ['603839']} assert results['NM_001257102.1:c.208G>T']['hgvs_transcript_variant'] == 'NM_001257102.1:c.208G>T' + assert results['NM_001257102.1:c.208G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001257102.1:c.208G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001257102.1:c.208G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001257102.1:c.208G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000002.11:g.201950249G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '201950249', 'alt': 'T'}} - assert results['NM_001257102.1:c.208G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.201085526G>T', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '201085526', 'alt': 'T'}} - assert results['NM_001257102.1:c.208G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.201950249G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '201950249', 'alt': 'T'}} - assert results['NM_001257102.1:c.208G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.201085526G>T', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '201085526', 'alt': 'T'}} - assert results['NM_001257102.1:c.208G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244031.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257102.1'} + assert results['NM_001257102.1:c.208G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001244031.1:p.(Gly70Ter)', 'slr': 'NP_001244031.1:p.(G70*)'} + assert results['NM_001257102.1:c.208G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001257102.1:c.208G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001257102.1:c.208G>T']['alt_genomic_loci'], []) + assert results['NM_001257102.1:c.208G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.201950249G>T', 'vcf': {'chr': 'chr2', 'pos': '201950249', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001257102.1:c.208G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.201085526G>T', 'vcf': {'chr': 'chr2', 'pos': '201085526', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001257102.1:c.208G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.201950249G>T', 'vcf': {'chr': '2', 'pos': '201950249', 'ref': 'G', 'alt': 'T'}} + assert 
results['NM_001257102.1:c.208G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.201085526G>T', 'vcf': {'chr': '2', 'pos': '201085526', 'ref': 'G', 'alt': 'T'}} + assert results['NM_001257102.1:c.208G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257102.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001244031.1'} + assert 'NM_002491.2:c.208G>T' in list(results.keys()) + assert results['NM_002491.2:c.208G>T']['submitted_variant'] == '2-201950249-G-T' + assert results['NM_002491.2:c.208G>T']['gene_symbol'] == 'NDUFB3' + assert results['NM_002491.2:c.208G>T']['gene_ids'] == {'hgnc_id': 'HGNC:7698', 'entrez_gene_id': '4709', 'ucsc_id': 'uc002uwx.6', 'omim_id': ['603839']} + assert results['NM_002491.2:c.208G>T']['hgvs_transcript_variant'] == 'NM_002491.2:c.208G>T' + assert results['NM_002491.2:c.208G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_002491.2:c.208G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_002491.2:c.208G>T']['hgvs_refseqgene_variant'] == 'NG_032156.1:g.18788G>T' + assert results['NM_002491.2:c.208G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002482.1:p.(Gly70Ter)', 'slr': 'NP_002482.1:p.(G70*)'} + assert results['NM_002491.2:c.208G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_002491.2:c.208G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_002491.2:c.208G>T']['alt_genomic_loci'], []) + assert results['NM_002491.2:c.208G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.201950249G>T', 'vcf': {'chr': 'chr2', 'pos': '201950249', 'ref': 'G', 'alt': 'T'}} + assert results['NM_002491.2:c.208G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.201085526G>T', 'vcf': {'chr': 'chr2', 'pos': '201085526', 'ref': 'G', 'alt': 'T'}} + assert results['NM_002491.2:c.208G>T']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000002.11:g.201950249G>T', 'vcf': {'chr': '2', 'pos': '201950249', 'ref': 'G', 'alt': 'T'}} + assert results['NM_002491.2:c.208G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.201085526G>T', 'vcf': {'chr': '2', 'pos': '201085526', 'ref': 'G', 'alt': 'T'}} + assert results['NM_002491.2:c.208G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002491.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002482.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_032156.1'} def test_variant277(self): variant = '2-238268730-C-A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_004369.3:c.6282+1G>T' in list(results.keys()) - assert results['NM_004369.3:c.6282+1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_473t1:c.6282+1G>T' - assert results['NM_004369.3:c.6282+1G>T']['refseqgene_context_intronic_sequence'] == 'NG_008676.1(NM_004369.3):c.6282+1G>T' - self.assertCountEqual(results['NM_004369.3:c.6282+1G>T']['alt_genomic_loci'], []) - assert results['NM_004369.3:c.6282+1G>T']['gene_symbol'] == 'COL6A3' - assert results['NM_004369.3:c.6282+1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004360.2(LRG_473p1):p.?', 'slr': 'NP_004360.2:p.?'} assert results['NM_004369.3:c.6282+1G>T']['submitted_variant'] == '2-238268730-C-A' - assert results['NM_004369.3:c.6282+1G>T']['genome_context_intronic_sequence'] == 'NC_000002.11(NM_004369.3):c.6282+1G>T' - assert results['NM_004369.3:c.6282+1G>T']['hgvs_lrg_variant'] == 'LRG_473:g.59121G>T' + assert results['NM_004369.3:c.6282+1G>T']['gene_symbol'] == 'COL6A3' + assert results['NM_004369.3:c.6282+1G>T']['gene_ids'] == {'hgnc_id': 'HGNC:2213', 'entrez_gene_id': '1293', 'ucsc_id': 'uc002vwl.3', 'omim_id': ['120250']} assert results['NM_004369.3:c.6282+1G>T']['hgvs_transcript_variant'] == 'NM_004369.3:c.6282+1G>T' + 
assert results['NM_004369.3:c.6282+1G>T']['genome_context_intronic_sequence'] == 'NC_000002.11(NM_004369.3):c.6282+1G>T' + assert results['NM_004369.3:c.6282+1G>T']['refseqgene_context_intronic_sequence'] == 'NG_008676.1(NM_004369.3):c.6282+1G>T' assert results['NM_004369.3:c.6282+1G>T']['hgvs_refseqgene_variant'] == 'NG_008676.1:g.59121G>T' - assert results['NM_004369.3:c.6282+1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '238268730', 'alt': 'A'}} - assert results['NM_004369.3:c.6282+1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '237360087', 'alt': 'A'}} - assert results['NM_004369.3:c.6282+1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '238268730', 'alt': 'A'}} - assert results['NM_004369.3:c.6282+1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '237360087', 'alt': 'A'}} - assert results['NM_004369.3:c.6282+1G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008676.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004360.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004369.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_473.xml'} - - assert results['flag'] == 'gene_variant' - assert 'NM_057166.4:c.4461+1G>T' in list(results.keys()) - assert results['NM_057166.4:c.4461+1G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_057166.4:c.4461+1G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_057166.4:c.4461+1G>T']['alt_genomic_loci'], []) - assert results['NM_057166.4:c.4461+1G>T']['gene_symbol'] == 'COL6A3' - assert 
results['NM_057166.4:c.4461+1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_476507.3:p.?', 'slr': 'NP_476507.3:p.?'} - assert results['NM_057166.4:c.4461+1G>T']['submitted_variant'] == '2-238268730-C-A' - assert results['NM_057166.4:c.4461+1G>T']['genome_context_intronic_sequence'] == 'NC_000002.11(NM_057166.4):c.4461+1G>T' - assert results['NM_057166.4:c.4461+1G>T']['hgvs_lrg_variant'] == '' - assert results['NM_057166.4:c.4461+1G>T']['hgvs_transcript_variant'] == 'NM_057166.4:c.4461+1G>T' - assert results['NM_057166.4:c.4461+1G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_057166.4:c.4461+1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '238268730', 'alt': 'A'}} - assert results['NM_057166.4:c.4461+1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '237360087', 'alt': 'A'}} - assert results['NM_057166.4:c.4461+1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '238268730', 'alt': 'A'}} - assert results['NM_057166.4:c.4461+1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '237360087', 'alt': 'A'}} - assert results['NM_057166.4:c.4461+1G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_476507.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_057166.4'} + assert results['NM_004369.3:c.6282+1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004360.2(LRG_473p1):p.?', 'slr': 'NP_004360.2:p.?'} + assert results['NM_004369.3:c.6282+1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_473t1:c.6282+1G>T' + assert results['NM_004369.3:c.6282+1G>T']['hgvs_lrg_variant'] == 'LRG_473:g.59121G>T' + 
self.assertCountEqual(results['NM_004369.3:c.6282+1G>T']['alt_genomic_loci'], []) + assert results['NM_004369.3:c.6282+1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': 'chr2', 'pos': '238268730', 'ref': 'C', 'alt': 'A'}} + assert results['NM_004369.3:c.6282+1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': 'chr2', 'pos': '237360087', 'ref': 'C', 'alt': 'A'}} + assert results['NM_004369.3:c.6282+1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': '2', 'pos': '238268730', 'ref': 'C', 'alt': 'A'}} + assert results['NM_004369.3:c.6282+1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': '2', 'pos': '237360087', 'ref': 'C', 'alt': 'A'}} + assert results['NM_004369.3:c.6282+1G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004369.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004360.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008676.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_473.xml'} assert 'NM_057167.3:c.5664+1G>T' in list(results.keys()) - assert results['NM_057167.3:c.5664+1G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_057167.3:c.5664+1G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_057167.3:c.5664+1G>T']['alt_genomic_loci'], []) - assert results['NM_057167.3:c.5664+1G>T']['gene_symbol'] == 'COL6A3' - assert results['NM_057167.3:c.5664+1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_476508.2:p.?', 'slr': 'NP_476508.2:p.?'} assert results['NM_057167.3:c.5664+1G>T']['submitted_variant'] == '2-238268730-C-A' - assert results['NM_057167.3:c.5664+1G>T']['genome_context_intronic_sequence'] == 'NC_000002.11(NM_057167.3):c.5664+1G>T' - assert 
results['NM_057167.3:c.5664+1G>T']['hgvs_lrg_variant'] == '' + assert results['NM_057167.3:c.5664+1G>T']['gene_symbol'] == 'COL6A3' + assert results['NM_057167.3:c.5664+1G>T']['gene_ids'] == {'hgnc_id': 'HGNC:2213', 'entrez_gene_id': '1293', 'ucsc_id': 'uc002vwl.3', 'omim_id': ['120250']} assert results['NM_057167.3:c.5664+1G>T']['hgvs_transcript_variant'] == 'NM_057167.3:c.5664+1G>T' + assert results['NM_057167.3:c.5664+1G>T']['genome_context_intronic_sequence'] == 'NC_000002.11(NM_057167.3):c.5664+1G>T' + assert results['NM_057167.3:c.5664+1G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_057167.3:c.5664+1G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_057167.3:c.5664+1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '238268730', 'alt': 'A'}} - assert results['NM_057167.3:c.5664+1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': 'chr2', 'ref': 'C', 'pos': '237360087', 'alt': 'A'}} - assert results['NM_057167.3:c.5664+1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '238268730', 'alt': 'A'}} - assert results['NM_057167.3:c.5664+1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': '2', 'ref': 'C', 'pos': '237360087', 'alt': 'A'}} - assert results['NM_057167.3:c.5664+1G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_476508.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_057167.3'} + assert results['NM_057167.3:c.5664+1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_476508.2:p.?', 'slr': 'NP_476508.2:p.?'} + assert results['NM_057167.3:c.5664+1G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_057167.3:c.5664+1G>T']['hgvs_lrg_variant'] == 
'' + self.assertCountEqual(results['NM_057167.3:c.5664+1G>T']['alt_genomic_loci'], []) + assert results['NM_057167.3:c.5664+1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': 'chr2', 'pos': '238268730', 'ref': 'C', 'alt': 'A'}} + assert results['NM_057167.3:c.5664+1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': 'chr2', 'pos': '237360087', 'ref': 'C', 'alt': 'A'}} + assert results['NM_057167.3:c.5664+1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': '2', 'pos': '238268730', 'ref': 'C', 'alt': 'A'}} + assert results['NM_057167.3:c.5664+1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': '2', 'pos': '237360087', 'ref': 'C', 'alt': 'A'}} + assert results['NM_057167.3:c.5664+1G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_057167.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_476508.2'} + assert 'NM_057166.4:c.4461+1G>T' in list(results.keys()) + assert results['NM_057166.4:c.4461+1G>T']['submitted_variant'] == '2-238268730-C-A' + assert results['NM_057166.4:c.4461+1G>T']['gene_symbol'] == 'COL6A3' + assert results['NM_057166.4:c.4461+1G>T']['gene_ids'] == {'hgnc_id': 'HGNC:2213', 'entrez_gene_id': '1293', 'ucsc_id': 'uc002vwl.3', 'omim_id': ['120250']} + assert results['NM_057166.4:c.4461+1G>T']['hgvs_transcript_variant'] == 'NM_057166.4:c.4461+1G>T' + assert results['NM_057166.4:c.4461+1G>T']['genome_context_intronic_sequence'] == 'NC_000002.11(NM_057166.4):c.4461+1G>T' + assert results['NM_057166.4:c.4461+1G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_057166.4:c.4461+1G>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_057166.4:c.4461+1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_476507.3:p.?', 
'slr': 'NP_476507.3:p.?'} + assert results['NM_057166.4:c.4461+1G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_057166.4:c.4461+1G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_057166.4:c.4461+1G>T']['alt_genomic_loci'], []) + assert results['NM_057166.4:c.4461+1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': 'chr2', 'pos': '238268730', 'ref': 'C', 'alt': 'A'}} + assert results['NM_057166.4:c.4461+1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': 'chr2', 'pos': '237360087', 'ref': 'C', 'alt': 'A'}} + assert results['NM_057166.4:c.4461+1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.238268730C>A', 'vcf': {'chr': '2', 'pos': '238268730', 'ref': 'C', 'alt': 'A'}} + assert results['NM_057166.4:c.4461+1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.237360087C>A', 'vcf': {'chr': '2', 'pos': '237360087', 'ref': 'C', 'alt': 'A'}} + assert results['NM_057166.4:c.4461+1G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_057166.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_476507.3'} def test_variant278(self): variant = '21-43897396-C-T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' + assert 'NM_080860.3:c.727+5G>A' in list(results.keys()) + assert results['NM_080860.3:c.727+5G>A']['submitted_variant'] == '21-43897396-C-T' + assert results['NM_080860.3:c.727+5G>A']['gene_symbol'] == 'RSPH1' + assert results['NM_080860.3:c.727+5G>A']['gene_ids'] == {'hgnc_id': 'HGNC:12371', 'entrez_gene_id': '89765', 'ucsc_id': 'uc002zbg.4', 'omim_id': ['609314']} + assert results['NM_080860.3:c.727+5G>A']['hgvs_transcript_variant'] == 'NM_080860.3:c.727+5G>A' + assert 
results['NM_080860.3:c.727+5G>A']['genome_context_intronic_sequence'] == 'NC_000021.8(NM_080860.3):c.727+5G>A' + assert results['NM_080860.3:c.727+5G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_080860.3:c.727+5G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_080860.3:c.727+5G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_543136.1:p.?', 'slr': 'NP_543136.1:p.?'} + assert results['NM_080860.3:c.727+5G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_080860.3:c.727+5G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_080860.3:c.727+5G>A']['alt_genomic_loci'], []) + assert results['NM_080860.3:c.727+5G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': 'chr21', 'pos': '43897396', 'ref': 'C', 'alt': 'T'}} + assert results['NM_080860.3:c.727+5G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000021.9:g.42477286C>T', 'vcf': {'chr': 'chr21', 'pos': '42477286', 'ref': 'C', 'alt': 'T'}} + assert results['NM_080860.3:c.727+5G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': '21', 'pos': '43897396', 'ref': 'C', 'alt': 'T'}} + assert results['NM_080860.3:c.727+5G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000021.9:g.42477286C>T', 'vcf': {'chr': '21', 'pos': '42477286', 'ref': 'C', 'alt': 'T'}} + assert results['NM_080860.3:c.727+5G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_080860.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_543136.1'} + assert 'NM_080860.2:c.727+5G>A' in list(results.keys()) - assert results['NM_080860.2:c.727+5G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_080860.2:c.727+5G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_080860.2:c.727+5G>A']['alt_genomic_loci'], []) - assert 
results['NM_080860.2:c.727+5G>A']['gene_symbol'] == 'RSPH1' - assert results['NM_080860.2:c.727+5G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_543136.1:p.?', 'slr': 'NP_543136.1:p.?'} assert results['NM_080860.2:c.727+5G>A']['submitted_variant'] == '21-43897396-C-T' - assert results['NM_080860.2:c.727+5G>A']['genome_context_intronic_sequence'] == 'NC_000021.8(NM_080860.2):c.727+5G>A' - assert results['NM_080860.2:c.727+5G>A']['hgvs_lrg_variant'] == '' + assert results['NM_080860.2:c.727+5G>A']['gene_symbol'] == 'RSPH1' + assert results['NM_080860.2:c.727+5G>A']['gene_ids'] == {'hgnc_id': 'HGNC:12371', 'entrez_gene_id': '89765', 'ucsc_id': 'uc002zbg.4', 'omim_id': ['609314']} assert results['NM_080860.2:c.727+5G>A']['hgvs_transcript_variant'] == 'NM_080860.2:c.727+5G>A' + assert results['NM_080860.2:c.727+5G>A']['genome_context_intronic_sequence'] == 'NC_000021.8(NM_080860.2):c.727+5G>A' + assert results['NM_080860.2:c.727+5G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_080860.2:c.727+5G>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_080860.2:c.727+5G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': 'chr21', 'ref': 'C', 'pos': '43897396', 'alt': 'T'}} + assert results['NM_080860.2:c.727+5G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_543136.1:p.?', 'slr': 'NP_543136.1:p.?'} + assert results['NM_080860.2:c.727+5G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_080860.2:c.727+5G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_080860.2:c.727+5G>A']['alt_genomic_loci'], []) + assert results['NM_080860.2:c.727+5G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': 'chr21', 'pos': '43897396', 'ref': 'C', 'alt': 'T'}} assert 'hg38' not in list(results['NM_080860.2:c.727+5G>A']['primary_assembly_loci'].keys()) - assert 
results['NM_080860.2:c.727+5G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': '21', 'ref': 'C', 'pos': '43897396', 'alt': 'T'}} + assert results['NM_080860.2:c.727+5G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': '21', 'pos': '43897396', 'ref': 'C', 'alt': 'T'}} assert 'grch38' not in list(results['NM_080860.2:c.727+5G>A']['primary_assembly_loci'].keys()) - assert results['NM_080860.2:c.727+5G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_543136.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_080860.2'} - - assert results['flag'] == 'gene_variant' - assert 'NM_080860.3:c.727+5G>A' in list(results.keys()) - assert results['NM_080860.3:c.727+5G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_080860.3:c.727+5G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_080860.3:c.727+5G>A']['alt_genomic_loci'], []) - assert results['NM_080860.3:c.727+5G>A']['gene_symbol'] == 'RSPH1' - assert results['NM_080860.3:c.727+5G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_543136.1:p.?', 'slr': 'NP_543136.1:p.?'} - assert results['NM_080860.3:c.727+5G>A']['submitted_variant'] == '21-43897396-C-T' - assert results['NM_080860.3:c.727+5G>A']['genome_context_intronic_sequence'] == 'NC_000021.8(NM_080860.3):c.727+5G>A' - assert results['NM_080860.3:c.727+5G>A']['hgvs_lrg_variant'] == '' - assert results['NM_080860.3:c.727+5G>A']['hgvs_transcript_variant'] == 'NM_080860.3:c.727+5G>A' - assert results['NM_080860.3:c.727+5G>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_080860.3:c.727+5G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': 'chr21', 'ref': 'C', 'pos': '43897396', 'alt': 'T'}} - assert results['NM_080860.3:c.727+5G>A']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000021.9:g.42477286C>T', 'vcf': {'chr': 'chr21', 'ref': 'C', 'pos': '42477286', 'alt': 'T'}} - assert results['NM_080860.3:c.727+5G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': '21', 'ref': 'C', 'pos': '43897396', 'alt': 'T'}} - assert results['NM_080860.3:c.727+5G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000021.9:g.42477286C>T', 'vcf': {'chr': '21', 'ref': 'C', 'pos': '42477286', 'alt': 'T'}} - assert results['NM_080860.3:c.727+5G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_543136.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_080860.3'} + assert results['NM_080860.2:c.727+5G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_080860.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_543136.1'} assert 'NM_001286506.1:c.613+5G>A' in list(results.keys()) - assert results['NM_001286506.1:c.613+5G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001286506.1:c.613+5G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001286506.1:c.613+5G>A']['alt_genomic_loci'], []) - assert results['NM_001286506.1:c.613+5G>A']['gene_symbol'] == 'RSPH1' - assert results['NM_001286506.1:c.613+5G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001273435.1:p.?', 'slr': 'NP_001273435.1:p.?'} assert results['NM_001286506.1:c.613+5G>A']['submitted_variant'] == '21-43897396-C-T' - assert results['NM_001286506.1:c.613+5G>A']['genome_context_intronic_sequence'] == 'NC_000021.8(NM_001286506.1):c.613+5G>A' - assert results['NM_001286506.1:c.613+5G>A']['hgvs_lrg_variant'] == '' + assert results['NM_001286506.1:c.613+5G>A']['gene_symbol'] == 'RSPH1' + assert results['NM_001286506.1:c.613+5G>A']['gene_ids'] == {'hgnc_id': 'HGNC:12371', 'entrez_gene_id': '89765', 'ucsc_id': 'uc002zbg.4', 'omim_id': 
['609314']} assert results['NM_001286506.1:c.613+5G>A']['hgvs_transcript_variant'] == 'NM_001286506.1:c.613+5G>A' + assert results['NM_001286506.1:c.613+5G>A']['genome_context_intronic_sequence'] == 'NC_000021.8(NM_001286506.1):c.613+5G>A' + assert results['NM_001286506.1:c.613+5G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001286506.1:c.613+5G>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001286506.1:c.613+5G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': 'chr21', 'ref': 'C', 'pos': '43897396', 'alt': 'T'}} - assert results['NM_001286506.1:c.613+5G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000021.9:g.42477286C>T', 'vcf': {'chr': 'chr21', 'ref': 'C', 'pos': '42477286', 'alt': 'T'}} - assert results['NM_001286506.1:c.613+5G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': '21', 'ref': 'C', 'pos': '43897396', 'alt': 'T'}} - assert results['NM_001286506.1:c.613+5G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000021.9:g.42477286C>T', 'vcf': {'chr': '21', 'ref': 'C', 'pos': '42477286', 'alt': 'T'}} - assert results['NM_001286506.1:c.613+5G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001273435.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001286506.1'} - + assert results['NM_001286506.1:c.613+5G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001273435.1:p.?', 'slr': 'NP_001273435.1:p.?'} + assert results['NM_001286506.1:c.613+5G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001286506.1:c.613+5G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001286506.1:c.613+5G>A']['alt_genomic_loci'], []) + assert results['NM_001286506.1:c.613+5G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': 
'chr21', 'pos': '43897396', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001286506.1:c.613+5G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000021.9:g.42477286C>T', 'vcf': {'chr': 'chr21', 'pos': '42477286', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001286506.1:c.613+5G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000021.8:g.43897396C>T', 'vcf': {'chr': '21', 'pos': '43897396', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001286506.1:c.613+5G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000021.9:g.42477286C>T', 'vcf': {'chr': '21', 'pos': '42477286', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001286506.1:c.613+5G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001286506.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001273435.1'} def test_variant279(self): variant = '22-30064360-G-GCGACGC' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_000268.3:c.924_925insCGACGC' in list(results.keys()) - assert results['NM_000268.3:c.924_925insCGACGC']['hgvs_lrg_transcript_variant'] == 'LRG_511t1:c.924_925insCGACGC' - assert results['NM_000268.3:c.924_925insCGACGC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000268.3:c.924_925insCGACGC']['alt_genomic_loci'], []) - assert results['NM_000268.3:c.924_925insCGACGC']['gene_symbol'] == 'NF2' - assert results['NM_000268.3:c.924_925insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000259.1(LRG_511p1):p.(Arg310_Arg311dup)', 'slr': 'NP_000259.1:p.(R310_R311dup)'} - assert results['NM_000268.3:c.924_925insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' - assert results['NM_000268.3:c.924_925insCGACGC']['genome_context_intronic_sequence'] == '' - assert results['NM_000268.3:c.924_925insCGACGC']['hgvs_lrg_variant'] == 'LRG_511:g.69816_69817insCGACGC' - assert 
results['NM_000268.3:c.924_925insCGACGC']['hgvs_transcript_variant'] == 'NM_000268.3:c.924_925insCGACGC' - assert results['NM_000268.3:c.924_925insCGACGC']['hgvs_refseqgene_variant'] == 'NG_009057.1:g.69816_69817insCGACGC' - assert results['NM_000268.3:c.924_925insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} - assert results['NM_000268.3:c.924_925insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} - assert results['NM_000268.3:c.924_925insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} - assert results['NM_000268.3:c.924_925insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} - assert results['NM_000268.3:c.924_925insCGACGC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009057.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000259.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000268.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_511.xml'} - - assert 'NM_181828.2:c.798_799insCGACGC' in list(results.keys()) - assert results['NM_181828.2:c.798_799insCGACGC']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_181828.2:c.798_799insCGACGC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_181828.2:c.798_799insCGACGC']['alt_genomic_loci'], []) - assert results['NM_181828.2:c.798_799insCGACGC']['gene_symbol'] == 'NF2' - assert 
results['NM_181828.2:c.798_799insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861966.1:p.(Arg268_Arg269dup)', 'slr': 'NP_861966.1:p.(R268_R269dup)'} - assert results['NM_181828.2:c.798_799insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' - assert results['NM_181828.2:c.798_799insCGACGC']['genome_context_intronic_sequence'] == '' - assert results['NM_181828.2:c.798_799insCGACGC']['hgvs_lrg_variant'] == '' - assert results['NM_181828.2:c.798_799insCGACGC']['hgvs_transcript_variant'] == 'NM_181828.2:c.798_799insCGACGC' - assert results['NM_181828.2:c.798_799insCGACGC']['hgvs_refseqgene_variant'] == '' - assert results['NM_181828.2:c.798_799insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} - assert results['NM_181828.2:c.798_799insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} - assert results['NM_181828.2:c.798_799insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} - assert results['NM_181828.2:c.798_799insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} - assert results['NM_181828.2:c.798_799insCGACGC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861966.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181828.2'} + assert results['flag'] == 'gene_variant' + assert 'NR_156186.1:n.1483_1484insCGACGC' in list(results.keys()) + assert results['NR_156186.1:n.1483_1484insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' + 
assert results['NR_156186.1:n.1483_1484insCGACGC']['gene_symbol'] == 'NF2' + assert results['NR_156186.1:n.1483_1484insCGACGC']['gene_ids'] == {'hgnc_id': 'HGNC:7773', 'entrez_gene_id': '4771', 'ucsc_id': 'uc003age.5', 'omim_id': ['607379']} + assert results['NR_156186.1:n.1483_1484insCGACGC']['hgvs_transcript_variant'] == 'NR_156186.1:n.1483_1484insCGACGC' + assert results['NR_156186.1:n.1483_1484insCGACGC']['genome_context_intronic_sequence'] == '' + assert results['NR_156186.1:n.1483_1484insCGACGC']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_156186.1:n.1483_1484insCGACGC']['hgvs_refseqgene_variant'] == '' + assert results['NR_156186.1:n.1483_1484insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_156186.1:n.1483_1484insCGACGC']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_156186.1:n.1483_1484insCGACGC']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_156186.1:n.1483_1484insCGACGC']['alt_genomic_loci'], []) + assert results['NR_156186.1:n.1483_1484insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'pos': '30064360', 'ref': 'G', 'alt': 'GCGACGC'}} + assert 'hg38' not in list(results['NR_156186.1:n.1483_1484insCGACGC']['primary_assembly_loci'].keys()) + assert results['NR_156186.1:n.1483_1484insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'pos': '30064360', 'ref': 'G', 'alt': 'GCGACGC'}} + assert 'grch38' not in list(results['NR_156186.1:n.1483_1484insCGACGC']['primary_assembly_loci'].keys()) + assert results['NR_156186.1:n.1483_1484insCGACGC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_156186.1'} - assert 'NM_181830.2:c.675_676insCGACGC' in list(results.keys()) - assert 
results['NM_181830.2:c.675_676insCGACGC']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_181830.2:c.675_676insCGACGC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_181830.2:c.675_676insCGACGC']['alt_genomic_loci'], []) - assert results['NM_181830.2:c.675_676insCGACGC']['gene_symbol'] == 'NF2' - assert results['NM_181830.2:c.675_676insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861968.1:p.(Arg227_Arg228dup)', 'slr': 'NP_861968.1:p.(R227_R228dup)'} + assert 'NM_181829.2:c.801_802insCGACGC' in list(results.keys()) + assert results['NM_181829.2:c.801_802insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' + assert results['NM_181829.2:c.801_802insCGACGC']['gene_symbol'] == 'NF2' + assert results['NM_181829.2:c.801_802insCGACGC']['gene_ids'] == {'hgnc_id': 'HGNC:7773', 'entrez_gene_id': '4771', 'ucsc_id': 'uc003age.5', 'omim_id': ['607379']} + assert results['NM_181829.2:c.801_802insCGACGC']['hgvs_transcript_variant'] == 'NM_181829.2:c.801_802insCGACGC' + assert results['NM_181829.2:c.801_802insCGACGC']['genome_context_intronic_sequence'] == '' + assert results['NM_181829.2:c.801_802insCGACGC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_181829.2:c.801_802insCGACGC']['hgvs_refseqgene_variant'] == '' + assert results['NM_181829.2:c.801_802insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861967.1:p.(Arg269_Arg270dup)', 'slr': 'NP_861967.1:p.(R269_R270dup)'} + assert results['NM_181829.2:c.801_802insCGACGC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_181829.2:c.801_802insCGACGC']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_181829.2:c.801_802insCGACGC']['alt_genomic_loci'], []) + assert results['NM_181829.2:c.801_802insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'pos': '30064360', 'ref': 'G', 'alt': 'GCGACGC'}} + assert 
results['NM_181829.2:c.801_802insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'pos': '29668371', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_181829.2:c.801_802insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'pos': '30064360', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_181829.2:c.801_802insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'pos': '29668371', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_181829.2:c.801_802insCGACGC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181829.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861967.1'} + + assert 'NM_181830.2:c.675_676insCGACGC' in list(results.keys()) assert results['NM_181830.2:c.675_676insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' - assert results['NM_181830.2:c.675_676insCGACGC']['genome_context_intronic_sequence'] == '' - assert results['NM_181830.2:c.675_676insCGACGC']['hgvs_lrg_variant'] == '' + assert results['NM_181830.2:c.675_676insCGACGC']['gene_symbol'] == 'NF2' + assert results['NM_181830.2:c.675_676insCGACGC']['gene_ids'] == {'hgnc_id': 'HGNC:7773', 'entrez_gene_id': '4771', 'ucsc_id': 'uc003age.5', 'omim_id': ['607379']} assert results['NM_181830.2:c.675_676insCGACGC']['hgvs_transcript_variant'] == 'NM_181830.2:c.675_676insCGACGC' + assert results['NM_181830.2:c.675_676insCGACGC']['genome_context_intronic_sequence'] == '' + assert results['NM_181830.2:c.675_676insCGACGC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_181830.2:c.675_676insCGACGC']['hgvs_refseqgene_variant'] == '' - assert results['NM_181830.2:c.675_676insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} - assert results['NM_181830.2:c.675_676insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} - assert results['NM_181830.2:c.675_676insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} - assert results['NM_181830.2:c.675_676insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} - assert results['NM_181830.2:c.675_676insCGACGC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861968.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181830.2'} + assert results['NM_181830.2:c.675_676insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861968.1:p.(Arg227_Arg228dup)', 'slr': 'NP_861968.1:p.(R227_R228dup)'} + assert results['NM_181830.2:c.675_676insCGACGC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_181830.2:c.675_676insCGACGC']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_181830.2:c.675_676insCGACGC']['alt_genomic_loci'], []) + assert results['NM_181830.2:c.675_676insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'pos': '30064360', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_181830.2:c.675_676insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'pos': '29668371', 'ref': 'G', 'alt': 'GCGACGC'}} + assert 
results['NM_181830.2:c.675_676insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'pos': '30064360', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_181830.2:c.675_676insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'pos': '29668371', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_181830.2:c.675_676insCGACGC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181830.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861968.1'} + + assert 'NM_181833.2:c.447+26086_447+26087insCGACGC' in list(results.keys()) + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['gene_symbol'] == 'NF2' + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['gene_ids'] == {'hgnc_id': 'HGNC:7773', 'entrez_gene_id': '4771', 'ucsc_id': 'uc003age.5', 'omim_id': ['607379']} + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['hgvs_transcript_variant'] == 'NM_181833.2:c.447+26086_447+26087insCGACGC' + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['genome_context_intronic_sequence'] == 'NC_000022.10(NM_181833.2):c.447+26086_447+26087insCGACGC' + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['hgvs_refseqgene_variant'] == '' + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861971.1:p.?', 'slr': 'NP_861971.1:p.?'} + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['NM_181833.2:c.447+26086_447+26087insCGACGC']['alt_genomic_loci'], []) + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'pos': '30064360', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'pos': '29668371', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'pos': '30064360', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'pos': '29668371', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181833.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861971.1'} assert 'NM_181825.2:c.924_925insCGACGC' in list(results.keys()) - assert results['NM_181825.2:c.924_925insCGACGC']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_181825.2:c.924_925insCGACGC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_181825.2:c.924_925insCGACGC']['alt_genomic_loci'], []) - assert results['NM_181825.2:c.924_925insCGACGC']['gene_symbol'] == 'NF2' - assert results['NM_181825.2:c.924_925insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861546.1:p.(Arg310_Arg311dup)', 'slr': 'NP_861546.1:p.(R310_R311dup)'} assert results['NM_181825.2:c.924_925insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' 
- assert results['NM_181825.2:c.924_925insCGACGC']['genome_context_intronic_sequence'] == '' - assert results['NM_181825.2:c.924_925insCGACGC']['hgvs_lrg_variant'] == '' + assert results['NM_181825.2:c.924_925insCGACGC']['gene_symbol'] == 'NF2' + assert results['NM_181825.2:c.924_925insCGACGC']['gene_ids'] == {'hgnc_id': 'HGNC:7773', 'entrez_gene_id': '4771', 'ucsc_id': 'uc003age.5', 'omim_id': ['607379']} assert results['NM_181825.2:c.924_925insCGACGC']['hgvs_transcript_variant'] == 'NM_181825.2:c.924_925insCGACGC' + assert results['NM_181825.2:c.924_925insCGACGC']['genome_context_intronic_sequence'] == '' + assert results['NM_181825.2:c.924_925insCGACGC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_181825.2:c.924_925insCGACGC']['hgvs_refseqgene_variant'] == '' - assert results['NM_181825.2:c.924_925insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} - assert results['NM_181825.2:c.924_925insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} - assert results['NM_181825.2:c.924_925insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} - assert results['NM_181825.2:c.924_925insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} - assert results['NM_181825.2:c.924_925insCGACGC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861546.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181825.2'} + assert 
results['NM_181825.2:c.924_925insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861546.1:p.(Arg310_Arg311dup)', 'slr': 'NP_861546.1:p.(R310_R311dup)'} + assert results['NM_181825.2:c.924_925insCGACGC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_181825.2:c.924_925insCGACGC']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_181825.2:c.924_925insCGACGC']['alt_genomic_loci'], []) + assert results['NM_181825.2:c.924_925insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'pos': '30064360', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_181825.2:c.924_925insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'pos': '29668371', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_181825.2:c.924_925insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'pos': '30064360', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_181825.2:c.924_925insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'pos': '29668371', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_181825.2:c.924_925insCGACGC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181825.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861546.1'} + + assert 'NM_181831.2:c.675_676insCGACGC' in list(results.keys()) + assert results['NM_181831.2:c.675_676insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' + assert results['NM_181831.2:c.675_676insCGACGC']['gene_symbol'] == 'NF2' + assert results['NM_181831.2:c.675_676insCGACGC']['gene_ids'] == {'hgnc_id': 'HGNC:7773', 'entrez_gene_id': '4771', 'ucsc_id': 'uc003age.5', 'omim_id': ['607379']} + assert 
results['NM_181831.2:c.675_676insCGACGC']['hgvs_transcript_variant'] == 'NM_181831.2:c.675_676insCGACGC' + assert results['NM_181831.2:c.675_676insCGACGC']['genome_context_intronic_sequence'] == '' + assert results['NM_181831.2:c.675_676insCGACGC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_181831.2:c.675_676insCGACGC']['hgvs_refseqgene_variant'] == '' + assert results['NM_181831.2:c.675_676insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861969.1:p.(Arg227_Arg228dup)', 'slr': 'NP_861969.1:p.(R227_R228dup)'} + assert results['NM_181831.2:c.675_676insCGACGC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_181831.2:c.675_676insCGACGC']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_181831.2:c.675_676insCGACGC']['alt_genomic_loci'], []) + assert results['NM_181831.2:c.675_676insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'pos': '30064360', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_181831.2:c.675_676insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'pos': '29668371', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_181831.2:c.675_676insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'pos': '30064360', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_181831.2:c.675_676insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'pos': '29668371', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_181831.2:c.675_676insCGACGC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181831.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861969.1'} assert 
'NM_181832.2:c.924_925insCGACGC' in list(results.keys()) - assert results['NM_181832.2:c.924_925insCGACGC']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_181832.2:c.924_925insCGACGC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_181832.2:c.924_925insCGACGC']['alt_genomic_loci'], []) - assert results['NM_181832.2:c.924_925insCGACGC']['gene_symbol'] == 'NF2' - assert results['NM_181832.2:c.924_925insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861970.1:p.(Arg310_Arg311dup)', 'slr': 'NP_861970.1:p.(R310_R311dup)'} assert results['NM_181832.2:c.924_925insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' - assert results['NM_181832.2:c.924_925insCGACGC']['genome_context_intronic_sequence'] == '' - assert results['NM_181832.2:c.924_925insCGACGC']['hgvs_lrg_variant'] == '' + assert results['NM_181832.2:c.924_925insCGACGC']['gene_symbol'] == 'NF2' + assert results['NM_181832.2:c.924_925insCGACGC']['gene_ids'] == {'hgnc_id': 'HGNC:7773', 'entrez_gene_id': '4771', 'ucsc_id': 'uc003age.5', 'omim_id': ['607379']} assert results['NM_181832.2:c.924_925insCGACGC']['hgvs_transcript_variant'] == 'NM_181832.2:c.924_925insCGACGC' + assert results['NM_181832.2:c.924_925insCGACGC']['genome_context_intronic_sequence'] == '' + assert results['NM_181832.2:c.924_925insCGACGC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_181832.2:c.924_925insCGACGC']['hgvs_refseqgene_variant'] == '' - assert results['NM_181832.2:c.924_925insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} - assert results['NM_181832.2:c.924_925insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} - assert 
results['NM_181832.2:c.924_925insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} - assert results['NM_181832.2:c.924_925insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} - assert results['NM_181832.2:c.924_925insCGACGC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861970.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181832.2'} + assert results['NM_181832.2:c.924_925insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861970.1:p.(Arg310_Arg311dup)', 'slr': 'NP_861970.1:p.(R310_R311dup)'} + assert results['NM_181832.2:c.924_925insCGACGC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_181832.2:c.924_925insCGACGC']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_181832.2:c.924_925insCGACGC']['alt_genomic_loci'], []) + assert results['NM_181832.2:c.924_925insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'pos': '30064360', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_181832.2:c.924_925insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'pos': '29668371', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_181832.2:c.924_925insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'pos': '30064360', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_181832.2:c.924_925insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'pos': 
'29668371', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_181832.2:c.924_925insCGACGC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181832.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861970.1'} - assert 'NM_181833.2:c.447+26086_447+26087insCGACGC' in list(results.keys()) - assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_181833.2:c.447+26086_447+26087insCGACGC']['alt_genomic_loci'], []) - assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['gene_symbol'] == 'NF2' - assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861971.1:p.?', 'slr': 'NP_861971.1:p.?'} - assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' - assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['genome_context_intronic_sequence'] == 'NC_000022.10(NM_181833.2):c.447+26086_447+26087insCGACGC' - assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['hgvs_lrg_variant'] == '' - assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['hgvs_transcript_variant'] == 'NM_181833.2:c.447+26086_447+26087insCGACGC' - assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['hgvs_refseqgene_variant'] == '' - assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} - assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} - assert 
results['NM_181833.2:c.447+26086_447+26087insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} - assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} - assert results['NM_181833.2:c.447+26086_447+26087insCGACGC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861971.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181833.2'} + assert 'NM_181828.2:c.798_799insCGACGC' in list(results.keys()) + assert results['NM_181828.2:c.798_799insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' + assert results['NM_181828.2:c.798_799insCGACGC']['gene_symbol'] == 'NF2' + assert results['NM_181828.2:c.798_799insCGACGC']['gene_ids'] == {'hgnc_id': 'HGNC:7773', 'entrez_gene_id': '4771', 'ucsc_id': 'uc003age.5', 'omim_id': ['607379']} + assert results['NM_181828.2:c.798_799insCGACGC']['hgvs_transcript_variant'] == 'NM_181828.2:c.798_799insCGACGC' + assert results['NM_181828.2:c.798_799insCGACGC']['genome_context_intronic_sequence'] == '' + assert results['NM_181828.2:c.798_799insCGACGC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_181828.2:c.798_799insCGACGC']['hgvs_refseqgene_variant'] == '' + assert results['NM_181828.2:c.798_799insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861966.1:p.(Arg268_Arg269dup)', 'slr': 'NP_861966.1:p.(R268_R269dup)'} + assert results['NM_181828.2:c.798_799insCGACGC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_181828.2:c.798_799insCGACGC']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_181828.2:c.798_799insCGACGC']['alt_genomic_loci'], []) + assert 
results['NM_181828.2:c.798_799insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'pos': '30064360', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_181828.2:c.798_799insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'pos': '29668371', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_181828.2:c.798_799insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'pos': '30064360', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_181828.2:c.798_799insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'pos': '29668371', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_181828.2:c.798_799insCGACGC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181828.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861966.1'} + + assert 'NM_000268.3:c.924_925insCGACGC' in list(results.keys()) + assert results['NM_000268.3:c.924_925insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' + assert results['NM_000268.3:c.924_925insCGACGC']['gene_symbol'] == 'NF2' + assert results['NM_000268.3:c.924_925insCGACGC']['gene_ids'] == {'hgnc_id': 'HGNC:7773', 'entrez_gene_id': '4771', 'ucsc_id': 'uc003age.5', 'omim_id': ['607379']} + assert results['NM_000268.3:c.924_925insCGACGC']['hgvs_transcript_variant'] == 'NM_000268.3:c.924_925insCGACGC' + assert results['NM_000268.3:c.924_925insCGACGC']['genome_context_intronic_sequence'] == '' + assert results['NM_000268.3:c.924_925insCGACGC']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000268.3:c.924_925insCGACGC']['hgvs_refseqgene_variant'] == 'NG_009057.1:g.69816_69817insCGACGC' + assert 
results['NM_000268.3:c.924_925insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000259.1(LRG_511p1):p.(Arg310_Arg311dup)', 'slr': 'NP_000259.1:p.(R310_R311dup)'} + assert results['NM_000268.3:c.924_925insCGACGC']['hgvs_lrg_transcript_variant'] == 'LRG_511t1:c.924_925insCGACGC' + assert results['NM_000268.3:c.924_925insCGACGC']['hgvs_lrg_variant'] == 'LRG_511:g.69816_69817insCGACGC' + self.assertCountEqual(results['NM_000268.3:c.924_925insCGACGC']['alt_genomic_loci'], []) + assert results['NM_000268.3:c.924_925insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'pos': '30064360', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_000268.3:c.924_925insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'pos': '29668371', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_000268.3:c.924_925insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'pos': '30064360', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_000268.3:c.924_925insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'pos': '29668371', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_000268.3:c.924_925insCGACGC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000268.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000259.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009057.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_511.xml'} assert 'NM_016418.5:c.924_925insCGACGC' in list(results.keys()) - assert results['NM_016418.5:c.924_925insCGACGC']['hgvs_lrg_transcript_variant'] == 'LRG_511t2:c.924_925insCGACGC' - assert 
results['NM_016418.5:c.924_925insCGACGC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_016418.5:c.924_925insCGACGC']['alt_genomic_loci'], []) - assert results['NM_016418.5:c.924_925insCGACGC']['gene_symbol'] == 'NF2' - assert results['NM_016418.5:c.924_925insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057502.2(LRG_511p2):p.(Arg310_Arg311dup)', 'slr': 'NP_057502.2:p.(R310_R311dup)'} assert results['NM_016418.5:c.924_925insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' - assert results['NM_016418.5:c.924_925insCGACGC']['genome_context_intronic_sequence'] == '' - assert results['NM_016418.5:c.924_925insCGACGC']['hgvs_lrg_variant'] == 'LRG_511:g.69816_69817insCGACGC' + assert results['NM_016418.5:c.924_925insCGACGC']['gene_symbol'] == 'NF2' + assert results['NM_016418.5:c.924_925insCGACGC']['gene_ids'] == {'hgnc_id': 'HGNC:7773', 'entrez_gene_id': '4771', 'ucsc_id': 'uc003age.5', 'omim_id': ['607379']} assert results['NM_016418.5:c.924_925insCGACGC']['hgvs_transcript_variant'] == 'NM_016418.5:c.924_925insCGACGC' + assert results['NM_016418.5:c.924_925insCGACGC']['genome_context_intronic_sequence'] == '' + assert results['NM_016418.5:c.924_925insCGACGC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_016418.5:c.924_925insCGACGC']['hgvs_refseqgene_variant'] == 'NG_009057.1:g.69816_69817insCGACGC' - assert results['NM_016418.5:c.924_925insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} - assert results['NM_016418.5:c.924_925insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} - assert results['NM_016418.5:c.924_925insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} - assert results['NM_016418.5:c.924_925insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} - assert results['NM_016418.5:c.924_925insCGACGC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009057.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057502.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016418.5', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_511.xml'} - - assert 'NM_181829.2:c.801_802insCGACGC' in list(results.keys()) - assert results['NM_181829.2:c.801_802insCGACGC']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_181829.2:c.801_802insCGACGC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_181829.2:c.801_802insCGACGC']['alt_genomic_loci'], []) - assert results['NM_181829.2:c.801_802insCGACGC']['gene_symbol'] == 'NF2' - assert results['NM_181829.2:c.801_802insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861967.1:p.(Arg269_Arg270dup)', 'slr': 'NP_861967.1:p.(R269_R270dup)'} - assert results['NM_181829.2:c.801_802insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' - assert results['NM_181829.2:c.801_802insCGACGC']['genome_context_intronic_sequence'] == '' - assert results['NM_181829.2:c.801_802insCGACGC']['hgvs_lrg_variant'] == '' - assert results['NM_181829.2:c.801_802insCGACGC']['hgvs_transcript_variant'] == 'NM_181829.2:c.801_802insCGACGC' - assert results['NM_181829.2:c.801_802insCGACGC']['hgvs_refseqgene_variant'] == '' - assert results['NM_181829.2:c.801_802insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} - assert 
results['NM_181829.2:c.801_802insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} - assert results['NM_181829.2:c.801_802insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} - assert results['NM_181829.2:c.801_802insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} - assert results['NM_181829.2:c.801_802insCGACGC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861967.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181829.2'} - - assert results['flag'] == 'gene_variant' - assert 'NR_156186.1:n.1483_1484insCGACGC' in list(results.keys()) - assert results['NR_156186.1:n.1483_1484insCGACGC']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_156186.1:n.1483_1484insCGACGC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_156186.1:n.1483_1484insCGACGC']['alt_genomic_loci'], []) - assert results['NR_156186.1:n.1483_1484insCGACGC']['gene_symbol'] == 'NF2' - assert results['NR_156186.1:n.1483_1484insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} - assert results['NR_156186.1:n.1483_1484insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' - assert results['NR_156186.1:n.1483_1484insCGACGC']['genome_context_intronic_sequence'] == '' - assert results['NR_156186.1:n.1483_1484insCGACGC']['hgvs_lrg_variant'] == '' - assert results['NR_156186.1:n.1483_1484insCGACGC']['hgvs_transcript_variant'] == 'NR_156186.1:n.1483_1484insCGACGC' - assert results['NR_156186.1:n.1483_1484insCGACGC']['hgvs_refseqgene_variant'] 
== '' - assert results['NR_156186.1:n.1483_1484insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} - assert 'hg38' not in list(results['NR_156186.1:n.1483_1484insCGACGC']['primary_assembly_loci'].keys()) - assert results['NR_156186.1:n.1483_1484insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} - assert 'grch38' not in list(results['NR_156186.1:n.1483_1484insCGACGC']['primary_assembly_loci'].keys()) - assert results['NR_156186.1:n.1483_1484insCGACGC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_156186.1'} - - assert 'NM_181831.2:c.675_676insCGACGC' in list(results.keys()) - assert results['NM_181831.2:c.675_676insCGACGC']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_181831.2:c.675_676insCGACGC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_181831.2:c.675_676insCGACGC']['alt_genomic_loci'], []) - assert results['NM_181831.2:c.675_676insCGACGC']['gene_symbol'] == 'NF2' - assert results['NM_181831.2:c.675_676insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_861969.1:p.(Arg227_Arg228dup)', 'slr': 'NP_861969.1:p.(R227_R228dup)'} - assert results['NM_181831.2:c.675_676insCGACGC']['submitted_variant'] == '22-30064360-G-GCGACGC' - assert results['NM_181831.2:c.675_676insCGACGC']['genome_context_intronic_sequence'] == '' - assert results['NM_181831.2:c.675_676insCGACGC']['hgvs_lrg_variant'] == '' - assert results['NM_181831.2:c.675_676insCGACGC']['hgvs_transcript_variant'] == 'NM_181831.2:c.675_676insCGACGC' - assert results['NM_181831.2:c.675_676insCGACGC']['hgvs_refseqgene_variant'] == '' - assert results['NM_181831.2:c.675_676insCGACGC']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} - assert results['NM_181831.2:c.675_676insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} - assert results['NM_181831.2:c.675_676insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '30064360', 'alt': 'GCGACGC'}} - assert results['NM_181831.2:c.675_676insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'ref': 'G', 'pos': '29668371', 'alt': 'GCGACGC'}} - assert results['NM_181831.2:c.675_676insCGACGC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_861969.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_181831.2'} - + assert results['NM_016418.5:c.924_925insCGACGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_057502.2(LRG_511p2):p.(Arg310_Arg311dup)', 'slr': 'NP_057502.2:p.(R310_R311dup)'} + assert results['NM_016418.5:c.924_925insCGACGC']['hgvs_lrg_transcript_variant'] == 'LRG_511t2:c.924_925insCGACGC' + assert results['NM_016418.5:c.924_925insCGACGC']['hgvs_lrg_variant'] == 'LRG_511:g.69816_69817insCGACGC' + self.assertCountEqual(results['NM_016418.5:c.924_925insCGACGC']['alt_genomic_loci'], []) + assert results['NM_016418.5:c.924_925insCGACGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': 'chr22', 'pos': '30064360', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_016418.5:c.924_925insCGACGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': 'chr22', 'pos': 
'29668371', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_016418.5:c.924_925insCGACGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000022.10:g.30064360_30064361insCGACGC', 'vcf': {'chr': '22', 'pos': '30064360', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_016418.5:c.924_925insCGACGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000022.11:g.29668371_29668372insCGACGC', 'vcf': {'chr': '22', 'pos': '29668371', 'ref': 'G', 'alt': 'GCGACGC'}} + assert results['NM_016418.5:c.924_925insCGACGC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_016418.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_057502.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009057.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_511.xml'} def test_variant280(self): variant = '3-10188187-TGTCCCGATAG-T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' + assert 'NM_000551.3:c.341-7_343del' in list(results.keys()) + assert results['NM_000551.3:c.341-7_343del']['submitted_variant'] == '3-10188187-TGTCCCGATAG-T' + assert results['NM_000551.3:c.341-7_343del']['gene_symbol'] == 'VHL' + assert results['NM_000551.3:c.341-7_343del']['gene_ids'] == {'hgnc_id': 'HGNC:12687', 'entrez_gene_id': '7428', 'ucsc_id': 'uc003bvc.4', 'omim_id': ['608537']} + assert results['NM_000551.3:c.341-7_343del']['hgvs_transcript_variant'] == 'NM_000551.3:c.341-7_343del' + assert results['NM_000551.3:c.341-7_343del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_000551.3):c.341-7_343del' + assert results['NM_000551.3:c.341-7_343del']['refseqgene_context_intronic_sequence'] == 'NG_008212.3(NM_000551.3):c.341-7_343del' + assert results['NM_000551.3:c.341-7_343del']['hgvs_refseqgene_variant'] == 'NG_008212.3:g.9873_9882del' + assert results['NM_000551.3:c.341-7_343del']['hgvs_predicted_protein_consequence'] == 
{'tlr': 'NP_000542.1(LRG_322p1):p.?', 'slr': 'NP_000542.1:p.?'} + assert results['NM_000551.3:c.341-7_343del']['hgvs_lrg_transcript_variant'] == 'LRG_322t1:c.341-7_343del' + assert results['NM_000551.3:c.341-7_343del']['hgvs_lrg_variant'] == 'LRG_322:g.9873_9882del' + self.assertCountEqual(results['NM_000551.3:c.341-7_343del']['alt_genomic_loci'], []) + assert results['NM_000551.3:c.341-7_343del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.10188191_10188200del', 'vcf': {'chr': 'chr3', 'pos': '10188187', 'ref': 'TGTCCCGATAG', 'alt': 'T'}} + assert results['NM_000551.3:c.341-7_343del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.10146507_10146516del', 'vcf': {'chr': 'chr3', 'pos': '10146503', 'ref': 'TGTCCCGATAG', 'alt': 'T'}} + assert results['NM_000551.3:c.341-7_343del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.10188191_10188200del', 'vcf': {'chr': '3', 'pos': '10188187', 'ref': 'TGTCCCGATAG', 'alt': 'T'}} + assert results['NM_000551.3:c.341-7_343del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.10146507_10146516del', 'vcf': {'chr': '3', 'pos': '10146503', 'ref': 'TGTCCCGATAG', 'alt': 'T'}} + assert results['NM_000551.3:c.341-7_343del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000551.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000542.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008212.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_322.xml'} + assert 'NM_198156.2:c.341-3280_341-3271del' in list(results.keys()) - assert results['NM_198156.2:c.341-3280_341-3271del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_198156.2:c.341-3280_341-3271del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_198156.2:c.341-3280_341-3271del']['alt_genomic_loci'], []) - assert 
results['NM_198156.2:c.341-3280_341-3271del']['gene_symbol'] == 'VHL' - assert results['NM_198156.2:c.341-3280_341-3271del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_937799.1:p.?', 'slr': 'NP_937799.1:p.?'} assert results['NM_198156.2:c.341-3280_341-3271del']['submitted_variant'] == '3-10188187-TGTCCCGATAG-T' - assert results['NM_198156.2:c.341-3280_341-3271del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_198156.2):c.341-3280_341-3271del' - assert results['NM_198156.2:c.341-3280_341-3271del']['hgvs_lrg_variant'] == '' + assert results['NM_198156.2:c.341-3280_341-3271del']['gene_symbol'] == 'VHL' + assert results['NM_198156.2:c.341-3280_341-3271del']['gene_ids'] == {'hgnc_id': 'HGNC:12687', 'entrez_gene_id': '7428', 'ucsc_id': 'uc003bvc.4', 'omim_id': ['608537']} assert results['NM_198156.2:c.341-3280_341-3271del']['hgvs_transcript_variant'] == 'NM_198156.2:c.341-3280_341-3271del' + assert results['NM_198156.2:c.341-3280_341-3271del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_198156.2):c.341-3280_341-3271del' + assert results['NM_198156.2:c.341-3280_341-3271del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_198156.2:c.341-3280_341-3271del']['hgvs_refseqgene_variant'] == '' - assert results['NM_198156.2:c.341-3280_341-3271del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.10188191_10188200del', 'vcf': {'chr': 'chr3', 'ref': 'TGTCCCGATAG', 'pos': '10188187', 'alt': 'T'}} - assert results['NM_198156.2:c.341-3280_341-3271del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.10146507_10146516del', 'vcf': {'chr': 'chr3', 'ref': 'TGTCCCGATAG', 'pos': '10146503', 'alt': 'T'}} - assert results['NM_198156.2:c.341-3280_341-3271del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.10188191_10188200del', 'vcf': {'chr': '3', 'ref': 'TGTCCCGATAG', 'pos': '10188187', 'alt': 'T'}} - assert 
results['NM_198156.2:c.341-3280_341-3271del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.10146507_10146516del', 'vcf': {'chr': '3', 'ref': 'TGTCCCGATAG', 'pos': '10146503', 'alt': 'T'}} - assert results['NM_198156.2:c.341-3280_341-3271del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_937799.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198156.2'} + assert results['NM_198156.2:c.341-3280_341-3271del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_937799.1:p.?', 'slr': 'NP_937799.1:p.?'} + assert results['NM_198156.2:c.341-3280_341-3271del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_198156.2:c.341-3280_341-3271del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_198156.2:c.341-3280_341-3271del']['alt_genomic_loci'], []) + assert results['NM_198156.2:c.341-3280_341-3271del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.10188191_10188200del', 'vcf': {'chr': 'chr3', 'pos': '10188187', 'ref': 'TGTCCCGATAG', 'alt': 'T'}} + assert results['NM_198156.2:c.341-3280_341-3271del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.10146507_10146516del', 'vcf': {'chr': 'chr3', 'pos': '10146503', 'ref': 'TGTCCCGATAG', 'alt': 'T'}} + assert results['NM_198156.2:c.341-3280_341-3271del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.10188191_10188200del', 'vcf': {'chr': '3', 'pos': '10188187', 'ref': 'TGTCCCGATAG', 'alt': 'T'}} + assert results['NM_198156.2:c.341-3280_341-3271del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.10146507_10146516del', 'vcf': {'chr': '3', 'pos': '10146503', 'ref': 'TGTCCCGATAG', 'alt': 'T'}} + assert results['NM_198156.2:c.341-3280_341-3271del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198156.2', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_937799.1'} - assert results['flag'] == 'gene_variant' assert 'NM_001354723.1:c.*18-3280_*18-3271del' in list(results.keys()) - assert results['NM_001354723.1:c.*18-3280_*18-3271del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001354723.1:c.*18-3280_*18-3271del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001354723.1:c.*18-3280_*18-3271del']['alt_genomic_loci'], []) - assert results['NM_001354723.1:c.*18-3280_*18-3271del']['gene_symbol'] == 'VHL' - assert results['NM_001354723.1:c.*18-3280_*18-3271del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001341652.1:p.?', 'slr': 'NP_001341652.1:p.?'} assert results['NM_001354723.1:c.*18-3280_*18-3271del']['submitted_variant'] == '3-10188187-TGTCCCGATAG-T' - assert results['NM_001354723.1:c.*18-3280_*18-3271del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_001354723.1):c.*18-3280_*18-3271del' - assert results['NM_001354723.1:c.*18-3280_*18-3271del']['hgvs_lrg_variant'] == '' + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['gene_symbol'] == 'VHL' + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['gene_ids'] == {'hgnc_id': 'HGNC:12687', 'entrez_gene_id': '7428', 'ucsc_id': 'uc003bvc.4', 'omim_id': ['608537']} assert results['NM_001354723.1:c.*18-3280_*18-3271del']['hgvs_transcript_variant'] == 'NM_001354723.1:c.*18-3280_*18-3271del' + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_001354723.1):c.*18-3280_*18-3271del' + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001354723.1:c.*18-3280_*18-3271del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001354723.1:c.*18-3280_*18-3271del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.10188191_10188200del', 'vcf': {'chr': 'chr3', 'ref': 'TGTCCCGATAG', 'pos': '10188187', 
'alt': 'T'}} - assert results['NM_001354723.1:c.*18-3280_*18-3271del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.10146507_10146516del', 'vcf': {'chr': 'chr3', 'ref': 'TGTCCCGATAG', 'pos': '10146503', 'alt': 'T'}} - assert results['NM_001354723.1:c.*18-3280_*18-3271del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.10188191_10188200del', 'vcf': {'chr': '3', 'ref': 'TGTCCCGATAG', 'pos': '10188187', 'alt': 'T'}} - assert results['NM_001354723.1:c.*18-3280_*18-3271del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.10146507_10146516del', 'vcf': {'chr': '3', 'ref': 'TGTCCCGATAG', 'pos': '10146503', 'alt': 'T'}} - assert results['NM_001354723.1:c.*18-3280_*18-3271del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341652.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354723.1'} - - assert 'NM_000551.3:c.341-7_343del' in list(results.keys()) - assert results['NM_000551.3:c.341-7_343del']['hgvs_lrg_transcript_variant'] == 'LRG_322t1:c.341-7_343del' - assert results['NM_000551.3:c.341-7_343del']['refseqgene_context_intronic_sequence'] == 'NG_008212.3(NM_000551.3):c.341-7_343del' - self.assertCountEqual(results['NM_000551.3:c.341-7_343del']['alt_genomic_loci'], []) - assert results['NM_000551.3:c.341-7_343del']['gene_symbol'] == 'VHL' - assert results['NM_000551.3:c.341-7_343del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000542.1(LRG_322p1):p.?', 'slr': 'NP_000542.1:p.?'} - assert results['NM_000551.3:c.341-7_343del']['submitted_variant'] == '3-10188187-TGTCCCGATAG-T' - assert results['NM_000551.3:c.341-7_343del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_000551.3):c.341-7_343del' - assert results['NM_000551.3:c.341-7_343del']['hgvs_lrg_variant'] == 'LRG_322:g.9873_9882del' - assert results['NM_000551.3:c.341-7_343del']['hgvs_transcript_variant'] == 'NM_000551.3:c.341-7_343del' - 
assert results['NM_000551.3:c.341-7_343del']['hgvs_refseqgene_variant'] == 'NG_008212.3:g.9873_9882del' - assert results['NM_000551.3:c.341-7_343del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.10188191_10188200del', 'vcf': {'chr': 'chr3', 'ref': 'TGTCCCGATAG', 'pos': '10188187', 'alt': 'T'}} - assert results['NM_000551.3:c.341-7_343del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.10146507_10146516del', 'vcf': {'chr': 'chr3', 'ref': 'TGTCCCGATAG', 'pos': '10146503', 'alt': 'T'}} - assert results['NM_000551.3:c.341-7_343del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.10188191_10188200del', 'vcf': {'chr': '3', 'ref': 'TGTCCCGATAG', 'pos': '10188187', 'alt': 'T'}} - assert results['NM_000551.3:c.341-7_343del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.10146507_10146516del', 'vcf': {'chr': '3', 'ref': 'TGTCCCGATAG', 'pos': '10146503', 'alt': 'T'}} - assert results['NM_000551.3:c.341-7_343del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008212.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000542.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000551.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_322.xml'} - + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001341652.1:p.?', 'slr': 'NP_001341652.1:p.?'} + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001354723.1:c.*18-3280_*18-3271del']['alt_genomic_loci'], []) + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.10188191_10188200del', 'vcf': {'chr': 'chr3', 'pos': '10188187', 
'ref': 'TGTCCCGATAG', 'alt': 'T'}} + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.10146507_10146516del', 'vcf': {'chr': 'chr3', 'pos': '10146503', 'ref': 'TGTCCCGATAG', 'alt': 'T'}} + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.10188191_10188200del', 'vcf': {'chr': '3', 'pos': '10188187', 'ref': 'TGTCCCGATAG', 'alt': 'T'}} + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.10146507_10146516del', 'vcf': {'chr': '3', 'pos': '10146503', 'ref': 'TGTCCCGATAG', 'alt': 'T'}} + assert results['NM_001354723.1:c.*18-3280_*18-3271del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354723.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341652.1'} def test_variant281(self): variant = '3-50402127-T-G' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_001005505.2:c.3408A>C' in list(results.keys()) - assert results['NM_001005505.2:c.3408A>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001005505.2:c.3408A>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001005505.2:c.3408A>C']['alt_genomic_loci'], []) - assert results['NM_001005505.2:c.3408A>C']['gene_symbol'] == 'CACNA2D2' - assert results['NM_001005505.2:c.3408A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001005505.1:p.(Gln1136His)', 'slr': 'NP_001005505.1:p.(Q1136H)'} - assert results['NM_001005505.2:c.3408A>C']['submitted_variant'] == '3-50402127-T-G' - assert results['NM_001005505.2:c.3408A>C']['genome_context_intronic_sequence'] == '' - assert results['NM_001005505.2:c.3408A>C']['hgvs_lrg_variant'] == '' - assert results['NM_001005505.2:c.3408A>C']['hgvs_transcript_variant'] == 
'NM_001005505.2:c.3408A>C' - assert results['NM_001005505.2:c.3408A>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_001005505.2:c.3408A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} - assert results['NM_001005505.2:c.3408A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': 'chr3', 'ref': 'T', 'pos': '50364696', 'alt': 'G'}} - assert results['NM_001005505.2:c.3408A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} - assert results['NM_001005505.2:c.3408A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50364696', 'alt': 'G'}} - assert results['NM_001005505.2:c.3408A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001005505.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001005505.2'} - - assert 'NM_006030.2:c.3402A>C' in list(results.keys()) - assert results['NM_006030.2:c.3402A>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_006030.2:c.3402A>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_006030.2:c.3402A>C']['alt_genomic_loci'], []) - assert results['NM_006030.2:c.3402A>C']['gene_symbol'] == 'CACNA2D2' - assert results['NM_006030.2:c.3402A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006021.2:p.(Gln1134His)', 'slr': 'NP_006021.2:p.(Q1134H)'} - assert results['NM_006030.2:c.3402A>C']['submitted_variant'] == '3-50402127-T-G' - assert results['NM_006030.2:c.3402A>C']['genome_context_intronic_sequence'] == '' - assert results['NM_006030.2:c.3402A>C']['hgvs_lrg_variant'] == '' - assert results['NM_006030.2:c.3402A>C']['hgvs_transcript_variant'] == 
'NM_006030.2:c.3402A>C' - assert results['NM_006030.2:c.3402A>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_006030.2:c.3402A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} - assert 'hg38' not in list(results['NM_006030.2:c.3402A>C']['primary_assembly_loci'].keys()) - assert results['NM_006030.2:c.3402A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} - assert 'grch38' not in list(results['NM_006030.2:c.3402A>C']['primary_assembly_loci'].keys()) - assert results['NM_006030.2:c.3402A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_006021.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006030.2'} - - assert 'NM_001174051.1:c.3423A>C' in list(results.keys()) - assert results['NM_001174051.1:c.3423A>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001174051.1:c.3423A>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001174051.1:c.3423A>C']['alt_genomic_loci'], []) - assert results['NM_001174051.1:c.3423A>C']['gene_symbol'] == 'CACNA2D2' - assert results['NM_001174051.1:c.3423A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167522.1:p.(Gln1141His)', 'slr': 'NP_001167522.1:p.(Q1141H)'} - assert results['NM_001174051.1:c.3423A>C']['submitted_variant'] == '3-50402127-T-G' - assert results['NM_001174051.1:c.3423A>C']['genome_context_intronic_sequence'] == '' - assert results['NM_001174051.1:c.3423A>C']['hgvs_lrg_variant'] == '' - assert results['NM_001174051.1:c.3423A>C']['hgvs_transcript_variant'] == 'NM_001174051.1:c.3423A>C' - assert results['NM_001174051.1:c.3423A>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_001174051.1:c.3423A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} - assert 'hg38' not in list(results['NM_001174051.1:c.3423A>C']['primary_assembly_loci'].keys()) - assert results['NM_001174051.1:c.3423A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} - assert 'grch38' not in list(results['NM_001174051.1:c.3423A>C']['primary_assembly_loci'].keys()) - assert results['NM_001174051.1:c.3423A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167522.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001174051.1'} + assert results['flag'] == 'gene_variant' + assert 'NM_001291101.1:c.3201A>C' in list(results.keys()) + assert results['NM_001291101.1:c.3201A>C']['submitted_variant'] == '3-50402127-T-G' + assert results['NM_001291101.1:c.3201A>C']['gene_symbol'] == 'CACNA2D2' + assert results['NM_001291101.1:c.3201A>C']['gene_ids'] == {'hgnc_id': 'HGNC:1400', 'entrez_gene_id': '9254', 'ucsc_id': 'uc003daq.4', 'omim_id': ['607082']} + assert results['NM_001291101.1:c.3201A>C']['hgvs_transcript_variant'] == 'NM_001291101.1:c.3201A>C' + assert results['NM_001291101.1:c.3201A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_001291101.1:c.3201A>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001291101.1:c.3201A>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_001291101.1:c.3201A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278030.1:p.(Gln1067His)', 'slr': 'NP_001278030.1:p.(Q1067H)'} + assert results['NM_001291101.1:c.3201A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001291101.1:c.3201A>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001291101.1:c.3201A>C']['alt_genomic_loci'], []) + assert results['NM_001291101.1:c.3201A>C']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'pos': '50402127', 'ref': 'T', 'alt': 'G'}} + assert results['NM_001291101.1:c.3201A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': 'chr3', 'pos': '50364696', 'ref': 'T', 'alt': 'G'}} + assert results['NM_001291101.1:c.3201A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'pos': '50402127', 'ref': 'T', 'alt': 'G'}} + assert results['NM_001291101.1:c.3201A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': '3', 'pos': '50364696', 'ref': 'T', 'alt': 'G'}} + assert results['NM_001291101.1:c.3201A>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291101.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278030.1'} assert 'NM_001174051.2:c.3423A>C' in list(results.keys()) - assert results['NM_001174051.2:c.3423A>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001174051.2:c.3423A>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001174051.2:c.3423A>C']['alt_genomic_loci'], []) - assert results['NM_001174051.2:c.3423A>C']['gene_symbol'] == 'CACNA2D2' - assert results['NM_001174051.2:c.3423A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167522.1:p.(Gln1141His)', 'slr': 'NP_001167522.1:p.(Q1141H)'} assert results['NM_001174051.2:c.3423A>C']['submitted_variant'] == '3-50402127-T-G' - assert results['NM_001174051.2:c.3423A>C']['genome_context_intronic_sequence'] == '' - assert results['NM_001174051.2:c.3423A>C']['hgvs_lrg_variant'] == '' + assert results['NM_001174051.2:c.3423A>C']['gene_symbol'] == 'CACNA2D2' + assert results['NM_001174051.2:c.3423A>C']['gene_ids'] == {'hgnc_id': 'HGNC:1400', 'entrez_gene_id': '9254', 'ucsc_id': 'uc003daq.4', 'omim_id': ['607082']} assert 
results['NM_001174051.2:c.3423A>C']['hgvs_transcript_variant'] == 'NM_001174051.2:c.3423A>C' + assert results['NM_001174051.2:c.3423A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_001174051.2:c.3423A>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001174051.2:c.3423A>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_001174051.2:c.3423A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} - assert results['NM_001174051.2:c.3423A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': 'chr3', 'ref': 'T', 'pos': '50364696', 'alt': 'G'}} - assert results['NM_001174051.2:c.3423A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} - assert results['NM_001174051.2:c.3423A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50364696', 'alt': 'G'}} - assert results['NM_001174051.2:c.3423A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167522.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001174051.2'} + assert results['NM_001174051.2:c.3423A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167522.1:p.(Gln1141His)', 'slr': 'NP_001167522.1:p.(Q1141H)'} + assert results['NM_001174051.2:c.3423A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001174051.2:c.3423A>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001174051.2:c.3423A>C']['alt_genomic_loci'], []) + assert results['NM_001174051.2:c.3423A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'pos': '50402127', 'ref': 'T', 'alt': 'G'}} + 
assert results['NM_001174051.2:c.3423A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': 'chr3', 'pos': '50364696', 'ref': 'T', 'alt': 'G'}} + assert results['NM_001174051.2:c.3423A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'pos': '50402127', 'ref': 'T', 'alt': 'G'}} + assert results['NM_001174051.2:c.3423A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': '3', 'pos': '50364696', 'ref': 'T', 'alt': 'G'}} + assert results['NM_001174051.2:c.3423A>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001174051.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167522.1'} + + assert 'NR_111912.1:n.443-1601T>G' in list(results.keys()) + assert results['NR_111912.1:n.443-1601T>G']['submitted_variant'] == '3-50402127-T-G' + assert results['NR_111912.1:n.443-1601T>G']['gene_symbol'] == 'CYB561D2' + assert results['NR_111912.1:n.443-1601T>G']['gene_ids'] == {'hgnc_id': 'HGNC:30253', 'entrez_gene_id': '11068', 'ucsc_id': 'uc003dam.4', 'omim_id': ['607068']} + assert results['NR_111912.1:n.443-1601T>G']['hgvs_transcript_variant'] == 'NR_111912.1:n.443-1601T>G' + assert results['NR_111912.1:n.443-1601T>G']['genome_context_intronic_sequence'] == 'NC_000003.11(NR_111912.1):c.443-1601T>G' + assert results['NR_111912.1:n.443-1601T>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_111912.1:n.443-1601T>G']['hgvs_refseqgene_variant'] == '' + assert results['NR_111912.1:n.443-1601T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_111912.1:n.443-1601T>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_111912.1:n.443-1601T>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_111912.1:n.443-1601T>G']['alt_genomic_loci'], []) 
+ assert results['NR_111912.1:n.443-1601T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'pos': '50402127', 'ref': 'T', 'alt': 'G'}} + assert results['NR_111912.1:n.443-1601T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': 'chr3', 'pos': '50364696', 'ref': 'T', 'alt': 'G'}} + assert results['NR_111912.1:n.443-1601T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'pos': '50402127', 'ref': 'T', 'alt': 'G'}} + assert results['NR_111912.1:n.443-1601T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': '3', 'pos': '50364696', 'ref': 'T', 'alt': 'G'}} + assert results['NR_111912.1:n.443-1601T>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_111912.1'} assert 'NM_006030.3:c.3402A>C' in list(results.keys()) - assert results['NM_006030.3:c.3402A>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_006030.3:c.3402A>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_006030.3:c.3402A>C']['alt_genomic_loci'], []) - assert results['NM_006030.3:c.3402A>C']['gene_symbol'] == 'CACNA2D2' - assert results['NM_006030.3:c.3402A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006021.2:p.(Gln1134His)', 'slr': 'NP_006021.2:p.(Q1134H)'} assert results['NM_006030.3:c.3402A>C']['submitted_variant'] == '3-50402127-T-G' - assert results['NM_006030.3:c.3402A>C']['genome_context_intronic_sequence'] == '' - assert results['NM_006030.3:c.3402A>C']['hgvs_lrg_variant'] == '' + assert results['NM_006030.3:c.3402A>C']['gene_symbol'] == 'CACNA2D2' + assert results['NM_006030.3:c.3402A>C']['gene_ids'] == {'hgnc_id': 'HGNC:1400', 'entrez_gene_id': '9254', 'ucsc_id': 'uc003daq.4', 'omim_id': ['607082']} assert 
results['NM_006030.3:c.3402A>C']['hgvs_transcript_variant'] == 'NM_006030.3:c.3402A>C' + assert results['NM_006030.3:c.3402A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_006030.3:c.3402A>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_006030.3:c.3402A>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_006030.3:c.3402A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} - assert results['NM_006030.3:c.3402A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': 'chr3', 'ref': 'T', 'pos': '50364696', 'alt': 'G'}} - assert results['NM_006030.3:c.3402A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} - assert results['NM_006030.3:c.3402A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50364696', 'alt': 'G'}} - assert results['NM_006030.3:c.3402A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_006021.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006030.3'} + assert results['NM_006030.3:c.3402A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006021.2:p.(Gln1134His)', 'slr': 'NP_006021.2:p.(Q1134H)'} + assert results['NM_006030.3:c.3402A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_006030.3:c.3402A>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_006030.3:c.3402A>C']['alt_genomic_loci'], []) + assert results['NM_006030.3:c.3402A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'pos': '50402127', 'ref': 'T', 'alt': 'G'}} + assert 
results['NM_006030.3:c.3402A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': 'chr3', 'pos': '50364696', 'ref': 'T', 'alt': 'G'}} + assert results['NM_006030.3:c.3402A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'pos': '50402127', 'ref': 'T', 'alt': 'G'}} + assert results['NM_006030.3:c.3402A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': '3', 'pos': '50364696', 'ref': 'T', 'alt': 'G'}} + assert results['NM_006030.3:c.3402A>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006030.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_006021.2'} - assert 'NM_001291101.1:c.3201A>C' in list(results.keys()) - assert results['NM_001291101.1:c.3201A>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001291101.1:c.3201A>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001291101.1:c.3201A>C']['alt_genomic_loci'], []) - assert results['NM_001291101.1:c.3201A>C']['gene_symbol'] == 'CACNA2D2' - assert results['NM_001291101.1:c.3201A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278030.1:p.(Gln1067His)', 'slr': 'NP_001278030.1:p.(Q1067H)'} - assert results['NM_001291101.1:c.3201A>C']['submitted_variant'] == '3-50402127-T-G' - assert results['NM_001291101.1:c.3201A>C']['genome_context_intronic_sequence'] == '' - assert results['NM_001291101.1:c.3201A>C']['hgvs_lrg_variant'] == '' - assert results['NM_001291101.1:c.3201A>C']['hgvs_transcript_variant'] == 'NM_001291101.1:c.3201A>C' - assert results['NM_001291101.1:c.3201A>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_001291101.1:c.3201A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} - 
assert results['NM_001291101.1:c.3201A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': 'chr3', 'ref': 'T', 'pos': '50364696', 'alt': 'G'}} - assert results['NM_001291101.1:c.3201A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} - assert results['NM_001291101.1:c.3201A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50364696', 'alt': 'G'}} - assert results['NM_001291101.1:c.3201A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278030.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291101.1'} + assert 'NM_006030.2:c.3402A>C' in list(results.keys()) + assert results['NM_006030.2:c.3402A>C']['submitted_variant'] == '3-50402127-T-G' + assert results['NM_006030.2:c.3402A>C']['gene_symbol'] == 'CACNA2D2' + assert results['NM_006030.2:c.3402A>C']['gene_ids'] == {'hgnc_id': 'HGNC:1400', 'entrez_gene_id': '9254', 'ucsc_id': 'uc003daq.4', 'omim_id': ['607082']} + assert results['NM_006030.2:c.3402A>C']['hgvs_transcript_variant'] == 'NM_006030.2:c.3402A>C' + assert results['NM_006030.2:c.3402A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_006030.2:c.3402A>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_006030.2:c.3402A>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_006030.2:c.3402A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006021.2:p.(Gln1134His)', 'slr': 'NP_006021.2:p.(Q1134H)'} + assert results['NM_006030.2:c.3402A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_006030.2:c.3402A>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_006030.2:c.3402A>C']['alt_genomic_loci'], []) + assert results['NM_006030.2:c.3402A>C']['primary_assembly_loci']['hg19'] 
== {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'pos': '50402127', 'ref': 'T', 'alt': 'G'}} + assert 'hg38' not in list(results['NM_006030.2:c.3402A>C']['primary_assembly_loci'].keys()) + assert results['NM_006030.2:c.3402A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'pos': '50402127', 'ref': 'T', 'alt': 'G'}} + assert 'grch38' not in list(results['NM_006030.2:c.3402A>C']['primary_assembly_loci'].keys()) + assert results['NM_006030.2:c.3402A>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006030.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_006021.2'} - assert results['flag'] == 'gene_variant' - assert 'NR_111912.1:n.443-1601T>G' in list(results.keys()) - assert results['NR_111912.1:n.443-1601T>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_111912.1:n.443-1601T>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_111912.1:n.443-1601T>G']['alt_genomic_loci'], []) - assert results['NR_111912.1:n.443-1601T>G']['gene_symbol'] == 'CYB561D2' - assert results['NR_111912.1:n.443-1601T>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} - assert results['NR_111912.1:n.443-1601T>G']['submitted_variant'] == '3-50402127-T-G' - assert results['NR_111912.1:n.443-1601T>G']['genome_context_intronic_sequence'] == 'NC_000003.11(NR_111912.1):c.443-1601T>G' - assert results['NR_111912.1:n.443-1601T>G']['hgvs_lrg_variant'] == '' - assert results['NR_111912.1:n.443-1601T>G']['hgvs_transcript_variant'] == 'NR_111912.1:n.443-1601T>G' - assert results['NR_111912.1:n.443-1601T>G']['hgvs_refseqgene_variant'] == '' - assert results['NR_111912.1:n.443-1601T>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} - assert 
results['NR_111912.1:n.443-1601T>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': 'chr3', 'ref': 'T', 'pos': '50364696', 'alt': 'G'}} - assert results['NR_111912.1:n.443-1601T>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} - assert results['NR_111912.1:n.443-1601T>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50364696', 'alt': 'G'}} - assert results['NR_111912.1:n.443-1601T>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_111912.1'} + assert 'NM_001005505.2:c.3408A>C' in list(results.keys()) + assert results['NM_001005505.2:c.3408A>C']['submitted_variant'] == '3-50402127-T-G' + assert results['NM_001005505.2:c.3408A>C']['gene_symbol'] == 'CACNA2D2' + assert results['NM_001005505.2:c.3408A>C']['gene_ids'] == {'hgnc_id': 'HGNC:1400', 'entrez_gene_id': '9254', 'ucsc_id': 'uc003daq.4', 'omim_id': ['607082']} + assert results['NM_001005505.2:c.3408A>C']['hgvs_transcript_variant'] == 'NM_001005505.2:c.3408A>C' + assert results['NM_001005505.2:c.3408A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_001005505.2:c.3408A>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001005505.2:c.3408A>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_001005505.2:c.3408A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001005505.1:p.(Gln1136His)', 'slr': 'NP_001005505.1:p.(Q1136H)'} + assert results['NM_001005505.2:c.3408A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001005505.2:c.3408A>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001005505.2:c.3408A>C']['alt_genomic_loci'], []) + assert results['NM_001005505.2:c.3408A>C']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'pos': '50402127', 'ref': 'T', 'alt': 'G'}} + assert results['NM_001005505.2:c.3408A>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': 'chr3', 'pos': '50364696', 'ref': 'T', 'alt': 'G'}} + assert results['NM_001005505.2:c.3408A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'pos': '50402127', 'ref': 'T', 'alt': 'G'}} + assert results['NM_001005505.2:c.3408A>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50364696T>G', 'vcf': {'chr': '3', 'pos': '50364696', 'ref': 'T', 'alt': 'G'}} + assert results['NM_001005505.2:c.3408A>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001005505.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001005505.1'} assert 'NM_001005505.1:c.3408A>C' in list(results.keys()) - assert results['NM_001005505.1:c.3408A>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001005505.1:c.3408A>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001005505.1:c.3408A>C']['alt_genomic_loci'], []) - assert results['NM_001005505.1:c.3408A>C']['gene_symbol'] == 'CACNA2D2' - assert results['NM_001005505.1:c.3408A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001005505.1:p.(Gln1136His)', 'slr': 'NP_001005505.1:p.(Q1136H)'} assert results['NM_001005505.1:c.3408A>C']['submitted_variant'] == '3-50402127-T-G' - assert results['NM_001005505.1:c.3408A>C']['genome_context_intronic_sequence'] == '' - assert results['NM_001005505.1:c.3408A>C']['hgvs_lrg_variant'] == '' + assert results['NM_001005505.1:c.3408A>C']['gene_symbol'] == 'CACNA2D2' + assert results['NM_001005505.1:c.3408A>C']['gene_ids'] == {'hgnc_id': 'HGNC:1400', 'entrez_gene_id': '9254', 'ucsc_id': 'uc003daq.4', 'omim_id': ['607082']} assert 
results['NM_001005505.1:c.3408A>C']['hgvs_transcript_variant'] == 'NM_001005505.1:c.3408A>C' + assert results['NM_001005505.1:c.3408A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_001005505.1:c.3408A>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001005505.1:c.3408A>C']['hgvs_refseqgene_variant'] == '' - assert results['NM_001005505.1:c.3408A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} + assert results['NM_001005505.1:c.3408A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001005505.1:p.(Gln1136His)', 'slr': 'NP_001005505.1:p.(Q1136H)'} + assert results['NM_001005505.1:c.3408A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001005505.1:c.3408A>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001005505.1:c.3408A>C']['alt_genomic_loci'], []) + assert results['NM_001005505.1:c.3408A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'pos': '50402127', 'ref': 'T', 'alt': 'G'}} assert 'hg38' not in list(results['NM_001005505.1:c.3408A>C']['primary_assembly_loci'].keys()) - assert results['NM_001005505.1:c.3408A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'ref': 'T', 'pos': '50402127', 'alt': 'G'}} + assert results['NM_001005505.1:c.3408A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'pos': '50402127', 'ref': 'T', 'alt': 'G'}} assert 'grch38' not in list(results['NM_001005505.1:c.3408A>C']['primary_assembly_loci'].keys()) - assert results['NM_001005505.1:c.3408A>C']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001005505.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001005505.1'} + assert 
results['NM_001005505.1:c.3408A>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001005505.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001005505.1'} + assert 'NM_001174051.1:c.3423A>C' in list(results.keys()) + assert results['NM_001174051.1:c.3423A>C']['submitted_variant'] == '3-50402127-T-G' + assert results['NM_001174051.1:c.3423A>C']['gene_symbol'] == 'CACNA2D2' + assert results['NM_001174051.1:c.3423A>C']['gene_ids'] == {'hgnc_id': 'HGNC:1400', 'entrez_gene_id': '9254', 'ucsc_id': 'uc003daq.4', 'omim_id': ['607082']} + assert results['NM_001174051.1:c.3423A>C']['hgvs_transcript_variant'] == 'NM_001174051.1:c.3423A>C' + assert results['NM_001174051.1:c.3423A>C']['genome_context_intronic_sequence'] == '' + assert results['NM_001174051.1:c.3423A>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001174051.1:c.3423A>C']['hgvs_refseqgene_variant'] == '' + assert results['NM_001174051.1:c.3423A>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167522.1:p.(Gln1141His)', 'slr': 'NP_001167522.1:p.(Q1141H)'} + assert results['NM_001174051.1:c.3423A>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001174051.1:c.3423A>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001174051.1:c.3423A>C']['alt_genomic_loci'], []) + assert results['NM_001174051.1:c.3423A>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': 'chr3', 'pos': '50402127', 'ref': 'T', 'alt': 'G'}} + assert 'hg38' not in list(results['NM_001174051.1:c.3423A>C']['primary_assembly_loci'].keys()) + assert results['NM_001174051.1:c.3423A>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402127T>G', 'vcf': {'chr': '3', 'pos': '50402127', 'ref': 'T', 'alt': 'G'}} + assert 'grch38' not in list(results['NM_001174051.1:c.3423A>C']['primary_assembly_loci'].keys()) + assert 
results['NM_001174051.1:c.3423A>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001174051.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167522.1'} def test_variant282(self): variant = '3-50402890-G-A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' + assert 'NM_001291101.1:c.2788C>T' in list(results.keys()) + assert results['NM_001291101.1:c.2788C>T']['submitted_variant'] == '3-50402890-G-A' + assert results['NM_001291101.1:c.2788C>T']['gene_symbol'] == 'CACNA2D2' + assert results['NM_001291101.1:c.2788C>T']['gene_ids'] == {'hgnc_id': 'HGNC:1400', 'entrez_gene_id': '9254', 'ucsc_id': 'uc003daq.4', 'omim_id': ['607082']} + assert results['NM_001291101.1:c.2788C>T']['hgvs_transcript_variant'] == 'NM_001291101.1:c.2788C>T' + assert results['NM_001291101.1:c.2788C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001291101.1:c.2788C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001291101.1:c.2788C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001291101.1:c.2788C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278030.1:p.(Pro930Ser)', 'slr': 'NP_001278030.1:p.(P930S)'} + assert results['NM_001291101.1:c.2788C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001291101.1:c.2788C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001291101.1:c.2788C>T']['alt_genomic_loci'], []) + assert results['NM_001291101.1:c.2788C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'pos': '50402890', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001291101.1:c.2788C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'pos': '50365459', 'ref': 'G', 'alt': 'A'}} + assert 
results['NM_001291101.1:c.2788C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'pos': '50402890', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001291101.1:c.2788C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'pos': '50365459', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001291101.1:c.2788C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291101.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278030.1'} + assert 'NR_111913.1:n.126G>A' in list(results.keys()) - assert results['NR_111913.1:n.126G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_111913.1:n.126G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_111913.1:n.126G>A']['alt_genomic_loci'], []) - assert results['NR_111913.1:n.126G>A']['gene_symbol'] == 'CYB561D2' - assert results['NR_111913.1:n.126G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_111913.1:n.126G>A']['submitted_variant'] == '3-50402890-G-A' - assert results['NR_111913.1:n.126G>A']['genome_context_intronic_sequence'] == '' - assert results['NR_111913.1:n.126G>A']['hgvs_lrg_variant'] == '' + assert results['NR_111913.1:n.126G>A']['gene_symbol'] == 'CYB561D2' + assert results['NR_111913.1:n.126G>A']['gene_ids'] == {'hgnc_id': 'HGNC:30253', 'entrez_gene_id': '11068', 'ucsc_id': 'uc003dam.4', 'omim_id': ['607068']} assert results['NR_111913.1:n.126G>A']['hgvs_transcript_variant'] == 'NR_111913.1:n.126G>A' + assert results['NR_111913.1:n.126G>A']['genome_context_intronic_sequence'] == '' + assert results['NR_111913.1:n.126G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NR_111913.1:n.126G>A']['hgvs_refseqgene_variant'] == '' - assert results['NR_111913.1:n.126G>A']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} - assert results['NR_111913.1:n.126G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} - assert results['NR_111913.1:n.126G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} - assert results['NR_111913.1:n.126G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} + assert results['NR_111913.1:n.126G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_111913.1:n.126G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_111913.1:n.126G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_111913.1:n.126G>A']['alt_genomic_loci'], []) + assert results['NR_111913.1:n.126G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'pos': '50402890', 'ref': 'G', 'alt': 'A'}} + assert results['NR_111913.1:n.126G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'pos': '50365459', 'ref': 'G', 'alt': 'A'}} + assert results['NR_111913.1:n.126G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'pos': '50402890', 'ref': 'G', 'alt': 'A'}} + assert results['NR_111913.1:n.126G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'pos': '50365459', 'ref': 'G', 'alt': 'A'}} assert results['NR_111913.1:n.126G>A']['reference_sequence_records'] == {'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NR_111913.1'} + assert 'NM_001174051.2:c.3016C>T' in list(results.keys()) + assert results['NM_001174051.2:c.3016C>T']['submitted_variant'] == '3-50402890-G-A' + assert results['NM_001174051.2:c.3016C>T']['gene_symbol'] == 'CACNA2D2' + assert results['NM_001174051.2:c.3016C>T']['gene_ids'] == {'hgnc_id': 'HGNC:1400', 'entrez_gene_id': '9254', 'ucsc_id': 'uc003daq.4', 'omim_id': ['607082']} + assert results['NM_001174051.2:c.3016C>T']['hgvs_transcript_variant'] == 'NM_001174051.2:c.3016C>T' + assert results['NM_001174051.2:c.3016C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001174051.2:c.3016C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001174051.2:c.3016C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001174051.2:c.3016C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167522.1:p.(Pro1006Ser)', 'slr': 'NP_001167522.1:p.(P1006S)'} + assert results['NM_001174051.2:c.3016C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001174051.2:c.3016C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001174051.2:c.3016C>T']['alt_genomic_loci'], []) + assert results['NM_001174051.2:c.3016C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'pos': '50402890', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001174051.2:c.3016C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'pos': '50365459', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001174051.2:c.3016C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'pos': '50402890', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001174051.2:c.3016C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'pos': '50365459', 'ref': 
'G', 'alt': 'A'}} + assert results['NM_001174051.2:c.3016C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001174051.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167522.1'} + assert 'NR_111912.1:n.443-838G>A' in list(results.keys()) - assert results['NR_111912.1:n.443-838G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_111912.1:n.443-838G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_111912.1:n.443-838G>A']['alt_genomic_loci'], []) - assert results['NR_111912.1:n.443-838G>A']['gene_symbol'] == 'CYB561D2' - assert results['NR_111912.1:n.443-838G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_111912.1:n.443-838G>A']['submitted_variant'] == '3-50402890-G-A' - assert results['NR_111912.1:n.443-838G>A']['genome_context_intronic_sequence'] == 'NC_000003.11(NR_111912.1):c.443-838G>A' - assert results['NR_111912.1:n.443-838G>A']['hgvs_lrg_variant'] == '' + assert results['NR_111912.1:n.443-838G>A']['gene_symbol'] == 'CYB561D2' + assert results['NR_111912.1:n.443-838G>A']['gene_ids'] == {'hgnc_id': 'HGNC:30253', 'entrez_gene_id': '11068', 'ucsc_id': 'uc003dam.4', 'omim_id': ['607068']} assert results['NR_111912.1:n.443-838G>A']['hgvs_transcript_variant'] == 'NR_111912.1:n.443-838G>A' + assert results['NR_111912.1:n.443-838G>A']['genome_context_intronic_sequence'] == 'NC_000003.11(NR_111912.1):c.443-838G>A' + assert results['NR_111912.1:n.443-838G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NR_111912.1:n.443-838G>A']['hgvs_refseqgene_variant'] == '' - assert results['NR_111912.1:n.443-838G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} - assert results['NR_111912.1:n.443-838G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} - assert results['NR_111912.1:n.443-838G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} - assert results['NR_111912.1:n.443-838G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} + assert results['NR_111912.1:n.443-838G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_111912.1:n.443-838G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_111912.1:n.443-838G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_111912.1:n.443-838G>A']['alt_genomic_loci'], []) + assert results['NR_111912.1:n.443-838G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'pos': '50402890', 'ref': 'G', 'alt': 'A'}} + assert results['NR_111912.1:n.443-838G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'pos': '50365459', 'ref': 'G', 'alt': 'A'}} + assert results['NR_111912.1:n.443-838G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'pos': '50402890', 'ref': 'G', 'alt': 'A'}} + assert results['NR_111912.1:n.443-838G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'pos': '50365459', 'ref': 'G', 'alt': 'A'}} assert results['NR_111912.1:n.443-838G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_111912.1'} - assert 'NM_001291101.1:c.2788C>T' in list(results.keys()) - assert results['NM_001291101.1:c.2788C>T']['hgvs_lrg_transcript_variant'] == '' - 
assert results['NM_001291101.1:c.2788C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001291101.1:c.2788C>T']['alt_genomic_loci'], []) - assert results['NM_001291101.1:c.2788C>T']['gene_symbol'] == 'CACNA2D2' - assert results['NM_001291101.1:c.2788C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278030.1:p.(Pro930Ser)', 'slr': 'NP_001278030.1:p.(P930S)'} - assert results['NM_001291101.1:c.2788C>T']['submitted_variant'] == '3-50402890-G-A' - assert results['NM_001291101.1:c.2788C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001291101.1:c.2788C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001291101.1:c.2788C>T']['hgvs_transcript_variant'] == 'NM_001291101.1:c.2788C>T' - assert results['NM_001291101.1:c.2788C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001291101.1:c.2788C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} - assert results['NM_001291101.1:c.2788C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} - assert results['NM_001291101.1:c.2788C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} - assert results['NM_001291101.1:c.2788C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} - assert results['NM_001291101.1:c.2788C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278030.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001291101.1'} - - assert 'NM_006030.2:c.2995C>T' in list(results.keys()) - assert 
results['NM_006030.2:c.2995C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_006030.2:c.2995C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_006030.2:c.2995C>T']['alt_genomic_loci'], []) - assert results['NM_006030.2:c.2995C>T']['gene_symbol'] == 'CACNA2D2' - assert results['NM_006030.2:c.2995C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006021.2:p.(Pro999Ser)', 'slr': 'NP_006021.2:p.(P999S)'} - assert results['NM_006030.2:c.2995C>T']['submitted_variant'] == '3-50402890-G-A' - assert results['NM_006030.2:c.2995C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_006030.2:c.2995C>T']['hgvs_lrg_variant'] == '' - assert results['NM_006030.2:c.2995C>T']['hgvs_transcript_variant'] == 'NM_006030.2:c.2995C>T' - assert results['NM_006030.2:c.2995C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_006030.2:c.2995C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} - assert 'hg38' not in list(results['NM_006030.2:c.2995C>T']['primary_assembly_loci'].keys()) - assert results['NM_006030.2:c.2995C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} - assert 'grch38' not in list(results['NM_006030.2:c.2995C>T']['primary_assembly_loci'].keys()) - assert results['NM_006030.2:c.2995C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_006021.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006030.2'} - - assert 'NR_111914.1:n.126G>A' in list(results.keys()) - assert results['NR_111914.1:n.126G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_111914.1:n.126G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_111914.1:n.126G>A']['alt_genomic_loci'], []) - assert 
results['NR_111914.1:n.126G>A']['gene_symbol'] == 'CYB561D2' - assert results['NR_111914.1:n.126G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} - assert results['NR_111914.1:n.126G>A']['submitted_variant'] == '3-50402890-G-A' - assert results['NR_111914.1:n.126G>A']['genome_context_intronic_sequence'] == '' - assert results['NR_111914.1:n.126G>A']['hgvs_lrg_variant'] == '' - assert results['NR_111914.1:n.126G>A']['hgvs_transcript_variant'] == 'NR_111914.1:n.126G>A' - assert results['NR_111914.1:n.126G>A']['hgvs_refseqgene_variant'] == '' - assert results['NR_111914.1:n.126G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} - assert results['NR_111914.1:n.126G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} - assert results['NR_111914.1:n.126G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} - assert results['NR_111914.1:n.126G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} - assert results['NR_111914.1:n.126G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_111914.1'} - - assert 'NM_001005505.2:c.2995C>T' in list(results.keys()) - assert results['NM_001005505.2:c.2995C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001005505.2:c.2995C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001005505.2:c.2995C>T']['alt_genomic_loci'], []) - assert results['NM_001005505.2:c.2995C>T']['gene_symbol'] == 'CACNA2D2' - assert 
results['NM_001005505.2:c.2995C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001005505.1:p.(Pro999Ser)', 'slr': 'NP_001005505.1:p.(P999S)'} - assert results['NM_001005505.2:c.2995C>T']['submitted_variant'] == '3-50402890-G-A' - assert results['NM_001005505.2:c.2995C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001005505.2:c.2995C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001005505.2:c.2995C>T']['hgvs_transcript_variant'] == 'NM_001005505.2:c.2995C>T' - assert results['NM_001005505.2:c.2995C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001005505.2:c.2995C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} - assert results['NM_001005505.2:c.2995C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} - assert results['NM_001005505.2:c.2995C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} - assert results['NM_001005505.2:c.2995C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} - assert results['NM_001005505.2:c.2995C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001005505.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001005505.2'} - - assert results['flag'] == 'gene_variant' - assert 'NM_001174051.1:c.3016C>T' in list(results.keys()) - assert results['NM_001174051.1:c.3016C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001174051.1:c.3016C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001174051.1:c.3016C>T']['alt_genomic_loci'], []) - assert 
results['NM_001174051.1:c.3016C>T']['gene_symbol'] == 'CACNA2D2' - assert results['NM_001174051.1:c.3016C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167522.1:p.(Pro1006Ser)', 'slr': 'NP_001167522.1:p.(P1006S)'} - assert results['NM_001174051.1:c.3016C>T']['submitted_variant'] == '3-50402890-G-A' - assert results['NM_001174051.1:c.3016C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001174051.1:c.3016C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001174051.1:c.3016C>T']['hgvs_transcript_variant'] == 'NM_001174051.1:c.3016C>T' - assert results['NM_001174051.1:c.3016C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001174051.1:c.3016C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} - assert 'hg38' not in list(results['NM_001174051.1:c.3016C>T']['primary_assembly_loci'].keys()) - assert results['NM_001174051.1:c.3016C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} - assert 'grch38' not in list(results['NM_001174051.1:c.3016C>T']['primary_assembly_loci'].keys()) - assert results['NM_001174051.1:c.3016C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167522.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001174051.1'} - - assert 'NM_001174051.2:c.3016C>T' in list(results.keys()) - assert results['NM_001174051.2:c.3016C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001174051.2:c.3016C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001174051.2:c.3016C>T']['alt_genomic_loci'], []) - assert results['NM_001174051.2:c.3016C>T']['gene_symbol'] == 'CACNA2D2' - assert results['NM_001174051.2:c.3016C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167522.1:p.(Pro1006Ser)', 
'slr': 'NP_001167522.1:p.(P1006S)'} - assert results['NM_001174051.2:c.3016C>T']['submitted_variant'] == '3-50402890-G-A' - assert results['NM_001174051.2:c.3016C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001174051.2:c.3016C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001174051.2:c.3016C>T']['hgvs_transcript_variant'] == 'NM_001174051.2:c.3016C>T' - assert results['NM_001174051.2:c.3016C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001174051.2:c.3016C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} - assert results['NM_001174051.2:c.3016C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} - assert results['NM_001174051.2:c.3016C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} - assert results['NM_001174051.2:c.3016C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} - assert results['NM_001174051.2:c.3016C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167522.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001174051.2'} - assert 'NM_006030.3:c.2995C>T' in list(results.keys()) - assert results['NM_006030.3:c.2995C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_006030.3:c.2995C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_006030.3:c.2995C>T']['alt_genomic_loci'], []) - assert results['NM_006030.3:c.2995C>T']['gene_symbol'] == 'CACNA2D2' - assert results['NM_006030.3:c.2995C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006021.2:p.(Pro999Ser)', 
'slr': 'NP_006021.2:p.(P999S)'} assert results['NM_006030.3:c.2995C>T']['submitted_variant'] == '3-50402890-G-A' - assert results['NM_006030.3:c.2995C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_006030.3:c.2995C>T']['hgvs_lrg_variant'] == '' + assert results['NM_006030.3:c.2995C>T']['gene_symbol'] == 'CACNA2D2' + assert results['NM_006030.3:c.2995C>T']['gene_ids'] == {'hgnc_id': 'HGNC:1400', 'entrez_gene_id': '9254', 'ucsc_id': 'uc003daq.4', 'omim_id': ['607082']} assert results['NM_006030.3:c.2995C>T']['hgvs_transcript_variant'] == 'NM_006030.3:c.2995C>T' + assert results['NM_006030.3:c.2995C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_006030.3:c.2995C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_006030.3:c.2995C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_006030.3:c.2995C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} - assert results['NM_006030.3:c.2995C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} - assert results['NM_006030.3:c.2995C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} - assert results['NM_006030.3:c.2995C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50365459', 'alt': 'A'}} - assert results['NM_006030.3:c.2995C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_006021.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006030.3'} + assert results['NM_006030.3:c.2995C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006021.2:p.(Pro999Ser)', 'slr': 
'NP_006021.2:p.(P999S)'} + assert results['NM_006030.3:c.2995C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_006030.3:c.2995C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_006030.3:c.2995C>T']['alt_genomic_loci'], []) + assert results['NM_006030.3:c.2995C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'pos': '50402890', 'ref': 'G', 'alt': 'A'}} + assert results['NM_006030.3:c.2995C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'pos': '50365459', 'ref': 'G', 'alt': 'A'}} + assert results['NM_006030.3:c.2995C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'pos': '50402890', 'ref': 'G', 'alt': 'A'}} + assert results['NM_006030.3:c.2995C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'pos': '50365459', 'ref': 'G', 'alt': 'A'}} + assert results['NM_006030.3:c.2995C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006030.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_006021.2'} + + assert 'NM_006030.2:c.2995C>T' in list(results.keys()) + assert results['NM_006030.2:c.2995C>T']['submitted_variant'] == '3-50402890-G-A' + assert results['NM_006030.2:c.2995C>T']['gene_symbol'] == 'CACNA2D2' + assert results['NM_006030.2:c.2995C>T']['gene_ids'] == {'hgnc_id': 'HGNC:1400', 'entrez_gene_id': '9254', 'ucsc_id': 'uc003daq.4', 'omim_id': ['607082']} + assert results['NM_006030.2:c.2995C>T']['hgvs_transcript_variant'] == 'NM_006030.2:c.2995C>T' + assert results['NM_006030.2:c.2995C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_006030.2:c.2995C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_006030.2:c.2995C>T']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_006030.2:c.2995C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_006021.2:p.(Pro999Ser)', 'slr': 'NP_006021.2:p.(P999S)'} + assert results['NM_006030.2:c.2995C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_006030.2:c.2995C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_006030.2:c.2995C>T']['alt_genomic_loci'], []) + assert results['NM_006030.2:c.2995C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'pos': '50402890', 'ref': 'G', 'alt': 'A'}} + assert 'hg38' not in list(results['NM_006030.2:c.2995C>T']['primary_assembly_loci'].keys()) + assert results['NM_006030.2:c.2995C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'pos': '50402890', 'ref': 'G', 'alt': 'A'}} + assert 'grch38' not in list(results['NM_006030.2:c.2995C>T']['primary_assembly_loci'].keys()) + assert results['NM_006030.2:c.2995C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_006030.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_006021.2'} + + assert 'NM_001005505.2:c.2995C>T' in list(results.keys()) + assert results['NM_001005505.2:c.2995C>T']['submitted_variant'] == '3-50402890-G-A' + assert results['NM_001005505.2:c.2995C>T']['gene_symbol'] == 'CACNA2D2' + assert results['NM_001005505.2:c.2995C>T']['gene_ids'] == {'hgnc_id': 'HGNC:1400', 'entrez_gene_id': '9254', 'ucsc_id': 'uc003daq.4', 'omim_id': ['607082']} + assert results['NM_001005505.2:c.2995C>T']['hgvs_transcript_variant'] == 'NM_001005505.2:c.2995C>T' + assert results['NM_001005505.2:c.2995C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001005505.2:c.2995C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001005505.2:c.2995C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001005505.2:c.2995C>T']['hgvs_predicted_protein_consequence'] == 
{'tlr': 'NP_001005505.1:p.(Pro999Ser)', 'slr': 'NP_001005505.1:p.(P999S)'} + assert results['NM_001005505.2:c.2995C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001005505.2:c.2995C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001005505.2:c.2995C>T']['alt_genomic_loci'], []) + assert results['NM_001005505.2:c.2995C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'pos': '50402890', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001005505.2:c.2995C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'pos': '50365459', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001005505.2:c.2995C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'pos': '50402890', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001005505.2:c.2995C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'pos': '50365459', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001005505.2:c.2995C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001005505.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001005505.1'} + + assert 'NR_111914.1:n.126G>A' in list(results.keys()) + assert results['NR_111914.1:n.126G>A']['submitted_variant'] == '3-50402890-G-A' + assert results['NR_111914.1:n.126G>A']['gene_symbol'] == 'CYB561D2' + assert results['NR_111914.1:n.126G>A']['gene_ids'] == {'hgnc_id': 'HGNC:30253', 'entrez_gene_id': '11068', 'ucsc_id': 'uc003dam.4', 'omim_id': ['607068']} + assert results['NR_111914.1:n.126G>A']['hgvs_transcript_variant'] == 'NR_111914.1:n.126G>A' + assert results['NR_111914.1:n.126G>A']['genome_context_intronic_sequence'] == '' + assert results['NR_111914.1:n.126G>A']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NR_111914.1:n.126G>A']['hgvs_refseqgene_variant'] == '' + assert results['NR_111914.1:n.126G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_111914.1:n.126G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_111914.1:n.126G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_111914.1:n.126G>A']['alt_genomic_loci'], []) + assert results['NR_111914.1:n.126G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'pos': '50402890', 'ref': 'G', 'alt': 'A'}} + assert results['NR_111914.1:n.126G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': 'chr3', 'pos': '50365459', 'ref': 'G', 'alt': 'A'}} + assert results['NR_111914.1:n.126G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'pos': '50402890', 'ref': 'G', 'alt': 'A'}} + assert results['NR_111914.1:n.126G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.50365459G>A', 'vcf': {'chr': '3', 'pos': '50365459', 'ref': 'G', 'alt': 'A'}} + assert results['NR_111914.1:n.126G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_111914.1'} assert 'NM_001005505.1:c.2995C>T' in list(results.keys()) - assert results['NM_001005505.1:c.2995C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001005505.1:c.2995C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001005505.1:c.2995C>T']['alt_genomic_loci'], []) - assert results['NM_001005505.1:c.2995C>T']['gene_symbol'] == 'CACNA2D2' - assert results['NM_001005505.1:c.2995C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001005505.1:p.(Pro999Ser)', 'slr': 'NP_001005505.1:p.(P999S)'} assert results['NM_001005505.1:c.2995C>T']['submitted_variant'] == 
'3-50402890-G-A' - assert results['NM_001005505.1:c.2995C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001005505.1:c.2995C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001005505.1:c.2995C>T']['gene_symbol'] == 'CACNA2D2' + assert results['NM_001005505.1:c.2995C>T']['gene_ids'] == {'hgnc_id': 'HGNC:1400', 'entrez_gene_id': '9254', 'ucsc_id': 'uc003daq.4', 'omim_id': ['607082']} assert results['NM_001005505.1:c.2995C>T']['hgvs_transcript_variant'] == 'NM_001005505.1:c.2995C>T' + assert results['NM_001005505.1:c.2995C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001005505.1:c.2995C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001005505.1:c.2995C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001005505.1:c.2995C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} + assert results['NM_001005505.1:c.2995C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001005505.1:p.(Pro999Ser)', 'slr': 'NP_001005505.1:p.(P999S)'} + assert results['NM_001005505.1:c.2995C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001005505.1:c.2995C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001005505.1:c.2995C>T']['alt_genomic_loci'], []) + assert results['NM_001005505.1:c.2995C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'pos': '50402890', 'ref': 'G', 'alt': 'A'}} assert 'hg38' not in list(results['NM_001005505.1:c.2995C>T']['primary_assembly_loci'].keys()) - assert results['NM_001005505.1:c.2995C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '50402890', 'alt': 'A'}} + assert results['NM_001005505.1:c.2995C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'pos': '50402890', 'ref': 'G', 'alt': 'A'}} assert 'grch38' not in list(results['NM_001005505.1:c.2995C>T']['primary_assembly_loci'].keys()) - assert results['NM_001005505.1:c.2995C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001005505.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001005505.1'} + assert results['NM_001005505.1:c.2995C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001005505.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001005505.1'} + assert 'NM_001174051.1:c.3016C>T' in list(results.keys()) + assert results['NM_001174051.1:c.3016C>T']['submitted_variant'] == '3-50402890-G-A' + assert results['NM_001174051.1:c.3016C>T']['gene_symbol'] == 'CACNA2D2' + assert results['NM_001174051.1:c.3016C>T']['gene_ids'] == {'hgnc_id': 'HGNC:1400', 'entrez_gene_id': '9254', 'ucsc_id': 'uc003daq.4', 'omim_id': ['607082']} + assert results['NM_001174051.1:c.3016C>T']['hgvs_transcript_variant'] == 'NM_001174051.1:c.3016C>T' + assert results['NM_001174051.1:c.3016C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001174051.1:c.3016C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001174051.1:c.3016C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001174051.1:c.3016C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001167522.1:p.(Pro1006Ser)', 'slr': 'NP_001167522.1:p.(P1006S)'} + assert results['NM_001174051.1:c.3016C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001174051.1:c.3016C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001174051.1:c.3016C>T']['alt_genomic_loci'], []) + assert results['NM_001174051.1:c.3016C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': 'chr3', 'pos': '50402890', 'ref': 'G', 'alt': 'A'}} + assert 'hg38' not in 
list(results['NM_001174051.1:c.3016C>T']['primary_assembly_loci'].keys()) + assert results['NM_001174051.1:c.3016C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.50402890G>A', 'vcf': {'chr': '3', 'pos': '50402890', 'ref': 'G', 'alt': 'A'}} + assert 'grch38' not in list(results['NM_001174051.1:c.3016C>T']['primary_assembly_loci'].keys()) + assert results['NM_001174051.1:c.3016C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001174051.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001167522.1'} def test_variant283(self): variant = '3-57851007-AG-A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_007159.4:c.1135+565del' in list(results.keys()) - assert results['NM_007159.4:c.1135+565del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_007159.4:c.1135+565del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_007159.4:c.1135+565del']['alt_genomic_loci'], []) - assert results['NM_007159.4:c.1135+565del']['gene_symbol'] == 'SLMAP' - assert results['NM_007159.4:c.1135+565del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009090.2:p.?', 'slr': 'NP_009090.2:p.?'} - assert results['NM_007159.4:c.1135+565del']['submitted_variant'] == '3-57851007-AG-A' - assert results['NM_007159.4:c.1135+565del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_007159.4):c.1135+565del' - assert results['NM_007159.4:c.1135+565del']['hgvs_lrg_variant'] == '' - assert results['NM_007159.4:c.1135+565del']['hgvs_transcript_variant'] == 'NM_007159.4:c.1135+565del' - assert results['NM_007159.4:c.1135+565del']['hgvs_refseqgene_variant'] == '' - assert results['NM_007159.4:c.1135+565del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': 'chr3', 'ref': 'AG', 'pos': '57851007', 'alt': 'A'}} - assert 
results['NM_007159.4:c.1135+565del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.57865281del', 'vcf': {'chr': 'chr3', 'ref': 'AG', 'pos': '57865280', 'alt': 'A'}} - assert results['NM_007159.4:c.1135+565del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': '3', 'ref': 'AG', 'pos': '57851007', 'alt': 'A'}} - assert results['NM_007159.4:c.1135+565del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.57865281del', 'vcf': {'chr': '3', 'ref': 'AG', 'pos': '57865280', 'alt': 'A'}} - assert results['NM_007159.4:c.1135+565del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009090.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007159.4'} - - assert 'NM_001304420.2:c.1186+424del' in list(results.keys()) - assert results['NM_001304420.2:c.1186+424del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001304420.2:c.1186+424del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001304420.2:c.1186+424del']['alt_genomic_loci'], []) - assert results['NM_001304420.2:c.1186+424del']['gene_symbol'] == 'SLMAP' - assert results['NM_001304420.2:c.1186+424del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001291349.1:p.?', 'slr': 'NP_001291349.1:p.?'} - assert results['NM_001304420.2:c.1186+424del']['submitted_variant'] == '3-57851007-AG-A' - assert results['NM_001304420.2:c.1186+424del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_001304420.2):c.1186+424del' - assert results['NM_001304420.2:c.1186+424del']['hgvs_lrg_variant'] == '' - assert results['NM_001304420.2:c.1186+424del']['hgvs_transcript_variant'] == 'NM_001304420.2:c.1186+424del' - assert results['NM_001304420.2:c.1186+424del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001304420.2:c.1186+424del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000003.11:g.57851008del', 'vcf': {'chr': 'chr3', 'ref': 'AG', 'pos': '57851007', 'alt': 'A'}} - assert results['NM_001304420.2:c.1186+424del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.57865281del', 'vcf': {'chr': 'chr3', 'ref': 'AG', 'pos': '57865280', 'alt': 'A'}} - assert results['NM_001304420.2:c.1186+424del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': '3', 'ref': 'AG', 'pos': '57851007', 'alt': 'A'}} - assert results['NM_001304420.2:c.1186+424del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.57865281del', 'vcf': {'chr': '3', 'ref': 'AG', 'pos': '57865280', 'alt': 'A'}} - assert results['NM_001304420.2:c.1186+424del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001291349.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001304420.2'} - + assert results['flag'] == 'gene_variant' assert 'NM_001304421.2:c.1135+565del' in list(results.keys()) - assert results['NM_001304421.2:c.1135+565del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001304421.2:c.1135+565del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001304421.2:c.1135+565del']['alt_genomic_loci'], []) - assert results['NM_001304421.2:c.1135+565del']['gene_symbol'] == 'SLMAP' - assert results['NM_001304421.2:c.1135+565del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001291350.1:p.?', 'slr': 'NP_001291350.1:p.?'} assert results['NM_001304421.2:c.1135+565del']['submitted_variant'] == '3-57851007-AG-A' - assert results['NM_001304421.2:c.1135+565del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_001304421.2):c.1135+565del' - assert results['NM_001304421.2:c.1135+565del']['hgvs_lrg_variant'] == '' + assert results['NM_001304421.2:c.1135+565del']['gene_symbol'] == 'SLMAP' + assert results['NM_001304421.2:c.1135+565del']['gene_ids'] == {'hgnc_id': 
'HGNC:16643', 'entrez_gene_id': '7871', 'ucsc_id': 'uc062kya.1', 'omim_id': ['602701']} assert results['NM_001304421.2:c.1135+565del']['hgvs_transcript_variant'] == 'NM_001304421.2:c.1135+565del' + assert results['NM_001304421.2:c.1135+565del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_001304421.2):c.1135+565del' + assert results['NM_001304421.2:c.1135+565del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001304421.2:c.1135+565del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001304421.2:c.1135+565del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': 'chr3', 'ref': 'AG', 'pos': '57851007', 'alt': 'A'}} - assert results['NM_001304421.2:c.1135+565del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.57865281del', 'vcf': {'chr': 'chr3', 'ref': 'AG', 'pos': '57865280', 'alt': 'A'}} - assert results['NM_001304421.2:c.1135+565del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': '3', 'ref': 'AG', 'pos': '57851007', 'alt': 'A'}} - assert results['NM_001304421.2:c.1135+565del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.57865281del', 'vcf': {'chr': '3', 'ref': 'AG', 'pos': '57865280', 'alt': 'A'}} - assert results['NM_001304421.2:c.1135+565del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001291350.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001304421.2'} - - assert results['flag'] == 'gene_variant' - assert 'NM_007159.2:c.1135+565del' in list(results.keys()) - assert results['NM_007159.2:c.1135+565del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_007159.2:c.1135+565del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_007159.2:c.1135+565del']['alt_genomic_loci'], []) - assert results['NM_007159.2:c.1135+565del']['gene_symbol'] == 
'SLMAP' - assert results['NM_007159.2:c.1135+565del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009090.2:p.?', 'slr': 'NP_009090.2:p.?'} - assert results['NM_007159.2:c.1135+565del']['submitted_variant'] == '3-57851007-AG-A' - assert results['NM_007159.2:c.1135+565del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_007159.2):c.1135+565del' - assert results['NM_007159.2:c.1135+565del']['hgvs_lrg_variant'] == '' - assert results['NM_007159.2:c.1135+565del']['hgvs_transcript_variant'] == 'NM_007159.2:c.1135+565del' - assert results['NM_007159.2:c.1135+565del']['hgvs_refseqgene_variant'] == '' - assert results['NM_007159.2:c.1135+565del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': 'chr3', 'ref': 'AG', 'pos': '57851007', 'alt': 'A'}} - assert 'hg38' not in list(results['NM_007159.2:c.1135+565del']['primary_assembly_loci'].keys()) - assert results['NM_007159.2:c.1135+565del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': '3', 'ref': 'AG', 'pos': '57851007', 'alt': 'A'}} - assert 'grch38' not in list(results['NM_007159.2:c.1135+565del']['primary_assembly_loci'].keys()) - assert results['NM_007159.2:c.1135+565del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009090.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007159.2'} + assert results['NM_001304421.2:c.1135+565del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001291350.1:p.?', 'slr': 'NP_001291350.1:p.?'} + assert results['NM_001304421.2:c.1135+565del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001304421.2:c.1135+565del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001304421.2:c.1135+565del']['alt_genomic_loci'], []) + assert results['NM_001304421.2:c.1135+565del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': 
'chr3', 'pos': '57851007', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_001304421.2:c.1135+565del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.57865281del', 'vcf': {'chr': 'chr3', 'pos': '57865280', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_001304421.2:c.1135+565del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': '3', 'pos': '57851007', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_001304421.2:c.1135+565del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.57865281del', 'vcf': {'chr': '3', 'pos': '57865280', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_001304421.2:c.1135+565del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001304421.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001291350.1'} assert 'NM_007159.3:c.1135+565del' in list(results.keys()) - assert results['NM_007159.3:c.1135+565del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_007159.3:c.1135+565del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_007159.3:c.1135+565del']['alt_genomic_loci'], []) - assert results['NM_007159.3:c.1135+565del']['gene_symbol'] == 'SLMAP' - assert results['NM_007159.3:c.1135+565del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009090.2:p.?', 'slr': 'NP_009090.2:p.?'} assert results['NM_007159.3:c.1135+565del']['submitted_variant'] == '3-57851007-AG-A' - assert results['NM_007159.3:c.1135+565del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_007159.3):c.1135+565del' - assert results['NM_007159.3:c.1135+565del']['hgvs_lrg_variant'] == '' + assert results['NM_007159.3:c.1135+565del']['gene_symbol'] == 'SLMAP' + assert results['NM_007159.3:c.1135+565del']['gene_ids'] == {'hgnc_id': 'HGNC:16643', 'entrez_gene_id': '7871', 'ucsc_id': 'uc062kya.1', 'omim_id': ['602701']} assert 
results['NM_007159.3:c.1135+565del']['hgvs_transcript_variant'] == 'NM_007159.3:c.1135+565del' + assert results['NM_007159.3:c.1135+565del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_007159.3):c.1135+565del' + assert results['NM_007159.3:c.1135+565del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_007159.3:c.1135+565del']['hgvs_refseqgene_variant'] == '' + assert results['NM_007159.3:c.1135+565del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009090.2:p.?', 'slr': 'NP_009090.2:p.?'} + assert results['NM_007159.3:c.1135+565del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007159.3:c.1135+565del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_007159.3:c.1135+565del']['alt_genomic_loci'], []) assert results['NM_007159.3:c.1135+565del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': 'chr3', 'pos': '57851007', 'ref': 'AG', 'alt': 'A'}} assert 'hg38' not in list(results['NM_007159.3:c.1135+565del']['primary_assembly_loci'].keys()) assert results['NM_007159.3:c.1135+565del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': '3', 'pos': '57851007', 'ref': 'AG', 'alt': 'A'}} assert 'grch38' not in list(results['NM_007159.3:c.1135+565del']['primary_assembly_loci'].keys()) assert results['NM_007159.3:c.1135+565del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007159.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009090.2'} + assert 'NM_007159.4:c.1135+565del' in list(results.keys()) + assert results['NM_007159.4:c.1135+565del']['submitted_variant'] == '3-57851007-AG-A' + assert results['NM_007159.4:c.1135+565del']['gene_symbol'] == 'SLMAP' + assert results['NM_007159.4:c.1135+565del']['gene_ids'] == {'hgnc_id': 'HGNC:16643', 'entrez_gene_id': '7871', 'ucsc_id': 'uc062kya.1', 'omim_id': ['602701']} + assert 
results['NM_007159.4:c.1135+565del']['hgvs_transcript_variant'] == 'NM_007159.4:c.1135+565del' + assert results['NM_007159.4:c.1135+565del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_007159.4):c.1135+565del' + assert results['NM_007159.4:c.1135+565del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007159.4:c.1135+565del']['hgvs_refseqgene_variant'] == '' + assert results['NM_007159.4:c.1135+565del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009090.2:p.?', 'slr': 'NP_009090.2:p.?'} + assert results['NM_007159.4:c.1135+565del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007159.4:c.1135+565del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_007159.4:c.1135+565del']['alt_genomic_loci'], []) + assert results['NM_007159.4:c.1135+565del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': 'chr3', 'pos': '57851007', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_007159.4:c.1135+565del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.57865281del', 'vcf': {'chr': 'chr3', 'pos': '57865280', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_007159.4:c.1135+565del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': '3', 'pos': '57851007', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_007159.4:c.1135+565del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.57865281del', 'vcf': {'chr': '3', 'pos': '57865280', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_007159.4:c.1135+565del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007159.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009090.2'} + + assert 'NM_007159.2:c.1135+565del' in list(results.keys()) + assert results['NM_007159.2:c.1135+565del']['submitted_variant'] == '3-57851007-AG-A' + assert 
results['NM_007159.2:c.1135+565del']['gene_symbol'] == 'SLMAP' + assert results['NM_007159.2:c.1135+565del']['gene_ids'] == {'hgnc_id': 'HGNC:16643', 'entrez_gene_id': '7871', 'ucsc_id': 'uc062kya.1', 'omim_id': ['602701']} + assert results['NM_007159.2:c.1135+565del']['hgvs_transcript_variant'] == 'NM_007159.2:c.1135+565del' + assert results['NM_007159.2:c.1135+565del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_007159.2):c.1135+565del' + assert results['NM_007159.2:c.1135+565del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_007159.2:c.1135+565del']['hgvs_refseqgene_variant'] == '' + assert results['NM_007159.2:c.1135+565del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_009090.2:p.?', 'slr': 'NP_009090.2:p.?'} + assert results['NM_007159.2:c.1135+565del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_007159.2:c.1135+565del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_007159.2:c.1135+565del']['alt_genomic_loci'], []) + assert results['NM_007159.2:c.1135+565del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': 'chr3', 'pos': '57851007', 'ref': 'AG', 'alt': 'A'}} + assert 'hg38' not in list(results['NM_007159.2:c.1135+565del']['primary_assembly_loci'].keys()) + assert results['NM_007159.2:c.1135+565del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': '3', 'pos': '57851007', 'ref': 'AG', 'alt': 'A'}} + assert 'grch38' not in list(results['NM_007159.2:c.1135+565del']['primary_assembly_loci'].keys()) + assert results['NM_007159.2:c.1135+565del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_007159.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_009090.2'} + assert 'NM_001304420.1:c.1186+424del' in list(results.keys()) - assert results['NM_001304420.1:c.1186+424del']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_001304420.1:c.1186+424del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001304420.1:c.1186+424del']['alt_genomic_loci'], []) - assert results['NM_001304420.1:c.1186+424del']['gene_symbol'] == 'SLMAP' - assert results['NM_001304420.1:c.1186+424del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001291349.1:p.?', 'slr': 'NP_001291349.1:p.?'} assert results['NM_001304420.1:c.1186+424del']['submitted_variant'] == '3-57851007-AG-A' - assert results['NM_001304420.1:c.1186+424del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_001304420.1):c.1186+424del' - assert results['NM_001304420.1:c.1186+424del']['hgvs_lrg_variant'] == '' + assert results['NM_001304420.1:c.1186+424del']['gene_symbol'] == 'SLMAP' + assert results['NM_001304420.1:c.1186+424del']['gene_ids'] == {'hgnc_id': 'HGNC:16643', 'entrez_gene_id': '7871', 'ucsc_id': 'uc062kya.1', 'omim_id': ['602701']} assert results['NM_001304420.1:c.1186+424del']['hgvs_transcript_variant'] == 'NM_001304420.1:c.1186+424del' + assert results['NM_001304420.1:c.1186+424del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_001304420.1):c.1186+424del' + assert results['NM_001304420.1:c.1186+424del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001304420.1:c.1186+424del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001304420.1:c.1186+424del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001291349.1:p.?', 'slr': 'NP_001291349.1:p.?'} + assert results['NM_001304420.1:c.1186+424del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001304420.1:c.1186+424del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001304420.1:c.1186+424del']['alt_genomic_loci'], []) assert results['NM_001304420.1:c.1186+424del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': 'chr3', 'pos': '57851007', 'ref': 'AG', 'alt': 'A'}} assert 'hg38' not in 
list(results['NM_001304420.1:c.1186+424del']['primary_assembly_loci'].keys()) assert results['NM_001304420.1:c.1186+424del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': '3', 'pos': '57851007', 'ref': 'AG', 'alt': 'A'}} @@ -15263,138 +15744,160 @@ def test_variant283(self): assert results['NM_001304420.1:c.1186+424del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001304420.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001291349.1'} assert 'NM_001304421.1:c.1135+565del' in list(results.keys()) - assert results['NM_001304421.1:c.1135+565del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001304421.1:c.1135+565del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001304421.1:c.1135+565del']['alt_genomic_loci'], []) - assert results['NM_001304421.1:c.1135+565del']['gene_symbol'] == 'SLMAP' - assert results['NM_001304421.1:c.1135+565del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001291350.1:p.?', 'slr': 'NP_001291350.1:p.?'} assert results['NM_001304421.1:c.1135+565del']['submitted_variant'] == '3-57851007-AG-A' - assert results['NM_001304421.1:c.1135+565del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_001304421.1):c.1135+565del' - assert results['NM_001304421.1:c.1135+565del']['hgvs_lrg_variant'] == '' + assert results['NM_001304421.1:c.1135+565del']['gene_symbol'] == 'SLMAP' + assert results['NM_001304421.1:c.1135+565del']['gene_ids'] == {'hgnc_id': 'HGNC:16643', 'entrez_gene_id': '7871', 'ucsc_id': 'uc062kya.1', 'omim_id': ['602701']} assert results['NM_001304421.1:c.1135+565del']['hgvs_transcript_variant'] == 'NM_001304421.1:c.1135+565del' + assert results['NM_001304421.1:c.1135+565del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_001304421.1):c.1135+565del' + assert results['NM_001304421.1:c.1135+565del']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_001304421.1:c.1135+565del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001304421.1:c.1135+565del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001291350.1:p.?', 'slr': 'NP_001291350.1:p.?'} + assert results['NM_001304421.1:c.1135+565del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001304421.1:c.1135+565del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001304421.1:c.1135+565del']['alt_genomic_loci'], []) assert results['NM_001304421.1:c.1135+565del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': 'chr3', 'pos': '57851007', 'ref': 'AG', 'alt': 'A'}} assert 'hg38' not in list(results['NM_001304421.1:c.1135+565del']['primary_assembly_loci'].keys()) assert results['NM_001304421.1:c.1135+565del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': '3', 'pos': '57851007', 'ref': 'AG', 'alt': 'A'}} assert 'grch38' not in list(results['NM_001304421.1:c.1135+565del']['primary_assembly_loci'].keys()) assert results['NM_001304421.1:c.1135+565del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001304421.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001291350.1'} + assert 'NM_001304420.2:c.1186+424del' in list(results.keys()) + assert results['NM_001304420.2:c.1186+424del']['submitted_variant'] == '3-57851007-AG-A' + assert results['NM_001304420.2:c.1186+424del']['gene_symbol'] == 'SLMAP' + assert results['NM_001304420.2:c.1186+424del']['gene_ids'] == {'hgnc_id': 'HGNC:16643', 'entrez_gene_id': '7871', 'ucsc_id': 'uc062kya.1', 'omim_id': ['602701']} + assert results['NM_001304420.2:c.1186+424del']['hgvs_transcript_variant'] == 'NM_001304420.2:c.1186+424del' + assert results['NM_001304420.2:c.1186+424del']['genome_context_intronic_sequence'] == 'NC_000003.11(NM_001304420.2):c.1186+424del' + assert 
results['NM_001304420.2:c.1186+424del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001304420.2:c.1186+424del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001304420.2:c.1186+424del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001291349.1:p.?', 'slr': 'NP_001291349.1:p.?'} + assert results['NM_001304420.2:c.1186+424del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001304420.2:c.1186+424del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001304420.2:c.1186+424del']['alt_genomic_loci'], []) + assert results['NM_001304420.2:c.1186+424del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': 'chr3', 'pos': '57851007', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_001304420.2:c.1186+424del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.57865281del', 'vcf': {'chr': 'chr3', 'pos': '57865280', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_001304420.2:c.1186+424del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.57851008del', 'vcf': {'chr': '3', 'pos': '57851007', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_001304420.2:c.1186+424del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.57865281del', 'vcf': {'chr': '3', 'pos': '57865280', 'ref': 'AG', 'alt': 'A'}} + assert results['NM_001304420.2:c.1186+424del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001304420.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001291349.1'} def test_variant284(self): variant = '3-122003832-G-C' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_001178065.1:c.3061C=' in list(results.keys()) - assert results['NM_001178065.1:c.3061C=']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001178065.1:c.3061C=']['refseqgene_context_intronic_sequence'] == 
'' - self.assertCountEqual(results['NM_001178065.1:c.3061C=']['alt_genomic_loci'], []) - assert results['NM_001178065.1:c.3061C=']['gene_symbol'] == 'CASR' - assert results['NM_001178065.1:c.3061C=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001171536.1:p.(Gln1021=)', 'slr': 'NP_001171536.1:p.(Q1021=)'} - assert results['NM_001178065.1:c.3061C=']['submitted_variant'] == '3-122003832-G-C' - assert results['NM_001178065.1:c.3061C=']['genome_context_intronic_sequence'] == '' - assert results['NM_001178065.1:c.3061C=']['hgvs_lrg_variant'] == '' - assert results['NM_001178065.1:c.3061C=']['hgvs_transcript_variant'] == 'NM_001178065.1:c.3061C=' - assert results['NM_001178065.1:c.3061C=']['hgvs_refseqgene_variant'] == '' - assert results['NM_001178065.1:c.3061C=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.122003832G>C', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '122003832', 'alt': 'C'}} - assert results['NM_001178065.1:c.3061C=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.122284985G>C', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '122284985', 'alt': 'C'}} - assert results['NM_001178065.1:c.3061C=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.122003832G>C', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '122003832', 'alt': 'C'}} - assert results['NM_001178065.1:c.3061C=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.122284985G>C', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '122284985', 'alt': 'C'}} - assert results['NM_001178065.1:c.3061C=']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001171536.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001178065.1'} - assert results['flag'] == 'gene_variant' assert 'NM_000388.3:c.3031C=' in list(results.keys()) - assert results['NM_000388.3:c.3031C=']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_000388.3:c.3031C=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000388.3:c.3031C=']['alt_genomic_loci'], []) - assert results['NM_000388.3:c.3031C=']['gene_symbol'] == 'CASR' - assert results['NM_000388.3:c.3031C=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000379.2:p.(Gln1011=)', 'slr': 'NP_000379.2:p.(Q1011=)'} assert results['NM_000388.3:c.3031C=']['submitted_variant'] == '3-122003832-G-C' - assert results['NM_000388.3:c.3031C=']['genome_context_intronic_sequence'] == '' - assert results['NM_000388.3:c.3031C=']['hgvs_lrg_variant'] == '' + assert results['NM_000388.3:c.3031C=']['gene_symbol'] == 'CASR' + assert results['NM_000388.3:c.3031C=']['gene_ids'] == {'hgnc_id': 'HGNC:1514', 'entrez_gene_id': '846', 'ucsc_id': 'uc003eev.5', 'omim_id': ['601199']} assert results['NM_000388.3:c.3031C=']['hgvs_transcript_variant'] == 'NM_000388.3:c.3031C=' + assert results['NM_000388.3:c.3031C=']['genome_context_intronic_sequence'] == '' + assert results['NM_000388.3:c.3031C=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000388.3:c.3031C=']['hgvs_refseqgene_variant'] == 'NG_009058.1:g.106303C=' - assert results['NM_000388.3:c.3031C=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.122003832G>C', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '122003832', 'alt': 'C'}} - assert results['NM_000388.3:c.3031C=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.122284985G>C', 'vcf': {'chr': 'chr3', 'ref': 'G', 'pos': '122284985', 'alt': 'C'}} - assert results['NM_000388.3:c.3031C=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.122003832G>C', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '122003832', 'alt': 'C'}} - assert results['NM_000388.3:c.3031C=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.122284985G>C', 'vcf': {'chr': '3', 'ref': 'G', 'pos': '122284985', 'alt': 
'C'}} - assert results['NM_000388.3:c.3031C=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009058.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000379.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000388.3'} + assert results['NM_000388.3:c.3031C=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000379.2:p.(Gln1011=)', 'slr': 'NP_000379.2:p.(Q1011=)'} + assert results['NM_000388.3:c.3031C=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000388.3:c.3031C=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000388.3:c.3031C=']['alt_genomic_loci'], []) + assert results['NM_000388.3:c.3031C=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.122003832G>C', 'vcf': {'chr': 'chr3', 'pos': '122003832', 'ref': 'G', 'alt': 'C'}} + assert results['NM_000388.3:c.3031C=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.122284985G>C', 'vcf': {'chr': 'chr3', 'pos': '122284985', 'ref': 'G', 'alt': 'C'}} + assert results['NM_000388.3:c.3031C=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.122003832G>C', 'vcf': {'chr': '3', 'pos': '122003832', 'ref': 'G', 'alt': 'C'}} + assert results['NM_000388.3:c.3031C=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.122284985G>C', 'vcf': {'chr': '3', 'pos': '122284985', 'ref': 'G', 'alt': 'C'}} + assert results['NM_000388.3:c.3031C=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000388.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000379.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009058.1'} + assert 'NM_001178065.1:c.3061C=' in list(results.keys()) + assert results['NM_001178065.1:c.3061C=']['submitted_variant'] == '3-122003832-G-C' + assert results['NM_001178065.1:c.3061C=']['gene_symbol'] == 'CASR' + assert 
results['NM_001178065.1:c.3061C=']['gene_ids'] == {'hgnc_id': 'HGNC:1514', 'entrez_gene_id': '846', 'ucsc_id': 'uc003eev.5', 'omim_id': ['601199']} + assert results['NM_001178065.1:c.3061C=']['hgvs_transcript_variant'] == 'NM_001178065.1:c.3061C=' + assert results['NM_001178065.1:c.3061C=']['genome_context_intronic_sequence'] == '' + assert results['NM_001178065.1:c.3061C=']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001178065.1:c.3061C=']['hgvs_refseqgene_variant'] == '' + assert results['NM_001178065.1:c.3061C=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001171536.1:p.(Gln1021=)', 'slr': 'NP_001171536.1:p.(Q1021=)'} + assert results['NM_001178065.1:c.3061C=']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001178065.1:c.3061C=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001178065.1:c.3061C=']['alt_genomic_loci'], []) + assert results['NM_001178065.1:c.3061C=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000003.11:g.122003832G>C', 'vcf': {'chr': 'chr3', 'pos': '122003832', 'ref': 'G', 'alt': 'C'}} + assert results['NM_001178065.1:c.3061C=']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000003.12:g.122284985G>C', 'vcf': {'chr': 'chr3', 'pos': '122284985', 'ref': 'G', 'alt': 'C'}} + assert results['NM_001178065.1:c.3061C=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000003.11:g.122003832G>C', 'vcf': {'chr': '3', 'pos': '122003832', 'ref': 'G', 'alt': 'C'}} + assert results['NM_001178065.1:c.3061C=']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000003.12:g.122284985G>C', 'vcf': {'chr': '3', 'pos': '122284985', 'ref': 'G', 'alt': 'C'}} + assert results['NM_001178065.1:c.3061C=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001178065.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001171536.1'} def test_variant285(self): variant = '4-153332910-C-CAGG' 
results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_001349798.1:c.45_46insCCT' in list(results.keys()) - assert results['NM_001349798.1:c.45_46insCCT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001349798.1:c.45_46insCCT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001349798.1:c.45_46insCCT']['alt_genomic_loci'], []) - assert results['NM_001349798.1:c.45_46insCCT']['gene_symbol'] == 'FBXW7' - assert results['NM_001349798.1:c.45_46insCCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_361014.1:p.(Thr15_Gly16insPro)', 'slr': 'NP_361014.1:p.(T15_G16insP)'} assert results['NM_001349798.1:c.45_46insCCT']['submitted_variant'] == '4-153332910-C-CAGG' - assert results['NM_001349798.1:c.45_46insCCT']['genome_context_intronic_sequence'] == '' - assert results['NM_001349798.1:c.45_46insCCT']['hgvs_lrg_variant'] == '' + assert results['NM_001349798.1:c.45_46insCCT']['gene_symbol'] == 'FBXW7' + assert results['NM_001349798.1:c.45_46insCCT']['gene_ids'] == {'hgnc_id': 'HGNC:16712', 'entrez_gene_id': '55294', 'ucsc_id': 'uc003imt.4', 'omim_id': ['606278']} assert results['NM_001349798.1:c.45_46insCCT']['hgvs_transcript_variant'] == 'NM_001349798.1:c.45_46insCCT' + assert results['NM_001349798.1:c.45_46insCCT']['genome_context_intronic_sequence'] == '' + assert results['NM_001349798.1:c.45_46insCCT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001349798.1:c.45_46insCCT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332912_153332913insGAG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} - assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411760_152411761insGAG', 'vcf': {'chr': 'chr4', 'ref': 
'C', 'pos': '152411758', 'alt': 'CAGG'}} - assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332912_153332913insGAG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} - assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411760_152411761insGAG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '152411758', 'alt': 'CAGG'}} - assert results['NM_001349798.1:c.45_46insCCT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_361014.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001349798.1'} - - assert 'NM_033632.3:c.45_46insCCT' in list(results.keys()) - assert results['NM_033632.3:c.45_46insCCT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_033632.3:c.45_46insCCT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_033632.3:c.45_46insCCT']['alt_genomic_loci'], []) - assert results['NM_033632.3:c.45_46insCCT']['gene_symbol'] == 'FBXW7' - assert results['NM_033632.3:c.45_46insCCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_361014.1:p.(Thr15_Gly16insPro)', 'slr': 'NP_361014.1:p.(T15_G16insP)'} - assert results['NM_033632.3:c.45_46insCCT']['submitted_variant'] == '4-153332910-C-CAGG' - assert results['NM_033632.3:c.45_46insCCT']['genome_context_intronic_sequence'] == '' - assert results['NM_033632.3:c.45_46insCCT']['hgvs_lrg_variant'] == '' - assert results['NM_033632.3:c.45_46insCCT']['hgvs_transcript_variant'] == 'NM_033632.3:c.45_46insCCT' - assert results['NM_033632.3:c.45_46insCCT']['hgvs_refseqgene_variant'] == 'NG_029466.1:g.128262_128263insCCT' - assert results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332912_153332913insGAG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} - assert 
results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411760_152411761insGAG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '152411758', 'alt': 'CAGG'}} - assert results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332912_153332913insGAG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} - assert results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411760_152411761insGAG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '152411758', 'alt': 'CAGG'}} - assert results['NM_033632.3:c.45_46insCCT']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029466.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_361014.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_033632.3'} + assert results['NM_001349798.1:c.45_46insCCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_361014.1:p.(Thr15_Gly16insPro)', 'slr': 'NP_361014.1:p.(T15_G16insP)'} + assert results['NM_001349798.1:c.45_46insCCT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001349798.1:c.45_46insCCT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001349798.1:c.45_46insCCT']['alt_genomic_loci'], []) + assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332912_153332913insGAG', 'vcf': {'chr': 'chr4', 'pos': '153332910', 'ref': 'C', 'alt': 'CAGG'}} + assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411760_152411761insGAG', 'vcf': {'chr': 'chr4', 'pos': '152411758', 'ref': 'C', 'alt': 'CAGG'}} + assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332912_153332913insGAG', 'vcf': {'chr': 
'4', 'pos': '153332910', 'ref': 'C', 'alt': 'CAGG'}} + assert results['NM_001349798.1:c.45_46insCCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411760_152411761insGAG', 'vcf': {'chr': '4', 'pos': '152411758', 'ref': 'C', 'alt': 'CAGG'}} + assert results['NM_001349798.1:c.45_46insCCT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001349798.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_361014.1'} assert 'NM_001257069.1:c.45_46insCCT' in list(results.keys()) - assert results['NM_001257069.1:c.45_46insCCT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001257069.1:c.45_46insCCT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001257069.1:c.45_46insCCT']['alt_genomic_loci'], []) - assert results['NM_001257069.1:c.45_46insCCT']['gene_symbol'] == 'FBXW7' - assert results['NM_001257069.1:c.45_46insCCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243998.1:p.(Thr15_Gly16insPro)', 'slr': 'NP_001243998.1:p.(T15_G16insP)'} assert results['NM_001257069.1:c.45_46insCCT']['submitted_variant'] == '4-153332910-C-CAGG' - assert results['NM_001257069.1:c.45_46insCCT']['genome_context_intronic_sequence'] == '' - assert results['NM_001257069.1:c.45_46insCCT']['hgvs_lrg_variant'] == '' + assert results['NM_001257069.1:c.45_46insCCT']['gene_symbol'] == 'FBXW7' + assert results['NM_001257069.1:c.45_46insCCT']['gene_ids'] == {'hgnc_id': 'HGNC:16712', 'entrez_gene_id': '55294', 'ucsc_id': 'uc003imt.4', 'omim_id': ['606278']} assert results['NM_001257069.1:c.45_46insCCT']['hgvs_transcript_variant'] == 'NM_001257069.1:c.45_46insCCT' + assert results['NM_001257069.1:c.45_46insCCT']['genome_context_intronic_sequence'] == '' + assert results['NM_001257069.1:c.45_46insCCT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001257069.1:c.45_46insCCT']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332912_153332913insGAG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} - assert results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411760_152411761insGAG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '152411758', 'alt': 'CAGG'}} - assert results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332912_153332913insGAG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} - assert results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411760_152411761insGAG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '152411758', 'alt': 'CAGG'}} - assert results['NM_001257069.1:c.45_46insCCT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243998.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257069.1'} + assert results['NM_001257069.1:c.45_46insCCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001243998.1:p.(Thr15_Gly16insPro)', 'slr': 'NP_001243998.1:p.(T15_G16insP)'} + assert results['NM_001257069.1:c.45_46insCCT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001257069.1:c.45_46insCCT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001257069.1:c.45_46insCCT']['alt_genomic_loci'], []) + assert results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332912_153332913insGAG', 'vcf': {'chr': 'chr4', 'pos': '153332910', 'ref': 'C', 'alt': 'CAGG'}} + assert results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411760_152411761insGAG', 'vcf': {'chr': 'chr4', 'pos': '152411758', 'ref': 'C', 
'alt': 'CAGG'}} + assert results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332912_153332913insGAG', 'vcf': {'chr': '4', 'pos': '153332910', 'ref': 'C', 'alt': 'CAGG'}} + assert results['NM_001257069.1:c.45_46insCCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411760_152411761insGAG', 'vcf': {'chr': '4', 'pos': '152411758', 'ref': 'C', 'alt': 'CAGG'}} + assert results['NM_001257069.1:c.45_46insCCT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001257069.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001243998.1'} - assert results['flag'] == 'gene_variant' assert 'NM_001349798.2:c.45_46insCCT' in list(results.keys()) - assert results['NM_001349798.2:c.45_46insCCT']['hgvs_lrg_transcript_variant'] == 'LRG_1141t1:c.45_46insCCT' - assert results['NM_001349798.2:c.45_46insCCT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001349798.2:c.45_46insCCT']['alt_genomic_loci'], []) - assert results['NM_001349798.2:c.45_46insCCT']['gene_symbol'] == 'FBXW7' - assert results['NM_001349798.2:c.45_46insCCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001336727.1(LRG_1141p1):p.(Thr15_Gly16insPro)', 'slr': 'NP_001336727.1:p.(T15_G16insP)'} assert results['NM_001349798.2:c.45_46insCCT']['submitted_variant'] == '4-153332910-C-CAGG' - assert results['NM_001349798.2:c.45_46insCCT']['genome_context_intronic_sequence'] == '' - assert results['NM_001349798.2:c.45_46insCCT']['hgvs_lrg_variant'] == '' + assert results['NM_001349798.2:c.45_46insCCT']['gene_symbol'] == 'FBXW7' + assert results['NM_001349798.2:c.45_46insCCT']['gene_ids'] == {'hgnc_id': 'HGNC:16712', 'entrez_gene_id': '55294', 'ucsc_id': 'uc003imt.4', 'omim_id': ['606278']} assert results['NM_001349798.2:c.45_46insCCT']['hgvs_transcript_variant'] == 'NM_001349798.2:c.45_46insCCT' + assert 
results['NM_001349798.2:c.45_46insCCT']['genome_context_intronic_sequence'] == '' + assert results['NM_001349798.2:c.45_46insCCT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001349798.2:c.45_46insCCT']['hgvs_refseqgene_variant'] == '' - assert results['NM_001349798.2:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332912_153332913insGAG', 'vcf': {'chr': 'chr4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} + assert results['NM_001349798.2:c.45_46insCCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001336727.1(LRG_1141p1):p.(Thr15_Gly16insPro)', 'slr': 'NP_001336727.1:p.(T15_G16insP)'} + assert results['NM_001349798.2:c.45_46insCCT']['hgvs_lrg_transcript_variant'] == 'LRG_1141t1:c.45_46insCCT' + assert results['NM_001349798.2:c.45_46insCCT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001349798.2:c.45_46insCCT']['alt_genomic_loci'], []) + assert results['NM_001349798.2:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332912_153332913insGAG', 'vcf': {'chr': 'chr4', 'pos': '153332910', 'ref': 'C', 'alt': 'CAGG'}} assert 'hg38' not in list(results['NM_001349798.2:c.45_46insCCT']['primary_assembly_loci'].keys()) - assert results['NM_001349798.2:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332912_153332913insGAG', 'vcf': {'chr': '4', 'ref': 'C', 'pos': '153332910', 'alt': 'CAGG'}} + assert results['NM_001349798.2:c.45_46insCCT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332912_153332913insGAG', 'vcf': {'chr': '4', 'pos': '153332910', 'ref': 'C', 'alt': 'CAGG'}} assert 'grch38' not in list(results['NM_001349798.2:c.45_46insCCT']['primary_assembly_loci'].keys()) - assert results['NM_001349798.2:c.45_46insCCT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001336727.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001349798.2'} + assert results['NM_001349798.2:c.45_46insCCT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001349798.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001336727.1'} + assert 'NM_033632.3:c.45_46insCCT' in list(results.keys()) + assert results['NM_033632.3:c.45_46insCCT']['submitted_variant'] == '4-153332910-C-CAGG' + assert results['NM_033632.3:c.45_46insCCT']['gene_symbol'] == 'FBXW7' + assert results['NM_033632.3:c.45_46insCCT']['gene_ids'] == {'hgnc_id': 'HGNC:16712', 'entrez_gene_id': '55294', 'ucsc_id': 'uc003imt.4', 'omim_id': ['606278']} + assert results['NM_033632.3:c.45_46insCCT']['hgvs_transcript_variant'] == 'NM_033632.3:c.45_46insCCT' + assert results['NM_033632.3:c.45_46insCCT']['genome_context_intronic_sequence'] == '' + assert results['NM_033632.3:c.45_46insCCT']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_033632.3:c.45_46insCCT']['hgvs_refseqgene_variant'] == 'NG_029466.1:g.128262_128263insCCT' + assert results['NM_033632.3:c.45_46insCCT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_361014.1:p.(Thr15_Gly16insPro)', 'slr': 'NP_361014.1:p.(T15_G16insP)'} + assert results['NM_033632.3:c.45_46insCCT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_033632.3:c.45_46insCCT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_033632.3:c.45_46insCCT']['alt_genomic_loci'], []) + assert results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000004.11:g.153332912_153332913insGAG', 'vcf': {'chr': 'chr4', 'pos': '153332910', 'ref': 'C', 'alt': 'CAGG'}} + assert results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411760_152411761insGAG', 'vcf': {'chr': 'chr4', 'pos': '152411758', 'ref': 'C', 'alt': 'CAGG'}} + assert results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['grch37'] 
== {'hgvs_genomic_description': 'NC_000004.11:g.153332912_153332913insGAG', 'vcf': {'chr': '4', 'pos': '153332910', 'ref': 'C', 'alt': 'CAGG'}} + assert results['NM_033632.3:c.45_46insCCT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000004.12:g.152411760_152411761insGAG', 'vcf': {'chr': '4', 'pos': '152411758', 'ref': 'C', 'alt': 'CAGG'}} + assert results['NM_033632.3:c.45_46insCCT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_033632.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_361014.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_029466.1'} def test_variant286(self): variant = '5-1295183-G-A' @@ -15403,298 +15906,310 @@ def test_variant286(self): assert results['flag'] == 'intergenic' assert 'intergenic_variant_1' in list(results.keys()) - assert results['intergenic_variant_1']['hgvs_lrg_transcript_variant'] == '' - assert results['intergenic_variant_1']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['intergenic_variant_1']['alt_genomic_loci'], []) - assert results['intergenic_variant_1']['gene_symbol'] == '' - assert results['intergenic_variant_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} assert results['intergenic_variant_1']['submitted_variant'] == '5-1295183-G-A' - assert results['intergenic_variant_1']['genome_context_intronic_sequence'] == '' - assert results['intergenic_variant_1']['hgvs_lrg_variant'] == 'LRG_343:g.4980C>T' + assert results['intergenic_variant_1']['gene_symbol'] == '' + assert results['intergenic_variant_1']['gene_ids'] == {} assert results['intergenic_variant_1']['hgvs_transcript_variant'] == '' + assert results['intergenic_variant_1']['genome_context_intronic_sequence'] == '' + assert results['intergenic_variant_1']['refseqgene_context_intronic_sequence'] == '' assert results['intergenic_variant_1']['hgvs_refseqgene_variant'] == 'NG_009265.1:g.4980C>T' - assert 
results['intergenic_variant_1']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.1295183G>A', 'vcf': {'chr': 'chr5', 'ref': 'G', 'pos': '1295183', 'alt': 'A'}} - assert results['intergenic_variant_1']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.1295068G>A', 'vcf': {'chr': 'chr5', 'ref': 'G', 'pos': '1295068', 'alt': 'A'}} - assert results['intergenic_variant_1']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.1295183G>A', 'vcf': {'chr': '5', 'ref': 'G', 'pos': '1295183', 'alt': 'A'}} - assert results['intergenic_variant_1']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.1295068G>A', 'vcf': {'chr': '5', 'ref': 'G', 'pos': '1295068', 'alt': 'A'}} + assert results['intergenic_variant_1']['hgvs_predicted_protein_consequence'] == {'tlr': '', 'slr': ''} + assert results['intergenic_variant_1']['hgvs_lrg_transcript_variant'] == '' + assert results['intergenic_variant_1']['hgvs_lrg_variant'] == 'LRG_343:g.4980C>T' + self.assertCountEqual(results['intergenic_variant_1']['alt_genomic_loci'], []) + assert results['intergenic_variant_1']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.1295183G>A', 'vcf': {'chr': 'chr5', 'pos': '1295183', 'ref': 'G', 'alt': 'A'}} + assert results['intergenic_variant_1']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.1295068G>A', 'vcf': {'chr': 'chr5', 'pos': '1295068', 'ref': 'G', 'alt': 'A'}} + assert results['intergenic_variant_1']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.1295183G>A', 'vcf': {'chr': '5', 'pos': '1295183', 'ref': 'G', 'alt': 'A'}} + assert results['intergenic_variant_1']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.1295068G>A', 'vcf': {'chr': '5', 'pos': '1295068', 'ref': 'G', 'alt': 'A'}} assert 
results['intergenic_variant_1']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009265.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_343.xml'} - def test_variant287(self): variant = '5-77396835-TTTC-T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_003664.4:c.2409_2411del' in list(results.keys()) - assert results['NM_003664.4:c.2409_2411del']['hgvs_lrg_transcript_variant'] == 'LRG_170t1:c.2409_2411del' - assert results['NM_003664.4:c.2409_2411del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003664.4:c.2409_2411del']['alt_genomic_loci'], []) - assert results['NM_003664.4:c.2409_2411del']['gene_symbol'] == 'AP3B1' - assert results['NM_003664.4:c.2409_2411del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003655.3(LRG_170p1):p.(Lys804del)', 'slr': 'NP_003655.3:p.(K804del)'} assert results['NM_003664.4:c.2409_2411del']['submitted_variant'] == '5-77396835-TTTC-T' - assert results['NM_003664.4:c.2409_2411del']['genome_context_intronic_sequence'] == '' - assert results['NM_003664.4:c.2409_2411del']['hgvs_lrg_variant'] == 'LRG_170:g.198691_198693del' + assert results['NM_003664.4:c.2409_2411del']['gene_symbol'] == 'AP3B1' + assert results['NM_003664.4:c.2409_2411del']['gene_ids'] == {'hgnc_id': 'HGNC:566', 'entrez_gene_id': '8546', 'ucsc_id': 'uc003kfj.5', 'omim_id': ['603401']} assert results['NM_003664.4:c.2409_2411del']['hgvs_transcript_variant'] == 'NM_003664.4:c.2409_2411del' + assert results['NM_003664.4:c.2409_2411del']['genome_context_intronic_sequence'] == '' + assert results['NM_003664.4:c.2409_2411del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003664.4:c.2409_2411del']['hgvs_refseqgene_variant'] == 'NG_007268.1:g.198691_198693del' - assert results['NM_003664.4:c.2409_2411del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000005.9:g.77396838_77396840del', 'vcf': {'chr': 'chr5', 'ref': 'TTTC', 'pos': '77396835', 'alt': 'T'}} - assert results['NM_003664.4:c.2409_2411del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.78101014_78101016del', 'vcf': {'chr': 'chr5', 'ref': 'TTTC', 'pos': '78101011', 'alt': 'T'}} - assert results['NM_003664.4:c.2409_2411del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396838_77396840del', 'vcf': {'chr': '5', 'ref': 'TTTC', 'pos': '77396835', 'alt': 'T'}} - assert results['NM_003664.4:c.2409_2411del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.78101014_78101016del', 'vcf': {'chr': '5', 'ref': 'TTTC', 'pos': '78101011', 'alt': 'T'}} - assert results['NM_003664.4:c.2409_2411del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007268.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003655.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003664.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_170.xml'} + assert results['NM_003664.4:c.2409_2411del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003655.3(LRG_170p1):p.(Lys804del)', 'slr': 'NP_003655.3:p.(K804del)'} + assert results['NM_003664.4:c.2409_2411del']['hgvs_lrg_transcript_variant'] == 'LRG_170t1:c.2409_2411del' + assert results['NM_003664.4:c.2409_2411del']['hgvs_lrg_variant'] == 'LRG_170:g.198691_198693del' + self.assertCountEqual(results['NM_003664.4:c.2409_2411del']['alt_genomic_loci'], []) + assert results['NM_003664.4:c.2409_2411del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396838_77396840del', 'vcf': {'chr': 'chr5', 'pos': '77396835', 'ref': 'TTTC', 'alt': 'T'}} + assert results['NM_003664.4:c.2409_2411del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.78101014_78101016del', 'vcf': {'chr': 'chr5', 'pos': '78101011', 'ref': 'TTTC', 
'alt': 'T'}} + assert results['NM_003664.4:c.2409_2411del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396838_77396840del', 'vcf': {'chr': '5', 'pos': '77396835', 'ref': 'TTTC', 'alt': 'T'}} + assert results['NM_003664.4:c.2409_2411del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.78101014_78101016del', 'vcf': {'chr': '5', 'pos': '78101011', 'ref': 'TTTC', 'alt': 'T'}} + assert results['NM_003664.4:c.2409_2411del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003664.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003655.3', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007268.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_170.xml'} - assert results['flag'] == 'gene_variant' assert 'NM_003664.3:c.2409_2411del' in list(results.keys()) - assert results['NM_003664.3:c.2409_2411del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003664.3:c.2409_2411del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003664.3:c.2409_2411del']['alt_genomic_loci'], []) - assert results['NM_003664.3:c.2409_2411del']['gene_symbol'] == 'AP3B1' - assert results['NM_003664.3:c.2409_2411del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003655.3(LRG_170p1):p.(Lys804del)', 'slr': 'NP_003655.3:p.(K804del)'} assert results['NM_003664.3:c.2409_2411del']['submitted_variant'] == '5-77396835-TTTC-T' - assert results['NM_003664.3:c.2409_2411del']['genome_context_intronic_sequence'] == '' - assert results['NM_003664.3:c.2409_2411del']['hgvs_lrg_variant'] == '' + assert results['NM_003664.3:c.2409_2411del']['gene_symbol'] == 'AP3B1' + assert results['NM_003664.3:c.2409_2411del']['gene_ids'] == {'hgnc_id': 'HGNC:566', 'entrez_gene_id': '8546', 'ucsc_id': 'uc003kfj.5', 'omim_id': ['603401']} assert results['NM_003664.3:c.2409_2411del']['hgvs_transcript_variant'] == 'NM_003664.3:c.2409_2411del' + assert 
results['NM_003664.3:c.2409_2411del']['genome_context_intronic_sequence'] == '' + assert results['NM_003664.3:c.2409_2411del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003664.3:c.2409_2411del']['hgvs_refseqgene_variant'] == '' - assert results['NM_003664.3:c.2409_2411del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396838_77396840del', 'vcf': {'chr': 'chr5', 'ref': 'TTTC', 'pos': '77396835', 'alt': 'T'}} + assert results['NM_003664.3:c.2409_2411del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003655.3(LRG_170p1):p.(Lys804del)', 'slr': 'NP_003655.3:p.(K804del)'} + assert results['NM_003664.3:c.2409_2411del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003664.3:c.2409_2411del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003664.3:c.2409_2411del']['alt_genomic_loci'], []) + assert results['NM_003664.3:c.2409_2411del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396838_77396840del', 'vcf': {'chr': 'chr5', 'pos': '77396835', 'ref': 'TTTC', 'alt': 'T'}} assert 'hg38' not in list(results['NM_003664.3:c.2409_2411del']['primary_assembly_loci'].keys()) - assert results['NM_003664.3:c.2409_2411del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396838_77396840del', 'vcf': {'chr': '5', 'ref': 'TTTC', 'pos': '77396835', 'alt': 'T'}} + assert results['NM_003664.3:c.2409_2411del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396838_77396840del', 'vcf': {'chr': '5', 'pos': '77396835', 'ref': 'TTTC', 'alt': 'T'}} assert 'grch38' not in list(results['NM_003664.3:c.2409_2411del']['primary_assembly_loci'].keys()) - assert results['NM_003664.3:c.2409_2411del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003655.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003664.3'} + assert 
results['NM_003664.3:c.2409_2411del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003664.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003655.3'} assert 'NM_001271769.1:c.2262_2264del' in list(results.keys()) - assert results['NM_001271769.1:c.2262_2264del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001271769.1:c.2262_2264del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001271769.1:c.2262_2264del']['alt_genomic_loci'], []) - assert results['NM_001271769.1:c.2262_2264del']['gene_symbol'] == 'AP3B1' - assert results['NM_001271769.1:c.2262_2264del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001258698.1:p.(Lys755del)', 'slr': 'NP_001258698.1:p.(K755del)'} assert results['NM_001271769.1:c.2262_2264del']['submitted_variant'] == '5-77396835-TTTC-T' - assert results['NM_001271769.1:c.2262_2264del']['genome_context_intronic_sequence'] == '' - assert results['NM_001271769.1:c.2262_2264del']['hgvs_lrg_variant'] == '' + assert results['NM_001271769.1:c.2262_2264del']['gene_symbol'] == 'AP3B1' + assert results['NM_001271769.1:c.2262_2264del']['gene_ids'] == {'hgnc_id': 'HGNC:566', 'entrez_gene_id': '8546', 'ucsc_id': 'uc003kfj.5', 'omim_id': ['603401']} assert results['NM_001271769.1:c.2262_2264del']['hgvs_transcript_variant'] == 'NM_001271769.1:c.2262_2264del' + assert results['NM_001271769.1:c.2262_2264del']['genome_context_intronic_sequence'] == '' + assert results['NM_001271769.1:c.2262_2264del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001271769.1:c.2262_2264del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001271769.1:c.2262_2264del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396838_77396840del', 'vcf': {'chr': 'chr5', 'ref': 'TTTC', 'pos': '77396835', 'alt': 'T'}} - assert results['NM_001271769.1:c.2262_2264del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000005.10:g.78101014_78101016del', 'vcf': {'chr': 'chr5', 'ref': 'TTTC', 'pos': '78101011', 'alt': 'T'}} - assert results['NM_001271769.1:c.2262_2264del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396838_77396840del', 'vcf': {'chr': '5', 'ref': 'TTTC', 'pos': '77396835', 'alt': 'T'}} - assert results['NM_001271769.1:c.2262_2264del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.78101014_78101016del', 'vcf': {'chr': '5', 'ref': 'TTTC', 'pos': '78101011', 'alt': 'T'}} - assert results['NM_001271769.1:c.2262_2264del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001258698.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001271769.1'} - + assert results['NM_001271769.1:c.2262_2264del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001258698.1:p.(Lys755del)', 'slr': 'NP_001258698.1:p.(K755del)'} + assert results['NM_001271769.1:c.2262_2264del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001271769.1:c.2262_2264del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001271769.1:c.2262_2264del']['alt_genomic_loci'], []) + assert results['NM_001271769.1:c.2262_2264del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396838_77396840del', 'vcf': {'chr': 'chr5', 'pos': '77396835', 'ref': 'TTTC', 'alt': 'T'}} + assert results['NM_001271769.1:c.2262_2264del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.78101014_78101016del', 'vcf': {'chr': 'chr5', 'pos': '78101011', 'ref': 'TTTC', 'alt': 'T'}} + assert results['NM_001271769.1:c.2262_2264del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.77396838_77396840del', 'vcf': {'chr': '5', 'pos': '77396835', 'ref': 'TTTC', 'alt': 'T'}} + assert results['NM_001271769.1:c.2262_2264del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000005.10:g.78101014_78101016del', 'vcf': {'chr': '5', 'pos': '78101011', 'ref': 'TTTC', 'alt': 'T'}} + assert results['NM_001271769.1:c.2262_2264del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001271769.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001258698.1'} def test_variant288(self): variant = '5-118811422-GGTGA-G' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_000414.3:c.302+3_302+6del' in list(results.keys()) - assert results['NM_000414.3:c.302+3_302+6del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000414.3:c.302+3_302+6del']['refseqgene_context_intronic_sequence'] == 'NG_008182.1(NM_000414.3):c.302+3_302+6del' - self.assertCountEqual(results['NM_000414.3:c.302+3_302+6del']['alt_genomic_loci'], []) - assert results['NM_000414.3:c.302+3_302+6del']['gene_symbol'] == 'HSD17B4' - assert results['NM_000414.3:c.302+3_302+6del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000405.1:p.?', 'slr': 'NP_000405.1:p.?'} - assert results['NM_000414.3:c.302+3_302+6del']['submitted_variant'] == '5-118811422-GGTGA-G' - assert results['NM_000414.3:c.302+3_302+6del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_000414.3):c.302+3_302+6del' - assert results['NM_000414.3:c.302+3_302+6del']['hgvs_lrg_variant'] == '' - assert results['NM_000414.3:c.302+3_302+6del']['hgvs_transcript_variant'] == 'NM_000414.3:c.302+3_302+6del' - assert results['NM_000414.3:c.302+3_302+6del']['hgvs_refseqgene_variant'] == 'NG_008182.1:g.28278_28281del' - assert results['NM_000414.3:c.302+3_302+6del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': 'chr5', 'ref': 'GGTGA', 'pos': '118811422', 'alt': 'G'}} - assert results['NM_000414.3:c.302+3_302+6del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': 'chr5', 'ref': 
'GGTGA', 'pos': '119475727', 'alt': 'G'}} - assert results['NM_000414.3:c.302+3_302+6del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': '5', 'ref': 'GGTGA', 'pos': '118811422', 'alt': 'G'}} - assert results['NM_000414.3:c.302+3_302+6del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': '5', 'ref': 'GGTGA', 'pos': '119475727', 'alt': 'G'}} - assert results['NM_000414.3:c.302+3_302+6del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008182.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000405.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000414.3'} - + assert results['flag'] == 'gene_variant' assert 'NM_001292028.1:c.-110+3_-110+6del' in list(results.keys()) - assert results['NM_001292028.1:c.-110+3_-110+6del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001292028.1:c.-110+3_-110+6del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001292028.1:c.-110+3_-110+6del']['alt_genomic_loci'], []) - assert results['NM_001292028.1:c.-110+3_-110+6del']['gene_symbol'] == 'HSD17B4' - assert results['NM_001292028.1:c.-110+3_-110+6del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278957.1:p.?', 'slr': 'NP_001278957.1:p.?'} assert results['NM_001292028.1:c.-110+3_-110+6del']['submitted_variant'] == '5-118811422-GGTGA-G' - assert results['NM_001292028.1:c.-110+3_-110+6del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001292028.1):c.-110+3_-110+6del' - assert results['NM_001292028.1:c.-110+3_-110+6del']['hgvs_lrg_variant'] == '' + assert results['NM_001292028.1:c.-110+3_-110+6del']['gene_symbol'] == 'HSD17B4' + assert results['NM_001292028.1:c.-110+3_-110+6del']['gene_ids'] == {'hgnc_id': 'HGNC:5213', 'entrez_gene_id': '3295', 'ucsc_id': 'uc003ksj.4', 'omim_id': ['601860']} assert 
results['NM_001292028.1:c.-110+3_-110+6del']['hgvs_transcript_variant'] == 'NM_001292028.1:c.-110+3_-110+6del' + assert results['NM_001292028.1:c.-110+3_-110+6del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001292028.1):c.-110+3_-110+6del' + assert results['NM_001292028.1:c.-110+3_-110+6del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001292028.1:c.-110+3_-110+6del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001292028.1:c.-110+3_-110+6del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': 'chr5', 'ref': 'GGTGA', 'pos': '118811422', 'alt': 'G'}} - assert results['NM_001292028.1:c.-110+3_-110+6del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': 'chr5', 'ref': 'GGTGA', 'pos': '119475727', 'alt': 'G'}} - assert results['NM_001292028.1:c.-110+3_-110+6del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': '5', 'ref': 'GGTGA', 'pos': '118811422', 'alt': 'G'}} - assert results['NM_001292028.1:c.-110+3_-110+6del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': '5', 'ref': 'GGTGA', 'pos': '119475727', 'alt': 'G'}} - assert results['NM_001292028.1:c.-110+3_-110+6del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278957.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001292028.1'} + assert results['NM_001292028.1:c.-110+3_-110+6del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278957.1:p.?', 'slr': 'NP_001278957.1:p.?'} + assert results['NM_001292028.1:c.-110+3_-110+6del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001292028.1:c.-110+3_-110+6del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001292028.1:c.-110+3_-110+6del']['alt_genomic_loci'], 
[]) + assert results['NM_001292028.1:c.-110+3_-110+6del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': 'chr5', 'pos': '118811422', 'ref': 'GGTGA', 'alt': 'G'}} + assert results['NM_001292028.1:c.-110+3_-110+6del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': 'chr5', 'pos': '119475727', 'ref': 'GGTGA', 'alt': 'G'}} + assert results['NM_001292028.1:c.-110+3_-110+6del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': '5', 'pos': '118811422', 'ref': 'GGTGA', 'alt': 'G'}} + assert results['NM_001292028.1:c.-110+3_-110+6del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': '5', 'pos': '119475727', 'ref': 'GGTGA', 'alt': 'G'}} + assert results['NM_001292028.1:c.-110+3_-110+6del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001292028.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278957.1'} - assert 'NM_001199291.2:c.377+3_377+6del' in list(results.keys()) - assert results['NM_001199291.2:c.377+3_377+6del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001199291.2:c.377+3_377+6del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001199291.2:c.377+3_377+6del']['alt_genomic_loci'], []) - assert results['NM_001199291.2:c.377+3_377+6del']['gene_symbol'] == 'HSD17B4' - assert results['NM_001199291.2:c.377+3_377+6del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001186220.1:p.?', 'slr': 'NP_001186220.1:p.?'} - assert results['NM_001199291.2:c.377+3_377+6del']['submitted_variant'] == '5-118811422-GGTGA-G' - assert results['NM_001199291.2:c.377+3_377+6del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001199291.2):c.377+3_377+6del' - assert 
results['NM_001199291.2:c.377+3_377+6del']['hgvs_lrg_variant'] == '' - assert results['NM_001199291.2:c.377+3_377+6del']['hgvs_transcript_variant'] == 'NM_001199291.2:c.377+3_377+6del' - assert results['NM_001199291.2:c.377+3_377+6del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001199291.2:c.377+3_377+6del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': 'chr5', 'ref': 'GGTGA', 'pos': '118811422', 'alt': 'G'}} - assert results['NM_001199291.2:c.377+3_377+6del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': 'chr5', 'ref': 'GGTGA', 'pos': '119475727', 'alt': 'G'}} - assert results['NM_001199291.2:c.377+3_377+6del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': '5', 'ref': 'GGTGA', 'pos': '118811422', 'alt': 'G'}} - assert results['NM_001199291.2:c.377+3_377+6del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': '5', 'ref': 'GGTGA', 'pos': '119475727', 'alt': 'G'}} - assert results['NM_001199291.2:c.377+3_377+6del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186220.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199291.2'} + assert 'NM_000414.3:c.302+3_302+6del' in list(results.keys()) + assert results['NM_000414.3:c.302+3_302+6del']['submitted_variant'] == '5-118811422-GGTGA-G' + assert results['NM_000414.3:c.302+3_302+6del']['gene_symbol'] == 'HSD17B4' + assert results['NM_000414.3:c.302+3_302+6del']['gene_ids'] == {'hgnc_id': 'HGNC:5213', 'entrez_gene_id': '3295', 'ucsc_id': 'uc003ksj.4', 'omim_id': ['601860']} + assert results['NM_000414.3:c.302+3_302+6del']['hgvs_transcript_variant'] == 'NM_000414.3:c.302+3_302+6del' + assert 
results['NM_000414.3:c.302+3_302+6del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_000414.3):c.302+3_302+6del' + assert results['NM_000414.3:c.302+3_302+6del']['refseqgene_context_intronic_sequence'] == 'NG_008182.1(NM_000414.3):c.302+3_302+6del' + assert results['NM_000414.3:c.302+3_302+6del']['hgvs_refseqgene_variant'] == 'NG_008182.1:g.28278_28281del' + assert results['NM_000414.3:c.302+3_302+6del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000405.1:p.?', 'slr': 'NP_000405.1:p.?'} + assert results['NM_000414.3:c.302+3_302+6del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000414.3:c.302+3_302+6del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000414.3:c.302+3_302+6del']['alt_genomic_loci'], []) + assert results['NM_000414.3:c.302+3_302+6del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': 'chr5', 'pos': '118811422', 'ref': 'GGTGA', 'alt': 'G'}} + assert results['NM_000414.3:c.302+3_302+6del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': 'chr5', 'pos': '119475727', 'ref': 'GGTGA', 'alt': 'G'}} + assert results['NM_000414.3:c.302+3_302+6del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': '5', 'pos': '118811422', 'ref': 'GGTGA', 'alt': 'G'}} + assert results['NM_000414.3:c.302+3_302+6del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': '5', 'pos': '119475727', 'ref': 'GGTGA', 'alt': 'G'}} + assert results['NM_000414.3:c.302+3_302+6del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000414.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000405.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008182.1'} - assert results['flag'] == 'gene_variant' assert 
'NM_001292027.1:c.230+3_230+6del' in list(results.keys()) - assert results['NM_001292027.1:c.230+3_230+6del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001292027.1:c.230+3_230+6del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001292027.1:c.230+3_230+6del']['alt_genomic_loci'], []) - assert results['NM_001292027.1:c.230+3_230+6del']['gene_symbol'] == 'HSD17B4' - assert results['NM_001292027.1:c.230+3_230+6del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278956.1:p.?', 'slr': 'NP_001278956.1:p.?'} assert results['NM_001292027.1:c.230+3_230+6del']['submitted_variant'] == '5-118811422-GGTGA-G' - assert results['NM_001292027.1:c.230+3_230+6del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001292027.1):c.230+3_230+6del' - assert results['NM_001292027.1:c.230+3_230+6del']['hgvs_lrg_variant'] == '' + assert results['NM_001292027.1:c.230+3_230+6del']['gene_symbol'] == 'HSD17B4' + assert results['NM_001292027.1:c.230+3_230+6del']['gene_ids'] == {'hgnc_id': 'HGNC:5213', 'entrez_gene_id': '3295', 'ucsc_id': 'uc003ksj.4', 'omim_id': ['601860']} assert results['NM_001292027.1:c.230+3_230+6del']['hgvs_transcript_variant'] == 'NM_001292027.1:c.230+3_230+6del' + assert results['NM_001292027.1:c.230+3_230+6del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001292027.1):c.230+3_230+6del' + assert results['NM_001292027.1:c.230+3_230+6del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001292027.1:c.230+3_230+6del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001292027.1:c.230+3_230+6del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': 'chr5', 'ref': 'GGTGA', 'pos': '118811422', 'alt': 'G'}} - assert results['NM_001292027.1:c.230+3_230+6del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': 'chr5', 'ref': 'GGTGA', 'pos': 
'119475727', 'alt': 'G'}} - assert results['NM_001292027.1:c.230+3_230+6del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': '5', 'ref': 'GGTGA', 'pos': '118811422', 'alt': 'G'}} - assert results['NM_001292027.1:c.230+3_230+6del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': '5', 'ref': 'GGTGA', 'pos': '119475727', 'alt': 'G'}} - assert results['NM_001292027.1:c.230+3_230+6del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278956.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001292027.1'} + assert results['NM_001292027.1:c.230+3_230+6del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278956.1:p.?', 'slr': 'NP_001278956.1:p.?'} + assert results['NM_001292027.1:c.230+3_230+6del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001292027.1:c.230+3_230+6del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001292027.1:c.230+3_230+6del']['alt_genomic_loci'], []) + assert results['NM_001292027.1:c.230+3_230+6del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': 'chr5', 'pos': '118811422', 'ref': 'GGTGA', 'alt': 'G'}} + assert results['NM_001292027.1:c.230+3_230+6del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': 'chr5', 'pos': '119475727', 'ref': 'GGTGA', 'alt': 'G'}} + assert results['NM_001292027.1:c.230+3_230+6del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': '5', 'pos': '118811422', 'ref': 'GGTGA', 'alt': 'G'}} + assert results['NM_001292027.1:c.230+3_230+6del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': '5', 'pos': 
'119475727', 'ref': 'GGTGA', 'alt': 'G'}} + assert results['NM_001292027.1:c.230+3_230+6del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001292027.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278956.1'} assert 'NM_001199291.1:c.377+3_377+6del' in list(results.keys()) - assert results['NM_001199291.1:c.377+3_377+6del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001199291.1:c.377+3_377+6del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001199291.1:c.377+3_377+6del']['alt_genomic_loci'], []) - assert results['NM_001199291.1:c.377+3_377+6del']['gene_symbol'] == 'HSD17B4' - assert results['NM_001199291.1:c.377+3_377+6del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001186220.1:p.?', 'slr': 'NP_001186220.1:p.?'} assert results['NM_001199291.1:c.377+3_377+6del']['submitted_variant'] == '5-118811422-GGTGA-G' - assert results['NM_001199291.1:c.377+3_377+6del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001199291.1):c.377+3_377+6del' - assert results['NM_001199291.1:c.377+3_377+6del']['hgvs_lrg_variant'] == '' + assert results['NM_001199291.1:c.377+3_377+6del']['gene_symbol'] == 'HSD17B4' + assert results['NM_001199291.1:c.377+3_377+6del']['gene_ids'] == {'hgnc_id': 'HGNC:5213', 'entrez_gene_id': '3295', 'ucsc_id': 'uc003ksj.4', 'omim_id': ['601860']} assert results['NM_001199291.1:c.377+3_377+6del']['hgvs_transcript_variant'] == 'NM_001199291.1:c.377+3_377+6del' + assert results['NM_001199291.1:c.377+3_377+6del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001199291.1):c.377+3_377+6del' + assert results['NM_001199291.1:c.377+3_377+6del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001199291.1:c.377+3_377+6del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001199291.1:c.377+3_377+6del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': 
{'chr': 'chr5', 'ref': 'GGTGA', 'pos': '118811422', 'alt': 'G'}} + assert results['NM_001199291.1:c.377+3_377+6del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001186220.1:p.?', 'slr': 'NP_001186220.1:p.?'} + assert results['NM_001199291.1:c.377+3_377+6del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001199291.1:c.377+3_377+6del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001199291.1:c.377+3_377+6del']['alt_genomic_loci'], []) + assert results['NM_001199291.1:c.377+3_377+6del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': 'chr5', 'pos': '118811422', 'ref': 'GGTGA', 'alt': 'G'}} assert 'hg38' not in list(results['NM_001199291.1:c.377+3_377+6del']['primary_assembly_loci'].keys()) - assert results['NM_001199291.1:c.377+3_377+6del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': '5', 'ref': 'GGTGA', 'pos': '118811422', 'alt': 'G'}} + assert results['NM_001199291.1:c.377+3_377+6del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': '5', 'pos': '118811422', 'ref': 'GGTGA', 'alt': 'G'}} assert 'grch38' not in list(results['NM_001199291.1:c.377+3_377+6del']['primary_assembly_loci'].keys()) - assert results['NM_001199291.1:c.377+3_377+6del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186220.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199291.1'} + assert results['NM_001199291.1:c.377+3_377+6del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199291.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186220.1'} assert 'NM_001199292.1:c.248+3_248+6del' in list(results.keys()) - assert results['NM_001199292.1:c.248+3_248+6del']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_001199292.1:c.248+3_248+6del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001199292.1:c.248+3_248+6del']['alt_genomic_loci'], []) - assert results['NM_001199292.1:c.248+3_248+6del']['gene_symbol'] == 'HSD17B4' - assert results['NM_001199292.1:c.248+3_248+6del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001186221.1:p.?', 'slr': 'NP_001186221.1:p.?'} assert results['NM_001199292.1:c.248+3_248+6del']['submitted_variant'] == '5-118811422-GGTGA-G' - assert results['NM_001199292.1:c.248+3_248+6del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001199292.1):c.248+3_248+6del' - assert results['NM_001199292.1:c.248+3_248+6del']['hgvs_lrg_variant'] == '' + assert results['NM_001199292.1:c.248+3_248+6del']['gene_symbol'] == 'HSD17B4' + assert results['NM_001199292.1:c.248+3_248+6del']['gene_ids'] == {'hgnc_id': 'HGNC:5213', 'entrez_gene_id': '3295', 'ucsc_id': 'uc003ksj.4', 'omim_id': ['601860']} assert results['NM_001199292.1:c.248+3_248+6del']['hgvs_transcript_variant'] == 'NM_001199292.1:c.248+3_248+6del' + assert results['NM_001199292.1:c.248+3_248+6del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001199292.1):c.248+3_248+6del' + assert results['NM_001199292.1:c.248+3_248+6del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001199292.1:c.248+3_248+6del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001199292.1:c.248+3_248+6del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': 'chr5', 'ref': 'GGTGA', 'pos': '118811422', 'alt': 'G'}} - assert results['NM_001199292.1:c.248+3_248+6del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': 'chr5', 'ref': 'GGTGA', 'pos': '119475727', 'alt': 'G'}} - assert results['NM_001199292.1:c.248+3_248+6del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': '5', 'ref': 'GGTGA', 'pos': '118811422', 'alt': 'G'}} - assert results['NM_001199292.1:c.248+3_248+6del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': '5', 'ref': 'GGTGA', 'pos': '119475727', 'alt': 'G'}} - assert results['NM_001199292.1:c.248+3_248+6del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186221.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199292.1'} + assert results['NM_001199292.1:c.248+3_248+6del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001186221.1:p.?', 'slr': 'NP_001186221.1:p.?'} + assert results['NM_001199292.1:c.248+3_248+6del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001199292.1:c.248+3_248+6del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001199292.1:c.248+3_248+6del']['alt_genomic_loci'], []) + assert results['NM_001199292.1:c.248+3_248+6del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': 'chr5', 'pos': '118811422', 'ref': 'GGTGA', 'alt': 'G'}} + assert results['NM_001199292.1:c.248+3_248+6del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': 'chr5', 'pos': '119475727', 'ref': 'GGTGA', 'alt': 'G'}} + assert results['NM_001199292.1:c.248+3_248+6del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': '5', 'pos': '118811422', 'ref': 'GGTGA', 'alt': 'G'}} + assert results['NM_001199292.1:c.248+3_248+6del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': '5', 'pos': '119475727', 'ref': 'GGTGA', 'alt': 'G'}} + assert results['NM_001199292.1:c.248+3_248+6del']['reference_sequence_records'] == {'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199292.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186221.1'} + assert 'NM_001199291.2:c.377+3_377+6del' in list(results.keys()) + assert results['NM_001199291.2:c.377+3_377+6del']['submitted_variant'] == '5-118811422-GGTGA-G' + assert results['NM_001199291.2:c.377+3_377+6del']['gene_symbol'] == 'HSD17B4' + assert results['NM_001199291.2:c.377+3_377+6del']['gene_ids'] == {'hgnc_id': 'HGNC:5213', 'entrez_gene_id': '3295', 'ucsc_id': 'uc003ksj.4', 'omim_id': ['601860']} + assert results['NM_001199291.2:c.377+3_377+6del']['hgvs_transcript_variant'] == 'NM_001199291.2:c.377+3_377+6del' + assert results['NM_001199291.2:c.377+3_377+6del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001199291.2):c.377+3_377+6del' + assert results['NM_001199291.2:c.377+3_377+6del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001199291.2:c.377+3_377+6del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001199291.2:c.377+3_377+6del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001186220.1:p.?', 'slr': 'NP_001186220.1:p.?'} + assert results['NM_001199291.2:c.377+3_377+6del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001199291.2:c.377+3_377+6del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001199291.2:c.377+3_377+6del']['alt_genomic_loci'], []) + assert results['NM_001199291.2:c.377+3_377+6del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 'vcf': {'chr': 'chr5', 'pos': '118811422', 'ref': 'GGTGA', 'alt': 'G'}} + assert results['NM_001199291.2:c.377+3_377+6del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': 'chr5', 'pos': '119475727', 'ref': 'GGTGA', 'alt': 'G'}} + assert results['NM_001199291.2:c.377+3_377+6del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811425_118811428del', 
'vcf': {'chr': '5', 'pos': '118811422', 'ref': 'GGTGA', 'alt': 'G'}} + assert results['NM_001199291.2:c.377+3_377+6del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475730_119475733del', 'vcf': {'chr': '5', 'pos': '119475727', 'ref': 'GGTGA', 'alt': 'G'}} + assert results['NM_001199291.2:c.377+3_377+6del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199291.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186220.1'} def test_variant289(self): variant = '5-118811422-GGTGAG-G' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_001292028.1:c.-110+1_-110+5del' in list(results.keys()) - assert results['NM_001292028.1:c.-110+1_-110+5del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001292028.1:c.-110+1_-110+5del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001292028.1:c.-110+1_-110+5del']['alt_genomic_loci'], []) - assert results['NM_001292028.1:c.-110+1_-110+5del']['gene_symbol'] == 'HSD17B4' - assert results['NM_001292028.1:c.-110+1_-110+5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278957.1:p.?', 'slr': 'NP_001278957.1:p.?'} assert results['NM_001292028.1:c.-110+1_-110+5del']['submitted_variant'] == '5-118811422-GGTGAG-G' - assert results['NM_001292028.1:c.-110+1_-110+5del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001292028.1):c.-110+1_-110+5del' - assert results['NM_001292028.1:c.-110+1_-110+5del']['hgvs_lrg_variant'] == '' + assert results['NM_001292028.1:c.-110+1_-110+5del']['gene_symbol'] == 'HSD17B4' + assert results['NM_001292028.1:c.-110+1_-110+5del']['gene_ids'] == {'hgnc_id': 'HGNC:5213', 'entrez_gene_id': '3295', 'ucsc_id': 'uc003ksj.4', 'omim_id': ['601860']} assert results['NM_001292028.1:c.-110+1_-110+5del']['hgvs_transcript_variant'] == 'NM_001292028.1:c.-110+1_-110+5del' + assert 
results['NM_001292028.1:c.-110+1_-110+5del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001292028.1):c.-110+1_-110+5del' + assert results['NM_001292028.1:c.-110+1_-110+5del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001292028.1:c.-110+1_-110+5del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001292028.1:c.-110+1_-110+5del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': 'chr5', 'ref': 'GGGTGA', 'pos': '118811421', 'alt': 'G'}} - assert results['NM_001292028.1:c.-110+1_-110+5del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': 'chr5', 'ref': 'GGGTGA', 'pos': '119475726', 'alt': 'G'}} - assert results['NM_001292028.1:c.-110+1_-110+5del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': '5', 'ref': 'GGGTGA', 'pos': '118811421', 'alt': 'G'}} - assert results['NM_001292028.1:c.-110+1_-110+5del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': '5', 'ref': 'GGGTGA', 'pos': '119475726', 'alt': 'G'}} - assert results['NM_001292028.1:c.-110+1_-110+5del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278957.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001292028.1'} + assert results['NM_001292028.1:c.-110+1_-110+5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278957.1:p.?', 'slr': 'NP_001278957.1:p.?'} + assert results['NM_001292028.1:c.-110+1_-110+5del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001292028.1:c.-110+1_-110+5del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001292028.1:c.-110+1_-110+5del']['alt_genomic_loci'], []) + assert results['NM_001292028.1:c.-110+1_-110+5del']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': 'chr5', 'pos': '118811421', 'ref': 'GGGTGA', 'alt': 'G'}} + assert results['NM_001292028.1:c.-110+1_-110+5del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': 'chr5', 'pos': '119475726', 'ref': 'GGGTGA', 'alt': 'G'}} + assert results['NM_001292028.1:c.-110+1_-110+5del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': '5', 'pos': '118811421', 'ref': 'GGGTGA', 'alt': 'G'}} + assert results['NM_001292028.1:c.-110+1_-110+5del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': '5', 'pos': '119475726', 'ref': 'GGGTGA', 'alt': 'G'}} + assert results['NM_001292028.1:c.-110+1_-110+5del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001292028.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278957.1'} assert 'NM_000414.3:c.302+1_302+5del' in list(results.keys()) - assert results['NM_000414.3:c.302+1_302+5del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000414.3:c.302+1_302+5del']['refseqgene_context_intronic_sequence'] == 'NG_008182.1(NM_000414.3):c.302+1_302+5del' - self.assertCountEqual(results['NM_000414.3:c.302+1_302+5del']['alt_genomic_loci'], []) - assert results['NM_000414.3:c.302+1_302+5del']['gene_symbol'] == 'HSD17B4' - assert results['NM_000414.3:c.302+1_302+5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000405.1:p.?', 'slr': 'NP_000405.1:p.?'} - assert results['NM_000414.3:c.302+1_302+5del']['submitted_variant'] == '5-118811422-GGTGAG-G' - assert results['NM_000414.3:c.302+1_302+5del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_000414.3):c.302+1_302+5del' - assert results['NM_000414.3:c.302+1_302+5del']['hgvs_lrg_variant'] == '' - assert 
results['NM_000414.3:c.302+1_302+5del']['hgvs_transcript_variant'] == 'NM_000414.3:c.302+1_302+5del' - assert results['NM_000414.3:c.302+1_302+5del']['hgvs_refseqgene_variant'] == 'NG_008182.1:g.28276_28280del' - assert results['NM_000414.3:c.302+1_302+5del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': 'chr5', 'ref': 'GGGTGA', 'pos': '118811421', 'alt': 'G'}} - assert results['NM_000414.3:c.302+1_302+5del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': 'chr5', 'ref': 'GGGTGA', 'pos': '119475726', 'alt': 'G'}} - assert results['NM_000414.3:c.302+1_302+5del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': '5', 'ref': 'GGGTGA', 'pos': '118811421', 'alt': 'G'}} - assert results['NM_000414.3:c.302+1_302+5del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': '5', 'ref': 'GGGTGA', 'pos': '119475726', 'alt': 'G'}} - assert results['NM_000414.3:c.302+1_302+5del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008182.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000405.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000414.3'} - - assert 'NM_001199291.2:c.377+1_377+5del' in list(results.keys()) - assert results['NM_001199291.2:c.377+1_377+5del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001199291.2:c.377+1_377+5del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001199291.2:c.377+1_377+5del']['alt_genomic_loci'], []) - assert results['NM_001199291.2:c.377+1_377+5del']['gene_symbol'] == 'HSD17B4' - assert results['NM_001199291.2:c.377+1_377+5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001186220.1:p.?', 'slr': 'NP_001186220.1:p.?'} - assert 
results['NM_001199291.2:c.377+1_377+5del']['submitted_variant'] == '5-118811422-GGTGAG-G' - assert results['NM_001199291.2:c.377+1_377+5del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001199291.2):c.377+1_377+5del' - assert results['NM_001199291.2:c.377+1_377+5del']['hgvs_lrg_variant'] == '' - assert results['NM_001199291.2:c.377+1_377+5del']['hgvs_transcript_variant'] == 'NM_001199291.2:c.377+1_377+5del' - assert results['NM_001199291.2:c.377+1_377+5del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001199291.2:c.377+1_377+5del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': 'chr5', 'ref': 'GGGTGA', 'pos': '118811421', 'alt': 'G'}} - assert results['NM_001199291.2:c.377+1_377+5del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': 'chr5', 'ref': 'GGGTGA', 'pos': '119475726', 'alt': 'G'}} - assert results['NM_001199291.2:c.377+1_377+5del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': '5', 'ref': 'GGGTGA', 'pos': '118811421', 'alt': 'G'}} - assert results['NM_001199291.2:c.377+1_377+5del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': '5', 'ref': 'GGGTGA', 'pos': '119475726', 'alt': 'G'}} - assert results['NM_001199291.2:c.377+1_377+5del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186220.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199291.2'} + assert results['NM_000414.3:c.302+1_302+5del']['submitted_variant'] == '5-118811422-GGTGAG-G' + assert results['NM_000414.3:c.302+1_302+5del']['gene_symbol'] == 'HSD17B4' + assert results['NM_000414.3:c.302+1_302+5del']['gene_ids'] == {'hgnc_id': 'HGNC:5213', 'entrez_gene_id': '3295', 'ucsc_id': 'uc003ksj.4', 'omim_id': ['601860']} + assert 
results['NM_000414.3:c.302+1_302+5del']['hgvs_transcript_variant'] == 'NM_000414.3:c.302+1_302+5del' + assert results['NM_000414.3:c.302+1_302+5del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_000414.3):c.302+1_302+5del' + assert results['NM_000414.3:c.302+1_302+5del']['refseqgene_context_intronic_sequence'] == 'NG_008182.1(NM_000414.3):c.302+1_302+5del' + assert results['NM_000414.3:c.302+1_302+5del']['hgvs_refseqgene_variant'] == 'NG_008182.1:g.28276_28280del' + assert results['NM_000414.3:c.302+1_302+5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000405.1:p.?', 'slr': 'NP_000405.1:p.?'} + assert results['NM_000414.3:c.302+1_302+5del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000414.3:c.302+1_302+5del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000414.3:c.302+1_302+5del']['alt_genomic_loci'], []) + assert results['NM_000414.3:c.302+1_302+5del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': 'chr5', 'pos': '118811421', 'ref': 'GGGTGA', 'alt': 'G'}} + assert results['NM_000414.3:c.302+1_302+5del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': 'chr5', 'pos': '119475726', 'ref': 'GGGTGA', 'alt': 'G'}} + assert results['NM_000414.3:c.302+1_302+5del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': '5', 'pos': '118811421', 'ref': 'GGGTGA', 'alt': 'G'}} + assert results['NM_000414.3:c.302+1_302+5del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': '5', 'pos': '119475726', 'ref': 'GGGTGA', 'alt': 'G'}} + assert results['NM_000414.3:c.302+1_302+5del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000414.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000405.1', 
'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008182.1'} - assert 'NM_001199292.1:c.248+1_248+5del' in list(results.keys()) - assert results['NM_001199292.1:c.248+1_248+5del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001199292.1:c.248+1_248+5del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001199292.1:c.248+1_248+5del']['alt_genomic_loci'], []) - assert results['NM_001199292.1:c.248+1_248+5del']['gene_symbol'] == 'HSD17B4' - assert results['NM_001199292.1:c.248+1_248+5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001186221.1:p.?', 'slr': 'NP_001186221.1:p.?'} - assert results['NM_001199292.1:c.248+1_248+5del']['submitted_variant'] == '5-118811422-GGTGAG-G' - assert results['NM_001199292.1:c.248+1_248+5del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001199292.1):c.248+1_248+5del' - assert results['NM_001199292.1:c.248+1_248+5del']['hgvs_lrg_variant'] == '' - assert results['NM_001199292.1:c.248+1_248+5del']['hgvs_transcript_variant'] == 'NM_001199292.1:c.248+1_248+5del' - assert results['NM_001199292.1:c.248+1_248+5del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001199292.1:c.248+1_248+5del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': 'chr5', 'ref': 'GGGTGA', 'pos': '118811421', 'alt': 'G'}} - assert results['NM_001199292.1:c.248+1_248+5del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': 'chr5', 'ref': 'GGGTGA', 'pos': '119475726', 'alt': 'G'}} - assert results['NM_001199292.1:c.248+1_248+5del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': '5', 'ref': 'GGGTGA', 'pos': '118811421', 'alt': 'G'}} - assert results['NM_001199292.1:c.248+1_248+5del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 
'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': '5', 'ref': 'GGGTGA', 'pos': '119475726', 'alt': 'G'}} - assert results['NM_001199292.1:c.248+1_248+5del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186221.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199292.1'} + assert 'NM_001292027.1:c.230+1_230+5del' in list(results.keys()) + assert results['NM_001292027.1:c.230+1_230+5del']['submitted_variant'] == '5-118811422-GGTGAG-G' + assert results['NM_001292027.1:c.230+1_230+5del']['gene_symbol'] == 'HSD17B4' + assert results['NM_001292027.1:c.230+1_230+5del']['gene_ids'] == {'hgnc_id': 'HGNC:5213', 'entrez_gene_id': '3295', 'ucsc_id': 'uc003ksj.4', 'omim_id': ['601860']} + assert results['NM_001292027.1:c.230+1_230+5del']['hgvs_transcript_variant'] == 'NM_001292027.1:c.230+1_230+5del' + assert results['NM_001292027.1:c.230+1_230+5del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001292027.1):c.230+1_230+5del' + assert results['NM_001292027.1:c.230+1_230+5del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001292027.1:c.230+1_230+5del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001292027.1:c.230+1_230+5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278956.1:p.?', 'slr': 'NP_001278956.1:p.?'} + assert results['NM_001292027.1:c.230+1_230+5del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001292027.1:c.230+1_230+5del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001292027.1:c.230+1_230+5del']['alt_genomic_loci'], []) + assert results['NM_001292027.1:c.230+1_230+5del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': 'chr5', 'pos': '118811421', 'ref': 'GGGTGA', 'alt': 'G'}} + assert results['NM_001292027.1:c.230+1_230+5del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': 
'chr5', 'pos': '119475726', 'ref': 'GGGTGA', 'alt': 'G'}} + assert results['NM_001292027.1:c.230+1_230+5del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': '5', 'pos': '118811421', 'ref': 'GGGTGA', 'alt': 'G'}} + assert results['NM_001292027.1:c.230+1_230+5del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': '5', 'pos': '119475726', 'ref': 'GGGTGA', 'alt': 'G'}} + assert results['NM_001292027.1:c.230+1_230+5del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001292027.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278956.1'} - assert results['flag'] == 'gene_variant' assert 'NM_001199291.1:c.377+1_377+5del' in list(results.keys()) - assert results['NM_001199291.1:c.377+1_377+5del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001199291.1:c.377+1_377+5del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001199291.1:c.377+1_377+5del']['alt_genomic_loci'], []) - assert results['NM_001199291.1:c.377+1_377+5del']['gene_symbol'] == 'HSD17B4' - assert results['NM_001199291.1:c.377+1_377+5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001186220.1:p.?', 'slr': 'NP_001186220.1:p.?'} assert results['NM_001199291.1:c.377+1_377+5del']['submitted_variant'] == '5-118811422-GGTGAG-G' - assert results['NM_001199291.1:c.377+1_377+5del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001199291.1):c.377+1_377+5del' - assert results['NM_001199291.1:c.377+1_377+5del']['hgvs_lrg_variant'] == '' + assert results['NM_001199291.1:c.377+1_377+5del']['gene_symbol'] == 'HSD17B4' + assert results['NM_001199291.1:c.377+1_377+5del']['gene_ids'] == {'hgnc_id': 'HGNC:5213', 'entrez_gene_id': '3295', 'ucsc_id': 'uc003ksj.4', 'omim_id': ['601860']} assert results['NM_001199291.1:c.377+1_377+5del']['hgvs_transcript_variant'] == 
'NM_001199291.1:c.377+1_377+5del' + assert results['NM_001199291.1:c.377+1_377+5del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001199291.1):c.377+1_377+5del' + assert results['NM_001199291.1:c.377+1_377+5del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001199291.1:c.377+1_377+5del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001199291.1:c.377+1_377+5del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': 'chr5', 'ref': 'GGGTGA', 'pos': '118811421', 'alt': 'G'}} + assert results['NM_001199291.1:c.377+1_377+5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001186220.1:p.?', 'slr': 'NP_001186220.1:p.?'} + assert results['NM_001199291.1:c.377+1_377+5del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001199291.1:c.377+1_377+5del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001199291.1:c.377+1_377+5del']['alt_genomic_loci'], []) + assert results['NM_001199291.1:c.377+1_377+5del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': 'chr5', 'pos': '118811421', 'ref': 'GGGTGA', 'alt': 'G'}} assert 'hg38' not in list(results['NM_001199291.1:c.377+1_377+5del']['primary_assembly_loci'].keys()) - assert results['NM_001199291.1:c.377+1_377+5del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': '5', 'ref': 'GGGTGA', 'pos': '118811421', 'alt': 'G'}} + assert results['NM_001199291.1:c.377+1_377+5del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': '5', 'pos': '118811421', 'ref': 'GGGTGA', 'alt': 'G'}} assert 'grch38' not in list(results['NM_001199291.1:c.377+1_377+5del']['primary_assembly_loci'].keys()) - assert results['NM_001199291.1:c.377+1_377+5del']['reference_sequence_records'] == {'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186220.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199291.1'} + assert results['NM_001199291.1:c.377+1_377+5del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199291.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186220.1'} - assert 'NM_001292027.1:c.230+1_230+5del' in list(results.keys()) - assert results['NM_001292027.1:c.230+1_230+5del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001292027.1:c.230+1_230+5del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001292027.1:c.230+1_230+5del']['alt_genomic_loci'], []) - assert results['NM_001292027.1:c.230+1_230+5del']['gene_symbol'] == 'HSD17B4' - assert results['NM_001292027.1:c.230+1_230+5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001278956.1:p.?', 'slr': 'NP_001278956.1:p.?'} - assert results['NM_001292027.1:c.230+1_230+5del']['submitted_variant'] == '5-118811422-GGTGAG-G' - assert results['NM_001292027.1:c.230+1_230+5del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001292027.1):c.230+1_230+5del' - assert results['NM_001292027.1:c.230+1_230+5del']['hgvs_lrg_variant'] == '' - assert results['NM_001292027.1:c.230+1_230+5del']['hgvs_transcript_variant'] == 'NM_001292027.1:c.230+1_230+5del' - assert results['NM_001292027.1:c.230+1_230+5del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001292027.1:c.230+1_230+5del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': 'chr5', 'ref': 'GGGTGA', 'pos': '118811421', 'alt': 'G'}} - assert results['NM_001292027.1:c.230+1_230+5del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': 'chr5', 'ref': 'GGGTGA', 'pos': '119475726', 'alt': 'G'}} - assert results['NM_001292027.1:c.230+1_230+5del']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': '5', 'ref': 'GGGTGA', 'pos': '118811421', 'alt': 'G'}} - assert results['NM_001292027.1:c.230+1_230+5del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': '5', 'ref': 'GGGTGA', 'pos': '119475726', 'alt': 'G'}} - assert results['NM_001292027.1:c.230+1_230+5del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001278956.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001292027.1'} + assert 'NM_001199292.1:c.248+1_248+5del' in list(results.keys()) + assert results['NM_001199292.1:c.248+1_248+5del']['submitted_variant'] == '5-118811422-GGTGAG-G' + assert results['NM_001199292.1:c.248+1_248+5del']['gene_symbol'] == 'HSD17B4' + assert results['NM_001199292.1:c.248+1_248+5del']['gene_ids'] == {'hgnc_id': 'HGNC:5213', 'entrez_gene_id': '3295', 'ucsc_id': 'uc003ksj.4', 'omim_id': ['601860']} + assert results['NM_001199292.1:c.248+1_248+5del']['hgvs_transcript_variant'] == 'NM_001199292.1:c.248+1_248+5del' + assert results['NM_001199292.1:c.248+1_248+5del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001199292.1):c.248+1_248+5del' + assert results['NM_001199292.1:c.248+1_248+5del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001199292.1:c.248+1_248+5del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001199292.1:c.248+1_248+5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001186221.1:p.?', 'slr': 'NP_001186221.1:p.?'} + assert results['NM_001199292.1:c.248+1_248+5del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001199292.1:c.248+1_248+5del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001199292.1:c.248+1_248+5del']['alt_genomic_loci'], []) + assert results['NM_001199292.1:c.248+1_248+5del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': 'chr5', 'pos': '118811421', 'ref': 'GGGTGA', 'alt': 'G'}} + assert results['NM_001199292.1:c.248+1_248+5del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': 'chr5', 'pos': '119475726', 'ref': 'GGGTGA', 'alt': 'G'}} + assert results['NM_001199292.1:c.248+1_248+5del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': '5', 'pos': '118811421', 'ref': 'GGGTGA', 'alt': 'G'}} + assert results['NM_001199292.1:c.248+1_248+5del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': '5', 'pos': '119475726', 'ref': 'GGGTGA', 'alt': 'G'}} + assert results['NM_001199292.1:c.248+1_248+5del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199292.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186221.1'} + assert 'NM_001199291.2:c.377+1_377+5del' in list(results.keys()) + assert results['NM_001199291.2:c.377+1_377+5del']['submitted_variant'] == '5-118811422-GGTGAG-G' + assert results['NM_001199291.2:c.377+1_377+5del']['gene_symbol'] == 'HSD17B4' + assert results['NM_001199291.2:c.377+1_377+5del']['gene_ids'] == {'hgnc_id': 'HGNC:5213', 'entrez_gene_id': '3295', 'ucsc_id': 'uc003ksj.4', 'omim_id': ['601860']} + assert results['NM_001199291.2:c.377+1_377+5del']['hgvs_transcript_variant'] == 'NM_001199291.2:c.377+1_377+5del' + assert results['NM_001199291.2:c.377+1_377+5del']['genome_context_intronic_sequence'] == 'NC_000005.9(NM_001199291.2):c.377+1_377+5del' + assert results['NM_001199291.2:c.377+1_377+5del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001199291.2:c.377+1_377+5del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001199291.2:c.377+1_377+5del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001186220.1:p.?', 
'slr': 'NP_001186220.1:p.?'} + assert results['NM_001199291.2:c.377+1_377+5del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001199291.2:c.377+1_377+5del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001199291.2:c.377+1_377+5del']['alt_genomic_loci'], []) + assert results['NM_001199291.2:c.377+1_377+5del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': 'chr5', 'pos': '118811421', 'ref': 'GGGTGA', 'alt': 'G'}} + assert results['NM_001199291.2:c.377+1_377+5del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': 'chr5', 'pos': '119475726', 'ref': 'GGGTGA', 'alt': 'G'}} + assert results['NM_001199291.2:c.377+1_377+5del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.118811423_118811427del', 'vcf': {'chr': '5', 'pos': '118811421', 'ref': 'GGGTGA', 'alt': 'G'}} + assert results['NM_001199291.2:c.377+1_377+5del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.119475728_119475732del', 'vcf': {'chr': '5', 'pos': '119475726', 'ref': 'GGGTGA', 'alt': 'G'}} + assert results['NM_001199291.2:c.377+1_377+5del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001199291.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001186220.1'} def test_variant290(self): variant = '5-131705587-CG-C' @@ -15702,57 +16217,59 @@ def test_variant290(self): print(results) assert results['flag'] == 'gene_variant' - assert 'NR_110997.1:n.21del' in list(results.keys()) - assert results['NR_110997.1:n.21del']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_110997.1:n.21del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_110997.1:n.21del']['alt_genomic_loci'], []) - assert results['NR_110997.1:n.21del']['gene_symbol'] == 'MIR3936HG' - assert 
results['NR_110997.1:n.21del']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} - assert results['NR_110997.1:n.21del']['submitted_variant'] == '5-131705587-CG-C' - assert results['NR_110997.1:n.21del']['genome_context_intronic_sequence'] == '' - assert results['NR_110997.1:n.21del']['hgvs_lrg_variant'] == '' - assert results['NR_110997.1:n.21del']['hgvs_transcript_variant'] == 'NR_110997.1:n.21del' - assert results['NR_110997.1:n.21del']['hgvs_refseqgene_variant'] == '' - assert results['NR_110997.1:n.21del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.131705590del', 'vcf': {'chr': 'chr5', 'ref': 'CG', 'pos': '131705587', 'alt': 'C'}} - assert results['NR_110997.1:n.21del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.132369898del', 'vcf': {'chr': 'chr5', 'ref': 'CG', 'pos': '132369895', 'alt': 'C'}} - assert results['NR_110997.1:n.21del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.131705590del', 'vcf': {'chr': '5', 'ref': 'CG', 'pos': '131705587', 'alt': 'C'}} - assert results['NR_110997.1:n.21del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.132369898del', 'vcf': {'chr': '5', 'ref': 'CG', 'pos': '132369895', 'alt': 'C'}} - assert results['NR_110997.1:n.21del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_110997.1'} + assert 'NM_001308122.1:c.-75del' in list(results.keys()) + assert results['NM_001308122.1:c.-75del']['submitted_variant'] == '5-131705587-CG-C' + assert results['NM_001308122.1:c.-75del']['gene_symbol'] == 'SLC22A5' + assert results['NM_001308122.1:c.-75del']['gene_ids'] == {'hgnc_id': 'HGNC:10969', 'entrez_gene_id': '6584', 'ucsc_id': 'uc003kww.5', 'omim_id': ['603377']} + assert results['NM_001308122.1:c.-75del']['hgvs_transcript_variant'] == 'NM_001308122.1:c.-75del' + assert 
results['NM_001308122.1:c.-75del']['genome_context_intronic_sequence'] == '' + assert results['NM_001308122.1:c.-75del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001308122.1:c.-75del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001308122.1:c.-75del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001295051.1:p.?', 'slr': 'NP_001295051.1:p.?'} + assert results['NM_001308122.1:c.-75del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001308122.1:c.-75del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001308122.1:c.-75del']['alt_genomic_loci'], []) + assert results['NM_001308122.1:c.-75del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.131705590del', 'vcf': {'chr': 'chr5', 'pos': '131705587', 'ref': 'CG', 'alt': 'C'}} + assert results['NM_001308122.1:c.-75del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.132369898del', 'vcf': {'chr': 'chr5', 'pos': '132369895', 'ref': 'CG', 'alt': 'C'}} + assert results['NM_001308122.1:c.-75del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.131705590del', 'vcf': {'chr': '5', 'pos': '131705587', 'ref': 'CG', 'alt': 'C'}} + assert results['NM_001308122.1:c.-75del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.132369898del', 'vcf': {'chr': '5', 'pos': '132369895', 'ref': 'CG', 'alt': 'C'}} + assert results['NM_001308122.1:c.-75del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001308122.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001295051.1'} assert 'NM_003060.3:c.-75del' in list(results.keys()) - assert results['NM_003060.3:c.-75del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003060.3:c.-75del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003060.3:c.-75del']['alt_genomic_loci'], []) - assert 
results['NM_003060.3:c.-75del']['gene_symbol'] == 'SLC22A5' - assert results['NM_003060.3:c.-75del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003051.1:p.?', 'slr': 'NP_003051.1:p.?'} assert results['NM_003060.3:c.-75del']['submitted_variant'] == '5-131705587-CG-C' - assert results['NM_003060.3:c.-75del']['genome_context_intronic_sequence'] == '' - assert results['NM_003060.3:c.-75del']['hgvs_lrg_variant'] == '' + assert results['NM_003060.3:c.-75del']['gene_symbol'] == 'SLC22A5' + assert results['NM_003060.3:c.-75del']['gene_ids'] == {'hgnc_id': 'HGNC:10969', 'entrez_gene_id': '6584', 'ucsc_id': 'uc003kww.5', 'omim_id': ['603377']} assert results['NM_003060.3:c.-75del']['hgvs_transcript_variant'] == 'NM_003060.3:c.-75del' + assert results['NM_003060.3:c.-75del']['genome_context_intronic_sequence'] == '' + assert results['NM_003060.3:c.-75del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003060.3:c.-75del']['hgvs_refseqgene_variant'] == '' - assert results['NM_003060.3:c.-75del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.131705590del', 'vcf': {'chr': 'chr5', 'ref': 'CG', 'pos': '131705587', 'alt': 'C'}} - assert results['NM_003060.3:c.-75del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.132369898del', 'vcf': {'chr': 'chr5', 'ref': 'CG', 'pos': '132369895', 'alt': 'C'}} - assert results['NM_003060.3:c.-75del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.131705590del', 'vcf': {'chr': '5', 'ref': 'CG', 'pos': '131705587', 'alt': 'C'}} - assert results['NM_003060.3:c.-75del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.132369898del', 'vcf': {'chr': '5', 'ref': 'CG', 'pos': '132369895', 'alt': 'C'}} - assert results['NM_003060.3:c.-75del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003051.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_003060.3'} - - assert 'NM_001308122.1:c.-75del' in list(results.keys()) - assert results['NM_001308122.1:c.-75del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001308122.1:c.-75del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001308122.1:c.-75del']['alt_genomic_loci'], []) - assert results['NM_001308122.1:c.-75del']['gene_symbol'] == 'SLC22A5' - assert results['NM_001308122.1:c.-75del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001295051.1:p.?', 'slr': 'NP_001295051.1:p.?'} - assert results['NM_001308122.1:c.-75del']['submitted_variant'] == '5-131705587-CG-C' - assert results['NM_001308122.1:c.-75del']['genome_context_intronic_sequence'] == '' - assert results['NM_001308122.1:c.-75del']['hgvs_lrg_variant'] == '' - assert results['NM_001308122.1:c.-75del']['hgvs_transcript_variant'] == 'NM_001308122.1:c.-75del' - assert results['NM_001308122.1:c.-75del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001308122.1:c.-75del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.131705590del', 'vcf': {'chr': 'chr5', 'ref': 'CG', 'pos': '131705587', 'alt': 'C'}} - assert results['NM_001308122.1:c.-75del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.132369898del', 'vcf': {'chr': 'chr5', 'ref': 'CG', 'pos': '132369895', 'alt': 'C'}} - assert results['NM_001308122.1:c.-75del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.131705590del', 'vcf': {'chr': '5', 'ref': 'CG', 'pos': '131705587', 'alt': 'C'}} - assert results['NM_001308122.1:c.-75del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.132369898del', 'vcf': {'chr': '5', 'ref': 'CG', 'pos': '132369895', 'alt': 'C'}} - assert results['NM_001308122.1:c.-75del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001295051.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001308122.1'} + assert results['NM_003060.3:c.-75del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003051.1:p.?', 'slr': 'NP_003051.1:p.?'} + assert results['NM_003060.3:c.-75del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003060.3:c.-75del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003060.3:c.-75del']['alt_genomic_loci'], []) + assert results['NM_003060.3:c.-75del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.131705590del', 'vcf': {'chr': 'chr5', 'pos': '131705587', 'ref': 'CG', 'alt': 'C'}} + assert results['NM_003060.3:c.-75del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.132369898del', 'vcf': {'chr': 'chr5', 'pos': '132369895', 'ref': 'CG', 'alt': 'C'}} + assert results['NM_003060.3:c.-75del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.131705590del', 'vcf': {'chr': '5', 'pos': '131705587', 'ref': 'CG', 'alt': 'C'}} + assert results['NM_003060.3:c.-75del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.132369898del', 'vcf': {'chr': '5', 'pos': '132369895', 'ref': 'CG', 'alt': 'C'}} + assert results['NM_003060.3:c.-75del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003060.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003051.1'} + assert 'NR_110997.1:n.21del' in list(results.keys()) + assert results['NR_110997.1:n.21del']['submitted_variant'] == '5-131705587-CG-C' + assert results['NR_110997.1:n.21del']['gene_symbol'] == 'MIR3936HG' + assert results['NR_110997.1:n.21del']['gene_ids'] == {'hgnc_id': 'HGNC:40538', 'entrez_gene_id': '553103', 'ucsc_id': '', 'omim_id': []} + assert results['NR_110997.1:n.21del']['hgvs_transcript_variant'] == 'NR_110997.1:n.21del' + assert results['NR_110997.1:n.21del']['genome_context_intronic_sequence'] == '' + assert 
results['NR_110997.1:n.21del']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_110997.1:n.21del']['hgvs_refseqgene_variant'] == '' + assert results['NR_110997.1:n.21del']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_110997.1:n.21del']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_110997.1:n.21del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_110997.1:n.21del']['alt_genomic_loci'], []) + assert results['NR_110997.1:n.21del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.131705590del', 'vcf': {'chr': 'chr5', 'pos': '131705587', 'ref': 'CG', 'alt': 'C'}} + assert results['NR_110997.1:n.21del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.132369898del', 'vcf': {'chr': 'chr5', 'pos': '132369895', 'ref': 'CG', 'alt': 'C'}} + assert results['NR_110997.1:n.21del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.131705590del', 'vcf': {'chr': '5', 'pos': '131705587', 'ref': 'CG', 'alt': 'C'}} + assert results['NR_110997.1:n.21del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.132369898del', 'vcf': {'chr': '5', 'pos': '132369895', 'ref': 'CG', 'alt': 'C'}} + assert results['NR_110997.1:n.21del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_110997.1'} def test_variant291(self): variant = '5-148406482-T-C' @@ -15761,70 +16278,70 @@ def test_variant291(self): assert results['flag'] == 'gene_variant' assert 'NM_024577.3:c.2813A>G' in list(results.keys()) - assert results['NM_024577.3:c.2813A>G']['hgvs_lrg_transcript_variant'] == 'LRG_269t1:c.2813A>G' - assert results['NM_024577.3:c.2813A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_024577.3:c.2813A>G']['alt_genomic_loci'], []) - assert results['NM_024577.3:c.2813A>G']['gene_symbol'] == 
'SH3TC2' - assert results['NM_024577.3:c.2813A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_078853.2(LRG_269p1):p.(His938Arg)', 'slr': 'NP_078853.2:p.(H938R)'} assert results['NM_024577.3:c.2813A>G']['submitted_variant'] == '5-148406482-T-C' - assert results['NM_024577.3:c.2813A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_024577.3:c.2813A>G']['hgvs_lrg_variant'] == 'LRG_269:g.41256A>G' + assert results['NM_024577.3:c.2813A>G']['gene_symbol'] == 'SH3TC2' + assert results['NM_024577.3:c.2813A>G']['gene_ids'] == {'hgnc_id': 'HGNC:29427', 'entrez_gene_id': '79628', 'ucsc_id': 'uc003lpu.4', 'omim_id': ['608206']} assert results['NM_024577.3:c.2813A>G']['hgvs_transcript_variant'] == 'NM_024577.3:c.2813A>G' + assert results['NM_024577.3:c.2813A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_024577.3:c.2813A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_024577.3:c.2813A>G']['hgvs_refseqgene_variant'] == 'NG_007947.2:g.41256A>G' - assert results['NM_024577.3:c.2813A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.148406482T>C', 'vcf': {'chr': 'chr5', 'ref': 'T', 'pos': '148406482', 'alt': 'C'}} - assert results['NM_024577.3:c.2813A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.149026919T>C', 'vcf': {'chr': 'chr5', 'ref': 'T', 'pos': '149026919', 'alt': 'C'}} - assert results['NM_024577.3:c.2813A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.148406482T>C', 'vcf': {'chr': '5', 'ref': 'T', 'pos': '148406482', 'alt': 'C'}} - assert results['NM_024577.3:c.2813A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.149026919T>C', 'vcf': {'chr': '5', 'ref': 'T', 'pos': '149026919', 'alt': 'C'}} - assert results['NM_024577.3:c.2813A>G']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007947.2', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_078853.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024577.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_269.xml'} - + assert results['NM_024577.3:c.2813A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_078853.2(LRG_269p1):p.(His938Arg)', 'slr': 'NP_078853.2:p.(H938R)'} + assert results['NM_024577.3:c.2813A>G']['hgvs_lrg_transcript_variant'] == 'LRG_269t1:c.2813A>G' + assert results['NM_024577.3:c.2813A>G']['hgvs_lrg_variant'] == 'LRG_269:g.41256A>G' + self.assertCountEqual(results['NM_024577.3:c.2813A>G']['alt_genomic_loci'], []) + assert results['NM_024577.3:c.2813A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000005.9:g.148406482T>C', 'vcf': {'chr': 'chr5', 'pos': '148406482', 'ref': 'T', 'alt': 'C'}} + assert results['NM_024577.3:c.2813A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000005.10:g.149026919T>C', 'vcf': {'chr': 'chr5', 'pos': '149026919', 'ref': 'T', 'alt': 'C'}} + assert results['NM_024577.3:c.2813A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000005.9:g.148406482T>C', 'vcf': {'chr': '5', 'pos': '148406482', 'ref': 'T', 'alt': 'C'}} + assert results['NM_024577.3:c.2813A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000005.10:g.149026919T>C', 'vcf': {'chr': '5', 'pos': '149026919', 'ref': 'T', 'alt': 'C'}} + assert results['NM_024577.3:c.2813A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_024577.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_078853.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007947.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_269.xml'} def test_variant292(self): variant = '6-110036337-T-TCAG' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_014845.5:c.123_124insCAG' in 
list(results.keys()) - assert results['NM_014845.5:c.123_124insCAG']['hgvs_lrg_transcript_variant'] == 'LRG_241t1:c.123_124insCAG' - assert results['NM_014845.5:c.123_124insCAG']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_014845.5:c.123_124insCAG']['alt_genomic_loci'], []) - assert results['NM_014845.5:c.123_124insCAG']['gene_symbol'] == 'FIG4' - assert results['NM_014845.5:c.123_124insCAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055660.1(LRG_241p1):p.(Ile41_Asp42insGln)', 'slr': 'NP_055660.1:p.(I41_D42insQ)'} assert results['NM_014845.5:c.123_124insCAG']['submitted_variant'] == '6-110036337-T-TCAG' - assert results['NM_014845.5:c.123_124insCAG']['genome_context_intronic_sequence'] == '' - assert results['NM_014845.5:c.123_124insCAG']['hgvs_lrg_variant'] == 'LRG_241:g.28914_28915insCAG' + assert results['NM_014845.5:c.123_124insCAG']['gene_symbol'] == 'FIG4' + assert results['NM_014845.5:c.123_124insCAG']['gene_ids'] == {'hgnc_id': 'HGNC:16873', 'entrez_gene_id': '9896', 'ucsc_id': 'uc003ptt.3', 'omim_id': ['609390']} assert results['NM_014845.5:c.123_124insCAG']['hgvs_transcript_variant'] == 'NM_014845.5:c.123_124insCAG' + assert results['NM_014845.5:c.123_124insCAG']['genome_context_intronic_sequence'] == '' + assert results['NM_014845.5:c.123_124insCAG']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014845.5:c.123_124insCAG']['hgvs_refseqgene_variant'] == 'NG_007977.1:g.28914_28915insCAG' - assert results['NM_014845.5:c.123_124insCAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.110036337_110036338insCAG', 'vcf': {'chr': 'chr6', 'ref': 'T', 'pos': '110036337', 'alt': 'TCAG'}} - assert results['NM_014845.5:c.123_124insCAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.109715134_109715135insCAG', 'vcf': {'chr': 'chr6', 'ref': 'T', 'pos': '109715134', 'alt': 'TCAG'}} - assert 
results['NM_014845.5:c.123_124insCAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.110036337_110036338insCAG', 'vcf': {'chr': '6', 'ref': 'T', 'pos': '110036337', 'alt': 'TCAG'}} - assert results['NM_014845.5:c.123_124insCAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.109715134_109715135insCAG', 'vcf': {'chr': '6', 'ref': 'T', 'pos': '109715134', 'alt': 'TCAG'}} - assert results['NM_014845.5:c.123_124insCAG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007977.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055660.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014845.5', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_241.xml'} - - assert results['flag'] == 'gene_variant' + assert results['NM_014845.5:c.123_124insCAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055660.1(LRG_241p1):p.(Ile41_Asp42insGln)', 'slr': 'NP_055660.1:p.(I41_D42insQ)'} + assert results['NM_014845.5:c.123_124insCAG']['hgvs_lrg_transcript_variant'] == 'LRG_241t1:c.123_124insCAG' + assert results['NM_014845.5:c.123_124insCAG']['hgvs_lrg_variant'] == 'LRG_241:g.28914_28915insCAG' + self.assertCountEqual(results['NM_014845.5:c.123_124insCAG']['alt_genomic_loci'], []) + assert results['NM_014845.5:c.123_124insCAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.110036337_110036338insCAG', 'vcf': {'chr': 'chr6', 'pos': '110036337', 'ref': 'T', 'alt': 'TCAG'}} + assert results['NM_014845.5:c.123_124insCAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.109715134_109715135insCAG', 'vcf': {'chr': 'chr6', 'pos': '109715134', 'ref': 'T', 'alt': 'TCAG'}} + assert results['NM_014845.5:c.123_124insCAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.110036337_110036338insCAG', 'vcf': {'chr': '6', 'pos': '110036337', 'ref': 'T', 'alt': 'TCAG'}} + 
assert results['NM_014845.5:c.123_124insCAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.109715134_109715135insCAG', 'vcf': {'chr': '6', 'pos': '109715134', 'ref': 'T', 'alt': 'TCAG'}} + assert results['NM_014845.5:c.123_124insCAG']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014845.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055660.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007977.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_241.xml'} def test_variant293(self): variant = '6-110036337-TGAT-T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_014845.5:c.124_126del' in list(results.keys()) - assert results['NM_014845.5:c.124_126del']['hgvs_lrg_transcript_variant'] == 'LRG_241t1:c.124_126del' - assert results['NM_014845.5:c.124_126del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_014845.5:c.124_126del']['alt_genomic_loci'], []) - assert results['NM_014845.5:c.124_126del']['gene_symbol'] == 'FIG4' - assert results['NM_014845.5:c.124_126del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055660.1(LRG_241p1):p.(Asp42del)', 'slr': 'NP_055660.1:p.(D42del)'} assert results['NM_014845.5:c.124_126del']['submitted_variant'] == '6-110036337-TGAT-T' - assert results['NM_014845.5:c.124_126del']['genome_context_intronic_sequence'] == '' - assert results['NM_014845.5:c.124_126del']['hgvs_lrg_variant'] == 'LRG_241:g.28915_28917del' + assert results['NM_014845.5:c.124_126del']['gene_symbol'] == 'FIG4' + assert results['NM_014845.5:c.124_126del']['gene_ids'] == {'hgnc_id': 'HGNC:16873', 'entrez_gene_id': '9896', 'ucsc_id': 'uc003ptt.3', 'omim_id': ['609390']} assert results['NM_014845.5:c.124_126del']['hgvs_transcript_variant'] == 'NM_014845.5:c.124_126del' + assert 
results['NM_014845.5:c.124_126del']['genome_context_intronic_sequence'] == '' + assert results['NM_014845.5:c.124_126del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014845.5:c.124_126del']['hgvs_refseqgene_variant'] == 'NG_007977.1:g.28915_28917del' - assert results['NM_014845.5:c.124_126del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.110036338_110036340del', 'vcf': {'chr': 'chr6', 'ref': 'TTGA', 'pos': '110036336', 'alt': 'T'}} - assert results['NM_014845.5:c.124_126del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.109715135_109715137del', 'vcf': {'chr': 'chr6', 'ref': 'TTGA', 'pos': '109715133', 'alt': 'T'}} - assert results['NM_014845.5:c.124_126del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.110036338_110036340del', 'vcf': {'chr': '6', 'ref': 'TTGA', 'pos': '110036336', 'alt': 'T'}} - assert results['NM_014845.5:c.124_126del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.109715135_109715137del', 'vcf': {'chr': '6', 'ref': 'TTGA', 'pos': '109715133', 'alt': 'T'}} - assert results['NM_014845.5:c.124_126del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007977.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055660.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014845.5', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_241.xml'} - - assert results['flag'] == 'gene_variant' + assert results['NM_014845.5:c.124_126del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055660.1(LRG_241p1):p.(Asp42del)', 'slr': 'NP_055660.1:p.(D42del)'} + assert results['NM_014845.5:c.124_126del']['hgvs_lrg_transcript_variant'] == 'LRG_241t1:c.124_126del' + assert results['NM_014845.5:c.124_126del']['hgvs_lrg_variant'] == 'LRG_241:g.28915_28917del' + self.assertCountEqual(results['NM_014845.5:c.124_126del']['alt_genomic_loci'], []) + assert 
results['NM_014845.5:c.124_126del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.110036338_110036340del', 'vcf': {'chr': 'chr6', 'pos': '110036336', 'ref': 'TTGA', 'alt': 'T'}} + assert results['NM_014845.5:c.124_126del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.109715135_109715137del', 'vcf': {'chr': 'chr6', 'pos': '109715133', 'ref': 'TTGA', 'alt': 'T'}} + assert results['NM_014845.5:c.124_126del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.110036338_110036340del', 'vcf': {'chr': '6', 'pos': '110036336', 'ref': 'TTGA', 'alt': 'T'}} + assert results['NM_014845.5:c.124_126del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.109715135_109715137del', 'vcf': {'chr': '6', 'pos': '109715133', 'ref': 'TTGA', 'alt': 'T'}} + assert results['NM_014845.5:c.124_126del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014845.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055660.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007977.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_241.xml'} def test_variant294(self): variant = '6-152651802-C-A' @@ -15833,39 +16350,40 @@ def test_variant294(self): assert results['flag'] == 'gene_variant' assert 'NM_182961.3:c.14018G>T' in list(results.keys()) - assert results['NM_182961.3:c.14018G>T']['hgvs_lrg_transcript_variant'] == 'LRG_427t1:c.14018G>T' - assert results['NM_182961.3:c.14018G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_182961.3:c.14018G>T']['alt_genomic_loci'], []) - assert results['NM_182961.3:c.14018G>T']['gene_symbol'] == 'SYNE1' - assert results['NM_182961.3:c.14018G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_892006.3(LRG_427p1):p.(Arg4673Leu)', 'slr': 'NP_892006.3:p.(R4673L)'} assert results['NM_182961.3:c.14018G>T']['submitted_variant'] == 
'6-152651802-C-A' - assert results['NM_182961.3:c.14018G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_182961.3:c.14018G>T']['hgvs_lrg_variant'] == 'LRG_427:g.311733G>T' + assert results['NM_182961.3:c.14018G>T']['gene_symbol'] == 'SYNE1' + assert results['NM_182961.3:c.14018G>T']['gene_ids'] == {'hgnc_id': 'HGNC:17089', 'entrez_gene_id': '23345', 'ucsc_id': 'uc003qou.4', 'omim_id': ['608441']} assert results['NM_182961.3:c.14018G>T']['hgvs_transcript_variant'] == 'NM_182961.3:c.14018G>T' + assert results['NM_182961.3:c.14018G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_182961.3:c.14018G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_182961.3:c.14018G>T']['hgvs_refseqgene_variant'] == 'NG_012855.1:g.311733G>T' - assert results['NM_182961.3:c.14018G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.152651802C>A', 'vcf': {'chr': 'chr6', 'ref': 'C', 'pos': '152651802', 'alt': 'A'}} - assert results['NM_182961.3:c.14018G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152330667C>A', 'vcf': {'chr': 'chr6', 'ref': 'C', 'pos': '152330667', 'alt': 'A'}} - assert results['NM_182961.3:c.14018G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.152651802C>A', 'vcf': {'chr': '6', 'ref': 'C', 'pos': '152651802', 'alt': 'A'}} - assert results['NM_182961.3:c.14018G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152330667C>A', 'vcf': {'chr': '6', 'ref': 'C', 'pos': '152330667', 'alt': 'A'}} - assert results['NM_182961.3:c.14018G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012855.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_892006.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_182961.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_427.xml'} + assert 
results['NM_182961.3:c.14018G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_892006.3(LRG_427p1):p.(Arg4673Leu)', 'slr': 'NP_892006.3:p.(R4673L)'} + assert results['NM_182961.3:c.14018G>T']['hgvs_lrg_transcript_variant'] == 'LRG_427t1:c.14018G>T' + assert results['NM_182961.3:c.14018G>T']['hgvs_lrg_variant'] == 'LRG_427:g.311733G>T' + self.assertCountEqual(results['NM_182961.3:c.14018G>T']['alt_genomic_loci'], []) + assert results['NM_182961.3:c.14018G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.152651802C>A', 'vcf': {'chr': 'chr6', 'pos': '152651802', 'ref': 'C', 'alt': 'A'}} + assert results['NM_182961.3:c.14018G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152330667C>A', 'vcf': {'chr': 'chr6', 'pos': '152330667', 'ref': 'C', 'alt': 'A'}} + assert results['NM_182961.3:c.14018G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.152651802C>A', 'vcf': {'chr': '6', 'pos': '152651802', 'ref': 'C', 'alt': 'A'}} + assert results['NM_182961.3:c.14018G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152330667C>A', 'vcf': {'chr': '6', 'pos': '152330667', 'ref': 'C', 'alt': 'A'}} + assert results['NM_182961.3:c.14018G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_182961.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_892006.3', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012855.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_427.xml'} assert 'NM_033071.3:c.13805G>T' in list(results.keys()) - assert results['NM_033071.3:c.13805G>T']['hgvs_lrg_transcript_variant'] == 'LRG_427t2:c.13805G>T' - assert results['NM_033071.3:c.13805G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_033071.3:c.13805G>T']['alt_genomic_loci'], []) - assert results['NM_033071.3:c.13805G>T']['gene_symbol'] == 'SYNE1' - 
assert results['NM_033071.3:c.13805G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_149062.1(LRG_427p2):p.(Arg4602Leu)', 'slr': 'NP_149062.1:p.(R4602L)'} assert results['NM_033071.3:c.13805G>T']['submitted_variant'] == '6-152651802-C-A' - assert results['NM_033071.3:c.13805G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_033071.3:c.13805G>T']['hgvs_lrg_variant'] == 'LRG_427:g.311733G>T' + assert results['NM_033071.3:c.13805G>T']['gene_symbol'] == 'SYNE1' + assert results['NM_033071.3:c.13805G>T']['gene_ids'] == {'hgnc_id': 'HGNC:17089', 'entrez_gene_id': '23345', 'ucsc_id': 'uc003qou.4', 'omim_id': ['608441']} assert results['NM_033071.3:c.13805G>T']['hgvs_transcript_variant'] == 'NM_033071.3:c.13805G>T' + assert results['NM_033071.3:c.13805G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_033071.3:c.13805G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_033071.3:c.13805G>T']['hgvs_refseqgene_variant'] == 'NG_012855.1:g.311733G>T' - assert results['NM_033071.3:c.13805G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.152651802C>A', 'vcf': {'chr': 'chr6', 'ref': 'C', 'pos': '152651802', 'alt': 'A'}} - assert results['NM_033071.3:c.13805G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152330667C>A', 'vcf': {'chr': 'chr6', 'ref': 'C', 'pos': '152330667', 'alt': 'A'}} - assert results['NM_033071.3:c.13805G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.152651802C>A', 'vcf': {'chr': '6', 'ref': 'C', 'pos': '152651802', 'alt': 'A'}} - assert results['NM_033071.3:c.13805G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152330667C>A', 'vcf': {'chr': '6', 'ref': 'C', 'pos': '152330667', 'alt': 'A'}} - assert results['NM_033071.3:c.13805G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012855.1', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_149062.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_033071.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_427.xml'} - + assert results['NM_033071.3:c.13805G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_149062.1(LRG_427p2):p.(Arg4602Leu)', 'slr': 'NP_149062.1:p.(R4602L)'} + assert results['NM_033071.3:c.13805G>T']['hgvs_lrg_transcript_variant'] == 'LRG_427t2:c.13805G>T' + assert results['NM_033071.3:c.13805G>T']['hgvs_lrg_variant'] == 'LRG_427:g.311733G>T' + self.assertCountEqual(results['NM_033071.3:c.13805G>T']['alt_genomic_loci'], []) + assert results['NM_033071.3:c.13805G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.152651802C>A', 'vcf': {'chr': 'chr6', 'pos': '152651802', 'ref': 'C', 'alt': 'A'}} + assert results['NM_033071.3:c.13805G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152330667C>A', 'vcf': {'chr': 'chr6', 'pos': '152330667', 'ref': 'C', 'alt': 'A'}} + assert results['NM_033071.3:c.13805G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.152651802C>A', 'vcf': {'chr': '6', 'pos': '152651802', 'ref': 'C', 'alt': 'A'}} + assert results['NM_033071.3:c.13805G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152330667C>A', 'vcf': {'chr': '6', 'pos': '152330667', 'ref': 'C', 'alt': 'A'}} + assert results['NM_033071.3:c.13805G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_033071.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_149062.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012855.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_427.xml'} def test_variant295(self): variant = '6-152737643-C-G' @@ -15873,646 +16391,677 @@ def test_variant295(self): print(results) assert results['flag'] == 'gene_variant' - assert 'NM_033071.3:c.5950G>C' in 
list(results.keys()) - assert results['NM_033071.3:c.5950G>C']['hgvs_lrg_transcript_variant'] == 'LRG_427t2:c.5950G>C' - assert results['NM_033071.3:c.5950G>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_033071.3:c.5950G>C']['alt_genomic_loci'], []) - assert results['NM_033071.3:c.5950G>C']['gene_symbol'] == 'SYNE1' - assert results['NM_033071.3:c.5950G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_149062.1(LRG_427p2):p.(Ala1984Pro)', 'slr': 'NP_149062.1:p.(A1984P)'} - assert results['NM_033071.3:c.5950G>C']['submitted_variant'] == '6-152737643-C-G' - assert results['NM_033071.3:c.5950G>C']['genome_context_intronic_sequence'] == '' - assert results['NM_033071.3:c.5950G>C']['hgvs_lrg_variant'] == 'LRG_427:g.225892G>C' - assert results['NM_033071.3:c.5950G>C']['hgvs_transcript_variant'] == 'NM_033071.3:c.5950G>C' - assert results['NM_033071.3:c.5950G>C']['hgvs_refseqgene_variant'] == 'NG_012855.1:g.225892G>C' - assert results['NM_033071.3:c.5950G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.152737643C>G', 'vcf': {'chr': 'chr6', 'ref': 'C', 'pos': '152737643', 'alt': 'G'}} - assert results['NM_033071.3:c.5950G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152416508C>G', 'vcf': {'chr': 'chr6', 'ref': 'C', 'pos': '152416508', 'alt': 'G'}} - assert results['NM_033071.3:c.5950G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.152737643C>G', 'vcf': {'chr': '6', 'ref': 'C', 'pos': '152737643', 'alt': 'G'}} - assert results['NM_033071.3:c.5950G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152416508C>G', 'vcf': {'chr': '6', 'ref': 'C', 'pos': '152416508', 'alt': 'G'}} - assert results['NM_033071.3:c.5950G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012855.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_149062.1', 
'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_033071.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_427.xml'} - assert 'NM_182961.3:c.5929G>C' in list(results.keys()) - assert results['NM_182961.3:c.5929G>C']['hgvs_lrg_transcript_variant'] == 'LRG_427t1:c.5929G>C' - assert results['NM_182961.3:c.5929G>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_182961.3:c.5929G>C']['alt_genomic_loci'], []) - assert results['NM_182961.3:c.5929G>C']['gene_symbol'] == 'SYNE1' - assert results['NM_182961.3:c.5929G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_892006.3(LRG_427p1):p.(Ala1977Pro)', 'slr': 'NP_892006.3:p.(A1977P)'} assert results['NM_182961.3:c.5929G>C']['submitted_variant'] == '6-152737643-C-G' - assert results['NM_182961.3:c.5929G>C']['genome_context_intronic_sequence'] == '' - assert results['NM_182961.3:c.5929G>C']['hgvs_lrg_variant'] == 'LRG_427:g.225892G>C' + assert results['NM_182961.3:c.5929G>C']['gene_symbol'] == 'SYNE1' + assert results['NM_182961.3:c.5929G>C']['gene_ids'] == {'hgnc_id': 'HGNC:17089', 'entrez_gene_id': '23345', 'ucsc_id': 'uc003qou.4', 'omim_id': ['608441']} assert results['NM_182961.3:c.5929G>C']['hgvs_transcript_variant'] == 'NM_182961.3:c.5929G>C' + assert results['NM_182961.3:c.5929G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_182961.3:c.5929G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_182961.3:c.5929G>C']['hgvs_refseqgene_variant'] == 'NG_012855.1:g.225892G>C' - assert results['NM_182961.3:c.5929G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.152737643C>G', 'vcf': {'chr': 'chr6', 'ref': 'C', 'pos': '152737643', 'alt': 'G'}} - assert results['NM_182961.3:c.5929G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152416508C>G', 'vcf': {'chr': 'chr6', 'ref': 'C', 'pos': '152416508', 'alt': 'G'}} - assert 
results['NM_182961.3:c.5929G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.152737643C>G', 'vcf': {'chr': '6', 'ref': 'C', 'pos': '152737643', 'alt': 'G'}} - assert results['NM_182961.3:c.5929G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152416508C>G', 'vcf': {'chr': '6', 'ref': 'C', 'pos': '152416508', 'alt': 'G'}} - assert results['NM_182961.3:c.5929G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012855.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_892006.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_182961.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_427.xml'} + assert results['NM_182961.3:c.5929G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_892006.3(LRG_427p1):p.(Ala1977Pro)', 'slr': 'NP_892006.3:p.(A1977P)'} + assert results['NM_182961.3:c.5929G>C']['hgvs_lrg_transcript_variant'] == 'LRG_427t1:c.5929G>C' + assert results['NM_182961.3:c.5929G>C']['hgvs_lrg_variant'] == 'LRG_427:g.225892G>C' + self.assertCountEqual(results['NM_182961.3:c.5929G>C']['alt_genomic_loci'], []) + assert results['NM_182961.3:c.5929G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.152737643C>G', 'vcf': {'chr': 'chr6', 'pos': '152737643', 'ref': 'C', 'alt': 'G'}} + assert results['NM_182961.3:c.5929G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152416508C>G', 'vcf': {'chr': 'chr6', 'pos': '152416508', 'ref': 'C', 'alt': 'G'}} + assert results['NM_182961.3:c.5929G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000006.11:g.152737643C>G', 'vcf': {'chr': '6', 'pos': '152737643', 'ref': 'C', 'alt': 'G'}} + assert results['NM_182961.3:c.5929G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152416508C>G', 'vcf': {'chr': '6', 'pos': '152416508', 'ref': 'C', 'alt': 'G'}} + 
assert results['NM_182961.3:c.5929G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_182961.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_892006.3', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012855.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_427.xml'} + assert 'NM_033071.3:c.5950G>C' in list(results.keys()) + assert results['NM_033071.3:c.5950G>C']['submitted_variant'] == '6-152737643-C-G' + assert results['NM_033071.3:c.5950G>C']['gene_symbol'] == 'SYNE1' + assert results['NM_033071.3:c.5950G>C']['gene_ids'] == {'hgnc_id': 'HGNC:17089', 'entrez_gene_id': '23345', 'ucsc_id': 'uc003qou.4', 'omim_id': ['608441']} + assert results['NM_033071.3:c.5950G>C']['hgvs_transcript_variant'] == 'NM_033071.3:c.5950G>C' + assert results['NM_033071.3:c.5950G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_033071.3:c.5950G>C']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_033071.3:c.5950G>C']['hgvs_refseqgene_variant'] == 'NG_012855.1:g.225892G>C' + assert results['NM_033071.3:c.5950G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_149062.1(LRG_427p2):p.(Ala1984Pro)', 'slr': 'NP_149062.1:p.(A1984P)'} + assert results['NM_033071.3:c.5950G>C']['hgvs_lrg_transcript_variant'] == 'LRG_427t2:c.5950G>C' + assert results['NM_033071.3:c.5950G>C']['hgvs_lrg_variant'] == 'LRG_427:g.225892G>C' + self.assertCountEqual(results['NM_033071.3:c.5950G>C']['alt_genomic_loci'], []) + assert results['NM_033071.3:c.5950G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000006.11:g.152737643C>G', 'vcf': {'chr': 'chr6', 'pos': '152737643', 'ref': 'C', 'alt': 'G'}} + assert results['NM_033071.3:c.5950G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152416508C>G', 'vcf': {'chr': 'chr6', 'pos': '152416508', 'ref': 'C', 'alt': 'G'}} + assert results['NM_033071.3:c.5950G>C']['primary_assembly_loci']['grch37'] == 
{'hgvs_genomic_description': 'NC_000006.11:g.152737643C>G', 'vcf': {'chr': '6', 'pos': '152737643', 'ref': 'C', 'alt': 'G'}} + assert results['NM_033071.3:c.5950G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000006.12:g.152416508C>G', 'vcf': {'chr': '6', 'pos': '152416508', 'ref': 'C', 'alt': 'G'}} + assert results['NM_033071.3:c.5950G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_033071.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_149062.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012855.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_427.xml'} def test_variant296(self): variant = '7-6026775-T-C' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_001322012.1:c.688A>G' in list(results.keys()) - assert results['NM_001322012.1:c.688A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001322012.1:c.688A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001322012.1:c.688A>G']['alt_genomic_loci'], []) - assert results['NM_001322012.1:c.688A>G']['gene_symbol'] == 'PMS2' - assert results['NM_001322012.1:c.688A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308941.1:p.(Lys230Glu)', 'slr': 'NP_001308941.1:p.(K230E)'} - assert results['NM_001322012.1:c.688A>G']['submitted_variant'] == '7-6026775-T-C' - assert results['NM_001322012.1:c.688A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001322012.1:c.688A>G']['hgvs_lrg_variant'] == '' - assert results['NM_001322012.1:c.688A>G']['hgvs_transcript_variant'] == 'NM_001322012.1:c.688A>G' - assert results['NM_001322012.1:c.688A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322012.1:c.688A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert 
results['NM_001322012.1:c.688A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_001322012.1:c.688A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NM_001322012.1:c.688A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_001322012.1:c.688A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308941.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322012.1'} - - assert 'NM_001322010.1:c.1060A>G' in list(results.keys()) - assert results['NM_001322010.1:c.1060A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001322010.1:c.1060A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001322010.1:c.1060A>G']['alt_genomic_loci'], []) - assert results['NM_001322010.1:c.1060A>G']['gene_symbol'] == 'PMS2' - assert results['NM_001322010.1:c.1060A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308939.1:p.(Lys354Glu)', 'slr': 'NP_001308939.1:p.(K354E)'} - assert results['NM_001322010.1:c.1060A>G']['submitted_variant'] == '7-6026775-T-C' - assert results['NM_001322010.1:c.1060A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001322010.1:c.1060A>G']['hgvs_lrg_variant'] == '' - assert results['NM_001322010.1:c.1060A>G']['hgvs_transcript_variant'] == 'NM_001322010.1:c.1060A>G' - assert results['NM_001322010.1:c.1060A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322010.1:c.1060A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - 
assert results['NM_001322010.1:c.1060A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_001322010.1:c.1060A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NM_001322010.1:c.1060A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_001322010.1:c.1060A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308939.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322010.1'} - - assert 'NM_001322015.1:c.1312A>G' in list(results.keys()) - assert results['NM_001322015.1:c.1312A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001322015.1:c.1312A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001322015.1:c.1312A>G']['alt_genomic_loci'], []) - assert results['NM_001322015.1:c.1312A>G']['gene_symbol'] == 'PMS2' - assert results['NM_001322015.1:c.1312A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308944.1:p.(Lys438Glu)', 'slr': 'NP_001308944.1:p.(K438E)'} - assert results['NM_001322015.1:c.1312A>G']['submitted_variant'] == '7-6026775-T-C' - assert results['NM_001322015.1:c.1312A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001322015.1:c.1312A>G']['hgvs_lrg_variant'] == '' - assert results['NM_001322015.1:c.1312A>G']['hgvs_transcript_variant'] == 'NM_001322015.1:c.1312A>G' - assert results['NM_001322015.1:c.1312A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322015.1:c.1312A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 
'C'}} - assert results['NM_001322015.1:c.1312A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_001322015.1:c.1312A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NM_001322015.1:c.1312A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_001322015.1:c.1312A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308944.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322015.1'} + assert results['flag'] == 'gene_variant' + assert 'NR_003085.2:n.1703G=' in list(results.keys()) + assert results['NR_003085.2:n.1703G=']['submitted_variant'] == '7-6026775-T-C' + assert results['NR_003085.2:n.1703G=']['gene_symbol'] == 'PMS2' + assert results['NR_003085.2:n.1703G=']['gene_ids'] == {'hgnc_id': 'HGNC:9122', 'entrez_gene_id': '5395', 'ucsc_id': 'uc003spl.4', 'omim_id': ['600259']} + assert results['NR_003085.2:n.1703G=']['hgvs_transcript_variant'] == 'NR_003085.2:n.1703G=' + assert results['NR_003085.2:n.1703G=']['genome_context_intronic_sequence'] == '' + assert results['NR_003085.2:n.1703G=']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_003085.2:n.1703G=']['hgvs_refseqgene_variant'] == '' + assert results['NR_003085.2:n.1703G=']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_003085.2:n.1703G=']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_003085.2:n.1703G=']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_003085.2:n.1703G=']['alt_genomic_loci'], []) + assert 
results['NR_003085.2:n.1703G=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert 'hg38' not in list(results['NR_003085.2:n.1703G=']['primary_assembly_loci'].keys()) + assert results['NR_003085.2:n.1703G=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert 'grch38' not in list(results['NR_003085.2:n.1703G=']['primary_assembly_loci'].keys()) + assert results['NR_003085.2:n.1703G=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_003085.2'} - assert 'NM_001322003.1:c.1216A>G' in list(results.keys()) - assert results['NM_001322003.1:c.1216A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001322003.1:c.1216A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001322003.1:c.1216A>G']['alt_genomic_loci'], []) - assert results['NM_001322003.1:c.1216A>G']['gene_symbol'] == 'PMS2' - assert results['NM_001322003.1:c.1216A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308932.1:p.(Lys406Glu)', 'slr': 'NP_001308932.1:p.(K406E)'} - assert results['NM_001322003.1:c.1216A>G']['submitted_variant'] == '7-6026775-T-C' - assert results['NM_001322003.1:c.1216A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001322003.1:c.1216A>G']['hgvs_lrg_variant'] == '' - assert results['NM_001322003.1:c.1216A>G']['hgvs_transcript_variant'] == 'NM_001322003.1:c.1216A>G' - assert results['NM_001322003.1:c.1216A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322003.1:c.1216A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NM_001322003.1:c.1216A>G']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_001322003.1:c.1216A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NM_001322003.1:c.1216A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_001322003.1:c.1216A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308932.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322003.1'} + assert 'NM_001322006.1:c.1465A>G' in list(results.keys()) + assert results['NM_001322006.1:c.1465A>G']['submitted_variant'] == '7-6026775-T-C' + assert results['NM_001322006.1:c.1465A>G']['gene_symbol'] == 'PMS2' + assert results['NM_001322006.1:c.1465A>G']['gene_ids'] == {'hgnc_id': 'HGNC:9122', 'entrez_gene_id': '5395', 'ucsc_id': 'uc003spl.4', 'omim_id': ['600259']} + assert results['NM_001322006.1:c.1465A>G']['hgvs_transcript_variant'] == 'NM_001322006.1:c.1465A>G' + assert results['NM_001322006.1:c.1465A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322006.1:c.1465A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001322006.1:c.1465A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001322006.1:c.1465A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308935.1:p.(Lys489Glu)', 'slr': 'NP_001308935.1:p.(K489E)'} + assert results['NM_001322006.1:c.1465A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322006.1:c.1465A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001322006.1:c.1465A>G']['alt_genomic_loci'], []) + assert results['NM_001322006.1:c.1465A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322006.1:c.1465A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322006.1:c.1465A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322006.1:c.1465A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322006.1:c.1465A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322006.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308935.1'} assert 'NM_001322014.1:c.1621A>G' in list(results.keys()) - assert results['NM_001322014.1:c.1621A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001322014.1:c.1621A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001322014.1:c.1621A>G']['alt_genomic_loci'], []) - assert results['NM_001322014.1:c.1621A>G']['gene_symbol'] == 'PMS2' - assert results['NM_001322014.1:c.1621A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308943.1:p.(Lys541Glu)', 'slr': 'NP_001308943.1:p.(K541E)'} assert results['NM_001322014.1:c.1621A>G']['submitted_variant'] == '7-6026775-T-C' - assert results['NM_001322014.1:c.1621A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001322014.1:c.1621A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001322014.1:c.1621A>G']['gene_symbol'] == 'PMS2' + assert results['NM_001322014.1:c.1621A>G']['gene_ids'] == {'hgnc_id': 'HGNC:9122', 'entrez_gene_id': '5395', 'ucsc_id': 'uc003spl.4', 'omim_id': ['600259']} assert 
results['NM_001322014.1:c.1621A>G']['hgvs_transcript_variant'] == 'NM_001322014.1:c.1621A>G' + assert results['NM_001322014.1:c.1621A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322014.1:c.1621A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001322014.1:c.1621A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322014.1:c.1621A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NM_001322014.1:c.1621A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_001322014.1:c.1621A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NM_001322014.1:c.1621A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_001322014.1:c.1621A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308943.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322014.1'} + assert results['NM_001322014.1:c.1621A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308943.1:p.(Lys541Glu)', 'slr': 'NP_001308943.1:p.(K541E)'} + assert results['NM_001322014.1:c.1621A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322014.1:c.1621A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001322014.1:c.1621A>G']['alt_genomic_loci'], []) + assert results['NM_001322014.1:c.1621A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert 
results['NM_001322014.1:c.1621A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322014.1:c.1621A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322014.1:c.1621A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322014.1:c.1621A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322014.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308943.1'} + + assert 'NM_001322012.1:c.688A>G' in list(results.keys()) + assert results['NM_001322012.1:c.688A>G']['submitted_variant'] == '7-6026775-T-C' + assert results['NM_001322012.1:c.688A>G']['gene_symbol'] == 'PMS2' + assert results['NM_001322012.1:c.688A>G']['gene_ids'] == {'hgnc_id': 'HGNC:9122', 'entrez_gene_id': '5395', 'ucsc_id': 'uc003spl.4', 'omim_id': ['600259']} + assert results['NM_001322012.1:c.688A>G']['hgvs_transcript_variant'] == 'NM_001322012.1:c.688A>G' + assert results['NM_001322012.1:c.688A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322012.1:c.688A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001322012.1:c.688A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001322012.1:c.688A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308941.1:p.(Lys230Glu)', 'slr': 'NP_001308941.1:p.(K230E)'} + assert results['NM_001322012.1:c.688A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322012.1:c.688A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001322012.1:c.688A>G']['alt_genomic_loci'], []) + assert 
results['NM_001322012.1:c.688A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322012.1:c.688A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322012.1:c.688A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322012.1:c.688A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322012.1:c.688A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322012.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308941.1'} + + assert 'NM_001322013.1:c.1048A>G' in list(results.keys()) + assert results['NM_001322013.1:c.1048A>G']['submitted_variant'] == '7-6026775-T-C' + assert results['NM_001322013.1:c.1048A>G']['gene_symbol'] == 'PMS2' + assert results['NM_001322013.1:c.1048A>G']['gene_ids'] == {'hgnc_id': 'HGNC:9122', 'entrez_gene_id': '5395', 'ucsc_id': 'uc003spl.4', 'omim_id': ['600259']} + assert results['NM_001322013.1:c.1048A>G']['hgvs_transcript_variant'] == 'NM_001322013.1:c.1048A>G' + assert results['NM_001322013.1:c.1048A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322013.1:c.1048A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001322013.1:c.1048A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001322013.1:c.1048A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308942.1:p.(Lys350Glu)', 'slr': 'NP_001308942.1:p.(K350E)'} + assert results['NM_001322013.1:c.1048A>G']['hgvs_lrg_transcript_variant'] == '' + 
assert results['NM_001322013.1:c.1048A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001322013.1:c.1048A>G']['alt_genomic_loci'], []) + assert results['NM_001322013.1:c.1048A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322013.1:c.1048A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322013.1:c.1048A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322013.1:c.1048A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322013.1:c.1048A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322013.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308942.1'} assert 'NM_001322004.1:c.1216A>G' in list(results.keys()) - assert results['NM_001322004.1:c.1216A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001322004.1:c.1216A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001322004.1:c.1216A>G']['alt_genomic_loci'], []) - assert results['NM_001322004.1:c.1216A>G']['gene_symbol'] == 'PMS2' - assert results['NM_001322004.1:c.1216A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308933.1:p.(Lys406Glu)', 'slr': 'NP_001308933.1:p.(K406E)'} assert results['NM_001322004.1:c.1216A>G']['submitted_variant'] == '7-6026775-T-C' - assert results['NM_001322004.1:c.1216A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001322004.1:c.1216A>G']['hgvs_lrg_variant'] == '' + assert 
results['NM_001322004.1:c.1216A>G']['gene_symbol'] == 'PMS2' + assert results['NM_001322004.1:c.1216A>G']['gene_ids'] == {'hgnc_id': 'HGNC:9122', 'entrez_gene_id': '5395', 'ucsc_id': 'uc003spl.4', 'omim_id': ['600259']} assert results['NM_001322004.1:c.1216A>G']['hgvs_transcript_variant'] == 'NM_001322004.1:c.1216A>G' + assert results['NM_001322004.1:c.1216A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322004.1:c.1216A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001322004.1:c.1216A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322004.1:c.1216A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NM_001322004.1:c.1216A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_001322004.1:c.1216A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NM_001322004.1:c.1216A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_001322004.1:c.1216A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308933.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322004.1'} + assert results['NM_001322004.1:c.1216A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308933.1:p.(Lys406Glu)', 'slr': 'NP_001308933.1:p.(K406E)'} + assert results['NM_001322004.1:c.1216A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322004.1:c.1216A>G']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['NM_001322004.1:c.1216A>G']['alt_genomic_loci'], []) + assert results['NM_001322004.1:c.1216A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322004.1:c.1216A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322004.1:c.1216A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322004.1:c.1216A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322004.1:c.1216A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322004.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308933.1'} assert 'NM_001322008.1:c.1303A>G' in list(results.keys()) - assert results['NM_001322008.1:c.1303A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001322008.1:c.1303A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001322008.1:c.1303A>G']['alt_genomic_loci'], []) - assert results['NM_001322008.1:c.1303A>G']['gene_symbol'] == 'PMS2' - assert results['NM_001322008.1:c.1303A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308937.1:p.(Lys435Glu)', 'slr': 'NP_001308937.1:p.(K435E)'} assert results['NM_001322008.1:c.1303A>G']['submitted_variant'] == '7-6026775-T-C' - assert results['NM_001322008.1:c.1303A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001322008.1:c.1303A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001322008.1:c.1303A>G']['gene_symbol'] == 'PMS2' + assert 
results['NM_001322008.1:c.1303A>G']['gene_ids'] == {'hgnc_id': 'HGNC:9122', 'entrez_gene_id': '5395', 'ucsc_id': 'uc003spl.4', 'omim_id': ['600259']} assert results['NM_001322008.1:c.1303A>G']['hgvs_transcript_variant'] == 'NM_001322008.1:c.1303A>G' + assert results['NM_001322008.1:c.1303A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322008.1:c.1303A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001322008.1:c.1303A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322008.1:c.1303A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NM_001322008.1:c.1303A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_001322008.1:c.1303A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NM_001322008.1:c.1303A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_001322008.1:c.1303A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308937.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322008.1'} - - assert 'NM_001322006.1:c.1465A>G' in list(results.keys()) - assert results['NM_001322006.1:c.1465A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001322006.1:c.1465A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001322006.1:c.1465A>G']['alt_genomic_loci'], []) - assert results['NM_001322006.1:c.1465A>G']['gene_symbol'] == 'PMS2' - assert 
results['NM_001322006.1:c.1465A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308935.1:p.(Lys489Glu)', 'slr': 'NP_001308935.1:p.(K489E)'} - assert results['NM_001322006.1:c.1465A>G']['submitted_variant'] == '7-6026775-T-C' - assert results['NM_001322006.1:c.1465A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001322006.1:c.1465A>G']['hgvs_lrg_variant'] == '' - assert results['NM_001322006.1:c.1465A>G']['hgvs_transcript_variant'] == 'NM_001322006.1:c.1465A>G' - assert results['NM_001322006.1:c.1465A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322006.1:c.1465A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NM_001322006.1:c.1465A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_001322006.1:c.1465A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NM_001322006.1:c.1465A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_001322006.1:c.1465A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308935.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322006.1'} - - assert 'NM_001322013.1:c.1048A>G' in list(results.keys()) - assert results['NM_001322013.1:c.1048A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001322013.1:c.1048A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001322013.1:c.1048A>G']['alt_genomic_loci'], []) - assert results['NM_001322013.1:c.1048A>G']['gene_symbol'] == 'PMS2' - 
assert results['NM_001322013.1:c.1048A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308942.1:p.(Lys350Glu)', 'slr': 'NP_001308942.1:p.(K350E)'} - assert results['NM_001322013.1:c.1048A>G']['submitted_variant'] == '7-6026775-T-C' - assert results['NM_001322013.1:c.1048A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001322013.1:c.1048A>G']['hgvs_lrg_variant'] == '' - assert results['NM_001322013.1:c.1048A>G']['hgvs_transcript_variant'] == 'NM_001322013.1:c.1048A>G' - assert results['NM_001322013.1:c.1048A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322013.1:c.1048A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NM_001322013.1:c.1048A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_001322013.1:c.1048A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NM_001322013.1:c.1048A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_001322013.1:c.1048A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308942.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322013.1'} - - assert 'NM_001322009.1:c.1216A>G' in list(results.keys()) - assert results['NM_001322009.1:c.1216A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001322009.1:c.1216A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001322009.1:c.1216A>G']['alt_genomic_loci'], []) - assert results['NM_001322009.1:c.1216A>G']['gene_symbol'] == 
'PMS2' - assert results['NM_001322009.1:c.1216A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308938.1:p.(Lys406Glu)', 'slr': 'NP_001308938.1:p.(K406E)'} - assert results['NM_001322009.1:c.1216A>G']['submitted_variant'] == '7-6026775-T-C' - assert results['NM_001322009.1:c.1216A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001322009.1:c.1216A>G']['hgvs_lrg_variant'] == '' - assert results['NM_001322009.1:c.1216A>G']['hgvs_transcript_variant'] == 'NM_001322009.1:c.1216A>G' - assert results['NM_001322009.1:c.1216A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322009.1:c.1216A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NM_001322009.1:c.1216A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_001322009.1:c.1216A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NM_001322009.1:c.1216A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_001322009.1:c.1216A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308938.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322009.1'} + assert results['NM_001322008.1:c.1303A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308937.1:p.(Lys435Glu)', 'slr': 'NP_001308937.1:p.(K435E)'} + assert results['NM_001322008.1:c.1303A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322008.1:c.1303A>G']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['NM_001322008.1:c.1303A>G']['alt_genomic_loci'], []) + assert results['NM_001322008.1:c.1303A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322008.1:c.1303A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322008.1:c.1303A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322008.1:c.1303A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322008.1:c.1303A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322008.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308937.1'} - assert 'NR_003085.2:n.1703G=' in list(results.keys()) - assert results['NR_003085.2:n.1703G=']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_003085.2:n.1703G=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_003085.2:n.1703G=']['alt_genomic_loci'], []) - assert results['NR_003085.2:n.1703G=']['gene_symbol'] == 'PMS2' - assert results['NR_003085.2:n.1703G=']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} - assert results['NR_003085.2:n.1703G=']['submitted_variant'] == '7-6026775-T-C' - assert results['NR_003085.2:n.1703G=']['genome_context_intronic_sequence'] == '' - assert results['NR_003085.2:n.1703G=']['hgvs_lrg_variant'] == '' - assert results['NR_003085.2:n.1703G=']['hgvs_transcript_variant'] == 'NR_003085.2:n.1703G=' - assert 
results['NR_003085.2:n.1703G=']['hgvs_refseqgene_variant'] == '' - assert results['NR_003085.2:n.1703G=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert 'hg38' not in list(results['NR_003085.2:n.1703G=']['primary_assembly_loci'].keys()) - assert results['NR_003085.2:n.1703G=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert 'grch38' not in list(results['NR_003085.2:n.1703G=']['primary_assembly_loci'].keys()) - assert results['NR_003085.2:n.1703G=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_003085.2'} + assert 'NM_001322015.1:c.1312A>G' in list(results.keys()) + assert results['NM_001322015.1:c.1312A>G']['submitted_variant'] == '7-6026775-T-C' + assert results['NM_001322015.1:c.1312A>G']['gene_symbol'] == 'PMS2' + assert results['NM_001322015.1:c.1312A>G']['gene_ids'] == {'hgnc_id': 'HGNC:9122', 'entrez_gene_id': '5395', 'ucsc_id': 'uc003spl.4', 'omim_id': ['600259']} + assert results['NM_001322015.1:c.1312A>G']['hgvs_transcript_variant'] == 'NM_001322015.1:c.1312A>G' + assert results['NM_001322015.1:c.1312A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322015.1:c.1312A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001322015.1:c.1312A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001322015.1:c.1312A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308944.1:p.(Lys438Glu)', 'slr': 'NP_001308944.1:p.(K438E)'} + assert results['NM_001322015.1:c.1312A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322015.1:c.1312A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001322015.1:c.1312A>G']['alt_genomic_loci'], []) + assert 
results['NM_001322015.1:c.1312A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322015.1:c.1312A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322015.1:c.1312A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322015.1:c.1312A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322015.1:c.1312A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322015.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308944.1'} - assert results['flag'] == 'gene_variant' assert 'NM_001322005.1:c.1216A>G' in list(results.keys()) - assert results['NM_001322005.1:c.1216A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001322005.1:c.1216A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001322005.1:c.1216A>G']['alt_genomic_loci'], []) - assert results['NM_001322005.1:c.1216A>G']['gene_symbol'] == 'PMS2' - assert results['NM_001322005.1:c.1216A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308934.1:p.(Lys406Glu)', 'slr': 'NP_001308934.1:p.(K406E)'} assert results['NM_001322005.1:c.1216A>G']['submitted_variant'] == '7-6026775-T-C' - assert results['NM_001322005.1:c.1216A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001322005.1:c.1216A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001322005.1:c.1216A>G']['gene_symbol'] == 'PMS2' + assert results['NM_001322005.1:c.1216A>G']['gene_ids'] == {'hgnc_id': 
'HGNC:9122', 'entrez_gene_id': '5395', 'ucsc_id': 'uc003spl.4', 'omim_id': ['600259']} assert results['NM_001322005.1:c.1216A>G']['hgvs_transcript_variant'] == 'NM_001322005.1:c.1216A>G' + assert results['NM_001322005.1:c.1216A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322005.1:c.1216A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001322005.1:c.1216A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322005.1:c.1216A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NM_001322005.1:c.1216A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_001322005.1:c.1216A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NM_001322005.1:c.1216A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_001322005.1:c.1216A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308934.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322005.1'} + assert results['NM_001322005.1:c.1216A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308934.1:p.(Lys406Glu)', 'slr': 'NP_001308934.1:p.(K406E)'} + assert results['NM_001322005.1:c.1216A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322005.1:c.1216A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001322005.1:c.1216A>G']['alt_genomic_loci'], []) + assert results['NM_001322005.1:c.1216A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322005.1:c.1216A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322005.1:c.1216A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322005.1:c.1216A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322005.1:c.1216A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322005.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308934.1'} - assert 'NM_001322007.1:c.1303A>G' in list(results.keys()) - assert results['NM_001322007.1:c.1303A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001322007.1:c.1303A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001322007.1:c.1303A>G']['alt_genomic_loci'], []) - assert results['NM_001322007.1:c.1303A>G']['gene_symbol'] == 'PMS2' - assert results['NM_001322007.1:c.1303A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308936.1:p.(Lys435Glu)', 'slr': 'NP_001308936.1:p.(K435E)'} - assert results['NM_001322007.1:c.1303A>G']['submitted_variant'] == '7-6026775-T-C' - assert results['NM_001322007.1:c.1303A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001322007.1:c.1303A>G']['hgvs_lrg_variant'] == '' - assert results['NM_001322007.1:c.1303A>G']['hgvs_transcript_variant'] == 'NM_001322007.1:c.1303A>G' - assert results['NM_001322007.1:c.1303A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322007.1:c.1303A>G']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NM_001322007.1:c.1303A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_001322007.1:c.1303A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NM_001322007.1:c.1303A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_001322007.1:c.1303A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308936.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322007.1'} + assert 'NM_001322003.1:c.1216A>G' in list(results.keys()) + assert results['NM_001322003.1:c.1216A>G']['submitted_variant'] == '7-6026775-T-C' + assert results['NM_001322003.1:c.1216A>G']['gene_symbol'] == 'PMS2' + assert results['NM_001322003.1:c.1216A>G']['gene_ids'] == {'hgnc_id': 'HGNC:9122', 'entrez_gene_id': '5395', 'ucsc_id': 'uc003spl.4', 'omim_id': ['600259']} + assert results['NM_001322003.1:c.1216A>G']['hgvs_transcript_variant'] == 'NM_001322003.1:c.1216A>G' + assert results['NM_001322003.1:c.1216A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322003.1:c.1216A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001322003.1:c.1216A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001322003.1:c.1216A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308932.1:p.(Lys406Glu)', 'slr': 'NP_001308932.1:p.(K406E)'} + assert results['NM_001322003.1:c.1216A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322003.1:c.1216A>G']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['NM_001322003.1:c.1216A>G']['alt_genomic_loci'], []) + assert results['NM_001322003.1:c.1216A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322003.1:c.1216A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322003.1:c.1216A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322003.1:c.1216A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322003.1:c.1216A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322003.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308932.1'} + + assert 'NM_001322010.1:c.1060A>G' in list(results.keys()) + assert results['NM_001322010.1:c.1060A>G']['submitted_variant'] == '7-6026775-T-C' + assert results['NM_001322010.1:c.1060A>G']['gene_symbol'] == 'PMS2' + assert results['NM_001322010.1:c.1060A>G']['gene_ids'] == {'hgnc_id': 'HGNC:9122', 'entrez_gene_id': '5395', 'ucsc_id': 'uc003spl.4', 'omim_id': ['600259']} + assert results['NM_001322010.1:c.1060A>G']['hgvs_transcript_variant'] == 'NM_001322010.1:c.1060A>G' + assert results['NM_001322010.1:c.1060A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322010.1:c.1060A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001322010.1:c.1060A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001322010.1:c.1060A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308939.1:p.(Lys354Glu)', 'slr': 
'NP_001308939.1:p.(K354E)'} + assert results['NM_001322010.1:c.1060A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322010.1:c.1060A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001322010.1:c.1060A>G']['alt_genomic_loci'], []) + assert results['NM_001322010.1:c.1060A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322010.1:c.1060A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322010.1:c.1060A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322010.1:c.1060A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322010.1:c.1060A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322010.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308939.1'} + + assert 'NM_000535.6:c.1621A>G' in list(results.keys()) + assert results['NM_000535.6:c.1621A>G']['submitted_variant'] == '7-6026775-T-C' + assert results['NM_000535.6:c.1621A>G']['gene_symbol'] == 'PMS2' + assert results['NM_000535.6:c.1621A>G']['gene_ids'] == {'hgnc_id': 'HGNC:9122', 'entrez_gene_id': '5395', 'ucsc_id': 'uc003spl.4', 'omim_id': ['600259']} + assert results['NM_000535.6:c.1621A>G']['hgvs_transcript_variant'] == 'NM_000535.6:c.1621A>G' + assert results['NM_000535.6:c.1621A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_000535.6:c.1621A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000535.6:c.1621A>G']['hgvs_refseqgene_variant'] 
== '' + assert results['NM_000535.6:c.1621A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000526.2:p.(Lys541Glu)', 'slr': 'NP_000526.2:p.(K541E)'} + assert results['NM_000535.6:c.1621A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000535.6:c.1621A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000535.6:c.1621A>G']['alt_genomic_loci'], []) + assert results['NM_000535.6:c.1621A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NM_000535.6:c.1621A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_000535.6:c.1621A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NM_000535.6:c.1621A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_000535.6:c.1621A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000535.6', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000526.2'} + + assert 'NR_136154.1:n.1708A>G' in list(results.keys()) + assert results['NR_136154.1:n.1708A>G']['submitted_variant'] == '7-6026775-T-C' + assert results['NR_136154.1:n.1708A>G']['gene_symbol'] == 'PMS2' + assert results['NR_136154.1:n.1708A>G']['gene_ids'] == {'hgnc_id': 'HGNC:9122', 'entrez_gene_id': '5395', 'ucsc_id': 'uc003spl.4', 'omim_id': ['600259']} + assert results['NR_136154.1:n.1708A>G']['hgvs_transcript_variant'] == 'NR_136154.1:n.1708A>G' + assert results['NR_136154.1:n.1708A>G']['genome_context_intronic_sequence'] == '' + assert 
results['NR_136154.1:n.1708A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_136154.1:n.1708A>G']['hgvs_refseqgene_variant'] == '' + assert results['NR_136154.1:n.1708A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_136154.1:n.1708A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_136154.1:n.1708A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_136154.1:n.1708A>G']['alt_genomic_loci'], []) + assert results['NR_136154.1:n.1708A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NR_136154.1:n.1708A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NR_136154.1:n.1708A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NR_136154.1:n.1708A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NR_136154.1:n.1708A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_136154.1'} assert 'NM_000535.5:c.1621G=' in list(results.keys()) - assert results['NM_000535.5:c.1621G=']['hgvs_lrg_transcript_variant'] == 'LRG_161t1:c.1621G=' - assert results['NM_000535.5:c.1621G=']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000535.5:c.1621G=']['alt_genomic_loci'], []) - assert results['NM_000535.5:c.1621G=']['gene_symbol'] == 'PMS2' - assert results['NM_000535.5:c.1621G=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000526.1(LRG_161p1):p.(Glu541=)', 'slr': 
'NP_000526.1:p.(E541=)'} assert results['NM_000535.5:c.1621G=']['submitted_variant'] == '7-6026775-T-C' - assert results['NM_000535.5:c.1621G=']['genome_context_intronic_sequence'] == '' - assert results['NM_000535.5:c.1621G=']['hgvs_lrg_variant'] == 'LRG_161:g.26963G=' + assert results['NM_000535.5:c.1621G=']['gene_symbol'] == 'PMS2' + assert results['NM_000535.5:c.1621G=']['gene_ids'] == {'hgnc_id': 'HGNC:9122', 'entrez_gene_id': '5395', 'ucsc_id': 'uc003spl.4', 'omim_id': ['600259']} assert results['NM_000535.5:c.1621G=']['hgvs_transcript_variant'] == 'NM_000535.5:c.1621G=' + assert results['NM_000535.5:c.1621G=']['genome_context_intronic_sequence'] == '' + assert results['NM_000535.5:c.1621G=']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000535.5:c.1621G=']['hgvs_refseqgene_variant'] == 'NG_008466.1:g.26963G=' - assert results['NM_000535.5:c.1621G=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert results['NM_000535.5:c.1621G=']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000526.1(LRG_161p1):p.(Glu541=)', 'slr': 'NP_000526.1:p.(E541=)'} + assert results['NM_000535.5:c.1621G=']['hgvs_lrg_transcript_variant'] == 'LRG_161t1:c.1621G=' + assert results['NM_000535.5:c.1621G=']['hgvs_lrg_variant'] == 'LRG_161:g.26963G=' + self.assertCountEqual(results['NM_000535.5:c.1621G=']['alt_genomic_loci'], []) + assert results['NM_000535.5:c.1621G=']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} assert 'hg38' not in list(results['NM_000535.5:c.1621G=']['primary_assembly_loci'].keys()) - assert results['NM_000535.5:c.1621G=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} + assert 
results['NM_000535.5:c.1621G=']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} assert 'grch38' not in list(results['NM_000535.5:c.1621G=']['primary_assembly_loci'].keys()) - assert results['NM_000535.5:c.1621G=']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008466.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000526.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000535.5', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_161.xml'} - - assert 'NR_136154.1:n.1708A>G' in list(results.keys()) - assert results['NR_136154.1:n.1708A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_136154.1:n.1708A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_136154.1:n.1708A>G']['alt_genomic_loci'], []) - assert results['NR_136154.1:n.1708A>G']['gene_symbol'] == 'PMS2' - assert results['NR_136154.1:n.1708A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} - assert results['NR_136154.1:n.1708A>G']['submitted_variant'] == '7-6026775-T-C' - assert results['NR_136154.1:n.1708A>G']['genome_context_intronic_sequence'] == '' - assert results['NR_136154.1:n.1708A>G']['hgvs_lrg_variant'] == '' - assert results['NR_136154.1:n.1708A>G']['hgvs_transcript_variant'] == 'NR_136154.1:n.1708A>G' - assert results['NR_136154.1:n.1708A>G']['hgvs_refseqgene_variant'] == '' - assert results['NR_136154.1:n.1708A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NR_136154.1:n.1708A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert 
results['NR_136154.1:n.1708A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NR_136154.1:n.1708A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NR_136154.1:n.1708A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_136154.1'} + assert results['NM_000535.5:c.1621G=']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000535.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000526.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008466.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_161.xml'} assert 'NM_001322011.1:c.688A>G' in list(results.keys()) - assert results['NM_001322011.1:c.688A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001322011.1:c.688A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001322011.1:c.688A>G']['alt_genomic_loci'], []) - assert results['NM_001322011.1:c.688A>G']['gene_symbol'] == 'PMS2' - assert results['NM_001322011.1:c.688A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308940.1:p.(Lys230Glu)', 'slr': 'NP_001308940.1:p.(K230E)'} assert results['NM_001322011.1:c.688A>G']['submitted_variant'] == '7-6026775-T-C' - assert results['NM_001322011.1:c.688A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_001322011.1:c.688A>G']['hgvs_lrg_variant'] == '' + assert results['NM_001322011.1:c.688A>G']['gene_symbol'] == 'PMS2' + assert results['NM_001322011.1:c.688A>G']['gene_ids'] == {'hgnc_id': 'HGNC:9122', 'entrez_gene_id': '5395', 'ucsc_id': 'uc003spl.4', 'omim_id': ['600259']} assert results['NM_001322011.1:c.688A>G']['hgvs_transcript_variant'] == 'NM_001322011.1:c.688A>G' + assert 
results['NM_001322011.1:c.688A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322011.1:c.688A>G']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001322011.1:c.688A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_001322011.1:c.688A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NM_001322011.1:c.688A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_001322011.1:c.688A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NM_001322011.1:c.688A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_001322011.1:c.688A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308940.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322011.1'} + assert results['NM_001322011.1:c.688A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308940.1:p.(Lys230Glu)', 'slr': 'NP_001308940.1:p.(K230E)'} + assert results['NM_001322011.1:c.688A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322011.1:c.688A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001322011.1:c.688A>G']['alt_genomic_loci'], []) + assert results['NM_001322011.1:c.688A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322011.1:c.688A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322011.1:c.688A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322011.1:c.688A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322011.1:c.688A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322011.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308940.1'} - assert 'NM_000535.6:c.1621A>G' in list(results.keys()) - assert results['NM_000535.6:c.1621A>G']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000535.6:c.1621A>G']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000535.6:c.1621A>G']['alt_genomic_loci'], []) - assert results['NM_000535.6:c.1621A>G']['gene_symbol'] == 'PMS2' - assert results['NM_000535.6:c.1621A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000526.2:p.(Lys541Glu)', 'slr': 'NP_000526.2:p.(K541E)'} - assert results['NM_000535.6:c.1621A>G']['submitted_variant'] == '7-6026775-T-C' - assert results['NM_000535.6:c.1621A>G']['genome_context_intronic_sequence'] == '' - assert results['NM_000535.6:c.1621A>G']['hgvs_lrg_variant'] == '' - assert results['NM_000535.6:c.1621A>G']['hgvs_transcript_variant'] == 'NM_000535.6:c.1621A>G' - assert results['NM_000535.6:c.1621A>G']['hgvs_refseqgene_variant'] == '' - assert results['NM_000535.6:c.1621A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NM_000535.6:c.1621A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 
'chr7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_000535.6:c.1621A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '6026775', 'alt': 'C'}} - assert results['NM_000535.6:c.1621A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'ref': 'T', 'pos': '5987144', 'alt': 'C'}} - assert results['NM_000535.6:c.1621A>G']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000526.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000535.6'} + assert 'NM_001322009.1:c.1216A>G' in list(results.keys()) + assert results['NM_001322009.1:c.1216A>G']['submitted_variant'] == '7-6026775-T-C' + assert results['NM_001322009.1:c.1216A>G']['gene_symbol'] == 'PMS2' + assert results['NM_001322009.1:c.1216A>G']['gene_ids'] == {'hgnc_id': 'HGNC:9122', 'entrez_gene_id': '5395', 'ucsc_id': 'uc003spl.4', 'omim_id': ['600259']} + assert results['NM_001322009.1:c.1216A>G']['hgvs_transcript_variant'] == 'NM_001322009.1:c.1216A>G' + assert results['NM_001322009.1:c.1216A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322009.1:c.1216A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001322009.1:c.1216A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001322009.1:c.1216A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308938.1:p.(Lys406Glu)', 'slr': 'NP_001308938.1:p.(K406E)'} + assert results['NM_001322009.1:c.1216A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322009.1:c.1216A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001322009.1:c.1216A>G']['alt_genomic_loci'], []) + assert results['NM_001322009.1:c.1216A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + 
assert results['NM_001322009.1:c.1216A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322009.1:c.1216A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322009.1:c.1216A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322009.1:c.1216A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322009.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308938.1'} + assert 'NM_001322007.1:c.1303A>G' in list(results.keys()) + assert results['NM_001322007.1:c.1303A>G']['submitted_variant'] == '7-6026775-T-C' + assert results['NM_001322007.1:c.1303A>G']['gene_symbol'] == 'PMS2' + assert results['NM_001322007.1:c.1303A>G']['gene_ids'] == {'hgnc_id': 'HGNC:9122', 'entrez_gene_id': '5395', 'ucsc_id': 'uc003spl.4', 'omim_id': ['600259']} + assert results['NM_001322007.1:c.1303A>G']['hgvs_transcript_variant'] == 'NM_001322007.1:c.1303A>G' + assert results['NM_001322007.1:c.1303A>G']['genome_context_intronic_sequence'] == '' + assert results['NM_001322007.1:c.1303A>G']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001322007.1:c.1303A>G']['hgvs_refseqgene_variant'] == '' + assert results['NM_001322007.1:c.1303A>G']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001308936.1:p.(Lys435Glu)', 'slr': 'NP_001308936.1:p.(K435E)'} + assert results['NM_001322007.1:c.1303A>G']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001322007.1:c.1303A>G']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001322007.1:c.1303A>G']['alt_genomic_loci'], []) + assert 
results['NM_001322007.1:c.1303A>G']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': 'chr7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322007.1:c.1303A>G']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': 'chr7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322007.1:c.1303A>G']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.6026775T>C', 'vcf': {'chr': '7', 'pos': '6026775', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322007.1:c.1303A>G']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.5987144T>C', 'vcf': {'chr': '7', 'pos': '5987144', 'ref': 'T', 'alt': 'C'}} + assert results['NM_001322007.1:c.1303A>G']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001322007.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001308936.1'} def test_variant297(self): variant = '7-55242465-GGAATTAAGAGAAGCA-G' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_001346900.1:c.2077_2091del' in list(results.keys()) - assert results['NM_001346900.1:c.2077_2091del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001346900.1:c.2077_2091del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001346900.1:c.2077_2091del']['alt_genomic_loci'], []) - assert results['NM_001346900.1:c.2077_2091del']['gene_symbol'] == 'EGFR' - assert results['NM_001346900.1:c.2077_2091del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333829.1:p.(Glu693_Ala697del)', 'slr': 'NP_001333829.1:p.(E693_A697del)'} - assert results['NM_001346900.1:c.2077_2091del']['submitted_variant'] == '7-55242465-GGAATTAAGAGAAGCA-G' - assert results['NM_001346900.1:c.2077_2091del']['genome_context_intronic_sequence'] == '' - assert 
results['NM_001346900.1:c.2077_2091del']['hgvs_lrg_variant'] == '' - assert results['NM_001346900.1:c.2077_2091del']['hgvs_transcript_variant'] == 'NM_001346900.1:c.2077_2091del' - assert results['NM_001346900.1:c.2077_2091del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001346900.1:c.2077_2091del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} - assert results['NM_001346900.1:c.2077_2091del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} - assert results['NM_001346900.1:c.2077_2091del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} - assert results['NM_001346900.1:c.2077_2091del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} - assert results['NM_001346900.1:c.2077_2091del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333829.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346900.1'} - - assert 'NM_001346898.1:c.2236_2250del' in list(results.keys()) - assert results['NM_001346898.1:c.2236_2250del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001346898.1:c.2236_2250del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001346898.1:c.2236_2250del']['alt_genomic_loci'], []) - assert results['NM_001346898.1:c.2236_2250del']['gene_symbol'] == 'EGFR' - assert results['NM_001346898.1:c.2236_2250del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333827.1:p.(Glu746_Ala750del)', 'slr': 
'NP_001333827.1:p.(E746_A750del)'} - assert results['NM_001346898.1:c.2236_2250del']['submitted_variant'] == '7-55242465-GGAATTAAGAGAAGCA-G' - assert results['NM_001346898.1:c.2236_2250del']['genome_context_intronic_sequence'] == '' - assert results['NM_001346898.1:c.2236_2250del']['hgvs_lrg_variant'] == '' - assert results['NM_001346898.1:c.2236_2250del']['hgvs_transcript_variant'] == 'NM_001346898.1:c.2236_2250del' - assert results['NM_001346898.1:c.2236_2250del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001346898.1:c.2236_2250del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} - assert results['NM_001346898.1:c.2236_2250del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} - assert results['NM_001346898.1:c.2236_2250del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} - assert results['NM_001346898.1:c.2236_2250del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} - assert results['NM_001346898.1:c.2236_2250del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333827.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346898.1'} - - assert 'NM_001346941.1:c.1435_1449del' in list(results.keys()) - assert results['NM_001346941.1:c.1435_1449del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001346941.1:c.1435_1449del']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_001346941.1:c.1435_1449del']['alt_genomic_loci'], []) - assert results['NM_001346941.1:c.1435_1449del']['gene_symbol'] == 'EGFR' - assert results['NM_001346941.1:c.1435_1449del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333870.1:p.(Glu479_Ala483del)', 'slr': 'NP_001333870.1:p.(E479_A483del)'} - assert results['NM_001346941.1:c.1435_1449del']['submitted_variant'] == '7-55242465-GGAATTAAGAGAAGCA-G' - assert results['NM_001346941.1:c.1435_1449del']['genome_context_intronic_sequence'] == '' - assert results['NM_001346941.1:c.1435_1449del']['hgvs_lrg_variant'] == '' - assert results['NM_001346941.1:c.1435_1449del']['hgvs_transcript_variant'] == 'NM_001346941.1:c.1435_1449del' - assert results['NM_001346941.1:c.1435_1449del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001346941.1:c.1435_1449del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} - assert results['NM_001346941.1:c.1435_1449del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} - assert results['NM_001346941.1:c.1435_1449del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} - assert results['NM_001346941.1:c.1435_1449del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} - assert results['NM_001346941.1:c.1435_1449del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333870.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346941.1'} - assert results['flag'] 
== 'gene_variant' assert 'NM_001346899.1:c.2101_2115del' in list(results.keys()) - assert results['NM_001346899.1:c.2101_2115del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001346899.1:c.2101_2115del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001346899.1:c.2101_2115del']['alt_genomic_loci'], []) - assert results['NM_001346899.1:c.2101_2115del']['gene_symbol'] == 'EGFR' - assert results['NM_001346899.1:c.2101_2115del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333828.1:p.(Glu701_Ala705del)', 'slr': 'NP_001333828.1:p.(E701_A705del)'} assert results['NM_001346899.1:c.2101_2115del']['submitted_variant'] == '7-55242465-GGAATTAAGAGAAGCA-G' - assert results['NM_001346899.1:c.2101_2115del']['genome_context_intronic_sequence'] == '' - assert results['NM_001346899.1:c.2101_2115del']['hgvs_lrg_variant'] == '' + assert results['NM_001346899.1:c.2101_2115del']['gene_symbol'] == 'EGFR' + assert results['NM_001346899.1:c.2101_2115del']['gene_ids'] == {'hgnc_id': 'HGNC:3236', 'entrez_gene_id': '1956', 'ucsc_id': 'uc003tqk.4', 'omim_id': ['131550']} assert results['NM_001346899.1:c.2101_2115del']['hgvs_transcript_variant'] == 'NM_001346899.1:c.2101_2115del' + assert results['NM_001346899.1:c.2101_2115del']['genome_context_intronic_sequence'] == '' + assert results['NM_001346899.1:c.2101_2115del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001346899.1:c.2101_2115del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001346899.1:c.2101_2115del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} - assert results['NM_001346899.1:c.2101_2115del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} - assert 
results['NM_001346899.1:c.2101_2115del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} - assert results['NM_001346899.1:c.2101_2115del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} - assert results['NM_001346899.1:c.2101_2115del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333828.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346899.1'} + assert results['NM_001346899.1:c.2101_2115del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333828.1:p.(Glu701_Ala705del)', 'slr': 'NP_001333828.1:p.(E701_A705del)'} + assert results['NM_001346899.1:c.2101_2115del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001346899.1:c.2101_2115del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001346899.1:c.2101_2115del']['alt_genomic_loci'], []) + assert results['NM_001346899.1:c.2101_2115del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': 'chr7', 'pos': '55242465', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert results['NM_001346899.1:c.2101_2115del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': 'chr7', 'pos': '55174772', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert results['NM_001346899.1:c.2101_2115del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': '7', 'pos': '55242465', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert results['NM_001346899.1:c.2101_2115del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': '7', 
'pos': '55174772', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert results['NM_001346899.1:c.2101_2115del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346899.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333828.1'} - assert 'NM_001346897.1:c.2101_2115del' in list(results.keys()) - assert results['NM_001346897.1:c.2101_2115del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001346897.1:c.2101_2115del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001346897.1:c.2101_2115del']['alt_genomic_loci'], []) - assert results['NM_001346897.1:c.2101_2115del']['gene_symbol'] == 'EGFR' - assert results['NM_001346897.1:c.2101_2115del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333826.1:p.(Glu701_Ala705del)', 'slr': 'NP_001333826.1:p.(E701_A705del)'} - assert results['NM_001346897.1:c.2101_2115del']['submitted_variant'] == '7-55242465-GGAATTAAGAGAAGCA-G' - assert results['NM_001346897.1:c.2101_2115del']['genome_context_intronic_sequence'] == '' - assert results['NM_001346897.1:c.2101_2115del']['hgvs_lrg_variant'] == '' - assert results['NM_001346897.1:c.2101_2115del']['hgvs_transcript_variant'] == 'NM_001346897.1:c.2101_2115del' - assert results['NM_001346897.1:c.2101_2115del']['hgvs_refseqgene_variant'] == '' - assert results['NM_001346897.1:c.2101_2115del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} - assert results['NM_001346897.1:c.2101_2115del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} - assert results['NM_001346897.1:c.2101_2115del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': '7', 'ref': 
'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} - assert results['NM_001346897.1:c.2101_2115del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} - assert results['NM_001346897.1:c.2101_2115del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333826.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346897.1'} + assert 'NM_001346898.1:c.2236_2250del' in list(results.keys()) + assert results['NM_001346898.1:c.2236_2250del']['submitted_variant'] == '7-55242465-GGAATTAAGAGAAGCA-G' + assert results['NM_001346898.1:c.2236_2250del']['gene_symbol'] == 'EGFR' + assert results['NM_001346898.1:c.2236_2250del']['gene_ids'] == {'hgnc_id': 'HGNC:3236', 'entrez_gene_id': '1956', 'ucsc_id': 'uc003tqk.4', 'omim_id': ['131550']} + assert results['NM_001346898.1:c.2236_2250del']['hgvs_transcript_variant'] == 'NM_001346898.1:c.2236_2250del' + assert results['NM_001346898.1:c.2236_2250del']['genome_context_intronic_sequence'] == '' + assert results['NM_001346898.1:c.2236_2250del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001346898.1:c.2236_2250del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001346898.1:c.2236_2250del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333827.1:p.(Glu746_Ala750del)', 'slr': 'NP_001333827.1:p.(E746_A750del)'} + assert results['NM_001346898.1:c.2236_2250del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001346898.1:c.2236_2250del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001346898.1:c.2236_2250del']['alt_genomic_loci'], []) + assert results['NM_001346898.1:c.2236_2250del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': 'chr7', 'pos': '55242465', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert 
results['NM_001346898.1:c.2236_2250del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': 'chr7', 'pos': '55174772', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert results['NM_001346898.1:c.2236_2250del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': '7', 'pos': '55242465', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert results['NM_001346898.1:c.2236_2250del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': '7', 'pos': '55174772', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert results['NM_001346898.1:c.2236_2250del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346898.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333827.1'} assert 'NM_005228.3:c.2236_2250del' in list(results.keys()) - assert results['NM_005228.3:c.2236_2250del']['hgvs_lrg_transcript_variant'] == 'LRG_304t1:c.2236_2250del' - assert results['NM_005228.3:c.2236_2250del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_005228.3:c.2236_2250del']['alt_genomic_loci'], []) - assert results['NM_005228.3:c.2236_2250del']['gene_symbol'] == 'EGFR' - assert results['NM_005228.3:c.2236_2250del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005219.2(LRG_304p1):p.(Glu746_Ala750del)', 'slr': 'NP_005219.2:p.(E746_A750del)'} assert results['NM_005228.3:c.2236_2250del']['submitted_variant'] == '7-55242465-GGAATTAAGAGAAGCA-G' - assert results['NM_005228.3:c.2236_2250del']['genome_context_intronic_sequence'] == '' - assert results['NM_005228.3:c.2236_2250del']['hgvs_lrg_variant'] == 'LRG_304:g.160742_160756del' + assert results['NM_005228.3:c.2236_2250del']['gene_symbol'] == 'EGFR' + assert results['NM_005228.3:c.2236_2250del']['gene_ids'] == {'hgnc_id': 'HGNC:3236', 'entrez_gene_id': '1956', 
'ucsc_id': 'uc003tqk.4', 'omim_id': ['131550']} assert results['NM_005228.3:c.2236_2250del']['hgvs_transcript_variant'] == 'NM_005228.3:c.2236_2250del' + assert results['NM_005228.3:c.2236_2250del']['genome_context_intronic_sequence'] == '' + assert results['NM_005228.3:c.2236_2250del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_005228.3:c.2236_2250del']['hgvs_refseqgene_variant'] == 'NG_007726.3:g.160742_160756del' - assert results['NM_005228.3:c.2236_2250del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} - assert results['NM_005228.3:c.2236_2250del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} - assert results['NM_005228.3:c.2236_2250del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} - assert results['NM_005228.3:c.2236_2250del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} - assert results['NM_005228.3:c.2236_2250del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007726.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005219.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005228.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_304.xml'} + assert results['NM_005228.3:c.2236_2250del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005219.2(LRG_304p1):p.(Glu746_Ala750del)', 'slr': 'NP_005219.2:p.(E746_A750del)'} + assert results['NM_005228.3:c.2236_2250del']['hgvs_lrg_transcript_variant'] == 
'LRG_304t1:c.2236_2250del' + assert results['NM_005228.3:c.2236_2250del']['hgvs_lrg_variant'] == 'LRG_304:g.160742_160756del' + self.assertCountEqual(results['NM_005228.3:c.2236_2250del']['alt_genomic_loci'], []) + assert results['NM_005228.3:c.2236_2250del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': 'chr7', 'pos': '55242465', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert results['NM_005228.3:c.2236_2250del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': 'chr7', 'pos': '55174772', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert results['NM_005228.3:c.2236_2250del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': '7', 'pos': '55242465', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert results['NM_005228.3:c.2236_2250del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': '7', 'pos': '55174772', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert results['NM_005228.3:c.2236_2250del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005228.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005219.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007726.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_304.xml'} + + assert 'NM_001346900.1:c.2077_2091del' in list(results.keys()) + assert results['NM_001346900.1:c.2077_2091del']['submitted_variant'] == '7-55242465-GGAATTAAGAGAAGCA-G' + assert results['NM_001346900.1:c.2077_2091del']['gene_symbol'] == 'EGFR' + assert results['NM_001346900.1:c.2077_2091del']['gene_ids'] == {'hgnc_id': 'HGNC:3236', 'entrez_gene_id': '1956', 'ucsc_id': 'uc003tqk.4', 'omim_id': ['131550']} + assert results['NM_001346900.1:c.2077_2091del']['hgvs_transcript_variant'] == 
'NM_001346900.1:c.2077_2091del' + assert results['NM_001346900.1:c.2077_2091del']['genome_context_intronic_sequence'] == '' + assert results['NM_001346900.1:c.2077_2091del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001346900.1:c.2077_2091del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001346900.1:c.2077_2091del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333829.1:p.(Glu693_Ala697del)', 'slr': 'NP_001333829.1:p.(E693_A697del)'} + assert results['NM_001346900.1:c.2077_2091del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001346900.1:c.2077_2091del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001346900.1:c.2077_2091del']['alt_genomic_loci'], []) + assert results['NM_001346900.1:c.2077_2091del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': 'chr7', 'pos': '55242465', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert results['NM_001346900.1:c.2077_2091del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': 'chr7', 'pos': '55174772', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert results['NM_001346900.1:c.2077_2091del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': '7', 'pos': '55242465', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert results['NM_001346900.1:c.2077_2091del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': '7', 'pos': '55174772', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert results['NM_001346900.1:c.2077_2091del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346900.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333829.1'} assert 'NM_005228.4:c.2236_2250del' in list(results.keys()) - assert 
results['NM_005228.4:c.2236_2250del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_005228.4:c.2236_2250del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_005228.4:c.2236_2250del']['alt_genomic_loci'], []) - assert results['NM_005228.4:c.2236_2250del']['gene_symbol'] == 'EGFR' - assert results['NM_005228.4:c.2236_2250del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005219.2(LRG_304p1):p.(Glu746_Ala750del)', 'slr': 'NP_005219.2:p.(E746_A750del)'} assert results['NM_005228.4:c.2236_2250del']['submitted_variant'] == '7-55242465-GGAATTAAGAGAAGCA-G' - assert results['NM_005228.4:c.2236_2250del']['genome_context_intronic_sequence'] == '' - assert results['NM_005228.4:c.2236_2250del']['hgvs_lrg_variant'] == '' + assert results['NM_005228.4:c.2236_2250del']['gene_symbol'] == 'EGFR' + assert results['NM_005228.4:c.2236_2250del']['gene_ids'] == {'hgnc_id': 'HGNC:3236', 'entrez_gene_id': '1956', 'ucsc_id': 'uc003tqk.4', 'omim_id': ['131550']} assert results['NM_005228.4:c.2236_2250del']['hgvs_transcript_variant'] == 'NM_005228.4:c.2236_2250del' + assert results['NM_005228.4:c.2236_2250del']['genome_context_intronic_sequence'] == '' + assert results['NM_005228.4:c.2236_2250del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_005228.4:c.2236_2250del']['hgvs_refseqgene_variant'] == '' - assert results['NM_005228.4:c.2236_2250del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} - assert results['NM_005228.4:c.2236_2250del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': 'chr7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} - assert results['NM_005228.4:c.2236_2250del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': 
{'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55242465', 'alt': 'G'}} - assert results['NM_005228.4:c.2236_2250del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': '7', 'ref': 'GGAATTAAGAGAAGCA', 'pos': '55174772', 'alt': 'G'}} - assert results['NM_005228.4:c.2236_2250del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005219.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005228.4'} + assert results['NM_005228.4:c.2236_2250del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005219.2(LRG_304p1):p.(Glu746_Ala750del)', 'slr': 'NP_005219.2:p.(E746_A750del)'} + assert results['NM_005228.4:c.2236_2250del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_005228.4:c.2236_2250del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_005228.4:c.2236_2250del']['alt_genomic_loci'], []) + assert results['NM_005228.4:c.2236_2250del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': 'chr7', 'pos': '55242465', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert results['NM_005228.4:c.2236_2250del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': 'chr7', 'pos': '55174772', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert results['NM_005228.4:c.2236_2250del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': '7', 'pos': '55242465', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert results['NM_005228.4:c.2236_2250del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': '7', 'pos': '55174772', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert results['NM_005228.4:c.2236_2250del']['reference_sequence_records'] == {'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_005228.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005219.2'} + + assert 'NM_001346897.1:c.2101_2115del' in list(results.keys()) + assert results['NM_001346897.1:c.2101_2115del']['submitted_variant'] == '7-55242465-GGAATTAAGAGAAGCA-G' + assert results['NM_001346897.1:c.2101_2115del']['gene_symbol'] == 'EGFR' + assert results['NM_001346897.1:c.2101_2115del']['gene_ids'] == {'hgnc_id': 'HGNC:3236', 'entrez_gene_id': '1956', 'ucsc_id': 'uc003tqk.4', 'omim_id': ['131550']} + assert results['NM_001346897.1:c.2101_2115del']['hgvs_transcript_variant'] == 'NM_001346897.1:c.2101_2115del' + assert results['NM_001346897.1:c.2101_2115del']['genome_context_intronic_sequence'] == '' + assert results['NM_001346897.1:c.2101_2115del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001346897.1:c.2101_2115del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001346897.1:c.2101_2115del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333826.1:p.(Glu701_Ala705del)', 'slr': 'NP_001333826.1:p.(E701_A705del)'} + assert results['NM_001346897.1:c.2101_2115del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001346897.1:c.2101_2115del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001346897.1:c.2101_2115del']['alt_genomic_loci'], []) + assert results['NM_001346897.1:c.2101_2115del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': 'chr7', 'pos': '55242465', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert results['NM_001346897.1:c.2101_2115del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': 'chr7', 'pos': '55174772', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert results['NM_001346897.1:c.2101_2115del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': '7', 'pos': 
'55242465', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert results['NM_001346897.1:c.2101_2115del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': '7', 'pos': '55174772', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert results['NM_001346897.1:c.2101_2115del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346897.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333826.1'} + assert 'NM_001346941.1:c.1435_1449del' in list(results.keys()) + assert results['NM_001346941.1:c.1435_1449del']['submitted_variant'] == '7-55242465-GGAATTAAGAGAAGCA-G' + assert results['NM_001346941.1:c.1435_1449del']['gene_symbol'] == 'EGFR' + assert results['NM_001346941.1:c.1435_1449del']['gene_ids'] == {'hgnc_id': 'HGNC:3236', 'entrez_gene_id': '1956', 'ucsc_id': 'uc003tqk.4', 'omim_id': ['131550']} + assert results['NM_001346941.1:c.1435_1449del']['hgvs_transcript_variant'] == 'NM_001346941.1:c.1435_1449del' + assert results['NM_001346941.1:c.1435_1449del']['genome_context_intronic_sequence'] == '' + assert results['NM_001346941.1:c.1435_1449del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001346941.1:c.1435_1449del']['hgvs_refseqgene_variant'] == '' + assert results['NM_001346941.1:c.1435_1449del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333870.1:p.(Glu479_Ala483del)', 'slr': 'NP_001333870.1:p.(E479_A483del)'} + assert results['NM_001346941.1:c.1435_1449del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001346941.1:c.1435_1449del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001346941.1:c.1435_1449del']['alt_genomic_loci'], []) + assert results['NM_001346941.1:c.1435_1449del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': 'chr7', 'pos': '55242465', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert 
results['NM_001346941.1:c.1435_1449del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': 'chr7', 'pos': '55174772', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert results['NM_001346941.1:c.1435_1449del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55242466_55242480del', 'vcf': {'chr': '7', 'pos': '55242465', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert results['NM_001346941.1:c.1435_1449del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55174773_55174787del', 'vcf': {'chr': '7', 'pos': '55174772', 'ref': 'GGAATTAAGAGAAGCA', 'alt': 'G'}} + assert results['NM_001346941.1:c.1435_1449del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346941.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333870.1'} def test_variant298(self): variant = '7-55248992-T-TTCCAGGAAGCCT' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_005228.3:c.2284-5_2290dup' in list(results.keys()) - assert results['NM_005228.3:c.2284-5_2290dup']['hgvs_lrg_transcript_variant'] == 'LRG_304t1:c.2284-5_2290dup' - assert results['NM_005228.3:c.2284-5_2290dup']['refseqgene_context_intronic_sequence'] == 'NG_007726.3(NM_005228.3):c.2284-5_2290dup' - self.assertCountEqual(results['NM_005228.3:c.2284-5_2290dup']['alt_genomic_loci'], []) - assert results['NM_005228.3:c.2284-5_2290dup']['gene_symbol'] == 'EGFR' - assert results['NM_005228.3:c.2284-5_2290dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005219.2(LRG_304p1):p.?', 'slr': 'NP_005219.2:p.?'} - assert results['NM_005228.3:c.2284-5_2290dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' - assert results['NM_005228.3:c.2284-5_2290dup']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_005228.3):c.2284-5_2290dup' - assert results['NM_005228.3:c.2284-5_2290dup']['hgvs_lrg_variant'] 
== 'LRG_304:g.167257_167268dup' - assert results['NM_005228.3:c.2284-5_2290dup']['hgvs_transcript_variant'] == 'NM_005228.3:c.2284-5_2290dup' - assert results['NM_005228.3:c.2284-5_2290dup']['hgvs_refseqgene_variant'] == 'NG_007726.3:g.167257_167268dup' - assert results['NM_005228.3:c.2284-5_2290dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NM_005228.3:c.2284-5_2290dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NM_005228.3:c.2284-5_2290dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NM_005228.3:c.2284-5_2290dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NM_005228.3:c.2284-5_2290dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007726.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005219.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005228.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_304.xml'} + assert results['flag'] == 'gene_variant' + assert 'NR_047551.1:n.1272_1283dup' in list(results.keys()) + assert results['NR_047551.1:n.1272_1283dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' + assert results['NR_047551.1:n.1272_1283dup']['gene_symbol'] == 'EGFR-AS1' + assert results['NR_047551.1:n.1272_1283dup']['gene_ids'] == {'hgnc_id': 'HGNC:40207', 'entrez_gene_id': '100507500', 'ucsc_id': 'uc003tqo.5', 'omim_id': []} + assert 
results['NR_047551.1:n.1272_1283dup']['hgvs_transcript_variant'] == 'NR_047551.1:n.1272_1283dup' + assert results['NR_047551.1:n.1272_1283dup']['genome_context_intronic_sequence'] == '' + assert results['NR_047551.1:n.1272_1283dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_047551.1:n.1272_1283dup']['hgvs_refseqgene_variant'] == '' + assert results['NR_047551.1:n.1272_1283dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_047551.1:n.1272_1283dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_047551.1:n.1272_1283dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_047551.1:n.1272_1283dup']['alt_genomic_loci'], []) + assert results['NR_047551.1:n.1272_1283dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': 'chr7', 'pos': '55248980', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NR_047551.1:n.1272_1283dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': 'chr7', 'pos': '55181287', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NR_047551.1:n.1272_1283dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': '7', 'pos': '55248980', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NR_047551.1:n.1272_1283dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'pos': '55181287', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NR_047551.1:n.1272_1283dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_047551.1'} - assert 'NM_001346899.1:c.2149-5_2155dup' in list(results.keys()) - assert results['NM_001346899.1:c.2149-5_2155dup']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_001346899.1:c.2149-5_2155dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001346899.1:c.2149-5_2155dup']['alt_genomic_loci'], []) - assert results['NM_001346899.1:c.2149-5_2155dup']['gene_symbol'] == 'EGFR' - assert results['NM_001346899.1:c.2149-5_2155dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333828.1:p.?', 'slr': 'NP_001333828.1:p.?'} + assert 'NM_001346899.1:c.2149-5_2155dup' in list(results.keys()) assert results['NM_001346899.1:c.2149-5_2155dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' - assert results['NM_001346899.1:c.2149-5_2155dup']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_001346899.1):c.2149-5_2155dup' - assert results['NM_001346899.1:c.2149-5_2155dup']['hgvs_lrg_variant'] == '' + assert results['NM_001346899.1:c.2149-5_2155dup']['gene_symbol'] == 'EGFR' + assert results['NM_001346899.1:c.2149-5_2155dup']['gene_ids'] == {'hgnc_id': 'HGNC:3236', 'entrez_gene_id': '1956', 'ucsc_id': 'uc003tqk.4', 'omim_id': ['131550']} assert results['NM_001346899.1:c.2149-5_2155dup']['hgvs_transcript_variant'] == 'NM_001346899.1:c.2149-5_2155dup' + assert results['NM_001346899.1:c.2149-5_2155dup']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_001346899.1):c.2149-5_2155dup' + assert results['NM_001346899.1:c.2149-5_2155dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001346899.1:c.2149-5_2155dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001346899.1:c.2149-5_2155dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NM_001346899.1:c.2149-5_2155dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} - assert 
results['NM_001346899.1:c.2149-5_2155dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NM_001346899.1:c.2149-5_2155dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NM_001346899.1:c.2149-5_2155dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333828.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346899.1'} - - assert 'NM_005228.4:c.2284-5_2290dup' in list(results.keys()) - assert results['NM_005228.4:c.2284-5_2290dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_005228.4:c.2284-5_2290dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_005228.4:c.2284-5_2290dup']['alt_genomic_loci'], []) - assert results['NM_005228.4:c.2284-5_2290dup']['gene_symbol'] == 'EGFR' - assert results['NM_005228.4:c.2284-5_2290dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005219.2(LRG_304p1):p.?', 'slr': 'NP_005219.2:p.?'} - assert results['NM_005228.4:c.2284-5_2290dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' - assert results['NM_005228.4:c.2284-5_2290dup']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_005228.4):c.2284-5_2290dup' - assert results['NM_005228.4:c.2284-5_2290dup']['hgvs_lrg_variant'] == '' - assert results['NM_005228.4:c.2284-5_2290dup']['hgvs_transcript_variant'] == 'NM_005228.4:c.2284-5_2290dup' - assert results['NM_005228.4:c.2284-5_2290dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_005228.4:c.2284-5_2290dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} - assert 
results['NM_005228.4:c.2284-5_2290dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NM_005228.4:c.2284-5_2290dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NM_005228.4:c.2284-5_2290dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NM_005228.4:c.2284-5_2290dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005219.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005228.4'} + assert results['NM_001346899.1:c.2149-5_2155dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333828.1:p.?', 'slr': 'NP_001333828.1:p.?'} + assert results['NM_001346899.1:c.2149-5_2155dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001346899.1:c.2149-5_2155dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001346899.1:c.2149-5_2155dup']['alt_genomic_loci'], []) + assert results['NM_001346899.1:c.2149-5_2155dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': 'chr7', 'pos': '55248980', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346899.1:c.2149-5_2155dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': 'chr7', 'pos': '55181287', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346899.1:c.2149-5_2155dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': '7', 'pos': '55248980', 'ref': 'C', 'alt': 
'CTCCAGGAAGCCT'}} + assert results['NM_001346899.1:c.2149-5_2155dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'pos': '55181287', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346899.1:c.2149-5_2155dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346899.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333828.1'} assert 'NM_001346898.1:c.2284-5_2290dup' in list(results.keys()) - assert results['NM_001346898.1:c.2284-5_2290dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001346898.1:c.2284-5_2290dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001346898.1:c.2284-5_2290dup']['alt_genomic_loci'], []) - assert results['NM_001346898.1:c.2284-5_2290dup']['gene_symbol'] == 'EGFR' - assert results['NM_001346898.1:c.2284-5_2290dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333827.1:p.?', 'slr': 'NP_001333827.1:p.?'} assert results['NM_001346898.1:c.2284-5_2290dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' - assert results['NM_001346898.1:c.2284-5_2290dup']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_001346898.1):c.2284-5_2290dup' - assert results['NM_001346898.1:c.2284-5_2290dup']['hgvs_lrg_variant'] == '' + assert results['NM_001346898.1:c.2284-5_2290dup']['gene_symbol'] == 'EGFR' + assert results['NM_001346898.1:c.2284-5_2290dup']['gene_ids'] == {'hgnc_id': 'HGNC:3236', 'entrez_gene_id': '1956', 'ucsc_id': 'uc003tqk.4', 'omim_id': ['131550']} assert results['NM_001346898.1:c.2284-5_2290dup']['hgvs_transcript_variant'] == 'NM_001346898.1:c.2284-5_2290dup' + assert results['NM_001346898.1:c.2284-5_2290dup']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_001346898.1):c.2284-5_2290dup' + assert results['NM_001346898.1:c.2284-5_2290dup']['refseqgene_context_intronic_sequence'] == '' assert 
results['NM_001346898.1:c.2284-5_2290dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001346898.1:c.2284-5_2290dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NM_001346898.1:c.2284-5_2290dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NM_001346898.1:c.2284-5_2290dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NM_001346898.1:c.2284-5_2290dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NM_001346898.1:c.2284-5_2290dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333827.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346898.1'} + assert results['NM_001346898.1:c.2284-5_2290dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333827.1:p.?', 'slr': 'NP_001333827.1:p.?'} + assert results['NM_001346898.1:c.2284-5_2290dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001346898.1:c.2284-5_2290dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001346898.1:c.2284-5_2290dup']['alt_genomic_loci'], []) + assert results['NM_001346898.1:c.2284-5_2290dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': 'chr7', 'pos': '55248980', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346898.1:c.2284-5_2290dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': 'chr7', 'pos': '55181287', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346898.1:c.2284-5_2290dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': '7', 'pos': '55248980', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346898.1:c.2284-5_2290dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'pos': '55181287', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346898.1:c.2284-5_2290dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346898.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333827.1'} - assert 'NM_001346941.1:c.1483-5_1489dup' in list(results.keys()) - assert results['NM_001346941.1:c.1483-5_1489dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001346941.1:c.1483-5_1489dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001346941.1:c.1483-5_1489dup']['alt_genomic_loci'], []) - assert results['NM_001346941.1:c.1483-5_1489dup']['gene_symbol'] == 'EGFR' - assert results['NM_001346941.1:c.1483-5_1489dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333870.1:p.?', 'slr': 'NP_001333870.1:p.?'} - assert results['NM_001346941.1:c.1483-5_1489dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' - assert results['NM_001346941.1:c.1483-5_1489dup']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_001346941.1):c.1483-5_1489dup' - assert results['NM_001346941.1:c.1483-5_1489dup']['hgvs_lrg_variant'] == '' - assert results['NM_001346941.1:c.1483-5_1489dup']['hgvs_transcript_variant'] == 'NM_001346941.1:c.1483-5_1489dup' - assert results['NM_001346941.1:c.1483-5_1489dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001346941.1:c.1483-5_1489dup']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NM_001346941.1:c.1483-5_1489dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NM_001346941.1:c.1483-5_1489dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NM_001346941.1:c.1483-5_1489dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NM_001346941.1:c.1483-5_1489dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333870.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346941.1'} + assert 'NM_005228.3:c.2284-5_2290dup' in list(results.keys()) + assert results['NM_005228.3:c.2284-5_2290dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' + assert results['NM_005228.3:c.2284-5_2290dup']['gene_symbol'] == 'EGFR' + assert results['NM_005228.3:c.2284-5_2290dup']['gene_ids'] == {'hgnc_id': 'HGNC:3236', 'entrez_gene_id': '1956', 'ucsc_id': 'uc003tqk.4', 'omim_id': ['131550']} + assert results['NM_005228.3:c.2284-5_2290dup']['hgvs_transcript_variant'] == 'NM_005228.3:c.2284-5_2290dup' + assert results['NM_005228.3:c.2284-5_2290dup']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_005228.3):c.2284-5_2290dup' + assert results['NM_005228.3:c.2284-5_2290dup']['refseqgene_context_intronic_sequence'] == 'NG_007726.3(NM_005228.3):c.2284-5_2290dup' + assert results['NM_005228.3:c.2284-5_2290dup']['hgvs_refseqgene_variant'] == 'NG_007726.3:g.167257_167268dup' + assert 
results['NM_005228.3:c.2284-5_2290dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005219.2(LRG_304p1):p.?', 'slr': 'NP_005219.2:p.?'} + assert results['NM_005228.3:c.2284-5_2290dup']['hgvs_lrg_transcript_variant'] == 'LRG_304t1:c.2284-5_2290dup' + assert results['NM_005228.3:c.2284-5_2290dup']['hgvs_lrg_variant'] == 'LRG_304:g.167257_167268dup' + self.assertCountEqual(results['NM_005228.3:c.2284-5_2290dup']['alt_genomic_loci'], []) + assert results['NM_005228.3:c.2284-5_2290dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': 'chr7', 'pos': '55248980', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_005228.3:c.2284-5_2290dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': 'chr7', 'pos': '55181287', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_005228.3:c.2284-5_2290dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': '7', 'pos': '55248980', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_005228.3:c.2284-5_2290dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'pos': '55181287', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_005228.3:c.2284-5_2290dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005228.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005219.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007726.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_304.xml'} - assert results['flag'] == 'gene_variant' assert 'NM_001346900.1:c.2125-5_2131dup' in list(results.keys()) - assert results['NM_001346900.1:c.2125-5_2131dup']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_001346900.1:c.2125-5_2131dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001346900.1:c.2125-5_2131dup']['alt_genomic_loci'], []) - assert results['NM_001346900.1:c.2125-5_2131dup']['gene_symbol'] == 'EGFR' - assert results['NM_001346900.1:c.2125-5_2131dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333829.1:p.?', 'slr': 'NP_001333829.1:p.?'} assert results['NM_001346900.1:c.2125-5_2131dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' - assert results['NM_001346900.1:c.2125-5_2131dup']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_001346900.1):c.2125-5_2131dup' - assert results['NM_001346900.1:c.2125-5_2131dup']['hgvs_lrg_variant'] == '' + assert results['NM_001346900.1:c.2125-5_2131dup']['gene_symbol'] == 'EGFR' + assert results['NM_001346900.1:c.2125-5_2131dup']['gene_ids'] == {'hgnc_id': 'HGNC:3236', 'entrez_gene_id': '1956', 'ucsc_id': 'uc003tqk.4', 'omim_id': ['131550']} assert results['NM_001346900.1:c.2125-5_2131dup']['hgvs_transcript_variant'] == 'NM_001346900.1:c.2125-5_2131dup' + assert results['NM_001346900.1:c.2125-5_2131dup']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_001346900.1):c.2125-5_2131dup' + assert results['NM_001346900.1:c.2125-5_2131dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001346900.1:c.2125-5_2131dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001346900.1:c.2125-5_2131dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NM_001346900.1:c.2125-5_2131dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NM_001346900.1:c.2125-5_2131dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NM_001346900.1:c.2125-5_2131dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NM_001346900.1:c.2125-5_2131dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333829.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346900.1'} + assert results['NM_001346900.1:c.2125-5_2131dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333829.1:p.?', 'slr': 'NP_001333829.1:p.?'} + assert results['NM_001346900.1:c.2125-5_2131dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001346900.1:c.2125-5_2131dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001346900.1:c.2125-5_2131dup']['alt_genomic_loci'], []) + assert results['NM_001346900.1:c.2125-5_2131dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': 'chr7', 'pos': '55248980', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346900.1:c.2125-5_2131dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': 'chr7', 'pos': '55181287', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346900.1:c.2125-5_2131dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': '7', 'pos': '55248980', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346900.1:c.2125-5_2131dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'pos': '55181287', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert 
results['NM_001346900.1:c.2125-5_2131dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346900.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333829.1'} - assert 'NR_047551.1:n.1272_1283dup' in list(results.keys()) - assert results['NR_047551.1:n.1272_1283dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_047551.1:n.1272_1283dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_047551.1:n.1272_1283dup']['alt_genomic_loci'], []) - assert results['NR_047551.1:n.1272_1283dup']['gene_symbol'] == 'EGFR-AS1' - assert results['NR_047551.1:n.1272_1283dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} - assert results['NR_047551.1:n.1272_1283dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' - assert results['NR_047551.1:n.1272_1283dup']['genome_context_intronic_sequence'] == '' - assert results['NR_047551.1:n.1272_1283dup']['hgvs_lrg_variant'] == '' - assert results['NR_047551.1:n.1272_1283dup']['hgvs_transcript_variant'] == 'NR_047551.1:n.1272_1283dup' - assert results['NR_047551.1:n.1272_1283dup']['hgvs_refseqgene_variant'] == '' - assert results['NR_047551.1:n.1272_1283dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NR_047551.1:n.1272_1283dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NR_047551.1:n.1272_1283dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NR_047551.1:n.1272_1283dup']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NR_047551.1:n.1272_1283dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_047551.1'} + assert 'NM_005228.4:c.2284-5_2290dup' in list(results.keys()) + assert results['NM_005228.4:c.2284-5_2290dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' + assert results['NM_005228.4:c.2284-5_2290dup']['gene_symbol'] == 'EGFR' + assert results['NM_005228.4:c.2284-5_2290dup']['gene_ids'] == {'hgnc_id': 'HGNC:3236', 'entrez_gene_id': '1956', 'ucsc_id': 'uc003tqk.4', 'omim_id': ['131550']} + assert results['NM_005228.4:c.2284-5_2290dup']['hgvs_transcript_variant'] == 'NM_005228.4:c.2284-5_2290dup' + assert results['NM_005228.4:c.2284-5_2290dup']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_005228.4):c.2284-5_2290dup' + assert results['NM_005228.4:c.2284-5_2290dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_005228.4:c.2284-5_2290dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_005228.4:c.2284-5_2290dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005219.2(LRG_304p1):p.?', 'slr': 'NP_005219.2:p.?'} + assert results['NM_005228.4:c.2284-5_2290dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_005228.4:c.2284-5_2290dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_005228.4:c.2284-5_2290dup']['alt_genomic_loci'], []) + assert results['NM_005228.4:c.2284-5_2290dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': 'chr7', 'pos': '55248980', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_005228.4:c.2284-5_2290dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': 'chr7', 'pos': '55181287', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert 
results['NM_005228.4:c.2284-5_2290dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': '7', 'pos': '55248980', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_005228.4:c.2284-5_2290dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'pos': '55181287', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_005228.4:c.2284-5_2290dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005228.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005219.2'} assert 'NM_001346897.1:c.2149-5_2155dup' in list(results.keys()) - assert results['NM_001346897.1:c.2149-5_2155dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001346897.1:c.2149-5_2155dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001346897.1:c.2149-5_2155dup']['alt_genomic_loci'], []) - assert results['NM_001346897.1:c.2149-5_2155dup']['gene_symbol'] == 'EGFR' - assert results['NM_001346897.1:c.2149-5_2155dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333826.1:p.?', 'slr': 'NP_001333826.1:p.?'} assert results['NM_001346897.1:c.2149-5_2155dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' - assert results['NM_001346897.1:c.2149-5_2155dup']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_001346897.1):c.2149-5_2155dup' - assert results['NM_001346897.1:c.2149-5_2155dup']['hgvs_lrg_variant'] == '' + assert results['NM_001346897.1:c.2149-5_2155dup']['gene_symbol'] == 'EGFR' + assert results['NM_001346897.1:c.2149-5_2155dup']['gene_ids'] == {'hgnc_id': 'HGNC:3236', 'entrez_gene_id': '1956', 'ucsc_id': 'uc003tqk.4', 'omim_id': ['131550']} assert results['NM_001346897.1:c.2149-5_2155dup']['hgvs_transcript_variant'] == 'NM_001346897.1:c.2149-5_2155dup' + assert 
results['NM_001346897.1:c.2149-5_2155dup']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_001346897.1):c.2149-5_2155dup' + assert results['NM_001346897.1:c.2149-5_2155dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001346897.1:c.2149-5_2155dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001346897.1:c.2149-5_2155dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NM_001346897.1:c.2149-5_2155dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NM_001346897.1:c.2149-5_2155dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55248980', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NM_001346897.1:c.2149-5_2155dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '55181287', 'alt': 'CTCCAGGAAGCCT'}} - assert results['NM_001346897.1:c.2149-5_2155dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333826.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346897.1'} + assert results['NM_001346897.1:c.2149-5_2155dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001333826.1:p.?', 'slr': 'NP_001333826.1:p.?'} + assert results['NM_001346897.1:c.2149-5_2155dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001346897.1:c.2149-5_2155dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001346897.1:c.2149-5_2155dup']['alt_genomic_loci'], []) + assert results['NM_001346897.1:c.2149-5_2155dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': 'chr7', 'pos': '55248980', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346897.1:c.2149-5_2155dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': 'chr7', 'pos': '55181287', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346897.1:c.2149-5_2155dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': '7', 'pos': '55248980', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346897.1:c.2149-5_2155dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'pos': '55181287', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346897.1:c.2149-5_2155dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346897.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333826.1'} + assert 'NM_001346941.1:c.1483-5_1489dup' in list(results.keys()) + assert results['NM_001346941.1:c.1483-5_1489dup']['submitted_variant'] == '7-55248992-T-TTCCAGGAAGCCT' + assert results['NM_001346941.1:c.1483-5_1489dup']['gene_symbol'] == 'EGFR' + assert results['NM_001346941.1:c.1483-5_1489dup']['gene_ids'] == {'hgnc_id': 'HGNC:3236', 'entrez_gene_id': '1956', 'ucsc_id': 'uc003tqk.4', 'omim_id': ['131550']} + assert results['NM_001346941.1:c.1483-5_1489dup']['hgvs_transcript_variant'] == 'NM_001346941.1:c.1483-5_1489dup' + assert results['NM_001346941.1:c.1483-5_1489dup']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_001346941.1):c.1483-5_1489dup' + assert results['NM_001346941.1:c.1483-5_1489dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001346941.1:c.1483-5_1489dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001346941.1:c.1483-5_1489dup']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_001333870.1:p.?', 'slr': 'NP_001333870.1:p.?'} + assert results['NM_001346941.1:c.1483-5_1489dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001346941.1:c.1483-5_1489dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001346941.1:c.1483-5_1489dup']['alt_genomic_loci'], []) + assert results['NM_001346941.1:c.1483-5_1489dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': 'chr7', 'pos': '55248980', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346941.1:c.1483-5_1489dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': 'chr7', 'pos': '55181287', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346941.1:c.1483-5_1489dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.55248981_55248992dup', 'vcf': {'chr': '7', 'pos': '55248980', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346941.1:c.1483-5_1489dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.55181288_55181299dup', 'vcf': {'chr': '7', 'pos': '55181287', 'ref': 'C', 'alt': 'CTCCAGGAAGCCT'}} + assert results['NM_001346941.1:c.1483-5_1489dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001346941.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001333870.1'} def test_variant299(self): variant = '7-75932111-C-A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_001540.4:c.82C>A' in list(results.keys()) - assert results['NM_001540.4:c.82C>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001540.4:c.82C>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001540.4:c.82C>A']['alt_genomic_loci'], []) - assert results['NM_001540.4:c.82C>A']['gene_symbol'] == 'HSPB1' - assert 
results['NM_001540.4:c.82C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001531.1(LRG_248p1):p.(Leu28Ile)', 'slr': 'NP_001531.1:p.(L28I)'} - assert results['NM_001540.4:c.82C>A']['submitted_variant'] == '7-75932111-C-A' - assert results['NM_001540.4:c.82C>A']['genome_context_intronic_sequence'] == '' - assert results['NM_001540.4:c.82C>A']['hgvs_lrg_variant'] == '' - assert results['NM_001540.4:c.82C>A']['hgvs_transcript_variant'] == 'NM_001540.4:c.82C>A' - assert results['NM_001540.4:c.82C>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001540.4:c.82C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.75932111C>A', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '75932111', 'alt': 'A'}} - assert results['NM_001540.4:c.82C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.76302794C>A', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '76302794', 'alt': 'A'}} - assert results['NM_001540.4:c.82C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.75932111C>A', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '75932111', 'alt': 'A'}} - assert results['NM_001540.4:c.82C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.76302794C>A', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '76302794', 'alt': 'A'}} - assert results['NM_001540.4:c.82C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001531.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001540.4'} - assert results['flag'] == 'gene_variant' assert 'NM_001540.3:c.82C>A' in list(results.keys()) - assert results['NM_001540.3:c.82C>A']['hgvs_lrg_transcript_variant'] == 'LRG_248t1:c.82C>A' - assert results['NM_001540.3:c.82C>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001540.3:c.82C>A']['alt_genomic_loci'], []) - assert results['NM_001540.3:c.82C>A']['gene_symbol'] == 'HSPB1' - assert 
results['NM_001540.3:c.82C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001531.1(LRG_248p1):p.(Leu28Ile)', 'slr': 'NP_001531.1:p.(L28I)'} assert results['NM_001540.3:c.82C>A']['submitted_variant'] == '7-75932111-C-A' - assert results['NM_001540.3:c.82C>A']['genome_context_intronic_sequence'] == '' - assert results['NM_001540.3:c.82C>A']['hgvs_lrg_variant'] == 'LRG_248:g.5237C>A' + assert results['NM_001540.3:c.82C>A']['gene_symbol'] == 'HSPB1' + assert results['NM_001540.3:c.82C>A']['gene_ids'] == {'hgnc_id': 'HGNC:5246', 'entrez_gene_id': '3315', 'ucsc_id': 'uc003uew.4', 'omim_id': ['602195']} assert results['NM_001540.3:c.82C>A']['hgvs_transcript_variant'] == 'NM_001540.3:c.82C>A' + assert results['NM_001540.3:c.82C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001540.3:c.82C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001540.3:c.82C>A']['hgvs_refseqgene_variant'] == 'NG_008995.1:g.5237C>A' - assert results['NM_001540.3:c.82C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.75932111C>A', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '75932111', 'alt': 'A'}} - assert results['NM_001540.3:c.82C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.76302794C>A', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '76302794', 'alt': 'A'}} - assert results['NM_001540.3:c.82C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.75932111C>A', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '75932111', 'alt': 'A'}} - assert results['NM_001540.3:c.82C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.76302794C>A', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '76302794', 'alt': 'A'}} - assert results['NM_001540.3:c.82C>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008995.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001531.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001540.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_248.xml'} + assert results['NM_001540.3:c.82C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001531.1(LRG_248p1):p.(Leu28Ile)', 'slr': 'NP_001531.1:p.(L28I)'} + assert results['NM_001540.3:c.82C>A']['hgvs_lrg_transcript_variant'] == 'LRG_248t1:c.82C>A' + assert results['NM_001540.3:c.82C>A']['hgvs_lrg_variant'] == 'LRG_248:g.5237C>A' + self.assertCountEqual(results['NM_001540.3:c.82C>A']['alt_genomic_loci'], []) + assert results['NM_001540.3:c.82C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.75932111C>A', 'vcf': {'chr': 'chr7', 'pos': '75932111', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001540.3:c.82C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.76302794C>A', 'vcf': {'chr': 'chr7', 'pos': '76302794', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001540.3:c.82C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.75932111C>A', 'vcf': {'chr': '7', 'pos': '75932111', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001540.3:c.82C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.76302794C>A', 'vcf': {'chr': '7', 'pos': '76302794', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001540.3:c.82C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001540.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001531.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008995.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_248.xml'} + assert 'NM_001540.4:c.82C>A' in list(results.keys()) + assert results['NM_001540.4:c.82C>A']['submitted_variant'] == '7-75932111-C-A' + assert results['NM_001540.4:c.82C>A']['gene_symbol'] == 'HSPB1' + assert results['NM_001540.4:c.82C>A']['gene_ids'] == {'hgnc_id': 'HGNC:5246', 'entrez_gene_id': '3315', 'ucsc_id': 
'uc003uew.4', 'omim_id': ['602195']} + assert results['NM_001540.4:c.82C>A']['hgvs_transcript_variant'] == 'NM_001540.4:c.82C>A' + assert results['NM_001540.4:c.82C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001540.4:c.82C>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001540.4:c.82C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001540.4:c.82C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001531.1(LRG_248p1):p.(Leu28Ile)', 'slr': 'NP_001531.1:p.(L28I)'} + assert results['NM_001540.4:c.82C>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001540.4:c.82C>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001540.4:c.82C>A']['alt_genomic_loci'], []) + assert results['NM_001540.4:c.82C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.75932111C>A', 'vcf': {'chr': 'chr7', 'pos': '75932111', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001540.4:c.82C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.76302794C>A', 'vcf': {'chr': 'chr7', 'pos': '76302794', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001540.4:c.82C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.75932111C>A', 'vcf': {'chr': '7', 'pos': '75932111', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001540.4:c.82C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.76302794C>A', 'vcf': {'chr': '7', 'pos': '76302794', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001540.4:c.82C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001540.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001531.1'} def test_variant300(self): variant = '7-91652178-A-AAAC' @@ -16521,39 +17070,40 @@ def test_variant300(self): assert results['flag'] == 'gene_variant' assert 'NM_005751.4:c.4004_4006dup' in list(results.keys()) - assert 
results['NM_005751.4:c.4004_4006dup']['hgvs_lrg_transcript_variant'] == 'LRG_331t1:c.4004_4006dup' - assert results['NM_005751.4:c.4004_4006dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_005751.4:c.4004_4006dup']['alt_genomic_loci'], []) - assert results['NM_005751.4:c.4004_4006dup']['gene_symbol'] == 'AKAP9' - assert results['NM_005751.4:c.4004_4006dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005742.4(LRG_331p1):p.(Lys1335_Leu1336insGln)', 'slr': 'NP_005742.4:p.(K1335_L1336insQ)'} assert results['NM_005751.4:c.4004_4006dup']['submitted_variant'] == '7-91652178-A-AAAC' - assert results['NM_005751.4:c.4004_4006dup']['genome_context_intronic_sequence'] == '' - assert results['NM_005751.4:c.4004_4006dup']['hgvs_lrg_variant'] == 'LRG_331:g.86991_86993dup' + assert results['NM_005751.4:c.4004_4006dup']['gene_symbol'] == 'AKAP9' + assert results['NM_005751.4:c.4004_4006dup']['gene_ids'] == {'hgnc_id': 'HGNC:379', 'entrez_gene_id': '10142', 'ucsc_id': 'uc003ulg.4', 'omim_id': ['604001']} assert results['NM_005751.4:c.4004_4006dup']['hgvs_transcript_variant'] == 'NM_005751.4:c.4004_4006dup' + assert results['NM_005751.4:c.4004_4006dup']['genome_context_intronic_sequence'] == '' + assert results['NM_005751.4:c.4004_4006dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_005751.4:c.4004_4006dup']['hgvs_refseqgene_variant'] == 'NG_011623.1:g.86991_86993dup' - assert results['NM_005751.4:c.4004_4006dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.91652179_91652181dup', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '91652178', 'alt': 'AAAC'}} - assert results['NM_005751.4:c.4004_4006dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.92022865_92022867dup', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '92022864', 'alt': 'AAAC'}} - assert results['NM_005751.4:c.4004_4006dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000007.13:g.91652179_91652181dup', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '91652178', 'alt': 'AAAC'}} - assert results['NM_005751.4:c.4004_4006dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.92022865_92022867dup', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '92022864', 'alt': 'AAAC'}} - assert results['NM_005751.4:c.4004_4006dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011623.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005742.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005751.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_331.xml'} + assert results['NM_005751.4:c.4004_4006dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005742.4(LRG_331p1):p.(Lys1335_Leu1336insGln)', 'slr': 'NP_005742.4:p.(K1335_L1336insQ)'} + assert results['NM_005751.4:c.4004_4006dup']['hgvs_lrg_transcript_variant'] == 'LRG_331t1:c.4004_4006dup' + assert results['NM_005751.4:c.4004_4006dup']['hgvs_lrg_variant'] == 'LRG_331:g.86991_86993dup' + self.assertCountEqual(results['NM_005751.4:c.4004_4006dup']['alt_genomic_loci'], []) + assert results['NM_005751.4:c.4004_4006dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.91652179_91652181dup', 'vcf': {'chr': 'chr7', 'pos': '91652178', 'ref': 'A', 'alt': 'AAAC'}} + assert results['NM_005751.4:c.4004_4006dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.92022865_92022867dup', 'vcf': {'chr': 'chr7', 'pos': '92022864', 'ref': 'A', 'alt': 'AAAC'}} + assert results['NM_005751.4:c.4004_4006dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.91652179_91652181dup', 'vcf': {'chr': '7', 'pos': '91652178', 'ref': 'A', 'alt': 'AAAC'}} + assert results['NM_005751.4:c.4004_4006dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.92022865_92022867dup', 'vcf': {'chr': '7', 'pos': 
'92022864', 'ref': 'A', 'alt': 'AAAC'}} + assert results['NM_005751.4:c.4004_4006dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005751.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005742.4', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011623.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_331.xml'} assert 'NM_147185.2:c.4004_4006dup' in list(results.keys()) - assert results['NM_147185.2:c.4004_4006dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_147185.2:c.4004_4006dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_147185.2:c.4004_4006dup']['alt_genomic_loci'], []) - assert results['NM_147185.2:c.4004_4006dup']['gene_symbol'] == 'AKAP9' - assert results['NM_147185.2:c.4004_4006dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_671714.1:p.(Lys1335_Leu1336insGln)', 'slr': 'NP_671714.1:p.(K1335_L1336insQ)'} assert results['NM_147185.2:c.4004_4006dup']['submitted_variant'] == '7-91652178-A-AAAC' - assert results['NM_147185.2:c.4004_4006dup']['genome_context_intronic_sequence'] == '' - assert results['NM_147185.2:c.4004_4006dup']['hgvs_lrg_variant'] == '' + assert results['NM_147185.2:c.4004_4006dup']['gene_symbol'] == 'AKAP9' + assert results['NM_147185.2:c.4004_4006dup']['gene_ids'] == {'hgnc_id': 'HGNC:379', 'entrez_gene_id': '10142', 'ucsc_id': 'uc003ulg.4', 'omim_id': ['604001']} assert results['NM_147185.2:c.4004_4006dup']['hgvs_transcript_variant'] == 'NM_147185.2:c.4004_4006dup' + assert results['NM_147185.2:c.4004_4006dup']['genome_context_intronic_sequence'] == '' + assert results['NM_147185.2:c.4004_4006dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_147185.2:c.4004_4006dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_147185.2:c.4004_4006dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.91652179_91652181dup', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': 
'91652178', 'alt': 'AAAC'}} - assert results['NM_147185.2:c.4004_4006dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.92022865_92022867dup', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '92022864', 'alt': 'AAAC'}} - assert results['NM_147185.2:c.4004_4006dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.91652179_91652181dup', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '91652178', 'alt': 'AAAC'}} - assert results['NM_147185.2:c.4004_4006dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.92022865_92022867dup', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '92022864', 'alt': 'AAAC'}} - assert results['NM_147185.2:c.4004_4006dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_671714.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_147185.2'} - + assert results['NM_147185.2:c.4004_4006dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_671714.1:p.(Lys1335_Leu1336insGln)', 'slr': 'NP_671714.1:p.(K1335_L1336insQ)'} + assert results['NM_147185.2:c.4004_4006dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_147185.2:c.4004_4006dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_147185.2:c.4004_4006dup']['alt_genomic_loci'], []) + assert results['NM_147185.2:c.4004_4006dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.91652179_91652181dup', 'vcf': {'chr': 'chr7', 'pos': '91652178', 'ref': 'A', 'alt': 'AAAC'}} + assert results['NM_147185.2:c.4004_4006dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.92022865_92022867dup', 'vcf': {'chr': 'chr7', 'pos': '92022864', 'ref': 'A', 'alt': 'AAAC'}} + assert results['NM_147185.2:c.4004_4006dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.91652179_91652181dup', 'vcf': {'chr': '7', 'pos': '91652178', 'ref': 'A', 'alt': 'AAAC'}} + assert 
results['NM_147185.2:c.4004_4006dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.92022865_92022867dup', 'vcf': {'chr': '7', 'pos': '92022864', 'ref': 'A', 'alt': 'AAAC'}} + assert results['NM_147185.2:c.4004_4006dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_147185.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_671714.1'} def test_variant301(self): variant = '7-117199644-ATCT-A' @@ -16561,265 +17111,275 @@ def test_variant301(self): print(results) assert results['flag'] == 'gene_variant' - assert 'NR_149084.1:n.221+1140_221+1142del' in list(results.keys()) - assert results['NR_149084.1:n.221+1140_221+1142del']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_149084.1:n.221+1140_221+1142del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_149084.1:n.221+1140_221+1142del']['alt_genomic_loci'], []) - assert results['NR_149084.1:n.221+1140_221+1142del']['gene_symbol'] == 'CFTR-AS1' - assert results['NR_149084.1:n.221+1140_221+1142del']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} - assert results['NR_149084.1:n.221+1140_221+1142del']['submitted_variant'] == '7-117199644-ATCT-A' - assert results['NR_149084.1:n.221+1140_221+1142del']['genome_context_intronic_sequence'] == 'NC_000007.13(NR_149084.1):c.221+1140_221+1142del' - assert results['NR_149084.1:n.221+1140_221+1142del']['hgvs_lrg_variant'] == '' - assert results['NR_149084.1:n.221+1140_221+1142del']['hgvs_transcript_variant'] == 'NR_149084.1:n.221+1140_221+1142del' - assert results['NR_149084.1:n.221+1140_221+1142del']['hgvs_refseqgene_variant'] == '' - assert results['NR_149084.1:n.221+1140_221+1142del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.117199646_117199648del', 'vcf': {'chr': 'chr7', 'ref': 'ATCT', 'pos': '117199644', 'alt': 'A'}} - assert 
results['NR_149084.1:n.221+1140_221+1142del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.117559592_117559594del', 'vcf': {'chr': 'chr7', 'ref': 'ATCT', 'pos': '117559590', 'alt': 'A'}} - assert results['NR_149084.1:n.221+1140_221+1142del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.117199646_117199648del', 'vcf': {'chr': '7', 'ref': 'ATCT', 'pos': '117199644', 'alt': 'A'}} - assert results['NR_149084.1:n.221+1140_221+1142del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.117559592_117559594del', 'vcf': {'chr': '7', 'ref': 'ATCT', 'pos': '117559590', 'alt': 'A'}} - assert results['NR_149084.1:n.221+1140_221+1142del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_149084.1'} - assert 'NM_000492.3:c.1521_1523del' in list(results.keys()) - assert results['NM_000492.3:c.1521_1523del']['hgvs_lrg_transcript_variant'] == 'LRG_663t1:c.1521_1523del' - assert results['NM_000492.3:c.1521_1523del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000492.3:c.1521_1523del']['alt_genomic_loci'], []) - assert results['NM_000492.3:c.1521_1523del']['gene_symbol'] == 'CFTR' - assert results['NM_000492.3:c.1521_1523del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000483.3(LRG_663p1):p.(Phe508del)', 'slr': 'NP_000483.3:p.(F508del)'} assert results['NM_000492.3:c.1521_1523del']['submitted_variant'] == '7-117199644-ATCT-A' - assert results['NM_000492.3:c.1521_1523del']['genome_context_intronic_sequence'] == '' - assert results['NM_000492.3:c.1521_1523del']['hgvs_lrg_variant'] == '' + assert results['NM_000492.3:c.1521_1523del']['gene_symbol'] == 'CFTR' + assert results['NM_000492.3:c.1521_1523del']['gene_ids'] == {'hgnc_id': 'HGNC:1884', 'entrez_gene_id': '1080', 'ucsc_id': 'uc003vjd.4', 'omim_id': ['602421']} assert results['NM_000492.3:c.1521_1523del']['hgvs_transcript_variant'] == 
'NM_000492.3:c.1521_1523del' + assert results['NM_000492.3:c.1521_1523del']['genome_context_intronic_sequence'] == '' + assert results['NM_000492.3:c.1521_1523del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000492.3:c.1521_1523del']['hgvs_refseqgene_variant'] == 'NG_016465.3:g.98809_98811del' - assert results['NM_000492.3:c.1521_1523del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.117199646_117199648del', 'vcf': {'chr': 'chr7', 'ref': 'ATCT', 'pos': '117199644', 'alt': 'A'}} - assert results['NM_000492.3:c.1521_1523del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.117559592_117559594del', 'vcf': {'chr': 'chr7', 'ref': 'ATCT', 'pos': '117559590', 'alt': 'A'}} - assert results['NM_000492.3:c.1521_1523del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.117199646_117199648del', 'vcf': {'chr': '7', 'ref': 'ATCT', 'pos': '117199644', 'alt': 'A'}} - assert results['NM_000492.3:c.1521_1523del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.117559592_117559594del', 'vcf': {'chr': '7', 'ref': 'ATCT', 'pos': '117559590', 'alt': 'A'}} - assert results['NM_000492.3:c.1521_1523del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_016465.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000483.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000492.3'} - - - def test_variant302(self): - variant = '7-140453136-AC-CT' - results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() - print(results) - - assert 'NR_148928.1:n.2896_2897delinsAG' in list(results.keys()) - assert results['NR_148928.1:n.2896_2897delinsAG']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_148928.1:n.2896_2897delinsAG']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_148928.1:n.2896_2897delinsAG']['alt_genomic_loci'], []) - assert 
results['NR_148928.1:n.2896_2897delinsAG']['gene_symbol'] == 'BRAF' - assert results['NR_148928.1:n.2896_2897delinsAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} - assert results['NR_148928.1:n.2896_2897delinsAG']['submitted_variant'] == '7-140453136-AC-CT' - assert results['NR_148928.1:n.2896_2897delinsAG']['genome_context_intronic_sequence'] == '' - assert results['NR_148928.1:n.2896_2897delinsAG']['hgvs_lrg_variant'] == '' - assert results['NR_148928.1:n.2896_2897delinsAG']['hgvs_transcript_variant'] == 'NR_148928.1:n.2896_2897delinsAG' - assert results['NR_148928.1:n.2896_2897delinsAG']['hgvs_refseqgene_variant'] == '' - assert results['NR_148928.1:n.2896_2897delinsAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140453136', 'alt': 'CT'}} - assert results['NR_148928.1:n.2896_2897delinsAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140753336', 'alt': 'CT'}} - assert results['NR_148928.1:n.2896_2897delinsAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140453136', 'alt': 'CT'}} - assert results['NR_148928.1:n.2896_2897delinsAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140753336', 'alt': 'CT'}} - assert results['NR_148928.1:n.2896_2897delinsAG']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_148928.1'} + assert results['NM_000492.3:c.1521_1523del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000483.3(LRG_663p1):p.(Phe508del)', 'slr': 'NP_000483.3:p.(F508del)'} + assert 
results['NM_000492.3:c.1521_1523del']['hgvs_lrg_transcript_variant'] == 'LRG_663t1:c.1521_1523del' + assert results['NM_000492.3:c.1521_1523del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000492.3:c.1521_1523del']['alt_genomic_loci'], []) + assert results['NM_000492.3:c.1521_1523del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.117199646_117199648del', 'vcf': {'chr': 'chr7', 'pos': '117199644', 'ref': 'ATCT', 'alt': 'A'}} + assert results['NM_000492.3:c.1521_1523del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.117559592_117559594del', 'vcf': {'chr': 'chr7', 'pos': '117559590', 'ref': 'ATCT', 'alt': 'A'}} + assert results['NM_000492.3:c.1521_1523del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.117199646_117199648del', 'vcf': {'chr': '7', 'pos': '117199644', 'ref': 'ATCT', 'alt': 'A'}} + assert results['NM_000492.3:c.1521_1523del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.117559592_117559594del', 'vcf': {'chr': '7', 'pos': '117559590', 'ref': 'ATCT', 'alt': 'A'}} + assert results['NM_000492.3:c.1521_1523del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000492.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000483.3', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_016465.3'} - assert 'NM_004333.4:c.1798_1799delinsAG' in list(results.keys()) - assert results['NM_004333.4:c.1798_1799delinsAG']['hgvs_lrg_transcript_variant'] == 'LRG_299t1:c.1798_1799delinsAG' - assert results['NM_004333.4:c.1798_1799delinsAG']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004333.4:c.1798_1799delinsAG']['alt_genomic_loci'], []) - assert results['NM_004333.4:c.1798_1799delinsAG']['gene_symbol'] == 'BRAF' - assert results['NM_004333.4:c.1798_1799delinsAG']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_004324.2(LRG_299p1):p.(Val600Arg)', 'slr': 'NP_004324.2:p.(V600R)'} - assert results['NM_004333.4:c.1798_1799delinsAG']['submitted_variant'] == '7-140453136-AC-CT' - assert results['NM_004333.4:c.1798_1799delinsAG']['genome_context_intronic_sequence'] == '' - assert results['NM_004333.4:c.1798_1799delinsAG']['hgvs_lrg_variant'] == '' - assert results['NM_004333.4:c.1798_1799delinsAG']['hgvs_transcript_variant'] == 'NM_004333.4:c.1798_1799delinsAG' - assert results['NM_004333.4:c.1798_1799delinsAG']['hgvs_refseqgene_variant'] == 'NG_007873.2:g.176428_176429delinsAG' - assert results['NM_004333.4:c.1798_1799delinsAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140453136', 'alt': 'CT'}} - assert results['NM_004333.4:c.1798_1799delinsAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140753336', 'alt': 'CT'}} - assert results['NM_004333.4:c.1798_1799delinsAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140453136', 'alt': 'CT'}} - assert results['NM_004333.4:c.1798_1799delinsAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140753336', 'alt': 'CT'}} - assert results['NM_004333.4:c.1798_1799delinsAG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007873.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.4'} + assert 'NR_149084.1:n.221+1140_221+1142del' in list(results.keys()) + assert results['NR_149084.1:n.221+1140_221+1142del']['submitted_variant'] == '7-117199644-ATCT-A' + assert 
results['NR_149084.1:n.221+1140_221+1142del']['gene_symbol'] == 'CFTR-AS1' + assert results['NR_149084.1:n.221+1140_221+1142del']['gene_ids'] == {'hgnc_id': 'HGNC:40144', 'entrez_gene_id': '111082987', 'ucsc_id': '', 'omim_id': []} + assert results['NR_149084.1:n.221+1140_221+1142del']['hgvs_transcript_variant'] == 'NR_149084.1:n.221+1140_221+1142del' + assert results['NR_149084.1:n.221+1140_221+1142del']['genome_context_intronic_sequence'] == 'NC_000007.13(NR_149084.1):c.221+1140_221+1142del' + assert results['NR_149084.1:n.221+1140_221+1142del']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_149084.1:n.221+1140_221+1142del']['hgvs_refseqgene_variant'] == '' + assert results['NR_149084.1:n.221+1140_221+1142del']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_149084.1:n.221+1140_221+1142del']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_149084.1:n.221+1140_221+1142del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_149084.1:n.221+1140_221+1142del']['alt_genomic_loci'], []) + assert results['NR_149084.1:n.221+1140_221+1142del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.117199646_117199648del', 'vcf': {'chr': 'chr7', 'pos': '117199644', 'ref': 'ATCT', 'alt': 'A'}} + assert results['NR_149084.1:n.221+1140_221+1142del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.117559592_117559594del', 'vcf': {'chr': 'chr7', 'pos': '117559590', 'ref': 'ATCT', 'alt': 'A'}} + assert results['NR_149084.1:n.221+1140_221+1142del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.117199646_117199648del', 'vcf': {'chr': '7', 'pos': '117199644', 'ref': 'ATCT', 'alt': 'A'}} + assert results['NR_149084.1:n.221+1140_221+1142del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.117559592_117559594del', 'vcf': {'chr': '7', 'pos': 
'117559590', 'ref': 'ATCT', 'alt': 'A'}} + assert results['NR_149084.1:n.221+1140_221+1142del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_149084.1'} - assert 'NM_004333.5:c.1798_1799delinsAG' in list(results.keys()) - assert results['NM_004333.5:c.1798_1799delinsAG']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_004333.5:c.1798_1799delinsAG']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004333.5:c.1798_1799delinsAG']['alt_genomic_loci'], []) - assert results['NM_004333.5:c.1798_1799delinsAG']['gene_symbol'] == 'BRAF' - assert results['NM_004333.5:c.1798_1799delinsAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004324.2(LRG_299p1):p.(Val600Arg)', 'slr': 'NP_004324.2:p.(V600R)'} - assert results['NM_004333.5:c.1798_1799delinsAG']['submitted_variant'] == '7-140453136-AC-CT' - assert results['NM_004333.5:c.1798_1799delinsAG']['genome_context_intronic_sequence'] == '' - assert results['NM_004333.5:c.1798_1799delinsAG']['hgvs_lrg_variant'] == '' - assert results['NM_004333.5:c.1798_1799delinsAG']['hgvs_transcript_variant'] == 'NM_004333.5:c.1798_1799delinsAG' - assert results['NM_004333.5:c.1798_1799delinsAG']['hgvs_refseqgene_variant'] == '' - assert results['NM_004333.5:c.1798_1799delinsAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140453136', 'alt': 'CT'}} - assert results['NM_004333.5:c.1798_1799delinsAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140753336', 'alt': 'CT'}} - assert results['NM_004333.5:c.1798_1799delinsAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140453136', 'alt': 'CT'}} - assert 
results['NM_004333.5:c.1798_1799delinsAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140753336', 'alt': 'CT'}} - assert results['NM_004333.5:c.1798_1799delinsAG']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.5'} + def test_variant302(self): + variant = '7-140453136-AC-CT' + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() + print(results) assert results['flag'] == 'gene_variant' assert 'NM_001354609.1:c.1798_1799delinsAG' in list(results.keys()) - assert results['NM_001354609.1:c.1798_1799delinsAG']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001354609.1:c.1798_1799delinsAG']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001354609.1:c.1798_1799delinsAG']['alt_genomic_loci'], []) - assert results['NM_001354609.1:c.1798_1799delinsAG']['gene_symbol'] == 'BRAF' - assert results['NM_001354609.1:c.1798_1799delinsAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001341538.1:p.(Val600Arg)', 'slr': 'NP_001341538.1:p.(V600R)'} assert results['NM_001354609.1:c.1798_1799delinsAG']['submitted_variant'] == '7-140453136-AC-CT' - assert results['NM_001354609.1:c.1798_1799delinsAG']['genome_context_intronic_sequence'] == '' - assert results['NM_001354609.1:c.1798_1799delinsAG']['hgvs_lrg_variant'] == '' + assert results['NM_001354609.1:c.1798_1799delinsAG']['gene_symbol'] == 'BRAF' + assert results['NM_001354609.1:c.1798_1799delinsAG']['gene_ids'] == {'hgnc_id': 'HGNC:1097', 'entrez_gene_id': '673', 'ucsc_id': 'uc003vwc.5', 'omim_id': ['164757']} assert results['NM_001354609.1:c.1798_1799delinsAG']['hgvs_transcript_variant'] == 'NM_001354609.1:c.1798_1799delinsAG' + assert results['NM_001354609.1:c.1798_1799delinsAG']['genome_context_intronic_sequence'] == '' + assert 
results['NM_001354609.1:c.1798_1799delinsAG']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001354609.1:c.1798_1799delinsAG']['hgvs_refseqgene_variant'] == '' - assert results['NM_001354609.1:c.1798_1799delinsAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140453136', 'alt': 'CT'}} - assert results['NM_001354609.1:c.1798_1799delinsAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': 'chr7', 'ref': 'AC', 'pos': '140753336', 'alt': 'CT'}} - assert results['NM_001354609.1:c.1798_1799delinsAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140453136', 'alt': 'CT'}} - assert results['NM_001354609.1:c.1798_1799delinsAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': '7', 'ref': 'AC', 'pos': '140753336', 'alt': 'CT'}} - assert results['NM_001354609.1:c.1798_1799delinsAG']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341538.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354609.1'} + assert results['NM_001354609.1:c.1798_1799delinsAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001341538.1:p.(Val600Arg)', 'slr': 'NP_001341538.1:p.(V600R)'} + assert results['NM_001354609.1:c.1798_1799delinsAG']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001354609.1:c.1798_1799delinsAG']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001354609.1:c.1798_1799delinsAG']['alt_genomic_loci'], []) + assert results['NM_001354609.1:c.1798_1799delinsAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': 'chr7', 'pos': '140453136', 'ref': 
'AC', 'alt': 'CT'}} + assert results['NM_001354609.1:c.1798_1799delinsAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': 'chr7', 'pos': '140753336', 'ref': 'AC', 'alt': 'CT'}} + assert results['NM_001354609.1:c.1798_1799delinsAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': '7', 'pos': '140453136', 'ref': 'AC', 'alt': 'CT'}} + assert results['NM_001354609.1:c.1798_1799delinsAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': '7', 'pos': '140753336', 'ref': 'AC', 'alt': 'CT'}} + assert results['NM_001354609.1:c.1798_1799delinsAG']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354609.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341538.1'} + + assert 'NM_004333.5:c.1798_1799delinsAG' in list(results.keys()) + assert results['NM_004333.5:c.1798_1799delinsAG']['submitted_variant'] == '7-140453136-AC-CT' + assert results['NM_004333.5:c.1798_1799delinsAG']['gene_symbol'] == 'BRAF' + assert results['NM_004333.5:c.1798_1799delinsAG']['gene_ids'] == {'hgnc_id': 'HGNC:1097', 'entrez_gene_id': '673', 'ucsc_id': 'uc003vwc.5', 'omim_id': ['164757']} + assert results['NM_004333.5:c.1798_1799delinsAG']['hgvs_transcript_variant'] == 'NM_004333.5:c.1798_1799delinsAG' + assert results['NM_004333.5:c.1798_1799delinsAG']['genome_context_intronic_sequence'] == '' + assert results['NM_004333.5:c.1798_1799delinsAG']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004333.5:c.1798_1799delinsAG']['hgvs_refseqgene_variant'] == '' + assert results['NM_004333.5:c.1798_1799delinsAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004324.2(LRG_299p1):p.(Val600Arg)', 'slr': 'NP_004324.2:p.(V600R)'} + assert 
results['NM_004333.5:c.1798_1799delinsAG']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_004333.5:c.1798_1799delinsAG']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_004333.5:c.1798_1799delinsAG']['alt_genomic_loci'], []) + assert results['NM_004333.5:c.1798_1799delinsAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': 'chr7', 'pos': '140453136', 'ref': 'AC', 'alt': 'CT'}} + assert results['NM_004333.5:c.1798_1799delinsAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': 'chr7', 'pos': '140753336', 'ref': 'AC', 'alt': 'CT'}} + assert results['NM_004333.5:c.1798_1799delinsAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': '7', 'pos': '140453136', 'ref': 'AC', 'alt': 'CT'}} + assert results['NM_004333.5:c.1798_1799delinsAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': '7', 'pos': '140753336', 'ref': 'AC', 'alt': 'CT'}} + assert results['NM_004333.5:c.1798_1799delinsAG']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2'} + + assert 'NM_004333.4:c.1798_1799delinsAG' in list(results.keys()) + assert results['NM_004333.4:c.1798_1799delinsAG']['submitted_variant'] == '7-140453136-AC-CT' + assert results['NM_004333.4:c.1798_1799delinsAG']['gene_symbol'] == 'BRAF' + assert results['NM_004333.4:c.1798_1799delinsAG']['gene_ids'] == {'hgnc_id': 'HGNC:1097', 'entrez_gene_id': '673', 'ucsc_id': 'uc003vwc.5', 'omim_id': ['164757']} + assert results['NM_004333.4:c.1798_1799delinsAG']['hgvs_transcript_variant'] == 'NM_004333.4:c.1798_1799delinsAG' + assert 
results['NM_004333.4:c.1798_1799delinsAG']['genome_context_intronic_sequence'] == '' + assert results['NM_004333.4:c.1798_1799delinsAG']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_004333.4:c.1798_1799delinsAG']['hgvs_refseqgene_variant'] == 'NG_007873.2:g.176428_176429delinsAG' + assert results['NM_004333.4:c.1798_1799delinsAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004324.2(LRG_299p1):p.(Val600Arg)', 'slr': 'NP_004324.2:p.(V600R)'} + assert results['NM_004333.4:c.1798_1799delinsAG']['hgvs_lrg_transcript_variant'] == 'LRG_299t1:c.1798_1799delinsAG' + assert results['NM_004333.4:c.1798_1799delinsAG']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_004333.4:c.1798_1799delinsAG']['alt_genomic_loci'], []) + assert results['NM_004333.4:c.1798_1799delinsAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': 'chr7', 'pos': '140453136', 'ref': 'AC', 'alt': 'CT'}} + assert results['NM_004333.4:c.1798_1799delinsAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': 'chr7', 'pos': '140753336', 'ref': 'AC', 'alt': 'CT'}} + assert results['NM_004333.4:c.1798_1799delinsAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': '7', 'pos': '140453136', 'ref': 'AC', 'alt': 'CT'}} + assert results['NM_004333.4:c.1798_1799delinsAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': '7', 'pos': '140753336', 'ref': 'AC', 'alt': 'CT'}} + assert results['NM_004333.4:c.1798_1799delinsAG']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007873.2'} + assert 
'NR_148928.1:n.2896_2897delinsAG' in list(results.keys()) + assert results['NR_148928.1:n.2896_2897delinsAG']['submitted_variant'] == '7-140453136-AC-CT' + assert results['NR_148928.1:n.2896_2897delinsAG']['gene_symbol'] == 'BRAF' + assert results['NR_148928.1:n.2896_2897delinsAG']['gene_ids'] == {'hgnc_id': 'HGNC:1097', 'entrez_gene_id': '673', 'ucsc_id': 'uc003vwc.5', 'omim_id': ['164757']} + assert results['NR_148928.1:n.2896_2897delinsAG']['hgvs_transcript_variant'] == 'NR_148928.1:n.2896_2897delinsAG' + assert results['NR_148928.1:n.2896_2897delinsAG']['genome_context_intronic_sequence'] == '' + assert results['NR_148928.1:n.2896_2897delinsAG']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_148928.1:n.2896_2897delinsAG']['hgvs_refseqgene_variant'] == '' + assert results['NR_148928.1:n.2896_2897delinsAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_148928.1:n.2896_2897delinsAG']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_148928.1:n.2896_2897delinsAG']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_148928.1:n.2896_2897delinsAG']['alt_genomic_loci'], []) + assert results['NR_148928.1:n.2896_2897delinsAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': 'chr7', 'pos': '140453136', 'ref': 'AC', 'alt': 'CT'}} + assert results['NR_148928.1:n.2896_2897delinsAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': 'chr7', 'pos': '140753336', 'ref': 'AC', 'alt': 'CT'}} + assert results['NR_148928.1:n.2896_2897delinsAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136_140453137delinsCT', 'vcf': {'chr': '7', 'pos': '140453136', 'ref': 'AC', 'alt': 'CT'}} + assert results['NR_148928.1:n.2896_2897delinsAG']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000007.14:g.140753336_140753337delinsCT', 'vcf': {'chr': '7', 'pos': '140753336', 'ref': 'AC', 'alt': 'CT'}} + assert results['NR_148928.1:n.2896_2897delinsAG']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_148928.1'} def test_variant303(self): variant = '7-140453136-A-T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_001354609.1:c.1799T>A' in list(results.keys()) - assert results['NM_001354609.1:c.1799T>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001354609.1:c.1799T>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001354609.1:c.1799T>A']['alt_genomic_loci'], []) - assert results['NM_001354609.1:c.1799T>A']['gene_symbol'] == 'BRAF' - assert results['NM_001354609.1:c.1799T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001341538.1:p.(Val600Glu)', 'slr': 'NP_001341538.1:p.(V600E)'} assert results['NM_001354609.1:c.1799T>A']['submitted_variant'] == '7-140453136-A-T' - assert results['NM_001354609.1:c.1799T>A']['genome_context_intronic_sequence'] == '' - assert results['NM_001354609.1:c.1799T>A']['hgvs_lrg_variant'] == '' + assert results['NM_001354609.1:c.1799T>A']['gene_symbol'] == 'BRAF' + assert results['NM_001354609.1:c.1799T>A']['gene_ids'] == {'hgnc_id': 'HGNC:1097', 'entrez_gene_id': '673', 'ucsc_id': 'uc003vwc.5', 'omim_id': ['164757']} assert results['NM_001354609.1:c.1799T>A']['hgvs_transcript_variant'] == 'NM_001354609.1:c.1799T>A' + assert results['NM_001354609.1:c.1799T>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001354609.1:c.1799T>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001354609.1:c.1799T>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001354609.1:c.1799T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': 
{'chr': 'chr7', 'ref': 'A', 'pos': '140453136', 'alt': 'T'}} - assert results['NM_001354609.1:c.1799T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '140753336', 'alt': 'T'}} - assert results['NM_001354609.1:c.1799T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '140453136', 'alt': 'T'}} - assert results['NM_001354609.1:c.1799T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '140753336', 'alt': 'T'}} - assert results['NM_001354609.1:c.1799T>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341538.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354609.1'} - - assert 'NR_148928.1:n.2897T>A' in list(results.keys()) - assert results['NR_148928.1:n.2897T>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_148928.1:n.2897T>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_148928.1:n.2897T>A']['alt_genomic_loci'], []) - assert results['NR_148928.1:n.2897T>A']['gene_symbol'] == 'BRAF' - assert results['NR_148928.1:n.2897T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} - assert results['NR_148928.1:n.2897T>A']['submitted_variant'] == '7-140453136-A-T' - assert results['NR_148928.1:n.2897T>A']['genome_context_intronic_sequence'] == '' - assert results['NR_148928.1:n.2897T>A']['hgvs_lrg_variant'] == '' - assert results['NR_148928.1:n.2897T>A']['hgvs_transcript_variant'] == 'NR_148928.1:n.2897T>A' - assert results['NR_148928.1:n.2897T>A']['hgvs_refseqgene_variant'] == '' - assert results['NR_148928.1:n.2897T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': 
'140453136', 'alt': 'T'}} - assert results['NR_148928.1:n.2897T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '140753336', 'alt': 'T'}} - assert results['NR_148928.1:n.2897T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '140453136', 'alt': 'T'}} - assert results['NR_148928.1:n.2897T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '140753336', 'alt': 'T'}} - assert results['NR_148928.1:n.2897T>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_148928.1'} + assert results['NM_001354609.1:c.1799T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001341538.1:p.(Val600Glu)', 'slr': 'NP_001341538.1:p.(V600E)'} + assert results['NM_001354609.1:c.1799T>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001354609.1:c.1799T>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001354609.1:c.1799T>A']['alt_genomic_loci'], []) + assert results['NM_001354609.1:c.1799T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': 'chr7', 'pos': '140453136', 'ref': 'A', 'alt': 'T'}} + assert results['NM_001354609.1:c.1799T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': 'chr7', 'pos': '140753336', 'ref': 'A', 'alt': 'T'}} + assert results['NM_001354609.1:c.1799T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': '7', 'pos': '140453136', 'ref': 'A', 'alt': 'T'}} + assert results['NM_001354609.1:c.1799T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': '7', 'pos': '140753336', 'ref': 
'A', 'alt': 'T'}} + assert results['NM_001354609.1:c.1799T>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354609.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341538.1'} assert 'NM_004333.5:c.1799T>A' in list(results.keys()) - assert results['NM_004333.5:c.1799T>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_004333.5:c.1799T>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004333.5:c.1799T>A']['alt_genomic_loci'], []) - assert results['NM_004333.5:c.1799T>A']['gene_symbol'] == 'BRAF' - assert results['NM_004333.5:c.1799T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004324.2(LRG_299p1):p.(Val600Glu)', 'slr': 'NP_004324.2:p.(V600E)'} assert results['NM_004333.5:c.1799T>A']['submitted_variant'] == '7-140453136-A-T' - assert results['NM_004333.5:c.1799T>A']['genome_context_intronic_sequence'] == '' - assert results['NM_004333.5:c.1799T>A']['hgvs_lrg_variant'] == '' + assert results['NM_004333.5:c.1799T>A']['gene_symbol'] == 'BRAF' + assert results['NM_004333.5:c.1799T>A']['gene_ids'] == {'hgnc_id': 'HGNC:1097', 'entrez_gene_id': '673', 'ucsc_id': 'uc003vwc.5', 'omim_id': ['164757']} assert results['NM_004333.5:c.1799T>A']['hgvs_transcript_variant'] == 'NM_004333.5:c.1799T>A' + assert results['NM_004333.5:c.1799T>A']['genome_context_intronic_sequence'] == '' + assert results['NM_004333.5:c.1799T>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004333.5:c.1799T>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_004333.5:c.1799T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '140453136', 'alt': 'T'}} - assert results['NM_004333.5:c.1799T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '140753336', 'alt': 'T'}} - assert 
results['NM_004333.5:c.1799T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '140453136', 'alt': 'T'}} - assert results['NM_004333.5:c.1799T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '140753336', 'alt': 'T'}} - assert results['NM_004333.5:c.1799T>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.5'} + assert results['NM_004333.5:c.1799T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004324.2(LRG_299p1):p.(Val600Glu)', 'slr': 'NP_004324.2:p.(V600E)'} + assert results['NM_004333.5:c.1799T>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_004333.5:c.1799T>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_004333.5:c.1799T>A']['alt_genomic_loci'], []) + assert results['NM_004333.5:c.1799T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': 'chr7', 'pos': '140453136', 'ref': 'A', 'alt': 'T'}} + assert results['NM_004333.5:c.1799T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': 'chr7', 'pos': '140753336', 'ref': 'A', 'alt': 'T'}} + assert results['NM_004333.5:c.1799T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': '7', 'pos': '140453136', 'ref': 'A', 'alt': 'T'}} + assert results['NM_004333.5:c.1799T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': '7', 'pos': '140753336', 'ref': 'A', 'alt': 'T'}} + assert results['NM_004333.5:c.1799T>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.5', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2'} - assert results['flag'] == 'gene_variant' assert 'NM_004333.4:c.1799T>A' in list(results.keys()) - assert results['NM_004333.4:c.1799T>A']['hgvs_lrg_transcript_variant'] == 'LRG_299t1:c.1799T>A' - assert results['NM_004333.4:c.1799T>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004333.4:c.1799T>A']['alt_genomic_loci'], []) - assert results['NM_004333.4:c.1799T>A']['gene_symbol'] == 'BRAF' - assert results['NM_004333.4:c.1799T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004324.2(LRG_299p1):p.(Val600Glu)', 'slr': 'NP_004324.2:p.(V600E)'} assert results['NM_004333.4:c.1799T>A']['submitted_variant'] == '7-140453136-A-T' - assert results['NM_004333.4:c.1799T>A']['genome_context_intronic_sequence'] == '' - assert results['NM_004333.4:c.1799T>A']['hgvs_lrg_variant'] == '' + assert results['NM_004333.4:c.1799T>A']['gene_symbol'] == 'BRAF' + assert results['NM_004333.4:c.1799T>A']['gene_ids'] == {'hgnc_id': 'HGNC:1097', 'entrez_gene_id': '673', 'ucsc_id': 'uc003vwc.5', 'omim_id': ['164757']} assert results['NM_004333.4:c.1799T>A']['hgvs_transcript_variant'] == 'NM_004333.4:c.1799T>A' + assert results['NM_004333.4:c.1799T>A']['genome_context_intronic_sequence'] == '' + assert results['NM_004333.4:c.1799T>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004333.4:c.1799T>A']['hgvs_refseqgene_variant'] == 'NG_007873.2:g.176429T>A' - assert results['NM_004333.4:c.1799T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '140453136', 'alt': 'T'}} - assert results['NM_004333.4:c.1799T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '140753336', 'alt': 'T'}} - assert results['NM_004333.4:c.1799T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000007.13:g.140453136A>T', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '140453136', 'alt': 'T'}} - assert results['NM_004333.4:c.1799T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '140753336', 'alt': 'T'}} - assert results['NM_004333.4:c.1799T>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007873.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.4'} + assert results['NM_004333.4:c.1799T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004324.2(LRG_299p1):p.(Val600Glu)', 'slr': 'NP_004324.2:p.(V600E)'} + assert results['NM_004333.4:c.1799T>A']['hgvs_lrg_transcript_variant'] == 'LRG_299t1:c.1799T>A' + assert results['NM_004333.4:c.1799T>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_004333.4:c.1799T>A']['alt_genomic_loci'], []) + assert results['NM_004333.4:c.1799T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': 'chr7', 'pos': '140453136', 'ref': 'A', 'alt': 'T'}} + assert results['NM_004333.4:c.1799T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': 'chr7', 'pos': '140753336', 'ref': 'A', 'alt': 'T'}} + assert results['NM_004333.4:c.1799T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': '7', 'pos': '140453136', 'ref': 'A', 'alt': 'T'}} + assert results['NM_004333.4:c.1799T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': '7', 'pos': '140753336', 'ref': 'A', 'alt': 'T'}} + assert results['NM_004333.4:c.1799T>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.4', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007873.2'} + assert 'NR_148928.1:n.2897T>A' in list(results.keys()) + assert results['NR_148928.1:n.2897T>A']['submitted_variant'] == '7-140453136-A-T' + assert results['NR_148928.1:n.2897T>A']['gene_symbol'] == 'BRAF' + assert results['NR_148928.1:n.2897T>A']['gene_ids'] == {'hgnc_id': 'HGNC:1097', 'entrez_gene_id': '673', 'ucsc_id': 'uc003vwc.5', 'omim_id': ['164757']} + assert results['NR_148928.1:n.2897T>A']['hgvs_transcript_variant'] == 'NR_148928.1:n.2897T>A' + assert results['NR_148928.1:n.2897T>A']['genome_context_intronic_sequence'] == '' + assert results['NR_148928.1:n.2897T>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_148928.1:n.2897T>A']['hgvs_refseqgene_variant'] == '' + assert results['NR_148928.1:n.2897T>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_148928.1:n.2897T>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_148928.1:n.2897T>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_148928.1:n.2897T>A']['alt_genomic_loci'], []) + assert results['NR_148928.1:n.2897T>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': 'chr7', 'pos': '140453136', 'ref': 'A', 'alt': 'T'}} + assert results['NR_148928.1:n.2897T>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': 'chr7', 'pos': '140753336', 'ref': 'A', 'alt': 'T'}} + assert results['NR_148928.1:n.2897T>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453136A>T', 'vcf': {'chr': '7', 'pos': '140453136', 'ref': 'A', 'alt': 'T'}} + assert results['NR_148928.1:n.2897T>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753336A>T', 'vcf': {'chr': '7', 'pos': '140753336', 'ref': 'A', 
'alt': 'T'}} + assert results['NR_148928.1:n.2897T>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_148928.1'} def test_variant304(self): variant = '7-140453137-C-T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NR_148928.1:n.2896G>A' in list(results.keys()) - assert results['NR_148928.1:n.2896G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_148928.1:n.2896G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_148928.1:n.2896G>A']['alt_genomic_loci'], []) - assert results['NR_148928.1:n.2896G>A']['gene_symbol'] == 'BRAF' - assert results['NR_148928.1:n.2896G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} - assert results['NR_148928.1:n.2896G>A']['submitted_variant'] == '7-140453137-C-T' - assert results['NR_148928.1:n.2896G>A']['genome_context_intronic_sequence'] == '' - assert results['NR_148928.1:n.2896G>A']['hgvs_lrg_variant'] == '' - assert results['NR_148928.1:n.2896G>A']['hgvs_transcript_variant'] == 'NR_148928.1:n.2896G>A' - assert results['NR_148928.1:n.2896G>A']['hgvs_refseqgene_variant'] == '' - assert results['NR_148928.1:n.2896G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '140453137', 'alt': 'T'}} - assert results['NR_148928.1:n.2896G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '140753337', 'alt': 'T'}} - assert results['NR_148928.1:n.2896G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '140453137', 'alt': 'T'}} - assert results['NR_148928.1:n.2896G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': '7', 'ref': 'C', 
'pos': '140753337', 'alt': 'T'}} - assert results['NR_148928.1:n.2896G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_148928.1'} + assert results['flag'] == 'gene_variant' + assert 'NM_001354609.1:c.1798G>A' in list(results.keys()) + assert results['NM_001354609.1:c.1798G>A']['submitted_variant'] == '7-140453137-C-T' + assert results['NM_001354609.1:c.1798G>A']['gene_symbol'] == 'BRAF' + assert results['NM_001354609.1:c.1798G>A']['gene_ids'] == {'hgnc_id': 'HGNC:1097', 'entrez_gene_id': '673', 'ucsc_id': 'uc003vwc.5', 'omim_id': ['164757']} + assert results['NM_001354609.1:c.1798G>A']['hgvs_transcript_variant'] == 'NM_001354609.1:c.1798G>A' + assert results['NM_001354609.1:c.1798G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_001354609.1:c.1798G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001354609.1:c.1798G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_001354609.1:c.1798G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001341538.1:p.(Val600Met)', 'slr': 'NP_001341538.1:p.(V600M)'} + assert results['NM_001354609.1:c.1798G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001354609.1:c.1798G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001354609.1:c.1798G>A']['alt_genomic_loci'], []) + assert results['NM_001354609.1:c.1798G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': 'chr7', 'pos': '140453137', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001354609.1:c.1798G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': 'chr7', 'pos': '140753337', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001354609.1:c.1798G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': '7', 'pos': '140453137', 'ref': 'C', 'alt': 'T'}} + assert 
results['NM_001354609.1:c.1798G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': '7', 'pos': '140753337', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001354609.1:c.1798G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354609.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341538.1'} assert 'NM_004333.5:c.1798G>A' in list(results.keys()) - assert results['NM_004333.5:c.1798G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_004333.5:c.1798G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004333.5:c.1798G>A']['alt_genomic_loci'], []) - assert results['NM_004333.5:c.1798G>A']['gene_symbol'] == 'BRAF' - assert results['NM_004333.5:c.1798G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004324.2(LRG_299p1):p.(Val600Met)', 'slr': 'NP_004324.2:p.(V600M)'} assert results['NM_004333.5:c.1798G>A']['submitted_variant'] == '7-140453137-C-T' - assert results['NM_004333.5:c.1798G>A']['genome_context_intronic_sequence'] == '' - assert results['NM_004333.5:c.1798G>A']['hgvs_lrg_variant'] == '' + assert results['NM_004333.5:c.1798G>A']['gene_symbol'] == 'BRAF' + assert results['NM_004333.5:c.1798G>A']['gene_ids'] == {'hgnc_id': 'HGNC:1097', 'entrez_gene_id': '673', 'ucsc_id': 'uc003vwc.5', 'omim_id': ['164757']} assert results['NM_004333.5:c.1798G>A']['hgvs_transcript_variant'] == 'NM_004333.5:c.1798G>A' + assert results['NM_004333.5:c.1798G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_004333.5:c.1798G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004333.5:c.1798G>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_004333.5:c.1798G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '140453137', 'alt': 'T'}} - assert 
results['NM_004333.5:c.1798G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '140753337', 'alt': 'T'}} - assert results['NM_004333.5:c.1798G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '140453137', 'alt': 'T'}} - assert results['NM_004333.5:c.1798G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '140753337', 'alt': 'T'}} - assert results['NM_004333.5:c.1798G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.5'} + assert results['NM_004333.5:c.1798G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004324.2(LRG_299p1):p.(Val600Met)', 'slr': 'NP_004324.2:p.(V600M)'} + assert results['NM_004333.5:c.1798G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_004333.5:c.1798G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_004333.5:c.1798G>A']['alt_genomic_loci'], []) + assert results['NM_004333.5:c.1798G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': 'chr7', 'pos': '140453137', 'ref': 'C', 'alt': 'T'}} + assert results['NM_004333.5:c.1798G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': 'chr7', 'pos': '140753337', 'ref': 'C', 'alt': 'T'}} + assert results['NM_004333.5:c.1798G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': '7', 'pos': '140453137', 'ref': 'C', 'alt': 'T'}} + assert results['NM_004333.5:c.1798G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': '7', 'pos': 
'140753337', 'ref': 'C', 'alt': 'T'}} + assert results['NM_004333.5:c.1798G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2'} assert 'NM_004333.4:c.1798G>A' in list(results.keys()) - assert results['NM_004333.4:c.1798G>A']['hgvs_lrg_transcript_variant'] == 'LRG_299t1:c.1798G>A' - assert results['NM_004333.4:c.1798G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004333.4:c.1798G>A']['alt_genomic_loci'], []) - assert results['NM_004333.4:c.1798G>A']['gene_symbol'] == 'BRAF' - assert results['NM_004333.4:c.1798G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004324.2(LRG_299p1):p.(Val600Met)', 'slr': 'NP_004324.2:p.(V600M)'} assert results['NM_004333.4:c.1798G>A']['submitted_variant'] == '7-140453137-C-T' - assert results['NM_004333.4:c.1798G>A']['genome_context_intronic_sequence'] == '' - assert results['NM_004333.4:c.1798G>A']['hgvs_lrg_variant'] == '' + assert results['NM_004333.4:c.1798G>A']['gene_symbol'] == 'BRAF' + assert results['NM_004333.4:c.1798G>A']['gene_ids'] == {'hgnc_id': 'HGNC:1097', 'entrez_gene_id': '673', 'ucsc_id': 'uc003vwc.5', 'omim_id': ['164757']} assert results['NM_004333.4:c.1798G>A']['hgvs_transcript_variant'] == 'NM_004333.4:c.1798G>A' + assert results['NM_004333.4:c.1798G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_004333.4:c.1798G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004333.4:c.1798G>A']['hgvs_refseqgene_variant'] == 'NG_007873.2:g.176428G>A' - assert results['NM_004333.4:c.1798G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '140453137', 'alt': 'T'}} - assert results['NM_004333.4:c.1798G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': 
'140753337', 'alt': 'T'}} - assert results['NM_004333.4:c.1798G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '140453137', 'alt': 'T'}} - assert results['NM_004333.4:c.1798G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '140753337', 'alt': 'T'}} - assert results['NM_004333.4:c.1798G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007873.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.4'} - - assert 'NM_001354609.1:c.1798G>A' in list(results.keys()) - assert results['NM_001354609.1:c.1798G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001354609.1:c.1798G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001354609.1:c.1798G>A']['alt_genomic_loci'], []) - assert results['NM_001354609.1:c.1798G>A']['gene_symbol'] == 'BRAF' - assert results['NM_001354609.1:c.1798G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001341538.1:p.(Val600Met)', 'slr': 'NP_001341538.1:p.(V600M)'} - assert results['NM_001354609.1:c.1798G>A']['submitted_variant'] == '7-140453137-C-T' - assert results['NM_001354609.1:c.1798G>A']['genome_context_intronic_sequence'] == '' - assert results['NM_001354609.1:c.1798G>A']['hgvs_lrg_variant'] == '' - assert results['NM_001354609.1:c.1798G>A']['hgvs_transcript_variant'] == 'NM_001354609.1:c.1798G>A' - assert results['NM_001354609.1:c.1798G>A']['hgvs_refseqgene_variant'] == '' - assert results['NM_001354609.1:c.1798G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '140453137', 'alt': 'T'}} - assert results['NM_001354609.1:c.1798G>A']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '140753337', 'alt': 'T'}} - assert results['NM_001354609.1:c.1798G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '140453137', 'alt': 'T'}} - assert results['NM_001354609.1:c.1798G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '140753337', 'alt': 'T'}} - assert results['NM_001354609.1:c.1798G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001341538.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001354609.1'} + assert results['NM_004333.4:c.1798G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004324.2(LRG_299p1):p.(Val600Met)', 'slr': 'NP_004324.2:p.(V600M)'} + assert results['NM_004333.4:c.1798G>A']['hgvs_lrg_transcript_variant'] == 'LRG_299t1:c.1798G>A' + assert results['NM_004333.4:c.1798G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_004333.4:c.1798G>A']['alt_genomic_loci'], []) + assert results['NM_004333.4:c.1798G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': 'chr7', 'pos': '140453137', 'ref': 'C', 'alt': 'T'}} + assert results['NM_004333.4:c.1798G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': 'chr7', 'pos': '140753337', 'ref': 'C', 'alt': 'T'}} + assert results['NM_004333.4:c.1798G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': '7', 'pos': '140453137', 'ref': 'C', 'alt': 'T'}} + assert results['NM_004333.4:c.1798G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': '7', 'pos': '140753337', 'ref': 'C', 'alt': 'T'}} + assert 
results['NM_004333.4:c.1798G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004333.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004324.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007873.2'} - assert results['flag'] == 'gene_variant' + assert 'NR_148928.1:n.2896G>A' in list(results.keys()) + assert results['NR_148928.1:n.2896G>A']['submitted_variant'] == '7-140453137-C-T' + assert results['NR_148928.1:n.2896G>A']['gene_symbol'] == 'BRAF' + assert results['NR_148928.1:n.2896G>A']['gene_ids'] == {'hgnc_id': 'HGNC:1097', 'entrez_gene_id': '673', 'ucsc_id': 'uc003vwc.5', 'omim_id': ['164757']} + assert results['NR_148928.1:n.2896G>A']['hgvs_transcript_variant'] == 'NR_148928.1:n.2896G>A' + assert results['NR_148928.1:n.2896G>A']['genome_context_intronic_sequence'] == '' + assert results['NR_148928.1:n.2896G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_148928.1:n.2896G>A']['hgvs_refseqgene_variant'] == '' + assert results['NR_148928.1:n.2896G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_148928.1:n.2896G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_148928.1:n.2896G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_148928.1:n.2896G>A']['alt_genomic_loci'], []) + assert results['NR_148928.1:n.2896G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': 'chr7', 'pos': '140453137', 'ref': 'C', 'alt': 'T'}} + assert results['NR_148928.1:n.2896G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': 'chr7', 'pos': '140753337', 'ref': 'C', 'alt': 'T'}} + assert results['NR_148928.1:n.2896G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.140453137C>T', 'vcf': {'chr': '7', 'pos': '140453137', 'ref': 'C', 'alt': 'T'}} + assert 
results['NR_148928.1:n.2896G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.140753337C>T', 'vcf': {'chr': '7', 'pos': '140753337', 'ref': 'C', 'alt': 'T'}} + assert results['NR_148928.1:n.2896G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_148928.1'} def test_variant305(self): variant = '7-143013488-A-T' @@ -16828,80 +17388,82 @@ def test_variant305(self): assert results['flag'] == 'gene_variant' assert 'NM_000083.2:c.180+3A>T' in list(results.keys()) - assert results['NM_000083.2:c.180+3A>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000083.2:c.180+3A>T']['refseqgene_context_intronic_sequence'] == 'NG_009815.1(NM_000083.2):c.180+3A>T' - self.assertCountEqual(results['NM_000083.2:c.180+3A>T']['alt_genomic_loci'], []) - assert results['NM_000083.2:c.180+3A>T']['gene_symbol'] == 'CLCN1' - assert results['NM_000083.2:c.180+3A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000074.2:p.?', 'slr': 'NP_000074.2:p.?'} assert results['NM_000083.2:c.180+3A>T']['submitted_variant'] == '7-143013488-A-T' - assert results['NM_000083.2:c.180+3A>T']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_000083.2):c.180+3A>T' - assert results['NM_000083.2:c.180+3A>T']['hgvs_lrg_variant'] == '' + assert results['NM_000083.2:c.180+3A>T']['gene_symbol'] == 'CLCN1' + assert results['NM_000083.2:c.180+3A>T']['gene_ids'] == {'hgnc_id': 'HGNC:2019', 'entrez_gene_id': '1180', 'ucsc_id': 'uc003wcr.2', 'omim_id': ['118425']} assert results['NM_000083.2:c.180+3A>T']['hgvs_transcript_variant'] == 'NM_000083.2:c.180+3A>T' + assert results['NM_000083.2:c.180+3A>T']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_000083.2):c.180+3A>T' + assert results['NM_000083.2:c.180+3A>T']['refseqgene_context_intronic_sequence'] == 'NG_009815.1(NM_000083.2):c.180+3A>T' assert results['NM_000083.2:c.180+3A>T']['hgvs_refseqgene_variant'] == 'NG_009815.1:g.5270A>T' - assert 
results['NM_000083.2:c.180+3A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.143013488A>T', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '143013488', 'alt': 'T'}} - assert results['NM_000083.2:c.180+3A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143316395A>T', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '143316395', 'alt': 'T'}} - assert results['NM_000083.2:c.180+3A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.143013488A>T', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '143013488', 'alt': 'T'}} - assert results['NM_000083.2:c.180+3A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143316395A>T', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '143316395', 'alt': 'T'}} - assert results['NM_000083.2:c.180+3A>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009815.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000074.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000083.2'} + assert results['NM_000083.2:c.180+3A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000074.2:p.?', 'slr': 'NP_000074.2:p.?'} + assert results['NM_000083.2:c.180+3A>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000083.2:c.180+3A>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000083.2:c.180+3A>T']['alt_genomic_loci'], []) + assert results['NM_000083.2:c.180+3A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.143013488A>T', 'vcf': {'chr': 'chr7', 'pos': '143013488', 'ref': 'A', 'alt': 'T'}} + assert results['NM_000083.2:c.180+3A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143316395A>T', 'vcf': {'chr': 'chr7', 'pos': '143316395', 'ref': 'A', 'alt': 'T'}} + assert results['NM_000083.2:c.180+3A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000007.13:g.143013488A>T', 'vcf': {'chr': '7', 'pos': '143013488', 'ref': 'A', 'alt': 'T'}} + assert results['NM_000083.2:c.180+3A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143316395A>T', 'vcf': {'chr': '7', 'pos': '143316395', 'ref': 'A', 'alt': 'T'}} + assert results['NM_000083.2:c.180+3A>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000083.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000074.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009815.1'} assert 'NR_046453.1:n.267+3A>T' in list(results.keys()) - assert results['NR_046453.1:n.267+3A>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_046453.1:n.267+3A>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_046453.1:n.267+3A>T']['alt_genomic_loci'], []) - assert results['NR_046453.1:n.267+3A>T']['gene_symbol'] == 'CLCN1' - assert results['NR_046453.1:n.267+3A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_046453.1:n.267+3A>T']['submitted_variant'] == '7-143013488-A-T' - assert results['NR_046453.1:n.267+3A>T']['genome_context_intronic_sequence'] == 'NC_000007.13(NR_046453.1):c.267+3A>T' - assert results['NR_046453.1:n.267+3A>T']['hgvs_lrg_variant'] == '' + assert results['NR_046453.1:n.267+3A>T']['gene_symbol'] == 'CLCN1' + assert results['NR_046453.1:n.267+3A>T']['gene_ids'] == {'hgnc_id': 'HGNC:2019', 'entrez_gene_id': '1180', 'ucsc_id': 'uc003wcr.2', 'omim_id': ['118425']} assert results['NR_046453.1:n.267+3A>T']['hgvs_transcript_variant'] == 'NR_046453.1:n.267+3A>T' + assert results['NR_046453.1:n.267+3A>T']['genome_context_intronic_sequence'] == 'NC_000007.13(NR_046453.1):c.267+3A>T' + assert results['NR_046453.1:n.267+3A>T']['refseqgene_context_intronic_sequence'] == '' assert results['NR_046453.1:n.267+3A>T']['hgvs_refseqgene_variant'] == '' - assert 
results['NR_046453.1:n.267+3A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.143013488A>T', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '143013488', 'alt': 'T'}} - assert results['NR_046453.1:n.267+3A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143316395A>T', 'vcf': {'chr': 'chr7', 'ref': 'A', 'pos': '143316395', 'alt': 'T'}} - assert results['NR_046453.1:n.267+3A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.143013488A>T', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '143013488', 'alt': 'T'}} - assert results['NR_046453.1:n.267+3A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143316395A>T', 'vcf': {'chr': '7', 'ref': 'A', 'pos': '143316395', 'alt': 'T'}} - assert results['NR_046453.1:n.267+3A>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_046453.1'} - - - def test_variant306(self): - variant = '7-143018934-G-A' - results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() - print(results) - - assert 'NR_046453.1:n.776G>A' in list(results.keys()) - assert results['NR_046453.1:n.776G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_046453.1:n.776G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_046453.1:n.776G>A']['alt_genomic_loci'], []) - assert results['NR_046453.1:n.776G>A']['gene_symbol'] == 'CLCN1' - assert results['NR_046453.1:n.776G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} - assert results['NR_046453.1:n.776G>A']['submitted_variant'] == '7-143018934-G-A' - assert results['NR_046453.1:n.776G>A']['genome_context_intronic_sequence'] == '' - assert results['NR_046453.1:n.776G>A']['hgvs_lrg_variant'] == '' - assert results['NR_046453.1:n.776G>A']['hgvs_transcript_variant'] == 'NR_046453.1:n.776G>A' - assert 
results['NR_046453.1:n.776G>A']['hgvs_refseqgene_variant'] == '' - assert results['NR_046453.1:n.776G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.143018934G>A', 'vcf': {'chr': 'chr7', 'ref': 'G', 'pos': '143018934', 'alt': 'A'}} - assert results['NR_046453.1:n.776G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143321841G>A', 'vcf': {'chr': 'chr7', 'ref': 'G', 'pos': '143321841', 'alt': 'A'}} - assert results['NR_046453.1:n.776G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.143018934G>A', 'vcf': {'chr': '7', 'ref': 'G', 'pos': '143018934', 'alt': 'A'}} - assert results['NR_046453.1:n.776G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143321841G>A', 'vcf': {'chr': '7', 'ref': 'G', 'pos': '143321841', 'alt': 'A'}} - assert results['NR_046453.1:n.776G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_046453.1'} + assert results['NR_046453.1:n.267+3A>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_046453.1:n.267+3A>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_046453.1:n.267+3A>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_046453.1:n.267+3A>T']['alt_genomic_loci'], []) + assert results['NR_046453.1:n.267+3A>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.143013488A>T', 'vcf': {'chr': 'chr7', 'pos': '143013488', 'ref': 'A', 'alt': 'T'}} + assert results['NR_046453.1:n.267+3A>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143316395A>T', 'vcf': {'chr': 'chr7', 'pos': '143316395', 'ref': 'A', 'alt': 'T'}} + assert results['NR_046453.1:n.267+3A>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.143013488A>T', 'vcf': {'chr': '7', 'pos': '143013488', 'ref': 'A', 
'alt': 'T'}} + assert results['NR_046453.1:n.267+3A>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143316395A>T', 'vcf': {'chr': '7', 'pos': '143316395', 'ref': 'A', 'alt': 'T'}} + assert results['NR_046453.1:n.267+3A>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_046453.1'} + + def test_variant306(self): + variant = '7-143018934-G-A' + results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() + print(results) assert results['flag'] == 'gene_variant' assert 'NM_000083.2:c.689G>A' in list(results.keys()) - assert results['NM_000083.2:c.689G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000083.2:c.689G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000083.2:c.689G>A']['alt_genomic_loci'], []) - assert results['NM_000083.2:c.689G>A']['gene_symbol'] == 'CLCN1' - assert results['NM_000083.2:c.689G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000074.2:p.(Gly230Glu)', 'slr': 'NP_000074.2:p.(G230E)'} assert results['NM_000083.2:c.689G>A']['submitted_variant'] == '7-143018934-G-A' - assert results['NM_000083.2:c.689G>A']['genome_context_intronic_sequence'] == '' - assert results['NM_000083.2:c.689G>A']['hgvs_lrg_variant'] == '' + assert results['NM_000083.2:c.689G>A']['gene_symbol'] == 'CLCN1' + assert results['NM_000083.2:c.689G>A']['gene_ids'] == {'hgnc_id': 'HGNC:2019', 'entrez_gene_id': '1180', 'ucsc_id': 'uc003wcr.2', 'omim_id': ['118425']} assert results['NM_000083.2:c.689G>A']['hgvs_transcript_variant'] == 'NM_000083.2:c.689G>A' + assert results['NM_000083.2:c.689G>A']['genome_context_intronic_sequence'] == '' + assert results['NM_000083.2:c.689G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000083.2:c.689G>A']['hgvs_refseqgene_variant'] == 'NG_009815.1:g.10716G>A' - assert results['NM_000083.2:c.689G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000007.13:g.143018934G>A', 'vcf': {'chr': 'chr7', 'ref': 'G', 'pos': '143018934', 'alt': 'A'}} - assert results['NM_000083.2:c.689G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143321841G>A', 'vcf': {'chr': 'chr7', 'ref': 'G', 'pos': '143321841', 'alt': 'A'}} - assert results['NM_000083.2:c.689G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.143018934G>A', 'vcf': {'chr': '7', 'ref': 'G', 'pos': '143018934', 'alt': 'A'}} - assert results['NM_000083.2:c.689G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143321841G>A', 'vcf': {'chr': '7', 'ref': 'G', 'pos': '143321841', 'alt': 'A'}} - assert results['NM_000083.2:c.689G>A']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009815.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000074.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000083.2'} + assert results['NM_000083.2:c.689G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000074.2:p.(Gly230Glu)', 'slr': 'NP_000074.2:p.(G230E)'} + assert results['NM_000083.2:c.689G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000083.2:c.689G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000083.2:c.689G>A']['alt_genomic_loci'], []) + assert results['NM_000083.2:c.689G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.143018934G>A', 'vcf': {'chr': 'chr7', 'pos': '143018934', 'ref': 'G', 'alt': 'A'}} + assert results['NM_000083.2:c.689G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143321841G>A', 'vcf': {'chr': 'chr7', 'pos': '143321841', 'ref': 'G', 'alt': 'A'}} + assert results['NM_000083.2:c.689G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.143018934G>A', 'vcf': {'chr': '7', 'pos': '143018934', 'ref': 'G', 'alt': 'A'}} + assert 
results['NM_000083.2:c.689G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143321841G>A', 'vcf': {'chr': '7', 'pos': '143321841', 'ref': 'G', 'alt': 'A'}} + assert results['NM_000083.2:c.689G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000083.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000074.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009815.1'} + assert 'NR_046453.1:n.776G>A' in list(results.keys()) + assert results['NR_046453.1:n.776G>A']['submitted_variant'] == '7-143018934-G-A' + assert results['NR_046453.1:n.776G>A']['gene_symbol'] == 'CLCN1' + assert results['NR_046453.1:n.776G>A']['gene_ids'] == {'hgnc_id': 'HGNC:2019', 'entrez_gene_id': '1180', 'ucsc_id': 'uc003wcr.2', 'omim_id': ['118425']} + assert results['NR_046453.1:n.776G>A']['hgvs_transcript_variant'] == 'NR_046453.1:n.776G>A' + assert results['NR_046453.1:n.776G>A']['genome_context_intronic_sequence'] == '' + assert results['NR_046453.1:n.776G>A']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_046453.1:n.776G>A']['hgvs_refseqgene_variant'] == '' + assert results['NR_046453.1:n.776G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_046453.1:n.776G>A']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_046453.1:n.776G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_046453.1:n.776G>A']['alt_genomic_loci'], []) + assert results['NR_046453.1:n.776G>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.143018934G>A', 'vcf': {'chr': 'chr7', 'pos': '143018934', 'ref': 'G', 'alt': 'A'}} + assert results['NR_046453.1:n.776G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143321841G>A', 'vcf': {'chr': 'chr7', 'pos': '143321841', 'ref': 'G', 'alt': 'A'}} + assert 
results['NR_046453.1:n.776G>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.143018934G>A', 'vcf': {'chr': '7', 'pos': '143018934', 'ref': 'G', 'alt': 'A'}} + assert results['NR_046453.1:n.776G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143321841G>A', 'vcf': {'chr': '7', 'pos': '143321841', 'ref': 'G', 'alt': 'A'}} + assert results['NR_046453.1:n.776G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_046453.1'} def test_variant307(self): variant = '7-143048771-C-T' @@ -16909,432 +17471,449 @@ def test_variant307(self): print(results) assert results['flag'] == 'gene_variant' - assert 'NR_046453.1:n.2620C>T' in list(results.keys()) - assert results['NR_046453.1:n.2620C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_046453.1:n.2620C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_046453.1:n.2620C>T']['alt_genomic_loci'], []) - assert results['NR_046453.1:n.2620C>T']['gene_symbol'] == 'CLCN1' - assert results['NR_046453.1:n.2620C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} - assert results['NR_046453.1:n.2620C>T']['submitted_variant'] == '7-143048771-C-T' - assert results['NR_046453.1:n.2620C>T']['genome_context_intronic_sequence'] == '' - assert results['NR_046453.1:n.2620C>T']['hgvs_lrg_variant'] == '' - assert results['NR_046453.1:n.2620C>T']['hgvs_transcript_variant'] == 'NR_046453.1:n.2620C>T' - assert results['NR_046453.1:n.2620C>T']['hgvs_refseqgene_variant'] == '' - assert results['NR_046453.1:n.2620C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.143048771C>T', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '143048771', 'alt': 'T'}} - assert results['NR_046453.1:n.2620C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143351678C>T', 'vcf': {'chr': 'chr7', 'ref': 'C', 
'pos': '143351678', 'alt': 'T'}} - assert results['NR_046453.1:n.2620C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.143048771C>T', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '143048771', 'alt': 'T'}} - assert results['NR_046453.1:n.2620C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143351678C>T', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '143351678', 'alt': 'T'}} - assert results['NR_046453.1:n.2620C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_046453.1'} - assert 'NM_000083.2:c.2680C>T' in list(results.keys()) - assert results['NM_000083.2:c.2680C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000083.2:c.2680C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000083.2:c.2680C>T']['alt_genomic_loci'], []) - assert results['NM_000083.2:c.2680C>T']['gene_symbol'] == 'CLCN1' - assert results['NM_000083.2:c.2680C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000074.2:p.(Arg894Ter)', 'slr': 'NP_000074.2:p.(R894*)'} assert results['NM_000083.2:c.2680C>T']['submitted_variant'] == '7-143048771-C-T' - assert results['NM_000083.2:c.2680C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_000083.2:c.2680C>T']['hgvs_lrg_variant'] == '' + assert results['NM_000083.2:c.2680C>T']['gene_symbol'] == 'CLCN1' + assert results['NM_000083.2:c.2680C>T']['gene_ids'] == {'hgnc_id': 'HGNC:2019', 'entrez_gene_id': '1180', 'ucsc_id': 'uc003wcr.2', 'omim_id': ['118425']} assert results['NM_000083.2:c.2680C>T']['hgvs_transcript_variant'] == 'NM_000083.2:c.2680C>T' + assert results['NM_000083.2:c.2680C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000083.2:c.2680C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000083.2:c.2680C>T']['hgvs_refseqgene_variant'] == 'NG_009815.1:g.40553C>T' - assert 
results['NM_000083.2:c.2680C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.143048771C>T', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '143048771', 'alt': 'T'}} - assert results['NM_000083.2:c.2680C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143351678C>T', 'vcf': {'chr': 'chr7', 'ref': 'C', 'pos': '143351678', 'alt': 'T'}} - assert results['NM_000083.2:c.2680C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.143048771C>T', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '143048771', 'alt': 'T'}} - assert results['NM_000083.2:c.2680C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143351678C>T', 'vcf': {'chr': '7', 'ref': 'C', 'pos': '143351678', 'alt': 'T'}} - assert results['NM_000083.2:c.2680C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009815.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000074.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000083.2'} + assert results['NM_000083.2:c.2680C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000074.2:p.(Arg894Ter)', 'slr': 'NP_000074.2:p.(R894*)'} + assert results['NM_000083.2:c.2680C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000083.2:c.2680C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000083.2:c.2680C>T']['alt_genomic_loci'], []) + assert results['NM_000083.2:c.2680C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.143048771C>T', 'vcf': {'chr': 'chr7', 'pos': '143048771', 'ref': 'C', 'alt': 'T'}} + assert results['NM_000083.2:c.2680C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143351678C>T', 'vcf': {'chr': 'chr7', 'pos': '143351678', 'ref': 'C', 'alt': 'T'}} + assert results['NM_000083.2:c.2680C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 
'NC_000007.13:g.143048771C>T', 'vcf': {'chr': '7', 'pos': '143048771', 'ref': 'C', 'alt': 'T'}} + assert results['NM_000083.2:c.2680C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143351678C>T', 'vcf': {'chr': '7', 'pos': '143351678', 'ref': 'C', 'alt': 'T'}} + assert results['NM_000083.2:c.2680C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000083.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000074.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009815.1'} + assert 'NR_046453.1:n.2620C>T' in list(results.keys()) + assert results['NR_046453.1:n.2620C>T']['submitted_variant'] == '7-143048771-C-T' + assert results['NR_046453.1:n.2620C>T']['gene_symbol'] == 'CLCN1' + assert results['NR_046453.1:n.2620C>T']['gene_ids'] == {'hgnc_id': 'HGNC:2019', 'entrez_gene_id': '1180', 'ucsc_id': 'uc003wcr.2', 'omim_id': ['118425']} + assert results['NR_046453.1:n.2620C>T']['hgvs_transcript_variant'] == 'NR_046453.1:n.2620C>T' + assert results['NR_046453.1:n.2620C>T']['genome_context_intronic_sequence'] == '' + assert results['NR_046453.1:n.2620C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NR_046453.1:n.2620C>T']['hgvs_refseqgene_variant'] == '' + assert results['NR_046453.1:n.2620C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_046453.1:n.2620C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_046453.1:n.2620C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_046453.1:n.2620C>T']['alt_genomic_loci'], []) + assert results['NR_046453.1:n.2620C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.143048771C>T', 'vcf': {'chr': 'chr7', 'pos': '143048771', 'ref': 'C', 'alt': 'T'}} + assert results['NR_046453.1:n.2620C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143351678C>T', 'vcf': 
{'chr': 'chr7', 'pos': '143351678', 'ref': 'C', 'alt': 'T'}} + assert results['NR_046453.1:n.2620C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.143048771C>T', 'vcf': {'chr': '7', 'pos': '143048771', 'ref': 'C', 'alt': 'T'}} + assert results['NR_046453.1:n.2620C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.143351678C>T', 'vcf': {'chr': '7', 'pos': '143351678', 'ref': 'C', 'alt': 'T'}} + assert results['NR_046453.1:n.2620C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_046453.1'} def test_variant308(self): variant = '8-1871951-C-T' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_014629.3:c.2399C>T' in list(results.keys()) - assert results['NM_014629.3:c.2399C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_014629.3:c.2399C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_014629.3:c.2399C>T']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'HSCHR8_8_CTG1', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'chr8_KI270821v1_alt', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}]) - assert results['NM_014629.3:c.2399C>T']['gene_symbol'] == 'ARHGEF10' - assert results['NM_014629.3:c.2399C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055444.2(LRG_234p1):p.(Pro800Leu)', 'slr': 'NP_055444.2:p.(P800L)'} - assert results['NM_014629.3:c.2399C>T']['submitted_variant'] == '8-1871951-C-T' - assert results['NM_014629.3:c.2399C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_014629.3:c.2399C>T']['hgvs_lrg_variant'] == '' - assert results['NM_014629.3:c.2399C>T']['hgvs_transcript_variant'] == 'NM_014629.3:c.2399C>T' - assert results['NM_014629.3:c.2399C>T']['hgvs_refseqgene_variant'] == '' - assert 
results['NM_014629.3:c.2399C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000008.10:g.1871951C>T', 'vcf': {'chr': 'chr8', 'ref': 'C', 'pos': '1871951', 'alt': 'T'}} - assert results['NM_014629.3:c.2399C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000008.11:g.1923785C>T', 'vcf': {'chr': 'chr8', 'ref': 'C', 'pos': '1923785', 'alt': 'T'}} - assert results['NM_014629.3:c.2399C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000008.10:g.1871951C>T', 'vcf': {'chr': '8', 'ref': 'C', 'pos': '1871951', 'alt': 'T'}} - assert results['NM_014629.3:c.2399C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000008.11:g.1923785C>T', 'vcf': {'chr': '8', 'ref': 'C', 'pos': '1923785', 'alt': 'T'}} - assert results['NM_014629.3:c.2399C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055444.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014629.3'} - + assert results['flag'] == 'gene_variant' assert 'NM_014629.2:c.2399C>T' in list(results.keys()) - assert results['NM_014629.2:c.2399C>T']['hgvs_lrg_transcript_variant'] == 'LRG_234t1:c.2399C>T' - assert results['NM_014629.2:c.2399C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_014629.2:c.2399C>T']['alt_genomic_loci'], []) - assert results['NM_014629.2:c.2399C>T']['gene_symbol'] == 'ARHGEF10' - assert results['NM_014629.2:c.2399C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055444.2(LRG_234p1):p.(Pro800Leu)', 'slr': 'NP_055444.2:p.(P800L)'} assert results['NM_014629.2:c.2399C>T']['submitted_variant'] == '8-1871951-C-T' - assert results['NM_014629.2:c.2399C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_014629.2:c.2399C>T']['hgvs_lrg_variant'] == 'LRG_234:g.104803C>T' + assert results['NM_014629.2:c.2399C>T']['gene_symbol'] == 'ARHGEF10' + assert results['NM_014629.2:c.2399C>T']['gene_ids'] == {'hgnc_id': 
'HGNC:14103', 'entrez_gene_id': '9639', 'ucsc_id': 'uc003wpr.4', 'omim_id': ['608136']} assert results['NM_014629.2:c.2399C>T']['hgvs_transcript_variant'] == 'NM_014629.2:c.2399C>T' + assert results['NM_014629.2:c.2399C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_014629.2:c.2399C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_014629.2:c.2399C>T']['hgvs_refseqgene_variant'] == 'NG_008480.1:g.104803C>T' - assert results['NM_014629.2:c.2399C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000008.10:g.1871951C>T', 'vcf': {'chr': 'chr8', 'ref': 'C', 'pos': '1871951', 'alt': 'T'}} + assert results['NM_014629.2:c.2399C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055444.2(LRG_234p1):p.(Pro800Leu)', 'slr': 'NP_055444.2:p.(P800L)'} + assert results['NM_014629.2:c.2399C>T']['hgvs_lrg_transcript_variant'] == 'LRG_234t1:c.2399C>T' + assert results['NM_014629.2:c.2399C>T']['hgvs_lrg_variant'] == 'LRG_234:g.104803C>T' + self.assertCountEqual(results['NM_014629.2:c.2399C>T']['alt_genomic_loci'], []) + assert results['NM_014629.2:c.2399C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000008.10:g.1871951C>T', 'vcf': {'chr': 'chr8', 'pos': '1871951', 'ref': 'C', 'alt': 'T'}} assert 'hg38' not in list(results['NM_014629.2:c.2399C>T']['primary_assembly_loci'].keys()) - assert results['NM_014629.2:c.2399C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000008.10:g.1871951C>T', 'vcf': {'chr': '8', 'ref': 'C', 'pos': '1871951', 'alt': 'T'}} + assert results['NM_014629.2:c.2399C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000008.10:g.1871951C>T', 'vcf': {'chr': '8', 'pos': '1871951', 'ref': 'C', 'alt': 'T'}} assert 'grch38' not in list(results['NM_014629.2:c.2399C>T']['primary_assembly_loci'].keys()) - assert results['NM_014629.2:c.2399C>T']['reference_sequence_records'] == {'refseqgene': 
'https://www.ncbi.nlm.nih.gov/nuccore/NG_008480.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055444.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014629.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_234.xml'} + assert results['NM_014629.2:c.2399C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014629.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055444.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008480.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_234.xml'} - assert 'NM_001308153.1:c.2471C>T' in list(results.keys()) - assert results['NM_001308153.1:c.2471C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001308153.1:c.2471C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001308153.1:c.2471C>T']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'HSCHR8_8_CTG1', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'chr8_KI270821v1_alt', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}]) - assert results['NM_001308153.1:c.2471C>T']['gene_symbol'] == 'ARHGEF10' - assert results['NM_001308153.1:c.2471C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001295082.1:p.(Pro824Leu)', 'slr': 'NP_001295082.1:p.(P824L)'} - assert results['NM_001308153.1:c.2471C>T']['submitted_variant'] == '8-1871951-C-T' - assert results['NM_001308153.1:c.2471C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001308153.1:c.2471C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001308153.1:c.2471C>T']['hgvs_transcript_variant'] == 'NM_001308153.1:c.2471C>T' - assert results['NM_001308153.1:c.2471C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001308153.1:c.2471C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000008.10:g.1871951C>T', 'vcf': {'chr': 
'chr8', 'ref': 'C', 'pos': '1871951', 'alt': 'T'}} - assert results['NM_001308153.1:c.2471C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000008.11:g.1923785C>T', 'vcf': {'chr': 'chr8', 'ref': 'C', 'pos': '1923785', 'alt': 'T'}} - assert results['NM_001308153.1:c.2471C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000008.10:g.1871951C>T', 'vcf': {'chr': '8', 'ref': 'C', 'pos': '1871951', 'alt': 'T'}} - assert results['NM_001308153.1:c.2471C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000008.11:g.1923785C>T', 'vcf': {'chr': '8', 'ref': 'C', 'pos': '1923785', 'alt': 'T'}} - assert results['NM_001308153.1:c.2471C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001295082.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001308153.1'} + assert 'NM_014629.3:c.2399C>T' in list(results.keys()) + assert results['NM_014629.3:c.2399C>T']['submitted_variant'] == '8-1871951-C-T' + assert results['NM_014629.3:c.2399C>T']['gene_symbol'] == 'ARHGEF10' + assert results['NM_014629.3:c.2399C>T']['gene_ids'] == {'hgnc_id': 'HGNC:14103', 'entrez_gene_id': '9639', 'ucsc_id': 'uc003wpr.4', 'omim_id': ['608136']} + assert results['NM_014629.3:c.2399C>T']['hgvs_transcript_variant'] == 'NM_014629.3:c.2399C>T' + assert results['NM_014629.3:c.2399C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_014629.3:c.2399C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_014629.3:c.2399C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_014629.3:c.2399C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_055444.2(LRG_234p1):p.(Pro800Leu)', 'slr': 'NP_055444.2:p.(P800L)'} + assert results['NM_014629.3:c.2399C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_014629.3:c.2399C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_014629.3:c.2399C>T']['alt_genomic_loci'], [{'grch38': 
{'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'HSCHR8_8_CTG1', 'pos': '107161', 'ref': 'C', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'chr8_KI270821v1_alt', 'pos': '107161', 'ref': 'C', 'alt': 'T'}}}]) + assert results['NM_014629.3:c.2399C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000008.10:g.1871951C>T', 'vcf': {'chr': 'chr8', 'pos': '1871951', 'ref': 'C', 'alt': 'T'}} + assert results['NM_014629.3:c.2399C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000008.11:g.1923785C>T', 'vcf': {'chr': 'chr8', 'pos': '1923785', 'ref': 'C', 'alt': 'T'}} + assert results['NM_014629.3:c.2399C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000008.10:g.1871951C>T', 'vcf': {'chr': '8', 'pos': '1871951', 'ref': 'C', 'alt': 'T'}} + assert results['NM_014629.3:c.2399C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000008.11:g.1923785C>T', 'vcf': {'chr': '8', 'pos': '1923785', 'ref': 'C', 'alt': 'T'}} + assert results['NM_014629.3:c.2399C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_014629.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055444.2'} - assert results['flag'] == 'gene_variant' assert 'NM_001308152.1:c.2285C>T' in list(results.keys()) - assert results['NM_001308152.1:c.2285C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001308152.1:c.2285C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001308152.1:c.2285C>T']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'HSCHR8_8_CTG1', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'chr8_KI270821v1_alt', 'ref': 'C', 'pos': '107161', 'alt': 'T'}}}]) - assert results['NM_001308152.1:c.2285C>T']['gene_symbol'] 
== 'ARHGEF10' - assert results['NM_001308152.1:c.2285C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001295081.1:p.(Pro762Leu)', 'slr': 'NP_001295081.1:p.(P762L)'} assert results['NM_001308152.1:c.2285C>T']['submitted_variant'] == '8-1871951-C-T' - assert results['NM_001308152.1:c.2285C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001308152.1:c.2285C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001308152.1:c.2285C>T']['gene_symbol'] == 'ARHGEF10' + assert results['NM_001308152.1:c.2285C>T']['gene_ids'] == {'hgnc_id': 'HGNC:14103', 'entrez_gene_id': '9639', 'ucsc_id': 'uc003wpr.4', 'omim_id': ['608136']} assert results['NM_001308152.1:c.2285C>T']['hgvs_transcript_variant'] == 'NM_001308152.1:c.2285C>T' + assert results['NM_001308152.1:c.2285C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001308152.1:c.2285C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001308152.1:c.2285C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001308152.1:c.2285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000008.10:g.1871951C>T', 'vcf': {'chr': 'chr8', 'ref': 'C', 'pos': '1871951', 'alt': 'T'}} - assert results['NM_001308152.1:c.2285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000008.11:g.1923785C>T', 'vcf': {'chr': 'chr8', 'ref': 'C', 'pos': '1923785', 'alt': 'T'}} - assert results['NM_001308152.1:c.2285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000008.10:g.1871951C>T', 'vcf': {'chr': '8', 'ref': 'C', 'pos': '1871951', 'alt': 'T'}} - assert results['NM_001308152.1:c.2285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000008.11:g.1923785C>T', 'vcf': {'chr': '8', 'ref': 'C', 'pos': '1923785', 'alt': 'T'}} - assert results['NM_001308152.1:c.2285C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001295081.1', 'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001308152.1'} + assert results['NM_001308152.1:c.2285C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001295081.1:p.(Pro762Leu)', 'slr': 'NP_001295081.1:p.(P762L)'} + assert results['NM_001308152.1:c.2285C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001308152.1:c.2285C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001308152.1:c.2285C>T']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'HSCHR8_8_CTG1', 'pos': '107161', 'ref': 'C', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'chr8_KI270821v1_alt', 'pos': '107161', 'ref': 'C', 'alt': 'T'}}}]) + assert results['NM_001308152.1:c.2285C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000008.10:g.1871951C>T', 'vcf': {'chr': 'chr8', 'pos': '1871951', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001308152.1:c.2285C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000008.11:g.1923785C>T', 'vcf': {'chr': 'chr8', 'pos': '1923785', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001308152.1:c.2285C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000008.10:g.1871951C>T', 'vcf': {'chr': '8', 'pos': '1871951', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001308152.1:c.2285C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000008.11:g.1923785C>T', 'vcf': {'chr': '8', 'pos': '1923785', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001308152.1:c.2285C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001308152.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001295081.1'} + assert 'NM_001308153.1:c.2471C>T' in list(results.keys()) + assert results['NM_001308153.1:c.2471C>T']['submitted_variant'] == '8-1871951-C-T' + assert results['NM_001308153.1:c.2471C>T']['gene_symbol'] == 'ARHGEF10' + 
assert results['NM_001308153.1:c.2471C>T']['gene_ids'] == {'hgnc_id': 'HGNC:14103', 'entrez_gene_id': '9639', 'ucsc_id': 'uc003wpr.4', 'omim_id': ['608136']} + assert results['NM_001308153.1:c.2471C>T']['hgvs_transcript_variant'] == 'NM_001308153.1:c.2471C>T' + assert results['NM_001308153.1:c.2471C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001308153.1:c.2471C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001308153.1:c.2471C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001308153.1:c.2471C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001295082.1:p.(Pro824Leu)', 'slr': 'NP_001295082.1:p.(P824L)'} + assert results['NM_001308153.1:c.2471C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001308153.1:c.2471C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001308153.1:c.2471C>T']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'HSCHR8_8_CTG1', 'pos': '107161', 'ref': 'C', 'alt': 'T'}}}, {'hg38': {'hgvs_genomic_description': 'NT_187576.1:g.107161C>T', 'vcf': {'chr': 'chr8_KI270821v1_alt', 'pos': '107161', 'ref': 'C', 'alt': 'T'}}}]) + assert results['NM_001308153.1:c.2471C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000008.10:g.1871951C>T', 'vcf': {'chr': 'chr8', 'pos': '1871951', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001308153.1:c.2471C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000008.11:g.1923785C>T', 'vcf': {'chr': 'chr8', 'pos': '1923785', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001308153.1:c.2471C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000008.10:g.1871951C>T', 'vcf': {'chr': '8', 'pos': '1871951', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001308153.1:c.2471C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000008.11:g.1923785C>T', 'vcf': {'chr': '8', 'pos': '1923785', 
'ref': 'C', 'alt': 'T'}} + assert results['NM_001308153.1:c.2471C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001308153.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001295082.1'} def test_variant309(self): variant = '9-13112056-T-TG' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_001261407.1:c.5504dup' in list(results.keys()) - assert results['NM_001261407.1:c.5504dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001261407.1:c.5504dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001261407.1:c.5504dup']['alt_genomic_loci'], []) - assert results['NM_001261407.1:c.5504dup']['gene_symbol'] == 'MPDZ' - assert results['NM_001261407.1:c.5504dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001248336.1:p.(Thr1836AsnfsTer15)', 'slr': 'NP_001248336.1:p.(T1836Nfs*15)'} - assert results['NM_001261407.1:c.5504dup']['submitted_variant'] == '9-13112056-T-TG' - assert results['NM_001261407.1:c.5504dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001261407.1:c.5504dup']['hgvs_lrg_variant'] == '' - assert results['NM_001261407.1:c.5504dup']['hgvs_transcript_variant'] == 'NM_001261407.1:c.5504dup' - assert results['NM_001261407.1:c.5504dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001261407.1:c.5504dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112059dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} - assert results['NM_001261407.1:c.5504dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112060dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} - assert results['NM_001261407.1:c.5504dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112059dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} - assert 
results['NM_001261407.1:c.5504dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112060dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} - assert results['NM_001261407.1:c.5504dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001248336.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001261407.1'} - - assert 'NM_001330637.1:c.5690dup' in list(results.keys()) - assert results['NM_001330637.1:c.5690dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001330637.1:c.5690dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001330637.1:c.5690dup']['alt_genomic_loci'], []) - assert results['NM_001330637.1:c.5690dup']['gene_symbol'] == 'MPDZ' - assert results['NM_001330637.1:c.5690dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317566.1:p.(Thr1898AsnfsTer15)', 'slr': 'NP_001317566.1:p.(T1898Nfs*15)'} - assert results['NM_001330637.1:c.5690dup']['submitted_variant'] == '9-13112056-T-TG' - assert results['NM_001330637.1:c.5690dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001330637.1:c.5690dup']['hgvs_lrg_variant'] == '' - assert results['NM_001330637.1:c.5690dup']['hgvs_transcript_variant'] == 'NM_001330637.1:c.5690dup' - assert results['NM_001330637.1:c.5690dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001330637.1:c.5690dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112059dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} - assert results['NM_001330637.1:c.5690dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112060dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} - assert results['NM_001330637.1:c.5690dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112059dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': 
'13112056', 'alt': 'TG'}} - assert results['NM_001330637.1:c.5690dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112060dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} - assert results['NM_001330637.1:c.5690dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317566.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330637.1'} - + assert results['flag'] == 'gene_variant' assert 'NM_001261406.1:c.5591dup' in list(results.keys()) - assert results['NM_001261406.1:c.5591dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001261406.1:c.5591dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001261406.1:c.5591dup']['alt_genomic_loci'], []) - assert results['NM_001261406.1:c.5591dup']['gene_symbol'] == 'MPDZ' - assert results['NM_001261406.1:c.5591dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001248335.1:p.(Thr1865AsnfsTer15)', 'slr': 'NP_001248335.1:p.(T1865Nfs*15)'} assert results['NM_001261406.1:c.5591dup']['submitted_variant'] == '9-13112056-T-TG' - assert results['NM_001261406.1:c.5591dup']['genome_context_intronic_sequence'] == '' - assert results['NM_001261406.1:c.5591dup']['hgvs_lrg_variant'] == '' + assert results['NM_001261406.1:c.5591dup']['gene_symbol'] == 'MPDZ' + assert results['NM_001261406.1:c.5591dup']['gene_ids'] == {'hgnc_id': 'HGNC:7208', 'entrez_gene_id': '8777', 'ucsc_id': 'uc003zlb.5', 'omim_id': ['603785']} assert results['NM_001261406.1:c.5591dup']['hgvs_transcript_variant'] == 'NM_001261406.1:c.5591dup' + assert results['NM_001261406.1:c.5591dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001261406.1:c.5591dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001261406.1:c.5591dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001261406.1:c.5591dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000009.11:g.13112059dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} - assert results['NM_001261406.1:c.5591dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112060dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} - assert results['NM_001261406.1:c.5591dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112059dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} - assert results['NM_001261406.1:c.5591dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112060dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} - assert results['NM_001261406.1:c.5591dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001248335.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001261406.1'} + assert results['NM_001261406.1:c.5591dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001248335.1:p.(Thr1865AsnfsTer15)', 'slr': 'NP_001248335.1:p.(T1865Nfs*15)'} + assert results['NM_001261406.1:c.5591dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001261406.1:c.5591dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001261406.1:c.5591dup']['alt_genomic_loci'], []) + assert results['NM_001261406.1:c.5591dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112059dup', 'vcf': {'chr': 'chr9', 'pos': '13112056', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001261406.1:c.5591dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112060dup', 'vcf': {'chr': 'chr9', 'pos': '13112057', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001261406.1:c.5591dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112059dup', 'vcf': {'chr': '9', 'pos': '13112056', 'ref': 'T', 'alt': 'TG'}} + assert 
results['NM_001261406.1:c.5591dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112060dup', 'vcf': {'chr': '9', 'pos': '13112057', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001261406.1:c.5591dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001261406.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001248335.1'} + + assert 'NM_001261407.1:c.5504dup' in list(results.keys()) + assert results['NM_001261407.1:c.5504dup']['submitted_variant'] == '9-13112056-T-TG' + assert results['NM_001261407.1:c.5504dup']['gene_symbol'] == 'MPDZ' + assert results['NM_001261407.1:c.5504dup']['gene_ids'] == {'hgnc_id': 'HGNC:7208', 'entrez_gene_id': '8777', 'ucsc_id': 'uc003zlb.5', 'omim_id': ['603785']} + assert results['NM_001261407.1:c.5504dup']['hgvs_transcript_variant'] == 'NM_001261407.1:c.5504dup' + assert results['NM_001261407.1:c.5504dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001261407.1:c.5504dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001261407.1:c.5504dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001261407.1:c.5504dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001248336.1:p.(Thr1836AsnfsTer15)', 'slr': 'NP_001248336.1:p.(T1836Nfs*15)'} + assert results['NM_001261407.1:c.5504dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001261407.1:c.5504dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001261407.1:c.5504dup']['alt_genomic_loci'], []) + assert results['NM_001261407.1:c.5504dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112059dup', 'vcf': {'chr': 'chr9', 'pos': '13112056', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001261407.1:c.5504dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112060dup', 'vcf': {'chr': 'chr9', 'pos': '13112057', 'ref': 'T', 'alt': 'TG'}} + assert 
results['NM_001261407.1:c.5504dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112059dup', 'vcf': {'chr': '9', 'pos': '13112056', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001261407.1:c.5504dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112060dup', 'vcf': {'chr': '9', 'pos': '13112057', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001261407.1:c.5504dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001261407.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001248336.1'} - assert results['flag'] == 'gene_variant' assert 'NM_003829.4:c.5603dup' in list(results.keys()) - assert results['NM_003829.4:c.5603dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_003829.4:c.5603dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_003829.4:c.5603dup']['alt_genomic_loci'], []) - assert results['NM_003829.4:c.5603dup']['gene_symbol'] == 'MPDZ' - assert results['NM_003829.4:c.5603dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003820.2:p.(Thr1869AsnfsTer15)', 'slr': 'NP_003820.2:p.(T1869Nfs*15)'} assert results['NM_003829.4:c.5603dup']['submitted_variant'] == '9-13112056-T-TG' - assert results['NM_003829.4:c.5603dup']['genome_context_intronic_sequence'] == '' - assert results['NM_003829.4:c.5603dup']['hgvs_lrg_variant'] == '' + assert results['NM_003829.4:c.5603dup']['gene_symbol'] == 'MPDZ' + assert results['NM_003829.4:c.5603dup']['gene_ids'] == {'hgnc_id': 'HGNC:7208', 'entrez_gene_id': '8777', 'ucsc_id': 'uc003zlb.5', 'omim_id': ['603785']} assert results['NM_003829.4:c.5603dup']['hgvs_transcript_variant'] == 'NM_003829.4:c.5603dup' + assert results['NM_003829.4:c.5603dup']['genome_context_intronic_sequence'] == '' + assert results['NM_003829.4:c.5603dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_003829.4:c.5603dup']['hgvs_refseqgene_variant'] == '' - 
assert results['NM_003829.4:c.5603dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112059dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} - assert results['NM_003829.4:c.5603dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112060dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} - assert results['NM_003829.4:c.5603dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112059dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112056', 'alt': 'TG'}} - assert results['NM_003829.4:c.5603dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112060dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '13112057', 'alt': 'TG'}} - assert results['NM_003829.4:c.5603dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003820.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003829.4'} + assert results['NM_003829.4:c.5603dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003820.2:p.(Thr1869AsnfsTer15)', 'slr': 'NP_003820.2:p.(T1869Nfs*15)'} + assert results['NM_003829.4:c.5603dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_003829.4:c.5603dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_003829.4:c.5603dup']['alt_genomic_loci'], []) + assert results['NM_003829.4:c.5603dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112059dup', 'vcf': {'chr': 'chr9', 'pos': '13112056', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_003829.4:c.5603dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112060dup', 'vcf': {'chr': 'chr9', 'pos': '13112057', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_003829.4:c.5603dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112059dup', 'vcf': {'chr': '9', 'pos': 
'13112056', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_003829.4:c.5603dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112060dup', 'vcf': {'chr': '9', 'pos': '13112057', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_003829.4:c.5603dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003829.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003820.2'} + assert 'NM_001330637.1:c.5690dup' in list(results.keys()) + assert results['NM_001330637.1:c.5690dup']['submitted_variant'] == '9-13112056-T-TG' + assert results['NM_001330637.1:c.5690dup']['gene_symbol'] == 'MPDZ' + assert results['NM_001330637.1:c.5690dup']['gene_ids'] == {'hgnc_id': 'HGNC:7208', 'entrez_gene_id': '8777', 'ucsc_id': 'uc003zlb.5', 'omim_id': ['603785']} + assert results['NM_001330637.1:c.5690dup']['hgvs_transcript_variant'] == 'NM_001330637.1:c.5690dup' + assert results['NM_001330637.1:c.5690dup']['genome_context_intronic_sequence'] == '' + assert results['NM_001330637.1:c.5690dup']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001330637.1:c.5690dup']['hgvs_refseqgene_variant'] == '' + assert results['NM_001330637.1:c.5690dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001317566.1:p.(Thr1898AsnfsTer15)', 'slr': 'NP_001317566.1:p.(T1898Nfs*15)'} + assert results['NM_001330637.1:c.5690dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001330637.1:c.5690dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001330637.1:c.5690dup']['alt_genomic_loci'], []) + assert results['NM_001330637.1:c.5690dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112059dup', 'vcf': {'chr': 'chr9', 'pos': '13112056', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001330637.1:c.5690dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112060dup', 'vcf': {'chr': 'chr9', 'pos': '13112057', 'ref': 'T', 
'alt': 'TG'}} + assert results['NM_001330637.1:c.5690dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.13112059dup', 'vcf': {'chr': '9', 'pos': '13112056', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001330637.1:c.5690dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.13112060dup', 'vcf': {'chr': '9', 'pos': '13112057', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001330637.1:c.5690dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001330637.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001317566.1'} def test_variant310(self): variant = '9-21971208-C-A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_058197.4:c.*74-1G>T' in list(results.keys()) - assert results['NM_058197.4:c.*74-1G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_058197.4:c.*74-1G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_058197.4:c.*74-1G>T']['alt_genomic_loci'], []) - assert results['NM_058197.4:c.*74-1G>T']['gene_symbol'] == 'CDKN2A' - assert results['NM_058197.4:c.*74-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_478104.2:p.?', 'slr': 'NP_478104.2:p.?'} - assert results['NM_058197.4:c.*74-1G>T']['submitted_variant'] == '9-21971208-C-A' - assert results['NM_058197.4:c.*74-1G>T']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_058197.4):c.*74-1G>T' - assert results['NM_058197.4:c.*74-1G>T']['hgvs_lrg_variant'] == '' - assert results['NM_058197.4:c.*74-1G>T']['hgvs_transcript_variant'] == 'NM_058197.4:c.*74-1G>T' - assert results['NM_058197.4:c.*74-1G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_058197.4:c.*74-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': 'chr9', 'ref': 'C', 'pos': '21971208', 'alt': 'A'}} - assert 
results['NM_058197.4:c.*74-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': 'chr9', 'ref': 'C', 'pos': '21971209', 'alt': 'A'}} - assert results['NM_058197.4:c.*74-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': '9', 'ref': 'C', 'pos': '21971208', 'alt': 'A'}} - assert results['NM_058197.4:c.*74-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': '9', 'ref': 'C', 'pos': '21971209', 'alt': 'A'}} - assert results['NM_058197.4:c.*74-1G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_478104.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_058197.4'} - - assert 'NM_000077.4:c.151-1G>T' in list(results.keys()) - assert results['NM_000077.4:c.151-1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_11t1:c.151-1G>T' - assert results['NM_000077.4:c.151-1G>T']['refseqgene_context_intronic_sequence'] == 'NG_007485.1(NM_000077.4):c.151-1G>T' - self.assertCountEqual(results['NM_000077.4:c.151-1G>T']['alt_genomic_loci'], []) - assert results['NM_000077.4:c.151-1G>T']['gene_symbol'] == 'CDKN2A' - assert results['NM_000077.4:c.151-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000068.1(LRG_11p1):p.?', 'slr': 'NP_000068.1:p.?'} - assert results['NM_000077.4:c.151-1G>T']['submitted_variant'] == '9-21971208-C-A' - assert results['NM_000077.4:c.151-1G>T']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_000077.4):c.151-1G>T' - assert results['NM_000077.4:c.151-1G>T']['hgvs_lrg_variant'] == 'LRG_11:g.28283G>T' - assert results['NM_000077.4:c.151-1G>T']['hgvs_transcript_variant'] == 'NM_000077.4:c.151-1G>T' - assert results['NM_000077.4:c.151-1G>T']['hgvs_refseqgene_variant'] == 'NG_007485.1:g.28283G>T' - assert results['NM_000077.4:c.151-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 
'NC_000009.11:g.21971208C>A', 'vcf': {'chr': 'chr9', 'ref': 'C', 'pos': '21971208', 'alt': 'A'}} - assert results['NM_000077.4:c.151-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': 'chr9', 'ref': 'C', 'pos': '21971209', 'alt': 'A'}} - assert results['NM_000077.4:c.151-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': '9', 'ref': 'C', 'pos': '21971208', 'alt': 'A'}} - assert results['NM_000077.4:c.151-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': '9', 'ref': 'C', 'pos': '21971209', 'alt': 'A'}} - assert results['NM_000077.4:c.151-1G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007485.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000068.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000077.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_11.xml'} - + assert results['flag'] == 'gene_variant' assert 'NM_001363763.1:c.-3-1G>T' in list(results.keys()) - assert results['NM_001363763.1:c.-3-1G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001363763.1:c.-3-1G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001363763.1:c.-3-1G>T']['alt_genomic_loci'], []) - assert results['NM_001363763.1:c.-3-1G>T']['gene_symbol'] == 'CDKN2A' - assert results['NM_001363763.1:c.-3-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350692.1:p.?', 'slr': 'NP_001350692.1:p.?'} assert results['NM_001363763.1:c.-3-1G>T']['submitted_variant'] == '9-21971208-C-A' + assert results['NM_001363763.1:c.-3-1G>T']['gene_symbol'] == 'CDKN2A' + assert results['NM_001363763.1:c.-3-1G>T']['gene_ids'] == {'hgnc_id': 'HGNC:1787', 'entrez_gene_id': '1029', 'ucsc_id': 'uc003zpk.4', 'omim_id': ['600160']} + assert 
results['NM_001363763.1:c.-3-1G>T']['hgvs_transcript_variant'] == 'NM_001363763.1:c.-3-1G>T' assert results['NM_001363763.1:c.-3-1G>T']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001363763.1):c.-3-1G>T' + assert results['NM_001363763.1:c.-3-1G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001363763.1:c.-3-1G>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001363763.1:c.-3-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001350692.1:p.?', 'slr': 'NP_001350692.1:p.?'} + assert results['NM_001363763.1:c.-3-1G>T']['hgvs_lrg_transcript_variant'] == '' assert results['NM_001363763.1:c.-3-1G>T']['hgvs_lrg_variant'] == '' - assert results['NM_001363763.1:c.-3-1G>T']['hgvs_transcript_variant'] == 'NM_001363763.1:c.-3-1G>T' - assert results['NM_001363763.1:c.-3-1G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001363763.1:c.-3-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': 'chr9', 'ref': 'C', 'pos': '21971208', 'alt': 'A'}} + self.assertCountEqual(results['NM_001363763.1:c.-3-1G>T']['alt_genomic_loci'], []) + assert results['NM_001363763.1:c.-3-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': 'chr9', 'pos': '21971208', 'ref': 'C', 'alt': 'A'}} assert 'hg38' not in list(results['NM_001363763.1:c.-3-1G>T']['primary_assembly_loci'].keys()) - assert results['NM_001363763.1:c.-3-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': '9', 'ref': 'C', 'pos': '21971208', 'alt': 'A'}} + assert results['NM_001363763.1:c.-3-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': '9', 'pos': '21971208', 'ref': 'C', 'alt': 'A'}} assert 'grch38' not in list(results['NM_001363763.1:c.-3-1G>T']['primary_assembly_loci'].keys()) - assert 
results['NM_001363763.1:c.-3-1G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350692.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363763.1'} - - assert 'NM_001195132.1:c.151-1G>T' in list(results.keys()) - assert results['NM_001195132.1:c.151-1G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001195132.1:c.151-1G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001195132.1:c.151-1G>T']['alt_genomic_loci'], []) - assert results['NM_001195132.1:c.151-1G>T']['gene_symbol'] == 'CDKN2A' - assert results['NM_001195132.1:c.151-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001182061.1:p.?', 'slr': 'NP_001182061.1:p.?'} - assert results['NM_001195132.1:c.151-1G>T']['submitted_variant'] == '9-21971208-C-A' - assert results['NM_001195132.1:c.151-1G>T']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001195132.1):c.151-1G>T' - assert results['NM_001195132.1:c.151-1G>T']['hgvs_lrg_variant'] == '' - assert results['NM_001195132.1:c.151-1G>T']['hgvs_transcript_variant'] == 'NM_001195132.1:c.151-1G>T' - assert results['NM_001195132.1:c.151-1G>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001195132.1:c.151-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': 'chr9', 'ref': 'C', 'pos': '21971208', 'alt': 'A'}} - assert results['NM_001195132.1:c.151-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': 'chr9', 'ref': 'C', 'pos': '21971209', 'alt': 'A'}} - assert results['NM_001195132.1:c.151-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': '9', 'ref': 'C', 'pos': '21971208', 'alt': 'A'}} - assert results['NM_001195132.1:c.151-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': 
'9', 'ref': 'C', 'pos': '21971209', 'alt': 'A'}} - assert results['NM_001195132.1:c.151-1G>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001182061.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001195132.1'} + assert results['NM_001363763.1:c.-3-1G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001363763.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001350692.1'} assert 'NM_058195.3:c.194-1G>T' in list(results.keys()) - assert results['NM_058195.3:c.194-1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_11t2:c.194-1G>T' - assert results['NM_058195.3:c.194-1G>T']['refseqgene_context_intronic_sequence'] == 'NG_007485.1(NM_058195.3):c.194-1G>T' - self.assertCountEqual(results['NM_058195.3:c.194-1G>T']['alt_genomic_loci'], []) - assert results['NM_058195.3:c.194-1G>T']['gene_symbol'] == 'CDKN2A' - assert results['NM_058195.3:c.194-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_478102.2(LRG_11p2):p.?', 'slr': 'NP_478102.2:p.?'} assert results['NM_058195.3:c.194-1G>T']['submitted_variant'] == '9-21971208-C-A' - assert results['NM_058195.3:c.194-1G>T']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_058195.3):c.194-1G>T' - assert results['NM_058195.3:c.194-1G>T']['hgvs_lrg_variant'] == 'LRG_11:g.28283G>T' + assert results['NM_058195.3:c.194-1G>T']['gene_symbol'] == 'CDKN2A' + assert results['NM_058195.3:c.194-1G>T']['gene_ids'] == {'hgnc_id': 'HGNC:1787', 'entrez_gene_id': '1029', 'ucsc_id': 'uc003zpk.4', 'omim_id': ['600160']} assert results['NM_058195.3:c.194-1G>T']['hgvs_transcript_variant'] == 'NM_058195.3:c.194-1G>T' + assert results['NM_058195.3:c.194-1G>T']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_058195.3):c.194-1G>T' + assert results['NM_058195.3:c.194-1G>T']['refseqgene_context_intronic_sequence'] == 'NG_007485.1(NM_058195.3):c.194-1G>T' assert results['NM_058195.3:c.194-1G>T']['hgvs_refseqgene_variant'] == 
'NG_007485.1:g.28283G>T' - assert results['NM_058195.3:c.194-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': 'chr9', 'ref': 'C', 'pos': '21971208', 'alt': 'A'}} - assert results['NM_058195.3:c.194-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': 'chr9', 'ref': 'C', 'pos': '21971209', 'alt': 'A'}} - assert results['NM_058195.3:c.194-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': '9', 'ref': 'C', 'pos': '21971208', 'alt': 'A'}} - assert results['NM_058195.3:c.194-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': '9', 'ref': 'C', 'pos': '21971209', 'alt': 'A'}} - assert results['NM_058195.3:c.194-1G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007485.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_478102.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_058195.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_11.xml'} + assert results['NM_058195.3:c.194-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_478102.2(LRG_11p2):p.?', 'slr': 'NP_478102.2:p.?'} + assert results['NM_058195.3:c.194-1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_11t2:c.194-1G>T' + assert results['NM_058195.3:c.194-1G>T']['hgvs_lrg_variant'] == 'LRG_11:g.28283G>T' + self.assertCountEqual(results['NM_058195.3:c.194-1G>T']['alt_genomic_loci'], []) + assert results['NM_058195.3:c.194-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': 'chr9', 'pos': '21971208', 'ref': 'C', 'alt': 'A'}} + assert results['NM_058195.3:c.194-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': 'chr9', 'pos': '21971209', 'ref': 'C', 'alt': 
'A'}} + assert results['NM_058195.3:c.194-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': '9', 'pos': '21971208', 'ref': 'C', 'alt': 'A'}} + assert results['NM_058195.3:c.194-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': '9', 'pos': '21971209', 'ref': 'C', 'alt': 'A'}} + assert results['NM_058195.3:c.194-1G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_058195.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_478102.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007485.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_11.xml'} - assert results['flag'] == 'gene_variant' + assert 'NM_000077.4:c.151-1G>T' in list(results.keys()) + assert results['NM_000077.4:c.151-1G>T']['submitted_variant'] == '9-21971208-C-A' + assert results['NM_000077.4:c.151-1G>T']['gene_symbol'] == 'CDKN2A' + assert results['NM_000077.4:c.151-1G>T']['gene_ids'] == {'hgnc_id': 'HGNC:1787', 'entrez_gene_id': '1029', 'ucsc_id': 'uc003zpk.4', 'omim_id': ['600160']} + assert results['NM_000077.4:c.151-1G>T']['hgvs_transcript_variant'] == 'NM_000077.4:c.151-1G>T' + assert results['NM_000077.4:c.151-1G>T']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_000077.4):c.151-1G>T' + assert results['NM_000077.4:c.151-1G>T']['refseqgene_context_intronic_sequence'] == 'NG_007485.1(NM_000077.4):c.151-1G>T' + assert results['NM_000077.4:c.151-1G>T']['hgvs_refseqgene_variant'] == 'NG_007485.1:g.28283G>T' + assert results['NM_000077.4:c.151-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000068.1(LRG_11p1):p.?', 'slr': 'NP_000068.1:p.?'} + assert results['NM_000077.4:c.151-1G>T']['hgvs_lrg_transcript_variant'] == 'LRG_11t1:c.151-1G>T' + assert results['NM_000077.4:c.151-1G>T']['hgvs_lrg_variant'] == 'LRG_11:g.28283G>T' + 
self.assertCountEqual(results['NM_000077.4:c.151-1G>T']['alt_genomic_loci'], []) + assert results['NM_000077.4:c.151-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': 'chr9', 'pos': '21971208', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000077.4:c.151-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': 'chr9', 'pos': '21971209', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000077.4:c.151-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': '9', 'pos': '21971208', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000077.4:c.151-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': '9', 'pos': '21971209', 'ref': 'C', 'alt': 'A'}} + assert results['NM_000077.4:c.151-1G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000077.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000068.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007485.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_11.xml'} + + assert 'NM_058197.4:c.*74-1G>T' in list(results.keys()) + assert results['NM_058197.4:c.*74-1G>T']['submitted_variant'] == '9-21971208-C-A' + assert results['NM_058197.4:c.*74-1G>T']['gene_symbol'] == 'CDKN2A' + assert results['NM_058197.4:c.*74-1G>T']['gene_ids'] == {'hgnc_id': 'HGNC:1787', 'entrez_gene_id': '1029', 'ucsc_id': 'uc003zpk.4', 'omim_id': ['600160']} + assert results['NM_058197.4:c.*74-1G>T']['hgvs_transcript_variant'] == 'NM_058197.4:c.*74-1G>T' + assert results['NM_058197.4:c.*74-1G>T']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_058197.4):c.*74-1G>T' + assert results['NM_058197.4:c.*74-1G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_058197.4:c.*74-1G>T']['hgvs_refseqgene_variant'] == '' + assert 
results['NM_058197.4:c.*74-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_478104.2:p.?', 'slr': 'NP_478104.2:p.?'} + assert results['NM_058197.4:c.*74-1G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_058197.4:c.*74-1G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_058197.4:c.*74-1G>T']['alt_genomic_loci'], []) + assert results['NM_058197.4:c.*74-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': 'chr9', 'pos': '21971208', 'ref': 'C', 'alt': 'A'}} + assert results['NM_058197.4:c.*74-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': 'chr9', 'pos': '21971209', 'ref': 'C', 'alt': 'A'}} + assert results['NM_058197.4:c.*74-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': '9', 'pos': '21971208', 'ref': 'C', 'alt': 'A'}} + assert results['NM_058197.4:c.*74-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': '9', 'pos': '21971209', 'ref': 'C', 'alt': 'A'}} + assert results['NM_058197.4:c.*74-1G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_058197.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_478104.2'} + + assert 'NM_001195132.1:c.151-1G>T' in list(results.keys()) + assert results['NM_001195132.1:c.151-1G>T']['submitted_variant'] == '9-21971208-C-A' + assert results['NM_001195132.1:c.151-1G>T']['gene_symbol'] == 'CDKN2A' + assert results['NM_001195132.1:c.151-1G>T']['gene_ids'] == {'hgnc_id': 'HGNC:1787', 'entrez_gene_id': '1029', 'ucsc_id': 'uc003zpk.4', 'omim_id': ['600160']} + assert results['NM_001195132.1:c.151-1G>T']['hgvs_transcript_variant'] == 'NM_001195132.1:c.151-1G>T' + assert results['NM_001195132.1:c.151-1G>T']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001195132.1):c.151-1G>T' + 
assert results['NM_001195132.1:c.151-1G>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001195132.1:c.151-1G>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001195132.1:c.151-1G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001182061.1:p.?', 'slr': 'NP_001182061.1:p.?'} + assert results['NM_001195132.1:c.151-1G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001195132.1:c.151-1G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001195132.1:c.151-1G>T']['alt_genomic_loci'], []) + assert results['NM_001195132.1:c.151-1G>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': 'chr9', 'pos': '21971208', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001195132.1:c.151-1G>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': 'chr9', 'pos': '21971209', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001195132.1:c.151-1G>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.21971208C>A', 'vcf': {'chr': '9', 'pos': '21971208', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001195132.1:c.151-1G>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.21971209C>A', 'vcf': {'chr': '9', 'pos': '21971209', 'ref': 'C', 'alt': 'A'}} + assert results['NM_001195132.1:c.151-1G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001195132.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001182061.1'} def test_variant311(self): variant = '9-35683240-T-TG' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' + assert 'NM_003289.3:c.773-3dup' in list(results.keys()) + assert results['NM_003289.3:c.773-3dup']['submitted_variant'] == '9-35683240-T-TG' + assert results['NM_003289.3:c.773-3dup']['gene_symbol'] == 'TPM2' + 
assert results['NM_003289.3:c.773-3dup']['gene_ids'] == {'hgnc_id': 'HGNC:12011', 'entrez_gene_id': '7169', 'ucsc_id': 'uc064syd.1', 'omim_id': ['190990']} + assert results['NM_003289.3:c.773-3dup']['hgvs_transcript_variant'] == 'NM_003289.3:c.773-3dup' + assert results['NM_003289.3:c.773-3dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_003289.3):c.773-3dup' + assert results['NM_003289.3:c.773-3dup']['refseqgene_context_intronic_sequence'] == 'NG_011620.1(NM_003289.3):c.773-3dup' + assert results['NM_003289.3:c.773-3dup']['hgvs_refseqgene_variant'] == 'NG_011620.1:g.11814dup' + assert results['NM_003289.3:c.773-3dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003280.2(LRG_680p2):p.?', 'slr': 'NP_003280.2:p.?'} + assert results['NM_003289.3:c.773-3dup']['hgvs_lrg_transcript_variant'] == 'LRG_680t2:c.773-3dup' + assert results['NM_003289.3:c.773-3dup']['hgvs_lrg_variant'] == 'LRG_680:g.11814dup' + self.assertCountEqual(results['NM_003289.3:c.773-3dup']['alt_genomic_loci'], []) + assert results['NM_003289.3:c.773-3dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683248dup', 'vcf': {'chr': 'chr9', 'pos': '35683240', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_003289.3:c.773-3dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683251dup', 'vcf': {'chr': 'chr9', 'pos': '35683243', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_003289.3:c.773-3dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683248dup', 'vcf': {'chr': '9', 'pos': '35683240', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_003289.3:c.773-3dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683251dup', 'vcf': {'chr': '9', 'pos': '35683243', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_003289.3:c.773-3dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003289.3', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_003280.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011620.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_680.xml'} + assert 'NM_001301227.1:c.773-3dup' in list(results.keys()) - assert results['NM_001301227.1:c.773-3dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001301227.1:c.773-3dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001301227.1:c.773-3dup']['alt_genomic_loci'], []) - assert results['NM_001301227.1:c.773-3dup']['gene_symbol'] == 'TPM2' - assert results['NM_001301227.1:c.773-3dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001288156.1:p.?', 'slr': 'NP_001288156.1:p.?'} assert results['NM_001301227.1:c.773-3dup']['submitted_variant'] == '9-35683240-T-TG' - assert results['NM_001301227.1:c.773-3dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001301227.1):c.773-3dup' - assert results['NM_001301227.1:c.773-3dup']['hgvs_lrg_variant'] == '' + assert results['NM_001301227.1:c.773-3dup']['gene_symbol'] == 'TPM2' + assert results['NM_001301227.1:c.773-3dup']['gene_ids'] == {'hgnc_id': 'HGNC:12011', 'entrez_gene_id': '7169', 'ucsc_id': 'uc064syd.1', 'omim_id': ['190990']} assert results['NM_001301227.1:c.773-3dup']['hgvs_transcript_variant'] == 'NM_001301227.1:c.773-3dup' + assert results['NM_001301227.1:c.773-3dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001301227.1):c.773-3dup' + assert results['NM_001301227.1:c.773-3dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001301227.1:c.773-3dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001301227.1:c.773-3dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683248dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} - assert results['NM_001301227.1:c.773-3dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683251dup', 'vcf': {'chr': 
'chr9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} - assert results['NM_001301227.1:c.773-3dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683248dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} - assert results['NM_001301227.1:c.773-3dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683251dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} - assert results['NM_001301227.1:c.773-3dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001288156.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001301227.1'} + assert results['NM_001301227.1:c.773-3dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001288156.1:p.?', 'slr': 'NP_001288156.1:p.?'} + assert results['NM_001301227.1:c.773-3dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001301227.1:c.773-3dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001301227.1:c.773-3dup']['alt_genomic_loci'], []) + assert results['NM_001301227.1:c.773-3dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683248dup', 'vcf': {'chr': 'chr9', 'pos': '35683240', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001301227.1:c.773-3dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683251dup', 'vcf': {'chr': 'chr9', 'pos': '35683243', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001301227.1:c.773-3dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683248dup', 'vcf': {'chr': '9', 'pos': '35683240', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001301227.1:c.773-3dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683251dup', 'vcf': {'chr': '9', 'pos': '35683243', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001301227.1:c.773-3dup']['reference_sequence_records'] == {'transcript': 
'https://www.ncbi.nlm.nih.gov/nuccore/NM_001301227.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001288156.1'} assert 'NM_001301226.1:c.772+1002dup' in list(results.keys()) - assert results['NM_001301226.1:c.772+1002dup']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001301226.1:c.772+1002dup']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001301226.1:c.772+1002dup']['alt_genomic_loci'], []) - assert results['NM_001301226.1:c.772+1002dup']['gene_symbol'] == 'TPM2' - assert results['NM_001301226.1:c.772+1002dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001288155.1:p.?', 'slr': 'NP_001288155.1:p.?'} assert results['NM_001301226.1:c.772+1002dup']['submitted_variant'] == '9-35683240-T-TG' - assert results['NM_001301226.1:c.772+1002dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001301226.1):c.772+1002dup' - assert results['NM_001301226.1:c.772+1002dup']['hgvs_lrg_variant'] == '' + assert results['NM_001301226.1:c.772+1002dup']['gene_symbol'] == 'TPM2' + assert results['NM_001301226.1:c.772+1002dup']['gene_ids'] == {'hgnc_id': 'HGNC:12011', 'entrez_gene_id': '7169', 'ucsc_id': 'uc064syd.1', 'omim_id': ['190990']} assert results['NM_001301226.1:c.772+1002dup']['hgvs_transcript_variant'] == 'NM_001301226.1:c.772+1002dup' + assert results['NM_001301226.1:c.772+1002dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_001301226.1):c.772+1002dup' + assert results['NM_001301226.1:c.772+1002dup']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001301226.1:c.772+1002dup']['hgvs_refseqgene_variant'] == '' - assert results['NM_001301226.1:c.772+1002dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683248dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} - assert results['NM_001301226.1:c.772+1002dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683251dup', 'vcf': {'chr': 
'chr9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} - assert results['NM_001301226.1:c.772+1002dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683248dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} - assert results['NM_001301226.1:c.772+1002dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683251dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} - assert results['NM_001301226.1:c.772+1002dup']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001288155.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001301226.1'} + assert results['NM_001301226.1:c.772+1002dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001288155.1:p.?', 'slr': 'NP_001288155.1:p.?'} + assert results['NM_001301226.1:c.772+1002dup']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001301226.1:c.772+1002dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001301226.1:c.772+1002dup']['alt_genomic_loci'], []) + assert results['NM_001301226.1:c.772+1002dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683248dup', 'vcf': {'chr': 'chr9', 'pos': '35683240', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001301226.1:c.772+1002dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683251dup', 'vcf': {'chr': 'chr9', 'pos': '35683243', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001301226.1:c.772+1002dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683248dup', 'vcf': {'chr': '9', 'pos': '35683240', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_001301226.1:c.772+1002dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683251dup', 'vcf': {'chr': '9', 'pos': '35683243', 'ref': 'T', 'alt': 'TG'}} + assert 
results['NM_001301226.1:c.772+1002dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001301226.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001288155.1'} assert 'NM_213674.1:c.772+1002dup' in list(results.keys()) - assert results['NM_213674.1:c.772+1002dup']['hgvs_lrg_transcript_variant'] == 'LRG_680t1:c.772+1002dup' - assert results['NM_213674.1:c.772+1002dup']['refseqgene_context_intronic_sequence'] == 'NG_011620.1(NM_213674.1):c.772+1002dup' - self.assertCountEqual(results['NM_213674.1:c.772+1002dup']['alt_genomic_loci'], []) - assert results['NM_213674.1:c.772+1002dup']['gene_symbol'] == 'TPM2' - assert results['NM_213674.1:c.772+1002dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_998839.1(LRG_680p1):p.?', 'slr': 'NP_998839.1:p.?'} assert results['NM_213674.1:c.772+1002dup']['submitted_variant'] == '9-35683240-T-TG' - assert results['NM_213674.1:c.772+1002dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_213674.1):c.772+1002dup' - assert results['NM_213674.1:c.772+1002dup']['hgvs_lrg_variant'] == 'LRG_680:g.11814dup' + assert results['NM_213674.1:c.772+1002dup']['gene_symbol'] == 'TPM2' + assert results['NM_213674.1:c.772+1002dup']['gene_ids'] == {'hgnc_id': 'HGNC:12011', 'entrez_gene_id': '7169', 'ucsc_id': 'uc064syd.1', 'omim_id': ['190990']} assert results['NM_213674.1:c.772+1002dup']['hgvs_transcript_variant'] == 'NM_213674.1:c.772+1002dup' + assert results['NM_213674.1:c.772+1002dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_213674.1):c.772+1002dup' + assert results['NM_213674.1:c.772+1002dup']['refseqgene_context_intronic_sequence'] == 'NG_011620.1(NM_213674.1):c.772+1002dup' assert results['NM_213674.1:c.772+1002dup']['hgvs_refseqgene_variant'] == 'NG_011620.1:g.11814dup' - assert results['NM_213674.1:c.772+1002dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683248dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683240', 
'alt': 'TG'}} - assert results['NM_213674.1:c.772+1002dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683251dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} - assert results['NM_213674.1:c.772+1002dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683248dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} - assert results['NM_213674.1:c.772+1002dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683251dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} - assert results['NM_213674.1:c.772+1002dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011620.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_998839.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_213674.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_680.xml'} - - assert 'NM_003289.3:c.773-3dup' in list(results.keys()) - assert results['NM_003289.3:c.773-3dup']['hgvs_lrg_transcript_variant'] == 'LRG_680t2:c.773-3dup' - assert results['NM_003289.3:c.773-3dup']['refseqgene_context_intronic_sequence'] == 'NG_011620.1(NM_003289.3):c.773-3dup' - self.assertCountEqual(results['NM_003289.3:c.773-3dup']['alt_genomic_loci'], []) - assert results['NM_003289.3:c.773-3dup']['gene_symbol'] == 'TPM2' - assert results['NM_003289.3:c.773-3dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_003280.2(LRG_680p2):p.?', 'slr': 'NP_003280.2:p.?'} - assert results['NM_003289.3:c.773-3dup']['submitted_variant'] == '9-35683240-T-TG' - assert results['NM_003289.3:c.773-3dup']['genome_context_intronic_sequence'] == 'NC_000009.11(NM_003289.3):c.773-3dup' - assert results['NM_003289.3:c.773-3dup']['hgvs_lrg_variant'] == 'LRG_680:g.11814dup' - assert results['NM_003289.3:c.773-3dup']['hgvs_transcript_variant'] == 'NM_003289.3:c.773-3dup' - assert 
results['NM_003289.3:c.773-3dup']['hgvs_refseqgene_variant'] == 'NG_011620.1:g.11814dup' - assert results['NM_003289.3:c.773-3dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683248dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} - assert results['NM_003289.3:c.773-3dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683251dup', 'vcf': {'chr': 'chr9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} - assert results['NM_003289.3:c.773-3dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683248dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683240', 'alt': 'TG'}} - assert results['NM_003289.3:c.773-3dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683251dup', 'vcf': {'chr': '9', 'ref': 'T', 'pos': '35683243', 'alt': 'TG'}} - assert results['NM_003289.3:c.773-3dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011620.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_003280.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_003289.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_680.xml'} - - assert results['flag'] == 'gene_variant' + assert results['NM_213674.1:c.772+1002dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_998839.1(LRG_680p1):p.?', 'slr': 'NP_998839.1:p.?'} + assert results['NM_213674.1:c.772+1002dup']['hgvs_lrg_transcript_variant'] == 'LRG_680t1:c.772+1002dup' + assert results['NM_213674.1:c.772+1002dup']['hgvs_lrg_variant'] == 'LRG_680:g.11814dup' + self.assertCountEqual(results['NM_213674.1:c.772+1002dup']['alt_genomic_loci'], []) + assert results['NM_213674.1:c.772+1002dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683248dup', 'vcf': {'chr': 'chr9', 'pos': '35683240', 'ref': 'T', 'alt': 'TG'}} + assert 
results['NM_213674.1:c.772+1002dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683251dup', 'vcf': {'chr': 'chr9', 'pos': '35683243', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_213674.1:c.772+1002dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.35683248dup', 'vcf': {'chr': '9', 'pos': '35683240', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_213674.1:c.772+1002dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.35683251dup', 'vcf': {'chr': '9', 'pos': '35683243', 'ref': 'T', 'alt': 'TG'}} + assert results['NM_213674.1:c.772+1002dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_213674.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_998839.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011620.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_680.xml'} def test_variant312(self): variant = '9-135796754-G-A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_000368.4:c.733C>T' in list(results.keys()) - assert results['NM_000368.4:c.733C>T']['hgvs_lrg_transcript_variant'] == 'LRG_486t1:c.733C>T' - assert results['NM_000368.4:c.733C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000368.4:c.733C>T']['alt_genomic_loci'], []) - assert results['NM_000368.4:c.733C>T']['gene_symbol'] == 'TSC1' - assert results['NM_000368.4:c.733C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000359.1(LRG_486p1):p.(Arg245Ter)', 'slr': 'NP_000359.1:p.(R245*)'} - assert results['NM_000368.4:c.733C>T']['submitted_variant'] == '9-135796754-G-A' - assert results['NM_000368.4:c.733C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_000368.4:c.733C>T']['hgvs_lrg_variant'] == 'LRG_486:g.28267C>T' - assert results['NM_000368.4:c.733C>T']['hgvs_transcript_variant'] == 'NM_000368.4:c.733C>T' - assert 
results['NM_000368.4:c.733C>T']['hgvs_refseqgene_variant'] == 'NG_012386.1:g.28267C>T' - assert results['NM_000368.4:c.733C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': 'chr9', 'ref': 'G', 'pos': '135796754', 'alt': 'A'}} - assert results['NM_000368.4:c.733C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': 'chr9', 'ref': 'G', 'pos': '132921367', 'alt': 'A'}} - assert results['NM_000368.4:c.733C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': '9', 'ref': 'G', 'pos': '135796754', 'alt': 'A'}} - assert results['NM_000368.4:c.733C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': '9', 'ref': 'G', 'pos': '132921367', 'alt': 'A'}} - assert results['NM_000368.4:c.733C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012386.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_486.xml'} + assert results['flag'] == 'gene_variant' + assert 'NM_001162427.1:c.580C>T' in list(results.keys()) + assert results['NM_001162427.1:c.580C>T']['submitted_variant'] == '9-135796754-G-A' + assert results['NM_001162427.1:c.580C>T']['gene_symbol'] == 'TSC1' + assert results['NM_001162427.1:c.580C>T']['gene_ids'] == {'hgnc_id': 'HGNC:12362', 'entrez_gene_id': '7248', 'ucsc_id': 'uc004cca.3', 'omim_id': ['605284']} + assert results['NM_001162427.1:c.580C>T']['hgvs_transcript_variant'] == 'NM_001162427.1:c.580C>T' + assert results['NM_001162427.1:c.580C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001162427.1:c.580C>T']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_001162427.1:c.580C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_001162427.1:c.580C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001155899.1:p.(Arg194Ter)', 'slr': 'NP_001155899.1:p.(R194*)'} + assert results['NM_001162427.1:c.580C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001162427.1:c.580C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001162427.1:c.580C>T']['alt_genomic_loci'], []) + assert results['NM_001162427.1:c.580C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': 'chr9', 'pos': '135796754', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001162427.1:c.580C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': 'chr9', 'pos': '132921367', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001162427.1:c.580C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': '9', 'pos': '135796754', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001162427.1:c.580C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': '9', 'pos': '132921367', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001162427.1:c.580C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162427.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155899.1'} assert 'NM_001162426.1:c.733C>T' in list(results.keys()) - assert results['NM_001162426.1:c.733C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001162426.1:c.733C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001162426.1:c.733C>T']['alt_genomic_loci'], []) - assert results['NM_001162426.1:c.733C>T']['gene_symbol'] == 'TSC1' - assert results['NM_001162426.1:c.733C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_001155898.1:p.(Arg245Ter)', 'slr': 'NP_001155898.1:p.(R245*)'} assert results['NM_001162426.1:c.733C>T']['submitted_variant'] == '9-135796754-G-A' - assert results['NM_001162426.1:c.733C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001162426.1:c.733C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001162426.1:c.733C>T']['gene_symbol'] == 'TSC1' + assert results['NM_001162426.1:c.733C>T']['gene_ids'] == {'hgnc_id': 'HGNC:12362', 'entrez_gene_id': '7248', 'ucsc_id': 'uc004cca.3', 'omim_id': ['605284']} assert results['NM_001162426.1:c.733C>T']['hgvs_transcript_variant'] == 'NM_001162426.1:c.733C>T' + assert results['NM_001162426.1:c.733C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001162426.1:c.733C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001162426.1:c.733C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001162426.1:c.733C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': 'chr9', 'ref': 'G', 'pos': '135796754', 'alt': 'A'}} - assert results['NM_001162426.1:c.733C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': 'chr9', 'ref': 'G', 'pos': '132921367', 'alt': 'A'}} - assert results['NM_001162426.1:c.733C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': '9', 'ref': 'G', 'pos': '135796754', 'alt': 'A'}} - assert results['NM_001162426.1:c.733C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': '9', 'ref': 'G', 'pos': '132921367', 'alt': 'A'}} - assert results['NM_001162426.1:c.733C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155898.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162426.1'} + assert 
results['NM_001162426.1:c.733C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001155898.1:p.(Arg245Ter)', 'slr': 'NP_001155898.1:p.(R245*)'} + assert results['NM_001162426.1:c.733C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001162426.1:c.733C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001162426.1:c.733C>T']['alt_genomic_loci'], []) + assert results['NM_001162426.1:c.733C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': 'chr9', 'pos': '135796754', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001162426.1:c.733C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': 'chr9', 'pos': '132921367', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001162426.1:c.733C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': '9', 'pos': '135796754', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001162426.1:c.733C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': '9', 'pos': '132921367', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001162426.1:c.733C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162426.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155898.1'} + + assert 'NM_000368.4:c.733C>T' in list(results.keys()) + assert results['NM_000368.4:c.733C>T']['submitted_variant'] == '9-135796754-G-A' + assert results['NM_000368.4:c.733C>T']['gene_symbol'] == 'TSC1' + assert results['NM_000368.4:c.733C>T']['gene_ids'] == {'hgnc_id': 'HGNC:12362', 'entrez_gene_id': '7248', 'ucsc_id': 'uc004cca.3', 'omim_id': ['605284']} + assert results['NM_000368.4:c.733C>T']['hgvs_transcript_variant'] == 'NM_000368.4:c.733C>T' + assert results['NM_000368.4:c.733C>T']['genome_context_intronic_sequence'] == '' + assert 
results['NM_000368.4:c.733C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000368.4:c.733C>T']['hgvs_refseqgene_variant'] == 'NG_012386.1:g.28267C>T' + assert results['NM_000368.4:c.733C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000359.1(LRG_486p1):p.(Arg245Ter)', 'slr': 'NP_000359.1:p.(R245*)'} + assert results['NM_000368.4:c.733C>T']['hgvs_lrg_transcript_variant'] == 'LRG_486t1:c.733C>T' + assert results['NM_000368.4:c.733C>T']['hgvs_lrg_variant'] == 'LRG_486:g.28267C>T' + self.assertCountEqual(results['NM_000368.4:c.733C>T']['alt_genomic_loci'], []) + assert results['NM_000368.4:c.733C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': 'chr9', 'pos': '135796754', 'ref': 'G', 'alt': 'A'}} + assert results['NM_000368.4:c.733C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': 'chr9', 'pos': '132921367', 'ref': 'G', 'alt': 'A'}} + assert results['NM_000368.4:c.733C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': '9', 'pos': '135796754', 'ref': 'G', 'alt': 'A'}} + assert results['NM_000368.4:c.733C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': '9', 'pos': '132921367', 'ref': 'G', 'alt': 'A'}} + assert results['NM_000368.4:c.733C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000368.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000359.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_012386.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_486.xml'} - assert results['flag'] == 'gene_variant' assert 'NM_001362177.1:c.370C>T' in list(results.keys()) - assert results['NM_001362177.1:c.370C>T']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_001362177.1:c.370C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001362177.1:c.370C>T']['alt_genomic_loci'], []) - assert results['NM_001362177.1:c.370C>T']['gene_symbol'] == 'TSC1' - assert results['NM_001362177.1:c.370C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001349106.1:p.(Arg124Ter)', 'slr': 'NP_001349106.1:p.(R124*)'} assert results['NM_001362177.1:c.370C>T']['submitted_variant'] == '9-135796754-G-A' - assert results['NM_001362177.1:c.370C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001362177.1:c.370C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001362177.1:c.370C>T']['gene_symbol'] == 'TSC1' + assert results['NM_001362177.1:c.370C>T']['gene_ids'] == {'hgnc_id': 'HGNC:12362', 'entrez_gene_id': '7248', 'ucsc_id': 'uc004cca.3', 'omim_id': ['605284']} assert results['NM_001362177.1:c.370C>T']['hgvs_transcript_variant'] == 'NM_001362177.1:c.370C>T' + assert results['NM_001362177.1:c.370C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001362177.1:c.370C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001362177.1:c.370C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001362177.1:c.370C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': 'chr9', 'ref': 'G', 'pos': '135796754', 'alt': 'A'}} - assert results['NM_001362177.1:c.370C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': 'chr9', 'ref': 'G', 'pos': '132921367', 'alt': 'A'}} - assert results['NM_001362177.1:c.370C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': '9', 'ref': 'G', 'pos': '135796754', 'alt': 'A'}} - assert results['NM_001362177.1:c.370C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': '9', 
'ref': 'G', 'pos': '132921367', 'alt': 'A'}} - assert results['NM_001362177.1:c.370C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001349106.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001362177.1'} - - assert 'NM_001162427.1:c.580C>T' in list(results.keys()) - assert results['NM_001162427.1:c.580C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001162427.1:c.580C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001162427.1:c.580C>T']['alt_genomic_loci'], []) - assert results['NM_001162427.1:c.580C>T']['gene_symbol'] == 'TSC1' - assert results['NM_001162427.1:c.580C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001155899.1:p.(Arg194Ter)', 'slr': 'NP_001155899.1:p.(R194*)'} - assert results['NM_001162427.1:c.580C>T']['submitted_variant'] == '9-135796754-G-A' - assert results['NM_001162427.1:c.580C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001162427.1:c.580C>T']['hgvs_lrg_variant'] == '' - assert results['NM_001162427.1:c.580C>T']['hgvs_transcript_variant'] == 'NM_001162427.1:c.580C>T' - assert results['NM_001162427.1:c.580C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001162427.1:c.580C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': 'chr9', 'ref': 'G', 'pos': '135796754', 'alt': 'A'}} - assert results['NM_001162427.1:c.580C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': 'chr9', 'ref': 'G', 'pos': '132921367', 'alt': 'A'}} - assert results['NM_001162427.1:c.580C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': '9', 'ref': 'G', 'pos': '135796754', 'alt': 'A'}} - assert results['NM_001162427.1:c.580C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': 
{'chr': '9', 'ref': 'G', 'pos': '132921367', 'alt': 'A'}} - assert results['NM_001162427.1:c.580C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001155899.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001162427.1'} - + assert results['NM_001362177.1:c.370C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001349106.1:p.(Arg124Ter)', 'slr': 'NP_001349106.1:p.(R124*)'} + assert results['NM_001362177.1:c.370C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001362177.1:c.370C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001362177.1:c.370C>T']['alt_genomic_loci'], []) + assert results['NM_001362177.1:c.370C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': 'chr9', 'pos': '135796754', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001362177.1:c.370C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': 'chr9', 'pos': '132921367', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001362177.1:c.370C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.135796754G>A', 'vcf': {'chr': '9', 'pos': '135796754', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001362177.1:c.370C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.132921367G>A', 'vcf': {'chr': '9', 'pos': '132921367', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001362177.1:c.370C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001362177.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001349106.1'} def test_variant313(self): variant = 'HG536_PATCH-10391-AC-A' @@ -17343,97 +17922,100 @@ def test_variant313(self): assert results['flag'] == 'gene_variant' assert 'NM_005247.2:c.616del' in list(results.keys()) - assert results['NM_005247.2:c.616del']['hgvs_lrg_transcript_variant'] == '' 
- assert results['NM_005247.2:c.616del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_005247.2:c.616del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003571046.1:g.10396del', 'vcf': {'chr': 'HG536_PATCH', 'ref': 'AC', 'pos': '10391', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003571046.1:g.10396del', 'vcf': {'chr': 'NW_003571046.1', 'ref': 'AC', 'pos': '10391', 'alt': 'A'}}}]) - assert results['NM_005247.2:c.616del']['gene_symbol'] == 'FGF3' - assert results['NM_005247.2:c.616del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005238.1(LRG_1303p1):p.(Val206SerfsTer117)', 'slr': 'NP_005238.1:p.(V206Sfs*117)'} assert results['NM_005247.2:c.616del']['submitted_variant'] == 'HG536_PATCH-10391-AC-A' - assert results['NM_005247.2:c.616del']['genome_context_intronic_sequence'] == '' - assert results['NM_005247.2:c.616del']['hgvs_lrg_variant'] == 'LRG_1303:g.14016del' + assert results['NM_005247.2:c.616del']['gene_symbol'] == 'FGF3' + assert results['NM_005247.2:c.616del']['gene_ids'] == {'hgnc_id': 'HGNC:3681', 'entrez_gene_id': '2248', 'ucsc_id': 'uc001oph.4', 'omim_id': ['164950']} assert results['NM_005247.2:c.616del']['hgvs_transcript_variant'] == 'NM_005247.2:c.616del' + assert results['NM_005247.2:c.616del']['genome_context_intronic_sequence'] == '' + assert results['NM_005247.2:c.616del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_005247.2:c.616del']['hgvs_refseqgene_variant'] == 'NG_009016.1:g.14016del' - assert results['NM_005247.2:c.616del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.69625181del', 'vcf': {'chr': 'chr11', 'ref': 'AC', 'pos': '69625176', 'alt': 'A'}} - assert results['NM_005247.2:c.616del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.69810413del', 'vcf': {'chr': 'chr11', 'ref': 'AC', 'pos': '69810408', 'alt': 'A'}} - assert 
results['NM_005247.2:c.616del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.69625181del', 'vcf': {'chr': '11', 'ref': 'AC', 'pos': '69625176', 'alt': 'A'}} - assert results['NM_005247.2:c.616del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.69810413del', 'vcf': {'chr': '11', 'ref': 'AC', 'pos': '69810408', 'alt': 'A'}} - assert results['NM_005247.2:c.616del']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009016.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005238.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005247.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_1303.xml'} - + assert results['NM_005247.2:c.616del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_005238.1(LRG_1303p1):p.(Val206SerfsTer117)', 'slr': 'NP_005238.1:p.(V206Sfs*117)'} + assert results['NM_005247.2:c.616del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_005247.2:c.616del']['hgvs_lrg_variant'] == 'LRG_1303:g.14016del' + self.assertCountEqual(results['NM_005247.2:c.616del']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003571046.1:g.10396del', 'vcf': {'chr': 'HG536_PATCH', 'pos': '10391', 'ref': 'AC', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003571046.1:g.10396del', 'vcf': {'chr': 'NW_003571046.1', 'pos': '10391', 'ref': 'AC', 'alt': 'A'}}}]) + assert results['NM_005247.2:c.616del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.69625181del', 'vcf': {'chr': 'chr11', 'pos': '69625176', 'ref': 'AC', 'alt': 'A'}} + assert results['NM_005247.2:c.616del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.69810413del', 'vcf': {'chr': 'chr11', 'pos': '69810408', 'ref': 'AC', 'alt': 'A'}} + assert results['NM_005247.2:c.616del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.69625181del', 
'vcf': {'chr': '11', 'pos': '69625176', 'ref': 'AC', 'alt': 'A'}} + assert results['NM_005247.2:c.616del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.69810413del', 'vcf': {'chr': '11', 'pos': '69810408', 'ref': 'AC', 'alt': 'A'}} + assert results['NM_005247.2:c.616del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_005247.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_005238.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009016.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_1303.xml'} def test_variant314(self): variant = 'HG865_PATCH-33547-G-A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NR_110766.1:n.833+969C>T' in list(results.keys()) - assert results['NR_110766.1:n.833+969C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NR_110766.1:n.833+969C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NR_110766.1:n.833+969C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'NW_004070871.1', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}]) - assert results['NR_110766.1:n.833+969C>T']['gene_symbol'] == 'SHANK2' - assert results['NR_110766.1:n.833+969C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} assert results['NR_110766.1:n.833+969C>T']['submitted_variant'] == 'HG865_PATCH-33547-G-A' - assert results['NR_110766.1:n.833+969C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NR_110766.1):c.833+969C>T' - assert results['NR_110766.1:n.833+969C>T']['hgvs_lrg_variant'] == '' + assert results['NR_110766.1:n.833+969C>T']['gene_symbol'] == 'SHANK2' + assert 
results['NR_110766.1:n.833+969C>T']['gene_ids'] == {'hgnc_id': 'HGNC:14295', 'entrez_gene_id': '22941', 'ucsc_id': 'uc058etp.1', 'omim_id': ['603290']} assert results['NR_110766.1:n.833+969C>T']['hgvs_transcript_variant'] == 'NR_110766.1:n.833+969C>T' + assert results['NR_110766.1:n.833+969C>T']['genome_context_intronic_sequence'] == 'NC_000011.9(NR_110766.1):c.833+969C>T' + assert results['NR_110766.1:n.833+969C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NR_110766.1:n.833+969C>T']['hgvs_refseqgene_variant'] == '' - assert results['NR_110766.1:n.833+969C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '70335439', 'alt': 'A'}} - assert results['NR_110766.1:n.833+969C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '70489334', 'alt': 'A'}} - assert results['NR_110766.1:n.833+969C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '70335439', 'alt': 'A'}} - assert results['NR_110766.1:n.833+969C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '70489334', 'alt': 'A'}} + assert results['NR_110766.1:n.833+969C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'Non-coding :n.', 'slr': 'Non-coding :n.'} + assert results['NR_110766.1:n.833+969C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NR_110766.1:n.833+969C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NR_110766.1:n.833+969C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'pos': '33547', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'NW_004070871.1', 'pos': 
'33547', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NR_110766.1:n.833+969C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': 'chr11', 'pos': '70335439', 'ref': 'G', 'alt': 'A'}} + assert results['NR_110766.1:n.833+969C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': 'chr11', 'pos': '70489334', 'ref': 'G', 'alt': 'A'}} + assert results['NR_110766.1:n.833+969C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': '11', 'pos': '70335439', 'ref': 'G', 'alt': 'A'}} + assert results['NR_110766.1:n.833+969C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': '11', 'pos': '70489334', 'ref': 'G', 'alt': 'A'}} assert results['NR_110766.1:n.833+969C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NR_110766.1'} assert 'NM_012309.4:c.2566C>T' in list(results.keys()) - assert results['NM_012309.4:c.2566C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_012309.4:c.2566C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_012309.4:c.2566C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'NW_004070871.1', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}]) - assert results['NM_012309.4:c.2566C>T']['gene_symbol'] == 'SHANK2' - assert results['NM_012309.4:c.2566C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_036441.2:p.(Leu856=)', 'slr': 'NP_036441.2:p.(L856=)'} assert results['NM_012309.4:c.2566C>T']['submitted_variant'] == 'HG865_PATCH-33547-G-A' - assert results['NM_012309.4:c.2566C>T']['genome_context_intronic_sequence'] == '' - assert 
results['NM_012309.4:c.2566C>T']['hgvs_lrg_variant'] == '' - assert results['NM_012309.4:c.2566C>T']['hgvs_transcript_variant'] == 'NM_012309.4:c.2566C>T' - assert results['NM_012309.4:c.2566C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_012309.4:c.2566C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.70336423G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '70336423', 'alt': 'A'}} - assert results['NM_012309.4:c.2566C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '70489334', 'alt': 'A'}} - assert results['NM_012309.4:c.2566C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.70336423G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '70336423', 'alt': 'A'}} - assert results['NM_012309.4:c.2566C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '70489334', 'alt': 'A'}} - assert results['NM_012309.4:c.2566C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_036441.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_012309.4'} - - assert 'NM_133266.4:c.802C>T' in list(results.keys()) - assert results['NM_133266.4:c.802C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_133266.4:c.802C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_133266.4:c.802C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'NW_004070871.1', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}]) - assert results['NM_133266.4:c.802C>T']['gene_symbol'] == 'SHANK2' - assert results['NM_133266.4:c.802C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 
'NP_573573.2:p.(Leu268=)', 'slr': 'NP_573573.2:p.(L268=)'} - assert results['NM_133266.4:c.802C>T']['submitted_variant'] == 'HG865_PATCH-33547-G-A' - assert results['NM_133266.4:c.802C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_133266.4:c.802C>T']['hgvs_lrg_variant'] == '' - assert results['NM_133266.4:c.802C>T']['hgvs_transcript_variant'] == 'NM_133266.4:c.802C>T' - assert results['NM_133266.4:c.802C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_133266.4:c.802C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '70335439', 'alt': 'A'}} - assert results['NM_133266.4:c.802C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '70489334', 'alt': 'A'}} - assert results['NM_133266.4:c.802C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '70335439', 'alt': 'A'}} - assert results['NM_133266.4:c.802C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '70489334', 'alt': 'A'}} - assert results['NM_133266.4:c.802C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_573573.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_133266.4'} + assert results['NM_012309.4:c.2566C>T']['gene_symbol'] == 'SHANK2' + assert results['NM_012309.4:c.2566C>T']['gene_ids'] == {'hgnc_id': 'HGNC:14295', 'entrez_gene_id': '22941', 'ucsc_id': 'uc058etp.1', 'omim_id': ['603290']} + assert results['NM_012309.4:c.2566C>T']['hgvs_transcript_variant'] == 'NM_012309.4:c.2566C>T' + assert results['NM_012309.4:c.2566C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_012309.4:c.2566C>T']['refseqgene_context_intronic_sequence'] == '' + assert 
results['NM_012309.4:c.2566C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_012309.4:c.2566C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_036441.2:p.(Leu856=)', 'slr': 'NP_036441.2:p.(L856=)'} + assert results['NM_012309.4:c.2566C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_012309.4:c.2566C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_012309.4:c.2566C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'pos': '33547', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'NW_004070871.1', 'pos': '33547', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NM_012309.4:c.2566C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.70336423G>A', 'vcf': {'chr': 'chr11', 'pos': '70336423', 'ref': 'G', 'alt': 'A'}} + assert results['NM_012309.4:c.2566C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': 'chr11', 'pos': '70489334', 'ref': 'G', 'alt': 'A'}} + assert results['NM_012309.4:c.2566C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.70336423G>A', 'vcf': {'chr': '11', 'pos': '70336423', 'ref': 'G', 'alt': 'A'}} + assert results['NM_012309.4:c.2566C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': '11', 'pos': '70489334', 'ref': 'G', 'alt': 'A'}} + assert results['NM_012309.4:c.2566C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_012309.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_036441.2'} - assert results['flag'] == 'gene_variant' assert 'NM_133266.3:c.802C>T' in list(results.keys()) - assert results['NM_133266.3:c.802C>T']['hgvs_lrg_transcript_variant'] == '' - assert 
results['NM_133266.3:c.802C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_133266.3:c.802C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'NW_004070871.1', 'ref': 'G', 'pos': '33547', 'alt': 'A'}}}]) - assert results['NM_133266.3:c.802C>T']['gene_symbol'] == 'SHANK2' - assert results['NM_133266.3:c.802C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_573573.2:p.(Leu268=)', 'slr': 'NP_573573.2:p.(L268=)'} assert results['NM_133266.3:c.802C>T']['submitted_variant'] == 'HG865_PATCH-33547-G-A' - assert results['NM_133266.3:c.802C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_133266.3:c.802C>T']['hgvs_lrg_variant'] == '' + assert results['NM_133266.3:c.802C>T']['gene_symbol'] == 'SHANK2' + assert results['NM_133266.3:c.802C>T']['gene_ids'] == {'hgnc_id': 'HGNC:14295', 'entrez_gene_id': '22941', 'ucsc_id': 'uc058etp.1', 'omim_id': ['603290']} assert results['NM_133266.3:c.802C>T']['hgvs_transcript_variant'] == 'NM_133266.3:c.802C>T' + assert results['NM_133266.3:c.802C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_133266.3:c.802C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_133266.3:c.802C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_133266.3:c.802C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '70335439', 'alt': 'A'}} + assert results['NM_133266.3:c.802C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_573573.2:p.(Leu268=)', 'slr': 'NP_573573.2:p.(L268=)'} + assert results['NM_133266.3:c.802C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_133266.3:c.802C>T']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['NM_133266.3:c.802C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'pos': '33547', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'NW_004070871.1', 'pos': '33547', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NM_133266.3:c.802C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': 'chr11', 'pos': '70335439', 'ref': 'G', 'alt': 'A'}} assert 'hg38' not in list(results['NM_133266.3:c.802C>T']['primary_assembly_loci'].keys()) - assert results['NM_133266.3:c.802C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '70335439', 'alt': 'A'}} + assert results['NM_133266.3:c.802C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': '11', 'pos': '70335439', 'ref': 'G', 'alt': 'A'}} assert 'grch38' not in list(results['NM_133266.3:c.802C>T']['primary_assembly_loci'].keys()) - assert results['NM_133266.3:c.802C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_573573.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_133266.3'} + assert results['NM_133266.3:c.802C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_133266.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_573573.2'} + assert 'NM_133266.4:c.802C>T' in list(results.keys()) + assert results['NM_133266.4:c.802C>T']['submitted_variant'] == 'HG865_PATCH-33547-G-A' + assert results['NM_133266.4:c.802C>T']['gene_symbol'] == 'SHANK2' + assert results['NM_133266.4:c.802C>T']['gene_ids'] == {'hgnc_id': 'HGNC:14295', 'entrez_gene_id': '22941', 'ucsc_id': 'uc058etp.1', 'omim_id': ['603290']} + assert results['NM_133266.4:c.802C>T']['hgvs_transcript_variant'] 
== 'NM_133266.4:c.802C>T' + assert results['NM_133266.4:c.802C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_133266.4:c.802C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_133266.4:c.802C>T']['hgvs_refseqgene_variant'] == '' + assert results['NM_133266.4:c.802C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_573573.2:p.(Leu268=)', 'slr': 'NP_573573.2:p.(L268=)'} + assert results['NM_133266.4:c.802C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_133266.4:c.802C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_133266.4:c.802C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'HG865_PATCH', 'pos': '33547', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.33547G>A', 'vcf': {'chr': 'NW_004070871.1', 'pos': '33547', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NM_133266.4:c.802C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': 'chr11', 'pos': '70335439', 'ref': 'G', 'alt': 'A'}} + assert results['NM_133266.4:c.802C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': 'chr11', 'pos': '70489334', 'ref': 'G', 'alt': 'A'}} + assert results['NM_133266.4:c.802C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000011.9:g.70335439G>A', 'vcf': {'chr': '11', 'pos': '70335439', 'ref': 'G', 'alt': 'A'}} + assert results['NM_133266.4:c.802C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.70489334G>A', 'vcf': {'chr': '11', 'pos': '70489334', 'ref': 'G', 'alt': 'A'}} + assert results['NM_133266.4:c.802C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_133266.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_573573.2'} def test_variant315(self): variant = 
'HG865_PATCH-569441-G-T' @@ -17442,22 +18024,22 @@ def test_variant315(self): assert results['flag'] == 'gene_variant' assert 'NM_012309.4:c.960C>A' in list(results.keys()) - assert results['NM_012309.4:c.960C>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_012309.4:c.960C>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_012309.4:c.960C>A']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.569441G>T', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'G', 'pos': '569441', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.569441G>T', 'vcf': {'chr': 'NW_004070871.1', 'ref': 'G', 'pos': '569441', 'alt': 'T'}}}]) - assert results['NM_012309.4:c.960C>A']['gene_symbol'] == 'SHANK2' - assert results['NM_012309.4:c.960C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_036441.2:p.(Tyr320Ter)', 'slr': 'NP_036441.2:p.(Y320*)'} assert results['NM_012309.4:c.960C>A']['submitted_variant'] == 'HG865_PATCH-569441-G-T' - assert results['NM_012309.4:c.960C>A']['genome_context_intronic_sequence'] == '' - assert results['NM_012309.4:c.960C>A']['hgvs_lrg_variant'] == '' + assert results['NM_012309.4:c.960C>A']['gene_symbol'] == 'SHANK2' + assert results['NM_012309.4:c.960C>A']['gene_ids'] == {'hgnc_id': 'HGNC:14295', 'entrez_gene_id': '22941', 'ucsc_id': 'uc058etp.1', 'omim_id': ['603290']} assert results['NM_012309.4:c.960C>A']['hgvs_transcript_variant'] == 'NM_012309.4:c.960C>A' + assert results['NM_012309.4:c.960C>A']['genome_context_intronic_sequence'] == '' + assert results['NM_012309.4:c.960C>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_012309.4:c.960C>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_012309.4:c.960C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_036441.2:p.(Tyr320Ter)', 'slr': 'NP_036441.2:p.(Y320*)'} + assert results['NM_012309.4:c.960C>A']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_012309.4:c.960C>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_012309.4:c.960C>A']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.569441G>T', 'vcf': {'chr': 'HG865_PATCH', 'pos': '569441', 'ref': 'G', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.569441G>T', 'vcf': {'chr': 'NW_004070871.1', 'pos': '569441', 'ref': 'G', 'alt': 'T'}}}]) assert 'hg19' not in list(results['NM_012309.4:c.960C>A']['primary_assembly_loci'].keys()) - assert results['NM_012309.4:c.960C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71075228G>T', 'vcf': {'chr': 'chr11', 'ref': 'G', 'pos': '71075228', 'alt': 'T'}} + assert results['NM_012309.4:c.960C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71075228G>T', 'vcf': {'chr': 'chr11', 'pos': '71075228', 'ref': 'G', 'alt': 'T'}} assert 'grch37' not in list(results['NM_012309.4:c.960C>A']['primary_assembly_loci'].keys()) - assert results['NM_012309.4:c.960C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71075228G>T', 'vcf': {'chr': '11', 'ref': 'G', 'pos': '71075228', 'alt': 'T'}} - assert results['NM_012309.4:c.960C>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_036441.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_012309.4'} - + assert results['NM_012309.4:c.960C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71075228G>T', 'vcf': {'chr': '11', 'pos': '71075228', 'ref': 'G', 'alt': 'T'}} + assert results['NM_012309.4:c.960C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_012309.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_036441.2'} def test_variant316(self): variant = 'HG865_PATCH-574546-C-T' @@ -17466,22 +18048,22 @@ def test_variant316(self): assert results['flag'] == 'gene_variant' assert 
'NM_012309.4:c.913-5058G>A' in list(results.keys()) - assert results['NM_012309.4:c.913-5058G>A']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_012309.4:c.913-5058G>A']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_012309.4:c.913-5058G>A']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'HG865_PATCH', 'ref': 'C', 'pos': '574546', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'NW_004070871.1', 'ref': 'C', 'pos': '574546', 'alt': 'T'}}}]) - assert results['NM_012309.4:c.913-5058G>A']['gene_symbol'] == 'SHANK2' - assert results['NM_012309.4:c.913-5058G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_036441.2:p.?', 'slr': 'NP_036441.2:p.?'} assert results['NM_012309.4:c.913-5058G>A']['submitted_variant'] == 'HG865_PATCH-574546-C-T' - assert results['NM_012309.4:c.913-5058G>A']['genome_context_intronic_sequence'] == 'NC_000011.10(NM_012309.4):c.913-5058G>A' - assert results['NM_012309.4:c.913-5058G>A']['hgvs_lrg_variant'] == '' + assert results['NM_012309.4:c.913-5058G>A']['gene_symbol'] == 'SHANK2' + assert results['NM_012309.4:c.913-5058G>A']['gene_ids'] == {'hgnc_id': 'HGNC:14295', 'entrez_gene_id': '22941', 'ucsc_id': 'uc058etp.1', 'omim_id': ['603290']} assert results['NM_012309.4:c.913-5058G>A']['hgvs_transcript_variant'] == 'NM_012309.4:c.913-5058G>A' + assert results['NM_012309.4:c.913-5058G>A']['genome_context_intronic_sequence'] == 'NC_000011.10(NM_012309.4):c.913-5058G>A' + assert results['NM_012309.4:c.913-5058G>A']['refseqgene_context_intronic_sequence'] == '' assert results['NM_012309.4:c.913-5058G>A']['hgvs_refseqgene_variant'] == '' + assert results['NM_012309.4:c.913-5058G>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_036441.2:p.?', 'slr': 'NP_036441.2:p.?'} + assert results['NM_012309.4:c.913-5058G>A']['hgvs_lrg_transcript_variant'] == '' + assert 
results['NM_012309.4:c.913-5058G>A']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_012309.4:c.913-5058G>A']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'HG865_PATCH', 'pos': '574546', 'ref': 'C', 'alt': 'T'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070871.1:g.574546C>T', 'vcf': {'chr': 'NW_004070871.1', 'pos': '574546', 'ref': 'C', 'alt': 'T'}}}]) assert 'hg19' not in list(results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci'].keys()) - assert results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71080333C>T', 'vcf': {'chr': 'chr11', 'ref': 'C', 'pos': '71080333', 'alt': 'T'}} + assert results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71080333C>T', 'vcf': {'chr': 'chr11', 'pos': '71080333', 'ref': 'C', 'alt': 'T'}} assert 'grch37' not in list(results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci'].keys()) - assert results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71080333C>T', 'vcf': {'chr': '11', 'ref': 'C', 'pos': '71080333', 'alt': 'T'}} - assert results['NM_012309.4:c.913-5058G>A']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_036441.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_012309.4'} - + assert results['NM_012309.4:c.913-5058G>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000011.10:g.71080333C>T', 'vcf': {'chr': '11', 'pos': '71080333', 'ref': 'C', 'alt': 'T'}} + assert results['NM_012309.4:c.913-5058G>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_012309.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_036441.2'} def test_variant317(self): variant = 'HSCHR1_1_CTG31-133178-TAG-T' @@ -17490,22 +18072,22 @@ def 
test_variant317(self): assert results['flag'] == 'gene_variant' assert 'NM_020699.2:c.802_803insTT' in list(results.keys()) - assert results['NM_020699.2:c.802_803insTT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_020699.2:c.802_803insTT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_020699.2:c.802_803insTT']['alt_genomic_loci'], []) - assert results['NM_020699.2:c.802_803insTT']['gene_symbol'] == 'GATAD2B' - assert results['NM_020699.2:c.802_803insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065750.1:p.(Pro268LeufsTer26)', 'slr': 'NP_065750.1:p.(P268Lfs*26)'} assert results['NM_020699.2:c.802_803insTT']['submitted_variant'] == 'HSCHR1_1_CTG31-133178-TAG-T' - assert results['NM_020699.2:c.802_803insTT']['genome_context_intronic_sequence'] == '' - assert results['NM_020699.2:c.802_803insTT']['hgvs_lrg_variant'] == '' + assert results['NM_020699.2:c.802_803insTT']['gene_symbol'] == 'GATAD2B' + assert results['NM_020699.2:c.802_803insTT']['gene_ids'] == {'hgnc_id': 'HGNC:30778', 'entrez_gene_id': '57459', 'ucsc_id': 'uc001fdb.5', 'omim_id': ['614998']} assert results['NM_020699.2:c.802_803insTT']['hgvs_transcript_variant'] == 'NM_020699.2:c.802_803insTT' + assert results['NM_020699.2:c.802_803insTT']['genome_context_intronic_sequence'] == '' + assert results['NM_020699.2:c.802_803insTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_020699.2:c.802_803insTT']['hgvs_refseqgene_variant'] == '' - assert results['NM_020699.2:c.802_803insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.153789945_153789946insAA', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '153789945', 'alt': 'GAA'}} - assert results['NM_020699.2:c.802_803insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153817469_153817470insAA', 'vcf': {'chr': 'chr1', 'ref': 'G', 'pos': '153817469', 'alt': 'GAA'}} - assert 
results['NM_020699.2:c.802_803insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.153789945_153789946insAA', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '153789945', 'alt': 'GAA'}} - assert results['NM_020699.2:c.802_803insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153817469_153817470insAA', 'vcf': {'chr': '1', 'ref': 'G', 'pos': '153817469', 'alt': 'GAA'}} - assert results['NM_020699.2:c.802_803insTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065750.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020699.2'} - + assert results['NM_020699.2:c.802_803insTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_065750.1:p.(Pro268LeufsTer26)', 'slr': 'NP_065750.1:p.(P268Lfs*26)'} + assert results['NM_020699.2:c.802_803insTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_020699.2:c.802_803insTT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_020699.2:c.802_803insTT']['alt_genomic_loci'], []) + assert results['NM_020699.2:c.802_803insTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000001.10:g.153789945_153789946insAA', 'vcf': {'chr': 'chr1', 'pos': '153789945', 'ref': 'G', 'alt': 'GAA'}} + assert results['NM_020699.2:c.802_803insTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153817469_153817470insAA', 'vcf': {'chr': 'chr1', 'pos': '153817469', 'ref': 'G', 'alt': 'GAA'}} + assert results['NM_020699.2:c.802_803insTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000001.10:g.153789945_153789946insAA', 'vcf': {'chr': '1', 'pos': '153789945', 'ref': 'G', 'alt': 'GAA'}} + assert results['NM_020699.2:c.802_803insTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000001.11:g.153817469_153817470insAA', 'vcf': {'chr': '1', 'pos': '153817469', 'ref': 'G', 'alt': 'GAA'}} + assert 
results['NM_020699.2:c.802_803insTT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_020699.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_065750.1'} def test_variant318(self): variant = 'HSCHR6_MHC_MANN_CTG1-3848158-T-G' @@ -17514,22 +18096,22 @@ def test_variant318(self): assert results['flag'] == 'gene_variant' assert 'NM_021983.4:c.490G>C' in list(results.keys()) - assert results['NM_021983.4:c.490G>C']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_021983.4:c.490G>C']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_021983.4:c.490G>C']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3848158', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'chr6_mann_hap4', 'ref': 'T', 'pos': '3848158', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'T', 'pos': '3842538', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'ref': 'T', 'pos': '3842538', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'C', 'pos': '3884432', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'C', 'pos': '3884432', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3852542', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': 'C', 'pos': '3852542', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3853244', 'alt': 
'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': 'C', 'pos': '3853244', 'alt': 'G'}}}]) - assert results['NM_021983.4:c.490G>C']['gene_symbol'] == 'HLA-DRB4' - assert results['NM_021983.4:c.490G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068818.4:p.(Gly164Arg)', 'slr': 'NP_068818.4:p.(G164R)'} assert results['NM_021983.4:c.490G>C']['submitted_variant'] == 'HSCHR6_MHC_MANN_CTG1-3848158-T-G' - assert results['NM_021983.4:c.490G>C']['genome_context_intronic_sequence'] == '' - assert results['NM_021983.4:c.490G>C']['hgvs_lrg_variant'] == '' + assert results['NM_021983.4:c.490G>C']['gene_symbol'] == 'HLA-DRB4' + assert results['NM_021983.4:c.490G>C']['gene_ids'] == {'hgnc_id': 'HGNC:4952', 'entrez_gene_id': '3126', 'ucsc_id': 'uc011jsg.3', 'omim_id': []} assert results['NM_021983.4:c.490G>C']['hgvs_transcript_variant'] == 'NM_021983.4:c.490G>C' + assert results['NM_021983.4:c.490G>C']['genome_context_intronic_sequence'] == '' + assert results['NM_021983.4:c.490G>C']['refseqgene_context_intronic_sequence'] == '' assert results['NM_021983.4:c.490G>C']['hgvs_refseqgene_variant'] == 'NG_002433.1:g.5724C>G' + assert results['NM_021983.4:c.490G>C']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068818.4:p.(Gly164Arg)', 'slr': 'NP_068818.4:p.(G164R)'} + assert results['NM_021983.4:c.490G>C']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_021983.4:c.490G>C']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_021983.4:c.490G>C']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'pos': '3853244', 'ref': 'C', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3853244C>G', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'pos': '3853244', 'ref': 'C', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'pos': '3852542', 
'ref': 'C', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3852542C>G', 'vcf': {'chr': 'chr6_ssto_hap7', 'pos': '3852542', 'ref': 'C', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'pos': '3884432', 'ref': 'C', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3884432C>G', 'vcf': {'chr': 'chr6_mcf_hap5', 'pos': '3884432', 'ref': 'C', 'alt': 'G'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'pos': '3842538', 'ref': 'T', 'alt': 'G'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3842538T>G', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'pos': '3842538', 'ref': 'T', 'alt': 'G'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'pos': '3848158', 'ref': 'T', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167246.1:g.3848158T>G', 'vcf': {'chr': 'chr6_mann_hap4', 'pos': '3848158', 'ref': 'T', 'alt': 'G'}}}]) assert 'hg19' not in list(results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['NM_021983.4:c.490G>C']['primary_assembly_loci'].keys()) - assert results['NM_021983.4:c.490G>C']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_002433.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_068818.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021983.4'} - + assert results['NM_021983.4:c.490G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021983.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_068818.4', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_002433.1'} def test_variant319(self): 
variant = 'HSCHR6_MHC_MANN_CTG1-3851043-C-A' @@ -17538,22 +18120,22 @@ def test_variant319(self): assert results['flag'] == 'gene_variant' assert 'NM_021983.4:c.346G>T' in list(results.keys()) - assert results['NM_021983.4:c.346G>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_021983.4:c.346G>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_021983.4:c.346G>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3851043C>A', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'C', 'pos': '3851043', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167246.1:g.3851043C>A', 'vcf': {'chr': 'chr6_mann_hap4', 'ref': 'C', 'pos': '3851043', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167246.2:g.3845423C>A', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'ref': 'C', 'pos': '3845423', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3845423C>A', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'ref': 'C', 'pos': '3845423', 'alt': 'A'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167247.1:g.3887313C>A', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'ref': 'C', 'pos': '3887313', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3887313C>A', 'vcf': {'chr': 'chr6_mcf_hap5', 'ref': 'C', 'pos': '3887313', 'alt': 'A'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3855423C>A', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3855423', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3855423C>A', 'vcf': {'chr': 'chr6_ssto_hap7', 'ref': 'C', 'pos': '3855423', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3856125C>A', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'ref': 'C', 'pos': '3856125', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3856125C>A', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'ref': 'C', 'pos': '3856125', 'alt': 'A'}}}]) - assert results['NM_021983.4:c.346G>T']['gene_symbol'] == 'HLA-DRB4' - 
assert results['NM_021983.4:c.346G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068818.4:p.(Glu116Ter)', 'slr': 'NP_068818.4:p.(E116*)'} assert results['NM_021983.4:c.346G>T']['submitted_variant'] == 'HSCHR6_MHC_MANN_CTG1-3851043-C-A' - assert results['NM_021983.4:c.346G>T']['genome_context_intronic_sequence'] == '' - assert results['NM_021983.4:c.346G>T']['hgvs_lrg_variant'] == '' + assert results['NM_021983.4:c.346G>T']['gene_symbol'] == 'HLA-DRB4' + assert results['NM_021983.4:c.346G>T']['gene_ids'] == {'hgnc_id': 'HGNC:4952', 'entrez_gene_id': '3126', 'ucsc_id': 'uc011jsg.3', 'omim_id': []} assert results['NM_021983.4:c.346G>T']['hgvs_transcript_variant'] == 'NM_021983.4:c.346G>T' + assert results['NM_021983.4:c.346G>T']['genome_context_intronic_sequence'] == '' + assert results['NM_021983.4:c.346G>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_021983.4:c.346G>T']['hgvs_refseqgene_variant'] == 'NG_002433.1:g.8605C>A' + assert results['NM_021983.4:c.346G>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_068818.4:p.(Glu116Ter)', 'slr': 'NP_068818.4:p.(E116*)'} + assert results['NM_021983.4:c.346G>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_021983.4:c.346G>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_021983.4:c.346G>T']['alt_genomic_loci'], [{'grch38': {'hgvs_genomic_description': 'NT_167249.2:g.3856125C>A', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'pos': '3856125', 'ref': 'C', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167249.2:g.3856125C>A', 'vcf': {'chr': 'chr6_GL000256v2_alt', 'pos': '3856125', 'ref': 'C', 'alt': 'A'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167249.1:g.3855423C>A', 'vcf': {'chr': 'HSCHR6_MHC_SSTO_CTG1', 'pos': '3855423', 'ref': 'C', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167249.1:g.3855423C>A', 'vcf': {'chr': 'chr6_ssto_hap7', 'pos': '3855423', 'ref': 'C', 'alt': 'A'}}}, {'grch37': {'hgvs_genomic_description': 
'NT_167247.1:g.3887313C>A', 'vcf': {'chr': 'HSCHR6_MHC_MCF_CTG1', 'pos': '3887313', 'ref': 'C', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167247.1:g.3887313C>A', 'vcf': {'chr': 'chr6_mcf_hap5', 'pos': '3887313', 'ref': 'C', 'alt': 'A'}}}, {'grch38': {'hgvs_genomic_description': 'NT_167246.2:g.3845423C>A', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'pos': '3845423', 'ref': 'C', 'alt': 'A'}}}, {'hg38': {'hgvs_genomic_description': 'NT_167246.2:g.3845423C>A', 'vcf': {'chr': 'chr6_GL000253v2_alt', 'pos': '3845423', 'ref': 'C', 'alt': 'A'}}}, {'grch37': {'hgvs_genomic_description': 'NT_167246.1:g.3851043C>A', 'vcf': {'chr': 'HSCHR6_MHC_MANN_CTG1', 'pos': '3851043', 'ref': 'C', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NT_167246.1:g.3851043C>A', 'vcf': {'chr': 'chr6_mann_hap4', 'pos': '3851043', 'ref': 'C', 'alt': 'A'}}}]) assert 'hg19' not in list(results['NM_021983.4:c.346G>T']['primary_assembly_loci'].keys()) assert 'hg38' not in list(results['NM_021983.4:c.346G>T']['primary_assembly_loci'].keys()) assert 'grch37' not in list(results['NM_021983.4:c.346G>T']['primary_assembly_loci'].keys()) assert 'grch38' not in list(results['NM_021983.4:c.346G>T']['primary_assembly_loci'].keys()) - assert results['NM_021983.4:c.346G>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_002433.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_068818.4', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021983.4'} - + assert results['NM_021983.4:c.346G>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_021983.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_068818.4', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_002433.1'} def test_variant320(self): variant = 'X-70443101-C-T' @@ -17561,173 +18143,179 @@ def test_variant320(self): print(results) assert results['flag'] == 'gene_variant' - assert 'NM_001097642.2:c.-16-441C>T' in list(results.keys()) - assert 
results['NM_001097642.2:c.-16-441C>T']['hgvs_lrg_transcript_variant'] == 'LRG_245t1:c.-16-441C>T' - assert results['NM_001097642.2:c.-16-441C>T']['refseqgene_context_intronic_sequence'] == 'NG_008357.1(NM_001097642.2):c.-16-441C>T' - self.assertCountEqual(results['NM_001097642.2:c.-16-441C>T']['alt_genomic_loci'], []) - assert results['NM_001097642.2:c.-16-441C>T']['gene_symbol'] == 'GJB1' - assert results['NM_001097642.2:c.-16-441C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001091111.1(LRG_245p1):p.?', 'slr': 'NP_001091111.1:p.?'} - assert results['NM_001097642.2:c.-16-441C>T']['submitted_variant'] == 'X-70443101-C-T' - assert results['NM_001097642.2:c.-16-441C>T']['genome_context_intronic_sequence'] == 'NC_000023.10(NM_001097642.2):c.-16-441C>T' - assert results['NM_001097642.2:c.-16-441C>T']['hgvs_lrg_variant'] == 'LRG_245:g.13040C>T' - assert results['NM_001097642.2:c.-16-441C>T']['hgvs_transcript_variant'] == 'NM_001097642.2:c.-16-441C>T' - assert results['NM_001097642.2:c.-16-441C>T']['hgvs_refseqgene_variant'] == 'NG_008357.1:g.13040C>T' - assert results['NM_001097642.2:c.-16-441C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.70443101C>T', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '70443101', 'alt': 'T'}} - assert results['NM_001097642.2:c.-16-441C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.71223251C>T', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '71223251', 'alt': 'T'}} - assert results['NM_001097642.2:c.-16-441C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.70443101C>T', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '70443101', 'alt': 'T'}} - assert results['NM_001097642.2:c.-16-441C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.71223251C>T', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '71223251', 'alt': 'T'}} - assert results['NM_001097642.2:c.-16-441C>T']['reference_sequence_records'] == 
{'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008357.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001091111.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001097642.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_245.xml'} - assert 'NM_000166.5:c.-101C>T' in list(results.keys()) - assert results['NM_000166.5:c.-101C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000166.5:c.-101C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000166.5:c.-101C>T']['alt_genomic_loci'], []) - assert results['NM_000166.5:c.-101C>T']['gene_symbol'] == 'GJB1' - assert results['NM_000166.5:c.-101C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000157.1(LRG_245p2):p.?', 'slr': 'NP_000157.1:p.?'} assert results['NM_000166.5:c.-101C>T']['submitted_variant'] == 'X-70443101-C-T' - assert results['NM_000166.5:c.-101C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_000166.5:c.-101C>T']['hgvs_lrg_variant'] == '' + assert results['NM_000166.5:c.-101C>T']['gene_symbol'] == 'GJB1' + assert results['NM_000166.5:c.-101C>T']['gene_ids'] == {'hgnc_id': 'HGNC:4283', 'entrez_gene_id': '2705', 'ucsc_id': 'uc004dzf.4', 'omim_id': ['304040']} assert results['NM_000166.5:c.-101C>T']['hgvs_transcript_variant'] == 'NM_000166.5:c.-101C>T' + assert results['NM_000166.5:c.-101C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_000166.5:c.-101C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000166.5:c.-101C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_000166.5:c.-101C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.70443101C>T', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '70443101', 'alt': 'T'}} - assert results['NM_000166.5:c.-101C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.71223251C>T', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '71223251', 'alt': 'T'}} - assert 
results['NM_000166.5:c.-101C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.70443101C>T', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '70443101', 'alt': 'T'}} - assert results['NM_000166.5:c.-101C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.71223251C>T', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '71223251', 'alt': 'T'}} - assert results['NM_000166.5:c.-101C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000157.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000166.5'} + assert results['NM_000166.5:c.-101C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000157.1(LRG_245p2):p.?', 'slr': 'NP_000157.1:p.?'} + assert results['NM_000166.5:c.-101C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000166.5:c.-101C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000166.5:c.-101C>T']['alt_genomic_loci'], []) + assert results['NM_000166.5:c.-101C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.70443101C>T', 'vcf': {'chr': 'chrX', 'pos': '70443101', 'ref': 'C', 'alt': 'T'}} + assert results['NM_000166.5:c.-101C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.71223251C>T', 'vcf': {'chr': 'chrX', 'pos': '71223251', 'ref': 'C', 'alt': 'T'}} + assert results['NM_000166.5:c.-101C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.70443101C>T', 'vcf': {'chr': 'X', 'pos': '70443101', 'ref': 'C', 'alt': 'T'}} + assert results['NM_000166.5:c.-101C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.71223251C>T', 'vcf': {'chr': 'X', 'pos': '71223251', 'ref': 'C', 'alt': 'T'}} + assert results['NM_000166.5:c.-101C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000166.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000157.1'} + 
assert 'NM_001097642.2:c.-16-441C>T' in list(results.keys()) + assert results['NM_001097642.2:c.-16-441C>T']['submitted_variant'] == 'X-70443101-C-T' + assert results['NM_001097642.2:c.-16-441C>T']['gene_symbol'] == 'GJB1' + assert results['NM_001097642.2:c.-16-441C>T']['gene_ids'] == {'hgnc_id': 'HGNC:4283', 'entrez_gene_id': '2705', 'ucsc_id': 'uc004dzf.4', 'omim_id': ['304040']} + assert results['NM_001097642.2:c.-16-441C>T']['hgvs_transcript_variant'] == 'NM_001097642.2:c.-16-441C>T' + assert results['NM_001097642.2:c.-16-441C>T']['genome_context_intronic_sequence'] == 'NC_000023.10(NM_001097642.2):c.-16-441C>T' + assert results['NM_001097642.2:c.-16-441C>T']['refseqgene_context_intronic_sequence'] == 'NG_008357.1(NM_001097642.2):c.-16-441C>T' + assert results['NM_001097642.2:c.-16-441C>T']['hgvs_refseqgene_variant'] == 'NG_008357.1:g.13040C>T' + assert results['NM_001097642.2:c.-16-441C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001091111.1(LRG_245p1):p.?', 'slr': 'NP_001091111.1:p.?'} + assert results['NM_001097642.2:c.-16-441C>T']['hgvs_lrg_transcript_variant'] == 'LRG_245t1:c.-16-441C>T' + assert results['NM_001097642.2:c.-16-441C>T']['hgvs_lrg_variant'] == 'LRG_245:g.13040C>T' + self.assertCountEqual(results['NM_001097642.2:c.-16-441C>T']['alt_genomic_loci'], []) + assert results['NM_001097642.2:c.-16-441C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.70443101C>T', 'vcf': {'chr': 'chrX', 'pos': '70443101', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001097642.2:c.-16-441C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.71223251C>T', 'vcf': {'chr': 'chrX', 'pos': '71223251', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001097642.2:c.-16-441C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.70443101C>T', 'vcf': {'chr': 'X', 'pos': '70443101', 'ref': 'C', 'alt': 'T'}} + assert 
results['NM_001097642.2:c.-16-441C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.71223251C>T', 'vcf': {'chr': 'X', 'pos': '71223251', 'ref': 'C', 'alt': 'T'}} + assert results['NM_001097642.2:c.-16-441C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001097642.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001091111.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008357.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_245.xml'} def test_variant321(self): variant = 'X-107845202-GACCACC-GACC,G' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) - assert 'NM_033380.2:c.2130_2135del' in list(results.keys()) - assert results['NM_033380.2:c.2130_2135del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_033380.2:c.2130_2135del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_033380.2:c.2130_2135del']['alt_genomic_loci'], []) - assert results['NM_033380.2:c.2130_2135del']['gene_symbol'] == 'COL4A5' - assert results['NM_033380.2:c.2130_2135del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_203699.1(LRG_232p2):p.(Pro711_Pro712del)', 'slr': 'NP_203699.1:p.(P711_P712del)'} - assert results['NM_033380.2:c.2130_2135del']['submitted_variant'] == 'X-107845202-GACCACC-GACC,G' - assert results['NM_033380.2:c.2130_2135del']['genome_context_intronic_sequence'] == '' - assert results['NM_033380.2:c.2130_2135del']['hgvs_lrg_variant'] == '' - assert results['NM_033380.2:c.2130_2135del']['hgvs_transcript_variant'] == 'NM_033380.2:c.2130_2135del' - assert results['NM_033380.2:c.2130_2135del']['hgvs_refseqgene_variant'] == '' - assert results['NM_033380.2:c.2130_2135del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.107845203_107845208del', 'vcf': {'chr': 'chrX', 'ref': 'GACCACC', 'pos': '107845202', 'alt': 'G'}} - assert 
results['NM_033380.2:c.2130_2135del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.108601973_108601978del', 'vcf': {'chr': 'chrX', 'ref': 'GACCACC', 'pos': '108601972', 'alt': 'G'}} - assert results['NM_033380.2:c.2130_2135del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.107845203_107845208del', 'vcf': {'chr': 'X', 'ref': 'GACCACC', 'pos': '107845202', 'alt': 'G'}} - assert results['NM_033380.2:c.2130_2135del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.108601973_108601978del', 'vcf': {'chr': 'X', 'ref': 'GACCACC', 'pos': '108601972', 'alt': 'G'}} - assert results['NM_033380.2:c.2130_2135del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_203699.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_033380.2'} - - assert 'NM_000495.4:c.2130_2135del' in list(results.keys()) - assert results['NM_000495.4:c.2130_2135del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000495.4:c.2130_2135del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000495.4:c.2130_2135del']['alt_genomic_loci'], []) - assert results['NM_000495.4:c.2130_2135del']['gene_symbol'] == 'COL4A5' - assert results['NM_000495.4:c.2130_2135del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000486.1(LRG_232p1):p.(Pro711_Pro712del)', 'slr': 'NP_000486.1:p.(P711_P712del)'} - assert results['NM_000495.4:c.2130_2135del']['submitted_variant'] == 'X-107845202-GACCACC-GACC,G' - assert results['NM_000495.4:c.2130_2135del']['genome_context_intronic_sequence'] == '' - assert results['NM_000495.4:c.2130_2135del']['hgvs_lrg_variant'] == '' - assert results['NM_000495.4:c.2130_2135del']['hgvs_transcript_variant'] == 'NM_000495.4:c.2130_2135del' - assert results['NM_000495.4:c.2130_2135del']['hgvs_refseqgene_variant'] == '' - assert results['NM_000495.4:c.2130_2135del']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000023.10:g.107845203_107845208del', 'vcf': {'chr': 'chrX', 'ref': 'GACCACC', 'pos': '107845202', 'alt': 'G'}} - assert results['NM_000495.4:c.2130_2135del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.108601973_108601978del', 'vcf': {'chr': 'chrX', 'ref': 'GACCACC', 'pos': '108601972', 'alt': 'G'}} - assert results['NM_000495.4:c.2130_2135del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.107845203_107845208del', 'vcf': {'chr': 'X', 'ref': 'GACCACC', 'pos': '107845202', 'alt': 'G'}} - assert results['NM_000495.4:c.2130_2135del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.108601973_108601978del', 'vcf': {'chr': 'X', 'ref': 'GACCACC', 'pos': '108601972', 'alt': 'G'}} - assert results['NM_000495.4:c.2130_2135del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000486.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000495.4'} - assert results['flag'] == 'gene_variant' assert 'NM_000495.4:c.2133_2135del' in list(results.keys()) - assert results['NM_000495.4:c.2133_2135del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000495.4:c.2133_2135del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000495.4:c.2133_2135del']['alt_genomic_loci'], []) - assert results['NM_000495.4:c.2133_2135del']['gene_symbol'] == 'COL4A5' - assert results['NM_000495.4:c.2133_2135del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000486.1(LRG_232p1):p.(Pro712del)', 'slr': 'NP_000486.1:p.(P712del)'} assert results['NM_000495.4:c.2133_2135del']['submitted_variant'] == 'X-107845202-GACCACC-GACC,G' - assert results['NM_000495.4:c.2133_2135del']['genome_context_intronic_sequence'] == '' - assert results['NM_000495.4:c.2133_2135del']['hgvs_lrg_variant'] == '' + assert results['NM_000495.4:c.2133_2135del']['gene_symbol'] == 'COL4A5' + assert 
results['NM_000495.4:c.2133_2135del']['gene_ids'] == {'hgnc_id': 'HGNC:2207', 'entrez_gene_id': '1287', 'ucsc_id': 'uc065aqj.1', 'omim_id': ['303630']} assert results['NM_000495.4:c.2133_2135del']['hgvs_transcript_variant'] == 'NM_000495.4:c.2133_2135del' + assert results['NM_000495.4:c.2133_2135del']['genome_context_intronic_sequence'] == '' + assert results['NM_000495.4:c.2133_2135del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000495.4:c.2133_2135del']['hgvs_refseqgene_variant'] == '' - assert results['NM_000495.4:c.2133_2135del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.107845206_107845208del', 'vcf': {'chr': 'chrX', 'ref': 'GACC', 'pos': '107845202', 'alt': 'G'}} - assert results['NM_000495.4:c.2133_2135del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.108601976_108601978del', 'vcf': {'chr': 'chrX', 'ref': 'GACC', 'pos': '108601972', 'alt': 'G'}} - assert results['NM_000495.4:c.2133_2135del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.107845206_107845208del', 'vcf': {'chr': 'X', 'ref': 'GACC', 'pos': '107845202', 'alt': 'G'}} - assert results['NM_000495.4:c.2133_2135del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.108601976_108601978del', 'vcf': {'chr': 'X', 'ref': 'GACC', 'pos': '108601972', 'alt': 'G'}} - assert results['NM_000495.4:c.2133_2135del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000486.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000495.4'} + assert results['NM_000495.4:c.2133_2135del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000486.1(LRG_232p1):p.(Pro712del)', 'slr': 'NP_000486.1:p.(P712del)'} + assert results['NM_000495.4:c.2133_2135del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000495.4:c.2133_2135del']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['NM_000495.4:c.2133_2135del']['alt_genomic_loci'], []) + assert results['NM_000495.4:c.2133_2135del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.107845206_107845208del', 'vcf': {'chr': 'chrX', 'pos': '107845202', 'ref': 'GACC', 'alt': 'G'}} + assert results['NM_000495.4:c.2133_2135del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.108601976_108601978del', 'vcf': {'chr': 'chrX', 'pos': '108601972', 'ref': 'GACC', 'alt': 'G'}} + assert results['NM_000495.4:c.2133_2135del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.107845206_107845208del', 'vcf': {'chr': 'X', 'pos': '107845202', 'ref': 'GACC', 'alt': 'G'}} + assert results['NM_000495.4:c.2133_2135del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.108601976_108601978del', 'vcf': {'chr': 'X', 'pos': '108601972', 'ref': 'GACC', 'alt': 'G'}} + assert results['NM_000495.4:c.2133_2135del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000495.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000486.1'} assert 'NM_033380.2:c.2133_2135del' in list(results.keys()) - assert results['NM_033380.2:c.2133_2135del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_033380.2:c.2133_2135del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_033380.2:c.2133_2135del']['alt_genomic_loci'], []) - assert results['NM_033380.2:c.2133_2135del']['gene_symbol'] == 'COL4A5' - assert results['NM_033380.2:c.2133_2135del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_203699.1(LRG_232p2):p.(Pro712del)', 'slr': 'NP_203699.1:p.(P712del)'} assert results['NM_033380.2:c.2133_2135del']['submitted_variant'] == 'X-107845202-GACCACC-GACC,G' - assert results['NM_033380.2:c.2133_2135del']['genome_context_intronic_sequence'] == '' - assert 
results['NM_033380.2:c.2133_2135del']['hgvs_lrg_variant'] == '' + assert results['NM_033380.2:c.2133_2135del']['gene_symbol'] == 'COL4A5' + assert results['NM_033380.2:c.2133_2135del']['gene_ids'] == {'hgnc_id': 'HGNC:2207', 'entrez_gene_id': '1287', 'ucsc_id': 'uc065aqj.1', 'omim_id': ['303630']} assert results['NM_033380.2:c.2133_2135del']['hgvs_transcript_variant'] == 'NM_033380.2:c.2133_2135del' + assert results['NM_033380.2:c.2133_2135del']['genome_context_intronic_sequence'] == '' + assert results['NM_033380.2:c.2133_2135del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_033380.2:c.2133_2135del']['hgvs_refseqgene_variant'] == '' - assert results['NM_033380.2:c.2133_2135del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.107845206_107845208del', 'vcf': {'chr': 'chrX', 'ref': 'GACC', 'pos': '107845202', 'alt': 'G'}} - assert results['NM_033380.2:c.2133_2135del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.108601976_108601978del', 'vcf': {'chr': 'chrX', 'ref': 'GACC', 'pos': '108601972', 'alt': 'G'}} - assert results['NM_033380.2:c.2133_2135del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.107845206_107845208del', 'vcf': {'chr': 'X', 'ref': 'GACC', 'pos': '107845202', 'alt': 'G'}} - assert results['NM_033380.2:c.2133_2135del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.108601976_108601978del', 'vcf': {'chr': 'X', 'ref': 'GACC', 'pos': '108601972', 'alt': 'G'}} - assert results['NM_033380.2:c.2133_2135del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_203699.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_033380.2'} + assert results['NM_033380.2:c.2133_2135del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_203699.1(LRG_232p2):p.(Pro712del)', 'slr': 'NP_203699.1:p.(P712del)'} + assert 
results['NM_033380.2:c.2133_2135del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_033380.2:c.2133_2135del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_033380.2:c.2133_2135del']['alt_genomic_loci'], []) + assert results['NM_033380.2:c.2133_2135del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.107845206_107845208del', 'vcf': {'chr': 'chrX', 'pos': '107845202', 'ref': 'GACC', 'alt': 'G'}} + assert results['NM_033380.2:c.2133_2135del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.108601976_108601978del', 'vcf': {'chr': 'chrX', 'pos': '108601972', 'ref': 'GACC', 'alt': 'G'}} + assert results['NM_033380.2:c.2133_2135del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.107845206_107845208del', 'vcf': {'chr': 'X', 'pos': '107845202', 'ref': 'GACC', 'alt': 'G'}} + assert results['NM_033380.2:c.2133_2135del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.108601976_108601978del', 'vcf': {'chr': 'X', 'pos': '108601972', 'ref': 'GACC', 'alt': 'G'}} + assert results['NM_033380.2:c.2133_2135del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_033380.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_203699.1'} + + assert 'NM_000495.4:c.2130_2135del' in list(results.keys()) + assert results['NM_000495.4:c.2130_2135del']['submitted_variant'] == 'X-107845202-GACCACC-GACC,G' + assert results['NM_000495.4:c.2130_2135del']['gene_symbol'] == 'COL4A5' + assert results['NM_000495.4:c.2130_2135del']['gene_ids'] == {'hgnc_id': 'HGNC:2207', 'entrez_gene_id': '1287', 'ucsc_id': 'uc065aqj.1', 'omim_id': ['303630']} + assert results['NM_000495.4:c.2130_2135del']['hgvs_transcript_variant'] == 'NM_000495.4:c.2130_2135del' + assert results['NM_000495.4:c.2130_2135del']['genome_context_intronic_sequence'] == '' + assert 
results['NM_000495.4:c.2130_2135del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_000495.4:c.2130_2135del']['hgvs_refseqgene_variant'] == '' + assert results['NM_000495.4:c.2130_2135del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000486.1(LRG_232p1):p.(Pro711_Pro712del)', 'slr': 'NP_000486.1:p.(P711_P712del)'} + assert results['NM_000495.4:c.2130_2135del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000495.4:c.2130_2135del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000495.4:c.2130_2135del']['alt_genomic_loci'], []) + assert results['NM_000495.4:c.2130_2135del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.107845203_107845208del', 'vcf': {'chr': 'chrX', 'pos': '107845202', 'ref': 'GACCACC', 'alt': 'G'}} + assert results['NM_000495.4:c.2130_2135del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.108601973_108601978del', 'vcf': {'chr': 'chrX', 'pos': '108601972', 'ref': 'GACCACC', 'alt': 'G'}} + assert results['NM_000495.4:c.2130_2135del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.107845203_107845208del', 'vcf': {'chr': 'X', 'pos': '107845202', 'ref': 'GACCACC', 'alt': 'G'}} + assert results['NM_000495.4:c.2130_2135del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.108601973_108601978del', 'vcf': {'chr': 'X', 'pos': '108601972', 'ref': 'GACCACC', 'alt': 'G'}} + assert results['NM_000495.4:c.2130_2135del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000495.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000486.1'} + assert 'NM_033380.2:c.2130_2135del' in list(results.keys()) + assert results['NM_033380.2:c.2130_2135del']['submitted_variant'] == 'X-107845202-GACCACC-GACC,G' + assert results['NM_033380.2:c.2130_2135del']['gene_symbol'] == 'COL4A5' + assert 
results['NM_033380.2:c.2130_2135del']['gene_ids'] == {'hgnc_id': 'HGNC:2207', 'entrez_gene_id': '1287', 'ucsc_id': 'uc065aqj.1', 'omim_id': ['303630']} + assert results['NM_033380.2:c.2130_2135del']['hgvs_transcript_variant'] == 'NM_033380.2:c.2130_2135del' + assert results['NM_033380.2:c.2130_2135del']['genome_context_intronic_sequence'] == '' + assert results['NM_033380.2:c.2130_2135del']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_033380.2:c.2130_2135del']['hgvs_refseqgene_variant'] == '' + assert results['NM_033380.2:c.2130_2135del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_203699.1(LRG_232p2):p.(Pro711_Pro712del)', 'slr': 'NP_203699.1:p.(P711_P712del)'} + assert results['NM_033380.2:c.2130_2135del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_033380.2:c.2130_2135del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_033380.2:c.2130_2135del']['alt_genomic_loci'], []) + assert results['NM_033380.2:c.2130_2135del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.107845203_107845208del', 'vcf': {'chr': 'chrX', 'pos': '107845202', 'ref': 'GACCACC', 'alt': 'G'}} + assert results['NM_033380.2:c.2130_2135del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.108601973_108601978del', 'vcf': {'chr': 'chrX', 'pos': '108601972', 'ref': 'GACCACC', 'alt': 'G'}} + assert results['NM_033380.2:c.2130_2135del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.107845203_107845208del', 'vcf': {'chr': 'X', 'pos': '107845202', 'ref': 'GACCACC', 'alt': 'G'}} + assert results['NM_033380.2:c.2130_2135del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.108601973_108601978del', 'vcf': {'chr': 'X', 'pos': '108601972', 'ref': 'GACCACC', 'alt': 'G'}} + assert results['NM_033380.2:c.2130_2135del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_033380.2', 
'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_203699.1'} def test_variant322(self): variant = 'X-153296777-G-A' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' + assert 'NM_001110792.1:c.538C>T' in list(results.keys()) + assert results['NM_001110792.1:c.538C>T']['submitted_variant'] == 'X-153296777-G-A' + assert results['NM_001110792.1:c.538C>T']['gene_symbol'] == 'MECP2' + assert results['NM_001110792.1:c.538C>T']['gene_ids'] == {'hgnc_id': 'HGNC:6990', 'entrez_gene_id': '4204', 'ucsc_id': 'uc004fjv.3', 'omim_id': ['300005']} + assert results['NM_001110792.1:c.538C>T']['hgvs_transcript_variant'] == 'NM_001110792.1:c.538C>T' + assert results['NM_001110792.1:c.538C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001110792.1:c.538C>T']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001110792.1:c.538C>T']['hgvs_refseqgene_variant'] == 'NG_007107.2:g.110802C>T' + assert results['NM_001110792.1:c.538C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001104262.1(LRG_764p1):p.(Arg180Ter)', 'slr': 'NP_001104262.1:p.(R180*)'} + assert results['NM_001110792.1:c.538C>T']['hgvs_lrg_transcript_variant'] == 'LRG_764t1:c.538C>T' + assert results['NM_001110792.1:c.538C>T']['hgvs_lrg_variant'] == 'LRG_764:g.110802C>T' + self.assertCountEqual(results['NM_001110792.1:c.538C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'HG1497_PATCH', 'pos': '1465305', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'NW_003871103.3', 'pos': '1465305', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NM_001110792.1:c.538C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'chrX', 'pos': '153296777', 'ref': 'G', 'alt': 'A'}} + assert 
results['NM_001110792.1:c.538C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'chrX', 'pos': '154031326', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001110792.1:c.538C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'X', 'pos': '153296777', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001110792.1:c.538C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'X', 'pos': '154031326', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001110792.1:c.538C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001110792.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001104262.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007107.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_764.xml'} + assert 'NM_004992.3:c.502C>T' in list(results.keys()) - assert results['NM_004992.3:c.502C>T']['hgvs_lrg_transcript_variant'] == 'LRG_764t2:c.502C>T' - assert results['NM_004992.3:c.502C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_004992.3:c.502C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'HG1497_PATCH', 'ref': 'G', 'pos': '1465305', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'NW_003871103.3', 'ref': 'G', 'pos': '1465305', 'alt': 'A'}}}]) - assert results['NM_004992.3:c.502C>T']['gene_symbol'] == 'MECP2' - assert results['NM_004992.3:c.502C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004983.1(LRG_764p2):p.(Arg168Ter)', 'slr': 'NP_004983.1:p.(R168*)'} assert results['NM_004992.3:c.502C>T']['submitted_variant'] == 'X-153296777-G-A' - assert results['NM_004992.3:c.502C>T']['genome_context_intronic_sequence'] == '' - assert 
results['NM_004992.3:c.502C>T']['hgvs_lrg_variant'] == 'LRG_764:g.110802C>T' + assert results['NM_004992.3:c.502C>T']['gene_symbol'] == 'MECP2' + assert results['NM_004992.3:c.502C>T']['gene_ids'] == {'hgnc_id': 'HGNC:6990', 'entrez_gene_id': '4204', 'ucsc_id': 'uc004fjv.3', 'omim_id': ['300005']} assert results['NM_004992.3:c.502C>T']['hgvs_transcript_variant'] == 'NM_004992.3:c.502C>T' + assert results['NM_004992.3:c.502C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_004992.3:c.502C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_004992.3:c.502C>T']['hgvs_refseqgene_variant'] == 'NG_007107.2:g.110802C>T' - assert results['NM_004992.3:c.502C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '153296777', 'alt': 'A'}} - assert results['NM_004992.3:c.502C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '154031326', 'alt': 'A'}} - assert results['NM_004992.3:c.502C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '153296777', 'alt': 'A'}} - assert results['NM_004992.3:c.502C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '154031326', 'alt': 'A'}} - assert results['NM_004992.3:c.502C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007107.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004983.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004992.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_764.xml'} + assert results['NM_004992.3:c.502C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_004983.1(LRG_764p2):p.(Arg168Ter)', 'slr': 'NP_004983.1:p.(R168*)'} + assert 
results['NM_004992.3:c.502C>T']['hgvs_lrg_transcript_variant'] == 'LRG_764t2:c.502C>T' + assert results['NM_004992.3:c.502C>T']['hgvs_lrg_variant'] == 'LRG_764:g.110802C>T' + self.assertCountEqual(results['NM_004992.3:c.502C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'HG1497_PATCH', 'pos': '1465305', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'NW_003871103.3', 'pos': '1465305', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NM_004992.3:c.502C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'chrX', 'pos': '153296777', 'ref': 'G', 'alt': 'A'}} + assert results['NM_004992.3:c.502C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'chrX', 'pos': '154031326', 'ref': 'G', 'alt': 'A'}} + assert results['NM_004992.3:c.502C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'X', 'pos': '153296777', 'ref': 'G', 'alt': 'A'}} + assert results['NM_004992.3:c.502C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'X', 'pos': '154031326', 'ref': 'G', 'alt': 'A'}} + assert results['NM_004992.3:c.502C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_004992.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_004983.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007107.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_764.xml'} - assert results['flag'] == 'gene_variant' assert 'NM_001316337.1:c.223C>T' in list(results.keys()) - assert results['NM_001316337.1:c.223C>T']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001316337.1:c.223C>T']['refseqgene_context_intronic_sequence'] == '' - 
self.assertCountEqual(results['NM_001316337.1:c.223C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'HG1497_PATCH', 'ref': 'G', 'pos': '1465305', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'NW_003871103.3', 'ref': 'G', 'pos': '1465305', 'alt': 'A'}}}]) - assert results['NM_001316337.1:c.223C>T']['gene_symbol'] == 'MECP2' - assert results['NM_001316337.1:c.223C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001303266.1:p.(Arg75Ter)', 'slr': 'NP_001303266.1:p.(R75*)'} assert results['NM_001316337.1:c.223C>T']['submitted_variant'] == 'X-153296777-G-A' - assert results['NM_001316337.1:c.223C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001316337.1:c.223C>T']['hgvs_lrg_variant'] == '' + assert results['NM_001316337.1:c.223C>T']['gene_symbol'] == 'MECP2' + assert results['NM_001316337.1:c.223C>T']['gene_ids'] == {'hgnc_id': 'HGNC:6990', 'entrez_gene_id': '4204', 'ucsc_id': 'uc004fjv.3', 'omim_id': ['300005']} assert results['NM_001316337.1:c.223C>T']['hgvs_transcript_variant'] == 'NM_001316337.1:c.223C>T' + assert results['NM_001316337.1:c.223C>T']['genome_context_intronic_sequence'] == '' + assert results['NM_001316337.1:c.223C>T']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001316337.1:c.223C>T']['hgvs_refseqgene_variant'] == '' - assert results['NM_001316337.1:c.223C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '153296777', 'alt': 'A'}} - assert results['NM_001316337.1:c.223C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '154031326', 'alt': 'A'}} - assert results['NM_001316337.1:c.223C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'X', 
'ref': 'G', 'pos': '153296777', 'alt': 'A'}} - assert results['NM_001316337.1:c.223C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '154031326', 'alt': 'A'}} - assert results['NM_001316337.1:c.223C>T']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001303266.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001316337.1'} - - assert 'NM_001110792.1:c.538C>T' in list(results.keys()) - assert results['NM_001110792.1:c.538C>T']['hgvs_lrg_transcript_variant'] == 'LRG_764t1:c.538C>T' - assert results['NM_001110792.1:c.538C>T']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001110792.1:c.538C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'HG1497_PATCH', 'ref': 'G', 'pos': '1465305', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'NW_003871103.3', 'ref': 'G', 'pos': '1465305', 'alt': 'A'}}}]) - assert results['NM_001110792.1:c.538C>T']['gene_symbol'] == 'MECP2' - assert results['NM_001110792.1:c.538C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001104262.1(LRG_764p1):p.(Arg180Ter)', 'slr': 'NP_001104262.1:p.(R180*)'} - assert results['NM_001110792.1:c.538C>T']['submitted_variant'] == 'X-153296777-G-A' - assert results['NM_001110792.1:c.538C>T']['genome_context_intronic_sequence'] == '' - assert results['NM_001110792.1:c.538C>T']['hgvs_lrg_variant'] == 'LRG_764:g.110802C>T' - assert results['NM_001110792.1:c.538C>T']['hgvs_transcript_variant'] == 'NM_001110792.1:c.538C>T' - assert results['NM_001110792.1:c.538C>T']['hgvs_refseqgene_variant'] == 'NG_007107.2:g.110802C>T' - assert results['NM_001110792.1:c.538C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '153296777', 'alt': 
'A'}} - assert results['NM_001110792.1:c.538C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'chrX', 'ref': 'G', 'pos': '154031326', 'alt': 'A'}} - assert results['NM_001110792.1:c.538C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '153296777', 'alt': 'A'}} - assert results['NM_001110792.1:c.538C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'X', 'ref': 'G', 'pos': '154031326', 'alt': 'A'}} - assert results['NM_001110792.1:c.538C>T']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007107.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001104262.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001110792.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/pending/LRG_764.xml'} - + assert results['NM_001316337.1:c.223C>T']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001303266.1:p.(Arg75Ter)', 'slr': 'NP_001303266.1:p.(R75*)'} + assert results['NM_001316337.1:c.223C>T']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001316337.1:c.223C>T']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001316337.1:c.223C>T']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'HG1497_PATCH', 'pos': '1465305', 'ref': 'G', 'alt': 'A'}}}, {'hg19': {'hgvs_genomic_description': 'NW_003871103.3:g.1465305G>A', 'vcf': {'chr': 'NW_003871103.3', 'pos': '1465305', 'ref': 'G', 'alt': 'A'}}}]) + assert results['NM_001316337.1:c.223C>T']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'chrX', 'pos': '153296777', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001316337.1:c.223C>T']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'chrX', 'pos': '154031326', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001316337.1:c.223C>T']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.153296777G>A', 'vcf': {'chr': 'X', 'pos': '153296777', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001316337.1:c.223C>T']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.154031326G>A', 'vcf': {'chr': 'X', 'pos': '154031326', 'ref': 'G', 'alt': 'A'}} + assert results['NM_001316337.1:c.223C>T']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001316337.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001303266.1'} def test_variant323(self): variant = 'NM_198180.2:c.408_410delGTG' @@ -17736,70 +18324,70 @@ def test_variant323(self): assert results['flag'] == 'gene_variant' assert 'NM_198180.2:c.408_410del' in list(results.keys()) - assert results['NM_198180.2:c.408_410del']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_198180.2:c.408_410del']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_198180.2:c.408_410del']['alt_genomic_loci'], []) - assert results['NM_198180.2:c.408_410del']['gene_symbol'] == 'QRFP' - assert results['NM_198180.2:c.408_410del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_937823.1:p.?', 'slr': 'NP_937823.1:p.?'} assert results['NM_198180.2:c.408_410del']['submitted_variant'] == 'NM_198180.2:c.408_410delGTG' - assert results['NM_198180.2:c.408_410del']['genome_context_intronic_sequence'] == '' - assert results['NM_198180.2:c.408_410del']['hgvs_lrg_variant'] == '' + assert results['NM_198180.2:c.408_410del']['gene_symbol'] == 'QRFP' + assert results['NM_198180.2:c.408_410del']['gene_ids'] == {'hgnc_id': 'HGNC:29982', 'entrez_gene_id': '347148', 'ucsc_id': 'uc064woc.1', 'omim_id': ['609795']} assert results['NM_198180.2:c.408_410del']['hgvs_transcript_variant'] == 'NM_198180.2:c.408_410del' 
+ assert results['NM_198180.2:c.408_410del']['genome_context_intronic_sequence'] == '' + assert results['NM_198180.2:c.408_410del']['refseqgene_context_intronic_sequence'] == '' assert results['NM_198180.2:c.408_410del']['hgvs_refseqgene_variant'] == '' - assert results['NM_198180.2:c.408_410del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.133768817_133768819del', 'vcf': {'chr': 'chr9', 'ref': 'TCAC', 'pos': '133768815', 'alt': 'T'}} - assert results['NM_198180.2:c.408_410del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.130893430_130893432del', 'vcf': {'chr': 'chr9', 'ref': 'TCAC', 'pos': '130893428', 'alt': 'T'}} - assert results['NM_198180.2:c.408_410del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.133768817_133768819del', 'vcf': {'chr': '9', 'ref': 'TCAC', 'pos': '133768815', 'alt': 'T'}} - assert results['NM_198180.2:c.408_410del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.130893430_130893432del', 'vcf': {'chr': '9', 'ref': 'TCAC', 'pos': '130893428', 'alt': 'T'}} - assert results['NM_198180.2:c.408_410del']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_937823.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198180.2'} - + assert results['NM_198180.2:c.408_410del']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_937823.1:p.?', 'slr': 'NP_937823.1:p.?'} + assert results['NM_198180.2:c.408_410del']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_198180.2:c.408_410del']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_198180.2:c.408_410del']['alt_genomic_loci'], []) + assert results['NM_198180.2:c.408_410del']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.133768817_133768819del', 'vcf': {'chr': 'chr9', 'pos': '133768815', 'ref': 'TCAC', 'alt': 'T'}} + assert 
results['NM_198180.2:c.408_410del']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.130893430_130893432del', 'vcf': {'chr': 'chr9', 'pos': '130893428', 'ref': 'TCAC', 'alt': 'T'}} + assert results['NM_198180.2:c.408_410del']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.133768817_133768819del', 'vcf': {'chr': '9', 'pos': '133768815', 'ref': 'TCAC', 'alt': 'T'}} + assert results['NM_198180.2:c.408_410del']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.130893430_130893432del', 'vcf': {'chr': '9', 'pos': '130893428', 'ref': 'TCAC', 'alt': 'T'}} + assert results['NM_198180.2:c.408_410del']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_198180.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_937823.1'} def test_variant324(self): variant = 'NM_080877.2:c.1733_1735delinsTTT' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_080877.2:c.1733_1735delinsTTT' in list(results.keys()) - assert results['NM_080877.2:c.1733_1735delinsTTT']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_080877.2:c.1733_1735delinsTTT']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_080877.2:c.1733_1735delinsTTT']['alt_genomic_loci'], []) - assert results['NM_080877.2:c.1733_1735delinsTTT']['gene_symbol'] == 'SLC34A3' - assert results['NM_080877.2:c.1733_1735delinsTTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_543153.1:p.(Pro578_Gln598del)', 'slr': 'NP_543153.1:p.(P578_Q598del)'} assert results['NM_080877.2:c.1733_1735delinsTTT']['submitted_variant'] == 'NM_080877.2:c.1733_1735delinsTTT' - assert results['NM_080877.2:c.1733_1735delinsTTT']['genome_context_intronic_sequence'] == '' - assert results['NM_080877.2:c.1733_1735delinsTTT']['hgvs_lrg_variant'] == '' + assert 
results['NM_080877.2:c.1733_1735delinsTTT']['gene_symbol'] == 'SLC34A3' + assert results['NM_080877.2:c.1733_1735delinsTTT']['gene_ids'] == {'hgnc_id': 'HGNC:20305', 'entrez_gene_id': '142680', 'ucsc_id': 'uc011met.2', 'omim_id': ['609826']} assert results['NM_080877.2:c.1733_1735delinsTTT']['hgvs_transcript_variant'] == 'NM_080877.2:c.1733_1735delinsTTT' + assert results['NM_080877.2:c.1733_1735delinsTTT']['genome_context_intronic_sequence'] == '' + assert results['NM_080877.2:c.1733_1735delinsTTT']['refseqgene_context_intronic_sequence'] == '' assert results['NM_080877.2:c.1733_1735delinsTTT']['hgvs_refseqgene_variant'] == '' - assert results['NM_080877.2:c.1733_1735delinsTTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.140130801_140130803delinsTTT', 'vcf': {'chr': 'chr9', 'ref': 'CGA', 'pos': '140130801', 'alt': 'TTT'}} - assert results['NM_080877.2:c.1733_1735delinsTTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.137236349_137236351delinsTTT', 'vcf': {'chr': 'chr9', 'ref': 'CGA', 'pos': '137236349', 'alt': 'TTT'}} - assert results['NM_080877.2:c.1733_1735delinsTTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.140130801_140130803delinsTTT', 'vcf': {'chr': '9', 'ref': 'CGA', 'pos': '140130801', 'alt': 'TTT'}} - assert results['NM_080877.2:c.1733_1735delinsTTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.137236349_137236351delinsTTT', 'vcf': {'chr': '9', 'ref': 'CGA', 'pos': '137236349', 'alt': 'TTT'}} - assert results['NM_080877.2:c.1733_1735delinsTTT']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_543153.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_080877.2'} - - assert results['flag'] == 'gene_variant' + assert results['NM_080877.2:c.1733_1735delinsTTT']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_543153.1:p.(Pro578_Gln598del)', 'slr': 
'NP_543153.1:p.(P578_Q598del)'} + assert results['NM_080877.2:c.1733_1735delinsTTT']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_080877.2:c.1733_1735delinsTTT']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_080877.2:c.1733_1735delinsTTT']['alt_genomic_loci'], []) + assert results['NM_080877.2:c.1733_1735delinsTTT']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.140130801_140130803delinsTTT', 'vcf': {'chr': 'chr9', 'pos': '140130801', 'ref': 'CGA', 'alt': 'TTT'}} + assert results['NM_080877.2:c.1733_1735delinsTTT']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.137236349_137236351delinsTTT', 'vcf': {'chr': 'chr9', 'pos': '137236349', 'ref': 'CGA', 'alt': 'TTT'}} + assert results['NM_080877.2:c.1733_1735delinsTTT']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.140130801_140130803delinsTTT', 'vcf': {'chr': '9', 'pos': '140130801', 'ref': 'CGA', 'alt': 'TTT'}} + assert results['NM_080877.2:c.1733_1735delinsTTT']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.137236349_137236351delinsTTT', 'vcf': {'chr': '9', 'pos': '137236349', 'ref': 'CGA', 'alt': 'TTT'}} + assert results['NM_080877.2:c.1733_1735delinsTTT']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_080877.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_543153.1'} def test_variant325(self): variant = 'NM_080877.2:c.1735_1737delinsTGA' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_080877.2:c.1735_1737delinsTGA' in list(results.keys()) - assert results['NM_080877.2:c.1735_1737delinsTGA']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_080877.2:c.1735_1737delinsTGA']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_080877.2:c.1735_1737delinsTGA']['alt_genomic_loci'], 
[]) - assert results['NM_080877.2:c.1735_1737delinsTGA']['gene_symbol'] == 'SLC34A3' - assert results['NM_080877.2:c.1735_1737delinsTGA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_543153.1:p.(Lys579Ter)', 'slr': 'NP_543153.1:p.(K579*)'} assert results['NM_080877.2:c.1735_1737delinsTGA']['submitted_variant'] == 'NM_080877.2:c.1735_1737delinsTGA' - assert results['NM_080877.2:c.1735_1737delinsTGA']['genome_context_intronic_sequence'] == '' - assert results['NM_080877.2:c.1735_1737delinsTGA']['hgvs_lrg_variant'] == '' + assert results['NM_080877.2:c.1735_1737delinsTGA']['gene_symbol'] == 'SLC34A3' + assert results['NM_080877.2:c.1735_1737delinsTGA']['gene_ids'] == {'hgnc_id': 'HGNC:20305', 'entrez_gene_id': '142680', 'ucsc_id': 'uc011met.2', 'omim_id': ['609826']} assert results['NM_080877.2:c.1735_1737delinsTGA']['hgvs_transcript_variant'] == 'NM_080877.2:c.1735_1737delinsTGA' + assert results['NM_080877.2:c.1735_1737delinsTGA']['genome_context_intronic_sequence'] == '' + assert results['NM_080877.2:c.1735_1737delinsTGA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_080877.2:c.1735_1737delinsTGA']['hgvs_refseqgene_variant'] == '' - assert results['NM_080877.2:c.1735_1737delinsTGA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.140130803_140130805delinsTGA', 'vcf': {'chr': 'chr9', 'ref': 'AAG', 'pos': '140130803', 'alt': 'TGA'}} - assert results['NM_080877.2:c.1735_1737delinsTGA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.137236351_137236353delinsTGA', 'vcf': {'chr': 'chr9', 'ref': 'AAG', 'pos': '137236351', 'alt': 'TGA'}} - assert results['NM_080877.2:c.1735_1737delinsTGA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.140130803_140130805delinsTGA', 'vcf': {'chr': '9', 'ref': 'AAG', 'pos': '140130803', 'alt': 'TGA'}} - assert results['NM_080877.2:c.1735_1737delinsTGA']['primary_assembly_loci']['grch38'] == 
{'hgvs_genomic_description': 'NC_000009.12:g.137236351_137236353delinsTGA', 'vcf': {'chr': '9', 'ref': 'AAG', 'pos': '137236351', 'alt': 'TGA'}} - assert results['NM_080877.2:c.1735_1737delinsTGA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_543153.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_080877.2'} - - assert results['flag'] == 'gene_variant' + assert results['NM_080877.2:c.1735_1737delinsTGA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_543153.1:p.(Lys579Ter)', 'slr': 'NP_543153.1:p.(K579*)'} + assert results['NM_080877.2:c.1735_1737delinsTGA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_080877.2:c.1735_1737delinsTGA']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_080877.2:c.1735_1737delinsTGA']['alt_genomic_loci'], []) + assert results['NM_080877.2:c.1735_1737delinsTGA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.140130803_140130805delinsTGA', 'vcf': {'chr': 'chr9', 'pos': '140130803', 'ref': 'AAG', 'alt': 'TGA'}} + assert results['NM_080877.2:c.1735_1737delinsTGA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.137236351_137236353delinsTGA', 'vcf': {'chr': 'chr9', 'pos': '137236351', 'ref': 'AAG', 'alt': 'TGA'}} + assert results['NM_080877.2:c.1735_1737delinsTGA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.140130803_140130805delinsTGA', 'vcf': {'chr': '9', 'pos': '140130803', 'ref': 'AAG', 'alt': 'TGA'}} + assert results['NM_080877.2:c.1735_1737delinsTGA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.137236351_137236353delinsTGA', 'vcf': {'chr': '9', 'pos': '137236351', 'ref': 'AAG', 'alt': 'TGA'}} + assert results['NM_080877.2:c.1735_1737delinsTGA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_080877.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_543153.1'} def 
test_variant326(self): variant = 'NM_080877.2:c.1735_1737delinsTAATTGTTC' @@ -17808,22 +18396,22 @@ def test_variant326(self): assert results['flag'] == 'gene_variant' assert 'NM_080877.2:c.1735_1737delinsTAATTGTTC' in list(results.keys()) - assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['alt_genomic_loci'], []) - assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['gene_symbol'] == 'SLC34A3' - assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_543153.1:p.(Lys579Ter)', 'slr': 'NP_543153.1:p.(K579*)'} assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['submitted_variant'] == 'NM_080877.2:c.1735_1737delinsTAATTGTTC' - assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['genome_context_intronic_sequence'] == '' - assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['hgvs_lrg_variant'] == '' + assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['gene_symbol'] == 'SLC34A3' + assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['gene_ids'] == {'hgnc_id': 'HGNC:20305', 'entrez_gene_id': '142680', 'ucsc_id': 'uc011met.2', 'omim_id': ['609826']} assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['hgvs_transcript_variant'] == 'NM_080877.2:c.1735_1737delinsTAATTGTTC' + assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['genome_context_intronic_sequence'] == '' + assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['hgvs_refseqgene_variant'] == '' - assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.140130803_140130805delinsTAATTGTTC', 'vcf': {'chr': 'chr9', 'ref': 
'AAG', 'pos': '140130803', 'alt': 'TAATTGTTC'}} - assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.137236351_137236353delinsTAATTGTTC', 'vcf': {'chr': 'chr9', 'ref': 'AAG', 'pos': '137236351', 'alt': 'TAATTGTTC'}} - assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.140130803_140130805delinsTAATTGTTC', 'vcf': {'chr': '9', 'ref': 'AAG', 'pos': '140130803', 'alt': 'TAATTGTTC'}} - assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.137236351_137236353delinsTAATTGTTC', 'vcf': {'chr': '9', 'ref': 'AAG', 'pos': '137236351', 'alt': 'TAATTGTTC'}} - assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_543153.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_080877.2'} - + assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_543153.1:p.(Lys579Ter)', 'slr': 'NP_543153.1:p.(K579*)'} + assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['alt_genomic_loci'], []) + assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.140130803_140130805delinsTAATTGTTC', 'vcf': {'chr': 'chr9', 'pos': '140130803', 'ref': 'AAG', 'alt': 'TAATTGTTC'}} + assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.137236351_137236353delinsTAATTGTTC', 'vcf': {'chr': 'chr9', 'pos': '137236351', 'ref': 'AAG', 'alt': 'TAATTGTTC'}} + 
assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.140130803_140130805delinsTAATTGTTC', 'vcf': {'chr': '9', 'pos': '140130803', 'ref': 'AAG', 'alt': 'TAATTGTTC'}} + assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.137236351_137236353delinsTAATTGTTC', 'vcf': {'chr': '9', 'pos': '137236351', 'ref': 'AAG', 'alt': 'TAATTGTTC'}} + assert results['NM_080877.2:c.1735_1737delinsTAATTGTTC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_080877.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_543153.1'} def test_variant327(self): variant = 'NM_080877.2:c.1737delinsATTGTTC' @@ -17832,22 +18420,22 @@ def test_variant327(self): assert results['flag'] == 'gene_variant' assert 'NM_080877.2:c.1737delinsATTGTTC' in list(results.keys()) - assert results['NM_080877.2:c.1737delinsATTGTTC']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_080877.2:c.1737delinsATTGTTC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_080877.2:c.1737delinsATTGTTC']['alt_genomic_loci'], []) - assert results['NM_080877.2:c.1737delinsATTGTTC']['gene_symbol'] == 'SLC34A3' - assert results['NM_080877.2:c.1737delinsATTGTTC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_543153.1:p.(Lys579_Ala580insLeuPhe)', 'slr': 'NP_543153.1:p.(K579_A580insLF)'} assert results['NM_080877.2:c.1737delinsATTGTTC']['submitted_variant'] == 'NM_080877.2:c.1737delinsATTGTTC' - assert results['NM_080877.2:c.1737delinsATTGTTC']['genome_context_intronic_sequence'] == '' - assert results['NM_080877.2:c.1737delinsATTGTTC']['hgvs_lrg_variant'] == '' + assert results['NM_080877.2:c.1737delinsATTGTTC']['gene_symbol'] == 'SLC34A3' + assert results['NM_080877.2:c.1737delinsATTGTTC']['gene_ids'] == {'hgnc_id': 'HGNC:20305', 'entrez_gene_id': '142680', 'ucsc_id': 
'uc011met.2', 'omim_id': ['609826']} assert results['NM_080877.2:c.1737delinsATTGTTC']['hgvs_transcript_variant'] == 'NM_080877.2:c.1737delinsATTGTTC' + assert results['NM_080877.2:c.1737delinsATTGTTC']['genome_context_intronic_sequence'] == '' + assert results['NM_080877.2:c.1737delinsATTGTTC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_080877.2:c.1737delinsATTGTTC']['hgvs_refseqgene_variant'] == '' - assert results['NM_080877.2:c.1737delinsATTGTTC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.140130805delinsATTGTTC', 'vcf': {'chr': 'chr9', 'ref': 'G', 'pos': '140130805', 'alt': 'ATTGTTC'}} - assert results['NM_080877.2:c.1737delinsATTGTTC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.137236353delinsATTGTTC', 'vcf': {'chr': 'chr9', 'ref': 'G', 'pos': '137236353', 'alt': 'ATTGTTC'}} - assert results['NM_080877.2:c.1737delinsATTGTTC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.140130805delinsATTGTTC', 'vcf': {'chr': '9', 'ref': 'G', 'pos': '140130805', 'alt': 'ATTGTTC'}} - assert results['NM_080877.2:c.1737delinsATTGTTC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.137236353delinsATTGTTC', 'vcf': {'chr': '9', 'ref': 'G', 'pos': '137236353', 'alt': 'ATTGTTC'}} - assert results['NM_080877.2:c.1737delinsATTGTTC']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_543153.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_080877.2'} - + assert results['NM_080877.2:c.1737delinsATTGTTC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_543153.1:p.(Lys579_Ala580insLeuPhe)', 'slr': 'NP_543153.1:p.(K579_A580insLF)'} + assert results['NM_080877.2:c.1737delinsATTGTTC']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_080877.2:c.1737delinsATTGTTC']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['NM_080877.2:c.1737delinsATTGTTC']['alt_genomic_loci'], []) + assert results['NM_080877.2:c.1737delinsATTGTTC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000009.11:g.140130805delinsATTGTTC', 'vcf': {'chr': 'chr9', 'pos': '140130805', 'ref': 'G', 'alt': 'ATTGTTC'}} + assert results['NM_080877.2:c.1737delinsATTGTTC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000009.12:g.137236353delinsATTGTTC', 'vcf': {'chr': 'chr9', 'pos': '137236353', 'ref': 'G', 'alt': 'ATTGTTC'}} + assert results['NM_080877.2:c.1737delinsATTGTTC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000009.11:g.140130805delinsATTGTTC', 'vcf': {'chr': '9', 'pos': '140130805', 'ref': 'G', 'alt': 'ATTGTTC'}} + assert results['NM_080877.2:c.1737delinsATTGTTC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000009.12:g.137236353delinsATTGTTC', 'vcf': {'chr': '9', 'pos': '137236353', 'ref': 'G', 'alt': 'ATTGTTC'}} + assert results['NM_080877.2:c.1737delinsATTGTTC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_080877.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_543153.1'} def test_variant328(self): variant = 'NM_000088.3:c.4392_*2delinsAGAG' @@ -17856,22 +18444,22 @@ def test_variant328(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.4392_*2delinsAGAG' in list(results.keys()) - assert results['NM_000088.3:c.4392_*2delinsAGAG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.4392_*2delinsAGAG' - assert results['NM_000088.3:c.4392_*2delinsAGAG']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000088.3:c.4392_*2delinsAGAG']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.4392_*2delinsAGAG']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.4392_*2delinsAGAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Ter1465GluextTer84)', 
'slr': 'NP_000079.2:p.(*1465Eext*84)'} assert results['NM_000088.3:c.4392_*2delinsAGAG']['submitted_variant'] == 'NM_000088.3:c.4392_*2delinsAGAG' - assert results['NM_000088.3:c.4392_*2delinsAGAG']['genome_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.4392_*2delinsAGAG']['hgvs_lrg_variant'] == 'LRG_1:g.21135_21140delinsAGAG' + assert results['NM_000088.3:c.4392_*2delinsAGAG']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.4392_*2delinsAGAG']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.4392_*2delinsAGAG']['hgvs_transcript_variant'] == 'NM_000088.3:c.4392_*2delinsAGAG' + assert results['NM_000088.3:c.4392_*2delinsAGAG']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.4392_*2delinsAGAG']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.4392_*2delinsAGAG']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.21135_21140delinsAGAG' - assert results['NM_000088.3:c.4392_*2delinsAGAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262861_48262866delinsCTCT', 'vcf': {'chr': 'chr17', 'ref': 'GTTTAC', 'pos': '48262861', 'alt': 'CTCT'}} - assert results['NM_000088.3:c.4392_*2delinsAGAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185500_50185505delinsCTCT', 'vcf': {'chr': 'chr17', 'ref': 'GTTTAC', 'pos': '50185500', 'alt': 'CTCT'}} - assert results['NM_000088.3:c.4392_*2delinsAGAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262861_48262866delinsCTCT', 'vcf': {'chr': '17', 'ref': 'GTTTAC', 'pos': '48262861', 'alt': 'CTCT'}} - assert results['NM_000088.3:c.4392_*2delinsAGAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185500_50185505delinsCTCT', 'vcf': {'chr': '17', 'ref': 'GTTTAC', 'pos': '50185500', 'alt': 'CTCT'}} - assert 
results['NM_000088.3:c.4392_*2delinsAGAG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.4392_*2delinsAGAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Ter1465GluextTer84)', 'slr': 'NP_000079.2:p.(*1465Eext*84)'} + assert results['NM_000088.3:c.4392_*2delinsAGAG']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.4392_*2delinsAGAG' + assert results['NM_000088.3:c.4392_*2delinsAGAG']['hgvs_lrg_variant'] == 'LRG_1:g.21135_21140delinsAGAG' + self.assertCountEqual(results['NM_000088.3:c.4392_*2delinsAGAG']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.4392_*2delinsAGAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262861_48262866delinsCTCT', 'vcf': {'chr': 'chr17', 'pos': '48262861', 'ref': 'GTTTAC', 'alt': 'CTCT'}} + assert results['NM_000088.3:c.4392_*2delinsAGAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185500_50185505delinsCTCT', 'vcf': {'chr': 'chr17', 'pos': '50185500', 'ref': 'GTTTAC', 'alt': 'CTCT'}} + assert results['NM_000088.3:c.4392_*2delinsAGAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48262861_48262866delinsCTCT', 'vcf': {'chr': '17', 'pos': '48262861', 'ref': 'GTTTAC', 'alt': 'CTCT'}} + assert results['NM_000088.3:c.4392_*2delinsAGAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50185500_50185505delinsCTCT', 'vcf': {'chr': '17', 'pos': '50185500', 'ref': 'GTTTAC', 'alt': 'CTCT'}} + assert results['NM_000088.3:c.4392_*2delinsAGAG']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 
'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant329(self): variant = 'NM_000088.3:c.589_591delinsAGAAGC' @@ -17880,70 +18468,70 @@ def test_variant329(self): assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.589_591delinsAGAAGC' in list(results.keys()) - assert results['NM_000088.3:c.589_591delinsAGAAGC']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589_591delinsAGAAGC' - assert results['NM_000088.3:c.589_591delinsAGAAGC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000088.3:c.589_591delinsAGAAGC']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.589_591delinsAGAAGC']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.589_591delinsAGAAGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197delinsArgSer)', 'slr': 'NP_000079.2:p.(G197delinsRS)'} assert results['NM_000088.3:c.589_591delinsAGAAGC']['submitted_variant'] == 'NM_000088.3:c.589_591delinsAGAAGC' - assert results['NM_000088.3:c.589_591delinsAGAAGC']['genome_context_intronic_sequence'] == '' - assert results['NM_000088.3:c.589_591delinsAGAAGC']['hgvs_lrg_variant'] == 'LRG_1:g.8638_8640delinsAGAAGC' + assert results['NM_000088.3:c.589_591delinsAGAAGC']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589_591delinsAGAAGC']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.589_591delinsAGAAGC']['hgvs_transcript_variant'] == 'NM_000088.3:c.589_591delinsAGAAGC' + assert results['NM_000088.3:c.589_591delinsAGAAGC']['genome_context_intronic_sequence'] == '' + assert results['NM_000088.3:c.589_591delinsAGAAGC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000088.3:c.589_591delinsAGAAGC']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8638_8640delinsAGAAGC' - 
assert results['NM_000088.3:c.589_591delinsAGAAGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275361_48275363delinsGCTTCT', 'vcf': {'chr': 'chr17', 'ref': 'ACC', 'pos': '48275361', 'alt': 'GCTTCT'}} - assert results['NM_000088.3:c.589_591delinsAGAAGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198000_50198002delinsGCTTCT', 'vcf': {'chr': 'chr17', 'ref': 'ACC', 'pos': '50198000', 'alt': 'GCTTCT'}} - assert results['NM_000088.3:c.589_591delinsAGAAGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275361_48275363delinsGCTTCT', 'vcf': {'chr': '17', 'ref': 'ACC', 'pos': '48275361', 'alt': 'GCTTCT'}} - assert results['NM_000088.3:c.589_591delinsAGAAGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198000_50198002delinsGCTTCT', 'vcf': {'chr': '17', 'ref': 'ACC', 'pos': '50198000', 'alt': 'GCTTCT'}} - assert results['NM_000088.3:c.589_591delinsAGAAGC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - + assert results['NM_000088.3:c.589_591delinsAGAAGC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.(Gly197delinsArgSer)', 'slr': 'NP_000079.2:p.(G197delinsRS)'} + assert results['NM_000088.3:c.589_591delinsAGAAGC']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589_591delinsAGAAGC' + assert results['NM_000088.3:c.589_591delinsAGAAGC']['hgvs_lrg_variant'] == 'LRG_1:g.8638_8640delinsAGAAGC' + self.assertCountEqual(results['NM_000088.3:c.589_591delinsAGAAGC']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.589_591delinsAGAAGC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275361_48275363delinsGCTTCT', 'vcf': 
{'chr': 'chr17', 'pos': '48275361', 'ref': 'ACC', 'alt': 'GCTTCT'}} + assert results['NM_000088.3:c.589_591delinsAGAAGC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198000_50198002delinsGCTTCT', 'vcf': {'chr': 'chr17', 'pos': '50198000', 'ref': 'ACC', 'alt': 'GCTTCT'}} + assert results['NM_000088.3:c.589_591delinsAGAAGC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275361_48275363delinsGCTTCT', 'vcf': {'chr': '17', 'pos': '48275361', 'ref': 'ACC', 'alt': 'GCTTCT'}} + assert results['NM_000088.3:c.589_591delinsAGAAGC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198000_50198002delinsGCTTCT', 'vcf': {'chr': '17', 'pos': '50198000', 'ref': 'ACC', 'alt': 'GCTTCT'}} + assert results['NM_000088.3:c.589_591delinsAGAAGC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} def test_variant330(self): variant = 'NM_000885.5:c.*2536delinsAGAAAAATCA' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_000885.5:c.*2536delinsAGAAAAATCA' in list(results.keys()) - assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000885.5:c.*2536delinsAGAAAAATCA']['alt_genomic_loci'], []) - assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['gene_symbol'] == 'ITGA4' - assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000876.3:p.?', 'slr': 'NP_000876.3:p.?'} assert 
results['NM_000885.5:c.*2536delinsAGAAAAATCA']['submitted_variant'] == 'NM_000885.5:c.*2536delinsAGAAAAATCA' - assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['genome_context_intronic_sequence'] == '' - assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['hgvs_lrg_variant'] == '' + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['gene_symbol'] == 'ITGA4' + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['gene_ids'] == {'hgnc_id': 'HGNC:6140', 'entrez_gene_id': '3676', 'ucsc_id': 'uc002unu.4', 'omim_id': ['192975']} assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['hgvs_transcript_variant'] == 'NM_000885.5:c.*2536delinsAGAAAAATCA' + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['genome_context_intronic_sequence'] == '' + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['hgvs_refseqgene_variant'] == '' - assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.182402790delinsAGAAAAATCA', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '182402790', 'alt': 'AGAAAAATCA'}} - assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.181538063delinsAGAAAAATCA', 'vcf': {'chr': 'chr2', 'ref': 'G', 'pos': '181538063', 'alt': 'AGAAAAATCA'}} - assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.182402790delinsAGAAAAATCA', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '182402790', 'alt': 'AGAAAAATCA'}} - assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.181538063delinsAGAAAAATCA', 'vcf': {'chr': '2', 'ref': 'G', 'pos': '181538063', 'alt': 'AGAAAAATCA'}} - assert 
results['NM_000885.5:c.*2536delinsAGAAAAATCA']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000876.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000885.5'} - - assert results['flag'] == 'gene_variant' + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000876.3:p.?', 'slr': 'NP_000876.3:p.?'} + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000885.5:c.*2536delinsAGAAAAATCA']['alt_genomic_loci'], []) + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.182402790delinsAGAAAAATCA', 'vcf': {'chr': 'chr2', 'pos': '182402790', 'ref': 'G', 'alt': 'AGAAAAATCA'}} + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.181538063delinsAGAAAAATCA', 'vcf': {'chr': 'chr2', 'pos': '181538063', 'ref': 'G', 'alt': 'AGAAAAATCA'}} + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.182402790delinsAGAAAAATCA', 'vcf': {'chr': '2', 'pos': '182402790', 'ref': 'G', 'alt': 'AGAAAAATCA'}} + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.181538063delinsAGAAAAATCA', 'vcf': {'chr': '2', 'pos': '181538063', 'ref': 'G', 'alt': 'AGAAAAATCA'}} + assert results['NM_000885.5:c.*2536delinsAGAAAAATCA']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000885.5', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000876.3'} def test_variant331(self): variant = 'NM_002693.2:c.-186_-185delinsCC' results = self.vv.validate(variant, 'GRCh37', 
'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_002693.2:c.-186_-185delinsCC' in list(results.keys()) - assert results['NM_002693.2:c.-186_-185delinsCC']['hgvs_lrg_transcript_variant'] == 'LRG_765t1:c.-186_-185delinsCC' - assert results['NM_002693.2:c.-186_-185delinsCC']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_002693.2:c.-186_-185delinsCC']['alt_genomic_loci'], []) - assert results['NM_002693.2:c.-186_-185delinsCC']['gene_symbol'] == 'POLG' - assert results['NM_002693.2:c.-186_-185delinsCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002684.1(LRG_765p1):p.?', 'slr': 'NP_002684.1:p.?'} assert results['NM_002693.2:c.-186_-185delinsCC']['submitted_variant'] == 'NM_002693.2:c.-186_-185delinsCC' - assert results['NM_002693.2:c.-186_-185delinsCC']['genome_context_intronic_sequence'] == '' - assert results['NM_002693.2:c.-186_-185delinsCC']['hgvs_lrg_variant'] == '' + assert results['NM_002693.2:c.-186_-185delinsCC']['gene_symbol'] == 'POLG' + assert results['NM_002693.2:c.-186_-185delinsCC']['gene_ids'] == {'hgnc_id': 'HGNC:9179', 'entrez_gene_id': '5428', 'ucsc_id': 'uc002bns.5', 'omim_id': ['174763']} assert results['NM_002693.2:c.-186_-185delinsCC']['hgvs_transcript_variant'] == 'NM_002693.2:c.-186_-185delinsCC' + assert results['NM_002693.2:c.-186_-185delinsCC']['genome_context_intronic_sequence'] == '' + assert results['NM_002693.2:c.-186_-185delinsCC']['refseqgene_context_intronic_sequence'] == '' assert results['NM_002693.2:c.-186_-185delinsCC']['hgvs_refseqgene_variant'] == 'NG_008218.1:g.5097_5098delinsCC' - assert results['NM_002693.2:c.-186_-185delinsCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.89877929_89877930delinsGG', 'vcf': {'chr': 'chr15', 'ref': 'CT', 'pos': '89877929', 'alt': 'GG'}} - assert results['NM_002693.2:c.-186_-185delinsCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 
'NC_000015.10:g.89334698_89334699delinsGG', 'vcf': {'chr': 'chr15', 'ref': 'CT', 'pos': '89334698', 'alt': 'GG'}} - assert results['NM_002693.2:c.-186_-185delinsCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.89877929_89877930delinsGG', 'vcf': {'chr': '15', 'ref': 'CT', 'pos': '89877929', 'alt': 'GG'}} - assert results['NM_002693.2:c.-186_-185delinsCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89334698_89334699delinsGG', 'vcf': {'chr': '15', 'ref': 'CT', 'pos': '89334698', 'alt': 'GG'}} - assert results['NM_002693.2:c.-186_-185delinsCC']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008218.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002684.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002693.2'} - - assert results['flag'] == 'gene_variant' + assert results['NM_002693.2:c.-186_-185delinsCC']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_002684.1(LRG_765p1):p.?', 'slr': 'NP_002684.1:p.?'} + assert results['NM_002693.2:c.-186_-185delinsCC']['hgvs_lrg_transcript_variant'] == 'LRG_765t1:c.-186_-185delinsCC' + assert results['NM_002693.2:c.-186_-185delinsCC']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_002693.2:c.-186_-185delinsCC']['alt_genomic_loci'], []) + assert results['NM_002693.2:c.-186_-185delinsCC']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000015.9:g.89877929_89877930delinsGG', 'vcf': {'chr': 'chr15', 'pos': '89877929', 'ref': 'CT', 'alt': 'GG'}} + assert results['NM_002693.2:c.-186_-185delinsCC']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89334698_89334699delinsGG', 'vcf': {'chr': 'chr15', 'pos': '89334698', 'ref': 'CT', 'alt': 'GG'}} + assert results['NM_002693.2:c.-186_-185delinsCC']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000015.9:g.89877929_89877930delinsGG', 'vcf': {'chr': '15', 'pos': 
'89877929', 'ref': 'CT', 'alt': 'GG'}} + assert results['NM_002693.2:c.-186_-185delinsCC']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000015.10:g.89334698_89334699delinsGG', 'vcf': {'chr': '15', 'pos': '89334698', 'ref': 'CT', 'alt': 'GG'}} + assert results['NM_002693.2:c.-186_-185delinsCC']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_002693.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_002684.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_008218.1'} def test_variant332(self): variant = 'NG_009616.1:g.29052_29053insCTACATAG' @@ -17951,57 +18539,59 @@ def test_variant332(self): print(results) assert results['flag'] == 'gene_variant' - assert 'NM_001287344.1:c.690_690+1insCTACATAG' in list(results.keys()) - assert results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001287344.1:c.690_690+1insCTACATAG']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001287344.1:c.690_690+1insCTACATAG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43850_43851insATGTAGCT', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43850_43851insATGTAGCT', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}]) - assert results['NM_001287344.1:c.690_690+1insCTACATAG']['gene_symbol'] == 'BTK' - assert results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001274273.1:p.?', 'slr': 'NP_001274273.1:p.?'} - assert results['NM_001287344.1:c.690_690+1insCTACATAG']['submitted_variant'] == 'NG_009616.1:g.29052_29053insCTACATAG' - assert results['NM_001287344.1:c.690_690+1insCTACATAG']['genome_context_intronic_sequence'] == 'NC_000023.10(NM_001287344.1):c.690_690+1insCTACATAG' - assert 
results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_lrg_variant'] == '' - assert results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_transcript_variant'] == 'NM_001287344.1:c.690_690+1insCTACATAG' - assert results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_refseqgene_variant'] == '' - assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617162_100617163insATGTAGCT', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} - assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362174_101362175insATGTAGCT', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} - assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617162_100617163insATGTAGCT', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} - assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362174_101362175insATGTAGCT', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} - assert results['NM_001287344.1:c.690_690+1insCTACATAG']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001274273.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001287344.1'} - assert 'NM_001287345.1:c.588_588+1insCTACATAG' in list(results.keys()) - assert results['NM_001287345.1:c.588_588+1insCTACATAG']['hgvs_lrg_transcript_variant'] == '' - assert results['NM_001287345.1:c.588_588+1insCTACATAG']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_001287345.1:c.588_588+1insCTACATAG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43850_43851insATGTAGCT', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 
'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43850_43851insATGTAGCT', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}]) - assert results['NM_001287345.1:c.588_588+1insCTACATAG']['gene_symbol'] == 'BTK' - assert results['NM_001287345.1:c.588_588+1insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001274274.1:p.?', 'slr': 'NP_001274274.1:p.?'} assert results['NM_001287345.1:c.588_588+1insCTACATAG']['submitted_variant'] == 'NG_009616.1:g.29052_29053insCTACATAG' - assert results['NM_001287345.1:c.588_588+1insCTACATAG']['genome_context_intronic_sequence'] == 'NC_000023.10(NM_001287345.1):c.588_588+1insCTACATAG' - assert results['NM_001287345.1:c.588_588+1insCTACATAG']['hgvs_lrg_variant'] == '' + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['gene_symbol'] == 'BTK' + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['gene_ids'] == {'hgnc_id': 'HGNC:1133', 'entrez_gene_id': '695', 'ucsc_id': 'uc004ehg.3', 'omim_id': ['300300']} assert results['NM_001287345.1:c.588_588+1insCTACATAG']['hgvs_transcript_variant'] == 'NM_001287345.1:c.588_588+1insCTACATAG' + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['genome_context_intronic_sequence'] == 'NC_000023.10(NM_001287345.1):c.588_588+1insCTACATAG' + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['refseqgene_context_intronic_sequence'] == '' assert results['NM_001287345.1:c.588_588+1insCTACATAG']['hgvs_refseqgene_variant'] == '' - assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617162_100617163insATGTAGCT', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} - assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362174_101362175insATGTAGCT', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': 
'101362172', 'alt': 'CCTATGTAG'}} - assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617162_100617163insATGTAGCT', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} - assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362174_101362175insATGTAGCT', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} - assert results['NM_001287345.1:c.588_588+1insCTACATAG']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001274274.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001287345.1'} + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001274274.1:p.?', 'slr': 'NP_001274274.1:p.?'} + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_001287345.1:c.588_588+1insCTACATAG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43850_43851insATGTAGCT', 'vcf': {'chr': 'HG1439_PATCH', 'pos': '43848', 'ref': 'C', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43850_43851insATGTAGCT', 'vcf': {'chr': 'NW_004070883.1', 'pos': '43848', 'ref': 'C', 'alt': 'CCTATGTAG'}}}]) + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617162_100617163insATGTAGCT', 'vcf': {'chr': 'chrX', 'pos': '100617160', 'ref': 'C', 'alt': 'CCTATGTAG'}} + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362174_101362175insATGTAGCT', 'vcf': {'chr': 'chrX', 'pos': '101362172', 'ref': 'C', 'alt': 
'CCTATGTAG'}} + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617162_100617163insATGTAGCT', 'vcf': {'chr': 'X', 'pos': '100617160', 'ref': 'C', 'alt': 'CCTATGTAG'}} + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362174_101362175insATGTAGCT', 'vcf': {'chr': 'X', 'pos': '101362172', 'ref': 'C', 'alt': 'CCTATGTAG'}} + assert results['NM_001287345.1:c.588_588+1insCTACATAG']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001287345.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001274274.1'} + + assert 'NM_001287344.1:c.690_690+1insCTACATAG' in list(results.keys()) + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['submitted_variant'] == 'NG_009616.1:g.29052_29053insCTACATAG' + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['gene_symbol'] == 'BTK' + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['gene_ids'] == {'hgnc_id': 'HGNC:1133', 'entrez_gene_id': '695', 'ucsc_id': 'uc004ehg.3', 'omim_id': ['300300']} + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_transcript_variant'] == 'NM_001287344.1:c.690_690+1insCTACATAG' + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['genome_context_intronic_sequence'] == 'NC_000023.10(NM_001287344.1):c.690_690+1insCTACATAG' + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['refseqgene_context_intronic_sequence'] == '' + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_refseqgene_variant'] == '' + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_001274273.1:p.?', 'slr': 'NP_001274273.1:p.?'} + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_lrg_transcript_variant'] == '' + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['hgvs_lrg_variant'] == '' + 
self.assertCountEqual(results['NM_001287344.1:c.690_690+1insCTACATAG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43850_43851insATGTAGCT', 'vcf': {'chr': 'HG1439_PATCH', 'pos': '43848', 'ref': 'C', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43850_43851insATGTAGCT', 'vcf': {'chr': 'NW_004070883.1', 'pos': '43848', 'ref': 'C', 'alt': 'CCTATGTAG'}}}]) + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617162_100617163insATGTAGCT', 'vcf': {'chr': 'chrX', 'pos': '100617160', 'ref': 'C', 'alt': 'CCTATGTAG'}} + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362174_101362175insATGTAGCT', 'vcf': {'chr': 'chrX', 'pos': '101362172', 'ref': 'C', 'alt': 'CCTATGTAG'}} + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617162_100617163insATGTAGCT', 'vcf': {'chr': 'X', 'pos': '100617160', 'ref': 'C', 'alt': 'CCTATGTAG'}} + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362174_101362175insATGTAGCT', 'vcf': {'chr': 'X', 'pos': '101362172', 'ref': 'C', 'alt': 'CCTATGTAG'}} + assert results['NM_001287344.1:c.690_690+1insCTACATAG']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_001287344.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_001274273.1'} assert 'NM_000061.2:c.588_588+1insCTACATAG' in list(results.keys()) - assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_lrg_transcript_variant'] == 'LRG_128t1:c.588_588+1insCTACATAG' - assert results['NM_000061.2:c.588_588+1insCTACATAG']['refseqgene_context_intronic_sequence'] == 'NG_009616.1(NM_000061.2):c.588_588+1insCTACATAG' - 
self.assertCountEqual(results['NM_000061.2:c.588_588+1insCTACATAG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43850_43851insATGTAGCT', 'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43850_43851insATGTAGCT', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}]) - assert results['NM_000061.2:c.588_588+1insCTACATAG']['gene_symbol'] == 'BTK' - assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000052.1(LRG_128p1):p.?', 'slr': 'NP_000052.1:p.?'} assert results['NM_000061.2:c.588_588+1insCTACATAG']['submitted_variant'] == 'NG_009616.1:g.29052_29053insCTACATAG' - assert results['NM_000061.2:c.588_588+1insCTACATAG']['genome_context_intronic_sequence'] == 'NC_000023.10(NM_000061.2):c.588_588+1insCTACATAG' - assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_lrg_variant'] == 'LRG_128:g.29052_29053insCTACATAG' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['gene_symbol'] == 'BTK' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['gene_ids'] == {'hgnc_id': 'HGNC:1133', 'entrez_gene_id': '695', 'ucsc_id': 'uc004ehg.3', 'omim_id': ['300300']} assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_transcript_variant'] == 'NM_000061.2:c.588_588+1insCTACATAG' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['genome_context_intronic_sequence'] == 'NC_000023.10(NM_000061.2):c.588_588+1insCTACATAG' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['refseqgene_context_intronic_sequence'] == 'NG_009616.1(NM_000061.2):c.588_588+1insCTACATAG' assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_refseqgene_variant'] == 'NG_009616.1:g.29052_29053insCTACATAG' - assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617162_100617163insATGTAGCT', 'vcf': {'chr': 
'chrX', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} - assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362174_101362175insATGTAGCT', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} - assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617162_100617163insATGTAGCT', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} - assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362174_101362175insATGTAGCT', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} - assert results['NM_000061.2:c.588_588+1insCTACATAG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009616.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000052.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000061.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_128.xml'} - + assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000052.1(LRG_128p1):p.?', 'slr': 'NP_000052.1:p.?'} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_lrg_transcript_variant'] == 'LRG_128t1:c.588_588+1insCTACATAG' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_lrg_variant'] == 'LRG_128:g.29052_29053insCTACATAG' + self.assertCountEqual(results['NM_000061.2:c.588_588+1insCTACATAG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43850_43851insATGTAGCT', 'vcf': {'chr': 'HG1439_PATCH', 'pos': '43848', 'ref': 'C', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43850_43851insATGTAGCT', 'vcf': {'chr': 'NW_004070883.1', 'pos': '43848', 'ref': 'C', 'alt': 'CCTATGTAG'}}}]) + assert 
results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617162_100617163insATGTAGCT', 'vcf': {'chr': 'chrX', 'pos': '100617160', 'ref': 'C', 'alt': 'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362174_101362175insATGTAGCT', 'vcf': {'chr': 'chrX', 'pos': '101362172', 'ref': 'C', 'alt': 'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617162_100617163insATGTAGCT', 'vcf': {'chr': 'X', 'pos': '100617160', 'ref': 'C', 'alt': 'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362174_101362175insATGTAGCT', 'vcf': {'chr': 'X', 'pos': '101362172', 'ref': 'C', 'alt': 'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000061.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000052.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009616.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_128.xml'} def test_variant333(self): variant = 'NM_000061.2:c.588_588+1insCTACATAG' @@ -18010,22 +18600,22 @@ def test_variant333(self): assert results['flag'] == 'gene_variant' assert 'NM_000061.2:c.588_588+1insCTACATAG' in list(results.keys()) - assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_lrg_transcript_variant'] == 'LRG_128t1:c.588_588+1insCTACATAG' - assert results['NM_000061.2:c.588_588+1insCTACATAG']['refseqgene_context_intronic_sequence'] == 'NG_009616.1(NM_000061.2):c.588_588+1insCTACATAG' - self.assertCountEqual(results['NM_000061.2:c.588_588+1insCTACATAG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43850_43851insATGTAGCT', 
'vcf': {'chr': 'HG1439_PATCH', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43850_43851insATGTAGCT', 'vcf': {'chr': 'NW_004070883.1', 'ref': 'C', 'pos': '43848', 'alt': 'CCTATGTAG'}}}]) - assert results['NM_000061.2:c.588_588+1insCTACATAG']['gene_symbol'] == 'BTK' - assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000052.1(LRG_128p1):p.?', 'slr': 'NP_000052.1:p.?'} assert results['NM_000061.2:c.588_588+1insCTACATAG']['submitted_variant'] == 'NM_000061.2:c.588_588+1insCTACATAG' - assert results['NM_000061.2:c.588_588+1insCTACATAG']['genome_context_intronic_sequence'] == 'NC_000023.10(NM_000061.2):c.588_588+1insCTACATAG' - assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_lrg_variant'] == 'LRG_128:g.29052_29053insCTACATAG' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['gene_symbol'] == 'BTK' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['gene_ids'] == {'hgnc_id': 'HGNC:1133', 'entrez_gene_id': '695', 'ucsc_id': 'uc004ehg.3', 'omim_id': ['300300']} assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_transcript_variant'] == 'NM_000061.2:c.588_588+1insCTACATAG' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['genome_context_intronic_sequence'] == 'NC_000023.10(NM_000061.2):c.588_588+1insCTACATAG' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['refseqgene_context_intronic_sequence'] == 'NG_009616.1(NM_000061.2):c.588_588+1insCTACATAG' assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_refseqgene_variant'] == 'NG_009616.1:g.29052_29053insCTACATAG' - assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617162_100617163insATGTAGCT', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} - assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg38'] == 
{'hgvs_genomic_description': 'NC_000023.11:g.101362174_101362175insATGTAGCT', 'vcf': {'chr': 'chrX', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} - assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617162_100617163insATGTAGCT', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '100617160', 'alt': 'CCTATGTAG'}} - assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362174_101362175insATGTAGCT', 'vcf': {'chr': 'X', 'ref': 'C', 'pos': '101362172', 'alt': 'CCTATGTAG'}} - assert results['NM_000061.2:c.588_588+1insCTACATAG']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009616.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000052.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000061.2', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_128.xml'} - + assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000052.1(LRG_128p1):p.?', 'slr': 'NP_000052.1:p.?'} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_lrg_transcript_variant'] == 'LRG_128t1:c.588_588+1insCTACATAG' + assert results['NM_000061.2:c.588_588+1insCTACATAG']['hgvs_lrg_variant'] == 'LRG_128:g.29052_29053insCTACATAG' + self.assertCountEqual(results['NM_000061.2:c.588_588+1insCTACATAG']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004070883.1:g.43850_43851insATGTAGCT', 'vcf': {'chr': 'HG1439_PATCH', 'pos': '43848', 'ref': 'C', 'alt': 'CCTATGTAG'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004070883.1:g.43850_43851insATGTAGCT', 'vcf': {'chr': 'NW_004070883.1', 'pos': '43848', 'ref': 'C', 'alt': 'CCTATGTAG'}}}]) + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617162_100617163insATGTAGCT', 'vcf': {'chr': 
'chrX', 'pos': '100617160', 'ref': 'C', 'alt': 'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362174_101362175insATGTAGCT', 'vcf': {'chr': 'chrX', 'pos': '101362172', 'ref': 'C', 'alt': 'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100617162_100617163insATGTAGCT', 'vcf': {'chr': 'X', 'pos': '100617160', 'ref': 'C', 'alt': 'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000023.11:g.101362174_101362175insATGTAGCT', 'vcf': {'chr': 'X', 'pos': '101362172', 'ref': 'C', 'alt': 'CCTATGTAG'}} + assert results['NM_000061.2:c.588_588+1insCTACATAG']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000061.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000052.1', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_009616.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_128.xml'} def test_variant334(self): variant = 'NM_000061.2:c.588_589insCTACATAG' @@ -18034,68 +18624,67 @@ def test_variant334(self): assert results['flag'] == 'gene_variant' assert 'NM_000061.2:c.588_589insCTACATAG' in list(results.keys()) - assert results['NM_000061.2:c.588_589insCTACATAG']['hgvs_lrg_transcript_variant'] == 'LRG_128t1:c.588_589insCTACATAG' - assert results['NM_000061.2:c.588_589insCTACATAG']['refseqgene_context_intronic_sequence'] == '' - self.assertCountEqual(results['NM_000061.2:c.588_589insCTACATAG']['alt_genomic_loci'], []) - assert results['NM_000061.2:c.588_589insCTACATAG']['gene_symbol'] == 'BTK' - assert results['NM_000061.2:c.588_589insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000052.1(LRG_128p1):p.(Ile197LeufsTer5)', 'slr': 'NP_000052.1:p.(I197Lfs*5)'} assert 
results['NM_000061.2:c.588_589insCTACATAG']['submitted_variant'] == 'NM_000061.2:c.588_589insCTACATAG' - assert results['NM_000061.2:c.588_589insCTACATAG']['genome_context_intronic_sequence'] == '' - assert results['NM_000061.2:c.588_589insCTACATAG']['hgvs_lrg_variant'] == '' + assert results['NM_000061.2:c.588_589insCTACATAG']['gene_symbol'] == 'BTK' + assert results['NM_000061.2:c.588_589insCTACATAG']['gene_ids'] == {'hgnc_id': 'HGNC:1133', 'entrez_gene_id': '695', 'ucsc_id': 'uc004ehg.3', 'omim_id': ['300300']} assert results['NM_000061.2:c.588_589insCTACATAG']['hgvs_transcript_variant'] == 'NM_000061.2:c.588_589insCTACATAG' + assert results['NM_000061.2:c.588_589insCTACATAG']['genome_context_intronic_sequence'] == '' + assert results['NM_000061.2:c.588_589insCTACATAG']['refseqgene_context_intronic_sequence'] == '' assert results['NM_000061.2:c.588_589insCTACATAG']['hgvs_refseqgene_variant'] == '' - assert results['NM_000061.2:c.588_589insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100615752_100617160del', 'vcf': {'chr': 'chrX', 'ref': 
'GTTAGGAGAAAAGGTAGGAGGGTTTGTCAAGATACCAAGCACTCTTCTCTTCTCTCCCAACTCTCTGGCTTACTCAAGACACCCAAATCAGGCATACTAAAATATTACTCAGCAGTCATTCAACAACCATTTTTAAGCACCAGTGCAGGAGTTCTCAGCCTTGCACACATATAAAGACCATGTATGGAACTTTTAAATTCCAATGTACTTTCGGAGGCCAAGGCGGGCGGATCAGTTGAGGCCAGGAGTTCGAGACCAGCCTGGCCAACGTGACGAAACCCCATCTCTACTAAAAATGCAAAAATCAGCTGGGCATGGTAGTGTGTGCATATAGCCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATAACTTGACCCCAGGAGGCGGAGGTTGTAGTGAGCCAAGACCACGCCACTGCACTCCAGCCTGGGTGACAGAGTGAGACTGTCTCAAAAAAAAACCCACAAAAAACAAAAAACCAATTCCAATACCTAGTCAGTTTCCTCACAGACCAATTACATCAAAATCAAACTCTCAGGAATGGGACCCAAACATTACTATTTTTAAAGCTCACTAGACAAAAACCATTTATAGCTAAGGTCAGGAAACCGGCTTGGCACTAAACTTGTACGTGAATCTACTAAGTGGCTCAGAACCTTGGTTTCCTTCTTTGTAAAATGAGTATAATAATACCTGCTCTACTTACTTACAATATGTGAGAAAGGGCTTTCTAGCCCTCAAGAAGGAACCAAAAAAAAAAAAAAAACTTCTGAAGTGTTAGTGATAGGTGGTGTTAGTGCTAAGTGTTGAGTATGTTGGTATTAAGTGTTAAATTCTTCTAACTTTACTGTATGTTTGAAAATACTTTCCAGCTGGGCACGGTGGCTCACGCCTGTAATCCCAGCACTTTAGGAGGCCGAGGCGGGTTCGAGACCAGCCTGAACAACATGGTGACACCCCCATCTCTACTAAAAATACAAAAATTAGCTGGGCGTGGTGGTGCATGCCTGTAATCCCAGCTACTCAGGAGGCTGAGACAGGAGAATCACTTGAATCTGGGAGGCGGAGGTTGCAGTGAGCCGAGATCATGCCATTACACTCGAACCTGGGCGACAGAGCGAGACTCCGTCTCAAAACAAAAACAAAAGCAAAAACAAAACAACAACAAAAACTTTACATTAAAAAAATCAGGTTTTGTTCTAAACAGGTGATTGGATTACATGGTTGCTGAGAGCCTTCTATCTTTCCATCGAGGAGGAAATCCTAATTAGAAGAACAAATCCCCCATCTTAGCAAGAATACCAATTAACACTGCCAAGTCCCAGGGTAATTCTAAGACTCTAGTGTGTTCTTAGGGCTTGACTATAAGTTTCCATTTAAGCAGTGGCAGCACCCAGTTTCCCTGTATAC', 'pos': '100615751', 'alt': 'G'}} + assert results['NM_000061.2:c.588_589insCTACATAG']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000052.1(LRG_128p1):p.(Ile197LeufsTer5)', 'slr': 'NP_000052.1:p.(I197Lfs*5)'} + assert results['NM_000061.2:c.588_589insCTACATAG']['hgvs_lrg_transcript_variant'] == 'LRG_128t1:c.588_589insCTACATAG' + assert results['NM_000061.2:c.588_589insCTACATAG']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000061.2:c.588_589insCTACATAG']['alt_genomic_loci'], []) + assert 
results['NM_000061.2:c.588_589insCTACATAG']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000023.10:g.100615752_100617160del', 'vcf': {'chr': 'chrX', 'pos': '100615751', 'ref': 'GTTAGGAGAAAAGGTAGGAGGGTTTGTCAAGATACCAAGCACTCTTCTCTTCTCTCCCAACTCTCTGGCTTACTCAAGACACCCAAATCAGGCATACTAAAATATTACTCAGCAGTCATTCAACAACCATTTTTAAGCACCAGTGCAGGAGTTCTCAGCCTTGCACACATATAAAGACCATGTATGGAACTTTTAAATTCCAATGTACTTTCGGAGGCCAAGGCGGGCGGATCAGTTGAGGCCAGGAGTTCGAGACCAGCCTGGCCAACGTGACGAAACCCCATCTCTACTAAAAATGCAAAAATCAGCTGGGCATGGTAGTGTGTGCATATAGCCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATAACTTGACCCCAGGAGGCGGAGGTTGTAGTGAGCCAAGACCACGCCACTGCACTCCAGCCTGGGTGACAGAGTGAGACTGTCTCAAAAAAAAACCCACAAAAAACAAAAAACCAATTCCAATACCTAGTCAGTTTCCTCACAGACCAATTACATCAAAATCAAACTCTCAGGAATGGGACCCAAACATTACTATTTTTAAAGCTCACTAGACAAAAACCATTTATAGCTAAGGTCAGGAAACCGGCTTGGCACTAAACTTGTACGTGAATCTACTAAGTGGCTCAGAACCTTGGTTTCCTTCTTTGTAAAATGAGTATAATAATACCTGCTCTACTTACTTACAATATGTGAGAAAGGGCTTTCTAGCCCTCAAGAAGGAACCAAAAAAAAAAAAAAAACTTCTGAAGTGTTAGTGATAGGTGGTGTTAGTGCTAAGTGTTGAGTATGTTGGTATTAAGTGTTAAATTCTTCTAACTTTACTGTATGTTTGAAAATACTTTCCAGCTGGGCACGGTGGCTCACGCCTGTAATCCCAGCACTTTAGGAGGCCGAGGCGGGTTCGAGACCAGCCTGAACAACATGGTGACACCCCCATCTCTACTAAAAATACAAAAATTAGCTGGGCGTGGTGGTGCATGCCTGTAATCCCAGCTACTCAGGAGGCTGAGACAGGAGAATCACTTGAATCTGGGAGGCGGAGGTTGCAGTGAGCCGAGATCATGCCATTACACTCGAACCTGGGCGACAGAGCGAGACTCCGTCTCAAAACAAAAACAAAAGCAAAAACAAAACAACAACAAAAACTTTACATTAAAAAAATCAGGTTTTGTTCTAAACAGGTGATTGGATTACATGGTTGCTGAGAGCCTTCTATCTTTCCATCGAGGAGGAAATCCTAATTAGAAGAACAAATCCCCCATCTTAGCAAGAATACCAATTAACACTGCCAAGTCCCAGGGTAATTCTAAGACTCTAGTGTGTTCTTAGGGCTTGACTATAAGTTTCCATTTAAGCAGTGGCAGCACCCAGTTTCCCTGTATAC', 'alt': 'G'}} assert 'hg38' not in list(results['NM_000061.2:c.588_589insCTACATAG']['primary_assembly_loci'].keys()) - assert results['NM_000061.2:c.588_589insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100615752_100617160del', 'vcf': {'chr': 'X', 'ref': 
'GTTAGGAGAAAAGGTAGGAGGGTTTGTCAAGATACCAAGCACTCTTCTCTTCTCTCCCAACTCTCTGGCTTACTCAAGACACCCAAATCAGGCATACTAAAATATTACTCAGCAGTCATTCAACAACCATTTTTAAGCACCAGTGCAGGAGTTCTCAGCCTTGCACACATATAAAGACCATGTATGGAACTTTTAAATTCCAATGTACTTTCGGAGGCCAAGGCGGGCGGATCAGTTGAGGCCAGGAGTTCGAGACCAGCCTGGCCAACGTGACGAAACCCCATCTCTACTAAAAATGCAAAAATCAGCTGGGCATGGTAGTGTGTGCATATAGCCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATAACTTGACCCCAGGAGGCGGAGGTTGTAGTGAGCCAAGACCACGCCACTGCACTCCAGCCTGGGTGACAGAGTGAGACTGTCTCAAAAAAAAACCCACAAAAAACAAAAAACCAATTCCAATACCTAGTCAGTTTCCTCACAGACCAATTACATCAAAATCAAACTCTCAGGAATGGGACCCAAACATTACTATTTTTAAAGCTCACTAGACAAAAACCATTTATAGCTAAGGTCAGGAAACCGGCTTGGCACTAAACTTGTACGTGAATCTACTAAGTGGCTCAGAACCTTGGTTTCCTTCTTTGTAAAATGAGTATAATAATACCTGCTCTACTTACTTACAATATGTGAGAAAGGGCTTTCTAGCCCTCAAGAAGGAACCAAAAAAAAAAAAAAAACTTCTGAAGTGTTAGTGATAGGTGGTGTTAGTGCTAAGTGTTGAGTATGTTGGTATTAAGTGTTAAATTCTTCTAACTTTACTGTATGTTTGAAAATACTTTCCAGCTGGGCACGGTGGCTCACGCCTGTAATCCCAGCACTTTAGGAGGCCGAGGCGGGTTCGAGACCAGCCTGAACAACATGGTGACACCCCCATCTCTACTAAAAATACAAAAATTAGCTGGGCGTGGTGGTGCATGCCTGTAATCCCAGCTACTCAGGAGGCTGAGACAGGAGAATCACTTGAATCTGGGAGGCGGAGGTTGCAGTGAGCCGAGATCATGCCATTACACTCGAACCTGGGCGACAGAGCGAGACTCCGTCTCAAAACAAAAACAAAAGCAAAAACAAAACAACAACAAAAACTTTACATTAAAAAAATCAGGTTTTGTTCTAAACAGGTGATTGGATTACATGGTTGCTGAGAGCCTTCTATCTTTCCATCGAGGAGGAAATCCTAATTAGAAGAACAAATCCCCCATCTTAGCAAGAATACCAATTAACACTGCCAAGTCCCAGGGTAATTCTAAGACTCTAGTGTGTTCTTAGGGCTTGACTATAAGTTTCCATTTAAGCAGTGGCAGCACCCAGTTTCCCTGTATAC', 'pos': '100615751', 'alt': 'G'}} + assert results['NM_000061.2:c.588_589insCTACATAG']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000023.10:g.100615752_100617160del', 'vcf': {'chr': 'X', 'pos': '100615751', 'ref': 
'GTTAGGAGAAAAGGTAGGAGGGTTTGTCAAGATACCAAGCACTCTTCTCTTCTCTCCCAACTCTCTGGCTTACTCAAGACACCCAAATCAGGCATACTAAAATATTACTCAGCAGTCATTCAACAACCATTTTTAAGCACCAGTGCAGGAGTTCTCAGCCTTGCACACATATAAAGACCATGTATGGAACTTTTAAATTCCAATGTACTTTCGGAGGCCAAGGCGGGCGGATCAGTTGAGGCCAGGAGTTCGAGACCAGCCTGGCCAACGTGACGAAACCCCATCTCTACTAAAAATGCAAAAATCAGCTGGGCATGGTAGTGTGTGCATATAGCCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATAACTTGACCCCAGGAGGCGGAGGTTGTAGTGAGCCAAGACCACGCCACTGCACTCCAGCCTGGGTGACAGAGTGAGACTGTCTCAAAAAAAAACCCACAAAAAACAAAAAACCAATTCCAATACCTAGTCAGTTTCCTCACAGACCAATTACATCAAAATCAAACTCTCAGGAATGGGACCCAAACATTACTATTTTTAAAGCTCACTAGACAAAAACCATTTATAGCTAAGGTCAGGAAACCGGCTTGGCACTAAACTTGTACGTGAATCTACTAAGTGGCTCAGAACCTTGGTTTCCTTCTTTGTAAAATGAGTATAATAATACCTGCTCTACTTACTTACAATATGTGAGAAAGGGCTTTCTAGCCCTCAAGAAGGAACCAAAAAAAAAAAAAAAACTTCTGAAGTGTTAGTGATAGGTGGTGTTAGTGCTAAGTGTTGAGTATGTTGGTATTAAGTGTTAAATTCTTCTAACTTTACTGTATGTTTGAAAATACTTTCCAGCTGGGCACGGTGGCTCACGCCTGTAATCCCAGCACTTTAGGAGGCCGAGGCGGGTTCGAGACCAGCCTGAACAACATGGTGACACCCCCATCTCTACTAAAAATACAAAAATTAGCTGGGCGTGGTGGTGCATGCCTGTAATCCCAGCTACTCAGGAGGCTGAGACAGGAGAATCACTTGAATCTGGGAGGCGGAGGTTGCAGTGAGCCGAGATCATGCCATTACACTCGAACCTGGGCGACAGAGCGAGACTCCGTCTCAAAACAAAAACAAAAGCAAAAACAAAACAACAACAAAAACTTTACATTAAAAAAATCAGGTTTTGTTCTAAACAGGTGATTGGATTACATGGTTGCTGAGAGCCTTCTATCTTTCCATCGAGGAGGAAATCCTAATTAGAAGAACAAATCCCCCATCTTAGCAAGAATACCAATTAACACTGCCAAGTCCCAGGGTAATTCTAAGACTCTAGTGTGTTCTTAGGGCTTGACTATAAGTTTCCATTTAAGCAGTGGCAGCACCCAGTTTCCCTGTATAC', 'alt': 'G'}} assert 'grch38' not in list(results['NM_000061.2:c.588_589insCTACATAG']['primary_assembly_loci'].keys()) - assert results['NM_000061.2:c.588_589insCTACATAG']['reference_sequence_records'] == {'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000052.1', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000061.2'} - + assert results['NM_000061.2:c.588_589insCTACATAG']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000061.2', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000052.1'} def test_variant335(self): 
variant = 'NM_000492.3:c.1210-12_1210-6delinsTTTTTTTTT' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_000492.3:c.1210-7_1210-6dup' in list(results.keys()) - assert results['NM_000492.3:c.1210-7_1210-6dup']['hgvs_lrg_transcript_variant'] == 'LRG_663t1:c.1210-7_1210-6dup' - assert results['NM_000492.3:c.1210-7_1210-6dup']['refseqgene_context_intronic_sequence'] == 'NG_016465.3(NM_000492.3):c.1210-7_1210-6dup' - self.assertCountEqual(results['NM_000492.3:c.1210-7_1210-6dup']['alt_genomic_loci'], []) - assert results['NM_000492.3:c.1210-7_1210-6dup']['gene_symbol'] == 'CFTR' - assert results['NM_000492.3:c.1210-7_1210-6dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000483.3(LRG_663p1):p.?', 'slr': 'NP_000483.3:p.?'} assert results['NM_000492.3:c.1210-7_1210-6dup']['submitted_variant'] == 'NM_000492.3:c.1210-12_1210-6delinsTTTTTTTTT' - assert results['NM_000492.3:c.1210-7_1210-6dup']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_000492.3):c.1210-7_1210-6dup' - assert results['NM_000492.3:c.1210-7_1210-6dup']['hgvs_lrg_variant'] == '' + assert results['NM_000492.3:c.1210-7_1210-6dup']['gene_symbol'] == 'CFTR' + assert results['NM_000492.3:c.1210-7_1210-6dup']['gene_ids'] == {'hgnc_id': 'HGNC:1884', 'entrez_gene_id': '1080', 'ucsc_id': 'uc003vjd.4', 'omim_id': ['602421']} assert results['NM_000492.3:c.1210-7_1210-6dup']['hgvs_transcript_variant'] == 'NM_000492.3:c.1210-7_1210-6dup' + assert results['NM_000492.3:c.1210-7_1210-6dup']['genome_context_intronic_sequence'] == 'NC_000007.13(NM_000492.3):c.1210-7_1210-6dup' + assert results['NM_000492.3:c.1210-7_1210-6dup']['refseqgene_context_intronic_sequence'] == 'NG_016465.3(NM_000492.3):c.1210-7_1210-6dup' assert results['NM_000492.3:c.1210-7_1210-6dup']['hgvs_refseqgene_variant'] == 'NG_016465.3:g.87851_87852dup' - assert results['NM_000492.3:c.1210-7_1210-6dup']['primary_assembly_loci']['hg19'] == 
{'hgvs_genomic_description': 'NC_000007.13:g.117188688_117188689dup', 'vcf': {'chr': 'chr7', 'ref': 'G', 'pos': '117188682', 'alt': 'GTT'}} - assert results['NM_000492.3:c.1210-7_1210-6dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.117548634_117548635dup', 'vcf': {'chr': 'chr7', 'ref': 'G', 'pos': '117548628', 'alt': 'GTT'}} - assert results['NM_000492.3:c.1210-7_1210-6dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.117188688_117188689dup', 'vcf': {'chr': '7', 'ref': 'G', 'pos': '117188682', 'alt': 'GTT'}} - assert results['NM_000492.3:c.1210-7_1210-6dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.117548634_117548635dup', 'vcf': {'chr': '7', 'ref': 'G', 'pos': '117548628', 'alt': 'GTT'}} - assert results['NM_000492.3:c.1210-7_1210-6dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_016465.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000483.3', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000492.3'} - - assert results['flag'] == 'gene_variant' - + assert results['NM_000492.3:c.1210-7_1210-6dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000483.3(LRG_663p1):p.?', 'slr': 'NP_000483.3:p.?'} + assert results['NM_000492.3:c.1210-7_1210-6dup']['hgvs_lrg_transcript_variant'] == 'LRG_663t1:c.1210-7_1210-6dup' + assert results['NM_000492.3:c.1210-7_1210-6dup']['hgvs_lrg_variant'] == '' + self.assertCountEqual(results['NM_000492.3:c.1210-7_1210-6dup']['alt_genomic_loci'], []) + assert results['NM_000492.3:c.1210-7_1210-6dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000007.13:g.117188688_117188689dup', 'vcf': {'chr': 'chr7', 'pos': '117188682', 'ref': 'G', 'alt': 'GTT'}} + assert results['NM_000492.3:c.1210-7_1210-6dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000007.14:g.117548634_117548635dup', 'vcf': {'chr': 'chr7', 'pos': 
'117548628', 'ref': 'G', 'alt': 'GTT'}} + assert results['NM_000492.3:c.1210-7_1210-6dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000007.13:g.117188688_117188689dup', 'vcf': {'chr': '7', 'pos': '117188682', 'ref': 'G', 'alt': 'GTT'}} + assert results['NM_000492.3:c.1210-7_1210-6dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000007.14:g.117548634_117548635dup', 'vcf': {'chr': '7', 'pos': '117548628', 'ref': 'G', 'alt': 'GTT'}} + assert results['NM_000492.3:c.1210-7_1210-6dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000492.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000483.3', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_016465.3'} def test_variant336(self): variant = 'NM_000088.3:c.589-18_589-14delinsTTTTTTTTTT' results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict() print(results) + assert results['flag'] == 'gene_variant' assert 'NM_000088.3:c.589-18_589-14dup' in list(results.keys()) - assert results['NM_000088.3:c.589-18_589-14dup']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-18_589-14dup' - assert results['NM_000088.3:c.589-18_589-14dup']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-18_589-14dup' - self.assertCountEqual(results['NM_000088.3:c.589-18_589-14dup']['alt_genomic_loci'], []) - assert results['NM_000088.3:c.589-18_589-14dup']['gene_symbol'] == 'COL1A1' - assert results['NM_000088.3:c.589-18_589-14dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} assert results['NM_000088.3:c.589-18_589-14dup']['submitted_variant'] == 'NM_000088.3:c.589-18_589-14delinsTTTTTTTTTT' - assert results['NM_000088.3:c.589-18_589-14dup']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-18_589-14dup' - assert results['NM_000088.3:c.589-18_589-14dup']['hgvs_lrg_variant'] == 'LRG_1:g.8620_8624dup' + assert 
results['NM_000088.3:c.589-18_589-14dup']['gene_symbol'] == 'COL1A1' + assert results['NM_000088.3:c.589-18_589-14dup']['gene_ids'] == {'hgnc_id': 'HGNC:2197', 'entrez_gene_id': '1277', 'ucsc_id': 'uc002iqm.4', 'omim_id': ['120150']} assert results['NM_000088.3:c.589-18_589-14dup']['hgvs_transcript_variant'] == 'NM_000088.3:c.589-18_589-14dup' + assert results['NM_000088.3:c.589-18_589-14dup']['genome_context_intronic_sequence'] == 'NC_000017.10(NM_000088.3):c.589-18_589-14dup' + assert results['NM_000088.3:c.589-18_589-14dup']['refseqgene_context_intronic_sequence'] == 'NG_007400.1(NM_000088.3):c.589-18_589-14dup' assert results['NM_000088.3:c.589-18_589-14dup']['hgvs_refseqgene_variant'] == 'NG_007400.1:g.8620_8624dup' - assert results['NM_000088.3:c.589-18_589-14dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275377_48275381dup', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '48275376', 'alt': 'GAAAAA'}} - assert results['NM_000088.3:c.589-18_589-14dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198016_50198020dup', 'vcf': {'chr': 'chr17', 'ref': 'G', 'pos': '50198015', 'alt': 'GAAAAA'}} - assert results['NM_000088.3:c.589-18_589-14dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275377_48275381dup', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '48275376', 'alt': 'GAAAAA'}} - assert results['NM_000088.3:c.589-18_589-14dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198016_50198020dup', 'vcf': {'chr': '17', 'ref': 'G', 'pos': '50198015', 'alt': 'GAAAAA'}} - assert results['NM_000088.3:c.589-18_589-14dup']['reference_sequence_records'] == {'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} - - assert 
results['flag'] == 'gene_variant' + assert results['NM_000088.3:c.589-18_589-14dup']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_000079.2(LRG_1p1):p.?', 'slr': 'NP_000079.2:p.?'} + assert results['NM_000088.3:c.589-18_589-14dup']['hgvs_lrg_transcript_variant'] == 'LRG_1t1:c.589-18_589-14dup' + assert results['NM_000088.3:c.589-18_589-14dup']['hgvs_lrg_variant'] == 'LRG_1:g.8620_8624dup' + self.assertCountEqual(results['NM_000088.3:c.589-18_589-14dup']['alt_genomic_loci'], []) + assert results['NM_000088.3:c.589-18_589-14dup']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275377_48275381dup', 'vcf': {'chr': 'chr17', 'pos': '48275376', 'ref': 'G', 'alt': 'GAAAAA'}} + assert results['NM_000088.3:c.589-18_589-14dup']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198016_50198020dup', 'vcf': {'chr': 'chr17', 'pos': '50198015', 'ref': 'G', 'alt': 'GAAAAA'}} + assert results['NM_000088.3:c.589-18_589-14dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275377_48275381dup', 'vcf': {'chr': '17', 'pos': '48275376', 'ref': 'G', 'alt': 'GAAAAA'}} + assert results['NM_000088.3:c.589-18_589-14dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198016_50198020dup', 'vcf': {'chr': '17', 'pos': '50198015', 'ref': 'G', 'alt': 'GAAAAA'}} + assert results['NM_000088.3:c.589-18_589-14dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} diff --git a/tests/test_variant.py b/tests/test_variant.py index da7157a7..54cfe11f 100644 --- a/tests/test_variant.py +++ b/tests/test_variant.py @@ -313,6 +313,7 @@ def test_output_dict_empty(self): self.assertIsInstance(output, dict) 
self.assertEqual(output, { 'submitted_variant': 'NM_015120.4:c.34=', + 'gene_ids': None, 'gene_symbol': '', 'transcript_description': '', 'hgvs_transcript_variant': None, @@ -331,6 +332,7 @@ def test_output_dict_empty(self): def test_output_dict_set(self): self.var.gene_symbol = 'Symbol' self.var.description = 'Desc' + self.var.stable_gene_ids = 'My_id' self.var.hgvs_transcript_variant = 'hgvsvar' self.var.genome_context_intronic_sequence = 'gintronic' self.var.refseqgene_context_intronic_sequence = 'rintronic' @@ -347,6 +349,7 @@ def test_output_dict_set(self): self.assertEqual(output, { 'submitted_variant': 'NM_015120.4:c.34=', 'gene_symbol': 'Symbol', + 'gene_ids': 'My_id', 'transcript_description': 'Desc', 'hgvs_transcript_variant': 'hgvsvar', 'genome_context_intronic_sequence': 'gintronic', From 857fc63c103ebb9ec33bfc2e21f69724430dbb03 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 15 Jul 2019 15:26:46 +0100 Subject: [PATCH 188/223] Tidying up validator.py --- VariantValidator/__init__.py | 2 +- .../{variantValidator.py => validator.py} | 15 ++------------- 2 files changed, 3 insertions(+), 14 deletions(-) rename VariantValidator/{variantValidator.py => validator.py} (59%) diff --git a/VariantValidator/__init__.py b/VariantValidator/__init__.py index 89d852d8..04dce59d 100644 --- a/VariantValidator/__init__.py +++ b/VariantValidator/__init__.py @@ -2,6 +2,6 @@ from . 
import logger from .version import __version__ -from .variantValidator import Validator +from .validator import Validator __all__ = ["Validator"] diff --git a/VariantValidator/variantValidator.py b/VariantValidator/validator.py similarity index 59% rename from VariantValidator/variantValidator.py rename to VariantValidator/validator.py index cba84e94..0f0d19ae 100644 --- a/VariantValidator/variantValidator.py +++ b/VariantValidator/validator.py @@ -1,19 +1,8 @@ from .modules import vvMixinCore as vvMixinCore -class Validation(): - ''' - #Validation objects contain a number of variant interpretations - ''' - pass - -class ValOutput(): - ''' - #This object contains a single possible interpretation of a variant - ''' - pass class Validator(vvMixinCore.Mixin): - ''' + """ #Mixins are used to split this very large, complex object over multiple files. #There is a logical chain to it, though: # vvMixinInit @@ -23,7 +12,7 @@ class Validator(vvMixinCore.Mixin): # vvMixinCore # v # Validator <- this object. 
- ''' + """ pass From 44c55859ee32e004bf010050c38f143050ec3135 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 15 Jul 2019 15:27:17 +0100 Subject: [PATCH 189/223] Added new tests for configuration --- VariantValidator/configure.py | 2 +- tests/test_configuration.py | 233 ++++++++++++++++++++++++++++++++++ 2 files changed, 234 insertions(+), 1 deletion(-) create mode 100644 tests/test_configuration.py diff --git a/VariantValidator/configure.py b/VariantValidator/configure.py index b7712f2c..4107daa4 100644 --- a/VariantValidator/configure.py +++ b/VariantValidator/configure.py @@ -16,7 +16,7 @@ def read_configuration(): print("PostgreSQL username and password have not been updated from default.") exit_with_message() - if config['seqrepo']['location'] == 'PATH/TO/SEQREPO': + if config['seqrepo']['location'] == '/PATH/TO/SEQREPO': print("Seqrepo directory location has not been updated from default.") exit_with_message() diff --git a/tests/test_configuration.py b/tests/test_configuration.py new file mode 100644 index 00000000..e81d1aec --- /dev/null +++ b/tests/test_configuration.py @@ -0,0 +1,233 @@ +import unittest +import shutil +import os +import subprocess +import sys +import pytest +from configparser import ConfigParser + + +class TestConfigSetUp(unittest.TestCase): + """ + Will test the configuration set up works as it should + """ + + @classmethod + def setUpClass(cls): + cls.filename = os.path.join(os.path.expanduser('~'), '.variantvalidator') + + def setUp(self): + new_filename = self.filename + '_ori' + shutil.move(self.filename, new_filename) + print('Moved file') + + def insert_blank(self): + subprocess.check_output(['python', '-c', 'import VariantValidator']) + + def open_config(self): + self.config = ConfigParser() + self.config.read(self.filename) + + def write_config(self): + with open(self.filename, 'w') as fh: + self.config.write(fh) + + def test_no_config_file(self): + if 'VariantValidator' in list(sys.modules.keys()): + 
pytest.skip("VariantValidator already imported") + self.assertFalse(os.path.exists(self.filename)) + with self.assertRaises(SystemExit): + import VariantValidator + + def test_no_config_file_msg(self): + self.assertFalse(os.path.exists(self.filename)) + output = subprocess.check_output(['python', '-c', 'import VariantValidator']) + print(output) + self.assertTrue('Welcome to VariantValidator' in output.decode()) + self.assertTrue('Please edit this file' in output.decode()) + + def test_unchanged_file(self): + if 'VariantValidator' in list(sys.modules.keys()): + pytest.skip("VariantValidator already imported") + self.assertFalse(os.path.exists(self.filename)) + self.insert_blank() + self.assertTrue(os.path.exists(self.filename)) + with self.assertRaises(SystemExit): + import VariantValidator + + def test_unchanged_file_msg(self): + self.insert_blank() + self.assertTrue(os.path.exists(self.filename)) + output = subprocess.check_output(['python', '-c', 'import VariantValidator']) + print(output) + self.assertTrue('MySQL' in output.decode()) + self.assertTrue('Please edit your configuration' in output.decode()) + + def test_changed_mysql(self): + if 'VariantValidator' in list(sys.modules.keys()): + pytest.skip("VariantValidator already imported") + self.insert_blank() + self.open_config() + self.assertEqual(self.config['mysql']['user'], 'USERNAME') + self.config['mysql']['user'] = 'myusername' + self.assertEqual(self.config['mysql']['password'], 'PASSWORD') + self.config['mysql']['password'] = 'mypass' + self.write_config() + + self.assertTrue(os.path.exists(self.filename)) + with self.assertRaises(SystemExit): + import VariantValidator + + def test_changed_mysql_msg(self): + self.insert_blank() + self.open_config() + self.assertEqual(self.config['mysql']['user'], 'USERNAME') + self.config['mysql']['user'] = 'myusername' + self.assertEqual(self.config['mysql']['password'], 'PASSWORD') + self.config['mysql']['password'] = 'mypass' + self.write_config() + + output = 
subprocess.check_output(['python', '-c', 'import VariantValidator']) + print(output) + self.assertTrue('PostgreSQL' in output.decode()) + self.assertTrue('Please edit your configuration' in output.decode()) + + def test_changed_postgres(self): + if 'VariantValidator' in list(sys.modules.keys()): + pytest.skip("VariantValidator already imported") + self.insert_blank() + self.open_config() + self.config['mysql']['user'] = 'myusername' + self.config['mysql']['password'] = 'mypass' + + self.assertEqual(self.config['postgres']['user'], 'USERNAME') + self.assertEqual(self.config['postgres']['password'], 'PASSWORD') + self.config['postgres']['user'] = 'me' + self.config['postgres']['password'] = 'pass' + self.write_config() + + self.assertTrue(os.path.exists(self.filename)) + with self.assertRaises(SystemExit): + import VariantValidator + + def test_changed_postgres_msg(self): + self.insert_blank() + self.open_config() + self.config['mysql']['user'] = 'myusername' + self.config['mysql']['password'] = 'mypass' + + self.assertEqual(self.config['postgres']['user'], 'USERNAME') + self.assertEqual(self.config['postgres']['password'], 'PASSWORD') + self.config['postgres']['user'] = 'me' + self.config['postgres']['password'] = 'pass' + self.write_config() + + output = subprocess.check_output(['python', '-c', 'import VariantValidator']) + self.assertTrue('Seqrepo' in output.decode()) + self.assertTrue('Please edit your configuration' in output.decode()) + + def test_zz_changed_seqrepo(self): + """ + Test is named as such so it runs last - as it will successfully import VariantValidator + :return: + """ + self.insert_blank() + self.open_config() + self.config['mysql']['user'] = 'myusername' + self.config['mysql']['password'] = 'mypass' + self.config['postgres']['user'] = 'me' + self.config['postgres']['password'] = 'pass' + + self.assertEqual(self.config['seqrepo']['location'], '/PATH/TO/SEQREPO') + self.config['seqrepo']['location'] = 'here' + self.write_config() + + 
self.assertTrue(os.path.exists(self.filename)) + try: + import VariantValidator + except SystemExit: + self.fail('SystemExit raised on Import') + + def tearDown(self): + original = os.path.join(os.path.expanduser('~'), '.variantvalidator') + new_filename = original + '_ori' + shutil.move(new_filename, original) + print('Moved file back') + + +class TestConfigValues(unittest.TestCase): + """ + This class will test the config values that we're using, and that they are being read into VV correctly. + """ + + @classmethod + def setUpClass(cls): + cls.filename = os.path.join(os.path.expanduser('~'), '.variantvalidator') + + def setUp(self): + self.original = self.filename + '_ori' + shutil.copy(self.filename, self.original) + config = ConfigParser() + config.read(self.filename) + self.config = config + + def write_config(self): + with open(self.filename, 'w') as fh: + self.config.write(fh) + + def test_file_structure(self): + self.assertEqual(self.config.sections(), ['mysql', 'seqrepo', 'postgres', 'logging', 'Entrez', 'liftover']) + self.assertEqual(list(self.config['mysql']), ['host', 'database', 'user', 'password']) + self.assertEqual(list(self.config['seqrepo']), ['version', 'location']) + self.assertEqual(list(self.config['postgres']), ['host', 'database', 'version', 'user', 'password']) + self.assertEqual(list(self.config['logging']), ['log', 'console', 'file']) + self.assertEqual(list(self.config['Entrez']), ['email', 'api_key']) + self.assertEqual(list(self.config['liftover']), ['location']) + + def test_file_contents(self): + self.assertNotEqual(self.config['mysql']['user'], 'USERNAME') + self.assertNotEqual(self.config['mysql']['password'], 'PASSWORD') + + self.assertEqual(self.config['seqrepo']['version'], '2018-08-21') + path = os.path.join(self.config['seqrepo']['location'], self.config['seqrepo']['version']) + self.assertTrue(os.path.exists(path)) + + self.assertEqual(self.config['postgres']['version'], 'uta_20180821') + 
self.assertNotEqual(self.config['postgres']['user'], 'USERNAME') + self.assertNotEqual(self.config['postgres']['password'], 'PASSWORD') + + self.assertIsInstance(self.config['logging'].getboolean('log'), bool) + self.assertIn(self.config['logging']['console'].upper(), ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG']) + self.assertIn(self.config['logging']['file'].upper(), ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG']) + + self.assertRegex(self.config['Entrez']['email'], r'\w+@\w+.\w+') + + def test_file_parsing(self): + import VariantValidator + + vv = VariantValidator.Validator() + + self.assertEqual(self.config['mysql']['user'], vv.dbConfig['user']) + self.assertEqual(self.config['mysql']['password'], vv.dbConfig['password']) + self.assertEqual(self.config['mysql']['host'], vv.dbConfig['host']) + self.assertEqual(self.config['mysql']['database'], vv.dbConfig['database']) + + self.assertEqual(vv.seqrepoPath, + os.path.join(self.config['seqrepo']['location'], self.config['seqrepo']['version'])) + + self.assertEqual(vv.utaPath, "postgresql://%s:%s@%s/%s/%s" % ( + self.config["postgres"]["user"], + self.config["postgres"]["password"], + self.config['postgres']['host'], + self.config['postgres']['database'], + self.config['postgres']['version'] + )) + + self.assertEqual(vv.entrez_email, self.config['Entrez']['email']) + if self.config['Entrez']['api_key'] == 'YOUR_API_KEY': + self.assertEqual(vv.entrez_api_key, None) + else: + self.assertEqual(vv.entrez_api_key, self.config['Entrez']['api_key']) + + def tearDown(self): + shutil.move(self.original, self.filename) From 8f68e2f97b9f5eb0c2f7a3c28a9c2dc1d6fb7b17 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 15 Jul 2019 17:00:18 +0100 Subject: [PATCH 190/223] Added more tests and fixed bugs that arose --- VariantValidator/modules/format_converters.py | 2 +- VariantValidator/modules/mappers.py | 4 +- VariantValidator/modules/vvMixinCore.py | 2 +- tests/test_core.py | 147 ++++++++++++++++++ 4 files changed, 151 
insertions(+), 4 deletions(-) create mode 100644 tests/test_core.py diff --git a/VariantValidator/modules/format_converters.py b/VariantValidator/modules/format_converters.py index 2bae3844..c501b99e 100644 --- a/VariantValidator/modules/format_converters.py +++ b/VariantValidator/modules/format_converters.py @@ -773,7 +773,7 @@ def rna(variant, validator): variant.reftype = ':c.' # Change input to reflect! try: - hgvs_c = validator.va_func.hgvs_r_to_c(hgvs_input) + hgvs_c = validator.hgvs_r_to_c(hgvs_input) except hgvs.exceptions.HGVSDataNotAvailableError as e: error = str(e) variant.warnings.append(error) diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index b4ef653c..c81dda8a 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -187,7 +187,7 @@ def gene_to_transcripts(variant, validator): return False -def transcripts_to_gene(variant, validator): +def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version): """This seems to use the quibble and not the HGVS formatted variant format.""" # Flag for validation @@ -204,7 +204,7 @@ def transcripts_to_gene(variant, validator): # Do we keep it? if validator.select_transcripts != 'all': - if tx_ac not in list(validator.select_transcripts_dict_plus_version.keys()): + if tx_ac not in list(select_transcripts_dict_plus_version.keys()): # By marking it as Do Not Write and continuing through the validation loop variant.write = False return True diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 24f037da..01ba124f 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -390,7 +390,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue if format_type == ':c.' 
or format_type == ':n.': - toskip = mappers.transcripts_to_gene(my_variant, self) + toskip = mappers.transcripts_to_gene(my_variant, self, select_transcripts_dict_plus_version) if toskip: continue diff --git a/tests/test_core.py b/tests/test_core.py new file mode 100644 index 00000000..3bbcc58a --- /dev/null +++ b/tests/test_core.py @@ -0,0 +1,147 @@ +import unittest +import VariantValidator + + +class TestValidator(unittest.TestCase): + """ + Going to test the Validator function with a series of different inputs/situations that aren't covered in + test_inputs.py + """ + + @classmethod + def setUpClass(cls): + cls.vv = VariantValidator.Validator() + + def test_transcript_seq_nonsense(self): + var = 'NM_015120.4:c.34C>T' + with self.assertRaises(Exception): + self.vv.validate(var, 'GRCh37', 'all', transcript_set='nonsense') + + def test_transcript_seq_ensembl(self): + var = 'NM_015120.4:c.34C>T' + with self.assertRaises(Exception): + self.vv.validate(var, 'GRCh37', 'all', transcript_set='ensembl') + + self.assertEqual(self.vv.alt_aln_method, 'genebuild') + + def test_transcript_list(self): + var = 'NM_015120.4:c.34C>T' + + output = self.vv.validate(var, 'GRCh37', 'Trans1').format_as_dict() + print(output) + self.assertEqual(output['flag'], 'empty_result') + + def test_transcript_list_realid(self): + var = 'NM_015120.4:c.34C>T' + + output = self.vv.validate(var, 'GRCh37', 'NM_015120.4').format_as_dict() + print(output) + self.assertEqual(output['flag'], 'gene_variant') + self.assertEqual(list(output), ['flag', 'NM_015120.4:c.34C>T', 'metadata']) + + def test_transcript_list_real_pair(self): + var = 'NM_015120.4:c.34C>T' + + output = self.vv.validate(var, 'GRCh37', 'NM_015120.4|NM_015120.5').format_as_dict() + print(output) + self.assertEqual(output['flag'], 'gene_variant') + self.assertEqual(list(output), ['flag', 'NM_015120.4:c.34C>T', 'metadata']) + + def test_non_ascii(self): + var = 'NM_015120.4:c.34C>T\202' + + output = self.vv.validate(var, 'GRCh37', 
'all').format_as_dict() + print(output) + self.assertEqual(output['flag'], 'warning') + self.assertIn('Submitted variant description contains an invalid character', + str(output['validation_warning_1']['validation_warnings'])) + + def test_assembly_hg19(self): + var = 'NM_015120.4:c.34C>T' + + out = self.vv.validate(var, 'hg19', 'all') + for variant in out.output_list: + self.assertEqual(variant.primary_assembly, 'GRCh37') + output = out.format_as_dict() + print(output) + self.assertEqual(output['flag'], 'gene_variant') + self.assertEqual(list(output), ['flag', 'NM_015120.4:c.34C>T', 'metadata']) + + def test_assembly_hg38(self): + var = 'NM_015120.4:c.34C>T' + + out = self.vv.validate(var, 'hg38', 'all') + for variant in out.output_list: + self.assertEqual(variant.primary_assembly, 'GRCh38') + output = out.format_as_dict() + print(output) + self.assertEqual(output['flag'], 'gene_variant') + self.assertEqual(list(output), ['flag', 'NM_015120.4:c.34C>T', 'metadata']) + + def test_assembly_grch(self): + var = 'NM_015120.4:c.34C>T' + + out = self.vv.validate(var, 'grch37', 'all') + for variant in out.output_list: + self.assertEqual(variant.primary_assembly, 'GRCh37') + output = out.format_as_dict() + print(output) + self.assertEqual(output['flag'], 'gene_variant') + self.assertEqual(list(output), ['flag', 'NM_015120.4:c.34C>T', 'metadata']) + + def test_assembly_invalid(self): + var = 'NM_015120.4:c.34C>T' + + out = self.vv.validate(var, 'nonsense', 'all') + for variant in out.output_list: + self.assertEqual(variant.primary_assembly, 'GRCh38') + output = out.format_as_dict() + self.assertEqual(output['flag'], 'gene_variant') + self.assertEqual(list(output), ['flag', 'NM_015120.4:c.34C>T', 'metadata']) + self.assertIn('Invalid genome build has been specified', + str(output['NM_015120.4:c.34C>T']['validation_warnings'])) + + def test_variant_invalid(self): + var = 'NM_015120.4c.34C>T' + + output = self.vv.validate(var, 'GRCh37', 'all').format_as_dict() + print(output) + 
self.assertEqual(output['flag'], 'warning') + self.assertIn('Unable to identify a colon (:) in the variant description', + str(output['validation_warning_1']['validation_warnings'])) + + def test_variant_invalid_2(self): + var = 'NM_015120.4:c34C>T' + + output = self.vv.validate(var, 'GRCh37', 'all').format_as_dict() + print(output) + self.assertEqual(output['flag'], 'warning') + self.assertIn('lacks the . character between and ', + str(output['validation_warning_1']['validation_warnings'])) + + def test_variant_invalid_3(self): + var = 'nonsense' + + output = self.vv.validate(var, 'GRCh37', 'all').format_as_dict() + print(output) + self.assertEqual(output['flag'], 'warning') + self.assertIn('Variant description nonsense is not in an accepted format', + str(output['validation_warning_1']['validation_warnings'])) + + def test_variant_con(self): + var = 'NM_015120.4:c.34con' + + output = self.vv.validate(var, 'GRCh37', 'all').format_as_dict() + print(output) + self.assertEqual(output['flag'], 'warning') + self.assertIn('Gene conversions currently unsupported', + str(output['validation_warning_1']['validation_warnings'])) + + def test_variant_RNA(self): + # TODO: This situation needs looking at as I'm sure it shouldn't be returning an empty string. 
+ var = 'NM_015120.4:r.34DEL' + + output = self.vv.validate(var, 'GRCh37', 'all').format_as_dict() + print(output) + self.assertEqual(output['flag'], 'gene_variant') + self.assertEqual(list(output), ['flag', '', 'metadata']) From ab61b754c34eb76f30abbad3dca2b2a68a3a7262 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Tue, 16 Jul 2019 10:56:35 +0100 Subject: [PATCH 191/223] Added more tests for core methods and fixed identified bugs --- VariantValidator/modules/vvMixinCore.py | 6 +- tests/test_core.py | 187 ++++++++++++++++++++++++ 2 files changed, 192 insertions(+), 1 deletion(-) diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 01ba124f..1de9bb0c 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -797,7 +797,8 @@ def gene2transcripts(self, query): elif 'LRG' in query: lrg_id = query.split('T')[0] lrg_to_hgnc = self.db.get_lrg_data_from_lrg_id(lrg_id) - query = lrg_to_hgnc[2] + if lrg_to_hgnc and lrg_to_hgnc[0] != 'none': + query = lrg_to_hgnc[2] # Quick check for blank form if query == '': @@ -845,6 +846,9 @@ def gene2transcripts(self, query): # Re-set the previous variable previous = fn.hgnc_rest(path="/fetch/symbol/" + current_sym) + if len(previous['record']['response']['docs']) == 0: + return {'error': 'Unable to recognise gene symbol %s' % current_sym} + # Extract the relevant data if 'prev_symbol' in list(previous['record']['response']['docs'][0].keys()): previous_sym = previous['record']['response']['docs'][0]['prev_symbol'][0] diff --git a/tests/test_core.py b/tests/test_core.py index 3bbcc58a..e6c29cf7 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -47,6 +47,13 @@ def test_transcript_list_real_pair(self): self.assertEqual(output['flag'], 'gene_variant') self.assertEqual(list(output), ['flag', 'NM_015120.4:c.34C>T', 'metadata']) + def test_transcript_list_lrg(self): + var = 'NM_015120.4:c.34C>T' + + output = self.vv.validate(var, 'GRCh37', 
'LRG1').format_as_dict() + print(output) + self.assertEqual(output['flag'], 'empty_result') + def test_non_ascii(self): var = 'NM_015120.4:c.34C>T\202' @@ -145,3 +152,183 @@ def test_variant_RNA(self): print(output) self.assertEqual(output['flag'], 'gene_variant') self.assertEqual(list(output), ['flag', '', 'metadata']) + + +class TestGene2Transcripts(unittest.TestCase): + """ + This class will test the gene2transcripts method of the validator + """ + + @classmethod + def setUpClass(cls): + cls.vv = VariantValidator.Validator() + + def test_empty(self): + output = self.vv.gene2transcripts('') + print(output) + self.assertEqual(list(output), ['error']) + self.assertEqual(output['error'], 'Please enter HGNC gene name or transcript identifier (NM_, NR_, or ENST)') + + def test_nonsense(self): + output = self.vv.gene2transcripts('nonsense') + print(output) + self.assertEqual(list(output), ['error']) + self.assertEqual(output['error'], 'Unable to recognise gene symbol NONSENSE') + + def test_nonsense_NM(self): + output = self.vv.gene2transcripts('NM_nonsense') + print(output) + self.assertEqual(list(output), ['error']) + self.assertEqual(output['error'], 'No transcript definition for (tx_ac=NM_NONSENSE)') + + def test_nonsense_NR(self): + output = self.vv.gene2transcripts('nonNR_sense') + print(output) + self.assertEqual(list(output), ['error']) + self.assertEqual(output['error'], 'No transcript definition for (tx_ac=NONNR_SENSE)') + + def test_nonsense_NM_dot(self): + output = self.vv.gene2transcripts('NM_nonsens.e') + print(output) + self.assertEqual(list(output), ['error']) + self.assertEqual(output['error'], 'No transcript definition for (tx_ac=NM_NONSENS.E)') + + def test_nonsense_NM_dot_orf(self): + output = self.vv.gene2transcripts('NM_nonsense.1ORF2') + print(output) + self.assertEqual(list(output), ['error']) + self.assertEqual(output['error'], 'No transcript definition for (tx_ac=NM_NONSENSE.1orf2)') + + def test_nonsense_LRG(self): + output = 
self.vv.gene2transcripts('LRG_nonsense') + print(output) + self.assertEqual(list(output), ['error']) + self.assertEqual(output['error'], 'Unable to recognise gene symbol LRG_NONSENSE') + + def test_nonsense_LRGT(self): + output = self.vv.gene2transcripts('LRGT_nonsense') + print(output) + self.assertEqual(list(output), ['error']) + self.assertEqual(output['error'], 'Unable to recognise gene symbol LRGT_NONSENSE') + + def test_NM(self): + output = self.vv.gene2transcripts('NM_024865.3') + print(output) + self.assertEqual(list(output), ['current_symbol', 'previous_symbol', 'current_name', + 'previous_name', 'transcripts']) + self.assertEqual(output['current_symbol'], 'NANOG') + self.assertEqual(len(output['transcripts']), 3) + + def test_NM_noversion(self): + output = self.vv.gene2transcripts('NM_024865') + print(output) + self.assertEqual(list(output), ['current_symbol', 'previous_symbol', 'current_name', + 'previous_name', 'transcripts']) + self.assertEqual(output['current_symbol'], 'NANOG') + self.assertEqual(len(output['transcripts']), 3) + + def test_sym(self): + output = self.vv.gene2transcripts('NANOG') + print(output) + self.assertEqual(list(output), ['current_symbol', 'previous_symbol', 'current_name', + 'previous_name', 'transcripts']) + self.assertEqual(output['current_symbol'], 'NANOG') + self.assertEqual(len(output['transcripts']), 3) + + def test_old_sym(self): + output = self.vv.gene2transcripts('OTF3') + print(output) + self.assertEqual(list(output), ['current_symbol', 'previous_symbol', 'current_name', + 'previous_name', 'transcripts']) + self.assertEqual(output['current_symbol'], 'POU5F1') + self.assertEqual(len(output['transcripts']), 8) + + def test_ens(self): + output = self.vv.gene2transcripts('ENSG00000204531') + print(output) + self.assertEqual(list(output), ['error']) + self.assertEqual(output['error'], 'Unable to recognise gene symbol ENSG00000204531') + + +class TestHGVS2Ref(unittest.TestCase): + """ + class will test the inputs for the 
hgvs2ref method of the validator() + """ + + @classmethod + def setUpClass(cls): + cls.vv = VariantValidator.Validator() + + def test_empty(self): + output = self.vv.hgvs2ref('') + print(output) + self.assertEqual(list(output), ['variant', 'start_position', 'end_position', 'warning', 'sequence', 'error']) + self.assertEqual(output['error'], ': char 1: end of input') + + def test_nonsense(self): + output = self.vv.hgvs2ref('nonsense') + print(output) + self.assertEqual(list(output), ['variant', 'start_position', 'end_position', 'warning', 'sequence', 'error']) + self.assertEqual(output['error'], 'nonsense: char 9: end of input') + + def test_nonsense_colon(self): + output = self.vv.hgvs2ref('non:sense') + print(output) + self.assertEqual(list(output), ['variant', 'start_position', 'end_position', 'warning', 'sequence', 'error']) + self.assertEqual(output['error'], + 'non:sense: char 4: expected one of \'c\', \'g\', \'m\', \'n\', \'p\', or \'r\'') + + def test_nonsense_hgvs(self): + output = self.vv.hgvs2ref('nonsense:c.34C>T') + print(output) + self.assertEqual(list(output), ['variant', 'start_position', 'end_position', 'warning', 'sequence', 'error']) + self.assertEqual(output['error'], 'Failed to fetch nonsense from SeqRepo (/local/VariantValidator/' + 'seqrepo/2018-08-21) (\'Alias nonsense (namespace: None)\')') + + def test_valid_c(self): + output = self.vv.hgvs2ref('NM_015120.4:c.34C>T') + print(output) + self.assertEqual(list(output), ['variant', 'start_position', 'end_position', 'warning', 'sequence', 'error']) + self.assertEqual(output['error'], '') + self.assertEqual(output['start_position'], '34') + self.assertEqual(output['sequence'], 'C') + + def test_valid_g(self): + output = self.vv.hgvs2ref('NM_015120.4:g.34C>T') + print(output) + self.assertEqual(list(output), ['variant', 'start_position', 'end_position', 'warning', 'sequence', 'error']) + self.assertEqual(output['error'], '') + self.assertEqual(output['start_position'], '34') + 
self.assertEqual(output['sequence'], 'A') + + def test_valid_n(self): + output = self.vv.hgvs2ref('NM_015120.4:n.34C>T') + print(output) + self.assertEqual(list(output), ['variant', 'start_position', 'end_position', 'warning', 'sequence', 'error']) + self.assertEqual(output['error'], '') + self.assertEqual(output['start_position'], '34') + self.assertEqual(output['sequence'], 'A') + + def test_valid_p(self): + output = self.vv.hgvs2ref('NM_015120.4:p.Thr34=') + print(output) + self.assertEqual(list(output), ['variant', 'start_position', 'end_position', 'warning', 'sequence', 'error']) + self.assertEqual(output['error'], '') + self.assertEqual(output['start_position'], '34') + self.assertEqual(output['sequence'], 'A') + + def test_valid_m(self): + output = self.vv.hgvs2ref('NM_015120.4:m.34C>T') + print(output) + self.assertEqual(list(output), ['variant', 'start_position', 'end_position', 'warning', 'sequence', 'error']) + self.assertEqual(output['error'], '') + self.assertEqual(output['start_position'], '') + self.assertEqual(output['sequence'], '') + + def test_valid_r(self): + output = self.vv.hgvs2ref('NM_015120.4:r.34C>U') + print(output) + self.assertEqual(list(output), ['variant', 'start_position', 'end_position', 'warning', 'sequence', 'error']) + self.assertEqual(output['error'], '') + self.assertEqual(output['start_position'], '') + self.assertEqual(output['sequence'], '') From e517000396b36cf89fb4dbcb85b229dd0b748089 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Tue, 16 Jul 2019 14:36:04 +0100 Subject: [PATCH 192/223] Added tests for ValOutput and fixed one identified bug --- VariantValidator/modules/valoutput.py | 2 +- tests/test_valoutput.py | 264 ++++++++++++++++++++++++++ 2 files changed, 265 insertions(+), 1 deletion(-) create mode 100644 tests/test_valoutput.py diff --git a/VariantValidator/modules/valoutput.py b/VariantValidator/modules/valoutput.py index b7ad4bff..e7c94e3a 100644 --- a/VariantValidator/modules/valoutput.py +++ 
b/VariantValidator/modules/valoutput.py @@ -30,7 +30,7 @@ def format_as_dict(self, with_meta=True): validation_output['flag'] = 'gene_variant' if variant.warnings == ['Validation error']: validation_error_counter = validation_error_counter + 1 - identification_key = 'Validation_Error_%s' % validation_error_counter + identification_key = 'validation_error_%s' % validation_error_counter else: if variant.is_obsolete() and variant.hgvs_transcript_variant == '': validation_obsolete_counter += 1 diff --git a/tests/test_valoutput.py b/tests/test_valoutput.py new file mode 100644 index 00000000..780950d0 --- /dev/null +++ b/tests/test_valoutput.py @@ -0,0 +1,264 @@ +import json +from unittest import TestCase +import VariantValidator +from VariantValidator.modules.valoutput import ValOutput +from VariantValidator.modules.variant import Variant + + +class TestValOutput(TestCase): + """ + class will test the ValOutput object and methods + """ + @classmethod + def setUpClass(cls): + cls.vv = VariantValidator.Validator() + + def test_creation_empty(self): + obj = ValOutput([], self.vv) + + self.assertIsInstance(obj, ValOutput) + self.assertEqual(obj.output_list, []) + self.assertEqual(obj.validator, self.vv) + + def test_creation_str(self): + obj = ValOutput('hello', 'validator') + + self.assertEqual(obj.output_list, 'hello') + self.assertEqual(obj.validator, 'validator') + with self.assertRaises(AttributeError): + obj.format_as_dict() + + def test_output_meta(self): + obj = ValOutput([], self.vv) + res = obj.format_as_dict(with_meta=True) + self.assertIsInstance(res, dict) + self.assertEqual(list(res), ['flag', 'metadata']) + self.assertIsInstance(res['metadata'], dict) + self.assertEqual(list(res['metadata']), ['variantvalidator_version', 'variantvalidator_hgvs_version', + 'uta_schema', 'seqrepo_db']) + + res2 = obj.format_as_table(with_meta=True) + self.assertIsInstance(res2, list) + self.assertTrue(res2[0].startswith('#')) + self.assertTrue('variantvalidator_version' in 
res2[0]) + + def test_dict_no_variants(self): + obj = ValOutput([], self.vv) + res = obj.format_as_dict(with_meta=False) + self.assertIsInstance(res, dict) + self.assertEqual(res, {'flag': 'empty_result'}) + + def test_dict_one_variant(self): + var = Variant('') + obj = ValOutput([var], self.vv) + res = obj.format_as_dict(with_meta=False) + self.assertIsInstance(res, dict) + self.assertEqual(list(res), ['flag', 'validation_warning_1']) + self.assertEqual(res['flag'], 'warning') + + def test_dict_two_variants(self): + var1 = Variant('var1') + var2 = Variant('var2') + obj = ValOutput([var1, var2], self.vv) + res = obj.format_as_dict(with_meta=False) + self.assertIsInstance(res, dict) + self.assertEqual(list(res), ['flag', 'validation_warning_1', 'validation_warning_2']) + self.assertEqual(res['flag'], 'warning') + self.assertEqual(res['validation_warning_1']['submitted_variant'], 'var1') + self.assertEqual(res['validation_warning_2']['submitted_variant'], 'var2') + + def test_dict_one_good_variant(self): + var1 = Variant('var1') + var1.output_type_flag = 'gene' + var2 = Variant('var2') + obj = ValOutput([var1, var2], self.vv) + res = obj.format_as_dict(with_meta=False) + print(res) + self.assertIsInstance(res, dict) + self.assertEqual(list(res), ['flag', 'None', 'validation_warning_1']) + self.assertEqual(res['flag'], 'gene_variant') + self.assertEqual(res['None']['submitted_variant'], 'var1') + self.assertEqual(res['validation_warning_1']['submitted_variant'], 'var2') + + def test_dict_one_intergenic(self): + var1 = Variant('var1') + var1.output_type_flag = 'intergenic' + var2 = Variant('var2') + obj = ValOutput([var1, var2], self.vv) + res = obj.format_as_dict(with_meta=False) + print(res) + self.assertIsInstance(res, dict) + self.assertEqual(list(res), ['flag', 'intergenic_variant_1', 'validation_warning_1']) + self.assertEqual(res['flag'], 'intergenic') + self.assertEqual(res['intergenic_variant_1']['submitted_variant'], 'var1') + 
self.assertEqual(res['validation_warning_1']['submitted_variant'], 'var2') + + def test_dict_one_intergenic_and_one_gene(self): + var1 = Variant('var1') + var1.output_type_flag = 'intergenic' + var2 = Variant('var2') + var2.output_type_flag = 'gene' + obj = ValOutput([var1, var2], self.vv) + res = obj.format_as_dict(with_meta=False) + print(res) + self.assertIsInstance(res, dict) + self.assertEqual(list(res), ['flag', 'intergenic_variant_1', 'None']) + self.assertEqual(res['flag'], 'gene_variant') + self.assertEqual(res['intergenic_variant_1']['submitted_variant'], 'var1') + self.assertEqual(res['None']['submitted_variant'], 'var2') + + def test_dict_one_intergenic_and_one_gene_reversed(self): + var1 = Variant('var1') + var1.output_type_flag = 'intergenic' + var2 = Variant('var2') + var2.output_type_flag = 'gene' + obj = ValOutput([var2, var1], self.vv) + res = obj.format_as_dict(with_meta=False) + print(res) + self.assertIsInstance(res, dict) + self.assertEqual(list(res), ['flag', 'None', 'intergenic_variant_1']) + self.assertEqual(res['flag'], 'intergenic') + self.assertEqual(res['intergenic_variant_1']['submitted_variant'], 'var1') + self.assertEqual(res['None']['submitted_variant'], 'var2') + + def test_dict_each_with_error_in_warnings(self): + var1 = Variant('var1') + var1.warnings = ['Validation error'] + var1.output_type_flag = 'gene' + var2 = Variant('var2') + var2.warnings = ['Validation error'] + var2.output_type_flag = 'intergenic' + var3 = Variant('var3') + var3.warnings = ['Validation error'] + + obj = ValOutput([var1, var2, var3], self.vv) + res = obj.format_as_dict(with_meta=False) + print(res) + + self.assertIsInstance(res, dict) + self.assertEqual(list(res), ['flag', 'validation_error_1', 'intergenic_variant_1', 'validation_error_2']) + self.assertEqual(res['flag'], 'intergenic') + self.assertEqual(res['validation_error_1']['submitted_variant'], 'var1') + self.assertEqual(res['intergenic_variant_1']['submitted_variant'], 'var2') + 
self.assertEqual(res['validation_error_2']['submitted_variant'], 'var3') + + def test_dict_each_obsolete(self): + var1 = Variant('var1') + var1.warnings = ['obsolete'] + var1.output_type_flag = 'gene' + var2 = Variant('var2') + var2.warnings = ['obsolete'] + var2.output_type_flag = 'intergenic' + var3 = Variant('var3') + var3.warnings = ['obsolete'] + var4 = Variant('var4') + var4.warnings = ['obsolete'] + var4.output_type_flag = 'gene' + var4.hgvs_transcript_variant = '' + + obj = ValOutput([var1, var2, var3, var4], self.vv) + res = obj.format_as_dict(with_meta=False) + print(res) + + self.assertIsInstance(res, dict) + self.assertEqual(list(res), ['flag', 'None', 'intergenic_variant_1', 'obsolete_record_1', 'obsolete_record_2']) + self.assertEqual(res['flag'], 'gene_variant') + self.assertEqual(res['None']['submitted_variant'], 'var1') + self.assertEqual(res['intergenic_variant_1']['submitted_variant'], 'var2') + self.assertEqual(res['obsolete_record_1']['submitted_variant'], 'var3') + self.assertEqual(res['obsolete_record_2']['submitted_variant'], 'var4') + + def test_json(self): + var = Variant('') + obj = ValOutput([var], self.vv) + res = obj.format_as_json(with_meta=False) + self.assertIsInstance(res, str) + self.assertIn('"flag": "warning"', res) + self.assertIn('"validation_warning_1": {"submitted_variant": ""', res) + self.assertEqual(json.loads(res), obj.format_as_dict(with_meta=False)) + + def test_table_empty(self): + obj = ValOutput([], self.vv) + res = obj.format_as_table(with_meta=False) + print(res) + self.assertIsInstance(res, list) + self.assertEqual(res, [['Input', 'HGVS_transcript', 'HGVS_RefSeqGene', 'HGVS_LRG', 'HGVS_LRG_transcript', + 'Gene_Symbol', 'Transcript_description']]) + + def test_table_one(self): + var1 = Variant('var1') + obj = ValOutput([var1], self.vv) + res = obj.format_as_table(with_meta=False) + print(res) + self.assertIsInstance(res, list) + self.assertEqual(res, [['Input', 'HGVS_transcript', 'HGVS_RefSeqGene', 'HGVS_LRG', 
'HGVS_LRG_transcript', + 'Gene_Symbol', 'Transcript_description']]) + + def test_table_one_gene(self): + var1 = Variant('var1') + var1.output_type_flag = 'gene' + obj = ValOutput([var1], self.vv) + res = obj.format_as_table(with_meta=False) + print(res) + self.assertIsInstance(res, list) + self.assertEqual(res[0], ['Input', 'HGVS_transcript', 'HGVS_RefSeqGene', 'HGVS_LRG', 'HGVS_LRG_transcript', + 'Gene_Symbol', 'Transcript_description']) + self.assertEqual(res[1], ['var1', None, None, None, None, '', '']) + self.assertEqual(len(res), 2) + + def test_table_intergenic(self): + var1 = Variant('var1') + var1.output_type_flag = 'intergenic' + obj = ValOutput([var1], self.vv) + res = obj.format_as_table(with_meta=False) + print(res) + self.assertIsInstance(res, list) + self.assertEqual(res[1], ['var1', None, None, None, None, '', '']) + self.assertEqual(len(res), 2) + + def test_table_gene_warnings(self): + var1 = Variant('var1') + var1.output_type_flag = 'gene' + var1.warnings = ['Validation error'] + var2 = Variant('var2') + var2.output_type_flag = 'gene' + var2.warnings = ['obsolete'] + var3 = Variant('var3') + var3.output_type_flag = 'gene' + var3.warnings = ['obsolete'] + var3.hgvs_transcript_variant = '' + + obj = ValOutput([var1, var2, var3], self.vv) + res = obj.format_as_table(with_meta=False) + print(res) + + self.assertIsInstance(res, list) + self.assertEqual(res[0], ['Input', 'HGVS_transcript', 'HGVS_RefSeqGene', 'HGVS_LRG', 'HGVS_LRG_transcript', + 'Gene_Symbol', 'Transcript_description']) + self.assertEqual(res[1], ['var2', None, None, None, None, '', '']) + self.assertEqual(len(res), 2) + + def test_table_intergenic_warnings(self): + var1 = Variant('var1') + var1.output_type_flag = 'intergenic' + var1.warnings = ['Validation error'] + var2 = Variant('var2') + var2.output_type_flag = 'intergenic' + var2.warnings = ['obsolete'] + var3 = Variant('var3') + var3.output_type_flag = 'intergenic' + var3.warnings = ['obsolete'] + var3.hgvs_transcript_variant = '' 
+ + obj = ValOutput([var1, var2, var3], self.vv) + res = obj.format_as_table(with_meta=False) + print(res) + + self.assertIsInstance(res, list) + self.assertEqual(res[0], ['Input', 'HGVS_transcript', 'HGVS_RefSeqGene', 'HGVS_LRG', 'HGVS_LRG_transcript', + 'Gene_Symbol', 'Transcript_description']) + self.assertEqual(res[1], ['var1', None, None, None, None, '', '']) + self.assertEqual(res[2], ['var2', None, None, None, None, '', '']) + self.assertEqual(res[3], ['var3', '', None, None, None, '', '']) + self.assertEqual(len(res), 4) + From 15c8aead3f5c8730a00ddfa8397ad8d1ce20739e Mon Sep 17 00:00:00 2001 From: TeriForey Date: Tue, 16 Jul 2019 14:36:15 +0100 Subject: [PATCH 193/223] Fixed bug in test --- tests/test_core.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index e6c29cf7..a5efc981 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -282,8 +282,7 @@ def test_nonsense_hgvs(self): output = self.vv.hgvs2ref('nonsense:c.34C>T') print(output) self.assertEqual(list(output), ['variant', 'start_position', 'end_position', 'warning', 'sequence', 'error']) - self.assertEqual(output['error'], 'Failed to fetch nonsense from SeqRepo (/local/VariantValidator/' - 'seqrepo/2018-08-21) (\'Alias nonsense (namespace: None)\')') + self.assertTrue('Failed to fetch nonsense from SeqRepo' in output['error']) def test_valid_c(self): output = self.vv.hgvs2ref('NM_015120.4:c.34C>T') From d7717e6d2ce14088423f210c3003884363d18334 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 17 Jul 2019 13:41:58 +0100 Subject: [PATCH 194/223] Added test for db_update process --- VariantValidator/update_vv_db.py | 5 +++ tests/test_aa_db_update.py | 54 ++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 tests/test_aa_db_update.py diff --git a/VariantValidator/update_vv_db.py b/VariantValidator/update_vv_db.py index 8e63be43..a372e89d 100644 --- a/VariantValidator/update_vv_db.py +++ 
b/VariantValidator/update_vv_db.py @@ -35,6 +35,7 @@ def delete(): db.execute('DELETE FROM LRG_transcripts') db.execute('DELETE FROM LRG_proteins') db.execute('DELETE FROM LRG_RSG_lookup') + db.conn.commit() logger.debug("Deleted data from all tables including transcript_info") @@ -197,6 +198,8 @@ def update_lrg(dbcnx): if line.startswith('#'): continue data = line.split() + if len(data) < 3: + continue lrg_status_dict[data[0]] = data[2] # Required lookup tables @@ -210,6 +213,8 @@ def update_lrg(dbcnx): if line.startswith('#'): continue data = line.split() + if len(data) < 5: + continue # Assign objects lrg_id = data[0] symbol = data[1] diff --git a/tests/test_aa_db_update.py b/tests/test_aa_db_update.py new file mode 100644 index 00000000..9096a72a --- /dev/null +++ b/tests/test_aa_db_update.py @@ -0,0 +1,54 @@ +import pytest +from unittest import TestCase +from VariantValidator.modules.vvDatabase import Database +from VariantValidator import update_vv_db + + +class TestUpdate(TestCase): + """ + I want these tests to run first, and in order so that I can make this testcase part of the preparation for + running VV on travis. 
+ """ + + def count_rows(self, db, table): + query = "SELECT COUNT(*) FROM %s" % table + row = db.execute(query) + print(row) + return row[0] + + def test_connection(self): + db_conn = update_vv_db.connect() + + self.assertIsInstance(db_conn, Database) + self.assertTrue(db_conn.conn.is_connected()) + + def test_deletion(self): + db = update_vv_db.connect() + initial_count = self.count_rows(db, 'LRG_transcripts') + if initial_count > 0: + pytest.skip("Already have data so not going to run this test here.") + db.insert_refseq_gene_data(['id', 'chr', 'genome', '0', '1', '1', '10', '2', '2', '1', 'hgnc', 'False']) + + count = self.count_rows(db, 'refSeqGene_loci') + self.assertGreaterEqual(count, 1) + + update_vv_db.delete() + db = update_vv_db.connect() + for table in ['transcript_info', 'refSeqGene_loci', 'LRG_transcripts', 'LRG_proteins', 'LRG_RSG_lookup']: + print(table) + count = self.count_rows(db, table) + self.assertEqual(count, 0) + + def test_update(self): + db = update_vv_db.connect() + initial_count = self.count_rows(db, 'LRG_transcripts') + if initial_count > 0: + pytest.skip("Already have data so not going to run this test here.") + + update_vv_db.update() + for table in ['refSeqGene_loci', 'LRG_transcripts', 'LRG_proteins', 'LRG_RSG_lookup']: + print(table) + count = self.count_rows(db, table) + self.assertGreater(count, 0) + + From e33caf6b4db743ad08684a40c9b5a49644c866fb Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 17 Jul 2019 13:42:48 +0100 Subject: [PATCH 195/223] Updated travis file so it won't fill database twice --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 4c0cdb5e..34ac7040 100644 --- a/.travis.yml +++ b/.travis.yml @@ -45,7 +45,7 @@ install: # Set up validator database - mysql validator < configuration/empty_vv_db.sql - - update_vdb.py +# - update_vdb.py - df -h script: From 16aa396f0a140ebc81f26eb6589cf55330294d2a Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 17 
Jul 2019 14:27:10 +0100 Subject: [PATCH 196/223] Bug fix in test_aa_db_update.py: --- tests/test_aa_db_update.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_aa_db_update.py b/tests/test_aa_db_update.py index 9096a72a..f91df0ee 100644 --- a/tests/test_aa_db_update.py +++ b/tests/test_aa_db_update.py @@ -46,6 +46,7 @@ def test_update(self): pytest.skip("Already have data so not going to run this test here.") update_vv_db.update() + db = update_vv_db.connect() for table in ['refSeqGene_loci', 'LRG_transcripts', 'LRG_proteins', 'LRG_RSG_lookup']: print(table) count = self.count_rows(db, table) From 2f9bec27842e765cf96b4f22eeb06659f1638da8 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 17 Jul 2019 14:50:20 +0100 Subject: [PATCH 197/223] Removed problematic section with RNA variants --- VariantValidator/modules/mappers.py | 220 +++++++++------------------- 1 file changed, 70 insertions(+), 150 deletions(-) diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index c81dda8a..769a4911 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -356,163 +356,83 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version # Get the desired transcript if cck: - # This should only ever hit coding and RNA variants + # This should only ever hit coding variants (RNA has been converted to c by now) if 'del' in formatted_variant: - # RNA - looking at trapped variant which was saved before RNA converted to cDNA - if ':r.' 
in variant.pre_RNA_conversion: - coding = validator.coding(formatted_variant) - trans_acc = coding.ac - # c to Genome coordinates - Map the variant to the genome - pre_var = validator.genomic(formatted_variant, variant.no_norm_evm, variant.primary_assembly, - variant.hn) - # genome back to C coordinates - post_var = validator.myevm_g_to_t(variant.evm, pre_var, trans_acc) - - test = validator.hp.parse_hgvs_variant(quibble_input) - if post_var.posedit.pos.start.base != test.posedit.pos.start.base or \ - post_var.posedit.pos.end.base != test.posedit.pos.end.base: - caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected ' \ - 'transcript:' - automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the ' \ - 'selected transcript' - # automapping of variant completed - # Change to rna variant - # TODO: Need to look this section over. Doesn't make any sense. - # THERE IS NO SUCH THING AS QUERY. THIS WOULDN'T HAVE WORKED AND ISN'T RUN IN ANY TESTS - query = variant # Deliberately won't work so I can fix this once I have an appropriate test. 
- posedit = query.posedit - posedit = posedit.lower() - query.posedit = posedit - query.type = 'r' - post_var = str(query) - automap = variant.pre_RNA_conversion + ' automapped to ' + str(post_var) - variant.warnings.extend([str(caution), str(automap)]) - - # Kill current line and append for re-submission - # Tag the line so that it is not written out - variant.write = False - # Set the values and append to batch_list - hgvs_vt = validator.hp.parse_hgvs_variant(str(post_var)) - assert str(hgvs_vt) == str(post_var) - query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=[automap], - primary_assembly=variant.primary_assembly, order=variant.order) - validator.batch_list.append(query) - logger.info("Submitting new variant with format %s", fn.valstr(hgvs_vt)) - - # Coding - else: - coding = validator.coding(formatted_variant) - trans_acc = coding.ac - # c to Genome coordinates - Map the variant to the genome - pre_var = validator.hp.parse_hgvs_variant(formatted_variant) - try: - pre_var = validator.myevm_t_to_g(pre_var, variant.no_norm_evm, variant.primary_assembly, - variant.hn) - except Exception as e: - error = str(e) - if error == 'expected from_start_i <= from_end_i': - error = 'Automap is unable to correct the input exon/intron boundary coordinates, ' \ - 'please check your variant description' - variant.warnings.append(error) - logger.warning(error) - return True - else: - logger.debug("Except passed, %s", e) - # genome back to C coordinates - try: - post_var = validator.myevm_g_to_t(variant.evm, pre_var, trans_acc) - except hgvs.exceptions.HGVSError as error: - variant.warnings.append(str(error)) - logger.warning(str(error)) + coding = validator.coding(formatted_variant) + trans_acc = coding.ac + # c to Genome coordinates - Map the variant to the genome + pre_var = validator.hp.parse_hgvs_variant(formatted_variant) + try: + pre_var = validator.myevm_t_to_g(pre_var, variant.no_norm_evm, variant.primary_assembly, + variant.hn) + except Exception 
as e: + error = str(e) + if error == 'expected from_start_i <= from_end_i': + error = 'Automap is unable to correct the input exon/intron boundary coordinates, ' \ + 'please check your variant description' + variant.warnings.append(error) + logger.warning(error) return True - test = validator.hp.parse_hgvs_variant(quibble_input) - - if post_var.posedit.pos.start.base != test.posedit.pos.start.base or \ - post_var.posedit.pos.end.base != test.posedit.pos.end.base: - caution = 'The entered coordinates do not agree with the intron/exon boundaries for the ' \ - 'selected transcript:' - # automapping of variant completed - automap = variant.pre_RNA_conversion + ' automapped to ' + str(post_var) - variant.warnings.extend([caution, automap]) - - # Kill current line and append for re-submission - # Tag the line so that it is not written out - variant.write = False - # Set the values and append to batch_list - hgvs_vt = validator.hp.parse_hgvs_variant(str(post_var)) - assert str(hgvs_vt) == str(post_var) - query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=[automap], - primary_assembly=variant.primary_assembly, order=variant.order) - validator.batch_list.append(query) - logger.info("Submitting new variant with format %s", fn.valstr(hgvs_vt)) - - else: # del not in formatted_variant - if ':r.' 
in variant.pre_RNA_conversion: - coding = validator.coding(formatted_variant) - trans_acc = coding.ac - # c to Genome coordinates - Map the variant to the genome - pre_var = validator.genomic(formatted_variant, variant.no_norm_evm, variant.primary_assembly, - variant.hn) - # genome back to C coordinates + else: + logger.debug("Except passed, %s", e) + # genome back to C coordinates + try: post_var = validator.myevm_g_to_t(variant.evm, pre_var, trans_acc) + except hgvs.exceptions.HGVSError as error: + variant.warnings.append(str(error)) + logger.warning(str(error)) + return True + test = validator.hp.parse_hgvs_variant(quibble_input) + + if post_var.posedit.pos.start.base != test.posedit.pos.start.base or \ + post_var.posedit.pos.end.base != test.posedit.pos.end.base: + caution = 'The entered coordinates do not agree with the intron/exon boundaries for the ' \ + 'selected transcript:' + # automapping of variant completed + automap = variant.pre_RNA_conversion + ' automapped to ' + str(post_var) + variant.warnings.extend([caution, automap]) - test = validator.hp.parse_hgvs_variant(quibble_input) - if post_var.posedit.pos.start.base != test.posedit.pos.start.base or post_var.posedit.pos.end.base != test.posedit.pos.end.base: - caution = 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript:' - automap = 'Automap has corrected the coordinates to match the intron/exon boundaries for the selected transcript' - # automapping of variant completed - # Change to rna variant - # TODO: As before this section needs fixing - # THERE IS NO SUCH THING AS QUERY. 
THIS WOULDN'T HAVE WORKED AND ISN'T RUN IN ANY TESTS - query = variant - posedit = query.posedit - posedit = posedit.lower() - query.posedit = posedit - query.type = 'r' - post_var = str(query) - automap = quibble_input + ' automapped to ' + post_var - variant.warnings.extend([caution, automap]) - - # Kill current line and append for re-submission - # Tag the line so that it is not written out - variant.write = False - # Set the values and append to batch_list - hgvs_vt = validator.hp.parse_hgvs_variant(str(post_var)) - assert str(hgvs_vt) == str(post_var) - query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=[automap], - primary_assembly=variant.primary_assembly, order=variant.order) - validator.batch_list.append(query) - logger.info("Submitting new variant with format %s", fn.valstr(hgvs_vt)) + # Kill current line and append for re-submission + # Tag the line so that it is not written out + variant.write = False + # Set the values and append to batch_list + hgvs_vt = validator.hp.parse_hgvs_variant(str(post_var)) + assert str(hgvs_vt) == str(post_var) + query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=[automap], + primary_assembly=variant.primary_assembly, order=variant.order) + validator.batch_list.append(query) + logger.info("Submitting new variant with format %s", fn.valstr(hgvs_vt)) - else: - coding = validator.coding(formatted_variant) - trans_acc = coding.ac - # c to Genome coordinates - Map the variant to the genome - pre_var = validator.genomic(formatted_variant, variant.no_norm_evm, variant.primary_assembly, - variant.hn) + else: # del not in formatted_variant - # genome back to C coordinates - post_var = validator.myevm_g_to_t(variant.evm, pre_var, trans_acc) + coding = validator.coding(formatted_variant) + trans_acc = coding.ac + # c to Genome coordinates - Map the variant to the genome + pre_var = validator.genomic(formatted_variant, variant.no_norm_evm, variant.primary_assembly, + variant.hn) + + # genome back 
to C coordinates + post_var = validator.myevm_g_to_t(variant.evm, pre_var, trans_acc) + + test = validator.hp.parse_hgvs_variant(quibble_input) + if post_var.posedit.pos.start.base != test.posedit.pos.start.base or \ + post_var.posedit.pos.end.base != test.posedit.pos.end.base: + caution = 'The entered coordinates do not agree with the intron/exon boundaries for the ' \ + 'selected transcript:' + # automapping of variant completed + automap = str(variant.pre_RNA_conversion) + ' automapped to ' + str(post_var) + variant.warnings.extend([caution, automap]) - test = validator.hp.parse_hgvs_variant(quibble_input) - if post_var.posedit.pos.start.base != test.posedit.pos.start.base or \ - post_var.posedit.pos.end.base != test.posedit.pos.end.base: - caution = 'The entered coordinates do not agree with the intron/exon boundaries for the ' \ - 'selected transcript:' - # automapping of variant completed - automap = str(variant.pre_RNA_conversion) + ' automapped to ' + str(post_var) - variant.warnings.extend([caution, automap]) - - # Kill current line and append for re-submission - # Tag the line so that it is not written out - variant.write = False - # Set the values and append to batch_list - hgvs_vt = validator.hp.parse_hgvs_variant(str(post_var)) - assert str(hgvs_vt) == str(post_var) - query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=[automap], - primary_assembly=variant.primary_assembly, order=variant.order) - validator.batch_list.append(query) - logger.info("Submitting new variant with format %s", fn.valstr(hgvs_vt)) + # Kill current line and append for re-submission + # Tag the line so that it is not written out + variant.write = False + # Set the values and append to batch_list + hgvs_vt = validator.hp.parse_hgvs_variant(str(post_var)) + assert str(hgvs_vt) == str(post_var) + query = Variant(variant.original, quibble=fn.valstr(hgvs_vt), warnings=[automap], + primary_assembly=variant.primary_assembly, order=variant.order) + 
validator.batch_list.append(query) + logger.info("Submitting new variant with format %s", fn.valstr(hgvs_vt)) # If cck not true elif ':r.' in variant.pre_RNA_conversion: From 48508bb95f2ed216116f2f14fbeb49db8df3c70a Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 22 Jul 2019 14:01:31 +0100 Subject: [PATCH 198/223] Updated license information in each file --- README.md | 9 +++++---- VariantValidator/__init__.py | 17 +++++++++++++++++ VariantValidator/configure.py | 17 +++++++++++++++++ VariantValidator/logger.py | 16 ++++++++++++++++ VariantValidator/modules/__init__.py | 16 ++++++++++++++++ VariantValidator/modules/format_converters.py | 17 +++++++++++++++++ VariantValidator/modules/gapped_mapping.py | 17 +++++++++++++++++ VariantValidator/modules/hgvs_utils.py | 5 +++-- VariantValidator/modules/liftover.py | 4 +--- VariantValidator/modules/mappers.py | 17 +++++++++++++++++ VariantValidator/modules/seq_data.py | 17 +++++++++++++++++ VariantValidator/modules/use_checking.py | 17 +++++++++++++++++ VariantValidator/modules/utils.py | 17 +++++++++++++++++ VariantValidator/modules/valoutput.py | 17 +++++++++++++++++ VariantValidator/modules/variant.py | 17 +++++++++++++++++ VariantValidator/modules/vvDBGet.py | 17 +++++++++++++++++ VariantValidator/modules/vvDBInit.py | 17 +++++++++++++++++ VariantValidator/modules/vvDBInsert.py | 16 ++++++++++++++++ VariantValidator/modules/vvDatabase.py | 17 +++++++++++++++++ VariantValidator/modules/vvMixinConverters.py | 17 +++++++++++++++++ VariantValidator/modules/vvMixinCore.py | 17 +++++++++++++++++ VariantValidator/modules/vvMixinInit.py | 17 +++++++++++++++++ VariantValidator/settings.py | 17 +++++++++++++++++ VariantValidator/update_vv_db.py | 2 +- VariantValidator/validator.py | 17 ++++++++++++++++- bin/update_vdb.py | 17 +++++++++++++++++ bin/variant_validator.py | 17 +++++++++++++++++ bin/vv_configure.py | 17 +++++++++++++++++ configuration/default.ini | 2 +- configuration/travis.ini | 2 +- setup.py | 8 ++++---- 
tests/test_aa_db_update.py | 17 ++++++++++++++++- tests/test_configuration.py | 17 +++++++++++++++++ tests/test_core.py | 17 +++++++++++++++++ tests/test_inputs.py | 17 +++++++++++++++++ tests/test_utils.py | 17 +++++++++++++++++ tests/test_valoutput.py | 16 ++++++++++++++++ tests/test_variant.py | 17 +++++++++++++++++ 38 files changed, 537 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index dc02883e..38bd2f9d 100644 --- a/README.md +++ b/README.md @@ -59,18 +59,19 @@ VariantValidator: Accurate validation, mapping and formatting of sequence variat Freeman PJ, Hart RK, Gretton LJ, Brookes AJ, Dalgleish R. -> Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -> +> +> Copyright (C) 2019 VariantValidator Contributors +> > This program is free software: you can redistribute it and/or modify > it under the terms of the GNU Affero General Public License as > published by the Free Software Foundation, either version 3 of the > License, or (at your option) any later version. -> +> > This program is distributed in the hope that it will be useful, > but WITHOUT ANY WARRANTY; without even the implied warranty of > MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > GNU Affero General Public License for more details. -> +> > You should have received a copy of the GNU Affero General Public License > along with this program. If not, see . > diff --git a/VariantValidator/__init__.py b/VariantValidator/__init__.py index 04dce59d..adf3dd38 100644 --- a/VariantValidator/__init__.py +++ b/VariantValidator/__init__.py @@ -5,3 +5,20 @@ from .validator import Validator __all__ = ["Validator"] + +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# diff --git a/VariantValidator/configure.py b/VariantValidator/configure.py index 4107daa4..34ca2c62 100644 --- a/VariantValidator/configure.py +++ b/VariantValidator/configure.py @@ -40,3 +40,20 @@ def exit_with_message(): print("Items that must be changed are highlighted in capitals.") print() raise SystemExit + +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# diff --git a/VariantValidator/logger.py b/VariantValidator/logger.py index dbee415b..f5c38b21 100644 --- a/VariantValidator/logger.py +++ b/VariantValidator/logger.py @@ -14,3 +14,19 @@ else: logging.getLogger('VariantValidator').addHandler(logging.NullHandler()) +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# diff --git a/VariantValidator/modules/__init__.py b/VariantValidator/modules/__init__.py index e69de29b..17fac5fa 100644 --- a/VariantValidator/modules/__init__.py +++ b/VariantValidator/modules/__init__.py @@ -0,0 +1,16 @@ +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# diff --git a/VariantValidator/modules/format_converters.py b/VariantValidator/modules/format_converters.py index c501b99e..ed5b1c34 100644 --- a/VariantValidator/modules/format_converters.py +++ b/VariantValidator/modules/format_converters.py @@ -782,3 +782,20 @@ def rna(variant, validator): variant.hgvs_formatted = hgvs_c return False + +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index 354637a6..4d11af15 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -2352,3 +2352,20 @@ def rev_norm_ins(self, hgvs_coding, hgvs_genomic): except hgvs.exceptions.HGVSUnsupportedOperationError as e: logger.debug("Except passed, %s", e) + +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# diff --git a/VariantValidator/modules/hgvs_utils.py b/VariantValidator/modules/hgvs_utils.py index 36125703..b9037539 100644 --- a/VariantValidator/modules/hgvs_utils.py +++ b/VariantValidator/modules/hgvs_utils.py @@ -948,7 +948,8 @@ def hgvs_ref_alt(hgvs_variant, sf): ref_alt_dict = {'ref': ref, 'alt': alt} return ref_alt_dict -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester +# +# Copyright (C) 2019 VariantValidator Contributors # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -962,4 +963,4 @@ def hgvs_ref_alt(hgvs_variant, sf): # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -# \ No newline at end of file +# diff --git a/VariantValidator/modules/liftover.py b/VariantValidator/modules/liftover.py index 04f0448a..20815428 100644 --- a/VariantValidator/modules/liftover.py +++ b/VariantValidator/modules/liftover.py @@ -338,7 +338,7 @@ def liftover(hgvs_genomic, build_from, build_to, hn, reverse_normalizer, evm, va return lifted_response # -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester +# Copyright (C) 2019 VariantValidator Contributors # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -353,5 +353,3 @@ def liftover(hgvs_genomic, build_from, build_to, hn, reverse_normalizer, evm, va # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
# - - diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index 769a4911..6f72ab47 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -846,3 +846,20 @@ def final_tx_to_multiple_genomic(variant, validator, tx_variant): logger.warning(str(e)) return multi_g + +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# diff --git a/VariantValidator/modules/seq_data.py b/VariantValidator/modules/seq_data.py index 472f53c2..ace8c872 100644 --- a/VariantValidator/modules/seq_data.py +++ b/VariantValidator/modules/seq_data.py @@ -3373,3 +3373,20 @@ def gap_black_list(symbol): return True return False + +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. 
If not, see . +# diff --git a/VariantValidator/modules/use_checking.py b/VariantValidator/modules/use_checking.py index 46a91d58..02d6c821 100644 --- a/VariantValidator/modules/use_checking.py +++ b/VariantValidator/modules/use_checking.py @@ -666,3 +666,20 @@ def structure_checks_n(variant, validator): logger.warning(error) return True return False + +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# diff --git a/VariantValidator/modules/utils.py b/VariantValidator/modules/utils.py index a87c8b67..528533df 100644 --- a/VariantValidator/modules/utils.py +++ b/VariantValidator/modules/utils.py @@ -441,3 +441,20 @@ class DatabaseConnectionError(Exception): class ObsoleteSeqError(Exception): pass + +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. 
+# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# diff --git a/VariantValidator/modules/valoutput.py b/VariantValidator/modules/valoutput.py index e7c94e3a..ccf4c8e4 100644 --- a/VariantValidator/modules/valoutput.py +++ b/VariantValidator/modules/valoutput.py @@ -140,3 +140,20 @@ def add_meta(self): metadata['uta_schema'] = self.validator.utaSchema metadata['seqrepo_db'] = self.validator.seqrepoVersion return metadata + +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# diff --git a/VariantValidator/modules/variant.py b/VariantValidator/modules/variant.py index 15838242..79c96fa4 100644 --- a/VariantValidator/modules/variant.py +++ b/VariantValidator/modules/variant.py @@ -218,3 +218,20 @@ def process_warnings(self): if warning not in refined: refined.append(warning) return refined + +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# diff --git a/VariantValidator/modules/vvDBGet.py b/VariantValidator/modules/vvDBGet.py index dbfb3c72..063301e0 100644 --- a/VariantValidator/modules/vvDBGet.py +++ b/VariantValidator/modules/vvDBGet.py @@ -150,3 +150,20 @@ def get_urls(self, dict_out): # "http://www.ensembl.org/id/" ? What about historic versions????? return report_urls + +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# diff --git a/VariantValidator/modules/vvDBInit.py b/VariantValidator/modules/vvDBInit.py index 80a2db51..c90a2b18 100644 --- a/VariantValidator/modules/vvDBInit.py +++ b/VariantValidator/modules/vvDBInit.py @@ -25,3 +25,20 @@ def __del__(self): self.conn = None if self.pool: self.pool = None + +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# diff --git a/VariantValidator/modules/vvDBInsert.py b/VariantValidator/modules/vvDBInsert.py index 60d69412..119673fb 100644 --- a/VariantValidator/modules/vvDBInsert.py +++ b/VariantValidator/modules/vvDBInsert.py @@ -154,3 +154,19 @@ def update_gene_stable_ids(self, gene_stable_ids): self.conn.commit() return success +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. 
If not, see . +# diff --git a/VariantValidator/modules/vvDatabase.py b/VariantValidator/modules/vvDatabase.py index 587a41f9..86f6e814 100644 --- a/VariantValidator/modules/vvDatabase.py +++ b/VariantValidator/modules/vvDatabase.py @@ -247,3 +247,20 @@ def ref_type_assign(self, accession): # shouldn't reach this point raise Exception('Unable to recognise accession') return ref_type + +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index c4c04357..2cc46378 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -2420,3 +2420,20 @@ def rsg_to_chr(self, hgvs_refseqgene, primary_assembly, hn): # Return the required data. This is a dictionary containing the rsg description, validation status and gene ID return descriptions + +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 1de9bb0c..2fc83dce 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -1114,3 +1114,20 @@ def _get_transcript_info(self, variant): logger.warning(error) return True return False + +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index 8e9482d7..6e71a815 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -459,3 +459,20 @@ def myc_to_p(self, hgvs_transcript, evm, re_to_p, hn): hgvs_transcript_to_hgvs_protein['error'] = 'Unable to map %s to %s' % ( hgvs_transcript.ac, associated_protein_accession) return hgvs_transcript_to_hgvs_protein + +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# diff --git a/VariantValidator/settings.py b/VariantValidator/settings.py index 6ec5d442..3ddb59cd 100644 --- a/VariantValidator/settings.py +++ b/VariantValidator/settings.py @@ -38,3 +38,20 @@ } } } + +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. 
+# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# diff --git a/VariantValidator/update_vv_db.py b/VariantValidator/update_vv_db.py index a372e89d..fe99c243 100644 --- a/VariantValidator/update_vv_db.py +++ b/VariantValidator/update_vv_db.py @@ -295,7 +295,7 @@ def map_line(line, genome, rsg_id_info): return ml # -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester +# Copyright (C) 2019 VariantValidator Contributors # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as diff --git a/VariantValidator/validator.py b/VariantValidator/validator.py index 0f0d19ae..f046dcc8 100644 --- a/VariantValidator/validator.py +++ b/VariantValidator/validator.py @@ -16,4 +16,19 @@ class Validator(vvMixinCore.Mixin): pass - +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# diff --git a/bin/update_vdb.py b/bin/update_vdb.py index f15995c7..6453bc87 100644 --- a/bin/update_vdb.py +++ b/bin/update_vdb.py @@ -14,3 +14,20 @@ update_vv_db.delete() update_vv_db.update() + +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# diff --git a/bin/variant_validator.py b/bin/variant_validator.py index beb3a1fc..88d35318 100644 --- a/bin/variant_validator.py +++ b/bin/variant_validator.py @@ -52,3 +52,20 @@ def output_results(valoutput, outformat, with_meta): sys.stderr.write("Submitting batch query: %s\n" % batch) output = validator.validate(batch, args.genome, args.transcripts) args.output.write(output_results(output, args.output_format, args.meta) + '\n') + +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. 
+# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# diff --git a/bin/vv_configure.py b/bin/vv_configure.py index c2e1e955..f2834a40 100644 --- a/bin/vv_configure.py +++ b/bin/vv_configure.py @@ -59,3 +59,20 @@ def read_settings(): if newfile or values_changed: with open(settings['CONFIG_DIR'], 'w') as fh: config.write(fh) + +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# diff --git a/configuration/default.ini b/configuration/default.ini index f013b176..14441cf5 100644 --- a/configuration/default.ini +++ b/configuration/default.ini @@ -30,7 +30,7 @@ location = /path/to/liftover # -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester +# Copyright (C) 2019 VariantValidator Contributors # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as diff --git a/configuration/travis.ini b/configuration/travis.ini index a1d148b2..f8138bc5 100644 --- a/configuration/travis.ini +++ b/configuration/travis.ini @@ -30,7 +30,7 @@ location = /path/to/liftover # -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester +# Copyright (C) 2019 VariantValidator Contributors # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as diff --git a/setup.py b/setup.py index 729d329b..edc4a077 100644 --- a/setup.py +++ b/setup.py @@ -72,18 +72,18 @@ ) # -# Copyright (C) 2018 Peter Causey-Freeman, University of Leicester -# +# Copyright (C) 2019 VariantValidator Contributors +# # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as # published by the Free Software Foundation, either version 3 of the # License, or (at your option) any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Affero General Public License for more details. -# +# # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
# diff --git a/tests/test_aa_db_update.py b/tests/test_aa_db_update.py index f91df0ee..25b70baa 100644 --- a/tests/test_aa_db_update.py +++ b/tests/test_aa_db_update.py @@ -52,4 +52,19 @@ def test_update(self): count = self.count_rows(db, table) self.assertGreater(count, 0) - +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# diff --git a/tests/test_configuration.py b/tests/test_configuration.py index e81d1aec..a39107c1 100644 --- a/tests/test_configuration.py +++ b/tests/test_configuration.py @@ -231,3 +231,20 @@ def test_file_parsing(self): def tearDown(self): shutil.move(self.original, self.filename) + +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# diff --git a/tests/test_core.py b/tests/test_core.py index a5efc981..8c9b5579 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -331,3 +331,20 @@ def test_valid_r(self): self.assertEqual(output['error'], '') self.assertEqual(output['start_position'], '') self.assertEqual(output['sequence'], '') + +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# diff --git a/tests/test_inputs.py b/tests/test_inputs.py index 5c54bd71..15f911fe 100644 --- a/tests/test_inputs.py +++ b/tests/test_inputs.py @@ -18688,3 +18688,20 @@ def test_variant336(self): assert results['NM_000088.3:c.589-18_589-14dup']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000017.10:g.48275377_48275381dup', 'vcf': {'chr': '17', 'pos': '48275376', 'ref': 'G', 'alt': 'GAAAAA'}} assert results['NM_000088.3:c.589-18_589-14dup']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000017.11:g.50198016_50198020dup', 'vcf': {'chr': '17', 'pos': '50198015', 'ref': 'G', 'alt': 'GAAAAA'}} assert results['NM_000088.3:c.589-18_589-14dup']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_000088.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_000079.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007400.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1.xml'} + +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# diff --git a/tests/test_utils.py b/tests/test_utils.py index 86204643..a1945dbc 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -362,3 +362,20 @@ def test_dup_pair(self): output = utils.hgvs_dup2indel(hgvsseq) self.assertIsInstance(output, str) self.assertEqual(output, 'NM_015120.4:c.34_34delGAinsGAGA') + +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# diff --git a/tests/test_valoutput.py b/tests/test_valoutput.py index 780950d0..b57ea27b 100644 --- a/tests/test_valoutput.py +++ b/tests/test_valoutput.py @@ -262,3 +262,19 @@ def test_table_intergenic_warnings(self): self.assertEqual(res[3], ['var3', '', None, None, None, '', '']) self.assertEqual(len(res), 4) +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. 
+# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# diff --git a/tests/test_variant.py b/tests/test_variant.py index 54cfe11f..700f30be 100644 --- a/tests/test_variant.py +++ b/tests/test_variant.py @@ -363,3 +363,20 @@ def test_output_dict_set(self): 'primary_assembly_loci': 'primary', 'reference_sequence_records': 'records', }) + +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# From de32419fc7c2cf1bf4bcde45c8139f45d6c9e4ae Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 24 Jul 2019 10:02:29 +0100 Subject: [PATCH 199/223] Added instructions and Dockerfiles for docker installation --- Dockerfile | 20 +++++++++++++++++ configuration/docker.ini | 47 +++++++++++++++++++++++++++++++++++++++ docker-compose.yml | 31 ++++++++++++++++++++++++++ docs/DOCKER.md | 48 ++++++++++++++++++++++++++++++++++++++++ docs/INSTALLATION.md | 1 + vdb_docker.df | 13 +++++++++++ 6 files changed, 160 insertions(+) create mode 100644 Dockerfile create mode 100644 configuration/docker.ini create mode 100644 docker-compose.yml create mode 100644 docs/DOCKER.md create mode 100644 vdb_docker.df diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..80585224 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,20 @@ +FROM python:3.6 + +#RUN seqrepo -r ${SEQREPO_DATA_DIR} pull -i ${SEQREPO_DATA_RELEASE} +#RUN touch ${SEQREPO_DATA_DIR}/testing.txt + +#RUN apt update && apt install -y git + +WORKDIR /app + +COPY . /app + +RUN pip install -r requirements_dev.txt + +RUN pip install -e . + +COPY configuration/docker.ini /root/.variantvalidator + +CMD python3 bin/variant_validator.py + + diff --git a/configuration/docker.ini b/configuration/docker.ini new file mode 100644 index 00000000..0fa27faa --- /dev/null +++ b/configuration/docker.ini @@ -0,0 +1,47 @@ +[mysql] +host = vdb +database = validator +user = vvadmin +password = var1ant + +[seqrepo] +version = 2018-08-21 +location = /usr/local/share/seqrepo + +[postgres] +host = uta +database = uta +version = uta_20171026 +user = anonymous +password = + +[logging] +#Levels control verbosity and can be set to "CRITICAL" "ERROR" "WARNING" "INFO" or "DEBUG". 
+log = True +console = WARNING +file = ERROR + +[Entrez] +email = admin@variantvalidator.org +api_key = YOUR_API_KEY + +[liftover] +location = /path/to/liftover + + +# +# Copyright (C) 2019 VariantValidator Contributors +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 00000000..1c2e131a --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,31 @@ +version: '3' + +services: + vdb: + build: + context: . + dockerfile: vdb_docker.df + ports: + - "3306:3306" + expose: + - "3306" + uta: + image: biocommons/uta + ports: + - "5432:5432" + expose: + - "5432" + seqrepo: + image: biocommons/seqrepo:2018-08-21 + volumes: + - seqdata:/usr/local/share/seqrepo + vv: + build: . + depends_on: + - vdb + - uta + volumes: + - seqdata:/usr/local/share/seqrepo + +volumes: + seqdata: \ No newline at end of file diff --git a/docs/DOCKER.md b/docs/DOCKER.md new file mode 100644 index 00000000..dff18971 --- /dev/null +++ b/docs/DOCKER.md @@ -0,0 +1,48 @@ +# Docker + +To install VariantValidator via Docker, first ensure you have both docker and docker-compose installed. +See their [documentation](https://docs.docker.com/compose/install/) for information. + +Then, clone the repository and move into that directory. 
+ +```bash +git clone https://github.com/openvar/variantValidator +cd variantValidator/ +``` + +You can then launch the docker containers and run them using + +```bash +docker-compose up +``` + +Note, the first time this is run it will download each of the databases including the pre-populated +validator database and could take up to 30 minutes depending on your connection. We do not recommend +running this in the background as you need to see the logs and therefore when the databases are +ready to be used. + +Once installed and running it is possible to run just the container containing VariantValidator, either to +run the validator script + +```bash +docker-compose run vv variant_validator.py +``` + +run python + +```bash +docker-compose run vv python +``` + +or go into the container via bash + +```bash +docker-compose run vv bash +``` + +Note, that each time one of these commands is run a new container is created. +For more information on how to use docker-compose see their [documentation](https://docs.docker.com/compose/). + +It is possible to access both the UTA and Validator databases outside of docker as they expose the + default PostgreSQL and MySQL ports (5432 and 3306 respectively). In the current set-up it is not possible to + access the seqrepo database outside of docker. \ No newline at end of file diff --git a/docs/INSTALLATION.md b/docs/INSTALLATION.md index f704101b..0816b0fb 100644 --- a/docs/INSTALLATION.md +++ b/docs/INSTALLATION.md @@ -1,6 +1,7 @@ # Installation These instructions will allow you to install the package and accompanying databases on Linux. Mac OS X computers operate similarly. +For any other systems, or if you cannot install the databases, we recommend installing via [docker](DOCKER.md). 
## Pre-requisites diff --git a/vdb_docker.df b/vdb_docker.df new file mode 100644 index 00000000..87a1a547 --- /dev/null +++ b/vdb_docker.df @@ -0,0 +1,13 @@ +FROM mysql:latest + +ENV MYSQL_RANDOM_ROOT_PASSWORD yes + +ENV MYSQL_DATABASE validator + +ENV MYSQL_USER vvadmin + +ENV MYSQL_PASSWORD var1ant + +RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/* + +RUN wget https://leicester.figshare.com/ndownloader/files/16237784 -O /docker-entrypoint-initdb.d/validator_2019-07-10.sql.gz From aaef7a4082fc16235ee421432a01bf4ac50de073 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 24 Jul 2019 13:30:37 +0100 Subject: [PATCH 200/223] Updated docs --- README.md | 8 ++++---- docs/DOCKER.md | 6 +++++- docs/MANUAL.md | 6 +++--- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 38bd2f9d..7b3aba7b 100644 --- a/README.md +++ b/README.md @@ -31,21 +31,21 @@ For sequence variations falling within the open reading frames of genes, Variant ## Pre-requisites -Variant Validator will work on Mac OS X or Linux-compatible computers. +Variant Validator will work locally on Mac OS X or Linux-compatible computers. It can also work within a [docker container](docs/DOCKER.md). Required software: * MySQL -* Python 2.7 +* Python 3.6 or above * SQLite version 3.8.0 or above Optional software: * Postgres version 9.5 or above, Postgres 10 is not supported. -For installation instructions please see [INSTALLATION.md](docs/INSTALLATION.md) +For installation instructions please see [INSTALLATION.md](docs/INSTALLATION.md). For the older python 2 version, see previous releases. # Operation and configuration -Please see [MANUAL.md](docs/MANUAL.md) +Please see [MANUAL.md](docs/MANUAL.md). Note that the latest version is not compatible with previous releases. 
## License diff --git a/docs/DOCKER.md b/docs/DOCKER.md index dff18971..f2d16d11 100644 --- a/docs/DOCKER.md +++ b/docs/DOCKER.md @@ -45,4 +45,8 @@ For more information on how to use docker-compose see their [documentation](http It is possible to access both the UTA and Validator databases outside of docker as they expose the default PostgreSQL and MySQL ports (5432 and 3306 respectively). In the current set-up it is not possible to - access the seqrepo database outside of docker. \ No newline at end of file + access the seqrepo database outside of docker. + +Finally, it should be noted that the current UTA docker container is not up-to-date and only contains the +2017-10-26 release. Therefore use caution when interpreting these results, and be advised the + VariantValidator tests will fail. \ No newline at end of file diff --git a/docs/MANUAL.md b/docs/MANUAL.md index dc35dd13..a9b4ae3d 100644 --- a/docs/MANUAL.md +++ b/docs/MANUAL.md @@ -47,20 +47,20 @@ location = /path/to/liftover The values in capitals must be replaced for Variant Validator to run, see below for more details. -####Logging +#### Logging By default Variant Validator will log to both the console and to a file, the output level for each can be set in the configuration file. The levels control verbosity and can be set to "CRITICAL", "ERROR", "WARNING", "INFO" or "DEBUG". To turn off logging, set the log configuration to "False". The log file name and log options can be changed for all users by editing the `VariantValidator/settings.py` file. By default the file log is set to output in the users home directory (`~/.vv_errorlog`). -####Entrez +#### Entrez For access to the NCBI Entrez database you must provide a valid email address in the respective configuration setting. Optionally, you can also provide an NCBI API key that will increase the number of requests made per second. 
See [this article](https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/) on how to generate an API key. -####Liftover +#### Liftover If the UCSC Liftover [files](http://hgdownload.soe.ucsc.edu/goldenPath/hg38/liftOver/) have been previously downloaded their location can be set within the configuration file. By default the necessary files will be downloaded automatically when first requested. From c18ccb0873f032516dc3f30700aac5a669b97228 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 24 Jul 2019 13:52:37 +0100 Subject: [PATCH 201/223] Fixed issue in hgvs2ref from v0 --- VariantValidator/modules/vvMixinCore.py | 4 ++-- tests/test_core.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 2fc83dce..f5f94446 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -972,8 +972,8 @@ def hgvs2ref(self, query): reference['error'] = str(e) logger.warning(str(e)) else: - reference['start_position'] = str(input_hgvs_query.posedit.pos.start.base) - reference['end_position'] = str(input_hgvs_query.posedit.pos.end.base) + reference['start_position'] = str(input_hgvs_query.posedit.pos.start) + reference['end_position'] = str(input_hgvs_query.posedit.pos.end) reference['sequence'] = sequence # Return the resulting reference sequence and error message diff --git a/tests/test_core.py b/tests/test_core.py index 8c9b5579..49efba0a 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -313,7 +313,7 @@ def test_valid_p(self): print(output) self.assertEqual(list(output), ['variant', 'start_position', 'end_position', 'warning', 'sequence', 'error']) self.assertEqual(output['error'], '') - self.assertEqual(output['start_position'], '34') + self.assertEqual(output['start_position'], 'Thr34') self.assertEqual(output['sequence'], 'A') def test_valid_m(self): From 267dba570b623d705f2134798e13f3ecaf2673ec 
Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 24 Jul 2019 13:58:54 +0100 Subject: [PATCH 202/223] Added codecov and travis badges to readme --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7b3aba7b..a5b135d9 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,7 @@ -# About Variant Validator +# VariantValidator +[![codecov](https://codecov.io/gh/openvar/variantValidator/branch/restructuring_py3/graph/badge.svg)](https://codecov.io/gh/openvar/variantValidator) [![Build Status](https://travis-ci.org/openvar/variantValidator.png)](https://travis-ci.org/openvar/variantValidator) + +## About VariantValidator is a user-friendly software tool designed to validate the syntax and parameters of DNA variant descriptions according to the HGVS Sequence Variant From 4ffc661210c24a7850d931d495587a1e0c154aa2 Mon Sep 17 00:00:00 2001 From: Peter Causey-Freeman Date: Thu, 25 Jul 2019 09:55:52 +0100 Subject: [PATCH 203/223] Modified docs that helped me install and successfully test vvPy3. Note: I could not figure out whether the current install instructions include an option to install the pre-populated database. This would be beneficial due to known sporadic issues with external APIs --- README.md | 2 +- docs/INSTALLATION.md | 8 ++++---- docs/MANUAL.md | 16 ++++++++-------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index a5b135d9..841b9c50 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ For sequence variations falling within the open reading frames of genes, Variant ## Pre-requisites -Variant Validator will work locally on Mac OS X or Linux-compatible computers. It can also work within a [docker container](docs/DOCKER.md). +VariantValidator will work locally on Mac OS X or Linux-compatible computers. It can also work within a [docker container](docs/DOCKER.md). 
Required software: * MySQL diff --git a/docs/INSTALLATION.md b/docs/INSTALLATION.md index 0816b0fb..310fad25 100644 --- a/docs/INSTALLATION.md +++ b/docs/INSTALLATION.md @@ -32,7 +32,7 @@ After [installing conda](https://docs.conda.io/projects/conda/en/latest/user-gui $ conda env create -f environment.yml $ conda activate vvenv ``` -The packages required for variant validator to function are now set up in the environment "vvenv". +The packages required for VariantValidator to function are now set up in the environment "vvenv". #### Via pip @@ -44,7 +44,7 @@ $ source activate vvenv $ pip install -r requirements.txt ``` -## Installing Variant Validator +## Installing VariantValidator To install VariantValidator within your virtual environment run: ``` @@ -62,7 +62,7 @@ CREATE DATABASE validator; GRANT SELECT,INSERT,UPDATE,DELETE ON validator.* TO '<USER>'@'<HOST>'; ``` -In the `VariantValidator/configuration` folder is a copy of the empty mysql database needed by Variant Validator to run. You need to upload it to the running MySQL database with: +In the `VariantValidator/configuration` folder is a copy of the empty mysql database needed by VariantValidator to run. You need to upload it to the running MySQL database with: ``` $ mysql validator < VariantValidator/configuration/empty_vv_db.sql ``` @@ -106,7 +106,7 @@ Before using VariantValidator some configuration is required, as described in th ## Developers -To work on the VariantValidator code, you'll need to install additional dependencies and install VariantValidator in an editable manner. Tests can be run using PyTest. 
```bash cd variantValidator/ diff --git a/docs/MANUAL.md b/docs/MANUAL.md index a9b4ae3d..15b2aaa8 100644 --- a/docs/MANUAL.md +++ b/docs/MANUAL.md @@ -2,12 +2,12 @@ ## Configuration -After first installing Variant Validator, a configuration file will need to be created and edited to contain the database credentials and locations. +After first installing VariantValidator, a configuration file will need to be created and edited to contain the database credentials and locations. By default the edited configuration will be placed in the users home directory (`~/.variantvalidator`), this location can be changed for all users by editing the `VariantValidator/settings.py` file. To create this file automatically, run the configuration script installed alongside the package. ```bash -vv_configure.py +python bin/vv_configure.py ``` This will ask you to enter a value for each item in the configuration file. @@ -42,14 +42,14 @@ email = YOUR@EMAIL.COM api_key = YOUR_API_KEY [liftover] -location = /path/to/liftover +location = /PATH/TO/LIFTOVER/ ``` -The values in capitals must be replaced for Variant Validator to run, see below for more details. +The values in capitals must be replaced for VariantValidator to run, see below for more details. #### Logging -By default Variant Validator will log to both the console and to a file, the output level for each can be set in the configuration file. +By default VariantValidator will log to both the console and to a file, the output level for each can be set in the configuration file. The levels control verbosity and can be set to "CRITICAL", "ERROR", "WARNING", "INFO" or "DEBUG". To turn off logging, set the log configuration to "False". The log file name and log options can be changed for all users by editing the `VariantValidator/settings.py` file. By default the file log is set to output in the users home directory (`~/.vv_errorlog`). @@ -62,7 +62,7 @@ made per second. 
See [this article](https://ncbiinsights.ncbi.nlm.nih.gov/2017/1 #### Liftover -If the UCSC Liftover [files](http://hgdownload.soe.ucsc.edu/goldenPath/hg38/liftOver/) have been previously downloaded their location can be set within the configuration file. By default the necessary files will be downloaded automatically when first requested. +Optional. Download the UCSC GRCh38 Liftover file [hg38ToHg19.over.chain.gz](http://hgdownload.soe.ucsc.edu/goldenPath/hg38/liftOver/) and GRCh37 Liftover file [hg19ToHg38.over.chain.gz](http://hgdownload.cse.ucsc.edu/gbdb/hg19/liftOver/) and set their location within the configuration file. By default the necessary files are accessed remotely which incurs a time-penalty when validating intergenic variants. ## Database updates @@ -70,14 +70,14 @@ If the UCSC Liftover [files](http://hgdownload.soe.ucsc.edu/goldenPath/hg38/lift To import the initial data into the Validator MySQL database, run the following script: ```bash -update_vdb.py +python bin/update_vdb.py ``` This will download the required data to convert between LRG and RefSeq IDs. We recommend re-running this command on a regular basis as changes are continually made to the RefSeq and LRG collections. ## Operation -To run Variant Validator, we have provided the installed script `variant_validator.py`, running this with the flag `-h` shows the running options: +To run VariantValidator, we have provided the installed script `variant_validator.py`, running this with the flag `-h` shows the running options: ```text usage: variant_validator.py [-h] -v VARIANT [VARIANT ...] 
From 8843a1ac398c67ba78cb3fee7e0b56e8061d740e Mon Sep 17 00:00:00 2001 From: Peter Causey-Freeman Date: Thu, 25 Jul 2019 15:09:15 +0100 Subject: [PATCH 204/223] correct bug in MixinInit identified by VF integration --- VariantValidator/modules/vvMixinInit.py | 32 ++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index 6e71a815..334fb5c3 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -129,7 +129,6 @@ def __init__(self): alt_aln_method='splign' ) - # Create normalizer self.merge_normalizer = hgvs.normalizer.Normalizer( self.hdp, cross_boundaries=False, @@ -144,6 +143,33 @@ def __init__(self): alt_aln_method='splign', validate=False ) + + # When we are able to access Ensembl data we will need to use these normalizer instances + # These are currently implemented in VF + self.splign_normalizer = hgvs.normalizer.Normalizer(self.hdp, + cross_boundaries=False, + shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, + alt_aln_method='splign' # RefSeq + ) + + self.genebuild_normalizer = hgvs.normalizer.Normalizer(self.hdp, + cross_boundaries=False, + shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, + alt_aln_method='genebuild' # Ensembl + ) + + self.reverse_splign_normalizer = hgvs.normalizer.Normalizer(self.hdp, + cross_boundaries=False, + shuffle_direction=5, + alt_aln_method='splign' + ) + + self.reverse_genebuild_normalizer = hgvs.normalizer.Normalizer(self.hdp, + cross_boundaries=False, + shuffle_direction=5, + alt_aln_method='genebuild' + ) + # create no_norm_evm self.no_norm_evm_38 = hgvs.assemblymapper.AssemblyMapper(self.hdp, assembly_name='GRCh38', @@ -192,7 +218,7 @@ def protein(self, variant, evm, hpUnused): if ass_prot is None: cod = str(var_c) cod = cod.replace('inv', 'del') - cod = self.hp.parse(cod) + cod = self.hp.parse_hgvs_variant(cod) p = 
evm.c_to_p(cod) ass_prot = p.ac var_p = hgvs.sequencevariant.SequenceVariant(ac=ass_prot, type='p', posedit='(Met1?)') @@ -218,7 +244,7 @@ def myc_to_p(self, hgvs_transcript, evm, re_to_p, hn): if associated_protein_accession is None: cod = str(hgvs_transcript) cod = cod.replace('inv', 'del') - cod = self.hp.parse(cod) + cod = self.hp.parse_hgvs_variant(cod) p = evm.c_to_p(cod) associated_protein_accession = p.ac From 1de0f08c458b02f9c6c123d211424b426a1f2abf Mon Sep 17 00:00:00 2001 From: Peter Causey-Freeman Date: Thu, 25 Jul 2019 21:01:22 +0100 Subject: [PATCH 205/223] Incorrect use of hp.parse corrected. Need to fix test tests/test_core.py:285: --- VariantValidator/modules/vvMixinInit.py | 8 ++++---- tests/test_utils.py | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index 334fb5c3..55d18efd 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -209,7 +209,7 @@ def protein(self, variant, evm, hpUnused): # If the :c. pattern is present in the input variant if ':c.' in variant: # convert the input string into a hgvs object - var_c = self.hp.parse(variant) + var_c = self.hp.parse_hgvs_variant(variant) # Does the edit affect the start codon? if ((1 <= var_c.posedit.pos.start.base <= 3 and var_c.posedit.pos.start.offset == 0) or ( 1 <= var_c.posedit.pos.end.base <= 3 and var_c.posedit.pos.end.offset == 0)) and '*' not in str( @@ -227,7 +227,7 @@ def protein(self, variant, evm, hpUnused): return var_p if ':n.' 
in variant: - var_p = self.hp.parse(variant) + var_p = self.hp.parse_hgvs_variant(variant) var_p.ac = 'Non-coding transcript' var_p.posedit = '' return var_p @@ -266,11 +266,11 @@ def myc_to_p(self, hgvs_transcript, evm, re_to_p, hn): except IndexError as e: error = str(e) if 'string index out of range' in error and 'dup' in str(hgvs_transcript): - hgvs_ins = self.hp.parse(str(hgvs_transcript)) + hgvs_ins = self.hp.parse_hgvs_variant(str(hgvs_transcript)) hgvs_ins = hn.normalize(hgvs_ins) inst = hgvs_ins.ac + ':c.' + str(hgvs_ins.posedit.pos.start.base - 1) + '_' + \ str(hgvs_ins.posedit.pos.start.base) + 'ins' + hgvs_ins.posedit.edit.ref - hgvs_transcript = self.hp.parse(inst) + hgvs_transcript = self.hp.parse_hgvs_variant(inst) hgvs_protein = evm.c_to_p(hgvs_transcript) if hgvs_protein: diff --git a/tests/test_utils.py b/tests/test_utils.py index a1945dbc..b62805f7 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -49,7 +49,7 @@ def test_string(self): def test_variant_sub(self): """ Will test that reference isn't removed """ stringvar = 'NM_015120.4:c.34C>T' - var = self.hp.parse(stringvar) + var = self.hp.parse_hgvs_variant(stringvar) output = utils.valstr(var) self.assertEqual(var.posedit.edit.type, 'sub') self.assertEqual(output, stringvar) @@ -57,7 +57,7 @@ def test_variant_sub(self): def test_variant_identity(self): """ Will test that the reference is removed """ stringvar = 'NM_015120.4:c.34CG=' - var = self.hp.parse(stringvar) + var = self.hp.parse_hgvs_variant(stringvar) output = utils.valstr(var) self.assertEqual(var.posedit.edit.type, 'identity') self.assertEqual(output, 'NM_015120.4:c.34=') @@ -65,7 +65,7 @@ def test_variant_identity(self): def test_variant_identity2(self): """ Will test that the reference is not removed """ stringvar = 'NM_015120.4:c.34C=' - var = self.hp.parse(stringvar) + var = self.hp.parse_hgvs_variant(stringvar) output = utils.valstr(var) self.assertEqual(var.posedit.edit.type, 'identity') self.assertEqual(output, 
'NM_015120.4:c.34C=') @@ -337,28 +337,28 @@ def test_empty(self): def test_sub(self): stringseq = 'NM_015120.4:c.34C>T' - hgvsseq = self.hp.parse(stringseq) + hgvsseq = self.hp.parse_hgvs_variant(stringseq) output = utils.hgvs_dup2indel(hgvsseq) self.assertIsInstance(output, str) self.assertEqual(output, 'NM_015120.4:c.34_34delCinsCC') def test_del(self): stringseq = 'NM_015120.4:c.34del' - hgvsseq = self.hp.parse(stringseq) + hgvsseq = self.hp.parse_hgvs_variant(stringseq) output = utils.hgvs_dup2indel(hgvsseq) self.assertIsInstance(output, str) self.assertEqual(output, 'NM_015120.4:c.34_34delins') def test_dup(self): stringseq = 'NM_015120.4:c.34dupG' - hgvsseq = self.hp.parse(stringseq) + hgvsseq = self.hp.parse_hgvs_variant(stringseq) output = utils.hgvs_dup2indel(hgvsseq) self.assertIsInstance(output, str) self.assertEqual(output, 'NM_015120.4:c.34_34delGinsGG') def test_dup_pair(self): stringseq = 'NM_015120.4:c.34dupGA' - hgvsseq = self.hp.parse(stringseq) + hgvsseq = self.hp.parse_hgvs_variant(stringseq) output = utils.hgvs_dup2indel(hgvsseq) self.assertIsInstance(output, str) self.assertEqual(output, 'NM_015120.4:c.34_34delGAinsGAGA') From ab1c87560e09c4d3c81bf4424569387541384390 Mon Sep 17 00:00:00 2001 From: Peter Causey-Freeman Date: Fri, 26 Jul 2019 14:54:19 +0100 Subject: [PATCH 206/223] Code tweak to allow multithreading. Installs SeqRepo > 0.5 in setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index edc4a077..992cd536 100644 --- a/setup.py +++ b/setup.py @@ -59,7 +59,7 @@ # List run-time dependencies here. These will be installed by pip when the project is installed. 
install_requires=[ "hgvs", # This will install BioPython - "biocommons.seqrepo >= 0.3.5", + "biocommons.seqrepo >= 0.5.1", "httplib2 >= 0.9.0", "configparser >= 3.5.0", "pyliftover >= 0.3", From ddbcbd06578f6f5f975f6ea09d324e389903c1f0 Mon Sep 17 00:00:00 2001 From: Peter Causey-Freeman Date: Fri, 26 Jul 2019 14:57:33 +0100 Subject: [PATCH 207/223] Code tweak to allow multithreading. Installs SeqRepo > 0.5 in setup.py --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 992cd536..2ac6692d 100644 --- a/setup.py +++ b/setup.py @@ -69,6 +69,7 @@ dependency_links=[ "git+https://github.com/openvar/vv_hgvs@master#egg=hgvs" ] + ) # From 7ac7e7fb72c4db2814f696ed3c190df1ecab6041 Mon Sep 17 00:00:00 2001 From: Peter Causey-Freeman Date: Fri, 26 Jul 2019 15:02:14 +0100 Subject: [PATCH 208/223] Install a higher version of SeqRepo to allow vv_hgvs to invoke multithreaded SeqRepo --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 2ac6692d..992cd536 100644 --- a/setup.py +++ b/setup.py @@ -69,6 +69,7 @@ dependency_links=[ "git+https://github.com/openvar/vv_hgvs@master#egg=hgvs" ] - ) # From ca2dcdf7ddd079d99a08426c5a88ca584bef3117 Mon Sep 17 00:00:00 2001 From: Peter Causey-Freeman Date: Fri, 26 Jul 2019 15:41:22 +0100 Subject: [PATCH 209/223] updated additional files with update for SeqRepo. 
Note, SeqRepo not mentioned in environment.yml --- requirements.txt | 2 +- requirements_dev.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 09a472c7..a5a7287e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ git+https://github.com/openvar/vv_hgvs@master#egg=hgvs -biocommons.seqrepo>=0.3.5 +biocommons.seqrepo >= 0.5.1 httplib2>=0.9.0 configparser>=3.5.0 pyliftover>=0.3 diff --git a/requirements_dev.txt b/requirements_dev.txt index 3a673ff7..4b5b980f 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,5 +1,5 @@ git+https://github.com/openvar/vv_hgvs@master#egg=hgvs -biocommons.seqrepo>=0.3.5 +biocommons.seqrepo >= 0.5.1 httplib2>=0.9.0 configparser>=3.5.0 pyliftover>=0.3 From e59b28e00eb3288f8abb53e9f9c60e08c7bd2a6f Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 29 Jul 2019 12:36:35 +0100 Subject: [PATCH 210/223] Removed liftover location so is always downloaded on first use --- VariantValidator/modules/liftover.py | 20 ++------------------ VariantValidator/modules/vvMixinInit.py | 3 --- configuration/default.ini | 3 --- configuration/docker.ini | 3 --- configuration/travis.ini | 3 --- docs/MANUAL.md | 7 ------- tests/test_configuration.py | 3 +-- 7 files changed, 3 insertions(+), 39 deletions(-) diff --git a/VariantValidator/modules/liftover.py b/VariantValidator/modules/liftover.py index 20815428..87b38708 100644 --- a/VariantValidator/modules/liftover.py +++ b/VariantValidator/modules/liftover.py @@ -224,17 +224,7 @@ def liftover(hgvs_genomic, build_from, build_to, hn, reverse_normalizer, evm, va # Create liftover vcf from_vcf = hgvs_utils.report_hgvs2vcf(hgvs_genomic, lo_from, reverse_normalizer, validator.sf) - pyliftover_dir = None - if validator.liftoverPath is not None and validator.liftoverPath != '/path/to/liftover': - pyliftover_dir = validator.liftoverPath - - if pyliftover_dir is not None: - lo_filename_to = pyliftover_dir + "%sTo%s.over.chain" % 
(lo_from, lo_to) - lo_filename_to = str(lo_filename_to.replace('Tohg', 'ToHg')) - - lo = LiftOver(lo_filename_to) - else: - lo = LiftOver(lo_from, lo_to) + lo = LiftOver(lo_from, lo_to) # Fix the GRC CHR if from_vcf[from_set].startswith('chr'): @@ -277,13 +267,7 @@ def liftover(hgvs_genomic, build_from, build_to, hn, reverse_normalizer, evm, va else: hgvs_lifted = hn.normalize(hgvs_not_delins) # Now try map back - if pyliftover_dir is not None: - lo_filename_from = pyliftover_dir + "%sTo%s.over.chain" % (lo_to, lo_from) - - lo_filename_from = str(lo_filename_from.replace('Tohg', 'ToHg')) - lo = LiftOver(lo_filename_from) - else: - lo = LiftOver(lo_to, lo_from) + lo = LiftOver(lo_to, lo_from) # Lift back liftback_list = lo.convert_coordinate(chrom, pos) diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index 55d18efd..51be5c22 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -95,9 +95,6 @@ def __init__(self): self.releasedVersion = False self.hgvsVersion = hgvs.__version__ - # Set up other configuration variables - self.liftoverPath = config["liftover"]["location"] - # Set up HGVS # Configure hgvs package global settings hgvs.global_config.uta.pool_max = 25 diff --git a/configuration/default.ini b/configuration/default.ini index 14441cf5..76265444 100644 --- a/configuration/default.ini +++ b/configuration/default.ini @@ -25,9 +25,6 @@ file = WARNING email = YOUR@EMAIL.COM api_key = YOUR_API_KEY -[liftover] -location = /path/to/liftover - # # Copyright (C) 2019 VariantValidator Contributors diff --git a/configuration/docker.ini b/configuration/docker.ini index 0fa27faa..2f35537d 100644 --- a/configuration/docker.ini +++ b/configuration/docker.ini @@ -25,9 +25,6 @@ file = ERROR email = admin@variantvalidator.org api_key = YOUR_API_KEY -[liftover] -location = /path/to/liftover - # # Copyright (C) 2019 VariantValidator Contributors diff --git a/configuration/travis.ini 
b/configuration/travis.ini index f8138bc5..9ee72931 100644 --- a/configuration/travis.ini +++ b/configuration/travis.ini @@ -25,9 +25,6 @@ file = ERROR email = admin@variantvalidator.org api_key = YOUR_API_KEY -[liftover] -location = /path/to/liftover - # # Copyright (C) 2019 VariantValidator Contributors diff --git a/docs/MANUAL.md b/docs/MANUAL.md index 15b2aaa8..fc9b6b98 100644 --- a/docs/MANUAL.md +++ b/docs/MANUAL.md @@ -40,9 +40,6 @@ file = WARNING [Entrez] email = YOUR@EMAIL.COM api_key = YOUR_API_KEY - -[liftover] -location = /PATH/TO/LIFTOVER/ ``` The values in capitals must be replaced for VariantValidator to run, see below for more details. @@ -60,10 +57,6 @@ For access to the NCBI Entrez database you must provide a valid email address i the respective configuration setting. Optionally, you can also provide an NCBI API key that will increase the number of requests made per second. See [this article](https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/) on how to generate an API key. -#### Liftover - -Optional. Download the UCSC GRCh38 Liftover file [hg38ToHg19.over.chain.gz](http://hgdownload.soe.ucsc.edu/goldenPath/hg38/liftOver/) and GRCh37 Liftover file [hg19ToHg38.over.chain.gz](http://hgdownload.cse.ucsc.edu/gbdb/hg19/liftOver/) and set their location within the configuration file. By default the necessary files are accessed remotely which incurs a time-penalty when validating intergenic variants. 
- ## Database updates diff --git a/tests/test_configuration.py b/tests/test_configuration.py index a39107c1..49a3c9b0 100644 --- a/tests/test_configuration.py +++ b/tests/test_configuration.py @@ -176,13 +176,12 @@ def write_config(self): self.config.write(fh) def test_file_structure(self): - self.assertEqual(self.config.sections(), ['mysql', 'seqrepo', 'postgres', 'logging', 'Entrez', 'liftover']) + self.assertEqual(self.config.sections(), ['mysql', 'seqrepo', 'postgres', 'logging', 'Entrez']) self.assertEqual(list(self.config['mysql']), ['host', 'database', 'user', 'password']) self.assertEqual(list(self.config['seqrepo']), ['version', 'location']) self.assertEqual(list(self.config['postgres']), ['host', 'database', 'version', 'user', 'password']) self.assertEqual(list(self.config['logging']), ['log', 'console', 'file']) self.assertEqual(list(self.config['Entrez']), ['email', 'api_key']) - self.assertEqual(list(self.config['liftover']), ['location']) def test_file_contents(self): self.assertNotEqual(self.config['mysql']['user'], 'USERNAME') From 2b061aba0496387c397cffb439520908d9769c57 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 29 Jul 2019 16:15:44 +0100 Subject: [PATCH 211/223] Changed install and imports to use vvhgvs instead of hgvs --- VariantValidator/modules/format_converters.py | 18 ++-- VariantValidator/modules/gapped_mapping.py | 76 ++++++++-------- VariantValidator/modules/hgvs_utils.py | 6 +- VariantValidator/modules/liftover.py | 10 +-- VariantValidator/modules/mappers.py | 34 +++---- VariantValidator/modules/use_checking.py | 64 +++++++------- VariantValidator/modules/vvDatabase.py | 6 +- VariantValidator/modules/vvMixinConverters.py | 82 ++++++++--------- VariantValidator/modules/vvMixinCore.py | 32 +++---- VariantValidator/modules/vvMixinInit.py | 88 +++++++++---------- environment.yml | 2 +- requirements.txt | 2 +- requirements_dev.txt | 4 +- setup.py | 3 +- tests/test_utils.py | 6 +- 15 files changed, 216 insertions(+), 217 deletions(-) 
diff --git a/VariantValidator/modules/format_converters.py b/VariantValidator/modules/format_converters.py index ed5b1c34..45841fca 100644 --- a/VariantValidator/modules/format_converters.py +++ b/VariantValidator/modules/format_converters.py @@ -1,5 +1,5 @@ import re -import hgvs.exceptions +import vvhgvs.exceptions import copy import logging from .variant import Variant @@ -458,7 +458,7 @@ def vcf2hgvs_stage4(variant, validator): hgvs_not_delins = None try: hgvs_not_delins = validator.hp.parse_hgvs_variant(not_delins) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: # Sort out multiple ALTS from VCF inputs if re.search(r"([GATCgatc]+)>([GATCgatc]+),([GATCgatc]+)", not_delins): header, alts = not_delins.split('>') @@ -486,7 +486,7 @@ def vcf2hgvs_stage4(variant, validator): try: not_delins = str(variant.hn.normalize(hgvs_not_delins)) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if 'Normalization of intronic variants is not supported' in error: not_delins = not_delins @@ -529,7 +529,7 @@ def indel_catching(variant, validator): # Remove them so that the string SHOULD parse try: hgvs_failed = validator.hp.parse_hgvs_variant(failed) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = 'The syntax of the input variant description is invalid ' if failed.endswith('ins'): issue_link = 'http://varnomen.hgvs.org/recommendations/DNA/variant/insertion/' @@ -697,7 +697,7 @@ def mitochondrial(variant, validator): hgvs_mito.type = 'm' try: validator.vr.validate(hgvs_mito) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) variant.warnings.append(error) logger.warning(error) @@ -728,11 +728,11 @@ def proteins(variant, validator): # Try to validate the variant try: hgvs_object = validator.hp.parse_hgvs_variant(variant.hgvs_formatted) - except hgvs.exceptions.HGVSError as e: + except 
vvhgvs.exceptions.HGVSError as e: error = str(e) try: validator.vr.validate(hgvs_object) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if error: variant.warnings.append(error) @@ -749,7 +749,7 @@ def proteins(variant, validator): try: validator.vr.validate(hgvs_object) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) else: error = str( @@ -774,7 +774,7 @@ def rna(variant, validator): # Change input to reflect! try: hgvs_c = validator.hgvs_r_to_c(hgvs_input) - except hgvs.exceptions.HGVSDataNotAvailableError as e: + except vvhgvs.exceptions.HGVSDataNotAvailableError as e: error = str(e) variant.warnings.append(error) logger.warning(str(error)) diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index 4d11af15..7a7cbcbd 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -1,7 +1,7 @@ import copy import re import logging -import hgvs.exceptions +import vvhgvs.exceptions from . import utils as fn from . 
import hgvs_utils @@ -136,7 +136,7 @@ def gapped_g_to_c(self, rel_var): try: self.variant.hn.normalize(hgvs_seek_var) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: + except vvhgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) if 'Normalization of intronic variants is not supported' in error or \ 'Unsupported normalization of variants spanning the exon-intron boundary' in error: @@ -196,9 +196,9 @@ def gapped_g_to_c(self, rel_var): try: self.tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSInvalidIntervalError: + except vvhgvs.exceptions.HGVSInvalidIntervalError: self.tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(self.hgvs_genomic_5pr, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: if str(e) == 'start or end or both are beyond the bounds of transcript record': self.tx_hgvs_not_delins = saved_hgvs_coding @@ -243,7 +243,7 @@ def gapped_g_to_c(self, rel_var): restore_stash_hgvs_not_delins = copy.copy(stash_hgvs_not_delins) try: hgvs_stash_t = self.validator.vm.g_to_t(stash_hgvs_not_delins, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: if 'bounds' in str(e): stash_hgvs_not_delins = copy.copy(stored_hgvs_not_delins) hgvs_stash_t = self.validator.vm.g_to_t(stash_hgvs_not_delins, saved_hgvs_coding.ac) @@ -284,7 +284,7 @@ def gapped_g_to_c(self, rel_var): logger.info(str(e)) try: self.variant.hn.normalize(self.tx_hgvs_not_delins) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: + except vvhgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) if 'Normalization of intronic variants is not supported' in error or \ 'Unsupported normalization of variants spanning the exon-intron boundary' in error: @@ -421,7 +421,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): chromosome_normalized_hgvs_coding = 
self.variant.reverse_normalizer.normalize(hgvs_coding) else: chromosome_normalized_hgvs_coding = self.variant.hn.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError: + except vvhgvs.exceptions.HGVSUnsupportedOperationError: chromosome_normalized_hgvs_coding = hgvs_coding most_3pr_hgvs_genomic = self.validator.myvm_t_to_g(chromosome_normalized_hgvs_coding, hgvs_genomic.ac, @@ -492,7 +492,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): hgvs_reform_ident = self.validator.hp.parse_hgvs_variant(reform_ident) try: self.variant.hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if re.search('spanning the exon-intron boundary', error): stash_tx_right = test_stash_tx_right @@ -503,12 +503,12 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): else: try: self.variant.hn.normalize(test_stash_tx_right) - except hgvs.exceptions.HGVSUnsupportedOperationError: + except vvhgvs.exceptions.HGVSUnsupportedOperationError: self.hgvs_genomic_possibilities.append('') else: stash_tx_right = test_stash_tx_right self.hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: logger.debug("Except passed, %s", e) # Intronic positions not supported. 
Will cause a Value Error except ValueError as e: @@ -576,7 +576,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): hgvs_reform_ident = self.validator.hp.parse_hgvs_variant(reform_ident) try: self.variant.hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if re.search('spanning the exon-intron boundary', error): stash_tx_left = test_stash_tx_left @@ -587,12 +587,12 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): else: try: self.variant.hn.normalize(test_stash_tx_left) - except hgvs.exceptions.HGVSUnsupportedOperationError: + except vvhgvs.exceptions.HGVSUnsupportedOperationError: self.hgvs_genomic_possibilities.append('') else: stash_tx_left = test_stash_tx_left self.hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: logger.debug("Except passed, %s", e) except ValueError as e: logger.debug("Except passed, %s", e) @@ -626,7 +626,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): # Reverse normalize hgvs_genomic_variant: NOTE will replace ref try: reverse_normalized_hgvs_genomic = self.variant.reverse_normalizer.normalize(hgvs_genomic_variant) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: # Strange error caused by gap in genomic error = str(e) if 'base start position must be <= end position' in error: @@ -683,7 +683,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): # Save a copy of current hgvs_coding try: saved_hgvs_coding = self.variant.no_norm_evm.g_to_t(stored_hgvs_not_delins, hgvs_coding.ac) - except hgvs.exceptions.HGVSInvalidIntervalError as e: + except vvhgvs.exceptions.HGVSInvalidIntervalError as e: if str(e) == 'start or end or both are beyond the bounds of transcript record': continue else: @@ -704,7 +704,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): try: self.variant.hn.normalize(hgvs_seek_var) 
- except hgvs.exceptions.HGVSUnsupportedOperationError as e: + except vvhgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) if 'Normalization of intronic variants is not supported' in error or \ 'Unsupported normalization of variants spanning the exon-intron boundary' in error: @@ -763,7 +763,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): try: self.tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(hgvs_not_delins, saved_hgvs_coding.ac) - except hgvs.exceptions.HGVSInvalidIntervalError: + except vvhgvs.exceptions.HGVSInvalidIntervalError: self.tx_hgvs_not_delins = self.variant.no_norm_evm.g_to_n(reverse_normalized_hgvs_genomic, saved_hgvs_coding.ac) # Create normalized version of tx_hgvs_not_delins @@ -807,7 +807,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): continue try: self.variant.hn.normalize(self.tx_hgvs_not_delins) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: + except vvhgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) if 'Normalization of intronic variants is not supported' in error or \ 'Unsupported normalization of variants spanning the exon-intron boundary' in error: @@ -954,7 +954,7 @@ def g_to_t_compensation(self, ori, hgvs_coding, rec_var): # Normailse hgvs_genomic try: hgvs_genomic = self.variant.hn.normalize(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: # Strange error caused by gap in genomic error = str(e) @@ -1078,7 +1078,7 @@ def g_to_t_gapped_mapping_stage2(self, ori, hgvs_coding, hgvs_genomic): return True try: self.variant.hn.normalize(self.tx_hgvs_not_delins) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: + except vvhgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) if 'Normalization of intronic variants is not supported' in error or \ 'Unsupported normalization of variants spanning the exon-intron boundary' in error: @@ -1146,12 +1146,12 @@ def 
g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a try: chromosome_normalized_hgvs_coding = self.variant.reverse_normalizer.normalize( hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError: + except vvhgvs.exceptions.HGVSUnsupportedOperationError: chromosome_normalized_hgvs_coding = hgvs_coding else: try: chromosome_normalized_hgvs_coding = self.variant.hn.normalize(hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError: + except vvhgvs.exceptions.HGVSUnsupportedOperationError: chromosome_normalized_hgvs_coding = hgvs_coding most_3pr_hgvs_genomic = self.validator.myvm_t_to_g(chromosome_normalized_hgvs_coding, alt_chr, @@ -1220,7 +1220,7 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a hgvs_reform_ident = self.validator.hp.parse_hgvs_variant(reform_ident) try: self.variant.hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if 'spanning the exon-intron boundary' in error: stash_tx_right = test_stash_tx_right @@ -1231,12 +1231,12 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a else: try: self.variant.hn.normalize(test_stash_tx_right) - except hgvs.exceptions.HGVSUnsupportedOperationError: + except vvhgvs.exceptions.HGVSUnsupportedOperationError: self.hgvs_genomic_possibilities.append('') else: stash_tx_right = test_stash_tx_right self.hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: logger.debug("Except passed, %s", e) except ValueError as e: logger.debug("Except passed, %s", e) @@ -1301,7 +1301,7 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a hgvs_reform_ident = self.validator.hp.parse_hgvs_variant(reform_ident) try: self.variant.hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: 
error = str(e) if 'spanning the exon-intron boundary' in error: stash_tx_left = test_stash_tx_left @@ -1312,12 +1312,12 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a else: try: self.variant.hn.normalize(test_stash_tx_left) - except hgvs.exceptions.HGVSUnsupportedOperationError: + except vvhgvs.exceptions.HGVSUnsupportedOperationError: self.hgvs_genomic_possibilities.append('') else: stash_tx_left = test_stash_tx_left self.hgvs_genomic_possibilities.append(stash_genomic) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: logger.debug("Except passed, %s", e) except ValueError as e: logger.debug("Except passed, %s", e) @@ -1347,7 +1347,7 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a try: reverse_normalized_hgvs_genomic = self.variant.reverse_normalizer.normalize( hgvs_genomic_variant) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: # Strange error caused by gap in genomic error = str(e) if 'base start position must be <= end position' in error: @@ -1431,7 +1431,7 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a try: self.variant.hn.normalize(hgvs_seek_var) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: + except vvhgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) if 'Normalization of intronic variants is not supported' in error or \ 'Unsupported normalization of variants spanning the exon-intron boundary' in error: @@ -1516,7 +1516,7 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a continue try: self.variant.hn.normalize(self.tx_hgvs_not_delins) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: + except vvhgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) if 'Normalization of intronic variants is not supported' in error or \ 'Unsupported normalization of variants spanning the exon-intron boundary' in 
error: @@ -1642,7 +1642,7 @@ def g_to_t_gap_compensation_version3(self, hgvs_alt_genomic, hgvs_coding, ori, a # Normailse hgvs_genomic try: hgvs_alt_genomic = self.variant.hn.normalize(hgvs_alt_genomic) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: # Strange error caused by gap in genomic error = str(e) if 'base start position must be <= end position' in error and self.disparity_deletion_in[0] == 'chromosome': @@ -2001,9 +2001,9 @@ def transcript_disparity(self, reverse_normalized_hgvs_genomic, stored_hgvs_not_ self.tx_hgvs_not_delins = c2 try: self.tx_hgvs_not_delins = self.validator.vm.c_to_n(self.tx_hgvs_not_delins) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: logger.debug("Except passed, %s", e) - except hgvs.exceptions.HGVSInvalidVariantError as e: + except vvhgvs.exceptions.HGVSInvalidVariantError as e: logger.debug("Except passed, %s", e) if '+' in str(self.tx_hgvs_not_delins.posedit.pos.start) and \ @@ -2240,7 +2240,7 @@ def get_hgvs_seek_var(self, hgvs_genomic, hgvs_coding, ori=None, with_query_geno # Map to the transcript and test for movement try: hgvs_seek_var = self.variant.evm.g_to_t(query_genomic, hgvs_coding.ac) - except hgvs.exceptions.HGVSError: + except vvhgvs.exceptions.HGVSError: hgvs_seek_var = hgvs_coding if with_query_genomic: @@ -2281,7 +2281,7 @@ def rev_norm_ins(self, hgvs_coding, hgvs_genomic): try: genomic_from_most_3pr_hgvs_transcript_variant = self.variant.hn.normalize( genomic_from_most_3pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: + except vvhgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) if error == 'base start position must be <= end position': start = genomic_from_most_3pr_hgvs_transcript_variant.posedit.pos.start.base @@ -2293,7 +2293,7 @@ def rev_norm_ins(self, hgvs_coding, hgvs_genomic): try: genomic_from_most_5pr_hgvs_transcript_variant = self.variant.hn.normalize( 
genomic_from_most_5pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSInvalidVariantError as e: + except vvhgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) if error == 'base start position must be <= end position': start = genomic_from_most_5pr_hgvs_transcript_variant.posedit.pos.start.base @@ -2350,7 +2350,7 @@ def rev_norm_ins(self, hgvs_coding, hgvs_genomic): most_5pr_hgvs_transcript_variant.posedit.edit.alt): self.hgvs_genomic_possibilities.append(genomic_from_most_5pr_hgvs_transcript_variant) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: + except vvhgvs.exceptions.HGVSUnsupportedOperationError as e: logger.debug("Except passed, %s", e) # diff --git a/VariantValidator/modules/hgvs_utils.py b/VariantValidator/modules/hgvs_utils.py index b9037539..34ba7be5 100644 --- a/VariantValidator/modules/hgvs_utils.py +++ b/VariantValidator/modules/hgvs_utils.py @@ -10,8 +10,8 @@ # Import Biopython modules from Bio.Seq import Seq -import hgvs -import hgvs.exceptions +import vvhgvs +import vvhgvs.exceptions # Database connections and hgvs objects are now passed from VariantValidator.py @@ -144,7 +144,7 @@ def pvcf_to_hgvs(query, selected_assembly, normalization_direction, reverse_norm # Parse into hgvs object try: hgvs_not_delins = validator.hp.parse_hgvs_variant(not_delins) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: # Sort out multiple ALTS from VCF inputs if re.search("([GATCgatc]+)>([GATCgatc]+),([GATCgatc]+)", not_delins): # header,alts = not_delins.split('>') diff --git a/VariantValidator/modules/liftover.py b/VariantValidator/modules/liftover.py index 87b38708..e27be556 100644 --- a/VariantValidator/modules/liftover.py +++ b/VariantValidator/modules/liftover.py @@ -6,8 +6,8 @@ """ # import modules -import hgvs.exceptions -import hgvs.sequencevariant +import vvhgvs.exceptions +import vvhgvs.sequencevariant import logging from . import seq_data from . 
import hgvs_utils @@ -15,7 +15,7 @@ from Bio.Seq import Seq # Pre compile variables -hgvs.global_config.formatting.max_ref_length = 1000000 +vvhgvs.global_config.formatting.max_ref_length = 1000000 logger = logging.getLogger(__name__) @@ -208,7 +208,7 @@ def liftover(hgvs_genomic, build_from, build_to, hn, reverse_normalizer, evm, va } } added_data = True - except hgvs.exceptions.HGVSError: + except vvhgvs.exceptions.HGVSError: continue if lifted_response != {} and added_data is not False: @@ -260,7 +260,7 @@ def liftover(hgvs_genomic, build_from, build_to, hn, reverse_normalizer, evm, va hgvs_not_delins = validator.hp.parse_hgvs_variant(not_delins) try: validator.vr.validate(hgvs_not_delins) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: logger.info(str(e)) # Most likely incorrect bases continue diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index 6f72ab47..ae360862 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -1,7 +1,7 @@ -import hgvs +import vvhgvs import re import copy -import hgvs.exceptions +import vvhgvs.exceptions import logging from . 
import hgvs_utils from .variant import Variant @@ -19,7 +19,7 @@ def gene_to_transcripts(variant, validator): error = 'false' try: validator.vr.validate(g_query) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) except KeyError: error = 'Reference sequence ' + variant.hgvs_genomic.ac + ' is either not supported or does not exist' @@ -54,7 +54,7 @@ def gene_to_transcripts(variant, validator): try: variant.hgvs_genomic = validator.myevm_t_to_g(hgvs_coding_variant, variant.no_norm_evm, variant.primary_assembly, variant.hn) - except hgvs.exceptions.HGVSError: + except vvhgvs.exceptions.HGVSError: try_rel_var = [] else: try_rel_var = validator.relevant_transcripts(variant.hgvs_genomic, variant.evm, @@ -116,7 +116,7 @@ def gene_to_transcripts(variant, validator): if sfm: try: validator.vr.validate(variant.hgvs_genomic) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) variant.warnings.append(error) logger.warning(str(error)) @@ -216,7 +216,7 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version try: to_g = validator.myevm_t_to_g(obj, variant.no_norm_evm, variant.primary_assembly, variant.hn) genomic_ac = to_g.ac - except hgvs.exceptions.HGVSDataNotAvailableError as e: + except vvhgvs.exceptions.HGVSDataNotAvailableError as e: if ('~' in str(e) and 'Alignment is incomplete' in str(e)) or "No relevant genomic mapping options" in str(e): # Unable to map the input variant onto a genomic position if '~' in str(e) and 'Alignment is incomplete' in str(e): @@ -317,7 +317,7 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version # Normalize the variant try: h_variant = variant.hn.normalize(obj) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: + except vvhgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) if 'Unsupported normalization of variants spanning the exon-intron boundary' in error: 
formatted_variant = formatted_variant @@ -378,7 +378,7 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version # genome back to C coordinates try: post_var = validator.myevm_g_to_t(variant.evm, pre_var, trans_acc) - except hgvs.exceptions.HGVSError as error: + except vvhgvs.exceptions.HGVSError as error: variant.warnings.append(str(error)) logger.warning(str(error)) return True @@ -460,7 +460,7 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version hgvs_inp = validator.hp.parse_hgvs_variant(inp) try: hgvs_otp = variant.hn.normalize(hgvs_inp) - except hgvs.exceptions.HGVSError: + except vvhgvs.exceptions.HGVSError: hgvs_otp = hgvs_inp # Set remaining variables @@ -550,7 +550,7 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version try: hgvs_coding = variant.hn.normalize(hgvs_coding) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) # Gap compensating code status @@ -566,7 +566,7 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version try: hgvs_coding = variant.evm._maybe_normalize(hgvs_coding) gap_compensation = False - except hgvs.exceptions.HGVSError as error: + except vvhgvs.exceptions.HGVSError as error: variant.warnings.append(str(error)) logger.warning(str(error)) return True @@ -672,7 +672,7 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version rng = variant.hn.normalize(query_genomic) try: c_for_p = validator.vm.g_to_t(rng, hgvs_coding.ac) - except hgvs.exceptions.HGVSInvalidIntervalError: + except vvhgvs.exceptions.HGVSInvalidIntervalError: c_for_p = fn.valstr(hgvs_seek_var) try: # Predicted effect on protein @@ -703,7 +703,7 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version c_for_p = validator.vm.g_to_t(rng, hgvs_coding.ac) try: variant.hn.normalize(c_for_p) - except hgvs.exceptions.HGVSError as e: + except 
vvhgvs.exceptions.HGVSError as e: logger.debug("Except passed, %s", e) else: # hgvs_protein = va_func.protein(str(c_for_p), variant.evm, hp) @@ -758,7 +758,7 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version try: validator.vr.validate(hgvs_updated) # Updated reference sequence - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if 'does not agree with reference sequence' in error: match = re.findall(r'\(([GATC]+)\)', error) @@ -797,12 +797,12 @@ def final_tx_to_multiple_genomic(variant, validator, tx_variant): # Look for variants spanning introns try: variant.hn.normalize(variant.hgvs_coding) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: + except vvhgvs.exceptions.HGVSUnsupportedOperationError as e: error = str(e) if 'boundary' in error or 'spanning' in error: gap_compensation = False - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: logger.debug("Except passed, %s", e) # Warn gap code status @@ -842,7 +842,7 @@ def final_tx_to_multiple_genomic(variant, validator, tx_variant): except KeyError: warnings = warnings + ': Suspected incomplete alignment between transcript %s and ' \ 'genomic reference sequence %s' % (variant.hgvs_coding.ac, alt_chr) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: logger.warning(str(e)) return multi_g diff --git a/VariantValidator/modules/use_checking.py b/VariantValidator/modules/use_checking.py index 02d6c821..a85f2469 100644 --- a/VariantValidator/modules/use_checking.py +++ b/VariantValidator/modules/use_checking.py @@ -1,7 +1,7 @@ import re -import hgvs -import hgvs.exceptions -import hgvs.variantmapper +import vvhgvs +import vvhgvs.exceptions +import vvhgvs.variantmapper import logging from . 
import utils as fn import copy @@ -111,7 +111,7 @@ def structure_checks_g(variant, validator): # Additional test try: variant.hn.normalize(variant.input_parses) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) variant.warnings.append(error) logger.warning(error) @@ -133,13 +133,13 @@ def structure_checks_c(variant, validator): # These should be in the sequence so can be directly validated. Need to pass to n. try: validator.vr.validate(variant.input_parses) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if 'datums is ill-defined' in error: called_ref = variant.input_parses.posedit.edit.ref try: to_n = variant.evm.c_to_n(variant.input_parses) - except hgvs.exceptions.HGVSInvalidVariantError as e: + except vvhgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) variant.warnings.append(error) logger.warning(error) @@ -158,7 +158,7 @@ def structure_checks_c(variant, validator): if 'bounds' in error or 'intronic variant' in error: try: variant.hn.normalize(variant.input_parses) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: logger.debug("Except passed, %s", e) if 'bounds' in error: @@ -198,7 +198,7 @@ def structure_checks_c(variant, validator): variant.input_parses.posedit.pos.end.offset = offset # Create a lose vm instance - variant.lose_vm = hgvs.variantmapper.VariantMapper(validator.hdp, + variant.lose_vm = vvhgvs.variantmapper.VariantMapper(validator.hdp, replace_reference=True, prevalidation_level=None ) @@ -217,7 +217,7 @@ def structure_checks_c(variant, validator): try: variant.input_parses = variant.evm.c_to_n(variant.input_parses) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) variant.warnings.append(error) logger.warning(e) @@ -245,7 +245,7 @@ def structure_checks_c(variant, validator): to_genome = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, 
variant.primary_assembly, variant.hn) to_tx = variant.evm.g_to_t(to_genome, variant.input_parses.ac) - except hgvs.exceptions.HGVSInvalidIntervalError as e: + except vvhgvs.exceptions.HGVSInvalidIntervalError as e: error = str(e) if 'bounds' in error: try: @@ -295,7 +295,7 @@ def structure_checks_c(variant, validator): logger.warning(error) return True - except hgvs.exceptions.HGVSDataNotAvailableError as e: + except vvhgvs.exceptions.HGVSDataNotAvailableError as e: error = str(e) if 'Alignment is incomplete' in error: e_list = error.split('~') @@ -317,14 +317,14 @@ def structure_checks_c(variant, validator): # Quick look at syntax validation try: validator.vr.validate(variant.input_parses) - except hgvs.exceptions.HGVSInvalidVariantError as e: + except vvhgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) if 'bounds' in error: try: report_gen = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, variant.primary_assembly, variant.hn) report_gen = variant.hn.normalize(report_gen) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: logger.debug("Except passed, %s", e) else: error = 'Using a transcript reference sequence to specify a variant position that lies outside of '\ @@ -352,7 +352,7 @@ def structure_checks_c(variant, validator): output = None try: output = validator.noreplace_myevm_t_to_g(variant.input_parses, variant) - except hgvs.exceptions.HGVSDataNotAvailableError: + except vvhgvs.exceptions.HGVSDataNotAvailableError: errors = ['Required information for ' + variant.input_parses.ac + ' is missing from the Universal ' 'Transcript Archive', 'Query https://rest.variantvalidator.org/tools/gene2transcripts/%s for ' 'available transcripts' % variant.input_parses.ac.split('.')[0]] @@ -367,7 +367,7 @@ def structure_checks_c(variant, validator): variant.warnings.append(error) logger.warning(error) return True - except hgvs.exceptions.HGVSInvalidVariantError as e: + except 
vvhgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) if 'base start position must be <= end position' in error: # correction = copy.deepcopy(variant.input_parses) @@ -388,7 +388,7 @@ def structure_checks_c(variant, validator): try: variant.evm.g_to_t(output, variant.input_parses.ac) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) variant.warnings.append(error) logger.warning(error) @@ -396,7 +396,7 @@ def structure_checks_c(variant, validator): try: validator.vr.validate(output) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) variant.warnings.append(error) logger.warning(error) @@ -406,9 +406,9 @@ def structure_checks_c(variant, validator): # All other variation try: validator.vr.validate(variant.input_parses) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: + except vvhgvs.exceptions.HGVSUnsupportedOperationError as e: logger.debug("Except passed, %s", e) - except hgvs.exceptions.HGVSInvalidVariantError as e: + except vvhgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) # This catches errors in introns if 'base start position must be <= end position' in error: @@ -424,12 +424,12 @@ def structure_checks_c(variant, validator): logger.warning(error) return True - except hgvs.exceptions.HGVSDataNotAvailableError as e: + except vvhgvs.exceptions.HGVSDataNotAvailableError as e: error = str(e) variant.warnings.append(error) logger.warning(error) return True - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if 'bounds' in error: error += ' (' + variant.input_parses.ac + ')' @@ -451,7 +451,7 @@ def structure_checks_n(variant, validator): # These should be in the sequence so can be directly validated. Need to pass to n. 
try: validator.vr.validate(variant.input_parses) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if 'intronic variant' in error: pass @@ -516,14 +516,14 @@ def structure_checks_n(variant, validator): # Quick look at syntax validation try: validator.vr.validate(variant.input_parses) - except hgvs.exceptions.HGVSInvalidVariantError as e: + except vvhgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) if 'bounds' in error: try: report_gen = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, variant.primary_assembly, variant.hn) report_gen = variant.hn.normalize(report_gen) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: logger.debug("Except passed, %s", e) else: error = 'Using a transcript reference sequence to specify a variant position that lies outside of '\ @@ -552,7 +552,7 @@ def structure_checks_n(variant, validator): test_g = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, variant.primary_assembly, variant.hn) back_to_n = variant.evm.g_to_t(test_g, variant.input_parses.ac) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if 'bounds' in error: report_gen = validator.myevm_t_to_g(variant.input_parses, variant.no_norm_evm, @@ -571,7 +571,7 @@ def structure_checks_n(variant, validator): output = None try: output = validator.noreplace_myevm_t_to_g(variant.input_parses, variant) - except hgvs.exceptions.HGVSDataNotAvailableError: + except vvhgvs.exceptions.HGVSDataNotAvailableError: errors = ['Required information for ' + variant.input_parses.ac + ' is missing from the Universal ' 'Transcript Archive', 'Query https://rest.variantvalidator.org/tools/gene2transcripts/%s for ' @@ -588,7 +588,7 @@ def structure_checks_n(variant, validator): variant.warnings.append(error) logger.warning(error) return True - except hgvs.exceptions.HGVSInvalidVariantError as e: + except 
vvhgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) if 'base start position must be <= end position' in error: correction = copy.deepcopy(variant.input_parses) @@ -605,7 +605,7 @@ def structure_checks_n(variant, validator): return True try: validator.vr.validate(output) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) variant.warnings.append(error) logger.warning(error) @@ -615,9 +615,9 @@ def structure_checks_n(variant, validator): # All other variation try: validator.vr.validate(variant.input_parses) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: + except vvhgvs.exceptions.HGVSUnsupportedOperationError as e: logger.debug("Except passed, %s", e) - except hgvs.exceptions.HGVSInvalidVariantError as e: + except vvhgvs.exceptions.HGVSInvalidVariantError as e: error = str(e) # if re.search('Length implied by coordinates', error): # # Applies to del and inv @@ -653,12 +653,12 @@ def structure_checks_n(variant, validator): variant.warnings.append(error) logger.warning(error) return True - except hgvs.exceptions.HGVSDataNotAvailableError as e: + except vvhgvs.exceptions.HGVSDataNotAvailableError as e: error = str(e) variant.warnings.append(error) logger.warning(error) return True - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if 'bounds' in error: error = error + ' (' + variant.input_parses.ac + ')' diff --git a/VariantValidator/modules/vvDatabase.py b/VariantValidator/modules/vvDatabase.py index 86f6e814..18461bd6 100644 --- a/VariantValidator/modules/vvDatabase.py +++ b/VariantValidator/modules/vvDatabase.py @@ -2,7 +2,7 @@ from .utils import handleCursor from . 
import vvDBInsert import re -import hgvs.exceptions +import vvhgvs.exceptions import logging import json @@ -107,12 +107,12 @@ def update_transcript_info_record(self, accession, validator): # Get information from UTA try: uta_info = validator.hdp.get_tx_identity_info(version) - except hgvs.exceptions.HGVSDataNotAvailableError: + except vvhgvs.exceptions.HGVSDataNotAvailableError: version_ac_ver = version.split('.') version = version_ac_ver[0] + '.' + str(int(version_ac_ver[1]) - 1) try: uta_info = validator.hdp.get_tx_identity_info(version) - except hgvs.exceptions.HGVSDataNotAvailableError: + except vvhgvs.exceptions.HGVSDataNotAvailableError: raise utils.DatabaseConnectionError("Cannot retrieve data from UTA database") uta_symbol = str(uta_info[6]) diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index 2cc46378..ebfd7a8a 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -1,15 +1,15 @@ import re import copy import logging -import hgvs -import hgvs.validator +import vvhgvs +import vvhgvs.validator from . import vvMixinInit from . import seq_data from . import hgvs_utils from Bio import Entrez, SeqIO from . 
import utils as fn -from hgvs.exceptions import HGVSError, HGVSDataNotAvailableError, HGVSUnsupportedOperationError +from vvhgvs.exceptions import HGVSError, HGVSDataNotAvailableError, HGVSUnsupportedOperationError logger = logging.getLogger(__name__) @@ -39,7 +39,7 @@ class Mixin(vvMixinInit.Mixin): # acession # refseq_ac = RefSeqGene ac # """ - # vr = hgvs.validator.Validator(self.hdp) + # vr = vvhgvs.validator.Validator(self.hdp) # # parse the variant into hgvs object # var_c = self.hp.parse_hgvs_variant(variant) # # map to the genomic co-ordinates using the easy variant mapper set to alt_aln_method = alt_aln_method @@ -129,7 +129,7 @@ def genomic(self, variant, evm, primary_assembly, hn): hgvs_var = self.hp.parse_hgvs_variant(variant) try: var_g = self.myevm_t_to_g(hgvs_var, evm, primary_assembly, hn) # genomic level variant - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: return 'error ' + str(e) return var_g @@ -194,7 +194,7 @@ def myevm_t_to_g(self, hgvs_c, no_norm_evm, primary_assembly, hn): # Check for intronic try: hn.normalize(hgvs_c) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if 'intronic variant' not in error and \ 'Length implied by coordinates must equal sequence deletion length' in error and \ @@ -279,14 +279,14 @@ def myevm_t_to_g(self, hgvs_c, no_norm_evm, primary_assembly, hn): if str(hgvs_c.ac).startswith('NM_'): try: hgvs_c = no_norm_evm.n_to_c(hgvs_c) - except hgvs.exceptions.HGVSError: + except vvhgvs.exceptions.HGVSError: hgvs_c = copy.deepcopy(stored_hgvs_c) # Ensure the altered c. 
variant has not crossed intro exon boundaries hgvs_check_boundaries = copy.deepcopy(hgvs_c) try: hn.normalize(hgvs_check_boundaries) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if 'spanning the exon-intron boundary' in error: hgvs_c = copy.deepcopy(stored_hgvs_c) @@ -298,7 +298,7 @@ def myevm_t_to_g(self, hgvs_c, no_norm_evm, primary_assembly, hn): hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) try: hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if 'spanning the exon-intron boundary' in error or 'Normalization of intronic variants' in error: hgvs_c = copy.deepcopy(stored_hgvs_c) @@ -311,7 +311,7 @@ def myevm_t_to_g(self, hgvs_c, no_norm_evm, primary_assembly, hn): hgvs_genomic = no_norm_evm.t_to_g(hgvs_c) hn.normalize(hgvs_genomic) # Check the validity of the mapping # This will fail on multiple refs for NC_ - except hgvs.exceptions.HGVSError: + except vvhgvs.exceptions.HGVSError: # Recover all available mapping options from UTA mapping_options = self.hdp.get_tx_mapping_options(hgvs_c.ac) @@ -404,7 +404,7 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): if hgvs_genomic.posedit.edit.type == 'ins' and utilise_gap_code is True: try: hgvs_genomic = hn.normalize(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if error == 'insertion length must be 1': ref = self.sf.fetch_seq(str(hgvs_genomic.ac), hgvs_genomic.posedit.pos.start.base - 1, @@ -444,7 +444,7 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): try: hn.normalize(nr_genomic) - except hgvs.exceptions.HGVSInvalidVariantError as e: + except vvhgvs.exceptions.HGVSInvalidVariantError as e: error_type_1 = str(e) if 'Length implied by coordinates must equal sequence deletion length' in str(e) or str( e) == 'base start position must be <= end 
position': @@ -459,7 +459,7 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): try: hn.normalize(genomic_gap_variant) # Still a problem - except hgvs.exceptions.HGVSInvalidVariantError as e: + except vvhgvs.exceptions.HGVSInvalidVariantError as e: if 'base start position must be <= end position' in str(e) and \ 'Length implied by coordinates must equal' in error_type_1: make_gen_var = copy.copy(nr_genomic) @@ -515,7 +515,7 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): if 'Length implied by coordinates must equal sequence deletion length' not in str(e): try: transcript_gap_variant = hn.normalize(transcript_gap_variant) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: + except vvhgvs.exceptions.HGVSUnsupportedOperationError as e: logger.debug("Except passed, %s", e) # if NM_ need the n. position @@ -756,7 +756,7 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False): hgvs_c.posedit.pos.end.offset == 0: try: hn.normalize(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if error == 'insertion length must be 1': if hgvs_c.type == 'c': @@ -800,7 +800,7 @@ def noreplace_myevm_t_to_g(self, hgvs_c, variant): hgvs_genomic = variant.evm.t_to_g(hgvs_c) variant.hn.normalize(hgvs_genomic) # This will fail on multiple refs for NC_ - except hgvs.exceptions.HGVSError: + except vvhgvs.exceptions.HGVSError: # Recover all available mapping options from UTA mapping_options = self.hdp.get_tx_mapping_options(hgvs_c.ac) @@ -888,7 +888,7 @@ def search_in_options(hgvs_genomic, seqtype, chr_num_val, final=False): hgvs_c.posedit.pos.end.offset == 0: try: variant.hn.normalize(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if error == 'insertion length must be 1': if hgvs_c.type == 'c': @@ -951,7 +951,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): # Check 
for intronic try: hn.normalize(hgvs_c) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if 'intronic variant' in error: logger.debug("Except passed, %s", e) @@ -1040,14 +1040,14 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): if str(hgvs_c.ac).startswith('NM_'): try: hgvs_c = no_norm_evm.n_to_c(hgvs_c) - except hgvs.exceptions.HGVSError: + except vvhgvs.exceptions.HGVSError: hgvs_c = copy.deepcopy(stored_hgvs_c) # Ensure the altered c. variant has not crossed intro exon boundaries hgvs_check_boundaries = copy.deepcopy(hgvs_c) try: hn.normalize(hgvs_check_boundaries) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if 'spanning the exon-intron boundary' in error: hgvs_c = copy.deepcopy(stored_hgvs_c) @@ -1059,7 +1059,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): hgvs_reform_ident = self.hp.parse_hgvs_variant(reform_ident) try: hn.normalize(hgvs_reform_ident) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if 'spanning the exon-intron boundary' in error or 'Normalization of intronic variants' in error: hgvs_c = copy.deepcopy(stored_hgvs_c) @@ -1071,7 +1071,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): if hgvs_genomic.posedit.edit.type == 'ins' and utilise_gap_code is True: try: hgvs_genomic = hn.normalize(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if error == 'insertion length must be 1': ref = self.sf.fetch_seq(str(hgvs_genomic.ac), hgvs_genomic.posedit.pos.start.base - 1, @@ -1110,7 +1110,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): nr_genomic = self.nr_vm.t_to_g(hgvs_c, hgvs_genomic.ac) try: hn.normalize(nr_genomic) - except hgvs.exceptions.HGVSInvalidVariantError as e: + except vvhgvs.exceptions.HGVSInvalidVariantError as e: error_type_1 = str(e) if 'Length implied by coordinates must 
equal sequence deletion length' in str(e) or str( e) == 'base start position must be <= end position': @@ -1125,7 +1125,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): try: hn.normalize(genomic_gap_variant) # Still a problem - except hgvs.exceptions.HGVSInvalidVariantError as e: + except vvhgvs.exceptions.HGVSInvalidVariantError as e: if 'base start position must be <= end position' in str(e) and \ 'Length implied by coordinates must equal' in error_type_1: make_gen_var = copy.copy(nr_genomic) @@ -1179,7 +1179,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): if 'Length implied by coordinates must equal sequence deletion length' not in str(e): try: transcript_gap_variant = hn.normalize(transcript_gap_variant) - except hgvs.exceptions.HGVSUnsupportedOperationError as e: + except vvhgvs.exceptions.HGVSUnsupportedOperationError as e: logger.debug("Except passed, %s", e) # if NM_ need the n. position @@ -1418,7 +1418,7 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn): hgvs_c.posedit.pos.end.offset == 0: try: hn.normalize(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if error == 'insertion length must be 1': if hgvs_c.type == 'c': @@ -1493,7 +1493,7 @@ def hgvs_r_to_c(self, hgvs_object): # """ # Input c. r. n. variant string # Use uta.py (hdp) to return the identity information for the transcript variant - # see hgvs.dataproviders.uta.py for details + # see vvhgvs.dataproviders.uta.py for details # """ # # If the :c. pattern is present in the input variant # if ':c.' in variant: @@ -1529,7 +1529,7 @@ def hgvs_r_to_c(self, hgvs_object): # """ # Input c. r. 
nd accession string # Use uta.py (hdp) to return the identity information for the transcript variant - # see hgvs.dataproviders.uta.py for details + # see vvhgvs.dataproviders.uta.py for details # """ # tx_id_info = self.hdp.get_tx_identity_info(alt_ac) # # NOTE The hgnc id is the 6th element in this list tx_ac is the 0th element in the list @@ -1538,7 +1538,7 @@ def hgvs_r_to_c(self, hgvs_object): # def tx_for_gene(self, hgnc): # """ # Use uta.py (hdp) to return the transcript information for a specified gene (HGNC SYMBOL) - # see hgvs.dataproviders.uta.py for details + # see vvhgvs.dataproviders.uta.py for details # """ # # Interface with the UTA database via get_tx_for_gene in uta.py # tx_for_gene = self.hdp.get_tx_for_gene(hgnc) @@ -1547,7 +1547,7 @@ def hgvs_r_to_c(self, hgvs_object): # def ng_extract(self, tx_for_gene): # """ # Extract RefSeqGene Accession from transcript information - # see hgvs.dataproviders.uta.py for details + # see vvhgvs.dataproviders.uta.py for details # """ # # For each list in the list of lists tx_for_gene # for item in tx_for_gene: @@ -1561,12 +1561,12 @@ def tx_exons(self, tx_ac, alt_ac, alt_aln_method): """ Returns exon information for a given transcript e.g. 
how the exons align to the genomic reference - see hgvs.dataproviders.uta.py for details + see vvhgvs.dataproviders.uta.py for details """ # Interface with the UTA database via get_tx_exons in uta.py try: tx_exons = self.hdp.get_tx_exons(tx_ac, alt_ac, alt_aln_method) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: #e tx_exons = 'hgvs Exception: ' + str(e) return tx_exons @@ -1611,11 +1611,11 @@ def relevant_transcripts(self, hgvs_genomic, evm, alt_aln_method, reverse_normal # Check for coding transcripts try: variant = evm.g_to_t(hgvs_genomic, y) - except hgvs.exceptions.HGVSError: + except vvhgvs.exceptions.HGVSError: # Check for non-coding transcripts try: variant = evm.g_to_t(hgvs_genomic, y) - except hgvs.exceptions.HGVSError: + except vvhgvs.exceptions.HGVSError: continue except: continue @@ -1628,7 +1628,7 @@ def relevant_transcripts(self, hgvs_genomic, evm, alt_aln_method, reverse_normal # Interface with the UTA database via get_tx_exons in uta.py try: tx_exons = self.hdp.get_tx_exons(tx_ac, alt_ac, alt_aln_method) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: tx_exons = 'hgvs Exception: ' + str(e) return tx_exons try: @@ -1654,7 +1654,7 @@ def relevant_transcripts(self, hgvs_genomic, evm, alt_aln_method, reverse_normal variant = evm.g_to_t(rev_hgvs_genomic, tx_ac) try: self.hp.parse_hgvs_variant(str(variant)) - except hgvs.exceptions.HGVSError: + except vvhgvs.exceptions.HGVSError: continue except TypeError: continue @@ -1682,7 +1682,7 @@ def validateHGVS(self, query): try: self.vr.validate(hgvs_input) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: return e else: return 'false' @@ -2210,7 +2210,7 @@ def chr_to_rsg(self, hgvs_genomic, hn): continue try: self.vr.validate(hgvs_refseqgene) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if 'does not agree with reference sequence' in error: match = 
re.findall(r'\(([GATC]+)\)', error) @@ -2271,7 +2271,7 @@ def chr_to_rsg(self, hgvs_genomic, hn): continue try: self.vr.validate(hgvs_refseqgene) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if 'does not agree with reference sequence' in error: match = re.findall(r'\(([GATC]+)\)', error) @@ -2350,7 +2350,7 @@ def rsg_to_chr(self, hgvs_refseqgene, primary_assembly, hn): hgvs_genomic = hn.normalize(hgvs_genomic) try: self.vr.validate(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if 'does not agree with reference sequence' in error: match = re.findall(r'\(([GATC]+)\)', error) @@ -2406,7 +2406,7 @@ def rsg_to_chr(self, hgvs_refseqgene, primary_assembly, hn): hgvs_genomic = hn.normalize(hgvs_genomic) try: self.vr.validate(hgvs_genomic) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if 'does not agree with reference sequence' in error: match = re.findall(r'\(([GATC]+)\)', error) diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index f5f94446..4abca837 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -1,12 +1,12 @@ -import hgvs -import hgvs.exceptions -import hgvs.normalizer +import vvhgvs +import vvhgvs.exceptions +import vvhgvs.normalizer import re import copy import sys import logging import json -from hgvs.assemblymapper import AssemblyMapper +from vvhgvs.assemblymapper import AssemblyMapper from . import hgvs_utils from . import utils as fn from . 
import seq_data @@ -104,12 +104,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr for my_variant in self.batch_list: # Create Normalizers - my_variant.hn = hgvs.normalizer.Normalizer(self.hdp, + my_variant.hn = vvhgvs.normalizer.Normalizer(self.hdp, cross_boundaries=False, shuffle_direction=3, alt_aln_method=self.alt_aln_method ) - my_variant.reverse_normalizer = hgvs.normalizer.Normalizer(self.hdp, + my_variant.reverse_normalizer = vvhgvs.normalizer.Normalizer(self.hdp, cross_boundaries=False, shuffle_direction=5, alt_aln_method=self.alt_aln_method @@ -218,7 +218,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr try: input_parses = self.hp.parse_hgvs_variant(formatted_variant) my_variant.hgvs_formatted = input_parses - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: my_variant.warnings.append(str(e)) logger.warning(str(e)) continue @@ -319,7 +319,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Create easy variant mapper (over variant mapper) and splign locked evm try: to_n = my_variant.evm.c_to_n(input_parses_copy) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: logger.debug("Except passed, %s", e) else: if to_n.posedit.pos.end.base < to_n.posedit.pos.start.base: @@ -500,7 +500,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr hgvs_transcript_variant = self.hp.parse_hgvs_variant(str_transcript) try: self.vr.validate(hgvs_transcript_variant) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = str(e) if 'intronic variant' in error: genome_context_transcript_variant = genomic_accession + '(' + transcript_accession +\ @@ -556,7 +556,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr for alt_gen_var in multi_gen_vars: try: alt_gen_var = variant.hn.normalize(alt_gen_var) - except 
hgvs.exceptions.HGVSInvalidVariantError: + except vvhgvs.exceptions.HGVSInvalidVariantError: continue for build in self.genome_builds: test = seq_data.supported_for_mapping(alt_gen_var.ac, build) @@ -564,7 +564,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr try: vcf_dict = hgvs_utils.report_hgvs2vcf(alt_gen_var, build, variant.reverse_normalizer, self.sf) - except hgvs.exceptions.HGVSInvalidVariantError: + except vvhgvs.exceptions.HGVSInvalidVariantError: continue # Identify primary assembly positions if 'NC_' in alt_gen_var.ac: @@ -661,7 +661,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr re_parse_protein = self.hp.parse_hgvs_variant(format_p) re_parse_protein_single_aa = fn.single_letter_protein(re_parse_protein) predicted_protein_variant_dict["slr"] = str(re_parse_protein_single_aa) - except hgvs.exceptions.HGVSParseError as e: + except vvhgvs.exceptions.HGVSParseError as e: logger.debug("Except passed, %s", e) else: predicted_protein_variant_dict["slr"] = str(predicted_protein_variant) @@ -810,7 +810,7 @@ def gene2transcripts(self, query): try: tx_info = self.hdp.get_tx_identity_info(hgnc) hgnc = tx_info[6] - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: return {'error': str(e)} else: found_res = False @@ -821,7 +821,7 @@ def gene2transcripts(self, query): hgnc = tx_info[6] found_res = True break - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: logger.debug("Except passed, %s", e) if not found_res: return {'error': 'No transcript definition for (tx_ac=' + hgnc + ')'} @@ -988,7 +988,7 @@ def _get_transcript_info(self, variant): hgvs_vt = self.hp.parse_hgvs_variant(str(variant.hgvs_formatted)) try: self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) - except hgvs.exceptions.HGVSError as e: + except vvhgvs.exceptions.HGVSError as e: error = 'Please inform UTA admin of the following error: ' + str(e) reason = 
"VariantValidator cannot recover information for transcript " + str( hgvs_vt.ac) + ' because it is not available in the Universal Transcript Archive' @@ -1022,7 +1022,7 @@ def _get_transcript_info(self, variant): if entry['expiry'] == 'true': try: entry = self.db.data_add(accession=accession, validator=self) - except hgvs.exceptions.HGVSError: + except vvhgvs.exceptions.HGVSError: error = 'Transcript %s is not currently supported' % accession variant.warnings.append(error) logger.warning(error) diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py index 51be5c22..7ec9f5ee 100644 --- a/VariantValidator/modules/vvMixinInit.py +++ b/VariantValidator/modules/vvMixinInit.py @@ -1,18 +1,18 @@ import os from configparser import ConfigParser -import hgvs -import hgvs.parser -import hgvs.dataproviders.uta -import hgvs.dataproviders.seqfetcher -import hgvs.assemblymapper -import hgvs.variantmapper -import hgvs.sequencevariant -import hgvs.validator -import hgvs.exceptions -import hgvs.location -import hgvs.posedit -import hgvs.edit -import hgvs.normalizer +import vvhgvs +import vvhgvs.parser +import vvhgvs.dataproviders.uta +import vvhgvs.dataproviders.seqfetcher +import vvhgvs.assemblymapper +import vvhgvs.variantmapper +import vvhgvs.sequencevariant +import vvhgvs.validator +import vvhgvs.exceptions +import vvhgvs.location +import vvhgvs.posedit +import vvhgvs.edit +import vvhgvs.normalizer from Bio.Seq import Seq import re @@ -93,89 +93,89 @@ def __init__(self): _is_released_version = True else: self.releasedVersion = False - self.hgvsVersion = hgvs.__version__ + self.hgvsVersion = vvhgvs.__version__ # Set up HGVS # Configure hgvs package global settings - hgvs.global_config.uta.pool_max = 25 - hgvs.global_config.formatting.max_ref_length = 1000000 + vvhgvs.global_config.uta.pool_max = 25 + vvhgvs.global_config.formatting.max_ref_length = 1000000 # Create HGVS objects - self.hdp = hgvs.dataproviders.uta.connect(pooling=True) - 
self.hp = hgvs.parser.Parser() # Parser - self.vr = hgvs.validator.Validator(self.hdp) # Validator - self.vm = hgvs.variantmapper.VariantMapper(self.hdp) # Variant mapper + self.hdp = vvhgvs.dataproviders.uta.connect(pooling=True) + self.hp = vvhgvs.parser.Parser() # Parser + self.vr = vvhgvs.validator.Validator(self.hdp) # Validator + self.vm = vvhgvs.variantmapper.VariantMapper(self.hdp) # Variant mapper # Create a lose vm instance - self.lose_vm = hgvs.variantmapper.VariantMapper(self.hdp, + self.lose_vm = vvhgvs.variantmapper.VariantMapper(self.hdp, replace_reference=True, prevalidation_level=None ) - self.nr_vm = hgvs.variantmapper.VariantMapper(self.hdp, replace_reference=False) # No reverse variant mapper - self.sf = hgvs.dataproviders.seqfetcher.SeqFetcher() # Seqfetcher + self.nr_vm = vvhgvs.variantmapper.VariantMapper(self.hdp, replace_reference=False) # No reverse variant mapper + self.sf = vvhgvs.dataproviders.seqfetcher.SeqFetcher() # Seqfetcher # Set standard genome builds self.genome_builds = ['GRCh37', 'hg19', 'GRCh38'] self.utaSchema = str(self.hdp.data_version()) # Create normalizer - self.reverse_hn = hgvs.normalizer.Normalizer(self.hdp, + self.reverse_hn = vvhgvs.normalizer.Normalizer(self.hdp, cross_boundaries=False, shuffle_direction=5, alt_aln_method='splign' ) - self.merge_normalizer = hgvs.normalizer.Normalizer( + self.merge_normalizer = vvhgvs.normalizer.Normalizer( self.hdp, cross_boundaries=False, - shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, + shuffle_direction=vvhgvs.global_config.normalizer.shuffle_direction, alt_aln_method='splign', validate=False ) - self.reverse_merge_normalizer = hgvs.normalizer.Normalizer( + self.reverse_merge_normalizer = vvhgvs.normalizer.Normalizer( self.hdp, cross_boundaries=False, - shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, + shuffle_direction=vvhgvs.global_config.normalizer.shuffle_direction, alt_aln_method='splign', validate=False ) # When we are able to 
access Ensembl data we will need to use these normalizer instances # These are currently implemented in VF - self.splign_normalizer = hgvs.normalizer.Normalizer(self.hdp, + self.splign_normalizer = vvhgvs.normalizer.Normalizer(self.hdp, cross_boundaries=False, - shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, + shuffle_direction=vvhgvs.global_config.normalizer.shuffle_direction, alt_aln_method='splign' # RefSeq ) - self.genebuild_normalizer = hgvs.normalizer.Normalizer(self.hdp, + self.genebuild_normalizer = vvhgvs.normalizer.Normalizer(self.hdp, cross_boundaries=False, - shuffle_direction=hgvs.global_config.normalizer.shuffle_direction, + shuffle_direction=vvhgvs.global_config.normalizer.shuffle_direction, alt_aln_method='genebuild' # Ensembl ) - self.reverse_splign_normalizer = hgvs.normalizer.Normalizer(self.hdp, + self.reverse_splign_normalizer = vvhgvs.normalizer.Normalizer(self.hdp, cross_boundaries=False, shuffle_direction=5, alt_aln_method='splign' ) - self.reverse_genebuild_normalizer = hgvs.normalizer.Normalizer(self.hdp, + self.reverse_genebuild_normalizer = vvhgvs.normalizer.Normalizer(self.hdp, cross_boundaries=False, shuffle_direction=5, alt_aln_method='genebuild' ) # create no_norm_evm - self.no_norm_evm_38 = hgvs.assemblymapper.AssemblyMapper(self.hdp, + self.no_norm_evm_38 = vvhgvs.assemblymapper.AssemblyMapper(self.hdp, assembly_name='GRCh38', alt_aln_method='splign', normalize=False, replace_reference=True ) - self.no_norm_evm_37 = hgvs.assemblymapper.AssemblyMapper(self.hdp, + self.no_norm_evm_37 = vvhgvs.assemblymapper.AssemblyMapper(self.hdp, assembly_name='GRCh37', alt_aln_method='splign', normalize=False, @@ -218,7 +218,7 @@ def protein(self, variant, evm, hpUnused): cod = self.hp.parse_hgvs_variant(cod) p = evm.c_to_p(cod) ass_prot = p.ac - var_p = hgvs.sequencevariant.SequenceVariant(ac=ass_prot, type='p', posedit='(Met1?)') + var_p = vvhgvs.sequencevariant.SequenceVariant(ac=ass_prot, type='p', posedit='(Met1?)') else: 
var_p = evm.c_to_p(var_c) return var_p @@ -255,7 +255,7 @@ def myc_to_p(self, hgvs_transcript, evm, re_to_p, hn): if ((1 <= hgvs_transcript.posedit.pos.start.base <= 3 and hgvs_transcript.posedit.pos.start.offset == 0 ) or (1 <= hgvs_transcript.posedit.pos.end.base <= 3 and hgvs_transcript.posedit.pos.end.offset == 0)) and '*' not in str(hgvs_transcript.posedit.pos): - hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, + hgvs_protein = vvhgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', posedit='(Met1?)') else: try: @@ -322,12 +322,12 @@ def myc_to_p(self, hgvs_transcript, evm, re_to_p, hn): hgvs_transcript.posedit.pos.start.offset == 0) or (1 <= hgvs_transcript.posedit.pos.end.base <= 3 and hgvs_transcript.posedit.pos.end.offset == 0))\ and '*' not in str(hgvs_transcript.posedit.pos): - hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, + hgvs_protein = vvhgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', posedit='(Met1?)') else: # Make the variant - hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, + hgvs_protein = vvhgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', posedit='?') hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein return hgvs_transcript_to_hgvs_protein @@ -356,7 +356,7 @@ def myc_to_p(self, hgvs_transcript, evm, re_to_p, hn): except IndexError: hgvs_transcript_to_hgvs_protein['error'] = \ 'Cannot identify an in-frame Termination codon in the variant mRNA sequence' - hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, + hgvs_protein = vvhgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', posedit='?') hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein return hgvs_transcript_to_hgvs_protein @@ -372,7 +372,7 @@ def myc_to_p(self, hgvs_transcript, evm, re_to_p, hn): 1 <= 
hgvs_transcript.posedit.pos.end.base <= 3 and hgvs_transcript.posedit.pos.end.offset == 0)) \ and '*' not in str(hgvs_transcript.posedit.pos): - hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, + hgvs_protein = vvhgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', posedit='(Met1?)') hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein @@ -397,7 +397,7 @@ def myc_to_p(self, hgvs_transcript, evm, re_to_p, hn): # The Nucleotide variant has not affected the protein sequence i.e. synonymous elif pro_inv_info['variant'] != 'true': # Make the variant - hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, + hgvs_protein = vvhgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', posedit='=') hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein return hgvs_transcript_to_hgvs_protein @@ -459,7 +459,7 @@ def myc_to_p(self, hgvs_transcript, evm, re_to_p, hn): ins_thr + ')' # Complete the variant - hgvs_protein = hgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, + hgvs_protein = vvhgvs.sequencevariant.SequenceVariant(ac=associated_protein_accession, type='p', posedit=posedit) hgvs_transcript_to_hgvs_protein['hgvs_protein'] = hgvs_protein diff --git a/environment.yml b/environment.yml index 36fc75f6..b950064a 100644 --- a/environment.yml +++ b/environment.yml @@ -13,5 +13,5 @@ dependencies: - configparser>=3.5.0 - requests - pip: - - git+https://github.com/openvar/vv_hgvs@master#egg=hgvs + - git+https://github.com/openvar/vv_hgvs@master#egg=vvhgvs - biotools>=0.3.0 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index a5a7287e..48d6da1a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -git+https://github.com/openvar/vv_hgvs@master#egg=hgvs +git+https://github.com/openvar/vv_hgvs@master#egg=vvhgvs biocommons.seqrepo >= 0.5.1 httplib2>=0.9.0 configparser>=3.5.0 diff --git 
a/requirements_dev.txt b/requirements_dev.txt index 4b5b980f..3286e56c 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,5 +1,5 @@ -git+https://github.com/openvar/vv_hgvs@master#egg=hgvs -biocommons.seqrepo >= 0.5.1 +git+https://github.com/openvar/vv_hgvs@master#egg=vvhgvs +biocommons.seqrepo>=0.5.1 httplib2>=0.9.0 configparser>=3.5.0 pyliftover>=0.3 diff --git a/setup.py b/setup.py index 992cd536..b9b2983b 100644 --- a/setup.py +++ b/setup.py @@ -58,7 +58,6 @@ # List run-time dependencies here. These will be installed by pip when the project is installed. install_requires=[ - "hgvs", # This will install BioPython "biocommons.seqrepo >= 0.5.1", "httplib2 >= 0.9.0", "configparser >= 3.5.0", @@ -67,7 +66,7 @@ "requests", ], dependency_links=[ - "git+https://github.com/openvar/vv_hgvs@master#egg=hgvs" + "git+https://github.com/openvar/vv_hgvs@master#egg=vvhgvs" ] ) diff --git a/tests/test_utils.py b/tests/test_utils.py index b62805f7..02e69a93 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,6 +1,6 @@ from unittest import TestCase from VariantValidator.modules import utils -import hgvs.parser +import vvhgvs.parser import json @@ -39,7 +39,7 @@ class TestValStr(TestCase): """Test the valstr function""" def setUp(self): - self.hp = hgvs.parser.Parser() + self.hp = vvhgvs.parser.Parser() def test_string(self): var = '' @@ -329,7 +329,7 @@ class TestHGVSdup2indel(TestCase): """ Will test the hgvs_dup2indel function""" def setUp(self): - self.hp = hgvs.parser.Parser() + self.hp = vvhgvs.parser.Parser() def test_empty(self): with self.assertRaises(AttributeError): From da049d487060b1c328222110becda1a79464010d Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 29 Jul 2019 16:21:30 +0100 Subject: [PATCH 212/223] Added biopython to project requirements --- environment.yml | 3 ++- requirements.txt | 1 + requirements_dev.txt | 1 + setup.py | 1 + 4 files changed, 5 insertions(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 
b950064a..45decae0 100644 --- a/environment.yml +++ b/environment.yml @@ -14,4 +14,5 @@ dependencies: - requests - pip: - git+https://github.com/openvar/vv_hgvs@master#egg=vvhgvs - - biotools>=0.3.0 \ No newline at end of file + - biotools>=0.3.0 + - biopython \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 48d6da1a..2345a1c5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,5 @@ configparser>=3.5.0 pyliftover>=0.3 biotools>=0.3.0 mysql-connector-python +biopython requests diff --git a/requirements_dev.txt b/requirements_dev.txt index 3286e56c..e06b204f 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -6,6 +6,7 @@ pyliftover>=0.3 biotools>=0.3.0 mysql-connector-python requests +biopython pytest>=3.6 pytest-cov codecov \ No newline at end of file diff --git a/setup.py b/setup.py index b9b2983b..167f3430 100644 --- a/setup.py +++ b/setup.py @@ -63,6 +63,7 @@ "configparser >= 3.5.0", "pyliftover >= 0.3", "biotools >= 0.3.0", + "biopython", "requests", ], dependency_links=[ From abdfcc1a70d737ea00b286e663bd5885d58753b3 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 5 Aug 2019 09:41:25 +0100 Subject: [PATCH 213/223] Fixed bug with missing transcript descriptions --- VariantValidator/modules/vvMixinCore.py | 5 +---- tests/test_core.py | 8 ++++++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 4abca837..f3815a01 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -193,8 +193,6 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr my_variant.post_format_conversion = stash_input format_type = my_variant.reftype - hgnc_gene_info = 'false' - logger.debug("Variant input formatted, proceeding to validate.") # Conversions @@ -396,7 +394,6 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Set the 
data my_variant.output_type_flag = 'gene' - my_variant.description = hgnc_gene_info my_variant.primary_assembly = primary_assembly logger.info("Completed initial validation for %s", my_variant.quibble) # Report errors to User and VV admin @@ -984,7 +981,7 @@ def _get_transcript_info(self, variant): Collect transcript information from a non-genomic variant. Should only be called during the validator process """ - + logger.debug("Looking for transcript info") hgvs_vt = self.hp.parse_hgvs_variant(str(variant.hgvs_formatted)) try: self.hdp.get_tx_identity_info(str(hgvs_vt.ac)) diff --git a/tests/test_core.py b/tests/test_core.py index 49efba0a..a3cbfad3 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -153,6 +153,14 @@ def test_variant_RNA(self): self.assertEqual(output['flag'], 'gene_variant') self.assertEqual(list(output), ['flag', '', 'metadata']) + def test_variant_description(self): + var = 'NM_015120.4:c.34C>T' + + out = self.vv.validate(var, 'grch37', 'all').format_as_dict() + self.assertNotEqual(out['NM_015120.4:c.34C>T']['transcript_description'], 'false') + self.assertEqual(out['NM_015120.4:c.34C>T']['transcript_description'], + 'Homo sapiens ALMS1 centrosome and basal body associated protein (ALMS1), mRNA') + class TestGene2Transcripts(unittest.TestCase): """ From 363a70759eca3b2370c6ff9f0cae65a5d93dfd90 Mon Sep 17 00:00:00 2001 From: Peter Causey-Freeman Date: Mon, 5 Aug 2019 12:05:53 +0100 Subject: [PATCH 214/223] Change setup-py to ensure vvhgvs installs --- setup.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 167f3430..097f8f60 100644 --- a/setup.py +++ b/setup.py @@ -65,10 +65,8 @@ "biotools >= 0.3.0", "biopython", "requests", + "vvhgvs @ git+https://github.com/openvar/vv_hgvs@master#egg=vvhgvs", ], - dependency_links=[ - "git+https://github.com/openvar/vv_hgvs@master#egg=vvhgvs" - ] ) # From 03f0fcb19a0f6a337687dd162a17b896bb40d471 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Wed, 7 Aug 
2019 10:26:02 +0100 Subject: [PATCH 215/223] Fixed bug with select_transcripts_dict not being passed around --- VariantValidator/modules/gapped_mapping.py | 4 ++-- VariantValidator/modules/mappers.py | 4 ++-- VariantValidator/modules/vvMixinCore.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/VariantValidator/modules/gapped_mapping.py b/VariantValidator/modules/gapped_mapping.py index 7a7cbcbd..eac38365 100644 --- a/VariantValidator/modules/gapped_mapping.py +++ b/VariantValidator/modules/gapped_mapping.py @@ -28,7 +28,7 @@ def __init__(self, variant, validator): self.hgvs_genomic_5pr = None self.tx_hgvs_not_delins = None - def gapped_g_to_c(self, rel_var): + def gapped_g_to_c(self, rel_var, select_transcripts_dict): """ Gap aware projection from g. to c. """ @@ -112,7 +112,7 @@ def gapped_g_to_c(self, rel_var): if self.validator.select_transcripts != 'all': tx_ac = saved_hgvs_coding.ac # If it's in the selected tx dict, keep it - if tx_ac.split('.')[0] in list(self.validator.select_transcripts_dict.keys()): + if tx_ac.split('.')[0] in list(select_transcripts_dict.keys()): pass # If not get rid of it! else: diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py index ae360862..4803d1d5 100644 --- a/VariantValidator/modules/mappers.py +++ b/VariantValidator/modules/mappers.py @@ -12,7 +12,7 @@ logger = logging.getLogger(__name__) -def gene_to_transcripts(variant, validator): +def gene_to_transcripts(variant, validator, select_transcripts_dict): g_query = validator.hp.parse_hgvs_variant(str(variant.hgvs_formatted)) # Genomic coordinates can be validated immediately @@ -155,7 +155,7 @@ def gene_to_transcripts(variant, validator): gap_mapper = gapped_mapping.GapMapper(variant, validator) - data, nw_rel_var = gap_mapper.gapped_g_to_c(rel_var) + data, nw_rel_var = gap_mapper.gapped_g_to_c(rel_var, select_transcripts_dict) # # Warn the user that the g. 
description is not valid # if data['gapped_alignment_warning'] != '': diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index f3815a01..df51fc33 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -383,7 +383,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Now start mapping from genome to transcripts if my_variant.reftype == ':g.': - toskip = mappers.gene_to_transcripts(my_variant, self) + toskip = mappers.gene_to_transcripts(my_variant, self, select_transcripts_dict) if toskip: continue From e63ceff3df72b93b32c82f97ebd606bd5ace789b Mon Sep 17 00:00:00 2001 From: TeriForey Date: Fri, 9 Aug 2019 11:02:09 +0100 Subject: [PATCH 216/223] Changed table output to include more fields --- VariantValidator/modules/valoutput.py | 81 +++++++++++++++++---------- 1 file changed, 52 insertions(+), 29 deletions(-) diff --git a/VariantValidator/modules/valoutput.py b/VariantValidator/modules/valoutput.py index ccf4c8e4..38521e68 100644 --- a/VariantValidator/modules/valoutput.py +++ b/VariantValidator/modules/valoutput.py @@ -82,42 +82,65 @@ def format_as_json(self, with_meta=True): def format_as_table(self, with_meta=True): """ - Currently the table format will only output correctly validated results, all warnings and obsolete records will - be squashed. + The table format will output all results. 
:param with_meta: :return: """ outputstrings = [] if with_meta: - outputstrings.append('#' + str(self.add_meta())) + outputstrings.append('# Metadata: ' + ', '.join(['%s: %s' % (k, v) for k, v in self.add_meta().items()])) - outputstrings.append(['Input', 'HGVS_transcript', 'HGVS_RefSeqGene', 'HGVS_LRG', 'HGVS_LRG_transcript', - 'Gene_Symbol', 'Transcript_description']) + outputstrings.append(['Input', 'Flag', 'Warnings', 'HGVS_transcript', 'HGVS_RefSeqGene', 'HGVS_LRG', + 'HGVS_LRG_transcript', 'HGVS_Predicted_Protein', 'HGVS_Genomic_GRCh37', 'GRCh37_CHR', + 'GRCh37_POS', 'GRCh37_ID', 'GRCh37_REF', 'GRCh37_ALT', 'HGVS_Genomic_GRCh38', + 'GRCh38_CHR', 'GRCh38_POS', 'GRCh38_ID', 'GRCh38_REF', 'GRCh38_ALT', + 'Gene_Symbol', 'Transcript_description', 'Alt_genomic_loci']) for variant in self.output_list: - if variant.output_type_flag == 'gene': - if variant.warnings == ['Validation error'] or (variant.is_obsolete() and - variant.hgvs_transcript_variant == ''): - continue - else: - outputstrings.append([ - variant.original, - variant.hgvs_transcript_variant, - variant.hgvs_refseqgene_variant, - variant.hgvs_lrg_variant, - variant.hgvs_lrg_transcript_variant, - variant.gene_symbol, - variant.description - ]) - elif variant.output_type_flag == 'intergenic': - outputstrings.append([ - variant.original, - variant.hgvs_transcript_variant, - variant.hgvs_refseqgene_variant, - variant.hgvs_lrg_variant, - variant.hgvs_lrg_transcript_variant, - variant.gene_symbol, - variant.description - ]) + prot = '' + if 'tlr' in variant.hgvs_predicted_protein_consequence: + prot = variant.hgvs_predicted_protein_consequence['tlr'] + grch37 = '' + grch37_vcf = {'chr': '', 'pos': '', 'ref': '', 'alt': '', 'id': ''} + if 'grch37' in variant.primary_assembly_loci: + grch37 = variant.primary_assembly_loci['grch37']['hgvs_genomic_description'] + grch37_vcf = variant.primary_assembly_loci['grch37']['vcf'] + grch37_vcf['id'] = '.' 
+ grch38 = '' + grch38_vcf = {'chr': '', 'pos': '', 'ref': '', 'alt': '', 'id': ''} + if 'grch38' in variant.primary_assembly_loci: + grch38 = variant.primary_assembly_loci['grch38']['hgvs_genomic_description'] + grch38_vcf = variant.primary_assembly_loci['grch38']['vcf'] + grch38_vcf['id'] = '.' + alt_genomic = [] + for alt in variant.alt_genomic_loci: + for k, v in alt.items(): + if k == 'grch37' or k == 'grch38': + alt_genomic.append(v['hgvs_genomic_description']) + outputstrings.append([ + variant.original, + variant.output_type_flag, + '|'.join(variant.process_warnings()), + variant.hgvs_transcript_variant, + variant.hgvs_refseqgene_variant, + variant.hgvs_lrg_variant, + variant.hgvs_lrg_transcript_variant, + prot, + grch37, + grch37_vcf['chr'], + grch37_vcf['pos'], + grch37_vcf['id'], + grch37_vcf['ref'], + grch37_vcf['alt'], + grch38, + grch38_vcf['chr'], + grch38_vcf['pos'], + grch38_vcf['id'], + grch38_vcf['ref'], + grch38_vcf['alt'], + variant.gene_symbol, + variant.description, + '|'.join(alt_genomic) + ]) return outputstrings def add_meta(self): From 1569430c2ae2bf44db21ea97afa2047034f58525 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 12 Aug 2019 10:01:20 +0100 Subject: [PATCH 217/223] Tweaked table output again and updated tests --- VariantValidator/modules/valoutput.py | 25 +++++++------ tests/test_valoutput.py | 51 ++++++++++++++++++--------- 2 files changed, 49 insertions(+), 27 deletions(-) diff --git a/VariantValidator/modules/valoutput.py b/VariantValidator/modules/valoutput.py index 38521e68..845d4a26 100644 --- a/VariantValidator/modules/valoutput.py +++ b/VariantValidator/modules/valoutput.py @@ -90,35 +90,39 @@ def format_as_table(self, with_meta=True): if with_meta: outputstrings.append('# Metadata: ' + ', '.join(['%s: %s' % (k, v) for k, v in self.add_meta().items()])) - outputstrings.append(['Input', 'Flag', 'Warnings', 'HGVS_transcript', 'HGVS_RefSeqGene', 'HGVS_LRG', + outputstrings.append(['Input', 'Warnings', 'HGVS_transcript', 
'HGVS_RefSeqGene', 'HGVS_LRG', 'HGVS_LRG_transcript', 'HGVS_Predicted_Protein', 'HGVS_Genomic_GRCh37', 'GRCh37_CHR', 'GRCh37_POS', 'GRCh37_ID', 'GRCh37_REF', 'GRCh37_ALT', 'HGVS_Genomic_GRCh38', 'GRCh38_CHR', 'GRCh38_POS', 'GRCh38_ID', 'GRCh38_REF', 'GRCh38_ALT', - 'Gene_Symbol', 'Transcript_description', 'Alt_genomic_loci']) + 'Gene_Symbol', 'HGNC_Gene_ID', 'Transcript_description', 'Alt_genomic_loci']) for variant in self.output_list: prot = '' - if 'tlr' in variant.hgvs_predicted_protein_consequence: + if variant.hgvs_predicted_protein_consequence is not None: prot = variant.hgvs_predicted_protein_consequence['tlr'] grch37 = '' grch37_vcf = {'chr': '', 'pos': '', 'ref': '', 'alt': '', 'id': ''} - if 'grch37' in variant.primary_assembly_loci: + if variant.primary_assembly_loci and 'grch37' in variant.primary_assembly_loci: grch37 = variant.primary_assembly_loci['grch37']['hgvs_genomic_description'] grch37_vcf = variant.primary_assembly_loci['grch37']['vcf'] grch37_vcf['id'] = '.' grch38 = '' grch38_vcf = {'chr': '', 'pos': '', 'ref': '', 'alt': '', 'id': ''} - if 'grch38' in variant.primary_assembly_loci: + if variant.primary_assembly_loci and 'grch38' in variant.primary_assembly_loci: grch38 = variant.primary_assembly_loci['grch38']['hgvs_genomic_description'] grch38_vcf = variant.primary_assembly_loci['grch38']['vcf'] grch38_vcf['id'] = '.' 
alt_genomic = [] - for alt in variant.alt_genomic_loci: - for k, v in alt.items(): - if k == 'grch37' or k == 'grch38': - alt_genomic.append(v['hgvs_genomic_description']) + if variant.alt_genomic_loci: + for alt in variant.alt_genomic_loci: + for k, v in alt.items(): + if k == 'grch37' or k == 'grch38': + alt_genomic.append(v['hgvs_genomic_description']) + gene_id = '' + if variant.stable_gene_ids: + if 'hgnc_id' in variant.stable_gene_ids: + gene_id = variant.stable_gene_ids['hgnc_id'] outputstrings.append([ variant.original, - variant.output_type_flag, '|'.join(variant.process_warnings()), variant.hgvs_transcript_variant, variant.hgvs_refseqgene_variant, @@ -138,6 +142,7 @@ def format_as_table(self, with_meta=True): grch38_vcf['ref'], grch38_vcf['alt'], variant.gene_symbol, + gene_id, variant.description, '|'.join(alt_genomic) ]) diff --git a/tests/test_valoutput.py b/tests/test_valoutput.py index b57ea27b..0316a8ee 100644 --- a/tests/test_valoutput.py +++ b/tests/test_valoutput.py @@ -182,8 +182,11 @@ def test_table_empty(self): res = obj.format_as_table(with_meta=False) print(res) self.assertIsInstance(res, list) - self.assertEqual(res, [['Input', 'HGVS_transcript', 'HGVS_RefSeqGene', 'HGVS_LRG', 'HGVS_LRG_transcript', - 'Gene_Symbol', 'Transcript_description']]) + self.assertEqual(res, [['Input', 'Warnings', 'HGVS_transcript', 'HGVS_RefSeqGene', 'HGVS_LRG', + 'HGVS_LRG_transcript', 'HGVS_Predicted_Protein', 'HGVS_Genomic_GRCh37', 'GRCh37_CHR', + 'GRCh37_POS', 'GRCh37_ID', 'GRCh37_REF', 'GRCh37_ALT', 'HGVS_Genomic_GRCh38', + 'GRCh38_CHR', 'GRCh38_POS', 'GRCh38_ID', 'GRCh38_REF', 'GRCh38_ALT', + 'Gene_Symbol', 'HGNC_Gene_ID', 'Transcript_description', 'Alt_genomic_loci']]) def test_table_one(self): var1 = Variant('var1') @@ -191,8 +194,13 @@ def test_table_one(self): res = obj.format_as_table(with_meta=False) print(res) self.assertIsInstance(res, list) - self.assertEqual(res, [['Input', 'HGVS_transcript', 'HGVS_RefSeqGene', 'HGVS_LRG', 'HGVS_LRG_transcript', - 
'Gene_Symbol', 'Transcript_description']]) + self.assertEqual(res, [['Input', 'Warnings', 'HGVS_transcript', 'HGVS_RefSeqGene', 'HGVS_LRG', + 'HGVS_LRG_transcript', 'HGVS_Predicted_Protein', 'HGVS_Genomic_GRCh37', 'GRCh37_CHR', + 'GRCh37_POS', 'GRCh37_ID', 'GRCh37_REF', 'GRCh37_ALT', 'HGVS_Genomic_GRCh38', + 'GRCh38_CHR', 'GRCh38_POS', 'GRCh38_ID', 'GRCh38_REF', 'GRCh38_ALT', + 'Gene_Symbol', 'HGNC_Gene_ID', 'Transcript_description', 'Alt_genomic_loci'], + ['var1', '', None, None, None, None, '', '', '', '', '', '', '', '', '', + '', '', '', '', '', '', '', '']]) def test_table_one_gene(self): var1 = Variant('var1') @@ -201,9 +209,13 @@ def test_table_one_gene(self): res = obj.format_as_table(with_meta=False) print(res) self.assertIsInstance(res, list) - self.assertEqual(res[0], ['Input', 'HGVS_transcript', 'HGVS_RefSeqGene', 'HGVS_LRG', 'HGVS_LRG_transcript', - 'Gene_Symbol', 'Transcript_description']) - self.assertEqual(res[1], ['var1', None, None, None, None, '', '']) + self.assertEqual(res[0], ['Input', 'Warnings', 'HGVS_transcript', 'HGVS_RefSeqGene', 'HGVS_LRG', + 'HGVS_LRG_transcript', 'HGVS_Predicted_Protein', 'HGVS_Genomic_GRCh37', 'GRCh37_CHR', + 'GRCh37_POS', 'GRCh37_ID', 'GRCh37_REF', 'GRCh37_ALT', 'HGVS_Genomic_GRCh38', + 'GRCh38_CHR', 'GRCh38_POS', 'GRCh38_ID', 'GRCh38_REF', 'GRCh38_ALT', + 'Gene_Symbol', 'HGNC_Gene_ID', 'Transcript_description', 'Alt_genomic_loci']) + self.assertEqual(res[1], ['var1', '', None, None, None, None, '', '', '', '', '', '', '', '', '', '', + '', '', '', '', '', '', '']) self.assertEqual(len(res), 2) def test_table_intergenic(self): @@ -213,7 +225,8 @@ def test_table_intergenic(self): res = obj.format_as_table(with_meta=False) print(res) self.assertIsInstance(res, list) - self.assertEqual(res[1], ['var1', None, None, None, None, '', '']) + self.assertEqual(res[1], ['var1', '', None, None, None, None, '', '', '', '', '', '', '', '', '', + '', '', '', '', '', '', '', '']) self.assertEqual(len(res), 2) def 
test_table_gene_warnings(self): @@ -233,10 +246,13 @@ def test_table_gene_warnings(self): print(res) self.assertIsInstance(res, list) - self.assertEqual(res[0], ['Input', 'HGVS_transcript', 'HGVS_RefSeqGene', 'HGVS_LRG', 'HGVS_LRG_transcript', - 'Gene_Symbol', 'Transcript_description']) - self.assertEqual(res[1], ['var2', None, None, None, None, '', '']) - self.assertEqual(len(res), 2) + self.assertEqual(res[1], ['var1', 'Validation error', None, None, None, None, '', '', '', '', '', '', + '', '', '', '', '', '', '', '', '', '', '']) + self.assertEqual(res[2], ['var2', 'obsolete', None, None, None, None, '', '', '', '', '', '', '', + '', '', '', '', '', '', '', '', '', '']) + self.assertEqual(res[3], ['var3', 'obsolete', '', None, None, None, '', '', '', '', '', '', '', + '', '', '', '', '', '', '', '', '', '']) + self.assertEqual(len(res), 4) def test_table_intergenic_warnings(self): var1 = Variant('var1') @@ -255,11 +271,12 @@ def test_table_intergenic_warnings(self): print(res) self.assertIsInstance(res, list) - self.assertEqual(res[0], ['Input', 'HGVS_transcript', 'HGVS_RefSeqGene', 'HGVS_LRG', 'HGVS_LRG_transcript', - 'Gene_Symbol', 'Transcript_description']) - self.assertEqual(res[1], ['var1', None, None, None, None, '', '']) - self.assertEqual(res[2], ['var2', None, None, None, None, '', '']) - self.assertEqual(res[3], ['var3', '', None, None, None, '', '']) + self.assertEqual(res[1], ['var1', 'Validation error', None, None, None, None, '', '', '', '', '', + '', '', '', '', '', '', '', '', '', '', '', '']) + self.assertEqual(res[2], ['var2', 'obsolete', None, None, None, None, '', '', '', '', '', '', '', + '', '', '', '', '', '', '', '', '', '']) + self.assertEqual(res[3], ['var3', 'obsolete', '', None, None, None, '', '', '', '', '', '', '', + '', '', '', '', '', '', '', '', '', '']) self.assertEqual(len(res), 4) # From b3fd1413966e55e5c67246959f7d6ce3da0e518e Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 12 Aug 2019 10:01:56 +0100 Subject: [PATCH 
218/223] Changed how removed whitespace warning is output --- VariantValidator/modules/variant.py | 4 ++++ VariantValidator/modules/vvMixinCore.py | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/VariantValidator/modules/variant.py b/VariantValidator/modules/variant.py index 79c96fa4..027e4f9d 100644 --- a/VariantValidator/modules/variant.py +++ b/VariantValidator/modules/variant.py @@ -101,7 +101,11 @@ def remove_whitespace(self): Will remove all whitespace from quibble :return: """ + prev = self.quibble self.quibble = ''.join(self.quibble.split()) + if self.quibble != prev: + caution = 'Whitespace removed from variant description %s' % self.quibble + self.warnings.append(caution) def format_quibble(self): """ diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index df51fc33..0704de22 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -137,10 +137,6 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Remove whitespace my_variant.remove_whitespace() - if my_variant.quibble != my_variant.original: - caution = 'Whitespace removed from variant description %s' % my_variant.original - my_variant.warnings.append(caution) - logger.debug(caution) # Set the primary_assembly if not my_variant.primary_assembly: From 912dc547c4fcd6d1d2d7385fcdc5a2ec2e4a1d9e Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 12 Aug 2019 10:39:14 +0100 Subject: [PATCH 219/223] Fixed bug with RNA variant descriptions not being processed correctly --- VariantValidator/modules/vvMixinCore.py | 5 ++--- tests/test_core.py | 3 +-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 0704de22..2f7ee986 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -187,7 +187,6 @@ def validate(self, batch_variant, 
selected_assembly, select_transcripts, transcr formatted_variant = my_variant.quibble stash_input = my_variant.quibble my_variant.post_format_conversion = stash_input - format_type = my_variant.reftype logger.debug("Variant input formatted, proceeding to validate.") @@ -200,7 +199,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr continue # Change RNA bases to upper case but nothing else - if format_type == ":r.": + if my_variant.reftype == ":r.": formatted_variant = formatted_variant.upper() formatted_variant = formatted_variant.replace(':R.', ':r.') # lowercase the supported variant types @@ -383,7 +382,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr if toskip: continue - if format_type == ':c.' or format_type == ':n.': + if my_variant.reftype == ':c.' or my_variant.reftype == ':n.': toskip = mappers.transcripts_to_gene(my_variant, self, select_transcripts_dict_plus_version) if toskip: continue diff --git a/tests/test_core.py b/tests/test_core.py index a3cbfad3..6baba93a 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -145,13 +145,12 @@ def test_variant_con(self): str(output['validation_warning_1']['validation_warnings'])) def test_variant_RNA(self): - # TODO: This situation needs looking at as I'm sure it shouldn't be returning an empty string. 
var = 'NM_015120.4:r.34DEL' output = self.vv.validate(var, 'GRCh37', 'all').format_as_dict() print(output) self.assertEqual(output['flag'], 'gene_variant') - self.assertEqual(list(output), ['flag', '', 'metadata']) + self.assertEqual(list(output), ['flag', 'NM_015120.4:c.34del', 'metadata']) def test_variant_description(self): var = 'NM_015120.4:c.34C>T' From 2c19598ee28a86e0f6c250b9d2d5ce2d17678ada Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 12 Aug 2019 10:49:07 +0100 Subject: [PATCH 220/223] Added tool to Entrez connection, fixes #82 --- VariantValidator/modules/vvMixinConverters.py | 1 + 1 file changed, 1 insertion(+) diff --git a/VariantValidator/modules/vvMixinConverters.py b/VariantValidator/modules/vvMixinConverters.py index ebfd7a8a..c4812450 100644 --- a/VariantValidator/modules/vvMixinConverters.py +++ b/VariantValidator/modules/vvMixinConverters.py @@ -1724,6 +1724,7 @@ def entrez_efetch(self, db, id, rettype, retmode): """ # from Bio import Entrez Entrez.email = self.entrez_email + Entrez.tool = 'VariantValidator' if self.entrez_api_key: Entrez.api_key = self.entrez_api_key # from Bio import SeqIO From 2c50d1434a9f7a55a59b60fee53235161d17544b Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 12 Aug 2019 11:10:54 +0100 Subject: [PATCH 221/223] Added catch for variants that raise an exception within the allele handling and test to confirm --- VariantValidator/modules/format_converters.py | 7 ++++++- tests/test_core.py | 8 ++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/VariantValidator/modules/format_converters.py b/VariantValidator/modules/format_converters.py index 45841fca..351d91fb 100644 --- a/VariantValidator/modules/format_converters.py +++ b/VariantValidator/modules/format_converters.py @@ -626,7 +626,12 @@ def allele_parser(variant, validation): pass try: # Submit to allele extraction function - alleles = validation.hgvs_alleles(variant.quibble, variant.hn) + try: + alleles = 
validation.hgvs_alleles(variant.quibble, variant.hn) + except fn.alleleVariantError as e: + variant.warnings.append(str(e)) + logger.warning(str(e)) + return True variant.warnings.append('Automap has extracted possible variant descriptions') logger.info('Automap has extracted possible variant descriptions, resubmitting') for allele in alleles: diff --git a/tests/test_core.py b/tests/test_core.py index 6baba93a..61287e3d 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -160,6 +160,14 @@ def test_variant_description(self): self.assertEqual(out['NM_015120.4:c.34C>T']['transcript_description'], 'Homo sapiens ALMS1 centrosome and basal body associated protein (ALMS1), mRNA') + def test_variant_format(self): + var = "NM_020812.3:c.[3190_3191delCT];[(3190_3191delCT)]" + + out = self.vv.validate(var, 'grch37', 'all').format_as_dict() + self.assertEqual(out['flag'], 'warning') + self.assertEqual(out['validation_warning_1']['validation_warnings'], + ['Unsupported format c.[3190_3191delCT];[(3190_3191delCT)]']) + class TestGene2Transcripts(unittest.TestCase): """ From bf85133931673dafa5be04eed7f8e89ad9827a57 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 12 Aug 2019 11:23:33 +0100 Subject: [PATCH 222/223] Added step to remove quotation marks from around input sequence, fixes #65 --- VariantValidator/modules/variant.py | 6 ++++++ VariantValidator/modules/vvMixinCore.py | 1 + tests/test_core.py | 22 ++++++++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/VariantValidator/modules/variant.py b/VariantValidator/modules/variant.py index 027e4f9d..b5374e46 100644 --- a/VariantValidator/modules/variant.py +++ b/VariantValidator/modules/variant.py @@ -107,6 +107,12 @@ def remove_whitespace(self): caution = 'Whitespace removed from variant description %s' % self.quibble self.warnings.append(caution) + def remove_quotes(self): + if self.quibble.startswith('"') or self.quibble.startswith("'"): + self.quibble = self.quibble[1:] + if self.quibble.endswith('"') 
or self.quibble.endswith("'"): + self.quibble = self.quibble[:-1] + def format_quibble(self): """ Removes whitespace from the ends of the string diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py index 2f7ee986..a18c2be4 100644 --- a/VariantValidator/modules/vvMixinCore.py +++ b/VariantValidator/modules/vvMixinCore.py @@ -137,6 +137,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr # Remove whitespace my_variant.remove_whitespace() + my_variant.remove_quotes() # Set the primary_assembly if not my_variant.primary_assembly: diff --git a/tests/test_core.py b/tests/test_core.py index 61287e3d..f30a8f72 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -168,6 +168,28 @@ def test_variant_format(self): self.assertEqual(out['validation_warning_1']['validation_warnings'], ['Unsupported format c.[3190_3191delCT];[(3190_3191delCT)]']) + def test_variant_quotes_start(self): + var = '"NM_015120.4:c.34C>T' + + out = self.vv.validate(var, 'GRCh37', 'all').format_as_dict() + self.assertEqual(out['flag'], 'gene_variant') + self.assertTrue('NM_015120.4:c.34C>T' in out.keys()) + + def test_variant_quotes_end(self): + var = 'NM_015120.4:c.34C>T"' + + out = self.vv.validate(var, 'GRCh37', 'all').format_as_dict() + print(out) + self.assertEqual(out['flag'], 'gene_variant') + self.assertTrue('NM_015120.4:c.34C>T' in out.keys()) + + def test_variant_quotes_both(self): + var = '"NM_015120.4:c.34C>T"' + + out = self.vv.validate(var, 'GRCh37', 'all').format_as_dict() + self.assertEqual(out['flag'], 'gene_variant') + self.assertTrue('NM_015120.4:c.34C>T' in out.keys()) + class TestGene2Transcripts(unittest.TestCase): """ From f88525ceb77b587077c2301ff8caea6dba5ee195 Mon Sep 17 00:00:00 2001 From: TeriForey Date: Mon, 12 Aug 2019 12:41:17 +0100 Subject: [PATCH 223/223] Updated Manual --- docs/MANUAL.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/docs/MANUAL.md 
b/docs/MANUAL.md index fc9b6b98..2c3f15e4 100644 --- a/docs/MANUAL.md +++ b/docs/MANUAL.md @@ -7,7 +7,7 @@ By default the edited configuration will be placed in the users home directory ( To create this file automatically, run the configuration script installed alongside the package. ```bash -python bin/vv_configure.py +vv_configure.py ``` This will ask you to enter a value for each item in the configuration file. @@ -63,7 +63,7 @@ made per second. See [this article](https://ncbiinsights.ncbi.nlm.nih.gov/2017/1 To import the initial data into the Validator MySQL database, run the following script: ```bash -python bin/update_vdb.py +update_vdb.py ``` This will download the required data to convert between LRG and RefSeq IDs. We recommend re-running this command on a regular basis as changes are continually made to the RefSeq and LRG collections. @@ -76,7 +76,7 @@ To run VariantValidator, we have provided the installed script `variant_validato usage: variant_validator.py [-h] -v VARIANT [VARIANT ...] [-g [{GRCh37,GRCh38,hg19,hg38}]] [-t [TRANSCRIPTS]] [-s {individual,batch}] - [-f {dict,list,json}] [-o OUTPUT] + [-f {dict,table,json}] [-o OUTPUT] [-m] optional arguments: -h, --help show this help message and exit @@ -89,11 +89,12 @@ optional arguments: -s {individual,batch}, --submission {individual,batch} Submit variants individually or as a single batch validation (default: individual) - -f {dict,list,json}, --output_format {dict,list,json} + -f {dict,table,json}, --output_format {dict,table,json} Output validations as a list or as a dictionary (default: dict) -o OUTPUT, --output OUTPUT Specifies the output file (default: stdout) + -m, --meta Also output metadata (default: False) ``` From this script you can run the validator with a number of different input and output options. 
@@ -106,7 +107,7 @@ validator = VariantValidator.Validator() # To validate a variant output = validator.validate('NM_000088.3:c.589G>T', 'GRCh37', 'all') -# This returns an ValOutput object that can be used to output the results in a number of different ways +# This returns a ValOutput object that can be used to output the results in a number of different ways (dictionary, json or table) output.format_as_dict(with_meta=True) # The Validator object also contains other useful methods, such as finding all transcripts from a gene ID/symbol @@ -120,7 +121,7 @@ NC_000017.10:g.48275363C>A NG_007400.1:g.8638G>T LRG_1:g.8638G>T LRG_1t1:c.589G>T -17-50198002-C-A (GRCh38) -chr17:50198002C>A (GRCh38) +17-50198002-C-A # GRCh38 only +chr17:50198002C>A # GRCh38 only ```